From fe95502b073095f88282497ac9acd1e548772414 Mon Sep 17 00:00:00 2001
From: eastb233
Date: Tue, 20 Oct 2020 09:45:39 +0800
Subject: [PATCH 1/2] Add URL and Source0 url

---
 gcc.spec | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc.spec b/gcc.spec
index e203c61..5e4eb3b 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -61,7 +61,9 @@
 Name: gcc
 Version: %{gcc_version}
 Release: %{DATE}.12
 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
-Source0: gcc-9.3.0.tar.xz
+URL: https://gcc.gnu.org
+
+Source0: https://ftp.gnu.org/gnu/gcc/gcc-9.3.0/gcc-9.3.0.tar.xz
 %global isl_version 0.16.1
 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
--
Gitee

From 01e0ec8ea6306ca9fc59f90ebe04e80cb2e1691d Mon Sep 17 00:00:00 2001
From: eastb233
Date: Wed, 30 Dec 2020 09:54:10 +0800
Subject: [PATCH 2/2] Upload GCC feature and bugfix patches.

- avoid-cycling-on-vertain-subreg-reloads.patch: Add patch source comment
- change-gcc-BASE-VER.patch: Likewise
- dont-generate-IF_THEN_ELSE.patch: Likewise
- fix-ICE-in-compute_live_loop_exits.patch: Likewise
- fix-ICE-in-eliminate_stmt.patch: Likewise
- fix-ICE-in-vect_create_epilog_for_reduction.patch: Likewise
- fix-ICE-in-vect_stmt_to_vectorize.patch: Likewise
- fix-ICE-in-verify_ssa.patch: Likewise
- fix-ICE-when-vectorizing-nested-cycles.patch: Likewise
- fix-cost-of-plus.patch: Likewise
- ipa-const-prop-self-recursion-bugfix.patch: Likewise
- simplify-removing-subregs.patch: Likewise
- medium-code-mode.patch: Bugfix
- fix-when-peeling-for-alignment.patch: Move to ...
- fix-PR-92351-When-peeling-for-alignment.patch: ... this
- AArch64-Fix-constraints-for-CPY-M.patch: New file
- Apply-maximum-nunits-for-BB-SLP.patch: New file
- Fix-EXTRACT_LAST_REDUCTION-segfault.patch: New file
- Fix-up-push_partial_def-little-endian-bitfield.patch: New file
- Fix-zero-masking-for-vcvtps2ph.patch: New file
- IRA-Handle-fully-tied-destinations.patch: New file
- SLP-VECT-Add-check-to-fix-96837.patch: New file
- aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch: New file
- aarch64-Fix-bf16-and-matrix-g++-gfortran.patch: New file
- aarch64-Fix-mismatched-SVE-predicate-modes.patch: New file
- aarch64-fix-sve-acle-error.patch: New file
- adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch: New file
- bf16-and-matrix-characteristic.patch: New file
- fix-ICE-IPA-compare-VRP-types.patch: New file
- fix-ICE-in-affine-combination.patch: New file
- fix-ICE-in-pass-vect.patch: New file
- fix-ICE-in-vect_update_misalignment_for_peel.patch: New file
- fix-addlosymdi-ICE-in-pass-reload.patch: New file
- fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: New file
- fix-avx512vl-vcvttpd2dq-2-fail.patch: New file
- fix-issue499-add-nop-convert.patch: New file
- fix-issue604-ldist-dependency-fixup.patch: New file
- modulo-sched-Carefully-process-loop-counter-initiali.patch: New file
- re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch: New file
- reduction-paths-with-unhandled-live-stmt.patch: New file
- redundant-loop-elimination.patch: New file
- sccvn-Improve-handling-of-load-masked-with-integer.patch: New file
- speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch: New file
- store-merging-Consider-also-overlapping-stores-earlier.patch: New file
- tree-optimization-96920-another-ICE-when-vectorizing.patch: New file
- tree-optimization-97812-fix-range-query-in-VRP-asser.patch: New file
- vectorizable-comparison-Swap-operands-only-once.patch: New file
- x86-Fix-bf16-and-matrix.patch: New file
---
 AArch64-Fix-constraints-for-CPY-M.patch | 67 +
 Apply-maximum-nunits-for-BB-SLP.patch | 694 +
 Fix-EXTRACT_LAST_REDUCTION-segfault.patch | 82 +
 ...h_partial_def-little-endian-bitfield.patch | 51 +
 Fix-zero-masking-for-vcvtps2ph.patch | 139 +
 IRA-Handle-fully-tied-destinations.patch | 155 +
 SLP-VECT-Add-check-to-fix-96837.patch | 99 +
 ...h64-Fix-ash-lr-lshr-mode-3-expanders.patch | 165 +
 ...h64-Fix-bf16-and-matrix-g++-gfortran.patch | 1613 +
 ...4-Fix-mismatched-SVE-predicate-modes.patch | 34 +
 aarch64-fix-sve-acle-error.patch | 2128 +
 ...-move-EXTRACT_LAST_REDUCTION-costing.patch | 88 +
 avoid-cycling-on-vertain-subreg-reloads.patch | 6 +
 bf16-and-matrix-characteristic.patch | 466067 +++
 change-gcc-BASE-VER.patch | 6 +
 dont-generate-IF_THEN_ELSE.patch | 6 +
 fix-ICE-IPA-compare-VRP-types.patch | 51 +
 fix-ICE-in-affine-combination.patch | 396 +
 fix-ICE-in-compute_live_loop_exits.patch | 6 +
 fix-ICE-in-eliminate_stmt.patch | 18 +-
 fix-ICE-in-pass-vect.patch | 37 +
 ...-in-vect_create_epilog_for_reduction.patch | 6 +
 fix-ICE-in-vect_stmt_to_vectorize.patch | 6 +
 ...in-vect_update_misalignment_for_peel.patch | 784 +
 fix-ICE-in-verify_ssa.patch | 6 +
 fix-ICE-when-vectorizing-nested-cycles.patch | 6 +
 fix-PR-92351-When-peeling-for-alignment.patch | 152 +
 fix-addlosymdi-ICE-in-pass-reload.patch | 30 +
 ...n-vect_recog_mask_conversion_pattern.patch | 115 +
 fix-avx512vl-vcvttpd2dq-2-fail.patch | 301 +
 fix-cost-of-plus.patch | 3 +
 fix-issue499-add-nop-convert.patch | 928 +
 fix-issue604-ldist-dependency-fixup.patch | 108 +
 fix-when-peeling-for-alignment.patch | 23 -
 gcc.spec | 181 +-
 ipa-const-prop-self-recursion-bugfix.patch | 15 +-
 medium-code-mode.patch | 6 +-
 ...efully-process-loop-counter-initiali.patch | 251 +
 ...24-gcc.target-i386-avx512vl-vpshldvd.patch | 215 +
 ...ction-paths-with-unhandled-live-stmt.patch | 64 +
 redundant-loop-elimination.patch | 486 +
 ...handling-of-load-masked-with-integer.patch | 2397 +
 simplify-removing-subregs.patch | 6 +
 ...ysis-and-fix-bootstrap-compare-debug.patch | 718 +
 ...ider-also-overlapping-stores-earlier.patch | 359 +
 ...n-96920-another-ICE-when-vectorizing.patch | 316 +
 ...n-97812-fix-range-query-in-VRP-asser.patch | 48 +
 ...e-comparison-Swap-operands-only-once.patch | 19 +
 x86-Fix-bf16-and-matrix.patch | 321 +
 49 files changed, 479702 insertions(+), 76 deletions(-)
 create mode 100644 AArch64-Fix-constraints-for-CPY-M.patch
 create mode 100644 Apply-maximum-nunits-for-BB-SLP.patch
 create mode 100644 Fix-EXTRACT_LAST_REDUCTION-segfault.patch
 create mode 100644 Fix-up-push_partial_def-little-endian-bitfield.patch
 create mode 100644 Fix-zero-masking-for-vcvtps2ph.patch
 create mode 100644 IRA-Handle-fully-tied-destinations.patch
 create mode 100644 SLP-VECT-Add-check-to-fix-96837.patch
 create mode 100644 aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch
 create mode 100644 aarch64-Fix-bf16-and-matrix-g++-gfortran.patch
 create mode 100644 aarch64-Fix-mismatched-SVE-predicate-modes.patch
 create mode 100644 aarch64-fix-sve-acle-error.patch
 create mode 100644 adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch
 create mode 100644 bf16-and-matrix-characteristic.patch
 create mode 100644 fix-ICE-IPA-compare-VRP-types.patch
 create mode 100644 fix-ICE-in-affine-combination.patch
 create mode 100644 fix-ICE-in-pass-vect.patch
 create mode 100644 fix-ICE-in-vect_update_misalignment_for_peel.patch
 create mode 100644 fix-PR-92351-When-peeling-for-alignment.patch
 create mode 100644 fix-addlosymdi-ICE-in-pass-reload.patch
 create mode 100644
fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch create mode 100644 fix-avx512vl-vcvttpd2dq-2-fail.patch create mode 100644 fix-issue499-add-nop-convert.patch create mode 100644 fix-issue604-ldist-dependency-fixup.patch delete mode 100644 fix-when-peeling-for-alignment.patch create mode 100644 modulo-sched-Carefully-process-loop-counter-initiali.patch create mode 100644 re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch create mode 100644 reduction-paths-with-unhandled-live-stmt.patch create mode 100644 redundant-loop-elimination.patch create mode 100644 sccvn-Improve-handling-of-load-masked-with-integer.patch create mode 100644 speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch create mode 100644 store-merging-Consider-also-overlapping-stores-earlier.patch create mode 100644 tree-optimization-96920-another-ICE-when-vectorizing.patch create mode 100644 tree-optimization-97812-fix-range-query-in-VRP-asser.patch create mode 100644 vectorizable-comparison-Swap-operands-only-once.patch create mode 100644 x86-Fix-bf16-and-matrix.patch diff --git a/AArch64-Fix-constraints-for-CPY-M.patch b/AArch64-Fix-constraints-for-CPY-M.patch new file mode 100644 index 0000000..5fcb38e --- /dev/null +++ b/AArch64-Fix-constraints-for-CPY-M.patch @@ -0,0 +1,67 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-AArch64-Fix-constraints-for-CPY-M.patch +3c2707f33af46ac145769872b65e25fd0b870903 + +diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md +index cbf29a82e28..59bf4a69507 100644 +--- a/gcc/config/aarch64/aarch64-sve.md ++++ b/gcc/config/aarch64/aarch64-sve.md +@@ -6523,7 +6523,7 @@ + (define_insn "@aarch64_sel_dup" + [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w") + (unspec:SVE_FULL +- [(match_operand: 3 "register_operand" "Upa, Upa, Upl, Upl, Upl, Upl") ++ [(match_operand: 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (vec_duplicate:SVE_FULL + (match_operand: 1 "register_operand" "r, w, r, w, r, w")) + (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")] +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c +new file mode 100644 +index 00000000000..1d8f429caeb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++** dup_x0_m: ++** add (x[0-9]+), x0, #?1 ++** mov (p[0-7])\.b, p15\.b ++** mov z0\.d, \2/m, \1 ++** ret ++*/ ++svuint64_t ++dup_x0_m (svuint64_t z0, uint64_t x0) ++{ ++ register svbool_t pg asm ("p15"); ++ asm volatile ("" : "=Upa" (pg)); ++ return svdup_u64_m (z0, pg, x0 + 1); ++} ++ ++/* ++** dup_d1_z: ++** mov (p[0-7])\.b, p15\.b ++** mov z0\.d, \1/m, d1 ++** ret ++*/ ++svfloat64_t ++dup_d1_z (svfloat64_t z0, float64_t d1) ++{ ++ register svbool_t pg asm ("p15"); ++ asm volatile ("" : "=Upa" (pg)); ++ return svdup_f64_m (z0, pg, d1); ++} ++ ++#ifdef __cplusplus ++} ++#endif diff --git a/Apply-maximum-nunits-for-BB-SLP.patch b/Apply-maximum-nunits-for-BB-SLP.patch new file mode 100644 index 0000000..43fc0e0 --- /dev/null +++ b/Apply-maximum-nunits-for-BB-SLP.patch @@ -0,0 +1,694 @@ +This backport contains 1 patch from gcc main stream tree. 
+The commit id of these patchs list as following in the order of time. + +0001-Apply-maximum-nunits-for-BB-SLP.patch +9b75f56d4b7951c60a656396dddd4a65787b95bc + +diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-4.c b/gcc/testsuite/gcc.dg/vect/bb-slp-4.c +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-4.c 2020-12-20 18:46:19.539633230 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-4.c 2020-12-20 18:48:12.799633230 +0800 +@@ -38,5 +38,4 @@ int main (void) + return 0; + } + +-/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */ +- ++/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c 2020-12-20 18:48:12.799633230 +0800 +@@ -0,0 +1,44 @@ ++#include "tree-vect.h" ++ ++void __attribute__ ((noipa)) ++f1 (_Bool *x, unsigned short *y) ++{ ++ x[0] = (y[0] == 1); ++ x[1] = (y[1] == 1); ++} ++ ++void __attribute__ ((noipa)) ++f2 (_Bool *x, unsigned short *y) ++{ ++ x[0] = (y[0] == 1); ++ x[1] = (y[1] == 1); ++ x[2] = (y[2] == 1); ++ x[3] = (y[3] == 1); ++ x[4] = (y[4] == 1); ++ x[5] = (y[5] == 1); ++ x[6] = (y[6] == 1); ++ x[7] = (y[7] == 1); ++} ++ ++_Bool x[8]; ++unsigned short y[8] = { 11, 1, 9, 5, 1, 44, 1, 1 }; ++ ++int ++main (void) ++{ ++ check_vect (); ++ ++ f1 (x, y); ++ ++ if (x[0] || !x[1]) ++ __builtin_abort (); ++ ++ x[1] = 0; ++ ++ f2 (x, y); ++ ++ if (x[0] || !x[1] || x[2] | x[3] || !x[4] || x[5] || !x[6] || !x[7]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff -Nurp a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c +--- a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c 2020-12-20 18:48:11.811633230 +0800 +@@ -0,0 +1,26 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* ++** foo: ++** ( ++** ldr d([0-9]+), \[x1\] ++** ldr q([0-9]+), \[x0\] ++** saddw v([0-9]+)\.4s, v\2\.4s, v\1\.4h ++** str q\3, \[x0\] ++** | ++** ldr q([0-9]+), \[x0\] ++** ldr d([0-9]+), \[x1\] ++** saddw v([0-9]+)\.4s, v\4\.4s, v\5\.4h ++** str q\6, \[x0\] ++** ) ++** ret ++*/ ++void ++foo (int *x, short *y) ++{ ++ x[0] += y[0]; ++ x[1] += y[1]; ++ x[2] += y[2]; ++ x[3] += y[3]; ++} +diff -Nurp a/gcc/testsuite/gcc.target/i386/pr84101.c b/gcc/testsuite/gcc.target/i386/pr84101.c +--- a/gcc/testsuite/gcc.target/i386/pr84101.c 2020-12-20 18:46:18.383633230 +0800 ++++ b/gcc/testsuite/gcc.target/i386/pr84101.c 2020-12-20 18:48:11.611633230 +0800 +@@ -18,4 +18,5 @@ uint64_pair_t pair(int num) + return p ; + } + +-/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" } } */ ++/* See PR92266 for the XFAIL. */ ++/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" { xfail ilp32 } } } */ +diff -Nurp a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c +--- a/gcc/tree-vect-data-refs.c 2020-12-20 18:46:19.911633230 +0800 ++++ b/gcc/tree-vect-data-refs.c 2020-12-20 18:48:11.047633230 +0800 +@@ -4312,9 +4312,8 @@ vect_analyze_data_refs (vec_info *vinfo, + + /* Set vectype for STMT. 
*/ + scalar_type = TREE_TYPE (DR_REF (dr)); +- STMT_VINFO_VECTYPE (stmt_info) +- = get_vectype_for_scalar_type (vinfo, scalar_type); +- if (!STMT_VINFO_VECTYPE (stmt_info)) ++ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type); ++ if (!vectype) + { + if (dump_enabled_p ()) + { +@@ -4345,14 +4344,19 @@ vect_analyze_data_refs (vec_info *vinfo, + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "got vectype for stmt: %G%T\n", +- stmt_info->stmt, STMT_VINFO_VECTYPE (stmt_info)); ++ stmt_info->stmt, vectype); + } + + /* Adjust the minimal vectorization factor according to the + vector type. */ +- vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); ++ vf = TYPE_VECTOR_SUBPARTS (vectype); + *min_vf = upper_bound (*min_vf, vf); + ++ /* Leave the BB vectorizer to pick the vector type later, based on ++ the final dataref group size and SLP node size. */ ++ if (is_a (vinfo)) ++ STMT_VINFO_VECTYPE (stmt_info) = vectype; ++ + if (gatherscatter != SG_NONE) + { + gather_scatter_info gs_info; +diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c +--- a/gcc/tree-vect-patterns.c 2020-12-20 18:46:19.979633230 +0800 ++++ b/gcc/tree-vect-patterns.c 2020-12-20 18:48:11.227633230 +0800 +@@ -4142,9 +4142,10 @@ vect_recog_bool_pattern (stmt_vec_info s + && STMT_VINFO_DATA_REF (stmt_vinfo)) + { + stmt_vec_info pattern_stmt_info; +- vectype = STMT_VINFO_VECTYPE (stmt_vinfo); +- gcc_assert (vectype != NULL_TREE); +- if (!VECTOR_MODE_P (TYPE_MODE (vectype))) ++ tree nunits_vectype; ++ if (!vect_get_vector_types_for_stmt (stmt_vinfo, &vectype, ++ &nunits_vectype) ++ || !VECTOR_MODE_P (TYPE_MODE (vectype))) + return NULL; + + if (check_bool_pattern (var, vinfo, bool_stmts)) +diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2020-12-20 18:46:17.763633230 +0800 ++++ b/gcc/tree-vect-slp.c 2020-12-20 18:48:11.227633230 +0800 +@@ -606,6 +606,77 @@ again: + return 0; + } + ++/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization. ++ Return true if we can, meaning that this choice doesn't conflict with ++ existing SLP nodes that use STMT_INFO. */ ++ ++static bool ++vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype) ++{ ++ tree old_vectype = STMT_VINFO_VECTYPE (stmt_info); ++ if (old_vectype && useless_type_conversion_p (vectype, old_vectype)) ++ return true; ++ ++ if (STMT_VINFO_GROUPED_ACCESS (stmt_info) ++ && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) ++ { ++ /* We maintain the invariant that if any statement in the group is ++ used, all other members of the group have the same vector type. 
*/ ++ stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info); ++ stmt_vec_info member_info = first_info; ++ for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info)) ++ if (STMT_VINFO_NUM_SLP_USES (member_info) > 0 ++ || is_pattern_stmt_p (member_info)) ++ break; ++ ++ if (!member_info) ++ { ++ for (member_info = first_info; member_info; ++ member_info = DR_GROUP_NEXT_ELEMENT (member_info)) ++ STMT_VINFO_VECTYPE (member_info) = vectype; ++ return true; ++ } ++ } ++ else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0 ++ && !is_pattern_stmt_p (stmt_info)) ++ { ++ STMT_VINFO_VECTYPE (stmt_info) = vectype; ++ return true; ++ } ++ ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "Build SLP failed: incompatible vector" ++ " types for: %G", stmt_info->stmt); ++ dump_printf_loc (MSG_NOTE, vect_location, ++ " old vector type: %T\n", old_vectype); ++ dump_printf_loc (MSG_NOTE, vect_location, ++ " new vector type: %T\n", vectype); ++ } ++ return false; ++} ++ ++/* Try to infer and assign a vector type to all the statements in STMTS. ++ Used only for BB vectorization. */ ++ ++static bool ++vect_update_all_shared_vectypes (vec stmts) ++{ ++ tree vectype, nunits_vectype; ++ if (!vect_get_vector_types_for_stmt (stmts[0], &vectype, ++ &nunits_vectype, stmts.length ())) ++ return false; ++ ++ stmt_vec_info stmt_info; ++ unsigned int i; ++ FOR_EACH_VEC_ELT (stmts, i, stmt_info) ++ if (!vect_update_shared_vectype (stmt_info, vectype)) ++ return false; ++ ++ return true; ++} ++ + /* Return true if call statements CALL1 and CALL2 are similar enough + to be combined into the same SLP group. */ + +@@ -751,6 +822,7 @@ vect_build_slp_tree_1 (unsigned char *sw + stmt_vec_info stmt_info; + FOR_EACH_VEC_ELT (stmts, i, stmt_info) + { ++ vec_info *vinfo = stmt_info->vinfo; + gimple *stmt = stmt_info->stmt; + swap[i] = 0; + matches[i] = false; +@@ -784,7 +856,7 @@ vect_build_slp_tree_1 (unsigned char *sw + + tree nunits_vectype; + if (!vect_get_vector_types_for_stmt (stmt_info, &vectype, +- &nunits_vectype) ++ &nunits_vectype, group_size) + || (nunits_vectype + && !vect_record_max_nunits (stmt_info, group_size, + nunits_vectype, max_nunits))) +@@ -796,6 +868,10 @@ vect_build_slp_tree_1 (unsigned char *sw + + gcc_assert (vectype); + ++ if (is_a (vinfo) ++ && !vect_update_shared_vectype (stmt_info, vectype)) ++ continue; ++ + if (gcall *call_stmt = dyn_cast (stmt)) + { + rhs_code = CALL_EXPR; +@@ -1328,7 +1404,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) + if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def) + break; +- if (!grandchild) ++ if (!grandchild ++ && vect_update_all_shared_vectypes (oprnd_info->def_stmts)) + { + /* Roll back. */ + this_tree_size = old_tree_size; +@@ -1369,7 +1446,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, + do extra work to cancel the pattern so the uses see the + scalar version. */ + && !is_pattern_stmt_p (stmt_info) +- && !oprnd_info->any_pattern) ++ && !oprnd_info->any_pattern ++ && vect_update_all_shared_vectypes (oprnd_info->def_stmts)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, +@@ -1488,7 +1566,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) + if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def) + break; +- if (!grandchild) ++ if (!grandchild ++ && (vect_update_all_shared_vectypes ++ (oprnd_info->def_stmts))) + { + /* Roll back. 
*/ + this_tree_size = old_tree_size; +@@ -2026,8 +2106,8 @@ vect_analyze_slp_instance (vec_info *vin + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + { + scalar_type = TREE_TYPE (DR_REF (dr)); +- vectype = get_vectype_for_scalar_type (vinfo, scalar_type); + group_size = DR_GROUP_SIZE (stmt_info); ++ vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); + } + else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) + { +@@ -2669,22 +2749,13 @@ vect_slp_analyze_node_operations_1 (vec_ + Memory accesses already got their vector type assigned + in vect_analyze_data_refs. */ + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); +- if (bb_vinfo +- && ! STMT_VINFO_DATA_REF (stmt_info)) ++ if (bb_vinfo && STMT_VINFO_VECTYPE (stmt_info) == boolean_type_node) + { +- tree vectype, nunits_vectype; +- if (!vect_get_vector_types_for_stmt (stmt_info, &vectype, +- &nunits_vectype)) +- /* We checked this when building the node. */ +- gcc_unreachable (); +- if (vectype == boolean_type_node) +- { +- vectype = vect_get_mask_type_for_stmt (stmt_info); +- if (!vectype) +- /* vect_get_mask_type_for_stmt has already explained the +- failure. */ +- return false; +- } ++ tree vectype = vect_get_mask_type_for_stmt (stmt_info, node); ++ if (!vectype) ++ /* vect_get_mask_type_for_stmt has already explained the ++ failure. */ ++ return false; + + stmt_vec_info sstmt_info; + unsigned int i; +@@ -3585,7 +3656,7 @@ vect_get_constant_vectors (slp_tree op_n + && vect_mask_constant_operand_p (stmt_vinfo)) + vector_type = truth_type_for (stmt_vectype); + else +- vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op)); ++ vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node); + + unsigned int number_of_vectors + = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) +diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2020-12-20 18:46:17.707633230 +0800 ++++ b/gcc/tree-vect-stmts.c 2020-12-20 18:48:11.227633230 +0800 +@@ -798,7 +798,7 @@ vect_prologue_cost_for_slp_op (slp_tree + /* Without looking at the actual initializer a vector of + constants can be implemented as load from the constant pool. + When all elements are the same we can use a splat. */ +- tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op)); ++ tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node); + unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length (); + unsigned num_vects_to_check; + unsigned HOST_WIDE_INT const_nunits; +@@ -3308,7 +3308,7 @@ vectorizable_call (stmt_vec_info stmt_in + /* If all arguments are external or constant defs, infer the vector type + from the scalar type. */ + if (!vectype_in) +- vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type); ++ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); + if (vec_stmt) + gcc_assert (vectype_in); + if (!vectype_in) +@@ -4106,7 +4106,8 @@ vectorizable_simd_clone_call (stmt_vec_i + && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) + { + tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i)); +- arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type); ++ arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, ++ slp_node); + if (arginfo[i].vectype == NULL + || (simd_clone_subparts (arginfo[i].vectype) + > bestn->simdclone->simdlen)) +@@ -4805,7 +4806,7 @@ vectorizable_conversion (stmt_vec_info s + /* If op0 is an external or constant def, infer the vector type + from the scalar type. 
*/ + if (!vectype_in) +- vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type); ++ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); + if (vec_stmt) + gcc_assert (vectype_in); + if (!vectype_in) +@@ -5558,7 +5559,7 @@ vectorizable_shift (stmt_vec_info stmt_i + /* If op0 is an external or constant def, infer the vector type + from the scalar type. */ + if (!vectype) +- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0)); ++ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node); + if (vec_stmt) + gcc_assert (vectype); + if (!vectype) +@@ -5656,7 +5657,8 @@ vectorizable_shift (stmt_vec_info stmt_i + "vector/vector shift/rotate found.\n"); + + if (!op1_vectype) +- op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1)); ++ op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1), ++ slp_node); + incompatible_op1_vectype_p + = (op1_vectype == NULL_TREE + || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype), +@@ -6000,7 +6002,8 @@ vectorizable_operation (stmt_vec_info st + vectype = vectype_out; + } + else +- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0)); ++ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), ++ slp_node); + } + if (vec_stmt) + gcc_assert (vectype); +@@ -8903,7 +8906,7 @@ vectorizable_load (stmt_vec_info stmt_in + condition operands are supportable using vec_is_simple_use. */ + + static bool +-vect_is_simple_cond (tree cond, vec_info *vinfo, ++vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node, + tree *comp_vectype, enum vect_def_type *dts, + tree vectype) + { +@@ -8966,7 +8969,8 @@ vect_is_simple_cond (tree cond, vec_info + scalar_type = build_nonstandard_integer_type + (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))), + TYPE_UNSIGNED (scalar_type)); +- *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type); ++ *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, ++ slp_node); + } + + return true; +@@ -9073,7 +9077,7 @@ vectorizable_condition (stmt_vec_info st + then_clause = gimple_assign_rhs2 (stmt); + else_clause = gimple_assign_rhs3 (stmt); + +- if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, ++ if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, slp_node, + &comp_vectype, &dts[0], slp_node ? NULL : vectype) + || !comp_vectype) + return false; +@@ -9564,7 +9568,8 @@ vectorizable_comparison (stmt_vec_info s + /* Invariant comparison. */ + if (!vectype) + { +- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1)); ++ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), ++ slp_node); + if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits)) + return false; + } +@@ -10322,31 +10327,93 @@ get_related_vectype_for_scalar_type (mac + /* Function get_vectype_for_scalar_type. + + Returns the vector type corresponding to SCALAR_TYPE as supported +- by the target. */ ++ by the target. If GROUP_SIZE is nonzero and we're performing BB ++ vectorization, make sure that the number of elements in the vector ++ is no bigger than GROUP_SIZE. */ + + tree +-get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type) ++get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, ++ unsigned int group_size) + { ++ /* For BB vectorization, we should always have a group size once we've ++ constructed the SLP tree; the only valid uses of zero GROUP_SIZEs ++ are tentative requests during things like early data reference ++ analysis and pattern recognition. 
*/ ++ if (is_a (vinfo)) ++ gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0); ++ else ++ group_size = 0; ++ + tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode, + scalar_type); + if (vectype && vinfo->vector_mode == VOIDmode) + vinfo->vector_mode = TYPE_MODE (vectype); + ++ /* Register the natural choice of vector type, before the group size ++ has been applied. */ + if (vectype) + vinfo->used_vector_modes.add (TYPE_MODE (vectype)); + ++ /* If the natural choice of vector type doesn't satisfy GROUP_SIZE, ++ try again with an explicit number of elements. */ ++ if (vectype ++ && group_size ++ && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size)) ++ { ++ /* Start with the biggest number of units that fits within ++ GROUP_SIZE and halve it until we find a valid vector type. ++ Usually either the first attempt will succeed or all will ++ fail (in the latter case because GROUP_SIZE is too small ++ for the target), but it's possible that a target could have ++ a hole between supported vector types. ++ ++ If GROUP_SIZE is not a power of 2, this has the effect of ++ trying the largest power of 2 that fits within the group, ++ even though the group is not a multiple of that vector size. ++ The BB vectorizer will then try to carve up the group into ++ smaller pieces. */ ++ unsigned int nunits = 1 << floor_log2 (group_size); ++ do ++ { ++ vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode, ++ scalar_type, nunits); ++ nunits /= 2; ++ } ++ while (nunits > 1 && !vectype); ++ } ++ + return vectype; + } + ++/* Return the vector type corresponding to SCALAR_TYPE as supported ++ by the target. NODE, if nonnull, is the SLP tree node that will ++ use the returned vector type. */ ++ ++tree ++get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node) ++{ ++ unsigned int group_size = 0; ++ if (node) ++ { ++ group_size = SLP_TREE_SCALAR_OPS (node).length (); ++ if (group_size == 0) ++ group_size = SLP_TREE_SCALAR_STMTS (node).length (); ++ } ++ return get_vectype_for_scalar_type (vinfo, scalar_type, group_size); ++} ++ + /* Function get_mask_type_for_scalar_type. + + Returns the mask type corresponding to a result of comparison +- of vectors of specified SCALAR_TYPE as supported by target. */ ++ of vectors of specified SCALAR_TYPE as supported by target. ++ NODE, if nonnull, is the SLP tree node that will use the returned ++ vector type. */ + + tree +-get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type) ++get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type, ++ slp_tree node) + { +- tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type); ++ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node); + + if (!vectype) + return NULL; +@@ -11033,6 +11100,9 @@ vect_gen_while_not (gimple_seq *seq, tre + + /* Try to compute the vector types required to vectorize STMT_INFO, + returning true on success and false if vectorization isn't possible. ++ If GROUP_SIZE is nonzero and we're performing BB vectorization, ++ take sure that the number of elements in the vectors is no bigger ++ than GROUP_SIZE. 
+ + On success: + +@@ -11050,11 +11120,21 @@ vect_gen_while_not (gimple_seq *seq, tre + opt_result + vect_get_vector_types_for_stmt (stmt_vec_info stmt_info, + tree *stmt_vectype_out, +- tree *nunits_vectype_out) ++ tree *nunits_vectype_out, ++ unsigned int group_size) + { + vec_info *vinfo = stmt_info->vinfo; + gimple *stmt = stmt_info->stmt; + ++ /* For BB vectorization, we should always have a group size once we've ++ constructed the SLP tree; the only valid uses of zero GROUP_SIZEs ++ are tentative requests during things like early data reference ++ analysis and pattern recognition. */ ++ if (is_a (vinfo)) ++ gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0); ++ else ++ group_size = 0; ++ + *stmt_vectype_out = NULL_TREE; + *nunits_vectype_out = NULL_TREE; + +@@ -11085,7 +11165,7 @@ vect_get_vector_types_for_stmt (stmt_vec + + tree vectype; + tree scalar_type = NULL_TREE; +- if (STMT_VINFO_VECTYPE (stmt_info)) ++ if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info)) + { + *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info); + if (dump_enabled_p ()) +@@ -11094,15 +11174,17 @@ vect_get_vector_types_for_stmt (stmt_vec + } + else + { +- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); +- if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) ++ if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info)) ++ scalar_type = TREE_TYPE (DR_REF (dr)); ++ else if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) + scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); + else + scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); + + /* Pure bool ops don't participate in number-of-units computation. + For comparisons use the types being compared. */ +- if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type) ++ if (!STMT_VINFO_DATA_REF (stmt_info) ++ && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type) + && is_gimple_assign (stmt) + && gimple_assign_rhs_code (stmt) != COND_EXPR) + { +@@ -11122,9 +11204,16 @@ vect_get_vector_types_for_stmt (stmt_vec + } + + if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "get vectype for scalar type: %T\n", scalar_type); +- vectype = get_vectype_for_scalar_type (vinfo, scalar_type); ++ { ++ if (group_size) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "get vectype for scalar type (group size %d):" ++ " %T\n", group_size, scalar_type); ++ else ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "get vectype for scalar type: %T\n", scalar_type); ++ } ++ vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); + if (!vectype) + return opt_result::failure_at (stmt, + "not vectorized:" +@@ -11155,7 +11244,8 @@ vect_get_vector_types_for_stmt (stmt_vec + dump_printf_loc (MSG_NOTE, vect_location, + "get vectype for smallest scalar type: %T\n", + scalar_type); +- nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type); ++ nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, ++ group_size); + if (!nunits_vectype) + return opt_result::failure_at + (stmt, "not vectorized: unsupported data-type %T\n", +@@ -11183,10 +11273,11 @@ vect_get_vector_types_for_stmt (stmt_vec + + /* Try to determine the correct vector type for STMT_INFO, which is a + statement that produces a scalar boolean result. Return the vector +- type on success, otherwise return NULL_TREE. */ ++ type on success, otherwise return NULL_TREE. NODE, if nonnull, ++ is the SLP tree node that will use the returned vector type. 
*/ + + opt_tree +-vect_get_mask_type_for_stmt (stmt_vec_info stmt_info) ++vect_get_mask_type_for_stmt (stmt_vec_info stmt_info, slp_tree node) + { + vec_info *vinfo = stmt_info->vinfo; + gimple *stmt = stmt_info->stmt; +@@ -11198,7 +11289,7 @@ vect_get_mask_type_for_stmt (stmt_vec_in + && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))) + { + scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); +- mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type); ++ mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type, node); + + if (!mask_type) + return opt_tree::failure_at (stmt, +diff -Nurp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +--- a/gcc/tree-vectorizer.h 2020-12-20 18:46:17.851633230 +0800 ++++ b/gcc/tree-vectorizer.h 2020-12-20 18:48:11.227633230 +0800 +@@ -1618,8 +1618,9 @@ extern void vect_update_inits_of_drs (lo + /* In tree-vect-stmts.c. */ + extern tree get_related_vectype_for_scalar_type (machine_mode, tree, + poly_uint64 = 0); +-extern tree get_vectype_for_scalar_type (vec_info *, tree); +-extern tree get_mask_type_for_scalar_type (vec_info *, tree); ++extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0); ++extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree); ++extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree = 0); + extern tree get_same_sized_vectype (tree, tree); + extern bool vect_chooses_same_modes_p (vec_info *, machine_mode); + extern bool vect_get_loop_mask_type (loop_vec_info); +@@ -1671,8 +1672,8 @@ extern void optimize_mask_stores (struct + extern gcall *vect_gen_while (tree, tree, tree); + extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); + extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *, +- tree *); +-extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info); ++ tree *, unsigned int = 0); ++extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, slp_tree = 0); + + /* In tree-vect-data-refs.c. */ + extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64); diff --git a/Fix-EXTRACT_LAST_REDUCTION-segfault.patch b/Fix-EXTRACT_LAST_REDUCTION-segfault.patch new file mode 100644 index 0000000..aa3b320 --- /dev/null +++ b/Fix-EXTRACT_LAST_REDUCTION-segfault.patch @@ -0,0 +1,82 @@ +This backport contains 2 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-Fix-EXTRACT_LAST_REDUCTION-handling-of-pattern-stmts.patch +9ec35478ccf0f3539988a054b7996278706a7710 + +0001-Fix-EXTRACT_LAST_REDUCTION-segfault.patch +dc176c3ccd6a8cd3f809f3c1549ad00674061eb5 + +diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c +--- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c 2020-12-14 21:16:26.492000000 -0500 +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++ ++int ++f (int *y) ++{ ++ int res = 0; ++ for (int i = 0; i < 100; ++i) ++ res = (y[i] & 1) == 0 && (y[i] < 10) ? 
res : 1; ++ return res; ++} +diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2020-12-14 21:15:27.004000000 -0500 ++++ b/gcc/tree-vect-stmts.c 2020-12-14 21:16:26.492000000 -0500 +@@ -1777,9 +1777,10 @@ vect_finish_stmt_generation_1 (stmt_vec_ + stmt_vec_info + vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt) + { +- gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt)); ++ gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt; ++ gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt)); + +- gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt); ++ gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt); + gsi_replace (&gsi, vec_stmt, true); + + return vect_finish_stmt_generation_1 (stmt_info, vec_stmt); +@@ -9118,10 +9119,12 @@ vectorizable_condition (stmt_vec_info st + if (new_code == ERROR_MARK) + must_invert_cmp_result = true; + else +- cond_code = new_code; ++ { ++ cond_code = new_code; ++ /* Make sure we don't accidentally use the old condition. */ ++ cond_expr = NULL_TREE; ++ } + } +- /* Make sure we don't accidentally use the old condition. */ +- cond_expr = NULL_TREE; + std::swap (then_clause, else_clause); + } + +@@ -9426,20 +9429,21 @@ vectorizable_condition (stmt_vec_info st + vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + vec_compare = vec_compare_name; + } ++ gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt; ++ tree lhs = gimple_get_lhs (old_stmt); + gcall *new_stmt = gimple_build_call_internal + (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare, + vec_then_clause); +- gimple_call_set_lhs (new_stmt, scalar_dest); +- SSA_NAME_DEF_STMT (scalar_dest) = new_stmt; +- if (stmt_info->stmt == gsi_stmt (*gsi)) ++ gimple_call_set_lhs (new_stmt, lhs); ++ SSA_NAME_DEF_STMT (lhs) = new_stmt; ++ if (old_stmt == gsi_stmt (*gsi)) + new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt); + else + { + /* In this case we're moving the definition to later in the + block. That doesn't matter because the only uses of the + lhs are in phi statements. */ +- gimple_stmt_iterator old_gsi +- = gsi_for_stmt (stmt_info->stmt); ++ gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt); + gsi_remove (&old_gsi, true); + new_stmt_info + = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); diff --git a/Fix-up-push_partial_def-little-endian-bitfield.patch b/Fix-up-push_partial_def-little-endian-bitfield.patch new file mode 100644 index 0000000..b707a36 --- /dev/null +++ b/Fix-up-push_partial_def-little-endian-bitfield.patch @@ -0,0 +1,51 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +c69325a5db450dbac198f76f1162734af05a1061 +0001-sccvn-Fix-up-push_partial_def-little-endian-bitfield.patch + +diff -urpN a/gcc/testsuite/gcc.c-torture/execute/pr97764.c b/gcc/testsuite/gcc.c-torture/execute/pr97764.c +--- a/gcc/testsuite/gcc.c-torture/execute/pr97764.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.c-torture/execute/pr97764.c 2020-12-07 03:42:13.404000000 -0500 +@@ -0,0 +1,14 @@ ++/* PR tree-optimization/97764 */ ++/* { dg-require-effective-target int32plus } */ ++ ++struct S { int b : 3; int c : 28; int d : 1; }; ++ ++int ++main () ++{ ++ struct S e = {}; ++ e.c = -1; ++ if (e.d) ++ __builtin_abort (); ++ return 0; ++} +diff -urpN a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c +--- a/gcc/tree-ssa-sccvn.c 2020-12-07 03:43:37.792000000 -0500 ++++ b/gcc/tree-ssa-sccvn.c 2020-12-07 03:42:13.404000000 -0500 +@@ -2013,12 +2013,12 @@ vn_walk_cb_data::push_partial_def (const + } + else + { +- size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT); + if (pd.offset >= 0) + { + /* LSB of this_buffer[0] byte should be at pd.offset bits + in buffer. */ + unsigned int msk; ++ size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT); + amnt = pd.offset % BITS_PER_UNIT; + if (amnt) + shift_bytes_in_array_left (this_buffer, len + 1, amnt); +@@ -2046,6 +2046,9 @@ vn_walk_cb_data::push_partial_def (const + { + amnt = (unsigned HOST_WIDE_INT) pd.offset % BITS_PER_UNIT; + if (amnt) ++ size -= BITS_PER_UNIT - amnt; ++ size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT); ++ if (amnt) + shift_bytes_in_array_left (this_buffer, len + 1, amnt); + } + memcpy (p, this_buffer + (amnt != 0), size / BITS_PER_UNIT); diff --git a/Fix-zero-masking-for-vcvtps2ph.patch b/Fix-zero-masking-for-vcvtps2ph.patch new file mode 100644 index 0000000..df8c5a8 --- /dev/null +++ b/Fix-zero-masking-for-vcvtps2ph.patch @@ -0,0 +1,139 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-Fix-zero-masking-for-vcvtps2ph-when-dest-operand-is-.patch +43088bb4dadd3d14b6b594c5f9363fe879f3d7f7 + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 87354451c58..7815d77bcbf 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -21775,19 +21775,19 @@ + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "V4SF")]) + +-(define_insn "*vcvtps2ph_store" ++(define_insn "*vcvtps2ph_store" + [(set (match_operand:V4HI 0 "memory_operand" "=m") + (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_255_operand" "N")] + UNSPEC_VCVTPS2PH))] + "TARGET_F16C || TARGET_AVX512VL" +- "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" ++ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "V4SF")]) + + (define_insn "vcvtps2ph256" +- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm") ++ [(set (match_operand:V8HI 0 "register_operand" "=v") + (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_255_operand" "N")] + UNSPEC_VCVTPS2PH))] +@@ -21798,8 +21798,20 @@ + (set_attr "btver2_decode" "vector") + (set_attr "mode" "V8SF")]) + ++(define_insn "*vcvtps2ph256" ++ [(set (match_operand:V8HI 0 "memory_operand" "=m") ++ (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v") ++ (match_operand:SI 2 "const_0_to_255_operand" "N")] ++ UNSPEC_VCVTPS2PH))] ++ "TARGET_F16C || TARGET_AVX512VL" ++ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "maybe_evex") ++ (set_attr "btver2_decode" "vector") ++ (set_attr "mode" "V8SF")]) ++ + (define_insn "avx512f_vcvtps2ph512" +- [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm") ++ [(set (match_operand:V16HI 0 "register_operand" "=v") + (unspec:V16HI + [(match_operand:V16SF 1 "register_operand" "v") + (match_operand:SI 2 "const_0_to_255_operand" "N")] +@@ -21810,6 +21822,18 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "V16SF")]) + ++(define_insn "*avx512f_vcvtps2ph512" ++ [(set (match_operand:V16HI 0 "memory_operand" "=m") ++ (unspec:V16HI ++ [(match_operand:V16SF 1 "register_operand" "v") ++ (match_operand:SI 2 "const_0_to_255_operand" "N")] ++ UNSPEC_VCVTPS2PH))] ++ "TARGET_AVX512F" ++ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "V16SF")]) ++ + ;; For gather* insn patterns + (define_mode_iterator VEC_GATHER_MODE + [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF]) +diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md +index a5ca144c7f7..58ea9dc83e2 100644 +--- a/gcc/config/i386/subst.md ++++ b/gcc/config/i386/subst.md +@@ -73,6 +73,18 @@ + (match_operand:SUBST_V 2 "nonimm_or_0_operand" "0C") + (match_operand: 3 "register_operand" "Yk")))]) + ++(define_subst_attr "merge_mask_name" "merge_mask" "" "_merge_mask") ++(define_subst_attr "merge_mask_operand3" "merge_mask" "" "%{%3%}") ++(define_subst "merge_mask" ++ [(set (match_operand:SUBST_V 0) ++ (match_operand:SUBST_V 1))] ++ "TARGET_AVX512F" ++ [(set (match_dup 0) ++ (vec_merge:SUBST_V ++ (match_dup 1) ++ (match_dup 0) ++ (match_operand: 2 "register_operand" "Yk")))]) ++ + (define_subst_attr "mask_scalar_merge_name" "mask_scalar_merge" "" "_mask") + (define_subst_attr "mask_scalar_merge_operand3" "mask_scalar_merge" "" "%{%3%}") + (define_subst_attr "mask_scalar_merge_operand4" "mask_scalar_merge" "" "%{%4%}") +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c 
b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c +new file mode 100644 +index 00000000000..9e0da947368 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mavx512f" } */ ++ ++#include ++extern __m256i res; ++void ++foo (__m512 a, __mmask16 m) ++{ ++ res = _mm512_maskz_cvtps_ph (m, a, 10); ++} ++ ++/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c +new file mode 100644 +index 00000000000..0c685ea66fd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mavx512vl -mavx512f" } */ ++ ++#include ++extern __m128i res; ++void ++foo (__m256 a, __mmask8 m) ++{ ++ res = _mm256_maskz_cvtps_ph (m, a, 10); ++} ++ ++void ++foo1 (__m128 a, __mmask8 m) ++{ ++ res = _mm_maskz_cvtps_ph (m, a, 10); ++} ++ ++/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%\[xy\]mm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */ diff --git a/IRA-Handle-fully-tied-destinations.patch b/IRA-Handle-fully-tied-destinations.patch new file mode 100644 index 0000000..ad181cd --- /dev/null +++ b/IRA-Handle-fully-tied-destinations.patch @@ -0,0 +1,155 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-IRA-Handle-fully-tied-destinations-in-a-similar-way-.patch +9b0365879b3c4917f5a2485a1fca8bb678484bfe + +diff --git a/gcc/ira-lives.c b/gcc/ira-lives.c +index cce73a1c3d4..098b0e73953 100644 +--- a/gcc/ira-lives.c ++++ b/gcc/ira-lives.c +@@ -633,9 +633,28 @@ check_and_make_def_use_conflict (rtx dreg, rtx orig_dreg, + + /* Check and make if necessary conflicts for definition DEF of class + DEF_CL of the current insn with input operands. Process only +- constraints of alternative ALT. */ ++ constraints of alternative ALT. ++ ++ One of three things is true when this function is called: ++ ++ (1) DEF is an earlyclobber for alternative ALT. Input operands then ++ conflict with DEF in ALT unless they explicitly match DEF via 0-9 ++ constraints. ++ ++ (2) DEF matches (via 0-9 constraints) an operand that is an ++ earlyclobber for alternative ALT. Other input operands then ++ conflict with DEF in ALT. ++ ++ (3) [FOR_TIE_P] Some input operand X matches DEF for alternative ALT. ++ Input operands with a different value from X then conflict with ++ DEF in ALT. ++ ++ However, there's still a judgement call to make when deciding ++ whether a conflict in ALT is important enough to be reflected ++ in the pan-alternative allocno conflict set. */ + static void +-check_and_make_def_conflict (int alt, int def, enum reg_class def_cl) ++check_and_make_def_conflict (int alt, int def, enum reg_class def_cl, ++ bool for_tie_p) + { + int use, use_match; + ira_allocno_t a; +@@ -669,14 +688,40 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl) + if (use == def || recog_data.operand_type[use] == OP_OUT) + continue; + ++ /* An earlyclobber on DEF doesn't apply to an input operand X if X ++ explicitly matches DEF, but it applies to other input operands ++ even if they happen to be the same value as X. 
++ ++ In contrast, if an input operand X is tied to a non-earlyclobber ++ DEF, there's no conflict with other input operands that have the ++ same value as X. */ ++ if (op_alt[use].matches == def ++ || (for_tie_p ++ && rtx_equal_p (recog_data.operand[use], ++ recog_data.operand[op_alt[def].matched]))) ++ continue; ++ + if (op_alt[use].anything_ok) + use_cl = ALL_REGS; + else + use_cl = op_alt[use].cl; ++ if (use_cl == NO_REGS) ++ continue; ++ ++ /* If DEF is simply a tied operand, ignore cases in which this ++ alternative requires USE to have a likely-spilled class. ++ Adding a conflict would just constrain USE further if DEF ++ happens to be allocated first. */ ++ if (for_tie_p && targetm.class_likely_spilled_p (use_cl)) ++ continue; + + /* If there's any alternative that allows USE to match DEF, do not + record a conflict. If that causes us to create an invalid +- instruction due to the earlyclobber, reload must fix it up. */ ++ instruction due to the earlyclobber, reload must fix it up. ++ ++ Likewise, if we're treating a tied DEF like a partial earlyclobber, ++ do not record a conflict if there's another alternative in which ++ DEF is neither tied nor earlyclobber. */ + for (alt1 = 0; alt1 < recog_data.n_alternatives; alt1++) + { + if (!TEST_BIT (preferred_alternatives, alt1)) +@@ -691,6 +736,12 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl) + && recog_data.constraints[use - 1][0] == '%' + && op_alt1[use - 1].matches == def)) + break; ++ if (for_tie_p ++ && !op_alt1[def].earlyclobber ++ && op_alt1[def].matched < 0 ++ && alternative_class (op_alt1, def) != NO_REGS ++ && alternative_class (op_alt1, use) != NO_REGS) ++ break; + } + + if (alt1 < recog_data.n_alternatives) +@@ -701,8 +752,7 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl) + + if ((use_match = op_alt[use].matches) >= 0) + { +- if (use_match == def) +- continue; ++ gcc_checking_assert (use_match != def); + + if (op_alt[use_match].anything_ok) + use_cl = ALL_REGS; +@@ -717,7 +767,11 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl) + /* Make conflicts of early clobber pseudo registers of the current + insn with its inputs. Avoid introducing unnecessary conflicts by + checking classes of the constraints and pseudos because otherwise +- significant code degradation is possible for some targets. */ ++ significant code degradation is possible for some targets. ++ ++ For these purposes, tying an input to an output makes that output act ++ like an earlyclobber for inputs with a different value, since the output ++ register then has a predetermined purpose on input to the instruction. 
*/ + static void + make_early_clobber_and_input_conflicts (void) + { +@@ -732,15 +786,19 @@ make_early_clobber_and_input_conflicts (void) + if (TEST_BIT (preferred_alternatives, alt)) + for (def = 0; def < n_operands; def++) + { +- def_cl = NO_REGS; +- if (op_alt[def].earlyclobber) ++ if (op_alt[def].anything_ok) ++ def_cl = ALL_REGS; ++ else ++ def_cl = op_alt[def].cl; ++ if (def_cl != NO_REGS) + { +- if (op_alt[def].anything_ok) +- def_cl = ALL_REGS; +- else +- def_cl = op_alt[def].cl; +- check_and_make_def_conflict (alt, def, def_cl); ++ if (op_alt[def].earlyclobber) ++ check_and_make_def_conflict (alt, def, def_cl, false); ++ else if (op_alt[def].matched >= 0 ++ && !targetm.class_likely_spilled_p (def_cl)) ++ check_and_make_def_conflict (alt, def, def_cl, true); + } ++ + if ((def_match = op_alt[def].matches) >= 0 + && (op_alt[def_match].earlyclobber + || op_alt[def].earlyclobber)) +@@ -749,7 +807,7 @@ make_early_clobber_and_input_conflicts (void) + def_cl = ALL_REGS; + else + def_cl = op_alt[def_match].cl; +- check_and_make_def_conflict (alt, def, def_cl); ++ check_and_make_def_conflict (alt, def, def_cl, false); + } + } + } diff --git a/SLP-VECT-Add-check-to-fix-96837.patch b/SLP-VECT-Add-check-to-fix-96837.patch new file mode 100644 index 0000000..bfc60bc --- /dev/null +++ b/SLP-VECT-Add-check-to-fix-96837.patch @@ -0,0 +1,99 @@ +This backport contains 2 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +97b798d80baf945ea28236eef3fa69f36626b579 +0001-SLP-VECT-Add-check-to-fix-96837.patch + +373b99dc40949efa697326f378e5022a02e0328b +0002-Add-a-testcase-for-PR-target-96827.patch + +diff -uprN a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 2020-11-17 15:58:12.118126065 +0800 +@@ -0,0 +1,28 @@ ++/* This checks that vectorized constructors have the correct ordering. */ ++/* { dg-require-effective-target vect_int } */ ++ ++typedef int V __attribute__((__vector_size__(16))); ++ ++__attribute__((__noipa__)) void ++foo (unsigned int x, V *y) ++{ ++ unsigned int a[4] = { x + 0, x + 2, x + 4, x + 6 }; ++ for (unsigned int i = 0; i < 3; ++i) ++ if (a[i] == 1234) ++ a[i]--; ++ *y = (V) { a[3], a[2], a[1], a[0] }; ++} ++ ++int ++main () ++{ ++ V b; ++ foo (0, &b); ++ if (b[0] != 6 || b[1] != 4 || b[2] != 2 || b[3] != 0) ++ __builtin_abort (); ++ return 0; ++} ++ ++/* See that we vectorize an SLP instance. 
*/ ++/* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */ ++/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */ +diff -uprN a/gcc/testsuite/gcc.target/i386/pr96827.c b/gcc/testsuite/gcc.target/i386/pr96827.c +--- a/gcc/testsuite/gcc.target/i386/pr96827.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.target/i386/pr96827.c 2020-11-17 15:58:15.182126065 +0800 +@@ -0,0 +1,41 @@ ++/* { dg-do run { target sse2_runtime } } */ ++/* { dg-options "-O3 -msse2 -mfpmath=sse" } */ ++ ++typedef unsigned short int __uint16_t; ++typedef unsigned int __uint32_t; ++typedef __uint16_t uint16_t; ++typedef __uint32_t uint32_t; ++typedef int __v4si __attribute__ ((__vector_size__ (16))); ++typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); ++extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++_mm_store_si128 (__m128i *__P, __m128i __B) ++{ ++ *__P = __B; ++} ++extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) ++_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) ++{ ++ return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; ++} ++typedef uint16_t u16; ++typedef uint32_t u32; ++extern int printf (const char *__restrict __format, ...); ++void do_the_thing(u32 idx, __m128i *dude) ++{ ++ u32 dude_[4] = { idx+0, idx+2, idx+4, idx+6 }; ++ for (u32 i = 0; i < 3; ++i) ++ if (dude_[i] == 1234) ++ dude_[i]--; ++ *dude = _mm_set_epi32(dude_[0], dude_[1], dude_[2], dude_[3]); ++} ++int main() ++{ ++ __m128i dude; ++ u32 idx = 0; ++ do_the_thing(idx, &dude); ++ __attribute__((aligned(16))) u32 dude_[4]; ++ _mm_store_si128((__m128i*)dude_, dude); ++ if (!(6 == dude_[0] && 4 == dude_[1] && 2 == dude_[2] && 0 == dude_[3])) ++ __builtin_abort (); ++ return 0; ++} +diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2020-11-17 15:55:57.098126065 +0800 ++++ b/gcc/tree-vect-slp.c 2020-11-17 15:59:25.862126065 +0800 +@@ -1842,7 +1842,8 @@ vect_supported_load_permutation_p (slp_i + /* Reduction (there are no data-refs in the root). + In reduction chain the order of the loads is not important. */ + if (!STMT_VINFO_DATA_REF (stmt_info) +- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) ++ && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) ++ && !SLP_INSTANCE_ROOT_STMT (slp_instn)) + vect_attempt_slp_rearrange_stmts (slp_instn); + + /* In basic block vectorization we allow any subchain of an interleaving diff --git a/aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch b/aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch new file mode 100644 index 0000000..e28c8a6 --- /dev/null +++ b/aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch @@ -0,0 +1,165 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +7a6588fe65432c0f1a8b5fdefba81700ebf88711 +0001-aarch64-Fix-ash-lr-lshr-mode-3-expanders-PR94488.patch + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 24a11fb5040..9f0e2bd1e6f 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -1105,31 +1105,17 @@ + tmp)); + DONE; + } +- else +- { +- operands[2] = force_reg (SImode, operands[2]); +- } +- } +- else if (MEM_P (operands[2])) +- { +- operands[2] = force_reg (SImode, operands[2]); + } + +- if (REG_P (operands[2])) +- { +- rtx tmp = gen_reg_rtx (mode); +- emit_insn (gen_aarch64_simd_dup (tmp, +- convert_to_mode (mode, +- operands[2], +- 0))); +- emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], +- tmp)); +- DONE; +- } +- else +- FAIL; +-} +-) ++ operands[2] = force_reg (SImode, operands[2]); ++ ++ rtx tmp = gen_reg_rtx (mode); ++ emit_insn (gen_aarch64_simd_dup (tmp, convert_to_mode (mode, ++ operands[2], ++ 0))); ++ emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], tmp)); ++ DONE; ++}) + + (define_expand "lshr3" + [(match_operand:VDQ_I 0 "register_operand") +@@ -1152,31 +1138,19 @@ + tmp)); + DONE; + } +- else +- operands[2] = force_reg (SImode, operands[2]); +- } +- else if (MEM_P (operands[2])) +- { +- operands[2] = force_reg (SImode, operands[2]); + } + +- if (REG_P (operands[2])) +- { +- rtx tmp = gen_reg_rtx (SImode); +- rtx tmp1 = gen_reg_rtx (mode); +- emit_insn (gen_negsi2 (tmp, operands[2])); +- emit_insn (gen_aarch64_simd_dup (tmp1, +- convert_to_mode (mode, +- tmp, 0))); +- emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], +- operands[1], +- tmp1)); +- DONE; +- } +- else +- FAIL; +-} +-) ++ operands[2] = force_reg (SImode, operands[2]); ++ ++ rtx tmp = gen_reg_rtx (SImode); ++ rtx tmp1 = gen_reg_rtx (mode); ++ emit_insn (gen_negsi2 (tmp, operands[2])); ++ emit_insn (gen_aarch64_simd_dup (tmp1, ++ convert_to_mode (mode, tmp, 0))); ++ emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], operands[1], ++ tmp1)); ++ DONE; ++}) + + (define_expand "ashr3" + [(match_operand:VDQ_I 0 "register_operand") +@@ -1199,31 +1173,19 @@ + tmp)); + DONE; + } +- else +- operands[2] = force_reg (SImode, operands[2]); +- } +- else if (MEM_P (operands[2])) +- { +- operands[2] = force_reg (SImode, operands[2]); + } + +- if (REG_P (operands[2])) +- { +- rtx tmp = gen_reg_rtx (SImode); +- rtx tmp1 = gen_reg_rtx (mode); +- emit_insn (gen_negsi2 (tmp, operands[2])); +- emit_insn (gen_aarch64_simd_dup (tmp1, +- convert_to_mode (mode, +- tmp, 0))); +- emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], +- operands[1], +- tmp1)); +- DONE; +- } +- else +- FAIL; +-} +-) ++ operands[2] = force_reg (SImode, operands[2]); ++ ++ rtx tmp = gen_reg_rtx (SImode); ++ rtx tmp1 = gen_reg_rtx (mode); ++ emit_insn (gen_negsi2 (tmp, operands[2])); ++ emit_insn (gen_aarch64_simd_dup (tmp1, convert_to_mode (mode, ++ tmp, 0))); ++ emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], operands[1], ++ tmp1)); ++ DONE; ++}) + + (define_expand "vashl3" + [(match_operand:VDQ_I 0 "register_operand") +diff --git a/gcc/testsuite/gcc.c-torture/compile/pr94488.c b/gcc/testsuite/gcc.c-torture/compile/pr94488.c +new file mode 100644 +index 00000000000..6e20a4168de +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/compile/pr94488.c +@@ -0,0 +1,22 @@ ++/* PR target/94488 */ ++ ++typedef unsigned long V __attribute__((__vector_size__(16))); ++typedef long W __attribute__((__vector_size__(16))); ++ ++void ++foo (V *x, unsigned long y) ++{ ++ *x = *x >> 
(unsigned int) y; ++} ++ ++void ++bar (V *x, unsigned long y) ++{ ++ *x = *x << (unsigned int) y; ++} ++ ++void ++baz (W *x, unsigned long y) ++{ ++ *x = *x >> (unsigned int) y; ++} diff --git a/aarch64-Fix-bf16-and-matrix-g++-gfortran.patch b/aarch64-Fix-bf16-and-matrix-g++-gfortran.patch new file mode 100644 index 0000000..6bc36da --- /dev/null +++ b/aarch64-Fix-bf16-and-matrix-g++-gfortran.patch @@ -0,0 +1,1613 @@ +This backport contains 5 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-target-88838-SVE-Use-32-bit-WHILELO-in-LP64-mo.patch +9b884225bfc609606f9b169b021c4da93feba48e + +0002-C-Avoid-aka-types-that-just-add-tags.patch +558798156b41fcbe5ba68b75171708cad135b041 + +0003-g-.dg-tree-ssa-pr61034.C-Add-param-max-inline-insns-.patch +cfcf3551c432da3a8154ef11a26a4d75655deb3d + +0004-C-Avoid-exposing-internal-details-in-aka-types.patch +56898e437a538c7edc0724a3650f5cb81c9d5721 + +0005-C-Avoid-exposing-internal-details-in-aka-types.patch +10bce48f104de56503b17954ed79f019df3252e3 + +diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c +index bf3db074a..2c10743b9 100644 +--- a/gcc/c-family/c-common.c ++++ b/gcc/c-family/c-common.c +@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see + #include "gimplify.h" + #include "substring-locations.h" + #include "spellcheck.h" ++#include "c-spellcheck.h" + #include "selftest.h" + + cpp_reader *parse_in; /* Declared in c-pragma.h. */ +@@ -7685,6 +7686,52 @@ set_underlying_type (tree x) + } + } + ++/* Return true if it is worth exposing the DECL_ORIGINAL_TYPE of TYPE to ++ the user in diagnostics, false if it would be better to use TYPE itself. ++ TYPE is known to satisfy typedef_variant_p. */ ++ ++bool ++user_facing_original_type_p (const_tree type) ++{ ++ gcc_assert (typedef_variant_p (type)); ++ tree decl = TYPE_NAME (type); ++ ++ /* Look through any typedef in "user" code. */ ++ if (!DECL_IN_SYSTEM_HEADER (decl) && !DECL_IS_BUILTIN (decl)) ++ return true; ++ ++ /* If the original type is also named and is in the user namespace, ++ assume it too is a user-facing type. */ ++ tree orig_type = DECL_ORIGINAL_TYPE (decl); ++ if (tree orig_id = TYPE_IDENTIFIER (orig_type)) ++ if (!name_reserved_for_implementation_p (IDENTIFIER_POINTER (orig_id))) ++ return true; ++ ++ switch (TREE_CODE (orig_type)) ++ { ++ /* Don't look through to an anonymous vector type, since the syntax ++ we use for them in diagnostics isn't real C or C++ syntax. ++ And if ORIG_TYPE is named but in the implementation namespace, ++ TYPE is likely to be more meaningful to the user. */ ++ case VECTOR_TYPE: ++ return false; ++ ++ /* Don't expose anonymous tag types that are presumably meant to be ++ known by their typedef name. Also don't expose tags that are in ++ the implementation namespace, such as: ++ ++ typedef struct __foo foo; */ ++ case RECORD_TYPE: ++ case UNION_TYPE: ++ case ENUMERAL_TYPE: ++ return false; ++ ++ /* Look through to anything else. */ ++ default: ++ return true; ++ } ++} ++ + /* Record the types used by the current global variable declaration + being parsed, so that we can decide later to emit their debug info. 
+ Those types are in types_used_by_cur_var_decl, and we are going to +diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h +index 46b8d265a..73ce7c5df 100644 +--- a/gcc/c-family/c-common.h ++++ b/gcc/c-family/c-common.h +@@ -1063,6 +1063,7 @@ extern tree builtin_type_for_size (int, bool); + extern void c_common_mark_addressable_vec (tree); + + extern void set_underlying_type (tree); ++extern bool user_facing_original_type_p (const_tree); + extern void record_types_used_by_current_var_decl (tree); + extern vec *make_tree_vector (void); + extern void release_tree_vector (vec *); +diff --git a/gcc/c/c-objc-common.c b/gcc/c/c-objc-common.c +index 2b76737a7..10d72c57d 100644 +--- a/gcc/c/c-objc-common.c ++++ b/gcc/c/c-objc-common.c +@@ -28,6 +28,8 @@ along with GCC; see the file COPYING3. If not see + #include "langhooks.h" + #include "c-objc-common.h" + #include "gcc-rich-location.h" ++#include "stringpool.h" ++#include "attribs.h" + + static bool c_tree_printer (pretty_printer *, text_info *, const char *, + int, bool, bool, bool, bool *, const char **); +@@ -62,6 +64,122 @@ c_objc_common_init (void) + return c_common_init (); + } + ++/* Decide whether it's worth saying that TYPE is also known as some other ++ type. Return the other type if so, otherwise return TYPE. */ ++ ++static tree ++get_aka_type (tree type) ++{ ++ if (type == error_mark_node) ++ return type; ++ ++ tree result; ++ if (typedef_variant_p (type)) ++ { ++ /* Saying that "foo" is also known as "struct foo" or ++ "struct " is unlikely to be useful, since users of ++ structure-like types would already know that they're structures. ++ The same applies to unions and enums; in general, printing the ++ tag is only useful if it has a different name. */ ++ tree orig_type = DECL_ORIGINAL_TYPE (TYPE_NAME (type)); ++ tree_code code = TREE_CODE (orig_type); ++ tree orig_id = TYPE_IDENTIFIER (orig_type); ++ if ((code == RECORD_TYPE || code == UNION_TYPE || code == ENUMERAL_TYPE) ++ && (!orig_id || TYPE_IDENTIFIER (type) == orig_id)) ++ return type; ++ ++ if (!user_facing_original_type_p (type)) ++ return type; ++ ++ result = get_aka_type (orig_type); ++ } ++ else ++ { ++ tree canonical = TYPE_CANONICAL (type); ++ if (canonical && TREE_CODE (type) != TREE_CODE (canonical)) ++ return canonical; ++ ++ /* Recursive calls might choose a middle ground between TYPE ++ (which has no typedefs stripped) and CANONICAL (which has ++ all typedefs stripped). So try to reuse TYPE or CANONICAL if ++ convenient, but be prepared to create a new type if necessary. */ ++ switch (TREE_CODE (type)) ++ { ++ case POINTER_TYPE: ++ case REFERENCE_TYPE: ++ { ++ tree target_type = get_aka_type (TREE_TYPE (type)); ++ ++ if (target_type == TREE_TYPE (type)) ++ return type; ++ ++ if (canonical && target_type == TREE_TYPE (canonical)) ++ return canonical; ++ ++ result = (TREE_CODE (type) == POINTER_TYPE ++ ? build_pointer_type (target_type) ++ : build_reference_type (target_type)); ++ break; ++ } ++ ++ case ARRAY_TYPE: ++ { ++ tree element_type = get_aka_type (TREE_TYPE (type)); ++ tree index_type = (TYPE_DOMAIN (type) ++ ? 
get_aka_type (TYPE_DOMAIN (type)) ++ : NULL_TREE); ++ ++ if (element_type == TREE_TYPE (type) ++ && index_type == TYPE_DOMAIN (type)) ++ return type; ++ ++ if (canonical ++ && element_type == TREE_TYPE (canonical) ++ && index_type == TYPE_DOMAIN (canonical)) ++ return canonical; ++ ++ result = build_array_type (element_type, index_type, ++ TYPE_TYPELESS_STORAGE (type)); ++ break; ++ } ++ ++ case FUNCTION_TYPE: ++ { ++ tree return_type = get_aka_type (TREE_TYPE (type)); ++ ++ tree args = TYPE_ARG_TYPES (type); ++ if (args == error_mark_node) ++ return type; ++ ++ auto_vec arg_types; ++ bool type_ok_p = true; ++ while (args && args != void_list_node) ++ { ++ tree arg_type = get_aka_type (TREE_VALUE (args)); ++ arg_types.safe_push (arg_type); ++ type_ok_p &= (arg_type == TREE_VALUE (args)); ++ args = TREE_CHAIN (args); ++ } ++ ++ if (type_ok_p && return_type == TREE_TYPE (type)) ++ return type; ++ ++ unsigned int i; ++ tree arg_type; ++ FOR_EACH_VEC_ELT_REVERSE (arg_types, i, arg_type) ++ args = tree_cons (NULL_TREE, arg_type, args); ++ result = build_function_type (return_type, args); ++ break; ++ } ++ ++ default: ++ return canonical ? canonical : type; ++ } ++ } ++ return build_type_attribute_qual_variant (result, TYPE_ATTRIBUTES (type), ++ TYPE_QUALS (type)); ++} ++ + /* Print T to CPP. */ + + static void +@@ -83,11 +201,12 @@ print_type (c_pretty_printer *cpp, tree t, bool *quoted) + stripped version. But sometimes the stripped version looks + exactly the same, so we don't want it after all. To avoid + printing it in that case, we play ugly obstack games. */ +- if (TYPE_CANONICAL (t) && t != TYPE_CANONICAL (t)) ++ tree aka_type = get_aka_type (t); ++ if (aka_type != t) + { + c_pretty_printer cpp2; + /* Print the stripped version into a temporary printer. */ +- cpp2.type_id (TYPE_CANONICAL (t)); ++ cpp2.type_id (aka_type); + struct obstack *ob2 = cpp2.buffer->obstack; + /* Get the stripped version from the temporary printer. */ + const char *aka = (char *) obstack_base (ob2); +@@ -107,7 +226,7 @@ print_type (c_pretty_printer *cpp, tree t, bool *quoted) + pp_c_whitespace (cpp); + if (*quoted) + pp_begin_quote (cpp, pp_show_color (cpp)); +- cpp->type_id (TYPE_CANONICAL (t)); ++ cpp->type_id (aka_type); + if (*quoted) + pp_end_quote (cpp, pp_show_color (cpp)); + pp_right_brace (cpp); +diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h +index 4bba1887f..e802dcbeb 100644 +--- a/gcc/cp/cp-tree.h ++++ b/gcc/cp/cp-tree.h +@@ -5662,6 +5662,13 @@ enum auto_deduction_context + #define TFF_NO_TEMPLATE_BINDINGS (1 << 13) + #define TFF_POINTER (1 << 14) + ++/* These constants can be used as bit flags to control strip_typedefs. ++ ++ STF_USER_VISIBLE: use heuristics to try to avoid stripping user-facing ++ aliases of internal details. This is intended for diagnostics, ++ where it should (for example) give more useful "aka" types. */ ++const unsigned int STF_USER_VISIBLE = 1U; ++ + /* Returns the TEMPLATE_DECL associated to a TEMPLATE_TEMPLATE_PARM + node. 
*/ + #define TEMPLATE_TEMPLATE_PARM_TEMPLATE_DECL(NODE) \ +@@ -7221,8 +7228,10 @@ extern int zero_init_p (const_tree); + extern bool check_abi_tag_redeclaration (const_tree, const_tree, + const_tree); + extern bool check_abi_tag_args (tree, tree); +-extern tree strip_typedefs (tree, bool * = NULL); +-extern tree strip_typedefs_expr (tree, bool * = NULL); ++extern tree strip_typedefs (tree, bool * = NULL, ++ unsigned int = 0); ++extern tree strip_typedefs_expr (tree, bool * = NULL, ++ unsigned int = 0); + extern tree copy_binfo (tree, tree, tree, + tree *, int); + extern int member_p (const_tree); +diff --git a/gcc/cp/error.c b/gcc/cp/error.c +index 4a0aed2b7..5beaf2dc1 100644 +--- a/gcc/cp/error.c ++++ b/gcc/cp/error.c +@@ -408,7 +408,7 @@ dump_template_bindings (cxx_pretty_printer *pp, tree parms, tree args, + pop_deferring_access_checks (); + /* Strip typedefs. We can't just use TFF_CHASE_TYPEDEF because + pp_simple_type_specifier doesn't know about it. */ +- t = strip_typedefs (t); ++ t = strip_typedefs (t, NULL, STF_USER_VISIBLE); + dump_type (pp, t, TFF_PLAIN_IDENTIFIER); + } + } +@@ -447,7 +447,11 @@ dump_type (cxx_pretty_printer *pp, tree t, int flags) + || DECL_SELF_REFERENCE_P (decl) + || (!flag_pretty_templates + && DECL_LANG_SPECIFIC (decl) && DECL_TEMPLATE_INFO (decl))) +- t = strip_typedefs (t); ++ { ++ unsigned int stf_flags = (!(pp->flags & pp_c_flag_gnu_v3) ++ ? STF_USER_VISIBLE : 0); ++ t = strip_typedefs (t, NULL, stf_flags); ++ } + else if (alias_template_specialization_p (t)) + { + dump_alias_template_specialization (pp, t, flags); +@@ -3193,7 +3197,7 @@ type_to_string (tree typ, int verbose, bool postprocessed, bool *quote, + && !uses_template_parms (typ)) + { + int aka_start, aka_len; char *p; +- tree aka = strip_typedefs (typ); ++ tree aka = strip_typedefs (typ, NULL, STF_USER_VISIBLE); + if (quote && *quote) + pp_end_quote (cxx_pp, show_color); + pp_string (cxx_pp, " {aka"); +diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c +index 3f3583c82..6a1f760ba 100644 +--- a/gcc/cp/tree.c ++++ b/gcc/cp/tree.c +@@ -1421,7 +1421,10 @@ apply_identity_attributes (tree result, tree attribs, bool *remove_attributes) + return cp_build_type_attribute_variant (result, new_attribs); + } + +-/* Builds a qualified variant of T that is not a typedef variant. ++/* Builds a qualified variant of T that is either not a typedef variant ++ (the default behavior) or not a typedef variant of a user-facing type ++ (if FLAGS contains STF_USER_FACING). ++ + E.g. consider the following declarations: + typedef const int ConstInt; + typedef ConstInt* PtrConstInt; +@@ -1446,7 +1449,7 @@ apply_identity_attributes (tree result, tree attribs, bool *remove_attributes) + stripped. 
*/ + + tree +-strip_typedefs (tree t, bool *remove_attributes) ++strip_typedefs (tree t, bool *remove_attributes, unsigned int flags) + { + tree result = NULL, type = NULL, t0 = NULL; + +@@ -1461,7 +1464,7 @@ strip_typedefs (tree t, bool *remove_attributes) + for (; t; t = TREE_CHAIN (t)) + { + gcc_assert (!TREE_PURPOSE (t)); +- tree elt = strip_typedefs (TREE_VALUE (t), remove_attributes); ++ tree elt = strip_typedefs (TREE_VALUE (t), remove_attributes, flags); + if (elt != TREE_VALUE (t)) + changed = true; + vec_safe_push (vec, elt); +@@ -1485,28 +1488,29 @@ strip_typedefs (tree t, bool *remove_attributes) + switch (TREE_CODE (t)) + { + case POINTER_TYPE: +- type = strip_typedefs (TREE_TYPE (t), remove_attributes); ++ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); + result = build_pointer_type (type); + break; + case REFERENCE_TYPE: +- type = strip_typedefs (TREE_TYPE (t), remove_attributes); ++ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); + result = cp_build_reference_type (type, TYPE_REF_IS_RVALUE (t)); + break; + case OFFSET_TYPE: +- t0 = strip_typedefs (TYPE_OFFSET_BASETYPE (t), remove_attributes); +- type = strip_typedefs (TREE_TYPE (t), remove_attributes); ++ t0 = strip_typedefs (TYPE_OFFSET_BASETYPE (t), remove_attributes, flags); ++ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); + result = build_offset_type (t0, type); + break; + case RECORD_TYPE: + if (TYPE_PTRMEMFUNC_P (t)) + { +- t0 = strip_typedefs (TYPE_PTRMEMFUNC_FN_TYPE (t), remove_attributes); ++ t0 = strip_typedefs (TYPE_PTRMEMFUNC_FN_TYPE (t), ++ remove_attributes, flags); + result = build_ptrmemfunc_type (t0); + } + break; + case ARRAY_TYPE: +- type = strip_typedefs (TREE_TYPE (t), remove_attributes); +- t0 = strip_typedefs (TYPE_DOMAIN (t), remove_attributes); ++ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); ++ t0 = strip_typedefs (TYPE_DOMAIN (t), remove_attributes, flags); + result = build_cplus_array_type (type, t0); + break; + case FUNCTION_TYPE: +@@ -1525,7 +1529,7 @@ strip_typedefs (tree t, bool *remove_attributes) + && (TYPE_ATTRIBUTES (t) || TYPE_USER_ALIGN (t))) + is_variant = true; + +- type = strip_typedefs (TREE_TYPE (t), remove_attributes); ++ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); + tree canon_spec = (flag_noexcept_type + ? 
canonical_eh_spec (TYPE_RAISES_EXCEPTIONS (t)) + : NULL_TREE); +@@ -1539,7 +1543,7 @@ strip_typedefs (tree t, bool *remove_attributes) + if (arg_node == void_list_node) + break; + arg_type = strip_typedefs (TREE_VALUE (arg_node), +- remove_attributes); ++ remove_attributes, flags); + gcc_assert (arg_type); + if (arg_type == TREE_VALUE (arg_node) && !changed) + continue; +@@ -1603,9 +1607,10 @@ strip_typedefs (tree t, bool *remove_attributes) + tree arg = TREE_VEC_ELT (args, i); + tree strip_arg; + if (TYPE_P (arg)) +- strip_arg = strip_typedefs (arg, remove_attributes); ++ strip_arg = strip_typedefs (arg, remove_attributes, flags); + else +- strip_arg = strip_typedefs_expr (arg, remove_attributes); ++ strip_arg = strip_typedefs_expr (arg, remove_attributes, ++ flags); + TREE_VEC_ELT (new_args, i) = strip_arg; + if (strip_arg != arg) + changed = true; +@@ -1621,7 +1626,7 @@ strip_typedefs (tree t, bool *remove_attributes) + else + ggc_free (new_args); + } +- tree ctx = strip_typedefs (TYPE_CONTEXT (t), remove_attributes); ++ tree ctx = strip_typedefs (TYPE_CONTEXT (t), remove_attributes, flags); + if (!changed && ctx == TYPE_CONTEXT (t) && !typedef_variant_p (t)) + return t; + tree name = fullname; +@@ -1634,7 +1639,7 @@ strip_typedefs (tree t, bool *remove_attributes) + break; + case DECLTYPE_TYPE: + result = strip_typedefs_expr (DECLTYPE_TYPE_EXPR (t), +- remove_attributes); ++ remove_attributes, flags); + if (result == DECLTYPE_TYPE_EXPR (t)) + result = NULL_TREE; + else +@@ -1644,7 +1649,8 @@ strip_typedefs (tree t, bool *remove_attributes) + tf_none)); + break; + case UNDERLYING_TYPE: +- type = strip_typedefs (UNDERLYING_TYPE_TYPE (t), remove_attributes); ++ type = strip_typedefs (UNDERLYING_TYPE_TYPE (t), ++ remove_attributes, flags); + result = finish_underlying_type (type); + break; + default: +@@ -1655,15 +1661,18 @@ strip_typedefs (tree t, bool *remove_attributes) + { + if (typedef_variant_p (t)) + { +- /* Explicitly get the underlying type, as TYPE_MAIN_VARIANT doesn't +- strip typedefs with attributes. */ +- result = TYPE_MAIN_VARIANT (DECL_ORIGINAL_TYPE (TYPE_NAME (t))); +- result = strip_typedefs (result); ++ if ((flags & STF_USER_VISIBLE) ++ && !user_facing_original_type_p (t)) ++ return t; ++ result = strip_typedefs (DECL_ORIGINAL_TYPE (TYPE_NAME (t)), ++ remove_attributes, flags); + } + else + result = TYPE_MAIN_VARIANT (t); + } +- gcc_assert (!typedef_variant_p (result)); ++ gcc_assert (!typedef_variant_p (result) ++ || ((flags & STF_USER_VISIBLE) ++ && !user_facing_original_type_p (result))); + + if (COMPLETE_TYPE_P (result) && !COMPLETE_TYPE_P (t)) + /* If RESULT is complete and T isn't, it's likely the case that T +@@ -1712,7 +1721,7 @@ strip_typedefs (tree t, bool *remove_attributes) + sizeof(TT) is replaced by sizeof(T). */ + + tree +-strip_typedefs_expr (tree t, bool *remove_attributes) ++strip_typedefs_expr (tree t, bool *remove_attributes, unsigned int flags) + { + unsigned i,n; + tree r, type, *ops; +@@ -1729,7 +1738,7 @@ strip_typedefs_expr (tree t, bool *remove_attributes) + /* Some expressions have type operands, so let's handle types here rather + than check TYPE_P in multiple places below. 
*/ + if (TYPE_P (t)) +- return strip_typedefs (t, remove_attributes); ++ return strip_typedefs (t, remove_attributes, flags); + + code = TREE_CODE (t); + switch (code) +@@ -1743,8 +1752,10 @@ strip_typedefs_expr (tree t, bool *remove_attributes) + + case TRAIT_EXPR: + { +- tree type1 = strip_typedefs (TRAIT_EXPR_TYPE1 (t), remove_attributes); +- tree type2 = strip_typedefs (TRAIT_EXPR_TYPE2 (t), remove_attributes); ++ tree type1 = strip_typedefs (TRAIT_EXPR_TYPE1 (t), ++ remove_attributes, flags); ++ tree type2 = strip_typedefs (TRAIT_EXPR_TYPE2 (t), ++ remove_attributes, flags); + if (type1 == TRAIT_EXPR_TYPE1 (t) + && type2 == TRAIT_EXPR_TYPE2 (t)) + return t; +@@ -1761,7 +1772,8 @@ strip_typedefs_expr (tree t, bool *remove_attributes) + tree it; + for (it = t; it; it = TREE_CHAIN (it)) + { +- tree val = strip_typedefs_expr (TREE_VALUE (it), remove_attributes); ++ tree val = strip_typedefs_expr (TREE_VALUE (it), ++ remove_attributes, flags); + vec_safe_push (vec, val); + if (val != TREE_VALUE (it)) + changed = true; +@@ -1788,7 +1800,7 @@ strip_typedefs_expr (tree t, bool *remove_attributes) + for (i = 0; i < n; ++i) + { + tree op = strip_typedefs_expr (TREE_VEC_ELT (t, i), +- remove_attributes); ++ remove_attributes, flags); + vec->quick_push (op); + if (op != TREE_VEC_ELT (t, i)) + changed = true; +@@ -1813,18 +1825,19 @@ strip_typedefs_expr (tree t, bool *remove_attributes) + vec *vec + = vec_safe_copy (CONSTRUCTOR_ELTS (t)); + n = CONSTRUCTOR_NELTS (t); +- type = strip_typedefs (TREE_TYPE (t), remove_attributes); ++ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); + for (i = 0; i < n; ++i) + { + constructor_elt *e = &(*vec)[i]; +- tree op = strip_typedefs_expr (e->value, remove_attributes); ++ tree op = strip_typedefs_expr (e->value, remove_attributes, flags); + if (op != e->value) + { + changed = true; + e->value = op; + } + gcc_checking_assert +- (e->index == strip_typedefs_expr (e->index, remove_attributes)); ++ (e->index == strip_typedefs_expr (e->index, remove_attributes, ++ flags)); + } + + if (!changed && type == TREE_TYPE (t)) +@@ -1868,12 +1881,13 @@ strip_typedefs_expr (tree t, bool *remove_attributes) + case REINTERPRET_CAST_EXPR: + case CAST_EXPR: + case NEW_EXPR: +- type = strip_typedefs (type, remove_attributes); ++ type = strip_typedefs (type, remove_attributes, flags); + /* fallthrough */ + + default: + for (i = 0; i < n; ++i) +- ops[i] = strip_typedefs_expr (TREE_OPERAND (t, i), remove_attributes); ++ ops[i] = strip_typedefs_expr (TREE_OPERAND (t, i), ++ remove_attributes, flags); + break; + } + +diff --git a/gcc/testsuite/g++.dg/diagnostic/aka5.h b/gcc/testsuite/g++.dg/diagnostic/aka5.h +new file mode 100644 +index 000000000..0c7404d76 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/diagnostic/aka5.h +@@ -0,0 +1,22 @@ ++#ifdef IS_SYSTEM_HEADER ++#pragma GCC system_header ++#endif ++ ++typedef enum __internal_enum { A, B } user_enum; ++typedef user_enum *user_enum_ptr; ++ ++typedef struct __internal_struct { int i; } user_struct; ++typedef user_struct user_struct_copy; ++typedef user_struct *user_struct_ptr; ++ ++typedef union __internal_union { int i; } user_union; ++typedef user_union user_union_copy; ++typedef user_union *user_union_ptr; ++ ++typedef unsigned int user_vector __attribute__((__vector_size__(16))); ++typedef user_vector user_vector_copy; ++typedef user_vector *user_vector_ptr; ++ ++typedef int user_int; ++typedef user_int user_int_copy; ++typedef user_int *user_int_ptr; +diff --git a/gcc/testsuite/g++.dg/diagnostic/aka5a.C 
b/gcc/testsuite/g++.dg/diagnostic/aka5a.C +new file mode 100644 +index 000000000..e9d4c02f6 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/diagnostic/aka5a.C +@@ -0,0 +1,127 @@ ++#define IS_SYSTEM_HEADER ++#include "aka5.h" ++ ++typedef user_enum user_enum_copy; ++ ++struct s { int i; }; ++ ++user_enum ue1; ++user_enum_copy ue2; ++user_enum_ptr ue_ptr1; ++user_enum *ue_ptr2; ++const user_enum *const_ue_ptr1; ++const user_enum_copy *const_ue_ptr2; ++volatile user_enum *volatile_ue_ptr1; ++volatile user_enum_copy *volatile_ue_ptr2; ++user_enum (*ue_array_ptr1)[10]; ++user_enum_copy (*ue_array_ptr2)[10]; ++user_enum (*ue_fn_ptr1) (void); ++void (*ue_fn_ptr2) (user_enum); ++void (*ue_fn_ptr3) (user_enum, ...); ++user_enum_copy (*ue_fn_ptr4) (void); ++void (*ue_fn_ptr5) (user_enum_copy); ++void (*ue_fn_ptr6) (user_enum_copy, ...); ++user_enum (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr1) (void); ++user_enum_copy (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr2) (void); ++ ++user_struct us1; ++user_struct_copy us2; ++user_struct_ptr us_ptr1; ++user_struct *us_ptr2; ++const user_struct *const_us_ptr1; ++const user_struct_copy *const_us_ptr2; ++ ++user_union uu1; ++user_union_copy uu2; ++user_union_ptr uu_ptr1; ++user_union *uu_ptr2; ++const user_union *const_uu_ptr1; ++const user_union_copy *const_uu_ptr2; ++ ++user_vector uv1; ++user_vector_copy uv2; ++user_vector_ptr uv_ptr1; ++user_vector *uv_ptr2; ++const user_vector *const_uv_ptr1; ++const user_vector_copy *const_uv_ptr2; ++ ++user_int ui1; ++user_int_copy ui2; ++user_int_ptr ui_ptr1; ++user_int *ui_ptr2; ++const user_int *const_ui_ptr1; ++const user_int_copy *const_ui_ptr2; ++volatile user_int *volatile_ui_ptr1; ++volatile user_int_copy *volatile_ui_ptr2; ++user_int (*ui_array_ptr1)[10]; ++user_int_copy (*ui_array_ptr2)[10]; ++user_int (*ui_fn_ptr1) (void); ++void (*ui_fn_ptr2) (user_int); ++void (*ui_fn_ptr3) (user_int, ...); ++user_int_copy (*ui_fn_ptr4) (void); ++void (*ui_fn_ptr5) (user_int_copy); ++void (*ui_fn_ptr6) (user_int_copy, ...); ++user_int (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr1) (void); ++user_int_copy (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr2) (void); ++ ++void f (s s1) ++{ ++ ue1 = s1; // { dg-error {cannot convert 's' to 'user_enum' in assignment} } ++ ue2 = s1; // { dg-error {cannot convert 's' to 'user_enum_copy' {aka 'user_enum'} in assignment} } ++ ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_ptr' {aka 'user_enum\*'} in assignment} } ++ ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum\*' in assignment} } ++ const_ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_enum\*' in assignment} } ++ const_ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_enum_copy\*' {aka 'const user_enum\*'} in assignment} } ++ volatile_ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_enum\*' in assignment} } ++ volatile_ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_enum_copy\*' {aka 'volatile user_enum\*'} in assignment} } ++ ue_array_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(\*\)\[10\]' in assignment} } ++ ue_array_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(\*\)\[10\]' {aka 'user_enum \(\*\)\[10\]'} in assignment} } ++ ue_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(\*\)\(\)' in assignment} } ++ ue_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum\)' in assignment} } ++ 
ue_fn_ptr3 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum, \.\.\.\)' in assignment} } ++ ue_fn_ptr4 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(\*\)\(\)' {aka 'user_enum \(\*\)\(\)'} in assignment} } ++ ue_fn_ptr5 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum_copy\)' {aka 'void \(\*\)\(user_enum\)'} in assignment} } ++ ue_fn_ptr6 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum_copy, \.\.\.\)' {aka 'void \(\*\)\(user_enum, \.\.\.\)'} in assignment} } ++ unsafe_ue_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' in assignment} } ++ unsafe_ue_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } ++ ++ us1 = s1; // { dg-error {no match for 'operator=' in 'us1 = s1' \(operand types are 'user_struct' and 's'\)} } ++ us2 = s1; // { dg-error {no match for 'operator=' in 'us2 = s1' \(operand types are 'user_struct_copy' {aka 'user_struct'} and 's'\)} } ++ us_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_struct_ptr' {aka 'user_struct\*'} in assignment} } ++ us_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_struct\*' in assignment} } ++ const_us_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_struct\*' in assignment} } ++ const_us_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_struct_copy\*' {aka 'const user_struct\*'} in assignment} } ++ ++ uu1 = s1; // { dg-error {no match for 'operator=' in 'uu1 = s1' \(operand types are 'user_union' and 's'\)} } ++ uu2 = s1; // { dg-error {no match for 'operator=' in 'uu2 = s1' \(operand types are 'user_union_copy' {aka 'user_union'} and 's'\)} } ++ uu_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_union_ptr' {aka 'user_union\*'} in assignment} } ++ uu_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_union\*' in assignment} } ++ const_uu_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_union\*' in assignment} } ++ const_uu_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_union_copy\*' {aka 'const user_union\*'} in assignment} } ++ ++ uv1 = s1; // { dg-error {cannot convert 's' to 'user_vector' in assignment} } ++ uv2 = s1; // { dg-error {cannot convert 's' to 'user_vector_copy' {aka 'user_vector'} in assignment} } ++ uv_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_vector_ptr' {aka 'user_vector\*'} in assignment} } ++ uv_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_vector\*' in assignment} } ++ const_uv_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_vector\*' in assignment} } ++ const_uv_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_vector_copy\*' {aka 'const user_vector\*'} in assignment} } ++ ++ ui1 = s1; // { dg-error {cannot convert 's' to 'user_int' {aka 'int'} in assignment} } ++ ui2 = s1; // { dg-error {cannot convert 's' to 'user_int_copy' {aka 'int'} in assignment} } ++ ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_ptr' {aka 'int\*'} in assignment} } ++ ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int\*' {aka 'int\*'} in assignment} } ++ const_ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_int\*' {aka 'const int\*'} in assignment} } ++ const_ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_int_copy\*' {aka 'const int\*'} in 
assignment} } ++ volatile_ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_int\*' {aka 'volatile int\*'} in assignment} } ++ volatile_ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_int_copy\*' {aka 'volatile int\*'} in assignment} } ++ ui_array_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} in assignment} } ++ ui_array_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_copy \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} in assignment} } ++ ui_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(\*\)\(\)' {aka 'int \(\*\)\(\)'} in assignment} } ++ ui_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int\)' {aka 'void \(\*\)\(int\)'} in assignment} } ++ ui_fn_ptr3 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} in assignment} } ++ ui_fn_ptr4 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_copy \(\*\)\(\)' {aka 'int \(\*\)\(\)'} in assignment} } ++ ui_fn_ptr5 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int_copy\)' {aka 'void \(\*\)\(int\)'} in assignment} } ++ ui_fn_ptr6 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int_copy, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} in assignment} } ++ unsafe_ui_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } ++ unsafe_ui_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } ++} +diff --git a/gcc/testsuite/g++.dg/diagnostic/aka5b.C b/gcc/testsuite/g++.dg/diagnostic/aka5b.C +new file mode 100644 +index 000000000..6942be3ee +--- /dev/null ++++ b/gcc/testsuite/g++.dg/diagnostic/aka5b.C +@@ -0,0 +1,127 @@ ++#include "aka5.h" ++ ++typedef user_enum user_enum_copy; ++ ++struct s { int i; }; ++ ++user_enum ue1; ++user_enum_copy ue2; ++user_enum_ptr ue_ptr1; ++user_enum *ue_ptr2; ++const user_enum *const_ue_ptr1; ++const user_enum_copy *const_ue_ptr2; ++volatile user_enum *volatile_ue_ptr1; ++volatile user_enum_copy *volatile_ue_ptr2; ++user_enum (*ue_array_ptr1)[10]; ++user_enum_copy (*ue_array_ptr2)[10]; ++user_enum (*ue_fn_ptr1) (void); ++void (*ue_fn_ptr2) (user_enum); ++void (*ue_fn_ptr3) (user_enum, ...); ++user_enum_copy (*ue_fn_ptr4) (void); ++void (*ue_fn_ptr5) (user_enum_copy); ++void (*ue_fn_ptr6) (user_enum_copy, ...); ++user_enum (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr1) (void); ++user_enum_copy (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr2) (void); ++ ++user_struct us1; ++user_struct_copy us2; ++user_struct_ptr us_ptr1; ++user_struct *us_ptr2; ++const user_struct *const_us_ptr1; ++const user_struct_copy *const_us_ptr2; ++ ++user_union uu1; ++user_union_copy uu2; ++user_union_ptr uu_ptr1; ++user_union *uu_ptr2; ++const user_union *const_uu_ptr1; ++const user_union_copy *const_uu_ptr2; ++ ++user_vector uv1; ++user_vector_copy uv2; ++user_vector_ptr uv_ptr1; ++user_vector *uv_ptr2; ++const user_vector *const_uv_ptr1; ++const user_vector_copy *const_uv_ptr2; ++ ++user_int ui1; ++user_int_copy ui2; ++user_int_ptr ui_ptr1; ++user_int *ui_ptr2; ++const user_int *const_ui_ptr1; ++const user_int_copy *const_ui_ptr2; ++volatile user_int *volatile_ui_ptr1; ++volatile user_int_copy 
*volatile_ui_ptr2; ++user_int (*ui_array_ptr1)[10]; ++user_int_copy (*ui_array_ptr2)[10]; ++user_int (*ui_fn_ptr1) (void); ++void (*ui_fn_ptr2) (user_int); ++void (*ui_fn_ptr3) (user_int, ...); ++user_int_copy (*ui_fn_ptr4) (void); ++void (*ui_fn_ptr5) (user_int_copy); ++void (*ui_fn_ptr6) (user_int_copy, ...); ++user_int (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr1) (void); ++user_int_copy (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr2) (void); ++ ++void f (s s1) ++{ ++ ue1 = s1; // { dg-error {cannot convert 's' to 'user_enum' {aka '__internal_enum'} in assignment} } ++ ue2 = s1; // { dg-error {cannot convert 's' to 'user_enum_copy' {aka '__internal_enum'} in assignment} } ++ ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_ptr' {aka '__internal_enum\*'} in assignment} } ++ ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum\*' {aka '__internal_enum\*'} in assignment} } ++ const_ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_enum\*' {aka 'const __internal_enum\*'} in assignment} } ++ const_ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_enum_copy\*' {aka 'const __internal_enum\*'} in assignment} } ++ volatile_ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_enum\*' {aka 'volatile __internal_enum\*'} in assignment} } ++ volatile_ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_enum_copy\*' {aka 'volatile __internal_enum\*'} in assignment} } ++ ue_array_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(\*\)\[10\]' {aka '__internal_enum \(\*\)\[10\]'} in assignment} } ++ ue_array_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(\*\)\[10\]' {aka '__internal_enum \(\*\)\[10\]'} in assignment} } ++ ue_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(\*\)\(\)' {aka '__internal_enum \(\*\)\(\)'} in assignment} } ++ ue_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum\)' {aka 'void \(\*\)\(__internal_enum\)'} in assignment} } ++ ue_fn_ptr3 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum, \.\.\.\)' {aka 'void \(\*\)\(__internal_enum, \.\.\.\)'} in assignment} } ++ ue_fn_ptr4 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(\*\)\(\)' {aka '__internal_enum \(\*\)\(\)'} in assignment} } ++ ue_fn_ptr5 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum_copy\)' {aka 'void \(\*\)\(__internal_enum\)'} in assignment} } ++ ue_fn_ptr6 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum_copy, \.\.\.\)' {aka 'void \(\*\)\(__internal_enum, \.\.\.\)'} in assignment} } ++ unsafe_ue_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka '__internal_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } ++ unsafe_ue_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka '__internal_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } ++ ++ us1 = s1; // { dg-error {no match for 'operator=' in 'us1 = s1' \(operand types are 'user_struct' {aka '__internal_struct'} and 's'\)} } ++ us2 = s1; // { dg-error {no match for 'operator=' in 'us2 = s1' \(operand types are 'user_struct_copy' {aka '__internal_struct'} and 's'\)} } ++ us_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_struct_ptr' {aka '__internal_struct\*'} in assignment} } ++ us_ptr2 = &s1; // 
{ dg-error {cannot convert 's\*' to 'user_struct\*' {aka '__internal_struct\*'} in assignment} } ++ const_us_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_struct\*' {aka 'const __internal_struct\*'} in assignment} } ++ const_us_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_struct_copy\*' {aka 'const __internal_struct\*'} in assignment} } ++ ++ uu1 = s1; // { dg-error {no match for 'operator=' in 'uu1 = s1' \(operand types are 'user_union' {aka '__internal_union'} and 's'\)} } ++ uu2 = s1; // { dg-error {no match for 'operator=' in 'uu2 = s1' \(operand types are 'user_union_copy' {aka '__internal_union'} and 's'\)} } ++ uu_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_union_ptr' {aka '__internal_union\*'} in assignment} } ++ uu_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_union\*' {aka '__internal_union\*'} in assignment} } ++ const_uu_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_union\*' {aka 'const __internal_union\*'} in assignment} } ++ const_uu_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_union_copy\*' {aka 'const __internal_union\*'} in assignment} } ++ ++ uv1 = s1; // { dg-error {cannot convert 's' to 'user_vector' {aka '__vector\([48]\) unsigned int'} in assignment} } ++ uv2 = s1; // { dg-error {cannot convert 's' to 'user_vector_copy' {aka '__vector\([48]\) unsigned int'} in assignment} } ++ uv_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_vector_ptr' {aka '__vector\([48]\) unsigned int\*'} in assignment} } ++ uv_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_vector\*' {aka '__vector\([48]\) unsigned int\*'} in assignment} } ++ const_uv_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_vector\*' {aka 'const __vector\([48]\) unsigned int\*'} in assignment} } ++ const_uv_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_vector_copy\*' {aka 'const __vector\([48]\) unsigned int\*'} in assignment} } ++ ++ ui1 = s1; // { dg-error {cannot convert 's' to 'user_int' {aka 'int'} in assignment} } ++ ui2 = s1; // { dg-error {cannot convert 's' to 'user_int_copy' {aka 'int'} in assignment} } ++ ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_ptr' {aka 'int\*'} in assignment} } ++ ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int\*' {aka 'int\*'} in assignment} } ++ const_ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_int\*' {aka 'const int\*'} in assignment} } ++ const_ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_int_copy\*' {aka 'const int\*'} in assignment} } ++ volatile_ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_int\*' {aka 'volatile int\*'} in assignment} } ++ volatile_ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_int_copy\*' {aka 'volatile int\*'} in assignment} } ++ ui_array_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} in assignment} } ++ ui_array_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_copy \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} in assignment} } ++ ui_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(\*\)\(\)' {aka 'int \(\*\)\(\)'} in assignment} } ++ ui_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int\)' {aka 'void \(\*\)\(int\)'} in assignment} } ++ ui_fn_ptr3 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} in assignment} } ++ ui_fn_ptr4 = &s1; // 
{ dg-error {cannot convert 's\*' to 'user_int_copy \(\*\)\(\)' {aka 'int \(\*\)\(\)'} in assignment} } ++ ui_fn_ptr5 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int_copy\)' {aka 'void \(\*\)\(int\)'} in assignment} } ++ ui_fn_ptr6 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int_copy, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} in assignment} } ++ unsafe_ui_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } ++ unsafe_ui_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } ++} ++ +diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C +index 2e3dfecac..6a76adb5b 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C +@@ -1,5 +1,5 @@ + // { dg-do compile } +-// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized -fdelete-null-pointer-checks --param early-inlining-insns-O2=14" } ++// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized -fdelete-null-pointer-checks --param early-inlining-insns-O2=14 --param max-inline-insns-single-O2=200" } + + #define assume(x) if(!(x))__builtin_unreachable() + +diff --git a/gcc/testsuite/g++.target/aarch64/diag_aka_1.C b/gcc/testsuite/g++.target/aarch64/diag_aka_1.C +new file mode 100644 +index 000000000..6b489981f +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/diag_aka_1.C +@@ -0,0 +1,13 @@ ++#include ++ ++typedef int16x4_t myvec; ++ ++void f (float x) ++{ ++ __Int8x8_t y1 = x; // { dg-error {cannot convert 'float' to '__Int8x8_t' in initialization} } ++ __Int8x8_t *ptr1 = &x; // { dg-error {cannot convert 'float\*' to '__Int8x8_t\*' in initialization} } ++ int8x8_t y2 = x; // { dg-error {cannot convert 'float' to 'int8x8_t' in initialization} } ++ int8x8_t *ptr2 = &x; // { dg-error {cannot convert 'float\*' to 'int8x8_t\*' in initialization} } ++ myvec y3 = x; // { dg-error {cannot convert 'float' to 'myvec' {aka 'int16x4_t'} in initialization} } ++ myvec *ptr3 = &x; // { dg-error {cannot convert 'float\*' to 'myvec\*' {aka 'int16x4_t\*'} in initialization} } ++} +diff --git a/gcc/testsuite/gcc.dg/diag-aka-1.c b/gcc/testsuite/gcc.dg/diag-aka-1.c +index fde4ca7c7..3383c1c26 100644 +--- a/gcc/testsuite/gcc.dg/diag-aka-1.c ++++ b/gcc/testsuite/gcc.dg/diag-aka-1.c +@@ -2,7 +2,7 @@ + /* { dg-options "-Wc++-compat" } */ + + typedef struct A { int i; } B; +-typedef struct T { int i; } T; ++typedef struct T { int i; } *T; /* { dg-warning "using 'T' as both a typedef and a tag is invalid" } */ + typedef const float TFA; + typedef TFA TFB; + typedef TFB TFC; +@@ -24,6 +24,6 @@ bar (B *b, int *i) + int + foo (void *a) + { +- T *t = a; /* { dg-warning "request for implicit conversion from 'void \\*' to 'T \\*' {aka 'struct T \\*'} not" } */ ++ T t = a; /* { dg-warning "request for implicit conversion from 'void \\*' to 'T' {aka 'struct T \\*'} not" } */ + return t->i; + } +diff --git a/gcc/testsuite/gcc.dg/diag-aka-4.c b/gcc/testsuite/gcc.dg/diag-aka-4.c +new file mode 100644 +index 000000000..cf98dd96a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/diag-aka-4.c +@@ -0,0 +1,72 @@ ++typedef struct struct_wrapper { int i; } struct_wrapper; ++typedef struct { int i; } anon_struct_wrapper; ++ ++typedef union union_wrapper { int i; } 
union_wrapper; ++typedef union { int i; } anon_union_wrapper; ++ ++typedef enum enum_wrapper { A, B } enum_wrapper; ++typedef enum { C, D } anon_enum_wrapper; ++ ++void test_struct_wrapper (struct_wrapper y, int x) ++{ ++ struct_wrapper *ptr = &x; /* { dg-error {initialization of 'struct_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ const struct_wrapper *const_ptr = &x; /* { dg-error {initialization of 'const struct_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ volatile struct_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile struct_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ struct_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'struct_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ ++ struct_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'struct_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ ++ int (*f2)(struct_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(struct_wrapper\)' from incompatible pointer type 'int \*'} } */ ++ y = x; /* { dg-error {incompatible types when assigning to type 'struct_wrapper' from type 'int'} } */ ++} ++ ++void test_anon_struct_wrapper (anon_struct_wrapper y, int x) ++{ ++ anon_struct_wrapper *ptr = &x; /* { dg-error {initialization of 'anon_struct_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ const anon_struct_wrapper *const_ptr = &x; /* { dg-error {initialization of 'const anon_struct_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ volatile anon_struct_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile anon_struct_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ anon_struct_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'anon_struct_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ ++ anon_struct_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'anon_struct_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ ++ int (*f2)(anon_struct_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(anon_struct_wrapper\)' from incompatible pointer type 'int \*'} } */ ++ y = x; /* { dg-error {incompatible types when assigning to type 'anon_struct_wrapper' from type 'int'} } */ ++} ++ ++void test_union_wrapper (union_wrapper y, int x) ++{ ++ union_wrapper *ptr = &x; /* { dg-error {initialization of 'union_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ const union_wrapper *const_ptr = &x; /* { dg-error {initialization of 'const union_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ volatile union_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile union_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ union_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'union_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ ++ union_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'union_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ ++ int (*f2)(union_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(union_wrapper\)' from incompatible pointer type 'int \*'} } */ ++ y = x; /* { dg-error {incompatible types when assigning to type 'union_wrapper' from type 'int'} } */ ++} ++ ++void test_anon_union_wrapper (anon_union_wrapper y, int x) ++{ ++ anon_union_wrapper *ptr = &x; /* { dg-error {initialization of 'anon_union_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ const anon_union_wrapper 
*const_ptr = &x; /* { dg-error {initialization of 'const anon_union_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ volatile anon_union_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile anon_union_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ anon_union_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'anon_union_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ ++ anon_union_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'anon_union_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ ++ int (*f2)(anon_union_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(anon_union_wrapper\)' from incompatible pointer type 'int \*'} } */ ++ y = x; /* { dg-error {incompatible types when assigning to type 'anon_union_wrapper' from type 'int'} } */ ++} ++ ++void test_enum_wrapper (enum_wrapper y, int x) ++{ ++ enum_wrapper *ptr = &x; /* { dg-error {initialization of 'enum_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ const enum_wrapper *const_ptr = &x; /* { dg-error {initialization of 'const enum_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ volatile enum_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile enum_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ enum_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'enum_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ ++ enum_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'enum_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ ++ int (*f2)(enum_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(enum_wrapper\)' from incompatible pointer type 'int \*'} } */ ++} ++ ++void test_anon_enum_wrapper (anon_enum_wrapper y, int x) ++{ ++ anon_enum_wrapper *ptr = &x; /* { dg-error {initialization of 'anon_enum_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ const anon_enum_wrapper *const_ptr = &x; /* { dg-error {initialization of 'const anon_enum_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ volatile anon_enum_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile anon_enum_wrapper \*' from incompatible pointer type 'int \*'} } */ ++ anon_enum_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'anon_enum_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ ++ anon_enum_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'anon_enum_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ ++ int (*f2)(anon_enum_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(anon_enum_wrapper\)' from incompatible pointer type 'int \*'} } */ ++} +diff --git a/gcc/testsuite/gcc.dg/diag-aka-5.h b/gcc/testsuite/gcc.dg/diag-aka-5.h +new file mode 100644 +index 000000000..0c7404d76 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/diag-aka-5.h +@@ -0,0 +1,22 @@ ++#ifdef IS_SYSTEM_HEADER ++#pragma GCC system_header ++#endif ++ ++typedef enum __internal_enum { A, B } user_enum; ++typedef user_enum *user_enum_ptr; ++ ++typedef struct __internal_struct { int i; } user_struct; ++typedef user_struct user_struct_copy; ++typedef user_struct *user_struct_ptr; ++ ++typedef union __internal_union { int i; } user_union; ++typedef user_union user_union_copy; ++typedef user_union *user_union_ptr; ++ ++typedef unsigned int user_vector __attribute__((__vector_size__(16))); ++typedef user_vector user_vector_copy; ++typedef user_vector *user_vector_ptr; ++ ++typedef int user_int; 
++typedef user_int user_int_copy; ++typedef user_int *user_int_ptr; +diff --git a/gcc/testsuite/gcc.dg/diag-aka-5a.c b/gcc/testsuite/gcc.dg/diag-aka-5a.c +new file mode 100644 +index 000000000..573020659 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/diag-aka-5a.c +@@ -0,0 +1,135 @@ ++#define IS_SYSTEM_HEADER ++#include "diag-aka-5.h" ++ ++typedef user_enum user_enum_copy; ++ ++struct s { int i; }; ++ ++user_enum ue1; ++user_enum_copy ue2; ++user_enum_ptr ue_ptr1; ++user_enum *ue_ptr2; ++const user_enum *const_ue_ptr1; ++const user_enum_copy *const_ue_ptr2; ++volatile user_enum *volatile_ue_ptr1; ++volatile user_enum_copy *volatile_ue_ptr2; ++__extension__ _Atomic user_enum *atomic_ue_ptr1; ++__extension__ _Atomic user_enum_copy *atomic_ue_ptr2; ++user_enum (*ue_array_ptr1)[10]; ++user_enum_copy (*ue_array_ptr2)[10]; ++user_enum (*ue_fn_ptr1) (void); ++void (*ue_fn_ptr2) (user_enum); ++void (*ue_fn_ptr3) (user_enum, ...); ++user_enum_copy (*ue_fn_ptr4) (void); ++void (*ue_fn_ptr5) (user_enum_copy); ++void (*ue_fn_ptr6) (user_enum_copy, ...); ++user_enum (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr1) (void); ++user_enum_copy (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr2) (void); ++ ++user_struct us1; ++user_struct_copy us2; ++user_struct_ptr us_ptr1; ++user_struct *us_ptr2; ++const user_struct *const_us_ptr1; ++const user_struct_copy *const_us_ptr2; ++ ++user_union uu1; ++user_union_copy uu2; ++user_union_ptr uu_ptr1; ++user_union *uu_ptr2; ++const user_union *const_uu_ptr1; ++const user_union_copy *const_uu_ptr2; ++ ++user_vector uv1; ++user_vector_copy uv2; ++user_vector_ptr uv_ptr1; ++user_vector *uv_ptr2; ++const user_vector *const_uv_ptr1; ++const user_vector_copy *const_uv_ptr2; ++ ++user_int ui1; ++user_int_copy ui2; ++user_int_ptr ui_ptr1; ++user_int *ui_ptr2; ++const user_int *const_ui_ptr1; ++const user_int_copy *const_ui_ptr2; ++volatile user_int *volatile_ui_ptr1; ++volatile user_int_copy *volatile_ui_ptr2; ++__extension__ _Atomic user_int *atomic_ui_ptr1; ++__extension__ _Atomic user_int_copy *atomic_ui_ptr2; ++user_int (*ui_array_ptr1)[10]; ++user_int_copy (*ui_array_ptr2)[10]; ++user_int (*ui_fn_ptr1) (void); ++void (*ui_fn_ptr2) (user_int); ++void (*ui_fn_ptr3) (user_int, ...); ++user_int_copy (*ui_fn_ptr4) (void); ++void (*ui_fn_ptr5) (user_int_copy); ++void (*ui_fn_ptr6) (user_int_copy, ...); ++user_int (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr1) (void); ++user_int_copy (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr2) (void); ++ ++void f (struct s s) ++{ ++ ue1 = s; /* { dg-error {assigning to type 'user_enum' from type 'struct s'} } */ ++ ue2 = s; /* { dg-error {assigning to type 'user_enum_copy' {aka 'user_enum'} from type 'struct s'} } */ ++ ue_ptr1 = &s; /* { dg-error {assignment to 'user_enum_ptr' {aka 'user_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ ue_ptr2 = &s; /* { dg-error {assignment to 'user_enum \*' from incompatible pointer type 'struct s \*'} } */ ++ const_ue_ptr1 = &s; /* { dg-error {assignment to 'const user_enum \*' from incompatible pointer type 'struct s \*'} } */ ++ const_ue_ptr2 = &s; /* { dg-error {assignment to 'const user_enum_copy \*' {aka 'const user_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ volatile_ue_ptr1 = &s; /* { dg-error {assignment to 'volatile user_enum \*' from incompatible pointer type 'struct s \*'} } */ ++ volatile_ue_ptr2 = &s; /* { dg-error {assignment to 'volatile user_enum_copy \*' {aka 'volatile user_enum \*'} from incompatible 
pointer type 'struct s \*'} } */ ++ atomic_ue_ptr1 = &s; /* { dg-error {assignment to '_Atomic user_enum \*' from incompatible pointer type 'struct s \*'} } */ ++ atomic_ue_ptr2 = &s; /* { dg-error {assignment to '_Atomic user_enum_copy \*' {aka '_Atomic user_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ ue_array_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(\*\)\[10\]' from incompatible pointer type 'struct s \*'} } */ ++ ue_array_ptr2 = &s; /* { dg-error {assignment to 'user_enum_copy \(\*\)\[10\]' {aka 'user_enum \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(\*\)\(void\)' from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr2 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum\)' from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr3 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum, \.\.\.\)' from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr4 = &s; /* { dg-error {assignment to 'user_enum_copy \(\*\)\(void\)' {aka 'user_enum \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr5 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum_copy\)' {aka 'void \(\*\)\(user_enum\)'} from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr6 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum_copy, \.\.\.\)' {aka 'void \(\*\)\(user_enum, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ ++ unsafe_ue_fn_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' from incompatible pointer type 'struct s \*'} } */ ++ unsafe_ue_fn_ptr2 = &s; /* { dg-error {assignment to 'user_enum_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ ++ us1 = s; /* { dg-error {assigning to type 'user_struct' from type 'struct s'} } */ ++ us2 = s; /* { dg-error {assigning to type 'user_struct_copy' {aka 'user_struct'} from type 'struct s'} } */ ++ us_ptr1 = &s; /* { dg-error {assignment to 'user_struct_ptr' {aka 'user_struct \*'} from incompatible pointer type 'struct s \*'} } */ ++ us_ptr2 = &s; /* { dg-error {assignment to 'user_struct \*' from incompatible pointer type 'struct s \*'} } */ ++ const_us_ptr1 = &s; /* { dg-error {assignment to 'const user_struct \*' from incompatible pointer type 'struct s \*'} } */ ++ const_us_ptr2 = &s; /* { dg-error {assignment to 'const user_struct_copy \*' {aka 'const user_struct \*'} from incompatible pointer type 'struct s \*'} } */ ++ ++ uu1 = s; /* { dg-error {assigning to type 'user_union' from type 'struct s'} } */ ++ uu2 = s; /* { dg-error {assigning to type 'user_union_copy' {aka 'user_union'} from type 'struct s'} } */ ++ uu_ptr1 = &s; /* { dg-error {assignment to 'user_union_ptr' {aka 'user_union \*'} from incompatible pointer type 'struct s \*'} } */ ++ uu_ptr2 = &s; /* { dg-error {assignment to 'user_union \*' from incompatible pointer type 'struct s \*'} } */ ++ const_uu_ptr1 = &s; /* { dg-error {assignment to 'const user_union \*' from incompatible pointer type 'struct s \*'} } */ ++ const_uu_ptr2 = &s; /* { dg-error {assignment to 'const user_union_copy \*' {aka 'const user_union \*'} from incompatible pointer type 'struct s \*'} } */ ++ ++ uv1 = s; /* { dg-error {assigning to type 'user_vector' from type 'struct s'} } */ ++ uv2 = s; /* { dg-error {assigning to type 
'user_vector_copy' {aka 'user_vector'} from type 'struct s'} } */ ++ uv_ptr1 = &s; /* { dg-error {assignment to 'user_vector_ptr' {aka 'user_vector \*'} from incompatible pointer type 'struct s \*'} } */ ++ uv_ptr2 = &s; /* { dg-error {assignment to 'user_vector \*' from incompatible pointer type 'struct s \*'} } */ ++ const_uv_ptr1 = &s; /* { dg-error {assignment to 'const user_vector \*' from incompatible pointer type 'struct s \*'} } */ ++ const_uv_ptr2 = &s; /* { dg-error {assignment to 'const user_vector_copy \*' {aka 'const user_vector \*'} from incompatible pointer type 'struct s \*'} } */ ++ ++ ui1 = s; /* { dg-error {assigning to type 'user_int' {aka 'int'} from type 'struct s'} } */ ++ ui2 = s; /* { dg-error {assigning to type 'user_int_copy' {aka 'int'} from type 'struct s'} } */ ++ ui_ptr1 = &s; /* { dg-error {assignment to 'user_int_ptr' {aka 'int \*'} from incompatible pointer type 'struct s \*'} } */ ++ ui_ptr2 = &s; /* { dg-error {assignment to 'user_int \*' {aka 'int \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_ui_ptr1 = &s; /* { dg-error {assignment to 'const user_int \*' {aka 'const int \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_ui_ptr2 = &s; /* { dg-error {assignment to 'const user_int_copy \*' {aka 'const int \*'} from incompatible pointer type 'struct s \*'} } */ ++ volatile_ui_ptr1 = &s; /* { dg-error {assignment to 'volatile user_int \*' {aka 'volatile int \*'} from incompatible pointer type 'struct s \*'} } */ ++ volatile_ui_ptr2 = &s; /* { dg-error {assignment to 'volatile user_int_copy \*' {aka 'volatile int \*'} from incompatible pointer type 'struct s \*'} } */ ++ atomic_ui_ptr1 = &s; /* { dg-error {assignment to '_Atomic user_int \*' {aka '_Atomic int \*'} from incompatible pointer type 'struct s \*'} } */ ++ atomic_ui_ptr2 = &s; /* { dg-error {assignment to '_Atomic user_int_copy \*' {aka '_Atomic int \*'} from incompatible pointer type 'struct s \*'} } */ ++ ui_array_ptr1 = &s; /* { dg-error {assignment to 'user_int \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ ++ ui_array_ptr2 = &s; /* { dg-error {assignment to 'user_int_copy \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr1 = &s; /* { dg-error {assignment to 'user_int \(\*\)\(void\)' {aka 'int \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr2 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int\)' {aka 'void \(\*\)\(int\)'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr3 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr4 = &s; /* { dg-error {assignment to 'user_int_copy \(\*\)\(void\)' {aka 'int \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr5 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int_copy\)' {aka 'void \(\*\)\(int\)'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr6 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int_copy, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ ++ unsafe_ui_fn_ptr1 = &s; /* { dg-error {assignment to 'user_int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ unsafe_ui_fn_ptr2 = &s; /* { dg-error {assignment to 'user_int_copy 
\(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++} +diff --git a/gcc/testsuite/gcc.dg/diag-aka-5b.c b/gcc/testsuite/gcc.dg/diag-aka-5b.c +new file mode 100644 +index 000000000..f510d0d40 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/diag-aka-5b.c +@@ -0,0 +1,134 @@ ++#include "diag-aka-5.h" ++ ++typedef user_enum user_enum_copy; ++ ++struct s { int i; }; ++ ++user_enum ue1; ++user_enum_copy ue2; ++user_enum_ptr ue_ptr1; ++user_enum *ue_ptr2; ++const user_enum *const_ue_ptr1; ++const user_enum_copy *const_ue_ptr2; ++volatile user_enum *volatile_ue_ptr1; ++volatile user_enum_copy *volatile_ue_ptr2; ++__extension__ _Atomic user_enum *atomic_ue_ptr1; ++__extension__ _Atomic user_enum_copy *atomic_ue_ptr2; ++user_enum (*ue_array_ptr1)[10]; ++user_enum_copy (*ue_array_ptr2)[10]; ++user_enum (*ue_fn_ptr1) (void); ++void (*ue_fn_ptr2) (user_enum); ++void (*ue_fn_ptr3) (user_enum, ...); ++user_enum_copy (*ue_fn_ptr4) (void); ++void (*ue_fn_ptr5) (user_enum_copy); ++void (*ue_fn_ptr6) (user_enum_copy, ...); ++user_enum (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr1) (void); ++user_enum_copy (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr2) (void); ++ ++user_struct us1; ++user_struct_copy us2; ++user_struct_ptr us_ptr1; ++user_struct *us_ptr2; ++const user_struct *const_us_ptr1; ++const user_struct_copy *const_us_ptr2; ++ ++user_union uu1; ++user_union_copy uu2; ++user_union_ptr uu_ptr1; ++user_union *uu_ptr2; ++const user_union *const_uu_ptr1; ++const user_union_copy *const_uu_ptr2; ++ ++user_vector uv1; ++user_vector_copy uv2; ++user_vector_ptr uv_ptr1; ++user_vector *uv_ptr2; ++const user_vector *const_uv_ptr1; ++const user_vector_copy *const_uv_ptr2; ++ ++user_int ui1; ++user_int_copy ui2; ++user_int_ptr ui_ptr1; ++user_int *ui_ptr2; ++const user_int *const_ui_ptr1; ++const user_int_copy *const_ui_ptr2; ++volatile user_int *volatile_ui_ptr1; ++volatile user_int_copy *volatile_ui_ptr2; ++__extension__ _Atomic user_int *atomic_ui_ptr1; ++__extension__ _Atomic user_int_copy *atomic_ui_ptr2; ++user_int (*ui_array_ptr1)[10]; ++user_int_copy (*ui_array_ptr2)[10]; ++user_int (*ui_fn_ptr1) (void); ++void (*ui_fn_ptr2) (user_int); ++void (*ui_fn_ptr3) (user_int, ...); ++user_int_copy (*ui_fn_ptr4) (void); ++void (*ui_fn_ptr5) (user_int_copy); ++void (*ui_fn_ptr6) (user_int_copy, ...); ++user_int (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr1) (void); ++user_int_copy (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr2) (void); ++ ++void f (struct s s) ++{ ++ ue1 = s; /* { dg-error {assigning to type 'user_enum' {aka 'enum __internal_enum'} from type 'struct s'} } */ ++ ue2 = s; /* { dg-error {assigning to type 'user_enum_copy' {aka 'enum __internal_enum'} from type 'struct s'} } */ ++ ue_ptr1 = &s; /* { dg-error {assignment to 'user_enum_ptr' {aka 'enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ ue_ptr2 = &s; /* { dg-error {assignment to 'user_enum \*' {aka 'enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_ue_ptr1 = &s; /* { dg-error {assignment to 'const user_enum \*' {aka 'const enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_ue_ptr2 = &s; /* { dg-error {assignment to 'const user_enum_copy \*' {aka 'const enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ volatile_ue_ptr1 = &s; /* { dg-error {assignment 
to 'volatile user_enum \*' {aka 'volatile enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ volatile_ue_ptr2 = &s; /* { dg-error {assignment to 'volatile user_enum_copy \*' {aka 'volatile enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ atomic_ue_ptr1 = &s; /* { dg-error {assignment to '_Atomic user_enum \*' {aka '_Atomic enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ atomic_ue_ptr2 = &s; /* { dg-error {assignment to '_Atomic user_enum_copy \*' {aka '_Atomic enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ ++ ue_array_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(\*\)\[10\]' {aka 'enum __internal_enum \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ ++ ue_array_ptr2 = &s; /* { dg-error {assignment to 'user_enum_copy \(\*\)\[10\]' {aka 'enum __internal_enum \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(\*\)\(void\)' {aka 'enum __internal_enum \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr2 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum\)' {aka 'void \(\*\)\(enum __internal_enum\)'} from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr3 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum, \.\.\.\)' {aka 'void \(\*\)\(enum __internal_enum, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr4 = &s; /* { dg-error {assignment to 'user_enum_copy \(\*\)\(void\)' {aka 'enum __internal_enum \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr5 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum_copy\)' {aka 'void \(\*\)\(enum __internal_enum\)'} from incompatible pointer type 'struct s \*'} } */ ++ ue_fn_ptr6 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum_copy, \.\.\.\)' {aka 'void \(\*\)\(enum __internal_enum, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ ++ unsafe_ue_fn_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'enum __internal_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ unsafe_ue_fn_ptr2 = &s; /* { dg-error {assignment to 'user_enum_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'enum __internal_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ ++ us1 = s; /* { dg-error {assigning to type 'user_struct' {aka 'struct __internal_struct'} from type 'struct s'} } */ ++ us2 = s; /* { dg-error {assigning to type 'user_struct_copy' {aka 'struct __internal_struct'} from type 'struct s'} } */ ++ us_ptr1 = &s; /* { dg-error {assignment to 'user_struct_ptr' {aka 'struct __internal_struct \*'} from incompatible pointer type 'struct s \*'} } */ ++ us_ptr2 = &s; /* { dg-error {assignment to 'user_struct \*' {aka 'struct __internal_struct \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_us_ptr1 = &s; /* { dg-error {assignment to 'const user_struct \*' {aka 'const struct __internal_struct \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_us_ptr2 = &s; /* { dg-error {assignment to 'const user_struct_copy \*' {aka 'const struct __internal_struct \*'} from incompatible pointer type 'struct s \*'} } */ ++ ++ uu1 = s; /* { dg-error {assigning to type 'user_union' {aka 'union __internal_union'} 
from type 'struct s'} } */ ++ uu2 = s; /* { dg-error {assigning to type 'user_union_copy' {aka 'union __internal_union'} from type 'struct s'} } */ ++ uu_ptr1 = &s; /* { dg-error {assignment to 'user_union_ptr' {aka 'union __internal_union \*'} from incompatible pointer type 'struct s \*'} } */ ++ uu_ptr2 = &s; /* { dg-error {assignment to 'user_union \*' {aka 'union __internal_union \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_uu_ptr1 = &s; /* { dg-error {assignment to 'const user_union \*' {aka 'const union __internal_union \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_uu_ptr2 = &s; /* { dg-error {assignment to 'const user_union_copy \*' {aka 'const union __internal_union \*'} from incompatible pointer type 'struct s \*'} } */ ++ ++ uv1 = s; /* { dg-error {assigning to type 'user_vector' {aka '__vector\([48]\) unsigned int'} from type 'struct s'} } */ ++ uv2 = s; /* { dg-error {assigning to type 'user_vector_copy' {aka '__vector\([48]\) unsigned int'} from type 'struct s'} } */ ++ uv_ptr1 = &s; /* { dg-error {assignment to 'user_vector_ptr' {aka '__vector\([48]\) unsigned int \*'} from incompatible pointer type 'struct s \*'} } */ ++ uv_ptr2 = &s; /* { dg-error {assignment to 'user_vector \*' {aka '__vector\([48]\) unsigned int \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_uv_ptr1 = &s; /* { dg-error {assignment to 'const user_vector \*' {aka 'const __vector\([48]\) unsigned int \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_uv_ptr2 = &s; /* { dg-error {assignment to 'const user_vector_copy \*' {aka 'const __vector\([48]\) unsigned int \*'} from incompatible pointer type 'struct s \*'} } */ ++ ++ ui1 = s; /* { dg-error {assigning to type 'user_int' {aka 'int'} from type 'struct s'} } */ ++ ui2 = s; /* { dg-error {assigning to type 'user_int_copy' {aka 'int'} from type 'struct s'} } */ ++ ui_ptr1 = &s; /* { dg-error {assignment to 'user_int_ptr' {aka 'int \*'} from incompatible pointer type 'struct s \*'} } */ ++ ui_ptr2 = &s; /* { dg-error {assignment to 'user_int \*' {aka 'int \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_ui_ptr1 = &s; /* { dg-error {assignment to 'const user_int \*' {aka 'const int \*'} from incompatible pointer type 'struct s \*'} } */ ++ const_ui_ptr2 = &s; /* { dg-error {assignment to 'const user_int_copy \*' {aka 'const int \*'} from incompatible pointer type 'struct s \*'} } */ ++ volatile_ui_ptr1 = &s; /* { dg-error {assignment to 'volatile user_int \*' {aka 'volatile int \*'} from incompatible pointer type 'struct s \*'} } */ ++ volatile_ui_ptr2 = &s; /* { dg-error {assignment to 'volatile user_int_copy \*' {aka 'volatile int \*'} from incompatible pointer type 'struct s \*'} } */ ++ atomic_ui_ptr1 = &s; /* { dg-error {assignment to '_Atomic user_int \*' {aka '_Atomic int \*'} from incompatible pointer type 'struct s \*'} } */ ++ atomic_ui_ptr2 = &s; /* { dg-error {assignment to '_Atomic user_int_copy \*' {aka '_Atomic int \*'} from incompatible pointer type 'struct s \*'} } */ ++ ui_array_ptr1 = &s; /* { dg-error {assignment to 'user_int \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ ++ ui_array_ptr2 = &s; /* { dg-error {assignment to 'user_int_copy \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr1 = &s; /* { dg-error {assignment to 'user_int \(\*\)\(void\)' {aka 'int \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr2 = &s; /* { dg-error 
{assignment to 'void \(\*\)\(user_int\)' {aka 'void \(\*\)\(int\)'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr3 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr4 = &s; /* { dg-error {assignment to 'user_int_copy \(\*\)\(void\)' {aka 'int \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr5 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int_copy\)' {aka 'void \(\*\)\(int\)'} from incompatible pointer type 'struct s \*'} } */ ++ ui_fn_ptr6 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int_copy, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ ++ unsafe_ui_fn_ptr1 = &s; /* { dg-error {assignment to 'user_int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++ unsafe_ui_fn_ptr2 = &s; /* { dg-error {assignment to 'user_int_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c b/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c +index 59e24f48b..98dffead6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c +@@ -8,7 +8,6 @@ void f (float x) + __Int8x8_t *ptr1 = &x; /* { dg-error {initialization of '__Int8x8_t \*' from incompatible pointer type 'float \*'} } */ + int8x8_t y2 = x; /* { dg-error {incompatible types when initializing type 'int8x8_t' using type 'float'} } */ + int8x8_t *ptr2 = &x; /* { dg-error {initialization of 'int8x8_t \*' from incompatible pointer type 'float \*'} } */ +- /* ??? For these it would be better to print an aka for 'int16x4_t'. 
*/ +- myvec y3 = x; /* { dg-error {incompatible types when initializing type 'myvec' using type 'float'} } */ +- myvec *ptr3 = &x; /* { dg-error {initialization of 'myvec \*' from incompatible pointer type 'float \*'} } */ ++ myvec y3 = x; /* { dg-error {incompatible types when initializing type 'myvec' {aka 'int16x4_t'} using type 'float'} } */ ++ myvec *ptr3 = &x; /* { dg-error {initialization of 'myvec \*' {aka 'int16x4_t \*'} from incompatible pointer type 'float \*'} } */ + } +diff --git a/gcc/testsuite/gcc.target/aarch64/pr88838.c b/gcc/testsuite/gcc.target/aarch64/pr88838.c +new file mode 100644 +index 000000000..d7db84758 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/pr88838.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */ ++ ++void ++f (int *restrict x, int *restrict y, int *restrict z, int n) ++{ ++ for (int i = 0; i < n; i += 1) ++ x[i] = y[i] + z[i]; ++} ++ ++/* { dg-final { scan-assembler-not "sxtw" } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_1.c b/gcc/testsuite/gcc.target/aarch64/sve/while_1.c +index 2655c4242..2cfb3f697 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/while_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/while_1.c +@@ -26,14 +26,14 @@ + TEST_ALL (ADD_LOOP) + + /* { dg-final { scan-assembler-not {\tuqdec} } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, x[0-9]+,} 2 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, xzr,} 2 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, x[0-9]+,} 2 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, xzr,} 3 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, x[0-9]+,} 3 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, xzr,} 3 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, x[0-9]+,} 3 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, wzr,} 2 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, w[0-9]+,} 2 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, wzr,} 2 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, w[0-9]+,} 2 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, wzr,} 3 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, w[0-9]+,} 3 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, wzr,} 3 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, w[0-9]+,} 3 } } */ + /* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z, \[x0, x[0-9]+\]\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0, x[0-9]+\]\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ +diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c +index d38b298aa..e51b95593 100644 +--- a/gcc/tree-vect-loop-manip.c ++++ b/gcc/tree-vect-loop-manip.c +@@ -423,6 +423,7 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo, + bool might_wrap_p) + { + tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); ++ tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo); + tree mask_type = rgm->mask_type; + unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter; + poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type); +@@ -453,11 +454,16 @@ vect_set_loop_masks_directly (struct loop 
*loop, loop_vec_info loop_vinfo, + tree index_before_incr, index_after_incr; + gimple_stmt_iterator incr_gsi; + bool insert_after; +- tree zero_index = build_int_cst (compare_type, 0); + standard_iv_increment_position (loop, &incr_gsi, &insert_after); +- create_iv (zero_index, nscalars_step, NULL_TREE, loop, &incr_gsi, ++ ++ tree zero_index = build_int_cst (iv_type, 0); ++ tree step = build_int_cst (iv_type, ++ LOOP_VINFO_VECT_FACTOR (loop_vinfo)); ++ /* Create IV of iv_type. */ ++ create_iv (zero_index, step, NULL_TREE, loop, &incr_gsi, + insert_after, &index_before_incr, &index_after_incr); + ++ zero_index = build_int_cst (compare_type, 0); + tree test_index, test_limit, first_limit; + gimple_stmt_iterator *test_gsi; + if (might_wrap_p) +@@ -537,6 +543,10 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo, + tree next_mask = NULL_TREE; + tree mask; + unsigned int i; ++ gimple_seq test_seq = NULL; ++ test_index = gimple_convert (&test_seq, compare_type, test_index); ++ gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT); ++ + FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask) + { + /* Previous masks will cover BIAS scalars. This mask covers the +@@ -645,12 +655,12 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo, + + tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); + unsigned int compare_precision = TYPE_PRECISION (compare_type); +- unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo); + tree orig_niters = niters; + + /* Type of the initial value of NITERS. */ + tree ni_actual_type = TREE_TYPE (niters); + unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type); ++ tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); + + /* Convert NITERS to the same size as the compare. */ + if (compare_precision > ni_actual_precision +@@ -669,33 +679,7 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo, + else + niters = gimple_convert (&preheader_seq, compare_type, niters); + +- /* Convert skip_niters to the right type. */ +- tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); +- +- /* Now calculate the value that the induction variable must be able +- to hit in order to ensure that we end the loop with an all-false mask. +- This involves adding the maximum number of inactive trailing scalar +- iterations. */ +- widest_int iv_limit; +- bool known_max_iters = max_loop_iterations (loop, &iv_limit); +- if (known_max_iters) +- { +- if (niters_skip) +- { +- /* Add the maximum number of skipped iterations to the +- maximum iteration count. */ +- if (TREE_CODE (niters_skip) == INTEGER_CST) +- iv_limit += wi::to_widest (niters_skip); +- else +- iv_limit += max_vf - 1; +- } +- /* IV_LIMIT is the maximum number of latch iterations, which is also +- the maximum in-range IV value. Round this value down to the previous +- vector alignment boundary and then add an extra full iteration. */ +- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); +- iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf; +- } +- ++ widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo); + /* Get the vectorization factor in tree form. */ + tree vf = build_int_cst (compare_type, + LOOP_VINFO_VECT_FACTOR (loop_vinfo)); +@@ -725,7 +709,7 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo, + /* See whether zero-based IV would ever generate all-false masks + before wrapping around. 
*/ + bool might_wrap_p +- = (!known_max_iters ++ = (iv_limit == -1 + || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter, + UNSIGNED) + > compare_precision)); +diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +index 16d7d7788..e98bf2c15 100644 +--- a/gcc/tree-vect-loop.c ++++ b/gcc/tree-vect-loop.c +@@ -1038,6 +1038,8 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) + { + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + unsigned int min_ni_width; ++ unsigned int max_nscalars_per_iter ++ = vect_get_max_nscalars_per_iter (loop_vinfo); + + /* Use a normal loop if there are no statements that need masking. + This only happens in rare degenerate cases: it means that the loop +@@ -1056,7 +1058,7 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) + max_ni = wi::smin (max_ni, max_back_edges + 1); + + /* Account for rgroup masks, in which each bit is replicated N times. */ +- max_ni *= vect_get_max_nscalars_per_iter (loop_vinfo); ++ max_ni *= max_nscalars_per_iter; + + /* Work out how many bits we need to represent the limit. */ + min_ni_width = wi::min_precision (max_ni, UNSIGNED); +@@ -1064,6 +1066,14 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) + /* Find a scalar mode for which WHILE_ULT is supported. */ + opt_scalar_int_mode cmp_mode_iter; + tree cmp_type = NULL_TREE; ++ tree iv_type = NULL_TREE; ++ widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo); ++ widest_int iv_precision = UINT_MAX; ++ ++ if (iv_limit != -1) ++ iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter, ++ UNSIGNED); ++ + FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT) + { + unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); +@@ -1075,10 +1085,32 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) + && can_produce_all_loop_masks_p (loop_vinfo, this_type)) + { + /* Although we could stop as soon as we find a valid mode, +- it's often better to continue until we hit Pmode, since the +- operands to the WHILE are more likely to be reusable in +- address calculations. */ +- cmp_type = this_type; ++ there are at least two reasons why that's not always the ++ best choice: ++ ++ - An IV that's Pmode or wider is more likely to be reusable ++ in address calculations than an IV that's narrower than ++ Pmode. ++ ++ - Doing the comparison in IV_PRECISION or wider allows ++ a natural 0-based IV, whereas using a narrower comparison ++ type requires mitigations against wrap-around. ++ ++ Conversely, if the IV limit is variable, doing the comparison ++ in a wider type than the original type can introduce ++ unnecessary extensions, so picking the widest valid mode ++ is not always a good choice either. ++ ++ Here we prefer the first IV type that's Pmode or wider, ++ and the first comparison type that's IV_PRECISION or wider. ++ (The comparison type must be no wider than the IV type, ++ to avoid extensions in the vector loop.) ++ ++ ??? We might want to try continuing beyond Pmode for ILP32 ++ targets if CMP_BITS < IV_PRECISION. 
*/ ++ iv_type = this_type; ++ if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type)) ++ cmp_type = this_type; + if (cmp_bits >= GET_MODE_BITSIZE (Pmode)) + break; + } +@@ -1089,6 +1121,7 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) + return false; + + LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type; ++ LOOP_VINFO_MASK_IV_TYPE (loop_vinfo) = iv_type; + return true; + } + +@@ -9080,3 +9113,45 @@ optimize_mask_stores (struct loop *loop) + add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION); + } + } ++ ++/* Decide whether it is possible to use a zero-based induction variable ++ when vectorizing LOOP_VINFO with a fully-masked loop. If it is, ++ return the value that the induction variable must be able to hold ++ in order to ensure that the loop ends with an all-false mask. ++ Return -1 otherwise. */ ++widest_int ++vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo) ++{ ++ tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); ++ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); ++ unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo); ++ ++ /* Calculate the value that the induction variable must be able ++ to hit in order to ensure that we end the loop with an all-false mask. ++ This involves adding the maximum number of inactive trailing scalar ++ iterations. */ ++ widest_int iv_limit = -1; ++ if (max_loop_iterations (loop, &iv_limit)) ++ { ++ if (niters_skip) ++ { ++ /* Add the maximum number of skipped iterations to the ++ maximum iteration count. */ ++ if (TREE_CODE (niters_skip) == INTEGER_CST) ++ iv_limit += wi::to_widest (niters_skip); ++ else ++ iv_limit += max_vf - 1; ++ } ++ else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) ++ /* Make a conservatively-correct assumption. */ ++ iv_limit += max_vf - 1; ++ ++ /* IV_LIMIT is the maximum number of latch iterations, which is also ++ the maximum in-range IV value. Round this value down to the previous ++ vector alignment boundary and then add an extra full iteration. */ ++ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); ++ iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf; ++ } ++ return iv_limit; ++} ++ +diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +index 34ba49f4d..fae4df52d 100644 +--- a/gcc/tree-vectorizer.h ++++ b/gcc/tree-vectorizer.h +@@ -529,6 +529,10 @@ typedef struct _loop_vec_info : public vec_info { + is false and vectorized loop otherwise. */ + tree simd_if_cond; + ++ /* Type of the IV to use in the WHILE_ULT call for fully-masked ++ loops. */ ++ tree iv_type; ++ + /* Unknown DRs according to which loop was peeled. */ + struct dr_vec_info *unaligned_dr; + +@@ -675,6 +679,7 @@ typedef struct _loop_vec_info : public vec_info { + #define LOOP_VINFO_MASKS(L) (L)->masks + #define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters + #define LOOP_VINFO_MASK_COMPARE_TYPE(L) (L)->mask_compare_type ++#define LOOP_VINFO_MASK_IV_TYPE(L) (L)->iv_type + #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask + #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest + #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs +@@ -1720,6 +1725,7 @@ extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *, + /* In tree-vect-loop.c. */ + /* Used in tree-vect-loop-manip.c */ + extern void determine_peel_for_niter (loop_vec_info); ++extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo); + /* Used in gimple-loop-interchange.c and tree-parloops.c. 
*/ + extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, + enum tree_code); diff --git a/aarch64-Fix-mismatched-SVE-predicate-modes.patch b/aarch64-Fix-mismatched-SVE-predicate-modes.patch new file mode 100644 index 0000000..8bb66d9 --- /dev/null +++ b/aarch64-Fix-mismatched-SVE-predicate-modes.patch @@ -0,0 +1,34 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-aarch64-Fix-mismatched-SVE-predicate-modes.patch +26bebf576ddcdcfb596f07e8c2896f17c48516e7 + +diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +--- a/gcc/config/aarch64/aarch64.c 2020-12-14 00:57:20.128000000 -0500 ++++ b/gcc/config/aarch64/aarch64.c 2020-12-14 01:00:15.080000000 -0500 +@@ -4328,6 +4328,7 @@ aarch64_expand_sve_const_pred_eor (rtx t + /* EOR the result with an ELT_SIZE PTRUE. */ + rtx mask = aarch64_ptrue_all (elt_size); + mask = force_reg (VNx16BImode, mask); ++ inv = gen_lowpart (VNx16BImode, inv); + target = aarch64_target_reg (target, VNx16BImode); + emit_insn (gen_aarch64_pred_z (XOR, VNx16BImode, target, mask, inv, mask)); + return target; +diff -urpN a/gcc/testsuite/gcc.dg/vect/pr94606.c b/gcc/testsuite/gcc.dg/vect/pr94606.c +--- a/gcc/testsuite/gcc.dg/vect/pr94606.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/vect/pr94606.c 2020-12-14 01:00:15.080000000 -0500 +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=256" { target aarch64*-*-* } } */ ++ ++const short mask[] = { 0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 1, 1, 1, 1, 1 }; ++ ++int ++foo (short *restrict x, short *restrict y) ++{ ++ for (int i = 0; i < 16; ++i) ++ if (mask[i]) ++ x[i] += y[i]; ++} diff --git a/aarch64-fix-sve-acle-error.patch b/aarch64-fix-sve-acle-error.patch new file mode 100644 index 0000000..237093a --- /dev/null +++ b/aarch64-fix-sve-acle-error.patch @@ -0,0 +1,2128 @@ +This backport contains 4 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-AArch64-Fix-build-for-non-default-languages.patch +6ff0cdebb1bc281ba2374f3ecdbe358c4fa74093 + +0002-C-Opt-out-of-GNU-vector-extensions-for-built-in-SVE-.patch +f486280c53be53136f0bb9b578f43dc6c9c5acea + +0003-C-Add-a-target-hook-that-allows-targets-to-verify-ty.patch +65ef05d0b7fb429c5760189e638c441dc3da33f4 + +0004-AArch64-Run-general-SVE-ACLE-tests-for-C.patch +6da4c454acee4dac53c4c549fa1caeb73fe1f82b + +diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c +index 2c10743b9..50423ec0f 100644 +--- a/gcc/c-family/c-common.c ++++ b/gcc/c-family/c-common.c +@@ -1012,7 +1012,8 @@ c_build_vec_perm_expr (location_t loc, tree v0, tree v1, tree mask, + || mask == error_mark_node) + return error_mark_node; + +- if (!VECTOR_INTEGER_TYPE_P (TREE_TYPE (mask))) ++ if (!gnu_vector_type_p (TREE_TYPE (mask)) ++ || !VECTOR_INTEGER_TYPE_P (TREE_TYPE (mask))) + { + if (complain) + error_at (loc, "%<__builtin_shuffle%> last argument must " +@@ -1020,8 +1021,8 @@ c_build_vec_perm_expr (location_t loc, tree v0, tree v1, tree mask, + return error_mark_node; + } + +- if (!VECTOR_TYPE_P (TREE_TYPE (v0)) +- || !VECTOR_TYPE_P (TREE_TYPE (v1))) ++ if (!gnu_vector_type_p (TREE_TYPE (v0)) ++ || !gnu_vector_type_p (TREE_TYPE (v1))) + { + if (complain) + error_at (loc, "%<__builtin_shuffle%> arguments must be vectors"); +@@ -1096,8 +1097,9 @@ c_build_vec_convert (location_t loc1, tree expr, location_t loc2, tree type, + if (error_operand_p (expr)) + return error_mark_node; + +- if (!VECTOR_INTEGER_TYPE_P (TREE_TYPE (expr)) +- && !VECTOR_FLOAT_TYPE_P (TREE_TYPE (expr))) ++ if (!gnu_vector_type_p (TREE_TYPE (expr)) ++ || (!VECTOR_INTEGER_TYPE_P (TREE_TYPE (expr)) ++ && !VECTOR_FLOAT_TYPE_P (TREE_TYPE (expr)))) + { + if (complain) + error_at (loc1, "%<__builtin_convertvector%> first argument must " +@@ -1105,7 +1107,8 @@ c_build_vec_convert (location_t loc1, tree expr, location_t loc2, tree type, + return error_mark_node; + } + +- if (!VECTOR_INTEGER_TYPE_P (type) && !VECTOR_FLOAT_TYPE_P (type)) ++ if (!gnu_vector_type_p (type) ++ || (!VECTOR_INTEGER_TYPE_P (type) && !VECTOR_FLOAT_TYPE_P (type))) + { + if (complain) + error_at (loc2, "%<__builtin_convertvector%> second argument must " +@@ -3128,6 +3131,9 @@ pointer_int_sum (location_t loc, enum tree_code resultcode, + return error_mark_node; + size_exp = integer_one_node; + } ++ else if (!verify_type_context (loc, TCTX_POINTER_ARITH, ++ TREE_TYPE (result_type))) ++ size_exp = integer_one_node; + else + size_exp = size_in_bytes_loc (loc, TREE_TYPE (result_type)); + +@@ -3673,6 +3679,13 @@ c_sizeof_or_alignof_type (location_t loc, + "incomplete element type", op_name, type); + return error_mark_node; + } ++ else if (!verify_type_context (loc, is_sizeof ? 
TCTX_SIZEOF : TCTX_ALIGNOF, ++ type, !complain)) ++ { ++ if (!complain) ++ return error_mark_node; ++ value = size_one_node; ++ } + else + { + if (is_sizeof) +@@ -3705,7 +3718,10 @@ c_alignof_expr (location_t loc, tree expr) + { + tree t; + +- if (VAR_OR_FUNCTION_DECL_P (expr)) ++ if (!verify_type_context (loc, TCTX_ALIGNOF, TREE_TYPE (expr))) ++ t = size_one_node; ++ ++ else if (VAR_OR_FUNCTION_DECL_P (expr)) + t = size_int (DECL_ALIGN_UNIT (expr)); + + else if (TREE_CODE (expr) == COMPONENT_REF +@@ -7994,7 +8010,7 @@ convert_vector_to_array_for_subscript (location_t loc, + tree *vecp, tree index) + { + bool ret = false; +- if (VECTOR_TYPE_P (TREE_TYPE (*vecp))) ++ if (gnu_vector_type_p (TREE_TYPE (*vecp))) + { + tree type = TREE_TYPE (*vecp); + +@@ -8030,7 +8046,7 @@ scalar_to_vector (location_t loc, enum tree_code code, tree op0, tree op1, + bool integer_only_op = false; + enum stv_conv ret = stv_firstarg; + +- gcc_assert (VECTOR_TYPE_P (type0) || VECTOR_TYPE_P (type1)); ++ gcc_assert (gnu_vector_type_p (type0) || gnu_vector_type_p (type1)); + switch (code) + { + /* Most GENERIC binary expressions require homogeneous arguments. +@@ -8081,7 +8097,7 @@ scalar_to_vector (location_t loc, enum tree_code code, tree op0, tree op1, + case LT_EXPR: + case GT_EXPR: + /* What about UNLT_EXPR? */ +- if (VECTOR_TYPE_P (type0)) ++ if (gnu_vector_type_p (type0)) + { + ret = stv_secondarg; + std::swap (type0, type1); +diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h +index 73ce7c5df..2a9008af4 100644 +--- a/gcc/c-family/c-common.h ++++ b/gcc/c-family/c-common.h +@@ -756,6 +756,16 @@ extern bool done_lexing; + #define C_TYPE_OBJECT_OR_INCOMPLETE_P(type) \ + (!C_TYPE_FUNCTION_P (type)) + ++/* Return true if TYPE is a vector type that should be subject to the GNU ++ vector extensions (as opposed to a vector type that is used only for ++ the purposes of defining target-specific built-in functions). */ ++ ++inline bool ++gnu_vector_type_p (const_tree type) ++{ ++ return TREE_CODE (type) == VECTOR_TYPE && !TYPE_INDIVISIBLE_P (type); ++} ++ + struct visibility_flags + { + unsigned inpragma : 1; /* True when in #pragma GCC visibility. */ +diff --git a/gcc/c/c-convert.c b/gcc/c/c-convert.c +index f0f846013..21b127d0d 100644 +--- a/gcc/c/c-convert.c ++++ b/gcc/c/c-convert.c +@@ -147,8 +147,20 @@ convert (tree type, tree expr) + goto maybe_fold; + + case VECTOR_TYPE: +- ret = convert_to_vector (type, e); +- goto maybe_fold; ++ if (gnu_vector_type_p (type) ++ || gnu_vector_type_p (TREE_TYPE (e)) ++ /* Allow conversions between compatible non-GNU vector types ++ when -flax-vector-conversions is passed. The whole purpose ++ of the option is to bend the normal type rules and accept ++ nonconforming code. */ ++ || (flag_lax_vector_conversions ++ && VECTOR_TYPE_P (TREE_TYPE (e)) ++ && vector_types_convertible_p (type, TREE_TYPE (e), false))) ++ { ++ ret = convert_to_vector (type, e); ++ goto maybe_fold; ++ } ++ break; + + case RECORD_TYPE: + case UNION_TYPE: +diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c +index 288dbe9d9..bf88d3c7d 100644 +--- a/gcc/c/c-decl.c ++++ b/gcc/c/c-decl.c +@@ -4927,7 +4927,7 @@ start_decl (struct c_declarator *declarator, struct c_declspecs *declspecs, + { + /* A complete type is ok if size is fixed. 
*/ + +- if (TREE_CODE (TYPE_SIZE (TREE_TYPE (decl))) != INTEGER_CST ++ if (!poly_int_tree_p (TYPE_SIZE (TREE_TYPE (decl))) + || C_DECL_VARIABLE_SIZE (decl)) + { + error ("variable-sized object may not be initialized"); +@@ -5210,6 +5210,15 @@ finish_decl (tree decl, location_t init_loc, tree init, + + complete_flexible_array_elts (DECL_INITIAL (decl)); + ++ if (is_global_var (decl)) ++ { ++ type_context_kind context = (DECL_THREAD_LOCAL_P (decl) ++ ? TCTX_THREAD_STORAGE ++ : TCTX_STATIC_STORAGE); ++ if (!verify_type_context (input_location, context, TREE_TYPE (decl))) ++ TREE_TYPE (decl) = error_mark_node; ++ } ++ + if (DECL_SIZE (decl) == NULL_TREE && TREE_TYPE (decl) != error_mark_node + && COMPLETE_TYPE_P (TREE_TYPE (decl))) + layout_decl (decl, 0); +@@ -5239,7 +5248,9 @@ finish_decl (tree decl, location_t init_loc, tree init, + && TREE_STATIC (decl)) + incomplete_record_decls.safe_push (decl); + +- if (is_global_var (decl) && DECL_SIZE (decl) != NULL_TREE) ++ if (is_global_var (decl) ++ && DECL_SIZE (decl) != NULL_TREE ++ && TREE_TYPE (decl) != error_mark_node) + { + if (TREE_CODE (DECL_SIZE (decl)) == INTEGER_CST) + constant_expression_warning (DECL_SIZE (decl)); +@@ -5559,6 +5570,10 @@ build_compound_literal (location_t loc, tree type, tree init, bool non_const, + return error_mark_node; + } + ++ if (TREE_STATIC (decl) ++ && !verify_type_context (loc, TCTX_STATIC_STORAGE, type)) ++ return error_mark_node; ++ + stmt = build_stmt (DECL_SOURCE_LOCATION (decl), DECL_EXPR, decl); + complit = build1 (COMPOUND_LITERAL_EXPR, type, stmt); + TREE_SIDE_EFFECTS (complit) = 1; +@@ -6227,6 +6242,12 @@ grokdeclarator (const struct c_declarator *declarator, + if (type == error_mark_node) + continue; + ++ if (!verify_type_context (loc, TCTX_ARRAY_ELEMENT, type)) ++ { ++ type = error_mark_node; ++ continue; ++ } ++ + /* If size was specified, set ITYPE to a range-type for + that size. Otherwise, ITYPE remains null. finish_decl + may figure it out from an initial value. */ +@@ -7076,6 +7097,10 @@ grokdeclarator (const struct c_declarator *declarator, + if (orig_qual_indirect == 0) + orig_qual_type = NULL_TREE; + } ++ if (type != error_mark_node ++ && !verify_type_context (loc, TCTX_FIELD, type)) ++ type = error_mark_node; ++ + type = c_build_qualified_type (type, type_quals, orig_qual_type, + orig_qual_indirect); + decl = build_decl (declarator->id_loc, +diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c +index 87f4178ec..f456a66fb 100644 +--- a/gcc/c/c-typeck.c ++++ b/gcc/c/c-typeck.c +@@ -2609,7 +2609,7 @@ build_array_ref (location_t loc, tree array, tree index) + if (TREE_CODE (TREE_TYPE (array)) != ARRAY_TYPE + && TREE_CODE (TREE_TYPE (array)) != POINTER_TYPE + /* Allow vector[index] but not index[vector]. */ +- && !VECTOR_TYPE_P (TREE_TYPE (array))) ++ && !gnu_vector_type_p (TREE_TYPE (array))) + { + if (TREE_CODE (TREE_TYPE (index)) != ARRAY_TYPE + && TREE_CODE (TREE_TYPE (index)) != POINTER_TYPE) +@@ -3891,6 +3891,7 @@ pointer_diff (location_t loc, tree op0, tree op1, tree *instrument_expr) + addr_space_t as0 = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (op0))); + addr_space_t as1 = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (op1))); + tree target_type = TREE_TYPE (TREE_TYPE (op0)); ++ tree orig_op0 = op0; + tree orig_op1 = op1; + + /* If the operands point into different address spaces, we need to +@@ -3961,6 +3962,10 @@ pointer_diff (location_t loc, tree op0, tree op1, tree *instrument_expr) + /* This generates an error if op1 is pointer to incomplete type. 
*/ + if (!COMPLETE_OR_VOID_TYPE_P (TREE_TYPE (TREE_TYPE (orig_op1)))) + error_at (loc, "arithmetic on pointer to an incomplete type"); ++ else if (verify_type_context (loc, TCTX_POINTER_ARITH, ++ TREE_TYPE (TREE_TYPE (orig_op0)))) ++ verify_type_context (loc, TCTX_POINTER_ARITH, ++ TREE_TYPE (TREE_TYPE (orig_op1))); + + op1 = c_size_in_bytes (target_type); + +@@ -4359,7 +4364,7 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, + associativity, but won't generate any code. */ + if (!(typecode == INTEGER_TYPE || typecode == REAL_TYPE + || typecode == FIXED_POINT_TYPE || typecode == COMPLEX_TYPE +- || typecode == VECTOR_TYPE)) ++ || gnu_vector_type_p (TREE_TYPE (arg)))) + { + error_at (location, "wrong type argument to unary plus"); + return error_mark_node; +@@ -4372,7 +4377,7 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, + case NEGATE_EXPR: + if (!(typecode == INTEGER_TYPE || typecode == REAL_TYPE + || typecode == FIXED_POINT_TYPE || typecode == COMPLEX_TYPE +- || typecode == VECTOR_TYPE)) ++ || gnu_vector_type_p (TREE_TYPE (arg)))) + { + error_at (location, "wrong type argument to unary minus"); + return error_mark_node; +@@ -4384,7 +4389,7 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, + case BIT_NOT_EXPR: + /* ~ works on integer types and non float vectors. */ + if (typecode == INTEGER_TYPE +- || (typecode == VECTOR_TYPE ++ || (gnu_vector_type_p (TREE_TYPE (arg)) + && !VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg)))) + { + tree e = arg; +@@ -4570,7 +4575,8 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, + + if (typecode != POINTER_TYPE && typecode != FIXED_POINT_TYPE + && typecode != INTEGER_TYPE && typecode != REAL_TYPE +- && typecode != COMPLEX_TYPE && typecode != VECTOR_TYPE) ++ && typecode != COMPLEX_TYPE ++ && !gnu_vector_type_p (TREE_TYPE (arg))) + { + if (code == PREINCREMENT_EXPR || code == POSTINCREMENT_EXPR) + error_at (location, "wrong type argument to increment"); +@@ -4612,6 +4618,9 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, + pedwarn (location, OPT_Wpointer_arith, + "wrong type argument to decrement"); + } ++ else ++ verify_type_context (location, TCTX_POINTER_ARITH, ++ TREE_TYPE (argtype)); + + inc = c_size_in_bytes (TREE_TYPE (argtype)); + inc = convert_to_ptrofftype_loc (location, inc); +@@ -7854,7 +7863,7 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype, + TYPE_MAIN_VARIANT (type)) + || (code == ARRAY_TYPE + && comptypes (TREE_TYPE (inside_init), type)) +- || (code == VECTOR_TYPE ++ || (gnu_vector_type_p (type) + && comptypes (TREE_TYPE (inside_init), type)) + || (code == POINTER_TYPE + && TREE_CODE (TREE_TYPE (inside_init)) == ARRAY_TYPE +@@ -8352,7 +8361,7 @@ really_start_incremental_init (tree type) + + constructor_unfilled_index = constructor_index; + } +- else if (VECTOR_TYPE_P (constructor_type)) ++ else if (gnu_vector_type_p (constructor_type)) + { + /* Vectors are like simple fixed-size arrays. */ + constructor_max_index = +@@ -8526,7 +8535,7 @@ push_init_level (location_t loc, int implicit, + constructor_unfilled_fields = constructor_fields; + constructor_bit_index = bitsize_zero_node; + } +- else if (VECTOR_TYPE_P (constructor_type)) ++ else if (gnu_vector_type_p (constructor_type)) + { + /* Vectors are like simple fixed-size arrays. 
*/ + constructor_max_index = +@@ -8715,7 +8724,7 @@ pop_init_level (location_t loc, int implicit, + ; + else if (!RECORD_OR_UNION_TYPE_P (constructor_type) + && TREE_CODE (constructor_type) != ARRAY_TYPE +- && !VECTOR_TYPE_P (constructor_type)) ++ && !gnu_vector_type_p (constructor_type)) + { + /* A nonincremental scalar initializer--just return + the element, after verifying there is just one. */ +@@ -9941,7 +9950,7 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, + last_init_list_comma), + true, braced_init_obstack); + else if ((TREE_CODE (constructor_type) == ARRAY_TYPE +- || VECTOR_TYPE_P (constructor_type)) ++ || gnu_vector_type_p (constructor_type)) + && constructor_max_index + && tree_int_cst_lt (constructor_max_index, + constructor_index)) +@@ -10042,7 +10051,8 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, + && value.value != error_mark_node + && TYPE_MAIN_VARIANT (TREE_TYPE (value.value)) != fieldtype + && (fieldcode == RECORD_TYPE || fieldcode == ARRAY_TYPE +- || fieldcode == UNION_TYPE || fieldcode == VECTOR_TYPE)) ++ || fieldcode == UNION_TYPE ++ || gnu_vector_type_p (fieldtype))) + { + push_init_level (loc, 1, braced_init_obstack); + continue; +@@ -10133,7 +10143,8 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, + && value.value != error_mark_node + && TYPE_MAIN_VARIANT (TREE_TYPE (value.value)) != fieldtype + && (fieldcode == RECORD_TYPE || fieldcode == ARRAY_TYPE +- || fieldcode == UNION_TYPE || fieldcode == VECTOR_TYPE)) ++ || fieldcode == UNION_TYPE ++ || gnu_vector_type_p (fieldtype))) + { + push_init_level (loc, 1, braced_init_obstack); + continue; +@@ -10175,7 +10186,8 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, + && value.value != error_mark_node + && TYPE_MAIN_VARIANT (TREE_TYPE (value.value)) != elttype + && (eltcode == RECORD_TYPE || eltcode == ARRAY_TYPE +- || eltcode == UNION_TYPE || eltcode == VECTOR_TYPE)) ++ || eltcode == UNION_TYPE ++ || gnu_vector_type_p (elttype))) + { + push_init_level (loc, 1, braced_init_obstack); + continue; +@@ -10211,7 +10223,7 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, + constructor_unfilled_index. */ + constructor_unfilled_index = constructor_index; + } +- else if (VECTOR_TYPE_P (constructor_type)) ++ else if (gnu_vector_type_p (constructor_type)) + { + tree elttype = TYPE_MAIN_VARIANT (TREE_TYPE (constructor_type)); + +@@ -11555,7 +11567,8 @@ build_binary_op (location_t location, enum tree_code code, + + /* In case when one of the operands of the binary operation is + a vector and another is a scalar -- convert scalar to vector. 
*/ +- if ((code0 == VECTOR_TYPE) != (code1 == VECTOR_TYPE)) ++ if ((gnu_vector_type_p (type0) && code1 != VECTOR_TYPE) ++ || (gnu_vector_type_p (type1) && code0 != VECTOR_TYPE)) + { + enum stv_conv convert_flag = scalar_to_vector (location, code, op0, op1, + true); +@@ -11650,10 +11663,12 @@ build_binary_op (location_t location, enum tree_code code, + + if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE + || code0 == FIXED_POINT_TYPE +- || code0 == COMPLEX_TYPE || code0 == VECTOR_TYPE) ++ || code0 == COMPLEX_TYPE ++ || gnu_vector_type_p (type0)) + && (code1 == INTEGER_TYPE || code1 == REAL_TYPE + || code1 == FIXED_POINT_TYPE +- || code1 == COMPLEX_TYPE || code1 == VECTOR_TYPE)) ++ || code1 == COMPLEX_TYPE ++ || gnu_vector_type_p (type1))) + { + enum tree_code tcode0 = code0, tcode1 = code1; + +@@ -11684,8 +11699,8 @@ build_binary_op (location_t location, enum tree_code code, + if (code0 == INTEGER_TYPE && code1 == INTEGER_TYPE) + shorten = -1; + /* Allow vector types which are not floating point types. */ +- else if (code0 == VECTOR_TYPE +- && code1 == VECTOR_TYPE ++ else if (gnu_vector_type_p (type0) ++ && gnu_vector_type_p (type1) + && !VECTOR_FLOAT_TYPE_P (type0) + && !VECTOR_FLOAT_TYPE_P (type1)) + common = 1; +@@ -11696,7 +11711,8 @@ build_binary_op (location_t location, enum tree_code code, + doing_div_or_mod = true; + warn_for_div_by_zero (location, op1); + +- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE ++ if (gnu_vector_type_p (type0) ++ && gnu_vector_type_p (type1) + && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE + && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE) + common = 1; +@@ -11775,7 +11791,8 @@ build_binary_op (location_t location, enum tree_code code, + Also set SHORT_SHIFT if shifting rightward. */ + + case RSHIFT_EXPR: +- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE ++ if (gnu_vector_type_p (type0) ++ && gnu_vector_type_p (type1) + && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE + && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE + && known_eq (TYPE_VECTOR_SUBPARTS (type0), +@@ -11785,7 +11802,7 @@ build_binary_op (location_t location, enum tree_code code, + converted = 1; + } + else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE +- || (code0 == VECTOR_TYPE ++ || (gnu_vector_type_p (type0) + && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE)) + && code1 == INTEGER_TYPE) + { +@@ -11834,7 +11851,8 @@ build_binary_op (location_t location, enum tree_code code, + break; + + case LSHIFT_EXPR: +- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE ++ if (gnu_vector_type_p (type0) ++ && gnu_vector_type_p (type1) + && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE + && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE + && known_eq (TYPE_VECTOR_SUBPARTS (type0), +@@ -11844,7 +11862,7 @@ build_binary_op (location_t location, enum tree_code code, + converted = 1; + } + else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE +- || (code0 == VECTOR_TYPE ++ || (gnu_vector_type_p (type0) + && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE)) + && code1 == INTEGER_TYPE) + { +@@ -11903,7 +11921,7 @@ build_binary_op (location_t location, enum tree_code code, + + case EQ_EXPR: + case NE_EXPR: +- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE) ++ if (gnu_vector_type_p (type0) && gnu_vector_type_p (type1)) + { + tree intt; + if (!vector_types_compatible_elements_p (type0, type1)) +@@ -12071,7 +12089,7 @@ build_binary_op (location_t location, enum tree_code code, + case GE_EXPR: + case LT_EXPR: + case GT_EXPR: +- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE) ++ if 
(gnu_vector_type_p (type0) && gnu_vector_type_p (type1)) + { + tree intt; + if (!vector_types_compatible_elements_p (type0, type1)) +@@ -12218,7 +12236,8 @@ build_binary_op (location_t location, enum tree_code code, + if (code0 == ERROR_MARK || code1 == ERROR_MARK) + return error_mark_node; + +- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE ++ if (gnu_vector_type_p (type0) ++ && gnu_vector_type_p (type1) + && (!tree_int_cst_equal (TYPE_SIZE (type0), TYPE_SIZE (type1)) + || !vector_types_compatible_elements_p (type0, type1))) + { +@@ -12233,10 +12252,12 @@ build_binary_op (location_t location, enum tree_code code, + } + + if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE || code0 == COMPLEX_TYPE +- || code0 == FIXED_POINT_TYPE || code0 == VECTOR_TYPE) ++ || code0 == FIXED_POINT_TYPE ++ || gnu_vector_type_p (type0)) + && + (code1 == INTEGER_TYPE || code1 == REAL_TYPE || code1 == COMPLEX_TYPE +- || code1 == FIXED_POINT_TYPE || code1 == VECTOR_TYPE)) ++ || code1 == FIXED_POINT_TYPE ++ || gnu_vector_type_p (type1))) + { + bool first_complex = (code0 == COMPLEX_TYPE); + bool second_complex = (code1 == COMPLEX_TYPE); +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index a9123c858..51356da37 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -717,6 +717,9 @@ namespace aarch64_sve { + tree, unsigned int, tree *); + gimple *gimple_fold_builtin (unsigned int, gimple_stmt_iterator *, gcall *); + rtx expand_builtin (unsigned int, tree, rtx); ++#ifdef GCC_TARGET_H ++ bool verify_type_context (location_t, type_context_kind, const_tree, bool); ++#endif + } + + extern void aarch64_split_combinev16qi (rtx operands[3]); +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +index f830d9294..10595a5ab 100644 +--- a/gcc/config/aarch64/aarch64-sve-builtins.cc ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -3248,8 +3248,10 @@ register_builtin_types () + BITS_PER_SVE_VECTOR)); + } + vectype = build_distinct_type_copy (vectype); ++ gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype)); + SET_TYPE_STRUCTURAL_EQUALITY (vectype); + TYPE_ARTIFICIAL (vectype) = 1; ++ TYPE_INDIVISIBLE_P (vectype) = 1; + abi_vector_types[i] = vectype; + lang_hooks.types.register_builtin_type (vectype, + vector_types[i].abi_name); +@@ -3490,8 +3492,7 @@ bool + svbool_type_p (const_tree type) + { + tree abi_type = abi_vector_types[VECTOR_TYPE_svbool_t]; +- return (type != error_mark_node +- && TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (abi_type)); ++ return type != error_mark_node && TYPE_MAIN_VARIANT (type) == abi_type; + } + + /* If TYPE is a built-in type defined by the SVE ABI, return the mangled name, +@@ -3546,6 +3547,55 @@ builtin_type_p (const_tree type) + return svbool_type_p (type) || nvectors_if_data_type (type) > 0; + } + ++/* Implement TARGET_VERIFY_TYPE_CONTEXT for SVE types. 
*/ ++bool ++verify_type_context (location_t loc, type_context_kind context, ++ const_tree type, bool silent_p) ++{ ++ if (!builtin_type_p (type)) ++ return true; ++ ++ switch (context) ++ { ++ case TCTX_SIZEOF: ++ case TCTX_STATIC_STORAGE: ++ if (!silent_p) ++ error_at (loc, "SVE type %qT does not have a fixed size", type); ++ return false; ++ ++ case TCTX_ALIGNOF: ++ if (!silent_p) ++ error_at (loc, "SVE type %qT does not have a defined alignment", type); ++ return false; ++ ++ case TCTX_THREAD_STORAGE: ++ if (!silent_p) ++ error_at (loc, "variables of type %qT cannot have thread-local" ++ " storage duration", type); ++ return false; ++ ++ case TCTX_POINTER_ARITH: ++ if (!silent_p) ++ error_at (loc, "arithmetic on pointer to SVE type %qT", type); ++ return false; ++ ++ case TCTX_FIELD: ++ if (silent_p) ++ ; ++ else if (lang_GNU_CXX ()) ++ error_at (loc, "member variables cannot have SVE type %qT", type); ++ else ++ error_at (loc, "fields cannot have SVE type %qT", type); ++ return false; ++ ++ case TCTX_ARRAY_ELEMENT: ++ if (!silent_p) ++ error_at (loc, "array elements cannot have SVE type %qT", type); ++ return false; ++ } ++ gcc_unreachable (); ++} ++ + } + + using namespace aarch64_sve; +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 3486cca89..c2ab7af56 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -16201,6 +16201,15 @@ aarch64_mangle_type (const_tree type) + return NULL; + } + ++/* Implement TARGET_VERIFY_TYPE_CONTEXT. */ ++ ++static bool ++aarch64_verify_type_context (location_t loc, type_context_kind context, ++ const_tree type, bool silent_p) ++{ ++ return aarch64_sve::verify_type_context (loc, context, type, silent_p); ++} ++ + /* Find the first rtx_insn before insn that will generate an assembly + instruction. */ + +@@ -21967,6 +21976,9 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_MANGLE_TYPE + #define TARGET_MANGLE_TYPE aarch64_mangle_type + ++#undef TARGET_VERIFY_TYPE_CONTEXT ++#define TARGET_VERIFY_TYPE_CONTEXT aarch64_verify_type_context ++ + #undef TARGET_INVALID_CONVERSION + #define TARGET_INVALID_CONVERSION aarch64_invalid_conversion + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 3f22bb1f6..220bbe7dd 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11963,6 +11963,19 @@ conversion rules. + This is currently used only by the C and C++ front ends. + @end deftypefn + ++@deftypefn {Target Hook} bool TARGET_VERIFY_TYPE_CONTEXT (location_t @var{loc}, type_context_kind @var{context}, const_tree @var{type}, bool @var{silent_p}) ++If defined, this hook returns false if there is a target-specific reason ++why type @var{type} cannot be used in the source language context described ++by @var{context}. When @var{silent_p} is false, the hook also reports an ++error against @var{loc} for invalid uses of @var{type}. ++ ++Calls to this hook should be made through the global function ++@code{verify_type_context}, which makes the @var{silent_p} parameter ++default to false and also handles @code{error_mark_node}. ++ ++The default implementation always returns true. ++@end deftypefn ++ + @defmac OBJC_JBLEN + This macro determines the size of the objective C jump buffer for the + NeXT runtime. By default, OBJC_JBLEN is defined to an innocuous value. +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 89cfb5253..a8cb42a6b 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -8095,6 +8095,8 @@ and scanf formatter settings. 
+ + @hook TARGET_CONVERT_TO_TYPE + ++@hook TARGET_VERIFY_TYPE_CONTEXT ++ + @defmac OBJC_JBLEN + This macro determines the size of the objective C jump buffer for the + NeXT runtime. By default, OBJC_JBLEN is defined to an innocuous value. +diff --git a/gcc/target.def b/gcc/target.def +index 05389cdd1..4e3dc341c 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -5234,6 +5234,22 @@ This is currently used only by the C and C++ front ends.", + tree, (tree type, tree expr), + hook_tree_tree_tree_null) + ++DEFHOOK ++(verify_type_context, ++ "If defined, this hook returns false if there is a target-specific reason\n\ ++why type @var{type} cannot be used in the source language context described\n\ ++by @var{context}. When @var{silent_p} is false, the hook also reports an\n\ ++error against @var{loc} for invalid uses of @var{type}.\n\ ++\n\ ++Calls to this hook should be made through the global function\n\ ++@code{verify_type_context}, which makes the @var{silent_p} parameter\n\ ++default to false and also handles @code{error_mark_node}.\n\ ++\n\ ++The default implementation always returns true.", ++ bool, (location_t loc, type_context_kind context, const_tree type, ++ bool silent_p), ++ NULL) ++ + DEFHOOK + (can_change_mode_class, + "This hook returns true if it is possible to bitcast values held in\n\ +diff --git a/gcc/target.h b/gcc/target.h +index 964629669..3e6d34d34 100644 +--- a/gcc/target.h ++++ b/gcc/target.h +@@ -219,6 +219,35 @@ typedef auto_vec auto_vector_modes; + will choose the first mode that works. */ + const unsigned int VECT_COMPARE_COSTS = 1U << 0; + ++/* The contexts in which the use of a type T can be checked by ++ TARGET_VERIFY_TYPE_CONTEXT. */ ++enum type_context_kind { ++ /* Directly measuring the size of T. */ ++ TCTX_SIZEOF, ++ ++ /* Directly measuring the alignment of T. */ ++ TCTX_ALIGNOF, ++ ++ /* Creating objects of type T with static storage duration. */ ++ TCTX_STATIC_STORAGE, ++ ++ /* Creating objects of type T with thread-local storage duration. */ ++ TCTX_THREAD_STORAGE, ++ ++ /* Creating a field of type T. */ ++ TCTX_FIELD, ++ ++ /* Creating an array with elements of type T. */ ++ TCTX_ARRAY_ELEMENT, ++ ++ /* Adding to or subtracting from a pointer to T, or computing the ++ difference between two pointers when one of them is a pointer to T. */ ++ TCTX_POINTER_ARITH ++}; ++ ++extern bool verify_type_context (location_t, type_context_kind, const_tree, ++ bool = false); ++ + /* The target structure. This holds all the backend hooks. */ + #define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME; + #define DEFHOOK(NAME, DOC, TYPE, PARAMS, INIT) TYPE (* NAME) PARAMS; +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp +index 34d9dfd43..1672ddfef 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp +@@ -45,9 +45,9 @@ if { [check_effective_target_aarch64_sve] } { + } + + # Main loop. +-# FIXME: This should include general/*.c too, but leave that until the +-# C frontend allows initialization of SVE vectors. +-set files [glob -nocomplain $srcdir/$subdir/general-c/*.c] ++set files [glob -nocomplain \ ++ "$srcdir/$subdir/general/*.c" \ ++ "$srcdir/$subdir/general-c/*.c"] + dg-runtest [lsort $files] "$sve_flags" $DEFAULT_CFLAGS + + # All done. 
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c +new file mode 100644 +index 000000000..c4596f7e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c +@@ -0,0 +1,415 @@ ++/* { dg-options "-msve-vector-bits=256" } */ ++ ++#include ++ ++typedef uint8_t gnu_uint8_t __attribute__ ((vector_size (32))); ++typedef int8_t gnu_int8_t __attribute__ ((vector_size (32))); ++ ++void ++f (svuint8_t sve_u1, svint8_t sve_s1, ++ gnu_uint8_t gnu_u1, gnu_int8_t gnu_s1, int n, unsigned char uc) ++{ ++ /* Initialization. */ ++ ++ svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ ++ svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ ++ svuint8_t init_sve_u3 = { sve_u1 }; ++ svuint8_t init_sve_u4 = { gnu_u1 }; ++ svuint8_t init_sve_u5 = { sve_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'svint8_t'} } */ ++ svuint8_t init_sve_u6 = { gnu_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t'} } */ ++ svuint8_t init_sve_u7 = { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ ++ svuint8_t init_sve_u8 = { sve_u1, sve_u1 }; /* { dg-warning {excess elements in scalar initializer} } */ ++ svuint8_t init_sve_u9 = { gnu_u1, gnu_u1 }; /* { dg-warning {excess elements in scalar initializer} } */ ++ ++ gnu_uint8_t init_gnu_u1 = 0; /* { dg-error {incompatible types when initializing type 'gnu_uint8_t'[^\n]* using type 'int'} } */ ++ gnu_uint8_t init_gnu_u2 = {}; ++ gnu_uint8_t init_gnu_u3 = { sve_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ gnu_uint8_t init_gnu_u4 = { gnu_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ gnu_uint8_t init_gnu_u5 = { sve_s1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ gnu_uint8_t init_gnu_u6 = { gnu_s1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ gnu_uint8_t init_gnu_u7 = { 0 }; ++ ++ /* Compound literals. */ ++ ++ (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ ++ (svuint8_t) { sve_u1 }; ++ (svuint8_t) { gnu_u1 }; ++ (svuint8_t) { sve_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'svint8_t'} } */ ++ (svuint8_t) { gnu_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t'} } */ ++ ++ (gnu_uint8_t) {}; ++ (gnu_uint8_t) { 0 }; ++ (gnu_uint8_t) { sve_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ (gnu_uint8_t) { gnu_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ ++ /* Assignment. 
*/ ++ ++ sve_u1 = 0; /* { dg-error {incompatible types when assigning to type 'svuint8_t' from type 'int'} } */ ++ sve_u1 = sve_u1; ++ sve_u1 = gnu_u1; ++ sve_u1 = sve_s1; /* { dg-error {incompatible types when assigning to type 'svuint8_t' from type 'svint8_t'} } */ ++ sve_u1 = gnu_s1; /* { dg-error {incompatible types when assigning to type 'svuint8_t' from type 'gnu_int8_t'} } */ ++ ++ gnu_u1 = 0; /* { dg-error {incompatible types when assigning to type 'gnu_uint8_t'[^\n]* from type 'int'} } */ ++ gnu_u1 = sve_u1; ++ gnu_u1 = gnu_u1; ++ gnu_u1 = sve_s1; /* { dg-error {incompatible types when assigning to type 'gnu_uint8_t'[^\n]* from type 'svint8_t'} } */ ++ gnu_u1 = gnu_s1; /* { dg-error {incompatible types when assigning to type 'gnu_uint8_t'[^\n]* from type 'gnu_int8_t'} } */ ++ ++ /* Casts. */ ++ ++ (void) sve_u1; ++ (svuint8_t) sve_u1; ++ (svuint8_t) gnu_u1; ++ (svuint8_t) 0; /* { dg-error {conversion to non-scalar type requested} } */ ++ (svuint8_t) n; /* { dg-error {conversion to non-scalar type requested} } */ ++ (svint8_t) sve_u1; /* { dg-error {conversion to non-scalar type requested} } */ ++ (svint8_t) gnu_u1; ++ ++ (void) gnu_u1; ++ (gnu_uint8_t) sve_u1; ++ (gnu_uint8_t) gnu_u1; ++ (gnu_uint8_t) 0; /* { dg-error {can't convert a value of type 'int' to vector type '[^']*' which has different size} } */ ++ (gnu_uint8_t) n; /* { dg-error {can't convert a value of type 'int' to vector type '[^']*' which has different size} } */ ++ (gnu_int8_t) sve_u1; ++ (gnu_int8_t) gnu_u1; ++ ++ /* Vector indexing. */ ++ ++ sve_u1[0]; /* { dg-error {subscripted value is neither array nor pointer} } */ ++ &sve_u1[0]; /* { dg-error {subscripted value is neither array nor pointer} } */ ++ ++ gnu_u1[0]; ++ &gnu_u1[0]; ++ ++ /* Unary operators. */ ++ ++ +sve_u1; /* { dg-error {wrong type argument to unary plus} } */ ++ -sve_u1; /* { dg-error {wrong type argument to unary minus} } */ ++ ~sve_u1; /* { dg-error {wrong type argument to bit-complement} } */ ++ !sve_u1; /* { dg-error {wrong type argument to unary exclamation mark} } */ ++ *sve_u1; /* { dg-error {invalid type argument of unary '\*'} } */ ++ __real sve_u1; /* { dg-error {wrong type argument to __real} } */ ++ __imag sve_u1; /* { dg-error {wrong type argument to __imag} } */ ++ ++sve_u1; /* { dg-error {wrong type argument to increment} } */ ++ --sve_u1; /* { dg-error {wrong type argument to decrement} } */ ++ sve_u1++; /* { dg-error {wrong type argument to increment} } */ ++ sve_u1--; /* { dg-error {wrong type argument to decrement} } */ ++ ++ +gnu_u1; ++ -gnu_u1; ++ ~gnu_u1; ++ !gnu_u1; /* { dg-error {wrong type argument to unary exclamation mark} } */ ++ *gnu_u1; /* { dg-error {invalid type argument of unary '\*'} } */ ++ __real gnu_u1; /* { dg-error {wrong type argument to __real} } */ ++ __imag gnu_u1; /* { dg-error {wrong type argument to __imag} } */ ++ ++gnu_u1; ++ --gnu_u1; ++ gnu_u1++; ++ gnu_u1--; ++ ++ /* Vector-vector binary arithmetic. 
*/ ++ ++ sve_u1 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - sve_u1; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / sve_u1; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % sve_u1; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < sve_u1; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > sve_u1; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= sve_u1; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ sve_u1 + gnu_u1; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - gnu_u1; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * gnu_u1; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / gnu_u1; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % gnu_u1; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & gnu_u1; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | gnu_u1; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ gnu_u1; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == gnu_u1; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != gnu_u1; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= gnu_u1; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < gnu_u1; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > gnu_u1; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= gnu_u1; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << gnu_u1; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> gnu_u1; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ ++ gnu_u1 - sve_u1; /* { dg-error {invalid operands to binary -} } */ ++ gnu_u1 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ ++ gnu_u1 / sve_u1; /* { dg-error {invalid operands to binary /} } */ ++ gnu_u1 % sve_u1; /* { dg-error {invalid operands to binary %} } */ ++ gnu_u1 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ ++ gnu_u1 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ ++ gnu_u1 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ ++ gnu_u1 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ ++ gnu_u1 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ ++ gnu_u1 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ ++ gnu_u1 < sve_u1; /* { dg-error {invalid operands to binary <} } */ ++ gnu_u1 > sve_u1; /* { dg-error {invalid operands to binary >} } */ ++ gnu_u1 
>= sve_u1; /* { dg-error {invalid operands to binary >=} } */ ++ gnu_u1 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ ++ gnu_u1 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ ++ gnu_u1 && sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + gnu_u1; ++ gnu_u1 - gnu_u1; ++ gnu_u1 * gnu_u1; ++ gnu_u1 / gnu_u1; ++ gnu_u1 % gnu_u1; ++ gnu_u1 & gnu_u1; ++ gnu_u1 | gnu_u1; ++ gnu_u1 ^ gnu_u1; ++ gnu_u1 == gnu_u1; ++ gnu_u1 != gnu_u1; ++ gnu_u1 <= gnu_u1; ++ gnu_u1 < gnu_u1; ++ gnu_u1 > gnu_u1; ++ gnu_u1 >= gnu_u1; ++ gnu_u1 << gnu_u1; ++ gnu_u1 >> gnu_u1; ++ gnu_u1 && gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ /* Vector-scalar binary arithmetic. */ ++ ++ sve_u1 + 2; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - 2; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * 2; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / 2; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % 2; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & 2; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | 2; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ 2; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == 2; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != 2; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= 2; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < 2; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > 2; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= 2; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << 2; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> 2; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && 2; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || 2; /* { dg-error {used vector type where scalar is required} } */ ++ ++ sve_u1 + uc; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - uc; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * uc; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / uc; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % uc; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & uc; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | uc; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ uc; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == uc; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != uc; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= uc; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < uc; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > uc; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= uc; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << uc; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> uc; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && uc; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || uc; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + 2; ++ gnu_u1 - 2; ++ gnu_u1 * 2; ++ gnu_u1 / 2; ++ gnu_u1 % 2; ++ gnu_u1 & 2; ++ gnu_u1 | 2; ++ gnu_u1 ^ 2; ++ gnu_u1 == 2; ++ gnu_u1 != 2; ++ gnu_u1 <= 2; ++ gnu_u1 < 
2; ++ gnu_u1 > 2; ++ gnu_u1 >= 2; ++ gnu_u1 << 2; ++ gnu_u1 >> 2; ++ gnu_u1 && 2; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || 2; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + uc; ++ gnu_u1 - uc; ++ gnu_u1 * uc; ++ gnu_u1 / uc; ++ gnu_u1 % uc; ++ gnu_u1 & uc; ++ gnu_u1 | uc; ++ gnu_u1 ^ uc; ++ gnu_u1 == uc; ++ gnu_u1 != uc; ++ gnu_u1 <= uc; ++ gnu_u1 < uc; ++ gnu_u1 > uc; ++ gnu_u1 >= uc; ++ gnu_u1 << uc; ++ gnu_u1 >> uc; ++ gnu_u1 && uc; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || uc; /* { dg-error {used vector type where scalar is required} } */ ++ ++ /* Scalar-vector binary arithmetic. */ ++ ++ 3 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ ++ 3 - sve_u1; /* { dg-error {invalid operands to binary -} } */ ++ 3 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ ++ 3 / sve_u1; /* { dg-error {invalid operands to binary /} } */ ++ 3 % sve_u1; /* { dg-error {invalid operands to binary %} } */ ++ 3 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ ++ 3 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ ++ 3 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ ++ 3 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ ++ 3 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ ++ 3 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ ++ 3 < sve_u1; /* { dg-error {invalid operands to binary <} } */ ++ 3 > sve_u1; /* { dg-error {invalid operands to binary >} } */ ++ 3 >= sve_u1; /* { dg-error {invalid operands to binary >=} } */ ++ 3 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ ++ 3 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ ++ 3 && sve_u1; /* { dg-error {invalid operands to binary \&\&} } */ ++ 3 || sve_u1; /* { dg-error {invalid operands to binary \|\|} } */ ++ ++ 3 + gnu_u1; ++ 3 - gnu_u1; ++ 3 * gnu_u1; ++ 3 / gnu_u1; ++ 3 % gnu_u1; ++ 3 & gnu_u1; ++ 3 | gnu_u1; ++ 3 ^ gnu_u1; ++ 3 == gnu_u1; ++ 3 != gnu_u1; ++ 3 <= gnu_u1; ++ 3 < gnu_u1; ++ 3 > gnu_u1; ++ 3 >= gnu_u1; ++ 3 << gnu_u1; ++ 3 >> gnu_u1; ++ 3 && gnu_u1; /* { dg-error {invalid operands to binary \&\&} } */ ++ 3 || gnu_u1; /* { dg-error {invalid operands to binary \|\|} } */ ++ ++ /* Mismatched types. 
*/ ++ ++ sve_u1 + sve_s1; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - sve_s1; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * sve_s1; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / sve_s1; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % sve_s1; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & sve_s1; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | sve_s1; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ sve_s1; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == sve_s1; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != sve_s1; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= sve_s1; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < sve_s1; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > sve_s1; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= sve_s1; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << sve_s1; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> sve_s1; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && sve_s1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || sve_s1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ sve_u1 + gnu_s1; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - gnu_s1; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * gnu_s1; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / gnu_s1; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % gnu_s1; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & gnu_s1; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | gnu_s1; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ gnu_s1; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == gnu_s1; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != gnu_s1; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= gnu_s1; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < gnu_s1; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > gnu_s1; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= gnu_s1; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << gnu_s1; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> gnu_s1; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && gnu_s1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || gnu_s1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + sve_s1; /* { dg-error {invalid operands to binary \+} } */ ++ gnu_u1 - sve_s1; /* { dg-error {invalid operands to binary -} } */ ++ gnu_u1 * sve_s1; /* { dg-error {invalid operands to binary \*} } */ ++ gnu_u1 / sve_s1; /* { dg-error {invalid operands to binary /} } */ ++ gnu_u1 % sve_s1; /* { dg-error {invalid operands to binary %} } */ ++ gnu_u1 & sve_s1; /* { dg-error {invalid operands to binary \&} } */ ++ gnu_u1 | sve_s1; /* { dg-error {invalid operands to binary \|} } */ ++ gnu_u1 ^ sve_s1; /* { dg-error {invalid operands to binary \^} } */ ++ gnu_u1 == sve_s1; /* { dg-error {invalid operands to binary ==} } */ ++ gnu_u1 != sve_s1; /* { dg-error {invalid operands to binary !=} } */ ++ gnu_u1 <= sve_s1; /* { dg-error {invalid operands to binary <=} } */ ++ gnu_u1 < sve_s1; /* { dg-error {invalid operands to binary <} } */ ++ gnu_u1 > sve_s1; /* { dg-error {invalid operands to binary >} } */ ++ gnu_u1 
>= sve_s1; /* { dg-error {invalid operands to binary >=} } */ ++ gnu_u1 << sve_s1; /* { dg-error {invalid operands to binary <<} } */ ++ gnu_u1 >> sve_s1; /* { dg-error {invalid operands to binary >>} } */ ++ gnu_u1 && sve_s1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || sve_s1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + gnu_s1; ++ gnu_u1 - gnu_s1; ++ gnu_u1 * gnu_s1; ++ gnu_u1 / gnu_s1; ++ gnu_u1 % gnu_s1; ++ gnu_u1 & gnu_s1; ++ gnu_u1 | gnu_s1; ++ gnu_u1 ^ gnu_s1; ++ gnu_u1 == gnu_s1; ++ gnu_u1 != gnu_s1; ++ gnu_u1 <= gnu_s1; ++ gnu_u1 < gnu_s1; ++ gnu_u1 > gnu_s1; ++ gnu_u1 >= gnu_s1; ++ gnu_u1 << gnu_s1; ++ gnu_u1 >> gnu_s1; ++ gnu_u1 && gnu_s1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || gnu_s1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ /* Conditional expressions. */ ++ ++ uc ? sve_u1 : sve_u1; ++ uc ? gnu_u1 : sve_u1; /* { dg-error {type mismatch in conditional expression} } */ ++ uc ? sve_u1 : gnu_u1; /* { dg-error {type mismatch in conditional expression} } */ ++ uc ? gnu_u1 : gnu_u1; ++ ++ sve_u1 ? sve_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 ? gnu_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 ? sve_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 ? gnu_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 ? sve_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 ? gnu_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 ? sve_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 ? gnu_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ /* Vector built-ins. 
*/ ++ ++ __builtin_shuffle (sve_u1, sve_u1, sve_u1); /* { dg-error {'__builtin_shuffle' last argument must be an integer vector} } */ ++ __builtin_shuffle (sve_u1, gnu_u1, gnu_u1); /* { dg-error {'__builtin_shuffle' arguments must be vectors} } */ ++ __builtin_shuffle (gnu_u1, sve_u1, gnu_u1); /* { dg-error {'__builtin_shuffle' arguments must be vectors} } */ ++ __builtin_shuffle (gnu_u1, gnu_u1, sve_u1); /* { dg-error {'__builtin_shuffle' last argument must be an integer vector} } */ ++ __builtin_shuffle (gnu_u1, gnu_u1, gnu_u1); ++ ++ __builtin_convertvector (sve_u1, svuint8_t); /* { dg-error {'__builtin_convertvector' first argument must be an integer or floating vector} } */ ++ __builtin_convertvector (gnu_u1, svuint8_t); /* { dg-error {'__builtin_convertvector' second argument must be an integer or floating vector type} } */ ++ __builtin_convertvector (sve_u1, gnu_uint8_t); /* { dg-error {'__builtin_convertvector' first argument must be an integer or floating vector} } */ ++ __builtin_convertvector (gnu_u1, gnu_uint8_t); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c +new file mode 100644 +index 000000000..61e6d2163 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c +@@ -0,0 +1,415 @@ ++/* { dg-options "-msve-vector-bits=256 -flax-vector-conversions" } */ ++ ++#include ++ ++typedef uint8_t gnu_uint8_t __attribute__ ((vector_size (32))); ++typedef int8_t gnu_int8_t __attribute__ ((vector_size (32))); ++ ++void ++f (svuint8_t sve_u1, svint8_t sve_s1, ++ gnu_uint8_t gnu_u1, gnu_int8_t gnu_s1, int n, unsigned char uc) ++{ ++ /* Initialization. */ ++ ++ svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ ++ svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ ++ svuint8_t init_sve_u3 = { sve_u1 }; ++ svuint8_t init_sve_u4 = { gnu_u1 }; ++ svuint8_t init_sve_u5 = { sve_s1 }; ++ svuint8_t init_sve_u6 = { gnu_s1 }; ++ svuint8_t init_sve_u7 = { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ ++ svuint8_t init_sve_u8 = { sve_u1, sve_u1 }; /* { dg-warning {excess elements in scalar initializer} } */ ++ svuint8_t init_sve_u9 = { gnu_u1, gnu_u1 }; /* { dg-warning {excess elements in scalar initializer} } */ ++ ++ gnu_uint8_t init_gnu_u1 = 0; /* { dg-error {incompatible types when initializing type 'gnu_uint8_t'[^\n]* using type 'int'} } */ ++ gnu_uint8_t init_gnu_u2 = {}; ++ gnu_uint8_t init_gnu_u3 = { sve_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ gnu_uint8_t init_gnu_u4 = { gnu_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ gnu_uint8_t init_gnu_u5 = { sve_s1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ gnu_uint8_t init_gnu_u6 = { gnu_s1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ gnu_uint8_t init_gnu_u7 = { 0 }; ++ ++ /* Compound literals. 
*/ ++ ++ (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ ++ (svuint8_t) { sve_u1 }; ++ (svuint8_t) { gnu_u1 }; ++ (svuint8_t) { sve_s1 }; ++ (svuint8_t) { gnu_s1 }; ++ ++ (gnu_uint8_t) {}; ++ (gnu_uint8_t) { 0 }; ++ (gnu_uint8_t) { sve_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ (gnu_uint8_t) { gnu_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ ++ ++ /* Assignment. */ ++ ++ sve_u1 = 0; /* { dg-error {incompatible types when assigning to type 'svuint8_t' from type 'int'} } */ ++ sve_u1 = sve_u1; ++ sve_u1 = gnu_u1; ++ sve_u1 = sve_s1; ++ sve_u1 = gnu_s1; ++ ++ gnu_u1 = 0; /* { dg-error {incompatible types when assigning to type 'gnu_uint8_t'[^\n]* from type 'int'} } */ ++ gnu_u1 = sve_u1; ++ gnu_u1 = gnu_u1; ++ gnu_u1 = sve_s1; ++ gnu_u1 = gnu_s1; ++ ++ /* Casts. */ ++ ++ (void) sve_u1; ++ (svuint8_t) sve_u1; ++ (svuint8_t) gnu_u1; ++ (svuint8_t) 0; /* { dg-error {conversion to non-scalar type requested} } */ ++ (svuint8_t) n; /* { dg-error {conversion to non-scalar type requested} } */ ++ (svint8_t) sve_u1; ++ (svint8_t) gnu_u1; ++ ++ (void) gnu_u1; ++ (gnu_uint8_t) sve_u1; ++ (gnu_uint8_t) gnu_u1; ++ (gnu_uint8_t) 0; /* { dg-error {can't convert a value of type 'int' to vector type '[^']*' which has different size} } */ ++ (gnu_uint8_t) n; /* { dg-error {can't convert a value of type 'int' to vector type '[^']*' which has different size} } */ ++ (gnu_int8_t) sve_u1; ++ (gnu_int8_t) gnu_u1; ++ ++ /* Vector indexing. */ ++ ++ sve_u1[0]; /* { dg-error {subscripted value is neither array nor pointer} } */ ++ &sve_u1[0]; /* { dg-error {subscripted value is neither array nor pointer} } */ ++ ++ gnu_u1[0]; ++ &gnu_u1[0]; ++ ++ /* Unary operators. */ ++ ++ +sve_u1; /* { dg-error {wrong type argument to unary plus} } */ ++ -sve_u1; /* { dg-error {wrong type argument to unary minus} } */ ++ ~sve_u1; /* { dg-error {wrong type argument to bit-complement} } */ ++ !sve_u1; /* { dg-error {wrong type argument to unary exclamation mark} } */ ++ *sve_u1; /* { dg-error {invalid type argument of unary '\*'} } */ ++ __real sve_u1; /* { dg-error {wrong type argument to __real} } */ ++ __imag sve_u1; /* { dg-error {wrong type argument to __imag} } */ ++ ++sve_u1; /* { dg-error {wrong type argument to increment} } */ ++ --sve_u1; /* { dg-error {wrong type argument to decrement} } */ ++ sve_u1++; /* { dg-error {wrong type argument to increment} } */ ++ sve_u1--; /* { dg-error {wrong type argument to decrement} } */ ++ ++ +gnu_u1; ++ -gnu_u1; ++ ~gnu_u1; ++ !gnu_u1; /* { dg-error {wrong type argument to unary exclamation mark} } */ ++ *gnu_u1; /* { dg-error {invalid type argument of unary '\*'} } */ ++ __real gnu_u1; /* { dg-error {wrong type argument to __real} } */ ++ __imag gnu_u1; /* { dg-error {wrong type argument to __imag} } */ ++ ++gnu_u1; ++ --gnu_u1; ++ gnu_u1++; ++ gnu_u1--; ++ ++ /* Vector-vector binary arithmetic. 
*/ ++ ++ sve_u1 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - sve_u1; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / sve_u1; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % sve_u1; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < sve_u1; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > sve_u1; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= sve_u1; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ sve_u1 + gnu_u1; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - gnu_u1; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * gnu_u1; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / gnu_u1; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % gnu_u1; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & gnu_u1; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | gnu_u1; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ gnu_u1; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == gnu_u1; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != gnu_u1; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= gnu_u1; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < gnu_u1; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > gnu_u1; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= gnu_u1; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << gnu_u1; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> gnu_u1; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ ++ gnu_u1 - sve_u1; /* { dg-error {invalid operands to binary -} } */ ++ gnu_u1 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ ++ gnu_u1 / sve_u1; /* { dg-error {invalid operands to binary /} } */ ++ gnu_u1 % sve_u1; /* { dg-error {invalid operands to binary %} } */ ++ gnu_u1 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ ++ gnu_u1 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ ++ gnu_u1 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ ++ gnu_u1 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ ++ gnu_u1 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ ++ gnu_u1 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ ++ gnu_u1 < sve_u1; /* { dg-error {invalid operands to binary <} } */ ++ gnu_u1 > sve_u1; /* { dg-error {invalid operands to binary >} } */ ++ gnu_u1 
>= sve_u1; /* { dg-error {invalid operands to binary >=} } */ ++ gnu_u1 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ ++ gnu_u1 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ ++ gnu_u1 && sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + gnu_u1; ++ gnu_u1 - gnu_u1; ++ gnu_u1 * gnu_u1; ++ gnu_u1 / gnu_u1; ++ gnu_u1 % gnu_u1; ++ gnu_u1 & gnu_u1; ++ gnu_u1 | gnu_u1; ++ gnu_u1 ^ gnu_u1; ++ gnu_u1 == gnu_u1; ++ gnu_u1 != gnu_u1; ++ gnu_u1 <= gnu_u1; ++ gnu_u1 < gnu_u1; ++ gnu_u1 > gnu_u1; ++ gnu_u1 >= gnu_u1; ++ gnu_u1 << gnu_u1; ++ gnu_u1 >> gnu_u1; ++ gnu_u1 && gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ /* Vector-scalar binary arithmetic. */ ++ ++ sve_u1 + 2; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - 2; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * 2; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / 2; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % 2; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & 2; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | 2; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ 2; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == 2; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != 2; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= 2; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < 2; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > 2; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= 2; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << 2; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> 2; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && 2; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || 2; /* { dg-error {used vector type where scalar is required} } */ ++ ++ sve_u1 + uc; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - uc; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * uc; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / uc; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % uc; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & uc; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | uc; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ uc; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == uc; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != uc; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= uc; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < uc; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > uc; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= uc; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << uc; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> uc; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && uc; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || uc; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + 2; ++ gnu_u1 - 2; ++ gnu_u1 * 2; ++ gnu_u1 / 2; ++ gnu_u1 % 2; ++ gnu_u1 & 2; ++ gnu_u1 | 2; ++ gnu_u1 ^ 2; ++ gnu_u1 == 2; ++ gnu_u1 != 2; ++ gnu_u1 <= 2; ++ gnu_u1 < 
2; ++ gnu_u1 > 2; ++ gnu_u1 >= 2; ++ gnu_u1 << 2; ++ gnu_u1 >> 2; ++ gnu_u1 && 2; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || 2; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + uc; ++ gnu_u1 - uc; ++ gnu_u1 * uc; ++ gnu_u1 / uc; ++ gnu_u1 % uc; ++ gnu_u1 & uc; ++ gnu_u1 | uc; ++ gnu_u1 ^ uc; ++ gnu_u1 == uc; ++ gnu_u1 != uc; ++ gnu_u1 <= uc; ++ gnu_u1 < uc; ++ gnu_u1 > uc; ++ gnu_u1 >= uc; ++ gnu_u1 << uc; ++ gnu_u1 >> uc; ++ gnu_u1 && uc; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || uc; /* { dg-error {used vector type where scalar is required} } */ ++ ++ /* Scalar-vector binary arithmetic. */ ++ ++ 3 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ ++ 3 - sve_u1; /* { dg-error {invalid operands to binary -} } */ ++ 3 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ ++ 3 / sve_u1; /* { dg-error {invalid operands to binary /} } */ ++ 3 % sve_u1; /* { dg-error {invalid operands to binary %} } */ ++ 3 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ ++ 3 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ ++ 3 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ ++ 3 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ ++ 3 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ ++ 3 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ ++ 3 < sve_u1; /* { dg-error {invalid operands to binary <} } */ ++ 3 > sve_u1; /* { dg-error {invalid operands to binary >} } */ ++ 3 >= sve_u1; /* { dg-error {invalid operands to binary >=} } */ ++ 3 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ ++ 3 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ ++ 3 && sve_u1; /* { dg-error {invalid operands to binary \&\&} } */ ++ 3 || sve_u1; /* { dg-error {invalid operands to binary \|\|} } */ ++ ++ 3 + gnu_u1; ++ 3 - gnu_u1; ++ 3 * gnu_u1; ++ 3 / gnu_u1; ++ 3 % gnu_u1; ++ 3 & gnu_u1; ++ 3 | gnu_u1; ++ 3 ^ gnu_u1; ++ 3 == gnu_u1; ++ 3 != gnu_u1; ++ 3 <= gnu_u1; ++ 3 < gnu_u1; ++ 3 > gnu_u1; ++ 3 >= gnu_u1; ++ 3 << gnu_u1; ++ 3 >> gnu_u1; ++ 3 && gnu_u1; /* { dg-error {invalid operands to binary \&\&} } */ ++ 3 || gnu_u1; /* { dg-error {invalid operands to binary \|\|} } */ ++ ++ /* Mismatched types. 
*/ ++ ++ sve_u1 + sve_s1; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - sve_s1; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * sve_s1; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / sve_s1; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % sve_s1; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & sve_s1; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | sve_s1; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ sve_s1; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == sve_s1; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != sve_s1; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= sve_s1; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < sve_s1; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > sve_s1; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= sve_s1; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << sve_s1; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> sve_s1; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && sve_s1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || sve_s1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ sve_u1 + gnu_s1; /* { dg-error {invalid operands to binary \+} } */ ++ sve_u1 - gnu_s1; /* { dg-error {invalid operands to binary -} } */ ++ sve_u1 * gnu_s1; /* { dg-error {invalid operands to binary \*} } */ ++ sve_u1 / gnu_s1; /* { dg-error {invalid operands to binary /} } */ ++ sve_u1 % gnu_s1; /* { dg-error {invalid operands to binary %} } */ ++ sve_u1 & gnu_s1; /* { dg-error {invalid operands to binary \&} } */ ++ sve_u1 | gnu_s1; /* { dg-error {invalid operands to binary \|} } */ ++ sve_u1 ^ gnu_s1; /* { dg-error {invalid operands to binary \^} } */ ++ sve_u1 == gnu_s1; /* { dg-error {invalid operands to binary ==} } */ ++ sve_u1 != gnu_s1; /* { dg-error {invalid operands to binary !=} } */ ++ sve_u1 <= gnu_s1; /* { dg-error {invalid operands to binary <=} } */ ++ sve_u1 < gnu_s1; /* { dg-error {invalid operands to binary <} } */ ++ sve_u1 > gnu_s1; /* { dg-error {invalid operands to binary >} } */ ++ sve_u1 >= gnu_s1; /* { dg-error {invalid operands to binary >=} } */ ++ sve_u1 << gnu_s1; /* { dg-error {invalid operands to binary <<} } */ ++ sve_u1 >> gnu_s1; /* { dg-error {invalid operands to binary >>} } */ ++ sve_u1 && gnu_s1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 || gnu_s1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + sve_s1; /* { dg-error {invalid operands to binary \+} } */ ++ gnu_u1 - sve_s1; /* { dg-error {invalid operands to binary -} } */ ++ gnu_u1 * sve_s1; /* { dg-error {invalid operands to binary \*} } */ ++ gnu_u1 / sve_s1; /* { dg-error {invalid operands to binary /} } */ ++ gnu_u1 % sve_s1; /* { dg-error {invalid operands to binary %} } */ ++ gnu_u1 & sve_s1; /* { dg-error {invalid operands to binary \&} } */ ++ gnu_u1 | sve_s1; /* { dg-error {invalid operands to binary \|} } */ ++ gnu_u1 ^ sve_s1; /* { dg-error {invalid operands to binary \^} } */ ++ gnu_u1 == sve_s1; /* { dg-error {invalid operands to binary ==} } */ ++ gnu_u1 != sve_s1; /* { dg-error {invalid operands to binary !=} } */ ++ gnu_u1 <= sve_s1; /* { dg-error {invalid operands to binary <=} } */ ++ gnu_u1 < sve_s1; /* { dg-error {invalid operands to binary <} } */ ++ gnu_u1 > sve_s1; /* { dg-error {invalid operands to binary >} } */ ++ gnu_u1 
>= sve_s1; /* { dg-error {invalid operands to binary >=} } */ ++ gnu_u1 << sve_s1; /* { dg-error {invalid operands to binary <<} } */ ++ gnu_u1 >> sve_s1; /* { dg-error {invalid operands to binary >>} } */ ++ gnu_u1 && sve_s1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || sve_s1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 + gnu_s1; ++ gnu_u1 - gnu_s1; ++ gnu_u1 * gnu_s1; ++ gnu_u1 / gnu_s1; ++ gnu_u1 % gnu_s1; ++ gnu_u1 & gnu_s1; ++ gnu_u1 | gnu_s1; ++ gnu_u1 ^ gnu_s1; ++ gnu_u1 == gnu_s1; ++ gnu_u1 != gnu_s1; ++ gnu_u1 <= gnu_s1; ++ gnu_u1 < gnu_s1; ++ gnu_u1 > gnu_s1; ++ gnu_u1 >= gnu_s1; ++ gnu_u1 << gnu_s1; ++ gnu_u1 >> gnu_s1; ++ gnu_u1 && gnu_s1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 || gnu_s1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ /* Conditional expressions. */ ++ ++ uc ? sve_u1 : sve_u1; ++ uc ? gnu_u1 : sve_u1; /* { dg-error {type mismatch in conditional expression} } */ ++ uc ? sve_u1 : gnu_u1; /* { dg-error {type mismatch in conditional expression} } */ ++ uc ? gnu_u1 : gnu_u1; ++ ++ sve_u1 ? sve_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 ? gnu_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 ? sve_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ sve_u1 ? gnu_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ gnu_u1 ? sve_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 ? gnu_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 ? sve_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ gnu_u1 ? gnu_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ ++ ++ /* Vector built-ins. */ ++ ++ __builtin_shuffle (sve_u1, sve_u1, sve_u1); /* { dg-error {'__builtin_shuffle' last argument must be an integer vector} } */ ++ __builtin_shuffle (sve_u1, gnu_u1, gnu_u1); /* { dg-error {'__builtin_shuffle' arguments must be vectors} } */ ++ __builtin_shuffle (gnu_u1, sve_u1, gnu_u1); /* { dg-error {'__builtin_shuffle' arguments must be vectors} } */ ++ __builtin_shuffle (gnu_u1, gnu_u1, sve_u1); /* { dg-error {'__builtin_shuffle' last argument must be an integer vector} } */ ++ __builtin_shuffle (gnu_u1, gnu_u1, gnu_u1); ++ ++ __builtin_convertvector (sve_u1, svuint8_t); /* { dg-error {'__builtin_convertvector' first argument must be an integer or floating vector} } */ ++ __builtin_convertvector (gnu_u1, svuint8_t); /* { dg-error {'__builtin_convertvector' second argument must be an integer or floating vector type} } */ ++ __builtin_convertvector (sve_u1, gnu_uint8_t); /* { dg-error {'__builtin_convertvector' first argument must be an integer or floating vector} } */ ++ __builtin_convertvector (gnu_u1, gnu_uint8_t); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c +new file mode 100644 +index 000000000..ec892a3fc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c +@@ -0,0 +1,217 @@ ++/* { dg-options "-std=gnu99" } */ ++ ++#include ++ ++typedef signed char int8x32_t __attribute__((__vector_size__ (32))); ++ ++/* Sizeless objects with global scope. 
*/ ++ ++svint8_t global_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++static svint8_t local_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++extern svint8_t extern_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++__thread svint8_t tls_sve_sc; /* { dg-error {variables of type 'svint8_t' cannot have thread-local storage duration} } */ ++_Atomic svint8_t atomic_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++ ++/* Sizeless arrays. */ ++ ++typedef svint8_t array_type[2]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++extern svint8_t extern_array[]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++ ++/* Sizeless fields. */ ++ ++struct struct1 { ++ svint8_t a; /* { dg-error {fields cannot have SVE type 'svint8_t'} } */ ++}; ++ ++union union1 { ++ svint8_t a; /* { dg-error {fields cannot have SVE type 'svint8_t'} } */ ++}; ++ ++/* Pointers to sizeless types. */ ++ ++svint8_t *global_sve_sc_ptr; ++svint8_t *invalid_sve_sc_ptr = &(svint8_t) { *global_sve_sc_ptr }; /* { dg-error {initializer element is not constant} } */ ++ /* { dg-error {SVE type 'svint8_t' does not have a fixed size} "" { target *-*-* } .-1 } */ ++ ++/* Sizeless arguments and return values. */ ++ ++void ext_consume_sve_sc (svint8_t); ++void ext_consume_varargs (int, ...); ++svint8_t ext_produce_sve_sc (); ++ ++/* Main tests for statements and expressions. */ ++ ++void ++statements (int n) ++{ ++ /* Local declarations. */ ++ ++ unsigned char va __attribute__((__vector_size__(2))); ++ svint8_t sve_sc1, sve_sc2; ++ _Atomic svint8_t atomic_sve_sc; ++ int8x32_t gnu_sc1; ++ svint16_t sve_sh1; ++ static svint8_t local_static_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++ ++ /* Layout queries. */ ++ ++ sizeof (svint8_t); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++ sizeof (sve_sc1); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++ sizeof (ext_produce_sve_sc ()); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++ _Alignof (svint8_t); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ ++ _Alignof (sve_sc1); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ ++ _Alignof (ext_produce_sve_sc ()); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ ++ ++ /* Initialization. */ ++ ++ svint8_t init_sve_sc1 = sve_sc1; ++ svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ ++ svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ ++ ++ int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ ++ int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ ++ ++ /* Compound literals. */ ++ ++ (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svint8_t) { sve_sc1 }; ++ ++ (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ ++ ++ /* Arrays. 
*/ ++ ++ svint8_t array[2]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++ svint8_t zero_length_array[0]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++ svint8_t empty_init_array[] = {}; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++ /* { dg-error {empty scalar initializer} "" { target *-*-* } .-1 } */ ++ typedef svint8_t vla_type[n]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++ ++ /* Assignment. */ ++ ++ n = sve_sc1; /* { dg-error {incompatible types when assigning to type 'int' from type 'svint8_t'} } */ ++ ++ sve_sc1 = 0; /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'int'} } */ ++ sve_sc1 = sve_sc2; ++ sve_sc1 = sve_sh1; /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'svint16_t'} } */ ++ ++ /* Casting. */ ++ ++ (void) sve_sc1; ++ (svint8_t) sve_sc1; ++ ++ /* Addressing and dereferencing. */ ++ ++ svint8_t *sve_sc_ptr = &sve_sc1; ++ int8x32_t *gnu_sc_ptr = &gnu_sc1; ++ sve_sc1 = *sve_sc_ptr; ++ ++ /* Pointer assignment. */ ++ ++ gnu_sc_ptr = sve_sc_ptr; /* { dg-warning {assignment to [^\n]* from incompatible pointer type} } */ ++ sve_sc_ptr = gnu_sc_ptr; /* { dg-warning {assignment to [^\n]* from incompatible pointer type} } */ ++ ++ /* Pointer arithmetic. */ ++ ++ ++sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ --sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr++; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr--; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr += 0; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr += 1; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr -= 0; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr -= 1; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr - sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ gnu_sc_ptr - sve_sc_ptr; /* { dg-error {invalid operands to binary -} } */ ++ sve_sc_ptr - gnu_sc_ptr; /* { dg-error {invalid operands to binary -} } */ ++ sve_sc1 = sve_sc_ptr[0]; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc1 = sve_sc_ptr[1]; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ ++ /* Pointer comparison. 
*/ ++ ++ sve_sc_ptr == &sve_sc1; ++ sve_sc_ptr != &sve_sc1; ++ sve_sc_ptr < &sve_sc1; ++ sve_sc_ptr <= &sve_sc1; ++ sve_sc_ptr > &sve_sc1; ++ sve_sc_ptr >= &sve_sc1; ++ gnu_sc_ptr == sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ gnu_sc_ptr != sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ gnu_sc_ptr < sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ gnu_sc_ptr <= sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ gnu_sc_ptr > sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ gnu_sc_ptr >= sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ sve_sc_ptr == gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ sve_sc_ptr != gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ sve_sc_ptr < gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ sve_sc_ptr <= gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ sve_sc_ptr > gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ sve_sc_ptr >= gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ ++ ++ /* Conditional expressions. */ ++ ++ 0 ? sve_sc1 : sve_sc1; ++ 0 ? sve_sc1 : sve_sh1; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? sve_sc1 : 0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? 0 : sve_sc1; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ?: sve_sc1; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? sve_sc_ptr : sve_sc_ptr; ++ 0 ? sve_sc_ptr : gnu_sc_ptr; /* { dg-warning {pointer type mismatch in conditional expression} } */ ++ 0 ? gnu_sc_ptr : sve_sc_ptr; /* { dg-warning {pointer type mismatch in conditional expression} } */ ++ ++ /* Generic associations. */ ++ ++ _Generic (sve_sc1, default: 100); ++ _Generic (1, svint8_t: 10, default: 20); ++ ++ /* Function arguments. */ ++ ++ ext_consume_sve_sc (sve_sc1); ++ ext_consume_sve_sc (sve_sh1); /* { dg-error {incompatible type for argument 1 of 'ext_consume_sve_sc'} } */ ++ ext_consume_varargs (sve_sc1); /* { dg-error {incompatible type for argument 1 of 'ext_consume_varargs'} } */ ++ ext_consume_varargs (1, sve_sc1); ++ ++ /* Function returns. */ ++ ++ ext_produce_sve_sc (); ++ sve_sc1 = ext_produce_sve_sc (); ++ sve_sh1 = ext_produce_sve_sc (); /* { dg-error {incompatible types when assigning to type 'svint16_t' from type 'svint8_t'} } */ ++ ++ /* Varargs processing. */ ++ ++ __builtin_va_list valist; ++ __builtin_va_arg (valist, svint8_t); ++ ++ /* Statement expressions. */ ++ ++ ({ sve_sc1; }); ++ ({ svint8_t another_sve_sc = *sve_sc_ptr; another_sve_sc; }); ++} ++ ++/* Function parameters in definitions. */ ++ ++void ++old_style (input_sve_sc) /* { dg-error {SVE type 'svint8_t' cannot be passed to an unprototyped function} } */ ++ svint8_t input_sve_sc; ++{ ++ svint8_t sve_sc1 = input_sve_sc; ++} ++ ++void ++new_style_param (svint8_t input_sve_sc) ++{ ++ svint8_t sve_sc1 = input_sve_sc; ++} ++ ++/* Function return values in definitions. 
*/ ++ ++svint8_t ++good_return_sve_sc (svint8_t param) ++{ ++ return param; ++} ++ ++svint8_t ++bad_return_sve_sc (svint16_t param) ++{ ++ return param; /* { dg-error {incompatible types when returning type 'svint16_t' but 'svint8_t' was expected} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c +new file mode 100644 +index 000000000..717439300 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c +@@ -0,0 +1,217 @@ ++/* { dg-options "-std=gnu99 -msve-vector-bits=256" } */ ++ ++#include ++ ++typedef signed char int8x32_t __attribute__((__vector_size__ (32))); ++ ++/* Sizeless objects with global scope. */ ++ ++svint8_t global_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++static svint8_t local_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++extern svint8_t extern_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++__thread svint8_t tls_sve_sc; /* { dg-error {variables of type 'svint8_t' cannot have thread-local storage duration} } */ ++_Atomic svint8_t atomic_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++ ++/* Sizeless arrays. */ ++ ++typedef svint8_t array_type[2]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++extern svint8_t extern_array[]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++ ++/* Sizeless fields. */ ++ ++struct struct1 { ++ svint8_t a; /* { dg-error {fields cannot have SVE type 'svint8_t'} } */ ++}; ++ ++union union1 { ++ svint8_t a; /* { dg-error {fields cannot have SVE type 'svint8_t'} } */ ++}; ++ ++/* Pointers to sizeless types. */ ++ ++svint8_t *global_sve_sc_ptr; ++svint8_t *invalid_sve_sc_ptr = &(svint8_t) { *global_sve_sc_ptr }; /* { dg-error {initializer element is not constant} } */ ++ /* { dg-error {SVE type 'svint8_t' does not have a fixed size} "" { target *-*-* } .-1 } */ ++ ++/* Sizeless arguments and return values. */ ++ ++void ext_consume_sve_sc (svint8_t); ++void ext_consume_varargs (int, ...); ++svint8_t ext_produce_sve_sc (); ++ ++/* Main tests for statements and expressions. */ ++ ++void ++statements (int n) ++{ ++ /* Local declarations. */ ++ ++ unsigned char va __attribute__((__vector_size__(2))); ++ svint8_t sve_sc1, sve_sc2; ++ _Atomic svint8_t atomic_sve_sc; ++ int8x32_t gnu_sc1; ++ svint16_t sve_sh1; ++ static svint8_t local_static_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++ ++ /* Layout queries. */ ++ ++ sizeof (svint8_t); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++ sizeof (sve_sc1); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++ sizeof (ext_produce_sve_sc ()); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ ++ _Alignof (svint8_t); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ ++ _Alignof (sve_sc1); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ ++ _Alignof (ext_produce_sve_sc ()); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ ++ ++ /* Initialization. 
*/ ++ ++ svint8_t init_sve_sc1 = sve_sc1; ++ svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ ++ svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ ++ ++ int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ ++ int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ ++ ++ /* Compound literals. */ ++ ++ (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (svint8_t) { sve_sc1 }; ++ ++ (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ ++ ++ /* Arrays. */ ++ ++ svint8_t array[2]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++ svint8_t zero_length_array[0]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++ svint8_t empty_init_array[] = {}; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++ /* { dg-error {empty scalar initializer} "" { target *-*-* } .-1 } */ ++ typedef svint8_t vla_type[n]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ ++ ++ /* Assignment. */ ++ ++ n = sve_sc1; /* { dg-error {incompatible types when assigning to type 'int' from type 'svint8_t'} } */ ++ ++ sve_sc1 = 0; /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'int'} } */ ++ sve_sc1 = sve_sc2; ++ sve_sc1 = sve_sh1; /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'svint16_t'} } */ ++ ++ /* Casting. */ ++ ++ (void) sve_sc1; ++ (svint8_t) sve_sc1; ++ ++ /* Addressing and dereferencing. */ ++ ++ svint8_t *sve_sc_ptr = &sve_sc1; ++ int8x32_t *gnu_sc_ptr = &gnu_sc1; ++ sve_sc1 = *sve_sc_ptr; ++ ++ /* Pointer assignment. */ ++ ++ gnu_sc_ptr = sve_sc_ptr; ++ sve_sc_ptr = gnu_sc_ptr; ++ ++ /* Pointer arithmetic. */ ++ ++ ++sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ --sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr++; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr--; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr += 0; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr += 1; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr -= 0; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr -= 1; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr - sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ gnu_sc_ptr - sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc_ptr - gnu_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc1 = sve_sc_ptr[0]; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ sve_sc1 = sve_sc_ptr[1]; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ ++ ++ /* Pointer comparison. 
*/ ++ ++ sve_sc_ptr == &sve_sc1; ++ sve_sc_ptr != &sve_sc1; ++ sve_sc_ptr < &sve_sc1; ++ sve_sc_ptr <= &sve_sc1; ++ sve_sc_ptr > &sve_sc1; ++ sve_sc_ptr >= &sve_sc1; ++ gnu_sc_ptr == sve_sc_ptr; ++ gnu_sc_ptr != sve_sc_ptr; ++ gnu_sc_ptr < sve_sc_ptr; ++ gnu_sc_ptr <= sve_sc_ptr; ++ gnu_sc_ptr > sve_sc_ptr; ++ gnu_sc_ptr >= sve_sc_ptr; ++ sve_sc_ptr == gnu_sc_ptr; ++ sve_sc_ptr != gnu_sc_ptr; ++ sve_sc_ptr < gnu_sc_ptr; ++ sve_sc_ptr <= gnu_sc_ptr; ++ sve_sc_ptr > gnu_sc_ptr; ++ sve_sc_ptr >= gnu_sc_ptr; ++ ++ /* Conditional expressions. */ ++ ++ 0 ? sve_sc1 : sve_sc1; ++ 0 ? sve_sc1 : sve_sh1; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? sve_sc1 : 0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? 0 : sve_sc1; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ?: sve_sc1; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? sve_sc_ptr : sve_sc_ptr; ++ 0 ? sve_sc_ptr : gnu_sc_ptr; ++ 0 ? gnu_sc_ptr : sve_sc_ptr; ++ ++ /* Generic associations. */ ++ ++ _Generic (sve_sc1, default: 100); ++ _Generic (1, svint8_t: 10, default: 20); ++ ++ /* Function arguments. */ ++ ++ ext_consume_sve_sc (sve_sc1); ++ ext_consume_sve_sc (sve_sh1); /* { dg-error {incompatible type for argument 1 of 'ext_consume_sve_sc'} } */ ++ ext_consume_varargs (sve_sc1); /* { dg-error {incompatible type for argument 1 of 'ext_consume_varargs'} } */ ++ ext_consume_varargs (1, sve_sc1); ++ ++ /* Function returns. */ ++ ++ ext_produce_sve_sc (); ++ sve_sc1 = ext_produce_sve_sc (); ++ sve_sh1 = ext_produce_sve_sc (); /* { dg-error {incompatible types when assigning to type 'svint16_t' from type 'svint8_t'} } */ ++ ++ /* Varargs processing. */ ++ ++ __builtin_va_list valist; ++ __builtin_va_arg (valist, svint8_t); ++ ++ /* Statement expressions. */ ++ ++ ({ sve_sc1; }); ++ ({ svint8_t another_sve_sc = *sve_sc_ptr; another_sve_sc; }); ++} ++ ++/* Function parameters in definitions. */ ++ ++void ++old_style (input_sve_sc) /* { dg-error {SVE type 'svint8_t' cannot be passed to an unprototyped function} } */ ++ svint8_t input_sve_sc; ++{ ++ svint8_t sve_sc1 = input_sve_sc; ++} ++ ++void ++new_style_param (svint8_t input_sve_sc) ++{ ++ svint8_t sve_sc1 = input_sve_sc; ++} ++ ++/* Function return values in definitions. */ ++ ++svint8_t ++good_return_sve_sc (svint8_t param) ++{ ++ return param; ++} ++ ++svint8_t ++bad_return_sve_sc (svint16_t param) ++{ ++ return param; /* { dg-error {incompatible types when returning type 'svint16_t' but 'svint8_t' was expected} } */ ++} +diff --git a/gcc/tree-core.h b/gcc/tree-core.h +index 26b6f46ad..fca4abf2a 100644 +--- a/gcc/tree-core.h ++++ b/gcc/tree-core.h +@@ -1585,7 +1585,8 @@ struct GTY(()) tree_type_common { + unsigned warn_if_not_align : 6; + unsigned typeless_storage : 1; + unsigned empty_flag : 1; +- unsigned spare : 17; ++ unsigned indivisible_p : 1; ++ unsigned spare : 16; + + alias_set_type alias_set; + tree pointer_to; +diff --git a/gcc/tree.c b/gcc/tree.c +index 62607c63a..33e8dca2a 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -15146,6 +15146,21 @@ max_object_size (void) + return TYPE_MAX_VALUE (ptrdiff_type_node); + } + ++/* A wrapper around TARGET_VERIFY_TYPE_CONTEXT that makes the silent_p ++ parameter default to false and that weeds out error_mark_node. 
*/ ++ ++bool ++verify_type_context (location_t loc, type_context_kind context, ++ const_tree type, bool silent_p) ++{ ++ if (type == error_mark_node) ++ return true; ++ ++ gcc_assert (TYPE_P (type)); ++ return (!targetm.verify_type_context ++ || targetm.verify_type_context (loc, context, type, silent_p)); ++} ++ + #if CHECKING_P + + namespace selftest { +diff --git a/gcc/tree.h b/gcc/tree.h +index 356a9f544..97d18fc2b 100644 +--- a/gcc/tree.h ++++ b/gcc/tree.h +@@ -704,6 +704,11 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int, + /* Used to indicate that this TYPE represents a compiler-generated entity. */ + #define TYPE_ARTIFICIAL(NODE) (TYPE_CHECK (NODE)->base.nowarning_flag) + ++/* True if the type is indivisible at the source level, i.e. if its ++ component parts cannot be accessed directly. This is used to suppress ++ normal GNU extensions for target-specific vector types. */ ++#define TYPE_INDIVISIBLE_P(NODE) (TYPE_CHECK (NODE)->type_common.indivisible_p) ++ + /* In an IDENTIFIER_NODE, this means that assemble_name was called with + this string as an argument. */ + #define TREE_SYMBOL_REFERENCED(NODE) \ diff --git a/adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch b/adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch new file mode 100644 index 0000000..6ee3d33 --- /dev/null +++ b/adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch @@ -0,0 +1,88 @@ +This backport contains 2 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +4bf29d15f2e01348a45a1f4e1a135962f123fdd6 +0001-AArch64-PR79262-Adjust-vector-cost.patch + +27071013521b015d17a2666448f27a6ff0c55aca +0001-Move-EXTRACT_LAST_REDUCTION-costing-to-vectorizable_.patch + +diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +--- a/gcc/config/aarch64/aarch64.c 2020-11-20 04:36:33.988000000 +0800 ++++ b/gcc/config/aarch64/aarch64.c 2020-11-20 04:32:20.984000000 +0800 +@@ -448,7 +448,7 @@ static const struct cpu_vector_cost gene + 1, /* vec_int_stmt_cost */ + 1, /* vec_fp_stmt_cost */ + 2, /* vec_permute_cost */ +- 1, /* vec_to_scalar_cost */ ++ 2, /* vec_to_scalar_cost */ + 1, /* scalar_to_vec_cost */ + 1, /* vec_align_load_cost */ + 1, /* vec_unalign_load_cost */ +diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-11-20 04:36:34.016000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-11-20 04:32:20.984000000 +0800 +@@ -3926,8 +3926,11 @@ vect_model_reduction_cost (stmt_vec_info + + code = gimple_assign_rhs_code (orig_stmt_info->stmt); + +- if (reduction_type == EXTRACT_LAST_REDUCTION +- || reduction_type == FOLD_LEFT_REDUCTION) ++ if (reduction_type == EXTRACT_LAST_REDUCTION) ++ /* No extra instructions are needed in the prologue. The loop body ++ operations are costed in vectorizable_condition. */ ++ inside_cost = 0; ++ else if (reduction_type == FOLD_LEFT_REDUCTION) + { + /* No extra instructions needed in the prologue. 
*/ + prologue_cost = 0; +diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2020-11-20 04:36:33.996000000 +0800 ++++ b/gcc/tree-vect-stmts.c 2020-11-20 04:32:20.984000000 +0800 +@@ -859,7 +859,8 @@ vect_model_simple_cost (stmt_vec_info st + enum vect_def_type *dt, + int ndts, + slp_tree node, +- stmt_vector_for_cost *cost_vec) ++ stmt_vector_for_cost *cost_vec, ++ vect_cost_for_stmt kind = vector_stmt) + { + int inside_cost = 0, prologue_cost = 0; + +@@ -906,7 +907,7 @@ vect_model_simple_cost (stmt_vec_info st + } + + /* Pass the inside-of-loop statements to the target-specific cost model. */ +- inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt, ++ inside_cost += record_stmt_cost (cost_vec, ncopies, kind, + stmt_info, 0, vect_body); + + if (dump_enabled_p ()) +@@ -9194,15 +9195,18 @@ vectorizable_condition (stmt_vec_info st + " EXTRACT_LAST_REDUCTION.\n"); + LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; + } +- if (expand_vec_cond_expr_p (vectype, comp_vectype, +- cond_code)) +- { +- STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; +- vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node, +- cost_vec); +- return true; +- } +- return false; ++ ++ vect_cost_for_stmt kind = vector_stmt; ++ if (reduction_type == EXTRACT_LAST_REDUCTION) ++ /* Count one reduction-like operation per vector. */ ++ kind = vec_to_scalar; ++ else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)) ++ return false; ++ ++ STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; ++ vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node, ++ cost_vec, kind); ++ return true; + } + + /* Transform. */ diff --git a/avoid-cycling-on-vertain-subreg-reloads.patch b/avoid-cycling-on-vertain-subreg-reloads.patch index 709d68d..f4139ac 100644 --- a/avoid-cycling-on-vertain-subreg-reloads.patch +++ b/avoid-cycling-on-vertain-subreg-reloads.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-lra-Avoid-cycling-on-certain-subreg-reloads-PR96796.patch +6001db79c477b03eacc7e7049560921fb54b7845 + diff -uprN a/gcc/lra-constraints.c b/gcc/lra-constraints.c --- a/gcc/lra-constraints.c 2020-03-12 19:07:21.000000000 +0800 +++ b/gcc/lra-constraints.c 2020-09-08 10:02:52.308147305 +0800 diff --git a/bf16-and-matrix-characteristic.patch b/bf16-and-matrix-characteristic.patch new file mode 100644 index 0000000..8f9e252 --- /dev/null +++ b/bf16-and-matrix-characteristic.patch @@ -0,0 +1,466067 @@ +This backport contains 309 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-re-PR-target-89261-ix86_data_alignment-has-wrong-arg.patch +f8b906a2de3044f1dea753b182c244a1a560d40e + +0002-Fix-Wenum-compare-switch-warning-in-i386.c.patch +791536baadc9f469ec8eef2d7213c6f6091c5fa9 + +0003-Prefer-to-use-strlen-call-instead-of-inline-expansio.patch +786e0e5239529de9a4254fe8411a0e8f843e721a + +0004-Enhance-target-and-target_clone-error-messages.patch +cc2a672a60ff7476b3e4751ba41cb77c7fc85b09 + +0005-re-PR-middle-end-88963-gcc-generates-terrible-code-f.patch +a7eb97ad269b6509bd7b31ca373daea98e4d7e85 + +0006-Split-i386.c.patch +2bf6d93547e516b6b2b2051c0fb1b47ea4acc8a4 + +0007-Split-part-of-functionality-from-lto.c-to-lto-common.patch +a79420f995764129dc40d1abcbf8ce75a0b0f906 + +0008-Error-only-when-a-non-default-mabi-is-used-with-sani.patch +080629d32eca5ea202479022f0bd429a813be7c4 + +0009-This-patch-adds-support-to-vectorize-sum-of-abslolut.patch +a9fad8fe6c84de272f2a56d462e67d53c9f4a73d + +0010-cfgexpand.c-asm_clobber_reg_is_valid-Reject-clobbers.patch +0a59215131c02dee4c8829f93d1ee678647614da + +0011-re-PR-tree-optimization-90395-ICE-verify_flow_info-f.patch +362e280d10c61bec13c1d02c11a1c4ac0846db7e + +0012-re-PR-c-59813-tail-call-elimination-didn-t-fire-for-.patch +b5b9147d35ee509714c34d813c7723bf18bb7b7a + +0013-Accept-code-attributes-as-rtx-codes-in-.md-files.patch +75df257b38bd4cdcb750fc893c5023363230cfe8 + +0014-x86-fix-pr82920.patch +0f8768f73440b040707deafd254d189c2887d00d + +0015-2019-05-14-Przemyslaw-Wirkus-przemyslaw.wirkus-arm.c.patch +a52cf5cf278e4a9e58bfa2bb67a93244766a122f + +0016-re-PR-tree-optimization-88828-Inefficient-update-of-.patch +962372f9f853c582c879f11c0db14973cc8687e0 + +0017-re-PR-tree-optimization-88828-Inefficient-update-of-.patch +595ffc073bf5b1753e3a18dfa704391ad5fad626 + +0018-gcc-move-assemble_start_function-assemble_end_functi.patch +f7430263c07b4a1bcf3deb708c8c691f233fcb40 + +0019-trans.c-check_inlining_for_nested_subprog-Quote-rese.patch +a9c697b88395a0f2b175ac30c59bd8c0c22d0db1 + +0020-gcc-aarch64-move-assemble_start_function-assemble_en.patch +6b5777c6c7059b6b8e372e567a74bdccb59a02c3 + +0021-gimple-match-head.c-Include-vec-perm-indices.h.patch +ebd733a78ccf5792067e94852c6c81a5f9aa0020 + +0022-i386-Fold-__builtin_ia32_shufpd-to-VEC_PERM_EXPR.patch +4d508751f421491052bc1d83150344e6cba30b3b + +0023-aarch64-Introduce-flags-for-SVE2.patch +28108a5341653568e9ebc49ea755ff93cc1e1711 + +0024-aarch64-Change-two-function-declaration-types.patch +1ec77eedd529f81b1dc99cda9818f1ef9e952b96 + +0025-PATCH-3-3-GCC-AARCH64-Add-support-for-pointer-authen.patch +8fc16d725206f2c40bae423d7d0d93bd1baf6da2 + +0026-This-patch-implements-the-u-avgM3_floor-and-u-avgM3_.patch +0617e23c9531373d3b232152c0d81a2c707858d9 + +0027-tree-ssa-alias-access-spath-1.c-new-testcase.patch +987c9fc581ffb04d5ab7a782bb7aee6205c45663 + +0028-PATCH-GCC-AARCH64-Fix-libstdc-build-failure-after-r2.patch +0e2e15abd0765c1866f36f0312f77c9595e7fdec + +0029-aarch64-add-support-for-fabd-in-sve.patch +3db85990dbde7f9c8212fe0fb8a241c5d2993198 + +0030-New-.md-construct-define_insn_and_rewrite.patch +f4fde1b378ad68fb2dec6719ed26c1b901488e03 + +0031-re-PR-target-88837-SVE-Poor-vector-construction-code.patch +3a0afad0d212b3ff213b393728e018caf2daa526 + +0032-AArch64-Emit-TARGET_DOTPROD-specific-sequence-for-us.patch +72215009a9f9827397a4eb74e9341b2b7dc658df + +0033-AARCH64-ILP32-Fix-aarch64_asan_shadow_offset.patch +10078f3e1d0cbebc5e6f7f4821d3ad41421ef1e0 + +0034-Make-SRA-re-construct-orginal-memory-accesses-when-e.patch +3b47da42de621c6c3bf7d2f9245df989aa7eb5a1 + 
+0035-Fix-fwprop-call-to-call-to-paradoxical_subreg_p.patch +6c202d9dc65833e04e35f566c645fde8278c1a24 + +0036-init_1.c-Remove-options-O2-fno-schedule-insns-and-in.patch +3a9debbd7660bafbd7658c9e843eddbac8980188 + +0037-iterators.md-ADDSUB-Fix-typo-in-comment.patch +dd550c996578ea7e94f3a59e57f24636186fbb95 + +0038-re-PR-target-88834-SVE-Poor-addressing-mode-choices-.patch +fa9863e7d34ecd011ae75083be2ae124e5831b64 + +0039-Darwin-The-need-for-picsym-stubs-is-dependent-on-lin.patch +ce3a201593d0ed5b606360c064778de34b5b04ef + +0040-netbsd-aarch64-add-netbsd-aarch64-target.patch +f32f75858a14e7b304df7a71dae15d75081b0deb + +0041-Vectorizer-Support-masking-fold-left-reductions.patch +bce29d65ebe1316d15ec7582a1d257ef1be163f7 + +0042-Darwin-The-need-for-FDE-symbols-is-dependent-on-link.patch +dbe89f49da468fbd42a27bdb7b8f06de76a871b4 + +0043-AArch64-Simplify-SVE-IFN_COND-patterns.patch +32cf949cec180799d3fb14d405772ea35b5aafd3 + +0044-AArch64-Factor-out-ptrue-predicate-creation.patch +16de3637c4df37e0203b3ad52b238887e6ca38fc + +0045-AArch64-Factor-out-pfalse-predicate-creation.patch +e7053b0c7cf3f1cd8a23cc71e7e36ec29c46b217 + +0046-AArch64-Tabify-aarch64-sve.md.patch +ea403d8bb5129632aac4d2f270566d2d0073a8ae + +0047-AArch64-Add-a-new-CC-mode-for-SVE-conditions.patch +57d6f4d04d438522dc03488ca31f71b4b7b904c8 + +0048-aarch64-Refactor-common-errata-work-around-specs.patch +91bed1a15a6dfb891b9658532b49f9488b5537f4 + +0049-objective-c-c-testsuite-Fix-stubify-tests-for-fnext-.patch +b7a0332ccd21c04a37535c97f04abc4bc28fb321 + +0050-builtins.c-get_memory_rtx-Fix-comment.patch +76715c3216cf6ccd071fc852920af55d6b0054ae + +0051-Use-alternative_mask-for-add_insn_allocno_copies.patch +73bb8fe9e915cf3219f16afdc61c308c08aa7659 + +0052-Simplify-ira_setup_alts.patch +06a65e803ed06f3ad1fd8e5f90db03aa0a7e5414 + +0053-Make-ira_get_dup_out_num-handle-more-cases.patch +ed680e2cc18c73f90e6bfbd3f346a8820476371b + +0054-Allow-earlyclobbers-in-ira_get_dup_out_num.patch +ae5569fa33c9f3286e0b747f8b6607d21a4b9827 + +0055-Use-ira_setup_alts-for-conflict-detection.patch +6de20b9d7a1af863fb51b4a783c153ea0092810a + +0056-aarch64-force-frame-pointer-setup-before-tlsdesc-cal.patch +0e510d1824241953c67b38f7a894de7238c23c61 + +0057-AArch64-Remove-constraint-strings-from-define_expand.patch +1bbffb87a9ecc3e27a4074145e55e3315df57b7d + +0058-re-PR-target-88833-SVE-Redundant-moves-for-WHILELO-b.patch +75da268e1a563a1a52389cd2ecee12d07c45a655 + +0059-PATCH-GCC-AARCH64-PR-target-90712-Fix-gcc.dg-rtl-aar.patch +2bdc7dcbbd2eee4f114c09443933cc37a546dbff + +0060-aarch64-redefine-aes-patterns.patch +5169fa77322e36dd4783bc5126185159c35a3584 + +0061-simplify-rtx.c-simplify_unary_operation_1-Use-GET_MO.patch +4faba5c3bc37c0bfceec6b254d76c5d0b3e2fe8b + +0062-Support-multiple-operand-counts-for-.md-patterns.patch +d281492de84960b5885f88fffeeb226650f5141d + +0063-arch64-Fix-ambiguous-.md-attribute-uses.patch +e7ba492a04d0bfef9752cbb16fcce3ffc31bf99f + +0064-Relax-vector_builder-elt-sanity-check.patch +72ab1c51b607dd5446ee24ff9fce9178d6b811cb + +0065-re-PR-target-90723-pr88598-2.c-segfaults-with-msve-v.patch +f2b29269c407f10718bc935b3dd5c7e8641b6847 + +0066-AArch64-Rename-bitperm-to-sve2-bitperm.patch +c10abf530e52972ef708f6e72cf20dd920cd22a2 + +0067-aarch64-add-usra-and-ssra-combine-patterns.patch +462e6f9a932a44ca73715dc5c2960e5b332f63f7 + +0068-config-i386-x86-tune.def-X86_TUNE_AVOID_256FMA_CHAIN.patch +ef893a2a769b18c61953d80670b1db8c27bc44e0 + +0069-i386-options.c-ix86_option_override_internal-Default.patch +105c2795b0d63b2cc5cb224ba066fa8b9a0ad0ff 
+ +0070-Come-up-with-function_decl_type-and-use-it-in-tree_f.patch +cb50701ec2c7abdc48db278802022f7e94675d07 + +0071-cif-code.def-NEVER_CALL-New-code.patch +5ab2422adf894bdf84deed8c7c0557c16d6dca2b + +0072-AArch64-Make-processing-less-fragile-in-config.gcc.patch +3644cadf6a9d5a5cd8e83b0123316cf184fa4e3e + +0073-Implement-more-rtx-vector-folds-on-variable-length-v.patch +4ce6ab6889446984fd7017e2150962eb4550a7ee + +0074-Generalise-VEC_DUPLICATE-folding-for-variable-length.patch +708cc6132bb374e2c5bd1c4f43f9fe7306d20970 + +0075-Add-dg-test-for-matching-function-bodies.patch +4d706ff86ea86868615558e92407674a4f4b4af9 + +0076-Prevent-Og-from-deleting-stores-to-write-only-variab.patch +ec8ac265ff21fb379ac072848561a91e4990c47f + +0077-Don-t-run-DSE-at-Og.patch +c0fe6bce2a8c35e997f45b0a674ab2058ba50ae0 + +0078-Prevent-tree-ssa-dce.c-from-deleting-stores-at-Og.patch +f33b9c40b97f6f8a72ee370068ad81e33d71434e + +0079-re-PR-target-91150-wrong-code-with-O-mavx512vbmi-due.patch +fa2987ed8db073b9d59688363e2dfb6c60f47d70 + +0080-Handle-IFN_COND_MUL-in-tree-ssa-math-opts.c.patch +c1b3d827832f883e0634b18c88eb2bbde335aa42 + +0081-Make-lra-use-per-alternative-earlyclobber-info.patch +a25f3e8efbbc7182fa58c445574848a73856e9b4 + +0082-GCC-AArch64-Enable-Transactional-Memory-Extension.patch +89626179b6fe42cbd58c715808f7c6401879757f + +0083-Add-a-gimple_move_vops-helper-function.patch +779724a5913b4e6a7ccccc0b8b415a772144a067 + +0084-Make-function_code-a-32-bit-field.patch +55f863c4d694deafb968dbf44d08ba49bb7c0766 + +0085-AArch64-Remove-unused-commutative-attribute.patch +871b49afafe043d57f717e70532d66c5a56ca173 + +0086-AArch64-Reorganise-aarch64-sve.md.patch +915d28fe74dbb30352702ab07ea5bf30747043bb + +0087-AArch64-Make-SVE-UNSPEC_COND_-s-match-the-insn-mnemo.patch +cb18e86dd005fe009c536a8bb0aec7aa88ca66df + +0088-AArch64-Remove-redundant-SVE-FADDA-pattern.patch +8ad84de26e1032d80225905c611a47b64a385e8a + +0089-AArch64-Merge-SVE-FP-unary-patterns.patch +d45b20a5539b6f306a559470c3a7e9f84a058bfb + +0090-AArch64-Merge-SVE-FMAXNM-FMINNM-patterns.patch +214c42faa06a9eb1aa7f0296399f28df4fb068ec + +0091-AArch64-Merge-SVE-ternary-FP-operations.patch +0d80d083a2e1d368fcb11eb7ea5490c274f0ea15 + +0092-AArch64-Merge-SVE-reduction-patterns.patch +b0760a40bef3ca690691bf5d214da95b5dc25266 + +0093-AArch64-Prefer-FPRs-over-GPRs-for-CLASTB.patch +801790b37ca817089ecbae214340162e6d94ea6a + +0094-AArch64-Prefer-FPRs-over-GPRs-for-INSR.patch +61ee25b9e7d84fbb18218887d1fecfb10f72993a + +0095-AArch64-Fix-INSR-for-zero-floats.patch +9b6fb97c99abe64147f82a3ea6e6ed598e387482 + +0096-C-Fix-bogus-nested-enum-error-message.patch +99769e7fb6ed153a53174b7f08415eee347655f0 + +0097-AArch64-Make-perm_insn-the-complete-mnemonic.patch +3e2751ce5591dc8f3b5f4ffd3dacf0fb8f789395 + +0098-AArch64-Add-a-y-constraint-for-V0-V7.patch +163b1f6ab2950553e1cc1b39a6b49293b3390e46 + +0099-AArch64-Make-aarch64_classify_vector_mode-use-a-swit.patch +806f69cd68c18399e8e54b1a0913ae57beabbe69 + +0100-AArch64-Make-simd_immediate_info-INDEX-explicit.patch +1da83ccee8e7b61e7777abb63eb0e5a0ff1f1e93 + +0101-AArch64-Use-simd_immediate_info-for-SVE-predicate-co.patch +1044fa32e2b456b59b3cdc31b4f261145f1589cc + +0102-AArch64-Increase-default-function-alignment.patch +4e55aefa3ee19167a41892e4920a3e8c520aee42 + +0103-AArch64-Improve-SVE-constant-moves.patch +4aeb1ba7f62c1d680c819ae3e137c3bad6f520ca + +0104-Darwin-There-is-no-need-to-distinguish-PIC-non-PIC-s.patch +d308419c64c52c2d48bdf53a65e1790a2c897e83 + +0105-Optimise-constant-IFN_WHILE_ULTs.patch 
+0b1fe8cf6f1dde656c505dde6d27279dff388962 + +0106-Protect-some-checks-of-DECL_FUNCTION_CODE.patch +cb1180d547e3b28547134a06ee020163afa59cc3 + +0107-Use-checking-forms-of-DECL_FUNCTION_CODE-PR-91421.patch +4d732405bd91b54c196fdc38191f838bb01f23a6 + +0108-AArch64-Rework-SVE-PTEST-patterns.patch +34467289631e29545e14148515ab5f5d0d9e4fa7 + +0109-AArch64-Canonicalise-SVE-predicate-constants.patch +678faefcab01f9e9eeb222852675b5a042aaf900 + +0110-AArch64-Don-t-rely-on-REG_EQUAL-notes-to-combine-SVE.patch +35d6c5913d2209eb50f48b589b29f0dce13cb9b7 + +0111-AArch64-Use-unspecs-for-remaining-SVE-FP-binary-ops.patch +6fe679cc6be7a55832f9b88a8cf0751e8d5eff6e + +0112-AArch64-Add-a-GP-strictness-operand-to-SVE-FP-unspec.patch +c9c5a8090c58b84c1eb45e39e77eee223f992009 + +0113-AArch64-Commonise-some-SVE-FP-patterns.patch +0254ed7970e64abd82f21aedf9373720a73671c7 + +0114-AArch64-Add-support-for-SVE-HF-vconds.patch +a70965b114281553fa46cac9b8abab543f36793f + +0115-AArch64-Rework-SVE-FP-comparisons.patch +4a942af61c16f38f7fe51ed72a7ac23f73f62f2a + +0116-AArch64-Use-unspecs-for-SVE-conversions-involving-fl.patch +99361551624427aebe7a856a4327e083aa33733a + +0117-AArch64-Rearrange-SVE-conversion-patterns.patch +95eb5537d8bb23b952105b46250ed4fba8766b84 + +0118-AArch64-Use-x-predication-for-SVE-integer-arithmetic.patch +063082768aab23d26e42954eb115b76318f0176d + +0119-AArch64-Rework-SVE-integer-comparisons.patch +00fa90d975bfacfd91a615fbee24e3e6a100100f + +0120-AArch64-Handle-more-SVE-predicate-constants.patch +2803bc3bbca332f53801770715a5b592b2467492 + +0121-AArch64-Use-SVE-ADR-to-optimise-shift-add-sequences.patch +a229966c9c76afe0cf18c566a3c13ddde3878288 + +0122-AArch64-Add-support-for-SVE-CLS-and-CLZ.patch +bca5a9971f47cf5fe79e6595beb762539f200f46 + +0123-AArch64-Add-support-for-SVE-CNOT.patch +e0a0be93d7c2b760779c3085c5abfd0496e3458b + +0124-AArch64-Add-support-for-SVE-SU-MAX-MIN-immediate.patch +f8c22a8bbaf3ef4260f7d8beea22ed151ca4b726 + +0125-AArch64-Add-support-for-SVE-F-MAX-MIN-NM-immediate.patch +75079ddf9cb867576bbef66f3e8370d9fdeea3b8 + +0126-AArch64-Make-more-use-of-SVE-conditional-constant-mo.patch +d29f7dd50de9e8e46f7e247c53f3b0405a3dadd9 + +0127-AArch64-Use-SVE-MOV-M-of-scalars.patch +88a37c4d72899c5a3f5a7b2bca0ae0096f3270a3 + +0128-AArch64-Add-support-for-SVE-absolute-comparisons.patch +42b4e87d317377d6dcbb25ee2523da4a0c42478a + +0129-AArch64-Add-SVE-conditional-integer-unary-patterns.patch +3c9f496337f754f7c22afb46b017871db5844a97 + +0130-AArch64-Add-SVE-conditional-floating-point-unary-pat.patch +b21f7d53095b253753c5622f99809e9c82fd3009 + +0131-AArch64-Add-SVE-conditional-conversion-patterns.patch +c5e16983cd1bd6dd6eca1b939c3c8859f0c6c866 + +0132-AArch64-Use-SVE-UXT-BHW-as-a-form-of-predicated-AND.patch +d113ece60450b2efb07e9057b6d2732b08fee2c4 + +0133-AArch64-Use-SVE-BIC-for-conditional-arithmetic.patch +1b187f36ec16d43d0227805955d8fae51af26970 + +0134-Add-support-for-conditional-shifts.patch +20103c0ea9336d2b5286eb7f2605ace3fd49a431 + +0135-AArch64-Use-SVE-SU-ABD-in-conditional-arithmetic.patch +9730c5ccd522cd955bcb6e65295023621cade8b6 + +0136-AArch64-Use-SVE-FABD-in-conditional-arithmetic.patch +bf30864e4c241e50585745af504b09db55f7f08b + +0137-AArch64-Use-SVE-binary-immediate-instructions-for-co.patch +a19ba9e1b15d248e5a13ee773f4acd4ae29fdeaa + +0138-AArch64-Use-SVE-MLA-MLS-MAD-and-MSB-for-conditional-.patch +b6c3aea1892c148c21f8b87668f344b2397f4aa5 + +0139-AArch64-Add-a-commutativity-marker-to-the-SVE-SU-ABD.patch +9a8d9b3f2422d4885e5c846dee66acf6336e6ccf + 
+0140-aarch64-Use-neoversen1-tuning-struct-for-mcpu-cortex.patch +42418c1f7f5cb3b2f466f88053acc818ddc5cd4d + +0141-AArch64-Use-SVE-reversed-shifts-in-preference-to-MOV.patch +7d1f24018b04c13134bc47619fb8aaa390b01754 + +0142-AArch64-Add-more-unpredicated-MOVPRFX-alternatives.patch +5e176a613ef2eda92aa65736763a562dc42a50fe + +0143-AArch64-Remove-unneeded-FSUB-alternatives-and-add-a-.patch +2ae21bd133c357fcd7b6e06dc7d7d9e0660abe2c + +0144-AArch64-Add-MOVPRFX-alternatives-for-SVE-EXT-pattern.patch +06b3ba23eb6ff965a92cd99d2835d4c29316a447 + +0145-AArch64-Add-more-SVE-FMLA-and-FMAD-z-alternatives.patch +432b29c189a6d26ed701c7518402708b2fcb794f + +0146-AArch64-Rework-SVE-REV-BHW-patterns.patch +d7a09c445a475a95559e8b9f29eb06ad92effa91 + +0147-AArch64-Rework-SVE-INC-DEC-handling.patch +0fdc30bcf56d7b46122d7e67d61b56c0a198f3b3 + +0148-AArch64-Optimise-aarch64_add_offset-for-SVE-VL-const.patch +7d8bdfa7e409821c50f6d8a7b557bd7dc760c4ce + +0149-AArch64-Pass-a-pattern-to-aarch64_output_sve_cnt_imm.patch +139df05a29eb71075e42f502978dea4d00a99708 + +0150-AArch64-Tweak-operand-choice-for-SVE-predicate-AND.patch +2d2388f82f2e7f2fd1da063192ba98be45f099d2 + +0151-AArch64-Fix-predicate-alignment-for-fixed-length-SVE.patch +07108a9ebe4776610bb23f684b3a346d28511bed + +0152-AArch64-Add-a-aarch64_sve_mode_p-query.patch +5c38705dbde776f68bf1f99a71657d0e21b772a5 + +0153-Remove-TARGET_SETUP_INCOMING_VARARG_BOUNDS.patch +06b5889c434b941804d5592cd4fc8946b25c1c4b + +0154-As-discussed-below.patch +1f2a3ac34620ab4669f9f32417a7a4496c8f603a + +0155-AArch64-Use-scvtf-fbits-option-where-appropriate.patch +188d00796f5bd338b9b8ab1cc8ba4b43af8ab8fd + +0156-Add-pass_va_arg_by_reference.patch +fde65a89fad742c2dca8ad50452e482d22f3c1b2 + +0157-Add-must_pass_va_arg_in_stack.patch +4f53599cb5b822cd7f95997861c2e064977ecb6a + +0158-Use-function_arg_info-for-TARGET_ARG_PARTIAL_BYTES.patch +a7c81bc1fb43366ca1b4332d8a6042b648a84cdc + +0159-Use-function_arg_info-for-TARGET_PASS_BY_REFERENCE.patch +52090e4dbd064f486af606e3f8a283dbddc7c18a + +0160-Use-function_arg_info-for-TARGET_SETUP_INCOMING_ARGS.patch +e7056ca417326a70eca05defb6a8b20b737d3417 + +0161-Use-function_arg_info-for-TARGET_FUNCTION_-INCOMING_.patch +6783fdb7057d559aa1da8afa2c15a702c532a03e + +0162-Use-function_arg_info-for-TARGET_FUNCTION_ARG_ADVANC.patch +6930c98c69ad695469ee7daa74b3b6d578afdd0d + +0163-Use-function_arg_info-for-TARGET_CALLEE_COPIES.patch +7256c7194e186fce6ff866a124a77b08196c2a5f + +0164-Use-function_arg_info-for-TARGET_MUST_PASS_IN_STACK.patch +0ffef2005fd7536efbc9c3a572701998c8a8080c + +0165-Add-a-apply_pass_by_reference_rules-helper.patch +b12cdd6e8e8dd1f39a941b731ba1056d656a094f + +0166-re-PR-target-88839-SVE-Poor-implementation-of-blend-.patch +9556ef20164e69d094f5a3e1af262dbb45ed8e3a + +0167-aarch64-sve.md-vcond_mask-Add.patch +b1c9ec725da365165ce4c2fdf63daa33b7d86649 + +0168-aarch64-add-intrinsics-for-vld1-q-_x4-and-vst1-q-_x4.patch +391625888d4d97f9016ab9ac04acc55d81f0c26f + +0169-arm-aarch64-Add-comments-warning-that-stack-protecto.patch +a7e73b4158f528600ef97aca29201ddc92b3439f + +0170-AArch64-Add-Linux-hwcap-strings-for-some-extensions.patch +75f935365dba3eb5e9cbd11bc0d75009cad3d019 + +0171-AArch64-Add-support-for-missing-CPUs.patch +e0664b7a63ed8305e9f8539309df7fb3eb13babe + +0172-AArch64-Implement-ACLE-intrinsics-for-FRINT-32-64-Z-.patch +10bd1d964ef12daa9f92ff0b8d1e5f600aa63f7b + +0173-AArch64-Add-support-for-__jcvt-intrinsic.patch +e1d5d19ec4f84b67ac693fef5b2add7dc9cf056d + +0174-Remove-bt-load.c.patch +f78f73cbd284abe4f1718fd7803f5f98800de225 
+ +0175-Simplify-the-implementation-of-HARD_REG_SET.patch +504279ae0a0ce28ad37f820dcdb7f6557aabef7c + +0176-Make-note_stores-take-an-rtx_insn.patch +e8448ba5300e32917fb12f877ae40711c2b452a3 + +0177-Remove-COPY_HARD_REG_SET.patch +6576d245386e2ce52df274ef8f2ffed81cfaa1c3 + +0178-Remove-COMPL_HARD_REG_SET.patch +50b3f54d551787e0a066451ef60ef3b055a893e6 + +0179-Remove-AND_HARD_REG_SET.patch +dc333d8ff60909dbed89126443e3024f1592f8a4 + +0180-Remove-IOR_HARD_REG_SET.patch +44942965f4eae141bd1f8300e7f77d0c9a3936e4 + +0181-Remove-AND_COMPL_HARD_REG_SET.patch +d15e5131845e2a68513230a624839ef5abcda690 + +0182-Remove-IOR_COMPL_HARD_REG_SET.patch +4897c5aaa7a5db4c1ece28ef66acb3d5e41787b3 + +0183-Remove-hard_reg_set_equal_p.patch +a85796511b2b7985f79331c996761f7a87cb8116 + +0184-Tweak-interface-to-ira-build.c-ior_hard_reg_conflict.patch +75f4e3a1b322e16a1aca28bd0ced9af57cb0a683 + +0185-Add-fast-conversions-from-arrays-to-bitmaps.patch +148909bc700e4f52aa582346a29abc5bc51a9bda + +0186-Remove-global-REG_SETs.patch +0b0310e9a0e0d553bbe9f961c52e0851328aa8b0 + +0187-Remove-call_fixed_reg_set.patch +df1f0eef67939274e9ddd3df426e8dfc5184086b + +0188-Remove-no_caller_save_reg_set.patch +026116ce2a4dedad81518b0ca89dd8243b545778 + +0189-Replace-call_used_reg_set-with-call_used_or_fixed_re.patch +a5647ae846f6765f12a359acba6a71fc12254fa8 + +0190-Add-call_used_or_fixed_reg_p.patch +a365fa0636886aeda83e57b84d837cfba13597fe + +0191-Hide-call_used_regs-in-target-independent-code.patch +53bee79caba4fb88acbcd9bad7891ea45b5511e3 + +0192-Remove-call_really_used_regs.patch +d7fb4c3162307590c0babddcea4fb60c07a7c033 + +0193-Vectorise-multiply-high-with-scaling-operations-PR-8.patch +58cc98767aa1d8136d36467b892dc4adaf427acc + +0194-arm-aarch64-Make-no_insn-issue-to-nothing.patch +f62281dc1b3d751977266d8c30b4488833fcb9dd + +0195-Two-more-POLY_INT-cases-for-dwarf2out.c.patch +ef20d2215067b1bfa8b3f9549ca0baed636a94a0 + +0196-Handle-variable-length-vectors-in-compute_record_mod.patch +defc6f266c1dd625cc64ad1ecfbd1eacbcd66e4f + +0197-Don-t-treat-variable-length-vectors-as-VLAs-during-g.patch +22b6299199da4efd3944cdaabca1d095d19ff901 + +0198-Make-get_value_for_expr-check-for-INTEGER_CSTs.patch +01b57ebf58b8cc0d16db827d1d9aa5f10da23cce + +0199-aarch64-Extend-R-for-integer-registers.patch +e3f15286d1129de2cceee6acd5d5584cb5422db6 + +0200-aarch64-Implement-TImode-compare-and-swap.patch +4a2095ebace8534038ce2adf4ae94bfc854066c4 + +0201-aarch64-Tidy-aarch64_split_compare_and_swap.patch +b7e560deb37e38fb224a0cf108e15df4a717167a + +0202-aarch64-Implement-moutline-atomics.patch +3950b229a5ed6710f30241c2ddc3c74909bf4740 + +0203-Rework-constant-subreg-folds-and-handle-more-variabl.patch +f24f4c15884bf1ee65a10e2f959842eec4198876 + +0204-Extend-neg_const_int-simplifications-to-other-const-.patch +681fc0fa40cc4f018cb691d796aa819a24257774 + +0205-Avoid-adding-impossible-copies-in-ira-conflicts.c-pr.patch +9f635bd13fe9e85872e441b6f3618947f989909a + +0206-AArch64-Fix-memmodel-index-in-aarch64_store_exclusiv.patch +3a30d2558b3a199fe346479e6140cddae7fba5ed + +0207-AArch64-Use-implementation-namespace-consistently-in.patch +9a3afc3564b36fb34826899a345a9c35b1c53e39 + +0208-C-C-Allow-targets-to-check-calls-to-BUILT_IN_MD-func.patch +c6447c2014b76b5c077a07712a7f0b0aaa2e14d4 + +0209-AArch64-Split-built-in-function-codes-into-major-and.patch +6d4d616a782d5be693ea9575f69d5ebf450be090 + +0210-AArch64-Strengthen-aarch64_hard_regno_call_part_clob.patch +51051f474a768d285714d713f1b7535d6a139350 + +0211-Add-function_abi.-h-cc.patch 
+bd785b44932274f7067105de417938597289962c + +0212-Add-a-target-hook-for-getting-an-ABI-from-a-function.patch +002ffd3caa684c3eb30f8f53206439b7aa34b370 + +0213-Add-a-function-for-getting-the-ABI-of-a-call-insn-ta.patch +5a5a3bc5fa14664be26748c11325021b6b6f8e74 + +0214-Pass-an-ABI-identifier-to-hard_regno_call_part_clobb.patch +6ee2cc70024253d2670a4a317158b2a65251a1d1 + +0215-Remove-global-call-sets-DF-entry-exit-defs.patch +559c1ae100489da76a0283750361ace146fdeb77 + +0216-Remove-global-call-sets-IRA.patch +6c47622219d6386807b26890dcdc84f192499d33 + +0217-Remove-global-call-sets-LRA.patch +a1e6ee38e708ef2bdef4dfbb99473344bd56fa2f + +0218-Remove-global-call-sets-regrename.c.patch +0ce77f463d1d150e70a91807502d628492ca7ae5 + +0219-Make-ira-call-df_set_regs_ever_live-for-extra-call-c.patch +6d1e98dfd2bfce30640d71df355bedf114229744 + +0220-AArch64-Allow-shrink-wrapping-of-non-leaf-vector-PCS.patch +ce9d2a37f2db20328286f5d3d5a13a4e765c59f7 + +0221-AArch64-Make-more-use-of-function_abi.patch +dcdd0f055731a8c960a15e5de8715d041d9a7876 + +0222-AArch64-SVE-Utilize-ASRD-instruction-for-division-an.patch +c0c2f013906a695b8a02226f119649a370d9e083 + +0223-AArch64-Make-call-insns-record-the-callee-s-arm_pcs.patch +08cc4d925f640c3cd0336bae4dc6004244a5c80a + +0224-AArch64-Use-calls-for-SVE-TLSDESC.patch +bb6ce448fc194cca8e51aea274a1b2408c7746c3 + +0225-Remove-clobber_high.patch +17d184e5c4896264c27c27d125a6c1f8462d9d37 + +0226-C-Improve-diagnostics-for-vector-types.patch +8209db250f305cc79fd751c3ed056fb9ff551a83 + +0227-invoke.texi-early-inlining-insns-O2-Document.patch +0b92cf305dcf34387a8e2564e55ca8948df3b47a + +0228-cif-code.def-MAX_INLINE_INSNS_SINGLE_O2_LIMIT-.-New.patch +562d1e9556777988ae46c5d1357af2636bc272ea + +0229-Fix-EXECUTE_IF_SET_IN_HARD_REG_SET-use.patch +1c8264003ab1d6932d874bd1a9af4ac498d4b4a4 + +0230-Use-CONSTEXPR-in-machmode.h.patch +ad00d6c1746fdcbfd86b2d50f2500d7ccb0d1691 + +0231-pretty-print-support-URL-escape-sequences-PR-87488.patch +d26082357676a3c3843595dfe88a6c682b56e334 + +0232-Relax-store_bit_field-call-in-store_expr.patch +8b27c9052b8d191c98686e77d2fa610390c78f32 + +0233-Darwin-machopic-8-n-Back-out-part-of-PR71767-fix.patch +f922d945244558904be6868dc036c31fd05750dd + +0234-Add-expr_callee_abi.patch +63d25773e166e2e3babe626a5800e70939844754 + +0235-AArch64-Use-frame-reference-in-aarch64_layout_frame.patch +ab43763e519ed8efbbfdac801d008c338fbcb187 + +0236-AArch64-Add-an-assert-to-aarch64_layout_frame.patch +8e66b377a93e3fc371d0836768740d68ef8fffc5 + +0237-AArch64-Improve-poly_int-handling-in-aarch64_layout_.patch +9b17a646d90ad0cc30daf8432aa60ad0d751d914 + +0238-AArch64-Add-partial-SVE-vector-modes.patch +550a338052c374cb1f6c07ffd883c4046565fdd4 + +0239-AArch64-Fix-symbol-offset-limit.patch +7d3b27ff12610fde9d6c4b56abc70c6ee9b6b3db + +0240-AArch64-SVE2-Support-for-EOR3-and-variants-of-BSL.patch +2d57b12e2acd52b843adbcd6d5909cb0b9f7196b + +0241-re-PR-target-86753-gcc.target-aarch64-sve-vcond_-45-.patch +cc1facefe3b4e3b067d95291a7dba834b830ff18 + +0242-Pass-a-vec_info-to-get_vectype_for_scalar_type.patch +7ed54790da87bbb4a134020a9fb8bd1b72fd0acb + +0243-AArch64-Implement-__rndr-__rndrrs-intrinsics.patch +c5dc215df17071281c21450fa2d584e1161e4bc2 + +0244-re-PR-debug-90231-ivopts-causes-optimized-away-itera.patch +d9eabacb0483ac1f730112d551551c258365f02e + +0245-Add-a-simulate_builin_function_decl-langhook.patch +740785381ec9944c861dcc29b420c96aa933f040 + +0246-Add-a-simulate_enum_decl-langhook.patch +ac2cfa6cc35175311f92c25acbdd244f0f3bbb87 + 
+0247-AArch64-Handle-scalars-in-cmp-and-shift-immediate-qu.patch +6bc67182b6500b942674d6031c1bf0f02c779cbd + +0248-AArch64-Add-FFR-and-FFRT-registers.patch +183bfdafc6f1f98711c5400498a7268cc1441096 + +0249-AArch64-Extend-SVE-reverse-permutes-to-predicates.patch +28350fd1bee1e238e9c57b04c0796e1e17b659e4 + +0250-AArch64-Add-support-for-arm_sve.h.patch +624d0f07d51b7fa8bc99142bd0e8380fb9e7badc + +0251-AArch64-Add-support-for-the-SVE-PCS.patch +c600df9a4060da3c6121ff4d0b93f179eafd69d1 + +0252-AArch64-Add-main-SVE-ACLE-tests.patch +bc73c4c24daec96ad3e7ff904645c3095a4febe9 + +0253-Remove-cgraph_global_info.patch +a62bfab5d2a332925fcf10c45b4c5d8ca499439d + +0254-AArch64-Remove-unused-mode-iterators.patch +ffc111637291037e5546428275e39d8ca16d1fac + +0255-AArch64-Use-aarch64_sve_int_mode-in-SVE-ACLE-code.patch +86194087ce338c8d0073d905eb60dca654d6bba3 + +0256-Add-build_truth_vector_type_for_mode.patch +0a0ef2387cc1561d537d8d949aef9479ef17ba35 + +0257-AArch64-Add-FULL-to-SVE-mode-iterator-names.patch +f75cdd2c4e5282985a6fbdb2e72e17cb77782044 + +0258-LRA-handle-memory-constraints-that-accept-more-than-.patch +1aeffdce2dfe718e1337d75eb4f22c3c300df9bb + +0259-Handle-VIEW_CONVERT_EXPR-for-variable-length-vectors.patch +13c247d6f2a75b7e7a11546e897489716bc31506 + +0260-re-PR-target-90867-Multiplication-or-typecast-of-int.patch +94cdd3b7ceff688d039a9f134013ac9069df2e8c + +0261-re-PR-inline-asm-92615-ICE-in-extract_insn.patch +8d0d7a63019a7d67943d1867348673e3ca3dc824 + +0262-re-PR-tree-optimization-92645-Hand-written-vector-co.patch +1fa715db5490fb44668e0a37f9a5927d9030a50e + +0263-re-PR-tree-optimization-92690-vector-CTOR-optimizati.patch +88feafba3cb5b186d53080c4958474065c4bd5d2 + +0264-target.def-TARGET_VECTORIZE_BUILTIN_CONVERSION-Remov.patch +477daf831aea18923733772d686eb1ed448d96e7 + +0265-re-PR-tree-optimization-92645-Hand-written-vector-co.patch +78307657cf9675bc4aa2e77561c823834714b4c8 + +0266-re-PR-tree-optimization-92715-error-position-plus-si.patch +438d9c4afa635c7a1475feebbc220fe8d335c664 + +0267-re-PR-target-92758-r278833-breaks-gcc.target-powerpc.patch +577f4a0e5e7f7ef9b5729a3eed79e523cba9dfa9 + +0268-re-PR-tree-optimization-92803-error-type-mismatch-in.patch +a3408fa3fbf20455eb3b17b5c78397f9d66065c7 + +0269-Add-ARM-specific-Bfloat-format-support-to-middle-end.patch +d5ffd47e9a739770aa7ef5ad06c07fe9f16a3260 + +0270-re-PR-target-92904-varargs-for-__int128-is-placed-at.patch +46f3e52e834ab0c06902e7424e57513ee6a8aacd + +0271-AArch64-Enable-CLI-for-Armv8.6-a-armv8.6-a-i8mm-and-.patch +a93e1d5c70abe9fba3522318131a352fad0a4f48 + +0272-gcc-testsuite-ChangeLog.patch +9260fb066b7ed0b237a3300e05fca9bffe018c6b + +0273-Add-a-compatible_vector_types_p-target-hook.patch +482b2b43e5101921ad94e51e052a18b353f8a3f5 + +0274-AArch64-Specify-some-SVE-ACLE-functions-in-a-more-ge.patch +99a3b91535cb41807d62478cd769bc1bed0db5df + +0275-AArch64-Rename-SVE-shape-unary_count-to-unary_to_uin.patch +5b052959dcd2e9c390c7de34f806c4b22a66d8f7 + +0276-AArch64-Rename-UNSPEC_WHILE-to-match-instruction-mne.patch +6ad9571b172cd98099b477cba4efdd92c85bd222 + +0277-AArch64-Add-support-for-the-SVE2-ACLE.patch +0a09a9483825233f16e5b26bb0ffee76752339fc + +0278-config.gcc-Add-arm_bf16.h.patch +abbe1ed27355178223cd099fb73227f392416ea6 + +0279-aarch64.c-aarch64_invalid_conversion-New-function-fo.patch +9869896730f3055850034c05c596828d517fa9a2 + +0280-GCC-PATCH-AArch64-Add-ACLE-intrinsics-for-dot-produc.patch +8c197c851e7528baba7cb837f34c05ba2242f705 + +0281-GCC-PATCH-AArch64-Add-ACLE-intrinsics-for-bfdot-for-.patch 
+f275d73a57f1e5a07fbd4978f4b4457a5eaa1e39 + +0282-AArch64-Fix-shrinkwrapping-interactions-with-atomics.patch +e5e07b68187b9aa334519746c45b8cffc5eb7e5c + +0283-AArch64-Enable-CLI-for-Armv8.6-A-f64mm.patch +336e1b950db8b91027cdf0ab33bd905930d7f363 + +0284-AArch64-SVE-Implement-svld1ro-intrinsic.patch +9ceec73fc0e5033049704becef5d79001e31a245 + +0285-AArch64-Obvious-Correct-pattern-target-requirement.patch +568f0f355f259f58688dd73f749f4d80adc10e40 + +0286-AArch64-effective_target-for-aarch64-f64mm-asm.patch +3c9e580511e713068c0ea0d7b34f6e50ebf85447 + +0287-testsuite-Add-target-xfail-argument-to-check-functio.patch +4c33b2daeb5a87aedef77993971db1a1a1c291e6 + +0288-aarch64-Skip-some-SVE-ACLE-function-body-tests-for-I.patch +b02fbed15a36a86dda6a09a8dc237a8d288f6c09 + +0289-i386-Fix-ix86_fold_builtin-shift-folding-PR93418.patch +bff948aa337807260344c83ac9079d6386410094 + +0290-forwprop-Tweak-choice-of-VEC_PERM_EXPR-filler-PR9282.patch +1ee3b380dfb479b335f3b50039ce26abcbffe59a + +0291-SRA-Add-verification-of-accesses.patch +5b9e89c922dc2e7e8b8da644bd3a8917c16b22ac + +0292-SRA-Total-scalarization-after-access-propagation-PR9.patch +636e80eea24b780f1d5f4c14c58fc00001df8508 + +0293-aarch64-Fix-SVE-PCS-failures-for-BE-ILP32.patch +2171a9207f51bc486ed9c502cb4da706f594615e + +0294-aarch64-Add-Armv8.6-SVE-matrix-multiply-support.patch +3669677425f249c163201c4760d05abb3cf4e6bc + +0295-aarch64-Add-svbfloat16_t-support-to-arm_sve.h.patch +02fcd8ac408be56d2a6e67e2e09b26532862f233 + +0296-aarch64-Add-Armv8.6-SVE-bfloat16-support.patch +896dff99e18d67afdbe4d1effec20a3da474b22b + +0297-aarch64-ACLE-intrinsics-bfmmla-and-bfmlal-b-t.patch +f78335df69993a900512f92324cab6a20b1bde0c + +0298-aarch64-Add-an-extra-sbfiz-pattern-PR87763.patch +b65a1eb3fae53f2e1ea1ef8c1164f490d55855a1 + +0299-x86-64-Pass-aggregates-with-only-float-double-in-GPR.patch +ea5ca698dca15dc86b823661ac357a30b49dd0f6 + +0300-aarch64-ACLE-I8MM-multiply-accumulate-intrinsics.patch +40f648378061c170cf6a9ab680af01b3a3a83569 + +0301-i386-Skip-ENDBR32-at-the-target-function-entry.patch +1d69147af203d4dcd2270429f90c93f1a37ddfff + +0302-testsuite-Fix-recently-added-ipa-testcases-PR93763.patch +103bc4db7665a03bf2390ccc8ceca0dc5a7a81b7 + +0303-aarch64-Add-bfloat16-vdup-and-vreinterpret-ACLE-intr.patch +8ea6c1b89a20ef7c675535ba1994355361dac977 + +0304-aarch64-Add-bfloat16-vldn-vstn-intrinsics.patch +e603cd43b145c426468c95cf85b3c12c94daedaa + +0305-aarch64-ACLE-intrinsics-for-BFCVTN-BFCVTN2-and-BFCVT.patch +1f520d3412962e22b0338461d82f41abba8a4f12 + +0306-testsuite-Fix-misquoted-string-in-bfcvt-nosimd.c.patch +db3fa3476e9e922ca3e283df03ebd14be7220b6e + +0307-aarch64-Fix-bf16_v-ld-st-n.c-failures-for-big-endian.patch +cf9c3bff39cf973c5c8621ff44199dcb831193a7 + +0308-testsuite-Fix-gcc.target-aarch64-advsimd-intrinsics-.patch +58a703f0726b3bb6c5ac8b600369106985906590 + +0309-cleanup-graphite-results.patch +1acde74cf611f560172c74324610c29ca81edf94 + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index bc188bbed..46ba89598 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1239,7 +1239,6 @@ OBJS = \ + auto-profile.o \ + bb-reorder.o \ + bitmap.o \ +- bt-load.o \ + builtins.o \ + caller-save.o \ + calls.o \ +@@ -1305,6 +1304,7 @@ OBJS = \ + fold-const.o \ + fold-const-call.o \ + function.o \ ++ function-abi.o \ + function-tests.o \ + fwprop.o \ + gcc-rich-location.o \ +@@ -2522,6 +2522,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ + $(srcdir)/libfuncs.h $(SYMTAB_H) \ + $(srcdir)/real.h $(srcdir)/function.h $(srcdir)/insn-addr.h 
$(srcdir)/hwint.h \ + $(srcdir)/fixed-value.h \ ++ $(srcdir)/function-abi.h \ + $(srcdir)/output.h $(srcdir)/cfgloop.h $(srcdir)/cfg.h $(srcdir)/profile-count.h \ + $(srcdir)/cselib.h $(srcdir)/basic-block.h $(srcdir)/ipa-ref.h $(srcdir)/cgraph.h \ + $(srcdir)/reload.h $(srcdir)/caller-save.c $(srcdir)/symtab.c \ +diff --git a/gcc/alias.c b/gcc/alias.c +index 053c3494e..1a60f905a 100644 +--- a/gcc/alias.c ++++ b/gcc/alias.c +@@ -1572,16 +1572,6 @@ record_set (rtx dest, const_rtx set, void *data ATTRIBUTE_UNUSED) + new_reg_base_value[regno] = 0; + return; + } +- /* A CLOBBER_HIGH only wipes out the old value if the mode of the old +- value is greater than that of the clobber. */ +- else if (GET_CODE (set) == CLOBBER_HIGH) +- { +- if (new_reg_base_value[regno] != 0 +- && reg_is_clobbered_by_clobber_high ( +- regno, GET_MODE (new_reg_base_value[regno]), XEXP (set, 0))) +- new_reg_base_value[regno] = 0; +- return; +- } + + src = SET_SRC (set); + } +@@ -3284,7 +3274,8 @@ memory_modified_in_insn_p (const_rtx mem, const_rtx insn) + if (CALL_P (insn)) + return true; + memory_modified = false; +- note_stores (PATTERN (insn), memory_modified_1, CONST_CAST_RTX(mem)); ++ note_stores (as_a (insn), memory_modified_1, ++ CONST_CAST_RTX(mem)); + return memory_modified; + } + +@@ -3412,7 +3403,7 @@ init_alias_analysis (void) + && find_reg_note (insn, REG_NOALIAS, NULL_RTX)) + record_set (SET_DEST (PATTERN (insn)), NULL_RTX, NULL); + else +- note_stores (PATTERN (insn), record_set, NULL); ++ note_stores (insn, record_set, NULL); + + set = single_set (insn); + +diff --git a/gcc/array-traits.h b/gcc/array-traits.h +new file mode 100644 +index 000000000..eb65ede94 +--- /dev/null ++++ b/gcc/array-traits.h +@@ -0,0 +1,48 @@ ++/* Descriptions of array-like objects. ++ Copyright (C) 2019 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef GCC_ARRAY_TRAITS_H ++#define GCC_ARRAY_TRAITS_H ++ ++/* Implementation for single integers (and similar types). */ ++template ++struct scalar_array_traits ++{ ++ typedef T element_type; ++ static const bool has_constant_size = true; ++ static const size_t constant_size = 1; ++ static const T *base (const T &x) { return &x; } ++ static size_t size (const T &) { return 1; } ++}; ++ ++template ++struct array_traits : scalar_array_traits {}; ++ ++/* Implementation for arrays with a static size. 
*/ ++template ++struct array_traits ++{ ++ typedef T element_type; ++ static const bool has_constant_size = true; ++ static const size_t constant_size = N; ++ static const T *base (const T (&x)[N]) { return x; } ++ static size_t size (const T (&x)[N]) { return N; } ++}; ++ ++#endif +diff --git a/gcc/attribs.c b/gcc/attribs.c +index 444192254..d447ea0e4 100644 +--- a/gcc/attribs.c ++++ b/gcc/attribs.c +@@ -691,6 +691,7 @@ decl_attributes (tree *node, tree attributes, int flags, + + if (!built_in + || !DECL_P (*anode) ++ || DECL_BUILT_IN_CLASS (*anode) != BUILT_IN_NORMAL + || (DECL_FUNCTION_CODE (*anode) != BUILT_IN_UNREACHABLE + && (DECL_FUNCTION_CODE (*anode) + != BUILT_IN_UBSAN_HANDLE_BUILTIN_UNREACHABLE))) +diff --git a/gcc/bitmap.c b/gcc/bitmap.c +index 5a8236de7..911d506f3 100644 +--- a/gcc/bitmap.c ++++ b/gcc/bitmap.c +@@ -958,17 +958,17 @@ bitmap_set_bit (bitmap head, int bit) + /* Return whether a bit is set within a bitmap. */ + + int +-bitmap_bit_p (bitmap head, int bit) ++bitmap_bit_p (const_bitmap head, int bit) + { + unsigned int indx = bit / BITMAP_ELEMENT_ALL_BITS; +- bitmap_element *ptr; ++ const bitmap_element *ptr; + unsigned bit_num; + unsigned word_num; + + if (!head->tree_form) +- ptr = bitmap_list_find_element (head, indx); ++ ptr = bitmap_list_find_element (const_cast (head), indx); + else +- ptr = bitmap_tree_find_element (head, indx); ++ ptr = bitmap_tree_find_element (const_cast (head), indx); + if (ptr == 0) + return 0; + +diff --git a/gcc/bitmap.h b/gcc/bitmap.h +index ed25c1ee5..7217f9e0a 100644 +--- a/gcc/bitmap.h ++++ b/gcc/bitmap.h +@@ -210,6 +210,7 @@ along with GCC; see the file COPYING3. If not see + on which many random-access membership tests will happen. */ + + #include "obstack.h" ++#include "array-traits.h" + + /* Bitmap memory usage. */ + struct bitmap_usage: public mem_usage +@@ -418,7 +419,7 @@ extern bool bitmap_clear_bit (bitmap, int); + extern bool bitmap_set_bit (bitmap, int); + + /* Return true if a bit is set in a bitmap. */ +-extern int bitmap_bit_p (bitmap, int); ++extern int bitmap_bit_p (const_bitmap, int); + + /* Debug functions to print a bitmap. */ + extern void debug_bitmap (const_bitmap); +@@ -937,4 +938,123 @@ class auto_bitmap + bitmap_head m_bits; + }; + ++/* Base class for bitmap_view; see there for details. */ ++template > ++class base_bitmap_view ++{ ++public: ++ typedef typename Traits::element_type array_element_type; ++ ++ base_bitmap_view (const T &, bitmap_element *); ++ operator const_bitmap () const { return &m_head; } ++ ++private: ++ base_bitmap_view (const base_bitmap_view &); ++ ++ bitmap_head m_head; ++}; ++ ++/* Provides a read-only bitmap view of a single integer bitmask or a ++ constant-sized array of integer bitmasks, or of a wrapper around such ++ bitmasks. */ ++template ++class bitmap_view : public base_bitmap_view ++{ ++public: ++ bitmap_view (const T &array) ++ : base_bitmap_view (array, m_bitmap_elements) {} ++ ++private: ++ /* How many bitmap_elements we need to hold a full T. */ ++ static const size_t num_bitmap_elements ++ = CEIL (CHAR_BIT ++ * sizeof (typename Traits::element_type) ++ * Traits::constant_size, ++ BITMAP_ELEMENT_ALL_BITS); ++ bitmap_element m_bitmap_elements[num_bitmap_elements]; ++}; ++ ++/* Initialize the view for array ARRAY, using the array of bitmap ++ elements in BITMAP_ELEMENTS (which is known to contain enough ++ entries). 
*/ ++template ++base_bitmap_view::base_bitmap_view (const T &array, ++ bitmap_element *bitmap_elements) ++{ ++ m_head.obstack = NULL; ++ ++ /* The code currently assumes that each element of ARRAY corresponds ++ to exactly one bitmap_element. */ ++ const size_t array_element_bits = CHAR_BIT * sizeof (array_element_type); ++ STATIC_ASSERT (BITMAP_ELEMENT_ALL_BITS % array_element_bits == 0); ++ size_t array_step = BITMAP_ELEMENT_ALL_BITS / array_element_bits; ++ size_t array_size = Traits::size (array); ++ ++ /* Process each potential bitmap_element in turn. The loop is written ++ this way rather than per array element because usually there are ++ only a small number of array elements per bitmap element (typically ++ two or four). The inner loops should therefore unroll completely. */ ++ const array_element_type *array_elements = Traits::base (array); ++ unsigned int indx = 0; ++ for (size_t array_base = 0; ++ array_base < array_size; ++ array_base += array_step, indx += 1) ++ { ++ /* How many array elements are in this particular bitmap_element. */ ++ unsigned int array_count ++ = (STATIC_CONSTANT_P (array_size % array_step == 0) ++ ? array_step : MIN (array_step, array_size - array_base)); ++ ++ /* See whether we need this bitmap element. */ ++ array_element_type ior = array_elements[array_base]; ++ for (size_t i = 1; i < array_count; ++i) ++ ior |= array_elements[array_base + i]; ++ if (ior == 0) ++ continue; ++ ++ /* Grab the next bitmap element and chain it. */ ++ bitmap_element *bitmap_element = bitmap_elements++; ++ if (m_head.current) ++ m_head.current->next = bitmap_element; ++ else ++ m_head.first = bitmap_element; ++ bitmap_element->prev = m_head.current; ++ bitmap_element->next = NULL; ++ bitmap_element->indx = indx; ++ m_head.current = bitmap_element; ++ m_head.indx = indx; ++ ++ /* Fill in the bits of the bitmap element. */ ++ if (array_element_bits < BITMAP_WORD_BITS) ++ { ++ /* Multiple array elements fit in one element of ++ bitmap_element->bits. */ ++ size_t array_i = array_base; ++ for (unsigned int word_i = 0; word_i < BITMAP_ELEMENT_WORDS; ++ ++word_i) ++ { ++ BITMAP_WORD word = 0; ++ for (unsigned int shift = 0; ++ shift < BITMAP_WORD_BITS && array_i < array_size; ++ shift += array_element_bits) ++ word |= array_elements[array_i++] << shift; ++ bitmap_element->bits[word_i] = word; ++ } ++ } ++ else ++ { ++ /* Array elements are the same size as elements of ++ bitmap_element->bits, or are an exact multiple of that size. */ ++ unsigned int word_i = 0; ++ for (unsigned int i = 0; i < array_count; ++i) ++ for (unsigned int shift = 0; shift < array_element_bits; ++ shift += BITMAP_WORD_BITS) ++ bitmap_element->bits[word_i++] ++ = array_elements[array_base + i] >> shift; ++ while (word_i < BITMAP_ELEMENT_WORDS) ++ bitmap_element->bits[word_i++] = 0; ++ } ++ } ++} ++ + #endif /* GCC_BITMAP_H */ +diff --git a/gcc/bt-load.c b/gcc/bt-load.c +deleted file mode 100644 +index f68879ca4..000000000 +--- a/gcc/bt-load.c ++++ /dev/null +@@ -1,1577 +0,0 @@ +-/* Perform branch target register load optimizations. +- Copyright (C) 2001-2019 Free Software Foundation, Inc. +- +-This file is part of GCC. +- +-GCC is free software; you can redistribute it and/or modify it under +-the terms of the GNU General Public License as published by the Free +-Software Foundation; either version 3, or (at your option) any later +-version. 
+- +-GCC is distributed in the hope that it will be useful, but WITHOUT ANY +-WARRANTY; without even the implied warranty of MERCHANTABILITY or +-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +-for more details. +- +-You should have received a copy of the GNU General Public License +-along with GCC; see the file COPYING3. If not see +-. */ +- +-#include "config.h" +-#include "system.h" +-#include "coretypes.h" +-#include "backend.h" +-#include "target.h" +-#include "rtl.h" +-#include "tree.h" +-#include "df.h" +-#include "insn-config.h" +-#include "regs.h" +-#include "memmodel.h" +-#include "emit-rtl.h" +-#include "recog.h" +-#include "diagnostic-core.h" +-#include "expr.h" +-#include "insn-attr.h" +-#include "tree-pass.h" +-#include "cfgrtl.h" +-#include "cfganal.h" +-#include "cfgcleanup.h" +-#include "cfgloop.h" +-#include "rtl-iter.h" +-#include "fibonacci_heap.h" +- +-struct btr_def; +- +-/* Target register optimizations - these are performed after reload. */ +- +-struct btr_def_group +-{ +- btr_def_group *next; +- rtx src; +- btr_def *members; +-}; +- +-struct btr_user +-{ +- btr_user *next; +- basic_block bb; +- int luid; +- rtx_insn *insn; +- /* If INSN has a single use of a single branch register, then +- USE points to it within INSN. If there is more than +- one branch register use, or the use is in some way ambiguous, +- then USE is NULL. */ +- rtx use; +- int n_reaching_defs; +- int first_reaching_def; +- char other_use_this_block; +-}; +- +-/* btr_def structs appear on three lists: +- 1. A list of all btr_def structures (head is +- ALL_BTR_DEFS, linked by the NEXT field). +- 2. A list of branch reg definitions per basic block (head is +- BB_BTR_DEFS[i], linked by the NEXT_THIS_BB field). +- 3. A list of all branch reg definitions belonging to the same +- group (head is in a BTR_DEF_GROUP struct, linked by +- NEXT_THIS_GROUP field). */ +- +-struct btr_def +-{ +- btr_def *next_this_bb; +- btr_def *next_this_group; +- basic_block bb; +- int luid; +- rtx_insn *insn; +- int btr; +- int cost; +- /* For a branch register setting insn that has a constant +- source (i.e. a label), group links together all the +- insns with the same source. For other branch register +- setting insns, group is NULL. */ +- btr_def_group *group; +- btr_user *uses; +- /* If this def has a reaching use which is not a simple use +- in a branch instruction, then has_ambiguous_use will be true, +- and we will not attempt to migrate this definition. */ +- char has_ambiguous_use; +- /* live_range is an approximation to the true live range for this +- def/use web, because it records the set of blocks that contain +- the live range. There could be other live ranges for the same +- branch register in that set of blocks, either in the block +- containing the def (before the def), or in a block containing +- a use (after the use). If there are such other live ranges, then +- other_btr_uses_before_def or other_btr_uses_after_use must be set true +- as appropriate. */ +- char other_btr_uses_before_def; +- char other_btr_uses_after_use; +- /* We set own_end when we have moved a definition into a dominator. +- Thus, when a later combination removes this definition again, we know +- to clear out trs_live_at_end again. 
*/ +- char own_end; +- bitmap live_range; +-}; +- +-typedef fibonacci_heap btr_heap_t; +-typedef fibonacci_node btr_heap_node_t; +- +-static int issue_rate; +- +-static int basic_block_freq (const_basic_block); +-static int insn_sets_btr_p (const rtx_insn *, int, int *); +-static void find_btr_def_group (btr_def_group **, btr_def *); +-static btr_def *add_btr_def (btr_heap_t *, basic_block, int, rtx_insn *, +- unsigned int, int, btr_def_group **); +-static btr_user *new_btr_user (basic_block, int, rtx_insn *); +-static void dump_hard_reg_set (HARD_REG_SET); +-static void dump_btrs_live (int); +-static void note_other_use_this_block (unsigned int, btr_user *); +-static void compute_defs_uses_and_gen (btr_heap_t *, btr_def **, btr_user **, +- sbitmap *, sbitmap *, HARD_REG_SET *); +-static void compute_kill (sbitmap *, sbitmap *, HARD_REG_SET *); +-static void compute_out (sbitmap *bb_out, sbitmap *, sbitmap *, int); +-static void link_btr_uses (btr_def **, btr_user **, sbitmap *, sbitmap *, int); +-static void build_btr_def_use_webs (btr_heap_t *); +-static int block_at_edge_of_live_range_p (int, btr_def *); +-static void clear_btr_from_live_range (btr_def *def); +-static void add_btr_to_live_range (btr_def *, int); +-static void augment_live_range (bitmap, HARD_REG_SET *, basic_block, +- basic_block, int); +-static int choose_btr (HARD_REG_SET); +-static void combine_btr_defs (btr_def *, HARD_REG_SET *); +-static void btr_def_live_range (btr_def *, HARD_REG_SET *); +-static void move_btr_def (basic_block, int, btr_def *, bitmap, HARD_REG_SET *); +-static int migrate_btr_def (btr_def *, int); +-static void migrate_btr_defs (enum reg_class, int); +-static int can_move_up (const_basic_block, const rtx_insn *, int); +-static void note_btr_set (rtx, const_rtx, void *); +- +-/* The following code performs code motion of target load instructions +- (instructions that set branch target registers), to move them +- forward away from the branch instructions and out of loops (or, +- more generally, from a more frequently executed place to a less +- frequently executed place). +- Moving target load instructions further in front of the branch +- instruction that uses the target register value means that the hardware +- has a better chance of preloading the instructions at the branch +- target by the time the branch is reached. This avoids bubbles +- when a taken branch needs to flush out the pipeline. +- Moving target load instructions out of loops means they are executed +- less frequently. */ +- +-/* An obstack to hold the def-use web data structures built up for +- migrating branch target load instructions. */ +-static struct obstack migrate_btrl_obstack; +- +-/* Array indexed by basic block number, giving the set of registers +- live in that block. */ +-static HARD_REG_SET *btrs_live; +- +-/* Array indexed by basic block number, giving the set of registers live at +- the end of that block, including any uses by a final jump insn, if any. */ +-static HARD_REG_SET *btrs_live_at_end; +- +-/* Set of all target registers that we are willing to allocate. */ +-static HARD_REG_SET all_btrs; +- +-/* Provide lower and upper bounds for target register numbers, so that +- we don't need to search through all the hard registers all the time. */ +-static int first_btr, last_btr; +- +- +- +-/* Return an estimate of the frequency of execution of block bb. 
*/ +-static int +-basic_block_freq (const_basic_block bb) +-{ +- return bb->count.to_frequency (cfun); +-} +- +-/* If the rtx at *XP references (sets or reads) any branch target +- register, return one such register. If EXCLUDEP is set, disregard +- any references within that location. */ +-static rtx * +-find_btr_use (rtx *xp, rtx *excludep = 0) +-{ +- subrtx_ptr_iterator::array_type array; +- FOR_EACH_SUBRTX_PTR (iter, array, xp, NONCONST) +- { +- rtx *loc = *iter; +- if (loc == excludep) +- iter.skip_subrtxes (); +- else +- { +- const_rtx x = *loc; +- if (REG_P (x) +- && overlaps_hard_reg_set_p (all_btrs, GET_MODE (x), REGNO (x))) +- return loc; +- } +- } +- return 0; +-} +- +-/* Return true if insn is an instruction that sets a target register. +- if CHECK_CONST is true, only return true if the source is constant. +- If such a set is found and REGNO is nonzero, assign the register number +- of the destination register to *REGNO. */ +-static int +-insn_sets_btr_p (const rtx_insn *insn, int check_const, int *regno) +-{ +- rtx set; +- +- if (NONJUMP_INSN_P (insn) +- && (set = single_set (insn))) +- { +- rtx dest = SET_DEST (set); +- rtx src = SET_SRC (set); +- +- if (GET_CODE (dest) == SUBREG) +- dest = XEXP (dest, 0); +- +- if (REG_P (dest) +- && TEST_HARD_REG_BIT (all_btrs, REGNO (dest))) +- { +- gcc_assert (!find_btr_use (&src)); +- +- if (!check_const || CONSTANT_P (src)) +- { +- if (regno) +- *regno = REGNO (dest); +- return 1; +- } +- } +- } +- return 0; +-} +- +-/* Find the group that the target register definition DEF belongs +- to in the list starting with *ALL_BTR_DEF_GROUPS. If no such +- group exists, create one. Add def to the group. */ +-static void +-find_btr_def_group (btr_def_group **all_btr_def_groups, btr_def *def) +-{ +- if (insn_sets_btr_p (def->insn, 1, NULL)) +- { +- btr_def_group *this_group; +- rtx def_src = SET_SRC (single_set (def->insn)); +- +- /* ?? This linear search is an efficiency concern, particularly +- as the search will almost always fail to find a match. */ +- for (this_group = *all_btr_def_groups; +- this_group != NULL; +- this_group = this_group->next) +- if (rtx_equal_p (def_src, this_group->src)) +- break; +- +- if (!this_group) +- { +- this_group = XOBNEW (&migrate_btrl_obstack, btr_def_group); +- this_group->src = def_src; +- this_group->members = NULL; +- this_group->next = *all_btr_def_groups; +- *all_btr_def_groups = this_group; +- } +- def->group = this_group; +- def->next_this_group = this_group->members; +- this_group->members = def; +- } +- else +- def->group = NULL; +-} +- +-/* Create a new target register definition structure, for a definition in +- block BB, instruction INSN, and insert it into ALL_BTR_DEFS. Return +- the new definition. 
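As a side note on the pass being removed here: find_btr_use above walks every sub-expression of a pattern and, when EXCLUDEP is given, skips the subtree rooted there; new_btr_user further down relies on that to decide whether an insn references exactly one branch register (find the first use, then search again with that use excluded). The stand-alone sketch below shows the same walk-with-exclusion idea on a plain expression tree; Expr, find_reg_use and single_reg_use_p are illustrative stand-ins, not GCC's rtl API.

    #include <cstdio>
    #include <vector>

    /* Minimal expression node: a register leaf or an operator with
       sub-expressions.  Purely illustrative, not GCC rtl.  */
    struct Expr
    {
      bool is_reg;
      int regno;
      std::vector<Expr *> ops;
    };

    /* Return some sub-expression of X that references REGNO, ignoring
       everything underneath EXCLUDE (the role EXCLUDEP plays above).
       Return null if there is no such reference.  */
    static Expr *
    find_reg_use (Expr *x, int regno, Expr *exclude = nullptr)
    {
      if (x == exclude)
        return nullptr;                 /* Skip the excluded subtree.  */
      if (x->is_reg)
        return x->regno == regno ? x : nullptr;
      for (Expr *op : x->ops)
        if (Expr *hit = find_reg_use (op, regno, exclude))
          return hit;
      return nullptr;
    }

    /* "Exactly one use" falls out the same way new_btr_user checks it:
       find the first use, then search again excluding it.  */
    static bool
    single_reg_use_p (Expr *pattern, int regno)
    {
      Expr *first = find_reg_use (pattern, regno);
      return first && !find_reg_use (pattern, regno, first);
    }

    int
    main ()
    {
      Expr r5a = { true, 5, {} }, r5b = { true, 5, {} }, r7 = { true, 7, {} };
      Expr plus = { false, -1, { &r5a, &r7 } };
      Expr top  = { false, -1, { &plus, &r5b } };
      std::printf ("%d %d\n",
                   single_reg_use_p (&plus, 5),   /* 1: exactly one use  */
                   single_reg_use_p (&top, 5));   /* 0: two uses of reg 5  */
      return 0;
    }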
*/ +-static btr_def * +-add_btr_def (btr_heap_t *all_btr_defs, basic_block bb, int insn_luid, +- rtx_insn *insn, +- unsigned int dest_reg, int other_btr_uses_before_def, +- btr_def_group **all_btr_def_groups) +-{ +- btr_def *this_def = XOBNEW (&migrate_btrl_obstack, btr_def); +- this_def->bb = bb; +- this_def->luid = insn_luid; +- this_def->insn = insn; +- this_def->btr = dest_reg; +- this_def->cost = basic_block_freq (bb); +- this_def->has_ambiguous_use = 0; +- this_def->other_btr_uses_before_def = other_btr_uses_before_def; +- this_def->other_btr_uses_after_use = 0; +- this_def->next_this_bb = NULL; +- this_def->next_this_group = NULL; +- this_def->uses = NULL; +- this_def->live_range = NULL; +- find_btr_def_group (all_btr_def_groups, this_def); +- +- all_btr_defs->insert (-this_def->cost, this_def); +- +- if (dump_file) +- fprintf (dump_file, +- "Found target reg definition: sets %u { bb %d, insn %d }%s priority %d\n", +- dest_reg, bb->index, INSN_UID (insn), +- (this_def->group ? "" : ":not const"), this_def->cost); +- +- return this_def; +-} +- +-/* Create a new target register user structure, for a use in block BB, +- instruction INSN. Return the new user. */ +-static btr_user * +-new_btr_user (basic_block bb, int insn_luid, rtx_insn *insn) +-{ +- /* This instruction reads target registers. We need +- to decide whether we can replace all target register +- uses easily. +- */ +- rtx *usep = find_btr_use (&PATTERN (insn)); +- rtx use; +- btr_user *user = NULL; +- +- if (usep) +- { +- int unambiguous_single_use; +- +- /* We want to ensure that USE is the only use of a target +- register in INSN, so that we know that to rewrite INSN to use +- a different target register, all we have to do is replace USE. */ +- unambiguous_single_use = !find_btr_use (&PATTERN (insn), usep); +- if (!unambiguous_single_use) +- usep = NULL; +- } +- use = usep ? *usep : NULL_RTX; +- user = XOBNEW (&migrate_btrl_obstack, btr_user); +- user->bb = bb; +- user->luid = insn_luid; +- user->insn = insn; +- user->use = use; +- user->other_use_this_block = 0; +- user->next = NULL; +- user->n_reaching_defs = 0; +- user->first_reaching_def = -1; +- +- if (dump_file) +- { +- fprintf (dump_file, "Uses target reg: { bb %d, insn %d }", +- bb->index, INSN_UID (insn)); +- +- if (user->use) +- fprintf (dump_file, ": unambiguous use of reg %d\n", +- REGNO (user->use)); +- } +- +- return user; +-} +- +-/* Write the contents of S to the dump file. */ +-static void +-dump_hard_reg_set (HARD_REG_SET s) +-{ +- int reg; +- for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++) +- if (TEST_HARD_REG_BIT (s, reg)) +- fprintf (dump_file, " %d", reg); +-} +- +-/* Write the set of target regs live in block BB to the dump file. */ +-static void +-dump_btrs_live (int bb) +-{ +- fprintf (dump_file, "BB%d live:", bb); +- dump_hard_reg_set (btrs_live[bb]); +- fprintf (dump_file, "\n"); +-} +- +-/* REGNO is the number of a branch target register that is being used or +- set. USERS_THIS_BB is a list of preceding branch target register users; +- If any of them use the same register, set their other_use_this_block +- flag. 
*/ +-static void +-note_other_use_this_block (unsigned int regno, btr_user *users_this_bb) +-{ +- btr_user *user; +- +- for (user = users_this_bb; user != NULL; user = user->next) +- if (user->use && REGNO (user->use) == regno) +- user->other_use_this_block = 1; +-} +- +-struct defs_uses_info { +- btr_user *users_this_bb; +- HARD_REG_SET btrs_written_in_block; +- HARD_REG_SET btrs_live_in_block; +- sbitmap bb_gen; +- sbitmap *btr_defset; +-}; +- +-/* Called via note_stores or directly to register stores into / +- clobbers of a branch target register DEST that are not recognized as +- straightforward definitions. DATA points to information about the +- current basic block that needs updating. */ +-static void +-note_btr_set (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) +-{ +- defs_uses_info *info = (defs_uses_info *) data; +- int regno, end_regno; +- +- if (!REG_P (dest)) +- return; +- regno = REGNO (dest); +- end_regno = END_REGNO (dest); +- for (; regno < end_regno; regno++) +- if (TEST_HARD_REG_BIT (all_btrs, regno)) +- { +- note_other_use_this_block (regno, info->users_this_bb); +- SET_HARD_REG_BIT (info->btrs_written_in_block, regno); +- SET_HARD_REG_BIT (info->btrs_live_in_block, regno); +- bitmap_and_compl (info->bb_gen, info->bb_gen, +- info->btr_defset[regno - first_btr]); +- } +-} +- +-static void +-compute_defs_uses_and_gen (btr_heap_t *all_btr_defs, btr_def **def_array, +- btr_user **use_array, sbitmap *btr_defset, +- sbitmap *bb_gen, HARD_REG_SET *btrs_written) +-{ +- /* Scan the code building up the set of all defs and all uses. +- For each target register, build the set of defs of that register. +- For each block, calculate the set of target registers +- written in that block. +- Also calculate the set of btrs ever live in that block. +- */ +- int i; +- int insn_luid = 0; +- btr_def_group *all_btr_def_groups = NULL; +- defs_uses_info info; +- +- bitmap_vector_clear (bb_gen, last_basic_block_for_fn (cfun)); +- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) +- { +- basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); +- int reg; +- btr_def *defs_this_bb = NULL; +- rtx_insn *insn; +- rtx_insn *last; +- int can_throw = 0; +- +- info.users_this_bb = NULL; +- info.bb_gen = bb_gen[i]; +- info.btr_defset = btr_defset; +- +- CLEAR_HARD_REG_SET (info.btrs_live_in_block); +- CLEAR_HARD_REG_SET (info.btrs_written_in_block); +- for (reg = first_btr; reg <= last_btr; reg++) +- if (TEST_HARD_REG_BIT (all_btrs, reg) +- && REGNO_REG_SET_P (df_get_live_in (bb), reg)) +- SET_HARD_REG_BIT (info.btrs_live_in_block, reg); +- +- for (insn = BB_HEAD (bb), last = NEXT_INSN (BB_END (bb)); +- insn != last; +- insn = NEXT_INSN (insn), insn_luid++) +- { +- if (INSN_P (insn)) +- { +- int regno; +- int insn_uid = INSN_UID (insn); +- +- if (insn_sets_btr_p (insn, 0, ®no)) +- { +- btr_def *def = add_btr_def ( +- all_btr_defs, bb, insn_luid, insn, regno, +- TEST_HARD_REG_BIT (info.btrs_live_in_block, regno), +- &all_btr_def_groups); +- +- def_array[insn_uid] = def; +- SET_HARD_REG_BIT (info.btrs_written_in_block, regno); +- SET_HARD_REG_BIT (info.btrs_live_in_block, regno); +- bitmap_and_compl (bb_gen[i], bb_gen[i], +- btr_defset[regno - first_btr]); +- bitmap_set_bit (bb_gen[i], insn_uid); +- def->next_this_bb = defs_this_bb; +- defs_this_bb = def; +- bitmap_set_bit (btr_defset[regno - first_btr], insn_uid); +- note_other_use_this_block (regno, info.users_this_bb); +- } +- /* Check for the blockage emitted by expand_nl_goto_receiver. 
*/ +- else if (cfun->has_nonlocal_label +- && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE) +- { +- btr_user *user; +- +- /* Do the equivalent of calling note_other_use_this_block +- for every target register. */ +- for (user = info.users_this_bb; user != NULL; +- user = user->next) +- if (user->use) +- user->other_use_this_block = 1; +- IOR_HARD_REG_SET (info.btrs_written_in_block, all_btrs); +- IOR_HARD_REG_SET (info.btrs_live_in_block, all_btrs); +- bitmap_clear (info.bb_gen); +- } +- else +- { +- if (find_btr_use (&PATTERN (insn))) +- { +- btr_user *user = new_btr_user (bb, insn_luid, insn); +- +- use_array[insn_uid] = user; +- if (user->use) +- SET_HARD_REG_BIT (info.btrs_live_in_block, +- REGNO (user->use)); +- else +- { +- int reg; +- for (reg = first_btr; reg <= last_btr; reg++) +- if (TEST_HARD_REG_BIT (all_btrs, reg) +- && refers_to_regno_p (reg, user->insn)) +- { +- note_other_use_this_block (reg, +- info.users_this_bb); +- SET_HARD_REG_BIT (info.btrs_live_in_block, reg); +- } +- note_stores (PATTERN (insn), note_btr_set, &info); +- } +- user->next = info.users_this_bb; +- info.users_this_bb = user; +- } +- if (CALL_P (insn)) +- { +- HARD_REG_SET *clobbered = &call_used_reg_set; +- HARD_REG_SET call_saved; +- rtx pat = PATTERN (insn); +- int i; +- +- /* Check for sibcall. */ +- if (GET_CODE (pat) == PARALLEL) +- for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) +- if (ANY_RETURN_P (XVECEXP (pat, 0, i))) +- { +- COMPL_HARD_REG_SET (call_saved, +- call_used_reg_set); +- clobbered = &call_saved; +- } +- +- for (regno = first_btr; regno <= last_btr; regno++) +- if (TEST_HARD_REG_BIT (*clobbered, regno)) +- note_btr_set (regno_reg_rtx[regno], NULL_RTX, &info); +- } +- } +- } +- } +- +- COPY_HARD_REG_SET (btrs_live[i], info.btrs_live_in_block); +- COPY_HARD_REG_SET (btrs_written[i], info.btrs_written_in_block); +- +- REG_SET_TO_HARD_REG_SET (btrs_live_at_end[i], df_get_live_out (bb)); +- /* If this block ends in a jump insn, add any uses or even clobbers +- of branch target registers that it might have. */ +- for (insn = BB_END (bb); insn != BB_HEAD (bb) && ! INSN_P (insn); ) +- insn = PREV_INSN (insn); +- /* ??? for the fall-through edge, it would make sense to insert the +- btr set on the edge, but that would require to split the block +- early on so that we can distinguish between dominance from the fall +- through edge - which can use the call-clobbered registers - from +- dominance by the throw edge. */ +- if (can_throw_internal (insn)) +- { +- HARD_REG_SET tmp; +- +- COPY_HARD_REG_SET (tmp, call_used_reg_set); +- AND_HARD_REG_SET (tmp, all_btrs); +- IOR_HARD_REG_SET (btrs_live_at_end[i], tmp); +- can_throw = 1; +- } +- if (can_throw || JUMP_P (insn)) +- { +- int regno; +- +- for (regno = first_btr; regno <= last_btr; regno++) +- if (refers_to_regno_p (regno, insn)) +- SET_HARD_REG_BIT (btrs_live_at_end[i], regno); +- } +- +- if (dump_file) +- dump_btrs_live (i); +- } +-} +- +-static void +-compute_kill (sbitmap *bb_kill, sbitmap *btr_defset, +- HARD_REG_SET *btrs_written) +-{ +- int i; +- int regno; +- +- /* For each basic block, form the set BB_KILL - the set +- of definitions that the block kills. 
*/ +- bitmap_vector_clear (bb_kill, last_basic_block_for_fn (cfun)); +- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) +- { +- for (regno = first_btr; regno <= last_btr; regno++) +- if (TEST_HARD_REG_BIT (all_btrs, regno) +- && TEST_HARD_REG_BIT (btrs_written[i], regno)) +- bitmap_ior (bb_kill[i], bb_kill[i], +- btr_defset[regno - first_btr]); +- } +-} +- +-static void +-compute_out (sbitmap *bb_out, sbitmap *bb_gen, sbitmap *bb_kill, int max_uid) +-{ +- /* Perform iterative dataflow: +- Initially, for all blocks, BB_OUT = BB_GEN. +- For each block, +- BB_IN = union over predecessors of BB_OUT(pred) +- BB_OUT = (BB_IN - BB_KILL) + BB_GEN +- Iterate until the bb_out sets stop growing. */ +- int i; +- int changed; +- auto_sbitmap bb_in (max_uid); +- +- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) +- bitmap_copy (bb_out[i], bb_gen[i]); +- +- changed = 1; +- while (changed) +- { +- changed = 0; +- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) +- { +- bitmap_union_of_preds (bb_in, bb_out, BASIC_BLOCK_FOR_FN (cfun, i)); +- changed |= bitmap_ior_and_compl (bb_out[i], bb_gen[i], +- bb_in, bb_kill[i]); +- } +- } +-} +- +-static void +-link_btr_uses (btr_def **def_array, btr_user **use_array, sbitmap *bb_out, +- sbitmap *btr_defset, int max_uid) +-{ +- int i; +- auto_sbitmap reaching_defs (max_uid); +- +- /* Link uses to the uses lists of all of their reaching defs. +- Count up the number of reaching defs of each use. */ +- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) +- { +- basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); +- rtx_insn *insn; +- rtx_insn *last; +- +- bitmap_union_of_preds (reaching_defs, bb_out, BASIC_BLOCK_FOR_FN (cfun, i)); +- for (insn = BB_HEAD (bb), last = NEXT_INSN (BB_END (bb)); +- insn != last; +- insn = NEXT_INSN (insn)) +- { +- if (INSN_P (insn)) +- { +- int insn_uid = INSN_UID (insn); +- +- btr_def *def = def_array[insn_uid]; +- btr_user *user = use_array[insn_uid]; +- if (def != NULL) +- { +- /* Remove all reaching defs of regno except +- for this one. */ +- bitmap_and_compl (reaching_defs, reaching_defs, +- btr_defset[def->btr - first_btr]); +- bitmap_set_bit (reaching_defs, insn_uid); +- } +- +- if (user != NULL) +- { +- /* Find all the reaching defs for this use. */ +- auto_sbitmap reaching_defs_of_reg (max_uid); +- unsigned int uid = 0; +- sbitmap_iterator sbi; +- +- if (user->use) +- bitmap_and ( +- reaching_defs_of_reg, +- reaching_defs, +- btr_defset[REGNO (user->use) - first_btr]); +- else +- { +- int reg; +- +- bitmap_clear (reaching_defs_of_reg); +- for (reg = first_btr; reg <= last_btr; reg++) +- if (TEST_HARD_REG_BIT (all_btrs, reg) +- && refers_to_regno_p (reg, user->insn)) +- bitmap_or_and (reaching_defs_of_reg, +- reaching_defs_of_reg, +- reaching_defs, +- btr_defset[reg - first_btr]); +- } +- EXECUTE_IF_SET_IN_BITMAP (reaching_defs_of_reg, 0, uid, sbi) +- { +- btr_def *def = def_array[uid]; +- +- /* We now know that def reaches user. */ +- +- if (dump_file) +- fprintf (dump_file, +- "Def in insn %d reaches use in insn %d\n", +- uid, insn_uid); +- +- user->n_reaching_defs++; +- if (!user->use) +- def->has_ambiguous_use = 1; +- if (user->first_reaching_def != -1) +- { /* There is more than one reaching def. This is +- a rare case, so just give up on this def/use +- web when it occurs. 
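compute_out above is the textbook iterative reaching-definitions solver: OUT starts as GEN, IN is the union of the predecessors' OUT sets, and OUT = GEN | (IN & ~KILL) is recomputed until nothing changes (GEN having been built earlier, where each new definition of a register clears that register's previous definitions from the block's GEN set). The sketch below runs that fixpoint on a toy diamond CFG, with 64-bit masks standing in for the sbitmaps; Block and solve are illustrative names only.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Block
    {
      std::vector<int> preds;   /* indices of predecessor blocks            */
      std::uint64_t gen = 0;    /* definitions generated in this block      */
      std::uint64_t kill = 0;   /* definitions killed by this block         */
      std::uint64_t out = 0;    /* solution: definitions reaching the end   */
    };

    /* Iterate OUT = GEN | (IN & ~KILL), IN = union of predecessor OUTs,
       until no OUT set changes -- the same loop as compute_out above.  */
    static void
    solve (std::vector<Block> &bbs)
    {
      for (Block &b : bbs)
        b.out = b.gen;

      bool changed = true;
      while (changed)
        {
          changed = false;
          for (Block &b : bbs)
            {
              std::uint64_t in = 0;
              for (int p : b.preds)
                in |= bbs[p].out;
              std::uint64_t out = b.gen | (in & ~b.kill);
              if (out != b.out)
                {
                  b.out = out;
                  changed = true;
                }
            }
        }
    }

    int
    main ()
    {
      /* Diamond CFG: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3.  */
      std::vector<Block> bbs (4);
      bbs[1].preds = { 0 };
      bbs[2].preds = { 0 };
      bbs[3].preds = { 1, 2 };
      bbs[0].gen = 0x1;                     /* def bit 0 in block 0          */
      bbs[2].gen = 0x2; bbs[2].kill = 0x1;  /* block 2 redefines, kills it   */
      solve (bbs);
      /* Prints 0x3: bit 0 reaches block 3 via block 1, bit 1 via block 2.  */
      std::printf ("out(3) = %#llx\n", (unsigned long long) bbs[3].out);
      return 0;
    }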
*/ +- def->has_ambiguous_use = 1; +- def_array[user->first_reaching_def] +- ->has_ambiguous_use = 1; +- if (dump_file) +- fprintf (dump_file, +- "(use %d has multiple reaching defs)\n", +- insn_uid); +- } +- else +- user->first_reaching_def = uid; +- if (user->other_use_this_block) +- def->other_btr_uses_after_use = 1; +- user->next = def->uses; +- def->uses = user; +- } +- } +- +- if (CALL_P (insn)) +- { +- int regno; +- +- for (regno = first_btr; regno <= last_btr; regno++) +- if (TEST_HARD_REG_BIT (all_btrs, regno) +- && TEST_HARD_REG_BIT (call_used_reg_set, regno)) +- bitmap_and_compl (reaching_defs, reaching_defs, +- btr_defset[regno - first_btr]); +- } +- } +- } +- } +-} +- +-static void +-build_btr_def_use_webs (btr_heap_t *all_btr_defs) +-{ +- const int max_uid = get_max_uid (); +- btr_def **def_array = XCNEWVEC (btr_def *, max_uid); +- btr_user **use_array = XCNEWVEC (btr_user *, max_uid); +- sbitmap *btr_defset = sbitmap_vector_alloc ( +- (last_btr - first_btr) + 1, max_uid); +- sbitmap *bb_gen = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), +- max_uid); +- HARD_REG_SET *btrs_written = XCNEWVEC (HARD_REG_SET, +- last_basic_block_for_fn (cfun)); +- sbitmap *bb_kill; +- sbitmap *bb_out; +- +- bitmap_vector_clear (btr_defset, (last_btr - first_btr) + 1); +- +- compute_defs_uses_and_gen (all_btr_defs, def_array, use_array, btr_defset, +- bb_gen, btrs_written); +- +- bb_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), max_uid); +- compute_kill (bb_kill, btr_defset, btrs_written); +- free (btrs_written); +- +- bb_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), max_uid); +- compute_out (bb_out, bb_gen, bb_kill, max_uid); +- +- sbitmap_vector_free (bb_gen); +- sbitmap_vector_free (bb_kill); +- +- link_btr_uses (def_array, use_array, bb_out, btr_defset, max_uid); +- +- sbitmap_vector_free (bb_out); +- sbitmap_vector_free (btr_defset); +- free (use_array); +- free (def_array); +-} +- +-/* Return true if basic block BB contains the start or end of the +- live range of the definition DEF, AND there are other live +- ranges of the same target register that include BB. */ +-static int +-block_at_edge_of_live_range_p (int bb, btr_def *def) +-{ +- if (def->other_btr_uses_before_def +- && BASIC_BLOCK_FOR_FN (cfun, bb) == def->bb) +- return 1; +- else if (def->other_btr_uses_after_use) +- { +- btr_user *user; +- for (user = def->uses; user != NULL; user = user->next) +- if (BASIC_BLOCK_FOR_FN (cfun, bb) == user->bb) +- return 1; +- } +- return 0; +-} +- +-/* We are removing the def/use web DEF. The target register +- used in this web is therefore no longer live in the live range +- of this web, so remove it from the live set of all basic blocks +- in the live range of the web. +- Blocks at the boundary of the live range may contain other live +- ranges for the same target register, so we have to be careful +- to remove the target register from the live set of these blocks +- only if they do not contain other live ranges for the same register. 
*/ +-static void +-clear_btr_from_live_range (btr_def *def) +-{ +- unsigned bb; +- bitmap_iterator bi; +- +- EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi) +- { +- if ((!def->other_btr_uses_before_def +- && !def->other_btr_uses_after_use) +- || !block_at_edge_of_live_range_p (bb, def)) +- { +- CLEAR_HARD_REG_BIT (btrs_live[bb], def->btr); +- CLEAR_HARD_REG_BIT (btrs_live_at_end[bb], def->btr); +- if (dump_file) +- dump_btrs_live (bb); +- } +- } +- if (def->own_end) +- CLEAR_HARD_REG_BIT (btrs_live_at_end[def->bb->index], def->btr); +-} +- +- +-/* We are adding the def/use web DEF. Add the target register used +- in this web to the live set of all of the basic blocks that contain +- the live range of the web. +- If OWN_END is set, also show that the register is live from our +- definitions at the end of the basic block where it is defined. */ +-static void +-add_btr_to_live_range (btr_def *def, int own_end) +-{ +- unsigned bb; +- bitmap_iterator bi; +- +- EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi) +- { +- SET_HARD_REG_BIT (btrs_live[bb], def->btr); +- SET_HARD_REG_BIT (btrs_live_at_end[bb], def->btr); +- if (dump_file) +- dump_btrs_live (bb); +- } +- if (own_end) +- { +- SET_HARD_REG_BIT (btrs_live_at_end[def->bb->index], def->btr); +- def->own_end = 1; +- } +-} +- +-/* Update a live range to contain the basic block NEW_BLOCK, and all +- blocks on paths between the existing live range and NEW_BLOCK. +- HEAD is a block contained in the existing live range that dominates +- all other blocks in the existing live range. +- Also add to the set BTRS_LIVE_IN_RANGE all target registers that +- are live in the blocks that we add to the live range. +- If FULL_RANGE is set, include the full live range of NEW_BB; +- otherwise, if NEW_BB dominates HEAD_BB, only add registers that +- are life at the end of NEW_BB for NEW_BB itself. +- It is a precondition that either NEW_BLOCK dominates HEAD,or +- HEAD dom NEW_BLOCK. This is used to speed up the +- implementation of this function. */ +-static void +-augment_live_range (bitmap live_range, HARD_REG_SET *btrs_live_in_range, +- basic_block head_bb, basic_block new_bb, int full_range) +-{ +- basic_block *worklist, *tos; +- +- tos = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) + 1); +- +- if (dominated_by_p (CDI_DOMINATORS, new_bb, head_bb)) +- { +- if (new_bb == head_bb) +- { +- if (full_range) +- IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[new_bb->index]); +- free (tos); +- return; +- } +- *tos++ = new_bb; +- } +- else +- { +- edge e; +- edge_iterator ei; +- int new_block = new_bb->index; +- +- gcc_assert (dominated_by_p (CDI_DOMINATORS, head_bb, new_bb)); +- +- IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[head_bb->index]); +- bitmap_set_bit (live_range, new_block); +- /* A previous btr migration could have caused a register to be +- live just at the end of new_block which we need in full, so +- use trs_live_at_end even if full_range is set. 
*/ +- IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live_at_end[new_block]); +- if (full_range) +- IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[new_block]); +- if (dump_file) +- { +- fprintf (dump_file, +- "Adding end of block %d and rest of %d to live range\n", +- new_block, head_bb->index); +- fprintf (dump_file,"Now live btrs are "); +- dump_hard_reg_set (*btrs_live_in_range); +- fprintf (dump_file, "\n"); +- } +- FOR_EACH_EDGE (e, ei, head_bb->preds) +- *tos++ = e->src; +- } +- +- while (tos != worklist) +- { +- basic_block bb = *--tos; +- if (!bitmap_bit_p (live_range, bb->index)) +- { +- edge e; +- edge_iterator ei; +- +- bitmap_set_bit (live_range, bb->index); +- IOR_HARD_REG_SET (*btrs_live_in_range, +- btrs_live[bb->index]); +- /* A previous btr migration could have caused a register to be +- live just at the end of a block which we need in full. */ +- IOR_HARD_REG_SET (*btrs_live_in_range, +- btrs_live_at_end[bb->index]); +- if (dump_file) +- { +- fprintf (dump_file, +- "Adding block %d to live range\n", bb->index); +- fprintf (dump_file,"Now live btrs are "); +- dump_hard_reg_set (*btrs_live_in_range); +- fprintf (dump_file, "\n"); +- } +- +- FOR_EACH_EDGE (e, ei, bb->preds) +- { +- basic_block pred = e->src; +- if (!bitmap_bit_p (live_range, pred->index)) +- *tos++ = pred; +- } +- } +- } +- +- free (worklist); +-} +- +-/* Return the most desirable target register that is not in +- the set USED_BTRS. */ +-static int +-choose_btr (HARD_REG_SET used_btrs) +-{ +- int i; +- +- if (!hard_reg_set_subset_p (all_btrs, used_btrs)) +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- { +-#ifdef REG_ALLOC_ORDER +- int regno = reg_alloc_order[i]; +-#else +- int regno = i; +-#endif +- if (TEST_HARD_REG_BIT (all_btrs, regno) +- && !TEST_HARD_REG_BIT (used_btrs, regno)) +- return regno; +- } +- return -1; +-} +- +-/* Calculate the set of basic blocks that contain the live range of +- the def/use web DEF. +- Also calculate the set of target registers that are live at time +- in this live range, but ignore the live range represented by DEF +- when calculating this set. */ +-static void +-btr_def_live_range (btr_def *def, HARD_REG_SET *btrs_live_in_range) +-{ +- if (!def->live_range) +- { +- btr_user *user; +- +- def->live_range = BITMAP_ALLOC (NULL); +- +- bitmap_set_bit (def->live_range, def->bb->index); +- COPY_HARD_REG_SET (*btrs_live_in_range, +- (flag_btr_bb_exclusive +- ? btrs_live : btrs_live_at_end)[def->bb->index]); +- +- for (user = def->uses; user != NULL; user = user->next) +- augment_live_range (def->live_range, btrs_live_in_range, +- def->bb, user->bb, +- (flag_btr_bb_exclusive +- || user->insn != BB_END (def->bb) +- || !JUMP_P (user->insn))); +- } +- else +- { +- /* def->live_range is accurate, but we need to recompute +- the set of target registers live over it, because migration +- of other PT instructions may have affected it. +- */ +- unsigned bb; +- unsigned def_bb = flag_btr_bb_exclusive ? -1 : def->bb->index; +- bitmap_iterator bi; +- +- CLEAR_HARD_REG_SET (*btrs_live_in_range); +- EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi) +- { +- IOR_HARD_REG_SET (*btrs_live_in_range, +- (def_bb == bb +- ? btrs_live_at_end : btrs_live) [bb]); +- } +- } +- if (!def->other_btr_uses_before_def && +- !def->other_btr_uses_after_use) +- CLEAR_HARD_REG_BIT (*btrs_live_in_range, def->btr); +-} +- +-/* Merge into the def/use web DEF any other def/use webs in the same +- group that are dominated by DEF, provided that there is a target +- register available to allocate to the merged web. 
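augment_live_range above grows a web's live range by walking predecessor edges from a use block back toward the dominating HEAD block, OR-ing in the target registers live in every block it visits, and choose_btr then returns the first allocatable register not in that accumulated set (honouring REG_ALLOC_ORDER when the target defines one). A reduced sketch of both steps, using adjacency lists and a 64-bit register mask; it assumes, as the real code does, that head dominates the start block, and Node, live_over_range and choose_free_reg are illustrative names.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Node
    {
      std::vector<int> preds;
      std::uint64_t live_regs = 0;   /* registers live in this block  */
    };

    /* Walk predecessors from START up to (and including) HEAD, OR-ing
       together the registers live anywhere on the way -- a scalar
       version of the worklist loop in augment_live_range.  */
    static std::uint64_t
    live_over_range (const std::vector<Node> &cfg, int start, int head)
    {
      std::uint64_t live = 0;
      std::vector<bool> in_range (cfg.size (), false);
      std::vector<int> worklist = { start };
      while (!worklist.empty ())
        {
          int b = worklist.back ();
          worklist.pop_back ();
          if (in_range[b])
            continue;
          in_range[b] = true;
          live |= cfg[b].live_regs;
          if (b == head)             /* head dominates start: stop here  */
            continue;
          for (int p : cfg[b].preds)
            worklist.push_back (p);
        }
      return live;
    }

    /* choose_btr in miniature: first allocatable register not yet used.  */
    static int
    choose_free_reg (std::uint64_t allocatable, std::uint64_t used)
    {
      for (int r = 0; r < 64; r++)
        if (((allocatable >> r) & 1) && !((used >> r) & 1))
          return r;
      return -1;
    }

    int
    main ()
    {
      /* 0 -> 1 -> 3 and 0 -> 2 -> 3; block 0 is the dominating head.  */
      std::vector<Node> cfg (4);
      cfg[1].preds = { 0 };
      cfg[2].preds = { 0 };
      cfg[3].preds = { 1, 2 };
      cfg[0].live_regs = 0x1;
      cfg[2].live_regs = 0x4;
      std::uint64_t used = live_over_range (cfg, 3, 0);      /* 0x5   */
      std::printf ("free reg = %d\n", choose_free_reg (0x7, used));  /* 1 */
      return 0;
    }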
*/ +-static void +-combine_btr_defs (btr_def *def, HARD_REG_SET *btrs_live_in_range) +-{ +- btr_def *other_def; +- +- for (other_def = def->group->members; +- other_def != NULL; +- other_def = other_def->next_this_group) +- { +- if (other_def != def +- && other_def->uses != NULL +- && ! other_def->has_ambiguous_use +- && dominated_by_p (CDI_DOMINATORS, other_def->bb, def->bb)) +- { +- /* def->bb dominates the other def, so def and other_def could +- be combined. */ +- /* Merge their live ranges, and get the set of +- target registers live over the merged range. */ +- int btr; +- HARD_REG_SET combined_btrs_live; +- auto_bitmap combined_live_range; +- btr_user *user; +- +- if (other_def->live_range == NULL) +- { +- HARD_REG_SET dummy_btrs_live_in_range; +- btr_def_live_range (other_def, &dummy_btrs_live_in_range); +- } +- COPY_HARD_REG_SET (combined_btrs_live, *btrs_live_in_range); +- bitmap_copy (combined_live_range, def->live_range); +- +- for (user = other_def->uses; user != NULL; user = user->next) +- augment_live_range (combined_live_range, &combined_btrs_live, +- def->bb, user->bb, +- (flag_btr_bb_exclusive +- || user->insn != BB_END (def->bb) +- || !JUMP_P (user->insn))); +- +- btr = choose_btr (combined_btrs_live); +- if (btr != -1) +- { +- /* We can combine them. */ +- if (dump_file) +- fprintf (dump_file, +- "Combining def in insn %d with def in insn %d\n", +- INSN_UID (other_def->insn), INSN_UID (def->insn)); +- +- def->btr = btr; +- user = other_def->uses; +- while (user != NULL) +- { +- btr_user *next = user->next; +- +- user->next = def->uses; +- def->uses = user; +- user = next; +- } +- /* Combining def/use webs can make target registers live +- after uses where they previously were not. This means +- some REG_DEAD notes may no longer be correct. We could +- be more precise about this if we looked at the combined +- live range, but here I just delete any REG_DEAD notes +- in case they are no longer correct. */ +- for (user = def->uses; user != NULL; user = user->next) +- remove_note (user->insn, +- find_regno_note (user->insn, REG_DEAD, +- REGNO (user->use))); +- clear_btr_from_live_range (other_def); +- other_def->uses = NULL; +- bitmap_copy (def->live_range, combined_live_range); +- if (other_def->btr == btr && other_def->other_btr_uses_after_use) +- def->other_btr_uses_after_use = 1; +- COPY_HARD_REG_SET (*btrs_live_in_range, combined_btrs_live); +- +- /* Delete the old target register initialization. */ +- delete_insn (other_def->insn); +- +- } +- } +- } +-} +- +-/* Move the definition DEF from its current position to basic +- block NEW_DEF_BB, and modify it to use branch target register BTR. +- Delete the old defining insn, and insert a new one in NEW_DEF_BB. +- Update all reaching uses of DEF in the RTL to use BTR. +- If this new position means that other defs in the +- same group can be combined with DEF then combine them. */ +-static void +-move_btr_def (basic_block new_def_bb, int btr, btr_def *def, bitmap live_range, +- HARD_REG_SET *btrs_live_in_range) +-{ +- /* We can move the instruction. +- Set a target register in block NEW_DEF_BB to the value +- needed for this target register definition. +- Replace all uses of the old target register definition by +- uses of the new definition. Delete the old definition. 
*/ +- basic_block b = new_def_bb; +- rtx_insn *insp = BB_HEAD (b); +- rtx_insn *old_insn = def->insn; +- rtx src; +- rtx btr_rtx; +- rtx_insn *new_insn; +- machine_mode btr_mode; +- btr_user *user; +- rtx set; +- +- if (dump_file) +- fprintf(dump_file, "migrating to basic block %d, using reg %d\n", +- new_def_bb->index, btr); +- +- clear_btr_from_live_range (def); +- def->btr = btr; +- def->bb = new_def_bb; +- def->luid = 0; +- def->cost = basic_block_freq (new_def_bb); +- bitmap_copy (def->live_range, live_range); +- combine_btr_defs (def, btrs_live_in_range); +- btr = def->btr; +- def->other_btr_uses_before_def +- = TEST_HARD_REG_BIT (btrs_live[b->index], btr) ? 1 : 0; +- add_btr_to_live_range (def, 1); +- if (LABEL_P (insp)) +- insp = NEXT_INSN (insp); +- /* N.B.: insp is expected to be NOTE_INSN_BASIC_BLOCK now. Some +- optimizations can result in insp being both first and last insn of +- its basic block. */ +- /* ?? some assertions to check that insp is sensible? */ +- +- if (def->other_btr_uses_before_def) +- { +- for (insp = BB_END (b); ! INSN_P (insp); insp = PREV_INSN (insp)) +- gcc_assert (insp != BB_HEAD (b)); +- +- if (JUMP_P (insp) || can_throw_internal (insp)) +- insp = PREV_INSN (insp); +- } +- +- set = single_set (old_insn); +- src = SET_SRC (set); +- btr_mode = GET_MODE (SET_DEST (set)); +- btr_rtx = gen_rtx_REG (btr_mode, btr); +- +- new_insn = gen_move_insn (btr_rtx, src); +- +- /* Insert target register initialization at head of basic block. */ +- def->insn = emit_insn_after (new_insn, insp); +- +- df_set_regs_ever_live (btr, true); +- +- if (dump_file) +- fprintf (dump_file, "New pt is insn %d, inserted after insn %d\n", +- INSN_UID (def->insn), INSN_UID (insp)); +- +- /* Delete the old target register initialization. */ +- delete_insn (old_insn); +- +- /* Replace each use of the old target register by a use of the new target +- register. */ +- for (user = def->uses; user != NULL; user = user->next) +- { +- /* Some extra work here to ensure consistent modes, because +- it seems that a target register REG rtx can be given a different +- mode depending on the context (surely that should not be +- the case?). */ +- rtx replacement_rtx; +- if (GET_MODE (user->use) == GET_MODE (btr_rtx) +- || GET_MODE (user->use) == VOIDmode) +- replacement_rtx = btr_rtx; +- else +- replacement_rtx = gen_rtx_REG (GET_MODE (user->use), btr); +- validate_replace_rtx (user->use, replacement_rtx, user->insn); +- user->use = replacement_rtx; +- } +-} +- +-/* We anticipate intra-block scheduling to be done. See if INSN could move +- up within BB by N_INSNS. */ +-static int +-can_move_up (const_basic_block bb, const rtx_insn *insn, int n_insns) +-{ +- while (insn != BB_HEAD (bb) && n_insns > 0) +- { +- insn = PREV_INSN (insn); +- /* ??? What if we have an anti-dependency that actually prevents the +- scheduler from doing the move? We'd like to re-allocate the register, +- but not necessarily put the load into another basic block. */ +- if (INSN_P (insn)) +- n_insns--; +- } +- return n_insns <= 0; +-} +- +-/* Attempt to migrate the target register definition DEF to an +- earlier point in the flowgraph. +- +- It is a precondition of this function that DEF is migratable: +- i.e. it has a constant source, and all uses are unambiguous. +- +- Only migrations that reduce the cost of DEF will be made. +- MIN_COST is the lower bound on the cost of the DEF after migration. +- If we migrate DEF so that its cost falls below MIN_COST, +- then we do not attempt to migrate further. 
The idea is that +- we migrate definitions in a priority order based on their cost, +- when the cost of this definition falls below MIN_COST, then +- there is another definition with cost == MIN_COST which now +- has a higher priority than this definition. +- +- Return nonzero if there may be benefit from attempting to +- migrate this DEF further (i.e. we have reduced the cost below +- MIN_COST, but we may be able to reduce it further). +- Return zero if no further migration is possible. */ +-static int +-migrate_btr_def (btr_def *def, int min_cost) +-{ +- HARD_REG_SET btrs_live_in_range; +- int btr_used_near_def = 0; +- int def_basic_block_freq; +- basic_block attempt; +- int give_up = 0; +- int def_moved = 0; +- btr_user *user; +- int def_latency; +- +- if (dump_file) +- fprintf (dump_file, +- "Attempting to migrate pt from insn %d (cost = %d, min_cost = %d) ... ", +- INSN_UID (def->insn), def->cost, min_cost); +- +- if (!def->group || def->has_ambiguous_use) +- /* These defs are not migratable. */ +- { +- if (dump_file) +- fprintf (dump_file, "it's not migratable\n"); +- return 0; +- } +- +- if (!def->uses) +- /* We have combined this def with another in the same group, so +- no need to consider it further. +- */ +- { +- if (dump_file) +- fprintf (dump_file, "it's already combined with another pt\n"); +- return 0; +- } +- +- btr_def_live_range (def, &btrs_live_in_range); +- auto_bitmap live_range; +- bitmap_copy (live_range, def->live_range); +- +-#ifdef INSN_SCHEDULING +- def_latency = insn_default_latency (def->insn) * issue_rate; +-#else +- def_latency = issue_rate; +-#endif +- +- for (user = def->uses; user != NULL; user = user->next) +- { +- if (user->bb == def->bb +- && user->luid > def->luid +- && (def->luid + def_latency) > user->luid +- && ! can_move_up (def->bb, def->insn, +- (def->luid + def_latency) - user->luid)) +- { +- btr_used_near_def = 1; +- break; +- } +- } +- +- def_basic_block_freq = basic_block_freq (def->bb); +- +- for (attempt = get_immediate_dominator (CDI_DOMINATORS, def->bb); +- !give_up && attempt && attempt != ENTRY_BLOCK_PTR_FOR_FN (cfun) +- && def->cost >= min_cost; +- attempt = get_immediate_dominator (CDI_DOMINATORS, attempt)) +- { +- /* Try to move the instruction that sets the target register into +- basic block ATTEMPT. */ +- int try_freq = basic_block_freq (attempt); +- edge_iterator ei; +- edge e; +- +- /* If ATTEMPT has abnormal edges, skip it. 
*/ +- FOR_EACH_EDGE (e, ei, attempt->succs) +- if (e->flags & EDGE_COMPLEX) +- break; +- if (e) +- continue; +- +- if (dump_file) +- fprintf (dump_file, "trying block %d ...", attempt->index); +- +- if (try_freq < def_basic_block_freq +- || (try_freq == def_basic_block_freq && btr_used_near_def)) +- { +- int btr; +- augment_live_range (live_range, &btrs_live_in_range, def->bb, attempt, +- flag_btr_bb_exclusive); +- if (dump_file) +- { +- fprintf (dump_file, "Now btrs live in range are: "); +- dump_hard_reg_set (btrs_live_in_range); +- fprintf (dump_file, "\n"); +- } +- btr = choose_btr (btrs_live_in_range); +- if (btr != -1) +- { +- move_btr_def (attempt, btr, def, live_range, &btrs_live_in_range); +- bitmap_copy (live_range, def->live_range); +- btr_used_near_def = 0; +- def_moved = 1; +- def_basic_block_freq = basic_block_freq (def->bb); +- } +- else +- { +- /* There are no free target registers available to move +- this far forward, so give up */ +- give_up = 1; +- if (dump_file) +- fprintf (dump_file, +- "giving up because there are no free target registers\n"); +- } +- +- } +- } +- if (!def_moved) +- { +- give_up = 1; +- if (dump_file) +- fprintf (dump_file, "failed to move\n"); +- } +- +- return !give_up; +-} +- +-/* Attempt to move instructions that set target registers earlier +- in the flowgraph, away from their corresponding uses. */ +-static void +-migrate_btr_defs (enum reg_class btr_class, int allow_callee_save) +-{ +- btr_heap_t all_btr_defs (LONG_MIN); +- int reg; +- +- gcc_obstack_init (&migrate_btrl_obstack); +- if (dump_file) +- { +- int i; +- +- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) +- { +- basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); +- fprintf (dump_file, "Basic block %d: count = ", i); +- bb->count.dump (dump_file); +- fprintf (dump_file, " loop-depth = %d idom = %d\n", +- bb_loop_depth (bb), +- get_immediate_dominator (CDI_DOMINATORS, bb)->index); +- } +- } +- +- CLEAR_HARD_REG_SET (all_btrs); +- for (first_btr = -1, reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++) +- if (TEST_HARD_REG_BIT (reg_class_contents[(int) btr_class], reg) +- && (allow_callee_save || call_used_regs[reg] +- || df_regs_ever_live_p (reg))) +- { +- SET_HARD_REG_BIT (all_btrs, reg); +- last_btr = reg; +- if (first_btr < 0) +- first_btr = reg; +- } +- +- btrs_live = XCNEWVEC (HARD_REG_SET, last_basic_block_for_fn (cfun)); +- btrs_live_at_end = XCNEWVEC (HARD_REG_SET, last_basic_block_for_fn (cfun)); +- +- build_btr_def_use_webs (&all_btr_defs); +- +- while (!all_btr_defs.empty ()) +- { +- int min_cost = -all_btr_defs.min_key (); +- btr_def *def = all_btr_defs.extract_min (); +- if (migrate_btr_def (def, min_cost)) +- { +- all_btr_defs.insert (-def->cost, def); +- if (dump_file) +- { +- fprintf (dump_file, +- "Putting insn %d back on queue with priority %d\n", +- INSN_UID (def->insn), def->cost); +- } +- } +- else +- BITMAP_FREE (def->live_range); +- } +- +- free (btrs_live); +- free (btrs_live_at_end); +- obstack_free (&migrate_btrl_obstack, NULL); +-} +- +-static void +-branch_target_load_optimize (bool after_prologue_epilogue_gen) +-{ +- enum reg_class klass +- = (enum reg_class) targetm.branch_target_register_class (); +- if (klass != NO_REGS) +- { +- /* Initialize issue_rate. */ +- if (targetm.sched.issue_rate) +- issue_rate = targetm.sched.issue_rate (); +- else +- issue_rate = 1; +- +- if (!after_prologue_epilogue_gen) +- { +- /* Build the CFG for migrate_btr_defs. */ +-#if 1 +- /* This may or may not be needed, depending on where we +- run this phase. 
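migrate_btr_defs above drains the heap best-first (definitions are inserted with key -cost, so the min-heap hands back the most frequently executed one first) and, whenever migrate_btr_def reports that a definition's cost dropped below the priority it was extracted at, pushes it back to be retried after the now-higher-priority definitions. Below is a stripped-down sketch of that requeue idiom with std::priority_queue; Work, try_migrate and drain are illustrative stand-ins, not the GCC routines, and the halving of work here is only a model of "found a cheaper block".

    #include <functional>
    #include <queue>
    #include <utility>
    #include <vector>

    /* Illustrative work item: COST is its current priority, CHEAPER holds
       the successively cheaper placements a migration could reach.  */
    struct Work
    {
      int cost;
      std::vector<int> cheaper;   /* next candidate is cheaper.back ()  */
    };

    /* Stand-in for migrate_btr_def: move to the next cheaper placement if
       one exists; return true when the new cost fell below the priority
       we were extracted at, so a later attempt might improve it more.  */
    static bool
    try_migrate (Work &w, int min_cost)
    {
      if (w.cheaper.empty ())
        return false;
      w.cost = w.cheaper.back ();
      w.cheaper.pop_back ();
      return w.cost < min_cost;
    }

    static void
    drain (std::vector<Work> &items)
    {
      /* Min-heap keyed on -cost, mirroring the all_btr_defs heap above.  */
      typedef std::pair<long, Work *> entry;
      std::priority_queue<entry, std::vector<entry>,
                          std::greater<entry> > heap;
      for (Work &w : items)
        heap.push (entry (-(long) w.cost, &w));

      while (!heap.empty ())
        {
          int min_cost = (int) -heap.top ().first;  /* the item's own cost */
          Work *w = heap.top ().second;
          heap.pop ();
          if (try_migrate (*w, min_cost))
            heap.push (entry (-(long) w->cost, w)); /* requeue, new priority */
        }
    }

    int
    main ()
    {
      std::vector<Work> items (2);
      items[0].cost = 100;
      items[0].cheaper = { 10, 40 };   /* will be tried as 40, then 10  */
      items[1].cost = 50;
      drain (items);
      return items[0].cost - 10;       /* 0: item 0 ended up at cost 10 */
    }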
*/ +- cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0); +-#endif +- } +- df_analyze (); +- +- +- /* Dominator info is also needed for migrate_btr_def. */ +- calculate_dominance_info (CDI_DOMINATORS); +- migrate_btr_defs (klass, +- (targetm.branch_target_register_callee_saved +- (after_prologue_epilogue_gen))); +- +- free_dominance_info (CDI_DOMINATORS); +- } +-} +- +-namespace { +- +-const pass_data pass_data_branch_target_load_optimize1 = +-{ +- RTL_PASS, /* type */ +- "btl1", /* name */ +- OPTGROUP_NONE, /* optinfo_flags */ +- TV_NONE, /* tv_id */ +- 0, /* properties_required */ +- 0, /* properties_provided */ +- 0, /* properties_destroyed */ +- 0, /* todo_flags_start */ +- 0, /* todo_flags_finish */ +-}; +- +-class pass_branch_target_load_optimize1 : public rtl_opt_pass +-{ +-public: +- pass_branch_target_load_optimize1 (gcc::context *ctxt) +- : rtl_opt_pass (pass_data_branch_target_load_optimize1, ctxt) +- {} +- +- /* opt_pass methods: */ +- virtual bool gate (function *) { return flag_branch_target_load_optimize; } +- virtual unsigned int execute (function *) +- { +- branch_target_load_optimize (epilogue_completed); +- return 0; +- } +- +-}; // class pass_branch_target_load_optimize1 +- +-} // anon namespace +- +-rtl_opt_pass * +-make_pass_branch_target_load_optimize1 (gcc::context *ctxt) +-{ +- return new pass_branch_target_load_optimize1 (ctxt); +-} +- +- +-namespace { +- +-const pass_data pass_data_branch_target_load_optimize2 = +-{ +- RTL_PASS, /* type */ +- "btl2", /* name */ +- OPTGROUP_NONE, /* optinfo_flags */ +- TV_NONE, /* tv_id */ +- 0, /* properties_required */ +- 0, /* properties_provided */ +- 0, /* properties_destroyed */ +- 0, /* todo_flags_start */ +- 0, /* todo_flags_finish */ +-}; +- +-class pass_branch_target_load_optimize2 : public rtl_opt_pass +-{ +-public: +- pass_branch_target_load_optimize2 (gcc::context *ctxt) +- : rtl_opt_pass (pass_data_branch_target_load_optimize2, ctxt) +- {} +- +- /* opt_pass methods: */ +- virtual bool gate (function *) +- { +- return (optimize > 0 && flag_branch_target_load_optimize2); +- } +- +- virtual unsigned int execute (function *); +- +-}; // class pass_branch_target_load_optimize2 +- +-unsigned int +-pass_branch_target_load_optimize2::execute (function *) +-{ +- static int warned = 0; +- +- /* Leave this a warning for now so that it is possible to experiment +- with running this pass twice. In 3.6, we should either make this +- an error, or use separate dump files. */ +- if (flag_branch_target_load_optimize +- && flag_branch_target_load_optimize2 +- && !warned) +- { +- warning (0, "branch target register load optimization is not intended " +- "to be run twice"); +- +- warned = 1; +- } +- +- branch_target_load_optimize (epilogue_completed); +- return 0; +-} +- +-} // anon namespace +- +-rtl_opt_pass * +-make_pass_branch_target_load_optimize2 (gcc::context *ctxt) +-{ +- return new pass_branch_target_load_optimize2 (ctxt); +-} +diff --git a/gcc/builtins.c b/gcc/builtins.c +index 910e614a4..945205c1d 100644 +--- a/gcc/builtins.c ++++ b/gcc/builtins.c +@@ -1431,7 +1431,7 @@ expand_builtin_prefetch (tree exp) + } + + /* Get a MEM rtx for expression EXP which is the address of an operand +- to be used in a string instruction (cmpstrsi, movmemsi, ..). LEN is ++ to be used in a string instruction (cmpstrsi, cpymemsi, ..). LEN is + the maximum length of the block of memory that might be accessed or + NULL if unknown. 
*/ + +@@ -7224,7 +7224,6 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, + int ignore) + { + tree fndecl = get_callee_fndecl (exp); +- enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl); + machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp)); + int flags; + +@@ -7236,6 +7235,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, + redundant checks and be sure, that possible overflow will be detected + by ASan. */ + ++ enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl); + if ((flag_sanitize & SANITIZE_ADDRESS) && asan_intercepted_p (fcode)) + return expand_call (exp, target, ignore); + +diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c +index d220e8135..bf3db074a 100644 +--- a/gcc/c-family/c-common.c ++++ b/gcc/c-family/c-common.c +@@ -5835,15 +5835,27 @@ builtin_function_validate_nargs (location_t loc, tree fndecl, int nargs, + /* Verifies the NARGS arguments ARGS to the builtin function FNDECL. + Returns false if there was an error, otherwise true. LOC is the + location of the function; ARG_LOC is a vector of locations of the +- arguments. */ ++ arguments. If FNDECL is the result of resolving an overloaded ++ target built-in, ORIG_FNDECL is the original function decl, ++ otherwise it is null. */ + + bool + check_builtin_function_arguments (location_t loc, vec arg_loc, +- tree fndecl, int nargs, tree *args) ++ tree fndecl, tree orig_fndecl, ++ int nargs, tree *args) + { +- if (!fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)) ++ if (!fndecl_built_in_p (fndecl)) + return true; + ++ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) ++ return (!targetm.check_builtin_call ++ || targetm.check_builtin_call (loc, arg_loc, fndecl, ++ orig_fndecl, nargs, args)); ++ ++ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_FRONTEND) ++ return true; ++ ++ gcc_assert (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL); + switch (DECL_FUNCTION_CODE (fndecl)) + { + case BUILT_IN_ALLOCA_WITH_ALIGN_AND_MAX: +@@ -7317,8 +7329,6 @@ tree + resolve_overloaded_builtin (location_t loc, tree function, + vec *params) + { +- enum built_in_function orig_code = DECL_FUNCTION_CODE (function); +- + /* Is function one of the _FETCH_OP_ or _OP_FETCH_ built-ins? + Those are not valid to call with a pointer to _Bool (or C++ bool) + and so must be rejected. */ +@@ -7340,6 +7350,7 @@ resolve_overloaded_builtin (location_t loc, tree function, + } + + /* Handle BUILT_IN_NORMAL here. 
*/ ++ enum built_in_function orig_code = DECL_FUNCTION_CODE (function); + switch (orig_code) + { + case BUILT_IN_SPECULATION_SAFE_VALUE_N: +diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h +index 683764267..46b8d265a 100644 +--- a/gcc/c-family/c-common.h ++++ b/gcc/c-family/c-common.h +@@ -818,7 +818,7 @@ extern void check_function_arguments_recurse (void (*) + void *, tree, + unsigned HOST_WIDE_INT); + extern bool check_builtin_function_arguments (location_t, vec, +- tree, int, tree *); ++ tree, tree, int, tree *); + extern void check_function_format (const_tree, tree, int, tree *, + vec *); + extern bool attribute_fallthrough_p (tree); +@@ -995,7 +995,8 @@ extern bool c_switch_covers_all_cases_p (splay_tree, tree); + extern tree build_function_call (location_t, tree, tree); + + extern tree build_function_call_vec (location_t, vec, tree, +- vec *, vec *); ++ vec *, vec *, ++ tree = NULL_TREE); + + extern tree resolve_overloaded_builtin (location_t, tree, vec *); + +diff --git a/gcc/c-family/c-pretty-print.c b/gcc/c-family/c-pretty-print.c +index 3e25624d3..1e14658c0 100644 +--- a/gcc/c-family/c-pretty-print.c ++++ b/gcc/c-family/c-pretty-print.c +@@ -470,6 +470,16 @@ pp_c_specifier_qualifier_list (c_pretty_printer *pp, tree t) + ? "_Complex" : "__complex__")); + else if (code == VECTOR_TYPE) + { ++ /* The syntax we print for vector types isn't real C or C++ syntax, ++ so it's better to print the type name if we have one. */ ++ tree name = TYPE_NAME (t); ++ if (!(pp->flags & pp_c_flag_gnu_v3) ++ && name ++ && TREE_CODE (name) == TYPE_DECL) ++ { ++ pp->id_expression (name); ++ break; ++ } + pp_c_ws_string (pp, "__vector"); + pp_c_left_paren (pp); + pp_wide_integer (pp, TYPE_VECTOR_SUBPARTS (t)); +diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c +index 859a62412..288dbe9d9 100644 +--- a/gcc/c/c-decl.c ++++ b/gcc/c/c-decl.c +@@ -604,7 +604,7 @@ static tree grokparms (struct c_arg_info *, bool); + static void layout_array_type (tree); + static void warn_defaults_to (location_t, int, const char *, ...) + ATTRIBUTE_GCC_DIAG(3,4); +-static const char *header_for_builtin_fn (enum built_in_function); ++static const char *header_for_builtin_fn (tree); + + /* T is a statement. Add it to the statement-tree. This is the + C/ObjC version--C++ has a slightly different version of this +@@ -1951,7 +1951,8 @@ diagnose_mismatched_decls (tree newdecl, tree olddecl, + if (!comptypes (oldtype, newtype)) + { + if (TREE_CODE (olddecl) == FUNCTION_DECL +- && fndecl_built_in_p (olddecl) && !C_DECL_DECLARED_BUILTIN (olddecl)) ++ && fndecl_built_in_p (olddecl, BUILT_IN_NORMAL) ++ && !C_DECL_DECLARED_BUILTIN (olddecl)) + { + /* Accept "harmless" mismatches in function types such + as missing qualifiers or pointer vs same size integer +@@ -1973,8 +1974,7 @@ diagnose_mismatched_decls (tree newdecl, tree olddecl, + /* If types don't match for a built-in, throw away the + built-in. No point in calling locate_old_decl here, it + won't print anything. 
*/ +- const char *header +- = header_for_builtin_fn (DECL_FUNCTION_CODE (olddecl)); ++ const char *header = header_for_builtin_fn (olddecl); + location_t loc = DECL_SOURCE_LOCATION (newdecl); + if (warning_at (loc, OPT_Wbuiltin_declaration_mismatch, + "conflicting types for built-in function %q+D; " +@@ -2637,7 +2637,8 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) + |= DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (olddecl); + TREE_THIS_VOLATILE (newdecl) |= TREE_THIS_VOLATILE (olddecl); + DECL_IS_MALLOC (newdecl) |= DECL_IS_MALLOC (olddecl); +- DECL_IS_OPERATOR_NEW (newdecl) |= DECL_IS_OPERATOR_NEW (olddecl); ++ if (DECL_IS_OPERATOR_NEW_P (olddecl)) ++ DECL_SET_IS_OPERATOR_NEW (newdecl, true); + TREE_READONLY (newdecl) |= TREE_READONLY (olddecl); + DECL_PURE_P (newdecl) |= DECL_PURE_P (olddecl); + DECL_IS_NOVOPS (newdecl) |= DECL_IS_NOVOPS (olddecl); +@@ -2731,8 +2732,7 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) + { + /* If redeclaring a builtin function, it stays built in. + But it gets tagged as having been declared. */ +- DECL_BUILT_IN_CLASS (newdecl) = DECL_BUILT_IN_CLASS (olddecl); +- DECL_FUNCTION_CODE (newdecl) = DECL_FUNCTION_CODE (olddecl); ++ copy_decl_built_in_function (newdecl, olddecl); + C_DECL_DECLARED_BUILTIN (newdecl) = 1; + if (new_is_prototype) + { +@@ -3334,13 +3334,17 @@ implicit_decl_warning (location_t loc, tree id, tree olddecl) + hint.suppress (); + } + +-/* This function represents mapping of a function code FCODE +- to its respective header. */ ++/* Return the name of the header file that declares built-in function ++ FNDECL, or null if either we don't know or don't expect to see an ++ explicit declaration. */ + + static const char * +-header_for_builtin_fn (enum built_in_function fcode) ++header_for_builtin_fn (tree fndecl) + { +- switch (fcode) ++ if (DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) ++ return NULL; ++ ++ switch (DECL_FUNCTION_CODE (fndecl)) + { + CASE_FLT_FN (BUILT_IN_ACOS): + CASE_FLT_FN (BUILT_IN_ACOSH): +@@ -3590,8 +3594,7 @@ implicitly_declare (location_t loc, tree functionid) + "declaration of built-in " + "function %qD", decl); + /* See if we can hint which header to include. */ +- const char *header +- = header_for_builtin_fn (DECL_FUNCTION_CODE (decl)); ++ const char *header = header_for_builtin_fn (decl); + if (header != NULL && warned) + { + rich_location richloc (line_table, loc); +@@ -4471,6 +4474,16 @@ c_builtin_function_ext_scope (tree decl) + + return decl; + } ++ ++/* Implement LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL. */ ++ ++tree ++c_simulate_builtin_function_decl (tree decl) ++{ ++ tree type = TREE_TYPE (decl); ++ C_DECL_BUILTIN_PROTOTYPE (decl) = prototype_p (type); ++ return pushdecl (decl); ++} + + /* Called when a declaration is seen that contains no names to declare. + If its type is a reference to a structure, union or enum inherited +@@ -8746,6 +8759,8 @@ finish_enum (tree enumtype, tree values, tree attributes) + && !in_sizeof && !in_typeof && !in_alignof) + struct_parse_info->struct_types.safe_push (enumtype); + ++ C_TYPE_BEING_DEFINED (enumtype) = 0; ++ + return enumtype; + } + +@@ -8851,6 +8866,36 @@ build_enumerator (location_t decl_loc, location_t loc, + return tree_cons (decl, value, NULL_TREE); + } + ++/* Implement LANG_HOOKS_SIMULATE_ENUM_DECL. 
*/ ++ ++tree ++c_simulate_enum_decl (location_t loc, const char *name, ++ vec values) ++{ ++ location_t saved_loc = input_location; ++ input_location = loc; ++ ++ struct c_enum_contents the_enum; ++ tree enumtype = start_enum (loc, &the_enum, get_identifier (name)); ++ ++ tree value_chain = NULL_TREE; ++ string_int_pair *value; ++ unsigned int i; ++ FOR_EACH_VEC_ELT (values, i, value) ++ { ++ tree decl = build_enumerator (loc, loc, &the_enum, ++ get_identifier (value->first), ++ build_int_cst (integer_type_node, ++ value->second)); ++ TREE_CHAIN (decl) = value_chain; ++ value_chain = decl; ++ } ++ ++ finish_enum (enumtype, nreverse (value_chain), NULL_TREE); ++ ++ input_location = saved_loc; ++ return enumtype; ++} + + /* Create the FUNCTION_DECL for a function definition. + DECLSPECS, DECLARATOR and ATTRIBUTES are the parts of +diff --git a/gcc/c/c-objc-common.h b/gcc/c/c-objc-common.h +index f5e820420..c8739e0b8 100644 +--- a/gcc/c/c-objc-common.h ++++ b/gcc/c/c-objc-common.h +@@ -60,6 +60,9 @@ along with GCC; see the file COPYING3. If not see + #define LANG_HOOKS_BUILTIN_FUNCTION c_builtin_function + #undef LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE + #define LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE c_builtin_function_ext_scope ++#undef LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL ++#define LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL \ ++ c_simulate_builtin_function_decl + #undef LANG_HOOKS_EMITS_BEGIN_STMT + #define LANG_HOOKS_EMITS_BEGIN_STMT true + +@@ -72,6 +75,8 @@ along with GCC; see the file COPYING3. If not see + #undef LANG_HOOKS_TREE_DUMP_DUMP_TREE_FN + #define LANG_HOOKS_TREE_DUMP_DUMP_TREE_FN c_dump_tree + ++#undef LANG_HOOKS_SIMULATE_ENUM_DECL ++#define LANG_HOOKS_SIMULATE_ENUM_DECL c_simulate_enum_decl + #undef LANG_HOOKS_TYPE_FOR_MODE + #define LANG_HOOKS_TYPE_FOR_MODE c_common_type_for_mode + #undef LANG_HOOKS_TYPE_FOR_SIZE +diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h +index 7e35ab1f0..19925e793 100644 +--- a/gcc/c/c-tree.h ++++ b/gcc/c/c-tree.h +@@ -561,6 +561,8 @@ extern tree finish_enum (tree, tree, tree); + extern void finish_function (void); + extern tree finish_struct (location_t, tree, tree, tree, + struct c_struct_parse_info *); ++extern tree c_simulate_enum_decl (location_t, const char *, ++ vec); + extern struct c_arg_info *build_arg_info (void); + extern struct c_arg_info *get_parm_info (bool, tree); + extern tree grokfield (location_t, struct c_declarator *, +@@ -577,6 +579,7 @@ extern struct c_declarator *set_array_declarator_inner (struct c_declarator *, + struct c_declarator *); + extern tree c_builtin_function (tree); + extern tree c_builtin_function_ext_scope (tree); ++extern tree c_simulate_builtin_function_decl (tree); + extern void shadow_tag (const struct c_declspecs *); + extern void shadow_tag_warned (const struct c_declspecs *, int); + extern tree start_enum (location_t, struct c_enum_contents *, tree); +diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c +index cb999cbf8..87f4178ec 100644 +--- a/gcc/c/c-typeck.c ++++ b/gcc/c/c-typeck.c +@@ -3002,6 +3002,8 @@ inform_declaration (tree decl) + } + + /* Build a function call to function FUNCTION with parameters PARAMS. ++ If FUNCTION is the result of resolving an overloaded target built-in, ++ ORIG_FUNDECL is the original function decl, otherwise it is null. + ORIGTYPES, if not NULL, is a vector of types; each element is + either NULL or the original type of the corresponding element in + PARAMS. 
The original type may differ from TREE_TYPE of the +@@ -3012,7 +3014,7 @@ inform_declaration (tree decl) + tree + build_function_call_vec (location_t loc, vec arg_loc, + tree function, vec *params, +- vec *origtypes) ++ vec *origtypes, tree orig_fundecl) + { + tree fntype, fundecl = NULL_TREE; + tree name = NULL_TREE, result; +@@ -3032,6 +3034,8 @@ build_function_call_vec (location_t loc, vec arg_loc, + if (flag_tm) + tm_malloc_replacement (function); + fundecl = function; ++ if (!orig_fundecl) ++ orig_fundecl = fundecl; + /* Atomic functions have type checking/casting already done. They are + often rewritten and don't match the original parameter list. */ + if (name && !strncmp (IDENTIFIER_POINTER (name), "__atomic_", 9)) +@@ -3109,9 +3113,10 @@ build_function_call_vec (location_t loc, vec arg_loc, + argarray = vec_safe_address (params); + + /* Check that arguments to builtin functions match the expectations. */ +- if (fundecl && fndecl_built_in_p (fundecl, BUILT_IN_NORMAL) +- && !check_builtin_function_arguments (loc, arg_loc, fundecl, nargs, +- argarray)) ++ if (fundecl ++ && fndecl_built_in_p (fundecl) ++ && !check_builtin_function_arguments (loc, arg_loc, fundecl, ++ orig_fundecl, nargs, argarray)) + return error_mark_node; + + /* Check that the arguments to the function are valid. */ +diff --git a/gcc/caller-save.c b/gcc/caller-save.c +index 9ff470c33..0d66e0ce5 100644 +--- a/gcc/caller-save.c ++++ b/gcc/caller-save.c +@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see + #include "dumpfile.h" + #include "rtl-iter.h" + #include "target.h" ++#include "function-abi.h" + + #define MOVE_MAX_WORDS (MOVE_MAX / UNITS_PER_WORD) + +@@ -192,29 +193,17 @@ init_caller_save (void) + + caller_save_initialized_p = true; + +- CLEAR_HARD_REG_SET (no_caller_save_reg_set); + /* First find all the registers that we need to deal with and all + the modes that they can have. If we can't find a mode to use, + we can't have the register live over calls. */ + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- { +- if (call_used_regs[i] +- && !TEST_HARD_REG_BIT (call_fixed_reg_set, i)) +- { +- for (j = 1; j <= MOVE_MAX_WORDS; j++) +- { +- regno_save_mode[i][j] = HARD_REGNO_CALLER_SAVE_MODE (i, j, +- VOIDmode); +- if (regno_save_mode[i][j] == VOIDmode && j == 1) +- { +- SET_HARD_REG_BIT (call_fixed_reg_set, i); +- } +- } +- } +- else +- regno_save_mode[i][1] = VOIDmode; +- } ++ for (j = 1; j <= MOVE_MAX_WORDS; j++) ++ { ++ regno_save_mode[i][j] = HARD_REGNO_CALLER_SAVE_MODE (i, j, VOIDmode); ++ if (regno_save_mode[i][j] == VOIDmode && j == 1) ++ CLEAR_HARD_REG_BIT (savable_regs, i); ++ } + + /* The following code tries to approximate the conditions under which + we can easily save and restore a register without scratch registers or +@@ -275,11 +264,7 @@ init_caller_save (void) + { + regno_save_mode[i][j] = VOIDmode; + if (j == 1) +- { +- SET_HARD_REG_BIT (call_fixed_reg_set, i); +- if (call_used_regs[i]) +- SET_HARD_REG_BIT (no_caller_save_reg_set, i); +- } ++ CLEAR_HARD_REG_BIT (savable_regs, i); + } + } + +@@ -442,7 +427,9 @@ setup_save_areas (void) + freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)); + REG_SET_TO_HARD_REG_SET (hard_regs_to_save, + &chain->live_throughout); +- get_call_reg_set_usage (insn, &used_regs, call_used_reg_set); ++ used_regs = insn_callee_abi (insn).full_reg_clobbers (); ++ /* ??? This preserves traditional behavior; it might not be needed. */ ++ used_regs |= fixed_reg_set; + + /* Record all registers set in this call insn. These don't + need to be saved. N.B. 
the call insn might set a subreg +@@ -450,14 +437,13 @@ setup_save_areas (void) + live during the call, but the subreg that is set + isn't. */ + CLEAR_HARD_REG_SET (this_insn_sets); +- note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); ++ note_stores (insn, mark_set_regs, &this_insn_sets); + /* Sibcalls are considered to set the return value. */ + if (SIBLING_CALL_P (insn) && crtl->return_rtx) + mark_set_regs (crtl->return_rtx, NULL_RTX, &this_insn_sets); + +- AND_COMPL_HARD_REG_SET (used_regs, call_fixed_reg_set); +- AND_COMPL_HARD_REG_SET (used_regs, this_insn_sets); +- AND_HARD_REG_SET (hard_regs_to_save, used_regs); ++ used_regs &= ~(fixed_reg_set | this_insn_sets); ++ hard_regs_to_save &= used_regs & savable_regs; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) + { +@@ -526,7 +512,10 @@ setup_save_areas (void) + + REG_SET_TO_HARD_REG_SET (hard_regs_to_save, + &chain->live_throughout); +- get_call_reg_set_usage (insn, &used_regs, call_used_reg_set); ++ used_regs = insn_callee_abi (insn).full_reg_clobbers (); ++ /* ??? This preserves traditional behavior; it might not ++ be needed. */ ++ used_regs |= fixed_reg_set; + + /* Record all registers set in this call insn. These don't + need to be saved. N.B. the call insn might set a subreg +@@ -534,15 +523,14 @@ setup_save_areas (void) + live during the call, but the subreg that is set + isn't. */ + CLEAR_HARD_REG_SET (this_insn_sets); +- note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); ++ note_stores (insn, mark_set_regs, &this_insn_sets); + /* Sibcalls are considered to set the return value, + compare df-scan.c:df_get_call_refs. */ + if (SIBLING_CALL_P (insn) && crtl->return_rtx) + mark_set_regs (crtl->return_rtx, NULL_RTX, &this_insn_sets); + +- AND_COMPL_HARD_REG_SET (used_regs, call_fixed_reg_set); +- AND_COMPL_HARD_REG_SET (used_regs, this_insn_sets); +- AND_HARD_REG_SET (hard_regs_to_save, used_regs); ++ used_regs &= ~(fixed_reg_set | this_insn_sets); ++ hard_regs_to_save &= used_regs & savable_regs; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) + { +@@ -775,13 +763,13 @@ save_call_clobbered_regs (void) + + if (code == JUMP_INSN) + /* Restore all registers if this is a JUMP_INSN. */ +- COPY_HARD_REG_SET (referenced_regs, hard_regs_saved); ++ referenced_regs = hard_regs_saved; + else + { + CLEAR_HARD_REG_SET (referenced_regs); + mark_referenced_regs (&PATTERN (insn), + mark_reg_as_referenced, NULL); +- AND_HARD_REG_SET (referenced_regs, hard_regs_saved); ++ referenced_regs &= hard_regs_saved; + } + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +@@ -795,8 +783,8 @@ save_call_clobbered_regs (void) + be live across the call, while the other is set + afterwards. */ + CLEAR_HARD_REG_SET (this_insn_sets); +- note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); +- AND_COMPL_HARD_REG_SET (hard_regs_saved, this_insn_sets); ++ note_stores (insn, mark_set_regs, &this_insn_sets); ++ hard_regs_saved &= ~this_insn_sets; + } + + if (code == CALL_INSN +@@ -849,15 +837,18 @@ save_call_clobbered_regs (void) + multi-hard-reg pseudo; then the pseudo is considered live + during the call, but the subreg that is set isn't. */ + CLEAR_HARD_REG_SET (this_insn_sets); +- note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); ++ note_stores (insn, mark_set_regs, &this_insn_sets); + + /* Compute which hard regs must be saved before this call. 
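The caller-save.c hunks above show two mechanical conversions that recur throughout this patch: note_stores now takes the insn rather than its PATTERN, and the HARD_REG_SET macro calls (AND_COMPL_HARD_REG_SET, AND_HARD_REG_SET, COPY_HARD_REG_SET) become overloaded C++ operators, with the call-clobbered set coming from insn_callee_abi ().full_reg_clobbers () instead of get_call_reg_set_usage. A minimal sketch of the resulting idiom, kept to names that appear in these hunks (the helper itself is hypothetical and assumes GCC's internal headers; it is not a standalone program):

/* Compute the registers that must be caller-saved around INSN:
   live registers that the callee's ABI may clobber, restricted to
   the savable, non-fixed ones.  */
static HARD_REG_SET
sketch_regs_to_save_around_call (rtx_insn *insn, const HARD_REG_SET &live)
{
  HARD_REG_SET clobbered = insn_callee_abi (insn).full_reg_clobbers ();
  HARD_REG_SET to_save = live & clobbered & savable_regs;
  to_save &= ~fixed_reg_set;
  return to_save;
}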
*/ +- AND_COMPL_HARD_REG_SET (hard_regs_to_save, call_fixed_reg_set); +- AND_COMPL_HARD_REG_SET (hard_regs_to_save, this_insn_sets); +- AND_COMPL_HARD_REG_SET (hard_regs_to_save, hard_regs_saved); +- get_call_reg_set_usage (insn, &call_def_reg_set, +- call_used_reg_set); +- AND_HARD_REG_SET (hard_regs_to_save, call_def_reg_set); ++ hard_regs_to_save &= ~(fixed_reg_set ++ | this_insn_sets ++ | hard_regs_saved); ++ hard_regs_to_save &= savable_regs; ++ call_def_reg_set = insn_callee_abi (insn).full_reg_clobbers (); ++ /* ??? This preserves traditional behavior; it might not ++ be needed. */ ++ call_def_reg_set |= fixed_reg_set; ++ hard_regs_to_save &= call_def_reg_set; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) +@@ -872,7 +863,8 @@ save_call_clobbered_regs (void) + + if (cheap + && HARD_REGISTER_P (cheap) +- && TEST_HARD_REG_BIT (call_used_reg_set, REGNO (cheap))) ++ && TEST_HARD_REG_BIT (call_used_or_fixed_regs, ++ REGNO (cheap))) + { + rtx dest, newpat; + rtx pat = PATTERN (insn); +@@ -1414,8 +1406,7 @@ insert_one_insn (struct insn_chain *chain, int before_p, int code, rtx pat) + /* Registers that are set in CHAIN->INSN live in the new insn. + (Unless there is a REG_UNUSED note for them, but we don't + look for them here.) */ +- note_stores (PATTERN (chain->insn), add_stored_regs, +- &new_chain->live_throughout); ++ note_stores (chain->insn, add_stored_regs, &new_chain->live_throughout); + CLEAR_REG_SET (&new_chain->dead_or_set); + if (chain->insn == BB_END (BASIC_BLOCK_FOR_FN (cfun, chain->block))) + BB_END (BASIC_BLOCK_FOR_FN (cfun, chain->block)) = new_chain->insn; +diff --git a/gcc/calls.c b/gcc/calls.c +index 567959956..2638752ad 100644 +--- a/gcc/calls.c ++++ b/gcc/calls.c +@@ -346,7 +346,8 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value, + It is zero if this call doesn't want a structure value. + + NEXT_ARG_REG is the rtx that results from executing +- targetm.calls.function_arg (&args_so_far, VOIDmode, void_type_node, true) ++ targetm.calls.function_arg (&args_so_far, ++ function_arg_info::end_marker ()); + just after all the args have had their registers assigned. + This could be whatever you like, but normally it is the first + arg-register beyond those used for args in this call, +@@ -897,13 +898,12 @@ call_expr_flags (const_tree t) + return flags; + } + +-/* Return true if TYPE should be passed by invisible reference. */ ++/* Return true if ARG should be passed by invisible reference. */ + + bool +-pass_by_reference (CUMULATIVE_ARGS *ca, machine_mode mode, +- tree type, bool named_arg) ++pass_by_reference (CUMULATIVE_ARGS *ca, function_arg_info arg) + { +- if (type) ++ if (tree type = arg.type) + { + /* If this type contains non-trivial constructors, then it is + forbidden for the middle-end to create any new copies. */ +@@ -911,33 +911,55 @@ pass_by_reference (CUMULATIVE_ARGS *ca, machine_mode mode, + return true; + + /* GCC post 3.4 passes *all* variable sized types by reference. */ +- if (!TYPE_SIZE (type) || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) ++ if (!TYPE_SIZE (type) || !poly_int_tree_p (TYPE_SIZE (type))) + return true; + + /* If a record type should be passed the same as its first (and only) + member, use the type and mode of that member. 
*/ + if (TREE_CODE (type) == RECORD_TYPE && TYPE_TRANSPARENT_AGGR (type)) + { +- type = TREE_TYPE (first_field (type)); +- mode = TYPE_MODE (type); ++ arg.type = TREE_TYPE (first_field (type)); ++ arg.mode = TYPE_MODE (arg.type); + } + } + +- return targetm.calls.pass_by_reference (pack_cumulative_args (ca), mode, +- type, named_arg); ++ return targetm.calls.pass_by_reference (pack_cumulative_args (ca), arg); + } + +-/* Return true if TYPE, which is passed by reference, should be callee ++/* Return true if TYPE should be passed by reference when passed to ++ the "..." arguments of a function. */ ++ ++bool ++pass_va_arg_by_reference (tree type) ++{ ++ return pass_by_reference (NULL, function_arg_info (type, /*named=*/false)); ++} ++ ++/* Decide whether ARG, which occurs in the state described by CA, ++ should be passed by reference. Return true if so and update ++ ARG accordingly. */ ++ ++bool ++apply_pass_by_reference_rules (CUMULATIVE_ARGS *ca, function_arg_info &arg) ++{ ++ if (pass_by_reference (ca, arg)) ++ { ++ arg.type = build_pointer_type (arg.type); ++ arg.mode = TYPE_MODE (arg.type); ++ return true; ++ } ++ return false; ++} ++ ++/* Return true if ARG, which is passed by reference, should be callee + copied instead of caller copied. */ + + bool +-reference_callee_copied (CUMULATIVE_ARGS *ca, machine_mode mode, +- tree type, bool named_arg) ++reference_callee_copied (CUMULATIVE_ARGS *ca, const function_arg_info &arg) + { +- if (type && TREE_ADDRESSABLE (type)) ++ if (arg.type && TREE_ADDRESSABLE (arg.type)) + return false; +- return targetm.calls.callee_copies (pack_cumulative_args (ca), mode, type, +- named_arg); ++ return targetm.calls.callee_copies (pack_cumulative_args (ca), arg); + } + + +@@ -1350,7 +1372,6 @@ maybe_warn_alloc_args_overflow (tree fn, tree exp, tree args[2], int idx[2]) + location_t loc = EXPR_LOCATION (exp); + + tree fntype = fn ? TREE_TYPE (fn) : TREE_TYPE (TREE_TYPE (exp)); +- built_in_function fncode = fn ? DECL_FUNCTION_CODE (fn) : BUILT_IN_NONE; + bool warned = false; + + /* Validate each argument individually. */ +@@ -1376,11 +1397,10 @@ maybe_warn_alloc_args_overflow (tree fn, tree exp, tree args[2], int idx[2]) + friends. + Also avoid issuing the warning for calls to function named + "alloca". */ +- if ((fncode == BUILT_IN_ALLOCA +- && IDENTIFIER_LENGTH (DECL_NAME (fn)) != 6) +- || (fncode != BUILT_IN_ALLOCA +- && !lookup_attribute ("returns_nonnull", +- TYPE_ATTRIBUTES (fntype)))) ++ if (fn && fndecl_built_in_p (fn, BUILT_IN_ALLOCA) ++ ? IDENTIFIER_LENGTH (DECL_NAME (fn)) != 6 ++ : !lookup_attribute ("returns_nonnull", ++ TYPE_ATTRIBUTES (fntype))) + warned = warning_at (loc, OPT_Walloc_zero, + "%Kargument %i value is zero", + exp, idx[i] + 1); +@@ -1395,7 +1415,7 @@ maybe_warn_alloc_args_overflow (tree fn, tree exp, tree args[2], int idx[2]) + && fn + && !args[1] + && lang_GNU_CXX () +- && DECL_IS_OPERATOR_NEW (fn) ++ && DECL_IS_OPERATOR_NEW_P (fn) + && integer_all_onesp (args[i])) + continue; + +@@ -1989,15 +2009,13 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, + with those made by function.c. */ + + /* See if this argument should be passed by invisible reference. 
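From here on, calls.c passes argument properties around as one function_arg_info descriptor instead of separate mode/type/named parameters; apply_pass_by_reference_rules, added above, additionally rewrites the descriptor to the pointer type when the target wants the argument by invisible reference. A minimal sketch of a caller of the new interface (the wrapper name and its parameters are illustrative; the constructor and the query function are the ones introduced in these hunks):

/* Return true if an argument of type ARG_TYPE, named according to
   NAMED, is passed by invisible reference in the state CA.  */
static bool
sketch_arg_passed_by_reference (CUMULATIVE_ARGS *ca, tree arg_type, bool named)
{
  /* The two-argument constructor takes the mode from the type.  */
  function_arg_info arg (arg_type, named);
  /* On a true return this also rewrites ARG to describe the pointer
     that is actually passed; a real caller would keep using ARG.  */
  return apply_pass_by_reference_rules (ca, arg);
}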
*/ +- if (pass_by_reference (args_so_far_pnt, TYPE_MODE (type), +- type, argpos < n_named_args)) ++ function_arg_info orig_arg (type, argpos < n_named_args); ++ if (pass_by_reference (args_so_far_pnt, orig_arg)) + { + bool callee_copies; + tree base = NULL_TREE; + +- callee_copies +- = reference_callee_copied (args_so_far_pnt, TYPE_MODE (type), +- type, argpos < n_named_args); ++ callee_copies = reference_callee_copied (args_so_far_pnt, orig_arg); + + /* If we're compiling a thunk, pass through invisible references + instead of making a copy. */ +@@ -2118,8 +2136,8 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, + + targetm.calls.warn_parameter_passing_abi (args_so_far, type); + +- args[i].reg = targetm.calls.function_arg (args_so_far, mode, type, +- argpos < n_named_args); ++ function_arg_info arg (type, mode, argpos < n_named_args); ++ args[i].reg = targetm.calls.function_arg (args_so_far, arg); + + if (args[i].reg && CONST_INT_P (args[i].reg)) + args[i].reg = NULL; +@@ -2129,17 +2147,14 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, + arguments have to go into the incoming registers. */ + if (targetm.calls.function_incoming_arg != targetm.calls.function_arg) + args[i].tail_call_reg +- = targetm.calls.function_incoming_arg (args_so_far, mode, type, +- argpos < n_named_args); ++ = targetm.calls.function_incoming_arg (args_so_far, arg); + else + args[i].tail_call_reg = args[i].reg; + + if (args[i].reg) +- args[i].partial +- = targetm.calls.arg_partial_bytes (args_so_far, mode, type, +- argpos < n_named_args); ++ args[i].partial = targetm.calls.arg_partial_bytes (args_so_far, arg); + +- args[i].pass_on_stack = targetm.calls.must_pass_in_stack (mode, type); ++ args[i].pass_on_stack = targetm.calls.must_pass_in_stack (arg); + + /* If FUNCTION_ARG returned a (parallel [(expr_list (nil) ...) ...]), + it means that we are to pass this arg in the register(s) designated +@@ -2188,8 +2203,13 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, + /* Increment ARGS_SO_FAR, which has info about which arg-registers + have been used, etc. */ + +- targetm.calls.function_arg_advance (args_so_far, TYPE_MODE (type), +- type, argpos < n_named_args); ++ /* ??? Traditionally we've passed TYPE_MODE here, instead of the ++ promoted_mode used for function_arg above. However, the ++ corresponding handling of incoming arguments in function.c ++ does pass the promoted mode. */ ++ function_arg_info arg_to_skip (type, TYPE_MODE (type), ++ argpos < n_named_args); ++ targetm.calls.function_arg_advance (args_so_far, arg_to_skip); + + /* Store argument values for functions decorated with attribute + alloc_size. */ +@@ -4222,14 +4242,11 @@ expand_call (tree exp, rtx target, int ignore) + /* Set up next argument register. For sibling calls on machines + with register windows this should be the incoming register. 
*/ + if (pass == 0) +- next_arg_reg = targetm.calls.function_incoming_arg (args_so_far, +- VOIDmode, +- void_type_node, +- true); ++ next_arg_reg = targetm.calls.function_incoming_arg ++ (args_so_far, function_arg_info::end_marker ()); + else +- next_arg_reg = targetm.calls.function_arg (args_so_far, +- VOIDmode, void_type_node, +- true); ++ next_arg_reg = targetm.calls.function_arg ++ (args_so_far, function_arg_info::end_marker ()); + + if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) + { +@@ -4846,10 +4863,9 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + argvec[count].mode = Pmode; + argvec[count].partial = 0; + +- argvec[count].reg = targetm.calls.function_arg (args_so_far, +- Pmode, NULL_TREE, true); +- gcc_assert (targetm.calls.arg_partial_bytes (args_so_far, Pmode, +- NULL_TREE, 1) == 0); ++ function_arg_info ptr_arg (Pmode, /*named=*/true); ++ argvec[count].reg = targetm.calls.function_arg (args_so_far, ptr_arg); ++ gcc_assert (targetm.calls.arg_partial_bytes (args_so_far, ptr_arg) == 0); + + locate_and_pad_parm (Pmode, NULL_TREE, + #ifdef STACK_PARMS_IN_REG_PARM_AREA +@@ -4864,7 +4880,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + || reg_parm_stack_space > 0) + args_size.constant += argvec[count].locate.size.constant; + +- targetm.calls.function_arg_advance (args_so_far, Pmode, (tree) 0, true); ++ targetm.calls.function_arg_advance (args_so_far, ptr_arg); + + count++; + } +@@ -4885,11 +4901,11 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + && !(CONSTANT_P (val) && targetm.legitimate_constant_p (mode, val))) + val = force_operand (val, NULL_RTX); + +- if (pass_by_reference (&args_so_far_v, mode, NULL_TREE, 1)) ++ function_arg_info orig_arg (mode, /*named=*/true); ++ if (pass_by_reference (&args_so_far_v, orig_arg)) + { + rtx slot; +- int must_copy +- = !reference_callee_copied (&args_so_far_v, mode, NULL_TREE, 1); ++ int must_copy = !reference_callee_copied (&args_so_far_v, orig_arg); + + /* If this was a CONST function, it is now PURE since it now + reads memory. 
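In the expand_call and emit_library_call_value_1 hunks above, the historical end-of-arguments convention of passing (VOIDmode, void_type_node, true) to the argument hooks is replaced by function_arg_info::end_marker (), which targets can recognise with end_marker_p () (both defined in the calls.h hunk further down). A minimal sketch of the target-side check (the hook body is hypothetical; only the two helpers and the hook's argument list come from this patch):

/* Fragment of a TARGET_FUNCTION_ARG implementation under the new
   interface.  */
static rtx
sketch_target_function_arg (cumulative_args_t cum ATTRIBUTE_UNUSED,
			    const function_arg_info &arg)
{
  /* The end marker is built from void_type_node, so its mode is
     VOIDmode, which is exactly what end_marker_p tests.  */
  if (arg.end_marker_p ())
    return NULL_RTX;

  /* ... normal handling of ARG goes here ...  */
  return NULL_RTX;
}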
*/ +@@ -4927,13 +4943,13 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + } + + mode = promote_function_mode (NULL_TREE, mode, &unsigned_p, NULL_TREE, 0); ++ function_arg_info arg (mode, /*named=*/true); + argvec[count].mode = mode; + argvec[count].value = convert_modes (mode, GET_MODE (val), val, unsigned_p); +- argvec[count].reg = targetm.calls.function_arg (args_so_far, mode, +- NULL_TREE, true); ++ argvec[count].reg = targetm.calls.function_arg (args_so_far, arg); + + argvec[count].partial +- = targetm.calls.arg_partial_bytes (args_so_far, mode, NULL_TREE, 1); ++ = targetm.calls.arg_partial_bytes (args_so_far, arg); + + if (argvec[count].reg == 0 + || argvec[count].partial != 0 +@@ -4959,7 +4975,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + known_le (GET_MODE_SIZE (mode), UNITS_PER_WORD)); + #endif + +- targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true); ++ targetm.calls.function_arg_advance (args_so_far, arg); + } + + /* If this machine requires an external definition for library +@@ -5302,7 +5318,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, + original_args_size.constant, args_size.constant, + struct_value_size, + targetm.calls.function_arg (args_so_far, +- VOIDmode, void_type_node, true), ++ function_arg_info::end_marker ()), + valreg, + old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far); + +@@ -5815,22 +5831,21 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags, + return sibcall_failure; + } + +-/* Nonzero if we do not know how to pass TYPE solely in registers. */ ++/* Nonzero if we do not know how to pass ARG solely in registers. */ + + bool +-must_pass_in_stack_var_size (machine_mode mode ATTRIBUTE_UNUSED, +- const_tree type) ++must_pass_in_stack_var_size (const function_arg_info &arg) + { +- if (!type) ++ if (!arg.type) + return false; + + /* If the type has variable size... */ +- if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) ++ if (!poly_int_tree_p (TYPE_SIZE (arg.type))) + return true; + + /* If the type is marked as addressable (it is required + to be constructed into the stack)... */ +- if (TREE_ADDRESSABLE (type)) ++ if (TREE_ADDRESSABLE (arg.type)) + return true; + + return false; +@@ -5841,33 +5856,43 @@ must_pass_in_stack_var_size (machine_mode mode ATTRIBUTE_UNUSED, + /* ??? Should be able to merge these two by examining BLOCK_REG_PADDING. */ + + bool +-must_pass_in_stack_var_size_or_pad (machine_mode mode, const_tree type) ++must_pass_in_stack_var_size_or_pad (const function_arg_info &arg) + { +- if (!type) ++ if (!arg.type) + return false; + + /* If the type has variable size... */ +- if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) ++ if (TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST) + return true; + + /* If the type is marked as addressable (it is required + to be constructed into the stack)... */ +- if (TREE_ADDRESSABLE (type)) ++ if (TREE_ADDRESSABLE (arg.type)) + return true; + +- if (TYPE_EMPTY_P (type)) ++ if (TYPE_EMPTY_P (arg.type)) + return false; + + /* If the padding and mode of the type is such that a copy into + a register would put it into the wrong part of the register. */ +- if (mode == BLKmode +- && int_size_in_bytes (type) % (PARM_BOUNDARY / BITS_PER_UNIT) +- && (targetm.calls.function_arg_padding (mode, type) ++ if (arg.mode == BLKmode ++ && int_size_in_bytes (arg.type) % (PARM_BOUNDARY / BITS_PER_UNIT) ++ && (targetm.calls.function_arg_padding (arg.mode, arg.type) + == (BYTES_BIG_ENDIAN ? 
PAD_UPWARD : PAD_DOWNWARD))) + return true; + + return false; + } + ++/* Return true if TYPE must be passed on the stack when passed to ++ the "..." arguments of a function. */ ++ ++bool ++must_pass_va_arg_in_stack (tree type) ++{ ++ function_arg_info arg (type, /*named=*/false); ++ return targetm.calls.must_pass_in_stack (arg); ++} ++ + /* Tell the garbage collector about GTY markers in this source file. */ + #include "gt-calls.h" +diff --git a/gcc/calls.h b/gcc/calls.h +index 128bb5130..01ab3905a 100644 +--- a/gcc/calls.h ++++ b/gcc/calls.h +@@ -20,23 +20,108 @@ along with GCC; see the file COPYING3. If not see + #ifndef GCC_CALLS_H + #define GCC_CALLS_H + ++/* Describes a function argument. ++ ++ Each argument conceptually has a gimple-level type. Usually this type ++ is available directly as a tree via the TYPE field, but when calling ++ libgcc support functions it might instead be inferred from a mode, ++ in which case the type isn't available directly. ++ ++ This gimple-level type might go through promotion before being passed to ++ the target function. Depending on the context, the MODE field is either ++ the mode of the gimple-level type (whether explicitly given or not) ++ or the mode after promotion has been performed. */ ++class function_arg_info ++{ ++public: ++ function_arg_info () : type (NULL_TREE), mode (VOIDmode), named (false) {} ++ ++ /* Initialize an argument of mode MODE, either before or after promotion. */ ++ function_arg_info (machine_mode mode, bool named) ++ : type (NULL_TREE), mode (mode), named (named) ++ {} ++ ++ /* Initialize an unpromoted argument of type TYPE. */ ++ function_arg_info (tree type, bool named) ++ : type (type), mode (TYPE_MODE (type)), named (named) ++ {} ++ ++ /* Initialize an argument with explicit properties. */ ++ function_arg_info (tree type, machine_mode mode, bool named) ++ : type (type), mode (mode), named (named) ++ {} ++ ++ /* Return true if the gimple-level type is an aggregate. */ ++ bool aggregate_type_p () const { return type && AGGREGATE_TYPE_P (type); } ++ ++ /* Return the size of the gimple-level type, or -1 if the size is ++ variable or otherwise not representable as a poly_int64. ++ ++ Use this function when MODE is the mode of the type before promotion, ++ or in any context if the target never promotes function arguments. */ ++ poly_int64 type_size_in_bytes () const ++ { ++ if (type) ++ return int_size_in_bytes (type); ++ return GET_MODE_SIZE (mode); ++ } ++ ++ /* Return the size of the argument after promotion, or -1 if the size ++ is variable or otherwise not representable as a poly_int64. ++ ++ Use this function when MODE is the mode of the type after promotion. */ ++ poly_int64 promoted_size_in_bytes () const ++ { ++ if (mode == BLKmode) ++ return int_size_in_bytes (type); ++ return GET_MODE_SIZE (mode); ++ } ++ ++ /* True if the argument represents the end of the argument list, ++ as returned by end_marker (). */ ++ bool end_marker_p () const { return mode == VOIDmode; } ++ ++ /* Return a function_arg_info that represents the end of the ++ argument list. */ ++ static function_arg_info end_marker () ++ { ++ return function_arg_info (void_type_node, /*named=*/true); ++ } ++ ++ /* The type of the argument, or null if not known (which is true for ++ libgcc support functions). */ ++ tree type; ++ ++ /* The mode of the argument. Depending on context, this might be ++ the mode of the argument type or the mode after promotion. 
*/ ++ machine_mode mode; ++ ++ /* True if the argument is treated as a named argument, false if it is ++ treated as an unnamed variadic argument (i.e. one passed through ++ "..."). See also TARGET_STRICT_ARGUMENT_NAMING. */ ++ unsigned int named : 1; ++}; ++ + extern int flags_from_decl_or_type (const_tree); + extern int call_expr_flags (const_tree); + extern int setjmp_call_p (const_tree); + extern bool gimple_maybe_alloca_call_p (const gimple *); + extern bool gimple_alloca_call_p (const gimple *); + extern bool alloca_call_p (const_tree); +-extern bool must_pass_in_stack_var_size (machine_mode, const_tree); +-extern bool must_pass_in_stack_var_size_or_pad (machine_mode, const_tree); ++extern bool must_pass_in_stack_var_size (const function_arg_info &); ++extern bool must_pass_in_stack_var_size_or_pad (const function_arg_info &); ++extern bool must_pass_va_arg_in_stack (tree); + extern rtx prepare_call_address (tree, rtx, rtx, rtx *, int, int); + extern bool shift_return_value (machine_mode, bool, rtx); + extern rtx expand_call (tree, rtx, int); + extern void fixup_tail_calls (void); + +-extern bool pass_by_reference (CUMULATIVE_ARGS *, machine_mode, +- tree, bool); +-extern bool reference_callee_copied (CUMULATIVE_ARGS *, machine_mode, +- tree, bool); ++extern bool pass_by_reference (CUMULATIVE_ARGS *, function_arg_info); ++extern bool pass_va_arg_by_reference (tree); ++extern bool apply_pass_by_reference_rules (CUMULATIVE_ARGS *, ++ function_arg_info &); ++extern bool reference_callee_copied (CUMULATIVE_ARGS *, ++ const function_arg_info &); + extern void maybe_warn_alloc_args_overflow (tree, tree, tree[2], int[2]); + extern tree get_attr_nonstring_decl (tree, tree * = NULL); + extern void maybe_warn_nonstring_arg (tree, tree); +diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c +index 8c464ec79..ff7f014da 100644 +--- a/gcc/cfgcleanup.c ++++ b/gcc/cfgcleanup.c +@@ -54,6 +54,7 @@ along with GCC; see the file COPYING3. If not see + #include "dbgcnt.h" + #include "rtl-iter.h" + #include "regs.h" ++#include "function-abi.h" + + #define FORWARDER_BLOCK_P(BB) ((BB)->flags & BB_FORWARDER_BLOCK) + +@@ -1230,12 +1231,13 @@ old_insns_match_p (int mode ATTRIBUTE_UNUSED, rtx_insn *i1, rtx_insn *i2) + } + } + +- HARD_REG_SET i1_used, i2_used; ++ HARD_REG_SET i1_used = insn_callee_abi (i1).full_reg_clobbers (); ++ HARD_REG_SET i2_used = insn_callee_abi (i2).full_reg_clobbers (); ++ /* ??? This preserves traditional behavior; it might not be needed. */ ++ i1_used |= fixed_reg_set; ++ i2_used |= fixed_reg_set; + +- get_call_reg_set_usage (i1, &i1_used, call_used_reg_set); +- get_call_reg_set_usage (i2, &i2_used, call_used_reg_set); +- +- if (!hard_reg_set_equal_p (i1_used, i2_used)) ++ if (i1_used != i2_used) + return dir_none; + } + +@@ -1269,7 +1271,7 @@ old_insns_match_p (int mode ATTRIBUTE_UNUSED, rtx_insn *i1, rtx_insn *i2) + if (REG_NOTE_KIND (note) == REG_DEAD && STACK_REG_P (XEXP (note, 0))) + SET_HARD_REG_BIT (i2_regset, REGNO (XEXP (note, 0))); + +- if (!hard_reg_set_equal_p (i1_regset, i2_regset)) ++ if (i1_regset != i2_regset) + return dir_none; + } + #endif +diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c +index 4ae8e3b32..218414b39 100644 +--- a/gcc/cfgexpand.c ++++ b/gcc/cfgexpand.c +@@ -2874,6 +2874,15 @@ asm_clobber_reg_is_valid (int regno, int nregs, const char *regname) + error ("PIC register clobbered by %qs in %", regname); + is_valid = false; + } ++ else if (!in_hard_reg_set_p ++ (accessible_reg_set, reg_raw_mode[regno], regno)) ++ { ++ /* ??? Diagnose during gimplification? 
*/ ++ error ("the register %qs cannot be clobbered in %" ++ " for the current target", regname); ++ is_valid = false; ++ } ++ + /* Clobbering the stack pointer register is deprecated. GCC expects + the value of the stack pointer after an asm statement to be the same + as it was before, so no asm can validly clobber the stack pointer in +@@ -3865,7 +3874,6 @@ expand_gimple_stmt (gimple *stmt) + /* If we want exceptions for non-call insns, any + may_trap_p instruction may throw. */ + && GET_CODE (PATTERN (insn)) != CLOBBER +- && GET_CODE (PATTERN (insn)) != CLOBBER_HIGH + && GET_CODE (PATTERN (insn)) != USE + && insn_could_throw_p (insn)) + make_reg_eh_region_note (insn, 0, lp_nr); +diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c +index 6dbe96f9d..3388da7dd 100644 +--- a/gcc/cfgloopanal.c ++++ b/gcc/cfgloopanal.c +@@ -353,7 +353,7 @@ init_set_costs (void) + && !fixed_regs[i]) + { + target_avail_regs++; +- if (call_used_regs[i]) ++ if (call_used_or_fixed_reg_p (i)) + target_clobbered_regs++; + } + +diff --git a/gcc/cgraph.c b/gcc/cgraph.c +index 62f1afa2a..9dca43031 100644 +--- a/gcc/cgraph.c ++++ b/gcc/cgraph.c +@@ -1883,7 +1883,7 @@ cgraph_node::local_info (tree decl) + /* Return local info for the compiled function. */ + + cgraph_rtl_info * +-cgraph_node::rtl_info (tree decl) ++cgraph_node::rtl_info (const_tree decl) + { + gcc_assert (TREE_CODE (decl) == FUNCTION_DECL); + cgraph_node *node = get (decl); +@@ -1898,7 +1898,10 @@ cgraph_node::rtl_info (tree decl) + return NULL; + /* Allocate if it doesn't exist. */ + if (node->rtl == NULL) +- node->rtl = ggc_cleared_alloc (); ++ { ++ node->rtl = ggc_cleared_alloc (); ++ node->rtl->function_used_regs = reg_class_contents[ALL_REGS]; ++ } + return node->rtl; + } + +diff --git a/gcc/cgraph.h b/gcc/cgraph.h +index 10d1a2c6f..ad6720a4b 100644 +--- a/gcc/cgraph.h ++++ b/gcc/cgraph.h +@@ -1347,7 +1347,7 @@ public: + static cgraph_local_info *local_info (tree decl); + + /* Return local info for the compiled function. */ +- static struct cgraph_rtl_info *rtl_info (tree); ++ static struct cgraph_rtl_info *rtl_info (const_tree); + + /* Return the cgraph node that has ASMNAME for its DECL_ASSEMBLER_NAME. + Return NULL if there's no such node. */ +diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c +index cd3f585bd..43423234b 100644 +--- a/gcc/cgraphclones.c ++++ b/gcc/cgraphclones.c +@@ -225,10 +225,7 @@ build_function_decl_skip_args (tree orig_decl, bitmap args_to_skip, + if (fndecl_built_in_p (new_decl) + && args_to_skip + && !bitmap_empty_p (args_to_skip)) +- { +- DECL_BUILT_IN_CLASS (new_decl) = NOT_BUILT_IN; +- DECL_FUNCTION_CODE (new_decl) = (enum built_in_function) 0; +- } ++ set_decl_built_in_function (new_decl, NOT_BUILT_IN, 0); + /* The FE might have information and assumptions about the other + arguments. */ + DECL_LANG_SPECIFIC (new_decl) = NULL; +@@ -415,7 +412,7 @@ dump_callgraph_transformation (const cgraph_node *original, + + If the new node is being inlined into another one, NEW_INLINED_TO should be + the outline function the new one is (even indirectly) inlined to. All hooks +- will see this in node's global.inlined_to, when invoked. Can be NULL if the ++ will see this in node's inlined_to, when invoked. Can be NULL if the + node is not inlined. 
*/ + + cgraph_node * +@@ -1056,7 +1053,7 @@ cgraph_node::create_version_clone_with_body + location_t saved_loc = input_location; + tree v = TREE_VALUE (target_attributes); + input_location = DECL_SOURCE_LOCATION (new_decl); +- bool r = targetm.target_option.valid_attribute_p (new_decl, NULL, v, 0); ++ bool r = targetm.target_option.valid_attribute_p (new_decl, NULL, v, 1); + input_location = saved_loc; + if (!r) + return NULL; +diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c +index dee6becc7..ddf298583 100644 +--- a/gcc/cgraphunit.c ++++ b/gcc/cgraphunit.c +@@ -1793,7 +1793,6 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool force_gimple_thunk) + && targetm.asm_out.can_output_mi_thunk (thunk_fndecl, fixed_offset, + virtual_value, alias)) + { +- const char *fnname; + tree fn_block; + tree restype = TREE_TYPE (TREE_TYPE (thunk_fndecl)); + +@@ -1817,7 +1816,6 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool force_gimple_thunk) + = build_decl (DECL_SOURCE_LOCATION (thunk_fndecl), + RESULT_DECL, 0, restype); + DECL_CONTEXT (DECL_RESULT (thunk_fndecl)) = thunk_fndecl; +- fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); + + /* The back end expects DECL_INITIAL to contain a BLOCK, so we + create one. */ +@@ -1831,12 +1829,10 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool force_gimple_thunk) + insn_locations_init (); + set_curr_insn_location (DECL_SOURCE_LOCATION (thunk_fndecl)); + prologue_location = curr_insn_location (); +- assemble_start_function (thunk_fndecl, fnname); + + targetm.asm_out.output_mi_thunk (asm_out_file, thunk_fndecl, + fixed_offset, virtual_value, alias); + +- assemble_end_function (thunk_fndecl, fnname); + insn_locations_finalize (); + init_insn_lengths (); + free_after_compilation (cfun); +diff --git a/gcc/cif-code.def b/gcc/cif-code.def +index 3356377a1..a154f24f1 100644 +--- a/gcc/cif-code.def ++++ b/gcc/cif-code.def +@@ -70,8 +70,12 @@ DEFCIFCODE(LARGE_STACK_FRAME_GROWTH_LIMIT, CIF_FINAL_NORMAL, + N_("--param large-stack-frame-growth limit reached")) + DEFCIFCODE(MAX_INLINE_INSNS_SINGLE_LIMIT, CIF_FINAL_NORMAL, + N_("--param max-inline-insns-single limit reached")) ++DEFCIFCODE(MAX_INLINE_INSNS_SINGLE_O2_LIMIT, CIF_FINAL_NORMAL, ++ N_("--param max-inline-insns-single-O2 limit reached")) + DEFCIFCODE(MAX_INLINE_INSNS_AUTO_LIMIT, CIF_FINAL_NORMAL, + N_("--param max-inline-insns-auto limit reached")) ++DEFCIFCODE(MAX_INLINE_INSNS_AUTO_O2_LIMIT, CIF_FINAL_NORMAL, ++ N_("--param max-inline-insns-auto-O2 limit reached")) + DEFCIFCODE(INLINE_UNIT_GROWTH_LIMIT, CIF_FINAL_NORMAL, + N_("--param inline-unit-growth limit reached")) + +@@ -83,6 +87,10 @@ DEFCIFCODE(RECURSIVE_INLINING, CIF_FINAL_NORMAL, + DEFCIFCODE(UNLIKELY_CALL, CIF_FINAL_NORMAL, + N_("call is unlikely and code size would grow")) + ++/* Call is considered never executed. */ ++DEFCIFCODE(NEVER_CALL, CIF_FINAL_NORMAL, ++ N_("call is considered never executed and code size would grow")) ++ + /* Function is not declared as inline. 
*/ + DEFCIFCODE(NOT_DECLARED_INLINED, CIF_FINAL_NORMAL, + N_("function not declared inline and code size would grow")) +diff --git a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c +index 3638a1b10..d14d59abc 100644 +--- a/gcc/combine-stack-adj.c ++++ b/gcc/combine-stack-adj.c +@@ -133,7 +133,6 @@ single_set_for_csa (rtx_insn *insn) + && SET_SRC (this_rtx) == SET_DEST (this_rtx)) + ; + else if (GET_CODE (this_rtx) != CLOBBER +- && GET_CODE (this_rtx) != CLOBBER_HIGH + && GET_CODE (this_rtx) != USE) + return NULL_RTX; + } +diff --git a/gcc/combine.c b/gcc/combine.c +index b9d674c96..a425f0ca6 100644 +--- a/gcc/combine.c ++++ b/gcc/combine.c +@@ -571,7 +571,6 @@ find_single_use_1 (rtx dest, rtx *loc) + case SYMBOL_REF: + CASE_CONST_ANY: + case CLOBBER: +- case CLOBBER_HIGH: + return 0; + + case SET: +@@ -1224,8 +1223,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs) + subst_low_luid = DF_INSN_LUID (insn); + subst_insn = insn; + +- note_stores (PATTERN (insn), set_nonzero_bits_and_sign_copies, +- insn); ++ note_stores (insn, set_nonzero_bits_and_sign_copies, insn); + record_dead_and_set_regs (insn); + + if (AUTO_INC_DEC) +@@ -1763,9 +1761,6 @@ set_nonzero_bits_and_sign_copies (rtx x, const_rtx set, void *data) + return; + } + +- /* Should not happen as we only using pseduo registers. */ +- gcc_assert (GET_CODE (set) != CLOBBER_HIGH); +- + /* If this register is being initialized using itself, and the + register is uninitialized in this basic block, and there are + no LOG_LINKS which set the register, then part of the +@@ -1924,7 +1919,6 @@ can_combine_p (rtx_insn *insn, rtx_insn *i3, rtx_insn *pred ATTRIBUTE_UNUSED, + + /* We can ignore CLOBBERs. */ + case CLOBBER: +- case CLOBBER_HIGH: + break; + + case SET: +@@ -2439,7 +2433,7 @@ likely_spilled_retval_p (rtx_insn *insn) + info.mask = mask; + for (p = PREV_INSN (use); info.mask && p != insn; p = PREV_INSN (p)) + if (INSN_P (p)) +- note_stores (PATTERN (p), likely_spilled_retval_1, &info); ++ note_stores (p, likely_spilled_retval_1, &info); + mask = info.mask; + + /* Check if any of the (probably) live return value registers is +@@ -2595,8 +2589,6 @@ is_parallel_of_n_reg_sets (rtx pat, int n) + if (XEXP (XVECEXP (pat, 0, i), 0) == const0_rtx) + return false; + break; +- case CLOBBER_HIGH: +- break; + default: + return false; + } +@@ -2897,8 +2889,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, + for (i = 0; ok && i < XVECLEN (p2, 0); i++) + { + if ((GET_CODE (XVECEXP (p2, 0, i)) == SET +- || GET_CODE (XVECEXP (p2, 0, i)) == CLOBBER +- || GET_CODE (XVECEXP (p2, 0, i)) == CLOBBER_HIGH) ++ || GET_CODE (XVECEXP (p2, 0, i)) == CLOBBER) + && reg_overlap_mentioned_p (SET_DEST (PATTERN (i3)), + SET_DEST (XVECEXP (p2, 0, i)))) + ok = false; +@@ -4741,8 +4732,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, + been made to this insn. The order is important, because newi2pat + can affect nonzero_bits of newpat. */ + if (newi2pat) +- note_stores (newi2pat, set_nonzero_bits_and_sign_copies, NULL); +- note_stores (newpat, set_nonzero_bits_and_sign_copies, NULL); ++ note_pattern_stores (newi2pat, set_nonzero_bits_and_sign_copies, NULL); ++ note_pattern_stores (newpat, set_nonzero_bits_and_sign_copies, NULL); + } + + if (undobuf.other_insn != NULL_RTX) +@@ -13409,15 +13400,6 @@ record_dead_and_set_regs_1 (rtx dest, const_rtx setter, void *data) + ? 
SET_SRC (setter) + : gen_lowpart (GET_MODE (dest), + SET_SRC (setter))); +- else if (GET_CODE (setter) == CLOBBER_HIGH) +- { +- reg_stat_type *rsp = ®_stat[REGNO (dest)]; +- if (rsp->last_set_value +- && reg_is_clobbered_by_clobber_high +- (REGNO (dest), GET_MODE (rsp->last_set_value), +- XEXP (setter, 0))) +- record_value_for_reg (dest, NULL, NULL_RTX); +- } + else + record_value_for_reg (dest, record_dead_insn, NULL_RTX); + } +@@ -13487,10 +13469,10 @@ record_dead_and_set_regs (rtx_insn *insn) + the return value register is set at this LUID. We could + still replace a register with the return value from the + wrong subroutine call! */ +- note_stores (PATTERN (insn), record_dead_and_set_regs_1, NULL_RTX); ++ note_stores (insn, record_dead_and_set_regs_1, NULL_RTX); + } + else +- note_stores (PATTERN (insn), record_dead_and_set_regs_1, insn); ++ note_stores (insn, record_dead_and_set_regs_1, insn); + } + + /* If a SUBREG has the promoted bit set, it is in fact a property of the +@@ -13853,10 +13835,6 @@ reg_dead_at_p_1 (rtx dest, const_rtx x, void *data ATTRIBUTE_UNUSED) + if (!REG_P (dest)) + return; + +- if (GET_CODE (x) == CLOBBER_HIGH +- && !reg_is_clobbered_by_clobber_high (reg_dead_reg, XEXP (x, 0))) +- return; +- + regno = REGNO (dest); + endregno = END_REGNO (dest); + if (reg_dead_endregno > regno && reg_dead_regno < endregno) +@@ -13904,7 +13882,7 @@ reg_dead_at_p (rtx reg, rtx_insn *insn) + if (find_regno_note (insn, REG_UNUSED, reg_dead_regno)) + return 1; + +- note_stores (PATTERN (insn), reg_dead_at_p_1, NULL); ++ note_stores (insn, reg_dead_at_p_1, NULL); + if (reg_dead_flag) + return reg_dead_flag == 1 ? 1 : 0; + +diff --git a/gcc/common.opt b/gcc/common.opt +index 0bdf51dd8..7dee534b8 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1080,16 +1080,16 @@ Common Report Var(flag_branch_probabilities) Optimization + Use profiling information for branch probabilities. + + fbranch-target-load-optimize +-Common Report Var(flag_branch_target_load_optimize) Optimization +-Perform branch target load optimization before prologue / epilogue threading. ++Common Ignore ++Does nothing. Preserved for backward compatibility. + + fbranch-target-load-optimize2 +-Common Report Var(flag_branch_target_load_optimize2) Optimization +-Perform branch target load optimization after prologue / epilogue threading. ++Common Ignore ++Does nothing. Preserved for backward compatibility. + + fbtr-bb-exclusive +-Common Report Var(flag_btr_bb_exclusive) Optimization +-Restrict target load migration not to re-use registers in any basic block. ++Common Ignore ++Does nothing. Preserved for backward compatibility. + + fcall-saved- + Common Joined RejectNegative Var(common_deferred_options) Defer +@@ -1289,6 +1289,26 @@ Enum(diagnostic_color_rule) String(always) Value(DIAGNOSTICS_COLOR_YES) + EnumValue + Enum(diagnostic_color_rule) String(auto) Value(DIAGNOSTICS_COLOR_AUTO) + ++fdiagnostics-urls= ++Driver Common Joined RejectNegative Var(flag_diagnostics_show_urls) Enum(diagnostic_url_rule) Init(DIAGNOSTICS_URL_AUTO) ++-fdiagnostics-urls=[never|always|auto] Embed URLs in diagnostics. ++ ++; Required for these enum values. 
++SourceInclude ++diagnostic-url.h ++ ++Enum ++Name(diagnostic_url_rule) Type(int) ++ ++EnumValue ++Enum(diagnostic_url_rule) String(never) Value(DIAGNOSTICS_URL_NO) ++ ++EnumValue ++Enum(diagnostic_url_rule) String(always) Value(DIAGNOSTICS_URL_YES) ++ ++EnumValue ++Enum(diagnostic_url_rule) String(auto) Value(DIAGNOSTICS_URL_AUTO) ++ + fdiagnostics-format= + Common Joined RejectNegative Enum(diagnostics_output_format) + -fdiagnostics-format=[text|json] Select output format. +@@ -1963,7 +1983,7 @@ Common Var(flag_dce) Init(1) Optimization + Use the RTL dead code elimination pass. + + fdse +-Common Var(flag_dse) Init(1) Optimization ++Common Var(flag_dse) Init(0) Optimization + Use the RTL dead store elimination pass. + + freschedule-modulo-scheduled-loops +diff --git a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c +index bab3ab3fa..07c032539 100644 +--- a/gcc/common/config/aarch64/aarch64-common.c ++++ b/gcc/common/config/aarch64/aarch64-common.c +@@ -170,9 +170,9 @@ aarch64_handle_option (struct gcc_options *opts, + struct aarch64_option_extension + { + const char *const name; +- const unsigned long flag_canonical; +- const unsigned long flags_on; +- const unsigned long flags_off; ++ const uint64_t flag_canonical; ++ const uint64_t flags_on; ++ const uint64_t flags_off; + const bool is_synthetic; + }; + +@@ -201,14 +201,14 @@ struct processor_name_to_arch + { + const std::string processor_name; + const enum aarch64_arch arch; +- const unsigned long flags; ++ const uint64_t flags; + }; + + struct arch_to_arch_name + { + const enum aarch64_arch arch; + const std::string arch_name; +- const unsigned long flags; ++ const uint64_t flags; + }; + + /* Map processor names to the architecture revision they implement and +@@ -238,7 +238,7 @@ static const struct arch_to_arch_name all_architectures[] = + a copy of the string is created and stored to INVALID_EXTENSION. */ + + enum aarch64_parse_opt_result +-aarch64_parse_extension (const char *str, unsigned long *isa_flags, ++aarch64_parse_extension (const char *str, uint64_t *isa_flags, + std::string *invalid_extension) + { + /* The extension string is parsed left to right. */ +@@ -326,18 +326,21 @@ int opt_ext_cmp (const void* a, const void* b) + turns on as a dependency. As an example +dotprod turns on FL_DOTPROD and + FL_SIMD. As such the set of bits represented by this option is + {FL_DOTPROD, FL_SIMD}. */ +- unsigned long total_flags_a = opt_a->flag_canonical & opt_a->flags_on; +- unsigned long total_flags_b = opt_b->flag_canonical & opt_b->flags_on; ++ uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on; ++ uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on; + int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a); + int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b); + int order = popcnt_b - popcnt_a; + + /* If they have the same amount of bits set, give it a more + deterministic ordering by using the value of the bits themselves. */ +- if (order == 0) +- return total_flags_b - total_flags_a; ++ if (order != 0) ++ return order; + +- return order; ++ if (total_flags_a != total_flags_b) ++ return total_flags_a < total_flags_b ? 1 : -1; ++ ++ return 0; + } + + /* Implement TARGET_OPTION_INIT_STRUCT. 
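The opt_ext_cmp change above is a correctness fix for the qsort comparator: the old tie-break returned total_flags_b - total_flags_a, and narrowing that 64-bit difference to the comparator's int result can misorder entries once extension flags occupy the upper bits, so the new code compares explicitly and returns -1/0/1. A small self-contained sketch of the same pattern on plain uint64_t keys (the names here are illustrative, not GCC's):

#include <stdint.h>

/* qsort-style three-way compare: more set bits first, then larger
   key last, with no narrowing of a 64-bit difference to int.  */
static int
compare_flag_sets (uint64_t a, uint64_t b)
{
  int popcnt_a = __builtin_popcountll (a);
  int popcnt_b = __builtin_popcountll (b);
  if (popcnt_a != popcnt_b)
    return popcnt_b - popcnt_a;	/* small values: subtraction is safe */
  if (a != b)
    return a < b ? 1 : -1;	/* explicit compare avoids truncation */
  return 0;
}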
*/ +@@ -373,9 +376,9 @@ aarch64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED) + */ + + static bool +-aarch64_contains_opt (unsigned long isa_flag_bits, opt_ext *opt) ++aarch64_contains_opt (uint64_t isa_flag_bits, opt_ext *opt) + { +- unsigned long flags_check ++ uint64_t flags_check + = opt->is_synthetic ? opt->flags_on : opt->flag_canonical; + + return (isa_flag_bits & flags_check) == flags_check; +@@ -388,13 +391,13 @@ aarch64_contains_opt (unsigned long isa_flag_bits, opt_ext *opt) + that all the "+" flags come before the "+no" flags. */ + + std::string +-aarch64_get_extension_string_for_isa_flags (unsigned long isa_flags, +- unsigned long default_arch_flags) ++aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, ++ uint64_t default_arch_flags) + { + const struct aarch64_option_extension *opt = NULL; + std::string outstr = ""; + +- unsigned long isa_flag_bits = isa_flags; ++ uint64_t isa_flag_bits = isa_flags; + + /* Pass one: Minimize the search space by reducing the set of options + to the smallest set that still turns on the same features as before in +@@ -538,7 +541,7 @@ aarch64_rewrite_selected_cpu (const char *name) + || a_to_an->arch == aarch64_no_arch) + fatal_error (input_location, "unknown value %qs for %<-mcpu%>", name); + +- unsigned long extensions = p_to_a->flags; ++ uint64_t extensions = p_to_a->flags; + aarch64_parse_extension (extension_str.c_str (), &extensions, NULL); + + std::string outstr = a_to_an->arch_name +diff --git a/gcc/config.gcc b/gcc/config.gcc +index b2282ecdf..506a918ed 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -315,12 +315,12 @@ m32c*-*-*) + ;; + aarch64*-*-*) + cpu_type=aarch64 +- extra_headers="arm_fp16.h arm_neon.h arm_acle.h" ++ extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h" + c_target_objs="aarch64-c.o" + cxx_target_objs="aarch64-c.o" + d_target_objs="aarch64-d.o" +- extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o" +- target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c" ++ extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o" ++ target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc" + target_has_targetm_common=yes + ;; + alpha*-*-*) +@@ -382,7 +382,8 @@ i[34567]86-*-*) + c_target_objs="i386-c.o" + cxx_target_objs="i386-c.o" + d_target_objs="i386-d.o" +- extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o" ++ extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o i386-options.o i386-builtins.o i386-expand.o i386-features.o" ++ target_gtfiles="\$(srcdir)/config/i386/i386-builtins.c \$(srcdir)/config/i386/i386-expand.c \$(srcdir)/config/i386/i386-options.c" + extra_options="${extra_options} fused-madd.opt" + extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h + pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h +@@ -414,7 +415,8 @@ x86_64-*-*) + cxx_target_objs="i386-c.o" + d_target_objs="i386-d.o" + extra_options="${extra_options} fused-madd.opt" +- extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o" ++ extra_objs="x86-tune-sched.o x86-tune-sched-bd.o 
x86-tune-sched-atom.o x86-tune-sched-core.o i386-options.o i386-builtins.o i386-expand.o i386-features.o" ++ target_gtfiles="\$(srcdir)/config/i386/i386-builtins.c \$(srcdir)/config/i386/i386-expand.c \$(srcdir)/config/i386/i386-options.c" + extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h + pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h + nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h +@@ -980,7 +982,7 @@ esac + case ${target} in + aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*) + tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h" +- tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-elf-raw.h" ++ tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-elf-raw.h" + tmake_file="${tmake_file} aarch64/t-aarch64" + case $target in + aarch64-*-elf*) +@@ -1017,13 +1019,19 @@ aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*) + ;; + aarch64*-*-freebsd*) + tm_file="${tm_file} dbxelf.h elfos.h ${fbsd_tm_file}" +- tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-freebsd.h" ++ tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-freebsd.h" + tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-freebsd" + tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1" + ;; ++aarch64*-*-netbsd*) ++ tm_file="${tm_file} dbxelf.h elfos.h ${nbsd_tm_file}" ++ tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-netbsd.h" ++ tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-netbsd" ++ extra_options="${extra_options} netbsd.opt netbsd-elf.opt" ++ ;; + aarch64*-*-linux*) + tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h" +- tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-linux.h" ++ tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-linux.h" + tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-linux" + tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1" + case $target in +@@ -3847,32 +3855,40 @@ case "${target}" in + sed -e 's/,.*$//'` + fi + ++ # Use the pre-processor to strip flatten the options. ++ # This makes the format less rigid than if we use ++ # grep and sed directly here. ++ opt_macro="AARCH64_OPT_EXTENSION(A, B, C, D, E, F)=A, B, C, D, E, F" ++ options_parsed="`$ac_cv_prog_CPP -D"$opt_macro" -x c \ ++ ${srcdir}/config/aarch64/aarch64-option-extensions.def`" ++ ++ # Match one element inside AARCH64_OPT_EXTENSION, we ++ # consume anything that's not a ,. ++ elem="[ ]*\([^,]\+\)[ ]*" ++ ++ # Repeat the pattern for the number of entries in the ++ # AARCH64_OPT_EXTENSION, currently 6 times. 
++ sed_patt="^$elem,$elem,$elem,$elem,$elem,$elem" ++ + while [ x"$ext_val" != x ] + do + ext_val=`echo $ext_val | sed -e 's/\+//'` + ext=`echo $ext_val | sed -e 's/\+.*//'` + base_ext=`echo $ext | sed -e 's/^no//'` ++ opt_line=`echo -e "$options_parsed" | \ ++ grep "^\"$base_ext\""` + + if [ x"$base_ext" = x ] \ +- || grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ +- ${srcdir}/config/aarch64/aarch64-option-extensions.def \ +- > /dev/null; then +- +- ext_canon=`grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ +- ${srcdir}/config/aarch64/aarch64-option-extensions.def | \ +- sed -e 's/^[^,]*,[ ]*//' | \ +- sed -e 's/,.*$//'` +- ext_on=`grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ +- ${srcdir}/config/aarch64/aarch64-option-extensions.def | \ +- sed -e 's/^[^,]*,[ ]*[^,]*,[ ]*//' | \ +- sed -e 's/,.*$//' | \ +- sed -e 's/).*$//'` +- ext_off=`grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ +- ${srcdir}/config/aarch64/aarch64-option-extensions.def | \ +- sed -e 's/^[^,]*,[ ]*[^,]*,[ ]*[^,]*,[ ]*//' | \ +- sed -e 's/,.*$//' | \ +- sed -e 's/).*$//'` +- ++ || [[ -n $opt_line ]]; then ++ ++ # These regexp extract the elements based on ++ # their group match index in the regexp. ++ ext_canon=`echo -e "$opt_line" | \ ++ sed -e "s/$sed_patt/\2/"` ++ ext_on=`echo -e "$opt_line" | \ ++ sed -e "s/$sed_patt/\3/"` ++ ext_off=`echo -e "$opt_line" | \ ++ sed -e "s/$sed_patt/\4/"` + + if [ $ext = $base_ext ]; then + # Adding extension +diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def +index d258bd492..e464d329c 100644 +--- a/gcc/config/aarch64/aarch64-arches.def ++++ b/gcc/config/aarch64/aarch64-arches.def +@@ -36,5 +36,6 @@ AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2) + AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3) + AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_ARCH8_4) + AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_ARCH8_5) ++AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_ARCH8_6) + + #undef AARCH64_ARCH +diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c +index e519a0f0a..db8ebb1ba 100644 +--- a/gcc/config/aarch64/aarch64-bti-insert.c ++++ b/gcc/config/aarch64/aarch64-bti-insert.c +@@ -106,7 +106,9 @@ aarch64_pac_insn_p (rtx x) + int unspec_val = XINT (sub, 1); + switch (unspec_val) + { +- case UNSPEC_PACISP: ++ case UNSPEC_PACIASP: ++ /* fall-through. */ ++ case UNSPEC_PACIBSP: + return true; + + default: +diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c +index d7b1b7bd6..c890fcc37 100644 +--- a/gcc/config/aarch64/aarch64-builtins.c ++++ b/gcc/config/aarch64/aarch64-builtins.c +@@ -68,6 +68,9 @@ + #define hi_UP E_HImode + #define hf_UP E_HFmode + #define qi_UP E_QImode ++#define bf_UP E_BFmode ++#define v4bf_UP E_V4BFmode ++#define v8bf_UP E_V8BFmode + #define UP(X) X##_UP + + #define SIMD_MAX_BUILTIN_ARGS 5 +@@ -107,6 +110,9 @@ enum aarch64_type_qualifiers + /* Lane indices selected in pairs. - must be in range, and flipped for + bigendian. */ + qualifier_lane_pair_index = 0x800, ++ /* Lane indices selected in quadtuplets. - must be in range, and flipped for ++ bigendian. 
*/ ++ qualifier_lane_quadtup_index = 0x1000, + }; + + typedef struct +@@ -173,6 +179,10 @@ aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, + qualifier_unsigned, qualifier_immediate }; + #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers) ++static enum aarch64_type_qualifiers ++aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none }; ++#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers) + + + static enum aarch64_type_qualifiers +@@ -191,6 +201,19 @@ aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + qualifier_unsigned, qualifier_lane_index }; + #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers) + ++static enum aarch64_type_qualifiers ++aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_none, qualifier_none, qualifier_unsigned, ++ qualifier_none, qualifier_lane_quadtup_index }; ++#define TYPES_QUADOPSSUS_LANE_QUADTUP \ ++ (aarch64_types_quadopssus_lane_quadtup_qualifiers) ++static enum aarch64_type_qualifiers ++aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_none, qualifier_none, qualifier_none, ++ qualifier_unsigned, qualifier_lane_quadtup_index }; ++#define TYPES_QUADOPSSSU_LANE_QUADTUP \ ++ (aarch64_types_quadopsssu_lane_quadtup_qualifiers) ++ + static enum aarch64_type_qualifiers + aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, +@@ -347,6 +370,12 @@ aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + #define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \ + VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \ + VAR1 (T, X, MAP, N) ++#define VAR15(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \ ++ VAR14 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \ ++ VAR1 (T, X, MAP, O) ++#define VAR16(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ ++ VAR15 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \ ++ VAR1 (T, X, MAP, P) + + #include "aarch64-builtin-iterators.h" + +@@ -432,10 +461,22 @@ enum aarch64_builtins + /* ARMv8.3-A Pointer Authentication Builtins. */ + AARCH64_PAUTH_BUILTIN_AUTIA1716, + AARCH64_PAUTH_BUILTIN_PACIA1716, ++ AARCH64_PAUTH_BUILTIN_AUTIB1716, ++ AARCH64_PAUTH_BUILTIN_PACIB1716, + AARCH64_PAUTH_BUILTIN_XPACLRI, + /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins. */ + AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE, + AARCH64_SIMD_FCMLA_LANEQ_BUILTINS ++ /* Builtin for Arm8.3-a Javascript conversion instruction. */ ++ AARCH64_JSCVT, ++ /* TME builtins. */ ++ AARCH64_TME_BUILTIN_TSTART, ++ AARCH64_TME_BUILTIN_TCOMMIT, ++ AARCH64_TME_BUILTIN_TTEST, ++ AARCH64_TME_BUILTIN_TCANCEL, ++ /* Armv8.5-a RNG instruction builtins. */ ++ AARCH64_BUILTIN_RNG_RNDR, ++ AARCH64_BUILTIN_RNG_RNDRRS, + AARCH64_BUILTIN_MAX + }; + +@@ -490,6 +531,7 @@ const char *aarch64_scalar_builtin_types[] = { + "__builtin_aarch64_simd_oi", + "__builtin_aarch64_simd_ci", + "__builtin_aarch64_simd_xi", ++ "__builtin_aarch64_simd_bf", + NULL + }; + +@@ -547,6 +589,21 @@ static tree aarch64_simd_intXI_type_node = NULL_TREE; + tree aarch64_fp16_type_node = NULL_TREE; + tree aarch64_fp16_ptr_type_node = NULL_TREE; + ++/* Back-end node type for brain float (bfloat) types. 
*/ ++tree aarch64_bf16_type_node = NULL_TREE; ++tree aarch64_bf16_ptr_type_node = NULL_TREE; ++ ++/* Wrapper around add_builtin_function. NAME is the name of the built-in ++ function, TYPE is the function type, and CODE is the function subcode ++ (relative to AARCH64_BUILTIN_GENERAL). */ ++static tree ++aarch64_general_add_builtin (const char *name, tree type, unsigned int code) ++{ ++ code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL; ++ return add_builtin_function (name, type, code, BUILT_IN_MD, ++ NULL, NULL_TREE); ++} ++ + static const char * + aarch64_mangle_builtin_scalar_type (const_tree type) + { +@@ -585,7 +642,7 @@ aarch64_mangle_builtin_vector_type (const_tree type) + } + + const char * +-aarch64_mangle_builtin_type (const_tree type) ++aarch64_general_mangle_builtin_type (const_tree type) + { + const char *mangle; + /* Walk through all the AArch64 builtins types tables to filter out the +@@ -627,6 +684,8 @@ aarch64_simd_builtin_std_type (machine_mode mode, + return float_type_node; + case E_DFmode: + return double_type_node; ++ case E_BFmode: ++ return aarch64_bf16_type_node; + default: + gcc_unreachable (); + } +@@ -718,6 +777,10 @@ aarch64_init_simd_builtin_types (void) + aarch64_simd_types[Float64x1_t].eltype = double_type_node; + aarch64_simd_types[Float64x2_t].eltype = double_type_node; + ++ /* Init Bfloat vector types with underlying __bf16 type. */ ++ aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node; ++ aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node; ++ + for (i = 0; i < nelts; i++) + { + tree eltype = aarch64_simd_types[i].eltype; +@@ -782,6 +845,8 @@ aarch64_init_simd_builtin_scalar_types (void) + "__builtin_aarch64_simd_poly128"); + (*lang_hooks.types.register_builtin_type) (intTI_type_node, + "__builtin_aarch64_simd_ti"); ++ (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node, ++ "__builtin_aarch64_simd_bf"); + /* Unsigned integer types for various mode sizes. 
*/ + (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node, + "__builtin_aarch64_simd_uqi"); +@@ -816,8 +881,7 @@ aarch64_init_fcmla_laneq_builtins (void) + = aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index); + tree ftype = build_function_type_list (argtype, argtype, argtype, + quadtype, lanetype, NULL_TREE); +- tree fndecl = add_builtin_function (d->name, ftype, d->fcode, +- BUILT_IN_MD, NULL, NULL_TREE); ++ tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode); + + aarch64_builtin_decls[d->fcode] = fndecl; + } +@@ -846,10 +910,10 @@ aarch64_init_simd_builtins (void) + size_type_node, + intSI_type_node, + NULL); +- aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] = +- add_builtin_function ("__builtin_aarch64_im_lane_boundsi", lane_check_fpr, +- AARCH64_SIMD_BUILTIN_LANE_CHECK, BUILT_IN_MD, +- NULL, NULL_TREE); ++ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] ++ = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi", ++ lane_check_fpr, ++ AARCH64_SIMD_BUILTIN_LANE_CHECK); + + for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) + { +@@ -947,8 +1011,7 @@ aarch64_init_simd_builtins (void) + snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s", + d->name); + +- fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, +- NULL, NULL_TREE); ++ fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode); + aarch64_builtin_decls[fcode] = fndecl; + } + +@@ -968,8 +1031,7 @@ aarch64_init_crc32_builtins () + tree argtype = aarch64_simd_builtin_std_type (d->mode, + qualifier_unsigned); + tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); +- tree fndecl = add_builtin_function (d->name, ftype, d->fcode, +- BUILT_IN_MD, NULL, NULL_TREE); ++ tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode); + + aarch64_builtin_decls[d->fcode] = fndecl; + } +@@ -1009,8 +1071,8 @@ aarch64_init_builtin_rsqrt (void) + for (; bdd < bdd_end; bdd++) + { + ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE); +- fndecl = add_builtin_function (bdd->builtin_name, +- ftype, bdd->function_code, BUILT_IN_MD, NULL, NULL_TREE); ++ fndecl = aarch64_general_add_builtin (bdd->builtin_name, ++ ftype, bdd->function_code); + aarch64_builtin_decls[bdd->function_code] = fndecl; + } + } +@@ -1030,6 +1092,19 @@ aarch64_init_fp16_types (void) + aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node); + } + ++/* Initialize the backend REAL_TYPE type supporting bfloat types. */ ++static void ++aarch64_init_bf16_types (void) ++{ ++ aarch64_bf16_type_node = make_node (REAL_TYPE); ++ TYPE_PRECISION (aarch64_bf16_type_node) = 16; ++ SET_TYPE_MODE (aarch64_bf16_type_node, BFmode); ++ layout_type (aarch64_bf16_type_node); ++ ++ lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16"); ++ aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node); ++} ++ + /* Pointer authentication builtins that will become NOP on legacy platform. + Currently, these builtins are for internal use only (libgcc EH unwinder). 
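Earlier in this file's hunks, aarch64_general_add_builtin encodes the builtin group in the low bits of every function code: code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL. A minimal sketch of the matching decode step a dispatcher would perform (the helper name and out-parameters are illustrative; only AARCH64_BUILTIN_SHIFT and AARCH64_BUILTIN_GENERAL come from the patch):

/* Split a combined builtin function code into the group held in the
   low AARCH64_BUILTIN_SHIFT bits and the group-relative subcode.  */
static inline void
sketch_split_builtin_code (unsigned int code,
			   unsigned int *group, unsigned int *subcode)
{
  *group = code & ((1u << AARCH64_BUILTIN_SHIFT) - 1);
  *subcode = code >> AARCH64_BUILTIN_SHIFT;
}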
*/ + +@@ -1044,21 +1119,77 @@ aarch64_init_pauth_hint_builtins (void) + = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE); + + aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716] +- = add_builtin_function ("__builtin_aarch64_autia1716", ftype_pointer_auth, +- AARCH64_PAUTH_BUILTIN_AUTIA1716, BUILT_IN_MD, NULL, +- NULL_TREE); ++ = aarch64_general_add_builtin ("__builtin_aarch64_autia1716", ++ ftype_pointer_auth, ++ AARCH64_PAUTH_BUILTIN_AUTIA1716); + aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716] +- = add_builtin_function ("__builtin_aarch64_pacia1716", ftype_pointer_auth, +- AARCH64_PAUTH_BUILTIN_PACIA1716, BUILT_IN_MD, NULL, +- NULL_TREE); ++ = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716", ++ ftype_pointer_auth, ++ AARCH64_PAUTH_BUILTIN_PACIA1716); ++ aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716] ++ = aarch64_general_add_builtin ("__builtin_aarch64_autib1716", ++ ftype_pointer_auth, ++ AARCH64_PAUTH_BUILTIN_AUTIB1716); ++ aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716] ++ = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716", ++ ftype_pointer_auth, ++ AARCH64_PAUTH_BUILTIN_PACIB1716); + aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI] +- = add_builtin_function ("__builtin_aarch64_xpaclri", ftype_pointer_strip, +- AARCH64_PAUTH_BUILTIN_XPACLRI, BUILT_IN_MD, NULL, +- NULL_TREE); ++ = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri", ++ ftype_pointer_strip, ++ AARCH64_PAUTH_BUILTIN_XPACLRI); ++} ++ ++/* Initialize the transactional memory extension (TME) builtins. */ ++static void ++aarch64_init_tme_builtins (void) ++{ ++ tree ftype_uint64_void ++ = build_function_type_list (uint64_type_node, NULL); ++ tree ftype_void_void ++ = build_function_type_list (void_type_node, NULL); ++ tree ftype_void_uint64 ++ = build_function_type_list (void_type_node, uint64_type_node, NULL); ++ ++ aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART] ++ = aarch64_general_add_builtin ("__builtin_aarch64_tstart", ++ ftype_uint64_void, ++ AARCH64_TME_BUILTIN_TSTART); ++ aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST] ++ = aarch64_general_add_builtin ("__builtin_aarch64_ttest", ++ ftype_uint64_void, ++ AARCH64_TME_BUILTIN_TTEST); ++ aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT] ++ = aarch64_general_add_builtin ("__builtin_aarch64_tcommit", ++ ftype_void_void, ++ AARCH64_TME_BUILTIN_TCOMMIT); ++ aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL] ++ = aarch64_general_add_builtin ("__builtin_aarch64_tcancel", ++ ftype_void_uint64, ++ AARCH64_TME_BUILTIN_TCANCEL); ++} ++ ++/* Add builtins for Random Number instructions. */ ++ ++static void ++aarch64_init_rng_builtins (void) ++{ ++ tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node); ++ tree ftype ++ = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL); ++ aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR] ++ = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype, ++ AARCH64_BUILTIN_RNG_RNDR); ++ aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS] ++ = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype, ++ AARCH64_BUILTIN_RNG_RNDRRS); + } + ++ ++/* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. 
*/ ++ + void +-aarch64_init_builtins (void) ++aarch64_general_init_builtins (void) + { + tree ftype_set_fpr + = build_function_type_list (void_type_node, unsigned_type_node, NULL); +@@ -1066,25 +1197,38 @@ aarch64_init_builtins (void) + = build_function_type_list (unsigned_type_node, NULL); + + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] +- = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, +- AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); ++ = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr", ++ ftype_get_fpr, ++ AARCH64_BUILTIN_GET_FPCR); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] +- = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, +- AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); ++ = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr", ++ ftype_set_fpr, ++ AARCH64_BUILTIN_SET_FPCR); + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] +- = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, +- AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); ++ = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr", ++ ftype_get_fpr, ++ AARCH64_BUILTIN_GET_FPSR); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] +- = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, +- AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); ++ = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr", ++ ftype_set_fpr, ++ AARCH64_BUILTIN_SET_FPSR); + + aarch64_init_fp16_types (); + ++ aarch64_init_bf16_types (); ++ + if (TARGET_SIMD) + aarch64_init_simd_builtins (); + + aarch64_init_crc32_builtins (); + aarch64_init_builtin_rsqrt (); ++ aarch64_init_rng_builtins (); ++ ++ tree ftype_jcvt ++ = build_function_type_list (intSI_type_node, double_type_node, NULL); ++ aarch64_builtin_decls[AARCH64_JSCVT] ++ = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt, ++ AARCH64_JSCVT); + + /* Initialize pointer authentication builtins which are backed by instructions + in NOP encoding space. +@@ -1094,10 +1238,14 @@ aarch64_init_builtins (void) + register them. */ + if (!TARGET_ILP32) + aarch64_init_pauth_hint_builtins (); ++ ++ if (TARGET_TME) ++ aarch64_init_tme_builtins (); + } + ++/* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. */ + tree +-aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) ++aarch64_general_builtin_decl (unsigned code, bool) + { + if (code >= AARCH64_BUILTIN_MAX) + return error_mark_node; +@@ -1112,6 +1260,7 @@ typedef enum + SIMD_ARG_LANE_INDEX, + SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, + SIMD_ARG_LANE_PAIR_INDEX, ++ SIMD_ARG_LANE_QUADTUP_INDEX, + SIMD_ARG_STOP + } builtin_simd_arg; + +@@ -1201,9 +1350,25 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, + op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), + SImode); + } +- /* Fall through - if the lane index isn't a constant then +- the next case will error. */ +- /* FALLTHRU */ ++ /* If the lane index isn't a constant then error out. */ ++ goto constant_arg; ++ case SIMD_ARG_LANE_QUADTUP_INDEX: ++ /* Must be a previous operand into which this is an index and ++ index is restricted to nunits / 4. */ ++ gcc_assert (opc > 0); ++ if (CONST_INT_P (op[opc])) ++ { ++ machine_mode vmode = insn_data[icode].operand[opc - 1].mode; ++ unsigned int nunits ++ = GET_MODE_NUNITS (vmode).to_constant (); ++ aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp); ++ /* Keep to GCC-vector-extension lane indices in the RTL. 
*/ ++ int lane = INTVAL (op[opc]); ++ op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), ++ SImode); ++ } ++ /* If the lane index isn't a constant then error out. */ ++ goto constant_arg; + case SIMD_ARG_CONSTANT: + constant_arg: + if (!(*insn_data[icode].operand[opc].predicate) +@@ -1316,6 +1481,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) + args[k] = SIMD_ARG_LANE_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index) + args[k] = SIMD_ARG_LANE_PAIR_INDEX; ++ else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index) ++ args[k] = SIMD_ARG_LANE_QUADTUP_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) + args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; + else if (d->qualifiers[qualifiers_k] & qualifier_immediate) +@@ -1497,17 +1664,90 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode) + return target; + } + +-/* Expand an expression EXP that calls a built-in function, +- with result going to TARGET if that's convenient. */ ++/* Function to expand an expression EXP which calls one of the Transactional ++ Memory Extension (TME) builtins FCODE with the result going to TARGET. */ ++static rtx ++aarch64_expand_builtin_tme (int fcode, tree exp, rtx target) ++{ ++ switch (fcode) ++ { ++ case AARCH64_TME_BUILTIN_TSTART: ++ target = gen_reg_rtx (DImode); ++ emit_insn (GEN_FCN (CODE_FOR_tstart) (target)); ++ break; ++ ++ case AARCH64_TME_BUILTIN_TTEST: ++ target = gen_reg_rtx (DImode); ++ emit_insn (GEN_FCN (CODE_FOR_ttest) (target)); ++ break; ++ ++ case AARCH64_TME_BUILTIN_TCOMMIT: ++ emit_insn (GEN_FCN (CODE_FOR_tcommit) ()); ++ break; ++ ++ case AARCH64_TME_BUILTIN_TCANCEL: ++ { ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ rtx op0 = expand_normal (arg0); ++ if (CONST_INT_P (op0) && UINTVAL (op0) <= 65536) ++ emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0)); ++ else ++ { ++ error ("%Kargument must be a 16-bit constant immediate", exp); ++ return const0_rtx; ++ } ++ } ++ break; ++ ++ default : ++ gcc_unreachable (); ++ } ++ return target; ++} ++ ++/* Expand a random number builtin EXP with code FCODE, putting the result ++ int TARGET. If IGNORE is true the return value is ignored. */ ++ + rtx +-aarch64_expand_builtin (tree exp, +- rtx target, +- rtx subtarget ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- int ignore ATTRIBUTE_UNUSED) ++aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore) ++{ ++ rtx pat; ++ enum insn_code icode; ++ if (fcode == AARCH64_BUILTIN_RNG_RNDR) ++ icode = CODE_FOR_aarch64_rndr; ++ else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS) ++ icode = CODE_FOR_aarch64_rndrrs; ++ else ++ gcc_unreachable (); ++ ++ rtx rand = gen_reg_rtx (DImode); ++ pat = GEN_FCN (icode) (rand); ++ if (!pat) ++ return NULL_RTX; ++ ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ rtx res_addr = expand_normal (arg0); ++ res_addr = convert_memory_address (Pmode, res_addr); ++ rtx res_mem = gen_rtx_MEM (DImode, res_addr); ++ emit_insn (pat); ++ emit_move_insn (res_mem, rand); ++ /* If the status result is unused don't generate the CSET code. */ ++ if (ignore) ++ return target; ++ ++ rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM); ++ rtx cmp_rtx = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx); ++ emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg)); ++ return target; ++} ++ ++/* Expand an expression EXP that calls built-in function FCODE, ++ with result going to TARGET if that's convenient. IGNORE is true ++ if the result of the builtin is ignored. 
*/ ++rtx ++aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, ++ int ignore) + { +- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); +- int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + rtx pat, op0; + tree arg0; +@@ -1540,6 +1780,8 @@ aarch64_expand_builtin (tree exp, + + case AARCH64_PAUTH_BUILTIN_AUTIA1716: + case AARCH64_PAUTH_BUILTIN_PACIA1716: ++ case AARCH64_PAUTH_BUILTIN_AUTIB1716: ++ case AARCH64_PAUTH_BUILTIN_PACIB1716: + case AARCH64_PAUTH_BUILTIN_XPACLRI: + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = force_reg (Pmode, expand_normal (arg0)); +@@ -1563,8 +1805,24 @@ aarch64_expand_builtin (tree exp, + { + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op1 = force_reg (Pmode, expand_normal (arg1)); +- icode = (fcode == AARCH64_PAUTH_BUILTIN_PACIA1716 +- ? CODE_FOR_paci1716 : CODE_FOR_auti1716); ++ switch (fcode) ++ { ++ case AARCH64_PAUTH_BUILTIN_AUTIA1716: ++ icode = CODE_FOR_autia1716; ++ break; ++ case AARCH64_PAUTH_BUILTIN_AUTIB1716: ++ icode = CODE_FOR_autib1716; ++ break; ++ case AARCH64_PAUTH_BUILTIN_PACIA1716: ++ icode = CODE_FOR_pacia1716; ++ break; ++ case AARCH64_PAUTH_BUILTIN_PACIB1716: ++ icode = CODE_FOR_pacib1716; ++ break; ++ default: ++ icode = 0; ++ gcc_unreachable (); ++ } + + rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM); + rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM); +@@ -1576,6 +1834,16 @@ aarch64_expand_builtin (tree exp, + + return target; + ++ case AARCH64_JSCVT: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = force_reg (DFmode, expand_normal (arg0)); ++ if (!target) ++ target = gen_reg_rtx (SImode); ++ else ++ target = force_reg (SImode, target); ++ emit_insn (GEN_FCN (CODE_FOR_aarch64_fjcvtzs) (target, op0)); ++ return target; ++ + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF: + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF: + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF: +@@ -1585,6 +1853,9 @@ aarch64_expand_builtin (tree exp, + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF: + case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF: + return aarch64_expand_fcmla_builtin (exp, target, fcode); ++ case AARCH64_BUILTIN_RNG_RNDR: ++ case AARCH64_BUILTIN_RNG_RNDRRS: ++ return aarch64_expand_rng_builtin (exp, target, fcode, ignore); + } + + if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) +@@ -1599,6 +1870,12 @@ aarch64_expand_builtin (tree exp, + || fcode == AARCH64_BUILTIN_RSQRT_V4SF) + return aarch64_expand_builtin_rsqrt (fcode, exp, target); + ++ if (fcode == AARCH64_TME_BUILTIN_TSTART ++ || fcode == AARCH64_TME_BUILTIN_TCOMMIT ++ || fcode == AARCH64_TME_BUILTIN_TTEST ++ || fcode == AARCH64_TME_BUILTIN_TCANCEL) ++ return aarch64_expand_builtin_tme (fcode, exp, target); ++ + gcc_unreachable (); + } + +@@ -1750,7 +2027,7 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, + /* Return builtin for reciprocal square root. */ + + tree +-aarch64_builtin_rsqrt (unsigned int fn) ++aarch64_general_builtin_rsqrt (unsigned int fn) + { + if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df) + return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF]; +@@ -1765,13 +2042,14 @@ aarch64_builtin_rsqrt (unsigned int fn) + #define VAR1(T, N, MAP, A) \ + case AARCH64_SIMD_BUILTIN_##T##_##N##A: + ++/* Try to fold a call to the built-in function with subcode FCODE. The ++ function is passed the N_ARGS arguments in ARGS and it returns a value ++ of type TYPE. Return the new expression on success and NULL_TREE on ++ failure. 
*/ + tree +-aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, +- bool ignore ATTRIBUTE_UNUSED) ++aarch64_general_fold_builtin (unsigned int fcode, tree type, ++ unsigned int n_args ATTRIBUTE_UNUSED, tree *args) + { +- int fcode = DECL_FUNCTION_CODE (fndecl); +- tree type = TREE_TYPE (TREE_TYPE (fndecl)); +- + switch (fcode) + { + BUILTIN_VDQF (UNOP, abs, 2) +@@ -1787,109 +2065,90 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, + return NULL_TREE; + } + +-bool +-aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) ++/* Try to fold STMT, given that it's a call to the built-in function with ++ subcode FCODE. Return the new statement on success and null on ++ failure. */ ++gimple * ++aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt) + { +- bool changed = false; +- gimple *stmt = gsi_stmt (*gsi); +- tree call = gimple_call_fn (stmt); +- tree fndecl; + gimple *new_stmt = NULL; +- +- if (call) ++ unsigned nargs = gimple_call_num_args (stmt); ++ tree *args = (nargs > 0 ++ ? gimple_call_arg_ptr (stmt, 0) ++ : &error_mark_node); ++ ++ /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int ++ and unsigned int; it will distinguish according to the types of ++ the arguments to the __builtin. */ ++ switch (fcode) + { +- fndecl = gimple_call_fndecl (stmt); +- if (fndecl) ++ BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) ++ new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS, ++ 1, args[0]); ++ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); ++ break; ++ BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) ++ BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) ++ new_stmt = gimple_build_call_internal (IFN_REDUC_MAX, ++ 1, args[0]); ++ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); ++ break; ++ BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) ++ BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) ++ new_stmt = gimple_build_call_internal (IFN_REDUC_MIN, ++ 1, args[0]); ++ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); ++ break; ++ BUILTIN_GPF (BINOP, fmulx, 0) + { +- int fcode = DECL_FUNCTION_CODE (fndecl); +- unsigned nargs = gimple_call_num_args (stmt); +- tree *args = (nargs > 0 +- ? gimple_call_arg_ptr (stmt, 0) +- : &error_mark_node); +- +- /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int +- and unsigned int; it will distinguish according to the types of +- the arguments to the __builtin. 
*/ +- switch (fcode) ++ gcc_assert (nargs == 2); ++ bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST; ++ bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST; ++ if (a0_cst_p || a1_cst_p) + { +- BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) +- new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS, +- 1, args[0]); +- gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); +- break; +- BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) +- BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) +- new_stmt = gimple_build_call_internal (IFN_REDUC_MAX, +- 1, args[0]); +- gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); +- break; +- BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) +- BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) +- new_stmt = gimple_build_call_internal (IFN_REDUC_MIN, +- 1, args[0]); +- gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); +- break; +- BUILTIN_GPF (BINOP, fmulx, 0) ++ if (a0_cst_p && a1_cst_p) + { +- gcc_assert (nargs == 2); +- bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST; +- bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST; +- if (a0_cst_p || a1_cst_p) ++ tree t0 = TREE_TYPE (args[0]); ++ real_value a0 = (TREE_REAL_CST (args[0])); ++ real_value a1 = (TREE_REAL_CST (args[1])); ++ if (real_equal (&a1, &dconst0)) ++ std::swap (a0, a1); ++ /* According to real_equal (), +0 equals -0. */ ++ if (real_equal (&a0, &dconst0) && real_isinf (&a1)) + { +- if (a0_cst_p && a1_cst_p) +- { +- tree t0 = TREE_TYPE (args[0]); +- real_value a0 = (TREE_REAL_CST (args[0])); +- real_value a1 = (TREE_REAL_CST (args[1])); +- if (real_equal (&a1, &dconst0)) +- std::swap (a0, a1); +- /* According to real_equal (), +0 equals -0. */ +- if (real_equal (&a0, &dconst0) && real_isinf (&a1)) +- { +- real_value res = dconst2; +- res.sign = a0.sign ^ a1.sign; +- new_stmt = +- gimple_build_assign (gimple_call_lhs (stmt), +- REAL_CST, +- build_real (t0, res)); +- } +- else +- new_stmt = +- gimple_build_assign (gimple_call_lhs (stmt), +- MULT_EXPR, +- args[0], args[1]); +- } +- else /* a0_cst_p ^ a1_cst_p. */ +- { +- real_value const_part = a0_cst_p +- ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]); +- if (!real_equal (&const_part, &dconst0) +- && !real_isinf (&const_part)) +- new_stmt = +- gimple_build_assign (gimple_call_lhs (stmt), +- MULT_EXPR, args[0], args[1]); +- } ++ real_value res = dconst2; ++ res.sign = a0.sign ^ a1.sign; ++ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), ++ REAL_CST, ++ build_real (t0, res)); + } +- if (new_stmt) +- { +- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); +- } +- break; ++ else ++ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), ++ MULT_EXPR, ++ args[0], args[1]); + } +- default: +- break; ++ else /* a0_cst_p ^ a1_cst_p. */ ++ { ++ real_value const_part = a0_cst_p ++ ? 
TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]); ++ if (!real_equal (&const_part, &dconst0) ++ && !real_isinf (&const_part)) ++ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), ++ MULT_EXPR, args[0], ++ args[1]); ++ } ++ } ++ if (new_stmt) ++ { ++ gimple_set_vuse (new_stmt, gimple_vuse (stmt)); ++ gimple_set_vdef (new_stmt, gimple_vdef (stmt)); + } ++ break; + } ++ default: ++ break; + } +- +- if (new_stmt) +- { +- gsi_replace (gsi, new_stmt, true); +- changed = true; +- } +- +- return changed; ++ return new_stmt; + } + + void +diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c +index 6d5acb02f..da78f6fe3 100644 +--- a/gcc/config/aarch64/aarch64-c.c ++++ b/gcc/config/aarch64/aarch64-c.c +@@ -110,6 +110,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + aarch64_def_or_undef (TARGET_CRC32, "__ARM_FEATURE_CRC32", pfile); + aarch64_def_or_undef (TARGET_DOTPROD, "__ARM_FEATURE_DOTPROD", pfile); + aarch64_def_or_undef (TARGET_COMPLEX, "__ARM_FEATURE_COMPLEX", pfile); ++ aarch64_def_or_undef (TARGET_JSCVT, "__ARM_FEATURE_JCVT", pfile); + + cpp_undef (pfile, "__AARCH64_CMODEL_TINY__"); + cpp_undef (pfile, "__AARCH64_CMODEL_SMALL__"); +@@ -146,6 +147,13 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + bits = 0; + builtin_define_with_int_value ("__ARM_FEATURE_SVE_BITS", bits); + } ++ aarch64_def_or_undef (TARGET_SVE_I8MM, ++ "__ARM_FEATURE_SVE_MATMUL_INT8", pfile); ++ aarch64_def_or_undef (TARGET_SVE_F32MM, ++ "__ARM_FEATURE_SVE_MATMUL_FP32", pfile); ++ aarch64_def_or_undef (TARGET_SVE_F64MM, ++ "__ARM_FEATURE_SVE_MATMUL_FP64", pfile); ++ aarch64_def_or_undef (TARGET_SVE2, "__ARM_FEATURE_SVE2", pfile); + + aarch64_def_or_undef (TARGET_LSE, "__ARM_FEATURE_ATOMICS", pfile); + aarch64_def_or_undef (TARGET_AES, "__ARM_FEATURE_AES", pfile); +@@ -156,6 +164,16 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) + aarch64_def_or_undef (TARGET_SM4, "__ARM_FEATURE_SM4", pfile); + aarch64_def_or_undef (TARGET_F16FML, "__ARM_FEATURE_FP16_FML", pfile); + ++ aarch64_def_or_undef (TARGET_FRINT, "__ARM_FEATURE_FRINT", pfile); ++ aarch64_def_or_undef (TARGET_TME, "__ARM_FEATURE_TME", pfile); ++ aarch64_def_or_undef (TARGET_RNG, "__ARM_FEATURE_RNG", pfile); ++ ++ aarch64_def_or_undef (TARGET_I8MM, "__ARM_FEATURE_MATMUL_INT8", pfile); ++ aarch64_def_or_undef (TARGET_BF16_SIMD, ++ "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile); ++ aarch64_def_or_undef (TARGET_BF16_FP, ++ "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); ++ + /* Not for ACLE, but required to keep "float.h" correct if we switch + target between implementations that do or do not support ARMv8.2-A + 16-bit floating-point extensions. */ +@@ -237,6 +255,73 @@ aarch64_pragma_target_parse (tree args, tree pop_target) + return true; + } + ++/* Implement "#pragma GCC aarch64". */ ++static void ++aarch64_pragma_aarch64 (cpp_reader *) ++{ ++ tree x; ++ if (pragma_lex (&x) != CPP_STRING) ++ { ++ error ("%<#pragma GCC aarch64%> requires a string parameter"); ++ return; ++ } ++ ++ const char *name = TREE_STRING_POINTER (x); ++ if (strcmp (name, "arm_sve.h") == 0) ++ aarch64_sve::handle_arm_sve_h (); ++ else ++ error ("unknown %<#pragma GCC aarch64%> option %qs", name); ++} ++ ++/* Implement TARGET_RESOLVE_OVERLOADED_BUILTIN. */ ++static tree ++aarch64_resolve_overloaded_builtin (unsigned int uncast_location, ++ tree fndecl, void *uncast_arglist) ++{ ++ vec empty = {}; ++ location_t location = (location_t) uncast_location; ++ vec *arglist = (uncast_arglist ++ ? 
(vec *) uncast_arglist ++ : &empty); ++ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); ++ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; ++ tree new_fndecl; ++ switch (code & AARCH64_BUILTIN_CLASS) ++ { ++ case AARCH64_BUILTIN_GENERAL: ++ return NULL_TREE; ++ ++ case AARCH64_BUILTIN_SVE: ++ new_fndecl = aarch64_sve::resolve_overloaded_builtin (location, subcode, ++ arglist); ++ break; ++ } ++ if (new_fndecl == NULL_TREE || new_fndecl == error_mark_node) ++ return new_fndecl; ++ return build_function_call_vec (location, vNULL, new_fndecl, arglist, ++ NULL, fndecl); ++} ++ ++/* Implement TARGET_CHECK_BUILTIN_CALL. */ ++static bool ++aarch64_check_builtin_call (location_t loc, vec arg_loc, ++ tree fndecl, tree orig_fndecl, ++ unsigned int nargs, tree *args) ++{ ++ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); ++ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; ++ switch (code & AARCH64_BUILTIN_CLASS) ++ { ++ case AARCH64_BUILTIN_GENERAL: ++ return true; ++ ++ case AARCH64_BUILTIN_SVE: ++ return aarch64_sve::check_builtin_call (loc, arg_loc, subcode, ++ orig_fndecl, nargs, args); ++ } ++ gcc_unreachable (); ++} ++ + /* Implement REGISTER_TARGET_PRAGMAS. */ + + void +@@ -244,4 +329,9 @@ aarch64_register_pragmas (void) + { + /* Update pragma hook to allow parsing #pragma GCC target. */ + targetm.target_option.pragma_parse = aarch64_pragma_target_parse; ++ ++ targetm.resolve_overloaded_builtin = aarch64_resolve_overloaded_builtin; ++ targetm.check_builtin_call = aarch64_check_builtin_call; ++ ++ c_register_pragma ("GCC", "aarch64", aarch64_pragma_aarch64); + } +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 82d91d625..053c6390e 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -46,6 +46,7 @@ + /* ARMv8-A Architecture Processors. */ + + /* ARM ('A') cores. */ ++AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) + AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) + AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) + AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) +@@ -99,7 +100,11 @@ AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR + /* ARM ('A') cores. 
*/ + AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) + AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) +-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa72, 0x41, 0xd0b, -1) ++AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) ++AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa72, 0x41, 0xd0e, -1) ++AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa72, 0x41, 0xd0d, -1) ++AARCH64_CORE("cortex-a65", cortexa65, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) ++AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) + AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) + AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) + AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa53, 0x41, 0xd4a, -1) +diff --git a/gcc/config/aarch64/aarch64-elf-raw.h b/gcc/config/aarch64/aarch64-elf-raw.h +index bbebd0ef0..8fe7b3783 100644 +--- a/gcc/config/aarch64/aarch64-elf-raw.h ++++ b/gcc/config/aarch64/aarch64-elf-raw.h +@@ -27,22 +27,6 @@ + " crtend%O%s crtn%O%s " \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" + +-#if TARGET_FIX_ERR_A53_835769_DEFAULT +-#define CA53_ERR_835769_SPEC \ +- " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" +-#else +-#define CA53_ERR_835769_SPEC \ +- " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" +-#endif +- +-#if TARGET_FIX_ERR_A53_843419_DEFAULT +-#define CA53_ERR_843419_SPEC \ +- " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" +-#else +-#define CA53_ERR_843419_SPEC \ +- " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" +-#endif +- + #ifndef LINK_SPEC + #define LINK_SPEC "%{h*} \ + %{static:-Bstatic} \ +@@ -51,8 +35,7 @@ + %{!static:%{rdynamic:-export-dynamic}} \ + %{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ + -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" \ +- CA53_ERR_835769_SPEC \ +- CA53_ERR_843419_SPEC ++ AARCH64_ERRATA_LINK_SPEC + #endif + + #endif /* GCC_AARCH64_ELF_RAW_H */ +diff --git a/gcc/config/aarch64/aarch64-errata.h b/gcc/config/aarch64/aarch64-errata.h +new file mode 100644 +index 000000000..8f062536e +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-errata.h +@@ -0,0 +1,44 @@ ++/* Machine description for AArch64 architecture. ++ Copyright (C) 2009-2019 Free Software Foundation, Inc. ++ Contributed by ARM Ltd. ++ ++ This file is part of GCC. 
++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#ifndef GCC_AARCH64_ERRATA_H ++#define GCC_AARCH64_ERRATA_H ++ ++#if TARGET_FIX_ERR_A53_835769_DEFAULT ++#define CA53_ERR_835769_SPEC \ ++ " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" ++#else ++#define CA53_ERR_835769_SPEC \ ++ " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" ++#endif ++ ++#if TARGET_FIX_ERR_A53_843419_DEFAULT ++#define CA53_ERR_843419_SPEC \ ++ " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" ++#else ++#define CA53_ERR_843419_SPEC \ ++ " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" ++#endif ++ ++#define AARCH64_ERRATA_LINK_SPEC \ ++ CA53_ERR_835769_SPEC \ ++ CA53_ERR_843419_SPEC ++ ++#endif /* GCC_AARCH64_ERRATA_H */ +diff --git a/gcc/config/aarch64/aarch64-freebsd.h b/gcc/config/aarch64/aarch64-freebsd.h +index 899e6f95e..7a3e89b1b 100644 +--- a/gcc/config/aarch64/aarch64-freebsd.h ++++ b/gcc/config/aarch64/aarch64-freebsd.h +@@ -46,26 +46,8 @@ + -X" SUBTARGET_EXTRA_LINK_SPEC " \ + %{mbig-endian:-EB} %{mlittle-endian:-EL}" + +-#if TARGET_FIX_ERR_A53_835769_DEFAULT +-#define CA53_ERR_835769_SPEC \ +- " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" +-#else +-#define CA53_ERR_835769_SPEC \ +- " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" +-#endif +- +-#ifdef TARGET_FIX_ERR_A53_843419_DEFAULT +-#define CA53_ERR_843419_SPEC \ +- " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" +-#else +-#define CA53_ERR_843419_SPEC \ +- " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" +-#endif +- + #undef LINK_SPEC +-#define LINK_SPEC FBSD_TARGET_LINK_SPEC \ +- CA53_ERR_835769_SPEC \ +- CA53_ERR_843419_SPEC ++#define LINK_SPEC FBSD_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC + + #define GNU_USER_TARGET_MATHFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" +diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h +index 5e8b34ded..6ff2163b6 100644 +--- a/gcc/config/aarch64/aarch64-linux.h ++++ b/gcc/config/aarch64/aarch64-linux.h +@@ -46,25 +46,8 @@ + %{mbig-endian:-EB} %{mlittle-endian:-EL} \ + -maarch64linux%{mabi=ilp32:32}%{mbig-endian:b}" + +-#if TARGET_FIX_ERR_A53_835769_DEFAULT +-#define CA53_ERR_835769_SPEC \ +- " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" +-#else +-#define CA53_ERR_835769_SPEC \ +- " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" +-#endif +- +-#if TARGET_FIX_ERR_A53_843419_DEFAULT +-#define CA53_ERR_843419_SPEC \ +- " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" +-#else +-#define CA53_ERR_843419_SPEC \ +- " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" +-#endif +- +-#define LINK_SPEC LINUX_TARGET_LINK_SPEC \ +- CA53_ERR_835769_SPEC \ +- CA53_ERR_843419_SPEC ++ ++#define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC + + #define GNU_USER_TARGET_MATHFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" +diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def +index 
14c1a43fe..3640540b3 100644 +--- a/gcc/config/aarch64/aarch64-modes.def ++++ b/gcc/config/aarch64/aarch64-modes.def +@@ -33,6 +33,8 @@ + CC_MODE (CCFP); + CC_MODE (CCFPE); + CC_MODE (CC_SWP); ++CC_MODE (CC_NZC); /* Only N, Z and C bits of condition flags are valid. ++ (Used with SVE predicate tests.) */ + CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ + CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ + CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition. */ +@@ -60,6 +62,10 @@ ADJUST_ALIGNMENT (VNx8BI, 2); + ADJUST_ALIGNMENT (VNx4BI, 2); + ADJUST_ALIGNMENT (VNx2BI, 2); + ++/* Bfloat16 modes. */ ++FLOAT_MODE (BF, 2, 0); ++ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format); ++ + VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ + VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ + VECTOR_MODES (FLOAT, 8); /* V2SF. */ +@@ -80,13 +86,14 @@ INT_MODE (XI, 64); + strictly necessary to set the alignment here, since the default would + be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer. */ + #define SVE_MODES(NVECS, VB, VH, VS, VD) \ +- VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS); \ +- VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS); \ ++ VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, 0); \ ++ VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, 0); \ + \ + ADJUST_NUNITS (VB##QI, aarch64_sve_vg * NVECS * 8); \ + ADJUST_NUNITS (VH##HI, aarch64_sve_vg * NVECS * 4); \ + ADJUST_NUNITS (VS##SI, aarch64_sve_vg * NVECS * 2); \ + ADJUST_NUNITS (VD##DI, aarch64_sve_vg * NVECS); \ ++ ADJUST_NUNITS (VH##BF, aarch64_sve_vg * NVECS * 4); \ + ADJUST_NUNITS (VH##HF, aarch64_sve_vg * NVECS * 4); \ + ADJUST_NUNITS (VS##SF, aarch64_sve_vg * NVECS * 2); \ + ADJUST_NUNITS (VD##DF, aarch64_sve_vg * NVECS); \ +@@ -95,6 +102,7 @@ INT_MODE (XI, 64); + ADJUST_ALIGNMENT (VH##HI, 16); \ + ADJUST_ALIGNMENT (VS##SI, 16); \ + ADJUST_ALIGNMENT (VD##DI, 16); \ ++ ADJUST_ALIGNMENT (VH##BF, 16); \ + ADJUST_ALIGNMENT (VH##HF, 16); \ + ADJUST_ALIGNMENT (VS##SF, 16); \ + ADJUST_ALIGNMENT (VD##DF, 16); +@@ -106,6 +114,40 @@ SVE_MODES (2, VNx32, VNx16, VNx8, VNx4) + SVE_MODES (3, VNx48, VNx24, VNx12, VNx6) + SVE_MODES (4, VNx64, VNx32, VNx16, VNx8) + ++/* Partial SVE vectors: ++ ++ VNx2QI VNx4QI VNx8QI ++ VNx2HI VNx4HI ++ VNx2SI ++ ++ In memory they occupy contiguous locations, in the same way as fixed-length ++ vectors. E.g. VNx8QImode is half the size of VNx16QImode. ++ ++ Passing 1 as the final argument ensures that the modes come after all ++ other modes in the GET_MODE_WIDER chain, so that we never pick them ++ in preference to a full vector mode. */ ++VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 1); ++VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 1); ++VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1); ++ ++ADJUST_NUNITS (VNx2QI, aarch64_sve_vg); ++ADJUST_NUNITS (VNx2HI, aarch64_sve_vg); ++ADJUST_NUNITS (VNx2SI, aarch64_sve_vg); ++ ++ADJUST_NUNITS (VNx4QI, aarch64_sve_vg * 2); ++ADJUST_NUNITS (VNx4HI, aarch64_sve_vg * 2); ++ ++ADJUST_NUNITS (VNx8QI, aarch64_sve_vg * 4); ++ ++ADJUST_ALIGNMENT (VNx2QI, 1); ++ADJUST_ALIGNMENT (VNx4QI, 1); ++ADJUST_ALIGNMENT (VNx8QI, 1); ++ ++ADJUST_ALIGNMENT (VNx2HI, 2); ++ADJUST_ALIGNMENT (VNx4HI, 2); ++ ++ADJUST_ALIGNMENT (VNx2SI, 4); ++ + /* Quad float: 128-bit floating mode for long doubles. 
*/ + FLOAT_MODE (TF, 16, ieee_quad_format); + +diff --git a/gcc/config/aarch64/aarch64-netbsd.h b/gcc/config/aarch64/aarch64-netbsd.h +new file mode 100644 +index 000000000..e6c9264bd +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-netbsd.h +@@ -0,0 +1,63 @@ ++/* Definitions for AArch64 running NetBSD ++ Copyright (C) 2016-2019 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#ifndef GCC_AARCH64_NETBSD_H ++#define GCC_AARCH64_NETBSD_H ++ ++#define TARGET_LINKER_BIG_EMULATION "aarch64nbsdb" ++#define TARGET_LINKER_LITTLE_EMULATION "aarch64nbsd" ++ ++#if TARGET_BIG_ENDIAN_DEFAULT ++#define TARGET_LINKER_EMULATION TARGET_LINKER_BIG_EMULATION ++#else ++#define TARGET_LINKER_EMULATION TARGET_LINKER_LITTLE_EMULATION ++#endif ++ ++#undef SUBTARGET_EXTRA_LINK_SPEC ++#define SUBTARGET_EXTRA_LINK_SPEC " -m" TARGET_LINKER_EMULATION ++ ++#define NETBSD_ENTRY_POINT "__start" ++ ++#define NETBSD_TARGET_LINK_SPEC "%{h*} " \ ++ "-X %{mbig-endian:-EB -m " TARGET_LINKER_BIG_EMULATION "} " \ ++ "%{mlittle-endian:-EL -m " TARGET_LINKER_LITTLE_EMULATION "} " \ ++ "%(netbsd_link_spec)" ++ ++#undef LINK_SPEC ++#define LINK_SPEC NETBSD_LINK_SPEC_ELF \ ++ NETBSD_TARGET_LINK_SPEC \ ++ AARCH64_ERRATA_LINK_SPEC ++ ++#undef TARGET_OS_CPP_BUILTINS ++#define TARGET_OS_CPP_BUILTINS() \ ++ do \ ++ { \ ++ NETBSD_OS_CPP_BUILTINS_ELF(); \ ++ } \ ++ while (0) ++ ++#undef SUBTARGET_CPP_SPEC ++#define SUBTARGET_CPP_SPEC NETBSD_CPP_SPEC ++ ++#undef EXTRA_SPECS ++#define EXTRA_SPECS \ ++ { "asm_cpu_spec", ASM_CPU_SPEC }, \ ++ NETBSD_SUBTARGET_EXTRA_SPECS ++ ++#endif /* GCC_AARCH64_NETBSD_H */ +diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def +index 010fd3ccf..345cdc4da 100644 +--- a/gcc/config/aarch64/aarch64-option-extensions.def ++++ b/gcc/config/aarch64/aarch64-option-extensions.def +@@ -45,29 +45,46 @@ + entries: aes, pmull, sha1, sha2 being present). In that case this field + should contain a space (" ") separated list of the strings in 'Features' + that are required. Their order is not important. An empty string means +- do not detect this feature during auto detection. */ ++ do not detect this feature during auto detection. + +-/* NOTE: This file is being parsed by config.gcc and so the +- AARCH64_OPT_EXTENSION must adhere to a strict format: +- 1) No space between the AARCH64_OPT_EXTENSION and the opening (. +- 2) No space between the opening ( and the extension name. +- 3) No space after the extension name before the ,. +- 4) Spaces are only allowed after a , and around |. +- 5) Everything must be on one line. */ ++ NOTE: Any changes to the AARCH64_OPT_EXTENSION macro need to be mirrored in ++ config.gcc. */ + + /* Enabling "fp" just enables "fp". + Disabling "fp" also disables "simd", "crypto", "fp16", "aes", "sha2", +- "sha3", sm3/sm4 and "sve". 
*/ +-AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | AARCH64_FL_SVE, false, "fp") ++ "sha3", sm3/sm4, "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", ++ "sve2-bitperm", "i8mm", "f32mm", "f64mm", and "bf16". */ ++AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | \ ++ AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | \ ++ AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | \ ++ AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \ ++ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \ ++ AARCH64_FL_SVE2_BITPERM | AARCH64_FL_I8MM | \ ++ AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_BF16, ++ false, "fp") + + /* Enabling "simd" also enables "fp". + Disabling "simd" also disables "crypto", "dotprod", "aes", "sha2", "sha3", +- "sm3/sm4" and "sve". */ +-AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, AARCH64_FL_CRYPTO | AARCH64_FL_DOTPROD | AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | AARCH64_FL_SVE, false, "asimd") ++ "sm3/sm4", "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", ++ "sve2-bitperm", "i8mm", "f32mm" and "f64mm". */ ++AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \ ++ AARCH64_FL_CRYPTO | AARCH64_FL_DOTPROD | \ ++ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SM4 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | \ ++ AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ ++ AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM | \ ++ AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \ ++ false, "asimd") + + /* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". +- Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4". */ +-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, AARCH64_FL_AES | AARCH64_FL_SHA2 |AARCH64_FL_SHA3 | AARCH64_FL_SM4, true, "aes pmull sha1 sha2") ++ Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", ++ "sve2-aes", "sve2-sha3", "sve2-sm4". */ ++AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ ++ AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ ++ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ ++ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ ++ "aes pmull sha1 sha2") + + /* Enabling or disabling "crc" only changes "crc". */ + AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") +@@ -76,43 +93,63 @@ AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") + AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, false, "atomics") + + /* Enabling "fp16" also enables "fp". +- Disabling "fp16" disables "fp16", "fp16fml" and "sve". */ +-AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, AARCH64_FL_F16FML | AARCH64_FL_SVE, false, "fphp asimdhp") ++ Disabling "fp16" disables "fp16", "fp16fml", "sve", "sve2", ++ "sve2-aes", "sve2-sha3", "sve2-sm4", "sve2-bitperm", "f32mm" and ++ "f64mm". */ ++AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, \ ++ AARCH64_FL_F16FML | AARCH64_FL_SVE | AARCH64_FL_F32MM | \ ++ AARCH64_FL_F64MM | AARCH64_FL_SVE2 | \ ++ AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ ++ AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, \ ++ "fphp asimdhp") + + /* Enabling or disabling "rcpc" only changes "rcpc". 
*/ + AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, false, "lrcpc") + + /* Enabling "rdma" also enables "fp", "simd". + Disabling "rdma" just disables "rdma". */ +-AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, false, "asimdrdm") ++AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \ ++ AARCH64_FL_FP | AARCH64_FL_SIMD, 0, false, "asimdrdm") + + /* Enabling "dotprod" also enables "simd". + Disabling "dotprod" only disables "dotprod". */ +-AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, false, "asimddp") ++AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, \ ++ false, "asimddp") + + /* Enabling "aes" also enables "simd". +- Disabling "aes" just disables "aes". */ +-AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, 0, false, "aes") ++ Disabling "aes" disables "aes" and "sve2-aes'. */ ++AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, \ ++ AARCH64_FL_SVE2_AES, false, "aes") + + /* Enabling "sha2" also enables "simd". + Disabling "sha2" just disables "sha2". */ +-AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, "sha1 sha2") ++AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, \ ++ "sha1 sha2") + + /* Enabling "sha3" enables "simd" and "sha2". +- Disabling "sha3" just disables "sha3". */ +-AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | AARCH64_FL_SHA2, 0, false, "sha3 sha512") ++ Disabling "sha3" disables "sha3" and "sve2-sha3". */ ++AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | \ ++ AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \ ++ "sha3 sha512") + + /* Enabling "sm4" also enables "simd". +- Disabling "sm4" just disables "sm4". */ +-AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, 0, false, "sm3 sm4") ++ Disabling "sm4" disables "sm4" and "sve2-sm4". */ ++AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, \ ++ AARCH64_FL_SVE2_SM4, false, "sm3 sm4") + + /* Enabling "fp16fml" also enables "fp" and "fp16". + Disabling "fp16fml" just disables "fp16fml". */ +-AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfhm") ++AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, \ ++ AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfhm") + + /* Enabling "sve" also enables "fp16", "fp" and "simd". +- Disabling "sve" just disables "sve". */ +-AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16, 0, false, "sve") ++ Disabling "sve" disables "sve", "f32mm", "f64mm", "sve2", "sve2-aes", ++ "sve2-sha3", "sve2-sm4" and "sve2-bitperm". */ ++AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | \ ++ AARCH64_FL_F16, AARCH64_FL_F32MM | AARCH64_FL_F64MM | \ ++ AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \ ++ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \ ++ AARCH64_FL_SVE2_BITPERM, false, "sve") + + /* Enabling/Disabling "profile" does not enable/disable any other feature. */ + AARCH64_OPT_EXTENSION("profile", AARCH64_FL_PROFILE, 0, 0, false, "") +@@ -124,12 +161,69 @@ AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "") + AARCH64_OPT_EXTENSION("memtag", AARCH64_FL_MEMTAG, 0, 0, false, "") + + /* Enabling/Disabling "sb" only changes "sb". */ +-AARCH64_OPT_EXTENSION("sb", AARCH64_FL_SB, 0, 0, false, "") ++AARCH64_OPT_EXTENSION("sb", AARCH64_FL_SB, 0, 0, false, "sb") + + /* Enabling/Disabling "ssbs" only changes "ssbs". 
*/ +-AARCH64_OPT_EXTENSION("ssbs", AARCH64_FL_SSBS, 0, 0, false, "") ++AARCH64_OPT_EXTENSION("ssbs", AARCH64_FL_SSBS, 0, 0, false, "ssbs") + + /* Enabling/Disabling "predres" only changes "predres". */ + AARCH64_OPT_EXTENSION("predres", AARCH64_FL_PREDRES, 0, 0, false, "") + ++/* Enabling "sve2" also enables "sve", "fp16", "fp", and "simd". ++ Disabling "sve2" disables "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", and ++ "sve2-bitperm". */ ++AARCH64_OPT_EXTENSION("sve2", AARCH64_FL_SVE2, AARCH64_FL_SVE | \ ++ AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16, \ ++ AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ ++ AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, "sve2") ++ ++/* Enabling "sve2-sm4" also enables "sm4", "simd", "fp16", "fp", "sve", and ++ "sve2". Disabling "sve2-sm4" just disables "sve2-sm4". */ ++AARCH64_OPT_EXTENSION("sve2-sm4", AARCH64_FL_SVE2_SM4, AARCH64_FL_SM4 | \ ++ AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ ++ AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesm4") ++ ++/* Enabling "sve2-aes" also enables "aes", "simd", "fp16", "fp", "sve", and ++ "sve2". Disabling "sve2-aes" just disables "sve2-aes". */ ++AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \ ++ AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ ++ AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "sveaes") ++ ++/* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and ++ "sve2". Disabling "sve2-sha3" just disables "sve2-sha3". */ ++AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \ ++ AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ ++ AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3") ++ ++/* Enabling "sve2-bitperm" also enables "simd", "fp16", "fp", "sve", and ++ "sve2". Disabling "sve2-bitperm" just disables "sve2-bitperm". */ ++AARCH64_OPT_EXTENSION("sve2-bitperm", AARCH64_FL_SVE2_BITPERM, AARCH64_FL_SIMD | \ ++ AARCH64_FL_F16 | AARCH64_FL_FP | AARCH64_FL_SVE | \ ++ AARCH64_FL_SVE2, 0, false, "svebitperm") ++ ++/* Enabling or disabling "tme" only changes "tme". */ ++AARCH64_OPT_EXTENSION("tme", AARCH64_FL_TME, 0, 0, false, "") ++ ++/* Enabling "i8mm" also enables "simd" and "fp". ++ Disabling "i8mm" only disables "i8mm". */ ++AARCH64_OPT_EXTENSION("i8mm", AARCH64_FL_I8MM, \ ++ AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "i8mm") ++ ++/* Enabling "f32mm" also enables "sve", "fp16", "fp", and "simd". ++ Disabling "f32mm" only disables "f32mm". */ ++AARCH64_OPT_EXTENSION("f32mm", AARCH64_FL_F32MM, \ ++ AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \ ++ AARCH64_FL_SIMD, 0, false, "f32mm") ++ ++/* Enabling "f64mm" also enables "sve", "fp16", "fp", and "simd". ++ Disabling "f64mm" only disables "f64mm". */ ++AARCH64_OPT_EXTENSION("f64mm", AARCH64_FL_F64MM, \ ++ AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \ ++ AARCH64_FL_SIMD, 0, false, "f64mm") ++ ++/* Enabling "bf16" also enables "simd" and "fp". ++ Disabling "bf16" only disables "bf16". */ ++AARCH64_OPT_EXTENSION("bf16", AARCH64_FL_BF16, \ ++ AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "bf16") ++ + #undef AARCH64_OPT_EXTENSION +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 994bcfc7e..5e0a499e8 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -396,8 +396,81 @@ enum simd_immediate_check { + AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC + }; + ++/* The key type that -msign-return-address should use. 
*/ ++enum aarch64_key_type { ++ AARCH64_KEY_A, ++ AARCH64_KEY_B ++}; ++ ++extern enum aarch64_key_type aarch64_ra_sign_key; ++ + extern struct tune_params aarch64_tune_params; + ++/* The available SVE predicate patterns, known in the ACLE as "svpattern". */ ++#define AARCH64_FOR_SVPATTERN(T) \ ++ T (POW2, pow2, 0) \ ++ T (VL1, vl1, 1) \ ++ T (VL2, vl2, 2) \ ++ T (VL3, vl3, 3) \ ++ T (VL4, vl4, 4) \ ++ T (VL5, vl5, 5) \ ++ T (VL6, vl6, 6) \ ++ T (VL7, vl7, 7) \ ++ T (VL8, vl8, 8) \ ++ T (VL16, vl16, 9) \ ++ T (VL32, vl32, 10) \ ++ T (VL64, vl64, 11) \ ++ T (VL128, vl128, 12) \ ++ T (VL256, vl256, 13) \ ++ T (MUL4, mul4, 29) \ ++ T (MUL3, mul3, 30) \ ++ T (ALL, all, 31) ++ ++/* The available SVE prefetch operations, known in the ACLE as "svprfop". */ ++#define AARCH64_FOR_SVPRFOP(T) \ ++ T (PLDL1KEEP, pldl1keep, 0) \ ++ T (PLDL1STRM, pldl1strm, 1) \ ++ T (PLDL2KEEP, pldl2keep, 2) \ ++ T (PLDL2STRM, pldl2strm, 3) \ ++ T (PLDL3KEEP, pldl3keep, 4) \ ++ T (PLDL3STRM, pldl3strm, 5) \ ++ T (PSTL1KEEP, pstl1keep, 8) \ ++ T (PSTL1STRM, pstl1strm, 9) \ ++ T (PSTL2KEEP, pstl2keep, 10) \ ++ T (PSTL2STRM, pstl2strm, 11) \ ++ T (PSTL3KEEP, pstl3keep, 12) \ ++ T (PSTL3STRM, pstl3strm, 13) ++ ++#define AARCH64_SVENUM(UPPER, LOWER, VALUE) AARCH64_SV_##UPPER = VALUE, ++enum aarch64_svpattern { ++ AARCH64_FOR_SVPATTERN (AARCH64_SVENUM) ++ AARCH64_NUM_SVPATTERNS ++}; ++ ++enum aarch64_svprfop { ++ AARCH64_FOR_SVPRFOP (AARCH64_SVENUM) ++ AARCH64_NUM_SVPRFOPS ++}; ++#undef AARCH64_SVENUM ++ ++/* It's convenient to divide the built-in function codes into groups, ++ rather than having everything in a single enum. This type enumerates ++ those groups. */ ++enum aarch64_builtin_class ++{ ++ AARCH64_BUILTIN_GENERAL, ++ AARCH64_BUILTIN_SVE ++}; ++ ++/* Built-in function codes are structured so that the low ++ AARCH64_BUILTIN_SHIFT bits contain the aarch64_builtin_class ++ and the upper bits contain a group-specific subcode. */ ++const unsigned int AARCH64_BUILTIN_SHIFT = 1; ++ ++/* Mask that selects the aarch64_builtin_class part of a function code. 
*/ ++const unsigned int AARCH64_BUILTIN_CLASS = (1 << AARCH64_BUILTIN_SHIFT) - 1; ++ ++void aarch64_post_cfi_startproc (void); + poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned); + int aarch64_get_condition_code (rtx); + bool aarch64_address_valid_for_prefetch_p (rtx, bool); +@@ -407,6 +480,8 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); + bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode); + int aarch64_branch_cost (bool, bool); + enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx); ++opt_machine_mode aarch64_vq_mode (scalar_mode); ++opt_machine_mode aarch64_full_sve_mode (scalar_mode); + bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode); + bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); + bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, +@@ -414,14 +489,13 @@ bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, + bool aarch64_constant_address_p (rtx); + bool aarch64_emit_approx_div (rtx, rtx, rtx); + bool aarch64_emit_approx_sqrt (rtx, rtx, bool); +-void aarch64_expand_call (rtx, rtx, bool); +-bool aarch64_expand_movmem (rtx *); ++void aarch64_expand_call (rtx, rtx, rtx, bool); ++bool aarch64_expand_cpymem (rtx *); + bool aarch64_float_const_zero_rtx_p (rtx); + bool aarch64_float_const_rtx_p (rtx); + bool aarch64_function_arg_regno_p (unsigned); + bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs); +-bool aarch64_gen_movmemqi (rtx *); +-bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *); ++bool aarch64_gen_cpymemqi (rtx *); + bool aarch64_is_extend_from_extract (scalar_int_mode, rtx, rtx); + bool aarch64_is_long_call_p (rtx); + bool aarch64_is_noplt_call_p (rtx); +@@ -436,24 +510,32 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT); + bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); + bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); ++machine_mode aarch64_sve_int_mode (machine_mode); + opt_machine_mode aarch64_sve_pred_mode (unsigned int); ++opt_machine_mode aarch64_sve_data_mode (scalar_mode, poly_uint64); ++bool aarch64_sve_mode_p (machine_mode); ++HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int); + bool aarch64_sve_cnt_immediate_p (rtx); ++bool aarch64_sve_scalar_inc_dec_immediate_p (rtx); + bool aarch64_sve_addvl_addpl_immediate_p (rtx); +-bool aarch64_sve_inc_dec_immediate_p (rtx); ++bool aarch64_sve_vector_inc_dec_immediate_p (rtx); + int aarch64_add_offset_temporaries (rtx); + void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx); + bool aarch64_mov_operand_p (rtx, machine_mode); + rtx aarch64_reverse_mask (machine_mode, unsigned int); + bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64); + bool aarch64_offset_9bit_signed_unscaled_p (machine_mode, poly_int64); ++char *aarch64_output_sve_prefetch (const char *, rtx, const char *); + char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx); +-char *aarch64_output_sve_addvl_addpl (rtx, rtx, rtx); +-char *aarch64_output_sve_inc_dec_immediate (const char *, rtx); ++char *aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *); ++char *aarch64_output_sve_scalar_inc_dec (rtx); ++char *aarch64_output_sve_addvl_addpl (rtx); ++char *aarch64_output_sve_vector_inc_dec (const char *, rtx); + char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode); + char *aarch64_output_simd_mov_immediate (rtx, 
unsigned, + enum simd_immediate_check w = AARCH64_CHECK_MOV); + char *aarch64_output_sve_mov_immediate (rtx); +-char *aarch64_output_ptrue (machine_mode, char); ++char *aarch64_output_sve_ptrues (rtx); + bool aarch64_pad_reg_upward (machine_mode, const_tree, bool); + bool aarch64_regno_ok_for_base_p (int, bool); + bool aarch64_regno_ok_for_index_p (int, bool); +@@ -462,11 +544,13 @@ bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode, + bool high); + bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode); + bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool); ++bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *); + bool aarch64_simd_valid_immediate (rtx, struct simd_immediate_info *, + enum simd_immediate_check w = AARCH64_CHECK_MOV); + rtx aarch64_check_zero_based_sve_index_immediate (rtx); + bool aarch64_sve_index_immediate_p (rtx); + bool aarch64_sve_arith_immediate_p (rtx, bool); ++bool aarch64_sve_sqadd_sqsub_immediate_p (rtx, bool); + bool aarch64_sve_bitmask_immediate_p (rtx); + bool aarch64_sve_dup_immediate_p (rtx); + bool aarch64_sve_cmp_immediate_p (rtx, bool); +@@ -476,15 +560,15 @@ bool aarch64_split_dimode_const_store (rtx, rtx); + bool aarch64_symbolic_address_p (rtx); + bool aarch64_uimm12_shift (HOST_WIDE_INT); + bool aarch64_use_return_insn_p (void); +-bool aarch64_use_simple_return_insn_p (void); +-const char *aarch64_mangle_builtin_type (const_tree); + const char *aarch64_output_casesi (rtx *); + ++unsigned int aarch64_tlsdesc_abi_id (); + enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT); + enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); + enum reg_class aarch64_regno_regclass (unsigned); + int aarch64_asm_preferred_eh_data_format (int, int); + int aarch64_fpconst_pow_of_2 (rtx); ++int aarch64_fpconst_pow2_recip (rtx); + machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned, + machine_mode); + int aarch64_uxt_size (int, HOST_WIDE_INT); +@@ -496,13 +580,17 @@ rtx aarch64_return_addr (int, rtx); + rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT); + bool aarch64_simd_mem_operand_p (rtx); + bool aarch64_sve_ld1r_operand_p (rtx); ++bool aarch64_sve_ld1rq_operand_p (rtx); ++bool aarch64_sve_ld1ro_operand_p (rtx, scalar_mode); ++bool aarch64_sve_ldff1_operand_p (rtx); ++bool aarch64_sve_ldnf1_operand_p (rtx); + bool aarch64_sve_ldr_operand_p (rtx); ++bool aarch64_sve_prefetch_operand_p (rtx, machine_mode); + bool aarch64_sve_struct_memory_operand_p (rtx); + rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool); + rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int); + bool aarch64_stepped_int_parallel_p (rtx, int); + rtx aarch64_tls_get_addr (void); +-tree aarch64_fold_builtin (tree, int, tree *, bool); + unsigned aarch64_dbx_register_number (unsigned); + unsigned aarch64_trampoline_size (void); + void aarch64_asm_output_labelref (FILE *, const char *); +@@ -512,7 +600,15 @@ const char * aarch64_output_probe_stack_range (rtx, rtx); + const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); + void aarch64_err_no_fpadvsimd (machine_mode); + void aarch64_expand_epilogue (bool); +-void aarch64_expand_mov_immediate (rtx, rtx, rtx (*) (rtx, rtx) = 0); ++rtx aarch64_ptrue_all (unsigned int); ++opt_machine_mode aarch64_ptrue_all_mode (rtx); ++rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx); ++rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx); ++void aarch64_expand_mov_immediate (rtx, rtx); ++rtx aarch64_ptrue_reg 
(machine_mode); ++rtx aarch64_pfalse_reg (machine_mode); ++bool aarch64_sve_pred_dominates_p (rtx *, rtx); ++bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); + void aarch64_emit_sve_pred_move (rtx, rtx, rtx); + void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); + bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx); +@@ -520,8 +616,9 @@ rtx aarch64_replace_reg_mode (rtx, machine_mode); + void aarch64_split_sve_subreg_move (rtx, rtx, rtx); + void aarch64_expand_prologue (void); + void aarch64_expand_vector_init (rtx, rtx); ++void aarch64_sve_expand_vector_init (rtx, rtx); + void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, +- const_tree, unsigned); ++ const_tree, unsigned, bool = false); + void aarch64_init_expanders (void); + void aarch64_init_simd_builtins (void); + void aarch64_emit_call_insn (rtx); +@@ -587,22 +684,39 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE); + void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx); + bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); + void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *); +-#endif /* RTX_CODE */ + +-void aarch64_init_builtins (void); ++bool aarch64_prepare_sve_int_fma (rtx *, rtx_code); ++bool aarch64_prepare_sve_cond_int_fma (rtx *, rtx_code); ++#endif /* RTX_CODE */ + + bool aarch64_process_target_attr (tree); + void aarch64_override_options_internal (struct gcc_options *); + +-rtx aarch64_expand_builtin (tree exp, +- rtx target, +- rtx subtarget ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- int ignore ATTRIBUTE_UNUSED); +-tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED); +-tree aarch64_builtin_rsqrt (unsigned int); ++const char *aarch64_general_mangle_builtin_type (const_tree); ++void aarch64_general_init_builtins (void); ++tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *); ++gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *); ++rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); ++tree aarch64_general_builtin_decl (unsigned, bool); ++tree aarch64_general_builtin_rsqrt (unsigned int); + tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); + ++namespace aarch64_sve { ++ void init_builtins (); ++ void handle_arm_sve_h (); ++ tree builtin_decl (unsigned, bool); ++ bool builtin_type_p (const_tree); ++ bool svbool_type_p (const_tree); ++ unsigned int nvectors_if_data_type (const_tree); ++ const char *mangle_builtin_type (const_tree); ++ tree resolve_overloaded_builtin (location_t, unsigned int, ++ vec *); ++ bool check_builtin_call (location_t, vec, unsigned int, ++ tree, unsigned int, tree *); ++ gimple *gimple_fold_builtin (unsigned int, gimple_stmt_iterator *, gcall *); ++ rtx expand_builtin (unsigned int, tree, rtx); ++} ++ + extern void aarch64_split_combinev16qi (rtx operands[3]); + extern void aarch64_expand_vec_perm (rtx, rtx, rtx, rtx, unsigned int); + extern void aarch64_expand_sve_vec_perm (rtx, rtx, rtx, rtx); +@@ -629,11 +743,10 @@ bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, + const struct cl_decoded_option *, location_t); + const char *aarch64_rewrite_selected_cpu (const char *name); + enum aarch64_parse_opt_result aarch64_parse_extension (const char *, +- unsigned long *, ++ uint64_t *, + std::string *); + void aarch64_get_all_extension_candidates (auto_vec *candidates); +-std::string aarch64_get_extension_string_for_isa_flags (unsigned long, +- unsigned long); ++std::string 
aarch64_get_extension_string_for_isa_flags (uint64_t, uint64_t); + + /* Defined in aarch64-d.c */ + extern void aarch64_d_target_versions (void); +@@ -647,4 +760,17 @@ poly_uint64 aarch64_regmode_natural_size (machine_mode); + + bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT); + ++struct atomic_ool_names ++{ ++ const char *str[5][4]; ++}; ++ ++rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, ++ const atomic_ool_names *names); ++extern const atomic_ool_names aarch64_ool_swp_names; ++extern const atomic_ool_names aarch64_ool_ldadd_names; ++extern const atomic_ool_names aarch64_ool_ldset_names; ++extern const atomic_ool_names aarch64_ool_ldclr_names; ++extern const atomic_ool_names aarch64_ool_ldeor_names; ++ + #endif /* GCC_AARCH64_PROTOS_H */ +diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def +index b01569429..2be0ce824 100644 +--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def ++++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def +@@ -50,3 +50,5 @@ + ENTRY (Float32x4_t, V4SF, none, 13) + ENTRY (Float64x1_t, V1DF, none, 13) + ENTRY (Float64x2_t, V2DF, none, 13) ++ ENTRY (Bfloat16x4_t, V4BF, none, 14) ++ ENTRY (Bfloat16x8_t, V8BF, none, 14) +diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def +index 17bb0c486..d0fe4e7c8 100644 +--- a/gcc/config/aarch64/aarch64-simd-builtins.def ++++ b/gcc/config/aarch64/aarch64-simd-builtins.def +@@ -212,10 +212,15 @@ + /* Implemented by aarch64_{_lane}{q}. */ + BUILTIN_VB (TERNOP, sdot, 0) + BUILTIN_VB (TERNOPU, udot, 0) ++ BUILTIN_VB (TERNOP_SSUS, usdot, 0) + BUILTIN_VB (QUADOP_LANE, sdot_lane, 0) + BUILTIN_VB (QUADOPU_LANE, udot_lane, 0) + BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0) + BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0) ++ BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_lane, 0) ++ BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_laneq, 0) ++ BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_lane, 0) ++ BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_laneq, 0) + + /* Implemented by aarch64_fcadd. */ + BUILTIN_VHSDF (BINOP, fcadd90, 0) +@@ -424,7 +429,7 @@ + BUILTIN_VB (UNOP, rbit, 0) + + /* Implemented by +- aarch64_. */ ++ aarch64_. */ + BUILTIN_VALL (BINOP, zip1, 0) + BUILTIN_VALL (BINOP, zip2, 0) + BUILTIN_VALL (BINOP, uzp1, 0) +@@ -465,12 +470,18 @@ + /* Implemented by aarch64_ld1x3. */ + BUILTIN_VALLDIF (LOADSTRUCT, ld1x3, 0) + ++ /* Implemented by aarch64_ld1x4. */ ++ BUILTIN_VALLDIF (LOADSTRUCT, ld1x4, 0) ++ + /* Implemented by aarch64_st1x2. */ + BUILTIN_VALLDIF (STORESTRUCT, st1x2, 0) + + /* Implemented by aarch64_st1x3. */ + BUILTIN_VALLDIF (STORESTRUCT, st1x3, 0) + ++ /* Implemented by aarch64_st1x4. */ ++ BUILTIN_VALLDIF (STORESTRUCT, st1x4, 0) ++ + /* Implemented by fma4. */ + BUILTIN_VHSDF (TERNOP, fma, 4) + VAR1 (TERNOP, fma, 4, hf) +@@ -670,3 +681,36 @@ + /* Implemented by aarch64_fmllq_laneq_highv4sf. */ + VAR1 (QUADOP_LANE, fmlalq_laneq_high, 0, v4sf) + VAR1 (QUADOP_LANE, fmlslq_laneq_high, 0, v4sf) ++ ++ /* Implemented by aarch64_. */ ++ BUILTIN_VSFDF (UNOP, frint32z, 0) ++ BUILTIN_VSFDF (UNOP, frint32x, 0) ++ BUILTIN_VSFDF (UNOP, frint64z, 0) ++ BUILTIN_VSFDF (UNOP, frint64x, 0) ++ ++ /* Implemented by aarch64_bfdot{_lane}{q}. 
*/ ++ VAR2 (TERNOP, bfdot, 0, v2sf, v4sf) ++ VAR2 (QUADOP_LANE_PAIR, bfdot_lane, 0, v2sf, v4sf) ++ VAR2 (QUADOP_LANE_PAIR, bfdot_laneq, 0, v2sf, v4sf) ++ ++ /* Implemented by aarch64_bfmmlaqv4sf */ ++ VAR1 (TERNOP, bfmmlaq, 0, v4sf) ++ ++ /* Implemented by aarch64_bfmlal{_lane{q}}v4sf */ ++ VAR1 (TERNOP, bfmlalb, 0, v4sf) ++ VAR1 (TERNOP, bfmlalt, 0, v4sf) ++ VAR1 (QUADOP_LANE, bfmlalb_lane, 0, v4sf) ++ VAR1 (QUADOP_LANE, bfmlalt_lane, 0, v4sf) ++ VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, v4sf) ++ VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, v4sf) ++ ++ /* Implemented by aarch64_simd_mmlav16qi. */ ++ VAR1 (TERNOP, simd_smmla, 0, v16qi) ++ VAR1 (TERNOPU, simd_ummla, 0, v16qi) ++ VAR1 (TERNOP_SSUS, simd_usmmla, 0, v16qi) ++ ++ /* Implemented by aarch64_bfcvtn{q}{2} */ ++ VAR1 (UNOP, bfcvtn, 0, v4bf) ++ VAR1 (UNOP, bfcvtn_q, 0, v8bf) ++ VAR1 (BINOP, bfcvtn2, 0, v8bf) ++ VAR1 (UNOP, bfcvt, 0, bf) +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 29ca37c65..137c88da1 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -19,8 +19,8 @@ + ;; . + + (define_expand "mov" +- [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "") +- (match_operand:VALL_F16 1 "general_operand" ""))] ++ [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand") ++ (match_operand:VALL_F16MOV 1 "general_operand"))] + "TARGET_SIMD" + " + /* Force the operand into a register if it is not an +@@ -39,8 +39,8 @@ + ) + + (define_expand "movmisalign" +- [(set (match_operand:VALL 0 "nonimmediate_operand" "") +- (match_operand:VALL 1 "general_operand" ""))] ++ [(set (match_operand:VALL 0 "nonimmediate_operand") ++ (match_operand:VALL 1 "general_operand"))] + "TARGET_SIMD" + { + /* This pattern is not permitted to fail during expansion: if both arguments +@@ -101,10 +101,10 @@ + [(set_attr "type" "neon_dup")] + ) + +-(define_insn "*aarch64_simd_mov" +- [(set (match_operand:VD 0 "nonimmediate_operand" ++(define_insn "*aarch64_simd_mov" ++ [(set (match_operand:VDMOV 0 "nonimmediate_operand" + "=w, m, m, w, ?r, ?w, ?r, w") +- (match_operand:VD 1 "general_operand" ++ (match_operand:VDMOV 1 "general_operand" + "m, Dz, w, w, w, r, r, Dn"))] + "TARGET_SIMD + && (register_operand (operands[0], mode) +@@ -129,10 +129,10 @@ + mov_reg, neon_move")] + ) + +-(define_insn "*aarch64_simd_mov" +- [(set (match_operand:VQ 0 "nonimmediate_operand" ++(define_insn "*aarch64_simd_mov" ++ [(set (match_operand:VQMOV 0 "nonimmediate_operand" + "=w, Umn, m, w, ?r, ?w, ?r, w") +- (match_operand:VQ 1 "general_operand" ++ (match_operand:VQMOV 1 "general_operand" + "m, Dz, w, w, w, r, r, Dn"))] + "TARGET_SIMD + && (register_operand (operands[0], mode) +@@ -234,8 +234,8 @@ + + + (define_split +- [(set (match_operand:VQ 0 "register_operand" "") +- (match_operand:VQ 1 "register_operand" ""))] ++ [(set (match_operand:VQMOV 0 "register_operand" "") ++ (match_operand:VQMOV 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed + && GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))" +@@ -246,8 +246,8 @@ + }) + + (define_split +- [(set (match_operand:VQ 0 "register_operand" "") +- (match_operand:VQ 1 "register_operand" ""))] ++ [(set (match_operand:VQMOV 0 "register_operand" "") ++ (match_operand:VQMOV 1 "register_operand" ""))] + "TARGET_SIMD && reload_completed + && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) + || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" +@@ -258,8 +258,8 @@ + }) + + (define_expand 
"@aarch64_split_simd_mov" +- [(set (match_operand:VQ 0) +- (match_operand:VQ 1))] ++ [(set (match_operand:VQMOV 0) ++ (match_operand:VQMOV 1))] + "TARGET_SIMD" + { + rtx dst = operands[0]; +@@ -520,6 +520,20 @@ + [(set_attr "type" "neon_dot")] + ) + ++;; These instructions map to the __builtins for the armv8.6a I8MM usdot ++;; (vector) Dot Product operation. ++(define_insn "aarch64_usdot" ++ [(set (match_operand:VS 0 "register_operand" "=w") ++ (plus:VS ++ (unspec:VS [(match_operand: 2 "register_operand" "w") ++ (match_operand: 3 "register_operand" "w")] ++ UNSPEC_USDOT) ++ (match_operand:VS 1 "register_operand" "0")))] ++ "TARGET_I8MM" ++ "usdot\\t%0., %2., %3." ++ [(set_attr "type" "neon_dot")] ++) ++ + ;; These expands map to the Dot Product optab the vectorizer checks for. + ;; The auto-vectorizer expects a dot product builtin that also does an + ;; accumulation into the provided register. +@@ -587,6 +601,26 @@ + [(set_attr "type" "neon_dot")] + ) + ++;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot ++;; (by element) Dot Product operations. ++(define_insn "aarch64_dot_lane" ++ [(set (match_operand:VS 0 "register_operand" "=w") ++ (plus:VS ++ (unspec:VS [(match_operand: 2 "register_operand" "w") ++ (match_operand:VB 3 "register_operand" "w") ++ (match_operand:SI 4 "immediate_operand" "i")] ++ DOTPROD_I8MM) ++ (match_operand:VS 1 "register_operand" "0")))] ++ "TARGET_I8MM" ++ { ++ int nunits = GET_MODE_NUNITS (mode).to_constant (); ++ int lane = INTVAL (operands[4]); ++ operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode); ++ return "dot\\t%0., %2., %3.4b[%4]"; ++ } ++ [(set_attr "type" "neon_dot")] ++) ++ + (define_expand "copysign3" + [(match_operand:VHSDF 0 "register_operand") + (match_operand:VHSDF 1 "register_operand") +@@ -666,8 +700,8 @@ + [(set_attr "type" "neon_fp_rsqrts_")]) + + (define_expand "rsqrt2" +- [(set (match_operand:VALLF 0 "register_operand" "=w") +- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")] ++ [(set (match_operand:VALLF 0 "register_operand") ++ (unspec:VALLF [(match_operand:VALLF 1 "register_operand")] + UNSPEC_RSQRT))] + "TARGET_SIMD" + { +@@ -724,15 +758,15 @@ + ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64. + ;; Whereas SABD would return 192 (-64 signed) on the above example. + ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead. +-(define_insn "*aarch64_abd_3" ++(define_insn "aarch64_abd_3" + [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") + (minus:VDQ_BHSI + (USMAX:VDQ_BHSI + (match_operand:VDQ_BHSI 1 "register_operand" "w") + (match_operand:VDQ_BHSI 2 "register_operand" "w")) +- (match_operator 3 "aarch64_" +- [(match_dup 1) +- (match_dup 2)])))] ++ (:VDQ_BHSI ++ (match_dup 1) ++ (match_dup 2))))] + "TARGET_SIMD" + "abd\t%0., %1., %2." + [(set_attr "type" "neon_abd")] +@@ -778,7 +812,16 @@ + ;; UABAL tmp.8h, op1.16b, op2.16b + ;; UADALP op3.4s, tmp.8h + ;; MOV op0, op3 // should be eliminated in later passes. +-;; The signed version just uses the signed variants of the above instructions. ++;; ++;; For TARGET_DOTPROD we do: ++;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops. ++;; UABD tmp2.16b, op1.16b, op2.16b ++;; UDOT op3.4s, tmp2.16b, tmp1.16b ++;; MOV op0, op3 // RA will tie the operands of UDOT appropriately. ++;; ++;; The signed version just uses the signed variants of the above instructions ++;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is ++;; unsigned. 
+ + (define_expand "sadv16qi" + [(use (match_operand:V4SI 0 "register_operand")) +@@ -787,6 +830,15 @@ + (use (match_operand:V4SI 3 "register_operand"))] + "TARGET_SIMD" + { ++ if (TARGET_DOTPROD) ++ { ++ rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode)); ++ rtx abd = gen_reg_rtx (V16QImode); ++ emit_insn (gen_aarch64_abdv16qi_3 (abd, operands[1], operands[2])); ++ emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3], ++ abd, ones)); ++ DONE; ++ } + rtx reduc = gen_reg_rtx (V8HImode); + emit_insn (gen_aarch64_abdl2v16qi_3 (reduc, operands[1], + operands[2])); +@@ -949,6 +1001,21 @@ + [(set_attr "type" "neon_ins")] + ) + ++(define_expand "signbit2" ++ [(use (match_operand: 0 "register_operand")) ++ (use (match_operand:VDQSF 1 "register_operand"))] ++ "TARGET_SIMD" ++{ ++ int shift_amount = GET_MODE_UNIT_BITSIZE (mode) - 1; ++ rtx shift_vector = aarch64_simd_gen_const_vector_dup (mode, ++ shift_amount); ++ operands[1] = lowpart_subreg (mode, operands[1], mode); ++ ++ emit_insn (gen_aarch64_simd_lshr (operands[0], operands[1], ++ shift_vector)); ++ DONE; ++}) ++ + (define_insn "aarch64_simd_lshr" + [(set (match_operand:VDQ_I 0 "register_operand" "=w") + (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") +@@ -967,6 +1034,18 @@ + [(set_attr "type" "neon_shift_imm")] + ) + ++(define_insn "*aarch64_simd_sra" ++ [(set (match_operand:VDQ_I 0 "register_operand" "=w") ++ (plus:VDQ_I ++ (SHIFTRT:VDQ_I ++ (match_operand:VDQ_I 1 "register_operand" "w") ++ (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")) ++ (match_operand:VDQ_I 3 "register_operand" "0")))] ++ "TARGET_SIMD" ++ "sra\t%0., %1., %2" ++ [(set_attr "type" "neon_shift_acc")] ++) ++ + (define_insn "aarch64_simd_imm_shl" + [(set (match_operand:VDQ_I 0 "register_operand" "=w") + (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") +@@ -1006,9 +1085,9 @@ + ) + + (define_expand "ashl3" +- [(match_operand:VDQ_I 0 "register_operand" "") +- (match_operand:VDQ_I 1 "register_operand" "") +- (match_operand:SI 2 "general_operand" "")] ++ [(match_operand:VDQ_I 0 "register_operand") ++ (match_operand:VDQ_I 1 "register_operand") ++ (match_operand:SI 2 "general_operand")] + "TARGET_SIMD" + { + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; +@@ -1053,9 +1132,9 @@ + ) + + (define_expand "lshr3" +- [(match_operand:VDQ_I 0 "register_operand" "") +- (match_operand:VDQ_I 1 "register_operand" "") +- (match_operand:SI 2 "general_operand" "")] ++ [(match_operand:VDQ_I 0 "register_operand") ++ (match_operand:VDQ_I 1 "register_operand") ++ (match_operand:SI 2 "general_operand")] + "TARGET_SIMD" + { + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; +@@ -1100,9 +1179,9 @@ + ) + + (define_expand "ashr3" +- [(match_operand:VDQ_I 0 "register_operand" "") +- (match_operand:VDQ_I 1 "register_operand" "") +- (match_operand:SI 2 "general_operand" "")] ++ [(match_operand:VDQ_I 0 "register_operand") ++ (match_operand:VDQ_I 1 "register_operand") ++ (match_operand:SI 2 "general_operand")] + "TARGET_SIMD" + { + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; +@@ -1147,9 +1226,9 @@ + ) + + (define_expand "vashl3" +- [(match_operand:VDQ_I 0 "register_operand" "") +- (match_operand:VDQ_I 1 "register_operand" "") +- (match_operand:VDQ_I 2 "register_operand" "")] ++ [(match_operand:VDQ_I 0 "register_operand") ++ (match_operand:VDQ_I 1 "register_operand") ++ (match_operand:VDQ_I 2 "register_operand")] + "TARGET_SIMD" + { + emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], +@@ -1161,9 +1240,9 @@ + ;; 
Negating individual lanes most certainly offsets the + ;; gain from vectorization. + (define_expand "vashr3" +- [(match_operand:VDQ_BHSI 0 "register_operand" "") +- (match_operand:VDQ_BHSI 1 "register_operand" "") +- (match_operand:VDQ_BHSI 2 "register_operand" "")] ++ [(match_operand:VDQ_BHSI 0 "register_operand") ++ (match_operand:VDQ_BHSI 1 "register_operand") ++ (match_operand:VDQ_BHSI 2 "register_operand")] + "TARGET_SIMD" + { + rtx neg = gen_reg_rtx (mode); +@@ -1175,9 +1254,9 @@ + + ;; DI vector shift + (define_expand "aarch64_ashr_simddi" +- [(match_operand:DI 0 "register_operand" "=w") +- (match_operand:DI 1 "register_operand" "w") +- (match_operand:SI 2 "aarch64_shift_imm64_di" "")] ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:SI 2 "aarch64_shift_imm64_di")] + "TARGET_SIMD" + { + /* An arithmetic shift right by 64 fills the result with copies of the sign +@@ -1191,9 +1270,9 @@ + ) + + (define_expand "vlshr3" +- [(match_operand:VDQ_BHSI 0 "register_operand" "") +- (match_operand:VDQ_BHSI 1 "register_operand" "") +- (match_operand:VDQ_BHSI 2 "register_operand" "")] ++ [(match_operand:VDQ_BHSI 0 "register_operand") ++ (match_operand:VDQ_BHSI 1 "register_operand") ++ (match_operand:VDQ_BHSI 2 "register_operand")] + "TARGET_SIMD" + { + rtx neg = gen_reg_rtx (mode); +@@ -1204,9 +1283,9 @@ + }) + + (define_expand "aarch64_lshr_simddi" +- [(match_operand:DI 0 "register_operand" "=w") +- (match_operand:DI 1 "register_operand" "w") +- (match_operand:SI 2 "aarch64_shift_imm64_di" "")] ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:SI 2 "aarch64_shift_imm64_di")] + "TARGET_SIMD" + { + if (INTVAL (operands[2]) == 64) +@@ -1234,9 +1313,9 @@ + ) + + (define_expand "vec_set" +- [(match_operand:VALL_F16 0 "register_operand" "+w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:SI 2 "immediate_operand" "")] ++ [(match_operand:VALL_F16 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); +@@ -1375,9 +1454,9 @@ + ) + + (define_expand "v2di3" +- [(set (match_operand:V2DI 0 "register_operand" "") +- (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "") +- (match_operand:V2DI 2 "register_operand" "")))] ++ [(set (match_operand:V2DI 0 "register_operand") ++ (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand") ++ (match_operand:V2DI 2 "register_operand")))] + "TARGET_SIMD" + { + enum rtx_code cmp_operator; +@@ -1440,8 +1519,8 @@ + ;; On big-endian this is { zeroes, operand } + + (define_insn "move_lo_quad_internal_" +- [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") +- (vec_concat:VQ_NO2E ++ [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w") ++ (vec_concat:VQMOV_NO2E + (match_operand: 1 "register_operand" "w,r,r") + (vec_duplicate: (const_int 0))))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" +@@ -1470,8 +1549,8 @@ + ) + + (define_insn "move_lo_quad_internal_be_" +- [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") +- (vec_concat:VQ_NO2E ++ [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w") ++ (vec_concat:VQMOV_NO2E + (vec_duplicate: (const_int 0)) + (match_operand: 1 "register_operand" "w,r,r")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" +@@ -1500,8 +1579,8 @@ + ) + + (define_expand "move_lo_quad_" +- [(match_operand:VQ 0 "register_operand") +- (match_operand:VQ 1 "register_operand")] ++ 
[(match_operand:VQMOV 0 "register_operand") ++ (match_operand:VQMOV 1 "register_operand")] + "TARGET_SIMD" + { + if (BYTES_BIG_ENDIAN) +@@ -1518,11 +1597,11 @@ + ;; For big-endian this is { operand1, operand2 } + + (define_insn "aarch64_simd_move_hi_quad_" +- [(set (match_operand:VQ 0 "register_operand" "+w,w") +- (vec_concat:VQ ++ [(set (match_operand:VQMOV 0 "register_operand" "+w,w") ++ (vec_concat:VQMOV + (vec_select: + (match_dup 0) +- (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) ++ (match_operand:VQMOV 2 "vect_par_cnst_lo_half" "")) + (match_operand: 1 "register_operand" "w,r")))] + "TARGET_SIMD && !BYTES_BIG_ENDIAN" + "@ +@@ -1532,12 +1611,12 @@ + ) + + (define_insn "aarch64_simd_move_hi_quad_be_" +- [(set (match_operand:VQ 0 "register_operand" "+w,w") +- (vec_concat:VQ ++ [(set (match_operand:VQMOV 0 "register_operand" "+w,w") ++ (vec_concat:VQMOV + (match_operand: 1 "register_operand" "w,r") + (vec_select: + (match_dup 0) +- (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))] ++ (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "@ + ins\\t%0.d[1], %1.d[0] +@@ -1546,8 +1625,8 @@ + ) + + (define_expand "move_hi_quad_" +- [(match_operand:VQ 0 "register_operand" "") +- (match_operand: 1 "register_operand" "")] ++ [(match_operand:VQMOV 0 "register_operand") ++ (match_operand: 1 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); +@@ -1571,10 +1650,122 @@ + [(set_attr "type" "neon_shift_imm_narrow_q")] + ) + ++(define_insn "aarch64_bfdot" ++ [(set (match_operand:VDQSF 0 "register_operand" "=w") ++ (plus:VDQSF ++ (unspec:VDQSF ++ [(match_operand: 2 "register_operand" "w") ++ (match_operand: 3 "register_operand" "w")] ++ UNSPEC_BFDOT) ++ (match_operand:VDQSF 1 "register_operand" "0")))] ++ "TARGET_BF16_SIMD" ++ "bfdot\t%0., %2., %3." 
++ [(set_attr "type" "neon_dot")] ++) ++ ++(define_insn "aarch64_bfdot_lane" ++ [(set (match_operand:VDQSF 0 "register_operand" "=w") ++ (plus:VDQSF ++ (unspec:VDQSF ++ [(match_operand: 2 "register_operand" "w") ++ (match_operand:VBF 3 "register_operand" "w") ++ (match_operand:SI 4 "const_int_operand" "n")] ++ UNSPEC_BFDOT) ++ (match_operand:VDQSF 1 "register_operand" "0")))] ++ "TARGET_BF16_SIMD" ++{ ++ int nunits = GET_MODE_NUNITS (mode).to_constant (); ++ int lane = INTVAL (operands[4]); ++ operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode); ++ return "bfdot\t%0., %2., %3.2h[%4]"; ++} ++ [(set_attr "type" "neon_dot")] ++) ++ ++;; bfmmla ++(define_insn "aarch64_bfmmlaqv4sf" ++ [(set (match_operand:V4SF 0 "register_operand" "=w") ++ (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") ++ (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") ++ (match_operand:V8BF 3 "register_operand" "w")] ++ UNSPEC_BFMMLA)))] ++ "TARGET_BF16_SIMD" ++ "bfmmla\\t%0.4s, %2.8h, %3.8h" ++ [(set_attr "type" "neon_fp_mla_s_q")] ++) ++ ++;; bfmlal ++(define_insn "aarch64_bfmlalv4sf" ++ [(set (match_operand:V4SF 0 "register_operand" "=w") ++ (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") ++ (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") ++ (match_operand:V8BF 3 "register_operand" "w")] ++ BF_MLA)))] ++ "TARGET_BF16_SIMD" ++ "bfmlal\\t%0.4s, %2.8h, %3.8h" ++ [(set_attr "type" "neon_fp_mla_s_q")] ++) ++ ++(define_insn "aarch64_bfmlal_lanev4sf" ++ [(set (match_operand:V4SF 0 "register_operand" "=w") ++ (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") ++ (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") ++ (match_operand:VBF 3 "register_operand" "w") ++ (match_operand:SI 4 "const_int_operand" "n")] ++ BF_MLA)))] ++ "TARGET_BF16_SIMD" ++{ ++ operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); ++ return "bfmlal\\t%0.4s, %2.8h, %3.h[%4]"; ++} ++ [(set_attr "type" "neon_fp_mla_s_scalar_q")] ++) ++ ++;; 8-bit integer matrix multiply-accumulate ++(define_insn "aarch64_simd_mmlav16qi" ++ [(set (match_operand:V4SI 0 "register_operand" "=w") ++ (plus:V4SI ++ (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w") ++ (match_operand:V16QI 3 "register_operand" "w")] MATMUL) ++ (match_operand:V4SI 1 "register_operand" "0")))] ++ "TARGET_I8MM" ++ "mmla\\t%0.4s, %2.16b, %3.16b" ++ [(set_attr "type" "neon_mla_s_q")] ++) ++ ++;; bfcvtn ++(define_insn "aarch64_bfcvtn" ++ [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w") ++ (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")] ++ UNSPEC_BFCVTN))] ++ "TARGET_BF16_SIMD" ++ "bfcvtn\\t%0.4h, %1.4s" ++ [(set_attr "type" "neon_fp_cvt_narrow_s_q")] ++) ++ ++(define_insn "aarch64_bfcvtn2v8bf" ++ [(set (match_operand:V8BF 0 "register_operand" "=w") ++ (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0") ++ (match_operand:V4SF 2 "register_operand" "w")] ++ UNSPEC_BFCVTN2))] ++ "TARGET_BF16_SIMD" ++ "bfcvtn2\\t%0.8h, %2.4s" ++ [(set_attr "type" "neon_fp_cvt_narrow_s_q")] ++) ++ ++(define_insn "aarch64_bfcvtbf" ++ [(set (match_operand:BF 0 "register_operand" "=w") ++ (unspec:BF [(match_operand:SF 1 "register_operand" "w")] ++ UNSPEC_BFCVT))] ++ "TARGET_BF16_FP" ++ "bfcvt\\t%h0, %s1" ++ [(set_attr "type" "f_cvt")] ++) ++ + (define_expand "vec_pack_trunc_" +- [(match_operand: 0 "register_operand" "") +- (match_operand:VDN 1 "register_operand" "") +- (match_operand:VDN 2 "register_operand" "")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VDN 1 "register_operand") 
++ (match_operand:VDN 2 "register_operand")] + "TARGET_SIMD" + { + rtx tempreg = gen_reg_rtx (mode); +@@ -1630,7 +1821,7 @@ + ) + + (define_expand "vec_unpack_hi_" +- [(match_operand: 0 "register_operand" "") ++ [(match_operand: 0 "register_operand") + (ANY_EXTEND: (match_operand:VQW 1 "register_operand"))] + "TARGET_SIMD" + { +@@ -1642,8 +1833,8 @@ + ) + + (define_expand "vec_unpack_lo_" +- [(match_operand: 0 "register_operand" "") +- (ANY_EXTEND: (match_operand:VQW 1 "register_operand" ""))] ++ [(match_operand: 0 "register_operand") ++ (ANY_EXTEND: (match_operand:VQW 1 "register_operand"))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); +@@ -1761,9 +1952,9 @@ + ) + + (define_expand "vec_widen_mult_lo_" +- [(match_operand: 0 "register_operand" "") +- (ANY_EXTEND: (match_operand:VQW 1 "register_operand" "")) +- (ANY_EXTEND: (match_operand:VQW 2 "register_operand" ""))] ++ [(match_operand: 0 "register_operand") ++ (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) ++ (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); +@@ -1788,9 +1979,9 @@ + ) + + (define_expand "vec_widen_mult_hi_" +- [(match_operand: 0 "register_operand" "") +- (ANY_EXTEND: (match_operand:VQW 1 "register_operand" "")) +- (ANY_EXTEND: (match_operand:VQW 2 "register_operand" ""))] ++ [(match_operand: 0 "register_operand") ++ (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) ++ (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -1855,9 +2046,9 @@ + ) + + (define_expand "div3" +- [(set (match_operand:VHSDF 0 "register_operand" "=w") +- (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w") +- (match_operand:VHSDF 2 "register_operand" "w")))] ++ [(set (match_operand:VHSDF 0 "register_operand") ++ (div:VHSDF (match_operand:VHSDF 1 "register_operand") ++ (match_operand:VHSDF 2 "register_operand")))] + "TARGET_SIMD" + { + if (aarch64_emit_approx_div (operands[0], operands[1], operands[2])) +@@ -2192,8 +2383,8 @@ + ;; other big-endian patterns their behavior is as required. + + (define_expand "vec_unpacks_lo_" +- [(match_operand: 0 "register_operand" "") +- (match_operand:VQ_HSF 1 "register_operand" "")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VQ_HSF 1 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); +@@ -2215,8 +2406,8 @@ + ) + + (define_expand "vec_unpacks_hi_" +- [(match_operand: 0 "register_operand" "") +- (match_operand:VQ_HSF 1 "register_operand" "")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VQ_HSF 1 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -2268,9 +2459,9 @@ + ) + + (define_expand "aarch64_float_truncate_hi_" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand:VDF 1 "register_operand" "0") +- (match_operand: 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VDF 1 "register_operand") ++ (match_operand: 2 "register_operand")] + "TARGET_SIMD" + { + rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN +@@ -2363,8 +2554,8 @@ + ;; 'across lanes' add. 
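;;
;; A minimal sketch of what the across-lanes add computes, assuming a
;; hypothetical scalar helper used purely for illustration:
;;
;;   int reduc_plus_v4si (const int v[4])
;;   {
;;     return v[0] + v[1] + v[2] + v[3];  /* typically a single ADDV */
;;   }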
+ + (define_expand "reduc_plus_scal_" +- [(match_operand: 0 "register_operand" "=w") +- (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")] + UNSPEC_ADDV)] + "TARGET_SIMD" + { +@@ -3116,30 +3307,31 @@ + (define_insn "*aarch64_get_lane_extend" + [(set (match_operand:GPI 0 "register_operand" "=r") + (sign_extend:GPI +- (vec_select: ++ (vec_select: + (match_operand:VDQQH 1 "register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] + "TARGET_SIMD" + { +- operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); ++ operands[2] = aarch64_endian_lane_rtx (mode, ++ INTVAL (operands[2])); + return "smov\\t%0, %1.[%2]"; + } +- [(set_attr "type" "neon_to_gp")] +-) +- +-(define_insn "*aarch64_get_lane_zero_extend" +- [(set (match_operand:GPI 0 "register_operand" "=r") +- (zero_extend:GPI +- (vec_select: +- (match_operand:VDQQH 1 "register_operand" "w") +- (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] +- "TARGET_SIMD" +- { +- operands[2] = aarch64_endian_lane_rtx (mode, +- INTVAL (operands[2])); +- return "umov\\t%w0, %1.[%2]"; +- } +- [(set_attr "type" "neon_to_gp")] ++ [(set_attr "type" "neon_to_gp")] ++) ++ ++(define_insn "*aarch64_get_lane_zero_extend" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (zero_extend:GPI ++ (vec_select: ++ (match_operand:VDQQH 1 "register_operand" "w") ++ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] ++ "TARGET_SIMD" ++ { ++ operands[2] = aarch64_endian_lane_rtx (mode, ++ INTVAL (operands[2])); ++ return "umov\\t%w0, %1.[%2]"; ++ } ++ [(set_attr "type" "neon_to_gp")] + ) + + ;; Lane extraction of a value, neither sign nor zero extension +@@ -3280,9 +3472,9 @@ + + + (define_expand "aarch64_saddl2" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand:VQW 1 "register_operand" "w") +- (match_operand:VQW 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VQW 1 "register_operand") ++ (match_operand:VQW 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -3292,9 +3484,9 @@ + }) + + (define_expand "aarch64_uaddl2" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand:VQW 1 "register_operand" "w") +- (match_operand:VQW 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VQW 1 "register_operand") ++ (match_operand:VQW 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -3304,9 +3496,9 @@ + }) + + (define_expand "aarch64_ssubl2" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand:VQW 1 "register_operand" "w") +- (match_operand:VQW 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VQW 1 "register_operand") ++ (match_operand:VQW 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -3316,9 +3508,9 @@ + }) + + (define_expand "aarch64_usubl2" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand:VQW 1 "register_operand" "w") +- (match_operand:VQW 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VQW 1 "register_operand") ++ (match_operand:VQW 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -3341,10 +3533,10 @@ + ;; w. 
+ + (define_expand "widen_ssum3" +- [(set (match_operand: 0 "register_operand" "") ++ [(set (match_operand: 0 "register_operand") + (plus: (sign_extend: +- (match_operand:VQW 1 "register_operand" "")) +- (match_operand: 2 "register_operand" "")))] ++ (match_operand:VQW 1 "register_operand")) ++ (match_operand: 2 "register_operand")))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); +@@ -3358,10 +3550,10 @@ + ) + + (define_expand "widen_ssum3" +- [(set (match_operand: 0 "register_operand" "") ++ [(set (match_operand: 0 "register_operand") + (plus: (sign_extend: +- (match_operand:VD_BHSI 1 "register_operand" "")) +- (match_operand: 2 "register_operand" "")))] ++ (match_operand:VD_BHSI 1 "register_operand")) ++ (match_operand: 2 "register_operand")))] + "TARGET_SIMD" + { + emit_insn (gen_aarch64_saddw (operands[0], operands[2], operands[1])); +@@ -3369,10 +3561,10 @@ + }) + + (define_expand "widen_usum3" +- [(set (match_operand: 0 "register_operand" "") ++ [(set (match_operand: 0 "register_operand") + (plus: (zero_extend: +- (match_operand:VQW 1 "register_operand" "")) +- (match_operand: 2 "register_operand" "")))] ++ (match_operand:VQW 1 "register_operand")) ++ (match_operand: 2 "register_operand")))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); +@@ -3386,10 +3578,10 @@ + ) + + (define_expand "widen_usum3" +- [(set (match_operand: 0 "register_operand" "") ++ [(set (match_operand: 0 "register_operand") + (plus: (zero_extend: +- (match_operand:VD_BHSI 1 "register_operand" "")) +- (match_operand: 2 "register_operand" "")))] ++ (match_operand:VD_BHSI 1 "register_operand")) ++ (match_operand: 2 "register_operand")))] + "TARGET_SIMD" + { + emit_insn (gen_aarch64_uaddw (operands[0], operands[2], operands[1])); +@@ -3467,9 +3659,9 @@ + ) + + (define_expand "aarch64_saddw2" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQW 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQW 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -3479,9 +3671,9 @@ + }) + + (define_expand "aarch64_uaddw2" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQW 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQW 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -3492,9 +3684,9 @@ + + + (define_expand "aarch64_ssubw2" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQW 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQW 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -3504,9 +3696,9 @@ + }) + + (define_expand "aarch64_usubw2" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQW 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQW 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4039,10 +4231,10 @@ + ) + + (define_expand "aarch64_sqdmlal2" +- [(match_operand: 0 "register_operand" "=w") +- 
(match_operand: 1 "register_operand" "w") +- (match_operand:VQ_HSI 2 "register_operand" "w") +- (match_operand:VQ_HSI 3 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQ_HSI 2 "register_operand") ++ (match_operand:VQ_HSI 3 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4052,10 +4244,10 @@ + }) + + (define_expand "aarch64_sqdmlsl2" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQ_HSI 2 "register_operand" "w") +- (match_operand:VQ_HSI 3 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQ_HSI 2 "register_operand") ++ (match_operand:VQ_HSI 3 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4119,11 +4311,11 @@ + ) + + (define_expand "aarch64_sqdmlal2_lane" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQ_HSI 2 "register_operand" "w") +- (match_operand: 3 "register_operand" "") +- (match_operand:SI 4 "immediate_operand" "i")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQ_HSI 2 "register_operand") ++ (match_operand: 3 "register_operand") ++ (match_operand:SI 4 "immediate_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4134,11 +4326,11 @@ + }) + + (define_expand "aarch64_sqdmlal2_laneq" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQ_HSI 2 "register_operand" "w") +- (match_operand: 3 "register_operand" "") +- (match_operand:SI 4 "immediate_operand" "i")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQ_HSI 2 "register_operand") ++ (match_operand: 3 "register_operand") ++ (match_operand:SI 4 "immediate_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4149,11 +4341,11 @@ + }) + + (define_expand "aarch64_sqdmlsl2_lane" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQ_HSI 2 "register_operand" "w") +- (match_operand: 3 "register_operand" "") +- (match_operand:SI 4 "immediate_operand" "i")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQ_HSI 2 "register_operand") ++ (match_operand: 3 "register_operand") ++ (match_operand:SI 4 "immediate_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4164,11 +4356,11 @@ + }) + + (define_expand "aarch64_sqdmlsl2_laneq" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQ_HSI 2 "register_operand" "w") +- (match_operand: 3 "register_operand" "") +- (match_operand:SI 4 "immediate_operand" "i")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQ_HSI 2 "register_operand") ++ (match_operand: 3 "register_operand") ++ (match_operand:SI 4 "immediate_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4198,10 +4390,10 @@ + ) + + (define_expand "aarch64_sqdmlal2_n" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQ_HSI 2 "register_operand" "w") +- 
(match_operand: 3 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQ_HSI 2 "register_operand") ++ (match_operand: 3 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4212,10 +4404,10 @@ + }) + + (define_expand "aarch64_sqdmlsl2_n" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand: 1 "register_operand" "w") +- (match_operand:VQ_HSI 2 "register_operand" "w") +- (match_operand: 3 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:VQ_HSI 2 "register_operand") ++ (match_operand: 3 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4367,9 +4559,9 @@ + ) + + (define_expand "aarch64_sqdmull2" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand:VQ_HSI 1 "register_operand" "w") +- (match_operand:VQ_HSI 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VQ_HSI 1 "register_operand") ++ (match_operand:VQ_HSI 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4427,10 +4619,10 @@ + ) + + (define_expand "aarch64_sqdmull2_lane" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand:VQ_HSI 1 "register_operand" "w") +- (match_operand: 2 "register_operand" "") +- (match_operand:SI 3 "immediate_operand" "i")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VQ_HSI 1 "register_operand") ++ (match_operand: 2 "register_operand") ++ (match_operand:SI 3 "immediate_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4441,10 +4633,10 @@ + }) + + (define_expand "aarch64_sqdmull2_laneq" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand:VQ_HSI 1 "register_operand" "w") +- (match_operand: 2 "register_operand" "") +- (match_operand:SI 3 "immediate_operand" "i")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VQ_HSI 1 "register_operand") ++ (match_operand: 2 "register_operand") ++ (match_operand:SI 3 "immediate_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4475,9 +4667,9 @@ + ) + + (define_expand "aarch64_sqdmull2_n" +- [(match_operand: 0 "register_operand" "=w") +- (match_operand:VQ_HSI 1 "register_operand" "w") +- (match_operand: 2 "register_operand" "w")] ++ [(match_operand: 0 "register_operand") ++ (match_operand:VQ_HSI 1 "register_operand") ++ (match_operand: 2 "register_operand")] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); +@@ -4879,8 +5071,8 @@ + ;; sqrt + + (define_expand "sqrt2" +- [(set (match_operand:VHSDF 0 "register_operand" "=w") +- (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))] ++ [(set (match_operand:VHSDF 0 "register_operand") ++ (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))] + "TARGET_SIMD" + { + if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) +@@ -4933,8 +5125,8 @@ + ) + + (define_expand "vec_load_lanesoi" +- [(set (match_operand:OI 0 "register_operand" "=w") +- (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") ++ [(set (match_operand:OI 0 "register_operand") ++ (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD2))] + "TARGET_SIMD" +@@ -4977,8 +5169,8 @@ + ) + + (define_expand "vec_store_lanesoi" +- [(set (match_operand:OI 0 
"aarch64_simd_struct_operand" "=Utv") +- (unspec:OI [(match_operand:OI 1 "register_operand" "w") ++ [(set (match_operand:OI 0 "aarch64_simd_struct_operand") ++ (unspec:OI [(match_operand:OI 1 "register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST2))] + "TARGET_SIMD" +@@ -5031,8 +5223,8 @@ + ) + + (define_expand "vec_load_lanesci" +- [(set (match_operand:CI 0 "register_operand" "=w") +- (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") ++ [(set (match_operand:CI 0 "register_operand") ++ (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD3))] + "TARGET_SIMD" +@@ -5075,8 +5267,8 @@ + ) + + (define_expand "vec_store_lanesci" +- [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") +- (unspec:CI [(match_operand:CI 1 "register_operand" "w") ++ [(set (match_operand:CI 0 "aarch64_simd_struct_operand") ++ (unspec:CI [(match_operand:CI 1 "register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST3))] + "TARGET_SIMD" +@@ -5129,8 +5321,8 @@ + ) + + (define_expand "vec_load_lanesxi" +- [(set (match_operand:XI 0 "register_operand" "=w") +- (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") ++ [(set (match_operand:XI 0 "register_operand") ++ (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_LD4))] + "TARGET_SIMD" +@@ -5173,8 +5365,8 @@ + ) + + (define_expand "vec_store_lanesxi" +- [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") +- (unspec:XI [(match_operand:XI 1 "register_operand" "w") ++ [(set (match_operand:XI 0 "aarch64_simd_struct_operand") ++ (unspec:XI [(match_operand:XI 1 "register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + UNSPEC_ST4))] + "TARGET_SIMD" +@@ -5219,8 +5411,8 @@ + ;; Reload patterns for AdvSIMD register list operands. + + (define_expand "mov" +- [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "") +- (match_operand:VSTRUCT 1 "general_operand" ""))] ++ [(set (match_operand:VSTRUCT 0 "nonimmediate_operand") ++ (match_operand:VSTRUCT 1 "general_operand"))] + "TARGET_SIMD" + { + if (can_create_pseudo_p ()) +@@ -5232,8 +5424,8 @@ + + + (define_expand "aarch64_ld1x3" +- [(match_operand:CI 0 "register_operand" "=w") +- (match_operand:DI 1 "register_operand" "r") ++ [(match_operand:CI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5252,9 +5444,31 @@ + [(set_attr "type" "neon_load1_3reg")] + ) + ++(define_expand "aarch64_ld1x4" ++ [(match_operand:XI 0 "register_operand" "=w") ++ (match_operand:DI 1 "register_operand" "r") ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ "TARGET_SIMD" ++{ ++ rtx mem = gen_rtx_MEM (XImode, operands[1]); ++ emit_insn (gen_aarch64_ld1_x4_ (operands[0], mem)); ++ DONE; ++}) ++ ++(define_insn "aarch64_ld1_x4_" ++ [(set (match_operand:XI 0 "register_operand" "=w") ++ (unspec:XI ++ [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") ++ (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_LD1))] ++ "TARGET_SIMD" ++ "ld1\\t{%S0. 
- %V0.}, %1" ++ [(set_attr "type" "neon_load1_4reg")] ++) ++ + (define_expand "aarch64_st1x2" +- [(match_operand:DI 0 "register_operand" "") +- (match_operand:OI 1 "register_operand" "") ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:OI 1 "register_operand") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5274,8 +5488,8 @@ + ) + + (define_expand "aarch64_st1x3" +- [(match_operand:DI 0 "register_operand" "") +- (match_operand:CI 1 "register_operand" "") ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:CI 1 "register_operand") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5294,6 +5508,28 @@ + [(set_attr "type" "neon_store1_3reg")] + ) + ++(define_expand "aarch64_st1x4" ++ [(match_operand:DI 0 "register_operand" "") ++ (match_operand:XI 1 "register_operand" "") ++ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] ++ "TARGET_SIMD" ++{ ++ rtx mem = gen_rtx_MEM (XImode, operands[0]); ++ emit_insn (gen_aarch64_st1_x4_ (mem, operands[1])); ++ DONE; ++}) ++ ++(define_insn "aarch64_st1_x4_" ++ [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") ++ (unspec:XI ++ [(match_operand:XI 1 "register_operand" "w") ++ (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)] ++ UNSPEC_ST1))] ++ "TARGET_SIMD" ++ "st1\\t{%S1. - %V1.}, %0" ++ [(set_attr "type" "neon_store1_4reg")] ++) ++ + (define_insn "*aarch64_mov" + [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") + (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] +@@ -5427,8 +5663,8 @@ + }) + + (define_expand "aarch64_ldr" +- [(match_operand:VSTRUCT 0 "register_operand" "=w") +- (match_operand:DI 1 "register_operand" "w") ++ [(match_operand:VSTRUCT 0 "register_operand") ++ (match_operand:DI 1 "register_operand") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5502,8 +5738,8 @@ + ) + + (define_expand "aarch64_ld" +- [(match_operand:VSTRUCT 0 "register_operand" "=w") +- (match_operand:DI 1 "register_operand" "r") ++ [(match_operand:VSTRUCT 0 "register_operand") ++ (match_operand:DI 1 "register_operand") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5530,8 +5766,8 @@ + }) + + (define_expand "aarch64_ld" +- [(match_operand:VSTRUCT 0 "register_operand" "=w") +- (match_operand:DI 1 "register_operand" "r") ++ [(match_operand:VSTRUCT 0 "register_operand") ++ (match_operand:DI 1 "register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5543,8 +5779,8 @@ + }) + + (define_expand "aarch64_ld1x2" +- [(match_operand:OI 0 "register_operand" "=w") +- (match_operand:DI 1 "register_operand" "r") ++ [(match_operand:OI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5556,8 +5792,8 @@ + }) + + (define_expand "aarch64_ld1x2" +- [(match_operand:OI 0 "register_operand" "=w") +- (match_operand:DI 1 "register_operand" "r") ++ [(match_operand:OI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5570,10 +5806,10 @@ + + + (define_expand "aarch64_ld_lane" +- [(match_operand:VSTRUCT 0 "register_operand" "=w") +- (match_operand:DI 1 "register_operand" "w") +- (match_operand:VSTRUCT 2 "register_operand" "0") +- (match_operand:SI 3 "immediate_operand" "i") ++ [(match_operand:VSTRUCT 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ 
(match_operand:VSTRUCT 2 "register_operand") ++ (match_operand:SI 3 "immediate_operand") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5593,9 +5829,9 @@ + ;; D-register list. + + (define_expand "aarch64_get_dreg" +- [(match_operand:VDC 0 "register_operand" "=w") +- (match_operand:VSTRUCT 1 "register_operand" "w") +- (match_operand:SI 2 "immediate_operand" "i")] ++ [(match_operand:VDC 0 "register_operand") ++ (match_operand:VSTRUCT 1 "register_operand") ++ (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + int part = INTVAL (operands[2]); +@@ -5610,9 +5846,9 @@ + ;; Q-register list. + + (define_expand "aarch64_get_qreg" +- [(match_operand:VQ 0 "register_operand" "=w") +- (match_operand:VSTRUCT 1 "register_operand" "w") +- (match_operand:SI 2 "immediate_operand" "i")] ++ [(match_operand:VQ 0 "register_operand") ++ (match_operand:VSTRUCT 1 "register_operand") ++ (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + int part = INTVAL (operands[2]); +@@ -5749,13 +5985,13 @@ + ;; This instruction's pattern is generated directly by + ;; aarch64_expand_vec_perm_const, so any changes to the pattern would + ;; need corresponding changes there. +-(define_insn "aarch64_" ++(define_insn "aarch64_" + [(set (match_operand:VALL_F16 0 "register_operand" "=w") + (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") + (match_operand:VALL_F16 2 "register_operand" "w")] + PERMUTE))] + "TARGET_SIMD" +- "\\t%0., %1., %2." ++ "\\t%0., %1., %2." + [(set_attr "type" "neon_permute")] + ) + +@@ -5851,8 +6087,8 @@ + ) + + (define_expand "aarch64_st" +- [(match_operand:DI 0 "register_operand" "r") +- (match_operand:VSTRUCT 1 "register_operand" "w") ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:VSTRUCT 1 "register_operand") + (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5864,8 +6100,8 @@ + }) + + (define_expand "aarch64_st" +- [(match_operand:DI 0 "register_operand" "r") +- (match_operand:VSTRUCT 1 "register_operand" "w") ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:VSTRUCT 1 "register_operand") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] + "TARGET_SIMD" + { +@@ -5877,8 +6113,8 @@ + }) + + (define_expand "aarch64_st_lane" +- [(match_operand:DI 0 "register_operand" "r") +- (match_operand:VSTRUCT 1 "register_operand" "w") ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:VSTRUCT 1 "register_operand") + (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" +@@ -5914,10 +6150,10 @@ + ;; extend them in arm_neon.h and insert the resulting Q-regs. + + (define_expand "aarch64_set_qreg" +- [(match_operand:VSTRUCT 0 "register_operand" "+w") +- (match_operand:VSTRUCT 1 "register_operand" "0") +- (match_operand:VQ 2 "register_operand" "w") +- (match_operand:SI 3 "immediate_operand" "i")] ++ [(match_operand:VSTRUCT 0 "register_operand") ++ (match_operand:VSTRUCT 1 "register_operand") ++ (match_operand:VQ 2 "register_operand") ++ (match_operand:SI 3 "immediate_operand")] + "TARGET_SIMD" + { + int part = INTVAL (operands[3]); +@@ -5932,7 +6168,7 @@ + ;; Standard pattern name vec_init. 
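;;
;; A rough sketch of the operation this standard pattern provides, written
;; as a hypothetical C helper for illustration only:
;;
;;   void init_v4si (int out[4], int a, int b, int c, int d)
;;   {
;;     out[0] = a; out[1] = b; out[2] = c; out[3] = d;
;;   }
;;
;; aarch64_expand_vector_init then chooses the concrete DUP/INS (or load)
;; sequence used to build the vector.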
+ + (define_expand "vec_init" +- [(match_operand:VALL_F16 0 "register_operand" "") ++ [(match_operand:VALL_F16 0 "register_operand") + (match_operand 1 "" "")] + "TARGET_SIMD" + { +@@ -5941,7 +6177,7 @@ + }) + + (define_expand "vec_init" +- [(match_operand:VQ_NO2E 0 "register_operand" "") ++ [(match_operand:VQ_NO2E 0 "register_operand") + (match_operand 1 "" "")] + "TARGET_SIMD" + { +@@ -6020,9 +6256,9 @@ + ;; Standard pattern name vec_extract. + + (define_expand "vec_extract" +- [(match_operand: 0 "aarch64_simd_nonimmediate_operand" "") +- (match_operand:VALL_F16 1 "register_operand" "") +- (match_operand:SI 2 "immediate_operand" "")] ++ [(match_operand: 0 "aarch64_simd_nonimmediate_operand") ++ (match_operand:VALL_F16 1 "register_operand") ++ (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" + { + emit_insn +@@ -6063,56 +6299,23 @@ + + (define_insn "aarch64_crypto_aesv16qi" + [(set (match_operand:V16QI 0 "register_operand" "=w") +- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0") +- (match_operand:V16QI 2 "register_operand" "w")] ++ (unspec:V16QI ++ [(xor:V16QI ++ (match_operand:V16QI 1 "register_operand" "%0") ++ (match_operand:V16QI 2 "register_operand" "w"))] + CRYPTO_AES))] + "TARGET_SIMD && TARGET_AES" + "aes\\t%0.16b, %2.16b" + [(set_attr "type" "crypto_aese")] + ) + +-(define_insn "*aarch64_crypto_aesv16qi_xor_combine" +- [(set (match_operand:V16QI 0 "register_operand" "=w") +- (unspec:V16QI [(xor:V16QI +- (match_operand:V16QI 1 "register_operand" "%0") +- (match_operand:V16QI 2 "register_operand" "w")) +- (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")] +- CRYPTO_AES))] +- "TARGET_SIMD && TARGET_AES" +- "aes\\t%0.16b, %2.16b" +- [(set_attr "type" "crypto_aese")] +-) +- +-(define_insn "*aarch64_crypto_aesv16qi_xor_combine" +- [(set (match_operand:V16QI 0 "register_operand" "=w") +- (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "") +- (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0") +- (match_operand:V16QI 2 "register_operand" "w"))] +- CRYPTO_AES))] +- "TARGET_SIMD && TARGET_AES" +- "aes\\t%0.16b, %2.16b" +- [(set_attr "type" "crypto_aese")] +-) +- +-;; When AES/AESMC fusion is enabled we want the register allocation to +-;; look like: +-;; AESE Vn, _ +-;; AESMC Vn, Vn +-;; So prefer to tie operand 1 to operand 0 when fusing. +- + (define_insn "aarch64_crypto_aesv16qi" +- [(set (match_operand:V16QI 0 "register_operand" "=w,w") +- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")] ++ [(set (match_operand:V16QI 0 "register_operand" "=w") ++ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] + CRYPTO_AESMC))] + "TARGET_SIMD && TARGET_AES" + "aes\\t%0.16b, %1.16b" +- [(set_attr "type" "crypto_aesmc") +- (set_attr_alternative "enabled" +- [(if_then_else (match_test +- "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)") +- (const_string "yes" ) +- (const_string "no")) +- (const_string "yes")])] ++ [(set_attr "type" "crypto_aesmc")] + ) + + ;; When AESE/AESMC fusion is enabled we really want to keep the two together +@@ -6121,12 +6324,14 @@ + ;; Mash the two together during combine. 
+ + (define_insn "*aarch64_crypto_aese_fused" +- [(set (match_operand:V16QI 0 "register_operand" "=&w") ++ [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI + [(unspec:V16QI +- [(match_operand:V16QI 1 "register_operand" "0") +- (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE) +- ] UNSPEC_AESMC))] ++ [(xor:V16QI ++ (match_operand:V16QI 1 "register_operand" "%0") ++ (match_operand:V16QI 2 "register_operand" "w"))] ++ UNSPEC_AESE)] ++ UNSPEC_AESMC))] + "TARGET_SIMD && TARGET_AES + && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b" +@@ -6140,12 +6345,14 @@ + ;; Mash the two together during combine. + + (define_insn "*aarch64_crypto_aesd_fused" +- [(set (match_operand:V16QI 0 "register_operand" "=&w") ++ [(set (match_operand:V16QI 0 "register_operand" "=w") + (unspec:V16QI + [(unspec:V16QI +- [(match_operand:V16QI 1 "register_operand" "0") +- (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD) +- ] UNSPEC_AESIMC))] ++ [(xor:V16QI ++ (match_operand:V16QI 1 "register_operand" "%0") ++ (match_operand:V16QI 2 "register_operand" "w"))] ++ UNSPEC_AESD)] ++ UNSPEC_AESIMC))] + "TARGET_SIMD && TARGET_AES + && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" + "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b" +@@ -6397,11 +6604,11 @@ + ;; fp16fml + + (define_expand "aarch64_fmll_low" +- [(set (match_operand:VDQSF 0 "register_operand" "=w") ++ [(set (match_operand:VDQSF 0 "register_operand") + (unspec:VDQSF +- [(match_operand:VDQSF 1 "register_operand" "0") +- (match_operand: 2 "register_operand" "w") +- (match_operand: 3 "register_operand" "w")] ++ [(match_operand:VDQSF 1 "register_operand") ++ (match_operand: 2 "register_operand") ++ (match_operand: 3 "register_operand")] + VFMLA16_LOW))] + "TARGET_F16FML" + { +@@ -6420,11 +6627,11 @@ + }) + + (define_expand "aarch64_fmll_high" +- [(set (match_operand:VDQSF 0 "register_operand" "=w") ++ [(set (match_operand:VDQSF 0 "register_operand") + (unspec:VDQSF +- [(match_operand:VDQSF 1 "register_operand" "0") +- (match_operand: 2 "register_operand" "w") +- (match_operand: 3 "register_operand" "w")] ++ [(match_operand:VDQSF 1 "register_operand") ++ (match_operand: 2 "register_operand") ++ (match_operand: 3 "register_operand")] + VFMLA16_HIGH))] + "TARGET_F16FML" + { +@@ -6510,11 +6717,11 @@ + ) + + (define_expand "aarch64_fmll_lane_lowv2sf" +- [(set (match_operand:V2SF 0 "register_operand" "") +- (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") +- (match_operand:V4HF 2 "register_operand" "") +- (match_operand:V4HF 3 "register_operand" "") +- (match_operand:SI 4 "aarch64_imm2" "")] ++ [(set (match_operand:V2SF 0 "register_operand") ++ (unspec:V2SF [(match_operand:V2SF 1 "register_operand") ++ (match_operand:V4HF 2 "register_operand") ++ (match_operand:V4HF 3 "register_operand") ++ (match_operand:SI 4 "aarch64_imm2")] + VFMLA16_LOW))] + "TARGET_F16FML" + { +@@ -6531,11 +6738,11 @@ + ) + + (define_expand "aarch64_fmll_lane_highv2sf" +- [(set (match_operand:V2SF 0 "register_operand" "") +- (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") +- (match_operand:V4HF 2 "register_operand" "") +- (match_operand:V4HF 3 "register_operand" "") +- (match_operand:SI 4 "aarch64_imm2" "")] ++ [(set (match_operand:V2SF 0 "register_operand") ++ (unspec:V2SF [(match_operand:V2SF 1 "register_operand") ++ (match_operand:V4HF 2 "register_operand") ++ (match_operand:V4HF 3 "register_operand") ++ (match_operand:SI 4 "aarch64_imm2")] + VFMLA16_HIGH))] + "TARGET_F16FML" + { +@@ 
-6625,11 +6832,11 @@ + ) + + (define_expand "aarch64_fmllq_laneq_lowv4sf" +- [(set (match_operand:V4SF 0 "register_operand" "") +- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") +- (match_operand:V8HF 2 "register_operand" "") +- (match_operand:V8HF 3 "register_operand" "") +- (match_operand:SI 4 "aarch64_lane_imm3" "")] ++ [(set (match_operand:V4SF 0 "register_operand") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand") ++ (match_operand:V8HF 2 "register_operand") ++ (match_operand:V8HF 3 "register_operand") ++ (match_operand:SI 4 "aarch64_lane_imm3")] + VFMLA16_LOW))] + "TARGET_F16FML" + { +@@ -6645,11 +6852,11 @@ + }) + + (define_expand "aarch64_fmllq_laneq_highv4sf" +- [(set (match_operand:V4SF 0 "register_operand" "") +- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") +- (match_operand:V8HF 2 "register_operand" "") +- (match_operand:V8HF 3 "register_operand" "") +- (match_operand:SI 4 "aarch64_lane_imm3" "")] ++ [(set (match_operand:V4SF 0 "register_operand") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand") ++ (match_operand:V8HF 2 "register_operand") ++ (match_operand:V8HF 3 "register_operand") ++ (match_operand:SI 4 "aarch64_lane_imm3")] + VFMLA16_HIGH))] + "TARGET_F16FML" + { +@@ -6739,11 +6946,11 @@ + ) + + (define_expand "aarch64_fmll_laneq_lowv2sf" +- [(set (match_operand:V2SF 0 "register_operand" "") +- (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") +- (match_operand:V4HF 2 "register_operand" "") +- (match_operand:V8HF 3 "register_operand" "") +- (match_operand:SI 4 "aarch64_lane_imm3" "")] ++ [(set (match_operand:V2SF 0 "register_operand") ++ (unspec:V2SF [(match_operand:V2SF 1 "register_operand") ++ (match_operand:V4HF 2 "register_operand") ++ (match_operand:V8HF 3 "register_operand") ++ (match_operand:SI 4 "aarch64_lane_imm3")] + VFMLA16_LOW))] + "TARGET_F16FML" + { +@@ -6760,11 +6967,11 @@ + }) + + (define_expand "aarch64_fmll_laneq_highv2sf" +- [(set (match_operand:V2SF 0 "register_operand" "") +- (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") +- (match_operand:V4HF 2 "register_operand" "") +- (match_operand:V8HF 3 "register_operand" "") +- (match_operand:SI 4 "aarch64_lane_imm3" "")] ++ [(set (match_operand:V2SF 0 "register_operand") ++ (unspec:V2SF [(match_operand:V2SF 1 "register_operand") ++ (match_operand:V4HF 2 "register_operand") ++ (match_operand:V8HF 3 "register_operand") ++ (match_operand:SI 4 "aarch64_lane_imm3")] + VFMLA16_HIGH))] + "TARGET_F16FML" + { +@@ -6855,11 +7062,11 @@ + ) + + (define_expand "aarch64_fmllq_lane_lowv4sf" +- [(set (match_operand:V4SF 0 "register_operand" "") +- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") +- (match_operand:V8HF 2 "register_operand" "") +- (match_operand:V4HF 3 "register_operand" "") +- (match_operand:SI 4 "aarch64_imm2" "")] ++ [(set (match_operand:V4SF 0 "register_operand") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand") ++ (match_operand:V8HF 2 "register_operand") ++ (match_operand:V4HF 3 "register_operand") ++ (match_operand:SI 4 "aarch64_imm2")] + VFMLA16_LOW))] + "TARGET_F16FML" + { +@@ -6875,11 +7082,11 @@ + }) + + (define_expand "aarch64_fmllq_lane_highv4sf" +- [(set (match_operand:V4SF 0 "register_operand" "") +- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") +- (match_operand:V8HF 2 "register_operand" "") +- (match_operand:V4HF 3 "register_operand" "") +- (match_operand:SI 4 "aarch64_imm2" "")] ++ [(set (match_operand:V4SF 0 "register_operand") ++ (unspec:V4SF [(match_operand:V4SF 1 "register_operand") ++ 
(match_operand:V8HF 2 "register_operand") ++ (match_operand:V4HF 3 "register_operand") ++ (match_operand:SI 4 "aarch64_imm2")] + VFMLA16_HIGH))] + "TARGET_F16FML" + { +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +new file mode 100644 +index 000000000..b28ded0f5 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc +@@ -0,0 +1,2760 @@ ++/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) ++ Copyright (C) 2018-2019 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "tree.h" ++#include "rtl.h" ++#include "tm_p.h" ++#include "memmodel.h" ++#include "insn-codes.h" ++#include "optabs.h" ++#include "recog.h" ++#include "expr.h" ++#include "basic-block.h" ++#include "function.h" ++#include "fold-const.h" ++#include "gimple.h" ++#include "gimple-iterator.h" ++#include "gimplify.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "tree-vector-builder.h" ++#include "rtx-vector-builder.h" ++#include "vec-perm-indices.h" ++#include "aarch64-sve-builtins.h" ++#include "aarch64-sve-builtins-shapes.h" ++#include "aarch64-sve-builtins-base.h" ++#include "aarch64-sve-builtins-functions.h" ++ ++using namespace aarch64_sve; ++ ++namespace { ++ ++/* Expand a call to svmad, or svmla after reordering its operands. ++ Make _m forms merge with argument MERGE_ARGNO. */ ++static rtx ++expand_mad (function_expander &e, ++ unsigned int merge_argno = DEFAULT_MERGE_ARGNO) ++{ ++ if (e.pred == PRED_x) ++ { ++ insn_code icode; ++ if (e.type_suffix (0).integer_p) ++ icode = code_for_aarch64_pred_fma (e.vector_mode (0)); ++ else ++ icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0)); ++ return e.use_pred_x_insn (icode); ++ } ++ ++ insn_code icode = e.direct_optab_handler (cond_fma_optab); ++ return e.use_cond_insn (icode, merge_argno); ++} ++ ++/* Expand a call to svmsb, or svmls after reordering its operands. ++ Make _m forms merge with argument MERGE_ARGNO. */ ++static rtx ++expand_msb (function_expander &e, ++ unsigned int merge_argno = DEFAULT_MERGE_ARGNO) ++{ ++ if (e.pred == PRED_x) ++ { ++ insn_code icode; ++ if (e.type_suffix (0).integer_p) ++ icode = code_for_aarch64_pred_fnma (e.vector_mode (0)); ++ else ++ icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0)); ++ return e.use_pred_x_insn (icode); ++ } ++ ++ insn_code icode = e.direct_optab_handler (cond_fnma_optab); ++ return e.use_cond_insn (icode, merge_argno); ++} ++ ++class svabd_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* The integer operations are represented as the subtraction of the ++ minimum from the maximum, with the signedness of the instruction ++ keyed off the signedness of the maximum operation. */ ++ rtx_code max_code = e.type_suffix (0).unsigned_p ? 
UMAX : SMAX; ++ insn_code icode; ++ if (e.pred == PRED_x) ++ { ++ if (e.type_suffix (0).integer_p) ++ icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0)); ++ else ++ icode = code_for_aarch64_pred_abd (e.vector_mode (0)); ++ return e.use_pred_x_insn (icode); ++ } ++ ++ if (e.type_suffix (0).integer_p) ++ icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0)); ++ else ++ icode = code_for_aarch64_cond_abd (e.vector_mode (0)); ++ return e.use_cond_insn (icode); ++ } ++}; ++ ++/* Implements svacge, svacgt, svacle and svaclt. */ ++class svac_impl : public function_base ++{ ++public: ++ CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ e.add_ptrue_hint (0, e.gp_mode (0)); ++ insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++ ++ /* The unspec code for the underlying comparison. */ ++ int m_unspec; ++}; ++ ++class svadda_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Put the predicate last, as required by mask_fold_left_plus_optab. */ ++ e.rotate_inputs_left (0, 3); ++ machine_mode mode = e.vector_mode (0); ++ insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++/* Implements svadr[bhwd]. */ ++class svadr_bhwd_impl : public function_base ++{ ++public: ++ CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = GET_MODE (e.args[0]); ++ if (m_shift == 0) ++ return e.use_exact_insn (code_for_aarch64_adr (mode)); ++ ++ /* Turn the access size into an extra shift argument. */ ++ rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode)); ++ e.args.quick_push (expand_vector_broadcast (mode, shift)); ++ return e.use_exact_insn (code_for_aarch64_adr_shift (mode)); ++ } ++ ++ /* How many bits left to shift the vector displacement. */ ++ unsigned int m_shift; ++}; ++ ++class svasrd_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_cond_insn (code_for_cond_asrd (e.vector_mode (0))); ++ } ++}; ++ ++class svbic_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Convert svbic of a constant into svand of its inverse. */ ++ if (CONST_INT_P (e.args[2])) ++ { ++ machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); ++ e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode); ++ return e.map_to_rtx_codes (AND, AND, -1); ++ } ++ ++ if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) ++ { ++ gcc_assert (e.pred == PRED_z); ++ return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z); ++ } ++ ++ if (e.pred == PRED_x) ++ return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0))); ++ ++ return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0))); ++ } ++}; ++ ++/* Implements svbrkn, svbrkpa and svbrkpb. */ ++class svbrk_binary_impl : public function_base ++{ ++public: ++ CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_exact_insn (code_for_aarch64_brk (m_unspec)); ++ } ++ ++ /* The unspec code associated with the operation. */ ++ int m_unspec; ++}; ++ ++/* Implements svbrka and svbrkb. 
*/ ++class svbrk_unary_impl : public function_base ++{ ++public: ++ CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_cond_insn (code_for_aarch64_brk (m_unspec)); ++ } ++ ++ /* The unspec code associated with the operation. */ ++ int m_unspec; ++}; ++ ++class svcadd_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Convert the rotation amount into a specific unspec. */ ++ int rot = INTVAL (e.args[3]); ++ e.args.ordered_remove (3); ++ int unspec = (rot == 90 ? UNSPEC_COND_FCADD90 ++ : rot == 270 ? UNSPEC_COND_FCADD270 ++ : (gcc_unreachable (), 0)); ++ return e.map_to_unspecs (-1, -1, unspec); ++ } ++}; ++ ++/* Implements svclasta and svclastb. */ ++class svclast_impl : public quiet ++{ ++public: ++ CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Match the fold_extract_optab order. */ ++ std::swap (e.args[0], e.args[1]); ++ machine_mode mode = e.vector_mode (0); ++ insn_code icode; ++ if (e.mode_suffix_id == MODE_n) ++ icode = code_for_fold_extract (m_unspec, mode); ++ else ++ icode = code_for_aarch64_fold_extract_vector (m_unspec, mode); ++ return e.use_exact_insn (icode); ++ } ++ ++ /* The unspec code associated with the operation. */ ++ int m_unspec; ++}; ++ ++class svcmla_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Convert the rotation amount into a specific unspec. */ ++ int rot = INTVAL (e.args[4]); ++ e.args.ordered_remove (4); ++ int unspec = (rot == 0 ? UNSPEC_COND_FCMLA ++ : rot == 90 ? UNSPEC_COND_FCMLA90 ++ : rot == 180 ? UNSPEC_COND_FCMLA180 ++ : rot == 270 ? UNSPEC_COND_FCMLA270 ++ : (gcc_unreachable (), 0)); ++ ++ /* Make the operand order the same as the one used by the fma optabs, ++ with the accumulator last. */ ++ e.rotate_inputs_left (1, 4); ++ return e.map_to_unspecs (-1, -1, unspec, 3); ++ } ++}; ++ ++class svcmla_lane_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Convert the rotation amount into a specific unspec. */ ++ int rot = INTVAL (e.args[4]); ++ e.args.ordered_remove (4); ++ int unspec = (rot == 0 ? UNSPEC_FCMLA ++ : rot == 90 ? UNSPEC_FCMLA90 ++ : rot == 180 ? UNSPEC_FCMLA180 ++ : rot == 270 ? UNSPEC_FCMLA270 ++ : (gcc_unreachable (), 0)); ++ ++ /* Make the operand order the same as the one used by the fma optabs, ++ with the accumulator last. */ ++ e.rotate_inputs_left (0, 4); ++ insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++/* Implements svcmp (except svcmpuo, which is handled separately). */ ++class svcmp_impl : public function_base ++{ ++public: ++ CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp) ++ : m_code (code), m_unspec_for_fp (unspec_for_fp) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = e.vector_mode (0); ++ ++ /* Comparisons are UNSPEC_PRED_Z operations and so need a hint ++ operand. 
*/ ++ e.add_ptrue_hint (0, e.gp_mode (0)); ++ ++ if (e.type_suffix (0).integer_p) ++ { ++ bool unsigned_p = e.type_suffix (0).unsigned_p; ++ rtx_code code = get_rtx_code (m_code, unsigned_p); ++ return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode)); ++ } ++ ++ insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode); ++ return e.use_exact_insn (icode); ++ } ++ ++ /* The tree code associated with the comparison. */ ++ tree_code m_code; ++ ++ /* The unspec code to use for floating-point comparisons. */ ++ int m_unspec_for_fp; ++}; ++ ++/* Implements svcmp_wide. */ ++class svcmp_wide_impl : public function_base ++{ ++public: ++ CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint, ++ int unspec_for_uint) ++ : m_code (code), m_unspec_for_sint (unspec_for_sint), ++ m_unspec_for_uint (unspec_for_uint) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = e.vector_mode (0); ++ bool unsigned_p = e.type_suffix (0).unsigned_p; ++ rtx_code code = get_rtx_code (m_code, unsigned_p); ++ ++ /* Comparisons are UNSPEC_PRED_Z operations and so need a hint ++ operand. */ ++ e.add_ptrue_hint (0, e.gp_mode (0)); ++ ++ /* If the argument is a constant that the unwidened comparisons ++ can handle directly, use them instead. */ ++ insn_code icode = code_for_aarch64_pred_cmp (code, mode); ++ rtx op2 = unwrap_const_vec_duplicate (e.args[3]); ++ if (CONSTANT_P (op2) ++ && insn_data[icode].operand[4].predicate (op2, DImode)) ++ { ++ e.args[3] = op2; ++ return e.use_exact_insn (icode); ++ } ++ ++ int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint); ++ return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode)); ++ } ++ ++ /* The tree code associated with the comparison. */ ++ tree_code m_code; ++ ++ /* The unspec codes for signed and unsigned wide comparisons ++ respectively. */ ++ int m_unspec_for_sint; ++ int m_unspec_for_uint; ++}; ++ ++class svcmpuo_impl : public quiet ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ e.add_ptrue_hint (0, e.gp_mode (0)); ++ return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0))); ++ } ++}; ++ ++class svcnot_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = e.vector_mode (0); ++ if (e.pred == PRED_x) ++ { ++ /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs ++ a ptrue hint. */ ++ e.add_ptrue_hint (0, e.gp_mode (0)); ++ return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode)); ++ } ++ ++ return e.use_cond_insn (code_for_cond_cnot (mode), 0); ++ } ++}; ++ ++/* Implements svcnt[bhwd], which count the number of elements ++ in a particular vector mode. */ ++class svcnt_bhwd_impl : public function_base ++{ ++public: ++ CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {} ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ tree count = build_int_cstu (TREE_TYPE (f.lhs), ++ GET_MODE_NUNITS (m_ref_mode)); ++ return gimple_build_assign (f.lhs, count); ++ } ++ ++ rtx ++ expand (function_expander &) const OVERRIDE ++ { ++ return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode); ++ } ++ ++ /* The mode of the vector associated with the [bhwd] suffix. */ ++ machine_mode m_ref_mode; ++}; ++ ++/* Implements svcnt[bhwd]_pat. 
*/ ++class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl ++{ ++public: ++ CONSTEXPR svcnt_bhwd_pat_impl (machine_mode ref_mode) ++ : svcnt_bhwd_impl (ref_mode) {} ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ tree pattern_arg = gimple_call_arg (f.call, 0); ++ aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg); ++ ++ if (pattern == AARCH64_SV_ALL) ++ /* svcvnt[bwhd]_pat (SV_ALL) == svcnt[bwhd] (). */ ++ return svcnt_bhwd_impl::fold (f); ++ ++ /* See whether we can count the number of elements in the pattern ++ at compile time. */ ++ unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); ++ HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq); ++ if (value >= 0) ++ { ++ tree count = build_int_cstu (TREE_TYPE (f.lhs), value); ++ return gimple_build_assign (f.lhs, count); ++ } ++ ++ return NULL; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); ++ e.args.quick_push (gen_int_mode (elements_per_vq, DImode)); ++ e.args.quick_push (const1_rtx); ++ return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat); ++ } ++}; ++ ++class svcntp_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = e.vector_mode (0); ++ e.add_ptrue_hint (0, mode); ++ return e.use_exact_insn (code_for_aarch64_pred_cntp (mode)); ++ } ++}; ++ ++/* Implements svcreate2, svcreate3 and svcreate4. */ ++class svcreate_impl : public quiet ++{ ++public: ++ CONSTEXPR svcreate_impl (unsigned int vectors_per_tuple) ++ : quiet (vectors_per_tuple) {} ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ unsigned int nargs = gimple_call_num_args (f.call); ++ tree lhs_type = TREE_TYPE (f.lhs); ++ ++ /* Replace the call with a clobber of the result (to prevent it from ++ becoming upwards exposed) followed by stores into each individual ++ vector of tuple. ++ ++ The fold routines expect the replacement statement to have the ++ same lhs as the original call, so return the clobber statement ++ rather than the final vector store. */ ++ gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type)); ++ ++ for (unsigned int i = nargs; i-- > 0; ) ++ { ++ tree rhs_vector = gimple_call_arg (f.call, i); ++ tree field = tuple_type_field (TREE_TYPE (f.lhs)); ++ tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), ++ unshare_expr (f.lhs), field, NULL_TREE); ++ tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), ++ lhs_array, size_int (i), ++ NULL_TREE, NULL_TREE); ++ gassign *assign = gimple_build_assign (lhs_vector, rhs_vector); ++ gsi_insert_after (f.gsi, assign, GSI_SAME_STMT); ++ } ++ return clobber; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ rtx lhs_tuple = e.get_nonoverlapping_reg_target (); ++ ++ /* Record that LHS_TUPLE is dead before the first store. */ ++ emit_clobber (lhs_tuple); ++ for (unsigned int i = 0; i < e.args.length (); ++i) ++ { ++ /* Use an lvalue subreg to refer to vector I in LHS_TUPLE. 
*/ ++ rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]), ++ lhs_tuple, GET_MODE (lhs_tuple), ++ i * BYTES_PER_SVE_VECTOR); ++ emit_move_insn (lhs_vector, e.args[i]); ++ } ++ return lhs_tuple; ++ } ++}; ++ ++class svcvt_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode0 = e.vector_mode (0); ++ machine_mode mode1 = e.vector_mode (1); ++ insn_code icode; ++ /* All this complication comes from the need to select four things ++ simultaneously: ++ ++ (1) the kind of conversion (int<-float, float<-int, float<-float) ++ (2) signed vs. unsigned integers, where relevant ++ (3) the predication mode, which must be the wider of the predication ++ modes for MODE0 and MODE1 ++ (4) the predication type (m, x or z) ++ ++ The only supported int<->float conversions for which the integer is ++ narrower than the float are SI<->DF. It's therefore more convenient ++ to handle (3) by defining two patterns for int<->float conversions: ++ one in which the integer is at least as wide as the float and so ++ determines the predication mode, and another single SI<->DF pattern ++ in which the float's mode determines the predication mode (which is ++ always VNx2BI in that case). ++ ++ The names of the patterns follow the optab convention of giving ++ the source mode before the destination mode. */ ++ if (e.type_suffix (1).integer_p) ++ { ++ int unspec = (e.type_suffix (1).unsigned_p ++ ? UNSPEC_COND_UCVTF ++ : UNSPEC_COND_SCVTF); ++ if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes) ++ icode = (e.pred == PRED_x ++ ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0) ++ : code_for_cond_nonextend (unspec, mode1, mode0)); ++ else ++ icode = (e.pred == PRED_x ++ ? code_for_aarch64_sve_extend (unspec, mode1, mode0) ++ : code_for_cond_extend (unspec, mode1, mode0)); ++ } ++ else ++ { ++ int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT ++ : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU ++ : UNSPEC_COND_FCVTZS); ++ if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes) ++ icode = (e.pred == PRED_x ++ ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0) ++ : code_for_cond_nontrunc (unspec, mode1, mode0)); ++ else ++ icode = (e.pred == PRED_x ++ ? code_for_aarch64_sve_trunc (unspec, mode1, mode0) ++ : code_for_cond_trunc (unspec, mode1, mode0)); ++ } ++ ++ if (e.pred == PRED_x) ++ return e.use_pred_x_insn (icode); ++ return e.use_cond_insn (icode); ++ } ++}; ++ ++class svdot_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* In the optab, the multiplication operands come before the accumulator ++ operand. The optab is keyed off the multiplication mode. */ ++ e.rotate_inputs_left (0, 3); ++ insn_code icode ++ = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab, ++ 0, GET_MODE (e.args[0])); ++ return e.use_unpred_insn (icode); ++ } ++}; ++ ++class svdotprod_lane_impl : public unspec_based_function_base ++{ ++public: ++ CONSTEXPR svdotprod_lane_impl (int unspec_for_sint, ++ int unspec_for_uint, ++ int unspec_for_float) ++ : unspec_based_function_base (unspec_for_sint, ++ unspec_for_uint, ++ unspec_for_float) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Use the same ordering as the dot_prod_optab, with the ++ accumulator last. 
*/ ++ e.rotate_inputs_left (0, 4); ++ int unspec = unspec_for (e); ++ machine_mode mode = e.vector_mode (0); ++ return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode)); ++ } ++}; ++ ++class svdup_impl : public quiet ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ tree vec_type = TREE_TYPE (f.lhs); ++ tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1); ++ ++ if (f.pred == PRED_none || f.pred == PRED_x) ++ { ++ if (CONSTANT_CLASS_P (rhs)) ++ { ++ if (f.type_suffix (0).bool_p) ++ return (tree_to_shwi (rhs) ++ ? f.fold_to_ptrue () ++ : f.fold_to_pfalse ()); ++ ++ tree rhs_vector = build_vector_from_val (vec_type, rhs); ++ return gimple_build_assign (f.lhs, rhs_vector); ++ } ++ ++ /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we ++ would need to introduce an extra and unwanted conversion to ++ the truth vector element type. */ ++ if (!f.type_suffix (0).bool_p) ++ return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs); ++ } ++ ++ return NULL; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ if (e.pred == PRED_none || e.pred == PRED_x) ++ /* There's no benefit to using predicated instructions for _x here. */ ++ return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab)); ++ ++ /* Model predicated svdups as a SEL in which the "true" value is ++ the duplicate of the function argument and the "false" value ++ is the value of inactive lanes. */ ++ insn_code icode; ++ machine_mode mode = e.vector_mode (0); ++ if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ())) ++ /* Duplicate the constant to fill a vector. The pattern optimizes ++ various cases involving constant operands, falling back to SEL ++ if necessary. */ ++ icode = code_for_vcond_mask (mode, mode); ++ else ++ /* Use the pattern for selecting between a duplicated scalar ++ variable and a vector fallback. */ ++ icode = code_for_aarch64_sel_dup (mode); ++ return e.use_vcond_mask_insn (icode); ++ } ++}; ++ ++class svdup_lane_impl : public quiet ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* The native DUP lane has an index range of 64 bytes. */ ++ machine_mode mode = e.vector_mode (0); ++ if (CONST_INT_P (e.args[1]) ++ && IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63)) ++ return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode)); ++ ++ /* Treat svdup_lane as if it were svtbl_n. */ ++ return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); ++ } ++}; ++ ++class svdupq_impl : public quiet ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ tree vec_type = TREE_TYPE (f.lhs); ++ unsigned int nargs = gimple_call_num_args (f.call); ++ /* For predicates, pad out each argument so that we have one element ++ per bit. */ ++ unsigned int factor = (f.type_suffix (0).bool_p ++ ? 
f.type_suffix (0).element_bytes : 1); ++ tree_vector_builder builder (vec_type, nargs * factor, 1); ++ for (unsigned int i = 0; i < nargs; ++i) ++ { ++ tree elt = gimple_call_arg (f.call, i); ++ if (!CONSTANT_CLASS_P (elt)) ++ return NULL; ++ builder.quick_push (elt); ++ for (unsigned int j = 1; j < factor; ++j) ++ builder.quick_push (build_zero_cst (TREE_TYPE (vec_type))); ++ } ++ return gimple_build_assign (f.lhs, builder.build ()); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = e.vector_mode (0); ++ unsigned int elements_per_vq = e.args.length (); ++ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) ++ { ++ /* Construct a vector of integers so that we can compare them against ++ zero below. Zero vs. nonzero is the only distinction that ++ matters. */ ++ mode = aarch64_sve_int_mode (mode); ++ for (unsigned int i = 0; i < elements_per_vq; ++i) ++ e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode), ++ e.args[i], QImode); ++ } ++ ++ /* Get the 128-bit Advanced SIMD vector for this data size. */ ++ scalar_mode element_mode = GET_MODE_INNER (mode); ++ machine_mode vq_mode = aarch64_vq_mode (element_mode).require (); ++ gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode))); ++ ++ /* Put the arguments into a 128-bit Advanced SIMD vector. We want ++ argument N to go into architectural lane N, whereas Advanced SIMD ++ vectors are loaded memory lsb to register lsb. We therefore need ++ to reverse the elements for big-endian targets. */ ++ rtx vq_reg = gen_reg_rtx (vq_mode); ++ rtvec vec = rtvec_alloc (elements_per_vq); ++ for (unsigned int i = 0; i < elements_per_vq; ++i) ++ { ++ unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i; ++ RTVEC_ELT (vec, i) = e.args[argno]; ++ } ++ aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec)); ++ ++ /* If the result is a boolean, compare the data vector against zero. */ ++ if (mode != e.vector_mode (0)) ++ { ++ rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg); ++ return aarch64_convert_sve_data_to_pred (e.possible_target, ++ e.vector_mode (0), data_dupq); ++ } ++ ++ return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg); ++ } ++}; ++ ++class svdupq_lane_impl : public quiet ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = e.vector_mode (0); ++ rtx index = e.args[1]; ++ if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3)) ++ { ++ /* Use the .Q form of DUP, which is the native instruction for ++ this function. */ ++ insn_code icode = code_for_aarch64_sve_dupq_lane (mode); ++ unsigned int num_indices = e.elements_per_vq (0); ++ rtx indices = aarch64_gen_stepped_int_parallel ++ (num_indices, INTVAL (index) * num_indices, 1); ++ ++ e.add_output_operand (icode); ++ e.add_input_operand (icode, e.args[0]); ++ e.add_fixed_operand (indices); ++ return e.generate_insn (icode); ++ } ++ ++ /* Build a .D TBL index for the pairs of doublewords that we want to ++ duplicate. */ ++ if (CONST_INT_P (index)) ++ { ++ /* The index vector is a constant. */ ++ rtx_vector_builder builder (VNx2DImode, 2, 1); ++ builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode)); ++ builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode)); ++ index = builder.build (); ++ } ++ else ++ { ++ /* Duplicate INDEX * 2 to fill a DImode vector. The ACLE spec ++ explicitly allows the top of the index to be dropped. 
*/ ++ index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode, ++ index, const1_rtx)); ++ index = expand_vector_broadcast (VNx2DImode, index); ++ ++ /* Get an alternating 0, 1 predicate. */ ++ rtx_vector_builder builder (VNx2BImode, 2, 1); ++ builder.quick_push (const0_rtx); ++ builder.quick_push (constm1_rtx); ++ rtx pg = force_reg (VNx2BImode, builder.build ()); ++ ++ /* Add one to the odd elements of the index. */ ++ rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode)); ++ rtx target = gen_reg_rtx (VNx2DImode); ++ emit_insn (gen_cond_addvnx2di (target, pg, index, one, index)); ++ index = target; ++ } ++ ++ e.args[0] = gen_lowpart (VNx2DImode, e.args[0]); ++ e.args[1] = index; ++ return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di); ++ } ++}; ++ ++/* Implements svextb, svexth and svextw. */ ++class svext_bhw_impl : public function_base ++{ ++public: ++ CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode) ++ : m_from_mode (from_mode) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ if (e.type_suffix (0).unsigned_p) ++ { ++ /* Convert to an AND. The widest we go is 0xffffffff, which fits ++ in a CONST_INT. */ ++ e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode))); ++ if (e.pred == PRED_m) ++ /* We now have arguments "(inactive, pg, op, mask)". Convert this ++ to "(pg, op, mask, inactive)" so that the order matches svand_m ++ with an extra argument on the end. Take the inactive elements ++ from this extra argument. */ ++ e.rotate_inputs_left (0, 4); ++ return e.map_to_rtx_codes (AND, AND, -1, 3); ++ } ++ ++ machine_mode wide_mode = e.vector_mode (0); ++ poly_uint64 nunits = GET_MODE_NUNITS (wide_mode); ++ machine_mode narrow_mode ++ = aarch64_sve_data_mode (m_from_mode, nunits).require (); ++ if (e.pred == PRED_x) ++ { ++ insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode); ++ return e.use_pred_x_insn (icode); ++ } ++ ++ insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode); ++ return e.use_cond_insn (icode); ++ } ++ ++ /* The element mode that we're extending from. */ ++ scalar_int_mode m_from_mode; ++}; ++ ++/* Implements svget2, svget3 and svget4. */ ++class svget_impl : public quiet ++{ ++public: ++ CONSTEXPR svget_impl (unsigned int vectors_per_tuple) ++ : quiet (vectors_per_tuple) {} ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ /* Fold into a normal gimple component access. */ ++ tree rhs_tuple = gimple_call_arg (f.call, 0); ++ tree index = gimple_call_arg (f.call, 1); ++ tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); ++ tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), ++ rhs_tuple, field, NULL_TREE); ++ tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs), ++ rhs_array, index, NULL_TREE, NULL_TREE); ++ return gimple_build_assign (f.lhs, rhs_vector); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Fold the access into a subreg rvalue. 
*/ ++ return simplify_gen_subreg (e.vector_mode (0), e.args[0], ++ GET_MODE (e.args[0]), ++ INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR); ++ } ++}; ++ ++class svindex_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_exact_insn (e.direct_optab_handler (vec_series_optab)); ++ } ++}; ++ ++class svinsr_impl : public quiet ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2, ++ gimple_call_arg (f.call, 0), ++ gimple_call_arg (f.call, 1)); ++ gimple_call_set_lhs (new_call, f.lhs); ++ return new_call; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode = direct_optab_handler (vec_shl_insert_optab, ++ e.vector_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++/* Implements svlasta and svlastb. */ ++class svlast_impl : public quiet ++{ ++public: ++ CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0))); ++ } ++ ++ /* The unspec code associated with the operation. */ ++ int m_unspec; ++}; ++ ++class svld1_impl : public full_width_access ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_READ_MEMORY; ++ } ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ tree vectype = f.vector_type (0); ++ ++ /* Get the predicate and base pointer. */ ++ gimple_seq stmts = NULL; ++ tree pred = f.convert_pred (stmts, vectype, 0); ++ tree base = f.fold_contiguous_base (stmts, vectype); ++ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); ++ ++ tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); ++ gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3, ++ base, cookie, pred); ++ gimple_call_set_lhs (new_call, f.lhs); ++ return new_call; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode = convert_optab_handler (maskload_optab, ++ e.vector_mode (0), e.gp_mode (0)); ++ return e.use_contiguous_load_insn (icode); ++ } ++}; ++ ++/* Implements extending contiguous forms of svld1. */ ++class svld1_extend_impl : public extending_load ++{ ++public: ++ CONSTEXPR svld1_extend_impl (type_suffix_index memory_type) ++ : extending_load (memory_type) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode = code_for_aarch64_load (extend_rtx_code (), ++ e.vector_mode (0), ++ e.memory_vector_mode ()); ++ return e.use_contiguous_load_insn (icode); ++ } ++}; ++ ++class svld1_gather_impl : public full_width_access ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_READ_MEMORY; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ e.prepare_gather_address_operands (1); ++ /* Put the predicate last, as required by mask_gather_load_optab. */ ++ e.rotate_inputs_left (0, 5); ++ machine_mode mem_mode = e.memory_vector_mode (); ++ insn_code icode = direct_optab_handler (mask_gather_load_optab, mem_mode); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++/* Implements extending forms of svld1_gather. 
*/ ++class svld1_gather_extend_impl : public extending_load ++{ ++public: ++ CONSTEXPR svld1_gather_extend_impl (type_suffix_index memory_type) ++ : extending_load (memory_type) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ e.prepare_gather_address_operands (1); ++ /* Put the predicate last, since the extending gathers use the same ++ operand order as mask_gather_load_optab. */ ++ e.rotate_inputs_left (0, 5); ++ insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (), ++ e.vector_mode (0), ++ e.memory_vector_mode ()); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++class load_replicate : public function_base ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_READ_MEMORY; ++ } ++ ++ tree ++ memory_scalar_type (const function_instance &fi) const OVERRIDE ++ { ++ return fi.scalar_type (0); ++ } ++}; ++ ++class svld1rq_impl : public load_replicate ++{ ++public: ++ machine_mode ++ memory_vector_mode (const function_instance &fi) const OVERRIDE ++ { ++ return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require (); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0)); ++ return e.use_contiguous_load_insn (icode); ++ } ++}; ++ ++class svld1ro_impl : public load_replicate ++{ ++public: ++ machine_mode ++ memory_vector_mode (const function_instance &fi) const OVERRIDE ++ { ++ return OImode; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0)); ++ return e.use_contiguous_load_insn (icode); ++ } ++}; ++ ++/* Implements svld2, svld3 and svld4. */ ++class svld234_impl : public full_width_access ++{ ++public: ++ CONSTEXPR svld234_impl (unsigned int vectors_per_tuple) ++ : full_width_access (vectors_per_tuple) {} ++ ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_READ_MEMORY; ++ } ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ tree tuple_type = TREE_TYPE (f.lhs); ++ tree vectype = f.vector_type (0); ++ ++ /* Get the predicate and base pointer. */ ++ gimple_seq stmts = NULL; ++ tree pred = f.convert_pred (stmts, vectype, 0); ++ tree base = f.fold_contiguous_base (stmts, vectype); ++ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); ++ ++ /* Emit two statements: a clobber of the lhs, so that it isn't ++ upwards exposed, and then the load itself. ++ ++ The fold routines expect the replacement statement to have the ++ same lhs as the original call, so return the clobber statement ++ rather than the load. */ ++ gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type)); ++ ++ /* View the loaded data as an array of vectors. */ ++ tree field = tuple_type_field (tuple_type); ++ tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), ++ unshare_expr (f.lhs)); ++ ++ /* Emit the load itself. 
*/ ++ tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); ++ gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, ++ base, cookie, pred); ++ gimple_call_set_lhs (new_call, lhs_array); ++ gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT); ++ ++ return clobber; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr)); ++ insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab, ++ tuple_mode, e.vector_mode (0)); ++ return e.use_contiguous_load_insn (icode); ++ } ++}; ++ ++class svldff1_gather_impl : public full_width_access ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* See the block comment in aarch64-sve.md for details about the ++ FFR handling. */ ++ emit_insn (gen_aarch64_update_ffr_for_load ()); ++ ++ e.prepare_gather_address_operands (1); ++ /* Put the predicate last, since ldff1_gather uses the same operand ++ order as mask_gather_load_optab. */ ++ e.rotate_inputs_left (0, 5); ++ machine_mode mem_mode = e.memory_vector_mode (); ++ return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode)); ++ } ++}; ++ ++/* Implements extending forms of svldff1_gather. */ ++class svldff1_gather_extend : public extending_load ++{ ++public: ++ CONSTEXPR svldff1_gather_extend (type_suffix_index memory_type) ++ : extending_load (memory_type) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* See the block comment in aarch64-sve.md for details about the ++ FFR handling. */ ++ emit_insn (gen_aarch64_update_ffr_for_load ()); ++ ++ e.prepare_gather_address_operands (1); ++ /* Put the predicate last, since ldff1_gather uses the same operand ++ order as mask_gather_load_optab. */ ++ e.rotate_inputs_left (0, 5); ++ insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (), ++ e.vector_mode (0), ++ e.memory_vector_mode ()); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++class svldnt1_impl : public full_width_access ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_READ_MEMORY; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0)); ++ return e.use_contiguous_load_insn (icode); ++ } ++}; ++ ++/* Implements svldff1 and svldnf1. */ ++class svldxf1_impl : public full_width_access ++{ ++public: ++ CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {} ++ ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* See the block comment in aarch64-sve.md for details about the ++ FFR handling. */ ++ emit_insn (gen_aarch64_update_ffr_for_load ()); ++ ++ machine_mode mode = e.vector_mode (0); ++ return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode)); ++ } ++ ++ /* The unspec associated with the load. */ ++ int m_unspec; ++}; ++ ++/* Implements extending contiguous forms of svldff1 and svldnf1. 
*/ ++class svldxf1_extend_impl : public extending_load ++{ ++public: ++ CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec) ++ : extending_load (memory_type), m_unspec (unspec) {} ++ ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* See the block comment in aarch64-sve.md for details about the ++ FFR handling. */ ++ emit_insn (gen_aarch64_update_ffr_for_load ()); ++ ++ insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (), ++ e.vector_mode (0), ++ e.memory_vector_mode ()); ++ return e.use_contiguous_load_insn (icode); ++ } ++ ++ /* The unspec associated with the load. */ ++ int m_unspec; ++}; ++ ++class svlen_impl : public quiet ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ /* The argument only exists for its type. */ ++ tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0)); ++ tree count = build_int_cstu (TREE_TYPE (f.lhs), ++ TYPE_VECTOR_SUBPARTS (rhs_type)); ++ return gimple_build_assign (f.lhs, count); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* The argument only exists for its type. */ ++ return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode); ++ } ++}; ++ ++class svmad_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return expand_mad (e); ++ } ++}; ++ ++class svmla_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Put the accumulator at the end (argument 3), but keep it as the ++ merge input for _m functions. */ ++ e.rotate_inputs_left (1, 4); ++ return expand_mad (e, 3); ++ } ++}; ++ ++/* Base class for svmla_lane and svmls_lane. */ ++class svmla_svmls_lane_impl : public function_base ++{ ++public: ++ CONSTEXPR svmla_svmls_lane_impl (int unspec) ++ : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Put the operands in the normal (fma ...) order, with the accumulator ++ last. This fits naturally since that's also the unprinted operand ++ in the asm output. */ ++ e.rotate_inputs_left (0, 4); ++ insn_code icode = code_for_aarch64_lane (m_unspec, e.vector_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++ ++ /* The unspec code associated with the operation. */ ++ int m_unspec; ++}; ++ ++class svmls_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Put the accumulator at the end (argument 3), but keep it as the ++ merge input for _m functions. */ ++ e.rotate_inputs_left (1, 4); ++ return expand_msb (e, 3); ++ } ++}; ++ ++class svmov_impl : public function_base ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ return gimple_build_assign (f.lhs, BIT_AND_EXPR, ++ gimple_call_arg (f.call, 0), ++ gimple_call_arg (f.call, 1)); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B" ++ is "AND Pa.B, Pb/Z, Pc.B, Pc.B". 
*/ ++ gcc_assert (e.pred == PRED_z); ++ e.args.quick_push (e.args[1]); ++ return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z); ++ } ++}; ++ ++class svmmla_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode; ++ if (e.type_suffix (0).integer_p) ++ { ++ if (e.type_suffix (0).unsigned_p) ++ icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0)); ++ else ++ icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0)); ++ } ++ else ++ icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++class svmsb_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return expand_msb (e); ++ } ++}; ++ ++class svnand_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ gcc_assert (e.pred == PRED_z); ++ return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z); ++ } ++}; ++ ++class svnor_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ gcc_assert (e.pred == PRED_z); ++ return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z); ++ } ++}; ++ ++class svnot_impl : public rtx_code_function ++{ ++public: ++ CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) ++ { ++ /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B" ++ is "EOR Pa.B, Pb/Z, Pb.B, Pc.B". */ ++ gcc_assert (e.pred == PRED_z); ++ e.args.quick_insert (1, e.args[0]); ++ return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z); ++ } ++ return rtx_code_function::expand (e); ++ } ++}; ++ ++class svorn_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ gcc_assert (e.pred == PRED_z); ++ return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z); ++ } ++}; ++ ++class svpfalse_impl : public function_base ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ return f.fold_to_pfalse (); ++ } ++ ++ rtx ++ expand (function_expander &) const OVERRIDE ++ { ++ return CONST0_RTX (VNx16BImode); ++ } ++}; ++ ++/* Implements svpfirst and svpnext, which share the same .md patterns. */ ++class svpfirst_svpnext_impl : public function_base ++{ ++public: ++ CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = e.vector_mode (0); ++ e.add_ptrue_hint (0, mode); ++ return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode)); ++ } ++ ++ /* The unspec associated with the operation. */ ++ int m_unspec; ++}; ++ ++/* Implements contiguous forms of svprf[bhwd]. */ ++class svprf_bhwd_impl : public function_base ++{ ++public: ++ CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {} ++ ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_PREFETCH_MEMORY; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ e.prepare_prefetch_operands (); ++ insn_code icode = code_for_aarch64_sve_prefetch (m_mode); ++ return e.use_contiguous_prefetch_insn (icode); ++ } ++ ++ /* The mode that we'd use to hold one vector of prefetched data. */ ++ machine_mode m_mode; ++}; ++ ++/* Implements svprf[bhwd]_gather. 
*/ ++class svprf_bhwd_gather_impl : public function_base ++{ ++public: ++ CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {} ++ ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_PREFETCH_MEMORY; ++ } ++ ++ machine_mode ++ memory_vector_mode (const function_instance &) const OVERRIDE ++ { ++ return m_mode; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ e.prepare_prefetch_operands (); ++ e.prepare_gather_address_operands (1); ++ ++ /* Insert a zero operand to identify the mode of the memory being ++ accessed. This goes between the gather operands and prefetch ++ operands created above. */ ++ e.args.quick_insert (5, CONST0_RTX (m_mode)); ++ ++ machine_mode reg_mode = GET_MODE (e.args[2]); ++ insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode); ++ return e.use_exact_insn (icode); ++ } ++ ++ /* The mode that we'd use to hold one vector of prefetched data. */ ++ machine_mode m_mode; ++}; ++ ++/* Implements svptest_any, svptest_first and svptest_last. */ ++class svptest_impl : public function_base ++{ ++public: ++ CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* See whether GP is an exact ptrue for some predicate mode; ++ i.e. whether converting the GP to that mode will not drop ++ set bits and will leave all significant bits set. */ ++ machine_mode wide_mode; ++ int hint; ++ if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode)) ++ hint = SVE_KNOWN_PTRUE; ++ else ++ { ++ hint = SVE_MAYBE_NOT_PTRUE; ++ wide_mode = VNx16BImode; ++ } ++ ++ /* Generate the PTEST itself. */ ++ rtx pg = force_reg (VNx16BImode, e.args[0]); ++ rtx wide_pg = gen_lowpart (wide_mode, pg); ++ rtx hint_rtx = gen_int_mode (hint, DImode); ++ rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1])); ++ emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op)); ++ ++ /* Get the location of the boolean result. We can provide SImode and ++ DImode values directly; rely on generic code to convert others. */ ++ rtx target = e.possible_target; ++ if (!target ++ || !REG_P (target) ++ || (GET_MODE (target) != SImode && GET_MODE (target) != DImode)) ++ target = gen_reg_rtx (DImode); ++ ++ /* Generate a CSET to convert the CC result of the PTEST to a boolean. */ ++ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); ++ rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target), ++ cc_reg, const0_rtx); ++ emit_insn (gen_rtx_SET (target, compare)); ++ return target; ++ } ++ ++ /* The comparison code associated with ptest condition. */ ++ rtx_code m_compare; ++}; ++ ++class svptrue_impl : public function_base ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ return f.fold_to_ptrue (); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return aarch64_ptrue_all (e.type_suffix (0).element_bytes); ++ } ++}; ++ ++class svptrue_pat_impl : public function_base ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ tree pattern_arg = gimple_call_arg (f.call, 0); ++ aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg); ++ ++ if (pattern == AARCH64_SV_ALL) ++ /* svptrue_pat_bN (SV_ALL) == svptrue_bN (). */ ++ return f.fold_to_ptrue (); ++ ++ /* See whether we can count the number of elements in the pattern ++ at compile time. If so, construct a predicate with that number ++ of 1s followed by all 0s. 
*/ ++ int nelts_per_vq = f.elements_per_vq (0); ++ HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq); ++ if (value >= 0) ++ return f.fold_to_vl_pred (value); ++ ++ return NULL; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* In rtl, the predicate is represented as the constant: ++ ++ (const:V16BI (unspec:V16BI [(const_int PATTERN) ++ (const_vector:VnnBI [zeros])] ++ UNSPEC_PTRUE)) ++ ++ where nn determines the element size. */ ++ rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0))); ++ return gen_rtx_CONST (VNx16BImode, ++ gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE)); ++ } ++}; ++ ++class svqadd_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.expand_signed_unpred_op (SS_PLUS, US_PLUS); ++ } ++}; ++ ++/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}. */ ++class svqdec_svqinc_bhwd_impl : public function_base ++{ ++public: ++ CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint, ++ rtx_code code_for_uint, ++ scalar_int_mode elem_mode) ++ : m_code_for_sint (code_for_sint), ++ m_code_for_uint (code_for_uint), ++ m_elem_mode (elem_mode) ++ {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Treat non-_pat functions in the same way as _pat functions with ++ an SV_ALL argument. */ ++ if (e.args.length () == 2) ++ e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode)); ++ ++ /* Insert the number of elements per 128-bit block as a fake argument, ++ between the pattern and the multiplier. Arguments 1, 2 and 3 then ++ correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see ++ aarch64_sve_cnt_pat for details. */ ++ unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode); ++ e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode)); ++ ++ rtx_code code = (e.type_suffix (0).unsigned_p ++ ? m_code_for_uint ++ : m_code_for_sint); ++ ++ /* Choose between operating on integer scalars or integer vectors. */ ++ machine_mode mode = e.vector_mode (0); ++ if (e.mode_suffix_id == MODE_n) ++ mode = GET_MODE_INNER (mode); ++ return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode)); ++ } ++ ++ /* The saturating addition or subtraction codes to use for signed and ++ unsigned values respectively. */ ++ rtx_code m_code_for_sint; ++ rtx_code m_code_for_uint; ++ ++ /* The integer mode associated with the [bhwd] suffix. */ ++ scalar_int_mode m_elem_mode; ++}; ++ ++/* Implements svqdec[bhwd]{,_pat}. */ ++class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl ++{ ++public: ++ CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode) ++ : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {} ++}; ++ ++/* Implements svqinc[bhwd]{,_pat}. */ ++class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl ++{ ++public: ++ CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode) ++ : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {} ++}; ++ ++/* Implements svqdecp and svqincp. */ ++class svqdecp_svqincp_impl : public function_base ++{ ++public: ++ CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint, ++ rtx_code code_for_uint) ++ : m_code_for_sint (code_for_sint), ++ m_code_for_uint (code_for_uint) ++ {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ rtx_code code = (e.type_suffix (0).unsigned_p ++ ? 
m_code_for_uint ++ : m_code_for_sint); ++ insn_code icode; ++ if (e.mode_suffix_id == MODE_n) ++ { ++ /* Increment or decrement a scalar (whose mode is given by the first ++ type suffix) by the number of active elements in a predicate ++ (whose mode is given by the second type suffix). */ ++ machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); ++ icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1)); ++ } ++ else ++ /* Increment a vector by the number of active elements in a predicate, ++ with the vector mode determining the predicate mode. */ ++ icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++ ++ /* The saturating addition or subtraction codes to use for signed and ++ unsigned values respectively. */ ++ rtx_code m_code_for_sint; ++ rtx_code m_code_for_uint; ++}; ++ ++class svqsub_impl : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.expand_signed_unpred_op (SS_MINUS, US_MINUS); ++ } ++}; ++ ++class svrdffr_impl : public function_base ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_READ_FFR; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* See the block comment in aarch64-sve.md for details about the ++ FFR handling. */ ++ emit_insn (gen_aarch64_copy_ffr_to_ffrt ()); ++ rtx result = e.use_exact_insn (e.pred == PRED_z ++ ? CODE_FOR_aarch64_rdffr_z ++ : CODE_FOR_aarch64_rdffr); ++ emit_insn (gen_aarch64_update_ffrt ()); ++ return result; ++ } ++}; ++ ++class svreinterpret_impl : public quiet ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ /* Punt to rtl if the effect of the reinterpret on registers does not ++ conform to GCC's endianness model. */ ++ if (!targetm.can_change_mode_class (f.vector_mode (0), ++ f.vector_mode (1), FP_REGS)) ++ return NULL; ++ ++ /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR ++ reinterpretation. */ ++ tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs), ++ gimple_call_arg (f.call, 0)); ++ return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = e.vector_mode (0); ++ return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode)); ++ } ++}; ++ ++class svrev_impl : public permute ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ /* Punt for now on _b16 and wider; we'd need more complex evpc logic ++ to rerecognize the result. */ ++ if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8) ++ return NULL; ++ ++ /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ ++ poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); ++ vec_perm_builder builder (nelts, 1, 3); ++ for (int i = 0; i < 3; ++i) ++ builder.quick_push (nelts - i - 1); ++ return fold_permute (f, builder); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0))); ++ } ++}; ++ ++class svsel_impl : public quiet ++{ ++public: ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ /* svsel corresponds exactly to VEC_COND_EXPR. 
*/ ++ gimple_seq stmts = NULL; ++ tree pred = f.convert_pred (stmts, f.vector_type (0), 0); ++ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); ++ return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, ++ gimple_call_arg (f.call, 1), ++ gimple_call_arg (f.call, 2)); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond). */ ++ e.rotate_inputs_left (0, 3); ++ insn_code icode = convert_optab_handler (vcond_mask_optab, ++ e.vector_mode (0), ++ e.gp_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++/* Implements svset2, svset3 and svset4. */ ++class svset_impl : public quiet ++{ ++public: ++ CONSTEXPR svset_impl (unsigned int vectors_per_tuple) ++ : quiet (vectors_per_tuple) {} ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ tree rhs_tuple = gimple_call_arg (f.call, 0); ++ tree index = gimple_call_arg (f.call, 1); ++ tree rhs_vector = gimple_call_arg (f.call, 2); ++ ++ /* Replace the call with two statements: a copy of the full tuple ++ to the call result, followed by an update of the individual vector. ++ ++ The fold routines expect the replacement statement to have the ++ same lhs as the original call, so return the copy statement ++ rather than the field update. */ ++ gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple); ++ ++ /* Get a reference to the individual vector. */ ++ tree field = tuple_type_field (TREE_TYPE (f.lhs)); ++ tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), ++ f.lhs, field, NULL_TREE); ++ tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), ++ lhs_array, index, NULL_TREE, NULL_TREE); ++ gassign *update = gimple_build_assign (lhs_vector, rhs_vector); ++ gsi_insert_after (f.gsi, update, GSI_SAME_STMT); ++ ++ return copy; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ rtx rhs_tuple = e.args[0]; ++ unsigned int index = INTVAL (e.args[1]); ++ rtx rhs_vector = e.args[2]; ++ ++ /* First copy the full tuple to the target register. */ ++ rtx lhs_tuple = e.get_nonoverlapping_reg_target (); ++ emit_move_insn (lhs_tuple, rhs_tuple); ++ ++ /* ...then update the individual vector. */ ++ rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector), ++ lhs_tuple, GET_MODE (lhs_tuple), ++ index * BYTES_PER_SVE_VECTOR); ++ emit_move_insn (lhs_vector, rhs_vector); ++ return lhs_vector; ++ } ++}; ++ ++class svsetffr_impl : public function_base ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_WRITE_FFR; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ e.args.quick_push (CONSTM1_RTX (VNx16BImode)); ++ return e.use_exact_insn (CODE_FOR_aarch64_wrffr); ++ } ++}; ++ ++class svst1_impl : public full_width_access ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_WRITE_MEMORY; ++ } ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ tree vectype = f.vector_type (0); ++ ++ /* Get the predicate and base pointer. 
*/ ++ gimple_seq stmts = NULL; ++ tree pred = f.convert_pred (stmts, vectype, 0); ++ tree base = f.fold_contiguous_base (stmts, vectype); ++ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); ++ ++ tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); ++ tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1); ++ return gimple_build_call_internal (IFN_MASK_STORE, 4, ++ base, cookie, pred, rhs); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode = convert_optab_handler (maskstore_optab, ++ e.vector_mode (0), e.gp_mode (0)); ++ return e.use_contiguous_store_insn (icode); ++ } ++}; ++ ++class svst1_scatter_impl : public full_width_access ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_WRITE_MEMORY; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ e.prepare_gather_address_operands (1); ++ /* Put the predicate last, as required by mask_scatter_store_optab. */ ++ e.rotate_inputs_left (0, 6); ++ insn_code icode = direct_optab_handler (mask_scatter_store_optab, ++ e.memory_vector_mode ()); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++/* Implements truncating forms of svst1_scatter. */ ++class svst1_scatter_truncate_impl : public truncating_store ++{ ++public: ++ CONSTEXPR svst1_scatter_truncate_impl (scalar_int_mode to_mode) ++ : truncating_store (to_mode) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ e.prepare_gather_address_operands (1); ++ /* Put the predicate last, since the truncating scatters use the same ++ operand order as mask_scatter_store_optab. */ ++ e.rotate_inputs_left (0, 6); ++ insn_code icode = code_for_aarch64_scatter_store_trunc ++ (e.memory_vector_mode (), e.vector_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++/* Implements truncating contiguous forms of svst1. */ ++class svst1_truncate_impl : public truncating_store ++{ ++public: ++ CONSTEXPR svst1_truncate_impl (scalar_int_mode to_mode) ++ : truncating_store (to_mode) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (), ++ e.vector_mode (0)); ++ return e.use_contiguous_store_insn (icode); ++ } ++}; ++ ++/* Implements svst2, svst3 and svst4. */ ++class svst234_impl : public full_width_access ++{ ++public: ++ CONSTEXPR svst234_impl (unsigned int vectors_per_tuple) ++ : full_width_access (vectors_per_tuple) {} ++ ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_WRITE_MEMORY; ++ } ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ tree vectype = f.vector_type (0); ++ ++ /* Get the predicate and base pointer. */ ++ gimple_seq stmts = NULL; ++ tree pred = f.convert_pred (stmts, vectype, 0); ++ tree base = f.fold_contiguous_base (stmts, vectype); ++ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); ++ ++ /* View the stored data as an array of vectors. 
*/ ++ unsigned int num_args = gimple_call_num_args (f.call); ++ tree rhs_tuple = gimple_call_arg (f.call, num_args - 1); ++ tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); ++ tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple); ++ ++ tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); ++ return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, ++ base, cookie, pred, rhs_array); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode tuple_mode = GET_MODE (e.args.last ()); ++ insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab, ++ tuple_mode, e.vector_mode (0)); ++ return e.use_contiguous_store_insn (icode); ++ } ++}; ++ ++class svstnt1_impl : public full_width_access ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_WRITE_MEMORY; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0)); ++ return e.use_contiguous_store_insn (icode); ++ } ++}; ++ ++class svsub_impl : public rtx_code_function ++{ ++public: ++ CONSTEXPR svsub_impl () ++ : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Canonicalize subtractions of constants to additions. */ ++ machine_mode mode = e.vector_mode (0); ++ if (e.try_negating_argument (2, mode)) ++ return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD); ++ ++ return rtx_code_function::expand (e); ++ } ++}; ++ ++class svtbl_impl : public permute ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); ++ } ++}; ++ ++/* Implements svtrn1 and svtrn2. */ ++class svtrn_impl : public binary_permute ++{ ++public: ++ CONSTEXPR svtrn_impl (int base) ++ : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {} ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... } ++ svtrn2: as for svtrn1, but with 1 added to each index. */ ++ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); ++ vec_perm_builder builder (nelts, 2, 3); ++ for (unsigned int i = 0; i < 3; ++i) ++ { ++ builder.quick_push (m_base + i * 2); ++ builder.quick_push (m_base + i * 2 + nelts); ++ } ++ return fold_permute (f, builder); ++ } ++ ++ /* 0 for svtrn1, 1 for svtrn2. */ ++ unsigned int m_base; ++}; ++ ++/* Base class for svundef{,2,3,4}. */ ++class svundef_impl : public quiet ++{ ++public: ++ CONSTEXPR svundef_impl (unsigned int vectors_per_tuple) ++ : quiet (vectors_per_tuple) {} ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ /* Don't fold svundef at the gimple level. There's no exact ++ correspondence for SSA_NAMEs, and we explicitly don't want ++ to generate a specific value (like an all-zeros vector). */ ++ if (vectors_per_tuple () == 1) ++ return NULL; ++ return gimple_build_assign (f.lhs, build_clobber (TREE_TYPE (f.lhs))); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ rtx target = e.get_reg_target (); ++ emit_clobber (copy_rtx (target)); ++ return target; ++ } ++}; ++ ++/* Implements svunpklo and svunpkhi. */ ++class svunpk_impl : public quiet ++{ ++public: ++ CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {} ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ /* Don't fold the predicate ops, since every bit of the svbool_t ++ result is significant. 
*/ ++ if (f.type_suffix_ids[0] == TYPE_SUFFIX_b) ++ return NULL; ++ ++ /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian ++ and VEC_UNPACK_HI_EXPR for big-endian. */ ++ bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p; ++ tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR; ++ return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0)); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = GET_MODE (e.args[0]); ++ unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO; ++ unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO; ++ insn_code icode; ++ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) ++ icode = code_for_aarch64_sve_punpk (unpacku, mode); ++ else ++ { ++ int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks; ++ icode = code_for_aarch64_sve_unpk (unspec, unspec, mode); ++ } ++ return e.use_exact_insn (icode); ++ } ++ ++ /* True for svunpkhi, false for svunpklo. */ ++ bool m_high_p; ++}; ++ ++/* Also implements svsudot. */ ++class svusdot_impl : public function_base ++{ ++public: ++ CONSTEXPR svusdot_impl (bool su) : m_su (su) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* The implementation of the ACLE function svsudot (for the non-lane ++ version) is through the USDOT instruction but with the second and third ++ inputs swapped. */ ++ if (m_su) ++ e.rotate_inputs_left (1, 2); ++ /* The ACLE function has the same order requirements as for svdot. ++ While there's no requirement for the RTL pattern to have the same sort ++ of order as that for dot_prod, it's easier to read. ++ Hence we do the same rotation on arguments as svdot_impl does. */ ++ e.rotate_inputs_left (0, 3); ++ machine_mode mode = e.vector_mode (0); ++ insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode); ++ return e.use_exact_insn (icode); ++ } ++ ++private: ++ bool m_su; ++}; ++ ++/* Implements svuzp1 and svuzp2. */ ++class svuzp_impl : public binary_permute ++{ ++public: ++ CONSTEXPR svuzp_impl (unsigned int base) ++ : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {} ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ /* svuzp1: { 0, 2, 4, 6, ... } ++ svuzp2: { 1, 3, 5, 7, ... }. */ ++ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); ++ vec_perm_builder builder (nelts, 1, 3); ++ for (unsigned int i = 0; i < 3; ++i) ++ builder.quick_push (m_base + i * 2); ++ return fold_permute (f, builder); ++ } ++ ++ /* 0 for svuzp1, 1 for svuzp2. */ ++ unsigned int m_base; ++}; ++ ++/* A function_base for svwhilele and svwhilelt functions. */ ++class svwhile_impl : public function_base ++{ ++public: ++ CONSTEXPR svwhile_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p) ++ : m_unspec_for_sint (unspec_for_sint), ++ m_unspec_for_uint (unspec_for_uint), m_eq_p (eq_p) ++ {} ++ ++ /* Try to fold a call by treating its arguments as constants of type T. */ ++ template ++ gimple * ++ fold_type (gimple_folder &f) const ++ { ++ /* Only handle cases in which both operands are constant. */ ++ T arg0, arg1; ++ if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0) ++ || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1)) ++ return NULL; ++ ++ /* Check whether the result is known to be all-false. */ ++ if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1)) ++ return f.fold_to_pfalse (); ++ ++ /* Punt if we can't tell at compile time whether the result ++ is all-false. */ ++ if (m_eq_p ? 
maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1)) ++ return NULL; ++ ++ /* At this point we know the result has at least one set element. */ ++ poly_uint64 diff = arg1 - arg0; ++ poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0)); ++ ++ /* Canonicalize the svwhilele form to the svwhilelt form. Subtract ++ from NELTS rather than adding to DIFF, to prevent overflow. */ ++ if (m_eq_p) ++ nelts -= 1; ++ ++ /* Check whether the result is known to be all-true. */ ++ if (known_ge (diff, nelts)) ++ return f.fold_to_ptrue (); ++ ++ /* Punt if DIFF might not be the actual number of set elements ++ in the result. Conditional equality is fine. */ ++ if (maybe_gt (diff, nelts)) ++ return NULL; ++ ++ /* At this point we know that the predicate will have DIFF set elements ++ for svwhilelt and DIFF + 1 set elements for svwhilele (which stops ++ after rather than before ARG1 is reached). See if we can create ++ the predicate at compile time. */ ++ unsigned HOST_WIDE_INT vl; ++ if (diff.is_constant (&vl)) ++ /* Overflow is no longer possible after the checks above. */ ++ return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl); ++ ++ return NULL; ++ } ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ if (f.type_suffix (1).unsigned_p) ++ return fold_type (f); ++ else ++ return fold_type (f); ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Suffix 0 determines the predicate mode, suffix 1 determines the ++ scalar mode and signedness. */ ++ int unspec = (e.type_suffix (1).unsigned_p ++ ? m_unspec_for_uint ++ : m_unspec_for_sint); ++ machine_mode pred_mode = e.vector_mode (0); ++ scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1)); ++ return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode)); ++ } ++ ++ /* The unspec codes associated with signed and unsigned operations ++ respectively. */ ++ int m_unspec_for_sint; ++ int m_unspec_for_uint; ++ ++ /* True svwhilele, false for svwhilelt. */ ++ bool m_eq_p; ++}; ++ ++class svwrffr_impl : public function_base ++{ ++public: ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_WRITE_FFR; ++ } ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_exact_insn (CODE_FOR_aarch64_wrffr); ++ } ++}; ++ ++/* Implements svzip1 and svzip2. */ ++class svzip_impl : public binary_permute ++{ ++public: ++ CONSTEXPR svzip_impl (unsigned int base) ++ : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {} ++ ++ gimple * ++ fold (gimple_folder &f) const OVERRIDE ++ { ++ /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... } ++ svzip2: as for svzip1, but with nelts / 2 added to each index. */ ++ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); ++ poly_uint64 base = m_base * exact_div (nelts, 2); ++ vec_perm_builder builder (nelts, 2, 3); ++ for (unsigned int i = 0; i < 3; ++i) ++ { ++ builder.quick_push (base + i); ++ builder.quick_push (base + i + nelts); ++ } ++ return fold_permute (f, builder); ++ } ++ ++ /* 0 for svzip1, 1 for svzip2. 
*/ ++ unsigned int m_base; ++}; ++ ++} /* end anonymous namespace */ ++ ++namespace aarch64_sve { ++ ++FUNCTION (svabd, svabd_impl,) ++FUNCTION (svabs, quiet, (ABS, ABS, UNSPEC_COND_FABS)) ++FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE)) ++FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT)) ++FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE)) ++FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT)) ++FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD)) ++FUNCTION (svadda, svadda_impl,) ++FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV)) ++FUNCTION (svadrb, svadr_bhwd_impl, (0)) ++FUNCTION (svadrd, svadr_bhwd_impl, (3)) ++FUNCTION (svadrh, svadr_bhwd_impl, (1)) ++FUNCTION (svadrw, svadr_bhwd_impl, (2)) ++FUNCTION (svand, rtx_code_function, (AND, AND)) ++FUNCTION (svandv, reduction, (UNSPEC_ANDV)) ++FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) ++FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE)) ++FUNCTION (svasrd, svasrd_impl,) ++FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf)) ++FUNCTION (svbfdot_lane, fixed_insn_function, ++ (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf)) ++FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf)) ++FUNCTION (svbfmlalb_lane, fixed_insn_function, ++ (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf)) ++FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf)) ++FUNCTION (svbfmlalt_lane, fixed_insn_function, ++ (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf)) ++FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf)) ++FUNCTION (svbic, svbic_impl,) ++FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA)) ++FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB)) ++FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN)) ++FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA)) ++FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB)) ++FUNCTION (svcadd, svcadd_impl,) ++FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA)) ++FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB)) ++FUNCTION (svcls, unary_count, (CLRSB)) ++FUNCTION (svclz, unary_count, (CLZ)) ++FUNCTION (svcmla, svcmla_impl,) ++FUNCTION (svcmla_lane, svcmla_lane_impl,) ++FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ)) ++FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE, ++ UNSPEC_COND_CMPEQ_WIDE)) ++FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE)) ++FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE, ++ UNSPEC_COND_CMPHS_WIDE)) ++FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT)) ++FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE, ++ UNSPEC_COND_CMPHI_WIDE)) ++FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE)) ++FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE, ++ UNSPEC_COND_CMPLS_WIDE)) ++FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT)) ++FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE, ++ UNSPEC_COND_CMPLO_WIDE)) ++FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE)) ++FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE, ++ UNSPEC_COND_CMPNE_WIDE)) ++FUNCTION (svcmpuo, svcmpuo_impl,) ++FUNCTION (svcnot, svcnot_impl,) ++FUNCTION (svcnt, unary_count, (POPCOUNT)) ++FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode)) ++FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode)) ++FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode)) ++FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode)) 
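For reference, a minimal ACLE sketch of the svcnt[bhwd]{,_pat} entries registered above (illustrative only, not part of the patch; it assumes a toolchain that provides <arm_sve.h> and accepts -march=armv8.2-a+sve):

#include <arm_sve.h>

/* svcntb () returns the number of 8-bit elements in an SVE vector (the
   vector length in bytes); svcntb_pat () counts only the elements selected
   by the given pattern.  With the folds implemented in this file, such
   calls may become compile-time constants when the vector length is
   known.  */
uint64_t bytes_per_vector (void) { return svcntb (); }
uint64_t bytes_in_vl4_pattern (void) { return svcntb_pat (SV_VL4); }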
++FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode)) ++FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode)) ++FUNCTION (svcntp, svcntp_impl,) ++FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode)) ++FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode)) ++FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),) ++FUNCTION (svcreate2, svcreate_impl, (2)) ++FUNCTION (svcreate3, svcreate_impl, (3)) ++FUNCTION (svcreate4, svcreate_impl, (4)) ++FUNCTION (svcvt, svcvt_impl,) ++FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),) ++FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV)) ++FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) ++FUNCTION (svdot, svdot_impl,) ++FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1)) ++FUNCTION (svdup, svdup_impl,) ++FUNCTION (svdup_lane, svdup_lane_impl,) ++FUNCTION (svdupq, svdupq_impl,) ++FUNCTION (svdupq_lane, svdupq_lane_impl,) ++FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1)) ++FUNCTION (sveorv, reduction, (UNSPEC_XORV)) ++FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA)) ++FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),) ++FUNCTION (svextb, svext_bhw_impl, (QImode)) ++FUNCTION (svexth, svext_bhw_impl, (HImode)) ++FUNCTION (svextw, svext_bhw_impl, (SImode)) ++FUNCTION (svget2, svget_impl, (2)) ++FUNCTION (svget3, svget_impl, (3)) ++FUNCTION (svget4, svget_impl, (4)) ++FUNCTION (svindex, svindex_impl,) ++FUNCTION (svinsr, svinsr_impl,) ++FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA)) ++FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB)) ++FUNCTION (svld1, svld1_impl,) ++FUNCTION (svld1_gather, svld1_gather_impl,) ++FUNCTION (svld1ro, svld1ro_impl,) ++FUNCTION (svld1rq, svld1rq_impl,) ++FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8)) ++FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8)) ++FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16)) ++FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16)) ++FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32)) ++FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32)) ++FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8)) ++FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8)) ++FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16)) ++FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16)) ++FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32)) ++FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32)) ++FUNCTION (svld2, svld234_impl, (2)) ++FUNCTION (svld3, svld234_impl, (3)) ++FUNCTION (svld4, svld234_impl, (4)) ++FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1)) ++FUNCTION (svldff1_gather, svldff1_gather_impl,) ++FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1)) ++FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8)) ++FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1)) ++FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16)) ++FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1)) ++FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32)) ++FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1)) ++FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8)) ++FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1)) ++FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16)) ++FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, 
UNSPEC_LDFF1)) ++FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32)) ++FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1)) ++FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1)) ++FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1)) ++FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1)) ++FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1)) ++FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1)) ++FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1)) ++FUNCTION (svldnt1, svldnt1_impl,) ++FUNCTION (svlen, svlen_impl,) ++FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT)) ++FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE)) ++FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT)) ++FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE)) ++FUNCTION (svmad, svmad_impl,) ++FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX)) ++FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM)) ++FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV)) ++FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV)) ++FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN)) ++FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM)) ++FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV)) ++FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV)) ++FUNCTION (svmla, svmla_impl,) ++FUNCTION (svmla_lane, svmla_svmls_lane_impl, (UNSPEC_FMLA)) ++FUNCTION (svmls, svmls_impl,) ++FUNCTION (svmls_lane, svmla_svmls_lane_impl, (UNSPEC_FMLS)) ++FUNCTION (svmmla, svmmla_impl,) ++FUNCTION (svmov, svmov_impl,) ++FUNCTION (svmsb, svmsb_impl,) ++FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL)) ++FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),) ++FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART, ++ UNSPEC_UMUL_HIGHPART, -1)) ++FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX)) ++FUNCTION (svnand, svnand_impl,) ++FUNCTION (svneg, quiet, (NEG, NEG, UNSPEC_COND_FNEG)) ++FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA)) ++FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA)) ++FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS)) ++FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS)) ++FUNCTION (svnor, svnor_impl,) ++FUNCTION (svnot, svnot_impl,) ++FUNCTION (svorn, svorn_impl,) ++FUNCTION (svorr, rtx_code_function, (IOR, IOR)) ++FUNCTION (svorv, reduction, (UNSPEC_IORV)) ++FUNCTION (svpfalse, svpfalse_impl,) ++FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST)) ++FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT)) ++FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode)) ++FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode)) ++FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode)) ++FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode)) ++FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode)) ++FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode)) ++FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode)) ++FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode)) ++FUNCTION (svptest_any, svptest_impl, (NE)) ++FUNCTION (svptest_first, svptest_impl, (LT)) ++FUNCTION (svptest_last, svptest_impl, (LTU)) ++FUNCTION (svptrue, svptrue_impl,) ++FUNCTION (svptrue_pat, svptrue_pat_impl,) ++FUNCTION (svqadd, svqadd_impl,) ++FUNCTION 
(svqdecb, svqdec_bhwd_impl, (QImode)) ++FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode)) ++FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode)) ++FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode)) ++FUNCTION (svqdech, svqdec_bhwd_impl, (HImode)) ++FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode)) ++FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS)) ++FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode)) ++FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode)) ++FUNCTION (svqincb, svqinc_bhwd_impl, (QImode)) ++FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode)) ++FUNCTION (svqincd, svqinc_bhwd_impl, (DImode)) ++FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode)) ++FUNCTION (svqinch, svqinc_bhwd_impl, (HImode)) ++FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode)) ++FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS)) ++FUNCTION (svqincw, svqinc_bhwd_impl, (SImode)) ++FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode)) ++FUNCTION (svqsub, svqsub_impl,) ++FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1)) ++FUNCTION (svrdffr, svrdffr_impl,) ++FUNCTION (svrecpe, unspec_based_function, (-1, -1, UNSPEC_FRECPE)) ++FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS)) ++FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX)) ++FUNCTION (svreinterpret, svreinterpret_impl,) ++FUNCTION (svrev, svrev_impl,) ++FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1)) ++FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1)) ++FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1)) ++FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA)) ++FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI)) ++FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM)) ++FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN)) ++FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP)) ++FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX)) ++FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ)) ++FUNCTION (svrsqrte, unspec_based_function, (-1, -1, UNSPEC_RSQRTE)) ++FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS)) ++FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE)) ++FUNCTION (svsel, svsel_impl,) ++FUNCTION (svset2, svset_impl, (2)) ++FUNCTION (svset3, svset_impl, (3)) ++FUNCTION (svset4, svset_impl, (4)) ++FUNCTION (svsetffr, svsetffr_impl,) ++FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),) ++FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT)) ++FUNCTION (svst1, svst1_impl,) ++FUNCTION (svst1_scatter, svst1_scatter_impl,) ++FUNCTION (svst1b, svst1_truncate_impl, (QImode)) ++FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode)) ++FUNCTION (svst1h, svst1_truncate_impl, (HImode)) ++FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode)) ++FUNCTION (svst1w, svst1_truncate_impl, (SImode)) ++FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode)) ++FUNCTION (svst2, svst234_impl, (2)) ++FUNCTION (svst3, svst234_impl, (3)) ++FUNCTION (svst4, svst234_impl, (4)) ++FUNCTION (svstnt1, svstnt1_impl,) ++FUNCTION (svsub, svsub_impl,) ++FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB)) ++FUNCTION (svsudot, svusdot_impl, (true)) ++FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1)) ++FUNCTION (svtbl, svtbl_impl,) ++FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),) 
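As a usage illustration of the saturating counter intrinsics wired up above (svqincb/svqdecb and friends, handled by svqdec_svqinc_bhwd_impl earlier in this file), here is a hedged sketch, again assuming <arm_sve.h> and an SVE-enabled compiler:

#include <arm_sve.h>

/* svqincb adds imm_factor * svcntb () to a scalar with saturation, so a
   loop counter advanced by whole vectors of bytes cannot wrap around.  */
uint32_t advance_by_one_vector (uint32_t i)
{
  return svqincb_n_u32 (i, 1);  /* saturates at UINT32_MAX instead of wrapping.  */
}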
++FUNCTION (svtrn1, svtrn_impl, (0)) ++FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q, ++ UNSPEC_TRN1Q)) ++FUNCTION (svtrn2, svtrn_impl, (1)) ++FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q, ++ UNSPEC_TRN2Q)) ++FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL)) ++FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL)) ++FUNCTION (svundef, svundef_impl, (1)) ++FUNCTION (svundef2, svundef_impl, (2)) ++FUNCTION (svundef3, svundef_impl, (3)) ++FUNCTION (svundef4, svundef_impl, (4)) ++FUNCTION (svunpkhi, svunpk_impl, (true)) ++FUNCTION (svunpklo, svunpk_impl, (false)) ++FUNCTION (svusdot, svusdot_impl, (false)) ++FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1)) ++FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1)) ++FUNCTION (svuzp1, svuzp_impl, (0)) ++FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q, ++ UNSPEC_UZP1Q)) ++FUNCTION (svuzp2, svuzp_impl, (1)) ++FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q, ++ UNSPEC_UZP2Q)) ++FUNCTION (svwhilele, svwhile_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true)) ++FUNCTION (svwhilelt, svwhile_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false)) ++FUNCTION (svwrffr, svwrffr_impl,) ++FUNCTION (svzip1, svzip_impl, (0)) ++FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q, ++ UNSPEC_ZIP1Q)) ++FUNCTION (svzip2, svzip_impl, (1)) ++FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q, ++ UNSPEC_ZIP2Q)) ++ ++} /* end namespace aarch64_sve */ +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def +new file mode 100644 +index 000000000..795a5fd90 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def +@@ -0,0 +1,355 @@ ++/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) ++ Copyright (C) 2018-2019 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . 
*/ ++ ++#define REQUIRED_EXTENSIONS 0 ++DEF_SVE_FUNCTION (svabd, binary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svabs, unary, all_float_and_signed, mxz) ++DEF_SVE_FUNCTION (svacge, compare_opt_n, all_float, implicit) ++DEF_SVE_FUNCTION (svacgt, compare_opt_n, all_float, implicit) ++DEF_SVE_FUNCTION (svacle, compare_opt_n, all_float, implicit) ++DEF_SVE_FUNCTION (svaclt, compare_opt_n, all_float, implicit) ++DEF_SVE_FUNCTION (svadd, binary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svadda, fold_left, all_float, implicit) ++DEF_SVE_FUNCTION (svaddv, reduction_wide, all_arith, implicit) ++DEF_SVE_FUNCTION (svadrb, adr_offset, none, none) ++DEF_SVE_FUNCTION (svadrd, adr_index, none, none) ++DEF_SVE_FUNCTION (svadrh, adr_index, none, none) ++DEF_SVE_FUNCTION (svadrw, adr_index, none, none) ++DEF_SVE_FUNCTION (svand, binary_opt_n, all_integer, mxz) ++DEF_SVE_FUNCTION (svand, binary_opt_n, b, z) ++DEF_SVE_FUNCTION (svandv, reduction, all_integer, implicit) ++DEF_SVE_FUNCTION (svasr, binary_uint_opt_n, all_signed, mxz) ++DEF_SVE_FUNCTION (svasr_wide, binary_uint64_opt_n, bhs_signed, mxz) ++DEF_SVE_FUNCTION (svasrd, shift_right_imm, all_signed, mxz) ++DEF_SVE_FUNCTION (svbic, binary_opt_n, all_integer, mxz) ++DEF_SVE_FUNCTION (svbic, binary_opt_n, b, z) ++DEF_SVE_FUNCTION (svbrka, unary, b, mz) ++DEF_SVE_FUNCTION (svbrkb, unary, b, mz) ++DEF_SVE_FUNCTION (svbrkn, binary, b, z) ++DEF_SVE_FUNCTION (svbrkpa, binary, b, z) ++DEF_SVE_FUNCTION (svbrkpb, binary, b, z) ++DEF_SVE_FUNCTION (svcadd, binary_rotate, all_float, mxz) ++DEF_SVE_FUNCTION (svclasta, clast, all_data, implicit) ++DEF_SVE_FUNCTION (svclastb, clast, all_data, implicit) ++DEF_SVE_FUNCTION (svcls, unary_to_uint, all_signed, mxz) ++DEF_SVE_FUNCTION (svclz, unary_to_uint, all_integer, mxz) ++DEF_SVE_FUNCTION (svcmla, ternary_rotate, all_float, mxz) ++DEF_SVE_FUNCTION (svcmla_lane, ternary_lane_rotate, hs_float, none) ++DEF_SVE_FUNCTION (svcmpeq, compare_opt_n, all_arith, implicit) ++DEF_SVE_FUNCTION (svcmpeq_wide, compare_wide_opt_n, bhs_signed, implicit) ++DEF_SVE_FUNCTION (svcmpge, compare_opt_n, all_arith, implicit) ++DEF_SVE_FUNCTION (svcmpge_wide, compare_wide_opt_n, bhs_integer, implicit) ++DEF_SVE_FUNCTION (svcmpgt, compare_opt_n, all_arith, implicit) ++DEF_SVE_FUNCTION (svcmpgt_wide, compare_wide_opt_n, bhs_integer, implicit) ++DEF_SVE_FUNCTION (svcmple, compare_opt_n, all_arith, implicit) ++DEF_SVE_FUNCTION (svcmple_wide, compare_wide_opt_n, bhs_integer, implicit) ++DEF_SVE_FUNCTION (svcmplt, compare_opt_n, all_arith, implicit) ++DEF_SVE_FUNCTION (svcmplt_wide, compare_wide_opt_n, bhs_integer, implicit) ++DEF_SVE_FUNCTION (svcmpne, compare_opt_n, all_arith, implicit) ++DEF_SVE_FUNCTION (svcmpne_wide, compare_wide_opt_n, bhs_signed, implicit) ++DEF_SVE_FUNCTION (svcmpuo, compare_opt_n, all_float, implicit) ++DEF_SVE_FUNCTION (svcnot, unary, all_integer, mxz) ++DEF_SVE_FUNCTION (svcnt, unary_to_uint, all_data, mxz) ++DEF_SVE_FUNCTION (svcntb, count_inherent, none, none) ++DEF_SVE_FUNCTION (svcntb_pat, count_pat, none, none) ++DEF_SVE_FUNCTION (svcntd, count_inherent, none, none) ++DEF_SVE_FUNCTION (svcntd_pat, count_pat, none, none) ++DEF_SVE_FUNCTION (svcnth, count_inherent, none, none) ++DEF_SVE_FUNCTION (svcnth_pat, count_pat, none, none) ++DEF_SVE_FUNCTION (svcntp, count_pred, all_pred, implicit) ++DEF_SVE_FUNCTION (svcntw, count_inherent, none, none) ++DEF_SVE_FUNCTION (svcntw_pat, count_pat, none, none) ++DEF_SVE_FUNCTION (svcompact, unary, sd_data, implicit) ++DEF_SVE_FUNCTION (svcreate2, create, all_data, none) 
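To make the table columns concrete: for an entry such as "svadd, binary_opt_n, all_arith, mxz" above, the shape/predication pair expands (per the SVE builtin framework) into _m/_x/_z predicated forms, each optionally taking a scalar second operand (_n). A short sketch of the resulting user-facing calls, assuming <arm_sve.h>:

#include <arm_sve.h>

/* svadd_f32_m: inactive lanes keep the value of the first data input.
   svadd_n_f32_z: scalar second operand; inactive lanes are zeroed.  */
svfloat32_t add_merge (svbool_t pg, svfloat32_t a, svfloat32_t b)
{
  return svadd_f32_m (pg, a, b);
}
svfloat32_t add_one_zeroing (svbool_t pg, svfloat32_t a)
{
  return svadd_n_f32_z (pg, a, 1.0f);
}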
++DEF_SVE_FUNCTION (svcreate3, create, all_data, none) ++DEF_SVE_FUNCTION (svcreate4, create, all_data, none) ++DEF_SVE_FUNCTION (svcvt, unary_convert, cvt, mxz) ++DEF_SVE_FUNCTION (svdiv, binary_opt_n, all_float_and_sd_integer, mxz) ++DEF_SVE_FUNCTION (svdivr, binary_opt_n, all_float_and_sd_integer, mxz) ++DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n, sd_integer, none) ++DEF_SVE_FUNCTION (svdot_lane, ternary_qq_lane, sd_integer, none) ++DEF_SVE_FUNCTION (svdup, unary_n, all_data, mxz_or_none) ++DEF_SVE_FUNCTION (svdup, unary_n, all_pred, none) ++DEF_SVE_FUNCTION (svdup_lane, binary_uint_n, all_data, none) ++DEF_SVE_FUNCTION (svdupq, dupq, all_data, none) ++DEF_SVE_FUNCTION (svdupq, dupq, all_pred, none) ++DEF_SVE_FUNCTION (svdupq_lane, binary_uint64_n, all_data, none) ++DEF_SVE_FUNCTION (sveor, binary_opt_n, all_integer, mxz) ++DEF_SVE_FUNCTION (sveor, binary_opt_n, b, z) ++DEF_SVE_FUNCTION (sveorv, reduction, all_integer, implicit) ++DEF_SVE_FUNCTION (svexpa, unary_uint, all_float, none) ++DEF_SVE_FUNCTION (svext, ext, all_data, none) ++DEF_SVE_FUNCTION (svextb, unary, hsd_integer, mxz) ++DEF_SVE_FUNCTION (svexth, unary, sd_integer, mxz) ++DEF_SVE_FUNCTION (svextw, unary, d_integer, mxz) ++DEF_SVE_FUNCTION (svget2, get, all_data, none) ++DEF_SVE_FUNCTION (svget3, get, all_data, none) ++DEF_SVE_FUNCTION (svget4, get, all_data, none) ++DEF_SVE_FUNCTION (svindex, binary_scalar, all_integer, none) ++DEF_SVE_FUNCTION (svinsr, binary_n, all_data, none) ++DEF_SVE_FUNCTION (svlasta, reduction, all_data, implicit) ++DEF_SVE_FUNCTION (svlastb, reduction, all_data, implicit) ++DEF_SVE_FUNCTION (svld1, load, all_data, implicit) ++DEF_SVE_FUNCTION (svld1_gather, load_gather_sv, sd_data, implicit) ++DEF_SVE_FUNCTION (svld1_gather, load_gather_vs, sd_data, implicit) ++DEF_SVE_FUNCTION (svld1rq, load_replicate, all_data, implicit) ++DEF_SVE_FUNCTION (svld1sb, load_ext, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svld1sb_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1sh, load_ext, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_index, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1sw, load_ext, d_integer, implicit) ++DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_offset, d_integer, implicit) ++DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_index, d_integer, implicit) ++DEF_SVE_FUNCTION (svld1ub, load_ext, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svld1ub_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1uh, load_ext, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_index, sd_integer, implicit) ++DEF_SVE_FUNCTION (svld1uw, load_ext, d_integer, implicit) ++DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_offset, d_integer, implicit) ++DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_index, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1, load, all_data, implicit) ++DEF_SVE_FUNCTION (svldff1_gather, load_gather_sv, sd_data, implicit) ++DEF_SVE_FUNCTION (svldff1_gather, load_gather_vs, sd_data, implicit) ++DEF_SVE_FUNCTION (svldff1sb, load_ext, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sb_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sh, load_ext, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sh_gather, 
load_ext_gather_index, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sw, load_ext, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_offset, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_index, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1ub, load_ext, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1ub_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uh, load_ext, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_index, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uw, load_ext, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_offset, d_integer, implicit) ++DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_index, d_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1, load, all_data, implicit) ++DEF_SVE_FUNCTION (svldnf1sb, load_ext, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1sh, load_ext, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1sw, load_ext, d_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1ub, load_ext, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1uh, load_ext, sd_integer, implicit) ++DEF_SVE_FUNCTION (svldnf1uw, load_ext, d_integer, implicit) ++DEF_SVE_FUNCTION (svldnt1, load, all_data, implicit) ++DEF_SVE_FUNCTION (svld2, load, all_data, implicit) ++DEF_SVE_FUNCTION (svld3, load, all_data, implicit) ++DEF_SVE_FUNCTION (svld4, load, all_data, implicit) ++DEF_SVE_FUNCTION (svlen, count_vector, all_data, none) ++DEF_SVE_FUNCTION (svlsl, binary_uint_opt_n, all_integer, mxz) ++DEF_SVE_FUNCTION (svlsl_wide, binary_uint64_opt_n, bhs_integer, mxz) ++DEF_SVE_FUNCTION (svlsr, binary_uint_opt_n, all_unsigned, mxz) ++DEF_SVE_FUNCTION (svlsr_wide, binary_uint64_opt_n, bhs_unsigned, mxz) ++DEF_SVE_FUNCTION (svmad, ternary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svmax, binary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svmaxnm, binary_opt_n, all_float, mxz) ++DEF_SVE_FUNCTION (svmaxnmv, reduction, all_float, implicit) ++DEF_SVE_FUNCTION (svmaxv, reduction, all_arith, implicit) ++DEF_SVE_FUNCTION (svmin, binary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svminnm, binary_opt_n, all_float, mxz) ++DEF_SVE_FUNCTION (svminnmv, reduction, all_float, implicit) ++DEF_SVE_FUNCTION (svminv, reduction, all_arith, implicit) ++DEF_SVE_FUNCTION (svmla, ternary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svmla_lane, ternary_lane, all_float, none) ++DEF_SVE_FUNCTION (svmls, ternary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svmls_lane, ternary_lane, all_float, none) ++DEF_SVE_FUNCTION (svmmla, mmla, none, none) ++DEF_SVE_FUNCTION (svmov, unary, b, z) ++DEF_SVE_FUNCTION (svmsb, ternary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svmul, binary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svmul_lane, binary_lane, all_float, none) ++DEF_SVE_FUNCTION (svmulh, binary_opt_n, all_integer, mxz) ++DEF_SVE_FUNCTION (svmulx, binary_opt_n, all_float, mxz) ++DEF_SVE_FUNCTION (svnand, binary_opt_n, b, z) ++DEF_SVE_FUNCTION (svneg, unary, all_float_and_signed, mxz) ++DEF_SVE_FUNCTION (svnmad, ternary_opt_n, all_float, mxz) ++DEF_SVE_FUNCTION (svnmla, ternary_opt_n, all_float, mxz) ++DEF_SVE_FUNCTION (svnmls, ternary_opt_n, all_float, mxz) ++DEF_SVE_FUNCTION (svnmsb, ternary_opt_n, all_float, mxz) ++DEF_SVE_FUNCTION (svnor, binary_opt_n, b, z) ++DEF_SVE_FUNCTION (svnot, unary, all_integer, mxz) ++DEF_SVE_FUNCTION (svnot, unary, b, z) ++DEF_SVE_FUNCTION (svorn, binary_opt_n, b, z) 
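The structure-load entries above (svld2/svld3/svld4) pair with the svget/svset/svcreate tuple accessors from this table; a minimal sketch of their intended use, assuming <arm_sve.h> and a buffer of at least two vectors' worth of floats:

#include <arm_sve.h>

/* svld2 loads two interleaved vectors (an LD2 form) into an svfloat32x2_t
   tuple; svget2 extracts one vector of the pair and svaddv reduces it, so
   this sums the even-indexed elements of the loaded region.  */
float sum_even_indexed (const float *ptr)
{
  svbool_t pg = svptrue_b32 ();
  svfloat32x2_t pair = svld2_f32 (pg, ptr);
  return svaddv_f32 (pg, svget2_f32 (pair, 0));
}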
++DEF_SVE_FUNCTION (svorr, binary_opt_n, all_integer, mxz) ++DEF_SVE_FUNCTION (svorr, binary_opt_n, b, z) ++DEF_SVE_FUNCTION (svorv, reduction, all_integer, implicit) ++DEF_SVE_FUNCTION (svpfalse, inherent_b, b, none) ++DEF_SVE_FUNCTION (svpfirst, unary, b, implicit) ++DEF_SVE_FUNCTION (svpnext, unary_pred, all_pred, implicit) ++DEF_SVE_FUNCTION (svprfb, prefetch, none, implicit) ++DEF_SVE_FUNCTION (svprfb_gather, prefetch_gather_offset, none, implicit) ++DEF_SVE_FUNCTION (svprfd, prefetch, none, implicit) ++DEF_SVE_FUNCTION (svprfd_gather, prefetch_gather_index, none, implicit) ++DEF_SVE_FUNCTION (svprfh, prefetch, none, implicit) ++DEF_SVE_FUNCTION (svprfh_gather, prefetch_gather_index, none, implicit) ++DEF_SVE_FUNCTION (svprfw, prefetch, none, implicit) ++DEF_SVE_FUNCTION (svprfw_gather, prefetch_gather_index, none, implicit) ++DEF_SVE_FUNCTION (svptest_any, ptest, none, implicit) ++DEF_SVE_FUNCTION (svptest_first, ptest, none, implicit) ++DEF_SVE_FUNCTION (svptest_last, ptest, none, implicit) ++DEF_SVE_FUNCTION (svptrue, inherent, all_pred, none) ++DEF_SVE_FUNCTION (svptrue_pat, pattern_pred, all_pred, none) ++DEF_SVE_FUNCTION (svqadd, binary_opt_n, all_integer, none) ++DEF_SVE_FUNCTION (svqdecb, inc_dec, sd_integer, none) ++DEF_SVE_FUNCTION (svqdecb_pat, inc_dec_pat, sd_integer, none) ++DEF_SVE_FUNCTION (svqdecd, inc_dec, d_integer, none) ++DEF_SVE_FUNCTION (svqdecd, inc_dec, sd_integer, none) ++DEF_SVE_FUNCTION (svqdecd_pat, inc_dec_pat, d_integer, none) ++DEF_SVE_FUNCTION (svqdecd_pat, inc_dec_pat, sd_integer, none) ++DEF_SVE_FUNCTION (svqdech, inc_dec, h_integer, none) ++DEF_SVE_FUNCTION (svqdech, inc_dec, sd_integer, none) ++DEF_SVE_FUNCTION (svqdech_pat, inc_dec_pat, h_integer, none) ++DEF_SVE_FUNCTION (svqdech_pat, inc_dec_pat, sd_integer, none) ++DEF_SVE_FUNCTION (svqdecp, inc_dec_pred, hsd_integer, none) ++DEF_SVE_FUNCTION (svqdecp, inc_dec_pred_scalar, inc_dec_n, none) ++DEF_SVE_FUNCTION (svqdecw, inc_dec, s_integer, none) ++DEF_SVE_FUNCTION (svqdecw, inc_dec, sd_integer, none) ++DEF_SVE_FUNCTION (svqdecw_pat, inc_dec_pat, s_integer, none) ++DEF_SVE_FUNCTION (svqdecw_pat, inc_dec_pat, sd_integer, none) ++DEF_SVE_FUNCTION (svqincb, inc_dec, sd_integer, none) ++DEF_SVE_FUNCTION (svqincb_pat, inc_dec_pat, sd_integer, none) ++DEF_SVE_FUNCTION (svqincd, inc_dec, d_integer, none) ++DEF_SVE_FUNCTION (svqincd, inc_dec, sd_integer, none) ++DEF_SVE_FUNCTION (svqincd_pat, inc_dec_pat, d_integer, none) ++DEF_SVE_FUNCTION (svqincd_pat, inc_dec_pat, sd_integer, none) ++DEF_SVE_FUNCTION (svqinch, inc_dec, h_integer, none) ++DEF_SVE_FUNCTION (svqinch, inc_dec, sd_integer, none) ++DEF_SVE_FUNCTION (svqinch_pat, inc_dec_pat, h_integer, none) ++DEF_SVE_FUNCTION (svqinch_pat, inc_dec_pat, sd_integer, none) ++DEF_SVE_FUNCTION (svqincp, inc_dec_pred, hsd_integer, none) ++DEF_SVE_FUNCTION (svqincp, inc_dec_pred_scalar, inc_dec_n, none) ++DEF_SVE_FUNCTION (svqincw, inc_dec, s_integer, none) ++DEF_SVE_FUNCTION (svqincw, inc_dec, sd_integer, none) ++DEF_SVE_FUNCTION (svqincw_pat, inc_dec_pat, s_integer, none) ++DEF_SVE_FUNCTION (svqincw_pat, inc_dec_pat, sd_integer, none) ++DEF_SVE_FUNCTION (svqsub, binary_opt_n, all_integer, none) ++DEF_SVE_FUNCTION (svrbit, unary, all_integer, mxz) ++DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) ++DEF_SVE_FUNCTION (svrecpe, unary, all_float, none) ++DEF_SVE_FUNCTION (svrecps, binary, all_float, none) ++DEF_SVE_FUNCTION (svrecpx, unary, all_float, mxz) ++DEF_SVE_FUNCTION (svreinterpret, unary_convert, reinterpret, none) ++DEF_SVE_FUNCTION (svrev, unary, 
all_data, none) ++DEF_SVE_FUNCTION (svrev, unary_pred, all_pred, none) ++DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz) ++DEF_SVE_FUNCTION (svrevh, unary, sd_integer, mxz) ++DEF_SVE_FUNCTION (svrevw, unary, d_integer, mxz) ++DEF_SVE_FUNCTION (svrinta, unary, all_float, mxz) ++DEF_SVE_FUNCTION (svrinti, unary, all_float, mxz) ++DEF_SVE_FUNCTION (svrintm, unary, all_float, mxz) ++DEF_SVE_FUNCTION (svrintn, unary, all_float, mxz) ++DEF_SVE_FUNCTION (svrintp, unary, all_float, mxz) ++DEF_SVE_FUNCTION (svrintx, unary, all_float, mxz) ++DEF_SVE_FUNCTION (svrintz, unary, all_float, mxz) ++DEF_SVE_FUNCTION (svrsqrte, unary, all_float, none) ++DEF_SVE_FUNCTION (svrsqrts, binary, all_float, none) ++DEF_SVE_FUNCTION (svscale, binary_int_opt_n, all_float, mxz) ++DEF_SVE_FUNCTION (svsel, binary, all_data, implicit) ++DEF_SVE_FUNCTION (svsel, binary, b, implicit) ++DEF_SVE_FUNCTION (svset2, set, all_data, none) ++DEF_SVE_FUNCTION (svset3, set, all_data, none) ++DEF_SVE_FUNCTION (svset4, set, all_data, none) ++DEF_SVE_FUNCTION (svsetffr, setffr, none, none) ++DEF_SVE_FUNCTION (svsplice, binary, all_data, implicit) ++DEF_SVE_FUNCTION (svsqrt, unary, all_float, mxz) ++DEF_SVE_FUNCTION (svst1, store, all_data, implicit) ++DEF_SVE_FUNCTION (svst1_scatter, store_scatter_index, sd_data, implicit) ++DEF_SVE_FUNCTION (svst1_scatter, store_scatter_offset, sd_data, implicit) ++DEF_SVE_FUNCTION (svst1b, store, hsd_integer, implicit) ++DEF_SVE_FUNCTION (svst1b_scatter, store_scatter_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svst1h, store, sd_integer, implicit) ++DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_index, sd_integer, implicit) ++DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_offset, sd_integer, implicit) ++DEF_SVE_FUNCTION (svst1w, store, d_integer, implicit) ++DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_index, d_integer, implicit) ++DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_offset, d_integer, implicit) ++DEF_SVE_FUNCTION (svst2, store, all_data, implicit) ++DEF_SVE_FUNCTION (svst3, store, all_data, implicit) ++DEF_SVE_FUNCTION (svst4, store, all_data, implicit) ++DEF_SVE_FUNCTION (svstnt1, store, all_data, implicit) ++DEF_SVE_FUNCTION (svsub, binary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svsubr, binary_opt_n, all_arith, mxz) ++DEF_SVE_FUNCTION (svtbl, binary_uint, all_data, none) ++DEF_SVE_FUNCTION (svtmad, tmad, all_float, none) ++DEF_SVE_FUNCTION (svtrn1, binary, all_data, none) ++DEF_SVE_FUNCTION (svtrn1, binary_pred, all_pred, none) ++DEF_SVE_FUNCTION (svtrn2, binary, all_data, none) ++DEF_SVE_FUNCTION (svtrn2, binary_pred, all_pred, none) ++DEF_SVE_FUNCTION (svtsmul, binary_uint, all_float, none) ++DEF_SVE_FUNCTION (svtssel, binary_uint, all_float, none) ++DEF_SVE_FUNCTION (svundef, inherent, all_data, none) ++DEF_SVE_FUNCTION (svundef2, inherent, all_data, none) ++DEF_SVE_FUNCTION (svundef3, inherent, all_data, none) ++DEF_SVE_FUNCTION (svundef4, inherent, all_data, none) ++DEF_SVE_FUNCTION (svunpkhi, unary_widen, hsd_integer, none) ++DEF_SVE_FUNCTION (svunpkhi, unary_widen, b, none) ++DEF_SVE_FUNCTION (svunpklo, unary_widen, hsd_integer, none) ++DEF_SVE_FUNCTION (svunpklo, unary_widen, b, none) ++DEF_SVE_FUNCTION (svuzp1, binary, all_data, none) ++DEF_SVE_FUNCTION (svuzp1, binary_pred, all_pred, none) ++DEF_SVE_FUNCTION (svuzp2, binary, all_data, none) ++DEF_SVE_FUNCTION (svuzp2, binary_pred, all_pred, none) ++DEF_SVE_FUNCTION (svwhilele, compare_scalar, while, none) ++DEF_SVE_FUNCTION (svwhilelt, compare_scalar, while, none) ++DEF_SVE_FUNCTION (svwrffr, setffr, none, 
implicit) ++DEF_SVE_FUNCTION (svzip1, binary, all_data, none) ++DEF_SVE_FUNCTION (svzip1, binary_pred, all_pred, none) ++DEF_SVE_FUNCTION (svzip2, binary, all_data, none) ++DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS AARCH64_FL_BF16 ++DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none) ++DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none) ++DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none) ++DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none) ++DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none) ++DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none) ++DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none) ++DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz) ++DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM ++DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) ++DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none) ++DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none) ++DEF_SVE_FUNCTION (svsudot_lane, ternary_intq_uintq_lane, s_signed, none) ++DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none) ++DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS AARCH64_FL_F32MM ++DEF_SVE_FUNCTION (svmmla, mmla, s_float, none) ++#undef REQUIRED_EXTENSIONS ++ ++#define REQUIRED_EXTENSIONS AARCH64_FL_F64MM ++DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) ++DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) ++DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none) ++DEF_SVE_FUNCTION (svtrn2q, binary, all_data, none) ++DEF_SVE_FUNCTION (svuzp1q, binary, all_data, none) ++DEF_SVE_FUNCTION (svuzp2q, binary, all_data, none) ++DEF_SVE_FUNCTION (svzip1q, binary, all_data, none) ++DEF_SVE_FUNCTION (svzip2q, binary, all_data, none) ++#undef REQUIRED_EXTENSIONS +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h +new file mode 100644 +index 000000000..2467e729e +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h +@@ -0,0 +1,304 @@ ++/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) ++ Copyright (C) 2018-2019 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . 
*/ ++ ++#ifndef GCC_AARCH64_SVE_BUILTINS_BASE_H ++#define GCC_AARCH64_SVE_BUILTINS_BASE_H ++ ++namespace aarch64_sve ++{ ++ namespace functions ++ { ++ extern const function_base *const svabd; ++ extern const function_base *const svabs; ++ extern const function_base *const svacge; ++ extern const function_base *const svacgt; ++ extern const function_base *const svacle; ++ extern const function_base *const svaclt; ++ extern const function_base *const svadd; ++ extern const function_base *const svadda; ++ extern const function_base *const svaddv; ++ extern const function_base *const svadrb; ++ extern const function_base *const svadrd; ++ extern const function_base *const svadrh; ++ extern const function_base *const svadrw; ++ extern const function_base *const svand; ++ extern const function_base *const svandv; ++ extern const function_base *const svasr; ++ extern const function_base *const svasr_wide; ++ extern const function_base *const svasrd; ++ extern const function_base *const svbfdot; ++ extern const function_base *const svbfdot_lane; ++ extern const function_base *const svbfmlalb; ++ extern const function_base *const svbfmlalb_lane; ++ extern const function_base *const svbfmlalt; ++ extern const function_base *const svbfmlalt_lane; ++ extern const function_base *const svbfmmla; ++ extern const function_base *const svbic; ++ extern const function_base *const svbrka; ++ extern const function_base *const svbrkb; ++ extern const function_base *const svbrkn; ++ extern const function_base *const svbrkpa; ++ extern const function_base *const svbrkpb; ++ extern const function_base *const svcadd; ++ extern const function_base *const svclasta; ++ extern const function_base *const svclastb; ++ extern const function_base *const svcls; ++ extern const function_base *const svclz; ++ extern const function_base *const svcmla; ++ extern const function_base *const svcmla_lane; ++ extern const function_base *const svcmpeq; ++ extern const function_base *const svcmpeq_wide; ++ extern const function_base *const svcmpge; ++ extern const function_base *const svcmpge_wide; ++ extern const function_base *const svcmpgt; ++ extern const function_base *const svcmpgt_wide; ++ extern const function_base *const svcmple; ++ extern const function_base *const svcmple_wide; ++ extern const function_base *const svcmplt; ++ extern const function_base *const svcmplt_wide; ++ extern const function_base *const svcmpne; ++ extern const function_base *const svcmpne_wide; ++ extern const function_base *const svcmpuo; ++ extern const function_base *const svcnot; ++ extern const function_base *const svcnt; ++ extern const function_base *const svcntb; ++ extern const function_base *const svcntb_pat; ++ extern const function_base *const svcntd; ++ extern const function_base *const svcntd_pat; ++ extern const function_base *const svcnth; ++ extern const function_base *const svcnth_pat; ++ extern const function_base *const svcntp; ++ extern const function_base *const svcntw; ++ extern const function_base *const svcntw_pat; ++ extern const function_base *const svcompact; ++ extern const function_base *const svcreate2; ++ extern const function_base *const svcreate3; ++ extern const function_base *const svcreate4; ++ extern const function_base *const svcvt; ++ extern const function_base *const svcvtnt; ++ extern const function_base *const svdiv; ++ extern const function_base *const svdivr; ++ extern const function_base *const svdot; ++ extern const function_base *const svdot_lane; ++ extern const function_base *const svdup; ++ extern 
const function_base *const svdup_lane; ++ extern const function_base *const svdupq; ++ extern const function_base *const svdupq_lane; ++ extern const function_base *const sveor; ++ extern const function_base *const sveorv; ++ extern const function_base *const svexpa; ++ extern const function_base *const svext; ++ extern const function_base *const svextb; ++ extern const function_base *const svexth; ++ extern const function_base *const svextw; ++ extern const function_base *const svget2; ++ extern const function_base *const svget3; ++ extern const function_base *const svget4; ++ extern const function_base *const svindex; ++ extern const function_base *const svinsr; ++ extern const function_base *const svlasta; ++ extern const function_base *const svlastb; ++ extern const function_base *const svld1; ++ extern const function_base *const svld1_gather; ++ extern const function_base *const svld1ro; ++ extern const function_base *const svld1rq; ++ extern const function_base *const svld1sb; ++ extern const function_base *const svld1sb_gather; ++ extern const function_base *const svld1sh; ++ extern const function_base *const svld1sh_gather; ++ extern const function_base *const svld1sw; ++ extern const function_base *const svld1sw_gather; ++ extern const function_base *const svld1ub; ++ extern const function_base *const svld1ub_gather; ++ extern const function_base *const svld1uh; ++ extern const function_base *const svld1uh_gather; ++ extern const function_base *const svld1uw; ++ extern const function_base *const svld1uw_gather; ++ extern const function_base *const svld2; ++ extern const function_base *const svld3; ++ extern const function_base *const svld4; ++ extern const function_base *const svldff1; ++ extern const function_base *const svldff1_gather; ++ extern const function_base *const svldff1sb; ++ extern const function_base *const svldff1sb_gather; ++ extern const function_base *const svldff1sh; ++ extern const function_base *const svldff1sh_gather; ++ extern const function_base *const svldff1sw; ++ extern const function_base *const svldff1sw_gather; ++ extern const function_base *const svldff1ub; ++ extern const function_base *const svldff1ub_gather; ++ extern const function_base *const svldff1uh; ++ extern const function_base *const svldff1uh_gather; ++ extern const function_base *const svldff1uw; ++ extern const function_base *const svldff1uw_gather; ++ extern const function_base *const svldnf1; ++ extern const function_base *const svldnf1sb; ++ extern const function_base *const svldnf1sh; ++ extern const function_base *const svldnf1sw; ++ extern const function_base *const svldnf1ub; ++ extern const function_base *const svldnf1uh; ++ extern const function_base *const svldnf1uw; ++ extern const function_base *const svldnt1; ++ extern const function_base *const svlen; ++ extern const function_base *const svlsl; ++ extern const function_base *const svlsl_wide; ++ extern const function_base *const svlsr; ++ extern const function_base *const svlsr_wide; ++ extern const function_base *const svmad; ++ extern const function_base *const svmax; ++ extern const function_base *const svmaxnm; ++ extern const function_base *const svmaxnmv; ++ extern const function_base *const svmaxv; ++ extern const function_base *const svmin; ++ extern const function_base *const svminnm; ++ extern const function_base *const svminnmv; ++ extern const function_base *const svminv; ++ extern const function_base *const svmla; ++ extern const function_base *const svmla_lane; ++ extern const function_base *const svmls; ++ 
extern const function_base *const svmls_lane; ++ extern const function_base *const svmmla; ++ extern const function_base *const svmov; ++ extern const function_base *const svmsb; ++ extern const function_base *const svmul; ++ extern const function_base *const svmul_lane; ++ extern const function_base *const svmulh; ++ extern const function_base *const svmulx; ++ extern const function_base *const svnand; ++ extern const function_base *const svneg; ++ extern const function_base *const svnmad; ++ extern const function_base *const svnmla; ++ extern const function_base *const svnmls; ++ extern const function_base *const svnmsb; ++ extern const function_base *const svnor; ++ extern const function_base *const svnot; ++ extern const function_base *const svorn; ++ extern const function_base *const svorr; ++ extern const function_base *const svorv; ++ extern const function_base *const svpfalse; ++ extern const function_base *const svpfirst; ++ extern const function_base *const svpnext; ++ extern const function_base *const svprfb; ++ extern const function_base *const svprfb_gather; ++ extern const function_base *const svprfd; ++ extern const function_base *const svprfd_gather; ++ extern const function_base *const svprfh; ++ extern const function_base *const svprfh_gather; ++ extern const function_base *const svprfw; ++ extern const function_base *const svprfw_gather; ++ extern const function_base *const svptest_any; ++ extern const function_base *const svptest_first; ++ extern const function_base *const svptest_last; ++ extern const function_base *const svptrue; ++ extern const function_base *const svptrue_pat; ++ extern const function_base *const svqadd; ++ extern const function_base *const svqdecb; ++ extern const function_base *const svqdecb_pat; ++ extern const function_base *const svqdecd; ++ extern const function_base *const svqdecd_pat; ++ extern const function_base *const svqdech; ++ extern const function_base *const svqdech_pat; ++ extern const function_base *const svqdecp; ++ extern const function_base *const svqdecw; ++ extern const function_base *const svqdecw_pat; ++ extern const function_base *const svqincb; ++ extern const function_base *const svqincb_pat; ++ extern const function_base *const svqincd; ++ extern const function_base *const svqincd_pat; ++ extern const function_base *const svqinch; ++ extern const function_base *const svqinch_pat; ++ extern const function_base *const svqincp; ++ extern const function_base *const svqincw; ++ extern const function_base *const svqincw_pat; ++ extern const function_base *const svqsub; ++ extern const function_base *const svrbit; ++ extern const function_base *const svrdffr; ++ extern const function_base *const svrecpe; ++ extern const function_base *const svrecps; ++ extern const function_base *const svrecpx; ++ extern const function_base *const svreinterpret; ++ extern const function_base *const svrev; ++ extern const function_base *const svrevb; ++ extern const function_base *const svrevh; ++ extern const function_base *const svrevw; ++ extern const function_base *const svrinta; ++ extern const function_base *const svrinti; ++ extern const function_base *const svrintm; ++ extern const function_base *const svrintn; ++ extern const function_base *const svrintp; ++ extern const function_base *const svrintx; ++ extern const function_base *const svrintz; ++ extern const function_base *const svrsqrte; ++ extern const function_base *const svrsqrts; ++ extern const function_base *const svscale; ++ extern const function_base *const svsel; ++ extern 
const function_base *const svset2; ++ extern const function_base *const svset3; ++ extern const function_base *const svset4; ++ extern const function_base *const svsetffr; ++ extern const function_base *const svsplice; ++ extern const function_base *const svsqrt; ++ extern const function_base *const svst1; ++ extern const function_base *const svst1_scatter; ++ extern const function_base *const svst1b; ++ extern const function_base *const svst1b_scatter; ++ extern const function_base *const svst1h; ++ extern const function_base *const svst1h_scatter; ++ extern const function_base *const svst1w; ++ extern const function_base *const svst1w_scatter; ++ extern const function_base *const svst2; ++ extern const function_base *const svst3; ++ extern const function_base *const svst4; ++ extern const function_base *const svstnt1; ++ extern const function_base *const svsub; ++ extern const function_base *const svsubr; ++ extern const function_base *const svsudot; ++ extern const function_base *const svsudot_lane; ++ extern const function_base *const svtbl; ++ extern const function_base *const svtmad; ++ extern const function_base *const svtrn1; ++ extern const function_base *const svtrn1q; ++ extern const function_base *const svtrn2; ++ extern const function_base *const svtrn2q; ++ extern const function_base *const svtsmul; ++ extern const function_base *const svtssel; ++ extern const function_base *const svundef; ++ extern const function_base *const svundef2; ++ extern const function_base *const svundef3; ++ extern const function_base *const svundef4; ++ extern const function_base *const svunpkhi; ++ extern const function_base *const svunpklo; ++ extern const function_base *const svusdot; ++ extern const function_base *const svusdot_lane; ++ extern const function_base *const svusmmla; ++ extern const function_base *const svuzp1; ++ extern const function_base *const svuzp1q; ++ extern const function_base *const svuzp2; ++ extern const function_base *const svuzp2q; ++ extern const function_base *const svwhilele; ++ extern const function_base *const svwhilelt; ++ extern const function_base *const svwrffr; ++ extern const function_base *const svzip1; ++ extern const function_base *const svzip1q; ++ extern const function_base *const svzip2; ++ extern const function_base *const svzip2q; ++ } ++} ++ ++#endif +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h +new file mode 100644 +index 000000000..ee1760668 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h +@@ -0,0 +1,630 @@ ++/* ACLE support for AArch64 SVE (function_base classes) ++ Copyright (C) 2018-2019 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . 
*/ ++ ++#ifndef GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H ++#define GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H ++ ++namespace aarch64_sve { ++ ++/* Wrap T, which is derived from function_base, and indicate that the ++ function never has side effects. It is only necessary to use this ++ wrapper on functions that might have floating-point suffixes, since ++ otherwise we assume by default that the function has no side effects. */ ++template ++class quiet : public T ++{ ++public: ++ CONSTEXPR quiet () : T () {} ++ ++ /* Unfortunately we can't use parameter packs yet. */ ++ template ++ CONSTEXPR quiet (const T1 &t1) : T (t1) {} ++ ++ template ++ CONSTEXPR quiet (const T1 &t1, const T2 &t2) : T (t1, t2) {} ++ ++ template ++ CONSTEXPR quiet (const T1 &t1, const T2 &t2, const T3 &t3) ++ : T (t1, t2, t3) {} ++ ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return 0; ++ } ++}; ++ ++/* A function_base that sometimes or always operates on tuples of ++ vectors. */ ++class multi_vector_function : public function_base ++{ ++public: ++ CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple) ++ : m_vectors_per_tuple (vectors_per_tuple) {} ++ ++ unsigned int ++ vectors_per_tuple () const OVERRIDE ++ { ++ return m_vectors_per_tuple; ++ } ++ ++ /* The number of vectors in a tuple, or 1 if the function only operates ++ on single vectors. */ ++ unsigned int m_vectors_per_tuple; ++}; ++ ++/* A function_base that loads or stores contiguous memory elements ++ without extending or truncating them. */ ++class full_width_access : public multi_vector_function ++{ ++public: ++ CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1) ++ : multi_vector_function (vectors_per_tuple) {} ++ ++ tree ++ memory_scalar_type (const function_instance &fi) const OVERRIDE ++ { ++ return fi.scalar_type (0); ++ } ++ ++ machine_mode ++ memory_vector_mode (const function_instance &fi) const OVERRIDE ++ { ++ machine_mode mode = fi.vector_mode (0); ++ if (m_vectors_per_tuple != 1) ++ mode = targetm.array_mode (mode, m_vectors_per_tuple).require (); ++ return mode; ++ } ++}; ++ ++/* A function_base that loads elements from memory and extends them ++ to a wider element. The memory element type is a fixed part of ++ the function base name. */ ++class extending_load : public function_base ++{ ++public: ++ CONSTEXPR extending_load (type_suffix_index memory_type) ++ : m_memory_type (memory_type) {} ++ ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_READ_MEMORY; ++ } ++ ++ tree ++ memory_scalar_type (const function_instance &) const OVERRIDE ++ { ++ return scalar_types[type_suffixes[m_memory_type].vector_type]; ++ } ++ ++ machine_mode ++ memory_vector_mode (const function_instance &fi) const OVERRIDE ++ { ++ machine_mode mem_mode = type_suffixes[m_memory_type].vector_mode; ++ machine_mode reg_mode = fi.vector_mode (0); ++ return aarch64_sve_data_mode (GET_MODE_INNER (mem_mode), ++ GET_MODE_NUNITS (reg_mode)).require (); ++ } ++ ++ /* Return the rtx code associated with the kind of extension that ++ the load performs. */ ++ rtx_code ++ extend_rtx_code () const ++ { ++ return (type_suffixes[m_memory_type].unsigned_p ++ ? ZERO_EXTEND : SIGN_EXTEND); ++ } ++ ++ /* The type of the memory elements. This is part of the function base ++ name rather than a true type suffix. */ ++ type_suffix_index m_memory_type; ++}; ++ ++/* A function_base that truncates vector elements and stores them to memory. 
++ The memory element width is a fixed part of the function base name. */ ++class truncating_store : public function_base ++{ ++public: ++ CONSTEXPR truncating_store (scalar_int_mode to_mode) : m_to_mode (to_mode) {} ++ ++ unsigned int ++ call_properties (const function_instance &) const OVERRIDE ++ { ++ return CP_WRITE_MEMORY; ++ } ++ ++ tree ++ memory_scalar_type (const function_instance &fi) const OVERRIDE ++ { ++ /* In truncating stores, the signedness of the memory element is defined ++ to be the same as the signedness of the vector element. The signedness ++ doesn't make any difference to the behavior of the function. */ ++ type_class_index tclass = fi.type_suffix (0).tclass; ++ unsigned int element_bits = GET_MODE_BITSIZE (m_to_mode); ++ type_suffix_index suffix = find_type_suffix (tclass, element_bits); ++ return scalar_types[type_suffixes[suffix].vector_type]; ++ } ++ ++ machine_mode ++ memory_vector_mode (const function_instance &fi) const OVERRIDE ++ { ++ poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0)); ++ return aarch64_sve_data_mode (m_to_mode, nunits).require (); ++ } ++ ++ /* The mode of a single memory element. */ ++ scalar_int_mode m_to_mode; ++}; ++ ++/* An incomplete function_base for functions that have an associated rtx code. ++ It simply records information about the mapping for derived classes ++ to use. */ ++class rtx_code_function_base : public function_base ++{ ++public: ++ CONSTEXPR rtx_code_function_base (rtx_code code_for_sint, ++ rtx_code code_for_uint, ++ int unspec_for_fp = -1) ++ : m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint), ++ m_unspec_for_fp (unspec_for_fp) {} ++ ++ /* The rtx code to use for signed and unsigned integers respectively. ++ Can be UNKNOWN for functions that don't have integer forms. */ ++ rtx_code m_code_for_sint; ++ rtx_code m_code_for_uint; ++ ++ /* The UNSPEC_COND_* to use for floating-point operations. Can be -1 ++ for functions that only operate on integers. */ ++ int m_unspec_for_fp; ++}; ++ ++/* A function_base for functions that have an associated rtx code. ++ It supports all forms of predication except PRED_implicit. */ ++class rtx_code_function : public rtx_code_function_base ++{ ++public: ++ CONSTEXPR rtx_code_function (rtx_code code_for_sint, rtx_code code_for_uint, ++ int unspec_for_fp = -1) ++ : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint, ++ m_unspec_for_fp); ++ } ++}; ++ ++/* Like rtx_code_function, but for functions that take what is normally ++ the final argument first. One use of this class is to handle binary ++ reversed operations; another is to handle MLA-style operations that ++ are normally expressed in GCC as MAD-style operations. */ ++class rtx_code_function_rotated : public rtx_code_function_base ++{ ++public: ++ CONSTEXPR rtx_code_function_rotated (rtx_code code_for_sint, ++ rtx_code code_for_uint, ++ int unspec_for_fp = -1) ++ : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Rotate the inputs into their normal order, but continue to make _m ++ functions merge with what was originally the first vector argument. */ ++ unsigned int nargs = e.args.length (); ++ e.rotate_inputs_left (e.pred != PRED_none ? 
1 : 0, nargs); ++ return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint, ++ m_unspec_for_fp, nargs - 1); ++ } ++}; ++ ++/* An incomplete function_base for functions that have an associated ++ unspec code, with separate codes for signed integers, unsigned ++ integers and floating-point values. The class simply records ++ information about the mapping for derived classes to use. */ ++class unspec_based_function_base : public function_base ++{ ++public: ++ CONSTEXPR unspec_based_function_base (int unspec_for_sint, ++ int unspec_for_uint, ++ int unspec_for_fp) ++ : m_unspec_for_sint (unspec_for_sint), ++ m_unspec_for_uint (unspec_for_uint), ++ m_unspec_for_fp (unspec_for_fp) ++ {} ++ ++ /* Return the unspec code to use for INSTANCE, based on type suffix 0. */ ++ int ++ unspec_for (const function_instance &instance) const ++ { ++ return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp ++ : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint ++ : m_unspec_for_sint); ++ } ++ ++ /* The unspec code associated with signed-integer, unsigned-integer ++ and floating-point operations respectively. */ ++ int m_unspec_for_sint; ++ int m_unspec_for_uint; ++ int m_unspec_for_fp; ++}; ++ ++/* A function_base for functions that have an associated unspec code. ++ It supports all forms of predication except PRED_implicit. */ ++class unspec_based_function : public unspec_based_function_base ++{ ++public: ++ CONSTEXPR unspec_based_function (int unspec_for_sint, int unspec_for_uint, ++ int unspec_for_fp) ++ : unspec_based_function_base (unspec_for_sint, unspec_for_uint, ++ unspec_for_fp) ++ {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint, ++ m_unspec_for_fp); ++ } ++}; ++ ++/* Like unspec_based_function, but for functions that take what is normally ++ the final argument first. One use of this class is to handle binary ++ reversed operations; another is to handle MLA-style operations that ++ are normally expressed in GCC as MAD-style operations. */ ++class unspec_based_function_rotated : public unspec_based_function_base ++{ ++public: ++ CONSTEXPR unspec_based_function_rotated (int unspec_for_sint, ++ int unspec_for_uint, ++ int unspec_for_fp) ++ : unspec_based_function_base (unspec_for_sint, unspec_for_uint, ++ unspec_for_fp) ++ {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Rotate the inputs into their normal order, but continue to make _m ++ functions merge with what was originally the first vector argument. */ ++ unsigned int nargs = e.args.length (); ++ e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs); ++ return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint, ++ m_unspec_for_fp, nargs - 1); ++ } ++}; ++ ++/* Like unspec_based_function, but map the function directly to ++ CODE (UNSPEC, M) instead of using the generic predication-based ++ expansion. where M is the vector mode associated with type suffix 0. ++ This is useful if the unspec doesn't describe the full operation or ++ if the usual predication rules don't apply for some reason. 
*/ ++template ++class unspec_based_function_exact_insn : public unspec_based_function_base ++{ ++public: ++ CONSTEXPR unspec_based_function_exact_insn (int unspec_for_sint, ++ int unspec_for_uint, ++ int unspec_for_fp) ++ : unspec_based_function_base (unspec_for_sint, unspec_for_uint, ++ unspec_for_fp) ++ {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0))); ++ } ++}; ++ ++/* A function that performs an unspec and then adds it to another value. */ ++typedef unspec_based_function_exact_insn ++ unspec_based_add_function; ++ ++/* A functon that uses aarch64_pred* patterns regardless of the ++ predication type. */ ++typedef unspec_based_function_exact_insn ++ unspec_based_pred_function; ++ ++/* A function that acts like unspec_based_function_exact_insn ++ when operating on integers, but that expands to an (fma ...)-style ++ aarch64_sve* operation when applied to floats. */ ++template ++class unspec_based_fused_function : public unspec_based_function_base ++{ ++public: ++ CONSTEXPR unspec_based_fused_function (int unspec_for_sint, ++ int unspec_for_uint, ++ int unspec_for_fp) ++ : unspec_based_function_base (unspec_for_sint, unspec_for_uint, ++ unspec_for_fp) ++ {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ int unspec = unspec_for (e); ++ insn_code icode; ++ if (e.type_suffix (0).float_p) ++ { ++ /* Put the operands in the normal (fma ...) order, with the accumulator ++ last. This fits naturally since that's also the unprinted operand ++ in the asm output. */ ++ e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3); ++ icode = code_for_aarch64_sve (unspec, e.vector_mode (0)); ++ } ++ else ++ icode = INT_CODE (unspec, e.vector_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++/* Like unspec_based_fused_function, but for _lane functions. */ ++template ++class unspec_based_fused_lane_function : public unspec_based_function_base ++{ ++public: ++ CONSTEXPR unspec_based_fused_lane_function (int unspec_for_sint, ++ int unspec_for_uint, ++ int unspec_for_fp) ++ : unspec_based_function_base (unspec_for_sint, unspec_for_uint, ++ unspec_for_fp) ++ {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ int unspec = unspec_for (e); ++ insn_code icode; ++ if (e.type_suffix (0).float_p) ++ { ++ /* Put the operands in the normal (fma ...) order, with the accumulator ++ last. This fits naturally since that's also the unprinted operand ++ in the asm output. */ ++ e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4); ++ icode = code_for_aarch64_lane (unspec, e.vector_mode (0)); ++ } ++ else ++ icode = INT_CODE (unspec, e.vector_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++}; ++ ++/* A function_base that uses CODE_FOR_MODE (M) to get the associated ++ instruction code, where M is the vector mode associated with type ++ suffix N. */ ++template ++class code_for_mode_function : public function_base ++{ ++public: ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_exact_insn (CODE_FOR_MODE (e.vector_mode (N))); ++ } ++}; ++ ++/* A function that uses code_for_ (M), where M is the vector ++ mode associated with the first type suffix. */ ++#define CODE_FOR_MODE0(PATTERN) code_for_mode_function ++ ++/* Likewise for the second type suffix. */ ++#define CODE_FOR_MODE1(PATTERN) code_for_mode_function ++ ++/* Like CODE_FOR_MODE0, but the function doesn't raise exceptions when ++ operating on floating-point data. 
*/ ++#define QUIET_CODE_FOR_MODE0(PATTERN) \ ++ quiet< code_for_mode_function > ++ ++/* A function_base for functions that always expand to a fixed insn pattern, ++ regardless of what the suffixes are. */ ++class fixed_insn_function : public function_base ++{ ++public: ++ CONSTEXPR fixed_insn_function (insn_code code) : m_code (code) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ return e.use_exact_insn (m_code); ++ } ++ ++ /* The instruction to use. */ ++ insn_code m_code; ++}; ++ ++/* A function_base for functions that permute their arguments. */ ++class permute : public quiet ++{ ++public: ++ /* Fold a unary or binary permute with the permute vector given by ++ BUILDER. */ ++ gimple * ++ fold_permute (const gimple_folder &f, const vec_perm_builder &builder) const ++ { ++ /* Punt for now on _b16 and wider; we'd need more complex evpc logic ++ to rerecognize the result. */ ++ if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8) ++ return NULL; ++ ++ unsigned int nargs = gimple_call_num_args (f.call); ++ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); ++ vec_perm_indices indices (builder, nargs, nelts); ++ tree perm_type = build_vector_type (ssizetype, nelts); ++ return gimple_build_assign (f.lhs, VEC_PERM_EXPR, ++ gimple_call_arg (f.call, 0), ++ gimple_call_arg (f.call, nargs - 1), ++ vec_perm_indices_to_tree (perm_type, indices)); ++ } ++}; ++ ++/* A function_base for functions that permute two vectors using a fixed ++ choice of indices. */ ++class binary_permute : public permute ++{ ++public: ++ CONSTEXPR binary_permute (int unspec) : m_unspec (unspec) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ insn_code icode = code_for_aarch64_sve (m_unspec, e.vector_mode (0)); ++ return e.use_exact_insn (icode); ++ } ++ ++ /* The unspec code associated with the operation. */ ++ int m_unspec; ++}; ++ ++/* A function_base for functions that reduce a vector to a scalar. */ ++class reduction : public function_base ++{ ++public: ++ CONSTEXPR reduction (int unspec) ++ : m_unspec_for_sint (unspec), ++ m_unspec_for_uint (unspec), ++ m_unspec_for_fp (unspec) ++ {} ++ ++ CONSTEXPR reduction (int unspec_for_sint, int unspec_for_uint, ++ int unspec_for_fp) ++ : m_unspec_for_sint (unspec_for_sint), ++ m_unspec_for_uint (unspec_for_uint), ++ m_unspec_for_fp (unspec_for_fp) ++ {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = e.vector_mode (0); ++ int unspec = (!e.type_suffix (0).integer_p ? m_unspec_for_fp ++ : e.type_suffix (0).unsigned_p ? m_unspec_for_uint ++ : m_unspec_for_sint); ++ /* There's no distinction between SADDV and UADDV for 64-bit elements; ++ the signed versions only exist for narrower elements. */ ++ if (GET_MODE_UNIT_BITSIZE (mode) == 64 && unspec == UNSPEC_SADDV) ++ unspec = UNSPEC_UADDV; ++ return e.use_exact_insn (code_for_aarch64_pred_reduc (unspec, mode)); ++ } ++ ++ /* The unspec code associated with signed-integer, unsigned-integer ++ and floating-point operations respectively. */ ++ int m_unspec_for_sint; ++ int m_unspec_for_uint; ++ int m_unspec_for_fp; ++}; ++ ++/* A function_base for functions that shift narrower-than-64-bit values ++ by 64-bit amounts. 
*/ ++class shift_wide : public function_base ++{ ++public: ++ CONSTEXPR shift_wide (rtx_code code, int wide_unspec) ++ : m_code (code), m_wide_unspec (wide_unspec) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ machine_mode mode = e.vector_mode (0); ++ machine_mode elem_mode = GET_MODE_INNER (mode); ++ ++ /* If the argument is a constant that the normal shifts can handle ++ directly, use them instead. */ ++ rtx shift = unwrap_const_vec_duplicate (e.args.last ()); ++ if (aarch64_simd_shift_imm_p (shift, elem_mode, m_code == ASHIFT)) ++ { ++ e.args.last () = shift; ++ return e.map_to_rtx_codes (m_code, m_code, -1); ++ } ++ ++ if (e.pred == PRED_x) ++ return e.use_unpred_insn (code_for_aarch64_sve (m_wide_unspec, mode)); ++ ++ return e.use_cond_insn (code_for_cond (m_wide_unspec, mode)); ++ } ++ ++ /* The rtx code associated with a "normal" shift. */ ++ rtx_code m_code; ++ ++ /* The unspec code associated with the wide shift. */ ++ int m_wide_unspec; ++}; ++ ++/* A function_base for unary functions that count bits. */ ++class unary_count : public quiet ++{ ++public: ++ CONSTEXPR unary_count (rtx_code code) : m_code (code) {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* The md patterns treat the operand as an integer. */ ++ machine_mode mode = aarch64_sve_int_mode (e.vector_mode (0)); ++ e.args.last () = gen_lowpart (mode, e.args.last ()); ++ ++ if (e.pred == PRED_x) ++ return e.use_pred_x_insn (code_for_aarch64_pred (m_code, mode)); ++ ++ return e.use_cond_insn (code_for_cond (m_code, mode)); ++ } ++ ++ /* The rtx code associated with the operation. */ ++ rtx_code m_code; ++}; ++ ++/* A function_base for svwhile* functions. */ ++class while_comparison : public function_base ++{ ++public: ++ CONSTEXPR while_comparison (int unspec_for_sint, int unspec_for_uint) ++ : m_unspec_for_sint (unspec_for_sint), ++ m_unspec_for_uint (unspec_for_uint) ++ {} ++ ++ rtx ++ expand (function_expander &e) const OVERRIDE ++ { ++ /* Suffix 0 determines the predicate mode, suffix 1 determines the ++ scalar mode and signedness. */ ++ int unspec = (e.type_suffix (1).unsigned_p ++ ? m_unspec_for_uint ++ : m_unspec_for_sint); ++ machine_mode pred_mode = e.vector_mode (0); ++ scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1)); ++ return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode)); ++ } ++ ++ /* The unspec codes associated with signed and unsigned operations ++ respectively. */ ++ int m_unspec_for_sint; ++ int m_unspec_for_uint; ++}; ++ ++} ++ ++/* Declare the global function base NAME, creating it from an instance ++ of class CLASS with constructor arguments ARGS. */ ++#define FUNCTION(NAME, CLASS, ARGS) \ ++ namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \ ++ namespace functions { const function_base *const NAME = &NAME##_obj; } ++ ++#endif +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +new file mode 100644 +index 000000000..c6f6ce170 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +@@ -0,0 +1,3451 @@ ++/* ACLE support for AArch64 SVE (function shapes) ++ Copyright (C) 2018-2019 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. 
++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "tree.h" ++#include "rtl.h" ++#include "tm_p.h" ++#include "memmodel.h" ++#include "insn-codes.h" ++#include "optabs.h" ++#include "aarch64-sve-builtins.h" ++#include "aarch64-sve-builtins-shapes.h" ++ ++/* In the comments below, _t0 represents the first type suffix and _t1 ++ represents the second. Square brackets enclose characters that are ++ present in only the full name, not the overloaded name. Governing ++ predicate arguments and predicate suffixes are not shown, since they ++ depend on the predication type, which is a separate piece of ++ information from the shape. ++ ++ Non-overloaded functions may have additional suffixes beyond the ++ ones shown, if those suffixes don't affect the types in the type ++ signature. E.g. the predicate form of svtrn1 has a _b suffix, ++ but this does not affect the prototype, which is always ++ "svbool_t(svbool_t, svbool_t)". */ ++ ++namespace aarch64_sve { ++ ++/* Return a representation of "const T *". */ ++static tree ++build_const_pointer (tree t) ++{ ++ return build_pointer_type (build_qualified_type (t, TYPE_QUAL_CONST)); ++} ++ ++/* If INSTANCE has a governing predicate, add it to the list of argument ++ types in ARGUMENT_TYPES. RETURN_TYPE is the type returned by the ++ function. */ ++static void ++apply_predication (const function_instance &instance, tree return_type, ++ vec &argument_types) ++{ ++ if (instance.pred != PRED_none) ++ { ++ argument_types.quick_insert (0, get_svbool_t ()); ++ /* For unary merge operations, the first argument is a vector with ++ the same type as the result. For unary_convert_narrowt it also ++ provides the "bottom" half of active elements, and is present ++ for all types of predication. */ ++ if ((argument_types.length () == 2 && instance.pred == PRED_m) ++ || instance.shape == shapes::unary_convert_narrowt) ++ argument_types.quick_insert (0, return_type); ++ } ++} ++ ++/* Parse and move past an element type in FORMAT and return it as a type ++ suffix. The format is: ++ ++ [01] - the element type in type suffix 0 or 1 of INSTANCE ++ f - a floating-point type with the given number of bits ++ f[01] - a floating-point type with the same width as type suffix 0 or 1 ++ B - bfloat16_t ++ h - a half-sized version of ++ p - a predicate (represented as TYPE_SUFFIX_b) ++ q - a quarter-sized version of ++ s - a signed type with the given number of bits ++ s[01] - a signed type with the same width as type suffix 0 or 1 ++ u - an unsigned type with the given number of bits ++ u[01] - an unsigned type with the same width as type suffix 0 or 1 ++ w - a 64-bit version of if is integral, otherwise ++ ++ where is another element type. */ ++static type_suffix_index ++parse_element_type (const function_instance &instance, const char *&format) ++{ ++ int ch = *format++; ++ ++ if (ch == 'f' || ch == 's' || ch == 'u') ++ { ++ type_class_index tclass = (ch == 'f' ? TYPE_float ++ : ch == 's' ? 
TYPE_signed ++ : TYPE_unsigned); ++ char *end; ++ unsigned int bits = strtol (format, &end, 10); ++ format = end; ++ if (bits == 0 || bits == 1) ++ bits = instance.type_suffix (bits).element_bits; ++ return find_type_suffix (tclass, bits); ++ } ++ ++ if (ch == 'w') ++ { ++ type_suffix_index suffix = parse_element_type (instance, format); ++ if (type_suffixes[suffix].integer_p) ++ return find_type_suffix (type_suffixes[suffix].tclass, 64); ++ return suffix; ++ } ++ ++ if (ch == 'p') ++ return TYPE_SUFFIX_b; ++ ++ if (ch == 'B') ++ return TYPE_SUFFIX_bf16; ++ ++ if (ch == 'q') ++ { ++ type_suffix_index suffix = parse_element_type (instance, format); ++ return find_type_suffix (type_suffixes[suffix].tclass, ++ type_suffixes[suffix].element_bits / 4); ++ } ++ ++ if (ch == 'h') ++ { ++ type_suffix_index suffix = parse_element_type (instance, format); ++ /* Widening and narrowing doesn't change the type for predicates; ++ everything's still an svbool_t. */ ++ if (suffix == TYPE_SUFFIX_b) ++ return suffix; ++ return find_type_suffix (type_suffixes[suffix].tclass, ++ type_suffixes[suffix].element_bits / 2); ++ } ++ ++ if (ch == '0' || ch == '1') ++ return instance.type_suffix_ids[ch - '0']; ++ ++ gcc_unreachable (); ++} ++ ++/* Read and return a type from FORMAT for function INSTANCE. Advance ++ FORMAT beyond the type string. The format is: ++ ++ _ - void ++ al - array pointer for loads ++ ap - array pointer for prefetches ++ as - array pointer for stores ++ b - base vector type (from a _base suffix) ++ d - displacement vector type (from a _index or _offset suffix) ++ e - an enum with the given name ++ s - a scalar type with the given element suffix ++ t - a vector or tuple type with given element suffix [*1] ++ v - a vector with the given element suffix ++ ++ where has the format described above parse_element_type ++ ++ [*1] the vectors_per_tuple function indicates whether the type should ++ be a tuple, and if so, how many vectors it should contain. */ ++static tree ++parse_type (const function_instance &instance, const char *&format) ++{ ++ int ch = *format++; ++ ++ if (ch == '_') ++ return void_type_node; ++ ++ if (ch == 'a') ++ { ++ ch = *format++; ++ if (ch == 'l') ++ return build_const_pointer (instance.memory_scalar_type ()); ++ if (ch == 'p') ++ return const_ptr_type_node; ++ if (ch == 's') ++ return build_pointer_type (instance.memory_scalar_type ()); ++ gcc_unreachable (); ++ } ++ ++ if (ch == 'b') ++ return instance.base_vector_type (); ++ ++ if (ch == 'd') ++ return instance.displacement_vector_type (); ++ ++ if (ch == 'e') ++ { ++ if (strncmp (format, "pattern", 7) == 0) ++ { ++ format += 7; ++ return acle_svpattern; ++ } ++ if (strncmp (format, "prfop", 5) == 0) ++ { ++ format += 5; ++ return acle_svprfop; ++ } ++ gcc_unreachable (); ++ } ++ ++ if (ch == 's') ++ { ++ type_suffix_index suffix = parse_element_type (instance, format); ++ return scalar_types[type_suffixes[suffix].vector_type]; ++ } ++ ++ if (ch == 't') ++ { ++ type_suffix_index suffix = parse_element_type (instance, format); ++ vector_type_index vector_type = type_suffixes[suffix].vector_type; ++ unsigned int num_vectors = instance.vectors_per_tuple (); ++ return acle_vector_types[num_vectors - 1][vector_type]; ++ } ++ ++ if (ch == 'v') ++ { ++ type_suffix_index suffix = parse_element_type (instance, format); ++ return acle_vector_types[0][type_suffixes[suffix].vector_type]; ++ } ++ ++ gcc_unreachable (); ++} ++ ++/* Read and move past any argument count at FORMAT for the function ++ signature of INSTANCE. 
The counts are: ++ ++ *q: one argument per element in a 128-bit quadword (as for svdupq) ++ *t: one argument per vector in a tuple (as for svcreate) ++ ++ Otherwise the count is 1. */ ++static unsigned int ++parse_count (const function_instance &instance, const char *&format) ++{ ++ if (format[0] == '*' && format[1] == 'q') ++ { ++ format += 2; ++ return instance.elements_per_vq (0); ++ } ++ if (format[0] == '*' && format[1] == 't') ++ { ++ format += 2; ++ return instance.vectors_per_tuple (); ++ } ++ return 1; ++} ++ ++/* Read a type signature for INSTANCE from FORMAT. Add the argument types ++ to ARGUMENT_TYPES and return the return type. ++ ++ The format is a comma-separated list of types (as for parse_type), ++ with the first type being the return type and the rest being the ++ argument types. Each argument type can be followed by an optional ++ count (as for parse_count). */ ++static tree ++parse_signature (const function_instance &instance, const char *format, ++ vec &argument_types) ++{ ++ tree return_type = parse_type (instance, format); ++ while (format[0] == ',') ++ { ++ format += 1; ++ tree argument_type = parse_type (instance, format); ++ unsigned int count = parse_count (instance, format); ++ for (unsigned int i = 0; i < count; ++i) ++ argument_types.quick_push (argument_type); ++ } ++ gcc_assert (format[0] == 0); ++ return return_type; ++} ++ ++/* Add one function instance for GROUP, using mode suffix MODE_SUFFIX_ID, ++ the type suffixes at index TI and the predication suffix at index PI. ++ The other arguments are as for build_all. */ ++static void ++build_one (function_builder &b, const char *signature, ++ const function_group_info &group, mode_suffix_index mode_suffix_id, ++ unsigned int ti, unsigned int pi, bool force_direct_overloads) ++{ ++ /* Byte forms of svdupq take 16 arguments. */ ++ auto_vec argument_types; ++ function_instance instance (group.base_name, *group.base, *group.shape, ++ mode_suffix_id, group.types[ti], ++ group.preds[pi]); ++ tree return_type = parse_signature (instance, signature, argument_types); ++ apply_predication (instance, return_type, argument_types); ++ b.add_unique_function (instance, return_type, argument_types, ++ group.required_extensions, force_direct_overloads); ++} ++ ++/* GROUP describes some sort of gather or scatter operation. There are ++ two cases: ++ ++ - If the function has any type suffixes (as for loads and stores), the ++ first function type suffix specifies either a 32-bit or a 64-bit type, ++ which in turn selects either MODE32 or MODE64 as the addressing mode. ++ Add a function instance for every type and predicate combination ++ in GROUP for which the associated addressing mode is not MODE_none. ++ ++ - If the function has no type suffixes (as for prefetches), add one ++ MODE32 form and one MODE64 form for each predication type. ++ ++ The other arguments are as for build_all. 
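
   As an illustrative sketch (assumed from the conventions described above
   parse_type/parse_signature, not spelled out in this hunk): a signature
   string such as "t0,al" for svld1 with type suffix _s32 and implicit
   predication would expand to roughly the user-facing prototype

     svint32_t svld1_s32 (svbool_t pg, const int32_t *base);

   where "t0" gives the vector (or tuple) type of suffix 0, "al" gives a
   const pointer to the memory scalar type, and apply_predication prepends
   the governing predicate argument.
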
*/ ++static void ++build_32_64 (function_builder &b, const char *signature, ++ const function_group_info &group, mode_suffix_index mode32, ++ mode_suffix_index mode64, bool force_direct_overloads = false) ++{ ++ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) ++ if (group.types[0][0] == NUM_TYPE_SUFFIXES) ++ { ++ gcc_assert (mode32 != MODE_none && mode64 != MODE_none); ++ build_one (b, signature, group, mode32, 0, pi, ++ force_direct_overloads); ++ build_one (b, signature, group, mode64, 0, pi, ++ force_direct_overloads); ++ } ++ else ++ for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) ++ { ++ unsigned int bits = type_suffixes[group.types[ti][0]].element_bits; ++ gcc_assert (bits == 32 || bits == 64); ++ mode_suffix_index mode = bits == 32 ? mode32 : mode64; ++ if (mode != MODE_none) ++ build_one (b, signature, group, mode, ti, pi, ++ force_direct_overloads); ++ } ++} ++ ++/* For every type and predicate combination in GROUP, add one function ++ that takes a scalar (pointer) base and a signed vector array index, ++ and another that instead takes an unsigned vector array index. ++ The vector array index has the same element size as the first ++ function type suffix. SIGNATURE is as for build_all. */ ++static void ++build_sv_index (function_builder &b, const char *signature, ++ const function_group_info &group) ++{ ++ build_32_64 (b, signature, group, MODE_s32index, MODE_s64index); ++ build_32_64 (b, signature, group, MODE_u32index, MODE_u64index); ++} ++ ++/* Like build_sv_index, but only handle 64-bit types. */ ++static void ++build_sv_index64 (function_builder &b, const char *signature, ++ const function_group_info &group) ++{ ++ build_32_64 (b, signature, group, MODE_none, MODE_s64index); ++ build_32_64 (b, signature, group, MODE_none, MODE_u64index); ++} ++ ++/* Like build_sv_index, but taking vector byte offsets instead of vector ++ array indices. */ ++static void ++build_sv_offset (function_builder &b, const char *signature, ++ const function_group_info &group) ++{ ++ build_32_64 (b, signature, group, MODE_s32offset, MODE_s64offset); ++ build_32_64 (b, signature, group, MODE_u32offset, MODE_u64offset); ++} ++ ++/* Like build_sv_offset, but exclude offsets that must be interpreted ++ as signed (i.e. s32offset). */ ++static void ++build_sv_uint_offset (function_builder &b, const char *signature, ++ const function_group_info &group) ++{ ++ build_32_64 (b, signature, group, MODE_none, MODE_s64offset); ++ build_32_64 (b, signature, group, MODE_u32offset, MODE_u64offset); ++} ++ ++/* For every type and predicate combination in GROUP, add a function ++ that takes a vector base address and no displacement. The vector ++ base has the same element size as the first type suffix. ++ ++ The other arguments are as for build_all. */ ++static void ++build_v_base (function_builder &b, const char *signature, ++ const function_group_info &group, ++ bool force_direct_overloads = false) ++{ ++ build_32_64 (b, signature, group, MODE_u32base, MODE_u64base, ++ force_direct_overloads); ++} ++ ++/* Like build_v_base, but for functions that also take a scalar array ++ index. */ ++static void ++build_vs_index (function_builder &b, const char *signature, ++ const function_group_info &group, ++ bool force_direct_overloads = false) ++{ ++ build_32_64 (b, signature, group, MODE_u32base_index, MODE_u64base_index, ++ force_direct_overloads); ++} ++ ++/* Like build_v_base, but for functions that also take a scalar byte ++ offset. 
*/ ++static void ++build_vs_offset (function_builder &b, const char *signature, ++ const function_group_info &group, ++ bool force_direct_overloads = false) ++{ ++ build_32_64 (b, signature, group, MODE_u32base_offset, MODE_u64base_offset, ++ force_direct_overloads); ++} ++ ++/* Add a function instance for every type and predicate combination ++ in GROUP. Take the function base name from GROUP and the mode suffix ++ from MODE_SUFFIX_ID. Use SIGNATURE to construct the function signature ++ without a governing predicate, then use apply_predication to add in the ++ predicate. FORCE_DIRECT_OVERLOADS is true if there is a one-to-one ++ mapping between "short" and "full" names, and if standard overload ++ resolution therefore isn't necessary. */ ++static void ++build_all (function_builder &b, const char *signature, ++ const function_group_info &group, mode_suffix_index mode_suffix_id, ++ bool force_direct_overloads = false) ++{ ++ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) ++ for (unsigned int ti = 0; ++ ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) ++ build_one (b, signature, group, mode_suffix_id, ti, pi, ++ force_direct_overloads); ++} ++ ++/* TYPE is the largest type suffix associated with the arguments of R, ++ but the result is twice as wide. Return the associated type suffix ++ if it exists, otherwise report an appropriate error and return ++ NUM_TYPE_SUFFIXES. */ ++static type_suffix_index ++long_type_suffix (function_resolver &r, type_suffix_index type) ++{ ++ unsigned int element_bits = type_suffixes[type].element_bits; ++ if (type_suffixes[type].integer_p && element_bits < 64) ++ return find_type_suffix (type_suffixes[type].tclass, element_bits * 2); ++ ++ r.report_no_such_form (type); ++ return NUM_TYPE_SUFFIXES; ++} ++ ++/* Declare the function shape NAME, pointing it to an instance ++ of class _def. */ ++#define SHAPE(NAME) \ ++ static CONSTEXPR const NAME##_def NAME##_obj; \ ++ namespace shapes { const function_shape *const NAME = &NAME##_obj; } ++ ++/* Base class for functions that are not overloaded. */ ++struct nonoverloaded_base : public function_shape ++{ ++ bool ++ explicit_type_suffix_p (unsigned int) const OVERRIDE ++ { ++ return true; ++ } ++ ++ tree ++ resolve (function_resolver &) const OVERRIDE ++ { ++ gcc_unreachable (); ++ } ++}; ++ ++/* Base class for overloaded functions. Bit N of EXPLICIT_MASK is true ++ if type suffix N appears in the overloaded name. */ ++template ++struct overloaded_base : public function_shape ++{ ++ bool ++ explicit_type_suffix_p (unsigned int i) const OVERRIDE ++ { ++ return (EXPLICIT_MASK >> i) & 1; ++ } ++}; ++ ++/* Base class for adr_index and adr_offset. */ ++struct adr_base : public overloaded_base<0> ++{ ++ /* The function takes two arguments: a vector base and a vector displacement ++ (either an index or an offset). Resolve based on them both. */ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ mode_suffix_index mode; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (mode = r.resolve_adr_address (0)) == MODE_none) ++ return error_mark_node; ++ ++ return r.resolve_to (mode); ++ }; ++}; ++ ++/* Base class for narrowing bottom binary functions that take an ++ immediate second operand. The result is half the size of input ++ and has class CLASS. 
*/ ++template ++struct binary_imm_narrowb_base : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_n); ++ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS ++ || CLASS == TYPE_unsigned); ++ if (CLASS == TYPE_unsigned) ++ build_all (b, "vhu0,v0,su64", group, MODE_n); ++ else ++ build_all (b, "vh0,v0,su64", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (1, 1); ++ } ++}; ++ ++/* The top equivalent of binary_imm_narrowb_base. It takes three arguments, ++ with the first being the values of the even elements, which are typically ++ the result of the narrowb operation. */ ++template ++struct binary_imm_narrowt_base : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_n); ++ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS ++ || CLASS == TYPE_unsigned); ++ if (CLASS == TYPE_unsigned) ++ build_all (b, "vhu0,vhu0,v0,su64", group, MODE_n); ++ else ++ build_all (b, "vh0,vh0,v0,su64", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (3, i, nargs) ++ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i, i + 1, type, CLASS, r.HALF_SIZE) ++ || !r.require_integer_immediate (i + 2)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++ ++/* Base class for long (i.e. narrow op narrow -> wide) binary functions ++ that take an immediate second operand. The type suffix specifies ++ the wider type. */ ++struct binary_imm_long_base : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_n); ++ build_all (b, "v0,vh0,su64", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type, result_type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_integer_immediate (i + 1) ++ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ if (tree res = r.lookup_form (r.mode_suffix_id, result_type)) ++ return res; ++ ++ return r.report_no_such_form (type); ++ } ++}; ++ ++/* Base class for inc_dec and inc_dec_pat. */ ++struct inc_dec_base : public overloaded_base<0> ++{ ++ CONSTEXPR inc_dec_base (bool pat_p) : m_pat_p (pat_p) {} ++ ++ /* Resolve based on the first argument only, which must be either a ++ scalar or a vector. If it's a scalar, it must be a 32-bit or ++ 64-bit integer. */ ++ tree ++ resolve (function_resolver &r) const ++ { ++ unsigned int i, nargs; ++ if (!r.check_gp_argument (m_pat_p ? 
3 : 2, i, nargs) ++ || !r.require_vector_or_scalar_type (i)) ++ return error_mark_node; ++ ++ mode_suffix_index mode; ++ type_suffix_index type; ++ if (r.scalar_argument_p (i)) ++ { ++ mode = MODE_n; ++ type = r.infer_integer_scalar_type (i); ++ } ++ else ++ { ++ mode = MODE_none; ++ type = r.infer_vector_type (i); ++ } ++ if (type == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ for (++i; i < nargs; ++i) ++ if (!r.require_integer_immediate (i)) ++ return error_mark_node; ++ ++ return r.resolve_to (mode, type); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_range (m_pat_p ? 2 : 1, 1, 16); ++ } ++ ++ bool m_pat_p; ++}; ++ ++/* Base class for load and load_replicate. */ ++struct load_contiguous_base : public overloaded_base<0> ++{ ++ /* Resolve a call based purely on a pointer argument. The other arguments ++ are a governing predicate and (for MODE_vnum) a vnum offset. */ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ bool vnum_p = r.mode_suffix_id == MODE_vnum; ++ gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); ++ ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (vnum_p ? 2 : 1, i, nargs) ++ || (type = r.infer_pointer_type (i)) == NUM_TYPE_SUFFIXES ++ || (vnum_p && !r.require_scalar_type (i + 1, "int64_t"))) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++ ++/* Base class for gather loads that take a scalar base and a vector ++ displacement (either an offset or an index). */ ++struct load_gather_sv_base : public overloaded_base<0> ++{ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ mode_suffix_index mode; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_pointer_type (i, true)) == NUM_TYPE_SUFFIXES ++ || (mode = r.resolve_sv_displacement (i + 1, type, true), ++ mode == MODE_none)) ++ return error_mark_node; ++ ++ return r.resolve_to (mode, type); ++ } ++}; ++ ++/* Base class for load_ext_gather_index and load_ext_gather_offset, ++ which differ only in the units of the displacement. */ ++struct load_ext_gather_base : public overloaded_base<1> ++{ ++ /* Resolve a gather load that takes one of: ++ ++ - a scalar pointer base and a vector displacement ++ - a vector base with no displacement or ++ - a vector base and a scalar displacement ++ ++ The function has an explicit type suffix that determines the type ++ of the loaded data. */ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ /* No resolution is needed for a vector base with no displacement; ++ there's a one-to-one mapping between short and long names. */ ++ gcc_assert (r.displacement_units () != UNITS_none); ++ ++ type_suffix_index type = r.type_suffix_ids[0]; ++ ++ unsigned int i, nargs; ++ mode_suffix_index mode; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (mode = r.resolve_gather_address (i, type, true)) == MODE_none) ++ return error_mark_node; ++ ++ return r.resolve_to (mode, type); ++ } ++}; ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, ++ sv_t) (for integer t0) ++ sv_t svmmla[_t0](sv_t, sv_t, sv_t) (for floating-point t0) ++ ++ The functions act like the equivalent of "ternary_qq" for integer elements ++ and normal vector-only ternary functions for floating-point elements. 
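
   As a hedged illustration (taken from the public ACLE rather than this
   hunk), the user-facing calls this shape resolves look roughly like:

     svint32_t   svmmla_s32 (svint32_t acc, svint8_t a, svint8_t b);
     svfloat32_t svmmla_f32 (svfloat32_t acc, svfloat32_t a, svfloat32_t b);

   i.e. the integer forms take quarter-width inputs (the "vq0" operands
   built below), while the floating-point forms are plain same-width
   ternary operations (the "v0" operands).
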
*/ ++struct mmla_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ /* svmmla is distributed over several extensions. Allow the common ++ denominator to define the overloaded svmmla function without ++ defining any specific versions. */ ++ if (group.types[0][0] != NUM_TYPE_SUFFIXES) ++ { ++ if (type_suffixes[group.types[0][0]].float_p) ++ build_all (b, "v0,v0,v0,v0", group, MODE_none); ++ else ++ build_all (b, "v0,v0,vq0,vq0", group, MODE_none); ++ } ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (3, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ /* Make sure that the function exists now, since not all forms ++ follow a set pattern after this point. */ ++ tree res = r.resolve_to (r.mode_suffix_id, type); ++ if (res == error_mark_node) ++ return res; ++ ++ bool float_p = type_suffixes[type].float_p; ++ unsigned int modifier = float_p ? r.SAME_SIZE : r.QUARTER_SIZE; ++ if (!r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, ++ modifier) ++ || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS, ++ modifier)) ++ return error_mark_node; ++ ++ return res; ++ } ++}; ++SHAPE (mmla) ++ ++/* Base class for prefetch_gather_index and prefetch_gather_offset, ++ which differ only in the units of the displacement. */ ++struct prefetch_gather_base : public overloaded_base<0> ++{ ++ /* Resolve a gather prefetch that takes one of: ++ ++ - a scalar pointer base (const void *) and a vector displacement ++ - a vector base with no displacement or ++ - a vector base and a scalar displacement ++ ++ The prefetch operation is the final argument. This is purely a ++ mode-based resolution; there are no type suffixes. */ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ bool has_displacement_p = r.displacement_units () != UNITS_none; ++ ++ unsigned int i, nargs; ++ mode_suffix_index mode; ++ if (!r.check_gp_argument (has_displacement_p ? 3 : 2, i, nargs) ++ || (mode = r.resolve_gather_address (i, NUM_TYPE_SUFFIXES, ++ false)) == MODE_none ++ || !r.require_integer_immediate (nargs - 1)) ++ return error_mark_node; ++ ++ return r.resolve_to (mode); ++ } ++}; ++ ++/* Wraps BASE to provide a narrowing shift right function. Argument N ++ is an immediate shift amount in the range [1, sizeof(_t) * 4]. */ ++template ++struct shift_right_imm_narrow_wrapper : public BASE ++{ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ unsigned int bits = c.type_suffix (0).element_bits / 2; ++ return c.require_immediate_range (N, 1, bits); ++ } ++}; ++ ++/* Base class for store_scatter_index and store_scatter_offset, ++ which differ only in the units of the displacement. */ ++struct store_scatter_base : public overloaded_base<0> ++{ ++ /* Resolve a scatter store that takes one of: ++ ++ - a scalar pointer base and a vector displacement ++ - a vector base with no displacement or ++ - a vector base and a scalar displacement ++ ++ The stored data is the final argument, and it determines the ++ type suffix. */ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ bool has_displacement_p = r.displacement_units () != UNITS_none; ++ ++ unsigned int i, nargs; ++ mode_suffix_index mode; ++ type_suffix_index type; ++ if (!r.check_gp_argument (has_displacement_p ? 
3 : 2, i, nargs) ++ || (type = r.infer_sd_vector_type (nargs - 1)) == NUM_TYPE_SUFFIXES ++ || (mode = r.resolve_gather_address (i, type, false)) == MODE_none) ++ return error_mark_node; ++ ++ return r.resolve_to (mode, type); ++ } ++}; ++ ++/* Base class for ternary operations in which the final argument is an ++ immediate shift amount. The derived class should check the range. */ ++struct ternary_shift_imm_base : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_n); ++ build_all (b, "v0,v0,v0,su64", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (2, 1); ++ } ++}; ++ ++/* Base class for ternary operations in which the first argument has the ++ same element type as the result, and in which the second and third ++ arguments have an element type that is derived the first. ++ ++ MODIFIER is the number of element bits in the second and third ++ arguments, or a function_resolver modifier that says how this ++ precision is derived from the first argument's elements. ++ ++ TYPE_CLASS2 and TYPE_CLASS3 are the type classes of the second and ++ third arguments, or function_resolver::SAME_TYPE_CLASS if the type ++ class is the same as the first argument. */ ++template ++struct ternary_resize2_opt_n_base : public overloaded_base<0> ++{ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (3, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2, ++ MODIFIER)) ++ return error_mark_node; ++ ++ return r.finish_opt_n_resolution (i + 2, i, type, TYPE_CLASS3, MODIFIER); ++ } ++}; ++ ++/* Like ternary_resize2_opt_n_base, but for functions that don't take ++ a final scalar argument. */ ++template ++struct ternary_resize2_base : public overloaded_base<0> ++{ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (3, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2, ++ MODIFIER) ++ || !r.require_derived_vector_type (i + 2, i, type, TYPE_CLASS3, ++ MODIFIER)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++ ++/* Like ternary_resize2_opt_n_base, but for functions that take a final ++ lane argument. */ ++template ++struct ternary_resize2_lane_base : public overloaded_base<0> ++{ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (4, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2, ++ MODIFIER) ++ || !r.require_derived_vector_type (i + 2, i, type, TYPE_CLASS3, ++ MODIFIER) ++ || !r.require_integer_immediate (i + 3)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++ ++/* A specialization of ternary_resize2_lane_base for bfloat16 elements, ++ indexed in groups of N elements. 
*/ ++template ++struct ternary_bfloat_lane_base ++ : public ternary_resize2_lane_base<16, TYPE_bfloat, TYPE_bfloat> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vB,vB,su64", group, MODE_none); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_lane_index (3, N); ++ } ++}; ++ ++/* A specialization of ternary_resize2_lane_base for quarter-sized ++ elements. */ ++template ++struct ternary_qq_lane_base ++ : public ternary_resize2_lane_base ++{ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_lane_index (3, 4); ++ } ++}; ++ ++/* Base class for narrowing bottom unary functions. The result is half ++ the size of input and has class CLASS. */ ++template ++struct unary_narrowb_base : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS ++ || CLASS == TYPE_unsigned); ++ if (CLASS == TYPE_unsigned) ++ build_all (b, "vhu0,v0", group, MODE_none); ++ else ++ build_all (b, "vh0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_unary (CLASS, r.HALF_SIZE); ++ } ++}; ++ ++/* The top equivalent of unary_imm_narrowb_base. All forms take the values ++ of the even elements as an extra argument, before any governing predicate. ++ These even elements are typically the result of the narrowb operation. */ ++template ++struct unary_narrowt_base : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS ++ || CLASS == TYPE_unsigned); ++ if (CLASS == TYPE_unsigned) ++ build_all (b, "vhu0,vhu0,v0", group, MODE_none); ++ else ++ build_all (b, "vh0,vh0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i, i + 1, type, CLASS, r.HALF_SIZE)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++ ++/* sv_t svfoo[_m0base]_[m1]index(sv_t, sv_t) ++ ++ for all valid combinations of vector base type and vector ++ displacement type . */ ++struct adr_index_def : public adr_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_index); ++ build_all (b, "b,b,d", group, MODE_u32base_s32index); ++ build_all (b, "b,b,d", group, MODE_u32base_u32index); ++ build_all (b, "b,b,d", group, MODE_u64base_s64index); ++ build_all (b, "b,b,d", group, MODE_u64base_u64index); ++ } ++}; ++SHAPE (adr_index) ++ ++/* sv_t svfoo[_m0base]_[m1]offset(sv_t, sv_t). ++ ++ for all valid combinations of vector base type and vector ++ displacement type . 
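++   For example (assuming the usual ACLE spelling of the address calculation
++   functions): svadrb_offset (bases, offsets) with svuint32_t bases and
++   offsets resolves to svadrb_u32base_u32offset.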
*/ ++struct adr_offset_def : public adr_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_offset); ++ build_all (b, "b,b,d", group, MODE_u32base_s32offset); ++ build_all (b, "b,b,d", group, MODE_u32base_u32offset); ++ build_all (b, "b,b,d", group, MODE_u64base_s64offset); ++ build_all (b, "b,b,d", group, MODE_u64base_u64offset); ++ } ++}; ++SHAPE (adr_offset) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t) ++ ++ i.e. a binary operation with uniform types, but with no scalar form. */ ++struct binary_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (2); ++ } ++}; ++SHAPE (binary) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, _t). ++ ++ i.e. a version of the standard binary shape binary_opt_n in which ++ the final argument is always a signed integer. */ ++struct binary_int_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vs0", group, MODE_none); ++ build_all (b, "v0,v0,ss0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ return r.finish_opt_n_resolution (i + 1, i, type, TYPE_signed); ++ } ++}; ++SHAPE (binary_int_opt_n) ++ ++/* sv_t svfoo_(sv_t, sv_t, uint64_t) ++ ++ where the final argument is an integer constant expression in the ++ range [0, 16 / sizeof (_t) - 1]. */ ++struct binary_lane_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (2, 1); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_lane_index (2); ++ } ++}; ++SHAPE (binary_lane) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, uint64_t). ++ ++ where the final argument is an integer constant expression in the ++ range [0, 32 / sizeof (_t) - 1]. 
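++   A hedged example (assuming the SVE2 ACLE spelling of the widening
++   multiplies): svmullb_lane (a, b, 1) with svint32_t a and b resolves to
++   svmullb_lane_s64, returning the widened svint64_t result.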
*/ ++struct binary_long_lane_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,vh0,vh0,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type, result_type; ++ if (!r.check_gp_argument (3, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_matching_vector_type (i + 1, type) ++ || !r.require_integer_immediate (i + 2) ++ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ if (tree res = r.lookup_form (r.mode_suffix_id, result_type)) ++ return res; ++ ++ return r.report_no_such_form (type); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_lane_index (2); ++ } ++}; ++SHAPE (binary_long_lane) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, _t). */ ++struct binary_long_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,vh0,vh0", group, MODE_none); ++ build_all (b, "v0,vh0,sh0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type, result_type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS, ++ r.SAME_SIZE, result_type); ++ } ++}; ++SHAPE (binary_long_opt_n) ++ ++/* sv_t svfoo[_n_t0](sv_t, _t). ++ ++ i.e. a binary operation in which the final argument is always a scalar ++ rather than a vector. */ ++struct binary_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_n); ++ build_all (b, "v0,v0,s0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_scalar_type (i + 1, r.SAME_TYPE_CLASS)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (binary_n) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, _t) ++ ++ i.e. a version of binary_opt_n in which the output elements are half the ++ width of the input elements. */ ++struct binary_narrowb_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "vh0,v0,v0", group, MODE_none); ++ build_all (b, "vh0,v0,s0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform_opt_n (2); ++ } ++}; ++SHAPE (binary_narrowb_opt_n) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, sv_t, _t) ++ ++ This is the "top" counterpart to binary_narrowb_opt_n. 
*/ ++struct binary_narrowt_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "vh0,vh0,v0,v0", group, MODE_none); ++ build_all (b, "vh0,vh0,v0,s0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (3, i, nargs) ++ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i, i + 1, type, r.SAME_TYPE_CLASS, ++ r.HALF_SIZE)) ++ return error_mark_node; ++ ++ return r.finish_opt_n_resolution (i + 2, i + 1, type); ++ } ++}; ++SHAPE (binary_narrowt_opt_n) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, _t) ++ ++ i.e. the standard shape for binary operations that operate on ++ uniform types. */ ++struct binary_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0", group, MODE_none); ++ /* _b functions do not have an _n form, but are classified as ++ binary_opt_n so that they can be overloaded with vector ++ functions. */ ++ if (group.types[0][0] == TYPE_SUFFIX_b) ++ gcc_assert (group.types[0][1] == NUM_TYPE_SUFFIXES); ++ else ++ build_all (b, "v0,v0,s0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform_opt_n (2); ++ } ++}; ++SHAPE (binary_opt_n) ++ ++/* svbool_t svfoo(svbool_t, svbool_t). */ ++struct binary_pred_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "v0,v0,v0", group, MODE_none); ++ } ++}; ++SHAPE (binary_pred) ++ ++/* sv_t svfoo[_](sv_t, sv_t, uint64_t) ++ ++ where the final argument must be 90 or 270. */ ++struct binary_rotate_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (2, 1); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_either_or (2, 90, 270); ++ } ++}; ++SHAPE (binary_rotate) ++ ++/* sv_t svfoo_t0(_t, _t) ++ ++ i.e. a binary function that takes two scalars and returns a vector. ++ An explicit type suffix is required. */ ++struct binary_scalar_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "v0,s0,s0", group, MODE_none); ++ } ++}; ++SHAPE (binary_scalar) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t). ++ ++ i.e. a version of "binary" that returns unsigned integers. */ ++struct binary_to_uint_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "vu0,v0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (2); ++ } ++}; ++SHAPE (binary_to_uint) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t) ++ ++ i.e. a version of "binary" in which the final argument is always an ++ unsigned integer. 
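++   For example (assuming svtbl keeps its usual ACLE form): svtbl (data,
++   indices) with svfloat32_t data and svuint32_t indices resolves to
++   svtbl_f32.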
*/ ++struct binary_uint_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vu0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (binary_uint) ++ ++/* sv_t svfoo[_t0](sv_t, _t) ++ ++ i.e. a version of binary_n in which the final argument is always an ++ unsigned integer. */ ++struct binary_uint_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,su0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_scalar_type (i + 1, TYPE_unsigned)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (binary_uint_n) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, _t) ++ ++ i.e. a version of the standard binary shape binary_opt_n in which ++ the final argument is always an unsigned integer. */ ++struct binary_uint_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vu0", group, MODE_none); ++ build_all (b, "v0,v0,su0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ return r.finish_opt_n_resolution (i + 1, i, type, TYPE_unsigned); ++ } ++}; ++SHAPE (binary_uint_opt_n) ++ ++/* sv_t svfoo[_t0](sv_t, uint64_t). ++ ++ i.e. a version of binary_n in which the final argument is always ++ a 64-bit unsigned integer. */ ++struct binary_uint64_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_scalar_type (i + 1, "uint64_t")) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (binary_uint64_n) ++ ++/* sv_t svfoo[_t0](sv_t, svuint64_t) ++ sv_t svfoo[_n_t0](sv_t, uint64_t) ++ ++ i.e. a version of the standard binary shape binary_opt_n in which ++ the final argument is always a uint64_t. 
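++   A hedged example (assuming the usual ACLE spelling of the wide shifts):
++   svlsl_wide_x (pg, x, shifts) with svint32_t x and svuint64_t shifts
++   resolves to svlsl_wide_s32_x, while svlsl_wide_x (pg, x, 2) selects the
++   scalar _n form svlsl_wide_n_s32_x.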
*/ ++struct binary_uint64_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vu64", group, MODE_none); ++ build_all (b, "v0,v0,su64", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ return r.finish_opt_n_resolution (i + 1, i, type, TYPE_unsigned, 64); ++ } ++}; ++SHAPE (binary_uint64_opt_n) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t). */ ++struct binary_wide_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vh0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, ++ r.HALF_SIZE)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (binary_wide) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, _t). */ ++struct binary_wide_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vh0", group, MODE_none); ++ build_all (b, "v0,v0,sh0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS, ++ r.HALF_SIZE); ++ } ++}; ++SHAPE (binary_wide_opt_n) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t) ++ _t svfoo[_n_t0](_t, sv_t). */ ++struct clast_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0", group, MODE_none); ++ build_all (b, "s0,s0,v0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ if (!r.check_gp_argument (2, i, nargs) ++ || !r.require_vector_or_scalar_type (i)) ++ return error_mark_node; ++ ++ if (r.scalar_argument_p (i)) ++ { ++ type_suffix_index type; ++ if (!r.require_derived_scalar_type (i, r.SAME_TYPE_CLASS) ++ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ return r.resolve_to (MODE_n, type); ++ } ++ else ++ { ++ type_suffix_index type; ++ if ((type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_matching_vector_type (i + 1, type)) ++ return error_mark_node; ++ return r.resolve_to (MODE_none, type); ++ } ++ } ++}; ++SHAPE (clast) ++ ++/* svbool_t svfoo[_t0](sv_t, sv_t). 
*/ ++struct compare_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "vp,v0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (2); ++ } ++}; ++SHAPE (compare) ++ ++/* svbool_t svfoo[_t0](sv_t, sv_t) ++ svbool_t svfoo[_n_t0](sv_t, _t) ++ ++ i.e. a comparison between two vectors, or between a vector and a scalar. */ ++struct compare_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "vp,v0,v0", group, MODE_none); ++ build_all (b, "vp,v0,s0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform_opt_n (2); ++ } ++}; ++SHAPE (compare_opt_n) ++ ++/* svbool_t svfoo[_t0](const _t *, const _t *). */ ++struct compare_ptr_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "vp,al,al", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_pointer_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_matching_pointer_type (i + 1, i, type)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (compare_ptr) ++ ++/* svbool_t svfoo_t0[_t1](_t, _t) ++ ++ where _t0 is a _b suffix that describes the predicate result. ++ There is no direct relationship between the element sizes of _t0 ++ and _t1. */ ++struct compare_scalar_def : public overloaded_base<1> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "vp,s1,s1", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_integer_scalar_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_matching_integer_scalar_type (i + 1, i, type)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type); ++ } ++}; ++SHAPE (compare_scalar) ++ ++/* svbool_t svfoo[_t0](sv_t, svint64_t) (for signed t0) ++ svbool_t svfoo[_n_t0](sv_t, int64_t) (for signed t0) ++ svbool_t svfoo[_t0](sv_t, svuint64_t) (for unsigned t0) ++ svbool_t svfoo[_n_t0](sv_t, uint64_t) (for unsigned t0) ++ ++ i.e. a comparison in which the second argument is 64 bits. */ ++struct compare_wide_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "vp,v0,vw0", group, MODE_none); ++ build_all (b, "vp,v0,sw0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS, 64); ++ } ++}; ++SHAPE (compare_wide_opt_n) ++ ++/* uint64_t svfoo(). 
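++   For example, svcntb () and svcntw () follow this shape: they take no
++   arguments and return the current vector length in bytes or in 32-bit
++   elements as a uint64_t.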
*/ ++struct count_inherent_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "su64", group, MODE_none); ++ } ++}; ++SHAPE (count_inherent) ++ ++/* uint64_t svfoo(enum svpattern). */ ++struct count_pat_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "su64,epattern", group, MODE_none); ++ } ++}; ++SHAPE (count_pat) ++ ++/* uint64_t svfoo(svbool_t). */ ++struct count_pred_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "su64,vp", group, MODE_none); ++ } ++}; ++SHAPE (count_pred) ++ ++/* uint64_t svfoo[_t0](sv_t). */ ++struct count_vector_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "su64,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (1); ++ } ++}; ++SHAPE (count_vector) ++ ++/* svxN_t svfoo[_t0](sv_t, ..., sv_t) ++ ++ where there are N arguments in total. */ ++struct create_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "t0,v0*t", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (r.vectors_per_tuple ()); ++ } ++}; ++SHAPE (create) ++ ++/* sv_t svfoo[_n]_t0(_t, ..., _t) ++ ++ where there are enough arguments to fill 128 bits of data (or to ++ control 128 bits of data in the case of predicates). */ ++struct dupq_def : public overloaded_base<1> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ /* The "_n" suffix is optional; the full name has it, but the short ++ name doesn't. */ ++ build_all (b, "v0,s0*q", group, MODE_n, true); ++ } ++ ++ tree ++ resolve (function_resolver &) const OVERRIDE ++ { ++ /* The short forms just make "_n" implicit, so no resolution is needed. */ ++ gcc_unreachable (); ++ } ++}; ++SHAPE (dupq) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, uint64_t) ++ ++ where the final argument is an integer constant expression that when ++ multiplied by the number of bytes in t0 is in the range [0, 255]. */ ++struct ext_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (2, 1); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ unsigned int bytes = c.type_suffix (0).element_bytes; ++ return c.require_immediate_range (2, 0, 256 / bytes - 1); ++ } ++}; ++SHAPE (ext) ++ ++/* _t svfoo[_t0](_t, sv_t). 
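++   For example (assuming the usual ACLE spelling of svadda): svadda (pg,
++   initial, x) with a float32_t initial value and svfloat32_t x resolves
++   to svadda_f32 and returns a float32_t.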
*/ ++struct fold_left_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "s0,s0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || !r.require_derived_scalar_type (i, r.SAME_TYPE_CLASS) ++ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (fold_left) ++ ++/* sv_t svfoo[_t0](svxN_t, uint64_t) ++ ++ where the final argument is an integer constant expression in ++ the range [0, N - 1]. */ ++struct get_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,t0,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_integer_immediate (i + 1)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ unsigned int nvectors = c.vectors_per_tuple (); ++ return c.require_immediate_range (1, 0, nvectors - 1); ++ } ++}; ++SHAPE (get) ++ ++/* sv_t svfoo[_t0](sv_t, uint64_t) ++ _t svfoo[_n_t0](_t, uint64_t) ++ ++ where the t0 in the vector form is a signed or unsigned integer ++ whose size is tied to the [bhwd] suffix of "svfoo". */ ++struct inc_dec_def : public inc_dec_base ++{ ++ CONSTEXPR inc_dec_def () : inc_dec_base (false) {} ++ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ /* These functions are unusual in that the type suffixes for ++ the scalar and vector forms are not related. The vector ++ form always has exactly two potential suffixes while the ++ scalar form always has four. */ ++ if (group.types[2][0] == NUM_TYPE_SUFFIXES) ++ build_all (b, "v0,v0,su64", group, MODE_none); ++ else ++ build_all (b, "s0,s0,su64", group, MODE_n); ++ } ++}; ++SHAPE (inc_dec) ++ ++/* sv_t svfoo[_t0](sv_t, enum svpattern, uint64_t) ++ _t svfoo[_n_t0](_t, enum svpattern, uint64_t) ++ ++ where the t0 in the vector form is a signed or unsigned integer ++ whose size is tied to the [bhwd] suffix of "svfoo". */ ++struct inc_dec_pat_def : public inc_dec_base ++{ ++ CONSTEXPR inc_dec_pat_def () : inc_dec_base (true) {} ++ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ /* These functions are unusual in that the type suffixes for ++ the scalar and vector forms are not related. The vector ++ form always has exactly two potential suffixes while the ++ scalar form always has four. */ ++ if (group.types[2][0] == NUM_TYPE_SUFFIXES) ++ build_all (b, "v0,v0,epattern,su64", group, MODE_none); ++ else ++ build_all (b, "s0,s0,epattern,su64", group, MODE_n); ++ } ++}; ++SHAPE (inc_dec_pat) ++ ++/* sv_t svfoo[_t0](sv_t, svbool_t). 
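++   For example (assuming the usual ACLE spelling of the saturating counts):
++   svqincp (x, pg) with svint32_t x resolves to svqincp_s32, incrementing x
++   by the number of active elements in pg.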
*/ ++struct inc_dec_pred_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vp", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_vector_type (i + 1, VECTOR_TYPE_svbool_t)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (inc_dec_pred) ++ ++/* _t svfoo[_n_t0]_t1(_t, svbool_t) ++ ++ where _t1 is a _b suffix that describes the svbool_t argument. */ ++struct inc_dec_pred_scalar_def : public overloaded_base<2> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_n); ++ build_all (b, "s0,s0,vp", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_integer_scalar_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_vector_type (i + 1, VECTOR_TYPE_svbool_t)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type, r.type_suffix_ids[1]); ++ } ++}; ++SHAPE (inc_dec_pred_scalar) ++ ++/* sv[xN]_t svfoo_t0(). */ ++struct inherent_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "t0", group, MODE_none); ++ } ++}; ++SHAPE (inherent) ++ ++/* svbool_t svfoo[_b](). */ ++struct inherent_b_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ /* The "_b" suffix is optional; the full name has it, but the short ++ name doesn't. */ ++ build_all (b, "v0", group, MODE_none, true); ++ } ++ ++ tree ++ resolve (function_resolver &) const OVERRIDE ++ { ++ /* The short forms just make "_b" implicit, so no resolution is needed. */ ++ gcc_unreachable (); ++ } ++}; ++SHAPE (inherent_b) ++ ++/* sv[xN]_t svfoo[_t0](const _t *) ++ sv[xN]_t svfoo_vnum[_t0](const _t *, int64_t). */ ++struct load_def : public load_contiguous_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ b.add_overloaded_functions (group, MODE_vnum); ++ build_all (b, "t0,al", group, MODE_none); ++ build_all (b, "t0,al,ss64", group, MODE_vnum); ++ } ++}; ++SHAPE (load) ++ ++/* sv_t svfoo_t0(const _t *) ++ sv_t svfoo_vnum_t0(const _t *, int64_t) ++ ++ where is determined by the function base name. */ ++struct load_ext_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "t0,al", group, MODE_none); ++ build_all (b, "t0,al,ss64", group, MODE_vnum); ++ } ++}; ++SHAPE (load_ext) ++ ++/* sv_t svfoo_[s32]index_t0(const _t *, svint32_t) ++ sv_t svfoo_[s64]index_t0(const _t *, svint64_t) ++ sv_t svfoo_[u32]index_t0(const _t *, svuint32_t) ++ sv_t svfoo_[u64]index_t0(const _t *, svuint64_t) ++ ++ sv_t svfoo[_u32base]_index_t0(svuint32_t, int64_t) ++ sv_t svfoo[_u64base]_index_t0(svuint64_t, int64_t) ++ ++ where is determined by the function base name. 
*/ ++struct load_ext_gather_index_def : public load_ext_gather_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_index); ++ build_sv_index (b, "t0,al,d", group); ++ build_vs_index (b, "t0,b,ss64", group); ++ } ++}; ++SHAPE (load_ext_gather_index) ++ ++/* sv_t svfoo_[s64]index_t0(const _t *, svint64_t) ++ sv_t svfoo_[u64]index_t0(const _t *, svuint64_t) ++ ++ sv_t svfoo[_u32base]_index_t0(svuint32_t, int64_t) ++ sv_t svfoo[_u64base]_index_t0(svuint64_t, int64_t) ++ ++ where is determined by the function base name. This is ++ load_ext_gather_index that doesn't support 32-bit vector indices. */ ++struct load_ext_gather_index_restricted_def : public load_ext_gather_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_index); ++ build_sv_index64 (b, "t0,al,d", group); ++ build_vs_index (b, "t0,b,ss64", group); ++ } ++}; ++SHAPE (load_ext_gather_index_restricted) ++ ++/* sv_t svfoo_[s32]offset_t0(const _t *, svint32_t) ++ sv_t svfoo_[s64]offset_t0(const _t *, svint64_t) ++ sv_t svfoo_[u32]offset_t0(const _t *, svuint32_t) ++ sv_t svfoo_[u64]offset_t0(const _t *, svuint64_t) ++ ++ sv_t svfoo[_u32base]_t0(svuint32_t) ++ sv_t svfoo[_u64base]_t0(svuint64_t) ++ ++ sv_t svfoo[_u32base]_offset_t0(svuint32_t, int64_t) ++ sv_t svfoo[_u64base]_offset_t0(svuint64_t, int64_t) ++ ++ where is determined by the function base name. */ ++struct load_ext_gather_offset_def : public load_ext_gather_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_offset); ++ build_sv_offset (b, "t0,al,d", group); ++ build_v_base (b, "t0,b", group, true); ++ build_vs_offset (b, "t0,b,ss64", group); ++ } ++}; ++SHAPE (load_ext_gather_offset) ++ ++/* sv_t svfoo_[s64]offset_t0(const _t *, svint64_t) ++ sv_t svfoo_[u32]offset_t0(const _t *, svuint32_t) ++ sv_t svfoo_[u64]offset_t0(const _t *, svuint64_t) ++ ++ sv_t svfoo[_u32base]_t0(svuint32_t) ++ sv_t svfoo[_u64base]_t0(svuint64_t) ++ ++ sv_t svfoo[_u32base]_offset_t0(svuint32_t, int64_t) ++ sv_t svfoo[_u64base]_offset_t0(svuint64_t, int64_t) ++ ++ where is determined by the function base name. This is ++ load_ext_gather_offset without the s32 vector offset form. */ ++struct load_ext_gather_offset_restricted_def : public load_ext_gather_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_offset); ++ build_sv_uint_offset (b, "t0,al,d", group); ++ build_v_base (b, "t0,b", group, true); ++ build_vs_offset (b, "t0,b,ss64", group); ++ } ++}; ++SHAPE (load_ext_gather_offset_restricted) ++ ++/* sv_t svfoo_[s32]index[_t0](const _t *, svint32_t) ++ sv_t svfoo_[s64]index[_t0](const _t *, svint64_t) ++ sv_t svfoo_[u32]index[_t0](const _t *, svuint32_t) ++ sv_t svfoo_[u64]index[_t0](const _t *, svuint64_t) ++ ++ sv_t svfoo_[s32]offset[_t0](const _t *, svint32_t) ++ sv_t svfoo_[s64]offset[_t0](const _t *, svint64_t) ++ sv_t svfoo_[u32]offset[_t0](const _t *, svuint32_t) ++ sv_t svfoo_[u64]offset[_t0](const _t *, svuint64_t). 
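++   A hedged example (assuming the usual ACLE gather-load spellings):
++   svld1_gather_index (pg, base, indices) with a const float32_t *base and
++   svint32_t indices resolves to svld1_gather_s32index_f32.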
*/ ++struct load_gather_sv_def : public load_gather_sv_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_index); ++ b.add_overloaded_functions (group, MODE_offset); ++ build_sv_index (b, "t0,al,d", group); ++ build_sv_offset (b, "t0,al,d", group); ++ } ++}; ++SHAPE (load_gather_sv) ++ ++/* sv_t svfoo_[u32]index[_t0](const _t *, svuint32_t) ++ sv_t svfoo_[u64]index[_t0](const _t *, svuint64_t) ++ ++ sv_t svfoo_[s64]offset[_t0](const _t *, svint64_t) ++ sv_t svfoo_[u32]offset[_t0](const _t *, svuint32_t) ++ sv_t svfoo_[u64]offset[_t0](const _t *, svuint64_t) ++ ++ This is load_gather_sv without the 32-bit vector index forms and ++ without the s32 vector offset form. */ ++struct load_gather_sv_restricted_def : public load_gather_sv_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_index); ++ b.add_overloaded_functions (group, MODE_offset); ++ build_sv_index64 (b, "t0,al,d", group); ++ build_sv_uint_offset (b, "t0,al,d", group); ++ } ++}; ++SHAPE (load_gather_sv_restricted) ++ ++/* sv_t svfoo[_u32base]_t0(svuint32_t) ++ sv_t svfoo[_u64base]_t0(svuint64_t) ++ ++ sv_t svfoo[_u32base]_index_t0(svuint32_t, int64_t) ++ sv_t svfoo[_u64base]_index_t0(svuint64_t, int64_t) ++ ++ sv_t svfoo[_u32base]_offset_t0(svuint32_t, int64_t) ++ sv_t svfoo[_u64base]_offset_t0(svuint64_t, int64_t). */ ++struct load_gather_vs_def : public overloaded_base<1> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ /* The base vector mode is optional; the full name has it but the ++ short name doesn't. There is no ambiguity with SHAPE_load_gather_sv ++ because the latter uses an implicit type suffix. */ ++ build_v_base (b, "t0,b", group, true); ++ build_vs_index (b, "t0,b,ss64", group, true); ++ build_vs_offset (b, "t0,b,ss64", group, true); ++ } ++ ++ tree ++ resolve (function_resolver &) const OVERRIDE ++ { ++ /* The short name just makes the base vector mode implicit; ++ no resolution is needed. */ ++ gcc_unreachable (); ++ } ++}; ++SHAPE (load_gather_vs) ++ ++/* sv_t svfoo[_t0](const _t *) ++ ++ The only difference from "load" is that this shape has no vnum form. */ ++struct load_replicate_def : public load_contiguous_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "t0,al", group, MODE_none); ++ } ++}; ++SHAPE (load_replicate) ++ ++/* svbool_t svfoo(enum svpattern). */ ++struct pattern_pred_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "vp,epattern", group, MODE_none); ++ } ++}; ++SHAPE (pattern_pred) ++ ++/* void svfoo(const void *, svprfop) ++ void svfoo_vnum(const void *, int64_t, svprfop). 
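++   For example, svprfb (pg, addr, SV_PLDL1KEEP) and
++   svprfb_vnum (pg, addr, vnum, SV_PLDL1KEEP) follow this shape, using the
++   svprfop enumerators defined by the ACLE.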
*/ ++struct prefetch_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "_,ap,eprfop", group, MODE_none); ++ build_all (b, "_,ap,ss64,eprfop", group, MODE_vnum); ++ } ++}; ++SHAPE (prefetch) ++ ++/* void svfoo_[s32]index(const void *, svint32_t, svprfop) ++ void svfoo_[s64]index(const void *, svint64_t, svprfop) ++ void svfoo_[u32]index(const void *, svuint32_t, svprfop) ++ void svfoo_[u64]index(const void *, svuint64_t, svprfop) ++ ++ void svfoo[_u32base](svuint32_t, svprfop) ++ void svfoo[_u64base](svuint64_t, svprfop) ++ ++ void svfoo[_u32base]_index(svuint32_t, int64_t, svprfop) ++ void svfoo[_u64base]_index(svuint64_t, int64_t, svprfop). */ ++struct prefetch_gather_index_def : public prefetch_gather_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ b.add_overloaded_functions (group, MODE_index); ++ build_sv_index (b, "_,ap,d,eprfop", group); ++ build_v_base (b, "_,b,eprfop", group); ++ build_vs_index (b, "_,b,ss64,eprfop", group); ++ } ++}; ++SHAPE (prefetch_gather_index) ++ ++/* void svfoo_[s32]offset(const void *, svint32_t, svprfop) ++ void svfoo_[s64]offset(const void *, svint64_t, svprfop) ++ void svfoo_[u32]offset(const void *, svuint32_t, svprfop) ++ void svfoo_[u64]offset(const void *, svuint64_t, svprfop) ++ ++ void svfoo[_u32base](svuint32_t, svprfop) ++ void svfoo[_u64base](svuint64_t, svprfop) ++ ++ void svfoo[_u32base]_offset(svuint32_t, int64_t, svprfop) ++ void svfoo[_u64base]_offset(svuint64_t, int64_t, svprfop). */ ++struct prefetch_gather_offset_def : public prefetch_gather_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ b.add_overloaded_functions (group, MODE_offset); ++ build_sv_offset (b, "_,ap,d,eprfop", group); ++ build_v_base (b, "_,b,eprfop", group); ++ build_vs_offset (b, "_,b,ss64,eprfop", group); ++ } ++}; ++SHAPE (prefetch_gather_offset) ++ ++/* bool svfoo(svbool_t). */ ++struct ptest_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "sp,vp", group, MODE_none); ++ } ++}; ++SHAPE (ptest) ++ ++/* svbool_t svfoo(). */ ++struct rdffr_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "vp", group, MODE_none); ++ } ++}; ++SHAPE (rdffr) ++ ++/* _t svfoo[_t0](sv_t). */ ++struct reduction_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "s0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (1); ++ } ++}; ++SHAPE (reduction) ++ ++/* int64_t svfoo[_t0](sv_t) (for signed t0) ++ uint64_t svfoo[_t0](sv_t) (for unsigned t0) ++ _t svfoo[_t0](sv_t) (for floating-point t0) ++ ++ i.e. a version of "reduction" in which the return type for integers ++ always has 64 bits. 
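++   For example, svaddv (pg, x) with svint8_t x resolves to svaddv_s8 and
++   returns an int64_t, whereas the floating-point forms such as svaddv_f32
++   keep the element type for the scalar result.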
*/ ++struct reduction_wide_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "sw0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (1); ++ } ++}; ++SHAPE (reduction_wide) ++ ++/* svxN_t svfoo[_t0](svxN_t, uint64_t, sv_t) ++ ++ where the second argument is an integer constant expression in the ++ range [0, N - 1]. */ ++struct set_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "t0,t0,su64,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (3, i, nargs) ++ || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_integer_immediate (i + 1) ++ || !r.require_derived_vector_type (i + 2, i, type)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ unsigned int nvectors = c.vectors_per_tuple (); ++ return c.require_immediate_range (1, 0, nvectors - 1); ++ } ++}; ++SHAPE (set) ++ ++/* void svfoo(). */ ++struct setffr_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "_", group, MODE_none); ++ } ++}; ++SHAPE (setffr) ++ ++/* sv_t svfoo[_n_t0])(sv_t, uint64_t) ++ ++ where the final argument must be an integer constant expression in the ++ range [0, sizeof (_t) * 8 - 1]. */ ++struct shift_left_imm_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_n); ++ build_all (b, "v0,v0,su64", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (1, 1); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ unsigned int bits = c.type_suffix (0).element_bits; ++ return c.require_immediate_range (1, 0, bits - 1); ++ } ++}; ++SHAPE (shift_left_imm) ++ ++/* sv_t svfoo[_n_t0])(sv_t, uint64_t) ++ ++ where the final argument must be an integer constant expression in the ++ range [0, sizeof (_t) * 4 - 1]. */ ++struct shift_left_imm_long_def : public binary_imm_long_base ++{ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ unsigned int bits = c.type_suffix (0).element_bits / 2; ++ return c.require_immediate_range (1, 0, bits - 1); ++ } ++}; ++SHAPE (shift_left_imm_long) ++ ++/* sv_t svfoo[_n_t0])(sv_t, uint64_t) ++ ++ where the final argument must be an integer constant expression in the ++ range [0, sizeof (_t) * 8 - 1]. */ ++struct shift_left_imm_to_uint_def : public shift_left_imm_def ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_n); ++ build_all (b, "vu0,v0,su64", group, MODE_n); ++ } ++}; ++SHAPE (shift_left_imm_to_uint) ++ ++/* sv_t svfoo[_n_t0])(sv_t, uint64_t) ++ ++ where the final argument must be an integer constant expression in the ++ range [1, sizeof (_t) * 8]. 
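++   A hedged example (assuming svasrd keeps its usual ACLE form):
++   svasrd_x (pg, x, 2) with svint32_t x resolves to svasrd_n_s32_x, the
++   immediate being limited to [1, 32] for 32-bit elements.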
*/ ++struct shift_right_imm_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_n); ++ build_all (b, "v0,v0,su64", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (1, 1); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ unsigned int bits = c.type_suffix (0).element_bits; ++ return c.require_immediate_range (1, 1, bits); ++ } ++}; ++SHAPE (shift_right_imm) ++ ++/* sv_t svfoo[_n_t0])(sv_t, uint64_t) ++ ++ where the final argument must be an integer constant expression in the ++ range [1, sizeof (_t) * 4]. */ ++typedef shift_right_imm_narrow_wrapper, 1> ++ shift_right_imm_narrowb_def; ++SHAPE (shift_right_imm_narrowb) ++ ++/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) ++ ++ where the final argument must be an integer constant expression in the ++ range [1, sizeof (_t) * 4]. */ ++typedef shift_right_imm_narrow_wrapper, 2> ++ shift_right_imm_narrowt_def; ++SHAPE (shift_right_imm_narrowt) ++ ++/* sv_t svfoo[_n_t0])(sv_t, uint64_t) ++ ++ where the final argument must be an integer constant expression in the ++ range [1, sizeof (_t) * 4]. */ ++typedef binary_imm_narrowb_base ++ binary_imm_narrowb_base_unsigned; ++typedef shift_right_imm_narrow_wrapper ++ shift_right_imm_narrowb_to_uint_def; ++SHAPE (shift_right_imm_narrowb_to_uint) ++ ++/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) ++ ++ where the final argument must be an integer constant expression in the ++ range [1, sizeof (_t) * 4]. */ ++typedef binary_imm_narrowt_base ++ binary_imm_narrowt_base_unsigned; ++typedef shift_right_imm_narrow_wrapper ++ shift_right_imm_narrowt_to_uint_def; ++SHAPE (shift_right_imm_narrowt_to_uint) ++ ++/* void svfoo[_t0](_t *, sv[xN]_t) ++ void svfoo_vnum[_t0](_t *, int64_t, sv[xN]_t) ++ ++ where might be tied to (for non-truncating stores) or might ++ depend on the function base name (for truncating stores). */ ++struct store_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ b.add_overloaded_functions (group, MODE_vnum); ++ build_all (b, "_,as,t0", group, MODE_none); ++ build_all (b, "_,as,ss64,t0", group, MODE_vnum); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ bool vnum_p = r.mode_suffix_id == MODE_vnum; ++ gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); ++ ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (vnum_p ? 3 : 2, i, nargs) ++ || !r.require_pointer_type (i) ++ || (vnum_p && !r.require_scalar_type (i + 1, "int64_t")) ++ || ((type = r.infer_tuple_type (nargs - 1)) == NUM_TYPE_SUFFIXES)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (store) ++ ++/* void svfoo_[s32]index[_t0](_t *, svint32_t, sv_t) ++ void svfoo_[s64]index[_t0](_t *, svint64_t, sv_t) ++ void svfoo_[u32]index[_t0](_t *, svuint32_t, sv_t) ++ void svfoo_[u64]index[_t0](_t *, svuint64_t, sv_t) ++ ++ void svfoo[_u32base]_index[_t0](svuint32_t, int64_t, sv_t) ++ void svfoo[_u64base]_index[_t0](svuint64_t, int64_t, sv_t) ++ ++ where might be tied to (for non-truncating stores) or might ++ depend on the function base name (for truncating stores). 
*/ ++struct store_scatter_index_def : public store_scatter_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_index); ++ build_sv_index (b, "_,as,d,t0", group); ++ build_vs_index (b, "_,b,ss64,t0", group); ++ } ++}; ++SHAPE (store_scatter_index) ++ ++/* void svfoo_[s64]index[_t0](_t *, svint64_t, sv_t) ++ void svfoo_[u64]index[_t0](_t *, svuint64_t, sv_t) ++ ++ void svfoo[_u32base]_index[_t0](svuint32_t, int64_t, sv_t) ++ void svfoo[_u64base]_index[_t0](svuint64_t, int64_t, sv_t) ++ ++ i.e. a version of store_scatter_index that doesn't support 32-bit ++ vector indices. */ ++struct store_scatter_index_restricted_def : public store_scatter_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_index); ++ build_sv_index64 (b, "_,as,d,t0", group); ++ build_vs_index (b, "_,b,ss64,t0", group); ++ } ++}; ++SHAPE (store_scatter_index_restricted) ++ ++/* void svfoo_[s32]offset[_t0](_t *, svint32_t, sv_t) ++ void svfoo_[s64]offset[_t0](_t *, svint64_t, sv_t) ++ void svfoo_[u32]offset[_t0](_t *, svuint32_t, sv_t) ++ void svfoo_[u64]offset[_t0](_t *, svuint64_t, sv_t) ++ ++ void svfoo[_u32base_t0](svuint32_t, sv_t) ++ void svfoo[_u64base_t0](svuint64_t, sv_t) ++ ++ void svfoo[_u32base]_offset[_t0](svuint32_t, int64_t, sv_t) ++ void svfoo[_u64base]_offset[_t0](svuint64_t, int64_t, sv_t) ++ ++ where might be tied to (for non-truncating stores) or might ++ depend on the function base name (for truncating stores). */ ++struct store_scatter_offset_def : public store_scatter_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ b.add_overloaded_functions (group, MODE_offset); ++ build_sv_offset (b, "_,as,d,t0", group); ++ build_v_base (b, "_,b,t0", group); ++ build_vs_offset (b, "_,b,ss64,t0", group); ++ } ++}; ++SHAPE (store_scatter_offset) ++ ++/* void svfoo_[s64]offset[_t0](_t *, svint64_t, sv_t) ++ void svfoo_[u32]offset[_t0](_t *, svuint32_t, sv_t) ++ void svfoo_[u64]offset[_t0](_t *, svuint64_t, sv_t) ++ ++ void svfoo[_u32base_t0](svuint32_t, sv_t) ++ void svfoo[_u64base_t0](svuint64_t, sv_t) ++ ++ void svfoo[_u32base]_offset[_t0](svuint32_t, int64_t, sv_t) ++ void svfoo[_u64base]_offset[_t0](svuint64_t, int64_t, sv_t) ++ ++ i.e. a version of store_scatter_offset that doesn't support svint32_t ++ offsets. */ ++struct store_scatter_offset_restricted_def : public store_scatter_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ b.add_overloaded_functions (group, MODE_offset); ++ build_sv_uint_offset (b, "_,as,d,t0", group); ++ build_v_base (b, "_,b,t0", group); ++ build_vs_offset (b, "_,b,ss64,t0", group); ++ } ++}; ++SHAPE (store_scatter_offset_restricted) ++ ++/* sv_t svfoo[_t0](svxN_t, sv_t). 
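++   For example (assuming the SVE2 ACLE spelling of the two-vector table
++   lookup): svtbl2 (table, indices) with an svfloat32x2_t table and
++   svuint32_t indices resolves to svtbl2_f32.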
*/ ++struct tbl_tuple_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,t0,vu0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (2, i, nargs) ++ || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (tbl_tuple) ++ ++/* sv_t svfoo[_t0](sv_t, svbfloatt16_t, svbfloat16_t). */ ++struct ternary_bfloat_def ++ : public ternary_resize2_base<16, TYPE_bfloat, TYPE_bfloat> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vB,vB", group, MODE_none); ++ } ++}; ++SHAPE (ternary_bfloat) ++ ++/* sv_t svfoo[_t0](sv_t, svbfloat16_t, svbfloat16_t, uint64_t) ++ ++ where the final argument is an integer constant expression in the range ++ [0, 7]. */ ++typedef ternary_bfloat_lane_base<1> ternary_bfloat_lane_def; ++SHAPE (ternary_bfloat_lane) ++ ++/* sv_t svfoo[_t0](sv_t, svbfloat16_t, svbfloat16_t, uint64_t) ++ ++ where the final argument is an integer constant expression in the range ++ [0, 3]. */ ++typedef ternary_bfloat_lane_base<2> ternary_bfloat_lanex2_def; ++SHAPE (ternary_bfloat_lanex2) ++ ++/* sv_t svfoo[_t0](sv_t, svbfloatt16_t, svbfloat16_t) ++ sv_t svfoo[_n_t0](sv_t, svbfloat16_t, bfloat16_t). */ ++struct ternary_bfloat_opt_n_def ++ : public ternary_resize2_opt_n_base<16, TYPE_bfloat, TYPE_bfloat> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vB,vB", group, MODE_none); ++ build_all (b, "v0,v0,vB,sB", group, MODE_n); ++ } ++}; ++SHAPE (ternary_bfloat_opt_n) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, ++ uint64_t) ++ ++ where the final argument is an integer constant expression in the range ++ [0, 16 / sizeof (_t) - 1]. */ ++struct ternary_intq_uintq_lane_def ++ : public ternary_qq_lane_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vqs0,vqu0,su64", group, MODE_none); ++ } ++}; ++SHAPE (ternary_intq_uintq_lane) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, sv_t, ++ _t). */ ++struct ternary_intq_uintq_opt_n_def ++ : public ternary_resize2_opt_n_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vqs0,vqu0", group, MODE_none); ++ build_all (b, "v0,v0,vqs0,squ0", group, MODE_n); ++ } ++}; ++SHAPE (ternary_intq_uintq_opt_n) ++ ++/* svbool_t svfoo[_](sv_t, sv_t, sv_t, uint64_t) ++ ++ where the final argument is an integer constant expression in the ++ range [0, 16 / sizeof (_t) - 1]. 
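++   For example, svmla_lane (acc, a, b, 1) with svfloat32_t operands
++   resolves to svmla_lane_f32, the lane index being limited to [0, 3] for
++   32-bit elements.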
*/ ++struct ternary_lane_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0,v0,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (3, 1); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_lane_index (3); ++ } ++}; ++SHAPE (ternary_lane) ++ ++/* svbool_t svfoo[_](sv_t, sv_t, sv_t, uint64_t, uint64_t) ++ ++ where the penultimate argument is an integer constant expression in ++ the range [0, 8 / sizeof (_t) - 1] and where the final argument ++ is an integer constant expression in {0, 90, 180, 270}. */ ++struct ternary_lane_rotate_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0,v0,su64,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (3, 2); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return (c.require_immediate_lane_index (3, 2) ++ && c.require_immediate_one_of (4, 0, 90, 180, 270)); ++ } ++}; ++SHAPE (ternary_lane_rotate) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, uint64_t) ++ ++ where the final argument is an integer constant expression in the range ++ [0, 32 / sizeof (_t) - 1]. */ ++struct ternary_long_lane_def ++ : public ternary_resize2_lane_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vh0,vh0,su64", group, MODE_none); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_lane_index (3); ++ } ++}; ++SHAPE (ternary_long_lane) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, sv_t, _t) ++ ++ i.e. a version of the standard ternary shape ternary_opt_n in which ++ the element type of the last two arguments is the half-sized ++ equivalent of . */ ++struct ternary_long_opt_n_def ++ : public ternary_resize2_opt_n_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vh0,vh0", group, MODE_none); ++ build_all (b, "v0,v0,vh0,sh0", group, MODE_n); ++ } ++}; ++SHAPE (ternary_long_opt_n) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, sv_t, _t) ++ ++ i.e. the standard shape for ternary operations that operate on ++ uniform types. */ ++struct ternary_opt_n_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0,v0", group, MODE_none); ++ build_all (b, "v0,v0,v0,s0", group, MODE_n); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform_opt_n (3); ++ } ++}; ++SHAPE (ternary_opt_n) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, uint64_t) ++ ++ where the final argument is an integer constant expression in the range ++ [0, 16 / sizeof (_t) - 1]. 
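The "[0, 16 / sizeof (..._t) - 1]" ranges used by the lane shapes above are one 128-bit segment's worth of lanes for the element type concerned (the "8 /" variant in ternary_lane_rotate appears to index pairs of elements). A minimal standalone sketch, added here for illustration only and not part of the patch:

#include <cstdint>

/* Highest valid lane index for ELEMENT_BYTES-sized elements within a
   SEGMENT_BYTES (by default 128-bit) segment.  */
constexpr unsigned
max_lane_index (unsigned element_bytes, unsigned segment_bytes = 16)
{
  return segment_bytes / element_bytes - 1;
}

static_assert (max_lane_index (sizeof (std::int16_t)) == 7, "16-bit: 0..7");
static_assert (max_lane_index (sizeof (std::int32_t)) == 3, "32-bit: 0..3");
static_assert (max_lane_index (sizeof (std::int64_t)) == 1, "64-bit: 0..1");

int main () {}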
*/ ++struct ternary_qq_lane_def : public ternary_qq_lane_base<> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none); ++ } ++}; ++SHAPE (ternary_qq_lane) ++ ++/* svbool_t svfoo[_](sv_t, sv_t, sv_t, ++ uint64_t) ++ ++ where the final argument is an integer constant expression in ++ {0, 90, 180, 270}. */ ++struct ternary_qq_lane_rotate_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vq0,vq0,su64,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (5, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, ++ r.QUARTER_SIZE) ++ || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS, ++ r.QUARTER_SIZE) ++ || !r.require_integer_immediate (i + 3) ++ || !r.require_integer_immediate (i + 4)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return (c.require_immediate_lane_index (3, 4) ++ && c.require_immediate_one_of (4, 0, 90, 180, 270)); ++ } ++}; ++SHAPE (ternary_qq_lane_rotate) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, sv_t, _t) ++ ++ i.e. a version of the standard ternary shape ternary_opt_n in which ++ the element type of the last two arguments is the quarter-sized ++ equivalent of . */ ++struct ternary_qq_opt_n_def ++ : public ternary_resize2_opt_n_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vq0,vq0", group, MODE_none); ++ build_all (b, "v0,v0,vq0,sq0", group, MODE_n); ++ } ++}; ++SHAPE (ternary_qq_opt_n) ++ ++/* svbool_t svfoo[_](sv_t, sv_t, sv_t, ++ uint64_t) ++ ++ where the final argument is an integer constant expression in ++ {0, 90, 180, 270}. */ ++struct ternary_qq_rotate_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (4, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, ++ r.QUARTER_SIZE) ++ || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS, ++ r.QUARTER_SIZE) ++ || !r.require_integer_immediate (i + 3)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_one_of (3, 0, 90, 180, 270); ++ } ++}; ++SHAPE (ternary_qq_rotate) ++ ++/* svbool_t svfoo[_](sv_t, sv_t, sv_t, uint64_t) ++ ++ where the final argument is an integer constant expression in ++ {0, 90, 180, 270}. 
*/ ++struct ternary_rotate_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0,v0,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (3, 1); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_one_of (3, 0, 90, 180, 270); ++ } ++}; ++SHAPE (ternary_rotate) ++ ++/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) ++ ++ where the final argument must be an integer constant expression in the ++ range [0, sizeof (_t) * 8 - 1]. */ ++struct ternary_shift_left_imm_def : public ternary_shift_imm_base ++{ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ unsigned int bits = c.type_suffix (0).element_bits; ++ return c.require_immediate_range (2, 0, bits - 1); ++ } ++}; ++SHAPE (ternary_shift_left_imm) ++ ++/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) ++ ++ where the final argument must be an integer constant expression in the ++ range [1, sizeof (_t) * 8]. */ ++struct ternary_shift_right_imm_def : public ternary_shift_imm_base ++{ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ unsigned int bits = c.type_suffix (0).element_bits; ++ return c.require_immediate_range (2, 1, bits); ++ } ++}; ++SHAPE (ternary_shift_right_imm) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t). */ ++struct ternary_uint_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0,vu0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (3, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || !r.require_matching_vector_type (i + 1, type) ++ || !r.require_derived_vector_type (i + 2, i, type, TYPE_unsigned)) ++ return error_mark_node; ++ ++ return r.resolve_to (r.mode_suffix_id, type); ++ } ++}; ++SHAPE (ternary_uint) ++ ++/* sv_t svfoo[_t0](sv_t, svu_t, ++ sv_t). */ ++struct ternary_uintq_intq_def ++ : public ternary_resize2_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vqu0,vqs0", group, MODE_none); ++ } ++}; ++SHAPE (ternary_uintq_intq) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, ++ uint64_t) ++ ++ where the final argument is an integer constant expression in the range ++ [0, 16 / sizeof (_t) - 1]. */ ++struct ternary_uintq_intq_lane_def ++ : public ternary_qq_lane_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vqu0,vqs0,su64", group, MODE_none); ++ } ++}; ++SHAPE (ternary_uintq_intq_lane) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) ++ sv_t svfoo[_n_t0](sv_t, sv_t, ++ _t). 
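The two ternary shift shapes above differ only in the immediate range they accept: left shifts allow [0, element_bits - 1] while right shifts allow [1, element_bits], which matches the underlying immediate encodings. A standalone sketch of those two checks (illustrative only, not part of the patch):

constexpr bool
valid_left_shift (unsigned amount, unsigned element_bits)
{
  return amount <= element_bits - 1;              /* [0, bits - 1] */
}

constexpr bool
valid_right_shift (unsigned amount, unsigned element_bits)
{
  return amount >= 1 && amount <= element_bits;   /* [1, bits] */
}

static_assert (valid_left_shift (0, 8) && !valid_left_shift (8, 8),
               "left shift by the full element width is invalid");
static_assert (valid_right_shift (8, 8) && !valid_right_shift (0, 8),
               "right shift by zero is invalid");

int main () {}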
*/ ++struct ternary_uintq_intq_opt_n_def ++ : public ternary_resize2_opt_n_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,vqu0,vqs0", group, MODE_none); ++ build_all (b, "v0,v0,vqu0,sqs0", group, MODE_n); ++ } ++}; ++SHAPE (ternary_uintq_intq_opt_n) ++ ++/* svbool_t svfoo[_](sv_t, sv_t, uint64_t) ++ ++ where the final argument is an integer constant expression in the ++ range [0, 7]. */ ++struct tmad_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0,v0,su64", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_uniform (2, 1); ++ } ++ ++ bool ++ check (function_checker &c) const OVERRIDE ++ { ++ return c.require_immediate_range (2, 0, 7); ++ } ++}; ++SHAPE (tmad) ++ ++/* sv_t svfoo[_t0](sv_t) ++ ++ i.e. the standard shape for unary operations that operate on ++ uniform types. */ ++struct unary_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_unary (); ++ } ++}; ++SHAPE (unary) ++ ++/* sv_t svfoo_t0[_t1](sv_t) ++ ++ where the target type must be specified explicitly but the source ++ type can be inferred. */ ++struct unary_convert_def : public overloaded_base<1> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v1", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_unary (r.type_suffix (0).tclass, ++ r.type_suffix (0).element_bits); ++ } ++}; ++SHAPE (unary_convert) ++ ++/* sv_t svfoo_t0[_t1](sv_t, sv_t) ++ ++ This is a version of unary_convert in which the even-indexed ++ elements are passed in as a first parameter, before any governing ++ predicate. */ ++struct unary_convert_narrowt_def : public overloaded_base<1> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,v1", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_unary (r.type_suffix (0).tclass, ++ r.type_suffix (0).element_bits, true); ++ } ++}; ++SHAPE (unary_convert_narrowt) ++ ++/* sv_t svfoo[_t0](sv_t). */ ++struct unary_long_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,vh0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type, result_type; ++ if (!r.check_gp_argument (1, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES ++ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ if (tree res = r.lookup_form (r.mode_suffix_id, result_type)) ++ return res; ++ ++ return r.report_no_such_form (type); ++ } ++}; ++SHAPE (unary_long) ++ ++/* sv_t svfoo[_n]_t0(_t). 
*/ ++struct unary_n_def : public overloaded_base<1> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ /* The "_n" suffix is optional; the full name has it, but the short ++ name doesn't. */ ++ build_all (b, "v0,s0", group, MODE_n, true); ++ } ++ ++ tree ++ resolve (function_resolver &) const OVERRIDE ++ { ++ /* The short forms just make "_n" implicit, so no resolution is needed. */ ++ gcc_unreachable (); ++ } ++}; ++SHAPE (unary_n) ++ ++/* sv_t svfoo[_t0](sv_t). */ ++typedef unary_narrowb_base<> unary_narrowb_def; ++SHAPE (unary_narrowb) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t). */ ++typedef unary_narrowt_base<> unary_narrowt_def; ++SHAPE (unary_narrowt) ++ ++/* sv_t svfoo[_t0](sv_t). */ ++typedef unary_narrowb_base unary_narrowb_to_uint_def; ++SHAPE (unary_narrowb_to_uint) ++ ++/* sv_t svfoo[_t0](sv_t, sv_t). */ ++typedef unary_narrowt_base unary_narrowt_to_uint_def; ++SHAPE (unary_narrowt_to_uint) ++ ++/* svbool_t svfoo(svbool_t). */ ++struct unary_pred_def : public nonoverloaded_base ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ build_all (b, "v0,v0", group, MODE_none); ++ } ++}; ++SHAPE (unary_pred) ++ ++/* sv_t svfoo[_t0](sv_t) ++ ++ i.e. a version of "unary" in which the returned vector contains ++ signed integers. */ ++struct unary_to_int_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "vs0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_unary (TYPE_signed); ++ } ++}; ++SHAPE (unary_to_int) ++ ++/* sv_t svfoo[_t0](sv_t) ++ ++ i.e. a version of "unary" in which the returned vector contains ++ unsigned integers. */ ++struct unary_to_uint_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "vu0,v0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ return r.resolve_unary (TYPE_unsigned); ++ } ++}; ++SHAPE (unary_to_uint) ++ ++/* sv_t svfoo[_t0](sv_t) ++ ++ where always belongs a certain type class, and where ++ therefore uniquely determines . */ ++struct unary_uint_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,vu0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (1, i, nargs) ++ || (type = r.infer_unsigned_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ /* Search for a valid suffix with the same number of bits as TYPE. */ ++ unsigned int element_bits = type_suffixes[type].element_bits; ++ if (type_suffixes[type].unsigned_p) ++ for (unsigned int j = 0; j < NUM_TYPE_SUFFIXES; ++j) ++ if (type_suffixes[j].element_bits == element_bits) ++ if (tree res = r.lookup_form (r.mode_suffix_id, ++ type_suffix_index (j))) ++ return res; ++ ++ return r.report_no_such_form (type); ++ } ++}; ++SHAPE (unary_uint) ++ ++/* sv_t svfoo[_](sv_t) ++ ++ i.e. a version of "unary" in which the source elements are half the ++ size of the destination elements, but have the same type class. 
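unary_uint::resolve above has to map an unsigned argument suffix onto whatever same-width suffix the function actually provides. A standalone sketch of that search, with a made-up suffix table (not part of the patch):

#include <cstdio>

struct suffix_info { const char *name; unsigned element_bits; bool has_form; };

/* Return the first suffix of width BITS for which a form exists,
   mirroring the lookup_form loop in unary_uint::resolve.  */
static const char *
resolve_same_width (const suffix_info *suffixes, unsigned n, unsigned bits)
{
  for (unsigned i = 0; i < n; ++i)
    if (suffixes[i].element_bits == bits && suffixes[i].has_form)
      return suffixes[i].name;
  return nullptr;               /* i.e. report_no_such_form */
}

int main ()
{
  const suffix_info table[] = { { "_s16", 16, false },
                                { "_f16", 16, true },
                                { "_s32", 32, true } };
  std::printf ("%s\n", resolve_same_width (table, 3, 16));  /* prints _f16 */
}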
*/ ++struct unary_widen_def : public overloaded_base<0> ++{ ++ void ++ build (function_builder &b, const function_group_info &group) const OVERRIDE ++ { ++ b.add_overloaded_functions (group, MODE_none); ++ build_all (b, "v0,vh0", group, MODE_none); ++ } ++ ++ tree ++ resolve (function_resolver &r) const OVERRIDE ++ { ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!r.check_gp_argument (1, i, nargs) ++ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ /* There is only a single form for predicates. */ ++ if (type == TYPE_SUFFIX_b) ++ return r.resolve_to (r.mode_suffix_id, type); ++ ++ if (type_suffixes[type].integer_p ++ && type_suffixes[type].element_bits < 64) ++ { ++ type_suffix_index wide_suffix ++ = find_type_suffix (type_suffixes[type].tclass, ++ type_suffixes[type].element_bits * 2); ++ if (tree res = r.lookup_form (r.mode_suffix_id, wide_suffix)) ++ return res; ++ } ++ ++ return r.report_no_such_form (type); ++ } ++}; ++SHAPE (unary_widen) ++ ++} +diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +new file mode 100644 +index 000000000..b36f50acd +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +@@ -0,0 +1,191 @@ ++/* ACLE support for AArch64 SVE (function shapes) ++ Copyright (C) 2018-2019 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#ifndef GCC_AARCH64_SVE_BUILTINS_SHAPES_H ++#define GCC_AARCH64_SVE_BUILTINS_SHAPES_H ++ ++namespace aarch64_sve ++{ ++ /* The naming convention is: ++ ++ - to use the name of the function if the rules are very specific to ++ a particular function (e.g. svext, for which the range of the ++ final immediate value is in no way generic). ++ ++ - to use names like "unary" etc. if the rules are somewhat generic, ++ especially if there are no ranges involved. ++ ++ When using generic names, the handling of the final vector argument ++ can be modified as follows: ++ ++ - an "_n" suffix changes the argument from a vector to a scalar. ++ ++ - an "_opt_n" suffix says that there are two forms of each function: ++ one in which the argument is the usual vector, and one in which it ++ is replaced by a scalar. ++ ++ - "_int" and "_uint" replace the argument's element type with a ++ signed or unsigned integer of the same width. The suffixes above ++ then indicate whether this final argument is or might be a scalar. ++ ++ - "_int64" and "_uint64" similarly replace the argument's element type ++ with int64_t or uint64_t. ++ ++ - "_wide" replaces the argument's element type with a 64-bit integer ++ of the same signedness. This only makes sense for integer elements. ++ ++ - "_lane" indicates that the argument is indexed by a constant lane ++ number, provided as an immediately-following argument of type uint64_t. ++ ++ Also: ++ ++ - "inherent" means that the function takes no arguments. 
++ ++ - "_rotate" means that the final argument is a rotation amount ++ (0, 90, 180 or 270). ++ ++ - "_scalar" indicates that all data arguments are scalars rather ++ than vectors. ++ ++ - in gather/scatter addresses, "sv" stands for "scalar base, ++ vector displacement" while "vs" stands for "vector base, ++ scalar displacement". ++ ++ - "_pred" indicates that the function takes an svbool_t argument ++ that does not act as a governing predicate.. */ ++ namespace shapes ++ { ++ extern const function_shape *const adr_index; ++ extern const function_shape *const adr_offset; ++ extern const function_shape *const binary; ++ extern const function_shape *const binary_int_opt_n; ++ extern const function_shape *const binary_lane; ++ extern const function_shape *const binary_long_lane; ++ extern const function_shape *const binary_long_opt_n; ++ extern const function_shape *const binary_n; ++ extern const function_shape *const binary_narrowb_opt_n; ++ extern const function_shape *const binary_narrowt_opt_n; ++ extern const function_shape *const binary_opt_n; ++ extern const function_shape *const binary_pred; ++ extern const function_shape *const binary_rotate; ++ extern const function_shape *const binary_scalar; ++ extern const function_shape *const binary_to_uint; ++ extern const function_shape *const binary_uint; ++ extern const function_shape *const binary_uint_n; ++ extern const function_shape *const binary_uint_opt_n; ++ extern const function_shape *const binary_uint64_n; ++ extern const function_shape *const binary_uint64_opt_n; ++ extern const function_shape *const binary_wide; ++ extern const function_shape *const binary_wide_opt_n; ++ extern const function_shape *const clast; ++ extern const function_shape *const compare; ++ extern const function_shape *const compare_opt_n; ++ extern const function_shape *const compare_ptr; ++ extern const function_shape *const compare_scalar; ++ extern const function_shape *const compare_wide_opt_n; ++ extern const function_shape *const count_inherent; ++ extern const function_shape *const count_pat; ++ extern const function_shape *const count_pred; ++ extern const function_shape *const count_vector; ++ extern const function_shape *const create; ++ extern const function_shape *const dupq; ++ extern const function_shape *const ext; ++ extern const function_shape *const fold_left; ++ extern const function_shape *const get; ++ extern const function_shape *const inc_dec; ++ extern const function_shape *const inc_dec_pat; ++ extern const function_shape *const inc_dec_pred; ++ extern const function_shape *const inc_dec_pred_scalar; ++ extern const function_shape *const inherent; ++ extern const function_shape *const inherent_b; ++ extern const function_shape *const load; ++ extern const function_shape *const load_ext; ++ extern const function_shape *const load_ext_gather_index; ++ extern const function_shape *const load_ext_gather_index_restricted; ++ extern const function_shape *const load_ext_gather_offset; ++ extern const function_shape *const load_ext_gather_offset_restricted; ++ extern const function_shape *const load_gather_sv; ++ extern const function_shape *const load_gather_sv_restricted; ++ extern const function_shape *const load_gather_vs; ++ extern const function_shape *const load_replicate; ++ extern const function_shape *const mmla; ++ extern const function_shape *const pattern_pred; ++ extern const function_shape *const prefetch; ++ extern const function_shape *const prefetch_gather_index; ++ extern const function_shape *const 
prefetch_gather_offset; ++ extern const function_shape *const ptest; ++ extern const function_shape *const rdffr; ++ extern const function_shape *const reduction; ++ extern const function_shape *const reduction_wide; ++ extern const function_shape *const set; ++ extern const function_shape *const setffr; ++ extern const function_shape *const shift_left_imm_long; ++ extern const function_shape *const shift_left_imm_to_uint; ++ extern const function_shape *const shift_right_imm; ++ extern const function_shape *const shift_right_imm_narrowb; ++ extern const function_shape *const shift_right_imm_narrowt; ++ extern const function_shape *const shift_right_imm_narrowb_to_uint; ++ extern const function_shape *const shift_right_imm_narrowt_to_uint; ++ extern const function_shape *const store; ++ extern const function_shape *const store_scatter_index; ++ extern const function_shape *const store_scatter_index_restricted; ++ extern const function_shape *const store_scatter_offset; ++ extern const function_shape *const store_scatter_offset_restricted; ++ extern const function_shape *const tbl_tuple; ++ extern const function_shape *const ternary_bfloat; ++ extern const function_shape *const ternary_bfloat_lane; ++ extern const function_shape *const ternary_bfloat_lanex2; ++ extern const function_shape *const ternary_bfloat_opt_n; ++ extern const function_shape *const ternary_intq_uintq_lane; ++ extern const function_shape *const ternary_intq_uintq_opt_n; ++ extern const function_shape *const ternary_lane; ++ extern const function_shape *const ternary_lane_rotate; ++ extern const function_shape *const ternary_long_lane; ++ extern const function_shape *const ternary_long_opt_n; ++ extern const function_shape *const ternary_opt_n; ++ extern const function_shape *const ternary_qq_lane; ++ extern const function_shape *const ternary_qq_lane_rotate; ++ extern const function_shape *const ternary_qq_opt_n; ++ extern const function_shape *const ternary_qq_rotate; ++ extern const function_shape *const ternary_rotate; ++ extern const function_shape *const ternary_shift_left_imm; ++ extern const function_shape *const ternary_shift_right_imm; ++ extern const function_shape *const ternary_uint; ++ extern const function_shape *const ternary_uintq_intq; ++ extern const function_shape *const ternary_uintq_intq_lane; ++ extern const function_shape *const ternary_uintq_intq_opt_n; ++ extern const function_shape *const tmad; ++ extern const function_shape *const unary; ++ extern const function_shape *const unary_convert; ++ extern const function_shape *const unary_convert_narrowt; ++ extern const function_shape *const unary_long; ++ extern const function_shape *const unary_n; ++ extern const function_shape *const unary_narrowb; ++ extern const function_shape *const unary_narrowt; ++ extern const function_shape *const unary_narrowb_to_uint; ++ extern const function_shape *const unary_narrowt_to_uint; ++ extern const function_shape *const unary_pred; ++ extern const function_shape *const unary_to_int; ++ extern const function_shape *const unary_to_uint; ++ extern const function_shape *const unary_uint; ++ extern const function_shape *const unary_widen; ++ } ++} ++ ++#endif +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc +new file mode 100644 +index 000000000..f830d9294 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins.cc +@@ -0,0 +1,3568 @@ ++/* ACLE support for AArch64 SVE ++ Copyright (C) 2018-2019 Free Software Foundation, Inc. 
++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "tree.h" ++#include "rtl.h" ++#include "tm_p.h" ++#include "memmodel.h" ++#include "insn-codes.h" ++#include "optabs.h" ++#include "recog.h" ++#include "diagnostic.h" ++#include "expr.h" ++#include "basic-block.h" ++#include "function.h" ++#include "fold-const.h" ++#include "gimple.h" ++#include "gimple-iterator.h" ++#include "gimplify.h" ++#include "explow.h" ++#include "emit-rtl.h" ++#include "tree-vector-builder.h" ++#include "stor-layout.h" ++#include "regs.h" ++#include "alias.h" ++#include "gimple-fold.h" ++#include "langhooks.h" ++#include "stringpool.h" ++#include "aarch64-sve-builtins.h" ++#include "aarch64-sve-builtins-base.h" ++#include "aarch64-sve-builtins-shapes.h" ++ ++namespace aarch64_sve { ++ ++/* Static information about each single-predicate or single-vector ++ ABI and ACLE type. */ ++struct vector_type_info ++{ ++ /* The name of the type as declared by arm_sve.h. */ ++ const char *acle_name; ++ ++ /* The name of the type specified in AAPCS64. The type is always ++ available under this name, even when arm_sve.h isn't included. */ ++ const char *abi_name; ++ ++ /* The C++ mangling of ABI_NAME. */ ++ const char *mangled_name; ++}; ++ ++/* Describes a function decl. */ ++class GTY(()) registered_function ++{ ++public: ++ /* The ACLE function that the decl represents. */ ++ function_instance instance GTY ((skip)); ++ ++ /* The decl itself. */ ++ tree decl; ++ ++ /* The architecture extensions that the function requires, as a set of ++ AARCH64_FL_* flags. */ ++ uint64_t required_extensions; ++ ++ /* True if the decl represents an overloaded function that needs to be ++ resolved by function_resolver. */ ++ bool overloaded_p; ++}; ++ ++/* Hash traits for registered_function. */ ++struct registered_function_hasher : nofree_ptr_hash ++{ ++ typedef function_instance compare_type; ++ ++ static hashval_t hash (value_type); ++ static bool equal (value_type, const compare_type &); ++}; ++ ++/* Information about each single-predicate or single-vector type. */ ++static CONSTEXPR const vector_type_info vector_types[] = { ++#define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \ ++ { #ACLE_NAME, #ABI_NAME, #NCHARS #ABI_NAME }, ++#include "aarch64-sve-builtins.def" ++}; ++ ++/* The function name suffix associated with each predication type. */ ++static const char *const pred_suffixes[NUM_PREDS + 1] = { ++ "", ++ "", ++ "_m", ++ "_x", ++ "_z", ++ "" ++}; ++ ++/* Static information about each mode_suffix_index. 
*/ ++CONSTEXPR const mode_suffix_info mode_suffixes[] = { ++#define VECTOR_TYPE_none NUM_VECTOR_TYPES ++#define DEF_SVE_MODE(NAME, BASE, DISPLACEMENT, UNITS) \ ++ { "_" #NAME, VECTOR_TYPE_##BASE, VECTOR_TYPE_##DISPLACEMENT, UNITS_##UNITS }, ++#include "aarch64-sve-builtins.def" ++#undef VECTOR_TYPE_none ++ { "", NUM_VECTOR_TYPES, NUM_VECTOR_TYPES, UNITS_none } ++}; ++ ++/* Static information about each type_suffix_index. */ ++CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { ++#define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \ ++ { "_" #NAME, \ ++ VECTOR_TYPE_##ACLE_TYPE, \ ++ TYPE_##CLASS, \ ++ BITS, \ ++ BITS / BITS_PER_UNIT, \ ++ TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \ ++ TYPE_##CLASS == TYPE_unsigned, \ ++ TYPE_##CLASS == TYPE_float, \ ++ TYPE_##CLASS == TYPE_bool, \ ++ 0, \ ++ MODE }, ++#include "aarch64-sve-builtins.def" ++ { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false, false, ++ 0, VOIDmode } ++}; ++ ++/* Define a TYPES_ macro for each combination of type ++ suffixes that an ACLE function can have, where is the ++ name used in DEF_SVE_FUNCTION entries. ++ ++ Use S (T) for single type suffix T and D (T1, T2) for a pair of type ++ suffixes T1 and T2. Use commas to separate the suffixes. ++ ++ Although the order shouldn't matter, the convention is to sort the ++ suffixes lexicographically after dividing suffixes into a type ++ class ("b", "f", etc.) and a numerical bit count. */ ++ ++/* _b8 _b16 _b32 _b64. */ ++#define TYPES_all_pred(S, D) \ ++ S (b8), S (b16), S (b32), S (b64) ++ ++/* _f16 _f32 _f64. */ ++#define TYPES_all_float(S, D) \ ++ S (f16), S (f32), S (f64) ++ ++/* _s8 _s16 _s32 _s64. */ ++#define TYPES_all_signed(S, D) \ ++ S (s8), S (s16), S (s32), S (s64) ++ ++/* _f16 _f32 _f64 ++ _s8 _s16 _s32 _s64. */ ++#define TYPES_all_float_and_signed(S, D) \ ++ TYPES_all_float (S, D), TYPES_all_signed (S, D) ++ ++/* _u8 _u16 _u32 _u64. */ ++#define TYPES_all_unsigned(S, D) \ ++ S (u8), S (u16), S (u32), S (u64) ++ ++/* _s8 _s16 _s32 _s64 ++ _u8 _u16 _u32 _u64. */ ++#define TYPES_all_integer(S, D) \ ++ TYPES_all_signed (S, D), TYPES_all_unsigned (S, D) ++ ++/* _f16 _f32 _f64 ++ _s8 _s16 _s32 _s64 ++ _u8 _u16 _u32 _u64. */ ++#define TYPES_all_arith(S, D) \ ++ TYPES_all_float (S, D), TYPES_all_integer (S, D) ++ ++/* _bf16 ++ _f16 _f32 _f64 ++ _s8 _s16 _s32 _s64 ++ _u8 _u16 _u32 _u64. */ ++#define TYPES_all_data(S, D) \ ++ S (bf16), TYPES_all_arith (S, D) ++ ++/* _b only. */ ++#define TYPES_b(S, D) \ ++ S (b) ++ ++/* _u8. */ ++#define TYPES_b_unsigned(S, D) \ ++ S (u8) ++ ++/* _s8 ++ _u8. */ ++#define TYPES_b_integer(S, D) \ ++ S (s8), TYPES_b_unsigned (S, D) ++ ++/* _s8 _s16 ++ _u8 _u16. */ ++#define TYPES_bh_integer(S, D) \ ++ S (s8), S (s16), S (u8), S (u16) ++ ++/* _u8 _u32. */ ++#define TYPES_bs_unsigned(S, D) \ ++ S (u8), S (u32) ++ ++/* _s8 _s16 _s32. */ ++#define TYPES_bhs_signed(S, D) \ ++ S (s8), S (s16), S (s32) ++ ++/* _u8 _u16 _u32. */ ++#define TYPES_bhs_unsigned(S, D) \ ++ S (u8), S (u16), S (u32) ++ ++/* _s8 _s16 _s32 ++ _u8 _u16 _u32. */ ++#define TYPES_bhs_integer(S, D) \ ++ TYPES_bhs_signed (S, D), TYPES_bhs_unsigned (S, D) ++ ++/* _s16 ++ _u16. */ ++#define TYPES_h_integer(S, D) \ ++ S (s16), S (u16) ++ ++/* _s16 _s32. */ ++#define TYPES_hs_signed(S, D) \ ++ S (s16), S (s32) ++ ++/* _s16 _s32 ++ _u16 _u32. */ ++#define TYPES_hs_integer(S, D) \ ++ TYPES_hs_signed (S, D), S (u16), S (u32) ++ ++/* _f16 _f32. */ ++#define TYPES_hs_float(S, D) \ ++ S (f16), S (f32) ++ ++/* _u16 _u64. 
*/ ++#define TYPES_hd_unsigned(S, D) \ ++ S (u16), S (u64) ++ ++/* _s16 _s32 _s64. */ ++#define TYPES_hsd_signed(S, D) \ ++ S (s16), S (s32), S (s64) ++ ++/* _s16 _s32 _s64 ++ _u16 _u32 _u64. */ ++#define TYPES_hsd_integer(S, D) \ ++ TYPES_hsd_signed (S, D), S (u16), S (u32), S (u64) ++ ++/* _f32. */ ++#define TYPES_s_float(S, D) \ ++ S (f32) ++ ++/* _f32 ++ _s16 _s32 _s64 ++ _u16 _u32 _u64. */ ++#define TYPES_s_float_hsd_integer(S, D) \ ++ TYPES_s_float (S, D), TYPES_hsd_integer (S, D) ++ ++/* _f32 ++ _s32 _s64 ++ _u32 _u64. */ ++#define TYPES_s_float_sd_integer(S, D) \ ++ TYPES_s_float (S, D), TYPES_sd_integer (S, D) ++ ++/* _s32. */ ++#define TYPES_s_signed(S, D) \ ++ S (s32) ++ ++/* _u32. */ ++#define TYPES_s_unsigned(S, D) \ ++ S (u32) ++ ++/* _s32 _u32. */ ++#define TYPES_s_integer(S, D) \ ++ TYPES_s_signed (S, D), TYPES_s_unsigned (S, D) ++ ++/* _s32 _s64. */ ++#define TYPES_sd_signed(S, D) \ ++ S (s32), S (s64) ++ ++/* _u32 _u64. */ ++#define TYPES_sd_unsigned(S, D) \ ++ S (u32), S (u64) ++ ++/* _s32 _s64 ++ _u32 _u64. */ ++#define TYPES_sd_integer(S, D) \ ++ TYPES_sd_signed (S, D), TYPES_sd_unsigned (S, D) ++ ++/* _f32 _f64 ++ _s32 _s64 ++ _u32 _u64. */ ++#define TYPES_sd_data(S, D) \ ++ S (f32), S (f64), TYPES_sd_integer (S, D) ++ ++/* _f16 _f32 _f64 ++ _s32 _s64 ++ _u32 _u64. */ ++#define TYPES_all_float_and_sd_integer(S, D) \ ++ TYPES_all_float (S, D), TYPES_sd_integer (S, D) ++ ++/* _f64. */ ++#define TYPES_d_float(S, D) \ ++ S (f64) ++ ++/* _u64. */ ++#define TYPES_d_unsigned(S, D) \ ++ S (u64) ++ ++/* _s64 ++ _u64. */ ++#define TYPES_d_integer(S, D) \ ++ S (s64), TYPES_d_unsigned (S, D) ++ ++/* _f64 ++ _s64 ++ _u64. */ ++#define TYPES_d_data(S, D) \ ++ TYPES_d_float (S, D), TYPES_d_integer (S, D) ++ ++/* All the type combinations allowed by svcvt. */ ++#define TYPES_cvt(S, D) \ ++ D (f16, f32), D (f16, f64), \ ++ D (f16, s16), D (f16, s32), D (f16, s64), \ ++ D (f16, u16), D (f16, u32), D (f16, u64), \ ++ \ ++ D (f32, f16), D (f32, f64), \ ++ D (f32, s32), D (f32, s64), \ ++ D (f32, u32), D (f32, u64), \ ++ \ ++ D (f64, f16), D (f64, f32), \ ++ D (f64, s32), D (f64, s64), \ ++ D (f64, u32), D (f64, u64), \ ++ \ ++ D (s16, f16), \ ++ D (s32, f16), D (s32, f32), D (s32, f64), \ ++ D (s64, f16), D (s64, f32), D (s64, f64), \ ++ \ ++ D (u16, f16), \ ++ D (u32, f16), D (u32, f32), D (u32, f64), \ ++ D (u64, f16), D (u64, f32), D (u64, f64) ++ ++/* _bf16_f32. */ ++#define TYPES_cvt_bfloat(S, D) \ ++ D (bf16, f32) ++ ++/* _f32_f16 ++ _f64_f32. */ ++#define TYPES_cvt_long(S, D) \ ++ D (f32, f16), D (f64, f32) ++ ++/* _f16_f32. */ ++#define TYPES_cvt_narrow_s(S, D) \ ++ D (f32, f64) ++ ++/* _f16_f32 ++ _f32_f64. */ ++#define TYPES_cvt_narrow(S, D) \ ++ D (f16, f32), TYPES_cvt_narrow_s (S, D) ++ ++/* { _s32 _s64 } x { _b8 _b16 _b32 _b64 } ++ { _u32 _u64 }. */ ++#define TYPES_inc_dec_n1(D, A) \ ++ D (A, b8), D (A, b16), D (A, b32), D (A, b64) ++#define TYPES_inc_dec_n(S, D) \ ++ TYPES_inc_dec_n1 (D, s32), \ ++ TYPES_inc_dec_n1 (D, s64), \ ++ TYPES_inc_dec_n1 (D, u32), \ ++ TYPES_inc_dec_n1 (D, u64) ++ ++/* { _bf16 } { _bf16 } ++ { _f16 _f32 _f64 } { _f16 _f32 _f64 } ++ { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 } ++ { _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }. 
*/ ++#define TYPES_reinterpret1(D, A) \ ++ D (A, bf16), \ ++ D (A, f16), D (A, f32), D (A, f64), \ ++ D (A, s8), D (A, s16), D (A, s32), D (A, s64), \ ++ D (A, u8), D (A, u16), D (A, u32), D (A, u64) ++#define TYPES_reinterpret(S, D) \ ++ TYPES_reinterpret1 (D, bf16), \ ++ TYPES_reinterpret1 (D, f16), \ ++ TYPES_reinterpret1 (D, f32), \ ++ TYPES_reinterpret1 (D, f64), \ ++ TYPES_reinterpret1 (D, s8), \ ++ TYPES_reinterpret1 (D, s16), \ ++ TYPES_reinterpret1 (D, s32), \ ++ TYPES_reinterpret1 (D, s64), \ ++ TYPES_reinterpret1 (D, u8), \ ++ TYPES_reinterpret1 (D, u16), \ ++ TYPES_reinterpret1 (D, u32), \ ++ TYPES_reinterpret1 (D, u64) ++ ++/* { _b8 _b16 _b32 _b64 } x { _s32 _s64 } ++ { _u32 _u64 } */ ++#define TYPES_while1(D, bn) \ ++ D (bn, s32), D (bn, s64), D (bn, u32), D (bn, u64) ++#define TYPES_while(S, D) \ ++ TYPES_while1 (D, b8), \ ++ TYPES_while1 (D, b16), \ ++ TYPES_while1 (D, b32), \ ++ TYPES_while1 (D, b64) ++ ++/* Describe a pair of type suffixes in which only the first is used. */ ++#define DEF_VECTOR_TYPE(X) { TYPE_SUFFIX_ ## X, NUM_TYPE_SUFFIXES } ++ ++/* Describe a pair of type suffixes in which both are used. */ ++#define DEF_DOUBLE_TYPE(X, Y) { TYPE_SUFFIX_ ## X, TYPE_SUFFIX_ ## Y } ++ ++/* Create an array that can be used in aarch64-sve-builtins.def to ++ select the type suffixes in TYPES_. */ ++#define DEF_SVE_TYPES_ARRAY(NAME) \ ++ static const type_suffix_pair types_##NAME[] = { \ ++ TYPES_##NAME (DEF_VECTOR_TYPE, DEF_DOUBLE_TYPE), \ ++ { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES } \ ++ } ++ ++/* For functions that don't take any type suffixes. */ ++static const type_suffix_pair types_none[] = { ++ { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES }, ++ { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES } ++}; ++ ++/* Create an array for each TYPES_ macro above. */ ++DEF_SVE_TYPES_ARRAY (all_pred); ++DEF_SVE_TYPES_ARRAY (all_float); ++DEF_SVE_TYPES_ARRAY (all_signed); ++DEF_SVE_TYPES_ARRAY (all_float_and_signed); ++DEF_SVE_TYPES_ARRAY (all_unsigned); ++DEF_SVE_TYPES_ARRAY (all_integer); ++DEF_SVE_TYPES_ARRAY (all_arith); ++DEF_SVE_TYPES_ARRAY (all_data); ++DEF_SVE_TYPES_ARRAY (b); ++DEF_SVE_TYPES_ARRAY (b_unsigned); ++DEF_SVE_TYPES_ARRAY (b_integer); ++DEF_SVE_TYPES_ARRAY (bh_integer); ++DEF_SVE_TYPES_ARRAY (bs_unsigned); ++DEF_SVE_TYPES_ARRAY (bhs_signed); ++DEF_SVE_TYPES_ARRAY (bhs_unsigned); ++DEF_SVE_TYPES_ARRAY (bhs_integer); ++DEF_SVE_TYPES_ARRAY (h_integer); ++DEF_SVE_TYPES_ARRAY (hs_signed); ++DEF_SVE_TYPES_ARRAY (hs_integer); ++DEF_SVE_TYPES_ARRAY (hs_float); ++DEF_SVE_TYPES_ARRAY (hd_unsigned); ++DEF_SVE_TYPES_ARRAY (hsd_signed); ++DEF_SVE_TYPES_ARRAY (hsd_integer); ++DEF_SVE_TYPES_ARRAY (s_float); ++DEF_SVE_TYPES_ARRAY (s_float_hsd_integer); ++DEF_SVE_TYPES_ARRAY (s_float_sd_integer); ++DEF_SVE_TYPES_ARRAY (s_signed); ++DEF_SVE_TYPES_ARRAY (s_unsigned); ++DEF_SVE_TYPES_ARRAY (s_integer); ++DEF_SVE_TYPES_ARRAY (sd_signed); ++DEF_SVE_TYPES_ARRAY (sd_unsigned); ++DEF_SVE_TYPES_ARRAY (sd_integer); ++DEF_SVE_TYPES_ARRAY (sd_data); ++DEF_SVE_TYPES_ARRAY (all_float_and_sd_integer); ++DEF_SVE_TYPES_ARRAY (d_float); ++DEF_SVE_TYPES_ARRAY (d_unsigned); ++DEF_SVE_TYPES_ARRAY (d_integer); ++DEF_SVE_TYPES_ARRAY (d_data); ++DEF_SVE_TYPES_ARRAY (cvt); ++DEF_SVE_TYPES_ARRAY (cvt_bfloat); ++DEF_SVE_TYPES_ARRAY (cvt_long); ++DEF_SVE_TYPES_ARRAY (cvt_narrow_s); ++DEF_SVE_TYPES_ARRAY (cvt_narrow); ++DEF_SVE_TYPES_ARRAY (inc_dec_n); ++DEF_SVE_TYPES_ARRAY (reinterpret); ++DEF_SVE_TYPES_ARRAY (while); ++ ++/* Used by functions that have no governing predicate. 
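The S()/D() parameters of the TYPES_... macros above are callbacks: S expands a single suffix, D a pair, and DEF_SVE_TYPES_ARRAY instantiates them to build a sentinel-terminated array. A small self-contained sketch of the same pattern with invented suffixes (not part of the patch):

#include <cstdio>

enum suffix { SFX_s32, SFX_u32, SFX_f32, NUM_SUFFIXES };
struct suffix_pair { suffix first, second; };

/* One S() entry per single suffix, one D() entry per suffix pair.  */
#define TYPES_demo(S, D) S (s32), S (u32), D (f32, s32)

#define ONE(X)    { SFX_##X, NUM_SUFFIXES }
#define TWO(X, Y) { SFX_##X, SFX_##Y }
#define DEF_TYPES_ARRAY(NAME) \
  static const suffix_pair types_##NAME[] = { \
    TYPES_##NAME (ONE, TWO), { NUM_SUFFIXES, NUM_SUFFIXES } \
  }

DEF_TYPES_ARRAY (demo);

int main ()
{
  for (unsigned i = 0; types_demo[i].first != NUM_SUFFIXES; ++i)
    std::printf ("entry %u: (%d, %d)\n", i,
                 (int) types_demo[i].first, (int) types_demo[i].second);
}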
*/ ++static const predication_index preds_none[] = { PRED_none, NUM_PREDS }; ++ ++/* Used by functions that have a governing predicate but do not have an ++ explicit suffix. */ ++static const predication_index preds_implicit[] = { PRED_implicit, NUM_PREDS }; ++ ++/* Used by functions that allow merging and "don't care" predication, ++ but are not suitable for predicated MOVPRFX. */ ++static const predication_index preds_mx[] = { ++ PRED_m, PRED_x, NUM_PREDS ++}; ++ ++/* Used by functions that allow merging, zeroing and "don't care" ++ predication. */ ++static const predication_index preds_mxz[] = { ++ PRED_m, PRED_x, PRED_z, NUM_PREDS ++}; ++ ++/* Used by functions that have the mxz predicated forms above, and in addition ++ have an unpredicated form. */ ++static const predication_index preds_mxz_or_none[] = { ++ PRED_m, PRED_x, PRED_z, PRED_none, NUM_PREDS ++}; ++ ++/* Used by functions that allow merging and zeroing predication but have ++ no "_x" form. */ ++static const predication_index preds_mz[] = { PRED_m, PRED_z, NUM_PREDS }; ++ ++/* Used by functions that have an unpredicated form and a _z predicated ++ form. */ ++static const predication_index preds_z_or_none[] = { ++ PRED_z, PRED_none, NUM_PREDS ++}; ++ ++/* Used by (mostly predicate) functions that only support "_z" predication. */ ++static const predication_index preds_z[] = { PRED_z, NUM_PREDS }; ++ ++/* A list of all SVE ACLE functions. */ ++static CONSTEXPR const function_group_info function_groups[] = { ++#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ ++ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \ ++ REQUIRED_EXTENSIONS | AARCH64_FL_SVE }, ++#include "aarch64-sve-builtins.def" ++}; ++ ++/* The scalar type associated with each vector type. */ ++GTY(()) tree scalar_types[NUM_VECTOR_TYPES]; ++ ++/* The single-predicate and single-vector types, with their built-in ++ "__SV..._t" name. Allow an index of NUM_VECTOR_TYPES, which always ++ yields a null tree. */ ++static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1]; ++ ++/* Same, but with the arm_sve.h "sv..._t" name. */ ++GTY(()) tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; ++ ++/* The svpattern enum type. */ ++GTY(()) tree acle_svpattern; ++ ++/* The svprfop enum type. */ ++GTY(()) tree acle_svprfop; ++ ++/* The list of all registered function decls, indexed by code. */ ++static GTY(()) vec *registered_functions; ++ ++/* All registered function decls, hashed on the function_instance ++ that they implement. This is used for looking up implementations of ++ overloaded functions. */ ++static hash_table *function_table; ++ ++/* True if we've already complained about attempts to use functions ++ when the required extension is disabled. */ ++static bool reported_missing_extension_p; ++ ++/* If TYPE is an ACLE vector type, return the associated vector_type, ++ otherwise return NUM_VECTOR_TYPES. */ ++static vector_type_index ++find_vector_type (const_tree type) ++{ ++ /* A linear search should be OK here, since the code isn't hot and ++ the number of types is only small. */ ++ type = TYPE_MAIN_VARIANT (type); ++ for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i) ++ if (type == abi_vector_types[i]) ++ return vector_type_index (i); ++ return NUM_VECTOR_TYPES; ++} ++ ++/* If TYPE is a valid SVE element type, return the corresponding type ++ suffix, otherwise return NUM_TYPE_SUFFIXES. 
*/ ++static type_suffix_index ++find_type_suffix_for_scalar_type (const_tree type) ++{ ++ /* A linear search should be OK here, since the code isn't hot and ++ the number of types is only small. */ ++ type = TYPE_MAIN_VARIANT (type); ++ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) ++ if (!type_suffixes[suffix_i].bool_p) ++ { ++ vector_type_index vector_i = type_suffixes[suffix_i].vector_type; ++ if (type == TYPE_MAIN_VARIANT (scalar_types[vector_i])) ++ return type_suffix_index (suffix_i); ++ } ++ return NUM_TYPE_SUFFIXES; ++} ++ ++/* Report an error against LOCATION that the user has tried to use ++ function FNDECL when extension EXTENSION is disabled. */ ++static void ++report_missing_extension (location_t location, tree fndecl, ++ const char *extension) ++{ ++ /* Avoid reporting a slew of messages for a single oversight. */ ++ if (reported_missing_extension_p) ++ return; ++ ++ error_at (location, "ACLE function %qD requires ISA extension %qs", ++ fndecl, extension); ++ inform (location, "you can enable %qs using the command-line" ++ " option %<-march%>, or by using the %" ++ " attribute or pragma", extension); ++ reported_missing_extension_p = true; ++} ++ ++/* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are ++ enabled, given that those extensions are required for function FNDECL. ++ Report an error against LOCATION if not. */ ++static bool ++check_required_extensions (location_t location, tree fndecl, ++ uint64_t required_extensions) ++{ ++ uint64_t missing_extensions = required_extensions & ~aarch64_isa_flags; ++ if (missing_extensions == 0) ++ return true; ++ ++ static const struct { uint64_t flag; const char *name; } extensions[] = { ++#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ ++ SYNTHETIC, FEATURE_STRING) \ ++ { FLAG_CANONICAL, EXT_NAME }, ++#include "aarch64-option-extensions.def" ++ }; ++ ++ for (unsigned int i = 0; i < ARRAY_SIZE (extensions); ++i) ++ if (missing_extensions & extensions[i].flag) ++ { ++ report_missing_extension (location, fndecl, extensions[i].name); ++ return false; ++ } ++ gcc_unreachable (); ++} ++ ++/* Report that LOCATION has a call to FNDECL in which argument ARGNO ++ was not an integer constant expression. ARGNO counts from zero. */ ++static void ++report_non_ice (location_t location, tree fndecl, unsigned int argno) ++{ ++ error_at (location, "argument %d of %qE must be an integer constant" ++ " expression", argno + 1, fndecl); ++} ++ ++/* Report that LOCATION has a call to FNDECL in which argument ARGNO has ++ the value ACTUAL, whereas the function requires a value in the range ++ [MIN, MAX]. ARGNO counts from zero. */ ++static void ++report_out_of_range (location_t location, tree fndecl, unsigned int argno, ++ HOST_WIDE_INT actual, HOST_WIDE_INT min, ++ HOST_WIDE_INT max) ++{ ++ error_at (location, "passing %wd to argument %d of %qE, which expects" ++ " a value in the range [%wd, %wd]", actual, argno + 1, fndecl, ++ min, max); ++} ++ ++/* Report that LOCATION has a call to FNDECL in which argument ARGNO has ++ the value ACTUAL, whereas the function requires either VALUE0 or ++ VALUE1. ARGNO counts from zero. 
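check_required_extensions above reduces the feature test to a single mask operation: the extensions that are required but not enabled are required & ~enabled. A minimal sketch with invented flag values (the real code uses the AARCH64_FL_* bits):

#include <cstdint>
#include <cstdio>

int main ()
{
  const std::uint64_t FL_SVE = 1, FL_BF16 = 2;   /* invented values */

  std::uint64_t enabled  = FL_SVE;               /* what -march enables */
  std::uint64_t required = FL_SVE | FL_BF16;     /* what the builtin needs */

  std::uint64_t missing = required & ~enabled;
  if (missing & FL_BF16)
    std::printf ("missing extension: bf16\n");
}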
*/ ++static void ++report_neither_nor (location_t location, tree fndecl, unsigned int argno, ++ HOST_WIDE_INT actual, HOST_WIDE_INT value0, ++ HOST_WIDE_INT value1) ++{ ++ error_at (location, "passing %wd to argument %d of %qE, which expects" ++ " either %wd or %wd", actual, argno + 1, fndecl, value0, value1); ++} ++ ++/* Report that LOCATION has a call to FNDECL in which argument ARGNO has ++ the value ACTUAL, whereas the function requires one of VALUE0..3. ++ ARGNO counts from zero. */ ++static void ++report_not_one_of (location_t location, tree fndecl, unsigned int argno, ++ HOST_WIDE_INT actual, HOST_WIDE_INT value0, ++ HOST_WIDE_INT value1, HOST_WIDE_INT value2, ++ HOST_WIDE_INT value3) ++{ ++ error_at (location, "passing %wd to argument %d of %qE, which expects" ++ " %wd, %wd, %wd or %wd", actual, argno + 1, fndecl, value0, value1, ++ value2, value3); ++} ++ ++/* Report that LOCATION has a call to FNDECL in which argument ARGNO has ++ the value ACTUAL, whereas the function requires a valid value of ++ enum type ENUMTYPE. ARGNO counts from zero. */ ++static void ++report_not_enum (location_t location, tree fndecl, unsigned int argno, ++ HOST_WIDE_INT actual, tree enumtype) ++{ ++ error_at (location, "passing %wd to argument %d of %qE, which expects" ++ " a valid %qT value", actual, argno + 1, fndecl, enumtype); ++} ++ ++/* Return a hash code for a function_instance. */ ++hashval_t ++function_instance::hash () const ++{ ++ inchash::hash h; ++ /* BASE uniquely determines BASE_NAME, so we don't need to hash both. */ ++ h.add_ptr (base); ++ h.add_ptr (shape); ++ h.add_int (mode_suffix_id); ++ h.add_int (type_suffix_ids[0]); ++ h.add_int (type_suffix_ids[1]); ++ h.add_int (pred); ++ return h.end (); ++} ++ ++/* Return a set of CP_* flags that describe what the function could do, ++ taking the command-line flags into account. */ ++unsigned int ++function_instance::call_properties () const ++{ ++ unsigned int flags = base->call_properties (*this); ++ ++ /* -fno-trapping-math means that we can assume any FP exceptions ++ are not user-visible. */ ++ if (!flag_trapping_math) ++ flags &= ~CP_RAISE_FP_EXCEPTIONS; ++ ++ return flags; ++} ++ ++/* Return true if calls to the function could read some form of ++ global state. */ ++bool ++function_instance::reads_global_state_p () const ++{ ++ unsigned int flags = call_properties (); ++ ++ /* Preserve any dependence on rounding mode, flush to zero mode, etc. ++ There is currently no way of turning this off; in particular, ++ -fno-rounding-math (which is the default) means that we should make ++ the usual assumptions about rounding mode, which for intrinsics means ++ acting as the instructions do. */ ++ if (flags & CP_READ_FPCR) ++ return true; ++ ++ /* Handle direct reads of global state. */ ++ return flags & (CP_READ_MEMORY | CP_READ_FFR); ++} ++ ++/* Return true if calls to the function could modify some form of ++ global state. */ ++bool ++function_instance::modifies_global_state_p () const ++{ ++ unsigned int flags = call_properties (); ++ ++ /* Preserve any exception state written back to the FPCR, ++ unless -fno-trapping-math says this is unnecessary. */ ++ if (flags & CP_RAISE_FP_EXCEPTIONS) ++ return true; ++ ++ /* Treat prefetches as modifying global state, since that's the ++ only means we have of keeping them in their correct position. */ ++ if (flags & CP_PREFETCH_MEMORY) ++ return true; ++ ++ /* Handle direct modifications of global state. 
*/ ++ return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR); ++} ++ ++/* Return true if calls to the function could raise a signal. */ ++bool ++function_instance::could_trap_p () const ++{ ++ unsigned int flags = call_properties (); ++ ++ /* Handle functions that could raise SIGFPE. */ ++ if (flags & CP_RAISE_FP_EXCEPTIONS) ++ return true; ++ ++ /* Handle functions that could raise SIGBUS or SIGSEGV. */ ++ if (flags & (CP_READ_MEMORY | CP_WRITE_MEMORY)) ++ return true; ++ ++ return false; ++} ++ ++inline hashval_t ++registered_function_hasher::hash (value_type value) ++{ ++ return value->instance.hash (); ++} ++ ++inline bool ++registered_function_hasher::equal (value_type value, const compare_type &key) ++{ ++ return value->instance == key; ++} ++ ++sve_switcher::sve_switcher () ++ : m_old_isa_flags (aarch64_isa_flags) ++{ ++ /* Changing the ISA flags and have_regs_of_mode should be enough here. ++ We shouldn't need to pay the compile-time cost of a full target ++ switch. */ ++ aarch64_isa_flags = (AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16 ++ | AARCH64_FL_SVE); ++ ++ memcpy (m_old_have_regs_of_mode, have_regs_of_mode, ++ sizeof (have_regs_of_mode)); ++ for (int i = 0; i < NUM_MACHINE_MODES; ++i) ++ if (aarch64_sve_mode_p ((machine_mode) i)) ++ have_regs_of_mode[i] = true; ++} ++ ++sve_switcher::~sve_switcher () ++{ ++ memcpy (have_regs_of_mode, m_old_have_regs_of_mode, ++ sizeof (have_regs_of_mode)); ++ aarch64_isa_flags = m_old_isa_flags; ++} ++ ++function_builder::function_builder () ++{ ++ m_overload_type = build_function_type (void_type_node, void_list_node); ++ m_direct_overloads = lang_GNU_CXX (); ++ gcc_obstack_init (&m_string_obstack); ++} ++ ++function_builder::~function_builder () ++{ ++ obstack_free (&m_string_obstack, NULL); ++} ++ ++/* Add NAME to the end of the function name being built. */ ++void ++function_builder::append_name (const char *name) ++{ ++ obstack_grow (&m_string_obstack, name, strlen (name)); ++} ++ ++/* Zero-terminate and complete the function name being built. */ ++char * ++function_builder::finish_name () ++{ ++ obstack_1grow (&m_string_obstack, 0); ++ return (char *) obstack_finish (&m_string_obstack); ++} ++ ++/* Return the overloaded or full function name for INSTANCE; OVERLOADED_P ++ selects which. Allocate the string on m_string_obstack; the caller ++ must use obstack_free to free it after use. */ ++char * ++function_builder::get_name (const function_instance &instance, ++ bool overloaded_p) ++{ ++ append_name (instance.base_name); ++ if (overloaded_p) ++ switch (instance.displacement_units ()) ++ { ++ case UNITS_none: ++ break; ++ ++ case UNITS_bytes: ++ append_name ("_offset"); ++ break; ++ ++ case UNITS_elements: ++ append_name ("_index"); ++ break; ++ ++ case UNITS_vectors: ++ append_name ("_vnum"); ++ break; ++ } ++ else ++ append_name (instance.mode_suffix ().string); ++ for (unsigned int i = 0; i < 2; ++i) ++ if (!overloaded_p || instance.shape->explicit_type_suffix_p (i)) ++ append_name (instance.type_suffix (i).string); ++ append_name (pred_suffixes[instance.pred]); ++ return finish_name (); ++} ++ ++/* Add attribute NAME to ATTRS. */ ++static tree ++add_attribute (const char *name, tree attrs) ++{ ++ return tree_cons (get_identifier (name), NULL_TREE, attrs); ++} ++ ++/* Return the appropriate function attributes for INSTANCE. 
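get_name above assembles both spellings of a function from the same parts: the full name spells out every suffix, while the overloaded alias keeps only what the resolver cannot infer. A sketch in the svfoo notation used by the comments above (the strings are placeholders, not real intrinsic names):

#include <iostream>
#include <string>

int main ()
{
  std::string base = "svfoo", mode = "_index", type0 = "_u32", pred = "_m";

  /* Full (unique) name: mode suffix, type suffix and predication suffix
     are all spelled out.  */
  std::string full_name = base + mode + type0 + pred;

  /* Overloaded alias: suffixes that overload resolution can infer from
     the argument types (here the type suffix) are dropped.  */
  std::string overloaded_name = base + mode + pred;

  std::cout << full_name << '\n' << overloaded_name << '\n';
  /* Prints:
     svfoo_index_u32_m
     svfoo_index_m  */
}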
*/ ++tree ++function_builder::get_attributes (const function_instance &instance) ++{ ++ tree attrs = NULL_TREE; ++ ++ if (!instance.modifies_global_state_p ()) ++ { ++ if (instance.reads_global_state_p ()) ++ attrs = add_attribute ("pure", attrs); ++ else ++ attrs = add_attribute ("const", attrs); ++ } ++ ++ if (!flag_non_call_exceptions || !instance.could_trap_p ()) ++ attrs = add_attribute ("nothrow", attrs); ++ ++ return add_attribute ("leaf", attrs); ++} ++ ++/* Add a function called NAME with type FNTYPE and attributes ATTRS. ++ INSTANCE describes what the function does and OVERLOADED_P indicates ++ whether it is overloaded. REQUIRED_EXTENSIONS are the set of ++ architecture extensions that the function requires. */ ++registered_function & ++function_builder::add_function (const function_instance &instance, ++ const char *name, tree fntype, tree attrs, ++ uint64_t required_extensions, ++ bool overloaded_p) ++{ ++ unsigned int code = vec_safe_length (registered_functions); ++ code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_SVE; ++ tree decl = simulate_builtin_function_decl (input_location, name, fntype, ++ code, NULL, attrs); ++ ++ registered_function &rfn = *ggc_alloc (); ++ rfn.instance = instance; ++ rfn.decl = decl; ++ rfn.required_extensions = required_extensions; ++ rfn.overloaded_p = overloaded_p; ++ vec_safe_push (registered_functions, &rfn); ++ ++ return rfn; ++} ++ ++/* Add a built-in function for INSTANCE, with the argument types given ++ by ARGUMENT_TYPES and the return type given by RETURN_TYPE. ++ REQUIRED_EXTENSIONS are the set of architecture extensions that the ++ function requires. FORCE_DIRECT_OVERLOADS is true if there is a ++ one-to-one mapping between "short" and "full" names, and if standard ++ overload resolution therefore isn't necessary. */ ++void ++function_builder::add_unique_function (const function_instance &instance, ++ tree return_type, ++ vec &argument_types, ++ uint64_t required_extensions, ++ bool force_direct_overloads) ++{ ++ /* Add the function under its full (unique) name. */ ++ char *name = get_name (instance, false); ++ tree fntype = build_function_type_array (return_type, ++ argument_types.length (), ++ argument_types.address ()); ++ tree attrs = get_attributes (instance); ++ registered_function &rfn = add_function (instance, name, fntype, attrs, ++ required_extensions, false); ++ ++ /* Enter the function into the hash table. */ ++ hashval_t hash = instance.hash (); ++ registered_function **rfn_slot ++ = function_table->find_slot_with_hash (instance, hash, INSERT); ++ gcc_assert (!*rfn_slot); ++ *rfn_slot = &rfn; ++ ++ /* Also add the function under its overloaded alias, if we want ++ a separate decl for each instance of an overloaded function. */ ++ if (m_direct_overloads || force_direct_overloads) ++ { ++ char *overload_name = get_name (instance, true); ++ if (strcmp (name, overload_name) != 0) ++ { ++ /* Attribute lists shouldn't be shared. */ ++ tree attrs = get_attributes (instance); ++ add_function (instance, overload_name, fntype, attrs, ++ required_extensions, false); ++ } ++ } ++ ++ obstack_free (&m_string_obstack, name); ++} ++ ++/* Add one function decl for INSTANCE, to be used with manual overload ++ resolution. REQUIRED_EXTENSIONS are the set of architecture extensions ++ that the function requires. ++ ++ For simplicity, deal with duplicate attempts to add the same function, ++ including cases in which the new function requires more features than ++ the original one did. 
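get_attributes above turns the call-property bits into standard function attributes: "const" when the call touches no global state, "pure" when it only reads it, plus "nothrow" when the call cannot trap, and always "leaf". A standalone restatement of that decision (illustrative only, not part of the patch):

#include <cstdio>
#include <string>
#include <vector>

static std::vector<std::string>
pick_attributes (bool reads_global, bool writes_global,
                 bool can_trap, bool non_call_exceptions)
{
  std::vector<std::string> attrs;
  if (!writes_global)
    attrs.push_back (reads_global ? "pure" : "const");
  if (!non_call_exceptions || !can_trap)
    attrs.push_back ("nothrow");
  attrs.push_back ("leaf");
  return attrs;
}

int main ()
{
  for (const std::string &a : pick_attributes (false, false, false, false))
    std::printf ("%s\n", a.c_str ());   /* const, nothrow, leaf */
}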
++   In that case we'll check whether the required
++   features are available as part of resolving the function to the
++   relevant unique function.  */
++void
++function_builder::add_overloaded_function (const function_instance &instance,
++                                           uint64_t required_extensions)
++{
++  char *name = get_name (instance, true);
++  if (registered_function **map_value = m_overload_names.get (name))
++    gcc_assert ((*map_value)->instance == instance
++                && ((*map_value)->required_extensions
++                    & ~required_extensions) == 0);
++  else
++    {
++      registered_function &rfn
++        = add_function (instance, name, m_overload_type, NULL_TREE,
++                        required_extensions, true);
++      const char *permanent_name = IDENTIFIER_POINTER (DECL_NAME (rfn.decl));
++      m_overload_names.put (permanent_name, &rfn);
++    }
++  obstack_free (&m_string_obstack, name);
++}
++
++/* If we are using manual overload resolution, add one function decl
++   for each overloaded function in GROUP.  Take the function base name
++   from GROUP and the mode from MODE.  */
++void
++function_builder::add_overloaded_functions (const function_group_info &group,
++                                            mode_suffix_index mode)
++{
++  if (m_direct_overloads)
++    return;
++
++  unsigned int explicit_type0 = (*group.shape)->explicit_type_suffix_p (0);
++  unsigned int explicit_type1 = (*group.shape)->explicit_type_suffix_p (1);
++  for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
++    {
++      if (!explicit_type0 && !explicit_type1)
++        {
++          /* Deal with the common case in which there is one overloaded
++             function for all type combinations.  */
++          function_instance instance (group.base_name, *group.base,
++                                      *group.shape, mode, types_none[0],
++                                      group.preds[pi]);
++          add_overloaded_function (instance, group.required_extensions);
++        }
++      else
++        for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
++             ++ti)
++          {
++            /* Stub out the types that are determined by overload
++               resolution.  */
++            type_suffix_pair types = {
++              explicit_type0 ? group.types[ti][0] : NUM_TYPE_SUFFIXES,
++              explicit_type1 ? group.types[ti][1] : NUM_TYPE_SUFFIXES
++            };
++            function_instance instance (group.base_name, *group.base,
++                                        *group.shape, mode, types,
++                                        group.preds[pi]);
++            add_overloaded_function (instance, group.required_extensions);
++          }
++    }
++}
++
++/* Register all the functions in GROUP.  */
++void
++function_builder::register_function_group (const function_group_info &group)
++{
++  (*group.shape)->build (*this, group);
++}
++
++function_call_info::function_call_info (location_t location_in,
++                                        const function_instance &instance_in,
++                                        tree fndecl_in)
++  : function_instance (instance_in), location (location_in), fndecl (fndecl_in)
++{
++}
++
++function_resolver::function_resolver (location_t location,
++                                      const function_instance &instance,
++                                      tree fndecl, vec<tree, va_gc> &arglist)
++  : function_call_info (location, instance, fndecl), m_arglist (arglist)
++{
++}
++
++/* Return the vector type associated with type suffix TYPE.  */
++tree
++function_resolver::get_vector_type (type_suffix_index type)
++{
++  return acle_vector_types[0][type_suffixes[type].vector_type];
++}
++
++/* Return the <stdint.h> name associated with TYPE.  Using the <stdint.h>
++   name should be more user-friendly than the underlying canonical type,
++   since it makes the signedness and bitwidth explicit.  */
++const char *
++function_resolver::get_scalar_type_name (type_suffix_index type)
++{
++  return vector_types[type_suffixes[type].vector_type].acle_name + 2;
++}
++
++/* Return the type of argument I, or error_mark_node if it isn't
++   well-formed.
*/ ++tree ++function_resolver::get_argument_type (unsigned int i) ++{ ++ tree arg = m_arglist[i]; ++ return arg == error_mark_node ? arg : TREE_TYPE (arg); ++} ++ ++/* Return true if argument I is some form of scalar value. */ ++bool ++function_resolver::scalar_argument_p (unsigned int i) ++{ ++ tree type = get_argument_type (i); ++ return (INTEGRAL_TYPE_P (type) ++ /* Allow pointer types, leaving the frontend to warn where ++ necessary. */ ++ || POINTER_TYPE_P (type) ++ || SCALAR_FLOAT_TYPE_P (type)); ++} ++ ++/* Report that the function has no form that takes type suffix TYPE. ++ Return error_mark_node. */ ++tree ++function_resolver::report_no_such_form (type_suffix_index type) ++{ ++ error_at (location, "%qE has no form that takes %qT arguments", ++ fndecl, get_vector_type (type)); ++ return error_mark_node; ++} ++ ++/* Silently check whether there is an instance of the function with the ++ mode suffix given by MODE and the type suffixes given by TYPE0 and TYPE1. ++ Return its function decl if so, otherwise return null. */ ++tree ++function_resolver::lookup_form (mode_suffix_index mode, ++ type_suffix_index type0, ++ type_suffix_index type1) ++{ ++ type_suffix_pair types = { type0, type1 }; ++ function_instance instance (base_name, base, shape, mode, types, pred); ++ registered_function *rfn ++ = function_table->find_with_hash (instance, instance.hash ()); ++ return rfn ? rfn->decl : NULL_TREE; ++} ++ ++/* Resolve the function to one with the mode suffix given by MODE and the ++ type suffixes given by TYPE0 and TYPE1. Return its function decl on ++ success, otherwise report an error and return error_mark_node. */ ++tree ++function_resolver::resolve_to (mode_suffix_index mode, ++ type_suffix_index type0, ++ type_suffix_index type1) ++{ ++ tree res = lookup_form (mode, type0, type1); ++ if (!res) ++ { ++ if (type1 == NUM_TYPE_SUFFIXES) ++ return report_no_such_form (type0); ++ if (type0 == type_suffix_ids[0]) ++ return report_no_such_form (type1); ++ /* To be filled in when we have other cases. */ ++ gcc_unreachable (); ++ } ++ return res; ++} ++ ++/* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type. ++ Return the associated type suffix on success, otherwise report an ++ error and return NUM_TYPE_SUFFIXES. */ ++type_suffix_index ++function_resolver::infer_integer_scalar_type (unsigned int argno) ++{ ++ tree actual = get_argument_type (argno); ++ if (actual == error_mark_node) ++ return NUM_TYPE_SUFFIXES; ++ ++ /* Allow enums and booleans to decay to integers, for compatibility ++ with C++ overloading rules. */ ++ if (INTEGRAL_TYPE_P (actual)) ++ { ++ bool uns_p = TYPE_UNSIGNED (actual); ++ /* Honor the usual integer promotions, so that resolution works ++ in the same way as for C++. */ ++ if (TYPE_PRECISION (actual) < 32) ++ return TYPE_SUFFIX_s32; ++ if (TYPE_PRECISION (actual) == 32) ++ return uns_p ? TYPE_SUFFIX_u32 : TYPE_SUFFIX_s32; ++ if (TYPE_PRECISION (actual) == 64) ++ return uns_p ? TYPE_SUFFIX_u64 : TYPE_SUFFIX_s64; ++ } ++ ++ error_at (location, "passing %qT to argument %d of %qE, which expects" ++ " a 32-bit or 64-bit integer type", actual, argno + 1, fndecl); ++ return NUM_TYPE_SUFFIXES; ++} ++ ++/* Require argument ARGNO to be a pointer to a scalar type that has a ++ corresponding type suffix. Return that type suffix on success, ++ otherwise report an error and return NUM_TYPE_SUFFIXES. ++ GATHER_SCATTER_P is true if the function is a gather/scatter ++ operation, and so requires a pointer to 32-bit or 64-bit data. 
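++   For example (using <arm_sve.h> names purely as an illustration),
++   in a call such as
++
++     svint32_t load_it (svbool_t pg, const int32_t *ptr)
++     {
++       return svld1 (pg, ptr);
++     }
++
++   the int32_t target type selects the _s32 suffix, whereas a pointer
++   to a type with no corresponding SVE element type is rejected.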
*/ ++type_suffix_index ++function_resolver::infer_pointer_type (unsigned int argno, ++ bool gather_scatter_p) ++{ ++ tree actual = get_argument_type (argno); ++ if (actual == error_mark_node) ++ return NUM_TYPE_SUFFIXES; ++ ++ if (TREE_CODE (actual) != POINTER_TYPE) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a pointer type", actual, argno + 1, fndecl); ++ if (VECTOR_TYPE_P (actual) && gather_scatter_p) ++ inform (location, "an explicit type suffix is needed" ++ " when using a vector of base addresses"); ++ return NUM_TYPE_SUFFIXES; ++ } ++ ++ tree target = TREE_TYPE (actual); ++ type_suffix_index type = find_type_suffix_for_scalar_type (target); ++ if (type == NUM_TYPE_SUFFIXES) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, but %qT is not" ++ " a valid SVE element type", actual, argno + 1, fndecl, ++ build_qualified_type (target, 0)); ++ return NUM_TYPE_SUFFIXES; ++ } ++ unsigned int bits = type_suffixes[type].element_bits; ++ if (gather_scatter_p && bits != 32 && bits != 64) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a pointer to 32-bit or 64-bit elements", ++ actual, argno + 1, fndecl); ++ return NUM_TYPE_SUFFIXES; ++ } ++ ++ return type; ++} ++ ++/* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS ++ vectors; NUM_VECTORS is 1 for the former. Return the associated type ++ suffix on success, using TYPE_SUFFIX_b for predicates. Report an error ++ and return NUM_TYPE_SUFFIXES on failure. */ ++type_suffix_index ++function_resolver::infer_vector_or_tuple_type (unsigned int argno, ++ unsigned int num_vectors) ++{ ++ tree actual = get_argument_type (argno); ++ if (actual == error_mark_node) ++ return NUM_TYPE_SUFFIXES; ++ ++ /* A linear search should be OK here, since the code isn't hot and ++ the number of types is only small. */ ++ for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) ++ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) ++ { ++ vector_type_index type_i = type_suffixes[suffix_i].vector_type; ++ tree type = acle_vector_types[size_i][type_i]; ++ if (type && TYPE_MAIN_VARIANT (actual) == TYPE_MAIN_VARIANT (type)) ++ { ++ if (size_i + 1 == num_vectors) ++ return type_suffix_index (suffix_i); ++ ++ if (num_vectors == 1) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a single SVE vector rather than a tuple", ++ actual, argno + 1, fndecl); ++ else if (size_i == 0 && type_i != VECTOR_TYPE_svbool_t) ++ error_at (location, "passing single vector %qT to argument %d" ++ " of %qE, which expects a tuple of %d vectors", ++ actual, argno + 1, fndecl, num_vectors); ++ else ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a tuple of %d vectors", actual, argno + 1, ++ fndecl, num_vectors); ++ return NUM_TYPE_SUFFIXES; ++ } ++ } ++ ++ if (num_vectors == 1) ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects an SVE vector type", actual, argno + 1, fndecl); ++ else ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects an SVE tuple type", actual, argno + 1, fndecl); ++ return NUM_TYPE_SUFFIXES; ++} ++ ++/* Require argument ARGNO to have some form of vector type. Return the ++ associated type suffix on success, using TYPE_SUFFIX_b for predicates. ++ Report an error and return NUM_TYPE_SUFFIXES on failure. 
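++   For example (illustration only), in an overloaded call such as
++   svabs_x (pg, x), an svint32_t X yields TYPE_SUFFIX_s32 and the
++   call resolves to svabs_s32_x.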
*/ ++type_suffix_index ++function_resolver::infer_vector_type (unsigned int argno) ++{ ++ return infer_vector_or_tuple_type (argno, 1); ++} ++ ++/* Like infer_vector_type, but also require the type to be integral. */ ++type_suffix_index ++function_resolver::infer_integer_vector_type (unsigned int argno) ++{ ++ type_suffix_index type = infer_vector_type (argno); ++ if (type == NUM_TYPE_SUFFIXES) ++ return type; ++ ++ if (!type_suffixes[type].integer_p) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a vector of integers", get_argument_type (argno), ++ argno + 1, fndecl); ++ return NUM_TYPE_SUFFIXES; ++ } ++ ++ return type; ++} ++ ++/* Like infer_vector_type, but also require the type to be an unsigned ++ integer. */ ++type_suffix_index ++function_resolver::infer_unsigned_vector_type (unsigned int argno) ++{ ++ type_suffix_index type = infer_vector_type (argno); ++ if (type == NUM_TYPE_SUFFIXES) ++ return type; ++ ++ if (!type_suffixes[type].unsigned_p) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a vector of unsigned integers", ++ get_argument_type (argno), argno + 1, fndecl); ++ return NUM_TYPE_SUFFIXES; ++ } ++ ++ return type; ++} ++ ++/* Like infer_vector_type, but also require the element size to be ++ 32 or 64 bits. */ ++type_suffix_index ++function_resolver::infer_sd_vector_type (unsigned int argno) ++{ ++ type_suffix_index type = infer_vector_type (argno); ++ if (type == NUM_TYPE_SUFFIXES) ++ return type; ++ ++ unsigned int bits = type_suffixes[type].element_bits; ++ if (bits != 32 && bits != 64) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a vector of 32-bit or 64-bit elements", ++ get_argument_type (argno), argno + 1, fndecl); ++ return NUM_TYPE_SUFFIXES; ++ } ++ ++ return type; ++} ++ ++/* If the function operates on tuples of vectors, require argument ARGNO to be ++ a tuple with the appropriate number of vectors, otherwise require it to be ++ a single vector. Return the associated type suffix on success, using ++ TYPE_SUFFIX_b for predicates. Report an error and return NUM_TYPE_SUFFIXES ++ on failure. */ ++type_suffix_index ++function_resolver::infer_tuple_type (unsigned int argno) ++{ ++ return infer_vector_or_tuple_type (argno, vectors_per_tuple ()); ++} ++ ++/* Require argument ARGNO to be a vector or scalar argument. Return true ++ if it is, otherwise report an appropriate error. */ ++bool ++function_resolver::require_vector_or_scalar_type (unsigned int argno) ++{ ++ tree actual = get_argument_type (argno); ++ if (actual == error_mark_node) ++ return false; ++ ++ if (!scalar_argument_p (argno) && !VECTOR_TYPE_P (actual)) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a vector or scalar type", actual, argno + 1, fndecl); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Require argument ARGNO to have vector type TYPE, in cases where this ++ requirement holds for all uses of the function. Return true if the ++ argument has the right form, otherwise report an appropriate error. 
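++   For example, every predicated form expects its governing predicate
++   argument to be svbool_t regardless of the data type, so that check
++   passes a fixed TYPE of VECTOR_TYPE_svbool_t rather than an inferred
++   suffix.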
*/ ++bool ++function_resolver::require_vector_type (unsigned int argno, ++ vector_type_index type) ++{ ++ tree expected = acle_vector_types[0][type]; ++ tree actual = get_argument_type (argno); ++ if (actual != error_mark_node ++ && TYPE_MAIN_VARIANT (expected) != TYPE_MAIN_VARIANT (actual)) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects %qT", actual, argno + 1, fndecl, expected); ++ return false; ++ } ++ return true; ++} ++ ++/* Like require_vector_type, but TYPE is inferred from previous arguments ++ rather than being a fixed part of the function signature. This changes ++ the nature of the error messages. */ ++bool ++function_resolver::require_matching_vector_type (unsigned int argno, ++ type_suffix_index type) ++{ ++ type_suffix_index new_type = infer_vector_type (argno); ++ if (new_type == NUM_TYPE_SUFFIXES) ++ return false; ++ ++ if (type != new_type) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, but" ++ " previous arguments had type %qT", ++ get_vector_type (new_type), argno + 1, fndecl, ++ get_vector_type (type)); ++ return false; ++ } ++ return true; ++} ++ ++/* Require argument ARGNO to be a vector type with the following properties: ++ ++ - the type class must be the same as FIRST_TYPE's if EXPECTED_TCLASS ++ is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself. ++ ++ - the element size must be: ++ ++ - the same as FIRST_TYPE's if EXPECTED_BITS == SAME_SIZE ++ - half of FIRST_TYPE's if EXPECTED_BITS == HALF_SIZE ++ - a quarter of FIRST_TYPE's if EXPECTED_BITS == QUARTER_SIZE ++ - EXPECTED_BITS itself otherwise ++ ++ Return true if the argument has the required type, otherwise report ++ an appropriate error. ++ ++ FIRST_ARGNO is the first argument that is known to have type FIRST_TYPE. ++ Usually it comes before ARGNO, but sometimes it is more natural to resolve ++ arguments out of order. ++ ++ If the required properties depend on FIRST_TYPE then both FIRST_ARGNO and ++ ARGNO contribute to the resolution process. If the required properties ++ are fixed, only FIRST_ARGNO contributes to the resolution process. ++ ++ This function is a bit of a Swiss army knife. The complication comes ++ from trying to give good error messages when FIRST_ARGNO and ARGNO are ++ inconsistent, since either of them might be wrong. */ ++bool function_resolver:: ++require_derived_vector_type (unsigned int argno, ++ unsigned int first_argno, ++ type_suffix_index first_type, ++ type_class_index expected_tclass, ++ unsigned int expected_bits) ++{ ++ /* If the type needs to match FIRST_ARGNO exactly, use the preferred ++ error message for that case. The VECTOR_TYPE_P test excludes tuple ++ types, which we handle below instead. */ ++ bool both_vectors_p = VECTOR_TYPE_P (get_argument_type (first_argno)); ++ if (both_vectors_p ++ && expected_tclass == SAME_TYPE_CLASS ++ && expected_bits == SAME_SIZE) ++ { ++ /* There's no need to resolve this case out of order. */ ++ gcc_assert (argno > first_argno); ++ return require_matching_vector_type (argno, first_type); ++ } ++ ++ /* Use FIRST_TYPE to get the expected type class and element size. 
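++     For example, if FIRST_TYPE is s32, SAME_TYPE_CLASS keeps the
++     signed class and HALF_SIZE gives 16-bit elements, so the derived
++     suffix would be s16.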
*/ ++ type_class_index orig_expected_tclass = expected_tclass; ++ if (expected_tclass == NUM_TYPE_CLASSES) ++ expected_tclass = type_suffixes[first_type].tclass; ++ ++ unsigned int orig_expected_bits = expected_bits; ++ if (expected_bits == SAME_SIZE) ++ expected_bits = type_suffixes[first_type].element_bits; ++ else if (expected_bits == HALF_SIZE) ++ expected_bits = type_suffixes[first_type].element_bits / 2; ++ else if (expected_bits == QUARTER_SIZE) ++ expected_bits = type_suffixes[first_type].element_bits / 4; ++ ++ /* If the expected type doesn't depend on FIRST_TYPE at all, ++ just check for the fixed choice of vector type. */ ++ if (expected_tclass == orig_expected_tclass ++ && expected_bits == orig_expected_bits) ++ { ++ const type_suffix_info &expected_suffix ++ = type_suffixes[find_type_suffix (expected_tclass, expected_bits)]; ++ return require_vector_type (argno, expected_suffix.vector_type); ++ } ++ ++ /* Require the argument to be some form of SVE vector type, ++ without being specific about the type of vector we want. */ ++ type_suffix_index actual_type = infer_vector_type (argno); ++ if (actual_type == NUM_TYPE_SUFFIXES) ++ return false; ++ ++ /* Exit now if we got the right type. */ ++ bool tclass_ok_p = (type_suffixes[actual_type].tclass == expected_tclass); ++ bool size_ok_p = (type_suffixes[actual_type].element_bits == expected_bits); ++ if (tclass_ok_p && size_ok_p) ++ return true; ++ ++ /* First look for cases in which the actual type contravenes a fixed ++ size requirement, without having to refer to FIRST_TYPE. */ ++ if (!size_ok_p && expected_bits == orig_expected_bits) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a vector of %d-bit elements", ++ get_vector_type (actual_type), argno + 1, fndecl, ++ expected_bits); ++ return false; ++ } ++ ++ /* Likewise for a fixed type class requirement. This is only ever ++ needed for signed and unsigned types, so don't create unnecessary ++ translation work for other type classes. */ ++ if (!tclass_ok_p && orig_expected_tclass == TYPE_signed) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a vector of signed integers", ++ get_vector_type (actual_type), argno + 1, fndecl); ++ return false; ++ } ++ if (!tclass_ok_p && orig_expected_tclass == TYPE_unsigned) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a vector of unsigned integers", ++ get_vector_type (actual_type), argno + 1, fndecl); ++ return false; ++ } ++ ++ /* Make sure that FIRST_TYPE itself is sensible before using it ++ as a basis for an error message. */ ++ if (resolve_to (mode_suffix_id, first_type) == error_mark_node) ++ return false; ++ ++ /* If the arguments have consistent type classes, but a link between ++ the sizes has been broken, try to describe the error in those terms. */ ++ if (both_vectors_p && tclass_ok_p && orig_expected_bits == SAME_SIZE) ++ { ++ if (argno < first_argno) ++ { ++ std::swap (argno, first_argno); ++ std::swap (actual_type, first_type); ++ } ++ error_at (location, "arguments %d and %d of %qE must have the" ++ " same element size, but the values passed here have type" ++ " %qT and %qT respectively", first_argno + 1, argno + 1, ++ fndecl, get_vector_type (first_type), ++ get_vector_type (actual_type)); ++ return false; ++ } ++ ++ /* Likewise in reverse: look for cases in which the sizes are consistent ++ but a link between the type classes has been broken. 
*/ ++ if (both_vectors_p ++ && size_ok_p ++ && orig_expected_tclass == SAME_TYPE_CLASS ++ && type_suffixes[first_type].integer_p ++ && type_suffixes[actual_type].integer_p) ++ { ++ if (argno < first_argno) ++ { ++ std::swap (argno, first_argno); ++ std::swap (actual_type, first_type); ++ } ++ error_at (location, "arguments %d and %d of %qE must have the" ++ " same signedness, but the values passed here have type" ++ " %qT and %qT respectively", first_argno + 1, argno + 1, ++ fndecl, get_vector_type (first_type), ++ get_vector_type (actual_type)); ++ return false; ++ } ++ ++ /* The two arguments are wildly inconsistent. */ ++ type_suffix_index expected_type ++ = find_type_suffix (expected_tclass, expected_bits); ++ error_at (location, "passing %qT instead of the expected %qT to argument" ++ " %d of %qE, after passing %qT to argument %d", ++ get_vector_type (actual_type), get_vector_type (expected_type), ++ argno + 1, fndecl, get_argument_type (first_argno), ++ first_argno + 1); ++ return false; ++} ++ ++/* Require argument ARGNO to match argument FIRST_ARGNO, which was inferred ++ to be a pointer to a scalar element of type TYPE. */ ++bool ++function_resolver::require_matching_pointer_type (unsigned int argno, ++ unsigned int first_argno, ++ type_suffix_index type) ++{ ++ type_suffix_index new_type = infer_pointer_type (argno); ++ if (new_type == NUM_TYPE_SUFFIXES) ++ return false; ++ ++ if (type != new_type) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, but" ++ " argument %d had type %qT", get_argument_type (argno), ++ argno + 1, fndecl, first_argno + 1, ++ get_argument_type (first_argno)); ++ return false; ++ } ++ return true; ++} ++ ++/* Require argument ARGNO to be a (possibly variable) scalar, using EXPECTED ++ as the name of its expected type. Return true if the argument has the ++ right form, otherwise report an appropriate error. */ ++bool ++function_resolver::require_scalar_type (unsigned int argno, ++ const char *expected) ++{ ++ if (!scalar_argument_p (argno)) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects %qs", get_argument_type (argno), argno + 1, ++ fndecl, expected); ++ return false; ++ } ++ return true; ++} ++ ++/* Require argument ARGNO to be some form of pointer, without being specific ++ about its target type. Return true if the argument has the right form, ++ otherwise report an appropriate error. */ ++bool ++function_resolver::require_pointer_type (unsigned int argno) ++{ ++ if (!scalar_argument_p (argno)) ++ { ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a scalar pointer", get_argument_type (argno), ++ argno + 1, fndecl); ++ return false; ++ } ++ return true; ++} ++ ++/* Argument FIRST_ARGNO is a scalar with type EXPECTED_TYPE, and argument ++ ARGNO should be consistent with it. Return true if it is, otherwise ++ report an appropriate error. 
*/ ++bool function_resolver:: ++require_matching_integer_scalar_type (unsigned int argno, ++ unsigned int first_argno, ++ type_suffix_index expected_type) ++{ ++ type_suffix_index actual_type = infer_integer_scalar_type (argno); ++ if (actual_type == NUM_TYPE_SUFFIXES) ++ return false; ++ ++ if (actual_type == expected_type) ++ return true; ++ ++ error_at (location, "call to %qE is ambiguous; argument %d has type" ++ " %qs but argument %d has type %qs", fndecl, ++ first_argno + 1, get_scalar_type_name (expected_type), ++ argno + 1, get_scalar_type_name (actual_type)); ++ return false; ++} ++ ++/* Require argument ARGNO to be a (possibly variable) scalar, expecting it ++ to have the following properties: ++ ++ - the type class must be the same as for type suffix 0 if EXPECTED_TCLASS ++ is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself. ++ ++ - the element size must be the same as for type suffix 0 if EXPECTED_BITS ++ is SAME_TYPE_SIZE, otherwise it must be EXPECTED_BITS itself. ++ ++ Return true if the argument is valid, otherwise report an appropriate error. ++ ++ Note that we don't check whether the scalar type actually has the required ++ properties, since that's subject to implicit promotions and conversions. ++ Instead we just use the expected properties to tune the error message. */ ++bool function_resolver:: ++require_derived_scalar_type (unsigned int argno, ++ type_class_index expected_tclass, ++ unsigned int expected_bits) ++{ ++ gcc_assert (expected_tclass == SAME_TYPE_CLASS ++ || expected_tclass == TYPE_signed ++ || expected_tclass == TYPE_unsigned); ++ ++ /* If the expected type doesn't depend on the type suffix at all, ++ just check for the fixed choice of scalar type. */ ++ if (expected_tclass != SAME_TYPE_CLASS && expected_bits != SAME_SIZE) ++ { ++ type_suffix_index expected_type ++ = find_type_suffix (expected_tclass, expected_bits); ++ return require_scalar_type (argno, get_scalar_type_name (expected_type)); ++ } ++ ++ if (scalar_argument_p (argno)) ++ return true; ++ ++ if (expected_tclass == SAME_TYPE_CLASS) ++ /* It doesn't really matter whether the element is expected to be ++ the same size as type suffix 0. */ ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a scalar element", get_argument_type (argno), ++ argno + 1, fndecl); ++ else ++ /* It doesn't seem useful to distinguish between signed and unsigned ++ scalars here. */ ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a scalar integer", get_argument_type (argno), ++ argno + 1, fndecl); ++ return false; ++} ++ ++/* Require argument ARGNO to be suitable for an integer constant expression. ++ Return true if it is, otherwise report an appropriate error. ++ ++ function_checker checks whether the argument is actually constant and ++ has a suitable range. The reason for distinguishing immediate arguments ++ here is because it provides more consistent error messages than ++ require_scalar_type would. */ ++bool ++function_resolver::require_integer_immediate (unsigned int argno) ++{ ++ if (!scalar_argument_p (argno)) ++ { ++ report_non_ice (location, fndecl, argno); ++ return false; ++ } ++ return true; ++} ++ ++/* Require argument ARGNO to be a vector base in a gather-style address. ++ Return its type on success, otherwise return NUM_VECTOR_TYPES. 
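++   For example, an svuint64_t argument selects the unsigned 64-bit base
++   forms, whereas passing an svint64_t (or any other type) is rejected
++   with a message naming svuint32_t and svuint64_t as the valid choices.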
*/ ++vector_type_index ++function_resolver::infer_vector_base_type (unsigned int argno) ++{ ++ type_suffix_index type = infer_vector_type (argno); ++ if (type == NUM_TYPE_SUFFIXES) ++ return NUM_VECTOR_TYPES; ++ ++ if (type == TYPE_SUFFIX_u32 || type == TYPE_SUFFIX_u64) ++ return type_suffixes[type].vector_type; ++ ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects %qs or %qs", get_argument_type (argno), ++ argno + 1, fndecl, "svuint32_t", "svuint64_t"); ++ return NUM_VECTOR_TYPES; ++} ++ ++/* Require argument ARGNO to be a vector displacement in a gather-style ++ address. Return its type on success, otherwise return NUM_VECTOR_TYPES. */ ++vector_type_index ++function_resolver::infer_vector_displacement_type (unsigned int argno) ++{ ++ type_suffix_index type = infer_integer_vector_type (argno); ++ if (type == NUM_TYPE_SUFFIXES) ++ return NUM_VECTOR_TYPES; ++ ++ if (type_suffixes[type].integer_p ++ && (type_suffixes[type].element_bits == 32 ++ || type_suffixes[type].element_bits == 64)) ++ return type_suffixes[type].vector_type; ++ ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a vector of 32-bit or 64-bit integers", ++ get_argument_type (argno), argno + 1, fndecl); ++ return NUM_VECTOR_TYPES; ++} ++ ++/* Require argument ARGNO to be a vector displacement in a gather-style ++ address. There are three possible uses: ++ ++ - for loading into elements of type TYPE (when LOAD_P is true) ++ - for storing from elements of type TYPE (when LOAD_P is false) ++ - for prefetching data (when TYPE is NUM_TYPE_SUFFIXES) ++ ++ The overloaded function's mode suffix determines the units of the ++ displacement (bytes for "_offset", elements for "_index"). ++ ++ Return the associated mode on success, otherwise report an error ++ and return MODE_none. */ ++mode_suffix_index ++function_resolver::resolve_sv_displacement (unsigned int argno, ++ type_suffix_index type, ++ bool load_p) ++{ ++ if (type == NUM_TYPE_SUFFIXES) ++ { ++ /* For prefetches, the base is a void pointer and the displacement ++ can be any valid offset or index type. */ ++ vector_type_index displacement_vector_type ++ = infer_vector_displacement_type (argno); ++ if (displacement_vector_type == NUM_VECTOR_TYPES) ++ return MODE_none; ++ ++ mode_suffix_index mode = find_mode_suffix (NUM_VECTOR_TYPES, ++ displacement_vector_type, ++ displacement_units ()); ++ gcc_assert (mode != MODE_none); ++ return mode; ++ } ++ ++ unsigned int required_bits = type_suffixes[type].element_bits; ++ if (required_bits == 32 ++ && displacement_units () == UNITS_elements ++ && !lookup_form (MODE_s32index, type) ++ && !lookup_form (MODE_u32index, type)) ++ { ++ if (lookup_form (MODE_u32base_index, type)) ++ { ++ if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES) ++ { ++ gcc_assert (!load_p); ++ error_at (location, "when storing %qT, %qE requires a vector" ++ " base and a scalar index", get_vector_type (type), ++ fndecl); ++ } ++ else ++ error_at (location, "%qE requires a vector base and a scalar" ++ " index", fndecl); ++ } ++ else ++ error_at (location, "%qE does not support 32-bit vector type %qT", ++ fndecl, get_vector_type (type)); ++ return MODE_none; ++ } ++ ++ /* Check for some form of vector type, without naming any in particular ++ as being expected. */ ++ type_suffix_index displacement_type = infer_vector_type (argno); ++ if (displacement_type == NUM_TYPE_SUFFIXES) ++ return MODE_none; ++ ++ /* If the displacement type is consistent with the data vector type, ++ try to find the associated mode suffix. 
This will fall through ++ for non-integral displacement types. */ ++ if (type_suffixes[displacement_type].element_bits == required_bits) ++ { ++ vector_type_index displacement_vector_type ++ = type_suffixes[displacement_type].vector_type; ++ mode_suffix_index mode = find_mode_suffix (NUM_VECTOR_TYPES, ++ displacement_vector_type, ++ displacement_units ()); ++ if (mode != MODE_none) ++ { ++ if (mode == MODE_s32offset ++ && !lookup_form (mode, type) ++ && lookup_form (MODE_u32offset, type)) ++ { ++ if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES) ++ error_at (location, "%qE does not support 32-bit sign-extended" ++ " offsets", fndecl); ++ else ++ error_at (location, "%qE does not support sign-extended" ++ " offsets", fndecl); ++ return MODE_none; ++ } ++ return mode; ++ } ++ } ++ ++ if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES) ++ { ++ /* TYPE has been inferred rather than specified by the user, ++ so mention it in the error messages. */ ++ if (load_p) ++ error_at (location, "passing %qT to argument %d of %qE, which when" ++ " loading %qT expects a vector of %d-bit integers", ++ get_argument_type (argno), argno + 1, fndecl, ++ get_vector_type (type), required_bits); ++ else ++ error_at (location, "passing %qT to argument %d of %qE, which when" ++ " storing %qT expects a vector of %d-bit integers", ++ get_argument_type (argno), argno + 1, fndecl, ++ get_vector_type (type), required_bits); ++ } ++ else ++ /* TYPE is part of the function name. */ ++ error_at (location, "passing %qT to argument %d of %qE, which" ++ " expects a vector of %d-bit integers", ++ get_argument_type (argno), argno + 1, fndecl, required_bits); ++ return MODE_none; ++} ++ ++/* Require the arguments starting at ARGNO to form a gather-style address. ++ There are three possible uses: ++ ++ - for loading into elements of type TYPE (when LOAD_P is true) ++ - for storing from elements of type TYPE (when LOAD_P is false) ++ - for prefetching data (when TYPE is NUM_TYPE_SUFFIXES) ++ ++ The three possible addresses are: ++ ++ - a vector base with no displacement ++ - a vector base and a scalar displacement ++ - a scalar (pointer) base and a vector displacement ++ ++ The overloaded function's mode suffix determines whether there is ++ a displacement, and if so, what units it uses: ++ ++ - MODE_none: no displacement ++ - MODE_offset: the displacement is measured in bytes ++ - MODE_index: the displacement is measured in elements ++ ++ Return the mode of the non-overloaded function on success, otherwise ++ report an error and return MODE_none. */ ++mode_suffix_index ++function_resolver::resolve_gather_address (unsigned int argno, ++ type_suffix_index type, ++ bool load_p) ++{ ++ tree actual = get_argument_type (argno); ++ if (actual == error_mark_node) ++ return MODE_none; ++ ++ if (displacement_units () != UNITS_none) ++ { ++ /* Some form of displacement is needed. First handle a scalar ++ pointer base and a vector displacement. */ ++ if (scalar_argument_p (argno)) ++ /* Don't check the pointer type here, since there's only one valid ++ choice. Leave that to the frontend. */ ++ return resolve_sv_displacement (argno + 1, type, load_p); ++ ++ if (!VECTOR_TYPE_P (actual)) ++ { ++ error_at (location, "passing %qT to argument %d of %qE," ++ " which expects a vector or pointer base address", ++ actual, argno + 1, fndecl); ++ return MODE_none; ++ } ++ } ++ ++ /* Check for the correct choice of vector base type. 
*/ ++ vector_type_index base_vector_type; ++ if (type == NUM_TYPE_SUFFIXES) ++ { ++ /* Since prefetches have no type suffix, there is a free choice ++ between 32-bit and 64-bit base addresses. */ ++ base_vector_type = infer_vector_base_type (argno); ++ if (base_vector_type == NUM_VECTOR_TYPES) ++ return MODE_none; ++ } ++ else ++ { ++ /* Check for some form of vector type, without saying which type ++ we expect. */ ++ type_suffix_index base_type = infer_vector_type (argno); ++ if (base_type == NUM_TYPE_SUFFIXES) ++ return MODE_none; ++ ++ /* Check whether the type is the right one. */ ++ unsigned int required_bits = type_suffixes[type].element_bits; ++ gcc_assert (required_bits == 32 || required_bits == 64); ++ type_suffix_index required_type = (required_bits == 32 ++ ? TYPE_SUFFIX_u32 ++ : TYPE_SUFFIX_u64); ++ if (required_type != base_type) ++ { ++ error_at (location, "passing %qT to argument %d of %qE," ++ " which expects %qT", actual, argno + 1, fndecl, ++ get_vector_type (required_type)); ++ return MODE_none; ++ } ++ base_vector_type = type_suffixes[base_type].vector_type; ++ } ++ ++ /* Check the scalar displacement, if any. */ ++ if (displacement_units () != UNITS_none ++ && !require_scalar_type (argno + 1, "int64_t")) ++ return MODE_none; ++ ++ /* Find the appropriate mode suffix. The checks above should have ++ weeded out all erroneous cases. */ ++ for (unsigned int mode_i = 0; mode_i < ARRAY_SIZE (mode_suffixes); ++mode_i) ++ { ++ const mode_suffix_info &mode = mode_suffixes[mode_i]; ++ if (mode.base_vector_type == base_vector_type ++ && mode.displacement_vector_type == NUM_VECTOR_TYPES ++ && mode.displacement_units == displacement_units ()) ++ return mode_suffix_index (mode_i); ++ } ++ ++ gcc_unreachable (); ++} ++ ++/* Require arguments ARGNO and ARGNO + 1 to form an ADR-style address, ++ i.e. one with a vector of base addresses and a vector of displacements. ++ The overloaded function's mode suffix determines the units of the ++ displacement (bytes for "_offset", elements for "_index"). ++ ++ Return the associated mode suffix on success, otherwise report ++ an error and return MODE_none. */ ++mode_suffix_index ++function_resolver::resolve_adr_address (unsigned int argno) ++{ ++ vector_type_index base_type = infer_vector_base_type (argno); ++ if (base_type == NUM_VECTOR_TYPES) ++ return MODE_none; ++ ++ vector_type_index displacement_type ++ = infer_vector_displacement_type (argno + 1); ++ if (displacement_type == NUM_VECTOR_TYPES) ++ return MODE_none; ++ ++ mode_suffix_index mode = find_mode_suffix (base_type, displacement_type, ++ displacement_units ()); ++ if (mode == MODE_none) ++ { ++ if (mode_suffix_id == MODE_offset) ++ error_at (location, "cannot combine a base of type %qT with" ++ " an offset of type %qT", ++ get_argument_type (argno), get_argument_type (argno + 1)); ++ else ++ error_at (location, "cannot combine a base of type %qT with" ++ " an index of type %qT", ++ get_argument_type (argno), get_argument_type (argno + 1)); ++ } ++ return mode; ++} ++ ++/* Require the function to have exactly EXPECTED arguments. Return true ++ if it does, otherwise report an appropriate error. 
*/ ++bool ++function_resolver::check_num_arguments (unsigned int expected) ++{ ++ if (m_arglist.length () < expected) ++ error_at (location, "too few arguments to function %qE", fndecl); ++ else if (m_arglist.length () > expected) ++ error_at (location, "too many arguments to function %qE", fndecl); ++ return m_arglist.length () == expected; ++} ++ ++/* If the function is predicated, check that the first argument is a ++ suitable governing predicate. Also check that there are NOPS further ++ arguments after any governing predicate, but don't check what they are. ++ ++ Return true on success, otherwise report a suitable error. ++ When returning true: ++ ++ - set I to the number of the first unchecked argument. ++ - set NARGS to the total number of arguments. */ ++bool ++function_resolver::check_gp_argument (unsigned int nops, ++ unsigned int &i, unsigned int &nargs) ++{ ++ i = 0; ++ if (pred != PRED_none) ++ { ++ /* Unary merge operations should use resolve_unary instead. */ ++ gcc_assert (nops != 1 || pred != PRED_m); ++ nargs = nops + 1; ++ if (!check_num_arguments (nargs) ++ || !require_vector_type (i, VECTOR_TYPE_svbool_t)) ++ return false; ++ i += 1; ++ } ++ else ++ { ++ nargs = nops; ++ if (!check_num_arguments (nargs)) ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Finish resolving a function whose final argument can be a vector ++ or a scalar, with the function having an implicit "_n" suffix ++ in the latter case. This "_n" form might only exist for certain ++ type suffixes. ++ ++ ARGNO is the index of the final argument. The inferred type suffix ++ was obtained from argument FIRST_ARGNO, which has type FIRST_TYPE. ++ EXPECTED_TCLASS and EXPECTED_BITS describe the expected properties ++ of the final vector or scalar argument, in the same way as for ++ require_derived_vector_type. INFERRED_TYPE is the inferred type ++ suffix itself, or NUM_TYPE_SUFFIXES if it's the same as FIRST_TYPE. ++ ++ Return the function decl of the resolved function on success, ++ otherwise report a suitable error and return error_mark_node. */ ++tree function_resolver:: ++finish_opt_n_resolution (unsigned int argno, unsigned int first_argno, ++ type_suffix_index first_type, ++ type_class_index expected_tclass, ++ unsigned int expected_bits, ++ type_suffix_index inferred_type) ++{ ++ if (inferred_type == NUM_TYPE_SUFFIXES) ++ inferred_type = first_type; ++ tree scalar_form = lookup_form (MODE_n, inferred_type); ++ ++ /* Allow the final argument to be scalar, if an _n form exists. */ ++ if (scalar_argument_p (argno)) ++ { ++ if (scalar_form) ++ return scalar_form; ++ ++ /* Check the vector form normally. If that succeeds, raise an ++ error about having no corresponding _n form. */ ++ tree res = resolve_to (mode_suffix_id, inferred_type); ++ if (res != error_mark_node) ++ error_at (location, "passing %qT to argument %d of %qE, but its" ++ " %qT form does not accept scalars", ++ get_argument_type (argno), argno + 1, fndecl, ++ get_vector_type (first_type)); ++ return error_mark_node; ++ } ++ ++ /* If an _n form does exist, provide a more accurate message than ++ require_derived_vector_type would for arguments that are neither ++ vectors nor scalars. */ ++ if (scalar_form && !require_vector_or_scalar_type (argno)) ++ return error_mark_node; ++ ++ /* Check for the correct vector type. 
*/ ++ if (!require_derived_vector_type (argno, first_argno, first_type, ++ expected_tclass, expected_bits)) ++ return error_mark_node; ++ ++ return resolve_to (mode_suffix_id, inferred_type); ++} ++ ++/* Resolve a (possibly predicated) unary function. If the function uses ++ merge predication or if TREAT_AS_MERGE_P is true, there is an extra ++ vector argument before the governing predicate that specifies the ++ values of inactive elements. This argument has the following ++ properties: ++ ++ - the type class must be the same as for active elements if MERGE_TCLASS ++ is SAME_TYPE_CLASS, otherwise it must be MERGE_TCLASS itself. ++ ++ - the element size must be the same as for active elements if MERGE_BITS ++ is SAME_TYPE_SIZE, otherwise it must be MERGE_BITS itself. ++ ++ Return the function decl of the resolved function on success, ++ otherwise report a suitable error and return error_mark_node. */ ++tree ++function_resolver::resolve_unary (type_class_index merge_tclass, ++ unsigned int merge_bits, ++ bool treat_as_merge_p) ++{ ++ type_suffix_index type; ++ if (pred == PRED_m || treat_as_merge_p) ++ { ++ if (!check_num_arguments (3)) ++ return error_mark_node; ++ if (merge_tclass == SAME_TYPE_CLASS && merge_bits == SAME_SIZE) ++ { ++ /* The inactive elements are the same as the active elements, ++ so we can use normal left-to-right resolution. */ ++ if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES ++ || !require_vector_type (1, VECTOR_TYPE_svbool_t) ++ || !require_matching_vector_type (2, type)) ++ return error_mark_node; ++ } ++ else ++ { ++ /* The inactive element type is a function of the active one, ++ so resolve the active one first. */ ++ if (!require_vector_type (1, VECTOR_TYPE_svbool_t) ++ || (type = infer_vector_type (2)) == NUM_TYPE_SUFFIXES ++ || !require_derived_vector_type (0, 2, type, merge_tclass, ++ merge_bits)) ++ return error_mark_node; ++ } ++ } ++ else ++ { ++ /* We just need to check the predicate (if any) and the single ++ vector argument. */ ++ unsigned int i, nargs; ++ if (!check_gp_argument (1, i, nargs) ++ || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ } ++ ++ /* Handle convert-like functions in which the first type suffix is ++ explicit. */ ++ if (type_suffix_ids[0] != NUM_TYPE_SUFFIXES) ++ return resolve_to (mode_suffix_id, type_suffix_ids[0], type); ++ ++ return resolve_to (mode_suffix_id, type); ++} ++ ++/* Resolve a (possibly predicated) function that takes NOPS like-typed ++ vector arguments followed by NIMM integer immediates. Return the ++ function decl of the resolved function on success, otherwise report ++ a suitable error and return error_mark_node. 
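++   For example (using <arm_sve.h> names purely as an illustration),
++   a call such as
++
++     svuint8_t f (svuint8_t x, svuint8_t y) { return svext (x, y, 3); }
++
++   would be handled with NOPS == 2 and NIMM == 1: both vectors must
++   have the same type and the final argument must be an integer
++   immediate.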
*/ ++tree ++function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm) ++{ ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!check_gp_argument (nops + nimm, i, nargs) ++ || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ i += 1; ++ for (; i < nargs - nimm; ++i) ++ if (!require_matching_vector_type (i, type)) ++ return error_mark_node; ++ ++ for (; i < nargs; ++i) ++ if (!require_integer_immediate (i)) ++ return error_mark_node; ++ ++ return resolve_to (mode_suffix_id, type); ++} ++ ++/* Resolve a (possibly predicated) function that offers a choice between ++ taking: ++ ++ - NOPS like-typed vector arguments or ++ - NOPS - 1 like-typed vector arguments followed by a scalar argument ++ ++ Return the function decl of the resolved function on success, ++ otherwise report a suitable error and return error_mark_node. */ ++tree ++function_resolver::resolve_uniform_opt_n (unsigned int nops) ++{ ++ unsigned int i, nargs; ++ type_suffix_index type; ++ if (!check_gp_argument (nops, i, nargs) ++ || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) ++ return error_mark_node; ++ ++ unsigned int first_arg = i++; ++ for (; i < nargs - 1; ++i) ++ if (!require_matching_vector_type (i, type)) ++ return error_mark_node; ++ ++ return finish_opt_n_resolution (i, first_arg, type); ++} ++ ++/* If the call is erroneous, report an appropriate error and return ++ error_mark_node. Otherwise, if the function is overloaded, return ++ the decl of the non-overloaded function. Return NULL_TREE otherwise, ++ indicating that the call should be processed in the normal way. */ ++tree ++function_resolver::resolve () ++{ ++ return shape->resolve (*this); ++} ++ ++function_checker::function_checker (location_t location, ++ const function_instance &instance, ++ tree fndecl, tree fntype, ++ unsigned int nargs, tree *args) ++ : function_call_info (location, instance, fndecl), ++ m_fntype (fntype), m_nargs (nargs), m_args (args), ++ /* We don't have to worry about unary _m operations here, since they ++ never have arguments that need checking. */ ++ m_base_arg (pred != PRED_none ? 1 : 0) ++{ ++} ++ ++/* Return true if argument ARGNO exists. which it might not for ++ erroneous calls. It is safe to wave through checks if this ++ function returns false. */ ++bool ++function_checker::argument_exists_p (unsigned int argno) ++{ ++ gcc_assert (argno < (unsigned int) type_num_arguments (m_fntype)); ++ return argno < m_nargs; ++} ++ ++/* Check that argument ARGNO is an integer constant expression and ++ store its value in VALUE_OUT if so. The caller should first ++ check that argument ARGNO exists. */ ++bool ++function_checker::require_immediate (unsigned int argno, ++ HOST_WIDE_INT &value_out) ++{ ++ gcc_assert (argno < m_nargs); ++ tree arg = m_args[argno]; ++ ++ /* The type and range are unsigned, so read the argument as an ++ unsigned rather than signed HWI. */ ++ if (!tree_fits_uhwi_p (arg)) ++ { ++ report_non_ice (location, fndecl, argno); ++ return false; ++ } ++ ++ /* ...but treat VALUE_OUT as signed for error reporting, since printing ++ -1 is more user-friendly than the maximum uint64_t value. */ ++ value_out = tree_to_uhwi (arg); ++ return true; ++} ++ ++/* Check that argument REL_ARGNO is an integer constant expression that ++ has the value VALUE0 or VALUE1. REL_ARGNO counts from the end of the ++ predication arguments. 
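++   For example, the rotation argument of an intrinsic such as svcadd
++   is checked this way: svcadd_x (pg, x, y, 90) is accepted, while a
++   variable rotation or any constant other than 90 or 270 is rejected.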
*/ ++bool ++function_checker::require_immediate_either_or (unsigned int rel_argno, ++ HOST_WIDE_INT value0, ++ HOST_WIDE_INT value1) ++{ ++ unsigned int argno = m_base_arg + rel_argno; ++ if (!argument_exists_p (argno)) ++ return true; ++ ++ HOST_WIDE_INT actual; ++ if (!require_immediate (argno, actual)) ++ return false; ++ ++ if (actual != value0 && actual != value1) ++ { ++ report_neither_nor (location, fndecl, argno, actual, 90, 270); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Check that argument REL_ARGNO is an integer constant expression that has ++ a valid value for enumeration type TYPE. REL_ARGNO counts from the end ++ of the predication arguments. */ ++bool ++function_checker::require_immediate_enum (unsigned int rel_argno, tree type) ++{ ++ unsigned int argno = m_base_arg + rel_argno; ++ if (!argument_exists_p (argno)) ++ return true; ++ ++ HOST_WIDE_INT actual; ++ if (!require_immediate (argno, actual)) ++ return false; ++ ++ for (tree entry = TYPE_VALUES (type); entry; entry = TREE_CHAIN (entry)) ++ { ++ /* The value is an INTEGER_CST for C and a CONST_DECL wrapper ++ around an INTEGER_CST for C++. */ ++ tree value = TREE_VALUE (entry); ++ if (TREE_CODE (value) == CONST_DECL) ++ value = DECL_INITIAL (value); ++ if (wi::to_widest (value) == actual) ++ return true; ++ } ++ ++ report_not_enum (location, fndecl, argno, actual, type); ++ return false; ++} ++ ++/* Check that argument REL_ARGNO is suitable for indexing argument ++ REL_ARGNO - 1, in groups of GROUP_SIZE elements. REL_ARGNO counts ++ from the end of the predication arguments. */ ++bool ++function_checker::require_immediate_lane_index (unsigned int rel_argno, ++ unsigned int group_size) ++{ ++ unsigned int argno = m_base_arg + rel_argno; ++ if (!argument_exists_p (argno)) ++ return true; ++ ++ /* Get the type of the previous argument. tree_argument_type wants a ++ 1-based number, whereas ARGNO is 0-based. */ ++ machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, argno)); ++ gcc_assert (VECTOR_MODE_P (mode)); ++ unsigned int nlanes = 128 / (group_size * GET_MODE_UNIT_BITSIZE (mode)); ++ return require_immediate_range (rel_argno, 0, nlanes - 1); ++} ++ ++/* Check that argument REL_ARGNO is an integer constant expression that ++ has one of the given values. */ ++bool ++function_checker::require_immediate_one_of (unsigned int rel_argno, ++ HOST_WIDE_INT value0, ++ HOST_WIDE_INT value1, ++ HOST_WIDE_INT value2, ++ HOST_WIDE_INT value3) ++{ ++ unsigned int argno = m_base_arg + rel_argno; ++ if (!argument_exists_p (argno)) ++ return true; ++ ++ HOST_WIDE_INT actual; ++ if (!require_immediate (argno, actual)) ++ return false; ++ ++ if (actual != value0 ++ && actual != value1 ++ && actual != value2 ++ && actual != value3) ++ { ++ report_not_one_of (location, fndecl, argno, actual, ++ value0, value1, value2, value3); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Check that argument REL_ARGNO is an integer constant expression in the ++ range [MIN, MAX]. REL_ARGNO counts from the end of the predication ++ arguments. */ ++bool ++function_checker::require_immediate_range (unsigned int rel_argno, ++ HOST_WIDE_INT min, ++ HOST_WIDE_INT max) ++{ ++ unsigned int argno = m_base_arg + rel_argno; ++ if (!argument_exists_p (argno)) ++ return true; ++ ++ /* Required because of the tree_to_uhwi -> HOST_WIDE_INT conversion ++ in require_immediate. 
*/ ++ gcc_assert (min >= 0 && min <= max); ++ HOST_WIDE_INT actual; ++ if (!require_immediate (argno, actual)) ++ return false; ++ ++ if (!IN_RANGE (actual, min, max)) ++ { ++ report_out_of_range (location, fndecl, argno, actual, min, max); ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Perform semantic checks on the call. Return true if the call is valid, ++ otherwise report a suitable error. */ ++bool ++function_checker::check () ++{ ++ function_args_iterator iter; ++ tree type; ++ unsigned int i = 0; ++ FOREACH_FUNCTION_ARGS (m_fntype, type, iter) ++ { ++ if (type == void_type_node || i >= m_nargs) ++ break; ++ ++ if (i >= m_base_arg ++ && TREE_CODE (type) == ENUMERAL_TYPE ++ && !require_immediate_enum (i - m_base_arg, type)) ++ return false; ++ ++ i += 1; ++ } ++ ++ return shape->check (*this); ++} ++ ++gimple_folder::gimple_folder (const function_instance &instance, tree fndecl, ++ gimple_stmt_iterator *gsi_in, gcall *call_in) ++ : function_call_info (gimple_location (call_in), instance, fndecl), ++ gsi (gsi_in), call (call_in), lhs (gimple_call_lhs (call_in)) ++{ ++} ++ ++/* Convert predicate argument ARGNO so that it has the type appropriate for ++ an operation on VECTYPE. Add any new statements to STMTS. */ ++tree ++gimple_folder::convert_pred (gimple_seq &stmts, tree vectype, ++ unsigned int argno) ++{ ++ tree pred = gimple_call_arg (call, argno); ++ if (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (pred)), ++ TYPE_VECTOR_SUBPARTS (vectype))) ++ return pred; ++ ++ return gimple_build (&stmts, VIEW_CONVERT_EXPR, ++ truth_type_for (vectype), pred); ++} ++ ++/* Return a pointer to the address in a contiguous load or store, ++ given that each memory vector has type VECTYPE. Add any new ++ statements to STMTS. */ ++tree ++gimple_folder::fold_contiguous_base (gimple_seq &stmts, tree vectype) ++{ ++ tree base = gimple_call_arg (call, 1); ++ if (mode_suffix_id == MODE_vnum) ++ { ++ tree offset = gimple_call_arg (call, 2); ++ offset = gimple_convert (&stmts, sizetype, offset); ++ offset = gimple_build (&stmts, MULT_EXPR, sizetype, offset, ++ TYPE_SIZE_UNIT (vectype)); ++ base = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (base), ++ base, offset); ++ } ++ return base; ++} ++ ++/* Return the alignment and TBAA argument to an internal load or store ++ function like IFN_MASK_LOAD or IFN_MASK_STORE, given that it accesses ++ memory elements of type TYPE. */ ++tree ++gimple_folder::load_store_cookie (tree type) ++{ ++ return build_int_cst (build_pointer_type (type), TYPE_ALIGN_UNIT (type)); ++} ++ ++/* Fold the call to a call to INSTANCE, with the same arguments. */ ++gimple * ++gimple_folder::redirect_call (const function_instance &instance) ++{ ++ registered_function *rfn ++ = function_table->find_with_hash (instance, instance.hash ()); ++ if (!rfn) ++ return NULL; ++ ++ gimple_call_set_fndecl (call, rfn->decl); ++ return call; ++} ++ ++/* Fold the call to a PTRUE, taking the element size from type suffix 0. */ ++gimple * ++gimple_folder::fold_to_ptrue () ++{ ++ tree svbool_type = TREE_TYPE (lhs); ++ tree bool_type = TREE_TYPE (svbool_type); ++ unsigned int element_bytes = type_suffix (0).element_bytes; ++ ++ /* The return type is svbool_t for all type suffixes, thus for b8 we ++ want { 1, 1, 1, 1, ... }, for b16 we want { 1, 0, 1, 0, ... }, etc. 
*/ ++ tree_vector_builder builder (svbool_type, element_bytes, 1); ++ builder.quick_push (build_all_ones_cst (bool_type)); ++ for (unsigned int i = 1; i < element_bytes; ++i) ++ builder.quick_push (build_zero_cst (bool_type)); ++ return gimple_build_assign (lhs, builder.build ()); ++} ++ ++/* Fold the call to a PFALSE. */ ++gimple * ++gimple_folder::fold_to_pfalse () ++{ ++ return gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); ++} ++ ++/* Fold an operation to a constant predicate in which the first VL ++ elements are set and the rest are clear. Take the element size ++ from type suffix 0. */ ++gimple * ++gimple_folder::fold_to_vl_pred (unsigned int vl) ++{ ++ tree vectype = TREE_TYPE (lhs); ++ tree element_type = TREE_TYPE (vectype); ++ tree minus_one = build_all_ones_cst (element_type); ++ tree zero = build_zero_cst (element_type); ++ unsigned int element_bytes = type_suffix (0).element_bytes; ++ ++ /* Construct COUNT elements that contain the ptrue followed by ++ a repeating sequence of COUNT elements. */ ++ unsigned int count = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vectype)); ++ gcc_assert (vl * element_bytes <= count); ++ tree_vector_builder builder (vectype, count, 2); ++ for (unsigned int i = 0; i < count * 2; ++i) ++ { ++ bool bit = (i & (element_bytes - 1)) == 0 && i < vl * element_bytes; ++ builder.quick_push (bit ? minus_one : zero); ++ } ++ return gimple_build_assign (lhs, builder.build ()); ++} ++ ++/* Try to fold the call. Return the new statement on success and null ++ on failure. */ ++gimple * ++gimple_folder::fold () ++{ ++ /* Don't fold anything when SVE is disabled; emit an error during ++ expansion instead. */ ++ if (!TARGET_SVE) ++ return NULL; ++ ++ /* Punt if the function has a return type and no result location is ++ provided. The attributes should allow target-independent code to ++ remove the calls if appropriate. */ ++ if (!lhs && TREE_TYPE (gimple_call_fntype (call)) != void_type_node) ++ return NULL; ++ ++ return base->fold (*this); ++} ++ ++function_expander::function_expander (const function_instance &instance, ++ tree fndecl, tree call_expr_in, ++ rtx possible_target_in) ++ : function_call_info (EXPR_LOCATION (call_expr_in), instance, fndecl), ++ call_expr (call_expr_in), possible_target (possible_target_in) ++{ ++} ++ ++/* Return the handler of direct optab OP for type suffix SUFFIX_I. */ ++insn_code ++function_expander::direct_optab_handler (optab op, unsigned int suffix_i) ++{ ++ return ::direct_optab_handler (op, vector_mode (suffix_i)); ++} ++ ++/* Choose between signed and unsigned direct optabs SIGNED_OP and ++ UNSIGNED_OP based on the signedness of type suffix SUFFIX_I, then ++ pick the appropriate optab handler for the mode. Use MODE as the ++ mode if given, otherwise use the mode of type suffix SUFFIX_I. */ ++insn_code ++function_expander::direct_optab_handler_for_sign (optab signed_op, ++ optab unsigned_op, ++ unsigned int suffix_i, ++ machine_mode mode) ++{ ++ if (mode == VOIDmode) ++ mode = vector_mode (suffix_i); ++ optab op = type_suffix (suffix_i).unsigned_p ? unsigned_op : signed_op; ++ return ::direct_optab_handler (op, mode); ++} ++ ++/* Return true if X overlaps any input. */ ++bool ++function_expander::overlaps_input_p (rtx x) ++{ ++ for (unsigned int i = 0; i < args.length (); ++i) ++ if (reg_overlap_mentioned_p (x, args[i])) ++ return true; ++ return false; ++} ++ ++/* Return the base address for a contiguous load or store function. ++ MEM_MODE is the mode of the addressed memory. 
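++   For example, for a non-extending form such as svld1_vnum (pg, base, 2),
++   the returned address is BASE plus two full vectors, i.e. base plus
++   2 * svcntb () bytes; extending loads instead scale VNUM by the size
++   of the memory vector.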
*/ ++rtx ++function_expander::get_contiguous_base (machine_mode mem_mode) ++{ ++ rtx base = args[1]; ++ if (mode_suffix_id == MODE_vnum) ++ { ++ /* Use the size of the memory mode for extending loads and truncating ++ stores. Use the size of a full vector for non-extending loads ++ and non-truncating stores (including svld[234] and svst[234]). */ ++ poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode), ++ BYTES_PER_SVE_VECTOR); ++ rtx offset = gen_int_mode (size, Pmode); ++ offset = simplify_gen_binary (MULT, Pmode, args[2], offset); ++ base = simplify_gen_binary (PLUS, Pmode, base, offset); ++ } ++ return base; ++} ++ ++/* For a function that does the equivalent of: ++ ++ OUTPUT = COND ? FN (INPUTS) : FALLBACK; ++ ++ return the value of FALLBACK. ++ ++ MODE is the mode of OUTPUT. NOPS is the number of operands in INPUTS. ++ MERGE_ARGNO is the argument that provides FALLBACK for _m functions, ++ or DEFAULT_MERGE_ARGNO if we should apply the usual rules. ++ ++ ARGNO is the caller's index into args. If the returned value is ++ argument 0 (as for unary _m operations), increment ARGNO past the ++ returned argument. */ ++rtx ++function_expander::get_fallback_value (machine_mode mode, unsigned int nops, ++ unsigned int merge_argno, ++ unsigned int &argno) ++{ ++ if (pred == PRED_z) ++ return CONST0_RTX (mode); ++ ++ gcc_assert (pred == PRED_m || pred == PRED_x); ++ if (merge_argno == DEFAULT_MERGE_ARGNO) ++ merge_argno = nops == 1 && pred == PRED_m ? 0 : 1; ++ ++ if (merge_argno == 0) ++ return args[argno++]; ++ ++ return args[merge_argno]; ++} ++ ++/* Return a REG rtx that can be used for the result of the function, ++ using the preferred target if suitable. */ ++rtx ++function_expander::get_reg_target () ++{ ++ machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))); ++ if (!possible_target || GET_MODE (possible_target) != target_mode) ++ possible_target = gen_reg_rtx (target_mode); ++ return possible_target; ++} ++ ++/* As for get_reg_target, but make sure that the returned REG does not ++ overlap any inputs. */ ++rtx ++function_expander::get_nonoverlapping_reg_target () ++{ ++ if (possible_target && overlaps_input_p (possible_target)) ++ possible_target = NULL_RTX; ++ return get_reg_target (); ++} ++ ++/* Add an output operand to the instruction we're building, which has ++ code ICODE. Bind the output to the preferred target rtx if possible. */ ++void ++function_expander::add_output_operand (insn_code icode) ++{ ++ unsigned int opno = m_ops.length (); ++ machine_mode mode = insn_data[icode].operand[opno].mode; ++ m_ops.safe_grow (opno + 1); ++ create_output_operand (&m_ops.last (), possible_target, mode); ++} ++ ++/* Add an input operand to the instruction we're building, which has ++ code ICODE. Calculate the value of the operand as follows: ++ ++ - If the operand is a vector and X is not, broadcast X to fill a ++ vector of the appropriate mode. ++ ++ - Otherwise, if the operand is a predicate, coerce X to have the ++ mode that the instruction expects. In this case X is known to be ++ VNx16BImode (the mode of svbool_t). ++ ++ - Otherwise use X directly. The expand machinery checks that X has ++ the right mode for the instruction. 
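++   For example (illustration only), when expanding svadd_n_s32_x, the
++   scalar addend is broadcast to a VNx4SI vector, and the svbool_t
++   argument, which always has mode VNx16BI, is converted to the VNx4BI
++   predicate mode that the add pattern expects.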
*/ ++void ++function_expander::add_input_operand (insn_code icode, rtx x) ++{ ++ unsigned int opno = m_ops.length (); ++ const insn_operand_data &operand = insn_data[icode].operand[opno]; ++ machine_mode mode = operand.mode; ++ if (mode == VOIDmode) ++ { ++ /* The only allowable use of VOIDmode is the wildcard ++ aarch64_any_register_operand, which is used to avoid ++ combinatorial explosion in the reinterpret patterns. */ ++ gcc_assert (operand.predicate == aarch64_any_register_operand); ++ mode = GET_MODE (x); ++ } ++ else if (!VECTOR_MODE_P (GET_MODE (x)) && VECTOR_MODE_P (mode)) ++ x = expand_vector_broadcast (mode, x); ++ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) ++ { ++ gcc_assert (GET_MODE (x) == VNx16BImode); ++ x = gen_lowpart (mode, x); ++ } ++ m_ops.safe_grow (m_ops.length () + 1); ++ create_input_operand (&m_ops.last (), x, mode); ++} ++ ++/* Add an integer operand with value X to the instruction. */ ++void ++function_expander::add_integer_operand (HOST_WIDE_INT x) ++{ ++ m_ops.safe_grow (m_ops.length () + 1); ++ create_integer_operand (&m_ops.last (), x); ++} ++ ++/* Add a memory operand with mode MODE and address ADDR. */ ++void ++function_expander::add_mem_operand (machine_mode mode, rtx addr) ++{ ++ /* Exception for OImode for the ld1ro intrinsics. ++ They act on 256 bit octaword data, and it's just easier to use a scalar ++ mode to represent that than add a new vector mode solely for the purpose ++ of this intrinsic. */ ++ gcc_assert (VECTOR_MODE_P (mode) || mode == OImode); ++ rtx mem = gen_rtx_MEM (mode, memory_address (mode, addr)); ++ /* The memory is only guaranteed to be element-aligned. */ ++ set_mem_align (mem, GET_MODE_ALIGNMENT (GET_MODE_INNER (mode))); ++ add_fixed_operand (mem); ++} ++ ++/* Add an address operand with value X. The static operand data says ++ what mode and form the address must have. */ ++void ++function_expander::add_address_operand (rtx x) ++{ ++ m_ops.safe_grow (m_ops.length () + 1); ++ create_address_operand (&m_ops.last (), x); ++} ++ ++/* Add an operand that must be X. The only way of legitimizing an ++ invalid X is to reload the address of a MEM. */ ++void ++function_expander::add_fixed_operand (rtx x) ++{ ++ m_ops.safe_grow (m_ops.length () + 1); ++ create_fixed_operand (&m_ops.last (), x); ++} ++ ++/* Generate instruction ICODE, given that its operands have already ++ been added to M_OPS. Return the value of the first operand. */ ++rtx ++function_expander::generate_insn (insn_code icode) ++{ ++ expand_insn (icode, m_ops.length (), m_ops.address ()); ++ return function_returns_void_p () ? const0_rtx : m_ops[0].value; ++} ++ ++/* Convert the arguments to a gather/scatter function into the ++ associated md operands. Argument ARGNO is the scalar or vector base and ++ argument ARGNO + 1 is the scalar or vector displacement (if applicable). ++ The md pattern expects: ++ ++ - a scalar base ++ - a vector displacement ++ ++ If SCALED_P is true, it also expects: ++ ++ - a const_int that is 1 if the displacement is zero-extended from 32 bits ++ - a scaling multiplier (1 for bytes, 2 for .h indices, etc.). ++ ++ If SCALED_P is false, the displacement is implicitly zero-extended ++ and the scaling multiplier is implicitly 1. 
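++
++   (Illustrative note, not taken from the original source: in ACLE
++   terms an "index" displacement counts elements while an "offset"
++   displacement counts bytes, so
++
++     svld1_gather_s32index_s32 (pg, base, indices)
++
++   scales each index by the 4-byte element size before adding it to
++   BASE, whereas the corresponding _s32offset form adds the
++   displacements unscaled.)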
*/ ++void ++function_expander::prepare_gather_address_operands (unsigned int argno, ++ bool scaled_p) ++{ ++ machine_mode mem_mode = memory_vector_mode (); ++ tree vector_type = base_vector_type (); ++ units_index units = displacement_units (); ++ int shift_idx = -1; ++ if (units == UNITS_none) ++ { ++ /* Vector base, no displacement. Convert to an integer zero base ++ and a vector byte offset. */ ++ args.quick_insert (argno, const0_rtx); ++ units = UNITS_bytes; ++ } ++ else if (vector_type) ++ { ++ /* Vector base, scalar displacement. Convert to a scalar base and ++ a vector byte offset. */ ++ std::swap (args[argno], args[argno + 1]); ++ if (units == UNITS_elements) ++ shift_idx = argno; ++ } ++ else ++ { ++ /* Scalar base, vector displacement. This is the order that the md ++ pattern wants. */ ++ if (Pmode == SImode) ++ args[argno] = simplify_gen_unary (ZERO_EXTEND, DImode, ++ args[argno], SImode); ++ vector_type = displacement_vector_type (); ++ if (units == UNITS_elements && !scaled_p) ++ shift_idx = argno + 1; ++ } ++ tree scalar_displacement_type = TREE_TYPE (vector_type); ++ ++ if (shift_idx >= 0) ++ { ++ machine_mode arg_mode = GET_MODE (args[shift_idx]); ++ if (arg_mode == VOIDmode) ++ arg_mode = DImode; ++ unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mem_mode); ++ rtx shift = gen_int_mode (exact_log2 (elt_bytes), DImode); ++ args[shift_idx] = simplify_gen_binary (ASHIFT, arg_mode, ++ args[shift_idx], shift); ++ units = UNITS_bytes; ++ } ++ ++ bool uxtw_p = (TYPE_PRECISION (scalar_displacement_type) == 64 ++ || TYPE_UNSIGNED (scalar_displacement_type)); ++ unsigned int scale = (units == UNITS_bytes ++ ? 1 : GET_MODE_UNIT_SIZE (mem_mode)); ++ ++ if (scaled_p) ++ { ++ args.quick_insert (argno + 2, GEN_INT (uxtw_p)); ++ args.quick_insert (argno + 3, GEN_INT (scale)); ++ } ++ else ++ gcc_assert (uxtw_p && scale == 1); ++} ++ ++/* The final argument is an immediate svprfop value. Add two fake arguments ++ to represent the rw and locality operands of a PREFETCH rtx. */ ++void ++function_expander::prepare_prefetch_operands () ++{ ++ unsigned int prfop = INTVAL (args.last ()); ++ /* Bit 3 of the prfop selects stores over loads. */ ++ args.quick_push (GEN_INT ((prfop & 8) != 0)); ++ /* Bits 1 and 2 specify the locality; 0-based for svprfop but ++ 1-based for PREFETCH. */ ++ args.quick_push (GEN_INT (((prfop >> 1) & 3) + 1)); ++} ++ ++/* Add a dummy argument to indicate whether predicate argument ARGNO ++ is all-true when interpreted in mode PRED_MODE. The hint goes ++ immediately after ARGNO. */ ++void ++function_expander::add_ptrue_hint (unsigned int argno, machine_mode pred_mode) ++{ ++ rtx pred = gen_lowpart (pred_mode, args[argno]); ++ int hint = (pred == CONSTM1_RTX (pred_mode) ++ ? SVE_KNOWN_PTRUE : SVE_MAYBE_NOT_PTRUE); ++ args.quick_insert (argno + 1, gen_int_mode (hint, SImode)); ++} ++ ++/* Rotate inputs args[START:END] one position to the left, so that ++ args[START] becomes args[END - 1]. */ ++void ++function_expander::rotate_inputs_left (unsigned int start, unsigned int end) ++{ ++ rtx new_last = args[start]; ++ for (unsigned int i = start; i < end - 1; ++i) ++ args[i] = args[i + 1]; ++ args[end - 1] = new_last; ++} ++ ++/* Return true if the negation of argument ARGNO can be folded away, ++ replacing it with the negated value if so. MODE is the associated ++ vector mode, but the argument could be a single element. The main ++ case this handles is constant arguments. 
*/ ++bool ++function_expander::try_negating_argument (unsigned int argno, ++ machine_mode mode) ++{ ++ rtx x = args[argno]; ++ if (!VECTOR_MODE_P (GET_MODE (x))) ++ mode = GET_MODE_INNER (mode); ++ ++ x = simplify_unary_operation (NEG, mode, x, mode); ++ if (!x) ++ return false; ++ ++ args[argno] = x; ++ return true; ++} ++ ++/* Implement the call using instruction ICODE, with a 1:1 mapping between ++ arguments and input operands. */ ++rtx ++function_expander::use_exact_insn (insn_code icode) ++{ ++ unsigned int nops = insn_data[icode].n_operands; ++ if (!function_returns_void_p ()) ++ { ++ add_output_operand (icode); ++ nops -= 1; ++ } ++ for (unsigned int i = 0; i < nops; ++i) ++ add_input_operand (icode, args[i]); ++ return generate_insn (icode); ++} ++ ++/* Implement the call using instruction ICODE, which does not use a ++ governing predicate. We must therefore drop the GP from an _x call. */ ++rtx ++function_expander::use_unpred_insn (insn_code icode) ++{ ++ /* We can't drop the predicate for _z and _m. */ ++ gcc_assert (pred == PRED_x || pred == PRED_none); ++ /* Discount the output operand. */ ++ unsigned int nops = insn_data[icode].n_operands - 1; ++ /* Drop the predicate argument in the case of _x predication. */ ++ unsigned int bias = (pred == PRED_x ? 1 : 0); ++ unsigned int i = 0; ++ ++ add_output_operand (icode); ++ for (; i < nops; ++i) ++ add_input_operand (icode, args[i + bias]); ++ ++ return generate_insn (icode); ++} ++ ++/* Implement the call using instruction ICODE, which is a predicated ++ operation that returns arbitrary values for inactive lanes. */ ++rtx ++function_expander::use_pred_x_insn (insn_code icode) ++{ ++ /* At present we never need to handle PRED_none, which would involve ++ creating a new predicate rather than using one supplied by the user. */ ++ gcc_assert (pred == PRED_x); ++ /* Discount the output operand. */ ++ unsigned int nops = args.length () - 1; ++ ++ bool has_float_operand_p = FLOAT_MODE_P (insn_data[icode].operand[0].mode); ++ ++ /* Add the normal operands. */ ++ add_output_operand (icode); ++ add_input_operand (icode, args[0]); ++ for (unsigned int i = 0; i < nops; ++i) ++ { ++ add_input_operand (icode, args[i + 1]); ++ if (FLOAT_MODE_P (GET_MODE (args[i + 1]))) ++ has_float_operand_p = true; ++ } ++ ++ if (has_float_operand_p) ++ { ++ /* Add a flag that indicates whether unpredicated instructions ++ are allowed. */ ++ rtx pred = m_ops[1].value; ++ if (flag_trapping_math && pred != CONST1_RTX (GET_MODE (pred))) ++ add_integer_operand (SVE_STRICT_GP); ++ else ++ add_integer_operand (SVE_RELAXED_GP); ++ } ++ ++ return generate_insn (icode); ++} ++ ++/* Implement the call using instruction ICODE, which does the equivalent of: ++ ++ OUTPUT = COND ? FN (INPUTS) : FALLBACK; ++ ++ The instruction operands are in the order above: OUTPUT, COND, INPUTS ++ and FALLBACK. MERGE_ARGNO is the argument that provides FALLBACK for _m ++ functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */ ++rtx ++function_expander::use_cond_insn (insn_code icode, unsigned int merge_argno) ++{ ++ /* At present we never need to handle PRED_none, which would involve ++ creating a new predicate rather than using one supplied by the user. */ ++ gcc_assert (pred != PRED_none); ++ /* Discount the output, predicate and fallback value. 
*/ ++ unsigned int nops = insn_data[icode].n_operands - 3; ++ machine_mode mode = insn_data[icode].operand[0].mode; ++ ++ unsigned int opno = 0; ++ rtx fallback_arg = get_fallback_value (mode, nops, merge_argno, opno); ++ rtx pred = args[opno++]; ++ ++ add_output_operand (icode); ++ add_input_operand (icode, pred); ++ for (unsigned int i = 0; i < nops; ++i) ++ add_input_operand (icode, args[opno + i]); ++ add_input_operand (icode, fallback_arg); ++ return generate_insn (icode); ++} ++ ++/* Implement the call using instruction ICODE, which is a select-like ++ operation with the following operands: ++ ++ 0: output ++ 1: true value ++ 2: false value ++ 3: predicate ++ ++ MERGE_ARGNO is the argument that provides the "false" value for _m ++ functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */ ++rtx ++function_expander::use_vcond_mask_insn (insn_code icode, ++ unsigned int merge_argno) ++{ ++ machine_mode mode = vector_mode (0); ++ ++ unsigned int opno = 0; ++ rtx false_arg = get_fallback_value (mode, 1, merge_argno, opno); ++ rtx pred_arg = args[opno++]; ++ rtx true_arg = args[opno++]; ++ ++ add_output_operand (icode); ++ add_input_operand (icode, true_arg); ++ add_input_operand (icode, false_arg); ++ add_input_operand (icode, pred_arg); ++ return generate_insn (icode); ++} ++ ++/* Implement the call using instruction ICODE, which loads memory operand 1 ++ into register operand 0 under the control of predicate operand 2. */ ++rtx ++function_expander::use_contiguous_load_insn (insn_code icode) ++{ ++ machine_mode mem_mode = memory_vector_mode (); ++ ++ add_output_operand (icode); ++ add_mem_operand (mem_mode, get_contiguous_base (mem_mode)); ++ add_input_operand (icode, args[0]); ++ return generate_insn (icode); ++} ++ ++/* Implement the call using instruction ICODE, which prefetches from ++ address operand 1 under the control of predicate operand 0. ++ Operands 2, 3 and 4 respectively specify the svprfop value, ++ the PREFETCH rw flag and the PREFETCH locality. */ ++rtx ++function_expander::use_contiguous_prefetch_insn (insn_code icode) ++{ ++ add_input_operand (icode, args[0]); ++ add_address_operand (get_contiguous_base (VNx16QImode)); ++ for (unsigned int i = args.length () - 3; i < args.length (); ++i) ++ add_input_operand (icode, args[i]); ++ return generate_insn (icode); ++} ++ ++/* Implement the call using instruction ICODE, which stores register operand 1 ++ into memory operand 0 under the control of predicate operand 2. */ ++rtx ++function_expander::use_contiguous_store_insn (insn_code icode) ++{ ++ machine_mode mem_mode = memory_vector_mode (); ++ ++ add_mem_operand (mem_mode, get_contiguous_base (mem_mode)); ++ add_input_operand (icode, args.last ()); ++ add_input_operand (icode, args[0]); ++ return generate_insn (icode); ++} ++ ++/* Implement the call using one of the following strategies, chosen in order: ++ ++ (1) "aarch64_pred__z" for PRED_z predicate functions ++ ++ (2) "aarch64_pred_" for PRED_x functions ++ ++ (3) a normal unpredicated optab for PRED_none and PRED_x functions, ++ dropping the predicate in the latter case ++ ++ (4) "cond_" otherwise ++ ++ where corresponds to: ++ ++ - CODE_FOR_SINT for signed integers ++ - CODE_FOR_UINT for unsigned integers ++ - UNSPEC_FOR_FP for floating-point values ++ ++ MERGE_ARGNO is the argument that provides the values of inactive lanes for ++ _m functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. 
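++
++   (Illustrative note, not taken from the original source: for an
++   integer addition the three predication forms behave as
++
++     svadd_s32_z (pg, a, b)   inactive lanes become zero
++     svadd_s32_m (pg, a, b)   inactive lanes are taken from A
++     svadd_s32_x (pg, a, b)   inactive lanes are undefined
++
++   which is why _z and _m need predicated or conditional patterns
++   while _x can often fall back to an unpredicated optab.)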
*/ ++rtx ++function_expander::map_to_rtx_codes (rtx_code code_for_sint, ++ rtx_code code_for_uint, ++ int unspec_for_fp, ++ unsigned int merge_argno) ++{ ++ machine_mode mode = vector_mode (0); ++ rtx_code code = (type_suffix (0).unsigned_p ? code_for_uint : code_for_sint); ++ insn_code icode; ++ ++ /* Handle predicate logic operations, which always use _z predication. */ ++ if (type_suffix (0).tclass == TYPE_bool) ++ { ++ gcc_assert (pred == PRED_z && code_for_uint == code_for_sint); ++ return use_exact_insn (code_for_aarch64_pred_z (code, mode)); ++ } ++ ++ /* First try using UNSPEC_PRED_X patterns for _x predication, ++ if available. */ ++ if (pred == PRED_x) ++ { ++ if (type_suffix (0).integer_p) ++ icode = maybe_code_for_aarch64_pred (code, mode); ++ else ++ icode = maybe_code_for_aarch64_pred (unspec_for_fp, mode); ++ if (icode != CODE_FOR_nothing) ++ return use_pred_x_insn (icode); ++ } ++ ++ /* Otherwise expand PRED_none and PRED_x operations without a predicate. ++ Floating-point operations conventionally use the signed rtx code. */ ++ if (pred == PRED_none || pred == PRED_x) ++ return use_unpred_insn (direct_optab_handler (code_to_optab (code), 0)); ++ ++ /* Don't use cond_*_optabs here, since not all codes have one yet. */ ++ if (type_suffix (0).integer_p) ++ icode = code_for_cond (code, mode); ++ else ++ icode = code_for_cond (unspec_for_fp, mode); ++ return use_cond_insn (icode, merge_argno); ++} ++ ++/* Implement the call using one of the following strategies, chosen in order: ++ ++ (1) "aarch64_pred_" for PRED_x functions; this is a ++ predicated pattern ++ ++ (2) "aarch64_sve_" for PRED_none and PRED_x functions; ++ this is an unpredicated pattern ++ ++ (3) "cond_" otherwise ++ ++ where corresponds to: ++ ++ - UNSPEC_FOR_SINT for signed integers ++ - UNSPEC_FOR_UINT for unsigned integers ++ - UNSPEC_FOR_FP for floating-point values ++ ++ MERGE_ARGNO is the argument that provides the values of inactive lanes for ++ _m functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */ ++rtx ++function_expander::map_to_unspecs (int unspec_for_sint, int unspec_for_uint, ++ int unspec_for_fp, unsigned int merge_argno) ++{ ++ machine_mode mode = vector_mode (0); ++ int unspec = (!type_suffix (0).integer_p ? unspec_for_fp ++ : type_suffix (0).unsigned_p ? unspec_for_uint ++ : unspec_for_sint); ++ ++ if (pred == PRED_x) ++ { ++ insn_code icode = maybe_code_for_aarch64_pred (unspec, mode); ++ if (icode != CODE_FOR_nothing) ++ return use_pred_x_insn (icode); ++ } ++ ++ if (pred == PRED_none || pred == PRED_x) ++ { ++ insn_code icode = maybe_code_for_aarch64_sve (unspec, mode); ++ if (icode != CODE_FOR_nothing) ++ return use_unpred_insn (icode); ++ } ++ ++ insn_code icode = code_for_cond (unspec, vector_mode (0)); ++ return use_cond_insn (icode, merge_argno); ++} ++ ++/* Implement the call using an @aarch64 instruction and the ++ instructions are parameterized by an rtx_code. CODE_FOR_SINT ++ is the rtx_code for signed integer operations, CODE_FOR_UINT ++ is the rtx_code for unsigned integer operations. */ ++rtx ++function_expander::expand_signed_unpred_op (rtx_code code_for_sint, ++ rtx_code code_for_uint) ++{ ++ insn_code icode; ++ if (type_suffix (0).unsigned_p) ++ icode = code_for_aarch64 (code_for_uint, code_for_uint, vector_mode (0)); ++ else ++ icode = code_for_aarch64 (code_for_sint, code_for_sint, vector_mode (0)); ++ return use_unpred_insn (icode); ++} ++ ++/* Expand the call and return its lhs. 
*/ ++rtx ++function_expander::expand () ++{ ++ unsigned int nargs = call_expr_nargs (call_expr); ++ args.reserve (nargs); ++ for (unsigned int i = 0; i < nargs; ++i) ++ args.quick_push (expand_normal (CALL_EXPR_ARG (call_expr, i))); ++ ++ return base->expand (*this); ++} ++ ++/* Register the built-in SVE ABI types, such as __SVBool_t. */ ++static void ++register_builtin_types () ++{ ++#define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \ ++ scalar_types[VECTOR_TYPE_ ## ACLE_NAME] = SCALAR_TYPE; ++#include "aarch64-sve-builtins.def" ++ ++ for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i) ++ { ++ tree eltype = scalar_types[i]; ++ tree vectype; ++ if (eltype == boolean_type_node) ++ { ++ vectype = build_truth_vector_type_for_mode (BYTES_PER_SVE_VECTOR, ++ VNx16BImode); ++ gcc_assert (TYPE_MODE (vectype) == VNx16BImode ++ && TYPE_MODE (vectype) == TYPE_MODE_RAW (vectype) ++ && TYPE_ALIGN (vectype) == 16 ++ && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)), ++ BYTES_PER_SVE_VECTOR)); ++ } ++ else ++ { ++ unsigned int elbytes = tree_to_uhwi (TYPE_SIZE_UNIT (eltype)); ++ poly_uint64 nunits = exact_div (BYTES_PER_SVE_VECTOR, elbytes); ++ vectype = build_vector_type (eltype, nunits); ++ gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype)) ++ && TYPE_MODE (vectype) == TYPE_MODE_RAW (vectype) ++ && TYPE_ALIGN (vectype) == 128 ++ && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)), ++ BITS_PER_SVE_VECTOR)); ++ } ++ vectype = build_distinct_type_copy (vectype); ++ SET_TYPE_STRUCTURAL_EQUALITY (vectype); ++ TYPE_ARTIFICIAL (vectype) = 1; ++ abi_vector_types[i] = vectype; ++ lang_hooks.types.register_builtin_type (vectype, ++ vector_types[i].abi_name); ++ } ++} ++ ++/* Initialize all compiler built-ins related to SVE that should be ++ defined at start-up. */ ++void ++init_builtins () ++{ ++ sve_switcher sve; ++ register_builtin_types (); ++} ++ ++/* Register vector type TYPE under its arm_sve.h name. */ ++static void ++register_vector_type (vector_type_index type) ++{ ++ tree vectype = abi_vector_types[type]; ++ tree id = get_identifier (vector_types[type].acle_name); ++ tree decl = build_decl (input_location, TYPE_DECL, id, vectype); ++ decl = lang_hooks.decls.pushdecl (decl); ++ ++ /* Record the new ACLE type if pushdecl succeeded without error. Use ++ the ABI type otherwise, so that the type we record at least has the ++ right form, even if it doesn't have the right name. This should give ++ better error recovery behavior than installing error_mark_node or ++ installing an incorrect type. */ ++ if (TREE_CODE (decl) == TYPE_DECL ++ && TYPE_MAIN_VARIANT (TREE_TYPE (decl)) == vectype) ++ vectype = TREE_TYPE (decl); ++ acle_vector_types[0][type] = vectype; ++} ++ ++/* Register the tuple type that contains NUM_VECTORS vectors of type TYPE. */ ++static void ++register_tuple_type (unsigned int num_vectors, vector_type_index type) ++{ ++ tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE); ++ ++ /* The contents of the type are opaque, so we can define them in any ++ way that maps to the correct ABI type. ++ ++ Here we choose to use the same layout as for arm_neon.h, but with ++ "__val" instead of "val": ++ ++ struct svfooxN_t { svfoo_t __val[N]; }; ++ ++ (It wouldn't be possible to write that directly in C or C++ for ++ sizeless types, but that's not a problem for this function.) ++ ++ Using arrays simplifies the handling of svget and svset for variable ++ arguments. 
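++
++     (Illustrative note, not taken from the original source: with this
++     layout an access such as svget3_s32 (t, 1) amounts to reading
++     t.__val[1], and svcreate/svset work the same way in reverse.)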
*/ ++ tree vector_type = acle_vector_types[0][type]; ++ tree array_type = build_array_type_nelts (vector_type, num_vectors); ++ gcc_assert (VECTOR_MODE_P (TYPE_MODE (array_type)) ++ && TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type) ++ && TYPE_ALIGN (array_type) == 128); ++ ++ tree field = build_decl (input_location, FIELD_DECL, ++ get_identifier ("__val"), array_type); ++ DECL_FIELD_CONTEXT (field) = tuple_type; ++ TYPE_FIELDS (tuple_type) = field; ++ layout_type (tuple_type); ++ gcc_assert (VECTOR_MODE_P (TYPE_MODE (tuple_type)) ++ && TYPE_MODE_RAW (tuple_type) == TYPE_MODE (tuple_type) ++ && TYPE_ALIGN (tuple_type) == 128); ++ ++ /* Work out the structure name. */ ++ char buffer[sizeof ("svbfloat16x4_t")]; ++ const char *vector_type_name = vector_types[type].acle_name; ++ snprintf (buffer, sizeof (buffer), "%.*sx%d_t", ++ (int) strlen (vector_type_name) - 2, vector_type_name, ++ num_vectors); ++ ++ tree decl = build_decl (input_location, TYPE_DECL, ++ get_identifier (buffer), tuple_type); ++ TYPE_NAME (tuple_type) = decl; ++ TYPE_STUB_DECL (tuple_type) = decl; ++ lang_hooks.decls.pushdecl (decl); ++ /* ??? Undo the effect of set_underlying_type for C. The C frontend ++ doesn't recognize DECL as a built-in because (as intended) the decl has ++ a real location instead of BUILTINS_LOCATION. The frontend therefore ++ treats the decl like a normal C "typedef struct foo foo;", expecting ++ the type for tag "struct foo" to have a dummy unnamed TYPE_DECL instead ++ of the named one we attached above. It then sets DECL_ORIGINAL_TYPE ++ on the supposedly unnamed decl, creating a circularity that upsets ++ dwarf2out. ++ ++ We don't want to follow the normal C model and create "struct foo" ++ tags for tuple types since (a) the types are supposed to be opaque ++ and (b) they couldn't be defined as a real struct anyway. Treating ++ the TYPE_DECLs as "typedef struct foo foo;" without creating ++ "struct foo" would lead to confusing error messages. */ ++ DECL_ORIGINAL_TYPE (decl) = NULL_TREE; ++ ++ acle_vector_types[num_vectors - 1][type] = tuple_type; ++} ++ ++/* Register the svpattern enum. */ ++static void ++register_svpattern () ++{ ++ auto_vec values; ++#define PUSH(UPPER, LOWER, VALUE) \ ++ values.quick_push (string_int_pair ("SV_" #UPPER, VALUE)); ++ AARCH64_FOR_SVPATTERN (PUSH) ++#undef PUSH ++ ++ acle_svpattern = lang_hooks.types.simulate_enum_decl (input_location, ++ "svpattern", values); ++} ++ ++/* Register the svprfop enum. */ ++static void ++register_svprfop () ++{ ++ auto_vec values; ++#define PUSH(UPPER, LOWER, VALUE) \ ++ values.quick_push (string_int_pair ("SV_" #UPPER, VALUE)); ++ AARCH64_FOR_SVPRFOP (PUSH) ++#undef PUSH ++ ++ acle_svprfop = lang_hooks.types.simulate_enum_decl (input_location, ++ "svprfop", values); ++} ++ ++/* Implement #pragma GCC aarch64 "arm_sve.h". */ ++void ++handle_arm_sve_h () ++{ ++ if (function_table) ++ { ++ error ("duplicate definition of %qs", "arm_sve.h"); ++ return; ++ } ++ ++ sve_switcher sve; ++ ++ /* Define the vector and tuple types. */ ++ for (unsigned int type_i = 0; type_i < NUM_VECTOR_TYPES; ++type_i) ++ { ++ vector_type_index type = vector_type_index (type_i); ++ register_vector_type (type); ++ if (type != VECTOR_TYPE_svbool_t) ++ for (unsigned int count = 2; count <= MAX_TUPLE_SIZE; ++count) ++ register_tuple_type (count, type); ++ } ++ ++ /* Define the enums. */ ++ register_svpattern (); ++ register_svprfop (); ++ ++ /* Define the functions. 
*/ ++ function_table = new hash_table (1023); ++ function_builder builder; ++ for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) ++ builder.register_function_group (function_groups[i]); ++} ++ ++/* Return the function decl with SVE function subcode CODE, or error_mark_node ++ if no such function exists. */ ++tree ++builtin_decl (unsigned int code, bool) ++{ ++ if (code >= vec_safe_length (registered_functions)) ++ return error_mark_node; ++ return (*registered_functions)[code]->decl; ++} ++ ++/* If we're implementing manual overloading, check whether the SVE ++ function with subcode CODE is overloaded, and if so attempt to ++ determine the corresponding non-overloaded function. The call ++ occurs at location LOCATION and has the arguments given by ARGLIST. ++ ++ If the call is erroneous, report an appropriate error and return ++ error_mark_node. Otherwise, if the function is overloaded, return ++ the decl of the non-overloaded function. Return NULL_TREE otherwise, ++ indicating that the call should be processed in the normal way. */ ++tree ++resolve_overloaded_builtin (location_t location, unsigned int code, ++ vec *arglist) ++{ ++ if (code >= vec_safe_length (registered_functions)) ++ return NULL_TREE; ++ ++ registered_function &rfn = *(*registered_functions)[code]; ++ if (rfn.overloaded_p) ++ return function_resolver (location, rfn.instance, rfn.decl, ++ *arglist).resolve (); ++ return NULL_TREE; ++} ++ ++/* Perform any semantic checks needed for a call to the SVE function ++ with subcode CODE, such as testing for integer constant expressions. ++ The call occurs at location LOCATION and has NARGS arguments, ++ given by ARGS. FNDECL is the original function decl, before ++ overload resolution. ++ ++ Return true if the call is valid, otherwise report a suitable error. */ ++bool ++check_builtin_call (location_t location, vec, unsigned int code, ++ tree fndecl, unsigned int nargs, tree *args) ++{ ++ const registered_function &rfn = *(*registered_functions)[code]; ++ if (!check_required_extensions (location, rfn.decl, rfn.required_extensions)) ++ return false; ++ return function_checker (location, rfn.instance, fndecl, ++ TREE_TYPE (rfn.decl), nargs, args).check (); ++} ++ ++/* Attempt to fold STMT, given that it's a call to the SVE function ++ with subcode CODE. Return the new statement on success and null ++ on failure. Insert any other new statements at GSI. */ ++gimple * ++gimple_fold_builtin (unsigned int code, gimple_stmt_iterator *gsi, gcall *stmt) ++{ ++ registered_function &rfn = *(*registered_functions)[code]; ++ return gimple_folder (rfn.instance, rfn.decl, gsi, stmt).fold (); ++} ++ ++/* Expand a call to the SVE function with subcode CODE. EXP is the call ++ expression and TARGET is the preferred location for the result. ++ Return the value of the lhs. */ ++rtx ++expand_builtin (unsigned int code, tree exp, rtx target) ++{ ++ registered_function &rfn = *(*registered_functions)[code]; ++ if (!check_required_extensions (EXPR_LOCATION (exp), rfn.decl, ++ rfn.required_extensions)) ++ return target; ++ return function_expander (rfn.instance, rfn.decl, exp, target).expand (); ++} ++ ++/* Return true if TYPE is the ABI-defined __SVBool_t type. */ ++bool ++svbool_type_p (const_tree type) ++{ ++ tree abi_type = abi_vector_types[VECTOR_TYPE_svbool_t]; ++ return (type != error_mark_node ++ && TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (abi_type)); ++} ++ ++/* If TYPE is a built-in type defined by the SVE ABI, return the mangled name, ++ otherwise return NULL. 
*/ ++const char * ++mangle_builtin_type (const_tree type) ++{ ++ if (type == error_mark_node) ++ return NULL; ++ ++ vector_type_index vtype = find_vector_type (type); ++ if (vtype != NUM_VECTOR_TYPES) ++ return vector_types[vtype].mangled_name; ++ ++ return NULL; ++} ++ ++/* If TYPE is one of the ABI-defined SVE vector types, or an ACLE-defined ++ tuple of them, return the number of vectors it contains. Return 0 ++ otherwise. */ ++unsigned int ++nvectors_if_data_type (const_tree type) ++{ ++ if (type == error_mark_node) ++ return 0; ++ ++ type = TYPE_MAIN_VARIANT (type); ++ if (VECTOR_TYPE_P (type)) ++ { ++ vector_type_index type_id = find_vector_type (type); ++ if (type_id != VECTOR_TYPE_svbool_t && type_id != NUM_VECTOR_TYPES) ++ return 1; ++ } ++ else if (TREE_CODE (type) == RECORD_TYPE) ++ { ++ for (unsigned int size_i = 1; size_i < MAX_TUPLE_SIZE; ++size_i) ++ for (unsigned int type_i = 0; type_i < NUM_VECTOR_TYPES; ++type_i) ++ { ++ tree tuple_type = acle_vector_types[size_i][type_i]; ++ if (tuple_type && type == TYPE_MAIN_VARIANT (tuple_type)) ++ return size_i + 1; ++ } ++ } ++ ++ return 0; ++} ++ ++/* Return true if TYPE is a built-in type defined by the SVE ABI. */ ++bool ++builtin_type_p (const_tree type) ++{ ++ return svbool_type_p (type) || nvectors_if_data_type (type) > 0; ++} ++ ++} ++ ++using namespace aarch64_sve; ++ ++inline void ++gt_ggc_mx (function_instance *) ++{ ++} ++ ++inline void ++gt_pch_nx (function_instance *) ++{ ++} ++ ++inline void ++gt_pch_nx (function_instance *, void (*) (void *, void *), void *) ++{ ++} ++ ++#include "gt-aarch64-sve-builtins.h" +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def +new file mode 100644 +index 000000000..83fba0d41 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins.def +@@ -0,0 +1,100 @@ ++/* Builtin lists for AArch64 SVE ++ Copyright (C) 2018-2019 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . 
*/ ++ ++#ifndef DEF_SVE_MODE ++#define DEF_SVE_MODE(A, B, C, D) ++#endif ++ ++#ifndef DEF_SVE_TYPE ++#define DEF_SVE_TYPE(A, B, C, D) ++#endif ++ ++#ifndef DEF_SVE_TYPE_SUFFIX ++#define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E) ++#endif ++ ++#ifndef DEF_SVE_FUNCTION ++#define DEF_SVE_FUNCTION(A, B, C, D) ++#endif ++ ++DEF_SVE_MODE (n, none, none, none) ++DEF_SVE_MODE (index, none, none, elements) ++DEF_SVE_MODE (offset, none, none, bytes) ++DEF_SVE_MODE (s32index, none, svint32_t, elements) ++DEF_SVE_MODE (s32offset, none, svint32_t, bytes) ++DEF_SVE_MODE (s64index, none, svint64_t, elements) ++DEF_SVE_MODE (s64offset, none, svint64_t, bytes) ++DEF_SVE_MODE (u32base, svuint32_t, none, none) ++DEF_SVE_MODE (u32base_index, svuint32_t, none, elements) ++DEF_SVE_MODE (u32base_offset, svuint32_t, none, bytes) ++DEF_SVE_MODE (u32base_s32index, svuint32_t, svint32_t, elements) ++DEF_SVE_MODE (u32base_s32offset, svuint32_t, svint32_t, bytes) ++DEF_SVE_MODE (u32base_u32index, svuint32_t, svuint32_t, elements) ++DEF_SVE_MODE (u32base_u32offset, svuint32_t, svuint32_t, bytes) ++DEF_SVE_MODE (u32index, none, svuint32_t, elements) ++DEF_SVE_MODE (u32offset, none, svuint32_t, bytes) ++DEF_SVE_MODE (u64base, svuint64_t, none, none) ++DEF_SVE_MODE (u64base_index, svuint64_t, none, elements) ++DEF_SVE_MODE (u64base_offset, svuint64_t, none, bytes) ++DEF_SVE_MODE (u64base_s64index, svuint64_t, svint64_t, elements) ++DEF_SVE_MODE (u64base_s64offset, svuint64_t, svint64_t, bytes) ++DEF_SVE_MODE (u64base_u64index, svuint64_t, svuint64_t, elements) ++DEF_SVE_MODE (u64base_u64offset, svuint64_t, svuint64_t, bytes) ++DEF_SVE_MODE (u64index, none, svuint64_t, elements) ++DEF_SVE_MODE (u64offset, none, svuint64_t, bytes) ++DEF_SVE_MODE (vnum, none, none, vectors) ++ ++DEF_SVE_TYPE (svbool_t, 10, __SVBool_t, boolean_type_node) ++DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, aarch64_bf16_type_node) ++DEF_SVE_TYPE (svfloat16_t, 13, __SVFloat16_t, aarch64_fp16_type_node) ++DEF_SVE_TYPE (svfloat32_t, 13, __SVFloat32_t, float_type_node) ++DEF_SVE_TYPE (svfloat64_t, 13, __SVFloat64_t, double_type_node) ++DEF_SVE_TYPE (svint8_t, 10, __SVInt8_t, intQI_type_node) ++DEF_SVE_TYPE (svint16_t, 11, __SVInt16_t, intHI_type_node) ++DEF_SVE_TYPE (svint32_t, 11, __SVInt32_t, intSI_type_node) ++DEF_SVE_TYPE (svint64_t, 11, __SVInt64_t, intDI_type_node) ++DEF_SVE_TYPE (svuint8_t, 11, __SVUint8_t, unsigned_intQI_type_node) ++DEF_SVE_TYPE (svuint16_t, 12, __SVUint16_t, unsigned_intHI_type_node) ++DEF_SVE_TYPE (svuint32_t, 12, __SVUint32_t, unsigned_intSI_type_node) ++DEF_SVE_TYPE (svuint64_t, 12, __SVUint64_t, unsigned_intDI_type_node) ++ ++DEF_SVE_TYPE_SUFFIX (b, svbool_t, bool, 8, VNx16BImode) ++DEF_SVE_TYPE_SUFFIX (b8, svbool_t, bool, 8, VNx16BImode) ++DEF_SVE_TYPE_SUFFIX (b16, svbool_t, bool, 16, VNx8BImode) ++DEF_SVE_TYPE_SUFFIX (b32, svbool_t, bool, 32, VNx4BImode) ++DEF_SVE_TYPE_SUFFIX (b64, svbool_t, bool, 64, VNx2BImode) ++DEF_SVE_TYPE_SUFFIX (bf16, svbfloat16_t, bfloat, 16, VNx8BFmode) ++DEF_SVE_TYPE_SUFFIX (f16, svfloat16_t, float, 16, VNx8HFmode) ++DEF_SVE_TYPE_SUFFIX (f32, svfloat32_t, float, 32, VNx4SFmode) ++DEF_SVE_TYPE_SUFFIX (f64, svfloat64_t, float, 64, VNx2DFmode) ++DEF_SVE_TYPE_SUFFIX (s8, svint8_t, signed, 8, VNx16QImode) ++DEF_SVE_TYPE_SUFFIX (s16, svint16_t, signed, 16, VNx8HImode) ++DEF_SVE_TYPE_SUFFIX (s32, svint32_t, signed, 32, VNx4SImode) ++DEF_SVE_TYPE_SUFFIX (s64, svint64_t, signed, 64, VNx2DImode) ++DEF_SVE_TYPE_SUFFIX (u8, svuint8_t, unsigned, 8, VNx16QImode) ++DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 
16, VNx8HImode) ++DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode) ++DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode) ++ ++#include "aarch64-sve-builtins-base.def" ++ ++#undef DEF_SVE_FUNCTION ++#undef DEF_SVE_TYPE_SUFFIX ++#undef DEF_SVE_TYPE ++#undef DEF_SVE_MODE +diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h +new file mode 100644 +index 000000000..d1aa612b9 +--- /dev/null ++++ b/gcc/config/aarch64/aarch64-sve-builtins.h +@@ -0,0 +1,878 @@ ++/* ACLE support for AArch64 SVE ++ Copyright (C) 2018-2019 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ GCC is distributed in the hope that it will be useful, but ++ WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with GCC; see the file COPYING3. If not see ++ . */ ++ ++#ifndef GCC_AARCH64_SVE_BUILTINS_H ++#define GCC_AARCH64_SVE_BUILTINS_H ++ ++/* The full name of an SVE ACLE function is the concatenation of: ++ ++ - the base name ("svadd", etc.) ++ - the "mode" suffix ("_n", "_index", etc.) ++ - the type suffixes ("_s32", "_b8", etc.) ++ - the predication suffix ("_x", "_z", etc.) ++ ++ Each piece of information is individually useful, so we retain this ++ classification throughout: ++ ++ - function_base represents the base name ++ ++ - mode_suffix_index represents the mode suffix ++ ++ - type_suffix_index represents individual type suffixes, while ++ type_suffix_pair represents a pair of them ++ ++ - prediction_index extends the predication suffix with an additional ++ alternative: PRED_implicit for implicitly-predicated operations ++ ++ In addition to its unique full name, a function may have a shorter ++ overloaded alias. This alias removes pieces of the suffixes that ++ can be inferred from the arguments, such as by shortening the mode ++ suffix or dropping some of the type suffixes. The base name and the ++ predication suffix stay the same. ++ ++ The function_shape class describes what arguments a given function ++ takes and what its overloaded alias is called. In broad terms, ++ function_base describes how the underlying instruction behaves while ++ function_shape describes how that instruction has been presented at ++ the language level. ++ ++ The static list of functions uses function_group to describe a group ++ of related functions. The function_builder class is responsible for ++ expanding this static description into a list of individual functions ++ and registering the associated built-in functions. function_instance ++ describes one of these individual functions in terms of the properties ++ described above. 
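++
++   (Illustrative example, not part of the original comment: under this
++   scheme the full name
++
++     svadd_n_s32_m
++
++   combines the base name "svadd", the mode suffix "_n", the type
++   suffix "_s32" and the predication suffix "_m"; its overloaded alias
++   is plain svadd_m, with "_n" and "_s32" inferred from the argument
++   types.)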
++ ++ The classes involved in compiling a function call are: ++ ++ - function_resolver, which resolves an overloaded function call to a ++ specific function_instance and its associated function decl ++ ++ - function_checker, which checks whether the values of the arguments ++ conform to the ACLE specification ++ ++ - gimple_folder, which tries to fold a function call at the gimple level ++ ++ - function_expander, which expands a function call into rtl instructions ++ ++ function_resolver and function_checker operate at the language level ++ and so are associated with the function_shape. gimple_folder and ++ function_expander are concerned with the behavior of the function ++ and so are associated with the function_base. ++ ++ Note that we've specifically chosen not to fold calls in the frontend, ++ since SVE intrinsics will hardly ever fold a useful language-level ++ constant. */ ++namespace aarch64_sve ++{ ++/* The maximum number of vectors in an ACLE tuple type. */ ++const unsigned int MAX_TUPLE_SIZE = 4; ++ ++/* Used to represent the default merge argument index for _m functions. ++ The actual index depends on how many arguments the function takes. */ ++const unsigned int DEFAULT_MERGE_ARGNO = ~0U; ++ ++/* Flags that describe what a function might do, in addition to reading ++ its arguments and returning a result. */ ++const unsigned int CP_READ_FPCR = 1U << 0; ++const unsigned int CP_RAISE_FP_EXCEPTIONS = 1U << 1; ++const unsigned int CP_READ_MEMORY = 1U << 2; ++const unsigned int CP_PREFETCH_MEMORY = 1U << 3; ++const unsigned int CP_WRITE_MEMORY = 1U << 4; ++const unsigned int CP_READ_FFR = 1U << 5; ++const unsigned int CP_WRITE_FFR = 1U << 6; ++ ++/* Enumerates the SVE predicate and (data) vector types, together called ++ "vector types" for brevity. */ ++enum vector_type_index ++{ ++#define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \ ++ VECTOR_TYPE_ ## ACLE_NAME, ++#include "aarch64-sve-builtins.def" ++ NUM_VECTOR_TYPES ++}; ++ ++/* Classifies the available measurement units for an address displacement. */ ++enum units_index ++{ ++ UNITS_none, ++ UNITS_bytes, ++ UNITS_elements, ++ UNITS_vectors ++}; ++ ++/* Describes the various uses of a governing predicate. */ ++enum predication_index ++{ ++ /* No governing predicate is present. */ ++ PRED_none, ++ ++ /* A governing predicate is present but there is no predication suffix ++ associated with it. This is used when the result is neither a vector ++ nor a predicate, since the distinction between "zeroing" and "merging" ++ doesn't apply in that case. It is also used when a suffix would be ++ redundant (such as for loads and comparisons, which are inherently ++ zeroing operations). */ ++ PRED_implicit, ++ ++ /* Merging predication: copy inactive lanes from the first data argument ++ to the vector result. */ ++ PRED_m, ++ ++ /* "Don't care" predication: set inactive lanes of the vector result ++ to arbitrary values. */ ++ PRED_x, ++ ++ /* Zero predication: set inactive lanes of the vector result to zero. */ ++ PRED_z, ++ ++ NUM_PREDS ++}; ++ ++/* Classifies element types, based on type suffixes with the bit count ++ removed. */ ++enum type_class_index ++{ ++ TYPE_bool, ++ TYPE_bfloat, ++ TYPE_float, ++ TYPE_signed, ++ TYPE_unsigned, ++ NUM_TYPE_CLASSES ++}; ++ ++/* Classifies an operation into "modes"; for example, to distinguish ++ vector-scalar operations from vector-vector operations, or to ++ distinguish between different addressing modes. 
This classification ++ accounts for the function suffixes that occur between the base name ++ and the first type suffix. */ ++enum mode_suffix_index ++{ ++#define DEF_SVE_MODE(NAME, BASE, DISPLACEMENT, UNITS) MODE_##NAME, ++#include "aarch64-sve-builtins.def" ++ MODE_none ++}; ++ ++/* Enumerates the possible type suffixes. Each suffix is associated with ++ a vector type, but for predicates provides extra information about the ++ element size. */ ++enum type_suffix_index ++{ ++#define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \ ++ TYPE_SUFFIX_ ## NAME, ++#include "aarch64-sve-builtins.def" ++ NUM_TYPE_SUFFIXES ++}; ++ ++/* Combines two type suffixes. */ ++typedef enum type_suffix_index type_suffix_pair[2]; ++ ++class function_base; ++class function_shape; ++ ++/* Static information about a mode suffix. */ ++struct mode_suffix_info ++{ ++ /* The suffix string itself. */ ++ const char *string; ++ ++ /* The type of the vector base address, or NUM_VECTOR_TYPES if the ++ mode does not include a vector base address. */ ++ vector_type_index base_vector_type; ++ ++ /* The type of the vector displacement, or NUM_VECTOR_TYPES if the ++ mode does not include a vector displacement. (Note that scalar ++ displacements are always int64_t.) */ ++ vector_type_index displacement_vector_type; ++ ++ /* The units in which the vector or scalar displacement is measured, ++ or UNITS_none if the mode doesn't take a displacement. */ ++ units_index displacement_units; ++}; ++ ++/* Static information about a type suffix. */ ++struct type_suffix_info ++{ ++ /* The suffix string itself. */ ++ const char *string; ++ ++ /* The associated ACLE vector or predicate type. */ ++ vector_type_index vector_type : 8; ++ ++ /* What kind of type the suffix represents. */ ++ type_class_index tclass : 8; ++ ++ /* The number of bits and bytes in an element. For predicates this ++ measures the associated data elements. */ ++ unsigned int element_bits : 8; ++ unsigned int element_bytes : 8; ++ ++ /* True if the suffix is for an integer type. */ ++ unsigned int integer_p : 1; ++ /* True if the suffix is for an unsigned type. */ ++ unsigned int unsigned_p : 1; ++ /* True if the suffix is for a floating-point type. */ ++ unsigned int float_p : 1; ++ /* True if the suffix is for a boolean type. */ ++ unsigned int bool_p : 1; ++ unsigned int spare : 12; ++ ++ /* The associated vector or predicate mode. */ ++ machine_mode vector_mode : 16; ++}; ++ ++/* Static information about a set of functions. */ ++struct function_group_info ++{ ++ /* The base name, as a string. */ ++ const char *base_name; ++ ++ /* Describes the behavior associated with the function base name. */ ++ const function_base *const *base; ++ ++ /* The shape of the functions, as described above the class definition. ++ It's possible to have entries with the same base name but different ++ shapes. */ ++ const function_shape *const *shape; ++ ++ /* A list of the available type suffixes, and of the available predication ++ types. The function supports every combination of the two. ++ ++ The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES ++ while the list of predication types is terminated by NUM_PREDS. ++ The list of type suffixes is lexicographically ordered based ++ on the index value. */ ++ const type_suffix_pair *types; ++ const predication_index *preds; ++ ++ /* The architecture extensions that the functions require, as a set of ++ AARCH64_FL_* flags. */ ++ uint64_t required_extensions; ++}; ++ ++/* Describes a single fully-resolved function (i.e. 
one that has a ++ unique full name). */ ++class GTY((user)) function_instance ++{ ++public: ++ function_instance (const char *, const function_base *, ++ const function_shape *, mode_suffix_index, ++ const type_suffix_pair &, predication_index); ++ ++ bool operator== (const function_instance &) const; ++ bool operator!= (const function_instance &) const; ++ hashval_t hash () const; ++ ++ unsigned int call_properties () const; ++ bool reads_global_state_p () const; ++ bool modifies_global_state_p () const; ++ bool could_trap_p () const; ++ ++ unsigned int vectors_per_tuple () const; ++ tree memory_scalar_type () const; ++ machine_mode memory_vector_mode () const; ++ ++ const mode_suffix_info &mode_suffix () const; ++ tree base_vector_type () const; ++ tree displacement_vector_type () const; ++ units_index displacement_units () const; ++ ++ const type_suffix_info &type_suffix (unsigned int) const; ++ tree scalar_type (unsigned int) const; ++ tree vector_type (unsigned int) const; ++ tree tuple_type (unsigned int) const; ++ unsigned int elements_per_vq (unsigned int i) const; ++ machine_mode vector_mode (unsigned int) const; ++ machine_mode gp_mode (unsigned int) const; ++ ++ /* The properties of the function. (The explicit "enum"s are required ++ for gengtype.) */ ++ const char *base_name; ++ const function_base *base; ++ const function_shape *shape; ++ enum mode_suffix_index mode_suffix_id; ++ type_suffix_pair type_suffix_ids; ++ enum predication_index pred; ++}; ++ ++class registered_function; ++ ++/* A class for building and registering function decls. */ ++class function_builder ++{ ++public: ++ function_builder (); ++ ~function_builder (); ++ ++ void add_unique_function (const function_instance &, tree, ++ vec &, uint64_t, bool); ++ void add_overloaded_function (const function_instance &, uint64_t); ++ void add_overloaded_functions (const function_group_info &, ++ mode_suffix_index); ++ ++ void register_function_group (const function_group_info &); ++ ++private: ++ void append_name (const char *); ++ char *finish_name (); ++ ++ char *get_name (const function_instance &, bool); ++ ++ tree get_attributes (const function_instance &); ++ ++ registered_function &add_function (const function_instance &, ++ const char *, tree, tree, uint64_t, bool); ++ ++ /* The function type to use for functions that are resolved by ++ function_resolver. */ ++ tree m_overload_type; ++ ++ /* True if we should create a separate decl for each instance of an ++ overloaded function, instead of using function_resolver. */ ++ bool m_direct_overloads; ++ ++ /* Used for building up function names. */ ++ obstack m_string_obstack; ++ ++ /* Maps all overloaded function names that we've registered so far ++ to their associated function_instances. */ ++ hash_map m_overload_names; ++}; ++ ++/* A base class for handling calls to built-in functions. */ ++class function_call_info : public function_instance ++{ ++public: ++ function_call_info (location_t, const function_instance &, tree); ++ ++ bool function_returns_void_p (); ++ ++ /* The location of the call. */ ++ location_t location; ++ ++ /* The FUNCTION_DECL that is being called. */ ++ tree fndecl; ++}; ++ ++/* A class for resolving an overloaded function call. 
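++
++   (Illustrative note, not taken from the original source: this is the
++   class that resolves a call such as svadd_m (pg, a, b), where A and
++   B are svint32_t values, to the full function svadd_s32_m.)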
*/ ++class function_resolver : public function_call_info ++{ ++public: ++ enum { SAME_SIZE = 256, HALF_SIZE, QUARTER_SIZE }; ++ static const type_class_index SAME_TYPE_CLASS = NUM_TYPE_CLASSES; ++ ++ function_resolver (location_t, const function_instance &, tree, ++ vec &); ++ ++ tree get_vector_type (type_suffix_index); ++ const char *get_scalar_type_name (type_suffix_index); ++ tree get_argument_type (unsigned int); ++ bool scalar_argument_p (unsigned int); ++ ++ tree report_no_such_form (type_suffix_index); ++ tree lookup_form (mode_suffix_index, ++ type_suffix_index = NUM_TYPE_SUFFIXES, ++ type_suffix_index = NUM_TYPE_SUFFIXES); ++ tree resolve_to (mode_suffix_index, ++ type_suffix_index = NUM_TYPE_SUFFIXES, ++ type_suffix_index = NUM_TYPE_SUFFIXES); ++ ++ type_suffix_index infer_integer_scalar_type (unsigned int); ++ type_suffix_index infer_pointer_type (unsigned int, bool = false); ++ type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int); ++ type_suffix_index infer_vector_type (unsigned int); ++ type_suffix_index infer_integer_vector_type (unsigned int); ++ type_suffix_index infer_unsigned_vector_type (unsigned int); ++ type_suffix_index infer_sd_vector_type (unsigned int); ++ type_suffix_index infer_tuple_type (unsigned int); ++ ++ bool require_vector_or_scalar_type (unsigned int); ++ ++ bool require_vector_type (unsigned int, vector_type_index); ++ bool require_matching_vector_type (unsigned int, type_suffix_index); ++ bool require_derived_vector_type (unsigned int, unsigned int, ++ type_suffix_index, ++ type_class_index = SAME_TYPE_CLASS, ++ unsigned int = SAME_SIZE); ++ ++ bool require_scalar_type (unsigned int, const char *); ++ bool require_pointer_type (unsigned int); ++ bool require_matching_integer_scalar_type (unsigned int, unsigned int, ++ type_suffix_index); ++ bool require_derived_scalar_type (unsigned int, type_class_index, ++ unsigned int = SAME_SIZE); ++ bool require_matching_pointer_type (unsigned int, unsigned int, ++ type_suffix_index); ++ bool require_integer_immediate (unsigned int); ++ ++ vector_type_index infer_vector_base_type (unsigned int); ++ vector_type_index infer_vector_displacement_type (unsigned int); ++ ++ mode_suffix_index resolve_sv_displacement (unsigned int, ++ type_suffix_index, bool); ++ mode_suffix_index resolve_gather_address (unsigned int, ++ type_suffix_index, bool); ++ mode_suffix_index resolve_adr_address (unsigned int); ++ ++ bool check_num_arguments (unsigned int); ++ bool check_gp_argument (unsigned int, unsigned int &, unsigned int &); ++ tree resolve_unary (type_class_index = SAME_TYPE_CLASS, ++ unsigned int = SAME_SIZE, bool = false); ++ tree resolve_uniform (unsigned int, unsigned int = 0); ++ tree resolve_uniform_opt_n (unsigned int); ++ tree finish_opt_n_resolution (unsigned int, unsigned int, type_suffix_index, ++ type_class_index = SAME_TYPE_CLASS, ++ unsigned int = SAME_SIZE, ++ type_suffix_index = NUM_TYPE_SUFFIXES); ++ ++ tree resolve (); ++ ++private: ++ /* The arguments to the overloaded function. */ ++ vec &m_arglist; ++}; ++ ++/* A class for checking that the semantic constraints on a function call are ++ satisfied, such as arguments being integer constant expressions with ++ a particular range. The parent class's FNDECL is the decl that was ++ called in the original source, before overload resolution. 
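++
++   (Illustrative note, not taken from the original source: a typical
++   check is that the lane argument of svmla_lane_f32 is an integer
++   constant expression in the range [0, 3], i.e. a valid lane index
++   within a 128-bit quadword of float elements.)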
*/ ++class function_checker : public function_call_info ++{ ++public: ++ function_checker (location_t, const function_instance &, tree, ++ tree, unsigned int, tree *); ++ ++ bool require_immediate_either_or (unsigned int, HOST_WIDE_INT, ++ HOST_WIDE_INT); ++ bool require_immediate_enum (unsigned int, tree); ++ bool require_immediate_lane_index (unsigned int, unsigned int = 1); ++ bool require_immediate_one_of (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT, ++ HOST_WIDE_INT, HOST_WIDE_INT); ++ bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT); ++ ++ bool check (); ++ ++private: ++ bool argument_exists_p (unsigned int); ++ ++ bool require_immediate (unsigned int, HOST_WIDE_INT &); ++ ++ /* The type of the resolved function. */ ++ tree m_fntype; ++ ++ /* The arguments to the function. */ ++ unsigned int m_nargs; ++ tree *m_args; ++ ++ /* The first argument not associated with the function's predication ++ type. */ ++ unsigned int m_base_arg; ++}; ++ ++/* A class for folding a gimple function call. */ ++class gimple_folder : public function_call_info ++{ ++public: ++ gimple_folder (const function_instance &, tree, ++ gimple_stmt_iterator *, gcall *); ++ ++ tree convert_pred (gimple_seq &, tree, unsigned int); ++ tree fold_contiguous_base (gimple_seq &, tree); ++ tree load_store_cookie (tree); ++ ++ gimple *redirect_call (const function_instance &); ++ gimple *fold_to_pfalse (); ++ gimple *fold_to_ptrue (); ++ gimple *fold_to_vl_pred (unsigned int); ++ ++ gimple *fold (); ++ ++ /* Where to insert extra statements that feed the final replacement. */ ++ gimple_stmt_iterator *gsi; ++ ++ /* The call we're folding. */ ++ gcall *call; ++ ++ /* The result of the call, or null if none. */ ++ tree lhs; ++}; ++ ++/* A class for expanding a function call into RTL. 
*/ ++class function_expander : public function_call_info ++{ ++public: ++ function_expander (const function_instance &, tree, tree, rtx); ++ rtx expand (); ++ ++ insn_code direct_optab_handler (optab, unsigned int = 0); ++ insn_code direct_optab_handler_for_sign (optab, optab, unsigned int = 0, ++ machine_mode = E_VOIDmode); ++ ++ bool overlaps_input_p (rtx); ++ ++ rtx get_contiguous_base (machine_mode); ++ rtx get_fallback_value (machine_mode, unsigned int, ++ unsigned int, unsigned int &); ++ rtx get_reg_target (); ++ rtx get_nonoverlapping_reg_target (); ++ ++ void add_output_operand (insn_code); ++ void add_input_operand (insn_code, rtx); ++ void add_integer_operand (HOST_WIDE_INT); ++ void add_mem_operand (machine_mode, rtx); ++ void add_address_operand (rtx); ++ void add_fixed_operand (rtx); ++ rtx generate_insn (insn_code); ++ ++ void prepare_gather_address_operands (unsigned int, bool = true); ++ void prepare_prefetch_operands (); ++ void add_ptrue_hint (unsigned int, machine_mode); ++ void rotate_inputs_left (unsigned int, unsigned int); ++ bool try_negating_argument (unsigned int, machine_mode); ++ ++ rtx use_exact_insn (insn_code); ++ rtx use_unpred_insn (insn_code); ++ rtx use_pred_x_insn (insn_code); ++ rtx use_cond_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO); ++ rtx use_vcond_mask_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO); ++ rtx use_contiguous_load_insn (insn_code); ++ rtx use_contiguous_prefetch_insn (insn_code); ++ rtx use_contiguous_store_insn (insn_code); ++ ++ rtx map_to_rtx_codes (rtx_code, rtx_code, int, ++ unsigned int = DEFAULT_MERGE_ARGNO); ++ rtx map_to_unspecs (int, int, int, unsigned int = DEFAULT_MERGE_ARGNO); ++ rtx expand_signed_unpred_op (rtx_code, rtx_code); ++ ++ /* The function call expression. */ ++ tree call_expr; ++ ++ /* For functions that return a value, this is the preferred location ++ of that value. It could be null or could have a different mode ++ from the function return type. */ ++ rtx possible_target; ++ ++ /* The expanded arguments. */ ++ auto_vec args; ++ ++private: ++ /* Used to build up the operands to an instruction. */ ++ auto_vec m_ops; ++}; ++ ++/* Provides information about a particular function base name, and handles ++ tasks related to the base name. */ ++class function_base ++{ ++public: ++ /* Return a set of CP_* flags that describe what the function might do, ++ in addition to reading its arguments and returning a result. */ ++ virtual unsigned int call_properties (const function_instance &) const; ++ ++ /* If the function operates on tuples of vectors, return the number ++ of vectors in the tuples, otherwise return 1. */ ++ virtual unsigned int vectors_per_tuple () const { return 1; } ++ ++ /* If the function addresses memory, return the type of a single ++ scalar memory element. */ ++ virtual tree ++ memory_scalar_type (const function_instance &) const ++ { ++ gcc_unreachable (); ++ } ++ ++ /* If the function addresses memory, return a vector mode whose ++ GET_MODE_NUNITS is the number of elements addressed and whose ++ GET_MODE_INNER is the mode of a single scalar memory element. */ ++ virtual machine_mode ++ memory_vector_mode (const function_instance &) const ++ { ++ gcc_unreachable (); ++ } ++ ++ /* Try to fold the given gimple call. Return the new gimple statement ++ on success, otherwise return null. */ ++ virtual gimple *fold (gimple_folder &) const { return NULL; } ++ ++ /* Expand the given call into rtl. 
Return the result of the function, ++ or an arbitrary value if the function doesn't return a result. */ ++ virtual rtx expand (function_expander &) const = 0; ++}; ++ ++/* Classifies functions into "shapes". The idea is to take all the ++ type signatures for a set of functions, remove the governing predicate ++ (if any), and classify what's left based on: ++ ++ - the number of arguments ++ ++ - the process of determining the types in the signature from the mode ++ and type suffixes in the function name (including types that are not ++ affected by the suffixes) ++ ++ - which arguments must be integer constant expressions, and what range ++ those arguments have ++ ++ - the process for mapping overloaded names to "full" names. */ ++class function_shape ++{ ++public: ++ virtual bool explicit_type_suffix_p (unsigned int) const = 0; ++ ++ /* Define all functions associated with the given group. */ ++ virtual void build (function_builder &, ++ const function_group_info &) const = 0; ++ ++ /* Try to resolve the overloaded call. Return the non-overloaded ++ function decl on success and error_mark_node on failure. */ ++ virtual tree resolve (function_resolver &) const = 0; ++ ++ /* Check whether the given call is semantically valid. Return true ++ if it is, otherwise report an error and return false. */ ++ virtual bool check (function_checker &) const { return true; } ++}; ++ ++/* RAII class for enabling enough SVE features to define the built-in ++ types and implement the arm_sve.h pragma. */ ++class sve_switcher ++{ ++public: ++ sve_switcher (); ++ ~sve_switcher (); ++ ++private: ++ unsigned long m_old_isa_flags; ++ bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; ++}; ++ ++extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1]; ++extern const mode_suffix_info mode_suffixes[MODE_none + 1]; ++ ++extern tree scalar_types[NUM_VECTOR_TYPES]; ++extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; ++extern tree acle_svpattern; ++extern tree acle_svprfop; ++ ++/* Return the ACLE type svbool_t. */ ++inline tree ++get_svbool_t (void) ++{ ++ return acle_vector_types[0][VECTOR_TYPE_svbool_t]; ++} ++ ++/* Try to find a mode with the given mode_suffix_info fields. Return the ++ mode on success or MODE_none on failure. */ ++inline mode_suffix_index ++find_mode_suffix (vector_type_index base_vector_type, ++ vector_type_index displacement_vector_type, ++ units_index displacement_units) ++{ ++ for (unsigned int mode_i = 0; mode_i < ARRAY_SIZE (mode_suffixes); ++mode_i) ++ { ++ const mode_suffix_info &mode = mode_suffixes[mode_i]; ++ if (mode.base_vector_type == base_vector_type ++ && mode.displacement_vector_type == displacement_vector_type ++ && mode.displacement_units == displacement_units) ++ return mode_suffix_index (mode_i); ++ } ++ return MODE_none; ++} ++ ++/* Return the type suffix associated with ELEMENT_BITS-bit elements of type ++ class TCLASS. */ ++inline type_suffix_index ++find_type_suffix (type_class_index tclass, unsigned int element_bits) ++{ ++ for (unsigned int i = 0; i < NUM_TYPE_SUFFIXES; ++i) ++ if (type_suffixes[i].tclass == tclass ++ && type_suffixes[i].element_bits == element_bits) ++ return type_suffix_index (i); ++ gcc_unreachable (); ++} ++ ++/* Return the single field in tuple type TYPE. 
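++
++   (Illustrative note, not taken from the original source: for a tuple
++   type such as svint32x2_t this returns the FIELD_DECL of the "__val"
++   array that register_tuple_type created.)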
*/ ++inline tree ++tuple_type_field (tree type) ++{ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ if (TREE_CODE (field) == FIELD_DECL) ++ return field; ++ gcc_unreachable (); ++} ++ ++inline function_instance:: ++function_instance (const char *base_name_in, ++ const function_base *base_in, ++ const function_shape *shape_in, ++ mode_suffix_index mode_suffix_id_in, ++ const type_suffix_pair &type_suffix_ids_in, ++ predication_index pred_in) ++ : base_name (base_name_in), base (base_in), shape (shape_in), ++ mode_suffix_id (mode_suffix_id_in), pred (pred_in) ++{ ++ memcpy (type_suffix_ids, type_suffix_ids_in, sizeof (type_suffix_ids)); ++} ++ ++inline bool ++function_instance::operator== (const function_instance &other) const ++{ ++ return (base == other.base ++ && shape == other.shape ++ && mode_suffix_id == other.mode_suffix_id ++ && pred == other.pred ++ && type_suffix_ids[0] == other.type_suffix_ids[0] ++ && type_suffix_ids[1] == other.type_suffix_ids[1]); ++} ++ ++inline bool ++function_instance::operator!= (const function_instance &other) const ++{ ++ return !operator== (other); ++} ++ ++/* If the function operates on tuples of vectors, return the number ++ of vectors in the tuples, otherwise return 1. */ ++inline unsigned int ++function_instance::vectors_per_tuple () const ++{ ++ return base->vectors_per_tuple (); ++} ++ ++/* If the function addresses memory, return the type of a single ++ scalar memory element. */ ++inline tree ++function_instance::memory_scalar_type () const ++{ ++ return base->memory_scalar_type (*this); ++} ++ ++/* If the function addresses memory, return a vector mode whose ++ GET_MODE_NUNITS is the number of elements addressed and whose ++ GET_MODE_INNER is the mode of a single scalar memory element. */ ++inline machine_mode ++function_instance::memory_vector_mode () const ++{ ++ return base->memory_vector_mode (*this); ++} ++ ++/* Return information about the function's mode suffix. */ ++inline const mode_suffix_info & ++function_instance::mode_suffix () const ++{ ++ return mode_suffixes[mode_suffix_id]; ++} ++ ++/* Return the type of the function's vector base address argument, ++ or null it doesn't have a vector base address. */ ++inline tree ++function_instance::base_vector_type () const ++{ ++ return acle_vector_types[0][mode_suffix ().base_vector_type]; ++} ++ ++/* Return the type of the function's vector index or offset argument, ++ or null if doesn't have a vector index or offset argument. */ ++inline tree ++function_instance::displacement_vector_type () const ++{ ++ return acle_vector_types[0][mode_suffix ().displacement_vector_type]; ++} ++ ++/* If the function takes a vector or scalar displacement, return the units ++ in which the displacement is measured, otherwise return UNITS_none. */ ++inline units_index ++function_instance::displacement_units () const ++{ ++ return mode_suffix ().displacement_units; ++} ++ ++/* Return information about type suffix I. */ ++inline const type_suffix_info & ++function_instance::type_suffix (unsigned int i) const ++{ ++ return type_suffixes[type_suffix_ids[i]]; ++} ++ ++/* Return the scalar type associated with type suffix I. */ ++inline tree ++function_instance::scalar_type (unsigned int i) const ++{ ++ return scalar_types[type_suffix (i).vector_type]; ++} ++ ++/* Return the vector type associated with type suffix I. 
*/ ++inline tree ++function_instance::vector_type (unsigned int i) const ++{ ++ return acle_vector_types[0][type_suffix (i).vector_type]; ++} ++ ++/* If the function operates on tuples of vectors, return the tuple type ++ associated with type suffix I, otherwise return the vector type associated ++ with type suffix I. */ ++inline tree ++function_instance::tuple_type (unsigned int i) const ++{ ++ unsigned int num_vectors = vectors_per_tuple (); ++ return acle_vector_types[num_vectors - 1][type_suffix (i).vector_type]; ++} ++ ++/* Return the number of elements of type suffix I that fit within a ++ 128-bit block. */ ++inline unsigned int ++function_instance::elements_per_vq (unsigned int i) const ++{ ++ return 128 / type_suffix (i).element_bits; ++} ++ ++/* Return the vector or predicate mode associated with type suffix I. */ ++inline machine_mode ++function_instance::vector_mode (unsigned int i) const ++{ ++ return type_suffix (i).vector_mode; ++} ++ ++/* Return the mode of the governing predicate to use when operating on ++ type suffix I. */ ++inline machine_mode ++function_instance::gp_mode (unsigned int i) const ++{ ++ return aarch64_sve_pred_mode (type_suffix (i).element_bytes).require (); ++} ++ ++/* Return true if the function has no return value. */ ++inline bool ++function_call_info::function_returns_void_p () ++{ ++ return TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node; ++} ++ ++/* Default implementation of function::call_properties, with conservatively ++ correct behavior for floating-point instructions. */ ++inline unsigned int ++function_base::call_properties (const function_instance &instance) const ++{ ++ unsigned int flags = 0; ++ if (instance.type_suffix (0).float_p || instance.type_suffix (1).float_p) ++ flags |= CP_READ_FPCR | CP_RAISE_FP_EXCEPTIONS; ++ return flags; ++} ++ ++} ++ ++#endif +diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md +index 02d33b727..11198e8a9 100644 +--- a/gcc/config/aarch64/aarch64-sve.md ++++ b/gcc/config/aarch64/aarch64-sve.md +@@ -18,8 +18,168 @@ + ;; along with GCC; see the file COPYING3. If not see + ;; . 
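The builtins header above identifies each function by a base name, a mode suffix, type suffixes and a predication suffix, and function_shape/function_resolver map the overloaded ACLE names onto those full names. As a rough user-level illustration of that naming scheme (not part of this patch; the wrapper function names are invented, and it assumes a toolchain that already provides arm_sve.h):

  #include <arm_sve.h>

  /* Overloaded form: the resolver infers the type suffix from the
     argument types, so this call stands for svadd_s32_x.  */
  svint32_t
  add_x (svbool_t pg, svint32_t a, svint32_t b)
  {
    return svadd_x (pg, a, b);
  }

  /* Full name: base "add", type suffix "_s32", predication "_m",
     meaning inactive lanes keep the value of the first vector input.  */
  svint32_t
  add_m (svbool_t pg, svint32_t a, svint32_t b)
  {
    return svadd_s32_m (pg, a, b);
  }

Compiling such code needs SVE enabled, for example with -march=armv8.2-a+sve.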
+ +-;; Note on the handling of big-endian SVE +-;; -------------------------------------- ++;; The file is organised into the following sections (search for the full ++;; line): ++;; ++;; == General notes ++;; ---- Note on the handling of big-endian SVE ++;; ---- Description of UNSPEC_PTEST ++;; ---- Description of UNSPEC_PRED_Z ++;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X ++;; ---- Note on predicated FP arithmetic patterns and GP "strictness" ++;; ---- Note on FFR handling ++;; ++;; == Moves ++;; ---- Moves of single vectors ++;; ---- Moves of multiple vectors ++;; ---- Moves of predicates ++;; ---- Moves relating to the FFR ++;; ++;; == Loads ++;; ---- Normal contiguous loads ++;; ---- Extending contiguous loads ++;; ---- First-faulting contiguous loads ++;; ---- First-faulting extending contiguous loads ++;; ---- Non-temporal contiguous loads ++;; ---- Normal gather loads ++;; ---- Extending gather loads ++;; ---- First-faulting gather loads ++;; ---- First-faulting extending gather loads ++;; ++;; == Prefetches ++;; ---- Contiguous prefetches ++;; ---- Gather prefetches ++;; ++;; == Stores ++;; ---- Normal contiguous stores ++;; ---- Truncating contiguous stores ++;; ---- Non-temporal contiguous stores ++;; ---- Normal scatter stores ++;; ---- Truncating scatter stores ++;; ++;; == Vector creation ++;; ---- [INT,FP] Duplicate element ++;; ---- [INT,FP] Initialize from individual elements ++;; ---- [INT] Linear series ++;; ---- [PRED] Duplicate element ++;; ++;; == Vector decomposition ++;; ---- [INT,FP] Extract index ++;; ---- [INT,FP] Extract active element ++;; ---- [PRED] Extract index ++;; ++;; == Unary arithmetic ++;; ---- [INT] General unary arithmetic corresponding to rtx codes ++;; ---- [INT] General unary arithmetic corresponding to unspecs ++;; ---- [INT] Sign extension ++;; ---- [INT] Zero extension ++;; ---- [INT] Logical inverse ++;; ---- [FP<-INT] General unary arithmetic that maps to unspecs ++;; ---- [FP] General unary arithmetic corresponding to unspecs ++;; ---- [PRED] Inverse ++ ++;; == Binary arithmetic ++;; ---- [INT] General binary arithmetic corresponding to rtx codes ++;; ---- [INT] Addition ++;; ---- [INT] Subtraction ++;; ---- [INT] Take address ++;; ---- [INT] Absolute difference ++;; ---- [INT] Saturating addition and subtraction ++;; ---- [INT] Highpart multiplication ++;; ---- [INT] Division ++;; ---- [INT] Binary logical operations ++;; ---- [INT] Binary logical operations (inverted second input) ++;; ---- [INT] Shifts (rounding towards -Inf) ++;; ---- [INT] Shifts (rounding towards 0) ++;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs ++;; ---- [FP] General binary arithmetic corresponding to rtx codes ++;; ---- [FP] General binary arithmetic corresponding to unspecs ++;; ---- [FP] Addition ++;; ---- [FP] Complex addition ++;; ---- [FP] Subtraction ++;; ---- [FP] Absolute difference ++;; ---- [FP] Multiplication ++;; ---- [FP] Binary logical operations ++;; ---- [FP] Sign copying ++;; ---- [FP] Maximum and minimum ++;; ---- [PRED] Binary logical operations ++;; ---- [PRED] Binary logical operations (inverted second input) ++;; ---- [PRED] Binary logical operations (inverted result) ++;; ++;; == Ternary arithmetic ++;; ---- [INT] MLA and MAD ++;; ---- [INT] MLS and MSB ++;; ---- [INT] Dot product ++;; ---- [INT] Sum of absolute differences ++;; ---- [INT] Matrix multiply-accumulate ++;; ---- [FP] General ternary arithmetic corresponding to unspecs ++;; ---- [FP] Complex multiply-add ++;; ---- [FP] Trigonometric 
multiply-add ++;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) ++;; ---- [FP] Matrix multiply-accumulate ++;; ++;; == Comparisons and selects ++;; ---- [INT,FP] Select based on predicates ++;; ---- [INT,FP] Compare and select ++;; ---- [INT] Comparisons ++;; ---- [INT] While tests ++;; ---- [FP] Direct comparisons ++;; ---- [FP] Absolute comparisons ++;; ---- [PRED] Select ++;; ---- [PRED] Test bits ++;; ++;; == Reductions ++;; ---- [INT,FP] Conditional reductions ++;; ---- [INT] Tree reductions ++;; ---- [FP] Tree reductions ++;; ---- [FP] Left-to-right reductions ++;; ++;; == Permutes ++;; ---- [INT,FP] General permutes ++;; ---- [INT,FP] Special-purpose unary permutes ++;; ---- [INT,FP] Special-purpose binary permutes ++;; ---- [PRED] Special-purpose unary permutes ++;; ---- [PRED] Special-purpose binary permutes ++;; ++;; == Conversions ++;; ---- [INT<-INT] Packs ++;; ---- [INT<-INT] Unpacks ++;; ---- [INT<-FP] Conversions ++;; ---- [INT<-FP] Packs ++;; ---- [INT<-FP] Unpacks ++;; ---- [FP<-INT] Conversions ++;; ---- [FP<-INT] Packs ++;; ---- [FP<-INT] Unpacks ++;; ---- [FP<-FP] Packs ++;; ---- [FP<-FP] Packs (bfloat16) ++;; ---- [FP<-FP] Unpacks ++;; ---- [PRED<-PRED] Packs ++;; ---- [PRED<-PRED] Unpacks ++;; ++;; == Vector partitioning ++;; ---- [PRED] Unary partitioning ++;; ---- [PRED] Binary partitioning ++;; ---- [PRED] Scalarization ++;; ++;; == Counting elements ++;; ---- [INT] Count elements in a pattern (scalar) ++;; ---- [INT] Increment by the number of elements in a pattern (scalar) ++;; ---- [INT] Increment by the number of elements in a pattern (vector) ++;; ---- [INT] Decrement by the number of elements in a pattern (scalar) ++;; ---- [INT] Decrement by the number of elements in a pattern (vector) ++;; ---- [INT] Count elements in a predicate (scalar) ++;; ---- [INT] Increment by the number of elements in a predicate (scalar) ++;; ---- [INT] Increment by the number of elements in a predicate (vector) ++;; ---- [INT] Decrement by the number of elements in a predicate (scalar) ++;; ---- [INT] Decrement by the number of elements in a predicate (vector) ++ ++;; ========================================================================= ++;; == General notes ++;; ========================================================================= ++;; ++;; ------------------------------------------------------------------------- ++;; ---- Note on the handling of big-endian SVE ++;; ------------------------------------------------------------------------- + ;; + ;; On big-endian systems, Advanced SIMD mov patterns act in the + ;; same way as movdi or movti would: the first byte of memory goes +@@ -59,12 +219,339 @@ + ;; the order of the bytes within the elements is different. We instead + ;; access spill slots via LD1 and ST1, using secondary reloads to + ;; reserve a predicate register. ++;; ++;; ------------------------------------------------------------------------- ++;; ---- Description of UNSPEC_PTEST ++;; ------------------------------------------------------------------------- ++;; ++;; SVE provides a PTEST instruction for testing the active lanes of a ++;; predicate and setting the flags based on the result. 
The associated ++;; condition code tests are: ++;; ++;; - any (= ne): at least one active bit is set ++;; - none (= eq): all active bits are clear (*) ++;; - first (= mi): the first active bit is set ++;; - nfrst (= pl): the first active bit is clear (*) ++;; - last (= cc): the last active bit is set ++;; - nlast (= cs): the last active bit is clear (*) ++;; ++;; where the conditions marked (*) are also true when there are no active ++;; lanes (i.e. when the governing predicate is a PFALSE). The flags results ++;; of a PTEST use the condition code mode CC_NZC. ++;; ++;; PTEST is always a .B operation (i.e. it always operates on VNx16BI). ++;; This means that for other predicate modes, we need a governing predicate ++;; in which all bits are defined. ++;; ++;; For example, most predicated .H operations ignore the odd bits of the ++;; governing predicate, so that an active lane is represented by the ++;; bits "1x" and an inactive lane by the bits "0x", where "x" can be ++;; any value. To test a .H predicate, we instead need "10" and "00" ++;; respectively, so that the condition only tests the even bits of the ++;; predicate. ++;; ++;; Several instructions set the flags as a side-effect, in the same way ++;; that a separate PTEST would. It's important for code quality that we ++;; use these flags results as often as possible, particularly in the case ++;; of WHILE* and RDFFR. ++;; ++;; Also, some of the instructions that set the flags are unpredicated ++;; and instead implicitly test all .B, .H, .S or .D elements, as though ++;; they were predicated on a PTRUE of that size. For example, a .S ++;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE ++;; would. ++;; ++;; We therefore need to represent PTEST operations in a way that ++;; makes it easy to combine them with both predicated and unpredicated ++;; operations, while using a VNx16BI governing predicate for all ++;; predicate modes. We do this using: ++;; ++;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST) ++;; ++;; where: ++;; ++;; - GP is the real VNx16BI governing predicate ++;; ++;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting ++;; GP to CAST_GP are guaranteed to be clear in GP. ++;; ++;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value ++;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and ++;; SVE_MAYBE_NOT_PTRUE otherwise. ++;; ++;; - OP is the predicate we want to test, of the same mode as CAST_GP. ++;; ++;; ------------------------------------------------------------------------- ++;; ---- Description of UNSPEC_PRED_Z ++;; ------------------------------------------------------------------------- ++;; ++;; SVE integer comparisons are predicated and return zero for inactive ++;; lanes. Sometimes we use them with predicates that are all-true and ++;; sometimes we use them with general predicates. ++;; ++;; The integer comparisons also set the flags and so build-in the effect ++;; of a PTEST. We therefore want to be able to combine integer comparison ++;; patterns with PTESTs of the result. One difficulty with doing this is ++;; that (as noted above) the PTEST is always a .B operation and so can place ++;; stronger requirements on the governing predicate than the comparison does. ++;; ++;; For example, when applying a separate PTEST to the result of a full-vector ++;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a ++;; .B PTRUE. 
In constrast, the comparison might be predicated on either ++;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate ++;; bits don't matter for .H operations. ++;; ++;; We therefore can't rely on a full-vector comparison using the same ++;; predicate register as a following PTEST. We instead need to remember ++;; whether a comparison is known to be a full-vector comparison and use ++;; this information in addition to a check for equal predicate registers. ++;; At the same time, it's useful to have a common representation for all ++;; integer comparisons, so that they can be handled by a single set of ++;; patterns. ++;; ++;; We therefore take a similar approach to UNSPEC_PTEST above and use: ++;; ++;; (unspec: [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z) ++;; ++;; where: ++;; ++;; - GP is the governing predicate, of mode ++;; ++;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value ++;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE ++;; otherwise ++;; ++;; - CODE is the comparison code ++;; ++;; - OP0 and OP1 are the values being compared, of mode M ++;; ++;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero. ++;; ++;; ------------------------------------------------------------------------- ++;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X ++;; ------------------------------------------------------------------------- ++;; ++;; Many SVE integer operations are predicated. We can generate them ++;; from four sources: ++;; ++;; (1) Using normal unpredicated optabs. In this case we need to create ++;; an all-true predicate register to act as the governing predicate ++;; for the SVE instruction. There are no inactive lanes, and thus ++;; the values of inactive lanes don't matter. ++;; ++;; (2) Using _x ACLE functions. In this case the function provides a ++;; specific predicate and some lanes might be inactive. However, ++;; as for (1), the values of the inactive lanes don't matter. ++;; We can make extra lanes active without changing the behavior ++;; (although for code-quality reasons we should avoid doing so ++;; needlessly). ++;; ++;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. ++;; These optabs have a predicate operand that specifies which lanes are ++;; active and another operand that provides the values of inactive lanes. ++;; ++;; (4) Using _m and _z ACLE functions. These functions map to the same ++;; patterns as (3), with the _z functions setting inactive lanes to zero ++;; and the _m functions setting the inactive lanes to one of the function ++;; arguments. ++;; ++;; For (1) and (2) we need a way of attaching the predicate to a normal ++;; unpredicated integer operation. We do this using: ++;; ++;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X) ++;; ++;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED ++;; is a predicate of mode . PRED might or might not be a PTRUE; ++;; it always is for (1), but might not be for (2). ++;; ++;; The unspec as a whole has the same value as (code:M ...) when PRED is ++;; all-true. It is always semantically valid to replace PRED with a PTRUE, ++;; but as noted above, we should only do so if there's a specific benefit. ++;; ++;; (The "_X" in the unspec is named after the ACLE functions in (2).) 
++;; ++;; For (3) and (4) we can simply use the SVE port's normal representation ++;; of a predicate-based select: ++;; ++;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL) ++;; ++;; where INACTIVE specifies the values of inactive lanes. ++;; ++;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather ++;; than inserting the integer operation directly. This is mostly useful ++;; if we want the combine pass to merge an integer operation with an explicit ++;; vcond_mask (in other words, with a following SEL instruction). However, ++;; it's generally better to merge such operations at the gimple level ++;; using (3). ++;; ++;; ------------------------------------------------------------------------- ++;; ---- Note on predicated FP arithmetic patterns and GP "strictness" ++;; ------------------------------------------------------------------------- ++;; ++;; Most SVE floating-point operations are predicated. We can generate ++;; them from four sources: ++;; ++;; (1) Using normal unpredicated optabs. In this case we need to create ++;; an all-true predicate register to act as the governing predicate ++;; for the SVE instruction. There are no inactive lanes, and thus ++;; the values of inactive lanes don't matter. ++;; ++;; (2) Using _x ACLE functions. In this case the function provides a ++;; specific predicate and some lanes might be inactive. However, ++;; as for (1), the values of the inactive lanes don't matter. ++;; ++;; The instruction must have the same exception behavior as the ++;; function call unless things like command-line flags specifically ++;; allow otherwise. For example, with -ffast-math, it is OK to ++;; raise exceptions for inactive lanes, but normally it isn't. ++;; ++;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. ++;; These optabs have a predicate operand that specifies which lanes are ++;; active and another operand that provides the values of inactive lanes. ++;; ++;; (4) Using _m and _z ACLE functions. These functions map to the same ++;; patterns as (3), with the _z functions setting inactive lanes to zero ++;; and the _m functions setting the inactive lanes to one of the function ++;; arguments. ++;; ++;; So: ++;; ++;; - In (1), the predicate is known to be all true and the pattern can use ++;; unpredicated operations where available. ++;; ++;; - In (2), the predicate might or might not be all true. The pattern can ++;; use unpredicated instructions if the predicate is all-true or if things ++;; like command-line flags allow exceptions for inactive lanes. ++;; ++;; - (3) and (4) represent a native SVE predicated operation. Some lanes ++;; might be inactive and inactive lanes of the result must have specific ++;; values. There is no scope for using unpredicated instructions (and no ++;; reason to want to), so the question about command-line flags doesn't ++;; arise. ++;; ++;; It would be inaccurate to model (2) as an rtx code like (sqrt ...) ++;; in combination with a separate predicate operand, e.g. ++;; ++;; (unspec [(match_operand: 1 "register_operand" "Upl") ++;; (sqrt:SVE_FULL_F 2 "register_operand" "w")] ++;; ....) ++;; ++;; because (sqrt ...) can raise an exception for any lane, including ++;; inactive ones. We therefore need to use an unspec instead. ++;; ++;; Also, (2) requires some way of distinguishing the case in which the ++;; predicate might have inactive lanes and cannot be changed from the ++;; case in which the predicate has no inactive lanes or can be changed. 
++;; This information is also useful when matching combined FP patterns ++;; in which the predicates might not be equal. ++;; ++;; We therefore model FP operations as an unspec of the form: ++;; ++;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_) ++;; ++;; where: ++;; ++;; - PRED is the governing predicate. ++;; ++;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the ++;; value SVE_STRICT_GP if PRED might have inactive lanes and if those ++;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise. ++;; ++;; - OP0 OP1 ... are the normal input operands to the operation. ++;; ++;; - MNEMONIC is the mnemonic of the associated SVE instruction. ++;; ++;; ------------------------------------------------------------------------- ++;; ---- Note on FFR handling ++;; ------------------------------------------------------------------------- ++;; ++;; Logically we want to divide FFR-related instructions into regions ++;; that contain exactly one of: ++;; ++;; - a single write to the FFR ++;; - any number of reads from the FFR (but only one read is likely) ++;; - any number of LDFF1 and LDNF1 instructions ++;; ++;; However, LDFF1 and LDNF1 instructions should otherwise behave like ++;; normal loads as far as possible. This means that they should be ++;; schedulable within a region in the same way that LD1 would be, ++;; and they should be deleted as dead if the result is unused. The loads ++;; should therefore not write to the FFR, since that would both serialize ++;; the loads with respect to each other and keep the loads live for any ++;; later RDFFR. ++;; ++;; We get around this by using a fake "FFR token" (FFRT) to help describe ++;; the dependencies. Writing to the FFRT starts a new "FFRT region", ++;; while using the FFRT keeps the instruction within its region. ++;; Specifically: ++;; ++;; - Writes start a new FFRT region as well as setting the FFR: ++;; ++;; W1: parallel (FFRT = , FFR = ) ++;; ++;; - Loads use an LD1-like instruction that also uses the FFRT, so that the ++;; loads stay within the same FFRT region: ++;; ++;; L1: load data while using the FFRT ++;; ++;; In addition, any FFRT region that includes a load also has at least one ++;; instance of: ++;; ++;; L2: FFR = update(FFR, FFRT) [type == no_insn] ++;; ++;; to make it clear that the region both reads from and writes to the FFR. ++;; ++;; - Reads do the following: ++;; ++;; R1: FFRT = FFR [type == no_insn] ++;; R2: read from the FFRT ++;; R3: FFRT = update(FFRT) [type == no_insn] ++;; ++;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and ++;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s ++;; cannot move backwards across R3. ++;; ++;; This way, writes are only kept alive by later loads or reads, ++;; and write/read pairs fold normally. For two consecutive reads, ++;; the first R3 is made dead by the second R1, which in turn becomes ++;; redundant with the first R1. We then have: ++;; ++;; first R1: FFRT = FFR ++;; first read from the FFRT ++;; second read from the FFRT ++;; second R3: FFRT = update(FFRT) ++;; ++;; i.e. the two FFRT regions collapse into a single one with two ++;; independent reads. ++;; ++;; The model still prevents some valid optimizations though. For example, ++;; if all loads in an FFRT region are deleted as dead, nothing would remove ++;; the L2 instructions. 
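The FFR note above is easier to follow next to the source-level idiom it supports: set the FFR, issue a first-faulting load, then read the FFR back to see which lanes completed. The sketch below is illustrative only (the function name is invented) and assumes a toolchain that provides the SVE ACLE in arm_sve.h:

  #include <arm_sve.h>

  /* One FFR region as described above: svsetffr is the write (W1),
     svldff1_u32 is a first-faulting load inside the region, and
     svrdffr_z reads the FFR back (R1-R3) restricted to the active lanes.  */
  svuint32_t
  ldff1_region (svbool_t pg, const uint32_t *base, svbool_t *loaded)
  {
    svsetffr ();
    svuint32_t data = svldff1_u32 (pg, base);
    *loaded = svrdffr_z (pg);   /* active lanes whose loads completed  */
    return data;
  }

In rtl terms, svsetffr uses the SETFFR alternative of aarch64_wrffr below, the load stays within the FFRT region as described, and svrdffr_z maps to the aarch64_rdffr_z pattern.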
++ ++;; ========================================================================= ++;; == Moves ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- Moves of single vectors ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - MOV (including aliases) ++;; - LD1B (contiguous form) ++;; - LD1D ( " " ) ++;; - LD1H ( " " ) ++;; - LD1W ( " " ) ++;; - LDR ++;; - ST1B (contiguous form) ++;; - ST1D ( " " ) ++;; - ST1H ( " " ) ++;; - ST1W ( " " ) ++;; - STR ++;; ------------------------------------------------------------------------- + +- +-;; SVE data moves. + (define_expand "mov" +- [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") +- (match_operand:SVE_ALL 1 "general_operand"))] ++ [(set (match_operand:SVE_FULL 0 "nonimmediate_operand") ++ (match_operand:SVE_FULL 1 "general_operand"))] + "TARGET_SVE" + { + /* Use the predicated load and store patterns where possible. +@@ -72,7 +559,7 @@ + head of the file) and increases the addressing choices for + little-endian. */ + if ((MEM_P (operands[0]) || MEM_P (operands[1])) +- && can_create_pseudo_p ()) ++ && can_create_pseudo_p ()) + { + aarch64_expand_sve_mem_move (operands[0], operands[1], mode); + DONE; +@@ -80,47 +567,37 @@ + + if (CONSTANT_P (operands[1])) + { +- aarch64_expand_mov_immediate (operands[0], operands[1], +- gen_vec_duplicate); ++ aarch64_expand_mov_immediate (operands[0], operands[1]); + DONE; + } + + /* Optimize subregs on big-endian targets: we can use REV[BHW] + instead of going through memory. */ + if (BYTES_BIG_ENDIAN +- && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) ++ && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) + DONE; + } + ) + +-;; A pattern for optimizing SUBREGs that have a reinterpreting effect +-;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move +-;; for details. We use a special predicate for operand 2 to reduce +-;; the number of patterns. +-(define_insn_and_split "*aarch64_sve_mov_subreg_be" +- [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w") +- (unspec:SVE_ALL +- [(match_operand:VNx16BI 1 "register_operand" "Upl") +- (match_operand 2 "aarch64_any_register_operand" "w")] +- UNSPEC_REV_SUBREG))] +- "TARGET_SVE && BYTES_BIG_ENDIAN" +- "#" +- "&& reload_completed" +- [(const_int 0)] ++(define_expand "movmisalign" ++ [(set (match_operand:SVE_FULL 0 "nonimmediate_operand") ++ (match_operand:SVE_FULL 1 "general_operand"))] ++ "TARGET_SVE" + { +- aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); ++ /* Equivalent to a normal move for our purpooses. */ ++ emit_move_insn (operands[0], operands[1]); + DONE; + } + ) + +-;; Unpredicated moves (little-endian). Only allow memory operations +-;; during and after RA; before RA we want the predicated load and +-;; store patterns to be used instead. ++;; Unpredicated moves (bytes or little-endian). Only allow memory operations ++;; during and after RA; before RA we want the predicated load and store ++;; patterns to be used instead. 
+ (define_insn "*aarch64_sve_mov_le" +- [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") +- (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] ++ [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") ++ (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] + "TARGET_SVE +- && !BYTES_BIG_ENDIAN ++ && (mode == VNx16QImode || !BYTES_BIG_ENDIAN) + && ((lra_in_progress || reload_completed) + || (register_operand (operands[0], mode) + && nonmemory_operand (operands[1], mode)))" +@@ -131,12 +608,12 @@ + * return aarch64_output_sve_mov_immediate (operands[1]);" + ) + +-;; Unpredicated moves (big-endian). Memory accesses require secondary ++;; Unpredicated moves (non-byte big-endian). Memory accesses require secondary + ;; reloads. + (define_insn "*aarch64_sve_mov_be" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") +- (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))] +- "TARGET_SVE && BYTES_BIG_ENDIAN" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w") ++ (match_operand:SVE_FULL 1 "aarch64_nonmemory_operand" "w, Dn"))] ++ "TARGET_SVE && BYTES_BIG_ENDIAN && mode != VNx16QImode" + "@ + mov\t%0.d, %1.d + * return aarch64_output_sve_mov_immediate (operands[1]);" +@@ -144,10 +621,11 @@ + + ;; Handle big-endian memory reloads. We use byte PTRUE for all modes + ;; to try to encourage reuse. ++;; This pattern needs constraints due to TARGET_SECONDARY_RELOAD hook. + (define_expand "aarch64_sve_reload_be" + [(parallel + [(set (match_operand 0) +- (match_operand 1)) ++ (match_operand 1)) + (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])] + "TARGET_SVE && BYTES_BIG_ENDIAN" + { +@@ -166,16 +644,15 @@ + } + ) + +-;; A predicated load or store for which the predicate is known to be +-;; all-true. Note that this pattern is generated directly by +-;; aarch64_emit_sve_pred_move, so changes to this pattern will +-;; need changes there as well. ++;; A predicated move in which the predicate is known to be all-true. ++;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move, ++;; so changes to this pattern will need changes there as well. + (define_insn_and_split "@aarch64_pred_mov" +- [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m") +- (unspec:SVE_ALL ++ [(set (match_operand:SVE_FULL 0 "nonimmediate_operand" "=w, w, m") ++ (unspec:SVE_FULL + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")] +- UNSPEC_MERGE_PTRUE))] ++ (match_operand:SVE_FULL 2 "nonimmediate_operand" "w, m, w")] ++ UNSPEC_PRED_X))] + "TARGET_SVE + && (register_operand (operands[0], mode) + || register_operand (operands[2], mode))" +@@ -188,152 +665,67 @@ + [(set (match_dup 0) (match_dup 2))] + ) + +-(define_expand "movmisalign" +- [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") +- (match_operand:SVE_ALL 1 "general_operand"))] +- "TARGET_SVE" ++;; A pattern for optimizing SUBREGs that have a reinterpreting effect ++;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move ++;; for details. We use a special predicate for operand 2 to reduce ++;; the number of patterns. 
++(define_insn_and_split "*aarch64_sve_mov_subreg_be" ++ [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand:VNx16BI 1 "register_operand" "Upl") ++ (match_operand 2 "aarch64_any_register_operand" "w")] ++ UNSPEC_REV_SUBREG))] ++ "TARGET_SVE && BYTES_BIG_ENDIAN" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] + { +- /* Equivalent to a normal move for our purpooses. */ +- emit_move_insn (operands[0], operands[1]); ++ aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); + DONE; + } + ) + +-(define_insn "maskload" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") +- (unspec:SVE_ALL +- [(match_operand: 2 "register_operand" "Upl") +- (match_operand:SVE_ALL 1 "memory_operand" "m")] +- UNSPEC_LD1_SVE))] +- "TARGET_SVE" +- "ld1\t%0., %2/z, %1" +-) +- +-(define_insn "maskstore" +- [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") +- (unspec:SVE_ALL [(match_operand: 2 "register_operand" "Upl") +- (match_operand:SVE_ALL 1 "register_operand" "w") +- (match_dup 0)] +- UNSPEC_ST1_SVE))] +- "TARGET_SVE" +- "st1\t%1., %2, %0" +-) +- +-;; Unpredicated gather loads. +-(define_expand "gather_load" +- [(set (match_operand:SVE_SD 0 "register_operand") +- (unspec:SVE_SD +- [(match_dup 5) +- (match_operand:DI 1 "aarch64_reg_or_zero") +- (match_operand: 2 "register_operand") +- (match_operand:DI 3 "const_int_operand") +- (match_operand:DI 4 "aarch64_gather_scale_operand_") +- (mem:BLK (scratch))] +- UNSPEC_LD1_GATHER))] ++;; Reinterpret operand 1 in operand 0's mode, without changing its contents. ++;; This is equivalent to a subreg on little-endian targets but not for ++;; big-endian; see the comment at the head of the file for details. ++(define_expand "@aarch64_sve_reinterpret" ++ [(set (match_operand:SVE_FULL 0 "register_operand") ++ (unspec:SVE_FULL ++ [(match_operand 1 "aarch64_any_register_operand")] ++ UNSPEC_REINTERPRET))] + "TARGET_SVE" + { +- operands[5] = force_reg (mode, CONSTM1_RTX (mode)); ++ if (!BYTES_BIG_ENDIAN) ++ { ++ emit_move_insn (operands[0], gen_lowpart (mode, operands[1])); ++ DONE; ++ } + } + ) + +-;; Predicated gather loads for 32-bit elements. Operand 3 is true for +-;; unsigned extension and false for signed extension. +-(define_insn "mask_gather_load" +- [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w") +- (unspec:SVE_S +- [(match_operand: 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") +- (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") +- (match_operand: 2 "register_operand" "w, w, w, w, w") +- (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1") +- (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") +- (mem:BLK (scratch))] +- UNSPEC_LD1_GATHER))] +- "TARGET_SVE" +- "@ +- ld1w\t%0.s, %5/z, [%2.s] +- ld1w\t%0.s, %5/z, [%1, %2.s, sxtw] +- ld1w\t%0.s, %5/z, [%1, %2.s, uxtw] +- ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] +- ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" +-) +- +-;; Predicated gather loads for 64-bit elements. The value of operand 3 +-;; doesn't matter in this case. 
+-(define_insn "mask_gather_load" +- [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w") +- (unspec:SVE_D +- [(match_operand: 5 "register_operand" "Upl, Upl, Upl") +- (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk") +- (match_operand: 2 "register_operand" "w, w, w") +- (match_operand:DI 3 "const_int_operand") +- (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") +- (mem:BLK (scratch))] +- UNSPEC_LD1_GATHER))] +- "TARGET_SVE" +- "@ +- ld1d\t%0.d, %5/z, [%2.d] +- ld1d\t%0.d, %5/z, [%1, %2.d] +- ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" +-) +- +-;; Unpredicated scatter store. +-(define_expand "scatter_store" +- [(set (mem:BLK (scratch)) +- (unspec:BLK +- [(match_dup 5) +- (match_operand:DI 0 "aarch64_reg_or_zero") +- (match_operand: 1 "register_operand") +- (match_operand:DI 2 "const_int_operand") +- (match_operand:DI 3 "aarch64_gather_scale_operand_") +- (match_operand:SVE_SD 4 "register_operand")] +- UNSPEC_ST1_SCATTER))] ++;; A pattern for handling type punning on big-endian targets. We use a ++;; special predicate for operand 1 to reduce the number of patterns. ++(define_insn_and_split "*aarch64_sve_reinterpret" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand 1 "aarch64_any_register_operand" "w")] ++ UNSPEC_REINTERPRET))] + "TARGET_SVE" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 0) (match_dup 1))] + { +- operands[5] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[1] = aarch64_replace_reg_mode (operands[1], mode); + } + ) + +-;; Predicated scatter stores for 32-bit elements. Operand 2 is true for +-;; unsigned extension and false for signed extension. +-(define_insn "mask_scatter_store" +- [(set (mem:BLK (scratch)) +- (unspec:BLK +- [(match_operand: 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") +- (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") +- (match_operand: 1 "register_operand" "w, w, w, w, w") +- (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1") +- (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") +- (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")] +- UNSPEC_ST1_SCATTER))] +- "TARGET_SVE" +- "@ +- st1w\t%4.s, %5, [%1.s] +- st1w\t%4.s, %5, [%0, %1.s, sxtw] +- st1w\t%4.s, %5, [%0, %1.s, uxtw] +- st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] +- st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" +-) +- +-;; Predicated scatter stores for 64-bit elements. The value of operand 2 +-;; doesn't matter in this case. +-(define_insn "mask_scatter_store" +- [(set (mem:BLK (scratch)) +- (unspec:BLK +- [(match_operand: 5 "register_operand" "Upl, Upl, Upl") +- (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk") +- (match_operand: 1 "register_operand" "w, w, w") +- (match_operand:DI 2 "const_int_operand") +- (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") +- (match_operand:SVE_D 4 "register_operand" "w, w, w")] +- UNSPEC_ST1_SCATTER))] +- "TARGET_SVE" +- "@ +- st1d\t%4.d, %5, [%1.d] +- st1d\t%4.d, %5, [%0, %1.d] +- st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" +-) ++;; ------------------------------------------------------------------------- ++;; ---- Moves of multiple vectors ++;; ------------------------------------------------------------------------- ++;; All patterns in this section are synthetic and split to real ++;; instructions after reload. ++;; ------------------------------------------------------------------------- + +-;; SVE structure moves. 
+ (define_expand "mov" + [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") + (match_operand:SVE_STRUCT 1 "general_operand"))] +@@ -368,7 +760,7 @@ + + ;; Unpredicated structure moves (big-endian). Memory accesses require + ;; secondary reloads. +-(define_insn "*aarch64_sve_mov_le" ++(define_insn "*aarch64_sve_mov_be" + [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") + (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] + "TARGET_SVE && BYTES_BIG_ENDIAN" +@@ -409,7 +801,7 @@ + (unspec:SVE_STRUCT + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")] +- UNSPEC_MERGE_PTRUE))] ++ UNSPEC_PRED_X))] + "TARGET_SVE + && (register_operand (operands[0], mode) + || register_operand (operands[2], mode))" +@@ -432,6 +824,18 @@ + [(set_attr "length" "")] + ) + ++;; ------------------------------------------------------------------------- ++;; ---- Moves of predicates ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - MOV ++;; - LDR ++;; - PFALSE ++;; - PTRUE ++;; - PTRUES ++;; - STR ++;; ------------------------------------------------------------------------- ++ + (define_expand "mov" + [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") + (match_operand:PRED_ALL 1 "general_operand"))] +@@ -439,12 +843,18 @@ + { + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (mode, operands[1]); ++ ++ if (CONSTANT_P (operands[1])) ++ { ++ aarch64_expand_mov_immediate (operands[0], operands[1]); ++ DONE; ++ } + } + ) + + (define_insn "*aarch64_sve_mov" +- [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa") +- (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))] ++ [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa") ++ (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))] + "TARGET_SVE + && (register_operand (operands[0], mode) + || register_operand (operands[1], mode))" +@@ -452,287 +862,296 @@ + mov\t%0.b, %1.b + str\t%1, %0 + ldr\t%0, %1 +- pfalse\t%0.b +- * return aarch64_output_ptrue (mode, '');" ++ * return aarch64_output_sve_mov_immediate (operands[1]);" + ) + +-;; Handle extractions from a predicate by converting to an integer vector +-;; and extracting from there. +-(define_expand "vec_extract" +- [(match_operand: 0 "register_operand") +- (match_operand: 1 "register_operand") +- (match_operand:SI 2 "nonmemory_operand") +- ;; Dummy operand to which we can attach the iterator. +- (reg:SVE_I V0_REGNUM)] ++;; Match PTRUES Pn.B when both the predicate and flags are useful. 
++(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand 2) ++ (match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" ++ [(unspec:VNx16BI ++ [(match_operand:SI 4 "const_int_operand") ++ (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")] ++ UNSPEC_PTRUE)])] ++ UNSPEC_PTEST)) ++ (set (match_operand:VNx16BI 0 "register_operand" "=Upa") ++ (match_dup 1))] + "TARGET_SVE" + { +- rtx tmp = gen_reg_rtx (mode); +- emit_insn (gen_aarch64_sve_dup_const (tmp, operands[1], +- CONST1_RTX (mode), +- CONST0_RTX (mode))); +- emit_insn (gen_vec_extract (operands[0], tmp, operands[2])); +- DONE; ++ return aarch64_output_sve_ptrues (operands[1]); ++ } ++ "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" ++ { ++ operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode); + } + ) + +-(define_expand "vec_extract" +- [(set (match_operand: 0 "register_operand") +- (vec_select: +- (match_operand:SVE_ALL 1 "register_operand") +- (parallel [(match_operand:SI 2 "nonmemory_operand")])))] ++;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful. ++(define_insn_and_rewrite "*aarch64_sve_ptrue_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand 2) ++ (match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (subreg:PRED_HSD ++ (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" ++ [(unspec:VNx16BI ++ [(match_operand:SI 4 "const_int_operand") ++ (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")] ++ UNSPEC_PTRUE)]) 0)] ++ UNSPEC_PTEST)) ++ (set (match_operand:VNx16BI 0 "register_operand" "=Upa") ++ (match_dup 1))] + "TARGET_SVE" + { +- poly_int64 val; +- if (poly_int_rtx_p (operands[2], &val) +- && known_eq (val, GET_MODE_NUNITS (mode) - 1)) +- { +- /* The last element can be extracted with a LASTB and a false +- predicate. */ +- rtx sel = force_reg (mode, CONST0_RTX (mode)); +- emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); +- DONE; +- } +- if (!CONST_INT_P (operands[2])) +- { +- /* Create an index with operand[2] as the base and -1 as the step. +- It will then be zero for the element we care about. */ +- rtx index = gen_lowpart (mode, operands[2]); +- index = force_reg (mode, index); +- rtx series = gen_reg_rtx (mode); +- emit_insn (gen_vec_series (series, index, constm1_rtx)); +- +- /* Get a predicate that is true for only that element. */ +- rtx zero = CONST0_RTX (mode); +- rtx cmp = gen_rtx_EQ (mode, series, zero); +- rtx sel = gen_reg_rtx (mode); +- emit_insn (gen_vec_cmp (sel, cmp, series, zero)); +- +- /* Select the element using LASTB. */ +- emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); +- DONE; +- } +- } +-) +- +-;; Extract element zero. This is a special case because we want to force +-;; the registers to be the same for the second alternative, and then +-;; split the instruction into nothing after RA. 
+-(define_insn_and_split "*vec_extract_0" +- [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") +- (vec_select: +- (match_operand:SVE_ALL 1 "register_operand" "w, 0, w") +- (parallel [(const_int 0)])))] +- "TARGET_SVE" +- { +- operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); +- switch (which_alternative) +- { +- case 0: +- return "umov\\t%0, %1.[0]"; +- case 1: +- return "#"; +- case 2: +- return "st1\\t{%1.}[0], %0"; +- default: +- gcc_unreachable (); +- } ++ return aarch64_output_sve_ptrues (operands[1]); + } +- "&& reload_completed +- && REG_P (operands[0]) +- && REGNO (operands[0]) == REGNO (operands[1])" +- [(const_int 0)] ++ "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" + { +- emit_note (NOTE_INSN_DELETED); +- DONE; ++ operands[2] = CONSTM1_RTX (VNx16BImode); ++ operands[3] = CONSTM1_RTX (mode); + } +- [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] + ) + +-;; Extract an element from the Advanced SIMD portion of the register. +-;; We don't just reuse the aarch64-simd.md pattern because we don't +-;; want any change in lane number on big-endian targets. +-(define_insn "*vec_extract_v128" +- [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") +- (vec_select: +- (match_operand:SVE_ALL 1 "register_operand" "w, w, w") +- (parallel [(match_operand:SI 2 "const_int_operand")])))] +- "TARGET_SVE +- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 1, 15)" ++;; Match PTRUES Pn.B when only the flags result is useful (which is ++;; a way of testing VL). ++(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand 2) ++ (match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" ++ [(unspec:VNx16BI ++ [(match_operand:SI 4 "const_int_operand") ++ (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")] ++ UNSPEC_PTRUE)])] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:VNx16BI 0 "=Upa"))] ++ "TARGET_SVE" + { +- operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); +- switch (which_alternative) +- { +- case 0: +- return "umov\\t%0, %1.[%2]"; +- case 1: +- return "dup\\t%0, %1.[%2]"; +- case 2: +- return "st1\\t{%1.}[%2], %0"; +- default: +- gcc_unreachable (); +- } ++ return aarch64_output_sve_ptrues (operands[1]); + } +- [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] +-) +- +-;; Extract an element in the range of DUP. This pattern allows the +-;; source and destination to be different. +-(define_insn "*vec_extract_dup" +- [(set (match_operand: 0 "register_operand" "=w") +- (vec_select: +- (match_operand:SVE_ALL 1 "register_operand" "w") +- (parallel [(match_operand:SI 2 "const_int_operand")])))] +- "TARGET_SVE +- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 16, 63)" ++ "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" + { +- operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); +- return "dup\t%0., %1.[%2]"; ++ operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode); + } + ) + +-;; Extract an element outside the range of DUP. This pattern requires the +-;; source and destination to be the same. 
+-(define_insn "*vec_extract_ext" +- [(set (match_operand: 0 "register_operand" "=w") +- (vec_select: +- (match_operand:SVE_ALL 1 "register_operand" "0") +- (parallel [(match_operand:SI 2 "const_int_operand")])))] +- "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (mode) >= 64" ++;; Match PTRUES Pn.[HWD] when only the flags result is useful (which is ++;; a way of testing VL). ++(define_insn_and_rewrite "*aarch64_sve_ptrue_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand 2) ++ (match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (subreg:PRED_HSD ++ (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" ++ [(unspec:VNx16BI ++ [(match_operand:SI 4 "const_int_operand") ++ (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")] ++ UNSPEC_PTRUE)]) 0)] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:VNx16BI 0 "=Upa"))] ++ "TARGET_SVE" + { +- operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); +- operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (mode)); +- return "ext\t%0.b, %0.b, %0.b, #%2"; ++ return aarch64_output_sve_ptrues (operands[1]); + } +-) +- +-;; Extract the last active element of operand 1 into operand 0. +-;; If no elements are active, extract the last inactive element instead. +-(define_insn "extract_last_" +- [(set (match_operand: 0 "register_operand" "=r, w") +- (unspec: +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (match_operand:SVE_ALL 2 "register_operand" "w, w")] +- UNSPEC_LASTB))] +- "TARGET_SVE" +- "@ +- lastb\t%0, %1, %2. +- lastb\t%0, %1, %2." +-) +- +-(define_expand "vec_duplicate" +- [(parallel +- [(set (match_operand:SVE_ALL 0 "register_operand") +- (vec_duplicate:SVE_ALL +- (match_operand: 1 "aarch64_sve_dup_operand"))) +- (clobber (scratch:))])] +- "TARGET_SVE" ++ "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" + { +- if (MEM_P (operands[1])) +- { +- rtx ptrue = force_reg (mode, CONSTM1_RTX (mode)); +- emit_insn (gen_sve_ld1r (operands[0], ptrue, operands[1], +- CONST0_RTX (mode))); +- DONE; +- } ++ operands[2] = CONSTM1_RTX (VNx16BImode); ++ operands[3] = CONSTM1_RTX (mode); + } + ) + +-;; Accept memory operands for the benefit of combine, and also in case +-;; the scalar input gets spilled to memory during RA. We want to split +-;; the load at the first opportunity in order to allow the PTRUE to be +-;; optimized with surrounding code. +-(define_insn_and_split "*vec_duplicate_reg" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w") +- (vec_duplicate:SVE_ALL +- (match_operand: 1 "aarch64_sve_dup_operand" "r, w, Uty"))) +- (clobber (match_scratch: 2 "=X, X, Upl"))] ++;; ------------------------------------------------------------------------- ++;; ---- Moves relating to the FFR ++;; ------------------------------------------------------------------------- ++;; RDFFR ++;; RDFFRS ++;; SETFFR ++;; WRFFR ++;; ------------------------------------------------------------------------- ++ ++;; [W1 in the block comment above about FFR handling] ++;; ++;; Write to the FFR and start a new FFRT scheduling region. 
++(define_insn "aarch64_wrffr" ++ [(set (reg:VNx16BI FFR_REGNUM) ++ (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa")) ++ (set (reg:VNx16BI FFRT_REGNUM) ++ (match_dup 0))] + "TARGET_SVE" + "@ +- mov\t%0., %1 +- mov\t%0., %1 +- #" +- "&& MEM_P (operands[1])" +- [(const_int 0)] +- { +- if (GET_CODE (operands[2]) == SCRATCH) +- operands[2] = gen_reg_rtx (mode); +- emit_move_insn (operands[2], CONSTM1_RTX (mode)); +- emit_insn (gen_sve_ld1r (operands[0], operands[2], operands[1], +- CONST0_RTX (mode))); +- DONE; +- } +- [(set_attr "length" "4,4,8")] ++ setffr ++ wrffr\t%0.b" + ) + +-;; This is used for vec_duplicates from memory, but can also +-;; be used by combine to optimize selects of a a vec_duplicate +-;; with zero. +-(define_insn "sve_ld1r" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") +- (unspec:SVE_ALL +- [(match_operand: 1 "register_operand" "Upl") +- (vec_duplicate:SVE_ALL +- (match_operand: 2 "aarch64_sve_ld1r_operand" "Uty")) +- (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")] +- UNSPEC_SEL))] ++;; [L2 in the block comment above about FFR handling] ++;; ++;; Introduce a read from and write to the FFR in the current FFRT region, ++;; so that the FFR value is live on entry to the region and so that the FFR ++;; value visibly changes within the region. This is used (possibly multiple ++;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions. ++(define_insn "aarch64_update_ffr_for_load" ++ [(set (reg:VNx16BI FFR_REGNUM) ++ (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM) ++ (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))] + "TARGET_SVE" +- "ld1r\t%0., %1/z, %2" ++ "" ++ [(set_attr "type" "no_insn")] + ) + +-;; Load 128 bits from memory and duplicate to fill a vector. Since there +-;; are so few operations on 128-bit "elements", we don't define a VNx1TI +-;; and simply use vectors of bytes instead. +-(define_insn "*sve_ld1rq" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") +- (unspec:SVE_ALL +- [(match_operand: 1 "register_operand" "Upl") +- (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")] +- UNSPEC_LD1RQ))] ++;; [R1 in the block comment above about FFR handling] ++;; ++;; Notionally copy the FFR to the FFRT, so that the current FFR value ++;; can be read from there by the RDFFR instructions below. This acts ++;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and ++;; creates a natural dependency with earlier writes. ++(define_insn "aarch64_copy_ffr_to_ffrt" ++ [(set (reg:VNx16BI FFRT_REGNUM) ++ (reg:VNx16BI FFR_REGNUM))] + "TARGET_SVE" +- "ld1rq\t%0., %1/z, %2" ++ "" ++ [(set_attr "type" "no_insn")] + ) + +-;; Implement a predicate broadcast by shifting the low bit of the scalar +-;; input into the top bit and using a WHILELO. An alternative would be to +-;; duplicate the input and do a compare with zero. +-(define_expand "vec_duplicate" +- [(set (match_operand:PRED_ALL 0 "register_operand") +- (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))] ++;; [R2 in the block comment above about FFR handling] ++;; ++;; Read the FFR via the FFRT. ++(define_insn "aarch64_rdffr" ++ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") ++ (reg:VNx16BI FFRT_REGNUM))] ++ "TARGET_SVE" ++ "rdffr\t%0.b" ++) ++ ++;; Likewise with zero predication. 
++(define_insn "aarch64_rdffr_z" ++ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") ++ (and:VNx16BI ++ (reg:VNx16BI FFRT_REGNUM) ++ (match_operand:VNx16BI 1 "register_operand" "Upa")))] ++ "TARGET_SVE" ++ "rdffr\t%0.b, %1/z" ++) ++ ++;; Read the FFR to test for a fault, without using the predicate result. ++(define_insn "*aarch64_rdffr_z_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_dup 1) ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (and:VNx16BI ++ (reg:VNx16BI FFRT_REGNUM) ++ (match_dup 1))] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:VNx16BI 0 "=Upa"))] ++ "TARGET_SVE" ++ "rdffrs\t%0.b, %1/z" ++) ++ ++;; Same for unpredicated RDFFR when tested with a known PTRUE. ++(define_insn "*aarch64_rdffr_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_dup 1) ++ (const_int SVE_KNOWN_PTRUE) ++ (reg:VNx16BI FFRT_REGNUM)] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:VNx16BI 0 "=Upa"))] ++ "TARGET_SVE" ++ "rdffrs\t%0.b, %1/z" ++) ++ ++;; Read the FFR with zero predication and test the result. ++(define_insn "*aarch64_rdffr_z_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_dup 1) ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (and:VNx16BI ++ (reg:VNx16BI FFRT_REGNUM) ++ (match_dup 1))] ++ UNSPEC_PTEST)) ++ (set (match_operand:VNx16BI 0 "register_operand" "=Upa") ++ (and:VNx16BI ++ (reg:VNx16BI FFRT_REGNUM) ++ (match_dup 1)))] ++ "TARGET_SVE" ++ "rdffrs\t%0.b, %1/z" ++) ++ ++;; Same for unpredicated RDFFR when tested with a known PTRUE. ++(define_insn "*aarch64_rdffr_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_dup 1) ++ (const_int SVE_KNOWN_PTRUE) ++ (reg:VNx16BI FFRT_REGNUM)] ++ UNSPEC_PTEST)) ++ (set (match_operand:VNx16BI 0 "register_operand" "=Upa") ++ (reg:VNx16BI FFRT_REGNUM))] ++ "TARGET_SVE" ++ "rdffrs\t%0.b, %1/z" ++) ++ ++;; [R3 in the block comment above about FFR handling] ++;; ++;; Arbitrarily update the FFRT after a read from the FFR. This acts as ++;; a scheduling barrier for later LDFF1 and LDNF1 instructions. 
++(define_insn "aarch64_update_ffrt" ++ [(set (reg:VNx16BI FFRT_REGNUM) ++ (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))] + "TARGET_SVE" +- { +- rtx tmp = gen_reg_rtx (DImode); +- rtx op1 = gen_lowpart (DImode, operands[1]); +- emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); +- emit_insn (gen_while_ultdi (operands[0], const0_rtx, tmp)); +- DONE; +- } +-) +- +-(define_insn "vec_series" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w") +- (vec_series:SVE_I +- (match_operand: 1 "aarch64_sve_index_operand" "Usi, r, r") +- (match_operand: 2 "aarch64_sve_index_operand" "r, Usi, r")))] ++ "" ++ [(set_attr "type" "no_insn")] ++) ++ ++;; ========================================================================= ++;; == Loads ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- Normal contiguous loads ++;; ------------------------------------------------------------------------- ++;; Includes contiguous forms of: ++;; - LD1B ++;; - LD1D ++;; - LD1H ++;; - LD1W ++;; - LD2B ++;; - LD2D ++;; - LD2H ++;; - LD2W ++;; - LD3B ++;; - LD3D ++;; - LD3H ++;; - LD3W ++;; - LD4B ++;; - LD4D ++;; - LD4H ++;; - LD4W ++;; ------------------------------------------------------------------------- ++ ++;; Predicated LD1. ++(define_insn "maskload" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand: 2 "register_operand" "Upl") ++ (match_operand:SVE_FULL 1 "memory_operand" "m")] ++ UNSPEC_LD1_SVE))] + "TARGET_SVE" +- "@ +- index\t%0., #%1, %2 +- index\t%0., %1, #%2 +- index\t%0., %1, %2" +-) +- +-;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range +-;; of an INDEX instruction. +-(define_insn "*vec_series_plus" +- [(set (match_operand:SVE_I 0 "register_operand" "=w") +- (plus:SVE_I +- (vec_duplicate:SVE_I +- (match_operand: 1 "register_operand" "r")) +- (match_operand:SVE_I 2 "immediate_operand")))] +- "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" +- { +- operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); +- return "index\t%0., %1, #%2"; +- } ++ "ld1\t%0., %2/z, %1" + ) + + ;; Unpredicated LD[234]. +@@ -744,7 +1163,7 @@ + UNSPEC_LDN))] + "TARGET_SVE" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[2] = aarch64_ptrue_reg (mode); + } + ) + +@@ -759,884 +1178,5373 @@ + "ld\t%0, %2/z, %1" + ) + +-;; Unpredicated ST[234]. This is always a full update, so the dependence +-;; on the old value of the memory location (via (match_dup 0)) is redundant. +-;; There doesn't seem to be any obvious benefit to treating the all-true +-;; case differently though. In particular, it's very unlikely that we'll +-;; only find out during RTL that a store_lanes is dead. +-(define_expand "vec_store_lanes" +- [(set (match_operand:SVE_STRUCT 0 "memory_operand") +- (unspec:SVE_STRUCT +- [(match_dup 2) +- (match_operand:SVE_STRUCT 1 "register_operand") +- (match_dup 0)] +- UNSPEC_STN))] ++;; ------------------------------------------------------------------------- ++;; ---- Extending contiguous loads ++;; ------------------------------------------------------------------------- ++;; Includes contiguous forms of: ++;; LD1B ++;; LD1H ++;; LD1SB ++;; LD1SH ++;; LD1SW ++;; LD1W ++;; ------------------------------------------------------------------------- ++ ++;; Predicated load and extend, with 8 elements per 128-bit block. 
++(define_insn "@aarch64_load_" ++ [(set (match_operand:VNx8_WIDE 0 "register_operand" "=w") ++ (ANY_EXTEND:VNx8_WIDE ++ (unspec:VNx8_NARROW ++ [(match_operand:VNx8BI 2 "register_operand" "Upl") ++ (match_operand:VNx8_NARROW 1 "memory_operand" "m")] ++ UNSPEC_LD1_SVE)))] ++ "TARGET_SVE" ++ "ld1\t%0., %2/z, %1" ++) ++ ++;; Predicated load and extend, with 4 elements per 128-bit block. ++(define_insn "@aarch64_load_" ++ [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w") ++ (ANY_EXTEND:VNx4_WIDE ++ (unspec:VNx4_NARROW ++ [(match_operand:VNx4BI 2 "register_operand" "Upl") ++ (match_operand:VNx4_NARROW 1 "memory_operand" "m")] ++ UNSPEC_LD1_SVE)))] ++ "TARGET_SVE" ++ "ld1\t%0., %2/z, %1" ++) ++ ++;; Predicated load and extend, with 2 elements per 128-bit block. ++(define_insn "@aarch64_load_" ++ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w") ++ (ANY_EXTEND:VNx2_WIDE ++ (unspec:VNx2_NARROW ++ [(match_operand:VNx2BI 2 "register_operand" "Upl") ++ (match_operand:VNx2_NARROW 1 "memory_operand" "m")] ++ UNSPEC_LD1_SVE)))] ++ "TARGET_SVE" ++ "ld1\t%0., %2/z, %1" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- First-faulting contiguous loads ++;; ------------------------------------------------------------------------- ++;; Includes contiguous forms of: ++;; - LDFF1B ++;; - LDFF1D ++;; - LDFF1H ++;; - LDFF1W ++;; - LDNF1B ++;; - LDNF1D ++;; - LDNF1H ++;; - LDNF1W ++;; ------------------------------------------------------------------------- ++ ++;; Contiguous non-extending first-faulting or non-faulting loads. ++(define_insn "@aarch64_ldf1" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand: 2 "register_operand" "Upl") ++ (match_operand:SVE_FULL 1 "aarch64_sve_ldf1_operand" "Ut") ++ (reg:VNx16BI FFRT_REGNUM)] ++ SVE_LDFF1_LDNF1))] ++ "TARGET_SVE" ++ "ldf1\t%0., %2/z, %1" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- First-faulting extending contiguous loads ++;; ------------------------------------------------------------------------- ++;; Includes contiguous forms of: ++;; - LDFF1B ++;; - LDFF1H ++;; - LDFF1SB ++;; - LDFF1SH ++;; - LDFF1SW ++;; - LDFF1W ++;; - LDNF1B ++;; - LDNF1H ++;; - LDNF1SB ++;; - LDNF1SH ++;; - LDNF1SW ++;; - LDNF1W ++;; ------------------------------------------------------------------------- ++ ++;; Predicated first-faulting or non-faulting load and extend, with 8 elements ++;; per 128-bit block. ++(define_insn "@aarch64_ldf1_" ++ [(set (match_operand:VNx8_WIDE 0 "register_operand" "=w") ++ (ANY_EXTEND:VNx8_WIDE ++ (unspec:VNx8_NARROW ++ [(match_operand:VNx8BI 2 "register_operand" "Upl") ++ (match_operand:VNx8_NARROW 1 "aarch64_sve_ldf1_operand" "Ut") ++ (reg:VNx16BI FFRT_REGNUM)] ++ SVE_LDFF1_LDNF1)))] ++ "TARGET_SVE" ++ "ldf1\t%0., %2/z, %1" ++) ++ ++;; Predicated first-faulting or non-faulting load and extend, with 4 elements ++;; per 128-bit block. ++(define_insn "@aarch64_ldf1_" ++ [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w") ++ (ANY_EXTEND:VNx4_WIDE ++ (unspec:VNx4_NARROW ++ [(match_operand:VNx4BI 2 "register_operand" "Upl") ++ (match_operand:VNx4_NARROW 1 "aarch64_sve_ldf1_operand" "Ut") ++ (reg:VNx16BI FFRT_REGNUM)] ++ SVE_LDFF1_LDNF1)))] ++ "TARGET_SVE" ++ "ldf1\t%0., %2/z, %1" ++) ++ ++;; Predicated first-faulting or non-faulting load and extend, with 2 elements ++;; per 128-bit block. 
++(define_insn "@aarch64_ldf1_" ++ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w") ++ (ANY_EXTEND:VNx2_WIDE ++ (unspec:VNx2_NARROW ++ [(match_operand:VNx2BI 2 "register_operand" "Upl") ++ (match_operand:VNx2_NARROW 1 "aarch64_sve_ldf1_operand" "Ut") ++ (reg:VNx16BI FFRT_REGNUM)] ++ SVE_LDFF1_LDNF1)))] ++ "TARGET_SVE" ++ "ldf1\t%0., %2/z, %1" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Non-temporal contiguous loads ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - LDNT1B ++;; - LDNT1D ++;; - LDNT1H ++;; - LDNT1W ++;; ------------------------------------------------------------------------- ++ ++;; Predicated contiguous non-temporal load. ++(define_insn "@aarch64_ldnt1" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand: 2 "register_operand" "Upl") ++ (match_operand:SVE_FULL 1 "memory_operand" "m")] ++ UNSPEC_LDNT1_SVE))] ++ "TARGET_SVE" ++ "ldnt1\t%0., %2/z, %1" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Normal gather loads ++;; ------------------------------------------------------------------------- ++;; Includes gather forms of: ++;; - LD1D ++;; - LD1W ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated gather loads. ++(define_expand "gather_load" ++ [(set (match_operand:SVE_FULL_SD 0 "register_operand") ++ (unspec:SVE_FULL_SD ++ [(match_dup 5) ++ (match_operand:DI 1 "aarch64_sve_gather_offset_") ++ (match_operand: 2 "register_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_") ++ (mem:BLK (scratch))] ++ UNSPEC_LD1_GATHER))] ++ "TARGET_SVE" ++ { ++ operands[5] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Predicated gather loads for 32-bit elements. Operand 3 is true for ++;; unsigned extension and false for signed extension. ++(define_insn "mask_gather_load" ++ [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w") ++ (unspec:SVE_FULL_S ++ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk") ++ (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") ++ (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i") ++ (mem:BLK (scratch))] ++ UNSPEC_LD1_GATHER))] ++ "TARGET_SVE" ++ "@ ++ ld1w\t%0.s, %5/z, [%2.s] ++ ld1w\t%0.s, %5/z, [%2.s, #%1] ++ ld1w\t%0.s, %5/z, [%1, %2.s, sxtw] ++ ld1w\t%0.s, %5/z, [%1, %2.s, uxtw] ++ ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] ++ ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" ++) ++ ++;; Predicated gather loads for 64-bit elements. The value of operand 3 ++;; doesn't matter in this case. 
++(define_insn "mask_gather_load" ++ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w") ++ (unspec:SVE_FULL_D ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") ++ (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk") ++ (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i") ++ (mem:BLK (scratch))] ++ UNSPEC_LD1_GATHER))] + "TARGET_SVE" ++ "@ ++ ld1d\t%0.d, %5/z, [%2.d] ++ ld1d\t%0.d, %5/z, [%2.d, #%1] ++ ld1d\t%0.d, %5/z, [%1, %2.d] ++ ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" ++) ++ ++;; Likewise, but with the offset being sign-extended from 32 bits. ++(define_insn "*mask_gather_load_sxtw" ++ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") ++ (unspec:SVE_FULL_D ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 1 "register_operand" "rk, rk") ++ (unspec:VNx2DI ++ [(match_dup 5) ++ (sign_extend:VNx2DI ++ (truncate:VNx2SI ++ (match_operand:VNx2DI 2 "register_operand" "w, w")))] ++ UNSPEC_PRED_X) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") ++ (mem:BLK (scratch))] ++ UNSPEC_LD1_GATHER))] ++ "TARGET_SVE" ++ "@ ++ ld1d\t%0.d, %5/z, [%1, %2.d, sxtw] ++ ld1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" ++) ++ ++;; Likewise, but with the offset being zero-extended from 32 bits. ++(define_insn "*mask_gather_load_uxtw" ++ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") ++ (unspec:SVE_FULL_D ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 1 "register_operand" "rk, rk") ++ (and:VNx2DI ++ (match_operand:VNx2DI 2 "register_operand" "w, w") ++ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") ++ (mem:BLK (scratch))] ++ UNSPEC_LD1_GATHER))] ++ "TARGET_SVE" ++ "@ ++ ld1d\t%0.d, %5/z, [%1, %2.d, uxtw] ++ ld1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Extending gather loads ++;; ------------------------------------------------------------------------- ++;; Includes gather forms of: ++;; - LD1B ++;; - LD1H ++;; - LD1SB ++;; - LD1SH ++;; - LD1SW ++;; - LD1W ++;; ------------------------------------------------------------------------- ++ ++;; Predicated extending gather loads for 32-bit elements. Operand 3 is ++;; true for unsigned extension and false for signed extension. ++(define_insn "@aarch64_gather_load_" ++ [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w") ++ (ANY_EXTEND:VNx4_WIDE ++ (unspec:VNx4_NARROW ++ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") ++ (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w") ++ (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") ++ (mem:BLK (scratch))] ++ UNSPEC_LD1_GATHER)))] ++ "TARGET_SVE" ++ "@ ++ ld1\t%0.s, %5/z, [%2.s] ++ ld1\t%0.s, %5/z, [%2.s, #%1] ++ ld1\t%0.s, %5/z, [%1, %2.s, sxtw] ++ ld1\t%0.s, %5/z, [%1, %2.s, uxtw] ++ ld1\t%0.s, %5/z, [%1, %2.s, sxtw %p4] ++ ld1\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" ++) ++ ++;; Predicated extending gather loads for 64-bit elements. 
The value of ++;; operand 3 doesn't matter in this case. ++(define_insn "@aarch64_gather_load_" ++ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w") ++ (ANY_EXTEND:VNx2_WIDE ++ (unspec:VNx2_NARROW ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") ++ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") ++ (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") ++ (mem:BLK (scratch))] ++ UNSPEC_LD1_GATHER)))] ++ "TARGET_SVE" ++ "@ ++ ld1\t%0.d, %5/z, [%2.d] ++ ld1\t%0.d, %5/z, [%2.d, #%1] ++ ld1\t%0.d, %5/z, [%1, %2.d] ++ ld1\t%0.d, %5/z, [%1, %2.d, lsl %p4]" ++) ++ ++;; Likewise, but with the offset being sign-extended from 32 bits. ++(define_insn_and_rewrite "*aarch64_gather_load__sxtw" ++ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") ++ (ANY_EXTEND:VNx2_WIDE ++ (unspec:VNx2_NARROW ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") ++ (unspec:VNx2DI ++ [(match_operand 6) ++ (sign_extend:VNx2DI ++ (truncate:VNx2SI ++ (match_operand:VNx2DI 2 "register_operand" "w, w")))] ++ UNSPEC_PRED_X) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") ++ (mem:BLK (scratch))] ++ UNSPEC_LD1_GATHER)))] ++ "TARGET_SVE" ++ "@ ++ ld1\t%0.d, %5/z, [%1, %2.d, sxtw] ++ ld1\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" ++ "&& !rtx_equal_p (operands[5], operands[6])" ++ { ++ operands[6] = copy_rtx (operands[5]); ++ } ++) ++ ++;; Likewise, but with the offset being zero-extended from 32 bits. ++(define_insn "*aarch64_gather_load__uxtw" ++ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") ++ (ANY_EXTEND:VNx2_WIDE ++ (unspec:VNx2_NARROW ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") ++ (and:VNx2DI ++ (match_operand:VNx2DI 2 "register_operand" "w, w") ++ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") ++ (mem:BLK (scratch))] ++ UNSPEC_LD1_GATHER)))] ++ "TARGET_SVE" ++ "@ ++ ld1\t%0.d, %5/z, [%1, %2.d, uxtw] ++ ld1\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- First-faulting gather loads ++;; ------------------------------------------------------------------------- ++;; Includes gather forms of: ++;; - LDFF1D ++;; - LDFF1W ++;; ------------------------------------------------------------------------- ++ ++;; Predicated first-faulting gather loads for 32-bit elements. Operand ++;; 3 is true for unsigned extension and false for signed extension. 
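++;; For illustration (this assumes the overloaded ACLE spelling and is not
++;; taken from the upstream patch), the intrinsic-level equivalent of the
++;; 32-bit form below, with unsigned 32-bit indices, would be:
++;;
++;;   #include <arm_sve.h>
++;;   svint32_t ff_gather (svbool_t pg, const int32_t *base, svuint32_t idx)
++;;   {
++;;     return svldff1_gather_index (pg, base, idx);  /* LDFF1W ... uxtw */
++;;   }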
++(define_insn "@aarch64_ldff1_gather" ++ [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w") ++ (unspec:SVE_FULL_S ++ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk") ++ (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") ++ (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i") ++ (mem:BLK (scratch)) ++ (reg:VNx16BI FFRT_REGNUM)] ++ UNSPEC_LDFF1_GATHER))] ++ "TARGET_SVE" ++ "@ ++ ldff1w\t%0.s, %5/z, [%2.s] ++ ldff1w\t%0.s, %5/z, [%2.s, #%1] ++ ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw] ++ ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw] ++ ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] ++ ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" ++) ++ ++;; Predicated first-faulting gather loads for 64-bit elements. The value ++;; of operand 3 doesn't matter in this case. ++(define_insn "@aarch64_ldff1_gather" ++ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w") ++ (unspec:SVE_FULL_D ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") ++ (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk") ++ (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i") ++ (mem:BLK (scratch)) ++ (reg:VNx16BI FFRT_REGNUM)] ++ UNSPEC_LDFF1_GATHER))] ++ "TARGET_SVE" ++ "@ ++ ldff1d\t%0.d, %5/z, [%2.d] ++ ldff1d\t%0.d, %5/z, [%2.d, #%1] ++ ldff1d\t%0.d, %5/z, [%1, %2.d] ++ ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" ++) ++ ++;; Likewise, but with the offset being sign-extended from 32 bits. ++(define_insn_and_rewrite "*aarch64_ldff1_gather_sxtw" ++ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") ++ (unspec:SVE_FULL_D ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 1 "register_operand" "rk, rk") ++ (unspec:VNx2DI ++ [(match_operand 6) ++ (sign_extend:VNx2DI ++ (truncate:VNx2SI ++ (match_operand:VNx2DI 2 "register_operand" "w, w")))] ++ UNSPEC_PRED_X) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") ++ (mem:BLK (scratch)) ++ (reg:VNx16BI FFRT_REGNUM)] ++ UNSPEC_LDFF1_GATHER))] ++ "TARGET_SVE" ++ "@ ++ ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw] ++ ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" ++ "&& !rtx_equal_p (operands[5], operands[6])" ++ { ++ operands[6] = copy_rtx (operands[5]); ++ } ++) ++ ++;; Likewise, but with the offset being zero-extended from 32 bits. 
++(define_insn "*aarch64_ldff1_gather_uxtw" ++ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") ++ (unspec:SVE_FULL_D ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 1 "register_operand" "rk, rk") ++ (and:VNx2DI ++ (match_operand:VNx2DI 2 "register_operand" "w, w") ++ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") ++ (mem:BLK (scratch)) ++ (reg:VNx16BI FFRT_REGNUM)] ++ UNSPEC_LDFF1_GATHER))] ++ "TARGET_SVE" ++ "@ ++ ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw] ++ ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- First-faulting extending gather loads ++;; ------------------------------------------------------------------------- ++;; Includes gather forms of: ++;; - LDFF1B ++;; - LDFF1H ++;; - LDFF1SB ++;; - LDFF1SH ++;; - LDFF1SW ++;; - LDFF1W ++;; ------------------------------------------------------------------------- ++ ++;; Predicated extending first-faulting gather loads for 32-bit elements. ++;; Operand 3 is true for unsigned extension and false for signed extension. ++(define_insn "@aarch64_ldff1_gather_" ++ [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w") ++ (ANY_EXTEND:VNx4_WIDE ++ (unspec:VNx4_NARROW ++ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") ++ (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w") ++ (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") ++ (mem:BLK (scratch)) ++ (reg:VNx16BI FFRT_REGNUM)] ++ UNSPEC_LDFF1_GATHER)))] ++ "TARGET_SVE" ++ "@ ++ ldff1\t%0.s, %5/z, [%2.s] ++ ldff1\t%0.s, %5/z, [%2.s, #%1] ++ ldff1\t%0.s, %5/z, [%1, %2.s, sxtw] ++ ldff1\t%0.s, %5/z, [%1, %2.s, uxtw] ++ ldff1\t%0.s, %5/z, [%1, %2.s, sxtw %p4] ++ ldff1\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" ++) ++ ++;; Predicated extending first-faulting gather loads for 64-bit elements. ++;; The value of operand 3 doesn't matter in this case. ++(define_insn "@aarch64_ldff1_gather_" ++ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w") ++ (ANY_EXTEND:VNx2_WIDE ++ (unspec:VNx2_NARROW ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") ++ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") ++ (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") ++ (mem:BLK (scratch)) ++ (reg:VNx16BI FFRT_REGNUM)] ++ UNSPEC_LDFF1_GATHER)))] ++ "TARGET_SVE" ++ "@ ++ ldff1\t%0.d, %5/z, [%2.d] ++ ldff1\t%0.d, %5/z, [%2.d, #%1] ++ ldff1\t%0.d, %5/z, [%1, %2.d] ++ ldff1\t%0.d, %5/z, [%1, %2.d, lsl %p4]" ++) ++ ++;; Likewise, but with the offset being sign-extended from 32 bits. 
++(define_insn_and_rewrite "*aarch64_ldff1_gather__sxtw" ++ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") ++ (ANY_EXTEND:VNx2_WIDE ++ (unspec:VNx2_NARROW ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") ++ (unspec:VNx2DI ++ [(match_operand 6) ++ (sign_extend:VNx2DI ++ (truncate:VNx2SI ++ (match_operand:VNx2DI 2 "register_operand" "w, w")))] ++ UNSPEC_PRED_X) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") ++ (mem:BLK (scratch)) ++ (reg:VNx16BI FFRT_REGNUM)] ++ UNSPEC_LDFF1_GATHER)))] ++ "TARGET_SVE" ++ "@ ++ ldff1\t%0.d, %5/z, [%1, %2.d, sxtw] ++ ldff1\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" ++ "&& !rtx_equal_p (operands[5], operands[6])" ++ { ++ operands[6] = copy_rtx (operands[5]); ++ } ++) ++ ++;; Likewise, but with the offset being zero-extended from 32 bits. ++(define_insn "*aarch64_ldff1_gather__uxtw" ++ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") ++ (ANY_EXTEND:VNx2_WIDE ++ (unspec:VNx2_NARROW ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") ++ (and:VNx2DI ++ (match_operand:VNx2DI 2 "register_operand" "w, w") ++ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") ++ (mem:BLK (scratch)) ++ (reg:VNx16BI FFRT_REGNUM)] ++ UNSPEC_LDFF1_GATHER)))] ++ "TARGET_SVE" ++ "@ ++ ldff1\t%0.d, %5/z, [%1, %2.d, uxtw] ++ ldff1\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" ++) ++ ++;; ========================================================================= ++;; == Prefetches ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- Contiguous prefetches ++;; ------------------------------------------------------------------------- ++;; Includes contiguous forms of: ++;; - PRFB ++;; - PRFD ++;; - PRFH ++;; - PRFW ++;; ------------------------------------------------------------------------- ++ ++;; Contiguous predicated prefetches. Operand 2 gives the real prefetch ++;; operation (as an svprfop), with operands 3 and 4 providing distilled ++;; information. ++(define_insn "@aarch64_sve_prefetch" ++ [(prefetch (unspec:DI ++ [(match_operand: 0 "register_operand" "Upl") ++ (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP") ++ (match_operand:DI 2 "const_int_operand")] ++ UNSPEC_SVE_PREFETCH) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand"))] ++ "TARGET_SVE" ++ { ++ operands[1] = gen_rtx_MEM (mode, operands[1]); ++ return aarch64_output_sve_prefetch ("prf", operands[2], "%0, %1"); ++ } ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Gather prefetches ++;; ------------------------------------------------------------------------- ++;; Includes gather forms of: ++;; - PRFB ++;; - PRFD ++;; - PRFH ++;; - PRFW ++;; ------------------------------------------------------------------------- ++ ++;; Predicated gather prefetches for 32-bit bases and offsets. 
The operands ++;; are: ++;; 0: the governing predicate ++;; 1: the scalar component of the address ++;; 2: the vector component of the address ++;; 3: 1 for zero extension, 0 for sign extension ++;; 4: the scale multiplier ++;; 5: a vector zero that identifies the mode of data being accessed ++;; 6: the prefetch operator (an svprfop) ++;; 7: the normal RTL prefetch rw flag ++;; 8: the normal RTL prefetch locality value ++(define_insn "@aarch64_sve_gather_prefetch" ++ [(prefetch (unspec:DI ++ [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") ++ (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w") ++ (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") ++ (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") ++ (match_operand:DI 6 "const_int_operand")] ++ UNSPEC_SVE_PREFETCH_GATHER) ++ (match_operand:DI 7 "const_int_operand") ++ (match_operand:DI 8 "const_int_operand"))] ++ "TARGET_SVE" ++ { ++ static const char *const insns[][2] = { ++ "prf", "%0, [%2.s]", ++ "prf", "%0, [%2.s, #%1]", ++ "prfb", "%0, [%1, %2.s, sxtw]", ++ "prfb", "%0, [%1, %2.s, uxtw]", ++ "prf", "%0, [%1, %2.s, sxtw %p4]", ++ "prf", "%0, [%1, %2.s, uxtw %p4]" ++ }; ++ const char *const *parts = insns[which_alternative]; ++ return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); ++ } ++) ++ ++;; Predicated gather prefetches for 64-bit elements. The value of operand 3 ++;; doesn't matter in this case. ++(define_insn "@aarch64_sve_gather_prefetch" ++ [(prefetch (unspec:DI ++ [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl") ++ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") ++ (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") ++ (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") ++ (match_operand:DI 6 "const_int_operand")] ++ UNSPEC_SVE_PREFETCH_GATHER) ++ (match_operand:DI 7 "const_int_operand") ++ (match_operand:DI 8 "const_int_operand"))] ++ "TARGET_SVE" ++ { ++ static const char *const insns[][2] = { ++ "prf", "%0, [%2.d]", ++ "prf", "%0, [%2.d, #%1]", ++ "prfb", "%0, [%1, %2.d]", ++ "prf", "%0, [%1, %2.d, lsl %p4]" ++ }; ++ const char *const *parts = insns[which_alternative]; ++ return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); ++ } ++) ++ ++;; Likewise, but with the offset being sign-extended from 32 bits. 
++(define_insn_and_rewrite "*aarch64_sve_gather_prefetch_sxtw" ++ [(prefetch (unspec:DI ++ [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl") ++ (match_operand:DI 1 "register_operand" "rk, rk") ++ (unspec:VNx2DI_ONLY ++ [(match_operand 9) ++ (sign_extend:VNx2DI ++ (truncate:VNx2SI ++ (match_operand:VNx2DI 2 "register_operand" "w, w")))] ++ UNSPEC_PRED_X) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") ++ (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") ++ (match_operand:DI 6 "const_int_operand")] ++ UNSPEC_SVE_PREFETCH_GATHER) ++ (match_operand:DI 7 "const_int_operand") ++ (match_operand:DI 8 "const_int_operand"))] ++ "TARGET_SVE" ++ { ++ static const char *const insns[][2] = { ++ "prfb", "%0, [%1, %2.d, sxtw]", ++ "prf", "%0, [%1, %2.d, sxtw %p4]" ++ }; ++ const char *const *parts = insns[which_alternative]; ++ return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); ++ } ++ "&& !rtx_equal_p (operands[0], operands[9])" ++ { ++ operands[9] = copy_rtx (operands[0]); ++ } ++) ++ ++;; Likewise, but with the offset being zero-extended from 32 bits. ++(define_insn "*aarch64_sve_gather_prefetch_uxtw" ++ [(prefetch (unspec:DI ++ [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl") ++ (match_operand:DI 1 "register_operand" "rk, rk") ++ (and:VNx2DI_ONLY ++ (match_operand:VNx2DI 2 "register_operand" "w, w") ++ (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate")) ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") ++ (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") ++ (match_operand:DI 6 "const_int_operand")] ++ UNSPEC_SVE_PREFETCH_GATHER) ++ (match_operand:DI 7 "const_int_operand") ++ (match_operand:DI 8 "const_int_operand"))] ++ "TARGET_SVE" ++ { ++ static const char *const insns[][2] = { ++ "prfb", "%0, [%1, %2.d, uxtw]", ++ "prf", "%0, [%1, %2.d, uxtw %p4]" ++ }; ++ const char *const *parts = insns[which_alternative]; ++ return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); ++ } ++) ++ ++;; ========================================================================= ++;; == Stores ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- Normal contiguous stores ++;; ------------------------------------------------------------------------- ++;; Includes contiguous forms of: ++;; - ST1B ++;; - ST1D ++;; - ST1H ++;; - ST1W ++;; - ST2B ++;; - ST2D ++;; - ST2H ++;; - ST2W ++;; - ST3B ++;; - ST3D ++;; - ST3H ++;; - ST3W ++;; - ST4B ++;; - ST4D ++;; - ST4H ++;; - ST4W ++;; ------------------------------------------------------------------------- ++ ++;; Predicated ST1. ++(define_insn "maskstore" ++ [(set (match_operand:SVE_FULL 0 "memory_operand" "+m") ++ (unspec:SVE_FULL ++ [(match_operand: 2 "register_operand" "Upl") ++ (match_operand:SVE_FULL 1 "register_operand" "w") ++ (match_dup 0)] ++ UNSPEC_ST1_SVE))] ++ "TARGET_SVE" ++ "st1\t%1., %2, %0" ++) ++ ++;; Unpredicated ST[234]. This is always a full update, so the dependence ++;; on the old value of the memory location (via (match_dup 0)) is redundant. ++;; There doesn't seem to be any obvious benefit to treating the all-true ++;; case differently though. In particular, it's very unlikely that we'll ++;; only find out during RTL that a store_lanes is dead. 
++(define_expand "vec_store_lanes" ++ [(set (match_operand:SVE_STRUCT 0 "memory_operand") ++ (unspec:SVE_STRUCT ++ [(match_dup 2) ++ (match_operand:SVE_STRUCT 1 "register_operand") ++ (match_dup 0)] ++ UNSPEC_STN))] ++ "TARGET_SVE" ++ { ++ operands[2] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Predicated ST[234]. ++(define_insn "vec_mask_store_lanes" ++ [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") ++ (unspec:SVE_STRUCT ++ [(match_operand: 2 "register_operand" "Upl") ++ (match_operand:SVE_STRUCT 1 "register_operand" "w") ++ (match_dup 0)] ++ UNSPEC_STN))] ++ "TARGET_SVE" ++ "st\t%1, %2, %0" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Truncating contiguous stores ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ST1B ++;; - ST1H ++;; - ST1W ++;; ------------------------------------------------------------------------- ++ ++;; Predicated truncate and store, with 8 elements per 128-bit block. ++(define_insn "@aarch64_store_trunc" ++ [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m") ++ (unspec:VNx8_NARROW ++ [(match_operand:VNx8BI 2 "register_operand" "Upl") ++ (truncate:VNx8_NARROW ++ (match_operand:VNx8_WIDE 1 "register_operand" "w")) ++ (match_dup 0)] ++ UNSPEC_ST1_SVE))] ++ "TARGET_SVE" ++ "st1\t%1., %2, %0" ++) ++ ++;; Predicated truncate and store, with 4 elements per 128-bit block. ++(define_insn "@aarch64_store_trunc" ++ [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m") ++ (unspec:VNx4_NARROW ++ [(match_operand:VNx4BI 2 "register_operand" "Upl") ++ (truncate:VNx4_NARROW ++ (match_operand:VNx4_WIDE 1 "register_operand" "w")) ++ (match_dup 0)] ++ UNSPEC_ST1_SVE))] ++ "TARGET_SVE" ++ "st1\t%1., %2, %0" ++) ++ ++;; Predicated truncate and store, with 2 elements per 128-bit block. ++(define_insn "@aarch64_store_trunc" ++ [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m") ++ (unspec:VNx2_NARROW ++ [(match_operand:VNx2BI 2 "register_operand" "Upl") ++ (truncate:VNx2_NARROW ++ (match_operand:VNx2_WIDE 1 "register_operand" "w")) ++ (match_dup 0)] ++ UNSPEC_ST1_SVE))] ++ "TARGET_SVE" ++ "st1\t%1., %2, %0" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Non-temporal contiguous stores ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - STNT1B ++;; - STNT1D ++;; - STNT1H ++;; - STNT1W ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@aarch64_stnt1" ++ [(set (match_operand:SVE_FULL 0 "memory_operand" "+m") ++ (unspec:SVE_FULL ++ [(match_operand: 2 "register_operand" "Upl") ++ (match_operand:SVE_FULL 1 "register_operand" "w") ++ (match_dup 0)] ++ UNSPEC_STNT1_SVE))] ++ "TARGET_SVE" ++ "stnt1\t%1., %2, %0" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Normal scatter stores ++;; ------------------------------------------------------------------------- ++;; Includes scatter forms of: ++;; - ST1D ++;; - ST1W ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated scatter stores. 
++(define_expand "scatter_store" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK ++ [(match_dup 5) ++ (match_operand:DI 0 "aarch64_sve_gather_offset_") ++ (match_operand: 1 "register_operand") ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "aarch64_gather_scale_operand_") ++ (match_operand:SVE_FULL_SD 4 "register_operand")] ++ UNSPEC_ST1_SCATTER))] ++ "TARGET_SVE" ++ { ++ operands[5] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Predicated scatter stores for 32-bit elements. Operand 2 is true for ++;; unsigned extension and false for signed extension. ++(define_insn "mask_scatter_store" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK ++ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (match_operand:DI 0 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk") ++ (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w") ++ (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") ++ (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i") ++ (match_operand:SVE_FULL_S 4 "register_operand" "w, w, w, w, w, w")] ++ UNSPEC_ST1_SCATTER))] ++ "TARGET_SVE" ++ "@ ++ st1w\t%4.s, %5, [%1.s] ++ st1w\t%4.s, %5, [%1.s, #%0] ++ st1w\t%4.s, %5, [%0, %1.s, sxtw] ++ st1w\t%4.s, %5, [%0, %1.s, uxtw] ++ st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] ++ st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" ++) ++ ++;; Predicated scatter stores for 64-bit elements. The value of operand 2 ++;; doesn't matter in this case. ++(define_insn "mask_scatter_store" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") ++ (match_operand:DI 0 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk") ++ (match_operand:VNx2DI 1 "register_operand" "w, w, w, w") ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i") ++ (match_operand:SVE_FULL_D 4 "register_operand" "w, w, w, w")] ++ UNSPEC_ST1_SCATTER))] ++ "TARGET_SVE" ++ "@ ++ st1d\t%4.d, %5, [%1.d] ++ st1d\t%4.d, %5, [%1.d, #%0] ++ st1d\t%4.d, %5, [%0, %1.d] ++ st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" ++) ++ ++;; Likewise, but with the offset being sign-extended from 32 bits. ++(define_insn_and_rewrite "*mask_scatter_store_sxtw" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 0 "register_operand" "rk, rk") ++ (unspec:VNx2DI ++ [(match_operand 6) ++ (sign_extend:VNx2DI ++ (truncate:VNx2SI ++ (match_operand:VNx2DI 1 "register_operand" "w, w")))] ++ UNSPEC_PRED_X) ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i") ++ (match_operand:SVE_FULL_D 4 "register_operand" "w, w")] ++ UNSPEC_ST1_SCATTER))] ++ "TARGET_SVE" ++ "@ ++ st1d\t%4.d, %5, [%0, %1.d, sxtw] ++ st1d\t%4.d, %5, [%0, %1.d, sxtw %p3]" ++ "&& !rtx_equal_p (operands[5], operands[6])" ++ { ++ operands[6] = copy_rtx (operands[5]); ++ } ++) ++ ++;; Likewise, but with the offset being zero-extended from 32 bits. 
++(define_insn "*mask_scatter_store_uxtw" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk") ++ (and:VNx2DI ++ (match_operand:VNx2DI 1 "register_operand" "w, w") ++ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i") ++ (match_operand:SVE_FULL_D 4 "register_operand" "w, w")] ++ UNSPEC_ST1_SCATTER))] ++ "TARGET_SVE" ++ "@ ++ st1d\t%4.d, %5, [%0, %1.d, uxtw] ++ st1d\t%4.d, %5, [%0, %1.d, uxtw %p3]" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- Truncating scatter stores ++;; ------------------------------------------------------------------------- ++;; Includes scatter forms of: ++;; - ST1B ++;; - ST1H ++;; - ST1W ++;; ------------------------------------------------------------------------- ++ ++;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is ++;; true for unsigned extension and false for signed extension. ++(define_insn "@aarch64_scatter_store_trunc" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK ++ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (match_operand:DI 0 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") ++ (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w") ++ (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") ++ (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") ++ (truncate:VNx4_NARROW ++ (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))] ++ UNSPEC_ST1_SCATTER))] ++ "TARGET_SVE" ++ "@ ++ st1\t%4.s, %5, [%1.s] ++ st1\t%4.s, %5, [%1.s, #%0] ++ st1\t%4.s, %5, [%0, %1.s, sxtw] ++ st1\t%4.s, %5, [%0, %1.s, uxtw] ++ st1\t%4.s, %5, [%0, %1.s, sxtw %p3] ++ st1\t%4.s, %5, [%0, %1.s, uxtw %p3]" ++) ++ ++;; Predicated truncating scatter stores for 64-bit elements. The value of ++;; operand 2 doesn't matter in this case. ++(define_insn "@aarch64_scatter_store_trunc" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") ++ (match_operand:DI 0 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") ++ (match_operand:VNx2DI 1 "register_operand" "w, w, w, w") ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") ++ (truncate:VNx2_NARROW ++ (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))] ++ UNSPEC_ST1_SCATTER))] ++ "TARGET_SVE" ++ "@ ++ st1\t%4.d, %5, [%1.d] ++ st1\t%4.d, %5, [%1.d, #%0] ++ st1\t%4.d, %5, [%0, %1.d] ++ st1\t%4.d, %5, [%0, %1.d, lsl %p3]" ++) ++ ++;; Likewise, but with the offset being sign-extended from 32 bits. 
++(define_insn_and_rewrite "*aarch64_scatter_store_trunc_sxtw" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 0 "register_operand" "rk, rk") ++ (unspec:VNx2DI ++ [(match_operand 6) ++ (sign_extend:VNx2DI ++ (truncate:VNx2SI ++ (match_operand:VNx2DI 1 "register_operand" "w, w")))] ++ UNSPEC_PRED_X) ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") ++ (truncate:VNx2_NARROW ++ (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))] ++ UNSPEC_ST1_SCATTER))] ++ "TARGET_SVE" ++ "@ ++ st1\t%4.d, %5, [%0, %1.d, sxtw] ++ st1\t%4.d, %5, [%0, %1.d, sxtw %p3]" ++ "&& !rtx_equal_p (operands[5], operands[6])" ++ { ++ operands[6] = copy_rtx (operands[5]); ++ } ++) ++ ++;; Likewise, but with the offset being zero-extended from 32 bits. ++(define_insn "*aarch64_scatter_store_trunc_uxtw" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK ++ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") ++ (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk") ++ (and:VNx2DI ++ (match_operand:VNx2DI 1 "register_operand" "w, w") ++ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") ++ (truncate:VNx2_NARROW ++ (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))] ++ UNSPEC_ST1_SCATTER))] ++ "TARGET_SVE" ++ "@ ++ st1\t%4.d, %5, [%0, %1.d, uxtw] ++ st1\t%4.d, %5, [%0, %1.d, uxtw %p3]" ++) ++ ++;; ========================================================================= ++;; == Vector creation ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT,FP] Duplicate element ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - DUP ++;; - MOV ++;; - LD1RB ++;; - LD1RD ++;; - LD1RH ++;; - LD1RW ++;; - LD1ROB (F64MM) ++;; - LD1ROD (F64MM) ++;; - LD1ROH (F64MM) ++;; - LD1ROW (F64MM) ++;; - LD1RQB ++;; - LD1RQD ++;; - LD1RQH ++;; - LD1RQW ++;; ------------------------------------------------------------------------- ++ ++(define_expand "vec_duplicate" ++ [(parallel ++ [(set (match_operand:SVE_FULL 0 "register_operand") ++ (vec_duplicate:SVE_FULL ++ (match_operand: 1 "aarch64_sve_dup_operand"))) ++ (clobber (scratch:VNx16BI))])] ++ "TARGET_SVE" ++ { ++ if (MEM_P (operands[1])) ++ { ++ rtx ptrue = aarch64_ptrue_reg (mode); ++ emit_insn (gen_sve_ld1r (operands[0], ptrue, operands[1], ++ CONST0_RTX (mode))); ++ DONE; ++ } ++ } ++) ++ ++;; Accept memory operands for the benefit of combine, and also in case ++;; the scalar input gets spilled to memory during RA. We want to split ++;; the load at the first opportunity in order to allow the PTRUE to be ++;; optimized with surrounding code. 
++(define_insn_and_split "*vec_duplicate_reg" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w") ++ (vec_duplicate:SVE_FULL ++ (match_operand: 1 "aarch64_sve_dup_operand" "r, w, Uty"))) ++ (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))] ++ "TARGET_SVE" ++ "@ ++ mov\t%0., %1 ++ mov\t%0., %1 ++ #" ++ "&& MEM_P (operands[1])" ++ [(const_int 0)] ++ { ++ if (GET_CODE (operands[2]) == SCRATCH) ++ operands[2] = gen_reg_rtx (VNx16BImode); ++ emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); ++ rtx gp = gen_lowpart (mode, operands[2]); ++ emit_insn (gen_sve_ld1r (operands[0], gp, operands[1], ++ CONST0_RTX (mode))); ++ DONE; ++ } ++ [(set_attr "length" "4,4,8")] ++) ++ ++;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version). ++(define_insn "@aarch64_vec_duplicate_vq_le" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (vec_duplicate:SVE_FULL ++ (match_operand: 1 "register_operand" "w")))] ++ "TARGET_SVE && !BYTES_BIG_ENDIAN" ++ { ++ operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); ++ return "dup\t%0.q, %1.q[0]"; ++ } ++) ++ ++;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version). ++;; The SVE register layout puts memory lane N into (architectural) ++;; register lane N, whereas the Advanced SIMD layout puts the memory ++;; lsb into the register lsb. We therefore have to describe this in rtl ++;; terms as a reverse of the V128 vector followed by a duplicate. ++(define_insn "@aarch64_vec_duplicate_vq_be" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (vec_duplicate:SVE_FULL ++ (vec_select: ++ (match_operand: 1 "register_operand" "w") ++ (match_operand 2 "descending_int_parallel"))))] ++ "TARGET_SVE ++ && BYTES_BIG_ENDIAN ++ && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)), ++ GET_MODE_NUNITS (mode) - 1)" ++ { ++ operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); ++ return "dup\t%0.q, %1.q[0]"; ++ } ++) ++ ++;; This is used for vec_duplicates from memory, but can also ++;; be used by combine to optimize selects of a a vec_duplicate ++;; with zero. ++(define_insn "sve_ld1r" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand: 1 "register_operand" "Upl") ++ (vec_duplicate:SVE_FULL ++ (match_operand: 2 "aarch64_sve_ld1r_operand" "Uty")) ++ (match_operand:SVE_FULL 3 "aarch64_simd_imm_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "ld1r\t%0., %1/z, %2" ++) ++ ++;; Load 128 bits from memory under predicate control and duplicate to ++;; fill a vector. 
++(define_insn "@aarch64_sve_ld1rq" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand: 2 "register_operand" "Upl") ++ (match_operand: 1 "aarch64_sve_ld1rq_operand" "UtQ")] ++ UNSPEC_LD1RQ))] ++ "TARGET_SVE" ++ { ++ operands[1] = gen_rtx_MEM (mode, XEXP (operands[1], 0)); ++ return "ld1rq\t%0., %2/z, %1"; ++ } ++) ++ ++(define_insn "@aarch64_sve_ld1ro" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand: 2 "register_operand" "Upl") ++ (match_operand:OI 1 "aarch64_sve_ld1ro_operand_" ++ "UO")] ++ UNSPEC_LD1RO))] ++ "TARGET_SVE_F64MM" ++ { ++ operands[1] = gen_rtx_MEM (mode, XEXP (operands[1], 0)); ++ return "ld1ro\t%0., %2/z, %1"; ++ } ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT,FP] Initialize from individual elements ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - INSR ++;; ------------------------------------------------------------------------- ++ ++(define_expand "vec_init" ++ [(match_operand:SVE_FULL 0 "register_operand") ++ (match_operand 1 "")] ++ "TARGET_SVE" ++ { ++ aarch64_sve_expand_vector_init (operands[0], operands[1]); ++ DONE; ++ } ++) ++ ++;; Shift an SVE vector left and insert a scalar into element 0. ++(define_insn "vec_shl_insert_" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w") ++ (unspec:SVE_FULL ++ [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w") ++ (match_operand: 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")] ++ UNSPEC_INSR))] ++ "TARGET_SVE" ++ "@ ++ insr\t%0., %2 ++ insr\t%0., %2 ++ movprfx\t%0, %1\;insr\t%0., %2 ++ movprfx\t%0, %1\;insr\t%0., %2" ++ [(set_attr "movprfx" "*,*,yes,yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Linear series ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - INDEX ++;; ------------------------------------------------------------------------- ++ ++(define_insn "vec_series" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w") ++ (vec_series:SVE_FULL_I ++ (match_operand: 1 "aarch64_sve_index_operand" "Usi, r, r") ++ (match_operand: 2 "aarch64_sve_index_operand" "r, Usi, r")))] ++ "TARGET_SVE" ++ "@ ++ index\t%0., #%1, %2 ++ index\t%0., %1, #%2 ++ index\t%0., %1, %2" ++) ++ ++;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range ++;; of an INDEX instruction. ++(define_insn "*vec_series_plus" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") ++ (plus:SVE_FULL_I ++ (vec_duplicate:SVE_FULL_I ++ (match_operand: 1 "register_operand" "r")) ++ (match_operand:SVE_FULL_I 2 "immediate_operand")))] ++ "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" ++ { ++ operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); ++ return "index\t%0., %1, #%2"; ++ } ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Duplicate element ++;; ------------------------------------------------------------------------- ++;; The patterns in this section are synthetic. ++;; ------------------------------------------------------------------------- ++ ++;; Implement a predicate broadcast by shifting the low bit of the scalar ++;; input into the top bit and using a WHILELO. An alternative would be to ++;; duplicate the input and do a compare with zero. 
++(define_expand "vec_duplicate" ++ [(set (match_operand:PRED_ALL 0 "register_operand") ++ (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))] ++ "TARGET_SVE" ++ { ++ rtx tmp = gen_reg_rtx (DImode); ++ rtx op1 = gen_lowpart (DImode, operands[1]); ++ emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); ++ emit_insn (gen_while_ultdi (operands[0], const0_rtx, tmp)); ++ DONE; ++ } ++) ++ ++;; ========================================================================= ++;; == Vector decomposition ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT,FP] Extract index ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - DUP (Advanced SIMD) ++;; - DUP (SVE) ++;; - EXT (SVE) ++;; - ST1 (Advanced SIMD) ++;; - UMOV (Advanced SIMD) ++;; ------------------------------------------------------------------------- ++ ++(define_expand "vec_extract" ++ [(set (match_operand: 0 "register_operand") ++ (vec_select: ++ (match_operand:SVE_FULL 1 "register_operand") ++ (parallel [(match_operand:SI 2 "nonmemory_operand")])))] ++ "TARGET_SVE" ++ { ++ poly_int64 val; ++ if (poly_int_rtx_p (operands[2], &val) ++ && known_eq (val, GET_MODE_NUNITS (mode) - 1)) ++ { ++ /* The last element can be extracted with a LASTB and a false ++ predicate. */ ++ rtx sel = aarch64_pfalse_reg (mode); ++ emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); ++ DONE; ++ } ++ if (!CONST_INT_P (operands[2])) ++ { ++ /* Create an index with operand[2] as the base and -1 as the step. ++ It will then be zero for the element we care about. */ ++ rtx index = gen_lowpart (mode, operands[2]); ++ index = force_reg (mode, index); ++ rtx series = gen_reg_rtx (mode); ++ emit_insn (gen_vec_series (series, index, constm1_rtx)); ++ ++ /* Get a predicate that is true for only that element. */ ++ rtx zero = CONST0_RTX (mode); ++ rtx cmp = gen_rtx_EQ (mode, series, zero); ++ rtx sel = gen_reg_rtx (mode); ++ emit_insn (gen_vec_cmp (sel, cmp, series, zero)); ++ ++ /* Select the element using LASTB. */ ++ emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); ++ DONE; ++ } ++ } ++) ++ ++;; Extract element zero. This is a special case because we want to force ++;; the registers to be the same for the second alternative, and then ++;; split the instruction into nothing after RA. ++(define_insn_and_split "*vec_extract_0" ++ [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") ++ (vec_select: ++ (match_operand:SVE_FULL 1 "register_operand" "w, 0, w") ++ (parallel [(const_int 0)])))] ++ "TARGET_SVE" ++ { ++ operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); ++ switch (which_alternative) ++ { ++ case 0: ++ return "umov\\t%0, %1.[0]"; ++ case 1: ++ return "#"; ++ case 2: ++ return "st1\\t{%1.}[0], %0"; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ "&& reload_completed ++ && REG_P (operands[0]) ++ && REGNO (operands[0]) == REGNO (operands[1])" ++ [(const_int 0)] ++ { ++ emit_note (NOTE_INSN_DELETED); ++ DONE; ++ } ++ [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] ++) ++ ++;; Extract an element from the Advanced SIMD portion of the register. ++;; We don't just reuse the aarch64-simd.md pattern because we don't ++;; want any change in lane number on big-endian targets. 
++(define_insn "*vec_extract_v128" ++ [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") ++ (vec_select: ++ (match_operand:SVE_FULL 1 "register_operand" "w, w, w") ++ (parallel [(match_operand:SI 2 "const_int_operand")])))] ++ "TARGET_SVE ++ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 1, 15)" ++ { ++ operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); ++ switch (which_alternative) ++ { ++ case 0: ++ return "umov\\t%0, %1.[%2]"; ++ case 1: ++ return "dup\\t%0, %1.[%2]"; ++ case 2: ++ return "st1\\t{%1.}[%2], %0"; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] ++) ++ ++;; Extract an element in the range of DUP. This pattern allows the ++;; source and destination to be different. ++(define_insn "*vec_extract_dup" ++ [(set (match_operand: 0 "register_operand" "=w") ++ (vec_select: ++ (match_operand:SVE_FULL 1 "register_operand" "w") ++ (parallel [(match_operand:SI 2 "const_int_operand")])))] ++ "TARGET_SVE ++ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 16, 63)" ++ { ++ operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); ++ return "dup\t%0., %1.[%2]"; ++ } ++) ++ ++;; Extract an element outside the range of DUP. This pattern requires the ++;; source and destination to be the same. ++(define_insn "*vec_extract_ext" ++ [(set (match_operand: 0 "register_operand" "=w, ?&w") ++ (vec_select: ++ (match_operand:SVE_FULL 1 "register_operand" "0, w") ++ (parallel [(match_operand:SI 2 "const_int_operand")])))] ++ "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (mode) >= 64" ++ { ++ operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); ++ operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (mode)); ++ return (which_alternative == 0 ++ ? "ext\t%0.b, %0.b, %0.b, #%2" ++ : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2"); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT,FP] Extract active element ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - LASTA ++;; - LASTB ++;; ------------------------------------------------------------------------- ++ ++;; Extract the last active element of operand 1 into operand 0. ++;; If no elements are active, extract the last inactive element instead. ++(define_insn "@extract__" ++ [(set (match_operand: 0 "register_operand" "=?r, w") ++ (unspec: ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (match_operand:SVE_FULL 2 "register_operand" "w, w")] ++ LAST))] ++ "TARGET_SVE" ++ "@ ++ last\t%0, %1, %2. ++ last\t%0, %1, %2." ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Extract index ++;; ------------------------------------------------------------------------- ++;; The patterns in this section are synthetic. ++;; ------------------------------------------------------------------------- ++ ++;; Handle extractions from a predicate by converting to an integer vector ++;; and extracting from there. ++(define_expand "vec_extract" ++ [(match_operand: 0 "register_operand") ++ (match_operand: 1 "register_operand") ++ (match_operand:SI 2 "nonmemory_operand") ++ ;; Dummy operand to which we can attach the iterator. 
++ (reg:SVE_FULL_I V0_REGNUM)] ++ "TARGET_SVE" ++ { ++ rtx tmp = gen_reg_rtx (mode); ++ emit_insn (gen_vcond_mask_ (tmp, operands[1], ++ CONST1_RTX (mode), ++ CONST0_RTX (mode))); ++ emit_insn (gen_vec_extract (operands[0], tmp, operands[2])); ++ DONE; ++ } ++) ++ ++;; ========================================================================= ++;; == Unary arithmetic ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] General unary arithmetic corresponding to rtx codes ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ABS ++;; - CLS (= clrsb) ++;; - CLZ ++;; - CNT (= popcount) ++;; - NEG ++;; - NOT ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated integer unary arithmetic. ++(define_expand "2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_dup 2) ++ (SVE_INT_UNARY:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand"))] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ { ++ operands[2] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Integer unary arithmetic predicated with a PTRUE. ++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl") ++ (SVE_INT_UNARY:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w"))] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ "\t%0., %1/m, %2." ++) ++ ++;; Predicated integer unary arithmetic with merging. ++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand") ++ (SVE_INT_UNARY:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand")) ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++) ++ ++;; Predicated integer unary arithmetic, merging with the first input. ++(define_insn "*cond__2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (SVE_INT_UNARY:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w")) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0. ++ movprfx\t%0, %2\;\t%0., %1/m, %2." ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated integer unary arithmetic, merging with an independent value. ++;; ++;; The earlyclobber isn't needed for the first alternative, but omitting ++;; it would only help the case in which operands 2 and 3 are the same, ++;; which is handled above rather than here. Marking all the alternatives ++;; as earlyclobber helps to make the instruction more regular to the ++;; register allocator. ++(define_insn "*cond__any" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (SVE_INT_UNARY:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")) ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" ++ "@ ++ \t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. ++ movprfx\t%0, %3\;\t%0., %1/m, %2." 
++ [(set_attr "movprfx" "*,yes,yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] General unary arithmetic corresponding to unspecs ++;; ------------------------------------------------------------------------- ++;; Includes ++;; - RBIT ++;; - REVB ++;; - REVH ++;; - REVW ++;; ------------------------------------------------------------------------- ++ ++;; Predicated integer unary operations. ++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl") ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 2 "register_operand" "w")] ++ SVE_INT_UNARY)] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE && >= " ++ "\t%0., %1/m, %2." ++) ++ ++;; Predicated integer unary operations with merging. ++(define_insn "@cond_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")] ++ SVE_INT_UNARY) ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && >= " ++ "@ ++ \t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. ++ movprfx\t%0, %3\;\t%0., %1/m, %2." ++ [(set_attr "movprfx" "*,yes,yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Sign extension ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SXTB ++;; - SXTH ++;; - SXTW ++;; ------------------------------------------------------------------------- ++ ++;; Predicated SXT[BHW]. ++(define_insn "@aarch64_pred_sxt" ++ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") ++ (unspec:SVE_FULL_HSDI ++ [(match_operand: 1 "register_operand" "Upl") ++ (sign_extend:SVE_FULL_HSDI ++ (truncate:SVE_PARTIAL ++ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE && (~ & ) == 0" ++ "sxt\t%0., %1/m, %2." ++) ++ ++;; Predicated SXT[BHW] with merging. ++(define_insn "@aarch64_cond_sxt" ++ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w") ++ (unspec:SVE_FULL_HSDI ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (sign_extend:SVE_FULL_HSDI ++ (truncate:SVE_PARTIAL ++ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w"))) ++ (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && (~ & ) == 0" ++ "@ ++ sxt\t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;sxt\t%0., %1/m, %2. ++ movprfx\t%0, %3\;sxt\t%0., %1/m, %2." ++ [(set_attr "movprfx" "*,yes,yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Zero extension ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - UXTB ++;; - UXTH ++;; - UXTW ++;; ------------------------------------------------------------------------- ++ ++;; Match UXT[BHW] as a conditional AND of a constant, merging with the ++;; first input. ++(define_insn "*cond_uxt_2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (and:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate")) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ uxt%e3\t%0., %1/m, %0. 
++ movprfx\t%0, %2\;uxt%e3\t%0., %1/m, %2." ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Match UXT[BHW] as a conditional AND of a constant, merging with an ++;; independent value. ++;; ++;; The earlyclobber isn't needed for the first alternative, but omitting ++;; it would only help the case in which operands 2 and 4 are the same, ++;; which is handled above rather than here. Marking all the alternatives ++;; as early-clobber helps to make the instruction more regular to the ++;; register allocator. ++(define_insn "*cond_uxt_any" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (and:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate")) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" ++ "@ ++ uxt%e3\t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;uxt%e3\t%0., %1/m, %2. ++ movprfx\t%0, %4\;uxt%e3\t%0., %1/m, %2." ++ [(set_attr "movprfx" "*,yes,yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Logical inverse ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - CNOT ++;; ------------------------------------------------------------------------- ++ ++;; Predicated logical inverse. ++(define_expand "@aarch64_pred_cnot" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(unspec: ++ [(match_operand: 1 "register_operand") ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (eq: ++ (match_operand:SVE_FULL_I 3 "register_operand") ++ (match_dup 4))] ++ UNSPEC_PRED_Z) ++ (match_dup 5) ++ (match_dup 4)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ { ++ operands[4] = CONST0_RTX (mode); ++ operands[5] = CONST1_RTX (mode); ++ } ++) ++ ++(define_insn "*cnot" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") ++ (unspec:SVE_FULL_I ++ [(unspec: ++ [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (eq: ++ (match_operand:SVE_FULL_I 2 "register_operand" "w") ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] ++ UNSPEC_PRED_Z) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") ++ (match_dup 3)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "cnot\t%0., %1/m, %2." ++) ++ ++;; Predicated logical inverse with merging. ++(define_expand "@cond_cnot" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_I ++ [(unspec: ++ [(match_dup 4) ++ (const_int SVE_KNOWN_PTRUE) ++ (eq: ++ (match_operand:SVE_FULL_I 2 "register_operand") ++ (match_dup 5))] ++ UNSPEC_PRED_Z) ++ (match_dup 6) ++ (match_dup 5)] ++ UNSPEC_SEL) ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ { ++ operands[4] = CONSTM1_RTX (mode); ++ operands[5] = CONST0_RTX (mode); ++ operands[6] = CONST1_RTX (mode); ++ } ++) ++ ++;; Predicated logical inverse, merging with the first input. ++(define_insn_and_rewrite "*cond_cnot_2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ ;; Logical inverse of operand 2 (as above). 
++ (unspec:SVE_FULL_I ++ [(unspec: ++ [(match_operand 5) ++ (const_int SVE_KNOWN_PTRUE) ++ (eq: ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] ++ UNSPEC_PRED_Z) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") ++ (match_dup 3)] ++ UNSPEC_SEL) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ cnot\t%0., %1/m, %0. ++ movprfx\t%0, %2\;cnot\t%0., %1/m, %2." ++ "&& !CONSTANT_P (operands[5])" ++ { ++ operands[5] = CONSTM1_RTX (mode); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated logical inverse, merging with an independent value. ++;; ++;; The earlyclobber isn't needed for the first alternative, but omitting ++;; it would only help the case in which operands 2 and 6 are the same, ++;; which is handled above rather than here. Marking all the alternatives ++;; as earlyclobber helps to make the instruction more regular to the ++;; register allocator. ++(define_insn_and_rewrite "*cond_cnot_any" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ ;; Logical inverse of operand 2 (as above). ++ (unspec:SVE_FULL_I ++ [(unspec: ++ [(match_operand 5) ++ (const_int SVE_KNOWN_PTRUE) ++ (eq: ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] ++ UNSPEC_PRED_Z) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") ++ (match_dup 3)] ++ UNSPEC_SEL) ++ (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])" ++ "@ ++ cnot\t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;cnot\t%0., %1/m, %2. ++ movprfx\t%0, %6\;cnot\t%0., %1/m, %2." ++ "&& !CONSTANT_P (operands[5])" ++ { ++ operands[5] = CONSTM1_RTX (mode); ++ } ++ [(set_attr "movprfx" "*,yes,yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP<-INT] General unary arithmetic that maps to unspecs ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FEXPA ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated unary operations that take an integer and return a float. ++(define_insn "@aarch64_sve_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "w")] ++ SVE_FP_UNARY_INT))] ++ "TARGET_SVE" ++ "\t%0., %1." ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP] General unary arithmetic corresponding to unspecs ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FABS ++;; - FNEG ++;; - FRECPE ++;; - FRECPX ++;; - FRINTA ++;; - FRINTI ++;; - FRINTM ++;; - FRINTN ++;; - FRINTP ++;; - FRINTX ++;; - FRINTZ ++;; - FRSQRT ++;; - FSQRT ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated floating-point unary operations. ++(define_insn "@aarch64_sve_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") ++ (unspec:SVE_FULL_F ++ [(match_operand:SVE_FULL_F 1 "register_operand" "w")] ++ SVE_FP_UNARY))] ++ "TARGET_SVE" ++ "\t%0., %1." ++) ++ ++;; Unpredicated floating-point unary operations. 
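++;; These wrap the operation in a conditional unspec governed by an all-true
++;; predicate created by aarch64_ptrue_reg, so that a plain vector operation
++;; ends up as a predicated instruction; for example, with .s elements this
++;; would typically be fabs z0.s, p0/m, z1.s (register numbers illustrative).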
++(define_expand "2" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 2) ++ (const_int SVE_RELAXED_GP) ++ (match_operand:SVE_FULL_F 1 "register_operand")] ++ SVE_COND_FP_UNARY))] ++ "TARGET_SVE" ++ { ++ operands[2] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Predicated floating-point unary operations. ++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SI 3 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w")] ++ SVE_COND_FP_UNARY))] ++ "TARGET_SVE" ++ "\t%0., %1/m, %2." ++) ++ ++;; Predicated floating-point unary arithmetic with merging. ++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_F 2 "register_operand")] ++ SVE_COND_FP_UNARY) ++ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++) ++ ++;; Predicated floating-point unary arithmetic, merging with the first input. ++(define_insn_and_rewrite "*cond__2" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 3) ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] ++ SVE_COND_FP_UNARY) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])" ++ "@ ++ \t%0., %1/m, %0. ++ movprfx\t%0, %2\;\t%0., %1/m, %2." ++ "&& !rtx_equal_p (operands[1], operands[3])" ++ { ++ operands[3] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated floating-point unary arithmetic, merging with an independent ++;; value. ++;; ++;; The earlyclobber isn't needed for the first alternative, but omitting ++;; it would only help the case in which operands 2 and 3 are the same, ++;; which is handled above rather than here. Marking all the alternatives ++;; as earlyclobber helps to make the instruction more regular to the ++;; register allocator. ++(define_insn_and_rewrite "*cond__any" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] ++ SVE_COND_FP_UNARY) ++ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[3]) ++ && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" ++ "@ ++ \t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. ++ movprfx\t%0, %3\;\t%0., %1/m, %2." ++ "&& !rtx_equal_p (operands[1], operands[4])" ++ { ++ operands[4] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes,yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Inverse ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - NOT ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated predicate inverse. 
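++;; A rough example: with an all-true predicate p3 created by
++;; aarch64_ptrue_reg, inverting p1 into p0 becomes
++;; not p0.b, p3/z, p1.b (register numbers illustrative).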
++(define_expand "one_cmpl2" ++ [(set (match_operand:PRED_ALL 0 "register_operand") ++ (and:PRED_ALL ++ (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) ++ (match_dup 2)))] ++ "TARGET_SVE" ++ { ++ operands[2] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Predicated predicate inverse. ++(define_insn "*one_cmpl3" ++ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (and:PRED_ALL ++ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) ++ (match_operand:PRED_ALL 1 "register_operand" "Upa")))] ++ "TARGET_SVE" ++ "not\t%0.b, %1/z, %2.b" ++) ++ ++;; ========================================================================= ++;; == Binary arithmetic ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] General binary arithmetic corresponding to rtx codes ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ADD (merging form only) ++;; - AND (merging form only) ++;; - ASR (merging form only) ++;; - EOR (merging form only) ++;; - LSL (merging form only) ++;; - LSR (merging form only) ++;; - MUL ++;; - ORR (merging form only) ++;; - SMAX ++;; - SMIN ++;; - SUB (merging form only) ++;; - UMAX ++;; - UMIN ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated integer binary operations that have an immediate form. ++(define_expand "3" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_dup 3) ++ (SVE_INT_BINARY_IMM:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand") ++ (match_operand:SVE_FULL_I 2 "aarch64_sve__operand"))] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ { ++ operands[3] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Integer binary operations that have an immediate form, predicated ++;; with a PTRUE. We don't actually need the predicate for the first ++;; and third alternatives, but using Upa or X isn't likely to gain much ++;; and would make the instruction seem less uniform to the register ++;; allocator. ++(define_insn_and_split "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") ++ (SVE_INT_BINARY_IMM:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_sve__operand" ", w, , w"))] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ "@ ++ # ++ \t%0., %1/m, %0., %3. ++ # ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ ; Split the unpredicated form after reload, so that we don't have ++ ; the unnecessary PTRUE. ++ "&& reload_completed ++ && !register_operand (operands[3], mode)" ++ [(set (match_dup 0) ++ (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))] ++ "" ++ [(set_attr "movprfx" "*,*,yes,yes")] ++) ++ ++;; Unpredicated binary operations with a constant (post-RA only). ++;; These are generated by splitting a predicated instruction whose ++;; predicate is unused. ++(define_insn "*post_ra_3" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (SVE_INT_BINARY_IMM:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand" "0, w") ++ (match_operand:SVE_FULL_I 2 "aarch64_sve__immediate")))] ++ "TARGET_SVE && reload_completed" ++ "@ ++ \t%0., %0., #%2 ++ movprfx\t%0, %1\;\t%0., %0., #%2" ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated integer operations with merging. 
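++;; The UNSPEC_SEL keeps the result of the operation in active lanes and
++;; operand 4 in inactive lanes.  When the destination is tied to the first
++;; input this needs only a single instruction, e.g.
++;; add z0.s, p0/m, z0.s, z1.s; otherwise the patterns below fall back to
++;; MOVPRFX sequences.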
++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand") ++ (SVE_INT_BINARY:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand") ++ (match_operand:SVE_FULL_I 3 "")) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++) ++ ++;; Predicated integer operations, merging with the first input. ++(define_insn "*cond__2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (SVE_INT_BINARY:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated integer operations, merging with the second input. ++(define_insn "*cond__3" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (SVE_INT_BINARY:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "0, w")) ++ (match_dup 3)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0., %2. ++ movprfx\t%0, %3\;\t%0., %1/m, %0., %2." ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated integer operations, merging with an independent value. ++(define_insn_and_rewrite "*cond__any" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") ++ (SVE_INT_BINARY:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[4]) ++ && !rtx_equal_p (operands[3], operands[4])" ++ "@ ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. ++ #" ++ "&& reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])" ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[2], ++ operands[4], operands[1])); ++ operands[4] = operands[2] = operands[0]; ++ } ++ [(set_attr "movprfx" "yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Addition ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ADD ++;; - DECB ++;; - DECD ++;; - DECH ++;; - DECW ++;; - INCB ++;; - INCD ++;; - INCH ++;; - INCW ++;; - SUB ++;; ------------------------------------------------------------------------- ++ ++(define_insn "add3" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?w, ?w, w") ++ (plus:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand" "%0, 0, 0, w, w, w") ++ (match_operand:SVE_FULL_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))] ++ "TARGET_SVE" ++ "@ ++ add\t%0., %0., #%D2 ++ sub\t%0., %0., #%N2 ++ * return aarch64_output_sve_vector_inc_dec (\"%0.\", operands[2]); ++ movprfx\t%0, %1\;add\t%0., %0., #%D2 ++ movprfx\t%0, %1\;sub\t%0., %0., #%N2 ++ add\t%0., %1., %2." 
++ [(set_attr "movprfx" "*,*,*,yes,yes,*")] ++) ++ ++;; Merging forms are handled through SVE_INT_BINARY. ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Subtraction ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SUB ++;; - SUBR ++;; ------------------------------------------------------------------------- ++ ++(define_insn "sub3" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") ++ (minus:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa") ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w")))] ++ "TARGET_SVE" ++ "@ ++ sub\t%0., %1., %2. ++ subr\t%0., %0., #%D1 ++ movprfx\t%0, %2\;subr\t%0., %0., #%D1" ++ [(set_attr "movprfx" "*,*,yes")] ++) ++ ++;; Merging forms are handled through SVE_INT_BINARY. ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Take address ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ADR ++;; ------------------------------------------------------------------------- ++ ++;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD, ++;; but the svadrb intrinsics should preserve the user's choice. ++(define_insn "@aarch64_adr" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") ++ (unspec:SVE_FULL_SDI ++ [(match_operand:SVE_FULL_SDI 1 "register_operand" "w") ++ (match_operand:SVE_FULL_SDI 2 "register_operand" "w")] ++ UNSPEC_ADR))] ++ "TARGET_SVE" ++ "adr\t%0., [%1., %2.]" ++) ++ ++;; Same, but with the offset being sign-extended from the low 32 bits. ++(define_insn_and_rewrite "*aarch64_adr_sxtw" ++ [(set (match_operand:VNx2DI 0 "register_operand" "=w") ++ (unspec:VNx2DI ++ [(match_operand:VNx2DI 1 "register_operand" "w") ++ (unspec:VNx2DI ++ [(match_operand 3) ++ (sign_extend:VNx2DI ++ (truncate:VNx2SI ++ (match_operand:VNx2DI 2 "register_operand" "w")))] ++ UNSPEC_PRED_X)] ++ UNSPEC_ADR))] ++ "TARGET_SVE" ++ "adr\t%0.d, [%1.d, %2.d, sxtw]" ++ "&& !CONSTANT_P (operands[3])" ++ { ++ operands[3] = CONSTM1_RTX (VNx2BImode); ++ } ++) ++ ++;; Same, but with the offset being zero-extended from the low 32 bits. ++(define_insn "*aarch64_adr_uxtw_unspec" ++ [(set (match_operand:VNx2DI 0 "register_operand" "=w") ++ (unspec:VNx2DI ++ [(match_operand:VNx2DI 1 "register_operand" "w") ++ (and:VNx2DI ++ (match_operand:VNx2DI 2 "register_operand" "w") ++ (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))] ++ UNSPEC_ADR))] ++ "TARGET_SVE" ++ "adr\t%0.d, [%1.d, %2.d, uxtw]" ++) ++ ++;; Same, matching as a PLUS rather than unspec. ++(define_insn "*aarch64_adr_uxtw_and" ++ [(set (match_operand:VNx2DI 0 "register_operand" "=w") ++ (plus:VNx2DI ++ (and:VNx2DI ++ (match_operand:VNx2DI 2 "register_operand" "w") ++ (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate")) ++ (match_operand:VNx2DI 1 "register_operand" "w")))] ++ "TARGET_SVE" ++ "adr\t%0.d, [%1.d, %2.d, uxtw]" ++) ++ ++;; ADR with a nonzero shift. 
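++;; The shift amount (1 to 3) scales the vector offset by 2, 4 or 8 bytes;
++;; for example, adr z0.s, [z1.s, z2.s, lsl 2] adds each element of z2
++;; multiplied by 4.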
++(define_expand "@aarch64_adr_shift" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand") ++ (plus:SVE_FULL_SDI ++ (unspec:SVE_FULL_SDI ++ [(match_dup 4) ++ (ashift:SVE_FULL_SDI ++ (match_operand:SVE_FULL_SDI 2 "register_operand") ++ (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))] ++ UNSPEC_PRED_X) ++ (match_operand:SVE_FULL_SDI 1 "register_operand")))] ++ "TARGET_SVE" ++ { ++ operands[4] = CONSTM1_RTX (mode); ++ } ++) ++ ++(define_insn_and_rewrite "*aarch64_adr_shift" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") ++ (plus:SVE_FULL_SDI ++ (unspec:SVE_FULL_SDI ++ [(match_operand 4) ++ (ashift:SVE_FULL_SDI ++ (match_operand:SVE_FULL_SDI 2 "register_operand" "w") ++ (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))] ++ UNSPEC_PRED_X) ++ (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))] ++ "TARGET_SVE" ++ "adr\t%0., [%1., %2., lsl %3]" ++ "&& !CONSTANT_P (operands[4])" ++ { ++ operands[4] = CONSTM1_RTX (mode); ++ } ++) ++ ++;; Same, but with the index being sign-extended from the low 32 bits. ++(define_insn_and_rewrite "*aarch64_adr_shift_sxtw" ++ [(set (match_operand:VNx2DI 0 "register_operand" "=w") ++ (plus:VNx2DI ++ (unspec:VNx2DI ++ [(match_operand 4) ++ (ashift:VNx2DI ++ (unspec:VNx2DI ++ [(match_operand 5) ++ (sign_extend:VNx2DI ++ (truncate:VNx2SI ++ (match_operand:VNx2DI 2 "register_operand" "w")))] ++ UNSPEC_PRED_X) ++ (match_operand:VNx2DI 3 "const_1_to_3_operand"))] ++ UNSPEC_PRED_X) ++ (match_operand:VNx2DI 1 "register_operand" "w")))] ++ "TARGET_SVE" ++ "adr\t%0.d, [%1.d, %2.d, sxtw %3]" ++ "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" ++ { ++ operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode); ++ } ++) ++ ++;; Same, but with the index being zero-extended from the low 32 bits. ++(define_insn_and_rewrite "*aarch64_adr_shift_uxtw" ++ [(set (match_operand:VNx2DI 0 "register_operand" "=w") ++ (plus:VNx2DI ++ (unspec:VNx2DI ++ [(match_operand 5) ++ (ashift:VNx2DI ++ (and:VNx2DI ++ (match_operand:VNx2DI 2 "register_operand" "w") ++ (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate")) ++ (match_operand:VNx2DI 3 "const_1_to_3_operand"))] ++ UNSPEC_PRED_X) ++ (match_operand:VNx2DI 1 "register_operand" "w")))] ++ "TARGET_SVE" ++ "adr\t%0.d, [%1.d, %2.d, uxtw %3]" ++ "&& !CONSTANT_P (operands[5])" ++ { ++ operands[5] = CONSTM1_RTX (VNx2BImode); ++ } ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Absolute difference ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SABD ++;; - UABD ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated integer absolute difference. ++(define_expand "abd_3" ++ [(use (match_operand:SVE_FULL_I 0 "register_operand")) ++ (USMAX:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand") ++ (match_operand:SVE_FULL_I 2 "register_operand"))] ++ "TARGET_SVE" ++ { ++ rtx pred = aarch64_ptrue_reg (mode); ++ emit_insn (gen_aarch64_pred_abd (operands[0], pred, operands[1], ++ operands[2])); ++ DONE; ++ } ++) ++ ++;; Predicated integer absolute difference. 
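++;; The RTL models ABD as the difference between the maximum and the minimum
++;; of the two inputs; for signed .s elements this would emit, for example,
++;; sabd z0.s, p0/m, z0.s, z1.s.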
++(define_insn "@aarch64_pred_abd" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (minus:SVE_FULL_I ++ (USMAX:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "%0, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) ++ (:SVE_FULL_I ++ (match_dup 2) ++ (match_dup 3)))] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ "@ ++ abd\t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;abd\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++(define_expand "@aarch64_cond_abd" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand") ++ (minus:SVE_FULL_I ++ (unspec:SVE_FULL_I ++ [(match_dup 1) ++ (USMAX:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand") ++ (match_operand:SVE_FULL_I 3 "register_operand"))] ++ UNSPEC_PRED_X) ++ (unspec:SVE_FULL_I ++ [(match_dup 1) ++ (:SVE_FULL_I ++ (match_dup 2) ++ (match_dup 3))] ++ UNSPEC_PRED_X)) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++{ ++ if (rtx_equal_p (operands[3], operands[4])) ++ std::swap (operands[2], operands[3]); ++}) ++ ++;; Predicated integer absolute difference, merging with the first input. ++(define_insn_and_rewrite "*aarch64_cond_abd_2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (minus:SVE_FULL_I ++ (unspec:SVE_FULL_I ++ [(match_operand 4) ++ (USMAX:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] ++ UNSPEC_PRED_X) ++ (unspec:SVE_FULL_I ++ [(match_operand 5) ++ (:SVE_FULL_I ++ (match_dup 2) ++ (match_dup 3))] ++ UNSPEC_PRED_X)) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ abd\t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;abd\t%0., %1/m, %0., %3." ++ "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" ++ { ++ operands[4] = operands[5] = CONSTM1_RTX (mode); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated integer absolute difference, merging with an independent value. ++(define_insn_and_rewrite "*aarch64_cond_abd_any" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") ++ (minus:SVE_FULL_I ++ (unspec:SVE_FULL_I ++ [(match_operand 5) ++ (USMAX:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))] ++ UNSPEC_PRED_X) ++ (unspec:SVE_FULL_I ++ [(match_operand 6) ++ (:SVE_FULL_I ++ (match_dup 2) ++ (match_dup 3))] ++ UNSPEC_PRED_X)) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[4]) ++ && !rtx_equal_p (operands[3], operands[4])" ++ "@ ++ movprfx\t%0., %1/z, %0.\;abd\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/z, %0.\;abd\t%0., %1/m, %0., %2. ++ movprfx\t%0., %1/z, %2.\;abd\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/m, %2.\;abd\t%0., %1/m, %0., %3. 
++ #" ++ "&& 1" ++ { ++ if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6])) ++ operands[5] = operands[6] = CONSTM1_RTX (mode); ++ else if (reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])) ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[2], ++ operands[4], operands[1])); ++ operands[4] = operands[2] = operands[0]; ++ } ++ else ++ FAIL; ++ } ++ [(set_attr "movprfx" "yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Saturating addition and subtraction ++;; ------------------------------------------------------------------------- ++;; - SQADD ++;; - SQSUB ++;; - UQADD ++;; - UQSUB ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated saturating signed addition and subtraction. ++(define_insn "@aarch64_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w") ++ (SBINQOPS:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w") ++ (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %0., #%D2 ++ \t%0., %0., #%N2 ++ movprfx\t%0, %1\;\t%0., %0., #%D2 ++ movprfx\t%0, %1\;\t%0., %0., #%N2 ++ \t%0., %1., %2." ++ [(set_attr "movprfx" "*,*,yes,yes,*")] ++) ++ ++;; Unpredicated saturating unsigned addition and subtraction. ++(define_insn "@aarch64_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w") ++ (UBINQOPS:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w") ++ (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %0., #%D2 ++ movprfx\t%0, %1\;\t%0., %0., #%D2 ++ \t%0., %1., %2." ++ [(set_attr "movprfx" "*,yes,*")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Highpart multiplication ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SMULH ++;; - UMULH ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated highpart multiplication. ++(define_expand "mul3_highpart" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_dup 3) ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 1 "register_operand") ++ (match_operand:SVE_FULL_I 2 "register_operand")] ++ MUL_HIGHPART)] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ { ++ operands[3] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Predicated highpart multiplication. ++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] ++ MUL_HIGHPART)] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ "@ ++ mulh\t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;mulh\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated highpart multiplications with merging. 
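++;; As the expander below notes, only the case in which the fallback value is
++;; the first input (or a constant) needs to be handled.  For example,
++;; smulh z0.s, p0/m, z0.s, z1.s leaves the high 32 bits of each 64-bit
++;; product in z0.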
++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 2 "register_operand") ++ (match_operand:SVE_FULL_I 3 "register_operand")] ++ MUL_HIGHPART) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++{ ++ /* Only target code is aware of these operations, so we don't need ++ to handle the fully-general case. */ ++ gcc_assert (rtx_equal_p (operands[2], operands[4]) ++ || CONSTANT_P (operands[4])); ++}) ++ ++;; Predicated highpart multiplications, merging with the first input. ++(define_insn "*cond__2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] ++ MUL_HIGHPART) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "*,yes")]) ++ ++;; Predicated highpart multiplications, merging with zero. ++(define_insn "*cond__z" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] ++ MUL_HIGHPART) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "yes")]) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Division ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SDIV ++;; - SDIVR ++;; - UDIV ++;; - UDIVR ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated integer division. ++(define_expand "3" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand") ++ (unspec:SVE_FULL_SDI ++ [(match_dup 3) ++ (SVE_INT_BINARY_SD:SVE_FULL_SDI ++ (match_operand:SVE_FULL_SDI 1 "register_operand") ++ (match_operand:SVE_FULL_SDI 2 "register_operand"))] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ { ++ operands[3] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Integer division predicated with a PTRUE. ++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w") ++ (unspec:SVE_FULL_SDI ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (SVE_INT_BINARY_SD:SVE_FULL_SDI ++ (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w") ++ (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0., %3. ++ r\t%0., %1/m, %0., %2. ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "*,*,yes")] ++) ++ ++;; Predicated integer division with merging. 
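++;; SDIV and UDIV only exist for .s and .d elements (hence SVE_FULL_SDI).
++;; When the destination is tied to the divisor, the reversed form is used
++;; instead; e.g. sdivr z0.s, p0/m, z0.s, z1.s computes z1.s / z0.s.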
++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand") ++ (unspec:SVE_FULL_SDI ++ [(match_operand: 1 "register_operand") ++ (SVE_INT_BINARY_SD:SVE_FULL_SDI ++ (match_operand:SVE_FULL_SDI 2 "register_operand") ++ (match_operand:SVE_FULL_SDI 3 "register_operand")) ++ (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++) ++ ++;; Predicated integer division, merging with the first input. ++(define_insn "*cond__2" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_SDI ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (SVE_INT_BINARY_SD:SVE_FULL_SDI ++ (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w")) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated integer division, merging with the second input. ++(define_insn "*cond__3" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_SDI ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (SVE_INT_BINARY_SD:SVE_FULL_SDI ++ (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")) ++ (match_dup 3)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0., %2. ++ movprfx\t%0, %3\;\t%0., %1/m, %0., %2." ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated integer division, merging with an independent value. ++(define_insn_and_rewrite "*cond__any" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w") ++ (unspec:SVE_FULL_SDI ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") ++ (SVE_INT_BINARY_SD:SVE_FULL_SDI ++ (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w") ++ (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w")) ++ (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[4]) ++ && !rtx_equal_p (operands[3], operands[4])" ++ "@ ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. ++ #" ++ "&& reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])" ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[2], ++ operands[4], operands[1])); ++ operands[4] = operands[2] = operands[0]; ++ } ++ [(set_attr "movprfx" "yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Binary logical operations ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - AND ++;; - EOR ++;; - ORR ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated integer binary logical operations. ++(define_insn "3" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?w, w") ++ (LOGICAL:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand" "%0, w, w") ++ (match_operand:SVE_FULL_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %0., #%C2 ++ movprfx\t%0, %1\;\t%0., %0., #%C2 ++ \t%0.d, %1.d, %2.d" ++ [(set_attr "movprfx" "*,yes,*")] ++) ++ ++;; Merging forms are handled through SVE_INT_BINARY. 
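++;; For example, the predicated AND produced through SVE_INT_BINARY is
++;; and z0.s, p0/m, z0.s, z1.s, while the unpredicated register form above
++;; always uses .d, since bitwise operations are independent of the element
++;; size.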
++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Binary logical operations (inverted second input) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - BIC ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated BIC. ++(define_expand "@aarch64_bic" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (and:SVE_FULL_I ++ (unspec:SVE_FULL_I ++ [(match_dup 3) ++ (not:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand"))] ++ UNSPEC_PRED_X) ++ (match_operand:SVE_FULL_I 1 "register_operand")))] ++ "TARGET_SVE" ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } ++) ++ ++;; Predicated BIC. ++(define_insn_and_rewrite "*bic3" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") ++ (and:SVE_FULL_I ++ (unspec:SVE_FULL_I ++ [(match_operand 3) ++ (not:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w"))] ++ UNSPEC_PRED_X) ++ (match_operand:SVE_FULL_I 1 "register_operand" "w")))] ++ "TARGET_SVE" ++ "bic\t%0.d, %1.d, %2.d" ++ "&& !CONSTANT_P (operands[3])" ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } ++) ++ ++;; Predicated BIC with merging. ++(define_expand "@cond_bic" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand") ++ (and:SVE_FULL_I ++ (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand")) ++ (match_operand:SVE_FULL_I 2 "register_operand")) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++) ++ ++;; Predicated integer BIC, merging with the first input. ++(define_insn "*cond_bic_2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (and:SVE_FULL_I ++ (not:SVE_FULL_I ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w")) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ bic\t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;bic\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated integer BIC, merging with an independent value. ++(define_insn_and_rewrite "*cond_bic_any" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") ++ (and:SVE_FULL_I ++ (not:SVE_FULL_I ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w")) ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w")) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" ++ "@ ++ movprfx\t%0., %1/z, %0.\;bic\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/z, %2.\;bic\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/m, %2.\;bic\t%0., %1/m, %0., %3. 
++ #" ++ "&& reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])" ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[2], ++ operands[4], operands[1])); ++ operands[4] = operands[2] = operands[0]; ++ } ++ [(set_attr "movprfx" "yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Shifts (rounding towards -Inf) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ASR ++;; - ASRR ++;; - LSL ++;; - LSLR ++;; - LSR ++;; - LSRR ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated shift by a scalar, which expands into one of the vector ++;; shifts below. ++(define_expand "3" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (ASHIFT:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand") ++ (match_operand: 2 "general_operand")))] ++ "TARGET_SVE" ++ { ++ rtx amount; ++ if (CONST_INT_P (operands[2])) ++ { ++ amount = gen_const_vec_duplicate (mode, operands[2]); ++ if (!aarch64_sve_shift_operand (operands[2], mode)) ++ amount = force_reg (mode, amount); ++ } ++ else ++ { ++ amount = gen_reg_rtx (mode); ++ emit_insn (gen_vec_duplicate (amount, ++ convert_to_mode (mode, ++ operands[2], 0))); ++ } ++ emit_insn (gen_v3 (operands[0], operands[1], amount)); ++ DONE; ++ } ++) ++ ++;; Unpredicated shift by a vector. ++(define_expand "v3" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_dup 3) ++ (ASHIFT:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand") ++ (match_operand:SVE_FULL_I 2 "aarch64_sve_shift_operand"))] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ { ++ operands[3] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Shift by a vector, predicated with a PTRUE. We don't actually need ++;; the predicate for the first alternative, but using Upa or X isn't ++;; likely to gain much and would make the instruction seem less uniform ++;; to the register allocator. ++(define_insn_and_split "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") ++ (ASHIFT:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand" "D, w, 0, w"))] ++ UNSPEC_PRED_X))] ++ "TARGET_SVE" ++ "@ ++ # ++ \t%0., %1/m, %0., %3. ++ r\t%0., %1/m, %3., %2. ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ "&& reload_completed ++ && !register_operand (operands[3], mode)" ++ [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))] ++ "" ++ [(set_attr "movprfx" "*,*,*,yes")] ++) ++ ++;; Unpredicated shift operations by a constant (post-RA only). ++;; These are generated by splitting a predicated instruction whose ++;; predicate is unused. ++(define_insn "*post_ra_v3" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") ++ (ASHIFT:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand" "w") ++ (match_operand:SVE_FULL_I 2 "aarch64_simd_shift_imm")))] ++ "TARGET_SVE && reload_completed" ++ "\t%0., %1., #%2" ++) ++ ++;; Predicated integer shift, merging with the first input. 
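++;; Constant shift amounts are kept as vector immediates so that the merged
++;; form can use the immediate encoding, e.g. asr z0.s, p0/m, z0.s, #3.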
++(define_insn "*cond__2_const" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (ASHIFT:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_shift_imm")) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0., #%3 ++ movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated integer shift, merging with an independent value. ++(define_insn_and_rewrite "*cond__any_const" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, &w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (ASHIFT:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_shift_imm")) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" ++ "@ ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 ++ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 ++ #" ++ "&& reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])" ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[2], ++ operands[4], operands[1])); ++ operands[4] = operands[2] = operands[0]; ++ } ++ [(set_attr "movprfx" "yes")] ++) ++ ++;; Unpredicated shifts of narrow elements by 64-bit amounts. ++(define_insn "@aarch64_sve_" ++ [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") ++ (unspec:SVE_FULL_BHSI ++ [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w") ++ (match_operand:VNx2DI 2 "register_operand" "w")] ++ SVE_SHIFT_WIDE))] ++ "TARGET_SVE" ++ "\t%0., %1., %2.d" ++) ++ ++;; Merging predicated shifts of narrow elements by 64-bit amounts. ++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_BHSI 0 "register_operand") ++ (unspec:SVE_FULL_BHSI ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_BHSI ++ [(match_operand:SVE_FULL_BHSI 2 "register_operand") ++ (match_operand:VNx2DI 3 "register_operand")] ++ SVE_SHIFT_WIDE) ++ (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++) ++ ++;; Predicated shifts of narrow elements by 64-bit amounts, merging with ++;; the first input. ++(define_insn "*cond__m" ++ [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_BHSI ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_BHSI ++ [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w") ++ (match_operand:VNx2DI 3 "register_operand" "w, w")] ++ SVE_SHIFT_WIDE) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0., %3.d ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3.d" ++ [(set_attr "movprfx" "*, yes")]) ++ ++;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero. 
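++;; The zeroing form below prefixes the operation with a zeroing MOVPRFX,
++;; e.g. movprfx z0.s, p0/z, z1.s followed by lsl z0.s, p0/m, z0.s, z2.d
++;; (register numbers illustrative).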
++(define_insn "*cond__z" ++ [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w") ++ (unspec:SVE_FULL_BHSI ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_BHSI ++ [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w") ++ (match_operand:VNx2DI 3 "register_operand" "w, w")] ++ SVE_SHIFT_WIDE) ++ (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3.d ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3.d" ++ [(set_attr "movprfx" "yes")]) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Shifts (rounding towards 0) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ASRD ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated ASRD. ++(define_expand "sdiv_pow23" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_dup 3) ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 1 "register_operand") ++ (match_operand 2 "aarch64_simd_rshift_imm")] ++ UNSPEC_ASRD) ++ (match_dup 1)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ { ++ operands[3] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Predicated ASRD with merging. ++(define_expand "@cond_asrd" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 2 "register_operand") ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")] ++ UNSPEC_ASRD) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++) ++ ++;; Predicated ASRD, merging with the first input. ++(define_insn "*cond_asrd_2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")] ++ UNSPEC_ASRD) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "@ ++ asrd\t%0., %1/m, %0., #%3 ++ movprfx\t%0, %2\;asrd\t%0., %1/m, %0., #%3" ++ [(set_attr "movprfx" "*,yes")]) ++ ++;; Predicated ASRD, merging with zero. ++(define_insn "*cond_asrd_z" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl") ++ (unspec:SVE_FULL_I ++ [(match_operand:SVE_FULL_I 2 "register_operand" "w") ++ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")] ++ UNSPEC_ASRD) ++ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++ "movprfx\t%0., %1/z, %2.\;asrd\t%0., %1/m, %0., #%3" ++ [(set_attr "movprfx" "yes")]) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FSCALE ++;; - FTSMUL ++;; - FTSSEL ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated floating-point binary operations that take an integer as ++;; their second operand. 
++(define_insn "@aarch64_sve_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") ++ (unspec:SVE_FULL_F ++ [(match_operand:SVE_FULL_F 1 "register_operand" "w") ++ (match_operand: 2 "register_operand" "w")] ++ SVE_FP_BINARY_INT))] ++ "TARGET_SVE" ++ "\t%0., %1., %2." ++) ++ ++;; Predicated floating-point binary operations that take an integer ++;; as their second operand. ++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") ++ (match_operand: 3 "register_operand" "w, w")] ++ SVE_COND_FP_BINARY_INT))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated floating-point binary operations with merging, taking an ++;; integer as their second operand. ++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_F 2 "register_operand") ++ (match_operand: 3 "register_operand")] ++ SVE_COND_FP_BINARY_INT) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++) ++ ++;; Predicated floating-point binary operations that take an integer as their ++;; second operand, with inactive lanes coming from the first operand. ++(define_insn_and_rewrite "*cond__2" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") ++ (match_operand: 3 "register_operand" "w, w")] ++ SVE_COND_FP_BINARY_INT) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" ++ "@ ++ \t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ "&& !rtx_equal_p (operands[1], operands[4])" ++ { ++ operands[4] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated floating-point binary operations that take an integer as ++;; their second operand, with the values of inactive lanes being distinct ++;; from the other inputs. ++(define_insn_and_rewrite "*cond__any" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") ++ (match_operand: 3 "register_operand" "w, w, w, w")] ++ SVE_COND_FP_BINARY_INT) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[4]) ++ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" ++ "@ ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. 
++ #" ++ "&& 1" ++ { ++ if (reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])) ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[2], ++ operands[4], operands[1])); ++ operands[4] = operands[2] = operands[0]; ++ } ++ else if (!rtx_equal_p (operands[1], operands[5])) ++ operands[5] = copy_rtx (operands[1]); ++ else ++ FAIL; ++ } ++ [(set_attr "movprfx" "yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP] General binary arithmetic corresponding to rtx codes ++;; ------------------------------------------------------------------------- ++;; Includes post-RA forms of: ++;; - FADD ++;; - FMUL ++;; - FSUB ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated floating-point binary operations (post-RA only). ++;; These are generated by splitting a predicated instruction whose ++;; predicate is unused. ++(define_insn "*post_ra_3" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") ++ (SVE_UNPRED_FP_BINARY:SVE_FULL_F ++ (match_operand:SVE_FULL_F 1 "register_operand" "w") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w")))] ++ "TARGET_SVE && reload_completed" ++ "\t%0., %1., %2.") ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP] General binary arithmetic corresponding to unspecs ++;; ------------------------------------------------------------------------- ++;; Includes merging forms of: ++;; - FADD (constant forms handled in the "Addition" section) ++;; - FDIV ++;; - FDIVR ++;; - FMAX ++;; - FMAXNM (including #0.0 and #1.0) ++;; - FMIN ++;; - FMINNM (including #0.0 and #1.0) ++;; - FMUL (including #0.5 and #2.0) ++;; - FMULX ++;; - FRECPS ++;; - FRSQRTS ++;; - FSUB (constant forms handled in the "Addition" section) ++;; - FSUBR (constant forms handled in the "Subtraction" section) ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated floating-point binary operations. ++(define_insn "@aarch64_sve_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") ++ (unspec:SVE_FULL_F ++ [(match_operand:SVE_FULL_F 1 "register_operand" "w") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w")] ++ SVE_FP_BINARY))] ++ "TARGET_SVE" ++ "\t%0., %1., %2." ++) ++ ++;; Unpredicated floating-point binary operations that need to be predicated ++;; for SVE. ++(define_expand "3" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 3) ++ (const_int SVE_RELAXED_GP) ++ (match_operand:SVE_FULL_F 1 "") ++ (match_operand:SVE_FULL_F 2 "")] ++ SVE_COND_FP_BINARY))] ++ "TARGET_SVE" ++ { ++ operands[3] = aarch64_ptrue_reg (mode); ++ } ++) ++ ++;; Predicated floating-point binary operations that have no immediate forms. ++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")] ++ SVE_COND_FP_BINARY_REG))] ++ "TARGET_SVE" ++ "@ ++ \t%0., %1/m, %0., %3. ++ \t%0., %1/m, %0., %2. ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "*,*,yes")] ++) ++ ++;; Predicated floating-point operations with merging. 
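++;; Merging with the first input gives a single instruction, for example
++;; fadd z0.s, p0/m, z0.s, z1.s; the *_const patterns below additionally
++;; accept the immediate forms, e.g. fmaxnm z0.s, p0/m, z0.s, #1.0.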
++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_F 2 "") ++ (match_operand:SVE_FULL_F 3 "")] ++ SVE_COND_FP_BINARY) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++) ++ ++;; Predicated floating-point operations, merging with the first input. ++(define_insn_and_rewrite "*cond__2" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] ++ SVE_COND_FP_BINARY) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" ++ "@ ++ \t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ "&& !rtx_equal_p (operands[1], operands[4])" ++ { ++ operands[4] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Same for operations that take a 1-bit constant. ++(define_insn_and_rewrite "*cond__2_const" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_F 3 "")] ++ SVE_COND_FP_BINARY_I1) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" ++ "@ ++ \t%0., %1/m, %0., #%3 ++ movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" ++ "&& !rtx_equal_p (operands[1], operands[4])" ++ { ++ operands[4] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated floating-point operations, merging with the second input. ++(define_insn_and_rewrite "*cond__3" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] ++ SVE_COND_FP_BINARY) ++ (match_dup 3)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" ++ "@ ++ \t%0., %1/m, %0., %2. ++ movprfx\t%0, %3\;\t%0., %1/m, %0., %2." ++ "&& !rtx_equal_p (operands[1], operands[4])" ++ { ++ operands[4] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated floating-point operations, merging with an independent value. 
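++;; If the fallback value is a separate register, the pattern below is first
++;; matched as "#" and later rewritten into a SEL of the fallback followed by
++;; a merging operation, e.g. sel z0.s, p0, z1.s, z3.s and then
++;; fadd z0.s, p0/m, z0.s, z2.s (register numbers illustrative).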
++(define_insn_and_rewrite "*cond__any" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] ++ SVE_COND_FP_BINARY) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[4]) ++ && !rtx_equal_p (operands[3], operands[4]) ++ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" ++ "@ ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. ++ #" ++ "&& 1" ++ { ++ if (reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])) ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[2], ++ operands[4], operands[1])); ++ operands[4] = operands[2] = operands[0]; ++ } ++ else if (!rtx_equal_p (operands[1], operands[5])) ++ operands[5] = copy_rtx (operands[1]); ++ else ++ FAIL; ++ } ++ [(set_attr "movprfx" "yes")] ++) ++ ++;; Same for operations that take a 1-bit constant. ++(define_insn_and_rewrite "*cond__any_const" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") ++ (match_operand:SVE_FULL_F 3 "")] ++ SVE_COND_FP_BINARY_I1) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[4]) ++ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" ++ "@ ++ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 ++ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 ++ #" ++ "&& 1" ++ { ++ if (reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])) ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[2], ++ operands[4], operands[1])); ++ operands[4] = operands[2] = operands[0]; ++ } ++ else if (!rtx_equal_p (operands[1], operands[5])) ++ operands[5] = copy_rtx (operands[1]); ++ else ++ FAIL; ++ } ++ [(set_attr "movprfx" "yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Addition ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FADD ++;; - FSUB ++;; ------------------------------------------------------------------------- ++ ++;; Predicated floating-point addition. ++(define_insn_and_split "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl") ++ (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1") ++ (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w") ++ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")] ++ SVE_COND_FP_ADD))] ++ "TARGET_SVE" ++ "@ ++ fadd\t%0., %1/m, %0., #%3 ++ fsub\t%0., %1/m, %0., #%N3 ++ # ++ fadd\t%0., %1/m, %0., %3. 
++ movprfx\t%0, %2\;fadd\t%0., %1/m, %0., #%3 ++ movprfx\t%0, %2\;fsub\t%0., %1/m, %0., #%N3 ++ movprfx\t%0, %2\;fadd\t%0., %1/m, %0., %3." ++ ; Split the unpredicated form after reload, so that we don't have ++ ; the unnecessary PTRUE. ++ "&& reload_completed ++ && register_operand (operands[3], mode) ++ && INTVAL (operands[4]) == SVE_RELAXED_GP" ++ [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))] ++ "" ++ [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] ++) ++ ++;; Predicated floating-point addition of a constant, merging with the ++;; first input. ++(define_insn_and_rewrite "*cond_add_2_const" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") ++ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] ++ UNSPEC_COND_FADD) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" ++ "@ ++ fadd\t%0., %1/m, %0., #%3 ++ fsub\t%0., %1/m, %0., #%N3 ++ movprfx\t%0, %2\;fadd\t%0., %1/m, %0., #%3 ++ movprfx\t%0, %2\;fsub\t%0., %1/m, %0., #%N3" ++ "&& !rtx_equal_p (operands[1], operands[4])" ++ { ++ operands[4] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,*,yes,yes")] ++) ++ ++;; Predicated floating-point addition of a constant, merging with an ++;; independent value. ++(define_insn_and_rewrite "*cond_add_any_const" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") ++ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] ++ UNSPEC_COND_FADD) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[4]) ++ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" ++ "@ ++ movprfx\t%0., %1/z, %2.\;fadd\t%0., %1/m, %0., #%3 ++ movprfx\t%0., %1/z, %2.\;fsub\t%0., %1/m, %0., #%N3 ++ movprfx\t%0., %1/m, %2.\;fadd\t%0., %1/m, %0., #%3 ++ movprfx\t%0., %1/m, %2.\;fsub\t%0., %1/m, %0., #%N3 ++ # ++ #" ++ "&& 1" ++ { ++ if (reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])) ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[2], ++ operands[4], operands[1])); ++ operands[4] = operands[2] = operands[0]; ++ } ++ else if (!rtx_equal_p (operands[1], operands[5])) ++ operands[5] = copy_rtx (operands[1]); ++ else ++ FAIL; ++ } ++ [(set_attr "movprfx" "yes")] ++) ++ ++;; Register merging forms are handled through SVE_COND_FP_BINARY. ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Complex addition ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FCADD ++;; ------------------------------------------------------------------------- ++ ++;; Predicated FCADD. 
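The FCADD patterns that follow treat each pair of adjacent lanes as one complex number and add the second operand rotated by 90 or 270 degrees. A rough scalar sketch, assuming the usual Arm definition of the two rotations (the struct and function names are invented for illustration):

/* One-element model of FCADD #90 and FCADD #270.  */
struct cplx { float re, im; };

static struct cplx
fcadd_one (struct cplx a, struct cplx b, int rot)
{
  struct cplx r;
  if (rot == 90)
    {
      r.re = a.re - b.im;
      r.im = a.im + b.re;
    }
  else /* rot == 270 */
    {
      r.re = a.re + b.im;
      r.im = a.im - b.re;
    }
  return r;
}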
++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] ++ SVE_COND_FCADD))] ++ "TARGET_SVE" ++ "@ ++ fcadd\t%0., %1/m, %0., %3., # ++ movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated FCADD with merging. ++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_F 2 "register_operand") ++ (match_operand:SVE_FULL_F 3 "register_operand")] ++ SVE_COND_FCADD) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" ++) ++ ++;; Predicated FCADD, merging with the first input. ++(define_insn_and_rewrite "*cond__2" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] ++ SVE_COND_FCADD) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" ++ "@ ++ fcadd\t%0., %1/m, %0., %3., # ++ movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" ++ "&& !rtx_equal_p (operands[1], operands[4])" ++ { ++ operands[4] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated FCADD, merging with an independent value. ++(define_insn_and_rewrite "*cond__any" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] ++ SVE_COND_FCADD) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[4]) ++ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" ++ "@ ++ movprfx\t%0., %1/z, %2.\;fcadd\t%0., %1/m, %0., %3., # ++ movprfx\t%0., %1/z, %0.\;fcadd\t%0., %1/m, %0., %3., # ++ movprfx\t%0., %1/m, %2.\;fcadd\t%0., %1/m, %0., %3., # ++ #" ++ "&& 1" ++ { ++ if (reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])) ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[2], ++ operands[4], operands[1])); ++ operands[4] = operands[2] = operands[0]; ++ } ++ else if (!rtx_equal_p (operands[1], operands[5])) ++ operands[5] = copy_rtx (operands[1]); ++ else ++ FAIL; ++ } ++ [(set_attr "movprfx" "yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Subtraction ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FSUB ++;; - FSUBR ++;; ------------------------------------------------------------------------- ++ ++;; Predicated floating-point subtraction. 
++(define_insn_and_split "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1") ++ (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")] ++ SVE_COND_FP_SUB))] ++ "TARGET_SVE" ++ "@ ++ fsubr\t%0., %1/m, %0., #%2 ++ # ++ fsub\t%0., %1/m, %0., %3. ++ fsubr\t%0., %1/m, %0., %2. ++ movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2 ++ movprfx\t%0, %2\;fsub\t%0., %1/m, %0., %3." ++ ; Split the unpredicated form after reload, so that we don't have ++ ; the unnecessary PTRUE. ++ "&& reload_completed ++ && register_operand (operands[2], mode) ++ && INTVAL (operands[4]) == SVE_RELAXED_GP" ++ [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))] ++ "" ++ [(set_attr "movprfx" "*,*,*,*,yes,yes")] ++) ++ ++;; Predicated floating-point subtraction from a constant, merging with the ++;; second input. ++(define_insn_and_rewrite "*cond_sub_3_const" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") ++ (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] ++ UNSPEC_COND_FSUB) ++ (match_dup 3)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" ++ "@ ++ fsubr\t%0., %1/m, %0., #%2 ++ movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2" ++ "&& !rtx_equal_p (operands[1], operands[4])" ++ { ++ operands[4] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated floating-point subtraction from a constant, merging with an ++;; independent value. ++(define_insn_and_rewrite "*cond_sub_any_const" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")] ++ UNSPEC_COND_FSUB) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[3], operands[4]) ++ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" ++ "@ ++ movprfx\t%0., %1/z, %3.\;fsubr\t%0., %1/m, %0., #%2 ++ movprfx\t%0., %1/m, %3.\;fsubr\t%0., %1/m, %0., #%2 ++ #" ++ "&& 1" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ if (reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])) ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[3], ++ operands[4], operands[1])); ++ operands[4] = operands[3] = operands[0]; ++ } ++ else if (!rtx_equal_p (operands[1], operands[5])) ++ operands[5] = copy_rtx (operands[1]); ++ else ++ FAIL; + } ++ [(set_attr "movprfx" "yes")] + ) + +-;; Predicated ST[234]. 
+-(define_insn "vec_mask_store_lanes" +- [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") +- (unspec:SVE_STRUCT +- [(match_operand: 2 "register_operand" "Upl") +- (match_operand:SVE_STRUCT 1 "register_operand" "w") +- (match_dup 0)] +- UNSPEC_STN))] ++;; Register merging forms are handled through SVE_COND_FP_BINARY. ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Absolute difference ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FABD ++;; ------------------------------------------------------------------------- ++ ++;; Predicated floating-point absolute difference. ++(define_expand "@aarch64_pred_abd" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand") ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (match_dup 4) ++ (match_operand:SVE_FULL_F 2 "register_operand") ++ (match_operand:SVE_FULL_F 3 "register_operand")] ++ UNSPEC_COND_FSUB)] ++ UNSPEC_COND_FABS))] + "TARGET_SVE" +- "st\t%1, %2, %0" + ) + +-(define_expand "vec_perm" +- [(match_operand:SVE_ALL 0 "register_operand") +- (match_operand:SVE_ALL 1 "register_operand") +- (match_operand:SVE_ALL 2 "register_operand") +- (match_operand: 3 "aarch64_sve_vec_perm_operand")] +- "TARGET_SVE && GET_MODE_NUNITS (mode).is_constant ()" ++;; Predicated floating-point absolute difference. ++(define_insn_and_rewrite "*aarch64_pred_abd" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] ++ UNSPEC_COND_FSUB)] ++ UNSPEC_COND_FABS))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" ++ "@ ++ fabd\t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." ++ "&& !rtx_equal_p (operands[1], operands[5])" + { +- aarch64_expand_sve_vec_perm (operands[0], operands[1], +- operands[2], operands[3]); +- DONE; ++ operands[5] = copy_rtx (operands[1]); + } ++ [(set_attr "movprfx" "*,yes")] + ) + +-(define_insn "*aarch64_sve_tbl" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") +- (unspec:SVE_ALL +- [(match_operand:SVE_ALL 1 "register_operand" "w") +- (match_operand: 2 "register_operand" "w")] +- UNSPEC_TBL))] ++(define_expand "@aarch64_cond_abd" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_F 2 "register_operand") ++ (match_operand:SVE_FULL_F 3 "register_operand")] ++ UNSPEC_COND_FSUB)] ++ UNSPEC_COND_FABS) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] + "TARGET_SVE" +- "tbl\t%0., %1., %2." ++{ ++ if (rtx_equal_p (operands[3], operands[4])) ++ std::swap (operands[2], operands[3]); ++}) ++ ++;; Predicated floating-point absolute difference, merging with the first ++;; input. 
++(define_insn_and_rewrite "*aarch64_cond_abd_2" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (unspec:SVE_FULL_F ++ [(match_operand 6) ++ (match_operand:SI 7 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] ++ UNSPEC_COND_FSUB)] ++ UNSPEC_COND_FABS) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && aarch64_sve_pred_dominates_p (&operands[4], operands[1]) ++ && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" ++ "@ ++ fabd\t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." ++ "&& (!rtx_equal_p (operands[1], operands[4]) ++ || !rtx_equal_p (operands[1], operands[6]))" ++ { ++ operands[4] = copy_rtx (operands[1]); ++ operands[6] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] + ) + +-(define_insn "*aarch64_sve_" +- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") +- (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") +- (match_operand:PRED_ALL 2 "register_operand" "Upa")] +- PERMUTE))] +- "TARGET_SVE" +- "\t%0., %1., %2." ++;; Predicated floating-point absolute difference, merging with the second ++;; input. ++(define_insn_and_rewrite "*aarch64_cond_abd_3" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (unspec:SVE_FULL_F ++ [(match_operand 6) ++ (match_operand:SI 7 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] ++ UNSPEC_COND_FSUB)] ++ UNSPEC_COND_FABS) ++ (match_dup 3)] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && aarch64_sve_pred_dominates_p (&operands[4], operands[1]) ++ && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" ++ "@ ++ fabd\t%0., %1/m, %0., %2. ++ movprfx\t%0, %3\;fabd\t%0., %1/m, %0., %2." ++ "&& (!rtx_equal_p (operands[1], operands[4]) ++ || !rtx_equal_p (operands[1], operands[6]))" ++ { ++ operands[4] = copy_rtx (operands[1]); ++ operands[6] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; Predicated floating-point absolute difference, merging with an ++;; independent value. ++(define_insn_and_rewrite "*aarch64_cond_abd_any" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (unspec:SVE_FULL_F ++ [(match_operand 7) ++ (match_operand:SI 8 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] ++ UNSPEC_COND_FSUB)] ++ UNSPEC_COND_FABS) ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[4]) ++ && !rtx_equal_p (operands[3], operands[4]) ++ && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) ++ && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" ++ "@ ++ movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %2. 
++ movprfx\t%0., %1/z, %2.\;fabd\t%0., %1/m, %0., %3. ++ movprfx\t%0., %1/m, %2.\;fabd\t%0., %1/m, %0., %3. ++ #" ++ "&& 1" ++ { ++ if (reload_completed ++ && register_operand (operands[4], mode) ++ && !rtx_equal_p (operands[0], operands[4])) ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[3], ++ operands[4], operands[1])); ++ operands[4] = operands[3] = operands[0]; ++ } ++ else if (!rtx_equal_p (operands[1], operands[5]) ++ || !rtx_equal_p (operands[1], operands[7])) ++ { ++ operands[5] = copy_rtx (operands[1]); ++ operands[7] = copy_rtx (operands[1]); ++ } ++ else ++ FAIL; ++ } ++ [(set_attr "movprfx" "yes")] + ) + +-(define_insn "aarch64_sve_" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") +- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w") +- (match_operand:SVE_ALL 2 "register_operand" "w")] +- PERMUTE))] ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Multiplication ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FMUL ++;; ------------------------------------------------------------------------- ++ ++;; Predicated floating-point multiplication. ++(define_insn_and_split "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") ++ (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1") ++ (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w") ++ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")] ++ SVE_COND_FP_MUL))] + "TARGET_SVE" +- "\t%0., %1., %2." ++ "@ ++ fmul\t%0., %1/m, %0., #%3 ++ # ++ fmul\t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;fmul\t%0., %1/m, %0., #%3 ++ movprfx\t%0, %2\;fmul\t%0., %1/m, %0., %3." ++ ; Split the unpredicated form after reload, so that we don't have ++ ; the unnecessary PTRUE. ++ "&& reload_completed ++ && register_operand (operands[3], mode) ++ && INTVAL (operands[4]) == SVE_RELAXED_GP" ++ [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))] ++ "" ++ [(set_attr "movprfx" "*,*,*,yes,yes")] + ) + +-(define_insn "*aarch64_sve_rev64" +- [(set (match_operand:SVE_BHS 0 "register_operand" "=w") +- (unspec:SVE_BHS +- [(match_operand:VNx2BI 1 "register_operand" "Upl") +- (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")] +- UNSPEC_REV64)] +- UNSPEC_MERGE_PTRUE))] ++;; Merging forms are handled through SVE_COND_FP_BINARY and ++;; SVE_COND_FP_BINARY_I1. ++ ++;; Unpredicated multiplication by selected lanes. 
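The multiplication-by-selected-lanes pattern that follows maps onto the indexed form of FMUL. A scalar sketch of the assumed semantics, taking the usual SVE rule that the index selects an element within each 128-bit segment of the second operand (the segment handling and all names here are assumptions made for illustration):

#include <stddef.h>

/* Assumed model of FMUL (indexed) on single-precision lanes: each element
   of A is multiplied by the lane'th element of B within the same
   128-bit (4 x float) segment.  */
static void
fmul_lane_f32 (const float *a, const float *b, float *out,
               size_t n, unsigned lane)
{
  const size_t seg = 4;  /* 32-bit elements per 128-bit segment.  */
  for (size_t i = 0; i < n; i++)
    out[i] = a[i] * b[(i / seg) * seg + lane];
}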
++(define_insn "@aarch64_mul_lane_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") ++ (mult:SVE_FULL_F ++ (unspec:SVE_FULL_F ++ [(match_operand:SVE_FULL_F 2 "register_operand" "") ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPEC_SVE_LANE_SELECT) ++ (match_operand:SVE_FULL_F 1 "register_operand" "w")))] + "TARGET_SVE" +- "rev\t%0.d, %1/m, %2.d" ++ "fmul\t%0., %1., %2.[%3]" + ) + +-(define_insn "*aarch64_sve_rev32" +- [(set (match_operand:SVE_BH 0 "register_operand" "=w") +- (unspec:SVE_BH +- [(match_operand:VNx4BI 1 "register_operand" "Upl") +- (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")] +- UNSPEC_REV32)] +- UNSPEC_MERGE_PTRUE))] ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Binary logical operations ++;; ------------------------------------------------------------------------- ++;; Includes ++;; - AND ++;; - EOR ++;; - ORR ++;; ------------------------------------------------------------------------- ++ ++;; Binary logical operations on floating-point modes. We avoid subregs ++;; by providing this, but we need to use UNSPECs since rtx logical ops ++;; aren't defined for floating-point modes. ++(define_insn "*3" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") ++ (unspec:SVE_FULL_F ++ [(match_operand:SVE_FULL_F 1 "register_operand" "w") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w")] ++ LOGICALF))] + "TARGET_SVE" +- "rev\t%0.s, %1/m, %2.s" ++ "\t%0.d, %1.d, %2.d" + ) + +-(define_insn "*aarch64_sve_rev16vnx16qi" +- [(set (match_operand:VNx16QI 0 "register_operand" "=w") +- (unspec:VNx16QI +- [(match_operand:VNx8BI 1 "register_operand" "Upl") +- (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")] +- UNSPEC_REV16)] +- UNSPEC_MERGE_PTRUE))] ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Sign copying ++;; ------------------------------------------------------------------------- ++;; The patterns in this section are synthetic. 
++;; ------------------------------------------------------------------------- ++ ++(define_expand "copysign3" ++ [(match_operand:SVE_FULL_F 0 "register_operand") ++ (match_operand:SVE_FULL_F 1 "register_operand") ++ (match_operand:SVE_FULL_F 2 "register_operand")] + "TARGET_SVE" +- "revb\t%0.h, %1/m, %2.h" ++ { ++ rtx sign = gen_reg_rtx (mode); ++ rtx mant = gen_reg_rtx (mode); ++ rtx int_res = gen_reg_rtx (mode); ++ int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; ++ ++ rtx arg1 = lowpart_subreg (mode, operands[1], mode); ++ rtx arg2 = lowpart_subreg (mode, operands[2], mode); ++ ++ emit_insn (gen_and3 ++ (sign, arg2, ++ aarch64_simd_gen_const_vector_dup (mode, ++ HOST_WIDE_INT_M1U ++ << bits))); ++ emit_insn (gen_and3 ++ (mant, arg1, ++ aarch64_simd_gen_const_vector_dup (mode, ++ ~(HOST_WIDE_INT_M1U ++ << bits)))); ++ emit_insn (gen_ior3 (int_res, sign, mant)); ++ emit_move_insn (operands[0], gen_lowpart (mode, int_res)); ++ DONE; ++ } + ) + +-(define_insn "*aarch64_sve_rev" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") +- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")] +- UNSPEC_REV))] ++(define_expand "xorsign3" ++ [(match_operand:SVE_FULL_F 0 "register_operand") ++ (match_operand:SVE_FULL_F 1 "register_operand") ++ (match_operand:SVE_FULL_F 2 "register_operand")] + "TARGET_SVE" +- "rev\t%0., %1.") ++ { ++ rtx sign = gen_reg_rtx (mode); ++ rtx int_res = gen_reg_rtx (mode); ++ int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; + +-(define_insn "*aarch64_sve_dup_lane" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") +- (vec_duplicate:SVE_ALL +- (vec_select: +- (match_operand:SVE_ALL 1 "register_operand" "w") +- (parallel [(match_operand:SI 2 "const_int_operand")]))))] +- "TARGET_SVE +- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 0, 63)" +- "dup\t%0., %1.[%2]" ++ rtx arg1 = lowpart_subreg (mode, operands[1], mode); ++ rtx arg2 = lowpart_subreg (mode, operands[2], mode); ++ ++ emit_insn (gen_and3 ++ (sign, arg2, ++ aarch64_simd_gen_const_vector_dup (mode, ++ HOST_WIDE_INT_M1U ++ << bits))); ++ emit_insn (gen_xor3 (int_res, arg1, sign)); ++ emit_move_insn (operands[0], gen_lowpart (mode, int_res)); ++ DONE; ++ } + ) + +-;; Note that the immediate (third) operand is the lane index not +-;; the byte index. +-(define_insn "*aarch64_sve_ext" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") +- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0") +- (match_operand:SVE_ALL 2 "register_operand" "w") +- (match_operand:SI 3 "const_int_operand")] +- UNSPEC_EXT))] +- "TARGET_SVE +- && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (mode), 0, 255)" ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Maximum and minimum ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FMAX ++;; - FMAXNM ++;; - FMIN ++;; - FMINNM ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated fmax/fmin (the libm functions). The optabs for the ++;; smin/smax rtx codes are handled in the generic section above. 
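Before the maximum/minimum patterns, a note on the sign-copying expanders just above: as their comments say, the patterns are synthetic, built from integer AND/ORR/EOR on the sign bit rather than from a dedicated FP instruction. A scalar double-precision sketch of the same bit manipulation (helper names invented for illustration):

#include <stdint.h>
#include <string.h>

/* copysign: keep the magnitude of X, take the sign of Y.  */
static double
copysign_bits (double x, double y)
{
  uint64_t ix, iy, sign = UINT64_C (1) << 63;
  memcpy (&ix, &x, sizeof ix);
  memcpy (&iy, &y, sizeof iy);
  ix = (ix & ~sign) | (iy & sign);
  memcpy (&x, &ix, sizeof x);
  return x;
}

/* xorsign: flip the sign of X wherever Y is negative.  */
static double
xorsign_bits (double x, double y)
{
  uint64_t ix, iy, sign = UINT64_C (1) << 63;
  memcpy (&ix, &x, sizeof ix);
  memcpy (&iy, &y, sizeof iy);
  ix ^= iy & sign;
  memcpy (&x, &ix, sizeof x);
  return x;
}

The expanders build the result in integer-mode temporaries because the vector logical instructions operate on bit patterns, and then move it back to the FP mode with a lowpart subreg.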
++(define_expand "3" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 3) ++ (const_int SVE_RELAXED_GP) ++ (match_operand:SVE_FULL_F 1 "register_operand") ++ (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")] ++ SVE_COND_FP_MAXMIN_PUBLIC))] ++ "TARGET_SVE" + { +- operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (mode)); +- return "ext\\t%0.b, %0.b, %2.b, #%3"; ++ operands[3] = aarch64_ptrue_reg (mode); + } + ) + +-(define_insn "add3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w") +- (plus:SVE_I +- (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w") +- (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))] ++;; Predicated floating-point maximum/minimum. ++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w") ++ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")] ++ SVE_COND_FP_MAXMIN))] + "TARGET_SVE" + "@ +- add\t%0., %0., #%D2 +- sub\t%0., %0., #%N2 +- * return aarch64_output_sve_inc_dec_immediate (\"%0.\", operands[2]); +- add\t%0., %1., %2." ++ \t%0., %1/m, %0., #%3 ++ \t%0., %1/m, %0., %3. ++ movprfx\t%0, %2\;\t%0., %1/m, %0., #%3 ++ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ [(set_attr "movprfx" "*,*,yes,yes")] + ) + +-(define_insn "sub3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, w") +- (minus:SVE_I +- (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa") +- (match_operand:SVE_I 2 "register_operand" "w, 0")))] ++;; Merging forms are handled through SVE_COND_FP_BINARY and ++;; SVE_COND_FP_BINARY_I1. ++ ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Binary logical operations ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - AND ++;; - ANDS ++;; - EOR ++;; - EORS ++;; - ORR ++;; - ORRS ++;; ------------------------------------------------------------------------- ++ ++;; Predicate AND. We can reuse one of the inputs as the GP. ++;; Doubling the second operand is the preferred implementation ++;; of the MOV alias, so we use that instead of %1/z, %1, %2. ++(define_insn "and3" ++ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")))] + "TARGET_SVE" +- "@ +- sub\t%0., %1., %2. +- subr\t%0., %0., #%D1" ++ "and\t%0.b, %1/z, %2.b, %2.b" + ) + +-;; Unpredicated multiplication. +-(define_expand "mul3" +- [(set (match_operand:SVE_I 0 "register_operand") +- (unspec:SVE_I +- [(match_dup 3) +- (mult:SVE_I +- (match_operand:SVE_I 1 "register_operand") +- (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; Unpredicated predicate EOR and ORR. ++(define_expand "3" ++ [(set (match_operand:PRED_ALL 0 "register_operand") ++ (and:PRED_ALL ++ (LOGICAL_OR:PRED_ALL ++ (match_operand:PRED_ALL 1 "register_operand") ++ (match_operand:PRED_ALL 2 "register_operand")) ++ (match_dup 3)))] + "TARGET_SVE" + { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[3] = aarch64_ptrue_reg (mode); + } + ) + +-;; Multiplication predicated with a PTRUE. 
We don't actually need the +-;; predicate for the first alternative, but using Upa or X isn't likely +-;; to gain much and would make the instruction seem less uniform to the +-;; register allocator. +-(define_insn_and_split "*mul3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (mult:SVE_I +- (match_operand:SVE_I 2 "register_operand" "%0, 0, w") +- (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))] +- UNSPEC_MERGE_PTRUE))] ++;; Predicated predicate AND, EOR and ORR. ++(define_insn "@aarch64_pred__z" ++ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (and:PRED_ALL ++ (LOGICAL:PRED_ALL ++ (match_operand:PRED_ALL 2 "register_operand" "Upa") ++ (match_operand:PRED_ALL 3 "register_operand" "Upa")) ++ (match_operand:PRED_ALL 1 "register_operand" "Upa")))] + "TARGET_SVE" +- "@ +- # +- mul\t%0., %1/m, %0., %3. +- movprfx\t%0, %2\;mul\t%0., %1/m, %0., %3." +- ; Split the unpredicated form after reload, so that we don't have +- ; the unnecessary PTRUE. +- "&& reload_completed +- && !register_operand (operands[3], mode)" +- [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))] +- "" +- [(set_attr "movprfx" "*,*,yes")] +-) +- +-;; Unpredicated multiplications by a constant (post-RA only). +-;; These are generated by splitting a predicated instruction whose +-;; predicate is unused. +-(define_insn "*post_ra_mul3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w") +- (mult:SVE_I +- (match_operand:SVE_I 1 "register_operand" "0") +- (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))] +- "TARGET_SVE && reload_completed" +- "mul\t%0., %0., #%2" ++ "\t%0.b, %1/z, %2.b, %3.b" + ) + +-(define_insn "*madd" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") +- (plus:SVE_I +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") +- (match_operand:SVE_I 3 "register_operand" "w, w, w"))] +- UNSPEC_MERGE_PTRUE) +- (match_operand:SVE_I 4 "register_operand" "w, 0, w")))] ++;; Perform a logical operation on operands 2 and 3, using operand 1 as ++;; the GP. Store the result in operand 0 and set the flags in the same ++;; way as for PTEST. ++(define_insn "*3_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (and:PRED_ALL ++ (LOGICAL:PRED_ALL ++ (match_operand:PRED_ALL 2 "register_operand" "Upa") ++ (match_operand:PRED_ALL 3 "register_operand" "Upa")) ++ (match_dup 4))] ++ UNSPEC_PTEST)) ++ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) ++ (match_dup 4)))] + "TARGET_SVE" +- "@ +- mad\t%0., %1/m, %3., %4. +- mla\t%0., %1/m, %2., %3. +- movprfx\t%0, %4\;mla\t%0., %1/m, %2., %3." +- [(set_attr "movprfx" "*,*,yes")] ++ "s\t%0.b, %1/z, %2.b, %3.b" + ) + +-(define_insn "*msub3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") +- (minus:SVE_I +- (match_operand:SVE_I 4 "register_operand" "w, 0, w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") +- (match_operand:SVE_I 3 "register_operand" "w, w, w"))] +- UNSPEC_MERGE_PTRUE)))] ++;; Same with just the flags result. 
++(define_insn "*3_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (and:PRED_ALL ++ (LOGICAL:PRED_ALL ++ (match_operand:PRED_ALL 2 "register_operand" "Upa") ++ (match_operand:PRED_ALL 3 "register_operand" "Upa")) ++ (match_dup 4))] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:VNx16BI 0 "=Upa"))] + "TARGET_SVE" +- "@ +- msb\t%0., %1/m, %3., %4. +- mls\t%0., %1/m, %2., %3. +- movprfx\t%0, %4\;mls\t%0., %1/m, %2., %3." +- [(set_attr "movprfx" "*,*,yes")] ++ "s\t%0.b, %1/z, %2.b, %3.b" + ) + +-;; Unpredicated highpart multiplication. +-(define_expand "mul3_highpart" +- [(set (match_operand:SVE_I 0 "register_operand") +- (unspec:SVE_I +- [(match_dup 3) +- (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand") +- (match_operand:SVE_I 2 "register_operand")] +- MUL_HIGHPART)] +- UNSPEC_MERGE_PTRUE))] ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Binary logical operations (inverted second input) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - BIC ++;; - ORN ++;; ------------------------------------------------------------------------- ++ ++;; Predicated predicate BIC and ORN. ++(define_insn "aarch64_pred__z" ++ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (and:PRED_ALL ++ (NLOGICAL:PRED_ALL ++ (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")) ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")) ++ (match_operand:PRED_ALL 1 "register_operand" "Upa")))] + "TARGET_SVE" +- { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); +- } +-) ++ "\t%0.b, %1/z, %2.b, %3.b" ++) ++ ++;; Same, but set the flags as a side-effect. ++(define_insn "*3_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (and:PRED_ALL ++ (NLOGICAL:PRED_ALL ++ (not:PRED_ALL ++ (match_operand:PRED_ALL 3 "register_operand" "Upa")) ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")) ++ (match_dup 4))] ++ UNSPEC_PTEST)) ++ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (and:PRED_ALL (NLOGICAL:PRED_ALL ++ (not:PRED_ALL (match_dup 3)) ++ (match_dup 2)) ++ (match_dup 4)))] ++ "TARGET_SVE" ++ "s\t%0.b, %1/z, %2.b, %3.b" ++) ++ ++;; Same with just the flags result. ++(define_insn "*3_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (and:PRED_ALL ++ (NLOGICAL:PRED_ALL ++ (not:PRED_ALL ++ (match_operand:PRED_ALL 3 "register_operand" "Upa")) ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")) ++ (match_dup 4))] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:VNx16BI 0 "=Upa"))] ++ "TARGET_SVE" ++ "s\t%0.b, %1/z, %2.b, %3.b" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Binary logical operations (inverted result) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - NAND ++;; - NOR ++;; ------------------------------------------------------------------------- + +-;; Predicated highpart multiplication. 
+-(define_insn "*mul3_highpart" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w") +- (match_operand:SVE_I 3 "register_operand" "w, w")] +- MUL_HIGHPART)] +- UNSPEC_MERGE_PTRUE))] ++;; Predicated predicate NAND and NOR. ++(define_insn "aarch64_pred__z" ++ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (and:PRED_ALL ++ (NLOGICAL:PRED_ALL ++ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) ++ (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) ++ (match_operand:PRED_ALL 1 "register_operand" "Upa")))] + "TARGET_SVE" +- "@ +- mulh\t%0., %1/m, %0., %3. +- movprfx\t%0, %2\;mulh\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,yes")] ++ "\t%0.b, %1/z, %2.b, %3.b" + ) + +-;; Unpredicated division. +-(define_expand "3" +- [(set (match_operand:SVE_SDI 0 "register_operand") +- (unspec:SVE_SDI +- [(match_dup 3) +- (SVE_INT_BINARY_SD:SVE_SDI +- (match_operand:SVE_SDI 1 "register_operand") +- (match_operand:SVE_SDI 2 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; Same, but set the flags as a side-effect. ++(define_insn "*3_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (and:PRED_ALL ++ (NLOGICAL:PRED_ALL ++ (not:PRED_ALL ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")) ++ (not:PRED_ALL ++ (match_operand:PRED_ALL 3 "register_operand" "Upa"))) ++ (match_dup 4))] ++ UNSPEC_PTEST)) ++ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (and:PRED_ALL (NLOGICAL:PRED_ALL ++ (not:PRED_ALL (match_dup 2)) ++ (not:PRED_ALL (match_dup 3))) ++ (match_dup 4)))] ++ "TARGET_SVE" ++ "s\t%0.b, %1/z, %2.b, %3.b" ++) ++ ++;; Same with just the flags result. ++(define_insn "*3_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (and:PRED_ALL ++ (NLOGICAL:PRED_ALL ++ (not:PRED_ALL ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")) ++ (not:PRED_ALL ++ (match_operand:PRED_ALL 3 "register_operand" "Upa"))) ++ (match_dup 4))] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:VNx16BI 0 "=Upa"))] ++ "TARGET_SVE" ++ "s\t%0.b, %1/z, %2.b, %3.b" ++) ++ ++;; ========================================================================= ++;; == Ternary arithmetic ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] MLA and MAD ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - MAD ++;; - MLA ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated integer addition of product. ++(define_expand "fma4" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (plus:SVE_FULL_I ++ (unspec:SVE_FULL_I ++ [(match_dup 4) ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand") ++ (match_operand:SVE_FULL_I 2 "nonmemory_operand"))] ++ UNSPEC_PRED_X) ++ (match_operand:SVE_FULL_I 3 "register_operand")))] + "TARGET_SVE" + { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); ++ if (aarch64_prepare_sve_int_fma (operands, PLUS)) ++ DONE; ++ operands[4] = aarch64_ptrue_reg (mode); + } + ) + +-;; Division predicated with a PTRUE. 
+-(define_insn "*3" +- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w") +- (unspec:SVE_SDI +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (SVE_INT_BINARY_SD:SVE_SDI +- (match_operand:SVE_SDI 2 "register_operand" "0, w, w") +- (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))] +- UNSPEC_MERGE_PTRUE))] ++;; Predicated integer addition of product. ++(define_insn "@aarch64_pred_fma" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") ++ (plus:SVE_FULL_I ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))] ++ UNSPEC_PRED_X) ++ (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))] + "TARGET_SVE" + "@ +- \t%0., %1/m, %0., %3. +- r\t%0., %1/m, %0., %2. +- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." ++ mad\t%0., %1/m, %3., %4. ++ mla\t%0., %1/m, %2., %3. ++ movprfx\t%0, %4\;mla\t%0., %1/m, %2., %3." + [(set_attr "movprfx" "*,*,yes")] + ) + +-;; Unpredicated NEG, NOT and POPCOUNT. +-(define_expand "2" +- [(set (match_operand:SVE_I 0 "register_operand") +- (unspec:SVE_I +- [(match_dup 2) +- (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; Predicated integer addition of product with merging. ++(define_expand "cond_fma" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand") ++ (plus:SVE_FULL_I ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand") ++ (match_operand:SVE_FULL_I 3 "general_operand")) ++ (match_operand:SVE_FULL_I 4 "register_operand")) ++ (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] + "TARGET_SVE" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ if (aarch64_prepare_sve_cond_int_fma (operands, PLUS)) ++ DONE; ++ /* Swap the multiplication operands if the fallback value is the ++ second of the two. */ ++ if (rtx_equal_p (operands[3], operands[5])) ++ std::swap (operands[2], operands[3]); + } + ) + +-;; NEG, NOT and POPCOUNT predicated with a PTRUE. +-(define_insn "*2" +- [(set (match_operand:SVE_I 0 "register_operand" "=w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl") +- (SVE_INT_UNARY:SVE_I +- (match_operand:SVE_I 2 "register_operand" "w"))] +- UNSPEC_MERGE_PTRUE))] ++;; Predicated integer addition of product, merging with the first input. ++(define_insn "*cond_fma_2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (plus:SVE_FULL_I ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) ++ (match_operand:SVE_FULL_I 4 "register_operand" "w, w")) ++ (match_dup 2)] ++ UNSPEC_SEL))] + "TARGET_SVE" +- "\t%0., %1/m, %2." ++ "@ ++ mad\t%0., %1/m, %3., %4. ++ movprfx\t%0, %2\;mad\t%0., %1/m, %3., %4." ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Vector AND, ORR and XOR. +-(define_insn "3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, w") +- (LOGICAL:SVE_I +- (match_operand:SVE_I 1 "register_operand" "%0, w") +- (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))] ++;; Predicated integer addition of product, merging with the third input. 
++(define_insn "*cond_fma_4" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (plus:SVE_FULL_I ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) ++ (match_operand:SVE_FULL_I 4 "register_operand" "0, w")) ++ (match_dup 4)] ++ UNSPEC_SEL))] + "TARGET_SVE" + "@ +- \t%0., %0., #%C2 +- \t%0.d, %1.d, %2.d" ++ mla\t%0., %1/m, %2., %3. ++ movprfx\t%0, %4\;mla\t%0., %1/m, %2., %3." ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs +-;; by providing this, but we need to use UNSPECs since rtx logical ops +-;; aren't defined for floating-point modes. +-(define_insn "*3" +- [(set (match_operand:SVE_F 0 "register_operand" "=w") +- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w") +- (match_operand:SVE_F 2 "register_operand" "w")] +- LOGICALF))] +- "TARGET_SVE" +- "\t%0.d, %1.d, %2.d" ++;; Predicated integer addition of product, merging with an independent value. ++(define_insn_and_rewrite "*cond_fma_any" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (plus:SVE_FULL_I ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")) ++ (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")) ++ (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[5]) ++ && !rtx_equal_p (operands[3], operands[5]) ++ && !rtx_equal_p (operands[4], operands[5])" ++ "@ ++ movprfx\t%0., %1/z, %4.\;mla\t%0., %1/m, %2., %3. ++ movprfx\t%0., %1/z, %0.\;mla\t%0., %1/m, %2., %3. ++ movprfx\t%0., %1/z, %0.\;mad\t%0., %1/m, %3., %4. ++ movprfx\t%0., %1/z, %0.\;mad\t%0., %1/m, %2., %4. ++ movprfx\t%0., %1/m, %4.\;mla\t%0., %1/m, %2., %3. ++ #" ++ "&& reload_completed ++ && register_operand (operands[5], mode) ++ && !rtx_equal_p (operands[0], operands[5])" ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[4], ++ operands[5], operands[1])); ++ operands[5] = operands[4] = operands[0]; ++ } ++ [(set_attr "movprfx" "yes")] + ) + +-;; REG_EQUAL notes on "not3" should ensure that we can generate +-;; this pattern even though the NOT instruction itself is predicated. +-(define_insn "bic3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w") +- (and:SVE_I +- (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w")) +- (match_operand:SVE_I 2 "register_operand" "w")))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] MLS and MSB ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - MLS ++;; - MSB ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated integer subtraction of product. 
++(define_expand "fnma4" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (minus:SVE_FULL_I ++ (match_operand:SVE_FULL_I 3 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_dup 4) ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 1 "register_operand") ++ (match_operand:SVE_FULL_I 2 "general_operand"))] ++ UNSPEC_PRED_X)))] + "TARGET_SVE" +- "bic\t%0.d, %2.d, %1.d" ++ { ++ if (aarch64_prepare_sve_int_fma (operands, MINUS)) ++ DONE; ++ operands[4] = aarch64_ptrue_reg (mode); ++ } + ) + +-;; Predicate AND. We can reuse one of the inputs as the GP. +-(define_insn "and3" +- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") +- (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") +- (match_operand:PRED_ALL 2 "register_operand" "Upa")))] ++;; Predicated integer subtraction of product. ++(define_insn "@aarch64_pred_fnma" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") ++ (minus:SVE_FULL_I ++ (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))] ++ UNSPEC_PRED_X)))] + "TARGET_SVE" +- "and\t%0.b, %1/z, %1.b, %2.b" ++ "@ ++ msb\t%0., %1/m, %3., %4. ++ mls\t%0., %1/m, %2., %3. ++ movprfx\t%0, %4\;mls\t%0., %1/m, %2., %3." ++ [(set_attr "movprfx" "*,*,yes")] + ) + +-;; Unpredicated predicate ORR and XOR. +-(define_expand "3" +- [(set (match_operand:PRED_ALL 0 "register_operand") +- (and:PRED_ALL +- (LOGICAL_OR:PRED_ALL +- (match_operand:PRED_ALL 1 "register_operand") +- (match_operand:PRED_ALL 2 "register_operand")) +- (match_dup 3)))] ++;; Predicated integer subtraction of product with merging. ++(define_expand "cond_fnma" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand") ++ (minus:SVE_FULL_I ++ (match_operand:SVE_FULL_I 4 "register_operand") ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand") ++ (match_operand:SVE_FULL_I 3 "general_operand"))) ++ (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] + "TARGET_SVE" + { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); ++ if (aarch64_prepare_sve_cond_int_fma (operands, MINUS)) ++ DONE; ++ /* Swap the multiplication operands if the fallback value is the ++ second of the two. */ ++ if (rtx_equal_p (operands[3], operands[5])) ++ std::swap (operands[2], operands[3]); + } + ) + +-;; Predicated predicate ORR and XOR. +-(define_insn "pred_3" +- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") +- (and:PRED_ALL +- (LOGICAL:PRED_ALL +- (match_operand:PRED_ALL 2 "register_operand" "Upa") +- (match_operand:PRED_ALL 3 "register_operand" "Upa")) +- (match_operand:PRED_ALL 1 "register_operand" "Upa")))] ++;; Predicated integer subtraction of product, merging with the first input. ++(define_insn "*cond_fnma_2" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (minus:SVE_FULL_I ++ (match_operand:SVE_FULL_I 4 "register_operand" "w, w") ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))) ++ (match_dup 2)] ++ UNSPEC_SEL))] + "TARGET_SVE" +- "\t%0.b, %1/z, %2.b, %3.b" ++ "@ ++ msb\t%0., %1/m, %3., %4. ++ movprfx\t%0, %2\;msb\t%0., %1/m, %3., %4." 
++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Perform a logical operation on operands 2 and 3, using operand 1 as +-;; the GP (which is known to be a PTRUE). Store the result in operand 0 +-;; and set the flags in the same way as for PTEST. The (and ...) in the +-;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested +-;; value is structurally equivalent to rhs of the second set. +-(define_insn "*3_cc" +- [(set (reg:CC CC_REGNUM) +- (compare:CC +- (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa") +- (and:PRED_ALL +- (LOGICAL:PRED_ALL +- (match_operand:PRED_ALL 2 "register_operand" "Upa") +- (match_operand:PRED_ALL 3 "register_operand" "Upa")) +- (match_dup 1))] +- UNSPEC_PTEST_PTRUE) +- (const_int 0))) +- (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") +- (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) +- (match_dup 1)))] ++;; Predicated integer subtraction of product, merging with the third input. ++(define_insn "*cond_fnma_4" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (minus:SVE_FULL_I ++ (match_operand:SVE_FULL_I 4 "register_operand" "0, w") ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))) ++ (match_dup 4)] ++ UNSPEC_SEL))] + "TARGET_SVE" +- "s\t%0.b, %1/z, %2.b, %3.b" ++ "@ ++ mls\t%0., %1/m, %2., %3. ++ movprfx\t%0, %4\;mls\t%0., %1/m, %2., %3." ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Unpredicated predicate inverse. +-(define_expand "one_cmpl2" +- [(set (match_operand:PRED_ALL 0 "register_operand") +- (and:PRED_ALL +- (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) +- (match_dup 2)))] +- "TARGET_SVE" ++;; Predicated integer subtraction of product, merging with an ++;; independent value. ++(define_insn_and_rewrite "*cond_fnma_any" ++ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") ++ (unspec:SVE_FULL_I ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (minus:SVE_FULL_I ++ (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w") ++ (mult:SVE_FULL_I ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w") ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))) ++ (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[5]) ++ && !rtx_equal_p (operands[3], operands[5]) ++ && !rtx_equal_p (operands[4], operands[5])" ++ "@ ++ movprfx\t%0., %1/z, %4.\;mls\t%0., %1/m, %2., %3. ++ movprfx\t%0., %1/z, %0.\;mls\t%0., %1/m, %2., %3. ++ movprfx\t%0., %1/z, %0.\;msb\t%0., %1/m, %3., %4. ++ movprfx\t%0., %1/z, %0.\;msb\t%0., %1/m, %2., %4. ++ movprfx\t%0., %1/m, %4.\;mls\t%0., %1/m, %2., %3. ++ #" ++ "&& reload_completed ++ && register_operand (operands[5], mode) ++ && !rtx_equal_p (operands[0], operands[5])" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ emit_insn (gen_vcond_mask_ (operands[0], operands[4], ++ operands[5], operands[1])); ++ operands[5] = operands[4] = operands[0]; + } ++ [(set_attr "movprfx" "yes")] + ) + +-;; Predicated predicate inverse. 
+-(define_insn "*one_cmpl3" +- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") +- (and:PRED_ALL +- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) +- (match_operand:PRED_ALL 1 "register_operand" "Upa")))] +- "TARGET_SVE" +- "not\t%0.b, %1/z, %2.b" +-) +- +-;; Predicated predicate BIC and ORN. +-(define_insn "*3" +- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") +- (and:PRED_ALL +- (NLOGICAL:PRED_ALL +- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) +- (match_operand:PRED_ALL 3 "register_operand" "Upa")) +- (match_operand:PRED_ALL 1 "register_operand" "Upa")))] +- "TARGET_SVE" +- "\t%0.b, %1/z, %3.b, %2.b" +-) ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Dot product ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SDOT ++;; - SUDOT (I8MM) ++;; - UDOT ++;; - USDOT (I8MM) ++;; ------------------------------------------------------------------------- + +-;; Predicated predicate NAND and NOR. +-(define_insn "*3" +- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") +- (and:PRED_ALL +- (NLOGICAL:PRED_ALL +- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) +- (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) +- (match_operand:PRED_ALL 1 "register_operand" "Upa")))] ++;; Four-element integer dot-product with accumulation. ++(define_insn "dot_prod" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") ++ (plus:SVE_FULL_SDI ++ (unspec:SVE_FULL_SDI ++ [(match_operand: 1 "register_operand" "w, w") ++ (match_operand: 2 "register_operand" "w, w")] ++ DOTPROD) ++ (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))] + "TARGET_SVE" +- "\t%0.b, %1/z, %2.b, %3.b" ++ "@ ++ dot\\t%0., %1., %2. ++ movprfx\t%0, %3\;dot\\t%0., %1., %2." ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Unpredicated LSL, LSR and ASR by a vector. +-(define_expand "v3" +- [(set (match_operand:SVE_I 0 "register_operand") +- (unspec:SVE_I +- [(match_dup 3) +- (ASHIFT:SVE_I +- (match_operand:SVE_I 1 "register_operand") +- (match_operand:SVE_I 2 "aarch64_sve_shift_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; Four-element integer dot-product by selected lanes with accumulation. ++(define_insn "@aarch64_dot_prod_lane" ++ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") ++ (plus:SVE_FULL_SDI ++ (unspec:SVE_FULL_SDI ++ [(match_operand: 1 "register_operand" "w, w") ++ (unspec: ++ [(match_operand: 2 "register_operand" ", ") ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPEC_SVE_LANE_SELECT)] ++ DOTPROD) ++ (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))] + "TARGET_SVE" +- { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); +- } ++ "@ ++ dot\\t%0., %1., %2.[%3] ++ movprfx\t%0, %4\;dot\\t%0., %1., %2.[%3]" ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't +-;; actually need the predicate for the first alternative, but using Upa +-;; or X isn't likely to gain much and would make the instruction seem +-;; less uniform to the register allocator. 
+-(define_insn_and_split "*v3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (ASHIFT:SVE_I +- (match_operand:SVE_I 2 "register_operand" "w, 0, w") +- (match_operand:SVE_I 3 "aarch64_sve_shift_operand" "D, w, w"))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" ++(define_insn "@aarch64_dot_prod" ++ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") ++ (plus:VNx4SI_ONLY ++ (unspec:VNx4SI_ONLY ++ [(match_operand: 1 "register_operand" "w, w") ++ (match_operand: 2 "register_operand" "w, w")] ++ DOTPROD_US_ONLY) ++ (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))] ++ "TARGET_SVE_I8MM" + "@ +- # +- \t%0., %1/m, %0., %3. +- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." +- "&& reload_completed +- && !register_operand (operands[3], mode)" +- [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))] +- "" +- [(set_attr "movprfx" "*,*,yes")] ++ dot\\t%0.s, %1.b, %2.b ++ movprfx\t%0, %3\;dot\\t%0.s, %1.b, %2.b" ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Unpredicated shift operations by a constant (post-RA only). +-;; These are generated by splitting a predicated instruction whose +-;; predicate is unused. +-(define_insn "*post_ra_v3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w") +- (ASHIFT:SVE_I +- (match_operand:SVE_I 1 "register_operand" "w") +- (match_operand:SVE_I 2 "aarch64_simd_shift_imm")))] +- "TARGET_SVE && reload_completed" +- "\t%0., %1., #%2" ++(define_insn "@aarch64_dot_prod_lane" ++ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") ++ (plus:VNx4SI_ONLY ++ (unspec:VNx4SI_ONLY ++ [(match_operand: 1 "register_operand" "w, w") ++ (unspec: ++ [(match_operand: 2 "register_operand" "y, y") ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPEC_SVE_LANE_SELECT)] ++ DOTPROD_I8MM) ++ (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))] ++ "TARGET_SVE_I8MM" ++ "@ ++ dot\\t%0.s, %1.b, %2.b[%3] ++ movprfx\t%0, %4\;dot\\t%0.s, %1.b, %2.b[%3]" ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; LSL, LSR and ASR by a scalar, which expands into one of the vector +-;; shifts above. +-(define_expand "3" +- [(set (match_operand:SVE_I 0 "register_operand") +- (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand") +- (match_operand: 2 "general_operand")))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Sum of absolute differences ++;; ------------------------------------------------------------------------- ++;; The patterns in this section are synthetic. ++;; ------------------------------------------------------------------------- ++ ++;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in ++;; operands 1 and 2. The sequence also has to perform a widening reduction of ++;; the difference into a vector and accumulate that into operand 3 before ++;; copying that into the result operand 0. 
++;; Perform that with a sequence of: ++;; MOV ones.b, #1 ++;; [SU]ABD diff.b, p0/m, op1.b, op2.b ++;; MOVPRFX op0, op3 // If necessary ++;; UDOT op0.s, diff.b, ones.b ++(define_expand "sad" ++ [(use (match_operand:SVE_FULL_SDI 0 "register_operand")) ++ (unspec: [(use (match_operand: 1 "register_operand")) ++ (use (match_operand: 2 "register_operand"))] ABAL) ++ (use (match_operand:SVE_FULL_SDI 3 "register_operand"))] + "TARGET_SVE" + { +- rtx amount; +- if (CONST_INT_P (operands[2])) +- { +- amount = gen_const_vec_duplicate (mode, operands[2]); +- if (!aarch64_sve_shift_operand (operands[2], mode)) +- amount = force_reg (mode, amount); +- } +- else +- { +- amount = gen_reg_rtx (mode); +- emit_insn (gen_vec_duplicate (amount, +- convert_to_mode (mode, +- operands[2], 0))); +- } +- emit_insn (gen_v3 (operands[0], operands[1], amount)); ++ rtx ones = force_reg (mode, CONST1_RTX (mode)); ++ rtx diff = gen_reg_rtx (mode); ++ emit_insn (gen_abd_3 (diff, operands[1], operands[2])); ++ emit_insn (gen_udot_prod (operands[0], diff, ones, operands[3])); + DONE; + } + ) + +-;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE. +-;; +-;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP +-;; is a PTRUE even if the optimizers haven't yet been able to propagate +-;; the constant. We would use a separate unspec code for PTESTs involving +-;; GPs that might not be PTRUEs. +-(define_insn "ptest_ptrue" +- [(set (reg:CC CC_REGNUM) +- (compare:CC +- (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa") +- (match_operand:PRED_ALL 1 "register_operand" "Upa")] +- UNSPEC_PTEST_PTRUE) +- (const_int 0)))] +- "TARGET_SVE" +- "ptest\t%0, %1.b" ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Matrix multiply-accumulate ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SMMLA (I8MM) ++;; - UMMLA (I8MM) ++;; - USMMLA (I8MM) ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@aarch64_sve_add_" ++ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") ++ (plus:VNx4SI_ONLY ++ (unspec:VNx4SI_ONLY ++ [(match_operand: 2 "register_operand" "w, w") ++ (match_operand: 3 "register_operand" "w, w")] ++ MATMUL) ++ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] ++ "TARGET_SVE_I8MM" ++ "@ ++ mmla\\t%0.s, %2.b, %3.b ++ movprfx\t%0, %1\;mmla\\t%0.s, %2.b, %3.b" ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Set element I of the result if operand1 + J < operand2 for all J in [0, I]. +-;; with the comparison being unsigned. +-(define_insn "while_ult" +- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") +- (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") +- (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] +- UNSPEC_WHILE_LO)) +- (clobber (reg:CC CC_REGNUM))] +- "TARGET_SVE" +- "whilelo\t%0., %1, %2" +-) +- +-;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP. +-;; Handle the case in which both results are useful. The GP operand +-;; to the PTEST isn't needed, so we allow it to be anything. 
+-(define_insn_and_split "while_ult_cc" +- [(set (reg:CC CC_REGNUM) +- (compare:CC +- (unspec:SI [(match_operand:PRED_ALL 1) +- (unspec:PRED_ALL +- [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") +- (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] +- UNSPEC_WHILE_LO)] +- UNSPEC_PTEST_PTRUE) +- (const_int 0))) +- (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") +- (unspec:PRED_ALL [(match_dup 2) +- (match_dup 3)] +- UNSPEC_WHILE_LO))] ++;; ------------------------------------------------------------------------- ++;; ---- [FP] General ternary arithmetic corresponding to unspecs ++;; ------------------------------------------------------------------------- ++;; Includes merging patterns for: ++;; - FMAD ++;; - FMLA ++;; - FMLS ++;; - FMSB ++;; - FNMAD ++;; - FNMLA ++;; - FNMLS ++;; - FNMSB ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated floating-point ternary operations. ++(define_expand "4" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 4) ++ (const_int SVE_RELAXED_GP) ++ (match_operand:SVE_FULL_F 1 "register_operand") ++ (match_operand:SVE_FULL_F 2 "register_operand") ++ (match_operand:SVE_FULL_F 3 "register_operand")] ++ SVE_COND_FP_TERNARY))] + "TARGET_SVE" +- "whilelo\t%0., %2, %3" +- ;; Force the compiler to drop the unused predicate operand, so that we +- ;; don't have an unnecessary PTRUE. +- "&& !CONSTANT_P (operands[1])" +- [(const_int 0)] + { +- emit_insn (gen_while_ult_cc +- (operands[0], CONSTM1_RTX (mode), +- operands[2], operands[3])); +- DONE; ++ operands[4] = aarch64_ptrue_reg (mode); + } + ) + +-;; Integer comparisons predicated with a PTRUE. +-(define_insn "*cmp" +- [(set (match_operand: 0 "register_operand" "=Upa, Upa") +- (unspec: +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (SVE_INT_CMP: +- (match_operand:SVE_I 2 "register_operand" "w, w") +- (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] +- UNSPEC_MERGE_PTRUE)) +- (clobber (reg:CC CC_REGNUM))] ++;; Predicated floating-point ternary operations. ++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w") ++ (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")] ++ SVE_COND_FP_TERNARY))] + "TARGET_SVE" + "@ +- cmp\t%0., %1/z, %2., #%3 +- cmp\t%0., %1/z, %2., %3." ++ \t%0., %1/m, %2., %3. ++ \t%0., %1/m, %3., %4. ++ movprfx\t%0, %4\;\t%0., %1/m, %2., %3." ++ [(set_attr "movprfx" "*,*,yes")] + ) + +-;; Integer comparisons predicated with a PTRUE in which only the flags result +-;; is interesting. +-(define_insn "*cmp_ptest" +- [(set (reg:CC CC_REGNUM) +- (compare:CC +- (unspec:SI +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (unspec: +- [(match_dup 1) +- (SVE_INT_CMP: +- (match_operand:SVE_I 2 "register_operand" "w, w") +- (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] +- UNSPEC_MERGE_PTRUE)] +- UNSPEC_PTEST_PTRUE) +- (const_int 0))) +- (clobber (match_scratch: 0 "=Upa, Upa"))] ++;; Predicated floating-point ternary operations with merging. 
++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_F 2 "register_operand") ++ (match_operand:SVE_FULL_F 3 "register_operand") ++ (match_operand:SVE_FULL_F 4 "register_operand")] ++ SVE_COND_FP_TERNARY) ++ (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] + "TARGET_SVE" ++{ ++ /* Swap the multiplication operands if the fallback value is the ++ second of the two. */ ++ if (rtx_equal_p (operands[3], operands[5])) ++ std::swap (operands[2], operands[3]); ++}) ++ ++;; Predicated floating-point ternary operations, merging with the ++;; first input. ++(define_insn_and_rewrite "*cond__2" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] ++ SVE_COND_FP_TERNARY) ++ (match_dup 2)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "@ +- cmp\t%0., %1/z, %2., #%3 +- cmp\t%0., %1/z, %2., %3." ++ \t%0., %1/m, %3., %4. ++ movprfx\t%0, %2\;\t%0., %1/m, %3., %4." ++ "&& !rtx_equal_p (operands[1], operands[5])" ++ { ++ operands[5] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Integer comparisons predicated with a PTRUE in which both the flag and +-;; predicate results are interesting. +-(define_insn "*cmp_cc" +- [(set (reg:CC CC_REGNUM) +- (compare:CC +- (unspec:SI +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (unspec: +- [(match_dup 1) +- (SVE_INT_CMP: +- (match_operand:SVE_I 2 "register_operand" "w, w") +- (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] +- UNSPEC_MERGE_PTRUE)] +- UNSPEC_PTEST_PTRUE) +- (const_int 0))) +- (set (match_operand: 0 "register_operand" "=Upa, Upa") +- (unspec: +- [(match_dup 1) +- (SVE_INT_CMP: +- (match_dup 2) +- (match_dup 3))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" ++;; Predicated floating-point ternary operations, merging with the ++;; third input. ++(define_insn_and_rewrite "*cond__4" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] ++ SVE_COND_FP_TERNARY) ++ (match_dup 4)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "@ +- cmp\t%0., %1/z, %2., #%3 +- cmp\t%0., %1/z, %2., %3." ++ \t%0., %1/m, %2., %3. ++ movprfx\t%0, %4\;\t%0., %1/m, %2., %3." ++ "&& !rtx_equal_p (operands[1], operands[5])" ++ { ++ operands[5] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Predicated integer comparisons, formed by combining a PTRUE-predicated +-;; comparison with an AND. Split the instruction into its preferred form +-;; (below) at the earliest opportunity, in order to get rid of the +-;; redundant operand 1. 
+-(define_insn_and_split "*pred_cmp_combine" +- [(set (match_operand: 0 "register_operand" "=Upa, Upa") +- (and: +- (unspec: +- [(match_operand: 1) +- (SVE_INT_CMP: +- (match_operand:SVE_I 2 "register_operand" "w, w") +- (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] +- UNSPEC_MERGE_PTRUE) +- (match_operand: 4 "register_operand" "Upl, Upl"))) +- (clobber (reg:CC CC_REGNUM))] +- "TARGET_SVE" +- "#" ++;; Predicated floating-point ternary operations, merging with an ++;; independent value. ++(define_insn_and_rewrite "*cond__any" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 6) ++ (match_operand:SI 7 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") ++ (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] ++ SVE_COND_FP_TERNARY) ++ (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[2], operands[5]) ++ && !rtx_equal_p (operands[3], operands[5]) ++ && !rtx_equal_p (operands[4], operands[5]) ++ && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" ++ "@ ++ movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %3. ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %3., %4. ++ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %4. ++ movprfx\t%0., %1/m, %4.\;\t%0., %1/m, %2., %3. ++ #" + "&& 1" +- [(parallel +- [(set (match_dup 0) +- (and: +- (SVE_INT_CMP: +- (match_dup 2) +- (match_dup 3)) +- (match_dup 4))) +- (clobber (reg:CC CC_REGNUM))])] ++ { ++ if (reload_completed ++ && register_operand (operands[5], mode) ++ && !rtx_equal_p (operands[0], operands[5])) ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[4], ++ operands[5], operands[1])); ++ operands[5] = operands[4] = operands[0]; ++ } ++ else if (!rtx_equal_p (operands[1], operands[6])) ++ operands[6] = copy_rtx (operands[1]); ++ else ++ FAIL; ++ } ++ [(set_attr "movprfx" "yes")] + ) + +-;; Predicated integer comparisons. +-(define_insn "*pred_cmp" +- [(set (match_operand: 0 "register_operand" "=Upa, Upa") +- (and: +- (SVE_INT_CMP: +- (match_operand:SVE_I 2 "register_operand" "w, w") +- (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w")) +- (match_operand: 1 "register_operand" "Upl, Upl"))) +- (clobber (reg:CC CC_REGNUM))] ++;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using ++;; (fma ...) since target-independent code won't understand the indexing. ++(define_insn "@aarch64__lane_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand:SVE_FULL_F 1 "register_operand" "w, w") ++ (unspec:SVE_FULL_F ++ [(match_operand:SVE_FULL_F 2 "register_operand" ", ") ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPEC_SVE_LANE_SELECT) ++ (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] ++ SVE_FP_TERNARY_LANE))] + "TARGET_SVE" + "@ +- cmp\t%0., %1/z, %2., #%3 +- cmp\t%0., %1/z, %2., %3." ++ \t%0., %1., %2.[%3] ++ movprfx\t%0, %4\;\t%0., %1., %2.[%3]" ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Floating-point comparisons predicated with a PTRUE. 
+-(define_insn "*fcm" +- [(set (match_operand: 0 "register_operand" "=Upa, Upa") +- (unspec: ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Complex multiply-add ++;; ------------------------------------------------------------------------- ++;; Includes merging patterns for: ++;; - FCMLA ++;; ------------------------------------------------------------------------- ++ ++;; Predicated FCMLA. ++(define_insn "@aarch64_pred_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") +- (SVE_FP_CMP: +- (match_operand:SVE_F 2 "register_operand" "w, w") +- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] +- UNSPEC_MERGE_PTRUE))] ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] ++ SVE_COND_FCMLA))] + "TARGET_SVE" + "@ +- fcm\t%0., %1/z, %2., #0.0 +- fcm\t%0., %1/z, %2., %3." ++ fcmla\t%0., %1/m, %2., %3., # ++ movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" ++ [(set_attr "movprfx" "*,yes")] + ) + +-(define_insn "*fcmuo" +- [(set (match_operand: 0 "register_operand" "=Upa") +- (unspec: +- [(match_operand: 1 "register_operand" "Upl") +- (unordered: +- (match_operand:SVE_F 2 "register_operand" "w") +- (match_operand:SVE_F 3 "register_operand" "w"))] +- UNSPEC_MERGE_PTRUE))] ++;; Predicated FCMLA with merging. ++(define_expand "@cond_" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_F 2 "register_operand") ++ (match_operand:SVE_FULL_F 3 "register_operand") ++ (match_operand:SVE_FULL_F 4 "register_operand")] ++ SVE_COND_FCMLA) ++ (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] + "TARGET_SVE" +- "fcmuo\t%0., %1/z, %2., %3." + ) + +-;; Floating-point comparisons predicated on a PTRUE, with the results ANDed +-;; with another predicate P. This does not have the same trapping behavior +-;; as predicating the comparison itself on P, but it's a legitimate fold, +-;; since we can drop any potentially-trapping operations whose results +-;; are not needed. +-;; +-;; Split the instruction into its preferred form (below) at the earliest +-;; opportunity, in order to get rid of the redundant operand 1. +-(define_insn_and_split "*fcm_and_combine" +- [(set (match_operand: 0 "register_operand" "=Upa, Upa") +- (and: +- (unspec: +- [(match_operand: 1) +- (SVE_FP_CMP +- (match_operand:SVE_F 2 "register_operand" "w, w") +- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] +- UNSPEC_MERGE_PTRUE) +- (match_operand: 4 "register_operand" "Upl, Upl")))] +- "TARGET_SVE" +- "#" +- "&& 1" +- [(set (match_dup 0) +- (and: +- (SVE_FP_CMP: +- (match_dup 2) +- (match_dup 3)) +- (match_dup 4)))] ++;; Predicated FCMLA, merging with the third input. 
++(define_insn_and_rewrite "*cond__4" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] ++ SVE_COND_FCMLA) ++ (match_dup 4)] ++ UNSPEC_SEL))] ++ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" ++ "@ ++ fcmla\t%0., %1/m, %2., %3., # ++ movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" ++ "&& !rtx_equal_p (operands[1], operands[5])" ++ { ++ operands[5] = copy_rtx (operands[1]); ++ } ++ [(set_attr "movprfx" "*,yes")] + ) + +-(define_insn_and_split "*fcmuo_and_combine" +- [(set (match_operand: 0 "register_operand" "=Upa") +- (and: +- (unspec: +- [(match_operand: 1) +- (unordered +- (match_operand:SVE_F 2 "register_operand" "w") +- (match_operand:SVE_F 3 "register_operand" "w"))] +- UNSPEC_MERGE_PTRUE) +- (match_operand: 4 "register_operand" "Upl")))] +- "TARGET_SVE" +- "#" ++;; Predicated FCMLA, merging with an independent value. ++(define_insn_and_rewrite "*cond__any" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 6) ++ (match_operand:SI 7 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") ++ (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] ++ SVE_COND_FCMLA) ++ (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && !rtx_equal_p (operands[4], operands[5]) ++ && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" ++ "@ ++ movprfx\t%0., %1/z, %4.\;fcmla\t%0., %1/m, %2., %3., # ++ movprfx\t%0., %1/z, %0.\;fcmla\t%0., %1/m, %2., %3., # ++ movprfx\t%0., %1/m, %4.\;fcmla\t%0., %1/m, %2., %3., # ++ #" + "&& 1" +- [(set (match_dup 0) +- (and: +- (unordered: +- (match_dup 2) +- (match_dup 3)) +- (match_dup 4)))] ++ { ++ if (reload_completed ++ && register_operand (operands[5], mode) ++ && !rtx_equal_p (operands[0], operands[5])) ++ { ++ emit_insn (gen_vcond_mask_ (operands[0], operands[4], ++ operands[5], operands[1])); ++ operands[5] = operands[4] = operands[0]; ++ } ++ else if (!rtx_equal_p (operands[1], operands[6])) ++ operands[6] = copy_rtx (operands[1]); ++ else ++ FAIL; ++ } ++ [(set_attr "movprfx" "yes")] + ) + +-;; Unpredicated floating-point comparisons, with the results ANDed +-;; with another predicate. This is a valid fold for the same reasons +-;; as above. +-(define_insn "*fcm_and" +- [(set (match_operand: 0 "register_operand" "=Upa, Upa") +- (and: +- (SVE_FP_CMP: +- (match_operand:SVE_F 2 "register_operand" "w, w") +- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")) +- (match_operand: 1 "register_operand" "Upl, Upl")))] ++;; Unpredicated FCMLA with indexing. 
++(define_insn "@aarch64__lane_" ++ [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_HSF ++ [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w") ++ (unspec:SVE_FULL_HSF ++ [(match_operand:SVE_FULL_HSF 2 "register_operand" ", ") ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPEC_SVE_LANE_SELECT) ++ (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")] ++ FCMLA))] + "TARGET_SVE" + "@ +- fcm\t%0., %1/z, %2., #0.0 +- fcm\t%0., %1/z, %2., %3." ++ fcmla\t%0., %1., %2.[%3], # ++ movprfx\t%0, %4\;fcmla\t%0., %1., %2.[%3], #" ++ [(set_attr "movprfx" "*,yes")] + ) + +-(define_insn "*fcmuo_and" +- [(set (match_operand: 0 "register_operand" "=Upa") +- (and: +- (unordered: +- (match_operand:SVE_F 2 "register_operand" "w") +- (match_operand:SVE_F 3 "register_operand" "w")) +- (match_operand: 1 "register_operand" "Upl")))] ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Trigonometric multiply-add ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FTMAD ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@aarch64_sve_tmad" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand:SVE_FULL_F 1 "register_operand" "0, w") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") ++ (match_operand:DI 3 "const_int_operand")] ++ UNSPEC_FTMAD))] + "TARGET_SVE" +- "fcmuo\t%0., %1/z, %2., %3." ++ "@ ++ ftmad\t%0., %0., %2., #%3 ++ movprfx\t%0, %1\;ftmad\t%0., %0., %2., #%3" ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Predicated floating-point comparisons. We don't need a version +-;; of this for unordered comparisons. +-(define_insn "*pred_fcm" +- [(set (match_operand: 0 "register_operand" "=Upa, Upa") +- (unspec: +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (match_operand:SVE_F 2 "register_operand" "w, w") +- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] +- SVE_COND_FP_CMP))] ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - BFDOT (BF16) ++;; - BFMLALB (BF16) ++;; - BFMLALT (BF16) ++;; - BFMMLA (BF16) ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@aarch64_sve_vnx4sf" ++ [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") ++ (unspec:VNx4SF ++ [(match_operand:VNx4SF 1 "register_operand" "0, w") ++ (match_operand:VNx8BF 2 "register_operand" "w, w") ++ (match_operand:VNx8BF 3 "register_operand" "w, w")] ++ SVE_BFLOAT_TERNARY_LONG))] ++ "TARGET_SVE_BF16" ++ "@ ++ \t%0.s, %2.h, %3.h ++ movprfx\t%0, %1\;\t%0.s, %2.h, %3.h" ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; The immediate range is enforced before generating the instruction. 
++(define_insn "@aarch64_sve__lanevnx4sf" ++ [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") ++ (unspec:VNx4SF ++ [(match_operand:VNx4SF 1 "register_operand" "0, w") ++ (match_operand:VNx8BF 2 "register_operand" "w, w") ++ (match_operand:VNx8BF 3 "register_operand" "y, y") ++ (match_operand:SI 4 "const_int_operand")] ++ SVE_BFLOAT_TERNARY_LONG_LANE))] ++ "TARGET_SVE_BF16" ++ "@ ++ \t%0.s, %2.h, %3.h[%4] ++ movprfx\t%0, %1\;\t%0.s, %2.h, %3.h[%4]" ++ [(set_attr "movprfx" "*,yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Matrix multiply-accumulate ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FMMLA (F32MM,F64MM) ++;; ------------------------------------------------------------------------- ++ ++;; The mode iterator enforces the target requirements. ++(define_insn "@aarch64_sve_" ++ [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_MATMULF ++ [(match_operand:SVE_MATMULF 2 "register_operand" "w, w") ++ (match_operand:SVE_MATMULF 3 "register_operand" "w, w") ++ (match_operand:SVE_MATMULF 1 "register_operand" "0, w")] ++ FMMLA))] + "TARGET_SVE" + "@ +- fcm\t%0., %1/z, %2., #0.0 +- fcm\t%0., %1/z, %2., %3." ++ \\t%0., %2., %3. ++ movprfx\t%0, %1\;\\t%0., %2., %3." ++ [(set_attr "movprfx" "*,yes")] + ) + ++;; ========================================================================= ++;; == Comparisons and selects ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT,FP] Select based on predicates ++;; ------------------------------------------------------------------------- ++;; Includes merging patterns for: ++;; - FMOV ++;; - MOV ++;; - SEL ++;; ------------------------------------------------------------------------- ++ + ;; vcond_mask operand order: true, false, mask + ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) + ;; SEL operand order: mask, true, false +-(define_insn "vcond_mask_" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") +- (unspec:SVE_ALL +- [(match_operand: 3 "register_operand" "Upa") +- (match_operand:SVE_ALL 1 "register_operand" "w") +- (match_operand:SVE_ALL 2 "register_operand" "w")] ++(define_expand "@vcond_mask_" ++ [(set (match_operand:SVE_FULL 0 "register_operand") ++ (unspec:SVE_FULL ++ [(match_operand: 3 "register_operand") ++ (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm") ++ (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE" +- "sel\t%0., %3, %1., %2." ++ { ++ if (register_operand (operands[1], mode)) ++ operands[2] = force_reg (mode, operands[2]); ++ } + ) + +-;; Selects between a duplicated immediate and zero. 
+-(define_insn "aarch64_sve_dup_const" +- [(set (match_operand:SVE_I 0 "register_operand" "=w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl") +- (match_operand:SVE_I 2 "aarch64_sve_dup_immediate") +- (match_operand:SVE_I 3 "aarch64_simd_imm_zero")] ++;; Selects between: ++;; - two registers ++;; - a duplicated immediate and a register ++;; - a duplicated immediate and zero ++(define_insn "*vcond_mask_" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w") ++ (unspec:SVE_FULL ++ [(match_operand: 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl") ++ (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc") ++ (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && (!register_operand (operands[1], mode) ++ || register_operand (operands[2], mode))" ++ "@ ++ sel\t%0., %3, %1., %2. ++ mov\t%0., %3/m, #%I1 ++ mov\t%0., %3/z, #%I1 ++ fmov\t%0., %3/m, #%1 ++ movprfx\t%0., %3/z, %0.\;fmov\t%0., %3/m, #%1 ++ movprfx\t%0, %2\;mov\t%0., %3/m, #%I1 ++ movprfx\t%0, %2\;fmov\t%0., %3/m, #%1" ++ [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] ++) ++ ++;; Optimize selects between a duplicated scalar variable and another vector, ++;; the latter of which can be a zero constant or a variable. Treat duplicates ++;; of GPRs as being more expensive than duplicates of FPRs, since they ++;; involve a cross-file move. ++(define_insn "@aarch64_sel_dup" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w") ++ (unspec:SVE_FULL ++ [(match_operand: 3 "register_operand" "Upa, Upa, Upl, Upl, Upl, Upl") ++ (vec_duplicate:SVE_FULL ++ (match_operand: 1 "register_operand" "r, w, r, w, r, w")) ++ (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")] + UNSPEC_SEL))] + "TARGET_SVE" +- "mov\t%0., %1/z, #%2" ++ "@ ++ mov\t%0., %3/m, %1 ++ mov\t%0., %3/m, %1 ++ movprfx\t%0., %3/z, %0.\;mov\t%0., %3/m, %1 ++ movprfx\t%0., %3/z, %0.\;mov\t%0., %3/m, %1 ++ movprfx\t%0, %2\;mov\t%0., %3/m, %1 ++ movprfx\t%0, %2\;mov\t%0., %3/m, %1" ++ [(set_attr "movprfx" "*,*,yes,yes,yes,yes")] + ) + ++;; ------------------------------------------------------------------------- ++;; ---- [INT,FP] Compare and select ++;; ------------------------------------------------------------------------- ++;; The patterns in this section are synthetic. ++;; ------------------------------------------------------------------------- ++ + ;; Integer (signed) vcond. Don't enforce an immediate range here, since it + ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. + (define_expand "vcond" +- [(set (match_operand:SVE_ALL 0 "register_operand") +- (if_then_else:SVE_ALL ++ [(set (match_operand:SVE_FULL 0 "register_operand") ++ (if_then_else:SVE_FULL + (match_operator 3 "comparison_operator" + [(match_operand: 4 "register_operand") + (match_operand: 5 "nonmemory_operand")]) +- (match_operand:SVE_ALL 1 "register_operand") +- (match_operand:SVE_ALL 2 "register_operand")))] ++ (match_operand:SVE_FULL 1 "nonmemory_operand") ++ (match_operand:SVE_FULL 2 "nonmemory_operand")))] + "TARGET_SVE" + { + aarch64_expand_sve_vcond (mode, mode, operands); +@@ -1647,13 +6555,13 @@ + ;; Integer vcondu. Don't enforce an immediate range here, since it + ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. 
+ (define_expand "vcondu" +- [(set (match_operand:SVE_ALL 0 "register_operand") +- (if_then_else:SVE_ALL ++ [(set (match_operand:SVE_FULL 0 "register_operand") ++ (if_then_else:SVE_FULL + (match_operator 3 "comparison_operator" + [(match_operand: 4 "register_operand") + (match_operand: 5 "nonmemory_operand")]) +- (match_operand:SVE_ALL 1 "register_operand") +- (match_operand:SVE_ALL 2 "register_operand")))] ++ (match_operand:SVE_FULL 1 "nonmemory_operand") ++ (match_operand:SVE_FULL 2 "nonmemory_operand")))] + "TARGET_SVE" + { + aarch64_expand_sve_vcond (mode, mode, operands); +@@ -1661,17 +6569,16 @@ + } + ) + +-;; Floating-point vcond. All comparisons except FCMUO allow a zero +-;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO +-;; with zero. ++;; Floating-point vcond. All comparisons except FCMUO allow a zero operand; ++;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero. + (define_expand "vcond" +- [(set (match_operand:SVE_SD 0 "register_operand") +- (if_then_else:SVE_SD ++ [(set (match_operand:SVE_FULL_HSD 0 "register_operand") ++ (if_then_else:SVE_FULL_HSD + (match_operator 3 "comparison_operator" + [(match_operand: 4 "register_operand") + (match_operand: 5 "aarch64_simd_reg_or_zero")]) +- (match_operand:SVE_SD 1 "register_operand") +- (match_operand:SVE_SD 2 "register_operand")))] ++ (match_operand:SVE_FULL_HSD 1 "nonmemory_operand") ++ (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))] + "TARGET_SVE" + { + aarch64_expand_sve_vcond (mode, mode, operands); +@@ -1679,6 +6586,22 @@ + } + ) + ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Comparisons ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - CMPEQ ++;; - CMPGE ++;; - CMPGT ++;; - CMPHI ++;; - CMPHS ++;; - CMPLE ++;; - CMPLO ++;; - CMPLS ++;; - CMPLT ++;; - CMPNE ++;; ------------------------------------------------------------------------- ++ + ;; Signed integer comparisons. Don't enforce an immediate range here, since + ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int + ;; instead. +@@ -1686,9 +6609,9 @@ + [(parallel + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "comparison_operator" +- [(match_operand:SVE_I 2 "register_operand") +- (match_operand:SVE_I 3 "nonmemory_operand")])) +- (clobber (reg:CC CC_REGNUM))])] ++ [(match_operand:SVE_FULL_I 2 "register_operand") ++ (match_operand:SVE_FULL_I 3 "nonmemory_operand")])) ++ (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), +@@ -1704,9 +6627,9 @@ + [(parallel + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "comparison_operator" +- [(match_operand:SVE_I 2 "register_operand") +- (match_operand:SVE_I 3 "nonmemory_operand")])) +- (clobber (reg:CC CC_REGNUM))])] ++ [(match_operand:SVE_FULL_I 2 "register_operand") ++ (match_operand:SVE_FULL_I 3 "nonmemory_operand")])) ++ (clobber (reg:CC_NZC CC_REGNUM))])] + "TARGET_SVE" + { + aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), +@@ -1715,14 +6638,285 @@ + } + ) + ++;; Predicated integer comparisons. 
++(define_insn "@aarch64_pred_cmp" ++ [(set (match_operand: 0 "register_operand" "=Upa, Upa") ++ (unspec: ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (SVE_INT_CMP: ++ (match_operand:SVE_FULL_I 3 "register_operand" "w, w") ++ (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp__operand" ", w"))] ++ UNSPEC_PRED_Z)) ++ (clobber (reg:CC_NZC CC_REGNUM))] ++ "TARGET_SVE" ++ "@ ++ cmp\t%0., %1/z, %3., #%4 ++ cmp\t%0., %1/z, %3., %4." ++) ++ ++;; Predicated integer comparisons in which both the flag and predicate ++;; results are interesting. ++(define_insn_and_rewrite "*cmp_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") ++ (match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (unspec: ++ [(match_operand 6) ++ (match_operand:SI 7 "aarch64_sve_ptrue_flag") ++ (SVE_INT_CMP: ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp__operand" ", w"))] ++ UNSPEC_PRED_Z)] ++ UNSPEC_PTEST)) ++ (set (match_operand: 0 "register_operand" "=Upa, Upa") ++ (unspec: ++ [(match_dup 6) ++ (match_dup 7) ++ (SVE_INT_CMP: ++ (match_dup 2) ++ (match_dup 3))] ++ UNSPEC_PRED_Z))] ++ "TARGET_SVE ++ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" ++ "@ ++ cmp\t%0., %1/z, %2., #%3 ++ cmp\t%0., %1/z, %2., %3." ++ "&& !rtx_equal_p (operands[4], operands[6])" ++ { ++ operands[6] = copy_rtx (operands[4]); ++ operands[7] = operands[5]; ++ } ++) ++ ++;; Predicated integer comparisons in which only the flags result is ++;; interesting. ++(define_insn_and_rewrite "*cmp_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") ++ (match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (unspec: ++ [(match_operand 6) ++ (match_operand:SI 7 "aarch64_sve_ptrue_flag") ++ (SVE_INT_CMP: ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp__operand" ", w"))] ++ UNSPEC_PRED_Z)] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch: 0 "=Upa, Upa"))] ++ "TARGET_SVE ++ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" ++ "@ ++ cmp\t%0., %1/z, %2., #%3 ++ cmp\t%0., %1/z, %2., %3." ++ "&& !rtx_equal_p (operands[4], operands[6])" ++ { ++ operands[6] = copy_rtx (operands[4]); ++ operands[7] = operands[5]; ++ } ++) ++ ++;; Predicated integer comparisons, formed by combining a PTRUE-predicated ++;; comparison with an AND. Split the instruction into its preferred form ++;; at the earliest opportunity, in order to get rid of the redundant ++;; operand 4. ++(define_insn_and_split "*cmp_and" ++ [(set (match_operand: 0 "register_operand" "=Upa, Upa") ++ (and: ++ (unspec: ++ [(match_operand 4) ++ (const_int SVE_KNOWN_PTRUE) ++ (SVE_INT_CMP: ++ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp__operand" ", w"))] ++ UNSPEC_PRED_Z) ++ (match_operand: 1 "register_operand" "Upl, Upl"))) ++ (clobber (reg:CC_NZC CC_REGNUM))] ++ "TARGET_SVE" ++ "#" ++ "&& 1" ++ [(parallel ++ [(set (match_dup 0) ++ (unspec: ++ [(match_dup 1) ++ (const_int SVE_MAYBE_NOT_PTRUE) ++ (SVE_INT_CMP: ++ (match_dup 2) ++ (match_dup 3))] ++ UNSPEC_PRED_Z)) ++ (clobber (reg:CC_NZC CC_REGNUM))])] ++) ++ ++;; Predicated integer wide comparisons. 
++(define_insn "@aarch64_pred_cmp_wide" ++ [(set (match_operand: 0 "register_operand" "=Upa") ++ (unspec: ++ [(match_operand:VNx16BI 1 "register_operand" "Upl") ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (unspec: ++ [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w") ++ (match_operand:VNx2DI 4 "register_operand" "w")] ++ SVE_COND_INT_CMP_WIDE)] ++ UNSPEC_PRED_Z)) ++ (clobber (reg:CC_NZC CC_REGNUM))] ++ "TARGET_SVE" ++ "cmp\t%0., %1/z, %3., %4.d" ++) ++ ++;; Predicated integer wide comparisons in which both the flag and ++;; predicate results are interesting. ++(define_insn "*aarch64_pred_cmp_wide_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upl") ++ (match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (unspec: ++ [(match_operand:VNx16BI 6 "register_operand" "Upl") ++ (match_operand:SI 7 "aarch64_sve_ptrue_flag") ++ (unspec: ++ [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") ++ (match_operand:VNx2DI 3 "register_operand" "w")] ++ SVE_COND_INT_CMP_WIDE)] ++ UNSPEC_PRED_Z)] ++ UNSPEC_PTEST)) ++ (set (match_operand: 0 "register_operand" "=Upa") ++ (unspec: ++ [(match_dup 6) ++ (match_dup 7) ++ (unspec: ++ [(match_dup 2) ++ (match_dup 3)] ++ SVE_COND_INT_CMP_WIDE)] ++ UNSPEC_PRED_Z))] ++ "TARGET_SVE ++ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" ++ "cmp\t%0., %1/z, %2., %3.d" ++) ++ ++;; Predicated integer wide comparisons in which only the flags result ++;; is interesting. ++(define_insn "*aarch64_pred_cmp_wide_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upl") ++ (match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (unspec: ++ [(match_operand:VNx16BI 6 "register_operand" "Upl") ++ (match_operand:SI 7 "aarch64_sve_ptrue_flag") ++ (unspec: ++ [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") ++ (match_operand:VNx2DI 3 "register_operand" "w")] ++ SVE_COND_INT_CMP_WIDE)] ++ UNSPEC_PRED_Z)] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch: 0 "=Upa"))] ++ "TARGET_SVE ++ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" ++ "cmp\t%0., %1/z, %2., %3.d" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] While tests ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - WHILELE ++;; - WHILELO ++;; - WHILELS ++;; - WHILELT ++;; ------------------------------------------------------------------------- ++ ++;; Set element I of the result if (cmp (plus operand1 J) operand2) is ++;; true for all J in [0, I]. ++(define_insn "@while_" ++ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") ++ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] ++ SVE_WHILE)) ++ (clobber (reg:CC_NZC CC_REGNUM))] ++ "TARGET_SVE" ++ "while\t%0., %1, %2" ++) ++ ++;; The WHILE instructions set the flags in the same way as a PTEST with ++;; a PTRUE GP. Handle the case in which both results are useful. The GP ++;; operands to the PTEST aren't needed, so we allow them to be anything. 
++(define_insn_and_rewrite "*while__cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand 3) ++ (match_operand 4) ++ (const_int SVE_KNOWN_PTRUE) ++ (unspec:PRED_ALL ++ [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") ++ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] ++ SVE_WHILE)] ++ UNSPEC_PTEST)) ++ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (unspec:PRED_ALL [(match_dup 1) ++ (match_dup 2)] ++ SVE_WHILE))] ++ "TARGET_SVE" ++ "while\t%0., %1, %2" ++ ;; Force the compiler to drop the unused predicate operand, so that we ++ ;; don't have an unnecessary PTRUE. ++ "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" ++ { ++ operands[3] = CONSTM1_RTX (VNx16BImode); ++ operands[4] = CONSTM1_RTX (mode); ++ } ++) ++ ++;; Same, but handle the case in which only the flags result is useful. ++(define_insn_and_rewrite "*while__ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand 3) ++ (match_operand 4) ++ (const_int SVE_KNOWN_PTRUE) ++ (unspec:PRED_ALL ++ [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") ++ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] ++ SVE_WHILE)] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:PRED_ALL 0 "=Upa"))] ++ "TARGET_SVE" ++ "while\t%0., %1, %2" ++ ;; Force the compiler to drop the unused predicate operand, so that we ++ ;; don't have an unnecessary PTRUE. ++ "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" ++ { ++ operands[3] = CONSTM1_RTX (VNx16BImode); ++ operands[4] = CONSTM1_RTX (mode); ++ } ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Direct comparisons ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FCMEQ ++;; - FCMGE ++;; - FCMGT ++;; - FCMLE ++;; - FCMLT ++;; - FCMNE ++;; - FCMUO ++;; ------------------------------------------------------------------------- ++ + ;; Floating-point comparisons. All comparisons except FCMUO allow a zero + ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO + ;; with zero. + (define_expand "vec_cmp" + [(set (match_operand: 0 "register_operand") + (match_operator: 1 "comparison_operator" +- [(match_operand:SVE_F 2 "register_operand") +- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))] ++ [(match_operand:SVE_FULL_F 2 "register_operand") ++ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))] + "TARGET_SVE" + { + aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), +@@ -1731,6 +6925,172 @@ + } + ) + ++;; Predicated floating-point comparisons. ++(define_insn "@aarch64_pred_fcm" ++ [(set (match_operand: 0 "register_operand" "=Upa, Upa") ++ (unspec: ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")] ++ SVE_COND_FP_CMP_I0))] ++ "TARGET_SVE" ++ "@ ++ fcm\t%0., %1/z, %3., #0.0 ++ fcm\t%0., %1/z, %3., %4." ++) ++ ++;; Same for unordered comparisons. ++(define_insn "@aarch64_pred_fcmuo" ++ [(set (match_operand: 0 "register_operand" "=Upa") ++ (unspec: ++ [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w") ++ (match_operand:SVE_FULL_F 4 "register_operand" "w")] ++ UNSPEC_COND_FCMUO))] ++ "TARGET_SVE" ++ "fcmuo\t%0., %1/z, %3., %4." 
++) ++ ++;; Floating-point comparisons predicated on a PTRUE, with the results ANDed ++;; with another predicate P. This does not have the same trapping behavior ++;; as predicating the comparison itself on P, but it's a legitimate fold, ++;; since we can drop any potentially-trapping operations whose results ++;; are not needed. ++;; ++;; Split the instruction into its preferred form (below) at the earliest ++;; opportunity, in order to get rid of the redundant operand 1. ++(define_insn_and_split "*fcm_and_combine" ++ [(set (match_operand: 0 "register_operand" "=Upa, Upa") ++ (and: ++ (unspec: ++ [(match_operand: 1) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") ++ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] ++ SVE_COND_FP_CMP_I0) ++ (match_operand: 4 "register_operand" "Upl, Upl")))] ++ "TARGET_SVE" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (unspec: ++ [(match_dup 4) ++ (const_int SVE_MAYBE_NOT_PTRUE) ++ (match_dup 2) ++ (match_dup 3)] ++ SVE_COND_FP_CMP_I0))] ++) ++ ++;; Same for unordered comparisons. ++(define_insn_and_split "*fcmuo_and_combine" ++ [(set (match_operand: 0 "register_operand" "=Upa") ++ (and: ++ (unspec: ++ [(match_operand: 1) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:SVE_FULL_F 2 "register_operand" "w") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w")] ++ UNSPEC_COND_FCMUO) ++ (match_operand: 4 "register_operand" "Upl")))] ++ "TARGET_SVE" ++ "#" ++ "&& 1" ++ [(set (match_dup 0) ++ (unspec: ++ [(match_dup 4) ++ (const_int SVE_MAYBE_NOT_PTRUE) ++ (match_dup 2) ++ (match_dup 3)] ++ UNSPEC_COND_FCMUO))] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Absolute comparisons ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FACGE ++;; - FACGT ++;; - FACLE ++;; - FACLT ++;; ------------------------------------------------------------------------- ++ ++;; Predicated floating-point absolute comparisons. ++(define_expand "@aarch64_pred_fac" ++ [(set (match_operand: 0 "register_operand") ++ (unspec: ++ [(match_operand: 1 "register_operand") ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (match_dup 2) ++ (match_operand:SVE_FULL_F 3 "register_operand")] ++ UNSPEC_COND_FABS) ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (match_dup 2) ++ (match_operand:SVE_FULL_F 4 "register_operand")] ++ UNSPEC_COND_FABS)] ++ SVE_COND_FP_ABS_CMP))] ++ "TARGET_SVE" ++) ++ ++(define_insn_and_rewrite "*aarch64_pred_fac" ++ [(set (match_operand: 0 "register_operand" "=Upa") ++ (unspec: ++ [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SI 4 "aarch64_sve_ptrue_flag") ++ (unspec:SVE_FULL_F ++ [(match_operand 5) ++ (match_operand:SI 6 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w")] ++ UNSPEC_COND_FABS) ++ (unspec:SVE_FULL_F ++ [(match_operand 7) ++ (match_operand:SI 8 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 3 "register_operand" "w")] ++ UNSPEC_COND_FABS)] ++ SVE_COND_FP_ABS_CMP))] ++ "TARGET_SVE ++ && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) ++ && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" ++ "fac\t%0., %1/z, %2., %3." 
++ "&& (!rtx_equal_p (operands[1], operands[5]) ++ || !rtx_equal_p (operands[1], operands[7]))" ++ { ++ operands[5] = copy_rtx (operands[1]); ++ operands[7] = copy_rtx (operands[1]); ++ } ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Select ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SEL ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@vcond_mask_" ++ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (ior:PRED_ALL ++ (and:PRED_ALL ++ (match_operand:PRED_ALL 3 "register_operand" "Upa") ++ (match_operand:PRED_ALL 1 "register_operand" "Upa")) ++ (and:PRED_ALL ++ (not (match_dup 3)) ++ (match_operand:PRED_ALL 2 "register_operand" "Upa"))))] ++ "TARGET_SVE" ++ "sel\t%0.b, %3, %1.b, %2.b" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Test bits ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - PTEST ++;; ------------------------------------------------------------------------- ++ + ;; Branch based on predicate equality or inequality. + (define_expand "cbranch4" + [(set (pc) +@@ -1742,1409 +7102,2120 @@ + (pc)))] + "" + { +- rtx ptrue = force_reg (mode, CONSTM1_RTX (mode)); ++ rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all ()); ++ rtx cast_ptrue = gen_lowpart (mode, ptrue); ++ rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode); + rtx pred; + if (operands[2] == CONST0_RTX (mode)) + pred = operands[1]; + else + { + pred = gen_reg_rtx (mode); +- emit_insn (gen_pred_xor3 (pred, ptrue, operands[1], +- operands[2])); ++ emit_insn (gen_aarch64_pred_xor_z (pred, cast_ptrue, operands[1], ++ operands[2])); + } +- emit_insn (gen_ptest_ptrue (ptrue, pred)); +- operands[1] = gen_rtx_REG (CCmode, CC_REGNUM); ++ emit_insn (gen_aarch64_ptest (ptrue, cast_ptrue, ptrue_flag, pred)); ++ operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + operands[2] = const0_rtx; + } + ) + +-;; Unpredicated integer MIN/MAX. +-(define_expand "3" +- [(set (match_operand:SVE_I 0 "register_operand") +- (unspec:SVE_I +- [(match_dup 3) +- (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand") +- (match_operand:SVE_I 2 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; See "Description of UNSPEC_PTEST" above for details. ++(define_insn "aarch64_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") ++ (match_operand 1) ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (match_operand:PRED_ALL 3 "register_operand" "Upa")] ++ UNSPEC_PTEST))] ++ "TARGET_SVE" ++ "ptest\t%0, %3.b" ++) ++ ++;; ========================================================================= ++;; == Reductions ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT,FP] Conditional reductions ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - CLASTA ++;; - CLASTB ++;; ------------------------------------------------------------------------- ++ ++;; Set operand 0 to the last active element in operand 3, or to tied ++;; operand 1 if no elements are active. 
++(define_insn "@fold_extract__" ++ [(set (match_operand: 0 "register_operand" "=?r, w") ++ (unspec: ++ [(match_operand: 1 "register_operand" "0, 0") ++ (match_operand: 2 "register_operand" "Upl, Upl") ++ (match_operand:SVE_FULL 3 "register_operand" "w, w")] ++ CLAST))] + "TARGET_SVE" +- { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); +- } ++ "@ ++ clast\t%0, %2, %0, %3. ++ clast\t%0, %2, %0, %3." + ) + +-;; Integer MIN/MAX predicated with a PTRUE. +-(define_insn "*3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w") +- (match_operand:SVE_I 3 "register_operand" "w, w"))] +- UNSPEC_MERGE_PTRUE))] ++(define_insn "@aarch64_fold_extract_vector__" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL ++ [(match_operand:SVE_FULL 1 "register_operand" "0, w") ++ (match_operand: 2 "register_operand" "Upl, Upl") ++ (match_operand:SVE_FULL 3 "register_operand" "w, w")] ++ CLAST))] + "TARGET_SVE" + "@ +- \t%0., %1/m, %0., %3. +- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,yes")] +-) ++ clast\t%0., %2, %0., %3. ++ movprfx\t%0, %1\;clast\t%0., %2, %0., %3." ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Tree reductions ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - ANDV ++;; - EORV ++;; - ORV ++;; - SADDV ++;; - SMAXV ++;; - SMINV ++;; - UADDV ++;; - UMAXV ++;; - UMINV ++;; ------------------------------------------------------------------------- + +-;; Unpredicated floating-point MIN/MAX. +-(define_expand "3" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 3) +- (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand") +- (match_operand:SVE_F 2 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; Unpredicated integer add reduction. ++(define_expand "reduc_plus_scal_" ++ [(match_operand: 0 "register_operand") ++ (match_operand:SVE_FULL_I 1 "register_operand")] + "TARGET_SVE" + { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); ++ rtx pred = aarch64_ptrue_reg (mode); ++ rtx tmp = mode == DImode ? operands[0] : gen_reg_rtx (DImode); ++ emit_insn (gen_aarch64_pred_reduc_uadd_ (tmp, pred, operands[1])); ++ if (tmp != operands[0]) ++ emit_move_insn (operands[0], gen_lowpart (mode, tmp)); ++ DONE; + } + ) + +-;; Floating-point MIN/MAX predicated with a PTRUE. +-(define_insn "*3" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w") +- (match_operand:SVE_F 3 "register_operand" "w, w"))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" +- "@ +- fnm\t%0., %1/m, %0., %3. +- movprfx\t%0, %2\;fnm\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,yes")] ++;; Predicated integer add reduction. The result is always 64-bits. ++(define_insn "@aarch64_pred_reduc__" ++ [(set (match_operand:DI 0 "register_operand" "=w") ++ (unspec:DI [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SVE_FULL_I 2 "register_operand" "w")] ++ SVE_INT_ADDV))] ++ "TARGET_SVE && >= " ++ "addv\t%d0, %1, %2." + ) + +-;; Unpredicated fmin/fmax. 
+-(define_expand "3" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 3) +- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand") +- (match_operand:SVE_F 2 "register_operand")] +- FMAXMIN_UNS)] +- UNSPEC_MERGE_PTRUE))] ++;; Unpredicated integer reductions. ++(define_expand "reduc__scal_" ++ [(set (match_operand: 0 "register_operand") ++ (unspec: [(match_dup 2) ++ (match_operand:SVE_FULL_I 1 "register_operand")] ++ SVE_INT_REDUCTION))] + "TARGET_SVE" + { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[2] = aarch64_ptrue_reg (mode); + } + ) + +-;; fmin/fmax predicated with a PTRUE. +-(define_insn "*3" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w") +- (match_operand:SVE_F 3 "register_operand" "w, w")] +- FMAXMIN_UNS)] +- UNSPEC_MERGE_PTRUE))] ++;; Predicated integer reductions. ++(define_insn "@aarch64_pred_reduc__" ++ [(set (match_operand: 0 "register_operand" "=w") ++ (unspec: [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SVE_FULL_I 2 "register_operand" "w")] ++ SVE_INT_REDUCTION))] + "TARGET_SVE" +- "@ +- \t%0., %1/m, %0., %3. +- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,yes")] ++ "\t%0, %1, %2." + ) + +-;; Predicated integer operations with select. +-(define_expand "cond_" +- [(set (match_operand:SVE_I 0 "register_operand") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand") +- (SVE_INT_BINARY:SVE_I +- (match_operand:SVE_I 2 "register_operand") +- (match_operand:SVE_I 3 "register_operand")) +- (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] +- UNSPEC_SEL))] +- "TARGET_SVE" +-) ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Tree reductions ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FADDV ++;; - FMAXNMV ++;; - FMAXV ++;; - FMINNMV ++;; - FMINV ++;; ------------------------------------------------------------------------- + +-(define_expand "cond_" +- [(set (match_operand:SVE_SDI 0 "register_operand") +- (unspec:SVE_SDI +- [(match_operand: 1 "register_operand") +- (SVE_INT_BINARY_SD:SVE_SDI +- (match_operand:SVE_SDI 2 "register_operand") +- (match_operand:SVE_SDI 3 "register_operand")) +- (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")] +- UNSPEC_SEL))] ++;; Unpredicated floating-point tree reductions. ++(define_expand "reduc__scal_" ++ [(set (match_operand: 0 "register_operand") ++ (unspec: [(match_dup 2) ++ (match_operand:SVE_FULL_F 1 "register_operand")] ++ SVE_FP_REDUCTION))] + "TARGET_SVE" ++ { ++ operands[2] = aarch64_ptrue_reg (mode); ++ } + ) + +-;; Predicated integer operations with select matching the output operand. +-(define_insn "*cond__0" +- [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (SVE_INT_BINARY:SVE_I +- (match_operand:SVE_I 2 "register_operand" "0, w, w") +- (match_operand:SVE_I 3 "register_operand" "w, 0, w")) +- (match_dup 0)] +- UNSPEC_SEL))] ++;; Predicated floating-point tree reductions. ++(define_insn "@aarch64_pred_reduc__" ++ [(set (match_operand: 0 "register_operand" "=w") ++ (unspec: [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w")] ++ SVE_FP_REDUCTION))] + "TARGET_SVE" +- "@ +- \t%0., %1/m, %0., %3. +- \t%0., %1/m, %0., %2. 
+- movprfx\t%0, %1/m, %2\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,*,yes")] ++ "\t%0, %1, %2." + ) + +-(define_insn "*cond__0" +- [(set (match_operand:SVE_SDI 0 "register_operand" "+w, w, ?&w") +- (unspec:SVE_SDI +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (SVE_INT_BINARY_SD:SVE_SDI +- (match_operand:SVE_SDI 2 "register_operand" "0, w, w") +- (match_operand:SVE_SDI 3 "register_operand" "w, 0, w")) +- (match_dup 0)] +- UNSPEC_SEL))] +- "TARGET_SVE" +- "@ +- \t%0., %1/m, %0., %3. +- \t%0., %1/m, %0., %2. +- movprfx\t%0, %1/m, %2\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,*,yes")] +-) ++;; ------------------------------------------------------------------------- ++;; ---- [FP] Left-to-right reductions ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FADDA ++;; ------------------------------------------------------------------------- + +-;; Predicated integer operations with select matching the first operand. +-(define_insn "*cond__2" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (SVE_INT_BINARY:SVE_I +- (match_operand:SVE_I 2 "register_operand" "0, w") +- (match_operand:SVE_I 3 "register_operand" "w, w")) +- (match_dup 2)] +- UNSPEC_SEL))] ++;; Unpredicated in-order FP reductions. ++(define_expand "fold_left_plus_" ++ [(set (match_operand: 0 "register_operand") ++ (unspec: [(match_dup 3) ++ (match_operand: 1 "register_operand") ++ (match_operand:SVE_FULL_F 2 "register_operand")] ++ UNSPEC_FADDA))] + "TARGET_SVE" +- "@ +- \t%0., %1/m, %0., %3. +- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,yes")] ++ { ++ operands[3] = aarch64_ptrue_reg (mode); ++ } + ) + +-(define_insn "*cond__2" +- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") +- (unspec:SVE_SDI +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (SVE_INT_BINARY_SD:SVE_SDI +- (match_operand:SVE_SDI 2 "register_operand" "0, w") +- (match_operand:SVE_SDI 3 "register_operand" "w, w")) +- (match_dup 2)] +- UNSPEC_SEL))] ++;; Predicated in-order FP reductions. ++(define_insn "mask_fold_left_plus_" ++ [(set (match_operand: 0 "register_operand" "=w") ++ (unspec: [(match_operand: 3 "register_operand" "Upl") ++ (match_operand: 1 "register_operand" "0") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w")] ++ UNSPEC_FADDA))] + "TARGET_SVE" +- "@ +- \t%0., %1/m, %0., %3. +- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,yes")] ++ "fadda\t%0, %3, %0, %2." + ) + +-;; Predicated integer operations with select matching the second operand. +-(define_insn "*cond__3" +- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (SVE_INT_BINARY:SVE_I +- (match_operand:SVE_I 2 "register_operand" "w, w") +- (match_operand:SVE_I 3 "register_operand" "0, w")) +- (match_dup 3)] +- UNSPEC_SEL))] +- "TARGET_SVE" +- "@ +- \t%0., %1/m, %0., %2. +- movprfx\t%0, %3\;\t%0., %1/m, %0., %2." 
+- [(set_attr "movprfx" "*,yes")] +-) ++;; ========================================================================= ++;; == Permutes ++;; ========================================================================= + +-(define_insn "*cond__3" +- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") +- (unspec:SVE_SDI +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (SVE_INT_BINARY_SD:SVE_SDI +- (match_operand:SVE_SDI 2 "register_operand" "w, w") +- (match_operand:SVE_SDI 3 "register_operand" "0, w")) +- (match_dup 3)] +- UNSPEC_SEL))] +- "TARGET_SVE" +- "@ +- \t%0., %1/m, %0., %2. +- movprfx\t%0, %3\;\t%0., %1/m, %0., %2." +- [(set_attr "movprfx" "*,yes")] +-) ++;; ------------------------------------------------------------------------- ++;; ---- [INT,FP] General permutes ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - TBL ++;; ------------------------------------------------------------------------- + +-;; Predicated integer operations with select matching zero. +-(define_insn "*cond__z" +- [(set (match_operand:SVE_I 0 "register_operand" "=&w") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand" "Upl") +- (SVE_INT_BINARY:SVE_I +- (match_operand:SVE_I 2 "register_operand" "w") +- (match_operand:SVE_I 3 "register_operand" "w")) +- (match_operand:SVE_I 4 "aarch64_simd_imm_zero")] +- UNSPEC_SEL))] +- "TARGET_SVE" +- "movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "yes")] ++(define_expand "vec_perm" ++ [(match_operand:SVE_FULL 0 "register_operand") ++ (match_operand:SVE_FULL 1 "register_operand") ++ (match_operand:SVE_FULL 2 "register_operand") ++ (match_operand: 3 "aarch64_sve_vec_perm_operand")] ++ "TARGET_SVE && GET_MODE_NUNITS (mode).is_constant ()" ++ { ++ aarch64_expand_sve_vec_perm (operands[0], operands[1], ++ operands[2], operands[3]); ++ DONE; ++ } + ) + +-(define_insn "*cond__z" +- [(set (match_operand:SVE_SDI 0 "register_operand" "=&w") +- (unspec:SVE_SDI +- [(match_operand: 1 "register_operand" "Upl") +- (SVE_INT_BINARY_SD:SVE_SDI +- (match_operand:SVE_SDI 2 "register_operand" "w") +- (match_operand:SVE_SDI 3 "register_operand" "w")) +- (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")] +- UNSPEC_SEL))] ++(define_insn "@aarch64_sve_tbl" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand:SVE_FULL 1 "register_operand" "w") ++ (match_operand: 2 "register_operand" "w")] ++ UNSPEC_TBL))] + "TARGET_SVE" +- "movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "yes")] ++ "tbl\t%0., %1., %2." + ) + +-;; Synthetic predications with select unmatched. +-(define_insn "*cond__any" +- [(set (match_operand:SVE_I 0 "register_operand" "=&w") +- (unspec:SVE_I ++;; ------------------------------------------------------------------------- ++;; ---- [INT,FP] Special-purpose unary permutes ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - COMPACT ++;; - DUP ++;; - REV ++;; ------------------------------------------------------------------------- ++ ++;; Compact active elements and pad with zeros. 
++(define_insn "@aarch64_sve_compact" ++ [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w") ++ (unspec:SVE_FULL_SD + [(match_operand: 1 "register_operand" "Upl") +- (SVE_INT_BINARY:SVE_I +- (match_operand:SVE_I 2 "register_operand" "w") +- (match_operand:SVE_I 3 "register_operand" "w")) +- (match_operand:SVE_I 4 "register_operand" "w")] +- UNSPEC_SEL))] ++ (match_operand:SVE_FULL_SD 2 "register_operand" "w")] ++ UNSPEC_SVE_COMPACT))] + "TARGET_SVE" +- "#" ++ "compact\t%0., %1, %2." + ) + +-(define_insn "*cond__any" +- [(set (match_operand:SVE_SDI 0 "register_operand" "=&w") +- (unspec:SVE_SDI +- [(match_operand: 1 "register_operand" "Upl") +- (SVE_INT_BINARY_SD:SVE_I +- (match_operand:SVE_SDI 2 "register_operand" "w") +- (match_operand:SVE_SDI 3 "register_operand" "w")) +- (match_operand:SVE_SDI 4 "register_operand" "w")] +- UNSPEC_SEL))] +- "TARGET_SVE" +- "#" ++;; Duplicate one element of a vector. ++(define_insn "@aarch64_sve_dup_lane" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (vec_duplicate:SVE_FULL ++ (vec_select: ++ (match_operand:SVE_FULL 1 "register_operand" "w") ++ (parallel [(match_operand:SI 2 "const_int_operand")]))))] ++ "TARGET_SVE ++ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 0, 63)" ++ "dup\t%0., %1.[%2]" + ) + +-(define_split +- [(set (match_operand:SVE_I 0 "register_operand") +- (unspec:SVE_I +- [(match_operand: 1 "register_operand") +- (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator" +- [(match_operand:SVE_I 2 "register_operand") +- (match_operand:SVE_I 3 "register_operand")]) +- (match_operand:SVE_I 4 "register_operand")] +- UNSPEC_SEL))] +- "TARGET_SVE && reload_completed +- && !(rtx_equal_p (operands[0], operands[4]) +- || rtx_equal_p (operands[2], operands[4]) +- || rtx_equal_p (operands[3], operands[4]))" +- ; Not matchable by any one insn or movprfx insn. We need a separate select. +- [(set (match_dup 0) +- (unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)] +- UNSPEC_SEL)) +- (set (match_dup 0) +- (unspec:SVE_I +- [(match_dup 1) +- (match_op_dup 5 [(match_dup 0) (match_dup 3)]) +- (match_dup 0)] +- UNSPEC_SEL))] ++;; Use DUP.Q to duplicate a 128-bit segment of a register. ++;; ++;; The vec_select: sets memory lane number N of the V128 to lane ++;; number op2 + N of op1. (We don't need to distinguish between memory ++;; and architectural register lane numbering for op1 or op0, since the ++;; two numbering schemes are the same for SVE.) ++;; ++;; The vec_duplicate:SVE_FULL then copies memory lane number N of the ++;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP ++;; of op0. We therefore get the correct result for both endiannesses. ++;; ++;; The wrinkle is that for big-endian V128 registers, memory lane numbering ++;; is in the opposite order to architectural register lane numbering. ++;; Thus if we were to do this operation via a V128 temporary register, ++;; the vec_select and vec_duplicate would both involve a reverse operation ++;; for big-endian targets. In this fused pattern the two reverses cancel ++;; each other out. 
++(define_insn "@aarch64_sve_dupq_lane" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (vec_duplicate:SVE_FULL ++ (vec_select: ++ (match_operand:SVE_FULL 1 "register_operand" "w") ++ (match_operand 2 "ascending_int_parallel"))))] ++ "TARGET_SVE ++ && (INTVAL (XVECEXP (operands[2], 0, 0)) ++ * GET_MODE_SIZE (mode)) % 16 == 0 ++ && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0)) ++ * GET_MODE_SIZE (mode), 0, 63)" ++ { ++ unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0)) ++ * GET_MODE_SIZE (mode)); ++ operands[2] = gen_int_mode (byte / 16, DImode); ++ return "dup\t%0.q, %1.q[%2]"; ++ } + ) + +-;; Set operand 0 to the last active element in operand 3, or to tied +-;; operand 1 if no elements are active. +-(define_insn "fold_extract_last_" +- [(set (match_operand: 0 "register_operand" "=r, w") +- (unspec: +- [(match_operand: 1 "register_operand" "0, 0") +- (match_operand: 2 "register_operand" "Upl, Upl") +- (match_operand:SVE_ALL 3 "register_operand" "w, w")] +- UNSPEC_CLASTB))] ++;; Reverse the order of elements within a full vector. ++(define_insn "@aarch64_sve_rev" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand:SVE_FULL 1 "register_operand" "w")] ++ UNSPEC_REV))] + "TARGET_SVE" +- "@ +- clastb\t%0, %2, %0, %3. +- clastb\t%0, %2, %0, %3." +-) ++ "rev\t%0., %1.") + +-;; Unpredicated integer add reduction. +-(define_expand "reduc_plus_scal_" +- [(set (match_operand: 0 "register_operand") +- (unspec: [(match_dup 2) +- (match_operand:SVE_I 1 "register_operand")] +- UNSPEC_ADDV))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT,FP] Special-purpose binary permutes ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SPLICE ++;; - TRN1 ++;; - TRN2 ++;; - UZP1 ++;; - UZP2 ++;; - ZIP1 ++;; - ZIP2 ++;; ------------------------------------------------------------------------- ++ ++;; Like EXT, but start at the first active element. ++(define_insn "@aarch64_sve_splice" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL ++ [(match_operand: 1 "register_operand" "Upl, Upl") ++ (match_operand:SVE_FULL 2 "register_operand" "0, w") ++ (match_operand:SVE_FULL 3 "register_operand" "w, w")] ++ UNSPEC_SVE_SPLICE))] + "TARGET_SVE" ++ "@ ++ splice\t%0., %1, %0., %3. ++ movprfx\t%0, %2\;splice\t%0., %1, %0., %3." ++ [(set_attr "movprfx" "*, yes")] ++) ++ ++;; Permutes that take half the elements from one vector and half the ++;; elements from the other. ++(define_insn "@aarch64_sve_" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand:SVE_FULL 1 "register_operand" "w") ++ (match_operand:SVE_FULL 2 "register_operand" "w")] ++ PERMUTE))] ++ "TARGET_SVE" ++ "\t%0., %1., %2." ++) ++ ++;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns ++;; doesn't depend on the mode. ++(define_insn "@aarch64_sve_" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") ++ (unspec:SVE_FULL ++ [(match_operand:SVE_FULL 1 "register_operand" "w") ++ (match_operand:SVE_FULL 2 "register_operand" "w")] ++ PERMUTEQ))] ++ "TARGET_SVE_F64MM" ++ "\t%0.q, %1.q, %2.q" ++) ++ ++;; Concatenate two vectors and extract a subvector. Note that the ++;; immediate (third) operand is the lane index not the byte index. 
++(define_insn "@aarch64_sve_ext" ++ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") ++ (unspec:SVE_FULL ++ [(match_operand:SVE_FULL 1 "register_operand" "0, w") ++ (match_operand:SVE_FULL 2 "register_operand" "w, w") ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPEC_EXT))] ++ "TARGET_SVE ++ && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (mode), 0, 255)" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (mode)); ++ return (which_alternative == 0 ++ ? "ext\\t%0.b, %0.b, %2.b, #%3" ++ : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3"); + } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Predicated integer add reduction. The result is always 64-bits. +-(define_insn "*reduc_plus_scal_" +- [(set (match_operand: 0 "register_operand" "=w") +- (unspec: [(match_operand: 1 "register_operand" "Upl") +- (match_operand:SVE_I 2 "register_operand" "w")] +- UNSPEC_ADDV))] ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Special-purpose unary permutes ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - REV ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@aarch64_sve_rev" ++ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")] ++ UNSPEC_REV))] + "TARGET_SVE" +- "uaddv\t%d0, %1, %2." +-) ++ "rev\t%0., %1.") + +-;; Unpredicated floating-point add reduction. +-(define_expand "reduc_plus_scal_" +- [(set (match_operand: 0 "register_operand") +- (unspec: [(match_dup 2) +- (match_operand:SVE_F 1 "register_operand")] +- UNSPEC_FADDV))] ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Special-purpose binary permutes ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - TRN1 ++;; - TRN2 ++;; - UZP1 ++;; - UZP2 ++;; - ZIP1 ++;; - ZIP2 ++;; ------------------------------------------------------------------------- ++ ++;; Permutes that take half the elements from one vector and half the ++;; elements from the other. ++(define_insn "@aarch64_sve_" ++ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")] ++ PERMUTE))] + "TARGET_SVE" +- { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); +- } ++ "\t%0., %1., %2." + ) + +-;; Predicated floating-point add reduction. +-(define_insn "*reduc_plus_scal_" +- [(set (match_operand: 0 "register_operand" "=w") +- (unspec: [(match_operand: 1 "register_operand" "Upl") +- (match_operand:SVE_F 2 "register_operand" "w")] +- UNSPEC_FADDV))] ++;; ========================================================================= ++;; == Conversions ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT<-INT] Packs ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - UZP1 ++;; ------------------------------------------------------------------------- ++ ++;; Integer pack. Use UZP1 on the narrower type, which discards ++;; the high part of each wide element. 
++(define_insn "vec_pack_trunc_" ++ [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") ++ (unspec:SVE_FULL_BHSI ++ [(match_operand: 1 "register_operand" "w") ++ (match_operand: 2 "register_operand" "w")] ++ UNSPEC_PACK))] + "TARGET_SVE" +- "faddv\t%0, %1, %2." ++ "uzp1\t%0., %1., %2." + ) + +-;; Unpredicated integer MIN/MAX reduction. +-(define_expand "reduc__scal_" +- [(set (match_operand: 0 "register_operand") +- (unspec: [(match_dup 2) +- (match_operand:SVE_I 1 "register_operand")] +- MAXMINV))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT<-INT] Unpacks ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SUNPKHI ++;; - SUNPKLO ++;; - UUNPKHI ++;; - UUNPKLO ++;; ------------------------------------------------------------------------- ++ ++;; Unpack the low or high half of a vector, where "high" refers to ++;; the low-numbered lanes for big-endian and the high-numbered lanes ++;; for little-endian. ++(define_expand "vec_unpack__" ++ [(match_operand: 0 "register_operand") ++ (unspec: ++ [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)] + "TARGET_SVE" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ emit_insn (( ++ ? gen_aarch64_sve_unpkhi_ ++ : gen_aarch64_sve_unpklo_) ++ (operands[0], operands[1])); ++ DONE; + } + ) + +-;; Predicated integer MIN/MAX reduction. +-(define_insn "*reduc__scal_" +- [(set (match_operand: 0 "register_operand" "=w") +- (unspec: [(match_operand: 1 "register_operand" "Upl") +- (match_operand:SVE_I 2 "register_operand" "w")] +- MAXMINV))] ++(define_insn "@aarch64_sve_unpk_" ++ [(set (match_operand: 0 "register_operand" "=w") ++ (unspec: ++ [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")] ++ UNPACK))] + "TARGET_SVE" +- "v\t%0, %1, %2." ++ "unpk\t%0., %1." + ) + +-;; Unpredicated floating-point MIN/MAX reduction. +-(define_expand "reduc__scal_" +- [(set (match_operand: 0 "register_operand") +- (unspec: [(match_dup 2) +- (match_operand:SVE_F 1 "register_operand")] +- FMAXMINV))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT<-FP] Conversions ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FCVTZS ++;; - FCVTZU ++;; ------------------------------------------------------------------------- ++ ++;; Unpredicated conversion of floats to integers of the same size (HF to HI, ++;; SF to SI or DF to DI). ++(define_expand "2" ++ [(set (match_operand: 0 "register_operand") ++ (unspec: ++ [(match_dup 2) ++ (const_int SVE_RELAXED_GP) ++ (match_operand:SVE_FULL_F 1 "register_operand")] ++ SVE_COND_FCVTI))] + "TARGET_SVE" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[2] = aarch64_ptrue_reg (mode); + } + ) + +-;; Predicated floating-point MIN/MAX reduction. +-(define_insn "*reduc__scal_" +- [(set (match_operand: 0 "register_operand" "=w") +- (unspec: [(match_operand: 1 "register_operand" "Upl") +- (match_operand:SVE_F 2 "register_operand" "w")] +- FMAXMINV))] +- "TARGET_SVE" +- "v\t%0, %1, %2." ++;; Predicated float-to-integer conversion, either to the same width or wider. ++(define_insn "@aarch64_sve__nontrunc" ++ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") ++ (unspec:SVE_FULL_HSDI ++ [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SI 3 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w")] ++ SVE_COND_FCVTI))] ++ "TARGET_SVE && >= " ++ "fcvtz\t%0., %1/m, %2." 
+ ) + +-(define_expand "reduc__scal_" +- [(set (match_operand: 0 "register_operand") +- (unspec: [(match_dup 2) +- (match_operand:SVE_I 1 "register_operand")] +- BITWISEV))] +- "TARGET_SVE" ++;; Predicated narrowing float-to-integer conversion. ++(define_insn "@aarch64_sve__trunc" ++ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w") ++ (unspec:VNx4SI_ONLY ++ [(match_operand:VNx2BI 1 "register_operand" "Upl") ++ (match_operand:SI 3 "aarch64_sve_gp_strictness") ++ (match_operand:VNx2DF_ONLY 2 "register_operand" "w")] ++ SVE_COND_FCVTI))] ++ "TARGET_SVE" ++ "fcvtz\t%0., %1/m, %2." ++) ++ ++;; Predicated float-to-integer conversion with merging, either to the same ++;; width or wider. ++(define_expand "@cond__nontrunc" ++ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand") ++ (unspec:SVE_FULL_HSDI ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_HSDI ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_F 2 "register_operand")] ++ SVE_COND_FCVTI) ++ (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && >= " ++) ++ ++;; The first alternative doesn't need the earlyclobber, but the only case ++;; it would help is the uninteresting one in which operands 2 and 3 are ++;; the same register (despite having different modes). Making all the ++;; alternatives earlyclobber makes things more consistent for the ++;; register allocator. ++(define_insn_and_rewrite "*cond__nontrunc" ++ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") ++ (unspec:SVE_FULL_HSDI ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:SVE_FULL_HSDI ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] ++ SVE_COND_FCVTI) ++ (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE ++ && >= ++ && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" ++ "@ ++ fcvtz\t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. ++ movprfx\t%0, %3\;fcvtz\t%0., %1/m, %2." ++ "&& !rtx_equal_p (operands[1], operands[4])" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[4] = copy_rtx (operands[1]); + } ++ [(set_attr "movprfx" "*,yes,yes")] ++) ++ ++;; Predicated narrowing float-to-integer conversion with merging. ++(define_expand "@cond__trunc" ++ [(set (match_operand:VNx4SI_ONLY 0 "register_operand") ++ (unspec:VNx4SI_ONLY ++ [(match_operand:VNx2BI 1 "register_operand") ++ (unspec:VNx4SI_ONLY ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:VNx2DF_ONLY 2 "register_operand")] ++ SVE_COND_FCVTI) ++ (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" + ) + +-(define_insn "*reduc__scal_" +- [(set (match_operand: 0 "register_operand" "=w") +- (unspec: [(match_operand: 1 "register_operand" "Upl") +- (match_operand:SVE_I 2 "register_operand" "w")] +- BITWISEV))] ++(define_insn "*cond__trunc" ++ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w") ++ (unspec:VNx4SI_ONLY ++ [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:VNx4SI_ONLY ++ [(match_dup 1) ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")] ++ SVE_COND_FCVTI) ++ (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] + "TARGET_SVE" +- "\t%0, %1, %2." ++ "@ ++ fcvtz\t%0., %1/m, %2. 
++ movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. ++ movprfx\t%0, %3\;fcvtz\t%0., %1/m, %2." ++ [(set_attr "movprfx" "*,yes,yes")] + ) + +-;; Unpredicated in-order FP reductions. +-(define_expand "fold_left_plus_" +- [(set (match_operand: 0 "register_operand") +- (unspec: [(match_dup 3) +- (match_operand: 1 "register_operand") +- (match_operand:SVE_F 2 "register_operand")] +- UNSPEC_FADDA))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT<-FP] Packs ++;; ------------------------------------------------------------------------- ++;; The patterns in this section are synthetic. ++;; ------------------------------------------------------------------------- ++ ++;; Convert two vectors of DF to SI and pack the results into a single vector. ++(define_expand "vec_pack_fix_trunc_vnx2df" ++ [(set (match_dup 4) ++ (unspec:VNx4SI ++ [(match_dup 3) ++ (const_int SVE_RELAXED_GP) ++ (match_operand:VNx2DF 1 "register_operand")] ++ SVE_COND_FCVTI)) ++ (set (match_dup 5) ++ (unspec:VNx4SI ++ [(match_dup 3) ++ (const_int SVE_RELAXED_GP) ++ (match_operand:VNx2DF 2 "register_operand")] ++ SVE_COND_FCVTI)) ++ (set (match_operand:VNx4SI 0 "register_operand") ++ (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] + "TARGET_SVE" + { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[3] = aarch64_ptrue_reg (VNx2BImode); ++ operands[4] = gen_reg_rtx (VNx4SImode); ++ operands[5] = gen_reg_rtx (VNx4SImode); + } + ) + +-;; In-order FP reductions predicated with PTRUE. +-(define_insn "*fold_left_plus_" +- [(set (match_operand: 0 "register_operand" "=w") +- (unspec: [(match_operand: 1 "register_operand" "Upl") +- (match_operand: 2 "register_operand" "0") +- (match_operand:SVE_F 3 "register_operand" "w")] +- UNSPEC_FADDA))] +- "TARGET_SVE" +- "fadda\t%0, %1, %0, %3." +-) ++;; ------------------------------------------------------------------------- ++;; ---- [INT<-FP] Unpacks ++;; ------------------------------------------------------------------------- ++;; No patterns here yet! ++;; ------------------------------------------------------------------------- + +-;; Predicated form of the above in-order reduction. +-(define_insn "*pred_fold_left_plus_" +- [(set (match_operand: 0 "register_operand" "=w") +- (unspec: +- [(match_operand: 1 "register_operand" "0") +- (unspec:SVE_F +- [(match_operand: 2 "register_operand" "Upl") +- (match_operand:SVE_F 3 "register_operand" "w") +- (match_operand:SVE_F 4 "aarch64_simd_imm_zero")] +- UNSPEC_SEL)] +- UNSPEC_FADDA))] +- "TARGET_SVE" +- "fadda\t%0, %2, %0, %3." +-) ++;; ------------------------------------------------------------------------- ++;; ---- [FP<-INT] Conversions ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - SCVTF ++;; - UCVTF ++;; ------------------------------------------------------------------------- + +-;; Unpredicated floating-point addition. +-(define_expand "add3" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 3) +- (plus:SVE_F +- (match_operand:SVE_F 1 "register_operand") +- (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; Unpredicated conversion of integers to floats of the same size ++;; (HI to HF, SI to SF or DI to DF). 
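A C-level view of the same-width conversions handled here, as a sketch (assumes arm_sve.h); the expander wraps the operation in a PTRUE so that the predicated SCVTF/UCVTF patterns can be reused:

#include <arm_sve.h>

/* SI to SF conversion over a whole vector.  The _x form, like the
   relaxed governing predicate in the expander, does not care what
   happens to inactive lanes.  */
svfloat32_t
to_float (svint32_t vec)
{
  svbool_t pg = svptrue_b32 ();
  return svcvt_f32_x (pg, vec);   /* expected to map to SCVTF z0.s, p0/m, z1.s */
}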
++(define_expand "2" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 2) ++ (const_int SVE_RELAXED_GP) ++ (match_operand: 1 "register_operand")] ++ SVE_COND_ICVTF))] + "TARGET_SVE" + { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[2] = aarch64_ptrue_reg (mode); + } + ) + +-;; Floating-point addition predicated with a PTRUE. +-(define_insn_and_split "*add3" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (plus:SVE_F +- (match_operand:SVE_F 2 "register_operand" "%0, 0, w") +- (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" +- "@ +- fadd\t%0., %1/m, %0., #%3 +- fsub\t%0., %1/m, %0., #%N3 +- #" +- ; Split the unpredicated form after reload, so that we don't have +- ; the unnecessary PTRUE. +- "&& reload_completed +- && register_operand (operands[3], mode)" +- [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))] ++;; Predicated integer-to-float conversion, either to the same width or ++;; narrower. ++(define_insn "@aarch64_sve__nonextend" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SI 3 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")] ++ SVE_COND_ICVTF))] ++ "TARGET_SVE && >= " ++ "cvtf\t%0., %1/m, %2." + ) + +-;; Unpredicated floating-point subtraction. +-(define_expand "sub3" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 3) +- (minus:SVE_F +- (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand") +- (match_operand:SVE_F 2 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" +- { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); +- } ++;; Predicated widening integer-to-float conversion. ++(define_insn "@aarch64_sve__extend" ++ [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w") ++ (unspec:VNx2DF_ONLY ++ [(match_operand:VNx2BI 1 "register_operand" "Upl") ++ (match_operand:SI 3 "aarch64_sve_gp_strictness") ++ (match_operand:VNx4SI_ONLY 2 "register_operand" "w")] ++ SVE_COND_ICVTF))] ++ "TARGET_SVE" ++ "cvtf\t%0., %1/m, %2." ++) ++ ++;; Predicated integer-to-float conversion with merging, either to the same ++;; width or narrower. ++(define_expand "@cond__nonextend" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_F ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_HSDI 2 "register_operand")] ++ SVE_COND_ICVTF) ++ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && >= " + ) + +-;; Floating-point subtraction predicated with a PTRUE. +-(define_insn_and_split "*sub3" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") +- (minus:SVE_F +- (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w") +- (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))] +- UNSPEC_MERGE_PTRUE))] ++;; The first alternative doesn't need the earlyclobber, but the only case ++;; it would help is the uninteresting one in which operands 2 and 3 are ++;; the same register (despite having different modes). 
Making all the ++;; alternatives earlyclobber makes things more consistent for the ++;; register allocator. ++(define_insn_and_rewrite "*cond__nonextend" ++ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") ++ (unspec:SVE_FULL_F ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:SVE_FULL_F ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] ++ SVE_COND_ICVTF) ++ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] + "TARGET_SVE +- && (register_operand (operands[2], mode) +- || register_operand (operands[3], mode))" ++ && >= ++ && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "@ +- fsub\t%0., %1/m, %0., #%3 +- fadd\t%0., %1/m, %0., #%N3 +- fsubr\t%0., %1/m, %0., #%2 +- #" +- ; Split the unpredicated form after reload, so that we don't have +- ; the unnecessary PTRUE. +- "&& reload_completed +- && register_operand (operands[2], mode) +- && register_operand (operands[3], mode)" +- [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))] +-) +- +-;; Unpredicated floating-point multiplication. +-(define_expand "mul3" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 3) +- (mult:SVE_F +- (match_operand:SVE_F 1 "register_operand") +- (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" ++ cvtf\t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. ++ movprfx\t%0, %3\;cvtf\t%0., %1/m, %2." ++ "&& !rtx_equal_p (operands[1], operands[4])" + { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[4] = copy_rtx (operands[1]); + } ++ [(set_attr "movprfx" "*,yes,yes")] ++) ++ ++;; Predicated widening integer-to-float conversion with merging. ++(define_expand "@cond__extend" ++ [(set (match_operand:VNx2DF_ONLY 0 "register_operand") ++ (unspec:VNx2DF_ONLY ++ [(match_operand:VNx2BI 1 "register_operand") ++ (unspec:VNx2DF_ONLY ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:VNx4SI_ONLY 2 "register_operand")] ++ SVE_COND_ICVTF) ++ (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE" + ) + +-;; Floating-point multiplication predicated with a PTRUE. +-(define_insn_and_split "*mul3" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (mult:SVE_F +- (match_operand:SVE_F 2 "register_operand" "%0, w") +- (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))] +- UNSPEC_MERGE_PTRUE))] ++(define_insn "*cond__extend" ++ [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w") ++ (unspec:VNx2DF_ONLY ++ [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:VNx2DF_ONLY ++ [(match_dup 1) ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")] ++ SVE_COND_ICVTF) ++ (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] + "TARGET_SVE" + "@ +- fmul\t%0., %1/m, %0., #%3 +- #" +- ; Split the unpredicated form after reload, so that we don't have +- ; the unnecessary PTRUE. +- "&& reload_completed +- && register_operand (operands[3], mode)" +- [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))] ++ cvtf\t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. ++ movprfx\t%0, %3\;cvtf\t%0., %1/m, %2." 
++ [(set_attr "movprfx" "*,yes,yes")] + ) + +-;; Unpredicated floating-point binary operations (post-RA only). +-;; These are generated by splitting a predicated instruction whose +-;; predicate is unused. +-(define_insn "*post_ra_3" +- [(set (match_operand:SVE_F 0 "register_operand" "=w") +- (SVE_UNPRED_FP_BINARY:SVE_F +- (match_operand:SVE_F 1 "register_operand" "w") +- (match_operand:SVE_F 2 "register_operand" "w")))] +- "TARGET_SVE && reload_completed" +- "\t%0., %1., %2.") ++;; ------------------------------------------------------------------------- ++;; ---- [FP<-INT] Packs ++;; ------------------------------------------------------------------------- ++;; No patterns here yet! ++;; ------------------------------------------------------------------------- + +-;; Unpredicated fma (%0 = (%1 * %2) + %3). +-(define_expand "fma4" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 4) +- (fma:SVE_F (match_operand:SVE_F 1 "register_operand") +- (match_operand:SVE_F 2 "register_operand") +- (match_operand:SVE_F 3 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; ------------------------------------------------------------------------- ++;; ---- [FP<-INT] Unpacks ++;; ------------------------------------------------------------------------- ++;; The patterns in this section are synthetic. ++;; ------------------------------------------------------------------------- ++ ++;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI ++;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the ++;; unpacked VNx4SI to VNx2DF. ++(define_expand "vec_unpack_float__vnx4si" ++ [(match_operand:VNx2DF 0 "register_operand") ++ (FLOATUORS:VNx2DF ++ (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] ++ UNPACK_UNSIGNED))] + "TARGET_SVE" + { +- operands[4] = force_reg (mode, CONSTM1_RTX (mode)); ++ /* Use ZIP to do the unpack, since we don't care about the upper halves ++ and since it has the nice property of not needing any subregs. ++ If using UUNPK* turns out to be preferable, we could model it as ++ a ZIP whose first operand is zero. */ ++ rtx temp = gen_reg_rtx (VNx4SImode); ++ emit_insn (( ++ ? gen_aarch64_sve_zip2vnx4si ++ : gen_aarch64_sve_zip1vnx4si) ++ (temp, operands[1], operands[1])); ++ rtx ptrue = aarch64_ptrue_reg (VNx2BImode); ++ rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); ++ emit_insn (gen_aarch64_sve__extendvnx4sivnx2df ++ (operands[0], ptrue, temp, strictness)); ++ DONE; + } + ) + +-;; fma predicated with a PTRUE. +-(define_insn "*fma4" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w") +- (match_operand:SVE_F 4 "register_operand" "w, w, w") +- (match_operand:SVE_F 2 "register_operand" "w, 0, w"))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" +- "@ +- fmad\t%0., %1/m, %4., %2. +- fmla\t%0., %1/m, %3., %4. +- movprfx\t%0, %2\;fmla\t%0., %1/m, %3., %4." +- [(set_attr "movprfx" "*,*,yes")] +-) ++;; ------------------------------------------------------------------------- ++;; ---- [FP<-FP] Packs ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FCVT ++;; ------------------------------------------------------------------------- + +-;; Unpredicated fnma (%0 = (-%1 * %2) + %3). 
+-(define_expand "fnma4" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 4) +- (fma:SVE_F (neg:SVE_F +- (match_operand:SVE_F 1 "register_operand")) +- (match_operand:SVE_F 2 "register_operand") +- (match_operand:SVE_F 3 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack ++;; the results into a single vector. ++(define_expand "vec_pack_trunc_" ++ [(set (match_dup 4) ++ (unspec:SVE_FULL_HSF ++ [(match_dup 3) ++ (const_int SVE_RELAXED_GP) ++ (match_operand: 1 "register_operand")] ++ UNSPEC_COND_FCVT)) ++ (set (match_dup 5) ++ (unspec:SVE_FULL_HSF ++ [(match_dup 3) ++ (const_int SVE_RELAXED_GP) ++ (match_operand: 2 "register_operand")] ++ UNSPEC_COND_FCVT)) ++ (set (match_operand:SVE_FULL_HSF 0 "register_operand") ++ (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] + "TARGET_SVE" + { +- operands[4] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[3] = aarch64_ptrue_reg (mode); ++ operands[4] = gen_reg_rtx (mode); ++ operands[5] = gen_reg_rtx (mode); + } + ) + +-;; fnma predicated with a PTRUE. +-(define_insn "*fnma4" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (fma:SVE_F (neg:SVE_F +- (match_operand:SVE_F 3 "register_operand" "%0, w, w")) +- (match_operand:SVE_F 4 "register_operand" "w, w, w") +- (match_operand:SVE_F 2 "register_operand" "w, 0, w"))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" ++;; Predicated float-to-float truncation. ++(define_insn "@aarch64_sve__trunc" ++ [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w") ++ (unspec:SVE_FULL_HSF ++ [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SI 3 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_SDF 2 "register_operand" "w")] ++ SVE_COND_FCVT))] ++ "TARGET_SVE && > " ++ "fcvt\t%0., %1/m, %2." ++) ++ ++;; Predicated float-to-float truncation with merging. ++(define_expand "@cond__trunc" ++ [(set (match_operand:SVE_FULL_HSF 0 "register_operand") ++ (unspec:SVE_FULL_HSF ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_HSF ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_SDF 2 "register_operand")] ++ SVE_COND_FCVT) ++ (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && > " ++) ++ ++(define_insn "*cond__trunc" ++ [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w") ++ (unspec:SVE_FULL_HSF ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:SVE_FULL_HSF ++ [(match_dup 1) ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")] ++ SVE_COND_FCVT) ++ (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && > " + "@ +- fmsb\t%0., %1/m, %4., %2. +- fmls\t%0., %1/m, %3., %4. +- movprfx\t%0, %2\;fmls\t%0., %1/m, %3., %4." +- [(set_attr "movprfx" "*,*,yes")] ++ fcvt\t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. ++ movprfx\t%0, %3\;fcvt\t%0., %1/m, %2." ++ [(set_attr "movprfx" "*,yes,yes")] ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP<-FP] Packs (bfloat16) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - BFCVT (BF16) ++;; - BFCVTNT (BF16) ++;; ------------------------------------------------------------------------- ++ ++;; Predicated BFCVT. 
++(define_insn "@aarch64_sve__trunc" ++ [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w") ++ (unspec:VNx8BF_ONLY ++ [(match_operand:VNx4BI 1 "register_operand" "Upl") ++ (match_operand:SI 3 "aarch64_sve_gp_strictness") ++ (match_operand:VNx4SF_ONLY 2 "register_operand" "w")] ++ SVE_COND_FCVT))] ++ "TARGET_SVE_BF16" ++ "bfcvt\t%0.h, %1/m, %2.s" ++) ++ ++;; Predicated BFCVT with merging. ++(define_expand "@cond__trunc" ++ [(set (match_operand:VNx8BF_ONLY 0 "register_operand") ++ (unspec:VNx8BF_ONLY ++ [(match_operand:VNx4BI 1 "register_operand") ++ (unspec:VNx8BF_ONLY ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:VNx4SF_ONLY 2 "register_operand")] ++ SVE_COND_FCVT) ++ (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE_BF16" ++) ++ ++(define_insn "*cond__trunc" ++ [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w") ++ (unspec:VNx8BF_ONLY ++ [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:VNx8BF_ONLY ++ [(match_dup 1) ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")] ++ SVE_COND_FCVT) ++ (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE_BF16" ++ "@ ++ bfcvt\t%0.h, %1/m, %2.s ++ movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s ++ movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s" ++ [(set_attr "movprfx" "*,yes,yes")] + ) + +-;; Unpredicated fms (%0 = (%1 * %2) - %3). +-(define_expand "fms4" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 4) +- (fma:SVE_F (match_operand:SVE_F 1 "register_operand") +- (match_operand:SVE_F 2 "register_operand") +- (neg:SVE_F +- (match_operand:SVE_F 3 "register_operand")))] +- UNSPEC_MERGE_PTRUE))] ++;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_* ++;; pair because the even elements always have to be supplied for active ++;; elements, even if the inactive elements don't matter. ++;; ++;; This instructions does not take MOVPRFX. ++(define_insn "@aarch64_sve_cvtnt" ++ [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w") ++ (unspec:VNx8BF_ONLY ++ [(match_operand:VNx4BI 2 "register_operand" "Upl") ++ (const_int SVE_STRICT_GP) ++ (match_operand:VNx8BF_ONLY 1 "register_operand" "0") ++ (match_operand:VNx4SF 3 "register_operand" "w")] ++ UNSPEC_COND_FCVTNT))] ++ "TARGET_SVE_BF16" ++ "bfcvtnt\t%0.h, %2/m, %3.s" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [FP<-FP] Unpacks ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - FCVT ++;; ------------------------------------------------------------------------- ++ ++;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. ++;; First unpack the source without conversion, then float-convert the ++;; unpacked source. ++(define_expand "vec_unpacks__" ++ [(match_operand: 0 "register_operand") ++ (unspec:SVE_FULL_HSF ++ [(match_operand:SVE_FULL_HSF 1 "register_operand")] ++ UNPACK_UNSIGNED)] + "TARGET_SVE" + { +- operands[4] = force_reg (mode, CONSTM1_RTX (mode)); ++ /* Use ZIP to do the unpack, since we don't care about the upper halves ++ and since it has the nice property of not needing any subregs. ++ If using UUNPK* turns out to be preferable, we could model it as ++ a ZIP whose first operand is zero. */ ++ rtx temp = gen_reg_rtx (mode); ++ emit_insn (( ++ ? 
gen_aarch64_sve_zip2 ++ : gen_aarch64_sve_zip1) ++ (temp, operands[1], operands[1])); ++ rtx ptrue = aarch64_ptrue_reg (mode); ++ rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); ++ emit_insn (gen_aarch64_sve_fcvt_nontrunc ++ (operands[0], ptrue, temp, strictness)); ++ DONE; + } + ) + +-;; fms predicated with a PTRUE. +-(define_insn "*fms4" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w") +- (match_operand:SVE_F 4 "register_operand" "w, w, w") +- (neg:SVE_F +- (match_operand:SVE_F 2 "register_operand" "w, 0, w")))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" ++;; Predicated float-to-float extension. ++(define_insn "@aarch64_sve__nontrunc" ++ [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") ++ (unspec:SVE_FULL_SDF ++ [(match_operand: 1 "register_operand" "Upl") ++ (match_operand:SI 3 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_HSF 2 "register_operand" "w")] ++ SVE_COND_FCVT))] ++ "TARGET_SVE && > " ++ "fcvt\t%0., %1/m, %2." ++) ++ ++;; Predicated float-to-float extension with merging. ++(define_expand "@cond__nontrunc" ++ [(set (match_operand:SVE_FULL_SDF 0 "register_operand") ++ (unspec:SVE_FULL_SDF ++ [(match_operand: 1 "register_operand") ++ (unspec:SVE_FULL_SDF ++ [(match_dup 1) ++ (const_int SVE_STRICT_GP) ++ (match_operand:SVE_FULL_HSF 2 "register_operand")] ++ SVE_COND_FCVT) ++ (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && > " ++) ++ ++(define_insn "*cond__nontrunc" ++ [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w") ++ (unspec:SVE_FULL_SDF ++ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") ++ (unspec:SVE_FULL_SDF ++ [(match_dup 1) ++ (match_operand:SI 4 "aarch64_sve_gp_strictness") ++ (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")] ++ SVE_COND_FCVT) ++ (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] ++ UNSPEC_SEL))] ++ "TARGET_SVE && > " + "@ +- fnmsb\t%0., %1/m, %4., %2. +- fnmls\t%0., %1/m, %3., %4. +- movprfx\t%0, %2\;fnmls\t%0., %1/m, %3., %4." +- [(set_attr "movprfx" "*,*,yes")] ++ fcvt\t%0., %1/m, %2. ++ movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. ++ movprfx\t%0, %3\;fcvt\t%0., %1/m, %2." ++ [(set_attr "movprfx" "*,yes,yes")] + ) + +-;; Unpredicated fnms (%0 = (-%1 * %2) - %3). +-(define_expand "fnms4" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 4) +- (fma:SVE_F (neg:SVE_F +- (match_operand:SVE_F 1 "register_operand")) +- (match_operand:SVE_F 2 "register_operand") +- (neg:SVE_F +- (match_operand:SVE_F 3 "register_operand")))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" +- { +- operands[4] = force_reg (mode, CONSTM1_RTX (mode)); +- } +-) ++;; ------------------------------------------------------------------------- ++;; ---- [PRED<-PRED] Packs ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - UZP1 ++;; ------------------------------------------------------------------------- + +-;; fnms predicated with a PTRUE. 
+-(define_insn "*fnms4" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (fma:SVE_F (neg:SVE_F +- (match_operand:SVE_F 3 "register_operand" "%0, w, w")) +- (match_operand:SVE_F 4 "register_operand" "w, w, w") +- (neg:SVE_F +- (match_operand:SVE_F 2 "register_operand" "w, 0, w")))] +- UNSPEC_MERGE_PTRUE))] ++;; Predicate pack. Use UZP1 on the narrower type, which discards ++;; the high part of each wide element. ++(define_insn "vec_pack_trunc_" ++ [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") ++ (unspec:PRED_BHS ++ [(match_operand: 1 "register_operand" "Upa") ++ (match_operand: 2 "register_operand" "Upa")] ++ UNSPEC_PACK))] + "TARGET_SVE" +- "@ +- fnmad\t%0., %1/m, %4., %2. +- fnmla\t%0., %1/m, %3., %4. +- movprfx\t%0, %2\;fnmla\t%0., %1/m, %3., %4." +- [(set_attr "movprfx" "*,*,yes")] ++ "uzp1\t%0., %1., %2." + ) + +-;; Unpredicated floating-point division. +-(define_expand "div3" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 3) +- (div:SVE_F (match_operand:SVE_F 1 "register_operand") +- (match_operand:SVE_F 2 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; ------------------------------------------------------------------------- ++;; ---- [PRED<-PRED] Unpacks ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - PUNPKHI ++;; - PUNPKLO ++;; ------------------------------------------------------------------------- ++ ++;; Unpack the low or high half of a predicate, where "high" refers to ++;; the low-numbered lanes for big-endian and the high-numbered lanes ++;; for little-endian. ++(define_expand "vec_unpack__" ++ [(match_operand: 0 "register_operand") ++ (unspec: [(match_operand:PRED_BHS 1 "register_operand")] ++ UNPACK)] + "TARGET_SVE" + { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); ++ emit_insn (( ++ ? gen_aarch64_sve_punpkhi_ ++ : gen_aarch64_sve_punpklo_) ++ (operands[0], operands[1])); ++ DONE; + } + ) + +-;; Floating-point division predicated with a PTRUE. +-(define_insn "*div3" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w") +- (match_operand:SVE_F 3 "register_operand" "w, 0, w"))] +- UNSPEC_MERGE_PTRUE))] ++(define_insn "@aarch64_sve_punpk_" ++ [(set (match_operand: 0 "register_operand" "=Upa") ++ (unspec: [(match_operand:PRED_BHS 1 "register_operand" "Upa")] ++ UNPACK_UNSIGNED))] + "TARGET_SVE" +- "@ +- fdiv\t%0., %1/m, %0., %3. +- fdivr\t%0., %1/m, %0., %2. +- movprfx\t%0, %2\;fdiv\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,*,yes")] ++ "punpk\t%0.h, %1.b" + ) + +-;; Unpredicated FNEG, FABS and FSQRT. 
+-(define_expand "2" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 2) +- (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; ========================================================================= ++;; == Vector partitioning ++;; ========================================================================= ++ ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Unary partitioning ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - BRKA ++;; - BRKAS ++;; - BRKB ++;; - BRKBS ++;; ------------------------------------------------------------------------- ++ ++;; Note that unlike most other instructions that have both merging and ++;; zeroing forms, these instructions don't operate elementwise and so ++;; don't fit the IFN_COND model. ++(define_insn "@aarch64_brk" ++ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa") ++ (unspec:VNx16BI ++ [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") ++ (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") ++ (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] ++ SVE_BRK_UNARY))] ++ "TARGET_SVE" ++ "@ ++ brk\t%0.b, %1/z, %2.b ++ brk\t%0.b, %1/m, %2.b" ++) ++ ++;; Same, but also producing a flags result. ++(define_insn "*aarch64_brk_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") ++ (match_dup 1) ++ (match_operand:SI 4 "aarch64_sve_ptrue_flag") ++ (unspec:VNx16BI ++ [(match_dup 1) ++ (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") ++ (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] ++ SVE_BRK_UNARY)] ++ UNSPEC_PTEST)) ++ (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa") ++ (unspec:VNx16BI ++ [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ SVE_BRK_UNARY))] ++ "TARGET_SVE" ++ "@ ++ brks\t%0.b, %1/z, %2.b ++ brks\t%0.b, %1/m, %2.b" ++) ++ ++;; Same, but with only the flags result being interesting. ++(define_insn "*aarch64_brk_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") ++ (match_dup 1) ++ (match_operand:SI 4 "aarch64_sve_ptrue_flag") ++ (unspec:VNx16BI ++ [(match_dup 1) ++ (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") ++ (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] ++ SVE_BRK_UNARY)] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))] + "TARGET_SVE" ++ "@ ++ brks\t%0.b, %1/z, %2.b ++ brks\t%0.b, %1/m, %2.b" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Binary partitioning ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - BRKN ++;; - BRKNS ++;; - BRKPA ++;; - BRKPAS ++;; - BRKPB ++;; - BRKPBS ++;; ------------------------------------------------------------------------- ++ ++;; Binary BRKs (BRKN, BRKPA, BRKPB). ++(define_insn "@aarch64_brk" ++ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") ++ (unspec:VNx16BI ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_operand:VNx16BI 2 "register_operand" "Upa") ++ (match_operand:VNx16BI 3 "register_operand" "")] ++ SVE_BRK_BINARY))] ++ "TARGET_SVE" ++ "brk\t%0.b, %1/z, %2.b, %.b" ++) ++ ++;; Same, but also producing a flags result. 
++(define_insn "*aarch64_brk_cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_dup 1) ++ (match_operand:SI 4 "aarch64_sve_ptrue_flag") ++ (unspec:VNx16BI ++ [(match_dup 1) ++ (match_operand:VNx16BI 2 "register_operand" "Upa") ++ (match_operand:VNx16BI 3 "register_operand" "")] ++ SVE_BRK_BINARY)] ++ UNSPEC_PTEST)) ++ (set (match_operand:VNx16BI 0 "register_operand" "=Upa") ++ (unspec:VNx16BI ++ [(match_dup 1) ++ (match_dup 2) ++ (match_dup 3)] ++ SVE_BRK_BINARY))] ++ "TARGET_SVE" ++ "brks\t%0.b, %1/z, %2.b, %.b" ++) ++ ++;; Same, but with only the flags result being interesting. ++(define_insn "*aarch64_brk_ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_dup 1) ++ (match_operand:SI 4 "aarch64_sve_ptrue_flag") ++ (unspec:VNx16BI ++ [(match_dup 1) ++ (match_operand:VNx16BI 2 "register_operand" "Upa") ++ (match_operand:VNx16BI 3 "register_operand" "")] ++ SVE_BRK_BINARY)] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:VNx16BI 0 "=Upa"))] ++ "TARGET_SVE" ++ "brks\t%0.b, %1/z, %2.b, %.b" ++) ++ ++;; ------------------------------------------------------------------------- ++;; ---- [PRED] Scalarization ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - PFIRST ++;; - PNEXT ++;; ------------------------------------------------------------------------- ++ ++(define_insn "@aarch64_sve_" ++ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (unspec:PRED_ALL ++ [(match_operand:PRED_ALL 1 "register_operand" "Upa") ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (match_operand:PRED_ALL 3 "register_operand" "0")] ++ SVE_PITER)) ++ (clobber (reg:CC_NZC CC_REGNUM))] ++ "TARGET_SVE && >= " ++ "\t%0., %1, %0." ++) ++ ++;; Same, but also producing a flags result. ++(define_insn_and_rewrite "*aarch64_sve__cc" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_operand 2) ++ (match_operand:SI 3 "aarch64_sve_ptrue_flag") ++ (unspec:PRED_ALL ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (match_operand:PRED_ALL 6 "register_operand" "0")] ++ SVE_PITER)] ++ UNSPEC_PTEST)) ++ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (unspec:PRED_ALL ++ [(match_dup 4) ++ (match_dup 5) ++ (match_dup 6)] ++ SVE_PITER))] ++ "TARGET_SVE ++ && >= ++ && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" ++ "\t%0., %1, %0." ++ "&& !rtx_equal_p (operands[2], operands[4])" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[4] = operands[2]; ++ operands[5] = operands[3]; + } + ) + +-;; FNEG, FABS and FSQRT predicated with a PTRUE. +-(define_insn "*2" +- [(set (match_operand:SVE_F 0 "register_operand" "=w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl") +- (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" +- "\t%0., %1/m, %2." +-) +- +-;; Unpredicated FRINTy. +-(define_expand "2" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 2) +- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")] +- FRINT)] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" ++;; Same, but with only the flags result being interesting. 
++(define_insn_and_rewrite "*aarch64_sve__ptest" ++ [(set (reg:CC_NZC CC_REGNUM) ++ (unspec:CC_NZC ++ [(match_operand:VNx16BI 1 "register_operand" "Upa") ++ (match_operand 2) ++ (match_operand:SI 3 "aarch64_sve_ptrue_flag") ++ (unspec:PRED_ALL ++ [(match_operand 4) ++ (match_operand:SI 5 "aarch64_sve_ptrue_flag") ++ (match_operand:PRED_ALL 6 "register_operand" "0")] ++ SVE_PITER)] ++ UNSPEC_PTEST)) ++ (clobber (match_scratch:PRED_ALL 0 "=Upa"))] ++ "TARGET_SVE ++ && >= ++ && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" ++ "\t%0., %1, %0." ++ "&& !rtx_equal_p (operands[2], operands[4])" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ operands[4] = operands[2]; ++ operands[5] = operands[3]; + } + ) + +-;; FRINTy predicated with a PTRUE. +-(define_insn "*2" +- [(set (match_operand:SVE_F 0 "register_operand" "=w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl") +- (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")] +- FRINT)] +- UNSPEC_MERGE_PTRUE))] +- "TARGET_SVE" +- "frint\t%0., %1/m, %2." +-) ++;; ========================================================================= ++;; == Counting elements ++;; ========================================================================= + +-;; Unpredicated conversion of floats to integers of the same size (HF to HI, +-;; SF to SI or DF to DI). +-(define_expand "2" +- [(set (match_operand: 0 "register_operand") +- (unspec: +- [(match_dup 2) +- (FIXUORS: +- (match_operand:SVE_F 1 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Count elements in a pattern (scalar) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - CNTB ++;; - CNTD ++;; - CNTH ++;; - CNTW ++;; ------------------------------------------------------------------------- ++ ++;; Count the number of elements in an svpattern. Operand 1 is the pattern, ++;; operand 2 is the number of elements that fit in a 128-bit block, and ++;; operand 3 is a multiplier in the range [1, 16]. ++;; ++;; Note that this pattern isn't used for SV_ALL (but would work for that too). ++(define_insn "aarch64_sve_cnt_pat" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (unspec:SI [(match_operand:DI 1 "const_int_operand") ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT)))] + "TARGET_SVE" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1); + } + ) + +-;; Conversion of SF to DI, SI or HI, predicated with a PTRUE. +-(define_insn "*v16hsf2" +- [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") +- (unspec:SVE_HSDI +- [(match_operand: 1 "register_operand" "Upl") +- (FIXUORS:SVE_HSDI +- (match_operand:VNx8HF 2 "register_operand" "w"))] +- UNSPEC_MERGE_PTRUE))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Increment by the number of elements in a pattern (scalar) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - INC ++;; - SQINC ++;; - UQINC ++;; ------------------------------------------------------------------------- ++ ++;; Increment a DImode register by the number of elements in an svpattern. ++;; See aarch64_sve_cnt_pat for the counting behavior. 
++(define_insn "@aarch64_sve__pat" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ANY_PLUS:DI (zero_extend:DI ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT)) ++ (match_operand:DI_ONLY 1 "register_operand" "0")))] + "TARGET_SVE" +- "fcvtz\t%0., %1/m, %2.h" ++ { ++ return aarch64_output_sve_cnt_pat_immediate ("", "%x0", ++ operands + 2); ++ } + ) + +-;; Conversion of SF to DI or SI, predicated with a PTRUE. +-(define_insn "*vnx4sf2" +- [(set (match_operand:SVE_SDI 0 "register_operand" "=w") +- (unspec:SVE_SDI +- [(match_operand: 1 "register_operand" "Upl") +- (FIXUORS:SVE_SDI +- (match_operand:VNx4SF 2 "register_operand" "w"))] +- UNSPEC_MERGE_PTRUE))] ++;; Increment an SImode register by the number of elements in an svpattern ++;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting ++;; behavior. ++(define_insn "*aarch64_sve_incsi_pat" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT) ++ (match_operand:SI 1 "register_operand" "0")))] + "TARGET_SVE" +- "fcvtz\t%0., %1/m, %2.s" ++ { ++ return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2); ++ } + ) + +-;; Conversion of DF to DI or SI, predicated with a PTRUE. +-(define_insn "*vnx2df2" +- [(set (match_operand:SVE_SDI 0 "register_operand" "=w") +- (unspec:SVE_SDI +- [(match_operand:VNx2BI 1 "register_operand" "Upl") +- (FIXUORS:SVE_SDI +- (match_operand:VNx2DF 2 "register_operand" "w"))] +- UNSPEC_MERGE_PTRUE))] ++;; Increment an SImode register by the number of elements in an svpattern ++;; using saturating arithmetic, extending the result to 64 bits. ++;; ++;; See aarch64_sve_cnt_pat for the counting behavior. ++(define_insn "@aarch64_sve__pat" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (:DI ++ (SAT_PLUS:SI ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT) ++ (match_operand:SI_ONLY 1 "register_operand" "0"))))] + "TARGET_SVE" +- "fcvtz\t%0., %1/m, %2.d" ++ { ++ const char *registers = ( == SS_PLUS ? "%x0, %w0" : "%w0"); ++ return aarch64_output_sve_cnt_pat_immediate ("", registers, ++ operands + 2); ++ } + ) + +-;; Unpredicated conversion of integers to floats of the same size +-;; (HI to HF, SI to SF or DI to DF). +-(define_expand "2" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_dup 2) +- (FLOATUORS:SVE_F +- (match_operand: 1 "register_operand"))] +- UNSPEC_MERGE_PTRUE))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Increment by the number of elements in a pattern (vector) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - INC ++;; - SQINC ++;; - UQINC ++;; ------------------------------------------------------------------------- ++ ++;; Increment a vector of DIs by the number of elements in an svpattern. ++;; See aarch64_sve_cnt_pat for the counting behavior. 
++(define_insn "@aarch64_sve__pat" ++ [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") ++ (ANY_PLUS:VNx2DI ++ (vec_duplicate:VNx2DI ++ (zero_extend:DI ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT))) ++ (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))] + "TARGET_SVE" + { +- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); ++ if (which_alternative == 1) ++ output_asm_insn ("movprfx\t%0, %1", operands); ++ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", ++ operands + 2); + } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Conversion of DI, SI or HI to the same number of HFs, predicated +-;; with a PTRUE. +-(define_insn "*vnx8hf2" +- [(set (match_operand:VNx8HF 0 "register_operand" "=w") +- (unspec:VNx8HF +- [(match_operand: 1 "register_operand" "Upl") +- (FLOATUORS:VNx8HF +- (match_operand:SVE_HSDI 2 "register_operand" "w"))] +- UNSPEC_MERGE_PTRUE))] ++;; Increment a vector of SIs by the number of elements in an svpattern. ++;; See aarch64_sve_cnt_pat for the counting behavior. ++(define_insn "@aarch64_sve__pat" ++ [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") ++ (ANY_PLUS:VNx4SI ++ (vec_duplicate:VNx4SI ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT)) ++ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] + "TARGET_SVE" +- "cvtf\t%0.h, %1/m, %2." ++ { ++ if (which_alternative == 1) ++ output_asm_insn ("movprfx\t%0, %1", operands); ++ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", ++ operands + 2); ++ } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE. +-(define_insn "*vnx4sf2" +- [(set (match_operand:VNx4SF 0 "register_operand" "=w") +- (unspec:VNx4SF +- [(match_operand: 1 "register_operand" "Upl") +- (FLOATUORS:VNx4SF +- (match_operand:SVE_SDI 2 "register_operand" "w"))] +- UNSPEC_MERGE_PTRUE))] ++;; Increment a vector of HIs by the number of elements in an svpattern. ++;; See aarch64_sve_cnt_pat for the counting behavior. ++(define_expand "@aarch64_sve__pat" ++ [(set (match_operand:VNx8HI 0 "register_operand") ++ (ANY_PLUS:VNx8HI ++ (vec_duplicate:VNx8HI ++ (truncate:HI ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT))) ++ (match_operand:VNx8HI_ONLY 1 "register_operand")))] ++ "TARGET_SVE" ++) ++ ++(define_insn "*aarch64_sve__pat" ++ [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") ++ (ANY_PLUS:VNx8HI ++ (vec_duplicate:VNx8HI ++ (match_operator:HI 5 "subreg_lowpart_operator" ++ [(unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT)])) ++ (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))] + "TARGET_SVE" +- "cvtf\t%0.s, %1/m, %2." ++ { ++ if (which_alternative == 1) ++ output_asm_insn ("movprfx\t%0, %1", operands); ++ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", ++ operands + 2); ++ } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Conversion of DI or SI to DF, predicated with a PTRUE. 
+-(define_insn "aarch64_sve_vnx2df2" +- [(set (match_operand:VNx2DF 0 "register_operand" "=w") +- (unspec:VNx2DF +- [(match_operand:VNx2BI 1 "register_operand" "Upl") +- (FLOATUORS:VNx2DF +- (match_operand:SVE_SDI 2 "register_operand" "w"))] +- UNSPEC_MERGE_PTRUE))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Decrement by the number of elements in a pattern (scalar) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - DEC ++;; - SQDEC ++;; - UQDEC ++;; ------------------------------------------------------------------------- ++ ++;; Decrement a DImode register by the number of elements in an svpattern. ++;; See aarch64_sve_cnt_pat for the counting behavior. ++(define_insn "@aarch64_sve__pat" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0") ++ (zero_extend:DI ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT))))] + "TARGET_SVE" +- "cvtf\t%0.d, %1/m, %2." ++ { ++ return aarch64_output_sve_cnt_pat_immediate ("", "%x0", ++ operands + 2); ++ } + ) + +-;; Conversion of DFs to the same number of SFs, or SFs to the same number +-;; of HFs. +-(define_insn "*trunc2" +- [(set (match_operand:SVE_HSF 0 "register_operand" "=w") +- (unspec:SVE_HSF +- [(match_operand: 1 "register_operand" "Upl") +- (unspec:SVE_HSF +- [(match_operand: 2 "register_operand" "w")] +- UNSPEC_FLOAT_CONVERT)] +- UNSPEC_MERGE_PTRUE))] ++;; Decrement an SImode register by the number of elements in an svpattern ++;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting ++;; behavior. ++(define_insn "*aarch64_sve_decsi_pat" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI (match_operand:SI 1 "register_operand" "0") ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT)))] + "TARGET_SVE" +- "fcvt\t%0., %1/m, %2." ++ { ++ return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2); ++ } + ) + +-;; Conversion of SFs to the same number of DFs, or HFs to the same number +-;; of SFs. +-(define_insn "aarch64_sve_extend2" +- [(set (match_operand: 0 "register_operand" "=w") +- (unspec: +- [(match_operand: 1 "register_operand" "Upl") +- (unspec: +- [(match_operand:SVE_HSF 2 "register_operand" "w")] +- UNSPEC_FLOAT_CONVERT)] +- UNSPEC_MERGE_PTRUE))] ++;; Decrement an SImode register by the number of elements in an svpattern ++;; using saturating arithmetic, extending the result to 64 bits. ++;; ++;; See aarch64_sve_cnt_pat for the counting behavior. ++(define_insn "@aarch64_sve__pat" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (:DI ++ (SAT_MINUS:SI ++ (match_operand:SI_ONLY 1 "register_operand" "0") ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT))))] + "TARGET_SVE" +- "fcvt\t%0., %1/m, %2." ++ { ++ const char *registers = ( == SS_MINUS ? "%x0, %w0" : "%w0"); ++ return aarch64_output_sve_cnt_pat_immediate ("", registers, ++ operands + 2); ++ } + ) + +-;; Unpack the low or high half of a predicate, where "high" refers to +-;; the low-numbered lanes for big-endian and the high-numbered lanes +-;; for little-endian. 
+-(define_expand "vec_unpack__" +- [(match_operand: 0 "register_operand") +- (unspec: [(match_operand:PRED_BHS 1 "register_operand")] +- UNPACK)] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Decrement by the number of elements in a pattern (vector) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - DEC ++;; - SQDEC ++;; - UQDEC ++;; ------------------------------------------------------------------------- ++ ++;; Decrement a vector of DIs by the number of elements in an svpattern. ++;; See aarch64_sve_cnt_pat for the counting behavior. ++(define_insn "@aarch64_sve__pat" ++ [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") ++ (ANY_MINUS:VNx2DI ++ (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") ++ (vec_duplicate:VNx2DI ++ (zero_extend:DI ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT)))))] + "TARGET_SVE" + { +- emit_insn (( +- ? gen_aarch64_sve_punpkhi_ +- : gen_aarch64_sve_punpklo_) +- (operands[0], operands[1])); +- DONE; ++ if (which_alternative == 1) ++ output_asm_insn ("movprfx\t%0, %1", operands); ++ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", ++ operands + 2); + } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; PUNPKHI and PUNPKLO. +-(define_insn "aarch64_sve_punpk_" +- [(set (match_operand: 0 "register_operand" "=Upa") +- (unspec: [(match_operand:PRED_BHS 1 "register_operand" "Upa")] +- UNPACK_UNSIGNED))] ++;; Decrement a vector of SIs by the number of elements in an svpattern. ++;; See aarch64_sve_cnt_pat for the counting behavior. ++(define_insn "@aarch64_sve__pat" ++ [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") ++ (ANY_MINUS:VNx4SI ++ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w") ++ (vec_duplicate:VNx4SI ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT))))] + "TARGET_SVE" +- "punpk\t%0.h, %1.b" ++ { ++ if (which_alternative == 1) ++ output_asm_insn ("movprfx\t%0, %1", operands); ++ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", ++ operands + 2); ++ } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Unpack the low or high half of a vector, where "high" refers to +-;; the low-numbered lanes for big-endian and the high-numbered lanes +-;; for little-endian. +-(define_expand "vec_unpack__" +- [(match_operand: 0 "register_operand") +- (unspec: [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)] ++;; Decrement a vector of HIs by the number of elements in an svpattern. ++;; See aarch64_sve_cnt_pat for the counting behavior. 
++(define_expand "@aarch64_sve__pat" ++ [(set (match_operand:VNx8HI 0 "register_operand") ++ (ANY_MINUS:VNx8HI ++ (match_operand:VNx8HI_ONLY 1 "register_operand") ++ (vec_duplicate:VNx8HI ++ (truncate:HI ++ (unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT)))))] ++ "TARGET_SVE" ++) ++ ++(define_insn "*aarch64_sve__pat" ++ [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") ++ (ANY_MINUS:VNx8HI ++ (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w") ++ (vec_duplicate:VNx8HI ++ (match_operator:HI 5 "subreg_lowpart_operator" ++ [(unspec:SI [(match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand") ++ (match_operand:DI 4 "const_int_operand")] ++ UNSPEC_SVE_CNT_PAT)]))))] + "TARGET_SVE" + { +- emit_insn (( +- ? gen_aarch64_sve_unpkhi_ +- : gen_aarch64_sve_unpklo_) +- (operands[0], operands[1])); +- DONE; ++ if (which_alternative == 1) ++ output_asm_insn ("movprfx\t%0, %1", operands); ++ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", ++ operands + 2); + } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO. +-(define_insn "aarch64_sve_unpk_" +- [(set (match_operand: 0 "register_operand" "=w") +- (unspec: [(match_operand:SVE_BHSI 1 "register_operand" "w")] +- UNPACK))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Count elements in a predicate (scalar) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - CNTP ++;; ------------------------------------------------------------------------- ++ ++;; Count the number of set bits in a predicate. Operand 3 is true if ++;; operand 1 is known to be all-true. ++(define_insn "@aarch64_pred_cntp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl") ++ (match_operand:SI 2 "aarch64_sve_ptrue_flag") ++ (match_operand:PRED_ALL 3 "register_operand" "Upa")] ++ UNSPEC_CNTP)))] ++ "TARGET_SVE" ++ "cntp\t%x0, %1, %3.") ++ ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Increment by the number of elements in a predicate (scalar) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - INCP ++;; - SQINCP ++;; - UQINCP ++;; ------------------------------------------------------------------------- ++ ++;; Increment a DImode register by the number of set bits in a predicate. ++;; See aarch64_sve_cntp for a description of the operands. ++(define_expand "@aarch64_sve__cntp" ++ [(set (match_operand:DI 0 "register_operand") ++ (ANY_PLUS:DI ++ (zero_extend:DI ++ (unspec:SI [(match_dup 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:PRED_ALL 2 "register_operand")] ++ UNSPEC_CNTP)) ++ (match_operand:DI_ONLY 1 "register_operand")))] + "TARGET_SVE" +- "unpk\t%0., %1." ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } + ) + +-;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. +-;; First unpack the source without conversion, then float-convert the +-;; unpacked source. 
+-(define_expand "vec_unpacks__" +- [(match_operand: 0 "register_operand") +- (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")] +- UNPACK_UNSIGNED)] ++(define_insn_and_rewrite "*aarch64_sve__cntp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ANY_PLUS:DI ++ (zero_extend:DI ++ (unspec:SI [(match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")] ++ UNSPEC_CNTP)) ++ (match_operand:DI_ONLY 1 "register_operand" "0")))] + "TARGET_SVE" ++ "p\t%x0, %2." ++ "&& !CONSTANT_P (operands[3])" + { +- /* Use ZIP to do the unpack, since we don't care about the upper halves +- and since it has the nice property of not needing any subregs. +- If using UUNPK* turns out to be preferable, we could model it as +- a ZIP whose first operand is zero. */ +- rtx temp = gen_reg_rtx (mode); +- emit_insn (( +- ? gen_aarch64_sve_zip2 +- : gen_aarch64_sve_zip1) +- (temp, operands[1], operands[1])); +- rtx ptrue = force_reg (mode, CONSTM1_RTX (mode)); +- emit_insn (gen_aarch64_sve_extend2 (operands[0], +- ptrue, temp)); +- DONE; ++ operands[3] = CONSTM1_RTX (mode); + } + ) + +-;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI +-;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the +-;; unpacked VNx4SI to VNx2DF. +-(define_expand "vec_unpack_float__vnx4si" +- [(match_operand:VNx2DF 0 "register_operand") +- (FLOATUORS:VNx2DF +- (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] +- UNPACK_UNSIGNED))] +- "TARGET_SVE" ++;; Increment an SImode register by the number of set bits in a predicate ++;; using modular arithmetic. See aarch64_sve_cntp for a description of ++;; the operands. ++(define_insn_and_rewrite "*aarch64_incsi_cntp" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI ++ (unspec:SI [(match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")] ++ UNSPEC_CNTP) ++ (match_operand:SI 1 "register_operand" "0")))] ++ "TARGET_SVE" ++ "incp\t%x0, %2." ++ "&& !CONSTANT_P (operands[3])" + { +- /* Use ZIP to do the unpack, since we don't care about the upper halves +- and since it has the nice property of not needing any subregs. +- If using UUNPK* turns out to be preferable, we could model it as +- a ZIP whose first operand is zero. */ +- rtx temp = gen_reg_rtx (VNx4SImode); +- emit_insn (( +- ? gen_aarch64_sve_zip2vnx4si +- : gen_aarch64_sve_zip1vnx4si) +- (temp, operands[1], operands[1])); +- rtx ptrue = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode)); +- emit_insn (gen_aarch64_sve_vnx4sivnx2df2 (operands[0], +- ptrue, temp)); +- DONE; ++ operands[3] = CONSTM1_RTX (mode); + } + ) + +-;; Predicate pack. Use UZP1 on the narrower type, which discards +-;; the high part of each wide element. +-(define_insn "vec_pack_trunc_" +- [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") +- (unspec:PRED_BHS +- [(match_operand: 1 "register_operand" "Upa") +- (match_operand: 2 "register_operand" "Upa")] +- UNSPEC_PACK))] ++;; Increment an SImode register by the number of set bits in a predicate ++;; using saturating arithmetic, extending the result to 64 bits. ++;; ++;; See aarch64_sve_cntp for a description of the operands. ++(define_expand "@aarch64_sve__cntp" ++ [(set (match_operand:DI 0 "register_operand") ++ (:DI ++ (SAT_PLUS:SI ++ (unspec:SI [(match_dup 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:PRED_ALL 2 "register_operand")] ++ UNSPEC_CNTP) ++ (match_operand:SI_ONLY 1 "register_operand"))))] + "TARGET_SVE" +- "uzp1\t%0., %1., %2." 
++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } + ) + +-;; Integer pack. Use UZP1 on the narrower type, which discards +-;; the high part of each wide element. +-(define_insn "vec_pack_trunc_" +- [(set (match_operand:SVE_BHSI 0 "register_operand" "=w") +- (unspec:SVE_BHSI +- [(match_operand: 1 "register_operand" "w") +- (match_operand: 2 "register_operand" "w")] +- UNSPEC_PACK))] ++(define_insn_and_rewrite "*aarch64_sve__cntp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (:DI ++ (SAT_PLUS:SI ++ (unspec:SI [(match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")] ++ UNSPEC_CNTP) ++ (match_operand:SI_ONLY 1 "register_operand" "0"))))] + "TARGET_SVE" +- "uzp1\t%0., %1., %2." ++ { ++ if ( == SS_PLUS) ++ return "p\t%x0, %2., %w0"; ++ else ++ return "p\t%w0, %2."; ++ } ++ "&& !CONSTANT_P (operands[3])" ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } + ) + +-;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack +-;; the results into a single vector. +-(define_expand "vec_pack_trunc_" +- [(set (match_dup 4) +- (unspec:SVE_HSF +- [(match_dup 3) +- (unspec:SVE_HSF [(match_operand: 1 "register_operand")] +- UNSPEC_FLOAT_CONVERT)] +- UNSPEC_MERGE_PTRUE)) +- (set (match_dup 5) +- (unspec:SVE_HSF +- [(match_dup 3) +- (unspec:SVE_HSF [(match_operand: 2 "register_operand")] +- UNSPEC_FLOAT_CONVERT)] +- UNSPEC_MERGE_PTRUE)) +- (set (match_operand:SVE_HSF 0 "register_operand") +- (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Increment by the number of elements in a predicate (vector) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - INCP ++;; - SQINCP ++;; - UQINCP ++;; ------------------------------------------------------------------------- ++ ++;; Increment a vector of DIs by the number of set bits in a predicate. ++;; See aarch64_sve_cntp for a description of the operands. ++(define_expand "@aarch64_sve__cntp" ++ [(set (match_operand:VNx2DI 0 "register_operand") ++ (ANY_PLUS:VNx2DI ++ (vec_duplicate:VNx2DI ++ (zero_extend:DI ++ (unspec:SI ++ [(match_dup 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand")] ++ UNSPEC_CNTP))) ++ (match_operand:VNx2DI_ONLY 1 "register_operand")))] + "TARGET_SVE" + { +- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); +- operands[4] = gen_reg_rtx (mode); +- operands[5] = gen_reg_rtx (mode); ++ operands[3] = CONSTM1_RTX (mode); + } + ) + +-;; Convert two vectors of DF to SI and pack the results into a single vector. 
+-(define_expand "vec_pack_fix_trunc_vnx2df" +- [(set (match_dup 4) +- (unspec:VNx4SI +- [(match_dup 3) +- (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))] +- UNSPEC_MERGE_PTRUE)) +- (set (match_dup 5) +- (unspec:VNx4SI +- [(match_dup 3) +- (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))] +- UNSPEC_MERGE_PTRUE)) +- (set (match_operand:VNx4SI 0 "register_operand") +- (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] ++(define_insn_and_rewrite "*aarch64_sve__cntp" ++ [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") ++ (ANY_PLUS:VNx2DI ++ (vec_duplicate:VNx2DI ++ (zero_extend:DI ++ (unspec:SI ++ [(match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand" "Upa, Upa")] ++ UNSPEC_CNTP))) ++ (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))] + "TARGET_SVE" ++ "@ ++ p\t%0.d, %2 ++ movprfx\t%0, %1\;p\t%0.d, %2" ++ "&& !CONSTANT_P (operands[3])" + { +- operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode)); +- operands[4] = gen_reg_rtx (VNx4SImode); +- operands[5] = gen_reg_rtx (VNx4SImode); ++ operands[3] = CONSTM1_RTX (mode); + } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Predicated floating-point operations with select. +-(define_expand "cond_" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand") +- (unspec:SVE_F +- [(match_operand:SVE_F 2 "register_operand") +- (match_operand:SVE_F 3 "register_operand")] +- SVE_COND_FP_BINARY) +- (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] +- UNSPEC_SEL))] ++;; Increment a vector of SIs by the number of set bits in a predicate. ++;; See aarch64_sve_cntp for a description of the operands. ++(define_expand "@aarch64_sve__cntp" ++ [(set (match_operand:VNx4SI 0 "register_operand") ++ (ANY_PLUS:VNx4SI ++ (vec_duplicate:VNx4SI ++ (unspec:SI ++ [(match_dup 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand")] ++ UNSPEC_CNTP)) ++ (match_operand:VNx4SI_ONLY 1 "register_operand")))] + "TARGET_SVE" ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } + ) + +-;; Predicated floating-point operations with select matching output. +-(define_insn "*cond__0" +- [(set (match_operand:SVE_F 0 "register_operand" "+w, w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (unspec:SVE_F +- [(match_operand:SVE_F 2 "register_operand" "0, w, w") +- (match_operand:SVE_F 3 "register_operand" "w, 0, w")] +- SVE_COND_FP_BINARY) +- (match_dup 0)] +- UNSPEC_SEL))] ++(define_insn_and_rewrite "*aarch64_sve__cntp" ++ [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") ++ (ANY_PLUS:VNx4SI ++ (vec_duplicate:VNx4SI ++ (unspec:SI ++ [(match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand" "Upa, Upa")] ++ UNSPEC_CNTP)) ++ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] + "TARGET_SVE" + "@ +- \t%0., %1/m, %0., %3. +- \t%0., %1/m, %0., %2. +- movprfx\t%0, %1/m, %2\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,*,yes")] ++ p\t%0.s, %2 ++ movprfx\t%0, %1\;p\t%0.s, %2" ++ "&& !CONSTANT_P (operands[3])" ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } ++ [(set_attr "movprfx" "*,yes")] + ) + +-;; Predicated floating-point operations with select matching first operand. 
+-(define_insn "*cond__2" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (unspec:SVE_F +- [(match_operand:SVE_F 2 "register_operand" "0, w") +- (match_operand:SVE_F 3 "register_operand" "w, w")] +- SVE_COND_FP_BINARY) +- (match_dup 2)] +- UNSPEC_SEL))] ++;; Increment a vector of HIs by the number of set bits in a predicate. ++;; See aarch64_sve_cntp for a description of the operands. ++(define_expand "@aarch64_sve__cntp" ++ [(set (match_operand:VNx8HI 0 "register_operand") ++ (ANY_PLUS:VNx8HI ++ (vec_duplicate:VNx8HI ++ (truncate:HI ++ (unspec:SI ++ [(match_dup 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand")] ++ UNSPEC_CNTP))) ++ (match_operand:VNx8HI_ONLY 1 "register_operand")))] + "TARGET_SVE" +- "@ +- \t%0., %1/m, %0., %3. +- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "*,yes")] ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } + ) + +-;; Predicated floating-point operations with select matching second operand. +-(define_insn "*cond__3" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (unspec:SVE_F +- [(match_operand:SVE_F 2 "register_operand" "w, w") +- (match_operand:SVE_F 3 "register_operand" "0, w")] +- SVE_COND_FP_BINARY) +- (match_dup 3)] +- UNSPEC_SEL))] ++(define_insn_and_rewrite "*aarch64_sve__cntp" ++ [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") ++ (ANY_PLUS:VNx8HI ++ (vec_duplicate:VNx8HI ++ (match_operator:HI 3 "subreg_lowpart_operator" ++ [(unspec:SI ++ [(match_operand 4) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand" "Upa, Upa")] ++ UNSPEC_CNTP)])) ++ (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))] + "TARGET_SVE" + "@ +- \t%0., %1/m, %0., %2. +- movprfx\t%0, %3\;\t%0., %1/m, %0., %2." ++ p\t%0.h, %2 ++ movprfx\t%0, %1\;p\t%0.h, %2" ++ "&& !CONSTANT_P (operands[4])" ++ { ++ operands[4] = CONSTM1_RTX (mode); ++ } + [(set_attr "movprfx" "*,yes")] + ) + +-;; Predicated floating-point operations with select matching zero. +-(define_insn "*cond__z" +- [(set (match_operand:SVE_F 0 "register_operand" "=&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl") +- (unspec:SVE_F +- [(match_operand:SVE_F 2 "register_operand" "w") +- (match_operand:SVE_F 3 "register_operand" "w")] +- SVE_COND_FP_BINARY) +- (match_operand:SVE_F 4 "aarch64_simd_imm_zero")] +- UNSPEC_SEL))] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Decrement by the number of elements in a predicate (scalar) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - DECP ++;; - SQDECP ++;; - UQDECP ++;; ------------------------------------------------------------------------- ++ ++;; Decrement a DImode register by the number of set bits in a predicate. ++;; See aarch64_sve_cntp for a description of the operands. ++(define_expand "@aarch64_sve__cntp" ++ [(set (match_operand:DI 0 "register_operand") ++ (ANY_MINUS:DI ++ (match_operand:DI_ONLY 1 "register_operand") ++ (zero_extend:DI ++ (unspec:SI [(match_dup 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:PRED_ALL 2 "register_operand")] ++ UNSPEC_CNTP))))] + "TARGET_SVE" +- "movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." +- [(set_attr "movprfx" "yes")] ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } + ) + +-;; Synthetic predication of floating-point operations with select unmatched. 
+-(define_insn_and_split "*cond__any" +- [(set (match_operand:SVE_F 0 "register_operand" "=&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl") +- (unspec:SVE_F +- [(match_operand:SVE_F 2 "register_operand" "w") +- (match_operand:SVE_F 3 "register_operand" "w")] +- SVE_COND_FP_BINARY) +- (match_operand:SVE_F 4 "register_operand" "w")] +- UNSPEC_SEL))] ++(define_insn_and_rewrite "*aarch64_sve__cntp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ANY_MINUS:DI ++ (match_operand:DI_ONLY 1 "register_operand" "0") ++ (zero_extend:DI ++ (unspec:SI [(match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")] ++ UNSPEC_CNTP))))] + "TARGET_SVE" +- "#" +- "&& reload_completed +- && !(rtx_equal_p (operands[0], operands[4]) +- || rtx_equal_p (operands[2], operands[4]) +- || rtx_equal_p (operands[3], operands[4]))" +- ; Not matchable by any one insn or movprfx insn. We need a separate select. +- [(set (match_dup 0) +- (unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL)) +- (set (match_dup 0) +- (unspec:SVE_F +- [(match_dup 1) +- (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY) +- (match_dup 0)] +- UNSPEC_SEL))] ++ "p\t%x0, %2." ++ "&& !CONSTANT_P (operands[3])" ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } + ) + +-;; Predicated floating-point ternary operations with select. +-(define_expand "cond_" +- [(set (match_operand:SVE_F 0 "register_operand") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand") +- (unspec:SVE_F +- [(match_operand:SVE_F 2 "register_operand") +- (match_operand:SVE_F 3 "register_operand") +- (match_operand:SVE_F 4 "register_operand")] +- SVE_COND_FP_TERNARY) +- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")] +- UNSPEC_SEL))] +- "TARGET_SVE" +-{ +- /* Swap the multiplication operands if the fallback value is the +- second of the two. */ +- if (rtx_equal_p (operands[3], operands[5])) +- std::swap (operands[2], operands[3]); +-}) ++;; Decrement an SImode register by the number of set bits in a predicate ++;; using modular arithmetic. See aarch64_sve_cntp for a description of the ++;; operands. ++(define_insn_and_rewrite "*aarch64_decsi_cntp" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (match_operand:SI 1 "register_operand" "0") ++ (unspec:SI [(match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")] ++ UNSPEC_CNTP)))] ++ "TARGET_SVE" ++ "decp\t%x0, %2." ++ "&& !CONSTANT_P (operands[3])" ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } ++) + +-;; Predicated floating-point ternary operations using the FMAD-like form. +-(define_insn "*cond__2" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (unspec:SVE_F +- [(match_operand:SVE_F 2 "register_operand" "0, w") +- (match_operand:SVE_F 3 "register_operand" "w, w") +- (match_operand:SVE_F 4 "register_operand" "w, w")] +- SVE_COND_FP_TERNARY) +- (match_dup 2)] +- UNSPEC_SEL))] ++;; Decrement an SImode register by the number of set bits in a predicate ++;; using saturating arithmetic, extending the result to 64 bits. ++;; ++;; See aarch64_sve_cntp for a description of the operands. 
++(define_expand "@aarch64_sve__cntp" ++ [(set (match_operand:DI 0 "register_operand") ++ (:DI ++ (SAT_MINUS:SI ++ (match_operand:SI_ONLY 1 "register_operand") ++ (unspec:SI [(match_dup 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:PRED_ALL 2 "register_operand")] ++ UNSPEC_CNTP))))] + "TARGET_SVE" +- "@ +- \t%0., %1/m, %3., %4. +- movprfx\t%0, %2\;\t%0., %1/m, %3., %4." +- [(set_attr "movprfx" "*,yes")] ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } + ) + +-;; Predicated floating-point ternary operations using the FMLA-like form. +-(define_insn "*cond__4" +- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl") +- (unspec:SVE_F +- [(match_operand:SVE_F 2 "register_operand" "w, w") +- (match_operand:SVE_F 3 "register_operand" "w, w") +- (match_operand:SVE_F 4 "register_operand" "0, w")] +- SVE_COND_FP_TERNARY) +- (match_dup 4)] +- UNSPEC_SEL))] ++(define_insn_and_rewrite "*aarch64_sve__cntp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (:DI ++ (SAT_MINUS:SI ++ (match_operand:SI_ONLY 1 "register_operand" "0") ++ (unspec:SI [(match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand:PRED_ALL 2 "register_operand" "Upa")] ++ UNSPEC_CNTP))))] + "TARGET_SVE" +- "@ +- \t%0., %1/m, %2., %3. +- movprfx\t%0, %4\;\t%0., %1/m, %2., %3." +- [(set_attr "movprfx" "*,yes")] ++ { ++ if ( == SS_MINUS) ++ return "p\t%x0, %2., %w0"; ++ else ++ return "p\t%w0, %2."; ++ } ++ "&& !CONSTANT_P (operands[3])" ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } + ) + +-;; Predicated floating-point ternary operations in which the value for +-;; inactive lanes is distinct from the other inputs. +-(define_insn_and_split "*cond__any" +- [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w") +- (unspec:SVE_F +- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") +- (unspec:SVE_F +- [(match_operand:SVE_F 2 "register_operand" "w, w, w") +- (match_operand:SVE_F 3 "register_operand" "w, w, w") +- (match_operand:SVE_F 4 "register_operand" "w, w, w")] +- SVE_COND_FP_TERNARY) +- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")] +- UNSPEC_SEL))] +- "TARGET_SVE +- && !rtx_equal_p (operands[2], operands[5]) +- && !rtx_equal_p (operands[3], operands[5]) +- && !rtx_equal_p (operands[4], operands[5])" +- "@ +- movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. +- movprfx\t%0., %1/m, %4.\;\t%0., %1/m, %2., %3. +- #" +- "&& reload_completed +- && !CONSTANT_P (operands[5]) +- && !rtx_equal_p (operands[0], operands[5])" +- [(set (match_dup 0) +- (unspec:SVE_F [(match_dup 1) (match_dup 4) (match_dup 5)] UNSPEC_SEL)) +- (set (match_dup 0) +- (unspec:SVE_F +- [(match_dup 1) +- (unspec:SVE_F [(match_dup 2) (match_dup 3) (match_dup 0)] +- SVE_COND_FP_TERNARY) +- (match_dup 0)] +- UNSPEC_SEL))] +- "" +- [(set_attr "movprfx" "yes")] ++;; ------------------------------------------------------------------------- ++;; ---- [INT] Decrement by the number of elements in a predicate (vector) ++;; ------------------------------------------------------------------------- ++;; Includes: ++;; - DECP ++;; - SQDECP ++;; - UQDECP ++;; ------------------------------------------------------------------------- ++ ++;; Decrement a vector of DIs by the number of set bits in a predicate. ++;; See aarch64_sve_cntp for a description of the operands. 
++(define_expand "@aarch64_sve__cntp" ++ [(set (match_operand:VNx2DI 0 "register_operand") ++ (ANY_MINUS:VNx2DI ++ (match_operand:VNx2DI_ONLY 1 "register_operand") ++ (vec_duplicate:VNx2DI ++ (zero_extend:DI ++ (unspec:SI ++ [(match_dup 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand")] ++ UNSPEC_CNTP)))))] ++ "TARGET_SVE" ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } + ) + +-;; Shift an SVE vector left and insert a scalar into element 0. +-(define_insn "vec_shl_insert_" +- [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") +- (unspec:SVE_ALL +- [(match_operand:SVE_ALL 1 "register_operand" "0, 0") +- (match_operand: 2 "register_operand" "rZ, w")] +- UNSPEC_INSR))] ++(define_insn_and_rewrite "*aarch64_sve__cntp" ++ [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") ++ (ANY_MINUS:VNx2DI ++ (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") ++ (vec_duplicate:VNx2DI ++ (zero_extend:DI ++ (unspec:SI ++ [(match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand" "Upa, Upa")] ++ UNSPEC_CNTP)))))] + "TARGET_SVE" + "@ +- insr\t%0., %2 +- insr\t%0., %2" ++ p\t%0.d, %2 ++ movprfx\t%0, %1\;p\t%0.d, %2" ++ "&& !CONSTANT_P (operands[3])" ++ { ++ operands[3] = CONSTM1_RTX (mode); ++ } ++ [(set_attr "movprfx" "*,yes")] + ) + +-(define_expand "copysign3" +- [(match_operand:SVE_F 0 "register_operand") +- (match_operand:SVE_F 1 "register_operand") +- (match_operand:SVE_F 2 "register_operand")] ++;; Decrement a vector of SIs by the number of set bits in a predicate. ++;; See aarch64_sve_cntp for a description of the operands. ++(define_expand "@aarch64_sve__cntp" ++ [(set (match_operand:VNx4SI 0 "register_operand") ++ (ANY_MINUS:VNx4SI ++ (match_operand:VNx4SI_ONLY 1 "register_operand") ++ (vec_duplicate:VNx4SI ++ (unspec:SI ++ [(match_dup 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand")] ++ UNSPEC_CNTP))))] + "TARGET_SVE" + { +- rtx sign = gen_reg_rtx (mode); +- rtx mant = gen_reg_rtx (mode); +- rtx int_res = gen_reg_rtx (mode); +- int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; +- +- rtx arg1 = lowpart_subreg (mode, operands[1], mode); +- rtx arg2 = lowpart_subreg (mode, operands[2], mode); +- +- emit_insn (gen_and3 +- (sign, arg2, +- aarch64_simd_gen_const_vector_dup (mode, +- HOST_WIDE_INT_M1U +- << bits))); +- emit_insn (gen_and3 +- (mant, arg1, +- aarch64_simd_gen_const_vector_dup (mode, +- ~(HOST_WIDE_INT_M1U +- << bits)))); +- emit_insn (gen_ior3 (int_res, sign, mant)); +- emit_move_insn (operands[0], gen_lowpart (mode, int_res)); +- DONE; ++ operands[3] = CONSTM1_RTX (mode); + } + ) + +-(define_expand "xorsign3" +- [(match_operand:SVE_F 0 "register_operand") +- (match_operand:SVE_F 1 "register_operand") +- (match_operand:SVE_F 2 "register_operand")] ++(define_insn_and_rewrite "*aarch64_sve__cntp" ++ [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") ++ (ANY_MINUS:VNx4SI ++ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w") ++ (vec_duplicate:VNx4SI ++ (unspec:SI ++ [(match_operand 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand" "Upa, Upa")] ++ UNSPEC_CNTP))))] + "TARGET_SVE" ++ "@ ++ p\t%0.s, %2 ++ movprfx\t%0, %1\;p\t%0.s, %2" ++ "&& !CONSTANT_P (operands[3])" + { +- rtx sign = gen_reg_rtx (mode); +- rtx int_res = gen_reg_rtx (mode); +- int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; +- +- rtx arg1 = lowpart_subreg (mode, operands[1], mode); +- rtx arg2 = lowpart_subreg (mode, operands[2], mode); ++ operands[3] = CONSTM1_RTX (mode); ++ } ++ [(set_attr "movprfx" 
"*,yes")] ++) + +- emit_insn (gen_and3 +- (sign, arg2, +- aarch64_simd_gen_const_vector_dup (mode, +- HOST_WIDE_INT_M1U +- << bits))); +- emit_insn (gen_xor3 (int_res, arg1, sign)); +- emit_move_insn (operands[0], gen_lowpart (mode, int_res)); +- DONE; ++;; Decrement a vector of HIs by the number of set bits in a predicate. ++;; See aarch64_sve_cntp for a description of the operands. ++(define_expand "@aarch64_sve__cntp" ++ [(set (match_operand:VNx8HI 0 "register_operand") ++ (ANY_MINUS:VNx8HI ++ (match_operand:VNx8HI_ONLY 1 "register_operand") ++ (vec_duplicate:VNx8HI ++ (truncate:HI ++ (unspec:SI ++ [(match_dup 3) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand")] ++ UNSPEC_CNTP)))))] ++ "TARGET_SVE" ++ { ++ operands[3] = CONSTM1_RTX (mode); + } + ) + +-;; Unpredicated DOT product. +-(define_insn "dot_prod" +- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") +- (plus:SVE_SDI +- (unspec:SVE_SDI +- [(match_operand: 1 "register_operand" "w, w") +- (match_operand: 2 "register_operand" "w, w")] +- DOTPROD) +- (match_operand:SVE_SDI 3 "register_operand" "0, w")))] ++(define_insn_and_rewrite "*aarch64_sve__cntp" ++ [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") ++ (ANY_MINUS:VNx8HI ++ (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w") ++ (vec_duplicate:VNx8HI ++ (match_operator:HI 3 "subreg_lowpart_operator" ++ [(unspec:SI ++ [(match_operand 4) ++ (const_int SVE_KNOWN_PTRUE) ++ (match_operand: 2 "register_operand" "Upa, Upa")] ++ UNSPEC_CNTP)]))))] + "TARGET_SVE" + "@ +- dot\\t%0., %1., %2. +- movprfx\t%0, %3\;dot\\t%0., %1., %2." ++ p\t%0.h, %2 ++ movprfx\t%0, %1\;p\t%0.h, %2" ++ "&& !CONSTANT_P (operands[4])" ++ { ++ operands[4] = CONSTM1_RTX (mode); ++ } + [(set_attr "movprfx" "*,yes")] + ) +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 2b1ec85ae..a6a14b7fc 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,neoversen1,neoversee1,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 5dfcaf57e..ee85bb4e2 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -73,6 +73,8 @@ + #include "selftest-rtl.h" + #include "rtx-vector-builder.h" + #include "intl.h" ++#include "expmed.h" ++#include "function-abi.h" + + /* This file should be included last. */ + #include "target-def.h" +@@ -83,7 +85,7 @@ + /* Information about a legitimate vector immediate operand. 
*/ + struct simd_immediate_info + { +- enum insn_type { MOV, MVN }; ++ enum insn_type { MOV, MVN, INDEX, PTRUE }; + enum modifier_type { LSL, MSL }; + + simd_immediate_info () {} +@@ -92,33 +94,51 @@ struct simd_immediate_info + insn_type = MOV, modifier_type = LSL, + unsigned int = 0); + simd_immediate_info (scalar_mode, rtx, rtx); ++ simd_immediate_info (scalar_int_mode, aarch64_svpattern); + + /* The mode of the elements. */ + scalar_mode elt_mode; + +- /* The value of each element if all elements are the same, or the +- first value if the constant is a series. */ +- rtx value; +- +- /* The value of the step if the constant is a series, null otherwise. */ +- rtx step; +- + /* The instruction to use to move the immediate into a vector. */ + insn_type insn; + +- /* The kind of shift modifier to use, and the number of bits to shift. +- This is (LSL, 0) if no shift is needed. */ +- modifier_type modifier; +- unsigned int shift; ++ union ++ { ++ /* For MOV and MVN. */ ++ struct ++ { ++ /* The value of each element. */ ++ rtx value; ++ ++ /* The kind of shift modifier to use, and the number of bits to shift. ++ This is (LSL, 0) if no shift is needed. */ ++ modifier_type modifier; ++ unsigned int shift; ++ } mov; ++ ++ /* For INDEX. */ ++ struct ++ { ++ /* The value of the first element and the step to be added for each ++ subsequent element. */ ++ rtx base, step; ++ } index; ++ ++ /* For PTRUE. */ ++ aarch64_svpattern pattern; ++ } u; + }; + + /* Construct a floating-point immediate in which each element has mode + ELT_MODE_IN and value VALUE_IN. */ + inline simd_immediate_info + ::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in) +- : elt_mode (elt_mode_in), value (value_in), step (NULL_RTX), insn (MOV), +- modifier (LSL), shift (0) +-{} ++ : elt_mode (elt_mode_in), insn (MOV) ++{ ++ u.mov.value = value_in; ++ u.mov.modifier = LSL; ++ u.mov.shift = 0; ++} + + /* Construct an integer immediate in which each element has mode ELT_MODE_IN + and value VALUE_IN. The other parameters are as for the structure +@@ -128,17 +148,32 @@ inline simd_immediate_info + unsigned HOST_WIDE_INT value_in, + insn_type insn_in, modifier_type modifier_in, + unsigned int shift_in) +- : elt_mode (elt_mode_in), value (gen_int_mode (value_in, elt_mode_in)), +- step (NULL_RTX), insn (insn_in), modifier (modifier_in), shift (shift_in) +-{} ++ : elt_mode (elt_mode_in), insn (insn_in) ++{ ++ u.mov.value = gen_int_mode (value_in, elt_mode_in); ++ u.mov.modifier = modifier_in; ++ u.mov.shift = shift_in; ++} + + /* Construct an integer immediate in which each element has mode ELT_MODE_IN +- and where element I is equal to VALUE_IN + I * STEP_IN. */ ++ and where element I is equal to BASE_IN + I * STEP_IN. */ ++inline simd_immediate_info ++::simd_immediate_info (scalar_mode elt_mode_in, rtx base_in, rtx step_in) ++ : elt_mode (elt_mode_in), insn (INDEX) ++{ ++ u.index.base = base_in; ++ u.index.step = step_in; ++} ++ ++/* Construct a predicate that controls elements of mode ELT_MODE_IN ++ and has PTRUE pattern PATTERN_IN. */ + inline simd_immediate_info +-::simd_immediate_info (scalar_mode elt_mode_in, rtx value_in, rtx step_in) +- : elt_mode (elt_mode_in), value (value_in), step (step_in), insn (MOV), +- modifier (LSL), shift (0) +-{} ++::simd_immediate_info (scalar_int_mode elt_mode_in, ++ aarch64_svpattern pattern_in) ++ : elt_mode (elt_mode_in), insn (PTRUE) ++{ ++ u.pattern = pattern_in; ++} + + /* The current code model. 
*/ + enum aarch64_code_model aarch64_cmodel; +@@ -177,7 +212,7 @@ unsigned aarch64_architecture_version; + enum aarch64_processor aarch64_tune = cortexa53; + + /* Mask to specify which instruction scheduling options should be used. */ +-unsigned long aarch64_tune_flags = 0; ++uint64_t aarch64_tune_flags = 0; + + /* Global flag for PC relative loads. */ + bool aarch64_pcrelative_literal_loads; +@@ -693,7 +728,7 @@ static const struct tune_params generic_tunings = + 4, /* memmov_cost */ + 2, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC), /* fusible_ops */ +- "8", /* function_align. */ ++ "16:12", /* function_align. */ + "4", /* jump_align. */ + "8", /* loop_align. */ + 2, /* int_reassoc_width. */ +@@ -1139,7 +1174,7 @@ struct processor + enum aarch64_processor sched_core; + enum aarch64_arch arch; + unsigned architecture_version; +- const unsigned long flags; ++ const uint64_t flags; + const struct tune_params *const tune; + }; + +@@ -1172,15 +1207,46 @@ static const struct processor *selected_arch; + static const struct processor *selected_cpu; + static const struct processor *selected_tune; + ++enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A; ++ + /* The current tuning set. */ + struct tune_params aarch64_tune_params = generic_tunings; + ++/* Check whether an 'aarch64_vector_pcs' attribute is valid. */ ++ ++static tree ++handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree, ++ int, bool *no_add_attrs) ++{ ++ /* Since we set fn_type_req to true, the caller should have checked ++ this for us. */ ++ gcc_assert (FUNC_OR_METHOD_TYPE_P (*node)); ++ switch ((arm_pcs) fntype_abi (*node).id ()) ++ { ++ case ARM_PCS_AAPCS64: ++ case ARM_PCS_SIMD: ++ return NULL_TREE; ++ ++ case ARM_PCS_SVE: ++ error ("the %qE attribute cannot be applied to an SVE function type", ++ name); ++ *no_add_attrs = true; ++ return NULL_TREE; ++ ++ case ARM_PCS_TLSDESC: ++ case ARM_PCS_UNKNOWN: ++ break; ++ } ++ gcc_unreachable (); ++} ++ + /* Table of machine attributes. 
*/ + static const struct attribute_spec aarch64_attribute_table[] = + { + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, + affects_type_identity, handler, exclude } */ +- { "aarch64_vector_pcs", 0, 0, false, true, true, true, NULL, NULL }, ++ { "aarch64_vector_pcs", 0, 0, false, true, true, true, ++ handle_aarch64_vector_pcs_attribute, NULL }, + { NULL, 0, 0, false, false, false, false, NULL, NULL } + }; + +@@ -1241,6 +1307,7 @@ static enum aarch64_parse_opt_result + aarch64_handle_standard_branch_protection (char* str, char* rest) + { + aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF; ++ aarch64_ra_sign_key = AARCH64_KEY_A; + aarch64_enable_bti = 1; + if (rest) + { +@@ -1255,6 +1322,7 @@ aarch64_handle_pac_ret_protection (char* str ATTRIBUTE_UNUSED, + char* rest ATTRIBUTE_UNUSED) + { + aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF; ++ aarch64_ra_sign_key = AARCH64_KEY_A; + return AARCH64_PARSE_OK; + } + +@@ -1266,6 +1334,14 @@ aarch64_handle_pac_ret_leaf (char* str ATTRIBUTE_UNUSED, + return AARCH64_PARSE_OK; + } + ++static enum aarch64_parse_opt_result ++aarch64_handle_pac_ret_b_key (char* str ATTRIBUTE_UNUSED, ++ char* rest ATTRIBUTE_UNUSED) ++{ ++ aarch64_ra_sign_key = AARCH64_KEY_B; ++ return AARCH64_PARSE_OK; ++} ++ + static enum aarch64_parse_opt_result + aarch64_handle_bti_protection (char* str ATTRIBUTE_UNUSED, + char* rest ATTRIBUTE_UNUSED) +@@ -1276,6 +1352,7 @@ aarch64_handle_bti_protection (char* str ATTRIBUTE_UNUSED, + + static const struct aarch64_branch_protect_type aarch64_pac_ret_subtypes[] = { + { "leaf", aarch64_handle_pac_ret_leaf, NULL, 0 }, ++ { "b-key", aarch64_handle_pac_ret_b_key, NULL, 0 }, + { NULL, NULL, NULL, 0 } + }; + +@@ -1295,6 +1372,66 @@ static const char * const aarch64_condition_codes[] = + "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" + }; + ++/* The preferred condition codes for SVE conditions. */ ++static const char *const aarch64_sve_condition_codes[] = ++{ ++ "none", "any", "nlast", "last", "first", "nfrst", "vs", "vc", ++ "pmore", "plast", "tcont", "tstop", "gt", "le", "al", "nv" ++}; ++ ++/* Return the assembly token for svpattern value VALUE. */ ++ ++static const char * ++svpattern_token (enum aarch64_svpattern pattern) ++{ ++ switch (pattern) ++ { ++#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER; ++ AARCH64_FOR_SVPATTERN (CASE) ++#undef CASE ++ case AARCH64_NUM_SVPATTERNS: ++ break; ++ } ++ gcc_unreachable (); ++} ++ ++/* Return the descriptor of the SIMD ABI. */ ++ ++static const predefined_function_abi & ++aarch64_simd_abi (void) ++{ ++ predefined_function_abi &simd_abi = function_abis[ARM_PCS_SIMD]; ++ if (!simd_abi.initialized_p ()) ++ { ++ HARD_REG_SET full_reg_clobbers ++ = default_function_abi.full_reg_clobbers (); ++ for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ if (FP_SIMD_SAVED_REGNUM_P (regno)) ++ CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); ++ simd_abi.initialize (ARM_PCS_SIMD, full_reg_clobbers); ++ } ++ return simd_abi; ++} ++ ++/* Return the descriptor of the SVE PCS. 
*/ ++ ++static const predefined_function_abi & ++aarch64_sve_abi (void) ++{ ++ predefined_function_abi &sve_abi = function_abis[ARM_PCS_SVE]; ++ if (!sve_abi.initialized_p ()) ++ { ++ HARD_REG_SET full_reg_clobbers ++ = default_function_abi.full_reg_clobbers (); ++ for (int regno = V8_REGNUM; regno <= V23_REGNUM; ++regno) ++ CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); ++ for (int regno = P4_REGNUM; regno <= P11_REGNUM; ++regno) ++ CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); ++ sve_abi.initialize (ARM_PCS_SVE, full_reg_clobbers); ++ } ++ return sve_abi; ++} ++ + /* Generate code to enable conditional branches in functions over 1 MiB. */ + const char * + aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest, +@@ -1337,6 +1474,14 @@ aarch64_err_no_fpadvsimd (machine_mode mode) + " vector types", "+nofp"); + } + ++/* Return true if REGNO is P0-P15 or one of the special FFR-related ++ registers. */ ++inline bool ++pr_or_ffr_regnum_p (unsigned int regno) ++{ ++ return PR_REGNUM_P (regno) || regno == FFR_REGNUM || regno == FFRT_REGNUM; ++} ++ + /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. + The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and + GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much +@@ -1413,6 +1558,16 @@ aarch64_dbx_register_number (unsigned regno) + return DWARF_FRAME_REGISTERS; + } + ++/* If X is a CONST_DOUBLE, return its bit representation as a constant ++ integer, otherwise return X unmodified. */ ++static rtx ++aarch64_bit_representation (rtx x) ++{ ++ if (CONST_DOUBLE_P (x)) ++ x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x); ++ return x; ++} ++ + /* Return true if MODE is any of the Advanced SIMD structure modes. */ + static bool + aarch64_advsimd_struct_mode_p (machine_mode mode) +@@ -1439,6 +1594,9 @@ const unsigned int VEC_SVE_PRED = 4; + /* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate + a structure of 2, 3 or 4 vectors. */ + const unsigned int VEC_STRUCT = 8; ++/* Can be used in combination with VEC_SVE_DATA to indicate that the ++ vector has fewer significant bytes than a full SVE vector. */ ++const unsigned int VEC_PARTIAL = 16; + /* Useful combinations of the above. */ + const unsigned int VEC_ANY_SVE = VEC_SVE_DATA | VEC_SVE_PRED; + const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA; +@@ -1454,34 +1612,84 @@ aarch64_classify_vector_mode (machine_mode mode) + if (aarch64_sve_pred_mode_p (mode)) + return VEC_SVE_PRED; + +- scalar_mode inner = GET_MODE_INNER (mode); +- if (VECTOR_MODE_P (mode) +- && (inner == QImode +- || inner == HImode +- || inner == HFmode +- || inner == SImode +- || inner == SFmode +- || inner == DImode +- || inner == DFmode)) +- { +- if (TARGET_SVE) +- { +- if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR)) +- return VEC_SVE_DATA; +- if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2) +- || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3) +- || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4)) +- return VEC_SVE_DATA | VEC_STRUCT; +- } ++ /* Make the decision based on the mode's enum value rather than its ++ properties, so that we keep the correct classification regardless ++ of -msve-vector-bits. */ ++ switch (mode) ++ { ++ /* Partial SVE QI vectors. */ ++ case E_VNx2QImode: ++ case E_VNx4QImode: ++ case E_VNx8QImode: ++ /* Partial SVE HI vectors. */ ++ case E_VNx2HImode: ++ case E_VNx4HImode: ++ /* Partial SVE SI vector. */ ++ case E_VNx2SImode: ++ return TARGET_SVE ? 
VEC_SVE_DATA | VEC_PARTIAL : 0; ++ ++ case E_VNx16QImode: ++ case E_VNx8HImode: ++ case E_VNx4SImode: ++ case E_VNx2DImode: ++ case E_VNx8BFmode: ++ case E_VNx8HFmode: ++ case E_VNx4SFmode: ++ case E_VNx2DFmode: ++ return TARGET_SVE ? VEC_SVE_DATA : 0; ++ ++ /* x2 SVE vectors. */ ++ case E_VNx32QImode: ++ case E_VNx16HImode: ++ case E_VNx8SImode: ++ case E_VNx4DImode: ++ case E_VNx16BFmode: ++ case E_VNx16HFmode: ++ case E_VNx8SFmode: ++ case E_VNx4DFmode: ++ /* x3 SVE vectors. */ ++ case E_VNx48QImode: ++ case E_VNx24HImode: ++ case E_VNx12SImode: ++ case E_VNx6DImode: ++ case E_VNx24BFmode: ++ case E_VNx24HFmode: ++ case E_VNx12SFmode: ++ case E_VNx6DFmode: ++ /* x4 SVE vectors. */ ++ case E_VNx64QImode: ++ case E_VNx32HImode: ++ case E_VNx16SImode: ++ case E_VNx8DImode: ++ case E_VNx32BFmode: ++ case E_VNx32HFmode: ++ case E_VNx16SFmode: ++ case E_VNx8DFmode: ++ return TARGET_SVE ? VEC_SVE_DATA | VEC_STRUCT : 0; ++ ++ /* 64-bit Advanced SIMD vectors. */ ++ case E_V8QImode: ++ case E_V4HImode: ++ case E_V2SImode: ++ /* ...E_V1DImode doesn't exist. */ ++ case E_V4HFmode: ++ case E_V4BFmode: ++ case E_V2SFmode: ++ case E_V1DFmode: ++ /* 128-bit Advanced SIMD vectors. */ ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ case E_V8HFmode: ++ case E_V8BFmode: ++ case E_V4SFmode: ++ case E_V2DFmode: ++ return TARGET_SIMD ? VEC_ADVSIMD : 0; + +- /* This includes V1DF but not V1DI (which doesn't exist). */ +- if (TARGET_SIMD +- && (known_eq (GET_MODE_BITSIZE (mode), 64) +- || known_eq (GET_MODE_BITSIZE (mode), 128))) +- return VEC_ADVSIMD; ++ default: ++ return 0; + } +- +- return 0; + } + + /* Return true if MODE is any of the data vector modes, including +@@ -1492,6 +1700,14 @@ aarch64_vector_data_mode_p (machine_mode mode) + return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA; + } + ++/* Return true if MODE is any form of SVE mode, including predicates, ++ vectors and structures. */ ++bool ++aarch64_sve_mode_p (machine_mode mode) ++{ ++ return aarch64_classify_vector_mode (mode) & VEC_ANY_SVE; ++} ++ + /* Return true if MODE is an SVE data vector mode; either a single vector + or a structure of vectors. */ + static bool +@@ -1500,6 +1716,24 @@ aarch64_sve_data_mode_p (machine_mode mode) + return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA; + } + ++/* Return the number of defined bytes in one constituent vector of ++ SVE mode MODE, which has vector flags VEC_FLAGS. */ ++static poly_int64 ++aarch64_vl_bytes (machine_mode mode, unsigned int vec_flags) ++{ ++ if (vec_flags & VEC_PARTIAL) ++ /* A single partial vector. */ ++ return GET_MODE_SIZE (mode); ++ ++ if (vec_flags & VEC_SVE_DATA) ++ /* A single vector or a tuple. */ ++ return BYTES_PER_SVE_VECTOR; ++ ++ /* A single predicate. */ ++ gcc_assert (vec_flags & VEC_SVE_PRED); ++ return BYTES_PER_SVE_PRED; ++} ++ + /* Implement target hook TARGET_ARRAY_MODE. */ + static opt_machine_mode + aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems) +@@ -1582,6 +1816,43 @@ aarch64_vectorize_related_mode (machine_mode vector_mode, + return default_vectorize_related_mode (vector_mode, element_mode, nunits); + } + ++/* Return the SVE vector mode that has NUNITS elements of mode INNER_MODE. */ ++ ++opt_machine_mode ++aarch64_sve_data_mode (scalar_mode inner_mode, poly_uint64 nunits) ++{ ++ enum mode_class mclass = (is_a (inner_mode) ++ ? 
MODE_VECTOR_FLOAT : MODE_VECTOR_INT); ++ machine_mode mode; ++ FOR_EACH_MODE_IN_CLASS (mode, mclass) ++ if (inner_mode == GET_MODE_INNER (mode) ++ && known_eq (nunits, GET_MODE_NUNITS (mode)) ++ && aarch64_sve_data_mode_p (mode)) ++ return mode; ++ return opt_machine_mode (); ++} ++ ++/* Return the integer element mode associated with SVE mode MODE. */ ++ ++static scalar_int_mode ++aarch64_sve_element_int_mode (machine_mode mode) ++{ ++ unsigned int elt_bits = vector_element_size (BITS_PER_SVE_VECTOR, ++ GET_MODE_NUNITS (mode)); ++ return int_mode_for_size (elt_bits, 0).require (); ++} ++ ++/* Return the integer vector mode associated with SVE mode MODE. ++ Unlike mode_for_int_vector, this can handle the case in which ++ MODE is a predicate (and thus has a different total size). */ ++ ++machine_mode ++aarch64_sve_int_mode (machine_mode mode) ++{ ++ scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode); ++ return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require (); ++} ++ + /* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations, + prefer to use the first arithmetic operand as the else value if + the else value doesn't matter, since that exactly matches the SVE +@@ -1610,13 +1881,19 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) + { + case FP_REGS: + case FP_LO_REGS: +- if (aarch64_sve_data_mode_p (mode)) +- return exact_div (GET_MODE_SIZE (mode), +- BYTES_PER_SVE_VECTOR).to_constant (); +- return CEIL (lowest_size, UNITS_PER_VREG); ++ case FP_LO8_REGS: ++ { ++ unsigned int vec_flags = aarch64_classify_vector_mode (mode); ++ if (vec_flags & VEC_SVE_DATA) ++ return exact_div (GET_MODE_SIZE (mode), ++ aarch64_vl_bytes (mode, vec_flags)).to_constant (); ++ return CEIL (lowest_size, UNITS_PER_VREG); ++ } + case PR_REGS: + case PR_LO_REGS: + case PR_HI_REGS: ++ case FFR_REGS: ++ case PR_AND_FFR_REGS: + return 1; + default: + return CEIL (lowest_size, UNITS_PER_WORD); +@@ -1637,11 +1914,16 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) + return mode == DImode; + + unsigned int vec_flags = aarch64_classify_vector_mode (mode); ++ /* At the moment, partial vector modes are only useful for memory ++ references, but that could change in future. */ ++ if (vec_flags & VEC_PARTIAL) ++ return false; ++ + if (vec_flags & VEC_SVE_PRED) +- return PR_REGNUM_P (regno); ++ return pr_or_ffr_regnum_p (regno); + +- if (PR_REGNUM_P (regno)) +- return 0; ++ if (pr_or_ffr_regnum_p (regno)) ++ return false; + + if (regno == SP_REGNUM) + /* The purpose of comparing with ptr_mode is to support the +@@ -1670,102 +1952,184 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) + return false; + } + +-/* Return true if this is a definition of a vectorized simd function. */ ++/* Return true if TYPE is a type that should be passed or returned in ++ SVE registers, assuming enough registers are available. When returning ++ true, set *NUM_ZR and *NUM_PR to the number of required Z and P registers ++ respectively. */ + + static bool +-aarch64_simd_decl_p (tree fndecl) ++aarch64_sve_argument_p (const_tree type, unsigned int *num_zr, ++ unsigned int *num_pr) + { +- tree fntype; +- +- if (fndecl == NULL) +- return false; +- fntype = TREE_TYPE (fndecl); +- if (fntype == NULL) +- return false; ++ if (aarch64_sve::svbool_type_p (type)) ++ { ++ *num_pr = 1; ++ *num_zr = 0; ++ return true; ++ } + +- /* Functions with the aarch64_vector_pcs attribute use the simd ABI. 
*/ +- if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)) != NULL) +- return true; ++ if (unsigned int nvectors = aarch64_sve::nvectors_if_data_type (type)) ++ { ++ *num_pr = 0; ++ *num_zr = nvectors; ++ return true; ++ } + + return false; + } + +-/* Return the mode a register save/restore should use. DImode for integer +- registers, DFmode for FP registers in non-SIMD functions (they only save +- the bottom half of a 128 bit register), or TFmode for FP registers in +- SIMD functions. */ ++/* Return true if a function with type FNTYPE returns its value in ++ SVE vector or predicate registers. */ + +-static machine_mode +-aarch64_reg_save_mode (tree fndecl, unsigned regno) ++static bool ++aarch64_returns_value_in_sve_regs_p (const_tree fntype) + { +- return GP_REGNUM_P (regno) +- ? E_DImode +- : (aarch64_simd_decl_p (fndecl) ? E_TFmode : E_DFmode); ++ unsigned int num_zr, num_pr; ++ tree return_type = TREE_TYPE (fntype); ++ return (return_type != error_mark_node ++ && aarch64_sve_argument_p (return_type, &num_zr, &num_pr)); + } + +-/* Return true if the instruction is a call to a SIMD function, false +- if it is not a SIMD function or if we do not know anything about +- the function. */ ++/* Return true if a function with type FNTYPE takes arguments in ++ SVE vector or predicate registers. */ + + static bool +-aarch64_simd_call_p (rtx_insn *insn) ++aarch64_takes_arguments_in_sve_regs_p (const_tree fntype) + { +- rtx symbol; +- rtx call; +- tree fndecl; ++ CUMULATIVE_ARGS args_so_far_v; ++ aarch64_init_cumulative_args (&args_so_far_v, NULL_TREE, NULL_RTX, ++ NULL_TREE, 0, true); ++ cumulative_args_t args_so_far = pack_cumulative_args (&args_so_far_v); + +- gcc_assert (CALL_P (insn)); +- call = get_call_rtx_from (insn); +- symbol = XEXP (XEXP (call, 0), 0); +- if (GET_CODE (symbol) != SYMBOL_REF) +- return false; +- fndecl = SYMBOL_REF_DECL (symbol); +- if (!fndecl) +- return false; ++ for (tree chain = TYPE_ARG_TYPES (fntype); ++ chain && chain != void_list_node; ++ chain = TREE_CHAIN (chain)) ++ { ++ tree arg_type = TREE_VALUE (chain); ++ if (arg_type == error_mark_node) ++ return false; ++ ++ function_arg_info arg (arg_type, /*named=*/true); ++ apply_pass_by_reference_rules (&args_so_far_v, arg); ++ unsigned int num_zr, num_pr; ++ if (aarch64_sve_argument_p (arg.type, &num_zr, &num_pr)) ++ return true; + +- return aarch64_simd_decl_p (fndecl); ++ targetm.calls.function_arg_advance (args_so_far, arg); ++ } ++ return false; + } + +-/* Implement TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS. If INSN calls +- a function that uses the SIMD ABI, take advantage of the extra +- call-preserved registers that the ABI provides. */ ++/* Implement TARGET_FNTYPE_ABI. */ + +-void +-aarch64_remove_extra_call_preserved_regs (rtx_insn *insn, +- HARD_REG_SET *return_set) ++static const predefined_function_abi & ++aarch64_fntype_abi (const_tree fntype) + { +- if (aarch64_simd_call_p (insn)) +- { +- for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (FP_SIMD_SAVED_REGNUM_P (regno)) +- CLEAR_HARD_REG_BIT (*return_set, regno); +- } ++ if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype))) ++ return aarch64_simd_abi (); ++ ++ if (aarch64_returns_value_in_sve_regs_p (fntype) ++ || aarch64_takes_arguments_in_sve_regs_p (fntype)) ++ return aarch64_sve_abi (); ++ ++ return default_function_abi; + } + +-/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves +- the lower 64 bits of a 128-bit register. 
Tell the compiler the callee +- clobbers the top 64 bits when restoring the bottom 64 bits. */ ++/* Implement TARGET_COMPATIBLE_VECTOR_TYPES_P. */ + + static bool +-aarch64_hard_regno_call_part_clobbered (rtx_insn *insn, unsigned int regno, +- machine_mode mode) ++aarch64_compatible_vector_types_p (const_tree type1, const_tree type2) ++{ ++ return (aarch64_sve::builtin_type_p (type1) ++ == aarch64_sve::builtin_type_p (type2)); ++} ++ ++/* Return true if we should emit CFI for register REGNO. */ ++ ++static bool ++aarch64_emit_cfi_for_reg_p (unsigned int regno) + { +- bool simd_p = insn && CALL_P (insn) && aarch64_simd_call_p (insn); +- return FP_REGNUM_P (regno) +- && maybe_gt (GET_MODE_SIZE (mode), simd_p ? 16 : 8); ++ return (GP_REGNUM_P (regno) ++ || !default_function_abi.clobbers_full_reg_p (regno)); + } + +-/* Implement TARGET_RETURN_CALL_WITH_MAX_CLOBBERS. */ ++/* Return the mode we should use to save and restore register REGNO. */ + +-rtx_insn * +-aarch64_return_call_with_max_clobbers (rtx_insn *call_1, rtx_insn *call_2) ++static machine_mode ++aarch64_reg_save_mode (unsigned int regno) + { +- gcc_assert (CALL_P (call_1) && CALL_P (call_2)); ++ if (GP_REGNUM_P (regno)) ++ return DImode; + +- if (!aarch64_simd_call_p (call_1) || aarch64_simd_call_p (call_2)) +- return call_1; +- else +- return call_2; ++ if (FP_REGNUM_P (regno)) ++ switch (crtl->abi->id ()) ++ { ++ case ARM_PCS_AAPCS64: ++ /* Only the low 64 bits are saved by the base PCS. */ ++ return DFmode; ++ ++ case ARM_PCS_SIMD: ++ /* The vector PCS saves the low 128 bits (which is the full ++ register on non-SVE targets). */ ++ return TFmode; ++ ++ case ARM_PCS_SVE: ++ /* Use vectors of DImode for registers that need frame ++ information, so that the first 64 bytes of the save slot ++ are always the equivalent of what storing D would give. */ ++ if (aarch64_emit_cfi_for_reg_p (regno)) ++ return VNx2DImode; ++ ++ /* Use vectors of bytes otherwise, so that the layout is ++ endian-agnostic, and so that we can use LDR and STR for ++ big-endian targets. */ ++ return VNx16QImode; ++ ++ case ARM_PCS_TLSDESC: ++ case ARM_PCS_UNKNOWN: ++ break; ++ } ++ ++ if (PR_REGNUM_P (regno)) ++ /* Save the full predicate register. */ ++ return VNx16BImode; ++ ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_INSN_CALLEE_ABI. */ ++ ++const predefined_function_abi & ++aarch64_insn_callee_abi (const rtx_insn *insn) ++{ ++ rtx pat = PATTERN (insn); ++ gcc_assert (GET_CODE (pat) == PARALLEL); ++ rtx unspec = XVECEXP (pat, 0, 1); ++ gcc_assert (GET_CODE (unspec) == UNSPEC ++ && XINT (unspec, 1) == UNSPEC_CALLEE_ABI); ++ return function_abis[INTVAL (XVECEXP (unspec, 0, 0))]; ++} ++ ++/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves ++ the lower 64 bits of a 128-bit register. Tell the compiler the callee ++ clobbers the top 64 bits when restoring the bottom 64 bits. */ ++ ++static bool ++aarch64_hard_regno_call_part_clobbered (unsigned int abi_id, ++ unsigned int regno, ++ machine_mode mode) ++{ ++ if (FP_REGNUM_P (regno) && abi_id != ARM_PCS_SVE) ++ { ++ poly_int64 per_register_size = GET_MODE_SIZE (mode); ++ unsigned int nregs = hard_regno_nregs (regno, mode); ++ if (nregs > 1) ++ per_register_size = exact_div (per_register_size, nregs); ++ if (abi_id == ARM_PCS_SIMD || abi_id == ARM_PCS_TLSDESC) ++ return maybe_gt (per_register_size, 16); ++ return maybe_gt (per_register_size, 8); ++ } ++ return false; + } + + /* Implement REGMODE_NATURAL_SIZE. 
*/ +@@ -1899,10 +2263,33 @@ emit_set_insn (rtx x, rtx y) + rtx + aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) + { +- machine_mode mode = SELECT_CC_MODE (code, x, y); +- rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); ++ machine_mode cmp_mode = GET_MODE (x); ++ machine_mode cc_mode; ++ rtx cc_reg; ++ ++ if (cmp_mode == TImode) ++ { ++ gcc_assert (code == NE); ++ ++ cc_mode = CCmode; ++ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + +- emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); ++ rtx x_lo = operand_subword (x, 0, 0, TImode); ++ rtx y_lo = operand_subword (y, 0, 0, TImode); ++ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo)); ++ ++ rtx x_hi = operand_subword (x, 1, 0, TImode); ++ rtx y_hi = operand_subword (y, 1, 0, TImode); ++ emit_insn (gen_ccmpdi (cc_reg, cc_reg, x_hi, y_hi, ++ gen_rtx_EQ (cc_mode, cc_reg, const0_rtx), ++ GEN_INT (AARCH64_EQ))); ++ } ++ else ++ { ++ cc_mode = SELECT_CC_MODE (code, x, y); ++ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); ++ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y)); ++ } + return cc_reg; + } + +@@ -2466,7 +2853,36 @@ aarch64_zero_extend_const_eq (machine_mode xmode, rtx x, + gcc_assert (r != NULL); + return rtx_equal_p (x, r); + } +- ++ ++/* Return TARGET if it is nonnull and a register of mode MODE. ++ Otherwise, return a fresh register of mode MODE if we can, ++ or TARGET reinterpreted as MODE if we can't. */ ++ ++static rtx ++aarch64_target_reg (rtx target, machine_mode mode) ++{ ++ if (target && REG_P (target) && GET_MODE (target) == mode) ++ return target; ++ if (!can_create_pseudo_p ()) ++ { ++ gcc_assert (target); ++ return gen_lowpart (mode, target); ++ } ++ return gen_reg_rtx (mode); ++} ++ ++/* Return a register that contains the constant in BUILDER, given that ++ the constant is a legitimate move operand. Use TARGET as the register ++ if it is nonnull and convenient. */ ++ ++static rtx ++aarch64_emit_set_immediate (rtx target, rtx_vector_builder &builder) ++{ ++ rtx src = builder.build (); ++ target = aarch64_target_reg (target, GET_MODE (src)); ++ emit_insn (gen_rtx_SET (target, src)); ++ return target; ++} + + static rtx + aarch64_force_temporary (machine_mode mode, rtx x, rtx value) +@@ -2481,82 +2897,474 @@ aarch64_force_temporary (machine_mode mode, rtx x, rtx value) + } + } + +-/* Return true if we can move VALUE into a register using a single +- CNT[BHWD] instruction. */ ++/* Return true if predicate value X is a constant in which every element ++ is a CONST_INT. When returning true, describe X in BUILDER as a VNx16BI ++ value, i.e. as a predicate in which all bits are significant. */ + + static bool +-aarch64_sve_cnt_immediate_p (poly_int64 value) ++aarch64_get_sve_pred_bits (rtx_vector_builder &builder, rtx x) + { +- HOST_WIDE_INT factor = value.coeffs[0]; +- /* The coefficient must be [1, 16] * {2, 4, 8, 16}. 
*/ +- return (value.coeffs[1] == factor +- && IN_RANGE (factor, 2, 16 * 16) +- && (factor & 1) == 0 +- && factor <= 16 * (factor & -factor)); ++ if (GET_CODE (x) != CONST_VECTOR) ++ return false; ++ ++ unsigned int factor = vector_element_size (GET_MODE_NUNITS (VNx16BImode), ++ GET_MODE_NUNITS (GET_MODE (x))); ++ unsigned int npatterns = CONST_VECTOR_NPATTERNS (x) * factor; ++ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); ++ builder.new_vector (VNx16BImode, npatterns, nelts_per_pattern); ++ ++ unsigned int nelts = const_vector_encoded_nelts (x); ++ for (unsigned int i = 0; i < nelts; ++i) ++ { ++ rtx elt = CONST_VECTOR_ENCODED_ELT (x, i); ++ if (!CONST_INT_P (elt)) ++ return false; ++ ++ builder.quick_push (elt); ++ for (unsigned int j = 1; j < factor; ++j) ++ builder.quick_push (const0_rtx); ++ } ++ builder.finalize (); ++ return true; + } + +-/* Likewise for rtx X. */ ++/* BUILDER contains a predicate constant of mode VNx16BI. Return the ++ widest predicate element size it can have (that is, the largest size ++ for which each element would still be 0 or 1). */ + +-bool +-aarch64_sve_cnt_immediate_p (rtx x) ++unsigned int ++aarch64_widest_sve_pred_elt_size (rtx_vector_builder &builder) + { +- poly_int64 value; +- return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value); ++ /* Start with the most optimistic assumption: that we only need ++ one bit per pattern. This is what we will use if only the first ++ bit in each pattern is ever set. */ ++ unsigned int mask = GET_MODE_SIZE (DImode); ++ mask |= builder.npatterns (); ++ ++ /* Look for set bits. */ ++ unsigned int nelts = builder.encoded_nelts (); ++ for (unsigned int i = 1; i < nelts; ++i) ++ if (INTVAL (builder.elt (i)) != 0) ++ { ++ if (i & 1) ++ return 1; ++ mask |= i; ++ } ++ return mask & -mask; + } + +-/* Return the asm string for an instruction with a CNT-like vector size +- operand (a vector pattern followed by a multiplier in the range [1, 16]). +- PREFIX is the mnemonic without the size suffix and OPERANDS is the +- first part of the operands template (the part that comes before the +- vector size itself). FACTOR is the number of quadwords. +- NELTS_PER_VQ, if nonzero, is the number of elements in each quadword. +- If it is zero, we can use any element size. */ ++/* If VNx16BImode rtx X is a canonical PTRUE for a predicate mode, ++ return that predicate mode, otherwise return opt_machine_mode (). */ + +-static char * +-aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, +- unsigned int factor, +- unsigned int nelts_per_vq) ++opt_machine_mode ++aarch64_ptrue_all_mode (rtx x) + { +- static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")]; ++ gcc_assert (GET_MODE (x) == VNx16BImode); ++ if (GET_CODE (x) != CONST_VECTOR ++ || !CONST_VECTOR_DUPLICATE_P (x) ++ || !CONST_INT_P (CONST_VECTOR_ENCODED_ELT (x, 0)) ++ || INTVAL (CONST_VECTOR_ENCODED_ELT (x, 0)) == 0) ++ return opt_machine_mode (); + +- if (nelts_per_vq == 0) +- /* There is some overlap in the ranges of the four CNT instructions. +- Here we always use the smallest possible element size, so that the +- multiplier is 1 whereever possible. 
*/ +- nelts_per_vq = factor & -factor; +- int shift = std::min (exact_log2 (nelts_per_vq), 4); +- gcc_assert (IN_RANGE (shift, 1, 4)); +- char suffix = "dwhb"[shift - 1]; ++ unsigned int nelts = const_vector_encoded_nelts (x); ++ for (unsigned int i = 1; i < nelts; ++i) ++ if (CONST_VECTOR_ENCODED_ELT (x, i) != const0_rtx) ++ return opt_machine_mode (); + +- factor >>= shift; +- unsigned int written; +- if (factor == 1) +- written = snprintf (buffer, sizeof (buffer), "%s%c\t%s", +- prefix, suffix, operands); +- else +- written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d", +- prefix, suffix, operands, factor); +- gcc_assert (written < sizeof (buffer)); +- return buffer; ++ return aarch64_sve_pred_mode (nelts); + } + +-/* Return the asm string for an instruction with a CNT-like vector size +- operand (a vector pattern followed by a multiplier in the range [1, 16]). +- PREFIX is the mnemonic without the size suffix and OPERANDS is the +- first part of the operands template (the part that comes before the +- vector size itself). X is the value of the vector size operand, +- as a polynomial integer rtx. */ ++/* BUILDER is a predicate constant of mode VNx16BI. Consider the value ++ that the constant would have with predicate element size ELT_SIZE ++ (ignoring the upper bits in each element) and return: + +-char * ++ * -1 if all bits are set ++ * N if the predicate has N leading set bits followed by all clear bits ++ * 0 if the predicate does not have any of these forms. */ ++ ++int ++aarch64_partial_ptrue_length (rtx_vector_builder &builder, ++ unsigned int elt_size) ++{ ++ /* If nelts_per_pattern is 3, we have set bits followed by clear bits ++ followed by set bits. */ ++ if (builder.nelts_per_pattern () == 3) ++ return 0; ++ ++ /* Skip over leading set bits. */ ++ unsigned int nelts = builder.encoded_nelts (); ++ unsigned int i = 0; ++ for (; i < nelts; i += elt_size) ++ if (INTVAL (builder.elt (i)) == 0) ++ break; ++ unsigned int vl = i / elt_size; ++ ++ /* Check for the all-true case. */ ++ if (i == nelts) ++ return -1; ++ ++ /* If nelts_per_pattern is 1, then either VL is zero, or we have a ++ repeating pattern of set bits followed by clear bits. */ ++ if (builder.nelts_per_pattern () != 2) ++ return 0; ++ ++ /* We have a "foreground" value and a duplicated "background" value. ++ If the background might repeat and the last set bit belongs to it, ++ we might have set bits followed by clear bits followed by set bits. */ ++ if (i > builder.npatterns () && maybe_ne (nelts, builder.full_nelts ())) ++ return 0; ++ ++ /* Make sure that the rest are all clear. */ ++ for (; i < nelts; i += elt_size) ++ if (INTVAL (builder.elt (i)) != 0) ++ return 0; ++ ++ return vl; ++} ++ ++/* See if there is an svpattern that encodes an SVE predicate of mode ++ PRED_MODE in which the first VL bits are set and the rest are clear. ++ Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS. ++ A VL of -1 indicates an all-true vector. 
*/ ++ ++aarch64_svpattern ++aarch64_svpattern_for_vl (machine_mode pred_mode, int vl) ++{ ++ if (vl < 0) ++ return AARCH64_SV_ALL; ++ ++ if (maybe_gt (vl, GET_MODE_NUNITS (pred_mode))) ++ return AARCH64_NUM_SVPATTERNS; ++ ++ if (vl >= 1 && vl <= 8) ++ return aarch64_svpattern (AARCH64_SV_VL1 + (vl - 1)); ++ ++ if (vl >= 16 && vl <= 256 && pow2p_hwi (vl)) ++ return aarch64_svpattern (AARCH64_SV_VL16 + (exact_log2 (vl) - 4)); ++ ++ int max_vl; ++ if (GET_MODE_NUNITS (pred_mode).is_constant (&max_vl)) ++ { ++ if (vl == (max_vl / 3) * 3) ++ return AARCH64_SV_MUL3; ++ /* These would only trigger for non-power-of-2 lengths. */ ++ if (vl == (max_vl & -4)) ++ return AARCH64_SV_MUL4; ++ if (vl == (1 << floor_log2 (max_vl))) ++ return AARCH64_SV_POW2; ++ if (vl == max_vl) ++ return AARCH64_SV_ALL; ++ } ++ return AARCH64_NUM_SVPATTERNS; ++} ++ ++/* Return a VNx16BImode constant in which every sequence of ELT_SIZE ++ bits has the lowest bit set and the upper bits clear. This is the ++ VNx16BImode equivalent of a PTRUE for controlling elements of ++ ELT_SIZE bytes. However, because the constant is VNx16BImode, ++ all bits are significant, even the upper zeros. */ ++ ++rtx ++aarch64_ptrue_all (unsigned int elt_size) ++{ ++ rtx_vector_builder builder (VNx16BImode, elt_size, 1); ++ builder.quick_push (const1_rtx); ++ for (unsigned int i = 1; i < elt_size; ++i) ++ builder.quick_push (const0_rtx); ++ return builder.build (); ++} ++ ++/* Return an all-true predicate register of mode MODE. */ ++ ++rtx ++aarch64_ptrue_reg (machine_mode mode) ++{ ++ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); ++ rtx reg = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode)); ++ return gen_lowpart (mode, reg); ++} ++ ++/* Return an all-false predicate register of mode MODE. */ ++ ++rtx ++aarch64_pfalse_reg (machine_mode mode) ++{ ++ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); ++ rtx reg = force_reg (VNx16BImode, CONST0_RTX (VNx16BImode)); ++ return gen_lowpart (mode, reg); ++} ++ ++/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is ++ true, or alternatively if we know that the operation predicated by ++ PRED1[0] is safe to perform whenever PRED2 is true. PRED1[1] is a ++ aarch64_sve_gp_strictness operand that describes the operation ++ predicated by PRED1[0]. */ ++ ++bool ++aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2) ++{ ++ machine_mode mode = GET_MODE (pred2); ++ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL ++ && mode == GET_MODE (pred1[0]) ++ && aarch64_sve_gp_strictness (pred1[1], SImode)); ++ return (pred1[0] == CONSTM1_RTX (mode) ++ || INTVAL (pred1[1]) == SVE_RELAXED_GP ++ || rtx_equal_p (pred1[0], pred2)); ++} ++ ++/* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag ++ for it. PRED2[0] is the predicate for the instruction whose result ++ is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag ++ for it. Return true if we can prove that the two predicates are ++ equivalent for PTEST purposes; that is, if we can replace PRED2[0] ++ with PRED1[0] without changing behavior. 
*/ ++ ++bool ++aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2) ++{ ++ machine_mode mode = GET_MODE (pred1[0]); ++ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL ++ && mode == GET_MODE (pred2[0]) ++ && aarch64_sve_ptrue_flag (pred1[1], SImode) ++ && aarch64_sve_ptrue_flag (pred2[1], SImode)); ++ ++ bool ptrue1_p = (pred1[0] == CONSTM1_RTX (mode) ++ || INTVAL (pred1[1]) == SVE_KNOWN_PTRUE); ++ bool ptrue2_p = (pred2[0] == CONSTM1_RTX (mode) ++ || INTVAL (pred2[1]) == SVE_KNOWN_PTRUE); ++ return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]); ++} ++ ++/* Emit a comparison CMP between OP0 and OP1, both of which have mode ++ DATA_MODE, and return the result in a predicate of mode PRED_MODE. ++ Use TARGET as the target register if nonnull and convenient. */ ++ ++static rtx ++aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp, ++ machine_mode data_mode, rtx op1, rtx op2) ++{ ++ insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode); ++ expand_operand ops[5]; ++ create_output_operand (&ops[0], target, pred_mode); ++ create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode); ++ create_integer_operand (&ops[2], SVE_KNOWN_PTRUE); ++ create_input_operand (&ops[3], op1, data_mode); ++ create_input_operand (&ops[4], op2, data_mode); ++ expand_insn (icode, 5, ops); ++ return ops[0].value; ++} ++ ++/* Use a comparison to convert integer vector SRC into MODE, which is ++ the corresponding SVE predicate mode. Use TARGET for the result ++ if it's nonnull and convenient. */ ++ ++rtx ++aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src) ++{ ++ machine_mode src_mode = GET_MODE (src); ++ return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode, ++ src, CONST0_RTX (src_mode)); ++} ++ ++/* Return the assembly token for svprfop value PRFOP. */ ++ ++static const char * ++svprfop_token (enum aarch64_svprfop prfop) ++{ ++ switch (prfop) ++ { ++#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER; ++ AARCH64_FOR_SVPRFOP (CASE) ++#undef CASE ++ case AARCH64_NUM_SVPRFOPS: ++ break; ++ } ++ gcc_unreachable (); ++} ++ ++/* Return the assembly string for an SVE prefetch operation with ++ mnemonic MNEMONIC, given that PRFOP_RTX is the prefetch operation ++ and that SUFFIX is the format for the remaining operands. */ ++ ++char * ++aarch64_output_sve_prefetch (const char *mnemonic, rtx prfop_rtx, ++ const char *suffix) ++{ ++ static char buffer[128]; ++ aarch64_svprfop prfop = (aarch64_svprfop) INTVAL (prfop_rtx); ++ unsigned int written = snprintf (buffer, sizeof (buffer), "%s\t%s, %s", ++ mnemonic, svprfop_token (prfop), suffix); ++ gcc_assert (written < sizeof (buffer)); ++ return buffer; ++} ++ ++/* Check whether we can calculate the number of elements in PATTERN ++ at compile time, given that there are NELTS_PER_VQ elements per ++ 128-bit block. Return the value if so, otherwise return -1. */ ++ ++HOST_WIDE_INT ++aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq) ++{ ++ unsigned int vl, const_vg; ++ if (pattern >= AARCH64_SV_VL1 && pattern <= AARCH64_SV_VL8) ++ vl = 1 + (pattern - AARCH64_SV_VL1); ++ else if (pattern >= AARCH64_SV_VL16 && pattern <= AARCH64_SV_VL256) ++ vl = 16 << (pattern - AARCH64_SV_VL16); ++ else if (aarch64_sve_vg.is_constant (&const_vg)) ++ { ++ /* There are two vector granules per quadword. 
*/ ++ unsigned int nelts = (const_vg / 2) * nelts_per_vq; ++ switch (pattern) ++ { ++ case AARCH64_SV_POW2: return 1 << floor_log2 (nelts); ++ case AARCH64_SV_MUL4: return nelts & -4; ++ case AARCH64_SV_MUL3: return (nelts / 3) * 3; ++ case AARCH64_SV_ALL: return nelts; ++ default: gcc_unreachable (); ++ } ++ } ++ else ++ return -1; ++ ++ /* There are two vector granules per quadword. */ ++ poly_uint64 nelts_all = exact_div (aarch64_sve_vg, 2) * nelts_per_vq; ++ if (known_le (vl, nelts_all)) ++ return vl; ++ ++ /* Requesting more elements than are available results in a PFALSE. */ ++ if (known_gt (vl, nelts_all)) ++ return 0; ++ ++ return -1; ++} ++ ++/* Return true if we can move VALUE into a register using a single ++ CNT[BHWD] instruction. */ ++ ++static bool ++aarch64_sve_cnt_immediate_p (poly_int64 value) ++{ ++ HOST_WIDE_INT factor = value.coeffs[0]; ++ /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */ ++ return (value.coeffs[1] == factor ++ && IN_RANGE (factor, 2, 16 * 16) ++ && (factor & 1) == 0 ++ && factor <= 16 * (factor & -factor)); ++} ++ ++/* Likewise for rtx X. */ ++ ++bool ++aarch64_sve_cnt_immediate_p (rtx x) ++{ ++ poly_int64 value; ++ return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value); ++} ++ ++/* Return the asm string for an instruction with a CNT-like vector size ++ operand (a vector pattern followed by a multiplier in the range [1, 16]). ++ PREFIX is the mnemonic without the size suffix and OPERANDS is the ++ first part of the operands template (the part that comes before the ++ vector size itself). PATTERN is the pattern to use. FACTOR is the ++ number of quadwords. NELTS_PER_VQ, if nonzero, is the number of elements ++ in each quadword. If it is zero, we can use any element size. */ ++ ++static char * ++aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, ++ aarch64_svpattern pattern, ++ unsigned int factor, ++ unsigned int nelts_per_vq) ++{ ++ static char buffer[sizeof ("sqincd\t%x0, %w0, vl256, mul #16")]; ++ ++ if (nelts_per_vq == 0) ++ /* There is some overlap in the ranges of the four CNT instructions. ++ Here we always use the smallest possible element size, so that the ++ multiplier is 1 whereever possible. */ ++ nelts_per_vq = factor & -factor; ++ int shift = std::min (exact_log2 (nelts_per_vq), 4); ++ gcc_assert (IN_RANGE (shift, 1, 4)); ++ char suffix = "dwhb"[shift - 1]; ++ ++ factor >>= shift; ++ unsigned int written; ++ if (pattern == AARCH64_SV_ALL && factor == 1) ++ written = snprintf (buffer, sizeof (buffer), "%s%c\t%s", ++ prefix, suffix, operands); ++ else if (factor == 1) ++ written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, %s", ++ prefix, suffix, operands, svpattern_token (pattern)); ++ else ++ written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, %s, mul #%d", ++ prefix, suffix, operands, svpattern_token (pattern), ++ factor); ++ gcc_assert (written < sizeof (buffer)); ++ return buffer; ++} ++ ++/* Return the asm string for an instruction with a CNT-like vector size ++ operand (a vector pattern followed by a multiplier in the range [1, 16]). ++ PREFIX is the mnemonic without the size suffix and OPERANDS is the ++ first part of the operands template (the part that comes before the ++ vector size itself). X is the value of the vector size operand, ++ as a polynomial integer rtx; we need to convert this into an "all" ++ pattern with a multiplier. 
*/ ++ ++char * + aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, + rtx x) + { + poly_int64 value = rtx_to_poly_int64 (x); + gcc_assert (aarch64_sve_cnt_immediate_p (value)); +- return aarch64_output_sve_cnt_immediate (prefix, operands, ++ return aarch64_output_sve_cnt_immediate (prefix, operands, AARCH64_SV_ALL, + value.coeffs[1], 0); + } + ++/* Return the asm string for an instruction with a CNT-like vector size ++ operand (a vector pattern followed by a multiplier in the range [1, 16]). ++ PREFIX is the mnemonic without the size suffix and OPERANDS is the ++ first part of the operands template (the part that comes before the ++ vector size itself). CNT_PAT[0..2] are the operands of the ++ UNSPEC_SVE_CNT_PAT; see aarch64_sve_cnt_pat for details. */ ++ ++char * ++aarch64_output_sve_cnt_pat_immediate (const char *prefix, ++ const char *operands, rtx *cnt_pat) ++{ ++ aarch64_svpattern pattern = (aarch64_svpattern) INTVAL (cnt_pat[0]); ++ unsigned int nelts_per_vq = INTVAL (cnt_pat[1]); ++ unsigned int factor = INTVAL (cnt_pat[2]) * nelts_per_vq; ++ return aarch64_output_sve_cnt_immediate (prefix, operands, pattern, ++ factor, nelts_per_vq); ++} ++ ++/* Return true if we can add X using a single SVE INC or DEC instruction. */ ++ ++bool ++aarch64_sve_scalar_inc_dec_immediate_p (rtx x) ++{ ++ poly_int64 value; ++ return (poly_int_rtx_p (x, &value) ++ && (aarch64_sve_cnt_immediate_p (value) ++ || aarch64_sve_cnt_immediate_p (-value))); ++} ++ ++/* Return the asm string for adding SVE INC/DEC immediate OFFSET to ++ operand 0. */ ++ ++char * ++aarch64_output_sve_scalar_inc_dec (rtx offset) ++{ ++ poly_int64 offset_value = rtx_to_poly_int64 (offset); ++ gcc_assert (offset_value.coeffs[0] == offset_value.coeffs[1]); ++ if (offset_value.coeffs[1] > 0) ++ return aarch64_output_sve_cnt_immediate ("inc", "%x0", AARCH64_SV_ALL, ++ offset_value.coeffs[1], 0); ++ else ++ return aarch64_output_sve_cnt_immediate ("dec", "%x0", AARCH64_SV_ALL, ++ -offset_value.coeffs[1], 0); ++} ++ + /* Return true if we can add VALUE to a register using a single ADDVL + or ADDPL instruction. */ + +@@ -2582,27 +3390,16 @@ aarch64_sve_addvl_addpl_immediate_p (rtx x) + && aarch64_sve_addvl_addpl_immediate_p (value)); + } + +-/* Return the asm string for adding ADDVL or ADDPL immediate X to operand 1 +- and storing the result in operand 0. */ ++/* Return the asm string for adding ADDVL or ADDPL immediate OFFSET ++ to operand 1 and storing the result in operand 0. */ + + char * +-aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset) ++aarch64_output_sve_addvl_addpl (rtx offset) + { + static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)]; + poly_int64 offset_value = rtx_to_poly_int64 (offset); + gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value)); + +- /* Use INC or DEC if possible. */ +- if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest))) +- { +- if (aarch64_sve_cnt_immediate_p (offset_value)) +- return aarch64_output_sve_cnt_immediate ("inc", "%x0", +- offset_value.coeffs[1], 0); +- if (aarch64_sve_cnt_immediate_p (-offset_value)) +- return aarch64_output_sve_cnt_immediate ("dec", "%x0", +- -offset_value.coeffs[1], 0); +- } +- + int factor = offset_value.coeffs[1]; + if ((factor & 15) == 0) + snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16); +@@ -2617,8 +3414,8 @@ aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset) + factor in *FACTOR_OUT (if nonnull). 
*/ + + bool +-aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out, +- unsigned int *nelts_per_vq_out) ++aarch64_sve_vector_inc_dec_immediate_p (rtx x, int *factor_out, ++ unsigned int *nelts_per_vq_out) + { + rtx elt; + poly_int64 value; +@@ -2652,9 +3449,9 @@ aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out, + instruction. */ + + bool +-aarch64_sve_inc_dec_immediate_p (rtx x) ++aarch64_sve_vector_inc_dec_immediate_p (rtx x) + { +- return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL); ++ return aarch64_sve_vector_inc_dec_immediate_p (x, NULL, NULL); + } + + /* Return the asm template for an SVE vector INC or DEC instruction. +@@ -2662,18 +3459,18 @@ aarch64_sve_inc_dec_immediate_p (rtx x) + value of the vector count operand itself. */ + + char * +-aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x) ++aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) + { + int factor; + unsigned int nelts_per_vq; +- if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq)) ++ if (!aarch64_sve_vector_inc_dec_immediate_p (x, &factor, &nelts_per_vq)) + gcc_unreachable (); + if (factor < 0) +- return aarch64_output_sve_cnt_immediate ("dec", operands, -factor, +- nelts_per_vq); ++ return aarch64_output_sve_cnt_immediate ("dec", operands, AARCH64_SV_ALL, ++ -factor, nelts_per_vq); + else +- return aarch64_output_sve_cnt_immediate ("inc", operands, factor, +- nelts_per_vq); ++ return aarch64_output_sve_cnt_immediate ("inc", operands, AARCH64_SV_ALL, ++ factor, nelts_per_vq); + } + + static int +@@ -3056,20 +3853,36 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, + } + else + { +- /* Use CNTD, then multiply it by FACTOR. */ +- val = gen_int_mode (poly_int64 (2, 2), mode); ++ /* Base the factor on LOW_BIT if we can calculate LOW_BIT ++ directly, since that should increase the chances of being ++ able to use a shift and add sequence. If LOW_BIT itself ++ is out of range, just use CNTD. */ ++ if (low_bit <= 16 * 8) ++ factor /= low_bit; ++ else ++ low_bit = 1; ++ ++ val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode); + val = aarch64_force_temporary (mode, temp1, val); + +- /* Go back to using a negative multiplication factor if we have +- no register from which to subtract. */ +- if (code == MINUS && src == const0_rtx) ++ if (can_create_pseudo_p ()) ++ { ++ rtx coeff1 = gen_int_mode (factor, mode); ++ val = expand_mult (mode, val, coeff1, NULL_RTX, false, true); ++ } ++ else + { +- factor = -factor; +- code = PLUS; ++ /* Go back to using a negative multiplication factor if we have ++ no register from which to subtract. */ ++ if (code == MINUS && src == const0_rtx) ++ { ++ factor = -factor; ++ code = PLUS; ++ } ++ rtx coeff1 = gen_int_mode (factor, mode); ++ coeff1 = aarch64_force_temporary (mode, temp2, coeff1); ++ val = gen_rtx_MULT (mode, val, coeff1); + } +- rtx coeff1 = gen_int_mode (factor, mode); +- coeff1 = aarch64_force_temporary (mode, temp2, coeff1); +- val = gen_rtx_MULT (mode, val, coeff1); + } + + if (shift > 0) +@@ -3176,32 +3989,55 @@ aarch64_expand_vec_series (rtx dest, rtx base, rtx step) + emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step)); + } + +-/* Try to duplicate SRC into SVE register DEST, given that SRC is an +- integer of mode INT_MODE. Return true on success. */ ++/* Duplicate 128-bit Advanced SIMD vector SRC so that it fills an SVE ++ register of mode MODE. Use TARGET for the result if it's nonnull ++ and convenient. 
+ +-static bool +-aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode, +- rtx src) +-{ +- /* If the constant is smaller than 128 bits, we can do the move +- using a vector of SRC_MODEs. */ +- if (src_mode != TImode) +- { +- poly_uint64 count = exact_div (GET_MODE_SIZE (GET_MODE (dest)), +- GET_MODE_SIZE (src_mode)); +- machine_mode dup_mode = mode_for_vector (src_mode, count).require (); +- emit_move_insn (gen_lowpart (dup_mode, dest), +- gen_const_vec_duplicate (dup_mode, src)); +- return true; ++ The two vector modes must have the same element mode. The behavior ++ is to duplicate architectural lane N of SRC into architectural lanes ++ N + I * STEP of the result. On big-endian targets, architectural ++ lane 0 of an Advanced SIMD vector is the last element of the vector ++ in memory layout, so for big-endian targets this operation has the ++ effect of reversing SRC before duplicating it. Callers need to ++ account for this. */ ++ ++rtx ++aarch64_expand_sve_dupq (rtx target, machine_mode mode, rtx src) ++{ ++ machine_mode src_mode = GET_MODE (src); ++ gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER (src_mode)); ++ insn_code icode = (BYTES_BIG_ENDIAN ++ ? code_for_aarch64_vec_duplicate_vq_be (mode) ++ : code_for_aarch64_vec_duplicate_vq_le (mode)); ++ ++ unsigned int i = 0; ++ expand_operand ops[3]; ++ create_output_operand (&ops[i++], target, mode); ++ create_output_operand (&ops[i++], src, src_mode); ++ if (BYTES_BIG_ENDIAN) ++ { ++ /* Create a PARALLEL describing the reversal of SRC. */ ++ unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (mode); ++ rtx sel = aarch64_gen_stepped_int_parallel (nelts_per_vq, ++ nelts_per_vq - 1, -1); ++ create_fixed_operand (&ops[i++], sel); + } ++ expand_insn (icode, i, ops); ++ return ops[0].value; ++} ++ ++/* Try to force 128-bit vector value SRC into memory and use LD1RQ to fetch ++ the memory image into DEST. Return true on success. */ + +- /* Use LD1RQ[BHWD] to load the 128 bits from memory. */ +- src = force_const_mem (src_mode, src); ++static bool ++aarch64_expand_sve_ld1rq (rtx dest, rtx src) ++{ ++ src = force_const_mem (GET_MODE (src), src); + if (!src) + return false; + + /* Make sure that the address is legitimate. */ +- if (!aarch64_sve_ld1r_operand_p (src)) ++ if (!aarch64_sve_ld1rq_operand_p (src)) + { + rtx addr = force_reg (Pmode, XEXP (src, 0)); + src = replace_equiv_address (src, addr); +@@ -3210,47 +4046,128 @@ aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode, + machine_mode mode = GET_MODE (dest); + unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode); + machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require (); +- rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); +- src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ); +- emit_insn (gen_rtx_SET (dest, src)); ++ rtx ptrue = aarch64_ptrue_reg (pred_mode); ++ emit_insn (gen_aarch64_sve_ld1rq (mode, dest, src, ptrue)); + return true; + } + +-/* Expand a move of general CONST_VECTOR SRC into DEST, given that it +- isn't a simple duplicate or series. */ ++/* Return a register containing CONST_VECTOR SRC, given that SRC has an ++ SVE data mode and isn't a legitimate constant. Use TARGET for the ++ result if convenient. + +-static void +-aarch64_expand_sve_const_vector (rtx dest, rtx src) ++ The returned register can have whatever mode seems most natural ++ given the contents of SRC. 
*/ ++ ++static rtx ++aarch64_expand_sve_const_vector (rtx target, rtx src) + { + machine_mode mode = GET_MODE (src); + unsigned int npatterns = CONST_VECTOR_NPATTERNS (src); + unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src); +- gcc_assert (npatterns > 1); ++ scalar_mode elt_mode = GET_MODE_INNER (mode); ++ unsigned int elt_bits = GET_MODE_BITSIZE (elt_mode); ++ unsigned int encoded_bits = npatterns * nelts_per_pattern * elt_bits; ++ ++ if (nelts_per_pattern == 1 && encoded_bits == 128) ++ { ++ /* The constant is a duplicated quadword but can't be narrowed ++ beyond a quadword. Get the memory image of the first quadword ++ as a 128-bit vector and try using LD1RQ to load it from memory. ++ ++ The effect for both endiannesses is to load memory lane N into ++ architectural lanes N + I * STEP of the result. On big-endian ++ targets, the layout of the 128-bit vector in an Advanced SIMD ++ register would be different from its layout in an SVE register, ++ but this 128-bit vector is a memory value only. */ ++ machine_mode vq_mode = aarch64_vq_mode (elt_mode).require (); ++ rtx vq_value = simplify_gen_subreg (vq_mode, src, mode, 0); ++ if (vq_value && aarch64_expand_sve_ld1rq (target, vq_value)) ++ return target; ++ } ++ ++ if (nelts_per_pattern == 1 && encoded_bits < 128) ++ { ++ /* The vector is a repeating sequence of 64 bits or fewer. ++ See if we can load them using an Advanced SIMD move and then ++ duplicate it to fill a vector. This is better than using a GPR ++ move because it keeps everything in the same register file. */ ++ machine_mode vq_mode = aarch64_vq_mode (elt_mode).require (); ++ rtx_vector_builder builder (vq_mode, npatterns, 1); ++ for (unsigned int i = 0; i < npatterns; ++i) ++ { ++ /* We want memory lane N to go into architectural lane N, ++ so reverse for big-endian targets. The DUP .Q pattern ++ has a compensating reverse built-in. */ ++ unsigned int srci = BYTES_BIG_ENDIAN ? npatterns - i - 1 : i; ++ builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, srci)); ++ } ++ rtx vq_src = builder.build (); ++ if (aarch64_simd_valid_immediate (vq_src, NULL)) ++ { ++ vq_src = force_reg (vq_mode, vq_src); ++ return aarch64_expand_sve_dupq (target, mode, vq_src); ++ } + +- if (nelts_per_pattern == 1) +- { +- /* The constant is a repeating seqeuence of at least two elements, +- where the repeating elements occupy no more than 128 bits. +- Get an integer representation of the replicated value. */ +- scalar_int_mode int_mode; +- if (BYTES_BIG_ENDIAN) +- /* For now, always use LD1RQ to load the value on big-endian +- targets, since the handling of smaller integers includes a +- subreg that is semantically an element reverse. */ +- int_mode = TImode; +- else ++ /* Get an integer representation of the repeating part of Advanced ++ SIMD vector VQ_SRC. This preserves the endianness of VQ_SRC, ++ which for big-endian targets is lane-swapped wrt a normal ++ Advanced SIMD vector. This means that for both endiannesses, ++ memory lane N of SVE vector SRC corresponds to architectural ++ lane N of a register holding VQ_SRC. This in turn means that ++ memory lane 0 of SVE vector SRC is in the lsb of VQ_SRC (viewed ++ as a single 128-bit value) and thus that memory lane 0 of SRC is ++ in the lsb of the integer. Duplicating the integer therefore ++ ensures that memory lane N of SRC goes into architectural lane ++ N + I * INDEX of the SVE register. 
*/ ++ scalar_mode int_mode = int_mode_for_size (encoded_bits, 0).require (); ++ rtx elt_value = simplify_gen_subreg (int_mode, vq_src, vq_mode, 0); ++ if (elt_value) + { +- unsigned int int_bits = GET_MODE_UNIT_BITSIZE (mode) * npatterns; +- gcc_assert (int_bits <= 128); +- int_mode = int_mode_for_size (int_bits, 0).require (); ++ /* Pretend that we had a vector of INT_MODE to start with. */ ++ elt_mode = int_mode; ++ mode = aarch64_full_sve_mode (int_mode).require (); ++ ++ /* If the integer can be moved into a general register by a ++ single instruction, do that and duplicate the result. */ ++ if (CONST_INT_P (elt_value) ++ && aarch64_move_imm (INTVAL (elt_value), elt_mode)) ++ { ++ elt_value = force_reg (elt_mode, elt_value); ++ return expand_vector_broadcast (mode, elt_value); ++ } ++ } ++ else if (npatterns == 1) ++ /* We're duplicating a single value, but can't do better than ++ force it to memory and load from there. This handles things ++ like symbolic constants. */ ++ elt_value = CONST_VECTOR_ENCODED_ELT (src, 0); ++ ++ if (elt_value) ++ { ++ /* Load the element from memory if we can, otherwise move it into ++ a register and use a DUP. */ ++ rtx op = force_const_mem (elt_mode, elt_value); ++ if (!op) ++ op = force_reg (elt_mode, elt_value); ++ return expand_vector_broadcast (mode, op); + } +- rtx int_value = simplify_gen_subreg (int_mode, src, mode, 0); +- if (int_value +- && aarch64_expand_sve_widened_duplicate (dest, int_mode, int_value)) +- return; + } + ++ /* Try using INDEX. */ ++ rtx base, step; ++ if (const_vec_series_p (src, &base, &step)) ++ { ++ aarch64_expand_vec_series (target, base, step); ++ return target; ++ } ++ ++ /* From here on, it's better to force the whole constant to memory ++ if we can. */ ++ if (GET_MODE_NUNITS (mode).is_constant ()) ++ return NULL_RTX; ++ + /* Expand each pattern individually. */ ++ gcc_assert (npatterns > 1); + rtx_vector_builder builder; + auto_vec vectors (npatterns); + for (unsigned int i = 0; i < npatterns; ++i) +@@ -3267,22 +4184,263 @@ aarch64_expand_sve_const_vector (rtx dest, rtx src) + npatterns /= 2; + for (unsigned int i = 0; i < npatterns; ++i) + { +- rtx tmp = (npatterns == 1 ? dest : gen_reg_rtx (mode)); ++ rtx tmp = (npatterns == 1 ? target : gen_reg_rtx (mode)); + rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]); + emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1)); + vectors[i] = tmp; + } + } +- gcc_assert (vectors[0] == dest); ++ gcc_assert (vectors[0] == target); ++ return target; ++} ++ ++/* Use WHILE to set a predicate register of mode MODE in which the first ++ VL bits are set and the rest are clear. Use TARGET for the register ++ if it's nonnull and convenient. */ ++ ++static rtx ++aarch64_sve_move_pred_via_while (rtx target, machine_mode mode, ++ unsigned int vl) ++{ ++ rtx limit = force_reg (DImode, gen_int_mode (vl, DImode)); ++ target = aarch64_target_reg (target, mode); ++ emit_insn (gen_while (UNSPEC_WHILELO, DImode, mode, ++ target, const0_rtx, limit)); ++ return target; ++} ++ ++static rtx ++aarch64_expand_sve_const_pred_1 (rtx, rtx_vector_builder &, bool); ++ ++/* BUILDER is a constant predicate in which the index of every set bit ++ is a multiple of ELT_SIZE (which is <= 8). Try to load the constant ++ by inverting every element at a multiple of ELT_SIZE and EORing the ++ result with an ELT_SIZE PTRUE. ++ ++ Return a register that contains the constant on success, otherwise ++ return null. Use TARGET as the register if it is nonnull and ++ convenient. 
*/ ++ ++static rtx ++aarch64_expand_sve_const_pred_eor (rtx target, rtx_vector_builder &builder, ++ unsigned int elt_size) ++{ ++ /* Invert every element at a multiple of ELT_SIZE, keeping the ++ other bits zero. */ ++ rtx_vector_builder inv_builder (VNx16BImode, builder.npatterns (), ++ builder.nelts_per_pattern ()); ++ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) ++ if ((i & (elt_size - 1)) == 0 && INTVAL (builder.elt (i)) == 0) ++ inv_builder.quick_push (const1_rtx); ++ else ++ inv_builder.quick_push (const0_rtx); ++ inv_builder.finalize (); ++ ++ /* See if we can load the constant cheaply. */ ++ rtx inv = aarch64_expand_sve_const_pred_1 (NULL_RTX, inv_builder, false); ++ if (!inv) ++ return NULL_RTX; ++ ++ /* EOR the result with an ELT_SIZE PTRUE. */ ++ rtx mask = aarch64_ptrue_all (elt_size); ++ mask = force_reg (VNx16BImode, mask); ++ target = aarch64_target_reg (target, VNx16BImode); ++ emit_insn (gen_aarch64_pred_z (XOR, VNx16BImode, target, mask, inv, mask)); ++ return target; ++} ++ ++/* BUILDER is a constant predicate in which the index of every set bit ++ is a multiple of ELT_SIZE (which is <= 8). Try to load the constant ++ using a TRN1 of size PERMUTE_SIZE, which is >= ELT_SIZE. Return the ++ register on success, otherwise return null. Use TARGET as the register ++ if nonnull and convenient. */ ++ ++static rtx ++aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder, ++ unsigned int elt_size, ++ unsigned int permute_size) ++{ ++ /* We're going to split the constant into two new constants A and B, ++ with element I of BUILDER going into A if (I & PERMUTE_SIZE) == 0 ++ and into B otherwise. E.g. for PERMUTE_SIZE == 4 && ELT_SIZE == 1: ++ ++ A: { 0, 1, 2, 3, _, _, _, _, 8, 9, 10, 11, _, _, _, _ } ++ B: { 4, 5, 6, 7, _, _, _, _, 12, 13, 14, 15, _, _, _, _ } ++ ++ where _ indicates elements that will be discarded by the permute. ++ ++ First calculate the ELT_SIZEs for A and B. */ ++ unsigned int a_elt_size = GET_MODE_SIZE (DImode); ++ unsigned int b_elt_size = GET_MODE_SIZE (DImode); ++ for (unsigned int i = 0; i < builder.encoded_nelts (); i += elt_size) ++ if (INTVAL (builder.elt (i)) != 0) ++ { ++ if (i & permute_size) ++ b_elt_size |= i - permute_size; ++ else ++ a_elt_size |= i; ++ } ++ a_elt_size &= -a_elt_size; ++ b_elt_size &= -b_elt_size; ++ ++ /* Now construct the vectors themselves. */ ++ rtx_vector_builder a_builder (VNx16BImode, builder.npatterns (), ++ builder.nelts_per_pattern ()); ++ rtx_vector_builder b_builder (VNx16BImode, builder.npatterns (), ++ builder.nelts_per_pattern ()); ++ unsigned int nelts = builder.encoded_nelts (); ++ for (unsigned int i = 0; i < nelts; ++i) ++ if (i & (elt_size - 1)) ++ { ++ a_builder.quick_push (const0_rtx); ++ b_builder.quick_push (const0_rtx); ++ } ++ else if ((i & permute_size) == 0) ++ { ++ /* The A and B elements are significant. */ ++ a_builder.quick_push (builder.elt (i)); ++ b_builder.quick_push (builder.elt (i + permute_size)); ++ } ++ else ++ { ++ /* The A and B elements are going to be discarded, so pick whatever ++ is likely to give a nice constant. We are targeting element ++ sizes A_ELT_SIZE and B_ELT_SIZE for A and B respectively, ++ with the aim of each being a sequence of ones followed by ++ a sequence of zeros. So: ++ ++ * if X_ELT_SIZE <= PERMUTE_SIZE, the best approach is to ++ duplicate the last X_ELT_SIZE element, to extend the ++ current sequence of ones or zeros. 
++ ++ * if X_ELT_SIZE > PERMUTE_SIZE, the best approach is to add a ++ zero, so that the constant really does have X_ELT_SIZE and ++ not a smaller size. */ ++ if (a_elt_size > permute_size) ++ a_builder.quick_push (const0_rtx); ++ else ++ a_builder.quick_push (a_builder.elt (i - a_elt_size)); ++ if (b_elt_size > permute_size) ++ b_builder.quick_push (const0_rtx); ++ else ++ b_builder.quick_push (b_builder.elt (i - b_elt_size)); ++ } ++ a_builder.finalize (); ++ b_builder.finalize (); ++ ++ /* Try loading A into a register. */ ++ rtx_insn *last = get_last_insn (); ++ rtx a = aarch64_expand_sve_const_pred_1 (NULL_RTX, a_builder, false); ++ if (!a) ++ return NULL_RTX; ++ ++ /* Try loading B into a register. */ ++ rtx b = a; ++ if (a_builder != b_builder) ++ { ++ b = aarch64_expand_sve_const_pred_1 (NULL_RTX, b_builder, false); ++ if (!b) ++ { ++ delete_insns_since (last); ++ return NULL_RTX; ++ } ++ } ++ ++ /* Emit the TRN1 itself. */ ++ machine_mode mode = aarch64_sve_pred_mode (permute_size).require (); ++ target = aarch64_target_reg (target, mode); ++ emit_insn (gen_aarch64_sve (UNSPEC_TRN1, mode, target, ++ gen_lowpart (mode, a), ++ gen_lowpart (mode, b))); ++ return target; ++} ++ ++/* Subroutine of aarch64_expand_sve_const_pred. Try to load the VNx16BI ++ constant in BUILDER into an SVE predicate register. Return the register ++ on success, otherwise return null. Use TARGET for the register if ++ nonnull and convenient. ++ ++ ALLOW_RECURSE_P is true if we can use methods that would call this ++ function recursively. */ ++ ++static rtx ++aarch64_expand_sve_const_pred_1 (rtx target, rtx_vector_builder &builder, ++ bool allow_recurse_p) ++{ ++ if (builder.encoded_nelts () == 1) ++ /* A PFALSE or a PTRUE .B ALL. */ ++ return aarch64_emit_set_immediate (target, builder); ++ ++ unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder); ++ if (int vl = aarch64_partial_ptrue_length (builder, elt_size)) ++ { ++ /* If we can load the constant using PTRUE, use it as-is. */ ++ machine_mode mode = aarch64_sve_pred_mode (elt_size).require (); ++ if (aarch64_svpattern_for_vl (mode, vl) != AARCH64_NUM_SVPATTERNS) ++ return aarch64_emit_set_immediate (target, builder); ++ ++ /* Otherwise use WHILE to set the first VL bits. */ ++ return aarch64_sve_move_pred_via_while (target, mode, vl); ++ } ++ ++ if (!allow_recurse_p) ++ return NULL_RTX; ++ ++ /* Try inverting the vector in element size ELT_SIZE and then EORing ++ the result with an ELT_SIZE PTRUE. */ ++ if (INTVAL (builder.elt (0)) == 0) ++ if (rtx res = aarch64_expand_sve_const_pred_eor (target, builder, ++ elt_size)) ++ return res; ++ ++ /* Try using TRN1 to permute two simpler constants. */ ++ for (unsigned int i = elt_size; i <= 8; i *= 2) ++ if (rtx res = aarch64_expand_sve_const_pred_trn (target, builder, ++ elt_size, i)) ++ return res; ++ ++ return NULL_RTX; + } + +-/* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE +- is a pattern that can be used to set DEST to a replicated scalar +- element. */ ++/* Return an SVE predicate register that contains the VNx16BImode ++ constant in BUILDER, without going through the move expanders. ++ ++ The returned register can have whatever mode seems most natural ++ given the contents of BUILDER. Use TARGET for the result if ++ convenient. */ ++ ++static rtx ++aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder) ++{ ++ /* Try loading the constant using pure predicate operations. 
*/ ++ if (rtx res = aarch64_expand_sve_const_pred_1 (target, builder, true)) ++ return res; ++ ++ /* Try forcing the constant to memory. */ ++ if (builder.full_nelts ().is_constant ()) ++ if (rtx mem = force_const_mem (VNx16BImode, builder.build ())) ++ { ++ target = aarch64_target_reg (target, VNx16BImode); ++ emit_move_insn (target, mem); ++ return target; ++ } ++ ++ /* The last resort is to load the constant as an integer and then ++ compare it against zero. Use -1 for set bits in order to increase ++ the changes of using SVE DUPM or an Advanced SIMD byte mask. */ ++ rtx_vector_builder int_builder (VNx16QImode, builder.npatterns (), ++ builder.nelts_per_pattern ()); ++ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) ++ int_builder.quick_push (INTVAL (builder.elt (i)) ++ ? constm1_rtx : const0_rtx); ++ return aarch64_convert_sve_data_to_pred (target, VNx16BImode, ++ int_builder.build ()); ++} ++ ++/* Set DEST to immediate IMM. */ + + void +-aarch64_expand_mov_immediate (rtx dest, rtx imm, +- rtx (*gen_vec_duplicate) (rtx, rtx)) ++aarch64_expand_mov_immediate (rtx dest, rtx imm) + { + machine_mode mode = GET_MODE (dest); + +@@ -3405,38 +4563,50 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm, + + if (!CONST_INT_P (imm)) + { +- rtx base, step, value; +- if (GET_CODE (imm) == HIGH +- || aarch64_simd_valid_immediate (imm, NULL)) +- emit_insn (gen_rtx_SET (dest, imm)); +- else if (const_vec_series_p (imm, &base, &step)) +- aarch64_expand_vec_series (dest, base, step); +- else if (const_vec_duplicate_p (imm, &value)) ++ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { +- /* If the constant is out of range of an SVE vector move, +- load it from memory if we can, otherwise move it into +- a register and use a DUP. */ +- scalar_mode inner_mode = GET_MODE_INNER (mode); +- rtx op = force_const_mem (inner_mode, value); +- if (!op) +- op = force_reg (inner_mode, value); +- else if (!aarch64_sve_ld1r_operand_p (op)) ++ /* Only the low bit of each .H, .S and .D element is defined, ++ so we can set the upper bits to whatever we like. If the ++ predicate is all-true in MODE, prefer to set all the undefined ++ bits as well, so that we can share a single .B predicate for ++ all modes. */ ++ if (imm == CONSTM1_RTX (mode)) ++ imm = CONSTM1_RTX (VNx16BImode); ++ ++ /* All methods for constructing predicate modes wider than VNx16BI ++ will set the upper bits of each element to zero. Expose this ++ by moving such constants as a VNx16BI, so that all bits are ++ significant and so that constants for different modes can be ++ shared. The wider constant will still be available as a ++ REG_EQUAL note. 
*/ ++ rtx_vector_builder builder; ++ if (aarch64_get_sve_pred_bits (builder, imm)) + { +- rtx addr = force_reg (Pmode, XEXP (op, 0)); +- op = replace_equiv_address (op, addr); ++ rtx res = aarch64_expand_sve_const_pred (dest, builder); ++ if (dest != res) ++ emit_move_insn (dest, gen_lowpart (mode, res)); ++ return; + } +- emit_insn (gen_vec_duplicate (dest, op)); + } +- else if (GET_CODE (imm) == CONST_VECTOR +- && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ()) +- aarch64_expand_sve_const_vector (dest, imm); +- else ++ ++ if (GET_CODE (imm) == HIGH ++ || aarch64_simd_valid_immediate (imm, NULL)) + { +- rtx mem = force_const_mem (mode, imm); +- gcc_assert (mem); +- emit_move_insn (dest, mem); ++ emit_insn (gen_rtx_SET (dest, imm)); ++ return; + } + ++ if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode)) ++ if (rtx res = aarch64_expand_sve_const_vector (dest, imm)) ++ { ++ if (dest != res) ++ emit_insn (gen_aarch64_sve_reinterpret (mode, dest, res)); ++ return; ++ } ++ ++ rtx mem = force_const_mem (mode, imm); ++ gcc_assert (mem); ++ emit_move_insn (dest, mem); + return; + } + +@@ -3455,6 +4625,7 @@ aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src) + create_output_operand (&ops[0], dest, mode); + create_input_operand (&ops[1], pred, GET_MODE(pred)); + create_input_operand (&ops[2], src, mode); ++ temporary_volatile_ok v (true); + expand_insn (code_for_aarch64_pred_mov (mode), 3, ops); + } + +@@ -3471,7 +4642,7 @@ void + aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode) + { + machine_mode mode = GET_MODE (dest); +- rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); ++ rtx ptrue = aarch64_ptrue_reg (pred_mode); + if (!register_operand (src, mode) + && !register_operand (dest, mode)) + { +@@ -3535,7 +4706,7 @@ aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src) + return false; + + /* Generate *aarch64_sve_mov_subreg_be. */ +- rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode)); ++ rtx ptrue = aarch64_ptrue_reg (VNx16BImode); + rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src), + UNSPEC_REV_SUBREG); + emit_insn (gen_rtx_SET (dest, unspec)); +@@ -3557,14 +4728,29 @@ aarch64_replace_reg_mode (rtx x, machine_mode mode) + return x; + } + ++/* Return the SVE REV[BHW] unspec for reversing quantites of mode MODE ++ stored in wider integer containers. */ ++ ++static unsigned int ++aarch64_sve_rev_unspec (machine_mode mode) ++{ ++ switch (GET_MODE_UNIT_SIZE (mode)) ++ { ++ case 1: return UNSPEC_REVB; ++ case 2: return UNSPEC_REVH; ++ case 4: return UNSPEC_REVW; ++ } ++ gcc_unreachable (); ++} ++ + /* Split a *aarch64_sve_mov_subreg_be pattern with the given + operands. */ + + void + aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src) + { +- /* Decide which REV operation we need. The mode with narrower elements +- determines the mode of the operands and the mode with the wider ++ /* Decide which REV operation we need. The mode with wider elements ++ determines the mode of the operands and the mode with the narrower + elements determines the reverse width. 
*/ + machine_mode mode_with_wider_elts = GET_MODE (dest); + machine_mode mode_with_narrower_elts = GET_MODE (src); +@@ -3572,38 +4758,22 @@ aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src) + < GET_MODE_UNIT_SIZE (mode_with_narrower_elts)) + std::swap (mode_with_wider_elts, mode_with_narrower_elts); + ++ unsigned int unspec = aarch64_sve_rev_unspec (mode_with_narrower_elts); + unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts); +- unsigned int unspec; +- if (wider_bytes == 8) +- unspec = UNSPEC_REV64; +- else if (wider_bytes == 4) +- unspec = UNSPEC_REV32; +- else if (wider_bytes == 2) +- unspec = UNSPEC_REV16; +- else +- gcc_unreachable (); + machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require (); + +- /* Emit: +- +- (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV)] +- UNSPEC_MERGE_PTRUE)) +- +- with the appropriate modes. */ ++ /* Get the operands in the appropriate modes and emit the instruction. */ + ptrue = gen_lowpart (pred_mode, ptrue); +- dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts); +- src = aarch64_replace_reg_mode (src, mode_with_narrower_elts); +- src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec); +- src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src), +- UNSPEC_MERGE_PTRUE); +- emit_insn (gen_rtx_SET (dest, src)); ++ dest = aarch64_replace_reg_mode (dest, mode_with_wider_elts); ++ src = aarch64_replace_reg_mode (src, mode_with_wider_elts); ++ emit_insn (gen_aarch64_pred (unspec, mode_with_wider_elts, ++ dest, ptrue, src)); + } + + static bool +-aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, +- tree exp ATTRIBUTE_UNUSED) ++aarch64_function_ok_for_sibcall (tree, tree exp) + { +- if (aarch64_simd_decl_p (cfun->decl) != aarch64_simd_decl_p (decl)) ++ if (crtl->abi->id () != expr_callee_abi (exp).id ()) + return false; + + return true; +@@ -3612,35 +4782,48 @@ aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, + /* Implement TARGET_PASS_BY_REFERENCE. */ + + static bool +-aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED, +- machine_mode mode, +- const_tree type, +- bool named ATTRIBUTE_UNUSED) ++aarch64_pass_by_reference (cumulative_args_t pcum_v, ++ const function_arg_info &arg) + { ++ CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); + HOST_WIDE_INT size; + machine_mode dummymode; + int nregs; + ++ unsigned int num_zr, num_pr; ++ if (arg.type && aarch64_sve_argument_p (arg.type, &num_zr, &num_pr)) ++ { ++ if (pcum && !pcum->silent_p && !TARGET_SVE) ++ /* We can't gracefully recover at this point, so make this a ++ fatal error. */ ++ fatal_error (input_location, "arguments of type %qT require" ++ " the SVE ISA extension", arg.type); ++ ++ /* Variadic SVE types are passed by reference. Normal non-variadic ++ arguments are too if we've run out of registers. */ ++ return (!arg.named ++ || pcum->aapcs_nvrn + num_zr > NUM_FP_ARG_REGS ++ || pcum->aapcs_nprn + num_pr > NUM_PR_ARG_REGS); ++ } ++ + /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */ +- if (mode == BLKmode && type) +- size = int_size_in_bytes (type); ++ if (arg.mode == BLKmode && arg.type) ++ size = int_size_in_bytes (arg.type); + else + /* No frontends can create types with variable-sized modes, so we + shouldn't be asked to pass or return them. */ +- size = GET_MODE_SIZE (mode).to_constant (); ++ size = GET_MODE_SIZE (arg.mode).to_constant (); + + /* Aggregates are passed by reference based on their size. 
*/ +- if (type && AGGREGATE_TYPE_P (type)) +- { +- size = int_size_in_bytes (type); +- } ++ if (arg.aggregate_type_p ()) ++ size = int_size_in_bytes (arg.type); + + /* Variable sized arguments are always returned by reference. */ + if (size < 0) + return true; + + /* Can this be a candidate to be passed in fp/simd register(s)? */ +- if (aarch64_vfp_is_call_or_return_candidate (mode, type, ++ if (aarch64_vfp_is_call_or_return_candidate (arg.mode, arg.type, + &dummymode, &nregs, + NULL)) + return false; +@@ -3696,6 +4879,29 @@ aarch64_function_value (const_tree type, const_tree func, + if (INTEGRAL_TYPE_P (type)) + mode = promote_function_mode (type, mode, &unsignedp, func, 1); + ++ unsigned int num_zr, num_pr; ++ if (type && aarch64_sve_argument_p (type, &num_zr, &num_pr)) ++ { ++ /* Don't raise an error here if we're called when SVE is disabled, ++ since this is really just a query function. Other code must ++ do that where appropriate. */ ++ mode = TYPE_MODE_RAW (type); ++ gcc_assert (VECTOR_MODE_P (mode) ++ && (!TARGET_SVE || aarch64_sve_mode_p (mode))); ++ ++ if (num_zr > 0 && num_pr == 0) ++ return gen_rtx_REG (mode, V0_REGNUM); ++ ++ if (num_zr == 0 && num_pr == 1) ++ return gen_rtx_REG (mode, P0_REGNUM); ++ ++ gcc_unreachable (); ++ } ++ ++ /* Generic vectors that map to SVE modes with -msve-vector-bits=N are ++ returned in memory, not by value. */ ++ gcc_assert (!aarch64_sve_mode_p (mode)); ++ + if (aarch64_return_in_msb (type)) + { + HOST_WIDE_INT size = int_size_in_bytes (type); +@@ -3778,6 +4984,16 @@ aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) + /* Simple scalar types always returned in registers. */ + return false; + ++ unsigned int num_zr, num_pr; ++ if (type && aarch64_sve_argument_p (type, &num_zr, &num_pr)) ++ { ++ /* All SVE types we support fit in registers. For example, it isn't ++ yet possible to define an aggregate of 9+ SVE vectors or 5+ SVE ++ predicates. */ ++ gcc_assert (num_zr <= NUM_FP_ARG_REGS && num_pr <= NUM_PR_ARG_REGS); ++ return false; ++ } ++ + if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), + type, + &ag_mode, +@@ -3853,11 +5069,11 @@ aarch64_function_arg_alignment (machine_mode mode, const_tree type, + numbers refer to the rule numbers in the AAPCS64. */ + + static void +-aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, +- const_tree type, +- bool named ATTRIBUTE_UNUSED) ++aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg) + { + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); ++ tree type = arg.type; ++ machine_mode mode = arg.mode; + int ncrn, nvrn, nregs; + bool allocate_ncrn, allocate_nvrn; + HOST_WIDE_INT size; +@@ -3869,6 +5085,46 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, + + pcum->aapcs_arg_processed = true; + ++ unsigned int num_zr, num_pr; ++ if (type && aarch64_sve_argument_p (type, &num_zr, &num_pr)) ++ { ++ /* The PCS says that it is invalid to pass an SVE value to an ++ unprototyped function. There is no ABI-defined location we ++ can return in this case, so we have no real choice but to raise ++ an error immediately, even though this is only a query function. */ ++ if (arg.named && pcum->pcs_variant != ARM_PCS_SVE) ++ { ++ gcc_assert (!pcum->silent_p); ++ error ("SVE type %qT cannot be passed to an unprototyped function", ++ arg.type); ++ /* Avoid repeating the message, and avoid tripping the assert ++ below. 
*/ ++ pcum->pcs_variant = ARM_PCS_SVE; ++ } ++ ++ /* We would have converted the argument into pass-by-reference ++ form if it didn't fit in registers. */ ++ pcum->aapcs_nextnvrn = pcum->aapcs_nvrn + num_zr; ++ pcum->aapcs_nextnprn = pcum->aapcs_nprn + num_pr; ++ gcc_assert (arg.named ++ && pcum->pcs_variant == ARM_PCS_SVE ++ && aarch64_sve_mode_p (mode) ++ && pcum->aapcs_nextnvrn <= NUM_FP_ARG_REGS ++ && pcum->aapcs_nextnprn <= NUM_PR_ARG_REGS); ++ ++ if (num_zr > 0 && num_pr == 0) ++ pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + pcum->aapcs_nvrn); ++ else if (num_zr == 0 && num_pr == 1) ++ pcum->aapcs_reg = gen_rtx_REG (mode, P0_REGNUM + pcum->aapcs_nprn); ++ else ++ gcc_unreachable (); ++ return; ++ } ++ ++ /* Generic vectors that map to SVE modes with -msve-vector-bits=N are ++ passed by reference, not by value. */ ++ gcc_assert (!aarch64_sve_mode_p (mode)); ++ + /* Size in bytes, rounded to the nearest multiple of 8 bytes. */ + if (type) + size = int_size_in_bytes (type); +@@ -3893,7 +5149,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, + and homogenous short-vector aggregates (HVA). */ + if (allocate_nvrn) + { +- if (!TARGET_FLOAT) ++ if (!pcum->silent_p && !TARGET_FLOAT) + aarch64_err_no_fpadvsimd (mode); + + if (nvrn + nregs <= NUM_FP_ARG_REGS) +@@ -4009,37 +5265,46 @@ on_stack: + /* Implement TARGET_FUNCTION_ARG. */ + + static rtx +-aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode, +- const_tree type, bool named) ++aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg) + { + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); +- gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64); ++ gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64 ++ || pcum->pcs_variant == ARM_PCS_SIMD ++ || pcum->pcs_variant == ARM_PCS_SVE); + +- if (mode == VOIDmode) +- return NULL_RTX; ++ if (arg.end_marker_p ()) ++ return gen_int_mode (pcum->pcs_variant, DImode); + +- aarch64_layout_arg (pcum_v, mode, type, named); ++ aarch64_layout_arg (pcum_v, arg); + return pcum->aapcs_reg; + } + + void + aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, +- const_tree fntype ATTRIBUTE_UNUSED, +- rtx libname ATTRIBUTE_UNUSED, +- const_tree fndecl ATTRIBUTE_UNUSED, +- unsigned n_named ATTRIBUTE_UNUSED) ++ const_tree fntype, ++ rtx libname ATTRIBUTE_UNUSED, ++ const_tree fndecl ATTRIBUTE_UNUSED, ++ unsigned n_named ATTRIBUTE_UNUSED, ++ bool silent_p) + { + pcum->aapcs_ncrn = 0; + pcum->aapcs_nvrn = 0; ++ pcum->aapcs_nprn = 0; + pcum->aapcs_nextncrn = 0; + pcum->aapcs_nextnvrn = 0; +- pcum->pcs_variant = ARM_PCS_AAPCS64; ++ pcum->aapcs_nextnprn = 0; ++ if (fntype) ++ pcum->pcs_variant = (arm_pcs) fntype_abi (fntype).id (); ++ else ++ pcum->pcs_variant = ARM_PCS_AAPCS64; + pcum->aapcs_reg = NULL_RTX; + pcum->aapcs_arg_processed = false; + pcum->aapcs_stack_words = 0; + pcum->aapcs_stack_size = 0; ++ pcum->silent_p = silent_p; + +- if (!TARGET_FLOAT ++ if (!silent_p ++ && !TARGET_FLOAT + && fndecl && TREE_PUBLIC (fndecl) + && fntype && fntype != error_mark_node) + { +@@ -4050,24 +5315,38 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, + &mode, &nregs, NULL)) + aarch64_err_no_fpadvsimd (TYPE_MODE (type)); + } +- return; ++ ++ if (!silent_p ++ && !TARGET_SVE ++ && pcum->pcs_variant == ARM_PCS_SVE) ++ { ++ /* We can't gracefully recover at this point, so make this a ++ fatal error. 
*/ ++ if (fndecl) ++ fatal_error (input_location, "%qE requires the SVE ISA extension", ++ fndecl); ++ else ++ fatal_error (input_location, "calls to functions of type %qT require" ++ " the SVE ISA extension", fntype); ++ } + } + + static void + aarch64_function_arg_advance (cumulative_args_t pcum_v, +- machine_mode mode, +- const_tree type, +- bool named) ++ const function_arg_info &arg) + { + CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); +- if (pcum->pcs_variant == ARM_PCS_AAPCS64) ++ if (pcum->pcs_variant == ARM_PCS_AAPCS64 ++ || pcum->pcs_variant == ARM_PCS_SIMD ++ || pcum->pcs_variant == ARM_PCS_SVE) + { +- aarch64_layout_arg (pcum_v, mode, type, named); ++ aarch64_layout_arg (pcum_v, arg); + gcc_assert ((pcum->aapcs_reg != NULL_RTX) + != (pcum->aapcs_stack_words != 0)); + pcum->aapcs_arg_processed = false; + pcum->aapcs_ncrn = pcum->aapcs_nextncrn; + pcum->aapcs_nvrn = pcum->aapcs_nextnvrn; ++ pcum->aapcs_nprn = pcum->aapcs_nextnprn; + pcum->aapcs_stack_size += pcum->aapcs_stack_words; + pcum->aapcs_stack_words = 0; + pcum->aapcs_reg = NULL_RTX; +@@ -4500,11 +5779,14 @@ aarch64_needs_frame_chain (void) + static void + aarch64_layout_frame (void) + { +- HOST_WIDE_INT offset = 0; ++ poly_int64 offset = 0; + int regno, last_fp_reg = INVALID_REGNUM; +- bool simd_function = aarch64_simd_decl_p (cfun->decl); ++ machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); ++ poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); ++ bool frame_related_fp_reg_p = false; ++ aarch64_frame &frame = cfun->machine->frame; + +- cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain (); ++ frame.emit_frame_chain = aarch64_needs_frame_chain (); + + /* Adjust the outgoing arguments size if required. Keep it in sync with what + the mid-end is doing. */ +@@ -4513,184 +5795,264 @@ aarch64_layout_frame (void) + #define SLOT_NOT_REQUIRED (-2) + #define SLOT_REQUIRED (-1) + +- cfun->machine->frame.wb_candidate1 = INVALID_REGNUM; +- cfun->machine->frame.wb_candidate2 = INVALID_REGNUM; +- +- /* If this is a non-leaf simd function with calls we assume that +- at least one of those calls is to a non-simd function and thus +- we must save V8 to V23 in the prologue. */ +- +- if (simd_function && !crtl->is_leaf) +- { +- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) +- if (FP_SIMD_SAVED_REGNUM_P (regno)) +- df_set_regs_ever_live (regno, true); +- } ++ frame.wb_candidate1 = INVALID_REGNUM; ++ frame.wb_candidate2 = INVALID_REGNUM; ++ frame.spare_pred_reg = INVALID_REGNUM; + + /* First mark all the registers that really need to be saved... */ +- for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) +- cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; +- +- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) +- cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; ++ for (regno = 0; regno <= LAST_SAVED_REGNUM; regno++) ++ frame.reg_offset[regno] = SLOT_NOT_REQUIRED; + + /* ... that includes the eh data registers (if needed)... */ + if (crtl->calls_eh_return) + for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) +- cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] +- = SLOT_REQUIRED; ++ frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = SLOT_REQUIRED; + + /* ... and any callee saved register that dataflow says is live. 
*/ + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) + if (df_regs_ever_live_p (regno) ++ && !fixed_regs[regno] + && (regno == R30_REGNUM +- || !call_used_regs[regno])) +- cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; ++ || !crtl->abi->clobbers_full_reg_p (regno))) ++ frame.reg_offset[regno] = SLOT_REQUIRED; + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + if (df_regs_ever_live_p (regno) +- && (!call_used_regs[regno] +- || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno)))) ++ && !fixed_regs[regno] ++ && !crtl->abi->clobbers_full_reg_p (regno)) + { +- cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; ++ frame.reg_offset[regno] = SLOT_REQUIRED; + last_fp_reg = regno; ++ if (aarch64_emit_cfi_for_reg_p (regno)) ++ frame_related_fp_reg_p = true; + } + +- if (cfun->machine->frame.emit_frame_chain) +- { +- /* FP and LR are placed in the linkage record. */ +- cfun->machine->frame.reg_offset[R29_REGNUM] = 0; +- cfun->machine->frame.wb_candidate1 = R29_REGNUM; +- cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; +- cfun->machine->frame.wb_candidate2 = R30_REGNUM; +- offset = 2 * UNITS_PER_WORD; ++ /* Big-endian SVE frames need a spare predicate register in order ++ to save Z8-Z15. Decide which register they should use. Prefer ++ an unused argument register if possible, so that we don't force P4 ++ to be saved unnecessarily. */ ++ if (frame_related_fp_reg_p ++ && crtl->abi->id () == ARM_PCS_SVE ++ && BYTES_BIG_ENDIAN) ++ { ++ bitmap live1 = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ bitmap live2 = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun)); ++ for (regno = P0_REGNUM; regno <= P7_REGNUM; regno++) ++ if (!bitmap_bit_p (live1, regno) && !bitmap_bit_p (live2, regno)) ++ break; ++ gcc_assert (regno <= P7_REGNUM); ++ frame.spare_pred_reg = regno; ++ df_set_regs_ever_live (regno, true); + } + ++ for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) ++ if (df_regs_ever_live_p (regno) ++ && !fixed_regs[regno] ++ && !crtl->abi->clobbers_full_reg_p (regno)) ++ frame.reg_offset[regno] = SLOT_REQUIRED; ++ + /* With stack-clash, LR must be saved in non-leaf functions. */ + gcc_assert (crtl->is_leaf +- || (cfun->machine->frame.reg_offset[R30_REGNUM] +- != SLOT_NOT_REQUIRED)); ++ || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED)); ++ ++ /* Now assign stack slots for the registers. Start with the predicate ++ registers, since predicate LDR and STR have a relatively small ++ offset range. These saves happen below the hard frame pointer. */ ++ for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) ++ if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) ++ { ++ frame.reg_offset[regno] = offset; ++ offset += BYTES_PER_SVE_PRED; ++ } ++ ++ /* We save a maximum of 8 predicate registers, and since vector ++ registers are 8 times the size of a predicate register, all the ++ saved predicates fit within a single vector. Doing this also ++ rounds the offset to a 128-bit boundary. */ ++ if (maybe_ne (offset, 0)) ++ { ++ gcc_assert (known_le (offset, vector_save_size)); ++ offset = vector_save_size; ++ } ++ ++ /* If we need to save any SVE vector registers, add them next. */ ++ if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE) ++ for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) ++ if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) ++ { ++ frame.reg_offset[regno] = offset; ++ offset += vector_save_size; ++ } ++ ++ /* OFFSET is now the offset of the hard frame pointer from the bottom ++ of the callee save area. 
*/ ++ bool saves_below_hard_fp_p = maybe_ne (offset, 0); ++ frame.below_hard_fp_saved_regs_size = offset; ++ if (frame.emit_frame_chain) ++ { ++ /* FP and LR are placed in the linkage record. */ ++ frame.reg_offset[R29_REGNUM] = offset; ++ frame.wb_candidate1 = R29_REGNUM; ++ frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD; ++ frame.wb_candidate2 = R30_REGNUM; ++ offset += 2 * UNITS_PER_WORD; ++ } + +- /* Now assign stack slots for them. */ + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) +- if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) ++ if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) + { +- cfun->machine->frame.reg_offset[regno] = offset; +- if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) +- cfun->machine->frame.wb_candidate1 = regno; +- else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM) +- cfun->machine->frame.wb_candidate2 = regno; ++ frame.reg_offset[regno] = offset; ++ if (frame.wb_candidate1 == INVALID_REGNUM) ++ frame.wb_candidate1 = regno; ++ else if (frame.wb_candidate2 == INVALID_REGNUM) ++ frame.wb_candidate2 = regno; + offset += UNITS_PER_WORD; + } + +- HOST_WIDE_INT max_int_offset = offset; +- offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); +- bool has_align_gap = offset != max_int_offset; ++ poly_int64 max_int_offset = offset; ++ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); ++ bool has_align_gap = maybe_ne (offset, max_int_offset); + + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) +- if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) ++ if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) + { + /* If there is an alignment gap between integer and fp callee-saves, + allocate the last fp register to it if possible. */ + if (regno == last_fp_reg + && has_align_gap +- && !simd_function +- && (offset & 8) == 0) ++ && known_eq (vector_save_size, 8) ++ && multiple_p (offset, 16)) + { +- cfun->machine->frame.reg_offset[regno] = max_int_offset; ++ frame.reg_offset[regno] = max_int_offset; + break; + } + +- cfun->machine->frame.reg_offset[regno] = offset; +- if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) +- cfun->machine->frame.wb_candidate1 = regno; +- else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM +- && cfun->machine->frame.wb_candidate1 >= V0_REGNUM) +- cfun->machine->frame.wb_candidate2 = regno; +- offset += simd_function ? UNITS_PER_VREG : UNITS_PER_WORD; ++ frame.reg_offset[regno] = offset; ++ if (frame.wb_candidate1 == INVALID_REGNUM) ++ frame.wb_candidate1 = regno; ++ else if (frame.wb_candidate2 == INVALID_REGNUM ++ && frame.wb_candidate1 >= V0_REGNUM) ++ frame.wb_candidate2 = regno; ++ offset += vector_save_size; + } + +- offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); ++ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); + +- cfun->machine->frame.saved_regs_size = offset; ++ frame.saved_regs_size = offset; + +- HOST_WIDE_INT varargs_and_saved_regs_size +- = offset + cfun->machine->frame.saved_varargs_size; ++ poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; + +- cfun->machine->frame.hard_fp_offset ++ poly_int64 above_outgoing_args + = aligned_upper_bound (varargs_and_saved_regs_size + + get_frame_size (), + STACK_BOUNDARY / BITS_PER_UNIT); + ++ frame.hard_fp_offset ++ = above_outgoing_args - frame.below_hard_fp_saved_regs_size; ++ + /* Both these values are already aligned. 
*/ + gcc_assert (multiple_p (crtl->outgoing_args_size, + STACK_BOUNDARY / BITS_PER_UNIT)); +- cfun->machine->frame.frame_size +- = (cfun->machine->frame.hard_fp_offset +- + crtl->outgoing_args_size); ++ frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; + +- cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size; ++ frame.locals_offset = frame.saved_varargs_size; + +- cfun->machine->frame.initial_adjust = 0; +- cfun->machine->frame.final_adjust = 0; +- cfun->machine->frame.callee_adjust = 0; +- cfun->machine->frame.callee_offset = 0; ++ frame.initial_adjust = 0; ++ frame.final_adjust = 0; ++ frame.callee_adjust = 0; ++ frame.sve_callee_adjust = 0; ++ frame.callee_offset = 0; + + HOST_WIDE_INT max_push_offset = 0; +- if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM) ++ if (frame.wb_candidate2 != INVALID_REGNUM) + max_push_offset = 512; +- else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM) ++ else if (frame.wb_candidate1 != INVALID_REGNUM) + max_push_offset = 256; + +- HOST_WIDE_INT const_size, const_fp_offset; +- if (cfun->machine->frame.frame_size.is_constant (&const_size) ++ HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; ++ HOST_WIDE_INT const_saved_regs_size; ++ if (frame.frame_size.is_constant (&const_size) + && const_size < max_push_offset +- && known_eq (crtl->outgoing_args_size, 0)) ++ && known_eq (frame.hard_fp_offset, const_size)) + { + /* Simple, small frame with no outgoing arguments: ++ + stp reg1, reg2, [sp, -frame_size]! + stp reg3, reg4, [sp, 16] */ +- cfun->machine->frame.callee_adjust = const_size; +- } +- else if (known_lt (crtl->outgoing_args_size +- + cfun->machine->frame.saved_regs_size, 512) ++ frame.callee_adjust = const_size; ++ } ++ else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) ++ && frame.saved_regs_size.is_constant (&const_saved_regs_size) ++ && const_outgoing_args_size + const_saved_regs_size < 512 ++ /* We could handle this case even with outgoing args, provided ++ that the number of args left us with valid offsets for all ++ predicate and vector save slots. It's such a rare case that ++ it hardly seems worth the effort though. 
*/ ++ && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) + && !(cfun->calls_alloca +- && known_lt (cfun->machine->frame.hard_fp_offset, +- max_push_offset))) ++ && frame.hard_fp_offset.is_constant (&const_fp_offset) ++ && const_fp_offset < max_push_offset)) + { + /* Frame with small outgoing arguments: ++ + sub sp, sp, frame_size + stp reg1, reg2, [sp, outgoing_args_size] + stp reg3, reg4, [sp, outgoing_args_size + 16] */ +- cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size; +- cfun->machine->frame.callee_offset +- = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset; ++ frame.initial_adjust = frame.frame_size; ++ frame.callee_offset = const_outgoing_args_size; + } +- else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset) ++ else if (saves_below_hard_fp_p ++ && known_eq (frame.saved_regs_size, ++ frame.below_hard_fp_saved_regs_size)) ++ { ++ /* Frame in which all saves are SVE saves: ++ ++ sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size ++ save SVE registers relative to SP ++ sub sp, sp, outgoing_args_size */ ++ frame.initial_adjust = (frame.hard_fp_offset ++ + frame.below_hard_fp_saved_regs_size); ++ frame.final_adjust = crtl->outgoing_args_size; ++ } ++ else if (frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset) + { +- /* Frame with large outgoing arguments but a small local area: ++ /* Frame with large outgoing arguments or SVE saves, but with ++ a small local area: ++ + stp reg1, reg2, [sp, -hard_fp_offset]! + stp reg3, reg4, [sp, 16] ++ [sub sp, sp, below_hard_fp_saved_regs_size] ++ [save SVE registers relative to SP] + sub sp, sp, outgoing_args_size */ +- cfun->machine->frame.callee_adjust = const_fp_offset; +- cfun->machine->frame.final_adjust +- = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; ++ frame.callee_adjust = const_fp_offset; ++ frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; ++ frame.final_adjust = crtl->outgoing_args_size; + } + else + { +- /* Frame with large local area and outgoing arguments using frame pointer: ++ /* Frame with large local area and outgoing arguments or SVE saves, ++ using frame pointer: ++ + sub sp, sp, hard_fp_offset + stp x29, x30, [sp, 0] + add x29, sp, 0 + stp reg3, reg4, [sp, 16] ++ [sub sp, sp, below_hard_fp_saved_regs_size] ++ [save SVE registers relative to SP] + sub sp, sp, outgoing_args_size */ +- cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset; +- cfun->machine->frame.final_adjust +- = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust; ++ frame.initial_adjust = frame.hard_fp_offset; ++ frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; ++ frame.final_adjust = crtl->outgoing_args_size; + } + +- cfun->machine->frame.laid_out = true; ++ /* Make sure the individual adjustments add up to the full frame size. 
*/ ++ gcc_assert (known_eq (frame.initial_adjust ++ + frame.callee_adjust ++ + frame.sve_callee_adjust ++ + frame.final_adjust, frame.frame_size)); ++ ++ frame.laid_out = true; + } + + /* Return true if the register REGNO is saved on entry to +@@ -4699,7 +6061,7 @@ aarch64_layout_frame (void) + static bool + aarch64_register_saved_on_entry (int regno) + { +- return cfun->machine->frame.reg_offset[regno] >= 0; ++ return known_ge (cfun->machine->frame.reg_offset[regno], 0); + } + + /* Return the next register up from REGNO up to LIMIT for the callee +@@ -4766,7 +6128,7 @@ static void + aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment) + { + rtx_insn *insn; +- machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1); ++ machine_mode mode = aarch64_reg_save_mode (regno1); + + if (regno2 == INVALID_REGNUM) + return aarch64_pushwb_single_reg (mode, regno1, adjustment); +@@ -4812,7 +6174,7 @@ static void + aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment, + rtx *cfi_ops) + { +- machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1); ++ machine_mode mode = aarch64_reg_save_mode (regno1); + rtx reg1 = gen_rtx_REG (mode, regno1); + + *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); +@@ -4888,10 +6250,10 @@ aarch64_return_address_signing_enabled (void) + gcc_assert (cfun->machine->frame.laid_out); + + /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function +- if it's LR is pushed onto stack. */ ++ if its LR is pushed onto stack. */ + return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL + || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF +- && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0)); ++ && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0))); + } + + /* Return TRUE if Branch Target Identification Mechanism is enabled. */ +@@ -4901,17 +6263,75 @@ aarch64_bti_enabled (void) + return (aarch64_enable_bti == 1); + } + ++/* The caller is going to use ST1D or LD1D to save or restore an SVE ++ register in mode MODE at BASE_RTX + OFFSET, where OFFSET is in ++ the range [1, 16] * GET_MODE_SIZE (MODE). Prepare for this by: ++ ++ (1) updating BASE_RTX + OFFSET so that it is a legitimate ST1D ++ or LD1D address ++ ++ (2) setting PRED to a valid predicate register for the ST1D or LD1D, ++ if the variable isn't already nonnull ++ ++ (1) is needed when OFFSET is in the range [8, 16] * GET_MODE_SIZE (MODE). ++ Handle this case using a temporary base register that is suitable for ++ all offsets in that range. Use ANCHOR_REG as this base register if it ++ is nonnull, otherwise create a new register and store it in ANCHOR_REG. */ ++ ++static inline void ++aarch64_adjust_sve_callee_save_base (machine_mode mode, rtx &base_rtx, ++ rtx &anchor_reg, poly_int64 &offset, ++ rtx &ptrue) ++{ ++ if (maybe_ge (offset, 8 * GET_MODE_SIZE (mode))) ++ { ++ /* This is the maximum valid offset of the anchor from the base. ++ Lower values would be valid too. 
*/ ++ poly_int64 anchor_offset = 16 * GET_MODE_SIZE (mode); ++ if (!anchor_reg) ++ { ++ anchor_reg = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); ++ emit_insn (gen_add3_insn (anchor_reg, base_rtx, ++ gen_int_mode (anchor_offset, Pmode))); ++ } ++ base_rtx = anchor_reg; ++ offset -= anchor_offset; ++ } ++ if (!ptrue) ++ { ++ int pred_reg = cfun->machine->frame.spare_pred_reg; ++ emit_move_insn (gen_rtx_REG (VNx16BImode, pred_reg), ++ CONSTM1_RTX (VNx16BImode)); ++ ptrue = gen_rtx_REG (VNx2BImode, pred_reg); ++ } ++} ++ ++/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG ++ is saved at BASE + OFFSET. */ ++ ++static void ++aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, ++ rtx base, poly_int64 offset) ++{ ++ rtx mem = gen_frame_mem (GET_MODE (reg), ++ plus_constant (Pmode, base, offset)); ++ add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); ++} ++ + /* Emit code to save the callee-saved registers from register number START + to LIMIT to the stack at the location starting at offset START_OFFSET, +- skipping any write-back candidates if SKIP_WB is true. */ ++ skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P ++ is true if the hard frame pointer has been set up. */ + + static void +-aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, +- unsigned start, unsigned limit, bool skip_wb) ++aarch64_save_callee_saves (poly_int64 start_offset, ++ unsigned start, unsigned limit, bool skip_wb, ++ bool hard_fp_valid_p) + { + rtx_insn *insn; + unsigned regno; + unsigned regno2; ++ rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX; + + for (regno = aarch64_next_callee_save (start, limit); + regno <= limit; +@@ -4919,7 +6339,7 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, + { + rtx reg, mem; + poly_int64 offset; +- int offset_diff; ++ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); + + if (skip_wb + && (regno == cfun->machine->frame.wb_candidate1 +@@ -4927,27 +6347,53 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, + continue; + + if (cfun->machine->reg_is_wrapped_separately[regno]) +- continue; ++ continue; + ++ machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); + offset = start_offset + cfun->machine->frame.reg_offset[regno]; +- mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, +- offset)); ++ rtx base_rtx = stack_pointer_rtx; ++ poly_int64 sp_offset = offset; + +- regno2 = aarch64_next_callee_save (regno + 1, limit); +- offset_diff = cfun->machine->frame.reg_offset[regno2] +- - cfun->machine->frame.reg_offset[regno]; ++ HOST_WIDE_INT const_offset; ++ if (mode == VNx2DImode && BYTES_BIG_ENDIAN) ++ aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, ++ offset, ptrue); ++ else if (GP_REGNUM_P (regno) ++ && (!offset.is_constant (&const_offset) || const_offset >= 512)) ++ { ++ gcc_assert (known_eq (start_offset, 0)); ++ poly_int64 fp_offset ++ = cfun->machine->frame.below_hard_fp_saved_regs_size; ++ if (hard_fp_valid_p) ++ base_rtx = hard_frame_pointer_rtx; ++ else ++ { ++ if (!anchor_reg) ++ { ++ anchor_reg = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); ++ emit_insn (gen_add3_insn (anchor_reg, base_rtx, ++ gen_int_mode (fp_offset, Pmode))); ++ } ++ base_rtx = anchor_reg; ++ } ++ offset -= fp_offset; ++ } ++ mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); ++ bool need_cfa_note_p = (base_rtx != stack_pointer_rtx); + +- if (regno2 <= limit ++ if (!aarch64_sve_mode_p (mode) ++ && (regno2 = 
aarch64_next_callee_save (regno + 1, limit)) <= limit + && !cfun->machine->reg_is_wrapped_separately[regno2] +- && known_eq (GET_MODE_SIZE (mode), offset_diff)) ++ && known_eq (GET_MODE_SIZE (mode), ++ cfun->machine->frame.reg_offset[regno2] ++ - cfun->machine->frame.reg_offset[regno])) + { + rtx reg2 = gen_rtx_REG (mode, regno2); + rtx mem2; + +- offset = start_offset + cfun->machine->frame.reg_offset[regno2]; +- mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, +- offset)); ++ offset += GET_MODE_SIZE (mode); ++ mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); + insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, + reg2)); + +@@ -4955,71 +6401,96 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, + always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. */ +- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; ++ if (aarch64_emit_cfi_for_reg_p (regno2)) ++ { ++ if (need_cfa_note_p) ++ aarch64_add_cfa_expression (insn, reg2, stack_pointer_rtx, ++ sp_offset + GET_MODE_SIZE (mode)); ++ else ++ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; ++ } ++ + regno = regno2; + } ++ else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) ++ { ++ insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, reg)); ++ need_cfa_note_p = true; ++ } ++ else if (aarch64_sve_mode_p (mode)) ++ insn = emit_insn (gen_rtx_SET (mem, reg)); + else + insn = emit_move_insn (mem, reg); + +- RTX_FRAME_RELATED_P (insn) = 1; ++ RTX_FRAME_RELATED_P (insn) = frame_related_p; ++ if (frame_related_p && need_cfa_note_p) ++ aarch64_add_cfa_expression (insn, reg, stack_pointer_rtx, sp_offset); + } + } + +-/* Emit code to restore the callee registers of mode MODE from register +- number START up to and including LIMIT. Restore from the stack offset +- START_OFFSET, skipping any write-back candidates if SKIP_WB is true. +- Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */ ++/* Emit code to restore the callee registers from register number START ++ up to and including LIMIT. Restore from the stack offset START_OFFSET, ++ skipping any write-back candidates if SKIP_WB is true. Write the ++ appropriate REG_CFA_RESTORE notes into CFI_OPS. 
*/ + + static void +-aarch64_restore_callee_saves (machine_mode mode, +- poly_int64 start_offset, unsigned start, ++aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, + unsigned limit, bool skip_wb, rtx *cfi_ops) + { +- rtx base_rtx = stack_pointer_rtx; + unsigned regno; + unsigned regno2; + poly_int64 offset; ++ rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX; + + for (regno = aarch64_next_callee_save (start, limit); + regno <= limit; + regno = aarch64_next_callee_save (regno + 1, limit)) + { ++ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); + if (cfun->machine->reg_is_wrapped_separately[regno]) +- continue; ++ continue; + + rtx reg, mem; +- int offset_diff; + + if (skip_wb + && (regno == cfun->machine->frame.wb_candidate1 + || regno == cfun->machine->frame.wb_candidate2)) + continue; + ++ machine_mode mode = aarch64_reg_save_mode (regno); + reg = gen_rtx_REG (mode, regno); + offset = start_offset + cfun->machine->frame.reg_offset[regno]; ++ rtx base_rtx = stack_pointer_rtx; ++ if (mode == VNx2DImode && BYTES_BIG_ENDIAN) ++ aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, ++ offset, ptrue); + mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); + +- regno2 = aarch64_next_callee_save (regno + 1, limit); +- offset_diff = cfun->machine->frame.reg_offset[regno2] +- - cfun->machine->frame.reg_offset[regno]; +- +- if (regno2 <= limit ++ if (!aarch64_sve_mode_p (mode) ++ && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit + && !cfun->machine->reg_is_wrapped_separately[regno2] +- && known_eq (GET_MODE_SIZE (mode), offset_diff)) ++ && known_eq (GET_MODE_SIZE (mode), ++ cfun->machine->frame.reg_offset[regno2] ++ - cfun->machine->frame.reg_offset[regno])) + { + rtx reg2 = gen_rtx_REG (mode, regno2); + rtx mem2; + +- offset = start_offset + cfun->machine->frame.reg_offset[regno2]; ++ offset += GET_MODE_SIZE (mode); + mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); + emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); + + *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); + regno = regno2; + } ++ else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) ++ emit_insn (gen_aarch64_pred_mov (mode, reg, ptrue, mem)); ++ else if (aarch64_sve_mode_p (mode)) ++ emit_insn (gen_rtx_SET (reg, mem)); + else + emit_move_insn (reg, mem); +- *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops); ++ if (frame_related_p) ++ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops); + } + } + +@@ -5101,13 +6572,35 @@ aarch64_get_separate_components (void) + for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++) + if (aarch64_register_saved_on_entry (regno)) + { ++ /* Punt on saves and restores that use ST1D and LD1D. We could ++ try to be smarter, but it would involve making sure that the ++ spare predicate register itself is safe to use at the save ++ and restore points. Also, when a frame pointer is being used, ++ the slots are often out of reach of ST1D and LD1D anyway. */ ++ machine_mode mode = aarch64_reg_save_mode (regno); ++ if (mode == VNx2DImode && BYTES_BIG_ENDIAN) ++ continue; ++ + poly_int64 offset = cfun->machine->frame.reg_offset[regno]; +- if (!frame_pointer_needed) +- offset += cfun->machine->frame.frame_size +- - cfun->machine->frame.hard_fp_offset; ++ ++ /* If the register is saved in the first SVE save slot, we use ++ it as a stack probe for -fstack-clash-protection. 
*/ ++ if (flag_stack_clash_protection ++ && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0) ++ && known_eq (offset, 0)) ++ continue; ++ ++ /* Get the offset relative to the register we'll use. */ ++ if (frame_pointer_needed) ++ offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; ++ else ++ offset += crtl->outgoing_args_size; ++ + /* Check that we can access the stack slot of the register with one + direct load with no adjustments needed. */ +- if (offset_12bit_unsigned_scaled_p (DImode, offset)) ++ if (aarch64_sve_mode_p (mode) ++ ? offset_9bit_signed_scaled_p (mode, offset) ++ : offset_12bit_unsigned_scaled_p (mode, offset)) + bitmap_set_bit (components, regno); + } + +@@ -5115,6 +6608,12 @@ aarch64_get_separate_components (void) + if (frame_pointer_needed) + bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM); + ++ /* If the spare predicate register used by big-endian SVE code ++ is call-preserved, it must be saved in the main prologue ++ before any saves that use it. */ ++ if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM) ++ bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg); ++ + unsigned reg1 = cfun->machine->frame.wb_candidate1; + unsigned reg2 = cfun->machine->frame.wb_candidate2; + /* If registers have been chosen to be stored/restored with +@@ -5139,31 +6638,48 @@ aarch64_components_for_bb (basic_block bb) + bitmap in = DF_LIVE_IN (bb); + bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; + bitmap kill = &DF_LIVE_BB_INFO (bb)->kill; +- bool simd_function = aarch64_simd_decl_p (cfun->decl); + + sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); + bitmap_clear (components); + ++ /* Clobbered registers don't generate values in any meaningful sense, ++ since nothing after the clobber can rely on their value. And we can't ++ say that partially-clobbered registers are unconditionally killed, ++ because whether they're killed or not depends on the mode of the ++ value they're holding. Thus partially call-clobbered registers ++ appear in neither the kill set nor the gen set. ++ ++ Check manually for any calls that clobber more of a register than the ++ current function can. */ ++ function_abi_aggregator callee_abis; ++ rtx_insn *insn; ++ FOR_BB_INSNS (bb, insn) ++ if (CALL_P (insn)) ++ callee_abis.note_callee_abi (insn_callee_abi (insn)); ++ HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi); ++ + /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */ + for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++) +- if ((!call_used_regs[regno] +- || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno))) +- && (bitmap_bit_p (in, regno) +- || bitmap_bit_p (gen, regno) +- || bitmap_bit_p (kill, regno))) ++ if (!fixed_regs[regno] ++ && !crtl->abi->clobbers_full_reg_p (regno) ++ && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ++ || bitmap_bit_p (in, regno) ++ || bitmap_bit_p (gen, regno) ++ || bitmap_bit_p (kill, regno))) + { +- unsigned regno2, offset, offset2; + bitmap_set_bit (components, regno); + + /* If there is a callee-save at an adjacent offset, add it too + to increase the use of LDP/STP. */ +- offset = cfun->machine->frame.reg_offset[regno]; +- regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1; ++ poly_int64 offset = cfun->machine->frame.reg_offset[regno]; ++ unsigned regno2 = multiple_p (offset, 16) ? 
regno + 1 : regno - 1; + + if (regno2 <= LAST_SAVED_REGNUM) + { +- offset2 = cfun->machine->frame.reg_offset[regno2]; +- if ((offset & ~8) == (offset2 & ~8)) ++ poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2]; ++ if (regno < regno2 ++ ? known_eq (offset + 8, offset2) ++ : multiple_p (offset2, 16) && known_eq (offset2 + 8, offset)) + bitmap_set_bit (components, regno2); + } + } +@@ -5218,16 +6734,16 @@ aarch64_process_components (sbitmap components, bool prologue_p) + + while (regno != last_regno) + { +- /* AAPCS64 section 5.1.2 requires only the low 64 bits to be saved +- so DFmode for the vector registers is enough. For simd functions +- we want to save the low 128 bits. */ +- machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno); ++ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); ++ machine_mode mode = aarch64_reg_save_mode (regno); + + rtx reg = gen_rtx_REG (mode, regno); + poly_int64 offset = cfun->machine->frame.reg_offset[regno]; +- if (!frame_pointer_needed) +- offset += cfun->machine->frame.frame_size +- - cfun->machine->frame.hard_fp_offset; ++ if (frame_pointer_needed) ++ offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; ++ else ++ offset += crtl->outgoing_args_size; ++ + rtx addr = plus_constant (Pmode, ptr_reg, offset); + rtx mem = gen_frame_mem (mode, addr); + +@@ -5238,39 +6754,49 @@ aarch64_process_components (sbitmap components, bool prologue_p) + if (regno2 == last_regno) + { + insn = emit_insn (set); +- RTX_FRAME_RELATED_P (insn) = 1; +- if (prologue_p) +- add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); +- else +- add_reg_note (insn, REG_CFA_RESTORE, reg); ++ if (frame_related_p) ++ { ++ RTX_FRAME_RELATED_P (insn) = 1; ++ if (prologue_p) ++ add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); ++ else ++ add_reg_note (insn, REG_CFA_RESTORE, reg); ++ } + break; + } + + poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2]; + /* The next register is not of the same class or its offset is not + mergeable with the current one into a pair. */ +- if (!satisfies_constraint_Ump (mem) ++ if (aarch64_sve_mode_p (mode) ++ || !satisfies_constraint_Ump (mem) + || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2) +- || (aarch64_simd_decl_p (cfun->decl) && FP_REGNUM_P (regno)) ++ || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno)) + || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]), + GET_MODE_SIZE (mode))) + { + insn = emit_insn (set); +- RTX_FRAME_RELATED_P (insn) = 1; +- if (prologue_p) +- add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); +- else +- add_reg_note (insn, REG_CFA_RESTORE, reg); ++ if (frame_related_p) ++ { ++ RTX_FRAME_RELATED_P (insn) = 1; ++ if (prologue_p) ++ add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); ++ else ++ add_reg_note (insn, REG_CFA_RESTORE, reg); ++ } + + regno = regno2; + continue; + } + ++ bool frame_related2_p = aarch64_emit_cfi_for_reg_p (regno2); ++ + /* REGNO2 can be saved/restored in a pair with REGNO. */ + rtx reg2 = gen_rtx_REG (mode, regno2); +- if (!frame_pointer_needed) +- offset2 += cfun->machine->frame.frame_size +- - cfun->machine->frame.hard_fp_offset; ++ if (frame_pointer_needed) ++ offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size; ++ else ++ offset2 += crtl->outgoing_args_size; + rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); + rtx mem2 = gen_frame_mem (mode, addr2); + rtx set2 = prologue_p ? 
gen_rtx_SET (mem2, reg2) +@@ -5281,16 +6807,23 @@ aarch64_process_components (sbitmap components, bool prologue_p) + else + insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); + +- RTX_FRAME_RELATED_P (insn) = 1; +- if (prologue_p) +- { +- add_reg_note (insn, REG_CFA_OFFSET, set); +- add_reg_note (insn, REG_CFA_OFFSET, set2); +- } +- else ++ if (frame_related_p || frame_related2_p) + { +- add_reg_note (insn, REG_CFA_RESTORE, reg); +- add_reg_note (insn, REG_CFA_RESTORE, reg2); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ if (prologue_p) ++ { ++ if (frame_related_p) ++ add_reg_note (insn, REG_CFA_OFFSET, set); ++ if (frame_related2_p) ++ add_reg_note (insn, REG_CFA_OFFSET, set2); ++ } ++ else ++ { ++ if (frame_related_p) ++ add_reg_note (insn, REG_CFA_RESTORE, reg); ++ if (frame_related2_p) ++ add_reg_note (insn, REG_CFA_RESTORE, reg2); ++ } + } + + regno = aarch64_get_next_set_bit (components, regno2 + 1); +@@ -5359,15 +6892,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + HOST_WIDE_INT guard_size + = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); + HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; +- /* When doing the final adjustment for the outgoing argument size we can't +- assume that LR was saved at position 0. So subtract it's offset from the +- ABI safe buffer so that we don't accidentally allow an adjustment that +- would result in an allocation larger than the ABI buffer without +- probing. */ + HOST_WIDE_INT min_probe_threshold +- = final_adjustment_p +- ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM] +- : guard_size - guard_used_by_caller; ++ = (final_adjustment_p ++ ? guard_used_by_caller ++ : guard_size - guard_used_by_caller); ++ /* When doing the final adjustment for the outgoing arguments, take into ++ account any unprobed space there is above the current SP. There are ++ two cases: ++ ++ - When saving SVE registers below the hard frame pointer, we force ++ the lowest save to take place in the prologue before doing the final ++ adjustment (i.e. we don't allow the save to be shrink-wrapped). ++ This acts as a probe at SP, so there is no unprobed space. ++ ++ - When there are no SVE register saves, we use the store of the link ++ register as a probe. We can't assume that LR was saved at position 0 ++ though, so treat any space below it as unprobed. 
*/ ++ if (final_adjustment_p ++ && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)) ++ { ++ poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM]; ++ if (known_ge (lr_offset, 0)) ++ min_probe_threshold -= lr_offset.to_constant (); ++ else ++ gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0)); ++ } + + poly_int64 frame_size = cfun->machine->frame.frame_size; + +@@ -5377,13 +6926,15 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, + if (flag_stack_clash_protection && !final_adjustment_p) + { + poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; ++ poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; + poly_int64 final_adjust = cfun->machine->frame.final_adjust; + + if (known_eq (frame_size, 0)) + { + dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); + } +- else if (known_lt (initial_adjust, guard_size - guard_used_by_caller) ++ else if (known_lt (initial_adjust + sve_callee_adjust, ++ guard_size - guard_used_by_caller) + && known_lt (final_adjust, guard_used_by_caller)) + { + dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); +@@ -5583,24 +7134,10 @@ aarch64_epilogue_uses (int regno) + { + if (regno == LR_REGNUM) + return 1; +- if (aarch64_simd_decl_p (cfun->decl) && FP_SIMD_SAVED_REGNUM_P (regno)) +- return 1; + } + return 0; + } + +-/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG +- is saved at BASE + OFFSET. */ +- +-static void +-aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg, +- rtx base, poly_int64 offset) +-{ +- rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset)); +- add_reg_note (insn, REG_CFA_EXPRESSION, +- gen_rtx_SET (mem, regno_reg_rtx[reg])); +-} +- + /* AArch64 stack frames generated by this compiler look like: + + +-------------------------------+ +@@ -5622,8 +7159,12 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg, + +-------------------------------+ | + | LR' | | + +-------------------------------+ | +- | FP' | / <- hard_frame_pointer_rtx (aligned) +- +-------------------------------+ ++ | FP' | | ++ +-------------------------------+ |<- hard_frame_pointer_rtx (aligned) ++ | SVE vector registers | | \ ++ +-------------------------------+ | | below_hard_fp_saved_regs_size ++ | SVE predicate registers | / / ++ +-------------------------------+ + | dynamic allocation | + +-------------------------------+ + | padding | +@@ -5656,7 +7197,8 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg, + The following registers are reserved during frame layout and should not be + used for any other purpose: + +- - r11: Used by stack clash protection when SVE is enabled. ++ - r11: Used by stack clash protection when SVE is enabled, and also ++ as an anchor register when saving and restoring registers + - r12(EP0) and r13(EP1): Used as temporaries for stack adjustment. + - r14 and r15: Used for speculation tracking. + - r16(IP0), r17(IP1): Used by indirect tailcalls. 
+@@ -5679,15 +7221,37 @@ aarch64_expand_prologue (void) + HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; + poly_int64 final_adjust = cfun->machine->frame.final_adjust; + poly_int64 callee_offset = cfun->machine->frame.callee_offset; ++ poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; ++ poly_int64 below_hard_fp_saved_regs_size ++ = cfun->machine->frame.below_hard_fp_saved_regs_size; + unsigned reg1 = cfun->machine->frame.wb_candidate1; + unsigned reg2 = cfun->machine->frame.wb_candidate2; + bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; + rtx_insn *insn; + ++ if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) ++ { ++ /* Fold the SVE allocation into the initial allocation. ++ We don't do this in aarch64_layout_arg to avoid pessimizing ++ the epilogue code. */ ++ initial_adjust += sve_callee_adjust; ++ sve_callee_adjust = 0; ++ } ++ + /* Sign return address for functions. */ + if (aarch64_return_address_signing_enabled ()) + { +- insn = emit_insn (gen_pacisp ()); ++ switch (aarch64_ra_sign_key) ++ { ++ case AARCH64_KEY_A: ++ insn = emit_insn (gen_paciasp ()); ++ break; ++ case AARCH64_KEY_B: ++ insn = emit_insn (gen_pacibsp ()); ++ break; ++ default: ++ gcc_unreachable (); ++ } + add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } +@@ -5726,18 +7290,27 @@ aarch64_expand_prologue (void) + if (callee_adjust != 0) + aarch64_push_regs (reg1, reg2, callee_adjust); + ++ /* The offset of the frame chain record (if any) from the current SP. */ ++ poly_int64 chain_offset = (initial_adjust + callee_adjust ++ - cfun->machine->frame.hard_fp_offset); ++ gcc_assert (known_ge (chain_offset, 0)); ++ ++ /* The offset of the bottom of the save area from the current SP. */ ++ poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size; ++ + if (emit_frame_chain) + { +- poly_int64 reg_offset = callee_adjust; + if (callee_adjust == 0) + { + reg1 = R29_REGNUM; + reg2 = R30_REGNUM; +- reg_offset = callee_offset; +- aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false); ++ aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, ++ false, false); + } ++ else ++ gcc_assert (known_eq (chain_offset, 0)); + aarch64_add_offset (Pmode, hard_frame_pointer_rtx, +- stack_pointer_rtx, callee_offset, ++ stack_pointer_rtx, chain_offset, + tmp1_rtx, tmp0_rtx, frame_pointer_needed); + if (frame_pointer_needed && !frame_size.is_constant ()) + { +@@ -5764,23 +7337,31 @@ aarch64_expand_prologue (void) + + /* Change the save slot expressions for the registers that + we've already saved. 
*/ +- reg_offset -= callee_offset; +- aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx, +- reg_offset + UNITS_PER_WORD); +- aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx, +- reg_offset); ++ aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2], ++ hard_frame_pointer_rtx, UNITS_PER_WORD); ++ aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1], ++ hard_frame_pointer_rtx, 0); + } + emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); + } + +- aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, +- callee_adjust != 0 || emit_frame_chain); +- if (aarch64_simd_decl_p (cfun->decl)) +- aarch64_save_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM, +- callee_adjust != 0 || emit_frame_chain); +- else +- aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, +- callee_adjust != 0 || emit_frame_chain); ++ aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, ++ callee_adjust != 0 || emit_frame_chain, ++ emit_frame_chain); ++ if (maybe_ne (sve_callee_adjust, 0)) ++ { ++ gcc_assert (!flag_stack_clash_protection ++ || known_eq (initial_adjust, 0)); ++ aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, ++ sve_callee_adjust, ++ !frame_pointer_needed, false); ++ saved_regs_offset += sve_callee_adjust; ++ } ++ aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, ++ false, emit_frame_chain); ++ aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, ++ callee_adjust != 0 || emit_frame_chain, ++ emit_frame_chain); + + /* We may need to probe the final adjustment if it is larger than the guard + that is assumed by the called. */ +@@ -5806,19 +7387,6 @@ aarch64_use_return_insn_p (void) + return known_eq (cfun->machine->frame.frame_size, 0); + } + +-/* Return false for non-leaf SIMD functions in order to avoid +- shrink-wrapping them. Doing this will lose the necessary +- save/restore of FP registers. */ +- +-bool +-aarch64_use_simple_return_insn_p (void) +-{ +- if (aarch64_simd_decl_p (cfun->decl) && !crtl->is_leaf) +- return false; +- +- return true; +-} +- + /* Generate the epilogue instructions for returning from a function. + This is almost exactly the reverse of the prolog sequence, except + that we need to insert barriers to avoid scheduling loads that read +@@ -5831,6 +7399,9 @@ aarch64_expand_epilogue (bool for_sibcall) + HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; + poly_int64 final_adjust = cfun->machine->frame.final_adjust; + poly_int64 callee_offset = cfun->machine->frame.callee_offset; ++ poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; ++ poly_int64 below_hard_fp_saved_regs_size ++ = cfun->machine->frame.below_hard_fp_saved_regs_size; + unsigned reg1 = cfun->machine->frame.wb_candidate1; + unsigned reg2 = cfun->machine->frame.wb_candidate2; + rtx cfi_ops = NULL; +@@ -5844,15 +7415,23 @@ aarch64_expand_epilogue (bool for_sibcall) + = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); + HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; + +- /* We can re-use the registers when the allocation amount is smaller than +- guard_size - guard_used_by_caller because we won't be doing any probes +- then. 
In such situations the register should remain live with the correct ++ /* We can re-use the registers when: ++ ++ (a) the deallocation amount is the same as the corresponding ++ allocation amount (which is false if we combine the initial ++ and SVE callee save allocations in the prologue); and ++ ++ (b) the allocation amount doesn't need a probe (which is false ++ if the amount is guard_size - guard_used_by_caller or greater). ++ ++ In such situations the register should remain live with the correct + value. */ + bool can_inherit_p = (initial_adjust.is_constant () +- && final_adjust.is_constant ()) ++ && final_adjust.is_constant () + && (!flag_stack_clash_protection +- || known_lt (initial_adjust, +- guard_size - guard_used_by_caller)); ++ || (known_lt (initial_adjust, ++ guard_size - guard_used_by_caller) ++ && known_eq (sve_callee_adjust, 0)))); + + /* We need to add memory barrier to prevent read from deallocated stack. */ + bool need_barrier_p +@@ -5877,7 +7456,8 @@ aarch64_expand_epilogue (bool for_sibcall) + /* If writeback is used when restoring callee-saves, the CFA + is restored on the instruction doing the writeback. */ + aarch64_add_offset (Pmode, stack_pointer_rtx, +- hard_frame_pointer_rtx, -callee_offset, ++ hard_frame_pointer_rtx, ++ -callee_offset - below_hard_fp_saved_regs_size, + tmp1_rtx, tmp0_rtx, callee_adjust == 0); + else + /* The case where we need to re-use the register here is very rare, so +@@ -5885,14 +7465,17 @@ aarch64_expand_epilogue (bool for_sibcall) + immediate doesn't fit. */ + aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); + +- aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, ++ /* Restore the vector registers before the predicate registers, ++ so that we can use P4 as a temporary for big-endian SVE frames. */ ++ aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, ++ callee_adjust != 0, &cfi_ops); ++ aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, ++ false, &cfi_ops); ++ if (maybe_ne (sve_callee_adjust, 0)) ++ aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); ++ aarch64_restore_callee_saves (callee_offset - sve_callee_adjust, ++ R0_REGNUM, R30_REGNUM, + callee_adjust != 0, &cfi_ops); +- if (aarch64_simd_decl_p (cfun->decl)) +- aarch64_restore_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM, +- callee_adjust != 0, &cfi_ops); +- else +- aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, +- callee_adjust != 0, &cfi_ops); + + if (need_barrier_p) + emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); +@@ -5943,13 +7526,23 @@ aarch64_expand_epilogue (bool for_sibcall) + if (aarch64_return_address_signing_enabled () + && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return)) + { +- insn = emit_insn (gen_autisp ()); ++ switch (aarch64_ra_sign_key) ++ { ++ case AARCH64_KEY_A: ++ insn = emit_insn (gen_autiasp ()); ++ break; ++ case AARCH64_KEY_B: ++ insn = emit_insn (gen_autibsp ()); ++ break; ++ default: ++ gcc_unreachable (); ++ } + add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Stack adjustment for exception handler. */ +- if (crtl->calls_eh_return) ++ if (crtl->calls_eh_return && !for_sibcall) + { + /* We need to unwind the stack by the offset computed by + EH_RETURN_STACKADJ_RTX. 
We have already reset the CFA +@@ -6015,6 +7608,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + int this_regno = R0_REGNUM; + rtx this_rtx, temp0, temp1, addr, funexp; + rtx_insn *insn; ++ const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk)); + + if (aarch64_bti_enabled ()) + emit_insn (gen_bti_c()); +@@ -6077,14 +7671,18 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); +- insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX)); ++ rtx callee_abi = gen_int_mode (fndecl_abi (function).id (), DImode); ++ insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, callee_abi)); + SIBLING_CALL_P (insn) = 1; + + insn = get_insns (); + shorten_branches (insn); ++ ++ assemble_start_function (thunk, fnname); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); ++ assemble_end_function (thunk, fnname); + + /* Stop pretending to be a post-reload pass. */ + reload_completed = 0; +@@ -6608,9 +8206,15 @@ aarch64_classify_address (struct aarch64_address_info *info, + + HOST_WIDE_INT const_size; + ++ /* Whether a vector mode is partial doesn't affect address legitimacy. ++ Partial vectors like VNx8QImode allow the same indexed addressing ++ mode and MUL VL addressing mode as full vectors like VNx16QImode; ++ in both cases, MUL VL counts multiples of GET_MODE_SIZE. */ ++ unsigned int vec_flags = aarch64_classify_vector_mode (mode); ++ vec_flags &= ~VEC_PARTIAL; ++ + /* On BE, we use load/store pair for all large int mode load/stores. + TI/TFmode may also use a load/store pair. */ +- unsigned int vec_flags = aarch64_classify_vector_mode (mode); + bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)); + bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP + || type == ADDR_QUERY_LDP_STP_N +@@ -6628,7 +8232,7 @@ aarch64_classify_address (struct aarch64_address_info *info, + bool allow_reg_index_p = (!load_store_pair_p + && (known_lt (GET_MODE_SIZE (mode), 16) + || vec_flags == VEC_ADVSIMD +- || vec_flags == VEC_SVE_DATA)); ++ || vec_flags & VEC_SVE_DATA)); + + /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and + [Rn, #offset, MUL VL]. */ +@@ -7152,11 +8756,12 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) + RESULT is the register in which the result is returned. It's NULL for + "call" and "sibcall". + MEM is the location of the function call. ++ CALLEE_ABI is a const_int that gives the arm_pcs of the callee. + SIBCALL indicates whether this function call is normal call or sibling call. + It will generate different pattern accordingly. 
*/ + + void +-aarch64_expand_call (rtx result, rtx mem, bool sibcall) ++aarch64_expand_call (rtx result, rtx mem, rtx callee_abi, bool sibcall) + { + rtx call, callee, tmp; + rtvec vec; +@@ -7186,7 +8791,11 @@ aarch64_expand_call (rtx result, rtx mem, bool sibcall) + else + tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM)); + +- vec = gen_rtvec (2, call, tmp); ++ gcc_assert (CONST_INT_P (callee_abi)); ++ callee_abi = gen_rtx_UNSPEC (DImode, gen_rtvec (1, callee_abi), ++ UNSPEC_CALLEE_ABI); ++ ++ vec = gen_rtvec (3, call, callee_abi, tmp); + call = gen_rtx_PARALLEL (VOIDmode, vec); + + aarch64_emit_call_insn (call); +@@ -7382,6 +8991,21 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code) + } + break; + ++ case E_CC_NZCmode: ++ switch (comp_code) ++ { ++ case NE: return AARCH64_NE; /* = any */ ++ case EQ: return AARCH64_EQ; /* = none */ ++ case GE: return AARCH64_PL; /* = nfrst */ ++ case LT: return AARCH64_MI; /* = first */ ++ case GEU: return AARCH64_CS; /* = nlast */ ++ case GTU: return AARCH64_HI; /* = pmore */ ++ case LEU: return AARCH64_LS; /* = plast */ ++ case LTU: return AARCH64_CC; /* = last */ ++ default: return -1; ++ } ++ break; ++ + case E_CC_NZmode: + switch (comp_code) + { +@@ -7524,15 +9148,24 @@ aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate) + if (negate) + r = real_value_negate (&r); + +- /* We only handle the SVE single-bit immediates here. */ ++ /* Handle the SVE single-bit immediates specially, since they have a ++ fixed form in the assembly syntax. */ + if (real_equal (&r, &dconst0)) + asm_fprintf (f, "0.0"); ++ else if (real_equal (&r, &dconst2)) ++ asm_fprintf (f, "2.0"); + else if (real_equal (&r, &dconst1)) + asm_fprintf (f, "1.0"); + else if (real_equal (&r, &dconsthalf)) + asm_fprintf (f, "0.5"); + else +- return false; ++ { ++ const int buf_size = 20; ++ char float_buf[buf_size] = {'\0'}; ++ real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, ++ 1, GET_MODE (elt)); ++ asm_fprintf (f, "%s", float_buf); ++ } + + return true; + } +@@ -7560,7 +9193,13 @@ sizetochar (int size) + 'D': Take the duplicated element in a vector constant + and print it as an unsigned integer, in decimal. + 'e': Print the sign/zero-extend size as a character 8->b, +- 16->h, 32->w. ++ 16->h, 32->w. Can also be used for masks: ++ 0xff->b, 0xffff->h, 0xffffffff->w. ++ 'I': If the operand is a duplicated vector constant, ++ replace it with the duplicated scalar. If the ++ operand is then a floating-point constant, replace ++ it with the integer bit representation. Print the ++ transformed constant as a signed decimal number. + 'p': Prints N such that 2^N == X (X must be power of 2 and + const int). + 'P': Print the number of non-zero bits in X (a const_int). +@@ -7574,7 +9213,7 @@ sizetochar (int size) + 'S/T/U/V': Print a FP/SIMD register name for a register list. + The register printed is the FP/SIMD register name + of X + 0/1/2/3 for S/T/U/V. +- 'R': Print a scalar FP/SIMD register name + 1. ++ 'R': Print a scalar Integer/FP/SIMD register name + 1. + 'X': Print bottom 16 bits of integer constant in hex. + 'w/x': Print a general register name or the zero register + (32-bit or 64-bit). 
+@@ -7626,27 +9265,22 @@ aarch64_print_operand (FILE *f, rtx x, int code) + + case 'e': + { +- int n; +- +- if (!CONST_INT_P (x) +- || (n = exact_log2 (INTVAL (x) & ~7)) <= 0) ++ x = unwrap_const_vec_duplicate (x); ++ if (!CONST_INT_P (x)) + { + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } + +- switch (n) ++ HOST_WIDE_INT val = INTVAL (x); ++ if ((val & ~7) == 8 || val == 0xff) ++ fputc ('b', f); ++ else if ((val & ~7) == 16 || val == 0xffff) ++ fputc ('h', f); ++ else if ((val & ~7) == 32 || val == 0xffffffff) ++ fputc ('w', f); ++ else + { +- case 3: +- fputc ('b', f); +- break; +- case 4: +- fputc ('h', f); +- break; +- case 5: +- fputc ('w', f); +- break; +- default: + output_operand_lossage ("invalid operand for '%%%c'", code); + return; + } +@@ -7693,6 +9327,19 @@ aarch64_print_operand (FILE *f, rtx x, int code) + asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]); + break; + ++ case 'I': ++ { ++ x = aarch64_bit_representation (unwrap_const_vec_duplicate (x)); ++ if (CONST_INT_P (x)) ++ asm_fprintf (f, "%wd", INTVAL (x)); ++ else ++ { ++ output_operand_lossage ("invalid operand for '%%%c'", code); ++ return; ++ } ++ break; ++ } ++ + case 'M': + case 'm': + { +@@ -7715,7 +9362,10 @@ aarch64_print_operand (FILE *f, rtx x, int code) + gcc_assert (cond_code >= 0); + if (code == 'M') + cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code); +- fputs (aarch64_condition_codes[cond_code], f); ++ if (GET_MODE (XEXP (x, 0)) == CC_NZCmode) ++ fputs (aarch64_sve_condition_codes[cond_code], f); ++ else ++ fputs (aarch64_condition_codes[cond_code], f); + } + break; + +@@ -7766,12 +9416,13 @@ aarch64_print_operand (FILE *f, rtx x, int code) + break; + + case 'R': +- if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) +- { +- output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); +- return; +- } +- asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); ++ if (REG_P (x) && FP_REGNUM_P (REGNO (x))) ++ asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); ++ else if (REG_P (x) && GP_REGNUM_P (REGNO (x))) ++ asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1); ++ else ++ output_operand_lossage ("incompatible register operand for '%%%c'", ++ code); + break; + + case 'X': +@@ -8068,7 +9719,7 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, + aarch64_addr_query_type type) + { + struct aarch64_address_info addr; +- unsigned int size; ++ unsigned int size, vec_flags; + + /* Check all addresses are Pmode - including ILP32. 
*/ + if (GET_MODE (x) != Pmode +@@ -8084,26 +9735,24 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, + { + case ADDRESS_REG_IMM: + if (known_eq (addr.const_offset, 0)) +- asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); +- else if (aarch64_sve_data_mode_p (mode)) + { +- HOST_WIDE_INT vnum +- = exact_div (addr.const_offset, +- BYTES_PER_SVE_VECTOR).to_constant (); +- asm_fprintf (f, "[%s, #%wd, mul vl]", +- reg_names[REGNO (addr.base)], vnum); ++ asm_fprintf (f, "[%s]", reg_names[REGNO (addr.base)]); ++ return true; + } +- else if (aarch64_sve_pred_mode_p (mode)) ++ ++ vec_flags = aarch64_classify_vector_mode (mode); ++ if (vec_flags & VEC_ANY_SVE) + { + HOST_WIDE_INT vnum + = exact_div (addr.const_offset, +- BYTES_PER_SVE_PRED).to_constant (); ++ aarch64_vl_bytes (mode, vec_flags)).to_constant (); + asm_fprintf (f, "[%s, #%wd, mul vl]", + reg_names[REGNO (addr.base)], vnum); ++ return true; + } +- else +- asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)], +- INTVAL (addr.offset)); ++ ++ asm_fprintf (f, "[%s, %wd]", reg_names[REGNO (addr.base)], ++ INTVAL (addr.offset)); + return true; + + case ADDRESS_REG_REG: +@@ -8234,11 +9883,15 @@ aarch64_regno_regclass (unsigned regno) + return POINTER_REGS; + + if (FP_REGNUM_P (regno)) +- return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS; ++ return (FP_LO8_REGNUM_P (regno) ? FP_LO8_REGS ++ : FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS); + + if (PR_REGNUM_P (regno)) + return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS; + ++ if (regno == FFR_REGNUM || regno == FFRT_REGNUM) ++ return FFR_REGS; ++ + return NO_REGS; + } + +@@ -8348,13 +10001,14 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, + secondary_reload_info *sri) + { + /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled +- directly by the *aarch64_sve_mov_be move pattern. See the ++ directly by the *aarch64_sve_mov_[lb]e move patterns. See the + comment at the head of aarch64-sve.md for more details about the + big-endian handling. */ + if (BYTES_BIG_ENDIAN + && reg_class_subset_p (rclass, FP_REGS) + && !((REG_P (x) && HARD_REGISTER_P (x)) + || aarch64_simd_valid_immediate (x, NULL)) ++ && mode != VNx16QImode + && aarch64_sve_data_mode_p (mode)) + { + sri->icode = CODE_FOR_aarch64_sve_reload_be; +@@ -8514,7 +10168,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) + can hold MODE, but at the moment we need to handle all modes. + Just ignore any runtime parts for registers that can't store them. */ + HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode)); +- unsigned int nregs; ++ unsigned int nregs, vec_flags; + switch (regclass) + { + case TAILCALL_ADDR_REGS: +@@ -8524,17 +10178,21 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) + case POINTER_AND_FP_REGS: + case FP_REGS: + case FP_LO_REGS: +- if (aarch64_sve_data_mode_p (mode) ++ case FP_LO8_REGS: ++ vec_flags = aarch64_classify_vector_mode (mode); ++ if ((vec_flags & VEC_SVE_DATA) + && constant_multiple_p (GET_MODE_SIZE (mode), +- BYTES_PER_SVE_VECTOR, &nregs)) ++ aarch64_vl_bytes (mode, vec_flags), &nregs)) + return nregs; +- return (aarch64_vector_data_mode_p (mode) ++ return (vec_flags & VEC_ADVSIMD + ? 
CEIL (lowest_size, UNITS_PER_VREG) + : CEIL (lowest_size, UNITS_PER_WORD)); + case STACK_REG: + case PR_REGS: + case PR_LO_REGS: + case PR_HI_REGS: ++ case FFR_REGS: ++ case PR_AND_FFR_REGS: + return 1; + + case NO_REGS: +@@ -10715,6 +12373,14 @@ aarch64_register_move_cost (machine_mode mode, + if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS) + from = GENERAL_REGS; + ++ /* Make RDFFR very expensive. In particular, if we know that the FFR ++ contains a PTRUE (e.g. after a SETFFR), we must never use RDFFR ++ as a way of obtaining a PTRUE. */ ++ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL ++ && hard_reg_set_subset_p (reg_class_contents[from_i], ++ reg_class_contents[FFR_REGS])) ++ return 80; ++ + /* Moving between GPR and stack cost is the same as GP2GP. */ + if ((from == GENERAL_REGS && to == STACK_REG) + || (to == GENERAL_REGS && from == STACK_REG)) +@@ -10764,6 +12430,93 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, + return aarch64_tune_params.memmov_cost; + } + ++/* Implement TARGET_INIT_BUILTINS. */ ++static void ++aarch64_init_builtins () ++{ ++ aarch64_general_init_builtins (); ++ aarch64_sve::init_builtins (); ++} ++ ++/* Implement TARGET_FOLD_BUILTIN. */ ++static tree ++aarch64_fold_builtin (tree fndecl, int nargs, tree *args, bool) ++{ ++ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); ++ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; ++ tree type = TREE_TYPE (TREE_TYPE (fndecl)); ++ switch (code & AARCH64_BUILTIN_CLASS) ++ { ++ case AARCH64_BUILTIN_GENERAL: ++ return aarch64_general_fold_builtin (subcode, type, nargs, args); ++ ++ case AARCH64_BUILTIN_SVE: ++ return NULL_TREE; ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_GIMPLE_FOLD_BUILTIN. */ ++static bool ++aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) ++{ ++ gcall *stmt = as_a (gsi_stmt (*gsi)); ++ tree fndecl = gimple_call_fndecl (stmt); ++ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); ++ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; ++ gimple *new_stmt = NULL; ++ switch (code & AARCH64_BUILTIN_CLASS) ++ { ++ case AARCH64_BUILTIN_GENERAL: ++ new_stmt = aarch64_general_gimple_fold_builtin (subcode, stmt); ++ break; ++ ++ case AARCH64_BUILTIN_SVE: ++ new_stmt = aarch64_sve::gimple_fold_builtin (subcode, gsi, stmt); ++ break; ++ } ++ ++ if (!new_stmt) ++ return false; ++ ++ gsi_replace (gsi, new_stmt, true); ++ return true; ++} ++ ++/* Implement TARGET_EXPAND_BUILTIN. */ ++static rtx ++aarch64_expand_builtin (tree exp, rtx target, rtx, machine_mode, int ignore) ++{ ++ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); ++ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); ++ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; ++ switch (code & AARCH64_BUILTIN_CLASS) ++ { ++ case AARCH64_BUILTIN_GENERAL: ++ return aarch64_general_expand_builtin (subcode, exp, target, ignore); ++ ++ case AARCH64_BUILTIN_SVE: ++ return aarch64_sve::expand_builtin (subcode, exp, target); ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_BUILTIN_DECL. */ ++static tree ++aarch64_builtin_decl (unsigned int code, bool initialize_p) ++{ ++ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; ++ switch (code & AARCH64_BUILTIN_CLASS) ++ { ++ case AARCH64_BUILTIN_GENERAL: ++ return aarch64_general_builtin_decl (subcode, initialize_p); ++ ++ case AARCH64_BUILTIN_SVE: ++ return aarch64_sve::builtin_decl (subcode, initialize_p); ++ } ++ gcc_unreachable (); ++} ++ + /* Return true if it is safe and beneficial to use the approximate rsqrt optabs + to optimize 1.0/sqrt. 
*/ + +@@ -10787,7 +12540,17 @@ aarch64_builtin_reciprocal (tree fndecl) + + if (!use_rsqrt_p (mode)) + return NULL_TREE; +- return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl)); ++ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); ++ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; ++ switch (code & AARCH64_BUILTIN_CLASS) ++ { ++ case AARCH64_BUILTIN_GENERAL: ++ return aarch64_general_builtin_rsqrt (subcode); ++ ++ case AARCH64_BUILTIN_SVE: ++ return NULL_TREE; ++ } ++ gcc_unreachable (); + } + + /* Emit instruction sequence to compute either the approximate square root +@@ -11096,7 +12859,7 @@ static void initialize_aarch64_code_model (struct gcc_options *); + + static enum aarch64_parse_opt_result + aarch64_parse_arch (const char *to_parse, const struct processor **res, +- unsigned long *isa_flags, std::string *invalid_extension) ++ uint64_t *isa_flags, std::string *invalid_extension) + { + const char *ext; + const struct processor *arch; +@@ -11119,7 +12882,7 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res, + if (strlen (arch->name) == len + && strncmp (arch->name, to_parse, len) == 0) + { +- unsigned long isa_temp = arch->flags; ++ uint64_t isa_temp = arch->flags; + + if (ext != NULL) + { +@@ -11151,7 +12914,7 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res, + + static enum aarch64_parse_opt_result + aarch64_parse_cpu (const char *to_parse, const struct processor **res, +- unsigned long *isa_flags, std::string *invalid_extension) ++ uint64_t *isa_flags, std::string *invalid_extension) + { + const char *ext; + const struct processor *cpu; +@@ -11173,7 +12936,7 @@ aarch64_parse_cpu (const char *to_parse, const struct processor **res, + { + if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0) + { +- unsigned long isa_temp = cpu->flags; ++ uint64_t isa_temp = cpu->flags; + + + if (ext != NULL) +@@ -11758,7 +13521,7 @@ aarch64_print_hint_for_extensions (const std::string &str) + + static bool + aarch64_validate_mcpu (const char *str, const struct processor **res, +- unsigned long *isa_flags) ++ uint64_t *isa_flags) + { + std::string invalid_extension; + enum aarch64_parse_opt_result parse_res +@@ -11885,9 +13648,9 @@ aarch64_validate_mbranch_protection (const char *const_str) + enum aarch64_parse_opt_result res = + aarch64_parse_branch_protection (const_str, &str); + if (res == AARCH64_PARSE_INVALID_ARG) +- error ("invalid arg %<%s%> for %<-mbranch-protection=%>", str); ++ error ("invalid argument %<%s%> for %<-mbranch-protection=%>", str); + else if (res == AARCH64_PARSE_MISSING_ARG) +- error ("missing arg for %<-mbranch-protection=%>"); ++ error ("missing argument for %<-mbranch-protection=%>"); + free (str); + return res == AARCH64_PARSE_OK; + } +@@ -11899,7 +13662,7 @@ aarch64_validate_mbranch_protection (const char *const_str) + + static bool + aarch64_validate_march (const char *str, const struct processor **res, +- unsigned long *isa_flags) ++ uint64_t *isa_flags) + { + std::string invalid_extension; + enum aarch64_parse_opt_result parse_res +@@ -12014,8 +13777,8 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) + static void + aarch64_override_options (void) + { +- unsigned long cpu_isa = 0; +- unsigned long arch_isa = 0; ++ uint64_t cpu_isa = 0; ++ uint64_t arch_isa = 0; + aarch64_isa_flags = 0; + + bool valid_cpu = true; +@@ -12255,7 +14018,7 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr) + { + const struct processor *cpu + = 
aarch64_get_tune_cpu (ptr->x_explicit_tune_core); +- unsigned long isa_flags = ptr->x_aarch64_isa_flags; ++ uint64_t isa_flags = ptr->x_aarch64_isa_flags; + const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch); + std::string extension + = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags); +@@ -12508,7 +14271,7 @@ static bool + aarch64_handle_attr_isa_flags (char *str) + { + enum aarch64_parse_opt_result parse_res; +- unsigned long isa_flags = aarch64_isa_flags; ++ uint64_t isa_flags = aarch64_isa_flags; + + /* We allow "+nothing" in the beginning to clear out all architectural + features if the user wants to handpick specific features. */ +@@ -12999,6 +14762,26 @@ aarch64_can_inline_p (tree caller, tree callee) + return true; + } + ++/* Return the ID of the TLDESC ABI, initializing the descriptor if hasn't ++ been already. */ ++ ++unsigned int ++aarch64_tlsdesc_abi_id () ++{ ++ predefined_function_abi &tlsdesc_abi = function_abis[ARM_PCS_TLSDESC]; ++ if (!tlsdesc_abi.initialized_p ()) ++ { ++ HARD_REG_SET full_reg_clobbers; ++ CLEAR_HARD_REG_SET (full_reg_clobbers); ++ SET_HARD_REG_BIT (full_reg_clobbers, R0_REGNUM); ++ SET_HARD_REG_BIT (full_reg_clobbers, CC_REGNUM); ++ for (int regno = P0_REGNUM; regno <= P15_REGNUM; ++regno) ++ SET_HARD_REG_BIT (full_reg_clobbers, regno); ++ tlsdesc_abi.initialize (ARM_PCS_TLSDESC, full_reg_clobbers); ++ } ++ return tlsdesc_abi.id (); ++} ++ + /* Return true if SYMBOL_REF X binds locally. */ + + static bool +@@ -13104,26 +14887,31 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset) + the offset does not cause overflow of the final address. But + we have no way of knowing the address of symbol at compile time + so we can't accurately say if the distance between the PC and +- symbol + offset is outside the addressible range of +/-1M in the +- TINY code model. So we rely on images not being greater than +- 1M and cap the offset at 1M and anything beyond 1M will have to +- be loaded using an alternative mechanism. Furthermore if the +- symbol is a weak reference to something that isn't known to +- resolve to a symbol in this module, then force to memory. */ +- if ((SYMBOL_REF_WEAK (x) +- && !aarch64_symbol_binds_local_p (x)) +- || !IN_RANGE (offset, -1048575, 1048575)) ++ symbol + offset is outside the addressible range of +/-1MB in the ++ TINY code model. So we limit the maximum offset to +/-64KB and ++ assume the offset to the symbol is not larger than +/-(1MB - 64KB). ++ If offset_within_block_p is true we allow larger offsets. ++ Furthermore force to memory if the symbol is a weak reference to ++ something that doesn't resolve to a symbol in this module. */ ++ ++ if (SYMBOL_REF_WEAK (x) && !aarch64_symbol_binds_local_p (x)) ++ return SYMBOL_FORCE_TO_MEM; ++ if (!(IN_RANGE (offset, -0x10000, 0x10000) ++ || offset_within_block_p (x, offset))) + return SYMBOL_FORCE_TO_MEM; ++ + return SYMBOL_TINY_ABSOLUTE; + + case AARCH64_CMODEL_SMALL: + /* Same reasoning as the tiny code model, but the offset cap here is +- 4G. */ +- if ((SYMBOL_REF_WEAK (x) +- && !aarch64_symbol_binds_local_p (x)) +- || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263), +- HOST_WIDE_INT_C (4294967264))) ++ 1MB, allowing +/-3.9GB for the offset to the symbol. 
*/ ++ ++ if (SYMBOL_REF_WEAK (x) && !aarch64_symbol_binds_local_p (x)) + return SYMBOL_FORCE_TO_MEM; ++ if (!(IN_RANGE (offset, -0x100000, 0x100000) ++ || offset_within_block_p (x, offset))) ++ return SYMBOL_FORCE_TO_MEM; ++ + return SYMBOL_SMALL_ABSOLUTE; + + case AARCH64_CMODEL_TINY_PIC: +@@ -13432,7 +15220,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + HOST_WIDE_INT size, rsize, adjust, align; + tree t, u, cond1, cond2; + +- indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); ++ indirect_p = pass_va_arg_by_reference (type); + if (indirect_p) + type = build_pointer_type (type); + +@@ -13626,6 +15414,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + field_t = aarch64_fp16_type_node; + field_ptr_t = aarch64_fp16_ptr_type_node; + break; ++ case E_BFmode: ++ field_t = aarch64_bf16_type_node; ++ field_ptr_t = aarch64_bf16_ptr_type_node; ++ break; + case E_V2SImode: + case E_V4SImode: + { +@@ -13677,9 +15469,9 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + /* Implement TARGET_SETUP_INCOMING_VARARGS. */ + + static void +-aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, +- tree type, int *pretend_size ATTRIBUTE_UNUSED, +- int no_rtl) ++aarch64_setup_incoming_varargs (cumulative_args_t cum_v, ++ const function_arg_info &arg, ++ int *pretend_size ATTRIBUTE_UNUSED, int no_rtl) + { + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + CUMULATIVE_ARGS local_cum; +@@ -13690,7 +15482,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, + argument. Advance a local copy of CUM past the last "real" named + argument, to find out how many registers are left over. */ + local_cum = *cum; +- aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true); ++ aarch64_function_arg_advance (pack_cumulative_args(&local_cum), arg); + + /* Found out how many registers we need to save. + Honor tree-stdvar analysis results. */ +@@ -13777,6 +15569,10 @@ aarch64_conditional_register_usage (void) + call_used_regs[i] = 1; + } + ++ /* Only allow the FFR and FFRT to be accessed via special patterns. */ ++ CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM); ++ CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM); ++ + /* When tracking speculation, we need a couple of call-clobbered registers + to track the speculation state. It would be nice to just use + IP0 and IP1, but currently there are numerous places that just +@@ -13802,6 +15598,10 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) + machine_mode mode; + HOST_WIDE_INT size; + ++ /* SVE types (and types containing SVE types) must be handled ++ before calling this function. 
*/ ++ gcc_assert (!aarch64_sve::builtin_type_p (type)); ++ + switch (TREE_CODE (type)) + { + case REAL_TYPE: +@@ -13973,6 +15773,9 @@ aarch64_short_vector_p (const_tree type, + { + poly_int64 size = -1; + ++ if (type && aarch64_sve::builtin_type_p (type)) ++ return false; ++ + if (type && TREE_CODE (type) == VECTOR_TYPE) + size = int_size_in_bytes (type); + else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT +@@ -14033,11 +15836,14 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode, + int *count, + bool *is_ha) + { ++ if (is_ha != NULL) *is_ha = false; ++ ++ if (type && aarch64_sve::builtin_type_p (type)) ++ return false; ++ + machine_mode new_mode = VOIDmode; + bool composite_p = aarch64_composite_type_p (type, mode); + +- if (is_ha != NULL) *is_ha = false; +- + if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT) + || aarch64_short_vector_p (type, mode)) + { +@@ -14083,7 +15889,63 @@ static bool + aarch64_vector_mode_supported_p (machine_mode mode) + { + unsigned int vec_flags = aarch64_classify_vector_mode (mode); +- return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0; ++ return vec_flags != 0 && (vec_flags & (VEC_STRUCT | VEC_PARTIAL)) == 0; ++} ++ ++/* Return the full-width SVE vector mode for element mode MODE, if one ++ exists. */ ++opt_machine_mode ++aarch64_full_sve_mode (scalar_mode mode) ++{ ++ switch (mode) ++ { ++ case E_DFmode: ++ return VNx2DFmode; ++ case E_SFmode: ++ return VNx4SFmode; ++ case E_HFmode: ++ return VNx8HFmode; ++ case E_BFmode: ++ return VNx8BFmode; ++ case E_DImode: ++ return VNx2DImode; ++ case E_SImode: ++ return VNx4SImode; ++ case E_HImode: ++ return VNx8HImode; ++ case E_QImode: ++ return VNx16QImode; ++ default: ++ return opt_machine_mode (); ++ } ++} ++ ++/* Return the 128-bit Advanced SIMD vector mode for element mode MODE, ++ if it exists. 
*/ ++opt_machine_mode ++aarch64_vq_mode (scalar_mode mode) ++{ ++ switch (mode) ++ { ++ case E_DFmode: ++ return V2DFmode; ++ case E_SFmode: ++ return V4SFmode; ++ case E_HFmode: ++ return V8HFmode; ++ case E_BFmode: ++ return V8BFmode; ++ case E_SImode: ++ return V4SImode; ++ case E_HImode: ++ return V8HImode; ++ case E_QImode: ++ return V16QImode; ++ case E_DImode: ++ return V2DImode; ++ default: ++ return opt_machine_mode (); ++ } + } + + /* Return appropriate SIMD container +@@ -14092,49 +15954,13 @@ static machine_mode + aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) + { + if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR)) +- switch (mode) +- { +- case E_DFmode: +- return VNx2DFmode; +- case E_SFmode: +- return VNx4SFmode; +- case E_HFmode: +- return VNx8HFmode; +- case E_DImode: +- return VNx2DImode; +- case E_SImode: +- return VNx4SImode; +- case E_HImode: +- return VNx8HImode; +- case E_QImode: +- return VNx16QImode; +- default: +- return word_mode; +- } ++ return aarch64_full_sve_mode (mode).else_mode (word_mode); + + gcc_assert (known_eq (width, 64) || known_eq (width, 128)); + if (TARGET_SIMD) + { + if (known_eq (width, 128)) +- switch (mode) +- { +- case E_DFmode: +- return V2DFmode; +- case E_SFmode: +- return V4SFmode; +- case E_HFmode: +- return V8HFmode; +- case E_SImode: +- return V4SImode; +- case E_HImode: +- return V8HImode; +- case E_QImode: +- return V16QImode; +- case E_DImode: +- return V2DImode; +- default: +- break; +- } ++ return aarch64_vq_mode (mode).else_mode (word_mode); + else + switch (mode) + { +@@ -14142,6 +15968,8 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) + return V2SFmode; + case E_HFmode: + return V4HFmode; ++ case E_BFmode: ++ return V4BFmode; + case E_SImode: + return V2SImode; + case E_HImode: +@@ -14205,14 +16033,24 @@ aarch64_mangle_type (const_tree type) + if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) + return "St9__va_list"; + +- /* Half-precision float. */ ++ /* Half-precision floating point types. */ + if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) +- return "Dh"; ++ { ++ if (TYPE_MODE (type) == BFmode) ++ return "u6__bf16"; ++ else ++ return "Dh"; ++ } + + /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for + builtin types. */ + if (TYPE_NAME (type) != NULL) +- return aarch64_mangle_builtin_type (type); ++ { ++ const char *res; ++ if ((res = aarch64_general_mangle_builtin_type (type)) ++ || (res = aarch64_sve::mangle_builtin_type (type))) ++ return res; ++ } + + /* Use the default mangling. */ + return NULL; +@@ -14370,6 +16208,27 @@ aarch64_sve_arith_immediate_p (rtx x, bool negate_p) + return IN_RANGE (val, 0, 0xff00); + } + ++/* Return true if X is a valid immediate for the SVE SQADD and SQSUB ++ instructions. Negate X first if NEGATE_P is true. */ ++ ++bool ++aarch64_sve_sqadd_sqsub_immediate_p (rtx x, bool negate_p) ++{ ++ rtx elt; ++ ++ if (!const_vec_duplicate_p (x, &elt) ++ || !CONST_INT_P (elt)) ++ return false; ++ ++ if (!aarch64_sve_arith_immediate_p (x, negate_p)) ++ return false; ++ ++ /* After the optional negation, the immediate must be nonnegative. ++ E.g. a saturating add of -127 must be done via SQSUB Zn.B, Zn.B, #127 ++ instead of SQADD Zn.B, Zn.B, #129. */ ++ return negate_p == (INTVAL (elt) < 0); ++} ++ + /* Return true if X is a valid immediate operand for an SVE logical + instruction such as AND. 
*/ + +@@ -14390,13 +16249,11 @@ aarch64_sve_bitmask_immediate_p (rtx x) + bool + aarch64_sve_dup_immediate_p (rtx x) + { +- rtx elt; +- +- if (!const_vec_duplicate_p (x, &elt) +- || !CONST_INT_P (elt)) ++ x = aarch64_bit_representation (unwrap_const_vec_duplicate (x)); ++ if (!CONST_INT_P (x)) + return false; + +- HOST_WIDE_INT val = INTVAL (elt); ++ HOST_WIDE_INT val = INTVAL (x); + if (val & 0xff) + return IN_RANGE (val, -0x80, 0x7f); + return IN_RANGE (val, -0x8000, 0x7f00); +@@ -14408,13 +16265,11 @@ aarch64_sve_dup_immediate_p (rtx x) + bool + aarch64_sve_cmp_immediate_p (rtx x, bool signed_p) + { +- rtx elt; +- +- return (const_vec_duplicate_p (x, &elt) +- && CONST_INT_P (elt) ++ x = unwrap_const_vec_duplicate (x); ++ return (CONST_INT_P (x) + && (signed_p +- ? IN_RANGE (INTVAL (elt), -16, 15) +- : IN_RANGE (INTVAL (elt), 0, 127))); ++ ? IN_RANGE (INTVAL (x), -16, 15) ++ : IN_RANGE (INTVAL (x), 0, 127))); + } + + /* Return true if X is a valid immediate operand for an SVE FADD or FSUB +@@ -14450,11 +16305,10 @@ aarch64_sve_float_mul_immediate_p (rtx x) + { + rtx elt; + +- /* GCC will never generate a multiply with an immediate of 2, so there is no +- point testing for it (even though it is a valid constant). */ + return (const_vec_duplicate_p (x, &elt) + && GET_CODE (elt) == CONST_DOUBLE +- && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf)); ++ && (real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf) ++ || real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconst2))); + } + + /* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate +@@ -14607,6 +16461,77 @@ aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64, + return false; + } + ++/* Return true if X is an UNSPEC_PTRUE constant of the form: ++ ++ (const (unspec [PATTERN ZERO] UNSPEC_PTRUE)) ++ ++ where PATTERN is the svpattern as a CONST_INT and where ZERO ++ is a zero constant of the required PTRUE mode (which can have ++ fewer elements than X's mode, if zero bits are significant). ++ ++ If so, and if INFO is nonnull, describe the immediate in INFO. */ ++bool ++aarch64_sve_ptrue_svpattern_p (rtx x, struct simd_immediate_info *info) ++{ ++ if (GET_CODE (x) != CONST) ++ return false; ++ ++ x = XEXP (x, 0); ++ if (GET_CODE (x) != UNSPEC || XINT (x, 1) != UNSPEC_PTRUE) ++ return false; ++ ++ if (info) ++ { ++ aarch64_svpattern pattern ++ = (aarch64_svpattern) INTVAL (XVECEXP (x, 0, 0)); ++ machine_mode pred_mode = GET_MODE (XVECEXP (x, 0, 1)); ++ scalar_int_mode int_mode = aarch64_sve_element_int_mode (pred_mode); ++ *info = simd_immediate_info (int_mode, pattern); ++ } ++ return true; ++} ++ ++/* Return true if X is a valid SVE predicate. If INFO is nonnull, use ++ it to describe valid immediates. */ ++ ++static bool ++aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info) ++{ ++ if (aarch64_sve_ptrue_svpattern_p (x, info)) ++ return true; ++ ++ if (x == CONST0_RTX (GET_MODE (x))) ++ { ++ if (info) ++ *info = simd_immediate_info (DImode, 0); ++ return true; ++ } ++ ++ /* Analyze the value as a VNx16BImode. This should be relatively ++ efficient, since rtx_vector_builder has enough built-in capacity ++ to store all VLA predicate constants without needing the heap. 
*/ ++ rtx_vector_builder builder; ++ if (!aarch64_get_sve_pred_bits (builder, x)) ++ return false; ++ ++ unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder); ++ if (int vl = aarch64_partial_ptrue_length (builder, elt_size)) ++ { ++ machine_mode mode = aarch64_sve_pred_mode (elt_size).require (); ++ aarch64_svpattern pattern = aarch64_svpattern_for_vl (mode, vl); ++ if (pattern != AARCH64_NUM_SVPATTERNS) ++ { ++ if (info) ++ { ++ scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode); ++ *info = simd_immediate_info (int_mode, pattern); ++ } ++ return true; ++ } ++ } ++ return false; ++} ++ + /* Return true if OP is a valid SIMD immediate for the operation + described by WHICH. If INFO is nonnull, use it to describe valid + immediates. */ +@@ -14619,6 +16544,9 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, + if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT)) + return false; + ++ if (vec_flags & VEC_SVE_PRED) ++ return aarch64_sve_pred_valid_immediate (op, info); ++ + scalar_mode elt_mode = GET_MODE_INNER (mode); + rtx base, step; + unsigned int n_elts; +@@ -14643,11 +16571,6 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, + else + return false; + +- /* Handle PFALSE and PTRUE. */ +- if (vec_flags & VEC_SVE_PRED) +- return (op == CONST0_RTX (mode) +- || op == CONSTM1_RTX (mode)); +- + scalar_float_mode elt_float_mode; + if (n_elts == 1 + && is_a (elt_mode, &elt_float_mode)) +@@ -14731,11 +16654,14 @@ aarch64_check_zero_based_sve_index_immediate (rtx x) + bool + aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left) + { ++ x = unwrap_const_vec_duplicate (x); ++ if (!CONST_INT_P (x)) ++ return false; + int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; + if (left) +- return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1); ++ return IN_RANGE (INTVAL (x), 0, bit_width - 1); + else +- return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width); ++ return IN_RANGE (INTVAL (x), 1, bit_width); + } + + /* Return the bitmask CONST_INT to select the bits required by a zero extract +@@ -14763,7 +16689,17 @@ aarch64_mov_operand_p (rtx x, machine_mode mode) + return true; + + if (VECTOR_MODE_P (GET_MODE (x))) +- return aarch64_simd_valid_immediate (x, NULL); ++ { ++ /* Require predicate constants to be VNx16BI before RA, so that we ++ force everything to have a canonical form. */ ++ if (!lra_in_progress ++ && !reload_completed ++ && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL ++ && GET_MODE (x) != VNx16BImode) ++ return false; ++ ++ return aarch64_simd_valid_immediate (x, NULL); ++ } + + if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) + return true; +@@ -14953,6 +16889,72 @@ aarch64_sve_ld1r_operand_p (rtx op) + && offset_6bit_unsigned_scaled_p (mode, addr.const_offset)); + } + ++/* Return true if OP is a valid MEM operand for an SVE LD1R{Q,O} instruction ++ where the size of the read data is specified by `mode` and the size of the ++ vector elements are specified by `elem_mode`. 
*/ ++bool ++aarch64_sve_ld1rq_ld1ro_operand_p (rtx op, machine_mode mode, ++ scalar_mode elem_mode) ++{ ++ struct aarch64_address_info addr; ++ if (!MEM_P (op) ++ || !aarch64_classify_address (&addr, XEXP (op, 0), elem_mode, false)) ++ return false; ++ ++ if (addr.type == ADDRESS_REG_IMM) ++ return offset_4bit_signed_scaled_p (mode, addr.const_offset); ++ ++ if (addr.type == ADDRESS_REG_REG) ++ return (1U << addr.shift) == GET_MODE_SIZE (elem_mode); ++ ++ return false; ++} ++ ++/* Return true if OP is a valid MEM operand for an SVE LD1RQ instruction. */ ++bool ++aarch64_sve_ld1rq_operand_p (rtx op) ++{ ++ return aarch64_sve_ld1rq_ld1ro_operand_p (op, TImode, ++ GET_MODE_INNER (GET_MODE (op))); ++} ++ ++/* Return true if OP is a valid MEM operand for an SVE LD1RO instruction for ++ accessing a vector where the element size is specified by `elem_mode`. */ ++bool ++aarch64_sve_ld1ro_operand_p (rtx op, scalar_mode elem_mode) ++{ ++ return aarch64_sve_ld1rq_ld1ro_operand_p (op, OImode, elem_mode); ++} ++ ++/* Return true if OP is a valid MEM operand for an SVE LDFF1 instruction. */ ++bool ++aarch64_sve_ldff1_operand_p (rtx op) ++{ ++ if (!MEM_P (op)) ++ return false; ++ ++ struct aarch64_address_info addr; ++ if (!aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op), false)) ++ return false; ++ ++ if (addr.type == ADDRESS_REG_IMM) ++ return known_eq (addr.const_offset, 0); ++ ++ return addr.type == ADDRESS_REG_REG; ++} ++ ++/* Return true if OP is a valid MEM operand for an SVE LDNF1 instruction. */ ++bool ++aarch64_sve_ldnf1_operand_p (rtx op) ++{ ++ struct aarch64_address_info addr; ++ ++ return (MEM_P (op) ++ && aarch64_classify_address (&addr, XEXP (op, 0), ++ GET_MODE (op), false) ++ && addr.type == ADDRESS_REG_IMM); ++} ++ + /* Return true if OP is a valid MEM operand for an SVE LDR instruction. + The conditions for STR are the same. */ + bool +@@ -14966,6 +16968,21 @@ aarch64_sve_ldr_operand_p (rtx op) + && addr.type == ADDRESS_REG_IMM); + } + ++/* Return true if OP is a valid address for an SVE PRF[BHWD] instruction, ++ addressing memory of mode MODE. */ ++bool ++aarch64_sve_prefetch_operand_p (rtx op, machine_mode mode) ++{ ++ struct aarch64_address_info addr; ++ if (!aarch64_classify_address (&addr, op, mode, false)) ++ return false; ++ ++ if (addr.type == ADDRESS_REG_IMM) ++ return known_eq (addr.const_offset, 0); ++ ++ return addr.type == ADDRESS_REG_REG; ++} ++ + /* Return true if OP is a valid MEM operand for an SVE_STRUCT mode. + We need to be able to access the individual pieces, so the range + is different from LD[234] and ST[234]. */ +@@ -15027,11 +17044,13 @@ aarch64_simd_attr_length_rglist (machine_mode mode) + static HOST_WIDE_INT + aarch64_simd_vector_alignment (const_tree type) + { ++ /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can ++ be set for non-predicate vectors of booleans. Modes are the most ++ direct way we have of identifying real SVE predicate types. */ ++ if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL) ++ return 16; + if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) +- /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can +- be set for non-predicate vectors of booleans. Modes are the most +- direct way we have of identifying real SVE predicate types. */ +- return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 
16 : 128; ++ return 128; + return wi::umin (wi::to_wide (TYPE_SIZE (type)), 128).to_uhwi (); + } + +@@ -15361,34 +17380,383 @@ aarch64_expand_vector_init (rtx target, rtx vals) + (see aarch64_simd_valid_immediate). */ + for (int i = 0; i < n_elts; i++) + { +- rtx x = XVECEXP (vals, 0, i); +- if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) +- continue; +- rtx subst = any_const; +- for (int bit = n_elts / 2; bit > 0; bit /= 2) +- { +- /* Look in the copied vector, as more elements are const. */ +- rtx test = XVECEXP (copy, 0, i ^ bit); +- if (CONST_INT_P (test) || CONST_DOUBLE_P (test)) +- { +- subst = test; +- break; +- } +- } +- XVECEXP (copy, 0, i) = subst; ++ rtx x = XVECEXP (vals, 0, i); ++ if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) ++ continue; ++ rtx subst = any_const; ++ for (int bit = n_elts / 2; bit > 0; bit /= 2) ++ { ++ /* Look in the copied vector, as more elements are const. */ ++ rtx test = XVECEXP (copy, 0, i ^ bit); ++ if (CONST_INT_P (test) || CONST_DOUBLE_P (test)) ++ { ++ subst = test; ++ break; ++ } ++ } ++ XVECEXP (copy, 0, i) = subst; ++ } ++ aarch64_expand_vector_init (target, copy); ++ } ++ ++ /* Insert the variable lanes directly. */ ++ for (int i = 0; i < n_elts; i++) ++ { ++ rtx x = XVECEXP (vals, 0, i); ++ if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) ++ continue; ++ x = copy_to_mode_reg (inner_mode, x); ++ emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i))); ++ } ++} ++ ++/* Emit RTL corresponding to: ++ insr TARGET, ELEM. */ ++ ++static void ++emit_insr (rtx target, rtx elem) ++{ ++ machine_mode mode = GET_MODE (target); ++ scalar_mode elem_mode = GET_MODE_INNER (mode); ++ elem = force_reg (elem_mode, elem); ++ ++ insn_code icode = optab_handler (vec_shl_insert_optab, mode); ++ gcc_assert (icode != CODE_FOR_nothing); ++ emit_insn (GEN_FCN (icode) (target, target, elem)); ++} ++ ++/* Subroutine of aarch64_sve_expand_vector_init for handling ++ trailing constants. ++ This function works as follows: ++ (a) Create a new vector consisting of trailing constants. ++ (b) Initialize TARGET with the constant vector using emit_move_insn. ++ (c) Insert remaining elements in TARGET using insr. ++ NELTS is the total number of elements in original vector while ++ while NELTS_REQD is the number of elements that are actually ++ significant. ++ ++ ??? The heuristic used is to do above only if number of constants ++ is at least half the total number of elements. May need fine tuning. */ ++ ++static bool ++aarch64_sve_expand_vector_init_handle_trailing_constants ++ (rtx target, const rtx_vector_builder &builder, int nelts, int nelts_reqd) ++{ ++ machine_mode mode = GET_MODE (target); ++ scalar_mode elem_mode = GET_MODE_INNER (mode); ++ int n_trailing_constants = 0; ++ ++ for (int i = nelts_reqd - 1; ++ i >= 0 && aarch64_legitimate_constant_p (elem_mode, builder.elt (i)); ++ i--) ++ n_trailing_constants++; ++ ++ if (n_trailing_constants >= nelts_reqd / 2) ++ { ++ rtx_vector_builder v (mode, 1, nelts); ++ for (int i = 0; i < nelts; i++) ++ v.quick_push (builder.elt (i + nelts_reqd - n_trailing_constants)); ++ rtx const_vec = v.build (); ++ emit_move_insn (target, const_vec); ++ ++ for (int i = nelts_reqd - n_trailing_constants - 1; i >= 0; i--) ++ emit_insr (target, builder.elt (i)); ++ ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Subroutine of aarch64_sve_expand_vector_init. ++ Works as follows: ++ (a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER. ++ (b) Skip trailing elements from BUILDER, which are the same as ++ element NELTS_REQD - 1. 
++ (c) Insert earlier elements in reverse order in TARGET using insr. */ ++ ++static void ++aarch64_sve_expand_vector_init_insert_elems (rtx target, ++ const rtx_vector_builder &builder, ++ int nelts_reqd) ++{ ++ machine_mode mode = GET_MODE (target); ++ scalar_mode elem_mode = GET_MODE_INNER (mode); ++ ++ struct expand_operand ops[2]; ++ enum insn_code icode = optab_handler (vec_duplicate_optab, mode); ++ gcc_assert (icode != CODE_FOR_nothing); ++ ++ create_output_operand (&ops[0], target, mode); ++ create_input_operand (&ops[1], builder.elt (nelts_reqd - 1), elem_mode); ++ expand_insn (icode, 2, ops); ++ ++ int ndups = builder.count_dups (nelts_reqd - 1, -1, -1); ++ for (int i = nelts_reqd - ndups - 1; i >= 0; i--) ++ emit_insr (target, builder.elt (i)); ++} ++ ++/* Subroutine of aarch64_sve_expand_vector_init to handle case ++ when all trailing elements of builder are same. ++ This works as follows: ++ (a) Use expand_insn interface to broadcast last vector element in TARGET. ++ (b) Insert remaining elements in TARGET using insr. ++ ++ ??? The heuristic used is to do above if number of same trailing elements ++ is at least 3/4 of total number of elements, loosely based on ++ heuristic from mostly_zeros_p. May need fine-tuning. */ ++ ++static bool ++aarch64_sve_expand_vector_init_handle_trailing_same_elem ++ (rtx target, const rtx_vector_builder &builder, int nelts_reqd) ++{ ++ int ndups = builder.count_dups (nelts_reqd - 1, -1, -1); ++ if (ndups >= (3 * nelts_reqd) / 4) ++ { ++ aarch64_sve_expand_vector_init_insert_elems (target, builder, ++ nelts_reqd - ndups + 1); ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Initialize register TARGET from BUILDER. NELTS is the constant number ++ of elements in BUILDER. ++ ++ The function tries to initialize TARGET from BUILDER if it fits one ++ of the special cases outlined below. ++ ++ Failing that, the function divides BUILDER into two sub-vectors: ++ v_even = even elements of BUILDER; ++ v_odd = odd elements of BUILDER; ++ ++ and recursively calls itself with v_even and v_odd. ++ ++ if (recursive call succeeded for v_even or v_odd) ++ TARGET = zip (v_even, v_odd) ++ ++ The function returns true if it managed to build TARGET from BUILDER ++ with one of the special cases, false otherwise. ++ ++ Example: {a, 1, b, 2, c, 3, d, 4} ++ ++ The vector gets divided into: ++ v_even = {a, b, c, d} ++ v_odd = {1, 2, 3, 4} ++ ++ aarch64_sve_expand_vector_init(v_odd) hits case 1 and ++ initialize tmp2 from constant vector v_odd using emit_move_insn. ++ ++ aarch64_sve_expand_vector_init(v_even) fails since v_even contains ++ 4 elements, so we construct tmp1 from v_even using insr: ++ tmp1 = dup(d) ++ insr tmp1, c ++ insr tmp1, b ++ insr tmp1, a ++ ++ And finally: ++ TARGET = zip (tmp1, tmp2) ++ which sets TARGET to {a, 1, b, 2, c, 3, d, 4}. */ ++ ++static bool ++aarch64_sve_expand_vector_init (rtx target, const rtx_vector_builder &builder, ++ int nelts, int nelts_reqd) ++{ ++ machine_mode mode = GET_MODE (target); ++ ++ /* Case 1: Vector contains trailing constants. */ ++ ++ if (aarch64_sve_expand_vector_init_handle_trailing_constants ++ (target, builder, nelts, nelts_reqd)) ++ return true; ++ ++ /* Case 2: Vector contains leading constants. 
*/ ++ ++ rtx_vector_builder rev_builder (mode, 1, nelts_reqd); ++ for (int i = 0; i < nelts_reqd; i++) ++ rev_builder.quick_push (builder.elt (nelts_reqd - i - 1)); ++ rev_builder.finalize (); ++ ++ if (aarch64_sve_expand_vector_init_handle_trailing_constants ++ (target, rev_builder, nelts, nelts_reqd)) ++ { ++ emit_insn (gen_aarch64_sve_rev (mode, target, target)); ++ return true; ++ } ++ ++ /* Case 3: Vector contains trailing same element. */ ++ ++ if (aarch64_sve_expand_vector_init_handle_trailing_same_elem ++ (target, builder, nelts_reqd)) ++ return true; ++ ++ /* Case 4: Vector contains leading same element. */ ++ ++ if (aarch64_sve_expand_vector_init_handle_trailing_same_elem ++ (target, rev_builder, nelts_reqd) && nelts_reqd == nelts) ++ { ++ emit_insn (gen_aarch64_sve_rev (mode, target, target)); ++ return true; ++ } ++ ++ /* Avoid recursing below 4-elements. ++ ??? The threshold 4 may need fine-tuning. */ ++ ++ if (nelts_reqd <= 4) ++ return false; ++ ++ rtx_vector_builder v_even (mode, 1, nelts); ++ rtx_vector_builder v_odd (mode, 1, nelts); ++ ++ for (int i = 0; i < nelts * 2; i += 2) ++ { ++ v_even.quick_push (builder.elt (i)); ++ v_odd.quick_push (builder.elt (i + 1)); ++ } ++ ++ v_even.finalize (); ++ v_odd.finalize (); ++ ++ rtx tmp1 = gen_reg_rtx (mode); ++ bool did_even_p = aarch64_sve_expand_vector_init (tmp1, v_even, ++ nelts, nelts_reqd / 2); ++ ++ rtx tmp2 = gen_reg_rtx (mode); ++ bool did_odd_p = aarch64_sve_expand_vector_init (tmp2, v_odd, ++ nelts, nelts_reqd / 2); ++ ++ if (!did_even_p && !did_odd_p) ++ return false; ++ ++ /* Initialize v_even and v_odd using INSR if it didn't match any of the ++ special cases and zip v_even, v_odd. */ ++ ++ if (!did_even_p) ++ aarch64_sve_expand_vector_init_insert_elems (tmp1, v_even, nelts_reqd / 2); ++ ++ if (!did_odd_p) ++ aarch64_sve_expand_vector_init_insert_elems (tmp2, v_odd, nelts_reqd / 2); ++ ++ rtvec v = gen_rtvec (2, tmp1, tmp2); ++ emit_set_insn (target, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1)); ++ return true; ++} ++ ++/* Initialize register TARGET from the elements in PARALLEL rtx VALS. */ ++ ++void ++aarch64_sve_expand_vector_init (rtx target, rtx vals) ++{ ++ machine_mode mode = GET_MODE (target); ++ int nelts = XVECLEN (vals, 0); ++ ++ rtx_vector_builder v (mode, 1, nelts); ++ for (int i = 0; i < nelts; i++) ++ v.quick_push (XVECEXP (vals, 0, i)); ++ v.finalize (); ++ ++ /* If neither sub-vectors of v could be initialized specially, ++ then use INSR to insert all elements from v into TARGET. ++ ??? This might not be optimal for vectors with large ++ initializers like 16-element or above. ++ For nelts < 4, it probably isn't useful to handle specially. */ ++ ++ if (nelts < 4 ++ || !aarch64_sve_expand_vector_init (target, v, nelts, nelts)) ++ aarch64_sve_expand_vector_init_insert_elems (target, v, nelts); ++} ++ ++/* Check whether VALUE is a vector constant in which every element ++ is either a power of 2 or a negated power of 2. If so, return ++ a constant vector of log2s, and flip CODE between PLUS and MINUS ++ if VALUE contains negated powers of 2. Return NULL_RTX otherwise. */ ++ ++static rtx ++aarch64_convert_mult_to_shift (rtx value, rtx_code &code) ++{ ++ if (GET_CODE (value) != CONST_VECTOR) ++ return NULL_RTX; ++ ++ rtx_vector_builder builder; ++ if (!builder.new_unary_operation (GET_MODE (value), value, false)) ++ return NULL_RTX; ++ ++ scalar_mode int_mode = GET_MODE_INNER (GET_MODE (value)); ++ /* 1 if the result of the multiplication must be negated, ++ 0 if it mustn't, or -1 if we don't yet care. 
*/ ++ int negate = -1; ++ unsigned int encoded_nelts = const_vector_encoded_nelts (value); ++ for (unsigned int i = 0; i < encoded_nelts; ++i) ++ { ++ rtx elt = CONST_VECTOR_ENCODED_ELT (value, i); ++ if (!CONST_SCALAR_INT_P (elt)) ++ return NULL_RTX; ++ rtx_mode_t val (elt, int_mode); ++ wide_int pow2 = wi::neg (val); ++ if (val != pow2) ++ { ++ /* It matters whether we negate or not. Make that choice, ++ and make sure that it's consistent with previous elements. */ ++ if (negate == !wi::neg_p (val)) ++ return NULL_RTX; ++ negate = wi::neg_p (val); ++ if (!negate) ++ pow2 = val; + } +- aarch64_expand_vector_init (target, copy); ++ /* POW2 is now the value that we want to be a power of 2. */ ++ int shift = wi::exact_log2 (pow2); ++ if (shift < 0) ++ return NULL_RTX; ++ builder.quick_push (gen_int_mode (shift, int_mode)); ++ } ++ if (negate == -1) ++ /* PLUS and MINUS are equivalent; canonicalize on PLUS. */ ++ code = PLUS; ++ else if (negate == 1) ++ code = code == PLUS ? MINUS : PLUS; ++ return builder.build (); ++} ++ ++/* Prepare for an integer SVE multiply-add or multiply-subtract pattern; ++ CODE is PLUS for the former and MINUS for the latter. OPERANDS is the ++ operands array, in the same order as for fma_optab. Return true if ++ the function emitted all the necessary instructions, false if the caller ++ should generate the pattern normally with the new OPERANDS array. */ ++ ++bool ++aarch64_prepare_sve_int_fma (rtx *operands, rtx_code code) ++{ ++ machine_mode mode = GET_MODE (operands[0]); ++ if (rtx shifts = aarch64_convert_mult_to_shift (operands[2], code)) ++ { ++ rtx product = expand_binop (mode, vashl_optab, operands[1], shifts, ++ NULL_RTX, true, OPTAB_DIRECT); ++ force_expand_binop (mode, code == PLUS ? add_optab : sub_optab, ++ operands[3], product, operands[0], true, ++ OPTAB_DIRECT); ++ return true; + } ++ operands[2] = force_reg (mode, operands[2]); ++ return false; ++} + +- /* Insert the variable lanes directly. */ +- for (int i = 0; i < n_elts; i++) ++/* Likewise, but for a conditional pattern. 
*/ ++ ++bool ++aarch64_prepare_sve_cond_int_fma (rtx *operands, rtx_code code) ++{ ++ machine_mode mode = GET_MODE (operands[0]); ++ if (rtx shifts = aarch64_convert_mult_to_shift (operands[3], code)) + { +- rtx x = XVECEXP (vals, 0, i); +- if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) +- continue; +- x = copy_to_mode_reg (inner_mode, x); +- emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i))); ++ rtx product = expand_binop (mode, vashl_optab, operands[2], shifts, ++ NULL_RTX, true, OPTAB_DIRECT); ++ emit_insn (gen_cond (code, mode, operands[0], operands[1], ++ operands[4], product, operands[5])); ++ return true; + } ++ operands[3] = force_reg (mode, operands[3]); ++ return false; + } + + static unsigned HOST_WIDE_INT +@@ -15428,11 +17796,15 @@ aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global) + static void + aarch64_asm_output_variant_pcs (FILE *stream, const tree decl, const char* name) + { +- if (aarch64_simd_decl_p (decl)) ++ if (TREE_CODE (decl) == FUNCTION_DECL) + { +- fprintf (stream, "\t.variant_pcs\t"); +- assemble_name (stream, name); +- fprintf (stream, "\n"); ++ arm_pcs pcs = (arm_pcs) fndecl_abi (decl).id (); ++ if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE) ++ { ++ fprintf (stream, "\t.variant_pcs\t"); ++ assemble_name (stream, name); ++ fprintf (stream, "\n"); ++ } + } + } + +@@ -15459,7 +17831,7 @@ aarch64_declare_function_name (FILE *stream, const char* name, + const struct processor *this_arch + = aarch64_get_arch (targ_options->x_explicit_arch); + +- unsigned long isa_flags = targ_options->x_aarch64_isa_flags; ++ uint64_t isa_flags = targ_options->x_aarch64_isa_flags; + std::string extension + = aarch64_get_extension_string_for_isa_flags (isa_flags, + this_arch->flags); +@@ -15541,6 +17913,18 @@ aarch64_asm_output_external (FILE *stream, tree decl, const char* name) + aarch64_asm_output_variant_pcs (stream, decl, name); + } + ++/* Triggered after a .cfi_startproc directive is emitted into the assembly file. ++ Used to output the .cfi_b_key_frame directive when signing the current ++ function with the B key. */ ++ ++void ++aarch64_post_cfi_startproc (FILE *f, tree ignored ATTRIBUTE_UNUSED) ++{ ++ if (cfun->machine->frame.laid_out && aarch64_return_address_signing_enabled () ++ && aarch64_ra_sign_key == AARCH64_KEY_B) ++ asm_fprintf (f, "\t.cfi_b_key_frame\n"); ++} ++ + /* Implements TARGET_ASM_FILE_START. Output the assembly header. */ + + static void +@@ -15551,7 +17935,7 @@ aarch64_start_file (void) + + const struct processor *default_arch + = aarch64_get_arch (default_options->x_explicit_arch); +- unsigned long default_isa_flags = default_options->x_aarch64_isa_flags; ++ uint64_t default_isa_flags = default_options->x_aarch64_isa_flags; + std::string extension + = aarch64_get_extension_string_for_isa_flags (default_isa_flags, + default_arch->flags); +@@ -15570,16 +17954,26 @@ static void + aarch64_emit_load_exclusive (machine_mode mode, rtx rval, + rtx mem, rtx model_rtx) + { +- emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx)); ++ if (mode == TImode) ++ emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval), ++ gen_highpart (DImode, rval), ++ mem, model_rtx)); ++ else ++ emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx)); + } + + /* Emit store exclusive. 
*/ + + static void + aarch64_emit_store_exclusive (machine_mode mode, rtx bval, +- rtx rval, rtx mem, rtx model_rtx) ++ rtx mem, rtx rval, rtx model_rtx) + { +- emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx)); ++ if (mode == TImode) ++ emit_insn (gen_aarch64_store_exclusive_pair ++ (bval, mem, operand_subword (rval, 0, 0, TImode), ++ operand_subword (rval, 1, 0, TImode), model_rtx)); ++ else ++ emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, rval, model_rtx)); + } + + /* Mark the previous jump instruction as unlikely. */ +@@ -15591,6 +17985,82 @@ aarch64_emit_unlikely_jump (rtx insn) + add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); + } + ++/* We store the names of the various atomic helpers in a 5x4 array. ++ Return the libcall function given MODE, MODEL and NAMES. */ ++ ++rtx ++aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, ++ const atomic_ool_names *names) ++{ ++ memmodel model = memmodel_base (INTVAL (model_rtx)); ++ int mode_idx, model_idx; ++ ++ switch (mode) ++ { ++ case E_QImode: ++ mode_idx = 0; ++ break; ++ case E_HImode: ++ mode_idx = 1; ++ break; ++ case E_SImode: ++ mode_idx = 2; ++ break; ++ case E_DImode: ++ mode_idx = 3; ++ break; ++ case E_TImode: ++ mode_idx = 4; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ switch (model) ++ { ++ case MEMMODEL_RELAXED: ++ model_idx = 0; ++ break; ++ case MEMMODEL_CONSUME: ++ case MEMMODEL_ACQUIRE: ++ model_idx = 1; ++ break; ++ case MEMMODEL_RELEASE: ++ model_idx = 2; ++ break; ++ case MEMMODEL_ACQ_REL: ++ case MEMMODEL_SEQ_CST: ++ model_idx = 3; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ return init_one_libfunc_visibility (names->str[mode_idx][model_idx], ++ VISIBILITY_HIDDEN); ++} ++ ++#define DEF0(B, N) \ ++ { "__aarch64_" #B #N "_relax", \ ++ "__aarch64_" #B #N "_acq", \ ++ "__aarch64_" #B #N "_rel", \ ++ "__aarch64_" #B #N "_acq_rel" } ++ ++#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \ ++ { NULL, NULL, NULL, NULL } ++#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16) ++ ++static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } }; ++const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } }; ++const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } }; ++const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } }; ++const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } }; ++const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } }; ++ ++#undef DEF0 ++#undef DEF4 ++#undef DEF5 ++ + /* Expand a compare and swap pattern. */ + + void +@@ -15637,6 +18107,17 @@ aarch64_expand_compare_and_swap (rtx operands[]) + newval, mod_s)); + cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); + } ++ else if (TARGET_OUTLINE_ATOMICS) ++ { ++ /* Oldval must satisfy compare afterward. */ ++ if (!aarch64_plus_operand (oldval, mode)) ++ oldval = force_reg (mode, oldval); ++ rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names); ++ rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode, ++ oldval, mode, newval, mode, ++ XEXP (mem, 0), Pmode); ++ cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); ++ } + else + { + /* The oldval predicate varies by mode. Test it and force to reg. */ +@@ -15682,13 +18163,11 @@ aarch64_split_compare_and_swap (rtx operands[]) + /* Split after prolog/epilog to avoid interactions with shrinkwrapping. 
*/ + gcc_assert (epilogue_completed); + +- rtx rval, mem, oldval, newval, scratch; ++ rtx rval, mem, oldval, newval, scratch, x, model_rtx; + machine_mode mode; + bool is_weak; + rtx_code_label *label1, *label2; +- rtx x, cond; + enum memmodel model; +- rtx model_rtx; + + rval = operands[0]; + mem = operands[1]; +@@ -15709,7 +18188,8 @@ aarch64_split_compare_and_swap (rtx operands[]) + CBNZ scratch, .label1 + .label2: + CMP rval, 0. */ +- bool strong_zero_p = !is_weak && oldval == const0_rtx; ++ bool strong_zero_p = (!is_weak && !aarch64_track_speculation && ++ oldval == const0_rtx && mode != TImode); + + label1 = NULL; + if (!is_weak) +@@ -15722,35 +18202,20 @@ aarch64_split_compare_and_swap (rtx operands[]) + /* The initial load can be relaxed for a __sync operation since a final + barrier will be emitted to stop code hoisting. */ + if (is_mm_sync (model)) +- aarch64_emit_load_exclusive (mode, rval, mem, +- GEN_INT (MEMMODEL_RELAXED)); ++ aarch64_emit_load_exclusive (mode, rval, mem, GEN_INT (MEMMODEL_RELAXED)); + else + aarch64_emit_load_exclusive (mode, rval, mem, model_rtx); + + if (strong_zero_p) +- { +- if (aarch64_track_speculation) +- { +- /* Emit an explicit compare instruction, so that we can correctly +- track the condition codes. */ +- rtx cc_reg = aarch64_gen_compare_reg (NE, rval, const0_rtx); +- x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx); +- } +- else +- x = gen_rtx_NE (VOIDmode, rval, const0_rtx); +- +- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, +- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); +- aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); +- } ++ x = gen_rtx_NE (VOIDmode, rval, const0_rtx); + else + { +- cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); +- x = gen_rtx_NE (VOIDmode, cond, const0_rtx); +- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, +- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); +- aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); ++ rtx cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); ++ x = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); + } ++ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, ++ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); ++ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + + aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx); + +@@ -15771,22 +18236,16 @@ aarch64_split_compare_and_swap (rtx operands[]) + aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); + } + else +- { +- cond = gen_rtx_REG (CCmode, CC_REGNUM); +- x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); +- emit_insn (gen_rtx_SET (cond, x)); +- } ++ aarch64_gen_compare_reg (NE, scratch, const0_rtx); + + emit_label (label2); ++ + /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL + to set the condition flags. If this is not used it will be removed by + later passes. */ + if (strong_zero_p) +- { +- cond = gen_rtx_REG (CCmode, CC_REGNUM); +- x = gen_rtx_COMPARE (CCmode, rval, const0_rtx); +- emit_insn (gen_rtx_SET (cond, x)); +- } ++ aarch64_gen_compare_reg (NE, rval, const0_rtx); ++ + /* Emit any final barrier needed for a __sync operation. 
*/ + if (is_mm_sync (model)) + aarch64_emit_post_barrier (model); +@@ -15939,6 +18398,7 @@ aarch64_float_const_representable_p (rtx x) + REAL_VALUE_TYPE r, m; + bool fail; + ++ x = unwrap_const_vec_duplicate (x); + if (!CONST_DOUBLE_P (x)) + return false; + +@@ -16034,17 +18494,18 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width, + + if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT) + { +- gcc_assert (info.shift == 0 && info.insn == simd_immediate_info::MOV); ++ gcc_assert (info.insn == simd_immediate_info::MOV ++ && info.u.mov.shift == 0); + /* For FP zero change it to a CONST_INT 0 and use the integer SIMD + move immediate path. */ +- if (aarch64_float_const_zero_rtx_p (info.value)) +- info.value = GEN_INT (0); ++ if (aarch64_float_const_zero_rtx_p (info.u.mov.value)) ++ info.u.mov.value = GEN_INT (0); + else + { + const unsigned int buf_size = 20; + char float_buf[buf_size] = {'\0'}; + real_to_decimal_for_mode (float_buf, +- CONST_DOUBLE_REAL_VALUE (info.value), ++ CONST_DOUBLE_REAL_VALUE (info.u.mov.value), + buf_size, buf_size, 1, info.elt_mode); + + if (lane_count == 1) +@@ -16056,36 +18517,39 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width, + } + } + +- gcc_assert (CONST_INT_P (info.value)); ++ gcc_assert (CONST_INT_P (info.u.mov.value)); + + if (which == AARCH64_CHECK_MOV) + { + mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi"; +- shift_op = info.modifier == simd_immediate_info::MSL ? "msl" : "lsl"; ++ shift_op = (info.u.mov.modifier == simd_immediate_info::MSL ++ ? "msl" : "lsl"); + if (lane_count == 1) + snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, +- mnemonic, UINTVAL (info.value)); +- else if (info.shift) ++ mnemonic, UINTVAL (info.u.mov.value)); ++ else if (info.u.mov.shift) + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " + HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count, +- element_char, UINTVAL (info.value), shift_op, info.shift); ++ element_char, UINTVAL (info.u.mov.value), shift_op, ++ info.u.mov.shift); + else + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " + HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count, +- element_char, UINTVAL (info.value)); ++ element_char, UINTVAL (info.u.mov.value)); + } + else + { + /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */ + mnemonic = info.insn == simd_immediate_info::MVN ? 
"bic" : "orr"; +- if (info.shift) ++ if (info.u.mov.shift) + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #" + HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count, +- element_char, UINTVAL (info.value), "lsl", info.shift); ++ element_char, UINTVAL (info.u.mov.value), "lsl", ++ info.u.mov.shift); + else + snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #" + HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count, +- element_char, UINTVAL (info.value)); ++ element_char, UINTVAL (info.u.mov.value)); + } + return templ; + } +@@ -16129,24 +18593,49 @@ aarch64_output_sve_mov_immediate (rtx const_vector) + + element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode)); + +- if (info.step) ++ machine_mode vec_mode = GET_MODE (const_vector); ++ if (aarch64_sve_pred_mode_p (vec_mode)) ++ { ++ static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")]; ++ if (info.insn == simd_immediate_info::MOV) ++ { ++ gcc_assert (info.u.mov.value == const0_rtx); ++ snprintf (buf, sizeof (buf), "pfalse\t%%0.b"); ++ } ++ else ++ { ++ gcc_assert (info.insn == simd_immediate_info::PTRUE); ++ unsigned int total_bytes; ++ if (info.u.pattern == AARCH64_SV_ALL ++ && BYTES_PER_SVE_VECTOR.is_constant (&total_bytes)) ++ snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", element_char, ++ total_bytes / GET_MODE_SIZE (info.elt_mode)); ++ else ++ snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, %s", element_char, ++ svpattern_token (info.u.pattern)); ++ } ++ return buf; ++ } ++ ++ if (info.insn == simd_immediate_info::INDEX) + { + snprintf (templ, sizeof (templ), "index\t%%0.%c, #" + HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC, +- element_char, INTVAL (info.value), INTVAL (info.step)); ++ element_char, INTVAL (info.u.index.base), ++ INTVAL (info.u.index.step)); + return templ; + } + + if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT) + { +- if (aarch64_float_const_zero_rtx_p (info.value)) +- info.value = GEN_INT (0); ++ if (aarch64_float_const_zero_rtx_p (info.u.mov.value)) ++ info.u.mov.value = GEN_INT (0); + else + { + const int buf_size = 20; + char float_buf[buf_size] = {}; + real_to_decimal_for_mode (float_buf, +- CONST_DOUBLE_REAL_VALUE (info.value), ++ CONST_DOUBLE_REAL_VALUE (info.u.mov.value), + buf_size, buf_size, 1, info.elt_mode); + + snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s", +@@ -16156,23 +18645,27 @@ aarch64_output_sve_mov_immediate (rtx const_vector) + } + + snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC, +- element_char, INTVAL (info.value)); ++ element_char, INTVAL (info.u.mov.value)); + return templ; + } + +-/* Return the asm format for a PTRUE instruction whose destination has +- mode MODE. SUFFIX is the element size suffix. */ ++/* Return the asm template for a PTRUES. CONST_UNSPEC is the ++ aarch64_sve_ptrue_svpattern_immediate that describes the predicate ++ pattern. 
*/ + + char * +-aarch64_output_ptrue (machine_mode mode, char suffix) ++aarch64_output_sve_ptrues (rtx const_unspec) + { +- unsigned int nunits; +- static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")]; +- if (GET_MODE_NUNITS (mode).is_constant (&nunits)) +- snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", suffix, nunits); +- else +- snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", suffix); +- return buf; ++ static char templ[40]; ++ ++ struct simd_immediate_info info; ++ bool is_valid = aarch64_simd_valid_immediate (const_unspec, &info); ++ gcc_assert (is_valid && info.insn == simd_immediate_info::PTRUE); ++ ++ char element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode)); ++ snprintf (templ, sizeof (templ), "ptrues\t%%0.%c, %s", element_char, ++ svpattern_token (info.u.pattern)); ++ return templ; + } + + /* Split operands into moves from op[1] + op[2] into op[0]. */ +@@ -16590,13 +19083,31 @@ aarch64_evpc_rev_local (struct expand_vec_perm_d *d) + if (d->testing_p) + return true; + +- rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec); + if (d->vec_flags == VEC_SVE_DATA) + { +- rtx pred = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); +- src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src), +- UNSPEC_MERGE_PTRUE); ++ machine_mode int_mode = aarch64_sve_int_mode (pred_mode); ++ rtx target = gen_reg_rtx (int_mode); ++ if (BYTES_BIG_ENDIAN) ++ /* The act of taking a subreg between INT_MODE and d->vmode ++ is itself a reversing operation on big-endian targets; ++ see the comment at the head of aarch64-sve.md for details. ++ First reinterpret OP0 as INT_MODE without using a subreg ++ and without changing the contents. */ ++ emit_insn (gen_aarch64_sve_reinterpret (int_mode, target, d->op0)); ++ else ++ { ++ /* For SVE we use REV[BHW] unspecs derived from the element size ++ of v->mode and vector modes whose elements have SIZE bytes. ++ This ensures that the vector modes match the predicate modes. */ ++ int unspec = aarch64_sve_rev_unspec (d->vmode); ++ rtx pred = aarch64_ptrue_reg (pred_mode); ++ emit_insn (gen_aarch64_pred (unspec, int_mode, target, pred, ++ gen_lowpart (int_mode, d->op0))); ++ } ++ emit_move_insn (d->target, gen_lowpart (d->vmode, target)); ++ return true; + } ++ rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec); + emit_set_insn (d->target, src); + return true; + } +@@ -16609,7 +19120,7 @@ aarch64_evpc_rev_global (struct expand_vec_perm_d *d) + { + poly_uint64 nelt = d->perm.length (); + +- if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA) ++ if (!d->one_vector_p || d->vec_flags == VEC_ADVSIMD) + return false; + + if (!d->perm.series_p (0, 1, nelt - 1, -1)) +@@ -16722,6 +19233,50 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d) + return true; + } + ++/* Try to implement D using SVE SEL instruction. 
*/ ++ ++static bool ++aarch64_evpc_sel (struct expand_vec_perm_d *d) ++{ ++ machine_mode vmode = d->vmode; ++ int unit_size = GET_MODE_UNIT_SIZE (vmode); ++ ++ if (d->vec_flags != VEC_SVE_DATA ++ || unit_size > 8) ++ return false; ++ ++ int n_patterns = d->perm.encoding ().npatterns (); ++ poly_int64 vec_len = d->perm.length (); ++ ++ for (int i = 0; i < n_patterns; ++i) ++ if (!known_eq (d->perm[i], i) ++ && !known_eq (d->perm[i], vec_len + i)) ++ return false; ++ ++ for (int i = n_patterns; i < n_patterns * 2; i++) ++ if (!d->perm.series_p (i, n_patterns, i, n_patterns) ++ && !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns)) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ machine_mode pred_mode = aarch64_sve_pred_mode (unit_size).require (); ++ ++ rtx_vector_builder builder (pred_mode, n_patterns, 2); ++ for (int i = 0; i < n_patterns * 2; i++) ++ { ++ rtx elem = known_eq (d->perm[i], i) ? CONST1_RTX (BImode) ++ : CONST0_RTX (BImode); ++ builder.quick_push (elem); ++ } ++ ++ rtx const_vec = builder.build (); ++ rtx pred = force_reg (pred_mode, const_vec); ++ emit_insn (gen_vcond_mask (vmode, vmode, d->target, d->op1, d->op0, pred)); ++ return true; ++} ++ + static bool + aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) + { +@@ -16754,6 +19309,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) + return true; + else if (aarch64_evpc_trn (d)) + return true; ++ else if (aarch64_evpc_sel (d)) ++ return true; + if (d->vec_flags == VEC_SVE_DATA) + return aarch64_evpc_sve_tbl (d); + else if (d->vec_flags == VEC_ADVSIMD) +@@ -16829,60 +19386,19 @@ aarch64_reverse_mask (machine_mode mode, unsigned int nunits) + return force_reg (V16QImode, mask); + } + +-/* Return true if X is a valid second operand for the SVE instruction +- that implements integer comparison OP_CODE. */ +- +-static bool +-aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x) +-{ +- if (register_operand (x, VOIDmode)) +- return true; +- +- switch (op_code) +- { +- case LTU: +- case LEU: +- case GEU: +- case GTU: +- return aarch64_sve_cmp_immediate_p (x, false); +- case LT: +- case LE: +- case GE: +- case GT: +- case NE: +- case EQ: +- return aarch64_sve_cmp_immediate_p (x, true); +- default: +- gcc_unreachable (); +- } +-} +- +-/* Use predicated SVE instructions to implement the equivalent of: +- +- (set TARGET OP) +- +- given that PTRUE is an all-true predicate of the appropriate mode. */ +- +-static void +-aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op) +-{ +- rtx unspec = gen_rtx_UNSPEC (GET_MODE (target), +- gen_rtvec (2, ptrue, op), +- UNSPEC_MERGE_PTRUE); +- rtx_insn *insn = emit_set_insn (target, unspec); +- set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op)); +-} ++/* Expand an SVE integer comparison using the SVE equivalent of: + +-/* Likewise, but also clobber the condition codes. */ ++ (set TARGET (CODE OP0 OP1)). 
*/ + +-static void +-aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op) ++void ++aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1) + { +- rtx unspec = gen_rtx_UNSPEC (GET_MODE (target), +- gen_rtvec (2, ptrue, op), +- UNSPEC_MERGE_PTRUE); +- rtx_insn *insn = emit_insn (gen_set_clobber_cc (target, unspec)); +- set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op)); ++ machine_mode pred_mode = GET_MODE (target); ++ machine_mode data_mode = GET_MODE (op0); ++ rtx res = aarch64_sve_emit_int_cmp (target, pred_mode, code, data_mode, ++ op0, op1); ++ if (!rtx_equal_p (target, res)) ++ emit_move_insn (target, res); + } + + /* Return the UNSPEC_COND_* code for comparison CODE. */ +@@ -16893,17 +19409,19 @@ aarch64_unspec_cond_code (rtx_code code) + switch (code) + { + case NE: +- return UNSPEC_COND_NE; ++ return UNSPEC_COND_FCMNE; + case EQ: +- return UNSPEC_COND_EQ; ++ return UNSPEC_COND_FCMEQ; + case LT: +- return UNSPEC_COND_LT; ++ return UNSPEC_COND_FCMLT; + case GT: +- return UNSPEC_COND_GT; ++ return UNSPEC_COND_FCMGT; + case LE: +- return UNSPEC_COND_LE; ++ return UNSPEC_COND_FCMLE; + case GE: +- return UNSPEC_COND_GE; ++ return UNSPEC_COND_FCMGE; ++ case UNORDERED: ++ return UNSPEC_COND_FCMUO; + default: + gcc_unreachable (); + } +@@ -16911,78 +19429,58 @@ aarch64_unspec_cond_code (rtx_code code) + + /* Emit: + +- (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_)) ++ (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) + +- where is the operation associated with comparison CODE. This form +- of instruction is used when (and (CODE OP0 OP1) PRED) would have different +- semantics, such as when PRED might not be all-true and when comparing +- inactive lanes could have side effects. */ ++ where is the operation associated with comparison CODE. ++ KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */ + + static void +-aarch64_emit_sve_predicated_cond (rtx target, rtx_code code, +- rtx pred, rtx op0, rtx op1) ++aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred, ++ bool known_ptrue_p, rtx op0, rtx op1) + { ++ rtx flag = gen_int_mode (known_ptrue_p, SImode); + rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred), +- gen_rtvec (3, pred, op0, op1), ++ gen_rtvec (4, pred, flag, op0, op1), + aarch64_unspec_cond_code (code)); + emit_set_insn (target, unspec); + } + +-/* Expand an SVE integer comparison using the SVE equivalent of: +- +- (set TARGET (CODE OP0 OP1)). */ +- +-void +-aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1) +-{ +- machine_mode pred_mode = GET_MODE (target); +- machine_mode data_mode = GET_MODE (op0); +- +- if (!aarch64_sve_cmp_operand_p (code, op1)) +- op1 = force_reg (data_mode, op1); +- +- rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); +- rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); +- aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond); +-} +- + /* Emit the SVE equivalent of: + +- (set TMP1 (CODE1 OP0 OP1)) +- (set TMP2 (CODE2 OP0 OP1)) ++ (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) ++ (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) + (set TARGET (ior:PRED_MODE TMP1 TMP2)) + +- PTRUE is an all-true predicate with the same mode as TARGET. */ ++ where is the operation associated with comparison CODEi. ++ KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. 
*/ + + static void +-aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2, +- rtx ptrue, rtx op0, rtx op1) ++aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2, ++ rtx pred, bool known_ptrue_p, rtx op0, rtx op1) + { +- machine_mode pred_mode = GET_MODE (ptrue); ++ machine_mode pred_mode = GET_MODE (pred); + rtx tmp1 = gen_reg_rtx (pred_mode); +- aarch64_emit_sve_ptrue_op (tmp1, ptrue, +- gen_rtx_fmt_ee (code1, pred_mode, op0, op1)); ++ aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1); + rtx tmp2 = gen_reg_rtx (pred_mode); +- aarch64_emit_sve_ptrue_op (tmp2, ptrue, +- gen_rtx_fmt_ee (code2, pred_mode, op0, op1)); ++ aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1); + aarch64_emit_binop (target, ior_optab, tmp1, tmp2); + } + + /* Emit the SVE equivalent of: + +- (set TMP (CODE OP0 OP1)) ++ (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) + (set TARGET (not TMP)) + +- PTRUE is an all-true predicate with the same mode as TARGET. */ ++ where is the operation associated with comparison CODE. ++ KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */ + + static void +-aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code, +- rtx op0, rtx op1) ++aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred, ++ bool known_ptrue_p, rtx op0, rtx op1) + { +- machine_mode pred_mode = GET_MODE (ptrue); ++ machine_mode pred_mode = GET_MODE (pred); + rtx tmp = gen_reg_rtx (pred_mode); +- aarch64_emit_sve_ptrue_op (tmp, ptrue, +- gen_rtx_fmt_ee (code, pred_mode, op0, op1)); ++ aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1); + aarch64_emit_unop (target, one_cmpl_optab, tmp); + } + +@@ -17000,7 +19498,7 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, + machine_mode pred_mode = GET_MODE (target); + machine_mode data_mode = GET_MODE (op0); + +- rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); ++ rtx ptrue = aarch64_ptrue_reg (pred_mode); + switch (code) + { + case UNORDERED: +@@ -17015,14 +19513,13 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, + case NE: + { + /* There is native support for the comparison. */ +- rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); +- aarch64_emit_sve_ptrue_op (target, ptrue, cond); ++ aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1); + return false; + } + + case LTGT: + /* This is a trapping operation (LT or GT). */ +- aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1); ++ aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1); + return false; + + case UNEQ: +@@ -17030,7 +19527,8 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, + { + /* This would trap for signaling NaNs. */ + op1 = force_reg (data_mode, op1); +- aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1); ++ aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ, ++ ptrue, true, op0, op1); + return false; + } + /* fall through */ +@@ -17043,7 +19541,8 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, + /* Work out which elements are ordered. */ + rtx ordered = gen_reg_rtx (pred_mode); + op1 = force_reg (data_mode, op1); +- aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1); ++ aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED, ++ ptrue, true, op0, op1); + + /* Test the opposite condition for the ordered elements, + then invert the result. 
*/ +@@ -17053,13 +19552,12 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, + code = reverse_condition_maybe_unordered (code); + if (can_invert_p) + { +- aarch64_emit_sve_predicated_cond (target, code, +- ordered, op0, op1); ++ aarch64_emit_sve_fp_cond (target, code, ++ ordered, false, op0, op1); + return true; + } +- rtx tmp = gen_reg_rtx (pred_mode); +- aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1); +- aarch64_emit_unop (target, one_cmpl_optab, tmp); ++ aarch64_emit_sve_invert_fp_cond (target, code, ++ ordered, false, op0, op1); + return false; + } + break; +@@ -17077,11 +19575,10 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, + code = reverse_condition_maybe_unordered (code); + if (can_invert_p) + { +- rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); +- aarch64_emit_sve_ptrue_op (target, ptrue, cond); ++ aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1); + return true; + } +- aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1); ++ aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1); + return false; + } + +@@ -17104,6 +19601,13 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode, + else + aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]); + ++ if (!aarch64_sve_reg_or_dup_imm (ops[1], data_mode)) ++ ops[1] = force_reg (data_mode, ops[1]); ++ /* The "false" value can only be zero if the "true" value is a constant. */ ++ if (register_operand (ops[1], data_mode) ++ || !aarch64_simd_reg_or_zero (ops[2], data_mode)) ++ ops[2] = force_reg (data_mode, ops[2]); ++ + rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]); + emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL)); + } +@@ -17181,11 +19685,11 @@ aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst, + *dst = aarch64_progress_pointer (*dst); + } + +-/* Expand movmem, as if from a __builtin_memcpy. Return true if ++/* Expand cpymem, as if from a __builtin_memcpy. Return true if + we succeed, otherwise return false. */ + + bool +-aarch64_expand_movmem (rtx *operands) ++aarch64_expand_cpymem (rtx *operands) + { + int n, mode_bits; + rtx dst = operands[0]; +@@ -17452,7 +19956,10 @@ aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, + static unsigned HOST_WIDE_INT + aarch64_asan_shadow_offset (void) + { +- return (HOST_WIDE_INT_1 << 36); ++ if (TARGET_ILP32) ++ return (HOST_WIDE_INT_1 << 29); ++ else ++ return (HOST_WIDE_INT_1 << 36); + } + + static rtx +@@ -17758,10 +20265,6 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) + } + } + +- if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC) +- && aarch_crypto_can_dual_issue (prev, curr)) +- return true; +- + if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH) + && any_condjump_p (curr)) + { +@@ -18545,6 +21048,29 @@ aarch64_fpconst_pow_of_2 (rtx x) + return exact_log2 (real_to_integer (r)); + } + ++/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a ++ power of 2 (i.e 1/2^n) return the number of float bits. e.g. for x==(1/2^n) ++ return n. Otherwise return -1. 
*/ ++ ++int ++aarch64_fpconst_pow2_recip (rtx x) ++{ ++ REAL_VALUE_TYPE r0; ++ ++ if (!CONST_DOUBLE_P (x)) ++ return -1; ++ ++ r0 = *CONST_DOUBLE_REAL_VALUE (x); ++ if (exact_real_inverse (DFmode, &r0) ++ && !REAL_VALUE_NEGATIVE (r0)) ++ { ++ int ret = exact_log2 (real_to_integer (&r0)); ++ if (ret >= 1 && ret <= 32) ++ return ret; ++ } ++ return -1; ++} ++ + /* If X is a vector of equal CONST_DOUBLE values and that value is + Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */ + +@@ -18765,12 +21291,8 @@ aarch64_select_early_remat_modes (sbitmap modes) + /* SVE values are not normally live across a call, so it should be + worth doing early rematerialization even in VL-specific mode. */ + for (int i = 0; i < NUM_MACHINE_MODES; ++i) +- { +- machine_mode mode = (machine_mode) i; +- unsigned int vec_flags = aarch64_classify_vector_mode (mode); +- if (vec_flags & VEC_ANY_SVE) +- bitmap_set_bit (modes, i); +- } ++ if (aarch64_sve_mode_p ((machine_mode) i)) ++ bitmap_set_bit (modes, i); + } + + /* Override the default target speculation_safe_value. */ +@@ -18994,6 +21516,55 @@ aarch64_stack_protect_guard (void) + return NULL_TREE; + } + ++/* Return the diagnostic message string if conversion from FROMTYPE to ++ TOTYPE is not allowed, NULL otherwise. */ ++ ++static const char * ++aarch64_invalid_conversion (const_tree fromtype, const_tree totype) ++{ ++ if (element_mode (fromtype) != element_mode (totype)) ++ { ++ /* Do no allow conversions to/from BFmode scalar types. */ ++ if (TYPE_MODE (fromtype) == BFmode) ++ return N_("invalid conversion from type %"); ++ if (TYPE_MODE (totype) == BFmode) ++ return N_("invalid conversion to type %"); ++ } ++ ++ /* Conversion allowed. */ ++ return NULL; ++} ++ ++/* Return the diagnostic message string if the unary operation OP is ++ not permitted on TYPE, NULL otherwise. */ ++ ++static const char * ++aarch64_invalid_unary_op (int op, const_tree type) ++{ ++ /* Reject all single-operand operations on BFmode except for &. */ ++ if (element_mode (type) == BFmode && op != ADDR_EXPR) ++ return N_("operation not permitted on type %"); ++ ++ /* Operation allowed. */ ++ return NULL; ++} ++ ++/* Return the diagnostic message string if the binary operation OP is ++ not permitted on TYPE1 and TYPE2, NULL otherwise. */ ++ ++static const char * ++aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, ++ const_tree type2) ++{ ++ /* Reject all 2-operand operations on BFmode. */ ++ if (element_mode (type1) == BFmode ++ || element_mode (type2) == BFmode) ++ return N_("operation not permitted on type %"); ++ ++ /* Operation allowed. */ ++ return NULL; ++} ++ + /* Implement TARGET_ASM_FILE_END for AArch64. This adds the AArch64 GNU NOTE + section at the end if needed. 
*/ + #define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 +@@ -19137,7 +21708,7 @@ aarch64_run_selftests (void) + #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list + + #undef TARGET_CALLEE_COPIES +-#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false ++#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_arg_info_false + + #undef TARGET_CAN_ELIMINATE + #define TARGET_CAN_ELIMINATE aarch64_can_eliminate +@@ -19247,6 +21818,15 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_MANGLE_TYPE + #define TARGET_MANGLE_TYPE aarch64_mangle_type + ++#undef TARGET_INVALID_CONVERSION ++#define TARGET_INVALID_CONVERSION aarch64_invalid_conversion ++ ++#undef TARGET_INVALID_UNARY_OP ++#define TARGET_INVALID_UNARY_OP aarch64_invalid_unary_op ++ ++#undef TARGET_INVALID_BINARY_OP ++#define TARGET_INVALID_BINARY_OP aarch64_invalid_binary_op ++ + #undef TARGET_MEMORY_MOVE_COST + #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost + +@@ -19370,6 +21950,9 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_VECTOR_MODE_SUPPORTED_P + #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p + ++#undef TARGET_COMPATIBLE_VECTOR_TYPES_P ++#define TARGET_COMPATIBLE_VECTOR_TYPES_P aarch64_compatible_vector_types_p ++ + #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT + #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ + aarch64_builtin_support_vector_misalignment +@@ -19517,13 +22100,8 @@ aarch64_libgcc_floating_mode_supported_p + #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ + aarch64_hard_regno_call_part_clobbered + +-#undef TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS +-#define TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS \ +- aarch64_remove_extra_call_preserved_regs +- +-#undef TARGET_RETURN_CALL_WITH_MAX_CLOBBERS +-#define TARGET_RETURN_CALL_WITH_MAX_CLOBBERS \ +- aarch64_return_call_with_max_clobbers ++#undef TARGET_INSN_CALLEE_ABI ++#define TARGET_INSN_CALLEE_ABI aarch64_insn_callee_abi + + #undef TARGET_CONSTANT_ALIGNMENT + #define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment +@@ -19566,11 +22144,20 @@ aarch64_libgcc_floating_mode_supported_p + #undef TARGET_GET_MULTILIB_ABI_NAME + #define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name + ++#undef TARGET_FNTYPE_ABI ++#define TARGET_FNTYPE_ABI aarch64_fntype_abi ++ + #if CHECKING_P + #undef TARGET_RUN_TARGET_SELFTESTS + #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests + #endif /* #if CHECKING_P */ + ++#undef TARGET_ASM_POST_CFI_STARTPROC ++#define TARGET_ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc ++ ++#undef TARGET_STRICT_ARGUMENT_NAMING ++#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true ++ + struct gcc_target targetm = TARGET_INITIALIZER; + + #include "gt-aarch64.h" +diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +index 772a97296..d5341656f 100644 +--- a/gcc/config/aarch64/aarch64.h ++++ b/gcc/config/aarch64/aarch64.h +@@ -192,6 +192,31 @@ extern unsigned aarch64_architecture_version; + /* Execution and Data Prediction Restriction instructions supported. */ + #define AARCH64_FL_PREDRES (1 << 27) + ++/* SVE2 instruction supported. */ ++#define AARCH64_FL_SVE2 (1 << 28) ++#define AARCH64_FL_SVE2_AES (1 << 29) ++#define AARCH64_FL_SVE2_SM4 (1 << 30) ++#define AARCH64_FL_SVE2_SHA3 (1ULL << 31) ++#define AARCH64_FL_SVE2_BITPERM (1ULL << 32) ++ ++/* Transactional Memory Extension. */ ++#define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */ ++ ++/* Armv8.6-A architecture extensions. 
*/ ++#define AARCH64_FL_V8_6 (1ULL << 34) ++ ++/* 8-bit Integer Matrix Multiply (I8MM) extensions. */ ++#define AARCH64_FL_I8MM (1ULL << 35) ++ ++/* Brain half-precision floating-point (BFloat16) Extension. */ ++#define AARCH64_FL_BF16 (1ULL << 36) ++ ++/* 32-bit Floating-point Matrix Multiply (F32MM) extensions. */ ++#define AARCH64_FL_F32MM (1ULL << 37) ++ ++/* 64-bit Floating-point Matrix Multiply (F64MM) extensions. */ ++#define AARCH64_FL_F64MM (1ULL << 38) ++ + /* Has FP and SIMD. */ + #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) + +@@ -213,6 +238,9 @@ extern unsigned aarch64_architecture_version; + #define AARCH64_FL_FOR_ARCH8_5 \ + (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5 \ + | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) ++#define AARCH64_FL_FOR_ARCH8_6 \ ++ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6 | AARCH64_FL_FPSIMD \ ++ | AARCH64_FL_I8MM | AARCH64_FL_BF16) + + /* Macros to test ISA flags. */ + +@@ -225,6 +253,7 @@ extern unsigned aarch64_architecture_version; + #define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2) + #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) + #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) ++#define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) + #define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3) + #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) + #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) +@@ -234,7 +263,14 @@ extern unsigned aarch64_architecture_version; + #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) + #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) + #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) ++#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) + #define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5) ++#define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) ++#define AARCH64_ISA_V8_6 (aarch64_isa_flags & AARCH64_FL_V8_6) ++#define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM) ++#define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM) ++#define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) ++#define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) + + /* Crypto is an optional extension to AdvSIMD. */ + #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) +@@ -270,12 +306,44 @@ extern unsigned aarch64_architecture_version; + /* SVE instructions, enabled through +sve. */ + #define TARGET_SVE (AARCH64_ISA_SVE) + ++/* SVE2 instructions, enabled through +sve2. */ ++#define TARGET_SVE2 (AARCH64_ISA_SVE2) ++ + /* ARMv8.3-A features. */ + #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3) + ++/* Javascript conversion instruction from Armv8.3-a. */ ++#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3) ++ + /* Armv8.3-a Complex number extension to AdvSIMD extensions. */ + #define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3) + ++/* Floating-point rounding instructions from Armv8.5-a. */ ++#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT) ++ ++/* TME instructions are enabled. */ ++#define TARGET_TME (AARCH64_ISA_TME) ++ ++/* Random number instructions from Armv8.5-a. */ ++#define TARGET_RNG (AARCH64_ISA_RNG) ++ ++/* I8MM instructions are enabled through +i8mm. */ ++#define TARGET_I8MM (AARCH64_ISA_I8MM) ++#define TARGET_SVE_I8MM (TARGET_SVE && AARCH64_ISA_I8MM) ++ ++/* F32MM instructions are enabled through +f32mm. 
*/ ++#define TARGET_F32MM (AARCH64_ISA_F32MM) ++#define TARGET_SVE_F32MM (TARGET_SVE && AARCH64_ISA_F32MM) ++ ++/* F64MM instructions are enabled through +f64mm. */ ++#define TARGET_F64MM (AARCH64_ISA_F64MM) ++#define TARGET_SVE_F64MM (TARGET_SVE && AARCH64_ISA_F64MM) ++ ++/* BF16 instructions are enabled through +bf16. */ ++#define TARGET_BF16_FP (AARCH64_ISA_BF16) ++#define TARGET_BF16_SIMD (AARCH64_ISA_BF16 && TARGET_SIMD) ++#define TARGET_SVE_BF16 (TARGET_SVE && AARCH64_ISA_BF16) ++ + /* Make sure this is always defined so we don't have to check for ifdefs + but rather use normal ifs. */ + #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT +@@ -338,6 +406,9 @@ extern unsigned aarch64_architecture_version; + P0-P7 Predicate low registers: valid in all predicate contexts + P8-P15 Predicate high registers: used as scratch space + ++ FFR First Fault Register, a fixed-use SVE predicate register ++ FFRT FFR token: a fake register used for modelling dependencies ++ + VG Pseudo "vector granules" register + + VG is the number of 64-bit elements in an SVE vector. We define +@@ -358,6 +429,7 @@ extern unsigned aarch64_architecture_version; + 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \ + 0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \ ++ 1, 1 /* FFR and FFRT */ \ + } + + /* X30 is marked as caller-saved which is in line with regular function call +@@ -380,6 +452,7 @@ extern unsigned aarch64_architecture_version; + 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \ + 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \ ++ 1, 1 /* FFR and FFRT */ \ + } + + #define REGISTER_NAMES \ +@@ -395,6 +468,7 @@ extern unsigned aarch64_architecture_version; + "sfp", "ap", "cc", "vg", \ + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \ + "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \ ++ "ffr", "ffrt" \ + } + + /* Generate the register aliases for core register N */ +@@ -443,11 +517,12 @@ extern unsigned aarch64_architecture_version; + #define FRAME_POINTER_REGNUM SFP_REGNUM + #define STACK_POINTER_REGNUM SP_REGNUM + #define ARG_POINTER_REGNUM AP_REGNUM +-#define FIRST_PSEUDO_REGISTER (P15_REGNUM + 1) ++#define FIRST_PSEUDO_REGISTER (FFRT_REGNUM + 1) + +-/* The number of (integer) argument register available. */ ++/* The number of argument registers available for each class. */ + #define NUM_ARG_REGS 8 + #define NUM_FP_ARG_REGS 8 ++#define NUM_PR_ARG_REGS 4 + + /* A Homogeneous Floating-Point or Short-Vector Aggregate may have at most + four members. */ +@@ -514,6 +589,9 @@ extern unsigned aarch64_architecture_version; + #define ASM_OUTPUT_EXTERNAL(STR, DECL, NAME) \ + aarch64_asm_output_external (STR, DECL, NAME) + ++/* Output assembly strings after .cfi_startproc is emitted. */ ++#define ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc ++ + /* For EH returns X4 contains the stack adjustment. 
*/ + #define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM) + #define EH_RETURN_HANDLER_RTX aarch64_eh_return_handler_rtx () +@@ -542,6 +620,9 @@ extern unsigned aarch64_architecture_version; + #define FP_LO_REGNUM_P(REGNO) \ + (((unsigned) (REGNO - V0_REGNUM)) <= (V15_REGNUM - V0_REGNUM)) + ++#define FP_LO8_REGNUM_P(REGNO) \ ++ (((unsigned) (REGNO - V0_REGNUM)) <= (V7_REGNUM - V0_REGNUM)) ++ + #define PR_REGNUM_P(REGNO)\ + (((unsigned) (REGNO - P0_REGNUM)) <= (P15_REGNUM - P0_REGNUM)) + +@@ -560,12 +641,15 @@ enum reg_class + GENERAL_REGS, + STACK_REG, + POINTER_REGS, ++ FP_LO8_REGS, + FP_LO_REGS, + FP_REGS, + POINTER_AND_FP_REGS, + PR_LO_REGS, + PR_HI_REGS, + PR_REGS, ++ FFR_REGS, ++ PR_AND_FFR_REGS, + ALL_REGS, + LIM_REG_CLASSES /* Last */ + }; +@@ -579,12 +663,15 @@ enum reg_class + "GENERAL_REGS", \ + "STACK_REG", \ + "POINTER_REGS", \ ++ "FP_LO8_REGS", \ + "FP_LO_REGS", \ + "FP_REGS", \ + "POINTER_AND_FP_REGS", \ + "PR_LO_REGS", \ + "PR_HI_REGS", \ + "PR_REGS", \ ++ "FFR_REGS", \ ++ "PR_AND_FFR_REGS", \ + "ALL_REGS" \ + } + +@@ -595,12 +682,15 @@ enum reg_class + { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ + { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ + { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \ ++ { 0x00000000, 0x000000ff, 0x00000000 }, /* FP_LO8_REGS */ \ + { 0x00000000, 0x0000ffff, 0x00000000 }, /* FP_LO_REGS */ \ + { 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \ + { 0xffffffff, 0xffffffff, 0x00000003 }, /* POINTER_AND_FP_REGS */\ + { 0x00000000, 0x00000000, 0x00000ff0 }, /* PR_LO_REGS */ \ + { 0x00000000, 0x00000000, 0x000ff000 }, /* PR_HI_REGS */ \ + { 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \ ++ { 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \ ++ { 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \ + { 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \ + } + +@@ -676,7 +766,7 @@ extern enum aarch64_processor aarch64_tune; + #ifdef HAVE_POLY_INT_H + struct GTY (()) aarch64_frame + { +- HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; ++ poly_int64 reg_offset[LAST_SAVED_REGNUM + 1]; + + /* The number of extra stack bytes taken up by register varargs. + This area is allocated by the callee at the very top of the +@@ -684,9 +774,12 @@ struct GTY (()) aarch64_frame + STACK_BOUNDARY. */ + HOST_WIDE_INT saved_varargs_size; + +- /* The size of the saved callee-save int/FP registers. */ ++ /* The size of the callee-save registers with a slot in REG_OFFSET. */ ++ poly_int64 saved_regs_size; + +- HOST_WIDE_INT saved_regs_size; ++ /* The size of the callee-save registers with a slot in REG_OFFSET that ++ are saved below the hard frame pointer. */ ++ poly_int64 below_hard_fp_saved_regs_size; + + /* Offset from the base of the frame (incomming SP) to the + top of the locals area. This value is always a multiple of +@@ -714,6 +807,10 @@ struct GTY (()) aarch64_frame + It may be non-zero if no push is used (ie. callee_adjust == 0). */ + poly_int64 callee_offset; + ++ /* The size of the stack adjustment before saving or after restoring ++ SVE registers. */ ++ poly_int64 sve_callee_adjust; ++ + /* The size of the stack adjustment after saving callee-saves. */ + poly_int64 final_adjust; + +@@ -723,6 +820,11 @@ struct GTY (()) aarch64_frame + unsigned wb_candidate1; + unsigned wb_candidate2; + ++ /* Big-endian SVE frames need a spare predicate register in order ++ to save vector registers in the correct layout for unwinding. ++ This is the register they should use. 
*/ ++ unsigned spare_pred_reg; ++ + bool laid_out; + }; + +@@ -751,6 +853,10 @@ enum aarch64_abi_type + enum arm_pcs + { + ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ ++ ARM_PCS_SIMD, /* For aarch64_vector_pcs functions. */ ++ ARM_PCS_SVE, /* For functions that pass or return ++ values in SVE registers. */ ++ ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */ + ARM_PCS_UNKNOWN + }; + +@@ -777,6 +883,8 @@ typedef struct + int aapcs_nextncrn; /* Next next core register number. */ + int aapcs_nvrn; /* Next Vector register number. */ + int aapcs_nextnvrn; /* Next Next Vector register number. */ ++ int aapcs_nprn; /* Next Predicate register number. */ ++ int aapcs_nextnprn; /* Next Next Predicate register number. */ + rtx aapcs_reg; /* Register assigned to this argument. This + is NULL_RTX if this parameter goes on + the stack. */ +@@ -787,6 +895,8 @@ typedef struct + aapcs_reg == NULL_RTX. */ + int aapcs_stack_size; /* The total size (in words, per 8 byte) of the + stack arg area so far. */ ++ bool silent_p; /* True if we should act silently, rather than ++ raise an error for invalid calls. */ + } CUMULATIVE_ARGS; + #endif + +@@ -842,7 +952,7 @@ typedef struct + /* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure. + move_by_pieces will continually copy the largest safe chunks. So a + 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient +- for both size and speed of copy, so we will instead use the "movmem" ++ for both size and speed of copy, so we will instead use the "cpymem" + standard name to implement the copy. This logic does not apply when + targeting -mstrict-align, so keep a sensible default in that case. */ + #define MOVE_RATIO(speed) \ +@@ -1025,13 +1135,13 @@ extern enum aarch64_code_model aarch64_cmodel; + #define AARCH64_VALID_SIMD_DREG_MODE(MODE) \ + ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \ + || (MODE) == V2SFmode || (MODE) == V4HFmode || (MODE) == DImode \ +- || (MODE) == DFmode) ++ || (MODE) == DFmode || (MODE) == V4BFmode) + + /* Modes valid for AdvSIMD Q registers. */ + #define AARCH64_VALID_SIMD_QREG_MODE(MODE) \ + ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \ + || (MODE) == V4SFmode || (MODE) == V8HFmode || (MODE) == V2DImode \ +- || (MODE) == V2DFmode) ++ || (MODE) == V2DFmode || (MODE) == V8BFmode) + + #define ENDIAN_LANE_N(NUNITS, N) \ + (BYTES_BIG_ENDIAN ? NUNITS - 1 - N : N) +@@ -1079,6 +1189,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); + extern tree aarch64_fp16_type_node; + extern tree aarch64_fp16_ptr_type_node; + ++/* This type is the user-visible __bf16, and a pointer to that type. Defined ++ in aarch64-builtins.c. */ ++extern tree aarch64_bf16_type_node; ++extern tree aarch64_bf16_ptr_type_node; ++ + /* The generic unwind code in libgcc does not initialize the frame pointer. + So in order to unwind a function using a frame pointer, the very first + function that is unwound must save the frame pointer. That way the frame +@@ -1094,7 +1209,8 @@ extern poly_uint16 aarch64_sve_vg; + #define BITS_PER_SVE_VECTOR (poly_uint16 (aarch64_sve_vg * 64)) + #define BYTES_PER_SVE_VECTOR (poly_uint16 (aarch64_sve_vg * 8)) + +-/* The number of bytes in an SVE predicate. */ ++/* The number of bits and bytes in an SVE predicate. */ ++#define BITS_PER_SVE_PRED BYTES_PER_SVE_VECTOR + #define BYTES_PER_SVE_PRED aarch64_sve_vg + + /* The SVE mode for a vector of bytes. 
*/ +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index 73c34a227..34cccc7cd 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -85,7 +85,6 @@ + (V29_REGNUM 61) + (V30_REGNUM 62) + (V31_REGNUM 63) +- (LAST_SAVED_REGNUM 63) + (SFP_REGNUM 64) + (AP_REGNUM 65) + (CC_REGNUM 66) +@@ -107,6 +106,11 @@ + (P13_REGNUM 81) + (P14_REGNUM 82) + (P15_REGNUM 83) ++ (LAST_SAVED_REGNUM 83) ++ (FFR_REGNUM 84) ++ ;; "FFR token": a fake register used for representing the scheduling ++ ;; restrictions on FFR-related operations. ++ (FFRT_REGNUM 85) + ;; Scratch register used by stack clash protection to calculate + ;; SVE CFA offsets during probing. + (STACK_CLASH_SVE_CFA_REGNUM 11) +@@ -120,13 +124,17 @@ + ;; Scratch registers used in frame layout. + (IP0_REGNUM 16) + (IP1_REGNUM 17) ++ (FP_REGNUM 29) + (LR_REGNUM 30) + ] + ) + + (define_c_enum "unspec" [ +- UNSPEC_AUTI1716 +- UNSPEC_AUTISP ++ UNSPEC_AUTIA1716 ++ UNSPEC_AUTIB1716 ++ UNSPEC_AUTIASP ++ UNSPEC_AUTIBSP ++ UNSPEC_CALLEE_ABI + UNSPEC_CASESI + UNSPEC_CRC32B + UNSPEC_CRC32CB +@@ -138,6 +146,11 @@ + UNSPEC_CRC32X + UNSPEC_FCVTZS + UNSPEC_FCVTZU ++ UNSPEC_FJCVTZS ++ UNSPEC_FRINT32Z ++ UNSPEC_FRINT32X ++ UNSPEC_FRINT64Z ++ UNSPEC_FRINT64X + UNSPEC_URECPE + UNSPEC_FRECPE + UNSPEC_FRECPS +@@ -169,8 +182,10 @@ + UNSPEC_LD4_LANE + UNSPEC_MB + UNSPEC_NOP +- UNSPEC_PACI1716 +- UNSPEC_PACISP ++ UNSPEC_PACIA1716 ++ UNSPEC_PACIB1716 ++ UNSPEC_PACIASP ++ UNSPEC_PACIBSP + UNSPEC_PRLG_STK + UNSPEC_REV + UNSPEC_RBIT +@@ -211,26 +226,49 @@ + UNSPEC_XPACLRI + UNSPEC_LD1_SVE + UNSPEC_ST1_SVE ++ UNSPEC_LDNT1_SVE ++ UNSPEC_STNT1_SVE + UNSPEC_LD1RQ + UNSPEC_LD1_GATHER ++ UNSPEC_LDFF1_GATHER + UNSPEC_ST1_SCATTER +- UNSPEC_MERGE_PTRUE +- UNSPEC_PTEST_PTRUE ++ UNSPEC_PRED_X ++ UNSPEC_PRED_Z ++ UNSPEC_PTEST ++ UNSPEC_PTRUE + UNSPEC_UNPACKSHI + UNSPEC_UNPACKUHI + UNSPEC_UNPACKSLO + UNSPEC_UNPACKULO + UNSPEC_PACK +- UNSPEC_FLOAT_CONVERT +- UNSPEC_WHILE_LO ++ UNSPEC_WHILELE ++ UNSPEC_WHILELO ++ UNSPEC_WHILELS ++ UNSPEC_WHILELT + UNSPEC_LDN + UNSPEC_STN + UNSPEC_INSR ++ UNSPEC_CLASTA + UNSPEC_CLASTB + UNSPEC_FADDA + UNSPEC_REV_SUBREG ++ UNSPEC_REINTERPRET + UNSPEC_SPECULATION_TRACKER + UNSPEC_COPYSIGN ++ UNSPEC_TTEST ; Represent transaction test. ++ UNSPEC_UPDATE_FFR ++ UNSPEC_UPDATE_FFRT ++ UNSPEC_RDFFR ++ UNSPEC_WRFFR ++ ;; Represents an SVE-style lane index, in which the indexing applies ++ ;; within the containing 128-bit block. ++ UNSPEC_SVE_LANE_SELECT ++ UNSPEC_SVE_CNT_PAT ++ UNSPEC_SVE_PREFETCH ++ UNSPEC_SVE_PREFETCH_GATHER ++ UNSPEC_SVE_COMPACT ++ UNSPEC_SVE_SPLICE ++ UNSPEC_LD1RO + ]) + + (define_c_enum "unspecv" [ +@@ -246,9 +284,35 @@ + UNSPECV_BTI_C ; Represent BTI c. + UNSPECV_BTI_J ; Represent BTI j. + UNSPECV_BTI_JC ; Represent BTI jc. ++ UNSPECV_TSTART ; Represent transaction start. ++ UNSPECV_TCOMMIT ; Represent transaction commit. ++ UNSPECV_TCANCEL ; Represent transaction cancel. ++ UNSPEC_RNDR ; Represent RNDR ++ UNSPEC_RNDRRS ; Represent RNDRRS + ] + ) + ++;; These constants are used as a const_int in various SVE unspecs ++;; to indicate whether the governing predicate is known to be a PTRUE. ++(define_constants ++ [; Indicates that the predicate might not be a PTRUE. ++ (SVE_MAYBE_NOT_PTRUE 0) ++ ++ ; Indicates that the predicate is known to be a PTRUE. 
++ (SVE_KNOWN_PTRUE 1)]) ++ ++;; These constants are used as a const_int in predicated SVE FP arithmetic ++;; to indicate whether the operation is allowed to make additional lanes ++;; active without worrying about the effect on faulting behavior. ++(define_constants ++ [; Indicates either that all lanes are active or that the instruction may ++ ; operate on inactive inputs even if doing so could induce a fault. ++ (SVE_RELAXED_GP 0) ++ ++ ; Indicates that some lanes might be inactive and that the instruction ++ ; must not operate on inactive inputs if doing so could induce a fault. ++ (SVE_STRICT_GP 1)]) ++ + ;; If further include files are added the defintion of MD_INCLUDES + ;; must be updated. + +@@ -383,8 +447,8 @@ + + (define_expand "cbranch4" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" +- [(match_operand:GPI 1 "register_operand" "") +- (match_operand:GPI 2 "aarch64_plus_operand" "")]) ++ [(match_operand:GPI 1 "register_operand") ++ (match_operand:GPI 2 "aarch64_plus_operand")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +@@ -397,8 +461,8 @@ + + (define_expand "cbranch4" + [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" +- [(match_operand:GPF 1 "register_operand" "") +- (match_operand:GPF 2 "aarch64_fp_compare_operand" "")]) ++ [(match_operand:GPF 1 "register_operand") ++ (match_operand:GPF 2 "aarch64_fp_compare_operand")]) + (label_ref (match_operand 3 "" "")) + (pc)))] + "" +@@ -412,7 +476,7 @@ + (define_expand "cbranchcc4" + [(set (pc) (if_then_else + (match_operator 0 "aarch64_comparison_operator" +- [(match_operand 1 "cc_register" "") ++ [(match_operand 1 "cc_register") + (match_operand 2 "const0_operand")]) + (label_ref (match_operand 3 "" "")) + (pc)))] +@@ -475,9 +539,9 @@ + ;; csneg x0, x0, x1, mi + + (define_expand "mod3" +- [(match_operand:GPI 0 "register_operand" "") +- (match_operand:GPI 1 "register_operand" "") +- (match_operand:GPI 2 "const_int_operand" "")] ++ [(match_operand:GPI 0 "register_operand") ++ (match_operand:GPI 1 "register_operand") ++ (match_operand:GPI 2 "const_int_operand")] + "" + { + HOST_WIDE_INT val = INTVAL (operands[2]); +@@ -530,10 +594,14 @@ + (pc)))] + "" + { ++ /* GCC's traditional style has been to use "beq" instead of "b.eq", etc., ++ but the "." is required for SVE conditions. */ ++ bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; + if (get_attr_length (insn) == 8) +- return aarch64_gen_far_branch (operands, 2, "Lbcond", "b%M0\\t"); ++ return aarch64_gen_far_branch (operands, 2, "Lbcond", ++ use_dot_p ? "b.%M0\\t" : "b%M0\\t"); + else +- return "b%m0\\t%l2"; ++ return use_dot_p ? 
"b.%m0\\t%l2" : "b%m0\\t%l2"; + } + [(set_attr "type" "branch") + (set (attr "length") +@@ -558,14 +626,14 @@ + ;; sub x0, x1, #(CST & 0xfff000) + ;; subs x0, x0, #(CST & 0x000fff) + ;; b .Label +-(define_insn_and_split "*compare_condjump" ++(define_insn_and_split "*compare_condjump" + [(set (pc) (if_then_else (EQL + (match_operand:GPI 0 "register_operand" "r") + (match_operand:GPI 1 "aarch64_imm24" "n")) + (label_ref:P (match_operand 2 "" "")) + (pc)))] +- "!aarch64_move_imm (INTVAL (operands[1]), mode) +- && !aarch64_plus_operand (operands[1], mode) ++ "!aarch64_move_imm (INTVAL (operands[1]), mode) ++ && !aarch64_plus_operand (operands[1], mode) + && !reload_completed" + "#" + "&& true" +@@ -573,20 +641,21 @@ + { + HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff; + HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000; +- rtx tmp = gen_reg_rtx (mode); +- emit_insn (gen_add3 (tmp, operands[0], GEN_INT (-hi_imm))); +- emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); ++ rtx tmp = gen_reg_rtx (mode); ++ emit_insn (gen_add3 (tmp, operands[0], GEN_INT (-hi_imm))); ++ emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); + rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); +- rtx cmp_rtx = gen_rtx_fmt_ee (, mode, cc_reg, const0_rtx); ++ rtx cmp_rtx = gen_rtx_fmt_ee (, mode, ++ cc_reg, const0_rtx); + emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2])); + DONE; + } + ) + + (define_expand "casesi" +- [(match_operand:SI 0 "register_operand" "") ; Index +- (match_operand:SI 1 "const_int_operand" "") ; Lower bound +- (match_operand:SI 2 "const_int_operand" "") ; Total range ++ [(match_operand:SI 0 "register_operand") ; Index ++ (match_operand:SI 1 "const_int_operand") ; Lower bound ++ (match_operand:SI 2 "const_int_operand") ; Total range + (match_operand:DI 3 "" "") ; Table label + (match_operand:DI 4 "" "")] ; Out of range label + "" +@@ -739,8 +808,12 @@ + if (aarch64_return_address_signing_enabled () + && TARGET_ARMV8_3 + && !crtl->calls_eh_return) +- return "retaa"; +- ++ { ++ if (aarch64_ra_sign_key == AARCH64_KEY_B) ++ return "retab"; ++ else ++ return "retaa"; ++ } + return "ret"; + } + [(set_attr "type" "branch")] +@@ -754,7 +827,7 @@ + + (define_insn "simple_return" + [(simple_return)] +- "aarch64_use_simple_return_insn_p ()" ++ "" + "ret" + [(set_attr "type" "branch")] + ) +@@ -868,14 +941,15 @@ + ;; ------------------------------------------------------------------- + + (define_expand "call" +- [(parallel [(call (match_operand 0 "memory_operand" "") +- (match_operand 1 "general_operand" "")) +- (use (match_operand 2 "" "")) +- (clobber (reg:DI LR_REGNUM))])] ++ [(parallel ++ [(call (match_operand 0 "memory_operand") ++ (match_operand 1 "general_operand")) ++ (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) ++ (clobber (reg:DI LR_REGNUM))])] + "" + " + { +- aarch64_expand_call (NULL_RTX, operands[0], false); ++ aarch64_expand_call (NULL_RTX, operands[0], operands[2], false); + DONE; + }" + ) +@@ -883,6 +957,7 @@ + (define_insn "*call_insn" + [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "r, Usf")) + (match_operand 1 "" "")) ++ (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) + (clobber (reg:DI LR_REGNUM))] + "" + "@ +@@ -892,15 +967,16 @@ + ) + + (define_expand "call_value" +- [(parallel [(set (match_operand 0 "" "") +- (call (match_operand 1 "memory_operand" "") +- (match_operand 2 "general_operand" ""))) +- (use (match_operand 3 "" "")) +- (clobber (reg:DI LR_REGNUM))])] ++ [(parallel ++ [(set 
(match_operand 0 "") ++ (call (match_operand 1 "memory_operand") ++ (match_operand 2 "general_operand"))) ++ (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) ++ (clobber (reg:DI LR_REGNUM))])] + "" + " + { +- aarch64_expand_call (operands[0], operands[1], false); ++ aarch64_expand_call (operands[0], operands[1], operands[3], false); + DONE; + }" + ) +@@ -909,6 +985,7 @@ + [(set (match_operand 0 "" "") + (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "r, Usf")) + (match_operand 2 "" ""))) ++ (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) + (clobber (reg:DI LR_REGNUM))] + "" + "@ +@@ -918,33 +995,36 @@ + ) + + (define_expand "sibcall" +- [(parallel [(call (match_operand 0 "memory_operand" "") +- (match_operand 1 "general_operand" "")) +- (return) +- (use (match_operand 2 "" ""))])] ++ [(parallel ++ [(call (match_operand 0 "memory_operand") ++ (match_operand 1 "general_operand")) ++ (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) ++ (return)])] + "" + { +- aarch64_expand_call (NULL_RTX, operands[0], true); ++ aarch64_expand_call (NULL_RTX, operands[0], operands[2], true); + DONE; + } + ) + + (define_expand "sibcall_value" +- [(parallel [(set (match_operand 0 "" "") +- (call (match_operand 1 "memory_operand" "") +- (match_operand 2 "general_operand" ""))) +- (return) +- (use (match_operand 3 "" ""))])] ++ [(parallel ++ [(set (match_operand 0 "") ++ (call (match_operand 1 "memory_operand") ++ (match_operand 2 "general_operand"))) ++ (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) ++ (return)])] + "" + { +- aarch64_expand_call (operands[0], operands[1], true); ++ aarch64_expand_call (operands[0], operands[1], operands[3], true); + DONE; + } + ) + + (define_insn "*sibcall_insn" + [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf")) +- (match_operand 1 "" "")) ++ (match_operand 1 "")) ++ (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) + (return)] + "SIBLING_CALL_P (insn)" + "@ +@@ -954,10 +1034,11 @@ + ) + + (define_insn "*sibcall_value_insn" +- [(set (match_operand 0 "" "") ++ [(set (match_operand 0 "") + (call (mem:DI + (match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf")) +- (match_operand 2 "" ""))) ++ (match_operand 2 ""))) ++ (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) + (return)] + "SIBLING_CALL_P (insn)" + "@ +@@ -977,7 +1058,9 @@ + { + int i; + +- emit_call_insn (gen_call (operands[0], const0_rtx, NULL)); ++ /* Untyped calls always use the default ABI. It's only possible to use ++ ABI variants if we know the type of the target function. 
*/ ++ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { +@@ -998,8 +1081,8 @@ + ;; ------------------------------------------------------------------- + + (define_expand "mov" +- [(set (match_operand:SHORT 0 "nonimmediate_operand" "") +- (match_operand:SHORT 1 "general_operand" ""))] ++ [(set (match_operand:SHORT 0 "nonimmediate_operand") ++ (match_operand:SHORT 1 "general_operand"))] + "" + " + if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) +@@ -1055,8 +1138,8 @@ + ) + + (define_expand "mov" +- [(set (match_operand:GPI 0 "nonimmediate_operand" "") +- (match_operand:GPI 1 "general_operand" ""))] ++ [(set (match_operand:GPI 0 "nonimmediate_operand") ++ (match_operand:GPI 1 "general_operand"))] + "" + " + if (MEM_P (operands[0]) && !MEM_VOLATILE_P (operands[0]) +@@ -1162,8 +1245,8 @@ + ) + + (define_expand "movti" +- [(set (match_operand:TI 0 "nonimmediate_operand" "") +- (match_operand:TI 1 "general_operand" ""))] ++ [(set (match_operand:TI 0 "nonimmediate_operand") ++ (match_operand:TI 1 "general_operand"))] + "" + " + if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) +@@ -1217,8 +1300,8 @@ + }) + + (define_expand "mov" +- [(set (match_operand:GPF_TF_F16 0 "nonimmediate_operand" "") +- (match_operand:GPF_TF_F16 1 "general_operand" ""))] ++ [(set (match_operand:GPF_TF_F16_MOV 0 "nonimmediate_operand") ++ (match_operand:GPF_TF_F16_MOV 1 "general_operand"))] + "" + { + if (!TARGET_FLOAT) +@@ -1234,11 +1317,11 @@ + } + ) + +-(define_insn "*movhf_aarch64" +- [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") +- (match_operand:HF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] +- "TARGET_FLOAT && (register_operand (operands[0], HFmode) +- || aarch64_reg_or_fp_zero (operands[1], HFmode))" ++(define_insn "*mov_aarch64" ++ [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") ++ (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] ++ "TARGET_FLOAT && (register_operand (operands[0], mode) ++ || aarch64_reg_or_fp_zero (operands[1], mode))" + "@ + movi\\t%0.4h, #0 + fmov\\t%h0, %w1 +@@ -1363,17 +1446,17 @@ + + ;; 0 is dst + ;; 1 is src +-;; 2 is size of move in bytes ++;; 2 is size of copy in bytes + ;; 3 is alignment + +-(define_expand "movmemdi" ++(define_expand "cpymemdi" + [(match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "immediate_operand") + (match_operand:DI 3 "immediate_operand")] + "!STRICT_ALIGNMENT" + { +- if (aarch64_expand_movmem (operands)) ++ if (aarch64_expand_cpymem (operands)) + DONE; + FAIL; + } +@@ -1492,8 +1575,8 @@ + (mem:GPI (plus:P (match_dup 1) + (match_operand:P 5 "const_int_operand" "n"))))])] + "INTVAL (operands[5]) == GET_MODE_SIZE (mode)" +- "ldp\\t%2, %3, [%1], %4" +- [(set_attr "type" "load_")] ++ "ldp\\t%2, %3, [%1], %4" ++ [(set_attr "type" "load_")] + ) + + (define_insn "loadwb_pair_" +@@ -1507,7 +1590,7 @@ + (mem:GPF (plus:P (match_dup 1) + (match_operand:P 5 "const_int_operand" "n"))))])] + "INTVAL (operands[5]) == GET_MODE_SIZE (mode)" +- "ldp\\t%2, %3, [%1], %4" ++ "ldp\\t%2, %3, [%1], %4" + [(set_attr "type" "neon_load1_2reg")] + ) + +@@ -1540,8 +1623,8 @@ + (match_operand:P 5 "const_int_operand" "n"))) + (match_operand:GPI 3 "register_operand" "r"))])] + "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" +- "stp\\t%2, %3, [%0, %4]!" +- [(set_attr "type" "store_")] ++ "stp\\t%2, %3, [%0, %4]!" 
++ [(set_attr "type" "store_")] + ) + + (define_insn "storewb_pair_" +@@ -1556,7 +1639,7 @@ + (match_operand:P 5 "const_int_operand" "n"))) + (match_operand:GPF 3 "register_operand" "w"))])] + "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" +- "stp\\t%2, %3, [%0, %4]!" ++ "stp\\t%2, %3, [%0, %4]!" + [(set_attr "type" "neon_store1_2reg")] + ) + +@@ -1702,9 +1785,9 @@ + + (define_expand "add3" + [(set +- (match_operand:GPI 0 "register_operand" "") +- (plus:GPI (match_operand:GPI 1 "register_operand" "") +- (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "")))] ++ (match_operand:GPI 0 "register_operand") ++ (plus:GPI (match_operand:GPI 1 "register_operand") ++ (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand")))] + "" + { + /* If operands[1] is a subreg extract the inner RTX. */ +@@ -1713,6 +1796,7 @@ + /* If the constant is too large for a single instruction and isn't frame + based, split off the immediate so it is available for CSE. */ + if (!aarch64_plus_immediate (operands[2], mode) ++ && !(TARGET_SVE && aarch64_sve_plus_immediate (operands[2], mode)) + && can_create_pseudo_p () + && (!REG_P (op1) + || !REGNO_PTR_FRAME_P (REGNO (op1)))) +@@ -1730,10 +1814,10 @@ + + (define_insn "*add3_aarch64" + [(set +- (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,rk") ++ (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk") + (plus:GPI +- (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,rk") +- (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uav")))] ++ (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk") ++ (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))] + "" + "@ + add\\t%0, %1, %2 +@@ -1741,10 +1825,11 @@ + add\\t%0, %1, %2 + sub\\t%0, %1, #%n2 + # +- * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]);" +- ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder. +- [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm") +- (set_attr "arch" "*,*,simd,*,*,*")] ++ * return aarch64_output_sve_scalar_inc_dec (operands[2]); ++ * return aarch64_output_sve_addvl_addpl (operands[2]);" ++ ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. ++ [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm") ++ (set_attr "arch" "*,*,simd,*,*,sve,sve")] + ) + + ;; zero_extend version of above +@@ -1823,17 +1908,18 @@ + ;; this pattern. + (define_insn_and_split "*add3_poly_1" + [(set +- (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,&r") ++ (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,r,&r") + (plus:GPI +- (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,rk,rk") +- (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uav,Uat")))] ++ (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,rk,0,rk") ++ (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uav,Uai,Uat")))] + "TARGET_SVE && operands[0] != stack_pointer_rtx" + "@ + add\\t%0, %1, %2 + add\\t%0, %1, %2 + sub\\t%0, %1, #%n2 + # +- * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]); ++ * return aarch64_output_sve_scalar_inc_dec (operands[2]); ++ * return aarch64_output_sve_addvl_addpl (operands[2]); + #" + "&& epilogue_completed + && !reg_overlap_mentioned_p (operands[0], operands[1]) +@@ -1844,8 +1930,8 @@ + operands[2], operands[0], NULL_RTX); + DONE; + } +- ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder. 
+- [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,multiple")] ++ ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. ++ [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,alu_imm,multiple")] + ) + + (define_split +@@ -1897,9 +1983,9 @@ + }) + + (define_expand "addti3" +- [(set (match_operand:TI 0 "register_operand" "") +- (plus:TI (match_operand:TI 1 "register_operand" "") +- (match_operand:TI 2 "aarch64_reg_or_imm" "")))] ++ [(set (match_operand:TI 0 "register_operand") ++ (plus:TI (match_operand:TI 1 "register_operand") ++ (match_operand:TI 2 "aarch64_reg_or_imm")))] + "" + { + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; +@@ -1930,9 +2016,9 @@ + }) + + (define_expand "addvti4" +- [(match_operand:TI 0 "register_operand" "") +- (match_operand:TI 1 "register_operand" "") +- (match_operand:TI 2 "aarch64_reg_or_imm" "") ++ [(match_operand:TI 0 "register_operand") ++ (match_operand:TI 1 "register_operand") ++ (match_operand:TI 2 "aarch64_reg_or_imm") + (label_ref (match_operand 3 "" ""))] + "" + { +@@ -1964,9 +2050,9 @@ + }) + + (define_expand "uaddvti4" +- [(match_operand:TI 0 "register_operand" "") +- (match_operand:TI 1 "register_operand" "") +- (match_operand:TI 2 "aarch64_reg_or_imm" "") ++ [(match_operand:TI 0 "register_operand") ++ (match_operand:TI 1 "register_operand") ++ (match_operand:TI 2 "aarch64_reg_or_imm") + (label_ref (match_operand 3 "" ""))] + "" + { +@@ -2501,9 +2587,9 @@ + (plus: + (match_dup 4) + (zero_extend: +- (match_operand:GPI 1 "register_operand" ""))) ++ (match_operand:GPI 1 "register_operand"))) + (zero_extend: +- (match_operand:GPI 2 "register_operand" ""))) ++ (match_operand:GPI 2 "register_operand"))) + (match_dup 6))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI +@@ -2564,9 +2650,9 @@ + (plus: + (match_dup 3) + (sign_extend: +- (match_operand:GPI 1 "register_operand" ""))) ++ (match_operand:GPI 1 "register_operand"))) + (sign_extend: +- (match_operand:GPI 2 "register_operand" ""))) ++ (match_operand:GPI 2 "register_operand"))) + (sign_extend: + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) +@@ -2835,9 +2921,9 @@ + }) + + (define_expand "subti3" +- [(set (match_operand:TI 0 "register_operand" "") +- (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") +- (match_operand:TI 2 "register_operand" "")))] ++ [(set (match_operand:TI 0 "register_operand") ++ (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero") ++ (match_operand:TI 2 "register_operand")))] + "" + { + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; +@@ -3285,12 +3371,12 @@ + [(set (reg:CC CC_REGNUM) + (compare:CC + (zero_extend: +- (match_operand:GPI 1 "aarch64_reg_or_zero" "")) ++ (match_operand:GPI 1 "aarch64_reg_or_zero")) + (plus: + (zero_extend: +- (match_operand:GPI 2 "register_operand" "")) ++ (match_operand:GPI 2 "register_operand")) + (ltu: (reg:CC CC_REGNUM) (const_int 0))))) +- (set (match_operand:GPI 0 "register_operand" "") ++ (set (match_operand:GPI 0 "register_operand") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] +@@ -3353,16 +3439,16 @@ + (compare:CC_V + (minus: + (sign_extend: +- (match_operand:GPI 1 "aarch64_reg_or_zero" "")) ++ (match_operand:GPI 1 "aarch64_reg_or_zero")) + (plus: + (sign_extend: +- (match_operand:GPI 2 "register_operand" "")) ++ (match_operand:GPI 2 "register_operand")) + (ltu: (reg:CC CC_REGNUM) (const_int 0)))) + (sign_extend: + (minus:GPI (match_dup 1) + (plus:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 
0)) + (match_dup 2)))))) +- (set (match_operand:GPI 0 "register_operand" "") ++ (set (match_operand:GPI 0 "register_operand") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] +@@ -3475,8 +3561,8 @@ + ) + + (define_expand "abs2" +- [(match_operand:GPI 0 "register_operand" "") +- (match_operand:GPI 1 "register_operand" "")] ++ [(match_operand:GPI 0 "register_operand") ++ (match_operand:GPI 1 "register_operand")] + "" + { + rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx); +@@ -3889,10 +3975,10 @@ + ;; ------------------------------------------------------------------- + + (define_expand "cstore4" +- [(set (match_operand:SI 0 "register_operand" "") ++ [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "aarch64_comparison_operator" +- [(match_operand:GPI 2 "register_operand" "") +- (match_operand:GPI 3 "aarch64_plus_operand" "")]))] ++ [(match_operand:GPI 2 "register_operand") ++ (match_operand:GPI 3 "aarch64_plus_operand")]))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], +@@ -3914,10 +4000,10 @@ + + + (define_expand "cstore4" +- [(set (match_operand:SI 0 "register_operand" "") ++ [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "aarch64_comparison_operator_mode" +- [(match_operand:GPF 2 "register_operand" "") +- (match_operand:GPF 3 "aarch64_fp_compare_operand" "")]))] ++ [(match_operand:GPF 2 "register_operand") ++ (match_operand:GPF 3 "aarch64_fp_compare_operand")]))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], +@@ -4002,13 +4088,13 @@ + ) + + (define_expand "cmov6" +- [(set (match_operand:GPI 0 "register_operand" "") ++ [(set (match_operand:GPI 0 "register_operand") + (if_then_else:GPI + (match_operator 1 "aarch64_comparison_operator" +- [(match_operand:GPI 2 "register_operand" "") +- (match_operand:GPI 3 "aarch64_plus_operand" "")]) +- (match_operand:GPI 4 "register_operand" "") +- (match_operand:GPI 5 "register_operand" "")))] ++ [(match_operand:GPI 2 "register_operand") ++ (match_operand:GPI 3 "aarch64_plus_operand")]) ++ (match_operand:GPI 4 "register_operand") ++ (match_operand:GPI 5 "register_operand")))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], +@@ -4018,13 +4104,13 @@ + ) + + (define_expand "cmov6" +- [(set (match_operand:GPF 0 "register_operand" "") ++ [(set (match_operand:GPF 0 "register_operand") + (if_then_else:GPF + (match_operator 1 "aarch64_comparison_operator" +- [(match_operand:GPF 2 "register_operand" "") +- (match_operand:GPF 3 "aarch64_fp_compare_operand" "")]) +- (match_operand:GPF 4 "register_operand" "") +- (match_operand:GPF 5 "register_operand" "")))] ++ [(match_operand:GPF 2 "register_operand") ++ (match_operand:GPF 3 "aarch64_fp_compare_operand")]) ++ (match_operand:GPF 4 "register_operand") ++ (match_operand:GPF 5 "register_operand")))] + "" + " + operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], +@@ -4102,10 +4188,10 @@ + ) + + (define_expand "movcc" +- [(set (match_operand:ALLI 0 "register_operand" "") +- (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator" "") +- (match_operand:ALLI 2 "register_operand" "") +- (match_operand:ALLI 3 "register_operand" "")))] ++ [(set (match_operand:ALLI 0 "register_operand") ++ (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator") ++ (match_operand:ALLI 2 "register_operand") ++ (match_operand:ALLI 3 "register_operand")))] + "" + { + rtx 
ccreg; +@@ -4121,10 +4207,10 @@ + ) + + (define_expand "movcc" +- [(set (match_operand:GPI 0 "register_operand" "") +- (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator" "") +- (match_operand:GPF 2 "register_operand" "") +- (match_operand:GPF 3 "register_operand" "")))] ++ [(set (match_operand:GPI 0 "register_operand") ++ (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator") ++ (match_operand:GPF 2 "register_operand") ++ (match_operand:GPF 3 "register_operand")))] + "" + { + rtx ccreg; +@@ -4140,10 +4226,10 @@ + ) + + (define_expand "movcc" +- [(set (match_operand:GPF 0 "register_operand" "") +- (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "") +- (match_operand:GPF 2 "register_operand" "") +- (match_operand:GPF 3 "register_operand" "")))] ++ [(set (match_operand:GPF 0 "register_operand") ++ (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator") ++ (match_operand:GPF 2 "register_operand") ++ (match_operand:GPF 3 "register_operand")))] + "" + { + rtx ccreg; +@@ -4159,10 +4245,10 @@ + ) + + (define_expand "cc" +- [(set (match_operand:GPI 0 "register_operand" "") +- (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator" "") +- (NEG_NOT:GPI (match_operand:GPI 2 "register_operand" "")) +- (match_operand:GPI 3 "register_operand" "")))] ++ [(set (match_operand:GPI 0 "register_operand") ++ (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator") ++ (NEG_NOT:GPI (match_operand:GPI 2 "register_operand")) ++ (match_operand:GPI 3 "register_operand")))] + "" + { + rtx ccreg; +@@ -4769,7 +4855,7 @@ + [(set_attr "type" "alus_imm")] + ) + +-(define_insn "*ands_compare0" ++(define_insn "*ands_compare0" + [(set (reg:CC_NZ CC_REGNUM) + (compare:CC_NZ + (zero_extend:GPI (match_operand:SHORT 1 "register_operand" "r")) +@@ -5391,7 +5477,7 @@ + ;; ------------------------------------------------------------------- + + (define_expand "" +- [(set (match_operand:DI 0 "register_operand" "=r") ++ [(set (match_operand:DI 0 "register_operand") + (ANY_EXTRACT:DI (match_operand:DI 1 "register_operand") + (match_operand 2 + "aarch64_simd_shift_imm_offset_di") +@@ -5647,6 +5733,21 @@ + [(set_attr "type" "bfx")] + ) + ++(define_insn "*ashiftsi_extvdi_bfiz" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ashift:SI ++ (match_operator:SI 4 "subreg_lowpart_operator" ++ [(sign_extract:DI ++ (match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "aarch64_simd_shift_imm_offset_si") ++ (const_int 0))]) ++ (match_operand 3 "aarch64_simd_shift_imm_si")))] ++ "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), ++ 1, GET_MODE_BITSIZE (SImode) - 1)" ++ "sbfiz\\t%w0, %w1, %3, %2" ++ [(set_attr "type" "bfx")] ++) ++ + ;; When the bit position and width of the equivalent extraction add up to 32 + ;; we can use a W-reg LSL instruction taking advantage of the implicit + ;; zero-extension of the X-reg. +@@ -6008,6 +6109,44 @@ + [(set_attr "type" "f_cvtf2i")] + ) + ++;; Equal width integer to fp and multiply combine. 
++(define_insn "*aarch64_cvtf2_mult" ++ [(set (match_operand:GPF 0 "register_operand" "=w,w") ++ (mult:GPF (FLOATUORS:GPF ++ (match_operand: 1 "register_operand" "w,?r")) ++ (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt,Dt")))] ++ "TARGET_FLOAT" ++ { ++ operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); ++ switch (which_alternative) ++ { ++ case 0: ++ return "cvtf\t%0, %1, #%2"; ++ case 1: ++ return "cvtf\t%0, %1, #%2"; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ [(set_attr "type" "neon_int_to_fp_,f_cvti2f") ++ (set_attr "arch" "simd,fp")] ++) ++ ++;; Unequal width integer to fp and multiply combine. ++(define_insn "*aarch64_cvtf2_mult" ++ [(set (match_operand:GPF 0 "register_operand" "=w") ++ (mult:GPF (FLOATUORS:GPF ++ (match_operand: 1 "register_operand" "r")) ++ (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt")))] ++ "TARGET_FLOAT" ++ { ++ operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); ++ return "cvtf\t%0, %1, #%2"; ++ } ++ [(set_attr "type" "f_cvti2f")] ++) ++ ++;; Equal width integer to fp conversion. + (define_insn "2" + [(set (match_operand:GPF 0 "register_operand" "=w,w") + (FLOATUORS:GPF (match_operand: 1 "register_operand" "w,?r")))] +@@ -6019,6 +6158,7 @@ + (set_attr "arch" "simd,fp")] + ) + ++;; Unequal width integer to fp conversions. + (define_insn "2" + [(set (match_operand:GPF 0 "register_operand" "=w") + (FLOATUORS:GPF (match_operand: 1 "register_operand" "r")))] +@@ -6241,8 +6381,8 @@ + ) + + (define_expand "sqrt2" +- [(set (match_operand:GPF_F16 0 "register_operand" "=w") +- (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] ++ [(set (match_operand:GPF_F16 0 "register_operand") ++ (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand")))] + "TARGET_FLOAT" + { + if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) +@@ -6401,6 +6541,7 @@ + ;; ------------------------------------------------------------------- + ;; Reload Scalar Floating point modes from constant pool. + ;; The AArch64 port doesn't have __int128 constant move support. ++;; The patterns need constraints due to TARGET_SECONDARY_RELOAD hook. + (define_expand "@aarch64_reload_movcp" + [(set (match_operand:GPF_TF 0 "register_operand" "=w") + (mem:GPF_TF (match_operand 1 "aarch64_constant_pool_symref" "S"))) +@@ -6501,9 +6642,9 @@ + ;; rodata section. + + (define_expand "add_losym" +- [(set (match_operand 0 "register_operand" "=r") +- (lo_sum (match_operand 1 "register_operand" "r") +- (match_operand 2 "aarch64_valid_symref" "S")))] ++ [(set (match_operand 0 "register_operand") ++ (lo_sum (match_operand 1 "register_operand") ++ (match_operand 2 "aarch64_valid_symref")))] + "" + { + machine_mode mode = GET_MODE (operands[0]); +@@ -6602,9 +6743,10 @@ + ;; instructions in the TLS stubs, in order to enable linker relaxation. + ;; Therefore we treat the stubs as an atomic sequence. 
+ (define_expand "tlsgd_small_" +- [(parallel [(set (match_operand 0 "register_operand" "") ++ [(parallel [(set (match_operand 0 "register_operand") + (call (mem:DI (match_dup 2)) (const_int 1))) +- (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "")] UNSPEC_GOTSMALLTLS) ++ (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI) ++ (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref")] UNSPEC_GOTSMALLTLS) + (clobber (reg:DI LR_REGNUM))])] + "" + { +@@ -6614,6 +6756,7 @@ + (define_insn "*tlsgd_small_" + [(set (match_operand 0 "register_operand" "") + (call (mem:DI (match_operand:DI 2 "" "")) (const_int 1))) ++ (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI) + (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS) + (clobber (reg:DI LR_REGNUM)) + ] +@@ -6714,7 +6857,12 @@ + "TARGET_TLS_DESC" + { + if (TARGET_SVE) +- emit_insn (gen_tlsdesc_small_sve_ (operands[0])); ++ { ++ rtx abi = gen_int_mode (aarch64_tlsdesc_abi_id (), DImode); ++ rtx_insn *call ++ = emit_call_insn (gen_tlsdesc_small_sve_ (operands[0], abi)); ++ RTL_CONST_CALL_P (call) = 1; ++ } + else + emit_insn (gen_tlsdesc_small_advsimd_ (operands[0])); + DONE; +@@ -6729,72 +6877,27 @@ + UNSPEC_TLSDESC)) + (clobber (reg:DI LR_REGNUM)) + (clobber (reg:CC CC_REGNUM)) +- (clobber (match_scratch:DI 1 "=r"))] ++ (clobber (match_scratch:DI 1 "=r")) ++ (use (reg:DI FP_REGNUM))] + "TARGET_TLS_DESC && !TARGET_SVE" + "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" + [(set_attr "type" "call") + (set_attr "length" "16")]) + +-;; For SVE, model tlsdesc calls as clobbering the lower 128 bits of +-;; all vector registers, and clobber all predicate registers, on +-;; top of the usual R0 and LR. ++;; For SVE, model tlsdesc calls as normal calls, with the callee ABI ++;; describing the extra call-preserved guarantees. This would work ++;; for non-SVE too, but avoiding a call is probably better if we can. 
+ (define_insn "tlsdesc_small_sve_" + [(set (reg:PTR R0_REGNUM) +- (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] +- UNSPEC_TLSDESC)) ++ (call (mem:DI (unspec:PTR ++ [(match_operand 0 "aarch64_valid_symref")] ++ UNSPEC_TLSDESC)) ++ (const_int 0))) ++ (unspec:DI [(match_operand:DI 1 "const_int_operand")] UNSPEC_CALLEE_ABI) + (clobber (reg:DI LR_REGNUM)) +- (clobber (reg:CC CC_REGNUM)) +- (clobber_high (reg:TI V0_REGNUM)) +- (clobber_high (reg:TI V1_REGNUM)) +- (clobber_high (reg:TI V2_REGNUM)) +- (clobber_high (reg:TI V3_REGNUM)) +- (clobber_high (reg:TI V4_REGNUM)) +- (clobber_high (reg:TI V5_REGNUM)) +- (clobber_high (reg:TI V6_REGNUM)) +- (clobber_high (reg:TI V7_REGNUM)) +- (clobber_high (reg:TI V8_REGNUM)) +- (clobber_high (reg:TI V9_REGNUM)) +- (clobber_high (reg:TI V10_REGNUM)) +- (clobber_high (reg:TI V11_REGNUM)) +- (clobber_high (reg:TI V12_REGNUM)) +- (clobber_high (reg:TI V13_REGNUM)) +- (clobber_high (reg:TI V14_REGNUM)) +- (clobber_high (reg:TI V15_REGNUM)) +- (clobber_high (reg:TI V16_REGNUM)) +- (clobber_high (reg:TI V17_REGNUM)) +- (clobber_high (reg:TI V18_REGNUM)) +- (clobber_high (reg:TI V19_REGNUM)) +- (clobber_high (reg:TI V20_REGNUM)) +- (clobber_high (reg:TI V21_REGNUM)) +- (clobber_high (reg:TI V22_REGNUM)) +- (clobber_high (reg:TI V23_REGNUM)) +- (clobber_high (reg:TI V24_REGNUM)) +- (clobber_high (reg:TI V25_REGNUM)) +- (clobber_high (reg:TI V26_REGNUM)) +- (clobber_high (reg:TI V27_REGNUM)) +- (clobber_high (reg:TI V28_REGNUM)) +- (clobber_high (reg:TI V29_REGNUM)) +- (clobber_high (reg:TI V30_REGNUM)) +- (clobber_high (reg:TI V31_REGNUM)) +- (clobber (reg:VNx2BI P0_REGNUM)) +- (clobber (reg:VNx2BI P1_REGNUM)) +- (clobber (reg:VNx2BI P2_REGNUM)) +- (clobber (reg:VNx2BI P3_REGNUM)) +- (clobber (reg:VNx2BI P4_REGNUM)) +- (clobber (reg:VNx2BI P5_REGNUM)) +- (clobber (reg:VNx2BI P6_REGNUM)) +- (clobber (reg:VNx2BI P7_REGNUM)) +- (clobber (reg:VNx2BI P8_REGNUM)) +- (clobber (reg:VNx2BI P9_REGNUM)) +- (clobber (reg:VNx2BI P10_REGNUM)) +- (clobber (reg:VNx2BI P11_REGNUM)) +- (clobber (reg:VNx2BI P12_REGNUM)) +- (clobber (reg:VNx2BI P13_REGNUM)) +- (clobber (reg:VNx2BI P14_REGNUM)) +- (clobber (reg:VNx2BI P15_REGNUM)) +- (clobber (match_scratch:DI 1 "=r"))] ++ (clobber (match_scratch:DI 2 "=r"))] + "TARGET_TLS_DESC && TARGET_SVE" +- "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" ++ "adrp\\tx0, %A0\;ldr\\t%2, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\t%2" + [(set_attr "type" "call") + (set_attr "length" "16")]) + +@@ -6808,6 +6911,15 @@ + [(set_attr "length" "0")] + ) + ++(define_insn "aarch64_fjcvtzs" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:DF 1 "register_operand" "w")] ++ UNSPEC_FJCVTZS))] ++ "TARGET_JSCVT" ++ "fjcvtzs\\t%w0, %d1" ++ [(set_attr "type" "f_cvtf2i")] ++) ++ + ;; Pointer authentication patterns are always provided. In architecture + ;; revisions prior to ARMv8.3-A these HINT instructions operate as NOPs. + ;; This lets the user write portable software which authenticates pointers +@@ -6821,7 +6933,7 @@ + [(set (reg:DI R30_REGNUM) + (unspec:DI [(reg:DI R30_REGNUM) (reg:DI SP_REGNUM)] PAUTH_LR_SP))] + "" +- "hint\t // asp"; ++ "hint\t // sp"; + ) + + ;; Signing/Authenticating X17 using X16 as the salt. +@@ -6830,7 +6942,7 @@ + [(set (reg:DI R17_REGNUM) + (unspec:DI [(reg:DI R17_REGNUM) (reg:DI R16_REGNUM)] PAUTH_17_16))] + "" +- "hint\t // a1716"; ++ "hint\t // 1716"; + ) + + ;; Stripping the signature in R30. 
+@@ -6885,7 +6997,7 @@ + + ;; Named pattern for expanding thread pointer reference. + (define_expand "get_thread_pointerdi" +- [(match_operand:DI 0 "register_operand" "=r")] ++ [(match_operand:DI 0 "register_operand")] + "" + { + rtx tmp = aarch64_load_tp (operands[0]); +@@ -6941,13 +7053,15 @@ + } + [(set_attr "type" "mrs")]) + ++;; DO NOT SPLIT THIS PATTERN. It is important for security reasons that the ++;; canary value does not live beyond the life of this sequence. + (define_insn "stack_protect_set_" + [(set (match_operand:PTR 0 "memory_operand" "=m") + (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")] + UNSPEC_SP_SET)) + (set (match_scratch:PTR 2 "=&r") (const_int 0))] + "" +- "ldr\\t%2, %1\;str\\t%2, %0\;mov\t%2,0" ++ "ldr\\t%2, %1\;str\\t%2, %0\;mov\t%2, 0" + [(set_attr "length" "12") + (set_attr "type" "multiple")]) + +@@ -7122,12 +7236,6 @@ + [(set_attr "type" "no_insn")] + ) + +-;; Helper for aarch64.c code. +-(define_expand "set_clobber_cc" +- [(parallel [(set (match_operand 0) +- (match_operand 1)) +- (clobber (reg:CC CC_REGNUM))])]) +- + ;; Hard speculation barrier. + (define_insn "speculation_barrier" + [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)] +@@ -7142,10 +7250,10 @@ + ;; tracking enabled. Use the speculation tracker to decide whether to + ;; copy operand 1 to the target, or to copy the fail value (operand 2). + (define_expand "@despeculate_copy" +- [(set (match_operand:ALLI_TI 0 "register_operand" "=r") ++ [(set (match_operand:ALLI_TI 0 "register_operand") + (unspec_volatile:ALLI_TI +- [(match_operand:ALLI_TI 1 "register_operand" "r") +- (match_operand:ALLI_TI 2 "aarch64_reg_or_zero" "rZ") ++ [(match_operand:ALLI_TI 1 "register_operand") ++ (match_operand:ALLI_TI 2 "aarch64_reg_or_zero") + (use (reg:DI SPECULATION_TRACKER_REGNUM)) + (clobber (reg:CC CC_REGNUM))] UNSPECV_SPECULATION_BARRIER))] + "" +@@ -7235,6 +7343,73 @@ + (set_attr "speculation_barrier" "true")] + ) + ++(define_insn "aarch64_" ++ [(set (match_operand:VSFDF 0 "register_operand" "=w") ++ (unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")] ++ FRINTNZX))] ++ "TARGET_FRINT && TARGET_FLOAT ++ && !(VECTOR_MODE_P (mode) && !TARGET_SIMD)" ++ "\\t%0, %1" ++ [(set_attr "type" "f_rint")] ++) ++ ++;; Transactional Memory Extension (TME) instructions. 
++ ++(define_insn "tstart" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_TSTART)) ++ (clobber (mem:BLK (scratch)))] ++ "TARGET_TME" ++ "tstart\\t%0" ++ [(set_attr "type" "tme")] ++) ++ ++(define_insn "ttest" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(const_int 0)] UNSPEC_TTEST)) ++ (clobber (mem:BLK (scratch)))] ++ "TARGET_TME" ++ "ttest\\t%0" ++ [(set_attr "type" "tme")] ++) ++ ++(define_insn "tcommit" ++ [(unspec_volatile:BLK [(const_int 0)] UNSPECV_TCOMMIT) ++ (clobber (mem:BLK (scratch)))] ++ "TARGET_TME" ++ "tcommit" ++ [(set_attr "type" "tme")] ++) ++ ++(define_insn "tcancel" ++ [(unspec_volatile:BLK ++ [(match_operand 0 "const_int_operand" "n")] UNSPECV_TCANCEL) ++ (clobber (mem:BLK (scratch)))] ++ "TARGET_TME && (UINTVAL (operands[0]) <= 65535)" ++ "tcancel\\t#%0" ++ [(set_attr "type" "tme")] ++) ++ ++(define_insn "aarch64_rndr" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(const_int 0)] UNSPEC_RNDR)) ++ (set (reg:CC_Z CC_REGNUM) ++ (unspec_volatile:CC_Z [(const_int 0)] UNSPEC_RNDR))] ++ "TARGET_RNG" ++ "mrs\t%0, RNDR" ++ [(set_attr "type" "mrs")] ++) ++ ++(define_insn "aarch64_rndrrs" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(const_int 0)] UNSPEC_RNDRRS)) ++ (set (reg:CC_Z CC_REGNUM) ++ (unspec_volatile:CC_Z [(const_int 0)] UNSPEC_RNDRRS))] ++ "TARGET_RNG" ++ "mrs\t%0, RNDRRS" ++ [(set_attr "type" "mrs")] ++) ++ + ;; AdvSIMD Stuff + (include "aarch64-simd.md") + +diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +index d2cb41be6..e2be8ff6f 100644 +--- a/gcc/config/aarch64/aarch64.opt ++++ b/gcc/config/aarch64/aarch64.opt +@@ -31,7 +31,7 @@ TargetSave + const char *x_aarch64_override_tune_string + + TargetVariable +-unsigned long aarch64_isa_flags = 0 ++uint64_t aarch64_isa_flags = 0 + + TargetVariable + unsigned aarch64_enable_bti = 2 +@@ -261,3 +261,6 @@ user-land code. + TargetVariable + long aarch64_stack_protector_guard_offset = 0 + ++moutline-atomics ++Target Report Mask(OUTLINE_ATOMICS) Save ++Generate local calls to out-of-line atomic operations. 
+diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h +index 534a989c3..2284e7164 100644 +--- a/gcc/config/aarch64/arm_acle.h ++++ b/gcc/config/aarch64/arm_acle.h +@@ -29,14 +29,77 @@ + + #include + +-#pragma GCC push_options +- +-#pragma GCC target ("+nothing+crc") +- + #ifdef __cplusplus + extern "C" { + #endif + ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.3-a") ++__extension__ static __inline int32_t __attribute__ ((__always_inline__)) ++__jcvt (double __a) ++{ ++ return __builtin_aarch64_jcvtzs (__a); ++} ++ ++#pragma GCC pop_options ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.5-a") ++__extension__ static __inline float __attribute__ ((__always_inline__)) ++__rint32zf (float __a) ++{ ++ return __builtin_aarch64_frint32zsf (__a); ++} ++ ++__extension__ static __inline double __attribute__ ((__always_inline__)) ++__rint32z (double __a) ++{ ++ return __builtin_aarch64_frint32zdf (__a); ++} ++ ++__extension__ static __inline float __attribute__ ((__always_inline__)) ++__rint64zf (float __a) ++{ ++ return __builtin_aarch64_frint64zsf (__a); ++} ++ ++__extension__ static __inline double __attribute__ ((__always_inline__)) ++__rint64z (double __a) ++{ ++ return __builtin_aarch64_frint64zdf (__a); ++} ++ ++__extension__ static __inline float __attribute__ ((__always_inline__)) ++__rint32xf (float __a) ++{ ++ return __builtin_aarch64_frint32xsf (__a); ++} ++ ++__extension__ static __inline double __attribute__ ((__always_inline__)) ++__rint32x (double __a) ++{ ++ return __builtin_aarch64_frint32xdf (__a); ++} ++ ++__extension__ static __inline float __attribute__ ((__always_inline__)) ++__rint64xf (float __a) ++{ ++ return __builtin_aarch64_frint64xsf (__a); ++} ++ ++__extension__ static __inline double __attribute__ ((__always_inline__)) ++__rint64x (double __a) ++{ ++ return __builtin_aarch64_frint64xdf (__a); ++} ++ ++ ++#pragma GCC pop_options ++ ++#pragma GCC push_options ++ ++#pragma GCC target ("+nothing+crc") ++ + __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) + __crc32b (uint32_t __a, uint8_t __b) + { +@@ -85,10 +148,69 @@ __crc32d (uint32_t __a, uint64_t __b) + return __builtin_aarch64_crc32x (__a, __b); + } + +-#ifdef __cplusplus ++#pragma GCC pop_options ++ ++#ifdef __ARM_FEATURE_TME ++#pragma GCC push_options ++#pragma GCC target ("+nothing+tme") ++ ++#define _TMFAILURE_REASON 0x00007fffu ++#define _TMFAILURE_RTRY 0x00008000u ++#define _TMFAILURE_CNCL 0x00010000u ++#define _TMFAILURE_MEM 0x00020000u ++#define _TMFAILURE_IMP 0x00040000u ++#define _TMFAILURE_ERR 0x00080000u ++#define _TMFAILURE_SIZE 0x00100000u ++#define _TMFAILURE_NEST 0x00200000u ++#define _TMFAILURE_DBG 0x00400000u ++#define _TMFAILURE_INT 0x00800000u ++#define _TMFAILURE_TRIVIAL 0x01000000u ++ ++__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) ++__tstart (void) ++{ ++ return __builtin_aarch64_tstart (); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++__tcommit (void) ++{ ++ __builtin_aarch64_tcommit (); ++} ++ ++__extension__ static __inline void __attribute__ ((__always_inline__)) ++__tcancel (const uint64_t __reason) ++{ ++ __builtin_aarch64_tcancel (__reason); + } ++ ++__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) ++__ttest (void) ++{ ++ return __builtin_aarch64_ttest (); ++} ++ ++#pragma GCC pop_options + #endif + ++#pragma GCC push_options ++#pragma GCC target ("+nothing+rng") ++__extension__ static __inline int __attribute__ 
((__always_inline__)) ++__rndr (uint64_t *__res) ++{ ++ return __builtin_aarch64_rndr (__res); ++} ++ ++__extension__ static __inline int __attribute__ ((__always_inline__)) ++__rndrrs (uint64_t *__res) ++{ ++ return __builtin_aarch64_rndrrs (__res); ++} ++ + #pragma GCC pop_options + ++#ifdef __cplusplus ++} ++#endif ++ + #endif +diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h +new file mode 100644 +index 000000000..984875dcc +--- /dev/null ++++ b/gcc/config/aarch64/arm_bf16.h +@@ -0,0 +1,45 @@ ++/* Arm BF16 instrinsics include file. ++ ++ Copyright (C) 2019-2020 Free Software Foundation, Inc. ++ Contributed by Arm. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#ifndef _AARCH64_BF16_H_ ++#define _AARCH64_BF16_H_ ++ ++typedef __bf16 bfloat16_t; ++typedef float float32_t; ++ ++#pragma GCC push_options ++#pragma GCC target ("+nothing+bf16+nosimd") ++ ++__extension__ extern __inline bfloat16_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vcvth_bf16_f32 (float32_t __a) ++{ ++ return __builtin_aarch64_bfcvtbf (__a); ++} ++ ++#pragma GCC pop_options ++ ++#endif +diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h +index 314ef3018..7435905ff 100644 +--- a/gcc/config/aarch64/arm_neon.h ++++ b/gcc/config/aarch64/arm_neon.h +@@ -73,6 +73,39 @@ typedef __fp16 float16_t; + typedef float float32_t; + typedef double float64_t; + ++typedef __Bfloat16x4_t bfloat16x4_t; ++typedef __Bfloat16x8_t bfloat16x8_t; ++ ++typedef struct bfloat16x4x2_t ++{ ++ bfloat16x4_t val[2]; ++} bfloat16x4x2_t; ++ ++typedef struct bfloat16x8x2_t ++{ ++ bfloat16x8_t val[2]; ++} bfloat16x8x2_t; ++ ++typedef struct bfloat16x4x3_t ++{ ++ bfloat16x4_t val[3]; ++} bfloat16x4x3_t; ++ ++typedef struct bfloat16x8x3_t ++{ ++ bfloat16x8_t val[3]; ++} bfloat16x8x3_t; ++ ++typedef struct bfloat16x4x4_t ++{ ++ bfloat16x4_t val[4]; ++} bfloat16x4x4_t; ++ ++typedef struct bfloat16x8x4_t ++{ ++ bfloat16x8_t val[4]; ++} bfloat16x8x4_t; ++ + typedef struct int8x8x2_t + { + int8x8_t val[2]; +@@ -6572,867 +6605,867 @@ vcombine_p64 (poly64x1_t __a, poly64x1_t __b) + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c) ++vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) + { +- int8x8_t result; ++ int8x8_t __result; + __asm__ ("saba %0.8b,%2.8b,%3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +-vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c) ++vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("saba %0.4h,%2.4h,%3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c) ++vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("saba %0.2s,%2.2s,%3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) ++vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) + { +- uint8x8_t result; ++ uint8x8_t __result; + __asm__ ("uaba %0.8b,%2.8b,%3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) ++vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) + { +- uint16x4_t result; ++ uint16x4_t __result; + __asm__ ("uaba %0.4h,%2.4h,%3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ++vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) + { +- uint32x2_t result; ++ uint32x2_t __result; + __asm__ ("uaba %0.2s,%2.2s,%3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) ++vabal_high_s8 (int16x8_t __a, int8x16_t __b, int8x16_t __c) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("sabal2 %0.8h,%2.16b,%3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) ++vabal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("sabal2 %0.4s,%2.8h,%3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) ++vabal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) + { +- int64x2_t result; ++ int64x2_t __result; + 
__asm__ ("sabal2 %0.2d,%2.4s,%3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) ++vabal_high_u8 (uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("uabal2 %0.8h,%2.16b,%3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) ++vabal_high_u16 (uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("uabal2 %0.4s,%2.8h,%3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ++vabal_high_u32 (uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("uabal2 %0.2d,%2.4s,%3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) ++vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("sabal %0.8h,%2.8b,%3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) ++vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("sabal %0.4s,%2.4h,%3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) ++vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("sabal %0.2d,%2.2s,%3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) ++vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("uabal %0.8h,%2.8b,%3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return 
result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) ++vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("uabal %0.4s,%2.4h,%3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ++vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("uabal %0.2d,%2.2s,%3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) ++vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) + { +- int8x16_t result; ++ int8x16_t __result; + __asm__ ("saba %0.16b,%2.16b,%3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) ++vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("saba %0.8h,%2.8h,%3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) ++vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("saba %0.4s,%2.4s,%3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) ++vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) + { +- uint8x16_t result; ++ uint8x16_t __result; + __asm__ ("uaba %0.16b,%2.16b,%3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) ++vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("uaba %0.8h,%2.8h,%3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabaq_u32 (uint32x4_t a, uint32x4_t b, 
uint32x4_t c) ++vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("uaba %0.4s,%2.4s,%3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabd_s8 (int8x8_t a, int8x8_t b) ++vabd_s8 (int8x8_t __a, int8x8_t __b) + { +- int8x8_t result; ++ int8x8_t __result; + __asm__ ("sabd %0.8b, %1.8b, %2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabd_s16 (int16x4_t a, int16x4_t b) ++vabd_s16 (int16x4_t __a, int16x4_t __b) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("sabd %0.4h, %1.4h, %2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabd_s32 (int32x2_t a, int32x2_t b) ++vabd_s32 (int32x2_t __a, int32x2_t __b) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("sabd %0.2s, %1.2s, %2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabd_u8 (uint8x8_t a, uint8x8_t b) ++vabd_u8 (uint8x8_t __a, uint8x8_t __b) + { +- uint8x8_t result; ++ uint8x8_t __result; + __asm__ ("uabd %0.8b, %1.8b, %2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabd_u16 (uint16x4_t a, uint16x4_t b) ++vabd_u16 (uint16x4_t __a, uint16x4_t __b) + { +- uint16x4_t result; ++ uint16x4_t __result; + __asm__ ("uabd %0.4h, %1.4h, %2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabd_u32 (uint32x2_t a, uint32x2_t b) ++vabd_u32 (uint32x2_t __a, uint32x2_t __b) + { +- uint32x2_t result; ++ uint32x2_t __result; + __asm__ ("uabd %0.2s, %1.2s, %2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_high_s8 (int8x16_t a, int8x16_t b) ++vabdl_high_s8 (int8x16_t __a, int8x16_t __b) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_high_s16 (int16x8_t a, int16x8_t b) ++vabdl_high_s16 (int16x8_t __a, int16x8_t __b) + { +- 
int32x4_t result; ++ int32x4_t __result; + __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_high_s32 (int32x4_t a, int32x4_t b) ++vabdl_high_s32 (int32x4_t __a, int32x4_t __b) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_high_u8 (uint8x16_t a, uint8x16_t b) ++vabdl_high_u8 (uint8x16_t __a, uint8x16_t __b) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_high_u16 (uint16x8_t a, uint16x8_t b) ++vabdl_high_u16 (uint16x8_t __a, uint16x8_t __b) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_high_u32 (uint32x4_t a, uint32x4_t b) ++vabdl_high_u32 (uint32x4_t __a, uint32x4_t __b) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_s8 (int8x8_t a, int8x8_t b) ++vabdl_s8 (int8x8_t __a, int8x8_t __b) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("sabdl %0.8h, %1.8b, %2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_s16 (int16x4_t a, int16x4_t b) ++vabdl_s16 (int16x4_t __a, int16x4_t __b) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("sabdl %0.4s, %1.4h, %2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_s32 (int32x2_t a, int32x2_t b) ++vabdl_s32 (int32x2_t __a, int32x2_t __b) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("sabdl %0.2d, %1.2s, %2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_u8 (uint8x8_t a, uint8x8_t b) ++vabdl_u8 (uint8x8_t __a, uint8x8_t __b) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ 
("uabdl %0.8h, %1.8b, %2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_u16 (uint16x4_t a, uint16x4_t b) ++vabdl_u16 (uint16x4_t __a, uint16x4_t __b) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("uabdl %0.4s, %1.4h, %2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdl_u32 (uint32x2_t a, uint32x2_t b) ++vabdl_u32 (uint32x2_t __a, uint32x2_t __b) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("uabdl %0.2d, %1.2s, %2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdq_s8 (int8x16_t a, int8x16_t b) ++vabdq_s8 (int8x16_t __a, int8x16_t __b) + { +- int8x16_t result; ++ int8x16_t __result; + __asm__ ("sabd %0.16b, %1.16b, %2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdq_s16 (int16x8_t a, int16x8_t b) ++vabdq_s16 (int16x8_t __a, int16x8_t __b) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("sabd %0.8h, %1.8h, %2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdq_s32 (int32x4_t a, int32x4_t b) ++vabdq_s32 (int32x4_t __a, int32x4_t __b) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("sabd %0.4s, %1.4s, %2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdq_u8 (uint8x16_t a, uint8x16_t b) ++vabdq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- uint8x16_t result; ++ uint8x16_t __result; + __asm__ ("uabd %0.16b, %1.16b, %2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdq_u16 (uint16x8_t a, uint16x8_t b) ++vabdq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("uabd %0.8h, %1.8h, %2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdq_u32 (uint32x4_t a, uint32x4_t b) ++vabdq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("uabd %0.4s, %1.4s, %2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : 
"=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlv_s8 (int8x8_t a) ++vaddlv_s8 (int8x8_t __a) + { +- int16_t result; ++ int16_t __result; + __asm__ ("saddlv %h0,%1.8b" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlv_s16 (int16x4_t a) ++vaddlv_s16 (int16x4_t __a) + { +- int32_t result; ++ int32_t __result; + __asm__ ("saddlv %s0,%1.4h" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlv_u8 (uint8x8_t a) ++vaddlv_u8 (uint8x8_t __a) + { +- uint16_t result; ++ uint16_t __result; + __asm__ ("uaddlv %h0,%1.8b" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlv_u16 (uint16x4_t a) ++vaddlv_u16 (uint16x4_t __a) + { +- uint32_t result; ++ uint32_t __result; + __asm__ ("uaddlv %s0,%1.4h" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlvq_s8 (int8x16_t a) ++vaddlvq_s8 (int8x16_t __a) + { +- int16_t result; ++ int16_t __result; + __asm__ ("saddlv %h0,%1.16b" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlvq_s16 (int16x8_t a) ++vaddlvq_s16 (int16x8_t __a) + { +- int32_t result; ++ int32_t __result; + __asm__ ("saddlv %s0,%1.8h" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlvq_s32 (int32x4_t a) ++vaddlvq_s32 (int32x4_t __a) + { +- int64_t result; ++ int64_t __result; + __asm__ ("saddlv %d0,%1.4s" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlvq_u8 (uint8x16_t a) ++vaddlvq_u8 (uint8x16_t __a) + { +- uint16_t result; ++ uint16_t __result; + __asm__ ("uaddlv %h0,%1.16b" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlvq_u16 (uint16x8_t a) ++vaddlvq_u16 (uint16x8_t __a) + { +- uint32_t result; ++ uint32_t __result; + __asm__ ("uaddlv %s0,%1.8h" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64_t + __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +-vaddlvq_u32 (uint32x4_t a) ++vaddlvq_u32 (uint32x4_t __a) + { +- uint64_t result; ++ uint64_t __result; + __asm__ ("uaddlv %d0,%1.4s" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vcvtx_f32_f64 (float64x2_t a) ++vcvtx_f32_f64 (float64x2_t __a) + { +- float32x2_t result; ++ float32x2_t __result; + __asm__ ("fcvtxn %0.2s,%1.2d" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b) ++vcvtx_high_f32_f64 (float32x2_t __a, float64x2_t __b) + { +- float32x4_t result; ++ float32x4_t __result; + __asm__ ("fcvtxn2 %0.4s,%1.2d" +- : "=w"(result) +- : "w" (b), "0"(a) ++ : "=w"(__result) ++ : "w" (__b), "0"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline float32_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vcvtxd_f32_f64 (float64_t a) ++vcvtxd_f32_f64 (float64_t __a) + { +- float32_t result; ++ float32_t __result; + __asm__ ("fcvtxn %s0,%d1" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) ++vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) + { +- float32x2_t result; +- float32x2_t t1; ++ float32x2_t __result; ++ float32x2_t __t1; + __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" +- : "=w"(result), "=w"(t1) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result), "=w"(__t1) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) ++vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("mla %0.4h,%2.4h,%3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) ++vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("mla %0.2s,%2.2s,%3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) ++vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) + { +- uint16x4_t result; ++ uint16x4_t __result; + __asm__ ("mla %0.4h,%2.4h,%3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + 
__extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) ++vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) + { +- uint32x2_t result; ++ uint32x2_t __result; + __asm__ ("mla %0.2s,%2.2s,%3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) ++vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) + { +- int8x8_t result; ++ int8x8_t __result; + __asm__ ("mla %0.8b, %2.8b, %3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c) ++vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("mla %0.4h, %2.4h, %3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) ++vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("mla %0.2s, %2.2s, %3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) ++vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) + { +- uint8x8_t result; ++ uint8x8_t __result; + __asm__ ("mla %0.8b, %2.8b, %3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) ++vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) + { +- uint16x4_t result; ++ uint16x4_t __result; + __asm__ ("mla %0.4h, %2.4h, %3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ++vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) + { +- uint32x2_t result; ++ uint32x2_t __result; + __asm__ ("mla %0.2s, %2.2s, %3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + #define vmlal_high_lane_s16(a, b, c, d) \ +@@ -7549,122 +7582,122 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) +-vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) ++vmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) ++vmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) ++vmlal_high_n_u16 (uint32x4_t __a, uint16x8_t __b, uint16_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) ++vmlal_high_n_u32 (uint64x2_t __a, uint32x4_t __b, uint32_t __c) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) ++vmlal_high_s8 (int16x8_t __a, int8x16_t __b, int8x16_t __c) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) ++vmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) ++vmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) ++vmlal_high_u8 (uint16x8_t __a, uint8x16_t __b, 
uint8x16_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) ++vmlal_high_u16 (uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ++vmlal_high_u32 (uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + #define vmlal_lane_s16(a, b, c, d) \ +@@ -7781,388 +7814,388 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) ++vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) ++vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) ++vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) ++vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) ++vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) + { +- int16x8_t 
result; ++ int16x8_t __result; + __asm__ ("smlal %0.8h,%2.8b,%3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) ++vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smlal %0.4s,%2.4h,%3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) ++vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smlal %0.2d,%2.2s,%3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) ++vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("umlal %0.8h,%2.8b,%3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) ++vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umlal %0.4s,%2.4h,%3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ++vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umlal %0.2d,%2.2s,%3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) ++vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) + { +- float32x4_t result; +- float32x4_t t1; ++ float32x4_t __result; ++ float32x4_t __t1; + __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" +- : "=w"(result), "=w"(t1) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result), "=w"(__t1) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) ++vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("mla %0.8h,%2.8h,%3.h[0]" +- : "=w"(result) +- : "0"(a), 
"w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) ++vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("mla %0.4s,%2.4s,%3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) ++vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("mla %0.8h,%2.8h,%3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) ++vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("mla %0.4s,%2.4s,%3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) ++vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) + { +- int8x16_t result; ++ int8x16_t __result; + __asm__ ("mla %0.16b, %2.16b, %3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) ++vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("mla %0.8h, %2.8h, %3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) ++vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("mla %0.4s, %2.4s, %3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) ++vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) + { +- uint8x16_t result; ++ uint8x16_t __result; + __asm__ ("mla %0.16b, %2.16b, %3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) ++vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("mla %0.8h, %2.8h, %3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) ++vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("mla %0.4s, %2.4s, %3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) ++vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) + { +- float32x2_t result; +- float32x2_t t1; ++ float32x2_t __result; ++ float32x2_t __t1; + __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" +- : "=w"(result), "=w"(t1) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result), "=w"(__t1) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) ++vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("mls %0.4h, %2.4h, %3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) ++vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("mls %0.2s, %2.2s, %3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) ++vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) + { +- uint16x4_t result; ++ uint16x4_t __result; + __asm__ ("mls %0.4h, %2.4h, %3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) ++vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) + { +- uint32x2_t result; ++ uint32x2_t __result; + __asm__ ("mls %0.2s, %2.2s, %3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_s8 (int8x8_t a, int8x8_t b, 
int8x8_t c) ++vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) + { +- int8x8_t result; ++ int8x8_t __result; + __asm__ ("mls %0.8b,%2.8b,%3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c) ++vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("mls %0.4h,%2.4h,%3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) ++vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("mls %0.2s,%2.2s,%3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) ++vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) + { +- uint8x8_t result; ++ uint8x8_t __result; + __asm__ ("mls %0.8b,%2.8b,%3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) ++vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) + { +- uint16x4_t result; ++ uint16x4_t __result; + __asm__ ("mls %0.4h,%2.4h,%3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) ++vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) + { +- uint32x2_t result; ++ uint32x2_t __result; + __asm__ ("mls %0.2s,%2.2s,%3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + #define vmlsl_high_lane_s16(a, b, c, d) \ +@@ -8279,122 +8312,122 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) ++vmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) ++vmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) + { +- int64x2_t 
result; ++ int64x2_t __result; + __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) ++vmlsl_high_n_u16 (uint32x4_t __a, uint16x8_t __b, uint16_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) ++vmlsl_high_n_u32 (uint64x2_t __a, uint32x4_t __b, uint32_t __c) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) ++vmlsl_high_s8 (int16x8_t __a, int8x16_t __b, int8x16_t __c) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) ++vmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) ++vmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) ++vmlsl_high_u8 (uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) ++vmlsl_high_u16 (uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" +- : "=w"(result) +- : "0"(a), 
"w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) ++vmlsl_high_u32 (uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + #define vmlsl_lane_s16(a, b, c, d) \ +@@ -8511,543 +8544,543 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) ++vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) ++vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) ++vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) ++vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) ++vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("smlsl %0.8h, %2.8b, %3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c) ++vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smlsl %0.4s, %2.4h, %3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), 
"w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) ++vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smlsl %0.2d, %2.2s, %3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) ++vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("umlsl %0.8h, %2.8b, %3.8b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) ++vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umlsl %0.4s, %2.4h, %3.4h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) ++vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umlsl %0.2d, %2.2s, %3.2s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) ++vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) + { +- float32x4_t result; +- float32x4_t t1; ++ float32x4_t __result; ++ float32x4_t __t1; + __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" +- : "=w"(result), "=w"(t1) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result), "=w"(__t1) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) ++vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("mls %0.8h, %2.8h, %3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) ++vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("mls %0.4s, %2.4s, %3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + 
+ __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) ++vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("mls %0.8h, %2.8h, %3.h[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "x"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "x"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) ++vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("mls %0.4s, %2.4s, %3.s[0]" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) ++vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) + { +- int8x16_t result; ++ int8x16_t __result; + __asm__ ("mls %0.16b,%2.16b,%3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) ++vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("mls %0.8h,%2.8h,%3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) ++vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("mls %0.4s,%2.4s,%3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) ++vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) + { +- uint8x16_t result; ++ uint8x16_t __result; + __asm__ ("mls %0.16b,%2.16b,%3.16b" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) ++vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("mls %0.8h,%2.8h,%3.8h" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) ++vmlsq_u32 (uint32x4_t __a, 
uint32x4_t __b, uint32x4_t __c) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("mls %0.4s,%2.4s,%3.4s" +- : "=w"(result) +- : "0"(a), "w"(b), "w"(c) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b), "w"(__c) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_high_s8 (int8x16_t a) ++vmovl_high_s8 (int8x16_t __a) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("sshll2 %0.8h,%1.16b,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_high_s16 (int16x8_t a) ++vmovl_high_s16 (int16x8_t __a) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("sshll2 %0.4s,%1.8h,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_high_s32 (int32x4_t a) ++vmovl_high_s32 (int32x4_t __a) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("sshll2 %0.2d,%1.4s,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_high_u8 (uint8x16_t a) ++vmovl_high_u8 (uint8x16_t __a) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("ushll2 %0.8h,%1.16b,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_high_u16 (uint16x8_t a) ++vmovl_high_u16 (uint16x8_t __a) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("ushll2 %0.4s,%1.8h,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_high_u32 (uint32x4_t a) ++vmovl_high_u32 (uint32x4_t __a) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("ushll2 %0.2d,%1.4s,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_s8 (int8x8_t a) ++vmovl_s8 (int8x8_t __a) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("sshll %0.8h,%1.8b,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_s16 (int16x4_t a) ++vmovl_s16 (int16x4_t __a) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("sshll %0.4s,%1.4h,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_s32 (int32x2_t a) ++vmovl_s32 
(int32x2_t __a) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("sshll %0.2d,%1.2s,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_u8 (uint8x8_t a) ++vmovl_u8 (uint8x8_t __a) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("ushll %0.8h,%1.8b,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_u16 (uint16x4_t a) ++vmovl_u16 (uint16x4_t __a) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("ushll %0.4s,%1.4h,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovl_u32 (uint32x2_t a) ++vmovl_u32 (uint32x2_t __a) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("ushll %0.2d,%1.2s,#0" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_high_s16 (int8x8_t a, int16x8_t b) ++vmovn_high_s16 (int8x8_t __a, int16x8_t __b) + { +- int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); ++ int8x16_t __result = vcombine_s8 (__a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.16b,%1.8h" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_high_s32 (int16x4_t a, int32x4_t b) ++vmovn_high_s32 (int16x4_t __a, int32x4_t __b) + { +- int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); ++ int16x8_t __result = vcombine_s16 (__a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.8h,%1.4s" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_high_s64 (int32x2_t a, int64x2_t b) ++vmovn_high_s64 (int32x2_t __a, int64x2_t __b) + { +- int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); ++ int32x4_t __result = vcombine_s32 (__a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.4s,%1.2d" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_high_u16 (uint8x8_t a, uint16x8_t b) ++vmovn_high_u16 (uint8x8_t __a, uint16x8_t __b) + { +- uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); ++ uint8x16_t __result = vcombine_u8 (__a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.16b,%1.8h" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline 
uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_high_u32 (uint16x4_t a, uint32x4_t b) ++vmovn_high_u32 (uint16x4_t __a, uint32x4_t __b) + { +- uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); ++ uint16x8_t __result = vcombine_u16 (__a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.8h,%1.4s" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_high_u64 (uint32x2_t a, uint64x2_t b) ++vmovn_high_u64 (uint32x2_t __a, uint64x2_t __b) + { +- uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); ++ uint32x4_t __result = vcombine_u32 (__a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("xtn2 %0.4s,%1.2d" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_s16 (int16x8_t a) ++vmovn_s16 (int16x8_t __a) + { +- int8x8_t result; ++ int8x8_t __result; + __asm__ ("xtn %0.8b,%1.8h" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_s32 (int32x4_t a) ++vmovn_s32 (int32x4_t __a) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("xtn %0.4h,%1.4s" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_s64 (int64x2_t a) ++vmovn_s64 (int64x2_t __a) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("xtn %0.2s,%1.2d" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_u16 (uint16x8_t a) ++vmovn_u16 (uint16x8_t __a) + { +- uint8x8_t result; ++ uint8x8_t __result; + __asm__ ("xtn %0.8b,%1.8h" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_u32 (uint32x4_t a) ++vmovn_u32 (uint32x4_t __a) + { +- uint16x4_t result; ++ uint16x4_t __result; + __asm__ ("xtn %0.4h,%1.4s" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmovn_u64 (uint64x2_t a) ++vmovn_u64 (uint64x2_t __a) + { +- uint32x2_t result; ++ uint32x2_t __result; + __asm__ ("xtn %0.2s,%1.2d" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + #define vmull_high_lane_s16(a, b, c) \ +@@ -9156,134 +9189,134 @@ vmovn_u64 (uint64x2_t a) + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_n_s16 (int16x8_t a, 
int16_t b) ++vmull_high_n_s16 (int16x8_t __a, int16_t __b) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" +- : "=w"(result) +- : "w"(a), "x"(b) ++ : "=w"(__result) ++ : "w"(__a), "x"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_n_s32 (int32x4_t a, int32_t b) ++vmull_high_n_s32 (int32x4_t __a, int32_t __b) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_n_u16 (uint16x8_t a, uint16_t b) ++vmull_high_n_u16 (uint16x8_t __a, uint16_t __b) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" +- : "=w"(result) +- : "w"(a), "x"(b) ++ : "=w"(__result) ++ : "w"(__a), "x"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_n_u32 (uint32x4_t a, uint32_t b) ++vmull_high_n_u32 (uint32x4_t __a, uint32_t __b) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline poly16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_p8 (poly8x16_t a, poly8x16_t b) ++vmull_high_p8 (poly8x16_t __a, poly8x16_t __b) + { +- poly16x8_t result; ++ poly16x8_t __result; + __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_s8 (int8x16_t a, int8x16_t b) ++vmull_high_s8 (int8x16_t __a, int8x16_t __b) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("smull2 %0.8h,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_s16 (int16x8_t a, int16x8_t b) ++vmull_high_s16 (int16x8_t __a, int16x8_t __b) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smull2 %0.4s,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_s32 (int32x4_t a, int32x4_t b) ++vmull_high_s32 (int32x4_t __a, int32x4_t __b) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smull2 %0.2d,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_u8 (uint8x16_t a, 
uint8x16_t b) ++vmull_high_u8 (uint8x16_t __a, uint8x16_t __b) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("umull2 %0.8h,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_u16 (uint16x8_t a, uint16x8_t b) ++vmull_high_u16 (uint16x8_t __a, uint16x8_t __b) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umull2 %0.4s,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_u32 (uint32x4_t a, uint32x4_t b) ++vmull_high_u32 (uint32x4_t __a, uint32x4_t __b) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umull2 %0.2d,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + #define vmull_lane_s16(a, b, c) \ +@@ -9392,722 +9425,722 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b) + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_n_s16 (int16x4_t a, int16_t b) ++vmull_n_s16 (int16x4_t __a, int16_t __b) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smull %0.4s,%1.4h,%2.h[0]" +- : "=w"(result) +- : "w"(a), "x"(b) ++ : "=w"(__result) ++ : "w"(__a), "x"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_n_s32 (int32x2_t a, int32_t b) ++vmull_n_s32 (int32x2_t __a, int32_t __b) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smull %0.2d,%1.2s,%2.s[0]" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_n_u16 (uint16x4_t a, uint16_t b) ++vmull_n_u16 (uint16x4_t __a, uint16_t __b) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umull %0.4s,%1.4h,%2.h[0]" +- : "=w"(result) +- : "w"(a), "x"(b) ++ : "=w"(__result) ++ : "w"(__a), "x"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_n_u32 (uint32x2_t a, uint32_t b) ++vmull_n_u32 (uint32x2_t __a, uint32_t __b) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umull %0.2d,%1.2s,%2.s[0]" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline poly16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_p8 (poly8x8_t a, poly8x8_t b) ++vmull_p8 (poly8x8_t __a, poly8x8_t __b) + { +- poly16x8_t result; ++ poly16x8_t __result; + __asm__ ("pmull %0.8h, %1.8b, %2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +-vmull_s8 (int8x8_t a, int8x8_t b) ++vmull_s8 (int8x8_t __a, int8x8_t __b) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("smull %0.8h, %1.8b, %2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_s16 (int16x4_t a, int16x4_t b) ++vmull_s16 (int16x4_t __a, int16x4_t __b) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("smull %0.4s, %1.4h, %2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_s32 (int32x2_t a, int32x2_t b) ++vmull_s32 (int32x2_t __a, int32x2_t __b) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("smull %0.2d, %1.2s, %2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_u8 (uint8x8_t a, uint8x8_t b) ++vmull_u8 (uint8x8_t __a, uint8x8_t __b) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("umull %0.8h, %1.8b, %2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_u16 (uint16x4_t a, uint16x4_t b) ++vmull_u16 (uint16x4_t __a, uint16x4_t __b) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("umull %0.4s, %1.4h, %2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_u32 (uint32x2_t a, uint32x2_t b) ++vmull_u32 (uint32x2_t __a, uint32x2_t __b) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("umull %0.2d, %1.2s, %2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadal_s8 (int16x4_t a, int8x8_t b) ++vpadal_s8 (int16x4_t __a, int8x8_t __b) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("sadalp %0.4h,%2.8b" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadal_s16 (int32x2_t a, int16x4_t b) ++vpadal_s16 (int32x2_t __a, int16x4_t __b) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("sadalp %0.2s,%2.4h" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x1_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadal_s32 (int64x1_t a, int32x2_t b) ++vpadal_s32 (int64x1_t __a, int32x2_t 
__b) + { +- int64x1_t result; ++ int64x1_t __result; + __asm__ ("sadalp %0.1d,%2.2s" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadal_u8 (uint16x4_t a, uint8x8_t b) ++vpadal_u8 (uint16x4_t __a, uint8x8_t __b) + { +- uint16x4_t result; ++ uint16x4_t __result; + __asm__ ("uadalp %0.4h,%2.8b" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadal_u16 (uint32x2_t a, uint16x4_t b) ++vpadal_u16 (uint32x2_t __a, uint16x4_t __b) + { +- uint32x2_t result; ++ uint32x2_t __result; + __asm__ ("uadalp %0.2s,%2.4h" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x1_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadal_u32 (uint64x1_t a, uint32x2_t b) ++vpadal_u32 (uint64x1_t __a, uint32x2_t __b) + { +- uint64x1_t result; ++ uint64x1_t __result; + __asm__ ("uadalp %0.1d,%2.2s" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadalq_s8 (int16x8_t a, int8x16_t b) ++vpadalq_s8 (int16x8_t __a, int8x16_t __b) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("sadalp %0.8h,%2.16b" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadalq_s16 (int32x4_t a, int16x8_t b) ++vpadalq_s16 (int32x4_t __a, int16x8_t __b) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("sadalp %0.4s,%2.8h" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadalq_s32 (int64x2_t a, int32x4_t b) ++vpadalq_s32 (int64x2_t __a, int32x4_t __b) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("sadalp %0.2d,%2.4s" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadalq_u8 (uint16x8_t a, uint8x16_t b) ++vpadalq_u8 (uint16x8_t __a, uint8x16_t __b) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("uadalp %0.8h,%2.16b" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadalq_u16 (uint32x4_t a, uint16x8_t b) ++vpadalq_u16 (uint32x4_t __a, uint16x8_t __b) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("uadalp %0.4s,%2.8h" +- : "=w"(result) +- : 
"0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadalq_u32 (uint64x2_t a, uint32x4_t b) ++vpadalq_u32 (uint64x2_t __a, uint32x4_t __b) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("uadalp %0.2d,%2.4s" +- : "=w"(result) +- : "0"(a), "w"(b) ++ : "=w"(__result) ++ : "0"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddl_s8 (int8x8_t a) ++vpaddl_s8 (int8x8_t __a) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("saddlp %0.4h,%1.8b" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddl_s16 (int16x4_t a) ++vpaddl_s16 (int16x4_t __a) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("saddlp %0.2s,%1.4h" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x1_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddl_s32 (int32x2_t a) ++vpaddl_s32 (int32x2_t __a) + { +- int64x1_t result; ++ int64x1_t __result; + __asm__ ("saddlp %0.1d,%1.2s" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddl_u8 (uint8x8_t a) ++vpaddl_u8 (uint8x8_t __a) + { +- uint16x4_t result; ++ uint16x4_t __result; + __asm__ ("uaddlp %0.4h,%1.8b" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddl_u16 (uint16x4_t a) ++vpaddl_u16 (uint16x4_t __a) + { +- uint32x2_t result; ++ uint32x2_t __result; + __asm__ ("uaddlp %0.2s,%1.4h" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x1_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddl_u32 (uint32x2_t a) ++vpaddl_u32 (uint32x2_t __a) + { +- uint64x1_t result; ++ uint64x1_t __result; + __asm__ ("uaddlp %0.1d,%1.2s" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddlq_s8 (int8x16_t a) ++vpaddlq_s8 (int8x16_t __a) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("saddlp %0.8h,%1.16b" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddlq_s16 (int16x8_t a) ++vpaddlq_s16 (int16x8_t __a) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("saddlp %0.4s,%1.8h" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No 
clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddlq_s32 (int32x4_t a) ++vpaddlq_s32 (int32x4_t __a) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("saddlp %0.2d,%1.4s" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddlq_u8 (uint8x16_t a) ++vpaddlq_u8 (uint8x16_t __a) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("uaddlp %0.8h,%1.16b" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddlq_u16 (uint16x8_t a) ++vpaddlq_u16 (uint16x8_t __a) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("uaddlp %0.4s,%1.8h" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddlq_u32 (uint32x4_t a) ++vpaddlq_u32 (uint32x4_t __a) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("uaddlp %0.2d,%1.4s" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddq_s8 (int8x16_t a, int8x16_t b) ++vpaddq_s8 (int8x16_t __a, int8x16_t __b) + { +- int8x16_t result; ++ int8x16_t __result; + __asm__ ("addp %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddq_s16 (int16x8_t a, int16x8_t b) ++vpaddq_s16 (int16x8_t __a, int16x8_t __b) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("addp %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddq_s32 (int32x4_t a, int32x4_t b) ++vpaddq_s32 (int32x4_t __a, int32x4_t __b) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("addp %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddq_s64 (int64x2_t a, int64x2_t b) ++vpaddq_s64 (int64x2_t __a, int64x2_t __b) + { +- int64x2_t result; ++ int64x2_t __result; + __asm__ ("addp %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddq_u8 (uint8x16_t a, uint8x16_t b) ++vpaddq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- uint8x16_t result; ++ 
uint8x16_t __result; + __asm__ ("addp %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddq_u16 (uint16x8_t a, uint16x8_t b) ++vpaddq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- uint16x8_t result; ++ uint16x8_t __result; + __asm__ ("addp %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddq_u32 (uint32x4_t a, uint32x4_t b) ++vpaddq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ ("addp %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddq_u64 (uint64x2_t a, uint64x2_t b) ++vpaddq_u64 (uint64x2_t __a, uint64x2_t __b) + { +- uint64x2_t result; ++ uint64x2_t __result; + __asm__ ("addp %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqdmulh_n_s16 (int16x4_t a, int16_t b) ++vqdmulh_n_s16 (int16x4_t __a, int16_t __b) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" +- : "=w"(result) +- : "w"(a), "x"(b) ++ : "=w"(__result) ++ : "w"(__a), "x"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqdmulh_n_s32 (int32x2_t a, int32_t b) ++vqdmulh_n_s32 (int32x2_t __a, int32_t __b) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqdmulhq_n_s16 (int16x8_t a, int16_t b) ++vqdmulhq_n_s16 (int16x8_t __a, int16_t __b) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" +- : "=w"(result) +- : "w"(a), "x"(b) ++ : "=w"(__result) ++ : "w"(__a), "x"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqdmulhq_n_s32 (int32x4_t a, int32_t b) ++vqdmulhq_n_s32 (int32x4_t __a, int32_t __b) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqmovn_high_s16 (int8x8_t a, int16x8_t b) ++vqmovn_high_s16 (int8x8_t __a, int16x8_t __b) + { +- int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C 
(0x0))); ++ int8x16_t __result = vcombine_s8 (__a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.16b, %1.8h" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqmovn_high_s32 (int16x4_t a, int32x4_t b) ++vqmovn_high_s32 (int16x4_t __a, int32x4_t __b) + { +- int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); ++ int16x8_t __result = vcombine_s16 (__a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.8h, %1.4s" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqmovn_high_s64 (int32x2_t a, int64x2_t b) ++vqmovn_high_s64 (int32x2_t __a, int64x2_t __b) + { +- int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); ++ int32x4_t __result = vcombine_s32 (__a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtn2 %0.4s, %1.2d" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) ++vqmovn_high_u16 (uint8x8_t __a, uint16x8_t __b) + { +- uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); ++ uint8x16_t __result = vcombine_u8 (__a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.16b, %1.8h" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) ++vqmovn_high_u32 (uint16x4_t __a, uint32x4_t __b) + { +- uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); ++ uint16x8_t __result = vcombine_u16 (__a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.8h, %1.4s" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) ++vqmovn_high_u64 (uint32x2_t __a, uint64x2_t __b) + { +- uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); ++ uint32x4_t __result = vcombine_u32 (__a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("uqxtn2 %0.4s, %1.2d" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqmovun_high_s16 (uint8x8_t a, int16x8_t b) ++vqmovun_high_s16 (uint8x8_t __a, int16x8_t __b) + { +- uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); ++ uint8x16_t __result = vcombine_u8 (__a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.16b, %1.8h" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint16x8_t + 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqmovun_high_s32 (uint16x4_t a, int32x4_t b) ++vqmovun_high_s32 (uint16x4_t __a, int32x4_t __b) + { +- uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); ++ uint16x8_t __result = vcombine_u16 (__a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.8h, %1.4s" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqmovun_high_s64 (uint32x2_t a, int64x2_t b) ++vqmovun_high_s64 (uint32x2_t __a, int64x2_t __b) + { +- uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); ++ uint32x4_t __result = vcombine_u32 (__a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); + __asm__ ("sqxtun2 %0.4s, %1.2d" +- : "+w"(result) +- : "w"(b) ++ : "+w"(__result) ++ : "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqrdmulh_n_s16 (int16x4_t a, int16_t b) ++vqrdmulh_n_s16 (int16x4_t __a, int16_t __b) + { +- int16x4_t result; ++ int16x4_t __result; + __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" +- : "=w"(result) +- : "w"(a), "x"(b) ++ : "=w"(__result) ++ : "w"(__a), "x"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqrdmulh_n_s32 (int32x2_t a, int32_t b) ++vqrdmulh_n_s32 (int32x2_t __a, int32_t __b) + { +- int32x2_t result; ++ int32x2_t __result; + __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqrdmulhq_n_s16 (int16x8_t a, int16_t b) ++vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b) + { +- int16x8_t result; ++ int16x8_t __result; + __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" +- : "=w"(result) +- : "w"(a), "x"(b) ++ : "=w"(__result) ++ : "w"(__a), "x"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqrdmulhq_n_s32 (int32x4_t a, int32_t b) ++vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b) + { +- int32x4_t result; ++ int32x4_t __result; + __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + #define vqrshrn_high_n_s16(a, b, c) \ +@@ -10544,26 +10577,26 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrsqrte_u32 (uint32x2_t a) ++vrsqrte_u32 (uint32x2_t __a) + { +- uint32x2_t result; ++ uint32x2_t __result; + __asm__ ("ursqrte %0.2s,%1.2s" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrsqrteq_u32 (uint32x4_t a) ++vrsqrteq_u32 (uint32x4_t __a) + { +- uint32x4_t result; ++ uint32x4_t __result; + __asm__ 
("ursqrte %0.4s,%1.4s" +- : "=w"(result) +- : "w"(a) ++ : "=w"(__result) ++ : "w"(__a) + : /* No clobbers */); +- return result; ++ return __result; + } + + #define vshrn_high_n_s16(a, b, c) \ +@@ -10860,48 +10893,48 @@ vrsqrteq_u32 (uint32x4_t a) + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtst_p8 (poly8x8_t a, poly8x8_t b) ++vtst_p8 (poly8x8_t __a, poly8x8_t __b) + { +- return (uint8x8_t) ((((uint8x8_t) a) & ((uint8x8_t) b)) ++ return (uint8x8_t) ((((uint8x8_t) __a) & ((uint8x8_t) __b)) + != 0); + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtst_p16 (poly16x4_t a, poly16x4_t b) ++vtst_p16 (poly16x4_t __a, poly16x4_t __b) + { +- return (uint16x4_t) ((((uint16x4_t) a) & ((uint16x4_t) b)) ++ return (uint16x4_t) ((((uint16x4_t) __a) & ((uint16x4_t) __b)) + != 0); + } + + __extension__ extern __inline uint64x1_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtst_p64 (poly64x1_t a, poly64x1_t b) ++vtst_p64 (poly64x1_t __a, poly64x1_t __b) + { +- return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0)); ++ return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtstq_p8 (poly8x16_t a, poly8x16_t b) ++vtstq_p8 (poly8x16_t __a, poly8x16_t __b) + { +- return (uint8x16_t) ((((uint8x16_t) a) & ((uint8x16_t) b)) ++ return (uint8x16_t) ((((uint8x16_t) __a) & ((uint8x16_t) __b)) + != 0); + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtstq_p16 (poly16x8_t a, poly16x8_t b) ++vtstq_p16 (poly16x8_t __a, poly16x8_t __b) + { +- return (uint16x8_t) ((((uint16x8_t) a) & ((uint16x8_t) b)) ++ return (uint16x8_t) ((((uint16x8_t) __a) & ((uint16x8_t) __b)) + != 0); + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtstq_p64 (poly64x2_t a, poly64x2_t b) ++vtstq_p64 (poly64x2_t __a, poly64x2_t __b) + { +- return (uint64x2_t) ((((uint64x2_t) a) & ((uint64x2_t) b)) ++ return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b)) + != __AARCH64_INT64_C (0)); + } + +@@ -11248,20 +11281,20 @@ __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) + + __extension__ extern __inline int64_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlv_s32 (int32x2_t a) ++vaddlv_s32 (int32x2_t __a) + { +- int64_t result; +- __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); +- return result; ++ int64_t __result; ++ __asm__ ("saddlp %0.1d, %1.2s" : "=w"(__result) : "w"(__a) : ); ++ return __result; + } + + __extension__ extern __inline uint64_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vaddlv_u32 (uint32x2_t a) ++vaddlv_u32 (uint32x2_t __a) + { +- uint64_t result; +- __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); +- return result; ++ uint64_t __result; ++ __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(__result) : "w"(__a) : ); ++ return __result; + } + + __extension__ extern __inline int16x4_t +@@ -11324,367 +11357,367 @@ vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl1_p8 (poly8x16_t a, uint8x8_t b) ++vqtbl1_p8 (poly8x16_t __a, uint8x8_t __b) + { +- poly8x8_t result; ++ poly8x8_t __result; + 
__asm__ ("tbl %0.8b, {%1.16b}, %2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl1_s8 (int8x16_t a, uint8x8_t b) ++vqtbl1_s8 (int8x16_t __a, uint8x8_t __b) + { +- int8x8_t result; ++ int8x8_t __result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl1_u8 (uint8x16_t a, uint8x8_t b) ++vqtbl1_u8 (uint8x16_t __a, uint8x8_t __b) + { +- uint8x8_t result; ++ uint8x8_t __result; + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) ++vqtbl1q_p8 (poly8x16_t __a, uint8x16_t __b) + { +- poly8x16_t result; ++ poly8x16_t __result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl1q_s8 (int8x16_t a, uint8x16_t b) ++vqtbl1q_s8 (int8x16_t __a, uint8x16_t __b) + { +- int8x16_t result; ++ int8x16_t __result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) ++vqtbl1q_u8 (uint8x16_t __a, uint8x16_t __b) + { +- uint8x16_t result; ++ uint8x16_t __result; + __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) ++ : "=w"(__result) ++ : "w"(__a), "w"(__b) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) ++vqtbx1_s8 (int8x8_t __r, int8x16_t __tab, uint8x8_t __idx) + { +- int8x8_t result = r; ++ int8x8_t __result = __r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" +- : "+w"(result) +- : "w"(tab), "w"(idx) ++ : "+w"(__result) ++ : "w"(__tab), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx) ++vqtbx1_u8 (uint8x8_t __r, uint8x16_t __tab, uint8x8_t __idx) + { +- uint8x8_t result = r; ++ uint8x8_t __result = __r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" +- : "+w"(result) +- : "w"(tab), "w"(idx) ++ : "+w"(__result) ++ : "w"(__tab), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) ++vqtbx1_p8 (poly8x8_t __r, poly8x16_t __tab, 
uint8x8_t __idx) + { +- poly8x8_t result = r; ++ poly8x8_t __result = __r; + __asm__ ("tbx %0.8b,{%1.16b},%2.8b" +- : "+w"(result) +- : "w"(tab), "w"(idx) ++ : "+w"(__result) ++ : "w"(__tab), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) ++vqtbx1q_s8 (int8x16_t __r, int8x16_t __tab, uint8x16_t __idx) + { +- int8x16_t result = r; ++ int8x16_t __result = __r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" +- : "+w"(result) +- : "w"(tab), "w"(idx) ++ : "+w"(__result) ++ : "w"(__tab), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx) ++vqtbx1q_u8 (uint8x16_t __r, uint8x16_t __tab, uint8x16_t __idx) + { +- uint8x16_t result = r; ++ uint8x16_t __result = __r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" +- : "+w"(result) +- : "w"(tab), "w"(idx) ++ : "+w"(__result) ++ : "w"(__tab), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) ++vqtbx1q_p8 (poly8x16_t __r, poly8x16_t __tab, uint8x16_t __idx) + { +- poly8x16_t result = r; ++ poly8x16_t __result = __r; + __asm__ ("tbx %0.16b,{%1.16b},%2.16b" +- : "+w"(result) +- : "w"(tab), "w"(idx) ++ : "+w"(__result) ++ : "w"(__tab), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + /* V7 legacy table intrinsics. 
*/ + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl1_s8 (int8x8_t tab, int8x8_t idx) ++vtbl1_s8 (int8x8_t __tab, int8x8_t __idx) + { +- int8x8_t result; +- int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); ++ int8x8_t __result; ++ int8x16_t __temp = vcombine_s8 (__tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" +- : "=w"(result) +- : "w"(temp), "w"(idx) ++ : "=w"(__result) ++ : "w"(__temp), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) ++vtbl1_u8 (uint8x8_t __tab, uint8x8_t __idx) + { +- uint8x8_t result; +- uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); ++ uint8x8_t __result; ++ uint8x16_t __temp = vcombine_u8 (__tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" +- : "=w"(result) +- : "w"(temp), "w"(idx) ++ : "=w"(__result) ++ : "w"(__temp), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) ++vtbl1_p8 (poly8x8_t __tab, uint8x8_t __idx) + { +- poly8x8_t result; +- poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); ++ poly8x8_t __result; ++ poly8x16_t __temp = vcombine_p8 (__tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" +- : "=w"(result) +- : "w"(temp), "w"(idx) ++ : "=w"(__result) ++ : "w"(__temp), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl2_s8 (int8x8x2_t tab, int8x8_t idx) ++vtbl2_s8 (int8x8x2_t __tab, int8x8_t __idx) + { +- int8x8_t result; +- int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); ++ int8x8_t __result; ++ int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" +- : "=w"(result) +- : "w"(temp), "w"(idx) ++ : "=w"(__result) ++ : "w"(__temp), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx) ++vtbl2_u8 (uint8x8x2_t __tab, uint8x8_t __idx) + { +- uint8x8_t result; +- uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); ++ uint8x8_t __result; ++ uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" +- : "=w"(result) +- : "w"(temp), "w"(idx) ++ : "=w"(__result) ++ : "w"(__temp), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx) ++vtbl2_p8 (poly8x8x2_t __tab, uint8x8_t __idx) + { +- poly8x8_t result; +- poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); ++ poly8x8_t __result; ++ poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" +- : "=w"(result) +- : "w"(temp), "w"(idx) ++ : "=w"(__result) ++ : "w"(__temp), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + 
__extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) ++vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx) + { +- int8x8_t result; +- int8x16x2_t temp; ++ int8x8_t __result; ++ int8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; +- temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); +- temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); ++ __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); ++ __temp.val[1] = vcombine_s8 (__tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[0], 0); ++ (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[1], 1); +- result = __builtin_aarch64_tbl3v8qi (__o, idx); +- return result; ++ (int8x16_t) __temp.val[1], 1); ++ __result = __builtin_aarch64_tbl3v8qi (__o, __idx); ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) ++vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx) + { +- uint8x8_t result; +- uint8x16x2_t temp; ++ uint8x8_t __result; ++ uint8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; +- temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); +- temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); ++ __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); ++ __temp.val[1] = vcombine_u8 (__tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[0], 0); ++ (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[1], 1); +- result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); +- return result; ++ (int8x16_t) __temp.val[1], 1); ++ __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); ++ return __result; + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) ++vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx) + { +- poly8x8_t result; +- poly8x16x2_t temp; ++ poly8x8_t __result; ++ poly8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; +- temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); +- temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); ++ __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); ++ __temp.val[1] = vcombine_p8 (__tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[0], 0); ++ (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[1], 1); +- result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); +- return result; ++ (int8x16_t) __temp.val[1], 1); ++ __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); ++ return __result; + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl4_s8 (int8x8x4_t tab, int8x8_t idx) ++vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx) + { +- int8x8_t result; +- int8x16x2_t temp; ++ int8x8_t __result; ++ int8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; +- temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); +- temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); ++ __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); ++ 
__temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[0], 0); ++ (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[1], 1); +- result = __builtin_aarch64_tbl3v8qi (__o, idx); +- return result; ++ (int8x16_t) __temp.val[1], 1); ++ __result = __builtin_aarch64_tbl3v8qi (__o, __idx); ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx) ++vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx) + { +- uint8x8_t result; +- uint8x16x2_t temp; ++ uint8x8_t __result; ++ uint8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; +- temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); +- temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); ++ __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); ++ __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[0], 0); ++ (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[1], 1); +- result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); +- return result; ++ (int8x16_t) __temp.val[1], 1); ++ __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); ++ return __result; + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) ++vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx) + { +- poly8x8_t result; +- poly8x16x2_t temp; ++ poly8x8_t __result; ++ poly8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; +- temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); +- temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); ++ __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); ++ __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[0], 0); ++ (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[1], 1); +- result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); +- return result; ++ (int8x16_t) __temp.val[1], 1); ++ __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); ++ return __result; + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) ++vtbx2_s8 (int8x8_t __r, int8x8x2_t __tab, int8x8_t __idx) + { +- int8x8_t result = r; +- int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); ++ int8x8_t __result = __r; ++ int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" +- : "+w"(result) +- : "w"(temp), "w"(idx) ++ : "+w"(__result) ++ : "w"(__temp), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx) ++vtbx2_u8 (uint8x8_t __r, uint8x8x2_t __tab, uint8x8_t __idx) + { +- uint8x8_t result = r; +- uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); ++ uint8x8_t __result = __r; ++ uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" +- : "+w"(result) +- : "w"(temp), "w"(idx) ++ : "+w"(__result) ++ : "w"(__temp), "w"(__idx) + : 
/* No clobbers */); +- return result; ++ return __result; + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) ++vtbx2_p8 (poly8x8_t __r, poly8x8x2_t __tab, uint8x8_t __idx) + { +- poly8x8_t result = r; +- poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); ++ poly8x8_t __result = __r; ++ poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]); + __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" +- : "+w"(result) +- : "w"(temp), "w"(idx) ++ : "+w"(__result) ++ : "w"(__temp), "w"(__idx) + : /* No clobbers */); +- return result; ++ return __result; + } + + /* End of temporary inline asm. */ +@@ -17063,98 +17096,98 @@ vld1_f16 (const float16_t *__a) + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_f32 (const float32_t *a) ++vld1_f32 (const float32_t *__a) + { +- return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a); ++ return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) __a); + } + + __extension__ extern __inline float64x1_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_f64 (const float64_t *a) ++vld1_f64 (const float64_t *__a) + { +- return (float64x1_t) {*a}; ++ return (float64x1_t) {*__a}; + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_p8 (const poly8_t *a) ++vld1_p8 (const poly8_t *__a) + { + return (poly8x8_t) +- __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); ++ __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); + } + + __extension__ extern __inline poly16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_p16 (const poly16_t *a) ++vld1_p16 (const poly16_t *__a) + { + return (poly16x4_t) +- __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); ++ __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); + } + + __extension__ extern __inline poly64x1_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_p64 (const poly64_t *a) ++vld1_p64 (const poly64_t *__a) + { +- return (poly64x1_t) {*a}; ++ return (poly64x1_t) {*__a}; + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_s8 (const int8_t *a) ++vld1_s8 (const int8_t *__a) + { +- return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); ++ return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_s16 (const int16_t *a) ++vld1_s16 (const int16_t *__a) + { +- return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); ++ return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_s32 (const int32_t *a) ++vld1_s32 (const int32_t *__a) + { +- return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); ++ return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) __a); + } + + __extension__ extern __inline int64x1_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_s64 (const int64_t *a) ++vld1_s64 (const int64_t *__a) + { +- return (int64x1_t) {*a}; ++ return 
(int64x1_t) {*__a}; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_u8 (const uint8_t *a) ++vld1_u8 (const uint8_t *__a) + { + return (uint8x8_t) +- __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); ++ __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_u16 (const uint16_t *a) ++vld1_u16 (const uint16_t *__a) + { + return (uint16x4_t) +- __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); ++ __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_u32 (const uint32_t *a) ++vld1_u32 (const uint32_t *__a) + { + return (uint32x2_t) +- __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); ++ __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) __a); + } + + __extension__ extern __inline uint64x1_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1_u64 (const uint64_t *a) ++vld1_u64 (const uint64_t *__a) + { +- return (uint64x1_t) {*a}; ++ return (uint64x1_t) {*__a}; + } + + /* vld1x3 */ +@@ -17536,76 +17569,76 @@ vld1q_f16 (const float16_t *__a) + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_f32 (const float32_t *a) ++vld1q_f32 (const float32_t *__a) + { +- return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a); ++ return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) __a); + } + + __extension__ extern __inline float64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_f64 (const float64_t *a) ++vld1q_f64 (const float64_t *__a) + { +- return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a); ++ return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) __a); + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_p8 (const poly8_t *a) ++vld1q_p8 (const poly8_t *__a) + { + return (poly8x16_t) +- __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); ++ __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); + } + + __extension__ extern __inline poly16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_p16 (const poly16_t *a) ++vld1q_p16 (const poly16_t *__a) + { + return (poly16x8_t) +- __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); ++ __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); + } + + __extension__ extern __inline poly64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_p64 (const poly64_t *a) ++vld1q_p64 (const poly64_t *__a) + { + return (poly64x2_t) +- __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); ++ __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_s8 (const int8_t *a) ++vld1q_s8 (const int8_t *__a) + { +- return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); ++ return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); + } + + __extension__ extern __inline int16x8_t + __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_s16 (const int16_t *a) ++vld1q_s16 (const int16_t *__a) + { +- return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); ++ return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_s32 (const int32_t *a) ++vld1q_s32 (const int32_t *__a) + { +- return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); ++ return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) __a); + } + + __extension__ extern __inline int64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_s64 (const int64_t *a) ++vld1q_s64 (const int64_t *__a) + { +- return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); ++ return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_u8 (const uint8_t *a) ++vld1q_u8 (const uint8_t *__a) + { + return (uint8x16_t) +- __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); ++ __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); + } + + __extension__ extern __inline uint8x8x2_t +@@ -17946,26 +17979,308 @@ vld1q_p64_x2 (const poly64_t *__a) + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_u16 (const uint16_t *a) ++vld1q_u16 (const uint16_t *__a) + { + return (uint16x8_t) +- __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); ++ __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_u32 (const uint32_t *a) ++vld1q_u32 (const uint32_t *__a) + { + return (uint32x4_t) +- __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); ++ __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) __a); + } + + __extension__ extern __inline uint64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vld1q_u64 (const uint64_t *a) ++vld1q_u64 (const uint64_t *__a) + { + return (uint64x2_t) +- __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); ++ __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); ++} ++ ++/* vld1(q)_x4. 
*/ ++ ++__extension__ extern __inline int8x8x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_s8_x4 (const int8_t *__a) ++{ ++ union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline int8x16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_s8_x4 (const int8_t *__a) ++{ ++ union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline int16x4x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_s16_x4 (const int16_t *__a) ++{ ++ union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline int16x8x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_s16_x4 (const int16_t *__a) ++{ ++ union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline int32x2x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_s32_x4 (const int32_t *__a) ++{ ++ union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline int32x4x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_s32_x4 (const int32_t *__a) ++{ ++ union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline uint8x8x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_u8_x4 (const uint8_t *__a) ++{ ++ union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline uint8x16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_u8_x4 (const uint8_t *__a) ++{ ++ union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline uint16x4x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_u16_x4 (const uint16_t *__a) ++{ ++ union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline uint16x8x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_u16_x4 (const uint16_t *__a) ++{ ++ union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline uint32x2x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_u32_x4 (const uint32_t *__a) ++{ ++ union { uint32x2x4_t __i; 
__builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline uint32x4x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_u32_x4 (const uint32_t *__a) ++{ ++ union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline float16x4x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_f16_x4 (const float16_t *__a) ++{ ++ union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v4hf ((const __builtin_aarch64_simd_hf *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline float16x8x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_f16_x4 (const float16_t *__a) ++{ ++ union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v8hf ((const __builtin_aarch64_simd_hf *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline float32x2x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_f32_x4 (const float32_t *__a) ++{ ++ union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v2sf ((const __builtin_aarch64_simd_sf *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline float32x4x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_f32_x4 (const float32_t *__a) ++{ ++ union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v4sf ((const __builtin_aarch64_simd_sf *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline poly8x8x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_p8_x4 (const poly8_t *__a) ++{ ++ union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline poly8x16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_p8_x4 (const poly8_t *__a) ++{ ++ union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline poly16x4x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_p16_x4 (const poly16_t *__a) ++{ ++ union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline poly16x8x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_p16_x4 (const poly16_t *__a) ++{ ++ union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline int64x1x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_s64_x4 (const int64_t *__a) ++{ ++ union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); ++ return __au.__i; ++} ++ 
++__extension__ extern __inline uint64x1x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_u64_x4 (const uint64_t *__a) ++{ ++ union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline poly64x1x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_p64_x4 (const poly64_t *__a) ++{ ++ union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline int64x2x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_s64_x4 (const int64_t *__a) ++{ ++ union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline uint64x2x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_u64_x4 (const uint64_t *__a) ++{ ++ union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline poly64x2x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_p64_x4 (const poly64_t *__a) ++{ ++ union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline float64x1x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_f64_x4 (const float64_t *__a) ++{ ++ union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4df ((const __builtin_aarch64_simd_df *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline float64x2x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_f64_x4 (const float64_t *__a) ++{ ++ union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v2df ((const __builtin_aarch64_simd_df *) __a); ++ return __au.__i; + } + + /* vld1_dup */ +@@ -21115,328 +21430,328 @@ vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane) + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmax_s8 (int8x8_t a, int8x8_t b) ++vpmax_s8 (int8x8_t __a, int8x8_t __b) + { +- return __builtin_aarch64_smaxpv8qi (a, b); ++ return __builtin_aarch64_smaxpv8qi (__a, __b); + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmax_s16 (int16x4_t a, int16x4_t b) ++vpmax_s16 (int16x4_t __a, int16x4_t __b) + { +- return __builtin_aarch64_smaxpv4hi (a, b); ++ return __builtin_aarch64_smaxpv4hi (__a, __b); + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmax_s32 (int32x2_t a, int32x2_t b) ++vpmax_s32 (int32x2_t __a, int32x2_t __b) + { +- return __builtin_aarch64_smaxpv2si (a, b); ++ return __builtin_aarch64_smaxpv2si (__a, __b); + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmax_u8 (uint8x8_t a, uint8x8_t b) ++vpmax_u8 (uint8x8_t __a, 
uint8x8_t __b) + { +- return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) a, +- (int8x8_t) b); ++ return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) __a, ++ (int8x8_t) __b); + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmax_u16 (uint16x4_t a, uint16x4_t b) ++vpmax_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) a, +- (int16x4_t) b); ++ return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) __a, ++ (int16x4_t) __b); + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmax_u32 (uint32x2_t a, uint32x2_t b) ++vpmax_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) a, +- (int32x2_t) b); ++ return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) __a, ++ (int32x2_t) __b); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxq_s8 (int8x16_t a, int8x16_t b) ++vpmaxq_s8 (int8x16_t __a, int8x16_t __b) + { +- return __builtin_aarch64_smaxpv16qi (a, b); ++ return __builtin_aarch64_smaxpv16qi (__a, __b); + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxq_s16 (int16x8_t a, int16x8_t b) ++vpmaxq_s16 (int16x8_t __a, int16x8_t __b) + { +- return __builtin_aarch64_smaxpv8hi (a, b); ++ return __builtin_aarch64_smaxpv8hi (__a, __b); + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxq_s32 (int32x4_t a, int32x4_t b) ++vpmaxq_s32 (int32x4_t __a, int32x4_t __b) + { +- return __builtin_aarch64_smaxpv4si (a, b); ++ return __builtin_aarch64_smaxpv4si (__a, __b); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxq_u8 (uint8x16_t a, uint8x16_t b) ++vpmaxq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) a, +- (int8x16_t) b); ++ return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) __a, ++ (int8x16_t) __b); + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxq_u16 (uint16x8_t a, uint16x8_t b) ++vpmaxq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) a, +- (int16x8_t) b); ++ return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) __a, ++ (int16x8_t) __b); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxq_u32 (uint32x4_t a, uint32x4_t b) ++vpmaxq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) a, +- (int32x4_t) b); ++ return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) __a, ++ (int32x4_t) __b); + } + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmax_f32 (float32x2_t a, float32x2_t b) ++vpmax_f32 (float32x2_t __a, float32x2_t __b) + { +- return __builtin_aarch64_smax_nanpv2sf (a, b); ++ return __builtin_aarch64_smax_nanpv2sf (__a, __b); + } + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxq_f32 (float32x4_t a, float32x4_t b) ++vpmaxq_f32 (float32x4_t __a, float32x4_t 
__b) + { +- return __builtin_aarch64_smax_nanpv4sf (a, b); ++ return __builtin_aarch64_smax_nanpv4sf (__a, __b); + } + + __extension__ extern __inline float64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxq_f64 (float64x2_t a, float64x2_t b) ++vpmaxq_f64 (float64x2_t __a, float64x2_t __b) + { +- return __builtin_aarch64_smax_nanpv2df (a, b); ++ return __builtin_aarch64_smax_nanpv2df (__a, __b); + } + + __extension__ extern __inline float64_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxqd_f64 (float64x2_t a) ++vpmaxqd_f64 (float64x2_t __a) + { +- return __builtin_aarch64_reduc_smax_nan_scal_v2df (a); ++ return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a); + } + + __extension__ extern __inline float32_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxs_f32 (float32x2_t a) ++vpmaxs_f32 (float32x2_t __a) + { +- return __builtin_aarch64_reduc_smax_nan_scal_v2sf (a); ++ return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a); + } + + /* vpmaxnm */ + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxnm_f32 (float32x2_t a, float32x2_t b) ++vpmaxnm_f32 (float32x2_t __a, float32x2_t __b) + { +- return __builtin_aarch64_smaxpv2sf (a, b); ++ return __builtin_aarch64_smaxpv2sf (__a, __b); + } + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxnmq_f32 (float32x4_t a, float32x4_t b) ++vpmaxnmq_f32 (float32x4_t __a, float32x4_t __b) + { +- return __builtin_aarch64_smaxpv4sf (a, b); ++ return __builtin_aarch64_smaxpv4sf (__a, __b); + } + + __extension__ extern __inline float64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxnmq_f64 (float64x2_t a, float64x2_t b) ++vpmaxnmq_f64 (float64x2_t __a, float64x2_t __b) + { +- return __builtin_aarch64_smaxpv2df (a, b); ++ return __builtin_aarch64_smaxpv2df (__a, __b); + } + + __extension__ extern __inline float64_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxnmqd_f64 (float64x2_t a) ++vpmaxnmqd_f64 (float64x2_t __a) + { +- return __builtin_aarch64_reduc_smax_scal_v2df (a); ++ return __builtin_aarch64_reduc_smax_scal_v2df (__a); + } + + __extension__ extern __inline float32_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxnms_f32 (float32x2_t a) ++vpmaxnms_f32 (float32x2_t __a) + { +- return __builtin_aarch64_reduc_smax_scal_v2sf (a); ++ return __builtin_aarch64_reduc_smax_scal_v2sf (__a); + } + + /* vpmin */ + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmin_s8 (int8x8_t a, int8x8_t b) ++vpmin_s8 (int8x8_t __a, int8x8_t __b) + { +- return __builtin_aarch64_sminpv8qi (a, b); ++ return __builtin_aarch64_sminpv8qi (__a, __b); + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmin_s16 (int16x4_t a, int16x4_t b) ++vpmin_s16 (int16x4_t __a, int16x4_t __b) + { +- return __builtin_aarch64_sminpv4hi (a, b); ++ return __builtin_aarch64_sminpv4hi (__a, __b); + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmin_s32 (int32x2_t a, int32x2_t b) ++vpmin_s32 (int32x2_t __a, int32x2_t __b) + { +- return __builtin_aarch64_sminpv2si (a, b); ++ return __builtin_aarch64_sminpv2si (__a, __b); + } + + __extension__ extern __inline uint8x8_t + 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmin_u8 (uint8x8_t a, uint8x8_t b) ++vpmin_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) a, +- (int8x8_t) b); ++ return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) __a, ++ (int8x8_t) __b); + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmin_u16 (uint16x4_t a, uint16x4_t b) ++vpmin_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) a, +- (int16x4_t) b); ++ return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) __a, ++ (int16x4_t) __b); + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmin_u32 (uint32x2_t a, uint32x2_t b) ++vpmin_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) a, +- (int32x2_t) b); ++ return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) __a, ++ (int32x2_t) __b); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminq_s8 (int8x16_t a, int8x16_t b) ++vpminq_s8 (int8x16_t __a, int8x16_t __b) + { +- return __builtin_aarch64_sminpv16qi (a, b); ++ return __builtin_aarch64_sminpv16qi (__a, __b); + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminq_s16 (int16x8_t a, int16x8_t b) ++vpminq_s16 (int16x8_t __a, int16x8_t __b) + { +- return __builtin_aarch64_sminpv8hi (a, b); ++ return __builtin_aarch64_sminpv8hi (__a, __b); + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminq_s32 (int32x4_t a, int32x4_t b) ++vpminq_s32 (int32x4_t __a, int32x4_t __b) + { +- return __builtin_aarch64_sminpv4si (a, b); ++ return __builtin_aarch64_sminpv4si (__a, __b); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminq_u8 (uint8x16_t a, uint8x16_t b) ++vpminq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) a, +- (int8x16_t) b); ++ return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) __a, ++ (int8x16_t) __b); + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminq_u16 (uint16x8_t a, uint16x8_t b) ++vpminq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) a, +- (int16x8_t) b); ++ return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) __a, ++ (int16x8_t) __b); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminq_u32 (uint32x4_t a, uint32x4_t b) ++vpminq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) a, +- (int32x4_t) b); ++ return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) __a, ++ (int32x4_t) __b); + } + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmin_f32 (float32x2_t a, float32x2_t b) ++vpmin_f32 (float32x2_t __a, float32x2_t __b) + { +- return __builtin_aarch64_smin_nanpv2sf (a, b); ++ return __builtin_aarch64_smin_nanpv2sf (__a, __b); + } + + __extension__ extern __inline float32x4_t + __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +-vpminq_f32 (float32x4_t a, float32x4_t b) ++vpminq_f32 (float32x4_t __a, float32x4_t __b) + { +- return __builtin_aarch64_smin_nanpv4sf (a, b); ++ return __builtin_aarch64_smin_nanpv4sf (__a, __b); + } + + __extension__ extern __inline float64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminq_f64 (float64x2_t a, float64x2_t b) ++vpminq_f64 (float64x2_t __a, float64x2_t __b) + { +- return __builtin_aarch64_smin_nanpv2df (a, b); ++ return __builtin_aarch64_smin_nanpv2df (__a, __b); + } + + __extension__ extern __inline float64_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminqd_f64 (float64x2_t a) ++vpminqd_f64 (float64x2_t __a) + { +- return __builtin_aarch64_reduc_smin_nan_scal_v2df (a); ++ return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a); + } + + __extension__ extern __inline float32_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmins_f32 (float32x2_t a) ++vpmins_f32 (float32x2_t __a) + { +- return __builtin_aarch64_reduc_smin_nan_scal_v2sf (a); ++ return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a); + } + + /* vpminnm */ + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminnm_f32 (float32x2_t a, float32x2_t b) ++vpminnm_f32 (float32x2_t __a, float32x2_t __b) + { +- return __builtin_aarch64_sminpv2sf (a, b); ++ return __builtin_aarch64_sminpv2sf (__a, __b); + } + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminnmq_f32 (float32x4_t a, float32x4_t b) ++vpminnmq_f32 (float32x4_t __a, float32x4_t __b) + { +- return __builtin_aarch64_sminpv4sf (a, b); ++ return __builtin_aarch64_sminpv4sf (__a, __b); + } + + __extension__ extern __inline float64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminnmq_f64 (float64x2_t a, float64x2_t b) ++vpminnmq_f64 (float64x2_t __a, float64x2_t __b) + { +- return __builtin_aarch64_sminpv2df (a, b); ++ return __builtin_aarch64_sminpv2df (__a, __b); + } + + __extension__ extern __inline float64_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminnmqd_f64 (float64x2_t a) ++vpminnmqd_f64 (float64x2_t __a) + { +- return __builtin_aarch64_reduc_smin_scal_v2df (a); ++ return __builtin_aarch64_reduc_smin_scal_v2df (__a); + } + + __extension__ extern __inline float32_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminnms_f32 (float32x2_t a) ++vpminnms_f32 (float32x2_t __a) + { +- return __builtin_aarch64_reduc_smin_scal_v2sf (a); ++ return __builtin_aarch64_reduc_smin_scal_v2sf (__a); + } + + /* vmaxnm */ +@@ -21889,9 +22204,9 @@ vminnmvq_f64 (float64x2_t __a) + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c) ++vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) + { +- return a + b * c; ++ return __a + __b * __c; + } + + __extension__ extern __inline float64x1_t +@@ -21903,16 +22218,16 @@ vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) ++vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) + { +- return a + b * c; ++ return __a + __b * __c; + } + + __extension__ 
extern __inline float64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) ++vmlaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) + { +- return a + b * c; ++ return __a + __b * __c; + } + + /* vmla_lane */ +@@ -22087,9 +22402,9 @@ vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c) ++vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) + { +- return a - b * c; ++ return __a - __b * __c; + } + + __extension__ extern __inline float64x1_t +@@ -22101,16 +22416,16 @@ vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) ++vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) + { +- return a - b * c; ++ return __a - __b * __c; + } + + __extension__ extern __inline float64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) ++vmlsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) + { +- return a - b * c; ++ return __a - __b * __c; + } + + /* vmls_lane */ +@@ -24874,419 +25189,419 @@ vqsubd_u64 (uint64_t __a, uint64_t __b) + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) ++vqtbl2_s8 (int8x16x2_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); +- return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); ++ return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx) ++vqtbl2_u8 (uint8x16x2_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); +- return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) ++vqtbl2_p8 (poly8x16x2_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); +- return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); + } + + __extension__ extern __inline int8x16_t + 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) ++vqtbl2q_s8 (int8x16x2_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); +- return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) ++vqtbl2q_u8 (uint8x16x2_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); +- return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) ++vqtbl2q_p8 (poly8x16x2_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); +- return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); + } + + /* vqtbl3 */ + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) ++vqtbl3_s8 (int8x16x3_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); +- return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) ++vqtbl3_u8 (uint8x16x3_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); +- return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) ++vqtbl3_p8 (poly8x16x3_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); +- return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) ++vqtbl3q_s8 (int8x16x3_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); +- return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) ++vqtbl3q_u8 (uint8x16x3_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); +- return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) ++vqtbl3q_p8 (poly8x16x3_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); +- return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, 
(int8x16_t)__tab.val[2], 2); ++ return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); + } + + /* vqtbl4 */ + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) ++vqtbl4_s8 (int8x16x4_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); +- return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); ++ return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) ++vqtbl4_u8 (uint8x16x4_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); +- return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); ++ return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) ++vqtbl4_p8 (poly8x16x4_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); +- return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); ++ return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) ++vqtbl4q_s8 (int8x16x4_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); +- return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); ++ return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) ++vqtbl4q_u8 (uint8x16x4_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); +- return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); ++ return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) ++vqtbl4q_p8 (poly8x16x4_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); +- return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); ++ return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); + } + + + /* vqtbx2 */ + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) ++vqtbx2_s8 (int8x8_t __r, int8x16x2_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); +- return __builtin_aarch64_tbx4v8qi (r, __o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); ++ return __builtin_aarch64_tbx4v8qi (__r, __o, (int8x8_t)__idx); + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) ++vqtbx2_u8 (uint8x8_t __r, uint8x16x2_t __tab, uint8x8_t __idx) + { 
+ __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); +- return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, +- (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, ++ (int8x8_t)__idx); + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) ++vqtbx2_p8 (poly8x8_t __r, poly8x16x2_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); +- return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, +- (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, ++ (int8x8_t)__idx); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) ++vqtbx2q_s8 (int8x16_t __r, int8x16x2_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); +- return __builtin_aarch64_tbx4v16qi (r, __o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); ++ return __builtin_aarch64_tbx4v16qi (__r, __o, (int8x16_t)__idx); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) ++vqtbx2q_u8 (uint8x16_t __r, uint8x16x2_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); +- return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, +- (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o, ++ (int8x16_t)__idx); + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) ++vqtbx2q_p8 (poly8x16_t __r, poly8x16x2_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); +- return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, +- (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o, ++ (int8x16_t)__idx); + } + + /* vqtbx3 */ + __extension__ 
extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) ++vqtbx3_s8 (int8x8_t __r, int8x16x3_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); +- return __builtin_aarch64_qtbx3v8qi (r, __o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2); ++ return __builtin_aarch64_qtbx3v8qi (__r, __o, (int8x8_t)__idx); + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) ++vqtbx3_u8 (uint8x8_t __r, uint8x16x3_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); +- return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, +- (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o, ++ (int8x8_t)__idx); + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) ++vqtbx3_p8 (poly8x8_t __r, poly8x16x3_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); +- return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, +- (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o, ++ (int8x8_t)__idx); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) ++vqtbx3q_s8 (int8x16_t __r, int8x16x3_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); +- return __builtin_aarch64_qtbx3v16qi (r, __o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2); ++ return __builtin_aarch64_qtbx3v16qi (__r, __o, (int8x16_t)__idx); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
+-vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx) ++vqtbx3q_u8 (uint8x16_t __r, uint8x16x3_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); +- return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, +- (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o, ++ (int8x16_t)__idx); + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) ++vqtbx3q_p8 (poly8x16_t __r, poly8x16x3_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); +- return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, +- (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o, ++ (int8x16_t)__idx); + } + + /* vqtbx4 */ + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) ++vqtbx4_s8 (int8x8_t __r, int8x16x4_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); +- return __builtin_aarch64_qtbx4v8qi (r, __o, (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3); ++ return __builtin_aarch64_qtbx4v8qi (__r, __o, (int8x8_t)__idx); + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) ++vqtbx4_u8 (uint8x8_t __r, uint8x16x4_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); +- return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, +- (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); ++ return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o, ++ (int8x8_t)__idx); + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) ++vqtbx4_p8 (poly8x8_t __r, poly8x16x4_t __tab, uint8x8_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); +- return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, +- (int8x8_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); ++ return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o, ++ (int8x8_t)__idx); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) ++vqtbx4q_s8 (int8x16_t __r, int8x16x4_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); +- return __builtin_aarch64_qtbx4v16qi (r, __o, (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3); ++ return __builtin_aarch64_qtbx4v16qi (__r, __o, (int8x16_t)__idx); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) ++vqtbx4q_u8 (uint8x16_t __r, uint8x16x4_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); +- return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, +- (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); ++ return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o, ++ (int8x16_t)__idx); + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) ++vqtbx4q_p8 (poly8x16_t __r, 
poly8x16x4_t __tab, uint8x16_t __idx) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); +- return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, +- (int8x16_t)idx); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); ++ return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o, ++ (int8x16_t)__idx); + } + + /* vrbit */ +@@ -25457,134 +25772,134 @@ vrecpxd_f64 (float64_t __a) + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev16_p8 (poly8x8_t a) ++vrev16_p8 (poly8x8_t __a) + { +- return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++ return __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev16_s8 (int8x8_t a) ++vrev16_s8 (int8x8_t __a) + { +- return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++ return __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev16_u8 (uint8x8_t a) ++vrev16_u8 (uint8x8_t __a) + { +- return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++ return __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev16q_p8 (poly8x16_t a) ++vrev16q_p8 (poly8x16_t __a) + { +- return __builtin_shuffle (a, ++ return __builtin_shuffle (__a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev16q_s8 (int8x16_t a) ++vrev16q_s8 (int8x16_t __a) + { +- return __builtin_shuffle (a, ++ return __builtin_shuffle (__a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev16q_u8 (uint8x16_t a) ++vrev16q_u8 (uint8x16_t __a) + { +- return __builtin_shuffle (a, ++ return __builtin_shuffle (__a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32_p8 (poly8x8_t a) ++vrev32_p8 (poly8x8_t __a) + { +- return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++ return __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + } + + __extension__ extern __inline poly16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32_p16 (poly16x4_t a) ++vrev32_p16 (poly16x4_t __a) + { +- return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); ++ return __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); + } + + __extension__ extern __inline 
int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32_s8 (int8x8_t a) ++vrev32_s8 (int8x8_t __a) + { +- return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++ return __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32_s16 (int16x4_t a) ++vrev32_s16 (int16x4_t __a) + { +- return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); ++ return __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32_u8 (uint8x8_t a) ++vrev32_u8 (uint8x8_t __a) + { +- return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++ return __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32_u16 (uint16x4_t a) ++vrev32_u16 (uint16x4_t __a) + { +- return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); ++ return __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32q_p8 (poly8x16_t a) ++vrev32q_p8 (poly8x16_t __a) + { +- return __builtin_shuffle (a, ++ return __builtin_shuffle (__a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); + } + + __extension__ extern __inline poly16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32q_p16 (poly16x8_t a) ++vrev32q_p16 (poly16x8_t __a) + { +- return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++ return __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32q_s8 (int8x16_t a) ++vrev32q_s8 (int8x16_t __a) + { +- return __builtin_shuffle (a, ++ return __builtin_shuffle (__a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32q_s16 (int16x8_t a) ++vrev32q_s16 (int16x8_t __a) + { +- return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++ return __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32q_u8 (uint8x16_t a) ++vrev32q_u8 (uint8x16_t __a) + { +- return __builtin_shuffle (a, ++ return __builtin_shuffle (__a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev32q_u16 (uint16x8_t a) ++vrev32q_u16 (uint16x8_t __a) + { +- return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++ return __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); + } + + __extension__ extern __inline float16x4_t +@@ -25596,65 +25911,65 @@ vrev64_f16 (float16x4_t __a) + + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64_f32 (float32x2_t a) ++vrev64_f32 (float32x2_t __a) + { +- return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); ++ return __builtin_shuffle (__a, 
(uint32x2_t) { 1, 0 }); + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64_p8 (poly8x8_t a) ++vrev64_p8 (poly8x8_t __a) + { +- return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); ++ return __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + __extension__ extern __inline poly16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64_p16 (poly16x4_t a) ++vrev64_p16 (poly16x4_t __a) + { +- return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); ++ return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); + } + + __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64_s8 (int8x8_t a) ++vrev64_s8 (int8x8_t __a) + { +- return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); ++ return __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + __extension__ extern __inline int16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64_s16 (int16x4_t a) ++vrev64_s16 (int16x4_t __a) + { +- return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); ++ return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); + } + + __extension__ extern __inline int32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64_s32 (int32x2_t a) ++vrev64_s32 (int32x2_t __a) + { +- return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); ++ return __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64_u8 (uint8x8_t a) ++vrev64_u8 (uint8x8_t __a) + { +- return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); ++ return __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); + } + + __extension__ extern __inline uint16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64_u16 (uint16x4_t a) ++vrev64_u16 (uint16x4_t __a) + { +- return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); ++ return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); + } + + __extension__ extern __inline uint32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64_u32 (uint32x2_t a) ++vrev64_u32 (uint32x2_t __a) + { +- return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); ++ return __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); + } + + __extension__ extern __inline float16x8_t +@@ -25666,68 +25981,68 @@ vrev64q_f16 (float16x8_t __a) + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64q_f32 (float32x4_t a) ++vrev64q_f32 (float32x4_t __a) + { +- return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); ++ return __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); + } + + __extension__ extern __inline poly8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64q_p8 (poly8x16_t a) ++vrev64q_p8 (poly8x16_t __a) + { +- return __builtin_shuffle (a, ++ return __builtin_shuffle (__a, + (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); + } + + __extension__ extern __inline poly16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64q_p16 (poly16x8_t a) ++vrev64q_p16 (poly16x8_t __a) + { +- return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++ return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 
6, 5, 4 }); + } + + __extension__ extern __inline int8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64q_s8 (int8x16_t a) ++vrev64q_s8 (int8x16_t __a) + { +- return __builtin_shuffle (a, ++ return __builtin_shuffle (__a, + (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); + } + + __extension__ extern __inline int16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64q_s16 (int16x8_t a) ++vrev64q_s16 (int16x8_t __a) + { +- return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++ return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + } + + __extension__ extern __inline int32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64q_s32 (int32x4_t a) ++vrev64q_s32 (int32x4_t __a) + { +- return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); ++ return __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); + } + + __extension__ extern __inline uint8x16_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64q_u8 (uint8x16_t a) ++vrev64q_u8 (uint8x16_t __a) + { +- return __builtin_shuffle (a, ++ return __builtin_shuffle (__a, + (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); + } + + __extension__ extern __inline uint16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64q_u16 (uint16x8_t a) ++vrev64q_u16 (uint16x8_t __a) + { +- return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++ return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrev64q_u32 (uint32x4_t a) ++vrev64q_u32 (uint32x4_t __a) + { +- return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); ++ return __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); + } + + /* vrnd */ +@@ -26420,87 +26735,90 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) ++vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) + { +- return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); ++ return __builtin_aarch64_crypto_sha1cv4si_uuuu (__hash_abcd, __hash_e, __wk); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) ++vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) + { +- return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); ++ return __builtin_aarch64_crypto_sha1mv4si_uuuu (__hash_abcd, __hash_e, __wk); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) ++vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) + { +- return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk); ++ return __builtin_aarch64_crypto_sha1pv4si_uuuu (__hash_abcd, __hash_e, __wk); + } + + __extension__ extern __inline uint32_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsha1h_u32 (uint32_t hash_e) ++vsha1h_u32 (uint32_t __hash_e) + { +- return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); ++ return 
__builtin_aarch64_crypto_sha1hsi_uu (__hash_e); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) ++vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) + { +- return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); ++ return __builtin_aarch64_crypto_sha1su0v4si_uuuu (__w0_3, __w4_7, __w8_11); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) ++vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) + { +- return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); ++ return __builtin_aarch64_crypto_sha1su1v4si_uuu (__tw0_3, __w12_15); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) ++vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) + { +- return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); ++ return __builtin_aarch64_crypto_sha256hv4si_uuuu (__hash_abcd, __hash_efgh, ++ __wk); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) ++vsha256h2q_u32 (uint32x4_t __hash_efgh, uint32x4_t __hash_abcd, uint32x4_t __wk) + { +- return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); ++ return __builtin_aarch64_crypto_sha256h2v4si_uuuu (__hash_efgh, __hash_abcd, ++ __wk); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) ++vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) + { +- return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); ++ return __builtin_aarch64_crypto_sha256su0v4si_uuu (__w0_3, __w4_7); + } + + __extension__ extern __inline uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) ++vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) + { +- return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); ++ return __builtin_aarch64_crypto_sha256su1v4si_uuuu (__tw0_3, __w8_11, ++ __w12_15); + } + + __extension__ extern __inline poly128_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_p64 (poly64_t a, poly64_t b) ++vmull_p64 (poly64_t __a, poly64_t __b) + { + return +- __builtin_aarch64_crypto_pmulldi_ppp (a, b); ++ __builtin_aarch64_crypto_pmulldi_ppp (__a, __b); + } + + __extension__ extern __inline poly128_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vmull_high_p64 (poly64x2_t a, poly64x2_t b) ++vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) + { +- return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); ++ return __builtin_aarch64_crypto_pmullv2di_ppp (__a, __b); + } + + #pragma GCC pop_options +@@ -27202,30 +27520,30 @@ vsqaddd_u64 (uint64_t __a, int64_t __b) + /* vsqrt */ + __extension__ extern __inline float32x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsqrt_f32 (float32x2_t a) ++vsqrt_f32 (float32x2_t __a) + { +- return __builtin_aarch64_sqrtv2sf (a); ++ return 
__builtin_aarch64_sqrtv2sf (__a); + } + + __extension__ extern __inline float32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsqrtq_f32 (float32x4_t a) ++vsqrtq_f32 (float32x4_t __a) + { +- return __builtin_aarch64_sqrtv4sf (a); ++ return __builtin_aarch64_sqrtv4sf (__a); + } + + __extension__ extern __inline float64x1_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsqrt_f64 (float64x1_t a) ++vsqrt_f64 (float64x1_t __a) + { +- return (float64x1_t) { __builtin_aarch64_sqrtdf (a[0]) }; ++ return (float64x1_t) { __builtin_aarch64_sqrtdf (__a[0]) }; + } + + __extension__ extern __inline float64x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsqrtq_f64 (float64x2_t a) ++vsqrtq_f64 (float64x2_t __a) + { +- return __builtin_aarch64_sqrtv2df (a); ++ return __builtin_aarch64_sqrtv2df (__a); + } + + /* vsra */ +@@ -27495,98 +27813,98 @@ vst1_f16 (float16_t *__a, float16x4_t __b) + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_f32 (float32_t *a, float32x2_t b) ++vst1_f32 (float32_t *__a, float32x2_t __b) + { +- __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b); ++ __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) __a, __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_f64 (float64_t *a, float64x1_t b) ++vst1_f64 (float64_t *__a, float64x1_t __b) + { +- *a = b[0]; ++ *__a = __b[0]; + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_p8 (poly8_t *a, poly8x8_t b) ++vst1_p8 (poly8_t *__a, poly8x8_t __b) + { +- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, +- (int8x8_t) b); ++ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, ++ (int8x8_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_p16 (poly16_t *a, poly16x4_t b) ++vst1_p16 (poly16_t *__a, poly16x4_t __b) + { +- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, +- (int16x4_t) b); ++ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, ++ (int16x4_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_p64 (poly64_t *a, poly64x1_t b) ++vst1_p64 (poly64_t *__a, poly64x1_t __b) + { +- *a = b[0]; ++ *__a = __b[0]; + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s8 (int8_t *a, int8x8_t b) ++vst1_s8 (int8_t *__a, int8x8_t __b) + { +- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b); ++ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s16 (int16_t *a, int16x4_t b) ++vst1_s16 (int16_t *__a, int16x4_t __b) + { +- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b); ++ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s32 (int32_t *a, int32x2_t b) ++vst1_s32 (int32_t *__a, int32x2_t __b) + { +- __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b); ++ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) __a, __b); + } + + __extension__ extern __inline void + __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s64 (int64_t *a, int64x1_t b) ++vst1_s64 (int64_t *__a, int64x1_t __b) + { +- *a = b[0]; ++ *__a = __b[0]; + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u8 (uint8_t *a, uint8x8_t b) ++vst1_u8 (uint8_t *__a, uint8x8_t __b) + { +- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, +- (int8x8_t) b); ++ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, ++ (int8x8_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u16 (uint16_t *a, uint16x4_t b) ++vst1_u16 (uint16_t *__a, uint16x4_t __b) + { +- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, +- (int16x4_t) b); ++ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, ++ (int16x4_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u32 (uint32_t *a, uint32x2_t b) ++vst1_u32 (uint32_t *__a, uint32x2_t __b) + { +- __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, +- (int32x2_t) b); ++ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) __a, ++ (int32x2_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u64 (uint64_t *a, uint64x1_t b) ++vst1_u64 (uint64_t *__a, uint64x1_t __b) + { +- *a = b[0]; ++ *__a = __b[0]; + } + + /* vst1q */ +@@ -27600,100 +27918,100 @@ vst1q_f16 (float16_t *__a, float16x8_t __b) + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_f32 (float32_t *a, float32x4_t b) ++vst1q_f32 (float32_t *__a, float32x4_t __b) + { +- __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b); ++ __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) __a, __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_f64 (float64_t *a, float64x2_t b) ++vst1q_f64 (float64_t *__a, float64x2_t __b) + { +- __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); ++ __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) __a, __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_p8 (poly8_t *a, poly8x16_t b) ++vst1q_p8 (poly8_t *__a, poly8x16_t __b) + { +- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, +- (int8x16_t) b); ++ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, ++ (int8x16_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_p16 (poly16_t *a, poly16x8_t b) ++vst1q_p16 (poly16_t *__a, poly16x8_t __b) + { +- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, +- (int16x8_t) b); ++ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, ++ (int16x8_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_p64 (poly64_t *a, poly64x2_t b) ++vst1q_p64 (poly64_t *__a, poly64x2_t __b) + { +- __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) a, +- (poly64x2_t) b); ++ __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) __a, ++ (poly64x2_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s8 (int8_t *a, int8x16_t b) ++vst1q_s8 (int8_t *__a, 
int8x16_t __b) + { +- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b); ++ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s16 (int16_t *a, int16x8_t b) ++vst1q_s16 (int16_t *__a, int16x8_t __b) + { +- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b); ++ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s32 (int32_t *a, int32x4_t b) ++vst1q_s32 (int32_t *__a, int32x4_t __b) + { +- __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b); ++ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) __a, __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s64 (int64_t *a, int64x2_t b) ++vst1q_s64 (int64_t *__a, int64x2_t __b) + { +- __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b); ++ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) __a, __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u8 (uint8_t *a, uint8x16_t b) ++vst1q_u8 (uint8_t *__a, uint8x16_t __b) + { +- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, +- (int8x16_t) b); ++ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, ++ (int8x16_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u16 (uint16_t *a, uint16x8_t b) ++vst1q_u16 (uint16_t *__a, uint16x8_t __b) + { +- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, +- (int16x8_t) b); ++ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, ++ (int16x8_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u32 (uint32_t *a, uint32x4_t b) ++vst1q_u32 (uint32_t *__a, uint32x4_t __b) + { +- __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, +- (int32x4_t) b); ++ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) __a, ++ (int32x4_t) __b); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u64 (uint64_t *a, uint64x2_t b) ++vst1q_u64 (uint64_t *__a, uint64x2_t __b) + { +- __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, +- (int64x2_t) b); ++ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) __a, ++ (int64x2_t) __b); + } + + /* vst1_lane */ +@@ -27900,327 +28218,343 @@ vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane) + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s64_x2 (int64_t * __a, int64x1x2_t val) ++vst1_s64_x2 (int64_t * __a, int64x1x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- int64x2x2_t temp; +- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); ++ int64x2x2_t __temp; ++ __temp.val[0] ++ = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __temp.val[1] ++ = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __o = 
__builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); + __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u64_x2 (uint64_t * __a, uint64x1x2_t val) ++vst1_u64_x2 (uint64_t * __a, uint64x1x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- uint64x2x2_t temp; +- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); ++ uint64x2x2_t __temp; ++ __temp.val[0] ++ = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] ++ = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); + __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_f64_x2 (float64_t * __a, float64x1x2_t val) ++vst1_f64_x2 (float64_t * __a, float64x1x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- float64x2x2_t temp; +- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); ++ float64x2x2_t __temp; ++ __temp.val[0] ++ = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] ++ = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1); + __builtin_aarch64_st1x2df ((__builtin_aarch64_simd_df *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s8_x2 (int8_t * __a, int8x8x2_t val) ++vst1_s8_x2 (int8_t * __a, int8x8x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- int8x16x2_t temp; +- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); ++ int8x16x2_t __temp; ++ __temp.val[0] ++ = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __temp.val[1] ++ = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_p8_x2 (poly8_t * __a, poly8x8x2_t val) ++vst1_p8_x2 (poly8_t * __a, poly8x8x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- poly8x16x2_t temp; +- 
temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); ++ poly8x16x2_t __temp; ++ __temp.val[0] ++ = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] ++ = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s16_x2 (int16_t * __a, int16x4x2_t val) ++vst1_s16_x2 (int16_t * __a, int16x4x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- int16x8x2_t temp; +- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); ++ int16x8x2_t __temp; ++ __temp.val[0] ++ = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __temp.val[1] ++ = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_p16_x2 (poly16_t * __a, poly16x4x2_t val) ++vst1_p16_x2 (poly16_t * __a, poly16x4x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- poly16x8x2_t temp; +- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); ++ poly16x8x2_t __temp; ++ __temp.val[0] ++ = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] ++ = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s32_x2 (int32_t * __a, int32x2x2_t val) ++vst1_s32_x2 (int32_t * __a, int32x2x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- int32x4x2_t temp; +- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); ++ int32x4x2_t __temp; ++ __temp.val[0] ++ = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __temp.val[1] ++ = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C 
(0))); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u8_x2 (uint8_t * __a, uint8x8x2_t val) ++vst1_u8_x2 (uint8_t * __a, uint8x8x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- uint8x16x2_t temp; +- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); ++ uint8x16x2_t __temp; ++ __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u16_x2 (uint16_t * __a, uint16x4x2_t val) ++vst1_u16_x2 (uint16_t * __a, uint16x4x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- uint16x8x2_t temp; +- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); ++ uint16x8x2_t __temp; ++ __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u32_x2 (uint32_t * __a, uint32x2x2_t val) ++vst1_u32_x2 (uint32_t * __a, uint32x2x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- uint32x4x2_t temp; +- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); ++ uint32x4x2_t __temp; ++ __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_f16_x2 (float16_t * __a, float16x4x2_t val) ++vst1_f16_x2 (float16_t * __a, float16x4x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- float16x8x2_t temp; +- 
temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1); ++ float16x8x2_t __temp; ++ __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1); + __builtin_aarch64_st1x2v4hf (__a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_f32_x2 (float32_t * __a, float32x2x2_t val) ++vst1_f32_x2 (float32_t * __a, float32x2x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- float32x4x2_t temp; +- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); ++ float32x4x2_t __temp; ++ __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1); + __builtin_aarch64_st1x2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_p64_x2 (poly64_t * __a, poly64x1x2_t val) ++vst1_p64_x2 (poly64_t * __a, poly64x1x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- poly64x2x2_t temp; +- temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ poly64x2x2_t __temp; ++ __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, +- (poly64x2_t) temp.val[0], 0); ++ (poly64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, +- (poly64x2_t) temp.val[1], 1); ++ (poly64x2_t) __temp.val[1], 1); + __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s8_x2 (int8_t * __a, int8x16x2_t val) ++vst1q_s8_x2 (int8_t * __a, int8x16x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t val) ++vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = 
__builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s16_x2 (int16_t * __a, int16x8x2_t val) ++vst1q_s16_x2 (int16_t * __a, int16x8x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t val) ++vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s32_x2 (int32_t * __a, int32x4x2_t val) ++vst1q_s32_x2 (int32_t * __a, int32x4x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); + __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s64_x2 (int64_t * __a, int64x2x2_t val) ++vst1q_s64_x2 (int64_t * __a, int64x2x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); + __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t val) ++vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); + __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern 
__inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t val) ++vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); + __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t val) ++vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); + __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t val) ++vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); + __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_f16_x2 (float16_t * __a, float16x8x2_t val) ++vst1q_f16_x2 (float16_t * __a, float16x8x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1); + __builtin_aarch64_st1x2v8hf (__a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_f32_x2 (float32_t * __a, float32x4x2_t val) ++vst1q_f32_x2 (float32_t * __a, float32x4x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1); + __builtin_aarch64_st1x2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_f64_x2 (float64_t * __a, float64x2x2_t val) ++vst1q_f64_x2 (float64_t * __a, float64x2x2_t __val) + { + __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1); ++ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) 
__val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1); + __builtin_aarch64_st1x2v2df ((__builtin_aarch64_simd_df *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t val) ++vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t __val) + { + __builtin_aarch64_simd_oi __o; + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, +- (poly64x2_t) val.val[0], 0); ++ (poly64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, +- (poly64x2_t) val.val[1], 1); ++ (poly64x2_t) __val.val[1], 1); + __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + +@@ -28228,1483 +28562,1709 @@ vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t val) + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s64_x3 (int64_t * __a, int64x1x3_t val) ++vst1_s64_x3 (int64_t * __a, int64x1x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- int64x2x3_t temp; +- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); ++ int64x2x3_t __temp; ++ __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); + __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u64_x3 (uint64_t * __a, uint64x1x3_t val) ++vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- uint64x2x3_t temp; +- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); ++ uint64x2x3_t __temp; ++ __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); + __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +-vst1_f64_x3 (float64_t * __a, float64x1x3_t val) ++vst1_f64_x3 (float64_t * __a, float64x1x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- float64x2x3_t temp; +- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); ++ float64x2x3_t __temp; ++ __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2); + __builtin_aarch64_st1x3df ((__builtin_aarch64_simd_df *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s8_x3 (int8_t * __a, int8x8x3_t val) ++vst1_s8_x3 (int8_t * __a, int8x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- int8x16x3_t temp; +- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); ++ int8x16x3_t __temp; ++ __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_p8_x3 (poly8_t * __a, poly8x8x3_t val) ++vst1_p8_x3 (poly8_t * __a, poly8x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- poly8x16x3_t temp; +- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); ++ poly8x16x3_t __temp; ++ __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_p8 
(__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s16_x3 (int16_t * __a, int16x4x3_t val) ++vst1_s16_x3 (int16_t * __a, int16x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- int16x8x3_t temp; +- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); ++ int16x8x3_t __temp; ++ __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_p16_x3 (poly16_t * __a, poly16x4x3_t val) ++vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- poly16x8x3_t temp; +- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); ++ poly16x8x3_t __temp; ++ __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_s32_x3 (int32_t * __a, int32x2x3_t val) ++vst1_s32_x3 (int32_t * __a, int32x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- int32x4x3_t temp; +- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); +- __o = 
__builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); ++ int32x4x3_t __temp; ++ __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u8_x3 (uint8_t * __a, uint8x8x3_t val) ++vst1_u8_x3 (uint8_t * __a, uint8x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- uint8x16x3_t temp; +- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); ++ uint8x16x3_t __temp; ++ __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u16_x3 (uint16_t * __a, uint16x4x3_t val) ++vst1_u16_x3 (uint16_t * __a, uint16x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- uint16x8x3_t temp; +- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); ++ uint16x8x3_t __temp; ++ __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_u32_x3 (uint32_t * __a, uint32x2x3_t val) ++vst1_u32_x3 (uint32_t * __a, uint32x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- uint32x4x3_t temp; +- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); ++ uint32x4x3_t __temp; ++ __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_f16_x3 (float16_t * __a, float16x4x3_t val) ++vst1_f16_x3 (float16_t * __a, float16x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- float16x8x3_t temp; +- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2); ++ float16x8x3_t __temp; ++ __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_f32_x3 (float32_t * __a, float32x2x3_t val) ++vst1_f32_x3 (float32_t * __a, float32x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- float32x4x3_t temp; +- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); ++ float32x4x3_t __temp; ++ __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f32 
(__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2); + __builtin_aarch64_st1x3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1_p64_x3 (poly64_t * __a, poly64x1x3_t val) ++vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- poly64x2x3_t temp; +- temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ poly64x2x3_t __temp; ++ __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) temp.val[0], 0); ++ (poly64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) temp.val[1], 1); ++ (poly64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) temp.val[2], 2); ++ (poly64x2_t) __temp.val[2], 2); + __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s8_x3 (int8_t * __a, int8x16x3_t val) ++vst1q_s8_x3 (int8_t * __a, int8x16x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t val) ++vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s16_x3 (int16_t * __a, int16x8x3_t val) ++vst1q_s16_x3 (int16_t * __a, int16x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv8hi (__o, 
(int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t val) ++vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s32_x3 (int32_t * __a, int32x4x3_t val) ++vst1q_s32_x3 (int32_t * __a, int32x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); + __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_s64_x3 (int64_t * __a, int64x2x3_t val) ++vst1q_s64_x3 (int64_t * __a, int64x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t val) ++vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + 
__builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t val) ++vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t val) ++vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); + __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t val) ++vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_f16_x3 (float16_t * __a, float16x8x3_t val) ++vst1q_f16_x3 (float16_t * __a, float16x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2); + __builtin_aarch64_st1x3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_f32_x3 (float32_t * __a, float32x4x3_t val) ++vst1q_f32_x3 (float32_t * __a, float32x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) 
val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2); + __builtin_aarch64_st1x3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_f64_x3 (float64_t * __a, float64x2x3_t val) ++vst1q_f64_x3 (float64_t * __a, float64x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2); + __builtin_aarch64_st1x3v2df ((__builtin_aarch64_simd_df *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t val) ++vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) val.val[0], 0); ++ (poly64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) val.val[1], 1); ++ (poly64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) val.val[2], 2); ++ (poly64x2_t) __val.val[2], 2); + __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + +-/* vstn */ ++/* vst1(q)_x4. 
*/ + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_s64 (int64_t * __a, int64x1x2_t val) ++vst1_s8_x4 (int8_t * __a, int8x8x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- int64x2x2_t temp; +- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); +- __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); ++ union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_u64 (uint64_t * __a, uint64x1x2_t val) ++vst1q_s8_x4 (int8_t * __a, int8x16x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- uint64x2x2_t temp; +- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); +- __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); ++ union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_f64 (float64_t * __a, float64x1x2_t val) ++vst1_s16_x4 (int16_t * __a, int16x4x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- float64x2x2_t temp; +- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); +- __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); ++ union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_s8 (int8_t * __a, int8x8x2_t val) ++vst1q_s16_x4 (int16_t * __a, int16x8x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- int8x16x2_t temp; +- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); +- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); ++ union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_p8 (poly8_t * __a, poly8x8x2_t val) ++vst1_s32_x4 (int32_t * __a, int32x2x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- poly8x16x2_t temp; +- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C 
(0))); +- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); +- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); ++ union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_s16 (int16_t * __a, int16x4x2_t val) ++vst1q_s32_x4 (int32_t * __a, int32x4x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- int16x8x2_t temp; +- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); +- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); ++ union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_p16 (poly16_t * __a, poly16x4x2_t val) ++vst1_u8_x4 (uint8_t * __a, uint8x8x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- poly16x8x2_t temp; +- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); +- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); ++ union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_s32 (int32_t * __a, int32x2x2_t val) ++vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- int32x4x2_t temp; +- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); +- __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); ++ union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_u8 (uint8_t * __a, uint8x8x2_t val) ++vst1_u16_x4 (uint16_t * __a, uint16x4x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- uint8x16x2_t temp; +- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); +- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); ++ union { 
uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_u16 (uint16_t * __a, uint16x4x2_t val) ++vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- uint16x8x2_t temp; +- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); +- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); ++ union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_u32 (uint32_t * __a, uint32x2x2_t val) ++vst1_u32_x4 (uint32_t * __a, uint32x2x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- uint32x4x2_t temp; +- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); +- __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); ++ union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_f16 (float16_t * __a, float16x4x2_t val) ++vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- float16x8x2_t temp; +- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1); +- __builtin_aarch64_st2v4hf (__a, __o); ++ union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_f32 (float32_t * __a, float32x2x2_t val) ++vst1_f16_x4 (float16_t * __a, float16x4x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- float32x4x2_t temp; +- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); +- __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); ++ union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v4hf ((__builtin_aarch64_simd_hf *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2_p64 (poly64_t * __a, poly64x1x2_t val) ++vst1q_f16_x4 (float16_t * __a, float16x8x4_t 
val) + { +- __builtin_aarch64_simd_oi __o; +- poly64x2x2_t temp; +- temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, +- (poly64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, +- (poly64x2_t) temp.val[1], 1); +- __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); ++ union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v8hf ((__builtin_aarch64_simd_hf *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_s8 (int8_t * __a, int8x16x2_t val) ++vst1_f32_x4 (float32_t * __a, float32x2x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); +- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); ++ union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v2sf ((__builtin_aarch64_simd_sf *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_p8 (poly8_t * __a, poly8x16x2_t val) ++vst1q_f32_x4 (float32_t * __a, float32x4x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); +- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); ++ union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v4sf ((__builtin_aarch64_simd_sf *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_s16 (int16_t * __a, int16x8x2_t val) ++vst1_p8_x4 (poly8_t * __a, poly8x8x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); +- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); ++ union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_p16 (poly16_t * __a, poly16x8x2_t val) ++vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); +- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); ++ union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_s32 (int32_t * __a, int32x4x2_t val) ++vst1_p16_x4 (poly16_t * __a, poly16x4x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); +- __builtin_aarch64_st2v4si 
((__builtin_aarch64_simd_si *) __a, __o); ++ union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_s64 (int64_t * __a, int64x2x2_t val) ++vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); +- __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); ++ union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_u8 (uint8_t * __a, uint8x16x2_t val) ++vst1_s64_x4 (int64_t * __a, int64x1x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); +- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); ++ union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_u16 (uint16_t * __a, uint16x8x2_t val) ++vst1_u64_x4 (uint64_t * __a, uint64x1x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); +- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); ++ union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_u32 (uint32_t * __a, uint32x4x2_t val) ++vst1_p64_x4 (poly64_t * __a, poly64x1x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); +- __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); ++ union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_u64 (uint64_t * __a, uint64x2x2_t val) ++vst1q_s64_x4 (int64_t * __a, int64x2x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); +- __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); ++ union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_f16 (float16_t * __a, float16x8x2_t val) ++vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t val) + { +- __builtin_aarch64_simd_oi __o; 
+- __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1); +- __builtin_aarch64_st2v8hf (__a, __o); ++ union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_f32 (float32_t * __a, float32x4x2_t val) ++vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1); +- __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); ++ union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_f64 (float64_t * __a, float64x2x2_t val) ++vst1_f64_x4 (float64_t * __a, float64x1x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1); +- __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o); ++ union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4df ((__builtin_aarch64_simd_df *) __a, __u.__o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst2q_p64 (poly64_t * __a, poly64x2x2_t val) ++vst1q_f64_x4 (float64_t * __a, float64x2x4_t val) + { +- __builtin_aarch64_simd_oi __o; +- __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, +- (poly64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, +- (poly64x2_t) val.val[1], 1); +- __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); ++ union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v2df ((__builtin_aarch64_simd_df *) __a, __u.__o); + } + ++/* vstn */ ++ + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_s64 (int64_t * __a, int64x1x3_t val) ++vst2_s64 (int64_t * __a, int64x1x2_t __val) + { +- __builtin_aarch64_simd_ci __o; +- int64x2x3_t temp; +- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); +- __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); ++ __builtin_aarch64_simd_oi __o; ++ int64x2x2_t __temp; ++ __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); ++ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_u64 (uint64_t * __a, uint64x1x3_t val) ++vst2_u64 (uint64_t * __a, uint64x1x2_t __val) + { +- __builtin_aarch64_simd_ci __o; +- uint64x2x3_t temp; +- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); +- __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); ++ __builtin_aarch64_simd_oi __o; ++ uint64x2x2_t __temp; ++ __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); ++ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_f64 (float64_t * __a, float64x1x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ float64x2x2_t __temp; ++ __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1); ++ __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_s8 (int8_t * __a, int8x8x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ int8x16x2_t __temp; ++ __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_p8 (poly8_t * __a, poly8x8x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ poly8x16x2_t __temp; ++ __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_s16 (int16_t * __a, int16x4x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ int16x8x2_t __temp; ++ __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) 
__temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_p16 (poly16_t * __a, poly16x4x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ poly16x8x2_t __temp; ++ __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_s32 (int32_t * __a, int32x2x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ int32x4x2_t __temp; ++ __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); ++ __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_u8 (uint8_t * __a, uint8x8x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ uint8x16x2_t __temp; ++ __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_u16 (uint16_t * __a, uint16x4x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ uint16x8x2_t __temp; ++ __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_u32 (uint32_t * __a, uint32x2x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ uint32x4x2_t __temp; ++ __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); ++ __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_f16 (float16_t * __a, float16x4x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ float16x8x2_t __temp; ++ __temp.val[0] = vcombine_f16 
(__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1); ++ __builtin_aarch64_st2v4hf (__a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_f32 (float32_t * __a, float32x2x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ float32x4x2_t __temp; ++ __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1); ++ __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_p64 (poly64_t * __a, poly64x1x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ poly64x2x2_t __temp; ++ __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, ++ (poly64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, ++ (poly64x2_t) __temp.val[1], 1); ++ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_s8 (int8_t * __a, int8x16x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_p8 (poly8_t * __a, poly8x16x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_s16 (int16_t * __a, int16x8x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_p16 (poly16_t * __a, poly16x8x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_s32 (int32_t * __a, int32x4x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv4si (__o, 
(int32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); ++ __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_s64 (int64_t * __a, int64x2x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); ++ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_u8 (uint8_t * __a, uint8x16x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_u16 (uint16_t * __a, uint16x8x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_u32 (uint32_t * __a, uint32x4x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); ++ __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_u64 (uint64_t * __a, uint64x2x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); ++ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_f16 (float16_t * __a, float16x8x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1); ++ __builtin_aarch64_st2v8hf (__a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_f32 (float32_t * __a, float32x4x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1); ++ __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_f64 (float64_t * __a, float64x2x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1); ++ __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) 
__a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_p64 (poly64_t * __a, poly64x2x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, ++ (poly64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, ++ (poly64x2_t) __val.val[1], 1); ++ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst3_s64 (int64_t * __a, int64x1x3_t __val) ++{ ++ __builtin_aarch64_simd_ci __o; ++ int64x2x3_t __temp; ++ __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); ++ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst3_u64 (uint64_t * __a, uint64x1x3_t __val) ++{ ++ __builtin_aarch64_simd_ci __o; ++ uint64x2x3_t __temp; ++ __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); ++ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_f64 (float64_t * __a, float64x1x3_t val) ++vst3_f64 (float64_t * __a, float64x1x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- float64x2x3_t temp; +- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); ++ float64x2x3_t __temp; ++ __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2); + __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_s8 (int8_t * __a, int8x8x3_t val) ++vst3_s8 (int8_t * __a, 
int8x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- int8x16x3_t temp; +- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); ++ int8x16x3_t __temp; ++ __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_p8 (poly8_t * __a, poly8x8x3_t val) ++vst3_p8 (poly8_t * __a, poly8x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- poly8x16x3_t temp; +- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); ++ poly8x16x3_t __temp; ++ __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_s16 (int16_t * __a, int16x4x3_t val) ++vst3_s16 (int16_t * __a, int16x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- int16x8x3_t temp; +- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); ++ int16x8x3_t __temp; ++ __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = 
__builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_p16 (poly16_t * __a, poly16x4x3_t val) ++vst3_p16 (poly16_t * __a, poly16x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- poly16x8x3_t temp; +- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); ++ poly16x8x3_t __temp; ++ __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_s32 (int32_t * __a, int32x2x3_t val) ++vst3_s32 (int32_t * __a, int32x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- int32x4x3_t temp; +- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); ++ int32x4x3_t __temp; ++ __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_u8 (uint8_t * __a, uint8x8x3_t val) ++vst3_u8 (uint8_t * __a, uint8x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- uint8x16x3_t temp; +- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); +- __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); ++ uint8x16x3_t __temp; ++ __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); + __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_u16 (uint16_t * __a, uint16x4x3_t val) ++vst3_u16 (uint16_t * __a, uint16x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- uint16x8x3_t temp; +- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); ++ uint16x8x3_t __temp; ++ __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); + __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_u32 (uint32_t * __a, uint32x2x3_t val) ++vst3_u32 (uint32_t * __a, uint32x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- uint32x4x3_t temp; +- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); ++ uint32x4x3_t __temp; ++ __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); + __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_f16 (float16_t * __a, float16x4x3_t val) ++vst3_f16 (float16_t * __a, float16x4x3_t __val) + { + 
__builtin_aarch64_simd_ci __o; +- float16x8x3_t temp; +- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2); ++ float16x8x3_t __temp; ++ __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2); + __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_f32 (float32_t * __a, float32x2x3_t val) ++vst3_f32 (float32_t * __a, float32x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- float32x4x3_t temp; +- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); ++ float32x4x3_t __temp; ++ __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2); + __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3_p64 (poly64_t * __a, poly64x1x3_t val) ++vst3_p64 (poly64_t * __a, poly64x1x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- poly64x2x3_t temp; +- temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ poly64x2x3_t __temp; ++ __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) temp.val[0], 0); ++ (poly64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) temp.val[1], 1); ++ (poly64x2_t) __temp.val[1], 1); + __o = 
__builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) temp.val[2], 2); ++ (poly64x2_t) __temp.val[2], 2); + __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_s8 (int8_t * __a, int8x16x3_t val) ++vst3q_s8 (int8_t * __a, int8x16x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_p8 (poly8_t * __a, poly8x16x3_t val) ++vst3q_p8 (poly8_t * __a, poly8x16x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_s16 (int16_t * __a, int16x8x3_t val) ++vst3q_s16 (int16_t * __a, int16x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_p16 (poly16_t * __a, poly16x8x3_t val) ++vst3q_p16 (poly16_t * __a, poly16x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_s32 (int32_t * __a, int32x4x3_t val) ++vst3q_s32 (int32_t * __a, int32x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = 
__builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); + __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_s64 (int64_t * __a, int64x2x3_t val) ++vst3q_s64 (int64_t * __a, int64x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_u8 (uint8_t * __a, uint8x16x3_t val) ++vst3q_u8 (uint8_t * __a, uint8x16x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); + __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_u16 (uint16_t * __a, uint16x8x3_t val) ++vst3q_u16 (uint16_t * __a, uint16x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); + __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_u32 (uint32_t * __a, uint32x4x3_t val) ++vst3q_u32 (uint32_t * __a, uint32x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) 
__val.val[2], 2); + __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_u64 (uint64_t * __a, uint64x2x3_t val) ++vst3q_u64 (uint64_t * __a, uint64x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); + __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_f16 (float16_t * __a, float16x8x3_t val) ++vst3q_f16 (float16_t * __a, float16x8x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2); + __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_f32 (float32_t * __a, float32x4x3_t val) ++vst3q_f32 (float32_t * __a, float32x4x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2); + __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_f64 (float64_t * __a, float64x2x3_t val) ++vst3q_f64 (float64_t * __a, float64x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2); + __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst3q_p64 (poly64_t * __a, poly64x2x3_t val) ++vst3q_p64 (poly64_t * __a, poly64x2x3_t __val) + { + __builtin_aarch64_simd_ci __o; + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- 
(poly64x2_t) val.val[0], 0); ++ (poly64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) val.val[1], 1); ++ (poly64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregciv2di_ssps (__o, +- (poly64x2_t) val.val[2], 2); ++ (poly64x2_t) __val.val[2], 2); + __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_s64 (int64_t * __a, int64x1x4_t val) ++vst4_s64 (int64_t * __a, int64x1x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- int64x2x4_t temp; +- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); +- temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); ++ int64x2x4_t __temp; ++ __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __temp.val[3] = vcombine_s64 (__val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_u64 (uint64_t * __a, uint64x1x4_t val) ++vst4_u64 (uint64_t * __a, uint64x1x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- uint64x2x4_t temp; +- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); +- temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); ++ uint64x2x4_t __temp; ++ __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_u64 (__val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); ++ __o = 
__builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_f64 (float64_t * __a, float64x1x4_t val) ++vst4_f64 (float64_t * __a, float64x1x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- float64x2x4_t temp; +- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); +- temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3); ++ float64x2x4_t __temp; ++ __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_f64 (__val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[3], 3); + __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_s8 (int8_t * __a, int8x8x4_t val) ++vst4_s8 (int8_t * __a, int8x8x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- int8x16x4_t temp; +- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); +- temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); ++ int8x16x4_t __temp; ++ __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __temp.val[3] = vcombine_s8 (__val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
+-vst4_p8 (poly8_t * __a, poly8x8x4_t val) ++vst4_p8 (poly8_t * __a, poly8x8x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- poly8x16x4_t temp; +- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); +- temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); ++ poly8x16x4_t __temp; ++ __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_p8 (__val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_s16 (int16_t * __a, int16x4x4_t val) ++vst4_s16 (int16_t * __a, int16x4x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- int16x8x4_t temp; +- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); +- temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); ++ int16x8x4_t __temp; ++ __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __temp.val[3] = vcombine_s16 (__val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_p16 (poly16_t * __a, poly16x4x4_t val) ++vst4_p16 (poly16_t * __a, poly16x4x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- poly16x8x4_t temp; +- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p16 
(val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); +- temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); ++ poly16x8x4_t __temp; ++ __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_p16 (__val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_s32 (int32_t * __a, int32x2x4_t val) ++vst4_s32 (int32_t * __a, int32x2x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- int32x4x4_t temp; +- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); +- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); +- temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); +- temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); ++ int32x4x4_t __temp; ++ __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __temp.val[3] = vcombine_s32 (__val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); + __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_u8 (uint8_t * __a, uint8x8x4_t val) ++vst4_u8 (uint8_t * __a, uint8x8x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- uint8x16x4_t temp; +- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); +- temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, 
(int8x16_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); ++ uint8x16x4_t __temp; ++ __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_u8 (__val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); + __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_u16 (uint16_t * __a, uint16x4x4_t val) ++vst4_u16 (uint16_t * __a, uint16x4x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- uint16x8x4_t temp; +- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); +- temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); ++ uint16x8x4_t __temp; ++ __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_u16 (__val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); + __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_u32 (uint32_t * __a, uint32x2x4_t val) ++vst4_u32 (uint32_t * __a, uint32x2x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- uint32x4x4_t temp; +- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); +- temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) 
temp.val[3], 3); ++ uint32x4x4_t __temp; ++ __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_u32 (__val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); + __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_f16 (float16_t * __a, float16x4x4_t val) ++vst4_f16 (float16_t * __a, float16x4x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- float16x8x4_t temp; +- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); +- temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3); ++ float16x8x4_t __temp; ++ __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_f16 (__val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[3], 3); + __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_f32 (float32_t * __a, float32x2x4_t val) ++vst4_f32 (float32_t * __a, float32x2x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- float32x4x4_t temp; +- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); +- temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); +- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3); ++ float32x4x4_t __temp; ++ __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C 
(0))); ++ __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_f32 (__val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[3], 3); + __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4_p64 (poly64_t * __a, poly64x1x4_t val) ++vst4_p64 (poly64_t * __a, poly64x1x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- poly64x2x4_t temp; +- temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); +- temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); +- temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); +- temp.val[3] = vcombine_p64 (val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ poly64x2x4_t __temp; ++ __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_p64 (__val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0))); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, +- (poly64x2_t) temp.val[0], 0); ++ (poly64x2_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, +- (poly64x2_t) temp.val[1], 1); ++ (poly64x2_t) __temp.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, +- (poly64x2_t) temp.val[2], 2); ++ (poly64x2_t) __temp.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, +- (poly64x2_t) temp.val[3], 3); ++ (poly64x2_t) __temp.val[3], 3); + __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_s8 (int8_t * __a, int8x16x4_t val) ++vst4q_s8 (int8_t * __a, int8x16x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_p8 (poly8_t * __a, poly8x16x4_t val) ++vst4q_p8 (poly8_t * __a, poly8x16x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) 
val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_s16 (int16_t * __a, int16x8x4_t val) ++vst4q_s16 (int16_t * __a, int16x8x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_p16 (poly16_t * __a, poly16x8x4_t val) ++vst4q_p16 (poly16_t * __a, poly16x8x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_s32 (int32_t * __a, int32x4x4_t val) ++vst4q_s32 (int32_t * __a, int32x4x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3); + __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_s64 (int64_t * __a, int64x2x4_t val) ++vst4q_s64 (int64_t * __a, int64x2x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv2di 
(__o, (int64x2_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_u8 (uint8_t * __a, uint8x16x4_t val) ++vst4q_u8 (uint8_t * __a, uint8x16x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); + __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_u16 (uint16_t * __a, uint16x8x4_t val) ++vst4q_u16 (uint16_t * __a, uint16x8x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); + __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_u32 (uint32_t * __a, uint32x4x4_t val) ++vst4q_u32 (uint32_t * __a, uint32x4x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3); + __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_u64 (uint64_t * __a, uint64x2x4_t val) ++vst4q_u64 (uint64_t * 
__a, uint64x2x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_f16 (float16_t * __a, float16x8x4_t val) ++vst4q_f16 (float16_t * __a, float16x8x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[3], 3); + __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_f32 (float32_t * __a, float32x4x4_t val) ++vst4q_f32 (float32_t * __a, float32x4x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[3], 3); + __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_f64 (float64_t * __a, float64x2x4_t val) ++vst4q_f64 (float64_t * __a, float64x2x4_t __val) + { + __builtin_aarch64_simd_xi __o; +- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0); +- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1); +- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2); +- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3); ++ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[3], 3); + 
__builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); + } + + __extension__ extern __inline void + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vst4q_p64 (poly64_t * __a, poly64x2x4_t val) ++vst4q_p64 (poly64_t * __a, poly64x2x4_t __val) + { + __builtin_aarch64_simd_xi __o; + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, +- (poly64x2_t) val.val[0], 0); ++ (poly64x2_t) __val.val[0], 0); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, +- (poly64x2_t) val.val[1], 1); ++ (poly64x2_t) __val.val[1], 1); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, +- (poly64x2_t) val.val[2], 2); ++ (poly64x2_t) __val.val[2], 2); + __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, +- (poly64x2_t) val.val[3], 3); ++ (poly64x2_t) __val.val[3], 3); + __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); + } + +@@ -29796,53 +30356,53 @@ __extension__ extern __inline int8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) + vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx) + { +- int8x8_t result; +- int8x16x2_t temp; ++ int8x8_t __result; ++ int8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; +- temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); +- temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); ++ __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); ++ __temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[0], 0); ++ (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[1], 1); +- result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx); +- return result; ++ (int8x16_t) __temp.val[1], 1); ++ __result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx); ++ return __result; + } + + __extension__ extern __inline uint8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) + vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx) + { +- uint8x8_t result; +- uint8x16x2_t temp; ++ uint8x8_t __result; ++ uint8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; +- temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); +- temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); ++ __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); ++ __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[0], 0); ++ (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[1], 1); +- result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, ++ (int8x16_t) __temp.val[1], 1); ++ __result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); +- return result; ++ return __result; + } + + __extension__ extern __inline poly8x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) + vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx) + { +- poly8x8_t result; +- poly8x16x2_t temp; ++ poly8x8_t __result; ++ poly8x16x2_t __temp; + __builtin_aarch64_simd_oi __o; +- temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); +- temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); ++ __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); ++ __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[0], 0); ++ (int8x16_t) __temp.val[0], 0); + __o = __builtin_aarch64_set_qregoiv16qi (__o, +- (int8x16_t) temp.val[1], 1); +- result = 
(poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, ++ (int8x16_t) __temp.val[1], 1); ++ __result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, + (int8x8_t)__idx); +- return result; ++ return __result; + } + + /* vtrn */ +@@ -30374,65 +30934,65 @@ vtrn_f16 (float16x4_t __a, float16x4_t __b) + + __extension__ extern __inline float32x2x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrn_f32 (float32x2_t a, float32x2_t b) ++vtrn_f32 (float32x2_t __a, float32x2_t __b) + { +- return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}; ++ return (float32x2x2_t) {vtrn1_f32 (__a, __b), vtrn2_f32 (__a, __b)}; + } + + __extension__ extern __inline poly8x8x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrn_p8 (poly8x8_t a, poly8x8_t b) ++vtrn_p8 (poly8x8_t __a, poly8x8_t __b) + { +- return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)}; ++ return (poly8x8x2_t) {vtrn1_p8 (__a, __b), vtrn2_p8 (__a, __b)}; + } + + __extension__ extern __inline poly16x4x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrn_p16 (poly16x4_t a, poly16x4_t b) ++vtrn_p16 (poly16x4_t __a, poly16x4_t __b) + { +- return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)}; ++ return (poly16x4x2_t) {vtrn1_p16 (__a, __b), vtrn2_p16 (__a, __b)}; + } + + __extension__ extern __inline int8x8x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrn_s8 (int8x8_t a, int8x8_t b) ++vtrn_s8 (int8x8_t __a, int8x8_t __b) + { +- return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)}; ++ return (int8x8x2_t) {vtrn1_s8 (__a, __b), vtrn2_s8 (__a, __b)}; + } + + __extension__ extern __inline int16x4x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrn_s16 (int16x4_t a, int16x4_t b) ++vtrn_s16 (int16x4_t __a, int16x4_t __b) + { +- return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)}; ++ return (int16x4x2_t) {vtrn1_s16 (__a, __b), vtrn2_s16 (__a, __b)}; + } + + __extension__ extern __inline int32x2x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrn_s32 (int32x2_t a, int32x2_t b) ++vtrn_s32 (int32x2_t __a, int32x2_t __b) + { +- return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)}; ++ return (int32x2x2_t) {vtrn1_s32 (__a, __b), vtrn2_s32 (__a, __b)}; + } + + __extension__ extern __inline uint8x8x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrn_u8 (uint8x8_t a, uint8x8_t b) ++vtrn_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)}; ++ return (uint8x8x2_t) {vtrn1_u8 (__a, __b), vtrn2_u8 (__a, __b)}; + } + + __extension__ extern __inline uint16x4x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrn_u16 (uint16x4_t a, uint16x4_t b) ++vtrn_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)}; ++ return (uint16x4x2_t) {vtrn1_u16 (__a, __b), vtrn2_u16 (__a, __b)}; + } + + __extension__ extern __inline uint32x2x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrn_u32 (uint32x2_t a, uint32x2_t b) ++vtrn_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)}; ++ return (uint32x2x2_t) {vtrn1_u32 (__a, __b), vtrn2_u32 (__a, __b)}; + } + + __extension__ extern __inline float16x8x2_t +@@ -30444,65 +31004,65 @@ vtrnq_f16 (float16x8_t __a, float16x8_t __b) + + __extension__ extern __inline float32x4x2_t + __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) +-vtrnq_f32 (float32x4_t a, float32x4_t b) ++vtrnq_f32 (float32x4_t __a, float32x4_t __b) + { +- return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)}; ++ return (float32x4x2_t) {vtrn1q_f32 (__a, __b), vtrn2q_f32 (__a, __b)}; + } + + __extension__ extern __inline poly8x16x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrnq_p8 (poly8x16_t a, poly8x16_t b) ++vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) + { +- return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)}; ++ return (poly8x16x2_t) {vtrn1q_p8 (__a, __b), vtrn2q_p8 (__a, __b)}; + } + + __extension__ extern __inline poly16x8x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrnq_p16 (poly16x8_t a, poly16x8_t b) ++vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) + { +- return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)}; ++ return (poly16x8x2_t) {vtrn1q_p16 (__a, __b), vtrn2q_p16 (__a, __b)}; + } + + __extension__ extern __inline int8x16x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrnq_s8 (int8x16_t a, int8x16_t b) ++vtrnq_s8 (int8x16_t __a, int8x16_t __b) + { +- return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)}; ++ return (int8x16x2_t) {vtrn1q_s8 (__a, __b), vtrn2q_s8 (__a, __b)}; + } + + __extension__ extern __inline int16x8x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrnq_s16 (int16x8_t a, int16x8_t b) ++vtrnq_s16 (int16x8_t __a, int16x8_t __b) + { +- return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)}; ++ return (int16x8x2_t) {vtrn1q_s16 (__a, __b), vtrn2q_s16 (__a, __b)}; + } + + __extension__ extern __inline int32x4x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrnq_s32 (int32x4_t a, int32x4_t b) ++vtrnq_s32 (int32x4_t __a, int32x4_t __b) + { +- return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)}; ++ return (int32x4x2_t) {vtrn1q_s32 (__a, __b), vtrn2q_s32 (__a, __b)}; + } + + __extension__ extern __inline uint8x16x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrnq_u8 (uint8x16_t a, uint8x16_t b) ++vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)}; ++ return (uint8x16x2_t) {vtrn1q_u8 (__a, __b), vtrn2q_u8 (__a, __b)}; + } + + __extension__ extern __inline uint16x8x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrnq_u16 (uint16x8_t a, uint16x8_t b) ++vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)}; ++ return (uint16x8x2_t) {vtrn1q_u16 (__a, __b), vtrn2q_u16 (__a, __b)}; + } + + __extension__ extern __inline uint32x4x2_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vtrnq_u32 (uint32x4_t a, uint32x4_t b) ++vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)}; ++ return (uint32x4x2_t) {vtrn1q_u32 (__a, __b), vtrn2q_u32 (__a, __b)}; + } + + /* vtst */ +@@ -32200,30 +32760,30 @@ vrndxq_f16 (float16x8_t __a) + + __extension__ extern __inline float16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrsqrte_f16 (float16x4_t a) ++vrsqrte_f16 (float16x4_t __a) + { +- return __builtin_aarch64_rsqrtev4hf (a); ++ return __builtin_aarch64_rsqrtev4hf (__a); + } + + __extension__ extern __inline float16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrsqrteq_f16 (float16x8_t a) ++vrsqrteq_f16 (float16x8_t __a) + { 
+- return __builtin_aarch64_rsqrtev8hf (a); ++ return __builtin_aarch64_rsqrtev8hf (__a); + } + + __extension__ extern __inline float16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsqrt_f16 (float16x4_t a) ++vsqrt_f16 (float16x4_t __a) + { +- return __builtin_aarch64_sqrtv4hf (a); ++ return __builtin_aarch64_sqrtv4hf (__a); + } + + __extension__ extern __inline float16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vsqrtq_f16 (float16x8_t a) ++vsqrtq_f16 (float16x8_t __a) + { +- return __builtin_aarch64_sqrtv8hf (a); ++ return __builtin_aarch64_sqrtv8hf (__a); + } + + /* ARMv8.2-A FP16 two operands vector intrinsics. */ +@@ -32244,16 +32804,16 @@ vaddq_f16 (float16x8_t __a, float16x8_t __b) + + __extension__ extern __inline float16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabd_f16 (float16x4_t a, float16x4_t b) ++vabd_f16 (float16x4_t __a, float16x4_t __b) + { +- return __builtin_aarch64_fabdv4hf (a, b); ++ return __builtin_aarch64_fabdv4hf (__a, __b); + } + + __extension__ extern __inline float16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vabdq_f16 (float16x8_t a, float16x8_t b) ++vabdq_f16 (float16x8_t __a, float16x8_t __b) + { +- return __builtin_aarch64_fabdv8hf (a, b); ++ return __builtin_aarch64_fabdv8hf (__a, __b); + } + + __extension__ extern __inline uint16x4_t +@@ -32538,72 +33098,72 @@ vmulxq_f16 (float16x8_t __a, float16x8_t __b) + + __extension__ extern __inline float16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpadd_f16 (float16x4_t a, float16x4_t b) ++vpadd_f16 (float16x4_t __a, float16x4_t __b) + { +- return __builtin_aarch64_faddpv4hf (a, b); ++ return __builtin_aarch64_faddpv4hf (__a, __b); + } + + __extension__ extern __inline float16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpaddq_f16 (float16x8_t a, float16x8_t b) ++vpaddq_f16 (float16x8_t __a, float16x8_t __b) + { +- return __builtin_aarch64_faddpv8hf (a, b); ++ return __builtin_aarch64_faddpv8hf (__a, __b); + } + + __extension__ extern __inline float16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmax_f16 (float16x4_t a, float16x4_t b) ++vpmax_f16 (float16x4_t __a, float16x4_t __b) + { +- return __builtin_aarch64_smax_nanpv4hf (a, b); ++ return __builtin_aarch64_smax_nanpv4hf (__a, __b); + } + + __extension__ extern __inline float16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxq_f16 (float16x8_t a, float16x8_t b) ++vpmaxq_f16 (float16x8_t __a, float16x8_t __b) + { +- return __builtin_aarch64_smax_nanpv8hf (a, b); ++ return __builtin_aarch64_smax_nanpv8hf (__a, __b); + } + + __extension__ extern __inline float16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxnm_f16 (float16x4_t a, float16x4_t b) ++vpmaxnm_f16 (float16x4_t __a, float16x4_t __b) + { +- return __builtin_aarch64_smaxpv4hf (a, b); ++ return __builtin_aarch64_smaxpv4hf (__a, __b); + } + + __extension__ extern __inline float16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmaxnmq_f16 (float16x8_t a, float16x8_t b) ++vpmaxnmq_f16 (float16x8_t __a, float16x8_t __b) + { +- return __builtin_aarch64_smaxpv8hf (a, b); ++ return __builtin_aarch64_smaxpv8hf (__a, __b); + } + + __extension__ extern __inline float16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpmin_f16 (float16x4_t a, float16x4_t b) ++vpmin_f16 
(float16x4_t __a, float16x4_t __b) + { +- return __builtin_aarch64_smin_nanpv4hf (a, b); ++ return __builtin_aarch64_smin_nanpv4hf (__a, __b); + } + + __extension__ extern __inline float16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminq_f16 (float16x8_t a, float16x8_t b) ++vpminq_f16 (float16x8_t __a, float16x8_t __b) + { +- return __builtin_aarch64_smin_nanpv8hf (a, b); ++ return __builtin_aarch64_smin_nanpv8hf (__a, __b); + } + + __extension__ extern __inline float16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminnm_f16 (float16x4_t a, float16x4_t b) ++vpminnm_f16 (float16x4_t __a, float16x4_t __b) + { +- return __builtin_aarch64_sminpv4hf (a, b); ++ return __builtin_aarch64_sminpv4hf (__a, __b); + } + + __extension__ extern __inline float16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vpminnmq_f16 (float16x8_t a, float16x8_t b) ++vpminnmq_f16 (float16x8_t __a, float16x8_t __b) + { +- return __builtin_aarch64_sminpv8hf (a, b); ++ return __builtin_aarch64_sminpv8hf (__a, __b); + } + + __extension__ extern __inline float16x4_t +@@ -32622,16 +33182,16 @@ vrecpsq_f16 (float16x8_t __a, float16x8_t __b) + + __extension__ extern __inline float16x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrsqrts_f16 (float16x4_t a, float16x4_t b) ++vrsqrts_f16 (float16x4_t __a, float16x4_t __b) + { +- return __builtin_aarch64_rsqrtsv4hf (a, b); ++ return __builtin_aarch64_rsqrtsv4hf (__a, __b); + } + + __extension__ extern __inline float16x8_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +-vrsqrtsq_f16 (float16x8_t a, float16x8_t b) ++vrsqrtsq_f16 (float16x8_t __a, float16x8_t __b) + { +- return __builtin_aarch64_rsqrtsv8hf (a, b); ++ return __builtin_aarch64_rsqrtsv8hf (__a, __b); + } + + __extension__ extern __inline float16x4_t +@@ -33961,6 +34521,1308 @@ vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, + + #pragma GCC pop_options + ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.5-a") ++ ++__extension__ extern __inline float32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd32z_f32 (float32x2_t __a) ++{ ++ return __builtin_aarch64_frint32zv2sf (__a); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd32zq_f32 (float32x4_t __a) ++{ ++ return __builtin_aarch64_frint32zv4sf (__a); ++} ++ ++__extension__ extern __inline float64x1_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd32z_f64 (float64x1_t __a) ++{ ++ return (float64x1_t) ++ {__builtin_aarch64_frint32zdf (vget_lane_f64 (__a, 0))}; ++} ++ ++__extension__ extern __inline float64x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd32zq_f64 (float64x2_t __a) ++{ ++ return __builtin_aarch64_frint32zv2df (__a); ++} ++ ++__extension__ extern __inline float32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd32x_f32 (float32x2_t __a) ++{ ++ return __builtin_aarch64_frint32xv2sf (__a); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd32xq_f32 (float32x4_t __a) ++{ ++ return __builtin_aarch64_frint32xv4sf (__a); ++} ++ ++__extension__ extern __inline float64x1_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd32x_f64 (float64x1_t __a) ++{ ++ return (float64x1_t) 
{__builtin_aarch64_frint32xdf (vget_lane_f64 (__a, 0))}; ++} ++ ++__extension__ extern __inline float64x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd32xq_f64 (float64x2_t __a) ++{ ++ return __builtin_aarch64_frint32xv2df (__a); ++} ++ ++__extension__ extern __inline float32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd64z_f32 (float32x2_t __a) ++{ ++ return __builtin_aarch64_frint64zv2sf (__a); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd64zq_f32 (float32x4_t __a) ++{ ++ return __builtin_aarch64_frint64zv4sf (__a); ++} ++ ++__extension__ extern __inline float64x1_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd64z_f64 (float64x1_t __a) ++{ ++ return (float64x1_t) {__builtin_aarch64_frint64zdf (vget_lane_f64 (__a, 0))}; ++} ++ ++__extension__ extern __inline float64x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd64zq_f64 (float64x2_t __a) ++{ ++ return __builtin_aarch64_frint64zv2df (__a); ++} ++ ++__extension__ extern __inline float32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd64x_f32 (float32x2_t __a) ++{ ++ return __builtin_aarch64_frint64xv2sf (__a); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd64xq_f32 (float32x4_t __a) ++{ ++ return __builtin_aarch64_frint64xv4sf (__a); ++} ++ ++__extension__ extern __inline float64x1_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd64x_f64 (float64x1_t __a) ++{ ++ return (float64x1_t) {__builtin_aarch64_frint64xdf (vget_lane_f64 (__a, 0))}; ++} ++ ++__extension__ extern __inline float64x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vrnd64xq_f64 (float64x2_t __a) ++{ ++ return __builtin_aarch64_frint64xv2df (__a); ++} ++ ++#pragma GCC pop_options ++ ++#include "arm_bf16.h" ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.2-a+bf16") ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vset_lane_bf16 (bfloat16_t __elem, bfloat16x4_t __vec, const int __index) ++{ ++ return __aarch64_vset_lane_any (__elem, __vec, __index); ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vsetq_lane_bf16 (bfloat16_t __elem, bfloat16x8_t __vec, const int __index) ++{ ++ return __aarch64_vset_lane_any (__elem, __vec, __index); ++} ++ ++__extension__ extern __inline bfloat16_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vget_lane_bf16 (bfloat16x4_t __a, const int __b) ++{ ++ return __aarch64_vget_lane_any (__a, __b); ++} ++ ++__extension__ extern __inline bfloat16_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vgetq_lane_bf16 (bfloat16x8_t __a, const int __b) ++{ ++ return __aarch64_vget_lane_any (__a, __b); ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vcreate_bf16 (uint64_t __a) ++{ ++ return (bfloat16x4_t) __a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vcombine_bf16 (bfloat16x4_t __a, bfloat16x4_t __b) ++{ ++ return (bfloat16x8_t)__builtin_aarch64_combinev4bf (__a, __b); ++} ++ ++/* vdup */ ++ ++__extension__ extern 
__inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vdup_n_bf16 (bfloat16_t __a) ++{ ++ return (bfloat16x4_t) {__a, __a, __a, __a}; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vdupq_n_bf16 (bfloat16_t __a) ++{ ++ return (bfloat16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vdup_lane_bf16 (bfloat16x4_t __a, const int __b) ++{ ++ return vdup_n_bf16 (__aarch64_vget_lane_any (__a, __b)); ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vdup_laneq_bf16 (bfloat16x8_t __a, const int __b) ++{ ++ return vdup_n_bf16 (__aarch64_vget_lane_any (__a, __b)); ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vdupq_lane_bf16 (bfloat16x4_t __a, const int __b) ++{ ++ return vdupq_n_bf16 (__aarch64_vget_lane_any (__a, __b)); ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vdupq_laneq_bf16 (bfloat16x8_t __a, const int __b) ++{ ++ return vdupq_n_bf16 (__aarch64_vget_lane_any (__a, __b)); ++} ++ ++__extension__ extern __inline bfloat16_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vduph_lane_bf16 (bfloat16x4_t __a, const int __b) ++{ ++ return __aarch64_vget_lane_any (__a, __b); ++} ++ ++__extension__ extern __inline bfloat16_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vduph_laneq_bf16 (bfloat16x8_t __a, const int __b) ++{ ++ return __aarch64_vget_lane_any (__a, __b); ++} ++ ++/* vld */ ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_bf16 (const bfloat16_t *__a) ++{ ++ return (bfloat16x4_t) __builtin_aarch64_ld1v4bf (__a); ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_bf16 (const bfloat16_t *__a) ++{ ++ return __builtin_aarch64_ld1v8bf (__a); ++} ++ ++__extension__ extern __inline bfloat16x4x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_bf16_x2 (const bfloat16_t *__a) ++{ ++ bfloat16x4x2_t ret; ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_ld1x2v4bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); ++ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x8x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_bf16_x2 (const bfloat16_t *__a) ++{ ++ bfloat16x8x2_t ret; ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_ld1x2v8bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); ++ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x4x3_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_bf16_x3 (const bfloat16_t *__a) ++{ ++ bfloat16x4x3_t __i; ++ __builtin_aarch64_simd_ci __o; ++ __o = __builtin_aarch64_ld1x3v4bf ((const __builtin_aarch64_simd_bf *) __a); ++ __i.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); ++ 
__i.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); ++ __i.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); ++ return __i; ++} ++ ++__extension__ extern __inline bfloat16x8x3_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_bf16_x3 (const bfloat16_t *__a) ++{ ++ bfloat16x8x3_t __i; ++ __builtin_aarch64_simd_ci __o; ++ __o = __builtin_aarch64_ld1x3v8bf ((const __builtin_aarch64_simd_bf *) __a); ++ __i.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); ++ __i.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); ++ __i.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); ++ return __i; ++} ++__extension__ extern __inline bfloat16x4x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_bf16_x4 (const bfloat16_t *__a) ++{ ++ union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v4bf ((const __builtin_aarch64_simd_bf *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline bfloat16x8x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_bf16_x4 (const bfloat16_t *__a) ++{ ++ union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; ++ __au.__o ++ = __builtin_aarch64_ld1x4v8bf ((const __builtin_aarch64_simd_bf *) __a); ++ return __au.__i; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_lane_bf16 (const bfloat16_t *__src, bfloat16x4_t __vec, const int __lane) ++{ ++ return __aarch64_vset_lane_any (*__src, __vec, __lane); ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_lane_bf16 (const bfloat16_t *__src, bfloat16x8_t __vec, const int __lane) ++{ ++ return __aarch64_vset_lane_any (*__src, __vec, __lane); ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1_dup_bf16 (const bfloat16_t* __a) ++{ ++ return vdup_n_bf16 (*__a); ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld1q_dup_bf16 (const bfloat16_t* __a) ++{ ++ return vdupq_n_bf16 (*__a); ++} ++ ++__extension__ extern __inline bfloat16x4x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld2_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x4x2_t ret; ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_ld2v4bf (__a); ++ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); ++ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x8x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld2q_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x8x2_t ret; ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_ld2v8bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); ++ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x4x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld2_dup_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x4x2_t ret; ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_ld2rv4bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x4_t) 
__builtin_aarch64_get_dregoiv4bf (__o, 0); ++ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x8x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld2q_dup_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x8x2_t ret; ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_ld2rv8bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); ++ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x4x3_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld3_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x4x3_t ret; ++ __builtin_aarch64_simd_ci __o; ++ __o = __builtin_aarch64_ld3v4bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); ++ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); ++ ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x8x3_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld3q_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x8x3_t ret; ++ __builtin_aarch64_simd_ci __o; ++ __o = __builtin_aarch64_ld3v8bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); ++ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); ++ ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x4x3_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld3_dup_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x4x3_t ret; ++ __builtin_aarch64_simd_ci __o; ++ __o = __builtin_aarch64_ld3rv4bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); ++ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); ++ ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x8x3_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld3q_dup_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x8x3_t ret; ++ __builtin_aarch64_simd_ci __o; ++ __o = __builtin_aarch64_ld3rv8bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); ++ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); ++ ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x4x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld4_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x4x4_t ret; ++ __builtin_aarch64_simd_xi __o; ++ __o = __builtin_aarch64_ld4v4bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 0); ++ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 1); ++ ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 2); ++ ret.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 3); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x8x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld4q_bf16 (const bfloat16_t * __a) ++{ ++ 
bfloat16x8x4_t ret; ++ __builtin_aarch64_simd_xi __o; ++ __o = __builtin_aarch64_ld4v8bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 0); ++ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 1); ++ ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 2); ++ ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 3); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x4x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld4_dup_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x4x4_t ret; ++ __builtin_aarch64_simd_xi __o; ++ __o = __builtin_aarch64_ld4rv4bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 0); ++ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 1); ++ ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 2); ++ ret.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 3); ++ return ret; ++} ++ ++__extension__ extern __inline bfloat16x8x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vld4q_dup_bf16 (const bfloat16_t * __a) ++{ ++ bfloat16x8x4_t ret; ++ __builtin_aarch64_simd_xi __o; ++ __o = __builtin_aarch64_ld4rv8bf ((const __builtin_aarch64_simd_bf *) __a); ++ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 0); ++ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 1); ++ ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 2); ++ ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 3); ++ return ret; ++} ++ ++/* vst */ ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst1_bf16 (bfloat16_t *__a, bfloat16x4_t __b) ++{ ++ __builtin_aarch64_st1v4bf (__a, __b); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst1_bf16_x2 (bfloat16_t * __a, bfloat16x4x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ bfloat16x8x2_t __temp; ++ __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1); ++ __builtin_aarch64_st1x2v4bf (__a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst1q_bf16_x2 (bfloat16_t * __a, bfloat16x8x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1); ++ __builtin_aarch64_st1x2v8bf (__a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __val) ++{ ++ __builtin_aarch64_simd_ci __o; ++ bfloat16x8x3_t __temp; ++ __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); ++ __o = 
__builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); ++ __builtin_aarch64_st1x3v4bf ((__builtin_aarch64_simd_bf *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __val) ++{ ++ __builtin_aarch64_simd_ci __o; ++ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2); ++ __builtin_aarch64_st1x3v8bf ((__builtin_aarch64_simd_bf *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t val) ++{ ++ union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v4bf ((__builtin_aarch64_simd_bf *) __a, __u.__o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t val) ++{ ++ union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; ++ __builtin_aarch64_st1x4v8bf ((__builtin_aarch64_simd_bf *) __a, __u.__o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst1q_bf16 (bfloat16_t *__a, bfloat16x8_t __b) ++{ ++ __builtin_aarch64_st1v8bf (__a, __b); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst1_lane_bf16 (bfloat16_t *__a, bfloat16x4_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst1q_lane_bf16 (bfloat16_t *__a, bfloat16x8_t __b, const int __lane) ++{ ++ *__a = __aarch64_vget_lane_any (__b, __lane); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2_bf16 (bfloat16_t * __a, bfloat16x4x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ bfloat16x8x2_t __temp; ++ __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1); ++ __builtin_aarch64_st2v4bf (__a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst2q_bf16 (bfloat16_t * __a, bfloat16x8x2_t __val) ++{ ++ __builtin_aarch64_simd_oi __o; ++ __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1); ++ __builtin_aarch64_st2v8bf (__a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst3_bf16 (bfloat16_t * __a, bfloat16x4x3_t __val) ++{ ++ __builtin_aarch64_simd_ci __o; ++ bfloat16x8x3_t __temp; ++ __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); ++ 
__o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); ++ __builtin_aarch64_st3v4bf ((__builtin_aarch64_simd_bf *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst3q_bf16 (bfloat16_t * __a, bfloat16x8x3_t __val) ++{ ++ __builtin_aarch64_simd_ci __o; ++ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2); ++ __builtin_aarch64_st3v8bf ((__builtin_aarch64_simd_bf *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst4_bf16 (bfloat16_t * __a, bfloat16x4x4_t __val) ++{ ++ __builtin_aarch64_simd_xi __o; ++ bfloat16x8x4_t __temp; ++ __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __temp.val[3] = vcombine_bf16 (__val.val[3], vcreate_bf16 (__AARCH64_UINT64_C (0))); ++ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[3], 3); ++ __builtin_aarch64_st4v4bf ((__builtin_aarch64_simd_bf *) __a, __o); ++} ++ ++__extension__ extern __inline void ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vst4q_bf16 (bfloat16_t * __a, bfloat16x8x4_t __val) ++{ ++ __builtin_aarch64_simd_xi __o; ++ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[0], 0); ++ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[1], 1); ++ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[2], 2); ++ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[3], 3); ++ __builtin_aarch64_st4v8bf ((__builtin_aarch64_simd_bf *) __a, __o); ++} ++ ++/* vreinterpret */ ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_u8 (uint8x8_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_u16 (uint16x4_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_u32 (uint32x2_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_u64 (uint64x1_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_s8 (int8x8_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_s16 (int16x4_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ 
extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_s32 (int32x2_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_s64 (int64x1_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_p8 (poly8x8_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_p16 (poly16x4_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_p64 (poly64x1_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_f16 (float16x4_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_f32 (float32x2_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_bf16_f64 (float64x1_t __a) ++{ ++ return (bfloat16x4_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_u8 (uint8x16_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_u16 (uint16x8_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_u32 (uint32x4_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_u64 (uint64x2_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_s8 (int8x16_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_s16 (int16x8_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_s32 (int32x4_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_s64 (int64x2_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_p8 (poly8x16_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_p16 (poly16x8_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline 
bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_p64 (poly64x2_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_p128 (poly128_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_f16 (float16x8_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_f32 (float32x4_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_bf16_f64 (float64x2_t __a) ++{ ++ return (bfloat16x8_t)__a; ++} ++ ++__extension__ extern __inline int8x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_s8_bf16 (bfloat16x4_t __a) ++{ ++ return (int8x8_t)__a; ++} ++ ++__extension__ extern __inline int16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_s16_bf16 (bfloat16x4_t __a) ++{ ++ return (int16x4_t)__a; ++} ++ ++__extension__ extern __inline int32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_s32_bf16 (bfloat16x4_t __a) ++{ ++ return (int32x2_t)__a; ++} ++ ++__extension__ extern __inline int64x1_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_s64_bf16 (bfloat16x4_t __a) ++{ ++ return (int64x1_t)__a; ++} ++ ++__extension__ extern __inline uint8x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_u8_bf16 (bfloat16x4_t __a) ++{ ++ return (uint8x8_t)__a; ++} ++ ++__extension__ extern __inline uint16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_u16_bf16 (bfloat16x4_t __a) ++{ ++ return (uint16x4_t)__a; ++} ++ ++__extension__ extern __inline uint32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_u32_bf16 (bfloat16x4_t __a) ++{ ++ return (uint32x2_t)__a; ++} ++ ++__extension__ extern __inline uint64x1_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_u64_bf16 (bfloat16x4_t __a) ++{ ++ return (uint64x1_t)__a; ++} ++ ++__extension__ extern __inline float16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_f16_bf16 (bfloat16x4_t __a) ++{ ++ return (float16x4_t)__a; ++} ++ ++__extension__ extern __inline float32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_f32_bf16 (bfloat16x4_t __a) ++{ ++ return (float32x2_t)__a; ++} ++ ++__extension__ extern __inline float64x1_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_f64_bf16 (bfloat16x4_t __a) ++{ ++ return (float64x1_t)__a; ++} ++ ++__extension__ extern __inline poly8x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_p8_bf16 (bfloat16x4_t __a) ++{ ++ return (poly8x8_t)__a; ++} ++ ++__extension__ extern __inline poly16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpret_p16_bf16 (bfloat16x4_t __a) ++{ ++ return (poly16x4_t)__a; ++} ++ ++__extension__ extern __inline poly64x1_t ++__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) ++vreinterpret_p64_bf16 (bfloat16x4_t __a) ++{ ++ return (poly64x1_t)__a; ++} ++ ++__extension__ extern __inline int8x16_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_s8_bf16 (bfloat16x8_t __a) ++{ ++ return (int8x16_t)__a; ++} ++ ++__extension__ extern __inline int16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_s16_bf16 (bfloat16x8_t __a) ++{ ++ return (int16x8_t)__a; ++} ++ ++__extension__ extern __inline int32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_s32_bf16 (bfloat16x8_t __a) ++{ ++ return (int32x4_t)__a; ++} ++ ++__extension__ extern __inline int64x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_s64_bf16 (bfloat16x8_t __a) ++{ ++ return (int64x2_t)__a; ++} ++ ++__extension__ extern __inline uint8x16_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_u8_bf16 (bfloat16x8_t __a) ++{ ++ return (uint8x16_t)__a; ++} ++ ++__extension__ extern __inline uint16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_u16_bf16 (bfloat16x8_t __a) ++{ ++ return (uint16x8_t)__a; ++} ++ ++__extension__ extern __inline uint32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_u32_bf16 (bfloat16x8_t __a) ++{ ++ return (uint32x4_t)__a; ++} ++ ++__extension__ extern __inline uint64x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_u64_bf16 (bfloat16x8_t __a) ++{ ++ return (uint64x2_t)__a; ++} ++ ++__extension__ extern __inline float16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_f16_bf16 (bfloat16x8_t __a) ++{ ++ return (float16x8_t)__a; ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_f32_bf16 (bfloat16x8_t __a) ++{ ++ return (float32x4_t)__a; ++} ++ ++__extension__ extern __inline float64x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_f64_bf16 (bfloat16x8_t __a) ++{ ++ return (float64x2_t)__a; ++} ++ ++__extension__ extern __inline poly8x16_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_p8_bf16 (bfloat16x8_t __a) ++{ ++ return (poly8x16_t)__a; ++} ++ ++__extension__ extern __inline poly16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_p16_bf16 (bfloat16x8_t __a) ++{ ++ return (poly16x8_t)__a; ++} ++ ++__extension__ extern __inline poly64x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_p64_bf16 (bfloat16x8_t __a) ++{ ++ return (poly64x2_t)__a; ++} ++ ++__extension__ extern __inline poly128_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vreinterpretq_p128_bf16 (bfloat16x8_t __a) ++{ ++ return (poly128_t)__a; ++} ++ ++__extension__ extern __inline float32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfdot_f32 (float32x2_t __r, bfloat16x4_t __a, bfloat16x4_t __b) ++{ ++ return __builtin_aarch64_bfdotv2sf (__r, __a, __b); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfdotq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) ++{ ++ return __builtin_aarch64_bfdotv4sf (__r, __a, __b); ++} ++ ++__extension__ extern 
__inline float32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfdot_lane_f32 (float32x2_t __r, bfloat16x4_t __a, bfloat16x4_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_bfdot_lanev2sf (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfdotq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_bfdot_lanev4sf (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline float32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfdot_laneq_f32 (float32x2_t __r, bfloat16x4_t __a, bfloat16x8_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_bfdot_laneqv2sf (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfdotq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_bfdot_laneqv4sf (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfmmlaq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) ++ ++{ ++ return __builtin_aarch64_bfmmlaqv4sf (__r, __a, __b); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfmlalbq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) ++{ ++ return __builtin_aarch64_bfmlalbv4sf (__r, __a, __b); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfmlaltq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) ++{ ++ return __builtin_aarch64_bfmlaltv4sf (__r, __a, __b); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfmlalbq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_bfmlalb_lanev4sf (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfmlaltq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_bfmlalt_lanev4sf (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfmlalbq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_bfmlalb_lane_qv4sf (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline float32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline bfloat16x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vcvt_bf16_f32 (float32x4_t __a) ++{ ++ return __builtin_aarch64_bfcvtnv4bf (__a); ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vcvtq_low_bf16_f32 (float32x4_t __a) ++{ ++ return __builtin_aarch64_bfcvtn_qv8bf (__a); ++} ++ ++__extension__ extern __inline bfloat16x8_t ++__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) ++vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a) ++{ ++ return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a); ++} ++ ++#pragma GCC pop_options ++ ++/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */ ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.2-a+i8mm") ++ ++__extension__ extern __inline int32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b) ++{ ++ return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b); ++} ++ ++__extension__ extern __inline int32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) ++{ ++ return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b); ++} ++ ++__extension__ extern __inline int32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b, const int __index) ++{ ++ return __builtin_aarch64_usdot_lanev8qi_ssuss (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline int32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vusdot_laneq_s32 (int32x2_t __r, uint8x8_t __a, int8x16_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_usdot_laneqv8qi_ssuss (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline int32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vusdotq_lane_s32 (int32x4_t __r, uint8x16_t __a, int8x8_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_usdot_lanev16qi_ssuss (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline int32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vusdotq_laneq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_usdot_laneqv16qi_ssuss (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline int32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vsudot_lane_s32 (int32x2_t __r, int8x8_t __a, uint8x8_t __b, const int __index) ++{ ++ return __builtin_aarch64_sudot_lanev8qi_sssus (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline int32x2_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vsudot_laneq_s32 (int32x2_t __r, int8x8_t __a, uint8x16_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_sudot_laneqv8qi_sssus (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline int32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a, uint8x8_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_sudot_lanev16qi_sssus (__r, __a, __b, __index); ++} ++ ++__extension__ extern __inline int32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vsudotq_laneq_s32 (int32x4_t __r, int8x16_t __a, uint8x16_t __b, ++ const int __index) ++{ ++ return __builtin_aarch64_sudot_laneqv16qi_sssus (__r, __a, __b, __index); ++} ++ ++/* Matrix Multiply-Accumulate. 
*/ ++ ++__extension__ extern __inline int32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vmmlaq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) ++{ ++ return __builtin_aarch64_simd_smmlav16qi (__r, __a, __b); ++} ++ ++__extension__ extern __inline uint32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vmmlaq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) ++{ ++ return __builtin_aarch64_simd_ummlav16qi_uuuu (__r, __a, __b); ++} ++ ++__extension__ extern __inline int32x4_t ++__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) ++vusmmlaq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) ++{ ++ return __builtin_aarch64_simd_usmmlav16qi_ssus (__r, __a, __b); ++} ++ ++#pragma GCC pop_options ++ + #undef __aarch64_vget_lane_any + + #undef __aarch64_vdup_lane_any +diff --git a/gcc/config/aarch64/arm_sve.h b/gcc/config/aarch64/arm_sve.h +new file mode 100644 +index 000000000..0a316c0a0 +--- /dev/null ++++ b/gcc/config/aarch64/arm_sve.h +@@ -0,0 +1,37 @@ ++/* AArch64 SVE intrinsics include file. ++ Copyright (C) 2018-2019 Free Software Foundation, Inc. ++ ++ This file is part of GCC. ++ ++ GCC is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published ++ by the Free Software Foundation; either version 3, or (at your ++ option) any later version. ++ ++ GCC is distributed in the hope that it will be useful, but WITHOUT ++ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++ License for more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . 
*/ ++ ++#ifndef _ARM_SVE_H_ ++#define _ARM_SVE_H_ ++ ++#include ++#include ++ ++typedef __fp16 float16_t; ++typedef float float32_t; ++typedef double float64_t; ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++#endif +diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md +index 0f357662a..002e91d2b 100644 +--- a/gcc/config/aarch64/atomics.md ++++ b/gcc/config/aarch64/atomics.md +@@ -22,10 +22,10 @@ + + (define_expand "@atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand" "") ;; bool out +- (match_operand:ALLI 1 "register_operand" "") ;; val out +- (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory +- (match_operand:ALLI 3 "nonmemory_operand" "") ;; expected +- (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired ++ (match_operand:ALLI_TI 1 "register_operand" "") ;; val out ++ (match_operand:ALLI_TI 2 "aarch64_sync_memory_operand" "") ;; memory ++ (match_operand:ALLI_TI 3 "nonmemory_operand" "") ;; expected ++ (match_operand:ALLI_TI 4 "aarch64_reg_or_zero" "") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; mod_s + (match_operand:SI 7 "const_int_operand")] ;; mod_f +@@ -88,6 +88,30 @@ + } + ) + ++(define_insn_and_split "@aarch64_compare_and_swap" ++ [(set (reg:CC CC_REGNUM) ;; bool out ++ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) ++ (set (match_operand:JUST_TI 0 "register_operand" "=&r") ;; val out ++ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory ++ (set (match_dup 1) ++ (unspec_volatile:JUST_TI ++ [(match_operand:JUST_TI 2 "aarch64_reg_or_zero" "rZ") ;; expect ++ (match_operand:JUST_TI 3 "aarch64_reg_or_zero" "rZ") ;; desired ++ (match_operand:SI 4 "const_int_operand") ;; is_weak ++ (match_operand:SI 5 "const_int_operand") ;; mod_s ++ (match_operand:SI 6 "const_int_operand")] ;; mod_f ++ UNSPECV_ATOMIC_CMPSW)) ++ (clobber (match_scratch:SI 7 "=&r"))] ++ "" ++ "#" ++ "&& epilogue_completed" ++ [(const_int 0)] ++ { ++ aarch64_split_compare_and_swap (operands); ++ DONE; ++ } ++) ++ + (define_insn "@aarch64_compare_and_swap_lse" + [(set (match_operand:SI 0 "register_operand" "+r") ;; val out + (zero_extend:SI +@@ -133,23 +157,56 @@ + return "casal\t%0, %2, %1"; + }) + ++(define_insn "@aarch64_compare_and_swap_lse" ++ [(set (match_operand:JUST_TI 0 "register_operand" "+r") ;; val out ++ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory ++ (set (match_dup 1) ++ (unspec_volatile:JUST_TI ++ [(match_dup 0) ;; expect ++ (match_operand:JUST_TI 2 "register_operand" "r") ;; desired ++ (match_operand:SI 3 "const_int_operand")] ;; mod_s ++ UNSPECV_ATOMIC_CMPSW))] ++ "TARGET_LSE" ++{ ++ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); ++ if (is_mm_relaxed (model)) ++ return "casp\t%0, %R0, %2, %R2, %1"; ++ else if (is_mm_acquire (model) || is_mm_consume (model)) ++ return "caspa\t%0, %R0, %2, %R2, %1"; ++ else if (is_mm_release (model)) ++ return "caspl\t%0, %R0, %2, %R2, %1"; ++ else ++ return "caspal\t%0, %R0, %2, %R2, %1"; ++}) ++ + (define_expand "atomic_exchange" +- [(match_operand:ALLI 0 "register_operand" "") +- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") +- (match_operand:ALLI 2 "aarch64_reg_or_zero" "") +- (match_operand:SI 3 "const_int_operand" "")] ++ [(match_operand:ALLI 0 "register_operand") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand") ++ (match_operand:ALLI 2 "aarch64_reg_or_zero") ++ (match_operand:SI 3 "const_int_operand")] + "" + { +- rtx (*gen) (rtx, rtx, rtx, rtx); +- + /* 
Use an atomic SWP when available. */ + if (TARGET_LSE) +- gen = gen_aarch64_atomic_exchange_lse; ++ { ++ emit_insn (gen_aarch64_atomic_exchange_lse ++ (operands[0], operands[1], operands[2], operands[3])); ++ } ++ else if (TARGET_OUTLINE_ATOMICS) ++ { ++ machine_mode mode = mode; ++ rtx func = aarch64_atomic_ool_func (mode, operands[3], ++ &aarch64_ool_swp_names); ++ rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, ++ mode, operands[2], mode, ++ XEXP (operands[1], 0), Pmode); ++ emit_move_insn (operands[0], rval); ++ } + else +- gen = gen_aarch64_atomic_exchange; +- +- emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); +- ++ { ++ emit_insn (gen_aarch64_atomic_exchange ++ (operands[0], operands[1], operands[2], operands[3])); ++ } + DONE; + } + ) +@@ -198,9 +255,9 @@ + ) + + (define_expand "atomic_" +- [(match_operand:ALLI 0 "aarch64_sync_memory_operand" "") ++ [(match_operand:ALLI 0 "aarch64_sync_memory_operand") + (atomic_op:ALLI +- (match_operand:ALLI 1 "" "") ++ (match_operand:ALLI 1 "") + (match_operand:SI 2 "const_int_operand"))] + "" + { +@@ -234,6 +291,39 @@ + } + operands[1] = force_reg (mode, operands[1]); + } ++ else if (TARGET_OUTLINE_ATOMICS) ++ { ++ const atomic_ool_names *names; ++ switch () ++ { ++ case MINUS: ++ operands[1] = expand_simple_unop (mode, NEG, operands[1], ++ NULL, 1); ++ /* fallthru */ ++ case PLUS: ++ names = &aarch64_ool_ldadd_names; ++ break; ++ case IOR: ++ names = &aarch64_ool_ldset_names; ++ break; ++ case XOR: ++ names = &aarch64_ool_ldeor_names; ++ break; ++ case AND: ++ operands[1] = expand_simple_unop (mode, NOT, operands[1], ++ NULL, 1); ++ names = &aarch64_ool_ldclr_names; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ machine_mode mode = mode; ++ rtx func = aarch64_atomic_ool_func (mode, operands[2], names); ++ emit_library_call_value (func, NULL_RTX, LCT_NORMAL, mode, ++ operands[1], mode, ++ XEXP (operands[0], 0), Pmode); ++ DONE; ++ } + else + gen = gen_aarch64_atomic_; + +@@ -322,10 +412,10 @@ + ;; Load-operate-store, returning the original memory data. + + (define_expand "atomic_fetch_" +- [(match_operand:ALLI 0 "register_operand" "") +- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") ++ [(match_operand:ALLI 0 "register_operand") ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand") + (atomic_op:ALLI +- (match_operand:ALLI 2 "" "") ++ (match_operand:ALLI 2 "") + (match_operand:SI 3 "const_int_operand"))] + "" + { +@@ -359,6 +449,40 @@ + } + operands[2] = force_reg (mode, operands[2]); + } ++ else if (TARGET_OUTLINE_ATOMICS) ++ { ++ const atomic_ool_names *names; ++ switch () ++ { ++ case MINUS: ++ operands[2] = expand_simple_unop (mode, NEG, operands[2], ++ NULL, 1); ++ /* fallthru */ ++ case PLUS: ++ names = &aarch64_ool_ldadd_names; ++ break; ++ case IOR: ++ names = &aarch64_ool_ldset_names; ++ break; ++ case XOR: ++ names = &aarch64_ool_ldeor_names; ++ break; ++ case AND: ++ operands[2] = expand_simple_unop (mode, NOT, operands[2], ++ NULL, 1); ++ names = &aarch64_ool_ldclr_names; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ machine_mode mode = mode; ++ rtx func = aarch64_atomic_ool_func (mode, operands[3], names); ++ rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, mode, ++ operands[2], mode, ++ XEXP (operands[1], 0), Pmode); ++ emit_move_insn (operands[0], rval); ++ DONE; ++ } + else + gen = gen_aarch64_atomic_fetch_; + +@@ -439,16 +563,16 @@ + ;; Load-operate-store, returning the updated memory data. 
+ + (define_expand "atomic__fetch" +- [(match_operand:ALLI 0 "register_operand" "") ++ [(match_operand:ALLI 0 "register_operand") + (atomic_op:ALLI +- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") +- (match_operand:ALLI 2 "" "")) ++ (match_operand:ALLI 1 "aarch64_sync_memory_operand") ++ (match_operand:ALLI 2 "")) + (match_operand:SI 3 "const_int_operand")] + "" + { + /* Use an atomic load-operate instruction when possible. In this case + we will re-compute the result from the original mem value. */ +- if (TARGET_LSE) ++ if (TARGET_LSE || TARGET_OUTLINE_ATOMICS) + { + rtx tmp = gen_reg_rtx (mode); + operands[2] = force_reg (mode, operands[2]); +@@ -581,6 +705,24 @@ + } + ) + ++(define_insn "aarch64_load_exclusive_pair" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI ++ [(match_operand:TI 2 "aarch64_sync_memory_operand" "Q") ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPECV_LX)) ++ (set (match_operand:DI 1 "register_operand" "=r") ++ (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LX))] ++ "" ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) ++ return "ldxp\t%0, %1, %2"; ++ else ++ return "ldaxp\t%0, %1, %2"; ++ } ++) ++ + (define_insn "@aarch64_store_exclusive" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_SX)) +@@ -599,8 +741,27 @@ + } + ) + ++(define_insn "aarch64_store_exclusive_pair" ++ [(set (match_operand:SI 0 "register_operand" "=&r") ++ (unspec_volatile:SI [(const_int 0)] UNSPECV_SX)) ++ (set (match_operand:TI 1 "aarch64_sync_memory_operand" "=Q") ++ (unspec_volatile:TI ++ [(match_operand:DI 2 "aarch64_reg_or_zero" "rZ") ++ (match_operand:DI 3 "aarch64_reg_or_zero" "rZ") ++ (match_operand:SI 4 "const_int_operand")] ++ UNSPECV_SX))] ++ "" ++ { ++ enum memmodel model = memmodel_from_int (INTVAL (operands[4])); ++ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) ++ return "stxp\t%w0, %x2, %x3, %1"; ++ else ++ return "stlxp\t%w0, %x2, %x3, %1"; ++ } ++) ++ + (define_expand "mem_thread_fence" +- [(match_operand:SI 0 "const_int_operand" "")] ++ [(match_operand:SI 0 "const_int_operand")] + "" + { + enum memmodel model = memmodel_from_int (INTVAL (operands[0])); +diff --git a/gcc/config/aarch64/check-sve-md.awk b/gcc/config/aarch64/check-sve-md.awk +new file mode 100644 +index 000000000..3da78f3dd +--- /dev/null ++++ b/gcc/config/aarch64/check-sve-md.awk +@@ -0,0 +1,66 @@ ++#!/usr/bin/awk -f ++# Copyright (C) 2019 Free Software Foundation, Inc. ++# ++# This program is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by the ++# Free Software Foundation; either version 3, or (at your option) any ++# later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; see the file COPYING3. If not see ++# . ++ ++# This awk script checks that aarch64-sve.md (passed either on the ++# command line or via stdin) has an up-to-date contents section. ++ ++BEGIN { ++ seen1 = 0 ++ seen2 = 0 ++ errors = 0 ++} ++ ++# The headings in the comments use a two-level hierarchy: ";; == ..." 
++# for major sections and ";; ---- ..." for minor sections. Each section ++# heading must be unique. ++# ++# The contents section should list all the section headings, using the ++# same text and in the same order. We should therefore see exactly two ++# copies of the section list. ++/^;; == / || /^;; ---- / { ++ if ($0 in seen || seen2 > 0) ++ { ++ if (seen2 >= seen1) ++ { ++ printf "error: line not in contents: %s\n", $0 > "/dev/stderr" ++ errors += 1 ++ exit(1) ++ } ++ if ($0 != order[seen2]) ++ { ++ printf "error: mismatched contents\n saw: %s\nexpected: %s\n", \ ++ $0, order[seen2] > "/dev/stderr" ++ errors += 1 ++ exit(1) ++ } ++ seen2 += 1 ++ } ++ else ++ { ++ seen[$0] = 1 ++ order[seen1] = $0 ++ seen1 += 1 ++ } ++} ++ ++END { ++ if (seen2 < seen1 && errors == 0) ++ { ++ printf "error: line only in contents: %s\n", order[seen2] > "/dev/stderr" ++ exit(1) ++ } ++} +diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md +index 21f9549e6..191c996c1 100644 +--- a/gcc/config/aarch64/constraints.md ++++ b/gcc/config/aarch64/constraints.md +@@ -36,6 +36,9 @@ + (define_register_constraint "x" "FP_LO_REGS" + "Floating point and SIMD vector registers V0 - V15.") + ++(define_register_constraint "y" "FP_LO8_REGS" ++ "Floating point and SIMD vector registers V0 - V7.") ++ + (define_constraint "I" + "A constant that can be used with an ADD operation." + (and (match_code "const_int") +@@ -46,6 +49,12 @@ + (and (match_code "const_int") + (match_test "aarch64_pluslong_strict_immedate (op, VOIDmode)"))) + ++(define_constraint "Uai" ++ "@internal ++ A constraint that matches a VG-based constant that can be added by ++ a single INC or DEC." ++ (match_operand 0 "aarch64_sve_scalar_inc_dec_immediate")) ++ + (define_constraint "Uav" + "@internal + A constraint that matches a VG-based constant that can be added by +@@ -114,8 +123,8 @@ + (match_test "aarch64_float_const_zero_rtx_p (op)"))) + + (define_constraint "Z" +- "Integer constant zero." +- (match_test "op == const0_rtx")) ++ "Integer or floating-point constant zero." ++ (match_test "op == CONST0_RTX (GET_MODE (op))")) + + (define_constraint "Ush" + "A constraint that matches an absolute symbolic address high part." +@@ -248,6 +257,38 @@ + true, + ADDR_QUERY_LDP_STP_N)"))) + ++(define_address_constraint "UPb" ++ "@internal ++ An address valid for SVE PRFB instructions." ++ (match_test "aarch64_sve_prefetch_operand_p (op, VNx16QImode)")) ++ ++(define_address_constraint "UPd" ++ "@internal ++ An address valid for SVE PRFD instructions." ++ (match_test "aarch64_sve_prefetch_operand_p (op, VNx2DImode)")) ++ ++(define_address_constraint "UPh" ++ "@internal ++ An address valid for SVE PRFH instructions." ++ (match_test "aarch64_sve_prefetch_operand_p (op, VNx8HImode)")) ++ ++(define_address_constraint "UPw" ++ "@internal ++ An address valid for SVE PRFW instructions." ++ (match_test "aarch64_sve_prefetch_operand_p (op, VNx4SImode)")) ++ ++(define_memory_constraint "Utf" ++ "@internal ++ An address valid for SVE LDFF1 instructions." ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ldff1_operand_p (op)"))) ++ ++(define_memory_constraint "Utn" ++ "@internal ++ An address valid for SVE LDNF1 instructions." 
++ (and (match_code "mem") ++ (match_test "aarch64_sve_ldnf1_operand_p (op)"))) ++ + (define_memory_constraint "Utr" + "@internal + An address valid for SVE LDR and STR instructions (as distinct from +@@ -269,6 +310,37 @@ + (match_test "aarch64_legitimate_address_p (V2DImode, + XEXP (op, 0), 1)"))) + ++(define_memory_constraint "UtQ" ++ "@internal ++ An address valid for SVE LD1RQs." ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ld1rq_operand_p (op)"))) ++ ++(define_memory_constraint "UOb" ++ "@internal ++ An address valid for SVE LD1ROH." ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ld1ro_operand_p (op, QImode)"))) ++ ++(define_memory_constraint "UOh" ++ "@internal ++ An address valid for SVE LD1ROH." ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ld1ro_operand_p (op, HImode)"))) ++ ++ ++(define_memory_constraint "UOw" ++ "@internal ++ An address valid for SVE LD1ROW." ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ld1ro_operand_p (op, SImode)"))) ++ ++(define_memory_constraint "UOd" ++ "@internal ++ An address valid for SVE LD1ROD." ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ld1ro_operand_p (op, DImode)"))) ++ + (define_memory_constraint "Uty" + "@internal + An address valid for SVE LD1Rs." +@@ -284,7 +356,7 @@ + (define_constraint "Ufc" + "A floating point constant which can be used with an\ + FMOV immediate operation." +- (and (match_code "const_double") ++ (and (match_code "const_double,const_vector") + (match_test "aarch64_float_const_representable_p (op)"))) + + (define_constraint "Uvi" +@@ -329,6 +401,13 @@ + (match_test "aarch64_simd_scalar_immediate_valid_for_move (op, + QImode)"))) + ++(define_constraint "Dt" ++ "@internal ++ A const_double which is the reciprocal of an exact power of two, can be ++ used in an scvtf with fract bits operation" ++ (and (match_code "const_double") ++ (match_test "aarch64_fpconst_pow2_recip (op) > 0"))) ++ + (define_constraint "Dl" + "@internal + A constraint that matches vector of immediates for left shifts." +@@ -373,18 +452,54 @@ + An address valid for a prefetch instruction." + (match_test "aarch64_address_valid_for_prefetch_p (op, true)")) + ++(define_constraint "vgb" ++ "@internal ++ A constraint that matches an immediate offset valid for SVE LD1B ++ gather instructions." ++ (match_operand 0 "aarch64_sve_gather_immediate_b")) ++ ++(define_constraint "vgd" ++ "@internal ++ A constraint that matches an immediate offset valid for SVE LD1D ++ gather instructions." ++ (match_operand 0 "aarch64_sve_gather_immediate_d")) ++ ++(define_constraint "vgh" ++ "@internal ++ A constraint that matches an immediate offset valid for SVE LD1H ++ gather instructions." ++ (match_operand 0 "aarch64_sve_gather_immediate_h")) ++ ++(define_constraint "vgw" ++ "@internal ++ A constraint that matches an immediate offset valid for SVE LD1W ++ gather instructions." ++ (match_operand 0 "aarch64_sve_gather_immediate_w")) ++ + (define_constraint "vsa" + "@internal + A constraint that matches an immediate operand valid for SVE + arithmetic instructions." + (match_operand 0 "aarch64_sve_arith_immediate")) + ++(define_constraint "vsb" ++ "@internal ++ A constraint that matches an immediate operand valid for SVE UMAX ++ and UMIN operations." ++ (match_operand 0 "aarch64_sve_vsb_immediate")) ++ + (define_constraint "vsc" + "@internal + A constraint that matches a signed immediate operand valid for SVE + CMP instructions." 
+ (match_operand 0 "aarch64_sve_cmp_vsc_immediate")) + ++(define_constraint "vss" ++ "@internal ++ A constraint that matches a signed immediate operand valid for SVE ++ DUP instructions." ++ (match_test "aarch64_sve_dup_immediate_p (op)")) ++ + (define_constraint "vsd" + "@internal + A constraint that matches an unsigned immediate operand valid for SVE +@@ -395,7 +510,7 @@ + "@internal + A constraint that matches a vector count operand valid for SVE INC and + DEC instructions." +- (match_operand 0 "aarch64_sve_inc_dec_immediate")) ++ (match_operand 0 "aarch64_sve_vector_inc_dec_immediate")) + + (define_constraint "vsn" + "@internal +@@ -403,6 +518,18 @@ + is valid for SVE SUB instructions." + (match_operand 0 "aarch64_sve_sub_arith_immediate")) + ++(define_constraint "vsQ" ++ "@internal ++ Like vsa, but additionally check that the immediate is nonnegative ++ when interpreted as a signed value." ++ (match_operand 0 "aarch64_sve_qadd_immediate")) ++ ++(define_constraint "vsS" ++ "@internal ++ Like vsn, but additionally check that the immediate is negative ++ when interpreted as a signed value." ++ (match_operand 0 "aarch64_sve_qsub_immediate")) ++ + (define_constraint "vsl" + "@internal + A constraint that matches an immediate operand valid for SVE logical +@@ -411,9 +538,9 @@ + + (define_constraint "vsm" + "@internal +- A constraint that matches an immediate operand valid for SVE MUL +- operations." +- (match_operand 0 "aarch64_sve_mul_immediate")) ++ A constraint that matches an immediate operand valid for SVE MUL, ++ SMAX and SMIN operations." ++ (match_operand 0 "aarch64_sve_vsm_immediate")) + + (define_constraint "vsA" + "@internal +@@ -421,13 +548,20 @@ + and FSUB operations." + (match_operand 0 "aarch64_sve_float_arith_immediate")) + ++;; "B" for "bound". ++(define_constraint "vsB" ++ "@internal ++ A constraint that matches an immediate operand valid for SVE FMAX ++ and FMIN operations." ++ (match_operand 0 "aarch64_sve_float_maxmin_immediate")) ++ + (define_constraint "vsM" + "@internal +- A constraint that matches an imediate operand valid for SVE FMUL ++ A constraint that matches an immediate operand valid for SVE FMUL + operations." 
+ (match_operand 0 "aarch64_sve_float_mul_immediate")) + + (define_constraint "vsN" + "@internal + A constraint that matches the negative of vsA" +- (match_operand 0 "aarch64_sve_float_arith_with_sub_immediate")) ++ (match_operand 0 "aarch64_sve_float_negated_arith_immediate")) +diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.c b/gcc/config/aarch64/cortex-a57-fma-steering.c +index eb91662b6..d8e6038d1 100644 +--- a/gcc/config/aarch64/cortex-a57-fma-steering.c ++++ b/gcc/config/aarch64/cortex-a57-fma-steering.c +@@ -37,6 +37,7 @@ + #include "insn-attr.h" + #include "context.h" + #include "tree-pass.h" ++#include "function-abi.h" + #include "regrename.h" + #include "aarch64-protos.h" + +@@ -267,7 +268,7 @@ rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) + if (DEBUG_INSN_P (tmp->insn)) + continue; + n_uses++; +- IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]); ++ *unavailable |= ~reg_class_contents[tmp->cl]; + super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; + } + +@@ -281,7 +282,7 @@ rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) + { + fprintf (dump_file, "Register %s in insn %d", reg_names[reg], + INSN_UID (head->first->insn)); +- if (head->need_caller_save_reg) ++ if (head->call_abis) + fprintf (dump_file, " crosses a call"); + } + +diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c +index 6f16775f4..ef4f18352 100644 +--- a/gcc/config/aarch64/driver-aarch64.c ++++ b/gcc/config/aarch64/driver-aarch64.c +@@ -32,7 +32,7 @@ std::string aarch64_get_extension_string_for_isa_flags (unsigned long, + struct aarch64_arch_extension + { + const char *ext; +- unsigned int flag; ++ uint64_t flag; + const char *feat_string; + }; + +@@ -52,7 +52,7 @@ struct aarch64_core_data + unsigned char implementer_id; /* Exactly 8 bits */ + unsigned int part_no; /* 12 bits + 12 bits */ + unsigned variant; +- const unsigned long flags; ++ const uint64_t flags; + }; + + #define AARCH64_BIG_LITTLE(BIG, LITTLE) \ +@@ -75,7 +75,7 @@ struct aarch64_arch_driver_info + { + const char* id; + const char* name; +- const unsigned long flags; ++ const uint64_t flags; + }; + + #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ +@@ -179,8 +179,8 @@ host_detect_local_cpu (int argc, const char **argv) + unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS }; + unsigned int n_variants = 0; + bool processed_exts = false; +- unsigned long extension_flags = 0; +- unsigned long default_flags = 0; ++ uint64_t extension_flags = 0; ++ uint64_t default_flags = 0; + + gcc_assert (argc); + +diff --git a/gcc/config/aarch64/falkor-tag-collision-avoidance.c b/gcc/config/aarch64/falkor-tag-collision-avoidance.c +index 779dee81f..35ca79232 100644 +--- a/gcc/config/aarch64/falkor-tag-collision-avoidance.c ++++ b/gcc/config/aarch64/falkor-tag-collision-avoidance.c +@@ -38,6 +38,7 @@ + #include "optabs.h" + #include "regs.h" + #include "recog.h" ++#include "function-abi.h" + #include "regrename.h" + #include "print-rtl.h" + +@@ -229,7 +230,7 @@ init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head, + if (DEBUG_INSN_P (tmp->insn)) + continue; + +- IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]); ++ *unavailable |= ~reg_class_contents[tmp->cl]; + super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; + } + +diff --git a/gcc/config/aarch64/falkor.md b/gcc/config/aarch64/falkor.md +index 41955af81..2bcc661e5 100644 +--- a/gcc/config/aarch64/falkor.md ++++ 
b/gcc/config/aarch64/falkor.md +@@ -648,7 +648,7 @@ + + (define_insn_reservation "falkor_other_0_nothing" 0 + (and (eq_attr "tune" "falkor") +- (eq_attr "type" "no_insn,trap,block")) ++ (eq_attr "type" "trap,block")) + "nothing") + + (define_insn_reservation "falkor_other_2_z" 2 +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index c7ccd5bf6..7b6456961 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -29,9 +29,16 @@ + ;; Iterator for HI, SI, DI, some instructions can only work on these modes. + (define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI]) + ++;; "Iterator" for just TI -- features like @pattern only work with iterators. ++(define_mode_iterator JUST_TI [TI]) ++ + ;; Iterator for QI and HI modes + (define_mode_iterator SHORT [QI HI]) + ++;; Iterators for single modes, for "@" patterns. ++(define_mode_iterator SI_ONLY [SI]) ++(define_mode_iterator DI_ONLY [DI]) ++ + ;; Iterator for all integer modes (up to 64-bit) + (define_mode_iterator ALLI [QI HI SI DI]) + +@@ -50,9 +57,16 @@ + ;; Iterator for all scalar floating point modes (HF, SF, DF) + (define_mode_iterator GPF_HF [HF SF DF]) + ++;; Iterator for all 16-bit scalar floating point modes (HF, BF) ++(define_mode_iterator HFBF [HF BF]) ++ + ;; Iterator for all scalar floating point modes (HF, SF, DF and TF) + (define_mode_iterator GPF_TF_F16 [HF SF DF TF]) + ++;; Iterator for all scalar floating point modes suitable for moving, including ++;; special BF type (HF, SF, DF, TF and BF) ++(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF]) ++ + ;; Double vector modes. + (define_mode_iterator VDF [V2SF V4HF]) + +@@ -70,7 +84,10 @@ + (define_mode_iterator VSDQ_I_DI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI]) + + ;; Double vector modes. +-(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF]) ++(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF V4BF]) ++ ++;; Double vector modes suitable for moving. Includes BFmode. ++(define_mode_iterator VDMOV [V8QI V4HI V4HF V4BF V2SI V2SF]) + + ;; All modes stored in registers d0-d31. + (define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF]) +@@ -85,20 +102,29 @@ + (define_mode_iterator VDQ_BHSI [V8QI V16QI V4HI V8HI V2SI V4SI]) + + ;; Quad vector modes. +-(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF]) ++(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF]) + + ;; Copy of the above. +-(define_mode_iterator VQ2 [V16QI V8HI V4SI V2DI V8HF V4SF V2DF]) ++(define_mode_iterator VQ2 [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF]) ++ ++;; Quad vector modes suitable for moving. Includes BFmode. ++(define_mode_iterator VQMOV [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF]) ++ ++;; VQMOV without 2-element modes. ++(define_mode_iterator VQMOV_NO2E [V16QI V8HI V4SI V8HF V8BF V4SF]) + + ;; Quad integer vector modes. + (define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI]) + + ;; VQ without 2 element modes. +-(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF]) ++(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF V8BF]) + + ;; Quad vector with only 2 element modes. + (define_mode_iterator VQ_2E [V2DI V2DF]) + ++;; BFmode vector modes. ++(define_mode_iterator VBF [V4BF V8BF]) ++ + ;; This mode iterator allows :P to be used for patterns that operate on + ;; addresses in different modes. In LP64, only DI will match, while in + ;; ILP32, either can match. 
+@@ -110,7 +136,8 @@ + (define_mode_iterator PTR [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")]) + + ;; Advanced SIMD Float modes suitable for moving, loading and storing. +-(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF]) ++(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF ++ V4BF V8BF]) + + ;; Advanced SIMD Float modes. + (define_mode_iterator VDQF [V2SF V4SF V2DF]) +@@ -128,6 +155,9 @@ + (HF "TARGET_SIMD_F16INST") + SF DF]) + ++;; Scalar and vetor modes for SF, DF. ++(define_mode_iterator VSFDF [V2SF V4SF V2DF DF SF]) ++ + ;; Advanced SIMD single Float modes. + (define_mode_iterator VDQSF [V2SF V4SF]) + +@@ -148,7 +178,12 @@ + + ;; All Advanced SIMD modes suitable for moving, loading, and storing. + (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI +- V4HF V8HF V2SF V4SF V2DF]) ++ V4HF V8HF V4BF V8BF V2SF V4SF V2DF]) ++ ++;; All Advanced SIMD modes suitable for moving, loading, and storing, ++;; including special Bfloat vector types. ++(define_mode_iterator VALL_F16MOV [V8QI V16QI V4HI V8HI V2SI V4SI V2DI ++ V4HF V8HF V4BF V8BF V2SF V4SF V2DF]) + + ;; The VALL_F16 modes except the 128-bit 2-element ones. + (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI +@@ -159,10 +194,10 @@ + + ;; All Advanced SIMD modes and DI. + (define_mode_iterator VALLDI_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI +- V4HF V8HF V2SF V4SF V2DF DI]) ++ V4HF V8HF V4BF V8BF V2SF V4SF V2DF DI]) + + ;; All Advanced SIMD modes, plus DI and DF. +-(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI ++(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI V4BF V8BF + V2DI V4HF V8HF V2SF V4SF V2DF DI DF]) + + ;; Advanced SIMD modes for Integer reduction across lanes. +@@ -185,7 +220,7 @@ + (define_mode_iterator VQW [V16QI V8HI V4SI]) + + ;; Double vector modes for combines. +-(define_mode_iterator VDC [V8QI V4HI V4HF V2SI V2SF DI DF]) ++(define_mode_iterator VDC [V8QI V4HI V4BF V4HF V2SI V2SF DI DF]) + + ;; Advanced SIMD modes except double int. + (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) +@@ -274,50 +309,85 @@ + ;; count. + (define_mode_iterator VMUL_CHANGE_NLANES [V4HI V8HI V2SI V4SI V2SF V4SF]) + +-;; All SVE vector modes. +-(define_mode_iterator SVE_ALL [VNx16QI VNx8HI VNx4SI VNx2DI +- VNx8HF VNx4SF VNx2DF]) ++;; Iterators for single modes, for "@" patterns. ++(define_mode_iterator VNx8HI_ONLY [VNx8HI]) ++(define_mode_iterator VNx8BF_ONLY [VNx8BF]) ++(define_mode_iterator VNx4SI_ONLY [VNx4SI]) ++(define_mode_iterator VNx4SF_ONLY [VNx4SF]) ++(define_mode_iterator VNx2DI_ONLY [VNx2DI]) ++(define_mode_iterator VNx2DF_ONLY [VNx2DF]) + + ;; All SVE vector structure modes. + (define_mode_iterator SVE_STRUCT [VNx32QI VNx16HI VNx8SI VNx4DI +- VNx16HF VNx8SF VNx4DF ++ VNx16BF VNx16HF VNx8SF VNx4DF + VNx48QI VNx24HI VNx12SI VNx6DI +- VNx24HF VNx12SF VNx6DF ++ VNx24BF VNx24HF VNx12SF VNx6DF + VNx64QI VNx32HI VNx16SI VNx8DI +- VNx32HF VNx16SF VNx8DF]) ++ VNx32BF VNx32HF VNx16SF VNx8DF]) + +-;; All SVE vector modes that have 8-bit or 16-bit elements. +-(define_mode_iterator SVE_BH [VNx16QI VNx8HI VNx8HF]) ++;; All fully-packed SVE vector modes. ++(define_mode_iterator SVE_FULL [VNx16QI VNx8HI VNx4SI VNx2DI ++ VNx8BF VNx8HF VNx4SF VNx2DF]) + +-;; All SVE vector modes that have 8-bit, 16-bit or 32-bit elements. +-(define_mode_iterator SVE_BHS [VNx16QI VNx8HI VNx4SI VNx8HF VNx4SF]) ++;; All fully-packed SVE integer vector modes. 
++(define_mode_iterator SVE_FULL_I [VNx16QI VNx8HI VNx4SI VNx2DI]) + +-;; All SVE integer vector modes that have 8-bit, 16-bit or 32-bit elements. +-(define_mode_iterator SVE_BHSI [VNx16QI VNx8HI VNx4SI]) ++;; All fully-packed SVE floating-point vector modes. ++(define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF]) + +-;; All SVE integer vector modes that have 16-bit, 32-bit or 64-bit elements. +-(define_mode_iterator SVE_HSDI [VNx16QI VNx8HI VNx4SI]) ++;; Fully-packed SVE integer vector modes that have 8-bit, 16-bit or 32-bit ++;; elements. ++(define_mode_iterator SVE_FULL_BHSI [VNx16QI VNx8HI VNx4SI]) + +-;; All SVE floating-point vector modes that have 16-bit or 32-bit elements. +-(define_mode_iterator SVE_HSF [VNx8HF VNx4SF]) ++;; Fully-packed SVE vector modes that have 16-bit, 32-bit or 64-bit elements. ++(define_mode_iterator SVE_FULL_HSD [VNx8HI VNx4SI VNx2DI ++ VNx8BF VNx8HF VNx4SF VNx2DF]) + +-;; All SVE vector modes that have 32-bit or 64-bit elements. +-(define_mode_iterator SVE_SD [VNx4SI VNx2DI VNx4SF VNx2DF]) ++;; Fully-packed SVE integer vector modes that have 16-bit, 32-bit or 64-bit ++;; elements. ++(define_mode_iterator SVE_FULL_HSDI [VNx8HI VNx4SI VNx2DI]) + +-;; All SVE vector modes that have 32-bit elements. +-(define_mode_iterator SVE_S [VNx4SI VNx4SF]) ++;; Fully-packed SVE floating-point vector modes that have 16-bit or 32-bit ++;; elements. ++(define_mode_iterator SVE_FULL_HSF [VNx8HF VNx4SF]) + +-;; All SVE vector modes that have 64-bit elements. +-(define_mode_iterator SVE_D [VNx2DI VNx2DF]) ++;; Fully-packed SVE vector modes that have 32-bit or 64-bit elements. ++(define_mode_iterator SVE_FULL_SD [VNx4SI VNx2DI VNx4SF VNx2DF]) + +-;; All SVE integer vector modes that have 32-bit or 64-bit elements. +-(define_mode_iterator SVE_SDI [VNx4SI VNx2DI]) ++;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements. ++(define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI]) + +-;; All SVE integer vector modes. +-(define_mode_iterator SVE_I [VNx16QI VNx8HI VNx4SI VNx2DI]) ++;; Fully-packed SVE floating-point vector modes that have 32-bit or 64-bit ++;; elements. ++(define_mode_iterator SVE_FULL_SDF [VNx4SF VNx2DF]) + +-;; All SVE floating-point vector modes. +-(define_mode_iterator SVE_F [VNx8HF VNx4SF VNx2DF]) ++;; Same, but with the appropriate conditions for FMMLA support. ++(define_mode_iterator SVE_MATMULF [(VNx4SF "TARGET_SVE_F32MM") ++ (VNx2DF "TARGET_SVE_F64MM")]) ++ ++;; Fully-packed SVE vector modes that have 32-bit elements. ++(define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF]) ++ ++;; Fully-packed SVE vector modes that have 64-bit elements. ++(define_mode_iterator SVE_FULL_D [VNx2DI VNx2DF]) ++ ++;; All partial SVE modes. ++(define_mode_iterator SVE_PARTIAL [VNx2QI ++ VNx4QI VNx2HI ++ VNx8QI VNx4HI VNx2SI]) ++ ++;; Modes involved in extending or truncating SVE data, for 8 elements per ++;; 128-bit block. ++(define_mode_iterator VNx8_NARROW [VNx8QI]) ++(define_mode_iterator VNx8_WIDE [VNx8HI]) ++ ++;; ...same for 4 elements per 128-bit block. ++(define_mode_iterator VNx4_NARROW [VNx4QI VNx4HI]) ++(define_mode_iterator VNx4_WIDE [VNx4SI]) ++ ++;; ...same for 2 elements per 128-bit block. ++(define_mode_iterator VNx2_NARROW [VNx2QI VNx2HI VNx2SI]) ++(define_mode_iterator VNx2_WIDE [VNx2DI]) + + ;; All SVE predicate modes. + (define_mode_iterator PRED_ALL [VNx16BI VNx8BI VNx4BI VNx2BI]) +@@ -325,6 +395,12 @@ + ;; SVE predicate modes that control 8-bit, 16-bit or 32-bit elements. 
+ (define_mode_iterator PRED_BHS [VNx16BI VNx8BI VNx4BI]) + ++;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements. ++(define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI]) ++ ++;; Bfloat16 modes to which V4SF can be converted ++(define_mode_iterator V4SF_TO_BF [V4BF V8BF]) ++ + ;; ------------------------------------------------------------------ + ;; Unspec enumerations for Advance SIMD. These could well go into + ;; aarch64.md but for their use in int_iterators here. +@@ -365,6 +441,10 @@ + UNSPEC_RSUBHN2 ; Used in aarch64-simd.md. + UNSPEC_SQDMULH ; Used in aarch64-simd.md. + UNSPEC_SQRDMULH ; Used in aarch64-simd.md. ++ UNSPEC_SMULLB ; Used in aarch64-sve2.md. ++ UNSPEC_SMULLT ; Used in aarch64-sve2.md. ++ UNSPEC_UMULLB ; Used in aarch64-sve2.md. ++ UNSPEC_UMULLT ; Used in aarch64-sve2.md. + UNSPEC_PMUL ; Used in aarch64-simd.md. + UNSPEC_FMULX ; Used in aarch64-simd.md. + UNSPEC_USQADD ; Used in aarch64-simd.md. +@@ -387,6 +467,10 @@ + UNSPEC_UQSHRN ; Used in aarch64-simd.md. + UNSPEC_SQRSHRN ; Used in aarch64-simd.md. + UNSPEC_UQRSHRN ; Used in aarch64-simd.md. ++ UNSPEC_SHRNB ; Used in aarch64-sve2.md. ++ UNSPEC_SHRNT ; Used in aarch64-sve2.md. ++ UNSPEC_RSHRNB ; Used in aarch64-sve2.md. ++ UNSPEC_RSHRNT ; Used in aarch64-sve2.md. + UNSPEC_SSHL ; Used in aarch64-simd.md. + UNSPEC_USHL ; Used in aarch64-simd.md. + UNSPEC_SRSHL ; Used in aarch64-simd.md. +@@ -459,38 +543,126 @@ + UNSPEC_FMLSL ; Used in aarch64-simd.md. + UNSPEC_FMLAL2 ; Used in aarch64-simd.md. + UNSPEC_FMLSL2 ; Used in aarch64-simd.md. ++ UNSPEC_ADR ; Used in aarch64-sve.md. + UNSPEC_SEL ; Used in aarch64-sve.md. ++ UNSPEC_BRKA ; Used in aarch64-sve.md. ++ UNSPEC_BRKB ; Used in aarch64-sve.md. ++ UNSPEC_BRKN ; Used in aarch64-sve.md. ++ UNSPEC_BRKPA ; Used in aarch64-sve.md. ++ UNSPEC_BRKPB ; Used in aarch64-sve.md. ++ UNSPEC_PFIRST ; Used in aarch64-sve.md. ++ UNSPEC_PNEXT ; Used in aarch64-sve.md. ++ UNSPEC_CNTP ; Used in aarch64-sve.md. ++ UNSPEC_SADDV ; Used in aarch64-sve.md. ++ UNSPEC_UADDV ; Used in aarch64-sve.md. + UNSPEC_ANDV ; Used in aarch64-sve.md. + UNSPEC_IORV ; Used in aarch64-sve.md. + UNSPEC_XORV ; Used in aarch64-sve.md. + UNSPEC_ANDF ; Used in aarch64-sve.md. + UNSPEC_IORF ; Used in aarch64-sve.md. + UNSPEC_XORF ; Used in aarch64-sve.md. ++ UNSPEC_REVB ; Used in aarch64-sve.md. ++ UNSPEC_REVH ; Used in aarch64-sve.md. ++ UNSPEC_REVW ; Used in aarch64-sve.md. + UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md. + UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md. +- UNSPEC_COND_ADD ; Used in aarch64-sve.md. +- UNSPEC_COND_SUB ; Used in aarch64-sve.md. +- UNSPEC_COND_MUL ; Used in aarch64-sve.md. +- UNSPEC_COND_DIV ; Used in aarch64-sve.md. +- UNSPEC_COND_MAX ; Used in aarch64-sve.md. +- UNSPEC_COND_MIN ; Used in aarch64-sve.md. ++ UNSPEC_FMLA ; Used in aarch64-sve.md. ++ UNSPEC_FMLS ; Used in aarch64-sve.md. ++ UNSPEC_FEXPA ; Used in aarch64-sve.md. ++ UNSPEC_FMMLA ; Used in aarch64-sve.md. ++ UNSPEC_FTMAD ; Used in aarch64-sve.md. ++ UNSPEC_FTSMUL ; Used in aarch64-sve.md. ++ UNSPEC_FTSSEL ; Used in aarch64-sve.md. ++ UNSPEC_SMATMUL ; Used in aarch64-sve.md. ++ UNSPEC_UMATMUL ; Used in aarch64-sve.md. ++ UNSPEC_USMATMUL ; Used in aarch64-sve.md. ++ UNSPEC_TRN1Q ; Used in aarch64-sve.md. ++ UNSPEC_TRN2Q ; Used in aarch64-sve.md. ++ UNSPEC_UZP1Q ; Used in aarch64-sve.md. ++ UNSPEC_UZP2Q ; Used in aarch64-sve.md. ++ UNSPEC_ZIP1Q ; Used in aarch64-sve.md. ++ UNSPEC_ZIP2Q ; Used in aarch64-sve.md. ++ UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md. 
++ UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_COND_CMPHI_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_COND_CMPHS_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_COND_CMPLE_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_COND_CMPLO_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_COND_CMPLS_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_COND_CMPLT_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_COND_CMPNE_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_COND_FABS ; Used in aarch64-sve.md. ++ UNSPEC_COND_FADD ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCADD90 ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCADD270 ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMEQ ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMGE ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMGT ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMLA ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMLA90 ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMLA180 ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMLA270 ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMLE ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMLT ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMNE ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCMUO ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCVT ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCVTZS ; Used in aarch64-sve.md. ++ UNSPEC_COND_FCVTZU ; Used in aarch64-sve.md. ++ UNSPEC_COND_FDIV ; Used in aarch64-sve.md. ++ UNSPEC_COND_FMAX ; Used in aarch64-sve.md. ++ UNSPEC_COND_FMAXNM ; Used in aarch64-sve.md. ++ UNSPEC_COND_FMIN ; Used in aarch64-sve.md. ++ UNSPEC_COND_FMINNM ; Used in aarch64-sve.md. + UNSPEC_COND_FMLA ; Used in aarch64-sve.md. + UNSPEC_COND_FMLS ; Used in aarch64-sve.md. ++ UNSPEC_COND_FMUL ; Used in aarch64-sve.md. ++ UNSPEC_COND_FMULX ; Used in aarch64-sve.md. ++ UNSPEC_COND_FNEG ; Used in aarch64-sve.md. + UNSPEC_COND_FNMLA ; Used in aarch64-sve.md. + UNSPEC_COND_FNMLS ; Used in aarch64-sve.md. +- UNSPEC_COND_LT ; Used in aarch64-sve.md. +- UNSPEC_COND_LE ; Used in aarch64-sve.md. +- UNSPEC_COND_EQ ; Used in aarch64-sve.md. +- UNSPEC_COND_NE ; Used in aarch64-sve.md. +- UNSPEC_COND_GE ; Used in aarch64-sve.md. +- UNSPEC_COND_GT ; Used in aarch64-sve.md. ++ UNSPEC_COND_FRECPX ; Used in aarch64-sve.md. ++ UNSPEC_COND_FRINTA ; Used in aarch64-sve.md. ++ UNSPEC_COND_FRINTI ; Used in aarch64-sve.md. ++ UNSPEC_COND_FRINTM ; Used in aarch64-sve.md. ++ UNSPEC_COND_FRINTN ; Used in aarch64-sve.md. ++ UNSPEC_COND_FRINTP ; Used in aarch64-sve.md. ++ UNSPEC_COND_FRINTX ; Used in aarch64-sve.md. ++ UNSPEC_COND_FRINTZ ; Used in aarch64-sve.md. ++ UNSPEC_COND_FSCALE ; Used in aarch64-sve.md. ++ UNSPEC_COND_FSQRT ; Used in aarch64-sve.md. ++ UNSPEC_COND_FSUB ; Used in aarch64-sve.md. ++ UNSPEC_COND_SCVTF ; Used in aarch64-sve.md. ++ UNSPEC_COND_UCVTF ; Used in aarch64-sve.md. ++ UNSPEC_LASTA ; Used in aarch64-sve.md. + UNSPEC_LASTB ; Used in aarch64-sve.md. ++ UNSPEC_ASHIFT_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_ASHIFTRT_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_LSHIFTRT_WIDE ; Used in aarch64-sve.md. ++ UNSPEC_LDFF1 ; Used in aarch64-sve.md. ++ UNSPEC_LDNF1 ; Used in aarch64-sve.md. + UNSPEC_FCADD90 ; Used in aarch64-simd.md. + UNSPEC_FCADD270 ; Used in aarch64-simd.md. + UNSPEC_FCMLA ; Used in aarch64-simd.md. + UNSPEC_FCMLA90 ; Used in aarch64-simd.md. + UNSPEC_FCMLA180 ; Used in aarch64-simd.md. + UNSPEC_FCMLA270 ; Used in aarch64-simd.md. ++ UNSPEC_COND_FCVTNT ; Used in aarch64-sve2.md. ++ UNSPEC_SMULHS ; Used in aarch64-sve2.md. ++ UNSPEC_SMULHRS ; Used in aarch64-sve2.md. ++ UNSPEC_UMULHS ; Used in aarch64-sve2.md. 
++ UNSPEC_UMULHRS ; Used in aarch64-sve2.md. ++ UNSPEC_ASRD ; Used in aarch64-sve.md. ++ UNSPEC_USDOT ; Used in aarch64-simd.md. ++ UNSPEC_SUDOT ; Used in aarch64-simd.md. ++ UNSPEC_BFDOT ; Used in aarch64-simd.md. ++ UNSPEC_BFMLALB ; Used in aarch64-sve.md. ++ UNSPEC_BFMLALT ; Used in aarch64-sve.md. ++ UNSPEC_BFMMLA ; Used in aarch64-sve.md. ++ UNSPEC_BFCVTN ; Used in aarch64-simd.md. ++ UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. ++ UNSPEC_BFCVT ; Used in aarch64-simd.md. + ]) + + ;; ------------------------------------------------------------------ +@@ -586,6 +758,7 @@ + (V2SI "2") (V4SI "4") + (V2DI "2") + (V4HF "4") (V8HF "8") ++ (V4BF "4") (V8BF "8") + (V2SF "2") (V4SF "4") + (V1DF "1") (V2DF "2") + (DI "1") (DF "1")]) +@@ -610,6 +783,14 @@ + (define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63") + (HF "#15") (SF "#31") (DF "#63")]) + ++;; The number of bits in a vector element, or controlled by a predicate ++;; element. ++(define_mode_attr elem_bits [(VNx16BI "8") (VNx8BI "16") ++ (VNx4BI "32") (VNx2BI "64") ++ (VNx16QI "8") (VNx8HI "16") ++ (VNx4SI "32") (VNx2DI "64") ++ (VNx8HF "16") (VNx4SF "32") (VNx2DF "64")]) ++ + ;; Attribute to describe constants acceptable in logical operations + (define_mode_attr lconst [(SI "K") (DI "L")]) + +@@ -624,6 +805,7 @@ + + (define_mode_attr Vtype [(V8QI "8b") (V16QI "16b") + (V4HI "4h") (V8HI "8h") ++ (V4BF "4h") (V8BF "8h") + (V2SI "2s") (V4SI "4s") + (DI "1d") (DF "1d") + (V2DI "2d") (V2SF "2s") +@@ -637,7 +819,8 @@ + (V4HI ".4h") (V8HI ".8h") + (V2SI ".2s") (V4SI ".4s") + (V2DI ".2d") (V4HF ".4h") +- (V8HF ".8h") (V2SF ".2s") ++ (V8HF ".8h") (V4BF ".4h") ++ (V8BF ".8h") (V2SF ".2s") + (V4SF ".4s") (V2DF ".2d") + (DI "") (SI "") + (HI "") (QI "") +@@ -655,9 +838,10 @@ + (V4HI "h") (V8HI "h") (VNx8HI "h") (VNx8BI "h") + (V2SI "s") (V4SI "s") (VNx4SI "s") (VNx4BI "s") + (V2DI "d") (VNx2DI "d") (VNx2BI "d") +- (V4HF "h") (V8HF "h") (VNx8HF "h") ++ (V4HF "h") (V8HF "h") (VNx8HF "h") (VNx8BF "h") + (V2SF "s") (V4SF "s") (VNx4SF "s") + (V2DF "d") (VNx2DF "d") ++ (BF "h") (V4BF "h") (V8BF "h") + (HF "h") + (SF "s") (DF "d") + (QI "b") (HI "h") +@@ -667,13 +851,17 @@ + (define_mode_attr Vetype_fourth [(VNx4SI "b") (VNx2DI "h")]) + + ;; Equivalent of "size" for a vector element. +-(define_mode_attr Vesize [(VNx16QI "b") +- (VNx8HI "h") (VNx8HF "h") +- (VNx4SI "w") (VNx4SF "w") ++(define_mode_attr Vesize [(VNx16QI "b") (VNx8QI "b") ++ (VNx4QI "b") (VNx2QI "b") ++ (VNx8HI "h") (VNx4HI "h") ++ (VNx2HI "h") (VNx8HF "h") ++ (VNx4SI "w") (VNx2SI "w") (VNx4SF "w") + (VNx2DI "d") (VNx2DF "d") + (VNx32QI "b") (VNx48QI "b") (VNx64QI "b") + (VNx16HI "h") (VNx24HI "h") (VNx32HI "h") + (VNx16HF "h") (VNx24HF "h") (VNx32HF "h") ++ (VNx16BF "h") (VNx24BF "h") (VNx32BF "h") ++ (VNx8BF "h") + (VNx8SI "w") (VNx12SI "w") (VNx16SI "w") + (VNx8SF "w") (VNx12SF "w") (VNx16SF "w") + (VNx4DI "d") (VNx6DI "d") (VNx8DI "d") +@@ -697,13 +885,16 @@ + (V8HF "16b") (V2SF "8b") + (V4SF "16b") (V2DF "16b") + (DI "8b") (DF "8b") +- (SI "8b") (SF "8b")]) ++ (SI "8b") (SF "8b") ++ (V4BF "8b") (V8BF "16b")]) + + ;; Define element mode for each vector mode. + (define_mode_attr VEL [(V8QI "QI") (V16QI "QI") (VNx16QI "QI") + (V4HI "HI") (V8HI "HI") (VNx8HI "HI") + (V2SI "SI") (V4SI "SI") (VNx4SI "SI") ++ (VNx8BF "BF") + (DI "DI") (V2DI "DI") (VNx2DI "DI") ++ (V4BF "BF") (V8BF "BF") + (V4HF "HF") (V8HF "HF") (VNx8HF "HF") + (V2SF "SF") (V4SF "SF") (VNx4SF "SF") + (DF "DF") (V2DF "DF") (VNx2DF "DF") +@@ -713,8 +904,10 @@ + ;; Define element mode for each vector mode (lower case). 
+ (define_mode_attr Vel [(V8QI "qi") (V16QI "qi") (VNx16QI "qi") + (V4HI "hi") (V8HI "hi") (VNx8HI "hi") ++ (VNx8BF "bf") + (V2SI "si") (V4SI "si") (VNx4SI "si") + (DI "di") (V2DI "di") (VNx2DI "di") ++ (V4BF "bf") (V8BF "bf") + (V4HF "hf") (V8HF "hf") (VNx8HF "hf") + (V2SF "sf") (V4SF "sf") (VNx4SF "sf") + (V2DF "df") (DF "df") (VNx2DF "df") +@@ -723,19 +916,19 @@ + + ;; Element mode with floating-point values replaced by like-sized integers. + (define_mode_attr VEL_INT [(VNx16QI "QI") +- (VNx8HI "HI") (VNx8HF "HI") ++ (VNx8HI "HI") (VNx8HF "HI") (VNx8BF "HI") + (VNx4SI "SI") (VNx4SF "SI") + (VNx2DI "DI") (VNx2DF "DI")]) + + ;; Gives the mode of the 128-bit lowpart of an SVE vector. + (define_mode_attr V128 [(VNx16QI "V16QI") +- (VNx8HI "V8HI") (VNx8HF "V8HF") ++ (VNx8HI "V8HI") (VNx8HF "V8HF") (VNx8BF "V8BF") + (VNx4SI "V4SI") (VNx4SF "V4SF") + (VNx2DI "V2DI") (VNx2DF "V2DF")]) + + ;; ...and again in lower case. + (define_mode_attr v128 [(VNx16QI "v16qi") +- (VNx8HI "v8hi") (VNx8HF "v8hf") ++ (VNx8HI "v8hi") (VNx8HF "v8hf") (VNx8BF "v8bf") + (VNx4SI "v4si") (VNx4SF "v4sf") + (VNx2DI "v2di") (VNx2DF "v2df")]) + +@@ -763,19 +956,20 @@ + (V2SI "SI") (V4SI "V2SI") + (V2DI "DI") (V2SF "SF") + (V4SF "V2SF") (V4HF "V2HF") +- (V8HF "V4HF") (V2DF "DF")]) ++ (V8HF "V4HF") (V2DF "DF") ++ (V8BF "V4BF")]) + + ;; Half modes of all vector modes, in lower-case. + (define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi") + (V4HI "v2hi") (V8HI "v4hi") +- (V8HF "v4hf") ++ (V8HF "v4hf") (V8BF "v4bf") + (V2SI "si") (V4SI "v2si") + (V2DI "di") (V2SF "sf") + (V4SF "v2sf") (V2DF "df")]) + + ;; Double modes of vector modes. + (define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI") +- (V4HF "V8HF") ++ (V4HF "V8HF") (V4BF "V8BF") + (V2SI "V4SI") (V2SF "V4SF") + (SI "V2SI") (DI "V2DI") + (DF "V2DF")]) +@@ -785,7 +979,7 @@ + + ;; Double modes of vector modes (lower case). + (define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi") +- (V4HF "v8hf") ++ (V4HF "v8hf") (V4BF "v8bf") + (V2SI "v4si") (V2SF "v4sf") + (SI "v2si") (DI "v2di") + (DF "v2df")]) +@@ -879,6 +1073,7 @@ + ;; variation on mapping FP modes to GP regs. + (define_mode_attr vwcore [(V8QI "w") (V16QI "w") (VNx16QI "w") + (V4HI "w") (V8HI "w") (VNx8HI "w") ++ (VNx8BF "w") + (V2SI "w") (V4SI "w") (VNx4SI "w") + (DI "x") (V2DI "x") (VNx2DI "x") + (V4HF "w") (V8HF "w") (VNx8HF "w") +@@ -894,12 +1089,14 @@ + (V2SI "V2SI") (V4SI "V4SI") + (DI "DI") (V2DI "V2DI") + (V4HF "V4HI") (V8HF "V8HI") ++ (V4BF "V4HI") (V8BF "V8HI") + (V2SF "V2SI") (V4SF "V4SI") + (DF "DI") (V2DF "V2DI") + (SF "SI") (SI "SI") + (HF "HI") + (VNx16QI "VNx16QI") + (VNx8HI "VNx8HI") (VNx8HF "VNx8HI") ++ (VNx8BF "VNx8HI") + (VNx4SI "VNx4SI") (VNx4SF "VNx4SI") + (VNx2DI "VNx2DI") (VNx2DF "VNx2DI") + ]) +@@ -910,19 +1107,25 @@ + (V2SI "v2si") (V4SI "v4si") + (DI "di") (V2DI "v2di") + (V4HF "v4hi") (V8HF "v8hi") ++ (V4BF "v4hi") (V8BF "v8hi") + (V2SF "v2si") (V4SF "v4si") + (DF "di") (V2DF "v2di") + (SF "si") + (VNx16QI "vnx16qi") + (VNx8HI "vnx8hi") (VNx8HF "vnx8hi") ++ (VNx8BF "vnx8hi") + (VNx4SI "vnx4si") (VNx4SF "vnx4si") + (VNx2DI "vnx2di") (VNx2DF "vnx2di") + ]) + + ;; Floating-point equivalent of selected modes. 
+-(define_mode_attr V_FP_EQUIV [(VNx4SI "VNx4SF") (VNx4SF "VNx4SF") ++(define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF") ++ (VNx8BF "VNx8HF") ++ (VNx4SI "VNx4SF") (VNx4SF "VNx4SF") + (VNx2DI "VNx2DF") (VNx2DF "VNx2DF")]) +-(define_mode_attr v_fp_equiv [(VNx4SI "vnx4sf") (VNx4SF "vnx4sf") ++(define_mode_attr v_fp_equiv [(VNx8HI "vnx8hf") (VNx8HF "vnx8hf") ++ (VNx8BF "vnx8hf") ++ (VNx4SI "vnx4sf") (VNx4SF "vnx4sf") + (VNx2DI "vnx2df") (VNx2DF "vnx2df")]) + + ;; Mode for vector conditional operations where the comparison has +@@ -976,6 +1179,7 @@ + + (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") + (V4HI "V8HI") (V8HI "V4HI") ++ (V8BF "V4BF") (V4BF "V8BF") + (V2SI "V4SI") (V4SI "V2SI") + (DI "V2DI") (V2DI "DI") + (V2SF "V4SF") (V4SF "V2SF") +@@ -988,6 +1192,7 @@ + (DI "to_128") (V2DI "to_64") + (V4HF "to_128") (V8HF "to_64") + (V2SF "to_128") (V4SF "to_64") ++ (V4BF "to_128") (V8BF "to_64") + (DF "to_128") (V2DF "to_64")]) + + ;; For certain vector-by-element multiplication instructions we must +@@ -1021,9 +1226,11 @@ + ;; Defined to '_q' for 128-bit types. + (define_mode_attr q [(V8QI "") (V16QI "_q") + (V4HI "") (V8HI "_q") ++ (V4BF "") (V8BF "_q") + (V2SI "") (V4SI "_q") + (DI "") (V2DI "_q") + (V4HF "") (V8HF "_q") ++ (V4BF "") (V8BF "_q") + (V2SF "") (V4SF "_q") + (V2DF "_q") + (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")]) +@@ -1044,6 +1251,9 @@ + ;; Register suffix for DOTPROD input types from the return type. + (define_mode_attr Vdottype [(V2SI "8b") (V4SI "16b")]) + ++;; Register suffix for BFDOT input types from the return type. ++(define_mode_attr Vbfdottype [(V2SF "4h") (V4SF "8h")]) ++ + ;; Sum of lengths of instructions needed to move vector registers of a mode. + (define_mode_attr insn_count [(OI "8") (CI "12") (XI "16")]) + +@@ -1054,63 +1264,83 @@ + ;; Width of 2nd and 3rd arguments to fp16 vector multiply add/sub + (define_mode_attr VFMLA_W [(V2SF "V4HF") (V4SF "V8HF")]) + ++;; Width of 2nd and 3rd arguments to bf16 vector multiply add/sub ++(define_mode_attr VBFMLA_W [(V2SF "V4BF") (V4SF "V8BF")]) ++ + (define_mode_attr VFMLA_SEL_W [(V2SF "V2HF") (V4SF "V4HF")]) + + (define_mode_attr f16quad [(V2SF "") (V4SF "q")]) + ++(define_mode_attr isquadop [(V8QI "") (V16QI "q") (V4BF "") (V8BF "q")]) ++ + (define_code_attr f16mac [(plus "a") (minus "s")]) + + ;; Map smax to smin and umax to umin. + (define_code_attr max_opp [(smax "smin") (umax "umin")]) + ++;; Same as above, but louder. ++(define_code_attr MAX_OPP [(smax "SMIN") (umax "UMIN")]) ++ + ;; The number of subvectors in an SVE_STRUCT. + (define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2") + (VNx8SI "2") (VNx4DI "2") ++ (VNx16BF "2") + (VNx16HF "2") (VNx8SF "2") (VNx4DF "2") + (VNx48QI "3") (VNx24HI "3") + (VNx12SI "3") (VNx6DI "3") ++ (VNx24BF "3") + (VNx24HF "3") (VNx12SF "3") (VNx6DF "3") + (VNx64QI "4") (VNx32HI "4") + (VNx16SI "4") (VNx8DI "4") ++ (VNx32BF "4") + (VNx32HF "4") (VNx16SF "4") (VNx8DF "4")]) + + ;; The number of instruction bytes needed for an SVE_STRUCT move. This is + ;; equal to vector_count * 4. + (define_mode_attr insn_length [(VNx32QI "8") (VNx16HI "8") + (VNx8SI "8") (VNx4DI "8") ++ (VNx16BF "8") + (VNx16HF "8") (VNx8SF "8") (VNx4DF "8") + (VNx48QI "12") (VNx24HI "12") + (VNx12SI "12") (VNx6DI "12") ++ (VNx24BF "12") + (VNx24HF "12") (VNx12SF "12") (VNx6DF "12") + (VNx64QI "16") (VNx32HI "16") + (VNx16SI "16") (VNx8DI "16") ++ (VNx32BF "16") + (VNx32HF "16") (VNx16SF "16") (VNx8DF "16")]) + + ;; The type of a subvector in an SVE_STRUCT. 
+ (define_mode_attr VSINGLE [(VNx32QI "VNx16QI") + (VNx16HI "VNx8HI") (VNx16HF "VNx8HF") ++ (VNx16BF "VNx8BF") + (VNx8SI "VNx4SI") (VNx8SF "VNx4SF") + (VNx4DI "VNx2DI") (VNx4DF "VNx2DF") + (VNx48QI "VNx16QI") + (VNx24HI "VNx8HI") (VNx24HF "VNx8HF") ++ (VNx24BF "VNx8BF") + (VNx12SI "VNx4SI") (VNx12SF "VNx4SF") + (VNx6DI "VNx2DI") (VNx6DF "VNx2DF") + (VNx64QI "VNx16QI") + (VNx32HI "VNx8HI") (VNx32HF "VNx8HF") ++ (VNx32BF "VNx8BF") + (VNx16SI "VNx4SI") (VNx16SF "VNx4SF") + (VNx8DI "VNx2DI") (VNx8DF "VNx2DF")]) + + ;; ...and again in lower case. + (define_mode_attr vsingle [(VNx32QI "vnx16qi") + (VNx16HI "vnx8hi") (VNx16HF "vnx8hf") ++ (VNx16BF "vnx8bf") + (VNx8SI "vnx4si") (VNx8SF "vnx4sf") + (VNx4DI "vnx2di") (VNx4DF "vnx2df") + (VNx48QI "vnx16qi") + (VNx24HI "vnx8hi") (VNx24HF "vnx8hf") ++ (VNx24BF "vnx8bf") + (VNx12SI "vnx4si") (VNx12SF "vnx4sf") + (VNx6DI "vnx2di") (VNx6DF "vnx2df") + (VNx64QI "vnx16qi") + (VNx32HI "vnx8hi") (VNx32HF "vnx8hf") ++ (VNx32BF "vnx8bf") + (VNx16SI "vnx4si") (VNx16SF "vnx4sf") + (VNx8DI "vnx2di") (VNx8DF "vnx2df")]) + +@@ -1118,36 +1348,44 @@ + ;; this is equivalent to the of the subvector mode. + (define_mode_attr VPRED [(VNx16QI "VNx16BI") + (VNx8HI "VNx8BI") (VNx8HF "VNx8BI") ++ (VNx8BF "VNx8BI") + (VNx4SI "VNx4BI") (VNx4SF "VNx4BI") + (VNx2DI "VNx2BI") (VNx2DF "VNx2BI") + (VNx32QI "VNx16BI") + (VNx16HI "VNx8BI") (VNx16HF "VNx8BI") ++ (VNx16BF "VNx8BI") + (VNx8SI "VNx4BI") (VNx8SF "VNx4BI") + (VNx4DI "VNx2BI") (VNx4DF "VNx2BI") + (VNx48QI "VNx16BI") + (VNx24HI "VNx8BI") (VNx24HF "VNx8BI") ++ (VNx24BF "VNx8BI") + (VNx12SI "VNx4BI") (VNx12SF "VNx4BI") + (VNx6DI "VNx2BI") (VNx6DF "VNx2BI") + (VNx64QI "VNx16BI") + (VNx32HI "VNx8BI") (VNx32HF "VNx8BI") ++ (VNx32BF "VNx8BI") + (VNx16SI "VNx4BI") (VNx16SF "VNx4BI") + (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")]) + + ;; ...and again in lower case. + (define_mode_attr vpred [(VNx16QI "vnx16bi") + (VNx8HI "vnx8bi") (VNx8HF "vnx8bi") ++ (VNx8BF "vnx8bi") + (VNx4SI "vnx4bi") (VNx4SF "vnx4bi") + (VNx2DI "vnx2bi") (VNx2DF "vnx2bi") + (VNx32QI "vnx16bi") + (VNx16HI "vnx8bi") (VNx16HF "vnx8bi") ++ (VNx16BF "vnx8bi") + (VNx8SI "vnx4bi") (VNx8SF "vnx4bi") + (VNx4DI "vnx2bi") (VNx4DF "vnx2bi") + (VNx48QI "vnx16bi") + (VNx24HI "vnx8bi") (VNx24HF "vnx8bi") ++ (VNx24BF "vnx8bi") + (VNx12SI "vnx4bi") (VNx12SF "vnx4bi") + (VNx6DI "vnx2bi") (VNx6DF "vnx2bi") + (VNx64QI "vnx16bi") + (VNx32HI "vnx8bi") (VNx32HF "vnx4bi") ++ (VNx32BF "vnx8bi") + (VNx16SI "vnx4bi") (VNx16SF "vnx4bi") + (VNx8DI "vnx2bi") (VNx8DF "vnx2bi")]) + +@@ -1158,6 +1396,30 @@ + (V4HF "[%4]") (V8HF "[%4]") + ]) + ++;; The number of bytes controlled by a predicate ++(define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2") ++ (VNx4BI "4") (VNx2BI "8")]) ++ ++;; Two-nybble mask for partial vector modes: nunits, byte size. ++(define_mode_attr self_mask [(VNx8QI "0x81") ++ (VNx4QI "0x41") ++ (VNx2QI "0x21") ++ (VNx4HI "0x42") ++ (VNx2HI "0x22") ++ (VNx2SI "0x24")]) ++ ++;; For full vector modes, the mask of narrower modes, encoded as above. ++(define_mode_attr narrower_mask [(VNx8HI "0x81") ++ (VNx4SI "0x43") ++ (VNx2DI "0x27")]) ++ ++;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index. ++(define_mode_attr sve_lane_con [(VNx4SI "y") (VNx2DI "x") ++ (VNx8HF "y") (VNx4SF "y") (VNx2DF "x")]) ++ ++;; The constraint to use for an SVE FCMLA lane index. 
++(define_mode_attr sve_lane_pair_con [(VNx8HF "y") (VNx4SF "x")]) ++ + ;; ------------------------------------------------------------------- + ;; Code Iterators + ;; ------------------------------------------------------------------- +@@ -1168,6 +1430,8 @@ + ;; This code iterator allows the shifts supported in arithmetic instructions + (define_code_iterator ASHIFT [ashift ashiftrt lshiftrt]) + ++(define_code_iterator SHIFTRT [ashiftrt lshiftrt]) ++ + ;; Code iterator for logical operations + (define_code_iterator LOGICAL [and ior xor]) + +@@ -1214,7 +1478,7 @@ + ;; Signed and unsigned max operations. + (define_code_iterator USMAX [smax umax]) + +-;; Code iterator for variants of vector max and min. ++;; Code iterator for plus and minus. + (define_code_iterator ADDSUB [plus minus]) + + ;; Code iterator for variants of vector saturating binary ops. +@@ -1226,6 +1490,21 @@ + ;; Code iterator for signed variants of vector saturating binary ops. + (define_code_iterator SBINQOPS [ss_plus ss_minus]) + ++;; Code iterator for unsigned variants of vector saturating binary ops. ++(define_code_iterator UBINQOPS [us_plus us_minus]) ++ ++;; Modular and saturating addition. ++(define_code_iterator ANY_PLUS [plus ss_plus us_plus]) ++ ++;; Saturating addition. ++(define_code_iterator SAT_PLUS [ss_plus us_plus]) ++ ++;; Modular and saturating subtraction. ++(define_code_iterator ANY_MINUS [minus ss_minus us_minus]) ++ ++;; Saturating subtraction. ++(define_code_iterator SAT_MINUS [ss_minus us_minus]) ++ + ;; Comparison operators for CM. + (define_code_iterator COMPARISONS [lt le eq ge gt]) + +@@ -1236,27 +1515,25 @@ + (define_code_iterator FAC_COMPARISONS [lt le ge gt]) + + ;; SVE integer unary operations. +-(define_code_iterator SVE_INT_UNARY [abs neg not popcount]) +- +-;; SVE floating-point unary operations. +-(define_code_iterator SVE_FP_UNARY [abs neg sqrt]) ++(define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount]) + + ;; SVE integer binary operations. + (define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin ++ ashift ashiftrt lshiftrt + and ior xor]) + + ;; SVE integer binary division operations. + (define_code_iterator SVE_INT_BINARY_SD [div udiv]) + ++;; SVE integer binary operations that have an immediate form. ++(define_code_iterator SVE_INT_BINARY_IMM [mult smax smin umax umin]) ++ + ;; SVE floating-point operations with an unpredicated all-register form. + (define_code_iterator SVE_UNPRED_FP_BINARY [plus minus mult]) + + ;; SVE integer comparisons. + (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu]) + +-;; SVE floating-point comparisons. +-(define_code_iterator SVE_FP_CMP [lt le eq ne ge gt]) +- + ;; ------------------------------------------------------------------- + ;; Code Attributes + ;; ------------------------------------------------------------------- +@@ -1273,6 +1550,8 @@ + (unsigned_fix "fixuns") + (float "float") + (unsigned_float "floatuns") ++ (clrsb "clrsb") ++ (clz "clz") + (popcount "popcount") + (and "and") + (ior "ior") +@@ -1304,8 +1583,7 @@ + (leu "leu") + (geu "geu") + (gtu "gtu") +- (abs "abs") +- (sqrt "sqrt")]) ++ (abs "abs")]) + + ;; For comparison operators we use the FCM* and CM* instructions. + ;; As there are no CMLE or CMLT instructions which act on 3 vector +@@ -1350,6 +1628,9 @@ + (define_code_attr shift [(ashift "lsl") (ashiftrt "asr") + (lshiftrt "lsr") (rotatert "ror")]) + ++;; Op prefix for shift right and accumulate. 
++(define_code_attr sra_op [(ashiftrt "s") (lshiftrt "u")]) ++ + ;; Map shift operators onto underlying bit-field instructions + (define_code_attr bfshift [(ashift "ubfiz") (ashiftrt "sbfx") + (lshiftrt "ubfx") (rotatert "extr")]) +@@ -1374,6 +1655,15 @@ + (smax "s") (umax "u") + (smin "s") (umin "u")]) + ++;; "s" for signed ops, empty for unsigned ones. ++(define_code_attr s [(sign_extend "s") (zero_extend "")]) ++ ++;; Map signed/unsigned ops to the corresponding extension. ++(define_code_attr paired_extend [(ss_plus "sign_extend") ++ (us_plus "zero_extend") ++ (ss_minus "sign_extend") ++ (us_minus "zero_extend")]) ++ + ;; Whether a shift is left or right. + (define_code_attr lr [(ashift "l") (ashiftrt "r") (lshiftrt "r")]) + +@@ -1434,35 +1724,45 @@ + (smax "smax") + (umin "umin") + (umax "umax") ++ (ashift "lsl") ++ (ashiftrt "asr") ++ (lshiftrt "lsr") + (and "and") + (ior "orr") + (xor "eor") + (not "not") ++ (clrsb "cls") ++ (clz "clz") + (popcount "cnt")]) + + (define_code_attr sve_int_op_rev [(plus "add") +- (minus "subr") +- (mult "mul") +- (div "sdivr") +- (udiv "udivr") +- (smin "smin") +- (smax "smax") +- (umin "umin") +- (umax "umax") +- (and "and") +- (ior "orr") +- (xor "eor")]) ++ (minus "subr") ++ (mult "mul") ++ (div "sdivr") ++ (udiv "udivr") ++ (smin "smin") ++ (smax "smax") ++ (umin "umin") ++ (umax "umax") ++ (ashift "lslr") ++ (ashiftrt "asrr") ++ (lshiftrt "lsrr") ++ (and "and") ++ (ior "orr") ++ (xor "eor")]) + + ;; The floating-point SVE instruction that implements an rtx code. + (define_code_attr sve_fp_op [(plus "fadd") + (minus "fsub") +- (mult "fmul") +- (neg "fneg") +- (abs "fabs") +- (sqrt "fsqrt")]) ++ (mult "fmul")]) + + ;; The SVE immediate constraint to use for an rtl code. +-(define_code_attr sve_imm_con [(eq "vsc") ++(define_code_attr sve_imm_con [(mult "vsm") ++ (smax "vsm") ++ (smin "vsm") ++ (umax "vsb") ++ (umin "vsb") ++ (eq "vsc") + (ne "vsc") + (lt "vsc") + (ge "vsc") +@@ -1473,6 +1773,33 @@ + (geu "vsd") + (gtu "vsd")]) + ++;; The prefix letter to use when printing an immediate operand. ++(define_code_attr sve_imm_prefix [(mult "") ++ (smax "") ++ (smin "") ++ (umax "D") ++ (umin "D")]) ++ ++;; The predicate to use for the second input operand in a cond_ ++;; pattern. ++(define_code_attr sve_pred_int_rhs2_operand ++ [(plus "register_operand") ++ (minus "register_operand") ++ (mult "register_operand") ++ (smax "register_operand") ++ (umax "register_operand") ++ (smin "register_operand") ++ (umin "register_operand") ++ (ashift "aarch64_sve_lshift_operand") ++ (ashiftrt "aarch64_sve_rshift_operand") ++ (lshiftrt "aarch64_sve_rshift_operand") ++ (and "aarch64_sve_pred_and_operand") ++ (ior "register_operand") ++ (xor "register_operand")]) ++ ++(define_code_attr inc_dec [(minus "dec") (ss_minus "sqdec") (us_minus "uqdec") ++ (plus "inc") (ss_plus "sqinc") (us_plus "uqinc")]) ++ + ;; ------------------------------------------------------------------- + ;; Int Iterators. 
+ ;; ------------------------------------------------------------------- +@@ -1492,7 +1819,7 @@ + (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV + UNSPEC_FMAXNMV UNSPEC_FMINNMV]) + +-(define_int_iterator BITWISEV [UNSPEC_ANDV UNSPEC_IORV UNSPEC_XORV]) ++(define_int_iterator SVE_INT_ADDV [UNSPEC_SADDV UNSPEC_UADDV]) + + (define_int_iterator LOGICALF [UNSPEC_ANDF UNSPEC_IORF UNSPEC_XORF]) + +@@ -1505,8 +1832,20 @@ + + (define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD]) + ++(define_int_iterator MULLBT [UNSPEC_SMULLB UNSPEC_UMULLB ++ UNSPEC_SMULLT UNSPEC_UMULLT]) ++ ++(define_int_iterator SHRNB [UNSPEC_SHRNB UNSPEC_RSHRNB]) ++ ++(define_int_iterator SHRNT [UNSPEC_SHRNT UNSPEC_RSHRNT]) ++ ++(define_int_iterator BSL_DUP [1 2]) ++ + (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) + ++(define_int_iterator DOTPROD_I8MM [UNSPEC_USDOT UNSPEC_SUDOT]) ++(define_int_iterator DOTPROD_US_ONLY [UNSPEC_USDOT]) ++ + (define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN + UNSPEC_SUBHN UNSPEC_RSUBHN]) + +@@ -1516,12 +1855,17 @@ + (define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN + UNSPEC_FMAXNM UNSPEC_FMINNM]) + +-(define_int_iterator PAUTH_LR_SP [UNSPEC_PACISP UNSPEC_AUTISP]) ++(define_int_iterator PAUTH_LR_SP [UNSPEC_PACIASP UNSPEC_AUTIASP ++ UNSPEC_PACIBSP UNSPEC_AUTIBSP]) + +-(define_int_iterator PAUTH_17_16 [UNSPEC_PACI1716 UNSPEC_AUTI1716]) ++(define_int_iterator PAUTH_17_16 [UNSPEC_PACIA1716 UNSPEC_AUTIA1716 ++ UNSPEC_PACIB1716 UNSPEC_AUTIB1716]) + + (define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH]) + ++(define_int_iterator MULHRS [UNSPEC_SMULHS UNSPEC_UMULHS ++ UNSPEC_SMULHRS UNSPEC_UMULHRS]) ++ + (define_int_iterator USSUQADD [UNSPEC_SUQADD UNSPEC_USQADD]) + + (define_int_iterator SUQMOVN [UNSPEC_SQXTN UNSPEC_UQXTN]) +@@ -1555,6 +1899,10 @@ + UNSPEC_TRN1 UNSPEC_TRN2 + UNSPEC_UZP1 UNSPEC_UZP2]) + ++(define_int_iterator PERMUTEQ [UNSPEC_ZIP1Q UNSPEC_ZIP2Q ++ UNSPEC_TRN1Q UNSPEC_TRN2Q ++ UNSPEC_UZP1Q UNSPEC_UZP2Q]) ++ + (define_int_iterator OPTAB_PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2 + UNSPEC_UZP1 UNSPEC_UZP2]) + +@@ -1601,18 +1949,144 @@ + + (define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART]) + +-(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB +- UNSPEC_COND_MUL UNSPEC_COND_DIV +- UNSPEC_COND_MAX UNSPEC_COND_MIN]) ++(define_int_iterator CLAST [UNSPEC_CLASTA UNSPEC_CLASTB]) ++ ++(define_int_iterator LAST [UNSPEC_LASTA UNSPEC_LASTB]) ++ ++(define_int_iterator SVE_INT_UNARY [UNSPEC_RBIT UNSPEC_REVB ++ UNSPEC_REVH UNSPEC_REVW]) ++ ++(define_int_iterator SVE_FP_UNARY [UNSPEC_FRECPE UNSPEC_RSQRTE]) ++ ++(define_int_iterator SVE_FP_UNARY_INT [UNSPEC_FEXPA]) ++ ++(define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS]) ++ ++(define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL]) ++ ++(define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT ++ UNSPEC_BFMLALB ++ UNSPEC_BFMLALT ++ UNSPEC_BFMMLA]) ++ ++(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT ++ UNSPEC_BFMLALB ++ UNSPEC_BFMLALT]) ++ ++(define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV ++ UNSPEC_IORV ++ UNSPEC_SMAXV ++ UNSPEC_SMINV ++ UNSPEC_UMAXV ++ UNSPEC_UMINV ++ UNSPEC_XORV]) ++ ++(define_int_iterator SVE_FP_REDUCTION [UNSPEC_FADDV ++ UNSPEC_FMAXV ++ UNSPEC_FMAXNMV ++ UNSPEC_FMINV ++ UNSPEC_FMINNMV]) ++ ++(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FABS ++ UNSPEC_COND_FNEG ++ UNSPEC_COND_FRECPX ++ UNSPEC_COND_FRINTA ++ UNSPEC_COND_FRINTI ++ UNSPEC_COND_FRINTM ++ UNSPEC_COND_FRINTN ++ UNSPEC_COND_FRINTP ++ 
UNSPEC_COND_FRINTX ++ UNSPEC_COND_FRINTZ ++ UNSPEC_COND_FSQRT]) ++ ++(define_int_iterator SVE_COND_FCVT [UNSPEC_COND_FCVT]) ++(define_int_iterator SVE_COND_FCVTI [UNSPEC_COND_FCVTZS UNSPEC_COND_FCVTZU]) ++(define_int_iterator SVE_COND_ICVTF [UNSPEC_COND_SCVTF UNSPEC_COND_UCVTF]) ++ ++(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD ++ UNSPEC_COND_FDIV ++ UNSPEC_COND_FMAX ++ UNSPEC_COND_FMAXNM ++ UNSPEC_COND_FMIN ++ UNSPEC_COND_FMINNM ++ UNSPEC_COND_FMUL ++ UNSPEC_COND_FMULX ++ UNSPEC_COND_FSUB]) ++ ++(define_int_iterator SVE_COND_FP_BINARY_INT [UNSPEC_COND_FSCALE]) ++ ++(define_int_iterator SVE_COND_FP_ADD [UNSPEC_COND_FADD]) ++(define_int_iterator SVE_COND_FP_SUB [UNSPEC_COND_FSUB]) ++(define_int_iterator SVE_COND_FP_MUL [UNSPEC_COND_FMUL]) ++ ++(define_int_iterator SVE_COND_FP_BINARY_I1 [UNSPEC_COND_FMAX ++ UNSPEC_COND_FMAXNM ++ UNSPEC_COND_FMIN ++ UNSPEC_COND_FMINNM ++ UNSPEC_COND_FMUL]) ++ ++(define_int_iterator SVE_COND_FP_BINARY_REG [UNSPEC_COND_FDIV ++ UNSPEC_COND_FMULX]) ++ ++(define_int_iterator SVE_COND_FCADD [UNSPEC_COND_FCADD90 ++ UNSPEC_COND_FCADD270]) ++ ++(define_int_iterator SVE_COND_FP_MAXMIN [UNSPEC_COND_FMAX ++ UNSPEC_COND_FMAXNM ++ UNSPEC_COND_FMIN ++ UNSPEC_COND_FMINNM]) ++ ++;; Floating-point max/min operations that correspond to optabs, ++;; as opposed to those that are internal to the port. ++(define_int_iterator SVE_COND_FP_MAXMIN_PUBLIC [UNSPEC_COND_FMAXNM ++ UNSPEC_COND_FMINNM]) + + (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA + UNSPEC_COND_FMLS + UNSPEC_COND_FNMLA + UNSPEC_COND_FNMLS]) + +-(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_LT UNSPEC_COND_LE +- UNSPEC_COND_EQ UNSPEC_COND_NE +- UNSPEC_COND_GE UNSPEC_COND_GT]) ++(define_int_iterator SVE_COND_FCMLA [UNSPEC_COND_FCMLA ++ UNSPEC_COND_FCMLA90 ++ UNSPEC_COND_FCMLA180 ++ UNSPEC_COND_FCMLA270]) ++ ++(define_int_iterator SVE_COND_INT_CMP_WIDE [UNSPEC_COND_CMPEQ_WIDE ++ UNSPEC_COND_CMPGE_WIDE ++ UNSPEC_COND_CMPGT_WIDE ++ UNSPEC_COND_CMPHI_WIDE ++ UNSPEC_COND_CMPHS_WIDE ++ UNSPEC_COND_CMPLE_WIDE ++ UNSPEC_COND_CMPLO_WIDE ++ UNSPEC_COND_CMPLS_WIDE ++ UNSPEC_COND_CMPLT_WIDE ++ UNSPEC_COND_CMPNE_WIDE]) ++ ++;; SVE FP comparisons that accept #0.0. 
++(define_int_iterator SVE_COND_FP_CMP_I0 [UNSPEC_COND_FCMEQ ++ UNSPEC_COND_FCMGE ++ UNSPEC_COND_FCMGT ++ UNSPEC_COND_FCMLE ++ UNSPEC_COND_FCMLT ++ UNSPEC_COND_FCMNE]) ++ ++(define_int_iterator SVE_COND_FP_ABS_CMP [UNSPEC_COND_FCMGE ++ UNSPEC_COND_FCMGT ++ UNSPEC_COND_FCMLE ++ UNSPEC_COND_FCMLT]) ++ ++(define_int_iterator SVE_FP_TERNARY_LANE [UNSPEC_FMLA UNSPEC_FMLS]) ++ ++(define_int_iterator SVE_CFP_TERNARY_LANE [UNSPEC_FCMLA UNSPEC_FCMLA90 ++ UNSPEC_FCMLA180 UNSPEC_FCMLA270]) ++ ++(define_int_iterator SVE_WHILE [UNSPEC_WHILELE UNSPEC_WHILELO UNSPEC_WHILELS UNSPEC_WHILELT]) ++ ++(define_int_iterator SVE_SHIFT_WIDE [UNSPEC_ASHIFT_WIDE ++ UNSPEC_ASHIFTRT_WIDE ++ UNSPEC_LSHIFTRT_WIDE]) ++ ++(define_int_iterator SVE_LDFF1_LDNF1 [UNSPEC_LDFF1 UNSPEC_LDNF1]) + + (define_int_iterator FCADD [UNSPEC_FCADD90 + UNSPEC_FCADD270]) +@@ -1622,6 +2096,23 @@ + UNSPEC_FCMLA180 + UNSPEC_FCMLA270]) + ++(define_int_iterator FRINTNZX [UNSPEC_FRINT32Z UNSPEC_FRINT32X ++ UNSPEC_FRINT64Z UNSPEC_FRINT64X]) ++ ++(define_int_iterator SVE_BRK_UNARY [UNSPEC_BRKA UNSPEC_BRKB]) ++ ++(define_int_iterator SVE_BRK_BINARY [UNSPEC_BRKN UNSPEC_BRKPA UNSPEC_BRKPB]) ++ ++(define_int_iterator SVE_PITER [UNSPEC_PFIRST UNSPEC_PNEXT]) ++ ++(define_int_iterator MATMUL [UNSPEC_SMATMUL UNSPEC_UMATMUL ++ UNSPEC_USMATMUL]) ++ ++(define_int_iterator FMMLA [UNSPEC_FMMLA]) ++ ++(define_int_iterator BF_MLA [UNSPEC_BFMLALB ++ UNSPEC_BFMLALT]) ++ + ;; Iterators for atomic operations. + + (define_int_iterator ATOMIC_LDOP +@@ -1646,19 +2137,84 @@ + (define_int_attr optab [(UNSPEC_ANDF "and") + (UNSPEC_IORF "ior") + (UNSPEC_XORF "xor") ++ (UNSPEC_SADDV "sadd") ++ (UNSPEC_UADDV "uadd") + (UNSPEC_ANDV "and") + (UNSPEC_IORV "ior") + (UNSPEC_XORV "xor") +- (UNSPEC_COND_ADD "add") +- (UNSPEC_COND_SUB "sub") +- (UNSPEC_COND_MUL "mul") +- (UNSPEC_COND_DIV "div") +- (UNSPEC_COND_MAX "smax") +- (UNSPEC_COND_MIN "smin") ++ (UNSPEC_FRECPE "frecpe") ++ (UNSPEC_FRECPS "frecps") ++ (UNSPEC_RSQRTE "frsqrte") ++ (UNSPEC_RSQRTS "frsqrts") ++ (UNSPEC_RBIT "rbit") ++ (UNSPEC_REVB "revb") ++ (UNSPEC_REVH "revh") ++ (UNSPEC_REVW "revw") ++ (UNSPEC_UMAXV "umax") ++ (UNSPEC_UMINV "umin") ++ (UNSPEC_SMAXV "smax") ++ (UNSPEC_SMINV "smin") ++ (UNSPEC_FADDV "plus") ++ (UNSPEC_FMAXNMV "smax") ++ (UNSPEC_FMAXV "smax_nan") ++ (UNSPEC_FMINNMV "smin") ++ (UNSPEC_FMINV "smin_nan") ++ (UNSPEC_SMUL_HIGHPART "smulh") ++ (UNSPEC_UMUL_HIGHPART "umulh") ++ (UNSPEC_FMLA "fma") ++ (UNSPEC_FMLS "fnma") ++ (UNSPEC_FCMLA "fcmla") ++ (UNSPEC_FCMLA90 "fcmla90") ++ (UNSPEC_FCMLA180 "fcmla180") ++ (UNSPEC_FCMLA270 "fcmla270") ++ (UNSPEC_FEXPA "fexpa") ++ (UNSPEC_FTSMUL "ftsmul") ++ (UNSPEC_FTSSEL "ftssel") ++ (UNSPEC_SMATMUL "smatmul") ++ (UNSPEC_TRN1Q "trn1q") ++ (UNSPEC_TRN2Q "trn2q") ++ (UNSPEC_UMATMUL "umatmul") ++ (UNSPEC_USMATMUL "usmatmul") ++ (UNSPEC_UZP1Q "uzp1q") ++ (UNSPEC_UZP2Q "uzp2q") ++ (UNSPEC_ZIP1Q "zip1q") ++ (UNSPEC_ZIP2Q "zip2q") ++ (UNSPEC_COND_FABS "abs") ++ (UNSPEC_COND_FADD "add") ++ (UNSPEC_COND_FCADD90 "cadd90") ++ (UNSPEC_COND_FCADD270 "cadd270") ++ (UNSPEC_COND_FCMLA "fcmla") ++ (UNSPEC_COND_FCMLA90 "fcmla90") ++ (UNSPEC_COND_FCMLA180 "fcmla180") ++ (UNSPEC_COND_FCMLA270 "fcmla270") ++ (UNSPEC_COND_FCVT "fcvt") ++ (UNSPEC_COND_FCVTZS "fix_trunc") ++ (UNSPEC_COND_FCVTZU "fixuns_trunc") ++ (UNSPEC_COND_FDIV "div") ++ (UNSPEC_COND_FMAX "smax_nan") ++ (UNSPEC_COND_FMAXNM "smax") ++ (UNSPEC_COND_FMIN "smin_nan") ++ (UNSPEC_COND_FMINNM "smin") + (UNSPEC_COND_FMLA "fma") + (UNSPEC_COND_FMLS "fnma") ++ (UNSPEC_COND_FMUL "mul") ++ (UNSPEC_COND_FMULX "mulx") ++ 
(UNSPEC_COND_FNEG "neg") + (UNSPEC_COND_FNMLA "fnms") +- (UNSPEC_COND_FNMLS "fms")]) ++ (UNSPEC_COND_FNMLS "fms") ++ (UNSPEC_COND_FRECPX "frecpx") ++ (UNSPEC_COND_FRINTA "round") ++ (UNSPEC_COND_FRINTI "nearbyint") ++ (UNSPEC_COND_FRINTM "floor") ++ (UNSPEC_COND_FRINTN "frintn") ++ (UNSPEC_COND_FRINTP "ceil") ++ (UNSPEC_COND_FRINTX "rint") ++ (UNSPEC_COND_FRINTZ "btrunc") ++ (UNSPEC_COND_FSCALE "fscale") ++ (UNSPEC_COND_FSQRT "sqrt") ++ (UNSPEC_COND_FSUB "sub") ++ (UNSPEC_COND_SCVTF "float") ++ (UNSPEC_COND_UCVTF "floatuns")]) + + (define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax") + (UNSPEC_UMINV "umin") +@@ -1671,7 +2227,11 @@ + (UNSPEC_FMINNMV "smin") + (UNSPEC_FMINV "smin_nan") + (UNSPEC_FMAXNM "fmax") +- (UNSPEC_FMINNM "fmin")]) ++ (UNSPEC_FMINNM "fmin") ++ (UNSPEC_COND_FMAX "fmax_nan") ++ (UNSPEC_COND_FMAXNM "fmax") ++ (UNSPEC_COND_FMIN "fmin_nan") ++ (UNSPEC_COND_FMINNM "fmin")]) + + (define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax") + (UNSPEC_UMINV "umin") +@@ -1686,22 +2246,41 @@ + (UNSPEC_FMAXNM "fmaxnm") + (UNSPEC_FMINNM "fminnm")]) + +-(define_int_attr bit_reduc_op [(UNSPEC_ANDV "andv") +- (UNSPEC_IORV "orv") +- (UNSPEC_XORV "eorv")]) ++(define_code_attr binqops_op [(ss_plus "sqadd") ++ (us_plus "uqadd") ++ (ss_minus "sqsub") ++ (us_minus "uqsub")]) ++ ++(define_code_attr binqops_op_rev [(ss_plus "sqsub") ++ (ss_minus "sqadd")]) + + ;; The SVE logical instruction that implements an unspec. + (define_int_attr logicalf_op [(UNSPEC_ANDF "and") + (UNSPEC_IORF "orr") + (UNSPEC_XORF "eor")]) + ++(define_int_attr last_op [(UNSPEC_CLASTA "after_last") ++ (UNSPEC_CLASTB "last") ++ (UNSPEC_LASTA "after_last") ++ (UNSPEC_LASTB "last")]) ++ + ;; "s" for signed operations and "u" for unsigned ones. +-(define_int_attr su [(UNSPEC_UNPACKSHI "s") ++(define_int_attr su [(UNSPEC_SADDV "s") ++ (UNSPEC_UADDV "u") ++ (UNSPEC_UNPACKSHI "s") + (UNSPEC_UNPACKUHI "u") + (UNSPEC_UNPACKSLO "s") + (UNSPEC_UNPACKULO "u") + (UNSPEC_SMUL_HIGHPART "s") +- (UNSPEC_UMUL_HIGHPART "u")]) ++ (UNSPEC_UMUL_HIGHPART "u") ++ (UNSPEC_COND_FCVTZS "s") ++ (UNSPEC_COND_FCVTZU "u") ++ (UNSPEC_COND_SCVTF "s") ++ (UNSPEC_COND_UCVTF "u") ++ (UNSPEC_SMULLB "s") (UNSPEC_UMULLB "u") ++ (UNSPEC_SMULLT "s") (UNSPEC_UMULLT "u") ++ (UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u") ++ (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")]) + + (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") + (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") +@@ -1731,6 +2310,9 @@ + (UNSPEC_URSHL "ur") (UNSPEC_SRSHL "sr") + (UNSPEC_UQRSHL "u") (UNSPEC_SQRSHL "s") + (UNSPEC_SDOT "s") (UNSPEC_UDOT "u") ++ (UNSPEC_USDOT "us") (UNSPEC_SUDOT "su") ++ (UNSPEC_SMATMUL "s") (UNSPEC_UMATMUL "u") ++ (UNSPEC_USMATMUL "us") + ]) + + (define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r") +@@ -1739,6 +2321,10 @@ + (UNSPEC_SQRSHRN "r") (UNSPEC_UQRSHRN "r") + (UNSPEC_SQSHL "") (UNSPEC_UQSHL "") + (UNSPEC_SQRSHL "r")(UNSPEC_UQRSHL "r") ++ (UNSPEC_SHRNB "") (UNSPEC_SHRNT "") ++ (UNSPEC_RSHRNB "r") (UNSPEC_RSHRNT "r") ++ (UNSPEC_SMULHS "") (UNSPEC_UMULHS "") ++ (UNSPEC_SMULHRS "r") (UNSPEC_UMULHRS "r") + ]) + + (define_int_attr lr [(UNSPEC_SSLI "l") (UNSPEC_USLI "l") +@@ -1751,6 +2337,13 @@ + (UNSPEC_SHADD "") (UNSPEC_UHADD "u") + (UNSPEC_SRHADD "") (UNSPEC_URHADD "u")]) + ++(define_int_attr fn [(UNSPEC_LDFF1 "f") (UNSPEC_LDNF1 "n")]) ++ ++(define_int_attr ab [(UNSPEC_CLASTA "a") (UNSPEC_CLASTB "b") ++ (UNSPEC_LASTA "a") (UNSPEC_LASTB "b")]) ++ ++(define_int_attr bt [(UNSPEC_BFMLALB "b") (UNSPEC_BFMLALT "t")]) ++ + (define_int_attr addsub [(UNSPEC_SHADD "add") + (UNSPEC_UHADD 
"add") + (UNSPEC_SRHADD "add") +@@ -1768,6 +2361,18 @@ + (UNSPEC_RADDHN2 "add") + (UNSPEC_RSUBHN2 "sub")]) + ++;; BSL variants: first commutative operand. ++(define_int_attr bsl_1st [(1 "w") (2 "0")]) ++ ++;; BSL variants: second commutative operand. ++(define_int_attr bsl_2nd [(1 "0") (2 "w")]) ++ ++;; BSL variants: duplicated input operand. ++(define_int_attr bsl_dup [(1 "1") (2 "2")]) ++ ++;; BSL variants: operand which requires preserving via movprfx. ++(define_int_attr bsl_mov [(1 "2") (2 "1")]) ++ + (define_int_attr offsetlr [(UNSPEC_SSLI "") (UNSPEC_USLI "") + (UNSPEC_SSRI "offset_") + (UNSPEC_USRI "offset_")]) +@@ -1797,29 +2402,47 @@ + (UNSPEC_FCVTZU "fcvtzu")]) + + ;; Pointer authentication mnemonic prefix. +-(define_int_attr pauth_mnem_prefix [(UNSPEC_PACISP "paci") +- (UNSPEC_AUTISP "auti") +- (UNSPEC_PACI1716 "paci") +- (UNSPEC_AUTI1716 "auti")]) +- +-;; Pointer authentication HINT number for NOP space instructions using A Key. +-(define_int_attr pauth_hint_num_a [(UNSPEC_PACISP "25") +- (UNSPEC_AUTISP "29") +- (UNSPEC_PACI1716 "8") +- (UNSPEC_AUTI1716 "12")]) +- +-(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip") +- (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") +- (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) ++(define_int_attr pauth_mnem_prefix [(UNSPEC_PACIASP "pacia") ++ (UNSPEC_PACIBSP "pacib") ++ (UNSPEC_PACIA1716 "pacia") ++ (UNSPEC_PACIB1716 "pacib") ++ (UNSPEC_AUTIASP "autia") ++ (UNSPEC_AUTIBSP "autib") ++ (UNSPEC_AUTIA1716 "autia") ++ (UNSPEC_AUTIB1716 "autib")]) ++ ++(define_int_attr pauth_key [(UNSPEC_PACIASP "AARCH64_KEY_A") ++ (UNSPEC_PACIBSP "AARCH64_KEY_B") ++ (UNSPEC_PACIA1716 "AARCH64_KEY_A") ++ (UNSPEC_PACIB1716 "AARCH64_KEY_B") ++ (UNSPEC_AUTIASP "AARCH64_KEY_A") ++ (UNSPEC_AUTIBSP "AARCH64_KEY_B") ++ (UNSPEC_AUTIA1716 "AARCH64_KEY_A") ++ (UNSPEC_AUTIB1716 "AARCH64_KEY_B")]) ++ ++;; Pointer authentication HINT number for NOP space instructions using A and ++;; B key. ++(define_int_attr pauth_hint_num [(UNSPEC_PACIASP "25") ++ (UNSPEC_PACIBSP "27") ++ (UNSPEC_AUTIASP "29") ++ (UNSPEC_AUTIBSP "31") ++ (UNSPEC_PACIA1716 "8") ++ (UNSPEC_PACIB1716 "10") ++ (UNSPEC_AUTIA1716 "12") ++ (UNSPEC_AUTIB1716 "14")]) ++ ++(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip1") (UNSPEC_ZIP2 "zip2") ++ (UNSPEC_ZIP1Q "zip1") (UNSPEC_ZIP2Q "zip2") ++ (UNSPEC_TRN1 "trn1") (UNSPEC_TRN2 "trn2") ++ (UNSPEC_TRN1Q "trn1") (UNSPEC_TRN2Q "trn2") ++ (UNSPEC_UZP1 "uzp1") (UNSPEC_UZP2 "uzp2") ++ (UNSPEC_UZP1Q "uzp1") (UNSPEC_UZP2Q "uzp2")]) + + ; op code for REV instructions (size within which elements are reversed). + (define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32") + (UNSPEC_REV16 "16")]) + +-(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") +- (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") +- (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2") +- (UNSPEC_UNPACKSHI "hi") (UNSPEC_UNPACKUHI "hi") ++(define_int_attr perm_hilo [(UNSPEC_UNPACKSHI "hi") (UNSPEC_UNPACKUHI "hi") + (UNSPEC_UNPACKSLO "lo") (UNSPEC_UNPACKULO "lo")]) + + ;; Return true if the associated optab refers to the high-numbered lanes, +@@ -1861,34 +2484,122 @@ + (define_int_attr f16mac1 [(UNSPEC_FMLAL "a") (UNSPEC_FMLSL "s") + (UNSPEC_FMLAL2 "a") (UNSPEC_FMLSL2 "s")]) + ++(define_int_attr frintnzs_op [(UNSPEC_FRINT32Z "frint32z") (UNSPEC_FRINT32X "frint32x") ++ (UNSPEC_FRINT64Z "frint64z") (UNSPEC_FRINT64X "frint64x")]) ++ + ;; The condition associated with an UNSPEC_COND_. 
+-(define_int_attr cmp_op [(UNSPEC_COND_LT "lt") +- (UNSPEC_COND_LE "le") +- (UNSPEC_COND_EQ "eq") +- (UNSPEC_COND_NE "ne") +- (UNSPEC_COND_GE "ge") +- (UNSPEC_COND_GT "gt")]) +- +-(define_int_attr sve_fp_op [(UNSPEC_COND_ADD "fadd") +- (UNSPEC_COND_SUB "fsub") +- (UNSPEC_COND_MUL "fmul") +- (UNSPEC_COND_DIV "fdiv") +- (UNSPEC_COND_MAX "fmaxnm") +- (UNSPEC_COND_MIN "fminnm")]) +- +-(define_int_attr sve_fp_op_rev [(UNSPEC_COND_ADD "fadd") +- (UNSPEC_COND_SUB "fsubr") +- (UNSPEC_COND_MUL "fmul") +- (UNSPEC_COND_DIV "fdivr") +- (UNSPEC_COND_MAX "fmaxnm") +- (UNSPEC_COND_MIN "fminnm")]) ++(define_int_attr cmp_op [(UNSPEC_COND_CMPEQ_WIDE "eq") ++ (UNSPEC_COND_CMPGE_WIDE "ge") ++ (UNSPEC_COND_CMPGT_WIDE "gt") ++ (UNSPEC_COND_CMPHI_WIDE "hi") ++ (UNSPEC_COND_CMPHS_WIDE "hs") ++ (UNSPEC_COND_CMPLE_WIDE "le") ++ (UNSPEC_COND_CMPLO_WIDE "lo") ++ (UNSPEC_COND_CMPLS_WIDE "ls") ++ (UNSPEC_COND_CMPLT_WIDE "lt") ++ (UNSPEC_COND_CMPNE_WIDE "ne") ++ (UNSPEC_COND_FCMEQ "eq") ++ (UNSPEC_COND_FCMGE "ge") ++ (UNSPEC_COND_FCMGT "gt") ++ (UNSPEC_COND_FCMLE "le") ++ (UNSPEC_COND_FCMLT "lt") ++ (UNSPEC_COND_FCMNE "ne") ++ (UNSPEC_WHILELE "le") ++ (UNSPEC_WHILELO "lo") ++ (UNSPEC_WHILELS "ls") ++ (UNSPEC_WHILELT "lt")]) ++ ++(define_int_attr while_optab_cmp [(UNSPEC_WHILELE "le") ++ (UNSPEC_WHILELO "ult") ++ (UNSPEC_WHILELS "ule") ++ (UNSPEC_WHILELT "lt")]) ++ ++(define_int_attr brk_op [(UNSPEC_BRKA "a") (UNSPEC_BRKB "b") ++ (UNSPEC_BRKN "n") ++ (UNSPEC_BRKPA "pa") (UNSPEC_BRKPB "pb")]) ++ ++(define_int_attr sve_pred_op [(UNSPEC_PFIRST "pfirst") (UNSPEC_PNEXT "pnext")]) ++ ++(define_int_attr sve_int_op [(UNSPEC_ANDV "andv") ++ (UNSPEC_IORV "orv") ++ (UNSPEC_XORV "eorv") ++ (UNSPEC_UMAXV "umaxv") ++ (UNSPEC_UMINV "uminv") ++ (UNSPEC_SMAXV "smaxv") ++ (UNSPEC_SMINV "sminv") ++ (UNSPEC_SMUL_HIGHPART "smulh") ++ (UNSPEC_UMUL_HIGHPART "umulh") ++ (UNSPEC_ASHIFT_WIDE "lsl") ++ (UNSPEC_ASHIFTRT_WIDE "asr") ++ (UNSPEC_LSHIFTRT_WIDE "lsr") ++ (UNSPEC_RBIT "rbit") ++ (UNSPEC_REVB "revb") ++ (UNSPEC_REVH "revh") ++ (UNSPEC_REVW "revw")]) ++ ++(define_int_attr sve_fp_op [(UNSPEC_BFDOT "bfdot") ++ (UNSPEC_BFMLALB "bfmlalb") ++ (UNSPEC_BFMLALT "bfmlalt") ++ (UNSPEC_BFMMLA "bfmmla") ++ (UNSPEC_FRECPE "frecpe") ++ (UNSPEC_FRECPS "frecps") ++ (UNSPEC_RSQRTE "frsqrte") ++ (UNSPEC_RSQRTS "frsqrts") ++ (UNSPEC_FADDV "faddv") ++ (UNSPEC_FEXPA "fexpa") ++ (UNSPEC_FMAXNMV "fmaxnmv") ++ (UNSPEC_FMAXV "fmaxv") ++ (UNSPEC_FMINNMV "fminnmv") ++ (UNSPEC_FMINV "fminv") ++ (UNSPEC_FMLA "fmla") ++ (UNSPEC_FMLS "fmls") ++ (UNSPEC_FMMLA "fmmla") ++ (UNSPEC_FTSMUL "ftsmul") ++ (UNSPEC_FTSSEL "ftssel") ++ (UNSPEC_COND_FABS "fabs") ++ (UNSPEC_COND_FADD "fadd") ++ (UNSPEC_COND_FDIV "fdiv") ++ (UNSPEC_COND_FMAX "fmax") ++ (UNSPEC_COND_FMAXNM "fmaxnm") ++ (UNSPEC_COND_FMIN "fmin") ++ (UNSPEC_COND_FMINNM "fminnm") ++ (UNSPEC_COND_FMUL "fmul") ++ (UNSPEC_COND_FMULX "fmulx") ++ (UNSPEC_COND_FNEG "fneg") ++ (UNSPEC_COND_FRECPX "frecpx") ++ (UNSPEC_COND_FRINTA "frinta") ++ (UNSPEC_COND_FRINTI "frinti") ++ (UNSPEC_COND_FRINTM "frintm") ++ (UNSPEC_COND_FRINTN "frintn") ++ (UNSPEC_COND_FRINTP "frintp") ++ (UNSPEC_COND_FRINTX "frintx") ++ (UNSPEC_COND_FRINTZ "frintz") ++ (UNSPEC_COND_FSCALE "fscale") ++ (UNSPEC_COND_FSQRT "fsqrt") ++ (UNSPEC_COND_FSUB "fsub")]) ++ ++(define_int_attr sve_fp_op_rev [(UNSPEC_COND_FADD "fadd") ++ (UNSPEC_COND_FDIV "fdivr") ++ (UNSPEC_COND_FMAX "fmax") ++ (UNSPEC_COND_FMAXNM "fmaxnm") ++ (UNSPEC_COND_FMIN "fmin") ++ (UNSPEC_COND_FMINNM "fminnm") ++ (UNSPEC_COND_FMUL "fmul") ++ (UNSPEC_COND_FMULX "fmulx") ++ (UNSPEC_COND_FSUB 
"fsubr")]) + + (define_int_attr rot [(UNSPEC_FCADD90 "90") + (UNSPEC_FCADD270 "270") + (UNSPEC_FCMLA "0") + (UNSPEC_FCMLA90 "90") + (UNSPEC_FCMLA180 "180") +- (UNSPEC_FCMLA270 "270")]) ++ (UNSPEC_FCMLA270 "270") ++ (UNSPEC_COND_FCADD90 "90") ++ (UNSPEC_COND_FCADD270 "270") ++ (UNSPEC_COND_FCMLA "0") ++ (UNSPEC_COND_FCMLA90 "90") ++ (UNSPEC_COND_FCMLA180 "180") ++ (UNSPEC_COND_FCMLA270 "270")]) + + (define_int_attr sve_fmla_op [(UNSPEC_COND_FMLA "fmla") + (UNSPEC_COND_FMLS "fmls") +@@ -1900,9 +2611,54 @@ + (UNSPEC_COND_FNMLA "fnmad") + (UNSPEC_COND_FNMLS "fnmsb")]) + +-(define_int_attr commutative [(UNSPEC_COND_ADD "true") +- (UNSPEC_COND_SUB "false") +- (UNSPEC_COND_MUL "true") +- (UNSPEC_COND_DIV "false") +- (UNSPEC_COND_MIN "true") +- (UNSPEC_COND_MAX "true")]) ++;; The register constraint to use for the final operand in a binary BRK. ++(define_int_attr brk_reg_con [(UNSPEC_BRKN "0") ++ (UNSPEC_BRKPA "Upa") (UNSPEC_BRKPB "Upa")]) ++ ++;; The register number to print for the above. ++(define_int_attr brk_reg_opno [(UNSPEC_BRKN "0") ++ (UNSPEC_BRKPA "3") (UNSPEC_BRKPB "3")]) ++ ++;; The predicate to use for the first input operand in a floating-point ++;; 3 pattern. ++(define_int_attr sve_pred_fp_rhs1_operand ++ [(UNSPEC_COND_FADD "register_operand") ++ (UNSPEC_COND_FDIV "register_operand") ++ (UNSPEC_COND_FMAX "register_operand") ++ (UNSPEC_COND_FMAXNM "register_operand") ++ (UNSPEC_COND_FMIN "register_operand") ++ (UNSPEC_COND_FMINNM "register_operand") ++ (UNSPEC_COND_FMUL "register_operand") ++ (UNSPEC_COND_FMULX "register_operand") ++ (UNSPEC_COND_FSUB "aarch64_sve_float_arith_operand")]) ++ ++;; The predicate to use for the second input operand in a floating-point ++;; 3 pattern. ++(define_int_attr sve_pred_fp_rhs2_operand ++ [(UNSPEC_COND_FADD "aarch64_sve_float_arith_with_sub_operand") ++ (UNSPEC_COND_FDIV "register_operand") ++ (UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_operand") ++ (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_operand") ++ (UNSPEC_COND_FMIN "aarch64_sve_float_maxmin_operand") ++ (UNSPEC_COND_FMINNM "aarch64_sve_float_maxmin_operand") ++ (UNSPEC_COND_FMUL "aarch64_sve_float_mul_operand") ++ (UNSPEC_COND_FMULX "register_operand") ++ (UNSPEC_COND_FSUB "register_operand")]) ++ ++;; Likewise for immediates only. ++(define_int_attr sve_pred_fp_rhs2_immediate ++ [(UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_immediate") ++ (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_immediate") ++ (UNSPEC_COND_FMIN "aarch64_sve_float_maxmin_immediate") ++ (UNSPEC_COND_FMINNM "aarch64_sve_float_maxmin_immediate") ++ (UNSPEC_COND_FMUL "aarch64_sve_float_mul_immediate")]) ++ ++;; The maximum number of element bits that an instruction can handle. ++(define_int_attr max_elem_bits [(UNSPEC_UADDV "64") (UNSPEC_SADDV "32") ++ (UNSPEC_PFIRST "8") (UNSPEC_PNEXT "64")]) ++ ++;; The minimum number of element bits that an instruction can handle. 
++(define_int_attr min_elem_bits [(UNSPEC_RBIT "8") ++ (UNSPEC_REVB "16") ++ (UNSPEC_REVH "32") ++ (UNSPEC_REVW "64")]) +diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md +index 5f7f281e2..0b6bf6172 100644 +--- a/gcc/config/aarch64/predicates.md ++++ b/gcc/config/aarch64/predicates.md +@@ -39,9 +39,17 @@ + (and (match_code "const_int") + (match_test "op == CONST0_RTX (mode)"))) + +-(define_special_predicate "subreg_lowpart_operator" +- (and (match_code "subreg") +- (match_test "subreg_lowpart_p (op)"))) ++(define_predicate "const_1_to_3_operand" ++ (match_code "const_int,const_vector") ++{ ++ op = unwrap_const_vec_duplicate (op); ++ return CONST_INT_P (op) && IN_RANGE (INTVAL (op), 1, 3); ++}) ++ ++(define_predicate "subreg_lowpart_operator" ++ (ior (match_code "truncate") ++ (and (match_code "subreg") ++ (match_test "subreg_lowpart_p (op)")))) + + (define_predicate "aarch64_ccmp_immediate" + (and (match_code "const_int") +@@ -53,13 +61,12 @@ + + (define_predicate "aarch64_simd_register" + (and (match_code "reg") +- (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") +- (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_REGS")))) ++ (match_test "FP_REGNUM_P (REGNO (op))"))) + + (define_predicate "aarch64_reg_or_zero" +- (and (match_code "reg,subreg,const_int") ++ (and (match_code "reg,subreg,const_int,const_double") + (ior (match_operand 0 "register_operand") +- (match_test "op == const0_rtx")))) ++ (match_test "op == CONST0_RTX (GET_MODE (op))")))) + + (define_predicate "aarch64_reg_or_fp_zero" + (ior (match_operand 0 "register_operand") +@@ -98,6 +105,10 @@ + (and (match_code "const_double") + (match_test "aarch64_fpconst_pow_of_2 (op) > 0"))) + ++(define_predicate "aarch64_fp_pow2_recip" ++ (and (match_code "const_double") ++ (match_test "aarch64_fpconst_pow2_recip (op) > 0"))) ++ + (define_predicate "aarch64_fp_vec_pow2" + (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0")) + +@@ -138,10 +149,18 @@ + (and (match_operand 0 "aarch64_pluslong_immediate") + (not (match_operand 0 "aarch64_plus_immediate")))) + ++(define_predicate "aarch64_sve_scalar_inc_dec_immediate" ++ (and (match_code "const_poly_int") ++ (match_test "aarch64_sve_scalar_inc_dec_immediate_p (op)"))) ++ + (define_predicate "aarch64_sve_addvl_addpl_immediate" + (and (match_code "const_poly_int") + (match_test "aarch64_sve_addvl_addpl_immediate_p (op)"))) + ++(define_predicate "aarch64_sve_plus_immediate" ++ (ior (match_operand 0 "aarch64_sve_scalar_inc_dec_immediate") ++ (match_operand 0 "aarch64_sve_addvl_addpl_immediate"))) ++ + (define_predicate "aarch64_split_add_offset_immediate" + (and (match_code "const_poly_int") + (match_test "aarch64_add_offset_temporaries (op) == 1"))) +@@ -149,7 +168,8 @@ + (define_predicate "aarch64_pluslong_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_pluslong_immediate") +- (match_operand 0 "aarch64_sve_addvl_addpl_immediate"))) ++ (and (match_test "TARGET_SVE") ++ (match_operand 0 "aarch64_sve_plus_immediate")))) + + (define_predicate "aarch64_pluslong_or_poly_operand" + (ior (match_operand 0 "aarch64_pluslong_operand") +@@ -323,12 +343,6 @@ + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_scalar_int_operand"))) + +-(define_predicate "aarch64_smin" +- (match_code "smin")) +- +-(define_predicate "aarch64_umin" +- (match_code "umin")) +- + ;; True for integer comparisons and for FP comparisons other than LTGT or UNEQ. 
+ (define_special_predicate "aarch64_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered, +@@ -444,6 +458,12 @@ + return aarch64_stepped_int_parallel_p (op, 1); + }) + ++(define_predicate "descending_int_parallel" ++ (match_code "parallel") ++{ ++ return aarch64_stepped_int_parallel_p (op, -1); ++}) ++ + (define_special_predicate "aarch64_simd_lshift_imm" + (match_code "const,const_vector") + { +@@ -460,6 +480,10 @@ + (and (match_code "const,const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) + ++(define_predicate "aarch64_simd_imm_one" ++ (and (match_code "const_vector") ++ (match_test "op == CONST1_RTX (GET_MODE (op))"))) ++ + (define_predicate "aarch64_simd_or_scalar_imm_zero" + (and (match_code "const_int,const_double,const,const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) +@@ -474,6 +498,10 @@ + (match_test "op == const0_rtx") + (match_operand 0 "aarch64_simd_or_scalar_imm_zero")))) + ++(define_predicate "aarch64_simd_reg_or_minus_one" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "aarch64_simd_imm_minus_one"))) ++ + (define_predicate "aarch64_simd_struct_operand" + (and (match_code "mem") + (match_test "TARGET_SIMD && aarch64_simd_mem_operand_p (op)"))) +@@ -556,12 +584,44 @@ + (and (match_operand 0 "memory_operand") + (match_test "aarch64_sve_ld1r_operand_p (op)"))) + ++(define_predicate "aarch64_sve_ld1rq_operand" ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ld1rq_operand_p (op)"))) ++ ++(define_predicate "aarch64_sve_ld1ro_operand_b" ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ld1ro_operand_p (op, QImode)"))) ++ ++(define_predicate "aarch64_sve_ld1ro_operand_h" ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ld1ro_operand_p (op, HImode)"))) ++ ++(define_predicate "aarch64_sve_ld1ro_operand_w" ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ld1ro_operand_p (op, SImode)"))) ++ ++(define_predicate "aarch64_sve_ld1ro_operand_d" ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ld1ro_operand_p (op, DImode)"))) ++ ++(define_predicate "aarch64_sve_ldff1_operand" ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ldff1_operand_p (op)"))) ++ ++(define_predicate "aarch64_sve_ldnf1_operand" ++ (and (match_code "mem") ++ (match_test "aarch64_sve_ldnf1_operand_p (op)"))) ++ + ;; Like memory_operand, but restricted to addresses that are valid for + ;; SVE LDR and STR instructions. 
+ (define_predicate "aarch64_sve_ldr_operand" + (and (match_code "mem") + (match_test "aarch64_sve_ldr_operand_p (op)"))) + ++(define_special_predicate "aarch64_sve_prefetch_operand" ++ (and (match_code "reg, plus") ++ (match_test "aarch64_sve_prefetch_operand_p (op, mode)"))) ++ + (define_predicate "aarch64_sve_nonimmediate_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_ldr_operand"))) +@@ -586,6 +646,10 @@ + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_ld1r_operand"))) + ++(define_predicate "aarch64_sve_ptrue_svpattern_immediate" ++ (and (match_code "const") ++ (match_test "aarch64_sve_ptrue_svpattern_p (op, NULL)"))) ++ + (define_predicate "aarch64_sve_arith_immediate" + (and (match_code "const,const_vector") + (match_test "aarch64_sve_arith_immediate_p (op, false)"))) +@@ -594,28 +658,84 @@ + (and (match_code "const,const_vector") + (match_test "aarch64_sve_arith_immediate_p (op, true)"))) + +-(define_predicate "aarch64_sve_inc_dec_immediate" ++(define_predicate "aarch64_sve_qadd_immediate" + (and (match_code "const,const_vector") +- (match_test "aarch64_sve_inc_dec_immediate_p (op)"))) ++ (match_test "aarch64_sve_sqadd_sqsub_immediate_p (op, false)"))) ++ ++(define_predicate "aarch64_sve_qsub_immediate" ++ (and (match_code "const,const_vector") ++ (match_test "aarch64_sve_sqadd_sqsub_immediate_p (op, true)"))) ++ ++(define_predicate "aarch64_sve_vector_inc_dec_immediate" ++ (and (match_code "const,const_vector") ++ (match_test "aarch64_sve_vector_inc_dec_immediate_p (op)"))) ++ ++(define_predicate "aarch64_sve_gather_immediate_b" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 31)"))) ++ ++(define_predicate "aarch64_sve_gather_immediate_h" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 62)") ++ (match_test "(INTVAL (op) & 1) == 0"))) ++ ++(define_predicate "aarch64_sve_gather_immediate_w" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 124)") ++ (match_test "(INTVAL (op) & 3) == 0"))) ++ ++(define_predicate "aarch64_sve_gather_immediate_d" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 0, 248)") ++ (match_test "(INTVAL (op) & 7) == 0"))) ++ ++(define_predicate "aarch64_sve_uxtb_immediate" ++ (and (match_code "const_vector") ++ (match_test "GET_MODE_UNIT_BITSIZE (GET_MODE (op)) > 8") ++ (match_test "aarch64_const_vec_all_same_int_p (op, 0xff)"))) ++ ++(define_predicate "aarch64_sve_uxth_immediate" ++ (and (match_code "const_vector") ++ (match_test "GET_MODE_UNIT_BITSIZE (GET_MODE (op)) > 16") ++ (match_test "aarch64_const_vec_all_same_int_p (op, 0xffff)"))) ++ ++(define_predicate "aarch64_sve_uxtw_immediate" ++ (and (match_code "const_vector") ++ (match_test "GET_MODE_UNIT_BITSIZE (GET_MODE (op)) > 32") ++ (match_test "aarch64_const_vec_all_same_int_p (op, 0xffffffff)"))) ++ ++(define_predicate "aarch64_sve_uxt_immediate" ++ (ior (match_operand 0 "aarch64_sve_uxtb_immediate") ++ (match_operand 0 "aarch64_sve_uxth_immediate") ++ (match_operand 0 "aarch64_sve_uxtw_immediate"))) + + (define_predicate "aarch64_sve_logical_immediate" + (and (match_code "const,const_vector") + (match_test "aarch64_sve_bitmask_immediate_p (op)"))) + +-(define_predicate "aarch64_sve_mul_immediate" ++;; Used for SVE UMAX and UMIN. ++(define_predicate "aarch64_sve_vsb_immediate" ++ (and (match_code "const_vector") ++ (match_test "GET_MODE_INNER (GET_MODE (op)) == QImode ++ ? 
aarch64_const_vec_all_same_in_range_p (op, -128, 127) ++ : aarch64_const_vec_all_same_in_range_p (op, 0, 255)"))) ++ ++;; Used for SVE MUL, SMAX and SMIN. ++(define_predicate "aarch64_sve_vsm_immediate" + (and (match_code "const,const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, -128, 127)"))) + + (define_predicate "aarch64_sve_dup_immediate" + (and (match_code "const,const_vector") +- (match_test "aarch64_sve_dup_immediate_p (op)"))) ++ (ior (match_test "aarch64_sve_dup_immediate_p (op)") ++ (match_test "aarch64_float_const_representable_p (op)")))) + + (define_predicate "aarch64_sve_cmp_vsc_immediate" +- (and (match_code "const,const_vector") ++ (and (match_code "const_int,const_vector") + (match_test "aarch64_sve_cmp_immediate_p (op, true)"))) + + (define_predicate "aarch64_sve_cmp_vsd_immediate" +- (and (match_code "const,const_vector") ++ (and (match_code "const_int,const_vector") + (match_test "aarch64_sve_cmp_immediate_p (op, false)"))) + + (define_predicate "aarch64_sve_index_immediate" +@@ -626,14 +746,23 @@ + (and (match_code "const,const_vector") + (match_test "aarch64_sve_float_arith_immediate_p (op, false)"))) + +-(define_predicate "aarch64_sve_float_arith_with_sub_immediate" ++(define_predicate "aarch64_sve_float_negated_arith_immediate" + (and (match_code "const,const_vector") + (match_test "aarch64_sve_float_arith_immediate_p (op, true)"))) + ++(define_predicate "aarch64_sve_float_arith_with_sub_immediate" ++ (ior (match_operand 0 "aarch64_sve_float_arith_immediate") ++ (match_operand 0 "aarch64_sve_float_negated_arith_immediate"))) ++ + (define_predicate "aarch64_sve_float_mul_immediate" + (and (match_code "const,const_vector") + (match_test "aarch64_sve_float_mul_immediate_p (op)"))) + ++(define_predicate "aarch64_sve_float_maxmin_immediate" ++ (and (match_code "const_vector") ++ (ior (match_test "op == CONST0_RTX (GET_MODE (op))") ++ (match_test "op == CONST1_RTX (GET_MODE (op))")))) ++ + (define_predicate "aarch64_sve_arith_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_arith_immediate"))) +@@ -641,12 +770,37 @@ + (define_predicate "aarch64_sve_add_operand" + (ior (match_operand 0 "aarch64_sve_arith_operand") + (match_operand 0 "aarch64_sve_sub_arith_immediate") +- (match_operand 0 "aarch64_sve_inc_dec_immediate"))) ++ (match_operand 0 "aarch64_sve_vector_inc_dec_immediate"))) ++ ++(define_predicate "aarch64_sve_sqadd_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "aarch64_sve_qadd_immediate") ++ (match_operand 0 "aarch64_sve_qsub_immediate"))) ++ ++(define_predicate "aarch64_sve_pred_and_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "aarch64_sve_uxt_immediate"))) + + (define_predicate "aarch64_sve_logical_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_logical_immediate"))) + ++(define_predicate "aarch64_sve_gather_offset_b" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "aarch64_sve_gather_immediate_b"))) ++ ++(define_predicate "aarch64_sve_gather_offset_h" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "aarch64_sve_gather_immediate_h"))) ++ ++(define_predicate "aarch64_sve_gather_offset_w" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "aarch64_sve_gather_immediate_w"))) ++ ++(define_predicate "aarch64_sve_gather_offset_d" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "aarch64_sve_gather_immediate_d"))) ++ + (define_predicate 
"aarch64_sve_lshift_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_simd_lshift_imm"))) +@@ -655,9 +809,17 @@ + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_simd_rshift_imm"))) + +-(define_predicate "aarch64_sve_mul_operand" ++(define_predicate "aarch64_sve_vsb_operand" + (ior (match_operand 0 "register_operand") +- (match_operand 0 "aarch64_sve_mul_immediate"))) ++ (match_operand 0 "aarch64_sve_vsb_immediate"))) ++ ++(define_predicate "aarch64_sve_vsm_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "aarch64_sve_vsm_immediate"))) ++ ++(define_predicate "aarch64_sve_reg_or_dup_imm" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "aarch64_sve_dup_immediate"))) + + (define_predicate "aarch64_sve_cmp_vsc_operand" + (ior (match_operand 0 "register_operand") +@@ -676,17 +838,39 @@ + (match_operand 0 "aarch64_sve_float_arith_immediate"))) + + (define_predicate "aarch64_sve_float_arith_with_sub_operand" +- (ior (match_operand 0 "aarch64_sve_float_arith_operand") ++ (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_float_arith_with_sub_immediate"))) + + (define_predicate "aarch64_sve_float_mul_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_sve_float_mul_immediate"))) + ++(define_predicate "aarch64_sve_float_maxmin_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "aarch64_sve_float_maxmin_immediate"))) ++ + (define_predicate "aarch64_sve_vec_perm_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_constant_vector_operand"))) + ++(define_predicate "aarch64_sve_ptrue_flag" ++ (and (match_code "const_int") ++ (ior (match_test "INTVAL (op) == SVE_MAYBE_NOT_PTRUE") ++ (match_test "INTVAL (op) == SVE_KNOWN_PTRUE")))) ++ ++(define_predicate "aarch64_sve_gp_strictness" ++ (and (match_code "const_int") ++ (ior (match_test "INTVAL (op) == SVE_RELAXED_GP") ++ (match_test "INTVAL (op) == SVE_STRICT_GP")))) ++ ++(define_predicate "aarch64_gather_scale_operand_b" ++ (and (match_code "const_int") ++ (match_test "INTVAL (op) == 1"))) ++ ++(define_predicate "aarch64_gather_scale_operand_h" ++ (and (match_code "const_int") ++ (match_test "INTVAL (op) == 1 || INTVAL (op) == 2"))) ++ + (define_predicate "aarch64_gather_scale_operand_w" + (and (match_code "const_int") + (match_test "INTVAL (op) == 1 || INTVAL (op) == 4"))) +diff --git a/gcc/config/aarch64/saphira.md b/gcc/config/aarch64/saphira.md +index 853deeef0..3cc7bc410 100644 +--- a/gcc/config/aarch64/saphira.md ++++ b/gcc/config/aarch64/saphira.md +@@ -520,7 +520,7 @@ + + (define_insn_reservation "saphira_other_0_nothing" 0 + (and (eq_attr "tune" "saphira") +- (eq_attr "type" "no_insn,trap,block")) ++ (eq_attr "type" "trap,block")) + "nothing") + + (define_insn_reservation "saphira_other_2_ld" 2 +diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 +index ee471f898..28e1c7aec 100644 +--- a/gcc/config/aarch64/t-aarch64 ++++ b/gcc/config/aarch64/t-aarch64 +@@ -40,6 +40,43 @@ aarch64-builtins.o: $(srcdir)/config/aarch64/aarch64-builtins.c $(CONFIG_H) \ + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-builtins.c + ++aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins.def \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ ++ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) $(DIAGNOSTIC_H) 
\ ++ $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ ++ gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) tree-vector-builder.h \ ++ stor-layout.h $(REG_H) alias.h gimple-fold.h langhooks.h \ ++ stringpool.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins.cc ++ ++aarch64-sve-builtins-shapes.o: \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.cc \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ ++ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.cc ++ ++aarch64-sve-builtins-base.o: \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-base.cc \ ++ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ ++ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \ ++ $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ ++ gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) tree-vector-builder.h \ ++ rtx-vector-builder.h vec-perm-indices.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-functions.h ++ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ ++ $(srcdir)/config/aarch64/aarch64-sve-builtins-base.cc ++ + aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \ + $(srcdir)/config/aarch64/iterators.md + $(SHELL) $(srcdir)/config/aarch64/geniterators.sh \ +@@ -103,3 +140,10 @@ aarch64-bti-insert.o: $(srcdir)/config/aarch64/aarch64-bti-insert.c \ + comma=, + MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) + MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) ++ ++insn-conditions.md: s-check-sve-md ++s-check-sve-md: $(srcdir)/config/aarch64/check-sve-md.awk \ ++ $(srcdir)/config/aarch64/aarch64-sve.md ++ $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \ ++ $(srcdir)/config/aarch64/aarch64-sve.md ++ $(STAMP) s-check-sve-md +diff --git a/gcc/config/aarch64/t-aarch64-netbsd b/gcc/config/aarch64/t-aarch64-netbsd +new file mode 100644 +index 000000000..aa447d0f6 +--- /dev/null ++++ b/gcc/config/aarch64/t-aarch64-netbsd +@@ -0,0 +1,21 @@ ++# Machine description for AArch64 architecture. ++# Copyright (C) 2016-2019 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . 
++ ++LIB1ASMSRC = aarch64/lib1funcs.asm ++LIB1ASMFUNCS = _aarch64_sync_cache_range +diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md +index c43c39ecd..bb6e0abb0 100644 +--- a/gcc/config/aarch64/thunderx2t99.md ++++ b/gcc/config/aarch64/thunderx2t99.md +@@ -74,7 +74,7 @@ + + (define_insn_reservation "thunderx2t99_nothing" 0 + (and (eq_attr "tune" "thunderx2t99") +- (eq_attr "type" "no_insn,block")) ++ (eq_attr "type" "block")) + "nothing") + + (define_insn_reservation "thunderx2t99_mrs" 0 +diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md +index 680c48a68..f20055dae 100644 +--- a/gcc/config/aarch64/tsv110.md ++++ b/gcc/config/aarch64/tsv110.md +@@ -281,7 +281,7 @@ + shift_imm,shift_reg,\ + mov_imm,mov_reg,\ + mvn_imm,mvn_reg,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "tsv110_alu1|tsv110_alu2|tsv110_alu3") + + (define_insn_reservation "tsv110_alus" 1 +diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c +index 524379d37..cd6aa117c 100644 +--- a/gcc/config/alpha/alpha.c ++++ b/gcc/config/alpha/alpha.c +@@ -6380,7 +6380,7 @@ alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field); + offset = get_initialized_tmp_var (t, pre_p, NULL); + +- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); ++ indirect = pass_va_arg_by_reference (type); + + if (indirect) + { +diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h +index e2008202a..68eafe194 100644 +--- a/gcc/config/alpha/alpha.h ++++ b/gcc/config/alpha/alpha.h +@@ -759,7 +759,7 @@ do { \ + #define MOVE_MAX 8 + + /* If a memory-to-memory move would take MOVE_RATIO or more simple +- move-instruction pairs, we will do a movmem or libcall instead. ++ move-instruction pairs, we will do a cpymem or libcall instead. + + Without byte/word accesses, we want no more than four instructions; + with, several single byte accesses are better. 
*/ +diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md +index dd340a08e..228dee44c 100644 +--- a/gcc/config/alpha/alpha.md ++++ b/gcc/config/alpha/alpha.md +@@ -4673,7 +4673,7 @@ + ;; Argument 2 is the length + ;; Argument 3 is the alignment + +-(define_expand "movmemqi" ++(define_expand "cpymemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand")) + (use (match_operand:DI 2 "immediate_operand")) +@@ -4686,7 +4686,7 @@ + FAIL; + }) + +-(define_expand "movmemdi" ++(define_expand "cpymemdi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand")) + (use (match_operand:DI 2 "immediate_operand")) +@@ -4703,7 +4703,7 @@ + "TARGET_ABI_OPEN_VMS" + "operands[4] = gen_rtx_SYMBOL_REF (Pmode, \"OTS$MOVE\");") + +-(define_insn "*movmemdi_1" ++(define_insn "*cpymemdi_1" + [(set (match_operand:BLK 0 "memory_operand" "=m,m") + (match_operand:BLK 1 "memory_operand" "m,m")) + (use (match_operand:DI 2 "nonmemory_operand" "r,i")) +diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h +index ac0de6b28..00d2dd2c6 100644 +--- a/gcc/config/arc/arc-protos.h ++++ b/gcc/config/arc/arc-protos.h +@@ -35,7 +35,7 @@ extern void arc_final_prescan_insn (rtx_insn *, rtx *, int); + extern const char *arc_output_libcall (const char *); + extern int arc_output_addsi (rtx *operands, bool, bool); + extern int arc_output_commutative_cond_exec (rtx *operands, bool); +-extern bool arc_expand_movmem (rtx *operands); ++extern bool arc_expand_cpymem (rtx *operands); + extern bool prepare_move_operands (rtx *operands, machine_mode mode); + extern void emit_shift (enum rtx_code, rtx, rtx, rtx); + extern void arc_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx); +diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c +index 325dd3cea..c0f13ebe7 100644 +--- a/gcc/config/arc/arc.c ++++ b/gcc/config/arc/arc.c +@@ -8791,7 +8791,7 @@ arc_output_commutative_cond_exec (rtx *operands, bool output_p) + return 8; + } + +-/* Helper function of arc_expand_movmem. ADDR points to a chunk of memory. ++/* Helper function of arc_expand_cpymem. ADDR points to a chunk of memory. + Emit code and return an potentially modified address such that offsets + up to SIZE are can be added to yield a legitimate address. + if REUSE is set, ADDR is a register that may be modified. */ +@@ -8825,7 +8825,7 @@ force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse) + offset ranges. Return true on success. */ + + bool +-arc_expand_movmem (rtx *operands) ++arc_expand_cpymem (rtx *operands) + { + rtx dst = operands[0]; + rtx src = operands[1]; +@@ -10335,7 +10335,7 @@ arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, + enum by_pieces_operation op, + bool speed_p) + { +- /* Let the movmem expander handle small block moves. */ ++ /* Let the cpymem expander handle small block moves. */ + if (op == MOVE_BY_PIECES) + return false; + +diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h +index 00fc3e471..7ae10a666 100644 +--- a/gcc/config/arc/arc.h ++++ b/gcc/config/arc/arc.h +@@ -1423,7 +1423,7 @@ do { \ + in one reasonably fast instruction. */ + #define MOVE_MAX 4 + +-/* Undo the effects of the movmem pattern presence on STORE_BY_PIECES_P . */ ++/* Undo the effects of the cpymem pattern presence on STORE_BY_PIECES_P . */ + #define MOVE_RATIO(SPEED) ((SPEED) ? 
15 : 3) + + /* Define this to be nonzero if shift instructions ignore all but the +diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md +index 34e8248bc..2cfcf8bdd 100644 +--- a/gcc/config/arc/arc.md ++++ b/gcc/config/arc/arc.md +@@ -5114,13 +5114,13 @@ core_3, archs4x, archs4xd, archs4xd_slow" + (set_attr "type" "loop_end") + (set_attr "length" "4,20")]) + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "") + (match_operand:SI 2 "nonmemory_operand" "") + (match_operand 3 "immediate_operand" "")] + "" +- "if (arc_expand_movmem (operands)) DONE; else FAIL;") ++ "if (arc_expand_cpymem (operands)) DONE; else FAIL;") + + ;; Close http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35803 if this works + ;; to the point that we can generate cmove instructions. +diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h +index 98beb6109..dd1f32798 100644 +--- a/gcc/config/arm/arm-protos.h ++++ b/gcc/config/arm/arm-protos.h +@@ -127,8 +127,8 @@ extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT); + extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool); + extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool); + extern bool valid_operands_ldrd_strd (rtx *, bool); +-extern int arm_gen_movmemqi (rtx *); +-extern bool gen_movmem_ldrd_strd (rtx *); ++extern int arm_gen_cpymemqi (rtx *); ++extern bool gen_cpymem_ldrd_strd (rtx *); + extern machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx); + extern machine_mode arm_select_dominance_cc_mode (rtx, rtx, + HOST_WIDE_INT); +@@ -204,7 +204,7 @@ extern void thumb2_final_prescan_insn (rtx_insn *); + extern const char *thumb_load_double_from_address (rtx *); + extern const char *thumb_output_move_mem_multiple (int, rtx *); + extern const char *thumb_call_via_reg (rtx); +-extern void thumb_expand_movmemqi (rtx *); ++extern void thumb_expand_cpymemqi (rtx *); + extern rtx arm_return_addr (int, rtx); + extern void thumb_reload_out_hi (rtx *); + extern void thumb_set_return_address (rtx, rtx); +diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c +index eba26011e..c8a09329a 100644 +--- a/gcc/config/arm/arm.c ++++ b/gcc/config/arm/arm.c +@@ -14426,7 +14426,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, + core type, optimize_size setting, etc. */ + + static int +-arm_movmemqi_unaligned (rtx *operands) ++arm_cpymemqi_unaligned (rtx *operands) + { + HOST_WIDE_INT length = INTVAL (operands[2]); + +@@ -14463,7 +14463,7 @@ arm_movmemqi_unaligned (rtx *operands) + } + + int +-arm_gen_movmemqi (rtx *operands) ++arm_gen_cpymemqi (rtx *operands) + { + HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes; + HOST_WIDE_INT srcoffset, dstoffset; +@@ -14477,7 +14477,7 @@ arm_gen_movmemqi (rtx *operands) + return 0; + + if (unaligned_access && (INTVAL (operands[3]) & 3) != 0) +- return arm_movmemqi_unaligned (operands); ++ return arm_cpymemqi_unaligned (operands); + + if (INTVAL (operands[3]) & 3) + return 0; +@@ -14611,7 +14611,7 @@ arm_gen_movmemqi (rtx *operands) + return 1; + } + +-/* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx ++/* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx + by mode size. */ + inline static rtx + next_consecutive_mem (rtx mem) +@@ -14626,7 +14626,7 @@ next_consecutive_mem (rtx mem) + /* Copy using LDRD/STRD instructions whenever possible. + Returns true upon success. 
*/ + bool +-gen_movmem_ldrd_strd (rtx *operands) ++gen_cpymem_ldrd_strd (rtx *operands) + { + unsigned HOST_WIDE_INT len; + HOST_WIDE_INT align; +@@ -14670,7 +14670,7 @@ gen_movmem_ldrd_strd (rtx *operands) + + /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */ + if (!(dst_aligned || src_aligned)) +- return arm_gen_movmemqi (operands); ++ return arm_gen_cpymemqi (operands); + + /* If the either src or dst is unaligned we'll be accessing it as pairs + of unaligned SImode accesses. Otherwise we can generate DImode +@@ -26472,7 +26472,7 @@ thumb_call_via_reg (rtx reg) + + /* Routines for generating rtl. */ + void +-thumb_expand_movmemqi (rtx *operands) ++thumb_expand_cpymemqi (rtx *operands) + { + rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0)); + rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); +@@ -26481,13 +26481,13 @@ thumb_expand_movmemqi (rtx *operands) + + while (len >= 12) + { +- emit_insn (gen_movmem12b (out, in, out, in)); ++ emit_insn (gen_cpymem12b (out, in, out, in)); + len -= 12; + } + + if (len >= 8) + { +- emit_insn (gen_movmem8b (out, in, out, in)); ++ emit_insn (gen_cpymem8b (out, in, out, in)); + len -= 8; + } + +diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md +index 53e54874c..a1b9d9fac 100644 +--- a/gcc/config/arm/arm.md ++++ b/gcc/config/arm/arm.md +@@ -7260,7 +7260,7 @@ + ;; We could let this apply for blocks of less than this, but it clobbers so + ;; many registers that there is then probably a better way. + +-(define_expand "movmemqi" ++(define_expand "cpymemqi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" "") +@@ -7272,12 +7272,12 @@ + if (TARGET_LDRD && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + { +- if (gen_movmem_ldrd_strd (operands)) ++ if (gen_cpymem_ldrd_strd (operands)) + DONE; + FAIL; + } + +- if (arm_gen_movmemqi (operands)) ++ if (arm_gen_cpymemqi (operands)) + DONE; + FAIL; + } +@@ -7287,7 +7287,7 @@ + || INTVAL (operands[2]) > 48) + FAIL; + +- thumb_expand_movmemqi (operands); ++ thumb_expand_cpymemqi (operands); + DONE; + } + " +@@ -8807,6 +8807,8 @@ + [(set_attr "arch" "t1,32")] + ) + ++;; DO NOT SPLIT THIS INSN. It's important for security reasons that the ++;; canary value does not live beyond the life of this sequence. 
+ (define_insn "*stack_protect_set_insn" + [(set (match_operand:SI 0 "memory_operand" "=m,m") + (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "+&l,&r"))] +@@ -8814,8 +8816,8 @@ + (clobber (match_dup 1))] + "" + "@ +- ldr\\t%1, [%1]\;str\\t%1, %0\;movs\t%1,#0 +- ldr\\t%1, [%1]\;str\\t%1, %0\;mov\t%1,#0" ++ ldr\\t%1, [%1]\;str\\t%1, %0\;movs\t%1, #0 ++ ldr\\t%1, [%1]\;str\\t%1, %0\;mov\t%1, #0" + [(set_attr "length" "8,12") + (set_attr "conds" "clob,nocond") + (set_attr "type" "multiple") +diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md +index b835cbaaa..c4c038b04 100644 +--- a/gcc/config/arm/arm1020e.md ++++ b/gcc/config/arm/arm1020e.md +@@ -72,7 +72,7 @@ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ +- multiple,no_insn")) ++ multiple")) + "1020a_e,1020a_m,1020a_w") + + ;; ALU operations with a shift-by-constant operand +diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md +index 05f4d724f..88546872a 100644 +--- a/gcc/config/arm/arm1026ejs.md ++++ b/gcc/config/arm/arm1026ejs.md +@@ -72,7 +72,7 @@ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ +- multiple,no_insn")) ++ multiple")) + "a_e,a_m,a_w") + + ;; ALU operations with a shift-by-constant operand +diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md +index ae0b54f5e..e7fd53afe 100644 +--- a/gcc/config/arm/arm1136jfs.md ++++ b/gcc/config/arm/arm1136jfs.md +@@ -81,7 +81,7 @@ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ +- multiple,no_insn")) ++ multiple")) + "e_1,e_2,e_3,e_wb") + + ;; ALU operations with a shift-by-constant operand +diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md +index db4c7db8c..b4f503159 100644 +--- a/gcc/config/arm/arm926ejs.md ++++ b/gcc/config/arm/arm926ejs.md +@@ -67,7 +67,7 @@ + shift_imm,shift_reg,extend,\ + mov_imm,mov_reg,mov_shift,\ + mvn_imm,mvn_reg,mvn_shift,\ +- multiple,no_insn")) ++ multiple")) + "e,m,w") + + ;; ALU operations with a shift-by-register operand +diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md +index f57f98675..26765c3db 100644 +--- a/gcc/config/arm/cortex-a15.md ++++ b/gcc/config/arm/cortex-a15.md +@@ -68,7 +68,7 @@ + shift_imm,shift_reg,\ + mov_imm,mov_reg,\ + mvn_imm,mvn_reg,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") + + ;; ALU ops with immediate shift +diff --git a/gcc/config/arm/cortex-a17.md b/gcc/config/arm/cortex-a17.md +index a0c6e5141..97b716414 100644 +--- a/gcc/config/arm/cortex-a17.md ++++ b/gcc/config/arm/cortex-a17.md +@@ -42,7 +42,7 @@ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr, mov_imm,mov_reg,\ + mvn_imm,mvn_reg,extend,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "ca17_alu") + + (define_insn_reservation "cortex_a17_alu_shiftimm" 2 +diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md +index efced646a..08aa90856 100644 +--- a/gcc/config/arm/cortex-a5.md ++++ b/gcc/config/arm/cortex-a5.md +@@ -64,7 +64,7 @@ + adr,bfm,clz,rbit,rev,alu_dsp_reg,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "cortex_a5_ex1") + + (define_insn_reservation "cortex_a5_alu_shift" 2 +diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md +index b55d34e91..9b29f3874 100644 +--- a/gcc/config/arm/cortex-a53.md ++++ b/gcc/config/arm/cortex-a53.md +@@ -86,7 +86,7 @@ + alu_sreg,alus_sreg,logic_reg,logics_reg, + 
adc_imm,adcs_imm,adc_reg,adcs_reg, + csel,clz,rbit,rev,alu_dsp_reg, +- mov_reg,mvn_reg,mrs,multiple,no_insn")) ++ mov_reg,mvn_reg,mrs,multiple")) + "cortex_a53_slot_any") + + (define_insn_reservation "cortex_a53_alu_shift" 3 +diff --git a/gcc/config/arm/cortex-a57.md b/gcc/config/arm/cortex-a57.md +index 577dc8d7f..49654bf18 100644 +--- a/gcc/config/arm/cortex-a57.md ++++ b/gcc/config/arm/cortex-a57.md +@@ -301,7 +301,7 @@ + rotate_imm,shift_imm,shift_reg,\ + mov_imm,mov_reg,\ + mvn_imm,mvn_reg,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "ca57_sx1|ca57_sx2") + + ;; ALU ops with immediate shift +diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md +index 1f9d6414e..f1b60aa27 100644 +--- a/gcc/config/arm/cortex-a7.md ++++ b/gcc/config/arm/cortex-a7.md +@@ -149,7 +149,7 @@ + logic_shift_reg,logics_shift_reg,\ + mov_shift,mov_shift_reg,\ + mvn_shift,mvn_shift_reg,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "cortex_a7_ex1") + + ;; Forwarding path for unshifted operands. +diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md +index 980aed86e..e3372453d 100644 +--- a/gcc/config/arm/cortex-a8.md ++++ b/gcc/config/arm/cortex-a8.md +@@ -90,7 +90,7 @@ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,clz,rbit,rev,alu_dsp_reg,\ + shift_imm,shift_reg,\ +- multiple,no_insn")) ++ multiple")) + "cortex_a8_default") + + (define_insn_reservation "cortex_a8_alu_shift" 2 +diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md +index 6402a4438..c8474152c 100644 +--- a/gcc/config/arm/cortex-a9.md ++++ b/gcc/config/arm/cortex-a9.md +@@ -87,7 +87,7 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ + mov_shift_reg,mov_shift,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "cortex_a9_p0_default|cortex_a9_p1_default") + + ;; An instruction using the shifter will go down E1. +diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md +index 60038c1e7..f8efcfcfc 100644 +--- a/gcc/config/arm/cortex-m4.md ++++ b/gcc/config/arm/cortex-m4.md +@@ -42,7 +42,7 @@ + logic_shift_reg,logics_shift_reg,\ + mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ +- mrs,multiple,no_insn") ++ mrs,multiple") + (ior (eq_attr "mul32" "yes") + (eq_attr "widen_mul64" "yes")))) + "cortex_m4_ex") +diff --git a/gcc/config/arm/cortex-m7.md b/gcc/config/arm/cortex-m7.md +index e4695ad66..dfe9a742c 100644 +--- a/gcc/config/arm/cortex-m7.md ++++ b/gcc/config/arm/cortex-m7.md +@@ -48,7 +48,7 @@ + logic_shift_imm,logics_shift_imm,\ + alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ +- mrs,clz,f_mcr,f_mrc,multiple,no_insn")) ++ mrs,clz,f_mcr,f_mrc,multiple")) + "cm7_i0|cm7_i1,cm7_a0|cm7_a1") + + ;; Simple alu with inline shift operation. +diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md +index d7c0135fc..af5db23a6 100644 +--- a/gcc/config/arm/cortex-r4.md ++++ b/gcc/config/arm/cortex-r4.md +@@ -102,7 +102,7 @@ + (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ + logic_shift_reg,logics_shift_reg,\ + mov_shift_reg,mvn_shift_reg,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "cortex_r4_alu_shift_reg") + + ;; An ALU instruction followed by an ALU instruction with no early dep. 
+diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md +index e6625b011..294b79692 100644 +--- a/gcc/config/arm/fa526.md ++++ b/gcc/config/arm/fa526.md +@@ -68,7 +68,7 @@ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "fa526_core") + + (define_insn_reservation "526_alu_shift_op" 2 +diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md +index f2c104fb1..9007050ed 100644 +--- a/gcc/config/arm/fa606te.md ++++ b/gcc/config/arm/fa606te.md +@@ -73,7 +73,7 @@ + logic_shift_reg,logics_shift_reg,\ + mov_imm,mov_reg,mov_shift,mov_shift_reg,\ + mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "fa606te_core") + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md +index 880090fd7..6bdc2e8b5 100644 +--- a/gcc/config/arm/fa626te.md ++++ b/gcc/config/arm/fa626te.md +@@ -74,7 +74,7 @@ + adr,bfm,rev,\ + shift_imm,shift_reg,\ + mov_imm,mov_reg,mvn_imm,mvn_reg,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "fa626te_core") + + (define_insn_reservation "626te_alu_shift_op" 2 +diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md +index cb5fbaf99..f6f2531c8 100644 +--- a/gcc/config/arm/fa726te.md ++++ b/gcc/config/arm/fa726te.md +@@ -91,7 +91,7 @@ + adc_imm,adcs_imm,adc_reg,adcs_reg,\ + adr,bfm,rev,\ + shift_imm,shift_reg,\ +- mrs,multiple,no_insn")) ++ mrs,multiple")) + "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") + + ;; ALU operations with a shift-by-register operand. +diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md +index 041e2db34..f8eb732ac 100644 +--- a/gcc/config/arm/thumb1.md ++++ b/gcc/config/arm/thumb1.md +@@ -985,7 +985,7 @@ + + ;; Thumb block-move insns + +-(define_insn "movmem12b" ++(define_insn "cpymem12b" + [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) + (mem:SI (match_operand:SI 3 "register_operand" "1"))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) +@@ -1007,7 +1007,7 @@ + (set_attr "type" "store_12")] + ) + +-(define_insn "movmem8b" ++(define_insn "cpymem8b" + [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) + (mem:SI (match_operand:SI 3 "register_operand" "1"))) + (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) +diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md +index f8f8dd090..60faad659 100644 +--- a/gcc/config/arm/types.md ++++ b/gcc/config/arm/types.md +@@ -546,6 +546,10 @@ + ; The classification below is for coprocessor instructions + ; + ; coproc ++; ++; The classification below is for TME instructions ++; ++; tme + + (define_attr "type" + "adc_imm,\ +@@ -1091,7 +1095,8 @@ + crypto_sha3,\ + crypto_sm3,\ + crypto_sm4,\ +- coproc" ++ coproc,\ ++ tme" + (const_string "untyped")) + + ; Is this an (integer side) multiply with a 32-bit (or smaller) result? 
+@@ -1215,3 +1220,7 @@ + crypto_sha256_fast, crypto_sha256_slow") + (const_string "yes") + (const_string "no"))) ++ ++(define_insn_reservation "no_reservation" 0 ++ (eq_attr "type" "no_insn") ++ "nothing") +diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md +index 14156421d..81498daa0 100644 +--- a/gcc/config/arm/xgene1.md ++++ b/gcc/config/arm/xgene1.md +@@ -64,11 +64,6 @@ + (eq_attr "type" "branch")) + "xgene1_decode1op") + +-(define_insn_reservation "xgene1_nop" 1 +- (and (eq_attr "tune" "xgene1") +- (eq_attr "type" "no_insn")) +- "xgene1_decode1op") +- + (define_insn_reservation "xgene1_call" 1 + (and (eq_attr "tune" "xgene1") + (eq_attr "type" "call")) +diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h +index dd0babbd7..31fe3a66d 100644 +--- a/gcc/config/avr/avr-protos.h ++++ b/gcc/config/avr/avr-protos.h +@@ -82,7 +82,7 @@ extern rtx avr_to_int_mode (rtx); + + extern void avr_expand_prologue (void); + extern void avr_expand_epilogue (bool); +-extern bool avr_emit_movmemhi (rtx*); ++extern bool avr_emit_cpymemhi (rtx*); + extern int avr_epilogue_uses (int regno); + + extern void avr_output_addr_vec (rtx_insn*, rtx); +@@ -92,7 +92,7 @@ extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL, bool =true); + extern const char* avr_out_round (rtx_insn *, rtx*, int* =NULL); + extern const char* avr_out_addto_sp (rtx*, int*); + extern const char* avr_out_xload (rtx_insn *, rtx*, int*); +-extern const char* avr_out_movmem (rtx_insn *, rtx*, int*); ++extern const char* avr_out_cpymem (rtx_insn *, rtx*, int*); + extern const char* avr_out_insert_bits (rtx*, int*); + extern bool avr_popcount_each_byte (rtx, int, int); + extern bool avr_has_nibble_0xf (rtx); +diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c +index cb4b14ae3..3e6e5d2ee 100644 +--- a/gcc/config/avr/avr.c ++++ b/gcc/config/avr/avr.c +@@ -9421,7 +9421,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len) + case ADJUST_LEN_MOV16: output_movhi (insn, op, &len); break; + case ADJUST_LEN_MOV24: avr_out_movpsi (insn, op, &len); break; + case ADJUST_LEN_MOV32: output_movsisf (insn, op, &len); break; +- case ADJUST_LEN_MOVMEM: avr_out_movmem (insn, op, &len); break; ++ case ADJUST_LEN_CPYMEM: avr_out_cpymem (insn, op, &len); break; + case ADJUST_LEN_XLOAD: avr_out_xload (insn, op, &len); break; + case ADJUST_LEN_SEXT: avr_out_sign_extend (insn, op, &len); break; + +@@ -13338,7 +13338,7 @@ avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op, + } + + +-/* Worker function for movmemhi expander. ++/* Worker function for cpymemhi expander. + XOP[0] Destination as MEM:BLK + XOP[1] Source " " + XOP[2] # Bytes to copy +@@ -13347,7 +13347,7 @@ avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op, + Return FALSE if the operand compination is not supported. */ + + bool +-avr_emit_movmemhi (rtx *xop) ++avr_emit_cpymemhi (rtx *xop) + { + HOST_WIDE_INT count; + machine_mode loop_mode; +@@ -13424,14 +13424,14 @@ avr_emit_movmemhi (rtx *xop) + Do the copy-loop inline. */ + + rtx (*fun) (rtx, rtx, rtx) +- = QImode == loop_mode ? gen_movmem_qi : gen_movmem_hi; ++ = QImode == loop_mode ? gen_cpymem_qi : gen_cpymem_hi; + + insn = fun (xas, loop_reg, loop_reg); + } + else + { + rtx (*fun) (rtx, rtx) +- = QImode == loop_mode ? gen_movmemx_qi : gen_movmemx_hi; ++ = QImode == loop_mode ? gen_cpymemx_qi : gen_cpymemx_hi; + + emit_move_insn (gen_rtx_REG (QImode, 23), a_hi8); + +@@ -13445,7 +13445,7 @@ avr_emit_movmemhi (rtx *xop) + } + + +-/* Print assembler for movmem_qi, movmem_hi insns... 
++/* Print assembler for cpymem_qi, cpymem_hi insns... + $0 : Address Space + $1, $2 : Loop register + Z : Source address +@@ -13453,7 +13453,7 @@ avr_emit_movmemhi (rtx *xop) + */ + + const char* +-avr_out_movmem (rtx_insn *insn ATTRIBUTE_UNUSED, rtx *op, int *plen) ++avr_out_cpymem (rtx_insn *insn ATTRIBUTE_UNUSED, rtx *op, int *plen) + { + addr_space_t as = (addr_space_t) INTVAL (op[0]); + machine_mode loop_mode = GET_MODE (op[1]); +diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md +index f263b693c..e85bf4963 100644 +--- a/gcc/config/avr/avr.md ++++ b/gcc/config/avr/avr.md +@@ -70,7 +70,7 @@ + + (define_c_enum "unspec" + [UNSPEC_STRLEN +- UNSPEC_MOVMEM ++ UNSPEC_CPYMEM + UNSPEC_INDEX_JMP + UNSPEC_FMUL + UNSPEC_FMULS +@@ -158,7 +158,7 @@ + tsthi, tstpsi, tstsi, compare, compare64, call, + mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32, + ufract, sfract, round, +- xload, movmem, ++ xload, cpymem, + ashlqi, ashrqi, lshrqi, + ashlhi, ashrhi, lshrhi, + ashlsi, ashrsi, lshrsi, +@@ -992,20 +992,20 @@ + ;;========================================================================= + ;; move string (like memcpy) + +-(define_expand "movmemhi" ++(define_expand "cpymemhi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:HI 2 "const_int_operand" "")) + (use (match_operand:HI 3 "const_int_operand" ""))])] + "" + { +- if (avr_emit_movmemhi (operands)) ++ if (avr_emit_cpymemhi (operands)) + DONE; + + FAIL; + }) + +-(define_mode_attr MOVMEM_r_d [(QI "r") ++(define_mode_attr CPYMEM_r_d [(QI "r") + (HI "wd")]) + + ;; $0 : Address Space +@@ -1013,23 +1013,23 @@ + ;; R30 : source address + ;; R26 : destination address + +-;; "movmem_qi" +-;; "movmem_hi" +-(define_insn "movmem_" ++;; "cpymem_qi" ++;; "cpymem_hi" ++(define_insn "cpymem_" + [(set (mem:BLK (reg:HI REG_X)) + (mem:BLK (reg:HI REG_Z))) + (unspec [(match_operand:QI 0 "const_int_operand" "n")] +- UNSPEC_MOVMEM) +- (use (match_operand:QIHI 1 "register_operand" "")) ++ UNSPEC_CPYMEM) ++ (use (match_operand:QIHI 1 "register_operand" "")) + (clobber (reg:HI REG_X)) + (clobber (reg:HI REG_Z)) + (clobber (reg:QI LPM_REGNO)) + (clobber (match_operand:QIHI 2 "register_operand" "=1"))] + "" + { +- return avr_out_movmem (insn, operands, NULL); ++ return avr_out_cpymem (insn, operands, NULL); + } +- [(set_attr "adjust_len" "movmem") ++ [(set_attr "adjust_len" "cpymem") + (set_attr "cc" "clobber")]) + + +@@ -1039,14 +1039,14 @@ + ;; R23:Z : 24-bit source address + ;; R26 : 16-bit destination address + +-;; "movmemx_qi" +-;; "movmemx_hi" +-(define_insn "movmemx_" ++;; "cpymemx_qi" ++;; "cpymemx_hi" ++(define_insn "cpymemx_" + [(set (mem:BLK (reg:HI REG_X)) + (mem:BLK (lo_sum:PSI (reg:QI 23) + (reg:HI REG_Z)))) + (unspec [(match_operand:QI 0 "const_int_operand" "n")] +- UNSPEC_MOVMEM) ++ UNSPEC_CPYMEM) + (use (reg:QIHI 24)) + (clobber (reg:HI REG_X)) + (clobber (reg:HI REG_Z)) +diff --git a/gcc/config/bfin/bfin-protos.h b/gcc/config/bfin/bfin-protos.h +index 64a184275..7d0f705e0 100644 +--- a/gcc/config/bfin/bfin-protos.h ++++ b/gcc/config/bfin/bfin-protos.h +@@ -81,7 +81,7 @@ extern bool expand_move (rtx *, machine_mode); + extern void bfin_expand_call (rtx, rtx, rtx, rtx, int); + extern bool bfin_longcall_p (rtx, int); + extern bool bfin_dsp_memref_p (rtx); +-extern bool bfin_expand_movmem (rtx, rtx, rtx, rtx); ++extern bool bfin_expand_cpymem (rtx, rtx, rtx, rtx); + + extern enum reg_class secondary_input_reload_class (enum reg_class, + machine_mode, +diff 
--git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c +index 97c2c12d5..288a2ff59 100644 +--- a/gcc/config/bfin/bfin.c ++++ b/gcc/config/bfin/bfin.c +@@ -3208,7 +3208,7 @@ output_pop_multiple (rtx insn, rtx *operands) + /* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE. */ + + static void +-single_move_for_movmem (rtx dst, rtx src, machine_mode mode, HOST_WIDE_INT offset) ++single_move_for_cpymem (rtx dst, rtx src, machine_mode mode, HOST_WIDE_INT offset) + { + rtx scratch = gen_reg_rtx (mode); + rtx srcmem, dstmem; +@@ -3224,7 +3224,7 @@ single_move_for_movmem (rtx dst, rtx src, machine_mode mode, HOST_WIDE_INT offse + back on a different method. */ + + bool +-bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) ++bfin_expand_cpymem (rtx dst, rtx src, rtx count_exp, rtx align_exp) + { + rtx srcreg, destreg, countreg; + HOST_WIDE_INT align = 0; +@@ -3269,7 +3269,7 @@ bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) + { + if ((count & ~3) == 4) + { +- single_move_for_movmem (dst, src, SImode, offset); ++ single_move_for_cpymem (dst, src, SImode, offset); + offset = 4; + } + else if (count & ~3) +@@ -3282,7 +3282,7 @@ bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) + } + if (count & 2) + { +- single_move_for_movmem (dst, src, HImode, offset); ++ single_move_for_cpymem (dst, src, HImode, offset); + offset += 2; + } + } +@@ -3290,7 +3290,7 @@ bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) + { + if ((count & ~1) == 2) + { +- single_move_for_movmem (dst, src, HImode, offset); ++ single_move_for_cpymem (dst, src, HImode, offset); + offset = 2; + } + else if (count & ~1) +@@ -3304,7 +3304,7 @@ bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) + } + if (count & 1) + { +- single_move_for_movmem (dst, src, QImode, offset); ++ single_move_for_cpymem (dst, src, QImode, offset); + } + return true; + } +diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h +index 19b7f819d..4aba596f6 100644 +--- a/gcc/config/bfin/bfin.h ++++ b/gcc/config/bfin/bfin.h +@@ -793,7 +793,7 @@ typedef struct { + #define MOVE_MAX UNITS_PER_WORD + + /* If a memory-to-memory move would take MOVE_RATIO or more simple +- move-instruction pairs, we will do a movmem or libcall instead. */ ++ move-instruction pairs, we will do a cpymem or libcall instead. 
*/ + + #define MOVE_RATIO(speed) 5 + +diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md +index ac5892424..6ac208d04 100644 +--- a/gcc/config/bfin/bfin.md ++++ b/gcc/config/bfin/bfin.md +@@ -2316,14 +2316,14 @@ + (set_attr "length" "16") + (set_attr "seq_insns" "multi")]) + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "const_int_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" + { +- if (bfin_expand_movmem (operands[0], operands[1], operands[2], operands[3])) ++ if (bfin_expand_cpymem (operands[0], operands[1], operands[2], operands[3])) + DONE; + FAIL; + }) +diff --git a/gcc/config/c6x/c6x-protos.h b/gcc/config/c6x/c6x-protos.h +index a657969a2..8c04c315a 100644 +--- a/gcc/config/c6x/c6x-protos.h ++++ b/gcc/config/c6x/c6x-protos.h +@@ -35,7 +35,7 @@ extern bool c6x_long_call_p (rtx); + extern void c6x_expand_call (rtx, rtx, bool); + extern rtx c6x_expand_compare (rtx, machine_mode); + extern bool c6x_force_op_for_comparison_p (enum rtx_code, rtx); +-extern bool c6x_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx); ++extern bool c6x_expand_cpymem (rtx, rtx, rtx, rtx, rtx, rtx); + + extern rtx c6x_subword (rtx, bool); + extern void split_di (rtx *, int, rtx *, rtx *); +diff --git a/gcc/config/c6x/c6x.c b/gcc/config/c6x/c6x.c +index 9a07c4013..e4176774b 100644 +--- a/gcc/config/c6x/c6x.c ++++ b/gcc/config/c6x/c6x.c +@@ -1683,10 +1683,10 @@ c6x_valid_mask_p (HOST_WIDE_INT val) + return true; + } + +-/* Expand a block move for a movmemM pattern. */ ++/* Expand a block move for a cpymemM pattern. */ + + bool +-c6x_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, ++c6x_expand_cpymem (rtx dst, rtx src, rtx count_exp, rtx align_exp, + rtx expected_align_exp ATTRIBUTE_UNUSED, + rtx expected_size_exp ATTRIBUTE_UNUSED) + { +diff --git a/gcc/config/c6x/c6x.md b/gcc/config/c6x/c6x.md +index 8218e1dad..f9bf9ba99 100644 +--- a/gcc/config/c6x/c6x.md ++++ b/gcc/config/c6x/c6x.md +@@ -2844,7 +2844,7 @@ + ;; Block moves + ;; ------------------------------------------------------------------------- + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:SI 2 "nonmemory_operand" "")) +@@ -2853,7 +2853,7 @@ + (use (match_operand:SI 5 "const_int_operand" ""))] + "" + { +- if (c6x_expand_movmem (operands[0], operands[1], operands[2], operands[3], ++ if (c6x_expand_cpymem (operands[0], operands[1], operands[2], operands[3], + operands[4], operands[5])) + DONE; + else +diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c +index a7610829f..dcd69698f 100644 +--- a/gcc/config/darwin.c ++++ b/gcc/config/darwin.c +@@ -2150,7 +2150,7 @@ darwin_emit_unwind_label (FILE *file, tree decl, int for_eh, int empty) + if (! for_eh || ! ld_needs_eh_markers) + return; + +- /* FIXME: This only works when the eh for all sections of a function are ++ /* FIXME: This only works when the eh for all sections of a function are + emitted at the same time. If that changes, we would need to use a lookup + table of some form to determine what to do. Also, we should emit the + unadorned label for the partition containing the public label for a +@@ -3325,7 +3325,7 @@ darwin_override_options (void) + + /* Linkers >= ld64-62.1 (at least) are capable of making the necessary PIC + indirections and we no longer need to emit pic symbol stubs. 
+- However, if we are generating code for earlier ones (or for use in the ++ However, if we are generating code for earlier ones (or for use in the + kernel) the stubs might still be required, and this will be set true. + If the user sets it on or off - then that takes precedence. + +@@ -3334,18 +3334,18 @@ darwin_override_options (void) + + if (!global_options_set.x_darwin_symbol_stubs) + { +- if (darwin_target_linker) ++ if (darwin_target_linker) + { + if (strverscmp (darwin_target_linker, MIN_LD64_OMIT_STUBS) < 0) + { + darwin_symbol_stubs = true; + ld_needs_eh_markers = true; + } +- } ++ } + else if (generating_for_darwin_version < 9) + { + /* If we don't know the linker version and we're targeting an old +- system, we know no better than to assume the use of an earlier ++ system, we know no better than to assume the use of an earlier + linker. */ + darwin_symbol_stubs = true; + ld_needs_eh_markers = true; +@@ -3354,7 +3354,7 @@ darwin_override_options (void) + else if (DARWIN_X86 && darwin_symbol_stubs && TARGET_64BIT) + { + inform (input_location, +- "%<-msymbol-stubs%> is not required for 64b code (ignored)"); ++ "%<-mpic-symbol-stubs%> is not required for 64b code (ignored)"); + darwin_symbol_stubs = false; + } + +diff --git a/gcc/config/frv/frv.md b/gcc/config/frv/frv.md +index 064bf53ea..6e8db59fd 100644 +--- a/gcc/config/frv/frv.md ++++ b/gcc/config/frv/frv.md +@@ -1887,7 +1887,7 @@ + ;; Argument 2 is the length + ;; Argument 3 is the alignment + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (use (match_operand:SI 2 "" "")) +diff --git a/gcc/config/ft32/ft32.md b/gcc/config/ft32/ft32.md +index de2394644..9e31f2ca7 100644 +--- a/gcc/config/ft32/ft32.md ++++ b/gcc/config/ft32/ft32.md +@@ -851,7 +851,7 @@ + "stpcpy %b1,%b2 # %0 %b1 %b2" + ) + +-(define_insn "movmemsi" ++(define_insn "cpymemsi" + [(set (match_operand:BLK 0 "memory_operand" "=W,BW") + (match_operand:BLK 1 "memory_operand" "W,BW")) + (use (match_operand:SI 2 "ft32_imm_operand" "KA,KA")) +diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c +index eb06ff9e0..480bb22ee 100644 +--- a/gcc/config/gcn/gcn.c ++++ b/gcc/config/gcn/gcn.c +@@ -2495,7 +2495,7 @@ gcn_gimplify_va_arg_expr (tree valist, tree type, + tree t, u; + bool indirect; + +- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0); ++ indirect = pass_va_arg_by_reference (type); + if (indirect) + { + type = ptr; +diff --git a/gcc/config/h8300/h8300.md b/gcc/config/h8300/h8300.md +index eb0ae835f..42610fddb 100644 +--- a/gcc/config/h8300/h8300.md ++++ b/gcc/config/h8300/h8300.md +@@ -474,11 +474,11 @@ + (set_attr "length_table" "*,movl") + (set_attr "cc" "set_zn,set_znv")]) + +-;; Implement block moves using movmd. Defining movmemsi allows the full ++;; Implement block copies using movmd. Defining cpymemsi allows the full + ;; range of constant lengths (up to 0x40000 bytes when using movmd.l). + ;; See h8sx_emit_movmd for details. + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(use (match_operand:BLK 0 "memory_operand" "")) + (use (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:SI 2 "" "")) +diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c +new file mode 100644 +index 000000000..6afb246eb +--- /dev/null ++++ b/gcc/config/i386/i386-builtins.c +@@ -0,0 +1,2539 @@ ++/* Copyright (C) 1988-2019 Free Software Foundation, Inc. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "rtl.h" ++#include "tree.h" ++#include "memmodel.h" ++#include "gimple.h" ++#include "cfghooks.h" ++#include "cfgloop.h" ++#include "df.h" ++#include "tm_p.h" ++#include "stringpool.h" ++#include "expmed.h" ++#include "optabs.h" ++#include "regs.h" ++#include "emit-rtl.h" ++#include "recog.h" ++#include "cgraph.h" ++#include "diagnostic.h" ++#include "cfgbuild.h" ++#include "alias.h" ++#include "fold-const.h" ++#include "attribs.h" ++#include "calls.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "output.h" ++#include "insn-attr.h" ++#include "flags.h" ++#include "except.h" ++#include "explow.h" ++#include "expr.h" ++#include "cfgrtl.h" ++#include "common/common-target.h" ++#include "langhooks.h" ++#include "reload.h" ++#include "gimplify.h" ++#include "dwarf2.h" ++#include "tm-constrs.h" ++#include "params.h" ++#include "cselib.h" ++#include "sched-int.h" ++#include "opts.h" ++#include "tree-pass.h" ++#include "context.h" ++#include "pass_manager.h" ++#include "target-globals.h" ++#include "gimple-iterator.h" ++#include "tree-vectorizer.h" ++#include "shrink-wrap.h" ++#include "builtins.h" ++#include "rtl-iter.h" ++#include "tree-iterator.h" ++#include "dbgcnt.h" ++#include "case-cfn-macros.h" ++#include "dojump.h" ++#include "fold-const-call.h" ++#include "tree-vrp.h" ++#include "tree-ssanames.h" ++#include "selftest.h" ++#include "selftest-rtl.h" ++#include "print-rtl.h" ++#include "intl.h" ++#include "ifcvt.h" ++#include "symbol-summary.h" ++#include "ipa-prop.h" ++#include "ipa-fnsummary.h" ++#include "wide-int-bitmask.h" ++#include "tree-vector-builder.h" ++#include "debug.h" ++#include "dwarf2out.h" ++#include "i386-builtins.h" ++ ++#undef BDESC ++#undef BDESC_FIRST ++#undef BDESC_END ++ ++/* Macros for verification of enum ix86_builtins order. 
*/ ++#define BDESC_VERIFY(x, y, z) \ ++ gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z))) ++#define BDESC_VERIFYS(x, y, z) \ ++ STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z))) ++ ++BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST, ++ IX86_BUILTIN__BDESC_COMI_LAST, 1); ++BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST, ++ IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1); ++BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, ++ IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1); ++BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST, ++ IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1); ++BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, ++ IX86_BUILTIN__BDESC_ARGS_LAST, 1); ++BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, ++ IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1); ++BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST, ++ IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1); ++BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, ++ IX86_BUILTIN__BDESC_CET_LAST, 1); ++BDESC_VERIFYS (IX86_BUILTIN_MAX, ++ IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1); ++ ++ ++/* Table for the ix86 builtin non-function types. */ ++static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1]; ++ ++/* Retrieve an element from the above table, building some of ++ the types lazily. */ ++ ++static tree ++ix86_get_builtin_type (enum ix86_builtin_type tcode) ++{ ++ unsigned int index; ++ tree type, itype; ++ ++ gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab)); ++ ++ type = ix86_builtin_type_tab[(int) tcode]; ++ if (type != NULL) ++ return type; ++ ++ gcc_assert (tcode > IX86_BT_LAST_PRIM); ++ if (tcode <= IX86_BT_LAST_VECT) ++ { ++ machine_mode mode; ++ ++ index = tcode - IX86_BT_LAST_PRIM - 1; ++ itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]); ++ mode = ix86_builtin_type_vect_mode[index]; ++ ++ type = build_vector_type_for_mode (itype, mode); ++ } ++ else ++ { ++ int quals; ++ ++ index = tcode - IX86_BT_LAST_VECT - 1; ++ if (tcode <= IX86_BT_LAST_PTR) ++ quals = TYPE_UNQUALIFIED; ++ else ++ quals = TYPE_QUAL_CONST; ++ ++ itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]); ++ if (quals != TYPE_UNQUALIFIED) ++ itype = build_qualified_type (itype, quals); ++ ++ type = build_pointer_type (itype); ++ } ++ ++ ix86_builtin_type_tab[(int) tcode] = type; ++ return type; ++} ++ ++/* Table for the ix86 builtin function types. */ ++static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1]; ++ ++/* Retrieve an element from the above table, building some of ++ the types lazily. 
*/ ++ ++static tree ++ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode) ++{ ++ tree type; ++ ++ gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab)); ++ ++ type = ix86_builtin_func_type_tab[(int) tcode]; ++ if (type != NULL) ++ return type; ++ ++ if (tcode <= IX86_BT_LAST_FUNC) ++ { ++ unsigned start = ix86_builtin_func_start[(int) tcode]; ++ unsigned after = ix86_builtin_func_start[(int) tcode + 1]; ++ tree rtype, atype, args = void_list_node; ++ unsigned i; ++ ++ rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]); ++ for (i = after - 1; i > start; --i) ++ { ++ atype = ix86_get_builtin_type (ix86_builtin_func_args[i]); ++ args = tree_cons (NULL, atype, args); ++ } ++ ++ type = build_function_type (rtype, args); ++ } ++ else ++ { ++ unsigned index = tcode - IX86_BT_LAST_FUNC - 1; ++ enum ix86_builtin_func_type icode; ++ ++ icode = ix86_builtin_func_alias_base[index]; ++ type = ix86_get_builtin_func_type (icode); ++ } ++ ++ ix86_builtin_func_type_tab[(int) tcode] = type; ++ return type; ++} ++ ++/* Table for the ix86 builtin decls. */ ++static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; ++ ++struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; ++ ++tree get_ix86_builtin (enum ix86_builtins c) ++{ ++ return ix86_builtins[c]; ++} ++ ++/* Bits that can still enable any inclusion of a builtin. */ ++HOST_WIDE_INT deferred_isa_values = 0; ++HOST_WIDE_INT deferred_isa_values2 = 0; ++ ++/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the ++ MASK and MASK2 of which isa_flags and ix86_isa_flags2 to use in the ++ ix86_builtins_isa array. Stores the function decl in the ix86_builtins ++ array. Returns the function decl or NULL_TREE, if the builtin was not ++ added. ++ ++ If the front end has a special hook for builtin functions, delay adding ++ builtin functions that aren't in the current ISA until the ISA is changed ++ with function specific optimization. Doing so, can save about 300K for the ++ default compiler. When the builtin is expanded, check at that time whether ++ it is valid. ++ ++ If the front end doesn't have a special hook, record all builtins, even if ++ it isn't an instruction set in the current ISA in case the user uses ++ function specific options for a different ISA, so that we don't get scope ++ errors if a builtin is added in the middle of a function scope. */ ++ ++static inline tree ++def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, ++ const char *name, ++ enum ix86_builtin_func_type tcode, ++ enum ix86_builtins code) ++{ ++ tree decl = NULL_TREE; ++ ++ /* An instruction may be 64bit only regardless of ISAs. */ ++ if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) ++ { ++ ix86_builtins_isa[(int) code].isa = mask; ++ ix86_builtins_isa[(int) code].isa2 = mask2; ++ ++ mask &= ~OPTION_MASK_ISA_64BIT; ++ ++ /* Filter out the masks most often ored together with others. 
*/ ++ if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL) ++ && mask != OPTION_MASK_ISA_AVX512VL) ++ mask &= ~OPTION_MASK_ISA_AVX512VL; ++ if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512BW) ++ && mask != OPTION_MASK_ISA_AVX512BW) ++ mask &= ~OPTION_MASK_ISA_AVX512BW; ++ ++ if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) ++ && (mask == 0 || (mask & ix86_isa_flags) != 0)) ++ || (lang_hooks.builtin_function ++ == lang_hooks.builtin_function_ext_scope)) ++ { ++ tree type = ix86_get_builtin_func_type (tcode); ++ decl = add_builtin_function (name, type, code, BUILT_IN_MD, ++ NULL, NULL_TREE); ++ ix86_builtins[(int) code] = decl; ++ ix86_builtins_isa[(int) code].set_and_not_built_p = false; ++ } ++ else ++ { ++ /* Just MASK and MASK2 where set_and_not_built_p == true can potentially ++ include a builtin. */ ++ deferred_isa_values |= mask; ++ deferred_isa_values2 |= mask2; ++ ix86_builtins[(int) code] = NULL_TREE; ++ ix86_builtins_isa[(int) code].tcode = tcode; ++ ix86_builtins_isa[(int) code].name = name; ++ ix86_builtins_isa[(int) code].const_p = false; ++ ix86_builtins_isa[(int) code].pure_p = false; ++ ix86_builtins_isa[(int) code].set_and_not_built_p = true; ++ } ++ } ++ ++ return decl; ++} ++ ++/* Like def_builtin, but also marks the function decl "const". */ ++ ++static inline tree ++def_builtin_const (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name, ++ enum ix86_builtin_func_type tcode, enum ix86_builtins code) ++{ ++ tree decl = def_builtin (mask, mask2, name, tcode, code); ++ if (decl) ++ TREE_READONLY (decl) = 1; ++ else ++ ix86_builtins_isa[(int) code].const_p = true; ++ ++ return decl; ++} ++ ++/* Like def_builtin, but also marks the function decl "pure". */ ++ ++static inline tree ++def_builtin_pure (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name, ++ enum ix86_builtin_func_type tcode, enum ix86_builtins code) ++{ ++ tree decl = def_builtin (mask, mask2, name, tcode, code); ++ if (decl) ++ DECL_PURE_P (decl) = 1; ++ else ++ ix86_builtins_isa[(int) code].pure_p = true; ++ ++ return decl; ++} ++ ++/* Add any new builtin functions for a given ISA that may not have been ++ declared. This saves a bit of space compared to adding all of the ++ declarations to the tree, even if we didn't use them. */ ++ ++void ++ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2) ++{ ++ isa &= ~OPTION_MASK_ISA_64BIT; ++ ++ if ((isa & deferred_isa_values) == 0 ++ && (isa2 & deferred_isa_values2) == 0) ++ return; ++ ++ /* Bits in ISA value can be removed from potential isa values. */ ++ deferred_isa_values &= ~isa; ++ deferred_isa_values2 &= ~isa2; ++ ++ int i; ++ tree saved_current_target_pragma = current_target_pragma; ++ current_target_pragma = NULL_TREE; ++ ++ for (i = 0; i < (int)IX86_BUILTIN_MAX; i++) ++ { ++ if (((ix86_builtins_isa[i].isa & isa) != 0 ++ || (ix86_builtins_isa[i].isa2 & isa2) != 0) ++ && ix86_builtins_isa[i].set_and_not_built_p) ++ { ++ tree decl, type; ++ ++ /* Don't define the builtin again. */ ++ ix86_builtins_isa[i].set_and_not_built_p = false; ++ ++ type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode); ++ decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name, ++ type, i, BUILT_IN_MD, NULL, ++ NULL_TREE); ++ ++ ix86_builtins[i] = decl; ++ if (ix86_builtins_isa[i].const_p) ++ TREE_READONLY (decl) = 1; ++ } ++ } ++ ++ current_target_pragma = saved_current_target_pragma; ++} ++ ++/* TM vector builtins. */ ++ ++/* Reuse the existing x86-specific `struct builtin_description' cause ++ we're lazy. Add casts to make them fit. 
*/ ++static const struct builtin_description bdesc_tm[] = ++{ ++ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, ++ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, ++ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, ++ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, ++ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, ++ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, ++ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, ++ ++ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, ++ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, ++ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, ++ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, ++ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, ++ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, ++ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, ++ ++ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, ++ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, ++ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, ++ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, ++ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, ++ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, ++ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, ++ ++ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID }, ++ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID }, ++ { OPTION_MASK_ISA_AVX, 0, 
CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID }, ++}; ++ ++/* Initialize the transactional memory vector load/store builtins. */ ++ ++static void ++ix86_init_tm_builtins (void) ++{ ++ enum ix86_builtin_func_type ftype; ++ const struct builtin_description *d; ++ size_t i; ++ tree decl; ++ tree attrs_load, attrs_type_load, attrs_store, attrs_type_store; ++ tree attrs_log, attrs_type_log; ++ ++ if (!flag_tm) ++ return; ++ ++ /* If there are no builtins defined, we must be compiling in a ++ language without trans-mem support. */ ++ if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1)) ++ return; ++ ++ /* Use whatever attributes a normal TM load has. */ ++ decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1); ++ attrs_load = DECL_ATTRIBUTES (decl); ++ attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl)); ++ /* Use whatever attributes a normal TM store has. */ ++ decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1); ++ attrs_store = DECL_ATTRIBUTES (decl); ++ attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl)); ++ /* Use whatever attributes a normal TM log has. */ ++ decl = builtin_decl_explicit (BUILT_IN_TM_LOG); ++ attrs_log = DECL_ATTRIBUTES (decl); ++ attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl)); ++ ++ for (i = 0, d = bdesc_tm; ++ i < ARRAY_SIZE (bdesc_tm); ++ i++, d++) ++ { ++ if ((d->mask & ix86_isa_flags) != 0 ++ || (lang_hooks.builtin_function ++ == lang_hooks.builtin_function_ext_scope)) ++ { ++ tree type, attrs, attrs_type; ++ enum built_in_function code = (enum built_in_function) d->code; ++ ++ ftype = (enum ix86_builtin_func_type) d->flag; ++ type = ix86_get_builtin_func_type (ftype); ++ ++ if (BUILTIN_TM_LOAD_P (code)) ++ { ++ attrs = attrs_load; ++ attrs_type = attrs_type_load; ++ } ++ else if (BUILTIN_TM_STORE_P (code)) ++ { ++ attrs = attrs_store; ++ attrs_type = attrs_type_store; ++ } ++ else ++ { ++ attrs = attrs_log; ++ attrs_type = attrs_type_log; ++ } ++ decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL, ++ /* The builtin without the prefix for ++ calling it directly. */ ++ d->name + strlen ("__builtin_"), ++ attrs); ++ /* add_builtin_function() will set the DECL_ATTRIBUTES, now ++ set the TYPE_ATTRIBUTES. */ ++ decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN); ++ ++ set_builtin_decl (code, decl, false); ++ } ++ } ++} ++ ++/* Set up all the MMX/SSE builtins, even builtins for instructions that are not ++ in the current target ISA to allow the user to compile particular modules ++ with different target specific options that differ from the command line ++ options. */ ++static void ++ix86_init_mmx_sse_builtins (void) ++{ ++ const struct builtin_description * d; ++ enum ix86_builtin_func_type ftype; ++ size_t i; ++ ++ /* Add all special builtins with variable number of operands. */ ++ for (i = 0, d = bdesc_special_args; ++ i < ARRAY_SIZE (bdesc_special_args); ++ i++, d++) ++ { ++ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i); ++ if (d->name == 0) ++ continue; ++ ++ ftype = (enum ix86_builtin_func_type) d->flag; ++ def_builtin (d->mask, d->mask2, d->name, ftype, d->code); ++ } ++ BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, ++ IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, ++ ARRAY_SIZE (bdesc_special_args) - 1); ++ ++ /* Add all builtins with variable number of operands. 
*/ ++ for (i = 0, d = bdesc_args; ++ i < ARRAY_SIZE (bdesc_args); ++ i++, d++) ++ { ++ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i); ++ if (d->name == 0) ++ continue; ++ ++ ftype = (enum ix86_builtin_func_type) d->flag; ++ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); ++ } ++ BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST, ++ IX86_BUILTIN__BDESC_ARGS_FIRST, ++ ARRAY_SIZE (bdesc_args) - 1); ++ ++ /* Add all builtins with rounding. */ ++ for (i = 0, d = bdesc_round_args; ++ i < ARRAY_SIZE (bdesc_round_args); ++ i++, d++) ++ { ++ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i); ++ if (d->name == 0) ++ continue; ++ ++ ftype = (enum ix86_builtin_func_type) d->flag; ++ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); ++ } ++ BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, ++ IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, ++ ARRAY_SIZE (bdesc_round_args) - 1); ++ ++ /* pcmpestr[im] insns. */ ++ for (i = 0, d = bdesc_pcmpestr; ++ i < ARRAY_SIZE (bdesc_pcmpestr); ++ i++, d++) ++ { ++ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i); ++ if (d->code == IX86_BUILTIN_PCMPESTRM128) ++ ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT; ++ else ++ ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT; ++ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); ++ } ++ BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST, ++ IX86_BUILTIN__BDESC_PCMPESTR_FIRST, ++ ARRAY_SIZE (bdesc_pcmpestr) - 1); ++ ++ /* pcmpistr[im] insns. */ ++ for (i = 0, d = bdesc_pcmpistr; ++ i < ARRAY_SIZE (bdesc_pcmpistr); ++ i++, d++) ++ { ++ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i); ++ if (d->code == IX86_BUILTIN_PCMPISTRM128) ++ ftype = V16QI_FTYPE_V16QI_V16QI_INT; ++ else ++ ftype = INT_FTYPE_V16QI_V16QI_INT; ++ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); ++ } ++ BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST, ++ IX86_BUILTIN__BDESC_PCMPISTR_FIRST, ++ ARRAY_SIZE (bdesc_pcmpistr) - 1); ++ ++ /* comi/ucomi insns. */ ++ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) ++ { ++ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i); ++ if (d->mask == OPTION_MASK_ISA_SSE2) ++ ftype = INT_FTYPE_V2DF_V2DF; ++ else ++ ftype = INT_FTYPE_V4SF_V4SF; ++ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); ++ } ++ BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST, ++ IX86_BUILTIN__BDESC_COMI_FIRST, ++ ARRAY_SIZE (bdesc_comi) - 1); ++ ++ /* SSE */ ++ def_builtin (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_ldmxcsr", ++ VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR); ++ def_builtin_pure (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_stmxcsr", ++ UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR); ++ ++ /* SSE or 3DNow!A */ ++ def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A ++ /* As it uses V4HImode, we have to require -mmmx too. */ ++ | OPTION_MASK_ISA_MMX, 0, ++ "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR, ++ IX86_BUILTIN_MASKMOVQ); ++ ++ /* SSE2 */ ++ def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_maskmovdqu", ++ VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU); ++ ++ def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_clflush", ++ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH); ++ x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_mfence", ++ VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE); ++ ++ /* SSE3. 
*/ ++ def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_monitor", ++ VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR); ++ def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_mwait", ++ VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT); ++ ++ /* AES */ ++ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, ++ "__builtin_ia32_aesenc128", ++ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128); ++ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, ++ "__builtin_ia32_aesenclast128", ++ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128); ++ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, ++ "__builtin_ia32_aesdec128", ++ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128); ++ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, ++ "__builtin_ia32_aesdeclast128", ++ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128); ++ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, ++ "__builtin_ia32_aesimc128", ++ V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128); ++ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, ++ "__builtin_ia32_aeskeygenassist128", ++ V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128); ++ ++ /* PCLMUL */ ++ def_builtin_const (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2, 0, ++ "__builtin_ia32_pclmulqdq128", ++ V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128); ++ ++ /* RDRND */ ++ def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand16_step", ++ INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP); ++ def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand32_step", ++ INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP); ++ def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, 0, ++ "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG, ++ IX86_BUILTIN_RDRAND64_STEP); ++ ++ /* AVX2 */ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2df", ++ V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT, ++ IX86_BUILTIN_GATHERSIV2DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4df", ++ V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT, ++ IX86_BUILTIN_GATHERSIV4DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2df", ++ V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT, ++ IX86_BUILTIN_GATHERDIV2DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4df", ++ V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT, ++ IX86_BUILTIN_GATHERDIV4DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4sf", ++ V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT, ++ IX86_BUILTIN_GATHERSIV4SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8sf", ++ V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT, ++ IX86_BUILTIN_GATHERSIV8SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf", ++ V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT, ++ IX86_BUILTIN_GATHERDIV4SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf256", ++ V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT, ++ IX86_BUILTIN_GATHERDIV8SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2di", ++ V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT, ++ IX86_BUILTIN_GATHERSIV2DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4di", ++ V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT, ++ IX86_BUILTIN_GATHERSIV4DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2di", ++ V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT, ++ IX86_BUILTIN_GATHERDIV2DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 
0, "__builtin_ia32_gatherdiv4di", ++ V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT, ++ IX86_BUILTIN_GATHERDIV4DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4si", ++ V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT, ++ IX86_BUILTIN_GATHERSIV4SI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8si", ++ V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT, ++ IX86_BUILTIN_GATHERSIV8SI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si", ++ V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT, ++ IX86_BUILTIN_GATHERDIV4SI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si256", ++ V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT, ++ IX86_BUILTIN_GATHERDIV8SI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4df ", ++ V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT, ++ IX86_BUILTIN_GATHERALTSIV4DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8sf ", ++ V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT, ++ IX86_BUILTIN_GATHERALTDIV8SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4di ", ++ V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT, ++ IX86_BUILTIN_GATHERALTSIV4DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8si ", ++ V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT, ++ IX86_BUILTIN_GATHERALTDIV8SI); ++ ++ /* AVX512F */ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16sf", ++ V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT, ++ IX86_BUILTIN_GATHER3SIV16SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8df", ++ V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT, ++ IX86_BUILTIN_GATHER3SIV8DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16sf", ++ V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV16SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8df", ++ V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV8DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16si", ++ V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT, ++ IX86_BUILTIN_GATHER3SIV16SI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8di", ++ V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT, ++ IX86_BUILTIN_GATHER3SIV8DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16si", ++ V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV16SI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8di", ++ V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV8DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8df ", ++ V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, ++ IX86_BUILTIN_GATHER3ALTSIV8DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16sf ", ++ V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, ++ IX86_BUILTIN_GATHER3ALTDIV16SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8di ", ++ V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, ++ IX86_BUILTIN_GATHER3ALTSIV8DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16si ", ++ V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, ++ IX86_BUILTIN_GATHER3ALTDIV16SI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16sf", ++ VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT, ++ IX86_BUILTIN_SCATTERSIV16SF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8df", ++ VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT, ++ 
IX86_BUILTIN_SCATTERSIV8DF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16sf", ++ VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT, ++ IX86_BUILTIN_SCATTERDIV16SF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8df", ++ VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT, ++ IX86_BUILTIN_SCATTERDIV8DF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16si", ++ VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT, ++ IX86_BUILTIN_SCATTERSIV16SI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8di", ++ VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT, ++ IX86_BUILTIN_SCATTERSIV8DI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16si", ++ VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT, ++ IX86_BUILTIN_SCATTERDIV16SI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8di", ++ VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT, ++ IX86_BUILTIN_SCATTERDIV8DI); ++ ++ /* AVX512VL */ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2df", ++ V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT, ++ IX86_BUILTIN_GATHER3SIV2DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4df", ++ V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT, ++ IX86_BUILTIN_GATHER3SIV4DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2df", ++ V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV2DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4df", ++ V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV4DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4sf", ++ V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT, ++ IX86_BUILTIN_GATHER3SIV4SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8sf", ++ V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT, ++ IX86_BUILTIN_GATHER3SIV8SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4sf", ++ V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV4SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8sf", ++ V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV8SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2di", ++ V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT, ++ IX86_BUILTIN_GATHER3SIV2DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4di", ++ V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT, ++ IX86_BUILTIN_GATHER3SIV4DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2di", ++ V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV2DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4di", ++ V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV4DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4si", ++ V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT, ++ IX86_BUILTIN_GATHER3SIV4SI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8si", ++ V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT, ++ IX86_BUILTIN_GATHER3SIV8SI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4si", ++ V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV4SI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8si", ++ V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT, ++ IX86_BUILTIN_GATHER3DIV8SI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4df ", ++ 
V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT, ++ IX86_BUILTIN_GATHER3ALTSIV4DF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8sf ", ++ V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT, ++ IX86_BUILTIN_GATHER3ALTDIV8SF); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4di ", ++ V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT, ++ IX86_BUILTIN_GATHER3ALTSIV4DI); ++ ++ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8si ", ++ V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT, ++ IX86_BUILTIN_GATHER3ALTDIV8SI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8sf", ++ VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT, ++ IX86_BUILTIN_SCATTERSIV8SF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4sf", ++ VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT, ++ IX86_BUILTIN_SCATTERSIV4SF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4df", ++ VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT, ++ IX86_BUILTIN_SCATTERSIV4DF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2df", ++ VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT, ++ IX86_BUILTIN_SCATTERSIV2DF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8sf", ++ VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT, ++ IX86_BUILTIN_SCATTERDIV8SF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4sf", ++ VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT, ++ IX86_BUILTIN_SCATTERDIV4SF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4df", ++ VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT, ++ IX86_BUILTIN_SCATTERDIV4DF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2df", ++ VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT, ++ IX86_BUILTIN_SCATTERDIV2DF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8si", ++ VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT, ++ IX86_BUILTIN_SCATTERSIV8SI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4si", ++ VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT, ++ IX86_BUILTIN_SCATTERSIV4SI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4di", ++ VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT, ++ IX86_BUILTIN_SCATTERSIV4DI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2di", ++ VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT, ++ IX86_BUILTIN_SCATTERSIV2DI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8si", ++ VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT, ++ IX86_BUILTIN_SCATTERDIV8SI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4si", ++ VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT, ++ IX86_BUILTIN_SCATTERDIV4SI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4di", ++ VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT, ++ IX86_BUILTIN_SCATTERDIV4DI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2di", ++ VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT, ++ IX86_BUILTIN_SCATTERDIV2DI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8df ", ++ VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT, ++ IX86_BUILTIN_SCATTERALTSIV8DF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16sf ", ++ VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT, ++ IX86_BUILTIN_SCATTERALTDIV16SF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8di ", ++ VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT, ++ IX86_BUILTIN_SCATTERALTSIV8DI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16si ", ++ 
VOID_FTYPE_PINT_HI_V8DI_V16SI_INT, ++ IX86_BUILTIN_SCATTERALTDIV16SI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4df ", ++ VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT, ++ IX86_BUILTIN_SCATTERALTSIV4DF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8sf ", ++ VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT, ++ IX86_BUILTIN_SCATTERALTDIV8SF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4di ", ++ VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT, ++ IX86_BUILTIN_SCATTERALTSIV4DI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8si ", ++ VOID_FTYPE_PINT_QI_V4DI_V8SI_INT, ++ IX86_BUILTIN_SCATTERALTDIV8SI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2df ", ++ VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT, ++ IX86_BUILTIN_SCATTERALTSIV2DF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4sf ", ++ VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT, ++ IX86_BUILTIN_SCATTERALTDIV4SF); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2di ", ++ VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT, ++ IX86_BUILTIN_SCATTERALTSIV2DI); ++ ++ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4si ", ++ VOID_FTYPE_PINT_QI_V2DI_V4SI_INT, ++ IX86_BUILTIN_SCATTERALTDIV4SI); ++ ++ /* AVX512PF */ ++ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdpd", ++ VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, ++ IX86_BUILTIN_GATHERPFDPD); ++ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdps", ++ VOID_FTYPE_HI_V16SI_PCVOID_INT_INT, ++ IX86_BUILTIN_GATHERPFDPS); ++ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqpd", ++ VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, ++ IX86_BUILTIN_GATHERPFQPD); ++ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqps", ++ VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, ++ IX86_BUILTIN_GATHERPFQPS); ++ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdpd", ++ VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, ++ IX86_BUILTIN_SCATTERPFDPD); ++ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdps", ++ VOID_FTYPE_HI_V16SI_PCVOID_INT_INT, ++ IX86_BUILTIN_SCATTERPFDPS); ++ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqpd", ++ VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, ++ IX86_BUILTIN_SCATTERPFQPD); ++ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqps", ++ VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, ++ IX86_BUILTIN_SCATTERPFQPS); ++ ++ /* SHA */ ++ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg1", ++ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1); ++ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg2", ++ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2); ++ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1nexte", ++ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE); ++ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1rnds4", ++ V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4); ++ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg1", ++ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1); ++ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg2", ++ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2); ++ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256rnds2", ++ V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2); ++ ++ /* RTM. 
*/ ++ def_builtin (OPTION_MASK_ISA_RTM, 0, "__builtin_ia32_xabort", ++ VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT); ++ ++ /* MMX access to the vec_init patterns. */ ++ def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v2si", ++ V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI); ++ ++ def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v4hi", ++ V4HI_FTYPE_HI_HI_HI_HI, ++ IX86_BUILTIN_VEC_INIT_V4HI); ++ ++ def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v8qi", ++ V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI, ++ IX86_BUILTIN_VEC_INIT_V8QI); ++ ++ /* Access to the vec_extract patterns. */ ++ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2df", ++ DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF); ++ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2di", ++ DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI); ++ def_builtin_const (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_vec_ext_v4sf", ++ FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF); ++ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v4si", ++ SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI); ++ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v8hi", ++ HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI); ++ ++ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A ++ /* As it uses V4HImode, we have to require -mmmx too. */ ++ | OPTION_MASK_ISA_MMX, 0, ++ "__builtin_ia32_vec_ext_v4hi", ++ HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI); ++ ++ def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_ext_v2si", ++ SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI); ++ ++ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi", ++ QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI); ++ ++ /* Access to the vec_set patterns. */ ++ def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, 0, ++ "__builtin_ia32_vec_set_v2di", ++ V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI); ++ ++ def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4sf", ++ V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF); ++ ++ def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4si", ++ V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI); ++ ++ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_set_v8hi", ++ V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI); ++ ++ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A ++ /* As it uses V4HImode, we have to require -mmmx too. 
*/ ++ | OPTION_MASK_ISA_MMX, 0, ++ "__builtin_ia32_vec_set_v4hi", ++ V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI); ++ ++ def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v16qi", ++ V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI); ++ ++ /* RDSEED */ ++ def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_hi_step", ++ INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP); ++ def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_si_step", ++ INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP); ++ def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT, 0, ++ "__builtin_ia32_rdseed_di_step", ++ INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP); ++ ++ /* ADCX */ ++ def_builtin (0, 0, "__builtin_ia32_addcarryx_u32", ++ UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32); ++ def_builtin (OPTION_MASK_ISA_64BIT, 0, ++ "__builtin_ia32_addcarryx_u64", ++ UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, ++ IX86_BUILTIN_ADDCARRYX64); ++ ++ /* SBB */ ++ def_builtin (0, 0, "__builtin_ia32_sbb_u32", ++ UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32); ++ def_builtin (OPTION_MASK_ISA_64BIT, 0, ++ "__builtin_ia32_sbb_u64", ++ UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, ++ IX86_BUILTIN_SBB64); ++ ++ /* Read/write FLAGS. */ ++ if (TARGET_64BIT) ++ { ++ def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_readeflags_u64", ++ UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); ++ def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_writeeflags_u64", ++ VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS); ++ } ++ else ++ { ++ def_builtin (0, 0, "__builtin_ia32_readeflags_u32", ++ UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); ++ def_builtin (0, 0, "__builtin_ia32_writeeflags_u32", ++ VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS); ++ } ++ ++ /* CLFLUSHOPT. */ ++ def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, 0, "__builtin_ia32_clflushopt", ++ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT); ++ ++ /* CLWB. */ ++ def_builtin (OPTION_MASK_ISA_CLWB, 0, "__builtin_ia32_clwb", ++ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB); ++ ++ /* MONITORX and MWAITX. */ ++ def_builtin (0, OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx", ++ VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX); ++ def_builtin (0, OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx", ++ VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX); ++ ++ /* CLZERO. */ ++ def_builtin (0, OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero", ++ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO); ++ ++ /* WAITPKG. */ ++ def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umonitor", ++ VOID_FTYPE_PVOID, IX86_BUILTIN_UMONITOR); ++ def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umwait", ++ UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_UMWAIT); ++ def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_tpause", ++ UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE); ++ ++ /* CLDEMOTE. 
*/ ++ def_builtin (0, OPTION_MASK_ISA_CLDEMOTE, "__builtin_ia32_cldemote", ++ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLDEMOTE); ++ ++ /* Add FMA4 multi-arg argument instructions */ ++ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) ++ { ++ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i); ++ if (d->name == 0) ++ continue; ++ ++ ftype = (enum ix86_builtin_func_type) d->flag; ++ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); ++ } ++ BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST, ++ IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, ++ ARRAY_SIZE (bdesc_multi_arg) - 1); ++ ++ /* Add CET intrinsics. */ ++ for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++) ++ { ++ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i); ++ if (d->name == 0) ++ continue; ++ ++ ftype = (enum ix86_builtin_func_type) d->flag; ++ def_builtin (d->mask, d->mask2, d->name, ftype, d->code); ++ } ++ BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST, ++ IX86_BUILTIN__BDESC_CET_FIRST, ++ ARRAY_SIZE (bdesc_cet) - 1); ++ ++ for (i = 0, d = bdesc_cet_rdssp; ++ i < ARRAY_SIZE (bdesc_cet_rdssp); ++ i++, d++) ++ { ++ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i); ++ if (d->name == 0) ++ continue; ++ ++ ftype = (enum ix86_builtin_func_type) d->flag; ++ def_builtin (d->mask, d->mask2, d->name, ftype, d->code); ++ } ++ BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST, ++ IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, ++ ARRAY_SIZE (bdesc_cet_rdssp) - 1); ++} ++ ++#undef BDESC_VERIFY ++#undef BDESC_VERIFYS ++ ++/* Make builtins to detect cpu type and features supported. NAME is ++ the builtin name, CODE is the builtin code, and FTYPE is the function ++ type of the builtin. */ ++ ++static void ++make_cpu_type_builtin (const char* name, int code, ++ enum ix86_builtin_func_type ftype, bool is_const) ++{ ++ tree decl; ++ tree type; ++ ++ type = ix86_get_builtin_func_type (ftype); ++ decl = add_builtin_function (name, type, code, BUILT_IN_MD, ++ NULL, NULL_TREE); ++ gcc_assert (decl != NULL_TREE); ++ ix86_builtins[(int) code] = decl; ++ TREE_READONLY (decl) = is_const; ++} ++ ++/* Make builtins to get CPU type and features supported. The created ++ builtins are : ++ ++ __builtin_cpu_init (), to detect cpu type and features, ++ __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>, ++ __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>. ++ */ ++ ++static void ++ix86_init_platform_type_builtins (void) ++{ ++ make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT, ++ INT_FTYPE_VOID, false); ++ make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS, ++ INT_FTYPE_PCCHAR, true); ++ make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS, ++ INT_FTYPE_PCCHAR, true); ++} ++ ++/* Internal method for ix86_init_builtins. 
*/ ++ ++static void ++ix86_init_builtins_va_builtins_abi (void) ++{ ++ tree ms_va_ref, sysv_va_ref; ++ tree fnvoid_va_end_ms, fnvoid_va_end_sysv; ++ tree fnvoid_va_start_ms, fnvoid_va_start_sysv; ++ tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv; ++ tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE; ++ ++ if (!TARGET_64BIT) ++ return; ++ fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE); ++ fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE); ++ ms_va_ref = build_reference_type (ms_va_list_type_node); ++ sysv_va_ref = build_pointer_type (TREE_TYPE (sysv_va_list_type_node)); ++ ++ fnvoid_va_end_ms = build_function_type_list (void_type_node, ms_va_ref, ++ NULL_TREE); ++ fnvoid_va_start_ms ++ = build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE); ++ fnvoid_va_end_sysv ++ = build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE); ++ fnvoid_va_start_sysv ++ = build_varargs_function_type_list (void_type_node, sysv_va_ref, ++ NULL_TREE); ++ fnvoid_va_copy_ms ++ = build_function_type_list (void_type_node, ms_va_ref, ++ ms_va_list_type_node, NULL_TREE); ++ fnvoid_va_copy_sysv ++ = build_function_type_list (void_type_node, sysv_va_ref, ++ sysv_va_ref, NULL_TREE); ++ ++ add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms, ++ BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms); ++ add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms, ++ BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms); ++ add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms, ++ BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms); ++ add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv, ++ BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv); ++ add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv, ++ BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv); ++ add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv, ++ BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv); ++} ++ ++static void ++ix86_init_builtin_types (void) ++{ ++ tree float80_type_node, const_string_type_node; ++ ++ /* The __float80 type. */ ++ float80_type_node = long_double_type_node; ++ if (TYPE_MODE (float80_type_node) != XFmode) ++ { ++ if (float64x_type_node != NULL_TREE ++ && TYPE_MODE (float64x_type_node) == XFmode) ++ float80_type_node = float64x_type_node; ++ else ++ { ++ /* The __float80 type. */ ++ float80_type_node = make_node (REAL_TYPE); ++ ++ TYPE_PRECISION (float80_type_node) = 80; ++ layout_type (float80_type_node); ++ } ++ } ++ lang_hooks.types.register_builtin_type (float80_type_node, "__float80"); ++ ++ /* The __float128 type. The node has already been created as ++ _Float128, so we only need to register the __float128 name for ++ it. */ ++ lang_hooks.types.register_builtin_type (float128_type_node, "__float128"); ++ ++ const_string_type_node ++ = build_pointer_type (build_qualified_type ++ (char_type_node, TYPE_QUAL_CONST)); ++ ++ /* This macro is built by i386-builtin-types.awk. */ ++ DEFINE_BUILTIN_PRIMITIVE_TYPES; ++} ++ ++void ++ix86_init_builtins (void) ++{ ++ tree ftype, decl; ++ ++ ix86_init_builtin_types (); ++ ++ /* Builtins to get CPU type and features. */ ++ ix86_init_platform_type_builtins (); ++ ++ /* TFmode support builtins. 
*/ ++ def_builtin_const (0, 0, "__builtin_infq", ++ FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); ++ def_builtin_const (0, 0, "__builtin_huge_valq", ++ FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ); ++ ++ ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING); ++ decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ, ++ BUILT_IN_MD, "nanq", NULL_TREE); ++ TREE_READONLY (decl) = 1; ++ ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl; ++ ++ decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ, ++ BUILT_IN_MD, "nansq", NULL_TREE); ++ TREE_READONLY (decl) = 1; ++ ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl; ++ ++ /* We will expand them to normal call if SSE isn't available since ++ they are used by libgcc. */ ++ ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128); ++ decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ, ++ BUILT_IN_MD, "__fabstf2", NULL_TREE); ++ TREE_READONLY (decl) = 1; ++ ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl; ++ ++ ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128); ++ decl = add_builtin_function ("__builtin_copysignq", ftype, ++ IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD, ++ "__copysigntf3", NULL_TREE); ++ TREE_READONLY (decl) = 1; ++ ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; ++ ++ ix86_init_tm_builtins (); ++ ix86_init_mmx_sse_builtins (); ++ ++ if (TARGET_LP64) ++ ix86_init_builtins_va_builtins_abi (); ++ ++#ifdef SUBTARGET_INIT_BUILTINS ++ SUBTARGET_INIT_BUILTINS; ++#endif ++} ++ ++/* Return the ix86 builtin for CODE. */ ++ ++tree ++ix86_builtin_decl (unsigned code, bool) ++{ ++ if (code >= IX86_BUILTIN_MAX) ++ return error_mark_node; ++ ++ return ix86_builtins[code]; ++} ++ ++/* This returns the target-specific builtin with code CODE if ++ current_function_decl has visibility on this builtin, which is checked ++ using isa flags. Returns NULL_TREE otherwise. */ ++ ++static tree ix86_get_builtin (enum ix86_builtins code) ++{ ++ struct cl_target_option *opts; ++ tree target_tree = NULL_TREE; ++ ++ /* Determine the isa flags of current_function_decl. */ ++ ++ if (current_function_decl) ++ target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl); ++ ++ if (target_tree == NULL) ++ target_tree = target_option_default_node; ++ ++ opts = TREE_TARGET_OPTION (target_tree); ++ ++ if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags) ++ || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2)) ++ return ix86_builtin_decl (code, true); ++ else ++ return NULL_TREE; ++} ++ ++/* Vectorization library interface and handlers. */ ++tree (*ix86_veclib_handler) (combined_fn, tree, tree); ++ ++/* Returns a function decl for a vectorized version of the combined function ++ with combined_fn code FN and the result vector type TYPE, or NULL_TREE ++ if it is not available. 
*/ ++ ++tree ++ix86_builtin_vectorized_function (unsigned int fn, tree type_out, ++ tree type_in) ++{ ++ machine_mode in_mode, out_mode; ++ int in_n, out_n; ++ ++ if (TREE_CODE (type_out) != VECTOR_TYPE ++ || TREE_CODE (type_in) != VECTOR_TYPE) ++ return NULL_TREE; ++ ++ out_mode = TYPE_MODE (TREE_TYPE (type_out)); ++ out_n = TYPE_VECTOR_SUBPARTS (type_out); ++ in_mode = TYPE_MODE (TREE_TYPE (type_in)); ++ in_n = TYPE_VECTOR_SUBPARTS (type_in); ++ ++ switch (fn) ++ { ++ CASE_CFN_EXP2: ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 16 && in_n == 16) ++ return ix86_get_builtin (IX86_BUILTIN_EXP2PS); ++ } ++ break; ++ ++ CASE_CFN_IFLOOR: ++ CASE_CFN_LFLOOR: ++ CASE_CFN_LLFLOOR: ++ /* The round insn does not trap on denormals. */ ++ if (flag_trapping_math || !TARGET_SSE4_1) ++ break; ++ ++ if (out_mode == SImode && in_mode == DFmode) ++ { ++ if (out_n == 4 && in_n == 2) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX); ++ else if (out_n == 8 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256); ++ else if (out_n == 16 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512); ++ } ++ if (out_mode == SImode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256); ++ else if (out_n == 16 && in_n == 16) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512); ++ } ++ break; ++ ++ CASE_CFN_ICEIL: ++ CASE_CFN_LCEIL: ++ CASE_CFN_LLCEIL: ++ /* The round insn does not trap on denormals. */ ++ if (flag_trapping_math || !TARGET_SSE4_1) ++ break; ++ ++ if (out_mode == SImode && in_mode == DFmode) ++ { ++ if (out_n == 4 && in_n == 2) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX); ++ else if (out_n == 8 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256); ++ else if (out_n == 16 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512); ++ } ++ if (out_mode == SImode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256); ++ else if (out_n == 16 && in_n == 16) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512); ++ } ++ break; ++ ++ CASE_CFN_IRINT: ++ CASE_CFN_LRINT: ++ CASE_CFN_LLRINT: ++ if (out_mode == SImode && in_mode == DFmode) ++ { ++ if (out_n == 4 && in_n == 2) ++ return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX); ++ else if (out_n == 8 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256); ++ else if (out_n == 16 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512); ++ } ++ if (out_mode == SImode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256); ++ else if (out_n == 16 && in_n == 16) ++ return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512); ++ } ++ break; ++ ++ CASE_CFN_IROUND: ++ CASE_CFN_LROUND: ++ CASE_CFN_LLROUND: ++ /* The round insn does not trap on denormals. 
*/ ++ if (flag_trapping_math || !TARGET_SSE4_1) ++ break; ++ ++ if (out_mode == SImode && in_mode == DFmode) ++ { ++ if (out_n == 4 && in_n == 2) ++ return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX); ++ else if (out_n == 8 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256); ++ else if (out_n == 16 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512); ++ } ++ if (out_mode == SImode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256); ++ else if (out_n == 16 && in_n == 16) ++ return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512); ++ } ++ break; ++ ++ CASE_CFN_FLOOR: ++ /* The round insn does not trap on denormals. */ ++ if (flag_trapping_math || !TARGET_SSE4_1) ++ break; ++ ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPD); ++ else if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPD256); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPD512); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPS); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPS256); ++ else if (out_n == 16 && in_n == 16) ++ return ix86_get_builtin (IX86_BUILTIN_FLOORPS512); ++ } ++ break; ++ ++ CASE_CFN_CEIL: ++ /* The round insn does not trap on denormals. */ ++ if (flag_trapping_math || !TARGET_SSE4_1) ++ break; ++ ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPD); ++ else if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPD256); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPD512); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPS); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPS256); ++ else if (out_n == 16 && in_n == 16) ++ return ix86_get_builtin (IX86_BUILTIN_CEILPS512); ++ } ++ break; ++ ++ CASE_CFN_TRUNC: ++ /* The round insn does not trap on denormals. */ ++ if (flag_trapping_math || !TARGET_SSE4_1) ++ break; ++ ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return ix86_get_builtin (IX86_BUILTIN_TRUNCPD); ++ else if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_TRUNCPS); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256); ++ else if (out_n == 16 && in_n == 16) ++ return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512); ++ } ++ break; ++ ++ CASE_CFN_RINT: ++ /* The round insn does not trap on denormals. 
*/ ++ if (flag_trapping_math || !TARGET_SSE4_1) ++ break; ++ ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return ix86_get_builtin (IX86_BUILTIN_RINTPD); ++ else if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_RINTPD256); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_RINTPS); ++ else if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_RINTPS256); ++ } ++ break; ++ ++ CASE_CFN_FMA: ++ if (out_mode == DFmode && in_mode == DFmode) ++ { ++ if (out_n == 2 && in_n == 2) ++ return ix86_get_builtin (IX86_BUILTIN_VFMADDPD); ++ if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256); ++ } ++ if (out_mode == SFmode && in_mode == SFmode) ++ { ++ if (out_n == 4 && in_n == 4) ++ return ix86_get_builtin (IX86_BUILTIN_VFMADDPS); ++ if (out_n == 8 && in_n == 8) ++ return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256); ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ /* Dispatch to a handler for a vectorization library. */ ++ if (ix86_veclib_handler) ++ return ix86_veclib_handler (combined_fn (fn), type_out, type_in); ++ ++ return NULL_TREE; ++} ++ ++/* Returns a decl of a function that implements gather load with ++ memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE. ++ Return NULL_TREE if it is not available. */ ++ ++tree ++ix86_vectorize_builtin_gather (const_tree mem_vectype, ++ const_tree index_type, int scale) ++{ ++ bool si; ++ enum ix86_builtins code; ++ ++ if (! TARGET_AVX2 || !TARGET_USE_GATHER) ++ return NULL_TREE; ++ ++ if ((TREE_CODE (index_type) != INTEGER_TYPE ++ && !POINTER_TYPE_P (index_type)) ++ || (TYPE_MODE (index_type) != SImode ++ && TYPE_MODE (index_type) != DImode)) ++ return NULL_TREE; ++ ++ if (TYPE_PRECISION (index_type) > POINTER_SIZE) ++ return NULL_TREE; ++ ++ /* v*gather* insn sign extends index to pointer mode. */ ++ if (TYPE_PRECISION (index_type) < POINTER_SIZE ++ && TYPE_UNSIGNED (index_type)) ++ return NULL_TREE; ++ ++ if (scale <= 0 ++ || scale > 8 ++ || (scale & (scale - 1)) != 0) ++ return NULL_TREE; ++ ++ si = TYPE_MODE (index_type) == SImode; ++ switch (TYPE_MODE (mem_vectype)) ++ { ++ case E_V2DFmode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF; ++ else ++ code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF; ++ break; ++ case E_V4DFmode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF; ++ else ++ code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF; ++ break; ++ case E_V2DImode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI; ++ else ++ code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI; ++ break; ++ case E_V4DImode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI; ++ else ++ code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI; ++ break; ++ case E_V4SFmode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF; ++ else ++ code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF; ++ break; ++ case E_V8SFmode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF; ++ else ++ code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF; ++ break; ++ case E_V4SImode: ++ if (TARGET_AVX512VL) ++ code = si ? 
IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI; ++ else ++ code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI; ++ break; ++ case E_V8SImode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI; ++ else ++ code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI; ++ break; ++ case E_V8DFmode: ++ if (TARGET_AVX512F) ++ code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF; ++ else ++ return NULL_TREE; ++ break; ++ case E_V8DImode: ++ if (TARGET_AVX512F) ++ code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI; ++ else ++ return NULL_TREE; ++ break; ++ case E_V16SFmode: ++ if (TARGET_AVX512F) ++ code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF; ++ else ++ return NULL_TREE; ++ break; ++ case E_V16SImode: ++ if (TARGET_AVX512F) ++ code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI; ++ else ++ return NULL_TREE; ++ break; ++ default: ++ return NULL_TREE; ++ } ++ ++ return ix86_get_builtin (code); ++} ++ ++/* Returns a code for a target-specific builtin that implements ++ reciprocal of the function, or NULL_TREE if not available. */ ++ ++tree ++ix86_builtin_reciprocal (tree fndecl) ++{ ++ enum ix86_builtins fn_code ++ = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); ++ switch (fn_code) ++ { ++ /* Vectorized version of sqrt to rsqrt conversion. */ ++ case IX86_BUILTIN_SQRTPS_NR: ++ return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR); ++ ++ case IX86_BUILTIN_SQRTPS_NR256: ++ return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256); ++ ++ default: ++ return NULL_TREE; ++ } ++} ++ ++/* Priority of i386 features, greater value is higher priority. This is ++ used to decide the order in which function dispatch must happen. For ++ instance, a version specialized for SSE4.2 should be checked for dispatch ++ before a version for SSE3, as SSE4.2 implies SSE3. */ ++enum feature_priority ++{ ++ P_ZERO = 0, ++ P_MMX, ++ P_SSE, ++ P_SSE2, ++ P_SSE3, ++ P_SSSE3, ++ P_PROC_SSSE3, ++ P_SSE4_A, ++ P_PROC_SSE4_A, ++ P_SSE4_1, ++ P_SSE4_2, ++ P_PROC_SSE4_2, ++ P_POPCNT, ++ P_AES, ++ P_PCLMUL, ++ P_AVX, ++ P_PROC_AVX, ++ P_BMI, ++ P_PROC_BMI, ++ P_FMA4, ++ P_XOP, ++ P_PROC_XOP, ++ P_FMA, ++ P_PROC_FMA, ++ P_BMI2, ++ P_AVX2, ++ P_PROC_AVX2, ++ P_AVX512F, ++ P_PROC_AVX512F ++}; ++ ++/* This is the order of bit-fields in __processor_features in cpuinfo.c */ ++enum processor_features ++{ ++ F_CMOV = 0, ++ F_MMX, ++ F_POPCNT, ++ F_SSE, ++ F_SSE2, ++ F_SSE3, ++ F_SSSE3, ++ F_SSE4_1, ++ F_SSE4_2, ++ F_AVX, ++ F_AVX2, ++ F_SSE4_A, ++ F_FMA4, ++ F_XOP, ++ F_FMA, ++ F_AVX512F, ++ F_BMI, ++ F_BMI2, ++ F_AES, ++ F_PCLMUL, ++ F_AVX512VL, ++ F_AVX512BW, ++ F_AVX512DQ, ++ F_AVX512CD, ++ F_AVX512ER, ++ F_AVX512PF, ++ F_AVX512VBMI, ++ F_AVX512IFMA, ++ F_AVX5124VNNIW, ++ F_AVX5124FMAPS, ++ F_AVX512VPOPCNTDQ, ++ F_AVX512VBMI2, ++ F_GFNI, ++ F_VPCLMULQDQ, ++ F_AVX512VNNI, ++ F_AVX512BITALG, ++ F_MAX ++}; ++ ++/* These are the values for vendor types and cpu types and subtypes ++ in cpuinfo.c. Cpu types and subtypes should be subtracted by ++ the corresponding start value. 
*/ ++enum processor_model ++{ ++ M_INTEL = 1, ++ M_AMD, ++ M_CPU_TYPE_START, ++ M_INTEL_BONNELL, ++ M_INTEL_CORE2, ++ M_INTEL_COREI7, ++ M_AMDFAM10H, ++ M_AMDFAM15H, ++ M_INTEL_SILVERMONT, ++ M_INTEL_KNL, ++ M_AMD_BTVER1, ++ M_AMD_BTVER2, ++ M_AMDFAM17H, ++ M_INTEL_KNM, ++ M_INTEL_GOLDMONT, ++ M_INTEL_GOLDMONT_PLUS, ++ M_INTEL_TREMONT, ++ M_CPU_SUBTYPE_START, ++ M_INTEL_COREI7_NEHALEM, ++ M_INTEL_COREI7_WESTMERE, ++ M_INTEL_COREI7_SANDYBRIDGE, ++ M_AMDFAM10H_BARCELONA, ++ M_AMDFAM10H_SHANGHAI, ++ M_AMDFAM10H_ISTANBUL, ++ M_AMDFAM15H_BDVER1, ++ M_AMDFAM15H_BDVER2, ++ M_AMDFAM15H_BDVER3, ++ M_AMDFAM15H_BDVER4, ++ M_AMDFAM17H_ZNVER1, ++ M_INTEL_COREI7_IVYBRIDGE, ++ M_INTEL_COREI7_HASWELL, ++ M_INTEL_COREI7_BROADWELL, ++ M_INTEL_COREI7_SKYLAKE, ++ M_INTEL_COREI7_SKYLAKE_AVX512, ++ M_INTEL_COREI7_CANNONLAKE, ++ M_INTEL_COREI7_ICELAKE_CLIENT, ++ M_INTEL_COREI7_ICELAKE_SERVER, ++ M_AMDFAM17H_ZNVER2, ++ M_INTEL_COREI7_CASCADELAKE ++}; ++ ++struct _arch_names_table ++{ ++ const char *const name; ++ const enum processor_model model; ++}; ++ ++static const _arch_names_table arch_names_table[] = ++{ ++ {"amd", M_AMD}, ++ {"intel", M_INTEL}, ++ {"atom", M_INTEL_BONNELL}, ++ {"slm", M_INTEL_SILVERMONT}, ++ {"core2", M_INTEL_CORE2}, ++ {"corei7", M_INTEL_COREI7}, ++ {"nehalem", M_INTEL_COREI7_NEHALEM}, ++ {"westmere", M_INTEL_COREI7_WESTMERE}, ++ {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE}, ++ {"ivybridge", M_INTEL_COREI7_IVYBRIDGE}, ++ {"haswell", M_INTEL_COREI7_HASWELL}, ++ {"broadwell", M_INTEL_COREI7_BROADWELL}, ++ {"skylake", M_INTEL_COREI7_SKYLAKE}, ++ {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512}, ++ {"cannonlake", M_INTEL_COREI7_CANNONLAKE}, ++ {"icelake-client", M_INTEL_COREI7_ICELAKE_CLIENT}, ++ {"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER}, ++ {"cascadelake", M_INTEL_COREI7_CASCADELAKE}, ++ {"bonnell", M_INTEL_BONNELL}, ++ {"silvermont", M_INTEL_SILVERMONT}, ++ {"goldmont", M_INTEL_GOLDMONT}, ++ {"goldmont-plus", M_INTEL_GOLDMONT_PLUS}, ++ {"tremont", M_INTEL_TREMONT}, ++ {"knl", M_INTEL_KNL}, ++ {"knm", M_INTEL_KNM}, ++ {"amdfam10h", M_AMDFAM10H}, ++ {"barcelona", M_AMDFAM10H_BARCELONA}, ++ {"shanghai", M_AMDFAM10H_SHANGHAI}, ++ {"istanbul", M_AMDFAM10H_ISTANBUL}, ++ {"btver1", M_AMD_BTVER1}, ++ {"amdfam15h", M_AMDFAM15H}, ++ {"bdver1", M_AMDFAM15H_BDVER1}, ++ {"bdver2", M_AMDFAM15H_BDVER2}, ++ {"bdver3", M_AMDFAM15H_BDVER3}, ++ {"bdver4", M_AMDFAM15H_BDVER4}, ++ {"btver2", M_AMD_BTVER2}, ++ {"amdfam17h", M_AMDFAM17H}, ++ {"znver1", M_AMDFAM17H_ZNVER1}, ++ {"znver2", M_AMDFAM17H_ZNVER2}, ++}; ++ ++/* These are the target attribute strings for which a dispatcher is ++ available, from fold_builtin_cpu. 
*/ ++struct _isa_names_table ++{ ++ const char *const name; ++ const enum processor_features feature; ++ const enum feature_priority priority; ++}; ++ ++static const _isa_names_table isa_names_table[] = ++{ ++ {"cmov", F_CMOV, P_ZERO}, ++ {"mmx", F_MMX, P_MMX}, ++ {"popcnt", F_POPCNT, P_POPCNT}, ++ {"sse", F_SSE, P_SSE}, ++ {"sse2", F_SSE2, P_SSE2}, ++ {"sse3", F_SSE3, P_SSE3}, ++ {"ssse3", F_SSSE3, P_SSSE3}, ++ {"sse4a", F_SSE4_A, P_SSE4_A}, ++ {"sse4.1", F_SSE4_1, P_SSE4_1}, ++ {"sse4.2", F_SSE4_2, P_SSE4_2}, ++ {"avx", F_AVX, P_AVX}, ++ {"fma4", F_FMA4, P_FMA4}, ++ {"xop", F_XOP, P_XOP}, ++ {"fma", F_FMA, P_FMA}, ++ {"avx2", F_AVX2, P_AVX2}, ++ {"avx512f", F_AVX512F, P_AVX512F}, ++ {"bmi", F_BMI, P_BMI}, ++ {"bmi2", F_BMI2, P_BMI2}, ++ {"aes", F_AES, P_AES}, ++ {"pclmul", F_PCLMUL, P_PCLMUL}, ++ {"avx512vl",F_AVX512VL, P_ZERO}, ++ {"avx512bw",F_AVX512BW, P_ZERO}, ++ {"avx512dq",F_AVX512DQ, P_ZERO}, ++ {"avx512cd",F_AVX512CD, P_ZERO}, ++ {"avx512er",F_AVX512ER, P_ZERO}, ++ {"avx512pf",F_AVX512PF, P_ZERO}, ++ {"avx512vbmi",F_AVX512VBMI, P_ZERO}, ++ {"avx512ifma",F_AVX512IFMA, P_ZERO}, ++ {"avx5124vnniw",F_AVX5124VNNIW, P_ZERO}, ++ {"avx5124fmaps",F_AVX5124FMAPS, P_ZERO}, ++ {"avx512vpopcntdq",F_AVX512VPOPCNTDQ, P_ZERO}, ++ {"avx512vbmi2", F_AVX512VBMI2, P_ZERO}, ++ {"gfni", F_GFNI, P_ZERO}, ++ {"vpclmulqdq", F_VPCLMULQDQ, P_ZERO}, ++ {"avx512vnni", F_AVX512VNNI, P_ZERO}, ++ {"avx512bitalg", F_AVX512BITALG, P_ZERO} ++}; ++ ++/* This parses the attribute arguments to target in DECL and determines ++ the right builtin to use to match the platform specification. ++ It returns the priority value for this version decl. If PREDICATE_LIST ++ is not NULL, it stores the list of cpu features that need to be checked ++ before dispatching this function. */ ++ ++unsigned int ++get_builtin_code_for_version (tree decl, tree *predicate_list) ++{ ++ tree attrs; ++ struct cl_target_option cur_target; ++ tree target_node; ++ struct cl_target_option *new_target; ++ const char *arg_str = NULL; ++ const char *attrs_str = NULL; ++ char *tok_str = NULL; ++ char *token; ++ ++ enum feature_priority priority = P_ZERO; ++ ++ static unsigned int NUM_FEATURES ++ = sizeof (isa_names_table) / sizeof (_isa_names_table); ++ ++ unsigned int i; ++ ++ tree predicate_chain = NULL_TREE; ++ tree predicate_decl, predicate_arg; ++ ++ attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); ++ gcc_assert (attrs != NULL); ++ ++ attrs = TREE_VALUE (TREE_VALUE (attrs)); ++ ++ gcc_assert (TREE_CODE (attrs) == STRING_CST); ++ attrs_str = TREE_STRING_POINTER (attrs); ++ ++ /* Return priority zero for default function. */ ++ if (strcmp (attrs_str, "default") == 0) ++ return 0; ++ ++ /* Handle arch= if specified. For priority, set it to be 1 more than ++ the best instruction set the processor can handle. For instance, if ++ there is a version for atom and a version for ssse3 (the highest ISA ++ priority for atom), the atom version must be checked for dispatch ++ before the ssse3 version. 
*/ ++ if (strstr (attrs_str, "arch=") != NULL) ++ { ++ cl_target_option_save (&cur_target, &global_options); ++ target_node ++ = ix86_valid_target_attribute_tree (decl, attrs, &global_options, ++ &global_options_set, 0); ++ ++ gcc_assert (target_node); ++ if (target_node == error_mark_node) ++ return 0; ++ new_target = TREE_TARGET_OPTION (target_node); ++ gcc_assert (new_target); ++ ++ if (new_target->arch_specified && new_target->arch > 0) ++ { ++ switch (new_target->arch) ++ { ++ case PROCESSOR_CORE2: ++ arg_str = "core2"; ++ priority = P_PROC_SSSE3; ++ break; ++ case PROCESSOR_NEHALEM: ++ if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_PCLMUL) ++ { ++ arg_str = "westmere"; ++ priority = P_PCLMUL; ++ } ++ else ++ { ++ /* We translate "arch=corei7" and "arch=nehalem" to ++ "corei7" so that it will be mapped to M_INTEL_COREI7 ++ as cpu type to cover all M_INTEL_COREI7_XXXs. */ ++ arg_str = "corei7"; ++ priority = P_PROC_SSE4_2; ++ } ++ break; ++ case PROCESSOR_SANDYBRIDGE: ++ if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C) ++ arg_str = "ivybridge"; ++ else ++ arg_str = "sandybridge"; ++ priority = P_PROC_AVX; ++ break; ++ case PROCESSOR_HASWELL: ++ if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX) ++ arg_str = "broadwell"; ++ else ++ arg_str = "haswell"; ++ priority = P_PROC_AVX2; ++ break; ++ case PROCESSOR_SKYLAKE: ++ arg_str = "skylake"; ++ priority = P_PROC_AVX2; ++ break; ++ case PROCESSOR_SKYLAKE_AVX512: ++ arg_str = "skylake-avx512"; ++ priority = P_PROC_AVX512F; ++ break; ++ case PROCESSOR_CANNONLAKE: ++ arg_str = "cannonlake"; ++ priority = P_PROC_AVX512F; ++ break; ++ case PROCESSOR_ICELAKE_CLIENT: ++ arg_str = "icelake-client"; ++ priority = P_PROC_AVX512F; ++ break; ++ case PROCESSOR_ICELAKE_SERVER: ++ arg_str = "icelake-server"; ++ priority = P_PROC_AVX512F; ++ break; ++ case PROCESSOR_CASCADELAKE: ++ arg_str = "cascadelake"; ++ priority = P_PROC_AVX512F; ++ break; ++ case PROCESSOR_BONNELL: ++ arg_str = "bonnell"; ++ priority = P_PROC_SSSE3; ++ break; ++ case PROCESSOR_KNL: ++ arg_str = "knl"; ++ priority = P_PROC_AVX512F; ++ break; ++ case PROCESSOR_KNM: ++ arg_str = "knm"; ++ priority = P_PROC_AVX512F; ++ break; ++ case PROCESSOR_SILVERMONT: ++ arg_str = "silvermont"; ++ priority = P_PROC_SSE4_2; ++ break; ++ case PROCESSOR_GOLDMONT: ++ arg_str = "goldmont"; ++ priority = P_PROC_SSE4_2; ++ break; ++ case PROCESSOR_GOLDMONT_PLUS: ++ arg_str = "goldmont-plus"; ++ priority = P_PROC_SSE4_2; ++ break; ++ case PROCESSOR_TREMONT: ++ arg_str = "tremont"; ++ priority = P_PROC_SSE4_2; ++ break; ++ case PROCESSOR_AMDFAM10: ++ arg_str = "amdfam10h"; ++ priority = P_PROC_SSE4_A; ++ break; ++ case PROCESSOR_BTVER1: ++ arg_str = "btver1"; ++ priority = P_PROC_SSE4_A; ++ break; ++ case PROCESSOR_BTVER2: ++ arg_str = "btver2"; ++ priority = P_PROC_BMI; ++ break; ++ case PROCESSOR_BDVER1: ++ arg_str = "bdver1"; ++ priority = P_PROC_XOP; ++ break; ++ case PROCESSOR_BDVER2: ++ arg_str = "bdver2"; ++ priority = P_PROC_FMA; ++ break; ++ case PROCESSOR_BDVER3: ++ arg_str = "bdver3"; ++ priority = P_PROC_FMA; ++ break; ++ case PROCESSOR_BDVER4: ++ arg_str = "bdver4"; ++ priority = P_PROC_AVX2; ++ break; ++ case PROCESSOR_ZNVER1: ++ arg_str = "znver1"; ++ priority = P_PROC_AVX2; ++ break; ++ case PROCESSOR_ZNVER2: ++ arg_str = "znver2"; ++ priority = P_PROC_AVX2; ++ break; ++ } ++ } ++ ++ cl_target_option_restore (&global_options, &cur_target); ++ ++ if (predicate_list && arg_str == NULL) ++ { ++ error_at (DECL_SOURCE_LOCATION (decl), ++ "no dispatcher found for the versioning 
attributes"); ++ return 0; ++ } ++ ++ if (predicate_list) ++ { ++ predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS]; ++ /* For a C string literal the length includes the trailing NULL. */ ++ predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str); ++ predicate_chain = tree_cons (predicate_decl, predicate_arg, ++ predicate_chain); ++ } ++ } ++ ++ /* Process feature name. */ ++ tok_str = (char *) xmalloc (strlen (attrs_str) + 1); ++ strcpy (tok_str, attrs_str); ++ token = strtok (tok_str, ","); ++ predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS]; ++ ++ while (token != NULL) ++ { ++ /* Do not process "arch=" */ ++ if (strncmp (token, "arch=", 5) == 0) ++ { ++ token = strtok (NULL, ","); ++ continue; ++ } ++ for (i = 0; i < NUM_FEATURES; ++i) ++ { ++ if (strcmp (token, isa_names_table[i].name) == 0) ++ { ++ if (predicate_list) ++ { ++ predicate_arg = build_string_literal ( ++ strlen (isa_names_table[i].name) + 1, ++ isa_names_table[i].name); ++ predicate_chain = tree_cons (predicate_decl, predicate_arg, ++ predicate_chain); ++ } ++ /* Find the maximum priority feature. */ ++ if (isa_names_table[i].priority > priority) ++ priority = isa_names_table[i].priority; ++ ++ break; ++ } ++ } ++ if (predicate_list && priority == P_ZERO) ++ { ++ error_at (DECL_SOURCE_LOCATION (decl), ++ "ISA %qs is not supported in %<target()%> attribute, " ++ "use %<arch=%> syntax", token); ++ return 0; ++ } ++ token = strtok (NULL, ","); ++ } ++ free (tok_str); ++ ++ if (predicate_list && predicate_chain == NULL_TREE) ++ { ++ error_at (DECL_SOURCE_LOCATION (decl), ++ "no dispatcher found for the versioning attributes: %s", ++ attrs_str); ++ return 0; ++ } ++ else if (predicate_list) ++ { ++ predicate_chain = nreverse (predicate_chain); ++ *predicate_list = predicate_chain; ++ } ++ ++ return priority; ++} ++ ++/* This builds the processor_model struct type defined in ++ libgcc/config/i386/cpuinfo.c */ ++ ++static tree ++build_processor_model_struct (void) ++{ ++ const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype", ++ "__cpu_features"}; ++ tree field = NULL_TREE, field_chain = NULL_TREE; ++ int i; ++ tree type = make_node (RECORD_TYPE); ++ ++ /* The first 3 fields are unsigned int. */ ++ for (i = 0; i < 3; ++i) ++ { ++ field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, ++ get_identifier (field_name[i]), unsigned_type_node); ++ if (field_chain != NULL_TREE) ++ DECL_CHAIN (field) = field_chain; ++ field_chain = field; ++ } ++ ++ /* The last field is an array of unsigned integers of size one. */ ++ field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, ++ get_identifier (field_name[3]), ++ build_array_type (unsigned_type_node, ++ build_index_type (size_one_node))); ++ if (field_chain != NULL_TREE) ++ DECL_CHAIN (field) = field_chain; ++ field_chain = field; ++ ++ finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE); ++ return type; ++} ++ ++/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. 
*/ ++ ++static tree ++make_var_decl (tree type, const char *name) ++{ ++ tree new_decl; ++ ++ new_decl = build_decl (UNKNOWN_LOCATION, ++ VAR_DECL, ++ get_identifier(name), ++ type); ++ ++ DECL_EXTERNAL (new_decl) = 1; ++ TREE_STATIC (new_decl) = 1; ++ TREE_PUBLIC (new_decl) = 1; ++ DECL_INITIAL (new_decl) = 0; ++ DECL_ARTIFICIAL (new_decl) = 0; ++ DECL_PRESERVE_P (new_decl) = 1; ++ ++ make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); ++ assemble_variable (new_decl, 0, 0, 0); ++ ++ return new_decl; ++} ++ ++/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded ++ into an integer defined in libgcc/config/i386/cpuinfo.c */ ++ ++tree ++fold_builtin_cpu (tree fndecl, tree *args) ++{ ++ unsigned int i; ++ enum ix86_builtins fn_code ++ = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); ++ tree param_string_cst = NULL; ++ ++ tree __processor_model_type = build_processor_model_struct (); ++ tree __cpu_model_var = make_var_decl (__processor_model_type, ++ "__cpu_model"); ++ ++ ++ varpool_node::add (__cpu_model_var); ++ ++ gcc_assert ((args != NULL) && (*args != NULL)); ++ ++ param_string_cst = *args; ++ while (param_string_cst ++ && TREE_CODE (param_string_cst) != STRING_CST) ++ { ++ /* *args must be a expr that can contain other EXPRS leading to a ++ STRING_CST. */ ++ if (!EXPR_P (param_string_cst)) ++ { ++ error ("parameter to builtin must be a string constant or literal"); ++ return integer_zero_node; ++ } ++ param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0); ++ } ++ ++ gcc_assert (param_string_cst); ++ ++ if (fn_code == IX86_BUILTIN_CPU_IS) ++ { ++ tree ref; ++ tree field; ++ tree final; ++ ++ unsigned int field_val = 0; ++ unsigned int NUM_ARCH_NAMES ++ = sizeof (arch_names_table) / sizeof (struct _arch_names_table); ++ ++ for (i = 0; i < NUM_ARCH_NAMES; i++) ++ if (strcmp (arch_names_table[i].name, ++ TREE_STRING_POINTER (param_string_cst)) == 0) ++ break; ++ ++ if (i == NUM_ARCH_NAMES) ++ { ++ error ("parameter to builtin not valid: %s", ++ TREE_STRING_POINTER (param_string_cst)); ++ return integer_zero_node; ++ } ++ ++ field = TYPE_FIELDS (__processor_model_type); ++ field_val = arch_names_table[i].model; ++ ++ /* CPU types are stored in the next field. */ ++ if (field_val > M_CPU_TYPE_START ++ && field_val < M_CPU_SUBTYPE_START) ++ { ++ field = DECL_CHAIN (field); ++ field_val -= M_CPU_TYPE_START; ++ } ++ ++ /* CPU subtypes are stored in the next field. */ ++ if (field_val > M_CPU_SUBTYPE_START) ++ { ++ field = DECL_CHAIN ( DECL_CHAIN (field)); ++ field_val -= M_CPU_SUBTYPE_START; ++ } ++ ++ /* Get the appropriate field in __cpu_model. */ ++ ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, ++ field, NULL_TREE); ++ ++ /* Check the value. 
*/ ++ final = build2 (EQ_EXPR, unsigned_type_node, ref, ++ build_int_cstu (unsigned_type_node, field_val)); ++ return build1 (CONVERT_EXPR, integer_type_node, final); ++ } ++ else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS) ++ { ++ tree ref; ++ tree array_elt; ++ tree field; ++ tree final; ++ ++ unsigned int field_val = 0; ++ unsigned int NUM_ISA_NAMES ++ = sizeof (isa_names_table) / sizeof (struct _isa_names_table); ++ ++ for (i = 0; i < NUM_ISA_NAMES; i++) ++ if (strcmp (isa_names_table[i].name, ++ TREE_STRING_POINTER (param_string_cst)) == 0) ++ break; ++ ++ if (i == NUM_ISA_NAMES) ++ { ++ error ("parameter to builtin not valid: %s", ++ TREE_STRING_POINTER (param_string_cst)); ++ return integer_zero_node; ++ } ++ ++ if (isa_names_table[i].feature >= 32) ++ { ++ tree __cpu_features2_var = make_var_decl (unsigned_type_node, ++ "__cpu_features2"); ++ ++ varpool_node::add (__cpu_features2_var); ++ field_val = (1U << (isa_names_table[i].feature - 32)); ++ /* Return __cpu_features2 & field_val */ ++ final = build2 (BIT_AND_EXPR, unsigned_type_node, ++ __cpu_features2_var, ++ build_int_cstu (unsigned_type_node, field_val)); ++ return build1 (CONVERT_EXPR, integer_type_node, final); ++ } ++ ++ field = TYPE_FIELDS (__processor_model_type); ++ /* Get the last field, which is __cpu_features. */ ++ while (DECL_CHAIN (field)) ++ field = DECL_CHAIN (field); ++ ++ /* Get the appropriate field: __cpu_model.__cpu_features */ ++ ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, ++ field, NULL_TREE); ++ ++ /* Access the 0th element of __cpu_features array. */ ++ array_elt = build4 (ARRAY_REF, unsigned_type_node, ref, ++ integer_zero_node, NULL_TREE, NULL_TREE); ++ ++ field_val = (1U << isa_names_table[i].feature); ++ /* Return __cpu_model.__cpu_features[0] & field_val */ ++ final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt, ++ build_int_cstu (unsigned_type_node, field_val)); ++ return build1 (CONVERT_EXPR, integer_type_node, final); ++ } ++ gcc_unreachable (); ++} ++ ++#include "gt-i386-builtins.h" +diff --git a/gcc/config/i386/i386-builtins.h b/gcc/config/i386/i386-builtins.h +new file mode 100644 +index 000000000..c0264e5bf +--- /dev/null ++++ b/gcc/config/i386/i386-builtins.h +@@ -0,0 +1,330 @@ ++/* Copyright (C) 1988-2019 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#ifndef GCC_I386_BUILTINS_H ++#define GCC_I386_BUILTINS_H ++ ++/* The following file contains several enumerations and data structures ++ built from the definitions in i386-builtin-types.def. */ ++ ++#include "i386-builtin-types.inc" ++ ++/* Codes for all the SSE/MMX builtins. Builtins not mentioned in any ++ bdesc_* arrays below should come first, then builtins for each bdesc_* ++ array in ascending order, so that we can use direct array accesses. 
*/ ++enum ix86_builtins ++{ ++ IX86_BUILTIN_MASKMOVQ, ++ IX86_BUILTIN_LDMXCSR, ++ IX86_BUILTIN_STMXCSR, ++ IX86_BUILTIN_MASKMOVDQU, ++ IX86_BUILTIN_PSLLDQ128, ++ IX86_BUILTIN_CLFLUSH, ++ IX86_BUILTIN_MONITOR, ++ IX86_BUILTIN_MWAIT, ++ IX86_BUILTIN_UMONITOR, ++ IX86_BUILTIN_UMWAIT, ++ IX86_BUILTIN_TPAUSE, ++ IX86_BUILTIN_CLZERO, ++ IX86_BUILTIN_CLDEMOTE, ++ IX86_BUILTIN_VEC_INIT_V2SI, ++ IX86_BUILTIN_VEC_INIT_V4HI, ++ IX86_BUILTIN_VEC_INIT_V8QI, ++ IX86_BUILTIN_VEC_EXT_V2DF, ++ IX86_BUILTIN_VEC_EXT_V2DI, ++ IX86_BUILTIN_VEC_EXT_V4SF, ++ IX86_BUILTIN_VEC_EXT_V4SI, ++ IX86_BUILTIN_VEC_EXT_V8HI, ++ IX86_BUILTIN_VEC_EXT_V2SI, ++ IX86_BUILTIN_VEC_EXT_V4HI, ++ IX86_BUILTIN_VEC_EXT_V16QI, ++ IX86_BUILTIN_VEC_SET_V2DI, ++ IX86_BUILTIN_VEC_SET_V4SF, ++ IX86_BUILTIN_VEC_SET_V4SI, ++ IX86_BUILTIN_VEC_SET_V8HI, ++ IX86_BUILTIN_VEC_SET_V4HI, ++ IX86_BUILTIN_VEC_SET_V16QI, ++ IX86_BUILTIN_GATHERSIV2DF, ++ IX86_BUILTIN_GATHERSIV4DF, ++ IX86_BUILTIN_GATHERDIV2DF, ++ IX86_BUILTIN_GATHERDIV4DF, ++ IX86_BUILTIN_GATHERSIV4SF, ++ IX86_BUILTIN_GATHERSIV8SF, ++ IX86_BUILTIN_GATHERDIV4SF, ++ IX86_BUILTIN_GATHERDIV8SF, ++ IX86_BUILTIN_GATHERSIV2DI, ++ IX86_BUILTIN_GATHERSIV4DI, ++ IX86_BUILTIN_GATHERDIV2DI, ++ IX86_BUILTIN_GATHERDIV4DI, ++ IX86_BUILTIN_GATHERSIV4SI, ++ IX86_BUILTIN_GATHERSIV8SI, ++ IX86_BUILTIN_GATHERDIV4SI, ++ IX86_BUILTIN_GATHERDIV8SI, ++ IX86_BUILTIN_GATHER3SIV8SF, ++ IX86_BUILTIN_GATHER3SIV4SF, ++ IX86_BUILTIN_GATHER3SIV4DF, ++ IX86_BUILTIN_GATHER3SIV2DF, ++ IX86_BUILTIN_GATHER3DIV8SF, ++ IX86_BUILTIN_GATHER3DIV4SF, ++ IX86_BUILTIN_GATHER3DIV4DF, ++ IX86_BUILTIN_GATHER3DIV2DF, ++ IX86_BUILTIN_GATHER3SIV8SI, ++ IX86_BUILTIN_GATHER3SIV4SI, ++ IX86_BUILTIN_GATHER3SIV4DI, ++ IX86_BUILTIN_GATHER3SIV2DI, ++ IX86_BUILTIN_GATHER3DIV8SI, ++ IX86_BUILTIN_GATHER3DIV4SI, ++ IX86_BUILTIN_GATHER3DIV4DI, ++ IX86_BUILTIN_GATHER3DIV2DI, ++ IX86_BUILTIN_SCATTERSIV8SF, ++ IX86_BUILTIN_SCATTERSIV4SF, ++ IX86_BUILTIN_SCATTERSIV4DF, ++ IX86_BUILTIN_SCATTERSIV2DF, ++ IX86_BUILTIN_SCATTERDIV8SF, ++ IX86_BUILTIN_SCATTERDIV4SF, ++ IX86_BUILTIN_SCATTERDIV4DF, ++ IX86_BUILTIN_SCATTERDIV2DF, ++ IX86_BUILTIN_SCATTERSIV8SI, ++ IX86_BUILTIN_SCATTERSIV4SI, ++ IX86_BUILTIN_SCATTERSIV4DI, ++ IX86_BUILTIN_SCATTERSIV2DI, ++ IX86_BUILTIN_SCATTERDIV8SI, ++ IX86_BUILTIN_SCATTERDIV4SI, ++ IX86_BUILTIN_SCATTERDIV4DI, ++ IX86_BUILTIN_SCATTERDIV2DI, ++ /* Alternate 4 and 8 element gather/scatter for the vectorizer ++ where all operands are 32-byte or 64-byte wide respectively. 
*/ ++ IX86_BUILTIN_GATHERALTSIV4DF, ++ IX86_BUILTIN_GATHERALTDIV8SF, ++ IX86_BUILTIN_GATHERALTSIV4DI, ++ IX86_BUILTIN_GATHERALTDIV8SI, ++ IX86_BUILTIN_GATHER3ALTDIV16SF, ++ IX86_BUILTIN_GATHER3ALTDIV16SI, ++ IX86_BUILTIN_GATHER3ALTSIV4DF, ++ IX86_BUILTIN_GATHER3ALTDIV8SF, ++ IX86_BUILTIN_GATHER3ALTSIV4DI, ++ IX86_BUILTIN_GATHER3ALTDIV8SI, ++ IX86_BUILTIN_GATHER3ALTSIV8DF, ++ IX86_BUILTIN_GATHER3ALTSIV8DI, ++ IX86_BUILTIN_GATHER3DIV16SF, ++ IX86_BUILTIN_GATHER3DIV16SI, ++ IX86_BUILTIN_GATHER3DIV8DF, ++ IX86_BUILTIN_GATHER3DIV8DI, ++ IX86_BUILTIN_GATHER3SIV16SF, ++ IX86_BUILTIN_GATHER3SIV16SI, ++ IX86_BUILTIN_GATHER3SIV8DF, ++ IX86_BUILTIN_GATHER3SIV8DI, ++ IX86_BUILTIN_SCATTERALTSIV8DF, ++ IX86_BUILTIN_SCATTERALTDIV16SF, ++ IX86_BUILTIN_SCATTERALTSIV8DI, ++ IX86_BUILTIN_SCATTERALTDIV16SI, ++ IX86_BUILTIN_SCATTERALTSIV4DF, ++ IX86_BUILTIN_SCATTERALTDIV8SF, ++ IX86_BUILTIN_SCATTERALTSIV4DI, ++ IX86_BUILTIN_SCATTERALTDIV8SI, ++ IX86_BUILTIN_SCATTERALTSIV2DF, ++ IX86_BUILTIN_SCATTERALTDIV4SF, ++ IX86_BUILTIN_SCATTERALTSIV2DI, ++ IX86_BUILTIN_SCATTERALTDIV4SI, ++ IX86_BUILTIN_SCATTERDIV16SF, ++ IX86_BUILTIN_SCATTERDIV16SI, ++ IX86_BUILTIN_SCATTERDIV8DF, ++ IX86_BUILTIN_SCATTERDIV8DI, ++ IX86_BUILTIN_SCATTERSIV16SF, ++ IX86_BUILTIN_SCATTERSIV16SI, ++ IX86_BUILTIN_SCATTERSIV8DF, ++ IX86_BUILTIN_SCATTERSIV8DI, ++ IX86_BUILTIN_GATHERPFQPD, ++ IX86_BUILTIN_GATHERPFDPS, ++ IX86_BUILTIN_GATHERPFDPD, ++ IX86_BUILTIN_GATHERPFQPS, ++ IX86_BUILTIN_SCATTERPFDPD, ++ IX86_BUILTIN_SCATTERPFDPS, ++ IX86_BUILTIN_SCATTERPFQPD, ++ IX86_BUILTIN_SCATTERPFQPS, ++ IX86_BUILTIN_CLWB, ++ IX86_BUILTIN_CLFLUSHOPT, ++ IX86_BUILTIN_INFQ, ++ IX86_BUILTIN_HUGE_VALQ, ++ IX86_BUILTIN_NANQ, ++ IX86_BUILTIN_NANSQ, ++ IX86_BUILTIN_XABORT, ++ IX86_BUILTIN_ADDCARRYX32, ++ IX86_BUILTIN_ADDCARRYX64, ++ IX86_BUILTIN_SBB32, ++ IX86_BUILTIN_SBB64, ++ IX86_BUILTIN_RDRAND16_STEP, ++ IX86_BUILTIN_RDRAND32_STEP, ++ IX86_BUILTIN_RDRAND64_STEP, ++ IX86_BUILTIN_RDSEED16_STEP, ++ IX86_BUILTIN_RDSEED32_STEP, ++ IX86_BUILTIN_RDSEED64_STEP, ++ IX86_BUILTIN_MONITORX, ++ IX86_BUILTIN_MWAITX, ++ IX86_BUILTIN_CFSTRING, ++ IX86_BUILTIN_CPU_INIT, ++ IX86_BUILTIN_CPU_IS, ++ IX86_BUILTIN_CPU_SUPPORTS, ++ IX86_BUILTIN_READ_FLAGS, ++ IX86_BUILTIN_WRITE_FLAGS, ++ ++ /* All the remaining builtins are tracked in bdesc_* arrays in ++ i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after ++ this point. */ ++#define BDESC(mask, mask2, icode, name, code, comparison, flag) \ ++ code, ++#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) \ ++ code, \ ++ IX86_BUILTIN__BDESC_##kindu##_FIRST = code, ++#define BDESC_END(kind, next_kind) ++ ++#include "i386-builtin.def" ++ ++#undef BDESC ++#undef BDESC_FIRST ++#undef BDESC_END ++ ++ IX86_BUILTIN_MAX, ++ ++ IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX, ++ ++ /* Now just the aliases for bdesc_* start/end. */ ++#define BDESC(mask, mask2, icode, name, code, comparison, flag) ++#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) ++#define BDESC_END(kind, next_kind) \ ++ IX86_BUILTIN__BDESC_##kind##_LAST \ ++ = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1, ++ ++#include "i386-builtin.def" ++ ++#undef BDESC ++#undef BDESC_FIRST ++#undef BDESC_END ++ ++ /* Just to make sure there is no comma after the last enumerator. */ ++ IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST ++}; ++ ++/* Table of all of the builtin functions that are possible with different ISA's ++ but are waiting to be built until a function is declared to use that ++ ISA. 
*/ ++struct builtin_isa { ++ HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */ ++ HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */ ++ const char *name; /* function name */ ++ enum ix86_builtin_func_type tcode; /* type to use in the declaration */ ++ unsigned char const_p:1; /* true if the declaration is constant */ ++ unsigned char pure_p:1; /* true if the declaration has pure attribute */ ++ bool set_and_not_built_p; ++}; ++ ++/* Bits for builtin_description.flag. */ ++ ++/* Set when we don't support the comparison natively, and should ++ swap_comparison in order to support it. */ ++#define BUILTIN_DESC_SWAP_OPERANDS 1 ++ ++struct builtin_description ++{ ++ const HOST_WIDE_INT mask; ++ const HOST_WIDE_INT mask2; ++ const enum insn_code icode; ++ const char *const name; ++ const enum ix86_builtins code; ++ const enum rtx_code comparison; ++ const int flag; ++}; ++ ++#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT ++#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT ++#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT ++#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT ++#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF ++#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF ++#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF ++#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF ++#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI ++#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI ++#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI ++#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI ++#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI ++#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI ++#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI ++#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI ++#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI ++#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI ++#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF ++#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF ++#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI ++#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI ++#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI ++#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI ++#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI ++#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI ++#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI ++#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI ++#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP ++#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP ++#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP ++#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP ++#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF ++#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF ++#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF ++#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF ++#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF ++#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF ++#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF ++#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF ++#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF ++#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF ++#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI ++#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI ++#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI ++#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI ++#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI ++#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI ++#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI ++#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI ++#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI ++#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI ++ ++#define BDESC(mask, mask2, icode, name, 
code, comparison, flag) \ ++ { mask, mask2, icode, name, code, comparison, flag }, ++#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) \ ++static const struct builtin_description bdesc_##kind[] = \ ++{ \ ++ BDESC (mask, mask2, icode, name, code, comparison, flag) ++#define BDESC_END(kind, next_kind) \ ++}; ++ ++#include "i386-builtin.def" ++ ++extern builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; ++ ++tree ix86_builtin_vectorized_function (unsigned int fn, tree type_out, ++ tree type_in); ++void ix86_init_builtins (void); ++tree ix86_vectorize_builtin_gather (const_tree mem_vectype, ++ const_tree index_type, int scale); ++tree ix86_builtin_decl (unsigned code, bool); ++tree ix86_builtin_reciprocal (tree fndecl); ++unsigned int get_builtin_code_for_version (tree decl, tree *predicate_list); ++tree fold_builtin_cpu (tree fndecl, tree *args); ++tree get_ix86_builtin (enum ix86_builtins c); ++ ++#endif /* GCC_I386_BUILTINS_H */ +diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c +index 5e7e46fce..50cac3b1a 100644 +--- a/gcc/config/i386/i386-c.c ++++ b/gcc/config/i386/i386-c.c +@@ -586,8 +586,9 @@ ix86_pragma_target_parse (tree args, tree pop_target) + } + else + { +- cur_tree = ix86_valid_target_attribute_tree (args, &global_options, +- &global_options_set); ++ cur_tree = ix86_valid_target_attribute_tree (NULL_TREE, args, ++ &global_options, ++ &global_options_set, 0); + if (!cur_tree || cur_tree == error_mark_node) + { + cl_target_option_restore (&global_options, +diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c +new file mode 100644 +index 000000000..01f38b9ea +--- /dev/null ++++ b/gcc/config/i386/i386-expand.c +@@ -0,0 +1,19842 @@ ++/* Copyright (C) 1988-2019 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. 
*/ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "rtl.h" ++#include "tree.h" ++#include "memmodel.h" ++#include "gimple.h" ++#include "cfghooks.h" ++#include "cfgloop.h" ++#include "df.h" ++#include "tm_p.h" ++#include "stringpool.h" ++#include "expmed.h" ++#include "optabs.h" ++#include "regs.h" ++#include "emit-rtl.h" ++#include "recog.h" ++#include "cgraph.h" ++#include "diagnostic.h" ++#include "cfgbuild.h" ++#include "alias.h" ++#include "fold-const.h" ++#include "attribs.h" ++#include "calls.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "output.h" ++#include "insn-attr.h" ++#include "flags.h" ++#include "except.h" ++#include "explow.h" ++#include "expr.h" ++#include "cfgrtl.h" ++#include "common/common-target.h" ++#include "langhooks.h" ++#include "reload.h" ++#include "gimplify.h" ++#include "dwarf2.h" ++#include "tm-constrs.h" ++#include "params.h" ++#include "cselib.h" ++#include "sched-int.h" ++#include "opts.h" ++#include "tree-pass.h" ++#include "context.h" ++#include "pass_manager.h" ++#include "target-globals.h" ++#include "gimple-iterator.h" ++#include "tree-vectorizer.h" ++#include "shrink-wrap.h" ++#include "builtins.h" ++#include "rtl-iter.h" ++#include "tree-iterator.h" ++#include "dbgcnt.h" ++#include "case-cfn-macros.h" ++#include "dojump.h" ++#include "fold-const-call.h" ++#include "tree-vrp.h" ++#include "tree-ssanames.h" ++#include "selftest.h" ++#include "selftest-rtl.h" ++#include "print-rtl.h" ++#include "intl.h" ++#include "ifcvt.h" ++#include "symbol-summary.h" ++#include "ipa-prop.h" ++#include "ipa-fnsummary.h" ++#include "wide-int-bitmask.h" ++#include "tree-vector-builder.h" ++#include "debug.h" ++#include "dwarf2out.h" ++#include "i386-options.h" ++#include "i386-builtins.h" ++#include "i386-expand.h" ++ ++/* Split one or more double-mode RTL references into pairs of half-mode ++ references. The RTL can be REG, offsettable MEM, integer constant, or ++ CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to ++ split and "num" is its length. lo_half and hi_half are output arrays ++ that parallel "operands". */ ++ ++void ++split_double_mode (machine_mode mode, rtx operands[], ++ int num, rtx lo_half[], rtx hi_half[]) ++{ ++ machine_mode half_mode; ++ unsigned int byte; ++ ++ switch (mode) ++ { ++ case E_TImode: ++ half_mode = DImode; ++ break; ++ case E_DImode: ++ half_mode = SImode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ byte = GET_MODE_SIZE (half_mode); ++ ++ while (num--) ++ { ++ rtx op = operands[num]; ++ ++ /* simplify_subreg refuse to split volatile memory addresses, ++ but we still have to handle it. */ ++ if (MEM_P (op)) ++ { ++ lo_half[num] = adjust_address (op, half_mode, 0); ++ hi_half[num] = adjust_address (op, half_mode, byte); ++ } ++ else ++ { ++ lo_half[num] = simplify_gen_subreg (half_mode, op, ++ GET_MODE (op) == VOIDmode ++ ? mode : GET_MODE (op), 0); ++ hi_half[num] = simplify_gen_subreg (half_mode, op, ++ GET_MODE (op) == VOIDmode ++ ? mode : GET_MODE (op), byte); ++ } ++ } ++} ++ ++/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate ++ for the target. */ ++ ++void ++ix86_expand_clear (rtx dest) ++{ ++ rtx tmp; ++ ++ /* We play register width games, which are only valid after reload. */ ++ gcc_assert (reload_completed); ++ ++ /* Avoid HImode and its attendant prefix byte. 
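++ For example, clearing %ax with "xorw %ax, %ax" needs a 0x66 operand-size prefix, while the equivalent "xorl %eax, %eax" does not, so sub-SImode destinations are widened to SImode here.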
*/ ++ if (GET_MODE_SIZE (GET_MODE (dest)) < 4) ++ dest = gen_rtx_REG (SImode, REGNO (dest)); ++ tmp = gen_rtx_SET (dest, const0_rtx); ++ ++ if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ()) ++ { ++ rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); ++ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); ++ } ++ ++ emit_insn (tmp); ++} ++ ++void ++ix86_expand_move (machine_mode mode, rtx operands[]) ++{ ++ rtx op0, op1; ++ rtx tmp, addend = NULL_RTX; ++ enum tls_model model; ++ ++ op0 = operands[0]; ++ op1 = operands[1]; ++ ++ switch (GET_CODE (op1)) ++ { ++ case CONST: ++ tmp = XEXP (op1, 0); ++ ++ if (GET_CODE (tmp) != PLUS ++ || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF) ++ break; ++ ++ op1 = XEXP (tmp, 0); ++ addend = XEXP (tmp, 1); ++ /* FALLTHRU */ ++ ++ case SYMBOL_REF: ++ model = SYMBOL_REF_TLS_MODEL (op1); ++ ++ if (model) ++ op1 = legitimize_tls_address (op1, model, true); ++ else if (ix86_force_load_from_GOT_p (op1)) ++ { ++ /* Load the external function address via GOT slot to avoid PLT. */ ++ op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), ++ (TARGET_64BIT ++ ? UNSPEC_GOTPCREL ++ : UNSPEC_GOT)); ++ op1 = gen_rtx_CONST (Pmode, op1); ++ op1 = gen_const_mem (Pmode, op1); ++ set_mem_alias_set (op1, ix86_GOT_alias_set ()); ++ } ++ else ++ { ++ tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX); ++ if (tmp) ++ { ++ op1 = tmp; ++ if (!addend) ++ break; ++ } ++ else ++ { ++ op1 = operands[1]; ++ break; ++ } ++ } ++ ++ if (addend) ++ { ++ op1 = force_operand (op1, NULL_RTX); ++ op1 = expand_simple_binop (Pmode, PLUS, op1, addend, ++ op0, 1, OPTAB_DIRECT); ++ } ++ else ++ op1 = force_operand (op1, op0); ++ ++ if (op1 == op0) ++ return; ++ ++ op1 = convert_to_mode (mode, op1, 1); ++ ++ default: ++ break; ++ } ++ ++ if ((flag_pic || MACHOPIC_INDIRECT) ++ && symbolic_operand (op1, mode)) ++ { ++ if (TARGET_MACHO && !TARGET_64BIT) ++ { ++#if TARGET_MACHO ++ /* dynamic-no-pic */ ++ if (MACHOPIC_INDIRECT) ++ { ++ rtx temp = (op0 && REG_P (op0) && mode == Pmode) ++ ? op0 : gen_reg_rtx (Pmode); ++ op1 = machopic_indirect_data_reference (op1, temp); ++ if (MACHOPIC_PURE) ++ op1 = machopic_legitimize_pic_address (op1, mode, ++ temp == op1 ? 0 : temp); ++ } ++ if (op0 != op1 && GET_CODE (op0) != MEM) ++ { ++ rtx insn = gen_rtx_SET (op0, op1); ++ emit_insn (insn); ++ return; ++ } ++ if (GET_CODE (op0) == MEM) ++ op1 = force_reg (Pmode, op1); ++ else ++ { ++ rtx temp = op0; ++ if (GET_CODE (temp) != REG) ++ temp = gen_reg_rtx (Pmode); ++ temp = legitimize_pic_address (op1, temp); ++ if (temp == op0) ++ return; ++ op1 = temp; ++ } ++ /* dynamic-no-pic */ ++#endif ++ } ++ else ++ { ++ if (MEM_P (op0)) ++ op1 = force_reg (mode, op1); ++ else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode))) ++ { ++ rtx reg = can_create_pseudo_p () ? NULL_RTX : op0; ++ op1 = legitimize_pic_address (op1, reg); ++ if (op0 == op1) ++ return; ++ op1 = convert_to_mode (mode, op1, 1); ++ } ++ } ++ } ++ else ++ { ++ if (MEM_P (op0) ++ && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) ++ || !push_operand (op0, mode)) ++ && MEM_P (op1)) ++ op1 = force_reg (mode, op1); ++ ++ if (push_operand (op0, mode) ++ && ! general_no_elim_operand (op1, mode)) ++ op1 = copy_to_mode_reg (mode, op1); ++ ++ /* Force large constants in 64bit compilation into register ++ to get them CSEed. 
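++ This applies to DImode immediates that do not fit a zero-extended 32-bit encoding; keeping such a constant in a register lets CSE reuse a single load instead of repeating the constant at every use.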
*/ ++ if (can_create_pseudo_p () ++ && (mode == DImode) && TARGET_64BIT ++ && immediate_operand (op1, mode) ++ && !x86_64_zext_immediate_operand (op1, VOIDmode) ++ && !register_operand (op0, mode) ++ && optimize) ++ op1 = copy_to_mode_reg (mode, op1); ++ ++ if (can_create_pseudo_p () ++ && CONST_DOUBLE_P (op1)) ++ { ++ /* If we are loading a floating point constant to a register, ++ force the value to memory now, since we'll get better code ++ out the back end. */ ++ ++ op1 = validize_mem (force_const_mem (mode, op1)); ++ if (!register_operand (op0, mode)) ++ { ++ rtx temp = gen_reg_rtx (mode); ++ emit_insn (gen_rtx_SET (temp, op1)); ++ emit_move_insn (op0, temp); ++ return; ++ } ++ } ++ } ++ ++ emit_insn (gen_rtx_SET (op0, op1)); ++} ++ ++void ++ix86_expand_vector_move (machine_mode mode, rtx operands[]) ++{ ++ rtx op0 = operands[0], op1 = operands[1]; ++ /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU ++ psABI since the biggest alignment is 4 byte for IA MCU psABI. */ ++ unsigned int align = (TARGET_IAMCU ++ ? GET_MODE_BITSIZE (mode) ++ : GET_MODE_ALIGNMENT (mode)); ++ ++ if (push_operand (op0, VOIDmode)) ++ op0 = emit_move_resolve_push (mode, op0); ++ ++ /* Force constants other than zero into memory. We do not know how ++ the instructions used to build constants modify the upper 64 bits ++ of the register, once we have that information we may be able ++ to handle some of them more efficiently. */ ++ if (can_create_pseudo_p () ++ && (CONSTANT_P (op1) ++ || (SUBREG_P (op1) ++ && CONSTANT_P (SUBREG_REG (op1)))) ++ && ((register_operand (op0, mode) ++ && !standard_sse_constant_p (op1, mode)) ++ /* ix86_expand_vector_move_misalign() does not like constants. */ ++ || (SSE_REG_MODE_P (mode) ++ && MEM_P (op0) ++ && MEM_ALIGN (op0) < align))) ++ { ++ if (SUBREG_P (op1)) ++ { ++ machine_mode imode = GET_MODE (SUBREG_REG (op1)); ++ rtx r = force_const_mem (imode, SUBREG_REG (op1)); ++ if (r) ++ r = validize_mem (r); ++ else ++ r = force_reg (imode, SUBREG_REG (op1)); ++ op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1)); ++ } ++ else ++ op1 = validize_mem (force_const_mem (mode, op1)); ++ } ++ ++ /* We need to check memory alignment for SSE mode since attribute ++ can make operands unaligned. */ ++ if (can_create_pseudo_p () ++ && SSE_REG_MODE_P (mode) ++ && ((MEM_P (op0) && (MEM_ALIGN (op0) < align)) ++ || (MEM_P (op1) && (MEM_ALIGN (op1) < align)))) ++ { ++ rtx tmp[2]; ++ ++ /* ix86_expand_vector_move_misalign() does not like both ++ arguments in memory. */ ++ if (!register_operand (op0, mode) ++ && !register_operand (op1, mode)) ++ op1 = force_reg (mode, op1); ++ ++ tmp[0] = op0; tmp[1] = op1; ++ ix86_expand_vector_move_misalign (mode, tmp); ++ return; ++ } ++ ++ /* Make operand1 a register if it isn't already. */ ++ if (can_create_pseudo_p () ++ && !register_operand (op0, mode) ++ && !register_operand (op1, mode)) ++ { ++ emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); ++ return; ++ } ++ ++ emit_insn (gen_rtx_SET (op0, op1)); ++} ++ ++/* Split 32-byte AVX unaligned load and store if needed. 
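++ Whether to split is governed by the TARGET_AVX256_SPLIT_UNALIGNED_LOAD and TARGET_AVX256_SPLIT_UNALIGNED_STORE tuning flags; on some microarchitectures two 16-byte accesses are faster than one unaligned 32-byte access.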
*/ ++ ++static void ++ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) ++{ ++ rtx m; ++ rtx (*extract) (rtx, rtx, rtx); ++ machine_mode mode; ++ ++ if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD) ++ || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE)) ++ { ++ emit_insn (gen_rtx_SET (op0, op1)); ++ return; ++ } ++ ++ rtx orig_op0 = NULL_RTX; ++ mode = GET_MODE (op0); ++ switch (GET_MODE_CLASS (mode)) ++ { ++ case MODE_VECTOR_INT: ++ case MODE_INT: ++ if (mode != V32QImode) ++ { ++ if (!MEM_P (op0)) ++ { ++ orig_op0 = op0; ++ op0 = gen_reg_rtx (V32QImode); ++ } ++ else ++ op0 = gen_lowpart (V32QImode, op0); ++ op1 = gen_lowpart (V32QImode, op1); ++ mode = V32QImode; ++ } ++ break; ++ case MODE_VECTOR_FLOAT: ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ switch (mode) ++ { ++ default: ++ gcc_unreachable (); ++ case E_V32QImode: ++ extract = gen_avx_vextractf128v32qi; ++ mode = V16QImode; ++ break; ++ case E_V8SFmode: ++ extract = gen_avx_vextractf128v8sf; ++ mode = V4SFmode; ++ break; ++ case E_V4DFmode: ++ extract = gen_avx_vextractf128v4df; ++ mode = V2DFmode; ++ break; ++ } ++ ++ if (MEM_P (op1)) ++ { ++ rtx r = gen_reg_rtx (mode); ++ m = adjust_address (op1, mode, 0); ++ emit_move_insn (r, m); ++ m = adjust_address (op1, mode, 16); ++ r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m); ++ emit_move_insn (op0, r); ++ } ++ else if (MEM_P (op0)) ++ { ++ m = adjust_address (op0, mode, 0); ++ emit_insn (extract (m, op1, const0_rtx)); ++ m = adjust_address (op0, mode, 16); ++ emit_insn (extract (m, copy_rtx (op1), const1_rtx)); ++ } ++ else ++ gcc_unreachable (); ++ ++ if (orig_op0) ++ emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); ++} ++ ++/* Implement the movmisalign patterns for SSE. Non-SSE modes go ++ straight to ix86_expand_vector_move. */ ++/* Code generation for scalar reg-reg moves of single and double precision data: ++ if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true) ++ movaps reg, reg ++ else ++ movss reg, reg ++ if (x86_sse_partial_reg_dependency == true) ++ movapd reg, reg ++ else ++ movsd reg, reg ++ ++ Code generation for scalar loads of double precision data: ++ if (x86_sse_split_regs == true) ++ movlpd mem, reg (gas syntax) ++ else ++ movsd mem, reg ++ ++ Code generation for unaligned packed loads of single precision data ++ (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency): ++ if (x86_sse_unaligned_move_optimal) ++ movups mem, reg ++ ++ if (x86_sse_partial_reg_dependency == true) ++ { ++ xorps reg, reg ++ movlps mem, reg ++ movhps mem+8, reg ++ } ++ else ++ { ++ movlps mem, reg ++ movhps mem+8, reg ++ } ++ ++ Code generation for unaligned packed loads of double precision data ++ (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs): ++ if (x86_sse_unaligned_move_optimal) ++ movupd mem, reg ++ ++ if (x86_sse_split_regs == true) ++ { ++ movlpd mem, reg ++ movhpd mem+8, reg ++ } ++ else ++ { ++ movsd mem, reg ++ movhpd mem+8, reg ++ } ++ */ ++ ++void ++ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) ++{ ++ rtx op0, op1, m; ++ ++ op0 = operands[0]; ++ op1 = operands[1]; ++ ++ /* Use unaligned load/store for AVX512 or when optimizing for size. */ ++ if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ()) ++ { ++ emit_insn (gen_rtx_SET (op0, op1)); ++ return; ++ } ++ ++ if (TARGET_AVX) ++ { ++ if (GET_MODE_SIZE (mode) == 32) ++ ix86_avx256_split_vector_move_misalign (op0, op1); ++ else ++ /* Always use 128-bit mov_internal pattern for AVX. 
*/ ++ emit_insn (gen_rtx_SET (op0, op1)); ++ return; ++ } ++ ++ if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL ++ || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) ++ { ++ emit_insn (gen_rtx_SET (op0, op1)); ++ return; ++ } ++ ++ /* ??? If we have typed data, then it would appear that using ++ movdqu is the only way to get unaligned data loaded with ++ integer type. */ ++ if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) ++ { ++ emit_insn (gen_rtx_SET (op0, op1)); ++ return; ++ } ++ ++ if (MEM_P (op1)) ++ { ++ if (TARGET_SSE2 && mode == V2DFmode) ++ { ++ rtx zero; ++ ++ /* When SSE registers are split into halves, we can avoid ++ writing to the top half twice. */ ++ if (TARGET_SSE_SPLIT_REGS) ++ { ++ emit_clobber (op0); ++ zero = op0; ++ } ++ else ++ { ++ /* ??? Not sure about the best option for the Intel chips. ++ The following would seem to satisfy; the register is ++ entirely cleared, breaking the dependency chain. We ++ then store to the upper half, with a dependency depth ++ of one. A rumor has it that Intel recommends two movsd ++ followed by an unpacklpd, but this is unconfirmed. And ++ given that the dependency depth of the unpacklpd would ++ still be one, I'm not sure why this would be better. */ ++ zero = CONST0_RTX (V2DFmode); ++ } ++ ++ m = adjust_address (op1, DFmode, 0); ++ emit_insn (gen_sse2_loadlpd (op0, zero, m)); ++ m = adjust_address (op1, DFmode, 8); ++ emit_insn (gen_sse2_loadhpd (op0, op0, m)); ++ } ++ else ++ { ++ rtx t; ++ ++ if (mode != V4SFmode) ++ t = gen_reg_rtx (V4SFmode); ++ else ++ t = op0; ++ ++ if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) ++ emit_move_insn (t, CONST0_RTX (V4SFmode)); ++ else ++ emit_clobber (t); ++ ++ m = adjust_address (op1, V2SFmode, 0); ++ emit_insn (gen_sse_loadlps (t, t, m)); ++ m = adjust_address (op1, V2SFmode, 8); ++ emit_insn (gen_sse_loadhps (t, t, m)); ++ if (mode != V4SFmode) ++ emit_move_insn (op0, gen_lowpart (mode, t)); ++ } ++ } ++ else if (MEM_P (op0)) ++ { ++ if (TARGET_SSE2 && mode == V2DFmode) ++ { ++ m = adjust_address (op0, DFmode, 0); ++ emit_insn (gen_sse2_storelpd (m, op1)); ++ m = adjust_address (op0, DFmode, 8); ++ emit_insn (gen_sse2_storehpd (m, op1)); ++ } ++ else ++ { ++ if (mode != V4SFmode) ++ op1 = gen_lowpart (V4SFmode, op1); ++ ++ m = adjust_address (op0, V2SFmode, 0); ++ emit_insn (gen_sse_storelps (m, op1)); ++ m = adjust_address (op0, V2SFmode, 8); ++ emit_insn (gen_sse_storehps (m, copy_rtx (op1))); ++ } ++ } ++ else ++ gcc_unreachable (); ++} ++ ++/* Helper function of ix86_fixup_binary_operands to canonicalize ++ operand order. Returns true if the operands should be swapped. */ ++ ++static bool ++ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode, ++ rtx operands[]) ++{ ++ rtx dst = operands[0]; ++ rtx src1 = operands[1]; ++ rtx src2 = operands[2]; ++ ++ /* If the operation is not commutative, we can't do anything. */ ++ if (GET_RTX_CLASS (code) != RTX_COMM_ARITH ++ && GET_RTX_CLASS (code) != RTX_COMM_COMPARE) ++ return false; ++ ++ /* Highest priority is that src1 should match dst. */ ++ if (rtx_equal_p (dst, src1)) ++ return false; ++ if (rtx_equal_p (dst, src2)) ++ return true; ++ ++ /* Next highest priority is that immediate constants come second. */ ++ if (immediate_operand (src2, mode)) ++ return false; ++ if (immediate_operand (src1, mode)) ++ return true; ++ ++ /* Lowest priority is that memory references should come second. 
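++ Keeping the memory operand in the second slot leaves the first operand free to be tied to the destination, matching the register-destination, memory-source instruction forms.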
*/ ++ if (MEM_P (src2)) ++ return false; ++ if (MEM_P (src1)) ++ return true; ++ ++ return false; ++} ++ ++ ++/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the ++ destination to use for the operation. If different from the true ++ destination in operands[0], a copy operation will be required. */ ++ ++rtx ++ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode, ++ rtx operands[]) ++{ ++ rtx dst = operands[0]; ++ rtx src1 = operands[1]; ++ rtx src2 = operands[2]; ++ ++ /* Canonicalize operand order. */ ++ if (ix86_swap_binary_operands_p (code, mode, operands)) ++ { ++ /* It is invalid to swap operands of different modes. */ ++ gcc_assert (GET_MODE (src1) == GET_MODE (src2)); ++ ++ std::swap (src1, src2); ++ } ++ ++ /* Both source operands cannot be in memory. */ ++ if (MEM_P (src1) && MEM_P (src2)) ++ { ++ /* Optimization: Only read from memory once. */ ++ if (rtx_equal_p (src1, src2)) ++ { ++ src2 = force_reg (mode, src2); ++ src1 = src2; ++ } ++ else if (rtx_equal_p (dst, src1)) ++ src2 = force_reg (mode, src2); ++ else ++ src1 = force_reg (mode, src1); ++ } ++ ++ /* If the destination is memory, and we do not have matching source ++ operands, do things in registers. */ ++ if (MEM_P (dst) && !rtx_equal_p (dst, src1)) ++ dst = gen_reg_rtx (mode); ++ ++ /* Source 1 cannot be a constant. */ ++ if (CONSTANT_P (src1)) ++ src1 = force_reg (mode, src1); ++ ++ /* Source 1 cannot be a non-matching memory. */ ++ if (MEM_P (src1) && !rtx_equal_p (dst, src1)) ++ src1 = force_reg (mode, src1); ++ ++ /* Improve address combine. */ ++ if (code == PLUS ++ && GET_MODE_CLASS (mode) == MODE_INT ++ && MEM_P (src2)) ++ src2 = force_reg (mode, src2); ++ ++ operands[1] = src1; ++ operands[2] = src2; ++ return dst; ++} ++ ++/* Similarly, but assume that the destination has already been ++ set up properly. */ ++ ++void ++ix86_fixup_binary_operands_no_copy (enum rtx_code code, ++ machine_mode mode, rtx operands[]) ++{ ++ rtx dst = ix86_fixup_binary_operands (code, mode, operands); ++ gcc_assert (dst == operands[0]); ++} ++ ++/* Attempt to expand a binary operator. Make the expansion closer to the ++ actual machine, then just general_operand, which will allow 3 separate ++ memory references (one output, two input) in a single insn. */ ++ ++void ++ix86_expand_binary_operator (enum rtx_code code, machine_mode mode, ++ rtx operands[]) ++{ ++ rtx src1, src2, dst, op, clob; ++ ++ dst = ix86_fixup_binary_operands (code, mode, operands); ++ src1 = operands[1]; ++ src2 = operands[2]; ++ ++ /* Emit the instruction. */ ++ ++ op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2)); ++ ++ if (reload_completed ++ && code == PLUS ++ && !rtx_equal_p (dst, src1)) ++ { ++ /* This is going to be an LEA; avoid splitting it later. */ ++ emit_insn (op); ++ } ++ else ++ { ++ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); ++ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); ++ } ++ ++ /* Fix up the destination if needed. */ ++ if (dst != operands[0]) ++ emit_move_insn (operands[0], dst); ++} ++ ++/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with ++ the given OPERANDS. 
*/ ++ ++void ++ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode, ++ rtx operands[]) ++{ ++ rtx op1 = NULL_RTX, op2 = NULL_RTX; ++ if (SUBREG_P (operands[1])) ++ { ++ op1 = operands[1]; ++ op2 = operands[2]; ++ } ++ else if (SUBREG_P (operands[2])) ++ { ++ op1 = operands[2]; ++ op2 = operands[1]; ++ } ++ /* Optimize (__m128i) d | (__m128i) e and similar code ++ when d and e are float vectors into float vector logical ++ insn. In C/C++ without using intrinsics there is no other way ++ to express vector logical operation on float vectors than ++ to cast them temporarily to integer vectors. */ ++ if (op1 ++ && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL ++ && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR) ++ && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT ++ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode) ++ && SUBREG_BYTE (op1) == 0 ++ && (GET_CODE (op2) == CONST_VECTOR ++ || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2)) ++ && SUBREG_BYTE (op2) == 0)) ++ && can_create_pseudo_p ()) ++ { ++ rtx dst; ++ switch (GET_MODE (SUBREG_REG (op1))) ++ { ++ case E_V4SFmode: ++ case E_V8SFmode: ++ case E_V16SFmode: ++ case E_V2DFmode: ++ case E_V4DFmode: ++ case E_V8DFmode: ++ dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1))); ++ if (GET_CODE (op2) == CONST_VECTOR) ++ { ++ op2 = gen_lowpart (GET_MODE (dst), op2); ++ op2 = force_reg (GET_MODE (dst), op2); ++ } ++ else ++ { ++ op1 = operands[1]; ++ op2 = SUBREG_REG (operands[2]); ++ if (!vector_operand (op2, GET_MODE (dst))) ++ op2 = force_reg (GET_MODE (dst), op2); ++ } ++ op1 = SUBREG_REG (op1); ++ if (!vector_operand (op1, GET_MODE (dst))) ++ op1 = force_reg (GET_MODE (dst), op1); ++ emit_insn (gen_rtx_SET (dst, ++ gen_rtx_fmt_ee (code, GET_MODE (dst), ++ op1, op2))); ++ emit_move_insn (operands[0], gen_lowpart (mode, dst)); ++ return; ++ default: ++ break; ++ } ++ } ++ if (!vector_operand (operands[1], mode)) ++ operands[1] = force_reg (mode, operands[1]); ++ if (!vector_operand (operands[2], mode)) ++ operands[2] = force_reg (mode, operands[2]); ++ ix86_fixup_binary_operands_no_copy (code, mode, operands); ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_fmt_ee (code, mode, operands[1], ++ operands[2]))); ++} ++ ++/* Return TRUE or FALSE depending on whether the binary operator meets the ++ appropriate constraints. */ ++ ++bool ++ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, ++ rtx operands[3]) ++{ ++ rtx dst = operands[0]; ++ rtx src1 = operands[1]; ++ rtx src2 = operands[2]; ++ ++ /* Both source operands cannot be in memory. */ ++ if (MEM_P (src1) && MEM_P (src2)) ++ return false; ++ ++ /* Canonicalize operand order for commutative operators. */ ++ if (ix86_swap_binary_operands_p (code, mode, operands)) ++ std::swap (src1, src2); ++ ++ /* If the destination is memory, we must have a matching source operand. */ ++ if (MEM_P (dst) && !rtx_equal_p (dst, src1)) ++ return false; ++ ++ /* Source 1 cannot be a constant. */ ++ if (CONSTANT_P (src1)) ++ return false; ++ ++ /* Source 1 cannot be a non-matching memory. */ ++ if (MEM_P (src1) && !rtx_equal_p (dst, src1)) ++ /* Support "andhi/andsi/anddi" as a zero-extending move. */ ++ return (code == AND ++ && (mode == HImode ++ || mode == SImode ++ || (TARGET_64BIT && mode == DImode)) ++ && satisfies_constraint_L (src2)); ++ ++ return true; ++} ++ ++/* Attempt to expand a unary operator. 
Make the expansion closer to the ++ actual machine, then just general_operand, which will allow 2 separate ++ memory references (one output, one input) in a single insn. */ ++ ++void ++ix86_expand_unary_operator (enum rtx_code code, machine_mode mode, ++ rtx operands[]) ++{ ++ bool matching_memory = false; ++ rtx src, dst, op, clob; ++ ++ dst = operands[0]; ++ src = operands[1]; ++ ++ /* If the destination is memory, and we do not have matching source ++ operands, do things in registers. */ ++ if (MEM_P (dst)) ++ { ++ if (rtx_equal_p (dst, src)) ++ matching_memory = true; ++ else ++ dst = gen_reg_rtx (mode); ++ } ++ ++ /* When source operand is memory, destination must match. */ ++ if (MEM_P (src) && !matching_memory) ++ src = force_reg (mode, src); ++ ++ /* Emit the instruction. */ ++ ++ op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src)); ++ ++ if (code == NOT) ++ emit_insn (op); ++ else ++ { ++ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); ++ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); ++ } ++ ++ /* Fix up the destination if needed. */ ++ if (dst != operands[0]) ++ emit_move_insn (operands[0], dst); ++} ++ ++/* Predict just emitted jump instruction to be taken with probability PROB. */ ++ ++static void ++predict_jump (int prob) ++{ ++ rtx_insn *insn = get_last_insn (); ++ gcc_assert (JUMP_P (insn)); ++ add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob)); ++} ++ ++/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and ++ divisor are within the range [0-255]. */ ++ ++void ++ix86_split_idivmod (machine_mode mode, rtx operands[], ++ bool signed_p) ++{ ++ rtx_code_label *end_label, *qimode_label; ++ rtx div, mod; ++ rtx_insn *insn; ++ rtx scratch, tmp0, tmp1, tmp2; ++ rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx); ++ rtx (*gen_zero_extend) (rtx, rtx); ++ rtx (*gen_test_ccno_1) (rtx, rtx); ++ ++ switch (mode) ++ { ++ case E_SImode: ++ if (GET_MODE (operands[0]) == SImode) ++ { ++ if (GET_MODE (operands[1]) == SImode) ++ gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; ++ else ++ gen_divmod4_1 ++ = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2; ++ gen_zero_extend = gen_zero_extendqisi2; ++ } ++ else ++ { ++ gen_divmod4_1 ++ = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1; ++ gen_zero_extend = gen_zero_extendqidi2; ++ } ++ gen_test_ccno_1 = gen_testsi_ccno_1; ++ break; ++ case E_DImode: ++ gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1; ++ gen_test_ccno_1 = gen_testdi_ccno_1; ++ gen_zero_extend = gen_zero_extendqidi2; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ end_label = gen_label_rtx (); ++ qimode_label = gen_label_rtx (); ++ ++ scratch = gen_reg_rtx (mode); ++ ++ /* Use 8bit unsigned divimod if dividend and divisor are within ++ the range [0-255]. */ ++ emit_move_insn (scratch, operands[2]); ++ scratch = expand_simple_binop (mode, IOR, scratch, operands[3], ++ scratch, 1, OPTAB_DIRECT); ++ emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100))); ++ tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG); ++ tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx); ++ tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0, ++ gen_rtx_LABEL_REF (VOIDmode, qimode_label), ++ pc_rtx); ++ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0)); ++ predict_jump (REG_BR_PROB_BASE * 50 / 100); ++ JUMP_LABEL (insn) = qimode_label; ++ ++ /* Generate original signed/unsigned divimod. 
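++ This is the fall-through path: the test above found bits set beyond the low eight in the dividend or the divisor, so a full-width divide is required.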
*/ ++ div = gen_divmod4_1 (operands[0], operands[1], ++ operands[2], operands[3]); ++ emit_insn (div); ++ ++ /* Branch to the end. */ ++ emit_jump_insn (gen_jump (end_label)); ++ emit_barrier (); ++ ++ /* Generate 8bit unsigned divide. */ ++ emit_label (qimode_label); ++ /* Don't use operands[0] for result of 8bit divide since not all ++ registers support QImode ZERO_EXTRACT. */ ++ tmp0 = lowpart_subreg (HImode, scratch, mode); ++ tmp1 = lowpart_subreg (HImode, operands[2], mode); ++ tmp2 = lowpart_subreg (QImode, operands[3], mode); ++ emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2)); ++ ++ if (signed_p) ++ { ++ div = gen_rtx_DIV (mode, operands[2], operands[3]); ++ mod = gen_rtx_MOD (mode, operands[2], operands[3]); ++ } ++ else ++ { ++ div = gen_rtx_UDIV (mode, operands[2], operands[3]); ++ mod = gen_rtx_UMOD (mode, operands[2], operands[3]); ++ } ++ if (mode == SImode) ++ { ++ if (GET_MODE (operands[0]) != SImode) ++ div = gen_rtx_ZERO_EXTEND (DImode, div); ++ if (GET_MODE (operands[1]) != SImode) ++ mod = gen_rtx_ZERO_EXTEND (DImode, mod); ++ } ++ ++ /* Extract remainder from AH. */ ++ tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]), ++ tmp0, GEN_INT (8), GEN_INT (8)); ++ if (REG_P (operands[1])) ++ insn = emit_move_insn (operands[1], tmp1); ++ else ++ { ++ /* Need a new scratch register since the old one has result ++ of 8bit divide. */ ++ scratch = gen_reg_rtx (GET_MODE (operands[1])); ++ emit_move_insn (scratch, tmp1); ++ insn = emit_move_insn (operands[1], scratch); ++ } ++ set_unique_reg_note (insn, REG_EQUAL, mod); ++ ++ /* Zero extend quotient from AL. */ ++ tmp1 = gen_lowpart (QImode, tmp0); ++ insn = emit_insn (gen_zero_extend (operands[0], tmp1)); ++ set_unique_reg_note (insn, REG_EQUAL, div); ++ ++ emit_label (end_label); ++} ++ ++/* Emit x86 binary operand CODE in mode MODE, where the first operand ++ matches destination. RTX includes clobber of FLAGS_REG. */ ++ ++void ++ix86_emit_binop (enum rtx_code code, machine_mode mode, ++ rtx dst, rtx src) ++{ ++ rtx op, clob; ++ ++ op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src)); ++ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); ++ ++ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); ++} ++ ++/* Return true if regno1 def is nearest to the insn. */ ++ ++static bool ++find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2) ++{ ++ rtx_insn *prev = insn; ++ rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn)); ++ ++ if (insn == start) ++ return false; ++ while (prev && prev != start) ++ { ++ if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev)) ++ { ++ prev = PREV_INSN (prev); ++ continue; ++ } ++ if (insn_defines_reg (regno1, INVALID_REGNUM, prev)) ++ return true; ++ else if (insn_defines_reg (regno2, INVALID_REGNUM, prev)) ++ return false; ++ prev = PREV_INSN (prev); ++ } ++ ++ /* None of the regs is defined in the bb. */ ++ return false; ++} ++ ++/* Split lea instructions into a sequence of instructions ++ which are executed on ALU to avoid AGU stalls. ++ It is assumed that it is allowed to clobber flags register ++ at lea position. 
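++ For example, "leal 8(%ebx,%ecx,4), %eax" can be rewritten as "movl %ecx, %eax; sall $2, %eax; addl %ebx, %eax; addl $8, %eax".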
*/ ++ ++void ++ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode) ++{ ++ unsigned int regno0, regno1, regno2; ++ struct ix86_address parts; ++ rtx target, tmp; ++ int ok, adds; ++ ++ ok = ix86_decompose_address (operands[1], &parts); ++ gcc_assert (ok); ++ ++ target = gen_lowpart (mode, operands[0]); ++ ++ regno0 = true_regnum (target); ++ regno1 = INVALID_REGNUM; ++ regno2 = INVALID_REGNUM; ++ ++ if (parts.base) ++ { ++ parts.base = gen_lowpart (mode, parts.base); ++ regno1 = true_regnum (parts.base); ++ } ++ ++ if (parts.index) ++ { ++ parts.index = gen_lowpart (mode, parts.index); ++ regno2 = true_regnum (parts.index); ++ } ++ ++ if (parts.disp) ++ parts.disp = gen_lowpart (mode, parts.disp); ++ ++ if (parts.scale > 1) ++ { ++ /* Case r1 = r1 + ... */ ++ if (regno1 == regno0) ++ { ++ /* If we have a case r1 = r1 + C * r2 then we ++ should use multiplication which is very ++ expensive. Assume cost model is wrong if we ++ have such case here. */ ++ gcc_assert (regno2 != regno0); ++ ++ for (adds = parts.scale; adds > 0; adds--) ++ ix86_emit_binop (PLUS, mode, target, parts.index); ++ } ++ else ++ { ++ /* r1 = r2 + r3 * C case. Need to move r3 into r1. */ ++ if (regno0 != regno2) ++ emit_insn (gen_rtx_SET (target, parts.index)); ++ ++ /* Use shift for scaling. */ ++ ix86_emit_binop (ASHIFT, mode, target, ++ GEN_INT (exact_log2 (parts.scale))); ++ ++ if (parts.base) ++ ix86_emit_binop (PLUS, mode, target, parts.base); ++ ++ if (parts.disp && parts.disp != const0_rtx) ++ ix86_emit_binop (PLUS, mode, target, parts.disp); ++ } ++ } ++ else if (!parts.base && !parts.index) ++ { ++ gcc_assert(parts.disp); ++ emit_insn (gen_rtx_SET (target, parts.disp)); ++ } ++ else ++ { ++ if (!parts.base) ++ { ++ if (regno0 != regno2) ++ emit_insn (gen_rtx_SET (target, parts.index)); ++ } ++ else if (!parts.index) ++ { ++ if (regno0 != regno1) ++ emit_insn (gen_rtx_SET (target, parts.base)); ++ } ++ else ++ { ++ if (regno0 == regno1) ++ tmp = parts.index; ++ else if (regno0 == regno2) ++ tmp = parts.base; ++ else ++ { ++ rtx tmp1; ++ ++ /* Find better operand for SET instruction, depending ++ on which definition is farther from the insn. */ ++ if (find_nearest_reg_def (insn, regno1, regno2)) ++ tmp = parts.index, tmp1 = parts.base; ++ else ++ tmp = parts.base, tmp1 = parts.index; ++ ++ emit_insn (gen_rtx_SET (target, tmp)); ++ ++ if (parts.disp && parts.disp != const0_rtx) ++ ix86_emit_binop (PLUS, mode, target, parts.disp); ++ ++ ix86_emit_binop (PLUS, mode, target, tmp1); ++ return; ++ } ++ ++ ix86_emit_binop (PLUS, mode, target, tmp); ++ } ++ ++ if (parts.disp && parts.disp != const0_rtx) ++ ix86_emit_binop (PLUS, mode, target, parts.disp); ++ } ++} ++ ++/* Post-reload splitter for converting an SF or DFmode value in an ++ SSE register into an unsigned SImode. */ ++ ++void ++ix86_split_convert_uns_si_sse (rtx operands[]) ++{ ++ machine_mode vecmode; ++ rtx value, large, zero_or_two31, input, two31, x; ++ ++ large = operands[1]; ++ zero_or_two31 = operands[2]; ++ input = operands[3]; ++ two31 = operands[4]; ++ vecmode = GET_MODE (large); ++ value = gen_rtx_REG (vecmode, REGNO (operands[0])); ++ ++ /* Load up the value into the low element. We must ensure that the other ++ elements are valid floats -- zero is the easiest such value. 
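++ Garbage bits in the upper lanes could otherwise be NaNs or denormals and perturb the packed compare and subtract below.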
*/ ++ if (MEM_P (input)) ++ { ++ if (vecmode == V4SFmode) ++ emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input)); ++ else ++ emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input)); ++ } ++ else ++ { ++ input = gen_rtx_REG (vecmode, REGNO (input)); ++ emit_move_insn (value, CONST0_RTX (vecmode)); ++ if (vecmode == V4SFmode) ++ emit_insn (gen_sse_movss (value, value, input)); ++ else ++ emit_insn (gen_sse2_movsd (value, value, input)); ++ } ++ ++ emit_move_insn (large, two31); ++ emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31); ++ ++ x = gen_rtx_fmt_ee (LE, vecmode, large, value); ++ emit_insn (gen_rtx_SET (large, x)); ++ ++ x = gen_rtx_AND (vecmode, zero_or_two31, large); ++ emit_insn (gen_rtx_SET (zero_or_two31, x)); ++ ++ x = gen_rtx_MINUS (vecmode, value, zero_or_two31); ++ emit_insn (gen_rtx_SET (value, x)); ++ ++ large = gen_rtx_REG (V4SImode, REGNO (large)); ++ emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31))); ++ ++ x = gen_rtx_REG (V4SImode, REGNO (value)); ++ if (vecmode == V4SFmode) ++ emit_insn (gen_fix_truncv4sfv4si2 (x, value)); ++ else ++ emit_insn (gen_sse2_cvttpd2dq (x, value)); ++ value = x; ++ ++ emit_insn (gen_xorv4si3 (value, value, large)); ++} ++ ++static bool ix86_expand_vector_init_one_nonzero (bool mmx_ok, ++ machine_mode mode, rtx target, ++ rtx var, int one_var); ++ ++/* Convert an unsigned DImode value into a DFmode, using only SSE. ++ Expects the 64-bit DImode to be supplied in a pair of integral ++ registers. Requires SSE2; will use SSE3 if available. For x86_32, ++ -mfpmath=sse, !optimize_size only. */ ++ ++void ++ix86_expand_convert_uns_didf_sse (rtx target, rtx input) ++{ ++ REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt; ++ rtx int_xmm, fp_xmm; ++ rtx biases, exponents; ++ rtx x; ++ ++ int_xmm = gen_reg_rtx (V4SImode); ++ if (TARGET_INTER_UNIT_MOVES_TO_VEC) ++ emit_insn (gen_movdi_to_sse (int_xmm, input)); ++ else if (TARGET_SSE_SPLIT_REGS) ++ { ++ emit_clobber (int_xmm); ++ emit_move_insn (gen_lowpart (DImode, int_xmm), input); ++ } ++ else ++ { ++ x = gen_reg_rtx (V2DImode); ++ ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0); ++ emit_move_insn (int_xmm, gen_lowpart (V4SImode, x)); ++ } ++ ++ x = gen_rtx_CONST_VECTOR (V4SImode, ++ gen_rtvec (4, GEN_INT (0x43300000UL), ++ GEN_INT (0x45300000UL), ++ const0_rtx, const0_rtx)); ++ exponents = validize_mem (force_const_mem (V4SImode, x)); ++ ++ /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ ++ emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents)); ++ ++ /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) ++ yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). ++ Similarly (0x45300000UL ## fp_value_hi_xmm) yields ++ (0x1.0p84 + double(fp_value_hi_xmm)). ++ Note these exponents differ by 32. */ ++ ++ fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm)); ++ ++ /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values ++ in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */ ++ real_ldexp (&bias_lo_rvt, &dconst1, 52); ++ real_ldexp (&bias_hi_rvt, &dconst1, 84); ++ biases = const_double_from_real_value (bias_lo_rvt, DFmode); ++ x = const_double_from_real_value (bias_hi_rvt, DFmode); ++ biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x)); ++ biases = validize_mem (force_const_mem (V2DFmode, biases)); ++ emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases)); ++ ++ /* Add the upper and lower DFmode values together. 
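++ With SSE3 a single haddv2df does the horizontal add; otherwise the high element is duplicated with unpckhpd and added to a saved copy of the vector, leaving lo + hi in element 0.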
*/ ++ if (TARGET_SSE3) ++ emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm)); ++ else ++ { ++ x = copy_to_mode_reg (V2DFmode, fp_xmm); ++ emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm)); ++ emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); ++ } ++ ++ ix86_expand_vector_extract (false, target, fp_xmm, 0); ++} ++ ++/* Not used, but eases macroization of patterns. */ ++void ++ix86_expand_convert_uns_sixf_sse (rtx, rtx) ++{ ++ gcc_unreachable (); ++} ++ ++/* Convert an unsigned SImode value into a DFmode. Only currently used ++ for SSE, but applicable anywhere. */ ++ ++void ++ix86_expand_convert_uns_sidf_sse (rtx target, rtx input) ++{ ++ REAL_VALUE_TYPE TWO31r; ++ rtx x, fp; ++ ++ x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1), ++ NULL, 1, OPTAB_DIRECT); ++ ++ fp = gen_reg_rtx (DFmode); ++ emit_insn (gen_floatsidf2 (fp, x)); ++ ++ real_ldexp (&TWO31r, &dconst1, 31); ++ x = const_double_from_real_value (TWO31r, DFmode); ++ ++ x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT); ++ if (x != target) ++ emit_move_insn (target, x); ++} ++ ++/* Convert a signed DImode value into a DFmode. Only used for SSE in ++ 32-bit mode; otherwise we have a direct convert instruction. */ ++ ++void ++ix86_expand_convert_sign_didf_sse (rtx target, rtx input) ++{ ++ REAL_VALUE_TYPE TWO32r; ++ rtx fp_lo, fp_hi, x; ++ ++ fp_lo = gen_reg_rtx (DFmode); ++ fp_hi = gen_reg_rtx (DFmode); ++ ++ emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input))); ++ ++ real_ldexp (&TWO32r, &dconst1, 32); ++ x = const_double_from_real_value (TWO32r, DFmode); ++ fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT); ++ ++ ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input)); ++ ++ x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target, ++ 0, OPTAB_DIRECT); ++ if (x != target) ++ emit_move_insn (target, x); ++} ++ ++/* Convert an unsigned SImode value into a SFmode, using only SSE. ++ For x86_32, -mfpmath=sse, !optimize_size only. */ ++void ++ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) ++{ ++ REAL_VALUE_TYPE ONE16r; ++ rtx fp_hi, fp_lo, int_hi, int_lo, x; ++ ++ real_ldexp (&ONE16r, &dconst1, 16); ++ x = const_double_from_real_value (ONE16r, SFmode); ++ int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff), ++ NULL, 0, OPTAB_DIRECT); ++ int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16), ++ NULL, 0, OPTAB_DIRECT); ++ fp_hi = gen_reg_rtx (SFmode); ++ fp_lo = gen_reg_rtx (SFmode); ++ emit_insn (gen_floatsisf2 (fp_hi, int_hi)); ++ emit_insn (gen_floatsisf2 (fp_lo, int_lo)); ++ fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, ++ 0, OPTAB_DIRECT); ++ fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, ++ 0, OPTAB_DIRECT); ++ if (!rtx_equal_p (target, fp_hi)) ++ emit_move_insn (target, fp_hi); ++} ++ ++/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert ++ a vector of unsigned ints VAL to vector of floats TARGET. 
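++ Each element is split into its low and high 16-bit halves, both halves are converted with the signed cvtdq2ps (they are in [0, 0xffff], so the sign does not matter), and the result is recombined as lo + hi * 0x1p16.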
*/ ++ ++void ++ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val) ++{ ++ rtx tmp[8]; ++ REAL_VALUE_TYPE TWO16r; ++ machine_mode intmode = GET_MODE (val); ++ machine_mode fltmode = GET_MODE (target); ++ rtx (*cvt) (rtx, rtx); ++ ++ if (intmode == V4SImode) ++ cvt = gen_floatv4siv4sf2; ++ else ++ cvt = gen_floatv8siv8sf2; ++ tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff)); ++ tmp[0] = force_reg (intmode, tmp[0]); ++ tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1, ++ OPTAB_DIRECT); ++ tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16), ++ NULL_RTX, 1, OPTAB_DIRECT); ++ tmp[3] = gen_reg_rtx (fltmode); ++ emit_insn (cvt (tmp[3], tmp[1])); ++ tmp[4] = gen_reg_rtx (fltmode); ++ emit_insn (cvt (tmp[4], tmp[2])); ++ real_ldexp (&TWO16r, &dconst1, 16); ++ tmp[5] = const_double_from_real_value (TWO16r, SFmode); ++ tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5])); ++ tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1, ++ OPTAB_DIRECT); ++ tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1, ++ OPTAB_DIRECT); ++ if (tmp[7] != target) ++ emit_move_insn (target, tmp[7]); ++} ++ ++/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc* ++ pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*. ++ This is done by doing just signed conversion if < 0x1p31, and otherwise by ++ subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */ ++ ++rtx ++ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp) ++{ ++ REAL_VALUE_TYPE TWO31r; ++ rtx two31r, tmp[4]; ++ machine_mode mode = GET_MODE (val); ++ machine_mode scalarmode = GET_MODE_INNER (mode); ++ machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode; ++ rtx (*cmp) (rtx, rtx, rtx, rtx); ++ int i; ++ ++ for (i = 0; i < 3; i++) ++ tmp[i] = gen_reg_rtx (mode); ++ real_ldexp (&TWO31r, &dconst1, 31); ++ two31r = const_double_from_real_value (TWO31r, scalarmode); ++ two31r = ix86_build_const_vector (mode, 1, two31r); ++ two31r = force_reg (mode, two31r); ++ switch (mode) ++ { ++ case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break; ++ case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break; ++ case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break; ++ case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break; ++ default: gcc_unreachable (); ++ } ++ tmp[3] = gen_rtx_LE (mode, two31r, val); ++ emit_insn (cmp (tmp[0], two31r, val, tmp[3])); ++ tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1], ++ 0, OPTAB_DIRECT); ++ if (intmode == V4SImode || TARGET_AVX2) ++ *xorp = expand_simple_binop (intmode, ASHIFT, ++ gen_lowpart (intmode, tmp[0]), ++ GEN_INT (31), NULL_RTX, 0, ++ OPTAB_DIRECT); ++ else ++ { ++ rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31); ++ two31 = ix86_build_const_vector (intmode, 1, two31); ++ *xorp = expand_simple_binop (intmode, AND, ++ gen_lowpart (intmode, tmp[0]), ++ two31, NULL_RTX, 0, ++ OPTAB_DIRECT); ++ } ++ return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2], ++ 0, OPTAB_DIRECT); ++} ++ ++/* Generate code for floating point ABS or NEG. 
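++ With SSE both reduce to bit operations on the sign bit: NEG xors with a sign-bit mask, ABS ands with the inverted mask; ix86_build_signbit_mask supplies the constant.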
*/ ++ ++void ++ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, ++ rtx operands[]) ++{ ++ rtx mask, set, dst, src; ++ bool use_sse = false; ++ bool vector_mode = VECTOR_MODE_P (mode); ++ machine_mode vmode = mode; ++ ++ if (vector_mode) ++ use_sse = true; ++ else if (mode == TFmode) ++ use_sse = true; ++ else if (TARGET_SSE_MATH) ++ { ++ use_sse = SSE_FLOAT_MODE_P (mode); ++ if (mode == SFmode) ++ vmode = V4SFmode; ++ else if (mode == DFmode) ++ vmode = V2DFmode; ++ } ++ ++ /* NEG and ABS performed with SSE use bitwise mask operations. ++ Create the appropriate mask now. */ ++ if (use_sse) ++ mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS); ++ else ++ mask = NULL_RTX; ++ ++ dst = operands[0]; ++ src = operands[1]; ++ ++ set = gen_rtx_fmt_e (code, mode, src); ++ set = gen_rtx_SET (dst, set); ++ ++ if (mask) ++ { ++ rtx use, clob; ++ rtvec par; ++ ++ use = gen_rtx_USE (VOIDmode, mask); ++ if (vector_mode) ++ par = gen_rtvec (2, set, use); ++ else ++ { ++ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); ++ par = gen_rtvec (3, set, use, clob); ++ } ++ emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); ++ } ++ else ++ emit_insn (set); ++} ++ ++/* Expand a copysign operation. Special case operand 0 being a constant. */ ++ ++void ++ix86_expand_copysign (rtx operands[]) ++{ ++ machine_mode mode, vmode; ++ rtx dest, op0, op1, mask, nmask; ++ ++ dest = operands[0]; ++ op0 = operands[1]; ++ op1 = operands[2]; ++ ++ mode = GET_MODE (dest); ++ ++ if (mode == SFmode) ++ vmode = V4SFmode; ++ else if (mode == DFmode) ++ vmode = V2DFmode; ++ else ++ vmode = mode; ++ ++ if (CONST_DOUBLE_P (op0)) ++ { ++ rtx (*copysign_insn)(rtx, rtx, rtx, rtx); ++ ++ if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) ++ op0 = simplify_unary_operation (ABS, mode, op0, mode); ++ ++ if (mode == SFmode || mode == DFmode) ++ { ++ if (op0 == CONST0_RTX (mode)) ++ op0 = CONST0_RTX (vmode); ++ else ++ { ++ rtx v = ix86_build_const_vector (vmode, false, op0); ++ ++ op0 = force_reg (vmode, v); ++ } ++ } ++ else if (op0 != CONST0_RTX (mode)) ++ op0 = force_reg (mode, op0); ++ ++ mask = ix86_build_signbit_mask (vmode, 0, 0); ++ ++ if (mode == SFmode) ++ copysign_insn = gen_copysignsf3_const; ++ else if (mode == DFmode) ++ copysign_insn = gen_copysigndf3_const; ++ else ++ copysign_insn = gen_copysigntf3_const; ++ ++ emit_insn (copysign_insn (dest, op0, op1, mask)); ++ } ++ else ++ { ++ rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx); ++ ++ nmask = ix86_build_signbit_mask (vmode, 0, 1); ++ mask = ix86_build_signbit_mask (vmode, 0, 0); ++ ++ if (mode == SFmode) ++ copysign_insn = gen_copysignsf3_var; ++ else if (mode == DFmode) ++ copysign_insn = gen_copysigndf3_var; ++ else ++ copysign_insn = gen_copysigntf3_var; ++ ++ emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask)); ++ } ++} ++ ++/* Deconstruct a copysign operation into bit masks. Operand 0 is known to ++ be a constant, and so has already been expanded into a vector constant. 
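++ The result is formed as (DEST & sign-bit mask) | op0: only the sign bit of the value already in DEST (the sign source) survives, and the constant magnitude is ORed in.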
*/ ++ ++void ++ix86_split_copysign_const (rtx operands[]) ++{ ++ machine_mode mode, vmode; ++ rtx dest, op0, mask, x; ++ ++ dest = operands[0]; ++ op0 = operands[1]; ++ mask = operands[3]; ++ ++ mode = GET_MODE (dest); ++ vmode = GET_MODE (mask); ++ ++ dest = lowpart_subreg (vmode, dest, mode); ++ x = gen_rtx_AND (vmode, dest, mask); ++ emit_insn (gen_rtx_SET (dest, x)); ++ ++ if (op0 != CONST0_RTX (vmode)) ++ { ++ x = gen_rtx_IOR (vmode, dest, op0); ++ emit_insn (gen_rtx_SET (dest, x)); ++ } ++} ++ ++/* Deconstruct a copysign operation into bit masks. Operand 0 is variable, ++ so we have to do two masks. */ ++ ++void ++ix86_split_copysign_var (rtx operands[]) ++{ ++ machine_mode mode, vmode; ++ rtx dest, scratch, op0, op1, mask, nmask, x; ++ ++ dest = operands[0]; ++ scratch = operands[1]; ++ op0 = operands[2]; ++ op1 = operands[3]; ++ nmask = operands[4]; ++ mask = operands[5]; ++ ++ mode = GET_MODE (dest); ++ vmode = GET_MODE (mask); ++ ++ if (rtx_equal_p (op0, op1)) ++ { ++ /* Shouldn't happen often (it's useless, obviously), but when it does ++ we'd generate incorrect code if we continue below. */ ++ emit_move_insn (dest, op0); ++ return; ++ } ++ ++ if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ ++ { ++ gcc_assert (REGNO (op1) == REGNO (scratch)); ++ ++ x = gen_rtx_AND (vmode, scratch, mask); ++ emit_insn (gen_rtx_SET (scratch, x)); ++ ++ dest = mask; ++ op0 = lowpart_subreg (vmode, op0, mode); ++ x = gen_rtx_NOT (vmode, dest); ++ x = gen_rtx_AND (vmode, x, op0); ++ emit_insn (gen_rtx_SET (dest, x)); ++ } ++ else ++ { ++ if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ ++ { ++ x = gen_rtx_AND (vmode, scratch, mask); ++ } ++ else /* alternative 2,4 */ ++ { ++ gcc_assert (REGNO (mask) == REGNO (scratch)); ++ op1 = lowpart_subreg (vmode, op1, mode); ++ x = gen_rtx_AND (vmode, scratch, op1); ++ } ++ emit_insn (gen_rtx_SET (scratch, x)); ++ ++ if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ ++ { ++ dest = lowpart_subreg (vmode, op0, mode); ++ x = gen_rtx_AND (vmode, dest, nmask); ++ } ++ else /* alternative 3,4 */ ++ { ++ gcc_assert (REGNO (nmask) == REGNO (dest)); ++ dest = nmask; ++ op0 = lowpart_subreg (vmode, op0, mode); ++ x = gen_rtx_AND (vmode, dest, op0); ++ } ++ emit_insn (gen_rtx_SET (dest, x)); ++ } ++ ++ x = gen_rtx_IOR (vmode, dest, scratch); ++ emit_insn (gen_rtx_SET (dest, x)); ++} ++ ++/* Expand an xorsign operation. */ ++ ++void ++ix86_expand_xorsign (rtx operands[]) ++{ ++ rtx (*xorsign_insn)(rtx, rtx, rtx, rtx); ++ machine_mode mode, vmode; ++ rtx dest, op0, op1, mask; ++ ++ dest = operands[0]; ++ op0 = operands[1]; ++ op1 = operands[2]; ++ ++ mode = GET_MODE (dest); ++ ++ if (mode == SFmode) ++ { ++ xorsign_insn = gen_xorsignsf3_1; ++ vmode = V4SFmode; ++ } ++ else if (mode == DFmode) ++ { ++ xorsign_insn = gen_xorsigndf3_1; ++ vmode = V2DFmode; ++ } ++ else ++ gcc_unreachable (); ++ ++ mask = ix86_build_signbit_mask (vmode, 0, 0); ++ ++ emit_insn (xorsign_insn (dest, op0, op1, mask)); ++} ++ ++/* Deconstruct an xorsign operation into bit masks. 
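++ The result is (DEST & sign-bit mask) ^ op0, i.e. op0 with its sign flipped when the sign source held in DEST is negative.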
*/ ++ ++void ++ix86_split_xorsign (rtx operands[]) ++{ ++ machine_mode mode, vmode; ++ rtx dest, op0, mask, x; ++ ++ dest = operands[0]; ++ op0 = operands[1]; ++ mask = operands[3]; ++ ++ mode = GET_MODE (dest); ++ vmode = GET_MODE (mask); ++ ++ dest = lowpart_subreg (vmode, dest, mode); ++ x = gen_rtx_AND (vmode, dest, mask); ++ emit_insn (gen_rtx_SET (dest, x)); ++ ++ op0 = lowpart_subreg (vmode, op0, mode); ++ x = gen_rtx_XOR (vmode, dest, op0); ++ emit_insn (gen_rtx_SET (dest, x)); ++} ++ ++static rtx ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1); ++ ++void ++ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) ++{ ++ machine_mode mode = GET_MODE (op0); ++ rtx tmp; ++ ++ /* Handle special case - vector comparsion with boolean result, transform ++ it using ptest instruction. */ ++ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) ++ { ++ rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG); ++ machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode; ++ ++ gcc_assert (code == EQ || code == NE); ++ /* Generate XOR since we can't check that one operand is zero vector. */ ++ tmp = gen_reg_rtx (mode); ++ emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1))); ++ tmp = gen_lowpart (p_mode, tmp); ++ emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG), ++ gen_rtx_UNSPEC (CCmode, ++ gen_rtvec (2, tmp, tmp), ++ UNSPEC_PTEST))); ++ tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx); ++ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, ++ gen_rtx_LABEL_REF (VOIDmode, label), ++ pc_rtx); ++ emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); ++ return; ++ } ++ ++ switch (mode) ++ { ++ case E_SFmode: ++ case E_DFmode: ++ case E_XFmode: ++ case E_QImode: ++ case E_HImode: ++ case E_SImode: ++ simple: ++ tmp = ix86_expand_compare (code, op0, op1); ++ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, ++ gen_rtx_LABEL_REF (VOIDmode, label), ++ pc_rtx); ++ emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); ++ return; ++ ++ case E_DImode: ++ if (TARGET_64BIT) ++ goto simple; ++ /* For 32-bit target DI comparison may be performed on ++ SSE registers. To allow this we should avoid split ++ to SI mode which is achieved by doing xor in DI mode ++ and then comparing with zero (which is recognized by ++ STV pass). We don't compare using xor when optimizing ++ for size. */ ++ if (!optimize_insn_for_size_p () ++ && TARGET_STV ++ && (code == EQ || code == NE)) ++ { ++ op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1)); ++ op1 = const0_rtx; ++ } ++ /* FALLTHRU */ ++ case E_TImode: ++ /* Expand DImode branch into multiple compare+branch. */ ++ { ++ rtx lo[2], hi[2]; ++ rtx_code_label *label2; ++ enum rtx_code code1, code2, code3; ++ machine_mode submode; ++ ++ if (CONSTANT_P (op0) && !CONSTANT_P (op1)) ++ { ++ std::swap (op0, op1); ++ code = swap_condition (code); ++ } ++ ++ split_double_mode (mode, &op0, 1, lo+0, hi+0); ++ split_double_mode (mode, &op1, 1, lo+1, hi+1); ++ ++ submode = mode == DImode ? SImode : DImode; ++ ++ /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to ++ avoid two branches. This costs one extra insn, so disable when ++ optimizing for size. 
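++ (Unless the high or low half of the second operand is a literal zero, in which case the corresponding xor disappears and the sequence is no bigger.)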
*/ ++ ++ if ((code == EQ || code == NE) ++ && (!optimize_insn_for_size_p () ++ || hi[1] == const0_rtx || lo[1] == const0_rtx)) ++ { ++ rtx xor0, xor1; ++ ++ xor1 = hi[0]; ++ if (hi[1] != const0_rtx) ++ xor1 = expand_binop (submode, xor_optab, xor1, hi[1], ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ xor0 = lo[0]; ++ if (lo[1] != const0_rtx) ++ xor0 = expand_binop (submode, xor_optab, xor0, lo[1], ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ tmp = expand_binop (submode, ior_optab, xor1, xor0, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ ix86_expand_branch (code, tmp, const0_rtx, label); ++ return; ++ } ++ ++ /* Otherwise, if we are doing less-than or greater-or-equal-than, ++ op1 is a constant and the low word is zero, then we can just ++ examine the high word. Similarly for low word -1 and ++ less-or-equal-than or greater-than. */ ++ ++ if (CONST_INT_P (hi[1])) ++ switch (code) ++ { ++ case LT: case LTU: case GE: case GEU: ++ if (lo[1] == const0_rtx) ++ { ++ ix86_expand_branch (code, hi[0], hi[1], label); ++ return; ++ } ++ break; ++ case LE: case LEU: case GT: case GTU: ++ if (lo[1] == constm1_rtx) ++ { ++ ix86_expand_branch (code, hi[0], hi[1], label); ++ return; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ /* Emulate comparisons that do not depend on Zero flag with ++ double-word subtraction. Note that only Overflow, Sign ++ and Carry flags are valid, so swap arguments and condition ++ of comparisons that would otherwise test Zero flag. */ ++ ++ switch (code) ++ { ++ case LE: case LEU: case GT: case GTU: ++ std::swap (lo[0], lo[1]); ++ std::swap (hi[0], hi[1]); ++ code = swap_condition (code); ++ /* FALLTHRU */ ++ ++ case LT: case LTU: case GE: case GEU: ++ { ++ rtx (*cmp_insn) (rtx, rtx); ++ rtx (*sbb_insn) (rtx, rtx, rtx); ++ bool uns = (code == LTU || code == GEU); ++ ++ if (TARGET_64BIT) ++ { ++ cmp_insn = gen_cmpdi_1; ++ sbb_insn ++ = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz; ++ } ++ else ++ { ++ cmp_insn = gen_cmpsi_1; ++ sbb_insn ++ = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz; ++ } ++ ++ if (!nonimmediate_operand (lo[0], submode)) ++ lo[0] = force_reg (submode, lo[0]); ++ if (!x86_64_general_operand (lo[1], submode)) ++ lo[1] = force_reg (submode, lo[1]); ++ ++ if (!register_operand (hi[0], submode)) ++ hi[0] = force_reg (submode, hi[0]); ++ if ((uns && !nonimmediate_operand (hi[1], submode)) ++ || (!uns && !x86_64_general_operand (hi[1], submode))) ++ hi[1] = force_reg (submode, hi[1]); ++ ++ emit_insn (cmp_insn (lo[0], lo[1])); ++ emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1])); ++ ++ tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG); ++ ++ ix86_expand_branch (code, tmp, const0_rtx, label); ++ return; ++ } ++ ++ default: ++ break; ++ } ++ ++ /* Otherwise, we need two or three jumps. 
*/ ++ ++ label2 = gen_label_rtx (); ++ ++ code1 = code; ++ code2 = swap_condition (code); ++ code3 = unsigned_condition (code); ++ ++ switch (code) ++ { ++ case LT: case GT: case LTU: case GTU: ++ break; ++ ++ case LE: code1 = LT; code2 = GT; break; ++ case GE: code1 = GT; code2 = LT; break; ++ case LEU: code1 = LTU; code2 = GTU; break; ++ case GEU: code1 = GTU; code2 = LTU; break; ++ ++ case EQ: code1 = UNKNOWN; code2 = NE; break; ++ case NE: code2 = UNKNOWN; break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* ++ * a < b => ++ * if (hi(a) < hi(b)) goto true; ++ * if (hi(a) > hi(b)) goto false; ++ * if (lo(a) < lo(b)) goto true; ++ * false: ++ */ ++ ++ if (code1 != UNKNOWN) ++ ix86_expand_branch (code1, hi[0], hi[1], label); ++ if (code2 != UNKNOWN) ++ ix86_expand_branch (code2, hi[0], hi[1], label2); ++ ++ ix86_expand_branch (code3, lo[0], lo[1], label); ++ ++ if (code2 != UNKNOWN) ++ emit_label (label2); ++ return; ++ } ++ ++ default: ++ gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC); ++ goto simple; ++ } ++} ++ ++/* Figure out whether to use unordered fp comparisons. */ ++ ++static bool ++ix86_unordered_fp_compare (enum rtx_code code) ++{ ++ if (!TARGET_IEEE_FP) ++ return false; ++ ++ switch (code) ++ { ++ case GT: ++ case GE: ++ case LT: ++ case LE: ++ return false; ++ ++ case EQ: ++ case NE: ++ ++ case LTGT: ++ case UNORDERED: ++ case ORDERED: ++ case UNLT: ++ case UNLE: ++ case UNGT: ++ case UNGE: ++ case UNEQ: ++ return true; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* Return a comparison we can do and that it is equivalent to ++ swap_condition (code) apart possibly from orderedness. ++ But, never change orderedness if TARGET_IEEE_FP, returning ++ UNKNOWN in that case if necessary. */ ++ ++static enum rtx_code ++ix86_fp_swap_condition (enum rtx_code code) ++{ ++ switch (code) ++ { ++ case GT: /* GTU - CF=0 & ZF=0 */ ++ return TARGET_IEEE_FP ? UNKNOWN : UNLT; ++ case GE: /* GEU - CF=0 */ ++ return TARGET_IEEE_FP ? UNKNOWN : UNLE; ++ case UNLT: /* LTU - CF=1 */ ++ return TARGET_IEEE_FP ? UNKNOWN : GT; ++ case UNLE: /* LEU - CF=1 | ZF=1 */ ++ return TARGET_IEEE_FP ? UNKNOWN : GE; ++ default: ++ return swap_condition (code); ++ } ++} ++ ++/* Return cost of comparison CODE using the best strategy for performance. ++ All following functions do use number of instructions as a cost metrics. ++ In future this should be tweaked to compute bytes for optimize_size and ++ take into account performance of various instructions on various CPUs. */ ++ ++static int ++ix86_fp_comparison_cost (enum rtx_code code) ++{ ++ int arith_cost; ++ ++ /* The cost of code using bit-twiddling on %ah. */ ++ switch (code) ++ { ++ case UNLE: ++ case UNLT: ++ case LTGT: ++ case GT: ++ case GE: ++ case UNORDERED: ++ case ORDERED: ++ case UNEQ: ++ arith_cost = 4; ++ break; ++ case LT: ++ case NE: ++ case EQ: ++ case UNGE: ++ arith_cost = TARGET_IEEE_FP ? 5 : 4; ++ break; ++ case LE: ++ case UNGT: ++ arith_cost = TARGET_IEEE_FP ? 6 : 4; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ switch (ix86_fp_comparison_strategy (code)) ++ { ++ case IX86_FPCMP_COMI: ++ return arith_cost > 4 ? 3 : 2; ++ case IX86_FPCMP_SAHF: ++ return arith_cost > 4 ? 4 : 3; ++ default: ++ return arith_cost; ++ } ++} ++ ++/* Swap, force into registers, or otherwise massage the two operands ++ to a fp comparison. The operands are updated in place; the new ++ comparison code is returned. 
*/ ++ ++static enum rtx_code ++ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) ++{ ++ bool unordered_compare = ix86_unordered_fp_compare (code); ++ rtx op0 = *pop0, op1 = *pop1; ++ machine_mode op_mode = GET_MODE (op0); ++ bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); ++ ++ /* All of the unordered compare instructions only work on registers. ++ The same is true of the fcomi compare instructions. The XFmode ++ compare instructions require registers except when comparing ++ against zero or when converting operand 1 from fixed point to ++ floating point. */ ++ ++ if (!is_sse ++ && (unordered_compare ++ || (op_mode == XFmode ++ && ! (standard_80387_constant_p (op0) == 1 ++ || standard_80387_constant_p (op1) == 1) ++ && GET_CODE (op1) != FLOAT) ++ || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI)) ++ { ++ op0 = force_reg (op_mode, op0); ++ op1 = force_reg (op_mode, op1); ++ } ++ else ++ { ++ /* %%% We only allow op1 in memory; op0 must be st(0). So swap ++ things around if they appear profitable, otherwise force op0 ++ into a register. */ ++ ++ if (standard_80387_constant_p (op0) == 0 ++ || (MEM_P (op0) ++ && ! (standard_80387_constant_p (op1) == 0 ++ || MEM_P (op1)))) ++ { ++ enum rtx_code new_code = ix86_fp_swap_condition (code); ++ if (new_code != UNKNOWN) ++ { ++ std::swap (op0, op1); ++ code = new_code; ++ } ++ } ++ ++ if (!REG_P (op0)) ++ op0 = force_reg (op_mode, op0); ++ ++ if (CONSTANT_P (op1)) ++ { ++ int tmp = standard_80387_constant_p (op1); ++ if (tmp == 0) ++ op1 = validize_mem (force_const_mem (op_mode, op1)); ++ else if (tmp == 1) ++ { ++ if (TARGET_CMOVE) ++ op1 = force_reg (op_mode, op1); ++ } ++ else ++ op1 = force_reg (op_mode, op1); ++ } ++ } ++ ++ /* Try to rearrange the comparison to make it cheaper. */ ++ if (ix86_fp_comparison_cost (code) ++ > ix86_fp_comparison_cost (swap_condition (code)) ++ && (REG_P (op1) || can_create_pseudo_p ())) ++ { ++ std::swap (op0, op1); ++ code = swap_condition (code); ++ if (!REG_P (op0)) ++ op0 = force_reg (op_mode, op0); ++ } ++ ++ *pop0 = op0; ++ *pop1 = op1; ++ return code; ++} ++ ++/* Generate insn patterns to do a floating point compare of OPERANDS. */ ++ ++static rtx ++ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1) ++{ ++ bool unordered_compare = ix86_unordered_fp_compare (code); ++ machine_mode cmp_mode; ++ rtx tmp, scratch; ++ ++ code = ix86_prepare_fp_compare_args (code, &op0, &op1); ++ ++ tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); ++ if (unordered_compare) ++ tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP); ++ ++ /* Do fcomi/sahf based test when profitable. */ ++ switch (ix86_fp_comparison_strategy (code)) ++ { ++ case IX86_FPCMP_COMI: ++ cmp_mode = CCFPmode; ++ emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp)); ++ break; ++ ++ case IX86_FPCMP_SAHF: ++ cmp_mode = CCFPmode; ++ tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); ++ scratch = gen_reg_rtx (HImode); ++ emit_insn (gen_rtx_SET (scratch, tmp)); ++ emit_insn (gen_x86_sahf_1 (scratch)); ++ break; ++ ++ case IX86_FPCMP_ARITH: ++ cmp_mode = CCNOmode; ++ tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); ++ scratch = gen_reg_rtx (HImode); ++ emit_insn (gen_rtx_SET (scratch, tmp)); ++ ++ /* In the unordered case, we have to check C2 for NaN's, which ++ doesn't happen to work out to anything nice combination-wise. ++ So do some bit twiddling on the value we've got in AH to come ++ up with an appropriate set of condition codes. 
*/ ++ ++ switch (code) ++ { ++ case GT: ++ case UNGT: ++ if (code == GT || !TARGET_IEEE_FP) ++ { ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); ++ code = EQ; ++ } ++ else ++ { ++ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); ++ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); ++ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); ++ cmp_mode = CCmode; ++ code = GEU; ++ } ++ break; ++ case LT: ++ case UNLT: ++ if (code == LT && TARGET_IEEE_FP) ++ { ++ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); ++ emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx)); ++ cmp_mode = CCmode; ++ code = EQ; ++ } ++ else ++ { ++ emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx)); ++ code = NE; ++ } ++ break; ++ case GE: ++ case UNGE: ++ if (code == GE || !TARGET_IEEE_FP) ++ { ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05))); ++ code = EQ; ++ } ++ else ++ { ++ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); ++ emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx)); ++ code = NE; ++ } ++ break; ++ case LE: ++ case UNLE: ++ if (code == LE && TARGET_IEEE_FP) ++ { ++ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); ++ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); ++ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); ++ cmp_mode = CCmode; ++ code = LTU; ++ } ++ else ++ { ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); ++ code = NE; ++ } ++ break; ++ case EQ: ++ case UNEQ: ++ if (code == EQ && TARGET_IEEE_FP) ++ { ++ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); ++ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); ++ cmp_mode = CCmode; ++ code = EQ; ++ } ++ else ++ { ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); ++ code = NE; ++ } ++ break; ++ case NE: ++ case LTGT: ++ if (code == NE && TARGET_IEEE_FP) ++ { ++ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); ++ emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, ++ GEN_INT (0x40))); ++ code = NE; ++ } ++ else ++ { ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); ++ code = EQ; ++ } ++ break; ++ ++ case UNORDERED: ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); ++ code = NE; ++ break; ++ case ORDERED: ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); ++ code = EQ; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ break; ++ ++ default: ++ gcc_unreachable(); ++ } ++ ++ /* Return the test that should be put into the flags user, i.e. ++ the bcc, scc, or cmov instruction. */ ++ return gen_rtx_fmt_ee (code, VOIDmode, ++ gen_rtx_REG (cmp_mode, FLAGS_REG), ++ const0_rtx); ++} ++ ++/* Generate insn patterns to do an integer compare of OPERANDS. */ ++ ++static rtx ++ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) ++{ ++ machine_mode cmpmode; ++ rtx tmp, flags; ++ ++ cmpmode = SELECT_CC_MODE (code, op0, op1); ++ flags = gen_rtx_REG (cmpmode, FLAGS_REG); ++ ++ /* This is very simple, but making the interface the same as in the ++ FP case makes the rest of the code easier. */ ++ tmp = gen_rtx_COMPARE (cmpmode, op0, op1); ++ emit_insn (gen_rtx_SET (flags, tmp)); ++ ++ /* Return the test that should be put into the flags user, i.e. ++ the bcc, scc, or cmov instruction. 
*/ ++ return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); ++} ++ ++static rtx ++ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1) ++{ ++ rtx ret; ++ ++ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) ++ ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); ++ ++ else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) ++ { ++ gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0))); ++ ret = ix86_expand_fp_compare (code, op0, op1); ++ } ++ else ++ ret = ix86_expand_int_compare (code, op0, op1); ++ ++ return ret; ++} ++ ++void ++ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1) ++{ ++ rtx ret; ++ ++ gcc_assert (GET_MODE (dest) == QImode); ++ ++ ret = ix86_expand_compare (code, op0, op1); ++ PUT_MODE (ret, QImode); ++ emit_insn (gen_rtx_SET (dest, ret)); ++} ++ ++/* Expand comparison setting or clearing carry flag. Return true when ++ successful and set pop for the operation. */ ++static bool ++ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) ++{ ++ machine_mode mode ++ = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); ++ ++ /* Do not handle double-mode compares that go through special path. */ ++ if (mode == (TARGET_64BIT ? TImode : DImode)) ++ return false; ++ ++ if (SCALAR_FLOAT_MODE_P (mode)) ++ { ++ rtx compare_op; ++ rtx_insn *compare_seq; ++ ++ gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); ++ ++ /* Shortcut: following common codes never translate ++ into carry flag compares. */ ++ if (code == EQ || code == NE || code == UNEQ || code == LTGT ++ || code == ORDERED || code == UNORDERED) ++ return false; ++ ++ /* These comparisons require zero flag; swap operands so they won't. */ ++ if ((code == GT || code == UNLE || code == LE || code == UNGT) ++ && !TARGET_IEEE_FP) ++ { ++ std::swap (op0, op1); ++ code = swap_condition (code); ++ } ++ ++ /* Try to expand the comparison and verify that we end up with ++ carry flag based comparison. This fails to be true only when ++ we decide to expand comparison using arithmetic that is not ++ too common scenario. */ ++ start_sequence (); ++ compare_op = ix86_expand_fp_compare (code, op0, op1); ++ compare_seq = get_insns (); ++ end_sequence (); ++ ++ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode) ++ code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); ++ else ++ code = GET_CODE (compare_op); ++ ++ if (code != LTU && code != GEU) ++ return false; ++ ++ emit_insn (compare_seq); ++ *pop = compare_op; ++ return true; ++ } ++ ++ if (!INTEGRAL_MODE_P (mode)) ++ return false; ++ ++ switch (code) ++ { ++ case LTU: ++ case GEU: ++ break; ++ ++ /* Convert a==0 into (unsigned)a<1. */ ++ case EQ: ++ case NE: ++ if (op1 != const0_rtx) ++ return false; ++ op1 = const1_rtx; ++ code = (code == EQ ? LTU : GEU); ++ break; ++ ++ /* Convert a>b into b=b-1. */ ++ case GTU: ++ case LEU: ++ if (CONST_INT_P (op1)) ++ { ++ op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); ++ /* Bail out on overflow. We still can swap operands but that ++ would force loading of the constant into register. */ ++ if (op1 == const0_rtx ++ || !x86_64_immediate_operand (op1, GET_MODE (op1))) ++ return false; ++ code = (code == GTU ? GEU : LTU); ++ } ++ else ++ { ++ std::swap (op0, op1); ++ code = (code == GTU ? LTU : GEU); ++ } ++ break; ++ ++ /* Convert a>=0 into (unsigned)a<0x80000000. */ ++ case LT: ++ case GE: ++ if (mode == DImode || op1 != const0_rtx) ++ return false; ++ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); ++ code = (code == LT ? 
GEU : LTU); ++ break; ++ case LE: ++ case GT: ++ if (mode == DImode || op1 != constm1_rtx) ++ return false; ++ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); ++ code = (code == LE ? GEU : LTU); ++ break; ++ ++ default: ++ return false; ++ } ++ /* Swapping operands may cause constant to appear as first operand. */ ++ if (!nonimmediate_operand (op0, VOIDmode)) ++ { ++ if (!can_create_pseudo_p ()) ++ return false; ++ op0 = force_reg (mode, op0); ++ } ++ *pop = ix86_expand_compare (code, op0, op1); ++ gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); ++ return true; ++} ++ ++/* Expand conditional increment or decrement using adb/sbb instructions. ++ The default case using setcc followed by the conditional move can be ++ done by generic code. */ ++bool ++ix86_expand_int_addcc (rtx operands[]) ++{ ++ enum rtx_code code = GET_CODE (operands[1]); ++ rtx flags; ++ rtx (*insn)(rtx, rtx, rtx, rtx, rtx); ++ rtx compare_op; ++ rtx val = const0_rtx; ++ bool fpcmp = false; ++ machine_mode mode; ++ rtx op0 = XEXP (operands[1], 0); ++ rtx op1 = XEXP (operands[1], 1); ++ ++ if (operands[3] != const1_rtx ++ && operands[3] != constm1_rtx) ++ return false; ++ if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) ++ return false; ++ code = GET_CODE (compare_op); ++ ++ flags = XEXP (compare_op, 0); ++ ++ if (GET_MODE (flags) == CCFPmode) ++ { ++ fpcmp = true; ++ code = ix86_fp_compare_code_to_integer (code); ++ } ++ ++ if (code != LTU) ++ { ++ val = constm1_rtx; ++ if (fpcmp) ++ PUT_CODE (compare_op, ++ reverse_condition_maybe_unordered ++ (GET_CODE (compare_op))); ++ else ++ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); ++ } ++ ++ mode = GET_MODE (operands[0]); ++ ++ /* Construct either adc or sbb insn. */ ++ if ((code == LTU) == (operands[3] == constm1_rtx)) ++ { ++ switch (mode) ++ { ++ case E_QImode: ++ insn = gen_subqi3_carry; ++ break; ++ case E_HImode: ++ insn = gen_subhi3_carry; ++ break; ++ case E_SImode: ++ insn = gen_subsi3_carry; ++ break; ++ case E_DImode: ++ insn = gen_subdi3_carry; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ else ++ { ++ switch (mode) ++ { ++ case E_QImode: ++ insn = gen_addqi3_carry; ++ break; ++ case E_HImode: ++ insn = gen_addhi3_carry; ++ break; ++ case E_SImode: ++ insn = gen_addsi3_carry; ++ break; ++ case E_DImode: ++ insn = gen_adddi3_carry; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ emit_insn (insn (operands[0], operands[2], val, flags, compare_op)); ++ ++ return true; ++} ++ ++bool ++ix86_expand_int_movcc (rtx operands[]) ++{ ++ enum rtx_code code = GET_CODE (operands[1]), compare_code; ++ rtx_insn *compare_seq; ++ rtx compare_op; ++ machine_mode mode = GET_MODE (operands[0]); ++ bool sign_bit_compare_p = false; ++ rtx op0 = XEXP (operands[1], 0); ++ rtx op1 = XEXP (operands[1], 1); ++ ++ if (GET_MODE (op0) == TImode ++ || (GET_MODE (op0) == DImode ++ && !TARGET_64BIT)) ++ return false; ++ ++ start_sequence (); ++ compare_op = ix86_expand_compare (code, op0, op1); ++ compare_seq = get_insns (); ++ end_sequence (); ++ ++ compare_code = GET_CODE (compare_op); ++ ++ if ((op1 == const0_rtx && (code == GE || code == LT)) ++ || (op1 == constm1_rtx && (code == GT || code == LE))) ++ sign_bit_compare_p = true; ++ ++ /* Don't attempt mode expansion here -- if we had to expand 5 or 6 ++ HImode insns, we'd be swallowed in word prefix ops. */ ++ ++ if ((mode != HImode || TARGET_FAST_PREFIX) ++ && (mode != (TARGET_64BIT ? 
TImode : DImode)) ++ && CONST_INT_P (operands[2]) ++ && CONST_INT_P (operands[3])) ++ { ++ rtx out = operands[0]; ++ HOST_WIDE_INT ct = INTVAL (operands[2]); ++ HOST_WIDE_INT cf = INTVAL (operands[3]); ++ HOST_WIDE_INT diff; ++ ++ diff = ct - cf; ++ /* Sign bit compares are better done using shifts than we do by using ++ sbb. */ ++ if (sign_bit_compare_p ++ || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) ++ { ++ /* Detect overlap between destination and compare sources. */ ++ rtx tmp = out; ++ ++ if (!sign_bit_compare_p) ++ { ++ rtx flags; ++ bool fpcmp = false; ++ ++ compare_code = GET_CODE (compare_op); ++ ++ flags = XEXP (compare_op, 0); ++ ++ if (GET_MODE (flags) == CCFPmode) ++ { ++ fpcmp = true; ++ compare_code ++ = ix86_fp_compare_code_to_integer (compare_code); ++ } ++ ++ /* To simplify rest of code, restrict to the GEU case. */ ++ if (compare_code == LTU) ++ { ++ std::swap (ct, cf); ++ compare_code = reverse_condition (compare_code); ++ code = reverse_condition (code); ++ } ++ else ++ { ++ if (fpcmp) ++ PUT_CODE (compare_op, ++ reverse_condition_maybe_unordered ++ (GET_CODE (compare_op))); ++ else ++ PUT_CODE (compare_op, ++ reverse_condition (GET_CODE (compare_op))); ++ } ++ diff = ct - cf; ++ ++ if (reg_overlap_mentioned_p (out, op0) ++ || reg_overlap_mentioned_p (out, op1)) ++ tmp = gen_reg_rtx (mode); ++ ++ if (mode == DImode) ++ emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op)); ++ else ++ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), ++ flags, compare_op)); ++ } ++ else ++ { ++ if (code == GT || code == GE) ++ code = reverse_condition (code); ++ else ++ { ++ std::swap (ct, cf); ++ diff = ct - cf; ++ } ++ tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1); ++ } ++ ++ if (diff == 1) ++ { ++ /* ++ * cmpl op0,op1 ++ * sbbl dest,dest ++ * [addl dest, ct] ++ * ++ * Size 5 - 8. ++ */ ++ if (ct) ++ tmp = expand_simple_binop (mode, PLUS, ++ tmp, GEN_INT (ct), ++ copy_rtx (tmp), 1, OPTAB_DIRECT); ++ } ++ else if (cf == -1) ++ { ++ /* ++ * cmpl op0,op1 ++ * sbbl dest,dest ++ * orl $ct, dest ++ * ++ * Size 8. ++ */ ++ tmp = expand_simple_binop (mode, IOR, ++ tmp, GEN_INT (ct), ++ copy_rtx (tmp), 1, OPTAB_DIRECT); ++ } ++ else if (diff == -1 && ct) ++ { ++ /* ++ * cmpl op0,op1 ++ * sbbl dest,dest ++ * notl dest ++ * [addl dest, cf] ++ * ++ * Size 8 - 11. ++ */ ++ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); ++ if (cf) ++ tmp = expand_simple_binop (mode, PLUS, ++ copy_rtx (tmp), GEN_INT (cf), ++ copy_rtx (tmp), 1, OPTAB_DIRECT); ++ } ++ else ++ { ++ /* ++ * cmpl op0,op1 ++ * sbbl dest,dest ++ * [notl dest] ++ * andl cf - ct, dest ++ * [addl dest, ct] ++ * ++ * Size 8 - 11. 
++ */ ++ ++ if (cf == 0) ++ { ++ cf = ct; ++ ct = 0; ++ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); ++ } ++ ++ tmp = expand_simple_binop (mode, AND, ++ copy_rtx (tmp), ++ gen_int_mode (cf - ct, mode), ++ copy_rtx (tmp), 1, OPTAB_DIRECT); ++ if (ct) ++ tmp = expand_simple_binop (mode, PLUS, ++ copy_rtx (tmp), GEN_INT (ct), ++ copy_rtx (tmp), 1, OPTAB_DIRECT); ++ } ++ ++ if (!rtx_equal_p (tmp, out)) ++ emit_move_insn (copy_rtx (out), copy_rtx (tmp)); ++ ++ return true; ++ } ++ ++ if (diff < 0) ++ { ++ machine_mode cmp_mode = GET_MODE (op0); ++ enum rtx_code new_code; ++ ++ if (SCALAR_FLOAT_MODE_P (cmp_mode)) ++ { ++ gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); ++ ++ /* We may be reversing unordered compare to normal compare, that ++ is not valid in general (we may convert non-trapping condition ++ to trapping one), however on i386 we currently emit all ++ comparisons unordered. */ ++ new_code = reverse_condition_maybe_unordered (code); ++ } ++ else ++ new_code = ix86_reverse_condition (code, cmp_mode); ++ if (new_code != UNKNOWN) ++ { ++ std::swap (ct, cf); ++ diff = -diff; ++ code = new_code; ++ } ++ } ++ ++ compare_code = UNKNOWN; ++ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT ++ && CONST_INT_P (op1)) ++ { ++ if (op1 == const0_rtx ++ && (code == LT || code == GE)) ++ compare_code = code; ++ else if (op1 == constm1_rtx) ++ { ++ if (code == LE) ++ compare_code = LT; ++ else if (code == GT) ++ compare_code = GE; ++ } ++ } ++ ++ /* Optimize dest = (op0 < 0) ? -1 : cf. */ ++ if (compare_code != UNKNOWN ++ && GET_MODE (op0) == GET_MODE (out) ++ && (cf == -1 || ct == -1)) ++ { ++ /* If lea code below could be used, only optimize ++ if it results in a 2 insn sequence. */ ++ ++ if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 ++ || diff == 3 || diff == 5 || diff == 9) ++ || (compare_code == LT && ct == -1) ++ || (compare_code == GE && cf == -1)) ++ { ++ /* ++ * notl op1 (if necessary) ++ * sarl $31, op1 ++ * orl cf, op1 ++ */ ++ if (ct != -1) ++ { ++ cf = ct; ++ ct = -1; ++ code = reverse_condition (code); ++ } ++ ++ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); ++ ++ out = expand_simple_binop (mode, IOR, ++ out, GEN_INT (cf), ++ out, 1, OPTAB_DIRECT); ++ if (out != operands[0]) ++ emit_move_insn (operands[0], out); ++ ++ return true; ++ } ++ } ++ ++ ++ if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 ++ || diff == 3 || diff == 5 || diff == 9) ++ && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) ++ && (mode != DImode ++ || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) ++ { ++ /* ++ * xorl dest,dest ++ * cmpl op1,op2 ++ * setcc dest ++ * lea cf(dest*(ct-cf)),dest ++ * ++ * Size 14. ++ * ++ * This also catches the degenerate setcc-only case. ++ */ ++ ++ rtx tmp; ++ int nops; ++ ++ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); ++ ++ nops = 0; ++ /* On x86_64 the lea instruction operates on Pmode, so we need ++ to get arithmetics done in proper mode to match. 
*/ ++ if (diff == 1) ++ tmp = copy_rtx (out); ++ else ++ { ++ rtx out1; ++ out1 = copy_rtx (out); ++ tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); ++ nops++; ++ if (diff & 1) ++ { ++ tmp = gen_rtx_PLUS (mode, tmp, out1); ++ nops++; ++ } ++ } ++ if (cf != 0) ++ { ++ tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); ++ nops++; ++ } ++ if (!rtx_equal_p (tmp, out)) ++ { ++ if (nops == 1) ++ out = force_operand (tmp, copy_rtx (out)); ++ else ++ emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp))); ++ } ++ if (!rtx_equal_p (out, operands[0])) ++ emit_move_insn (operands[0], copy_rtx (out)); ++ ++ return true; ++ } ++ ++ /* ++ * General case: Jumpful: ++ * xorl dest,dest cmpl op1, op2 ++ * cmpl op1, op2 movl ct, dest ++ * setcc dest jcc 1f ++ * decl dest movl cf, dest ++ * andl (cf-ct),dest 1: ++ * addl ct,dest ++ * ++ * Size 20. Size 14. ++ * ++ * This is reasonably steep, but branch mispredict costs are ++ * high on modern cpus, so consider failing only if optimizing ++ * for space. ++ */ ++ ++ if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) ++ && BRANCH_COST (optimize_insn_for_speed_p (), ++ false) >= 2) ++ { ++ if (cf == 0) ++ { ++ machine_mode cmp_mode = GET_MODE (op0); ++ enum rtx_code new_code; ++ ++ if (SCALAR_FLOAT_MODE_P (cmp_mode)) ++ { ++ gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); ++ ++ /* We may be reversing unordered compare to normal compare, ++ that is not valid in general (we may convert non-trapping ++ condition to trapping one), however on i386 we currently ++ emit all comparisons unordered. */ ++ new_code = reverse_condition_maybe_unordered (code); ++ } ++ else ++ { ++ new_code = ix86_reverse_condition (code, cmp_mode); ++ if (compare_code != UNKNOWN && new_code != UNKNOWN) ++ compare_code = reverse_condition (compare_code); ++ } ++ ++ if (new_code != UNKNOWN) ++ { ++ cf = ct; ++ ct = 0; ++ code = new_code; ++ } ++ } ++ ++ if (compare_code != UNKNOWN) ++ { ++ /* notl op1 (if needed) ++ sarl $31, op1 ++ andl (cf-ct), op1 ++ addl ct, op1 ++ ++ For x < 0 (resp. x <= -1) there will be no notl, ++ so if possible swap the constants to get rid of the ++ complement. ++ True/false will be -1/0 while code below (store flag ++ followed by decrement) is 0/-1, so the constants need ++ to be exchanged once more. */ ++ ++ if (compare_code == GE || !cf) ++ { ++ code = reverse_condition (code); ++ compare_code = LT; ++ } ++ else ++ std::swap (ct, cf); ++ ++ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); ++ } ++ else ++ { ++ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); ++ ++ out = expand_simple_binop (mode, PLUS, copy_rtx (out), ++ constm1_rtx, ++ copy_rtx (out), 1, OPTAB_DIRECT); ++ } ++ ++ out = expand_simple_binop (mode, AND, copy_rtx (out), ++ gen_int_mode (cf - ct, mode), ++ copy_rtx (out), 1, OPTAB_DIRECT); ++ if (ct) ++ out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), ++ copy_rtx (out), 1, OPTAB_DIRECT); ++ if (!rtx_equal_p (out, operands[0])) ++ emit_move_insn (operands[0], copy_rtx (out)); ++ ++ return true; ++ } ++ } ++ ++ if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) ++ { ++ /* Try a few things more with specific constants and a variable. */ ++ ++ optab op; ++ rtx var, orig_out, out, tmp; ++ ++ if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2) ++ return false; ++ ++ /* If one of the two operands is an interesting constant, load a ++ constant with the above and mask it in with a logical operation. 
*/ ++ ++ if (CONST_INT_P (operands[2])) ++ { ++ var = operands[3]; ++ if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) ++ operands[3] = constm1_rtx, op = and_optab; ++ else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) ++ operands[3] = const0_rtx, op = ior_optab; ++ else ++ return false; ++ } ++ else if (CONST_INT_P (operands[3])) ++ { ++ var = operands[2]; ++ if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) ++ operands[2] = constm1_rtx, op = and_optab; ++ else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) ++ operands[2] = const0_rtx, op = ior_optab; ++ else ++ return false; ++ } ++ else ++ return false; ++ ++ orig_out = operands[0]; ++ tmp = gen_reg_rtx (mode); ++ operands[0] = tmp; ++ ++ /* Recurse to get the constant loaded. */ ++ if (!ix86_expand_int_movcc (operands)) ++ return false; ++ ++ /* Mask in the interesting variable. */ ++ out = expand_binop (mode, op, var, tmp, orig_out, 0, ++ OPTAB_WIDEN); ++ if (!rtx_equal_p (out, orig_out)) ++ emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); ++ ++ return true; ++ } ++ ++ /* ++ * For comparison with above, ++ * ++ * movl cf,dest ++ * movl ct,tmp ++ * cmpl op1,op2 ++ * cmovcc tmp,dest ++ * ++ * Size 15. ++ */ ++ ++ if (! nonimmediate_operand (operands[2], mode)) ++ operands[2] = force_reg (mode, operands[2]); ++ if (! nonimmediate_operand (operands[3], mode)) ++ operands[3] = force_reg (mode, operands[3]); ++ ++ if (! register_operand (operands[2], VOIDmode) ++ && (mode == QImode ++ || ! register_operand (operands[3], VOIDmode))) ++ operands[2] = force_reg (mode, operands[2]); ++ ++ if (mode == QImode ++ && ! register_operand (operands[3], VOIDmode)) ++ operands[3] = force_reg (mode, operands[3]); ++ ++ emit_insn (compare_seq); ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_IF_THEN_ELSE (mode, ++ compare_op, operands[2], ++ operands[3]))); ++ return true; ++} ++ ++/* Detect conditional moves that exactly match min/max operational ++ semantics. Note that this is IEEE safe, as long as we don't ++ interchange the operands. ++ ++ Returns FALSE if this conditional move doesn't match a MIN/MAX, ++ and TRUE if the operation is successful and instructions are emitted. */ ++ ++static bool ++ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, ++ rtx cmp_op1, rtx if_true, rtx if_false) ++{ ++ machine_mode mode; ++ bool is_min; ++ rtx tmp; ++ ++ if (code == LT) ++ ; ++ else if (code == UNGE) ++ std::swap (if_true, if_false); ++ else ++ return false; ++ ++ if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) ++ is_min = true; ++ else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) ++ is_min = false; ++ else ++ return false; ++ ++ mode = GET_MODE (dest); ++ ++ /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, ++ but MODE may be a vector mode and thus not appropriate. */ ++ if (!flag_finite_math_only || flag_signed_zeros) ++ { ++ int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; ++ rtvec v; ++ ++ if_true = force_reg (mode, if_true); ++ v = gen_rtvec (2, if_true, if_false); ++ tmp = gen_rtx_UNSPEC (mode, v, u); ++ } ++ else ++ { ++ code = is_min ? SMIN : SMAX; ++ if (MEM_P (if_true) && MEM_P (if_false)) ++ if_true = force_reg (mode, if_true); ++ tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); ++ } ++ ++ emit_insn (gen_rtx_SET (dest, tmp)); ++ return true; ++} ++ ++/* Expand an SSE comparison. Return the register with the result. 
*/ ++ ++static rtx ++ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, ++ rtx op_true, rtx op_false) ++{ ++ machine_mode mode = GET_MODE (dest); ++ machine_mode cmp_ops_mode = GET_MODE (cmp_op0); ++ ++ /* In general case result of comparison can differ from operands' type. */ ++ machine_mode cmp_mode; ++ ++ /* In AVX512F the result of comparison is an integer mask. */ ++ bool maskcmp = false; ++ rtx x; ++ ++ if (GET_MODE_SIZE (cmp_ops_mode) == 64) ++ { ++ unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode); ++ cmp_mode = int_mode_for_size (nbits, 0).require (); ++ maskcmp = true; ++ } ++ else ++ cmp_mode = cmp_ops_mode; ++ ++ cmp_op0 = force_reg (cmp_ops_mode, cmp_op0); ++ ++ int (*op1_predicate)(rtx, machine_mode) ++ = VECTOR_MODE_P (cmp_ops_mode) ? vector_operand : nonimmediate_operand; ++ ++ if (!op1_predicate (cmp_op1, cmp_ops_mode)) ++ cmp_op1 = force_reg (cmp_ops_mode, cmp_op1); ++ ++ if (optimize ++ || (maskcmp && cmp_mode != mode) ++ || (op_true && reg_overlap_mentioned_p (dest, op_true)) ++ || (op_false && reg_overlap_mentioned_p (dest, op_false))) ++ dest = gen_reg_rtx (maskcmp ? cmp_mode : mode); ++ ++ /* Compare patterns for int modes are unspec in AVX512F only. */ ++ if (maskcmp && (code == GT || code == EQ)) ++ { ++ rtx (*gen)(rtx, rtx, rtx); ++ ++ switch (cmp_ops_mode) ++ { ++ case E_V64QImode: ++ gcc_assert (TARGET_AVX512BW); ++ gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1; ++ break; ++ case E_V32HImode: ++ gcc_assert (TARGET_AVX512BW); ++ gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1; ++ break; ++ case E_V16SImode: ++ gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1; ++ break; ++ case E_V8DImode: ++ gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1; ++ break; ++ default: ++ gen = NULL; ++ } ++ ++ if (gen) ++ { ++ emit_insn (gen (dest, cmp_op0, cmp_op1)); ++ return dest; ++ } ++ } ++ x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1); ++ ++ if (cmp_mode != mode && !maskcmp) ++ { ++ x = force_reg (cmp_ops_mode, x); ++ convert_move (dest, x, false); ++ } ++ else ++ emit_insn (gen_rtx_SET (dest, x)); ++ ++ return dest; ++} ++ ++/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical ++ operations. This is used for both scalar and vector conditional moves. */ ++ ++void ++ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) ++{ ++ machine_mode mode = GET_MODE (dest); ++ machine_mode cmpmode = GET_MODE (cmp); ++ ++ /* In AVX512F the result of comparison is an integer mask. */ ++ bool maskcmp = (mode != cmpmode && TARGET_AVX512F); ++ ++ rtx t2, t3, x; ++ ++ /* If we have an integer mask and FP value then we need ++ to cast mask to FP mode. 
*/ ++ if (mode != cmpmode && VECTOR_MODE_P (cmpmode)) ++ { ++ cmp = force_reg (cmpmode, cmp); ++ cmp = gen_rtx_SUBREG (mode, cmp, 0); ++ } ++ ++ if (maskcmp) ++ { ++ rtx (*gen) (rtx, rtx) = NULL; ++ if ((op_true == CONST0_RTX (mode) ++ && vector_all_ones_operand (op_false, mode)) ++ || (op_false == CONST0_RTX (mode) ++ && vector_all_ones_operand (op_true, mode))) ++ switch (mode) ++ { ++ case E_V64QImode: ++ if (TARGET_AVX512BW) ++ gen = gen_avx512bw_cvtmask2bv64qi; ++ break; ++ case E_V32QImode: ++ if (TARGET_AVX512VL && TARGET_AVX512BW) ++ gen = gen_avx512vl_cvtmask2bv32qi; ++ break; ++ case E_V16QImode: ++ if (TARGET_AVX512VL && TARGET_AVX512BW) ++ gen = gen_avx512vl_cvtmask2bv16qi; ++ break; ++ case E_V32HImode: ++ if (TARGET_AVX512BW) ++ gen = gen_avx512bw_cvtmask2wv32hi; ++ break; ++ case E_V16HImode: ++ if (TARGET_AVX512VL && TARGET_AVX512BW) ++ gen = gen_avx512vl_cvtmask2wv16hi; ++ break; ++ case E_V8HImode: ++ if (TARGET_AVX512VL && TARGET_AVX512BW) ++ gen = gen_avx512vl_cvtmask2wv8hi; ++ break; ++ case E_V16SImode: ++ if (TARGET_AVX512DQ) ++ gen = gen_avx512f_cvtmask2dv16si; ++ break; ++ case E_V8SImode: ++ if (TARGET_AVX512VL && TARGET_AVX512DQ) ++ gen = gen_avx512vl_cvtmask2dv8si; ++ break; ++ case E_V4SImode: ++ if (TARGET_AVX512VL && TARGET_AVX512DQ) ++ gen = gen_avx512vl_cvtmask2dv4si; ++ break; ++ case E_V8DImode: ++ if (TARGET_AVX512DQ) ++ gen = gen_avx512f_cvtmask2qv8di; ++ break; ++ case E_V4DImode: ++ if (TARGET_AVX512VL && TARGET_AVX512DQ) ++ gen = gen_avx512vl_cvtmask2qv4di; ++ break; ++ case E_V2DImode: ++ if (TARGET_AVX512VL && TARGET_AVX512DQ) ++ gen = gen_avx512vl_cvtmask2qv2di; ++ break; ++ default: ++ break; ++ } ++ if (gen && SCALAR_INT_MODE_P (cmpmode)) ++ { ++ cmp = force_reg (cmpmode, cmp); ++ if (op_true == CONST0_RTX (mode)) ++ { ++ rtx (*gen_not) (rtx, rtx); ++ switch (cmpmode) ++ { ++ case E_QImode: gen_not = gen_knotqi; break; ++ case E_HImode: gen_not = gen_knothi; break; ++ case E_SImode: gen_not = gen_knotsi; break; ++ case E_DImode: gen_not = gen_knotdi; break; ++ default: gcc_unreachable (); ++ } ++ rtx n = gen_reg_rtx (cmpmode); ++ emit_insn (gen_not (n, cmp)); ++ cmp = n; ++ } ++ emit_insn (gen (dest, cmp)); ++ return; ++ } ++ } ++ else if (vector_all_ones_operand (op_true, mode) ++ && op_false == CONST0_RTX (mode)) ++ { ++ emit_insn (gen_rtx_SET (dest, cmp)); ++ return; ++ } ++ else if (op_false == CONST0_RTX (mode)) ++ { ++ op_true = force_reg (mode, op_true); ++ x = gen_rtx_AND (mode, cmp, op_true); ++ emit_insn (gen_rtx_SET (dest, x)); ++ return; ++ } ++ else if (op_true == CONST0_RTX (mode)) ++ { ++ op_false = force_reg (mode, op_false); ++ x = gen_rtx_NOT (mode, cmp); ++ x = gen_rtx_AND (mode, x, op_false); ++ emit_insn (gen_rtx_SET (dest, x)); ++ return; ++ } ++ else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)) ++ { ++ op_false = force_reg (mode, op_false); ++ x = gen_rtx_IOR (mode, cmp, op_false); ++ emit_insn (gen_rtx_SET (dest, x)); ++ return; ++ } ++ else if (TARGET_XOP) ++ { ++ op_true = force_reg (mode, op_true); ++ ++ if (!nonimmediate_operand (op_false, mode)) ++ op_false = force_reg (mode, op_false); ++ ++ emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp, ++ op_true, ++ op_false))); ++ return; ++ } ++ ++ rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; ++ rtx d = dest; ++ ++ if (!vector_operand (op_true, mode)) ++ op_true = force_reg (mode, op_true); ++ ++ op_false = force_reg (mode, op_false); ++ ++ switch (mode) ++ { ++ case E_V4SFmode: ++ if (TARGET_SSE4_1) ++ gen = gen_sse4_1_blendvps; ++ break; ++ case 
E_V2DFmode: ++ if (TARGET_SSE4_1) ++ gen = gen_sse4_1_blendvpd; ++ break; ++ case E_SFmode: ++ if (TARGET_SSE4_1) ++ { ++ gen = gen_sse4_1_blendvss; ++ op_true = force_reg (mode, op_true); ++ } ++ break; ++ case E_DFmode: ++ if (TARGET_SSE4_1) ++ { ++ gen = gen_sse4_1_blendvsd; ++ op_true = force_reg (mode, op_true); ++ } ++ break; ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ if (TARGET_SSE4_1) ++ { ++ gen = gen_sse4_1_pblendvb; ++ if (mode != V16QImode) ++ d = gen_reg_rtx (V16QImode); ++ op_false = gen_lowpart (V16QImode, op_false); ++ op_true = gen_lowpart (V16QImode, op_true); ++ cmp = gen_lowpart (V16QImode, cmp); ++ } ++ break; ++ case E_V8SFmode: ++ if (TARGET_AVX) ++ gen = gen_avx_blendvps256; ++ break; ++ case E_V4DFmode: ++ if (TARGET_AVX) ++ gen = gen_avx_blendvpd256; ++ break; ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V8SImode: ++ case E_V4DImode: ++ if (TARGET_AVX2) ++ { ++ gen = gen_avx2_pblendvb; ++ if (mode != V32QImode) ++ d = gen_reg_rtx (V32QImode); ++ op_false = gen_lowpart (V32QImode, op_false); ++ op_true = gen_lowpart (V32QImode, op_true); ++ cmp = gen_lowpart (V32QImode, cmp); ++ } ++ break; ++ ++ case E_V64QImode: ++ gen = gen_avx512bw_blendmv64qi; ++ break; ++ case E_V32HImode: ++ gen = gen_avx512bw_blendmv32hi; ++ break; ++ case E_V16SImode: ++ gen = gen_avx512f_blendmv16si; ++ break; ++ case E_V8DImode: ++ gen = gen_avx512f_blendmv8di; ++ break; ++ case E_V8DFmode: ++ gen = gen_avx512f_blendmv8df; ++ break; ++ case E_V16SFmode: ++ gen = gen_avx512f_blendmv16sf; ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (gen != NULL) ++ { ++ emit_insn (gen (d, op_false, op_true, cmp)); ++ if (d != dest) ++ emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); ++ } ++ else ++ { ++ op_true = force_reg (mode, op_true); ++ ++ t2 = gen_reg_rtx (mode); ++ if (optimize) ++ t3 = gen_reg_rtx (mode); ++ else ++ t3 = dest; ++ ++ x = gen_rtx_AND (mode, op_true, cmp); ++ emit_insn (gen_rtx_SET (t2, x)); ++ ++ x = gen_rtx_NOT (mode, cmp); ++ x = gen_rtx_AND (mode, x, op_false); ++ emit_insn (gen_rtx_SET (t3, x)); ++ ++ x = gen_rtx_IOR (mode, t3, t2); ++ emit_insn (gen_rtx_SET (dest, x)); ++ } ++} ++ ++/* Swap, force into registers, or otherwise massage the two operands ++ to an sse comparison with a mask result. Thus we differ a bit from ++ ix86_prepare_fp_compare_args which expects to produce a flags result. ++ ++ The DEST operand exists to help determine whether to commute commutative ++ operators. The POP0/POP1 operands are updated in place. The new ++ comparison code is returned, or UNKNOWN if not implementable. */ ++ ++static enum rtx_code ++ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, ++ rtx *pop0, rtx *pop1) ++{ ++ switch (code) ++ { ++ case LTGT: ++ case UNEQ: ++ /* AVX supports all the needed comparisons. */ ++ if (TARGET_AVX) ++ break; ++ /* We have no LTGT as an operator. We could implement it with ++ NE & ORDERED, but this requires an extra temporary. It's ++ not clear that it's worth it. */ ++ return UNKNOWN; ++ ++ case LT: ++ case LE: ++ case UNGT: ++ case UNGE: ++ /* These are supported directly. */ ++ break; ++ ++ case EQ: ++ case NE: ++ case UNORDERED: ++ case ORDERED: ++ /* AVX has 3 operand comparisons, no need to swap anything. */ ++ if (TARGET_AVX) ++ break; ++ /* For commutative operators, try to canonicalize the destination ++ operand to be first in the comparison - this helps reload to ++ avoid extra moves. 
*/ ++ if (!dest || !rtx_equal_p (dest, *pop1)) ++ break; ++ /* FALLTHRU */ ++ ++ case GE: ++ case GT: ++ case UNLE: ++ case UNLT: ++ /* These are not supported directly before AVX, and furthermore ++ ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the ++ comparison operands to transform into something that is ++ supported. */ ++ std::swap (*pop0, *pop1); ++ code = swap_condition (code); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return code; ++} ++ ++/* Expand a floating-point conditional move. Return true if successful. */ ++ ++bool ++ix86_expand_fp_movcc (rtx operands[]) ++{ ++ machine_mode mode = GET_MODE (operands[0]); ++ enum rtx_code code = GET_CODE (operands[1]); ++ rtx tmp, compare_op; ++ rtx op0 = XEXP (operands[1], 0); ++ rtx op1 = XEXP (operands[1], 1); ++ ++ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) ++ { ++ machine_mode cmode; ++ ++ /* Since we've no cmove for sse registers, don't force bad register ++ allocation just to gain access to it. Deny movcc when the ++ comparison mode doesn't match the move mode. */ ++ cmode = GET_MODE (op0); ++ if (cmode == VOIDmode) ++ cmode = GET_MODE (op1); ++ if (cmode != mode) ++ return false; ++ ++ code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1); ++ if (code == UNKNOWN) ++ return false; ++ ++ if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1, ++ operands[2], operands[3])) ++ return true; ++ ++ tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1, ++ operands[2], operands[3]); ++ ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); ++ return true; ++ } ++ ++ if (GET_MODE (op0) == TImode ++ || (GET_MODE (op0) == DImode ++ && !TARGET_64BIT)) ++ return false; ++ ++ /* The floating point conditional move instructions don't directly ++ support conditions resulting from a signed integer comparison. */ ++ ++ compare_op = ix86_expand_compare (code, op0, op1); ++ if (!fcmov_comparison_operator (compare_op, VOIDmode)) ++ { ++ tmp = gen_reg_rtx (QImode); ++ ix86_expand_setcc (tmp, code, op0, op1); ++ ++ compare_op = ix86_expand_compare (NE, tmp, const0_rtx); ++ } ++ ++ emit_insn (gen_rtx_SET (operands[0], ++ gen_rtx_IF_THEN_ELSE (mode, compare_op, ++ operands[2], operands[3]))); ++ ++ return true; ++} ++ ++/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */ ++ ++static int ++ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code) ++{ ++ switch (code) ++ { ++ case EQ: ++ return 0; ++ case LT: ++ case LTU: ++ return 1; ++ case LE: ++ case LEU: ++ return 2; ++ case NE: ++ return 4; ++ case GE: ++ case GEU: ++ return 5; ++ case GT: ++ case GTU: ++ return 6; ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */ ++ ++static int ++ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code) ++{ ++ switch (code) ++ { ++ case EQ: ++ return 0x00; ++ case NE: ++ return 0x04; ++ case GT: ++ return 0x0e; ++ case LE: ++ return 0x02; ++ case GE: ++ return 0x0d; ++ case LT: ++ return 0x01; ++ case UNLE: ++ return 0x0a; ++ case UNLT: ++ return 0x09; ++ case UNGE: ++ return 0x05; ++ case UNGT: ++ return 0x06; ++ case UNEQ: ++ return 0x18; ++ case LTGT: ++ return 0x0c; ++ case ORDERED: ++ return 0x07; ++ case UNORDERED: ++ return 0x03; ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* Return immediate value to be used in UNSPEC_PCMP ++ for comparison CODE in MODE. 
*/ ++ ++static int ++ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode) ++{ ++ if (FLOAT_MODE_P (mode)) ++ return ix86_fp_cmp_code_to_pcmp_immediate (code); ++ return ix86_int_cmp_code_to_pcmp_immediate (code); ++} ++ ++/* Expand AVX-512 vector comparison. */ ++ ++bool ++ix86_expand_mask_vec_cmp (rtx operands[]) ++{ ++ machine_mode mask_mode = GET_MODE (operands[0]); ++ machine_mode cmp_mode = GET_MODE (operands[2]); ++ enum rtx_code code = GET_CODE (operands[1]); ++ rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode)); ++ int unspec_code; ++ rtx unspec; ++ ++ switch (code) ++ { ++ case LEU: ++ case GTU: ++ case GEU: ++ case LTU: ++ unspec_code = UNSPEC_UNSIGNED_PCMP; ++ break; ++ ++ default: ++ unspec_code = UNSPEC_PCMP; ++ } ++ ++ unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2], ++ operands[3], imm), ++ unspec_code); ++ emit_insn (gen_rtx_SET (operands[0], unspec)); ++ ++ return true; ++} ++ ++/* Expand fp vector comparison. */ ++ ++bool ++ix86_expand_fp_vec_cmp (rtx operands[]) ++{ ++ enum rtx_code code = GET_CODE (operands[1]); ++ rtx cmp; ++ ++ code = ix86_prepare_sse_fp_compare_args (operands[0], code, ++ &operands[2], &operands[3]); ++ if (code == UNKNOWN) ++ { ++ rtx temp; ++ switch (GET_CODE (operands[1])) ++ { ++ case LTGT: ++ temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2], ++ operands[3], NULL, NULL); ++ cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2], ++ operands[3], NULL, NULL); ++ code = AND; ++ break; ++ case UNEQ: ++ temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2], ++ operands[3], NULL, NULL); ++ cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2], ++ operands[3], NULL, NULL); ++ code = IOR; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, ++ OPTAB_DIRECT); ++ } ++ else ++ cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3], ++ operands[1], operands[2]); ++ ++ if (operands[0] != cmp) ++ emit_move_insn (operands[0], cmp); ++ ++ return true; ++} ++ ++static rtx ++ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, ++ rtx op_true, rtx op_false, bool *negate) ++{ ++ machine_mode data_mode = GET_MODE (dest); ++ machine_mode mode = GET_MODE (cop0); ++ rtx x; ++ ++ *negate = false; ++ ++ /* XOP supports all of the comparisons on all 128-bit vector int types. */ ++ if (TARGET_XOP ++ && (mode == V16QImode || mode == V8HImode ++ || mode == V4SImode || mode == V2DImode)) ++ ; ++ else ++ { ++ /* Canonicalize the comparison to EQ, GT, GTU. */ ++ switch (code) ++ { ++ case EQ: ++ case GT: ++ case GTU: ++ break; ++ ++ case NE: ++ case LE: ++ case LEU: ++ code = reverse_condition (code); ++ *negate = true; ++ break; ++ ++ case GE: ++ case GEU: ++ code = reverse_condition (code); ++ *negate = true; ++ /* FALLTHRU */ ++ ++ case LT: ++ case LTU: ++ std::swap (cop0, cop1); ++ code = swap_condition (code); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* Only SSE4.1/SSE4.2 supports V2DImode. */ ++ if (mode == V2DImode) ++ { ++ switch (code) ++ { ++ case EQ: ++ /* SSE4.1 supports EQ. */ ++ if (!TARGET_SSE4_1) ++ return NULL; ++ break; ++ ++ case GT: ++ case GTU: ++ /* SSE4.2 supports GT/GTU. */ ++ if (!TARGET_SSE4_2) ++ return NULL; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ } ++ ++ rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode); ++ rtx opfalse = op_false ? 
op_false : CONST0_RTX (data_mode); ++ if (*negate) ++ std::swap (optrue, opfalse); ++ ++ /* Transform x > y ? 0 : -1 (i.e. x <= y ? -1 : 0 or x <= y) when ++ not using integer masks into min (x, y) == x ? -1 : 0 (i.e. ++ min (x, y) == x). While we add one instruction (the minimum), ++ we remove the need for two instructions in the negation, as the ++ result is done this way. ++ When using masks, do it for SI/DImode element types, as it is shorter ++ than the two subtractions. */ ++ if ((code != EQ ++ && GET_MODE_SIZE (mode) != 64 ++ && vector_all_ones_operand (opfalse, data_mode) ++ && optrue == CONST0_RTX (data_mode)) ++ || (code == GTU ++ && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4 ++ /* Don't do it if not using integer masks and we'd end up with ++ the right values in the registers though. */ ++ && (GET_MODE_SIZE (mode) == 64 ++ || !vector_all_ones_operand (optrue, data_mode) ++ || opfalse != CONST0_RTX (data_mode)))) ++ { ++ rtx (*gen) (rtx, rtx, rtx) = NULL; ++ ++ switch (mode) ++ { ++ case E_V16SImode: ++ gen = (code == GTU) ? gen_uminv16si3 : gen_sminv16si3; ++ break; ++ case E_V8DImode: ++ gen = (code == GTU) ? gen_uminv8di3 : gen_sminv8di3; ++ cop0 = force_reg (mode, cop0); ++ cop1 = force_reg (mode, cop1); ++ break; ++ case E_V32QImode: ++ if (TARGET_AVX2) ++ gen = (code == GTU) ? gen_uminv32qi3 : gen_sminv32qi3; ++ break; ++ case E_V16HImode: ++ if (TARGET_AVX2) ++ gen = (code == GTU) ? gen_uminv16hi3 : gen_sminv16hi3; ++ break; ++ case E_V8SImode: ++ if (TARGET_AVX2) ++ gen = (code == GTU) ? gen_uminv8si3 : gen_sminv8si3; ++ break; ++ case E_V4DImode: ++ if (TARGET_AVX512VL) ++ { ++ gen = (code == GTU) ? gen_uminv4di3 : gen_sminv4di3; ++ cop0 = force_reg (mode, cop0); ++ cop1 = force_reg (mode, cop1); ++ } ++ break; ++ case E_V16QImode: ++ if (code == GTU && TARGET_SSE2) ++ gen = gen_uminv16qi3; ++ else if (code == GT && TARGET_SSE4_1) ++ gen = gen_sminv16qi3; ++ break; ++ case E_V8HImode: ++ if (code == GTU && TARGET_SSE4_1) ++ gen = gen_uminv8hi3; ++ else if (code == GT && TARGET_SSE2) ++ gen = gen_sminv8hi3; ++ break; ++ case E_V4SImode: ++ if (TARGET_SSE4_1) ++ gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3; ++ break; ++ case E_V2DImode: ++ if (TARGET_AVX512VL) ++ { ++ gen = (code == GTU) ? gen_uminv2di3 : gen_sminv2di3; ++ cop0 = force_reg (mode, cop0); ++ cop1 = force_reg (mode, cop1); ++ } ++ break; ++ default: ++ break; ++ } ++ ++ if (gen) ++ { ++ rtx tem = gen_reg_rtx (mode); ++ if (!vector_operand (cop0, mode)) ++ cop0 = force_reg (mode, cop0); ++ if (!vector_operand (cop1, mode)) ++ cop1 = force_reg (mode, cop1); ++ *negate = !*negate; ++ emit_insn (gen (tem, cop0, cop1)); ++ cop1 = tem; ++ code = EQ; ++ } ++ } ++ ++ /* Unsigned parallel compare is not supported by the hardware. ++ Play some tricks to turn this into a signed comparison ++ against 0. */ ++ if (code == GTU) ++ { ++ cop0 = force_reg (mode, cop0); ++ ++ switch (mode) ++ { ++ case E_V16SImode: ++ case E_V8DImode: ++ case E_V8SImode: ++ case E_V4DImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ { ++ rtx t1, t2, mask; ++ rtx (*gen_sub3) (rtx, rtx, rtx); ++ ++ switch (mode) ++ { ++ case E_V16SImode: gen_sub3 = gen_subv16si3; break; ++ case E_V8DImode: gen_sub3 = gen_subv8di3; break; ++ case E_V8SImode: gen_sub3 = gen_subv8si3; break; ++ case E_V4DImode: gen_sub3 = gen_subv4di3; break; ++ case E_V4SImode: gen_sub3 = gen_subv4si3; break; ++ case E_V2DImode: gen_sub3 = gen_subv2di3; break; ++ default: ++ gcc_unreachable (); ++ } ++ /* Subtract (-(INT MAX) - 1) from both operands to make ++ them signed. 
*/ ++ mask = ix86_build_signbit_mask (mode, true, false); ++ t1 = gen_reg_rtx (mode); ++ emit_insn (gen_sub3 (t1, cop0, mask)); ++ ++ t2 = gen_reg_rtx (mode); ++ emit_insn (gen_sub3 (t2, cop1, mask)); ++ ++ cop0 = t1; ++ cop1 = t2; ++ code = GT; ++ } ++ break; ++ ++ case E_V64QImode: ++ case E_V32HImode: ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V16QImode: ++ case E_V8HImode: ++ /* Perform a parallel unsigned saturating subtraction. */ ++ x = gen_reg_rtx (mode); ++ emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, ++ cop1))); ++ ++ cop0 = x; ++ cop1 = CONST0_RTX (mode); ++ code = EQ; ++ *negate = !*negate; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ } ++ } ++ ++ if (*negate) ++ std::swap (op_true, op_false); ++ ++ /* Allow the comparison to be done in one mode, but the movcc to ++ happen in another mode. */ ++ if (data_mode == mode) ++ { ++ x = ix86_expand_sse_cmp (dest, code, cop0, cop1, ++ op_true, op_false); ++ } ++ else ++ { ++ gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode)); ++ x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1, ++ op_true, op_false); ++ if (GET_MODE (x) == mode) ++ x = gen_lowpart (data_mode, x); ++ } ++ ++ return x; ++} ++ ++/* Expand integer vector comparison. */ ++ ++bool ++ix86_expand_int_vec_cmp (rtx operands[]) ++{ ++ rtx_code code = GET_CODE (operands[1]); ++ bool negate = false; ++ rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2], ++ operands[3], NULL, NULL, &negate); ++ ++ if (!cmp) ++ return false; ++ ++ if (negate) ++ cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp, ++ CONST0_RTX (GET_MODE (cmp)), ++ NULL, NULL, &negate); ++ ++ gcc_assert (!negate); ++ ++ if (operands[0] != cmp) ++ emit_move_insn (operands[0], cmp); ++ ++ return true; ++} ++ ++/* Expand a floating-point vector conditional move; a vcond operation ++ rather than a movcc operation. */ ++ ++bool ++ix86_expand_fp_vcond (rtx operands[]) ++{ ++ enum rtx_code code = GET_CODE (operands[3]); ++ rtx cmp; ++ ++ code = ix86_prepare_sse_fp_compare_args (operands[0], code, ++ &operands[4], &operands[5]); ++ if (code == UNKNOWN) ++ { ++ rtx temp; ++ switch (GET_CODE (operands[3])) ++ { ++ case LTGT: ++ temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], ++ operands[5], operands[0], operands[0]); ++ cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], ++ operands[5], operands[1], operands[2]); ++ code = AND; ++ break; ++ case UNEQ: ++ temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], ++ operands[5], operands[0], operands[0]); ++ cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], ++ operands[5], operands[1], operands[2]); ++ code = IOR; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, ++ OPTAB_DIRECT); ++ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); ++ return true; ++ } ++ ++ if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], ++ operands[5], operands[1], operands[2])) ++ return true; ++ ++ cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], ++ operands[1], operands[2]); ++ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); ++ return true; ++} ++ ++/* Expand a signed/unsigned integral vector conditional move. 
*/ ++ ++bool ++ix86_expand_int_vcond (rtx operands[]) ++{ ++ machine_mode data_mode = GET_MODE (operands[0]); ++ machine_mode mode = GET_MODE (operands[4]); ++ enum rtx_code code = GET_CODE (operands[3]); ++ bool negate = false; ++ rtx x, cop0, cop1; ++ ++ cop0 = operands[4]; ++ cop1 = operands[5]; ++ ++ /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 ++ and x < 0 ? 1 : 0 into (unsigned) x >> 31. */ ++ if ((code == LT || code == GE) ++ && data_mode == mode ++ && cop1 == CONST0_RTX (mode) ++ && operands[1 + (code == LT)] == CONST0_RTX (data_mode) ++ && GET_MODE_UNIT_SIZE (data_mode) > 1 ++ && GET_MODE_UNIT_SIZE (data_mode) <= 8 ++ && (GET_MODE_SIZE (data_mode) == 16 ++ || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32))) ++ { ++ rtx negop = operands[2 - (code == LT)]; ++ int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1; ++ if (negop == CONST1_RTX (data_mode)) ++ { ++ rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift), ++ operands[0], 1, OPTAB_DIRECT); ++ if (res != operands[0]) ++ emit_move_insn (operands[0], res); ++ return true; ++ } ++ else if (GET_MODE_INNER (data_mode) != DImode ++ && vector_all_ones_operand (negop, data_mode)) ++ { ++ rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift), ++ operands[0], 0, OPTAB_DIRECT); ++ if (res != operands[0]) ++ emit_move_insn (operands[0], res); ++ return true; ++ } ++ } ++ ++ if (!nonimmediate_operand (cop1, mode)) ++ cop1 = force_reg (mode, cop1); ++ if (!general_operand (operands[1], data_mode)) ++ operands[1] = force_reg (data_mode, operands[1]); ++ if (!general_operand (operands[2], data_mode)) ++ operands[2] = force_reg (data_mode, operands[2]); ++ ++ x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1, ++ operands[1], operands[2], &negate); ++ ++ if (!x) ++ return false; ++ ++ ix86_expand_sse_movcc (operands[0], x, operands[1+negate], ++ operands[2-negate]); ++ return true; ++} ++ ++static bool ++ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1, ++ struct expand_vec_perm_d *d) ++{ ++ /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const ++ expander, so args are either in d, or in op0, op1 etc. */ ++ machine_mode mode = GET_MODE (d ? 
d->op0 : op0); ++ machine_mode maskmode = mode; ++ rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; ++ ++ switch (mode) ++ { ++ case E_V8HImode: ++ if (TARGET_AVX512VL && TARGET_AVX512BW) ++ gen = gen_avx512vl_vpermt2varv8hi3; ++ break; ++ case E_V16HImode: ++ if (TARGET_AVX512VL && TARGET_AVX512BW) ++ gen = gen_avx512vl_vpermt2varv16hi3; ++ break; ++ case E_V64QImode: ++ if (TARGET_AVX512VBMI) ++ gen = gen_avx512bw_vpermt2varv64qi3; ++ break; ++ case E_V32HImode: ++ if (TARGET_AVX512BW) ++ gen = gen_avx512bw_vpermt2varv32hi3; ++ break; ++ case E_V4SImode: ++ if (TARGET_AVX512VL) ++ gen = gen_avx512vl_vpermt2varv4si3; ++ break; ++ case E_V8SImode: ++ if (TARGET_AVX512VL) ++ gen = gen_avx512vl_vpermt2varv8si3; ++ break; ++ case E_V16SImode: ++ if (TARGET_AVX512F) ++ gen = gen_avx512f_vpermt2varv16si3; ++ break; ++ case E_V4SFmode: ++ if (TARGET_AVX512VL) ++ { ++ gen = gen_avx512vl_vpermt2varv4sf3; ++ maskmode = V4SImode; ++ } ++ break; ++ case E_V8SFmode: ++ if (TARGET_AVX512VL) ++ { ++ gen = gen_avx512vl_vpermt2varv8sf3; ++ maskmode = V8SImode; ++ } ++ break; ++ case E_V16SFmode: ++ if (TARGET_AVX512F) ++ { ++ gen = gen_avx512f_vpermt2varv16sf3; ++ maskmode = V16SImode; ++ } ++ break; ++ case E_V2DImode: ++ if (TARGET_AVX512VL) ++ gen = gen_avx512vl_vpermt2varv2di3; ++ break; ++ case E_V4DImode: ++ if (TARGET_AVX512VL) ++ gen = gen_avx512vl_vpermt2varv4di3; ++ break; ++ case E_V8DImode: ++ if (TARGET_AVX512F) ++ gen = gen_avx512f_vpermt2varv8di3; ++ break; ++ case E_V2DFmode: ++ if (TARGET_AVX512VL) ++ { ++ gen = gen_avx512vl_vpermt2varv2df3; ++ maskmode = V2DImode; ++ } ++ break; ++ case E_V4DFmode: ++ if (TARGET_AVX512VL) ++ { ++ gen = gen_avx512vl_vpermt2varv4df3; ++ maskmode = V4DImode; ++ } ++ break; ++ case E_V8DFmode: ++ if (TARGET_AVX512F) ++ { ++ gen = gen_avx512f_vpermt2varv8df3; ++ maskmode = V8DImode; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ if (gen == NULL) ++ return false; ++ ++ /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const ++ expander, so args are either in d, or in op0, op1 etc. */ ++ if (d) ++ { ++ rtx vec[64]; ++ target = d->target; ++ op0 = d->op0; ++ op1 = d->op1; ++ for (int i = 0; i < d->nelt; ++i) ++ vec[i] = GEN_INT (d->perm[i]); ++ mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); ++ } ++ ++ emit_insn (gen (target, force_reg (maskmode, mask), op0, op1)); ++ return true; ++} ++ ++/* Expand a variable vector permutation. */ ++ ++void ++ix86_expand_vec_perm (rtx operands[]) ++{ ++ rtx target = operands[0]; ++ rtx op0 = operands[1]; ++ rtx op1 = operands[2]; ++ rtx mask = operands[3]; ++ rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32]; ++ machine_mode mode = GET_MODE (op0); ++ machine_mode maskmode = GET_MODE (mask); ++ int w, e, i; ++ bool one_operand_shuffle = rtx_equal_p (op0, op1); ++ ++ /* Number of elements in the vector. 
*/ ++ w = GET_MODE_NUNITS (mode); ++ e = GET_MODE_UNIT_SIZE (mode); ++ gcc_assert (w <= 64); ++ ++ if (TARGET_AVX512F && one_operand_shuffle) ++ { ++ rtx (*gen) (rtx, rtx, rtx) = NULL; ++ switch (mode) ++ { ++ case E_V16SImode: ++ gen =gen_avx512f_permvarv16si; ++ break; ++ case E_V16SFmode: ++ gen = gen_avx512f_permvarv16sf; ++ break; ++ case E_V8DImode: ++ gen = gen_avx512f_permvarv8di; ++ break; ++ case E_V8DFmode: ++ gen = gen_avx512f_permvarv8df; ++ break; ++ default: ++ break; ++ } ++ if (gen != NULL) ++ { ++ emit_insn (gen (target, op0, mask)); ++ return; ++ } ++ } ++ ++ if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL)) ++ return; ++ ++ if (TARGET_AVX2) ++ { ++ if (mode == V4DImode || mode == V4DFmode || mode == V16HImode) ++ { ++ /* Unfortunately, the VPERMQ and VPERMPD instructions only support ++ an constant shuffle operand. With a tiny bit of effort we can ++ use VPERMD instead. A re-interpretation stall for V4DFmode is ++ unfortunate but there's no avoiding it. ++ Similarly for V16HImode we don't have instructions for variable ++ shuffling, while for V32QImode we can use after preparing suitable ++ masks vpshufb; vpshufb; vpermq; vpor. */ ++ ++ if (mode == V16HImode) ++ { ++ maskmode = mode = V32QImode; ++ w = 32; ++ e = 1; ++ } ++ else ++ { ++ maskmode = mode = V8SImode; ++ w = 8; ++ e = 4; ++ } ++ t1 = gen_reg_rtx (maskmode); ++ ++ /* Replicate the low bits of the V4DImode mask into V8SImode: ++ mask = { A B C D } ++ t1 = { A A B B C C D D }. */ ++ for (i = 0; i < w / 2; ++i) ++ vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2); ++ vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); ++ vt = force_reg (maskmode, vt); ++ mask = gen_lowpart (maskmode, mask); ++ if (maskmode == V8SImode) ++ emit_insn (gen_avx2_permvarv8si (t1, mask, vt)); ++ else ++ emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt)); ++ ++ /* Multiply the shuffle indicies by two. */ ++ t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1, ++ OPTAB_DIRECT); ++ ++ /* Add one to the odd shuffle indicies: ++ t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */ ++ for (i = 0; i < w / 2; ++i) ++ { ++ vec[i * 2] = const0_rtx; ++ vec[i * 2 + 1] = const1_rtx; ++ } ++ vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); ++ vt = validize_mem (force_const_mem (maskmode, vt)); ++ t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1, ++ OPTAB_DIRECT); ++ ++ /* Continue as if V8SImode (resp. V32QImode) was used initially. */ ++ operands[3] = mask = t1; ++ target = gen_reg_rtx (mode); ++ op0 = gen_lowpart (mode, op0); ++ op1 = gen_lowpart (mode, op1); ++ } ++ ++ switch (mode) ++ { ++ case E_V8SImode: ++ /* The VPERMD and VPERMPS instructions already properly ignore ++ the high bits of the shuffle elements. No need for us to ++ perform an AND ourselves. 
*/ ++ if (one_operand_shuffle) ++ { ++ emit_insn (gen_avx2_permvarv8si (target, op0, mask)); ++ if (target != operands[0]) ++ emit_move_insn (operands[0], ++ gen_lowpart (GET_MODE (operands[0]), target)); ++ } ++ else ++ { ++ t1 = gen_reg_rtx (V8SImode); ++ t2 = gen_reg_rtx (V8SImode); ++ emit_insn (gen_avx2_permvarv8si (t1, op0, mask)); ++ emit_insn (gen_avx2_permvarv8si (t2, op1, mask)); ++ goto merge_two; ++ } ++ return; ++ ++ case E_V8SFmode: ++ mask = gen_lowpart (V8SImode, mask); ++ if (one_operand_shuffle) ++ emit_insn (gen_avx2_permvarv8sf (target, op0, mask)); ++ else ++ { ++ t1 = gen_reg_rtx (V8SFmode); ++ t2 = gen_reg_rtx (V8SFmode); ++ emit_insn (gen_avx2_permvarv8sf (t1, op0, mask)); ++ emit_insn (gen_avx2_permvarv8sf (t2, op1, mask)); ++ goto merge_two; ++ } ++ return; ++ ++ case E_V4SImode: ++ /* By combining the two 128-bit input vectors into one 256-bit ++ input vector, we can use VPERMD and VPERMPS for the full ++ two-operand shuffle. */ ++ t1 = gen_reg_rtx (V8SImode); ++ t2 = gen_reg_rtx (V8SImode); ++ emit_insn (gen_avx_vec_concatv8si (t1, op0, op1)); ++ emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); ++ emit_insn (gen_avx2_permvarv8si (t1, t1, t2)); ++ emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx)); ++ return; ++ ++ case E_V4SFmode: ++ t1 = gen_reg_rtx (V8SFmode); ++ t2 = gen_reg_rtx (V8SImode); ++ mask = gen_lowpart (V4SImode, mask); ++ emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1)); ++ emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); ++ emit_insn (gen_avx2_permvarv8sf (t1, t1, t2)); ++ emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx)); ++ return; ++ ++ case E_V32QImode: ++ t1 = gen_reg_rtx (V32QImode); ++ t2 = gen_reg_rtx (V32QImode); ++ t3 = gen_reg_rtx (V32QImode); ++ vt2 = GEN_INT (-128); ++ vt = gen_const_vec_duplicate (V32QImode, vt2); ++ vt = force_reg (V32QImode, vt); ++ for (i = 0; i < 32; i++) ++ vec[i] = i < 16 ? vt2 : const0_rtx; ++ vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec)); ++ vt2 = force_reg (V32QImode, vt2); ++ /* From mask create two adjusted masks, which contain the same ++ bits as mask in the low 7 bits of each vector element. ++ The first mask will have the most significant bit clear ++ if it requests element from the same 128-bit lane ++ and MSB set if it requests element from the other 128-bit lane. ++ The second mask will have the opposite values of the MSB, ++ and additionally will have its 128-bit lanes swapped. ++ E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have ++ t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and ++ t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ... ++ stands for other 12 bytes. */ ++ /* The bit whether element is from the same lane or the other ++ lane is bit 4, so shift it up by 3 to the MSB position. */ ++ t5 = gen_reg_rtx (V4DImode); ++ emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask), ++ GEN_INT (3))); ++ /* Clear MSB bits from the mask just in case it had them set. */ ++ emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask)); ++ /* After this t1 will have MSB set for elements from other lane. */ ++ emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2)); ++ /* Clear bits other than MSB. */ ++ emit_insn (gen_andv32qi3 (t1, t1, vt)); ++ /* Or in the lower bits from mask into t3. */ ++ emit_insn (gen_iorv32qi3 (t3, t1, t2)); ++ /* And invert MSB bits in t1, so MSB is set for elements from the same ++ lane. */ ++ emit_insn (gen_xorv32qi3 (t1, t1, vt)); ++ /* Swap 128-bit lanes in t3. 
*/ ++ t6 = gen_reg_rtx (V4DImode); ++ emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3), ++ const2_rtx, GEN_INT (3), ++ const0_rtx, const1_rtx)); ++ /* And or in the lower bits from mask into t1. */ ++ emit_insn (gen_iorv32qi3 (t1, t1, t2)); ++ if (one_operand_shuffle) ++ { ++ /* Each of these shuffles will put 0s in places where ++ element from the other 128-bit lane is needed, otherwise ++ will shuffle in the requested value. */ ++ emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, ++ gen_lowpart (V32QImode, t6))); ++ emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1)); ++ /* For t3 the 128-bit lanes are swapped again. */ ++ t7 = gen_reg_rtx (V4DImode); ++ emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3), ++ const2_rtx, GEN_INT (3), ++ const0_rtx, const1_rtx)); ++ /* And oring both together leads to the result. */ ++ emit_insn (gen_iorv32qi3 (target, t1, ++ gen_lowpart (V32QImode, t7))); ++ if (target != operands[0]) ++ emit_move_insn (operands[0], ++ gen_lowpart (GET_MODE (operands[0]), target)); ++ return; ++ } ++ ++ t4 = gen_reg_rtx (V32QImode); ++ /* Similarly to the above one_operand_shuffle code, ++ just for repeated twice for each operand. merge_two: ++ code will merge the two results together. */ ++ emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, ++ gen_lowpart (V32QImode, t6))); ++ emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, ++ gen_lowpart (V32QImode, t6))); ++ emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1)); ++ emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1)); ++ t7 = gen_reg_rtx (V4DImode); ++ emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4), ++ const2_rtx, GEN_INT (3), ++ const0_rtx, const1_rtx)); ++ t8 = gen_reg_rtx (V4DImode); ++ emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3), ++ const2_rtx, GEN_INT (3), ++ const0_rtx, const1_rtx)); ++ emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7))); ++ emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8))); ++ t1 = t4; ++ t2 = t3; ++ goto merge_two; ++ ++ default: ++ gcc_assert (GET_MODE_SIZE (mode) <= 16); ++ break; ++ } ++ } ++ ++ if (TARGET_XOP) ++ { ++ /* The XOP VPPERM insn supports three inputs. By ignoring the ++ one_operand_shuffle special case, we avoid creating another ++ set of constant vectors in memory. */ ++ one_operand_shuffle = false; ++ ++ /* mask = mask & {2*w-1, ...} */ ++ vt = GEN_INT (2*w - 1); ++ } ++ else ++ { ++ /* mask = mask & {w-1, ...} */ ++ vt = GEN_INT (w - 1); ++ } ++ ++ vt = gen_const_vec_duplicate (maskmode, vt); ++ mask = expand_simple_binop (maskmode, AND, mask, vt, ++ NULL_RTX, 0, OPTAB_DIRECT); ++ ++ /* For non-QImode operations, convert the word permutation control ++ into a byte permutation control. */ ++ if (mode != V16QImode) ++ { ++ mask = expand_simple_binop (maskmode, ASHIFT, mask, ++ GEN_INT (exact_log2 (e)), ++ NULL_RTX, 0, OPTAB_DIRECT); ++ ++ /* Convert mask to vector of chars. */ ++ mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask)); ++ ++ /* Replicate each of the input bytes into byte positions: ++ (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8} ++ (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12} ++ (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. 
*/ ++ for (i = 0; i < 16; ++i) ++ vec[i] = GEN_INT (i/e * e); ++ vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); ++ vt = validize_mem (force_const_mem (V16QImode, vt)); ++ if (TARGET_XOP) ++ emit_insn (gen_xop_pperm (mask, mask, mask, vt)); ++ else ++ emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt)); ++ ++ /* Convert it into the byte positions by doing ++ mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */ ++ for (i = 0; i < 16; ++i) ++ vec[i] = GEN_INT (i % e); ++ vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); ++ vt = validize_mem (force_const_mem (V16QImode, vt)); ++ emit_insn (gen_addv16qi3 (mask, mask, vt)); ++ } ++ ++ /* The actual shuffle operations all operate on V16QImode. */ ++ op0 = gen_lowpart (V16QImode, op0); ++ op1 = gen_lowpart (V16QImode, op1); ++ ++ if (TARGET_XOP) ++ { ++ if (GET_MODE (target) != V16QImode) ++ target = gen_reg_rtx (V16QImode); ++ emit_insn (gen_xop_pperm (target, op0, op1, mask)); ++ if (target != operands[0]) ++ emit_move_insn (operands[0], ++ gen_lowpart (GET_MODE (operands[0]), target)); ++ } ++ else if (one_operand_shuffle) ++ { ++ if (GET_MODE (target) != V16QImode) ++ target = gen_reg_rtx (V16QImode); ++ emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask)); ++ if (target != operands[0]) ++ emit_move_insn (operands[0], ++ gen_lowpart (GET_MODE (operands[0]), target)); ++ } ++ else ++ { ++ rtx xops[6]; ++ bool ok; ++ ++ /* Shuffle the two input vectors independently. */ ++ t1 = gen_reg_rtx (V16QImode); ++ t2 = gen_reg_rtx (V16QImode); ++ emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask)); ++ emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask)); ++ ++ merge_two: ++ /* Then merge them together. The key is whether any given control ++ element contained a bit set that indicates the second word. */ ++ mask = operands[3]; ++ vt = GEN_INT (w); ++ if (maskmode == V2DImode && !TARGET_SSE4_1) ++ { ++ /* Without SSE4.1, we don't have V2DImode EQ. Perform one ++ more shuffle to convert the V2DI input mask into a V4SI ++ input mask. At which point the masking that expand_int_vcond ++ will work as desired. */ ++ rtx t3 = gen_reg_rtx (V4SImode); ++ emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask), ++ const0_rtx, const0_rtx, ++ const2_rtx, const2_rtx)); ++ mask = t3; ++ maskmode = V4SImode; ++ e = w = 4; ++ } ++ ++ vt = gen_const_vec_duplicate (maskmode, vt); ++ vt = force_reg (maskmode, vt); ++ mask = expand_simple_binop (maskmode, AND, mask, vt, ++ NULL_RTX, 0, OPTAB_DIRECT); ++ ++ if (GET_MODE (target) != mode) ++ target = gen_reg_rtx (mode); ++ xops[0] = target; ++ xops[1] = gen_lowpart (mode, t2); ++ xops[2] = gen_lowpart (mode, t1); ++ xops[3] = gen_rtx_EQ (maskmode, mask, vt); ++ xops[4] = mask; ++ xops[5] = vt; ++ ok = ix86_expand_int_vcond (xops); ++ gcc_assert (ok); ++ if (target != operands[0]) ++ emit_move_insn (operands[0], ++ gen_lowpart (GET_MODE (operands[0]), target)); ++ } ++} ++ ++/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is ++ true if we should do zero extension, else sign extension. HIGH_P is ++ true if we want the N/2 high elements, else the low elements. 
*/ ++ ++void ++ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) ++{ ++ machine_mode imode = GET_MODE (src); ++ rtx tmp; ++ ++ if (TARGET_SSE4_1) ++ { ++ rtx (*unpack)(rtx, rtx); ++ rtx (*extract)(rtx, rtx) = NULL; ++ machine_mode halfmode = BLKmode; ++ ++ switch (imode) ++ { ++ case E_V64QImode: ++ if (unsigned_p) ++ unpack = gen_avx512bw_zero_extendv32qiv32hi2; ++ else ++ unpack = gen_avx512bw_sign_extendv32qiv32hi2; ++ halfmode = V32QImode; ++ extract ++ = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi; ++ break; ++ case E_V32QImode: ++ if (unsigned_p) ++ unpack = gen_avx2_zero_extendv16qiv16hi2; ++ else ++ unpack = gen_avx2_sign_extendv16qiv16hi2; ++ halfmode = V16QImode; ++ extract ++ = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi; ++ break; ++ case E_V32HImode: ++ if (unsigned_p) ++ unpack = gen_avx512f_zero_extendv16hiv16si2; ++ else ++ unpack = gen_avx512f_sign_extendv16hiv16si2; ++ halfmode = V16HImode; ++ extract ++ = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi; ++ break; ++ case E_V16HImode: ++ if (unsigned_p) ++ unpack = gen_avx2_zero_extendv8hiv8si2; ++ else ++ unpack = gen_avx2_sign_extendv8hiv8si2; ++ halfmode = V8HImode; ++ extract ++ = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi; ++ break; ++ case E_V16SImode: ++ if (unsigned_p) ++ unpack = gen_avx512f_zero_extendv8siv8di2; ++ else ++ unpack = gen_avx512f_sign_extendv8siv8di2; ++ halfmode = V8SImode; ++ extract ++ = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si; ++ break; ++ case E_V8SImode: ++ if (unsigned_p) ++ unpack = gen_avx2_zero_extendv4siv4di2; ++ else ++ unpack = gen_avx2_sign_extendv4siv4di2; ++ halfmode = V4SImode; ++ extract ++ = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si; ++ break; ++ case E_V16QImode: ++ if (unsigned_p) ++ unpack = gen_sse4_1_zero_extendv8qiv8hi2; ++ else ++ unpack = gen_sse4_1_sign_extendv8qiv8hi2; ++ break; ++ case E_V8HImode: ++ if (unsigned_p) ++ unpack = gen_sse4_1_zero_extendv4hiv4si2; ++ else ++ unpack = gen_sse4_1_sign_extendv4hiv4si2; ++ break; ++ case E_V4SImode: ++ if (unsigned_p) ++ unpack = gen_sse4_1_zero_extendv2siv2di2; ++ else ++ unpack = gen_sse4_1_sign_extendv2siv2di2; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (GET_MODE_SIZE (imode) >= 32) ++ { ++ tmp = gen_reg_rtx (halfmode); ++ emit_insn (extract (tmp, src)); ++ } ++ else if (high_p) ++ { ++ /* Shift higher 8 bytes to lower 8 bytes. 
*/ ++ tmp = gen_reg_rtx (V1TImode); ++ emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src), ++ GEN_INT (64))); ++ tmp = gen_lowpart (imode, tmp); ++ } ++ else ++ tmp = src; ++ ++ emit_insn (unpack (dest, tmp)); ++ } ++ else ++ { ++ rtx (*unpack)(rtx, rtx, rtx); ++ ++ switch (imode) ++ { ++ case E_V16QImode: ++ if (high_p) ++ unpack = gen_vec_interleave_highv16qi; ++ else ++ unpack = gen_vec_interleave_lowv16qi; ++ break; ++ case E_V8HImode: ++ if (high_p) ++ unpack = gen_vec_interleave_highv8hi; ++ else ++ unpack = gen_vec_interleave_lowv8hi; ++ break; ++ case E_V4SImode: ++ if (high_p) ++ unpack = gen_vec_interleave_highv4si; ++ else ++ unpack = gen_vec_interleave_lowv4si; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (unsigned_p) ++ tmp = force_reg (imode, CONST0_RTX (imode)); ++ else ++ tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), ++ src, pc_rtx, pc_rtx); ++ ++ rtx tmp2 = gen_reg_rtx (imode); ++ emit_insn (unpack (tmp2, src, tmp)); ++ emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2)); ++ } ++} ++ ++/* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode, ++ but works for floating pointer parameters and nonoffsetable memories. ++ For pushes, it returns just stack offsets; the values will be saved ++ in the right order. Maximally three parts are generated. */ ++ ++static int ++ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode) ++{ ++ int size; ++ ++ if (!TARGET_64BIT) ++ size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4; ++ else ++ size = (GET_MODE_SIZE (mode) + 4) / 8; ++ ++ gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand))); ++ gcc_assert (size >= 2 && size <= 4); ++ ++ /* Optimize constant pool reference to immediates. This is used by fp ++ moves, that force all constants to memory to allow combining. */ ++ if (MEM_P (operand) && MEM_READONLY_P (operand)) ++ operand = avoid_constant_pool_reference (operand); ++ ++ if (MEM_P (operand) && !offsettable_memref_p (operand)) ++ { ++ /* The only non-offsetable memories we handle are pushes. */ ++ int ok = push_operand (operand, VOIDmode); ++ ++ gcc_assert (ok); ++ ++ operand = copy_rtx (operand); ++ PUT_MODE (operand, word_mode); ++ parts[0] = parts[1] = parts[2] = parts[3] = operand; ++ return size; ++ } ++ ++ if (GET_CODE (operand) == CONST_VECTOR) ++ { ++ scalar_int_mode imode = int_mode_for_mode (mode).require (); ++ /* Caution: if we looked through a constant pool memory above, ++ the operand may actually have a different mode now. That's ++ ok, since we want to pun this all the way back to an integer. 
*/ ++ operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); ++ gcc_assert (operand != NULL); ++ mode = imode; ++ } ++ ++ if (!TARGET_64BIT) ++ { ++ if (mode == DImode) ++ split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); ++ else ++ { ++ int i; ++ ++ if (REG_P (operand)) ++ { ++ gcc_assert (reload_completed); ++ for (i = 0; i < size; i++) ++ parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i); ++ } ++ else if (offsettable_memref_p (operand)) ++ { ++ operand = adjust_address (operand, SImode, 0); ++ parts[0] = operand; ++ for (i = 1; i < size; i++) ++ parts[i] = adjust_address (operand, SImode, 4 * i); ++ } ++ else if (CONST_DOUBLE_P (operand)) ++ { ++ const REAL_VALUE_TYPE *r; ++ long l[4]; ++ ++ r = CONST_DOUBLE_REAL_VALUE (operand); ++ switch (mode) ++ { ++ case E_TFmode: ++ real_to_target (l, r, mode); ++ parts[3] = gen_int_mode (l[3], SImode); ++ parts[2] = gen_int_mode (l[2], SImode); ++ break; ++ case E_XFmode: ++ /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since ++ long double may not be 80-bit. */ ++ real_to_target (l, r, mode); ++ parts[2] = gen_int_mode (l[2], SImode); ++ break; ++ case E_DFmode: ++ REAL_VALUE_TO_TARGET_DOUBLE (*r, l); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ parts[1] = gen_int_mode (l[1], SImode); ++ parts[0] = gen_int_mode (l[0], SImode); ++ } ++ else ++ gcc_unreachable (); ++ } ++ } ++ else ++ { ++ if (mode == TImode) ++ split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); ++ if (mode == XFmode || mode == TFmode) ++ { ++ machine_mode upper_mode = mode==XFmode ? SImode : DImode; ++ if (REG_P (operand)) ++ { ++ gcc_assert (reload_completed); ++ parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); ++ parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); ++ } ++ else if (offsettable_memref_p (operand)) ++ { ++ operand = adjust_address (operand, DImode, 0); ++ parts[0] = operand; ++ parts[1] = adjust_address (operand, upper_mode, 8); ++ } ++ else if (CONST_DOUBLE_P (operand)) ++ { ++ long l[4]; ++ ++ real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode); ++ ++ /* real_to_target puts 32-bit pieces in each long. */ ++ parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff)) ++ | ((l[1] & HOST_WIDE_INT_C (0xffffffff)) ++ << 32), DImode); ++ ++ if (upper_mode == SImode) ++ parts[1] = gen_int_mode (l[2], SImode); ++ else ++ parts[1] ++ = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff)) ++ | ((l[3] & HOST_WIDE_INT_C (0xffffffff)) ++ << 32), DImode); ++ } ++ else ++ gcc_unreachable (); ++ } ++ } ++ ++ return size; ++} ++ ++/* Emit insns to perform a move or push of DI, DF, XF, and TF values. ++ Return false when normal moves are needed; true when all required ++ insns have been emitted. Operands 2-4 contain the input values ++ int the correct order; operands 5-7 contain the output values. */ ++ ++void ++ix86_split_long_move (rtx operands[]) ++{ ++ rtx part[2][4]; ++ int nparts, i, j; ++ int push = 0; ++ int collisions = 0; ++ machine_mode mode = GET_MODE (operands[0]); ++ bool collisionparts[4]; ++ ++ /* The DFmode expanders may ask us to move double. ++ For 64bit target this is single move. By hiding the fact ++ here we simplify i386.md splitters. */ ++ if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8) ++ { ++ /* Optimize constant pool reference to immediates. This is used by ++ fp moves, that force all constants to memory to allow combining. 
*/ ++ ++ if (MEM_P (operands[1]) ++ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF ++ && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) ++ operands[1] = get_pool_constant (XEXP (operands[1], 0)); ++ if (push_operand (operands[0], VOIDmode)) ++ { ++ operands[0] = copy_rtx (operands[0]); ++ PUT_MODE (operands[0], word_mode); ++ } ++ else ++ operands[0] = gen_lowpart (DImode, operands[0]); ++ operands[1] = gen_lowpart (DImode, operands[1]); ++ emit_move_insn (operands[0], operands[1]); ++ return; ++ } ++ ++ /* The only non-offsettable memory we handle is push. */ ++ if (push_operand (operands[0], VOIDmode)) ++ push = 1; ++ else ++ gcc_assert (!MEM_P (operands[0]) ++ || offsettable_memref_p (operands[0])); ++ ++ nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); ++ ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); ++ ++ /* When emitting push, take care for source operands on the stack. */ ++ if (push && MEM_P (operands[1]) ++ && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) ++ { ++ rtx src_base = XEXP (part[1][nparts - 1], 0); ++ ++ /* Compensate for the stack decrement by 4. */ ++ if (!TARGET_64BIT && nparts == 3 ++ && mode == XFmode && TARGET_128BIT_LONG_DOUBLE) ++ src_base = plus_constant (Pmode, src_base, 4); ++ ++ /* src_base refers to the stack pointer and is ++ automatically decreased by emitted push. */ ++ for (i = 0; i < nparts; i++) ++ part[1][i] = change_address (part[1][i], ++ GET_MODE (part[1][i]), src_base); ++ } ++ ++ /* We need to do copy in the right order in case an address register ++ of the source overlaps the destination. */ ++ if (REG_P (part[0][0]) && MEM_P (part[1][0])) ++ { ++ rtx tmp; ++ ++ for (i = 0; i < nparts; i++) ++ { ++ collisionparts[i] ++ = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0)); ++ if (collisionparts[i]) ++ collisions++; ++ } ++ ++ /* Collision in the middle part can be handled by reordering. */ ++ if (collisions == 1 && nparts == 3 && collisionparts [1]) ++ { ++ std::swap (part[0][1], part[0][2]); ++ std::swap (part[1][1], part[1][2]); ++ } ++ else if (collisions == 1 ++ && nparts == 4 ++ && (collisionparts [1] || collisionparts [2])) ++ { ++ if (collisionparts [1]) ++ { ++ std::swap (part[0][1], part[0][2]); ++ std::swap (part[1][1], part[1][2]); ++ } ++ else ++ { ++ std::swap (part[0][2], part[0][3]); ++ std::swap (part[1][2], part[1][3]); ++ } ++ } ++ ++ /* If there are more collisions, we can't handle it by reordering. ++ Do an lea to the last part and use only one colliding move. */ ++ else if (collisions > 1) ++ { ++ rtx base, addr; ++ ++ collisions = 1; ++ ++ base = part[0][nparts - 1]; ++ ++ /* Handle the case when the last part isn't valid for lea. ++ Happens in 64-bit mode storing the 12-byte XFmode. */ ++ if (GET_MODE (base) != Pmode) ++ base = gen_rtx_REG (Pmode, REGNO (base)); ++ ++ addr = XEXP (part[1][0], 0); ++ if (TARGET_TLS_DIRECT_SEG_REFS) ++ { ++ struct ix86_address parts; ++ int ok = ix86_decompose_address (addr, &parts); ++ gcc_assert (ok); ++ /* It is not valid to use %gs: or %fs: in lea. 
*/ ++ gcc_assert (parts.seg == ADDR_SPACE_GENERIC); ++ } ++ emit_insn (gen_rtx_SET (base, addr)); ++ part[1][0] = replace_equiv_address (part[1][0], base); ++ for (i = 1; i < nparts; i++) ++ { ++ tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i); ++ part[1][i] = replace_equiv_address (part[1][i], tmp); ++ } ++ } ++ } ++ ++ if (push) ++ { ++ if (!TARGET_64BIT) ++ { ++ if (nparts == 3) ++ { ++ if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) ++ emit_insn (ix86_gen_add3 (stack_pointer_rtx, ++ stack_pointer_rtx, GEN_INT (-4))); ++ emit_move_insn (part[0][2], part[1][2]); ++ } ++ else if (nparts == 4) ++ { ++ emit_move_insn (part[0][3], part[1][3]); ++ emit_move_insn (part[0][2], part[1][2]); ++ } ++ } ++ else ++ { ++ /* In 64bit mode we don't have 32bit push available. In case this is ++ register, it is OK - we will just use larger counterpart. We also ++ retype memory - these comes from attempt to avoid REX prefix on ++ moving of second half of TFmode value. */ ++ if (GET_MODE (part[1][1]) == SImode) ++ { ++ switch (GET_CODE (part[1][1])) ++ { ++ case MEM: ++ part[1][1] = adjust_address (part[1][1], DImode, 0); ++ break; ++ ++ case REG: ++ part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (GET_MODE (part[1][0]) == SImode) ++ part[1][0] = part[1][1]; ++ } ++ } ++ emit_move_insn (part[0][1], part[1][1]); ++ emit_move_insn (part[0][0], part[1][0]); ++ return; ++ } ++ ++ /* Choose correct order to not overwrite the source before it is copied. */ ++ if ((REG_P (part[0][0]) ++ && REG_P (part[1][1]) ++ && (REGNO (part[0][0]) == REGNO (part[1][1]) ++ || (nparts == 3 ++ && REGNO (part[0][0]) == REGNO (part[1][2])) ++ || (nparts == 4 ++ && REGNO (part[0][0]) == REGNO (part[1][3])))) ++ || (collisions > 0 ++ && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) ++ { ++ for (i = 0, j = nparts - 1; i < nparts; i++, j--) ++ { ++ operands[2 + i] = part[0][j]; ++ operands[6 + i] = part[1][j]; ++ } ++ } ++ else ++ { ++ for (i = 0; i < nparts; i++) ++ { ++ operands[2 + i] = part[0][i]; ++ operands[6 + i] = part[1][i]; ++ } ++ } ++ ++ /* If optimizing for size, attempt to locally unCSE nonzero constants. */ ++ if (optimize_insn_for_size_p ()) ++ { ++ for (j = 0; j < nparts - 1; j++) ++ if (CONST_INT_P (operands[6 + j]) ++ && operands[6 + j] != const0_rtx ++ && REG_P (operands[2 + j])) ++ for (i = j; i < nparts - 1; i++) ++ if (CONST_INT_P (operands[7 + i]) ++ && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j])) ++ operands[7 + i] = operands[2 + j]; ++ } ++ ++ for (i = 0; i < nparts; i++) ++ emit_move_insn (operands[2 + i], operands[6 + i]); ++ ++ return; ++} ++ ++/* Helper function of ix86_split_ashl used to generate an SImode/DImode ++ left shift by a constant, either using a single shift or ++ a sequence of add instructions. */ ++ ++static void ++ix86_expand_ashl_const (rtx operand, int count, machine_mode mode) ++{ ++ rtx (*insn)(rtx, rtx, rtx); ++ ++ if (count == 1 ++ || (count * ix86_cost->add <= ix86_cost->shift_const ++ && !optimize_insn_for_size_p ())) ++ { ++ insn = mode == DImode ? gen_addsi3 : gen_adddi3; ++ while (count-- > 0) ++ emit_insn (insn (operand, operand, operand)); ++ } ++ else ++ { ++ insn = mode == DImode ? 
gen_ashlsi3 : gen_ashldi3; ++ emit_insn (insn (operand, operand, GEN_INT (count))); ++ } ++} ++ ++void ++ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode) ++{ ++ rtx (*gen_ashl3)(rtx, rtx, rtx); ++ rtx (*gen_shld)(rtx, rtx, rtx); ++ int half_width = GET_MODE_BITSIZE (mode) >> 1; ++ ++ rtx low[2], high[2]; ++ int count; ++ ++ if (CONST_INT_P (operands[2])) ++ { ++ split_double_mode (mode, operands, 2, low, high); ++ count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); ++ ++ if (count >= half_width) ++ { ++ emit_move_insn (high[0], low[1]); ++ emit_move_insn (low[0], const0_rtx); ++ ++ if (count > half_width) ++ ix86_expand_ashl_const (high[0], count - half_width, mode); ++ } ++ else ++ { ++ gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; ++ ++ if (!rtx_equal_p (operands[0], operands[1])) ++ emit_move_insn (operands[0], operands[1]); ++ ++ emit_insn (gen_shld (high[0], low[0], GEN_INT (count))); ++ ix86_expand_ashl_const (low[0], count, mode); ++ } ++ return; ++ } ++ ++ split_double_mode (mode, operands, 1, low, high); ++ ++ gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3; ++ ++ if (operands[1] == const1_rtx) ++ { ++ /* Assuming we've chosen a QImode capable registers, then 1 << N ++ can be done with two 32/64-bit shifts, no branches, no cmoves. */ ++ if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) ++ { ++ rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); ++ ++ ix86_expand_clear (low[0]); ++ ix86_expand_clear (high[0]); ++ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width))); ++ ++ d = gen_lowpart (QImode, low[0]); ++ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); ++ s = gen_rtx_EQ (QImode, flags, const0_rtx); ++ emit_insn (gen_rtx_SET (d, s)); ++ ++ d = gen_lowpart (QImode, high[0]); ++ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); ++ s = gen_rtx_NE (QImode, flags, const0_rtx); ++ emit_insn (gen_rtx_SET (d, s)); ++ } ++ ++ /* Otherwise, we can get the same results by manually performing ++ a bit extract operation on bit 5/6, and then performing the two ++ shifts. The two methods of getting 0/1 into low/high are exactly ++ the same size. Avoiding the shift in the bit extract case helps ++ pentium4 a bit; no one else seems to care much either way. */ ++ else ++ { ++ machine_mode half_mode; ++ rtx (*gen_lshr3)(rtx, rtx, rtx); ++ rtx (*gen_and3)(rtx, rtx, rtx); ++ rtx (*gen_xor3)(rtx, rtx, rtx); ++ HOST_WIDE_INT bits; ++ rtx x; ++ ++ if (mode == DImode) ++ { ++ half_mode = SImode; ++ gen_lshr3 = gen_lshrsi3; ++ gen_and3 = gen_andsi3; ++ gen_xor3 = gen_xorsi3; ++ bits = 5; ++ } ++ else ++ { ++ half_mode = DImode; ++ gen_lshr3 = gen_lshrdi3; ++ gen_and3 = gen_anddi3; ++ gen_xor3 = gen_xordi3; ++ bits = 6; ++ } ++ ++ if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) ++ x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]); ++ else ++ x = gen_lowpart (half_mode, operands[2]); ++ emit_insn (gen_rtx_SET (high[0], x)); ++ ++ emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits))); ++ emit_insn (gen_and3 (high[0], high[0], const1_rtx)); ++ emit_move_insn (low[0], high[0]); ++ emit_insn (gen_xor3 (low[0], low[0], const1_rtx)); ++ } ++ ++ emit_insn (gen_ashl3 (low[0], low[0], operands[2])); ++ emit_insn (gen_ashl3 (high[0], high[0], operands[2])); ++ return; ++ } ++ ++ if (operands[1] == constm1_rtx) ++ { ++ /* For -1 << N, we can avoid the shld instruction, because we ++ know that we're shifting 0...31/63 ones into a -1. 
*/ ++ emit_move_insn (low[0], constm1_rtx); ++ if (optimize_insn_for_size_p ()) ++ emit_move_insn (high[0], low[0]); ++ else ++ emit_move_insn (high[0], constm1_rtx); ++ } ++ else ++ { ++ gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; ++ ++ if (!rtx_equal_p (operands[0], operands[1])) ++ emit_move_insn (operands[0], operands[1]); ++ ++ split_double_mode (mode, operands, 1, low, high); ++ emit_insn (gen_shld (high[0], low[0], operands[2])); ++ } ++ ++ emit_insn (gen_ashl3 (low[0], low[0], operands[2])); ++ ++ if (TARGET_CMOVE && scratch) ++ { ++ rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) ++ = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; ++ ++ ix86_expand_clear (scratch); ++ emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch)); ++ } ++ else ++ { ++ rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) ++ = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; ++ ++ emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); ++ } ++} ++ ++void ++ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode) ++{ ++ rtx (*gen_ashr3)(rtx, rtx, rtx) ++ = mode == DImode ? gen_ashrsi3 : gen_ashrdi3; ++ rtx (*gen_shrd)(rtx, rtx, rtx); ++ int half_width = GET_MODE_BITSIZE (mode) >> 1; ++ ++ rtx low[2], high[2]; ++ int count; ++ ++ if (CONST_INT_P (operands[2])) ++ { ++ split_double_mode (mode, operands, 2, low, high); ++ count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); ++ ++ if (count == GET_MODE_BITSIZE (mode) - 1) ++ { ++ emit_move_insn (high[0], high[1]); ++ emit_insn (gen_ashr3 (high[0], high[0], ++ GEN_INT (half_width - 1))); ++ emit_move_insn (low[0], high[0]); ++ ++ } ++ else if (count >= half_width) ++ { ++ emit_move_insn (low[0], high[1]); ++ emit_move_insn (high[0], low[0]); ++ emit_insn (gen_ashr3 (high[0], high[0], ++ GEN_INT (half_width - 1))); ++ ++ if (count > half_width) ++ emit_insn (gen_ashr3 (low[0], low[0], ++ GEN_INT (count - half_width))); ++ } ++ else ++ { ++ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; ++ ++ if (!rtx_equal_p (operands[0], operands[1])) ++ emit_move_insn (operands[0], operands[1]); ++ ++ emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); ++ emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count))); ++ } ++ } ++ else ++ { ++ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; ++ ++ if (!rtx_equal_p (operands[0], operands[1])) ++ emit_move_insn (operands[0], operands[1]); ++ ++ split_double_mode (mode, operands, 1, low, high); ++ ++ emit_insn (gen_shrd (low[0], high[0], operands[2])); ++ emit_insn (gen_ashr3 (high[0], high[0], operands[2])); ++ ++ if (TARGET_CMOVE && scratch) ++ { ++ rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) ++ = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; ++ ++ emit_move_insn (scratch, high[0]); ++ emit_insn (gen_ashr3 (scratch, scratch, ++ GEN_INT (half_width - 1))); ++ emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], ++ scratch)); ++ } ++ else ++ { ++ rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx) ++ = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3; ++ ++ emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); ++ } ++ } ++} ++ ++void ++ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode) ++{ ++ rtx (*gen_lshr3)(rtx, rtx, rtx) ++ = mode == DImode ? 
gen_lshrsi3 : gen_lshrdi3; ++ rtx (*gen_shrd)(rtx, rtx, rtx); ++ int half_width = GET_MODE_BITSIZE (mode) >> 1; ++ ++ rtx low[2], high[2]; ++ int count; ++ ++ if (CONST_INT_P (operands[2])) ++ { ++ split_double_mode (mode, operands, 2, low, high); ++ count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); ++ ++ if (count >= half_width) ++ { ++ emit_move_insn (low[0], high[1]); ++ ix86_expand_clear (high[0]); ++ ++ if (count > half_width) ++ emit_insn (gen_lshr3 (low[0], low[0], ++ GEN_INT (count - half_width))); ++ } ++ else ++ { ++ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; ++ ++ if (!rtx_equal_p (operands[0], operands[1])) ++ emit_move_insn (operands[0], operands[1]); ++ ++ emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); ++ emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count))); ++ } ++ } ++ else ++ { ++ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; ++ ++ if (!rtx_equal_p (operands[0], operands[1])) ++ emit_move_insn (operands[0], operands[1]); ++ ++ split_double_mode (mode, operands, 1, low, high); ++ ++ emit_insn (gen_shrd (low[0], high[0], operands[2])); ++ emit_insn (gen_lshr3 (high[0], high[0], operands[2])); ++ ++ if (TARGET_CMOVE && scratch) ++ { ++ rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) ++ = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; ++ ++ ix86_expand_clear (scratch); ++ emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], ++ scratch)); ++ } ++ else ++ { ++ rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) ++ = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; ++ ++ emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); ++ } ++ } ++} ++ ++/* Return mode for the memcpy/memset loop counter. Prefer SImode over ++ DImode for constant loop counts. */ ++ ++static machine_mode ++counter_mode (rtx count_exp) ++{ ++ if (GET_MODE (count_exp) != VOIDmode) ++ return GET_MODE (count_exp); ++ if (!CONST_INT_P (count_exp)) ++ return Pmode; ++ if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff)) ++ return DImode; ++ return SImode; ++} ++ ++/* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR ++ to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT ++ specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set ++ memory by VALUE (supposed to be in MODE). ++ ++ The size is rounded down to whole number of chunk size moved at once. ++ SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */ ++ ++ ++static void ++expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem, ++ rtx destptr, rtx srcptr, rtx value, ++ rtx count, machine_mode mode, int unroll, ++ int expected_size, bool issetmem) ++{ ++ rtx_code_label *out_label, *top_label; ++ rtx iter, tmp; ++ machine_mode iter_mode = counter_mode (count); ++ int piece_size_n = GET_MODE_SIZE (mode) * unroll; ++ rtx piece_size = GEN_INT (piece_size_n); ++ rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1)); ++ rtx size; ++ int i; ++ ++ top_label = gen_label_rtx (); ++ out_label = gen_label_rtx (); ++ iter = gen_reg_rtx (iter_mode); ++ ++ size = expand_simple_binop (iter_mode, AND, count, piece_size_mask, ++ NULL, 1, OPTAB_DIRECT); ++ /* Those two should combine. 
*/ ++ if (piece_size == const1_rtx) ++ { ++ emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode, ++ true, out_label); ++ predict_jump (REG_BR_PROB_BASE * 10 / 100); ++ } ++ emit_move_insn (iter, const0_rtx); ++ ++ emit_label (top_label); ++ ++ tmp = convert_modes (Pmode, iter_mode, iter, true); ++ ++ /* This assert could be relaxed - in this case we'll need to compute ++ smallest power of two, containing in PIECE_SIZE_N and pass it to ++ offset_address. */ ++ gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0); ++ destmem = offset_address (destmem, tmp, piece_size_n); ++ destmem = adjust_address (destmem, mode, 0); ++ ++ if (!issetmem) ++ { ++ srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n); ++ srcmem = adjust_address (srcmem, mode, 0); ++ ++ /* When unrolling for chips that reorder memory reads and writes, ++ we can save registers by using single temporary. ++ Also using 4 temporaries is overkill in 32bit mode. */ ++ if (!TARGET_64BIT && 0) ++ { ++ for (i = 0; i < unroll; i++) ++ { ++ if (i) ++ { ++ destmem = adjust_address (copy_rtx (destmem), mode, ++ GET_MODE_SIZE (mode)); ++ srcmem = adjust_address (copy_rtx (srcmem), mode, ++ GET_MODE_SIZE (mode)); ++ } ++ emit_move_insn (destmem, srcmem); ++ } ++ } ++ else ++ { ++ rtx tmpreg[4]; ++ gcc_assert (unroll <= 4); ++ for (i = 0; i < unroll; i++) ++ { ++ tmpreg[i] = gen_reg_rtx (mode); ++ if (i) ++ srcmem = adjust_address (copy_rtx (srcmem), mode, ++ GET_MODE_SIZE (mode)); ++ emit_move_insn (tmpreg[i], srcmem); ++ } ++ for (i = 0; i < unroll; i++) ++ { ++ if (i) ++ destmem = adjust_address (copy_rtx (destmem), mode, ++ GET_MODE_SIZE (mode)); ++ emit_move_insn (destmem, tmpreg[i]); ++ } ++ } ++ } ++ else ++ for (i = 0; i < unroll; i++) ++ { ++ if (i) ++ destmem = adjust_address (copy_rtx (destmem), mode, ++ GET_MODE_SIZE (mode)); ++ emit_move_insn (destmem, value); ++ } ++ ++ tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter, ++ true, OPTAB_LIB_WIDEN); ++ if (tmp != iter) ++ emit_move_insn (iter, tmp); ++ ++ emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, ++ true, top_label); ++ if (expected_size != -1) ++ { ++ expected_size /= GET_MODE_SIZE (mode) * unroll; ++ if (expected_size == 0) ++ predict_jump (0); ++ else if (expected_size > REG_BR_PROB_BASE) ++ predict_jump (REG_BR_PROB_BASE - 1); ++ else ++ predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) ++ / expected_size); ++ } ++ else ++ predict_jump (REG_BR_PROB_BASE * 80 / 100); ++ iter = ix86_zero_extend_to_Pmode (iter); ++ tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr, ++ true, OPTAB_LIB_WIDEN); ++ if (tmp != destptr) ++ emit_move_insn (destptr, tmp); ++ if (!issetmem) ++ { ++ tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr, ++ true, OPTAB_LIB_WIDEN); ++ if (tmp != srcptr) ++ emit_move_insn (srcptr, tmp); ++ } ++ emit_label (out_label); ++} ++ ++/* Divide COUNTREG by SCALE. */ ++static rtx ++scale_counter (rtx countreg, int scale) ++{ ++ rtx sc; ++ ++ if (scale == 1) ++ return countreg; ++ if (CONST_INT_P (countreg)) ++ return GEN_INT (INTVAL (countreg) / scale); ++ gcc_assert (REG_P (countreg)); ++ ++ sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg, ++ GEN_INT (exact_log2 (scale)), ++ NULL, 1, OPTAB_DIRECT); ++ return sc; ++} ++ ++/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument. ++ When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored. ++ When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored. 
++ For setmem case, VALUE is a promoted to a wider size ORIG_VALUE. ++ ORIG_VALUE is the original value passed to memset to fill the memory with. ++ Other arguments have same meaning as for previous function. */ ++ ++static void ++expand_set_or_cpymem_via_rep (rtx destmem, rtx srcmem, ++ rtx destptr, rtx srcptr, rtx value, rtx orig_value, ++ rtx count, ++ machine_mode mode, bool issetmem) ++{ ++ rtx destexp; ++ rtx srcexp; ++ rtx countreg; ++ HOST_WIDE_INT rounded_count; ++ ++ /* If possible, it is shorter to use rep movs. ++ TODO: Maybe it is better to move this logic to decide_alg. */ ++ if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3) ++ && (!issetmem || orig_value == const0_rtx)) ++ mode = SImode; ++ ++ if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) ++ destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); ++ ++ countreg = ix86_zero_extend_to_Pmode (scale_counter (count, ++ GET_MODE_SIZE (mode))); ++ if (mode != QImode) ++ { ++ destexp = gen_rtx_ASHIFT (Pmode, countreg, ++ GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); ++ destexp = gen_rtx_PLUS (Pmode, destexp, destptr); ++ } ++ else ++ destexp = gen_rtx_PLUS (Pmode, destptr, countreg); ++ if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count)) ++ { ++ rounded_count ++ = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); ++ destmem = shallow_copy_rtx (destmem); ++ set_mem_size (destmem, rounded_count); ++ } ++ else if (MEM_SIZE_KNOWN_P (destmem)) ++ clear_mem_size (destmem); ++ ++ if (issetmem) ++ { ++ value = force_reg (mode, gen_lowpart (mode, value)); ++ emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp)); ++ } ++ else ++ { ++ if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode) ++ srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0); ++ if (mode != QImode) ++ { ++ srcexp = gen_rtx_ASHIFT (Pmode, countreg, ++ GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); ++ srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr); ++ } ++ else ++ srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg); ++ if (CONST_INT_P (count)) ++ { ++ rounded_count ++ = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); ++ srcmem = shallow_copy_rtx (srcmem); ++ set_mem_size (srcmem, rounded_count); ++ } ++ else ++ { ++ if (MEM_SIZE_KNOWN_P (srcmem)) ++ clear_mem_size (srcmem); ++ } ++ emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg, ++ destexp, srcexp)); ++ } ++} ++ ++/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to ++ DESTMEM. ++ SRC is passed by pointer to be updated on return. ++ Return value is updated DST. */ ++static rtx ++emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr, ++ HOST_WIDE_INT size_to_move) ++{ ++ rtx dst = destmem, src = *srcmem, adjust, tempreg; ++ enum insn_code code; ++ machine_mode move_mode; ++ int piece_size, i; ++ ++ /* Find the widest mode in which we could perform moves. ++ Start with the biggest power of 2 less than SIZE_TO_MOVE and half ++ it until move of such size is supported. */ ++ piece_size = 1 << floor_log2 (size_to_move); ++ while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode) ++ || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing) ++ { ++ gcc_assert (piece_size > 1); ++ piece_size >>= 1; ++ } ++ ++ /* Find the corresponding vector mode with the same size as MOVE_MODE. ++ MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). 
*/ ++ if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) ++ { ++ int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); ++ if (!mode_for_vector (word_mode, nunits).exists (&move_mode) ++ || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing) ++ { ++ move_mode = word_mode; ++ piece_size = GET_MODE_SIZE (move_mode); ++ code = optab_handler (mov_optab, move_mode); ++ } ++ } ++ gcc_assert (code != CODE_FOR_nothing); ++ ++ dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); ++ src = adjust_automodify_address_nv (src, move_mode, srcptr, 0); ++ ++ /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */ ++ gcc_assert (size_to_move % piece_size == 0); ++ adjust = GEN_INT (piece_size); ++ for (i = 0; i < size_to_move; i += piece_size) ++ { ++ /* We move from memory to memory, so we'll need to do it via ++ a temporary register. */ ++ tempreg = gen_reg_rtx (move_mode); ++ emit_insn (GEN_FCN (code) (tempreg, src)); ++ emit_insn (GEN_FCN (code) (dst, tempreg)); ++ ++ emit_move_insn (destptr, ++ gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust)); ++ emit_move_insn (srcptr, ++ gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust)); ++ ++ dst = adjust_automodify_address_nv (dst, move_mode, destptr, ++ piece_size); ++ src = adjust_automodify_address_nv (src, move_mode, srcptr, ++ piece_size); ++ } ++ ++ /* Update DST and SRC rtx. */ ++ *srcmem = src; ++ return dst; ++} ++ ++/* Helper function for the string operations below. Dest VARIABLE whether ++ it is aligned to VALUE bytes. If true, jump to the label. */ ++ ++static rtx_code_label * ++ix86_expand_aligntest (rtx variable, int value, bool epilogue) ++{ ++ rtx_code_label *label = gen_label_rtx (); ++ rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); ++ if (GET_MODE (variable) == DImode) ++ emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); ++ else ++ emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); ++ emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), ++ 1, label); ++ if (epilogue) ++ predict_jump (REG_BR_PROB_BASE * 50 / 100); ++ else ++ predict_jump (REG_BR_PROB_BASE * 90 / 100); ++ return label; ++} ++ ++ ++/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */ ++ ++static void ++expand_cpymem_epilogue (rtx destmem, rtx srcmem, ++ rtx destptr, rtx srcptr, rtx count, int max_size) ++{ ++ rtx src, dest; ++ if (CONST_INT_P (count)) ++ { ++ HOST_WIDE_INT countval = INTVAL (count); ++ HOST_WIDE_INT epilogue_size = countval % max_size; ++ int i; ++ ++ /* For now MAX_SIZE should be a power of 2. This assert could be ++ relaxed, but it'll require a bit more complicated epilogue ++ expanding. */ ++ gcc_assert ((max_size & (max_size - 1)) == 0); ++ for (i = max_size; i >= 1; i >>= 1) ++ { ++ if (epilogue_size & i) ++ destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); ++ } ++ return; ++ } ++ if (max_size > 8) ++ { ++ count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1), ++ count, 1, OPTAB_DIRECT); ++ expand_set_or_cpymem_via_loop (destmem, srcmem, destptr, srcptr, NULL, ++ count, QImode, 1, 4, false); ++ return; ++ } ++ ++ /* When there are stringops, we can cheaply increase dest and src pointers. ++ Otherwise we save code size by maintaining offset (zero is readily ++ available from preceding rep operation) and using x86 addressing modes. 
++ */ ++ if (TARGET_SINGLE_STRINGOP) ++ { ++ if (max_size > 4) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (count, 4, true); ++ src = change_address (srcmem, SImode, srcptr); ++ dest = change_address (destmem, SImode, destptr); ++ emit_insn (gen_strmov (destptr, dest, srcptr, src)); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ if (max_size > 2) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (count, 2, true); ++ src = change_address (srcmem, HImode, srcptr); ++ dest = change_address (destmem, HImode, destptr); ++ emit_insn (gen_strmov (destptr, dest, srcptr, src)); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ if (max_size > 1) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (count, 1, true); ++ src = change_address (srcmem, QImode, srcptr); ++ dest = change_address (destmem, QImode, destptr); ++ emit_insn (gen_strmov (destptr, dest, srcptr, src)); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ } ++ else ++ { ++ rtx offset = force_reg (Pmode, const0_rtx); ++ rtx tmp; ++ ++ if (max_size > 4) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (count, 4, true); ++ src = change_address (srcmem, SImode, srcptr); ++ dest = change_address (destmem, SImode, destptr); ++ emit_move_insn (dest, src); ++ tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL, ++ true, OPTAB_LIB_WIDEN); ++ if (tmp != offset) ++ emit_move_insn (offset, tmp); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ if (max_size > 2) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (count, 2, true); ++ tmp = gen_rtx_PLUS (Pmode, srcptr, offset); ++ src = change_address (srcmem, HImode, tmp); ++ tmp = gen_rtx_PLUS (Pmode, destptr, offset); ++ dest = change_address (destmem, HImode, tmp); ++ emit_move_insn (dest, src); ++ tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp, ++ true, OPTAB_LIB_WIDEN); ++ if (tmp != offset) ++ emit_move_insn (offset, tmp); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ if (max_size > 1) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (count, 1, true); ++ tmp = gen_rtx_PLUS (Pmode, srcptr, offset); ++ src = change_address (srcmem, QImode, tmp); ++ tmp = gen_rtx_PLUS (Pmode, destptr, offset); ++ dest = change_address (destmem, QImode, tmp); ++ emit_move_insn (dest, src); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ } ++} ++ ++/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM ++ with value PROMOTED_VAL. ++ SRC is passed by pointer to be updated on return. ++ Return value is updated DST. */ ++static rtx ++emit_memset (rtx destmem, rtx destptr, rtx promoted_val, ++ HOST_WIDE_INT size_to_move) ++{ ++ rtx dst = destmem, adjust; ++ enum insn_code code; ++ machine_mode move_mode; ++ int piece_size, i; ++ ++ /* Find the widest mode in which we could perform moves. ++ Start with the biggest power of 2 less than SIZE_TO_MOVE and half ++ it until move of such size is supported. */ ++ move_mode = GET_MODE (promoted_val); ++ if (move_mode == VOIDmode) ++ move_mode = QImode; ++ if (size_to_move < GET_MODE_SIZE (move_mode)) ++ { ++ unsigned int move_bits = size_to_move * BITS_PER_UNIT; ++ move_mode = int_mode_for_size (move_bits, 0).require (); ++ promoted_val = gen_lowpart (move_mode, promoted_val); ++ } ++ piece_size = GET_MODE_SIZE (move_mode); ++ code = optab_handler (mov_optab, move_mode); ++ gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX); ++ ++ dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); ++ ++ /* Emit moves. 
We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */ ++ gcc_assert (size_to_move % piece_size == 0); ++ adjust = GEN_INT (piece_size); ++ for (i = 0; i < size_to_move; i += piece_size) ++ { ++ if (piece_size <= GET_MODE_SIZE (word_mode)) ++ { ++ emit_insn (gen_strset (destptr, dst, promoted_val)); ++ dst = adjust_automodify_address_nv (dst, move_mode, destptr, ++ piece_size); ++ continue; ++ } ++ ++ emit_insn (GEN_FCN (code) (dst, promoted_val)); ++ ++ emit_move_insn (destptr, ++ gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust)); ++ ++ dst = adjust_automodify_address_nv (dst, move_mode, destptr, ++ piece_size); ++ } ++ ++ /* Update DST rtx. */ ++ return dst; ++} ++/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ ++static void ++expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value, ++ rtx count, int max_size) ++{ ++ count = expand_simple_binop (counter_mode (count), AND, count, ++ GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT); ++ expand_set_or_cpymem_via_loop (destmem, NULL, destptr, NULL, ++ gen_lowpart (QImode, value), count, QImode, ++ 1, max_size / 2, true); ++} ++ ++/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ ++static void ++expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, ++ rtx count, int max_size) ++{ ++ rtx dest; ++ ++ if (CONST_INT_P (count)) ++ { ++ HOST_WIDE_INT countval = INTVAL (count); ++ HOST_WIDE_INT epilogue_size = countval % max_size; ++ int i; ++ ++ /* For now MAX_SIZE should be a power of 2. This assert could be ++ relaxed, but it'll require a bit more complicated epilogue ++ expanding. */ ++ gcc_assert ((max_size & (max_size - 1)) == 0); ++ for (i = max_size; i >= 1; i >>= 1) ++ { ++ if (epilogue_size & i) ++ { ++ if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) ++ destmem = emit_memset (destmem, destptr, vec_value, i); ++ else ++ destmem = emit_memset (destmem, destptr, value, i); ++ } ++ } ++ return; ++ } ++ if (max_size > 32) ++ { ++ expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size); ++ return; ++ } ++ if (max_size > 16) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (count, 16, true); ++ if (TARGET_64BIT) ++ { ++ dest = change_address (destmem, DImode, destptr); ++ emit_insn (gen_strset (destptr, dest, value)); ++ dest = adjust_automodify_address_nv (dest, DImode, destptr, 8); ++ emit_insn (gen_strset (destptr, dest, value)); ++ } ++ else ++ { ++ dest = change_address (destmem, SImode, destptr); ++ emit_insn (gen_strset (destptr, dest, value)); ++ dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); ++ emit_insn (gen_strset (destptr, dest, value)); ++ dest = adjust_automodify_address_nv (dest, SImode, destptr, 8); ++ emit_insn (gen_strset (destptr, dest, value)); ++ dest = adjust_automodify_address_nv (dest, SImode, destptr, 12); ++ emit_insn (gen_strset (destptr, dest, value)); ++ } ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ if (max_size > 8) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (count, 8, true); ++ if (TARGET_64BIT) ++ { ++ dest = change_address (destmem, DImode, destptr); ++ emit_insn (gen_strset (destptr, dest, value)); ++ } ++ else ++ { ++ dest = change_address (destmem, SImode, destptr); ++ emit_insn (gen_strset (destptr, dest, value)); ++ dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); ++ emit_insn (gen_strset (destptr, dest, value)); ++ } ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ if (max_size > 4) ++ { ++ rtx_code_label *label = 
ix86_expand_aligntest (count, 4, true); ++ dest = change_address (destmem, SImode, destptr); ++ emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ if (max_size > 2) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (count, 2, true); ++ dest = change_address (destmem, HImode, destptr); ++ emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ if (max_size > 1) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (count, 1, true); ++ dest = change_address (destmem, QImode, destptr); ++ emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++} ++ ++/* Adjust COUNTER by the VALUE. */ ++static void ++ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) ++{ ++ rtx (*gen_add)(rtx, rtx, rtx) ++ = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3; ++ ++ emit_insn (gen_add (countreg, countreg, GEN_INT (-value))); ++} ++ ++/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to ++ DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN. ++ Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are ++ ignored. ++ Return value is updated DESTMEM. */ ++ ++static rtx ++expand_set_or_cpymem_prologue (rtx destmem, rtx srcmem, ++ rtx destptr, rtx srcptr, rtx value, ++ rtx vec_value, rtx count, int align, ++ int desired_alignment, bool issetmem) ++{ ++ int i; ++ for (i = 1; i < desired_alignment; i <<= 1) ++ { ++ if (align <= i) ++ { ++ rtx_code_label *label = ix86_expand_aligntest (destptr, i, false); ++ if (issetmem) ++ { ++ if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) ++ destmem = emit_memset (destmem, destptr, vec_value, i); ++ else ++ destmem = emit_memset (destmem, destptr, value, i); ++ } ++ else ++ destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); ++ ix86_adjust_counter (count, i); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ set_mem_align (destmem, i * 2 * BITS_PER_UNIT); ++ } ++ } ++ return destmem; ++} ++ ++/* Test if COUNT&SIZE is nonzero and if so, expand movme ++ or setmem sequence that is valid for SIZE..2*SIZE-1 bytes ++ and jump to DONE_LABEL. */ ++static void ++expand_small_cpymem_or_setmem (rtx destmem, rtx srcmem, ++ rtx destptr, rtx srcptr, ++ rtx value, rtx vec_value, ++ rtx count, int size, ++ rtx done_label, bool issetmem) ++{ ++ rtx_code_label *label = ix86_expand_aligntest (count, size, false); ++ machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk (); ++ rtx modesize; ++ int n; ++ ++ /* If we do not have vector value to copy, we must reduce size. */ ++ if (issetmem) ++ { ++ if (!vec_value) ++ { ++ if (GET_MODE (value) == VOIDmode && size > 8) ++ mode = Pmode; ++ else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value))) ++ mode = GET_MODE (value); ++ } ++ else ++ mode = GET_MODE (vec_value), value = vec_value; ++ } ++ else ++ { ++ /* Choose appropriate vector mode. */ ++ if (size >= 32) ++ mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode; ++ else if (size >= 16) ++ mode = TARGET_SSE ? 
V16QImode : DImode; ++ srcmem = change_address (srcmem, mode, srcptr); ++ } ++ destmem = change_address (destmem, mode, destptr); ++ modesize = GEN_INT (GET_MODE_SIZE (mode)); ++ gcc_assert (GET_MODE_SIZE (mode) <= size); ++ for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) ++ { ++ if (issetmem) ++ emit_move_insn (destmem, gen_lowpart (mode, value)); ++ else ++ { ++ emit_move_insn (destmem, srcmem); ++ srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); ++ } ++ destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); ++ } ++ ++ destmem = offset_address (destmem, count, 1); ++ destmem = offset_address (destmem, GEN_INT (-2 * size), ++ GET_MODE_SIZE (mode)); ++ if (!issetmem) ++ { ++ srcmem = offset_address (srcmem, count, 1); ++ srcmem = offset_address (srcmem, GEN_INT (-2 * size), ++ GET_MODE_SIZE (mode)); ++ } ++ for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) ++ { ++ if (issetmem) ++ emit_move_insn (destmem, gen_lowpart (mode, value)); ++ else ++ { ++ emit_move_insn (destmem, srcmem); ++ srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); ++ } ++ destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); ++ } ++ emit_jump_insn (gen_jump (done_label)); ++ emit_barrier (); ++ ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++} ++ ++/* Handle small memcpy (up to SIZE that is supposed to be small power of 2. ++ and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN ++ bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can ++ proceed with an loop copying SIZE bytes at once. Do moves in MODE. ++ DONE_LABEL is a label after the whole copying sequence. The label is created ++ on demand if *DONE_LABEL is NULL. ++ MIN_SIZE is minimal size of block copied. This value gets adjusted for new ++ bounds after the initial copies. ++ ++ DESTMEM/SRCMEM are memory expressions pointing to the copies block, ++ DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether ++ we will dispatch to a library call for large blocks. ++ ++ In pseudocode we do: ++ ++ if (COUNT < SIZE) ++ { ++ Assume that SIZE is 4. 
Bigger sizes are handled analogously ++ if (COUNT & 4) ++ { ++ copy 4 bytes from SRCPTR to DESTPTR ++ copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4 ++ goto done_label ++ } ++ if (!COUNT) ++ goto done_label; ++ copy 1 byte from SRCPTR to DESTPTR ++ if (COUNT & 2) ++ { ++ copy 2 bytes from SRCPTR to DESTPTR ++ copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2 ++ } ++ } ++ else ++ { ++ copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR ++ copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE ++ ++ OLD_DESPTR = DESTPTR; ++ Align DESTPTR up to DESIRED_ALIGN ++ SRCPTR += DESTPTR - OLD_DESTPTR ++ COUNT -= DEST_PTR - OLD_DESTPTR ++ if (DYNAMIC_CHECK) ++ Round COUNT down to multiple of SIZE ++ << optional caller supplied zero size guard is here >> ++ << optional caller supplied dynamic check is here >> ++ << caller supplied main copy loop is here >> ++ } ++ done_label: ++ */ ++static void ++expand_set_or_cpymem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem, ++ rtx *destptr, rtx *srcptr, ++ machine_mode mode, ++ rtx value, rtx vec_value, ++ rtx *count, ++ rtx_code_label **done_label, ++ int size, ++ int desired_align, ++ int align, ++ unsigned HOST_WIDE_INT *min_size, ++ bool dynamic_check, ++ bool issetmem) ++{ ++ rtx_code_label *loop_label = NULL, *label; ++ int n; ++ rtx modesize; ++ int prolog_size = 0; ++ rtx mode_value; ++ ++ /* Chose proper value to copy. */ ++ if (issetmem && VECTOR_MODE_P (mode)) ++ mode_value = vec_value; ++ else ++ mode_value = value; ++ gcc_assert (GET_MODE_SIZE (mode) <= size); ++ ++ /* See if block is big or small, handle small blocks. */ ++ if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size) ++ { ++ int size2 = size; ++ loop_label = gen_label_rtx (); ++ ++ if (!*done_label) ++ *done_label = gen_label_rtx (); ++ ++ emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count), ++ 1, loop_label); ++ size2 >>= 1; ++ ++ /* Handle sizes > 3. */ ++ for (;size2 > 2; size2 >>= 1) ++ expand_small_cpymem_or_setmem (destmem, srcmem, ++ *destptr, *srcptr, ++ value, vec_value, ++ *count, ++ size2, *done_label, issetmem); ++ /* Nothing to copy? Jump to DONE_LABEL if so */ ++ emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count), ++ 1, *done_label); ++ ++ /* Do a byte copy. */ ++ destmem = change_address (destmem, QImode, *destptr); ++ if (issetmem) ++ emit_move_insn (destmem, gen_lowpart (QImode, value)); ++ else ++ { ++ srcmem = change_address (srcmem, QImode, *srcptr); ++ emit_move_insn (destmem, srcmem); ++ } ++ ++ /* Handle sizes 2 and 3. */ ++ label = ix86_expand_aligntest (*count, 2, false); ++ destmem = change_address (destmem, HImode, *destptr); ++ destmem = offset_address (destmem, *count, 1); ++ destmem = offset_address (destmem, GEN_INT (-2), 2); ++ if (issetmem) ++ emit_move_insn (destmem, gen_lowpart (HImode, value)); ++ else ++ { ++ srcmem = change_address (srcmem, HImode, *srcptr); ++ srcmem = offset_address (srcmem, *count, 1); ++ srcmem = offset_address (srcmem, GEN_INT (-2), 2); ++ emit_move_insn (destmem, srcmem); ++ } ++ ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ emit_jump_insn (gen_jump (*done_label)); ++ emit_barrier (); ++ } ++ else ++ gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size ++ || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size); ++ ++ /* Start memcpy for COUNT >= SIZE. */ ++ if (loop_label) ++ { ++ emit_label (loop_label); ++ LABEL_NUSES (loop_label) = 1; ++ } ++ ++ /* Copy first desired_align bytes. 
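The pseudocode above handles any block of SIZE..2*SIZE-1 bytes with just two moves: one covering the first SIZE bytes and one, possibly overlapping, covering the last SIZE bytes, so no byte loop is needed. A minimal standalone C sketch of that trick; the helper name and the 4-byte chunk size are only for illustration:

#include <stdint.h>
#include <string.h>

/* Illustrative only: copy N bytes, 4 <= N < 8, with two possibly
   overlapping 4-byte moves, mirroring the "copy 4 from the front,
   4 from the back" branch of the pseudocode.  */
static void
copy_4_to_7_bytes (unsigned char *dst, const unsigned char *src, size_t n)
{
  uint32_t head, tail;

  memcpy (&head, src, 4);           /* first 4 bytes */
  memcpy (&tail, src + n - 4, 4);   /* last 4 bytes, may overlap the first */
  memcpy (dst, &head, 4);
  memcpy (dst + n - 4, &tail, 4);
}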
*/ ++ if (!issetmem) ++ srcmem = change_address (srcmem, mode, *srcptr); ++ destmem = change_address (destmem, mode, *destptr); ++ modesize = GEN_INT (GET_MODE_SIZE (mode)); ++ for (n = 0; prolog_size < desired_align - align; n++) ++ { ++ if (issetmem) ++ emit_move_insn (destmem, mode_value); ++ else ++ { ++ emit_move_insn (destmem, srcmem); ++ srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); ++ } ++ destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); ++ prolog_size += GET_MODE_SIZE (mode); ++ } ++ ++ ++ /* Copy last SIZE bytes. */ ++ destmem = offset_address (destmem, *count, 1); ++ destmem = offset_address (destmem, ++ GEN_INT (-size - prolog_size), ++ 1); ++ if (issetmem) ++ emit_move_insn (destmem, mode_value); ++ else ++ { ++ srcmem = offset_address (srcmem, *count, 1); ++ srcmem = offset_address (srcmem, ++ GEN_INT (-size - prolog_size), ++ 1); ++ emit_move_insn (destmem, srcmem); ++ } ++ for (n = 1; n * GET_MODE_SIZE (mode) < size; n++) ++ { ++ destmem = offset_address (destmem, modesize, 1); ++ if (issetmem) ++ emit_move_insn (destmem, mode_value); ++ else ++ { ++ srcmem = offset_address (srcmem, modesize, 1); ++ emit_move_insn (destmem, srcmem); ++ } ++ } ++ ++ /* Align destination. */ ++ if (desired_align > 1 && desired_align > align) ++ { ++ rtx saveddest = *destptr; ++ ++ gcc_assert (desired_align <= size); ++ /* Align destptr up, place it to new register. */ ++ *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr, ++ GEN_INT (prolog_size), ++ NULL_RTX, 1, OPTAB_DIRECT); ++ if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest)) ++ REG_POINTER (*destptr) = 1; ++ *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr, ++ GEN_INT (-desired_align), ++ *destptr, 1, OPTAB_DIRECT); ++ /* See how many bytes we skipped. */ ++ saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest, ++ *destptr, ++ saveddest, 1, OPTAB_DIRECT); ++ /* Adjust srcptr and count. */ ++ if (!issetmem) ++ *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, ++ saveddest, *srcptr, 1, OPTAB_DIRECT); ++ *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, ++ saveddest, *count, 1, OPTAB_DIRECT); ++ /* We copied at most size + prolog_size. */ ++ if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size)) ++ *min_size ++ = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size); ++ else ++ *min_size = 0; ++ ++ /* Our loops always round down the block size, but for dispatch to ++ library we need precise value. */ ++ if (dynamic_check) ++ *count = expand_simple_binop (GET_MODE (*count), AND, *count, ++ GEN_INT (-size), *count, 1, OPTAB_DIRECT); ++ } ++ else ++ { ++ gcc_assert (prolog_size == 0); ++ /* Decrease count, so we won't end up copying last word twice. */ ++ if (!CONST_INT_P (*count)) ++ *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, ++ constm1_rtx, *count, 1, OPTAB_DIRECT); ++ else ++ *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1, ++ (unsigned HOST_WIDE_INT)size)); ++ if (*min_size) ++ *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size); ++ } ++} ++ ++ ++/* This function is like the previous one, except here we know how many bytes ++ need to be copied. That allows us to update alignment not only of DST, which ++ is returned, but also of SRC, which is passed as a pointer for that ++ reason. 
*/ ++static rtx ++expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, ++ rtx srcreg, rtx value, rtx vec_value, ++ int desired_align, int align_bytes, ++ bool issetmem) ++{ ++ rtx src = NULL; ++ rtx orig_dst = dst; ++ rtx orig_src = NULL; ++ int piece_size = 1; ++ int copied_bytes = 0; ++ ++ if (!issetmem) ++ { ++ gcc_assert (srcp != NULL); ++ src = *srcp; ++ orig_src = src; ++ } ++ ++ for (piece_size = 1; ++ piece_size <= desired_align && copied_bytes < align_bytes; ++ piece_size <<= 1) ++ { ++ if (align_bytes & piece_size) ++ { ++ if (issetmem) ++ { ++ if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value))) ++ dst = emit_memset (dst, destreg, vec_value, piece_size); ++ else ++ dst = emit_memset (dst, destreg, value, piece_size); ++ } ++ else ++ dst = emit_memmov (dst, &src, destreg, srcreg, piece_size); ++ copied_bytes += piece_size; ++ } ++ } ++ if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) ++ set_mem_align (dst, desired_align * BITS_PER_UNIT); ++ if (MEM_SIZE_KNOWN_P (orig_dst)) ++ set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes); ++ ++ if (!issetmem) ++ { ++ int src_align_bytes = get_mem_align_offset (src, desired_align ++ * BITS_PER_UNIT); ++ if (src_align_bytes >= 0) ++ src_align_bytes = desired_align - src_align_bytes; ++ if (src_align_bytes >= 0) ++ { ++ unsigned int src_align; ++ for (src_align = desired_align; src_align >= 2; src_align >>= 1) ++ { ++ if ((src_align_bytes & (src_align - 1)) ++ == (align_bytes & (src_align - 1))) ++ break; ++ } ++ if (src_align > (unsigned int) desired_align) ++ src_align = desired_align; ++ if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT) ++ set_mem_align (src, src_align * BITS_PER_UNIT); ++ } ++ if (MEM_SIZE_KNOWN_P (orig_src)) ++ set_mem_size (src, MEM_SIZE (orig_src) - align_bytes); ++ *srcp = src; ++ } ++ ++ return dst; ++} ++ ++/* Return true if ALG can be used in current context. ++ Assume we expand memset if MEMSET is true. */ ++static bool ++alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) ++{ ++ if (alg == no_stringop) ++ return false; ++ if (alg == vector_loop) ++ return TARGET_SSE || TARGET_AVX; ++ /* Algorithms using the rep prefix want at least edi and ecx; ++ additionally, memset wants eax and memcpy wants esi. Don't ++ consider such algorithms if the user has appropriated those ++ registers for their own purposes, or if we have a non-default ++ address space, since some string insns cannot override the segment. */ ++ if (alg == rep_prefix_1_byte ++ || alg == rep_prefix_4_byte ++ || alg == rep_prefix_8_byte) ++ { ++ if (have_as) ++ return false; ++ if (fixed_regs[CX_REG] ++ || fixed_regs[DI_REG] ++ || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])) ++ return false; ++ } ++ return true; ++} ++ ++/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */ ++static enum stringop_alg ++decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, ++ unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, ++ bool memset, bool zero_memset, bool have_as, ++ int *dynamic_check, bool *noalign, bool recur) ++{ ++ const struct stringop_algs *algs; ++ bool optimize_for_speed; ++ int max = 0; ++ const struct processor_costs *cost; ++ int i; ++ bool any_alg_usable_p = false; ++ ++ *noalign = false; ++ *dynamic_check = -1; ++ ++ /* Even if the string operation call is cold, we still might spend a lot ++ of time processing large blocks. 
*/ ++ if (optimize_function_for_size_p (cfun) ++ || (optimize_insn_for_size_p () ++ && (max_size < 256 ++ || (expected_size != -1 && expected_size < 256)))) ++ optimize_for_speed = false; ++ else ++ optimize_for_speed = true; ++ ++ cost = optimize_for_speed ? ix86_cost : &ix86_size_cost; ++ if (memset) ++ algs = &cost->memset[TARGET_64BIT != 0]; ++ else ++ algs = &cost->memcpy[TARGET_64BIT != 0]; ++ ++ /* See maximal size for user defined algorithm. */ ++ for (i = 0; i < MAX_STRINGOP_ALGS; i++) ++ { ++ enum stringop_alg candidate = algs->size[i].alg; ++ bool usable = alg_usable_p (candidate, memset, have_as); ++ any_alg_usable_p |= usable; ++ ++ if (candidate != libcall && candidate && usable) ++ max = algs->size[i].max; ++ } ++ ++ /* If expected size is not known but max size is small enough ++ so inline version is a win, set expected size into ++ the range. */ ++ if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1) ++ && expected_size == -1) ++ expected_size = min_size / 2 + max_size / 2; ++ ++ /* If user specified the algorithm, honor it if possible. */ ++ if (ix86_stringop_alg != no_stringop ++ && alg_usable_p (ix86_stringop_alg, memset, have_as)) ++ return ix86_stringop_alg; ++ /* rep; movq or rep; movl is the smallest variant. */ ++ else if (!optimize_for_speed) ++ { ++ *noalign = true; ++ if (!count || (count & 3) || (memset && !zero_memset)) ++ return alg_usable_p (rep_prefix_1_byte, memset, have_as) ++ ? rep_prefix_1_byte : loop_1_byte; ++ else ++ return alg_usable_p (rep_prefix_4_byte, memset, have_as) ++ ? rep_prefix_4_byte : loop; ++ } ++ /* Very tiny blocks are best handled via the loop, REP is expensive to ++ setup. */ ++ else if (expected_size != -1 && expected_size < 4) ++ return loop_1_byte; ++ else if (expected_size != -1) ++ { ++ enum stringop_alg alg = libcall; ++ bool alg_noalign = false; ++ for (i = 0; i < MAX_STRINGOP_ALGS; i++) ++ { ++ /* We get here if the algorithms that were not libcall-based ++ were rep-prefix based and we are unable to use rep prefixes ++ based on global register usage. Break out of the loop and ++ use the heuristic below. */ ++ if (algs->size[i].max == 0) ++ break; ++ if (algs->size[i].max >= expected_size || algs->size[i].max == -1) ++ { ++ enum stringop_alg candidate = algs->size[i].alg; ++ ++ if (candidate != libcall ++ && alg_usable_p (candidate, memset, have_as)) ++ { ++ alg = candidate; ++ alg_noalign = algs->size[i].noalign; ++ } ++ /* Honor TARGET_INLINE_ALL_STRINGOPS by picking ++ last non-libcall inline algorithm. */ ++ if (TARGET_INLINE_ALL_STRINGOPS) ++ { ++ /* When the current size is best to be copied by a libcall, ++ but we are still forced to inline, run the heuristic below ++ that will pick code for medium sized blocks. */ ++ if (alg != libcall) ++ { ++ *noalign = alg_noalign; ++ return alg; ++ } ++ else if (!any_alg_usable_p) ++ break; ++ } ++ else if (alg_usable_p (candidate, memset, have_as)) ++ { ++ *noalign = algs->size[i].noalign; ++ return candidate; ++ } ++ } ++ } ++ } ++ /* When asked to inline the call anyway, try to pick meaningful choice. ++ We look for maximal size of block that is faster to copy by hand and ++ take blocks of at most of that size guessing that average size will ++ be roughly half of the block. ++ ++ If this turns out to be bad, we might simply specify the preferred ++ choice in ix86_costs. 
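The loop above is a size-class walk: each table entry pairs an upper size bound with an algorithm, and the first usable entry whose bound covers the expected size wins. A standalone sketch of that lookup; the thresholds and enum values below are invented and only the shape of the walk matches the code:

/* Illustrative size-class lookup, not the thresholds GCC uses.  */
enum alg_kind { ALG_LOOP_1_BYTE, ALG_LOOP, ALG_REP_PREFIX, ALG_LIBCALL };

struct size_class
{
  long max;            /* largest size handled by ALG, -1 = unbounded */
  enum alg_kind alg;
};

static enum alg_kind
pick_alg (const struct size_class *table, int n, long expected_size)
{
  for (int i = 0; i < n; i++)
    if (table[i].max == -1 || table[i].max >= expected_size)
      return table[i].alg;
  return ALG_LIBCALL;
}

With a table such as { {16, ALG_LOOP_1_BYTE}, {128, ALG_LOOP}, {-1, ALG_REP_PREFIX} }, an expected size of 100 selects ALG_LOOP, and anything above 128 falls through to ALG_REP_PREFIX.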
*/ ++ if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) ++ && (algs->unknown_size == libcall ++ || !alg_usable_p (algs->unknown_size, memset, have_as))) ++ { ++ enum stringop_alg alg; ++ HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2; ++ ++ /* If there aren't any usable algorithms or if recursing already, ++ then recursing on smaller sizes or same size isn't going to ++ find anything. Just return the simple byte-at-a-time copy loop. */ ++ if (!any_alg_usable_p || recur) ++ { ++ /* Pick something reasonable. */ ++ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur) ++ *dynamic_check = 128; ++ return loop_1_byte; ++ } ++ alg = decide_alg (count, new_expected_size, min_size, max_size, memset, ++ zero_memset, have_as, dynamic_check, noalign, true); ++ gcc_assert (*dynamic_check == -1); ++ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) ++ *dynamic_check = max; ++ else ++ gcc_assert (alg != libcall); ++ return alg; ++ } ++ return (alg_usable_p (algs->unknown_size, memset, have_as) ++ ? algs->unknown_size : libcall); ++} ++ ++/* Decide on alignment. We know that the operand is already aligned to ALIGN ++ (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */ ++static int ++decide_alignment (int align, ++ enum stringop_alg alg, ++ int expected_size, ++ machine_mode move_mode) ++{ ++ int desired_align = 0; ++ ++ gcc_assert (alg != no_stringop); ++ ++ if (alg == libcall) ++ return 0; ++ if (move_mode == VOIDmode) ++ return 0; ++ ++ desired_align = GET_MODE_SIZE (move_mode); ++ /* PentiumPro has special logic triggering for 8 byte aligned blocks. ++ copying whole cacheline at once. */ ++ if (TARGET_PENTIUMPRO ++ && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte)) ++ desired_align = 8; ++ ++ if (optimize_size) ++ desired_align = 1; ++ if (desired_align < align) ++ desired_align = align; ++ if (expected_size != -1 && expected_size < 4) ++ desired_align = align; ++ ++ return desired_align; ++} ++ ++ ++/* Helper function for memcpy. For QImode value 0xXY produce ++ 0xXYXYXYXY of wide specified by MODE. This is essentially ++ a * 0x10101010, but we can do slightly better than ++ synth_mult by unwinding the sequence by hand on CPUs with ++ slow multiply. */ ++static rtx ++promote_duplicated_reg (machine_mode mode, rtx val) ++{ ++ machine_mode valmode = GET_MODE (val); ++ rtx tmp; ++ int nops = mode == DImode ? 3 : 2; ++ ++ gcc_assert (mode == SImode || mode == DImode || val == const0_rtx); ++ if (val == const0_rtx) ++ return copy_to_mode_reg (mode, CONST0_RTX (mode)); ++ if (CONST_INT_P (val)) ++ { ++ HOST_WIDE_INT v = INTVAL (val) & 255; ++ ++ v |= v << 8; ++ v |= v << 16; ++ if (mode == DImode) ++ v |= (v << 16) << 16; ++ return copy_to_mode_reg (mode, gen_int_mode (v, mode)); ++ } ++ ++ if (valmode == VOIDmode) ++ valmode = QImode; ++ if (valmode != QImode) ++ val = gen_lowpart (QImode, val); ++ if (mode == QImode) ++ return val; ++ if (!TARGET_PARTIAL_REG_STALL) ++ nops--; ++ if (ix86_cost->mult_init[mode == DImode ? 3 : 2] ++ + ix86_cost->mult_bit * (mode == DImode ? 
8 : 4) ++ <= (ix86_cost->shift_const + ix86_cost->add) * nops ++ + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0))) ++ { ++ rtx reg = convert_modes (mode, QImode, val, true); ++ tmp = promote_duplicated_reg (mode, const1_rtx); ++ return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1, ++ OPTAB_DIRECT); ++ } ++ else ++ { ++ rtx reg = convert_modes (mode, QImode, val, true); ++ ++ if (!TARGET_PARTIAL_REG_STALL) ++ if (mode == SImode) ++ emit_insn (gen_insvsi_1 (reg, reg)); ++ else ++ emit_insn (gen_insvdi_1 (reg, reg)); ++ else ++ { ++ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8), ++ NULL, 1, OPTAB_DIRECT); ++ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, ++ OPTAB_DIRECT); ++ } ++ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16), ++ NULL, 1, OPTAB_DIRECT); ++ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); ++ if (mode == SImode) ++ return reg; ++ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32), ++ NULL, 1, OPTAB_DIRECT); ++ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); ++ return reg; ++ } ++} ++ ++/* Duplicate value VAL using promote_duplicated_reg into maximal size that will ++ be needed by main loop copying SIZE_NEEDED chunks and prologue getting ++ alignment from ALIGN to DESIRED_ALIGN. */ ++static rtx ++promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, ++ int align) ++{ ++ rtx promoted_val; ++ ++ if (TARGET_64BIT ++ && (size_needed > 4 || (desired_align > align && desired_align > 4))) ++ promoted_val = promote_duplicated_reg (DImode, val); ++ else if (size_needed > 2 || (desired_align > align && desired_align > 2)) ++ promoted_val = promote_duplicated_reg (SImode, val); ++ else if (size_needed > 1 || (desired_align > align && desired_align > 1)) ++ promoted_val = promote_duplicated_reg (HImode, val); ++ else ++ promoted_val = val; ++ ++ return promoted_val; ++} ++ ++/* Copy the address to a Pmode register. This is used for x32 to ++ truncate DImode TLS address to a SImode register. */ ++ ++static rtx ++ix86_copy_addr_to_reg (rtx addr) ++{ ++ rtx reg; ++ if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode) ++ { ++ reg = copy_addr_to_reg (addr); ++ REG_POINTER (reg) = 1; ++ return reg; ++ } ++ else ++ { ++ gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode); ++ reg = copy_to_mode_reg (DImode, addr); ++ REG_POINTER (reg) = 1; ++ return gen_rtx_SUBREG (SImode, reg, 0); ++ } ++} ++ ++/* Expand string move (memcpy) ot store (memset) operation. Use i386 string ++ operations when profitable. The code depends upon architecture, block size ++ and alignment, but always has one of the following overall structures: ++ ++ Aligned move sequence: ++ ++ 1) Prologue guard: Conditional that jumps up to epilogues for small ++ blocks that can be handled by epilogue alone. This is faster ++ but also needed for correctness, since prologue assume the block ++ is larger than the desired alignment. ++ ++ Optional dynamic check for size and libcall for large ++ blocks is emitted here too, with -minline-stringops-dynamically. ++ ++ 2) Prologue: copy first few bytes in order to get destination ++ aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less ++ than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be ++ copied. We emit either a jump tree on power of two sized ++ blocks, or a byte loop. ++ ++ 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks ++ with specified algorithm. 
++ ++ 4) Epilogue: code copying tail of the block that is too small to be ++ handled by main body (or up to size guarded by prologue guard). ++ ++ Misaligned move sequence ++ ++ 1) missaligned move prologue/epilogue containing: ++ a) Prologue handling small memory blocks and jumping to done_label ++ (skipped if blocks are known to be large enough) ++ b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is ++ needed by single possibly misaligned move ++ (skipped if alignment is not needed) ++ c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves ++ ++ 2) Zero size guard dispatching to done_label, if needed ++ ++ 3) dispatch to library call, if needed, ++ ++ 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks ++ with specified algorithm. */ ++bool ++ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, ++ rtx align_exp, rtx expected_align_exp, ++ rtx expected_size_exp, rtx min_size_exp, ++ rtx max_size_exp, rtx probable_max_size_exp, ++ bool issetmem) ++{ ++ rtx destreg; ++ rtx srcreg = NULL; ++ rtx_code_label *label = NULL; ++ rtx tmp; ++ rtx_code_label *jump_around_label = NULL; ++ HOST_WIDE_INT align = 1; ++ unsigned HOST_WIDE_INT count = 0; ++ HOST_WIDE_INT expected_size = -1; ++ int size_needed = 0, epilogue_size_needed; ++ int desired_align = 0, align_bytes = 0; ++ enum stringop_alg alg; ++ rtx promoted_val = NULL; ++ rtx vec_promoted_val = NULL; ++ bool force_loopy_epilogue = false; ++ int dynamic_check; ++ bool need_zero_guard = false; ++ bool noalign; ++ machine_mode move_mode = VOIDmode; ++ machine_mode wider_mode; ++ int unroll_factor = 1; ++ /* TODO: Once value ranges are available, fill in proper data. */ ++ unsigned HOST_WIDE_INT min_size = 0; ++ unsigned HOST_WIDE_INT max_size = -1; ++ unsigned HOST_WIDE_INT probable_max_size = -1; ++ bool misaligned_prologue_used = false; ++ bool have_as; ++ ++ if (CONST_INT_P (align_exp)) ++ align = INTVAL (align_exp); ++ /* i386 can do misaligned access on reasonably increased cost. */ ++ if (CONST_INT_P (expected_align_exp) ++ && INTVAL (expected_align_exp) > align) ++ align = INTVAL (expected_align_exp); ++ /* ALIGN is the minimum of destination and source alignment, but we care here ++ just about destination alignment. */ ++ else if (!issetmem ++ && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT) ++ align = MEM_ALIGN (dst) / BITS_PER_UNIT; ++ ++ if (CONST_INT_P (count_exp)) ++ { ++ min_size = max_size = probable_max_size = count = expected_size ++ = INTVAL (count_exp); ++ /* When COUNT is 0, there is nothing to do. */ ++ if (!count) ++ return true; ++ } ++ else ++ { ++ if (min_size_exp) ++ min_size = INTVAL (min_size_exp); ++ if (max_size_exp) ++ max_size = INTVAL (max_size_exp); ++ if (probable_max_size_exp) ++ probable_max_size = INTVAL (probable_max_size_exp); ++ if (CONST_INT_P (expected_size_exp)) ++ expected_size = INTVAL (expected_size_exp); ++ } ++ ++ /* Make sure we don't need to care about overflow later on. */ ++ if (count > (HOST_WIDE_INT_1U << 30)) ++ return false; ++ ++ have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)); ++ if (!issetmem) ++ have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)); ++ ++ /* Step 0: Decide on preferred algorithm, desired alignment and ++ size of chunks to be copied by main loop. 
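At the C level, the aligned sequence described in the comment above boils down to the following shape for a simple word-wide memset. This is only an illustration of steps 1-4, with no unrolling, rep-prefix or vector paths; the promoted value is built with the same shift-and-or replication that promote_duplicated_reg uses for the constant case:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Rough shape of the aligned expansion: 1) guard so tiny blocks go
   straight to the epilogue, 2) byte prologue aligning DST, 3) word-wide
   main loop using the promoted value, 4) byte epilogue for the tail.  */
static void
setmem_shape (unsigned char *dst, unsigned char val, size_t count)
{
  /* Promoted value: replicate 0xXY into 0xXYXY...XY.  */
  uint64_t word = val;
  word |= word << 8;
  word |= word << 16;
  word |= word << 32;

  if (count >= 2 * sizeof (uint64_t))           /* 1) prologue guard */
    {
      while ((uintptr_t) dst & 7)               /* 2) alignment prologue */
        {
          *dst++ = val;
          count--;
        }
      while (count >= sizeof (uint64_t))        /* 3) main loop */
        {
          memcpy (dst, &word, sizeof word);
          dst += sizeof (uint64_t);
          count -= sizeof (uint64_t);
        }
    }
  while (count--)                               /* 4) epilogue */
    *dst++ = val;
}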
*/ ++ alg = decide_alg (count, expected_size, min_size, probable_max_size, ++ issetmem, ++ issetmem && val_exp == const0_rtx, have_as, ++ &dynamic_check, &noalign, false); ++ ++ if (dump_file) ++ fprintf (dump_file, "Selected stringop expansion strategy: %s\n", ++ stringop_alg_names[alg]); ++ ++ if (alg == libcall) ++ return false; ++ gcc_assert (alg != no_stringop); ++ ++ /* For now vector-version of memset is generated only for memory zeroing, as ++ creating of promoted vector value is very cheap in this case. */ ++ if (issetmem && alg == vector_loop && val_exp != const0_rtx) ++ alg = unrolled_loop; ++ ++ if (!count) ++ count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp); ++ destreg = ix86_copy_addr_to_reg (XEXP (dst, 0)); ++ if (!issetmem) ++ srcreg = ix86_copy_addr_to_reg (XEXP (src, 0)); ++ ++ unroll_factor = 1; ++ move_mode = word_mode; ++ switch (alg) ++ { ++ case libcall: ++ case no_stringop: ++ case last_alg: ++ gcc_unreachable (); ++ case loop_1_byte: ++ need_zero_guard = true; ++ move_mode = QImode; ++ break; ++ case loop: ++ need_zero_guard = true; ++ break; ++ case unrolled_loop: ++ need_zero_guard = true; ++ unroll_factor = (TARGET_64BIT ? 4 : 2); ++ break; ++ case vector_loop: ++ need_zero_guard = true; ++ unroll_factor = 4; ++ /* Find the widest supported mode. */ ++ move_mode = word_mode; ++ while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode) ++ && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing) ++ move_mode = wider_mode; ++ ++ if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128) ++ move_mode = TImode; ++ ++ /* Find the corresponding vector mode with the same size as MOVE_MODE. ++ MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ ++ if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) ++ { ++ int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); ++ if (!mode_for_vector (word_mode, nunits).exists (&move_mode) ++ || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing) ++ move_mode = word_mode; ++ } ++ gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing); ++ break; ++ case rep_prefix_8_byte: ++ move_mode = DImode; ++ break; ++ case rep_prefix_4_byte: ++ move_mode = SImode; ++ break; ++ case rep_prefix_1_byte: ++ move_mode = QImode; ++ break; ++ } ++ size_needed = GET_MODE_SIZE (move_mode) * unroll_factor; ++ epilogue_size_needed = size_needed; ++ ++ /* If we are going to call any library calls conditionally, make sure any ++ pending stack adjustment happen before the first conditional branch, ++ otherwise they will be emitted before the library call only and won't ++ happen from the other branches. */ ++ if (dynamic_check != -1) ++ do_pending_stack_adjust (); ++ ++ desired_align = decide_alignment (align, alg, expected_size, move_mode); ++ if (!TARGET_ALIGN_STRINGOPS || noalign) ++ align = desired_align; ++ ++ /* Step 1: Prologue guard. */ ++ ++ /* Alignment code needs count to be in register. */ ++ if (CONST_INT_P (count_exp) && desired_align > align) ++ { ++ if (INTVAL (count_exp) > desired_align ++ && INTVAL (count_exp) > size_needed) ++ { ++ align_bytes ++ = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); ++ if (align_bytes <= 0) ++ align_bytes = 0; ++ else ++ align_bytes = desired_align - align_bytes; ++ } ++ if (align_bytes == 0) ++ count_exp = force_reg (counter_mode (count_exp), count_exp); ++ } ++ gcc_assert (desired_align >= 1 && align >= 1); ++ ++ /* Misaligned move sequences handle both prologue and epilogue at once. 
++ Default code generation results in a smaller code for large alignments ++ and also avoids redundant job when sizes are known precisely. */ ++ misaligned_prologue_used ++ = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES ++ && MAX (desired_align, epilogue_size_needed) <= 32 ++ && desired_align <= epilogue_size_needed ++ && ((desired_align > align && !align_bytes) ++ || (!count && epilogue_size_needed > 1))); ++ ++ /* Do the cheap promotion to allow better CSE across the ++ main loop and epilogue (ie one load of the big constant in the ++ front of all code. ++ For now the misaligned move sequences do not have fast path ++ without broadcasting. */ ++ if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) ++ { ++ if (alg == vector_loop) ++ { ++ gcc_assert (val_exp == const0_rtx); ++ vec_promoted_val = promote_duplicated_reg (move_mode, val_exp); ++ promoted_val = promote_duplicated_reg_to_size (val_exp, ++ GET_MODE_SIZE (word_mode), ++ desired_align, align); ++ } ++ else ++ { ++ promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, ++ desired_align, align); ++ } ++ } ++ /* Misaligned move sequences handles both prologues and epilogues at once. ++ Default code generation results in smaller code for large alignments and ++ also avoids redundant job when sizes are known precisely. */ ++ if (misaligned_prologue_used) ++ { ++ /* Misaligned move prologue handled small blocks by itself. */ ++ expand_set_or_cpymem_prologue_epilogue_by_misaligned_moves ++ (dst, src, &destreg, &srcreg, ++ move_mode, promoted_val, vec_promoted_val, ++ &count_exp, ++ &jump_around_label, ++ desired_align < align ++ ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed, ++ desired_align, align, &min_size, dynamic_check, issetmem); ++ if (!issetmem) ++ src = change_address (src, BLKmode, srcreg); ++ dst = change_address (dst, BLKmode, destreg); ++ set_mem_align (dst, desired_align * BITS_PER_UNIT); ++ epilogue_size_needed = 0; ++ if (need_zero_guard ++ && min_size < (unsigned HOST_WIDE_INT) size_needed) ++ { ++ /* It is possible that we copied enough so the main loop will not ++ execute. */ ++ gcc_assert (size_needed > 1); ++ if (jump_around_label == NULL_RTX) ++ jump_around_label = gen_label_rtx (); ++ emit_cmp_and_jump_insns (count_exp, ++ GEN_INT (size_needed), ++ LTU, 0, counter_mode (count_exp), 1, jump_around_label); ++ if (expected_size == -1 ++ || expected_size < (desired_align - align) / 2 + size_needed) ++ predict_jump (REG_BR_PROB_BASE * 20 / 100); ++ else ++ predict_jump (REG_BR_PROB_BASE * 60 / 100); ++ } ++ } ++ /* Ensure that alignment prologue won't copy past end of block. */ ++ else if (size_needed > 1 || (desired_align > 1 && desired_align > align)) ++ { ++ epilogue_size_needed = MAX (size_needed - 1, desired_align - align); ++ /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. ++ Make sure it is power of 2. */ ++ epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1); ++ ++ /* To improve performance of small blocks, we jump around the VAL ++ promoting mode. This mean that if the promoted VAL is not constant, ++ we might not use it in the epilogue and have to use byte ++ loop variant. */ ++ if (issetmem && epilogue_size_needed > 2 && !promoted_val) ++ force_loopy_epilogue = true; ++ if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed) ++ || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) ++ { ++ /* If main algorithm works on QImode, no epilogue is needed. ++ For small sizes just don't align anything. 
*/ ++ if (size_needed == 1) ++ desired_align = align; ++ else ++ goto epilogue; ++ } ++ else if (!count ++ && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) ++ { ++ label = gen_label_rtx (); ++ emit_cmp_and_jump_insns (count_exp, ++ GEN_INT (epilogue_size_needed), ++ LTU, 0, counter_mode (count_exp), 1, label); ++ if (expected_size == -1 || expected_size < epilogue_size_needed) ++ predict_jump (REG_BR_PROB_BASE * 60 / 100); ++ else ++ predict_jump (REG_BR_PROB_BASE * 20 / 100); ++ } ++ } ++ ++ /* Emit code to decide on runtime whether library call or inline should be ++ used. */ ++ if (dynamic_check != -1) ++ { ++ if (!issetmem && CONST_INT_P (count_exp)) ++ { ++ if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check) ++ { ++ emit_block_copy_via_libcall (dst, src, count_exp); ++ count_exp = const0_rtx; ++ goto epilogue; ++ } ++ } ++ else ++ { ++ rtx_code_label *hot_label = gen_label_rtx (); ++ if (jump_around_label == NULL_RTX) ++ jump_around_label = gen_label_rtx (); ++ emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), ++ LEU, 0, counter_mode (count_exp), ++ 1, hot_label); ++ predict_jump (REG_BR_PROB_BASE * 90 / 100); ++ if (issetmem) ++ set_storage_via_libcall (dst, count_exp, val_exp); ++ else ++ emit_block_copy_via_libcall (dst, src, count_exp); ++ emit_jump (jump_around_label); ++ emit_label (hot_label); ++ } ++ } ++ ++ /* Step 2: Alignment prologue. */ ++ /* Do the expensive promotion once we branched off the small blocks. */ ++ if (issetmem && !promoted_val) ++ promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, ++ desired_align, align); ++ ++ if (desired_align > align && !misaligned_prologue_used) ++ { ++ if (align_bytes == 0) ++ { ++ /* Except for the first move in prologue, we no longer know ++ constant offset in aliasing info. It don't seems to worth ++ the pain to maintain it for the first move, so throw away ++ the info early. */ ++ dst = change_address (dst, BLKmode, destreg); ++ if (!issetmem) ++ src = change_address (src, BLKmode, srcreg); ++ dst = expand_set_or_cpymem_prologue (dst, src, destreg, srcreg, ++ promoted_val, vec_promoted_val, ++ count_exp, align, desired_align, ++ issetmem); ++ /* At most desired_align - align bytes are copied. */ ++ if (min_size < (unsigned)(desired_align - align)) ++ min_size = 0; ++ else ++ min_size -= desired_align - align; ++ } ++ else ++ { ++ /* If we know how many bytes need to be stored before dst is ++ sufficiently aligned, maintain aliasing info accurately. */ ++ dst = expand_set_or_cpymem_constant_prologue (dst, &src, destreg, ++ srcreg, ++ promoted_val, ++ vec_promoted_val, ++ desired_align, ++ align_bytes, ++ issetmem); ++ ++ count_exp = plus_constant (counter_mode (count_exp), ++ count_exp, -align_bytes); ++ count -= align_bytes; ++ min_size -= align_bytes; ++ max_size -= align_bytes; ++ } ++ if (need_zero_guard ++ && min_size < (unsigned HOST_WIDE_INT) size_needed ++ && (count < (unsigned HOST_WIDE_INT) size_needed ++ || (align_bytes == 0 ++ && count < ((unsigned HOST_WIDE_INT) size_needed ++ + desired_align - align)))) ++ { ++ /* It is possible that we copied enough so the main loop will not ++ execute. 
*/ ++ gcc_assert (size_needed > 1); ++ if (label == NULL_RTX) ++ label = gen_label_rtx (); ++ emit_cmp_and_jump_insns (count_exp, ++ GEN_INT (size_needed), ++ LTU, 0, counter_mode (count_exp), 1, label); ++ if (expected_size == -1 ++ || expected_size < (desired_align - align) / 2 + size_needed) ++ predict_jump (REG_BR_PROB_BASE * 20 / 100); ++ else ++ predict_jump (REG_BR_PROB_BASE * 60 / 100); ++ } ++ } ++ if (label && size_needed == 1) ++ { ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ label = NULL; ++ epilogue_size_needed = 1; ++ if (issetmem) ++ promoted_val = val_exp; ++ } ++ else if (label == NULL_RTX && !misaligned_prologue_used) ++ epilogue_size_needed = size_needed; ++ ++ /* Step 3: Main loop. */ ++ ++ switch (alg) ++ { ++ case libcall: ++ case no_stringop: ++ case last_alg: ++ gcc_unreachable (); ++ case loop_1_byte: ++ case loop: ++ case unrolled_loop: ++ expand_set_or_cpymem_via_loop (dst, src, destreg, srcreg, promoted_val, ++ count_exp, move_mode, unroll_factor, ++ expected_size, issetmem); ++ break; ++ case vector_loop: ++ expand_set_or_cpymem_via_loop (dst, src, destreg, srcreg, ++ vec_promoted_val, count_exp, move_mode, ++ unroll_factor, expected_size, issetmem); ++ break; ++ case rep_prefix_8_byte: ++ case rep_prefix_4_byte: ++ case rep_prefix_1_byte: ++ expand_set_or_cpymem_via_rep (dst, src, destreg, srcreg, promoted_val, ++ val_exp, count_exp, move_mode, issetmem); ++ break; ++ } ++ /* Adjust properly the offset of src and dest memory for aliasing. */ ++ if (CONST_INT_P (count_exp)) ++ { ++ if (!issetmem) ++ src = adjust_automodify_address_nv (src, BLKmode, srcreg, ++ (count / size_needed) * size_needed); ++ dst = adjust_automodify_address_nv (dst, BLKmode, destreg, ++ (count / size_needed) * size_needed); ++ } ++ else ++ { ++ if (!issetmem) ++ src = change_address (src, BLKmode, srcreg); ++ dst = change_address (dst, BLKmode, destreg); ++ } ++ ++ /* Step 4: Epilogue to copy the remaining bytes. */ ++ epilogue: ++ if (label) ++ { ++ /* When the main loop is done, COUNT_EXP might hold original count, ++ while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. ++ Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED ++ bytes. Compensate if needed. */ ++ ++ if (size_needed < epilogue_size_needed) ++ { ++ tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp, ++ GEN_INT (size_needed - 1), count_exp, 1, ++ OPTAB_DIRECT); ++ if (tmp != count_exp) ++ emit_move_insn (count_exp, tmp); ++ } ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ } ++ ++ if (count_exp != const0_rtx && epilogue_size_needed > 1) ++ { ++ if (force_loopy_epilogue) ++ expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp, ++ epilogue_size_needed); ++ else ++ { ++ if (issetmem) ++ expand_setmem_epilogue (dst, destreg, promoted_val, ++ vec_promoted_val, count_exp, ++ epilogue_size_needed); ++ else ++ expand_cpymem_epilogue (dst, src, destreg, srcreg, count_exp, ++ epilogue_size_needed); ++ } ++ } ++ if (jump_around_label) ++ emit_label (jump_around_label); ++ return true; ++} ++ ++ ++/* Expand the appropriate insns for doing strlen if not just doing ++ repnz; scasb ++ ++ out = result, initialized with the start address ++ align_rtx = alignment of the address. ++ scratch = scratch register, initialized with the startaddress when ++ not aligned, otherwise undefined ++ ++ This is just the body. It needs the initializations mentioned above and ++ some address computing at the end. These things are done in i386.md. 
*/ ++ ++static void ++ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) ++{ ++ int align; ++ rtx tmp; ++ rtx_code_label *align_2_label = NULL; ++ rtx_code_label *align_3_label = NULL; ++ rtx_code_label *align_4_label = gen_label_rtx (); ++ rtx_code_label *end_0_label = gen_label_rtx (); ++ rtx mem; ++ rtx tmpreg = gen_reg_rtx (SImode); ++ rtx scratch = gen_reg_rtx (SImode); ++ rtx cmp; ++ ++ align = 0; ++ if (CONST_INT_P (align_rtx)) ++ align = INTVAL (align_rtx); ++ ++ /* Loop to check 1..3 bytes for null to get an aligned pointer. */ ++ ++ /* Is there a known alignment and is it less than 4? */ ++ if (align < 4) ++ { ++ rtx scratch1 = gen_reg_rtx (Pmode); ++ emit_move_insn (scratch1, out); ++ /* Is there a known alignment and is it not 2? */ ++ if (align != 2) ++ { ++ align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ ++ align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ ++ ++ /* Leave just the 3 lower bits. */ ++ align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, ++ Pmode, 1, align_4_label); ++ emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, ++ Pmode, 1, align_2_label); ++ emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, ++ Pmode, 1, align_3_label); ++ } ++ else ++ { ++ /* Since the alignment is 2, we have to check 2 or 0 bytes; ++ check if is aligned to 4 - byte. */ ++ ++ align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, ++ NULL_RTX, 0, OPTAB_WIDEN); ++ ++ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, ++ Pmode, 1, align_4_label); ++ } ++ ++ mem = change_address (src, QImode, out); ++ ++ /* Now compare the bytes. */ ++ ++ /* Compare the first n unaligned byte on a byte per byte basis. */ ++ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, ++ QImode, 1, end_0_label); ++ ++ /* Increment the address. */ ++ emit_insn (ix86_gen_add3 (out, out, const1_rtx)); ++ ++ /* Not needed with an alignment of 2 */ ++ if (align != 2) ++ { ++ emit_label (align_2_label); ++ ++ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, ++ end_0_label); ++ ++ emit_insn (ix86_gen_add3 (out, out, const1_rtx)); ++ ++ emit_label (align_3_label); ++ } ++ ++ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, ++ end_0_label); ++ ++ emit_insn (ix86_gen_add3 (out, out, const1_rtx)); ++ } ++ ++ /* Generate loop to check 4 bytes at a time. It is not a good idea to ++ align this loop. It gives only huge programs, but does not help to ++ speed up. */ ++ emit_label (align_4_label); ++ ++ mem = change_address (src, SImode, out); ++ emit_move_insn (scratch, mem); ++ emit_insn (ix86_gen_add3 (out, out, GEN_INT (4))); ++ ++ /* This formula yields a nonzero result iff one of the bytes is zero. ++ This saves three branches inside loop and many cycles. */ ++ ++ emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); ++ emit_insn (gen_one_cmplsi2 (scratch, scratch)); ++ emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); ++ emit_insn (gen_andsi3 (tmpreg, tmpreg, ++ gen_int_mode (0x80808080, SImode))); ++ emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, ++ align_4_label); ++ ++ if (TARGET_CMOVE) ++ { ++ rtx reg = gen_reg_rtx (SImode); ++ rtx reg2 = gen_reg_rtx (Pmode); ++ emit_move_insn (reg, tmpreg); ++ emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); ++ ++ /* If zero is not in the first two bytes, move two bytes forward. 
*/ ++ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); ++ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); ++ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); ++ emit_insn (gen_rtx_SET (tmpreg, ++ gen_rtx_IF_THEN_ELSE (SImode, tmp, ++ reg, ++ tmpreg))); ++ /* Emit lea manually to avoid clobbering of flags. */ ++ emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx))); ++ ++ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); ++ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); ++ emit_insn (gen_rtx_SET (out, ++ gen_rtx_IF_THEN_ELSE (Pmode, tmp, ++ reg2, ++ out))); ++ } ++ else ++ { ++ rtx_code_label *end_2_label = gen_label_rtx (); ++ /* Is zero in the first two bytes? */ ++ ++ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); ++ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); ++ tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); ++ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, ++ gen_rtx_LABEL_REF (VOIDmode, end_2_label), ++ pc_rtx); ++ tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); ++ JUMP_LABEL (tmp) = end_2_label; ++ ++ /* Not in the first two. Move two bytes forward. */ ++ emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); ++ emit_insn (ix86_gen_add3 (out, out, const2_rtx)); ++ ++ emit_label (end_2_label); ++ ++ } ++ ++ /* Avoid branch in fixing the byte. */ ++ tmpreg = gen_lowpart (QImode, tmpreg); ++ emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg)); ++ tmp = gen_rtx_REG (CCmode, FLAGS_REG); ++ cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); ++ emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp)); ++ ++ emit_label (end_0_label); ++} ++ ++/* Expand strlen. */ ++ ++bool ++ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) ++{ ++if (TARGET_UNROLL_STRLEN ++ && TARGET_INLINE_ALL_STRINGOPS ++ && eoschar == const0_rtx ++ && optimize > 1) ++ { ++ /* The generic case of strlen expander is long. Avoid it's ++ expanding unless TARGET_INLINE_ALL_STRINGOPS. */ ++ rtx addr = force_reg (Pmode, XEXP (src, 0)); ++ /* Well it seems that some optimizer does not combine a call like ++ foo(strlen(bar), strlen(bar)); ++ when the move and the subtraction is done here. It does calculate ++ the length just once when these instructions are done inside of ++ output_strlen_unroll(). But I think since &bar[strlen(bar)] is ++ often used and I use one fewer register for the lifetime of ++ output_strlen_unroll() this is better. */ ++ ++ emit_move_insn (out, addr); ++ ++ ix86_expand_strlensi_unroll_1 (out, src, align); ++ ++ /* strlensi_unroll_1 returns the address of the zero at the end of ++ the string, like memchr(), so compute the length by subtracting ++ the start address. */ ++ emit_insn (ix86_gen_sub3 (out, out, addr)); ++ return true; ++ } ++ else ++ return false; ++} ++ ++/* For given symbol (function) construct code to compute address of it's PLT ++ entry in large x86-64 PIC model. */ ++ ++static rtx ++construct_plt_address (rtx symbol) ++{ ++ rtx tmp, unspec; ++ ++ gcc_assert (GET_CODE (symbol) == SYMBOL_REF); ++ gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF); ++ gcc_assert (Pmode == DImode); ++ ++ tmp = gen_reg_rtx (Pmode); ++ unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF); ++ ++ emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec)); ++ emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx)); ++ return tmp; ++} ++ ++/* Additional registers that are clobbered by SYSV calls. 
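The unrolled strlen loop above relies on the classic word-at-a-time zero-byte test: (x - 0x01010101) & ~x & 0x80808080 is nonzero exactly when some byte of x is zero. When no byte is zero the subtraction produces no borrows, so no byte position can have its top bit set in both x - 0x01010101 and ~x; a zero byte, by contrast, borrows into its top bit while its complement keeps that bit set. A self-contained version of the check:

#include <stdint.h>

/* Nonzero iff some byte of X is zero -- the same add/not/and formula the
   unrolled loop computes on each 4-byte chunk.  */
static int
has_zero_byte (uint32_t x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}

For example, has_zero_byte (0x64636261) is 0, while has_zero_byte (0x00636261) is nonzero.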
*/ ++ ++static int const x86_64_ms_sysv_extra_clobbered_registers ++ [NUM_X86_64_MS_CLOBBERED_REGS] = ++{ ++ SI_REG, DI_REG, ++ XMM6_REG, XMM7_REG, ++ XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG, ++ XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG ++}; ++ ++rtx_insn * ++ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, ++ rtx callarg2, ++ rtx pop, bool sibcall) ++{ ++ rtx vec[3]; ++ rtx use = NULL, call; ++ unsigned int vec_len = 0; ++ tree fndecl; ++ ++ if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) ++ { ++ fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0)); ++ if (fndecl ++ && (lookup_attribute ("interrupt", ++ TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))) ++ error ("interrupt service routine cannot be called directly"); ++ } ++ else ++ fndecl = NULL_TREE; ++ ++ if (pop == const0_rtx) ++ pop = NULL; ++ gcc_assert (!TARGET_64BIT || !pop); ++ ++ if (TARGET_MACHO && !TARGET_64BIT) ++ { ++#if TARGET_MACHO ++ if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) ++ fnaddr = machopic_indirect_call_target (fnaddr); ++#endif ++ } ++ else ++ { ++ /* Static functions and indirect calls don't need the pic register. Also, ++ check if PLT was explicitly avoided via no-plt or "noplt" attribute, making ++ it an indirect call. */ ++ rtx addr = XEXP (fnaddr, 0); ++ if (flag_pic ++ && GET_CODE (addr) == SYMBOL_REF ++ && !SYMBOL_REF_LOCAL_P (addr)) ++ { ++ if (flag_plt ++ && (SYMBOL_REF_DECL (addr) == NULL_TREE ++ || !lookup_attribute ("noplt", ++ DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr))))) ++ { ++ if (!TARGET_64BIT ++ || (ix86_cmodel == CM_LARGE_PIC ++ && DEFAULT_ABI != MS_ABI)) ++ { ++ use_reg (&use, gen_rtx_REG (Pmode, ++ REAL_PIC_OFFSET_TABLE_REGNUM)); ++ if (ix86_use_pseudo_pic_reg ()) ++ emit_move_insn (gen_rtx_REG (Pmode, ++ REAL_PIC_OFFSET_TABLE_REGNUM), ++ pic_offset_table_rtx); ++ } ++ } ++ else if (!TARGET_PECOFF && !TARGET_MACHO) ++ { ++ if (TARGET_64BIT) ++ { ++ fnaddr = gen_rtx_UNSPEC (Pmode, ++ gen_rtvec (1, addr), ++ UNSPEC_GOTPCREL); ++ fnaddr = gen_rtx_CONST (Pmode, fnaddr); ++ } ++ else ++ { ++ fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ++ UNSPEC_GOT); ++ fnaddr = gen_rtx_CONST (Pmode, fnaddr); ++ fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, ++ fnaddr); ++ } ++ fnaddr = gen_const_mem (Pmode, fnaddr); ++ /* Pmode may not be the same as word_mode for x32, which ++ doesn't support indirect branch via 32-bit memory slot. ++ Since x32 GOT slot is 64 bit with zero upper 32 bits, ++ indirect branch via x32 GOT slot is OK. */ ++ if (GET_MODE (fnaddr) != word_mode) ++ fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); ++ fnaddr = gen_rtx_MEM (QImode, fnaddr); ++ } ++ } ++ } ++ ++ /* Skip setting up RAX register for -mskip-rax-setup when there are no ++ parameters passed in vector registers. */ ++ if (TARGET_64BIT ++ && (INTVAL (callarg2) > 0 ++ || (INTVAL (callarg2) == 0 ++ && (TARGET_SSE || !flag_skip_rax_setup)))) ++ { ++ rtx al = gen_rtx_REG (QImode, AX_REG); ++ emit_move_insn (al, callarg2); ++ use_reg (&use, al); ++ } ++ ++ if (ix86_cmodel == CM_LARGE_PIC ++ && !TARGET_PECOFF ++ && MEM_P (fnaddr) ++ && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF ++ && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) ++ fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); ++ /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect ++ branch via x32 GOT slot is OK. */ ++ else if (!(TARGET_X32 ++ && MEM_P (fnaddr) ++ && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND ++ && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode)) ++ && (sibcall ++ ? 
!sibcall_insn_operand (XEXP (fnaddr, 0), word_mode) ++ : !call_insn_operand (XEXP (fnaddr, 0), word_mode))) ++ { ++ fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1); ++ fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr)); ++ } ++ ++ call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); ++ ++ if (retval) ++ call = gen_rtx_SET (retval, call); ++ vec[vec_len++] = call; ++ ++ if (pop) ++ { ++ pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); ++ pop = gen_rtx_SET (stack_pointer_rtx, pop); ++ vec[vec_len++] = pop; ++ } ++ ++ if (cfun->machine->no_caller_saved_registers ++ && (!fndecl ++ || (!TREE_THIS_VOLATILE (fndecl) ++ && !lookup_attribute ("no_caller_saved_registers", ++ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))) ++ { ++ static const char ix86_call_used_regs[] = CALL_USED_REGISTERS; ++ bool is_64bit_ms_abi = (TARGET_64BIT ++ && ix86_function_abi (fndecl) == MS_ABI); ++ char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi); ++ ++ /* If there are no caller-saved registers, add all registers ++ that are clobbered by the call which returns. */ ++ for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ if (!fixed_regs[i] ++ && (ix86_call_used_regs[i] == 1 ++ || (ix86_call_used_regs[i] & c_mask)) ++ && !STACK_REGNO_P (i) ++ && !MMX_REGNO_P (i)) ++ clobber_reg (&use, ++ gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i)); ++ } ++ else if (TARGET_64BIT_MS_ABI ++ && (!callarg2 || INTVAL (callarg2) != -2)) ++ { ++ unsigned i; ++ ++ for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++) ++ { ++ int regno = x86_64_ms_sysv_extra_clobbered_registers[i]; ++ machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode; ++ ++ clobber_reg (&use, gen_rtx_REG (mode, regno)); ++ } ++ ++ /* Set here, but it may get cleared later. */ ++ if (TARGET_CALL_MS2SYSV_XLOGUES) ++ { ++ if (!TARGET_SSE) ++ ; ++ ++ /* Don't break hot-patched functions. */ ++ else if (ix86_function_ms_hook_prologue (current_function_decl)) ++ ; ++ ++ /* TODO: Cases not yet examined. */ ++ else if (flag_split_stack) ++ warn_once_call_ms2sysv_xlogues ("-fsplit-stack"); ++ ++ else ++ { ++ gcc_assert (!reload_completed); ++ cfun->machine->call_ms2sysv = true; ++ } ++ } ++ } ++ ++ if (vec_len > 1) ++ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec)); ++ rtx_insn *call_insn = emit_call_insn (call); ++ if (use) ++ CALL_INSN_FUNCTION_USAGE (call_insn) = use; ++ ++ return call_insn; ++} ++ ++/* Split simple return with popping POPC bytes from stack to indirect ++ branch with stack adjustment . */ ++ ++void ++ix86_split_simple_return_pop_internal (rtx popc) ++{ ++ struct machine_function *m = cfun->machine; ++ rtx ecx = gen_rtx_REG (SImode, CX_REG); ++ rtx_insn *insn; ++ ++ /* There is no "pascal" calling convention in any 64bit ABI. */ ++ gcc_assert (!TARGET_64BIT); ++ ++ insn = emit_insn (gen_pop (ecx)); ++ m->fs.cfa_offset -= UNITS_PER_WORD; ++ m->fs.sp_offset -= UNITS_PER_WORD; ++ ++ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); ++ x = gen_rtx_SET (stack_pointer_rtx, x); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); ++ add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc); ++ x = gen_rtx_SET (stack_pointer_rtx, x); ++ insn = emit_insn (x); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ /* Now return address is in ECX. 
*/ ++ emit_jump_insn (gen_simple_return_indirect_internal (ecx)); ++} ++ ++/* Errors in the source file can cause expand_expr to return const0_rtx ++ where we expect a vector. To avoid crashing, use one of the vector ++ clear instructions. */ ++ ++static rtx ++safe_vector_operand (rtx x, machine_mode mode) ++{ ++ if (x == const0_rtx) ++ x = CONST0_RTX (mode); ++ return x; ++} ++ ++/* Subroutine of ix86_expand_builtin to take care of binop insns. */ ++ ++static rtx ++ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) ++{ ++ rtx pat; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ tree arg1 = CALL_EXPR_ARG (exp, 1); ++ rtx op0 = expand_normal (arg0); ++ rtx op1 = expand_normal (arg1); ++ machine_mode tmode = insn_data[icode].operand[0].mode; ++ machine_mode mode0 = insn_data[icode].operand[1].mode; ++ machine_mode mode1 = insn_data[icode].operand[2].mode; ++ ++ if (VECTOR_MODE_P (mode0)) ++ op0 = safe_vector_operand (op0, mode0); ++ if (VECTOR_MODE_P (mode1)) ++ op1 = safe_vector_operand (op1, mode1); ++ ++ if (optimize || !target ++ || GET_MODE (target) != tmode ++ || !insn_data[icode].operand[0].predicate (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ if (GET_MODE (op1) == SImode && mode1 == TImode) ++ { ++ rtx x = gen_reg_rtx (V4SImode); ++ emit_insn (gen_sse2_loadd (x, op1)); ++ op1 = gen_lowpart (TImode, x); ++ } ++ ++ if (!insn_data[icode].operand[1].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ if (!insn_data[icode].operand[2].predicate (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ ++ pat = GEN_FCN (icode) (target, op0, op1); ++ if (! pat) ++ return 0; ++ ++ emit_insn (pat); ++ ++ return target; ++} ++ ++/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */ ++ ++static rtx ++ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, ++ enum ix86_builtin_func_type m_type, ++ enum rtx_code sub_code) ++{ ++ rtx pat; ++ int i; ++ int nargs; ++ bool comparison_p = false; ++ bool tf_p = false; ++ bool last_arg_constant = false; ++ int num_memory = 0; ++ struct { ++ rtx op; ++ machine_mode mode; ++ } args[4]; ++ ++ machine_mode tmode = insn_data[icode].operand[0].mode; ++ ++ switch (m_type) ++ { ++ case MULTI_ARG_4_DF2_DI_I: ++ case MULTI_ARG_4_DF2_DI_I1: ++ case MULTI_ARG_4_SF2_SI_I: ++ case MULTI_ARG_4_SF2_SI_I1: ++ nargs = 4; ++ last_arg_constant = true; ++ break; ++ ++ case MULTI_ARG_3_SF: ++ case MULTI_ARG_3_DF: ++ case MULTI_ARG_3_SF2: ++ case MULTI_ARG_3_DF2: ++ case MULTI_ARG_3_DI: ++ case MULTI_ARG_3_SI: ++ case MULTI_ARG_3_SI_DI: ++ case MULTI_ARG_3_HI: ++ case MULTI_ARG_3_HI_SI: ++ case MULTI_ARG_3_QI: ++ case MULTI_ARG_3_DI2: ++ case MULTI_ARG_3_SI2: ++ case MULTI_ARG_3_HI2: ++ case MULTI_ARG_3_QI2: ++ nargs = 3; ++ break; ++ ++ case MULTI_ARG_2_SF: ++ case MULTI_ARG_2_DF: ++ case MULTI_ARG_2_DI: ++ case MULTI_ARG_2_SI: ++ case MULTI_ARG_2_HI: ++ case MULTI_ARG_2_QI: ++ nargs = 2; ++ break; ++ ++ case MULTI_ARG_2_DI_IMM: ++ case MULTI_ARG_2_SI_IMM: ++ case MULTI_ARG_2_HI_IMM: ++ case MULTI_ARG_2_QI_IMM: ++ nargs = 2; ++ last_arg_constant = true; ++ break; ++ ++ case MULTI_ARG_1_SF: ++ case MULTI_ARG_1_DF: ++ case MULTI_ARG_1_SF2: ++ case MULTI_ARG_1_DF2: ++ case MULTI_ARG_1_DI: ++ case MULTI_ARG_1_SI: ++ case MULTI_ARG_1_HI: ++ case MULTI_ARG_1_QI: ++ case MULTI_ARG_1_SI_DI: ++ case MULTI_ARG_1_HI_DI: ++ case MULTI_ARG_1_HI_SI: ++ case MULTI_ARG_1_QI_DI: ++ case MULTI_ARG_1_QI_SI: ++ case MULTI_ARG_1_QI_HI: ++ nargs = 1; ++ break; ++ ++ case MULTI_ARG_2_DI_CMP: ++ case MULTI_ARG_2_SI_CMP: 
++ case MULTI_ARG_2_HI_CMP: ++ case MULTI_ARG_2_QI_CMP: ++ nargs = 2; ++ comparison_p = true; ++ break; ++ ++ case MULTI_ARG_2_SF_TF: ++ case MULTI_ARG_2_DF_TF: ++ case MULTI_ARG_2_DI_TF: ++ case MULTI_ARG_2_SI_TF: ++ case MULTI_ARG_2_HI_TF: ++ case MULTI_ARG_2_QI_TF: ++ nargs = 2; ++ tf_p = true; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (optimize || !target ++ || GET_MODE (target) != tmode ++ || !insn_data[icode].operand[0].predicate (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ else if (memory_operand (target, tmode)) ++ num_memory++; ++ ++ gcc_assert (nargs <= 4); ++ ++ for (i = 0; i < nargs; i++) ++ { ++ tree arg = CALL_EXPR_ARG (exp, i); ++ rtx op = expand_normal (arg); ++ int adjust = (comparison_p) ? 1 : 0; ++ machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; ++ ++ if (last_arg_constant && i == nargs - 1) ++ { ++ if (!insn_data[icode].operand[i + 1].predicate (op, mode)) ++ { ++ enum insn_code new_icode = icode; ++ switch (icode) ++ { ++ case CODE_FOR_xop_vpermil2v2df3: ++ case CODE_FOR_xop_vpermil2v4sf3: ++ case CODE_FOR_xop_vpermil2v4df3: ++ case CODE_FOR_xop_vpermil2v8sf3: ++ error ("the last argument must be a 2-bit immediate"); ++ return gen_reg_rtx (tmode); ++ case CODE_FOR_xop_rotlv2di3: ++ new_icode = CODE_FOR_rotlv2di3; ++ goto xop_rotl; ++ case CODE_FOR_xop_rotlv4si3: ++ new_icode = CODE_FOR_rotlv4si3; ++ goto xop_rotl; ++ case CODE_FOR_xop_rotlv8hi3: ++ new_icode = CODE_FOR_rotlv8hi3; ++ goto xop_rotl; ++ case CODE_FOR_xop_rotlv16qi3: ++ new_icode = CODE_FOR_rotlv16qi3; ++ xop_rotl: ++ if (CONST_INT_P (op)) ++ { ++ int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1; ++ op = GEN_INT (INTVAL (op) & mask); ++ gcc_checking_assert ++ (insn_data[icode].operand[i + 1].predicate (op, mode)); ++ } ++ else ++ { ++ gcc_checking_assert ++ (nargs == 2 ++ && insn_data[new_icode].operand[0].mode == tmode ++ && insn_data[new_icode].operand[1].mode == tmode ++ && insn_data[new_icode].operand[2].mode == mode ++ && insn_data[new_icode].operand[0].predicate ++ == insn_data[icode].operand[0].predicate ++ && insn_data[new_icode].operand[1].predicate ++ == insn_data[icode].operand[1].predicate); ++ icode = new_icode; ++ goto non_constant; ++ } ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ } ++ else ++ { ++ non_constant: ++ if (VECTOR_MODE_P (mode)) ++ op = safe_vector_operand (op, mode); ++ ++ /* If we aren't optimizing, only allow one memory operand to be ++ generated. */ ++ if (memory_operand (op, mode)) ++ num_memory++; ++ ++ gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); ++ ++ if (optimize ++ || !insn_data[icode].operand[i+adjust+1].predicate (op, mode) ++ || num_memory > 1) ++ op = force_reg (mode, op); ++ } ++ ++ args[i].op = op; ++ args[i].mode = mode; ++ } ++ ++ switch (nargs) ++ { ++ case 1: ++ pat = GEN_FCN (icode) (target, args[0].op); ++ break; ++ ++ case 2: ++ if (tf_p) ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, ++ GEN_INT ((int)sub_code)); ++ else if (! comparison_p) ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op); ++ else ++ { ++ rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target), ++ args[0].op, ++ args[1].op); ++ ++ pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op); ++ } ++ break; ++ ++ case 3: ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); ++ break; ++ ++ case 4: ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (! 
pat) ++ return 0; ++ ++ emit_insn (pat); ++ return target; ++} ++ ++/* Subroutine of ix86_expand_args_builtin to take care of scalar unop ++ insns with vec_merge. */ ++ ++static rtx ++ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, ++ rtx target) ++{ ++ rtx pat; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ rtx op1, op0 = expand_normal (arg0); ++ machine_mode tmode = insn_data[icode].operand[0].mode; ++ machine_mode mode0 = insn_data[icode].operand[1].mode; ++ ++ if (optimize || !target ++ || GET_MODE (target) != tmode ++ || !insn_data[icode].operand[0].predicate (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ if (VECTOR_MODE_P (mode0)) ++ op0 = safe_vector_operand (op0, mode0); ++ ++ if ((optimize && !register_operand (op0, mode0)) ++ || !insn_data[icode].operand[1].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ ++ op1 = op0; ++ if (!insn_data[icode].operand[2].predicate (op1, mode0)) ++ op1 = copy_to_mode_reg (mode0, op1); ++ ++ pat = GEN_FCN (icode) (target, op0, op1); ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ return target; ++} ++ ++/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ ++ ++static rtx ++ix86_expand_sse_compare (const struct builtin_description *d, ++ tree exp, rtx target, bool swap) ++{ ++ rtx pat; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ tree arg1 = CALL_EXPR_ARG (exp, 1); ++ rtx op0 = expand_normal (arg0); ++ rtx op1 = expand_normal (arg1); ++ rtx op2; ++ machine_mode tmode = insn_data[d->icode].operand[0].mode; ++ machine_mode mode0 = insn_data[d->icode].operand[1].mode; ++ machine_mode mode1 = insn_data[d->icode].operand[2].mode; ++ enum rtx_code comparison = d->comparison; ++ ++ if (VECTOR_MODE_P (mode0)) ++ op0 = safe_vector_operand (op0, mode0); ++ if (VECTOR_MODE_P (mode1)) ++ op1 = safe_vector_operand (op1, mode1); ++ ++ /* Swap operands if we have a comparison that isn't available in ++ hardware. */ ++ if (swap) ++ std::swap (op0, op1); ++ ++ if (optimize || !target ++ || GET_MODE (target) != tmode ++ || !insn_data[d->icode].operand[0].predicate (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ if ((optimize && !register_operand (op0, mode0)) ++ || !insn_data[d->icode].operand[1].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ if ((optimize && !register_operand (op1, mode1)) ++ || !insn_data[d->icode].operand[2].predicate (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ ++ op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); ++ pat = GEN_FCN (d->icode) (target, op0, op1, op2); ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ return target; ++} ++ ++/* Subroutine of ix86_expand_builtin to take care of comi insns. */ ++ ++static rtx ++ix86_expand_sse_comi (const struct builtin_description *d, tree exp, ++ rtx target) ++{ ++ rtx pat; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ tree arg1 = CALL_EXPR_ARG (exp, 1); ++ rtx op0 = expand_normal (arg0); ++ rtx op1 = expand_normal (arg1); ++ machine_mode mode0 = insn_data[d->icode].operand[0].mode; ++ machine_mode mode1 = insn_data[d->icode].operand[1].mode; ++ enum rtx_code comparison = d->comparison; ++ ++ if (VECTOR_MODE_P (mode0)) ++ op0 = safe_vector_operand (op0, mode0); ++ if (VECTOR_MODE_P (mode1)) ++ op1 = safe_vector_operand (op1, mode1); ++ ++ /* Swap operands if we have a comparison that isn't available in ++ hardware. 
*/ ++ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) ++ std::swap (op0, op1); ++ ++ target = gen_reg_rtx (SImode); ++ emit_move_insn (target, const0_rtx); ++ target = gen_rtx_SUBREG (QImode, target, 0); ++ ++ if ((optimize && !register_operand (op0, mode0)) ++ || !insn_data[d->icode].operand[0].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ if ((optimize && !register_operand (op1, mode1)) ++ || !insn_data[d->icode].operand[1].predicate (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ ++ pat = GEN_FCN (d->icode) (op0, op1); ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), ++ gen_rtx_fmt_ee (comparison, QImode, ++ SET_DEST (pat), ++ const0_rtx))); ++ ++ return SUBREG_REG (target); ++} ++ ++/* Subroutines of ix86_expand_args_builtin to take care of round insns. */ ++ ++static rtx ++ix86_expand_sse_round (const struct builtin_description *d, tree exp, ++ rtx target) ++{ ++ rtx pat; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ rtx op1, op0 = expand_normal (arg0); ++ machine_mode tmode = insn_data[d->icode].operand[0].mode; ++ machine_mode mode0 = insn_data[d->icode].operand[1].mode; ++ ++ if (optimize || target == 0 ++ || GET_MODE (target) != tmode ++ || !insn_data[d->icode].operand[0].predicate (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ if (VECTOR_MODE_P (mode0)) ++ op0 = safe_vector_operand (op0, mode0); ++ ++ if ((optimize && !register_operand (op0, mode0)) ++ || !insn_data[d->icode].operand[0].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ ++ op1 = GEN_INT (d->comparison); ++ ++ pat = GEN_FCN (d->icode) (target, op0, op1); ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ return target; ++} ++ ++static rtx ++ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d, ++ tree exp, rtx target) ++{ ++ rtx pat; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ tree arg1 = CALL_EXPR_ARG (exp, 1); ++ rtx op0 = expand_normal (arg0); ++ rtx op1 = expand_normal (arg1); ++ rtx op2; ++ machine_mode tmode = insn_data[d->icode].operand[0].mode; ++ machine_mode mode0 = insn_data[d->icode].operand[1].mode; ++ machine_mode mode1 = insn_data[d->icode].operand[2].mode; ++ ++ if (optimize || target == 0 ++ || GET_MODE (target) != tmode ++ || !insn_data[d->icode].operand[0].predicate (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ op0 = safe_vector_operand (op0, mode0); ++ op1 = safe_vector_operand (op1, mode1); ++ ++ if ((optimize && !register_operand (op0, mode0)) ++ || !insn_data[d->icode].operand[0].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ if ((optimize && !register_operand (op1, mode1)) ++ || !insn_data[d->icode].operand[1].predicate (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ ++ op2 = GEN_INT (d->comparison); ++ ++ pat = GEN_FCN (d->icode) (target, op0, op1, op2); ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ return target; ++} ++ ++/* Subroutine of ix86_expand_builtin to take care of ptest insns. 
*/ ++ ++static rtx ++ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, ++ rtx target) ++{ ++ rtx pat; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ tree arg1 = CALL_EXPR_ARG (exp, 1); ++ rtx op0 = expand_normal (arg0); ++ rtx op1 = expand_normal (arg1); ++ machine_mode mode0 = insn_data[d->icode].operand[0].mode; ++ machine_mode mode1 = insn_data[d->icode].operand[1].mode; ++ enum rtx_code comparison = d->comparison; ++ ++ if (VECTOR_MODE_P (mode0)) ++ op0 = safe_vector_operand (op0, mode0); ++ if (VECTOR_MODE_P (mode1)) ++ op1 = safe_vector_operand (op1, mode1); ++ ++ target = gen_reg_rtx (SImode); ++ emit_move_insn (target, const0_rtx); ++ target = gen_rtx_SUBREG (QImode, target, 0); ++ ++ if ((optimize && !register_operand (op0, mode0)) ++ || !insn_data[d->icode].operand[0].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ if ((optimize && !register_operand (op1, mode1)) ++ || !insn_data[d->icode].operand[1].predicate (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ ++ pat = GEN_FCN (d->icode) (op0, op1); ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), ++ gen_rtx_fmt_ee (comparison, QImode, ++ SET_DEST (pat), ++ const0_rtx))); ++ ++ return SUBREG_REG (target); ++} ++ ++/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */ ++ ++static rtx ++ix86_expand_sse_pcmpestr (const struct builtin_description *d, ++ tree exp, rtx target) ++{ ++ rtx pat; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ tree arg1 = CALL_EXPR_ARG (exp, 1); ++ tree arg2 = CALL_EXPR_ARG (exp, 2); ++ tree arg3 = CALL_EXPR_ARG (exp, 3); ++ tree arg4 = CALL_EXPR_ARG (exp, 4); ++ rtx scratch0, scratch1; ++ rtx op0 = expand_normal (arg0); ++ rtx op1 = expand_normal (arg1); ++ rtx op2 = expand_normal (arg2); ++ rtx op3 = expand_normal (arg3); ++ rtx op4 = expand_normal (arg4); ++ machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; ++ ++ tmode0 = insn_data[d->icode].operand[0].mode; ++ tmode1 = insn_data[d->icode].operand[1].mode; ++ modev2 = insn_data[d->icode].operand[2].mode; ++ modei3 = insn_data[d->icode].operand[3].mode; ++ modev4 = insn_data[d->icode].operand[4].mode; ++ modei5 = insn_data[d->icode].operand[5].mode; ++ modeimm = insn_data[d->icode].operand[6].mode; ++ ++ if (VECTOR_MODE_P (modev2)) ++ op0 = safe_vector_operand (op0, modev2); ++ if (VECTOR_MODE_P (modev4)) ++ op2 = safe_vector_operand (op2, modev4); ++ ++ if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) ++ op0 = copy_to_mode_reg (modev2, op0); ++ if (!insn_data[d->icode].operand[3].predicate (op1, modei3)) ++ op1 = copy_to_mode_reg (modei3, op1); ++ if ((optimize && !register_operand (op2, modev4)) ++ || !insn_data[d->icode].operand[4].predicate (op2, modev4)) ++ op2 = copy_to_mode_reg (modev4, op2); ++ if (!insn_data[d->icode].operand[5].predicate (op3, modei5)) ++ op3 = copy_to_mode_reg (modei5, op3); ++ ++ if (!insn_data[d->icode].operand[6].predicate (op4, modeimm)) ++ { ++ error ("the fifth argument must be an 8-bit immediate"); ++ return const0_rtx; ++ } ++ ++ if (d->code == IX86_BUILTIN_PCMPESTRI128) ++ { ++ if (optimize || !target ++ || GET_MODE (target) != tmode0 ++ || !insn_data[d->icode].operand[0].predicate (target, tmode0)) ++ target = gen_reg_rtx (tmode0); ++ ++ scratch1 = gen_reg_rtx (tmode1); ++ ++ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4); ++ } ++ else if (d->code == IX86_BUILTIN_PCMPESTRM128) ++ { ++ if (optimize || !target ++ || GET_MODE (target) != tmode1 
++ || !insn_data[d->icode].operand[1].predicate (target, tmode1)) ++ target = gen_reg_rtx (tmode1); ++ ++ scratch0 = gen_reg_rtx (tmode0); ++ ++ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4); ++ } ++ else ++ { ++ gcc_assert (d->flag); ++ ++ scratch0 = gen_reg_rtx (tmode0); ++ scratch1 = gen_reg_rtx (tmode1); ++ ++ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4); ++ } ++ ++ if (! pat) ++ return 0; ++ ++ emit_insn (pat); ++ ++ if (d->flag) ++ { ++ target = gen_reg_rtx (SImode); ++ emit_move_insn (target, const0_rtx); ++ target = gen_rtx_SUBREG (QImode, target, 0); ++ ++ emit_insn ++ (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), ++ gen_rtx_fmt_ee (EQ, QImode, ++ gen_rtx_REG ((machine_mode) d->flag, ++ FLAGS_REG), ++ const0_rtx))); ++ return SUBREG_REG (target); ++ } ++ else ++ return target; ++} ++ ++ ++/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */ ++ ++static rtx ++ix86_expand_sse_pcmpistr (const struct builtin_description *d, ++ tree exp, rtx target) ++{ ++ rtx pat; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ tree arg1 = CALL_EXPR_ARG (exp, 1); ++ tree arg2 = CALL_EXPR_ARG (exp, 2); ++ rtx scratch0, scratch1; ++ rtx op0 = expand_normal (arg0); ++ rtx op1 = expand_normal (arg1); ++ rtx op2 = expand_normal (arg2); ++ machine_mode tmode0, tmode1, modev2, modev3, modeimm; ++ ++ tmode0 = insn_data[d->icode].operand[0].mode; ++ tmode1 = insn_data[d->icode].operand[1].mode; ++ modev2 = insn_data[d->icode].operand[2].mode; ++ modev3 = insn_data[d->icode].operand[3].mode; ++ modeimm = insn_data[d->icode].operand[4].mode; ++ ++ if (VECTOR_MODE_P (modev2)) ++ op0 = safe_vector_operand (op0, modev2); ++ if (VECTOR_MODE_P (modev3)) ++ op1 = safe_vector_operand (op1, modev3); ++ ++ if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) ++ op0 = copy_to_mode_reg (modev2, op0); ++ if ((optimize && !register_operand (op1, modev3)) ++ || !insn_data[d->icode].operand[3].predicate (op1, modev3)) ++ op1 = copy_to_mode_reg (modev3, op1); ++ ++ if (!insn_data[d->icode].operand[4].predicate (op2, modeimm)) ++ { ++ error ("the third argument must be an 8-bit immediate"); ++ return const0_rtx; ++ } ++ ++ if (d->code == IX86_BUILTIN_PCMPISTRI128) ++ { ++ if (optimize || !target ++ || GET_MODE (target) != tmode0 ++ || !insn_data[d->icode].operand[0].predicate (target, tmode0)) ++ target = gen_reg_rtx (tmode0); ++ ++ scratch1 = gen_reg_rtx (tmode1); ++ ++ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2); ++ } ++ else if (d->code == IX86_BUILTIN_PCMPISTRM128) ++ { ++ if (optimize || !target ++ || GET_MODE (target) != tmode1 ++ || !insn_data[d->icode].operand[1].predicate (target, tmode1)) ++ target = gen_reg_rtx (tmode1); ++ ++ scratch0 = gen_reg_rtx (tmode0); ++ ++ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2); ++ } ++ else ++ { ++ gcc_assert (d->flag); ++ ++ scratch0 = gen_reg_rtx (tmode0); ++ scratch1 = gen_reg_rtx (tmode1); ++ ++ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2); ++ } ++ ++ if (! pat) ++ return 0; ++ ++ emit_insn (pat); ++ ++ if (d->flag) ++ { ++ target = gen_reg_rtx (SImode); ++ emit_move_insn (target, const0_rtx); ++ target = gen_rtx_SUBREG (QImode, target, 0); ++ ++ emit_insn ++ (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), ++ gen_rtx_fmt_ee (EQ, QImode, ++ gen_rtx_REG ((machine_mode) d->flag, ++ FLAGS_REG), ++ const0_rtx))); ++ return SUBREG_REG (target); ++ } ++ else ++ return target; ++} ++ ++/* Fixup modeless constants to fit required mode. 
*/ ++ ++static rtx ++fixup_modeless_constant (rtx x, machine_mode mode) ++{ ++ if (GET_MODE (x) == VOIDmode) ++ x = convert_to_mode (mode, x, 1); ++ return x; ++} ++ ++/* Subroutine of ix86_expand_builtin to take care of insns with ++ variable number of operands. */ ++ ++static rtx ++ix86_expand_args_builtin (const struct builtin_description *d, ++ tree exp, rtx target) ++{ ++ rtx pat, real_target; ++ unsigned int i, nargs; ++ unsigned int nargs_constant = 0; ++ unsigned int mask_pos = 0; ++ int num_memory = 0; ++ struct ++ { ++ rtx op; ++ machine_mode mode; ++ } args[6]; ++ bool second_arg_count = false; ++ enum insn_code icode = d->icode; ++ const struct insn_data_d *insn_p = &insn_data[icode]; ++ machine_mode tmode = insn_p->operand[0].mode; ++ machine_mode rmode = VOIDmode; ++ bool swap = false; ++ enum rtx_code comparison = d->comparison; ++ ++ switch ((enum ix86_builtin_func_type) d->flag) ++ { ++ case V2DF_FTYPE_V2DF_ROUND: ++ case V4DF_FTYPE_V4DF_ROUND: ++ case V8DF_FTYPE_V8DF_ROUND: ++ case V4SF_FTYPE_V4SF_ROUND: ++ case V8SF_FTYPE_V8SF_ROUND: ++ case V16SF_FTYPE_V16SF_ROUND: ++ case V4SI_FTYPE_V4SF_ROUND: ++ case V8SI_FTYPE_V8SF_ROUND: ++ case V16SI_FTYPE_V16SF_ROUND: ++ return ix86_expand_sse_round (d, exp, target); ++ case V4SI_FTYPE_V2DF_V2DF_ROUND: ++ case V8SI_FTYPE_V4DF_V4DF_ROUND: ++ case V16SI_FTYPE_V8DF_V8DF_ROUND: ++ return ix86_expand_sse_round_vec_pack_sfix (d, exp, target); ++ case INT_FTYPE_V8SF_V8SF_PTEST: ++ case INT_FTYPE_V4DI_V4DI_PTEST: ++ case INT_FTYPE_V4DF_V4DF_PTEST: ++ case INT_FTYPE_V4SF_V4SF_PTEST: ++ case INT_FTYPE_V2DI_V2DI_PTEST: ++ case INT_FTYPE_V2DF_V2DF_PTEST: ++ return ix86_expand_sse_ptest (d, exp, target); ++ case FLOAT128_FTYPE_FLOAT128: ++ case FLOAT_FTYPE_FLOAT: ++ case INT_FTYPE_INT: ++ case UINT_FTYPE_UINT: ++ case UINT16_FTYPE_UINT16: ++ case UINT64_FTYPE_INT: ++ case UINT64_FTYPE_UINT64: ++ case INT64_FTYPE_INT64: ++ case INT64_FTYPE_V4SF: ++ case INT64_FTYPE_V2DF: ++ case INT_FTYPE_V16QI: ++ case INT_FTYPE_V8QI: ++ case INT_FTYPE_V8SF: ++ case INT_FTYPE_V4DF: ++ case INT_FTYPE_V4SF: ++ case INT_FTYPE_V2DF: ++ case INT_FTYPE_V32QI: ++ case V16QI_FTYPE_V16QI: ++ case V8SI_FTYPE_V8SF: ++ case V8SI_FTYPE_V4SI: ++ case V8HI_FTYPE_V8HI: ++ case V8HI_FTYPE_V16QI: ++ case V8QI_FTYPE_V8QI: ++ case V8SF_FTYPE_V8SF: ++ case V8SF_FTYPE_V8SI: ++ case V8SF_FTYPE_V4SF: ++ case V8SF_FTYPE_V8HI: ++ case V4SI_FTYPE_V4SI: ++ case V4SI_FTYPE_V16QI: ++ case V4SI_FTYPE_V4SF: ++ case V4SI_FTYPE_V8SI: ++ case V4SI_FTYPE_V8HI: ++ case V4SI_FTYPE_V4DF: ++ case V4SI_FTYPE_V2DF: ++ case V4HI_FTYPE_V4HI: ++ case V4DF_FTYPE_V4DF: ++ case V4DF_FTYPE_V4SI: ++ case V4DF_FTYPE_V4SF: ++ case V4DF_FTYPE_V2DF: ++ case V4SF_FTYPE_V4SF: ++ case V4SF_FTYPE_V4SI: ++ case V4SF_FTYPE_V8SF: ++ case V4SF_FTYPE_V4DF: ++ case V4SF_FTYPE_V8HI: ++ case V4SF_FTYPE_V2DF: ++ case V2DI_FTYPE_V2DI: ++ case V2DI_FTYPE_V16QI: ++ case V2DI_FTYPE_V8HI: ++ case V2DI_FTYPE_V4SI: ++ case V2DF_FTYPE_V2DF: ++ case V2DF_FTYPE_V4SI: ++ case V2DF_FTYPE_V4DF: ++ case V2DF_FTYPE_V4SF: ++ case V2DF_FTYPE_V2SI: ++ case V2SI_FTYPE_V2SI: ++ case V2SI_FTYPE_V4SF: ++ case V2SI_FTYPE_V2SF: ++ case V2SI_FTYPE_V2DF: ++ case V2SF_FTYPE_V2SF: ++ case V2SF_FTYPE_V2SI: ++ case V32QI_FTYPE_V32QI: ++ case V32QI_FTYPE_V16QI: ++ case V16HI_FTYPE_V16HI: ++ case V16HI_FTYPE_V8HI: ++ case V8SI_FTYPE_V8SI: ++ case V16HI_FTYPE_V16QI: ++ case V8SI_FTYPE_V16QI: ++ case V4DI_FTYPE_V16QI: ++ case V8SI_FTYPE_V8HI: ++ case V4DI_FTYPE_V8HI: ++ case V4DI_FTYPE_V4SI: ++ case V4DI_FTYPE_V2DI: ++ case UQI_FTYPE_UQI: ++ case 
UHI_FTYPE_UHI: ++ case USI_FTYPE_USI: ++ case USI_FTYPE_UQI: ++ case USI_FTYPE_UHI: ++ case UDI_FTYPE_UDI: ++ case UHI_FTYPE_V16QI: ++ case USI_FTYPE_V32QI: ++ case UDI_FTYPE_V64QI: ++ case V16QI_FTYPE_UHI: ++ case V32QI_FTYPE_USI: ++ case V64QI_FTYPE_UDI: ++ case V8HI_FTYPE_UQI: ++ case V16HI_FTYPE_UHI: ++ case V32HI_FTYPE_USI: ++ case V4SI_FTYPE_UQI: ++ case V8SI_FTYPE_UQI: ++ case V4SI_FTYPE_UHI: ++ case V8SI_FTYPE_UHI: ++ case UQI_FTYPE_V8HI: ++ case UHI_FTYPE_V16HI: ++ case USI_FTYPE_V32HI: ++ case UQI_FTYPE_V4SI: ++ case UQI_FTYPE_V8SI: ++ case UHI_FTYPE_V16SI: ++ case UQI_FTYPE_V2DI: ++ case UQI_FTYPE_V4DI: ++ case UQI_FTYPE_V8DI: ++ case V16SI_FTYPE_UHI: ++ case V2DI_FTYPE_UQI: ++ case V4DI_FTYPE_UQI: ++ case V16SI_FTYPE_INT: ++ case V16SF_FTYPE_V8SF: ++ case V16SI_FTYPE_V8SI: ++ case V16SF_FTYPE_V4SF: ++ case V16SI_FTYPE_V4SI: ++ case V16SI_FTYPE_V16SF: ++ case V16SI_FTYPE_V16SI: ++ case V64QI_FTYPE_V64QI: ++ case V32HI_FTYPE_V32HI: ++ case V16SF_FTYPE_V16SF: ++ case V8DI_FTYPE_UQI: ++ case V8DI_FTYPE_V8DI: ++ case V8DF_FTYPE_V4DF: ++ case V8DF_FTYPE_V2DF: ++ case V8DF_FTYPE_V8DF: ++ case V4DI_FTYPE_V4DI: ++ nargs = 1; ++ break; ++ case V4SF_FTYPE_V4SF_VEC_MERGE: ++ case V2DF_FTYPE_V2DF_VEC_MERGE: ++ return ix86_expand_unop_vec_merge_builtin (icode, exp, target); ++ case FLOAT128_FTYPE_FLOAT128_FLOAT128: ++ case V16QI_FTYPE_V16QI_V16QI: ++ case V16QI_FTYPE_V8HI_V8HI: ++ case V16SF_FTYPE_V16SF_V16SF: ++ case V8QI_FTYPE_V8QI_V8QI: ++ case V8QI_FTYPE_V4HI_V4HI: ++ case V8HI_FTYPE_V8HI_V8HI: ++ case V8HI_FTYPE_V16QI_V16QI: ++ case V8HI_FTYPE_V4SI_V4SI: ++ case V8SF_FTYPE_V8SF_V8SF: ++ case V8SF_FTYPE_V8SF_V8SI: ++ case V8DF_FTYPE_V8DF_V8DF: ++ case V4SI_FTYPE_V4SI_V4SI: ++ case V4SI_FTYPE_V8HI_V8HI: ++ case V4SI_FTYPE_V2DF_V2DF: ++ case V4HI_FTYPE_V4HI_V4HI: ++ case V4HI_FTYPE_V8QI_V8QI: ++ case V4HI_FTYPE_V2SI_V2SI: ++ case V4DF_FTYPE_V4DF_V4DF: ++ case V4DF_FTYPE_V4DF_V4DI: ++ case V4SF_FTYPE_V4SF_V4SF: ++ case V4SF_FTYPE_V4SF_V4SI: ++ case V4SF_FTYPE_V4SF_V2SI: ++ case V4SF_FTYPE_V4SF_V2DF: ++ case V4SF_FTYPE_V4SF_UINT: ++ case V4SF_FTYPE_V4SF_DI: ++ case V4SF_FTYPE_V4SF_SI: ++ case V2DI_FTYPE_V2DI_V2DI: ++ case V2DI_FTYPE_V16QI_V16QI: ++ case V2DI_FTYPE_V4SI_V4SI: ++ case V2DI_FTYPE_V2DI_V16QI: ++ case V2SI_FTYPE_V2SI_V2SI: ++ case V2SI_FTYPE_V4HI_V4HI: ++ case V2SI_FTYPE_V2SF_V2SF: ++ case V2DF_FTYPE_V2DF_V2DF: ++ case V2DF_FTYPE_V2DF_V4SF: ++ case V2DF_FTYPE_V2DF_V2DI: ++ case V2DF_FTYPE_V2DF_DI: ++ case V2DF_FTYPE_V2DF_SI: ++ case V2DF_FTYPE_V2DF_UINT: ++ case V2SF_FTYPE_V2SF_V2SF: ++ case V1DI_FTYPE_V1DI_V1DI: ++ case V1DI_FTYPE_V8QI_V8QI: ++ case V1DI_FTYPE_V2SI_V2SI: ++ case V32QI_FTYPE_V16HI_V16HI: ++ case V16HI_FTYPE_V8SI_V8SI: ++ case V64QI_FTYPE_V64QI_V64QI: ++ case V32QI_FTYPE_V32QI_V32QI: ++ case V16HI_FTYPE_V32QI_V32QI: ++ case V16HI_FTYPE_V16HI_V16HI: ++ case V8SI_FTYPE_V4DF_V4DF: ++ case V8SI_FTYPE_V8SI_V8SI: ++ case V8SI_FTYPE_V16HI_V16HI: ++ case V4DI_FTYPE_V4DI_V4DI: ++ case V4DI_FTYPE_V8SI_V8SI: ++ case V8DI_FTYPE_V64QI_V64QI: ++ if (comparison == UNKNOWN) ++ return ix86_expand_binop_builtin (icode, exp, target); ++ nargs = 2; ++ break; ++ case V4SF_FTYPE_V4SF_V4SF_SWAP: ++ case V2DF_FTYPE_V2DF_V2DF_SWAP: ++ gcc_assert (comparison != UNKNOWN); ++ nargs = 2; ++ swap = true; ++ break; ++ case V16HI_FTYPE_V16HI_V8HI_COUNT: ++ case V16HI_FTYPE_V16HI_SI_COUNT: ++ case V8SI_FTYPE_V8SI_V4SI_COUNT: ++ case V8SI_FTYPE_V8SI_SI_COUNT: ++ case V4DI_FTYPE_V4DI_V2DI_COUNT: ++ case V4DI_FTYPE_V4DI_INT_COUNT: ++ case V8HI_FTYPE_V8HI_V8HI_COUNT: ++ case V8HI_FTYPE_V8HI_SI_COUNT: 
++ case V4SI_FTYPE_V4SI_V4SI_COUNT: ++ case V4SI_FTYPE_V4SI_SI_COUNT: ++ case V4HI_FTYPE_V4HI_V4HI_COUNT: ++ case V4HI_FTYPE_V4HI_SI_COUNT: ++ case V2DI_FTYPE_V2DI_V2DI_COUNT: ++ case V2DI_FTYPE_V2DI_SI_COUNT: ++ case V2SI_FTYPE_V2SI_V2SI_COUNT: ++ case V2SI_FTYPE_V2SI_SI_COUNT: ++ case V1DI_FTYPE_V1DI_V1DI_COUNT: ++ case V1DI_FTYPE_V1DI_SI_COUNT: ++ nargs = 2; ++ second_arg_count = true; ++ break; ++ case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT: ++ case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT: ++ case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT: ++ case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT: ++ case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT: ++ case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT: ++ case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT: ++ case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT: ++ case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT: ++ case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT: ++ case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT: ++ case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT: ++ case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT: ++ case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT: ++ case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT: ++ case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT: ++ case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT: ++ case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT: ++ nargs = 4; ++ second_arg_count = true; ++ break; ++ case UINT64_FTYPE_UINT64_UINT64: ++ case UINT_FTYPE_UINT_UINT: ++ case UINT_FTYPE_UINT_USHORT: ++ case UINT_FTYPE_UINT_UCHAR: ++ case UINT16_FTYPE_UINT16_INT: ++ case UINT8_FTYPE_UINT8_INT: ++ case UQI_FTYPE_UQI_UQI: ++ case UHI_FTYPE_UHI_UHI: ++ case USI_FTYPE_USI_USI: ++ case UDI_FTYPE_UDI_UDI: ++ case V16SI_FTYPE_V8DF_V8DF: ++ nargs = 2; ++ break; ++ case V2DI_FTYPE_V2DI_INT_CONVERT: ++ nargs = 2; ++ rmode = V1TImode; ++ nargs_constant = 1; ++ break; ++ case V4DI_FTYPE_V4DI_INT_CONVERT: ++ nargs = 2; ++ rmode = V2TImode; ++ nargs_constant = 1; ++ break; ++ case V8DI_FTYPE_V8DI_INT_CONVERT: ++ nargs = 2; ++ rmode = V4TImode; ++ nargs_constant = 1; ++ break; ++ case V8HI_FTYPE_V8HI_INT: ++ case V8HI_FTYPE_V8SF_INT: ++ case V16HI_FTYPE_V16SF_INT: ++ case V8HI_FTYPE_V4SF_INT: ++ case V8SF_FTYPE_V8SF_INT: ++ case V4SF_FTYPE_V16SF_INT: ++ case V16SF_FTYPE_V16SF_INT: ++ case V4SI_FTYPE_V4SI_INT: ++ case V4SI_FTYPE_V8SI_INT: ++ case V4HI_FTYPE_V4HI_INT: ++ case V4DF_FTYPE_V4DF_INT: ++ case V4DF_FTYPE_V8DF_INT: ++ case V4SF_FTYPE_V4SF_INT: ++ case V4SF_FTYPE_V8SF_INT: ++ case V2DI_FTYPE_V2DI_INT: ++ case V2DF_FTYPE_V2DF_INT: ++ case V2DF_FTYPE_V4DF_INT: ++ case V16HI_FTYPE_V16HI_INT: ++ case V8SI_FTYPE_V8SI_INT: ++ case V16SI_FTYPE_V16SI_INT: ++ case V4SI_FTYPE_V16SI_INT: ++ case V4DI_FTYPE_V4DI_INT: ++ case V2DI_FTYPE_V4DI_INT: ++ case V4DI_FTYPE_V8DI_INT: ++ case QI_FTYPE_V4SF_INT: ++ case QI_FTYPE_V2DF_INT: ++ case UQI_FTYPE_UQI_UQI_CONST: ++ case UHI_FTYPE_UHI_UQI: ++ case USI_FTYPE_USI_UQI: ++ case UDI_FTYPE_UDI_UQI: ++ nargs = 2; ++ nargs_constant = 1; ++ break; ++ case V16QI_FTYPE_V16QI_V16QI_V16QI: ++ case V8SF_FTYPE_V8SF_V8SF_V8SF: ++ case V4DF_FTYPE_V4DF_V4DF_V4DF: ++ case V4SF_FTYPE_V4SF_V4SF_V4SF: ++ case V2DF_FTYPE_V2DF_V2DF_V2DF: ++ case V32QI_FTYPE_V32QI_V32QI_V32QI: ++ case UHI_FTYPE_V16SI_V16SI_UHI: ++ case UQI_FTYPE_V8DI_V8DI_UQI: ++ case V16HI_FTYPE_V16SI_V16HI_UHI: ++ case V16QI_FTYPE_V16SI_V16QI_UHI: ++ case V16QI_FTYPE_V8DI_V16QI_UQI: ++ case V16SF_FTYPE_V16SF_V16SF_UHI: ++ case V16SF_FTYPE_V4SF_V16SF_UHI: ++ case V16SI_FTYPE_SI_V16SI_UHI: ++ case V16SI_FTYPE_V16HI_V16SI_UHI: ++ case V16SI_FTYPE_V16QI_V16SI_UHI: ++ case V8SF_FTYPE_V4SF_V8SF_UQI: ++ case V4DF_FTYPE_V2DF_V4DF_UQI: ++ case V8SI_FTYPE_V4SI_V8SI_UQI: ++ case 
V8SI_FTYPE_SI_V8SI_UQI: ++ case V4SI_FTYPE_V4SI_V4SI_UQI: ++ case V4SI_FTYPE_SI_V4SI_UQI: ++ case V4DI_FTYPE_V2DI_V4DI_UQI: ++ case V4DI_FTYPE_DI_V4DI_UQI: ++ case V2DI_FTYPE_V2DI_V2DI_UQI: ++ case V2DI_FTYPE_DI_V2DI_UQI: ++ case V64QI_FTYPE_V64QI_V64QI_UDI: ++ case V64QI_FTYPE_V16QI_V64QI_UDI: ++ case V64QI_FTYPE_QI_V64QI_UDI: ++ case V32QI_FTYPE_V32QI_V32QI_USI: ++ case V32QI_FTYPE_V16QI_V32QI_USI: ++ case V32QI_FTYPE_QI_V32QI_USI: ++ case V16QI_FTYPE_V16QI_V16QI_UHI: ++ case V16QI_FTYPE_QI_V16QI_UHI: ++ case V32HI_FTYPE_V8HI_V32HI_USI: ++ case V32HI_FTYPE_HI_V32HI_USI: ++ case V16HI_FTYPE_V8HI_V16HI_UHI: ++ case V16HI_FTYPE_HI_V16HI_UHI: ++ case V8HI_FTYPE_V8HI_V8HI_UQI: ++ case V8HI_FTYPE_HI_V8HI_UQI: ++ case V8SF_FTYPE_V8HI_V8SF_UQI: ++ case V4SF_FTYPE_V8HI_V4SF_UQI: ++ case V8SI_FTYPE_V8SF_V8SI_UQI: ++ case V4SI_FTYPE_V4SF_V4SI_UQI: ++ case V4DI_FTYPE_V4SF_V4DI_UQI: ++ case V2DI_FTYPE_V4SF_V2DI_UQI: ++ case V4SF_FTYPE_V4DI_V4SF_UQI: ++ case V4SF_FTYPE_V2DI_V4SF_UQI: ++ case V4DF_FTYPE_V4DI_V4DF_UQI: ++ case V2DF_FTYPE_V2DI_V2DF_UQI: ++ case V16QI_FTYPE_V8HI_V16QI_UQI: ++ case V16QI_FTYPE_V16HI_V16QI_UHI: ++ case V16QI_FTYPE_V4SI_V16QI_UQI: ++ case V16QI_FTYPE_V8SI_V16QI_UQI: ++ case V8HI_FTYPE_V4SI_V8HI_UQI: ++ case V8HI_FTYPE_V8SI_V8HI_UQI: ++ case V16QI_FTYPE_V2DI_V16QI_UQI: ++ case V16QI_FTYPE_V4DI_V16QI_UQI: ++ case V8HI_FTYPE_V2DI_V8HI_UQI: ++ case V8HI_FTYPE_V4DI_V8HI_UQI: ++ case V4SI_FTYPE_V2DI_V4SI_UQI: ++ case V4SI_FTYPE_V4DI_V4SI_UQI: ++ case V32QI_FTYPE_V32HI_V32QI_USI: ++ case UHI_FTYPE_V16QI_V16QI_UHI: ++ case USI_FTYPE_V32QI_V32QI_USI: ++ case UDI_FTYPE_V64QI_V64QI_UDI: ++ case UQI_FTYPE_V8HI_V8HI_UQI: ++ case UHI_FTYPE_V16HI_V16HI_UHI: ++ case USI_FTYPE_V32HI_V32HI_USI: ++ case UQI_FTYPE_V4SI_V4SI_UQI: ++ case UQI_FTYPE_V8SI_V8SI_UQI: ++ case UQI_FTYPE_V2DI_V2DI_UQI: ++ case UQI_FTYPE_V4DI_V4DI_UQI: ++ case V4SF_FTYPE_V2DF_V4SF_UQI: ++ case V4SF_FTYPE_V4DF_V4SF_UQI: ++ case V16SI_FTYPE_V16SI_V16SI_UHI: ++ case V16SI_FTYPE_V4SI_V16SI_UHI: ++ case V2DI_FTYPE_V4SI_V2DI_UQI: ++ case V2DI_FTYPE_V8HI_V2DI_UQI: ++ case V2DI_FTYPE_V16QI_V2DI_UQI: ++ case V4DI_FTYPE_V4DI_V4DI_UQI: ++ case V4DI_FTYPE_V4SI_V4DI_UQI: ++ case V4DI_FTYPE_V8HI_V4DI_UQI: ++ case V4DI_FTYPE_V16QI_V4DI_UQI: ++ case V4DI_FTYPE_V4DF_V4DI_UQI: ++ case V2DI_FTYPE_V2DF_V2DI_UQI: ++ case V4SI_FTYPE_V4DF_V4SI_UQI: ++ case V4SI_FTYPE_V2DF_V4SI_UQI: ++ case V4SI_FTYPE_V8HI_V4SI_UQI: ++ case V4SI_FTYPE_V16QI_V4SI_UQI: ++ case V4DI_FTYPE_V4DI_V4DI_V4DI: ++ case V8DF_FTYPE_V2DF_V8DF_UQI: ++ case V8DF_FTYPE_V4DF_V8DF_UQI: ++ case V8DF_FTYPE_V8DF_V8DF_UQI: ++ case V8SF_FTYPE_V8SF_V8SF_UQI: ++ case V8SF_FTYPE_V8SI_V8SF_UQI: ++ case V4DF_FTYPE_V4DF_V4DF_UQI: ++ case V4SF_FTYPE_V4SF_V4SF_UQI: ++ case V2DF_FTYPE_V2DF_V2DF_UQI: ++ case V2DF_FTYPE_V4SF_V2DF_UQI: ++ case V2DF_FTYPE_V4SI_V2DF_UQI: ++ case V4SF_FTYPE_V4SI_V4SF_UQI: ++ case V4DF_FTYPE_V4SF_V4DF_UQI: ++ case V4DF_FTYPE_V4SI_V4DF_UQI: ++ case V8SI_FTYPE_V8SI_V8SI_UQI: ++ case V8SI_FTYPE_V8HI_V8SI_UQI: ++ case V8SI_FTYPE_V16QI_V8SI_UQI: ++ case V8DF_FTYPE_V8SI_V8DF_UQI: ++ case V8DI_FTYPE_DI_V8DI_UQI: ++ case V16SF_FTYPE_V8SF_V16SF_UHI: ++ case V16SI_FTYPE_V8SI_V16SI_UHI: ++ case V16HI_FTYPE_V16HI_V16HI_UHI: ++ case V8HI_FTYPE_V16QI_V8HI_UQI: ++ case V16HI_FTYPE_V16QI_V16HI_UHI: ++ case V32HI_FTYPE_V32HI_V32HI_USI: ++ case V32HI_FTYPE_V32QI_V32HI_USI: ++ case V8DI_FTYPE_V16QI_V8DI_UQI: ++ case V8DI_FTYPE_V2DI_V8DI_UQI: ++ case V8DI_FTYPE_V4DI_V8DI_UQI: ++ case V8DI_FTYPE_V8DI_V8DI_UQI: ++ case V8DI_FTYPE_V8HI_V8DI_UQI: ++ case V8DI_FTYPE_V8SI_V8DI_UQI: ++ case 
V8HI_FTYPE_V8DI_V8HI_UQI: ++ case V8SI_FTYPE_V8DI_V8SI_UQI: ++ case V4SI_FTYPE_V4SI_V4SI_V4SI: ++ case V16SI_FTYPE_V16SI_V16SI_V16SI: ++ case V8DI_FTYPE_V8DI_V8DI_V8DI: ++ case V32HI_FTYPE_V32HI_V32HI_V32HI: ++ case V2DI_FTYPE_V2DI_V2DI_V2DI: ++ case V16HI_FTYPE_V16HI_V16HI_V16HI: ++ case V8SI_FTYPE_V8SI_V8SI_V8SI: ++ case V8HI_FTYPE_V8HI_V8HI_V8HI: ++ nargs = 3; ++ break; ++ case V32QI_FTYPE_V32QI_V32QI_INT: ++ case V16HI_FTYPE_V16HI_V16HI_INT: ++ case V16QI_FTYPE_V16QI_V16QI_INT: ++ case V4DI_FTYPE_V4DI_V4DI_INT: ++ case V8HI_FTYPE_V8HI_V8HI_INT: ++ case V8SI_FTYPE_V8SI_V8SI_INT: ++ case V8SI_FTYPE_V8SI_V4SI_INT: ++ case V8SF_FTYPE_V8SF_V8SF_INT: ++ case V8SF_FTYPE_V8SF_V4SF_INT: ++ case V4SI_FTYPE_V4SI_V4SI_INT: ++ case V4DF_FTYPE_V4DF_V4DF_INT: ++ case V16SF_FTYPE_V16SF_V16SF_INT: ++ case V16SF_FTYPE_V16SF_V4SF_INT: ++ case V16SI_FTYPE_V16SI_V4SI_INT: ++ case V4DF_FTYPE_V4DF_V2DF_INT: ++ case V4SF_FTYPE_V4SF_V4SF_INT: ++ case V2DI_FTYPE_V2DI_V2DI_INT: ++ case V4DI_FTYPE_V4DI_V2DI_INT: ++ case V2DF_FTYPE_V2DF_V2DF_INT: ++ case UQI_FTYPE_V8DI_V8UDI_INT: ++ case UQI_FTYPE_V8DF_V8DF_INT: ++ case UQI_FTYPE_V2DF_V2DF_INT: ++ case UQI_FTYPE_V4SF_V4SF_INT: ++ case UHI_FTYPE_V16SI_V16SI_INT: ++ case UHI_FTYPE_V16SF_V16SF_INT: ++ case V64QI_FTYPE_V64QI_V64QI_INT: ++ case V32HI_FTYPE_V32HI_V32HI_INT: ++ case V16SI_FTYPE_V16SI_V16SI_INT: ++ case V8DI_FTYPE_V8DI_V8DI_INT: ++ nargs = 3; ++ nargs_constant = 1; ++ break; ++ case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT: ++ nargs = 3; ++ rmode = V4DImode; ++ nargs_constant = 1; ++ break; ++ case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT: ++ nargs = 3; ++ rmode = V2DImode; ++ nargs_constant = 1; ++ break; ++ case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT: ++ nargs = 3; ++ rmode = DImode; ++ nargs_constant = 1; ++ break; ++ case V2DI_FTYPE_V2DI_UINT_UINT: ++ nargs = 3; ++ nargs_constant = 2; ++ break; ++ case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT: ++ nargs = 3; ++ rmode = V8DImode; ++ nargs_constant = 1; ++ break; ++ case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT: ++ nargs = 5; ++ rmode = V8DImode; ++ mask_pos = 2; ++ nargs_constant = 1; ++ break; ++ case QI_FTYPE_V8DF_INT_UQI: ++ case QI_FTYPE_V4DF_INT_UQI: ++ case QI_FTYPE_V2DF_INT_UQI: ++ case HI_FTYPE_V16SF_INT_UHI: ++ case QI_FTYPE_V8SF_INT_UQI: ++ case QI_FTYPE_V4SF_INT_UQI: ++ case V4SI_FTYPE_V4SI_V4SI_UHI: ++ case V8SI_FTYPE_V8SI_V8SI_UHI: ++ nargs = 3; ++ mask_pos = 1; ++ nargs_constant = 1; ++ break; ++ case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT: ++ nargs = 5; ++ rmode = V4DImode; ++ mask_pos = 2; ++ nargs_constant = 1; ++ break; ++ case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT: ++ nargs = 5; ++ rmode = V2DImode; ++ mask_pos = 2; ++ nargs_constant = 1; ++ break; ++ case V32QI_FTYPE_V32QI_V32QI_V32QI_USI: ++ case V32HI_FTYPE_V32HI_V32HI_V32HI_USI: ++ case V32HI_FTYPE_V64QI_V64QI_V32HI_USI: ++ case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI: ++ case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI: ++ case V32HI_FTYPE_V32HI_V8HI_V32HI_USI: ++ case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI: ++ case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI: ++ case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI: ++ case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI: ++ case V32QI_FTYPE_V16HI_V16HI_V32QI_USI: ++ case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI: ++ case V32HI_FTYPE_V16SI_V16SI_V32HI_USI: ++ case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI: ++ case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI: ++ case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI: ++ case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI: ++ case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI: ++ case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI: ++ case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI: ++ case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI: ++ case 
V4DF_FTYPE_V4DI_V4DF_V4DF_UQI: ++ case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI: ++ case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI: ++ case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI: ++ case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI: ++ case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI: ++ case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI: ++ case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI: ++ case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI: ++ case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI: ++ case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI: ++ case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI: ++ case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI: ++ case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI: ++ case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI: ++ case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI: ++ case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI: ++ case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI: ++ case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI: ++ case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI: ++ case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI: ++ case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI: ++ case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI: ++ case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI: ++ case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI: ++ case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI: ++ case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI: ++ case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI: ++ case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI: ++ case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI: ++ nargs = 4; ++ break; ++ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: ++ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: ++ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: ++ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: ++ case V16SF_FTYPE_V16SF_V16SF_V16SI_INT: ++ nargs = 4; ++ nargs_constant = 1; ++ break; ++ case UQI_FTYPE_V4DI_V4DI_INT_UQI: ++ case UQI_FTYPE_V8SI_V8SI_INT_UQI: ++ case QI_FTYPE_V4DF_V4DF_INT_UQI: ++ case QI_FTYPE_V8SF_V8SF_INT_UQI: ++ case UQI_FTYPE_V2DI_V2DI_INT_UQI: ++ case UQI_FTYPE_V4SI_V4SI_INT_UQI: ++ case UQI_FTYPE_V2DF_V2DF_INT_UQI: ++ case UQI_FTYPE_V4SF_V4SF_INT_UQI: ++ case UDI_FTYPE_V64QI_V64QI_INT_UDI: ++ case USI_FTYPE_V32QI_V32QI_INT_USI: ++ case UHI_FTYPE_V16QI_V16QI_INT_UHI: ++ case USI_FTYPE_V32HI_V32HI_INT_USI: ++ case UHI_FTYPE_V16HI_V16HI_INT_UHI: ++ case UQI_FTYPE_V8HI_V8HI_INT_UQI: ++ case V32HI_FTYPE_V32HI_V32HI_V32HI_INT: ++ case V16HI_FTYPE_V16HI_V16HI_V16HI_INT: ++ case V8HI_FTYPE_V8HI_V8HI_V8HI_INT: ++ case V8SI_FTYPE_V8SI_V8SI_V8SI_INT: ++ case V4DI_FTYPE_V4DI_V4DI_V4DI_INT: ++ case V8DI_FTYPE_V8DI_V8DI_V8DI_INT: ++ case V16SI_FTYPE_V16SI_V16SI_V16SI_INT: ++ case V2DI_FTYPE_V2DI_V2DI_V2DI_INT: ++ case V4SI_FTYPE_V4SI_V4SI_V4SI_INT: ++ nargs = 4; ++ mask_pos = 1; ++ nargs_constant = 1; ++ break; ++ case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: ++ nargs = 4; ++ nargs_constant = 2; ++ break; ++ case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED: ++ case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG: ++ nargs = 4; ++ break; ++ case UQI_FTYPE_V8DI_V8DI_INT_UQI: ++ case UHI_FTYPE_V16SI_V16SI_INT_UHI: ++ mask_pos = 1; ++ nargs = 4; ++ nargs_constant = 1; ++ break; ++ case V8SF_FTYPE_V8SF_INT_V8SF_UQI: ++ case V4SF_FTYPE_V4SF_INT_V4SF_UQI: ++ case V2DF_FTYPE_V4DF_INT_V2DF_UQI: ++ case V2DI_FTYPE_V4DI_INT_V2DI_UQI: ++ case V8SF_FTYPE_V16SF_INT_V8SF_UQI: ++ case V8SI_FTYPE_V16SI_INT_V8SI_UQI: ++ case V2DF_FTYPE_V8DF_INT_V2DF_UQI: ++ case V2DI_FTYPE_V8DI_INT_V2DI_UQI: ++ case V4SF_FTYPE_V8SF_INT_V4SF_UQI: ++ case V4SI_FTYPE_V8SI_INT_V4SI_UQI: ++ case V8HI_FTYPE_V8SF_INT_V8HI_UQI: ++ case V8HI_FTYPE_V4SF_INT_V8HI_UQI: ++ case V32HI_FTYPE_V32HI_INT_V32HI_USI: ++ case V16HI_FTYPE_V16HI_INT_V16HI_UHI: ++ case V8HI_FTYPE_V8HI_INT_V8HI_UQI: ++ case V4DI_FTYPE_V4DI_INT_V4DI_UQI: ++ case V2DI_FTYPE_V2DI_INT_V2DI_UQI: ++ case V8SI_FTYPE_V8SI_INT_V8SI_UQI: ++ case V4SI_FTYPE_V4SI_INT_V4SI_UQI: ++ case V4DF_FTYPE_V4DF_INT_V4DF_UQI: ++ case 
V2DF_FTYPE_V2DF_INT_V2DF_UQI: ++ case V8DF_FTYPE_V8DF_INT_V8DF_UQI: ++ case V16SF_FTYPE_V16SF_INT_V16SF_UHI: ++ case V16HI_FTYPE_V16SF_INT_V16HI_UHI: ++ case V16SI_FTYPE_V16SI_INT_V16SI_UHI: ++ case V4SI_FTYPE_V16SI_INT_V4SI_UQI: ++ case V4DI_FTYPE_V8DI_INT_V4DI_UQI: ++ case V4DF_FTYPE_V8DF_INT_V4DF_UQI: ++ case V4SF_FTYPE_V16SF_INT_V4SF_UQI: ++ case V8DI_FTYPE_V8DI_INT_V8DI_UQI: ++ nargs = 4; ++ mask_pos = 2; ++ nargs_constant = 1; ++ break; ++ case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI: ++ case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI: ++ case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI: ++ case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI: ++ case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI: ++ case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI: ++ case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI: ++ case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI: ++ case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI: ++ case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI: ++ case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI: ++ case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI: ++ case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI: ++ case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI: ++ case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI: ++ case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI: ++ case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI: ++ case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI: ++ case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI: ++ case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI: ++ case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI: ++ case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI: ++ case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI: ++ case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI: ++ case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI: ++ case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI: ++ case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI: ++ nargs = 5; ++ mask_pos = 2; ++ nargs_constant = 1; ++ break; ++ case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI: ++ case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI: ++ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI: ++ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI: ++ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI: ++ case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI: ++ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI: ++ case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI: ++ case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI: ++ case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI: ++ nargs = 5; ++ mask_pos = 1; ++ nargs_constant = 1; ++ break; ++ case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI: ++ case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI: ++ case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI: ++ case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT: ++ case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT: ++ case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT: ++ case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT: ++ case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT: ++ case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT: ++ case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT: ++ case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT: ++ case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT: ++ nargs = 5; ++ mask_pos = 1; ++ nargs_constant = 2; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ gcc_assert (nargs <= ARRAY_SIZE (args)); ++ ++ if (comparison != UNKNOWN) ++ { ++ gcc_assert (nargs == 2); ++ return ix86_expand_sse_compare (d, exp, target, swap); ++ } ++ ++ if (rmode == VOIDmode || rmode == tmode) ++ { ++ if (optimize ++ || target == 0 ++ || GET_MODE (target) != tmode ++ || !insn_p->operand[0].predicate (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ else if (memory_operand (target, tmode)) ++ num_memory++; ++ real_target = target; ++ } ++ else ++ { ++ real_target = gen_reg_rtx (tmode); ++ target = lowpart_subreg (rmode, real_target, tmode); ++ } ++ ++ for (i = 0; i < nargs; i++) ++ { ++ tree arg = CALL_EXPR_ARG (exp, i); ++ rtx op = expand_normal (arg); ++ 
machine_mode mode = insn_p->operand[i + 1].mode; ++ bool match = insn_p->operand[i + 1].predicate (op, mode); ++ ++ if (second_arg_count && i == 1) ++ { ++ /* SIMD shift insns take either an 8-bit immediate or ++ register as count. But builtin functions take int as ++ count. If count doesn't match, we put it in register. ++ The instructions are using 64-bit count, if op is just ++ 32-bit, zero-extend it, as negative shift counts ++ are undefined behavior and zero-extension is more ++ efficient. */ ++ if (!match) ++ { ++ if (SCALAR_INT_MODE_P (GET_MODE (op))) ++ op = convert_modes (mode, GET_MODE (op), op, 1); ++ else ++ op = lowpart_subreg (mode, op, GET_MODE (op)); ++ if (!insn_p->operand[i + 1].predicate (op, mode)) ++ op = copy_to_reg (op); ++ } ++ } ++ else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || ++ (!mask_pos && (nargs - i) <= nargs_constant)) ++ { ++ if (!match) ++ switch (icode) ++ { ++ case CODE_FOR_avx_vinsertf128v4di: ++ case CODE_FOR_avx_vextractf128v4di: ++ error ("the last argument must be an 1-bit immediate"); ++ return const0_rtx; ++ ++ case CODE_FOR_avx512f_cmpv8di3_mask: ++ case CODE_FOR_avx512f_cmpv16si3_mask: ++ case CODE_FOR_avx512f_ucmpv8di3_mask: ++ case CODE_FOR_avx512f_ucmpv16si3_mask: ++ case CODE_FOR_avx512vl_cmpv4di3_mask: ++ case CODE_FOR_avx512vl_cmpv8si3_mask: ++ case CODE_FOR_avx512vl_ucmpv4di3_mask: ++ case CODE_FOR_avx512vl_ucmpv8si3_mask: ++ case CODE_FOR_avx512vl_cmpv2di3_mask: ++ case CODE_FOR_avx512vl_cmpv4si3_mask: ++ case CODE_FOR_avx512vl_ucmpv2di3_mask: ++ case CODE_FOR_avx512vl_ucmpv4si3_mask: ++ error ("the last argument must be a 3-bit immediate"); ++ return const0_rtx; ++ ++ case CODE_FOR_sse4_1_roundsd: ++ case CODE_FOR_sse4_1_roundss: ++ ++ case CODE_FOR_sse4_1_roundpd: ++ case CODE_FOR_sse4_1_roundps: ++ case CODE_FOR_avx_roundpd256: ++ case CODE_FOR_avx_roundps256: ++ ++ case CODE_FOR_sse4_1_roundpd_vec_pack_sfix: ++ case CODE_FOR_sse4_1_roundps_sfix: ++ case CODE_FOR_avx_roundpd_vec_pack_sfix256: ++ case CODE_FOR_avx_roundps_sfix256: ++ ++ case CODE_FOR_sse4_1_blendps: ++ case CODE_FOR_avx_blendpd256: ++ case CODE_FOR_avx_vpermilv4df: ++ case CODE_FOR_avx_vpermilv4df_mask: ++ case CODE_FOR_avx512f_getmantv8df_mask: ++ case CODE_FOR_avx512f_getmantv16sf_mask: ++ case CODE_FOR_avx512vl_getmantv8sf_mask: ++ case CODE_FOR_avx512vl_getmantv4df_mask: ++ case CODE_FOR_avx512vl_getmantv4sf_mask: ++ case CODE_FOR_avx512vl_getmantv2df_mask: ++ case CODE_FOR_avx512dq_rangepv8df_mask_round: ++ case CODE_FOR_avx512dq_rangepv16sf_mask_round: ++ case CODE_FOR_avx512dq_rangepv4df_mask: ++ case CODE_FOR_avx512dq_rangepv8sf_mask: ++ case CODE_FOR_avx512dq_rangepv2df_mask: ++ case CODE_FOR_avx512dq_rangepv4sf_mask: ++ case CODE_FOR_avx_shufpd256_mask: ++ error ("the last argument must be a 4-bit immediate"); ++ return const0_rtx; ++ ++ case CODE_FOR_sha1rnds4: ++ case CODE_FOR_sse4_1_blendpd: ++ case CODE_FOR_avx_vpermilv2df: ++ case CODE_FOR_avx_vpermilv2df_mask: ++ case CODE_FOR_xop_vpermil2v2df3: ++ case CODE_FOR_xop_vpermil2v4sf3: ++ case CODE_FOR_xop_vpermil2v4df3: ++ case CODE_FOR_xop_vpermil2v8sf3: ++ case CODE_FOR_avx512f_vinsertf32x4_mask: ++ case CODE_FOR_avx512f_vinserti32x4_mask: ++ case CODE_FOR_avx512f_vextractf32x4_mask: ++ case CODE_FOR_avx512f_vextracti32x4_mask: ++ case CODE_FOR_sse2_shufpd: ++ case CODE_FOR_sse2_shufpd_mask: ++ case CODE_FOR_avx512dq_shuf_f64x2_mask: ++ case CODE_FOR_avx512dq_shuf_i64x2_mask: ++ case CODE_FOR_avx512vl_shuf_i32x4_mask: ++ case CODE_FOR_avx512vl_shuf_f32x4_mask: ++ error ("the last 
argument must be a 2-bit immediate"); ++ return const0_rtx; ++ ++ case CODE_FOR_avx_vextractf128v4df: ++ case CODE_FOR_avx_vextractf128v8sf: ++ case CODE_FOR_avx_vextractf128v8si: ++ case CODE_FOR_avx_vinsertf128v4df: ++ case CODE_FOR_avx_vinsertf128v8sf: ++ case CODE_FOR_avx_vinsertf128v8si: ++ case CODE_FOR_avx512f_vinsertf64x4_mask: ++ case CODE_FOR_avx512f_vinserti64x4_mask: ++ case CODE_FOR_avx512f_vextractf64x4_mask: ++ case CODE_FOR_avx512f_vextracti64x4_mask: ++ case CODE_FOR_avx512dq_vinsertf32x8_mask: ++ case CODE_FOR_avx512dq_vinserti32x8_mask: ++ case CODE_FOR_avx512vl_vinsertv4df: ++ case CODE_FOR_avx512vl_vinsertv4di: ++ case CODE_FOR_avx512vl_vinsertv8sf: ++ case CODE_FOR_avx512vl_vinsertv8si: ++ error ("the last argument must be a 1-bit immediate"); ++ return const0_rtx; ++ ++ case CODE_FOR_avx_vmcmpv2df3: ++ case CODE_FOR_avx_vmcmpv4sf3: ++ case CODE_FOR_avx_cmpv2df3: ++ case CODE_FOR_avx_cmpv4sf3: ++ case CODE_FOR_avx_cmpv4df3: ++ case CODE_FOR_avx_cmpv8sf3: ++ case CODE_FOR_avx512f_cmpv8df3_mask: ++ case CODE_FOR_avx512f_cmpv16sf3_mask: ++ case CODE_FOR_avx512f_vmcmpv2df3_mask: ++ case CODE_FOR_avx512f_vmcmpv4sf3_mask: ++ error ("the last argument must be a 5-bit immediate"); ++ return const0_rtx; ++ ++ default: ++ switch (nargs_constant) ++ { ++ case 2: ++ if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || ++ (!mask_pos && (nargs - i) == nargs_constant)) ++ { ++ error ("the next to last argument must be an 8-bit immediate"); ++ break; ++ } ++ /* FALLTHRU */ ++ case 1: ++ error ("the last argument must be an 8-bit immediate"); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ return const0_rtx; ++ } ++ } ++ else ++ { ++ if (VECTOR_MODE_P (mode)) ++ op = safe_vector_operand (op, mode); ++ ++ /* If we aren't optimizing, only allow one memory operand to ++ be generated. */ ++ if (memory_operand (op, mode)) ++ num_memory++; ++ ++ op = fixup_modeless_constant (op, mode); ++ ++ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) ++ { ++ if (optimize || !match || num_memory > 1) ++ op = copy_to_mode_reg (mode, op); ++ } ++ else ++ { ++ op = copy_to_reg (op); ++ op = lowpart_subreg (mode, op, GET_MODE (op)); ++ } ++ } ++ ++ args[i].op = op; ++ args[i].mode = mode; ++ } ++ ++ switch (nargs) ++ { ++ case 1: ++ pat = GEN_FCN (icode) (real_target, args[0].op); ++ break; ++ case 2: ++ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op); ++ break; ++ case 3: ++ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, ++ args[2].op); ++ break; ++ case 4: ++ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, ++ args[2].op, args[3].op); ++ break; ++ case 5: ++ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, ++ args[2].op, args[3].op, args[4].op); ++ break; ++ case 6: ++ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, ++ args[2].op, args[3].op, args[4].op, ++ args[5].op); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (! 
pat) ++ return 0; ++ ++ emit_insn (pat); ++ return target; ++} ++ ++/* Transform pattern of following layout: ++ (set A ++ (unspec [B C] UNSPEC_EMBEDDED_ROUNDING)) ++ ) ++ into: ++ (set (A B)) */ ++ ++static rtx ++ix86_erase_embedded_rounding (rtx pat) ++{ ++ if (GET_CODE (pat) == INSN) ++ pat = PATTERN (pat); ++ ++ gcc_assert (GET_CODE (pat) == SET); ++ rtx src = SET_SRC (pat); ++ gcc_assert (XVECLEN (src, 0) == 2); ++ rtx p0 = XVECEXP (src, 0, 0); ++ gcc_assert (GET_CODE (src) == UNSPEC ++ && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING); ++ rtx res = gen_rtx_SET (SET_DEST (pat), p0); ++ return res; ++} ++ ++/* Subroutine of ix86_expand_round_builtin to take care of comi insns ++ with rounding. */ ++static rtx ++ix86_expand_sse_comi_round (const struct builtin_description *d, ++ tree exp, rtx target) ++{ ++ rtx pat, set_dst; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ tree arg1 = CALL_EXPR_ARG (exp, 1); ++ tree arg2 = CALL_EXPR_ARG (exp, 2); ++ tree arg3 = CALL_EXPR_ARG (exp, 3); ++ rtx op0 = expand_normal (arg0); ++ rtx op1 = expand_normal (arg1); ++ rtx op2 = expand_normal (arg2); ++ rtx op3 = expand_normal (arg3); ++ enum insn_code icode = d->icode; ++ const struct insn_data_d *insn_p = &insn_data[icode]; ++ machine_mode mode0 = insn_p->operand[0].mode; ++ machine_mode mode1 = insn_p->operand[1].mode; ++ enum rtx_code comparison = UNEQ; ++ bool need_ucomi = false; ++ ++ /* See avxintrin.h for values. */ ++ enum rtx_code comi_comparisons[32] = ++ { ++ UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT, ++ UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE, ++ UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT ++ }; ++ bool need_ucomi_values[32] = ++ { ++ true, false, false, true, true, false, false, true, ++ true, false, false, true, true, false, false, true, ++ false, true, true, false, false, true, true, false, ++ false, true, true, false, false, true, true, false ++ }; ++ ++ if (!CONST_INT_P (op2)) ++ { ++ error ("the third argument must be comparison constant"); ++ return const0_rtx; ++ } ++ if (INTVAL (op2) < 0 || INTVAL (op2) >= 32) ++ { ++ error ("incorrect comparison mode"); ++ return const0_rtx; ++ } ++ ++ if (!insn_p->operand[2].predicate (op3, SImode)) ++ { ++ error ("incorrect rounding operand"); ++ return const0_rtx; ++ } ++ ++ comparison = comi_comparisons[INTVAL (op2)]; ++ need_ucomi = need_ucomi_values[INTVAL (op2)]; ++ ++ if (VECTOR_MODE_P (mode0)) ++ op0 = safe_vector_operand (op0, mode0); ++ if (VECTOR_MODE_P (mode1)) ++ op1 = safe_vector_operand (op1, mode1); ++ ++ target = gen_reg_rtx (SImode); ++ emit_move_insn (target, const0_rtx); ++ target = gen_rtx_SUBREG (QImode, target, 0); ++ ++ if ((optimize && !register_operand (op0, mode0)) ++ || !insn_p->operand[0].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ if ((optimize && !register_operand (op1, mode1)) ++ || !insn_p->operand[1].predicate (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ ++ if (need_ucomi) ++ icode = icode == CODE_FOR_sse_comi_round ++ ? CODE_FOR_sse_ucomi_round ++ : CODE_FOR_sse2_ucomi_round; ++ ++ pat = GEN_FCN (icode) (op0, op1, op3); ++ if (! pat) ++ return 0; ++ ++ /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */ ++ if (INTVAL (op3) == NO_ROUND) ++ { ++ pat = ix86_erase_embedded_rounding (pat); ++ if (! 
pat) ++ return 0; ++ ++ set_dst = SET_DEST (pat); ++ } ++ else ++ { ++ gcc_assert (GET_CODE (pat) == SET); ++ set_dst = SET_DEST (pat); ++ } ++ ++ emit_insn (pat); ++ emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), ++ gen_rtx_fmt_ee (comparison, QImode, ++ set_dst, ++ const0_rtx))); ++ ++ return SUBREG_REG (target); ++} ++ ++static rtx ++ix86_expand_round_builtin (const struct builtin_description *d, ++ tree exp, rtx target) ++{ ++ rtx pat; ++ unsigned int i, nargs; ++ struct ++ { ++ rtx op; ++ machine_mode mode; ++ } args[6]; ++ enum insn_code icode = d->icode; ++ const struct insn_data_d *insn_p = &insn_data[icode]; ++ machine_mode tmode = insn_p->operand[0].mode; ++ unsigned int nargs_constant = 0; ++ unsigned int redundant_embed_rnd = 0; ++ ++ switch ((enum ix86_builtin_func_type) d->flag) ++ { ++ case UINT64_FTYPE_V2DF_INT: ++ case UINT64_FTYPE_V4SF_INT: ++ case UINT_FTYPE_V2DF_INT: ++ case UINT_FTYPE_V4SF_INT: ++ case INT64_FTYPE_V2DF_INT: ++ case INT64_FTYPE_V4SF_INT: ++ case INT_FTYPE_V2DF_INT: ++ case INT_FTYPE_V4SF_INT: ++ nargs = 2; ++ break; ++ case V4SF_FTYPE_V4SF_UINT_INT: ++ case V4SF_FTYPE_V4SF_UINT64_INT: ++ case V2DF_FTYPE_V2DF_UINT64_INT: ++ case V4SF_FTYPE_V4SF_INT_INT: ++ case V4SF_FTYPE_V4SF_INT64_INT: ++ case V2DF_FTYPE_V2DF_INT64_INT: ++ case V4SF_FTYPE_V4SF_V4SF_INT: ++ case V2DF_FTYPE_V2DF_V2DF_INT: ++ case V4SF_FTYPE_V4SF_V2DF_INT: ++ case V2DF_FTYPE_V2DF_V4SF_INT: ++ nargs = 3; ++ break; ++ case V8SF_FTYPE_V8DF_V8SF_QI_INT: ++ case V8DF_FTYPE_V8DF_V8DF_QI_INT: ++ case V8SI_FTYPE_V8DF_V8SI_QI_INT: ++ case V8DI_FTYPE_V8DF_V8DI_QI_INT: ++ case V8SF_FTYPE_V8DI_V8SF_QI_INT: ++ case V8DF_FTYPE_V8DI_V8DF_QI_INT: ++ case V16SF_FTYPE_V16SF_V16SF_HI_INT: ++ case V8DI_FTYPE_V8SF_V8DI_QI_INT: ++ case V16SF_FTYPE_V16SI_V16SF_HI_INT: ++ case V16SI_FTYPE_V16SF_V16SI_HI_INT: ++ case V8DF_FTYPE_V8SF_V8DF_QI_INT: ++ case V16SF_FTYPE_V16HI_V16SF_HI_INT: ++ case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: ++ case V4SF_FTYPE_V4SF_V4SF_V4SF_INT: ++ nargs = 4; ++ break; ++ case V4SF_FTYPE_V4SF_V4SF_INT_INT: ++ case V2DF_FTYPE_V2DF_V2DF_INT_INT: ++ nargs_constant = 2; ++ nargs = 4; ++ break; ++ case INT_FTYPE_V4SF_V4SF_INT_INT: ++ case INT_FTYPE_V2DF_V2DF_INT_INT: ++ return ix86_expand_sse_comi_round (d, exp, target); ++ case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT: ++ case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT: ++ case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT: ++ case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: ++ case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: ++ case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: ++ case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT: ++ case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT: ++ nargs = 5; ++ break; ++ case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT: ++ case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT: ++ nargs_constant = 4; ++ nargs = 5; ++ break; ++ case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT: ++ case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT: ++ case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT: ++ case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT: ++ nargs_constant = 3; ++ nargs = 5; ++ break; ++ case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT: ++ case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT: ++ case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT: ++ case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT: ++ case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT: ++ case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT: ++ nargs = 6; ++ nargs_constant = 4; ++ break; ++ case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT: ++ case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT: ++ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT: ++ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT: ++ nargs = 6; ++ nargs_constant = 3; ++ break; ++ 
default: ++ gcc_unreachable (); ++ } ++ gcc_assert (nargs <= ARRAY_SIZE (args)); ++ ++ if (optimize ++ || target == 0 ++ || GET_MODE (target) != tmode ++ || !insn_p->operand[0].predicate (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ for (i = 0; i < nargs; i++) ++ { ++ tree arg = CALL_EXPR_ARG (exp, i); ++ rtx op = expand_normal (arg); ++ machine_mode mode = insn_p->operand[i + 1].mode; ++ bool match = insn_p->operand[i + 1].predicate (op, mode); ++ ++ if (i == nargs - nargs_constant) ++ { ++ if (!match) ++ { ++ switch (icode) ++ { ++ case CODE_FOR_avx512f_getmantv8df_mask_round: ++ case CODE_FOR_avx512f_getmantv16sf_mask_round: ++ case CODE_FOR_avx512f_vgetmantv2df_round: ++ case CODE_FOR_avx512f_vgetmantv2df_mask_round: ++ case CODE_FOR_avx512f_vgetmantv4sf_round: ++ case CODE_FOR_avx512f_vgetmantv4sf_mask_round: ++ error ("the immediate argument must be a 4-bit immediate"); ++ return const0_rtx; ++ case CODE_FOR_avx512f_cmpv8df3_mask_round: ++ case CODE_FOR_avx512f_cmpv16sf3_mask_round: ++ case CODE_FOR_avx512f_vmcmpv2df3_mask_round: ++ case CODE_FOR_avx512f_vmcmpv4sf3_mask_round: ++ error ("the immediate argument must be a 5-bit immediate"); ++ return const0_rtx; ++ default: ++ error ("the immediate argument must be an 8-bit immediate"); ++ return const0_rtx; ++ } ++ } ++ } ++ else if (i == nargs-1) ++ { ++ if (!insn_p->operand[nargs].predicate (op, SImode)) ++ { ++ error ("incorrect rounding operand"); ++ return const0_rtx; ++ } ++ ++ /* If there is no rounding use normal version of the pattern. */ ++ if (INTVAL (op) == NO_ROUND) ++ redundant_embed_rnd = 1; ++ } ++ else ++ { ++ if (VECTOR_MODE_P (mode)) ++ op = safe_vector_operand (op, mode); ++ ++ op = fixup_modeless_constant (op, mode); ++ ++ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) ++ { ++ if (optimize || !match) ++ op = copy_to_mode_reg (mode, op); ++ } ++ else ++ { ++ op = copy_to_reg (op); ++ op = lowpart_subreg (mode, op, GET_MODE (op)); ++ } ++ } ++ ++ args[i].op = op; ++ args[i].mode = mode; ++ } ++ ++ switch (nargs) ++ { ++ case 1: ++ pat = GEN_FCN (icode) (target, args[0].op); ++ break; ++ case 2: ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op); ++ break; ++ case 3: ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, ++ args[2].op); ++ break; ++ case 4: ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, ++ args[2].op, args[3].op); ++ break; ++ case 5: ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, ++ args[2].op, args[3].op, args[4].op); ++ break; ++ case 6: ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, ++ args[2].op, args[3].op, args[4].op, ++ args[5].op); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (!pat) ++ return 0; ++ ++ if (redundant_embed_rnd) ++ pat = ix86_erase_embedded_rounding (pat); ++ ++ emit_insn (pat); ++ return target; ++} ++ ++/* Subroutine of ix86_expand_builtin to take care of special insns ++ with variable number of operands. 
*/ ++ ++static rtx ++ix86_expand_special_args_builtin (const struct builtin_description *d, ++ tree exp, rtx target) ++{ ++ tree arg; ++ rtx pat, op; ++ unsigned int i, nargs, arg_adjust, memory; ++ bool aligned_mem = false; ++ struct ++ { ++ rtx op; ++ machine_mode mode; ++ } args[3]; ++ enum insn_code icode = d->icode; ++ bool last_arg_constant = false; ++ const struct insn_data_d *insn_p = &insn_data[icode]; ++ machine_mode tmode = insn_p->operand[0].mode; ++ enum { load, store } klass; ++ ++ switch ((enum ix86_builtin_func_type) d->flag) ++ { ++ case VOID_FTYPE_VOID: ++ emit_insn (GEN_FCN (icode) (target)); ++ return 0; ++ case VOID_FTYPE_UINT64: ++ case VOID_FTYPE_UNSIGNED: ++ nargs = 0; ++ klass = store; ++ memory = 0; ++ break; ++ ++ case INT_FTYPE_VOID: ++ case USHORT_FTYPE_VOID: ++ case UINT64_FTYPE_VOID: ++ case UINT_FTYPE_VOID: ++ case UNSIGNED_FTYPE_VOID: ++ nargs = 0; ++ klass = load; ++ memory = 0; ++ break; ++ case UINT64_FTYPE_PUNSIGNED: ++ case V2DI_FTYPE_PV2DI: ++ case V4DI_FTYPE_PV4DI: ++ case V32QI_FTYPE_PCCHAR: ++ case V16QI_FTYPE_PCCHAR: ++ case V8SF_FTYPE_PCV4SF: ++ case V8SF_FTYPE_PCFLOAT: ++ case V4SF_FTYPE_PCFLOAT: ++ case V4DF_FTYPE_PCV2DF: ++ case V4DF_FTYPE_PCDOUBLE: ++ case V2DF_FTYPE_PCDOUBLE: ++ case VOID_FTYPE_PVOID: ++ case V8DI_FTYPE_PV8DI: ++ nargs = 1; ++ klass = load; ++ memory = 0; ++ switch (icode) ++ { ++ case CODE_FOR_sse4_1_movntdqa: ++ case CODE_FOR_avx2_movntdqa: ++ case CODE_FOR_avx512f_movntdqa: ++ aligned_mem = true; ++ break; ++ default: ++ break; ++ } ++ break; ++ case VOID_FTYPE_PV2SF_V4SF: ++ case VOID_FTYPE_PV8DI_V8DI: ++ case VOID_FTYPE_PV4DI_V4DI: ++ case VOID_FTYPE_PV2DI_V2DI: ++ case VOID_FTYPE_PCHAR_V32QI: ++ case VOID_FTYPE_PCHAR_V16QI: ++ case VOID_FTYPE_PFLOAT_V16SF: ++ case VOID_FTYPE_PFLOAT_V8SF: ++ case VOID_FTYPE_PFLOAT_V4SF: ++ case VOID_FTYPE_PDOUBLE_V8DF: ++ case VOID_FTYPE_PDOUBLE_V4DF: ++ case VOID_FTYPE_PDOUBLE_V2DF: ++ case VOID_FTYPE_PLONGLONG_LONGLONG: ++ case VOID_FTYPE_PULONGLONG_ULONGLONG: ++ case VOID_FTYPE_PUNSIGNED_UNSIGNED: ++ case VOID_FTYPE_PINT_INT: ++ nargs = 1; ++ klass = store; ++ /* Reserve memory operand for target. */ ++ memory = ARRAY_SIZE (args); ++ switch (icode) ++ { ++ /* These builtins and instructions require the memory ++ to be properly aligned. 
*/ ++ case CODE_FOR_avx_movntv4di: ++ case CODE_FOR_sse2_movntv2di: ++ case CODE_FOR_avx_movntv8sf: ++ case CODE_FOR_sse_movntv4sf: ++ case CODE_FOR_sse4a_vmmovntv4sf: ++ case CODE_FOR_avx_movntv4df: ++ case CODE_FOR_sse2_movntv2df: ++ case CODE_FOR_sse4a_vmmovntv2df: ++ case CODE_FOR_sse2_movntidi: ++ case CODE_FOR_sse_movntq: ++ case CODE_FOR_sse2_movntisi: ++ case CODE_FOR_avx512f_movntv16sf: ++ case CODE_FOR_avx512f_movntv8df: ++ case CODE_FOR_avx512f_movntv8di: ++ aligned_mem = true; ++ break; ++ default: ++ break; ++ } ++ break; ++ case VOID_FTYPE_PVOID_PCVOID: ++ nargs = 1; ++ klass = store; ++ memory = 0; ++ ++ break; ++ case V4SF_FTYPE_V4SF_PCV2SF: ++ case V2DF_FTYPE_V2DF_PCDOUBLE: ++ nargs = 2; ++ klass = load; ++ memory = 1; ++ break; ++ case V8SF_FTYPE_PCV8SF_V8SI: ++ case V4DF_FTYPE_PCV4DF_V4DI: ++ case V4SF_FTYPE_PCV4SF_V4SI: ++ case V2DF_FTYPE_PCV2DF_V2DI: ++ case V8SI_FTYPE_PCV8SI_V8SI: ++ case V4DI_FTYPE_PCV4DI_V4DI: ++ case V4SI_FTYPE_PCV4SI_V4SI: ++ case V2DI_FTYPE_PCV2DI_V2DI: ++ case VOID_FTYPE_INT_INT64: ++ nargs = 2; ++ klass = load; ++ memory = 0; ++ break; ++ case VOID_FTYPE_PV8DF_V8DF_UQI: ++ case VOID_FTYPE_PV4DF_V4DF_UQI: ++ case VOID_FTYPE_PV2DF_V2DF_UQI: ++ case VOID_FTYPE_PV16SF_V16SF_UHI: ++ case VOID_FTYPE_PV8SF_V8SF_UQI: ++ case VOID_FTYPE_PV4SF_V4SF_UQI: ++ case VOID_FTYPE_PV8DI_V8DI_UQI: ++ case VOID_FTYPE_PV4DI_V4DI_UQI: ++ case VOID_FTYPE_PV2DI_V2DI_UQI: ++ case VOID_FTYPE_PV16SI_V16SI_UHI: ++ case VOID_FTYPE_PV8SI_V8SI_UQI: ++ case VOID_FTYPE_PV4SI_V4SI_UQI: ++ case VOID_FTYPE_PV64QI_V64QI_UDI: ++ case VOID_FTYPE_PV32HI_V32HI_USI: ++ case VOID_FTYPE_PV32QI_V32QI_USI: ++ case VOID_FTYPE_PV16QI_V16QI_UHI: ++ case VOID_FTYPE_PV16HI_V16HI_UHI: ++ case VOID_FTYPE_PV8HI_V8HI_UQI: ++ switch (icode) ++ { ++ /* These builtins and instructions require the memory ++ to be properly aligned. 
*/ ++ case CODE_FOR_avx512f_storev16sf_mask: ++ case CODE_FOR_avx512f_storev16si_mask: ++ case CODE_FOR_avx512f_storev8df_mask: ++ case CODE_FOR_avx512f_storev8di_mask: ++ case CODE_FOR_avx512vl_storev8sf_mask: ++ case CODE_FOR_avx512vl_storev8si_mask: ++ case CODE_FOR_avx512vl_storev4df_mask: ++ case CODE_FOR_avx512vl_storev4di_mask: ++ case CODE_FOR_avx512vl_storev4sf_mask: ++ case CODE_FOR_avx512vl_storev4si_mask: ++ case CODE_FOR_avx512vl_storev2df_mask: ++ case CODE_FOR_avx512vl_storev2di_mask: ++ aligned_mem = true; ++ break; ++ default: ++ break; ++ } ++ /* FALLTHRU */ ++ case VOID_FTYPE_PV8SF_V8SI_V8SF: ++ case VOID_FTYPE_PV4DF_V4DI_V4DF: ++ case VOID_FTYPE_PV4SF_V4SI_V4SF: ++ case VOID_FTYPE_PV2DF_V2DI_V2DF: ++ case VOID_FTYPE_PV8SI_V8SI_V8SI: ++ case VOID_FTYPE_PV4DI_V4DI_V4DI: ++ case VOID_FTYPE_PV4SI_V4SI_V4SI: ++ case VOID_FTYPE_PV2DI_V2DI_V2DI: ++ case VOID_FTYPE_PV8SI_V8DI_UQI: ++ case VOID_FTYPE_PV8HI_V8DI_UQI: ++ case VOID_FTYPE_PV16HI_V16SI_UHI: ++ case VOID_FTYPE_PV16QI_V8DI_UQI: ++ case VOID_FTYPE_PV16QI_V16SI_UHI: ++ case VOID_FTYPE_PV4SI_V4DI_UQI: ++ case VOID_FTYPE_PV4SI_V2DI_UQI: ++ case VOID_FTYPE_PV8HI_V4DI_UQI: ++ case VOID_FTYPE_PV8HI_V2DI_UQI: ++ case VOID_FTYPE_PV8HI_V8SI_UQI: ++ case VOID_FTYPE_PV8HI_V4SI_UQI: ++ case VOID_FTYPE_PV16QI_V4DI_UQI: ++ case VOID_FTYPE_PV16QI_V2DI_UQI: ++ case VOID_FTYPE_PV16QI_V8SI_UQI: ++ case VOID_FTYPE_PV16QI_V4SI_UQI: ++ case VOID_FTYPE_PCHAR_V64QI_UDI: ++ case VOID_FTYPE_PCHAR_V32QI_USI: ++ case VOID_FTYPE_PCHAR_V16QI_UHI: ++ case VOID_FTYPE_PSHORT_V32HI_USI: ++ case VOID_FTYPE_PSHORT_V16HI_UHI: ++ case VOID_FTYPE_PSHORT_V8HI_UQI: ++ case VOID_FTYPE_PINT_V16SI_UHI: ++ case VOID_FTYPE_PINT_V8SI_UQI: ++ case VOID_FTYPE_PINT_V4SI_UQI: ++ case VOID_FTYPE_PINT64_V8DI_UQI: ++ case VOID_FTYPE_PINT64_V4DI_UQI: ++ case VOID_FTYPE_PINT64_V2DI_UQI: ++ case VOID_FTYPE_PDOUBLE_V8DF_UQI: ++ case VOID_FTYPE_PDOUBLE_V4DF_UQI: ++ case VOID_FTYPE_PDOUBLE_V2DF_UQI: ++ case VOID_FTYPE_PFLOAT_V16SF_UHI: ++ case VOID_FTYPE_PFLOAT_V8SF_UQI: ++ case VOID_FTYPE_PFLOAT_V4SF_UQI: ++ case VOID_FTYPE_PV32QI_V32HI_USI: ++ case VOID_FTYPE_PV16QI_V16HI_UHI: ++ case VOID_FTYPE_PV8QI_V8HI_UQI: ++ nargs = 2; ++ klass = store; ++ /* Reserve memory operand for target. */ ++ memory = ARRAY_SIZE (args); ++ break; ++ case V4SF_FTYPE_PCV4SF_V4SF_UQI: ++ case V8SF_FTYPE_PCV8SF_V8SF_UQI: ++ case V16SF_FTYPE_PCV16SF_V16SF_UHI: ++ case V4SI_FTYPE_PCV4SI_V4SI_UQI: ++ case V8SI_FTYPE_PCV8SI_V8SI_UQI: ++ case V16SI_FTYPE_PCV16SI_V16SI_UHI: ++ case V2DF_FTYPE_PCV2DF_V2DF_UQI: ++ case V4DF_FTYPE_PCV4DF_V4DF_UQI: ++ case V8DF_FTYPE_PCV8DF_V8DF_UQI: ++ case V2DI_FTYPE_PCV2DI_V2DI_UQI: ++ case V4DI_FTYPE_PCV4DI_V4DI_UQI: ++ case V8DI_FTYPE_PCV8DI_V8DI_UQI: ++ case V64QI_FTYPE_PCV64QI_V64QI_UDI: ++ case V32HI_FTYPE_PCV32HI_V32HI_USI: ++ case V32QI_FTYPE_PCV32QI_V32QI_USI: ++ case V16QI_FTYPE_PCV16QI_V16QI_UHI: ++ case V16HI_FTYPE_PCV16HI_V16HI_UHI: ++ case V8HI_FTYPE_PCV8HI_V8HI_UQI: ++ switch (icode) ++ { ++ /* These builtins and instructions require the memory ++ to be properly aligned. 
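Likewise for the masked loads handled next (user-level sketch, not from the patch itself; assumes <immintrin.h>, -mavx512f at compile time and AVX-512 hardware): the aligned load builtins listed below expect the source to carry the vector mode's natural alignment, e.g. 64 bytes for the 512-bit forms, while the unaligned loadu variants do not:

    #include <immintrin.h>
    #include <stdio.h>

    int
    main (void)
    {
      static double src[8] __attribute__ ((aligned (64))) =
        { 1, 2, 3, 4, 5, 6, 7, 8 };
      __m512d fallthru = _mm512_set1_pd (0.0);
      /* Mask 0x55 loads the even elements; the rest come from FALLTHRU.  */
      __m512d v = _mm512_mask_load_pd (fallthru, (__mmask8) 0x55, src);
      double out[8];
      _mm512_storeu_pd (out, v);
      printf ("%g %g\n", out[0], out[1]);   /* prints: 1 0 */
      return 0;
    }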
*/ ++ case CODE_FOR_avx512f_loadv16sf_mask: ++ case CODE_FOR_avx512f_loadv16si_mask: ++ case CODE_FOR_avx512f_loadv8df_mask: ++ case CODE_FOR_avx512f_loadv8di_mask: ++ case CODE_FOR_avx512vl_loadv8sf_mask: ++ case CODE_FOR_avx512vl_loadv8si_mask: ++ case CODE_FOR_avx512vl_loadv4df_mask: ++ case CODE_FOR_avx512vl_loadv4di_mask: ++ case CODE_FOR_avx512vl_loadv4sf_mask: ++ case CODE_FOR_avx512vl_loadv4si_mask: ++ case CODE_FOR_avx512vl_loadv2df_mask: ++ case CODE_FOR_avx512vl_loadv2di_mask: ++ case CODE_FOR_avx512bw_loadv64qi_mask: ++ case CODE_FOR_avx512vl_loadv32qi_mask: ++ case CODE_FOR_avx512vl_loadv16qi_mask: ++ case CODE_FOR_avx512bw_loadv32hi_mask: ++ case CODE_FOR_avx512vl_loadv16hi_mask: ++ case CODE_FOR_avx512vl_loadv8hi_mask: ++ aligned_mem = true; ++ break; ++ default: ++ break; ++ } ++ /* FALLTHRU */ ++ case V64QI_FTYPE_PCCHAR_V64QI_UDI: ++ case V32QI_FTYPE_PCCHAR_V32QI_USI: ++ case V16QI_FTYPE_PCCHAR_V16QI_UHI: ++ case V32HI_FTYPE_PCSHORT_V32HI_USI: ++ case V16HI_FTYPE_PCSHORT_V16HI_UHI: ++ case V8HI_FTYPE_PCSHORT_V8HI_UQI: ++ case V16SI_FTYPE_PCINT_V16SI_UHI: ++ case V8SI_FTYPE_PCINT_V8SI_UQI: ++ case V4SI_FTYPE_PCINT_V4SI_UQI: ++ case V8DI_FTYPE_PCINT64_V8DI_UQI: ++ case V4DI_FTYPE_PCINT64_V4DI_UQI: ++ case V2DI_FTYPE_PCINT64_V2DI_UQI: ++ case V8DF_FTYPE_PCDOUBLE_V8DF_UQI: ++ case V4DF_FTYPE_PCDOUBLE_V4DF_UQI: ++ case V2DF_FTYPE_PCDOUBLE_V2DF_UQI: ++ case V16SF_FTYPE_PCFLOAT_V16SF_UHI: ++ case V8SF_FTYPE_PCFLOAT_V8SF_UQI: ++ case V4SF_FTYPE_PCFLOAT_V4SF_UQI: ++ nargs = 3; ++ klass = load; ++ memory = 0; ++ break; ++ case VOID_FTYPE_UINT_UINT_UINT: ++ case VOID_FTYPE_UINT64_UINT_UINT: ++ case UCHAR_FTYPE_UINT_UINT_UINT: ++ case UCHAR_FTYPE_UINT64_UINT_UINT: ++ nargs = 3; ++ klass = load; ++ memory = ARRAY_SIZE (args); ++ last_arg_constant = true; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ gcc_assert (nargs <= ARRAY_SIZE (args)); ++ ++ if (klass == store) ++ { ++ arg = CALL_EXPR_ARG (exp, 0); ++ op = expand_normal (arg); ++ gcc_assert (target == 0); ++ if (memory) ++ { ++ op = ix86_zero_extend_to_Pmode (op); ++ target = gen_rtx_MEM (tmode, op); ++ /* target at this point has just BITS_PER_UNIT MEM_ALIGN ++ on it. Try to improve it using get_pointer_alignment, ++ and if the special builtin is one that requires strict ++ mode alignment, also from it's GET_MODE_ALIGNMENT. ++ Failure to do so could lead to ix86_legitimate_combined_insn ++ rejecting all changes to such insns. 
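A small plain-C sketch (not from the patch itself) of the alignment decision the comment above describes: keep whatever get_pointer_alignment can prove, and for strict-alignment builtins raise it to the mode's natural alignment before set_mem_align is considered:

    #include <stdio.h>

    /* ptr_align stands in for get_pointer_alignment, mode_align for
       GET_MODE_ALIGNMENT and strict_p for the aligned_mem flag;
       values are in bits, as for MEM_ALIGN.  */
    static unsigned int
    choose_mem_align (unsigned int ptr_align, unsigned int mode_align,
                      int strict_p)
    {
      unsigned int align = ptr_align;
      if (strict_p && align < mode_align)
        align = mode_align;
      return align;
    }

    int
    main (void)
    {
      printf ("%u\n", choose_mem_align (8, 256, 1));   /* movnt-style store: 256 */
      printf ("%u\n", choose_mem_align (8, 256, 0));   /* ordinary store: 8 */
      return 0;
    }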
*/ ++ unsigned int align = get_pointer_alignment (arg); ++ if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode)) ++ align = GET_MODE_ALIGNMENT (tmode); ++ if (MEM_ALIGN (target) < align) ++ set_mem_align (target, align); ++ } ++ else ++ target = force_reg (tmode, op); ++ arg_adjust = 1; ++ } ++ else ++ { ++ arg_adjust = 0; ++ if (optimize ++ || target == 0 ++ || !register_operand (target, tmode) ++ || GET_MODE (target) != tmode) ++ target = gen_reg_rtx (tmode); ++ } ++ ++ for (i = 0; i < nargs; i++) ++ { ++ machine_mode mode = insn_p->operand[i + 1].mode; ++ bool match; ++ ++ arg = CALL_EXPR_ARG (exp, i + arg_adjust); ++ op = expand_normal (arg); ++ match = insn_p->operand[i + 1].predicate (op, mode); ++ ++ if (last_arg_constant && (i + 1) == nargs) ++ { ++ if (!match) ++ { ++ if (icode == CODE_FOR_lwp_lwpvalsi3 ++ || icode == CODE_FOR_lwp_lwpinssi3 ++ || icode == CODE_FOR_lwp_lwpvaldi3 ++ || icode == CODE_FOR_lwp_lwpinsdi3) ++ error ("the last argument must be a 32-bit immediate"); ++ else ++ error ("the last argument must be an 8-bit immediate"); ++ return const0_rtx; ++ } ++ } ++ else ++ { ++ if (i == memory) ++ { ++ /* This must be the memory operand. */ ++ op = ix86_zero_extend_to_Pmode (op); ++ op = gen_rtx_MEM (mode, op); ++ /* op at this point has just BITS_PER_UNIT MEM_ALIGN ++ on it. Try to improve it using get_pointer_alignment, ++ and if the special builtin is one that requires strict ++ mode alignment, also from it's GET_MODE_ALIGNMENT. ++ Failure to do so could lead to ix86_legitimate_combined_insn ++ rejecting all changes to such insns. */ ++ unsigned int align = get_pointer_alignment (arg); ++ if (aligned_mem && align < GET_MODE_ALIGNMENT (mode)) ++ align = GET_MODE_ALIGNMENT (mode); ++ if (MEM_ALIGN (op) < align) ++ set_mem_align (op, align); ++ } ++ else ++ { ++ /* This must be register. */ ++ if (VECTOR_MODE_P (mode)) ++ op = safe_vector_operand (op, mode); ++ ++ op = fixup_modeless_constant (op, mode); ++ ++ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) ++ op = copy_to_mode_reg (mode, op); ++ else ++ { ++ op = copy_to_reg (op); ++ op = lowpart_subreg (mode, op, GET_MODE (op)); ++ } ++ } ++ } ++ ++ args[i].op = op; ++ args[i].mode = mode; ++ } ++ ++ switch (nargs) ++ { ++ case 0: ++ pat = GEN_FCN (icode) (target); ++ break; ++ case 1: ++ pat = GEN_FCN (icode) (target, args[0].op); ++ break; ++ case 2: ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op); ++ break; ++ case 3: ++ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ return klass == store ? 0 : target; ++} ++ ++/* Return the integer constant in ARG. Constrain it to be in the range ++ of the subparts of VEC_TYPE; issue an error if not. */ ++ ++static int ++get_element_number (tree vec_type, tree arg) ++{ ++ unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; ++ ++ if (!tree_fits_uhwi_p (arg) ++ || (elt = tree_to_uhwi (arg), elt > max)) ++ { ++ error ("selector must be an integer constant in the range " ++ "[0, %wi]", max); ++ return 0; ++ } ++ ++ return elt; ++} ++ ++/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around ++ ix86_expand_vector_init. We DO have language-level syntax for this, in ++ the form of (type){ init-list }. Except that since we can't place emms ++ instructions from inside the compiler, we can't allow the use of MMX ++ registers unless the user explicitly asks for it. 
So we do *not* define ++ vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead ++ we have builtins invoked by mmintrin.h that gives us license to emit ++ these sorts of instructions. */ ++ ++static rtx ++ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) ++{ ++ machine_mode tmode = TYPE_MODE (type); ++ machine_mode inner_mode = GET_MODE_INNER (tmode); ++ int i, n_elt = GET_MODE_NUNITS (tmode); ++ rtvec v = rtvec_alloc (n_elt); ++ ++ gcc_assert (VECTOR_MODE_P (tmode)); ++ gcc_assert (call_expr_nargs (exp) == n_elt); ++ ++ for (i = 0; i < n_elt; ++i) ++ { ++ rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); ++ RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); ++ } ++ ++ if (!target || !register_operand (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); ++ return target; ++} ++ ++/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around ++ ix86_expand_vector_extract. They would be redundant (for non-MMX) if we ++ had a language-level syntax for referencing vector elements. */ ++ ++static rtx ++ix86_expand_vec_ext_builtin (tree exp, rtx target) ++{ ++ machine_mode tmode, mode0; ++ tree arg0, arg1; ++ int elt; ++ rtx op0; ++ ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ ++ op0 = expand_normal (arg0); ++ elt = get_element_number (TREE_TYPE (arg0), arg1); ++ ++ tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); ++ mode0 = TYPE_MODE (TREE_TYPE (arg0)); ++ gcc_assert (VECTOR_MODE_P (mode0)); ++ ++ op0 = force_reg (mode0, op0); ++ ++ if (optimize || !target || !register_operand (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ ix86_expand_vector_extract (true, target, op0, elt); ++ ++ return target; ++} ++ ++/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around ++ ix86_expand_vector_set. They would be redundant (for non-MMX) if we had ++ a language-level syntax for referencing vector elements. */ ++ ++static rtx ++ix86_expand_vec_set_builtin (tree exp) ++{ ++ machine_mode tmode, mode1; ++ tree arg0, arg1, arg2; ++ int elt; ++ rtx op0, op1, target; ++ ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ arg2 = CALL_EXPR_ARG (exp, 2); ++ ++ tmode = TYPE_MODE (TREE_TYPE (arg0)); ++ mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); ++ gcc_assert (VECTOR_MODE_P (tmode)); ++ ++ op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); ++ op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); ++ elt = get_element_number (TREE_TYPE (arg0), arg2); ++ ++ if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) ++ op1 = convert_modes (mode1, GET_MODE (op1), op1, true); ++ ++ op0 = force_reg (tmode, op0); ++ op1 = force_reg (mode1, op1); ++ ++ /* OP0 is the source of these builtin functions and shouldn't be ++ modified. Create a copy, use it and return it as target. */ ++ target = gen_reg_rtx (tmode); ++ emit_move_insn (target, op0); ++ ix86_expand_vector_set (true, target, op1, elt); ++ ++ return target; ++} ++ ++/* Expand an expression EXP that calls a built-in function, ++ with result going to TARGET if that's convenient ++ (and in mode MODE if that's convenient). ++ SUBTARGET may be used as the target for computing one of EXP's operands. ++ IGNORE is nonzero if the value is to be ignored. 
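For illustration (not from the patch itself; NELTS stands in for TYPE_VECTOR_SUBPARTS), a plain-C sketch of the selector check get_element_number performs for the vec_ext/vec_set wrappers:

    #include <stdio.h>

    static int
    element_number (unsigned long long elt, unsigned long long nelts)
    {
      if (elt > nelts - 1)
        {
          fprintf (stderr,
                   "selector must be an integer constant in the range [0, %llu]\n",
                   nelts - 1);
          return 0;               /* the expander falls back to element 0 */
        }
      return (int) elt;
    }

    int
    main (void)
    {
      printf ("%d\n", element_number (3, 4));   /* valid: 3 */
      printf ("%d\n", element_number (4, 4));   /* out of range: error, 0 */
      return 0;
    }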
*/ ++ ++rtx ++ix86_expand_builtin (tree exp, rtx target, rtx subtarget, ++ machine_mode mode, int ignore) ++{ ++ size_t i; ++ enum insn_code icode, icode2; ++ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); ++ tree arg0, arg1, arg2, arg3, arg4; ++ rtx op0, op1, op2, op3, op4, pat, pat2, insn; ++ machine_mode mode0, mode1, mode2, mode3, mode4; ++ unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); ++ ++ /* For CPU builtins that can be folded, fold first and expand the fold. */ ++ switch (fcode) ++ { ++ case IX86_BUILTIN_CPU_INIT: ++ { ++ /* Make it call __cpu_indicator_init in libgcc. */ ++ tree call_expr, fndecl, type; ++ type = build_function_type_list (integer_type_node, NULL_TREE); ++ fndecl = build_fn_decl ("__cpu_indicator_init", type); ++ call_expr = build_call_expr (fndecl, 0); ++ return expand_expr (call_expr, target, mode, EXPAND_NORMAL); ++ } ++ case IX86_BUILTIN_CPU_IS: ++ case IX86_BUILTIN_CPU_SUPPORTS: ++ { ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ tree fold_expr = fold_builtin_cpu (fndecl, &arg0); ++ gcc_assert (fold_expr != NULL_TREE); ++ return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); ++ } ++ } ++ ++ HOST_WIDE_INT isa = ix86_isa_flags; ++ HOST_WIDE_INT isa2 = ix86_isa_flags2; ++ HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa; ++ HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2; ++ /* The general case is we require all the ISAs specified in bisa{,2} ++ to be enabled. ++ The exceptions are: ++ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A ++ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 ++ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 ++ where for each this pair it is sufficient if either of the ISAs is ++ enabled, plus if it is ored with other options also those others. */ ++ if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) ++ == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) ++ && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0) ++ isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A); ++ if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) ++ == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) ++ && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0) ++ isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32); ++ if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) ++ == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) ++ && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0) ++ isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4); ++ if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2) ++ { ++ bool add_abi_p = bisa & OPTION_MASK_ISA_64BIT; ++ if (TARGET_ABI_X32) ++ bisa |= OPTION_MASK_ABI_X32; ++ else ++ bisa |= OPTION_MASK_ABI_64; ++ char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL, ++ (enum fpmath_unit) 0, false, add_abi_p); ++ if (!opts) ++ error ("%qE needs unknown isa option", fndecl); ++ else ++ { ++ gcc_assert (opts != NULL); ++ error ("%qE needs isa option %s", fndecl, opts); ++ free (opts); ++ } ++ return expand_call (exp, target, ignore); ++ } ++ ++ switch (fcode) ++ { ++ case IX86_BUILTIN_MASKMOVQ: ++ case IX86_BUILTIN_MASKMOVDQU: ++ icode = (fcode == IX86_BUILTIN_MASKMOVQ ++ ? CODE_FOR_mmx_maskmovq ++ : CODE_FOR_sse2_maskmovdqu); ++ /* Note the arg order is different from the operand order. 
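A minimal plain-C sketch (not from the patch itself; the mask values are invented for the example) of the OR-able ISA pair rule in the comment above, where a builtin that requires a pair such as SSE|3DNOW_A is satisfied by either member being enabled:

    #include <stdio.h>

    #define ISA_SSE      0x1ULL
    #define ISA_3DNOW_A  0x2ULL

    static unsigned long long
    relax_pair (unsigned long long isa, unsigned long long bisa,
                unsigned long long pair)
    {
      if ((bisa & pair) == pair && (isa & pair) != 0)
        isa |= pair;
      return isa;
    }

    int
    main (void)
    {
      unsigned long long isa  = ISA_SSE;                  /* target enables SSE only */
      unsigned long long bisa = ISA_SSE | ISA_3DNOW_A;    /* builtin asks for the pair */
      isa = relax_pair (isa, bisa, ISA_SSE | ISA_3DNOW_A);
      puts ((bisa & isa) == bisa ? "builtin allowed" : "needs isa option");
      return 0;
    }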
*/ ++ arg1 = CALL_EXPR_ARG (exp, 0); ++ arg2 = CALL_EXPR_ARG (exp, 1); ++ arg0 = CALL_EXPR_ARG (exp, 2); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ op2 = expand_normal (arg2); ++ mode0 = insn_data[icode].operand[0].mode; ++ mode1 = insn_data[icode].operand[1].mode; ++ mode2 = insn_data[icode].operand[2].mode; ++ ++ op0 = ix86_zero_extend_to_Pmode (op0); ++ op0 = gen_rtx_MEM (mode1, op0); ++ ++ if (!insn_data[icode].operand[0].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ if (!insn_data[icode].operand[1].predicate (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ if (!insn_data[icode].operand[2].predicate (op2, mode2)) ++ op2 = copy_to_mode_reg (mode2, op2); ++ pat = GEN_FCN (icode) (op0, op1, op2); ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ return 0; ++ ++ case IX86_BUILTIN_LDMXCSR: ++ op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); ++ target = assign_386_stack_local (SImode, SLOT_TEMP); ++ emit_move_insn (target, op0); ++ emit_insn (gen_sse_ldmxcsr (target)); ++ return 0; ++ ++ case IX86_BUILTIN_STMXCSR: ++ target = assign_386_stack_local (SImode, SLOT_TEMP); ++ emit_insn (gen_sse_stmxcsr (target)); ++ return copy_to_mode_reg (SImode, target); ++ ++ case IX86_BUILTIN_CLFLUSH: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ icode = CODE_FOR_sse2_clflush; ++ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) ++ op0 = ix86_zero_extend_to_Pmode (op0); ++ ++ emit_insn (gen_sse2_clflush (op0)); ++ return 0; ++ ++ case IX86_BUILTIN_CLWB: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ icode = CODE_FOR_clwb; ++ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) ++ op0 = ix86_zero_extend_to_Pmode (op0); ++ ++ emit_insn (gen_clwb (op0)); ++ return 0; ++ ++ case IX86_BUILTIN_CLFLUSHOPT: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ icode = CODE_FOR_clflushopt; ++ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) ++ op0 = ix86_zero_extend_to_Pmode (op0); ++ ++ emit_insn (gen_clflushopt (op0)); ++ return 0; ++ ++ case IX86_BUILTIN_MONITOR: ++ case IX86_BUILTIN_MONITORX: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ arg2 = CALL_EXPR_ARG (exp, 2); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ op2 = expand_normal (arg2); ++ if (!REG_P (op0)) ++ op0 = ix86_zero_extend_to_Pmode (op0); ++ if (!REG_P (op1)) ++ op1 = copy_to_mode_reg (SImode, op1); ++ if (!REG_P (op2)) ++ op2 = copy_to_mode_reg (SImode, op2); ++ ++ emit_insn (fcode == IX86_BUILTIN_MONITOR ++ ? 
ix86_gen_monitor (op0, op1, op2) ++ : ix86_gen_monitorx (op0, op1, op2)); ++ return 0; ++ ++ case IX86_BUILTIN_MWAIT: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ if (!REG_P (op0)) ++ op0 = copy_to_mode_reg (SImode, op0); ++ if (!REG_P (op1)) ++ op1 = copy_to_mode_reg (SImode, op1); ++ emit_insn (gen_sse3_mwait (op0, op1)); ++ return 0; ++ ++ case IX86_BUILTIN_MWAITX: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ arg2 = CALL_EXPR_ARG (exp, 2); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ op2 = expand_normal (arg2); ++ if (!REG_P (op0)) ++ op0 = copy_to_mode_reg (SImode, op0); ++ if (!REG_P (op1)) ++ op1 = copy_to_mode_reg (SImode, op1); ++ if (!REG_P (op2)) ++ op2 = copy_to_mode_reg (SImode, op2); ++ emit_insn (gen_mwaitx (op0, op1, op2)); ++ return 0; ++ ++ case IX86_BUILTIN_UMONITOR: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ ++ op0 = ix86_zero_extend_to_Pmode (op0); ++ ++ insn = (TARGET_64BIT ++ ? gen_umonitor_di (op0) ++ : gen_umonitor_si (op0)); ++ ++ emit_insn (insn); ++ return 0; ++ ++ case IX86_BUILTIN_UMWAIT: ++ case IX86_BUILTIN_TPAUSE: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ ++ if (!REG_P (op0)) ++ op0 = copy_to_mode_reg (SImode, op0); ++ ++ op1 = force_reg (DImode, op1); ++ ++ if (TARGET_64BIT) ++ { ++ op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), ++ NULL, 1, OPTAB_DIRECT); ++ switch (fcode) ++ { ++ case IX86_BUILTIN_UMWAIT: ++ icode = CODE_FOR_umwait_rex64; ++ break; ++ case IX86_BUILTIN_TPAUSE: ++ icode = CODE_FOR_tpause_rex64; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ op2 = gen_lowpart (SImode, op2); ++ op1 = gen_lowpart (SImode, op1); ++ pat = GEN_FCN (icode) (op0, op1, op2); ++ } ++ else ++ { ++ switch (fcode) ++ { ++ case IX86_BUILTIN_UMWAIT: ++ icode = CODE_FOR_umwait; ++ break; ++ case IX86_BUILTIN_TPAUSE: ++ icode = CODE_FOR_tpause; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ pat = GEN_FCN (icode) (op0, op1); ++ } ++ ++ if (!pat) ++ return 0; ++ ++ emit_insn (pat); ++ ++ if (target == 0 ++ || !register_operand (target, QImode)) ++ target = gen_reg_rtx (QImode); ++ ++ pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), ++ const0_rtx); ++ emit_insn (gen_rtx_SET (target, pat)); ++ ++ return target; ++ ++ case IX86_BUILTIN_CLZERO: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ if (!REG_P (op0)) ++ op0 = ix86_zero_extend_to_Pmode (op0); ++ emit_insn (ix86_gen_clzero (op0)); ++ return 0; ++ ++ case IX86_BUILTIN_CLDEMOTE: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ icode = CODE_FOR_cldemote; ++ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) ++ op0 = ix86_zero_extend_to_Pmode (op0); ++ ++ emit_insn (gen_cldemote (op0)); ++ return 0; ++ ++ case IX86_BUILTIN_VEC_INIT_V2SI: ++ case IX86_BUILTIN_VEC_INIT_V4HI: ++ case IX86_BUILTIN_VEC_INIT_V8QI: ++ return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); ++ ++ case IX86_BUILTIN_VEC_EXT_V2DF: ++ case IX86_BUILTIN_VEC_EXT_V2DI: ++ case IX86_BUILTIN_VEC_EXT_V4SF: ++ case IX86_BUILTIN_VEC_EXT_V4SI: ++ case IX86_BUILTIN_VEC_EXT_V8HI: ++ case IX86_BUILTIN_VEC_EXT_V2SI: ++ case IX86_BUILTIN_VEC_EXT_V4HI: ++ case IX86_BUILTIN_VEC_EXT_V16QI: ++ return ix86_expand_vec_ext_builtin (exp, target); ++ ++ case IX86_BUILTIN_VEC_SET_V2DI: ++ case IX86_BUILTIN_VEC_SET_V4SF: ++ case 
IX86_BUILTIN_VEC_SET_V4SI: ++ case IX86_BUILTIN_VEC_SET_V8HI: ++ case IX86_BUILTIN_VEC_SET_V4HI: ++ case IX86_BUILTIN_VEC_SET_V16QI: ++ return ix86_expand_vec_set_builtin (exp); ++ ++ case IX86_BUILTIN_NANQ: ++ case IX86_BUILTIN_NANSQ: ++ return expand_call (exp, target, ignore); ++ ++ case IX86_BUILTIN_RDPID: ++ ++ op0 = gen_reg_rtx (word_mode); ++ ++ if (TARGET_64BIT) ++ { ++ insn = gen_rdpid_rex64 (op0); ++ op0 = convert_to_mode (SImode, op0, 1); ++ } ++ else ++ insn = gen_rdpid (op0); ++ ++ emit_insn (insn); ++ ++ if (target == 0 ++ || !register_operand (target, SImode)) ++ target = gen_reg_rtx (SImode); ++ ++ emit_move_insn (target, op0); ++ return target; ++ ++ case IX86_BUILTIN_RDPMC: ++ case IX86_BUILTIN_RDTSC: ++ case IX86_BUILTIN_RDTSCP: ++ case IX86_BUILTIN_XGETBV: ++ ++ op0 = gen_reg_rtx (DImode); ++ op1 = gen_reg_rtx (DImode); ++ ++ if (fcode == IX86_BUILTIN_RDPMC) ++ { ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op2 = expand_normal (arg0); ++ if (!register_operand (op2, SImode)) ++ op2 = copy_to_mode_reg (SImode, op2); ++ ++ insn = (TARGET_64BIT ++ ? gen_rdpmc_rex64 (op0, op1, op2) ++ : gen_rdpmc (op0, op2)); ++ emit_insn (insn); ++ } ++ else if (fcode == IX86_BUILTIN_XGETBV) ++ { ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op2 = expand_normal (arg0); ++ if (!register_operand (op2, SImode)) ++ op2 = copy_to_mode_reg (SImode, op2); ++ ++ insn = (TARGET_64BIT ++ ? gen_xgetbv_rex64 (op0, op1, op2) ++ : gen_xgetbv (op0, op2)); ++ emit_insn (insn); ++ } ++ else if (fcode == IX86_BUILTIN_RDTSC) ++ { ++ insn = (TARGET_64BIT ++ ? gen_rdtsc_rex64 (op0, op1) ++ : gen_rdtsc (op0)); ++ emit_insn (insn); ++ } ++ else ++ { ++ op2 = gen_reg_rtx (SImode); ++ ++ insn = (TARGET_64BIT ++ ? gen_rdtscp_rex64 (op0, op1, op2) ++ : gen_rdtscp (op0, op2)); ++ emit_insn (insn); ++ ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op4 = expand_normal (arg0); ++ if (!address_operand (op4, VOIDmode)) ++ { ++ op4 = convert_memory_address (Pmode, op4); ++ op4 = copy_addr_to_reg (op4); ++ } ++ emit_move_insn (gen_rtx_MEM (SImode, op4), op2); ++ } ++ ++ if (target == 0 ++ || !register_operand (target, DImode)) ++ target = gen_reg_rtx (DImode); ++ ++ if (TARGET_64BIT) ++ { ++ op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32), ++ op1, 1, OPTAB_DIRECT); ++ op0 = expand_simple_binop (DImode, IOR, op0, op1, ++ op0, 1, OPTAB_DIRECT); ++ } ++ ++ emit_move_insn (target, op0); ++ return target; ++ ++ case IX86_BUILTIN_MOVDIR64B: ++ ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ ++ op0 = ix86_zero_extend_to_Pmode (op0); ++ if (!address_operand (op1, VOIDmode)) ++ { ++ op1 = convert_memory_address (Pmode, op1); ++ op1 = copy_addr_to_reg (op1); ++ } ++ op1 = gen_rtx_MEM (XImode, op1); ++ ++ insn = (TARGET_64BIT ++ ? 
gen_movdir64b_di (op0, op1) ++ : gen_movdir64b_si (op0, op1)); ++ emit_insn (insn); ++ return 0; ++ ++ case IX86_BUILTIN_FXSAVE: ++ case IX86_BUILTIN_FXRSTOR: ++ case IX86_BUILTIN_FXSAVE64: ++ case IX86_BUILTIN_FXRSTOR64: ++ case IX86_BUILTIN_FNSTENV: ++ case IX86_BUILTIN_FLDENV: ++ mode0 = BLKmode; ++ switch (fcode) ++ { ++ case IX86_BUILTIN_FXSAVE: ++ icode = CODE_FOR_fxsave; ++ break; ++ case IX86_BUILTIN_FXRSTOR: ++ icode = CODE_FOR_fxrstor; ++ break; ++ case IX86_BUILTIN_FXSAVE64: ++ icode = CODE_FOR_fxsave64; ++ break; ++ case IX86_BUILTIN_FXRSTOR64: ++ icode = CODE_FOR_fxrstor64; ++ break; ++ case IX86_BUILTIN_FNSTENV: ++ icode = CODE_FOR_fnstenv; ++ break; ++ case IX86_BUILTIN_FLDENV: ++ icode = CODE_FOR_fldenv; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ ++ if (!address_operand (op0, VOIDmode)) ++ { ++ op0 = convert_memory_address (Pmode, op0); ++ op0 = copy_addr_to_reg (op0); ++ } ++ op0 = gen_rtx_MEM (mode0, op0); ++ ++ pat = GEN_FCN (icode) (op0); ++ if (pat) ++ emit_insn (pat); ++ return 0; ++ ++ case IX86_BUILTIN_XSETBV: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ ++ if (!REG_P (op0)) ++ op0 = copy_to_mode_reg (SImode, op0); ++ ++ op1 = force_reg (DImode, op1); ++ ++ if (TARGET_64BIT) ++ { ++ op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), ++ NULL, 1, OPTAB_DIRECT); ++ ++ icode = CODE_FOR_xsetbv_rex64; ++ ++ op2 = gen_lowpart (SImode, op2); ++ op1 = gen_lowpart (SImode, op1); ++ pat = GEN_FCN (icode) (op0, op1, op2); ++ } ++ else ++ { ++ icode = CODE_FOR_xsetbv; ++ ++ pat = GEN_FCN (icode) (op0, op1); ++ } ++ if (pat) ++ emit_insn (pat); ++ return 0; ++ ++ case IX86_BUILTIN_XSAVE: ++ case IX86_BUILTIN_XRSTOR: ++ case IX86_BUILTIN_XSAVE64: ++ case IX86_BUILTIN_XRSTOR64: ++ case IX86_BUILTIN_XSAVEOPT: ++ case IX86_BUILTIN_XSAVEOPT64: ++ case IX86_BUILTIN_XSAVES: ++ case IX86_BUILTIN_XRSTORS: ++ case IX86_BUILTIN_XSAVES64: ++ case IX86_BUILTIN_XRSTORS64: ++ case IX86_BUILTIN_XSAVEC: ++ case IX86_BUILTIN_XSAVEC64: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ ++ if (!address_operand (op0, VOIDmode)) ++ { ++ op0 = convert_memory_address (Pmode, op0); ++ op0 = copy_addr_to_reg (op0); ++ } ++ op0 = gen_rtx_MEM (BLKmode, op0); ++ ++ op1 = force_reg (DImode, op1); ++ ++ if (TARGET_64BIT) ++ { ++ op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), ++ NULL, 1, OPTAB_DIRECT); ++ switch (fcode) ++ { ++ case IX86_BUILTIN_XSAVE: ++ icode = CODE_FOR_xsave_rex64; ++ break; ++ case IX86_BUILTIN_XRSTOR: ++ icode = CODE_FOR_xrstor_rex64; ++ break; ++ case IX86_BUILTIN_XSAVE64: ++ icode = CODE_FOR_xsave64; ++ break; ++ case IX86_BUILTIN_XRSTOR64: ++ icode = CODE_FOR_xrstor64; ++ break; ++ case IX86_BUILTIN_XSAVEOPT: ++ icode = CODE_FOR_xsaveopt_rex64; ++ break; ++ case IX86_BUILTIN_XSAVEOPT64: ++ icode = CODE_FOR_xsaveopt64; ++ break; ++ case IX86_BUILTIN_XSAVES: ++ icode = CODE_FOR_xsaves_rex64; ++ break; ++ case IX86_BUILTIN_XRSTORS: ++ icode = CODE_FOR_xrstors_rex64; ++ break; ++ case IX86_BUILTIN_XSAVES64: ++ icode = CODE_FOR_xsaves64; ++ break; ++ case IX86_BUILTIN_XRSTORS64: ++ icode = CODE_FOR_xrstors64; ++ break; ++ case IX86_BUILTIN_XSAVEC: ++ icode = CODE_FOR_xsavec_rex64; ++ break; ++ case IX86_BUILTIN_XSAVEC64: ++ icode = CODE_FOR_xsavec64; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ op2 = 
gen_lowpart (SImode, op2); ++ op1 = gen_lowpart (SImode, op1); ++ pat = GEN_FCN (icode) (op0, op1, op2); ++ } ++ else ++ { ++ switch (fcode) ++ { ++ case IX86_BUILTIN_XSAVE: ++ icode = CODE_FOR_xsave; ++ break; ++ case IX86_BUILTIN_XRSTOR: ++ icode = CODE_FOR_xrstor; ++ break; ++ case IX86_BUILTIN_XSAVEOPT: ++ icode = CODE_FOR_xsaveopt; ++ break; ++ case IX86_BUILTIN_XSAVES: ++ icode = CODE_FOR_xsaves; ++ break; ++ case IX86_BUILTIN_XRSTORS: ++ icode = CODE_FOR_xrstors; ++ break; ++ case IX86_BUILTIN_XSAVEC: ++ icode = CODE_FOR_xsavec; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ pat = GEN_FCN (icode) (op0, op1); ++ } ++ ++ if (pat) ++ emit_insn (pat); ++ return 0; ++ ++ case IX86_BUILTIN_LLWPCB: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ icode = CODE_FOR_lwp_llwpcb; ++ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) ++ op0 = ix86_zero_extend_to_Pmode (op0); ++ emit_insn (gen_lwp_llwpcb (op0)); ++ return 0; ++ ++ case IX86_BUILTIN_SLWPCB: ++ icode = CODE_FOR_lwp_slwpcb; ++ if (!target ++ || !insn_data[icode].operand[0].predicate (target, Pmode)) ++ target = gen_reg_rtx (Pmode); ++ emit_insn (gen_lwp_slwpcb (target)); ++ return target; ++ ++ case IX86_BUILTIN_BEXTRI32: ++ case IX86_BUILTIN_BEXTRI64: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ icode = (fcode == IX86_BUILTIN_BEXTRI32 ++ ? CODE_FOR_tbm_bextri_si ++ : CODE_FOR_tbm_bextri_di); ++ if (!CONST_INT_P (op1)) ++ { ++ error ("last argument must be an immediate"); ++ return const0_rtx; ++ } ++ else ++ { ++ unsigned char length = (INTVAL (op1) >> 8) & 0xFF; ++ unsigned char lsb_index = INTVAL (op1) & 0xFF; ++ op1 = GEN_INT (length); ++ op2 = GEN_INT (lsb_index); ++ ++ mode1 = insn_data[icode].operand[1].mode; ++ if (!insn_data[icode].operand[1].predicate (op0, mode1)) ++ op0 = copy_to_mode_reg (mode1, op0); ++ ++ mode0 = insn_data[icode].operand[0].mode; ++ if (target == 0 ++ || !register_operand (target, mode0)) ++ target = gen_reg_rtx (mode0); ++ ++ pat = GEN_FCN (icode) (target, op0, op1, op2); ++ if (pat) ++ emit_insn (pat); ++ return target; ++ } ++ ++ case IX86_BUILTIN_RDRAND16_STEP: ++ icode = CODE_FOR_rdrandhi_1; ++ mode0 = HImode; ++ goto rdrand_step; ++ ++ case IX86_BUILTIN_RDRAND32_STEP: ++ icode = CODE_FOR_rdrandsi_1; ++ mode0 = SImode; ++ goto rdrand_step; ++ ++ case IX86_BUILTIN_RDRAND64_STEP: ++ icode = CODE_FOR_rdranddi_1; ++ mode0 = DImode; ++ ++rdrand_step: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op1 = expand_normal (arg0); ++ if (!address_operand (op1, VOIDmode)) ++ { ++ op1 = convert_memory_address (Pmode, op1); ++ op1 = copy_addr_to_reg (op1); ++ } ++ ++ op0 = gen_reg_rtx (mode0); ++ emit_insn (GEN_FCN (icode) (op0)); ++ ++ emit_move_insn (gen_rtx_MEM (mode0, op1), op0); ++ ++ op1 = gen_reg_rtx (SImode); ++ emit_move_insn (op1, CONST1_RTX (SImode)); ++ ++ /* Emit SImode conditional move. 
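As a user-level counterpart (not from the patch itself; assumes <immintrin.h>, -mrdrnd at compile time and RDRAND-capable hardware), the rdrand_step expansion here is what backs _rdrand32_step, whose return value reflects the carry-flag success protocol modelled by the conditional move:

    #include <immintrin.h>
    #include <stdio.h>

    int
    main (void)
    {
      unsigned int value;
      int ok = 0;
      /* The hardware may transiently fail, so retry a few times.  */
      for (int tries = 0; tries < 10 && !ok; tries++)
        ok = _rdrand32_step (&value);        /* returns 1 when CF was set */
      if (ok)
        printf ("random: %u\n", value);
      return ok ? 0 : 1;
    }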
*/ ++ if (mode0 == HImode) ++ { ++ if (TARGET_ZERO_EXTEND_WITH_AND ++ && optimize_function_for_speed_p (cfun)) ++ { ++ op2 = force_reg (SImode, const0_rtx); ++ ++ emit_insn (gen_movstricthi ++ (gen_lowpart (HImode, op2), op0)); ++ } ++ else ++ { ++ op2 = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_zero_extendhisi2 (op2, op0)); ++ } ++ } ++ else if (mode0 == SImode) ++ op2 = op0; ++ else ++ op2 = gen_rtx_SUBREG (SImode, op0, 0); ++ ++ if (target == 0 ++ || !register_operand (target, SImode)) ++ target = gen_reg_rtx (SImode); ++ ++ pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG), ++ const0_rtx); ++ emit_insn (gen_rtx_SET (target, ++ gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1))); ++ return target; ++ ++ case IX86_BUILTIN_RDSEED16_STEP: ++ icode = CODE_FOR_rdseedhi_1; ++ mode0 = HImode; ++ goto rdseed_step; ++ ++ case IX86_BUILTIN_RDSEED32_STEP: ++ icode = CODE_FOR_rdseedsi_1; ++ mode0 = SImode; ++ goto rdseed_step; ++ ++ case IX86_BUILTIN_RDSEED64_STEP: ++ icode = CODE_FOR_rdseeddi_1; ++ mode0 = DImode; ++ ++rdseed_step: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op1 = expand_normal (arg0); ++ if (!address_operand (op1, VOIDmode)) ++ { ++ op1 = convert_memory_address (Pmode, op1); ++ op1 = copy_addr_to_reg (op1); ++ } ++ ++ op0 = gen_reg_rtx (mode0); ++ emit_insn (GEN_FCN (icode) (op0)); ++ ++ emit_move_insn (gen_rtx_MEM (mode0, op1), op0); ++ ++ op2 = gen_reg_rtx (QImode); ++ ++ pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), ++ const0_rtx); ++ emit_insn (gen_rtx_SET (op2, pat)); ++ ++ if (target == 0 ++ || !register_operand (target, SImode)) ++ target = gen_reg_rtx (SImode); ++ ++ emit_insn (gen_zero_extendqisi2 (target, op2)); ++ return target; ++ ++ case IX86_BUILTIN_SBB32: ++ icode = CODE_FOR_subborrowsi; ++ icode2 = CODE_FOR_subborrowsi_0; ++ mode0 = SImode; ++ mode1 = DImode; ++ mode2 = CCmode; ++ goto handlecarry; ++ ++ case IX86_BUILTIN_SBB64: ++ icode = CODE_FOR_subborrowdi; ++ icode2 = CODE_FOR_subborrowdi_0; ++ mode0 = DImode; ++ mode1 = TImode; ++ mode2 = CCmode; ++ goto handlecarry; ++ ++ case IX86_BUILTIN_ADDCARRYX32: ++ icode = CODE_FOR_addcarrysi; ++ icode2 = CODE_FOR_addcarrysi_0; ++ mode0 = SImode; ++ mode1 = DImode; ++ mode2 = CCCmode; ++ goto handlecarry; ++ ++ case IX86_BUILTIN_ADDCARRYX64: ++ icode = CODE_FOR_addcarrydi; ++ icode2 = CODE_FOR_addcarrydi_0; ++ mode0 = DImode; ++ mode1 = TImode; ++ mode2 = CCCmode; ++ ++ handlecarry: ++ arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */ ++ arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */ ++ arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */ ++ arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */ ++ ++ op1 = expand_normal (arg0); ++ if (!integer_zerop (arg0)) ++ op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); ++ ++ op2 = expand_normal (arg1); ++ if (!register_operand (op2, mode0)) ++ op2 = copy_to_mode_reg (mode0, op2); ++ ++ op3 = expand_normal (arg2); ++ if (!register_operand (op3, mode0)) ++ op3 = copy_to_mode_reg (mode0, op3); ++ ++ op4 = expand_normal (arg3); ++ if (!address_operand (op4, VOIDmode)) ++ { ++ op4 = convert_memory_address (Pmode, op4); ++ op4 = copy_addr_to_reg (op4); ++ } ++ ++ op0 = gen_reg_rtx (mode0); ++ if (integer_zerop (arg0)) ++ { ++ /* If arg0 is 0, optimize right away into add or sub ++ instruction that sets CCCmode flags. */ ++ op1 = gen_rtx_REG (mode2, FLAGS_REG); ++ emit_insn (GEN_FCN (icode2) (op0, op2, op3)); ++ } ++ else ++ { ++ /* Generate CF from input operand. 
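As a user-level counterpart (not from the patch itself; assumes the usual <immintrin.h>/adxintrin.h declarations), the handlecarry expansion backs _addcarry_u32 and friends; a two-limb addition shows the carry-in/carry-out chaining being set up:

    #include <immintrin.h>
    #include <stdio.h>

    int
    main (void)
    {
      unsigned int lo, hi;
      unsigned char c = _addcarry_u32 (0, 0xffffffffu, 1u, &lo);  /* overflows */
      c = _addcarry_u32 (c, 0u, 0u, &hi);                         /* consumes CF */
      printf ("hi=%u lo=%u carry=%u\n", hi, lo, (unsigned int) c);
      /* prints: hi=1 lo=0 carry=0 */
      return 0;
    }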
*/ ++ emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); ++ ++ /* Generate instruction that consumes CF. */ ++ op1 = gen_rtx_REG (CCCmode, FLAGS_REG); ++ pat = gen_rtx_LTU (mode1, op1, const0_rtx); ++ pat2 = gen_rtx_LTU (mode0, op1, const0_rtx); ++ emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2)); ++ } ++ ++ /* Return current CF value. */ ++ if (target == 0) ++ target = gen_reg_rtx (QImode); ++ ++ pat = gen_rtx_LTU (QImode, op1, const0_rtx); ++ emit_insn (gen_rtx_SET (target, pat)); ++ ++ /* Store the result. */ ++ emit_move_insn (gen_rtx_MEM (mode0, op4), op0); ++ ++ return target; ++ ++ case IX86_BUILTIN_READ_FLAGS: ++ emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG))); ++ ++ if (optimize ++ || target == NULL_RTX ++ || !nonimmediate_operand (target, word_mode) ++ || GET_MODE (target) != word_mode) ++ target = gen_reg_rtx (word_mode); ++ ++ emit_insn (gen_pop (target)); ++ return target; ++ ++ case IX86_BUILTIN_WRITE_FLAGS: ++ ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ if (!general_no_elim_operand (op0, word_mode)) ++ op0 = copy_to_mode_reg (word_mode, op0); ++ ++ emit_insn (gen_push (op0)); ++ emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG))); ++ return 0; ++ ++ case IX86_BUILTIN_KTESTC8: ++ icode = CODE_FOR_ktestqi; ++ mode3 = CCCmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KTESTZ8: ++ icode = CODE_FOR_ktestqi; ++ mode3 = CCZmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KTESTC16: ++ icode = CODE_FOR_ktesthi; ++ mode3 = CCCmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KTESTZ16: ++ icode = CODE_FOR_ktesthi; ++ mode3 = CCZmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KTESTC32: ++ icode = CODE_FOR_ktestsi; ++ mode3 = CCCmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KTESTZ32: ++ icode = CODE_FOR_ktestsi; ++ mode3 = CCZmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KTESTC64: ++ icode = CODE_FOR_ktestdi; ++ mode3 = CCCmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KTESTZ64: ++ icode = CODE_FOR_ktestdi; ++ mode3 = CCZmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KORTESTC8: ++ icode = CODE_FOR_kortestqi; ++ mode3 = CCCmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KORTESTZ8: ++ icode = CODE_FOR_kortestqi; ++ mode3 = CCZmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KORTESTC16: ++ icode = CODE_FOR_kortesthi; ++ mode3 = CCCmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KORTESTZ16: ++ icode = CODE_FOR_kortesthi; ++ mode3 = CCZmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KORTESTC32: ++ icode = CODE_FOR_kortestsi; ++ mode3 = CCCmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KORTESTZ32: ++ icode = CODE_FOR_kortestsi; ++ mode3 = CCZmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KORTESTC64: ++ icode = CODE_FOR_kortestdi; ++ mode3 = CCCmode; ++ goto kortest; ++ ++ case IX86_BUILTIN_KORTESTZ64: ++ icode = CODE_FOR_kortestdi; ++ mode3 = CCZmode; ++ ++ kortest: ++ arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */ ++ arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. 
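A small plain-C sketch (not from the patch itself) of what the CCZmode/CCCmode selection above models: KORTEST raises ZF when the OR of the two masks is all zeros and CF when it is all ones, and each builtin then reads exactly one of those flags via setcc:

    #include <stdio.h>

    static int
    kortest_z (unsigned short a, unsigned short b)   /* kortestz-style result */
    {
      return (unsigned short) (a | b) == 0x0000;
    }

    static int
    kortest_c (unsigned short a, unsigned short b)   /* kortestc-style result */
    {
      return (unsigned short) (a | b) == 0xffff;
    }

    int
    main (void)
    {
      printf ("%d %d\n", kortest_z (0, 0), kortest_c (0xff00, 0x00ff));  /* 1 1 */
      return 0;
    }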
*/ ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ ++ mode0 = insn_data[icode].operand[0].mode; ++ mode1 = insn_data[icode].operand[1].mode; ++ ++ if (GET_MODE (op0) != VOIDmode) ++ op0 = force_reg (GET_MODE (op0), op0); ++ ++ op0 = gen_lowpart (mode0, op0); ++ ++ if (!insn_data[icode].operand[0].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ ++ if (GET_MODE (op1) != VOIDmode) ++ op1 = force_reg (GET_MODE (op1), op1); ++ ++ op1 = gen_lowpart (mode1, op1); ++ ++ if (!insn_data[icode].operand[1].predicate (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ ++ target = gen_reg_rtx (QImode); ++ ++ /* Emit kortest. */ ++ emit_insn (GEN_FCN (icode) (op0, op1)); ++ /* And use setcc to return result from flags. */ ++ ix86_expand_setcc (target, EQ, ++ gen_rtx_REG (mode3, FLAGS_REG), const0_rtx); ++ return target; ++ ++ case IX86_BUILTIN_GATHERSIV2DF: ++ icode = CODE_FOR_avx2_gathersiv2df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERSIV4DF: ++ icode = CODE_FOR_avx2_gathersiv4df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERDIV2DF: ++ icode = CODE_FOR_avx2_gatherdiv2df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERDIV4DF: ++ icode = CODE_FOR_avx2_gatherdiv4df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERSIV4SF: ++ icode = CODE_FOR_avx2_gathersiv4sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERSIV8SF: ++ icode = CODE_FOR_avx2_gathersiv8sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERDIV4SF: ++ icode = CODE_FOR_avx2_gatherdiv4sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERDIV8SF: ++ icode = CODE_FOR_avx2_gatherdiv8sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERSIV2DI: ++ icode = CODE_FOR_avx2_gathersiv2di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERSIV4DI: ++ icode = CODE_FOR_avx2_gathersiv4di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERDIV2DI: ++ icode = CODE_FOR_avx2_gatherdiv2di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERDIV4DI: ++ icode = CODE_FOR_avx2_gatherdiv4di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERSIV4SI: ++ icode = CODE_FOR_avx2_gathersiv4si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERSIV8SI: ++ icode = CODE_FOR_avx2_gathersiv8si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERDIV4SI: ++ icode = CODE_FOR_avx2_gatherdiv4si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERDIV8SI: ++ icode = CODE_FOR_avx2_gatherdiv8si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERALTSIV4DF: ++ icode = CODE_FOR_avx2_gathersiv4df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERALTDIV8SF: ++ icode = CODE_FOR_avx2_gatherdiv8sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERALTSIV4DI: ++ icode = CODE_FOR_avx2_gathersiv4di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHERALTDIV8SI: ++ icode = CODE_FOR_avx2_gatherdiv8si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV16SF: ++ icode = CODE_FOR_avx512f_gathersiv16sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV8DF: ++ icode = CODE_FOR_avx512f_gathersiv8df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV16SF: ++ icode = CODE_FOR_avx512f_gatherdiv16sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV8DF: ++ icode = CODE_FOR_avx512f_gatherdiv8df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV16SI: ++ icode = CODE_FOR_avx512f_gathersiv16si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV8DI: ++ icode = CODE_FOR_avx512f_gathersiv8di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV16SI: ++ icode = CODE_FOR_avx512f_gatherdiv16si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV8DI: ++ icode = CODE_FOR_avx512f_gatherdiv8di; ++ goto gather_gen; ++ case 
IX86_BUILTIN_GATHER3ALTSIV8DF: ++ icode = CODE_FOR_avx512f_gathersiv8df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3ALTDIV16SF: ++ icode = CODE_FOR_avx512f_gatherdiv16sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3ALTSIV8DI: ++ icode = CODE_FOR_avx512f_gathersiv8di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3ALTDIV16SI: ++ icode = CODE_FOR_avx512f_gatherdiv16si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV2DF: ++ icode = CODE_FOR_avx512vl_gathersiv2df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV4DF: ++ icode = CODE_FOR_avx512vl_gathersiv4df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV2DF: ++ icode = CODE_FOR_avx512vl_gatherdiv2df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV4DF: ++ icode = CODE_FOR_avx512vl_gatherdiv4df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV4SF: ++ icode = CODE_FOR_avx512vl_gathersiv4sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV8SF: ++ icode = CODE_FOR_avx512vl_gathersiv8sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV4SF: ++ icode = CODE_FOR_avx512vl_gatherdiv4sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV8SF: ++ icode = CODE_FOR_avx512vl_gatherdiv8sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV2DI: ++ icode = CODE_FOR_avx512vl_gathersiv2di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV4DI: ++ icode = CODE_FOR_avx512vl_gathersiv4di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV2DI: ++ icode = CODE_FOR_avx512vl_gatherdiv2di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV4DI: ++ icode = CODE_FOR_avx512vl_gatherdiv4di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV4SI: ++ icode = CODE_FOR_avx512vl_gathersiv4si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3SIV8SI: ++ icode = CODE_FOR_avx512vl_gathersiv8si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV4SI: ++ icode = CODE_FOR_avx512vl_gatherdiv4si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3DIV8SI: ++ icode = CODE_FOR_avx512vl_gatherdiv8si; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3ALTSIV4DF: ++ icode = CODE_FOR_avx512vl_gathersiv4df; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3ALTDIV8SF: ++ icode = CODE_FOR_avx512vl_gatherdiv8sf; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3ALTSIV4DI: ++ icode = CODE_FOR_avx512vl_gathersiv4di; ++ goto gather_gen; ++ case IX86_BUILTIN_GATHER3ALTDIV8SI: ++ icode = CODE_FOR_avx512vl_gatherdiv8si; ++ goto gather_gen; ++ case IX86_BUILTIN_SCATTERSIV16SF: ++ icode = CODE_FOR_avx512f_scattersiv16sf; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV8DF: ++ icode = CODE_FOR_avx512f_scattersiv8df; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV16SF: ++ icode = CODE_FOR_avx512f_scatterdiv16sf; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV8DF: ++ icode = CODE_FOR_avx512f_scatterdiv8df; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV16SI: ++ icode = CODE_FOR_avx512f_scattersiv16si; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV8DI: ++ icode = CODE_FOR_avx512f_scattersiv8di; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV16SI: ++ icode = CODE_FOR_avx512f_scatterdiv16si; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV8DI: ++ icode = CODE_FOR_avx512f_scatterdiv8di; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV8SF: ++ icode = CODE_FOR_avx512vl_scattersiv8sf; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV4SF: ++ icode = CODE_FOR_avx512vl_scattersiv4sf; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV4DF: ++ icode = CODE_FOR_avx512vl_scattersiv4df; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV2DF: ++ icode = 
CODE_FOR_avx512vl_scattersiv2df; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV8SF: ++ icode = CODE_FOR_avx512vl_scatterdiv8sf; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV4SF: ++ icode = CODE_FOR_avx512vl_scatterdiv4sf; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV4DF: ++ icode = CODE_FOR_avx512vl_scatterdiv4df; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV2DF: ++ icode = CODE_FOR_avx512vl_scatterdiv2df; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV8SI: ++ icode = CODE_FOR_avx512vl_scattersiv8si; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV4SI: ++ icode = CODE_FOR_avx512vl_scattersiv4si; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV4DI: ++ icode = CODE_FOR_avx512vl_scattersiv4di; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERSIV2DI: ++ icode = CODE_FOR_avx512vl_scattersiv2di; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV8SI: ++ icode = CODE_FOR_avx512vl_scatterdiv8si; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV4SI: ++ icode = CODE_FOR_avx512vl_scatterdiv4si; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV4DI: ++ icode = CODE_FOR_avx512vl_scatterdiv4di; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERDIV2DI: ++ icode = CODE_FOR_avx512vl_scatterdiv2di; ++ goto scatter_gen; ++ case IX86_BUILTIN_GATHERPFDPD: ++ icode = CODE_FOR_avx512pf_gatherpfv8sidf; ++ goto vec_prefetch_gen; ++ case IX86_BUILTIN_SCATTERALTSIV8DF: ++ icode = CODE_FOR_avx512f_scattersiv8df; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTDIV16SF: ++ icode = CODE_FOR_avx512f_scatterdiv16sf; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTSIV8DI: ++ icode = CODE_FOR_avx512f_scattersiv8di; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTDIV16SI: ++ icode = CODE_FOR_avx512f_scatterdiv16si; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTSIV4DF: ++ icode = CODE_FOR_avx512vl_scattersiv4df; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTDIV8SF: ++ icode = CODE_FOR_avx512vl_scatterdiv8sf; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTSIV4DI: ++ icode = CODE_FOR_avx512vl_scattersiv4di; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTDIV8SI: ++ icode = CODE_FOR_avx512vl_scatterdiv8si; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTSIV2DF: ++ icode = CODE_FOR_avx512vl_scattersiv2df; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTDIV4SF: ++ icode = CODE_FOR_avx512vl_scatterdiv4sf; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTSIV2DI: ++ icode = CODE_FOR_avx512vl_scattersiv2di; ++ goto scatter_gen; ++ case IX86_BUILTIN_SCATTERALTDIV4SI: ++ icode = CODE_FOR_avx512vl_scatterdiv4si; ++ goto scatter_gen; ++ case IX86_BUILTIN_GATHERPFDPS: ++ icode = CODE_FOR_avx512pf_gatherpfv16sisf; ++ goto vec_prefetch_gen; ++ case IX86_BUILTIN_GATHERPFQPD: ++ icode = CODE_FOR_avx512pf_gatherpfv8didf; ++ goto vec_prefetch_gen; ++ case IX86_BUILTIN_GATHERPFQPS: ++ icode = CODE_FOR_avx512pf_gatherpfv8disf; ++ goto vec_prefetch_gen; ++ case IX86_BUILTIN_SCATTERPFDPD: ++ icode = CODE_FOR_avx512pf_scatterpfv8sidf; ++ goto vec_prefetch_gen; ++ case IX86_BUILTIN_SCATTERPFDPS: ++ icode = CODE_FOR_avx512pf_scatterpfv16sisf; ++ goto vec_prefetch_gen; ++ case IX86_BUILTIN_SCATTERPFQPD: ++ icode = CODE_FOR_avx512pf_scatterpfv8didf; ++ goto vec_prefetch_gen; ++ case IX86_BUILTIN_SCATTERPFQPS: ++ icode = CODE_FOR_avx512pf_scatterpfv8disf; ++ goto vec_prefetch_gen; ++ ++ gather_gen: ++ rtx half; ++ rtx (*gen) (rtx, rtx); ++ ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ arg2 = CALL_EXPR_ARG (exp, 2); ++ arg3 = CALL_EXPR_ARG 
(exp, 3); ++ arg4 = CALL_EXPR_ARG (exp, 4); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ op2 = expand_normal (arg2); ++ op3 = expand_normal (arg3); ++ op4 = expand_normal (arg4); ++ /* Note the arg order is different from the operand order. */ ++ mode0 = insn_data[icode].operand[1].mode; ++ mode2 = insn_data[icode].operand[3].mode; ++ mode3 = insn_data[icode].operand[4].mode; ++ mode4 = insn_data[icode].operand[5].mode; ++ ++ if (target == NULL_RTX ++ || GET_MODE (target) != insn_data[icode].operand[0].mode ++ || !insn_data[icode].operand[0].predicate (target, ++ GET_MODE (target))) ++ subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode); ++ else ++ subtarget = target; ++ ++ switch (fcode) ++ { ++ case IX86_BUILTIN_GATHER3ALTSIV8DF: ++ case IX86_BUILTIN_GATHER3ALTSIV8DI: ++ half = gen_reg_rtx (V8SImode); ++ if (!nonimmediate_operand (op2, V16SImode)) ++ op2 = copy_to_mode_reg (V16SImode, op2); ++ emit_insn (gen_vec_extract_lo_v16si (half, op2)); ++ op2 = half; ++ break; ++ case IX86_BUILTIN_GATHER3ALTSIV4DF: ++ case IX86_BUILTIN_GATHER3ALTSIV4DI: ++ case IX86_BUILTIN_GATHERALTSIV4DF: ++ case IX86_BUILTIN_GATHERALTSIV4DI: ++ half = gen_reg_rtx (V4SImode); ++ if (!nonimmediate_operand (op2, V8SImode)) ++ op2 = copy_to_mode_reg (V8SImode, op2); ++ emit_insn (gen_vec_extract_lo_v8si (half, op2)); ++ op2 = half; ++ break; ++ case IX86_BUILTIN_GATHER3ALTDIV16SF: ++ case IX86_BUILTIN_GATHER3ALTDIV16SI: ++ half = gen_reg_rtx (mode0); ++ if (mode0 == V8SFmode) ++ gen = gen_vec_extract_lo_v16sf; ++ else ++ gen = gen_vec_extract_lo_v16si; ++ if (!nonimmediate_operand (op0, GET_MODE (op0))) ++ op0 = copy_to_mode_reg (GET_MODE (op0), op0); ++ emit_insn (gen (half, op0)); ++ op0 = half; ++ op3 = lowpart_subreg (QImode, op3, HImode); ++ break; ++ case IX86_BUILTIN_GATHER3ALTDIV8SF: ++ case IX86_BUILTIN_GATHER3ALTDIV8SI: ++ case IX86_BUILTIN_GATHERALTDIV8SF: ++ case IX86_BUILTIN_GATHERALTDIV8SI: ++ half = gen_reg_rtx (mode0); ++ if (mode0 == V4SFmode) ++ gen = gen_vec_extract_lo_v8sf; ++ else ++ gen = gen_vec_extract_lo_v8si; ++ if (!nonimmediate_operand (op0, GET_MODE (op0))) ++ op0 = copy_to_mode_reg (GET_MODE (op0), op0); ++ emit_insn (gen (half, op0)); ++ op0 = half; ++ if (VECTOR_MODE_P (GET_MODE (op3))) ++ { ++ half = gen_reg_rtx (mode0); ++ if (!nonimmediate_operand (op3, GET_MODE (op3))) ++ op3 = copy_to_mode_reg (GET_MODE (op3), op3); ++ emit_insn (gen (half, op3)); ++ op3 = half; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ /* Force memory operand only with base register here. But we ++ don't want to do it on memory operand for other builtin ++ functions. */ ++ op1 = ix86_zero_extend_to_Pmode (op1); ++ ++ if (!insn_data[icode].operand[1].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ if (!insn_data[icode].operand[2].predicate (op1, Pmode)) ++ op1 = copy_to_mode_reg (Pmode, op1); ++ if (!insn_data[icode].operand[3].predicate (op2, mode2)) ++ op2 = copy_to_mode_reg (mode2, op2); ++ ++ op3 = fixup_modeless_constant (op3, mode3); ++ ++ if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode) ++ { ++ if (!insn_data[icode].operand[4].predicate (op3, mode3)) ++ op3 = copy_to_mode_reg (mode3, op3); ++ } ++ else ++ { ++ op3 = copy_to_reg (op3); ++ op3 = lowpart_subreg (mode3, op3, GET_MODE (op3)); ++ } ++ if (!insn_data[icode].operand[5].predicate (op4, mode4)) ++ { ++ error ("the last argument must be scale 1, 2, 4, 8"); ++ return const0_rtx; ++ } ++ ++ /* Optimize. 
If mask is known to have all high bits set, ++ replace op0 with pc_rtx to signal that the instruction ++ overwrites the whole destination and doesn't use its ++ previous contents. */ ++ if (optimize) ++ { ++ if (TREE_CODE (arg3) == INTEGER_CST) ++ { ++ if (integer_all_onesp (arg3)) ++ op0 = pc_rtx; ++ } ++ else if (TREE_CODE (arg3) == VECTOR_CST) ++ { ++ unsigned int negative = 0; ++ for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i) ++ { ++ tree cst = VECTOR_CST_ELT (arg3, i); ++ if (TREE_CODE (cst) == INTEGER_CST ++ && tree_int_cst_sign_bit (cst)) ++ negative++; ++ else if (TREE_CODE (cst) == REAL_CST ++ && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst))) ++ negative++; ++ } ++ if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3))) ++ op0 = pc_rtx; ++ } ++ else if (TREE_CODE (arg3) == SSA_NAME ++ && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE) ++ { ++ /* Recognize also when mask is like: ++ __v2df src = _mm_setzero_pd (); ++ __v2df mask = _mm_cmpeq_pd (src, src); ++ or ++ __v8sf src = _mm256_setzero_ps (); ++ __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ); ++ as that is a cheaper way to load all ones into ++ a register than having to load a constant from ++ memory. */ ++ gimple *def_stmt = SSA_NAME_DEF_STMT (arg3); ++ if (is_gimple_call (def_stmt)) ++ { ++ tree fndecl = gimple_call_fndecl (def_stmt); ++ if (fndecl ++ && fndecl_built_in_p (fndecl, BUILT_IN_MD)) ++ switch (DECL_MD_FUNCTION_CODE (fndecl)) ++ { ++ case IX86_BUILTIN_CMPPD: ++ case IX86_BUILTIN_CMPPS: ++ case IX86_BUILTIN_CMPPD256: ++ case IX86_BUILTIN_CMPPS256: ++ if (!integer_zerop (gimple_call_arg (def_stmt, 2))) ++ break; ++ /* FALLTHRU */ ++ case IX86_BUILTIN_CMPEQPD: ++ case IX86_BUILTIN_CMPEQPS: ++ if (initializer_zerop (gimple_call_arg (def_stmt, 0)) ++ && initializer_zerop (gimple_call_arg (def_stmt, ++ 1))) ++ op0 = pc_rtx; ++ break; ++ default: ++ break; ++ } ++ } ++ } ++ } ++ ++ pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); ++ if (! pat) ++ return const0_rtx; ++ emit_insn (pat); ++ ++ switch (fcode) ++ { ++ case IX86_BUILTIN_GATHER3DIV16SF: ++ if (target == NULL_RTX) ++ target = gen_reg_rtx (V8SFmode); ++ emit_insn (gen_vec_extract_lo_v16sf (target, subtarget)); ++ break; ++ case IX86_BUILTIN_GATHER3DIV16SI: ++ if (target == NULL_RTX) ++ target = gen_reg_rtx (V8SImode); ++ emit_insn (gen_vec_extract_lo_v16si (target, subtarget)); ++ break; ++ case IX86_BUILTIN_GATHER3DIV8SF: ++ case IX86_BUILTIN_GATHERDIV8SF: ++ if (target == NULL_RTX) ++ target = gen_reg_rtx (V4SFmode); ++ emit_insn (gen_vec_extract_lo_v8sf (target, subtarget)); ++ break; ++ case IX86_BUILTIN_GATHER3DIV8SI: ++ case IX86_BUILTIN_GATHERDIV8SI: ++ if (target == NULL_RTX) ++ target = gen_reg_rtx (V4SImode); ++ emit_insn (gen_vec_extract_lo_v8si (target, subtarget)); ++ break; ++ default: ++ target = subtarget; ++ break; ++ } ++ return target; ++ ++ scatter_gen: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ arg2 = CALL_EXPR_ARG (exp, 2); ++ arg3 = CALL_EXPR_ARG (exp, 3); ++ arg4 = CALL_EXPR_ARG (exp, 4); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ op2 = expand_normal (arg2); ++ op3 = expand_normal (arg3); ++ op4 = expand_normal (arg4); ++ mode1 = insn_data[icode].operand[1].mode; ++ mode2 = insn_data[icode].operand[2].mode; ++ mode3 = insn_data[icode].operand[3].mode; ++ mode4 = insn_data[icode].operand[4].mode; ++ ++ /* Scatter instruction stores operand op3 to memory with ++ indices from op2 and scale from op4 under writemask op1. 
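As a user-level illustration (not from the patch itself; assumes <immintrin.h>, -mavx2 at compile time and AVX2 hardware), the mask recognition above is aimed at exactly this idiom: comparing a zeroed vector with itself is a cheap way to obtain the all-ones gather mask, and the trailing scale argument must be a literal 1, 2, 4 or 8:

    #include <immintrin.h>
    #include <stdio.h>

    int
    main (void)
    {
      double table[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
      __m128i idx  = _mm_setr_epi32 (7, 0, 3, 1);
      __m256d src  = _mm256_setzero_pd ();
      __m256d mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);   /* all ones */
      __m256d g = _mm256_mask_i32gather_pd (src, table, idx, mask, 8);
      double out[4];
      _mm256_storeu_pd (out, g);
      printf ("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* 7 0 3 1 */
      return 0;
    }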
++ If index operand op2 has more elements then source operand ++ op3 one need to use only its low half. And vice versa. */ ++ switch (fcode) ++ { ++ case IX86_BUILTIN_SCATTERALTSIV8DF: ++ case IX86_BUILTIN_SCATTERALTSIV8DI: ++ half = gen_reg_rtx (V8SImode); ++ if (!nonimmediate_operand (op2, V16SImode)) ++ op2 = copy_to_mode_reg (V16SImode, op2); ++ emit_insn (gen_vec_extract_lo_v16si (half, op2)); ++ op2 = half; ++ break; ++ case IX86_BUILTIN_SCATTERALTDIV16SF: ++ case IX86_BUILTIN_SCATTERALTDIV16SI: ++ half = gen_reg_rtx (mode3); ++ if (mode3 == V8SFmode) ++ gen = gen_vec_extract_lo_v16sf; ++ else ++ gen = gen_vec_extract_lo_v16si; ++ if (!nonimmediate_operand (op3, GET_MODE (op3))) ++ op3 = copy_to_mode_reg (GET_MODE (op3), op3); ++ emit_insn (gen (half, op3)); ++ op3 = half; ++ break; ++ case IX86_BUILTIN_SCATTERALTSIV4DF: ++ case IX86_BUILTIN_SCATTERALTSIV4DI: ++ half = gen_reg_rtx (V4SImode); ++ if (!nonimmediate_operand (op2, V8SImode)) ++ op2 = copy_to_mode_reg (V8SImode, op2); ++ emit_insn (gen_vec_extract_lo_v8si (half, op2)); ++ op2 = half; ++ break; ++ case IX86_BUILTIN_SCATTERALTDIV8SF: ++ case IX86_BUILTIN_SCATTERALTDIV8SI: ++ half = gen_reg_rtx (mode3); ++ if (mode3 == V4SFmode) ++ gen = gen_vec_extract_lo_v8sf; ++ else ++ gen = gen_vec_extract_lo_v8si; ++ if (!nonimmediate_operand (op3, GET_MODE (op3))) ++ op3 = copy_to_mode_reg (GET_MODE (op3), op3); ++ emit_insn (gen (half, op3)); ++ op3 = half; ++ break; ++ case IX86_BUILTIN_SCATTERALTSIV2DF: ++ case IX86_BUILTIN_SCATTERALTSIV2DI: ++ if (!nonimmediate_operand (op2, V4SImode)) ++ op2 = copy_to_mode_reg (V4SImode, op2); ++ break; ++ case IX86_BUILTIN_SCATTERALTDIV4SF: ++ case IX86_BUILTIN_SCATTERALTDIV4SI: ++ if (!nonimmediate_operand (op3, GET_MODE (op3))) ++ op3 = copy_to_mode_reg (GET_MODE (op3), op3); ++ break; ++ default: ++ break; ++ } ++ ++ /* Force memory operand only with base register here. But we ++ don't want to do it on memory operand for other builtin ++ functions. */ ++ op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); ++ ++ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) ++ op0 = copy_to_mode_reg (Pmode, op0); ++ ++ op1 = fixup_modeless_constant (op1, mode1); ++ ++ if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) ++ { ++ if (!insn_data[icode].operand[1].predicate (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ } ++ else ++ { ++ op1 = copy_to_reg (op1); ++ op1 = lowpart_subreg (mode1, op1, GET_MODE (op1)); ++ } ++ ++ if (!insn_data[icode].operand[2].predicate (op2, mode2)) ++ op2 = copy_to_mode_reg (mode2, op2); ++ ++ if (!insn_data[icode].operand[3].predicate (op3, mode3)) ++ op3 = copy_to_mode_reg (mode3, op3); ++ ++ if (!insn_data[icode].operand[4].predicate (op4, mode4)) ++ { ++ error ("the last argument must be scale 1, 2, 4, 8"); ++ return const0_rtx; ++ } ++ ++ pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); ++ if (! 
pat) ++ return const0_rtx; ++ ++ emit_insn (pat); ++ return 0; ++ ++ vec_prefetch_gen: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ arg2 = CALL_EXPR_ARG (exp, 2); ++ arg3 = CALL_EXPR_ARG (exp, 3); ++ arg4 = CALL_EXPR_ARG (exp, 4); ++ op0 = expand_normal (arg0); ++ op1 = expand_normal (arg1); ++ op2 = expand_normal (arg2); ++ op3 = expand_normal (arg3); ++ op4 = expand_normal (arg4); ++ mode0 = insn_data[icode].operand[0].mode; ++ mode1 = insn_data[icode].operand[1].mode; ++ mode3 = insn_data[icode].operand[3].mode; ++ mode4 = insn_data[icode].operand[4].mode; ++ ++ op0 = fixup_modeless_constant (op0, mode0); ++ ++ if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) ++ { ++ if (!insn_data[icode].operand[0].predicate (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ } ++ else ++ { ++ op0 = copy_to_reg (op0); ++ op0 = lowpart_subreg (mode0, op0, GET_MODE (op0)); ++ } ++ ++ if (!insn_data[icode].operand[1].predicate (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ ++ /* Force memory operand only with base register here. But we ++ don't want to do it on memory operand for other builtin ++ functions. */ ++ op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1)); ++ ++ if (!insn_data[icode].operand[2].predicate (op2, Pmode)) ++ op2 = copy_to_mode_reg (Pmode, op2); ++ ++ if (!insn_data[icode].operand[3].predicate (op3, mode3)) ++ { ++ error ("the forth argument must be scale 1, 2, 4, 8"); ++ return const0_rtx; ++ } ++ ++ if (!insn_data[icode].operand[4].predicate (op4, mode4)) ++ { ++ error ("incorrect hint operand"); ++ return const0_rtx; ++ } ++ ++ pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); ++ if (! pat) ++ return const0_rtx; ++ ++ emit_insn (pat); ++ ++ return 0; ++ ++ case IX86_BUILTIN_XABORT: ++ icode = CODE_FOR_xabort; ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ mode0 = insn_data[icode].operand[0].mode; ++ if (!insn_data[icode].operand[0].predicate (op0, mode0)) ++ { ++ error ("the argument to % intrinsic must " ++ "be an 8-bit immediate"); ++ return const0_rtx; ++ } ++ emit_insn (gen_xabort (op0)); ++ return 0; ++ ++ case IX86_BUILTIN_RSTORSSP: ++ case IX86_BUILTIN_CLRSSBSY: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ icode = (fcode == IX86_BUILTIN_RSTORSSP ++ ? 
CODE_FOR_rstorssp ++ : CODE_FOR_clrssbsy); ++ if (!address_operand (op0, VOIDmode)) ++ { ++ op1 = convert_memory_address (Pmode, op0); ++ op0 = copy_addr_to_reg (op1); ++ } ++ emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0))); ++ return 0; ++ ++ case IX86_BUILTIN_WRSSD: ++ case IX86_BUILTIN_WRSSQ: ++ case IX86_BUILTIN_WRUSSD: ++ case IX86_BUILTIN_WRUSSQ: ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ arg1 = CALL_EXPR_ARG (exp, 1); ++ op1 = expand_normal (arg1); ++ switch (fcode) ++ { ++ case IX86_BUILTIN_WRSSD: ++ icode = CODE_FOR_wrsssi; ++ mode = SImode; ++ break; ++ case IX86_BUILTIN_WRSSQ: ++ icode = CODE_FOR_wrssdi; ++ mode = DImode; ++ break; ++ case IX86_BUILTIN_WRUSSD: ++ icode = CODE_FOR_wrusssi; ++ mode = SImode; ++ break; ++ case IX86_BUILTIN_WRUSSQ: ++ icode = CODE_FOR_wrussdi; ++ mode = DImode; ++ break; ++ } ++ op0 = force_reg (mode, op0); ++ if (!address_operand (op1, VOIDmode)) ++ { ++ op2 = convert_memory_address (Pmode, op1); ++ op1 = copy_addr_to_reg (op2); ++ } ++ emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1))); ++ return 0; ++ ++ default: ++ break; ++ } ++ ++ if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST ++ && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST) ++ { ++ i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST; ++ return ix86_expand_special_args_builtin (bdesc_special_args + i, exp, ++ target); ++ } ++ ++ if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST ++ && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST) ++ { ++ i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST; ++ rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL; ++ rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx); ++ rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx); ++ int masked = 1; ++ machine_mode mode, wide_mode, nar_mode; ++ ++ nar_mode = V4SFmode; ++ mode = V16SFmode; ++ wide_mode = V64SFmode; ++ fcn_mask = gen_avx5124fmaddps_4fmaddps_mask; ++ fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz; ++ ++ switch (fcode) ++ { ++ case IX86_BUILTIN_4FMAPS: ++ fcn = gen_avx5124fmaddps_4fmaddps; ++ masked = 0; ++ goto v4fma_expand; ++ ++ case IX86_BUILTIN_4DPWSSD: ++ nar_mode = V4SImode; ++ mode = V16SImode; ++ wide_mode = V64SImode; ++ fcn = gen_avx5124vnniw_vp4dpwssd; ++ masked = 0; ++ goto v4fma_expand; ++ ++ case IX86_BUILTIN_4DPWSSDS: ++ nar_mode = V4SImode; ++ mode = V16SImode; ++ wide_mode = V64SImode; ++ fcn = gen_avx5124vnniw_vp4dpwssds; ++ masked = 0; ++ goto v4fma_expand; ++ ++ case IX86_BUILTIN_4FNMAPS: ++ fcn = gen_avx5124fmaddps_4fnmaddps; ++ masked = 0; ++ goto v4fma_expand; ++ ++ case IX86_BUILTIN_4FNMAPS_MASK: ++ fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask; ++ fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz; ++ goto v4fma_expand; ++ ++ case IX86_BUILTIN_4DPWSSD_MASK: ++ nar_mode = V4SImode; ++ mode = V16SImode; ++ wide_mode = V64SImode; ++ fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask; ++ fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz; ++ goto v4fma_expand; ++ ++ case IX86_BUILTIN_4DPWSSDS_MASK: ++ nar_mode = V4SImode; ++ mode = V16SImode; ++ wide_mode = V64SImode; ++ fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask; ++ fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz; ++ goto v4fma_expand; ++ ++ case IX86_BUILTIN_4FMAPS_MASK: ++ { ++ tree args[4]; ++ rtx ops[4]; ++ rtx wide_reg; ++ rtx accum; ++ rtx addr; ++ rtx mem; ++ ++v4fma_expand: ++ wide_reg = gen_reg_rtx (wide_mode); ++ for (i = 0; i < 4; i++) ++ { ++ args[i] = CALL_EXPR_ARG (exp, i); ++ ops[i] = expand_normal (args[i]); ++ ++ emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64), ++ ops[i]); ++ } ++ ++ accum = expand_normal (CALL_EXPR_ARG (exp, 4)); 
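/* Illustrative sketch, not part of this patch: the masked branch just
   below picks between a zero-masking and a merge-masking form of the
   4FMA patterns.  The per-element writemask behaviour it relies on can
   be modelled in plain C roughly as follows; `model_writemask`,
   `computed`, `merge` and `nelts` are made-up names for illustration
   only, not identifiers from GCC.  */

static void
model_writemask (float *dst, const float *computed, const float *merge,
                 unsigned short mask, int nelts)
{
  for (int j = 0; j < nelts; j++)
    /* Bit j of the mask selects the freshly computed element; a clear
       bit keeps the merge source (merge-masking) or, when the merge
       vector is all zeros, yields zero (zero-masking).  */
    dst[j] = ((mask >> j) & 1) ? computed[j] : merge[j];
}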
++ accum = force_reg (mode, accum); ++ ++ addr = expand_normal (CALL_EXPR_ARG (exp, 5)); ++ addr = force_reg (Pmode, addr); ++ ++ mem = gen_rtx_MEM (nar_mode, addr); ++ ++ target = gen_reg_rtx (mode); ++ ++ emit_move_insn (target, accum); ++ ++ if (! masked) ++ emit_insn (fcn (target, accum, wide_reg, mem)); ++ else ++ { ++ rtx merge, mask; ++ merge = expand_normal (CALL_EXPR_ARG (exp, 6)); ++ ++ mask = expand_normal (CALL_EXPR_ARG (exp, 7)); ++ ++ if (CONST_INT_P (mask)) ++ mask = fixup_modeless_constant (mask, HImode); ++ ++ mask = force_reg (HImode, mask); ++ ++ if (GET_MODE (mask) != HImode) ++ mask = gen_rtx_SUBREG (HImode, mask, 0); ++ ++ /* If merge is 0 then we're about to emit z-masked variant. */ ++ if (const0_operand (merge, mode)) ++ emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask)); ++ /* If merge is the same as accum then emit merge-masked variant. */ ++ else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4)) ++ { ++ merge = force_reg (mode, merge); ++ emit_insn (fcn_mask (target, wide_reg, mem, merge, mask)); ++ } ++ /* Merge with something unknown might happen if we z-mask w/ -O0. */ ++ else ++ { ++ target = gen_reg_rtx (mode); ++ emit_move_insn (target, merge); ++ emit_insn (fcn_mask (target, wide_reg, mem, target, mask)); ++ } ++ } ++ return target; ++ } ++ ++ case IX86_BUILTIN_4FNMASS: ++ fcn = gen_avx5124fmaddps_4fnmaddss; ++ masked = 0; ++ goto s4fma_expand; ++ ++ case IX86_BUILTIN_4FMASS: ++ fcn = gen_avx5124fmaddps_4fmaddss; ++ masked = 0; ++ goto s4fma_expand; ++ ++ case IX86_BUILTIN_4FNMASS_MASK: ++ fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask; ++ fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz; ++ goto s4fma_expand; ++ ++ case IX86_BUILTIN_4FMASS_MASK: ++ { ++ tree args[4]; ++ rtx ops[4]; ++ rtx wide_reg; ++ rtx accum; ++ rtx addr; ++ rtx mem; ++ ++ fcn_mask = gen_avx5124fmaddps_4fmaddss_mask; ++ fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz; ++ ++s4fma_expand: ++ mode = V4SFmode; ++ wide_reg = gen_reg_rtx (V64SFmode); ++ for (i = 0; i < 4; i++) ++ { ++ rtx tmp; ++ args[i] = CALL_EXPR_ARG (exp, i); ++ ops[i] = expand_normal (args[i]); ++ ++ tmp = gen_reg_rtx (SFmode); ++ emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0)); ++ ++ emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64), ++ gen_rtx_SUBREG (V16SFmode, tmp, 0)); ++ } ++ ++ accum = expand_normal (CALL_EXPR_ARG (exp, 4)); ++ accum = force_reg (V4SFmode, accum); ++ ++ addr = expand_normal (CALL_EXPR_ARG (exp, 5)); ++ addr = force_reg (Pmode, addr); ++ ++ mem = gen_rtx_MEM (V4SFmode, addr); ++ ++ target = gen_reg_rtx (V4SFmode); ++ ++ emit_move_insn (target, accum); ++ ++ if (! masked) ++ emit_insn (fcn (target, accum, wide_reg, mem)); ++ else ++ { ++ rtx merge, mask; ++ merge = expand_normal (CALL_EXPR_ARG (exp, 6)); ++ ++ mask = expand_normal (CALL_EXPR_ARG (exp, 7)); ++ ++ if (CONST_INT_P (mask)) ++ mask = fixup_modeless_constant (mask, QImode); ++ ++ mask = force_reg (QImode, mask); ++ ++ if (GET_MODE (mask) != QImode) ++ mask = gen_rtx_SUBREG (QImode, mask, 0); ++ ++ /* If merge is 0 then we're about to emit z-masked variant. */ ++ if (const0_operand (merge, mode)) ++ emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask)); ++ /* If merge is the same as accum then emit merge-masked ++ variant. */ ++ else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4)) ++ { ++ merge = force_reg (mode, merge); ++ emit_insn (fcn_mask (target, wide_reg, mem, merge, mask)); ++ } ++ /* Merge with something unknown might happen if we z-mask ++ w/ -O0. 
*/ ++ else ++ { ++ target = gen_reg_rtx (mode); ++ emit_move_insn (target, merge); ++ emit_insn (fcn_mask (target, wide_reg, mem, target, mask)); ++ } ++ } ++ return target; ++ } ++ case IX86_BUILTIN_RDPID: ++ return ix86_expand_special_args_builtin (bdesc_args + i, exp, ++ target); ++ case IX86_BUILTIN_FABSQ: ++ case IX86_BUILTIN_COPYSIGNQ: ++ if (!TARGET_SSE) ++ /* Emit a normal call if SSE isn't available. */ ++ return expand_call (exp, target, ignore); ++ /* FALLTHRU */ ++ default: ++ return ix86_expand_args_builtin (bdesc_args + i, exp, target); ++ } ++ } ++ ++ if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST ++ && fcode <= IX86_BUILTIN__BDESC_COMI_LAST) ++ { ++ i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST; ++ return ix86_expand_sse_comi (bdesc_comi + i, exp, target); ++ } ++ ++ if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST ++ && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST) ++ { ++ i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST; ++ return ix86_expand_round_builtin (bdesc_round_args + i, exp, target); ++ } ++ ++ if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST ++ && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST) ++ { ++ i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST; ++ return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target); ++ } ++ ++ if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST ++ && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST) ++ { ++ i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST; ++ return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target); ++ } ++ ++ if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST ++ && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST) ++ { ++ i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST; ++ const struct builtin_description *d = bdesc_multi_arg + i; ++ return ix86_expand_multi_arg_builtin (d->icode, exp, target, ++ (enum ix86_builtin_func_type) ++ d->flag, d->comparison); ++ } ++ ++ if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST ++ && fcode <= IX86_BUILTIN__BDESC_CET_LAST) ++ { ++ i = fcode - IX86_BUILTIN__BDESC_CET_FIRST; ++ return ix86_expand_special_args_builtin (bdesc_cet + i, exp, ++ target); ++ } ++ ++ if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST ++ && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST) ++ { ++ i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST; ++ return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp, ++ target); ++ } ++ ++ gcc_unreachable (); ++} ++ ++/* A subroutine of ix86_expand_vector_init_duplicate. Tries to ++ fill target with val via vec_duplicate. */ ++ ++static bool ++ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) ++{ ++ bool ok; ++ rtx_insn *insn; ++ rtx dup; ++ ++ /* First attempt to recognize VAL as-is. */ ++ dup = gen_vec_duplicate (mode, val); ++ insn = emit_insn (gen_rtx_SET (target, dup)); ++ if (recog_memoized (insn) < 0) ++ { ++ rtx_insn *seq; ++ machine_mode innermode = GET_MODE_INNER (mode); ++ rtx reg; ++ ++ /* If that fails, force VAL into a register. */ ++ ++ start_sequence (); ++ reg = force_reg (innermode, val); ++ if (GET_MODE (reg) != innermode) ++ reg = gen_lowpart (innermode, reg); ++ SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg); ++ seq = get_insns (); ++ end_sequence (); ++ if (seq) ++ emit_insn_before (seq, insn); ++ ++ ok = recog_memoized (insn) >= 0; ++ gcc_assert (ok); ++ } ++ return true; ++} ++ ++/* Get a vector mode of the same size as the original but with elements ++ twice as wide. This is only guaranteed to apply to integral vectors. */ ++ ++static machine_mode ++get_mode_wider_vector (machine_mode o) ++{ ++ /* ??? 
Rely on the ordering that genmodes.c gives to vectors. */ ++ machine_mode n = GET_MODE_WIDER_MODE (o).require (); ++ gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2); ++ gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n)); ++ return n; ++} ++ ++static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d); ++static bool expand_vec_perm_1 (struct expand_vec_perm_d *d); ++ ++/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector ++ with all elements equal to VAR. Return true if successful. */ ++ ++static bool ++ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, ++ rtx target, rtx val) ++{ ++ bool ok; ++ ++ switch (mode) ++ { ++ case E_V2SImode: ++ case E_V2SFmode: ++ if (!mmx_ok) ++ return false; ++ /* FALLTHRU */ ++ ++ case E_V4DFmode: ++ case E_V4DImode: ++ case E_V8SFmode: ++ case E_V8SImode: ++ case E_V2DFmode: ++ case E_V2DImode: ++ case E_V4SFmode: ++ case E_V4SImode: ++ case E_V16SImode: ++ case E_V8DImode: ++ case E_V16SFmode: ++ case E_V8DFmode: ++ return ix86_vector_duplicate_value (mode, target, val); ++ ++ case E_V4HImode: ++ if (!mmx_ok) ++ return false; ++ if (TARGET_SSE || TARGET_3DNOW_A) ++ { ++ rtx x; ++ ++ val = gen_lowpart (SImode, val); ++ x = gen_rtx_TRUNCATE (HImode, val); ++ x = gen_rtx_VEC_DUPLICATE (mode, x); ++ emit_insn (gen_rtx_SET (target, x)); ++ return true; ++ } ++ goto widen; ++ ++ case E_V8QImode: ++ if (!mmx_ok) ++ return false; ++ goto widen; ++ ++ case E_V8HImode: ++ if (TARGET_AVX2) ++ return ix86_vector_duplicate_value (mode, target, val); ++ ++ if (TARGET_SSE2) ++ { ++ struct expand_vec_perm_d dperm; ++ rtx tmp1, tmp2; ++ ++ permute: ++ memset (&dperm, 0, sizeof (dperm)); ++ dperm.target = target; ++ dperm.vmode = mode; ++ dperm.nelt = GET_MODE_NUNITS (mode); ++ dperm.op0 = dperm.op1 = gen_reg_rtx (mode); ++ dperm.one_operand_p = true; ++ ++ /* Extend to SImode using a paradoxical SUBREG. */ ++ tmp1 = gen_reg_rtx (SImode); ++ emit_move_insn (tmp1, gen_lowpart (SImode, val)); ++ ++ /* Insert the SImode value as low element of a V4SImode vector. */ ++ tmp2 = gen_reg_rtx (V4SImode); ++ emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); ++ emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2)); ++ ++ ok = (expand_vec_perm_1 (&dperm) ++ || expand_vec_perm_broadcast_1 (&dperm)); ++ gcc_assert (ok); ++ return ok; ++ } ++ goto widen; ++ ++ case E_V16QImode: ++ if (TARGET_AVX2) ++ return ix86_vector_duplicate_value (mode, target, val); ++ ++ if (TARGET_SSE2) ++ goto permute; ++ goto widen; ++ ++ widen: ++ /* Replicate the value once into the next wider mode and recurse. */ ++ { ++ machine_mode smode, wsmode, wvmode; ++ rtx x; ++ ++ smode = GET_MODE_INNER (mode); ++ wvmode = get_mode_wider_vector (mode); ++ wsmode = GET_MODE_INNER (wvmode); ++ ++ val = convert_modes (wsmode, smode, val, true); ++ x = expand_simple_binop (wsmode, ASHIFT, val, ++ GEN_INT (GET_MODE_BITSIZE (smode)), ++ NULL_RTX, 1, OPTAB_LIB_WIDEN); ++ val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); ++ ++ x = gen_reg_rtx (wvmode); ++ ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val); ++ gcc_assert (ok); ++ emit_move_insn (target, gen_lowpart (GET_MODE (target), x)); ++ return ok; ++ } ++ ++ case E_V16HImode: ++ case E_V32QImode: ++ if (TARGET_AVX2) ++ return ix86_vector_duplicate_value (mode, target, val); ++ else ++ { ++ machine_mode hvmode = (mode == V16HImode ? 
V8HImode : V16QImode); ++ rtx x = gen_reg_rtx (hvmode); ++ ++ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); ++ gcc_assert (ok); ++ ++ x = gen_rtx_VEC_CONCAT (mode, x, x); ++ emit_insn (gen_rtx_SET (target, x)); ++ } ++ return true; ++ ++ case E_V64QImode: ++ case E_V32HImode: ++ if (TARGET_AVX512BW) ++ return ix86_vector_duplicate_value (mode, target, val); ++ else ++ { ++ machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode); ++ rtx x = gen_reg_rtx (hvmode); ++ ++ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); ++ gcc_assert (ok); ++ ++ x = gen_rtx_VEC_CONCAT (mode, x, x); ++ emit_insn (gen_rtx_SET (target, x)); ++ } ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector ++ whose ONE_VAR element is VAR, and other elements are zero. Return true ++ if successful. */ ++ ++static bool ++ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, ++ rtx target, rtx var, int one_var) ++{ ++ machine_mode vsimode; ++ rtx new_target; ++ rtx x, tmp; ++ bool use_vector_set = false; ++ rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; ++ ++ switch (mode) ++ { ++ case E_V2DImode: ++ /* For SSE4.1, we normally use vector set. But if the second ++ element is zero and inter-unit moves are OK, we use movq ++ instead. */ ++ use_vector_set = (TARGET_64BIT && TARGET_SSE4_1 ++ && !(TARGET_INTER_UNIT_MOVES_TO_VEC ++ && one_var == 0)); ++ break; ++ case E_V16QImode: ++ case E_V4SImode: ++ case E_V4SFmode: ++ use_vector_set = TARGET_SSE4_1; ++ break; ++ case E_V8HImode: ++ use_vector_set = TARGET_SSE2; ++ break; ++ case E_V4HImode: ++ use_vector_set = TARGET_SSE || TARGET_3DNOW_A; ++ break; ++ case E_V32QImode: ++ case E_V16HImode: ++ use_vector_set = TARGET_AVX; ++ break; ++ case E_V8SImode: ++ use_vector_set = TARGET_AVX; ++ gen_vec_set_0 = gen_vec_setv8si_0; ++ break; ++ case E_V8SFmode: ++ use_vector_set = TARGET_AVX; ++ gen_vec_set_0 = gen_vec_setv8sf_0; ++ break; ++ case E_V4DFmode: ++ use_vector_set = TARGET_AVX; ++ gen_vec_set_0 = gen_vec_setv4df_0; ++ break; ++ case E_V4DImode: ++ /* Use ix86_expand_vector_set in 64bit mode only. */ ++ use_vector_set = TARGET_AVX && TARGET_64BIT; ++ gen_vec_set_0 = gen_vec_setv4di_0; ++ break; ++ case E_V16SImode: ++ use_vector_set = TARGET_AVX512F && one_var == 0; ++ gen_vec_set_0 = gen_vec_setv16si_0; ++ break; ++ case E_V16SFmode: ++ use_vector_set = TARGET_AVX512F && one_var == 0; ++ gen_vec_set_0 = gen_vec_setv16sf_0; ++ break; ++ case E_V8DFmode: ++ use_vector_set = TARGET_AVX512F && one_var == 0; ++ gen_vec_set_0 = gen_vec_setv8df_0; ++ break; ++ case E_V8DImode: ++ /* Use ix86_expand_vector_set in 64bit mode only. 
*/ ++ use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0; ++ gen_vec_set_0 = gen_vec_setv8di_0; ++ break; ++ default: ++ break; ++ } ++ ++ if (use_vector_set) ++ { ++ if (gen_vec_set_0 && one_var == 0) ++ { ++ var = force_reg (GET_MODE_INNER (mode), var); ++ emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var)); ++ return true; ++ } ++ emit_insn (gen_rtx_SET (target, CONST0_RTX (mode))); ++ var = force_reg (GET_MODE_INNER (mode), var); ++ ix86_expand_vector_set (mmx_ok, target, var, one_var); ++ return true; ++ } ++ ++ switch (mode) ++ { ++ case E_V2SFmode: ++ case E_V2SImode: ++ if (!mmx_ok) ++ return false; ++ /* FALLTHRU */ ++ ++ case E_V2DFmode: ++ case E_V2DImode: ++ if (one_var != 0) ++ return false; ++ var = force_reg (GET_MODE_INNER (mode), var); ++ x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); ++ emit_insn (gen_rtx_SET (target, x)); ++ return true; ++ ++ case E_V4SFmode: ++ case E_V4SImode: ++ if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) ++ new_target = gen_reg_rtx (mode); ++ else ++ new_target = target; ++ var = force_reg (GET_MODE_INNER (mode), var); ++ x = gen_rtx_VEC_DUPLICATE (mode, var); ++ x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); ++ emit_insn (gen_rtx_SET (new_target, x)); ++ if (one_var != 0) ++ { ++ /* We need to shuffle the value to the correct position, so ++ create a new pseudo to store the intermediate result. */ ++ ++ /* With SSE2, we can use the integer shuffle insns. */ ++ if (mode != V4SFmode && TARGET_SSE2) ++ { ++ emit_insn (gen_sse2_pshufd_1 (new_target, new_target, ++ const1_rtx, ++ GEN_INT (one_var == 1 ? 0 : 1), ++ GEN_INT (one_var == 2 ? 0 : 1), ++ GEN_INT (one_var == 3 ? 0 : 1))); ++ if (target != new_target) ++ emit_move_insn (target, new_target); ++ return true; ++ } ++ ++ /* Otherwise convert the intermediate result to V4SFmode and ++ use the SSE1 shuffle instructions. */ ++ if (mode != V4SFmode) ++ { ++ tmp = gen_reg_rtx (V4SFmode); ++ emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); ++ } ++ else ++ tmp = new_target; ++ ++ emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, ++ const1_rtx, ++ GEN_INT (one_var == 1 ? 0 : 1), ++ GEN_INT (one_var == 2 ? 0+4 : 1+4), ++ GEN_INT (one_var == 3 ? 0+4 : 1+4))); ++ ++ if (mode != V4SFmode) ++ emit_move_insn (target, gen_lowpart (V4SImode, tmp)); ++ else if (tmp != target) ++ emit_move_insn (target, tmp); ++ } ++ else if (target != new_target) ++ emit_move_insn (target, new_target); ++ return true; ++ ++ case E_V8HImode: ++ case E_V16QImode: ++ vsimode = V4SImode; ++ goto widen; ++ case E_V4HImode: ++ case E_V8QImode: ++ if (!mmx_ok) ++ return false; ++ vsimode = V2SImode; ++ goto widen; ++ widen: ++ if (one_var != 0) ++ return false; ++ ++ /* Zero extend the variable element to SImode and recurse. */ ++ var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); ++ ++ x = gen_reg_rtx (vsimode); ++ if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, ++ var, one_var)) ++ gcc_unreachable (); ++ ++ emit_move_insn (target, gen_lowpart (mode, x)); ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector ++ consisting of the values in VALS. It is known that all elements ++ except ONE_VAR are constants. Return true if successful. 
*/ ++ ++static bool ++ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, ++ rtx target, rtx vals, int one_var) ++{ ++ rtx var = XVECEXP (vals, 0, one_var); ++ machine_mode wmode; ++ rtx const_vec, x; ++ ++ const_vec = copy_rtx (vals); ++ XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); ++ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); ++ ++ switch (mode) ++ { ++ case E_V2DFmode: ++ case E_V2DImode: ++ case E_V2SFmode: ++ case E_V2SImode: ++ /* For the two element vectors, it's just as easy to use ++ the general case. */ ++ return false; ++ ++ case E_V4DImode: ++ /* Use ix86_expand_vector_set in 64bit mode only. */ ++ if (!TARGET_64BIT) ++ return false; ++ /* FALLTHRU */ ++ case E_V4DFmode: ++ case E_V8SFmode: ++ case E_V8SImode: ++ case E_V16HImode: ++ case E_V32QImode: ++ case E_V4SFmode: ++ case E_V4SImode: ++ case E_V8HImode: ++ case E_V4HImode: ++ break; ++ ++ case E_V16QImode: ++ if (TARGET_SSE4_1) ++ break; ++ wmode = V8HImode; ++ goto widen; ++ case E_V8QImode: ++ wmode = V4HImode; ++ goto widen; ++ widen: ++ /* There's no way to set one QImode entry easily. Combine ++ the variable value with its adjacent constant value, and ++ promote to an HImode set. */ ++ x = XVECEXP (vals, 0, one_var ^ 1); ++ if (one_var & 1) ++ { ++ var = convert_modes (HImode, QImode, var, true); ++ var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), ++ NULL_RTX, 1, OPTAB_LIB_WIDEN); ++ x = GEN_INT (INTVAL (x) & 0xff); ++ } ++ else ++ { ++ var = convert_modes (HImode, QImode, var, true); ++ x = gen_int_mode (UINTVAL (x) << 8, HImode); ++ } ++ if (x != const0_rtx) ++ var = expand_simple_binop (HImode, IOR, var, x, var, ++ 1, OPTAB_LIB_WIDEN); ++ ++ x = gen_reg_rtx (wmode); ++ emit_move_insn (x, gen_lowpart (wmode, const_vec)); ++ ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); ++ ++ emit_move_insn (target, gen_lowpart (mode, x)); ++ return true; ++ ++ default: ++ return false; ++ } ++ ++ emit_move_insn (target, const_vec); ++ ix86_expand_vector_set (mmx_ok, target, var, one_var); ++ return true; ++} ++ ++/* A subroutine of ix86_expand_vector_init_general. Use vector ++ concatenate to handle the most general case: all values variable, ++ and none identical. 
*/ ++ ++static void ++ix86_expand_vector_init_concat (machine_mode mode, ++ rtx target, rtx *ops, int n) ++{ ++ machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode; ++ rtx first[16], second[8], third[4]; ++ rtvec v; ++ int i, j; ++ ++ switch (n) ++ { ++ case 2: ++ switch (mode) ++ { ++ case E_V16SImode: ++ cmode = V8SImode; ++ break; ++ case E_V16SFmode: ++ cmode = V8SFmode; ++ break; ++ case E_V8DImode: ++ cmode = V4DImode; ++ break; ++ case E_V8DFmode: ++ cmode = V4DFmode; ++ break; ++ case E_V8SImode: ++ cmode = V4SImode; ++ break; ++ case E_V8SFmode: ++ cmode = V4SFmode; ++ break; ++ case E_V4DImode: ++ cmode = V2DImode; ++ break; ++ case E_V4DFmode: ++ cmode = V2DFmode; ++ break; ++ case E_V4SImode: ++ cmode = V2SImode; ++ break; ++ case E_V4SFmode: ++ cmode = V2SFmode; ++ break; ++ case E_V2DImode: ++ cmode = DImode; ++ break; ++ case E_V2SImode: ++ cmode = SImode; ++ break; ++ case E_V2DFmode: ++ cmode = DFmode; ++ break; ++ case E_V2SFmode: ++ cmode = SFmode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (!register_operand (ops[1], cmode)) ++ ops[1] = force_reg (cmode, ops[1]); ++ if (!register_operand (ops[0], cmode)) ++ ops[0] = force_reg (cmode, ops[0]); ++ emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0], ++ ops[1]))); ++ break; ++ ++ case 4: ++ switch (mode) ++ { ++ case E_V4DImode: ++ cmode = V2DImode; ++ break; ++ case E_V4DFmode: ++ cmode = V2DFmode; ++ break; ++ case E_V4SImode: ++ cmode = V2SImode; ++ break; ++ case E_V4SFmode: ++ cmode = V2SFmode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ goto half; ++ ++ case 8: ++ switch (mode) ++ { ++ case E_V8DImode: ++ cmode = V2DImode; ++ hmode = V4DImode; ++ break; ++ case E_V8DFmode: ++ cmode = V2DFmode; ++ hmode = V4DFmode; ++ break; ++ case E_V8SImode: ++ cmode = V2SImode; ++ hmode = V4SImode; ++ break; ++ case E_V8SFmode: ++ cmode = V2SFmode; ++ hmode = V4SFmode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ goto half; ++ ++ case 16: ++ switch (mode) ++ { ++ case E_V16SImode: ++ cmode = V2SImode; ++ hmode = V4SImode; ++ gmode = V8SImode; ++ break; ++ case E_V16SFmode: ++ cmode = V2SFmode; ++ hmode = V4SFmode; ++ gmode = V8SFmode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ goto half; ++ ++half: ++ /* FIXME: We process inputs backward to help RA. PR 36222. */ ++ i = n - 1; ++ j = (n >> 1) - 1; ++ for (; i > 0; i -= 2, j--) ++ { ++ first[j] = gen_reg_rtx (cmode); ++ v = gen_rtvec (2, ops[i - 1], ops[i]); ++ ix86_expand_vector_init (false, first[j], ++ gen_rtx_PARALLEL (cmode, v)); ++ } ++ ++ n >>= 1; ++ if (n > 4) ++ { ++ gcc_assert (hmode != VOIDmode); ++ gcc_assert (gmode != VOIDmode); ++ for (i = j = 0; i < n; i += 2, j++) ++ { ++ second[j] = gen_reg_rtx (hmode); ++ ix86_expand_vector_init_concat (hmode, second [j], ++ &first [i], 2); ++ } ++ n >>= 1; ++ for (i = j = 0; i < n; i += 2, j++) ++ { ++ third[j] = gen_reg_rtx (gmode); ++ ix86_expand_vector_init_concat (gmode, third[j], ++ &second[i], 2); ++ } ++ n >>= 1; ++ ix86_expand_vector_init_concat (mode, target, third, n); ++ } ++ else if (n > 2) ++ { ++ gcc_assert (hmode != VOIDmode); ++ for (i = j = 0; i < n; i += 2, j++) ++ { ++ second[j] = gen_reg_rtx (hmode); ++ ix86_expand_vector_init_concat (hmode, second [j], ++ &first [i], 2); ++ } ++ n >>= 1; ++ ix86_expand_vector_init_concat (mode, target, second, n); ++ } ++ else ++ ix86_expand_vector_init_concat (mode, target, first, n); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* A subroutine of ix86_expand_vector_init_general. 
Use vector ++ interleave to handle the most general case: all values variable, ++ and none identical. */ ++ ++static void ++ix86_expand_vector_init_interleave (machine_mode mode, ++ rtx target, rtx *ops, int n) ++{ ++ machine_mode first_imode, second_imode, third_imode, inner_mode; ++ int i, j; ++ rtx op0, op1; ++ rtx (*gen_load_even) (rtx, rtx, rtx); ++ rtx (*gen_interleave_first_low) (rtx, rtx, rtx); ++ rtx (*gen_interleave_second_low) (rtx, rtx, rtx); ++ ++ switch (mode) ++ { ++ case E_V8HImode: ++ gen_load_even = gen_vec_setv8hi; ++ gen_interleave_first_low = gen_vec_interleave_lowv4si; ++ gen_interleave_second_low = gen_vec_interleave_lowv2di; ++ inner_mode = HImode; ++ first_imode = V4SImode; ++ second_imode = V2DImode; ++ third_imode = VOIDmode; ++ break; ++ case E_V16QImode: ++ gen_load_even = gen_vec_setv16qi; ++ gen_interleave_first_low = gen_vec_interleave_lowv8hi; ++ gen_interleave_second_low = gen_vec_interleave_lowv4si; ++ inner_mode = QImode; ++ first_imode = V8HImode; ++ second_imode = V4SImode; ++ third_imode = V2DImode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ for (i = 0; i < n; i++) ++ { ++ /* Extend the odd elment to SImode using a paradoxical SUBREG. */ ++ op0 = gen_reg_rtx (SImode); ++ emit_move_insn (op0, gen_lowpart (SImode, ops [i + i])); ++ ++ /* Insert the SImode value as low element of V4SImode vector. */ ++ op1 = gen_reg_rtx (V4SImode); ++ op0 = gen_rtx_VEC_MERGE (V4SImode, ++ gen_rtx_VEC_DUPLICATE (V4SImode, ++ op0), ++ CONST0_RTX (V4SImode), ++ const1_rtx); ++ emit_insn (gen_rtx_SET (op1, op0)); ++ ++ /* Cast the V4SImode vector back to a vector in orignal mode. */ ++ op0 = gen_reg_rtx (mode); ++ emit_move_insn (op0, gen_lowpart (mode, op1)); ++ ++ /* Load even elements into the second position. */ ++ emit_insn (gen_load_even (op0, ++ force_reg (inner_mode, ++ ops [i + i + 1]), ++ const1_rtx)); ++ ++ /* Cast vector to FIRST_IMODE vector. */ ++ ops[i] = gen_reg_rtx (first_imode); ++ emit_move_insn (ops[i], gen_lowpart (first_imode, op0)); ++ } ++ ++ /* Interleave low FIRST_IMODE vectors. */ ++ for (i = j = 0; i < n; i += 2, j++) ++ { ++ op0 = gen_reg_rtx (first_imode); ++ emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1])); ++ ++ /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */ ++ ops[j] = gen_reg_rtx (second_imode); ++ emit_move_insn (ops[j], gen_lowpart (second_imode, op0)); ++ } ++ ++ /* Interleave low SECOND_IMODE vectors. */ ++ switch (second_imode) ++ { ++ case E_V4SImode: ++ for (i = j = 0; i < n / 2; i += 2, j++) ++ { ++ op0 = gen_reg_rtx (second_imode); ++ emit_insn (gen_interleave_second_low (op0, ops[i], ++ ops[i + 1])); ++ ++ /* Cast the SECOND_IMODE vector to the THIRD_IMODE ++ vector. */ ++ ops[j] = gen_reg_rtx (third_imode); ++ emit_move_insn (ops[j], gen_lowpart (third_imode, op0)); ++ } ++ second_imode = V2DImode; ++ gen_interleave_second_low = gen_vec_interleave_lowv2di; ++ /* FALLTHRU */ ++ ++ case E_V2DImode: ++ op0 = gen_reg_rtx (second_imode); ++ emit_insn (gen_interleave_second_low (op0, ops[0], ++ ops[1])); ++ ++ /* Cast the SECOND_IMODE vector back to a vector on original ++ mode. */ ++ emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0))); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* A subroutine of ix86_expand_vector_init. Handle the most general case: ++ all values variable, and none identical. 
*/ ++ ++static void ++ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, ++ rtx target, rtx vals) ++{ ++ rtx ops[64], op0, op1, op2, op3, op4, op5; ++ machine_mode half_mode = VOIDmode; ++ machine_mode quarter_mode = VOIDmode; ++ int n, i; ++ ++ switch (mode) ++ { ++ case E_V2SFmode: ++ case E_V2SImode: ++ if (!mmx_ok && !TARGET_SSE) ++ break; ++ /* FALLTHRU */ ++ ++ case E_V16SImode: ++ case E_V16SFmode: ++ case E_V8DFmode: ++ case E_V8DImode: ++ case E_V8SFmode: ++ case E_V8SImode: ++ case E_V4DFmode: ++ case E_V4DImode: ++ case E_V4SFmode: ++ case E_V4SImode: ++ case E_V2DFmode: ++ case E_V2DImode: ++ n = GET_MODE_NUNITS (mode); ++ for (i = 0; i < n; i++) ++ ops[i] = XVECEXP (vals, 0, i); ++ ix86_expand_vector_init_concat (mode, target, ops, n); ++ return; ++ ++ case E_V2TImode: ++ for (i = 0; i < 2; i++) ++ ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i)); ++ op0 = gen_reg_rtx (V4DImode); ++ ix86_expand_vector_init_concat (V4DImode, op0, ops, 2); ++ emit_move_insn (target, gen_lowpart (GET_MODE (target), op0)); ++ return; ++ ++ case E_V4TImode: ++ for (i = 0; i < 4; i++) ++ ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i)); ++ ops[4] = gen_reg_rtx (V4DImode); ++ ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2); ++ ops[5] = gen_reg_rtx (V4DImode); ++ ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2); ++ op0 = gen_reg_rtx (V8DImode); ++ ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2); ++ emit_move_insn (target, gen_lowpart (GET_MODE (target), op0)); ++ return; ++ ++ case E_V32QImode: ++ half_mode = V16QImode; ++ goto half; ++ ++ case E_V16HImode: ++ half_mode = V8HImode; ++ goto half; ++ ++half: ++ n = GET_MODE_NUNITS (mode); ++ for (i = 0; i < n; i++) ++ ops[i] = XVECEXP (vals, 0, i); ++ op0 = gen_reg_rtx (half_mode); ++ op1 = gen_reg_rtx (half_mode); ++ ix86_expand_vector_init_interleave (half_mode, op0, ops, ++ n >> 2); ++ ix86_expand_vector_init_interleave (half_mode, op1, ++ &ops [n >> 1], n >> 2); ++ emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1))); ++ return; ++ ++ case E_V64QImode: ++ quarter_mode = V16QImode; ++ half_mode = V32QImode; ++ goto quarter; ++ ++ case E_V32HImode: ++ quarter_mode = V8HImode; ++ half_mode = V16HImode; ++ goto quarter; ++ ++quarter: ++ n = GET_MODE_NUNITS (mode); ++ for (i = 0; i < n; i++) ++ ops[i] = XVECEXP (vals, 0, i); ++ op0 = gen_reg_rtx (quarter_mode); ++ op1 = gen_reg_rtx (quarter_mode); ++ op2 = gen_reg_rtx (quarter_mode); ++ op3 = gen_reg_rtx (quarter_mode); ++ op4 = gen_reg_rtx (half_mode); ++ op5 = gen_reg_rtx (half_mode); ++ ix86_expand_vector_init_interleave (quarter_mode, op0, ops, ++ n >> 3); ++ ix86_expand_vector_init_interleave (quarter_mode, op1, ++ &ops [n >> 2], n >> 3); ++ ix86_expand_vector_init_interleave (quarter_mode, op2, ++ &ops [n >> 1], n >> 3); ++ ix86_expand_vector_init_interleave (quarter_mode, op3, ++ &ops [(n >> 1) | (n >> 2)], n >> 3); ++ emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1))); ++ emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3))); ++ emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5))); ++ return; ++ ++ case E_V16QImode: ++ if (!TARGET_SSE4_1) ++ break; ++ /* FALLTHRU */ ++ ++ case E_V8HImode: ++ if (!TARGET_SSE2) ++ break; ++ ++ /* Don't use ix86_expand_vector_init_interleave if we can't ++ move from GPR to SSE register directly. 
*/ ++ if (!TARGET_INTER_UNIT_MOVES_TO_VEC) ++ break; ++ ++ n = GET_MODE_NUNITS (mode); ++ for (i = 0; i < n; i++) ++ ops[i] = XVECEXP (vals, 0, i); ++ ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); ++ return; ++ ++ case E_V4HImode: ++ case E_V8QImode: ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ { ++ int i, j, n_elts, n_words, n_elt_per_word; ++ machine_mode inner_mode; ++ rtx words[4], shift; ++ ++ inner_mode = GET_MODE_INNER (mode); ++ n_elts = GET_MODE_NUNITS (mode); ++ n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; ++ n_elt_per_word = n_elts / n_words; ++ shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); ++ ++ for (i = 0; i < n_words; ++i) ++ { ++ rtx word = NULL_RTX; ++ ++ for (j = 0; j < n_elt_per_word; ++j) ++ { ++ rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); ++ elt = convert_modes (word_mode, inner_mode, elt, true); ++ ++ if (j == 0) ++ word = elt; ++ else ++ { ++ word = expand_simple_binop (word_mode, ASHIFT, word, shift, ++ word, 1, OPTAB_LIB_WIDEN); ++ word = expand_simple_binop (word_mode, IOR, word, elt, ++ word, 1, OPTAB_LIB_WIDEN); ++ } ++ } ++ ++ words[i] = word; ++ } ++ ++ if (n_words == 1) ++ emit_move_insn (target, gen_lowpart (mode, words[0])); ++ else if (n_words == 2) ++ { ++ rtx tmp = gen_reg_rtx (mode); ++ emit_clobber (tmp); ++ emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); ++ emit_move_insn (gen_highpart (word_mode, tmp), words[1]); ++ emit_move_insn (target, tmp); ++ } ++ else if (n_words == 4) ++ { ++ rtx tmp = gen_reg_rtx (V4SImode); ++ gcc_assert (word_mode == SImode); ++ vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); ++ ix86_expand_vector_init_general (false, V4SImode, tmp, vals); ++ emit_move_insn (target, gen_lowpart (mode, tmp)); ++ } ++ else ++ gcc_unreachable (); ++ } ++} ++ ++/* Initialize vector TARGET via VALS. Suppress the use of MMX ++ instructions unless MMX_OK is true. */ ++ ++void ++ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) ++{ ++ machine_mode mode = GET_MODE (target); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ int n_elts = GET_MODE_NUNITS (mode); ++ int n_var = 0, one_var = -1; ++ bool all_same = true, all_const_zero = true; ++ int i; ++ rtx x; ++ ++ /* Handle first initialization from vector elts. */ ++ if (n_elts != XVECLEN (vals, 0)) ++ { ++ rtx subtarget = target; ++ x = XVECEXP (vals, 0, 0); ++ gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode); ++ if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts) ++ { ++ rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; ++ if (inner_mode == QImode || inner_mode == HImode) ++ { ++ unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); ++ mode = mode_for_vector (SImode, n_bits / 4).require (); ++ inner_mode = mode_for_vector (SImode, n_bits / 8).require (); ++ ops[0] = gen_lowpart (inner_mode, ops[0]); ++ ops[1] = gen_lowpart (inner_mode, ops[1]); ++ subtarget = gen_reg_rtx (mode); ++ } ++ ix86_expand_vector_init_concat (mode, subtarget, ops, 2); ++ if (subtarget != target) ++ emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget)); ++ return; ++ } ++ gcc_unreachable (); ++ } ++ ++ for (i = 0; i < n_elts; ++i) ++ { ++ x = XVECEXP (vals, 0, i); ++ if (!(CONST_SCALAR_INT_P (x) ++ || CONST_DOUBLE_P (x) ++ || CONST_FIXED_P (x))) ++ n_var++, one_var = i; ++ else if (x != CONST0_RTX (inner_mode)) ++ all_const_zero = false; ++ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) ++ all_same = false; ++ } ++ ++ /* Constants are best loaded from the constant pool. 
*/ ++ if (n_var == 0) ++ { ++ emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); ++ return; ++ } ++ ++ /* If all values are identical, broadcast the value. */ ++ if (all_same ++ && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, ++ XVECEXP (vals, 0, 0))) ++ return; ++ ++ /* Values where only one field is non-constant are best loaded from ++ the pool and overwritten via move later. */ ++ if (n_var == 1) ++ { ++ if (all_const_zero ++ && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, ++ XVECEXP (vals, 0, one_var), ++ one_var)) ++ return; ++ ++ if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) ++ return; ++ } ++ ++ ix86_expand_vector_init_general (mmx_ok, mode, target, vals); ++} ++ ++void ++ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) ++{ ++ machine_mode mode = GET_MODE (target); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ machine_mode half_mode; ++ bool use_vec_merge = false; ++ rtx tmp; ++ static rtx (*gen_extract[6][2]) (rtx, rtx) ++ = { ++ { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi }, ++ { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi }, ++ { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si }, ++ { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di }, ++ { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf }, ++ { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df } ++ }; ++ static rtx (*gen_insert[6][2]) (rtx, rtx, rtx) ++ = { ++ { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi }, ++ { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi }, ++ { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si }, ++ { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di }, ++ { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf }, ++ { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } ++ }; ++ int i, j, n; ++ machine_mode mmode = VOIDmode; ++ rtx (*gen_blendm) (rtx, rtx, rtx, rtx); ++ ++ switch (mode) ++ { ++ case E_V2SFmode: ++ case E_V2SImode: ++ if (mmx_ok) ++ { ++ tmp = gen_reg_rtx (GET_MODE_INNER (mode)); ++ ix86_expand_vector_extract (true, tmp, target, 1 - elt); ++ if (elt == 0) ++ tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); ++ else ++ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); ++ emit_insn (gen_rtx_SET (target, tmp)); ++ return; ++ } ++ break; ++ ++ case E_V2DImode: ++ use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT; ++ if (use_vec_merge) ++ break; ++ ++ tmp = gen_reg_rtx (GET_MODE_INNER (mode)); ++ ix86_expand_vector_extract (false, tmp, target, 1 - elt); ++ if (elt == 0) ++ tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); ++ else ++ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); ++ emit_insn (gen_rtx_SET (target, tmp)); ++ return; ++ ++ case E_V2DFmode: ++ { ++ rtx op0, op1; ++ ++ /* For the two element vectors, we implement a VEC_CONCAT with ++ the extraction of the other element. 
*/ ++ ++ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); ++ tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); ++ ++ if (elt == 0) ++ op0 = val, op1 = tmp; ++ else ++ op0 = tmp, op1 = val; ++ ++ tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); ++ emit_insn (gen_rtx_SET (target, tmp)); ++ } ++ return; ++ ++ case E_V4SFmode: ++ use_vec_merge = TARGET_SSE4_1; ++ if (use_vec_merge) ++ break; ++ ++ switch (elt) ++ { ++ case 0: ++ use_vec_merge = true; ++ break; ++ ++ case 1: ++ /* tmp = target = A B C D */ ++ tmp = copy_to_reg (target); ++ /* target = A A B B */ ++ emit_insn (gen_vec_interleave_lowv4sf (target, target, target)); ++ /* target = X A B B */ ++ ix86_expand_vector_set (false, target, val, 0); ++ /* target = A X C D */ ++ emit_insn (gen_sse_shufps_v4sf (target, target, tmp, ++ const1_rtx, const0_rtx, ++ GEN_INT (2+4), GEN_INT (3+4))); ++ return; ++ ++ case 2: ++ /* tmp = target = A B C D */ ++ tmp = copy_to_reg (target); ++ /* tmp = X B C D */ ++ ix86_expand_vector_set (false, tmp, val, 0); ++ /* target = A B X D */ ++ emit_insn (gen_sse_shufps_v4sf (target, target, tmp, ++ const0_rtx, const1_rtx, ++ GEN_INT (0+4), GEN_INT (3+4))); ++ return; ++ ++ case 3: ++ /* tmp = target = A B C D */ ++ tmp = copy_to_reg (target); ++ /* tmp = X B C D */ ++ ix86_expand_vector_set (false, tmp, val, 0); ++ /* target = A B X D */ ++ emit_insn (gen_sse_shufps_v4sf (target, target, tmp, ++ const0_rtx, const1_rtx, ++ GEN_INT (2+4), GEN_INT (0+4))); ++ return; ++ ++ default: ++ gcc_unreachable (); ++ } ++ break; ++ ++ case E_V4SImode: ++ use_vec_merge = TARGET_SSE4_1; ++ if (use_vec_merge) ++ break; ++ ++ /* Element 0 handled by vec_merge below. */ ++ if (elt == 0) ++ { ++ use_vec_merge = true; ++ break; ++ } ++ ++ if (TARGET_SSE2) ++ { ++ /* With SSE2, use integer shuffles to swap element 0 and ELT, ++ store into element 0, then shuffle them back. */ ++ ++ rtx order[4]; ++ ++ order[0] = GEN_INT (elt); ++ order[1] = const1_rtx; ++ order[2] = const2_rtx; ++ order[3] = GEN_INT (3); ++ order[elt] = const0_rtx; ++ ++ emit_insn (gen_sse2_pshufd_1 (target, target, order[0], ++ order[1], order[2], order[3])); ++ ++ ix86_expand_vector_set (false, target, val, 0); ++ ++ emit_insn (gen_sse2_pshufd_1 (target, target, order[0], ++ order[1], order[2], order[3])); ++ } ++ else ++ { ++ /* For SSE1, we have to reuse the V4SF code. */ ++ rtx t = gen_reg_rtx (V4SFmode); ++ emit_move_insn (t, gen_lowpart (V4SFmode, target)); ++ ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt); ++ emit_move_insn (target, gen_lowpart (mode, t)); ++ } ++ return; ++ ++ case E_V8HImode: ++ use_vec_merge = TARGET_SSE2; ++ break; ++ case E_V4HImode: ++ use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); ++ break; ++ ++ case E_V16QImode: ++ use_vec_merge = TARGET_SSE4_1; ++ break; ++ ++ case E_V8QImode: ++ break; ++ ++ case E_V32QImode: ++ half_mode = V16QImode; ++ j = 0; ++ n = 16; ++ goto half; ++ ++ case E_V16HImode: ++ half_mode = V8HImode; ++ j = 1; ++ n = 8; ++ goto half; ++ ++ case E_V8SImode: ++ half_mode = V4SImode; ++ j = 2; ++ n = 4; ++ goto half; ++ ++ case E_V4DImode: ++ half_mode = V2DImode; ++ j = 3; ++ n = 2; ++ goto half; ++ ++ case E_V8SFmode: ++ half_mode = V4SFmode; ++ j = 4; ++ n = 4; ++ goto half; ++ ++ case E_V4DFmode: ++ half_mode = V2DFmode; ++ j = 5; ++ n = 2; ++ goto half; ++ ++half: ++ /* Compute offset. */ ++ i = elt / n; ++ elt %= n; ++ ++ gcc_assert (i <= 1); ++ ++ /* Extract the half. 
*/ ++ tmp = gen_reg_rtx (half_mode); ++ emit_insn (gen_extract[j][i] (tmp, target)); ++ ++ /* Put val in tmp at elt. */ ++ ix86_expand_vector_set (false, tmp, val, elt); ++ ++ /* Put it back. */ ++ emit_insn (gen_insert[j][i] (target, target, tmp)); ++ return; ++ ++ case E_V8DFmode: ++ if (TARGET_AVX512F) ++ { ++ mmode = QImode; ++ gen_blendm = gen_avx512f_blendmv8df; ++ } ++ break; ++ ++ case E_V8DImode: ++ if (TARGET_AVX512F) ++ { ++ mmode = QImode; ++ gen_blendm = gen_avx512f_blendmv8di; ++ } ++ break; ++ ++ case E_V16SFmode: ++ if (TARGET_AVX512F) ++ { ++ mmode = HImode; ++ gen_blendm = gen_avx512f_blendmv16sf; ++ } ++ break; ++ ++ case E_V16SImode: ++ if (TARGET_AVX512F) ++ { ++ mmode = HImode; ++ gen_blendm = gen_avx512f_blendmv16si; ++ } ++ break; ++ ++ case E_V32HImode: ++ if (TARGET_AVX512BW) ++ { ++ mmode = SImode; ++ gen_blendm = gen_avx512bw_blendmv32hi; ++ } ++ else if (TARGET_AVX512F) ++ { ++ half_mode = E_V8HImode; ++ n = 8; ++ goto quarter; ++ } ++ break; ++ ++ case E_V64QImode: ++ if (TARGET_AVX512BW) ++ { ++ mmode = DImode; ++ gen_blendm = gen_avx512bw_blendmv64qi; ++ } ++ else if (TARGET_AVX512F) ++ { ++ half_mode = E_V16QImode; ++ n = 16; ++ goto quarter; ++ } ++ break; ++ ++quarter: ++ /* Compute offset. */ ++ i = elt / n; ++ elt %= n; ++ ++ gcc_assert (i <= 3); ++ ++ { ++ /* Extract the quarter. */ ++ tmp = gen_reg_rtx (V4SImode); ++ rtx tmp2 = gen_lowpart (V16SImode, target); ++ rtx mask = gen_reg_rtx (QImode); ++ ++ emit_move_insn (mask, constm1_rtx); ++ emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i), ++ tmp, mask)); ++ ++ tmp2 = gen_reg_rtx (half_mode); ++ emit_move_insn (tmp2, gen_lowpart (half_mode, tmp)); ++ tmp = tmp2; ++ ++ /* Put val in tmp at elt. */ ++ ix86_expand_vector_set (false, tmp, val, elt); ++ ++ /* Put it back. */ ++ tmp2 = gen_reg_rtx (V16SImode); ++ rtx tmp3 = gen_lowpart (V16SImode, target); ++ mask = gen_reg_rtx (HImode); ++ emit_move_insn (mask, constm1_rtx); ++ tmp = gen_lowpart (V4SImode, tmp); ++ emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i), ++ tmp3, mask)); ++ emit_move_insn (target, gen_lowpart (mode, tmp2)); ++ } ++ return; ++ ++ default: ++ break; ++ } ++ ++ if (mmode != VOIDmode) ++ { ++ tmp = gen_reg_rtx (mode); ++ emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val))); ++ /* The avx512*_blendm expanders have different operand order ++ from VEC_MERGE. In VEC_MERGE, the first input operand is used for ++ elements where the mask is set and second input operand otherwise, ++ in {sse,avx}*_*blend* the first input operand is used for elements ++ where the mask is clear and second input operand otherwise. 
*/ ++ emit_insn (gen_blendm (target, target, tmp, ++ force_reg (mmode, ++ gen_int_mode (HOST_WIDE_INT_1U << elt, ++ mmode)))); ++ } ++ else if (use_vec_merge) ++ { ++ tmp = gen_rtx_VEC_DUPLICATE (mode, val); ++ tmp = gen_rtx_VEC_MERGE (mode, tmp, target, ++ GEN_INT (HOST_WIDE_INT_1U << elt)); ++ emit_insn (gen_rtx_SET (target, tmp)); ++ } ++ else ++ { ++ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); ++ ++ emit_move_insn (mem, target); ++ ++ tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode)); ++ emit_move_insn (tmp, val); ++ ++ emit_move_insn (target, mem); ++ } ++} ++ ++void ++ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) ++{ ++ machine_mode mode = GET_MODE (vec); ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ bool use_vec_extr = false; ++ rtx tmp; ++ ++ switch (mode) ++ { ++ case E_V2SImode: ++ case E_V2SFmode: ++ if (!mmx_ok) ++ break; ++ /* FALLTHRU */ ++ ++ case E_V2DFmode: ++ case E_V2DImode: ++ case E_V2TImode: ++ case E_V4TImode: ++ use_vec_extr = true; ++ break; ++ ++ case E_V4SFmode: ++ use_vec_extr = TARGET_SSE4_1; ++ if (use_vec_extr) ++ break; ++ ++ switch (elt) ++ { ++ case 0: ++ tmp = vec; ++ break; ++ ++ case 1: ++ case 3: ++ tmp = gen_reg_rtx (mode); ++ emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec, ++ GEN_INT (elt), GEN_INT (elt), ++ GEN_INT (elt+4), GEN_INT (elt+4))); ++ break; ++ ++ case 2: ++ tmp = gen_reg_rtx (mode); ++ emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ vec = tmp; ++ use_vec_extr = true; ++ elt = 0; ++ break; ++ ++ case E_V4SImode: ++ use_vec_extr = TARGET_SSE4_1; ++ if (use_vec_extr) ++ break; ++ ++ if (TARGET_SSE2) ++ { ++ switch (elt) ++ { ++ case 0: ++ tmp = vec; ++ break; ++ ++ case 1: ++ case 3: ++ tmp = gen_reg_rtx (mode); ++ emit_insn (gen_sse2_pshufd_1 (tmp, vec, ++ GEN_INT (elt), GEN_INT (elt), ++ GEN_INT (elt), GEN_INT (elt))); ++ break; ++ ++ case 2: ++ tmp = gen_reg_rtx (mode); ++ emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ vec = tmp; ++ use_vec_extr = true; ++ elt = 0; ++ } ++ else ++ { ++ /* For SSE1, we have to reuse the V4SF code. 
*/ ++ ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), ++ gen_lowpart (V4SFmode, vec), elt); ++ return; ++ } ++ break; ++ ++ case E_V8HImode: ++ use_vec_extr = TARGET_SSE2; ++ break; ++ case E_V4HImode: ++ use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); ++ break; ++ ++ case E_V16QImode: ++ use_vec_extr = TARGET_SSE4_1; ++ break; ++ ++ case E_V8SFmode: ++ if (TARGET_AVX) ++ { ++ tmp = gen_reg_rtx (V4SFmode); ++ if (elt < 4) ++ emit_insn (gen_vec_extract_lo_v8sf (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v8sf (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 3); ++ return; ++ } ++ break; ++ ++ case E_V4DFmode: ++ if (TARGET_AVX) ++ { ++ tmp = gen_reg_rtx (V2DFmode); ++ if (elt < 2) ++ emit_insn (gen_vec_extract_lo_v4df (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v4df (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 1); ++ return; ++ } ++ break; ++ ++ case E_V32QImode: ++ if (TARGET_AVX) ++ { ++ tmp = gen_reg_rtx (V16QImode); ++ if (elt < 16) ++ emit_insn (gen_vec_extract_lo_v32qi (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v32qi (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 15); ++ return; ++ } ++ break; ++ ++ case E_V16HImode: ++ if (TARGET_AVX) ++ { ++ tmp = gen_reg_rtx (V8HImode); ++ if (elt < 8) ++ emit_insn (gen_vec_extract_lo_v16hi (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v16hi (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 7); ++ return; ++ } ++ break; ++ ++ case E_V8SImode: ++ if (TARGET_AVX) ++ { ++ tmp = gen_reg_rtx (V4SImode); ++ if (elt < 4) ++ emit_insn (gen_vec_extract_lo_v8si (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v8si (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 3); ++ return; ++ } ++ break; ++ ++ case E_V4DImode: ++ if (TARGET_AVX) ++ { ++ tmp = gen_reg_rtx (V2DImode); ++ if (elt < 2) ++ emit_insn (gen_vec_extract_lo_v4di (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v4di (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 1); ++ return; ++ } ++ break; ++ ++ case E_V32HImode: ++ if (TARGET_AVX512BW) ++ { ++ tmp = gen_reg_rtx (V16HImode); ++ if (elt < 16) ++ emit_insn (gen_vec_extract_lo_v32hi (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v32hi (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 15); ++ return; ++ } ++ break; ++ ++ case E_V64QImode: ++ if (TARGET_AVX512BW) ++ { ++ tmp = gen_reg_rtx (V32QImode); ++ if (elt < 32) ++ emit_insn (gen_vec_extract_lo_v64qi (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v64qi (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 31); ++ return; ++ } ++ break; ++ ++ case E_V16SFmode: ++ tmp = gen_reg_rtx (V8SFmode); ++ if (elt < 8) ++ emit_insn (gen_vec_extract_lo_v16sf (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v16sf (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 7); ++ return; ++ ++ case E_V8DFmode: ++ tmp = gen_reg_rtx (V4DFmode); ++ if (elt < 4) ++ emit_insn (gen_vec_extract_lo_v8df (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v8df (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 3); ++ return; ++ ++ case E_V16SImode: ++ tmp = gen_reg_rtx (V8SImode); ++ if (elt < 8) ++ emit_insn (gen_vec_extract_lo_v16si (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v16si (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 7); ++ return; ++ ++ case E_V8DImode: ++ tmp = gen_reg_rtx (V4DImode); 
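/* Illustrative sketch, not part of this patch: the AVX/AVX-512 extract
   cases around this point all use the same recursive-halving scheme --
   extract the low or high half of the vector, then look the element up
   in that half with the index reduced modulo the half width (elt & 3,
   elt & 7, ...).  A scalar model of the idea, with made-up names:  */

static double
model_extract (const double *vec, int nelts, int elt)
{
  if (nelts == 1)
    return vec[0];
  /* Choose the half containing ELT, mirroring the
     gen_vec_extract_lo/hi selection, then recurse into it.  */
  const double *half = (elt < nelts / 2) ? vec : vec + nelts / 2;
  return model_extract (half, nelts / 2, elt & (nelts / 2 - 1));
}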
++ if (elt < 4) ++ emit_insn (gen_vec_extract_lo_v8di (tmp, vec)); ++ else ++ emit_insn (gen_vec_extract_hi_v8di (tmp, vec)); ++ ix86_expand_vector_extract (false, target, tmp, elt & 3); ++ return; ++ ++ case E_V8QImode: ++ /* ??? Could extract the appropriate HImode element and shift. */ ++ default: ++ break; ++ } ++ ++ if (use_vec_extr) ++ { ++ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); ++ tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); ++ ++ /* Let the rtl optimizers know about the zero extension performed. */ ++ if (inner_mode == QImode || inner_mode == HImode) ++ { ++ tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); ++ target = gen_lowpart (SImode, target); ++ } ++ ++ emit_insn (gen_rtx_SET (target, tmp)); ++ } ++ else ++ { ++ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); ++ ++ emit_move_insn (mem, vec); ++ ++ tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); ++ emit_move_insn (target, tmp); ++ } ++} ++ ++/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC ++ to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode. ++ The upper bits of DEST are undefined, though they shouldn't cause ++ exceptions (some bits from src or all zeros are ok). */ ++ ++static void ++emit_reduc_half (rtx dest, rtx src, int i) ++{ ++ rtx tem, d = dest; ++ switch (GET_MODE (src)) ++ { ++ case E_V4SFmode: ++ if (i == 128) ++ tem = gen_sse_movhlps (dest, src, src); ++ else ++ tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx, ++ GEN_INT (1 + 4), GEN_INT (1 + 4)); ++ break; ++ case E_V2DFmode: ++ tem = gen_vec_interleave_highv2df (dest, src, src); ++ break; ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ d = gen_reg_rtx (V1TImode); ++ tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src), ++ GEN_INT (i / 2)); ++ break; ++ case E_V8SFmode: ++ if (i == 256) ++ tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx); ++ else ++ tem = gen_avx_shufps256 (dest, src, src, ++ GEN_INT (i == 128 ? 2 + (3 << 2) : 1)); ++ break; ++ case E_V4DFmode: ++ if (i == 256) ++ tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx); ++ else ++ tem = gen_avx_shufpd256 (dest, src, src, const1_rtx); ++ break; ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V8SImode: ++ case E_V4DImode: ++ if (i == 256) ++ { ++ if (GET_MODE (dest) != V4DImode) ++ d = gen_reg_rtx (V4DImode); ++ tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src), ++ gen_lowpart (V4DImode, src), ++ const1_rtx); ++ } ++ else ++ { ++ d = gen_reg_rtx (V2TImode); ++ tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src), ++ GEN_INT (i / 2)); ++ } ++ break; ++ case E_V64QImode: ++ case E_V32HImode: ++ case E_V16SImode: ++ case E_V16SFmode: ++ case E_V8DImode: ++ case E_V8DFmode: ++ if (i > 128) ++ tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest), ++ gen_lowpart (V16SImode, src), ++ gen_lowpart (V16SImode, src), ++ GEN_INT (0x4 + (i == 512 ? 4 : 0)), ++ GEN_INT (0x5 + (i == 512 ? 4 : 0)), ++ GEN_INT (0x6 + (i == 512 ? 4 : 0)), ++ GEN_INT (0x7 + (i == 512 ? 4 : 0)), ++ GEN_INT (0xC), GEN_INT (0xD), ++ GEN_INT (0xE), GEN_INT (0xF), ++ GEN_INT (0x10), GEN_INT (0x11), ++ GEN_INT (0x12), GEN_INT (0x13), ++ GEN_INT (0x14), GEN_INT (0x15), ++ GEN_INT (0x16), GEN_INT (0x17)); ++ else ++ tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest), ++ gen_lowpart (V16SImode, src), ++ GEN_INT (i == 128 ? 0x2 : 0x1), ++ GEN_INT (0x3), ++ GEN_INT (0x3), ++ GEN_INT (0x3), ++ GEN_INT (i == 128 ? 
0x6 : 0x5), ++ GEN_INT (0x7), ++ GEN_INT (0x7), ++ GEN_INT (0x7), ++ GEN_INT (i == 128 ? 0xA : 0x9), ++ GEN_INT (0xB), ++ GEN_INT (0xB), ++ GEN_INT (0xB), ++ GEN_INT (i == 128 ? 0xE : 0xD), ++ GEN_INT (0xF), ++ GEN_INT (0xF), ++ GEN_INT (0xF)); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ emit_insn (tem); ++ if (d != dest) ++ emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); ++} ++ ++/* Expand a vector reduction. FN is the binary pattern to reduce; ++ DEST is the destination; IN is the input vector. */ ++ ++void ++ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) ++{ ++ rtx half, dst, vec = in; ++ machine_mode mode = GET_MODE (in); ++ int i; ++ ++ /* SSE4 has a special instruction for V8HImode UMIN reduction. */ ++ if (TARGET_SSE4_1 ++ && mode == V8HImode ++ && fn == gen_uminv8hi3) ++ { ++ emit_insn (gen_sse4_1_phminposuw (dest, in)); ++ return; ++ } ++ ++ for (i = GET_MODE_BITSIZE (mode); ++ i > GET_MODE_UNIT_BITSIZE (mode); ++ i >>= 1) ++ { ++ half = gen_reg_rtx (mode); ++ emit_reduc_half (half, vec, i); ++ if (i == GET_MODE_UNIT_BITSIZE (mode) * 2) ++ dst = dest; ++ else ++ dst = gen_reg_rtx (mode); ++ emit_insn (fn (dst, half, vec)); ++ vec = dst; ++ } ++} ++ ++/* Output code to perform a conditional jump to LABEL, if C2 flag in ++ FP status register is set. */ ++ ++void ++ix86_emit_fp_unordered_jump (rtx label) ++{ ++ rtx reg = gen_reg_rtx (HImode); ++ rtx_insn *insn; ++ rtx temp; ++ ++ emit_insn (gen_x86_fnstsw_1 (reg)); ++ ++ if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) ++ { ++ emit_insn (gen_x86_sahf_1 (reg)); ++ ++ temp = gen_rtx_REG (CCmode, FLAGS_REG); ++ temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); ++ } ++ else ++ { ++ emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04))); ++ ++ temp = gen_rtx_REG (CCNOmode, FLAGS_REG); ++ temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); ++ } ++ ++ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, ++ gen_rtx_LABEL_REF (VOIDmode, label), ++ pc_rtx); ++ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, temp)); ++ predict_jump (REG_BR_PROB_BASE * 10 / 100); ++ JUMP_LABEL (insn) = label; ++} ++ ++/* Output code to perform an sinh XFmode calculation. 
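For context, ix86_expand_reduc above reduces a whole vector in log2(nelt) steps: each emit_reduc_half call moves the upper half of the still-live elements down to the bottom of a fresh register, and the caller folds it into the lower half with the supplied binary pattern (the lone shortcut is SSE4.1 phminposuw for a V8HImode unsigned-minimum reduction). A scalar sketch of the same halving idea, not part of the patch and assuming a power-of-two element count:

#include <stdio.h>

/* Scalar model of the halving reduction: fold the upper half of the
   live elements into the lower half until one element remains.
   N is assumed to be a power of two.  */
static int
reduce_max (int *v, int n)
{
  for (int width = n; width > 1; width /= 2)
    for (int j = 0; j < width / 2; j++)
      if (v[j + width / 2] > v[j])
        v[j] = v[j + width / 2];
  return v[0];
}

int
main (void)
{
  int v[8] = { 3, 9, 1, 7, 4, 8, 2, 6 };
  printf ("max = %d\n", reduce_max (v, 8));   /* prints: max = 9 */
  return 0;
}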
*/ ++ ++void ix86_emit_i387_sinh (rtx op0, rtx op1) ++{ ++ rtx e1 = gen_reg_rtx (XFmode); ++ rtx e2 = gen_reg_rtx (XFmode); ++ rtx scratch = gen_reg_rtx (HImode); ++ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); ++ rtx half = const_double_from_real_value (dconsthalf, XFmode); ++ rtx cst1, tmp; ++ rtx_code_label *jump_label = gen_label_rtx (); ++ rtx_insn *insn; ++ ++ /* scratch = fxam (op1) */ ++ emit_insn (gen_fxamxf2_i387 (scratch, op1)); ++ ++ /* e1 = expm1 (|op1|) */ ++ emit_insn (gen_absxf2 (e2, op1)); ++ emit_insn (gen_expm1xf2 (e1, e2)); ++ ++ /* e2 = e1 / (e1 + 1.0) + e1 */ ++ cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); ++ emit_insn (gen_addxf3 (e2, e1, cst1)); ++ emit_insn (gen_divxf3 (e2, e1, e2)); ++ emit_insn (gen_addxf3 (e2, e2, e1)); ++ ++ /* flags = signbit (op1) */ ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); ++ ++ /* if (flags) then e2 = -e2 */ ++ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, ++ gen_rtx_EQ (VOIDmode, flags, const0_rtx), ++ gen_rtx_LABEL_REF (VOIDmode, jump_label), ++ pc_rtx); ++ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); ++ predict_jump (REG_BR_PROB_BASE * 50 / 100); ++ JUMP_LABEL (insn) = jump_label; ++ ++ emit_insn (gen_negxf2 (e2, e2)); ++ ++ emit_label (jump_label); ++ LABEL_NUSES (jump_label) = 1; ++ ++ /* op0 = 0.5 * e2 */ ++ half = force_reg (XFmode, half); ++ emit_insn (gen_mulxf3 (op0, e2, half)); ++} ++ ++/* Output code to perform an cosh XFmode calculation. */ ++ ++void ix86_emit_i387_cosh (rtx op0, rtx op1) ++{ ++ rtx e1 = gen_reg_rtx (XFmode); ++ rtx e2 = gen_reg_rtx (XFmode); ++ rtx half = const_double_from_real_value (dconsthalf, XFmode); ++ rtx cst1; ++ ++ /* e1 = exp (op1) */ ++ emit_insn (gen_expxf2 (e1, op1)); ++ ++ /* e2 = e1 + 1.0 / e1 */ ++ cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); ++ emit_insn (gen_divxf3 (e2, cst1, e1)); ++ emit_insn (gen_addxf3 (e2, e1, e2)); ++ ++ /* op0 = 0.5 * e2 */ ++ half = force_reg (XFmode, half); ++ emit_insn (gen_mulxf3 (op0, e2, half)); ++} ++ ++/* Output code to perform an tanh XFmode calculation. */ ++ ++void ix86_emit_i387_tanh (rtx op0, rtx op1) ++{ ++ rtx e1 = gen_reg_rtx (XFmode); ++ rtx e2 = gen_reg_rtx (XFmode); ++ rtx scratch = gen_reg_rtx (HImode); ++ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); ++ rtx cst2, tmp; ++ rtx_code_label *jump_label = gen_label_rtx (); ++ rtx_insn *insn; ++ ++ /* scratch = fxam (op1) */ ++ emit_insn (gen_fxamxf2_i387 (scratch, op1)); ++ ++ /* e1 = expm1 (-|2 * op1|) */ ++ emit_insn (gen_addxf3 (e2, op1, op1)); ++ emit_insn (gen_absxf2 (e2, e2)); ++ emit_insn (gen_negxf2 (e2, e2)); ++ emit_insn (gen_expm1xf2 (e1, e2)); ++ ++ /* e2 = e1 / (e1 + 2.0) */ ++ cst2 = force_reg (XFmode, CONST2_RTX (XFmode)); ++ emit_insn (gen_addxf3 (e2, e1, cst2)); ++ emit_insn (gen_divxf3 (e2, e1, e2)); ++ ++ /* flags = signbit (op1) */ ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); ++ ++ /* if (!flags) then e2 = -e2 */ ++ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, ++ gen_rtx_NE (VOIDmode, flags, const0_rtx), ++ gen_rtx_LABEL_REF (VOIDmode, jump_label), ++ pc_rtx); ++ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); ++ predict_jump (REG_BR_PROB_BASE * 50 / 100); ++ JUMP_LABEL (insn) = jump_label; ++ ++ emit_insn (gen_negxf2 (e2, e2)); ++ ++ emit_label (jump_label); ++ LABEL_NUSES (jump_label) = 1; ++ ++ emit_move_insn (op0, e2); ++} ++ ++/* Output code to perform an asinh XFmode calculation. 
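The three i387 expanders above lean on standard identities: with e1 = expm1(|x|), e1/(e1+1) + e1 equals 2*sinh(|x|); cosh(x) is 0.5*(exp(x) + 1/exp(x)); and with e1 = expm1(-|2x|), e1/(e1+2) equals -tanh(|x|). The fxam/fnstsw test only restores the sign afterwards. A quick numerical check of those identities (an illustrative sketch, not part of the patch):

#include <math.h>
#include <stdio.h>

static double
sinh_via_expm1 (double x)
{
  double e1 = expm1 (fabs (x));
  double e2 = e1 / (e1 + 1.0) + e1;     /* == 2 * sinh (|x|) */
  return copysign (0.5 * e2, x);
}

static double
cosh_via_exp (double x)
{
  double e1 = exp (x);
  return 0.5 * (e1 + 1.0 / e1);
}

static double
tanh_via_expm1 (double x)
{
  double e1 = expm1 (-fabs (2.0 * x));
  double e2 = e1 / (e1 + 2.0);          /* == -tanh (|x|) */
  return x < 0.0 ? e2 : -e2;
}

int
main (void)
{
  double x = -0.73;
  printf ("sinh: %.17g vs %.17g\n", sinh_via_expm1 (x), sinh (x));
  printf ("cosh: %.17g vs %.17g\n", cosh_via_exp (x), cosh (x));
  printf ("tanh: %.17g vs %.17g\n", tanh_via_expm1 (x), tanh (x));
  return 0;
}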
*/ ++ ++void ix86_emit_i387_asinh (rtx op0, rtx op1) ++{ ++ rtx e1 = gen_reg_rtx (XFmode); ++ rtx e2 = gen_reg_rtx (XFmode); ++ rtx scratch = gen_reg_rtx (HImode); ++ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); ++ rtx cst1, tmp; ++ rtx_code_label *jump_label = gen_label_rtx (); ++ rtx_insn *insn; ++ ++ /* e2 = sqrt (op1^2 + 1.0) + 1.0 */ ++ emit_insn (gen_mulxf3 (e1, op1, op1)); ++ cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); ++ emit_insn (gen_addxf3 (e2, e1, cst1)); ++ emit_insn (gen_sqrtxf2 (e2, e2)); ++ emit_insn (gen_addxf3 (e2, e2, cst1)); ++ ++ /* e1 = e1 / e2 */ ++ emit_insn (gen_divxf3 (e1, e1, e2)); ++ ++ /* scratch = fxam (op1) */ ++ emit_insn (gen_fxamxf2_i387 (scratch, op1)); ++ ++ /* e1 = e1 + |op1| */ ++ emit_insn (gen_absxf2 (e2, op1)); ++ emit_insn (gen_addxf3 (e1, e1, e2)); ++ ++ /* e2 = log1p (e1) */ ++ ix86_emit_i387_log1p (e2, e1); ++ ++ /* flags = signbit (op1) */ ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); ++ ++ /* if (flags) then e2 = -e2 */ ++ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, ++ gen_rtx_EQ (VOIDmode, flags, const0_rtx), ++ gen_rtx_LABEL_REF (VOIDmode, jump_label), ++ pc_rtx); ++ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); ++ predict_jump (REG_BR_PROB_BASE * 50 / 100); ++ JUMP_LABEL (insn) = jump_label; ++ ++ emit_insn (gen_negxf2 (e2, e2)); ++ ++ emit_label (jump_label); ++ LABEL_NUSES (jump_label) = 1; ++ ++ emit_move_insn (op0, e2); ++} ++ ++/* Output code to perform an acosh XFmode calculation. */ ++ ++void ix86_emit_i387_acosh (rtx op0, rtx op1) ++{ ++ rtx e1 = gen_reg_rtx (XFmode); ++ rtx e2 = gen_reg_rtx (XFmode); ++ rtx cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); ++ ++ /* e2 = sqrt (op1 + 1.0) */ ++ emit_insn (gen_addxf3 (e2, op1, cst1)); ++ emit_insn (gen_sqrtxf2 (e2, e2)); ++ ++ /* e1 = sqrt (op1 - 1.0) */ ++ emit_insn (gen_subxf3 (e1, op1, cst1)); ++ emit_insn (gen_sqrtxf2 (e1, e1)); ++ ++ /* e1 = e1 * e2 */ ++ emit_insn (gen_mulxf3 (e1, e1, e2)); ++ ++ /* e1 = e1 + op1 */ ++ emit_insn (gen_addxf3 (e1, e1, op1)); ++ ++ /* op0 = log (e1) */ ++ emit_insn (gen_logxf2 (op0, e1)); ++} ++ ++/* Output code to perform an atanh XFmode calculation. 
*/ ++ ++void ix86_emit_i387_atanh (rtx op0, rtx op1) ++{ ++ rtx e1 = gen_reg_rtx (XFmode); ++ rtx e2 = gen_reg_rtx (XFmode); ++ rtx scratch = gen_reg_rtx (HImode); ++ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); ++ rtx half = const_double_from_real_value (dconsthalf, XFmode); ++ rtx cst1, tmp; ++ rtx_code_label *jump_label = gen_label_rtx (); ++ rtx_insn *insn; ++ ++ /* scratch = fxam (op1) */ ++ emit_insn (gen_fxamxf2_i387 (scratch, op1)); ++ ++ /* e2 = |op1| */ ++ emit_insn (gen_absxf2 (e2, op1)); ++ ++ /* e1 = -(e2 + e2) / (e2 + 1.0) */ ++ cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); ++ emit_insn (gen_addxf3 (e1, e2, cst1)); ++ emit_insn (gen_addxf3 (e2, e2, e2)); ++ emit_insn (gen_negxf2 (e2, e2)); ++ emit_insn (gen_divxf3 (e1, e2, e1)); ++ ++ /* e2 = log1p (e1) */ ++ ix86_emit_i387_log1p (e2, e1); ++ ++ /* flags = signbit (op1) */ ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); ++ ++ /* if (!flags) then e2 = -e2 */ ++ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, ++ gen_rtx_NE (VOIDmode, flags, const0_rtx), ++ gen_rtx_LABEL_REF (VOIDmode, jump_label), ++ pc_rtx); ++ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); ++ predict_jump (REG_BR_PROB_BASE * 50 / 100); ++ JUMP_LABEL (insn) = jump_label; ++ ++ emit_insn (gen_negxf2 (e2, e2)); ++ ++ emit_label (jump_label); ++ LABEL_NUSES (jump_label) = 1; ++ ++ /* op0 = 0.5 * e2 */ ++ half = force_reg (XFmode, half); ++ emit_insn (gen_mulxf3 (op0, e2, half)); ++} ++ ++/* Output code to perform a log1p XFmode calculation. */ ++ ++void ix86_emit_i387_log1p (rtx op0, rtx op1) ++{ ++ rtx_code_label *label1 = gen_label_rtx (); ++ rtx_code_label *label2 = gen_label_rtx (); ++ ++ rtx tmp = gen_reg_rtx (XFmode); ++ rtx res = gen_reg_rtx (XFmode); ++ rtx cst, cstln2, cst1; ++ rtx_insn *insn; ++ ++ cst = const_double_from_real_value ++ (REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), XFmode); ++ cstln2 = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */ ++ ++ emit_insn (gen_absxf2 (tmp, op1)); ++ ++ cst = force_reg (XFmode, cst); ++ ix86_expand_branch (GE, tmp, cst, label1); ++ predict_jump (REG_BR_PROB_BASE * 10 / 100); ++ insn = get_last_insn (); ++ JUMP_LABEL (insn) = label1; ++ ++ emit_insn (gen_fyl2xp1xf3_i387 (res, op1, cstln2)); ++ emit_jump (label2); ++ ++ emit_label (label1); ++ LABEL_NUSES (label1) = 1; ++ ++ cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); ++ emit_insn (gen_rtx_SET (tmp, gen_rtx_PLUS (XFmode, op1, cst1))); ++ emit_insn (gen_fyl2xxf3_i387 (res, tmp, cstln2)); ++ ++ emit_label (label2); ++ LABEL_NUSES (label2) = 1; ++ ++ emit_move_insn (op0, res); ++} ++ ++/* Emit code for round calculation. 
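ix86_emit_i387_atanh above relies on log1p(-2|x|/(|x|+1)) = log((1-|x|)/(1+|x|)) = -2*atanh(|x|), restoring the sign from the fxam bit afterwards, and ix86_emit_i387_log1p only feeds fyl2xp1 when |arg| stays below 1 - sqrt(2)/2 (the 0.29289... constant), the range documented for that instruction; larger arguments go through fyl2x on 1 + arg instead. A numerical check of the identity (sketch only, assumes |x| < 1):

#include <math.h>
#include <stdio.h>

static double
atanh_via_log1p (double x)
{
  double a = fabs (x);
  double e1 = -(a + a) / (a + 1.0);
  double e2 = log1p (e1);               /* == -2 * atanh (|x|) */
  if (!signbit (x))
    e2 = -e2;
  return 0.5 * e2;
}

int
main (void)
{
  /* fyl2xp1 is only used below this threshold.  */
  printf ("1 - sqrt(2)/2 = %.17g\n", 1.0 - sqrt (2.0) / 2.0);

  double tests[] = { -0.9, -0.3, 0.2, 0.7 };
  for (unsigned i = 0; i < sizeof tests / sizeof *tests; i++)
    printf ("x = % .1f: %.17g vs %.17g\n", tests[i],
            atanh_via_log1p (tests[i]), atanh (tests[i]));
  return 0;
}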
*/ ++void ix86_emit_i387_round (rtx op0, rtx op1) ++{ ++ machine_mode inmode = GET_MODE (op1); ++ machine_mode outmode = GET_MODE (op0); ++ rtx e1 = gen_reg_rtx (XFmode); ++ rtx e2 = gen_reg_rtx (XFmode); ++ rtx scratch = gen_reg_rtx (HImode); ++ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); ++ rtx half = const_double_from_real_value (dconsthalf, XFmode); ++ rtx res = gen_reg_rtx (outmode); ++ rtx_code_label *jump_label = gen_label_rtx (); ++ rtx (*floor_insn) (rtx, rtx); ++ rtx (*neg_insn) (rtx, rtx); ++ rtx_insn *insn; ++ rtx tmp; ++ ++ switch (inmode) ++ { ++ case E_SFmode: ++ case E_DFmode: ++ tmp = gen_reg_rtx (XFmode); ++ ++ emit_insn (gen_rtx_SET (tmp, gen_rtx_FLOAT_EXTEND (XFmode, op1))); ++ op1 = tmp; ++ break; ++ case E_XFmode: ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ switch (outmode) ++ { ++ case E_SFmode: ++ floor_insn = gen_frndintxf2_floor; ++ neg_insn = gen_negsf2; ++ break; ++ case E_DFmode: ++ floor_insn = gen_frndintxf2_floor; ++ neg_insn = gen_negdf2; ++ break; ++ case E_XFmode: ++ floor_insn = gen_frndintxf2_floor; ++ neg_insn = gen_negxf2; ++ break; ++ case E_HImode: ++ floor_insn = gen_lfloorxfhi2; ++ neg_insn = gen_neghi2; ++ break; ++ case E_SImode: ++ floor_insn = gen_lfloorxfsi2; ++ neg_insn = gen_negsi2; ++ break; ++ case E_DImode: ++ floor_insn = gen_lfloorxfdi2; ++ neg_insn = gen_negdi2; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */ ++ ++ /* scratch = fxam(op1) */ ++ emit_insn (gen_fxamxf2_i387 (scratch, op1)); ++ ++ /* e1 = fabs(op1) */ ++ emit_insn (gen_absxf2 (e1, op1)); ++ ++ /* e2 = e1 + 0.5 */ ++ half = force_reg (XFmode, half); ++ emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (XFmode, e1, half))); ++ ++ /* res = floor(e2) */ ++ switch (outmode) ++ { ++ case E_SFmode: ++ case E_DFmode: ++ { ++ tmp = gen_reg_rtx (XFmode); ++ ++ emit_insn (floor_insn (tmp, e2)); ++ emit_insn (gen_rtx_SET (res, ++ gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp), ++ UNSPEC_TRUNC_NOOP))); ++ } ++ break; ++ default: ++ emit_insn (floor_insn (res, e2)); ++ } ++ ++ /* flags = signbit(a) */ ++ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); ++ ++ /* if (flags) then res = -res */ ++ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, ++ gen_rtx_EQ (VOIDmode, flags, const0_rtx), ++ gen_rtx_LABEL_REF (VOIDmode, jump_label), ++ pc_rtx); ++ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); ++ predict_jump (REG_BR_PROB_BASE * 50 / 100); ++ JUMP_LABEL (insn) = jump_label; ++ ++ emit_insn (neg_insn (res, res)); ++ ++ emit_label (jump_label); ++ LABEL_NUSES (jump_label) = 1; ++ ++ emit_move_insn (op0, res); ++} ++ ++/* Output code to perform a Newton-Rhapson approximation of a single precision ++ floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. 
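ix86_emit_i387_round above follows the recipe in its comment, round(a) = sgn(a) * floor(fabs(a) + 0.5): the floor runs on the absolute value (frndintxf2_floor or the lfloorxf patterns, depending on the output mode), and the stored fxam sign bit decides whether the result is negated at the end. A scalar sketch of the same recipe, not part of the patch:

#include <math.h>
#include <stdio.h>

/* round (a) = sgn (a) * floor (fabs (a) + 0.5), as in ix86_emit_i387_round
   (sketch; the real expander reads the sign from the fxam status word).  */
static double
round_via_floor (double a)
{
  double r = floor (fabs (a) + 0.5);
  return signbit (a) ? -r : r;
}

int
main (void)
{
  double tests[] = { 2.5, -2.5, 0.49, -0.51, -0.0 };
  for (unsigned i = 0; i < sizeof tests / sizeof *tests; i++)
    printf ("% .2f -> % .1f (libm round: % .1f)\n",
            tests[i], round_via_floor (tests[i]), round (tests[i]));
  return 0;
}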
*/ ++ ++void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) ++{ ++ rtx x0, x1, e0, e1; ++ ++ x0 = gen_reg_rtx (mode); ++ e0 = gen_reg_rtx (mode); ++ e1 = gen_reg_rtx (mode); ++ x1 = gen_reg_rtx (mode); ++ ++ /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ ++ ++ b = force_reg (mode, b); ++ ++ /* x0 = rcp(b) estimate */ ++ if (mode == V16SFmode || mode == V8DFmode) ++ { ++ if (TARGET_AVX512ER) ++ { ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), ++ UNSPEC_RCP28))); ++ /* res = a * x0 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0))); ++ return; ++ } ++ else ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), ++ UNSPEC_RCP14))); ++ } ++ else ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), ++ UNSPEC_RCP))); ++ ++ /* e0 = x0 * b */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); ++ ++ /* e0 = x0 * e0 */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); ++ ++ /* e1 = x0 + x0 */ ++ emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); ++ ++ /* x1 = e1 - e0 */ ++ emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); ++ ++ /* res = a * x1 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); ++} ++ ++/* Output code to perform a Newton-Rhapson approximation of a ++ single precision floating point [reciprocal] square root. */ ++ ++void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) ++{ ++ rtx x0, e0, e1, e2, e3, mthree, mhalf; ++ REAL_VALUE_TYPE r; ++ int unspec; ++ ++ x0 = gen_reg_rtx (mode); ++ e0 = gen_reg_rtx (mode); ++ e1 = gen_reg_rtx (mode); ++ e2 = gen_reg_rtx (mode); ++ e3 = gen_reg_rtx (mode); ++ ++ if (TARGET_AVX512ER && mode == V16SFmode) ++ { ++ if (recip) ++ /* res = rsqrt28(a) estimate */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), ++ UNSPEC_RSQRT28))); ++ else ++ { ++ /* x0 = rsqrt28(a) estimate */ ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), ++ UNSPEC_RSQRT28))); ++ /* res = rcp28(x0) estimate */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0), ++ UNSPEC_RCP28))); ++ } ++ return; ++ } ++ ++ real_from_integer (&r, VOIDmode, -3, SIGNED); ++ mthree = const_double_from_real_value (r, SFmode); ++ ++ real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL); ++ mhalf = const_double_from_real_value (r, SFmode); ++ unspec = UNSPEC_RSQRT; ++ ++ if (VECTOR_MODE_P (mode)) ++ { ++ mthree = ix86_build_const_vector (mode, true, mthree); ++ mhalf = ix86_build_const_vector (mode, true, mhalf); ++ /* There is no 512-bit rsqrt. There is however rsqrt14. */ ++ if (GET_MODE_SIZE (mode) == 64) ++ unspec = UNSPEC_RSQRT14; ++ } ++ ++ /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) ++ rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */ ++ ++ a = force_reg (mode, a); ++ ++ /* x0 = rsqrt(a) estimate */ ++ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), ++ unspec))); ++ ++ /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */ ++ if (!recip) ++ { ++ rtx zero = force_reg (mode, CONST0_RTX(mode)); ++ rtx mask; ++ ++ /* Handle masked compare. */ ++ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) ++ { ++ mask = gen_reg_rtx (HImode); ++ /* Imm value 0x4 corresponds to not-equal comparison. 
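ix86_emit_swdivsf and ix86_emit_swsqrtsf above refine the rough rcp/rsqrt hardware estimates with a single Newton-Raphson step, which roughly squares the relative error: x1 = 2*x0 - b*x0*x0 improves x0 ~ 1/b, and x1 = 0.5*x0*(3 - a*x0*x0) improves x0 ~ 1/sqrt(a) (the comment above writes this as -0.5 and (... - 3.0), the same expression). The AVX-512ER rcp28/rsqrt28 paths return early because their estimates are already accurate enough. A sketch showing the quadratic error reduction, with a deliberately perturbed starting estimate standing in for the hardware value:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double b = 7.0, a = 7.0;

  /* Stand-ins for the low-precision rcp/rsqrt hardware estimates.  */
  double x0 = (1.0 / b) * (1.0 + 3e-4);
  double y0 = (1.0 / sqrt (a)) * (1.0 - 3e-4);

  /* One Newton-Raphson step each, as in the expanders above.  */
  double x1 = 2.0 * x0 - b * x0 * x0;           /* refines 1/b */
  double y1 = 0.5 * y0 * (3.0 - a * y0 * y0);   /* refines 1/sqrt(a) */

  printf ("1/b   relative error: %.1e -> %.1e\n",
          fabs (x0 * b - 1.0), fabs (x1 * b - 1.0));
  printf ("rsqrt relative error: %.1e -> %.1e\n",
          fabs (y0 * sqrt (a) - 1.0), fabs (y1 * sqrt (a) - 1.0));
  return 0;
}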
*/ ++ emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4))); ++ emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask)); ++ } ++ else ++ { ++ mask = gen_reg_rtx (mode); ++ emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a))); ++ emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask))); ++ } ++ } ++ ++ /* e0 = x0 * a */ ++ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); ++ /* e1 = e0 * x0 */ ++ emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); ++ ++ /* e2 = e1 - 3. */ ++ mthree = force_reg (mode, mthree); ++ emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree))); ++ ++ mhalf = force_reg (mode, mhalf); ++ if (recip) ++ /* e3 = -.5 * x0 */ ++ emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf))); ++ else ++ /* e3 = -.5 * e0 */ ++ emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf))); ++ /* ret = e2 * e3 */ ++ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3))); ++} ++ ++/* Expand fabs (OP0) and return a new rtx that holds the result. The ++ mask for masking out the sign-bit is stored in *SMASK, if that is ++ non-null. */ ++ ++static rtx ++ix86_expand_sse_fabs (rtx op0, rtx *smask) ++{ ++ machine_mode vmode, mode = GET_MODE (op0); ++ rtx xa, mask; ++ ++ xa = gen_reg_rtx (mode); ++ if (mode == SFmode) ++ vmode = V4SFmode; ++ else if (mode == DFmode) ++ vmode = V2DFmode; ++ else ++ vmode = mode; ++ mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true); ++ if (!VECTOR_MODE_P (mode)) ++ { ++ /* We need to generate a scalar mode mask in this case. */ ++ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); ++ tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); ++ mask = gen_reg_rtx (mode); ++ emit_insn (gen_rtx_SET (mask, tmp)); ++ } ++ emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask))); ++ ++ if (smask) ++ *smask = mask; ++ ++ return xa; ++} ++ ++/* Expands a comparison of OP0 with OP1 using comparison code CODE, ++ swapping the operands if SWAP_OPERANDS is true. The expanded ++ code is a forward jump to a newly created label in case the ++ comparison is true. The generated label rtx is returned. */ ++static rtx_code_label * ++ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1, ++ bool swap_operands) ++{ ++ bool unordered_compare = ix86_unordered_fp_compare (code); ++ rtx_code_label *label; ++ rtx tmp, reg; ++ ++ if (swap_operands) ++ std::swap (op0, op1); ++ ++ label = gen_label_rtx (); ++ tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); ++ if (unordered_compare) ++ tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP); ++ reg = gen_rtx_REG (CCFPmode, FLAGS_REG); ++ emit_insn (gen_rtx_SET (reg, tmp)); ++ tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx); ++ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, ++ gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); ++ tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); ++ JUMP_LABEL (tmp) = label; ++ ++ return label; ++} ++ ++/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1 ++ using comparison code CODE. Operands are swapped for the comparison if ++ SWAP_OPERANDS is true. Returns a rtx for the generated mask. */ ++static rtx ++ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, ++ bool swap_operands) ++{ ++ rtx (*insn)(rtx, rtx, rtx, rtx); ++ machine_mode mode = GET_MODE (op0); ++ rtx mask = gen_reg_rtx (mode); ++ ++ if (swap_operands) ++ std::swap (op0, op1); ++ ++ insn = mode == DFmode ? 
gen_setcc_df_sse : gen_setcc_sf_sse; ++ ++ emit_insn (insn (mask, op0, op1, ++ gen_rtx_fmt_ee (code, mode, op0, op1))); ++ return mask; ++} ++ ++/* Expand copysign from SIGN to the positive value ABS_VALUE ++ storing in RESULT. If MASK is non-null, it shall be a mask to mask out ++ the sign-bit. */ ++ ++static void ++ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask) ++{ ++ machine_mode mode = GET_MODE (sign); ++ rtx sgn = gen_reg_rtx (mode); ++ if (mask == NULL_RTX) ++ { ++ machine_mode vmode; ++ ++ if (mode == SFmode) ++ vmode = V4SFmode; ++ else if (mode == DFmode) ++ vmode = V2DFmode; ++ else ++ vmode = mode; ++ ++ mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false); ++ if (!VECTOR_MODE_P (mode)) ++ { ++ /* We need to generate a scalar mode mask in this case. */ ++ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); ++ tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); ++ mask = gen_reg_rtx (mode); ++ emit_insn (gen_rtx_SET (mask, tmp)); ++ } ++ } ++ else ++ mask = gen_rtx_NOT (mode, mask); ++ emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign))); ++ emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn))); ++} ++ ++/* Expand SSE sequence for computing lround from OP1 storing ++ into OP0. */ ++ ++void ++ix86_expand_lround (rtx op0, rtx op1) ++{ ++ /* C code for the stuff we're doing below: ++ tmp = op1 + copysign (nextafter (0.5, 0.0), op1) ++ return (long)tmp; ++ */ ++ machine_mode mode = GET_MODE (op1); ++ const struct real_format *fmt; ++ REAL_VALUE_TYPE pred_half, half_minus_pred_half; ++ rtx adj; ++ ++ /* load nextafter (0.5, 0.0) */ ++ fmt = REAL_MODE_FORMAT (mode); ++ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); ++ real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); ++ ++ /* adj = copysign (0.5, op1) */ ++ adj = force_reg (mode, const_double_from_real_value (pred_half, mode)); ++ ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX); ++ ++ /* adj = op1 + adj */ ++ adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT); ++ ++ /* op0 = (imode)adj */ ++ expand_fix (op0, adj, 0); ++} ++ ++/* Expand SSE2 sequence for computing lround from OPERAND1 storing ++ into OPERAND0. */ ++ ++void ++ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) ++{ ++ /* C code for the stuff we're doing below (for do_floor): ++ xi = (long)op1; ++ xi -= (double)xi > op1 ? 1 : 0; ++ return xi; ++ */ ++ machine_mode fmode = GET_MODE (op1); ++ machine_mode imode = GET_MODE (op0); ++ rtx ireg, freg, tmp; ++ rtx_code_label *label; ++ ++ /* reg = (long)op1 */ ++ ireg = gen_reg_rtx (imode); ++ expand_fix (ireg, op1, 0); ++ ++ /* freg = (double)reg */ ++ freg = gen_reg_rtx (fmode); ++ expand_float (freg, ireg, 0); ++ ++ /* ireg = (freg > op1) ? ireg - 1 : ireg */ ++ label = ix86_expand_sse_compare_and_jump (UNLE, ++ freg, op1, !do_floor); ++ tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS, ++ ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT); ++ emit_move_insn (ireg, tmp); ++ ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ ++ emit_move_insn (op0, ireg); ++} ++ ++/* Generate and return a rtx of mode MODE for 2**n where n is the number ++ of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */ ++ ++static rtx ++ix86_gen_TWO52 (machine_mode mode) ++{ ++ REAL_VALUE_TYPE TWO52r; ++ rtx TWO52; ++ ++ real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 
52 : 23); ++ TWO52 = const_double_from_real_value (TWO52r, mode); ++ TWO52 = force_reg (mode, TWO52); ++ ++ return TWO52; ++} ++ ++/* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */ ++ ++void ++ix86_expand_rint (rtx operand0, rtx operand1) ++{ ++ /* C code for the stuff we're doing below: ++ xa = fabs (operand1); ++ if (!isless (xa, 2**52)) ++ return operand1; ++ two52 = 2**52; ++ if (flag_rounding_math) ++ { ++ two52 = copysign (two52, operand1); ++ xa = operand1; ++ } ++ xa = xa + two52 - two52; ++ return copysign (xa, operand1); ++ */ ++ machine_mode mode = GET_MODE (operand0); ++ rtx res, xa, TWO52, two52, mask; ++ rtx_code_label *label; ++ ++ res = gen_reg_rtx (mode); ++ emit_move_insn (res, operand1); ++ ++ /* xa = abs (operand1) */ ++ xa = ix86_expand_sse_fabs (res, &mask); ++ ++ /* if (!isless (xa, TWO52)) goto label; */ ++ TWO52 = ix86_gen_TWO52 (mode); ++ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ ++ two52 = TWO52; ++ if (flag_rounding_math) ++ { ++ two52 = gen_reg_rtx (mode); ++ ix86_sse_copysign_to_positive (two52, TWO52, res, mask); ++ xa = res; ++ } ++ ++ xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT); ++ xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT); ++ ++ ix86_sse_copysign_to_positive (res, xa, res, mask); ++ ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ ++ emit_move_insn (operand0, res); ++} ++ ++/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing ++ into OPERAND0. */ ++void ++ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) ++{ ++ /* C code for the stuff we expand below. ++ double xa = fabs (x), x2; ++ if (!isless (xa, TWO52)) ++ return x; ++ xa = xa + TWO52 - TWO52; ++ x2 = copysign (xa, x); ++ Compensate. Floor: ++ if (x2 > x) ++ x2 -= 1; ++ Compensate. Ceil: ++ if (x2 < x) ++ x2 -= -1; ++ return x2; ++ */ ++ machine_mode mode = GET_MODE (operand0); ++ rtx xa, TWO52, tmp, one, res, mask; ++ rtx_code_label *label; ++ ++ TWO52 = ix86_gen_TWO52 (mode); ++ ++ /* Temporary for holding the result, initialized to the input ++ operand to ease control flow. */ ++ res = gen_reg_rtx (mode); ++ emit_move_insn (res, operand1); ++ ++ /* xa = abs (operand1) */ ++ xa = ix86_expand_sse_fabs (res, &mask); ++ ++ /* if (!isless (xa, TWO52)) goto label; */ ++ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ ++ /* xa = xa + TWO52 - TWO52; */ ++ xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); ++ xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); ++ ++ /* xa = copysign (xa, operand1) */ ++ ix86_sse_copysign_to_positive (xa, xa, res, mask); ++ ++ /* generate 1.0 or -1.0 */ ++ one = force_reg (mode, ++ const_double_from_real_value (do_floor ++ ? dconst1 : dconstm1, mode)); ++ ++ /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ ++ tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); ++ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); ++ /* We always need to subtract here to preserve signed zero. */ ++ tmp = expand_simple_binop (mode, MINUS, ++ xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); ++ emit_move_insn (res, tmp); ++ ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ ++ emit_move_insn (operand0, res); ++} ++ ++/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing ++ into OPERAND0. */ ++void ++ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) ++{ ++ /* C code for the stuff we expand below. 
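The SSE fallbacks in this block all build on one trick: for |x| < 2^52, computing (|x| + 2^52) - 2^52 in double precision pushes the fraction bits out of the significand, so the result is |x| rounded to an integer in the current rounding mode. The UNLE compare against TWO52 branches over the computation when |x| is already at least 2^52 (or NaN), since such values are necessarily integral, and the sign is put back with the copysign helper; the floor/ceil and round variants then compensate by comparing the rounded value with the original. A minimal sketch of the core identity, assuming round-to-nearest:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  const double TWO52 = 4503599627370496.0;      /* 2^52 */
  double tests[] = { 2.3, 2.7, -1.2, -2.5, 1e10 + 0.49 };

  for (unsigned i = 0; i < sizeof tests / sizeof *tests; i++)
    {
      double x = tests[i];
      volatile double t = fabs (x) + TWO52;     /* volatile keeps the rounding */
      double r = copysign (t - TWO52, x);
      printf ("% 16.2f -> % .1f (libm rint: % .1f)\n", x, r, rint (x));
    }
  return 0;
}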
++ double xa = fabs (x), x2; ++ if (!isless (xa, TWO52)) ++ return x; ++ x2 = (double)(long)x; ++ Compensate. Floor: ++ if (x2 > x) ++ x2 -= 1; ++ Compensate. Ceil: ++ if (x2 < x) ++ x2 += 1; ++ if (HONOR_SIGNED_ZEROS (mode)) ++ return copysign (x2, x); ++ return x2; ++ */ ++ machine_mode mode = GET_MODE (operand0); ++ rtx xa, xi, TWO52, tmp, one, res, mask; ++ rtx_code_label *label; ++ ++ TWO52 = ix86_gen_TWO52 (mode); ++ ++ /* Temporary for holding the result, initialized to the input ++ operand to ease control flow. */ ++ res = gen_reg_rtx (mode); ++ emit_move_insn (res, operand1); ++ ++ /* xa = abs (operand1) */ ++ xa = ix86_expand_sse_fabs (res, &mask); ++ ++ /* if (!isless (xa, TWO52)) goto label; */ ++ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ ++ /* xa = (double)(long)x */ ++ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); ++ expand_fix (xi, res, 0); ++ expand_float (xa, xi, 0); ++ ++ /* generate 1.0 */ ++ one = force_reg (mode, const_double_from_real_value (dconst1, mode)); ++ ++ /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ ++ tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); ++ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); ++ tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, ++ xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); ++ emit_move_insn (res, tmp); ++ ++ if (HONOR_SIGNED_ZEROS (mode)) ++ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); ++ ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ ++ emit_move_insn (operand0, res); ++} ++ ++/* Expand SSE sequence for computing round from OPERAND1 storing ++ into OPERAND0. Sequence that works without relying on DImode truncation ++ via cvttsd2siq that is only available on 64bit targets. */ ++void ++ix86_expand_rounddf_32 (rtx operand0, rtx operand1) ++{ ++ /* C code for the stuff we expand below. ++ double xa = fabs (x), xa2, x2; ++ if (!isless (xa, TWO52)) ++ return x; ++ Using the absolute value and copying back sign makes ++ -0.0 -> -0.0 correct. ++ xa2 = xa + TWO52 - TWO52; ++ Compensate. ++ dxa = xa2 - xa; ++ if (dxa <= -0.5) ++ xa2 += 1; ++ else if (dxa > 0.5) ++ xa2 -= 1; ++ x2 = copysign (xa2, x); ++ return x2; ++ */ ++ machine_mode mode = GET_MODE (operand0); ++ rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask; ++ rtx_code_label *label; ++ ++ TWO52 = ix86_gen_TWO52 (mode); ++ ++ /* Temporary for holding the result, initialized to the input ++ operand to ease control flow. */ ++ res = gen_reg_rtx (mode); ++ emit_move_insn (res, operand1); ++ ++ /* xa = abs (operand1) */ ++ xa = ix86_expand_sse_fabs (res, &mask); ++ ++ /* if (!isless (xa, TWO52)) goto label; */ ++ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ ++ /* xa2 = xa + TWO52 - TWO52; */ ++ xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); ++ xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT); ++ ++ /* dxa = xa2 - xa; */ ++ dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT); ++ ++ /* generate 0.5, 1.0 and -0.5 */ ++ half = force_reg (mode, const_double_from_real_value (dconsthalf, mode)); ++ one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT); ++ mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX, ++ 0, OPTAB_DIRECT); ++ ++ /* Compensate. */ ++ tmp = gen_reg_rtx (mode); ++ /* xa2 = xa2 - (dxa > 0.5 ? 
1 : 0) */ ++ tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); ++ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); ++ xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); ++ /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */ ++ tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); ++ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); ++ xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); ++ ++ /* res = copysign (xa2, operand1) */ ++ ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask); ++ ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ ++ emit_move_insn (operand0, res); ++} ++ ++/* Expand SSE sequence for computing trunc from OPERAND1 storing ++ into OPERAND0. */ ++void ++ix86_expand_trunc (rtx operand0, rtx operand1) ++{ ++ /* C code for SSE variant we expand below. ++ double xa = fabs (x), x2; ++ if (!isless (xa, TWO52)) ++ return x; ++ x2 = (double)(long)x; ++ if (HONOR_SIGNED_ZEROS (mode)) ++ return copysign (x2, x); ++ return x2; ++ */ ++ machine_mode mode = GET_MODE (operand0); ++ rtx xa, xi, TWO52, res, mask; ++ rtx_code_label *label; ++ ++ TWO52 = ix86_gen_TWO52 (mode); ++ ++ /* Temporary for holding the result, initialized to the input ++ operand to ease control flow. */ ++ res = gen_reg_rtx (mode); ++ emit_move_insn (res, operand1); ++ ++ /* xa = abs (operand1) */ ++ xa = ix86_expand_sse_fabs (res, &mask); ++ ++ /* if (!isless (xa, TWO52)) goto label; */ ++ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ ++ /* x = (double)(long)x */ ++ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); ++ expand_fix (xi, res, 0); ++ expand_float (res, xi, 0); ++ ++ if (HONOR_SIGNED_ZEROS (mode)) ++ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); ++ ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ ++ emit_move_insn (operand0, res); ++} ++ ++/* Expand SSE sequence for computing trunc from OPERAND1 storing ++ into OPERAND0. */ ++void ++ix86_expand_truncdf_32 (rtx operand0, rtx operand1) ++{ ++ machine_mode mode = GET_MODE (operand0); ++ rtx xa, mask, TWO52, one, res, smask, tmp; ++ rtx_code_label *label; ++ ++ /* C code for SSE variant we expand below. ++ double xa = fabs (x), x2; ++ if (!isless (xa, TWO52)) ++ return x; ++ xa2 = xa + TWO52 - TWO52; ++ Compensate: ++ if (xa2 > xa) ++ xa2 -= 1.0; ++ x2 = copysign (xa2, x); ++ return x2; ++ */ ++ ++ TWO52 = ix86_gen_TWO52 (mode); ++ ++ /* Temporary for holding the result, initialized to the input ++ operand to ease control flow. */ ++ res = gen_reg_rtx (mode); ++ emit_move_insn (res, operand1); ++ ++ /* xa = abs (operand1) */ ++ xa = ix86_expand_sse_fabs (res, &smask); ++ ++ /* if (!isless (xa, TWO52)) goto label; */ ++ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ ++ /* res = xa + TWO52 - TWO52; */ ++ tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); ++ tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT); ++ emit_move_insn (res, tmp); ++ ++ /* generate 1.0 */ ++ one = force_reg (mode, const_double_from_real_value (dconst1, mode)); ++ ++ /* Compensate: res = xa2 - (res > xa ? 
1 : 0) */ ++ mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false); ++ emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one))); ++ tmp = expand_simple_binop (mode, MINUS, ++ res, mask, NULL_RTX, 0, OPTAB_DIRECT); ++ emit_move_insn (res, tmp); ++ ++ /* res = copysign (res, operand1) */ ++ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask); ++ ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ ++ emit_move_insn (operand0, res); ++} ++ ++/* Expand SSE sequence for computing round from OPERAND1 storing ++ into OPERAND0. */ ++void ++ix86_expand_round (rtx operand0, rtx operand1) ++{ ++ /* C code for the stuff we're doing below: ++ double xa = fabs (x); ++ if (!isless (xa, TWO52)) ++ return x; ++ xa = (double)(long)(xa + nextafter (0.5, 0.0)); ++ return copysign (xa, x); ++ */ ++ machine_mode mode = GET_MODE (operand0); ++ rtx res, TWO52, xa, xi, half, mask; ++ rtx_code_label *label; ++ const struct real_format *fmt; ++ REAL_VALUE_TYPE pred_half, half_minus_pred_half; ++ ++ /* Temporary for holding the result, initialized to the input ++ operand to ease control flow. */ ++ res = gen_reg_rtx (mode); ++ emit_move_insn (res, operand1); ++ ++ TWO52 = ix86_gen_TWO52 (mode); ++ xa = ix86_expand_sse_fabs (res, &mask); ++ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ ++ /* load nextafter (0.5, 0.0) */ ++ fmt = REAL_MODE_FORMAT (mode); ++ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); ++ real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); ++ ++ /* xa = xa + 0.5 */ ++ half = force_reg (mode, const_double_from_real_value (pred_half, mode)); ++ xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT); ++ ++ /* xa = (double)(int64_t)xa */ ++ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); ++ expand_fix (xi, xa, 0); ++ expand_float (xa, xi, 0); ++ ++ /* res = copysign (xa, operand1) */ ++ ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask); ++ ++ emit_label (label); ++ LABEL_NUSES (label) = 1; ++ ++ emit_move_insn (operand0, res); ++} ++ ++/* Expand SSE sequence for computing round ++ from OP1 storing into OP0 using sse4 round insn. 
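ix86_expand_round above (and ix86_expand_round_sse4 just below) adds copysign(nextafter(0.5, 0.0), x) rather than a plain 0.5 before truncating. The reason: for the largest double just below 0.5, x + 0.5 rounds up to exactly 1.0, so trunc(x + 0.5) would give 1 where round(x) is 0; adding the predecessor of 0.5 keeps the sum strictly below 1. A small demonstration, not part of the patch:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* The largest double strictly below 0.5, used both as the test input
     and as the adjustment the expanders actually load.  */
  double x = nextafter (0.5, 0.0);
  double pred_half = nextafter (0.5, 0.0);

  printf ("trunc (x + 0.5)       = %.1f\n", trunc (x + 0.5));        /* 1 - wrong */
  printf ("trunc (x + pred_half) = %.1f\n", trunc (x + pred_half));  /* 0 - right */
  printf ("round (x)             = %.1f\n", round (x));              /* 0 */
  return 0;
}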
*/ ++void ++ix86_expand_round_sse4 (rtx op0, rtx op1) ++{ ++ machine_mode mode = GET_MODE (op0); ++ rtx e1, e2, res, half; ++ const struct real_format *fmt; ++ REAL_VALUE_TYPE pred_half, half_minus_pred_half; ++ rtx (*gen_copysign) (rtx, rtx, rtx); ++ rtx (*gen_round) (rtx, rtx, rtx); ++ ++ switch (mode) ++ { ++ case E_SFmode: ++ gen_copysign = gen_copysignsf3; ++ gen_round = gen_sse4_1_roundsf2; ++ break; ++ case E_DFmode: ++ gen_copysign = gen_copysigndf3; ++ gen_round = gen_sse4_1_rounddf2; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* round (a) = trunc (a + copysign (0.5, a)) */ ++ ++ /* load nextafter (0.5, 0.0) */ ++ fmt = REAL_MODE_FORMAT (mode); ++ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); ++ real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); ++ half = const_double_from_real_value (pred_half, mode); ++ ++ /* e1 = copysign (0.5, op1) */ ++ e1 = gen_reg_rtx (mode); ++ emit_insn (gen_copysign (e1, half, op1)); ++ ++ /* e2 = op1 + e1 */ ++ e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT); ++ ++ /* res = trunc (e2) */ ++ res = gen_reg_rtx (mode); ++ emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC))); ++ ++ emit_move_insn (op0, res); ++} ++ ++/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel []))) ++ insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh ++ insn every time. */ ++ ++static GTY(()) rtx_insn *vselect_insn; ++ ++/* Initialize vselect_insn. */ ++ ++static void ++init_vselect_insn (void) ++{ ++ unsigned i; ++ rtx x; ++ ++ x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN)); ++ for (i = 0; i < MAX_VECT_LEN; ++i) ++ XVECEXP (x, 0, i) = const0_rtx; ++ x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx, ++ const0_rtx), x); ++ x = gen_rtx_SET (const0_rtx, x); ++ start_sequence (); ++ vselect_insn = emit_insn (x); ++ end_sequence (); ++} ++ ++/* Construct (set target (vec_select op0 (parallel perm))) and ++ return true if that's a valid instruction in the active ISA. */ ++ ++static bool ++expand_vselect (rtx target, rtx op0, const unsigned char *perm, ++ unsigned nelt, bool testing_p) ++{ ++ unsigned int i; ++ rtx x, save_vconcat; ++ int icode; ++ ++ if (vselect_insn == NULL_RTX) ++ init_vselect_insn (); ++ ++ x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1); ++ PUT_NUM_ELEM (XVEC (x, 0), nelt); ++ for (i = 0; i < nelt; ++i) ++ XVECEXP (x, 0, i) = GEN_INT (perm[i]); ++ save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0); ++ XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0; ++ PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target)); ++ SET_DEST (PATTERN (vselect_insn)) = target; ++ icode = recog_memoized (vselect_insn); ++ ++ if (icode >= 0 && !testing_p) ++ emit_insn (copy_rtx (PATTERN (vselect_insn))); ++ ++ SET_DEST (PATTERN (vselect_insn)) = const0_rtx; ++ XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat; ++ INSN_CODE (vselect_insn) = -1; ++ ++ return icode >= 0; ++} ++ ++/* Similar, but generate a vec_concat from op0 and op1 as well. 
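expand_vselect above builds (set target (vec_select op0 (parallel perm))) inside the single cached vselect_insn and asks recog_memoized whether some existing pattern in sse.md accepts it; expand_vselect_vconcat, which follows, wraps the two operands in a vec_concat first, so indices nelt and above select from op1. What that RTL computes is simply an indexed gather from the concatenation of the two operands; a scalar model (illustrative only):

#include <stdio.h>

/* Scalar model of (vec_select (vec_concat op0 op1) (parallel perm)):
   indices 0..nelt-1 pick from op0, indices nelt..2*nelt-1 from op1.  */
static void
vselect_vconcat (int *dst, const int *op0, const int *op1,
                 const unsigned char *perm, int nelt)
{
  for (int i = 0; i < nelt; i++)
    dst[i] = perm[i] < nelt ? op0[perm[i]] : op1[perm[i] - nelt];
}

int
main (void)
{
  int op0[4] = { 0, 1, 2, 3 }, op1[4] = { 40, 41, 42, 43 }, dst[4];
  const unsigned char perm[4] = { 2, 6, 3, 7 };   /* an interleave-high */

  vselect_vconcat (dst, op0, op1, perm, 4);
  for (int i = 0; i < 4; i++)
    printf ("%d ", dst[i]);                       /* prints: 2 42 3 43 */
  printf ("\n");
  return 0;
}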
*/ ++ ++static bool ++expand_vselect_vconcat (rtx target, rtx op0, rtx op1, ++ const unsigned char *perm, unsigned nelt, ++ bool testing_p) ++{ ++ machine_mode v2mode; ++ rtx x; ++ bool ok; ++ ++ if (vselect_insn == NULL_RTX) ++ init_vselect_insn (); ++ ++ if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) ++ return false; ++ x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0); ++ PUT_MODE (x, v2mode); ++ XEXP (x, 0) = op0; ++ XEXP (x, 1) = op1; ++ ok = expand_vselect (target, x, perm, nelt, testing_p); ++ XEXP (x, 0) = const0_rtx; ++ XEXP (x, 1) = const0_rtx; ++ return ok; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D ++ using movss or movsd. */ ++static bool ++expand_vec_perm_movs (struct expand_vec_perm_d *d) ++{ ++ machine_mode vmode = d->vmode; ++ unsigned i, nelt = d->nelt; ++ rtx x; ++ ++ if (d->one_operand_p) ++ return false; ++ ++ if (!(TARGET_SSE && vmode == V4SFmode) ++ && !(TARGET_SSE2 && vmode == V2DFmode)) ++ return false; ++ ++ /* Only the first element is changed. */ ++ if (d->perm[0] != nelt && d->perm[0] != 0) ++ return false; ++ for (i = 1; i < nelt; ++i) ++ if (d->perm[i] != i + nelt - d->perm[0]) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ if (d->perm[0] == nelt) ++ x = gen_rtx_VEC_MERGE (vmode, d->op1, d->op0, GEN_INT (1)); ++ else ++ x = gen_rtx_VEC_MERGE (vmode, d->op0, d->op1, GEN_INT (1)); ++ ++ emit_insn (gen_rtx_SET (d->target, x)); ++ ++ return true; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D ++ in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */ ++ ++static bool ++expand_vec_perm_blend (struct expand_vec_perm_d *d) ++{ ++ machine_mode mmode, vmode = d->vmode; ++ unsigned i, nelt = d->nelt; ++ unsigned HOST_WIDE_INT mask; ++ rtx target, op0, op1, maskop, x; ++ rtx rperm[32], vperm; ++ ++ if (d->one_operand_p) ++ return false; ++ if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64 ++ && (TARGET_AVX512BW ++ || GET_MODE_UNIT_SIZE (vmode) >= 4)) ++ ; ++ else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) ++ ; ++ else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) ++ ; ++ else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) ++ ; ++ else ++ return false; ++ ++ /* This is a blend, not a permute. Elements must stay in their ++ respective lanes. */ ++ for (i = 0; i < nelt; ++i) ++ { ++ unsigned e = d->perm[i]; ++ if (!(e == i || e == i + nelt)) ++ return false; ++ } ++ ++ if (d->testing_p) ++ return true; ++ ++ /* ??? Without SSE4.1, we could implement this with and/andn/or. This ++ decision should be extracted elsewhere, so that we only try that ++ sequence once all budget==3 options have been tried. */ ++ target = d->target; ++ op0 = d->op0; ++ op1 = d->op1; ++ mask = 0; ++ ++ switch (vmode) ++ { ++ case E_V8DFmode: ++ case E_V16SFmode: ++ case E_V4DFmode: ++ case E_V8SFmode: ++ case E_V2DFmode: ++ case E_V4SFmode: ++ case E_V8HImode: ++ case E_V8SImode: ++ case E_V32HImode: ++ case E_V64QImode: ++ case E_V16SImode: ++ case E_V8DImode: ++ for (i = 0; i < nelt; ++i) ++ mask |= ((unsigned HOST_WIDE_INT) (d->perm[i] >= nelt)) << i; ++ break; ++ ++ case E_V2DImode: ++ for (i = 0; i < 2; ++i) ++ mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4); ++ vmode = V8HImode; ++ goto do_subreg; ++ ++ case E_V4SImode: ++ for (i = 0; i < 4; ++i) ++ mask |= (d->perm[i] >= 4 ? 
3 : 0) << (i * 2); ++ vmode = V8HImode; ++ goto do_subreg; ++ ++ case E_V16QImode: ++ /* See if bytes move in pairs so we can use pblendw with ++ an immediate argument, rather than pblendvb with a vector ++ argument. */ ++ for (i = 0; i < 16; i += 2) ++ if (d->perm[i] + 1 != d->perm[i + 1]) ++ { ++ use_pblendvb: ++ for (i = 0; i < nelt; ++i) ++ rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx); ++ ++ finish_pblendvb: ++ vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); ++ vperm = force_reg (vmode, vperm); ++ ++ if (GET_MODE_SIZE (vmode) == 16) ++ emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm)); ++ else ++ emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm)); ++ if (target != d->target) ++ emit_move_insn (d->target, gen_lowpart (d->vmode, target)); ++ return true; ++ } ++ ++ for (i = 0; i < 8; ++i) ++ mask |= (d->perm[i * 2] >= 16) << i; ++ vmode = V8HImode; ++ /* FALLTHRU */ ++ ++ do_subreg: ++ target = gen_reg_rtx (vmode); ++ op0 = gen_lowpart (vmode, op0); ++ op1 = gen_lowpart (vmode, op1); ++ break; ++ ++ case E_V32QImode: ++ /* See if bytes move in pairs. If not, vpblendvb must be used. */ ++ for (i = 0; i < 32; i += 2) ++ if (d->perm[i] + 1 != d->perm[i + 1]) ++ goto use_pblendvb; ++ /* See if bytes move in quadruplets. If yes, vpblendd ++ with immediate can be used. */ ++ for (i = 0; i < 32; i += 4) ++ if (d->perm[i] + 2 != d->perm[i + 2]) ++ break; ++ if (i < 32) ++ { ++ /* See if bytes move the same in both lanes. If yes, ++ vpblendw with immediate can be used. */ ++ for (i = 0; i < 16; i += 2) ++ if (d->perm[i] + 16 != d->perm[i + 16]) ++ goto use_pblendvb; ++ ++ /* Use vpblendw. */ ++ for (i = 0; i < 16; ++i) ++ mask |= (d->perm[i * 2] >= 32) << i; ++ vmode = V16HImode; ++ goto do_subreg; ++ } ++ ++ /* Use vpblendd. */ ++ for (i = 0; i < 8; ++i) ++ mask |= (d->perm[i * 4] >= 32) << i; ++ vmode = V8SImode; ++ goto do_subreg; ++ ++ case E_V16HImode: ++ /* See if words move in pairs. If yes, vpblendd can be used. */ ++ for (i = 0; i < 16; i += 2) ++ if (d->perm[i] + 1 != d->perm[i + 1]) ++ break; ++ if (i < 16) ++ { ++ /* See if words move the same in both lanes. If not, ++ vpblendvb must be used. */ ++ for (i = 0; i < 8; i++) ++ if (d->perm[i] + 8 != d->perm[i + 8]) ++ { ++ /* Use vpblendvb. */ ++ for (i = 0; i < 32; ++i) ++ rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx); ++ ++ vmode = V32QImode; ++ nelt = 32; ++ target = gen_reg_rtx (vmode); ++ op0 = gen_lowpart (vmode, op0); ++ op1 = gen_lowpart (vmode, op1); ++ goto finish_pblendvb; ++ } ++ ++ /* Use vpblendw. */ ++ for (i = 0; i < 16; ++i) ++ mask |= (d->perm[i] >= 16) << i; ++ break; ++ } ++ ++ /* Use vpblendd. */ ++ for (i = 0; i < 8; ++i) ++ mask |= (d->perm[i * 2] >= 16) << i; ++ vmode = V8SImode; ++ goto do_subreg; ++ ++ case E_V4DImode: ++ /* Use vpblendd. */ ++ for (i = 0; i < 4; ++i) ++ mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2); ++ vmode = V8SImode; ++ goto do_subreg; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ switch (vmode) ++ { ++ case E_V8DFmode: ++ case E_V8DImode: ++ mmode = QImode; ++ break; ++ case E_V16SFmode: ++ case E_V16SImode: ++ mmode = HImode; ++ break; ++ case E_V32HImode: ++ mmode = SImode; ++ break; ++ case E_V64QImode: ++ mmode = DImode; ++ break; ++ default: ++ mmode = VOIDmode; ++ } ++ ++ if (mmode != VOIDmode) ++ maskop = force_reg (mmode, gen_int_mode (mask, mmode)); ++ else ++ maskop = GEN_INT (mask); ++ ++ /* This matches five different patterns with the different modes. 
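expand_vec_perm_blend above encodes a lane-preserving two-source permutation (every perm[i] is either i or i + nelt) as an immediate bit mask with bit i set when element i comes from the second operand. For byte and word vectors it first checks whether elements move in pairs or quadruplets so the immediate pblendw/vpblendd forms can be used; only when that fails does it fall back to pblendvb/vpblendvb with a constant vector mask. A scalar model of the immediate-mask blend (sketch only):

#include <stdio.h>

/* Scalar model of an immediate-mask blend: bit i of MASK selects
   op1[i] over op0[i], mirroring how expand_vec_perm_blend builds
   the pblend* immediate from d->perm.  */
static void
blend (int *dst, const int *op0, const int *op1, unsigned mask, int nelt)
{
  for (int i = 0; i < nelt; i++)
    dst[i] = (mask >> i) & 1 ? op1[i] : op0[i];
}

int
main (void)
{
  int op0[4] = { 0, 1, 2, 3 }, op1[4] = { 40, 41, 42, 43 }, dst[4];
  /* perm = { 0, 5, 2, 7 }: elements 1 and 3 come from op1 -> mask 0b1010.  */
  unsigned perm[4] = { 0, 5, 2, 7 }, mask = 0;
  for (int i = 0; i < 4; i++)
    mask |= (perm[i] >= 4) << i;

  blend (dst, op0, op1, mask, 4);
  for (int i = 0; i < 4; i++)
    printf ("%d ", dst[i]);              /* prints: 0 41 2 43 */
  printf ("(mask = 0x%x)\n", mask);
  return 0;
}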
*/ ++ x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop); ++ x = gen_rtx_SET (target, x); ++ emit_insn (x); ++ if (target != d->target) ++ emit_move_insn (d->target, gen_lowpart (d->vmode, target)); ++ ++ return true; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D ++ in terms of the variable form of vpermilps. ++ ++ Note that we will have already failed the immediate input vpermilps, ++ which requires that the high and low part shuffle be identical; the ++ variable form doesn't require that. */ ++ ++static bool ++expand_vec_perm_vpermil (struct expand_vec_perm_d *d) ++{ ++ rtx rperm[8], vperm; ++ unsigned i; ++ ++ if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p) ++ return false; ++ ++ /* We can only permute within the 128-bit lane. */ ++ for (i = 0; i < 8; ++i) ++ { ++ unsigned e = d->perm[i]; ++ if (i < 4 ? e >= 4 : e < 4) ++ return false; ++ } ++ ++ if (d->testing_p) ++ return true; ++ ++ for (i = 0; i < 8; ++i) ++ { ++ unsigned e = d->perm[i]; ++ ++ /* Within each 128-bit lane, the elements of op0 are numbered ++ from 0 and the elements of op1 are numbered from 4. */ ++ if (e >= 8 + 4) ++ e -= 8; ++ else if (e >= 4) ++ e -= 4; ++ ++ rperm[i] = GEN_INT (e); ++ } ++ ++ vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm)); ++ vperm = force_reg (V8SImode, vperm); ++ emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm)); ++ ++ return true; ++} ++ ++/* Return true if permutation D can be performed as VMODE permutation ++ instead. */ ++ ++static bool ++valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d) ++{ ++ unsigned int i, j, chunk; ++ ++ if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT ++ || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT ++ || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode)) ++ return false; ++ ++ if (GET_MODE_NUNITS (vmode) >= d->nelt) ++ return true; ++ ++ chunk = d->nelt / GET_MODE_NUNITS (vmode); ++ for (i = 0; i < d->nelt; i += chunk) ++ if (d->perm[i] & (chunk - 1)) ++ return false; ++ else ++ for (j = 1; j < chunk; ++j) ++ if (d->perm[i] + j != d->perm[i + j]) ++ return false; ++ ++ return true; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D ++ in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */ ++ ++static bool ++expand_vec_perm_pshufb (struct expand_vec_perm_d *d) ++{ ++ unsigned i, nelt, eltsz, mask; ++ unsigned char perm[64]; ++ machine_mode vmode = V16QImode; ++ rtx rperm[64], vperm, target, op0, op1; ++ ++ nelt = d->nelt; ++ ++ if (!d->one_operand_p) ++ { ++ if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16) ++ { ++ if (TARGET_AVX2 ++ && valid_perm_using_mode_p (V2TImode, d)) ++ { ++ if (d->testing_p) ++ return true; ++ ++ /* Use vperm2i128 insn. The pattern uses ++ V4DImode instead of V2TImode. 
*/ ++ target = d->target; ++ if (d->vmode != V4DImode) ++ target = gen_reg_rtx (V4DImode); ++ op0 = gen_lowpart (V4DImode, d->op0); ++ op1 = gen_lowpart (V4DImode, d->op1); ++ rperm[0] ++ = GEN_INT ((d->perm[0] / (nelt / 2)) ++ | ((d->perm[nelt / 2] / (nelt / 2)) * 16)); ++ emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0])); ++ if (target != d->target) ++ emit_move_insn (d->target, gen_lowpart (d->vmode, target)); ++ return true; ++ } ++ return false; ++ } ++ } ++ else ++ { ++ if (GET_MODE_SIZE (d->vmode) == 16) ++ { ++ if (!TARGET_SSSE3) ++ return false; ++ } ++ else if (GET_MODE_SIZE (d->vmode) == 32) ++ { ++ if (!TARGET_AVX2) ++ return false; ++ ++ /* V4DImode should be already handled through ++ expand_vselect by vpermq instruction. */ ++ gcc_assert (d->vmode != V4DImode); ++ ++ vmode = V32QImode; ++ if (d->vmode == V8SImode ++ || d->vmode == V16HImode ++ || d->vmode == V32QImode) ++ { ++ /* First see if vpermq can be used for ++ V8SImode/V16HImode/V32QImode. */ ++ if (valid_perm_using_mode_p (V4DImode, d)) ++ { ++ for (i = 0; i < 4; i++) ++ perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3; ++ if (d->testing_p) ++ return true; ++ target = gen_reg_rtx (V4DImode); ++ if (expand_vselect (target, gen_lowpart (V4DImode, d->op0), ++ perm, 4, false)) ++ { ++ emit_move_insn (d->target, ++ gen_lowpart (d->vmode, target)); ++ return true; ++ } ++ return false; ++ } ++ ++ /* Next see if vpermd can be used. */ ++ if (valid_perm_using_mode_p (V8SImode, d)) ++ vmode = V8SImode; ++ } ++ /* Or if vpermps can be used. */ ++ else if (d->vmode == V8SFmode) ++ vmode = V8SImode; ++ ++ if (vmode == V32QImode) ++ { ++ /* vpshufb only works intra lanes, it is not ++ possible to shuffle bytes in between the lanes. */ ++ for (i = 0; i < nelt; ++i) ++ if ((d->perm[i] ^ i) & (nelt / 2)) ++ return false; ++ } ++ } ++ else if (GET_MODE_SIZE (d->vmode) == 64) ++ { ++ if (!TARGET_AVX512BW) ++ return false; ++ ++ /* If vpermq didn't work, vpshufb won't work either. */ ++ if (d->vmode == V8DFmode || d->vmode == V8DImode) ++ return false; ++ ++ vmode = V64QImode; ++ if (d->vmode == V16SImode ++ || d->vmode == V32HImode ++ || d->vmode == V64QImode) ++ { ++ /* First see if vpermq can be used for ++ V16SImode/V32HImode/V64QImode. */ ++ if (valid_perm_using_mode_p (V8DImode, d)) ++ { ++ for (i = 0; i < 8; i++) ++ perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7; ++ if (d->testing_p) ++ return true; ++ target = gen_reg_rtx (V8DImode); ++ if (expand_vselect (target, gen_lowpart (V8DImode, d->op0), ++ perm, 8, false)) ++ { ++ emit_move_insn (d->target, ++ gen_lowpart (d->vmode, target)); ++ return true; ++ } ++ return false; ++ } ++ ++ /* Next see if vpermd can be used. */ ++ if (valid_perm_using_mode_p (V16SImode, d)) ++ vmode = V16SImode; ++ } ++ /* Or if vpermps can be used. */ ++ else if (d->vmode == V16SFmode) ++ vmode = V16SImode; ++ if (vmode == V64QImode) ++ { ++ /* vpshufb only works intra lanes, it is not ++ possible to shuffle bytes in between the lanes. 
*/ ++ for (i = 0; i < nelt; ++i) ++ if ((d->perm[i] ^ i) & (nelt / 4)) ++ return false; ++ } ++ } ++ else ++ return false; ++ } ++ ++ if (d->testing_p) ++ return true; ++ ++ if (vmode == V8SImode) ++ for (i = 0; i < 8; ++i) ++ rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7); ++ else if (vmode == V16SImode) ++ for (i = 0; i < 16; ++i) ++ rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15); ++ else ++ { ++ eltsz = GET_MODE_UNIT_SIZE (d->vmode); ++ if (!d->one_operand_p) ++ mask = 2 * nelt - 1; ++ else if (vmode == V16QImode) ++ mask = nelt - 1; ++ else if (vmode == V64QImode) ++ mask = nelt / 4 - 1; ++ else ++ mask = nelt / 2 - 1; ++ ++ for (i = 0; i < nelt; ++i) ++ { ++ unsigned j, e = d->perm[i] & mask; ++ for (j = 0; j < eltsz; ++j) ++ rperm[i * eltsz + j] = GEN_INT (e * eltsz + j); ++ } ++ } ++ ++ vperm = gen_rtx_CONST_VECTOR (vmode, ++ gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm)); ++ vperm = force_reg (vmode, vperm); ++ ++ target = d->target; ++ if (d->vmode != vmode) ++ target = gen_reg_rtx (vmode); ++ op0 = gen_lowpart (vmode, d->op0); ++ if (d->one_operand_p) ++ { ++ if (vmode == V16QImode) ++ emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); ++ else if (vmode == V32QImode) ++ emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); ++ else if (vmode == V64QImode) ++ emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm)); ++ else if (vmode == V8SFmode) ++ emit_insn (gen_avx2_permvarv8sf (target, op0, vperm)); ++ else if (vmode == V8SImode) ++ emit_insn (gen_avx2_permvarv8si (target, op0, vperm)); ++ else if (vmode == V16SFmode) ++ emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm)); ++ else if (vmode == V16SImode) ++ emit_insn (gen_avx512f_permvarv16si (target, op0, vperm)); ++ else ++ gcc_unreachable (); ++ } ++ else ++ { ++ op1 = gen_lowpart (vmode, d->op1); ++ emit_insn (gen_xop_pperm (target, op0, op1, vperm)); ++ } ++ if (target != d->target) ++ emit_move_insn (d->target, gen_lowpart (d->vmode, target)); ++ ++ return true; ++} ++ ++/* For V*[QHS]Imode permutations, check if the same permutation ++ can't be performed in a 2x, 4x or 8x wider inner mode. 
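canonicalize_vector_int_perm above notices when a permutation only moves elements in aligned pairs (perm[i] even and perm[i+1] == perm[i] + 1) and rewrites it as the same shuffle on elements twice as wide, repeating until the test fails or DImode elements are reached; the wider form frequently matches a single-instruction pattern that the narrow one cannot. A small sketch of the pair test and the halved permutation (illustrative, single-operand indices assumed):

#include <stdbool.h>
#include <stdio.h>

/* If PERM (NELT elements) moves elements in aligned pairs, write the
   equivalent half-length permutation on double-width elements into
   WIDE and return true.  Mirrors the test in canonicalize_vector_int_perm.  */
static bool
widen_perm (const unsigned char *perm, int nelt, unsigned char *wide)
{
  for (int i = 0; i < nelt; i += 2)
    if ((perm[i] & 1) || perm[i + 1] != perm[i] + 1)
      return false;
  for (int i = 0; i < nelt / 2; i++)
    wide[i] = perm[2 * i] / 2;
  return true;
}

int
main (void)
{
  /* A V8HI shuffle that is really a V4SI shuffle in disguise.  */
  unsigned char perm[8] = { 4, 5, 0, 1, 6, 7, 2, 3 }, wide[4];
  if (widen_perm (perm, 8, wide))
    printf ("wide perm: %d %d %d %d\n", wide[0], wide[1], wide[2], wide[3]);
  return 0;
}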
*/ ++ ++static bool ++canonicalize_vector_int_perm (const struct expand_vec_perm_d *d, ++ struct expand_vec_perm_d *nd) ++{ ++ int i; ++ machine_mode mode = VOIDmode; ++ ++ switch (d->vmode) ++ { ++ case E_V16QImode: mode = V8HImode; break; ++ case E_V32QImode: mode = V16HImode; break; ++ case E_V64QImode: mode = V32HImode; break; ++ case E_V8HImode: mode = V4SImode; break; ++ case E_V16HImode: mode = V8SImode; break; ++ case E_V32HImode: mode = V16SImode; break; ++ case E_V4SImode: mode = V2DImode; break; ++ case E_V8SImode: mode = V4DImode; break; ++ case E_V16SImode: mode = V8DImode; break; ++ default: return false; ++ } ++ for (i = 0; i < d->nelt; i += 2) ++ if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1) ++ return false; ++ nd->vmode = mode; ++ nd->nelt = d->nelt / 2; ++ for (i = 0; i < nd->nelt; i++) ++ nd->perm[i] = d->perm[2 * i] / 2; ++ if (GET_MODE_INNER (mode) != DImode) ++ canonicalize_vector_int_perm (nd, nd); ++ if (nd != d) ++ { ++ nd->one_operand_p = d->one_operand_p; ++ nd->testing_p = d->testing_p; ++ if (d->op0 == d->op1) ++ nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0); ++ else ++ { ++ nd->op0 = gen_lowpart (nd->vmode, d->op0); ++ nd->op1 = gen_lowpart (nd->vmode, d->op1); ++ } ++ if (d->testing_p) ++ nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1); ++ else ++ nd->target = gen_reg_rtx (nd->vmode); ++ } ++ return true; ++} ++ ++/* Try to expand one-operand permutation with constant mask. */ ++ ++static bool ++ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d) ++{ ++ machine_mode mode = GET_MODE (d->op0); ++ machine_mode maskmode = mode; ++ rtx (*gen) (rtx, rtx, rtx) = NULL; ++ rtx target, op0, mask; ++ rtx vec[64]; ++ ++ if (!rtx_equal_p (d->op0, d->op1)) ++ return false; ++ ++ if (!TARGET_AVX512F) ++ return false; ++ ++ switch (mode) ++ { ++ case E_V16SImode: ++ gen = gen_avx512f_permvarv16si; ++ break; ++ case E_V16SFmode: ++ gen = gen_avx512f_permvarv16sf; ++ maskmode = V16SImode; ++ break; ++ case E_V8DImode: ++ gen = gen_avx512f_permvarv8di; ++ break; ++ case E_V8DFmode: ++ gen = gen_avx512f_permvarv8df; ++ maskmode = V8DImode; ++ break; ++ default: ++ return false; ++ } ++ ++ target = d->target; ++ op0 = d->op0; ++ for (int i = 0; i < d->nelt; ++i) ++ vec[i] = GEN_INT (d->perm[i]); ++ mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); ++ emit_insn (gen (target, op0, force_reg (maskmode, mask))); ++ return true; ++} ++ ++static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool); ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D ++ in a single instruction. */ ++ ++static bool ++expand_vec_perm_1 (struct expand_vec_perm_d *d) ++{ ++ unsigned i, nelt = d->nelt; ++ struct expand_vec_perm_d nd; ++ ++ /* Check plain VEC_SELECT first, because AVX has instructions that could ++ match both SEL and SEL+CONCAT, but the plain SEL will allow a memory ++ input where SEL+CONCAT may not. */ ++ if (d->one_operand_p) ++ { ++ int mask = nelt - 1; ++ bool identity_perm = true; ++ bool broadcast_perm = true; ++ ++ for (i = 0; i < nelt; i++) ++ { ++ nd.perm[i] = d->perm[i] & mask; ++ if (nd.perm[i] != i) ++ identity_perm = false; ++ if (nd.perm[i]) ++ broadcast_perm = false; ++ } ++ ++ if (identity_perm) ++ { ++ if (!d->testing_p) ++ emit_move_insn (d->target, d->op0); ++ return true; ++ } ++ else if (broadcast_perm && TARGET_AVX2) ++ { ++ /* Use vpbroadcast{b,w,d}. 
*/ ++ rtx (*gen) (rtx, rtx) = NULL; ++ switch (d->vmode) ++ { ++ case E_V64QImode: ++ if (TARGET_AVX512BW) ++ gen = gen_avx512bw_vec_dupv64qi_1; ++ break; ++ case E_V32QImode: ++ gen = gen_avx2_pbroadcastv32qi_1; ++ break; ++ case E_V32HImode: ++ if (TARGET_AVX512BW) ++ gen = gen_avx512bw_vec_dupv32hi_1; ++ break; ++ case E_V16HImode: ++ gen = gen_avx2_pbroadcastv16hi_1; ++ break; ++ case E_V16SImode: ++ if (TARGET_AVX512F) ++ gen = gen_avx512f_vec_dupv16si_1; ++ break; ++ case E_V8SImode: ++ gen = gen_avx2_pbroadcastv8si_1; ++ break; ++ case E_V16QImode: ++ gen = gen_avx2_pbroadcastv16qi; ++ break; ++ case E_V8HImode: ++ gen = gen_avx2_pbroadcastv8hi; ++ break; ++ case E_V16SFmode: ++ if (TARGET_AVX512F) ++ gen = gen_avx512f_vec_dupv16sf_1; ++ break; ++ case E_V8SFmode: ++ gen = gen_avx2_vec_dupv8sf_1; ++ break; ++ case E_V8DFmode: ++ if (TARGET_AVX512F) ++ gen = gen_avx512f_vec_dupv8df_1; ++ break; ++ case E_V8DImode: ++ if (TARGET_AVX512F) ++ gen = gen_avx512f_vec_dupv8di_1; ++ break; ++ /* For other modes prefer other shuffles this function creates. */ ++ default: break; ++ } ++ if (gen != NULL) ++ { ++ if (!d->testing_p) ++ emit_insn (gen (d->target, d->op0)); ++ return true; ++ } ++ } ++ ++ if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p)) ++ return true; ++ ++ /* There are plenty of patterns in sse.md that are written for ++ SEL+CONCAT and are not replicated for a single op. Perhaps ++ that should be changed, to avoid the nastiness here. */ ++ ++ /* Recognize interleave style patterns, which means incrementing ++ every other permutation operand. */ ++ for (i = 0; i < nelt; i += 2) ++ { ++ nd.perm[i] = d->perm[i] & mask; ++ nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt; ++ } ++ if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, ++ d->testing_p)) ++ return true; ++ ++ /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */ ++ if (nelt >= 4) ++ { ++ for (i = 0; i < nelt; i += 4) ++ { ++ nd.perm[i + 0] = d->perm[i + 0] & mask; ++ nd.perm[i + 1] = d->perm[i + 1] & mask; ++ nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt; ++ nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt; ++ } ++ ++ if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, ++ d->testing_p)) ++ return true; ++ } ++ } ++ ++ /* Try movss/movsd instructions. */ ++ if (expand_vec_perm_movs (d)) ++ return true; ++ ++ /* Finally, try the fully general two operand permute. */ ++ if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt, ++ d->testing_p)) ++ return true; ++ ++ /* Recognize interleave style patterns with reversed operands. */ ++ if (!d->one_operand_p) ++ { ++ for (i = 0; i < nelt; ++i) ++ { ++ unsigned e = d->perm[i]; ++ if (e >= nelt) ++ e -= nelt; ++ else ++ e += nelt; ++ nd.perm[i] = e; ++ } ++ ++ if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt, ++ d->testing_p)) ++ return true; ++ } ++ ++ /* Try the SSE4.1 blend variable merge instructions. */ ++ if (expand_vec_perm_blend (d)) ++ return true; ++ ++ /* Try one of the AVX vpermil variable permutations. */ ++ if (expand_vec_perm_vpermil (d)) ++ return true; ++ ++ /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128, ++ vpshufb, vpermd, vpermps or vpermq variable permutation. */ ++ if (expand_vec_perm_pshufb (d)) ++ return true; ++ ++ /* Try the AVX2 vpalignr instruction. */ ++ if (expand_vec_perm_palignr (d, true)) ++ return true; ++ ++ /* Try the AVX512F vperm{s,d} instructions. 
*/ ++ if (ix86_expand_vec_one_operand_perm_avx512 (d)) ++ return true; ++ ++ /* Try the AVX512F vpermt2/vpermi2 instructions. */ ++ if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) ++ return true; ++ ++ /* See if we can get the same permutation in different vector integer ++ mode. */ ++ if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) ++ { ++ if (!d->testing_p) ++ emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); ++ return true; ++ } ++ return false; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D ++ in terms of a pair of pshuflw + pshufhw instructions. */ ++ ++static bool ++expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d) ++{ ++ unsigned char perm2[MAX_VECT_LEN]; ++ unsigned i; ++ bool ok; ++ ++ if (d->vmode != V8HImode || !d->one_operand_p) ++ return false; ++ ++ /* The two permutations only operate in 64-bit lanes. */ ++ for (i = 0; i < 4; ++i) ++ if (d->perm[i] >= 4) ++ return false; ++ for (i = 4; i < 8; ++i) ++ if (d->perm[i] < 4) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ /* Emit the pshuflw. */ ++ memcpy (perm2, d->perm, 4); ++ for (i = 4; i < 8; ++i) ++ perm2[i] = i; ++ ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p); ++ gcc_assert (ok); ++ ++ /* Emit the pshufhw. */ ++ memcpy (perm2 + 4, d->perm + 4, 4); ++ for (i = 0; i < 4; ++i) ++ perm2[i] = i; ++ ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p); ++ gcc_assert (ok); ++ ++ return true; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify ++ the permutation using the SSSE3 palignr instruction. This succeeds ++ when all of the elements in PERM fit within one vector and we merely ++ need to shift them down so that a single vector permutation has a ++ chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only ++ the vpalignr instruction itself can perform the requested permutation. */ ++ ++static bool ++expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p) ++{ ++ unsigned i, nelt = d->nelt; ++ unsigned min, max, minswap, maxswap; ++ bool in_order, ok, swap = false; ++ rtx shift, target; ++ struct expand_vec_perm_d dcopy; ++ ++ /* Even with AVX, palignr only operates on 128-bit vectors, ++ in AVX2 palignr operates on both 128-bit lanes. */ ++ if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) ++ && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32)) ++ return false; ++ ++ min = 2 * nelt; ++ max = 0; ++ minswap = 2 * nelt; ++ maxswap = 0; ++ for (i = 0; i < nelt; ++i) ++ { ++ unsigned e = d->perm[i]; ++ unsigned eswap = d->perm[i] ^ nelt; ++ if (GET_MODE_SIZE (d->vmode) == 32) ++ { ++ e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1); ++ eswap = e ^ (nelt / 2); ++ } ++ if (e < min) ++ min = e; ++ if (e > max) ++ max = e; ++ if (eswap < minswap) ++ minswap = eswap; ++ if (eswap > maxswap) ++ maxswap = eswap; ++ } ++ if (min == 0 ++ || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt)) ++ { ++ if (d->one_operand_p ++ || minswap == 0 ++ || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32 ++ ? nelt / 2 : nelt)) ++ return false; ++ swap = true; ++ min = minswap; ++ max = maxswap; ++ } ++ ++ /* Given that we have SSSE3, we know we'll be able to implement the ++ single operand permutation after the palignr with pshufb for ++ 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed ++ first. 
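[Editorial sketch] The window analysis above boils down to index arithmetic: if every selected element of the 2*nelt-element concatenation lies in one contiguous window of nelt elements starting at min, rotating the concatenation down by min positions (the palignr) leaves only the one-operand shuffle perm[i] - min. A rough scalar model for one 16-byte lane, with hypothetical names:

#include <string.h>

#define NELT 16   /* byte elements in one 128-bit lane */

/* Scalar model of the palignr reduction for one 16-byte lane.  OP1:OP0 is
   viewed as a single 32-byte buffer indexed by PERM.  If every index falls
   inside a window [min, min + NELT), extract that window (what palignr by
   MIN bytes produces) and apply the rebased one-operand shuffle.
   Returns 0 when the trick does not apply.  */
static int
perm_via_align (const unsigned char *op0, const unsigned char *op1,
                const unsigned char *perm, unsigned char *out)
{
  unsigned char concat[2 * NELT], window[NELT];
  unsigned min = 2 * NELT, max = 0;

  memcpy (concat, op0, NELT);
  memcpy (concat + NELT, op1, NELT);

  for (unsigned i = 0; i < NELT; i++)
    {
      if (perm[i] < min)
        min = perm[i];
      if (perm[i] > max)
        max = perm[i];
    }
  if (min == 0 || max - min >= NELT)
    return 0;   /* already aligned (handled elsewhere) or window too wide */

  memcpy (window, concat + min, NELT);   /* the palignr step              */
  for (unsigned i = 0; i < NELT; i++)
    out[i] = window[perm[i] - min];      /* residual one-operand shuffle  */
  return 1;
}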
*/ ++ if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p) ++ return true; ++ ++ dcopy = *d; ++ if (swap) ++ { ++ dcopy.op0 = d->op1; ++ dcopy.op1 = d->op0; ++ for (i = 0; i < nelt; ++i) ++ dcopy.perm[i] ^= nelt; ++ } ++ ++ in_order = true; ++ for (i = 0; i < nelt; ++i) ++ { ++ unsigned e = dcopy.perm[i]; ++ if (GET_MODE_SIZE (d->vmode) == 32 ++ && e >= nelt ++ && (e & (nelt / 2 - 1)) < min) ++ e = e - min - (nelt / 2); ++ else ++ e = e - min; ++ if (e != i) ++ in_order = false; ++ dcopy.perm[i] = e; ++ } ++ dcopy.one_operand_p = true; ++ ++ if (single_insn_only_p && !in_order) ++ return false; ++ ++ /* For AVX2, test whether we can permute the result in one instruction. */ ++ if (d->testing_p) ++ { ++ if (in_order) ++ return true; ++ dcopy.op1 = dcopy.op0; ++ return expand_vec_perm_1 (&dcopy); ++ } ++ ++ shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode)); ++ if (GET_MODE_SIZE (d->vmode) == 16) ++ { ++ target = gen_reg_rtx (TImode); ++ emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1), ++ gen_lowpart (TImode, dcopy.op0), shift)); ++ } ++ else ++ { ++ target = gen_reg_rtx (V2TImode); ++ emit_insn (gen_avx2_palignrv2ti (target, ++ gen_lowpart (V2TImode, dcopy.op1), ++ gen_lowpart (V2TImode, dcopy.op0), ++ shift)); ++ } ++ ++ dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target); ++ ++ /* Test for the degenerate case where the alignment by itself ++ produces the desired permutation. */ ++ if (in_order) ++ { ++ emit_move_insn (d->target, dcopy.op0); ++ return true; ++ } ++ ++ ok = expand_vec_perm_1 (&dcopy); ++ gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32); ++ ++ return ok; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify ++ the permutation using the SSE4_1 pblendv instruction. Potentially ++ reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */ ++ ++static bool ++expand_vec_perm_pblendv (struct expand_vec_perm_d *d) ++{ ++ unsigned i, which, nelt = d->nelt; ++ struct expand_vec_perm_d dcopy, dcopy1; ++ machine_mode vmode = d->vmode; ++ bool ok; ++ ++ /* Use the same checks as in expand_vec_perm_blend. */ ++ if (d->one_operand_p) ++ return false; ++ if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) ++ ; ++ else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) ++ ; ++ else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) ++ ; ++ else ++ return false; ++ ++ /* Figure out where permutation elements stay not in their ++ respective lanes. */ ++ for (i = 0, which = 0; i < nelt; ++i) ++ { ++ unsigned e = d->perm[i]; ++ if (e != i) ++ which |= (e < nelt ? 1 : 2); ++ } ++ /* We can pblend the part where elements stay not in their ++ respective lanes only when these elements are all in one ++ half of a permutation. ++ {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective ++ lanes, but both 8 and 9 >= 8 ++ {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their ++ respective lanes and 8 >= 8, but 2 not. */ ++ if (which != 1 && which != 2) ++ return false; ++ if (d->testing_p && GET_MODE_SIZE (vmode) == 16) ++ return true; ++ ++ /* First we apply one operand permutation to the part where ++ elements stay not in their respective lanes. 
*/ ++ dcopy = *d; ++ if (which == 2) ++ dcopy.op0 = dcopy.op1 = d->op1; ++ else ++ dcopy.op0 = dcopy.op1 = d->op0; ++ if (!d->testing_p) ++ dcopy.target = gen_reg_rtx (vmode); ++ dcopy.one_operand_p = true; ++ ++ for (i = 0; i < nelt; ++i) ++ dcopy.perm[i] = d->perm[i] & (nelt - 1); ++ ++ ok = expand_vec_perm_1 (&dcopy); ++ if (GET_MODE_SIZE (vmode) != 16 && !ok) ++ return false; ++ else ++ gcc_assert (ok); ++ if (d->testing_p) ++ return true; ++ ++ /* Next we put permuted elements into their positions. */ ++ dcopy1 = *d; ++ if (which == 2) ++ dcopy1.op1 = dcopy.target; ++ else ++ dcopy1.op0 = dcopy.target; ++ ++ for (i = 0; i < nelt; ++i) ++ dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i); ++ ++ ok = expand_vec_perm_blend (&dcopy1); ++ gcc_assert (ok); ++ ++ return true; ++} ++ ++static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d); ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify ++ a two vector permutation into a single vector permutation by using ++ an interleave operation to merge the vectors. */ ++ ++static bool ++expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) ++{ ++ struct expand_vec_perm_d dremap, dfinal; ++ unsigned i, nelt = d->nelt, nelt2 = nelt / 2; ++ unsigned HOST_WIDE_INT contents; ++ unsigned char remap[2 * MAX_VECT_LEN]; ++ rtx_insn *seq; ++ bool ok, same_halves = false; ++ ++ if (GET_MODE_SIZE (d->vmode) == 16) ++ { ++ if (d->one_operand_p) ++ return false; ++ } ++ else if (GET_MODE_SIZE (d->vmode) == 32) ++ { ++ if (!TARGET_AVX) ++ return false; ++ /* For 32-byte modes allow even d->one_operand_p. ++ The lack of cross-lane shuffling in some instructions ++ might prevent a single insn shuffle. */ ++ dfinal = *d; ++ dfinal.testing_p = true; ++ /* If expand_vec_perm_interleave3 can expand this into ++ a 3 insn sequence, give up and let it be expanded as ++ 3 insn sequence. While that is one insn longer, ++ it doesn't need a memory operand and in the common ++ case that both interleave low and high permutations ++ with the same operands are adjacent needs 4 insns ++ for both after CSE. */ ++ if (expand_vec_perm_interleave3 (&dfinal)) ++ return false; ++ } ++ else ++ return false; ++ ++ /* Examine from whence the elements come. */ ++ contents = 0; ++ for (i = 0; i < nelt; ++i) ++ contents |= HOST_WIDE_INT_1U << d->perm[i]; ++ ++ memset (remap, 0xff, sizeof (remap)); ++ dremap = *d; ++ ++ if (GET_MODE_SIZE (d->vmode) == 16) ++ { ++ unsigned HOST_WIDE_INT h1, h2, h3, h4; ++ ++ /* Split the two input vectors into 4 halves. */ ++ h1 = (HOST_WIDE_INT_1U << nelt2) - 1; ++ h2 = h1 << nelt2; ++ h3 = h2 << nelt2; ++ h4 = h3 << nelt2; ++ ++ /* If the elements from the low halves use interleave low, and similarly ++ for interleave high. If the elements are from mis-matched halves, we ++ can use shufps for V4SF/V4SI or do a DImode shuffle. 
*/ ++ if ((contents & (h1 | h3)) == contents) ++ { ++ /* punpckl* */ ++ for (i = 0; i < nelt2; ++i) ++ { ++ remap[i] = i * 2; ++ remap[i + nelt] = i * 2 + 1; ++ dremap.perm[i * 2] = i; ++ dremap.perm[i * 2 + 1] = i + nelt; ++ } ++ if (!TARGET_SSE2 && d->vmode == V4SImode) ++ dremap.vmode = V4SFmode; ++ } ++ else if ((contents & (h2 | h4)) == contents) ++ { ++ /* punpckh* */ ++ for (i = 0; i < nelt2; ++i) ++ { ++ remap[i + nelt2] = i * 2; ++ remap[i + nelt + nelt2] = i * 2 + 1; ++ dremap.perm[i * 2] = i + nelt2; ++ dremap.perm[i * 2 + 1] = i + nelt + nelt2; ++ } ++ if (!TARGET_SSE2 && d->vmode == V4SImode) ++ dremap.vmode = V4SFmode; ++ } ++ else if ((contents & (h1 | h4)) == contents) ++ { ++ /* shufps */ ++ for (i = 0; i < nelt2; ++i) ++ { ++ remap[i] = i; ++ remap[i + nelt + nelt2] = i + nelt2; ++ dremap.perm[i] = i; ++ dremap.perm[i + nelt2] = i + nelt + nelt2; ++ } ++ if (nelt != 4) ++ { ++ /* shufpd */ ++ dremap.vmode = V2DImode; ++ dremap.nelt = 2; ++ dremap.perm[0] = 0; ++ dremap.perm[1] = 3; ++ } ++ } ++ else if ((contents & (h2 | h3)) == contents) ++ { ++ /* shufps */ ++ for (i = 0; i < nelt2; ++i) ++ { ++ remap[i + nelt2] = i; ++ remap[i + nelt] = i + nelt2; ++ dremap.perm[i] = i + nelt2; ++ dremap.perm[i + nelt2] = i + nelt; ++ } ++ if (nelt != 4) ++ { ++ /* shufpd */ ++ dremap.vmode = V2DImode; ++ dremap.nelt = 2; ++ dremap.perm[0] = 1; ++ dremap.perm[1] = 2; ++ } ++ } ++ else ++ return false; ++ } ++ else ++ { ++ unsigned int nelt4 = nelt / 4, nzcnt = 0; ++ unsigned HOST_WIDE_INT q[8]; ++ unsigned int nonzero_halves[4]; ++ ++ /* Split the two input vectors into 8 quarters. */ ++ q[0] = (HOST_WIDE_INT_1U << nelt4) - 1; ++ for (i = 1; i < 8; ++i) ++ q[i] = q[0] << (nelt4 * i); ++ for (i = 0; i < 4; ++i) ++ if (((q[2 * i] | q[2 * i + 1]) & contents) != 0) ++ { ++ nonzero_halves[nzcnt] = i; ++ ++nzcnt; ++ } ++ ++ if (nzcnt == 1) ++ { ++ gcc_assert (d->one_operand_p); ++ nonzero_halves[1] = nonzero_halves[0]; ++ same_halves = true; ++ } ++ else if (d->one_operand_p) ++ { ++ gcc_assert (nonzero_halves[0] == 0); ++ gcc_assert (nonzero_halves[1] == 1); ++ } ++ ++ if (nzcnt <= 2) ++ { ++ if (d->perm[0] / nelt2 == nonzero_halves[1]) ++ { ++ /* Attempt to increase the likelihood that dfinal ++ shuffle will be intra-lane. */ ++ std::swap (nonzero_halves[0], nonzero_halves[1]); ++ } ++ ++ /* vperm2f128 or vperm2i128. 
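[Editorial sketch] For the 16-byte path, the case analysis above only needs a bitmask recording which halves of the two inputs are referenced. A compact scalar model of that bookkeeping; the helper name is ours:

#include <stddef.h>

/* Classify a two-operand permutation of NELT elements by which halves of
   the 2*NELT-element concatenation it reads, mirroring the h1..h4 masks in
   the patch.  Returns the shuffle family that applies, or NULL when all
   four halves are referenced and the interleave trick cannot be used.  */
static const char *
classify_halves (const unsigned char *perm, unsigned nelt)
{
  unsigned long long contents = 0, h1, h2, h3, h4;
  unsigned nelt2 = nelt / 2;

  for (unsigned i = 0; i < nelt; i++)
    contents |= 1ULL << perm[i];

  h1 = (1ULL << nelt2) - 1;   /* low half of op0   */
  h2 = h1 << nelt2;           /* high half of op0  */
  h3 = h2 << nelt2;           /* low half of op1   */
  h4 = h3 << nelt2;           /* high half of op1  */

  if ((contents & (h1 | h3)) == contents)
    return "punpckl*";
  if ((contents & (h2 | h4)) == contents)
    return "punpckh*";
  if ((contents & (h1 | h4)) == contents)
    return "shufps (low of op0, high of op1)";
  if ((contents & (h2 | h3)) == contents)
    return "shufps (high of op0, low of op1)";
  return NULL;   /* needs all four halves; other strategies take over */
}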
*/ ++ for (i = 0; i < nelt2; ++i) ++ { ++ remap[i + nonzero_halves[1] * nelt2] = i + nelt2; ++ remap[i + nonzero_halves[0] * nelt2] = i; ++ dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2; ++ dremap.perm[i] = i + nonzero_halves[0] * nelt2; ++ } ++ ++ if (d->vmode != V8SFmode ++ && d->vmode != V4DFmode ++ && d->vmode != V8SImode) ++ { ++ dremap.vmode = V8SImode; ++ dremap.nelt = 8; ++ for (i = 0; i < 4; ++i) ++ { ++ dremap.perm[i] = i + nonzero_halves[0] * 4; ++ dremap.perm[i + 4] = i + nonzero_halves[1] * 4; ++ } ++ } ++ } ++ else if (d->one_operand_p) ++ return false; ++ else if (TARGET_AVX2 ++ && (contents & (q[0] | q[2] | q[4] | q[6])) == contents) ++ { ++ /* vpunpckl* */ ++ for (i = 0; i < nelt4; ++i) ++ { ++ remap[i] = i * 2; ++ remap[i + nelt] = i * 2 + 1; ++ remap[i + nelt2] = i * 2 + nelt2; ++ remap[i + nelt + nelt2] = i * 2 + nelt2 + 1; ++ dremap.perm[i * 2] = i; ++ dremap.perm[i * 2 + 1] = i + nelt; ++ dremap.perm[i * 2 + nelt2] = i + nelt2; ++ dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2; ++ } ++ } ++ else if (TARGET_AVX2 ++ && (contents & (q[1] | q[3] | q[5] | q[7])) == contents) ++ { ++ /* vpunpckh* */ ++ for (i = 0; i < nelt4; ++i) ++ { ++ remap[i + nelt4] = i * 2; ++ remap[i + nelt + nelt4] = i * 2 + 1; ++ remap[i + nelt2 + nelt4] = i * 2 + nelt2; ++ remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1; ++ dremap.perm[i * 2] = i + nelt4; ++ dremap.perm[i * 2 + 1] = i + nelt + nelt4; ++ dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4; ++ dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4; ++ } ++ } ++ else ++ return false; ++ } ++ ++ /* Use the remapping array set up above to move the elements from their ++ swizzled locations into their final destinations. */ ++ dfinal = *d; ++ for (i = 0; i < nelt; ++i) ++ { ++ unsigned e = remap[d->perm[i]]; ++ gcc_assert (e < nelt); ++ /* If same_halves is true, both halves of the remapped vector are the ++ same. Avoid cross-lane accesses if possible. */ ++ if (same_halves && i >= nelt2) ++ { ++ gcc_assert (e < nelt2); ++ dfinal.perm[i] = e + nelt2; ++ } ++ else ++ dfinal.perm[i] = e; ++ } ++ if (!d->testing_p) ++ { ++ dremap.target = gen_reg_rtx (dremap.vmode); ++ dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target); ++ } ++ dfinal.op1 = dfinal.op0; ++ dfinal.one_operand_p = true; ++ ++ /* Test if the final remap can be done with a single insn. For V4SFmode or ++ V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */ ++ start_sequence (); ++ ok = expand_vec_perm_1 (&dfinal); ++ seq = get_insns (); ++ end_sequence (); ++ ++ if (!ok) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ if (dremap.vmode != dfinal.vmode) ++ { ++ dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0); ++ dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1); ++ } ++ ++ ok = expand_vec_perm_1 (&dremap); ++ gcc_assert (ok); ++ ++ emit_insn (seq); ++ return true; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify ++ a single vector cross-lane permutation into vpermq followed ++ by any of the single insn permutations. 
*/ ++ ++static bool ++expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d) ++{ ++ struct expand_vec_perm_d dremap, dfinal; ++ unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4; ++ unsigned contents[2]; ++ bool ok; ++ ++ if (!(TARGET_AVX2 ++ && (d->vmode == V32QImode || d->vmode == V16HImode) ++ && d->one_operand_p)) ++ return false; ++ ++ contents[0] = 0; ++ contents[1] = 0; ++ for (i = 0; i < nelt2; ++i) ++ { ++ contents[0] |= 1u << (d->perm[i] / nelt4); ++ contents[1] |= 1u << (d->perm[i + nelt2] / nelt4); ++ } ++ ++ for (i = 0; i < 2; ++i) ++ { ++ unsigned int cnt = 0; ++ for (j = 0; j < 4; ++j) ++ if ((contents[i] & (1u << j)) != 0 && ++cnt > 2) ++ return false; ++ } ++ ++ if (d->testing_p) ++ return true; ++ ++ dremap = *d; ++ dremap.vmode = V4DImode; ++ dremap.nelt = 4; ++ dremap.target = gen_reg_rtx (V4DImode); ++ dremap.op0 = gen_lowpart (V4DImode, d->op0); ++ dremap.op1 = dremap.op0; ++ dremap.one_operand_p = true; ++ for (i = 0; i < 2; ++i) ++ { ++ unsigned int cnt = 0; ++ for (j = 0; j < 4; ++j) ++ if ((contents[i] & (1u << j)) != 0) ++ dremap.perm[2 * i + cnt++] = j; ++ for (; cnt < 2; ++cnt) ++ dremap.perm[2 * i + cnt] = 0; ++ } ++ ++ dfinal = *d; ++ dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target); ++ dfinal.op1 = dfinal.op0; ++ dfinal.one_operand_p = true; ++ for (i = 0, j = 0; i < nelt; ++i) ++ { ++ if (i == nelt2) ++ j = 2; ++ dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0); ++ if ((d->perm[i] / nelt4) == dremap.perm[j]) ++ ; ++ else if ((d->perm[i] / nelt4) == dremap.perm[j + 1]) ++ dfinal.perm[i] |= nelt4; ++ else ++ gcc_unreachable (); ++ } ++ ++ ok = expand_vec_perm_1 (&dremap); ++ gcc_assert (ok); ++ ++ ok = expand_vec_perm_1 (&dfinal); ++ gcc_assert (ok); ++ ++ return true; ++} ++ ++static bool canonicalize_perm (struct expand_vec_perm_d *d); ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand ++ a vector permutation using two instructions, vperm2f128 resp. ++ vperm2i128 followed by any single in-lane permutation. */ ++ ++static bool ++expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d) ++{ ++ struct expand_vec_perm_d dfirst, dsecond; ++ unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm; ++ bool ok; ++ ++ if (!TARGET_AVX ++ || GET_MODE_SIZE (d->vmode) != 32 ++ || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2)) ++ return false; ++ ++ dsecond = *d; ++ dsecond.one_operand_p = false; ++ dsecond.testing_p = true; ++ ++ /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128 ++ immediate. For perm < 16 the second permutation uses ++ d->op0 as first operand, for perm >= 16 it uses d->op1 ++ as first operand. The second operand is the result of ++ vperm2[fi]128. */ ++ for (perm = 0; perm < 32; perm++) ++ { ++ /* Ignore permutations which do not move anything cross-lane. */ ++ if (perm < 16) ++ { ++ /* The second shuffle for e.g. V4DFmode has ++ 0123 and ABCD operands. ++ Ignore AB23, as 23 is already in the second lane ++ of the first operand. */ ++ if ((perm & 0xc) == (1 << 2)) continue; ++ /* And 01CD, as 01 is in the first lane of the first ++ operand. */ ++ if ((perm & 3) == 0) continue; ++ /* And 4567, as then the vperm2[fi]128 doesn't change ++ anything on the original 4567 second operand. */ ++ if ((perm & 0xf) == ((3 << 2) | 2)) continue; ++ } ++ else ++ { ++ /* The second shuffle for e.g. V4DFmode has ++ 4567 and ABCD operands. ++ Ignore AB67, as 67 is already in the second lane ++ of the first operand. 
*/ ++ if ((perm & 0xc) == (3 << 2)) continue; ++ /* And 45CD, as 45 is in the first lane of the first ++ operand. */ ++ if ((perm & 3) == 2) continue; ++ /* And 0123, as then the vperm2[fi]128 doesn't change ++ anything on the original 0123 first operand. */ ++ if ((perm & 0xf) == (1 << 2)) continue; ++ } ++ ++ for (i = 0; i < nelt; i++) ++ { ++ j = d->perm[i] / nelt2; ++ if (j == ((perm >> (2 * (i >= nelt2))) & 3)) ++ dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1)); ++ else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16)) ++ dsecond.perm[i] = d->perm[i] & (nelt - 1); ++ else ++ break; ++ } ++ ++ if (i == nelt) ++ { ++ start_sequence (); ++ ok = expand_vec_perm_1 (&dsecond); ++ end_sequence (); ++ } ++ else ++ ok = false; ++ ++ if (ok) ++ { ++ if (d->testing_p) ++ return true; ++ ++ /* Found a usable second shuffle. dfirst will be ++ vperm2f128 on d->op0 and d->op1. */ ++ dsecond.testing_p = false; ++ dfirst = *d; ++ dfirst.target = gen_reg_rtx (d->vmode); ++ for (i = 0; i < nelt; i++) ++ dfirst.perm[i] = (i & (nelt2 - 1)) ++ + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2; ++ ++ canonicalize_perm (&dfirst); ++ ok = expand_vec_perm_1 (&dfirst); ++ gcc_assert (ok); ++ ++ /* And dsecond is some single insn shuffle, taking ++ d->op0 and result of vperm2f128 (if perm < 16) or ++ d->op1 and result of vperm2f128 (otherwise). */ ++ if (perm >= 16) ++ dsecond.op0 = dsecond.op1; ++ dsecond.op1 = dfirst.target; ++ ++ ok = expand_vec_perm_1 (&dsecond); ++ gcc_assert (ok); ++ ++ return true; ++ } ++ ++ /* For one operand, the only useful vperm2f128 permutation is 0x01 ++ aka lanes swap. */ ++ if (d->one_operand_p) ++ return false; ++ } ++ ++ return false; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify ++ a two vector permutation using 2 intra-lane interleave insns ++ and cross-lane shuffle for 32-byte vectors. */ ++ ++static bool ++expand_vec_perm_interleave3 (struct expand_vec_perm_d *d) ++{ ++ unsigned i, nelt; ++ rtx (*gen) (rtx, rtx, rtx); ++ ++ if (d->one_operand_p) ++ return false; ++ if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32) ++ ; ++ else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode)) ++ ; ++ else ++ return false; ++ ++ nelt = d->nelt; ++ if (d->perm[0] != 0 && d->perm[0] != nelt / 2) ++ return false; ++ for (i = 0; i < nelt; i += 2) ++ if (d->perm[i] != d->perm[0] + i / 2 ++ || d->perm[i + 1] != d->perm[0] + i / 2 + nelt) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ switch (d->vmode) ++ { ++ case E_V32QImode: ++ if (d->perm[0]) ++ gen = gen_vec_interleave_highv32qi; ++ else ++ gen = gen_vec_interleave_lowv32qi; ++ break; ++ case E_V16HImode: ++ if (d->perm[0]) ++ gen = gen_vec_interleave_highv16hi; ++ else ++ gen = gen_vec_interleave_lowv16hi; ++ break; ++ case E_V8SImode: ++ if (d->perm[0]) ++ gen = gen_vec_interleave_highv8si; ++ else ++ gen = gen_vec_interleave_lowv8si; ++ break; ++ case E_V4DImode: ++ if (d->perm[0]) ++ gen = gen_vec_interleave_highv4di; ++ else ++ gen = gen_vec_interleave_lowv4di; ++ break; ++ case E_V8SFmode: ++ if (d->perm[0]) ++ gen = gen_vec_interleave_highv8sf; ++ else ++ gen = gen_vec_interleave_lowv8sf; ++ break; ++ case E_V4DFmode: ++ if (d->perm[0]) ++ gen = gen_vec_interleave_highv4df; ++ else ++ gen = gen_vec_interleave_lowv4df; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ emit_insn (gen (d->target, d->op0, d->op1)); ++ return true; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. 
Try to implement ++ a single vector permutation using a single intra-lane vector ++ permutation, vperm2f128 swapping the lanes and vblend* insn blending ++ the non-swapped and swapped vectors together. */ ++ ++static bool ++expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d) ++{ ++ struct expand_vec_perm_d dfirst, dsecond; ++ unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2; ++ rtx_insn *seq; ++ bool ok; ++ rtx (*blend) (rtx, rtx, rtx, rtx) = NULL; ++ ++ if (!TARGET_AVX ++ || TARGET_AVX2 ++ || (d->vmode != V8SFmode && d->vmode != V4DFmode) ++ || !d->one_operand_p) ++ return false; ++ ++ dfirst = *d; ++ for (i = 0; i < nelt; i++) ++ dfirst.perm[i] = 0xff; ++ for (i = 0, msk = 0; i < nelt; i++) ++ { ++ j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2; ++ if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i]) ++ return false; ++ dfirst.perm[j] = d->perm[i]; ++ if (j != i) ++ msk |= (1 << i); ++ } ++ for (i = 0; i < nelt; i++) ++ if (dfirst.perm[i] == 0xff) ++ dfirst.perm[i] = i; ++ ++ if (!d->testing_p) ++ dfirst.target = gen_reg_rtx (dfirst.vmode); ++ ++ start_sequence (); ++ ok = expand_vec_perm_1 (&dfirst); ++ seq = get_insns (); ++ end_sequence (); ++ ++ if (!ok) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ emit_insn (seq); ++ ++ dsecond = *d; ++ dsecond.op0 = dfirst.target; ++ dsecond.op1 = dfirst.target; ++ dsecond.one_operand_p = true; ++ dsecond.target = gen_reg_rtx (dsecond.vmode); ++ for (i = 0; i < nelt; i++) ++ dsecond.perm[i] = i ^ nelt2; ++ ++ ok = expand_vec_perm_1 (&dsecond); ++ gcc_assert (ok); ++ ++ blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256; ++ emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk))); ++ return true; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF ++ permutation using two vperm2f128, followed by a vshufpd insn blending ++ the two vectors together. */ ++ ++static bool ++expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d) ++{ ++ struct expand_vec_perm_d dfirst, dsecond, dthird; ++ bool ok; ++ ++ if (!TARGET_AVX || (d->vmode != V4DFmode)) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ dfirst = *d; ++ dsecond = *d; ++ dthird = *d; ++ ++ dfirst.perm[0] = (d->perm[0] & ~1); ++ dfirst.perm[1] = (d->perm[0] & ~1) + 1; ++ dfirst.perm[2] = (d->perm[2] & ~1); ++ dfirst.perm[3] = (d->perm[2] & ~1) + 1; ++ dsecond.perm[0] = (d->perm[1] & ~1); ++ dsecond.perm[1] = (d->perm[1] & ~1) + 1; ++ dsecond.perm[2] = (d->perm[3] & ~1); ++ dsecond.perm[3] = (d->perm[3] & ~1) + 1; ++ dthird.perm[0] = (d->perm[0] % 2); ++ dthird.perm[1] = (d->perm[1] % 2) + 4; ++ dthird.perm[2] = (d->perm[2] % 2) + 2; ++ dthird.perm[3] = (d->perm[3] % 2) + 6; ++ ++ dfirst.target = gen_reg_rtx (dfirst.vmode); ++ dsecond.target = gen_reg_rtx (dsecond.vmode); ++ dthird.op0 = dfirst.target; ++ dthird.op1 = dsecond.target; ++ dthird.one_operand_p = false; ++ ++ canonicalize_perm (&dfirst); ++ canonicalize_perm (&dsecond); ++ ++ ok = expand_vec_perm_1 (&dfirst) ++ && expand_vec_perm_1 (&dsecond) ++ && expand_vec_perm_1 (&dthird); ++ ++ gcc_assert (ok); ++ ++ return true; ++} ++ ++/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word ++ permutation with two pshufb insns and an ior. We should have already ++ failed all two instruction sequences. 
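[Editorial sketch] The two-mask-plus-ior scheme of expand_vec_perm_pshufb2 below can be checked against a scalar pshufb model: a mask byte with bit 7 set produces zero, so each input contributes only the lanes it owns and the final OR merges the two halves. A self-contained sketch under that model; the names are ours:

#include <stdint.h>

/* Scalar model of PSHUFB on one 16-byte register: an index byte with
   bit 7 set produces 0, otherwise its low four bits select a source byte.  */
static void
pshufb16 (const uint8_t *src, const int8_t *mask, uint8_t *dst)
{
  for (int i = 0; i < 16; i++)
    dst[i] = (mask[i] < 0) ? 0 : src[mask[i] & 15];
}

/* Two-operand permutation PERM (indices 0..31 into OP0|OP1) done as two
   PSHUFBs and an OR, the shape used in the patch: each mask keeps the
   lanes served by its own operand and zeroes the rest with -128.  */
static void
perm2_via_pshufb (const uint8_t *op0, const uint8_t *op1,
                  const unsigned char *perm, uint8_t *out)
{
  int8_t m0[16], m1[16];
  uint8_t l[16], h[16];

  for (int i = 0; i < 16; i++)
    if (perm[i] < 16)
      {
        m0[i] = (int8_t) perm[i];
        m1[i] = -128;
      }
    else
      {
        m0[i] = -128;
        m1[i] = (int8_t) (perm[i] - 16);
      }

  pshufb16 (op0, m0, l);
  pshufb16 (op1, m1, h);
  for (int i = 0; i < 16; i++)
    out[i] = l[i] | h[i];
}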
*/ ++ ++static bool ++expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d) ++{ ++ rtx rperm[2][16], vperm, l, h, op, m128; ++ unsigned int i, nelt, eltsz; ++ ++ if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) ++ return false; ++ gcc_assert (!d->one_operand_p); ++ ++ if (d->testing_p) ++ return true; ++ ++ nelt = d->nelt; ++ eltsz = GET_MODE_UNIT_SIZE (d->vmode); ++ ++ /* Generate two permutation masks. If the required element is within ++ the given vector it is shuffled into the proper lane. If the required ++ element is in the other vector, force a zero into the lane by setting ++ bit 7 in the permutation mask. */ ++ m128 = GEN_INT (-128); ++ for (i = 0; i < nelt; ++i) ++ { ++ unsigned j, e = d->perm[i]; ++ unsigned which = (e >= nelt); ++ if (e >= nelt) ++ e -= nelt; ++ ++ for (j = 0; j < eltsz; ++j) ++ { ++ rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j); ++ rperm[1-which][i*eltsz + j] = m128; ++ } ++ } ++ ++ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0])); ++ vperm = force_reg (V16QImode, vperm); ++ ++ l = gen_reg_rtx (V16QImode); ++ op = gen_lowpart (V16QImode, d->op0); ++ emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm)); ++ ++ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1])); ++ vperm = force_reg (V16QImode, vperm); ++ ++ h = gen_reg_rtx (V16QImode); ++ op = gen_lowpart (V16QImode, d->op1); ++ emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm)); ++ ++ op = d->target; ++ if (d->vmode != V16QImode) ++ op = gen_reg_rtx (V16QImode); ++ emit_insn (gen_iorv16qi3 (op, l, h)); ++ if (op != d->target) ++ emit_move_insn (d->target, gen_lowpart (d->vmode, op)); ++ ++ return true; ++} ++ ++/* Implement arbitrary permutation of one V32QImode and V16QImode operand ++ with two vpshufb insns, vpermq and vpor. We should have already failed ++ all two or three instruction sequences. */ ++ ++static bool ++expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d) ++{ ++ rtx rperm[2][32], vperm, l, h, hp, op, m128; ++ unsigned int i, nelt, eltsz; ++ ++ if (!TARGET_AVX2 ++ || !d->one_operand_p ++ || (d->vmode != V32QImode && d->vmode != V16HImode)) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ nelt = d->nelt; ++ eltsz = GET_MODE_UNIT_SIZE (d->vmode); ++ ++ /* Generate two permutation masks. If the required element is within ++ the same lane, it is shuffled in. If the required element from the ++ other lane, force a zero by setting bit 7 in the permutation mask. ++ In the other mask the mask has non-negative elements if element ++ is requested from the other lane, but also moved to the other lane, ++ so that the result of vpshufb can have the two V2TImode halves ++ swapped. */ ++ m128 = GEN_INT (-128); ++ for (i = 0; i < nelt; ++i) ++ { ++ unsigned j, e = d->perm[i] & (nelt / 2 - 1); ++ unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz; ++ ++ for (j = 0; j < eltsz; ++j) ++ { ++ rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j); ++ rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128; ++ } ++ } ++ ++ vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1])); ++ vperm = force_reg (V32QImode, vperm); ++ ++ h = gen_reg_rtx (V32QImode); ++ op = gen_lowpart (V32QImode, d->op0); ++ emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); ++ ++ /* Swap the 128-byte lanes of h into hp. 
*/ ++ hp = gen_reg_rtx (V4DImode); ++ op = gen_lowpart (V4DImode, h); ++ emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx, ++ const1_rtx)); ++ ++ vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0])); ++ vperm = force_reg (V32QImode, vperm); ++ ++ l = gen_reg_rtx (V32QImode); ++ op = gen_lowpart (V32QImode, d->op0); ++ emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm)); ++ ++ op = d->target; ++ if (d->vmode != V32QImode) ++ op = gen_reg_rtx (V32QImode); ++ emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp))); ++ if (op != d->target) ++ emit_move_insn (d->target, gen_lowpart (d->vmode, op)); ++ ++ return true; ++} ++ ++/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even ++ and extract-odd permutations of two V32QImode and V16QImode operand ++ with two vpshufb insns, vpor and vpermq. We should have already ++ failed all two or three instruction sequences. */ ++ ++static bool ++expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d) ++{ ++ rtx rperm[2][32], vperm, l, h, ior, op, m128; ++ unsigned int i, nelt, eltsz; ++ ++ if (!TARGET_AVX2 ++ || d->one_operand_p ++ || (d->vmode != V32QImode && d->vmode != V16HImode)) ++ return false; ++ ++ for (i = 0; i < d->nelt; ++i) ++ if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2)) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ nelt = d->nelt; ++ eltsz = GET_MODE_UNIT_SIZE (d->vmode); ++ ++ /* Generate two permutation masks. In the first permutation mask ++ the first quarter will contain indexes for the first half ++ of the op0, the second quarter will contain bit 7 set, third quarter ++ will contain indexes for the second half of the op0 and the ++ last quarter bit 7 set. In the second permutation mask ++ the first quarter will contain bit 7 set, the second quarter ++ indexes for the first half of the op1, the third quarter bit 7 set ++ and last quarter indexes for the second half of the op1. ++ I.e. the first mask e.g. for V32QImode extract even will be: ++ 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128 ++ (all values masked with 0xf except for -128) and second mask ++ for extract even will be ++ -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */ ++ m128 = GEN_INT (-128); ++ for (i = 0; i < nelt; ++i) ++ { ++ unsigned j, e = d->perm[i] & (nelt / 2 - 1); ++ unsigned which = d->perm[i] >= nelt; ++ unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0; ++ ++ for (j = 0; j < eltsz; ++j) ++ { ++ rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j); ++ rperm[1 - which][(i * eltsz + j) ^ xorv] = m128; ++ } ++ } ++ ++ vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0])); ++ vperm = force_reg (V32QImode, vperm); ++ ++ l = gen_reg_rtx (V32QImode); ++ op = gen_lowpart (V32QImode, d->op0); ++ emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm)); ++ ++ vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1])); ++ vperm = force_reg (V32QImode, vperm); ++ ++ h = gen_reg_rtx (V32QImode); ++ op = gen_lowpart (V32QImode, d->op1); ++ emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); ++ ++ ior = gen_reg_rtx (V32QImode); ++ emit_insn (gen_iorv32qi3 (ior, l, h)); ++ ++ /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. 
*/ ++ op = gen_reg_rtx (V4DImode); ++ ior = gen_lowpart (V4DImode, ior); ++ emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx, ++ const1_rtx, GEN_INT (3))); ++ emit_move_insn (d->target, gen_lowpart (d->vmode, op)); ++ ++ return true; ++} ++ ++/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even ++ and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands ++ with two "and" and "pack" or two "shift" and "pack" insns. We should ++ have already failed all two instruction sequences. */ ++ ++static bool ++expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d) ++{ ++ rtx op, dop0, dop1, t; ++ unsigned i, odd, c, s, nelt = d->nelt; ++ bool end_perm = false; ++ machine_mode half_mode; ++ rtx (*gen_and) (rtx, rtx, rtx); ++ rtx (*gen_pack) (rtx, rtx, rtx); ++ rtx (*gen_shift) (rtx, rtx, rtx); ++ ++ if (d->one_operand_p) ++ return false; ++ ++ switch (d->vmode) ++ { ++ case E_V8HImode: ++ /* Required for "pack". */ ++ if (!TARGET_SSE4_1) ++ return false; ++ c = 0xffff; ++ s = 16; ++ half_mode = V4SImode; ++ gen_and = gen_andv4si3; ++ gen_pack = gen_sse4_1_packusdw; ++ gen_shift = gen_lshrv4si3; ++ break; ++ case E_V16QImode: ++ /* No check as all instructions are SSE2. */ ++ c = 0xff; ++ s = 8; ++ half_mode = V8HImode; ++ gen_and = gen_andv8hi3; ++ gen_pack = gen_sse2_packuswb; ++ gen_shift = gen_lshrv8hi3; ++ break; ++ case E_V16HImode: ++ if (!TARGET_AVX2) ++ return false; ++ c = 0xffff; ++ s = 16; ++ half_mode = V8SImode; ++ gen_and = gen_andv8si3; ++ gen_pack = gen_avx2_packusdw; ++ gen_shift = gen_lshrv8si3; ++ end_perm = true; ++ break; ++ case E_V32QImode: ++ if (!TARGET_AVX2) ++ return false; ++ c = 0xff; ++ s = 8; ++ half_mode = V16HImode; ++ gen_and = gen_andv16hi3; ++ gen_pack = gen_avx2_packuswb; ++ gen_shift = gen_lshrv16hi3; ++ end_perm = true; ++ break; ++ default: ++ /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than ++ general shuffles. */ ++ return false; ++ } ++ ++ /* Check that permutation is even or odd. */ ++ odd = d->perm[0]; ++ if (odd > 1) ++ return false; ++ ++ for (i = 1; i < nelt; ++i) ++ if (d->perm[i] != 2 * i + odd) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ dop0 = gen_reg_rtx (half_mode); ++ dop1 = gen_reg_rtx (half_mode); ++ if (odd == 0) ++ { ++ t = gen_const_vec_duplicate (half_mode, GEN_INT (c)); ++ t = force_reg (half_mode, t); ++ emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0))); ++ emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1))); ++ } ++ else ++ { ++ emit_insn (gen_shift (dop0, ++ gen_lowpart (half_mode, d->op0), ++ GEN_INT (s))); ++ emit_insn (gen_shift (dop1, ++ gen_lowpart (half_mode, d->op1), ++ GEN_INT (s))); ++ } ++ /* In AVX2 for 256 bit case we need to permute pack result. */ ++ if (TARGET_AVX2 && end_perm) ++ { ++ op = gen_reg_rtx (d->vmode); ++ t = gen_reg_rtx (V4DImode); ++ emit_insn (gen_pack (op, dop0, dop1)); ++ emit_insn (gen_avx2_permv4di_1 (t, ++ gen_lowpart (V4DImode, op), ++ const0_rtx, ++ const2_rtx, ++ const1_rtx, ++ GEN_INT (3))); ++ emit_move_insn (d->target, gen_lowpart (d->vmode, t)); ++ } ++ else ++ emit_insn (gen_pack (d->target, dop0, dop1)); ++ ++ return true; ++} ++ ++/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even ++ and extract-odd permutations of two V64QI operands ++ with two "shifts", two "truncs" and one "concat" insns for "odd" ++ and two "truncs" and one concat insn for "even." ++ Have already failed all two instruction sequences. 
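[Editorial sketch] Both the and/shift-plus-pack strategy above and the shift/truncate variant that follows reduce to the same word-level picture: mask (even) or shift (odd) each 16-bit lane, then narrow back to bytes. A scalar model for the 128-bit two-operand case; the saturation in packuswb never triggers because every word is already in 0..255. Names are ours:

#include <stdint.h>

/* Model of extract-even / extract-odd over two 16-byte vectors using the
   patch's and/shift + pack idea, viewing each input as eight 16-bit words.
   ODD selects the parity: 0 = even bytes, 1 = odd bytes.  */
static void
even_odd_via_pack (const uint8_t *op0, const uint8_t *op1, int odd,
                   uint8_t *out /* 16 bytes */)
{
  uint16_t w0[8], w1[8];

  for (int i = 0; i < 8; i++)
    {
      /* Little-endian load of each word, as the hardware sees it.  */
      uint16_t a = (uint16_t) (op0[2 * i] | (op0[2 * i + 1] << 8));
      uint16_t b = (uint16_t) (op1[2 * i] | (op1[2 * i + 1] << 8));
      w0[i] = odd ? (uint16_t) (a >> 8) : (uint16_t) (a & 0xff);
      w1[i] = odd ? (uint16_t) (b >> 8) : (uint16_t) (b & 0xff);
    }

  /* packuswb: low eight results come from the first operand, high eight
     from the second; no saturation occurs since every word is <= 255.  */
  for (int i = 0; i < 8; i++)
    {
      out[i]     = (uint8_t) w0[i];
      out[i + 8] = (uint8_t) w1[i];
    }
}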
*/ ++ ++static bool ++expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d) ++{ ++ rtx t1, t2, t3, t4; ++ unsigned i, odd, nelt = d->nelt; ++ ++ if (!TARGET_AVX512BW ++ || d->one_operand_p ++ || d->vmode != V64QImode) ++ return false; ++ ++ /* Check that permutation is even or odd. */ ++ odd = d->perm[0]; ++ if (odd > 1) ++ return false; ++ ++ for (i = 1; i < nelt; ++i) ++ if (d->perm[i] != 2 * i + odd) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ ++ if (odd) ++ { ++ t1 = gen_reg_rtx (V32HImode); ++ t2 = gen_reg_rtx (V32HImode); ++ emit_insn (gen_lshrv32hi3 (t1, ++ gen_lowpart (V32HImode, d->op0), ++ GEN_INT (8))); ++ emit_insn (gen_lshrv32hi3 (t2, ++ gen_lowpart (V32HImode, d->op1), ++ GEN_INT (8))); ++ } ++ else ++ { ++ t1 = gen_lowpart (V32HImode, d->op0); ++ t2 = gen_lowpart (V32HImode, d->op1); ++ } ++ ++ t3 = gen_reg_rtx (V32QImode); ++ t4 = gen_reg_rtx (V32QImode); ++ emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1)); ++ emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2)); ++ emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4)); ++ ++ return true; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even ++ and extract-odd permutations. */ ++ ++static bool ++expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) ++{ ++ rtx t1, t2, t3, t4, t5; ++ ++ switch (d->vmode) ++ { ++ case E_V4DFmode: ++ if (d->testing_p) ++ break; ++ t1 = gen_reg_rtx (V4DFmode); ++ t2 = gen_reg_rtx (V4DFmode); ++ ++ /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ ++ emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20))); ++ emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31))); ++ ++ /* Now an unpck[lh]pd will produce the result required. */ ++ if (odd) ++ t3 = gen_avx_unpckhpd256 (d->target, t1, t2); ++ else ++ t3 = gen_avx_unpcklpd256 (d->target, t1, t2); ++ emit_insn (t3); ++ break; ++ ++ case E_V8SFmode: ++ { ++ int mask = odd ? 0xdd : 0x88; ++ ++ if (d->testing_p) ++ break; ++ t1 = gen_reg_rtx (V8SFmode); ++ t2 = gen_reg_rtx (V8SFmode); ++ t3 = gen_reg_rtx (V8SFmode); ++ ++ /* Shuffle within the 128-bit lanes to produce: ++ { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */ ++ emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1, ++ GEN_INT (mask))); ++ ++ /* Shuffle the lanes around to produce: ++ { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */ ++ emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1, ++ GEN_INT (0x3))); ++ ++ /* Shuffle within the 128-bit lanes to produce: ++ { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */ ++ emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44))); ++ ++ /* Shuffle within the 128-bit lanes to produce: ++ { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */ ++ emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee))); ++ ++ /* Shuffle the lanes around to produce: ++ { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */ ++ emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2, ++ GEN_INT (0x20))); ++ } ++ break; ++ ++ case E_V2DFmode: ++ case E_V4SFmode: ++ case E_V2DImode: ++ case E_V4SImode: ++ /* These are always directly implementable by expand_vec_perm_1. */ ++ gcc_unreachable (); ++ ++ case E_V8HImode: ++ if (TARGET_SSE4_1) ++ return expand_vec_perm_even_odd_pack (d); ++ else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB) ++ return expand_vec_perm_pshufb2 (d); ++ else ++ { ++ if (d->testing_p) ++ break; ++ /* We need 2*log2(N)-1 operations to achieve odd/even ++ with interleave. 
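[Editorial sketch] The 2*log2(N)-1 bound quoted above can be verified with a scalar simulation of the interleave ladder for N = 8: four interleaves plus one final low or high interleave separate the even or odd elements. A small check program with our own helper names:

#include <stdio.h>

/* Scalar models of punpcklwd / punpckhwd on 8-element vectors.  */
static void
il (const int *a, const int *b, int *r)
{
  for (int i = 0; i < 4; i++)
    {
      r[2 * i] = a[i];
      r[2 * i + 1] = b[i];
    }
}

static void
ih (const int *a, const int *b, int *r)
{
  for (int i = 0; i < 4; i++)
    {
      r[2 * i] = a[i + 4];
      r[2 * i + 1] = b[i + 4];
    }
}

int
main (void)
{
  int op0[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int op1[8] = { 8, 9, 10, 11, 12, 13, 14, 15 };
  int t1[8], t2[8], d1[8], d2[8], even[8], odd[8];

  ih (op0, op1, t1);   /* step 1 */
  il (op0, op1, d1);   /* step 2 */
  ih (d1, t1, t2);     /* step 3 */
  il (d1, t1, d2);     /* step 4 */
  il (d2, t2, even);   /* step 5 for the even extraction ...      */
  ih (d2, t2, odd);    /* ... or this one for the odd extraction  */

  for (int i = 0; i < 8; i++)
    printf ("%d %d\n", even[i], odd[i]);   /* prints 0..14 and 1..15 */
  return 0;
}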
*/ ++ t1 = gen_reg_rtx (V8HImode); ++ t2 = gen_reg_rtx (V8HImode); ++ emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1)); ++ emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1)); ++ emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1)); ++ emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1)); ++ if (odd) ++ t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2); ++ else ++ t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2); ++ emit_insn (t3); ++ } ++ break; ++ ++ case E_V16QImode: ++ return expand_vec_perm_even_odd_pack (d); ++ ++ case E_V16HImode: ++ case E_V32QImode: ++ return expand_vec_perm_even_odd_pack (d); ++ ++ case E_V64QImode: ++ return expand_vec_perm_even_odd_trunc (d); ++ ++ case E_V4DImode: ++ if (!TARGET_AVX2) ++ { ++ struct expand_vec_perm_d d_copy = *d; ++ d_copy.vmode = V4DFmode; ++ if (d->testing_p) ++ d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1); ++ else ++ d_copy.target = gen_reg_rtx (V4DFmode); ++ d_copy.op0 = gen_lowpart (V4DFmode, d->op0); ++ d_copy.op1 = gen_lowpart (V4DFmode, d->op1); ++ if (expand_vec_perm_even_odd_1 (&d_copy, odd)) ++ { ++ if (!d->testing_p) ++ emit_move_insn (d->target, ++ gen_lowpart (V4DImode, d_copy.target)); ++ return true; ++ } ++ return false; ++ } ++ ++ if (d->testing_p) ++ break; ++ ++ t1 = gen_reg_rtx (V4DImode); ++ t2 = gen_reg_rtx (V4DImode); ++ ++ /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ ++ emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20))); ++ emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31))); ++ ++ /* Now an vpunpck[lh]qdq will produce the result required. */ ++ if (odd) ++ t3 = gen_avx2_interleave_highv4di (d->target, t1, t2); ++ else ++ t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2); ++ emit_insn (t3); ++ break; ++ ++ case E_V8SImode: ++ if (!TARGET_AVX2) ++ { ++ struct expand_vec_perm_d d_copy = *d; ++ d_copy.vmode = V8SFmode; ++ if (d->testing_p) ++ d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1); ++ else ++ d_copy.target = gen_reg_rtx (V8SFmode); ++ d_copy.op0 = gen_lowpart (V8SFmode, d->op0); ++ d_copy.op1 = gen_lowpart (V8SFmode, d->op1); ++ if (expand_vec_perm_even_odd_1 (&d_copy, odd)) ++ { ++ if (!d->testing_p) ++ emit_move_insn (d->target, ++ gen_lowpart (V8SImode, d_copy.target)); ++ return true; ++ } ++ return false; ++ } ++ ++ if (d->testing_p) ++ break; ++ ++ t1 = gen_reg_rtx (V8SImode); ++ t2 = gen_reg_rtx (V8SImode); ++ t3 = gen_reg_rtx (V4DImode); ++ t4 = gen_reg_rtx (V4DImode); ++ t5 = gen_reg_rtx (V4DImode); ++ ++ /* Shuffle the lanes around into ++ { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */ ++ emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0), ++ gen_lowpart (V4DImode, d->op1), ++ GEN_INT (0x20))); ++ emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0), ++ gen_lowpart (V4DImode, d->op1), ++ GEN_INT (0x31))); ++ ++ /* Swap the 2nd and 3rd position in each lane into ++ { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */ ++ emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3), ++ GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); ++ emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4), ++ GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); ++ ++ /* Now an vpunpck[lh]qdq will produce ++ { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. 
*/ ++ if (odd) ++ t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1), ++ gen_lowpart (V4DImode, t2)); ++ else ++ t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1), ++ gen_lowpart (V4DImode, t2)); ++ emit_insn (t3); ++ emit_move_insn (d->target, gen_lowpart (V8SImode, t5)); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return true; ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match ++ extract-even and extract-odd permutations. */ ++ ++static bool ++expand_vec_perm_even_odd (struct expand_vec_perm_d *d) ++{ ++ unsigned i, odd, nelt = d->nelt; ++ ++ odd = d->perm[0]; ++ if (odd != 0 && odd != 1) ++ return false; ++ ++ for (i = 1; i < nelt; ++i) ++ if (d->perm[i] != 2 * i + odd) ++ return false; ++ ++ return expand_vec_perm_even_odd_1 (d, odd); ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast ++ permutations. We assume that expand_vec_perm_1 has already failed. */ ++ ++static bool ++expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) ++{ ++ unsigned elt = d->perm[0], nelt2 = d->nelt / 2; ++ machine_mode vmode = d->vmode; ++ unsigned char perm2[4]; ++ rtx op0 = d->op0, dest; ++ bool ok; ++ ++ switch (vmode) ++ { ++ case E_V4DFmode: ++ case E_V8SFmode: ++ /* These are special-cased in sse.md so that we can optionally ++ use the vbroadcast instruction. They expand to two insns ++ if the input happens to be in a register. */ ++ gcc_unreachable (); ++ ++ case E_V2DFmode: ++ case E_V2DImode: ++ case E_V4SFmode: ++ case E_V4SImode: ++ /* These are always implementable using standard shuffle patterns. */ ++ gcc_unreachable (); ++ ++ case E_V8HImode: ++ case E_V16QImode: ++ /* These can be implemented via interleave. We save one insn by ++ stopping once we have promoted to V4SImode and then use pshufd. */ ++ if (d->testing_p) ++ return true; ++ do ++ { ++ rtx dest; ++ rtx (*gen) (rtx, rtx, rtx) ++ = vmode == V16QImode ? gen_vec_interleave_lowv16qi ++ : gen_vec_interleave_lowv8hi; ++ ++ if (elt >= nelt2) ++ { ++ gen = vmode == V16QImode ? gen_vec_interleave_highv16qi ++ : gen_vec_interleave_highv8hi; ++ elt -= nelt2; ++ } ++ nelt2 /= 2; ++ ++ dest = gen_reg_rtx (vmode); ++ emit_insn (gen (dest, op0, op0)); ++ vmode = get_mode_wider_vector (vmode); ++ op0 = gen_lowpart (vmode, dest); ++ } ++ while (vmode != V4SImode); ++ ++ memset (perm2, elt, 4); ++ dest = gen_reg_rtx (V4SImode); ++ ok = expand_vselect (dest, op0, perm2, 4, d->testing_p); ++ gcc_assert (ok); ++ if (!d->testing_p) ++ emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); ++ return true; ++ ++ case E_V64QImode: ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V8SImode: ++ case E_V4DImode: ++ /* For AVX2 broadcasts of the first element vpbroadcast* or ++ vpermq should be used by expand_vec_perm_1. */ ++ gcc_assert (!TARGET_AVX2 || d->perm[0]); ++ return false; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match ++ broadcast permutations. */ ++ ++static bool ++expand_vec_perm_broadcast (struct expand_vec_perm_d *d) ++{ ++ unsigned i, elt, nelt = d->nelt; ++ ++ if (!d->one_operand_p) ++ return false; ++ ++ elt = d->perm[0]; ++ for (i = 1; i < nelt; ++i) ++ if (d->perm[i] != elt) ++ return false; ++ ++ return expand_vec_perm_broadcast_1 (d); ++} ++ ++/* Implement arbitrary permutations of two V64QImode operands ++ with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. 
*/ ++static bool ++expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d) ++{ ++ if (!TARGET_AVX512BW || !(d->vmode == V64QImode)) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ struct expand_vec_perm_d ds[2]; ++ rtx rperm[128], vperm, target0, target1; ++ unsigned int i, nelt; ++ machine_mode vmode; ++ ++ nelt = d->nelt; ++ vmode = V64QImode; ++ ++ for (i = 0; i < 2; i++) ++ { ++ ds[i] = *d; ++ ds[i].vmode = V32HImode; ++ ds[i].nelt = 32; ++ ds[i].target = gen_reg_rtx (V32HImode); ++ ds[i].op0 = gen_lowpart (V32HImode, d->op0); ++ ds[i].op1 = gen_lowpart (V32HImode, d->op1); ++ } ++ ++ /* Prepare permutations such that the first one takes care of ++ putting the even bytes into the right positions or one higher ++ positions (ds[0]) and the second one takes care of ++ putting the odd bytes into the right positions or one below ++ (ds[1]). */ ++ ++ for (i = 0; i < nelt; i++) ++ { ++ ds[i & 1].perm[i / 2] = d->perm[i] / 2; ++ if (i & 1) ++ { ++ rperm[i] = constm1_rtx; ++ rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1)); ++ } ++ else ++ { ++ rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1)); ++ rperm[i + 64] = constm1_rtx; ++ } ++ } ++ ++ bool ok = expand_vec_perm_1 (&ds[0]); ++ gcc_assert (ok); ++ ds[0].target = gen_lowpart (V64QImode, ds[0].target); ++ ++ ok = expand_vec_perm_1 (&ds[1]); ++ gcc_assert (ok); ++ ds[1].target = gen_lowpart (V64QImode, ds[1].target); ++ ++ vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm)); ++ vperm = force_reg (vmode, vperm); ++ target0 = gen_reg_rtx (V64QImode); ++ emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm)); ++ ++ vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64)); ++ vperm = force_reg (vmode, vperm); ++ target1 = gen_reg_rtx (V64QImode); ++ emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm)); ++ ++ emit_insn (gen_iorv64qi3 (d->target, target0, target1)); ++ return true; ++} ++ ++/* Implement arbitrary permutation of two V32QImode and V16QImode operands ++ with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed ++ all the shorter instruction sequences. */ ++ ++static bool ++expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d) ++{ ++ rtx rperm[4][32], vperm, l[2], h[2], op, m128; ++ unsigned int i, nelt, eltsz; ++ bool used[4]; ++ ++ if (!TARGET_AVX2 ++ || d->one_operand_p ++ || (d->vmode != V32QImode && d->vmode != V16HImode)) ++ return false; ++ ++ if (d->testing_p) ++ return true; ++ ++ nelt = d->nelt; ++ eltsz = GET_MODE_UNIT_SIZE (d->vmode); ++ ++ /* Generate 4 permutation masks. If the required element is within ++ the same lane, it is shuffled in. If the required element from the ++ other lane, force a zero by setting bit 7 in the permutation mask. ++ In the other mask the mask has non-negative elements if element ++ is requested from the other lane, but also moved to the other lane, ++ so that the result of vpshufb can have the two V2TImode halves ++ swapped. */ ++ m128 = GEN_INT (-128); ++ for (i = 0; i < 32; ++i) ++ { ++ rperm[0][i] = m128; ++ rperm[1][i] = m128; ++ rperm[2][i] = m128; ++ rperm[3][i] = m128; ++ } ++ used[0] = false; ++ used[1] = false; ++ used[2] = false; ++ used[3] = false; ++ for (i = 0; i < nelt; ++i) ++ { ++ unsigned j, e = d->perm[i] & (nelt / 2 - 1); ++ unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz; ++ unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 
1 : 0); ++ ++ for (j = 0; j < eltsz; ++j) ++ rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j); ++ used[which] = true; ++ } ++ ++ for (i = 0; i < 2; ++i) ++ { ++ if (!used[2 * i + 1]) ++ { ++ h[i] = NULL_RTX; ++ continue; ++ } ++ vperm = gen_rtx_CONST_VECTOR (V32QImode, ++ gen_rtvec_v (32, rperm[2 * i + 1])); ++ vperm = force_reg (V32QImode, vperm); ++ h[i] = gen_reg_rtx (V32QImode); ++ op = gen_lowpart (V32QImode, i ? d->op1 : d->op0); ++ emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm)); ++ } ++ ++ /* Swap the 128-byte lanes of h[X]. */ ++ for (i = 0; i < 2; ++i) ++ { ++ if (h[i] == NULL_RTX) ++ continue; ++ op = gen_reg_rtx (V4DImode); ++ emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]), ++ const2_rtx, GEN_INT (3), const0_rtx, ++ const1_rtx)); ++ h[i] = gen_lowpart (V32QImode, op); ++ } ++ ++ for (i = 0; i < 2; ++i) ++ { ++ if (!used[2 * i]) ++ { ++ l[i] = NULL_RTX; ++ continue; ++ } ++ vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i])); ++ vperm = force_reg (V32QImode, vperm); ++ l[i] = gen_reg_rtx (V32QImode); ++ op = gen_lowpart (V32QImode, i ? d->op1 : d->op0); ++ emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm)); ++ } ++ ++ for (i = 0; i < 2; ++i) ++ { ++ if (h[i] && l[i]) ++ { ++ op = gen_reg_rtx (V32QImode); ++ emit_insn (gen_iorv32qi3 (op, l[i], h[i])); ++ l[i] = op; ++ } ++ else if (h[i]) ++ l[i] = h[i]; ++ } ++ ++ gcc_assert (l[0] && l[1]); ++ op = d->target; ++ if (d->vmode != V32QImode) ++ op = gen_reg_rtx (V32QImode); ++ emit_insn (gen_iorv32qi3 (op, l[0], l[1])); ++ if (op != d->target) ++ emit_move_insn (d->target, gen_lowpart (d->vmode, op)); ++ return true; ++} ++ ++/* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits ++ taken care of, perform the expansion in D and return true on success. */ ++ ++static bool ++ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) ++{ ++ /* Try a single instruction expansion. */ ++ if (expand_vec_perm_1 (d)) ++ return true; ++ ++ /* Try sequences of two instructions. */ ++ ++ if (expand_vec_perm_pshuflw_pshufhw (d)) ++ return true; ++ ++ if (expand_vec_perm_palignr (d, false)) ++ return true; ++ ++ if (expand_vec_perm_interleave2 (d)) ++ return true; ++ ++ if (expand_vec_perm_broadcast (d)) ++ return true; ++ ++ if (expand_vec_perm_vpermq_perm_1 (d)) ++ return true; ++ ++ if (expand_vec_perm_vperm2f128 (d)) ++ return true; ++ ++ if (expand_vec_perm_pblendv (d)) ++ return true; ++ ++ /* Try sequences of three instructions. */ ++ ++ if (expand_vec_perm_even_odd_pack (d)) ++ return true; ++ ++ if (expand_vec_perm_2vperm2f128_vshuf (d)) ++ return true; ++ ++ if (expand_vec_perm_pshufb2 (d)) ++ return true; ++ ++ if (expand_vec_perm_interleave3 (d)) ++ return true; ++ ++ if (expand_vec_perm_vperm2f128_vblend (d)) ++ return true; ++ ++ /* Try sequences of four instructions. */ ++ ++ if (expand_vec_perm_even_odd_trunc (d)) ++ return true; ++ if (expand_vec_perm_vpshufb2_vpermq (d)) ++ return true; ++ ++ if (expand_vec_perm_vpshufb2_vpermq_even_odd (d)) ++ return true; ++ ++ if (expand_vec_perm_vpermt2_vpshub2 (d)) ++ return true; ++ ++ /* ??? Look for narrow permutations whose element orderings would ++ allow the promotion to a wider mode. */ ++ ++ /* ??? Look for sequences of interleave or a wider permute that place ++ the data into the correct lanes for a half-vector shuffle like ++ pshuf[lh]w or vpermilps. */ ++ ++ /* ??? Look for sequences of interleave that produce the desired results. ++ The combinatorics of punpck[lh] get pretty ugly... 
*/ ++ ++ if (expand_vec_perm_even_odd (d)) ++ return true; ++ ++ /* Even longer sequences. */ ++ if (expand_vec_perm_vpshufb4_vpermq2 (d)) ++ return true; ++ ++ /* See if we can get the same permutation in different vector integer ++ mode. */ ++ struct expand_vec_perm_d nd; ++ if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) ++ { ++ if (!d->testing_p) ++ emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); ++ return true; ++ } ++ ++ return false; ++} ++ ++/* If a permutation only uses one operand, make it clear. Returns true ++ if the permutation references both operands. */ ++ ++static bool ++canonicalize_perm (struct expand_vec_perm_d *d) ++{ ++ int i, which, nelt = d->nelt; ++ ++ for (i = which = 0; i < nelt; ++i) ++ which |= (d->perm[i] < nelt ? 1 : 2); ++ ++ d->one_operand_p = true; ++ switch (which) ++ { ++ default: ++ gcc_unreachable(); ++ ++ case 3: ++ if (!rtx_equal_p (d->op0, d->op1)) ++ { ++ d->one_operand_p = false; ++ break; ++ } ++ /* The elements of PERM do not suggest that only the first operand ++ is used, but both operands are identical. Allow easier matching ++ of the permutation by folding the permutation into the single ++ input vector. */ ++ /* FALLTHRU */ ++ ++ case 2: ++ for (i = 0; i < nelt; ++i) ++ d->perm[i] &= nelt - 1; ++ d->op0 = d->op1; ++ break; ++ ++ case 1: ++ d->op1 = d->op0; ++ break; ++ } ++ ++ return (which == 3); ++} ++ ++/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ ++ ++bool ++ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, ++ rtx op1, const vec_perm_indices &sel) ++{ ++ struct expand_vec_perm_d d; ++ unsigned char perm[MAX_VECT_LEN]; ++ unsigned int i, nelt, which; ++ bool two_args; ++ ++ d.target = target; ++ d.op0 = op0; ++ d.op1 = op1; ++ ++ d.vmode = vmode; ++ gcc_assert (VECTOR_MODE_P (d.vmode)); ++ d.nelt = nelt = GET_MODE_NUNITS (d.vmode); ++ d.testing_p = !target; ++ ++ gcc_assert (sel.length () == nelt); ++ gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); ++ ++ /* Given sufficient ISA support we can just return true here ++ for selected vector modes. */ ++ switch (d.vmode) ++ { ++ case E_V16SFmode: ++ case E_V16SImode: ++ case E_V8DImode: ++ case E_V8DFmode: ++ if (!TARGET_AVX512F) ++ return false; ++ /* All implementable with a single vperm[it]2 insn. */ ++ if (d.testing_p) ++ return true; ++ break; ++ case E_V32HImode: ++ if (!TARGET_AVX512BW) ++ return false; ++ if (d.testing_p) ++ /* All implementable with a single vperm[it]2 insn. */ ++ return true; ++ break; ++ case E_V64QImode: ++ if (!TARGET_AVX512BW) ++ return false; ++ if (d.testing_p) ++ /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */ ++ return true; ++ break; ++ case E_V8SImode: ++ case E_V8SFmode: ++ case E_V4DFmode: ++ case E_V4DImode: ++ if (!TARGET_AVX) ++ return false; ++ if (d.testing_p && TARGET_AVX512VL) ++ /* All implementable with a single vperm[it]2 insn. */ ++ return true; ++ break; ++ case E_V16HImode: ++ if (!TARGET_SSE2) ++ return false; ++ if (d.testing_p && TARGET_AVX2) ++ /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ ++ return true; ++ break; ++ case E_V32QImode: ++ if (!TARGET_SSE2) ++ return false; ++ if (d.testing_p && TARGET_AVX2) ++ /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ ++ return true; ++ break; ++ case E_V8HImode: ++ case E_V16QImode: ++ if (!TARGET_SSE2) ++ return false; ++ /* Fall through. */ ++ case E_V4SImode: ++ case E_V4SFmode: ++ if (!TARGET_SSE) ++ return false; ++ /* All implementable with a single vpperm insn. 
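[Editorial sketch] The operand folding done by canonicalize_perm above (and repeated by the driver below) is a two-bit usage mask: bit 0 if any index reads the first input, bit 1 if any index reads the second, with indices rebased whenever only the second input is used. A minimal scalar sketch; the helper name is ours:

/* Model of the operand-usage analysis in canonicalize_perm.  Folds the
   NELT-element permutation onto a single operand when possible and
   returns nonzero only when both operands are genuinely needed.  */
static int
fold_single_operand (unsigned char *perm, unsigned nelt)
{
  int which = 0;

  for (unsigned i = 0; i < nelt; i++)
    which |= (perm[i] < nelt) ? 1 : 2;

  if (which == 2)
    for (unsigned i = 0; i < nelt; i++)
      perm[i] &= nelt - 1;       /* rebase onto the only operand used */

  return which == 3;             /* true when both operands are read  */
}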
*/ ++ if (d.testing_p && TARGET_XOP) ++ return true; ++ /* All implementable with 2 pshufb + 1 ior. */ ++ if (d.testing_p && TARGET_SSSE3) ++ return true; ++ break; ++ case E_V2DImode: ++ case E_V2DFmode: ++ if (!TARGET_SSE) ++ return false; ++ /* All implementable with shufpd or unpck[lh]pd. */ ++ if (d.testing_p) ++ return true; ++ break; ++ default: ++ return false; ++ } ++ ++ for (i = which = 0; i < nelt; ++i) ++ { ++ unsigned char e = sel[i]; ++ gcc_assert (e < 2 * nelt); ++ d.perm[i] = e; ++ perm[i] = e; ++ which |= (e < nelt ? 1 : 2); ++ } ++ ++ if (d.testing_p) ++ { ++ /* For all elements from second vector, fold the elements to first. */ ++ if (which == 2) ++ for (i = 0; i < nelt; ++i) ++ d.perm[i] -= nelt; ++ ++ /* Check whether the mask can be applied to the vector type. */ ++ d.one_operand_p = (which != 3); ++ ++ /* Implementable with shufps or pshufd. */ ++ if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode)) ++ return true; ++ ++ /* Otherwise we have to go through the motions and see if we can ++ figure out how to generate the requested permutation. */ ++ d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); ++ d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); ++ if (!d.one_operand_p) ++ d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); ++ ++ start_sequence (); ++ bool ret = ix86_expand_vec_perm_const_1 (&d); ++ end_sequence (); ++ ++ return ret; ++ } ++ ++ two_args = canonicalize_perm (&d); ++ ++ if (ix86_expand_vec_perm_const_1 (&d)) ++ return true; ++ ++ /* If the selector says both arguments are needed, but the operands are the ++ same, the above tried to expand with one_operand_p and flattened selector. ++ If that didn't work, retry without one_operand_p; we succeeded with that ++ during testing. */ ++ if (two_args && d.one_operand_p) ++ { ++ d.one_operand_p = false; ++ memcpy (d.perm, perm, sizeof (perm)); ++ return ix86_expand_vec_perm_const_1 (&d); ++ } ++ ++ return false; ++} ++ ++void ++ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd) ++{ ++ struct expand_vec_perm_d d; ++ unsigned i, nelt; ++ ++ d.target = targ; ++ d.op0 = op0; ++ d.op1 = op1; ++ d.vmode = GET_MODE (targ); ++ d.nelt = nelt = GET_MODE_NUNITS (d.vmode); ++ d.one_operand_p = false; ++ d.testing_p = false; ++ ++ for (i = 0; i < nelt; ++i) ++ d.perm[i] = i * 2 + odd; ++ ++ /* We'll either be able to implement the permutation directly... */ ++ if (expand_vec_perm_1 (&d)) ++ return; ++ ++ /* ... or we use the special-case patterns. */ ++ expand_vec_perm_even_odd_1 (&d, odd); ++} ++ ++static void ++ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p) ++{ ++ struct expand_vec_perm_d d; ++ unsigned i, nelt, base; ++ bool ok; ++ ++ d.target = targ; ++ d.op0 = op0; ++ d.op1 = op1; ++ d.vmode = GET_MODE (targ); ++ d.nelt = nelt = GET_MODE_NUNITS (d.vmode); ++ d.one_operand_p = false; ++ d.testing_p = false; ++ ++ base = high_p ? nelt / 2 : 0; ++ for (i = 0; i < nelt / 2; ++i) ++ { ++ d.perm[i * 2] = i + base; ++ d.perm[i * 2 + 1] = i + base + nelt; ++ } ++ ++ /* Note that for AVX this isn't one instruction. */ ++ ok = ix86_expand_vec_perm_const_1 (&d); ++ gcc_assert (ok); ++} ++ ++ ++/* Expand a vector operation CODE for a V*QImode in terms of the ++ same operation on V*HImode. 
*/ ++ ++void ++ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2) ++{ ++ machine_mode qimode = GET_MODE (dest); ++ machine_mode himode; ++ rtx (*gen_il) (rtx, rtx, rtx); ++ rtx (*gen_ih) (rtx, rtx, rtx); ++ rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h; ++ struct expand_vec_perm_d d; ++ bool ok, full_interleave; ++ bool uns_p = false; ++ int i; ++ ++ switch (qimode) ++ { ++ case E_V16QImode: ++ himode = V8HImode; ++ gen_il = gen_vec_interleave_lowv16qi; ++ gen_ih = gen_vec_interleave_highv16qi; ++ break; ++ case E_V32QImode: ++ himode = V16HImode; ++ gen_il = gen_avx2_interleave_lowv32qi; ++ gen_ih = gen_avx2_interleave_highv32qi; ++ break; ++ case E_V64QImode: ++ himode = V32HImode; ++ gen_il = gen_avx512bw_interleave_lowv64qi; ++ gen_ih = gen_avx512bw_interleave_highv64qi; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ op2_l = op2_h = op2; ++ switch (code) ++ { ++ case MULT: ++ /* Unpack data such that we've got a source byte in each low byte of ++ each word. We don't care what goes into the high byte of each word. ++ Rather than trying to get zero in there, most convenient is to let ++ it be a copy of the low byte. */ ++ op2_l = gen_reg_rtx (qimode); ++ op2_h = gen_reg_rtx (qimode); ++ emit_insn (gen_il (op2_l, op2, op2)); ++ emit_insn (gen_ih (op2_h, op2, op2)); ++ ++ op1_l = gen_reg_rtx (qimode); ++ op1_h = gen_reg_rtx (qimode); ++ emit_insn (gen_il (op1_l, op1, op1)); ++ emit_insn (gen_ih (op1_h, op1, op1)); ++ full_interleave = qimode == V16QImode; ++ break; ++ ++ case ASHIFT: ++ case LSHIFTRT: ++ uns_p = true; ++ /* FALLTHRU */ ++ case ASHIFTRT: ++ op1_l = gen_reg_rtx (himode); ++ op1_h = gen_reg_rtx (himode); ++ ix86_expand_sse_unpack (op1_l, op1, uns_p, false); ++ ix86_expand_sse_unpack (op1_h, op1, uns_p, true); ++ full_interleave = true; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* Perform the operation. */ ++ res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX, ++ 1, OPTAB_DIRECT); ++ res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX, ++ 1, OPTAB_DIRECT); ++ gcc_assert (res_l && res_h); ++ ++ /* Merge the data back into the right place. */ ++ d.target = dest; ++ d.op0 = gen_lowpart (qimode, res_l); ++ d.op1 = gen_lowpart (qimode, res_h); ++ d.vmode = qimode; ++ d.nelt = GET_MODE_NUNITS (qimode); ++ d.one_operand_p = false; ++ d.testing_p = false; ++ ++ if (full_interleave) ++ { ++ /* For SSE2, we used an full interleave, so the desired ++ results are in the even elements. */ ++ for (i = 0; i < d.nelt; ++i) ++ d.perm[i] = i * 2; ++ } ++ else ++ { ++ /* For AVX, the interleave used above was not cross-lane. So the ++ extraction is evens but with the second and third quarter swapped. ++ Happily, that is even one insn shorter than even extraction. ++ For AVX512BW we have 4 lanes. We extract evens from within a lane, ++ always first from the first and then from the second source operand, ++ the index bits above the low 4 bits remains the same. ++ Thus, for d.nelt == 32 we want permutation ++ 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62 ++ and for d.nelt == 64 we want permutation ++ 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94, ++ 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */ ++ for (i = 0; i < d.nelt; ++i) ++ d.perm[i] = ((i * 2) & 14) + ((i & 8) ? 
d.nelt : 0) + (i & ~15); ++ } ++ ++ ok = ix86_expand_vec_perm_const_1 (&d); ++ gcc_assert (ok); ++ ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, ++ gen_rtx_fmt_ee (code, qimode, op1, op2)); ++} ++ ++/* Helper function of ix86_expand_mul_widen_evenodd. Return true ++ if op is CONST_VECTOR with all odd elements equal to their ++ preceding element. */ ++ ++static bool ++const_vector_equal_evenodd_p (rtx op) ++{ ++ machine_mode mode = GET_MODE (op); ++ int i, nunits = GET_MODE_NUNITS (mode); ++ if (GET_CODE (op) != CONST_VECTOR ++ || nunits != CONST_VECTOR_NUNITS (op)) ++ return false; ++ for (i = 0; i < nunits; i += 2) ++ if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1)) ++ return false; ++ return true; ++} ++ ++void ++ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2, ++ bool uns_p, bool odd_p) ++{ ++ machine_mode mode = GET_MODE (op1); ++ machine_mode wmode = GET_MODE (dest); ++ rtx x; ++ rtx orig_op1 = op1, orig_op2 = op2; ++ ++ if (!nonimmediate_operand (op1, mode)) ++ op1 = force_reg (mode, op1); ++ if (!nonimmediate_operand (op2, mode)) ++ op2 = force_reg (mode, op2); ++ ++ /* We only play even/odd games with vectors of SImode. */ ++ gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode); ++ ++ /* If we're looking for the odd results, shift those members down to ++ the even slots. For some cpus this is faster than a PSHUFD. */ ++ if (odd_p) ++ { ++ /* For XOP use vpmacsdqh, but only for smult, as it is only ++ signed. */ ++ if (TARGET_XOP && mode == V4SImode && !uns_p) ++ { ++ x = force_reg (wmode, CONST0_RTX (wmode)); ++ emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x)); ++ return; ++ } ++ ++ x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode)); ++ if (!const_vector_equal_evenodd_p (orig_op1)) ++ op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1), ++ x, NULL, 1, OPTAB_DIRECT); ++ if (!const_vector_equal_evenodd_p (orig_op2)) ++ op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2), ++ x, NULL, 1, OPTAB_DIRECT); ++ op1 = gen_lowpart (mode, op1); ++ op2 = gen_lowpart (mode, op2); ++ } ++ ++ if (mode == V16SImode) ++ { ++ if (uns_p) ++ x = gen_vec_widen_umult_even_v16si (dest, op1, op2); ++ else ++ x = gen_vec_widen_smult_even_v16si (dest, op1, op2); ++ } ++ else if (mode == V8SImode) ++ { ++ if (uns_p) ++ x = gen_vec_widen_umult_even_v8si (dest, op1, op2); ++ else ++ x = gen_vec_widen_smult_even_v8si (dest, op1, op2); ++ } ++ else if (uns_p) ++ x = gen_vec_widen_umult_even_v4si (dest, op1, op2); ++ else if (TARGET_SSE4_1) ++ x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2); ++ else ++ { ++ rtx s1, s2, t0, t1, t2; ++ ++ /* The easiest way to implement this without PMULDQ is to go through ++ the motions as if we are performing a full 64-bit multiply. With ++ the exception that we need to do less shuffling of the elements. */ ++ ++ /* Compute the sign-extension, aka highparts, of the two operands. */ ++ s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode), ++ op1, pc_rtx, pc_rtx); ++ s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode), ++ op2, pc_rtx, pc_rtx); ++ ++ /* Multiply LO(A) * HI(B), and vice-versa. */ ++ t1 = gen_reg_rtx (wmode); ++ t2 = gen_reg_rtx (wmode); ++ emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2)); ++ emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1)); ++ ++ /* Multiply LO(A) * LO(B). */ ++ t0 = gen_reg_rtx (wmode); ++ emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2)); ++ ++ /* Combine and shift the highparts into place. 
*/ ++ t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT); ++ t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1, ++ 1, OPTAB_DIRECT); ++ ++ /* Combine high and low parts. */ ++ force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT); ++ return; ++ } ++ emit_insn (x); ++} ++ ++void ++ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2, ++ bool uns_p, bool high_p) ++{ ++ machine_mode wmode = GET_MODE (dest); ++ machine_mode mode = GET_MODE (op1); ++ rtx t1, t2, t3, t4, mask; ++ ++ switch (mode) ++ { ++ case E_V4SImode: ++ t1 = gen_reg_rtx (mode); ++ t2 = gen_reg_rtx (mode); ++ if (TARGET_XOP && !uns_p) ++ { ++ /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case, ++ shuffle the elements once so that all elements are in the right ++ place for immediate use: { A C B D }. */ ++ emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx, ++ const1_rtx, GEN_INT (3))); ++ emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx, ++ const1_rtx, GEN_INT (3))); ++ } ++ else ++ { ++ /* Put the elements into place for the multiply. */ ++ ix86_expand_vec_interleave (t1, op1, op1, high_p); ++ ix86_expand_vec_interleave (t2, op2, op2, high_p); ++ high_p = false; ++ } ++ ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p); ++ break; ++ ++ case E_V8SImode: ++ /* Shuffle the elements between the lanes. After this we ++ have { A B E F | C D G H } for each operand. */ ++ t1 = gen_reg_rtx (V4DImode); ++ t2 = gen_reg_rtx (V4DImode); ++ emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1), ++ const0_rtx, const2_rtx, ++ const1_rtx, GEN_INT (3))); ++ emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2), ++ const0_rtx, const2_rtx, ++ const1_rtx, GEN_INT (3))); ++ ++ /* Shuffle the elements within the lanes. After this we ++ have { A A B B | C C D D } or { E E F F | G G H H }. */ ++ t3 = gen_reg_rtx (V8SImode); ++ t4 = gen_reg_rtx (V8SImode); ++ mask = GEN_INT (high_p ++ ? 2 + (2 << 2) + (3 << 4) + (3 << 6) ++ : 0 + (0 << 2) + (1 << 4) + (1 << 6)); ++ emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask)); ++ emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask)); ++ ++ ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false); ++ break; ++ ++ case E_V8HImode: ++ case E_V16HImode: ++ t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX, ++ uns_p, OPTAB_DIRECT); ++ t2 = expand_binop (mode, ++ uns_p ? 
umul_highpart_optab : smul_highpart_optab, ++ op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT); ++ gcc_assert (t1 && t2); ++ ++ t3 = gen_reg_rtx (mode); ++ ix86_expand_vec_interleave (t3, t1, t2, high_p); ++ emit_move_insn (dest, gen_lowpart (wmode, t3)); ++ break; ++ ++ case E_V16QImode: ++ case E_V32QImode: ++ case E_V32HImode: ++ case E_V16SImode: ++ case E_V64QImode: ++ t1 = gen_reg_rtx (wmode); ++ t2 = gen_reg_rtx (wmode); ++ ix86_expand_sse_unpack (t1, op1, uns_p, high_p); ++ ix86_expand_sse_unpack (t2, op2, uns_p, high_p); ++ ++ emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2))); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++void ++ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2) ++{ ++ rtx res_1, res_2, res_3, res_4; ++ ++ res_1 = gen_reg_rtx (V4SImode); ++ res_2 = gen_reg_rtx (V4SImode); ++ res_3 = gen_reg_rtx (V2DImode); ++ res_4 = gen_reg_rtx (V2DImode); ++ ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false); ++ ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true); ++ ++ /* Move the results in element 2 down to element 1; we don't care ++ what goes in elements 2 and 3. Then we can merge the parts ++ back together with an interleave. ++ ++ Note that two other sequences were tried: ++ (1) Use interleaves at the start instead of psrldq, which allows ++ us to use a single shufps to merge things back at the end. ++ (2) Use shufps here to combine the two vectors, then pshufd to ++ put the elements in the correct order. ++ In both cases the cost of the reformatting stall was too high ++ and the overall sequence slower. */ ++ ++ emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3), ++ const0_rtx, const2_rtx, ++ const0_rtx, const0_rtx)); ++ emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4), ++ const0_rtx, const2_rtx, ++ const0_rtx, const0_rtx)); ++ res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2)); ++ ++ set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2)); ++} ++ ++void ++ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) ++{ ++ machine_mode mode = GET_MODE (op0); ++ rtx t1, t2, t3, t4, t5, t6; ++ ++ if (TARGET_AVX512DQ && mode == V8DImode) ++ emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2)); ++ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode) ++ emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); ++ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode) ++ emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2)); ++ else if (TARGET_XOP && mode == V2DImode) ++ { ++ /* op1: A,B,C,D, op2: E,F,G,H */ ++ op1 = gen_lowpart (V4SImode, op1); ++ op2 = gen_lowpart (V4SImode, op2); ++ ++ t1 = gen_reg_rtx (V4SImode); ++ t2 = gen_reg_rtx (V4SImode); ++ t3 = gen_reg_rtx (V2DImode); ++ t4 = gen_reg_rtx (V2DImode); ++ ++ /* t1: B,A,D,C */ ++ emit_insn (gen_sse2_pshufd_1 (t1, op1, ++ GEN_INT (1), ++ GEN_INT (0), ++ GEN_INT (3), ++ GEN_INT (2))); ++ ++ /* t2: (B*E),(A*F),(D*G),(C*H) */ ++ emit_insn (gen_mulv4si3 (t2, t1, op2)); ++ ++ /* t3: (B*E)+(A*F), (D*G)+(C*H) */ ++ emit_insn (gen_xop_phadddq (t3, t2)); ++ ++ /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ ++ emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32))); ++ ++ /* Multiply lower parts and add all */ ++ t5 = gen_reg_rtx (V2DImode); ++ emit_insn (gen_vec_widen_umult_even_v4si (t5, ++ gen_lowpart (V4SImode, op1), ++ gen_lowpart (V4SImode, op2))); ++ op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT); ++ ++ } ++ else ++ { ++ machine_mode nmode; ++ rtx (*umul) (rtx, rtx, rtx); ++ ++ if (mode == V2DImode) ++ 
{ ++ umul = gen_vec_widen_umult_even_v4si; ++ nmode = V4SImode; ++ } ++ else if (mode == V4DImode) ++ { ++ umul = gen_vec_widen_umult_even_v8si; ++ nmode = V8SImode; ++ } ++ else if (mode == V8DImode) ++ { ++ umul = gen_vec_widen_umult_even_v16si; ++ nmode = V16SImode; ++ } ++ else ++ gcc_unreachable (); ++ ++ ++ /* Multiply low parts. */ ++ t1 = gen_reg_rtx (mode); ++ emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2))); ++ ++ /* Shift input vectors right 32 bits so we can multiply high parts. */ ++ t6 = GEN_INT (32); ++ t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT); ++ t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT); ++ ++ /* Multiply high parts by low parts. */ ++ t4 = gen_reg_rtx (mode); ++ t5 = gen_reg_rtx (mode); ++ emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2))); ++ emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1))); ++ ++ /* Combine and shift the highparts back. */ ++ t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT); ++ t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT); ++ ++ /* Combine high and low parts. */ ++ force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT); ++ } ++ ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, ++ gen_rtx_MULT (mode, op1, op2)); ++} ++ ++/* Return 1 if control tansfer instruction INSN ++ should be encoded with notrack prefix. */ ++ ++bool ++ix86_notrack_prefixed_insn_p (rtx insn) ++{ ++ if (!insn || !((flag_cf_protection & CF_BRANCH))) ++ return false; ++ ++ if (CALL_P (insn)) ++ { ++ rtx call = get_call_rtx_from (insn); ++ gcc_assert (call != NULL_RTX); ++ rtx addr = XEXP (call, 0); ++ ++ /* Do not emit 'notrack' if it's not an indirect call. */ ++ if (MEM_P (addr) ++ && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF) ++ return false; ++ else ++ return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0); ++ } ++ ++ if (JUMP_P (insn) && !flag_cet_switch) ++ { ++ rtx target = JUMP_LABEL (insn); ++ if (target == NULL_RTX || ANY_RETURN_P (target)) ++ return false; ++ ++ /* Check the jump is a switch table. */ ++ rtx_insn *label = as_a (target); ++ rtx_insn *table = next_insn (label); ++ if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table)) ++ return false; ++ else ++ return true; ++ } ++ return false; ++} ++ ++/* Calculate integer abs() using only SSE2 instructions. */ ++ ++void ++ix86_expand_sse2_abs (rtx target, rtx input) ++{ ++ machine_mode mode = GET_MODE (target); ++ rtx tmp0, tmp1, x; ++ ++ switch (mode) ++ { ++ case E_V2DImode: ++ case E_V4DImode: ++ /* For 64-bit signed integer X, with SSE4.2 use ++ pxor t0, t0; pcmpgtq X, t0; pxor t0, X; psubq t0, X. ++ Otherwise handle it similarly to V4SImode, except use 64 as W instead of ++ 32 and use logical instead of arithmetic right shift (which is ++ unimplemented) and subtract. 
*/ ++ if (TARGET_SSE4_2) ++ { ++ tmp0 = gen_reg_rtx (mode); ++ tmp1 = gen_reg_rtx (mode); ++ emit_move_insn (tmp1, CONST0_RTX (mode)); ++ if (mode == E_V2DImode) ++ emit_insn (gen_sse4_2_gtv2di3 (tmp0, tmp1, input)); ++ else ++ emit_insn (gen_avx2_gtv4di3 (tmp0, tmp1, input)); ++ } ++ else ++ { ++ tmp0 = expand_simple_binop (mode, LSHIFTRT, input, ++ GEN_INT (GET_MODE_UNIT_BITSIZE (mode) ++ - 1), NULL, 0, OPTAB_DIRECT); ++ tmp0 = expand_simple_unop (mode, NEG, tmp0, NULL, false); ++ } ++ ++ tmp1 = expand_simple_binop (mode, XOR, tmp0, input, ++ NULL, 0, OPTAB_DIRECT); ++ x = expand_simple_binop (mode, MINUS, tmp1, tmp0, ++ target, 0, OPTAB_DIRECT); ++ break; ++ ++ case E_V4SImode: ++ /* For 32-bit signed integer X, the best way to calculate the absolute ++ value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */ ++ tmp0 = expand_simple_binop (mode, ASHIFTRT, input, ++ GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1), ++ NULL, 0, OPTAB_DIRECT); ++ tmp1 = expand_simple_binop (mode, XOR, tmp0, input, ++ NULL, 0, OPTAB_DIRECT); ++ x = expand_simple_binop (mode, MINUS, tmp1, tmp0, ++ target, 0, OPTAB_DIRECT); ++ break; ++ ++ case E_V8HImode: ++ /* For 16-bit signed integer X, the best way to calculate the absolute ++ value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */ ++ tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); ++ ++ x = expand_simple_binop (mode, SMAX, tmp0, input, ++ target, 0, OPTAB_DIRECT); ++ break; ++ ++ case E_V16QImode: ++ /* For 8-bit signed integer X, the best way to calculate the absolute ++ value of X is min ((unsigned char) X, (unsigned char) (-X)), ++ as SSE2 provides the PMINUB insn. */ ++ tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); ++ ++ x = expand_simple_binop (V16QImode, UMIN, tmp0, input, ++ target, 0, OPTAB_DIRECT); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (x != target) ++ emit_move_insn (target, x); ++} ++ ++/* Expand an extract from a vector register through pextr insn. ++ Return true if successful. */ ++ ++bool ++ix86_expand_pextr (rtx *operands) ++{ ++ rtx dst = operands[0]; ++ rtx src = operands[1]; ++ ++ unsigned int size = INTVAL (operands[2]); ++ unsigned int pos = INTVAL (operands[3]); ++ ++ if (SUBREG_P (dst)) ++ { ++ /* Reject non-lowpart subregs. */ ++ if (SUBREG_BYTE (dst) > 0) ++ return false; ++ dst = SUBREG_REG (dst); ++ } ++ ++ if (SUBREG_P (src)) ++ { ++ pos += SUBREG_BYTE (src) * BITS_PER_UNIT; ++ src = SUBREG_REG (src); ++ } ++ ++ switch (GET_MODE (src)) ++ { ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ case E_V1TImode: ++ case E_TImode: ++ { ++ machine_mode srcmode, dstmode; ++ rtx d, pat; ++ ++ if (!int_mode_for_size (size, 0).exists (&dstmode)) ++ return false; ++ ++ switch (dstmode) ++ { ++ case E_QImode: ++ if (!TARGET_SSE4_1) ++ return false; ++ srcmode = V16QImode; ++ break; ++ ++ case E_HImode: ++ if (!TARGET_SSE2) ++ return false; ++ srcmode = V8HImode; ++ break; ++ ++ case E_SImode: ++ if (!TARGET_SSE4_1) ++ return false; ++ srcmode = V4SImode; ++ break; ++ ++ case E_DImode: ++ gcc_assert (TARGET_64BIT); ++ if (!TARGET_SSE4_1) ++ return false; ++ srcmode = V2DImode; ++ break; ++ ++ default: ++ return false; ++ } ++ ++ /* Reject extractions from misaligned positions. */ ++ if (pos & (size-1)) ++ return false; ++ ++ if (GET_MODE (dst) == dstmode) ++ d = dst; ++ else ++ d = gen_reg_rtx (dstmode); ++ ++ /* Construct insn pattern. 
*/ ++ pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size))); ++ pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat); ++ ++ /* Let the rtl optimizers know about the zero extension performed. */ ++ if (dstmode == QImode || dstmode == HImode) ++ { ++ pat = gen_rtx_ZERO_EXTEND (SImode, pat); ++ d = gen_lowpart (SImode, d); ++ } ++ ++ emit_insn (gen_rtx_SET (d, pat)); ++ ++ if (d != dst) ++ emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); ++ return true; ++ } ++ ++ default: ++ return false; ++ } ++} ++ ++/* Expand an insert into a vector register through pinsr insn. ++ Return true if successful. */ ++ ++bool ++ix86_expand_pinsr (rtx *operands) ++{ ++ rtx dst = operands[0]; ++ rtx src = operands[3]; ++ ++ unsigned int size = INTVAL (operands[1]); ++ unsigned int pos = INTVAL (operands[2]); ++ ++ if (SUBREG_P (dst)) ++ { ++ pos += SUBREG_BYTE (dst) * BITS_PER_UNIT; ++ dst = SUBREG_REG (dst); ++ } ++ ++ switch (GET_MODE (dst)) ++ { ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ case E_V1TImode: ++ case E_TImode: ++ { ++ machine_mode srcmode, dstmode; ++ rtx (*pinsr)(rtx, rtx, rtx, rtx); ++ rtx d; ++ ++ if (!int_mode_for_size (size, 0).exists (&srcmode)) ++ return false; ++ ++ switch (srcmode) ++ { ++ case E_QImode: ++ if (!TARGET_SSE4_1) ++ return false; ++ dstmode = V16QImode; ++ pinsr = gen_sse4_1_pinsrb; ++ break; ++ ++ case E_HImode: ++ if (!TARGET_SSE2) ++ return false; ++ dstmode = V8HImode; ++ pinsr = gen_sse2_pinsrw; ++ break; ++ ++ case E_SImode: ++ if (!TARGET_SSE4_1) ++ return false; ++ dstmode = V4SImode; ++ pinsr = gen_sse4_1_pinsrd; ++ break; ++ ++ case E_DImode: ++ gcc_assert (TARGET_64BIT); ++ if (!TARGET_SSE4_1) ++ return false; ++ dstmode = V2DImode; ++ pinsr = gen_sse4_1_pinsrq; ++ break; ++ ++ default: ++ return false; ++ } ++ ++ /* Reject insertions to misaligned positions. */ ++ if (pos & (size-1)) ++ return false; ++ ++ if (SUBREG_P (src)) ++ { ++ unsigned int srcpos = SUBREG_BYTE (src); ++ ++ if (srcpos > 0) ++ { ++ rtx extr_ops[4]; ++ ++ extr_ops[0] = gen_reg_rtx (srcmode); ++ extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src)); ++ extr_ops[2] = GEN_INT (size); ++ extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT); ++ ++ if (!ix86_expand_pextr (extr_ops)) ++ return false; ++ ++ src = extr_ops[0]; ++ } ++ else ++ src = gen_lowpart (srcmode, SUBREG_REG (src)); ++ } ++ ++ if (GET_MODE (dst) == dstmode) ++ d = dst; ++ else ++ d = gen_reg_rtx (dstmode); ++ ++ emit_insn (pinsr (d, gen_lowpart (dstmode, dst), ++ gen_lowpart (srcmode, src), ++ GEN_INT (1 << (pos / size)))); ++ if (d != dst) ++ emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); ++ return true; ++ } ++ ++ default: ++ return false; ++ } ++} ++ ++/* All CPUs prefer to avoid cross-lane operations so perform reductions ++ upper against lower halves up to SSE reg size. */ ++ ++machine_mode ++ix86_split_reduction (machine_mode mode) ++{ ++ /* Reduce lowpart against highpart until we reach SSE reg width to ++ avoid cross-lane operations. */ ++ switch (mode) ++ { ++ case E_V8DImode: ++ case E_V4DImode: ++ return V2DImode; ++ case E_V16SImode: ++ case E_V8SImode: ++ return V4SImode; ++ case E_V32HImode: ++ case E_V16HImode: ++ return V8HImode; ++ case E_V64QImode: ++ case E_V32QImode: ++ return V16QImode; ++ case E_V16SFmode: ++ case E_V8SFmode: ++ return V4SFmode; ++ case E_V8DFmode: ++ case E_V4DFmode: ++ return V2DFmode; ++ default: ++ return mode; ++ } ++} ++ ++/* Generate call to __divmoddi4. 
*/ ++ ++void ++ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode, ++ rtx op0, rtx op1, ++ rtx *quot_p, rtx *rem_p) ++{ ++ rtx rem = assign_386_stack_local (mode, SLOT_TEMP); ++ ++ rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL, ++ mode, op0, mode, op1, mode, ++ XEXP (rem, 0), Pmode); ++ *quot_p = quot; ++ *rem_p = rem; ++} ++ ++#include "gt-i386-expand.h" +diff --git a/gcc/config/i386/i386-expand.h b/gcc/config/i386/i386-expand.h +new file mode 100644 +index 000000000..9271bb85a +--- /dev/null ++++ b/gcc/config/i386/i386-expand.h +@@ -0,0 +1,58 @@ ++/* Copyright (C) 1988-2019 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef GCC_I386_EXPAND_H ++#define GCC_I386_EXPAND_H ++ ++/* AVX512F does support 64-byte integer vector operations, ++ thus the longest vector we are faced with is V64QImode. */ ++#define MAX_VECT_LEN 64 ++ ++struct expand_vec_perm_d ++{ ++ rtx target, op0, op1; ++ unsigned char perm[MAX_VECT_LEN]; ++ machine_mode vmode; ++ unsigned char nelt; ++ bool one_operand_p; ++ bool testing_p; ++}; ++ ++rtx legitimize_tls_address (rtx x, enum tls_model model, bool for_mov); ++alias_set_type ix86_GOT_alias_set (void); ++rtx legitimize_pic_address (rtx orig, rtx reg); ++rtx legitimize_pe_coff_symbol (rtx addr, bool inreg); ++ ++bool insn_defines_reg (unsigned int regno1, unsigned int regno2, ++ rtx_insn *insn); ++void ix86_emit_binop (enum rtx_code code, machine_mode mode, rtx dst, rtx src); ++enum calling_abi ix86_function_abi (const_tree fndecl); ++bool ix86_function_ms_hook_prologue (const_tree fn); ++void warn_once_call_ms2sysv_xlogues (const char *feature); ++rtx gen_push (rtx arg); ++rtx gen_pop (rtx arg); ++rtx ix86_expand_builtin (tree exp, rtx target, rtx subtarget, ++ machine_mode mode, int ignore); ++bool ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, ++ rtx op1, const vec_perm_indices &sel); ++bool ix86_notrack_prefixed_insn_p (rtx insn); ++machine_mode ix86_split_reduction (machine_mode mode); ++void ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode, rtx op0, ++ rtx op1, rtx *quot_p, rtx *rem_p); ++ ++#endif /* GCC_I386_EXPAND_H */ +diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c +new file mode 100644 +index 000000000..60a120f4d +--- /dev/null ++++ b/gcc/config/i386/i386-features.c +@@ -0,0 +1,2742 @@ ++/* Copyright (C) 1988-2019 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. 
++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "rtl.h" ++#include "tree.h" ++#include "memmodel.h" ++#include "gimple.h" ++#include "cfghooks.h" ++#include "cfgloop.h" ++#include "df.h" ++#include "tm_p.h" ++#include "stringpool.h" ++#include "expmed.h" ++#include "optabs.h" ++#include "regs.h" ++#include "emit-rtl.h" ++#include "recog.h" ++#include "cgraph.h" ++#include "diagnostic.h" ++#include "cfgbuild.h" ++#include "alias.h" ++#include "fold-const.h" ++#include "attribs.h" ++#include "calls.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "output.h" ++#include "insn-attr.h" ++#include "flags.h" ++#include "except.h" ++#include "explow.h" ++#include "expr.h" ++#include "cfgrtl.h" ++#include "common/common-target.h" ++#include "langhooks.h" ++#include "reload.h" ++#include "gimplify.h" ++#include "dwarf2.h" ++#include "tm-constrs.h" ++#include "params.h" ++#include "cselib.h" ++#include "sched-int.h" ++#include "opts.h" ++#include "tree-pass.h" ++#include "context.h" ++#include "pass_manager.h" ++#include "target-globals.h" ++#include "gimple-iterator.h" ++#include "tree-vectorizer.h" ++#include "shrink-wrap.h" ++#include "builtins.h" ++#include "rtl-iter.h" ++#include "tree-iterator.h" ++#include "dbgcnt.h" ++#include "case-cfn-macros.h" ++#include "dojump.h" ++#include "fold-const-call.h" ++#include "tree-vrp.h" ++#include "tree-ssanames.h" ++#include "selftest.h" ++#include "selftest-rtl.h" ++#include "print-rtl.h" ++#include "intl.h" ++#include "ifcvt.h" ++#include "symbol-summary.h" ++#include "ipa-prop.h" ++#include "ipa-fnsummary.h" ++#include "wide-int-bitmask.h" ++#include "tree-vector-builder.h" ++#include "debug.h" ++#include "dwarf2out.h" ++#include "i386-builtins.h" ++#include "i386-features.h" ++ ++const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = { ++ "savms64", ++ "resms64", ++ "resms64x", ++ "savms64f", ++ "resms64f", ++ "resms64fx" ++}; ++ ++const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = { ++/* The below offset values are where each register is stored for the layout ++ relative to incoming stack pointer. The value of each m_regs[].offset will ++ be relative to the incoming base pointer (rax or rsi) used by the stub. ++ ++ s_instances: 0 1 2 3 ++ Offset: realigned or aligned + 8 ++ Register aligned aligned + 8 aligned w/HFP w/HFP */ ++ XMM15_REG, /* 0x10 0x18 0x10 0x18 */ ++ XMM14_REG, /* 0x20 0x28 0x20 0x28 */ ++ XMM13_REG, /* 0x30 0x38 0x30 0x38 */ ++ XMM12_REG, /* 0x40 0x48 0x40 0x48 */ ++ XMM11_REG, /* 0x50 0x58 0x50 0x58 */ ++ XMM10_REG, /* 0x60 0x68 0x60 0x68 */ ++ XMM9_REG, /* 0x70 0x78 0x70 0x78 */ ++ XMM8_REG, /* 0x80 0x88 0x80 0x88 */ ++ XMM7_REG, /* 0x90 0x98 0x90 0x98 */ ++ XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */ ++ SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */ ++ DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */ ++ BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */ ++ BP_REG, /* 0xc0 0xc8 N/A N/A */ ++ R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */ ++ R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */ ++ R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */ ++ R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */ ++}; ++ ++/* Instantiate static const values. 
*/ ++const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET; ++const unsigned xlogue_layout::MIN_REGS; ++const unsigned xlogue_layout::MAX_REGS; ++const unsigned xlogue_layout::MAX_EXTRA_REGS; ++const unsigned xlogue_layout::VARIANT_COUNT; ++const unsigned xlogue_layout::STUB_NAME_MAX_LEN; ++ ++/* Initialize xlogue_layout::s_stub_names to zero. */ ++char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT] ++ [STUB_NAME_MAX_LEN]; ++ ++/* Instantiates all xlogue_layout instances. */ ++const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = { ++ xlogue_layout (0, false), ++ xlogue_layout (8, false), ++ xlogue_layout (0, true), ++ xlogue_layout (8, true) ++}; ++ ++/* Return an appropriate const instance of xlogue_layout based upon values ++ in cfun->machine and crtl. */ ++const struct xlogue_layout & ++xlogue_layout::get_instance () ++{ ++ enum xlogue_stub_sets stub_set; ++ bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in; ++ ++ if (stack_realign_fp) ++ stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; ++ else if (frame_pointer_needed) ++ stub_set = aligned_plus_8 ++ ? XLOGUE_SET_HFP_ALIGNED_PLUS_8 ++ : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; ++ else ++ stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED; ++ ++ return s_instances[stub_set]; ++} ++ ++/* Determine how many clobbered registers can be saved by the stub. ++ Returns the count of registers the stub will save and restore. */ ++unsigned ++xlogue_layout::count_stub_managed_regs () ++{ ++ bool hfp = frame_pointer_needed || stack_realign_fp; ++ unsigned i, count; ++ unsigned regno; ++ ++ for (count = i = MIN_REGS; i < MAX_REGS; ++i) ++ { ++ regno = REG_ORDER[i]; ++ if (regno == BP_REG && hfp) ++ continue; ++ if (!ix86_save_reg (regno, false, false)) ++ break; ++ ++count; ++ } ++ return count; ++} ++ ++/* Determine if register REGNO is a stub managed register given the ++ total COUNT of stub managed registers. */ ++bool ++xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count) ++{ ++ bool hfp = frame_pointer_needed || stack_realign_fp; ++ unsigned i; ++ ++ for (i = 0; i < count; ++i) ++ { ++ gcc_assert (i < MAX_REGS); ++ if (REG_ORDER[i] == BP_REG && hfp) ++ ++count; ++ else if (REG_ORDER[i] == regno) ++ return true; ++ } ++ return false; ++} ++ ++/* Constructor for xlogue_layout. */ ++xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp) ++ : m_hfp (hfp) , m_nregs (hfp ? 17 : 18), ++ m_stack_align_off_in (stack_align_off_in) ++{ ++ HOST_WIDE_INT offset = stack_align_off_in; ++ unsigned i, j; ++ ++ for (i = j = 0; i < MAX_REGS; ++i) ++ { ++ unsigned regno = REG_ORDER[i]; ++ ++ if (regno == BP_REG && hfp) ++ continue; ++ if (SSE_REGNO_P (regno)) ++ { ++ offset += 16; ++ /* Verify that SSE regs are always aligned. */ ++ gcc_assert (!((stack_align_off_in + offset) & 15)); ++ } ++ else ++ offset += 8; ++ ++ m_regs[j].regno = regno; ++ m_regs[j++].offset = offset - STUB_INDEX_OFFSET; ++ } ++ gcc_assert (j == m_nregs); ++} ++ ++const char * ++xlogue_layout::get_stub_name (enum xlogue_stub stub, ++ unsigned n_extra_regs) ++{ ++ const int have_avx = TARGET_AVX; ++ char *name = s_stub_names[!!have_avx][stub][n_extra_regs]; ++ ++ /* Lazy init */ ++ if (!*name) ++ { ++ int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u", ++ (have_avx ? 
"avx" : "sse"), ++ STUB_BASE_NAMES[stub], ++ MIN_REGS + n_extra_regs); ++ gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN); ++ } ++ ++ return name; ++} ++ ++/* Return rtx of a symbol ref for the entry point (based upon ++ cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */ ++rtx ++xlogue_layout::get_stub_rtx (enum xlogue_stub stub) ++{ ++ const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs; ++ gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS); ++ gcc_assert (stub < XLOGUE_STUB_COUNT); ++ gcc_assert (crtl->stack_realign_finalized); ++ ++ return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs)); ++} ++ ++unsigned scalar_chain::max_id = 0; ++ ++/* Initialize new chain. */ ++ ++scalar_chain::scalar_chain () ++{ ++ chain_id = ++max_id; ++ ++ if (dump_file) ++ fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id); ++ ++ bitmap_obstack_initialize (NULL); ++ insns = BITMAP_ALLOC (NULL); ++ defs = BITMAP_ALLOC (NULL); ++ defs_conv = BITMAP_ALLOC (NULL); ++ queue = NULL; ++} ++ ++/* Free chain's data. */ ++ ++scalar_chain::~scalar_chain () ++{ ++ BITMAP_FREE (insns); ++ BITMAP_FREE (defs); ++ BITMAP_FREE (defs_conv); ++ bitmap_obstack_release (NULL); ++} ++ ++/* Add instruction into chains' queue. */ ++ ++void ++scalar_chain::add_to_queue (unsigned insn_uid) ++{ ++ if (bitmap_bit_p (insns, insn_uid) ++ || bitmap_bit_p (queue, insn_uid)) ++ return; ++ ++ if (dump_file) ++ fprintf (dump_file, " Adding insn %d into chain's #%d queue\n", ++ insn_uid, chain_id); ++ bitmap_set_bit (queue, insn_uid); ++} ++ ++/* For DImode conversion, mark register defined by DEF as requiring ++ conversion. */ ++ ++void ++dimode_scalar_chain::mark_dual_mode_def (df_ref def) ++{ ++ gcc_assert (DF_REF_REG_DEF_P (def)); ++ ++ if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def))) ++ return; ++ ++ if (dump_file) ++ fprintf (dump_file, ++ " Mark r%d def in insn %d as requiring both modes in chain #%d\n", ++ DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id); ++ ++ bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); ++} ++ ++/* For TImode conversion, it is unused. */ ++ ++void ++timode_scalar_chain::mark_dual_mode_def (df_ref) ++{ ++ gcc_unreachable (); ++} ++ ++/* Check REF's chain to add new insns into a queue ++ and find registers requiring conversion. */ ++ ++void ++scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref) ++{ ++ df_link *chain; ++ ++ gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)) ++ || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))); ++ add_to_queue (DF_REF_INSN_UID (ref)); ++ ++ for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next) ++ { ++ unsigned uid = DF_REF_INSN_UID (chain->ref); ++ ++ if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref))) ++ continue; ++ ++ if (!DF_REF_REG_MEM_P (chain->ref)) ++ { ++ if (bitmap_bit_p (insns, uid)) ++ continue; ++ ++ if (bitmap_bit_p (candidates, uid)) ++ { ++ add_to_queue (uid); ++ continue; ++ } ++ } ++ ++ if (DF_REF_REG_DEF_P (chain->ref)) ++ { ++ if (dump_file) ++ fprintf (dump_file, " r%d def in insn %d isn't convertible\n", ++ DF_REF_REGNO (chain->ref), uid); ++ mark_dual_mode_def (chain->ref); ++ } ++ else ++ { ++ if (dump_file) ++ fprintf (dump_file, " r%d use in insn %d isn't convertible\n", ++ DF_REF_REGNO (chain->ref), uid); ++ mark_dual_mode_def (ref); ++ } ++ } ++} ++ ++/* Add instruction into a chain. 
*/ ++ ++void ++scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid) ++{ ++ if (bitmap_bit_p (insns, insn_uid)) ++ return; ++ ++ if (dump_file) ++ fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id); ++ ++ bitmap_set_bit (insns, insn_uid); ++ ++ rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; ++ rtx def_set = single_set (insn); ++ if (def_set && REG_P (SET_DEST (def_set)) ++ && !HARD_REGISTER_P (SET_DEST (def_set))) ++ bitmap_set_bit (defs, REGNO (SET_DEST (def_set))); ++ ++ df_ref ref; ++ df_ref def; ++ for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) ++ if (!HARD_REGISTER_P (DF_REF_REG (ref))) ++ for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref)); ++ def; ++ def = DF_REF_NEXT_REG (def)) ++ analyze_register_chain (candidates, def); ++ for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) ++ if (!DF_REF_REG_MEM_P (ref)) ++ analyze_register_chain (candidates, ref); ++} ++ ++/* Build new chain starting from insn INSN_UID recursively ++ adding all dependent uses and definitions. */ ++ ++void ++scalar_chain::build (bitmap candidates, unsigned insn_uid) ++{ ++ queue = BITMAP_ALLOC (NULL); ++ bitmap_set_bit (queue, insn_uid); ++ ++ if (dump_file) ++ fprintf (dump_file, "Building chain #%d...\n", chain_id); ++ ++ while (!bitmap_empty_p (queue)) ++ { ++ insn_uid = bitmap_first_set_bit (queue); ++ bitmap_clear_bit (queue, insn_uid); ++ bitmap_clear_bit (candidates, insn_uid); ++ add_insn (candidates, insn_uid); ++ } ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Collected chain #%d...\n", chain_id); ++ fprintf (dump_file, " insns: "); ++ dump_bitmap (dump_file, insns); ++ if (!bitmap_empty_p (defs_conv)) ++ { ++ bitmap_iterator bi; ++ unsigned id; ++ const char *comma = ""; ++ fprintf (dump_file, " defs to convert: "); ++ EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi) ++ { ++ fprintf (dump_file, "%sr%d", comma, id); ++ comma = ", "; ++ } ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ BITMAP_FREE (queue); ++} ++ ++/* Return a cost of building a vector costant ++ instead of using a scalar one. */ ++ ++int ++dimode_scalar_chain::vector_const_cost (rtx exp) ++{ ++ gcc_assert (CONST_INT_P (exp)); ++ ++ if (standard_sse_constant_p (exp, V2DImode)) ++ return COSTS_N_INSNS (1); ++ return ix86_cost->sse_load[1]; ++} ++ ++/* Compute a gain for chain conversion. 
*/ ++ ++int ++dimode_scalar_chain::compute_convert_gain () ++{ ++ bitmap_iterator bi; ++ unsigned insn_uid; ++ int gain = 0; ++ int cost = 0; ++ ++ if (dump_file) ++ fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); ++ ++ EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi) ++ { ++ rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; ++ rtx def_set = single_set (insn); ++ rtx src = SET_SRC (def_set); ++ rtx dst = SET_DEST (def_set); ++ ++ if (REG_P (src) && REG_P (dst)) ++ gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move; ++ else if (REG_P (src) && MEM_P (dst)) ++ gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; ++ else if (MEM_P (src) && REG_P (dst)) ++ gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1]; ++ else if (GET_CODE (src) == ASHIFT ++ || GET_CODE (src) == ASHIFTRT ++ || GET_CODE (src) == LSHIFTRT) ++ { ++ if (CONST_INT_P (XEXP (src, 0))) ++ gain -= vector_const_cost (XEXP (src, 0)); ++ gain += ix86_cost->shift_const; ++ if (INTVAL (XEXP (src, 1)) >= 32) ++ gain -= COSTS_N_INSNS (1); ++ } ++ else if (GET_CODE (src) == PLUS ++ || GET_CODE (src) == MINUS ++ || GET_CODE (src) == IOR ++ || GET_CODE (src) == XOR ++ || GET_CODE (src) == AND) ++ { ++ gain += ix86_cost->add; ++ /* Additional gain for andnot for targets without BMI. */ ++ if (GET_CODE (XEXP (src, 0)) == NOT ++ && !TARGET_BMI) ++ gain += 2 * ix86_cost->add; ++ ++ if (CONST_INT_P (XEXP (src, 0))) ++ gain -= vector_const_cost (XEXP (src, 0)); ++ if (CONST_INT_P (XEXP (src, 1))) ++ gain -= vector_const_cost (XEXP (src, 1)); ++ } ++ else if (GET_CODE (src) == NEG ++ || GET_CODE (src) == NOT) ++ gain += ix86_cost->add - COSTS_N_INSNS (1); ++ else if (GET_CODE (src) == COMPARE) ++ { ++ /* Assume comparison cost is the same. */ ++ } ++ else if (CONST_INT_P (src)) ++ { ++ if (REG_P (dst)) ++ gain += COSTS_N_INSNS (2); ++ else if (MEM_P (dst)) ++ gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; ++ gain -= vector_const_cost (src); ++ } ++ else ++ gcc_unreachable (); ++ } ++ ++ if (dump_file) ++ fprintf (dump_file, " Instruction conversion gain: %d\n", gain); ++ ++ EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi) ++ cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer; ++ ++ if (dump_file) ++ fprintf (dump_file, " Registers conversion cost: %d\n", cost); ++ ++ gain -= cost; ++ ++ if (dump_file) ++ fprintf (dump_file, " Total gain: %d\n", gain); ++ ++ return gain; ++} ++ ++/* Replace REG in X with a V2DI subreg of NEW_REG. */ ++ ++rtx ++dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg) ++{ ++ if (x == reg) ++ return gen_rtx_SUBREG (V2DImode, new_reg, 0); ++ ++ const char *fmt = GET_RTX_FORMAT (GET_CODE (x)); ++ int i, j; ++ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) ++ { ++ if (fmt[i] == 'e') ++ XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg); ++ else if (fmt[i] == 'E') ++ for (j = XVECLEN (x, i) - 1; j >= 0; j--) ++ XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j), ++ reg, new_reg); ++ } ++ ++ return x; ++} ++ ++/* Replace REG in INSN with a V2DI subreg of NEW_REG. */ ++ ++void ++dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, ++ rtx reg, rtx new_reg) ++{ ++ replace_with_subreg (single_set (insn), reg, new_reg); ++} ++ ++/* Insert generated conversion instruction sequence INSNS ++ after instruction AFTER. New BB may be required in case ++ instruction has EH region attached. 
*/ ++ ++void ++scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after) ++{ ++ if (!control_flow_insn_p (after)) ++ { ++ emit_insn_after (insns, after); ++ return; ++ } ++ ++ basic_block bb = BLOCK_FOR_INSN (after); ++ edge e = find_fallthru_edge (bb->succs); ++ gcc_assert (e); ++ ++ basic_block new_bb = split_edge (e); ++ emit_insn_after (insns, BB_HEAD (new_bb)); ++} ++ ++/* Make vector copies for all register REGNO definitions ++ and replace its uses in a chain. */ ++ ++void ++dimode_scalar_chain::make_vector_copies (unsigned regno) ++{ ++ rtx reg = regno_reg_rtx[regno]; ++ rtx vreg = gen_reg_rtx (DImode); ++ df_ref ref; ++ ++ for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) ++ if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) ++ { ++ start_sequence (); ++ if (!TARGET_INTER_UNIT_MOVES_TO_VEC) ++ { ++ rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP); ++ emit_move_insn (adjust_address (tmp, SImode, 0), ++ gen_rtx_SUBREG (SImode, reg, 0)); ++ emit_move_insn (adjust_address (tmp, SImode, 4), ++ gen_rtx_SUBREG (SImode, reg, 4)); ++ emit_move_insn (vreg, tmp); ++ } ++ else if (TARGET_SSE4_1) ++ { ++ emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), ++ CONST0_RTX (V4SImode), ++ gen_rtx_SUBREG (SImode, reg, 0))); ++ emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0), ++ gen_rtx_SUBREG (V4SImode, vreg, 0), ++ gen_rtx_SUBREG (SImode, reg, 4), ++ GEN_INT (2))); ++ } ++ else ++ { ++ rtx tmp = gen_reg_rtx (DImode); ++ emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), ++ CONST0_RTX (V4SImode), ++ gen_rtx_SUBREG (SImode, reg, 0))); ++ emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0), ++ CONST0_RTX (V4SImode), ++ gen_rtx_SUBREG (SImode, reg, 4))); ++ emit_insn (gen_vec_interleave_lowv4si ++ (gen_rtx_SUBREG (V4SImode, vreg, 0), ++ gen_rtx_SUBREG (V4SImode, vreg, 0), ++ gen_rtx_SUBREG (V4SImode, tmp, 0))); ++ } ++ rtx_insn *seq = get_insns (); ++ end_sequence (); ++ rtx_insn *insn = DF_REF_INSN (ref); ++ emit_conversion_insns (seq, insn); ++ ++ if (dump_file) ++ fprintf (dump_file, ++ " Copied r%d to a vector register r%d for insn %d\n", ++ regno, REGNO (vreg), INSN_UID (insn)); ++ } ++ ++ for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) ++ if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) ++ { ++ rtx_insn *insn = DF_REF_INSN (ref); ++ replace_with_subreg_in_insn (insn, reg, vreg); ++ ++ if (dump_file) ++ fprintf (dump_file, " Replaced r%d with r%d in insn %d\n", ++ regno, REGNO (vreg), INSN_UID (insn)); ++ } ++} ++ ++/* Convert all definitions of register REGNO ++ and fix its uses. Scalar copies may be created ++ in case register is used in not convertible insn. 
*/ ++ ++void ++dimode_scalar_chain::convert_reg (unsigned regno) ++{ ++ bool scalar_copy = bitmap_bit_p (defs_conv, regno); ++ rtx reg = regno_reg_rtx[regno]; ++ rtx scopy = NULL_RTX; ++ df_ref ref; ++ bitmap conv; ++ ++ conv = BITMAP_ALLOC (NULL); ++ bitmap_copy (conv, insns); ++ ++ if (scalar_copy) ++ scopy = gen_reg_rtx (DImode); ++ ++ for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) ++ { ++ rtx_insn *insn = DF_REF_INSN (ref); ++ rtx def_set = single_set (insn); ++ rtx src = SET_SRC (def_set); ++ rtx reg = DF_REF_REG (ref); ++ ++ if (!MEM_P (src)) ++ { ++ replace_with_subreg_in_insn (insn, reg, reg); ++ bitmap_clear_bit (conv, INSN_UID (insn)); ++ } ++ ++ if (scalar_copy) ++ { ++ start_sequence (); ++ if (!TARGET_INTER_UNIT_MOVES_FROM_VEC) ++ { ++ rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP); ++ emit_move_insn (tmp, reg); ++ emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), ++ adjust_address (tmp, SImode, 0)); ++ emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), ++ adjust_address (tmp, SImode, 4)); ++ } ++ else if (TARGET_SSE4_1) ++ { ++ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); ++ emit_insn ++ (gen_rtx_SET ++ (gen_rtx_SUBREG (SImode, scopy, 0), ++ gen_rtx_VEC_SELECT (SImode, ++ gen_rtx_SUBREG (V4SImode, reg, 0), tmp))); ++ ++ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx)); ++ emit_insn ++ (gen_rtx_SET ++ (gen_rtx_SUBREG (SImode, scopy, 4), ++ gen_rtx_VEC_SELECT (SImode, ++ gen_rtx_SUBREG (V4SImode, reg, 0), tmp))); ++ } ++ else ++ { ++ rtx vcopy = gen_reg_rtx (V2DImode); ++ emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0)); ++ emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), ++ gen_rtx_SUBREG (SImode, vcopy, 0)); ++ emit_move_insn (vcopy, ++ gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32))); ++ emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), ++ gen_rtx_SUBREG (SImode, vcopy, 0)); ++ } ++ rtx_insn *seq = get_insns (); ++ end_sequence (); ++ emit_conversion_insns (seq, insn); ++ ++ if (dump_file) ++ fprintf (dump_file, ++ " Copied r%d to a scalar register r%d for insn %d\n", ++ regno, REGNO (scopy), INSN_UID (insn)); ++ } ++ } ++ ++ for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) ++ if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) ++ { ++ if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref))) ++ { ++ rtx_insn *insn = DF_REF_INSN (ref); ++ ++ rtx def_set = single_set (insn); ++ gcc_assert (def_set); ++ ++ rtx src = SET_SRC (def_set); ++ rtx dst = SET_DEST (def_set); ++ ++ if (!MEM_P (dst) || !REG_P (src)) ++ replace_with_subreg_in_insn (insn, reg, reg); ++ ++ bitmap_clear_bit (conv, INSN_UID (insn)); ++ } ++ } ++ /* Skip debug insns and uninitialized uses. */ ++ else if (DF_REF_CHAIN (ref) ++ && NONDEBUG_INSN_P (DF_REF_INSN (ref))) ++ { ++ gcc_assert (scopy); ++ replace_rtx (DF_REF_INSN (ref), reg, scopy); ++ df_insn_rescan (DF_REF_INSN (ref)); ++ } ++ ++ BITMAP_FREE (conv); ++} ++ ++/* Convert operand OP in INSN. We should handle ++ memory operands and uninitialized registers. ++ All other register uses are converted during ++ registers conversion. 
*/ ++ ++void ++dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn) ++{ ++ *op = copy_rtx_if_shared (*op); ++ ++ if (GET_CODE (*op) == NOT) ++ { ++ convert_op (&XEXP (*op, 0), insn); ++ PUT_MODE (*op, V2DImode); ++ } ++ else if (MEM_P (*op)) ++ { ++ rtx tmp = gen_reg_rtx (DImode); ++ ++ emit_insn_before (gen_move_insn (tmp, *op), insn); ++ *op = gen_rtx_SUBREG (V2DImode, tmp, 0); ++ ++ if (dump_file) ++ fprintf (dump_file, " Preloading operand for insn %d into r%d\n", ++ INSN_UID (insn), REGNO (tmp)); ++ } ++ else if (REG_P (*op)) ++ { ++ /* We may have not converted register usage in case ++ this register has no definition. Otherwise it ++ should be converted in convert_reg. */ ++ df_ref ref; ++ FOR_EACH_INSN_USE (ref, insn) ++ if (DF_REF_REGNO (ref) == REGNO (*op)) ++ { ++ gcc_assert (!DF_REF_CHAIN (ref)); ++ break; ++ } ++ *op = gen_rtx_SUBREG (V2DImode, *op, 0); ++ } ++ else if (CONST_INT_P (*op)) ++ { ++ rtx vec_cst; ++ rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0); ++ ++ /* Prefer all ones vector in case of -1. */ ++ if (constm1_operand (*op, GET_MODE (*op))) ++ vec_cst = CONSTM1_RTX (V2DImode); ++ else ++ vec_cst = gen_rtx_CONST_VECTOR (V2DImode, ++ gen_rtvec (2, *op, const0_rtx)); ++ ++ if (!standard_sse_constant_p (vec_cst, V2DImode)) ++ { ++ start_sequence (); ++ vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst)); ++ rtx_insn *seq = get_insns (); ++ end_sequence (); ++ emit_insn_before (seq, insn); ++ } ++ ++ emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn); ++ *op = tmp; ++ } ++ else ++ { ++ gcc_assert (SUBREG_P (*op)); ++ gcc_assert (GET_MODE (*op) == V2DImode); ++ } ++} ++ ++/* Convert INSN to vector mode. */ ++ ++void ++dimode_scalar_chain::convert_insn (rtx_insn *insn) ++{ ++ rtx def_set = single_set (insn); ++ rtx src = SET_SRC (def_set); ++ rtx dst = SET_DEST (def_set); ++ rtx subreg; ++ ++ if (MEM_P (dst) && !REG_P (src)) ++ { ++ /* There are no scalar integer instructions and therefore ++ temporary register usage is required. 
*/ ++ rtx tmp = gen_reg_rtx (DImode); ++ emit_conversion_insns (gen_move_insn (dst, tmp), insn); ++ dst = gen_rtx_SUBREG (V2DImode, tmp, 0); ++ } ++ ++ switch (GET_CODE (src)) ++ { ++ case ASHIFT: ++ case ASHIFTRT: ++ case LSHIFTRT: ++ convert_op (&XEXP (src, 0), insn); ++ PUT_MODE (src, V2DImode); ++ break; ++ ++ case PLUS: ++ case MINUS: ++ case IOR: ++ case XOR: ++ case AND: ++ convert_op (&XEXP (src, 0), insn); ++ convert_op (&XEXP (src, 1), insn); ++ PUT_MODE (src, V2DImode); ++ break; ++ ++ case NEG: ++ src = XEXP (src, 0); ++ convert_op (&src, insn); ++ subreg = gen_reg_rtx (V2DImode); ++ emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn); ++ src = gen_rtx_MINUS (V2DImode, subreg, src); ++ break; ++ ++ case NOT: ++ src = XEXP (src, 0); ++ convert_op (&src, insn); ++ subreg = gen_reg_rtx (V2DImode); ++ emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn); ++ src = gen_rtx_XOR (V2DImode, src, subreg); ++ break; ++ ++ case MEM: ++ if (!REG_P (dst)) ++ convert_op (&src, insn); ++ break; ++ ++ case REG: ++ if (!MEM_P (dst)) ++ convert_op (&src, insn); ++ break; ++ ++ case SUBREG: ++ gcc_assert (GET_MODE (src) == V2DImode); ++ break; ++ ++ case COMPARE: ++ src = SUBREG_REG (XEXP (XEXP (src, 0), 0)); ++ ++ gcc_assert ((REG_P (src) && GET_MODE (src) == DImode) ++ || (SUBREG_P (src) && GET_MODE (src) == V2DImode)); ++ ++ if (REG_P (src)) ++ subreg = gen_rtx_SUBREG (V2DImode, src, 0); ++ else ++ subreg = copy_rtx_if_shared (src); ++ emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg), ++ copy_rtx_if_shared (subreg), ++ copy_rtx_if_shared (subreg)), ++ insn); ++ dst = gen_rtx_REG (CCmode, FLAGS_REG); ++ src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src), ++ copy_rtx_if_shared (src)), ++ UNSPEC_PTEST); ++ break; ++ ++ case CONST_INT: ++ convert_op (&src, insn); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ SET_SRC (def_set) = src; ++ SET_DEST (def_set) = dst; ++ ++ /* Drop possible dead definitions. */ ++ PATTERN (insn) = def_set; ++ ++ INSN_CODE (insn) = -1; ++ recog_memoized (insn); ++ df_insn_rescan (insn); ++} ++ ++/* Fix uses of converted REG in debug insns. */ ++ ++void ++timode_scalar_chain::fix_debug_reg_uses (rtx reg) ++{ ++ if (!flag_var_tracking) ++ return; ++ ++ df_ref ref, next; ++ for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next) ++ { ++ rtx_insn *insn = DF_REF_INSN (ref); ++ /* Make sure the next ref is for a different instruction, ++ so that we're not affected by the rescan. */ ++ next = DF_REF_NEXT_REG (ref); ++ while (next && DF_REF_INSN (next) == insn) ++ next = DF_REF_NEXT_REG (next); ++ ++ if (DEBUG_INSN_P (insn)) ++ { ++ /* It may be a debug insn with a TImode variable in ++ register. */ ++ bool changed = false; ++ for (; ref != next; ref = DF_REF_NEXT_REG (ref)) ++ { ++ rtx *loc = DF_REF_LOC (ref); ++ if (REG_P (*loc) && GET_MODE (*loc) == V1TImode) ++ { ++ *loc = gen_rtx_SUBREG (TImode, *loc, 0); ++ changed = true; ++ } ++ } ++ if (changed) ++ df_insn_rescan (insn); ++ } ++ } ++} ++ ++/* Convert INSN from TImode to V1T1mode. 
*/ ++ ++void ++timode_scalar_chain::convert_insn (rtx_insn *insn) ++{ ++ rtx def_set = single_set (insn); ++ rtx src = SET_SRC (def_set); ++ rtx dst = SET_DEST (def_set); ++ ++ switch (GET_CODE (dst)) ++ { ++ case REG: ++ { ++ rtx tmp = find_reg_equal_equiv_note (insn); ++ if (tmp) ++ PUT_MODE (XEXP (tmp, 0), V1TImode); ++ PUT_MODE (dst, V1TImode); ++ fix_debug_reg_uses (dst); ++ } ++ break; ++ case MEM: ++ PUT_MODE (dst, V1TImode); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ switch (GET_CODE (src)) ++ { ++ case REG: ++ PUT_MODE (src, V1TImode); ++ /* Call fix_debug_reg_uses only if SRC is never defined. */ ++ if (!DF_REG_DEF_CHAIN (REGNO (src))) ++ fix_debug_reg_uses (src); ++ break; ++ ++ case MEM: ++ PUT_MODE (src, V1TImode); ++ break; ++ ++ case CONST_WIDE_INT: ++ if (NONDEBUG_INSN_P (insn)) ++ { ++ /* Since there are no instructions to store 128-bit constant, ++ temporary register usage is required. */ ++ rtx tmp = gen_reg_rtx (V1TImode); ++ start_sequence (); ++ src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src)); ++ src = validize_mem (force_const_mem (V1TImode, src)); ++ rtx_insn *seq = get_insns (); ++ end_sequence (); ++ if (seq) ++ emit_insn_before (seq, insn); ++ emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); ++ dst = tmp; ++ } ++ break; ++ ++ case CONST_INT: ++ switch (standard_sse_constant_p (src, TImode)) ++ { ++ case 1: ++ src = CONST0_RTX (GET_MODE (dst)); ++ break; ++ case 2: ++ src = CONSTM1_RTX (GET_MODE (dst)); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ if (NONDEBUG_INSN_P (insn)) ++ { ++ rtx tmp = gen_reg_rtx (V1TImode); ++ /* Since there are no instructions to store standard SSE ++ constant, temporary register usage is required. */ ++ emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); ++ dst = tmp; ++ } ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ SET_SRC (def_set) = src; ++ SET_DEST (def_set) = dst; ++ ++ /* Drop possible dead definitions. */ ++ PATTERN (insn) = def_set; ++ ++ INSN_CODE (insn) = -1; ++ recog_memoized (insn); ++ df_insn_rescan (insn); ++} ++ ++void ++dimode_scalar_chain::convert_registers () ++{ ++ bitmap_iterator bi; ++ unsigned id; ++ ++ EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi) ++ convert_reg (id); ++ ++ EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi) ++ make_vector_copies (id); ++} ++ ++/* Convert whole chain creating required register ++ conversions and copies. */ ++ ++int ++scalar_chain::convert () ++{ ++ bitmap_iterator bi; ++ unsigned id; ++ int converted_insns = 0; ++ ++ if (!dbg_cnt (stv_conversion)) ++ return 0; ++ ++ if (dump_file) ++ fprintf (dump_file, "Converting chain #%d...\n", chain_id); ++ ++ convert_registers (); ++ ++ EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) ++ { ++ convert_insn (DF_INSN_UID_GET (id)->insn); ++ converted_insns++; ++ } ++ ++ return converted_insns; ++} ++ ++/* Return 1 if INSN uses or defines a hard register. ++ Hard register uses in a memory address are ignored. ++ Clobbers and flags definitions are ignored. */ ++ ++static bool ++has_non_address_hard_reg (rtx_insn *insn) ++{ ++ df_ref ref; ++ FOR_EACH_INSN_DEF (ref, insn) ++ if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) ++ && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) ++ && DF_REF_REGNO (ref) != FLAGS_REG) ++ return true; ++ ++ FOR_EACH_INSN_USE (ref, insn) ++ if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) ++ return true; ++ ++ return false; ++} ++ ++/* Check if comparison INSN may be transformed ++ into vector comparison. 
Currently we transform ++ zero checks only which look like: ++ ++ (set (reg:CCZ 17 flags) ++ (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4) ++ (subreg:SI (reg:DI x) 0)) ++ (const_int 0 [0]))) */ ++ ++static bool ++convertible_comparison_p (rtx_insn *insn) ++{ ++ if (!TARGET_SSE4_1) ++ return false; ++ ++ rtx def_set = single_set (insn); ++ ++ gcc_assert (def_set); ++ ++ rtx src = SET_SRC (def_set); ++ rtx dst = SET_DEST (def_set); ++ ++ gcc_assert (GET_CODE (src) == COMPARE); ++ ++ if (GET_CODE (dst) != REG ++ || REGNO (dst) != FLAGS_REG ++ || GET_MODE (dst) != CCZmode) ++ return false; ++ ++ rtx op1 = XEXP (src, 0); ++ rtx op2 = XEXP (src, 1); ++ ++ if (op2 != CONST0_RTX (GET_MODE (op2))) ++ return false; ++ ++ if (GET_CODE (op1) != IOR) ++ return false; ++ ++ op2 = XEXP (op1, 1); ++ op1 = XEXP (op1, 0); ++ ++ if (!SUBREG_P (op1) ++ || !SUBREG_P (op2) ++ || GET_MODE (op1) != SImode ++ || GET_MODE (op2) != SImode ++ || ((SUBREG_BYTE (op1) != 0 ++ || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode)) ++ && (SUBREG_BYTE (op2) != 0 ++ || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode)))) ++ return false; ++ ++ op1 = SUBREG_REG (op1); ++ op2 = SUBREG_REG (op2); ++ ++ if (op1 != op2 ++ || !REG_P (op1) ++ || GET_MODE (op1) != DImode) ++ return false; ++ ++ return true; ++} ++ ++/* The DImode version of scalar_to_vector_candidate_p. */ ++ ++static bool ++dimode_scalar_to_vector_candidate_p (rtx_insn *insn) ++{ ++ rtx def_set = single_set (insn); ++ ++ if (!def_set) ++ return false; ++ ++ if (has_non_address_hard_reg (insn)) ++ return false; ++ ++ rtx src = SET_SRC (def_set); ++ rtx dst = SET_DEST (def_set); ++ ++ if (GET_CODE (src) == COMPARE) ++ return convertible_comparison_p (insn); ++ ++ /* We are interested in DImode promotion only. */ ++ if ((GET_MODE (src) != DImode ++ && !CONST_INT_P (src)) ++ || GET_MODE (dst) != DImode) ++ return false; ++ ++ if (!REG_P (dst) && !MEM_P (dst)) ++ return false; ++ ++ switch (GET_CODE (src)) ++ { ++ case ASHIFTRT: ++ if (!TARGET_AVX512VL) ++ return false; ++ /* FALLTHRU */ ++ ++ case ASHIFT: ++ case LSHIFTRT: ++ if (!CONST_INT_P (XEXP (src, 1)) ++ || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)) ++ return false; ++ break; ++ ++ case PLUS: ++ case MINUS: ++ case IOR: ++ case XOR: ++ case AND: ++ if (!REG_P (XEXP (src, 1)) ++ && !MEM_P (XEXP (src, 1)) ++ && !CONST_INT_P (XEXP (src, 1))) ++ return false; ++ ++ if (GET_MODE (XEXP (src, 1)) != DImode ++ && !CONST_INT_P (XEXP (src, 1))) ++ return false; ++ break; ++ ++ case NEG: ++ case NOT: ++ break; ++ ++ case REG: ++ return true; ++ ++ case MEM: ++ case CONST_INT: ++ return REG_P (dst); ++ ++ default: ++ return false; ++ } ++ ++ if (!REG_P (XEXP (src, 0)) ++ && !MEM_P (XEXP (src, 0)) ++ && !CONST_INT_P (XEXP (src, 0)) ++ /* Check for andnot case. */ ++ && (GET_CODE (src) != AND ++ || GET_CODE (XEXP (src, 0)) != NOT ++ || !REG_P (XEXP (XEXP (src, 0), 0)))) ++ return false; ++ ++ if (GET_MODE (XEXP (src, 0)) != DImode ++ && !CONST_INT_P (XEXP (src, 0))) ++ return false; ++ ++ return true; ++} ++ ++/* The TImode version of scalar_to_vector_candidate_p. */ ++ ++static bool ++timode_scalar_to_vector_candidate_p (rtx_insn *insn) ++{ ++ rtx def_set = single_set (insn); ++ ++ if (!def_set) ++ return false; ++ ++ if (has_non_address_hard_reg (insn)) ++ return false; ++ ++ rtx src = SET_SRC (def_set); ++ rtx dst = SET_DEST (def_set); ++ ++ /* Only TImode load and store are allowed. */ ++ if (GET_MODE (dst) != TImode) ++ return false; ++ ++ if (MEM_P (dst)) ++ { ++ /* Check for store. 
Memory must be aligned or unaligned store ++ is optimal. Only support store from register, standard SSE ++ constant or CONST_WIDE_INT generated from piecewise store. ++ ++ ??? Verify performance impact before enabling CONST_INT for ++ __int128 store. */ ++ if (misaligned_operand (dst, TImode) ++ && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL) ++ return false; ++ ++ switch (GET_CODE (src)) ++ { ++ default: ++ return false; ++ ++ case REG: ++ case CONST_WIDE_INT: ++ return true; ++ ++ case CONST_INT: ++ return standard_sse_constant_p (src, TImode); ++ } ++ } ++ else if (MEM_P (src)) ++ { ++ /* Check for load. Memory must be aligned or unaligned load is ++ optimal. */ ++ return (REG_P (dst) ++ && (!misaligned_operand (src, TImode) ++ || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)); ++ } ++ ++ return false; ++} ++ ++/* Return 1 if INSN may be converted into vector ++ instruction. */ ++ ++static bool ++scalar_to_vector_candidate_p (rtx_insn *insn) ++{ ++ if (TARGET_64BIT) ++ return timode_scalar_to_vector_candidate_p (insn); ++ else ++ return dimode_scalar_to_vector_candidate_p (insn); ++} ++ ++/* The DImode version of remove_non_convertible_regs. */ ++ ++static void ++dimode_remove_non_convertible_regs (bitmap candidates) ++{ ++ bitmap_iterator bi; ++ unsigned id; ++ bitmap regs = BITMAP_ALLOC (NULL); ++ ++ EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) ++ { ++ rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); ++ rtx reg = SET_DEST (def_set); ++ ++ if (!REG_P (reg) ++ || bitmap_bit_p (regs, REGNO (reg)) ++ || HARD_REGISTER_P (reg)) ++ continue; ++ ++ for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg)); ++ def; ++ def = DF_REF_NEXT_REG (def)) ++ { ++ if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "r%d has non convertible definition in insn %d\n", ++ REGNO (reg), DF_REF_INSN_UID (def)); ++ ++ bitmap_set_bit (regs, REGNO (reg)); ++ break; ++ } ++ } ++ } ++ ++ EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) ++ { ++ for (df_ref def = DF_REG_DEF_CHAIN (id); ++ def; ++ def = DF_REF_NEXT_REG (def)) ++ if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Removing insn %d from candidates list\n", ++ DF_REF_INSN_UID (def)); ++ ++ bitmap_clear_bit (candidates, DF_REF_INSN_UID (def)); ++ } ++ } ++ ++ BITMAP_FREE (regs); ++} ++ ++/* For a register REGNO, scan instructions for its defs and uses. ++ Put REGNO in REGS if a def or use isn't in CANDIDATES. */ ++ ++static void ++timode_check_non_convertible_regs (bitmap candidates, bitmap regs, ++ unsigned int regno) ++{ ++ for (df_ref def = DF_REG_DEF_CHAIN (regno); ++ def; ++ def = DF_REF_NEXT_REG (def)) ++ { ++ if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "r%d has non convertible def in insn %d\n", ++ regno, DF_REF_INSN_UID (def)); ++ ++ bitmap_set_bit (regs, regno); ++ break; ++ } ++ } ++ ++ for (df_ref ref = DF_REG_USE_CHAIN (regno); ++ ref; ++ ref = DF_REF_NEXT_REG (ref)) ++ { ++ /* Debug instructions are skipped. */ ++ if (NONDEBUG_INSN_P (DF_REF_INSN (ref)) ++ && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) ++ { ++ if (dump_file) ++ fprintf (dump_file, ++ "r%d has non convertible use in insn %d\n", ++ regno, DF_REF_INSN_UID (ref)); ++ ++ bitmap_set_bit (regs, regno); ++ break; ++ } ++ } ++} ++ ++/* The TImode version of remove_non_convertible_regs. 
*/ ++ ++static void ++timode_remove_non_convertible_regs (bitmap candidates) ++{ ++ bitmap_iterator bi; ++ unsigned id; ++ bitmap regs = BITMAP_ALLOC (NULL); ++ ++ EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) ++ { ++ rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); ++ rtx dest = SET_DEST (def_set); ++ rtx src = SET_SRC (def_set); ++ ++ if ((!REG_P (dest) ++ || bitmap_bit_p (regs, REGNO (dest)) ++ || HARD_REGISTER_P (dest)) ++ && (!REG_P (src) ++ || bitmap_bit_p (regs, REGNO (src)) ++ || HARD_REGISTER_P (src))) ++ continue; ++ ++ if (REG_P (dest)) ++ timode_check_non_convertible_regs (candidates, regs, ++ REGNO (dest)); ++ ++ if (REG_P (src)) ++ timode_check_non_convertible_regs (candidates, regs, ++ REGNO (src)); ++ } ++ ++ EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) ++ { ++ for (df_ref def = DF_REG_DEF_CHAIN (id); ++ def; ++ def = DF_REF_NEXT_REG (def)) ++ if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Removing insn %d from candidates list\n", ++ DF_REF_INSN_UID (def)); ++ ++ bitmap_clear_bit (candidates, DF_REF_INSN_UID (def)); ++ } ++ ++ for (df_ref ref = DF_REG_USE_CHAIN (id); ++ ref; ++ ref = DF_REF_NEXT_REG (ref)) ++ if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Removing insn %d from candidates list\n", ++ DF_REF_INSN_UID (ref)); ++ ++ bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref)); ++ } ++ } ++ ++ BITMAP_FREE (regs); ++} ++ ++/* For a given bitmap of insn UIDs scans all instruction and ++ remove insn from CANDIDATES in case it has both convertible ++ and not convertible definitions. ++ ++ All insns in a bitmap are conversion candidates according to ++ scalar_to_vector_candidate_p. Currently it implies all insns ++ are single_set. */ ++ ++static void ++remove_non_convertible_regs (bitmap candidates) ++{ ++ if (TARGET_64BIT) ++ timode_remove_non_convertible_regs (candidates); ++ else ++ dimode_remove_non_convertible_regs (candidates); ++} ++ ++/* Main STV pass function. Find and convert scalar ++ instructions into vector mode when profitable. */ ++ ++static unsigned int ++convert_scalars_to_vector () ++{ ++ basic_block bb; ++ bitmap candidates; ++ int converted_insns = 0; ++ ++ bitmap_obstack_initialize (NULL); ++ candidates = BITMAP_ALLOC (NULL); ++ ++ calculate_dominance_info (CDI_DOMINATORS); ++ df_set_flags (DF_DEFER_INSN_RESCAN); ++ df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); ++ df_md_add_problem (); ++ df_analyze (); ++ ++ /* Find all instructions we want to convert into vector mode. */ ++ if (dump_file) ++ fprintf (dump_file, "Searching for mode conversion candidates...\n"); ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ rtx_insn *insn; ++ FOR_BB_INSNS (bb, insn) ++ if (scalar_to_vector_candidate_p (insn)) ++ { ++ if (dump_file) ++ fprintf (dump_file, " insn %d is marked as a candidate\n", ++ INSN_UID (insn)); ++ ++ bitmap_set_bit (candidates, INSN_UID (insn)); ++ } ++ } ++ ++ remove_non_convertible_regs (candidates); ++ ++ if (bitmap_empty_p (candidates)) ++ if (dump_file) ++ fprintf (dump_file, "There are no candidates for optimization.\n"); ++ ++ while (!bitmap_empty_p (candidates)) ++ { ++ unsigned uid = bitmap_first_set_bit (candidates); ++ scalar_chain *chain; ++ ++ if (TARGET_64BIT) ++ chain = new timode_scalar_chain; ++ else ++ chain = new dimode_scalar_chain; ++ ++ /* Find instructions chain we want to convert to vector mode. ++ Check all uses and definitions to estimate all required ++ conversions. 
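++      For example, when compiling for 32-bit x86 with -msse2 -mstv at -O2,
++      a DImode operation such as
++
++        long long f (long long a, long long b) { return a & b; }  // illustrative only
++
++      is a candidate: the (and:DI ...) insn together with the definitions
++      and uses of its operands forms one chain, and the chain is only
++      converted (here to a single pand on xmm registers) when
++      compute_convert_gain estimates the change to be profitable.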
*/ ++ chain->build (candidates, uid); ++ ++ if (chain->compute_convert_gain () > 0) ++ converted_insns += chain->convert (); ++ else ++ if (dump_file) ++ fprintf (dump_file, "Chain #%d conversion is not profitable\n", ++ chain->chain_id); ++ ++ delete chain; ++ } ++ ++ if (dump_file) ++ fprintf (dump_file, "Total insns converted: %d\n", converted_insns); ++ ++ BITMAP_FREE (candidates); ++ bitmap_obstack_release (NULL); ++ df_process_deferred_rescans (); ++ ++ /* Conversion means we may have 128bit register spills/fills ++ which require aligned stack. */ ++ if (converted_insns) ++ { ++ if (crtl->stack_alignment_needed < 128) ++ crtl->stack_alignment_needed = 128; ++ if (crtl->stack_alignment_estimated < 128) ++ crtl->stack_alignment_estimated = 128; ++ /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */ ++ if (TARGET_64BIT) ++ for (tree parm = DECL_ARGUMENTS (current_function_decl); ++ parm; parm = DECL_CHAIN (parm)) ++ { ++ if (TYPE_MODE (TREE_TYPE (parm)) != TImode) ++ continue; ++ if (DECL_RTL_SET_P (parm) ++ && GET_MODE (DECL_RTL (parm)) == V1TImode) ++ { ++ rtx r = DECL_RTL (parm); ++ if (REG_P (r)) ++ SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0)); ++ } ++ if (DECL_INCOMING_RTL (parm) ++ && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode) ++ { ++ rtx r = DECL_INCOMING_RTL (parm); ++ if (REG_P (r)) ++ DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0); ++ } ++ } ++ } ++ ++ return 0; ++} ++ ++static unsigned int ++rest_of_handle_insert_vzeroupper (void) ++{ ++ int i; ++ ++ /* vzeroupper instructions are inserted immediately after reload to ++ account for possible spills from 256bit or 512bit registers. The pass ++ reuses mode switching infrastructure by re-running mode insertion ++ pass, so disable entities that have already been processed. */ ++ for (i = 0; i < MAX_386_ENTITIES; i++) ++ ix86_optimize_mode_switching[i] = 0; ++ ++ ix86_optimize_mode_switching[AVX_U128] = 1; ++ ++ /* Call optimize_mode_switching. 
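++      vzeroupper clears the upper bits of the ymm registers so that later
++      SSE code does not pay an AVX-SSE transition penalty.  Illustrative
++      example (assuming -O2 -mavx -mvzeroupper; names are for exposition):
++
++        #include <immintrin.h>
++        extern void sse_callee (void);
++        void f (float *p)
++        {
++          __m256 v = _mm256_loadu_ps (p);
++          _mm256_storeu_ps (p, _mm256_add_ps (v, v));
++          sse_callee ();   // a vzeroupper is expected before this call
++        }
++
++      The re-run of the mode-switching pass below decides where such
++      instructions are needed after reload.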
*/ ++ g->get_passes ()->execute_pass_mode_switching (); ++ return 0; ++} ++ ++namespace { ++ ++const pass_data pass_data_insert_vzeroupper = ++{ ++ RTL_PASS, /* type */ ++ "vzeroupper", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_insert_vzeroupper : public rtl_opt_pass ++{ ++public: ++ pass_insert_vzeroupper(gcc::context *ctxt) ++ : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ return TARGET_AVX ++ && TARGET_VZEROUPPER && flag_expensive_optimizations ++ && !optimize_size; ++ } ++ ++ virtual unsigned int execute (function *) ++ { ++ return rest_of_handle_insert_vzeroupper (); ++ } ++ ++}; // class pass_insert_vzeroupper ++ ++const pass_data pass_data_stv = ++{ ++ RTL_PASS, /* type */ ++ "stv", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_stv : public rtl_opt_pass ++{ ++public: ++ pass_stv (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_stv, ctxt), ++ timode_p (false) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ return (timode_p == !!TARGET_64BIT ++ && TARGET_STV && TARGET_SSE2 && optimize > 1); ++ } ++ ++ virtual unsigned int execute (function *) ++ { ++ return convert_scalars_to_vector (); ++ } ++ ++ opt_pass *clone () ++ { ++ return new pass_stv (m_ctxt); ++ } ++ ++ void set_pass_param (unsigned int n, bool param) ++ { ++ gcc_assert (n == 0); ++ timode_p = param; ++ } ++ ++private: ++ bool timode_p; ++}; // class pass_stv ++ ++} // anon namespace ++ ++rtl_opt_pass * ++make_pass_insert_vzeroupper (gcc::context *ctxt) ++{ ++ return new pass_insert_vzeroupper (ctxt); ++} ++ ++rtl_opt_pass * ++make_pass_stv (gcc::context *ctxt) ++{ ++ return new pass_stv (ctxt); ++} ++ ++/* Inserting ENDBRANCH instructions. */ ++ ++static unsigned int ++rest_of_insert_endbranch (void) ++{ ++ timevar_push (TV_MACH_DEP); ++ ++ rtx cet_eb; ++ rtx_insn *insn; ++ basic_block bb; ++ ++ /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is ++ absent among function attributes. Later an optimization will be ++ introduced to make analysis if an address of a static function is ++ taken. A static function whose address is not taken will get a ++ nocf_check attribute. This will allow to reduce the number of EB. */ ++ ++ if (!lookup_attribute ("nocf_check", ++ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) ++ && (!flag_manual_endbr ++ || lookup_attribute ("cf_check", ++ DECL_ATTRIBUTES (cfun->decl))) ++ && !cgraph_node::get (cfun->decl)->only_called_directly_p ()) ++ { ++ /* Queue ENDBR insertion to x86_function_profiler. 
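++      With -fcf-protection=branch, a function that may be reached through
++      an indirect call or jump must begin with an ENDBR instruction.  When
++      profiling with -pg -mfentry the __fentry__ call is itself placed at
++      the function entrance, so the ENDBR cannot simply be emitted here;
++      it is queued and x86_function_profiler outputs it ahead of the
++      __fentry__ call.  Illustrative example (options as stated):
++
++        // gcc -O2 -pg -mfentry -fcf-protection=branch
++        void (*handler) (void);
++        void run (void) { handler (); }  // entry: endbr64, then call __fentry__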
*/ ++ if (crtl->profile && flag_fentry) ++ cfun->machine->endbr_queued_at_entrance = true; ++ else ++ { ++ cet_eb = gen_nop_endbr (); ++ ++ bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; ++ insn = BB_HEAD (bb); ++ emit_insn_before (cet_eb, insn); ++ } ++ } ++ ++ bb = 0; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); ++ insn = NEXT_INSN (insn)) ++ { ++ if (CALL_P (insn)) ++ { ++ bool need_endbr; ++ need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL; ++ if (!need_endbr && !SIBLING_CALL_P (insn)) ++ { ++ rtx call = get_call_rtx_from (insn); ++ rtx fnaddr = XEXP (call, 0); ++ tree fndecl = NULL_TREE; ++ ++ /* Also generate ENDBRANCH for non-tail call which ++ may return via indirect branch. */ ++ if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) ++ fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0)); ++ if (fndecl == NULL_TREE) ++ fndecl = MEM_EXPR (fnaddr); ++ if (fndecl ++ && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE ++ && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE) ++ fndecl = NULL_TREE; ++ if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl))) ++ { ++ tree fntype = TREE_TYPE (fndecl); ++ if (lookup_attribute ("indirect_return", ++ TYPE_ATTRIBUTES (fntype))) ++ need_endbr = true; ++ } ++ } ++ if (!need_endbr) ++ continue; ++ /* Generate ENDBRANCH after CALL, which can return more than ++ twice, setjmp-like functions. */ ++ ++ cet_eb = gen_nop_endbr (); ++ emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn)); ++ continue; ++ } ++ ++ if (JUMP_P (insn) && flag_cet_switch) ++ { ++ rtx target = JUMP_LABEL (insn); ++ if (target == NULL_RTX || ANY_RETURN_P (target)) ++ continue; ++ ++ /* Check the jump is a switch table. */ ++ rtx_insn *label = as_a (target); ++ rtx_insn *table = next_insn (label); ++ if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table)) ++ continue; ++ ++ /* For the indirect jump find out all places it jumps and insert ++ ENDBRANCH there. It should be done under a special flag to ++ control ENDBRANCH generation for switch stmts. */ ++ edge_iterator ei; ++ edge e; ++ basic_block dest_blk; ++ ++ FOR_EACH_EDGE (e, ei, bb->succs) ++ { ++ rtx_insn *insn; ++ ++ dest_blk = e->dest; ++ insn = BB_HEAD (dest_blk); ++ gcc_assert (LABEL_P (insn)); ++ cet_eb = gen_nop_endbr (); ++ emit_insn_after (cet_eb, insn); ++ } ++ continue; ++ } ++ ++ if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn)) ++ || (NOTE_P (insn) ++ && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) ++ /* TODO. Check /s bit also. */ ++ { ++ cet_eb = gen_nop_endbr (); ++ emit_insn_after (cet_eb, insn); ++ continue; ++ } ++ } ++ } ++ ++ timevar_pop (TV_MACH_DEP); ++ return 0; ++} ++ ++namespace { ++ ++const pass_data pass_data_insert_endbranch = ++{ ++ RTL_PASS, /* type. */ ++ "cet", /* name. */ ++ OPTGROUP_NONE, /* optinfo_flags. */ ++ TV_MACH_DEP, /* tv_id. */ ++ 0, /* properties_required. */ ++ 0, /* properties_provided. */ ++ 0, /* properties_destroyed. */ ++ 0, /* todo_flags_start. */ ++ 0, /* todo_flags_finish. 
*/ ++}; ++ ++class pass_insert_endbranch : public rtl_opt_pass ++{ ++public: ++ pass_insert_endbranch (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_insert_endbranch, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ return ((flag_cf_protection & CF_BRANCH)); ++ } ++ ++ virtual unsigned int execute (function *) ++ { ++ return rest_of_insert_endbranch (); ++ } ++ ++}; // class pass_insert_endbranch ++ ++} // anon namespace ++ ++rtl_opt_pass * ++make_pass_insert_endbranch (gcc::context *ctxt) ++{ ++ return new pass_insert_endbranch (ctxt); ++} ++ ++/* At entry of the nearest common dominator for basic blocks with ++ conversions, generate a single ++ vxorps %xmmN, %xmmN, %xmmN ++ for all ++ vcvtss2sd op, %xmmN, %xmmX ++ vcvtsd2ss op, %xmmN, %xmmX ++ vcvtsi2ss op, %xmmN, %xmmX ++ vcvtsi2sd op, %xmmN, %xmmX ++ ++ NB: We want to generate only a single vxorps to cover the whole ++ function. The LCM algorithm isn't appropriate here since it may ++ place a vxorps inside the loop. */ ++ ++static unsigned int ++remove_partial_avx_dependency (void) ++{ ++ timevar_push (TV_MACH_DEP); ++ ++ bitmap_obstack_initialize (NULL); ++ bitmap convert_bbs = BITMAP_ALLOC (NULL); ++ ++ basic_block bb; ++ rtx_insn *insn, *set_insn; ++ rtx set; ++ rtx v4sf_const0 = NULL_RTX; ++ ++ auto_vec control_flow_insns; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (!NONDEBUG_INSN_P (insn)) ++ continue; ++ ++ set = single_set (insn); ++ if (!set) ++ continue; ++ ++ if (get_attr_avx_partial_xmm_update (insn) ++ != AVX_PARTIAL_XMM_UPDATE_TRUE) ++ continue; ++ ++ if (!v4sf_const0) ++ { ++ calculate_dominance_info (CDI_DOMINATORS); ++ df_set_flags (DF_DEFER_INSN_RESCAN); ++ df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); ++ df_md_add_problem (); ++ df_analyze (); ++ v4sf_const0 = gen_reg_rtx (V4SFmode); ++ } ++ ++ /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, ++ SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and ++ vec_merge with subreg. */ ++ rtx src = SET_SRC (set); ++ rtx dest = SET_DEST (set); ++ machine_mode dest_mode = GET_MODE (dest); ++ ++ rtx zero; ++ machine_mode dest_vecmode; ++ if (dest_mode == E_SFmode) ++ { ++ dest_vecmode = V4SFmode; ++ zero = v4sf_const0; ++ } ++ else ++ { ++ dest_vecmode = V2DFmode; ++ zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0); ++ } ++ ++ /* Change source to vector mode. */ ++ src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src); ++ src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero, ++ GEN_INT (HOST_WIDE_INT_1U)); ++ /* Change destination to vector mode. */ ++ rtx vec = gen_reg_rtx (dest_vecmode); ++ /* Generate an XMM vector SET. */ ++ set = gen_rtx_SET (vec, src); ++ set_insn = emit_insn_before (set, insn); ++ df_insn_rescan (set_insn); ++ ++ if (cfun->can_throw_non_call_exceptions) ++ { ++ /* Handle REG_EH_REGION note. */ ++ rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX); ++ if (note) ++ { ++ control_flow_insns.safe_push (set_insn); ++ add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0)); ++ } ++ } ++ ++ src = gen_rtx_SUBREG (dest_mode, vec, 0); ++ set = gen_rtx_SET (dest, src); ++ ++ /* Drop possible dead definitions. */ ++ PATTERN (insn) = set; ++ ++ INSN_CODE (insn) = -1; ++ recog_memoized (insn); ++ df_insn_rescan (insn); ++ bitmap_set_bit (convert_bbs, bb->index); ++ } ++ } ++ ++ if (v4sf_const0) ++ { ++ /* (Re-)discover loops so that bb->loop_father can be used in the ++ analysis below. 
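++      Illustrative example of a function with two conversion sites (not
++      taken from this patch):
++
++        double g (int c, int x, float y)
++        {
++          if (c)
++            return (double) x;   // vcvtsi2sd writes only part of an xmm reg
++          return (double) y;     // vcvtss2sd likewise
++        }
++
++      Both conversions can share one zeroing vxorps, so it is emitted once
++      at the nearest common dominator of the blocks that need it, outside
++      of any loop.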
*/ ++ loop_optimizer_init (AVOID_CFG_MODIFICATIONS); ++ ++ /* Generate a vxorps at entry of the nearest dominator for basic ++ blocks with conversions, which is in the the fake loop that ++ contains the whole function, so that there is only a single ++ vxorps in the whole function. */ ++ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, ++ convert_bbs); ++ while (bb->loop_father->latch ++ != EXIT_BLOCK_PTR_FOR_FN (cfun)) ++ bb = get_immediate_dominator (CDI_DOMINATORS, ++ bb->loop_father->header); ++ ++ set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode)); ++ ++ insn = BB_HEAD (bb); ++ while (insn && !NONDEBUG_INSN_P (insn)) ++ { ++ if (insn == BB_END (bb)) ++ { ++ insn = NULL; ++ break; ++ } ++ insn = NEXT_INSN (insn); ++ } ++ if (insn == BB_HEAD (bb)) ++ set_insn = emit_insn_before (set, insn); ++ else ++ set_insn = emit_insn_after (set, ++ insn ? PREV_INSN (insn) : BB_END (bb)); ++ df_insn_rescan (set_insn); ++ df_process_deferred_rescans (); ++ loop_optimizer_finalize (); ++ ++ if (!control_flow_insns.is_empty ()) ++ { ++ free_dominance_info (CDI_DOMINATORS); ++ ++ unsigned int i; ++ FOR_EACH_VEC_ELT (control_flow_insns, i, insn) ++ if (control_flow_insn_p (insn)) ++ { ++ /* Split the block after insn. There will be a fallthru ++ edge, which is OK so we keep it. We have to create ++ the exception edges ourselves. */ ++ bb = BLOCK_FOR_INSN (insn); ++ split_block (bb, insn); ++ rtl_make_eh_edge (NULL, bb, BB_END (bb)); ++ } ++ } ++ } ++ ++ bitmap_obstack_release (NULL); ++ BITMAP_FREE (convert_bbs); ++ ++ timevar_pop (TV_MACH_DEP); ++ return 0; ++} ++ ++namespace { ++ ++const pass_data pass_data_remove_partial_avx_dependency = ++{ ++ RTL_PASS, /* type */ ++ "rpad", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_MACH_DEP, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_remove_partial_avx_dependency : public rtl_opt_pass ++{ ++public: ++ pass_remove_partial_avx_dependency (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ return (TARGET_AVX ++ && TARGET_SSE_PARTIAL_REG_DEPENDENCY ++ && TARGET_SSE_MATH ++ && optimize ++ && optimize_function_for_speed_p (cfun)); ++ } ++ ++ virtual unsigned int execute (function *) ++ { ++ return remove_partial_avx_dependency (); ++ } ++}; // class pass_rpad ++ ++} // anon namespace ++ ++rtl_opt_pass * ++make_pass_remove_partial_avx_dependency (gcc::context *ctxt) ++{ ++ return new pass_remove_partial_avx_dependency (ctxt); ++} ++ ++/* This compares the priority of target features in function DECL1 ++ and DECL2. It returns positive value if DECL1 is higher priority, ++ negative value if DECL2 is higher priority and 0 if they are the ++ same. */ ++ ++int ++ix86_compare_version_priority (tree decl1, tree decl2) ++{ ++ unsigned int priority1 = get_builtin_code_for_version (decl1, NULL); ++ unsigned int priority2 = get_builtin_code_for_version (decl2, NULL); ++ ++ return (int)priority1 - (int)priority2; ++} ++ ++/* V1 and V2 point to function versions with different priorities ++ based on the target ISA. This function compares their priorities. 
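++   For example (C++, illustrative only):
++
++     __attribute__ ((target ("default"))) int foo () { return 0; }
++     __attribute__ ((target ("sse4.2"))) int foo () { return 1; }
++     __attribute__ ((target ("avx2")))   int foo () { return 2; }
++
++   The dispatcher must test the avx2 version before the sse4.2 one, so
++   the versions are sorted by descending dispatch priority (see
++   feature_compare below) before their runtime checks are emitted.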
*/ ++ ++static int ++feature_compare (const void *v1, const void *v2) ++{ ++ typedef struct _function_version_info ++ { ++ tree version_decl; ++ tree predicate_chain; ++ unsigned int dispatch_priority; ++ } function_version_info; ++ ++ const function_version_info c1 = *(const function_version_info *)v1; ++ const function_version_info c2 = *(const function_version_info *)v2; ++ return (c2.dispatch_priority - c1.dispatch_priority); ++} ++ ++/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL ++ to return a pointer to VERSION_DECL if the outcome of the expression ++ formed by PREDICATE_CHAIN is true. This function will be called during ++ version dispatch to decide which function version to execute. It returns ++ the basic block at the end, to which more conditions can be added. */ ++ ++static basic_block ++add_condition_to_bb (tree function_decl, tree version_decl, ++ tree predicate_chain, basic_block new_bb) ++{ ++ gimple *return_stmt; ++ tree convert_expr, result_var; ++ gimple *convert_stmt; ++ gimple *call_cond_stmt; ++ gimple *if_else_stmt; ++ ++ basic_block bb1, bb2, bb3; ++ edge e12, e23; ++ ++ tree cond_var, and_expr_var = NULL_TREE; ++ gimple_seq gseq; ++ ++ tree predicate_decl, predicate_arg; ++ ++ push_cfun (DECL_STRUCT_FUNCTION (function_decl)); ++ ++ gcc_assert (new_bb != NULL); ++ gseq = bb_seq (new_bb); ++ ++ ++ convert_expr = build1 (CONVERT_EXPR, ptr_type_node, ++ build_fold_addr_expr (version_decl)); ++ result_var = create_tmp_var (ptr_type_node); ++ convert_stmt = gimple_build_assign (result_var, convert_expr); ++ return_stmt = gimple_build_return (result_var); ++ ++ if (predicate_chain == NULL_TREE) ++ { ++ gimple_seq_add_stmt (&gseq, convert_stmt); ++ gimple_seq_add_stmt (&gseq, return_stmt); ++ set_bb_seq (new_bb, gseq); ++ gimple_set_bb (convert_stmt, new_bb); ++ gimple_set_bb (return_stmt, new_bb); ++ pop_cfun (); ++ return new_bb; ++ } ++ ++ while (predicate_chain != NULL) ++ { ++ cond_var = create_tmp_var (integer_type_node); ++ predicate_decl = TREE_PURPOSE (predicate_chain); ++ predicate_arg = TREE_VALUE (predicate_chain); ++ call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg); ++ gimple_call_set_lhs (call_cond_stmt, cond_var); ++ ++ gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl)); ++ gimple_set_bb (call_cond_stmt, new_bb); ++ gimple_seq_add_stmt (&gseq, call_cond_stmt); ++ ++ predicate_chain = TREE_CHAIN (predicate_chain); ++ ++ if (and_expr_var == NULL) ++ and_expr_var = cond_var; ++ else ++ { ++ gimple *assign_stmt; ++ /* Use MIN_EXPR to check if any integer is zero?. 
++ and_expr_var = min_expr */ ++ assign_stmt = gimple_build_assign (and_expr_var, ++ build2 (MIN_EXPR, integer_type_node, ++ cond_var, and_expr_var)); ++ ++ gimple_set_block (assign_stmt, DECL_INITIAL (function_decl)); ++ gimple_set_bb (assign_stmt, new_bb); ++ gimple_seq_add_stmt (&gseq, assign_stmt); ++ } ++ } ++ ++ if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var, ++ integer_zero_node, ++ NULL_TREE, NULL_TREE); ++ gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl)); ++ gimple_set_bb (if_else_stmt, new_bb); ++ gimple_seq_add_stmt (&gseq, if_else_stmt); ++ ++ gimple_seq_add_stmt (&gseq, convert_stmt); ++ gimple_seq_add_stmt (&gseq, return_stmt); ++ set_bb_seq (new_bb, gseq); ++ ++ bb1 = new_bb; ++ e12 = split_block (bb1, if_else_stmt); ++ bb2 = e12->dest; ++ e12->flags &= ~EDGE_FALLTHRU; ++ e12->flags |= EDGE_TRUE_VALUE; ++ ++ e23 = split_block (bb2, return_stmt); ++ ++ gimple_set_bb (convert_stmt, bb2); ++ gimple_set_bb (return_stmt, bb2); ++ ++ bb3 = e23->dest; ++ make_edge (bb1, bb3, EDGE_FALSE_VALUE); ++ ++ remove_edge (e23); ++ make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); ++ ++ pop_cfun (); ++ ++ return bb3; ++} ++ ++/* This function generates the dispatch function for ++ multi-versioned functions. DISPATCH_DECL is the function which will ++ contain the dispatch logic. FNDECLS are the function choices for ++ dispatch, and is a tree chain. EMPTY_BB is the basic block pointer ++ in DISPATCH_DECL in which the dispatch code is generated. */ ++ ++static int ++dispatch_function_versions (tree dispatch_decl, ++ void *fndecls_p, ++ basic_block *empty_bb) ++{ ++ tree default_decl; ++ gimple *ifunc_cpu_init_stmt; ++ gimple_seq gseq; ++ int ix; ++ tree ele; ++ vec *fndecls; ++ unsigned int num_versions = 0; ++ unsigned int actual_versions = 0; ++ unsigned int i; ++ ++ struct _function_version_info ++ { ++ tree version_decl; ++ tree predicate_chain; ++ unsigned int dispatch_priority; ++ }*function_version_info; ++ ++ gcc_assert (dispatch_decl != NULL ++ && fndecls_p != NULL ++ && empty_bb != NULL); ++ ++ /*fndecls_p is actually a vector. */ ++ fndecls = static_cast *> (fndecls_p); ++ ++ /* At least one more version other than the default. */ ++ num_versions = fndecls->length (); ++ gcc_assert (num_versions >= 2); ++ ++ function_version_info = (struct _function_version_info *) ++ XNEWVEC (struct _function_version_info, (num_versions - 1)); ++ ++ /* The first version in the vector is the default decl. */ ++ default_decl = (*fndecls)[0]; ++ ++ push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl)); ++ ++ gseq = bb_seq (*empty_bb); ++ /* Function version dispatch is via IFUNC. IFUNC resolvers fire before ++ constructors, so explicity call __builtin_cpu_init here. */ ++ ifunc_cpu_init_stmt ++ = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL); ++ gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt); ++ gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb); ++ set_bb_seq (*empty_bb, gseq); ++ ++ pop_cfun (); ++ ++ ++ for (ix = 1; fndecls->iterate (ix, &ele); ++ix) ++ { ++ tree version_decl = ele; ++ tree predicate_chain = NULL_TREE; ++ unsigned int priority; ++ /* Get attribute string, parse it and find the right predicate decl. ++ The predicate function could be a lengthy combination of many ++ features, like arch-type and various isa-variants. 
*/ ++ priority = get_builtin_code_for_version (version_decl, ++ &predicate_chain); ++ ++ if (predicate_chain == NULL_TREE) ++ continue; ++ ++ function_version_info [actual_versions].version_decl = version_decl; ++ function_version_info [actual_versions].predicate_chain ++ = predicate_chain; ++ function_version_info [actual_versions].dispatch_priority = priority; ++ actual_versions++; ++ } ++ ++ /* Sort the versions according to descending order of dispatch priority. The ++ priority is based on the ISA. This is not a perfect solution. There ++ could still be ambiguity. If more than one function version is suitable ++ to execute, which one should be dispatched? In future, allow the user ++ to specify a dispatch priority next to the version. */ ++ qsort (function_version_info, actual_versions, ++ sizeof (struct _function_version_info), feature_compare); ++ ++ for (i = 0; i < actual_versions; ++i) ++ *empty_bb = add_condition_to_bb (dispatch_decl, ++ function_version_info[i].version_decl, ++ function_version_info[i].predicate_chain, ++ *empty_bb); ++ ++ /* dispatch default version at the end. */ ++ *empty_bb = add_condition_to_bb (dispatch_decl, default_decl, ++ NULL, *empty_bb); ++ ++ free (function_version_info); ++ return 0; ++} ++ ++/* This function changes the assembler name for functions that are ++ versions. If DECL is a function version and has a "target" ++ attribute, it appends the attribute string to its assembler name. */ ++ ++static tree ++ix86_mangle_function_version_assembler_name (tree decl, tree id) ++{ ++ tree version_attr; ++ const char *orig_name, *version_string; ++ char *attr_str, *assembler_name; ++ ++ if (DECL_DECLARED_INLINE_P (decl) ++ && lookup_attribute ("gnu_inline", ++ DECL_ATTRIBUTES (decl))) ++ error_at (DECL_SOURCE_LOCATION (decl), ++ "function versions cannot be marked as %," ++ " bodies have to be generated"); ++ ++ if (DECL_VIRTUAL_P (decl) ++ || DECL_VINDEX (decl)) ++ sorry ("virtual function multiversioning not supported"); ++ ++ version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); ++ ++ /* target attribute string cannot be NULL. */ ++ gcc_assert (version_attr != NULL_TREE); ++ ++ orig_name = IDENTIFIER_POINTER (id); ++ version_string ++ = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr))); ++ ++ if (strcmp (version_string, "default") == 0) ++ return id; ++ ++ attr_str = sorted_attr_string (TREE_VALUE (version_attr)); ++ assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2); ++ ++ sprintf (assembler_name, "%s.%s", orig_name, attr_str); ++ ++ /* Allow assembler name to be modified if already set. */ ++ if (DECL_ASSEMBLER_NAME_SET_P (decl)) ++ SET_DECL_RTL (decl, NULL); ++ ++ tree ret = get_identifier (assembler_name); ++ XDELETEVEC (attr_str); ++ XDELETEVEC (assembler_name); ++ return ret; ++} ++ ++tree ++ix86_mangle_decl_assembler_name (tree decl, tree id) ++{ ++ /* For function version, add the target suffix to the assembler name. */ ++ if (TREE_CODE (decl) == FUNCTION_DECL ++ && DECL_FUNCTION_VERSIONED (decl)) ++ id = ix86_mangle_function_version_assembler_name (decl, id); ++#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME ++ id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id); ++#endif ++ ++ return id; ++} ++ ++/* Make a dispatcher declaration for the multi-versioned function DECL. ++ Calls to DECL function will be replaced with calls to the dispatcher ++ by the front-end. Returns the decl of the dispatcher function. 
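++   The dispatcher is emitted as an ifunc whose resolver is roughly
++   equivalent to the following hand-written code (the names are only for
++   illustration):
++
++     extern int foo_avx2 (void), foo_sse42 (void), foo_default (void);
++
++     void *foo_resolver (void)
++     {
++       __builtin_cpu_init ();
++       if (__builtin_cpu_supports ("avx2"))
++         return (void *) foo_avx2;
++       if (__builtin_cpu_supports ("sse4.2"))
++         return (void *) foo_sse42;
++       return (void *) foo_default;
++     }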
*/ ++ ++tree ++ix86_get_function_versions_dispatcher (void *decl) ++{ ++ tree fn = (tree) decl; ++ struct cgraph_node *node = NULL; ++ struct cgraph_node *default_node = NULL; ++ struct cgraph_function_version_info *node_v = NULL; ++ struct cgraph_function_version_info *first_v = NULL; ++ ++ tree dispatch_decl = NULL; ++ ++ struct cgraph_function_version_info *default_version_info = NULL; ++ ++ gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); ++ ++ node = cgraph_node::get (fn); ++ gcc_assert (node != NULL); ++ ++ node_v = node->function_version (); ++ gcc_assert (node_v != NULL); ++ ++ if (node_v->dispatcher_resolver != NULL) ++ return node_v->dispatcher_resolver; ++ ++ /* Find the default version and make it the first node. */ ++ first_v = node_v; ++ /* Go to the beginning of the chain. */ ++ while (first_v->prev != NULL) ++ first_v = first_v->prev; ++ default_version_info = first_v; ++ while (default_version_info != NULL) ++ { ++ if (is_function_default_version ++ (default_version_info->this_node->decl)) ++ break; ++ default_version_info = default_version_info->next; ++ } ++ ++ /* If there is no default node, just return NULL. */ ++ if (default_version_info == NULL) ++ return NULL; ++ ++ /* Make default info the first node. */ ++ if (first_v != default_version_info) ++ { ++ default_version_info->prev->next = default_version_info->next; ++ if (default_version_info->next) ++ default_version_info->next->prev = default_version_info->prev; ++ first_v->prev = default_version_info; ++ default_version_info->next = first_v; ++ default_version_info->prev = NULL; ++ } ++ ++ default_node = default_version_info->this_node; ++ ++#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) ++ if (targetm.has_ifunc_p ()) ++ { ++ struct cgraph_function_version_info *it_v = NULL; ++ struct cgraph_node *dispatcher_node = NULL; ++ struct cgraph_function_version_info *dispatcher_version_info = NULL; ++ ++ /* Right now, the dispatching is done via ifunc. */ ++ dispatch_decl = make_dispatcher_decl (default_node->decl); ++ ++ dispatcher_node = cgraph_node::get_create (dispatch_decl); ++ gcc_assert (dispatcher_node != NULL); ++ dispatcher_node->dispatcher_function = 1; ++ dispatcher_version_info ++ = dispatcher_node->insert_new_function_version (); ++ dispatcher_version_info->next = default_version_info; ++ dispatcher_node->definition = 1; ++ ++ /* Set the dispatcher for all the versions. */ ++ it_v = default_version_info; ++ while (it_v != NULL) ++ { ++ it_v->dispatcher_resolver = dispatch_decl; ++ it_v = it_v->next; ++ } ++ } ++ else ++#endif ++ { ++ error_at (DECL_SOURCE_LOCATION (default_node->decl), ++ "multiversioning needs ifunc which is not supported " ++ "on this target"); ++ } ++ ++ return dispatch_decl; ++} ++ ++/* Make the resolver function decl to dispatch the versions of ++ a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is ++ ifunc alias that will point to the created resolver. Create an ++ empty basic block in the resolver and store the pointer in ++ EMPTY_BB. Return the decl of the resolver function. */ ++ ++static tree ++make_resolver_func (const tree default_decl, ++ const tree ifunc_alias_decl, ++ basic_block *empty_bb) ++{ ++ char *resolver_name; ++ tree decl, type, decl_name, t; ++ ++ /* IFUNC's have to be globally visible. So, if the default_decl is ++ not, then the name of the IFUNC should be made unique. 
*/ ++ if (TREE_PUBLIC (default_decl) == 0) ++ { ++ char *ifunc_name = make_unique_name (default_decl, "ifunc", true); ++ symtab->change_decl_assembler_name (ifunc_alias_decl, ++ get_identifier (ifunc_name)); ++ XDELETEVEC (ifunc_name); ++ } ++ ++ resolver_name = make_unique_name (default_decl, "resolver", false); ++ ++ /* The resolver function should return a (void *). */ ++ type = build_function_type_list (ptr_type_node, NULL_TREE); ++ ++ decl = build_fn_decl (resolver_name, type); ++ decl_name = get_identifier (resolver_name); ++ SET_DECL_ASSEMBLER_NAME (decl, decl_name); ++ ++ DECL_NAME (decl) = decl_name; ++ TREE_USED (decl) = 1; ++ DECL_ARTIFICIAL (decl) = 1; ++ DECL_IGNORED_P (decl) = 1; ++ TREE_PUBLIC (decl) = 0; ++ DECL_UNINLINABLE (decl) = 1; ++ ++ /* Resolver is not external, body is generated. */ ++ DECL_EXTERNAL (decl) = 0; ++ DECL_EXTERNAL (ifunc_alias_decl) = 0; ++ ++ DECL_CONTEXT (decl) = NULL_TREE; ++ DECL_INITIAL (decl) = make_node (BLOCK); ++ DECL_STATIC_CONSTRUCTOR (decl) = 0; ++ ++ if (DECL_COMDAT_GROUP (default_decl) ++ || TREE_PUBLIC (default_decl)) ++ { ++ /* In this case, each translation unit with a call to this ++ versioned function will put out a resolver. Ensure it ++ is comdat to keep just one copy. */ ++ DECL_COMDAT (decl) = 1; ++ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); ++ } ++ /* Build result decl and add to function_decl. */ ++ t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node); ++ DECL_CONTEXT (t) = decl; ++ DECL_ARTIFICIAL (t) = 1; ++ DECL_IGNORED_P (t) = 1; ++ DECL_RESULT (decl) = t; ++ ++ gimplify_function_tree (decl); ++ push_cfun (DECL_STRUCT_FUNCTION (decl)); ++ *empty_bb = init_lowered_empty_function (decl, false, ++ profile_count::uninitialized ()); ++ ++ cgraph_node::add_new_function (decl, true); ++ symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl)); ++ ++ pop_cfun (); ++ ++ gcc_assert (ifunc_alias_decl != NULL); ++ /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */ ++ DECL_ATTRIBUTES (ifunc_alias_decl) ++ = make_attribute ("ifunc", resolver_name, ++ DECL_ATTRIBUTES (ifunc_alias_decl)); ++ ++ /* Create the alias for dispatch to resolver here. */ ++ cgraph_node::create_same_body_alias (ifunc_alias_decl, decl); ++ XDELETEVEC (resolver_name); ++ return decl; ++} ++ ++/* Generate the dispatching code body to dispatch multi-versioned function ++ DECL. The target hook is called to process the "target" attributes and ++ provide the code to dispatch the right function at run-time. NODE points ++ to the dispatcher decl whose body will be created. */ ++ ++tree ++ix86_generate_version_dispatcher_body (void *node_p) ++{ ++ tree resolver_decl; ++ basic_block empty_bb; ++ tree default_ver_decl; ++ struct cgraph_node *versn; ++ struct cgraph_node *node; ++ ++ struct cgraph_function_version_info *node_version_info = NULL; ++ struct cgraph_function_version_info *versn_info = NULL; ++ ++ node = (cgraph_node *)node_p; ++ ++ node_version_info = node->function_version (); ++ gcc_assert (node->dispatcher_function ++ && node_version_info != NULL); ++ ++ if (node_version_info->dispatcher_resolver) ++ return node_version_info->dispatcher_resolver; ++ ++ /* The first version in the chain corresponds to the default version. */ ++ default_ver_decl = node_version_info->next->this_node->decl; ++ ++ /* node is going to be an alias, so remove the finalized bit. 
*/ ++ node->definition = false; ++ ++ resolver_decl = make_resolver_func (default_ver_decl, ++ node->decl, &empty_bb); ++ ++ node_version_info->dispatcher_resolver = resolver_decl; ++ ++ push_cfun (DECL_STRUCT_FUNCTION (resolver_decl)); ++ ++ auto_vec fn_ver_vec; ++ ++ for (versn_info = node_version_info->next; versn_info; ++ versn_info = versn_info->next) ++ { ++ versn = versn_info->this_node; ++ /* Check for virtual functions here again, as by this time it should ++ have been determined if this function needs a vtable index or ++ not. This happens for methods in derived classes that override ++ virtual methods in base classes but are not explicitly marked as ++ virtual. */ ++ if (DECL_VINDEX (versn->decl)) ++ sorry ("virtual function multiversioning not supported"); ++ ++ fn_ver_vec.safe_push (versn->decl); ++ } ++ ++ dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); ++ cgraph_edge::rebuild_edges (); ++ pop_cfun (); ++ return resolver_decl; ++} ++ ++ +diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h +new file mode 100644 +index 000000000..358122249 +--- /dev/null ++++ b/gcc/config/i386/i386-features.h +@@ -0,0 +1,201 @@ ++/* Copyright (C) 1988-2019 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef GCC_I386_FEATURES_H ++#define GCC_I386_FEATURES_H ++ ++enum xlogue_stub { ++ XLOGUE_STUB_SAVE, ++ XLOGUE_STUB_RESTORE, ++ XLOGUE_STUB_RESTORE_TAIL, ++ XLOGUE_STUB_SAVE_HFP, ++ XLOGUE_STUB_RESTORE_HFP, ++ XLOGUE_STUB_RESTORE_HFP_TAIL, ++ ++ XLOGUE_STUB_COUNT ++}; ++ ++enum xlogue_stub_sets { ++ XLOGUE_SET_ALIGNED, ++ XLOGUE_SET_ALIGNED_PLUS_8, ++ XLOGUE_SET_HFP_ALIGNED_OR_REALIGN, ++ XLOGUE_SET_HFP_ALIGNED_PLUS_8, ++ ++ XLOGUE_SET_COUNT ++}; ++ ++/* Register save/restore layout used by out-of-line stubs. */ ++class xlogue_layout { ++public: ++ struct reginfo ++ { ++ unsigned regno; ++ HOST_WIDE_INT offset; /* Offset used by stub base pointer (rax or ++ rsi) to where each register is stored. */ ++ }; ++ ++ unsigned get_nregs () const {return m_nregs;} ++ HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;} ++ ++ const reginfo &get_reginfo (unsigned reg) const ++ { ++ gcc_assert (reg < m_nregs); ++ return m_regs[reg]; ++ } ++ ++ static const char *get_stub_name (enum xlogue_stub stub, ++ unsigned n_extra_args); ++ ++ /* Returns an rtx for the stub's symbol based upon ++ 1.) the specified stub (save, restore or restore_ret) and ++ 2.) the value of cfun->machine->call_ms2sysv_extra_regs and ++ 3.) rather or not stack alignment is being performed. */ ++ static rtx get_stub_rtx (enum xlogue_stub stub); ++ ++ /* Returns the amount of stack space (including padding) that the stub ++ needs to store registers based upon data in the machine_function. 
*/ ++ HOST_WIDE_INT get_stack_space_used () const ++ { ++ const struct machine_function *m = cfun->machine; ++ unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1; ++ ++ gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS); ++ return m_regs[last_reg].offset + STUB_INDEX_OFFSET; ++ } ++ ++ /* Returns the offset for the base pointer used by the stub. */ ++ HOST_WIDE_INT get_stub_ptr_offset () const ++ { ++ return STUB_INDEX_OFFSET + m_stack_align_off_in; ++ } ++ ++ static const struct xlogue_layout &get_instance (); ++ static unsigned count_stub_managed_regs (); ++ static bool is_stub_managed_reg (unsigned regno, unsigned count); ++ ++ static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70; ++ static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS; ++ static const unsigned MAX_REGS = 18; ++ static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS; ++ static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1; ++ static const unsigned STUB_NAME_MAX_LEN = 20; ++ static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT]; ++ static const unsigned REG_ORDER[MAX_REGS]; ++ static const unsigned REG_ORDER_REALIGN[MAX_REGS]; ++ ++private: ++ xlogue_layout (); ++ xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp); ++ xlogue_layout (const xlogue_layout &); ++ ++ /* True if hard frame pointer is used. */ ++ bool m_hfp; ++ ++ /* Max number of register this layout manages. */ ++ unsigned m_nregs; ++ ++ /* Incoming offset from 16-byte alignment. */ ++ HOST_WIDE_INT m_stack_align_off_in; ++ ++ /* Register order and offsets. */ ++ struct reginfo m_regs[MAX_REGS]; ++ ++ /* Lazy-inited cache of symbol names for stubs. */ ++ static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT] ++ [STUB_NAME_MAX_LEN]; ++ ++ static const xlogue_layout s_instances[XLOGUE_SET_COUNT]; ++}; ++ ++namespace { ++ ++class scalar_chain ++{ ++ public: ++ scalar_chain (); ++ virtual ~scalar_chain (); ++ ++ static unsigned max_id; ++ ++ /* ID of a chain. */ ++ unsigned int chain_id; ++ /* A queue of instructions to be included into a chain. */ ++ bitmap queue; ++ /* Instructions included into a chain. */ ++ bitmap insns; ++ /* All registers defined by a chain. */ ++ bitmap defs; ++ /* Registers used in both vector and sclar modes. */ ++ bitmap defs_conv; ++ ++ void build (bitmap candidates, unsigned insn_uid); ++ virtual int compute_convert_gain () = 0; ++ int convert (); ++ ++ protected: ++ void add_to_queue (unsigned insn_uid); ++ void emit_conversion_insns (rtx insns, rtx_insn *pos); ++ ++ private: ++ void add_insn (bitmap candidates, unsigned insn_uid); ++ void analyze_register_chain (bitmap candidates, df_ref ref); ++ virtual void mark_dual_mode_def (df_ref def) = 0; ++ virtual void convert_insn (rtx_insn *insn) = 0; ++ virtual void convert_registers () = 0; ++}; ++ ++class dimode_scalar_chain : public scalar_chain ++{ ++ public: ++ int compute_convert_gain (); ++ private: ++ void mark_dual_mode_def (df_ref def); ++ rtx replace_with_subreg (rtx x, rtx reg, rtx subreg); ++ void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg); ++ void convert_insn (rtx_insn *insn); ++ void convert_op (rtx *op, rtx_insn *insn); ++ void convert_reg (unsigned regno); ++ void make_vector_copies (unsigned regno); ++ void convert_registers (); ++ int vector_const_cost (rtx exp); ++}; ++ ++class timode_scalar_chain : public scalar_chain ++{ ++ public: ++ /* Convert from TImode to V1TImode is always faster. 
*/ ++ int compute_convert_gain () { return 1; } ++ ++ private: ++ void mark_dual_mode_def (df_ref def); ++ void fix_debug_reg_uses (rtx reg); ++ void convert_insn (rtx_insn *insn); ++ /* We don't convert registers to difference size. */ ++ void convert_registers () {} ++}; ++ ++} // anon namespace ++ ++bool ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined); ++int ix86_compare_version_priority (tree decl1, tree decl2); ++tree ix86_generate_version_dispatcher_body (void *node_p); ++tree ix86_get_function_versions_dispatcher (void *decl); ++tree ix86_mangle_decl_assembler_name (tree decl, tree id); ++ ++ ++#endif /* GCC_I386_FEATURES_H */ +diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c +new file mode 100644 +index 000000000..4a03bead8 +--- /dev/null ++++ b/gcc/config/i386/i386-options.c +@@ -0,0 +1,3707 @@ ++/* Copyright (C) 1988-2019 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "rtl.h" ++#include "tree.h" ++#include "memmodel.h" ++#include "gimple.h" ++#include "cfghooks.h" ++#include "cfgloop.h" ++#include "df.h" ++#include "tm_p.h" ++#include "stringpool.h" ++#include "expmed.h" ++#include "optabs.h" ++#include "regs.h" ++#include "emit-rtl.h" ++#include "recog.h" ++#include "cgraph.h" ++#include "diagnostic.h" ++#include "cfgbuild.h" ++#include "alias.h" ++#include "fold-const.h" ++#include "attribs.h" ++#include "calls.h" ++#include "stor-layout.h" ++#include "varasm.h" ++#include "output.h" ++#include "insn-attr.h" ++#include "flags.h" ++#include "except.h" ++#include "explow.h" ++#include "expr.h" ++#include "cfgrtl.h" ++#include "common/common-target.h" ++#include "langhooks.h" ++#include "reload.h" ++#include "gimplify.h" ++#include "dwarf2.h" ++#include "tm-constrs.h" ++#include "params.h" ++#include "cselib.h" ++#include "sched-int.h" ++#include "opts.h" ++#include "tree-pass.h" ++#include "context.h" ++#include "pass_manager.h" ++#include "target-globals.h" ++#include "gimple-iterator.h" ++#include "tree-vectorizer.h" ++#include "shrink-wrap.h" ++#include "builtins.h" ++#include "rtl-iter.h" ++#include "tree-iterator.h" ++#include "dbgcnt.h" ++#include "case-cfn-macros.h" ++#include "dojump.h" ++#include "fold-const-call.h" ++#include "tree-vrp.h" ++#include "tree-ssanames.h" ++#include "selftest.h" ++#include "selftest-rtl.h" ++#include "print-rtl.h" ++#include "intl.h" ++#include "ifcvt.h" ++#include "symbol-summary.h" ++#include "ipa-prop.h" ++#include "ipa-fnsummary.h" ++#include "wide-int-bitmask.h" ++#include "tree-vector-builder.h" ++#include "debug.h" ++#include "dwarf2out.h" ++#include "i386-options.h" ++ ++#include "x86-tune-costs.h" ++ ++#ifndef SUBTARGET32_DEFAULT_CPU ++#define SUBTARGET32_DEFAULT_CPU "i386" ++#endif ++ ++/* Processor feature/optimization bitmasks. 
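++   Each m_<CPU> mask is the bit for one enum processor_type value, roughly
++
++     #define m_386  (HOST_WIDE_INT_1U << PROCESSOR_I386)   // illustrative form
++
++   so a tuning entry can name the processors it applies to as a bitwise OR
++   such as (m_SKYLAKE | m_GENERIC).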
*/ ++#define m_386 (HOST_WIDE_INT_1U< 70) ++ { ++ *ptr++ = '\\'; ++ *ptr++ = '\n'; ++ line_len = 0; ++ } ++ } ++ ++ for (j = 0; j < 2; j++) ++ if (opts[i][j]) ++ { ++ memcpy (ptr, opts[i][j], len2[j]); ++ ptr += len2[j]; ++ line_len += len2[j]; ++ } ++ } ++ ++ *ptr = '\0'; ++ gcc_assert (ret + len >= ptr); ++ ++ return ret; ++} ++ ++/* Function that is callable from the debugger to print the current ++ options. */ ++void ATTRIBUTE_UNUSED ++ix86_debug_options (void) ++{ ++ char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2, ++ target_flags, ix86_target_flags, ++ ix86_arch_string,ix86_tune_string, ++ ix86_fpmath, true, true); ++ ++ if (opts) ++ { ++ fprintf (stderr, "%s\n\n", opts); ++ free (opts); ++ } ++ else ++ fputs ("\n\n", stderr); ++ ++ return; ++} ++ ++/* Save the current options */ ++ ++void ++ix86_function_specific_save (struct cl_target_option *ptr, ++ struct gcc_options *opts) ++{ ++ ptr->arch = ix86_arch; ++ ptr->schedule = ix86_schedule; ++ ptr->prefetch_sse = x86_prefetch_sse; ++ ptr->tune = ix86_tune; ++ ptr->branch_cost = ix86_branch_cost; ++ ptr->tune_defaulted = ix86_tune_defaulted; ++ ptr->arch_specified = ix86_arch_specified; ++ ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit; ++ ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit; ++ ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit; ++ ptr->x_ix86_arch_string = opts->x_ix86_arch_string; ++ ptr->x_ix86_tune_string = opts->x_ix86_tune_string; ++ ptr->x_ix86_cmodel = opts->x_ix86_cmodel; ++ ptr->x_ix86_abi = opts->x_ix86_abi; ++ ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect; ++ ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost; ++ ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes; ++ ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer; ++ ptr->x_ix86_force_drap = opts->x_ix86_force_drap; ++ ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg; ++ ptr->x_ix86_pmode = opts->x_ix86_pmode; ++ ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg; ++ ptr->x_ix86_recip_name = opts->x_ix86_recip_name; ++ ptr->x_ix86_regparm = opts->x_ix86_regparm; ++ ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold; ++ ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx; ++ ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard; ++ ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg; ++ ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect; ++ ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string; ++ ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy; ++ ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy; ++ ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default; ++ ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type; ++ ++ /* The fields are char but the variables are not; make sure the ++ values fit in the fields. */ ++ gcc_assert (ptr->arch == ix86_arch); ++ gcc_assert (ptr->schedule == ix86_schedule); ++ gcc_assert (ptr->tune == ix86_tune); ++ gcc_assert (ptr->branch_cost == ix86_branch_cost); ++} ++ ++/* Feature tests against the various architecture variations, used to create ++ ix86_arch_features based on the processor mask. */ ++static unsigned HOST_WIDE_INT initial_ix86_arch_features[X86_ARCH_LAST] = { ++ /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */ ++ ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6), ++ ++ /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. 
*/ ++ ~m_386, ++ ++ /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */ ++ ~(m_386 | m_486), ++ ++ /* X86_ARCH_XADD: Exchange and add was added for 80486. */ ++ ~m_386, ++ ++ /* X86_ARCH_BSWAP: Byteswap was added for 80486. */ ++ ~m_386, ++}; ++ ++/* This table must be in sync with enum processor_type in i386.h. */ ++static const struct processor_costs *processor_cost_table[] = ++{ ++ &generic_cost, ++ &i386_cost, ++ &i486_cost, ++ &pentium_cost, ++ &lakemont_cost, ++ &pentiumpro_cost, ++ &pentium4_cost, ++ &nocona_cost, ++ &core_cost, ++ &core_cost, ++ &core_cost, ++ &core_cost, ++ &atom_cost, ++ &slm_cost, ++ &slm_cost, ++ &slm_cost, ++ &slm_cost, ++ &slm_cost, ++ &slm_cost, ++ &skylake_cost, ++ &skylake_cost, ++ &skylake_cost, ++ &skylake_cost, ++ &skylake_cost, ++ &skylake_cost, ++ &intel_cost, ++ &geode_cost, ++ &k6_cost, ++ &athlon_cost, ++ &k8_cost, ++ &amdfam10_cost, ++ &bdver_cost, ++ &bdver_cost, ++ &bdver_cost, ++ &bdver_cost, ++ &btver1_cost, ++ &btver2_cost, ++ &znver1_cost, ++ &znver2_cost ++}; ++ ++/* Guarantee that the array is aligned with enum processor_type. */ ++STATIC_ASSERT (ARRAY_SIZE (processor_cost_table) == PROCESSOR_max); ++ ++static bool ++ix86_option_override_internal (bool main_args_p, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set); ++static void ++set_ix86_tune_features (enum processor_type ix86_tune, bool dump); ++ ++/* Restore the current options */ ++ ++void ++ix86_function_specific_restore (struct gcc_options *opts, ++ struct cl_target_option *ptr) ++{ ++ enum processor_type old_tune = ix86_tune; ++ enum processor_type old_arch = ix86_arch; ++ unsigned HOST_WIDE_INT ix86_arch_mask; ++ int i; ++ ++ /* We don't change -fPIC. */ ++ opts->x_flag_pic = flag_pic; ++ ++ ix86_arch = (enum processor_type) ptr->arch; ++ ix86_schedule = (enum attr_cpu) ptr->schedule; ++ ix86_tune = (enum processor_type) ptr->tune; ++ x86_prefetch_sse = ptr->prefetch_sse; ++ opts->x_ix86_branch_cost = ptr->branch_cost; ++ ix86_tune_defaulted = ptr->tune_defaulted; ++ ix86_arch_specified = ptr->arch_specified; ++ opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit; ++ opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit; ++ opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit; ++ opts->x_ix86_arch_string = ptr->x_ix86_arch_string; ++ opts->x_ix86_tune_string = ptr->x_ix86_tune_string; ++ opts->x_ix86_cmodel = ptr->x_ix86_cmodel; ++ opts->x_ix86_abi = ptr->x_ix86_abi; ++ opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect; ++ opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost; ++ opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes; ++ opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer; ++ opts->x_ix86_force_drap = ptr->x_ix86_force_drap; ++ opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg; ++ opts->x_ix86_pmode = ptr->x_ix86_pmode; ++ opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg; ++ opts->x_ix86_recip_name = ptr->x_ix86_recip_name; ++ opts->x_ix86_regparm = ptr->x_ix86_regparm; ++ opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold; ++ opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx; ++ opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard; ++ opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg; ++ opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect; ++ opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string; ++ opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy; ++ 
opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy; ++ opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default; ++ opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type; ++ ix86_tune_cost = processor_cost_table[ix86_tune]; ++ /* TODO: ix86_cost should be chosen at instruction or function granuality ++ so for cold code we use size_cost even in !optimize_size compilation. */ ++ if (opts->x_optimize_size) ++ ix86_cost = &ix86_size_cost; ++ else ++ ix86_cost = ix86_tune_cost; ++ ++ /* Recreate the arch feature tests if the arch changed */ ++ if (old_arch != ix86_arch) ++ { ++ ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch; ++ for (i = 0; i < X86_ARCH_LAST; ++i) ++ ix86_arch_features[i] ++ = !!(initial_ix86_arch_features[i] & ix86_arch_mask); ++ } ++ ++ /* Recreate the tune optimization tests */ ++ if (old_tune != ix86_tune) ++ set_ix86_tune_features (ix86_tune, false); ++} ++ ++/* Adjust target options after streaming them in. This is mainly about ++ reconciling them with global options. */ ++ ++void ++ix86_function_specific_post_stream_in (struct cl_target_option *ptr) ++{ ++ /* flag_pic is a global option, but ix86_cmodel is target saved option ++ partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel ++ for PIC, or error out. */ ++ if (flag_pic) ++ switch (ptr->x_ix86_cmodel) ++ { ++ case CM_SMALL: ++ ptr->x_ix86_cmodel = CM_SMALL_PIC; ++ break; ++ ++ case CM_MEDIUM: ++ ptr->x_ix86_cmodel = CM_MEDIUM_PIC; ++ break; ++ ++ case CM_LARGE: ++ ptr->x_ix86_cmodel = CM_LARGE_PIC; ++ break; ++ ++ case CM_KERNEL: ++ error ("code model %s does not support PIC mode", "kernel"); ++ break; ++ ++ default: ++ break; ++ } ++ else ++ switch (ptr->x_ix86_cmodel) ++ { ++ case CM_SMALL_PIC: ++ ptr->x_ix86_cmodel = CM_SMALL; ++ break; ++ ++ case CM_MEDIUM_PIC: ++ ptr->x_ix86_cmodel = CM_MEDIUM; ++ break; ++ ++ case CM_LARGE_PIC: ++ ptr->x_ix86_cmodel = CM_LARGE; ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* Print the current options */ ++ ++void ++ix86_function_specific_print (FILE *file, int indent, ++ struct cl_target_option *ptr) ++{ ++ char *target_string ++ = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2, ++ ptr->x_target_flags, ptr->x_ix86_target_flags, ++ NULL, NULL, ptr->x_ix86_fpmath, false, true); ++ ++ gcc_assert (ptr->arch < PROCESSOR_max); ++ fprintf (file, "%*sarch = %d (%s)\n", ++ indent, "", ++ ptr->arch, processor_names[ptr->arch]); ++ ++ gcc_assert (ptr->tune < PROCESSOR_max); ++ fprintf (file, "%*stune = %d (%s)\n", ++ indent, "", ++ ptr->tune, processor_names[ptr->tune]); ++ ++ fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); ++ ++ if (target_string) ++ { ++ fprintf (file, "%*s%s\n", indent, "", target_string); ++ free (target_string); ++ } ++} ++ ++ ++/* Inner function to process the attribute((target(...))), take an argument and ++ set the current options from the argument. If we have a list, recursively go ++ over the list. 
*/ ++ ++static bool ++ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], ++ struct gcc_options *opts, ++ struct gcc_options *opts_set, ++ struct gcc_options *enum_opts_set, ++ bool target_clone_attr) ++{ ++ char *next_optstr; ++ bool ret = true; ++ ++#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } ++#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } ++#define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 } ++#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } ++#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } ++ ++ enum ix86_opt_type ++ { ++ ix86_opt_unknown, ++ ix86_opt_yes, ++ ix86_opt_no, ++ ix86_opt_str, ++ ix86_opt_enum, ++ ix86_opt_isa ++ }; ++ ++ static const struct ++ { ++ const char *string; ++ size_t len; ++ enum ix86_opt_type type; ++ int opt; ++ int mask; ++ } attrs[] = { ++ /* isa options */ ++ IX86_ATTR_ISA ("pconfig", OPT_mpconfig), ++ IX86_ATTR_ISA ("wbnoinvd", OPT_mwbnoinvd), ++ IX86_ATTR_ISA ("sgx", OPT_msgx), ++ IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps), ++ IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw), ++ IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq), ++ IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2), ++ IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni), ++ IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg), ++ ++ IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi), ++ IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma), ++ IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl), ++ IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw), ++ IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq), ++ IX86_ATTR_ISA ("avx512er", OPT_mavx512er), ++ IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf), ++ IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd), ++ IX86_ATTR_ISA ("avx512f", OPT_mavx512f), ++ IX86_ATTR_ISA ("avx2", OPT_mavx2), ++ IX86_ATTR_ISA ("fma", OPT_mfma), ++ IX86_ATTR_ISA ("xop", OPT_mxop), ++ IX86_ATTR_ISA ("fma4", OPT_mfma4), ++ IX86_ATTR_ISA ("f16c", OPT_mf16c), ++ IX86_ATTR_ISA ("avx", OPT_mavx), ++ IX86_ATTR_ISA ("sse4", OPT_msse4), ++ IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), ++ IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), ++ IX86_ATTR_ISA ("sse4a", OPT_msse4a), ++ IX86_ATTR_ISA ("ssse3", OPT_mssse3), ++ IX86_ATTR_ISA ("sse3", OPT_msse3), ++ IX86_ATTR_ISA ("aes", OPT_maes), ++ IX86_ATTR_ISA ("sha", OPT_msha), ++ IX86_ATTR_ISA ("pclmul", OPT_mpclmul), ++ IX86_ATTR_ISA ("sse2", OPT_msse2), ++ IX86_ATTR_ISA ("sse", OPT_msse), ++ IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa), ++ IX86_ATTR_ISA ("3dnow", OPT_m3dnow), ++ IX86_ATTR_ISA ("mmx", OPT_mmmx), ++ IX86_ATTR_ISA ("rtm", OPT_mrtm), ++ IX86_ATTR_ISA ("prfchw", OPT_mprfchw), ++ IX86_ATTR_ISA ("rdseed", OPT_mrdseed), ++ IX86_ATTR_ISA ("adx", OPT_madx), ++ IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1), ++ IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt), ++ IX86_ATTR_ISA ("xsaves", OPT_mxsaves), ++ IX86_ATTR_ISA ("xsavec", OPT_mxsavec), ++ IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt), ++ IX86_ATTR_ISA ("xsave", OPT_mxsave), ++ IX86_ATTR_ISA ("abm", OPT_mabm), ++ IX86_ATTR_ISA ("bmi", OPT_mbmi), ++ IX86_ATTR_ISA ("bmi2", OPT_mbmi2), ++ IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt), ++ IX86_ATTR_ISA ("tbm", OPT_mtbm), ++ IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), ++ IX86_ATTR_ISA ("cx16", OPT_mcx16), ++ IX86_ATTR_ISA ("sahf", OPT_msahf), ++ IX86_ATTR_ISA ("movbe", OPT_mmovbe), ++ IX86_ATTR_ISA ("crc32", OPT_mcrc32), ++ IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase), ++ IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd), ++ IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx), ++ IX86_ATTR_ISA ("clzero", OPT_mclzero), ++ 
IX86_ATTR_ISA ("pku", OPT_mpku), ++ IX86_ATTR_ISA ("lwp", OPT_mlwp), ++ IX86_ATTR_ISA ("hle", OPT_mhle), ++ IX86_ATTR_ISA ("fxsr", OPT_mfxsr), ++ IX86_ATTR_ISA ("clwb", OPT_mclwb), ++ IX86_ATTR_ISA ("rdpid", OPT_mrdpid), ++ IX86_ATTR_ISA ("gfni", OPT_mgfni), ++ IX86_ATTR_ISA ("shstk", OPT_mshstk), ++ IX86_ATTR_ISA ("vaes", OPT_mvaes), ++ IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq), ++ IX86_ATTR_ISA ("movdiri", OPT_mmovdiri), ++ IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b), ++ IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg), ++ IX86_ATTR_ISA ("cldemote", OPT_mcldemote), ++ IX86_ATTR_ISA ("ptwrite", OPT_mptwrite), ++ ++ /* enum options */ ++ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), ++ ++ /* string options */ ++ IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), ++ IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), ++ ++ /* flag options */ ++ IX86_ATTR_YES ("cld", ++ OPT_mcld, ++ MASK_CLD), ++ ++ IX86_ATTR_NO ("fancy-math-387", ++ OPT_mfancy_math_387, ++ MASK_NO_FANCY_MATH_387), ++ ++ IX86_ATTR_YES ("ieee-fp", ++ OPT_mieee_fp, ++ MASK_IEEE_FP), ++ ++ IX86_ATTR_YES ("inline-all-stringops", ++ OPT_minline_all_stringops, ++ MASK_INLINE_ALL_STRINGOPS), ++ ++ IX86_ATTR_YES ("inline-stringops-dynamically", ++ OPT_minline_stringops_dynamically, ++ MASK_INLINE_STRINGOPS_DYNAMICALLY), ++ ++ IX86_ATTR_NO ("align-stringops", ++ OPT_mno_align_stringops, ++ MASK_NO_ALIGN_STRINGOPS), ++ ++ IX86_ATTR_YES ("recip", ++ OPT_mrecip, ++ MASK_RECIP), ++ }; ++ ++ location_t loc ++ = fndecl == NULL ? UNKNOWN_LOCATION : DECL_SOURCE_LOCATION (fndecl); ++ const char *attr_name = target_clone_attr ? "target_clone" : "target"; ++ ++ /* If this is a list, recurse to get the options. */ ++ if (TREE_CODE (args) == TREE_LIST) ++ { ++ bool ret = true; ++ ++ for (; args; args = TREE_CHAIN (args)) ++ if (TREE_VALUE (args) ++ && !ix86_valid_target_attribute_inner_p (fndecl, TREE_VALUE (args), ++ p_strings, opts, opts_set, ++ enum_opts_set, ++ target_clone_attr)) ++ ret = false; ++ ++ return ret; ++ } ++ ++ else if (TREE_CODE (args) != STRING_CST) ++ { ++ error_at (loc, "attribute %qs argument is not a string", attr_name); ++ return false; ++ } ++ ++ /* Handle multiple arguments separated by commas. */ ++ next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); ++ ++ while (next_optstr && *next_optstr != '\0') ++ { ++ char *p = next_optstr; ++ char *orig_p = p; ++ char *comma = strchr (next_optstr, ','); ++ size_t len, opt_len; ++ int opt; ++ bool opt_set_p; ++ char ch; ++ unsigned i; ++ enum ix86_opt_type type = ix86_opt_unknown; ++ int mask = 0; ++ ++ if (comma) ++ { ++ *comma = '\0'; ++ len = comma - next_optstr; ++ next_optstr = comma + 1; ++ } ++ else ++ { ++ len = strlen (p); ++ next_optstr = NULL; ++ } ++ ++ /* Recognize no-xxx. */ ++ if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') ++ { ++ opt_set_p = false; ++ p += 3; ++ len -= 3; ++ } ++ else ++ opt_set_p = true; ++ ++ /* Find the option. */ ++ ch = *p; ++ opt = N_OPTS; ++ for (i = 0; i < ARRAY_SIZE (attrs); i++) ++ { ++ type = attrs[i].type; ++ opt_len = attrs[i].len; ++ if (ch == attrs[i].string[0] ++ && ((type != ix86_opt_str && type != ix86_opt_enum) ++ ? len == opt_len ++ : len > opt_len) ++ && memcmp (p, attrs[i].string, opt_len) == 0) ++ { ++ opt = attrs[i].opt; ++ mask = attrs[i].mask; ++ break; ++ } ++ } ++ ++ /* Process the option. 
*/ ++ if (opt == N_OPTS) ++ { ++ error_at (loc, "attribute %qs argument %qs is unknown", ++ orig_p, attr_name); ++ ret = false; ++ } ++ ++ else if (type == ix86_opt_isa) ++ { ++ struct cl_decoded_option decoded; ++ ++ generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded); ++ ix86_handle_option (opts, opts_set, ++ &decoded, input_location); ++ } ++ ++ else if (type == ix86_opt_yes || type == ix86_opt_no) ++ { ++ if (type == ix86_opt_no) ++ opt_set_p = !opt_set_p; ++ ++ if (opt_set_p) ++ opts->x_target_flags |= mask; ++ else ++ opts->x_target_flags &= ~mask; ++ } ++ ++ else if (type == ix86_opt_str) ++ { ++ if (p_strings[opt]) ++ { ++ error_at (loc, "attribute value %qs was already specified " ++ "in %qs attribute", orig_p, attr_name); ++ ret = false; ++ } ++ else ++ { ++ p_strings[opt] = xstrdup (p + opt_len); ++ if (opt == IX86_FUNCTION_SPECIFIC_ARCH) ++ { ++ /* If arch= is set, clear all bits in x_ix86_isa_flags, ++ except for ISA_64BIT, ABI_64, ABI_X32, and CODE16 ++ and all bits in x_ix86_isa_flags2. */ ++ opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT ++ | OPTION_MASK_ABI_64 ++ | OPTION_MASK_ABI_X32 ++ | OPTION_MASK_CODE16); ++ opts->x_ix86_isa_flags_explicit &= (OPTION_MASK_ISA_64BIT ++ | OPTION_MASK_ABI_64 ++ | OPTION_MASK_ABI_X32 ++ | OPTION_MASK_CODE16); ++ opts->x_ix86_isa_flags2 = 0; ++ opts->x_ix86_isa_flags2_explicit = 0; ++ } ++ } ++ } ++ ++ else if (type == ix86_opt_enum) ++ { ++ bool arg_ok; ++ int value; ++ ++ arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET); ++ if (arg_ok) ++ set_option (opts, enum_opts_set, opt, value, ++ p + opt_len, DK_UNSPECIFIED, input_location, ++ global_dc); ++ else ++ { ++ error_at (loc, "attribute value %qs is unknown in %qs attribute", ++ orig_p, attr_name); ++ ret = false; ++ } ++ } ++ ++ else ++ gcc_unreachable (); ++ } ++ ++ return ret; ++} ++ ++/* Release allocated strings. */ ++static void ++release_options_strings (char **option_strings) ++{ ++ /* Free up memory allocated to hold the strings */ ++ for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) ++ free (option_strings[i]); ++} ++ ++/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ ++ ++tree ++ix86_valid_target_attribute_tree (tree fndecl, tree args, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set, ++ bool target_clone_attr) ++{ ++ const char *orig_arch_string = opts->x_ix86_arch_string; ++ const char *orig_tune_string = opts->x_ix86_tune_string; ++ enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath; ++ int orig_tune_defaulted = ix86_tune_defaulted; ++ int orig_arch_specified = ix86_arch_specified; ++ char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL }; ++ tree t = NULL_TREE; ++ struct cl_target_option *def ++ = TREE_TARGET_OPTION (target_option_default_node); ++ struct gcc_options enum_opts_set; ++ ++ memset (&enum_opts_set, 0, sizeof (enum_opts_set)); ++ ++ /* Process each of the options on the chain. */ ++ if (!ix86_valid_target_attribute_inner_p (fndecl, args, option_strings, opts, ++ opts_set, &enum_opts_set, ++ target_clone_attr)) ++ return error_mark_node; ++ ++ /* If the changed options are different from the default, rerun ++ ix86_option_override_internal, and then save the options away. ++ The string options are attribute options, and will be undone ++ when we copy the save structure. 
*/ ++ if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags ++ || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2 ++ || opts->x_target_flags != def->x_target_flags ++ || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] ++ || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] ++ || enum_opts_set.x_ix86_fpmath) ++ { ++ /* If we are using the default tune= or arch=, undo the string assigned, ++ and use the default. */ ++ if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) ++ opts->x_ix86_arch_string ++ = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]); ++ else if (!orig_arch_specified) ++ opts->x_ix86_arch_string = NULL; ++ ++ if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) ++ opts->x_ix86_tune_string ++ = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); ++ else if (orig_tune_defaulted) ++ opts->x_ix86_tune_string = NULL; ++ ++ /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ ++ if (enum_opts_set.x_ix86_fpmath) ++ opts_set->x_ix86_fpmath = (enum fpmath_unit) 1; ++ ++ /* Do any overrides, such as arch=xxx, or tune=xxx support. */ ++ bool r = ix86_option_override_internal (false, opts, opts_set); ++ if (!r) ++ { ++ release_options_strings (option_strings); ++ return error_mark_node; ++ } ++ ++ /* Add any builtin functions with the new isa if any. */ ++ ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2); ++ ++ /* Save the current options unless we are validating options for ++ #pragma. */ ++ t = build_target_option_node (opts); ++ ++ opts->x_ix86_arch_string = orig_arch_string; ++ opts->x_ix86_tune_string = orig_tune_string; ++ opts_set->x_ix86_fpmath = orig_fpmath_set; ++ ++ release_options_strings (option_strings); ++ } ++ ++ return t; ++} ++ ++/* Hook to validate attribute((target("string"))). */ ++ ++bool ++ix86_valid_target_attribute_p (tree fndecl, ++ tree ARG_UNUSED (name), ++ tree args, ++ int flags) ++{ ++ struct gcc_options func_options; ++ tree new_target, new_optimize; ++ bool ret = true; ++ ++ /* attribute((target("default"))) does nothing, beyond ++ affecting multi-versioning. */ ++ if (TREE_VALUE (args) ++ && TREE_CODE (TREE_VALUE (args)) == STRING_CST ++ && TREE_CHAIN (args) == NULL_TREE ++ && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0) ++ return true; ++ ++ tree old_optimize = build_optimization_node (&global_options); ++ ++ /* Get the optimization options of the current function. */ ++ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); ++ ++ if (!func_optimize) ++ func_optimize = old_optimize; ++ ++ /* Init func_options. */ ++ memset (&func_options, 0, sizeof (func_options)); ++ init_options_struct (&func_options, NULL); ++ lang_hooks.init_options_struct (&func_options); ++ ++ cl_optimization_restore (&func_options, ++ TREE_OPTIMIZATION (func_optimize)); ++ ++ /* Initialize func_options to the default before its target options can ++ be set. */ ++ cl_target_option_restore (&func_options, ++ TREE_TARGET_OPTION (target_option_default_node)); ++ ++ /* FLAGS == 1 is used for target_clones attribute. 
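For orientation, these are the strings that arrive here from user code; the declarations below are ordinary GCC usage examples (not taken from the patch) of the target and target_clones attributes that this hook and ix86_valid_target_attribute_tree validate.

    /* Per-function ISA overrides: enable AVX2, disable SSE4A for one function.  */
    __attribute__ ((target ("avx2,no-sse4a")))
    void fast_path (float *dst, const float *src, int n)
    {
      for (int i = 0; i < n; i++)
        dst[i] = src[i] * 2.0f;
    }

    /* arch= and tune= carry a string argument after the '='.  */
    __attribute__ ((target ("arch=haswell,tune=generic")))
    void tuned_path (void)
    {
    }

    /* target_clones goes through the same parser with target_clone_attr set;
       it additionally needs ifunc support at link time.  */
    __attribute__ ((target_clones ("default", "avx2")))
    void cloned_path (void)
    {
    }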
*/ ++ new_target ++ = ix86_valid_target_attribute_tree (fndecl, args, &func_options, ++ &global_options_set, flags == 1); ++ ++ new_optimize = build_optimization_node (&func_options); ++ ++ if (new_target == error_mark_node) ++ ret = false; ++ ++ else if (fndecl && new_target) ++ { ++ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; ++ ++ if (old_optimize != new_optimize) ++ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; ++ } ++ ++ finalize_options_struct (&func_options); ++ ++ return ret; ++} ++ ++const char *stringop_alg_names[] = { ++#define DEF_ENUM ++#define DEF_ALG(alg, name) #name, ++#include "stringop.def" ++#undef DEF_ENUM ++#undef DEF_ALG ++}; ++ ++/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=. ++ The string is of the following form (or comma separated list of it): ++ ++ strategy_alg:max_size:[align|noalign] ++ ++ where the full size range for the strategy is either [0, max_size] or ++ [min_size, max_size], in which min_size is the max_size + 1 of the ++ preceding range. The last size range must have max_size == -1. ++ ++ Examples: ++ ++ 1. ++ -mmemcpy-strategy=libcall:-1:noalign ++ ++ this is equivalent to (for known size memcpy) -mstringop-strategy=libcall ++ ++ ++ 2. ++ -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign ++ ++ This is to tell the compiler to use the following strategy for memset ++ 1) when the expected size is between [1, 16], use rep_8byte strategy; ++ 2) when the size is between [17, 2048], use vector_loop; ++ 3) when the size is > 2048, use libcall. */ ++ ++struct stringop_size_range ++{ ++ int max; ++ stringop_alg alg; ++ bool noalign; ++}; ++ ++static void ++ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset) ++{ ++ const struct stringop_algs *default_algs; ++ stringop_size_range input_ranges[MAX_STRINGOP_ALGS]; ++ char *curr_range_str, *next_range_str; ++ const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="; ++ int i = 0, n = 0; ++ ++ if (is_memset) ++ default_algs = &ix86_cost->memset[TARGET_64BIT != 0]; ++ else ++ default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; ++ ++ curr_range_str = strategy_str; ++ ++ do ++ { ++ int maxs; ++ char alg_name[128]; ++ char align[16]; ++ next_range_str = strchr (curr_range_str, ','); ++ if (next_range_str) ++ *next_range_str++ = '\0'; ++ ++ if (sscanf (curr_range_str, "%20[^:]:%d:%10s", alg_name, &maxs, ++ align) != 3) ++ { ++ error ("wrong argument %qs to option %qs", curr_range_str, opt); ++ return; ++ } ++ ++ if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1)) ++ { ++ error ("size ranges of option %qs should be increasing", opt); ++ return; ++ } ++ ++ for (i = 0; i < last_alg; i++) ++ if (!strcmp (alg_name, stringop_alg_names[i])) ++ break; ++ ++ if (i == last_alg) ++ { ++ error ("wrong strategy name %qs specified for option %qs", ++ alg_name, opt); ++ ++ auto_vec candidates; ++ for (i = 0; i < last_alg; i++) ++ if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT) ++ candidates.safe_push (stringop_alg_names[i]); ++ ++ char *s; ++ const char *hint ++ = candidates_list_and_hint (alg_name, s, candidates); ++ if (hint) ++ inform (input_location, ++ "valid arguments to %qs are: %s; did you mean %qs?", ++ opt, s, hint); ++ else ++ inform (input_location, "valid arguments to %qs are: %s", ++ opt, s); ++ XDELETEVEC (s); ++ return; ++ } ++ ++ if ((stringop_alg) i == rep_prefix_8_byte ++ && !TARGET_64BIT) ++ { ++ /* rep; movq isn't available in 32-bit code. 
*/ ++ error ("strategy name %qs specified for option %qs " ++ "not supported for 32-bit code", alg_name, opt); ++ return; ++ } ++ ++ input_ranges[n].max = maxs; ++ input_ranges[n].alg = (stringop_alg) i; ++ if (!strcmp (align, "align")) ++ input_ranges[n].noalign = false; ++ else if (!strcmp (align, "noalign")) ++ input_ranges[n].noalign = true; ++ else ++ { ++ error ("unknown alignment %qs specified for option %qs", align, opt); ++ return; ++ } ++ n++; ++ curr_range_str = next_range_str; ++ } ++ while (curr_range_str); ++ ++ if (input_ranges[n - 1].max != -1) ++ { ++ error ("the max value for the last size range should be -1" ++ " for option %qs", opt); ++ return; ++ } ++ ++ if (n > MAX_STRINGOP_ALGS) ++ { ++ error ("too many size ranges specified in option %qs", opt); ++ return; ++ } ++ ++ /* Now override the default algs array. */ ++ for (i = 0; i < n; i++) ++ { ++ *const_cast(&default_algs->size[i].max) = input_ranges[i].max; ++ *const_cast(&default_algs->size[i].alg) ++ = input_ranges[i].alg; ++ *const_cast(&default_algs->size[i].noalign) ++ = input_ranges[i].noalign; ++ } ++} ++ ++ ++/* parse -mtune-ctrl= option. When DUMP is true, ++ print the features that are explicitly set. */ ++ ++static void ++parse_mtune_ctrl_str (bool dump) ++{ ++ if (!ix86_tune_ctrl_string) ++ return; ++ ++ char *next_feature_string = NULL; ++ char *curr_feature_string = xstrdup (ix86_tune_ctrl_string); ++ char *orig = curr_feature_string; ++ int i; ++ do ++ { ++ bool clear = false; ++ ++ next_feature_string = strchr (curr_feature_string, ','); ++ if (next_feature_string) ++ *next_feature_string++ = '\0'; ++ if (*curr_feature_string == '^') ++ { ++ curr_feature_string++; ++ clear = true; ++ } ++ for (i = 0; i < X86_TUNE_LAST; i++) ++ { ++ if (!strcmp (curr_feature_string, ix86_tune_feature_names[i])) ++ { ++ ix86_tune_features[i] = !clear; ++ if (dump) ++ fprintf (stderr, "Explicitly %s feature %s\n", ++ clear ? "clear" : "set", ix86_tune_feature_names[i]); ++ break; ++ } ++ } ++ if (i == X86_TUNE_LAST) ++ error ("unknown parameter to option %<-mtune-ctrl%>: %s", ++ clear ? curr_feature_string - 1 : curr_feature_string); ++ curr_feature_string = next_feature_string; ++ } ++ while (curr_feature_string); ++ free (orig); ++} ++ ++/* Helper function to set ix86_tune_features. IX86_TUNE is the ++ processor type. */ ++ ++static void ++set_ix86_tune_features (enum processor_type ix86_tune, bool dump) ++{ ++ unsigned HOST_WIDE_INT ix86_tune_mask = HOST_WIDE_INT_1U << ix86_tune; ++ int i; ++ ++ for (i = 0; i < X86_TUNE_LAST; ++i) ++ { ++ if (ix86_tune_no_default) ++ ix86_tune_features[i] = 0; ++ else ++ ix86_tune_features[i] ++ = !!(initial_ix86_tune_features[i] & ix86_tune_mask); ++ } ++ ++ if (dump) ++ { ++ fprintf (stderr, "List of x86 specific tuning parameter names:\n"); ++ for (i = 0; i < X86_TUNE_LAST; i++) ++ fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i], ++ ix86_tune_features[i] ? "on" : "off"); ++ } ++ ++ parse_mtune_ctrl_str (dump); ++} ++ ++ ++/* Default align_* from the processor table. */ ++ ++static void ++ix86_default_align (struct gcc_options *opts) ++{ ++ /* -falign-foo without argument: supply one. 
*/ ++ if (opts->x_flag_align_loops && !opts->x_str_align_loops) ++ opts->x_str_align_loops = processor_cost_table[ix86_tune]->align_loop; ++ if (opts->x_flag_align_jumps && !opts->x_str_align_jumps) ++ opts->x_str_align_jumps = processor_cost_table[ix86_tune]->align_jump; ++ if (opts->x_flag_align_labels && !opts->x_str_align_labels) ++ opts->x_str_align_labels = processor_cost_table[ix86_tune]->align_label; ++ if (opts->x_flag_align_functions && !opts->x_str_align_functions) ++ opts->x_str_align_functions = processor_cost_table[ix86_tune]->align_func; ++} ++ ++/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */ ++ ++void ++ix86_override_options_after_change (void) ++{ ++ ix86_default_align (&global_options); ++} ++ ++/* Clear stack slot assignments remembered from previous functions. ++ This is called from INIT_EXPANDERS once before RTL is emitted for each ++ function. */ ++ ++static struct machine_function * ++ix86_init_machine_status (void) ++{ ++ struct machine_function *f; ++ ++ f = ggc_cleared_alloc (); ++ f->call_abi = ix86_abi; ++ ++ return f; ++} ++ ++/* Override various settings based on options. If MAIN_ARGS_P, the ++ options are from the command line, otherwise they are from ++ attributes. Return true if there's an error related to march ++ option. */ ++ ++static bool ++ix86_option_override_internal (bool main_args_p, ++ struct gcc_options *opts, ++ struct gcc_options *opts_set) ++{ ++ int i; ++ unsigned HOST_WIDE_INT ix86_arch_mask; ++ const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL); ++ ++ /* -mrecip options. */ ++ static struct ++ { ++ const char *string; /* option name */ ++ unsigned int mask; /* mask bits to set */ ++ } ++ const recip_options[] = ++ { ++ { "all", RECIP_MASK_ALL }, ++ { "none", RECIP_MASK_NONE }, ++ { "div", RECIP_MASK_DIV }, ++ { "sqrt", RECIP_MASK_SQRT }, ++ { "vec-div", RECIP_MASK_VEC_DIV }, ++ { "vec-sqrt", RECIP_MASK_VEC_SQRT }, ++ }; ++ ++ ++ /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if ++ TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */ ++ if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32); ++#ifdef TARGET_BI_ARCH ++ else ++ { ++#if TARGET_BI_ARCH == 1 ++ /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64 ++ is on and OPTION_MASK_ABI_X32 is off. We turn off ++ OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by ++ -mx32. */ ++ if (TARGET_X32_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; ++#else ++ /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is ++ on and OPTION_MASK_ABI_64 is off. We turn off ++ OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by ++ -m64 or OPTION_MASK_CODE16 is turned on by -m16. */ ++ if (TARGET_LP64_P (opts->x_ix86_isa_flags) ++ || TARGET_16BIT_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; ++#endif ++ if (TARGET_64BIT_P (opts->x_ix86_isa_flags) ++ && TARGET_IAMCU_P (opts->x_target_flags)) ++ sorry ("Intel MCU psABI isn%'t supported in %s mode", ++ TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit"); ++ } ++#endif ++ ++ if (TARGET_X32_P (opts->x_ix86_isa_flags)) ++ { ++ /* Always turn on OPTION_MASK_ISA_64BIT and turn off ++ OPTION_MASK_ABI_64 for TARGET_X32. 
*/ ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; ++ opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; ++ } ++ else if (TARGET_16BIT_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT ++ | OPTION_MASK_ABI_X32 ++ | OPTION_MASK_ABI_64); ++ else if (TARGET_LP64_P (opts->x_ix86_isa_flags)) ++ { ++ /* Always turn on OPTION_MASK_ISA_64BIT and turn off ++ OPTION_MASK_ABI_X32 for TARGET_LP64. */ ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; ++ opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; ++ } ++ ++#ifdef SUBTARGET_OVERRIDE_OPTIONS ++ SUBTARGET_OVERRIDE_OPTIONS; ++#endif ++ ++#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS ++ SUBSUBTARGET_OVERRIDE_OPTIONS; ++#endif ++ ++ /* -fPIC is the default for x86_64. */ ++ if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ opts->x_flag_pic = 2; ++ ++ /* Need to check -mtune=generic first. */ ++ if (opts->x_ix86_tune_string) ++ { ++ /* As special support for cross compilers we read -mtune=native ++ as -mtune=generic. With native compilers we won't see the ++ -mtune=native, as it was changed by the driver. */ ++ if (!strcmp (opts->x_ix86_tune_string, "native")) ++ { ++ opts->x_ix86_tune_string = "generic"; ++ } ++ else if (!strcmp (opts->x_ix86_tune_string, "x86-64")) ++ warning (OPT_Wdeprecated, ++ main_args_p ++ ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> " ++ "or %<-mtune=generic%> instead as appropriate") ++ : G_("% is deprecated; use " ++ "% or %" ++ " instead as appropriate")); ++ } ++ else ++ { ++ if (opts->x_ix86_arch_string) ++ opts->x_ix86_tune_string = opts->x_ix86_arch_string; ++ if (!opts->x_ix86_tune_string) ++ { ++ opts->x_ix86_tune_string = processor_names[TARGET_CPU_DEFAULT]; ++ ix86_tune_defaulted = 1; ++ } ++ ++ /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string ++ or defaulted. We need to use a sensible tune option. */ ++ if (!strcmp (opts->x_ix86_tune_string, "x86-64")) ++ { ++ opts->x_ix86_tune_string = "generic"; ++ } ++ } ++ ++ if (opts->x_ix86_stringop_alg == rep_prefix_8_byte ++ && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ { ++ /* rep; movq isn't available in 32-bit code. */ ++ error ("%<-mstringop-strategy=rep_8byte%> not supported for 32-bit code"); ++ opts->x_ix86_stringop_alg = no_stringop; ++ } ++ ++ if (!opts->x_ix86_arch_string) ++ opts->x_ix86_arch_string ++ = TARGET_64BIT_P (opts->x_ix86_isa_flags) ++ ? "x86-64" : SUBTARGET32_DEFAULT_CPU; ++ else ++ ix86_arch_specified = 1; ++ ++ if (opts_set->x_ix86_pmode) ++ { ++ if ((TARGET_LP64_P (opts->x_ix86_isa_flags) ++ && opts->x_ix86_pmode == PMODE_SI) ++ || (!TARGET_64BIT_P (opts->x_ix86_isa_flags) ++ && opts->x_ix86_pmode == PMODE_DI)) ++ error ("address mode %qs not supported in the %s bit mode", ++ TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long", ++ TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32"); ++ } ++ else ++ opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags) ++ ? PMODE_DI : PMODE_SI; ++ ++ if (!opts_set->x_ix86_abi) ++ opts->x_ix86_abi = DEFAULT_ABI; ++ ++ if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags)) ++ error ("%<-mabi=ms%> not supported with X32 ABI"); ++ gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI); ++ ++ const char *abi_name = opts->x_ix86_abi == MS_ABI ? 
"ms" : "sysv"; ++ if ((opts->x_flag_sanitize & SANITIZE_USER_ADDRESS) ++ && opts->x_ix86_abi != DEFAULT_ABI) ++ error ("%<-mabi=%s%> not supported with %<-fsanitize=address%>", abi_name); ++ if ((opts->x_flag_sanitize & SANITIZE_KERNEL_ADDRESS) ++ && opts->x_ix86_abi != DEFAULT_ABI) ++ error ("%<-mabi=%s%> not supported with %<-fsanitize=kernel-address%>", ++ abi_name); ++ if ((opts->x_flag_sanitize & SANITIZE_THREAD) ++ && opts->x_ix86_abi != DEFAULT_ABI) ++ error ("%<-mabi=%s%> not supported with %<-fsanitize=thread%>", abi_name); ++ ++ /* For targets using ms ABI enable ms-extensions, if not ++ explicit turned off. For non-ms ABI we turn off this ++ option. */ ++ if (!opts_set->x_flag_ms_extensions) ++ opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI); ++ ++ if (opts_set->x_ix86_cmodel) ++ { ++ switch (opts->x_ix86_cmodel) ++ { ++ case CM_SMALL: ++ case CM_SMALL_PIC: ++ if (opts->x_flag_pic) ++ opts->x_ix86_cmodel = CM_SMALL_PIC; ++ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ error ("code model %qs not supported in the %s bit mode", ++ "small", "32"); ++ break; ++ ++ case CM_MEDIUM: ++ case CM_MEDIUM_PIC: ++ if (opts->x_flag_pic) ++ opts->x_ix86_cmodel = CM_MEDIUM_PIC; ++ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ error ("code model %qs not supported in the %s bit mode", ++ "medium", "32"); ++ else if (TARGET_X32_P (opts->x_ix86_isa_flags)) ++ error ("code model %qs not supported in x32 mode", ++ "medium"); ++ break; ++ ++ case CM_LARGE: ++ case CM_LARGE_PIC: ++ if (opts->x_flag_pic) ++ opts->x_ix86_cmodel = CM_LARGE_PIC; ++ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ error ("code model %qs not supported in the %s bit mode", ++ "large", "32"); ++ else if (TARGET_X32_P (opts->x_ix86_isa_flags)) ++ error ("code model %qs not supported in x32 mode", ++ "large"); ++ break; ++ ++ case CM_32: ++ if (opts->x_flag_pic) ++ error ("code model %s does not support PIC mode", "32"); ++ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ error ("code model %qs not supported in the %s bit mode", ++ "32", "64"); ++ break; ++ ++ case CM_KERNEL: ++ if (opts->x_flag_pic) ++ { ++ error ("code model %s does not support PIC mode", "kernel"); ++ opts->x_ix86_cmodel = CM_32; ++ } ++ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ error ("code model %qs not supported in the %s bit mode", ++ "kernel", "32"); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ } ++ else ++ { ++ /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the ++ use of rip-relative addressing. This eliminates fixups that ++ would otherwise be needed if this object is to be placed in a ++ DLL, and is essentially just as efficient as direct addressing. */ ++ if (TARGET_64BIT_P (opts->x_ix86_isa_flags) ++ && (TARGET_RDOS || TARGET_PECOFF)) ++ opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1; ++ else if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL; ++ else ++ opts->x_ix86_cmodel = CM_32; ++ } ++ if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL) ++ { ++ error ("%<-masm=intel%> not supported in this configuration"); ++ opts->x_ix86_asm_dialect = ASM_ATT; ++ } ++ if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0) ++ != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0)) ++ sorry ("%i-bit mode not compiled in", ++ (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32); ++ ++ for (i = 0; i < pta_size; i++) ++ if (! 
strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name)) ++ { ++ if (!strcmp (opts->x_ix86_arch_string, "generic")) ++ { ++ error (main_args_p ++ ? G_("% CPU can be used only for %<-mtune=%> " ++ "switch") ++ : G_("% CPU can be used only for " ++ "% attribute")); ++ return false; ++ } ++ else if (!strcmp (opts->x_ix86_arch_string, "intel")) ++ { ++ error (main_args_p ++ ? G_("% CPU can be used only for %<-mtune=%> " ++ "switch") ++ : G_("% CPU can be used only for " ++ "% attribute")); ++ return false; ++ } ++ ++ if (TARGET_64BIT_P (opts->x_ix86_isa_flags) ++ && !((processor_alias_table[i].flags & PTA_64BIT) != 0)) ++ { ++ error ("CPU you selected does not support x86-64 " ++ "instruction set"); ++ return false; ++ } ++ ++ ix86_schedule = processor_alias_table[i].schedule; ++ ix86_arch = processor_alias_table[i].processor; ++ /* Default cpu tuning to the architecture. */ ++ ix86_tune = ix86_arch; ++ ++ if (((processor_alias_table[i].flags & PTA_MMX) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX; ++ if (((processor_alias_table[i].flags & PTA_3DNOW) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW; ++ if (((processor_alias_table[i].flags & PTA_3DNOW_A) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A; ++ if (((processor_alias_table[i].flags & PTA_SSE) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE; ++ if (((processor_alias_table[i].flags & PTA_SSE2) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2; ++ if (((processor_alias_table[i].flags & PTA_SSE3) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3; ++ if (((processor_alias_table[i].flags & PTA_SSSE3) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3; ++ if (((processor_alias_table[i].flags & PTA_SSE4_1) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; ++ if (((processor_alias_table[i].flags & PTA_SSE4_2) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2; ++ if (((processor_alias_table[i].flags & PTA_AVX) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX; ++ if (((processor_alias_table[i].flags & PTA_AVX2) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2; ++ if (((processor_alias_table[i].flags & PTA_FMA) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA; ++ if (((processor_alias_table[i].flags & PTA_SSE4A) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; ++ if (((processor_alias_table[i].flags & PTA_FMA4) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4; ++ if (((processor_alias_table[i].flags & PTA_XOP) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP; ++ if 
(((processor_alias_table[i].flags & PTA_LWP) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP; ++ if (((processor_alias_table[i].flags & PTA_ABM) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM; ++ if (((processor_alias_table[i].flags & PTA_BMI) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI; ++ if (((processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT; ++ if (((processor_alias_table[i].flags & PTA_TBM) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM; ++ if (((processor_alias_table[i].flags & PTA_BMI2) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2; ++ if (((processor_alias_table[i].flags & PTA_CX16) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16; ++ if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; ++ if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags) ++ && ((processor_alias_table[i].flags & PTA_NO_SAHF) != 0)) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF; ++ if (((processor_alias_table[i].flags & PTA_MOVBE) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MOVBE)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MOVBE; ++ if (((processor_alias_table[i].flags & PTA_AES) != 0) ++ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) ++ ix86_isa_flags |= OPTION_MASK_ISA_AES; ++ if (((processor_alias_table[i].flags & PTA_SHA) != 0) ++ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA)) ++ ix86_isa_flags |= OPTION_MASK_ISA_SHA; ++ if (((processor_alias_table[i].flags & PTA_PCLMUL) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; ++ if (((processor_alias_table[i].flags & PTA_FSGSBASE) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE; ++ if (((processor_alias_table[i].flags & PTA_RDRND) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND; ++ if (((processor_alias_table[i].flags & PTA_F16C) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C; ++ if (((processor_alias_table[i].flags & PTA_RTM) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM; ++ if (((processor_alias_table[i].flags & PTA_HLE) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_HLE)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_HLE; ++ if (((processor_alias_table[i].flags & PTA_PRFCHW) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW; ++ if (((processor_alias_table[i].flags & PTA_RDSEED) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED; ++ if 
(((processor_alias_table[i].flags & PTA_ADX) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX; ++ if (((processor_alias_table[i].flags & PTA_FXSR) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR; ++ if (((processor_alias_table[i].flags & PTA_XSAVE) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE; ++ if (((processor_alias_table[i].flags & PTA_XSAVEOPT) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT; ++ if (((processor_alias_table[i].flags & PTA_AVX512F) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F; ++ if (((processor_alias_table[i].flags & PTA_AVX512ER) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER; ++ if (((processor_alias_table[i].flags & PTA_AVX512PF) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF; ++ if (((processor_alias_table[i].flags & PTA_AVX512CD) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD; ++ if (((processor_alias_table[i].flags & PTA_PREFETCHWT1) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1; ++ if (((processor_alias_table[i].flags & PTA_CLWB) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB; ++ if (((processor_alias_table[i].flags & PTA_CLFLUSHOPT) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT; ++ if (((processor_alias_table[i].flags & PTA_CLZERO) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CLZERO)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CLZERO; ++ if (((processor_alias_table[i].flags & PTA_XSAVEC) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC; ++ if (((processor_alias_table[i].flags & PTA_XSAVES) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES; ++ if (((processor_alias_table[i].flags & PTA_AVX512DQ) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ; ++ if (((processor_alias_table[i].flags & PTA_AVX512BW) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW; ++ if (((processor_alias_table[i].flags & PTA_AVX512VL) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL; ++ if (((processor_alias_table[i].flags & PTA_AVX512VBMI) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI; ++ if (((processor_alias_table[i].flags & PTA_AVX512IFMA) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA; ++ if (((processor_alias_table[i].flags & PTA_AVX512VNNI) != 0) ++ 
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VNNI)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VNNI; ++ if (((processor_alias_table[i].flags & PTA_GFNI) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_GFNI)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_GFNI; ++ if (((processor_alias_table[i].flags & PTA_AVX512VBMI2) != 0) ++ && !(opts->x_ix86_isa_flags_explicit ++ & OPTION_MASK_ISA_AVX512VBMI2)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI2; ++ if (((processor_alias_table[i].flags & PTA_VPCLMULQDQ) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_VPCLMULQDQ)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ; ++ if (((processor_alias_table[i].flags & PTA_AVX512BITALG) != 0) ++ && !(opts->x_ix86_isa_flags_explicit ++ & OPTION_MASK_ISA_AVX512BITALG)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BITALG; ++ ++ if (((processor_alias_table[i].flags & PTA_AVX5124VNNIW) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit ++ & OPTION_MASK_ISA_AVX5124VNNIW)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW; ++ if (((processor_alias_table[i].flags & PTA_AVX5124FMAPS) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit ++ & OPTION_MASK_ISA_AVX5124FMAPS)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS; ++ if (((processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ) != 0) ++ && !(opts->x_ix86_isa_flags_explicit ++ & OPTION_MASK_ISA_AVX512VPOPCNTDQ)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ; ++ if (((processor_alias_table[i].flags & PTA_SGX) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX; ++ if (((processor_alias_table[i].flags & PTA_VAES) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_VAES)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_VAES; ++ if (((processor_alias_table[i].flags & PTA_RDPID) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_RDPID)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_RDPID; ++ if (((processor_alias_table[i].flags & PTA_PCONFIG) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PCONFIG)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PCONFIG; ++ if (((processor_alias_table[i].flags & PTA_WBNOINVD) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_WBNOINVD)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_WBNOINVD; ++ if (((processor_alias_table[i].flags & PTA_PTWRITE) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PTWRITE)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE; ++ ++ if ((processor_alias_table[i].flags ++ & (PTA_PREFETCH_SSE | PTA_SSE)) != 0) ++ x86_prefetch_sse = true; ++ if (((processor_alias_table[i].flags & PTA_MWAITX) != 0) ++ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MWAITX)) ++ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MWAITX; ++ if (((processor_alias_table[i].flags & PTA_PKU) != 0) ++ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU)) ++ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU; ++ ++ /* Don't enable x87 instructions if only ++ general registers are allowed. */ ++ if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY) ++ && !(opts_set->x_target_flags & MASK_80387)) ++ { ++ if (((processor_alias_table[i].flags & PTA_NO_80387) != 0)) ++ opts->x_target_flags &= ~MASK_80387; ++ else ++ opts->x_target_flags |= MASK_80387; ++ } ++ break; ++ } ++ ++ if (i == pta_size) ++ { ++ error (main_args_p ++ ? 
G_("bad value (%qs) for %<-march=%> switch") ++ : G_("bad value (%qs) for % attribute"), ++ opts->x_ix86_arch_string); ++ ++ auto_vec candidates; ++ for (i = 0; i < pta_size; i++) ++ if (strcmp (processor_alias_table[i].name, "generic") ++ && strcmp (processor_alias_table[i].name, "intel") ++ && (!TARGET_64BIT_P (opts->x_ix86_isa_flags) ++ || ((processor_alias_table[i].flags & PTA_64BIT) != 0))) ++ candidates.safe_push (processor_alias_table[i].name); ++ ++#ifdef HAVE_LOCAL_CPU_DETECT ++ /* Add also "native" as possible value. */ ++ candidates.safe_push ("native"); ++#endif ++ ++ char *s; ++ const char *hint ++ = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates); ++ if (hint) ++ inform (input_location, ++ main_args_p ++ ? G_("valid arguments to %<-march=%> switch are: " ++ "%s; did you mean %qs?") ++ : G_("valid arguments to % attribute are: " ++ "%s; did you mean %qs?"), s, hint); ++ else ++ inform (input_location, ++ main_args_p ++ ? G_("valid arguments to %<-march=%> switch are: %s") ++ : G_("valid arguments to % attribute " ++ "are: %s"), s); ++ XDELETEVEC (s); ++ } ++ ++ ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch; ++ for (i = 0; i < X86_ARCH_LAST; ++i) ++ ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); ++ ++ for (i = 0; i < pta_size; i++) ++ if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)) ++ { ++ ix86_schedule = processor_alias_table[i].schedule; ++ ix86_tune = processor_alias_table[i].processor; ++ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ { ++ if (!((processor_alias_table[i].flags & PTA_64BIT) != 0)) ++ { ++ if (ix86_tune_defaulted) ++ { ++ opts->x_ix86_tune_string = "x86-64"; ++ for (i = 0; i < pta_size; i++) ++ if (! strcmp (opts->x_ix86_tune_string, ++ processor_alias_table[i].name)) ++ break; ++ ix86_schedule = processor_alias_table[i].schedule; ++ ix86_tune = processor_alias_table[i].processor; ++ } ++ else ++ error ("CPU you selected does not support x86-64 " ++ "instruction set"); ++ } ++ } ++ /* Intel CPUs have always interpreted SSE prefetch instructions as ++ NOPs; so, we can enable SSE prefetch instructions even when ++ -mtune (rather than -march) points us to a processor that has them. ++ However, the VIA C3 gives a SIGILL, so we only do that for i686 and ++ higher processors. */ ++ if (TARGET_CMOV ++ && ((processor_alias_table[i].flags ++ & (PTA_PREFETCH_SSE | PTA_SSE)) != 0)) ++ x86_prefetch_sse = true; ++ break; ++ } ++ ++ if (ix86_tune_specified && i == pta_size) ++ { ++ error (main_args_p ++ ? G_("bad value (%qs) for %<-mtune=%> switch") ++ : G_("bad value (%qs) for % attribute"), ++ opts->x_ix86_tune_string); ++ ++ auto_vec candidates; ++ for (i = 0; i < pta_size; i++) ++ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) ++ || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) ++ candidates.safe_push (processor_alias_table[i].name); ++ ++#ifdef HAVE_LOCAL_CPU_DETECT ++ /* Add also "native" as possible value. */ ++ candidates.safe_push ("native"); ++#endif ++ ++ char *s; ++ const char *hint ++ = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates); ++ if (hint) ++ inform (input_location, ++ main_args_p ++ ? G_("valid arguments to %<-mtune=%> switch are: " ++ "%s; did you mean %qs?") ++ : G_("valid arguments to % attribute are: " ++ "%s; did you mean %qs?"), s, hint); ++ else ++ inform (input_location, ++ main_args_p ++ ? 
G_("valid arguments to %<-mtune=%> switch are: %s") ++ : G_("valid arguments to % attribute " ++ "are: %s"), s); ++ XDELETEVEC (s); ++ } ++ ++ set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes); ++ ++#ifndef USE_IX86_FRAME_POINTER ++#define USE_IX86_FRAME_POINTER 0 ++#endif ++ ++#ifndef USE_X86_64_FRAME_POINTER ++#define USE_X86_64_FRAME_POINTER 0 ++#endif ++ ++ /* Set the default values for switches whose default depends on TARGET_64BIT ++ in case they weren't overwritten by command line options. */ ++ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ { ++ if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) ++ opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER; ++ if (opts->x_flag_asynchronous_unwind_tables ++ && !opts_set->x_flag_unwind_tables ++ && TARGET_64BIT_MS_ABI) ++ opts->x_flag_unwind_tables = 1; ++ if (opts->x_flag_asynchronous_unwind_tables == 2) ++ opts->x_flag_unwind_tables ++ = opts->x_flag_asynchronous_unwind_tables = 1; ++ if (opts->x_flag_pcc_struct_return == 2) ++ opts->x_flag_pcc_struct_return = 0; ++ } ++ else ++ { ++ if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) ++ opts->x_flag_omit_frame_pointer ++ = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size); ++ if (opts->x_flag_asynchronous_unwind_tables == 2) ++ opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER; ++ if (opts->x_flag_pcc_struct_return == 2) ++ { ++ /* Intel MCU psABI specifies that -freg-struct-return should ++ be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1, ++ we check -miamcu so that -freg-struct-return is always ++ turned on if -miamcu is used. */ ++ if (TARGET_IAMCU_P (opts->x_target_flags)) ++ opts->x_flag_pcc_struct_return = 0; ++ else ++ opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; ++ } ++ } ++ ++ ix86_tune_cost = processor_cost_table[ix86_tune]; ++ /* TODO: ix86_cost should be chosen at instruction or function granuality ++ so for cold code we use size_cost even in !optimize_size compilation. */ ++ if (opts->x_optimize_size) ++ ix86_cost = &ix86_size_cost; ++ else ++ ix86_cost = ix86_tune_cost; ++ ++ /* Arrange to set up i386_stack_locals for all functions. */ ++ init_machine_status = ix86_init_machine_status; ++ ++ /* Validate -mregparm= value. */ ++ if (opts_set->x_ix86_regparm) ++ { ++ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ warning (0, "%<-mregparm%> is ignored in 64-bit mode"); ++ else if (TARGET_IAMCU_P (opts->x_target_flags)) ++ warning (0, "%<-mregparm%> is ignored for Intel MCU psABI"); ++ if (opts->x_ix86_regparm > REGPARM_MAX) ++ { ++ error ("%<-mregparm=%d%> is not between 0 and %d", ++ opts->x_ix86_regparm, REGPARM_MAX); ++ opts->x_ix86_regparm = 0; ++ } ++ } ++ if (TARGET_IAMCU_P (opts->x_target_flags) ++ || TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_regparm = REGPARM_MAX; ++ ++ /* Default align_* from the processor table. */ ++ ix86_default_align (opts); ++ ++ /* Provide default for -mbranch-cost= value. */ ++ if (!opts_set->x_ix86_branch_cost) ++ opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost; ++ ++ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ { ++ opts->x_target_flags ++ |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags; ++ ++ if (!ix86_arch_specified) ++ opts->x_ix86_isa_flags ++ |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit; ++ ++ if (TARGET_RTD_P (opts->x_target_flags)) ++ warning (0, ++ main_args_p ++ ? 
G_("%<-mrtd%> is ignored in 64bit mode") ++ : G_("% is ignored in 64bit mode")); ++ } ++ else ++ { ++ opts->x_target_flags ++ |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags; ++ ++ if (!ix86_arch_specified) ++ opts->x_ix86_isa_flags ++ |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit; ++ ++ /* i386 ABI does not specify red zone. It still makes sense to use it ++ when programmer takes care to stack from being destroyed. */ ++ if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE)) ++ opts->x_target_flags |= MASK_NO_RED_ZONE; ++ } ++ ++ /* Keep nonleaf frame pointers. */ ++ if (opts->x_flag_omit_frame_pointer) ++ opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; ++ else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)) ++ opts->x_flag_omit_frame_pointer = 1; ++ ++ /* If we're doing fast math, we don't care about comparison order ++ wrt NaNs. This lets us use a shorter comparison sequence. */ ++ if (opts->x_flag_finite_math_only) ++ opts->x_target_flags &= ~MASK_IEEE_FP; ++ ++ /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, ++ since the insns won't need emulation. */ ++ if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387]) ++ opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387; ++ ++ /* Likewise, if the target doesn't have a 387, or we've specified ++ software floating point, don't use 387 inline intrinsics. */ ++ if (!TARGET_80387_P (opts->x_target_flags)) ++ opts->x_target_flags |= MASK_NO_FANCY_MATH_387; ++ ++ /* Turn on MMX builtins for -msse. */ ++ if (TARGET_SSE_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_isa_flags ++ |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit; ++ ++ /* Enable SSE prefetch. */ ++ if (TARGET_SSE_P (opts->x_ix86_isa_flags) ++ || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags) ++ && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)) ++ || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags)) ++ x86_prefetch_sse = true; ++ ++ /* Enable popcnt instruction for -msse4.2 or -mabm. */ ++ if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags) ++ || TARGET_ABM_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_isa_flags ++ |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit; ++ ++ /* Enable lzcnt instruction for -mabm. */ ++ if (TARGET_ABM_P(opts->x_ix86_isa_flags)) ++ opts->x_ix86_isa_flags ++ |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit; ++ ++ /* Disable BMI, BMI2 and TBM instructions for -m16. */ ++ if (TARGET_16BIT_P(opts->x_ix86_isa_flags)) ++ opts->x_ix86_isa_flags ++ &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM) ++ & ~opts->x_ix86_isa_flags_explicit); ++ ++ /* Validate -mpreferred-stack-boundary= value or default it to ++ PREFERRED_STACK_BOUNDARY_DEFAULT. */ ++ ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; ++ if (opts_set->x_ix86_preferred_stack_boundary_arg) ++ { ++ int min = TARGET_64BIT_P (opts->x_ix86_isa_flags)? 3 : 2; ++ int max = TARGET_SEH ? 4 : 12; ++ ++ if (opts->x_ix86_preferred_stack_boundary_arg < min ++ || opts->x_ix86_preferred_stack_boundary_arg > max) ++ { ++ if (min == max) ++ error ("%<-mpreferred-stack-boundary%> is not supported " ++ "for this target"); ++ else ++ error ("%<-mpreferred-stack-boundary=%d%> is not between %d and %d", ++ opts->x_ix86_preferred_stack_boundary_arg, min, max); ++ } ++ else ++ ix86_preferred_stack_boundary ++ = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT; ++ } ++ ++ /* Set the default value for -mstackrealign. 
*/ ++ if (!opts_set->x_ix86_force_align_arg_pointer) ++ opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; ++ ++ ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; ++ ++ /* Validate -mincoming-stack-boundary= value or default it to ++ MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ ++ ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; ++ if (opts_set->x_ix86_incoming_stack_boundary_arg) ++ { ++ int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2; ++ ++ if (opts->x_ix86_incoming_stack_boundary_arg < min ++ || opts->x_ix86_incoming_stack_boundary_arg > 12) ++ error ("%<-mincoming-stack-boundary=%d%> is not between %d and 12", ++ opts->x_ix86_incoming_stack_boundary_arg, min); ++ else ++ { ++ ix86_user_incoming_stack_boundary ++ = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT; ++ ix86_incoming_stack_boundary ++ = ix86_user_incoming_stack_boundary; ++ } ++ } ++ ++#ifndef NO_PROFILE_COUNTERS ++ if (flag_nop_mcount) ++ error ("%<-mnop-mcount%> is not compatible with this target"); ++#endif ++ if (flag_nop_mcount && flag_pic) ++ error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>"); ++ ++ /* Accept -msseregparm only if at least SSE support is enabled. */ ++ if (TARGET_SSEREGPARM_P (opts->x_target_flags) ++ && ! TARGET_SSE_P (opts->x_ix86_isa_flags)) ++ error (main_args_p ++ ? G_("%<-msseregparm%> used without SSE enabled") ++ : G_("%<target(\"sseregparm\")%> used without SSE enabled")); ++ ++ if (opts_set->x_ix86_fpmath) ++ { ++ if (opts->x_ix86_fpmath & FPMATH_SSE) ++ { ++ if (!TARGET_SSE_P (opts->x_ix86_isa_flags)) ++ { ++ if (TARGET_80387_P (opts->x_target_flags)) ++ { ++ warning (0, "SSE instruction set disabled, using 387 arithmetics"); ++ opts->x_ix86_fpmath = FPMATH_387; ++ } ++ } ++ else if ((opts->x_ix86_fpmath & FPMATH_387) ++ && !TARGET_80387_P (opts->x_target_flags)) ++ { ++ warning (0, "387 instruction set disabled, using SSE arithmetics"); ++ opts->x_ix86_fpmath = FPMATH_SSE; ++ } ++ } ++ } ++ /* For all chips supporting SSE2, -mfpmath=sse performs better than ++ fpmath=387. The second is however default at many targets since the ++ extra 80bit precision of temporaries is considered to be part of ABI. ++ Overwrite the default at least for -ffast-math. ++ TODO: -mfpmath=both seems to produce same performing code with bit ++ smaller binaries. It is however not clear if register allocation is ++ ready for this setting. ++ Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE ++ codegen. We may switch to 387 with -ffast-math for size optimized ++ functions. */ ++ else if (fast_math_flags_set_p (&global_options) ++ && TARGET_SSE2_P (opts->x_ix86_isa_flags)) ++ opts->x_ix86_fpmath = FPMATH_SSE; ++ else ++ opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags); ++ ++ /* Use external vectorized library in vectorizing intrinsics. 
*/ ++ if (opts_set->x_ix86_veclibabi_type) ++ switch (opts->x_ix86_veclibabi_type) ++ { ++ case ix86_veclibabi_type_svml: ++ ix86_veclib_handler = &ix86_veclibabi_svml; ++ break; ++ ++ case ix86_veclibabi_type_acml: ++ ix86_veclib_handler = &ix86_veclibabi_acml; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS] ++ && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) ++ opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; ++ ++ /* If stack probes are required, the space used for large function ++ arguments on the stack must also be probed, so enable ++ -maccumulate-outgoing-args so this happens in the prologue. */ ++ if (TARGET_STACK_PROBE_P (opts->x_target_flags) ++ && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) ++ { ++ if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) ++ warning (0, ++ main_args_p ++ ? G_("stack probing requires %<-maccumulate-outgoing-args%> " ++ "for correctness") ++ : G_("stack probing requires " ++ "%<target(\"accumulate-outgoing-args\")%> for " ++ "correctness")); ++ opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; ++ } ++ ++ /* Stack realignment without -maccumulate-outgoing-args requires %ebp, ++ so enable -maccumulate-outgoing-args when %ebp is fixed. */ ++ if (fixed_regs[BP_REG] ++ && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) ++ { ++ if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) ++ warning (0, ++ main_args_p ++ ? G_("fixed ebp register requires " ++ "%<-maccumulate-outgoing-args%>") ++ : G_("fixed ebp register requires " ++ "%<target(\"accumulate-outgoing-args\")%>")); ++ opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; ++ } ++ ++ /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ ++ { ++ char *p; ++ ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); ++ p = strchr (internal_label_prefix, 'X'); ++ internal_label_prefix_len = p - internal_label_prefix; ++ *p = '\0'; ++ } ++ ++ /* When scheduling description is not available, disable scheduler pass ++ so it won't slow down the compilation and make x87 code slower. */ ++ if (!TARGET_SCHEDULE) ++ opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0; ++ ++ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, ++ ix86_tune_cost->simultaneous_prefetches, ++ opts->x_param_values, ++ opts_set->x_param_values); ++ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ++ ix86_tune_cost->prefetch_block, ++ opts->x_param_values, ++ opts_set->x_param_values); ++ maybe_set_param_value (PARAM_L1_CACHE_SIZE, ++ ix86_tune_cost->l1_cache_size, ++ opts->x_param_values, ++ opts_set->x_param_values); ++ maybe_set_param_value (PARAM_L2_CACHE_SIZE, ++ ix86_tune_cost->l2_cache_size, ++ opts->x_param_values, ++ opts_set->x_param_values); ++ ++ /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */ ++ if (opts->x_flag_prefetch_loop_arrays < 0 ++ && HAVE_prefetch ++ && (opts->x_optimize >= 3 || opts->x_flag_profile_use) ++ && !opts->x_optimize_size ++ && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL) ++ opts->x_flag_prefetch_loop_arrays = 1; ++ ++ /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) ++ can be opts->x_optimized to ap = __builtin_next_arg (0). 
*/ ++ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack) ++ targetm.expand_builtin_va_start = NULL; ++ ++ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ { ++ ix86_gen_leave = gen_leave_rex64; ++ if (Pmode == DImode) ++ { ++ ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di; ++ ix86_gen_tls_local_dynamic_base_64 ++ = gen_tls_local_dynamic_base_64_di; ++ } ++ else ++ { ++ ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si; ++ ix86_gen_tls_local_dynamic_base_64 ++ = gen_tls_local_dynamic_base_64_si; ++ } ++ } ++ else ++ ix86_gen_leave = gen_leave; ++ ++ if (Pmode == DImode) ++ { ++ ix86_gen_add3 = gen_adddi3; ++ ix86_gen_sub3 = gen_subdi3; ++ ix86_gen_sub3_carry = gen_subdi3_carry; ++ ix86_gen_one_cmpl2 = gen_one_cmpldi2; ++ ix86_gen_andsp = gen_anddi3; ++ ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di; ++ ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; ++ ix86_gen_probe_stack_range = gen_probe_stack_rangedi; ++ ix86_gen_monitor = gen_sse3_monitor_di; ++ ix86_gen_monitorx = gen_monitorx_di; ++ ix86_gen_clzero = gen_clzero_di; ++ } ++ else ++ { ++ ix86_gen_add3 = gen_addsi3; ++ ix86_gen_sub3 = gen_subsi3; ++ ix86_gen_sub3_carry = gen_subsi3_carry; ++ ix86_gen_one_cmpl2 = gen_one_cmplsi2; ++ ix86_gen_andsp = gen_andsi3; ++ ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si; ++ ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi; ++ ix86_gen_probe_stack_range = gen_probe_stack_rangesi; ++ ix86_gen_monitor = gen_sse3_monitor_si; ++ ix86_gen_monitorx = gen_monitorx_si; ++ ix86_gen_clzero = gen_clzero_si; ++ } ++ ++#ifdef USE_IX86_CLD ++ /* Use -mcld by default for 32-bit code if configured with --enable-cld. */ ++ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags; ++#endif ++ ++ /* Set the default value for -mfentry. */ ++ if (!opts_set->x_flag_fentry) ++ opts->x_flag_fentry = TARGET_SEH; ++ else ++ { ++ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic ++ && opts->x_flag_fentry) ++ sorry ("%<-mfentry%> isn%'t supported for 32-bit in combination " ++ "with %<-fpic%>"); ++ else if (TARGET_SEH && !opts->x_flag_fentry) ++ sorry ("%<-mno-fentry%> isn%'t compatible with SEH"); ++ } ++ ++ if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES) ++ sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH"); ++ ++ if (!(opts_set->x_target_flags & MASK_VZEROUPPER) ++ && TARGET_EMIT_VZEROUPPER) ++ opts->x_target_flags |= MASK_VZEROUPPER; ++ if (!(opts_set->x_target_flags & MASK_STV)) ++ opts->x_target_flags |= MASK_STV; ++ /* Disable STV if -mpreferred-stack-boundary={2,3} or ++ -mincoming-stack-boundary={2,3} or -mstackrealign - the needed ++ stack realignment will be extra cost the pass doesn't take into ++ account and the pass can't realign the stack. */ ++ if (ix86_preferred_stack_boundary < 128 ++ || ix86_incoming_stack_boundary < 128 ++ || opts->x_ix86_force_align_arg_pointer) ++ opts->x_target_flags &= ~MASK_STV; ++ if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] ++ && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) ++ opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; ++ if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] ++ && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) ++ opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; ++ ++ /* Enable 128-bit AVX instruction generation ++ for the auto-vectorizer. 
*/ ++ if (TARGET_AVX128_OPTIMAL ++ && (opts_set->x_prefer_vector_width_type == PVW_NONE)) ++ opts->x_prefer_vector_width_type = PVW_AVX128; ++ ++ /* Use 256-bit AVX instruction generation ++ in the auto-vectorizer. */ ++ if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL] ++ && (opts_set->x_prefer_vector_width_type == PVW_NONE)) ++ opts->x_prefer_vector_width_type = PVW_AVX256; ++ ++ if (opts->x_ix86_recip_name) ++ { ++ char *p = ASTRDUP (opts->x_ix86_recip_name); ++ char *q; ++ unsigned int mask, i; ++ bool invert; ++ ++ while ((q = strtok (p, ",")) != NULL) ++ { ++ p = NULL; ++ if (*q == '!') ++ { ++ invert = true; ++ q++; ++ } ++ else ++ invert = false; ++ ++ if (!strcmp (q, "default")) ++ mask = RECIP_MASK_ALL; ++ else ++ { ++ for (i = 0; i < ARRAY_SIZE (recip_options); i++) ++ if (!strcmp (q, recip_options[i].string)) ++ { ++ mask = recip_options[i].mask; ++ break; ++ } ++ ++ if (i == ARRAY_SIZE (recip_options)) ++ { ++ error ("unknown option for %<-mrecip=%s%>", q); ++ invert = false; ++ mask = RECIP_MASK_NONE; ++ } ++ } ++ ++ opts->x_recip_mask_explicit |= mask; ++ if (invert) ++ opts->x_recip_mask &= ~mask; ++ else ++ opts->x_recip_mask |= mask; ++ } ++ } ++ ++ if (TARGET_RECIP_P (opts->x_target_flags)) ++ opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit; ++ else if (opts_set->x_target_flags & MASK_RECIP) ++ opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit); ++ ++ /* Default long double to 64-bit for 32-bit Bionic and to __float128 ++ for 64-bit Bionic. Also default long double to 64-bit for Intel ++ MCU psABI. */ ++ if ((TARGET_HAS_BIONIC || TARGET_IAMCU) ++ && !(opts_set->x_target_flags ++ & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128))) ++ opts->x_target_flags |= (TARGET_64BIT ++ ? MASK_LONG_DOUBLE_128 ++ : MASK_LONG_DOUBLE_64); ++ ++ /* Only one of them can be active. */ ++ gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0 ++ || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0); ++ ++ /* Handle stack protector */ ++ if (!opts_set->x_ix86_stack_protector_guard) ++ { ++#ifdef TARGET_THREAD_SSP_OFFSET ++ if (!TARGET_HAS_BIONIC) ++ opts->x_ix86_stack_protector_guard = SSP_TLS; ++ else ++#endif ++ opts->x_ix86_stack_protector_guard = SSP_GLOBAL; ++ } ++ ++ if (opts_set->x_ix86_stack_protector_guard_offset_str) ++ { ++ char *endp; ++ const char *str = opts->x_ix86_stack_protector_guard_offset_str; ++ ++ errno = 0; ++ int64_t offset; ++ ++#if defined(INT64_T_IS_LONG) ++ offset = strtol (str, &endp, 0); ++#else ++ offset = strtoll (str, &endp, 0); ++#endif ++ ++ if (!*str || *endp || errno) ++ error ("%qs is not a valid number " ++ "in %<-mstack-protector-guard-offset=%>", str); ++ ++ if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000), ++ HOST_WIDE_INT_C (0x7fffffff))) ++ error ("%qs is not a valid offset " ++ "in %<-mstack-protector-guard-offset=%>", str); ++ ++ opts->x_ix86_stack_protector_guard_offset = offset; ++ } ++#ifdef TARGET_THREAD_SSP_OFFSET ++ else ++ opts->x_ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; ++#endif ++ ++ if (opts_set->x_ix86_stack_protector_guard_reg_str) ++ { ++ const char *str = opts->x_ix86_stack_protector_guard_reg_str; ++ addr_space_t seg = ADDR_SPACE_GENERIC; ++ ++ /* Discard optional register prefix. 
*/ ++ if (str[0] == '%') ++ str++; ++ ++ if (strlen (str) == 2 && str[1] == 's') ++ { ++ if (str[0] == 'f') ++ seg = ADDR_SPACE_SEG_FS; ++ else if (str[0] == 'g') ++ seg = ADDR_SPACE_SEG_GS; ++ } ++ ++ if (seg == ADDR_SPACE_GENERIC) ++ error ("%qs is not a valid base register " ++ "in %<-mstack-protector-guard-reg=%>", ++ opts->x_ix86_stack_protector_guard_reg_str); ++ ++ opts->x_ix86_stack_protector_guard_reg = seg; ++ } ++ else ++ { ++ opts->x_ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG; ++ ++ /* The kernel uses a different segment register for performance ++ reasons; a system call would not have to trash the userspace ++ segment register, which would be expensive. */ ++ if (opts->x_ix86_cmodel == CM_KERNEL) ++ opts->x_ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS; ++ } ++ ++ /* Handle -mmemcpy-strategy= and -mmemset-strategy= */ ++ if (opts->x_ix86_tune_memcpy_strategy) ++ { ++ char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy); ++ ix86_parse_stringop_strategy_string (str, false); ++ free (str); ++ } ++ ++ if (opts->x_ix86_tune_memset_strategy) ++ { ++ char *str = xstrdup (opts->x_ix86_tune_memset_strategy); ++ ix86_parse_stringop_strategy_string (str, true); ++ free (str); ++ } ++ ++ /* Save the initial options in case the user does function specific ++ options. */ ++ if (main_args_p) ++ target_option_default_node = target_option_current_node ++ = build_target_option_node (opts); ++ ++ if (opts->x_flag_cf_protection != CF_NONE) ++ opts->x_flag_cf_protection ++ = (cf_protection_level) (opts->x_flag_cf_protection | CF_SET); ++ ++ if (ix86_tune_features [X86_TUNE_AVOID_256FMA_CHAINS]) ++ maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 256, ++ opts->x_param_values, ++ opts_set->x_param_values); ++ else if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS]) ++ maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 128, ++ opts->x_param_values, ++ opts_set->x_param_values); ++ ++ /* PR86952: jump table usage with retpolines is slow. ++ The PR provides some numbers about the slowness. */ ++ if (ix86_indirect_branch != indirect_branch_keep ++ && !opts_set->x_flag_jump_tables) ++ opts->x_flag_jump_tables = 0; ++ ++ return true; ++} ++ ++/* Implement the TARGET_OPTION_OVERRIDE hook. */ ++ ++void ++ix86_option_override (void) ++{ ++ ix86_option_override_internal (true, &global_options, &global_options_set); ++} ++ ++/* Remember the last target of ix86_set_current_function. */ ++static GTY(()) tree ix86_previous_fndecl; ++ ++/* Set targets globals to the default (or current #pragma GCC target ++ if active). Invalidate ix86_previous_fndecl cache. */ ++ ++void ++ix86_reset_previous_fndecl (void) ++{ ++ tree new_tree = target_option_current_node; ++ cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); ++ if (TREE_TARGET_GLOBALS (new_tree)) ++ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); ++ else if (new_tree == target_option_default_node) ++ restore_target_globals (&default_target_globals); ++ else ++ TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); ++ ix86_previous_fndecl = NULL_TREE; ++} ++ ++/* Add target attribute to SIMD clone NODE if needed. */ ++ ++void ++ix86_simd_clone_adjust (struct cgraph_node *node) ++{ ++ const char *str = NULL; ++ ++ /* Attributes need to be adjusted for definitions, not declarations. 
*/ ++ if (!node->definition) ++ return; ++ ++ gcc_assert (node->decl == cfun->decl); ++ switch (node->simdclone->vecsize_mangle) ++ { ++ case 'b': ++ if (!TARGET_SSE2) ++ str = "sse2"; ++ break; ++ case 'c': ++ if (!TARGET_AVX) ++ str = "avx"; ++ break; ++ case 'd': ++ if (!TARGET_AVX2) ++ str = "avx2"; ++ break; ++ case 'e': ++ if (!TARGET_AVX512F) ++ str = "avx512f"; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ if (str == NULL) ++ return; ++ push_cfun (NULL); ++ tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str)); ++ bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); ++ gcc_assert (ok); ++ pop_cfun (); ++ ix86_reset_previous_fndecl (); ++ ix86_set_current_function (node->decl); ++} ++ ++ ++ ++/* Set the func_type field from the function FNDECL. */ ++ ++static void ++ix86_set_func_type (tree fndecl) ++{ ++ if (cfun->machine->func_type == TYPE_UNKNOWN) ++ { ++ if (lookup_attribute ("interrupt", ++ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) ++ { ++ if (ix86_function_naked (fndecl)) ++ error_at (DECL_SOURCE_LOCATION (fndecl), ++ "interrupt and naked attributes are not compatible"); ++ ++ int nargs = 0; ++ for (tree arg = DECL_ARGUMENTS (fndecl); ++ arg; ++ arg = TREE_CHAIN (arg)) ++ nargs++; ++ cfun->machine->no_caller_saved_registers = true; ++ cfun->machine->func_type ++ = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT; ++ ++ ix86_optimize_mode_switching[X86_DIRFLAG] = 1; ++ ++ /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */ ++ if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG) ++ sorry ("only DWARF debug format is supported for interrupt " ++ "service routine"); ++ } ++ else ++ { ++ cfun->machine->func_type = TYPE_NORMAL; ++ if (lookup_attribute ("no_caller_saved_registers", ++ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) ++ cfun->machine->no_caller_saved_registers = true; ++ } ++ } ++} ++ ++/* Set the indirect_branch_type field from the function FNDECL. */ ++ ++static void ++ix86_set_indirect_branch_type (tree fndecl) ++{ ++ if (cfun->machine->indirect_branch_type == indirect_branch_unset) ++ { ++ tree attr = lookup_attribute ("indirect_branch", ++ DECL_ATTRIBUTES (fndecl)); ++ if (attr != NULL) ++ { ++ tree args = TREE_VALUE (attr); ++ if (args == NULL) ++ gcc_unreachable (); ++ tree cst = TREE_VALUE (args); ++ if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0) ++ cfun->machine->indirect_branch_type = indirect_branch_keep; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0) ++ cfun->machine->indirect_branch_type = indirect_branch_thunk; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0) ++ cfun->machine->indirect_branch_type = indirect_branch_thunk_inline; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0) ++ cfun->machine->indirect_branch_type = indirect_branch_thunk_extern; ++ else ++ gcc_unreachable (); ++ } ++ else ++ cfun->machine->indirect_branch_type = ix86_indirect_branch; ++ ++ /* -mcmodel=large is not compatible with -mindirect-branch=thunk ++ nor -mindirect-branch=thunk-extern. */ ++ if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) ++ && ((cfun->machine->indirect_branch_type ++ == indirect_branch_thunk_extern) ++ || (cfun->machine->indirect_branch_type ++ == indirect_branch_thunk))) ++ error ("%<-mindirect-branch=%s%> and %<-mcmodel=large%> are not " ++ "compatible", ++ ((cfun->machine->indirect_branch_type ++ == indirect_branch_thunk_extern) ++ ? 
"thunk-extern" : "thunk")); ++ ++ if (cfun->machine->indirect_branch_type != indirect_branch_keep ++ && (flag_cf_protection & CF_RETURN)) ++ error ("%<-mindirect-branch%> and %<-fcf-protection%> are not " ++ "compatible"); ++ } ++ ++ if (cfun->machine->function_return_type == indirect_branch_unset) ++ { ++ tree attr = lookup_attribute ("function_return", ++ DECL_ATTRIBUTES (fndecl)); ++ if (attr != NULL) ++ { ++ tree args = TREE_VALUE (attr); ++ if (args == NULL) ++ gcc_unreachable (); ++ tree cst = TREE_VALUE (args); ++ if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0) ++ cfun->machine->function_return_type = indirect_branch_keep; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0) ++ cfun->machine->function_return_type = indirect_branch_thunk; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0) ++ cfun->machine->function_return_type = indirect_branch_thunk_inline; ++ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0) ++ cfun->machine->function_return_type = indirect_branch_thunk_extern; ++ else ++ gcc_unreachable (); ++ } ++ else ++ cfun->machine->function_return_type = ix86_function_return; ++ ++ /* -mcmodel=large is not compatible with -mfunction-return=thunk ++ nor -mfunction-return=thunk-extern. */ ++ if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) ++ && ((cfun->machine->function_return_type ++ == indirect_branch_thunk_extern) ++ || (cfun->machine->function_return_type ++ == indirect_branch_thunk))) ++ error ("%<-mfunction-return=%s%> and %<-mcmodel=large%> are not " ++ "compatible", ++ ((cfun->machine->function_return_type ++ == indirect_branch_thunk_extern) ++ ? "thunk-extern" : "thunk")); ++ ++ if (cfun->machine->function_return_type != indirect_branch_keep ++ && (flag_cf_protection & CF_RETURN)) ++ error ("%<-mfunction-return%> and %<-fcf-protection%> are not " ++ "compatible"); ++ } ++} ++ ++/* Establish appropriate back-end context for processing the function ++ FNDECL. The argument might be NULL to indicate processing at top ++ level, outside of any function scope. */ ++void ++ix86_set_current_function (tree fndecl) ++{ ++ /* Only change the context if the function changes. This hook is called ++ several times in the course of compiling a function, and we don't want to ++ slow things down too much or call target_reinit when it isn't safe. */ ++ if (fndecl == ix86_previous_fndecl) ++ { ++ /* There may be 2 function bodies for the same function FNDECL, ++ one is extern inline and one isn't. Call ix86_set_func_type ++ to set the func_type field. 
*/ ++ if (fndecl != NULL_TREE) ++ { ++ ix86_set_func_type (fndecl); ++ ix86_set_indirect_branch_type (fndecl); ++ } ++ return; ++ } ++ ++ tree old_tree; ++ if (ix86_previous_fndecl == NULL_TREE) ++ old_tree = target_option_current_node; ++ else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)) ++ old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl); ++ else ++ old_tree = target_option_default_node; ++ ++ if (fndecl == NULL_TREE) ++ { ++ if (old_tree != target_option_current_node) ++ ix86_reset_previous_fndecl (); ++ return; ++ } ++ ++ ix86_set_func_type (fndecl); ++ ix86_set_indirect_branch_type (fndecl); ++ ++ tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); ++ if (new_tree == NULL_TREE) ++ new_tree = target_option_default_node; ++ ++ if (old_tree != new_tree) ++ { ++ cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); ++ if (TREE_TARGET_GLOBALS (new_tree)) ++ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); ++ else if (new_tree == target_option_default_node) ++ restore_target_globals (&default_target_globals); ++ else ++ TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); ++ } ++ ix86_previous_fndecl = fndecl; ++ ++ static bool prev_no_caller_saved_registers; ++ ++ /* 64-bit MS and SYSV ABI have different set of call used registers. ++ Avoid expensive re-initialization of init_regs each time we switch ++ function context. */ ++ if (TARGET_64BIT ++ && (call_used_or_fixed_reg_p (SI_REG) ++ == (cfun->machine->call_abi == MS_ABI))) ++ reinit_regs (); ++ /* Need to re-initialize init_regs if caller-saved registers are ++ changed. */ ++ else if (prev_no_caller_saved_registers ++ != cfun->machine->no_caller_saved_registers) ++ reinit_regs (); ++ ++ if (cfun->machine->func_type != TYPE_NORMAL ++ || cfun->machine->no_caller_saved_registers) ++ { ++ /* Don't allow SSE, MMX nor x87 instructions since they ++ may change processor state. */ ++ const char *isa; ++ if (TARGET_SSE) ++ isa = "SSE"; ++ else if (TARGET_MMX) ++ isa = "MMX/3Dnow"; ++ else if (TARGET_80387) ++ isa = "80387"; ++ else ++ isa = NULL; ++ if (isa != NULL) ++ { ++ if (cfun->machine->func_type != TYPE_NORMAL) ++ sorry (cfun->machine->func_type == TYPE_EXCEPTION ++ ? G_("%s instructions aren%'t allowed in an" ++ " exception service routine") ++ : G_("%s instructions aren%'t allowed in an" ++ " interrupt service routine"), ++ isa); ++ else ++ sorry ("%s instructions aren%'t allowed in a function with " ++ "the % attribute", isa); ++ /* Don't issue the same error twice. */ ++ cfun->machine->func_type = TYPE_NORMAL; ++ cfun->machine->no_caller_saved_registers = false; ++ } ++ } ++ ++ prev_no_caller_saved_registers ++ = cfun->machine->no_caller_saved_registers; ++} ++ ++/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ ++char * ++ix86_offload_options (void) ++{ ++ if (TARGET_LP64) ++ return xstrdup ("-foffload-abi=lp64"); ++ return xstrdup ("-foffload-abi=ilp32"); ++} ++ ++/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall", ++ and "sseregparm" calling convention attributes; ++ arguments as in struct attribute_spec.handler. 
*/ ++ ++static tree ++ix86_handle_cconv_attribute (tree *node, tree name, tree args, int, ++ bool *no_add_attrs) ++{ ++ if (TREE_CODE (*node) != FUNCTION_TYPE ++ && TREE_CODE (*node) != METHOD_TYPE ++ && TREE_CODE (*node) != FIELD_DECL ++ && TREE_CODE (*node) != TYPE_DECL) ++ { ++ warning (OPT_Wattributes, "%qE attribute only applies to functions", ++ name); ++ *no_add_attrs = true; ++ return NULL_TREE; ++ } ++ ++ /* Can combine regparm with all attributes but fastcall, and thiscall. */ ++ if (is_attribute_p ("regparm", name)) ++ { ++ tree cst; ++ ++ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("fastcall and regparm attributes are not compatible"); ++ } ++ ++ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("regparam and thiscall attributes are not compatible"); ++ } ++ ++ cst = TREE_VALUE (args); ++ if (TREE_CODE (cst) != INTEGER_CST) ++ { ++ warning (OPT_Wattributes, ++ "%qE attribute requires an integer constant argument", ++ name); ++ *no_add_attrs = true; ++ } ++ else if (compare_tree_int (cst, REGPARM_MAX) > 0) ++ { ++ warning (OPT_Wattributes, "argument to %qE attribute larger than %d", ++ name, REGPARM_MAX); ++ *no_add_attrs = true; ++ } ++ ++ return NULL_TREE; ++ } ++ ++ if (TARGET_64BIT) ++ { ++ /* Do not warn when emulating the MS ABI. */ ++ if ((TREE_CODE (*node) != FUNCTION_TYPE ++ && TREE_CODE (*node) != METHOD_TYPE) ++ || ix86_function_type_abi (*node) != MS_ABI) ++ warning (OPT_Wattributes, "%qE attribute ignored", ++ name); ++ *no_add_attrs = true; ++ return NULL_TREE; ++ } ++ ++ /* Can combine fastcall with stdcall (redundant) and sseregparm. */ ++ if (is_attribute_p ("fastcall", name)) ++ { ++ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("fastcall and cdecl attributes are not compatible"); ++ } ++ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("fastcall and stdcall attributes are not compatible"); ++ } ++ if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("fastcall and regparm attributes are not compatible"); ++ } ++ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("fastcall and thiscall attributes are not compatible"); ++ } ++ } ++ ++ /* Can combine stdcall with fastcall (redundant), regparm and ++ sseregparm. */ ++ else if (is_attribute_p ("stdcall", name)) ++ { ++ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("stdcall and cdecl attributes are not compatible"); ++ } ++ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("stdcall and fastcall attributes are not compatible"); ++ } ++ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("stdcall and thiscall attributes are not compatible"); ++ } ++ } ++ ++ /* Can combine cdecl with regparm and sseregparm. 
*/ ++ else if (is_attribute_p ("cdecl", name)) ++ { ++ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("stdcall and cdecl attributes are not compatible"); ++ } ++ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("fastcall and cdecl attributes are not compatible"); ++ } ++ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("cdecl and thiscall attributes are not compatible"); ++ } ++ } ++ else if (is_attribute_p ("thiscall", name)) ++ { ++ if (TREE_CODE (*node) != METHOD_TYPE && pedantic) ++ warning (OPT_Wattributes, "%qE attribute is used for non-class method", ++ name); ++ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("stdcall and thiscall attributes are not compatible"); ++ } ++ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("fastcall and thiscall attributes are not compatible"); ++ } ++ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("cdecl and thiscall attributes are not compatible"); ++ } ++ } ++ ++ /* Can combine sseregparm with all attributes. */ ++ ++ return NULL_TREE; ++} ++ ++#ifndef CHECK_STACK_LIMIT ++#define CHECK_STACK_LIMIT (-1) ++#endif ++ ++/* The transactional memory builtins are implicitly regparm or fastcall ++ depending on the ABI. Override the generic do-nothing attribute that ++ these builtins were declared with, and replace it with one of the two ++ attributes that we expect elsewhere. */ ++ ++static tree ++ix86_handle_tm_regparm_attribute (tree *node, tree, tree, ++ int flags, bool *no_add_attrs) ++{ ++ tree alt; ++ ++ /* In no case do we want to add the placeholder attribute. */ ++ *no_add_attrs = true; ++ ++ /* The 64-bit ABI is unchanged for transactional memory. */ ++ if (TARGET_64BIT) ++ return NULL_TREE; ++ ++ /* ??? Is there a better way to validate 32-bit windows? We have ++ cfun->machine->call_abi, but that seems to be set only for 64-bit. */ ++ if (CHECK_STACK_LIMIT > 0) ++ alt = tree_cons (get_identifier ("fastcall"), NULL, NULL); ++ else ++ { ++ alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL); ++ alt = tree_cons (get_identifier ("regparm"), alt, NULL); ++ } ++ decl_attributes (node, alt, flags); ++ ++ return NULL_TREE; ++} ++ ++/* Handle a "force_align_arg_pointer" attribute. */ ++ ++static tree ++ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name, ++ tree, int, bool *no_add_attrs) ++{ ++ if (TREE_CODE (*node) != FUNCTION_TYPE ++ && TREE_CODE (*node) != METHOD_TYPE ++ && TREE_CODE (*node) != FIELD_DECL ++ && TREE_CODE (*node) != TYPE_DECL) ++ { ++ warning (OPT_Wattributes, "%qE attribute only applies to functions", ++ name); ++ *no_add_attrs = true; ++ } ++ ++ return NULL_TREE; ++} ++ ++/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in ++ struct attribute_spec.handler. 
*/ ++ ++static tree ++ix86_handle_struct_attribute (tree *node, tree name, tree, int, ++ bool *no_add_attrs) ++{ ++ tree *type = NULL; ++ if (DECL_P (*node)) ++ { ++ if (TREE_CODE (*node) == TYPE_DECL) ++ type = &TREE_TYPE (*node); ++ } ++ else ++ type = node; ++ ++ if (!(type && RECORD_OR_UNION_TYPE_P (*type))) ++ { ++ warning (OPT_Wattributes, "%qE attribute ignored", ++ name); ++ *no_add_attrs = true; ++ } ++ ++ else if ((is_attribute_p ("ms_struct", name) ++ && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) ++ || ((is_attribute_p ("gcc_struct", name) ++ && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) ++ { ++ warning (OPT_Wattributes, "%qE incompatible attribute ignored", ++ name); ++ *no_add_attrs = true; ++ } ++ ++ return NULL_TREE; ++} ++ ++/* Handle a "callee_pop_aggregate_return" attribute; arguments as ++ in struct attribute_spec handler. */ ++ ++static tree ++ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, int, ++ bool *no_add_attrs) ++{ ++ if (TREE_CODE (*node) != FUNCTION_TYPE ++ && TREE_CODE (*node) != METHOD_TYPE ++ && TREE_CODE (*node) != FIELD_DECL ++ && TREE_CODE (*node) != TYPE_DECL) ++ { ++ warning (OPT_Wattributes, "%qE attribute only applies to functions", ++ name); ++ *no_add_attrs = true; ++ return NULL_TREE; ++ } ++ if (TARGET_64BIT) ++ { ++ warning (OPT_Wattributes, "%qE attribute only available for 32-bit", ++ name); ++ *no_add_attrs = true; ++ return NULL_TREE; ++ } ++ if (is_attribute_p ("callee_pop_aggregate_return", name)) ++ { ++ tree cst; ++ ++ cst = TREE_VALUE (args); ++ if (TREE_CODE (cst) != INTEGER_CST) ++ { ++ warning (OPT_Wattributes, ++ "%qE attribute requires an integer constant argument", ++ name); ++ *no_add_attrs = true; ++ } ++ else if (compare_tree_int (cst, 0) != 0 ++ && compare_tree_int (cst, 1) != 0) ++ { ++ warning (OPT_Wattributes, ++ "argument to %qE attribute is neither zero, nor one", ++ name); ++ *no_add_attrs = true; ++ } ++ ++ return NULL_TREE; ++ } ++ ++ return NULL_TREE; ++} ++ ++/* Handle a "ms_abi" or "sysv" attribute; arguments as in ++ struct attribute_spec.handler. */ ++ ++static tree ++ix86_handle_abi_attribute (tree *node, tree name, tree, int, ++ bool *no_add_attrs) ++{ ++ if (TREE_CODE (*node) != FUNCTION_TYPE ++ && TREE_CODE (*node) != METHOD_TYPE ++ && TREE_CODE (*node) != FIELD_DECL ++ && TREE_CODE (*node) != TYPE_DECL) ++ { ++ warning (OPT_Wattributes, "%qE attribute only applies to functions", ++ name); ++ *no_add_attrs = true; ++ return NULL_TREE; ++ } ++ ++ /* Can combine regparm with all attributes but fastcall. 
*/ ++ if (is_attribute_p ("ms_abi", name)) ++ { ++ if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("%qs and %qs attributes are not compatible", ++ "ms_abi", "sysv_abi"); ++ } ++ ++ return NULL_TREE; ++ } ++ else if (is_attribute_p ("sysv_abi", name)) ++ { ++ if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node))) ++ { ++ error ("%qs and %qs attributes are not compatible", ++ "ms_abi", "sysv_abi"); ++ } ++ ++ return NULL_TREE; ++ } ++ ++ return NULL_TREE; ++} ++ ++static tree ++ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int, ++ bool *no_add_attrs) ++{ ++ if (TREE_CODE (*node) != FUNCTION_DECL) ++ { ++ warning (OPT_Wattributes, "%qE attribute only applies to functions", ++ name); ++ *no_add_attrs = true; ++ } ++ ++ if (is_attribute_p ("indirect_branch", name)) ++ { ++ tree cst = TREE_VALUE (args); ++ if (TREE_CODE (cst) != STRING_CST) ++ { ++ warning (OPT_Wattributes, ++ "%qE attribute requires a string constant argument", ++ name); ++ *no_add_attrs = true; ++ } ++ else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) ++ { ++ warning (OPT_Wattributes, ++ "argument to %qE attribute is not " ++ "(keep|thunk|thunk-inline|thunk-extern)", name); ++ *no_add_attrs = true; ++ } ++ } ++ ++ if (is_attribute_p ("function_return", name)) ++ { ++ tree cst = TREE_VALUE (args); ++ if (TREE_CODE (cst) != STRING_CST) ++ { ++ warning (OPT_Wattributes, ++ "%qE attribute requires a string constant argument", ++ name); ++ *no_add_attrs = true; ++ } ++ else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 ++ && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) ++ { ++ warning (OPT_Wattributes, ++ "argument to %qE attribute is not " ++ "(keep|thunk|thunk-inline|thunk-extern)", name); ++ *no_add_attrs = true; ++ } ++ } ++ ++ return NULL_TREE; ++} ++ ++static tree ++ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree, ++ int, bool *) ++{ ++ return NULL_TREE; ++} ++ ++static tree ++ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *) ++{ ++ /* DECL_RESULT and DECL_ARGUMENTS do not exist there yet, ++ but the function type contains args and return type data. */ ++ tree func_type = *node; ++ tree return_type = TREE_TYPE (func_type); ++ ++ int nargs = 0; ++ tree current_arg_type = TYPE_ARG_TYPES (func_type); ++ while (current_arg_type ++ && ! VOID_TYPE_P (TREE_VALUE (current_arg_type))) ++ { ++ if (nargs == 0) ++ { ++ if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type))) ++ error ("interrupt service routine should have a pointer " ++ "as the first argument"); ++ } ++ else if (nargs == 1) ++ { ++ if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE ++ || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode) ++ error ("interrupt service routine should have %qs " ++ "as the second argument", ++ TARGET_64BIT ++ ? (TARGET_X32 ? "unsigned long long int" ++ : "unsigned long int") ++ : "unsigned int"); ++ } ++ nargs++; ++ current_arg_type = TREE_CHAIN (current_arg_type); ++ } ++ if (!nargs || nargs > 2) ++ error ("interrupt service routine can only have a pointer argument " ++ "and an optional integer argument"); ++ if (! 
VOID_TYPE_P (return_type)) ++ error ("interrupt service routine must return %"); ++ ++ return NULL_TREE; ++} ++ ++/* Handle fentry_name / fentry_section attribute. */ ++ ++static tree ++ix86_handle_fentry_name (tree *node, tree name, tree args, ++ int, bool *no_add_attrs) ++{ ++ if (TREE_CODE (*node) == FUNCTION_DECL ++ && TREE_CODE (TREE_VALUE (args)) == STRING_CST) ++ /* Do nothing else, just set the attribute. We'll get at ++ it later with lookup_attribute. */ ++ ; ++ else ++ { ++ warning (OPT_Wattributes, "%qE attribute ignored", name); ++ *no_add_attrs = true; ++ } ++ ++ return NULL_TREE; ++} ++ ++/* Table of valid machine attributes. */ ++const struct attribute_spec ix86_attribute_table[] = ++{ ++ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, ++ affects_type_identity, handler, exclude } */ ++ /* Stdcall attribute says callee is responsible for popping arguments ++ if they are not variable. */ ++ { "stdcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, ++ NULL }, ++ /* Fastcall attribute says callee is responsible for popping arguments ++ if they are not variable. */ ++ { "fastcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, ++ NULL }, ++ /* Thiscall attribute says callee is responsible for popping arguments ++ if they are not variable. */ ++ { "thiscall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, ++ NULL }, ++ /* Cdecl attribute says the callee is a normal C declaration */ ++ { "cdecl", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, ++ NULL }, ++ /* Regparm attribute specifies how many integer arguments are to be ++ passed in registers. */ ++ { "regparm", 1, 1, false, true, true, true, ix86_handle_cconv_attribute, ++ NULL }, ++ /* Sseregparm attribute says we are using x86_64 calling conventions ++ for FP arguments. */ ++ { "sseregparm", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, ++ NULL }, ++ /* The transactional memory builtins are implicitly regparm or fastcall ++ depending on the ABI. Override the generic do-nothing attribute that ++ these builtins were declared with. */ ++ { "*tm regparm", 0, 0, false, true, true, true, ++ ix86_handle_tm_regparm_attribute, NULL }, ++ /* force_align_arg_pointer says this function realigns the stack at entry. */ ++ { "force_align_arg_pointer", 0, 0, ++ false, true, true, false, ix86_handle_force_align_arg_pointer_attribute, ++ NULL }, ++#if TARGET_DLLIMPORT_DECL_ATTRIBUTES ++ { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute, ++ NULL }, ++ { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute, ++ NULL }, ++ { "shared", 0, 0, true, false, false, false, ++ ix86_handle_shared_attribute, NULL }, ++#endif ++ { "ms_struct", 0, 0, false, false, false, false, ++ ix86_handle_struct_attribute, NULL }, ++ { "gcc_struct", 0, 0, false, false, false, false, ++ ix86_handle_struct_attribute, NULL }, ++#ifdef SUBTARGET_ATTRIBUTE_TABLE ++ SUBTARGET_ATTRIBUTE_TABLE, ++#endif ++ /* ms_abi and sysv_abi calling convention function attributes. 
*/ ++ { "ms_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, NULL }, ++ { "sysv_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, ++ NULL }, ++ { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL }, ++ { "sysv_abi va_list", 0, 0, false, false, false, false, NULL, NULL }, ++ { "ms_hook_prologue", 0, 0, true, false, false, false, ++ ix86_handle_fndecl_attribute, NULL }, ++ { "callee_pop_aggregate_return", 1, 1, false, true, true, true, ++ ix86_handle_callee_pop_aggregate_return, NULL }, ++ { "interrupt", 0, 0, false, true, true, false, ++ ix86_handle_interrupt_attribute, NULL }, ++ { "no_caller_saved_registers", 0, 0, false, true, true, false, ++ ix86_handle_no_caller_saved_registers_attribute, NULL }, ++ { "naked", 0, 0, true, false, false, false, ++ ix86_handle_fndecl_attribute, NULL }, ++ { "indirect_branch", 1, 1, true, false, false, false, ++ ix86_handle_fndecl_attribute, NULL }, ++ { "function_return", 1, 1, true, false, false, false, ++ ix86_handle_fndecl_attribute, NULL }, ++ { "indirect_return", 0, 0, false, true, true, false, ++ NULL, NULL }, ++ { "fentry_name", 1, 1, true, false, false, false, ++ ix86_handle_fentry_name, NULL }, ++ { "fentry_section", 1, 1, true, false, false, false, ++ ix86_handle_fentry_name, NULL }, ++ { "cf_check", 0, 0, true, false, false, false, ++ ix86_handle_fndecl_attribute, NULL }, ++ ++ /* End element. */ ++ { NULL, 0, 0, false, false, false, false, NULL, NULL } ++}; ++ ++#include "gt-i386-options.h" +diff --git a/gcc/config/i386/i386-options.h b/gcc/config/i386/i386-options.h +new file mode 100644 +index 000000000..817ddda5c +--- /dev/null ++++ b/gcc/config/i386/i386-options.h +@@ -0,0 +1,95 @@ ++/* Copyright (C) 1988-2019 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++#ifndef GCC_I386_OPTIONS_H ++#define GCC_I386_OPTIONS_H ++ ++char *ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, ++ int flags, int flags2, ++ const char *arch, const char *tune, ++ enum fpmath_unit fpmath, bool add_nl_p, ++ bool add_abi_p); ++ ++extern enum attr_cpu ix86_schedule; ++ ++extern enum processor_type ix86_tune; ++extern enum processor_type ix86_arch; ++extern unsigned char x86_prefetch_sse; ++extern const struct processor_costs *ix86_tune_cost; ++ ++extern int ix86_tune_defaulted; ++extern int ix86_arch_specified; ++ ++extern unsigned int ix86_default_incoming_stack_boundary; ++extern HOST_WIDE_INT deferred_isa_values; ++extern HOST_WIDE_INT deferred_isa_values2; ++ ++extern unsigned int ix86_preferred_stack_boundary; ++extern unsigned int ix86_user_incoming_stack_boundary; ++extern unsigned int ix86_default_incoming_stack_boundary; ++extern unsigned int ix86_incoming_stack_boundary; ++ ++extern char *ix86_offload_options (void); ++extern void ix86_option_override (void); ++extern void ix86_override_options_after_change (void); ++void ix86_set_current_function (tree fndecl); ++bool ix86_function_naked (const_tree fn); ++void ix86_simd_clone_adjust (struct cgraph_node *node); ++ ++extern tree (*ix86_veclib_handler) (combined_fn, tree, tree); ++extern tree ix86_veclibabi_svml (combined_fn, tree, tree); ++extern tree ix86_veclibabi_acml (combined_fn, tree, tree); ++ ++extern rtx (*ix86_gen_leave) (void); ++extern rtx (*ix86_gen_add3) (rtx, rtx, rtx); ++extern rtx (*ix86_gen_sub3) (rtx, rtx, rtx); ++extern rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx); ++extern rtx (*ix86_gen_one_cmpl2) (rtx, rtx); ++extern rtx (*ix86_gen_monitor) (rtx, rtx, rtx); ++extern rtx (*ix86_gen_monitorx) (rtx, rtx, rtx); ++extern rtx (*ix86_gen_clzero) (rtx); ++extern rtx (*ix86_gen_andsp) (rtx, rtx, rtx); ++extern rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx); ++extern rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx); ++extern rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx); ++extern rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx); ++extern rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx); ++ ++enum ix86_function_specific_strings ++{ ++ IX86_FUNCTION_SPECIFIC_ARCH, ++ IX86_FUNCTION_SPECIFIC_TUNE, ++ IX86_FUNCTION_SPECIFIC_MAX ++}; ++ ++extern const char *stringop_alg_names[]; ++ ++void ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2); ++void ix86_function_specific_save (struct cl_target_option *, ++ struct gcc_options *opts); ++void ix86_function_specific_restore (struct gcc_options *opts, ++ struct cl_target_option *); ++void ix86_function_specific_post_stream_in (struct cl_target_option *); ++void ix86_function_specific_print (FILE *, int, ++ struct cl_target_option *); ++bool ix86_valid_target_attribute_p (tree, tree, tree, int); ++ ++extern const struct attribute_spec ix86_attribute_table[]; ++ ++ ++#endif /* GCC_I386_OPTIONS_H */ +diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h +index 83645e89a..4afba5bc2 100644 +--- a/gcc/config/i386/i386-protos.h ++++ b/gcc/config/i386/i386-protos.h +@@ -65,7 +65,7 @@ extern int avx_vpermilp_parallel (rtx par, machine_mode mode); + extern int avx_vperm2f128_parallel (rtx par, machine_mode mode); + + extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx); +-extern bool ix86_expand_set_or_movmem (rtx, rtx, rtx, rtx, rtx, rtx, ++extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx, + rtx, rtx, rtx, rtx, bool); + + extern bool constant_address_p (rtx); +@@ -207,7 
+207,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); + #endif /* RTX_CODE */ + + #ifdef TREE_CODE +-extern int ix86_data_alignment (tree, int, bool); ++extern int ix86_data_alignment (tree, unsigned int, bool); + extern unsigned int ix86_local_alignment (tree, machine_mode, + unsigned int); + extern unsigned int ix86_minimum_alignment (tree, machine_mode, +@@ -215,9 +215,9 @@ extern unsigned int ix86_minimum_alignment (tree, machine_mode, + extern tree ix86_handle_shared_attribute (tree *, tree, tree, int, bool *); + extern tree ix86_handle_selectany_attribute (tree *, tree, tree, int, bool *); + extern int x86_field_alignment (tree, int); +-extern tree ix86_valid_target_attribute_tree (tree, ++extern tree ix86_valid_target_attribute_tree (tree, tree, + struct gcc_options *, +- struct gcc_options *); ++ struct gcc_options *, bool); + extern unsigned int ix86_get_callcvt (const_tree); + + #endif +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 5a0f8a0eb..9282a8fb6 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -91,19 +91,17 @@ along with GCC; see the file COPYING3. If not see + #include "tree-vector-builder.h" + #include "debug.h" + #include "dwarf2out.h" ++#include "i386-options.h" ++#include "i386-builtins.h" ++#include "i386-expand.h" ++#include "i386-features.h" + + /* This file should be included last. */ + #include "target-def.h" + +-#include "x86-tune-costs.h" +- + static rtx legitimize_dllimport_symbol (rtx, bool); + static rtx legitimize_pe_coff_extern_decl (rtx, bool); +-static rtx legitimize_pe_coff_symbol (rtx, bool); + static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); +-static bool ix86_save_reg (unsigned int, bool, bool); +-static bool ix86_function_naked (const_tree); +-static bool ix86_notrack_prefixed_insn_p (rtx); + static void ix86_emit_restore_reg_using_pop (rtx); + + +@@ -126,102 +124,6 @@ const struct processor_costs *ix86_tune_cost = NULL; + /* Set by -mtune or -Os. */ + const struct processor_costs *ix86_cost = NULL; + +-/* Processor feature/optimization bitmasks. */ +-#define m_386 (HOST_WIDE_INT_1U<machine->call_ms2sysv_extra_regs and +- 3.) rather or not stack alignment is being performed. */ +- static rtx get_stub_rtx (enum xlogue_stub stub); +- +- /* Returns the amount of stack space (including padding) that the stub +- needs to store registers based upon data in the machine_function. */ +- HOST_WIDE_INT get_stack_space_used () const +- { +- const struct machine_function *m = cfun->machine; +- unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1; +- +- gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS); +- return m_regs[last_reg].offset + STUB_INDEX_OFFSET; +- } +- +- /* Returns the offset for the base pointer used by the stub. 
*/ +- HOST_WIDE_INT get_stub_ptr_offset () const +- { +- return STUB_INDEX_OFFSET + m_stack_align_off_in; +- } +- +- static const struct xlogue_layout &get_instance (); +- static unsigned count_stub_managed_regs (); +- static bool is_stub_managed_reg (unsigned regno, unsigned count); +- +- static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70; +- static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS; +- static const unsigned MAX_REGS = 18; +- static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS; +- static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1; +- static const unsigned STUB_NAME_MAX_LEN = 20; +- static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT]; +- static const unsigned REG_ORDER[MAX_REGS]; +- static const unsigned REG_ORDER_REALIGN[MAX_REGS]; +- +-private: +- xlogue_layout (); +- xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp); +- xlogue_layout (const xlogue_layout &); +- +- /* True if hard frame pointer is used. */ +- bool m_hfp; +- +- /* Max number of register this layout manages. */ +- unsigned m_nregs; +- +- /* Incoming offset from 16-byte alignment. */ +- HOST_WIDE_INT m_stack_align_off_in; +- +- /* Register order and offsets. */ +- struct reginfo m_regs[MAX_REGS]; +- +- /* Lazy-inited cache of symbol names for stubs. */ +- static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT] +- [STUB_NAME_MAX_LEN]; +- +- static const xlogue_layout s_instances[XLOGUE_SET_COUNT]; +-}; +- +-const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = { +- "savms64", +- "resms64", +- "resms64x", +- "savms64f", +- "resms64f", +- "resms64fx" +-}; +- +-const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = { +-/* The below offset values are where each register is stored for the layout +- relative to incoming stack pointer. The value of each m_regs[].offset will +- be relative to the incoming base pointer (rax or rsi) used by the stub. +- +- s_instances: 0 1 2 3 +- Offset: realigned or aligned + 8 +- Register aligned aligned + 8 aligned w/HFP w/HFP */ +- XMM15_REG, /* 0x10 0x18 0x10 0x18 */ +- XMM14_REG, /* 0x20 0x28 0x20 0x28 */ +- XMM13_REG, /* 0x30 0x38 0x30 0x38 */ +- XMM12_REG, /* 0x40 0x48 0x40 0x48 */ +- XMM11_REG, /* 0x50 0x58 0x50 0x58 */ +- XMM10_REG, /* 0x60 0x68 0x60 0x68 */ +- XMM9_REG, /* 0x70 0x78 0x70 0x78 */ +- XMM8_REG, /* 0x80 0x88 0x80 0x88 */ +- XMM7_REG, /* 0x90 0x98 0x90 0x98 */ +- XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */ +- SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */ +- DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */ +- BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */ +- BP_REG, /* 0xc0 0xc8 N/A N/A */ +- R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */ +- R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */ +- R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */ +- R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */ +-}; +- +-/* Instantiate static const values. */ +-const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET; +-const unsigned xlogue_layout::MIN_REGS; +-const unsigned xlogue_layout::MAX_REGS; +-const unsigned xlogue_layout::MAX_EXTRA_REGS; +-const unsigned xlogue_layout::VARIANT_COUNT; +-const unsigned xlogue_layout::STUB_NAME_MAX_LEN; +- +-/* Initialize xlogue_layout::s_stub_names to zero. */ +-char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT] +- [STUB_NAME_MAX_LEN]; +- +-/* Instantiates all xlogue_layout instances. 
*/ +-const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = { +- xlogue_layout (0, false), +- xlogue_layout (8, false), +- xlogue_layout (0, true), +- xlogue_layout (8, true) +-}; +- +-/* Return an appropriate const instance of xlogue_layout based upon values +- in cfun->machine and crtl. */ +-const struct xlogue_layout & +-xlogue_layout::get_instance () +-{ +- enum xlogue_stub_sets stub_set; +- bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in; +- +- if (stack_realign_fp) +- stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; +- else if (frame_pointer_needed) +- stub_set = aligned_plus_8 +- ? XLOGUE_SET_HFP_ALIGNED_PLUS_8 +- : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; +- else +- stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED; +- +- return s_instances[stub_set]; +-} +- +-/* Determine how many clobbered registers can be saved by the stub. +- Returns the count of registers the stub will save and restore. */ +-unsigned +-xlogue_layout::count_stub_managed_regs () +-{ +- bool hfp = frame_pointer_needed || stack_realign_fp; +- unsigned i, count; +- unsigned regno; +- +- for (count = i = MIN_REGS; i < MAX_REGS; ++i) +- { +- regno = REG_ORDER[i]; +- if (regno == BP_REG && hfp) +- continue; +- if (!ix86_save_reg (regno, false, false)) +- break; +- ++count; +- } +- return count; +-} +- +-/* Determine if register REGNO is a stub managed register given the +- total COUNT of stub managed registers. */ +-bool +-xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count) +-{ +- bool hfp = frame_pointer_needed || stack_realign_fp; +- unsigned i; +- +- for (i = 0; i < count; ++i) +- { +- gcc_assert (i < MAX_REGS); +- if (REG_ORDER[i] == BP_REG && hfp) +- ++count; +- else if (REG_ORDER[i] == regno) +- return true; +- } +- return false; +-} +- +-/* Constructor for xlogue_layout. */ +-xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp) +- : m_hfp (hfp) , m_nregs (hfp ? 17 : 18), +- m_stack_align_off_in (stack_align_off_in) +-{ +- HOST_WIDE_INT offset = stack_align_off_in; +- unsigned i, j; +- +- for (i = j = 0; i < MAX_REGS; ++i) +- { +- unsigned regno = REG_ORDER[i]; +- +- if (regno == BP_REG && hfp) +- continue; +- if (SSE_REGNO_P (regno)) +- { +- offset += 16; +- /* Verify that SSE regs are always aligned. */ +- gcc_assert (!((stack_align_off_in + offset) & 15)); +- } +- else +- offset += 8; +- +- m_regs[j].regno = regno; +- m_regs[j++].offset = offset - STUB_INDEX_OFFSET; +- } +- gcc_assert (j == m_nregs); +-} +- +-const char * +-xlogue_layout::get_stub_name (enum xlogue_stub stub, +- unsigned n_extra_regs) +-{ +- const int have_avx = TARGET_AVX; +- char *name = s_stub_names[!!have_avx][stub][n_extra_regs]; +- +- /* Lazy init */ +- if (!*name) +- { +- int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u", +- (have_avx ? "avx" : "sse"), +- STUB_BASE_NAMES[stub], +- MIN_REGS + n_extra_regs); +- gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN); +- } +- +- return name; +-} +- +-/* Return rtx of a symbol ref for the entry point (based upon +- cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */ +-rtx +-xlogue_layout::get_stub_rtx (enum xlogue_stub stub) +-{ +- const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs; +- gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS); +- gcc_assert (stub < XLOGUE_STUB_COUNT); +- gcc_assert (crtl->stack_realign_finalized); +- +- return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs)); +-} +- + /* Define the structure for the machine field in struct function. 
*/ + + struct GTY(()) stack_local_entry { +@@ -741,41 +349,37 @@ enum processor_type ix86_arch; + /* True if processor has SSE prefetch instruction. */ + unsigned char x86_prefetch_sse; + +-/* -mstackrealign option */ +-static const char ix86_force_align_arg_pointer_string[] +- = "force_align_arg_pointer"; +- +-static rtx (*ix86_gen_leave) (void); +-static rtx (*ix86_gen_add3) (rtx, rtx, rtx); +-static rtx (*ix86_gen_sub3) (rtx, rtx, rtx); +-static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx); +-static rtx (*ix86_gen_one_cmpl2) (rtx, rtx); +-static rtx (*ix86_gen_monitor) (rtx, rtx, rtx); +-static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx); +-static rtx (*ix86_gen_clzero) (rtx); +-static rtx (*ix86_gen_andsp) (rtx, rtx, rtx); +-static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx); +-static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx); +-static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx); +-static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx); +-static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx); ++rtx (*ix86_gen_leave) (void); ++rtx (*ix86_gen_add3) (rtx, rtx, rtx); ++rtx (*ix86_gen_sub3) (rtx, rtx, rtx); ++rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx); ++rtx (*ix86_gen_one_cmpl2) (rtx, rtx); ++rtx (*ix86_gen_monitor) (rtx, rtx, rtx); ++rtx (*ix86_gen_monitorx) (rtx, rtx, rtx); ++rtx (*ix86_gen_clzero) (rtx); ++rtx (*ix86_gen_andsp) (rtx, rtx, rtx); ++rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx); ++rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx); ++rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx); ++rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx); ++rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx); + + /* Preferred alignment for stack boundary in bits. */ + unsigned int ix86_preferred_stack_boundary; + + /* Alignment for incoming stack boundary in bits specified at + command line. */ +-static unsigned int ix86_user_incoming_stack_boundary; ++unsigned int ix86_user_incoming_stack_boundary; + + /* Default alignment for incoming stack boundary in bits. */ +-static unsigned int ix86_default_incoming_stack_boundary; ++unsigned int ix86_default_incoming_stack_boundary; + + /* Alignment for incoming stack boundary in bits. */ + unsigned int ix86_incoming_stack_boundary; + + /* Calling abi specific va_list type nodes. */ +-static GTY(()) tree sysv_va_list_type_node; +-static GTY(()) tree ms_va_list_type_node; ++tree sysv_va_list_type_node; ++tree ms_va_list_type_node; + + /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. 
*/ + char internal_label_prefix[16]; +@@ -813,7 +417,6 @@ static REAL_VALUE_TYPE ext_80387_constants_table [5]; + static bool ext_80387_constants_init; + + +-static struct machine_function * ix86_init_machine_status (void); + static rtx ix86_function_value (const_tree, const_tree, bool); + static bool ix86_function_value_regno_p (const unsigned int); + static unsigned int ix86_function_arg_boundary (machine_mode, +@@ -821,49173 +424,20710 @@ static unsigned int ix86_function_arg_boundary (machine_mode, + static rtx ix86_static_chain (const_tree, bool); + static int ix86_function_regparm (const_tree, const_tree); + static void ix86_compute_frame_layout (void); +-static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode, +- rtx, rtx, int); +-static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT); + static tree ix86_canonical_va_list_type (tree); +-static void predict_jump (int); + static unsigned int split_stack_prologue_scratch_regno (void); + static bool i386_asm_output_addr_const_extra (FILE *, rtx); + +-enum ix86_function_specific_strings +-{ +- IX86_FUNCTION_SPECIFIC_ARCH, +- IX86_FUNCTION_SPECIFIC_TUNE, +- IX86_FUNCTION_SPECIFIC_MAX +-}; +- +-static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int, +- const char *, const char *, enum fpmath_unit, +- bool, bool); +-static void ix86_function_specific_save (struct cl_target_option *, +- struct gcc_options *opts); +-static void ix86_function_specific_restore (struct gcc_options *opts, +- struct cl_target_option *); +-static void ix86_function_specific_post_stream_in (struct cl_target_option *); +-static void ix86_function_specific_print (FILE *, int, +- struct cl_target_option *); +-static bool ix86_valid_target_attribute_p (tree, tree, tree, int); +-static bool ix86_valid_target_attribute_inner_p (tree, char *[], +- struct gcc_options *, +- struct gcc_options *, +- struct gcc_options *); + static bool ix86_can_inline_p (tree, tree); +-static void ix86_set_current_function (tree); + static unsigned int ix86_minimum_incoming_stack_boundary (bool); + +-static enum calling_abi ix86_function_abi (const_tree); +- + +-#ifndef SUBTARGET32_DEFAULT_CPU +-#define SUBTARGET32_DEFAULT_CPU "i386" +-#endif +- + /* Whether -mtune= or -march= were specified */ +-static int ix86_tune_defaulted; +-static int ix86_arch_specified; +- +-/* Vectorization library interface and handlers. */ +-static tree (*ix86_veclib_handler) (combined_fn, tree, tree); +- +-static tree ix86_veclibabi_svml (combined_fn, tree, tree); +-static tree ix86_veclibabi_acml (combined_fn, tree, tree); +- +-/* This table must be in sync with enum processor_type in i386.h. */ +-static const struct processor_costs *processor_cost_table[] = +-{ +- &generic_cost, +- &i386_cost, +- &i486_cost, +- &pentium_cost, +- &lakemont_cost, +- &pentiumpro_cost, +- &pentium4_cost, +- &nocona_cost, +- &core_cost, +- &core_cost, +- &core_cost, +- &core_cost, +- &atom_cost, +- &slm_cost, +- &slm_cost, +- &slm_cost, +- &slm_cost, +- &slm_cost, +- &slm_cost, +- &skylake_cost, +- &skylake_cost, +- &skylake_cost, +- &skylake_cost, +- &skylake_cost, +- &skylake_cost, +- &intel_cost, +- &geode_cost, +- &k6_cost, +- &athlon_cost, +- &k8_cost, +- &amdfam10_cost, +- &bdver_cost, +- &bdver_cost, +- &bdver_cost, +- &bdver_cost, +- &btver1_cost, +- &btver2_cost, +- &znver1_cost, +- &znver2_cost +-}; +- +-/* Guarantee that the array is aligned with enum processor_type. 
*/ +-STATIC_ASSERT (ARRAY_SIZE (processor_cost_table) == PROCESSOR_max); ++int ix86_tune_defaulted; ++int ix86_arch_specified; + +-static unsigned int +-rest_of_handle_insert_vzeroupper (void) +-{ +- int i; +- +- /* vzeroupper instructions are inserted immediately after reload to +- account for possible spills from 256bit or 512bit registers. The pass +- reuses mode switching infrastructure by re-running mode insertion +- pass, so disable entities that have already been processed. */ +- for (i = 0; i < MAX_386_ENTITIES; i++) +- ix86_optimize_mode_switching[i] = 0; ++/* Return true if a red-zone is in use. We can't use red-zone when ++ there are local indirect jumps, like "indirect_jump" or "tablejump", ++ which jumps to another place in the function, since "call" in the ++ indirect thunk pushes the return address onto stack, destroying ++ red-zone. + +- ix86_optimize_mode_switching[AVX_U128] = 1; ++ TODO: If we can reserve the first 2 WORDs, for PUSH and, another ++ for CALL, in red-zone, we can allow local indirect jumps with ++ indirect thunk. */ + +- /* Call optimize_mode_switching. */ +- g->get_passes ()->execute_pass_mode_switching (); +- return 0; ++bool ++ix86_using_red_zone (void) ++{ ++ return (TARGET_RED_ZONE ++ && !TARGET_64BIT_MS_ABI ++ && (!cfun->machine->has_local_indirect_jump ++ || cfun->machine->indirect_branch_type == indirect_branch_keep)); + } +- +-/* Return 1 if INSN uses or defines a hard register. +- Hard register uses in a memory address are ignored. +- Clobbers and flags definitions are ignored. */ +- ++ ++/* Return true, if profiling code should be emitted before ++ prologue. Otherwise it returns false. ++ Note: For x86 with "hotfix" it is sorried. */ + static bool +-has_non_address_hard_reg (rtx_insn *insn) ++ix86_profile_before_prologue (void) + { +- df_ref ref; +- FOR_EACH_INSN_DEF (ref, insn) +- if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) +- && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) +- && DF_REF_REGNO (ref) != FLAGS_REG) +- return true; +- +- FOR_EACH_INSN_USE (ref, insn) +- if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) +- return true; +- +- return false; ++ return flag_fentry != 0; + } + +-/* Check if comparison INSN may be transformed +- into vector comparison. Currently we transform +- zero checks only which look like: +- +- (set (reg:CCZ 17 flags) +- (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4) +- (subreg:SI (reg:DI x) 0)) +- (const_int 0 [0]))) */ ++/* Update register usage after having seen the compiler flags. */ + +-static bool +-convertible_comparison_p (rtx_insn *insn) ++static void ++ix86_conditional_register_usage (void) + { +- if (!TARGET_SSE4_1) +- return false; ++ int i, c_mask; + +- rtx def_set = single_set (insn); ++ /* If there are no caller-saved registers, preserve all registers. ++ except fixed_regs and registers used for function return value ++ since aggregate_value_p checks call_used_regs[regno] on return ++ value. */ ++ if (cfun && cfun->machine->no_caller_saved_registers) ++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ if (!fixed_regs[i] && !ix86_function_value_regno_p (i)) ++ call_used_regs[i] = 0; + +- gcc_assert (def_set); ++ /* For 32-bit targets, disable the REX registers. */ ++ if (! 
TARGET_64BIT) ++ { ++ for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) ++ CLEAR_HARD_REG_BIT (accessible_reg_set, i); ++ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) ++ CLEAR_HARD_REG_BIT (accessible_reg_set, i); ++ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) ++ CLEAR_HARD_REG_BIT (accessible_reg_set, i); ++ } + +- rtx src = SET_SRC (def_set); +- rtx dst = SET_DEST (def_set); ++ /* See the definition of CALL_USED_REGISTERS in i386.h. */ ++ c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI); ++ ++ CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); + +- gcc_assert (GET_CODE (src) == COMPARE); ++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ { ++ /* Set/reset conditionally defined registers from ++ CALL_USED_REGISTERS initializer. */ ++ if (call_used_regs[i] > 1) ++ call_used_regs[i] = !!(call_used_regs[i] & c_mask); + +- if (GET_CODE (dst) != REG +- || REGNO (dst) != FLAGS_REG +- || GET_MODE (dst) != CCZmode) +- return false; ++ /* Calculate registers of CLOBBERED_REGS register set ++ as call used registers from GENERAL_REGS register set. */ ++ if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i) ++ && call_used_regs[i]) ++ SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i); ++ } + +- rtx op1 = XEXP (src, 0); +- rtx op2 = XEXP (src, 1); ++ /* If MMX is disabled, disable the registers. */ ++ if (! TARGET_MMX) ++ accessible_reg_set &= ~reg_class_contents[MMX_REGS]; + +- if (op2 != CONST0_RTX (GET_MODE (op2))) +- return false; ++ /* If SSE is disabled, disable the registers. */ ++ if (! TARGET_SSE) ++ accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS]; + +- if (GET_CODE (op1) != IOR) +- return false; ++ /* If the FPU is disabled, disable the registers. */ ++ if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) ++ accessible_reg_set &= ~reg_class_contents[FLOAT_REGS]; + +- op2 = XEXP (op1, 1); +- op1 = XEXP (op1, 0); +- +- if (!SUBREG_P (op1) +- || !SUBREG_P (op2) +- || GET_MODE (op1) != SImode +- || GET_MODE (op2) != SImode +- || ((SUBREG_BYTE (op1) != 0 +- || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode)) +- && (SUBREG_BYTE (op2) != 0 +- || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode)))) +- return false; ++ /* If AVX512F is disabled, disable the registers. */ ++ if (! TARGET_AVX512F) ++ { ++ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) ++ CLEAR_HARD_REG_BIT (accessible_reg_set, i); + +- op1 = SUBREG_REG (op1); +- op2 = SUBREG_REG (op2); ++ accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS]; ++ } ++} + +- if (op1 != op2 +- || !REG_P (op1) +- || GET_MODE (op1) != DImode) +- return false; ++/* Canonicalize a comparison from one we don't have to one we do have. */ + +- return true; +-} ++static void ++ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1, ++ bool op0_preserve_value) ++{ ++ /* The order of operands in x87 ficom compare is forced by combine in ++ simplify_comparison () function. Float operator is treated as RTX_OBJ ++ with a precedence over other operators and is always put in the first ++ place. Swap condition and operands to match ficom instruction. */ ++ if (!op0_preserve_value ++ && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1)) ++ { ++ enum rtx_code scode = swap_condition ((enum rtx_code) *code); + +-/* The DImode version of scalar_to_vector_candidate_p. */ ++ /* We are called only for compares that are split to SAHF instruction. ++ Ensure that we have setcc/jcc insn for the swapped condition. 
*/ ++ if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN) ++ { ++ std::swap (*op0, *op1); ++ *code = (int) scode; ++ } ++ } ++} ++ ++ ++/* Hook to determine if one function can safely inline another. */ + + static bool +-dimode_scalar_to_vector_candidate_p (rtx_insn *insn) ++ix86_can_inline_p (tree caller, tree callee) + { +- rtx def_set = single_set (insn); +- +- if (!def_set) +- return false; ++ tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); ++ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); + +- if (has_non_address_hard_reg (insn)) +- return false; ++ /* Changes of those flags can be tolerated for always inlines. Lets hope ++ user knows what he is doing. */ ++ const unsigned HOST_WIDE_INT always_inline_safe_mask ++ = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS ++ | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD ++ | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD ++ | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS ++ | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE ++ | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER ++ | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER); + +- rtx src = SET_SRC (def_set); +- rtx dst = SET_DEST (def_set); + +- if (GET_CODE (src) == COMPARE) +- return convertible_comparison_p (insn); ++ if (!callee_tree) ++ callee_tree = target_option_default_node; ++ if (!caller_tree) ++ caller_tree = target_option_default_node; ++ if (callee_tree == caller_tree) ++ return true; + +- /* We are interested in DImode promotion only. */ +- if ((GET_MODE (src) != DImode +- && !CONST_INT_P (src)) +- || GET_MODE (dst) != DImode) +- return false; ++ struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); ++ struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); ++ bool ret = false; ++ bool always_inline ++ = (DECL_DISREGARD_INLINE_LIMITS (callee) ++ && lookup_attribute ("always_inline", ++ DECL_ATTRIBUTES (callee))); + +- if (!REG_P (dst) && !MEM_P (dst)) +- return false; +- +- switch (GET_CODE (src)) +- { +- case ASHIFTRT: +- if (!TARGET_AVX512VL) +- return false; +- /* FALLTHRU */ +- +- case ASHIFT: +- case LSHIFTRT: +- if (!CONST_INT_P (XEXP (src, 1)) +- || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)) +- return false; +- break; +- +- case PLUS: +- case MINUS: +- case IOR: +- case XOR: +- case AND: +- if (!REG_P (XEXP (src, 1)) +- && !MEM_P (XEXP (src, 1)) +- && !CONST_INT_P (XEXP (src, 1))) +- return false; +- +- if (GET_MODE (XEXP (src, 1)) != DImode +- && !CONST_INT_P (XEXP (src, 1))) +- return false; +- break; ++ cgraph_node *callee_node = cgraph_node::get (callee); ++ /* Callee's isa options should be a subset of the caller's, i.e. a SSE4 ++ function can inline a SSE2 function but a SSE2 function can't inline ++ a SSE4 function. */ ++ if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags) ++ != callee_opts->x_ix86_isa_flags) ++ || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2) ++ != callee_opts->x_ix86_isa_flags2)) ++ ret = false; + +- case NEG: +- case NOT: +- break; ++ /* See if we have the same non-isa options. */ ++ else if ((!always_inline ++ && caller_opts->x_target_flags != callee_opts->x_target_flags) ++ || (caller_opts->x_target_flags & ~always_inline_safe_mask) ++ != (callee_opts->x_target_flags & ~always_inline_safe_mask)) ++ ret = false; + +- case REG: +- return true; ++ /* See if arch, tune, etc. are the same. 
*/ ++ else if (caller_opts->arch != callee_opts->arch) ++ ret = false; + +- case MEM: +- case CONST_INT: +- return REG_P (dst); ++ else if (!always_inline && caller_opts->tune != callee_opts->tune) ++ ret = false; + +- default: +- return false; +- } ++ else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath ++ /* If the calle doesn't use FP expressions differences in ++ ix86_fpmath can be ignored. We are called from FEs ++ for multi-versioning call optimization, so beware of ++ ipa_fn_summaries not available. */ ++ && (! ipa_fn_summaries ++ || ipa_fn_summaries->get (callee_node) == NULL ++ || ipa_fn_summaries->get (callee_node)->fp_expressions)) ++ ret = false; + +- if (!REG_P (XEXP (src, 0)) +- && !MEM_P (XEXP (src, 0)) +- && !CONST_INT_P (XEXP (src, 0)) +- /* Check for andnot case. */ +- && (GET_CODE (src) != AND +- || GET_CODE (XEXP (src, 0)) != NOT +- || !REG_P (XEXP (XEXP (src, 0), 0)))) +- return false; ++ else if (!always_inline ++ && caller_opts->branch_cost != callee_opts->branch_cost) ++ ret = false; + +- if (GET_MODE (XEXP (src, 0)) != DImode +- && !CONST_INT_P (XEXP (src, 0))) +- return false; ++ else ++ ret = true; + +- return true; ++ return ret; + } +- +-/* The TImode version of scalar_to_vector_candidate_p. */ ++ ++/* Return true if this goes in large data/bss. */ + + static bool +-timode_scalar_to_vector_candidate_p (rtx_insn *insn) ++ix86_in_large_data_p (tree exp) + { +- rtx def_set = single_set (insn); +- +- if (!def_set) ++ if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) + return false; + +- if (has_non_address_hard_reg (insn)) ++ if (exp == NULL_TREE) + return false; + +- rtx src = SET_SRC (def_set); +- rtx dst = SET_DEST (def_set); +- +- /* Only TImode load and store are allowed. */ +- if (GET_MODE (dst) != TImode) ++ /* Functions are never large data. */ ++ if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + +- if (MEM_P (dst)) +- { +- /* Check for store. Memory must be aligned or unaligned store +- is optimal. Only support store from register, standard SSE +- constant or CONST_WIDE_INT generated from piecewise store. +- +- ??? Verify performance impact before enabling CONST_INT for +- __int128 store. */ +- if (misaligned_operand (dst, TImode) +- && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL) +- return false; +- +- switch (GET_CODE (src)) +- { +- default: +- return false; +- +- case REG: +- case CONST_WIDE_INT: +- return true; ++ /* Automatic variables are never large data. */ ++ if (VAR_P (exp) && !is_global_var (exp)) ++ return false; + +- case CONST_INT: +- return standard_sse_constant_p (src, TImode); +- } +- } +- else if (MEM_P (src)) ++ if (VAR_P (exp) && DECL_SECTION_NAME (exp)) + { +- /* Check for load. Memory must be aligned or unaligned load is +- optimal. */ +- return (REG_P (dst) +- && (!misaligned_operand (src, TImode) +- || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)); ++ const char *section = DECL_SECTION_NAME (exp); ++ if (strcmp (section, ".ldata") == 0 ++ || strcmp (section, ".lbss") == 0) ++ return true; ++ return false; + } +- +- return false; +-} +- +-/* Return 1 if INSN may be converted into vector +- instruction. */ +- +-static bool +-scalar_to_vector_candidate_p (rtx_insn *insn) +-{ +- if (TARGET_64BIT) +- return timode_scalar_to_vector_candidate_p (insn); + else +- return dimode_scalar_to_vector_candidate_p (insn); +-} +- +-/* The DImode version of remove_non_convertible_regs. 
*/ +- +-static void +-dimode_remove_non_convertible_regs (bitmap candidates) +-{ +- bitmap_iterator bi; +- unsigned id; +- bitmap regs = BITMAP_ALLOC (NULL); +- +- EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) +- { +- rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); +- rtx reg = SET_DEST (def_set); +- +- if (!REG_P (reg) +- || bitmap_bit_p (regs, REGNO (reg)) +- || HARD_REGISTER_P (reg)) +- continue; +- +- for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg)); +- def; +- def = DF_REF_NEXT_REG (def)) +- { +- if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) +- { +- if (dump_file) +- fprintf (dump_file, +- "r%d has non convertible definition in insn %d\n", +- REGNO (reg), DF_REF_INSN_UID (def)); +- +- bitmap_set_bit (regs, REGNO (reg)); +- break; +- } +- } +- } +- +- EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) + { +- for (df_ref def = DF_REG_DEF_CHAIN (id); +- def; +- def = DF_REF_NEXT_REG (def)) +- if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) +- { +- if (dump_file) +- fprintf (dump_file, "Removing insn %d from candidates list\n", +- DF_REF_INSN_UID (def)); ++ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + +- bitmap_clear_bit (candidates, DF_REF_INSN_UID (def)); +- } ++ /* If this is an incomplete type with size 0, then we can't put it ++ in data because it might be too big when completed. Also, ++ int_size_in_bytes returns -1 if size can vary or is larger than ++ an integer in which case also it is safer to assume that it goes in ++ large data. */ ++ if (size <= 0 || size > ix86_section_threshold) ++ return true; + } + +- BITMAP_FREE (regs); ++ return false; + } + +-/* For a register REGNO, scan instructions for its defs and uses. +- Put REGNO in REGS if a def or use isn't in CANDIDATES. */ ++/* i386-specific section flag to mark large sections. */ ++#define SECTION_LARGE SECTION_MACH_DEP ++ ++/* Switch to the appropriate section for output of DECL. ++ DECL is either a `VAR_DECL' node or a constant of some sort. ++ RELOC indicates whether forming the initial value of DECL requires ++ link-time relocations. 
*/ + +-static void +-timode_check_non_convertible_regs (bitmap candidates, bitmap regs, +- unsigned int regno) ++ATTRIBUTE_UNUSED static section * ++x86_64_elf_select_section (tree decl, int reloc, ++ unsigned HOST_WIDE_INT align) + { +- for (df_ref def = DF_REG_DEF_CHAIN (regno); +- def; +- def = DF_REF_NEXT_REG (def)) ++ if (ix86_in_large_data_p (decl)) + { +- if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) ++ const char *sname = NULL; ++ unsigned int flags = SECTION_WRITE | SECTION_LARGE; ++ switch (categorize_decl_for_section (decl, reloc)) + { +- if (dump_file) +- fprintf (dump_file, +- "r%d has non convertible def in insn %d\n", +- regno, DF_REF_INSN_UID (def)); +- +- bitmap_set_bit (regs, regno); ++ case SECCAT_DATA: ++ sname = ".ldata"; ++ break; ++ case SECCAT_DATA_REL: ++ sname = ".ldata.rel"; ++ break; ++ case SECCAT_DATA_REL_LOCAL: ++ sname = ".ldata.rel.local"; ++ break; ++ case SECCAT_DATA_REL_RO: ++ sname = ".ldata.rel.ro"; ++ break; ++ case SECCAT_DATA_REL_RO_LOCAL: ++ sname = ".ldata.rel.ro.local"; ++ break; ++ case SECCAT_BSS: ++ sname = ".lbss"; ++ flags |= SECTION_BSS; ++ break; ++ case SECCAT_RODATA: ++ case SECCAT_RODATA_MERGE_STR: ++ case SECCAT_RODATA_MERGE_STR_INIT: ++ case SECCAT_RODATA_MERGE_CONST: ++ sname = ".lrodata"; ++ flags &= ~SECTION_WRITE; ++ break; ++ case SECCAT_SRODATA: ++ case SECCAT_SDATA: ++ case SECCAT_SBSS: ++ gcc_unreachable (); ++ case SECCAT_TEXT: ++ case SECCAT_TDATA: ++ case SECCAT_TBSS: ++ /* We don't split these for medium model. Place them into ++ default sections and hope for best. */ + break; + } +- } +- +- for (df_ref ref = DF_REG_USE_CHAIN (regno); +- ref; +- ref = DF_REF_NEXT_REG (ref)) +- { +- /* Debug instructions are skipped. */ +- if (NONDEBUG_INSN_P (DF_REF_INSN (ref)) +- && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) ++ if (sname) + { +- if (dump_file) +- fprintf (dump_file, +- "r%d has non convertible use in insn %d\n", +- regno, DF_REF_INSN_UID (ref)); +- +- bitmap_set_bit (regs, regno); +- break; ++ /* We might get called with string constants, but get_named_section ++ doesn't like them as they are not DECLs. Also, we need to set ++ flags in that case. */ ++ if (!DECL_P (decl)) ++ return get_section (sname, flags, NULL); ++ return get_named_section (decl, sname, reloc); + } + } ++ return default_elf_select_section (decl, reloc, align); + } + +-/* The TImode version of remove_non_convertible_regs. */ ++/* Select a set of attributes for section NAME based on the properties ++ of DECL and whether or not RELOC indicates that DECL's initializer ++ might contain runtime relocations. 
*/ + +-static void +-timode_remove_non_convertible_regs (bitmap candidates) ++static unsigned int ATTRIBUTE_UNUSED ++x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) + { +- bitmap_iterator bi; +- unsigned id; +- bitmap regs = BITMAP_ALLOC (NULL); +- +- EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) +- { +- rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); +- rtx dest = SET_DEST (def_set); +- rtx src = SET_SRC (def_set); +- +- if ((!REG_P (dest) +- || bitmap_bit_p (regs, REGNO (dest)) +- || HARD_REGISTER_P (dest)) +- && (!REG_P (src) +- || bitmap_bit_p (regs, REGNO (src)) +- || HARD_REGISTER_P (src))) +- continue; +- +- if (REG_P (dest)) +- timode_check_non_convertible_regs (candidates, regs, +- REGNO (dest)); +- +- if (REG_P (src)) +- timode_check_non_convertible_regs (candidates, regs, +- REGNO (src)); +- } +- +- EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) +- { +- for (df_ref def = DF_REG_DEF_CHAIN (id); +- def; +- def = DF_REF_NEXT_REG (def)) +- if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) +- { +- if (dump_file) +- fprintf (dump_file, "Removing insn %d from candidates list\n", +- DF_REF_INSN_UID (def)); ++ unsigned int flags = default_section_type_flags (decl, name, reloc); + +- bitmap_clear_bit (candidates, DF_REF_INSN_UID (def)); +- } ++ if (ix86_in_large_data_p (decl)) ++ flags |= SECTION_LARGE; + +- for (df_ref ref = DF_REG_USE_CHAIN (id); +- ref; +- ref = DF_REF_NEXT_REG (ref)) +- if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) +- { +- if (dump_file) +- fprintf (dump_file, "Removing insn %d from candidates list\n", +- DF_REF_INSN_UID (ref)); ++ if (decl == NULL_TREE ++ && (strcmp (name, ".ldata.rel.ro") == 0 ++ || strcmp (name, ".ldata.rel.ro.local") == 0)) ++ flags |= SECTION_RELRO; + +- bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref)); +- } +- } ++ if (strcmp (name, ".lbss") == 0 ++ || strncmp (name, ".lbss.", 5) == 0 ++ || strncmp (name, ".gnu.linkonce.lb.", 16) == 0) ++ flags |= SECTION_BSS; + +- BITMAP_FREE (regs); ++ return flags; + } + +-/* For a given bitmap of insn UIDs scans all instruction and +- remove insn from CANDIDATES in case it has both convertible +- and not convertible definitions. +- +- All insns in a bitmap are conversion candidates according to +- scalar_to_vector_candidate_p. Currently it implies all insns +- are single_set. */ +- +-static void +-remove_non_convertible_regs (bitmap candidates) +-{ +- if (TARGET_64BIT) +- timode_remove_non_convertible_regs (candidates); +- else +- dimode_remove_non_convertible_regs (candidates); +-} +- +-class scalar_chain +-{ +- public: +- scalar_chain (); +- virtual ~scalar_chain (); +- +- static unsigned max_id; +- +- /* ID of a chain. */ +- unsigned int chain_id; +- /* A queue of instructions to be included into a chain. */ +- bitmap queue; +- /* Instructions included into a chain. */ +- bitmap insns; +- /* All registers defined by a chain. */ +- bitmap defs; +- /* Registers used in both vector and sclar modes. 
*/ +- bitmap defs_conv; +- +- void build (bitmap candidates, unsigned insn_uid); +- virtual int compute_convert_gain () = 0; +- int convert (); +- +- protected: +- void add_to_queue (unsigned insn_uid); +- void emit_conversion_insns (rtx insns, rtx_insn *pos); +- +- private: +- void add_insn (bitmap candidates, unsigned insn_uid); +- void analyze_register_chain (bitmap candidates, df_ref ref); +- virtual void mark_dual_mode_def (df_ref def) = 0; +- virtual void convert_insn (rtx_insn *insn) = 0; +- virtual void convert_registers () = 0; +-}; +- +-class dimode_scalar_chain : public scalar_chain +-{ +- public: +- int compute_convert_gain (); +- private: +- void mark_dual_mode_def (df_ref def); +- rtx replace_with_subreg (rtx x, rtx reg, rtx subreg); +- void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg); +- void convert_insn (rtx_insn *insn); +- void convert_op (rtx *op, rtx_insn *insn); +- void convert_reg (unsigned regno); +- void make_vector_copies (unsigned regno); +- void convert_registers (); +- int vector_const_cost (rtx exp); +-}; ++/* Build up a unique section name, expressed as a ++ STRING_CST node, and assign it to DECL_SECTION_NAME (decl). ++ RELOC indicates whether the initial value of EXP requires ++ link-time relocations. */ + +-class timode_scalar_chain : public scalar_chain ++static void ATTRIBUTE_UNUSED ++x86_64_elf_unique_section (tree decl, int reloc) + { +- public: +- /* Convert from TImode to V1TImode is always faster. */ +- int compute_convert_gain () { return 1; } +- +- private: +- void mark_dual_mode_def (df_ref def); +- void fix_debug_reg_uses (rtx reg); +- void convert_insn (rtx_insn *insn); +- /* We don't convert registers to difference size. */ +- void convert_registers () {} +-}; +- +-unsigned scalar_chain::max_id = 0; +- +-/* Initialize new chain. */ ++ if (ix86_in_large_data_p (decl)) ++ { ++ const char *prefix = NULL; ++ /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ ++ bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP; + +-scalar_chain::scalar_chain () +-{ +- chain_id = ++max_id; ++ switch (categorize_decl_for_section (decl, reloc)) ++ { ++ case SECCAT_DATA: ++ case SECCAT_DATA_REL: ++ case SECCAT_DATA_REL_LOCAL: ++ case SECCAT_DATA_REL_RO: ++ case SECCAT_DATA_REL_RO_LOCAL: ++ prefix = one_only ? ".ld" : ".ldata"; ++ break; ++ case SECCAT_BSS: ++ prefix = one_only ? ".lb" : ".lbss"; ++ break; ++ case SECCAT_RODATA: ++ case SECCAT_RODATA_MERGE_STR: ++ case SECCAT_RODATA_MERGE_STR_INIT: ++ case SECCAT_RODATA_MERGE_CONST: ++ prefix = one_only ? ".lr" : ".lrodata"; ++ break; ++ case SECCAT_SRODATA: ++ case SECCAT_SDATA: ++ case SECCAT_SBSS: ++ gcc_unreachable (); ++ case SECCAT_TEXT: ++ case SECCAT_TDATA: ++ case SECCAT_TBSS: ++ /* We don't split these for medium model. Place them into ++ default sections and hope for best. */ ++ break; ++ } ++ if (prefix) ++ { ++ const char *name, *linkonce; ++ char *string; + +- if (dump_file) +- fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id); ++ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); ++ name = targetm.strip_name_encoding (name); + +- bitmap_obstack_initialize (NULL); +- insns = BITMAP_ALLOC (NULL); +- defs = BITMAP_ALLOC (NULL); +- defs_conv = BITMAP_ALLOC (NULL); +- queue = NULL; +-} ++ /* If we're using one_only, then there needs to be a .gnu.linkonce ++ prefix to the section name. */ ++ linkonce = one_only ? ".gnu.linkonce" : ""; + +-/* Free chain's data. 
*/ ++ string = ACONCAT ((linkonce, prefix, ".", name, NULL)); + +-scalar_chain::~scalar_chain () +-{ +- BITMAP_FREE (insns); +- BITMAP_FREE (defs); +- BITMAP_FREE (defs_conv); +- bitmap_obstack_release (NULL); ++ set_decl_section_name (decl, string); ++ return; ++ } ++ } ++ default_unique_section (decl, reloc); + } + +-/* Add instruction into chains' queue. */ +- +-void +-scalar_chain::add_to_queue (unsigned insn_uid) +-{ +- if (bitmap_bit_p (insns, insn_uid) +- || bitmap_bit_p (queue, insn_uid)) +- return; ++#ifdef COMMON_ASM_OP + +- if (dump_file) +- fprintf (dump_file, " Adding insn %d into chain's #%d queue\n", +- insn_uid, chain_id); +- bitmap_set_bit (queue, insn_uid); +-} ++#ifndef LARGECOMM_SECTION_ASM_OP ++#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t" ++#endif + +-/* For DImode conversion, mark register defined by DEF as requiring +- conversion. */ ++/* This says how to output assembler code to declare an ++ uninitialized external linkage data object. + ++ For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for ++ large objects. */ + void +-dimode_scalar_chain::mark_dual_mode_def (df_ref def) ++x86_elf_aligned_decl_common (FILE *file, tree decl, ++ const char *name, unsigned HOST_WIDE_INT size, ++ int align) + { +- gcc_assert (DF_REF_REG_DEF_P (def)); +- +- if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def))) +- return; +- +- if (dump_file) +- fprintf (dump_file, +- " Mark r%d def in insn %d as requiring both modes in chain #%d\n", +- DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id); +- +- bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); ++ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) ++ && size > (unsigned int)ix86_section_threshold) ++ { ++ switch_to_section (get_named_section (decl, ".lbss", 0)); ++ fputs (LARGECOMM_SECTION_ASM_OP, file); ++ } ++ else ++ fputs (COMMON_ASM_OP, file); ++ assemble_name (file, name); ++ fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", ++ size, align / BITS_PER_UNIT); + } ++#endif + +-/* For TImode conversion, it is unused. */ ++/* Utility function for targets to use in implementing ++ ASM_OUTPUT_ALIGNED_BSS. */ + + void +-timode_scalar_chain::mark_dual_mode_def (df_ref) ++x86_output_aligned_bss (FILE *file, tree decl, const char *name, ++ unsigned HOST_WIDE_INT size, int align) + { +- gcc_unreachable (); ++ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) ++ && size > (unsigned int)ix86_section_threshold) ++ switch_to_section (get_named_section (decl, ".lbss", 0)); ++ else ++ switch_to_section (bss_section); ++ ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); ++#ifdef ASM_DECLARE_OBJECT_NAME ++ last_assemble_variable_decl = decl; ++ ASM_DECLARE_OBJECT_NAME (file, name, decl); ++#else ++ /* Standard thing is just output label for the object. */ ++ ASM_OUTPUT_LABEL (file, name); ++#endif /* ASM_DECLARE_OBJECT_NAME */ ++ ASM_OUTPUT_SKIP (file, size ? size : 1); + } ++ ++/* Decide whether we must probe the stack before any space allocation ++ on this target. It's essentially TARGET_STACK_PROBE except when ++ -fstack-check causes the stack to be already probed differently. */ + +-/* Check REF's chain to add new insns into a queue +- and find registers requiring conversion. 
*/ +- +-void +-scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref) ++bool ++ix86_target_stack_probe (void) + { +- df_link *chain; +- +- gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)) +- || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))); +- add_to_queue (DF_REF_INSN_UID (ref)); +- +- for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next) +- { +- unsigned uid = DF_REF_INSN_UID (chain->ref); +- +- if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref))) +- continue; +- +- if (!DF_REF_REG_MEM_P (chain->ref)) +- { +- if (bitmap_bit_p (insns, uid)) +- continue; +- +- if (bitmap_bit_p (candidates, uid)) +- { +- add_to_queue (uid); +- continue; +- } +- } ++ /* Do not probe the stack twice if static stack checking is enabled. */ ++ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) ++ return false; + +- if (DF_REF_REG_DEF_P (chain->ref)) +- { +- if (dump_file) +- fprintf (dump_file, " r%d def in insn %d isn't convertible\n", +- DF_REF_REGNO (chain->ref), uid); +- mark_dual_mode_def (chain->ref); +- } +- else +- { +- if (dump_file) +- fprintf (dump_file, " r%d use in insn %d isn't convertible\n", +- DF_REF_REGNO (chain->ref), uid); +- mark_dual_mode_def (ref); +- } +- } ++ return TARGET_STACK_PROBE; + } ++ ++/* Decide whether we can make a sibling call to a function. DECL is the ++ declaration of the function being targeted by the call and EXP is the ++ CALL_EXPR representing the call. */ + +-/* Add instruction into a chain. */ +- +-void +-scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid) ++static bool ++ix86_function_ok_for_sibcall (tree decl, tree exp) + { +- if (bitmap_bit_p (insns, insn_uid)) +- return; +- +- if (dump_file) +- fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id); +- +- bitmap_set_bit (insns, insn_uid); ++ tree type, decl_or_type; ++ rtx a, b; ++ bool bind_global = decl && !targetm.binds_local_p (decl); + +- rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; +- rtx def_set = single_set (insn); +- if (def_set && REG_P (SET_DEST (def_set)) +- && !HARD_REGISTER_P (SET_DEST (def_set))) +- bitmap_set_bit (defs, REGNO (SET_DEST (def_set))); ++ if (ix86_function_naked (current_function_decl)) ++ return false; + +- df_ref ref; +- df_ref def; +- for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) +- if (!HARD_REGISTER_P (DF_REF_REG (ref))) +- for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref)); +- def; +- def = DF_REF_NEXT_REG (def)) +- analyze_register_chain (candidates, def); +- for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) +- if (!DF_REF_REG_MEM_P (ref)) +- analyze_register_chain (candidates, ref); +-} ++ /* Sibling call isn't OK if there are no caller-saved registers ++ since all registers must be preserved before return. */ ++ if (cfun->machine->no_caller_saved_registers) ++ return false; + +-/* Build new chain starting from insn INSN_UID recursively +- adding all dependent uses and definitions. */ ++ /* If we are generating position-independent code, we cannot sibcall ++ optimize direct calls to global functions, as the PLT requires ++ %ebx be live. (Darwin does not have a PLT.) */ ++ if (!TARGET_MACHO ++ && !TARGET_64BIT ++ && flag_pic ++ && flag_plt ++ && bind_global) ++ return false; + +-void +-scalar_chain::build (bitmap candidates, unsigned insn_uid) +-{ +- queue = BITMAP_ALLOC (NULL); +- bitmap_set_bit (queue, insn_uid); ++ /* If we need to align the outgoing stack, then sibcalling would ++ unalign the stack, which may break the called function. 
*/ ++ if (ix86_minimum_incoming_stack_boundary (true) ++ < PREFERRED_STACK_BOUNDARY) ++ return false; + +- if (dump_file) +- fprintf (dump_file, "Building chain #%d...\n", chain_id); ++ if (decl) ++ { ++ decl_or_type = decl; ++ type = TREE_TYPE (decl); ++ } ++ else ++ { ++ /* We're looking at the CALL_EXPR, we need the type of the function. */ ++ type = CALL_EXPR_FN (exp); /* pointer expression */ ++ type = TREE_TYPE (type); /* pointer type */ ++ type = TREE_TYPE (type); /* function type */ ++ decl_or_type = type; ++ } + +- while (!bitmap_empty_p (queue)) ++ /* Check that the return value locations are the same. Like ++ if we are returning floats on the 80387 register stack, we cannot ++ make a sibcall from a function that doesn't return a float to a ++ function that does or, conversely, from a function that does return ++ a float to a function that doesn't; the necessary stack adjustment ++ would not be executed. This is also the place we notice ++ differences in the return value ABI. Note that it is ok for one ++ of the functions to have void return type as long as the return ++ value of the other is passed in a register. */ ++ a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); ++ b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), ++ cfun->decl, false); ++ if (STACK_REG_P (a) || STACK_REG_P (b)) + { +- insn_uid = bitmap_first_set_bit (queue); +- bitmap_clear_bit (queue, insn_uid); +- bitmap_clear_bit (candidates, insn_uid); +- add_insn (candidates, insn_uid); ++ if (!rtx_equal_p (a, b)) ++ return false; + } ++ else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) ++ ; ++ else if (!rtx_equal_p (a, b)) ++ return false; + +- if (dump_file) ++ if (TARGET_64BIT) ++ { ++ /* The SYSV ABI has more call-clobbered registers; ++ disallow sibcalls from MS to SYSV. */ ++ if (cfun->machine->call_abi == MS_ABI ++ && ix86_function_type_abi (type) == SYSV_ABI) ++ return false; ++ } ++ else + { +- fprintf (dump_file, "Collected chain #%d...\n", chain_id); +- fprintf (dump_file, " insns: "); +- dump_bitmap (dump_file, insns); +- if (!bitmap_empty_p (defs_conv)) ++ /* If this call is indirect, we'll need to be able to use a ++ call-clobbered register for the address of the target function. ++ Make sure that all such registers are not used for passing ++ parameters. Note that DLLIMPORT functions and call to global ++ function via GOT slot are indirect. */ ++ if (!decl ++ || (bind_global && flag_pic && !flag_plt) ++ || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)) ++ || flag_force_indirect_call) + { +- bitmap_iterator bi; +- unsigned id; +- const char *comma = ""; +- fprintf (dump_file, " defs to convert: "); +- EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi) +- { +- fprintf (dump_file, "%sr%d", comma, id); +- comma = ", "; +- } +- fprintf (dump_file, "\n"); ++ /* Check if regparm >= 3 since arg_reg_available is set to ++ false if regparm == 0. If regparm is 1 or 2, there is ++ always a call-clobbered register available. ++ ++ ??? The symbol indirect call doesn't need a call-clobbered ++ register. But we don't know if this is a symbol indirect ++ call or not here. */ ++ if (ix86_function_regparm (type, decl) >= 3 ++ && !cfun->machine->arg_reg_available) ++ return false; + } + } + +- BITMAP_FREE (queue); ++ /* Otherwise okay. That also includes certain types of indirect calls. */ ++ return true; + } + +-/* Return a cost of building a vector costant +- instead of using a scalar one. */ ++/* This function determines from TYPE the calling-convention. 
*/ + +-int +-dimode_scalar_chain::vector_const_cost (rtx exp) ++unsigned int ++ix86_get_callcvt (const_tree type) + { +- gcc_assert (CONST_INT_P (exp)); ++ unsigned int ret = 0; ++ bool is_stdarg; ++ tree attrs; + +- if (standard_sse_constant_p (exp, V2DImode)) +- return COSTS_N_INSNS (1); +- return ix86_cost->sse_load[1]; +-} ++ if (TARGET_64BIT) ++ return IX86_CALLCVT_CDECL; + +-/* Compute a gain for chain conversion. */ ++ attrs = TYPE_ATTRIBUTES (type); ++ if (attrs != NULL_TREE) ++ { ++ if (lookup_attribute ("cdecl", attrs)) ++ ret |= IX86_CALLCVT_CDECL; ++ else if (lookup_attribute ("stdcall", attrs)) ++ ret |= IX86_CALLCVT_STDCALL; ++ else if (lookup_attribute ("fastcall", attrs)) ++ ret |= IX86_CALLCVT_FASTCALL; ++ else if (lookup_attribute ("thiscall", attrs)) ++ ret |= IX86_CALLCVT_THISCALL; + +-int +-dimode_scalar_chain::compute_convert_gain () +-{ +- bitmap_iterator bi; +- unsigned insn_uid; +- int gain = 0; +- int cost = 0; +- +- if (dump_file) +- fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); +- +- EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi) +- { +- rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; +- rtx def_set = single_set (insn); +- rtx src = SET_SRC (def_set); +- rtx dst = SET_DEST (def_set); +- +- if (REG_P (src) && REG_P (dst)) +- gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move; +- else if (REG_P (src) && MEM_P (dst)) +- gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; +- else if (MEM_P (src) && REG_P (dst)) +- gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1]; +- else if (GET_CODE (src) == ASHIFT +- || GET_CODE (src) == ASHIFTRT +- || GET_CODE (src) == LSHIFTRT) +- { +- if (CONST_INT_P (XEXP (src, 0))) +- gain -= vector_const_cost (XEXP (src, 0)); +- +- gain += ix86_cost->shift_const; +- if (INTVAL (XEXP (src, 1)) >= 32) +- gain -= COSTS_N_INSNS (1); +- } +- else if (GET_CODE (src) == PLUS +- || GET_CODE (src) == MINUS +- || GET_CODE (src) == IOR +- || GET_CODE (src) == XOR +- || GET_CODE (src) == AND) +- { +- gain += ix86_cost->add; +- /* Additional gain for andnot for targets without BMI. */ +- if (GET_CODE (XEXP (src, 0)) == NOT +- && !TARGET_BMI) +- gain += 2 * ix86_cost->add; +- +- if (CONST_INT_P (XEXP (src, 0))) +- gain -= vector_const_cost (XEXP (src, 0)); +- if (CONST_INT_P (XEXP (src, 1))) +- gain -= vector_const_cost (XEXP (src, 1)); +- } +- else if (GET_CODE (src) == NEG +- || GET_CODE (src) == NOT) +- gain += ix86_cost->add - COSTS_N_INSNS (1); +- else if (GET_CODE (src) == COMPARE) +- { +- /* Assume comparison cost is the same. */ +- } +- else if (CONST_INT_P (src)) +- { +- if (REG_P (dst)) +- gain += COSTS_N_INSNS (2); +- else if (MEM_P (dst)) +- gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; +- gain -= vector_const_cost (src); +- } +- else +- gcc_unreachable (); +- } +- +- if (dump_file) +- fprintf (dump_file, " Instruction conversion gain: %d\n", gain); +- +- EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi) +- cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer; ++ /* Regparam isn't allowed for thiscall and fastcall. 
*/ ++ if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0) ++ { ++ if (lookup_attribute ("regparm", attrs)) ++ ret |= IX86_CALLCVT_REGPARM; ++ if (lookup_attribute ("sseregparm", attrs)) ++ ret |= IX86_CALLCVT_SSEREGPARM; ++ } + +- if (dump_file) +- fprintf (dump_file, " Registers conversion cost: %d\n", cost); ++ if (IX86_BASE_CALLCVT(ret) != 0) ++ return ret; ++ } + +- gain -= cost; ++ is_stdarg = stdarg_p (type); ++ if (TARGET_RTD && !is_stdarg) ++ return IX86_CALLCVT_STDCALL | ret; + +- if (dump_file) +- fprintf (dump_file, " Total gain: %d\n", gain); ++ if (ret != 0 ++ || is_stdarg ++ || TREE_CODE (type) != METHOD_TYPE ++ || ix86_function_type_abi (type) != MS_ABI) ++ return IX86_CALLCVT_CDECL | ret; + +- return gain; ++ return IX86_CALLCVT_THISCALL; + } + +-/* Replace REG in X with a V2DI subreg of NEW_REG. */ ++/* Return 0 if the attributes for two types are incompatible, 1 if they ++ are compatible, and 2 if they are nearly compatible (which causes a ++ warning to be generated). */ + +-rtx +-dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg) ++static int ++ix86_comp_type_attributes (const_tree type1, const_tree type2) + { +- if (x == reg) +- return gen_rtx_SUBREG (V2DImode, new_reg, 0); ++ unsigned int ccvt1, ccvt2; + +- const char *fmt = GET_RTX_FORMAT (GET_CODE (x)); +- int i, j; +- for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) +- { +- if (fmt[i] == 'e') +- XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg); +- else if (fmt[i] == 'E') +- for (j = XVECLEN (x, i) - 1; j >= 0; j--) +- XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j), +- reg, new_reg); +- } ++ if (TREE_CODE (type1) != FUNCTION_TYPE ++ && TREE_CODE (type1) != METHOD_TYPE) ++ return 1; + +- return x; +-} ++ ccvt1 = ix86_get_callcvt (type1); ++ ccvt2 = ix86_get_callcvt (type2); ++ if (ccvt1 != ccvt2) ++ return 0; ++ if (ix86_function_regparm (type1, NULL) ++ != ix86_function_regparm (type2, NULL)) ++ return 0; + +-/* Replace REG in INSN with a V2DI subreg of NEW_REG. */ ++ return 1; ++} ++ ++/* Return the regparm value for a function with the indicated TYPE and DECL. ++ DECL may be NULL when calling function indirectly ++ or considering a libcall. */ + +-void +-dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, +- rtx reg, rtx new_reg) ++static int ++ix86_function_regparm (const_tree type, const_tree decl) + { +- replace_with_subreg (single_set (insn), reg, new_reg); +-} ++ tree attr; ++ int regparm; ++ unsigned int ccvt; + +-/* Insert generated conversion instruction sequence INSNS +- after instruction AFTER. New BB may be required in case +- instruction has EH region attached. */ ++ if (TARGET_64BIT) ++ return (ix86_function_type_abi (type) == SYSV_ABI ++ ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX); ++ ccvt = ix86_get_callcvt (type); ++ regparm = ix86_regparm; + +-void +-scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after) +-{ +- if (!control_flow_insn_p (after)) ++ if ((ccvt & IX86_CALLCVT_REGPARM) != 0) + { +- emit_insn_after (insns, after); +- return; ++ attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); ++ if (attr) ++ { ++ regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); ++ return regparm; ++ } + } ++ else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) ++ return 2; ++ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) ++ return 1; + +- basic_block bb = BLOCK_FOR_INSN (after); +- edge e = find_fallthru_edge (bb->succs); +- gcc_assert (e); ++ /* Use register calling convention for local functions when possible. 
*/ ++ if (decl ++ && TREE_CODE (decl) == FUNCTION_DECL) ++ { ++ cgraph_node *target = cgraph_node::get (decl); ++ if (target) ++ target = target->function_symbol (); + +- basic_block new_bb = split_edge (e); +- emit_insn_after (insns, BB_HEAD (new_bb)); +-} ++ /* Caller and callee must agree on the calling convention, so ++ checking here just optimize means that with ++ __attribute__((optimize (...))) caller could use regparm convention ++ and callee not, or vice versa. Instead look at whether the callee ++ is optimized or not. */ ++ if (target && opt_for_fn (target->decl, optimize) ++ && !(profile_flag && !flag_fentry)) ++ { ++ cgraph_local_info *i = &target->local; ++ if (i && i->local && i->can_change_signature) ++ { ++ int local_regparm, globals = 0, regno; + +-/* Make vector copies for all register REGNO definitions +- and replace its uses in a chain. */ ++ /* Make sure no regparm register is taken by a ++ fixed register variable. */ ++ for (local_regparm = 0; local_regparm < REGPARM_MAX; ++ local_regparm++) ++ if (fixed_regs[local_regparm]) ++ break; + +-void +-dimode_scalar_chain::make_vector_copies (unsigned regno) +-{ +- rtx reg = regno_reg_rtx[regno]; +- rtx vreg = gen_reg_rtx (DImode); +- df_ref ref; ++ /* We don't want to use regparm(3) for nested functions as ++ these use a static chain pointer in the third argument. */ ++ if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl)) ++ local_regparm = 2; + +- for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) +- if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) +- { +- start_sequence (); ++ /* Save a register for the split stack. */ ++ if (flag_split_stack) ++ { ++ if (local_regparm == 3) ++ local_regparm = 2; ++ else if (local_regparm == 2 ++ && DECL_STATIC_CHAIN (target->decl)) ++ local_regparm = 1; ++ } + +- if (!TARGET_INTER_UNIT_MOVES_TO_VEC) +- { +- rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP); +- emit_move_insn (adjust_address (tmp, SImode, 0), +- gen_rtx_SUBREG (SImode, reg, 0)); +- emit_move_insn (adjust_address (tmp, SImode, 4), +- gen_rtx_SUBREG (SImode, reg, 4)); +- emit_move_insn (vreg, tmp); +- } +- else if (TARGET_SSE4_1) +- { +- emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), +- CONST0_RTX (V4SImode), +- gen_rtx_SUBREG (SImode, reg, 0))); +- emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0), +- gen_rtx_SUBREG (V4SImode, vreg, 0), +- gen_rtx_SUBREG (SImode, reg, 4), +- GEN_INT (2))); +- } +- else +- { +- rtx tmp = gen_reg_rtx (DImode); +- emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), +- CONST0_RTX (V4SImode), +- gen_rtx_SUBREG (SImode, reg, 0))); +- emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0), +- CONST0_RTX (V4SImode), +- gen_rtx_SUBREG (SImode, reg, 4))); +- emit_insn (gen_vec_interleave_lowv4si +- (gen_rtx_SUBREG (V4SImode, vreg, 0), +- gen_rtx_SUBREG (V4SImode, vreg, 0), +- gen_rtx_SUBREG (V4SImode, tmp, 0))); +- } +- rtx_insn *seq = get_insns (); +- end_sequence (); +- rtx_insn *insn = DF_REF_INSN (ref); +- emit_conversion_insns (seq, insn); +- +- if (dump_file) +- fprintf (dump_file, +- " Copied r%d to a vector register r%d for insn %d\n", +- regno, REGNO (vreg), INSN_UID (insn)); +- } ++ /* Each fixed register usage increases register pressure, ++ so less registers should be used for argument passing. ++ This functionality can be overriden by an explicit ++ regparm value. 
*/ ++ for (regno = AX_REG; regno <= DI_REG; regno++) ++ if (fixed_regs[regno]) ++ globals++; + +- for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) +- if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) +- { +- rtx_insn *insn = DF_REF_INSN (ref); ++ local_regparm ++ = globals < local_regparm ? local_regparm - globals : 0; + +- replace_with_subreg_in_insn (insn, reg, vreg); ++ if (local_regparm > regparm) ++ regparm = local_regparm; ++ } ++ } ++ } + +- if (dump_file) +- fprintf (dump_file, " Replaced r%d with r%d in insn %d\n", +- regno, REGNO (vreg), INSN_UID (insn)); +- } ++ return regparm; + } + +-/* Convert all definitions of register REGNO +- and fix its uses. Scalar copies may be created +- in case register is used in not convertible insn. */ ++/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and ++ DFmode (2) arguments in SSE registers for a function with the ++ indicated TYPE and DECL. DECL may be NULL when calling function ++ indirectly or considering a libcall. Return -1 if any FP parameter ++ should be rejected by error. This is used in siutation we imply SSE ++ calling convetion but the function is called from another function with ++ SSE disabled. Otherwise return 0. */ + +-void +-dimode_scalar_chain::convert_reg (unsigned regno) ++static int ++ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) + { +- bool scalar_copy = bitmap_bit_p (defs_conv, regno); +- rtx reg = regno_reg_rtx[regno]; +- rtx scopy = NULL_RTX; +- df_ref ref; +- bitmap conv; +- +- conv = BITMAP_ALLOC (NULL); +- bitmap_copy (conv, insns); +- +- if (scalar_copy) +- scopy = gen_reg_rtx (DImode); ++ gcc_assert (!TARGET_64BIT); + +- for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) ++ /* Use SSE registers to pass SFmode and DFmode arguments if requested ++ by the sseregparm attribute. 
*/ ++ if (TARGET_SSEREGPARM ++ || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) + { +- rtx_insn *insn = DF_REF_INSN (ref); +- rtx def_set = single_set (insn); +- rtx src = SET_SRC (def_set); +- rtx reg = DF_REF_REG (ref); +- +- if (!MEM_P (src)) +- { +- replace_with_subreg_in_insn (insn, reg, reg); +- bitmap_clear_bit (conv, INSN_UID (insn)); +- } +- +- if (scalar_copy) ++ if (!TARGET_SSE) + { +- start_sequence (); +- if (!TARGET_INTER_UNIT_MOVES_FROM_VEC) +- { +- rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP); +- emit_move_insn (tmp, reg); +- emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), +- adjust_address (tmp, SImode, 0)); +- emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), +- adjust_address (tmp, SImode, 4)); +- } +- else if (TARGET_SSE4_1) +- { +- rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); +- emit_insn +- (gen_rtx_SET +- (gen_rtx_SUBREG (SImode, scopy, 0), +- gen_rtx_VEC_SELECT (SImode, +- gen_rtx_SUBREG (V4SImode, reg, 0), tmp))); +- +- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx)); +- emit_insn +- (gen_rtx_SET +- (gen_rtx_SUBREG (SImode, scopy, 4), +- gen_rtx_VEC_SELECT (SImode, +- gen_rtx_SUBREG (V4SImode, reg, 0), tmp))); +- } +- else ++ if (warn) + { +- rtx vcopy = gen_reg_rtx (V2DImode); +- emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0)); +- emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), +- gen_rtx_SUBREG (SImode, vcopy, 0)); +- emit_move_insn (vcopy, +- gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32))); +- emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), +- gen_rtx_SUBREG (SImode, vcopy, 0)); ++ if (decl) ++ error ("calling %qD with attribute sseregparm without " ++ "SSE/SSE2 enabled", decl); ++ else ++ error ("calling %qT with attribute sseregparm without " ++ "SSE/SSE2 enabled", type); + } +- rtx_insn *seq = get_insns (); +- end_sequence (); +- emit_conversion_insns (seq, insn); +- +- if (dump_file) +- fprintf (dump_file, +- " Copied r%d to a scalar register r%d for insn %d\n", +- regno, REGNO (scopy), INSN_UID (insn)); ++ return 0; + } ++ ++ return 2; + } + +- for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) +- if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) +- { +- if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref))) +- { +- rtx_insn *insn = DF_REF_INSN (ref); ++ if (!decl) ++ return 0; + +- rtx def_set = single_set (insn); +- gcc_assert (def_set); ++ cgraph_node *target = cgraph_node::get (decl); ++ if (target) ++ target = target->function_symbol (); + +- rtx src = SET_SRC (def_set); +- rtx dst = SET_DEST (def_set); ++ /* For local functions, pass up to SSE_REGPARM_MAX SFmode ++ (and DFmode for SSE2) arguments in SSE registers. */ ++ if (target ++ /* TARGET_SSE_MATH */ ++ && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE) ++ && opt_for_fn (target->decl, optimize) ++ && !(profile_flag && !flag_fentry)) ++ { ++ cgraph_local_info *i = &target->local; ++ if (i && i->local && i->can_change_signature) ++ { ++ /* Refuse to produce wrong code when local function with SSE enabled ++ is called from SSE disabled function. ++ FIXME: We need a way to detect these cases cross-ltrans partition ++ and avoid using SSE calling conventions on local functions called ++ from function with SSE disabled. For now at least delay the ++ warning until we know we are going to produce wrong code. ++ See PR66047 */ ++ if (!TARGET_SSE && warn) ++ return -1; ++ return TARGET_SSE2_P (target_opts_for_fn (target->decl) ++ ->x_ix86_isa_flags) ? 
2 : 1; ++ } ++ } + +- if (!MEM_P (dst) || !REG_P (src)) +- replace_with_subreg_in_insn (insn, reg, reg); ++ return 0; ++} + +- bitmap_clear_bit (conv, INSN_UID (insn)); +- } +- } +- /* Skip debug insns and uninitialized uses. */ +- else if (DF_REF_CHAIN (ref) +- && NONDEBUG_INSN_P (DF_REF_INSN (ref))) +- { +- gcc_assert (scopy); +- replace_rtx (DF_REF_INSN (ref), reg, scopy); +- df_insn_rescan (DF_REF_INSN (ref)); +- } ++/* Return true if EAX is live at the start of the function. Used by ++ ix86_expand_prologue to determine if we need special help before ++ calling allocate_stack_worker. */ + +- BITMAP_FREE (conv); ++static bool ++ix86_eax_live_at_start_p (void) ++{ ++ /* Cheat. Don't bother working forward from ix86_function_regparm ++ to the function type to whether an actual argument is located in ++ eax. Instead just look at cfg info, which is still close enough ++ to correct at this point. This gives false positives for broken ++ functions that might use uninitialized data that happens to be ++ allocated in eax, but who cares? */ ++ return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0); + } + +-/* Convert operand OP in INSN. We should handle +- memory operands and uninitialized registers. +- All other register uses are converted during +- registers conversion. */ +- +-void +-dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn) ++static bool ++ix86_keep_aggregate_return_pointer (tree fntype) + { +- *op = copy_rtx_if_shared (*op); ++ tree attr; + +- if (GET_CODE (*op) == NOT) +- { +- convert_op (&XEXP (*op, 0), insn); +- PUT_MODE (*op, V2DImode); +- } +- else if (MEM_P (*op)) ++ if (!TARGET_64BIT) + { +- rtx tmp = gen_reg_rtx (DImode); +- +- emit_insn_before (gen_move_insn (tmp, *op), insn); +- *op = gen_rtx_SUBREG (V2DImode, tmp, 0); ++ attr = lookup_attribute ("callee_pop_aggregate_return", ++ TYPE_ATTRIBUTES (fntype)); ++ if (attr) ++ return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0); + +- if (dump_file) +- fprintf (dump_file, " Preloading operand for insn %d into r%d\n", +- INSN_UID (insn), REGNO (tmp)); +- } +- else if (REG_P (*op)) +- { +- /* We may have not converted register usage in case +- this register has no definition. Otherwise it +- should be converted in convert_reg. */ +- df_ref ref; +- FOR_EACH_INSN_USE (ref, insn) +- if (DF_REF_REGNO (ref) == REGNO (*op)) +- { +- gcc_assert (!DF_REF_CHAIN (ref)); +- break; +- } +- *op = gen_rtx_SUBREG (V2DImode, *op, 0); +- } +- else if (CONST_INT_P (*op)) +- { +- rtx vec_cst; +- rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0); +- +- /* Prefer all ones vector in case of -1. */ +- if (constm1_operand (*op, GET_MODE (*op))) +- vec_cst = CONSTM1_RTX (V2DImode); +- else +- vec_cst = gen_rtx_CONST_VECTOR (V2DImode, +- gen_rtvec (2, *op, const0_rtx)); +- +- if (!standard_sse_constant_p (vec_cst, V2DImode)) +- { +- start_sequence (); +- vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst)); +- rtx_insn *seq = get_insns (); +- end_sequence (); +- emit_insn_before (seq, insn); +- } +- +- emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn); +- *op = tmp; +- } +- else +- { +- gcc_assert (SUBREG_P (*op)); +- gcc_assert (GET_MODE (*op) == V2DImode); ++ /* For 32-bit MS-ABI the default is to keep aggregate ++ return pointer. */ ++ if (ix86_function_type_abi (fntype) == MS_ABI) ++ return true; + } ++ return KEEP_AGGREGATE_RETURN_POINTER != 0; + } + +-/* Convert INSN to vector mode. 
*/ +- +-void +-dimode_scalar_chain::convert_insn (rtx_insn *insn) +-{ +- rtx def_set = single_set (insn); +- rtx src = SET_SRC (def_set); +- rtx dst = SET_DEST (def_set); +- rtx subreg; ++/* Value is the number of bytes of arguments automatically ++ popped when returning from a subroutine call. ++ FUNDECL is the declaration node of the function (as a tree), ++ FUNTYPE is the data type of the function (as a tree), ++ or for a library call it is an identifier node for the subroutine name. ++ SIZE is the number of bytes of arguments passed on the stack. + +- if (MEM_P (dst) && !REG_P (src)) +- { +- /* There are no scalar integer instructions and therefore +- temporary register usage is required. */ +- rtx tmp = gen_reg_rtx (DImode); +- emit_conversion_insns (gen_move_insn (dst, tmp), insn); +- dst = gen_rtx_SUBREG (V2DImode, tmp, 0); +- } ++ On the 80386, the RTD insn may be used to pop them if the number ++ of args is fixed, but if the number is variable then the caller ++ must pop them all. RTD can't be used for library calls now ++ because the library is compiled with the Unix compiler. ++ Use of RTD is a selectable option, since it is incompatible with ++ standard Unix calling sequences. If the option is not selected, ++ the caller must always pop the args. + +- switch (GET_CODE (src)) +- { +- case ASHIFT: +- case ASHIFTRT: +- case LSHIFTRT: +- convert_op (&XEXP (src, 0), insn); +- PUT_MODE (src, V2DImode); +- break; ++ The attribute stdcall is equivalent to RTD on a per module basis. */ + +- case PLUS: +- case MINUS: +- case IOR: +- case XOR: +- case AND: +- convert_op (&XEXP (src, 0), insn); +- convert_op (&XEXP (src, 1), insn); +- PUT_MODE (src, V2DImode); +- break; ++static poly_int64 ++ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size) ++{ ++ unsigned int ccvt; + +- case NEG: +- src = XEXP (src, 0); +- convert_op (&src, insn); +- subreg = gen_reg_rtx (V2DImode); +- emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn); +- src = gen_rtx_MINUS (V2DImode, subreg, src); +- break; ++ /* None of the 64-bit ABIs pop arguments. */ ++ if (TARGET_64BIT) ++ return 0; + +- case NOT: +- src = XEXP (src, 0); +- convert_op (&src, insn); +- subreg = gen_reg_rtx (V2DImode); +- emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn); +- src = gen_rtx_XOR (V2DImode, src, subreg); +- break; ++ ccvt = ix86_get_callcvt (funtype); + +- case MEM: +- if (!REG_P (dst)) +- convert_op (&src, insn); +- break; ++ if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL ++ | IX86_CALLCVT_THISCALL)) != 0 ++ && ! stdarg_p (funtype)) ++ return size; + +- case REG: +- if (!MEM_P (dst)) +- convert_op (&src, insn); +- break; ++ /* Lose any fake structure return argument if it is passed on the stack. */ ++ if (aggregate_value_p (TREE_TYPE (funtype), fundecl) ++ && !ix86_keep_aggregate_return_pointer (funtype)) ++ { ++ int nregs = ix86_function_regparm (funtype, fundecl); ++ if (nregs == 0) ++ return GET_MODE_SIZE (Pmode); ++ } + +- case SUBREG: +- gcc_assert (GET_MODE (src) == V2DImode); +- break; ++ return 0; ++} + +- case COMPARE: +- src = SUBREG_REG (XEXP (XEXP (src, 0), 0)); ++/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. 
*/ + +- gcc_assert ((REG_P (src) && GET_MODE (src) == DImode) +- || (SUBREG_P (src) && GET_MODE (src) == V2DImode)); ++static bool ++ix86_legitimate_combined_insn (rtx_insn *insn) ++{ ++ int i; + +- if (REG_P (src)) +- subreg = gen_rtx_SUBREG (V2DImode, src, 0); +- else +- subreg = copy_rtx_if_shared (src); +- emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg), +- copy_rtx_if_shared (subreg), +- copy_rtx_if_shared (subreg)), +- insn); +- dst = gen_rtx_REG (CCmode, FLAGS_REG); +- src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src), +- copy_rtx_if_shared (src)), +- UNSPEC_PTEST); +- break; ++ /* Check operand constraints in case hard registers were propagated ++ into insn pattern. This check prevents combine pass from ++ generating insn patterns with invalid hard register operands. ++ These invalid insns can eventually confuse reload to error out ++ with a spill failure. See also PRs 46829 and 46843. */ + +- case CONST_INT: +- convert_op (&src, insn); +- break; ++ gcc_assert (INSN_CODE (insn) >= 0); + +- default: +- gcc_unreachable (); +- } ++ extract_insn (insn); ++ preprocess_constraints (insn); + +- SET_SRC (def_set) = src; +- SET_DEST (def_set) = dst; ++ int n_operands = recog_data.n_operands; ++ int n_alternatives = recog_data.n_alternatives; ++ for (i = 0; i < n_operands; i++) ++ { ++ rtx op = recog_data.operand[i]; ++ machine_mode mode = GET_MODE (op); ++ const operand_alternative *op_alt; ++ int offset = 0; ++ bool win; ++ int j; + +- /* Drop possible dead definitions. */ +- PATTERN (insn) = def_set; ++ /* A unary operator may be accepted by the predicate, but it ++ is irrelevant for matching constraints. */ ++ if (UNARY_P (op)) ++ op = XEXP (op, 0); + +- INSN_CODE (insn) = -1; +- recog_memoized (insn); +- df_insn_rescan (insn); +-} ++ if (SUBREG_P (op)) ++ { ++ if (REG_P (SUBREG_REG (op)) ++ && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER) ++ offset = subreg_regno_offset (REGNO (SUBREG_REG (op)), ++ GET_MODE (SUBREG_REG (op)), ++ SUBREG_BYTE (op), ++ GET_MODE (op)); ++ op = SUBREG_REG (op); ++ } + +-/* Fix uses of converted REG in debug insns. */ ++ if (!(REG_P (op) && HARD_REGISTER_P (op))) ++ continue; + +-void +-timode_scalar_chain::fix_debug_reg_uses (rtx reg) +-{ +- if (!flag_var_tracking) +- return; ++ op_alt = recog_op_alt; + +- df_ref ref, next; +- for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next) +- { +- rtx_insn *insn = DF_REF_INSN (ref); +- /* Make sure the next ref is for a different instruction, +- so that we're not affected by the rescan. */ +- next = DF_REF_NEXT_REG (ref); +- while (next && DF_REF_INSN (next) == insn) +- next = DF_REF_NEXT_REG (next); ++ /* Operand has no constraints, anything is OK. */ ++ win = !n_alternatives; + +- if (DEBUG_INSN_P (insn)) ++ alternative_mask preferred = get_preferred_alternatives (insn); ++ for (j = 0; j < n_alternatives; j++, op_alt += n_operands) + { +- /* It may be a debug insn with a TImode variable in +- register. 
*/ +- bool changed = false; +- for (; ref != next; ref = DF_REF_NEXT_REG (ref)) ++ if (!TEST_BIT (preferred, j)) ++ continue; ++ if (op_alt[i].anything_ok ++ || (op_alt[i].matches != -1 ++ && operands_match_p ++ (recog_data.operand[i], ++ recog_data.operand[op_alt[i].matches])) ++ || reg_fits_class_p (op, op_alt[i].cl, offset, mode)) + { +- rtx *loc = DF_REF_LOC (ref); +- if (REG_P (*loc) && GET_MODE (*loc) == V1TImode) +- { +- *loc = gen_rtx_SUBREG (TImode, *loc, 0); +- changed = true; +- } ++ win = true; ++ break; + } +- if (changed) +- df_insn_rescan (insn); + } ++ ++ if (!win) ++ return false; + } ++ ++ return true; + } ++ ++/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ + +-/* Convert INSN from TImode to V1T1mode. */ ++static unsigned HOST_WIDE_INT ++ix86_asan_shadow_offset (void) ++{ ++ return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44) ++ : HOST_WIDE_INT_C (0x7fff8000)) ++ : (HOST_WIDE_INT_1 << 29); ++} ++ ++/* Argument support functions. */ + +-void +-timode_scalar_chain::convert_insn (rtx_insn *insn) ++/* Return true when register may be used to pass function parameters. */ ++bool ++ix86_function_arg_regno_p (int regno) + { +- rtx def_set = single_set (insn); +- rtx src = SET_SRC (def_set); +- rtx dst = SET_DEST (def_set); ++ int i; ++ enum calling_abi call_abi; ++ const int *parm_regs; + +- switch (GET_CODE (dst)) ++ if (!TARGET_64BIT) + { +- case REG: +- { +- rtx tmp = find_reg_equal_equiv_note (insn); +- if (tmp) +- PUT_MODE (XEXP (tmp, 0), V1TImode); +- PUT_MODE (dst, V1TImode); +- fix_debug_reg_uses (dst); +- } +- break; +- case MEM: +- PUT_MODE (dst, V1TImode); +- break; +- +- default: +- gcc_unreachable (); ++ if (TARGET_MACHO) ++ return (regno < REGPARM_MAX ++ || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); ++ else ++ return (regno < REGPARM_MAX ++ || (TARGET_MMX && MMX_REGNO_P (regno) ++ && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) ++ || (TARGET_SSE && SSE_REGNO_P (regno) ++ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); + } + +- switch (GET_CODE (src)) +- { +- case REG: +- PUT_MODE (src, V1TImode); +- /* Call fix_debug_reg_uses only if SRC is never defined. */ +- if (!DF_REG_DEF_CHAIN (REGNO (src))) +- fix_debug_reg_uses (src); +- break; +- +- case MEM: +- PUT_MODE (src, V1TImode); +- break; +- +- case CONST_WIDE_INT: +- if (NONDEBUG_INSN_P (insn)) +- { +- /* Since there are no instructions to store 128-bit constant, +- temporary register usage is required. */ +- rtx tmp = gen_reg_rtx (V1TImode); +- start_sequence (); +- src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src)); +- src = validize_mem (force_const_mem (V1TImode, src)); +- rtx_insn *seq = get_insns (); +- end_sequence (); +- if (seq) +- emit_insn_before (seq, insn); +- emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); +- dst = tmp; +- } +- break; +- +- case CONST_INT: +- switch (standard_sse_constant_p (src, TImode)) +- { +- case 1: +- src = CONST0_RTX (GET_MODE (dst)); +- break; +- case 2: +- src = CONSTM1_RTX (GET_MODE (dst)); +- break; +- default: +- gcc_unreachable (); +- } +- if (NONDEBUG_INSN_P (insn)) +- { +- rtx tmp = gen_reg_rtx (V1TImode); +- /* Since there are no instructions to store standard SSE +- constant, temporary register usage is required. 
*/ +- emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); +- dst = tmp; +- } +- break; ++ if (TARGET_SSE && SSE_REGNO_P (regno) ++ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) ++ return true; + +- default: +- gcc_unreachable (); +- } ++ /* TODO: The function should depend on current function ABI but ++ builtins.c would need updating then. Therefore we use the ++ default ABI. */ ++ call_abi = ix86_cfun_abi (); + +- SET_SRC (def_set) = src; +- SET_DEST (def_set) = dst; ++ /* RAX is used as hidden argument to va_arg functions. */ ++ if (call_abi == SYSV_ABI && regno == AX_REG) ++ return true; + +- /* Drop possible dead definitions. */ +- PATTERN (insn) = def_set; ++ if (call_abi == MS_ABI) ++ parm_regs = x86_64_ms_abi_int_parameter_registers; ++ else ++ parm_regs = x86_64_int_parameter_registers; + +- INSN_CODE (insn) = -1; +- recog_memoized (insn); +- df_insn_rescan (insn); ++ for (i = 0; i < (call_abi == MS_ABI ++ ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) ++ if (regno == parm_regs[i]) ++ return true; ++ return false; + } + +-void +-dimode_scalar_chain::convert_registers () +-{ +- bitmap_iterator bi; +- unsigned id; ++/* Return if we do not know how to pass ARG solely in registers. */ + +- EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi) +- convert_reg (id); ++static bool ++ix86_must_pass_in_stack (const function_arg_info &arg) ++{ ++ if (must_pass_in_stack_var_size_or_pad (arg)) ++ return true; + +- EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi) +- make_vector_copies (id); ++ /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! ++ The layout_type routine is crafty and tries to trick us into passing ++ currently unsupported vector types on the stack by using TImode. */ ++ return (!TARGET_64BIT && arg.mode == TImode ++ && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE); + } + +-/* Convert whole chain creating required register +- conversions and copies. */ +- ++/* It returns the size, in bytes, of the area reserved for arguments passed ++ in registers for the function represented by fndecl dependent to the used ++ abi format. */ + int +-scalar_chain::convert () ++ix86_reg_parm_stack_space (const_tree fndecl) + { +- bitmap_iterator bi; +- unsigned id; +- int converted_insns = 0; +- +- if (!dbg_cnt (stv_conversion)) +- return 0; +- +- if (dump_file) +- fprintf (dump_file, "Converting chain #%d...\n", chain_id); ++ enum calling_abi call_abi = SYSV_ABI; ++ if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) ++ call_abi = ix86_function_abi (fndecl); ++ else ++ call_abi = ix86_function_type_abi (fndecl); ++ if (TARGET_64BIT && call_abi == MS_ABI) ++ return 32; ++ return 0; ++} + +- convert_registers (); +- +- EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) +- { +- convert_insn (DF_INSN_UID_GET (id)->insn); +- converted_insns++; +- } +- +- return converted_insns; ++/* We add this as a workaround in order to use libc_has_function ++ hook in i386.md. */ ++bool ++ix86_libc_has_function (enum function_class fn_class) ++{ ++ return targetm.libc_has_function (fn_class); + } + +-/* Main STV pass function. Find and convert scalar +- instructions into vector mode when profitable. */ +- +-static unsigned int +-convert_scalars_to_vector () ++/* Returns value SYSV_ABI, MS_ABI dependent on fntype, ++ specifying the call abi used. 
*/ ++enum calling_abi ++ix86_function_type_abi (const_tree fntype) + { +- basic_block bb; +- bitmap candidates; +- int converted_insns = 0; +- +- bitmap_obstack_initialize (NULL); +- candidates = BITMAP_ALLOC (NULL); +- +- calculate_dominance_info (CDI_DOMINATORS); +- df_set_flags (DF_DEFER_INSN_RESCAN); +- df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); +- df_md_add_problem (); +- df_analyze (); +- +- /* Find all instructions we want to convert into vector mode. */ +- if (dump_file) +- fprintf (dump_file, "Searching for mode conversion candidates...\n"); +- +- FOR_EACH_BB_FN (bb, cfun) +- { +- rtx_insn *insn; +- FOR_BB_INSNS (bb, insn) +- if (scalar_to_vector_candidate_p (insn)) +- { +- if (dump_file) +- fprintf (dump_file, " insn %d is marked as a candidate\n", +- INSN_UID (insn)); +- +- bitmap_set_bit (candidates, INSN_UID (insn)); +- } +- } +- +- remove_non_convertible_regs (candidates); ++ enum calling_abi abi = ix86_abi; + +- if (bitmap_empty_p (candidates)) +- if (dump_file) +- fprintf (dump_file, "There are no candidates for optimization.\n"); ++ if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE) ++ return abi; + +- while (!bitmap_empty_p (candidates)) ++ if (abi == SYSV_ABI ++ && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) + { +- unsigned uid = bitmap_first_set_bit (candidates); +- scalar_chain *chain; +- +- if (TARGET_64BIT) +- chain = new timode_scalar_chain; +- else +- chain = new dimode_scalar_chain; +- +- /* Find instructions chain we want to convert to vector mode. +- Check all uses and definitions to estimate all required +- conversions. */ +- chain->build (candidates, uid); +- +- if (chain->compute_convert_gain () > 0) +- converted_insns += chain->convert (); +- else +- if (dump_file) +- fprintf (dump_file, "Chain #%d conversion is not profitable\n", +- chain->chain_id); +- +- delete chain; +- } +- +- if (dump_file) +- fprintf (dump_file, "Total insns converted: %d\n", converted_insns); +- +- BITMAP_FREE (candidates); +- bitmap_obstack_release (NULL); +- df_process_deferred_rescans (); ++ static int warned; ++ if (TARGET_X32 && !warned) ++ { ++ error ("X32 does not support % attribute"); ++ warned = 1; ++ } + +- /* Conversion means we may have 128bit register spills/fills +- which require aligned stack. */ +- if (converted_insns) +- { +- if (crtl->stack_alignment_needed < 128) +- crtl->stack_alignment_needed = 128; +- if (crtl->stack_alignment_estimated < 128) +- crtl->stack_alignment_estimated = 128; +- /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. 
*/ +- if (TARGET_64BIT) +- for (tree parm = DECL_ARGUMENTS (current_function_decl); +- parm; parm = DECL_CHAIN (parm)) +- { +- if (TYPE_MODE (TREE_TYPE (parm)) != TImode) +- continue; +- if (DECL_RTL_SET_P (parm) +- && GET_MODE (DECL_RTL (parm)) == V1TImode) +- { +- rtx r = DECL_RTL (parm); +- if (REG_P (r)) +- SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0)); +- } +- if (DECL_INCOMING_RTL (parm) +- && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode) +- { +- rtx r = DECL_INCOMING_RTL (parm); +- if (REG_P (r)) +- DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0); +- } +- } ++ abi = MS_ABI; + } ++ else if (abi == MS_ABI ++ && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) ++ abi = SYSV_ABI; + +- return 0; ++ return abi; + } + +-namespace { +- +-const pass_data pass_data_insert_vzeroupper = ++enum calling_abi ++ix86_function_abi (const_tree fndecl) + { +- RTL_PASS, /* type */ +- "vzeroupper", /* name */ +- OPTGROUP_NONE, /* optinfo_flags */ +- TV_MACH_DEP, /* tv_id */ +- 0, /* properties_required */ +- 0, /* properties_provided */ +- 0, /* properties_destroyed */ +- 0, /* todo_flags_start */ +- TODO_df_finish, /* todo_flags_finish */ +-}; ++ return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi; ++} + +-class pass_insert_vzeroupper : public rtl_opt_pass ++/* Returns value SYSV_ABI, MS_ABI dependent on cfun, ++ specifying the call abi used. */ ++enum calling_abi ++ix86_cfun_abi (void) + { +-public: +- pass_insert_vzeroupper(gcc::context *ctxt) +- : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt) +- {} ++ return cfun ? cfun->machine->call_abi : ix86_abi; ++} + +- /* opt_pass methods: */ +- virtual bool gate (function *) ++bool ++ix86_function_ms_hook_prologue (const_tree fn) ++{ ++ if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn))) + { +- return TARGET_AVX +- && TARGET_VZEROUPPER && flag_expensive_optimizations +- && !optimize_size; ++ if (decl_function_context (fn) != NULL_TREE) ++ error_at (DECL_SOURCE_LOCATION (fn), ++ "% attribute is not compatible " ++ "with nested function"); ++ else ++ return true; + } ++ return false; ++} + +- virtual unsigned int execute (function *) +- { +- return rest_of_handle_insert_vzeroupper (); +- } ++bool ++ix86_function_naked (const_tree fn) ++{ ++ if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn))) ++ return true; + +-}; // class pass_insert_vzeroupper ++ return false; ++} + +-const pass_data pass_data_stv = +-{ +- RTL_PASS, /* type */ +- "stv", /* name */ +- OPTGROUP_NONE, /* optinfo_flags */ +- TV_MACH_DEP, /* tv_id */ +- 0, /* properties_required */ +- 0, /* properties_provided */ +- 0, /* properties_destroyed */ +- 0, /* todo_flags_start */ +- TODO_df_finish, /* todo_flags_finish */ +-}; ++/* Write the extra assembler code needed to declare a function properly. */ + +-class pass_stv : public rtl_opt_pass ++void ++ix86_asm_output_function_label (FILE *asm_out_file, const char *fname, ++ tree decl) + { +-public: +- pass_stv (gcc::context *ctxt) +- : rtl_opt_pass (pass_data_stv, ctxt), +- timode_p (false) +- {} ++ bool is_ms_hook = ix86_function_ms_hook_prologue (decl); + +- /* opt_pass methods: */ +- virtual bool gate (function *) ++ if (is_ms_hook) + { +- return (timode_p == !!TARGET_64BIT +- && TARGET_STV && TARGET_SSE2 && optimize > 1); +- } ++ int i, filler_count = (TARGET_64BIT ? 
32 : 16); ++ unsigned int filler_cc = 0xcccccccc; + +- virtual unsigned int execute (function *) +- { +- return convert_scalars_to_vector (); ++ for (i = 0; i < filler_count; i += 4) ++ fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc); + } + +- opt_pass *clone () +- { +- return new pass_stv (m_ctxt); +- } ++#ifdef SUBTARGET_ASM_UNWIND_INIT ++ SUBTARGET_ASM_UNWIND_INIT (asm_out_file); ++#endif ++ ++ ASM_OUTPUT_LABEL (asm_out_file, fname); + +- void set_pass_param (unsigned int n, bool param) ++ /* Output magic byte marker, if hot-patch attribute is set. */ ++ if (is_ms_hook) + { +- gcc_assert (n == 0); +- timode_p = param; ++ if (TARGET_64BIT) ++ { ++ /* leaq [%rsp + 0], %rsp */ ++ fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n", ++ asm_out_file); ++ } ++ else ++ { ++ /* movl.s %edi, %edi ++ push %ebp ++ movl.s %esp, %ebp */ ++ fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file); ++ } + } ++} + +-private: +- bool timode_p; +-}; // class pass_stv +- +-} // anon namespace +- +-rtl_opt_pass * +-make_pass_insert_vzeroupper (gcc::context *ctxt) ++/* Implementation of call abi switching target hook. Specific to FNDECL ++ the specific call register sets are set. See also ++ ix86_conditional_register_usage for more details. */ ++void ++ix86_call_abi_override (const_tree fndecl) + { +- return new pass_insert_vzeroupper (ctxt); ++ cfun->machine->call_abi = ix86_function_abi (fndecl); + } + +-rtl_opt_pass * +-make_pass_stv (gcc::context *ctxt) ++/* Return 1 if pseudo register should be created and used to hold ++ GOT address for PIC code. */ ++bool ++ix86_use_pseudo_pic_reg (void) + { +- return new pass_stv (ctxt); ++ if ((TARGET_64BIT ++ && (ix86_cmodel == CM_SMALL_PIC ++ || TARGET_PECOFF)) ++ || !flag_pic) ++ return false; ++ return true; + } + +-/* Inserting ENDBRANCH instructions. */ ++/* Initialize large model PIC register. */ + +-static unsigned int +-rest_of_insert_endbranch (void) ++static void ++ix86_init_large_pic_reg (unsigned int tmp_regno) + { +- timevar_push (TV_MACH_DEP); +- +- rtx cet_eb; +- rtx_insn *insn; +- basic_block bb; +- +- /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is +- absent among function attributes. Later an optimization will be +- introduced to make analysis if an address of a static function is +- taken. A static function whose address is not taken will get a +- nocf_check attribute. This will allow to reduce the number of EB. */ +- +- if (!lookup_attribute ("nocf_check", +- TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) +- && (!flag_manual_endbr +- || lookup_attribute ("cf_check", +- DECL_ATTRIBUTES (cfun->decl))) +- && !cgraph_node::get (cfun->decl)->only_called_directly_p ()) +- { +- /* Queue ENDBR insertion to x86_function_profiler. */ +- if (crtl->profile && flag_fentry) +- cfun->machine->endbr_queued_at_entrance = true; +- else +- { +- cet_eb = gen_nop_endbr (); +- +- bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; +- insn = BB_HEAD (bb); +- emit_insn_before (cet_eb, insn); +- } +- } +- +- bb = 0; +- FOR_EACH_BB_FN (bb, cfun) +- { +- for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); +- insn = NEXT_INSN (insn)) +- { +- if (CALL_P (insn)) +- { +- bool need_endbr; +- need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL; +- if (!need_endbr && !SIBLING_CALL_P (insn)) +- { +- rtx call = get_call_rtx_from (insn); +- rtx fnaddr = XEXP (call, 0); +- tree fndecl = NULL_TREE; +- +- /* Also generate ENDBRANCH for non-tail call which +- may return via indirect branch. 
*/ +- if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) +- fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0)); +- if (fndecl == NULL_TREE) +- fndecl = MEM_EXPR (fnaddr); +- if (fndecl +- && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE +- && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE) +- fndecl = NULL_TREE; +- if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl))) +- { +- tree fntype = TREE_TYPE (fndecl); +- if (lookup_attribute ("indirect_return", +- TYPE_ATTRIBUTES (fntype))) +- need_endbr = true; +- } +- } +- if (!need_endbr) +- continue; +- /* Generate ENDBRANCH after CALL, which can return more than +- twice, setjmp-like functions. */ +- +- cet_eb = gen_nop_endbr (); +- emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn)); +- continue; +- } +- +- if (JUMP_P (insn) && flag_cet_switch) +- { +- rtx target = JUMP_LABEL (insn); +- if (target == NULL_RTX || ANY_RETURN_P (target)) +- continue; +- +- /* Check the jump is a switch table. */ +- rtx_insn *label = as_a (target); +- rtx_insn *table = next_insn (label); +- if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table)) +- continue; +- +- /* For the indirect jump find out all places it jumps and insert +- ENDBRANCH there. It should be done under a special flag to +- control ENDBRANCH generation for switch stmts. */ +- edge_iterator ei; +- edge e; +- basic_block dest_blk; +- +- FOR_EACH_EDGE (e, ei, bb->succs) +- { +- rtx_insn *insn; +- +- dest_blk = e->dest; +- insn = BB_HEAD (dest_blk); +- gcc_assert (LABEL_P (insn)); +- cet_eb = gen_nop_endbr (); +- emit_insn_after (cet_eb, insn); +- } +- continue; +- } +- +- if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn)) +- || (NOTE_P (insn) +- && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) +- /* TODO. Check /s bit also. */ +- { +- cet_eb = gen_nop_endbr (); +- emit_insn_after (cet_eb, insn); +- continue; +- } +- } +- } ++ rtx_code_label *label; ++ rtx tmp_reg; + +- timevar_pop (TV_MACH_DEP); +- return 0; ++ gcc_assert (Pmode == DImode); ++ label = gen_label_rtx (); ++ emit_label (label); ++ LABEL_PRESERVE_P (label) = 1; ++ tmp_reg = gen_rtx_REG (Pmode, tmp_regno); ++ gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno); ++ emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, ++ label)); ++ emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); ++ emit_insn (ix86_gen_add3 (pic_offset_table_rtx, ++ pic_offset_table_rtx, tmp_reg)); ++ const char *name = LABEL_NAME (label); ++ PUT_CODE (label, NOTE); ++ NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL; ++ NOTE_DELETED_LABEL_NAME (label) = name; + } + +-namespace { +- +-const pass_data pass_data_insert_endbranch = ++/* Create and initialize PIC register if required. */ ++static void ++ix86_init_pic_reg (void) + { +- RTL_PASS, /* type. */ +- "cet", /* name. */ +- OPTGROUP_NONE, /* optinfo_flags. */ +- TV_MACH_DEP, /* tv_id. */ +- 0, /* properties_required. */ +- 0, /* properties_provided. */ +- 0, /* properties_destroyed. */ +- 0, /* todo_flags_start. */ +- 0, /* todo_flags_finish. 
*/ +-}; ++ edge entry_edge; ++ rtx_insn *seq; + +-class pass_insert_endbranch : public rtl_opt_pass +-{ +-public: +- pass_insert_endbranch (gcc::context *ctxt) +- : rtl_opt_pass (pass_data_insert_endbranch, ctxt) +- {} ++ if (!ix86_use_pseudo_pic_reg ()) ++ return; ++ ++ start_sequence (); + +- /* opt_pass methods: */ +- virtual bool gate (function *) ++ if (TARGET_64BIT) + { +- return ((flag_cf_protection & CF_BRANCH)); ++ if (ix86_cmodel == CM_LARGE_PIC) ++ ix86_init_large_pic_reg (R11_REG); ++ else ++ emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); + } +- +- virtual unsigned int execute (function *) ++ else + { +- return rest_of_insert_endbranch (); ++ /* If there is future mcount call in the function it is more profitable ++ to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */ ++ rtx reg = crtl->profile ++ ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM) ++ : pic_offset_table_rtx; ++ rtx_insn *insn = emit_insn (gen_set_got (reg)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ if (crtl->profile) ++ emit_move_insn (pic_offset_table_rtx, reg); ++ add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); + } + +-}; // class pass_insert_endbranch +- +-} // anon namespace ++ seq = get_insns (); ++ end_sequence (); + +-rtl_opt_pass * +-make_pass_insert_endbranch (gcc::context *ctxt) +-{ +- return new pass_insert_endbranch (ctxt); ++ entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ insert_insn_on_edge (seq, entry_edge); ++ commit_one_edge_insertion (entry_edge); + } + +-/* At entry of the nearest common dominator for basic blocks with +- conversions, generate a single +- vxorps %xmmN, %xmmN, %xmmN +- for all +- vcvtss2sd op, %xmmN, %xmmX +- vcvtsd2ss op, %xmmN, %xmmX +- vcvtsi2ss op, %xmmN, %xmmX +- vcvtsi2sd op, %xmmN, %xmmX +- +- NB: We want to generate only a single vxorps to cover the whole +- function. The LCM algorithm isn't appropriate here since it may +- place a vxorps inside the loop. */ +- +-static unsigned int +-remove_partial_avx_dependency (void) +-{ +- timevar_push (TV_MACH_DEP); +- +- bitmap_obstack_initialize (NULL); +- bitmap convert_bbs = BITMAP_ALLOC (NULL); ++/* Initialize a variable CUM of type CUMULATIVE_ARGS ++ for a call to a function whose data type is FNTYPE. ++ For a library call, FNTYPE is 0. 
*/ + +- basic_block bb; +- rtx_insn *insn, *set_insn; +- rtx set; +- rtx v4sf_const0 = NULL_RTX; ++void ++init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ ++ tree fntype, /* tree ptr for function decl */ ++ rtx libname, /* SYMBOL_REF of library name or 0 */ ++ tree fndecl, ++ int caller) ++{ ++ struct cgraph_local_info *i = NULL; ++ struct cgraph_node *target = NULL; + +- auto_vec control_flow_insns; ++ memset (cum, 0, sizeof (*cum)); + +- FOR_EACH_BB_FN (bb, cfun) ++ if (fndecl) + { +- FOR_BB_INSNS (bb, insn) ++ target = cgraph_node::get (fndecl); ++ if (target) + { +- if (!NONDEBUG_INSN_P (insn)) +- continue; +- +- set = single_set (insn); +- if (!set) +- continue; ++ target = target->function_symbol (); ++ i = cgraph_node::local_info (target->decl); ++ cum->call_abi = ix86_function_abi (target->decl); ++ } ++ else ++ cum->call_abi = ix86_function_abi (fndecl); ++ } ++ else ++ cum->call_abi = ix86_function_type_abi (fntype); + +- if (get_attr_avx_partial_xmm_update (insn) +- != AVX_PARTIAL_XMM_UPDATE_TRUE) +- continue; ++ cum->caller = caller; + +- if (!v4sf_const0) +- { +- calculate_dominance_info (CDI_DOMINATORS); +- df_set_flags (DF_DEFER_INSN_RESCAN); +- df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); +- df_md_add_problem (); +- df_analyze (); +- v4sf_const0 = gen_reg_rtx (V4SFmode); +- } ++ /* Set up the number of registers to use for passing arguments. */ ++ cum->nregs = ix86_regparm; ++ if (TARGET_64BIT) ++ { ++ cum->nregs = (cum->call_abi == SYSV_ABI ++ ? X86_64_REGPARM_MAX ++ : X86_64_MS_REGPARM_MAX); ++ } ++ if (TARGET_SSE) ++ { ++ cum->sse_nregs = SSE_REGPARM_MAX; ++ if (TARGET_64BIT) ++ { ++ cum->sse_nregs = (cum->call_abi == SYSV_ABI ++ ? X86_64_SSE_REGPARM_MAX ++ : X86_64_MS_SSE_REGPARM_MAX); ++ } ++ } ++ if (TARGET_MMX) ++ cum->mmx_nregs = MMX_REGPARM_MAX; ++ cum->warn_avx512f = true; ++ cum->warn_avx = true; ++ cum->warn_sse = true; ++ cum->warn_mmx = true; + +- /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, +- SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and +- vec_merge with subreg. */ +- rtx src = SET_SRC (set); +- rtx dest = SET_DEST (set); +- machine_mode dest_mode = GET_MODE (dest); ++ /* Because type might mismatch in between caller and callee, we need to ++ use actual type of function for local calls. ++ FIXME: cgraph_analyze can be told to actually record if function uses ++ va_start so for local functions maybe_vaarg can be made aggressive ++ helping K&R code. ++ FIXME: once typesytem is fixed, we won't need this code anymore. */ ++ if (i && i->local && i->can_change_signature) ++ fntype = TREE_TYPE (target->decl); ++ cum->stdarg = stdarg_p (fntype); ++ cum->maybe_vaarg = (fntype ++ ? (!prototype_p (fntype) || stdarg_p (fntype)) ++ : !libname); + +- rtx zero; +- machine_mode dest_vecmode; +- if (dest_mode == E_SFmode) +- { +- dest_vecmode = V4SFmode; +- zero = v4sf_const0; +- } +- else +- { +- dest_vecmode = V2DFmode; +- zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0); +- } ++ cum->decl = fndecl; + +- /* Change source to vector mode. */ +- src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src); +- src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero, +- GEN_INT (HOST_WIDE_INT_1U)); +- /* Change destination to vector mode. */ +- rtx vec = gen_reg_rtx (dest_vecmode); +- /* Generate an XMM vector SET. 
*/ +- set = gen_rtx_SET (vec, src); +- set_insn = emit_insn_before (set, insn); +- df_insn_rescan (set_insn); +- +- if (cfun->can_throw_non_call_exceptions) ++ cum->warn_empty = !warn_abi || cum->stdarg; ++ if (!cum->warn_empty && fntype) ++ { ++ function_args_iterator iter; ++ tree argtype; ++ bool seen_empty_type = false; ++ FOREACH_FUNCTION_ARGS (fntype, argtype, iter) ++ { ++ if (argtype == error_mark_node || VOID_TYPE_P (argtype)) ++ break; ++ if (TYPE_EMPTY_P (argtype)) ++ seen_empty_type = true; ++ else if (seen_empty_type) + { +- /* Handle REG_EH_REGION note. */ +- rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX); +- if (note) +- { +- control_flow_insns.safe_push (set_insn); +- add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0)); +- } ++ cum->warn_empty = true; ++ break; + } +- +- src = gen_rtx_SUBREG (dest_mode, vec, 0); +- set = gen_rtx_SET (dest, src); +- +- /* Drop possible dead definitions. */ +- PATTERN (insn) = set; +- +- INSN_CODE (insn) = -1; +- recog_memoized (insn); +- df_insn_rescan (insn); +- bitmap_set_bit (convert_bbs, bb->index); + } + } + +- if (v4sf_const0) ++ if (!TARGET_64BIT) + { +- /* (Re-)discover loops so that bb->loop_father can be used in the +- analysis below. */ +- loop_optimizer_init (AVOID_CFG_MODIFICATIONS); +- +- /* Generate a vxorps at entry of the nearest dominator for basic +- blocks with conversions, which is in the the fake loop that +- contains the whole function, so that there is only a single +- vxorps in the whole function. */ +- bb = nearest_common_dominator_for_set (CDI_DOMINATORS, +- convert_bbs); +- while (bb->loop_father->latch +- != EXIT_BLOCK_PTR_FOR_FN (cfun)) +- bb = get_immediate_dominator (CDI_DOMINATORS, +- bb->loop_father->header); +- +- set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode)); ++ /* If there are variable arguments, then we won't pass anything ++ in registers in 32-bit mode. */ ++ if (stdarg_p (fntype)) ++ { ++ cum->nregs = 0; ++ /* Since in 32-bit, variable arguments are always passed on ++ stack, there is scratch register available for indirect ++ sibcall. */ ++ cfun->machine->arg_reg_available = true; ++ cum->sse_nregs = 0; ++ cum->mmx_nregs = 0; ++ cum->warn_avx512f = false; ++ cum->warn_avx = false; ++ cum->warn_sse = false; ++ cum->warn_mmx = false; ++ return; ++ } + +- insn = BB_HEAD (bb); +- while (insn && !NONDEBUG_INSN_P (insn)) ++ /* Use ecx and edx registers if function has fastcall attribute, ++ else look for regparm information. */ ++ if (fntype) + { +- if (insn == BB_END (bb)) ++ unsigned int ccvt = ix86_get_callcvt (fntype); ++ if ((ccvt & IX86_CALLCVT_THISCALL) != 0) + { +- insn = NULL; +- break; ++ cum->nregs = 1; ++ cum->fastcall = 1; /* Same first register as in fastcall. */ ++ } ++ else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) ++ { ++ cum->nregs = 2; ++ cum->fastcall = 1; + } +- insn = NEXT_INSN (insn); ++ else ++ cum->nregs = ix86_function_regparm (fntype, fndecl); + } +- if (insn == BB_HEAD (bb)) +- set_insn = emit_insn_before (set, insn); +- else +- set_insn = emit_insn_after (set, +- insn ? PREV_INSN (insn) : BB_END (bb)); +- df_insn_rescan (set_insn); +- df_process_deferred_rescans (); +- loop_optimizer_finalize (); +- +- if (!control_flow_insns.is_empty ()) +- { +- free_dominance_info (CDI_DOMINATORS); + +- unsigned int i; +- FOR_EACH_VEC_ELT (control_flow_insns, i, insn) +- if (control_flow_insn_p (insn)) +- { +- /* Split the block after insn. There will be a fallthru +- edge, which is OK so we keep it. We have to create +- the exception edges ourselves. 
*/ +- bb = BLOCK_FOR_INSN (insn); +- split_block (bb, insn); +- rtl_make_eh_edge (NULL, bb, BB_END (bb)); +- } +- } ++ /* Set up the number of SSE registers used for passing SFmode ++ and DFmode arguments. Warn for mismatching ABI. */ ++ cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); + } + +- bitmap_obstack_release (NULL); +- BITMAP_FREE (convert_bbs); +- +- timevar_pop (TV_MACH_DEP); +- return 0; ++ cfun->machine->arg_reg_available = (cum->nregs > 0); + } + +-namespace { +- +-const pass_data pass_data_remove_partial_avx_dependency = +-{ +- RTL_PASS, /* type */ +- "rpad", /* name */ +- OPTGROUP_NONE, /* optinfo_flags */ +- TV_MACH_DEP, /* tv_id */ +- 0, /* properties_required */ +- 0, /* properties_provided */ +- 0, /* properties_destroyed */ +- 0, /* todo_flags_start */ +- TODO_df_finish, /* todo_flags_finish */ +-}; +- +-class pass_remove_partial_avx_dependency : public rtl_opt_pass +-{ +-public: +- pass_remove_partial_avx_dependency (gcc::context *ctxt) +- : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt) +- {} +- +- /* opt_pass methods: */ +- virtual bool gate (function *) +- { +- return (TARGET_AVX +- && TARGET_SSE_PARTIAL_REG_DEPENDENCY +- && TARGET_SSE_MATH +- && optimize +- && optimize_function_for_speed_p (cfun)); +- } +- +- virtual unsigned int execute (function *) +- { +- return remove_partial_avx_dependency (); +- } +-}; // class pass_rpad +- +-} // anon namespace ++/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. ++ But in the case of vector types, it is some vector mode. + +-rtl_opt_pass * +-make_pass_remove_partial_avx_dependency (gcc::context *ctxt) +-{ +- return new pass_remove_partial_avx_dependency (ctxt); +-} ++ When we have only some of our vector isa extensions enabled, then there ++ are some modes for which vector_mode_supported_p is false. For these ++ modes, the generic vector support in gcc will choose some non-vector mode ++ in order to implement the type. By computing the natural mode, we'll ++ select the proper ABI location for the operand and not depend on whatever ++ the middle-end decides to do with these vector types. + +-/* Return true if a red-zone is in use. We can't use red-zone when +- there are local indirect jumps, like "indirect_jump" or "tablejump", +- which jumps to another place in the function, since "call" in the +- indirect thunk pushes the return address onto stack, destroying +- red-zone. ++ The midde-end can't deal with the vector types > 16 bytes. In this ++ case, we return the original mode and warn ABI change if CUM isn't ++ NULL. + +- TODO: If we can reserve the first 2 WORDs, for PUSH and, another +- for CALL, in red-zone, we can allow local indirect jumps with +- indirect thunk. */ ++ If INT_RETURN is true, warn ABI change if the vector mode isn't ++ available for function return value. */ + +-bool +-ix86_using_red_zone (void) ++static machine_mode ++type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, ++ bool in_return) + { +- return (TARGET_RED_ZONE +- && !TARGET_64BIT_MS_ABI +- && (!cfun->machine->has_local_indirect_jump +- || cfun->machine->indirect_branch_type == indirect_branch_keep)); +-} +- +-/* Return a string that documents the current -m options. The caller is +- responsible for freeing the string. 
*/ ++ machine_mode mode = TYPE_MODE (type); + +-static char * +-ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, +- int flags, int flags2, +- const char *arch, const char *tune, +- enum fpmath_unit fpmath, bool add_nl_p, bool add_abi_p) +-{ +- struct ix86_target_opts +- { +- const char *option; /* option string */ +- HOST_WIDE_INT mask; /* isa mask options */ +- }; ++ if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) ++ { ++ HOST_WIDE_INT size = int_size_in_bytes (type); ++ if ((size == 8 || size == 16 || size == 32 || size == 64) ++ /* ??? Generic code allows us to create width 1 vectors. Ignore. */ ++ && TYPE_VECTOR_SUBPARTS (type) > 1) ++ { ++ machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); + +- /* This table is ordered so that options like -msse4.2 that imply other +- ISAs come first. Target string will be displayed in the same order. */ +- static struct ix86_target_opts isa2_opts[] = +- { +- { "-mcx16", OPTION_MASK_ISA_CX16 }, +- { "-mvaes", OPTION_MASK_ISA_VAES }, +- { "-mrdpid", OPTION_MASK_ISA_RDPID }, +- { "-mpconfig", OPTION_MASK_ISA_PCONFIG }, +- { "-mwbnoinvd", OPTION_MASK_ISA_WBNOINVD }, +- { "-msgx", OPTION_MASK_ISA_SGX }, +- { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW }, +- { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS }, +- { "-mhle", OPTION_MASK_ISA_HLE }, +- { "-mmovbe", OPTION_MASK_ISA_MOVBE }, +- { "-mclzero", OPTION_MASK_ISA_CLZERO }, +- { "-mmwaitx", OPTION_MASK_ISA_MWAITX }, +- { "-mmovdir64b", OPTION_MASK_ISA_MOVDIR64B }, +- { "-mwaitpkg", OPTION_MASK_ISA_WAITPKG }, +- { "-mcldemote", OPTION_MASK_ISA_CLDEMOTE }, +- { "-mptwrite", OPTION_MASK_ISA_PTWRITE } +- }; +- static struct ix86_target_opts isa_opts[] = +- { +- { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ }, +- { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG }, +- { "-mvpclmulqdq", OPTION_MASK_ISA_VPCLMULQDQ }, +- { "-mgfni", OPTION_MASK_ISA_GFNI }, +- { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI }, +- { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 }, +- { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI }, +- { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA }, +- { "-mavx512vl", OPTION_MASK_ISA_AVX512VL }, +- { "-mavx512bw", OPTION_MASK_ISA_AVX512BW }, +- { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ }, +- { "-mavx512er", OPTION_MASK_ISA_AVX512ER }, +- { "-mavx512pf", OPTION_MASK_ISA_AVX512PF }, +- { "-mavx512cd", OPTION_MASK_ISA_AVX512CD }, +- { "-mavx512f", OPTION_MASK_ISA_AVX512F }, +- { "-mavx2", OPTION_MASK_ISA_AVX2 }, +- { "-mfma", OPTION_MASK_ISA_FMA }, +- { "-mxop", OPTION_MASK_ISA_XOP }, +- { "-mfma4", OPTION_MASK_ISA_FMA4 }, +- { "-mf16c", OPTION_MASK_ISA_F16C }, +- { "-mavx", OPTION_MASK_ISA_AVX }, +-/* { "-msse4" OPTION_MASK_ISA_SSE4 }, */ +- { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, +- { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, +- { "-msse4a", OPTION_MASK_ISA_SSE4A }, +- { "-mssse3", OPTION_MASK_ISA_SSSE3 }, +- { "-msse3", OPTION_MASK_ISA_SSE3 }, +- { "-maes", OPTION_MASK_ISA_AES }, +- { "-msha", OPTION_MASK_ISA_SHA }, +- { "-mpclmul", OPTION_MASK_ISA_PCLMUL }, +- { "-msse2", OPTION_MASK_ISA_SSE2 }, +- { "-msse", OPTION_MASK_ISA_SSE }, +- { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A }, +- { "-m3dnow", OPTION_MASK_ISA_3DNOW }, +- { "-mmmx", OPTION_MASK_ISA_MMX }, +- { "-mrtm", OPTION_MASK_ISA_RTM }, +- { "-mprfchw", OPTION_MASK_ISA_PRFCHW }, +- { "-mrdseed", OPTION_MASK_ISA_RDSEED }, +- { "-madx", OPTION_MASK_ISA_ADX }, +- { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 }, +- { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT }, +- { "-mxsaves", OPTION_MASK_ISA_XSAVES }, +- { 
"-mxsavec", OPTION_MASK_ISA_XSAVEC }, +- { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT }, +- { "-mxsave", OPTION_MASK_ISA_XSAVE }, +- { "-mabm", OPTION_MASK_ISA_ABM }, +- { "-mbmi", OPTION_MASK_ISA_BMI }, +- { "-mbmi2", OPTION_MASK_ISA_BMI2 }, +- { "-mlzcnt", OPTION_MASK_ISA_LZCNT }, +- { "-mtbm", OPTION_MASK_ISA_TBM }, +- { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, +- { "-msahf", OPTION_MASK_ISA_SAHF }, +- { "-mcrc32", OPTION_MASK_ISA_CRC32 }, +- { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE }, +- { "-mrdrnd", OPTION_MASK_ISA_RDRND }, +- { "-mpku", OPTION_MASK_ISA_PKU }, +- { "-mlwp", OPTION_MASK_ISA_LWP }, +- { "-mfxsr", OPTION_MASK_ISA_FXSR }, +- { "-mclwb", OPTION_MASK_ISA_CLWB }, +- { "-mshstk", OPTION_MASK_ISA_SHSTK }, +- { "-mmovdiri", OPTION_MASK_ISA_MOVDIRI } +- }; ++ /* There are no XFmode vector modes. */ ++ if (innermode == XFmode) ++ return mode; + +- /* Flag options. */ +- static struct ix86_target_opts flag_opts[] = +- { +- { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE }, +- { "-mlong-double-128", MASK_LONG_DOUBLE_128 }, +- { "-mlong-double-64", MASK_LONG_DOUBLE_64 }, +- { "-m80387", MASK_80387 }, +- { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS }, +- { "-malign-double", MASK_ALIGN_DOUBLE }, +- { "-mcld", MASK_CLD }, +- { "-mfp-ret-in-387", MASK_FLOAT_RETURNS }, +- { "-mieee-fp", MASK_IEEE_FP }, +- { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS }, +- { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY }, +- { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, +- { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, +- { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, +- { "-mno-push-args", MASK_NO_PUSH_ARGS }, +- { "-mno-red-zone", MASK_NO_RED_ZONE }, +- { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER }, +- { "-mrecip", MASK_RECIP }, +- { "-mrtd", MASK_RTD }, +- { "-msseregparm", MASK_SSEREGPARM }, +- { "-mstack-arg-probe", MASK_STACK_PROBE }, +- { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS }, +- { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS }, +- { "-m8bit-idiv", MASK_USE_8BIT_IDIV }, +- { "-mvzeroupper", MASK_VZEROUPPER }, +- { "-mstv", MASK_STV }, +- { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD }, +- { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE }, +- { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES } +- }; ++ if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) ++ mode = MIN_MODE_VECTOR_FLOAT; ++ else ++ mode = MIN_MODE_VECTOR_INT; + +- /* Additional flag options. */ +- static struct ix86_target_opts flag2_opts[] = +- { +- { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY } +- }; ++ /* Get the mode which has this inner mode and number of units. 
*/ ++ FOR_EACH_MODE_FROM (mode, mode) ++ if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) ++ && GET_MODE_INNER (mode) == innermode) ++ { ++ if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU) ++ { ++ static bool warnedavx512f; ++ static bool warnedavx512f_ret; + +- const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts) +- + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2]; ++ if (cum && cum->warn_avx512f && !warnedavx512f) ++ { ++ if (warning (OPT_Wpsabi, "AVX512F vector argument " ++ "without AVX512F enabled changes the ABI")) ++ warnedavx512f = true; ++ } ++ else if (in_return && !warnedavx512f_ret) ++ { ++ if (warning (OPT_Wpsabi, "AVX512F vector return " ++ "without AVX512F enabled changes the ABI")) ++ warnedavx512f_ret = true; ++ } + +- char isa_other[40]; +- char isa2_other[40]; +- char flags_other[40]; +- char flags2_other[40]; +- unsigned num = 0; +- unsigned i, j; +- char *ret; +- char *ptr; +- size_t len; +- size_t line_len; +- size_t sep_len; +- const char *abi; ++ return TYPE_MODE (type); ++ } ++ else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU) ++ { ++ static bool warnedavx; ++ static bool warnedavx_ret; + +- memset (opts, '\0', sizeof (opts)); ++ if (cum && cum->warn_avx && !warnedavx) ++ { ++ if (warning (OPT_Wpsabi, "AVX vector argument " ++ "without AVX enabled changes the ABI")) ++ warnedavx = true; ++ } ++ else if (in_return && !warnedavx_ret) ++ { ++ if (warning (OPT_Wpsabi, "AVX vector return " ++ "without AVX enabled changes the ABI")) ++ warnedavx_ret = true; ++ } + +- /* Add -march= option. */ +- if (arch) +- { +- opts[num][0] = "-march="; +- opts[num++][1] = arch; +- } +- +- /* Add -mtune= option. */ +- if (tune) +- { +- opts[num][0] = "-mtune="; +- opts[num++][1] = tune; +- } +- +- /* Add -m32/-m64/-mx32. */ +- if (add_abi_p) +- { +- if ((isa & OPTION_MASK_ISA_64BIT) != 0) +- { +- if ((isa & OPTION_MASK_ABI_64) != 0) +- abi = "-m64"; +- else +- abi = "-mx32"; +- } +- else +- abi = "-m32"; +- opts[num++][0] = abi; +- } +- isa &= ~(OPTION_MASK_ISA_64BIT | OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32); +- +- /* Pick out the options in isa2 options. */ +- for (i = 0; i < ARRAY_SIZE (isa2_opts); i++) +- { +- if ((isa2 & isa2_opts[i].mask) != 0) +- { +- opts[num++][0] = isa2_opts[i].option; +- isa2 &= ~ isa2_opts[i].mask; +- } +- } +- +- if (isa2 && add_nl_p) +- { +- opts[num++][0] = isa2_other; +- sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2); +- } +- +- /* Pick out the options in isa options. */ +- for (i = 0; i < ARRAY_SIZE (isa_opts); i++) +- { +- if ((isa & isa_opts[i].mask) != 0) +- { +- opts[num++][0] = isa_opts[i].option; +- isa &= ~ isa_opts[i].mask; +- } +- } +- +- if (isa && add_nl_p) +- { +- opts[num++][0] = isa_other; +- sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa); +- } +- +- /* Add flag options. */ +- for (i = 0; i < ARRAY_SIZE (flag_opts); i++) +- { +- if ((flags & flag_opts[i].mask) != 0) +- { +- opts[num++][0] = flag_opts[i].option; +- flags &= ~ flag_opts[i].mask; +- } +- } +- +- if (flags && add_nl_p) +- { +- opts[num++][0] = flags_other; +- sprintf (flags_other, "(other flags: %#x)", flags); +- } +- +- /* Add additional flag options. 
*/ +- for (i = 0; i < ARRAY_SIZE (flag2_opts); i++) +- { +- if ((flags2 & flag2_opts[i].mask) != 0) +- { +- opts[num++][0] = flag2_opts[i].option; +- flags2 &= ~ flag2_opts[i].mask; +- } +- } +- +- if (flags2 && add_nl_p) +- { +- opts[num++][0] = flags2_other; +- sprintf (flags2_other, "(other flags2: %#x)", flags2); +- } +- +- /* Add -fpmath= option. */ +- if (fpmath) +- { +- opts[num][0] = "-mfpmath="; +- switch ((int) fpmath) +- { +- case FPMATH_387: +- opts[num++][1] = "387"; +- break; ++ return TYPE_MODE (type); ++ } ++ else if (((size == 8 && TARGET_64BIT) || size == 16) ++ && !TARGET_SSE ++ && !TARGET_IAMCU) ++ { ++ static bool warnedsse; ++ static bool warnedsse_ret; + +- case FPMATH_SSE: +- opts[num++][1] = "sse"; +- break; ++ if (cum && cum->warn_sse && !warnedsse) ++ { ++ if (warning (OPT_Wpsabi, "SSE vector argument " ++ "without SSE enabled changes the ABI")) ++ warnedsse = true; ++ } ++ else if (!TARGET_64BIT && in_return && !warnedsse_ret) ++ { ++ if (warning (OPT_Wpsabi, "SSE vector return " ++ "without SSE enabled changes the ABI")) ++ warnedsse_ret = true; ++ } ++ } ++ else if ((size == 8 && !TARGET_64BIT) ++ && (!cfun ++ || cfun->machine->func_type == TYPE_NORMAL) ++ && !TARGET_MMX ++ && !TARGET_IAMCU) ++ { ++ static bool warnedmmx; ++ static bool warnedmmx_ret; + +- case FPMATH_387 | FPMATH_SSE: +- opts[num++][1] = "sse+387"; +- break; ++ if (cum && cum->warn_mmx && !warnedmmx) ++ { ++ if (warning (OPT_Wpsabi, "MMX vector argument " ++ "without MMX enabled changes the ABI")) ++ warnedmmx = true; ++ } ++ else if (in_return && !warnedmmx_ret) ++ { ++ if (warning (OPT_Wpsabi, "MMX vector return " ++ "without MMX enabled changes the ABI")) ++ warnedmmx_ret = true; ++ } ++ } ++ return mode; ++ } + +- default: + gcc_unreachable (); + } + } + +- /* Any options? */ +- if (num == 0) +- return NULL; +- +- gcc_assert (num < ARRAY_SIZE (opts)); +- +- /* Size the string. */ +- len = 0; +- sep_len = (add_nl_p) ? 3 : 1; +- for (i = 0; i < num; i++) +- { +- len += sep_len; +- for (j = 0; j < 2; j++) +- if (opts[i][j]) +- len += strlen (opts[i][j]); +- } +- +- /* Build the string. */ +- ret = ptr = (char *) xmalloc (len); +- line_len = 0; +- +- for (i = 0; i < num; i++) +- { +- size_t len2[2]; +- +- for (j = 0; j < 2; j++) +- len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0; +- +- if (i != 0) +- { +- *ptr++ = ' '; +- line_len++; +- +- if (add_nl_p && line_len + len2[0] + len2[1] > 70) +- { +- *ptr++ = '\\'; +- *ptr++ = '\n'; +- line_len = 0; +- } +- } +- +- for (j = 0; j < 2; j++) +- if (opts[i][j]) +- { +- memcpy (ptr, opts[i][j], len2[j]); +- ptr += len2[j]; +- line_len += len2[j]; +- } +- } +- +- *ptr = '\0'; +- gcc_assert (ret + len >= ptr); +- +- return ret; ++ return mode; + } + +-/* Return true, if profiling code should be emitted before +- prologue. Otherwise it returns false. +- Note: For x86 with "hotfix" it is sorried. */ +-static bool +-ix86_profile_before_prologue (void) +-{ +- return flag_fentry != 0; +-} ++/* We want to pass a value in REGNO whose "natural" mode is MODE. However, ++ this may not agree with the mode that the type system has chosen for the ++ register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can ++ go ahead and use it. Otherwise we have to build a PARALLEL instead. */ + +-/* Function that is callable from the debugger to print the current +- options. 
*/ +-void ATTRIBUTE_UNUSED +-ix86_debug_options (void) ++static rtx ++gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, ++ unsigned int regno) + { +- char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2, +- target_flags, ix86_target_flags, +- ix86_arch_string,ix86_tune_string, +- ix86_fpmath, true, true); ++ rtx tmp; + +- if (opts) ++ if (orig_mode != BLKmode) ++ tmp = gen_rtx_REG (orig_mode, regno); ++ else + { +- fprintf (stderr, "%s\n\n", opts); +- free (opts); ++ tmp = gen_rtx_REG (mode, regno); ++ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); ++ tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); + } +- else +- fputs ("\n\n", stderr); + +- return; ++ return tmp; + } + +-static const char *stringop_alg_names[] = { +-#define DEF_ENUM +-#define DEF_ALG(alg, name) #name, +-#include "stringop.def" +-#undef DEF_ENUM +-#undef DEF_ALG +-}; ++/* x86-64 register passing implementation. See x86-64 ABI for details. Goal ++ of this code is to classify each 8bytes of incoming argument by the register ++ class and assign registers accordingly. */ + +-/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=. +- The string is of the following form (or comma separated list of it): ++/* Return the union class of CLASS1 and CLASS2. ++ See the x86-64 PS ABI for details. */ + +- strategy_alg:max_size:[align|noalign] ++static enum x86_64_reg_class ++merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) ++{ ++ /* Rule #1: If both classes are equal, this is the resulting class. */ ++ if (class1 == class2) ++ return class1; + +- where the full size range for the strategy is either [0, max_size] or +- [min_size, max_size], in which min_size is the max_size + 1 of the +- preceding range. The last size range must have max_size == -1. ++ /* Rule #2: If one of the classes is NO_CLASS, the resulting class is ++ the other class. */ ++ if (class1 == X86_64_NO_CLASS) ++ return class2; ++ if (class2 == X86_64_NO_CLASS) ++ return class1; + +- Examples: ++ /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ ++ if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) ++ return X86_64_MEMORY_CLASS; + +- 1. +- -mmemcpy-strategy=libcall:-1:noalign ++ /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ ++ if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) ++ || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) ++ return X86_64_INTEGERSI_CLASS; ++ if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS ++ || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) ++ return X86_64_INTEGER_CLASS; + +- this is equivalent to (for known size memcpy) -mstringop-strategy=libcall ++ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, ++ MEMORY is used. */ ++ if (class1 == X86_64_X87_CLASS ++ || class1 == X86_64_X87UP_CLASS ++ || class1 == X86_64_COMPLEX_X87_CLASS ++ || class2 == X86_64_X87_CLASS ++ || class2 == X86_64_X87UP_CLASS ++ || class2 == X86_64_COMPLEX_X87_CLASS) ++ return X86_64_MEMORY_CLASS; + ++ /* Rule #6: Otherwise class SSE is used. */ ++ return X86_64_SSE_CLASS; ++} + +- 2. +- -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign ++/* Classify the argument of type TYPE and mode MODE. ++ CLASSES will be filled by the register class used to pass each word ++ of the operand. The number of words is returned. In case the parameter ++ should be passed in memory, 0 is returned. 
As a special case for zero ++ sized containers, classes[0] will be NO_CLASS and 1 is returned. + +- This is to tell the compiler to use the following strategy for memset +- 1) when the expected size is between [1, 16], use rep_8byte strategy; +- 2) when the size is between [17, 2048], use vector_loop; +- 3) when the size is > 2048, use libcall. */ ++ BIT_OFFSET is used internally for handling records and specifies offset ++ of the offset in bits modulo 512 to avoid overflow cases. + +-struct stringop_size_range +-{ +- int max; +- stringop_alg alg; +- bool noalign; +-}; ++ See the x86-64 PS ABI for details. ++*/ + +-static void +-ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset) ++static int ++classify_argument (machine_mode mode, const_tree type, ++ enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) + { +- const struct stringop_algs *default_algs; +- stringop_size_range input_ranges[MAX_STRINGOP_ALGS]; +- char *curr_range_str, *next_range_str; +- const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="; +- int i = 0, n = 0; +- +- if (is_memset) +- default_algs = &ix86_cost->memset[TARGET_64BIT != 0]; +- else +- default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; ++ HOST_WIDE_INT bytes ++ = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); ++ int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); + +- curr_range_str = strategy_str; ++ /* Variable sized entities are always passed/returned in memory. */ ++ if (bytes < 0) ++ return 0; + +- do ++ if (mode != VOIDmode) + { +- int maxs; +- char alg_name[128]; +- char align[16]; +- next_range_str = strchr (curr_range_str, ','); +- if (next_range_str) +- *next_range_str++ = '\0'; +- +- if (sscanf (curr_range_str, "%20[^:]:%d:%10s", alg_name, &maxs, +- align) != 3) +- { +- error ("wrong argument %qs to option %qs", curr_range_str, opt); +- return; +- } ++ /* The value of "named" doesn't matter. */ ++ function_arg_info arg (const_cast (type), mode, /*named=*/true); ++ if (targetm.calls.must_pass_in_stack (arg)) ++ return 0; ++ } + +- if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1)) +- { +- error ("size ranges of option %qs should be increasing", opt); +- return; +- } ++ if (type && AGGREGATE_TYPE_P (type)) ++ { ++ int i; ++ tree field; ++ enum x86_64_reg_class subclasses[MAX_CLASSES]; + +- for (i = 0; i < last_alg; i++) +- if (!strcmp (alg_name, stringop_alg_names[i])) +- break; ++ /* On x86-64 we pass structures larger than 64 bytes on the stack. */ ++ if (bytes > 64) ++ return 0; + +- if (i == last_alg) +- { +- error ("wrong strategy name %qs specified for option %qs", +- alg_name, opt); +- +- auto_vec candidates; +- for (i = 0; i < last_alg; i++) +- if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT) +- candidates.safe_push (stringop_alg_names[i]); +- +- char *s; +- const char *hint +- = candidates_list_and_hint (alg_name, s, candidates); +- if (hint) +- inform (input_location, +- "valid arguments to %qs are: %s; did you mean %qs?", +- opt, s, hint); +- else +- inform (input_location, "valid arguments to %qs are: %s", +- opt, s); +- XDELETEVEC (s); +- return; +- } ++ for (i = 0; i < words; i++) ++ classes[i] = X86_64_NO_CLASS; + +- if ((stringop_alg) i == rep_prefix_8_byte +- && !TARGET_64BIT) ++ /* Zero sized arrays or structures are NO_CLASS. We return 0 to ++ signalize memory class, so handle it as special case. */ ++ if (!words) + { +- /* rep; movq isn't available in 32-bit code. 
*/ +- error ("strategy name %qs specified for option %qs " +- "not supported for 32-bit code", alg_name, opt); +- return; ++ classes[0] = X86_64_NO_CLASS; ++ return 1; + } + +- input_ranges[n].max = maxs; +- input_ranges[n].alg = (stringop_alg) i; +- if (!strcmp (align, "align")) +- input_ranges[n].noalign = false; +- else if (!strcmp (align, "noalign")) +- input_ranges[n].noalign = true; +- else +- { +- error ("unknown alignment %qs specified for option %qs", align, opt); +- return; +- } +- n++; +- curr_range_str = next_range_str; +- } +- while (curr_range_str); +- +- if (input_ranges[n - 1].max != -1) +- { +- error ("the max value for the last size range should be -1" +- " for option %qs", opt); +- return; +- } ++ /* Classify each field of record and merge classes. */ ++ switch (TREE_CODE (type)) ++ { ++ case RECORD_TYPE: ++ /* And now merge the fields of structure. */ ++ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ int num; + +- if (n > MAX_STRINGOP_ALGS) +- { +- error ("too many size ranges specified in option %qs", opt); +- return; +- } ++ if (TREE_TYPE (field) == error_mark_node) ++ continue; + +- /* Now override the default algs array. */ +- for (i = 0; i < n; i++) +- { +- *const_cast(&default_algs->size[i].max) = input_ranges[i].max; +- *const_cast(&default_algs->size[i].alg) +- = input_ranges[i].alg; +- *const_cast(&default_algs->size[i].noalign) +- = input_ranges[i].noalign; +- } +-} +- +- +-/* parse -mtune-ctrl= option. When DUMP is true, +- print the features that are explicitly set. */ +- +-static void +-parse_mtune_ctrl_str (bool dump) +-{ +- if (!ix86_tune_ctrl_string) +- return; +- +- char *next_feature_string = NULL; +- char *curr_feature_string = xstrdup (ix86_tune_ctrl_string); +- char *orig = curr_feature_string; +- int i; +- do +- { +- bool clear = false; +- +- next_feature_string = strchr (curr_feature_string, ','); +- if (next_feature_string) +- *next_feature_string++ = '\0'; +- if (*curr_feature_string == '^') +- { +- curr_feature_string++; +- clear = true; +- } +- for (i = 0; i < X86_TUNE_LAST; i++) +- { +- if (!strcmp (curr_feature_string, ix86_tune_feature_names[i])) +- { +- ix86_tune_features[i] = !clear; +- if (dump) +- fprintf (stderr, "Explicitly %s feature %s\n", +- clear ? "clear" : "set", ix86_tune_feature_names[i]); +- break; +- } +- } +- if (i == X86_TUNE_LAST) +- error ("unknown parameter to option %<-mtune-ctrl%>: %s", +- clear ? curr_feature_string - 1 : curr_feature_string); +- curr_feature_string = next_feature_string; +- } +- while (curr_feature_string); +- free (orig); +-} +- +-/* Helper function to set ix86_tune_features. IX86_TUNE is the +- processor type. */ +- +-static void +-set_ix86_tune_features (enum processor_type ix86_tune, bool dump) +-{ +- unsigned HOST_WIDE_INT ix86_tune_mask = HOST_WIDE_INT_1U << ix86_tune; +- int i; +- +- for (i = 0; i < X86_TUNE_LAST; ++i) +- { +- if (ix86_tune_no_default) +- ix86_tune_features[i] = 0; +- else +- ix86_tune_features[i] +- = !!(initial_ix86_tune_features[i] & ix86_tune_mask); +- } +- +- if (dump) +- { +- fprintf (stderr, "List of x86 specific tuning parameter names:\n"); +- for (i = 0; i < X86_TUNE_LAST; i++) +- fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i], +- ix86_tune_features[i] ? "on" : "off"); +- } +- +- parse_mtune_ctrl_str (dump); +-} +- +- +-/* Default align_* from the processor table. */ ++ /* Bitfields are always classified as integer. 
Handle them ++ early, since later code would consider them to be ++ misaligned integers. */ ++ if (DECL_BIT_FIELD (field)) ++ { ++ for (i = (int_bit_position (field) ++ + (bit_offset % 64)) / 8 / 8; ++ i < ((int_bit_position (field) + (bit_offset % 64)) ++ + tree_to_shwi (DECL_SIZE (field)) ++ + 63) / 8 / 8; i++) ++ classes[i] ++ = merge_classes (X86_64_INTEGER_CLASS, classes[i]); ++ } ++ else ++ { ++ int pos; + +-static void +-ix86_default_align (struct gcc_options *opts) +-{ +- /* -falign-foo without argument: supply one. */ +- if (opts->x_flag_align_loops && !opts->x_str_align_loops) +- opts->x_str_align_loops = processor_cost_table[ix86_tune]->align_loop; +- if (opts->x_flag_align_jumps && !opts->x_str_align_jumps) +- opts->x_str_align_jumps = processor_cost_table[ix86_tune]->align_jump; +- if (opts->x_flag_align_labels && !opts->x_str_align_labels) +- opts->x_str_align_labels = processor_cost_table[ix86_tune]->align_label; +- if (opts->x_flag_align_functions && !opts->x_str_align_functions) +- opts->x_str_align_functions = processor_cost_table[ix86_tune]->align_func; +-} ++ type = TREE_TYPE (field); + +-/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */ ++ /* Flexible array member is ignored. */ ++ if (TYPE_MODE (type) == BLKmode ++ && TREE_CODE (type) == ARRAY_TYPE ++ && TYPE_SIZE (type) == NULL_TREE ++ && TYPE_DOMAIN (type) != NULL_TREE ++ && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) ++ == NULL_TREE)) ++ { ++ static bool warned; + +-static void +-ix86_override_options_after_change (void) +-{ +- ix86_default_align (&global_options); +-} ++ if (!warned && warn_psabi) ++ { ++ warned = true; ++ inform (input_location, ++ "the ABI of passing struct with" ++ " a flexible array member has" ++ " changed in GCC 4.4"); ++ } ++ continue; ++ } ++ num = classify_argument (TYPE_MODE (type), type, ++ subclasses, ++ (int_bit_position (field) ++ + bit_offset) % 512); ++ if (!num) ++ return 0; ++ pos = (int_bit_position (field) ++ + (bit_offset % 64)) / 8 / 8; ++ for (i = 0; i < num && (i + pos) < words; i++) ++ classes[i + pos] ++ = merge_classes (subclasses[i], classes[i + pos]); ++ } ++ } ++ } ++ break; + ++ case ARRAY_TYPE: ++ /* Arrays are handled as small records. */ ++ { ++ int num; ++ num = classify_argument (TYPE_MODE (TREE_TYPE (type)), ++ TREE_TYPE (type), subclasses, bit_offset); ++ if (!num) ++ return 0; + ++ /* The partial classes are now full classes. */ ++ if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) ++ subclasses[0] = X86_64_SSE_CLASS; ++ if (subclasses[0] == X86_64_INTEGERSI_CLASS ++ && !((bit_offset % 64) == 0 && bytes == 4)) ++ subclasses[0] = X86_64_INTEGER_CLASS; + +-/* Override various settings based on options. If MAIN_ARGS_P, the +- options are from the command line, otherwise they are from +- attributes. Return true if there's an error related to march +- option. */ ++ for (i = 0; i < words; i++) ++ classes[i] = subclasses[i % num]; + +-static bool +-ix86_option_override_internal (bool main_args_p, +- struct gcc_options *opts, +- struct gcc_options *opts_set) +-{ +- int i; +- unsigned HOST_WIDE_INT ix86_arch_mask; +- const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL); ++ break; ++ } ++ case UNION_TYPE: ++ case QUAL_UNION_TYPE: ++ /* Unions are similar to RECORD_TYPE but offset is always 0. ++ */ ++ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ int num; + +- /* -mrecip options. 
*/ +- static struct +- { +- const char *string; /* option name */ +- unsigned int mask; /* mask bits to set */ +- } +- const recip_options[] = +- { +- { "all", RECIP_MASK_ALL }, +- { "none", RECIP_MASK_NONE }, +- { "div", RECIP_MASK_DIV }, +- { "sqrt", RECIP_MASK_SQRT }, +- { "vec-div", RECIP_MASK_VEC_DIV }, +- { "vec-sqrt", RECIP_MASK_VEC_SQRT }, +- }; ++ if (TREE_TYPE (field) == error_mark_node) ++ continue; + ++ num = classify_argument (TYPE_MODE (TREE_TYPE (field)), ++ TREE_TYPE (field), subclasses, ++ bit_offset); ++ if (!num) ++ return 0; ++ for (i = 0; i < num && i < words; i++) ++ classes[i] = merge_classes (subclasses[i], classes[i]); ++ } ++ } ++ break; + +- /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if +- TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */ +- if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32); +-#ifdef TARGET_BI_ARCH +- else +- { +-#if TARGET_BI_ARCH == 1 +- /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64 +- is on and OPTION_MASK_ABI_X32 is off. We turn off +- OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by +- -mx32. */ +- if (TARGET_X32_P (opts->x_ix86_isa_flags)) +- opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; +-#else +- /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is +- on and OPTION_MASK_ABI_64 is off. We turn off +- OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by +- -m64 or OPTION_MASK_CODE16 is turned on by -m16. */ +- if (TARGET_LP64_P (opts->x_ix86_isa_flags) +- || TARGET_16BIT_P (opts->x_ix86_isa_flags)) +- opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; +-#endif +- if (TARGET_64BIT_P (opts->x_ix86_isa_flags) +- && TARGET_IAMCU_P (opts->x_target_flags)) +- sorry ("Intel MCU psABI isn%'t supported in %s mode", +- TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit"); +- } +-#endif ++ default: ++ gcc_unreachable (); ++ } + +- if (TARGET_X32_P (opts->x_ix86_isa_flags)) +- { +- /* Always turn on OPTION_MASK_ISA_64BIT and turn off +- OPTION_MASK_ABI_64 for TARGET_X32. */ +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; +- opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; +- } +- else if (TARGET_16BIT_P (opts->x_ix86_isa_flags)) +- opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT +- | OPTION_MASK_ABI_X32 +- | OPTION_MASK_ABI_64); +- else if (TARGET_LP64_P (opts->x_ix86_isa_flags)) +- { +- /* Always turn on OPTION_MASK_ISA_64BIT and turn off +- OPTION_MASK_ABI_X32 for TARGET_LP64. */ +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; +- opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; +- } ++ if (words > 2) ++ { ++ /* When size > 16 bytes, if the first one isn't ++ X86_64_SSE_CLASS or any other ones aren't ++ X86_64_SSEUP_CLASS, everything should be passed in ++ memory. */ ++ if (classes[0] != X86_64_SSE_CLASS) ++ return 0; + +-#ifdef SUBTARGET_OVERRIDE_OPTIONS +- SUBTARGET_OVERRIDE_OPTIONS; +-#endif ++ for (i = 1; i < words; i++) ++ if (classes[i] != X86_64_SSEUP_CLASS) ++ return 0; ++ } + +-#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS +- SUBSUBTARGET_OVERRIDE_OPTIONS; +-#endif ++ /* Final merger cleanup. */ ++ for (i = 0; i < words; i++) ++ { ++ /* If one class is MEMORY, everything should be passed in ++ memory. */ ++ if (classes[i] == X86_64_MEMORY_CLASS) ++ return 0; + +- /* -fPIC is the default for x86_64. 
*/ +- if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- opts->x_flag_pic = 2; ++ /* The X86_64_SSEUP_CLASS should be always preceded by ++ X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ ++ if (classes[i] == X86_64_SSEUP_CLASS ++ && classes[i - 1] != X86_64_SSE_CLASS ++ && classes[i - 1] != X86_64_SSEUP_CLASS) ++ { ++ /* The first one should never be X86_64_SSEUP_CLASS. */ ++ gcc_assert (i != 0); ++ classes[i] = X86_64_SSE_CLASS; ++ } + +- /* Need to check -mtune=generic first. */ +- if (opts->x_ix86_tune_string) +- { +- /* As special support for cross compilers we read -mtune=native +- as -mtune=generic. With native compilers we won't see the +- -mtune=native, as it was changed by the driver. */ +- if (!strcmp (opts->x_ix86_tune_string, "native")) +- { +- opts->x_ix86_tune_string = "generic"; +- } +- else if (!strcmp (opts->x_ix86_tune_string, "x86-64")) +- warning (OPT_Wdeprecated, +- main_args_p +- ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> " +- "or %<-mtune=generic%> instead as appropriate") +- : G_("% is deprecated; use " +- "% or %" +- " instead as appropriate")); +- } +- else +- { +- if (opts->x_ix86_arch_string) +- opts->x_ix86_tune_string = opts->x_ix86_arch_string; +- if (!opts->x_ix86_tune_string) +- { +- opts->x_ix86_tune_string = processor_names[TARGET_CPU_DEFAULT]; +- ix86_tune_defaulted = 1; +- } ++ /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, ++ everything should be passed in memory. */ ++ if (classes[i] == X86_64_X87UP_CLASS ++ && (classes[i - 1] != X86_64_X87_CLASS)) ++ { ++ static bool warned; + +- /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string +- or defaulted. We need to use a sensible tune option. */ +- if (!strcmp (opts->x_ix86_tune_string, "x86-64")) +- { +- opts->x_ix86_tune_string = "generic"; ++ /* The first one should never be X86_64_X87UP_CLASS. */ ++ gcc_assert (i != 0); ++ if (!warned && warn_psabi) ++ { ++ warned = true; ++ inform (input_location, ++ "the ABI of passing union with %" ++ " has changed in GCC 4.4"); ++ } ++ return 0; ++ } + } ++ return words; + } + +- if (opts->x_ix86_stringop_alg == rep_prefix_8_byte +- && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) ++ /* Compute alignment needed. We align all types to natural boundaries with ++ exception of XFmode that is aligned to 64bits. */ ++ if (mode != VOIDmode && mode != BLKmode) + { +- /* rep; movq isn't available in 32-bit code. */ +- error ("%<-mstringop-strategy=rep_8byte%> not supported for 32-bit code"); +- opts->x_ix86_stringop_alg = no_stringop; +- } +- +- if (!opts->x_ix86_arch_string) +- opts->x_ix86_arch_string +- = TARGET_64BIT_P (opts->x_ix86_isa_flags) +- ? "x86-64" : SUBTARGET32_DEFAULT_CPU; +- else +- ix86_arch_specified = 1; ++ int mode_alignment = GET_MODE_BITSIZE (mode); + +- if (opts_set->x_ix86_pmode) +- { +- if ((TARGET_LP64_P (opts->x_ix86_isa_flags) +- && opts->x_ix86_pmode == PMODE_SI) +- || (!TARGET_64BIT_P (opts->x_ix86_isa_flags) +- && opts->x_ix86_pmode == PMODE_DI)) +- error ("address mode %qs not supported in the %s bit mode", +- TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long", +- TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32"); ++ if (mode == XFmode) ++ mode_alignment = 128; ++ else if (mode == XCmode) ++ mode_alignment = 256; ++ if (COMPLEX_MODE_P (mode)) ++ mode_alignment /= 2; ++ /* Misaligned fields are always returned in memory. */ ++ if (bit_offset % mode_alignment) ++ return 0; + } +- else +- opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags) +- ? 
PMODE_DI : PMODE_SI; +- +- if (!opts_set->x_ix86_abi) +- opts->x_ix86_abi = DEFAULT_ABI; +- +- if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags)) +- error ("%<-mabi=ms%> not supported with X32 ABI"); +- gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI); +- +- const char *abi_name = opts->x_ix86_abi == MS_ABI ? "ms" : "sysv"; +- if ((opts->x_flag_sanitize & SANITIZE_USER_ADDRESS) +- && opts->x_ix86_abi != DEFAULT_ABI) +- error ("%<-mabi=%s%> not supported with %<-fsanitize=address%>", abi_name); +- if ((opts->x_flag_sanitize & SANITIZE_KERNEL_ADDRESS) +- && opts->x_ix86_abi != DEFAULT_ABI) +- error ("%<-mabi=%s%> not supported with %<-fsanitize=kernel-address%>", +- abi_name); +- if ((opts->x_flag_sanitize & SANITIZE_THREAD) +- && opts->x_ix86_abi != DEFAULT_ABI) +- error ("%<-mabi=%s%> not supported with %<-fsanitize=thread%>", abi_name); +- +- /* For targets using ms ABI enable ms-extensions, if not +- explicit turned off. For non-ms ABI we turn off this +- option. */ +- if (!opts_set->x_flag_ms_extensions) +- opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI); +- +- if (opts_set->x_ix86_cmodel) +- { +- switch (opts->x_ix86_cmodel) +- { +- case CM_SMALL: +- case CM_SMALL_PIC: +- if (opts->x_flag_pic) +- opts->x_ix86_cmodel = CM_SMALL_PIC; +- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- error ("code model %qs not supported in the %s bit mode", +- "small", "32"); +- break; + +- case CM_MEDIUM: +- case CM_MEDIUM_PIC: +- if (opts->x_flag_pic) +- opts->x_ix86_cmodel = CM_MEDIUM_PIC; +- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- error ("code model %qs not supported in the %s bit mode", +- "medium", "32"); +- else if (TARGET_X32_P (opts->x_ix86_isa_flags)) +- error ("code model %qs not supported in x32 mode", +- "medium"); +- break; ++ /* for V1xx modes, just use the base mode */ ++ if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode ++ && GET_MODE_UNIT_SIZE (mode) == bytes) ++ mode = GET_MODE_INNER (mode); + +- case CM_LARGE: +- case CM_LARGE_PIC: +- if (opts->x_flag_pic) +- opts->x_ix86_cmodel = CM_LARGE_PIC; +- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- error ("code model %qs not supported in the %s bit mode", +- "large", "32"); +- else if (TARGET_X32_P (opts->x_ix86_isa_flags)) +- error ("code model %qs not supported in x32 mode", +- "large"); +- break; ++ /* Classification of atomic types. */ ++ switch (mode) ++ { ++ case E_SDmode: ++ case E_DDmode: ++ classes[0] = X86_64_SSE_CLASS; ++ return 1; ++ case E_TDmode: ++ classes[0] = X86_64_SSE_CLASS; ++ classes[1] = X86_64_SSEUP_CLASS; ++ return 2; ++ case E_DImode: ++ case E_SImode: ++ case E_HImode: ++ case E_QImode: ++ case E_CSImode: ++ case E_CHImode: ++ case E_CQImode: ++ { ++ int size = bit_offset + (int) GET_MODE_BITSIZE (mode); + +- case CM_32: +- if (opts->x_flag_pic) +- error ("code model %s does not support PIC mode", "32"); +- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- error ("code model %qs not supported in the %s bit mode", +- "32", "64"); +- break; ++ /* Analyze last 128 bits only. 
*/ ++ size = (size - 1) & 0x7f; + +- case CM_KERNEL: +- if (opts->x_flag_pic) +- { +- error ("code model %s does not support PIC mode", "kernel"); +- opts->x_ix86_cmodel = CM_32; +- } +- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- error ("code model %qs not supported in the %s bit mode", +- "kernel", "32"); +- break; +- +- default: +- gcc_unreachable (); +- } +- } +- else +- { +- /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the +- use of rip-relative addressing. This eliminates fixups that +- would otherwise be needed if this object is to be placed in a +- DLL, and is essentially just as efficient as direct addressing. */ +- if (TARGET_64BIT_P (opts->x_ix86_isa_flags) +- && (TARGET_RDOS || TARGET_PECOFF)) +- opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1; +- else if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL; +- else +- opts->x_ix86_cmodel = CM_32; +- } +- if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL) +- { +- error ("%<-masm=intel%> not supported in this configuration"); +- opts->x_ix86_asm_dialect = ASM_ATT; +- } +- if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0) +- != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0)) +- sorry ("%i-bit mode not compiled in", +- (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32); +- +- for (i = 0; i < pta_size; i++) +- if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name)) +- { +- if (!strcmp (opts->x_ix86_arch_string, "generic")) ++ if (size < 32) + { +- error (main_args_p +- ? G_("% CPU can be used only for %<-mtune=%> " +- "switch") +- : G_("% CPU can be used only for " +- "% attribute")); +- return false; ++ classes[0] = X86_64_INTEGERSI_CLASS; ++ return 1; + } +- else if (!strcmp (opts->x_ix86_arch_string, "intel")) ++ else if (size < 64) + { +- error (main_args_p +- ? G_("% CPU can be used only for %<-mtune=%> " +- "switch") +- : G_("% CPU can be used only for " +- "% attribute")); +- return false; ++ classes[0] = X86_64_INTEGER_CLASS; ++ return 1; + } +- +- if (TARGET_64BIT_P (opts->x_ix86_isa_flags) +- && !((processor_alias_table[i].flags & PTA_64BIT) != 0)) ++ else if (size < 64+32) + { +- error ("CPU you selected does not support x86-64 " +- "instruction set"); +- return false; ++ classes[0] = X86_64_INTEGER_CLASS; ++ classes[1] = X86_64_INTEGERSI_CLASS; ++ return 2; + } +- +- ix86_schedule = processor_alias_table[i].schedule; +- ix86_arch = processor_alias_table[i].processor; +- /* Default cpu tuning to the architecture. 
*/ +- ix86_tune = ix86_arch; +- +- if (((processor_alias_table[i].flags & PTA_MMX) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX; +- if (((processor_alias_table[i].flags & PTA_3DNOW) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW; +- if (((processor_alias_table[i].flags & PTA_3DNOW_A) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A; +- if (((processor_alias_table[i].flags & PTA_SSE) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE; +- if (((processor_alias_table[i].flags & PTA_SSE2) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2; +- if (((processor_alias_table[i].flags & PTA_SSE3) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3; +- if (((processor_alias_table[i].flags & PTA_SSSE3) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3; +- if (((processor_alias_table[i].flags & PTA_SSE4_1) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; +- if (((processor_alias_table[i].flags & PTA_SSE4_2) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2; +- if (((processor_alias_table[i].flags & PTA_AVX) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX; +- if (((processor_alias_table[i].flags & PTA_AVX2) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2; +- if (((processor_alias_table[i].flags & PTA_FMA) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA; +- if (((processor_alias_table[i].flags & PTA_SSE4A) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; +- if (((processor_alias_table[i].flags & PTA_FMA4) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4; +- if (((processor_alias_table[i].flags & PTA_XOP) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP; +- if (((processor_alias_table[i].flags & PTA_LWP) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP; +- if (((processor_alias_table[i].flags & PTA_ABM) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM; +- if (((processor_alias_table[i].flags & PTA_BMI) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI; +- if (((processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT; +- if (((processor_alias_table[i].flags & PTA_TBM) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM; +- if 
(((processor_alias_table[i].flags & PTA_BMI2) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2; +- if (((processor_alias_table[i].flags & PTA_CX16) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16; +- if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; +- if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags) +- && ((processor_alias_table[i].flags & PTA_NO_SAHF) != 0)) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF; +- if (((processor_alias_table[i].flags & PTA_MOVBE) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MOVBE)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MOVBE; +- if (((processor_alias_table[i].flags & PTA_AES) != 0) +- && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) +- ix86_isa_flags |= OPTION_MASK_ISA_AES; +- if (((processor_alias_table[i].flags & PTA_SHA) != 0) +- && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA)) +- ix86_isa_flags |= OPTION_MASK_ISA_SHA; +- if (((processor_alias_table[i].flags & PTA_PCLMUL) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; +- if (((processor_alias_table[i].flags & PTA_FSGSBASE) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE; +- if (((processor_alias_table[i].flags & PTA_RDRND) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND; +- if (((processor_alias_table[i].flags & PTA_F16C) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C; +- if (((processor_alias_table[i].flags & PTA_RTM) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM; +- if (((processor_alias_table[i].flags & PTA_HLE) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_HLE)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_HLE; +- if (((processor_alias_table[i].flags & PTA_PRFCHW) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW; +- if (((processor_alias_table[i].flags & PTA_RDSEED) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED; +- if (((processor_alias_table[i].flags & PTA_ADX) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX; +- if (((processor_alias_table[i].flags & PTA_FXSR) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR; +- if (((processor_alias_table[i].flags & PTA_XSAVE) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE; +- if (((processor_alias_table[i].flags & PTA_XSAVEOPT) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT; +- if (((processor_alias_table[i].flags & PTA_AVX512F) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)) +- opts->x_ix86_isa_flags |= 
OPTION_MASK_ISA_AVX512F; +- if (((processor_alias_table[i].flags & PTA_AVX512ER) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER; +- if (((processor_alias_table[i].flags & PTA_AVX512PF) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF; +- if (((processor_alias_table[i].flags & PTA_AVX512CD) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD; +- if (((processor_alias_table[i].flags & PTA_PREFETCHWT1) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1; +- if (((processor_alias_table[i].flags & PTA_CLWB) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB; +- if (((processor_alias_table[i].flags & PTA_CLFLUSHOPT) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT; +- if (((processor_alias_table[i].flags & PTA_CLZERO) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CLZERO)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CLZERO; +- if (((processor_alias_table[i].flags & PTA_XSAVEC) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC; +- if (((processor_alias_table[i].flags & PTA_XSAVES) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES; +- if (((processor_alias_table[i].flags & PTA_AVX512DQ) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ; +- if (((processor_alias_table[i].flags & PTA_AVX512BW) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW; +- if (((processor_alias_table[i].flags & PTA_AVX512VL) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL; +- if (((processor_alias_table[i].flags & PTA_AVX512VBMI) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI; +- if (((processor_alias_table[i].flags & PTA_AVX512IFMA) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA; +- if (((processor_alias_table[i].flags & PTA_AVX512VNNI) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VNNI)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VNNI; +- if (((processor_alias_table[i].flags & PTA_GFNI) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_GFNI)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_GFNI; +- if (((processor_alias_table[i].flags & PTA_AVX512VBMI2) != 0) +- && !(opts->x_ix86_isa_flags_explicit +- & OPTION_MASK_ISA_AVX512VBMI2)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI2; +- if (((processor_alias_table[i].flags & PTA_VPCLMULQDQ) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_VPCLMULQDQ)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ; +- if (((processor_alias_table[i].flags & PTA_AVX512BITALG) != 0) +- && !(opts->x_ix86_isa_flags_explicit +- & OPTION_MASK_ISA_AVX512BITALG)) +- opts->x_ix86_isa_flags |= 
OPTION_MASK_ISA_AVX512BITALG; +- +- if (((processor_alias_table[i].flags & PTA_AVX5124VNNIW) != 0) +- && !(opts->x_ix86_isa_flags2_explicit +- & OPTION_MASK_ISA_AVX5124VNNIW)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW; +- if (((processor_alias_table[i].flags & PTA_AVX5124FMAPS) != 0) +- && !(opts->x_ix86_isa_flags2_explicit +- & OPTION_MASK_ISA_AVX5124FMAPS)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS; +- if (((processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ) != 0) +- && !(opts->x_ix86_isa_flags_explicit +- & OPTION_MASK_ISA_AVX512VPOPCNTDQ)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ; +- if (((processor_alias_table[i].flags & PTA_SGX) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX; +- if (((processor_alias_table[i].flags & PTA_VAES) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_VAES)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_VAES; +- if (((processor_alias_table[i].flags & PTA_RDPID) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_RDPID)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_RDPID; +- if (((processor_alias_table[i].flags & PTA_PCONFIG) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PCONFIG)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PCONFIG; +- if (((processor_alias_table[i].flags & PTA_WBNOINVD) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_WBNOINVD)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_WBNOINVD; +- if (((processor_alias_table[i].flags & PTA_PTWRITE) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PTWRITE)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE; +- +- if ((processor_alias_table[i].flags +- & (PTA_PREFETCH_SSE | PTA_SSE)) != 0) +- x86_prefetch_sse = true; +- if (((processor_alias_table[i].flags & PTA_MWAITX) != 0) +- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MWAITX)) +- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MWAITX; +- if (((processor_alias_table[i].flags & PTA_PKU) != 0) +- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU)) +- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU; +- +- /* Don't enable x87 instructions if only +- general registers are allowed. */ +- if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY) +- && !(opts_set->x_target_flags & MASK_80387)) ++ else if (size < 64+64) + { +- if (((processor_alias_table[i].flags & PTA_NO_80387) != 0)) +- opts->x_target_flags &= ~MASK_80387; +- else +- opts->x_target_flags |= MASK_80387; ++ classes[0] = classes[1] = X86_64_INTEGER_CLASS; ++ return 2; + } +- break; ++ else ++ gcc_unreachable (); + } ++ case E_CDImode: ++ case E_TImode: ++ classes[0] = classes[1] = X86_64_INTEGER_CLASS; ++ return 2; ++ case E_COImode: ++ case E_OImode: ++ /* OImode shouldn't be used directly. */ ++ gcc_unreachable (); ++ case E_CTImode: ++ return 0; ++ case E_SFmode: ++ if (!(bit_offset % 64)) ++ classes[0] = X86_64_SSESF_CLASS; ++ else ++ classes[0] = X86_64_SSE_CLASS; ++ return 1; ++ case E_DFmode: ++ classes[0] = X86_64_SSEDF_CLASS; ++ return 1; ++ case E_XFmode: ++ classes[0] = X86_64_X87_CLASS; ++ classes[1] = X86_64_X87UP_CLASS; ++ return 2; ++ case E_TFmode: ++ classes[0] = X86_64_SSE_CLASS; ++ classes[1] = X86_64_SSEUP_CLASS; ++ return 2; ++ case E_SCmode: ++ classes[0] = X86_64_SSE_CLASS; ++ if (!(bit_offset % 64)) ++ return 1; ++ else ++ { ++ static bool warned; + +- if (i == pta_size) +- { +- error (main_args_p +- ? 
G_("bad value (%qs) for %<-march=%> switch") +- : G_("bad value (%qs) for % attribute"), +- opts->x_ix86_arch_string); ++ if (!warned && warn_psabi) ++ { ++ warned = true; ++ inform (input_location, ++ "the ABI of passing structure with %" ++ " member has changed in GCC 4.4"); ++ } ++ classes[1] = X86_64_SSESF_CLASS; ++ return 2; ++ } ++ case E_DCmode: ++ classes[0] = X86_64_SSEDF_CLASS; ++ classes[1] = X86_64_SSEDF_CLASS; ++ return 2; ++ case E_XCmode: ++ classes[0] = X86_64_COMPLEX_X87_CLASS; ++ return 1; ++ case E_TCmode: ++ /* This modes is larger than 16 bytes. */ ++ return 0; ++ case E_V8SFmode: ++ case E_V8SImode: ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V4DFmode: ++ case E_V4DImode: ++ classes[0] = X86_64_SSE_CLASS; ++ classes[1] = X86_64_SSEUP_CLASS; ++ classes[2] = X86_64_SSEUP_CLASS; ++ classes[3] = X86_64_SSEUP_CLASS; ++ return 4; ++ case E_V8DFmode: ++ case E_V16SFmode: ++ case E_V8DImode: ++ case E_V16SImode: ++ case E_V32HImode: ++ case E_V64QImode: ++ classes[0] = X86_64_SSE_CLASS; ++ classes[1] = X86_64_SSEUP_CLASS; ++ classes[2] = X86_64_SSEUP_CLASS; ++ classes[3] = X86_64_SSEUP_CLASS; ++ classes[4] = X86_64_SSEUP_CLASS; ++ classes[5] = X86_64_SSEUP_CLASS; ++ classes[6] = X86_64_SSEUP_CLASS; ++ classes[7] = X86_64_SSEUP_CLASS; ++ return 8; ++ case E_V4SFmode: ++ case E_V4SImode: ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V2DFmode: ++ case E_V2DImode: ++ classes[0] = X86_64_SSE_CLASS; ++ classes[1] = X86_64_SSEUP_CLASS; ++ return 2; ++ case E_V1TImode: ++ case E_V1DImode: ++ case E_V2SFmode: ++ case E_V2SImode: ++ case E_V4HImode: ++ case E_V8QImode: ++ classes[0] = X86_64_SSE_CLASS; ++ return 1; ++ case E_BLKmode: ++ case E_VOIDmode: ++ return 0; ++ default: ++ gcc_assert (VECTOR_MODE_P (mode)); + +- auto_vec candidates; +- for (i = 0; i < pta_size; i++) +- if (strcmp (processor_alias_table[i].name, "generic") +- && strcmp (processor_alias_table[i].name, "intel") +- && (!TARGET_64BIT_P (opts->x_ix86_isa_flags) +- || ((processor_alias_table[i].flags & PTA_64BIT) != 0))) +- candidates.safe_push (processor_alias_table[i].name); ++ if (bytes > 16) ++ return 0; + +-#ifdef HAVE_LOCAL_CPU_DETECT +- /* Add also "native" as possible value. */ +- candidates.safe_push ("native"); +-#endif ++ gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); + +- char *s; +- const char *hint +- = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates); +- if (hint) +- inform (input_location, +- main_args_p +- ? G_("valid arguments to %<-march=%> switch are: " +- "%s; did you mean %qs?") +- : G_("valid arguments to % attribute are: " +- "%s; did you mean %qs?"), s, hint); ++ if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) ++ classes[0] = X86_64_INTEGERSI_CLASS; + else +- inform (input_location, +- main_args_p +- ? G_("valid arguments to %<-march=%> switch are: %s") +- : G_("valid arguments to % attribute " +- "are: %s"), s); +- XDELETEVEC (s); ++ classes[0] = X86_64_INTEGER_CLASS; ++ classes[1] = X86_64_INTEGER_CLASS; ++ return 1 + (bytes > 8); + } ++} ++ ++/* Examine the argument and return set number of register required in each ++ class. Return true iff parameter should be passed in memory. 
*/ ++ ++static bool ++examine_argument (machine_mode mode, const_tree type, int in_return, ++ int *int_nregs, int *sse_nregs) ++{ ++ enum x86_64_reg_class regclass[MAX_CLASSES]; ++ int n = classify_argument (mode, type, regclass, 0); + +- ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch; +- for (i = 0; i < X86_ARCH_LAST; ++i) +- ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); ++ *int_nregs = 0; ++ *sse_nregs = 0; + +- for (i = 0; i < pta_size; i++) +- if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)) ++ if (!n) ++ return true; ++ for (n--; n >= 0; n--) ++ switch (regclass[n]) + { +- ix86_schedule = processor_alias_table[i].schedule; +- ix86_tune = processor_alias_table[i].processor; +- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- { +- if (!((processor_alias_table[i].flags & PTA_64BIT) != 0)) +- { +- if (ix86_tune_defaulted) +- { +- opts->x_ix86_tune_string = "x86-64"; +- for (i = 0; i < pta_size; i++) +- if (! strcmp (opts->x_ix86_tune_string, +- processor_alias_table[i].name)) +- break; +- ix86_schedule = processor_alias_table[i].schedule; +- ix86_tune = processor_alias_table[i].processor; +- } +- else +- error ("CPU you selected does not support x86-64 " +- "instruction set"); +- } +- } +- /* Intel CPUs have always interpreted SSE prefetch instructions as +- NOPs; so, we can enable SSE prefetch instructions even when +- -mtune (rather than -march) points us to a processor that has them. +- However, the VIA C3 gives a SIGILL, so we only do that for i686 and +- higher processors. */ +- if (TARGET_CMOV +- && ((processor_alias_table[i].flags +- & (PTA_PREFETCH_SSE | PTA_SSE)) != 0)) +- x86_prefetch_sse = true; ++ case X86_64_INTEGER_CLASS: ++ case X86_64_INTEGERSI_CLASS: ++ (*int_nregs)++; + break; ++ case X86_64_SSE_CLASS: ++ case X86_64_SSESF_CLASS: ++ case X86_64_SSEDF_CLASS: ++ (*sse_nregs)++; ++ break; ++ case X86_64_NO_CLASS: ++ case X86_64_SSEUP_CLASS: ++ break; ++ case X86_64_X87_CLASS: ++ case X86_64_X87UP_CLASS: ++ case X86_64_COMPLEX_X87_CLASS: ++ if (!in_return) ++ return true; ++ break; ++ case X86_64_MEMORY_CLASS: ++ gcc_unreachable (); + } + +- if (ix86_tune_specified && i == pta_size) +- { +- error (main_args_p +- ? G_("bad value (%qs) for %<-mtune=%> switch") +- : G_("bad value (%qs) for % attribute"), +- opts->x_ix86_tune_string); +- +- auto_vec candidates; +- for (i = 0; i < pta_size; i++) +- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) +- || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) +- candidates.safe_push (processor_alias_table[i].name); +- +-#ifdef HAVE_LOCAL_CPU_DETECT +- /* Add also "native" as possible value. */ +- candidates.safe_push ("native"); +-#endif ++ return false; ++} + +- char *s; +- const char *hint +- = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates); +- if (hint) +- inform (input_location, +- main_args_p +- ? G_("valid arguments to %<-mtune=%> switch are: " +- "%s; did you mean %qs?") +- : G_("valid arguments to % attribute are: " +- "%s; did you mean %qs?"), s, hint); +- else +- inform (input_location, +- main_args_p +- ? G_("valid arguments to %<-mtune=%> switch are: %s") +- : G_("valid arguments to % attribute " +- "are: %s"), s); +- XDELETEVEC (s); +- } ++/* Construct container for the argument used by GCC interface. See ++ FUNCTION_ARG for the detailed description. 
*/ + +- set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes); ++static rtx ++construct_container (machine_mode mode, machine_mode orig_mode, ++ const_tree type, int in_return, int nintregs, int nsseregs, ++ const int *intreg, int sse_regno) ++{ ++ /* The following variables hold the static issued_error state. */ ++ static bool issued_sse_arg_error; ++ static bool issued_sse_ret_error; ++ static bool issued_x87_ret_error; + +-#ifndef USE_IX86_FRAME_POINTER +-#define USE_IX86_FRAME_POINTER 0 +-#endif ++ machine_mode tmpmode; ++ int bytes ++ = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); ++ enum x86_64_reg_class regclass[MAX_CLASSES]; ++ int n; ++ int i; ++ int nexps = 0; ++ int needed_sseregs, needed_intregs; ++ rtx exp[MAX_CLASSES]; ++ rtx ret; + +-#ifndef USE_X86_64_FRAME_POINTER +-#define USE_X86_64_FRAME_POINTER 0 +-#endif ++ n = classify_argument (mode, type, regclass, 0); ++ if (!n) ++ return NULL; ++ if (examine_argument (mode, type, in_return, &needed_intregs, ++ &needed_sseregs)) ++ return NULL; ++ if (needed_intregs > nintregs || needed_sseregs > nsseregs) ++ return NULL; + +- /* Set the default values for switches whose default depends on TARGET_64BIT +- in case they weren't overwritten by command line options. */ +- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- { +- if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) +- opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER; +- if (opts->x_flag_asynchronous_unwind_tables +- && !opts_set->x_flag_unwind_tables +- && TARGET_64BIT_MS_ABI) +- opts->x_flag_unwind_tables = 1; +- if (opts->x_flag_asynchronous_unwind_tables == 2) +- opts->x_flag_unwind_tables +- = opts->x_flag_asynchronous_unwind_tables = 1; +- if (opts->x_flag_pcc_struct_return == 2) +- opts->x_flag_pcc_struct_return = 0; +- } +- else ++ /* We allowed the user to turn off SSE for kernel mode. Don't crash if ++ some less clueful developer tries to use floating-point anyway. */ ++ if (needed_sseregs && !TARGET_SSE) + { +- if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) +- opts->x_flag_omit_frame_pointer +- = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size); +- if (opts->x_flag_asynchronous_unwind_tables == 2) +- opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER; +- if (opts->x_flag_pcc_struct_return == 2) +- { +- /* Intel MCU psABI specifies that -freg-struct-return should +- be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1, +- we check -miamcu so that -freg-struct-return is always +- turned on if -miamcu is used. */ +- if (TARGET_IAMCU_P (opts->x_target_flags)) +- opts->x_flag_pcc_struct_return = 0; +- else +- opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; ++ if (in_return) ++ { ++ if (!issued_sse_ret_error) ++ { ++ error ("SSE register return with SSE disabled"); ++ issued_sse_ret_error = true; ++ } + } +- } +- +- ix86_tune_cost = processor_cost_table[ix86_tune]; +- /* TODO: ix86_cost should be chosen at instruction or function granuality +- so for cold code we use size_cost even in !optimize_size compilation. */ +- if (opts->x_optimize_size) +- ix86_cost = &ix86_size_cost; +- else +- ix86_cost = ix86_tune_cost; +- +- /* Arrange to set up i386_stack_locals for all functions. */ +- init_machine_status = ix86_init_machine_status; +- +- /* Validate -mregparm= value. 
*/ +- if (opts_set->x_ix86_regparm) +- { +- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- warning (0, "%<-mregparm%> is ignored in 64-bit mode"); +- else if (TARGET_IAMCU_P (opts->x_target_flags)) +- warning (0, "%<-mregparm%> is ignored for Intel MCU psABI"); +- if (opts->x_ix86_regparm > REGPARM_MAX) ++ else if (!issued_sse_arg_error) + { +- error ("%<-mregparm=%d%> is not between 0 and %d", +- opts->x_ix86_regparm, REGPARM_MAX); +- opts->x_ix86_regparm = 0; ++ error ("SSE register argument with SSE disabled"); ++ issued_sse_arg_error = true; + } ++ return NULL; + } +- if (TARGET_IAMCU_P (opts->x_target_flags) +- || TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- opts->x_ix86_regparm = REGPARM_MAX; +- +- /* Default align_* from the processor table. */ +- ix86_default_align (opts); +- +- /* Provide default for -mbranch-cost= value. */ +- if (!opts_set->x_ix86_branch_cost) +- opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost; + +- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- { +- opts->x_target_flags +- |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags; +- +- if (!ix86_arch_specified) +- opts->x_ix86_isa_flags +- |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit; +- +- if (TARGET_RTD_P (opts->x_target_flags)) +- warning (0, +- main_args_p +- ? G_("%<-mrtd%> is ignored in 64bit mode") +- : G_("% is ignored in 64bit mode")); +- } +- else +- { +- opts->x_target_flags +- |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags; +- +- if (!ix86_arch_specified) +- opts->x_ix86_isa_flags +- |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit; +- +- /* i386 ABI does not specify red zone. It still makes sense to use it +- when programmer takes care to stack from being destroyed. */ +- if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE)) +- opts->x_target_flags |= MASK_NO_RED_ZONE; +- } +- +- /* Keep nonleaf frame pointers. */ +- if (opts->x_flag_omit_frame_pointer) +- opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; +- else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)) +- opts->x_flag_omit_frame_pointer = 1; +- +- /* If we're doing fast math, we don't care about comparison order +- wrt NaNs. This lets us use a shorter comparison sequence. */ +- if (opts->x_flag_finite_math_only) +- opts->x_target_flags &= ~MASK_IEEE_FP; +- +- /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, +- since the insns won't need emulation. */ +- if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387]) +- opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387; +- +- /* Likewise, if the target doesn't have a 387, or we've specified +- software floating point, don't use 387 inline intrinsics. */ +- if (!TARGET_80387_P (opts->x_target_flags)) +- opts->x_target_flags |= MASK_NO_FANCY_MATH_387; +- +- /* Turn on MMX builtins for -msse. */ +- if (TARGET_SSE_P (opts->x_ix86_isa_flags)) +- opts->x_ix86_isa_flags +- |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit; +- +- /* Enable SSE prefetch. */ +- if (TARGET_SSE_P (opts->x_ix86_isa_flags) +- || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags) +- && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)) +- || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags)) +- x86_prefetch_sse = true; +- +- /* Enable popcnt instruction for -msse4.2 or -mabm. */ +- if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags) +- || TARGET_ABM_P (opts->x_ix86_isa_flags)) +- opts->x_ix86_isa_flags +- |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit; +- +- /* Enable lzcnt instruction for -mabm. 
*/ +- if (TARGET_ABM_P(opts->x_ix86_isa_flags)) +- opts->x_ix86_isa_flags +- |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit; +- +- /* Disable BMI, BMI2 and TBM instructions for -m16. */ +- if (TARGET_16BIT_P(opts->x_ix86_isa_flags)) +- opts->x_ix86_isa_flags +- &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM) +- & ~opts->x_ix86_isa_flags_explicit); +- +- /* Validate -mpreferred-stack-boundary= value or default it to +- PREFERRED_STACK_BOUNDARY_DEFAULT. */ +- ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; +- if (opts_set->x_ix86_preferred_stack_boundary_arg) +- { +- int min = TARGET_64BIT_P (opts->x_ix86_isa_flags)? 3 : 2; +- int max = TARGET_SEH ? 4 : 12; +- +- if (opts->x_ix86_preferred_stack_boundary_arg < min +- || opts->x_ix86_preferred_stack_boundary_arg > max) +- { +- if (min == max) +- error ("%<-mpreferred-stack-boundary%> is not supported " +- "for this target"); +- else +- error ("%<-mpreferred-stack-boundary=%d%> is not between %d and %d", +- opts->x_ix86_preferred_stack_boundary_arg, min, max); +- } +- else +- ix86_preferred_stack_boundary +- = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT; +- } +- +- /* Set the default value for -mstackrealign. */ +- if (!opts_set->x_ix86_force_align_arg_pointer) +- opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; +- +- ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; +- +- /* Validate -mincoming-stack-boundary= value or default it to +- MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ +- ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; +- if (opts_set->x_ix86_incoming_stack_boundary_arg) +- { +- int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2; +- +- if (opts->x_ix86_incoming_stack_boundary_arg < min +- || opts->x_ix86_incoming_stack_boundary_arg > 12) +- error ("%<-mincoming-stack-boundary=%d%> is not between %d and 12", +- opts->x_ix86_incoming_stack_boundary_arg, min); +- else +- { +- ix86_user_incoming_stack_boundary +- = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT; +- ix86_incoming_stack_boundary +- = ix86_user_incoming_stack_boundary; +- } +- } +- +-#ifndef NO_PROFILE_COUNTERS +- if (flag_nop_mcount) +- error ("%<-mnop-mcount%> is not compatible with this target"); +-#endif +- if (flag_nop_mcount && flag_pic) +- error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>"); +- +- /* Accept -msseregparm only if at least SSE support is enabled. */ +- if (TARGET_SSEREGPARM_P (opts->x_target_flags) +- && ! TARGET_SSE_P (opts->x_ix86_isa_flags)) +- error (main_args_p +- ? G_("%<-msseregparm%> used without SSE enabled") +- : G_("% used without SSE enabled")); +- +- if (opts_set->x_ix86_fpmath) +- { +- if (opts->x_ix86_fpmath & FPMATH_SSE) ++ /* Likewise, error if the ABI requires us to return values in the ++ x87 registers and the user specified -mno-80387. 
*/ ++ if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) ++ for (i = 0; i < n; i++) ++ if (regclass[i] == X86_64_X87_CLASS ++ || regclass[i] == X86_64_X87UP_CLASS ++ || regclass[i] == X86_64_COMPLEX_X87_CLASS) + { +- if (!TARGET_SSE_P (opts->x_ix86_isa_flags)) +- { +- if (TARGET_80387_P (opts->x_target_flags)) +- { +- warning (0, "SSE instruction set disabled, using 387 arithmetics"); +- opts->x_ix86_fpmath = FPMATH_387; +- } +- } +- else if ((opts->x_ix86_fpmath & FPMATH_387) +- && !TARGET_80387_P (opts->x_target_flags)) ++ if (!issued_x87_ret_error) + { +- warning (0, "387 instruction set disabled, using SSE arithmetics"); +- opts->x_ix86_fpmath = FPMATH_SSE; ++ error ("x87 register return with x87 disabled"); ++ issued_x87_ret_error = true; + } ++ return NULL; + } +- } +- /* For all chips supporting SSE2, -mfpmath=sse performs better than +- fpmath=387. The second is however default at many targets since the +- extra 80bit precision of temporaries is considered to be part of ABI. +- Overwrite the default at least for -ffast-math. +- TODO: -mfpmath=both seems to produce same performing code with bit +- smaller binaries. It is however not clear if register allocation is +- ready for this setting. +- Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE +- codegen. We may switch to 387 with -ffast-math for size optimized +- functions. */ +- else if (fast_math_flags_set_p (&global_options) +- && TARGET_SSE2_P (opts->x_ix86_isa_flags)) +- opts->x_ix86_fpmath = FPMATH_SSE; +- else +- opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags); + +- /* Use external vectorized library in vectorizing intrinsics. */ +- if (opts_set->x_ix86_veclibabi_type) +- switch (opts->x_ix86_veclibabi_type) ++ /* First construct simple cases. Avoid SCmode, since we want to use ++ single register to pass this type. */ ++ if (n == 1 && mode != SCmode) ++ switch (regclass[0]) + { +- case ix86_veclibabi_type_svml: +- ix86_veclib_handler = ix86_veclibabi_svml; +- break; +- +- case ix86_veclibabi_type_acml: +- ix86_veclib_handler = ix86_veclibabi_acml; ++ case X86_64_INTEGER_CLASS: ++ case X86_64_INTEGERSI_CLASS: ++ return gen_rtx_REG (mode, intreg[0]); ++ case X86_64_SSE_CLASS: ++ case X86_64_SSESF_CLASS: ++ case X86_64_SSEDF_CLASS: ++ if (mode != BLKmode) ++ return gen_reg_or_parallel (mode, orig_mode, ++ GET_SSE_REGNO (sse_regno)); + break; +- ++ case X86_64_X87_CLASS: ++ case X86_64_COMPLEX_X87_CLASS: ++ return gen_rtx_REG (mode, FIRST_STACK_REG); ++ case X86_64_NO_CLASS: ++ /* Zero sized array, struct or class. 
*/ ++ return NULL; + default: + gcc_unreachable (); + } ++ if (n == 2 ++ && regclass[0] == X86_64_SSE_CLASS ++ && regclass[1] == X86_64_SSEUP_CLASS ++ && mode != BLKmode) ++ return gen_reg_or_parallel (mode, orig_mode, ++ GET_SSE_REGNO (sse_regno)); ++ if (n == 4 ++ && regclass[0] == X86_64_SSE_CLASS ++ && regclass[1] == X86_64_SSEUP_CLASS ++ && regclass[2] == X86_64_SSEUP_CLASS ++ && regclass[3] == X86_64_SSEUP_CLASS ++ && mode != BLKmode) ++ return gen_reg_or_parallel (mode, orig_mode, ++ GET_SSE_REGNO (sse_regno)); ++ if (n == 8 ++ && regclass[0] == X86_64_SSE_CLASS ++ && regclass[1] == X86_64_SSEUP_CLASS ++ && regclass[2] == X86_64_SSEUP_CLASS ++ && regclass[3] == X86_64_SSEUP_CLASS ++ && regclass[4] == X86_64_SSEUP_CLASS ++ && regclass[5] == X86_64_SSEUP_CLASS ++ && regclass[6] == X86_64_SSEUP_CLASS ++ && regclass[7] == X86_64_SSEUP_CLASS ++ && mode != BLKmode) ++ return gen_reg_or_parallel (mode, orig_mode, ++ GET_SSE_REGNO (sse_regno)); ++ if (n == 2 ++ && regclass[0] == X86_64_X87_CLASS ++ && regclass[1] == X86_64_X87UP_CLASS) ++ return gen_rtx_REG (XFmode, FIRST_STACK_REG); + +- if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS] +- && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) +- opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; +- +- /* If stack probes are required, the space used for large function +- arguments on the stack must also be probed, so enable +- -maccumulate-outgoing-args so this happens in the prologue. */ +- if (TARGET_STACK_PROBE_P (opts->x_target_flags) +- && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) +- { +- if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) +- warning (0, +- main_args_p +- ? G_("stack probing requires %<-maccumulate-outgoing-args%> " +- "for correctness") +- : G_("stack probing requires " +- "% for " +- "correctness")); +- opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; +- } +- +- /* Stack realignment without -maccumulate-outgoing-args requires %ebp, +- so enable -maccumulate-outgoing-args when %ebp is fixed. */ +- if (fixed_regs[BP_REG] +- && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) +- { +- if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) +- warning (0, +- main_args_p +- ? G_("fixed ebp register requires " +- "%<-maccumulate-outgoing-args%>") +- : G_("fixed ebp register requires " +- "%")); +- opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; +- } +- +- /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ +- { +- char *p; +- ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); +- p = strchr (internal_label_prefix, 'X'); +- internal_label_prefix_len = p - internal_label_prefix; +- *p = '\0'; +- } +- +- /* When scheduling description is not available, disable scheduler pass +- so it won't slow down the compilation and make x87 code slower. 
*/ +- if (!TARGET_SCHEDULE) +- opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0; +- +- maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, +- ix86_tune_cost->simultaneous_prefetches, +- opts->x_param_values, +- opts_set->x_param_values); +- maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, +- ix86_tune_cost->prefetch_block, +- opts->x_param_values, +- opts_set->x_param_values); +- maybe_set_param_value (PARAM_L1_CACHE_SIZE, +- ix86_tune_cost->l1_cache_size, +- opts->x_param_values, +- opts_set->x_param_values); +- maybe_set_param_value (PARAM_L2_CACHE_SIZE, +- ix86_tune_cost->l2_cache_size, +- opts->x_param_values, +- opts_set->x_param_values); +- +- /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */ +- if (opts->x_flag_prefetch_loop_arrays < 0 +- && HAVE_prefetch +- && (opts->x_optimize >= 3 || opts->x_flag_profile_use) +- && !opts->x_optimize_size +- && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL) +- opts->x_flag_prefetch_loop_arrays = 1; +- +- /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) +- can be opts->x_optimized to ap = __builtin_next_arg (0). */ +- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack) +- targetm.expand_builtin_va_start = NULL; +- +- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- { +- ix86_gen_leave = gen_leave_rex64; +- if (Pmode == DImode) +- { +- ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di; +- ix86_gen_tls_local_dynamic_base_64 +- = gen_tls_local_dynamic_base_64_di; +- } +- else ++ if (n == 2 ++ && regclass[0] == X86_64_INTEGER_CLASS ++ && regclass[1] == X86_64_INTEGER_CLASS ++ && (mode == CDImode || mode == TImode || mode == BLKmode) ++ && intreg[0] + 1 == intreg[1]) ++ { ++ if (mode == BLKmode) + { +- ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si; +- ix86_gen_tls_local_dynamic_base_64 +- = gen_tls_local_dynamic_base_64_si; ++ /* Use TImode for BLKmode values in 2 integer registers. */ ++ exp[0] = gen_rtx_EXPR_LIST (VOIDmode, ++ gen_rtx_REG (TImode, intreg[0]), ++ GEN_INT (0)); ++ ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1)); ++ XVECEXP (ret, 0, 0) = exp[0]; ++ return ret; + } ++ else ++ return gen_rtx_REG (mode, intreg[0]); + } +- else +- ix86_gen_leave = gen_leave; +- +- if (Pmode == DImode) +- { +- ix86_gen_add3 = gen_adddi3; +- ix86_gen_sub3 = gen_subdi3; +- ix86_gen_sub3_carry = gen_subdi3_carry; +- ix86_gen_one_cmpl2 = gen_one_cmpldi2; +- ix86_gen_andsp = gen_anddi3; +- ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di; +- ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; +- ix86_gen_probe_stack_range = gen_probe_stack_rangedi; +- ix86_gen_monitor = gen_sse3_monitor_di; +- ix86_gen_monitorx = gen_monitorx_di; +- ix86_gen_clzero = gen_clzero_di; +- } +- else +- { +- ix86_gen_add3 = gen_addsi3; +- ix86_gen_sub3 = gen_subsi3; +- ix86_gen_sub3_carry = gen_subsi3_carry; +- ix86_gen_one_cmpl2 = gen_one_cmplsi2; +- ix86_gen_andsp = gen_andsi3; +- ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si; +- ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi; +- ix86_gen_probe_stack_range = gen_probe_stack_rangesi; +- ix86_gen_monitor = gen_sse3_monitor_si; +- ix86_gen_monitorx = gen_monitorx_si; +- ix86_gen_clzero = gen_clzero_si; +- } +- +-#ifdef USE_IX86_CLD +- /* Use -mcld by default for 32-bit code if configured with --enable-cld. 
*/ +- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) +- opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags; +-#endif + +- /* Set the default value for -mfentry. */ +- if (!opts_set->x_flag_fentry) +- opts->x_flag_fentry = TARGET_SEH; +- else ++ /* Otherwise figure out the entries of the PARALLEL. */ ++ for (i = 0; i < n; i++) + { +- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic +- && opts->x_flag_fentry) +- sorry ("%<-mfentry%> isn%'t supported for 32-bit in combination " +- "with %<-fpic%>"); +- else if (TARGET_SEH && !opts->x_flag_fentry) +- sorry ("%<-mno-fentry%> isn%'t compatible with SEH"); +- } +- +- if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES) +- sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH"); +- +- if (!(opts_set->x_target_flags & MASK_VZEROUPPER) +- && TARGET_EMIT_VZEROUPPER) +- opts->x_target_flags |= MASK_VZEROUPPER; +- if (!(opts_set->x_target_flags & MASK_STV)) +- opts->x_target_flags |= MASK_STV; +- /* Disable STV if -mpreferred-stack-boundary={2,3} or +- -mincoming-stack-boundary={2,3} or -mstackrealign - the needed +- stack realignment will be extra cost the pass doesn't take into +- account and the pass can't realign the stack. */ +- if (ix86_preferred_stack_boundary < 128 +- || ix86_incoming_stack_boundary < 128 +- || opts->x_ix86_force_align_arg_pointer) +- opts->x_target_flags &= ~MASK_STV; +- if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] +- && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) +- opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; +- if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] +- && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) +- opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; +- +- /* Enable 128-bit AVX instruction generation +- for the auto-vectorizer. */ +- if (TARGET_AVX128_OPTIMAL +- && (opts_set->x_prefer_vector_width_type == PVW_NONE)) +- opts->x_prefer_vector_width_type = PVW_AVX128; +- +- /* Use 256-bit AVX instruction generation +- in the auto-vectorizer. */ +- if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL] +- && (opts_set->x_prefer_vector_width_type == PVW_NONE)) +- opts->x_prefer_vector_width_type = PVW_AVX256; +- +- if (opts->x_ix86_recip_name) +- { +- char *p = ASTRDUP (opts->x_ix86_recip_name); +- char *q; +- unsigned int mask, i; +- bool invert; +- +- while ((q = strtok (p, ",")) != NULL) +- { +- p = NULL; +- if (*q == '!') +- { +- invert = true; +- q++; +- } +- else +- invert = false; ++ int pos; + +- if (!strcmp (q, "default")) +- mask = RECIP_MASK_ALL; +- else +- { +- for (i = 0; i < ARRAY_SIZE (recip_options); i++) +- if (!strcmp (q, recip_options[i].string)) ++ switch (regclass[i]) ++ { ++ case X86_64_NO_CLASS: ++ break; ++ case X86_64_INTEGER_CLASS: ++ case X86_64_INTEGERSI_CLASS: ++ /* Merge TImodes on aligned occasions here too. */ ++ if (i * 8 + 8 > bytes) ++ { ++ unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT; ++ if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode)) ++ /* We've requested 24 bytes we ++ don't have mode for. Use DImode. 
*/ ++ tmpmode = DImode; ++ } ++ else if (regclass[i] == X86_64_INTEGERSI_CLASS) ++ tmpmode = SImode; ++ else ++ tmpmode = DImode; ++ exp [nexps++] ++ = gen_rtx_EXPR_LIST (VOIDmode, ++ gen_rtx_REG (tmpmode, *intreg), ++ GEN_INT (i*8)); ++ intreg++; ++ break; ++ case X86_64_SSESF_CLASS: ++ exp [nexps++] ++ = gen_rtx_EXPR_LIST (VOIDmode, ++ gen_rtx_REG (SFmode, ++ GET_SSE_REGNO (sse_regno)), ++ GEN_INT (i*8)); ++ sse_regno++; ++ break; ++ case X86_64_SSEDF_CLASS: ++ exp [nexps++] ++ = gen_rtx_EXPR_LIST (VOIDmode, ++ gen_rtx_REG (DFmode, ++ GET_SSE_REGNO (sse_regno)), ++ GEN_INT (i*8)); ++ sse_regno++; ++ break; ++ case X86_64_SSE_CLASS: ++ pos = i; ++ switch (n) ++ { ++ case 1: ++ tmpmode = DImode; ++ break; ++ case 2: ++ if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) + { +- mask = recip_options[i].mask; +- break; ++ tmpmode = TImode; ++ i++; + } +- +- if (i == ARRAY_SIZE (recip_options)) +- { +- error ("unknown option for %<-mrecip=%s%>", q); +- invert = false; +- mask = RECIP_MASK_NONE; +- } +- } +- +- opts->x_recip_mask_explicit |= mask; +- if (invert) +- opts->x_recip_mask &= ~mask; +- else +- opts->x_recip_mask |= mask; ++ else ++ tmpmode = DImode; ++ break; ++ case 4: ++ gcc_assert (i == 0 ++ && regclass[1] == X86_64_SSEUP_CLASS ++ && regclass[2] == X86_64_SSEUP_CLASS ++ && regclass[3] == X86_64_SSEUP_CLASS); ++ tmpmode = OImode; ++ i += 3; ++ break; ++ case 8: ++ gcc_assert (i == 0 ++ && regclass[1] == X86_64_SSEUP_CLASS ++ && regclass[2] == X86_64_SSEUP_CLASS ++ && regclass[3] == X86_64_SSEUP_CLASS ++ && regclass[4] == X86_64_SSEUP_CLASS ++ && regclass[5] == X86_64_SSEUP_CLASS ++ && regclass[6] == X86_64_SSEUP_CLASS ++ && regclass[7] == X86_64_SSEUP_CLASS); ++ tmpmode = XImode; ++ i += 7; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ exp [nexps++] ++ = gen_rtx_EXPR_LIST (VOIDmode, ++ gen_rtx_REG (tmpmode, ++ GET_SSE_REGNO (sse_regno)), ++ GEN_INT (pos*8)); ++ sse_regno++; ++ break; ++ default: ++ gcc_unreachable (); + } + } + +- if (TARGET_RECIP_P (opts->x_target_flags)) +- opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit; +- else if (opts_set->x_target_flags & MASK_RECIP) +- opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit); ++ /* Empty aligned struct, union or class. */ ++ if (nexps == 0) ++ return NULL; ++ ++ ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); ++ for (i = 0; i < nexps; i++) ++ XVECEXP (ret, 0, i) = exp [i]; ++ return ret; ++} ++ ++/* Update the data in CUM to advance over an argument of mode MODE ++ and data type TYPE. (TYPE is null for libcalls where that information ++ may not be available.) + +- /* Default long double to 64-bit for 32-bit Bionic and to __float128 +- for 64-bit Bionic. Also default long double to 64-bit for Intel +- MCU psABI. */ +- if ((TARGET_HAS_BIONIC || TARGET_IAMCU) +- && !(opts_set->x_target_flags +- & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128))) +- opts->x_target_flags |= (TARGET_64BIT +- ? MASK_LONG_DOUBLE_128 +- : MASK_LONG_DOUBLE_64); ++ Return a number of integer regsiters advanced over. */ + +- /* Only one of them can be active. 
*/ +- gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0 +- || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0); ++static int ++function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, ++ const_tree type, HOST_WIDE_INT bytes, ++ HOST_WIDE_INT words) ++{ ++ int res = 0; ++ bool error_p = false; + +- /* Handle stack protector */ +- if (!opts_set->x_ix86_stack_protector_guard) ++ if (TARGET_IAMCU) + { +-#ifdef TARGET_THREAD_SSP_OFFSET +- if (!TARGET_HAS_BIONIC) +- opts->x_ix86_stack_protector_guard = SSP_TLS; +- else +-#endif +- opts->x_ix86_stack_protector_guard = SSP_GLOBAL; ++ /* Intel MCU psABI passes scalars and aggregates no larger than 8 ++ bytes in registers. */ ++ if (!VECTOR_MODE_P (mode) && bytes <= 8) ++ goto pass_in_reg; ++ return res; + } + +- if (opts_set->x_ix86_stack_protector_guard_offset_str) ++ switch (mode) + { +- char *endp; +- const char *str = opts->x_ix86_stack_protector_guard_offset_str; +- +- errno = 0; +- int64_t offset; +- +-#if defined(INT64_T_IS_LONG) +- offset = strtol (str, &endp, 0); +-#else +- offset = strtoll (str, &endp, 0); +-#endif +- +- if (!*str || *endp || errno) +- error ("%qs is not a valid number " +- "in %<-mstack-protector-guard-offset=%>", str); +- +- if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000), +- HOST_WIDE_INT_C (0x7fffffff))) +- error ("%qs is not a valid offset " +- "in %<-mstack-protector-guard-offset=%>", str); +- +- opts->x_ix86_stack_protector_guard_offset = offset; +- } +-#ifdef TARGET_THREAD_SSP_OFFSET +- else +- opts->x_ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; +-#endif +- +- if (opts_set->x_ix86_stack_protector_guard_reg_str) +- { +- const char *str = opts->x_ix86_stack_protector_guard_reg_str; +- addr_space_t seg = ADDR_SPACE_GENERIC; ++ default: ++ break; + +- /* Discard optional register prefix. */ +- if (str[0] == '%') +- str++; ++ case E_BLKmode: ++ if (bytes < 0) ++ break; ++ /* FALLTHRU */ + +- if (strlen (str) == 2 && str[1] == 's') ++ case E_DImode: ++ case E_SImode: ++ case E_HImode: ++ case E_QImode: ++pass_in_reg: ++ cum->words += words; ++ cum->nregs -= words; ++ cum->regno += words; ++ if (cum->nregs >= 0) ++ res = words; ++ if (cum->nregs <= 0) + { +- if (str[0] == 'f') +- seg = ADDR_SPACE_SEG_FS; +- else if (str[0] == 'g') +- seg = ADDR_SPACE_SEG_GS; ++ cum->nregs = 0; ++ cfun->machine->arg_reg_available = false; ++ cum->regno = 0; + } ++ break; + +- if (seg == ADDR_SPACE_GENERIC) +- error ("%qs is not a valid base register " +- "in %<-mstack-protector-guard-reg=%>", +- opts->x_ix86_stack_protector_guard_reg_str); ++ case E_OImode: ++ /* OImode shouldn't be used directly. */ ++ gcc_unreachable (); + +- opts->x_ix86_stack_protector_guard_reg = seg; +- } +- else +- { +- opts->x_ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG; ++ case E_DFmode: ++ if (cum->float_in_sse == -1) ++ error_p = true; ++ if (cum->float_in_sse < 2) ++ break; ++ /* FALLTHRU */ ++ case E_SFmode: ++ if (cum->float_in_sse == -1) ++ error_p = true; ++ if (cum->float_in_sse < 1) ++ break; ++ /* FALLTHRU */ + +- /* The kernel uses a different segment register for performance +- reasons; a system call would not have to trash the userspace +- segment register, which would be expensive. 
*/ +- if (opts->x_ix86_cmodel == CM_KERNEL) +- opts->x_ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS; +- } ++ case E_V8SFmode: ++ case E_V8SImode: ++ case E_V64QImode: ++ case E_V32HImode: ++ case E_V16SImode: ++ case E_V8DImode: ++ case E_V16SFmode: ++ case E_V8DFmode: ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V4DFmode: ++ case E_V4DImode: ++ case E_TImode: ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ case E_V4SFmode: ++ case E_V2DFmode: ++ if (!type || !AGGREGATE_TYPE_P (type)) ++ { ++ cum->sse_words += words; ++ cum->sse_nregs -= 1; ++ cum->sse_regno += 1; ++ if (cum->sse_nregs <= 0) ++ { ++ cum->sse_nregs = 0; ++ cum->sse_regno = 0; ++ } ++ } ++ break; + +- /* Handle -mmemcpy-strategy= and -mmemset-strategy= */ +- if (opts->x_ix86_tune_memcpy_strategy) +- { +- char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy); +- ix86_parse_stringop_strategy_string (str, false); +- free (str); ++ case E_V8QImode: ++ case E_V4HImode: ++ case E_V2SImode: ++ case E_V2SFmode: ++ case E_V1TImode: ++ case E_V1DImode: ++ if (!type || !AGGREGATE_TYPE_P (type)) ++ { ++ cum->mmx_words += words; ++ cum->mmx_nregs -= 1; ++ cum->mmx_regno += 1; ++ if (cum->mmx_nregs <= 0) ++ { ++ cum->mmx_nregs = 0; ++ cum->mmx_regno = 0; ++ } ++ } ++ break; + } +- +- if (opts->x_ix86_tune_memset_strategy) ++ if (error_p) + { +- char *str = xstrdup (opts->x_ix86_tune_memset_strategy); +- ix86_parse_stringop_strategy_string (str, true); +- free (str); ++ cum->float_in_sse = 0; ++ error ("calling %qD with SSE calling convention without " ++ "SSE/SSE2 enabled", cum->decl); ++ sorry ("this is a GCC bug that can be worked around by adding " ++ "attribute used to function called"); + } + +- /* Save the initial options in case the user does function specific +- options. */ +- if (main_args_p) +- target_option_default_node = target_option_current_node +- = build_target_option_node (opts); +- +- if (opts->x_flag_cf_protection != CF_NONE) +- opts->x_flag_cf_protection +- = (cf_protection_level) (opts->x_flag_cf_protection | CF_SET); +- +- if (ix86_tune_features [X86_TUNE_AVOID_256FMA_CHAINS]) +- maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 256, +- opts->x_param_values, +- opts_set->x_param_values); +- else if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS]) +- maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 128, +- opts->x_param_values, +- opts_set->x_param_values); +- +- /* PR86952: jump table usage with retpolines is slow. +- The PR provides some numbers about the slowness. */ +- if (ix86_indirect_branch != indirect_branch_keep +- && !opts_set->x_flag_jump_tables) +- opts->x_flag_jump_tables = 0; +- +- return true; +-} +- +-/* Implement the TARGET_OPTION_OVERRIDE hook. */ +- +-static void +-ix86_option_override (void) +-{ +- ix86_option_override_internal (true, &global_options, &global_options_set); +-} +- +-/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ +-static char * +-ix86_offload_options (void) +-{ +- if (TARGET_LP64) +- return xstrdup ("-foffload-abi=lp64"); +- return xstrdup ("-foffload-abi=ilp32"); ++ return res; + } + +-/* Update register usage after having seen the compiler flags. */ +- +-static void +-ix86_conditional_register_usage (void) ++static int ++function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, ++ const_tree type, HOST_WIDE_INT words, bool named) + { +- int i, c_mask; ++ int int_nregs, sse_nregs; + +- /* If there are no caller-saved registers, preserve all registers. 
+- except fixed_regs and registers used for function return value +- since aggregate_value_p checks call_used_regs[regno] on return +- value. */ +- if (cfun && cfun->machine->no_caller_saved_registers) +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (!fixed_regs[i] && !ix86_function_value_regno_p (i)) +- call_used_regs[i] = 0; ++ /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */ ++ if (!named && (VALID_AVX512F_REG_MODE (mode) ++ || VALID_AVX256_REG_MODE (mode))) ++ return 0; + +- /* For 32-bit targets, squash the REX registers. */ +- if (! TARGET_64BIT) ++ if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) ++ && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) + { +- for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) +- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; +- for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) +- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; +- for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) +- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; ++ cum->nregs -= int_nregs; ++ cum->sse_nregs -= sse_nregs; ++ cum->regno += int_nregs; ++ cum->sse_regno += sse_nregs; ++ return int_nregs; + } +- +- /* See the definition of CALL_USED_REGISTERS in i386.h. */ +- c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI); +- +- CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); +- +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ else + { +- /* Set/reset conditionally defined registers from +- CALL_USED_REGISTERS initializer. */ +- if (call_used_regs[i] > 1) +- call_used_regs[i] = !!(call_used_regs[i] & c_mask); +- +- /* Calculate registers of CLOBBERED_REGS register set +- as call used registers from GENERAL_REGS register set. */ +- if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i) +- && call_used_regs[i]) +- SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i); ++ int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; ++ cum->words = ROUND_UP (cum->words, align); ++ cum->words += words; ++ return 0; + } ++} + +- /* If MMX is disabled, squash the registers. */ +- if (! TARGET_MMX) +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i)) +- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; +- +- /* If SSE is disabled, squash the registers. */ +- if (! TARGET_SSE) +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i)) +- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; +- +- /* If the FPU is disabled, squash the registers. */ +- if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i)) +- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; ++static int ++function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, ++ HOST_WIDE_INT words) ++{ ++ /* Otherwise, this should be passed indirect. */ ++ gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); + +- /* If AVX512F is disabled, squash the registers. */ +- if (! 
TARGET_AVX512F) ++ cum->words += words; ++ if (cum->nregs > 0) + { +- for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) +- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; +- +- for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) +- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; ++ cum->nregs -= 1; ++ cum->regno += 1; ++ return 1; + } ++ return 0; + } + +-/* Canonicalize a comparison from one we don't have to one we do have. */ ++/* Update the data in CUM to advance over argument ARG. */ + + static void +-ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1, +- bool op0_preserve_value) ++ix86_function_arg_advance (cumulative_args_t cum_v, ++ const function_arg_info &arg) + { +- /* The order of operands in x87 ficom compare is forced by combine in +- simplify_comparison () function. Float operator is treated as RTX_OBJ +- with a precedence over other operators and is always put in the first +- place. Swap condition and operands to match ficom instruction. */ +- if (!op0_preserve_value +- && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1)) +- { +- enum rtx_code scode = swap_condition ((enum rtx_code) *code); ++ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); ++ machine_mode mode = arg.mode; ++ HOST_WIDE_INT bytes, words; ++ int nregs; + +- /* We are called only for compares that are split to SAHF instruction. +- Ensure that we have setcc/jcc insn for the swapped condition. */ +- if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN) +- { +- std::swap (*op0, *op1); +- *code = (int) scode; +- } +- } +-} +- +-/* Save the current options */ ++ /* The argument of interrupt handler is a special case and is ++ handled in ix86_function_arg. */ ++ if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) ++ return; + +-static void +-ix86_function_specific_save (struct cl_target_option *ptr, +- struct gcc_options *opts) +-{ +- ptr->arch = ix86_arch; +- ptr->schedule = ix86_schedule; +- ptr->prefetch_sse = x86_prefetch_sse; +- ptr->tune = ix86_tune; +- ptr->branch_cost = ix86_branch_cost; +- ptr->tune_defaulted = ix86_tune_defaulted; +- ptr->arch_specified = ix86_arch_specified; +- ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit; +- ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit; +- ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit; +- ptr->x_ix86_arch_string = opts->x_ix86_arch_string; +- ptr->x_ix86_tune_string = opts->x_ix86_tune_string; +- ptr->x_ix86_cmodel = opts->x_ix86_cmodel; +- ptr->x_ix86_abi = opts->x_ix86_abi; +- ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect; +- ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost; +- ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes; +- ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer; +- ptr->x_ix86_force_drap = opts->x_ix86_force_drap; +- ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg; +- ptr->x_ix86_pmode = opts->x_ix86_pmode; +- ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg; +- ptr->x_ix86_recip_name = opts->x_ix86_recip_name; +- ptr->x_ix86_regparm = opts->x_ix86_regparm; +- ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold; +- ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx; +- ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard; +- ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg; +- ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect; +- ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string; +- 
ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy; +- ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy; +- ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default; +- ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type; +- +- /* The fields are char but the variables are not; make sure the +- values fit in the fields. */ +- gcc_assert (ptr->arch == ix86_arch); +- gcc_assert (ptr->schedule == ix86_schedule); +- gcc_assert (ptr->tune == ix86_tune); +- gcc_assert (ptr->branch_cost == ix86_branch_cost); +-} +- +-/* Restore the current options */ ++ bytes = arg.promoted_size_in_bytes (); ++ words = CEIL (bytes, UNITS_PER_WORD); + +-static void +-ix86_function_specific_restore (struct gcc_options *opts, +- struct cl_target_option *ptr) +-{ +- enum processor_type old_tune = ix86_tune; +- enum processor_type old_arch = ix86_arch; +- unsigned HOST_WIDE_INT ix86_arch_mask; +- int i; ++ if (arg.type) ++ mode = type_natural_mode (arg.type, NULL, false); ++ ++ if (TARGET_64BIT) ++ { ++ enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; + +- /* We don't change -fPIC. */ +- opts->x_flag_pic = flag_pic; +- +- ix86_arch = (enum processor_type) ptr->arch; +- ix86_schedule = (enum attr_cpu) ptr->schedule; +- ix86_tune = (enum processor_type) ptr->tune; +- x86_prefetch_sse = ptr->prefetch_sse; +- opts->x_ix86_branch_cost = ptr->branch_cost; +- ix86_tune_defaulted = ptr->tune_defaulted; +- ix86_arch_specified = ptr->arch_specified; +- opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit; +- opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit; +- opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit; +- opts->x_ix86_arch_string = ptr->x_ix86_arch_string; +- opts->x_ix86_tune_string = ptr->x_ix86_tune_string; +- opts->x_ix86_cmodel = ptr->x_ix86_cmodel; +- opts->x_ix86_abi = ptr->x_ix86_abi; +- opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect; +- opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost; +- opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes; +- opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer; +- opts->x_ix86_force_drap = ptr->x_ix86_force_drap; +- opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg; +- opts->x_ix86_pmode = ptr->x_ix86_pmode; +- opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg; +- opts->x_ix86_recip_name = ptr->x_ix86_recip_name; +- opts->x_ix86_regparm = ptr->x_ix86_regparm; +- opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold; +- opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx; +- opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard; +- opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg; +- opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect; +- opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string; +- opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy; +- opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy; +- opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default; +- opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type; +- ix86_tune_cost = processor_cost_table[ix86_tune]; +- /* TODO: ix86_cost should be chosen at instruction or function granuality +- so for cold code we use size_cost even in !optimize_size compilation. 
*/ +- if (opts->x_optimize_size) +- ix86_cost = &ix86_size_cost; ++ if (call_abi == MS_ABI) ++ nregs = function_arg_advance_ms_64 (cum, bytes, words); ++ else ++ nregs = function_arg_advance_64 (cum, mode, arg.type, words, ++ arg.named); ++ } + else +- ix86_cost = ix86_tune_cost; ++ nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words); + +- /* Recreate the arch feature tests if the arch changed */ +- if (old_arch != ix86_arch) ++ if (!nregs) + { +- ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch; +- for (i = 0; i < X86_ARCH_LAST; ++i) +- ix86_arch_features[i] +- = !!(initial_ix86_arch_features[i] & ix86_arch_mask); ++ /* Track if there are outgoing arguments on stack. */ ++ if (cum->caller) ++ cfun->machine->outgoing_args_on_stack = true; + } +- +- /* Recreate the tune optimization tests */ +- if (old_tune != ix86_tune) +- set_ix86_tune_features (ix86_tune, false); + } + +-/* Adjust target options after streaming them in. This is mainly about +- reconciling them with global options. */ +- +-static void +-ix86_function_specific_post_stream_in (struct cl_target_option *ptr) +-{ +- /* flag_pic is a global option, but ix86_cmodel is target saved option +- partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel +- for PIC, or error out. */ +- if (flag_pic) +- switch (ptr->x_ix86_cmodel) +- { +- case CM_SMALL: +- ptr->x_ix86_cmodel = CM_SMALL_PIC; +- break; ++/* Define where to put the arguments to a function. ++ Value is zero to push the argument on the stack, ++ or a hard register in which to store the argument. + +- case CM_MEDIUM: +- ptr->x_ix86_cmodel = CM_MEDIUM_PIC; +- break; +- +- case CM_LARGE: +- ptr->x_ix86_cmodel = CM_LARGE_PIC; +- break; +- +- case CM_KERNEL: +- error ("code model %s does not support PIC mode", "kernel"); +- break; +- +- default: +- break; +- } +- else +- switch (ptr->x_ix86_cmodel) +- { +- case CM_SMALL_PIC: +- ptr->x_ix86_cmodel = CM_SMALL; +- break; +- +- case CM_MEDIUM_PIC: +- ptr->x_ix86_cmodel = CM_MEDIUM; +- break; +- +- case CM_LARGE_PIC: +- ptr->x_ix86_cmodel = CM_LARGE; +- break; +- +- default: +- break; +- } +-} +- +-/* Print the current options */ +- +-static void +-ix86_function_specific_print (FILE *file, int indent, +- struct cl_target_option *ptr) +-{ +- char *target_string +- = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2, +- ptr->x_target_flags, ptr->x_ix86_target_flags, +- NULL, NULL, ptr->x_ix86_fpmath, false, true); +- +- gcc_assert (ptr->arch < PROCESSOR_max); +- fprintf (file, "%*sarch = %d (%s)\n", +- indent, "", +- ptr->arch, processor_names[ptr->arch]); +- +- gcc_assert (ptr->tune < PROCESSOR_max); +- fprintf (file, "%*stune = %d (%s)\n", +- indent, "", +- ptr->tune, processor_names[ptr->tune]); +- +- fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); +- +- if (target_string) +- { +- fprintf (file, "%*s%s\n", indent, "", target_string); +- free (target_string); +- } +-} +- +- +-/* Inner function to process the attribute((target(...))), take an argument and +- set the current options from the argument. If we have a list, recursively go +- over the list. */ ++ MODE is the argument's machine mode. ++ TYPE is the data type of the argument (as a tree). ++ This is null for libcalls where that information may ++ not be available. ++ CUM is a variable of type CUMULATIVE_ARGS which gives info about ++ the preceding args and about the function being called. ++ NAMED is nonzero if this argument is a named parameter ++ (otherwise it is an extra parameter matching an ellipsis). 
*/ + +-static bool +-ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], +- struct gcc_options *opts, +- struct gcc_options *opts_set, +- struct gcc_options *enum_opts_set) ++static rtx ++function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode, ++ machine_mode orig_mode, const_tree type, ++ HOST_WIDE_INT bytes, HOST_WIDE_INT words) + { +- char *next_optstr; +- bool ret = true; +- +-#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } +-#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } +-#define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 } +-#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } +-#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } +- +- enum ix86_opt_type +- { +- ix86_opt_unknown, +- ix86_opt_yes, +- ix86_opt_no, +- ix86_opt_str, +- ix86_opt_enum, +- ix86_opt_isa +- }; +- +- static const struct +- { +- const char *string; +- size_t len; +- enum ix86_opt_type type; +- int opt; +- int mask; +- } attrs[] = { +- /* isa options */ +- IX86_ATTR_ISA ("pconfig", OPT_mpconfig), +- IX86_ATTR_ISA ("wbnoinvd", OPT_mwbnoinvd), +- IX86_ATTR_ISA ("sgx", OPT_msgx), +- IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps), +- IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw), +- IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq), +- IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2), +- IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni), +- IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg), +- +- IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi), +- IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma), +- IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl), +- IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw), +- IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq), +- IX86_ATTR_ISA ("avx512er", OPT_mavx512er), +- IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf), +- IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd), +- IX86_ATTR_ISA ("avx512f", OPT_mavx512f), +- IX86_ATTR_ISA ("avx2", OPT_mavx2), +- IX86_ATTR_ISA ("fma", OPT_mfma), +- IX86_ATTR_ISA ("xop", OPT_mxop), +- IX86_ATTR_ISA ("fma4", OPT_mfma4), +- IX86_ATTR_ISA ("f16c", OPT_mf16c), +- IX86_ATTR_ISA ("avx", OPT_mavx), +- IX86_ATTR_ISA ("sse4", OPT_msse4), +- IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), +- IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), +- IX86_ATTR_ISA ("sse4a", OPT_msse4a), +- IX86_ATTR_ISA ("ssse3", OPT_mssse3), +- IX86_ATTR_ISA ("sse3", OPT_msse3), +- IX86_ATTR_ISA ("aes", OPT_maes), +- IX86_ATTR_ISA ("sha", OPT_msha), +- IX86_ATTR_ISA ("pclmul", OPT_mpclmul), +- IX86_ATTR_ISA ("sse2", OPT_msse2), +- IX86_ATTR_ISA ("sse", OPT_msse), +- IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa), +- IX86_ATTR_ISA ("3dnow", OPT_m3dnow), +- IX86_ATTR_ISA ("mmx", OPT_mmmx), +- IX86_ATTR_ISA ("rtm", OPT_mrtm), +- IX86_ATTR_ISA ("prfchw", OPT_mprfchw), +- IX86_ATTR_ISA ("rdseed", OPT_mrdseed), +- IX86_ATTR_ISA ("adx", OPT_madx), +- IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1), +- IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt), +- IX86_ATTR_ISA ("xsaves", OPT_mxsaves), +- IX86_ATTR_ISA ("xsavec", OPT_mxsavec), +- IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt), +- IX86_ATTR_ISA ("xsave", OPT_mxsave), +- IX86_ATTR_ISA ("abm", OPT_mabm), +- IX86_ATTR_ISA ("bmi", OPT_mbmi), +- IX86_ATTR_ISA ("bmi2", OPT_mbmi2), +- IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt), +- IX86_ATTR_ISA ("tbm", OPT_mtbm), +- IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), +- IX86_ATTR_ISA ("cx16", OPT_mcx16), +- IX86_ATTR_ISA ("sahf", OPT_msahf), +- IX86_ATTR_ISA ("movbe", OPT_mmovbe), +- IX86_ATTR_ISA ("crc32", OPT_mcrc32), +- IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase), +- 
IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd), +- IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx), +- IX86_ATTR_ISA ("clzero", OPT_mclzero), +- IX86_ATTR_ISA ("pku", OPT_mpku), +- IX86_ATTR_ISA ("lwp", OPT_mlwp), +- IX86_ATTR_ISA ("hle", OPT_mhle), +- IX86_ATTR_ISA ("fxsr", OPT_mfxsr), +- IX86_ATTR_ISA ("clwb", OPT_mclwb), +- IX86_ATTR_ISA ("rdpid", OPT_mrdpid), +- IX86_ATTR_ISA ("gfni", OPT_mgfni), +- IX86_ATTR_ISA ("shstk", OPT_mshstk), +- IX86_ATTR_ISA ("vaes", OPT_mvaes), +- IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq), +- IX86_ATTR_ISA ("movdiri", OPT_mmovdiri), +- IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b), +- IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg), +- IX86_ATTR_ISA ("cldemote", OPT_mcldemote), +- IX86_ATTR_ISA ("ptwrite", OPT_mptwrite), +- +- /* enum options */ +- IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), +- +- /* string options */ +- IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), +- IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), +- +- /* flag options */ +- IX86_ATTR_YES ("cld", +- OPT_mcld, +- MASK_CLD), +- +- IX86_ATTR_NO ("fancy-math-387", +- OPT_mfancy_math_387, +- MASK_NO_FANCY_MATH_387), +- +- IX86_ATTR_YES ("ieee-fp", +- OPT_mieee_fp, +- MASK_IEEE_FP), +- +- IX86_ATTR_YES ("inline-all-stringops", +- OPT_minline_all_stringops, +- MASK_INLINE_ALL_STRINGOPS), +- +- IX86_ATTR_YES ("inline-stringops-dynamically", +- OPT_minline_stringops_dynamically, +- MASK_INLINE_STRINGOPS_DYNAMICALLY), +- +- IX86_ATTR_NO ("align-stringops", +- OPT_mno_align_stringops, +- MASK_NO_ALIGN_STRINGOPS), +- +- IX86_ATTR_YES ("recip", +- OPT_mrecip, +- MASK_RECIP), +- +- }; +- +- /* If this is a list, recurse to get the options. */ +- if (TREE_CODE (args) == TREE_LIST) +- { +- bool ret = true; +- +- for (; args; args = TREE_CHAIN (args)) +- if (TREE_VALUE (args) +- && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), +- p_strings, opts, opts_set, +- enum_opts_set)) +- ret = false; ++ bool error_p = false; + +- return ret; +- } ++ /* Avoid the AL settings for the Unix64 ABI. */ ++ if (mode == VOIDmode) ++ return constm1_rtx; + +- else if (TREE_CODE (args) != STRING_CST) ++ if (TARGET_IAMCU) + { +- error ("attribute % argument not a string"); +- return false; ++ /* Intel MCU psABI passes scalars and aggregates no larger than 8 ++ bytes in registers. */ ++ if (!VECTOR_MODE_P (mode) && bytes <= 8) ++ goto pass_in_reg; ++ return NULL_RTX; + } + +- /* Handle multiple arguments separated by commas. */ +- next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); +- +- while (next_optstr && *next_optstr != '\0') ++ switch (mode) + { +- char *p = next_optstr; +- char *orig_p = p; +- char *comma = strchr (next_optstr, ','); +- const char *opt_string; +- size_t len, opt_len; +- int opt; +- bool opt_set_p; +- char ch; +- unsigned i; +- enum ix86_opt_type type = ix86_opt_unknown; +- int mask = 0; ++ default: ++ break; + +- if (comma) +- { +- *comma = '\0'; +- len = comma - next_optstr; +- next_optstr = comma + 1; +- } +- else ++ case E_BLKmode: ++ if (bytes < 0) ++ break; ++ /* FALLTHRU */ ++ case E_DImode: ++ case E_SImode: ++ case E_HImode: ++ case E_QImode: ++pass_in_reg: ++ if (words <= cum->nregs) + { +- len = strlen (p); +- next_optstr = NULL; +- } ++ int regno = cum->regno; + +- /* Recognize no-xxx. */ +- if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') +- { +- opt_set_p = false; +- p += 3; +- len -= 3; +- } +- else +- opt_set_p = true; +- +- /* Find the option. 
*/ +- ch = *p; +- opt = N_OPTS; +- for (i = 0; i < ARRAY_SIZE (attrs); i++) +- { +- type = attrs[i].type; +- opt_len = attrs[i].len; +- if (ch == attrs[i].string[0] +- && ((type != ix86_opt_str && type != ix86_opt_enum) +- ? len == opt_len +- : len > opt_len) +- && memcmp (p, attrs[i].string, opt_len) == 0) ++ /* Fastcall allocates the first two DWORD (SImode) or ++ smaller arguments to ECX and EDX if it isn't an ++ aggregate type . */ ++ if (cum->fastcall) + { +- opt = attrs[i].opt; +- mask = attrs[i].mask; +- opt_string = attrs[i].string; +- break; +- } +- } ++ if (mode == BLKmode ++ || mode == DImode ++ || (type && AGGREGATE_TYPE_P (type))) ++ break; + +- /* Process the option. */ +- if (opt == N_OPTS) +- { +- error ("attribute(target(\"%s\")) is unknown", orig_p); +- ret = false; ++ /* ECX not EAX is the first allocated register. */ ++ if (regno == AX_REG) ++ regno = CX_REG; ++ } ++ return gen_rtx_REG (mode, regno); + } ++ break; + +- else if (type == ix86_opt_isa) ++ case E_DFmode: ++ if (cum->float_in_sse == -1) ++ error_p = true; ++ if (cum->float_in_sse < 2) ++ break; ++ /* FALLTHRU */ ++ case E_SFmode: ++ if (cum->float_in_sse == -1) ++ error_p = true; ++ if (cum->float_in_sse < 1) ++ break; ++ /* FALLTHRU */ ++ case E_TImode: ++ /* In 32bit, we pass TImode in xmm registers. */ ++ case E_V16QImode: ++ case E_V8HImode: ++ case E_V4SImode: ++ case E_V2DImode: ++ case E_V4SFmode: ++ case E_V2DFmode: ++ if (!type || !AGGREGATE_TYPE_P (type)) + { +- struct cl_decoded_option decoded; +- +- generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded); +- ix86_handle_option (opts, opts_set, +- &decoded, input_location); ++ if (cum->sse_nregs) ++ return gen_reg_or_parallel (mode, orig_mode, ++ cum->sse_regno + FIRST_SSE_REG); + } ++ break; + +- else if (type == ix86_opt_yes || type == ix86_opt_no) +- { +- if (type == ix86_opt_no) +- opt_set_p = !opt_set_p; +- +- if (opt_set_p) +- opts->x_target_flags |= mask; +- else +- opts->x_target_flags &= ~mask; +- } ++ case E_OImode: ++ case E_XImode: ++ /* OImode and XImode shouldn't be used directly. */ ++ gcc_unreachable (); + +- else if (type == ix86_opt_str) ++ case E_V64QImode: ++ case E_V32HImode: ++ case E_V16SImode: ++ case E_V8DImode: ++ case E_V16SFmode: ++ case E_V8DFmode: ++ case E_V8SFmode: ++ case E_V8SImode: ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V4DFmode: ++ case E_V4DImode: ++ if (!type || !AGGREGATE_TYPE_P (type)) + { +- if (p_strings[opt]) +- { +- error ("option(\"%s\") was already specified", opt_string); +- ret = false; +- } +- else +- { +- p_strings[opt] = xstrdup (p + opt_len); +- if (opt == IX86_FUNCTION_SPECIFIC_ARCH) +- { +- /* If arch= is set, clear all bits in x_ix86_isa_flags, +- except for ISA_64BIT, ABI_64, ABI_X32, and CODE16 +- and all bits in x_ix86_isa_flags2. 
*/ +- opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT +- | OPTION_MASK_ABI_64 +- | OPTION_MASK_ABI_X32 +- | OPTION_MASK_CODE16); +- opts->x_ix86_isa_flags_explicit &= (OPTION_MASK_ISA_64BIT +- | OPTION_MASK_ABI_64 +- | OPTION_MASK_ABI_X32 +- | OPTION_MASK_CODE16); +- opts->x_ix86_isa_flags2 = 0; +- opts->x_ix86_isa_flags2_explicit = 0; +- } +- } ++ if (cum->sse_nregs) ++ return gen_reg_or_parallel (mode, orig_mode, ++ cum->sse_regno + FIRST_SSE_REG); + } ++ break; + +- else if (type == ix86_opt_enum) ++ case E_V8QImode: ++ case E_V4HImode: ++ case E_V2SImode: ++ case E_V2SFmode: ++ case E_V1TImode: ++ case E_V1DImode: ++ if (!type || !AGGREGATE_TYPE_P (type)) + { +- bool arg_ok; +- int value; +- +- arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET); +- if (arg_ok) +- set_option (opts, enum_opts_set, opt, value, +- p + opt_len, DK_UNSPECIFIED, input_location, +- global_dc); +- else +- { +- error ("attribute(target(\"%s\")) is unknown", orig_p); +- ret = false; +- } ++ if (cum->mmx_nregs) ++ return gen_reg_or_parallel (mode, orig_mode, ++ cum->mmx_regno + FIRST_MMX_REG); + } +- +- else +- gcc_unreachable (); ++ break; ++ } ++ if (error_p) ++ { ++ cum->float_in_sse = 0; ++ error ("calling %qD with SSE calling convention without " ++ "SSE/SSE2 enabled", cum->decl); ++ sorry ("this is a GCC bug that can be worked around by adding " ++ "attribute used to function called"); + } + +- return ret; +-} +- +-/* Release allocated strings. */ +-static void +-release_options_strings (char **option_strings) +-{ +- /* Free up memory allocated to hold the strings */ +- for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) +- free (option_strings[i]); ++ return NULL_RTX; + } + +-/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ +- +-tree +-ix86_valid_target_attribute_tree (tree args, +- struct gcc_options *opts, +- struct gcc_options *opts_set) ++static rtx ++function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, ++ machine_mode orig_mode, const_tree type, bool named) + { +- const char *orig_arch_string = opts->x_ix86_arch_string; +- const char *orig_tune_string = opts->x_ix86_tune_string; +- enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath; +- int orig_tune_defaulted = ix86_tune_defaulted; +- int orig_arch_specified = ix86_arch_specified; +- char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL }; +- tree t = NULL_TREE; +- struct cl_target_option *def +- = TREE_TARGET_OPTION (target_option_default_node); +- struct gcc_options enum_opts_set; +- +- memset (&enum_opts_set, 0, sizeof (enum_opts_set)); +- +- /* Process each of the options on the chain. */ +- if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts, +- opts_set, &enum_opts_set)) +- return error_mark_node; ++ /* Handle a hidden AL argument containing number of registers ++ for varargs x86-64 functions. */ ++ if (mode == VOIDmode) ++ return GEN_INT (cum->maybe_vaarg ++ ? (cum->sse_nregs < 0 ++ ? X86_64_SSE_REGPARM_MAX ++ : cum->sse_regno) ++ : -1); + +- /* If the changed options are different from the default, rerun +- ix86_option_override_internal, and then save the options away. +- The string options are attribute options, and will be undone +- when we copy the save structure. 
*/ +- if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags +- || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2 +- || opts->x_target_flags != def->x_target_flags +- || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] +- || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] +- || enum_opts_set.x_ix86_fpmath) ++ switch (mode) + { +- /* If we are using the default tune= or arch=, undo the string assigned, +- and use the default. */ +- if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) +- opts->x_ix86_arch_string +- = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]); +- else if (!orig_arch_specified) +- opts->x_ix86_arch_string = NULL; +- +- if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) +- opts->x_ix86_tune_string +- = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); +- else if (orig_tune_defaulted) +- opts->x_ix86_tune_string = NULL; +- +- /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ +- if (enum_opts_set.x_ix86_fpmath) +- opts_set->x_ix86_fpmath = (enum fpmath_unit) 1; +- +- /* Do any overrides, such as arch=xxx, or tune=xxx support. */ +- bool r = ix86_option_override_internal (false, opts, opts_set); +- if (!r) +- { +- release_options_strings (option_strings); +- return error_mark_node; +- } +- +- /* Add any builtin functions with the new isa if any. */ +- ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2); +- +- /* Save the current options unless we are validating options for +- #pragma. */ +- t = build_target_option_node (opts); +- +- opts->x_ix86_arch_string = orig_arch_string; +- opts->x_ix86_tune_string = orig_tune_string; +- opts_set->x_ix86_fpmath = orig_fpmath_set; ++ default: ++ break; + +- release_options_strings (option_strings); ++ case E_V8SFmode: ++ case E_V8SImode: ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V4DFmode: ++ case E_V4DImode: ++ case E_V16SFmode: ++ case E_V16SImode: ++ case E_V64QImode: ++ case E_V32HImode: ++ case E_V8DFmode: ++ case E_V8DImode: ++ /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */ ++ if (!named) ++ return NULL; ++ break; + } + +- return t; ++ return construct_container (mode, orig_mode, type, 0, cum->nregs, ++ cum->sse_nregs, ++ &x86_64_int_parameter_registers [cum->regno], ++ cum->sse_regno); + } + +-/* Hook to validate attribute((target("string"))). */ +- +-static bool +-ix86_valid_target_attribute_p (tree fndecl, +- tree ARG_UNUSED (name), +- tree args, +- int ARG_UNUSED (flags)) +-{ +- struct gcc_options func_options; +- tree new_target, new_optimize; +- bool ret = true; +- +- /* attribute((target("default"))) does nothing, beyond +- affecting multi-versioning. */ +- if (TREE_VALUE (args) +- && TREE_CODE (TREE_VALUE (args)) == STRING_CST +- && TREE_CHAIN (args) == NULL_TREE +- && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0) +- return true; ++static rtx ++function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, ++ machine_mode orig_mode, bool named, const_tree type, ++ HOST_WIDE_INT bytes) ++{ ++ unsigned int regno; + +- tree old_optimize = build_optimization_node (&global_options); +- +- /* Get the optimization options of the current function. */ +- tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); +- +- if (!func_optimize) +- func_optimize = old_optimize; +- +- /* Init func_options. 
*/ +- memset (&func_options, 0, sizeof (func_options)); +- init_options_struct (&func_options, NULL); +- lang_hooks.init_options_struct (&func_options); +- +- cl_optimization_restore (&func_options, +- TREE_OPTIMIZATION (func_optimize)); +- +- /* Initialize func_options to the default before its target options can +- be set. */ +- cl_target_option_restore (&func_options, +- TREE_TARGET_OPTION (target_option_default_node)); +- +- new_target = ix86_valid_target_attribute_tree (args, &func_options, +- &global_options_set); +- +- new_optimize = build_optimization_node (&func_options); +- +- if (new_target == error_mark_node) +- ret = false; +- +- else if (fndecl && new_target) +- { +- DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; +- +- if (old_optimize != new_optimize) +- DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; +- } +- +- finalize_options_struct (&func_options); +- +- return ret; +-} +- +- +-/* Hook to determine if one function can safely inline another. */ +- +-static bool +-ix86_can_inline_p (tree caller, tree callee) +-{ +- tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); +- tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); +- +- /* Changes of those flags can be tolerated for always inlines. Lets hope +- user knows what he is doing. */ +- const unsigned HOST_WIDE_INT always_inline_safe_mask +- = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS +- | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD +- | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD +- | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS +- | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE +- | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER +- | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER); +- +- +- if (!callee_tree) +- callee_tree = target_option_default_node; +- if (!caller_tree) +- caller_tree = target_option_default_node; +- if (callee_tree == caller_tree) +- return true; +- +- struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); +- struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); +- bool ret = false; +- bool always_inline +- = (DECL_DISREGARD_INLINE_LIMITS (callee) +- && lookup_attribute ("always_inline", +- DECL_ATTRIBUTES (callee))); +- +- cgraph_node *callee_node = cgraph_node::get (callee); +- /* Callee's isa options should be a subset of the caller's, i.e. a SSE4 +- function can inline a SSE2 function but a SSE2 function can't inline +- a SSE4 function. */ +- if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags) +- != callee_opts->x_ix86_isa_flags) +- || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2) +- != callee_opts->x_ix86_isa_flags2)) +- ret = false; +- +- /* See if we have the same non-isa options. */ +- else if ((!always_inline +- && caller_opts->x_target_flags != callee_opts->x_target_flags) +- || (caller_opts->x_target_flags & ~always_inline_safe_mask) +- != (callee_opts->x_target_flags & ~always_inline_safe_mask)) +- ret = false; +- +- /* See if arch, tune, etc. are the same. */ +- else if (caller_opts->arch != callee_opts->arch) +- ret = false; +- +- else if (!always_inline && caller_opts->tune != callee_opts->tune) +- ret = false; +- +- else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath +- /* If the calle doesn't use FP expressions differences in +- ix86_fpmath can be ignored. We are called from FEs +- for multi-versioning call optimization, so beware of +- ipa_fn_summaries not available. */ +- && (! 
ipa_fn_summaries +- || ipa_fn_summaries->get (callee_node) == NULL +- || ipa_fn_summaries->get (callee_node)->fp_expressions)) +- ret = false; +- +- else if (!always_inline +- && caller_opts->branch_cost != callee_opts->branch_cost) +- ret = false; +- +- else +- ret = true; +- +- return ret; +-} +- +- +-/* Remember the last target of ix86_set_current_function. */ +-static GTY(()) tree ix86_previous_fndecl; +- +-/* Set targets globals to the default (or current #pragma GCC target +- if active). Invalidate ix86_previous_fndecl cache. */ ++ /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call. ++ We use value of -2 to specify that current function call is MSABI. */ ++ if (mode == VOIDmode) ++ return GEN_INT (-2); + +-void +-ix86_reset_previous_fndecl (void) +-{ +- tree new_tree = target_option_current_node; +- cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); +- if (TREE_TARGET_GLOBALS (new_tree)) +- restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); +- else if (new_tree == target_option_default_node) +- restore_target_globals (&default_target_globals); +- else +- TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); +- ix86_previous_fndecl = NULL_TREE; +-} ++ /* If we've run out of registers, it goes on the stack. */ ++ if (cum->nregs == 0) ++ return NULL_RTX; + +-/* Set the func_type field from the function FNDECL. */ ++ regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; + +-static void +-ix86_set_func_type (tree fndecl) +-{ +- if (cfun->machine->func_type == TYPE_UNKNOWN) ++ /* Only floating point modes are passed in anything but integer regs. */ ++ if (TARGET_SSE && (mode == SFmode || mode == DFmode)) + { +- if (lookup_attribute ("interrupt", +- TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) ++ if (named) + { +- if (ix86_function_naked (fndecl)) +- error_at (DECL_SOURCE_LOCATION (fndecl), +- "interrupt and naked attributes are not compatible"); +- +- int nargs = 0; +- for (tree arg = DECL_ARGUMENTS (fndecl); +- arg; +- arg = TREE_CHAIN (arg)) +- nargs++; +- cfun->machine->no_caller_saved_registers = true; +- cfun->machine->func_type +- = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT; +- +- ix86_optimize_mode_switching[X86_DIRFLAG] = 1; +- +- /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */ +- if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG) +- sorry ("only DWARF debug format is supported for interrupt " +- "service routine"); ++ if (type == NULL_TREE || !AGGREGATE_TYPE_P (type)) ++ regno = cum->regno + FIRST_SSE_REG; + } + else + { +- cfun->machine->func_type = TYPE_NORMAL; +- if (lookup_attribute ("no_caller_saved_registers", +- TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) +- cfun->machine->no_caller_saved_registers = true; ++ rtx t1, t2; ++ ++ /* Unnamed floating parameters are passed in both the ++ SSE and integer registers. */ ++ t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); ++ t2 = gen_rtx_REG (mode, regno); ++ t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); ++ t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); ++ return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); + } + } ++ /* Handle aggregated types passed in register. */ ++ if (orig_mode == BLKmode) ++ { ++ if (bytes > 0 && bytes <= 8) ++ mode = (bytes > 4 ? DImode : SImode); ++ if (mode == BLKmode) ++ mode = DImode; ++ } ++ ++ return gen_reg_or_parallel (mode, orig_mode, regno); + } + +-/* Set the indirect_branch_type field from the function FNDECL. */ ++/* Return where to put the arguments to a function. 
++ Return zero to push the argument on the stack, or a hard register in which to store the argument. + +-static void +-ix86_set_indirect_branch_type (tree fndecl) ++ ARG describes the argument while CUM gives information about the ++ preceding args and about the function being called. */ ++ ++static rtx ++ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) + { +- if (cfun->machine->indirect_branch_type == indirect_branch_unset) ++ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); ++ machine_mode mode = arg.mode; ++ HOST_WIDE_INT bytes, words; ++ rtx reg; ++ ++ if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) + { +- tree attr = lookup_attribute ("indirect_branch", +- DECL_ATTRIBUTES (fndecl)); +- if (attr != NULL) ++ gcc_assert (arg.type != NULL_TREE); ++ if (POINTER_TYPE_P (arg.type)) + { +- tree args = TREE_VALUE (attr); +- if (args == NULL) +- gcc_unreachable (); +- tree cst = TREE_VALUE (args); +- if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0) +- cfun->machine->indirect_branch_type = indirect_branch_keep; +- else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0) +- cfun->machine->indirect_branch_type = indirect_branch_thunk; +- else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0) +- cfun->machine->indirect_branch_type = indirect_branch_thunk_inline; +- else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0) +- cfun->machine->indirect_branch_type = indirect_branch_thunk_extern; +- else +- gcc_unreachable (); +- } +- else +- cfun->machine->indirect_branch_type = ix86_indirect_branch; +- +- /* -mcmodel=large is not compatible with -mindirect-branch=thunk +- nor -mindirect-branch=thunk-extern. */ +- if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) +- && ((cfun->machine->indirect_branch_type +- == indirect_branch_thunk_extern) +- || (cfun->machine->indirect_branch_type +- == indirect_branch_thunk))) +- error ("%<-mindirect-branch=%s%> and %<-mcmodel=large%> are not " +- "compatible", +- ((cfun->machine->indirect_branch_type +- == indirect_branch_thunk_extern) +- ? "thunk-extern" : "thunk")); +- +- if (cfun->machine->indirect_branch_type != indirect_branch_keep +- && (flag_cf_protection & CF_RETURN)) +- error ("%<-mindirect-branch%> and %<-fcf-protection%> are not " +- "compatible"); +- } +- +- if (cfun->machine->function_return_type == indirect_branch_unset) +- { +- tree attr = lookup_attribute ("function_return", +- DECL_ATTRIBUTES (fndecl)); +- if (attr != NULL) +- { +- tree args = TREE_VALUE (attr); +- if (args == NULL) +- gcc_unreachable (); +- tree cst = TREE_VALUE (args); +- if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0) +- cfun->machine->function_return_type = indirect_branch_keep; +- else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0) +- cfun->machine->function_return_type = indirect_branch_thunk; +- else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0) +- cfun->machine->function_return_type = indirect_branch_thunk_inline; +- else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0) +- cfun->machine->function_return_type = indirect_branch_thunk_extern; +- else +- gcc_unreachable (); ++ /* This is the pointer argument. */ ++ gcc_assert (TYPE_MODE (arg.type) == Pmode); ++ /* It is at -WORD(AP) in the current frame in interrupt and ++ exception handlers. 
*/ ++ reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD); + } + else +- cfun->machine->function_return_type = ix86_function_return; +- +- /* -mcmodel=large is not compatible with -mfunction-return=thunk +- nor -mfunction-return=thunk-extern. */ +- if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) +- && ((cfun->machine->function_return_type +- == indirect_branch_thunk_extern) +- || (cfun->machine->function_return_type +- == indirect_branch_thunk))) +- error ("%<-mfunction-return=%s%> and %<-mcmodel=large%> are not " +- "compatible", +- ((cfun->machine->function_return_type +- == indirect_branch_thunk_extern) +- ? "thunk-extern" : "thunk")); +- +- if (cfun->machine->function_return_type != indirect_branch_keep +- && (flag_cf_protection & CF_RETURN)) +- error ("%<-mfunction-return%> and %<-fcf-protection%> are not " +- "compatible"); +- } +-} +- +-/* Establish appropriate back-end context for processing the function +- FNDECL. The argument might be NULL to indicate processing at top +- level, outside of any function scope. */ +-static void +-ix86_set_current_function (tree fndecl) +-{ +- /* Only change the context if the function changes. This hook is called +- several times in the course of compiling a function, and we don't want to +- slow things down too much or call target_reinit when it isn't safe. */ +- if (fndecl == ix86_previous_fndecl) +- { +- /* There may be 2 function bodies for the same function FNDECL, +- one is extern inline and one isn't. Call ix86_set_func_type +- to set the func_type field. */ +- if (fndecl != NULL_TREE) + { +- ix86_set_func_type (fndecl); +- ix86_set_indirect_branch_type (fndecl); ++ gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION ++ && TREE_CODE (arg.type) == INTEGER_TYPE ++ && TYPE_MODE (arg.type) == word_mode); ++ /* The error code is the word-mode integer argument at ++ -2 * WORD(AP) in the current frame of the exception ++ handler. */ ++ reg = gen_rtx_MEM (word_mode, ++ plus_constant (Pmode, ++ arg_pointer_rtx, ++ -2 * UNITS_PER_WORD)); + } +- return; +- } +- +- tree old_tree; +- if (ix86_previous_fndecl == NULL_TREE) +- old_tree = target_option_current_node; +- else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)) +- old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl); +- else +- old_tree = target_option_default_node; +- +- if (fndecl == NULL_TREE) +- { +- if (old_tree != target_option_current_node) +- ix86_reset_previous_fndecl (); +- return; ++ return reg; + } + +- ix86_set_func_type (fndecl); +- ix86_set_indirect_branch_type (fndecl); ++ bytes = arg.promoted_size_in_bytes (); ++ words = CEIL (bytes, UNITS_PER_WORD); + +- tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); +- if (new_tree == NULL_TREE) +- new_tree = target_option_default_node; ++ /* To simplify the code below, represent vector types with a vector mode ++ even if MMX/SSE are not active. */ ++ if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE) ++ mode = type_natural_mode (arg.type, cum, false); + +- if (old_tree != new_tree) ++ if (TARGET_64BIT) + { +- cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); +- if (TREE_TARGET_GLOBALS (new_tree)) +- restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); +- else if (new_tree == target_option_default_node) +- restore_target_globals (&default_target_globals); +- else +- TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); +- } +- ix86_previous_fndecl = fndecl; +- +- static bool prev_no_caller_saved_registers; ++ enum calling_abi call_abi = cum ? 
cum->call_abi : ix86_abi; + +- /* 64-bit MS and SYSV ABI have different set of call used registers. +- Avoid expensive re-initialization of init_regs each time we switch +- function context. */ +- if (TARGET_64BIT +- && (call_used_regs[SI_REG] +- == (cfun->machine->call_abi == MS_ABI))) +- reinit_regs (); +- /* Need to re-initialize init_regs if caller-saved registers are +- changed. */ +- else if (prev_no_caller_saved_registers +- != cfun->machine->no_caller_saved_registers) +- reinit_regs (); +- +- if (cfun->machine->func_type != TYPE_NORMAL +- || cfun->machine->no_caller_saved_registers) +- { +- /* Don't allow SSE, MMX nor x87 instructions since they +- may change processor state. */ +- const char *isa; +- if (TARGET_SSE) +- isa = "SSE"; +- else if (TARGET_MMX) +- isa = "MMX/3Dnow"; +- else if (TARGET_80387) +- isa = "80387"; ++ if (call_abi == MS_ABI) ++ reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named, ++ arg.type, bytes); + else +- isa = NULL; +- if (isa != NULL) +- { +- if (cfun->machine->func_type != TYPE_NORMAL) +- sorry (cfun->machine->func_type == TYPE_EXCEPTION +- ? G_("%s instructions aren%'t allowed in an" +- " exception service routine") +- : G_("%s instructions aren%'t allowed in an" +- " interrupt service routine"), +- isa); +- else +- sorry ("%s instructions aren%'t allowed in a function with " +- "the % attribute", isa); +- /* Don't issue the same error twice. */ +- cfun->machine->func_type = TYPE_NORMAL; +- cfun->machine->no_caller_saved_registers = false; +- } ++ reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named); + } ++ else ++ reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words); + +- prev_no_caller_saved_registers +- = cfun->machine->no_caller_saved_registers; ++ /* Track if there are outgoing arguments on stack. */ ++ if (reg == NULL_RTX && cum->caller) ++ cfun->machine->outgoing_args_on_stack = true; ++ ++ return reg; + } + +- +-/* Return true if this goes in large data/bss. */ ++/* A C expression that indicates when an argument must be passed by ++ reference. If nonzero for an argument, a copy of that argument is ++ made in memory and a pointer to the argument is passed instead of ++ the argument itself. The pointer is passed in whatever way is ++ appropriate for passing a pointer to that type. */ + + static bool +-ix86_in_large_data_p (tree exp) ++ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg) + { +- if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) +- return false; ++ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + +- if (exp == NULL_TREE) +- return false; ++ if (TARGET_64BIT) ++ { ++ enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; + +- /* Functions are never large data. */ +- if (TREE_CODE (exp) == FUNCTION_DECL) +- return false; ++ /* See Windows x64 Software Convention. */ ++ if (call_abi == MS_ABI) ++ { ++ HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode); + +- /* Automatic variables are never large data. */ +- if (VAR_P (exp) && !is_global_var (exp)) +- return false; ++ if (tree type = arg.type) ++ { ++ /* Arrays are passed by reference. 
*/ ++ if (TREE_CODE (type) == ARRAY_TYPE) ++ return true; + +- if (VAR_P (exp) && DECL_SECTION_NAME (exp)) +- { +- const char *section = DECL_SECTION_NAME (exp); +- if (strcmp (section, ".ldata") == 0 +- || strcmp (section, ".lbss") == 0) +- return true; +- return false; +- } +- else +- { +- HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); ++ if (RECORD_OR_UNION_TYPE_P (type)) ++ { ++ /* Structs/unions of sizes other than 8, 16, 32, or 64 bits ++ are passed by reference. */ ++ msize = int_size_in_bytes (type); ++ } ++ } + +- /* If this is an incomplete type with size 0, then we can't put it +- in data because it might be too big when completed. Also, +- int_size_in_bytes returns -1 if size can vary or is larger than +- an integer in which case also it is safer to assume that it goes in +- large data. */ +- if (size <= 0 || size > ix86_section_threshold) ++ /* __m128 is passed by reference. */ ++ return msize != 1 && msize != 2 && msize != 4 && msize != 8; ++ } ++ else if (arg.type && int_size_in_bytes (arg.type) == -1) + return true; + } + + return false; + } + +-/* i386-specific section flag to mark large sections. */ +-#define SECTION_LARGE SECTION_MACH_DEP +- +-/* Switch to the appropriate section for output of DECL. +- DECL is either a `VAR_DECL' node or a constant of some sort. +- RELOC indicates whether forming the initial value of DECL requires +- link-time relocations. */ ++/* Return true when TYPE should be 128bit aligned for 32bit argument ++ passing ABI. XXX: This function is obsolete and is only used for ++ checking psABI compatibility with previous versions of GCC. */ + +-ATTRIBUTE_UNUSED static section * +-x86_64_elf_select_section (tree decl, int reloc, +- unsigned HOST_WIDE_INT align) ++static bool ++ix86_compat_aligned_value_p (const_tree type) + { +- if (ix86_in_large_data_p (decl)) ++ machine_mode mode = TYPE_MODE (type); ++ if (((TARGET_SSE && SSE_REG_MODE_P (mode)) ++ || mode == TDmode ++ || mode == TFmode ++ || mode == TCmode) ++ && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) ++ return true; ++ if (TYPE_ALIGN (type) < 128) ++ return false; ++ ++ if (AGGREGATE_TYPE_P (type)) + { +- const char *sname = NULL; +- unsigned int flags = SECTION_WRITE | SECTION_LARGE; +- switch (categorize_decl_for_section (decl, reloc)) ++ /* Walk the aggregates recursively. */ ++ switch (TREE_CODE (type)) + { +- case SECCAT_DATA: +- sname = ".ldata"; +- break; +- case SECCAT_DATA_REL: +- sname = ".ldata.rel"; +- break; +- case SECCAT_DATA_REL_LOCAL: +- sname = ".ldata.rel.local"; +- break; +- case SECCAT_DATA_REL_RO: +- sname = ".ldata.rel.ro"; +- break; +- case SECCAT_DATA_REL_RO_LOCAL: +- sname = ".ldata.rel.ro.local"; +- break; +- case SECCAT_BSS: +- sname = ".lbss"; +- flags |= SECTION_BSS; +- break; +- case SECCAT_RODATA: +- case SECCAT_RODATA_MERGE_STR: +- case SECCAT_RODATA_MERGE_STR_INIT: +- case SECCAT_RODATA_MERGE_CONST: +- sname = ".lrodata"; +- flags &= ~SECTION_WRITE; ++ case RECORD_TYPE: ++ case UNION_TYPE: ++ case QUAL_UNION_TYPE: ++ { ++ tree field; ++ ++ /* Walk all the structure fields. */ ++ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL ++ && ix86_compat_aligned_value_p (TREE_TYPE (field))) ++ return true; ++ } ++ break; ++ } ++ ++ case ARRAY_TYPE: ++ /* Just for use if some languages passes arrays by value. 
*/ ++ if (ix86_compat_aligned_value_p (TREE_TYPE (type))) ++ return true; + break; +- case SECCAT_SRODATA: +- case SECCAT_SDATA: +- case SECCAT_SBSS: ++ ++ default: + gcc_unreachable (); +- case SECCAT_TEXT: +- case SECCAT_TDATA: +- case SECCAT_TBSS: +- /* We don't split these for medium model. Place them into +- default sections and hope for best. */ +- break; +- } +- if (sname) +- { +- /* We might get called with string constants, but get_named_section +- doesn't like them as they are not DECLs. Also, we need to set +- flags in that case. */ +- if (!DECL_P (decl)) +- return get_section (sname, flags, NULL); +- return get_named_section (decl, sname, reloc); + } + } +- return default_elf_select_section (decl, reloc, align); ++ return false; + } + +-/* Select a set of attributes for section NAME based on the properties +- of DECL and whether or not RELOC indicates that DECL's initializer +- might contain runtime relocations. */ ++/* Return the alignment boundary for MODE and TYPE with alignment ALIGN. ++ XXX: This function is obsolete and is only used for checking psABI ++ compatibility with previous versions of GCC. */ + +-static unsigned int ATTRIBUTE_UNUSED +-x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) ++static unsigned int ++ix86_compat_function_arg_boundary (machine_mode mode, ++ const_tree type, unsigned int align) + { +- unsigned int flags = default_section_type_flags (decl, name, reloc); ++ /* In 32bit, only _Decimal128 and __float128 are aligned to their ++ natural boundaries. */ ++ if (!TARGET_64BIT && mode != TDmode && mode != TFmode) ++ { ++ /* i386 ABI defines all arguments to be 4 byte aligned. We have to ++ make an exception for SSE modes since these require 128bit ++ alignment. + +- if (ix86_in_large_data_p (decl)) +- flags |= SECTION_LARGE; ++ The handling here differs from field_alignment. ICC aligns MMX ++ arguments to 4 byte boundaries, while structure fields are aligned ++ to 8 byte boundaries. */ ++ if (!type) ++ { ++ if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) ++ align = PARM_BOUNDARY; ++ } ++ else ++ { ++ if (!ix86_compat_aligned_value_p (type)) ++ align = PARM_BOUNDARY; ++ } ++ } ++ if (align > BIGGEST_ALIGNMENT) ++ align = BIGGEST_ALIGNMENT; ++ return align; ++} + +- if (decl == NULL_TREE +- && (strcmp (name, ".ldata.rel.ro") == 0 +- || strcmp (name, ".ldata.rel.ro.local") == 0)) +- flags |= SECTION_RELRO; ++/* Return true when TYPE should be 128bit aligned for 32bit argument ++ passing ABI. */ + +- if (strcmp (name, ".lbss") == 0 +- || strncmp (name, ".lbss.", 5) == 0 +- || strncmp (name, ".gnu.linkonce.lb.", 16) == 0) +- flags |= SECTION_BSS; ++static bool ++ix86_contains_aligned_value_p (const_tree type) ++{ ++ machine_mode mode = TYPE_MODE (type); + +- return flags; +-} ++ if (mode == XFmode || mode == XCmode) ++ return false; + +-/* Build up a unique section name, expressed as a +- STRING_CST node, and assign it to DECL_SECTION_NAME (decl). +- RELOC indicates whether the initial value of EXP requires +- link-time relocations. */ ++ if (TYPE_ALIGN (type) < 128) ++ return false; + +-static void ATTRIBUTE_UNUSED +-x86_64_elf_unique_section (tree decl, int reloc) +-{ +- if (ix86_in_large_data_p (decl)) ++ if (AGGREGATE_TYPE_P (type)) + { +- const char *prefix = NULL; +- /* We only need to use .gnu.linkonce if we don't have COMDAT groups. 
*/ +- bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP; +- +- switch (categorize_decl_for_section (decl, reloc)) +- { +- case SECCAT_DATA: +- case SECCAT_DATA_REL: +- case SECCAT_DATA_REL_LOCAL: +- case SECCAT_DATA_REL_RO: +- case SECCAT_DATA_REL_RO_LOCAL: +- prefix = one_only ? ".ld" : ".ldata"; +- break; +- case SECCAT_BSS: +- prefix = one_only ? ".lb" : ".lbss"; +- break; +- case SECCAT_RODATA: +- case SECCAT_RODATA_MERGE_STR: +- case SECCAT_RODATA_MERGE_STR_INIT: +- case SECCAT_RODATA_MERGE_CONST: +- prefix = one_only ? ".lr" : ".lrodata"; +- break; +- case SECCAT_SRODATA: +- case SECCAT_SDATA: +- case SECCAT_SBSS: +- gcc_unreachable (); +- case SECCAT_TEXT: +- case SECCAT_TDATA: +- case SECCAT_TBSS: +- /* We don't split these for medium model. Place them into +- default sections and hope for best. */ +- break; +- } +- if (prefix) ++ /* Walk the aggregates recursively. */ ++ switch (TREE_CODE (type)) + { +- const char *name, *linkonce; +- char *string; +- +- name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); +- name = targetm.strip_name_encoding (name); ++ case RECORD_TYPE: ++ case UNION_TYPE: ++ case QUAL_UNION_TYPE: ++ { ++ tree field; + +- /* If we're using one_only, then there needs to be a .gnu.linkonce +- prefix to the section name. */ +- linkonce = one_only ? ".gnu.linkonce" : ""; ++ /* Walk all the structure fields. */ ++ for (field = TYPE_FIELDS (type); ++ field; ++ field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL ++ && ix86_contains_aligned_value_p (TREE_TYPE (field))) ++ return true; ++ } ++ break; ++ } + +- string = ACONCAT ((linkonce, prefix, ".", name, NULL)); ++ case ARRAY_TYPE: ++ /* Just for use if some languages passes arrays by value. */ ++ if (ix86_contains_aligned_value_p (TREE_TYPE (type))) ++ return true; ++ break; + +- set_decl_section_name (decl, string); +- return; ++ default: ++ gcc_unreachable (); + } + } +- default_unique_section (decl, reloc); +-} +- +-#ifdef COMMON_ASM_OP ++ else ++ return TYPE_ALIGN (type) >= 128; + +-#ifndef LARGECOMM_SECTION_ASM_OP +-#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t" +-#endif ++ return false; ++} + +-/* This says how to output assembler code to declare an +- uninitialized external linkage data object. ++/* Gives the alignment boundary, in bits, of an argument with the ++ specified mode and type. */ + +- For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for +- large objects. */ +-void +-x86_elf_aligned_decl_common (FILE *file, tree decl, +- const char *name, unsigned HOST_WIDE_INT size, +- int align) ++static unsigned int ++ix86_function_arg_boundary (machine_mode mode, const_tree type) + { +- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) +- && size > (unsigned int)ix86_section_threshold) ++ unsigned int align; ++ if (type) + { +- switch_to_section (get_named_section (decl, ".lbss", 0)); +- fputs (LARGECOMM_SECTION_ASM_OP, file); ++ /* Since the main variant type is used for call, we convert it to ++ the main variant type. 
*/ ++ type = TYPE_MAIN_VARIANT (type); ++ align = TYPE_ALIGN (type); ++ if (TYPE_EMPTY_P (type)) ++ return PARM_BOUNDARY; + } + else +- fputs (COMMON_ASM_OP, file); +- assemble_name (file, name); +- fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", +- size, align / BITS_PER_UNIT); +-} +-#endif ++ align = GET_MODE_ALIGNMENT (mode); ++ if (align < PARM_BOUNDARY) ++ align = PARM_BOUNDARY; ++ else ++ { ++ static bool warned; ++ unsigned int saved_align = align; + +-/* Utility function for targets to use in implementing +- ASM_OUTPUT_ALIGNED_BSS. */ ++ if (!TARGET_64BIT) ++ { ++ /* i386 ABI defines XFmode arguments to be 4 byte aligned. */ ++ if (!type) ++ { ++ if (mode == XFmode || mode == XCmode) ++ align = PARM_BOUNDARY; ++ } ++ else if (!ix86_contains_aligned_value_p (type)) ++ align = PARM_BOUNDARY; + +-void +-x86_output_aligned_bss (FILE *file, tree decl, const char *name, +- unsigned HOST_WIDE_INT size, int align) +-{ +- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) +- && size > (unsigned int)ix86_section_threshold) +- switch_to_section (get_named_section (decl, ".lbss", 0)); +- else +- switch_to_section (bss_section); +- ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); +-#ifdef ASM_DECLARE_OBJECT_NAME +- last_assemble_variable_decl = decl; +- ASM_DECLARE_OBJECT_NAME (file, name, decl); +-#else +- /* Standard thing is just output label for the object. */ +- ASM_OUTPUT_LABEL (file, name); +-#endif /* ASM_DECLARE_OBJECT_NAME */ +- ASM_OUTPUT_SKIP (file, size ? size : 1); +-} +- +-/* Decide whether we must probe the stack before any space allocation +- on this target. It's essentially TARGET_STACK_PROBE except when +- -fstack-check causes the stack to be already probed differently. */ ++ if (align < 128) ++ align = PARM_BOUNDARY; ++ } + +-bool +-ix86_target_stack_probe (void) +-{ +- /* Do not probe the stack twice if static stack checking is enabled. */ +- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) +- return false; ++ if (warn_psabi ++ && !warned ++ && align != ix86_compat_function_arg_boundary (mode, type, ++ saved_align)) ++ { ++ warned = true; ++ inform (input_location, ++ "the ABI for passing parameters with %d-byte" ++ " alignment has changed in GCC 4.6", ++ align / BITS_PER_UNIT); ++ } ++ } + +- return TARGET_STACK_PROBE; ++ return align; + } +- +-/* Decide whether we can make a sibling call to a function. DECL is the +- declaration of the function being targeted by the call and EXP is the +- CALL_EXPR representing the call. */ ++ ++/* Return true if N is a possible register number of function value. */ + + static bool +-ix86_function_ok_for_sibcall (tree decl, tree exp) ++ix86_function_value_regno_p (const unsigned int regno) + { +- tree type, decl_or_type; +- rtx a, b; +- bool bind_global = decl && !targetm.binds_local_p (decl); +- +- if (ix86_function_naked (current_function_decl)) +- return false; +- +- /* Sibling call isn't OK if there are no caller-saved registers +- since all registers must be preserved before return. */ +- if (cfun->machine->no_caller_saved_registers) +- return false; +- +- /* If we are generating position-independent code, we cannot sibcall +- optimize direct calls to global functions, as the PLT requires +- %ebx be live. (Darwin does not have a PLT.) */ +- if (!TARGET_MACHO +- && !TARGET_64BIT +- && flag_pic +- && flag_plt +- && bind_global) +- return false; +- +- /* If we need to align the outgoing stack, then sibcalling would +- unalign the stack, which may break the called function. 
*/ +- if (ix86_minimum_incoming_stack_boundary (true) +- < PREFERRED_STACK_BOUNDARY) +- return false; +- +- if (decl) +- { +- decl_or_type = decl; +- type = TREE_TYPE (decl); +- } +- else ++ switch (regno) + { +- /* We're looking at the CALL_EXPR, we need the type of the function. */ +- type = CALL_EXPR_FN (exp); /* pointer expression */ +- type = TREE_TYPE (type); /* pointer type */ +- type = TREE_TYPE (type); /* function type */ +- decl_or_type = type; +- } ++ case AX_REG: ++ return true; ++ case DX_REG: ++ return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI); ++ case DI_REG: ++ case SI_REG: ++ return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; + +- /* Check that the return value locations are the same. Like +- if we are returning floats on the 80387 register stack, we cannot +- make a sibcall from a function that doesn't return a float to a +- function that does or, conversely, from a function that does return +- a float to a function that doesn't; the necessary stack adjustment +- would not be executed. This is also the place we notice +- differences in the return value ABI. Note that it is ok for one +- of the functions to have void return type as long as the return +- value of the other is passed in a register. */ +- a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); +- b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), +- cfun->decl, false); +- if (STACK_REG_P (a) || STACK_REG_P (b)) +- { +- if (!rtx_equal_p (a, b)) ++ /* Complex values are returned in %st(0)/%st(1) pair. */ ++ case ST0_REG: ++ case ST1_REG: ++ /* TODO: The function should depend on current function ABI but ++ builtins.c would need updating then. Therefore we use the ++ default ABI. */ ++ if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) + return false; +- } +- else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) +- ; +- else if (!rtx_equal_p (a, b)) +- return false; ++ return TARGET_FLOAT_RETURNS_IN_80387; + +- if (TARGET_64BIT) +- { +- /* The SYSV ABI has more call-clobbered registers; +- disallow sibcalls from MS to SYSV. */ +- if (cfun->machine->call_abi == MS_ABI +- && ix86_function_type_abi (type) == SYSV_ABI) +- return false; +- } +- else +- { +- /* If this call is indirect, we'll need to be able to use a +- call-clobbered register for the address of the target function. +- Make sure that all such registers are not used for passing +- parameters. Note that DLLIMPORT functions and call to global +- function via GOT slot are indirect. */ +- if (!decl +- || (bind_global && flag_pic && !flag_plt) +- || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)) +- || flag_force_indirect_call) +- { +- /* Check if regparm >= 3 since arg_reg_available is set to +- false if regparm == 0. If regparm is 1 or 2, there is +- always a call-clobbered register available. ++ /* Complex values are returned in %xmm0/%xmm1 pair. */ ++ case XMM0_REG: ++ case XMM1_REG: ++ return TARGET_SSE; + +- ??? The symbol indirect call doesn't need a call-clobbered +- register. But we don't know if this is a symbol indirect +- call or not here. */ +- if (ix86_function_regparm (type, decl) >= 3 +- && !cfun->machine->arg_reg_available) +- return false; +- } ++ case MM0_REG: ++ if (TARGET_MACHO || TARGET_64BIT) ++ return false; ++ return TARGET_MMX; + } + +- /* Otherwise okay. That also includes certain types of indirect calls. 
*/ +- return true; ++ return false; + } + +-/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall", +- and "sseregparm" calling convention attributes; +- arguments as in struct attribute_spec.handler. */ ++/* Define how to find the value returned by a function. ++ VALTYPE is the data type of the value (as a tree). ++ If the precise function being called is known, FUNC is its FUNCTION_DECL; ++ otherwise, FUNC is 0. */ + +-static tree +-ix86_handle_cconv_attribute (tree *node, tree name, tree args, int, +- bool *no_add_attrs) ++static rtx ++function_value_32 (machine_mode orig_mode, machine_mode mode, ++ const_tree fntype, const_tree fn) + { +- if (TREE_CODE (*node) != FUNCTION_TYPE +- && TREE_CODE (*node) != METHOD_TYPE +- && TREE_CODE (*node) != FIELD_DECL +- && TREE_CODE (*node) != TYPE_DECL) +- { +- warning (OPT_Wattributes, "%qE attribute only applies to functions", +- name); +- *no_add_attrs = true; +- return NULL_TREE; +- } +- +- /* Can combine regparm with all attributes but fastcall, and thiscall. */ +- if (is_attribute_p ("regparm", name)) +- { +- tree cst; ++ unsigned int regno; + +- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) +- { +- error ("fastcall and regparm attributes are not compatible"); +- } ++ /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where ++ we normally prevent this case when mmx is not available. However ++ some ABIs may require the result to be returned like DImode. */ ++ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) ++ regno = FIRST_MMX_REG; + +- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) +- { +- error ("regparam and thiscall attributes are not compatible"); +- } ++ /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where ++ we prevent this case when sse is not available. However some ABIs ++ may require the result to be returned like integer TImode. */ ++ else if (mode == TImode ++ || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) ++ regno = FIRST_SSE_REG; + +- cst = TREE_VALUE (args); +- if (TREE_CODE (cst) != INTEGER_CST) +- { +- warning (OPT_Wattributes, +- "%qE attribute requires an integer constant argument", +- name); +- *no_add_attrs = true; +- } +- else if (compare_tree_int (cst, REGPARM_MAX) > 0) +- { +- warning (OPT_Wattributes, "argument to %qE attribute larger than %d", +- name, REGPARM_MAX); +- *no_add_attrs = true; +- } ++ /* 32-byte vector modes in %ymm0. */ ++ else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) ++ regno = FIRST_SSE_REG; + +- return NULL_TREE; +- } ++ /* 64-byte vector modes in %zmm0. */ ++ else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) ++ regno = FIRST_SSE_REG; + +- if (TARGET_64BIT) +- { +- /* Do not warn when emulating the MS ABI. */ +- if ((TREE_CODE (*node) != FUNCTION_TYPE +- && TREE_CODE (*node) != METHOD_TYPE) +- || ix86_function_type_abi (*node) != MS_ABI) +- warning (OPT_Wattributes, "%qE attribute ignored", +- name); +- *no_add_attrs = true; +- return NULL_TREE; +- } ++ /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */ ++ else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387) ++ regno = FIRST_FLOAT_REG; ++ else ++ /* Most things go in %eax. */ ++ regno = AX_REG; + +- /* Can combine fastcall with stdcall (redundant) and sseregparm. */ +- if (is_attribute_p ("fastcall", name)) ++ /* Override FP return register with %xmm0 for local functions when ++ SSE math is enabled or for functions with sseregparm attribute. 
*/ ++ if ((fn || fntype) && (mode == SFmode || mode == DFmode)) + { +- if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) +- { +- error ("fastcall and cdecl attributes are not compatible"); +- } +- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) +- { +- error ("fastcall and stdcall attributes are not compatible"); +- } +- if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) +- { +- error ("fastcall and regparm attributes are not compatible"); +- } +- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) ++ int sse_level = ix86_function_sseregparm (fntype, fn, false); ++ if (sse_level == -1) + { +- error ("fastcall and thiscall attributes are not compatible"); ++ error ("calling %qD with SSE calling convention without " ++ "SSE/SSE2 enabled", fn); ++ sorry ("this is a GCC bug that can be worked around by adding " ++ "attribute used to function called"); + } ++ else if ((sse_level >= 1 && mode == SFmode) ++ || (sse_level == 2 && mode == DFmode)) ++ regno = FIRST_SSE_REG; + } + +- /* Can combine stdcall with fastcall (redundant), regparm and +- sseregparm. */ +- else if (is_attribute_p ("stdcall", name)) +- { +- if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) +- { +- error ("stdcall and cdecl attributes are not compatible"); +- } +- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) +- { +- error ("stdcall and fastcall attributes are not compatible"); +- } +- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) +- { +- error ("stdcall and thiscall attributes are not compatible"); +- } +- } ++ /* OImode shouldn't be used directly. */ ++ gcc_assert (mode != OImode); ++ ++ return gen_rtx_REG (orig_mode, regno); ++} ++ ++static rtx ++function_value_64 (machine_mode orig_mode, machine_mode mode, ++ const_tree valtype) ++{ ++ rtx ret; + +- /* Can combine cdecl with regparm and sseregparm. */ +- else if (is_attribute_p ("cdecl", name)) ++ /* Handle libcalls, which don't provide a type node. */ ++ if (valtype == NULL) + { +- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) +- { +- error ("stdcall and cdecl attributes are not compatible"); +- } +- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) +- { +- error ("fastcall and cdecl attributes are not compatible"); +- } +- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) ++ unsigned int regno; ++ ++ switch (mode) + { +- error ("cdecl and thiscall attributes are not compatible"); ++ case E_SFmode: ++ case E_SCmode: ++ case E_DFmode: ++ case E_DCmode: ++ case E_TFmode: ++ case E_SDmode: ++ case E_DDmode: ++ case E_TDmode: ++ regno = FIRST_SSE_REG; ++ break; ++ case E_XFmode: ++ case E_XCmode: ++ regno = FIRST_FLOAT_REG; ++ break; ++ case E_TCmode: ++ return NULL; ++ default: ++ regno = AX_REG; + } ++ ++ return gen_rtx_REG (mode, regno); + } +- else if (is_attribute_p ("thiscall", name)) ++ else if (POINTER_TYPE_P (valtype)) + { +- if (TREE_CODE (*node) != METHOD_TYPE && pedantic) +- warning (OPT_Wattributes, "%qE attribute is used for non-class method", +- name); +- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) +- { +- error ("stdcall and thiscall attributes are not compatible"); +- } +- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) +- { +- error ("fastcall and thiscall attributes are not compatible"); +- } +- if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) +- { +- error ("cdecl and thiscall attributes are not compatible"); +- } ++ /* Pointers are always returned in word_mode. 
*/ ++ mode = word_mode; + } + +- /* Can combine sseregparm with all attributes. */ ++ ret = construct_container (mode, orig_mode, valtype, 1, ++ X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, ++ x86_64_int_return_registers, 0); + +- return NULL_TREE; +-} ++ /* For zero sized structures, construct_container returns NULL, but we ++ need to keep rest of compiler happy by returning meaningful value. */ ++ if (!ret) ++ ret = gen_rtx_REG (orig_mode, AX_REG); + +-/* The transactional memory builtins are implicitly regparm or fastcall +- depending on the ABI. Override the generic do-nothing attribute that +- these builtins were declared with, and replace it with one of the two +- attributes that we expect elsewhere. */ ++ return ret; ++} + +-static tree +-ix86_handle_tm_regparm_attribute (tree *node, tree, tree, +- int flags, bool *no_add_attrs) ++static rtx ++function_value_ms_32 (machine_mode orig_mode, machine_mode mode, ++ const_tree fntype, const_tree fn, const_tree valtype) + { +- tree alt; ++ unsigned int regno; + +- /* In no case do we want to add the placeholder attribute. */ +- *no_add_attrs = true; ++ /* Floating point return values in %st(0) ++ (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */ ++ if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387 ++ && (GET_MODE_SIZE (mode) > 8 ++ || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype))) ++ { ++ regno = FIRST_FLOAT_REG; ++ return gen_rtx_REG (orig_mode, regno); ++ } ++ else ++ return function_value_32(orig_mode, mode, fntype,fn); ++} + +- /* The 64-bit ABI is unchanged for transactional memory. */ +- if (TARGET_64BIT) +- return NULL_TREE; ++static rtx ++function_value_ms_64 (machine_mode orig_mode, machine_mode mode, ++ const_tree valtype) ++{ ++ unsigned int regno = AX_REG; + +- /* ??? Is there a better way to validate 32-bit windows? We have +- cfun->machine->call_abi, but that seems to be set only for 64-bit. */ +- if (CHECK_STACK_LIMIT > 0) +- alt = tree_cons (get_identifier ("fastcall"), NULL, NULL); +- else ++ if (TARGET_SSE) + { +- alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL); +- alt = tree_cons (get_identifier ("regparm"), alt, NULL); ++ switch (GET_MODE_SIZE (mode)) ++ { ++ case 16: ++ if (valtype != NULL_TREE ++ && !VECTOR_INTEGER_TYPE_P (valtype) ++ && !VECTOR_INTEGER_TYPE_P (valtype) ++ && !INTEGRAL_TYPE_P (valtype) ++ && !VECTOR_FLOAT_TYPE_P (valtype)) ++ break; ++ if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) ++ && !COMPLEX_MODE_P (mode)) ++ regno = FIRST_SSE_REG; ++ break; ++ case 8: ++ case 4: ++ if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype)) ++ break; ++ if (mode == SFmode || mode == DFmode) ++ regno = FIRST_SSE_REG; ++ break; ++ default: ++ break; ++ } + } +- decl_attributes (node, alt, flags); +- +- return NULL_TREE; ++ return gen_rtx_REG (orig_mode, regno); + } + +-/* This function determines from TYPE the calling-convention. */ +- +-unsigned int +-ix86_get_callcvt (const_tree type) ++static rtx ++ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, ++ machine_mode orig_mode, machine_mode mode) + { +- unsigned int ret = 0; +- bool is_stdarg; +- tree attrs; +- +- if (TARGET_64BIT) +- return IX86_CALLCVT_CDECL; ++ const_tree fn, fntype; + +- attrs = TYPE_ATTRIBUTES (type); +- if (attrs != NULL_TREE) ++ fn = NULL_TREE; ++ if (fntype_or_decl && DECL_P (fntype_or_decl)) ++ fn = fntype_or_decl; ++ fntype = fn ? 
TREE_TYPE (fn) : fntype_or_decl; ++ ++ if (ix86_function_type_abi (fntype) == MS_ABI) + { +- if (lookup_attribute ("cdecl", attrs)) +- ret |= IX86_CALLCVT_CDECL; +- else if (lookup_attribute ("stdcall", attrs)) +- ret |= IX86_CALLCVT_STDCALL; +- else if (lookup_attribute ("fastcall", attrs)) +- ret |= IX86_CALLCVT_FASTCALL; +- else if (lookup_attribute ("thiscall", attrs)) +- ret |= IX86_CALLCVT_THISCALL; +- +- /* Regparam isn't allowed for thiscall and fastcall. */ +- if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0) +- { +- if (lookup_attribute ("regparm", attrs)) +- ret |= IX86_CALLCVT_REGPARM; +- if (lookup_attribute ("sseregparm", attrs)) +- ret |= IX86_CALLCVT_SSEREGPARM; +- } +- +- if (IX86_BASE_CALLCVT(ret) != 0) +- return ret; ++ if (TARGET_64BIT) ++ return function_value_ms_64 (orig_mode, mode, valtype); ++ else ++ return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype); + } ++ else if (TARGET_64BIT) ++ return function_value_64 (orig_mode, mode, valtype); ++ else ++ return function_value_32 (orig_mode, mode, fntype, fn); ++} + +- is_stdarg = stdarg_p (type); +- if (TARGET_RTD && !is_stdarg) +- return IX86_CALLCVT_STDCALL | ret; +- +- if (ret != 0 +- || is_stdarg +- || TREE_CODE (type) != METHOD_TYPE +- || ix86_function_type_abi (type) != MS_ABI) +- return IX86_CALLCVT_CDECL | ret; ++static rtx ++ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool) ++{ ++ machine_mode mode, orig_mode; + +- return IX86_CALLCVT_THISCALL; ++ orig_mode = TYPE_MODE (valtype); ++ mode = type_natural_mode (valtype, NULL, true); ++ return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); + } + +-/* Return 0 if the attributes for two types are incompatible, 1 if they +- are compatible, and 2 if they are nearly compatible (which causes a +- warning to be generated). */ ++/* Pointer function arguments and return values are promoted to ++ word_mode for normal functions. */ + +-static int +-ix86_comp_type_attributes (const_tree type1, const_tree type2) ++static machine_mode ++ix86_promote_function_mode (const_tree type, machine_mode mode, ++ int *punsignedp, const_tree fntype, ++ int for_return) + { +- unsigned int ccvt1, ccvt2; +- +- if (TREE_CODE (type1) != FUNCTION_TYPE +- && TREE_CODE (type1) != METHOD_TYPE) +- return 1; ++ if (cfun->machine->func_type == TYPE_NORMAL ++ && type != NULL_TREE ++ && POINTER_TYPE_P (type)) ++ { ++ *punsignedp = POINTERS_EXTEND_UNSIGNED; ++ return word_mode; ++ } ++ return default_promote_function_mode (type, mode, punsignedp, fntype, ++ for_return); ++} + +- ccvt1 = ix86_get_callcvt (type1); +- ccvt2 = ix86_get_callcvt (type2); +- if (ccvt1 != ccvt2) +- return 0; +- if (ix86_function_regparm (type1, NULL) +- != ix86_function_regparm (type2, NULL)) +- return 0; ++/* Return true if a structure, union or array with MODE containing FIELD ++ should be accessed using BLKmode. */ + +- return 1; ++static bool ++ix86_member_type_forces_blk (const_tree field, machine_mode mode) ++{ ++ /* Union with XFmode must be in BLKmode. */ ++ return (mode == XFmode ++ && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE ++ || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE)); + } +- +-/* Return the regparm value for a function with the indicated TYPE and DECL. +- DECL may be NULL when calling function indirectly +- or considering a libcall. 
*/ + +-static int +-ix86_function_regparm (const_tree type, const_tree decl) ++rtx ++ix86_libcall_value (machine_mode mode) + { +- tree attr; +- int regparm; +- unsigned int ccvt; ++ return ix86_function_value_1 (NULL, NULL, mode, mode); ++} + +- if (TARGET_64BIT) +- return (ix86_function_type_abi (type) == SYSV_ABI +- ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX); +- ccvt = ix86_get_callcvt (type); +- regparm = ix86_regparm; ++/* Return true iff type is returned in memory. */ + +- if ((ccvt & IX86_CALLCVT_REGPARM) != 0) ++static bool ++ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) ++{ ++#ifdef SUBTARGET_RETURN_IN_MEMORY ++ return SUBTARGET_RETURN_IN_MEMORY (type, fntype); ++#else ++ const machine_mode mode = type_natural_mode (type, NULL, true); ++ HOST_WIDE_INT size; ++ ++ if (TARGET_64BIT) + { +- attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); +- if (attr) ++ if (ix86_function_type_abi (fntype) == MS_ABI) + { +- regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); +- return regparm; +- } +- } +- else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) +- return 2; +- else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) +- return 1; ++ size = int_size_in_bytes (type); + +- /* Use register calling convention for local functions when possible. */ +- if (decl +- && TREE_CODE (decl) == FUNCTION_DECL) +- { +- cgraph_node *target = cgraph_node::get (decl); +- if (target) +- target = target->function_symbol (); ++ /* __m128 is returned in xmm0. */ ++ if ((!type || VECTOR_INTEGER_TYPE_P (type) ++ || INTEGRAL_TYPE_P (type) ++ || VECTOR_FLOAT_TYPE_P (type)) ++ && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) ++ && !COMPLEX_MODE_P (mode) ++ && (GET_MODE_SIZE (mode) == 16 || size == 16)) ++ return false; + +- /* Caller and callee must agree on the calling convention, so +- checking here just optimize means that with +- __attribute__((optimize (...))) caller could use regparm convention +- and callee not, or vice versa. Instead look at whether the callee +- is optimized or not. */ +- if (target && opt_for_fn (target->decl, optimize) +- && !(profile_flag && !flag_fentry)) ++ /* Otherwise, the size must be exactly in [1248]. */ ++ return size != 1 && size != 2 && size != 4 && size != 8; ++ } ++ else + { +- cgraph_local_info *i = &target->local; +- if (i && i->local && i->can_change_signature) +- { +- int local_regparm, globals = 0, regno; +- +- /* Make sure no regparm register is taken by a +- fixed register variable. */ +- for (local_regparm = 0; local_regparm < REGPARM_MAX; +- local_regparm++) +- if (fixed_regs[local_regparm]) +- break; ++ int needed_intregs, needed_sseregs; + +- /* We don't want to use regparm(3) for nested functions as +- these use a static chain pointer in the third argument. */ +- if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl)) +- local_regparm = 2; ++ return examine_argument (mode, type, 1, ++ &needed_intregs, &needed_sseregs); ++ } ++ } ++ else ++ { ++ size = int_size_in_bytes (type); + +- /* Save a register for the split stack. */ +- if (flag_split_stack) +- { +- if (local_regparm == 3) +- local_regparm = 2; +- else if (local_regparm == 2 +- && DECL_STATIC_CHAIN (target->decl)) +- local_regparm = 1; +- } ++ /* Intel MCU psABI returns scalars and aggregates no larger than 8 ++ bytes in registers. */ ++ if (TARGET_IAMCU) ++ return VECTOR_MODE_P (mode) || size < 0 || size > 8; + +- /* Each fixed register usage increases register pressure, +- so less registers should be used for argument passing. 
+- This functionality can be overriden by an explicit +- regparm value. */ +- for (regno = AX_REG; regno <= DI_REG; regno++) +- if (fixed_regs[regno]) +- globals++; ++ if (mode == BLKmode) ++ return true; + +- local_regparm +- = globals < local_regparm ? local_regparm - globals : 0; ++ if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) ++ return false; + +- if (local_regparm > regparm) +- regparm = local_regparm; +- } +- } +- } ++ if (VECTOR_MODE_P (mode) || mode == TImode) ++ { ++ /* User-created vectors small enough to fit in EAX. */ ++ if (size < 8) ++ return false; + +- return regparm; +-} ++ /* Unless ABI prescibes otherwise, ++ MMX/3dNow values are returned in MM0 if available. */ ++ ++ if (size == 8) ++ return TARGET_VECT8_RETURNS || !TARGET_MMX; + +-/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and +- DFmode (2) arguments in SSE registers for a function with the +- indicated TYPE and DECL. DECL may be NULL when calling function +- indirectly or considering a libcall. Return -1 if any FP parameter +- should be rejected by error. This is used in siutation we imply SSE +- calling convetion but the function is called from another function with +- SSE disabled. Otherwise return 0. */ ++ /* SSE values are returned in XMM0 if available. */ ++ if (size == 16) ++ return !TARGET_SSE; + +-static int +-ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) +-{ +- gcc_assert (!TARGET_64BIT); ++ /* AVX values are returned in YMM0 if available. */ ++ if (size == 32) ++ return !TARGET_AVX; + +- /* Use SSE registers to pass SFmode and DFmode arguments if requested +- by the sseregparm attribute. */ +- if (TARGET_SSEREGPARM +- || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) +- { +- if (!TARGET_SSE) +- { +- if (warn) +- { +- if (decl) +- error ("calling %qD with attribute sseregparm without " +- "SSE/SSE2 enabled", decl); +- else +- error ("calling %qT with attribute sseregparm without " +- "SSE/SSE2 enabled", type); +- } +- return 0; ++ /* AVX512F values are returned in ZMM0 if available. */ ++ if (size == 64) ++ return !TARGET_AVX512F; + } + +- return 2; +- } ++ if (mode == XFmode) ++ return false; + +- if (!decl) +- return 0; ++ if (size > 12) ++ return true; + +- cgraph_node *target = cgraph_node::get (decl); +- if (target) +- target = target->function_symbol (); ++ /* OImode shouldn't be used directly. */ ++ gcc_assert (mode != OImode); + +- /* For local functions, pass up to SSE_REGPARM_MAX SFmode +- (and DFmode for SSE2) arguments in SSE registers. */ +- if (target +- /* TARGET_SSE_MATH */ +- && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE) +- && opt_for_fn (target->decl, optimize) +- && !(profile_flag && !flag_fentry)) +- { +- cgraph_local_info *i = &target->local; +- if (i && i->local && i->can_change_signature) +- { +- /* Refuse to produce wrong code when local function with SSE enabled +- is called from SSE disabled function. +- FIXME: We need a way to detect these cases cross-ltrans partition +- and avoid using SSE calling conventions on local functions called +- from function with SSE disabled. For now at least delay the +- warning until we know we are going to produce wrong code. +- See PR66047 */ +- if (!TARGET_SSE && warn) +- return -1; +- return TARGET_SSE2_P (target_opts_for_fn (target->decl) +- ->x_ix86_isa_flags) ? 2 : 1; +- } ++ return false; + } +- +- return 0; ++#endif + } + +-/* Return true if EAX is live at the start of the function. 
Used by +- ix86_expand_prologue to determine if we need special help before +- calling allocate_stack_worker. */ ++ ++/* Create the va_list data type. */ + +-static bool +-ix86_eax_live_at_start_p (void) ++static tree ++ix86_build_builtin_va_list_64 (void) + { +- /* Cheat. Don't bother working forward from ix86_function_regparm +- to the function type to whether an actual argument is located in +- eax. Instead just look at cfg info, which is still close enough +- to correct at this point. This gives false positives for broken +- functions that might use uninitialized data that happens to be +- allocated in eax, but who cares? */ +- return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0); +-} ++ tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; + +-static bool +-ix86_keep_aggregate_return_pointer (tree fntype) +-{ +- tree attr; ++ record = lang_hooks.types.make_type (RECORD_TYPE); ++ type_decl = build_decl (BUILTINS_LOCATION, ++ TYPE_DECL, get_identifier ("__va_list_tag"), record); + +- if (!TARGET_64BIT) +- { +- attr = lookup_attribute ("callee_pop_aggregate_return", +- TYPE_ATTRIBUTES (fntype)); +- if (attr) +- return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0); ++ f_gpr = build_decl (BUILTINS_LOCATION, ++ FIELD_DECL, get_identifier ("gp_offset"), ++ unsigned_type_node); ++ f_fpr = build_decl (BUILTINS_LOCATION, ++ FIELD_DECL, get_identifier ("fp_offset"), ++ unsigned_type_node); ++ f_ovf = build_decl (BUILTINS_LOCATION, ++ FIELD_DECL, get_identifier ("overflow_arg_area"), ++ ptr_type_node); ++ f_sav = build_decl (BUILTINS_LOCATION, ++ FIELD_DECL, get_identifier ("reg_save_area"), ++ ptr_type_node); + +- /* For 32-bit MS-ABI the default is to keep aggregate +- return pointer. */ +- if (ix86_function_type_abi (fntype) == MS_ABI) +- return true; +- } +- return KEEP_AGGREGATE_RETURN_POINTER != 0; +-} ++ va_list_gpr_counter_field = f_gpr; ++ va_list_fpr_counter_field = f_fpr; + +-/* Value is the number of bytes of arguments automatically +- popped when returning from a subroutine call. +- FUNDECL is the declaration node of the function (as a tree), +- FUNTYPE is the data type of the function (as a tree), +- or for a library call it is an identifier node for the subroutine name. +- SIZE is the number of bytes of arguments passed on the stack. ++ DECL_FIELD_CONTEXT (f_gpr) = record; ++ DECL_FIELD_CONTEXT (f_fpr) = record; ++ DECL_FIELD_CONTEXT (f_ovf) = record; ++ DECL_FIELD_CONTEXT (f_sav) = record; + +- On the 80386, the RTD insn may be used to pop them if the number +- of args is fixed, but if the number is variable then the caller +- must pop them all. RTD can't be used for library calls now +- because the library is compiled with the Unix compiler. +- Use of RTD is a selectable option, since it is incompatible with +- standard Unix calling sequences. If the option is not selected, +- the caller must always pop the args. ++ TYPE_STUB_DECL (record) = type_decl; ++ TYPE_NAME (record) = type_decl; ++ TYPE_FIELDS (record) = f_gpr; ++ DECL_CHAIN (f_gpr) = f_fpr; ++ DECL_CHAIN (f_fpr) = f_ovf; ++ DECL_CHAIN (f_ovf) = f_sav; + +- The attribute stdcall is equivalent to RTD on a per module basis. */ ++ layout_type (record); + +-static poly_int64 +-ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size) +-{ +- unsigned int ccvt; ++ TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"), ++ NULL_TREE, TYPE_ATTRIBUTES (record)); + +- /* None of the 64-bit ABIs pop arguments. */ ++ /* The correct type is an array type of one element. 
*/ ++ return build_array_type (record, build_index_type (size_zero_node)); ++} ++ ++/* Setup the builtin va_list data type and for 64-bit the additional ++ calling convention specific va_list data types. */ ++ ++static tree ++ix86_build_builtin_va_list (void) ++{ + if (TARGET_64BIT) +- return 0; ++ { ++ /* Initialize ABI specific va_list builtin types. + +- ccvt = ix86_get_callcvt (funtype); ++ In lto1, we can encounter two va_list types: ++ - one as a result of the type-merge across TUs, and ++ - the one constructed here. ++ These two types will not have the same TYPE_MAIN_VARIANT, and therefore ++ a type identity check in canonical_va_list_type based on ++ TYPE_MAIN_VARIANT (which we used to have) will not work. ++ Instead, we tag each va_list_type_node with its unique attribute, and ++ look for the attribute in the type identity check in ++ canonical_va_list_type. + +- if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL +- | IX86_CALLCVT_THISCALL)) != 0 +- && ! stdarg_p (funtype)) +- return size; ++ Tagging sysv_va_list_type_node directly with the attribute is ++ problematic since it's a array of one record, which will degrade into a ++ pointer to record when used as parameter (see build_va_arg comments for ++ an example), dropping the attribute in the process. So we tag the ++ record instead. */ + +- /* Lose any fake structure return argument if it is passed on the stack. */ +- if (aggregate_value_p (TREE_TYPE (funtype), fundecl) +- && !ix86_keep_aggregate_return_pointer (funtype)) ++ /* For SYSV_ABI we use an array of one record. */ ++ sysv_va_list_type_node = ix86_build_builtin_va_list_64 (); ++ ++ /* For MS_ABI we use plain pointer to argument area. */ ++ tree char_ptr_type = build_pointer_type (char_type_node); ++ tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE, ++ TYPE_ATTRIBUTES (char_ptr_type)); ++ ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr); ++ ++ return ((ix86_abi == MS_ABI) ++ ? ms_va_list_type_node ++ : sysv_va_list_type_node); ++ } ++ else + { +- int nregs = ix86_function_regparm (funtype, fundecl); +- if (nregs == 0) +- return GET_MODE_SIZE (Pmode); ++ /* For i386 we use plain pointer to argument area. */ ++ return build_pointer_type (char_type_node); + } +- +- return 0; + } + +-/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ ++/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ + +-static bool +-ix86_legitimate_combined_insn (rtx_insn *insn) ++static void ++setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) + { +- int i; ++ rtx save_area, mem; ++ alias_set_type set; ++ int i, max; + +- /* Check operand constraints in case hard registers were propagated +- into insn pattern. This check prevents combine pass from +- generating insn patterns with invalid hard register operands. +- These invalid insns can eventually confuse reload to error out +- with a spill failure. See also PRs 46829 and 46843. */ ++ /* GPR size of varargs save area. */ ++ if (cfun->va_list_gpr_size) ++ ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; ++ else ++ ix86_varargs_gpr_size = 0; + +- gcc_assert (INSN_CODE (insn) >= 0); ++ /* FPR size of varargs save area. We don't need it if we don't pass ++ anything in SSE registers. */ ++ if (TARGET_SSE && cfun->va_list_fpr_size) ++ ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; ++ else ++ ix86_varargs_fpr_size = 0; + +- extract_insn (insn); +- preprocess_constraints (insn); ++ if (! ix86_varargs_gpr_size && ! 
ix86_varargs_fpr_size) ++ return; + +- int n_operands = recog_data.n_operands; +- int n_alternatives = recog_data.n_alternatives; +- for (i = 0; i < n_operands; i++) ++ save_area = frame_pointer_rtx; ++ set = get_varargs_alias_set (); ++ ++ max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; ++ if (max > X86_64_REGPARM_MAX) ++ max = X86_64_REGPARM_MAX; ++ ++ for (i = cum->regno; i < max; i++) + { +- rtx op = recog_data.operand[i]; +- machine_mode mode = GET_MODE (op); +- const operand_alternative *op_alt; +- int offset = 0; +- bool win; +- int j; ++ mem = gen_rtx_MEM (word_mode, ++ plus_constant (Pmode, save_area, i * UNITS_PER_WORD)); ++ MEM_NOTRAP_P (mem) = 1; ++ set_mem_alias_set (mem, set); ++ emit_move_insn (mem, ++ gen_rtx_REG (word_mode, ++ x86_64_int_parameter_registers[i])); ++ } + +- /* A unary operator may be accepted by the predicate, but it +- is irrelevant for matching constraints. */ +- if (UNARY_P (op)) +- op = XEXP (op, 0); ++ if (ix86_varargs_fpr_size) ++ { ++ machine_mode smode; ++ rtx_code_label *label; ++ rtx test; + +- if (SUBREG_P (op)) +- { +- if (REG_P (SUBREG_REG (op)) +- && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER) +- offset = subreg_regno_offset (REGNO (SUBREG_REG (op)), +- GET_MODE (SUBREG_REG (op)), +- SUBREG_BYTE (op), +- GET_MODE (op)); +- op = SUBREG_REG (op); +- } ++ /* Now emit code to save SSE registers. The AX parameter contains number ++ of SSE parameter registers used to call this function, though all we ++ actually check here is the zero/non-zero status. */ + +- if (!(REG_P (op) && HARD_REGISTER_P (op))) +- continue; ++ label = gen_label_rtx (); ++ test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx); ++ emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1), ++ label)); + +- op_alt = recog_op_alt; ++ /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if ++ we used movdqa (i.e. TImode) instead? Perhaps even better would ++ be if we could determine the real mode of the data, via a hook ++ into pass_stdarg. Ignore all that for now. */ ++ smode = V4SFmode; ++ if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode)) ++ crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode); + +- /* Operand has no constraints, anything is OK. */ +- win = !n_alternatives; ++ max = cum->sse_regno + cfun->va_list_fpr_size / 16; ++ if (max > X86_64_SSE_REGPARM_MAX) ++ max = X86_64_SSE_REGPARM_MAX; + +- alternative_mask preferred = get_preferred_alternatives (insn); +- for (j = 0; j < n_alternatives; j++, op_alt += n_operands) ++ for (i = cum->sse_regno; i < max; ++i) + { +- if (!TEST_BIT (preferred, j)) +- continue; +- if (op_alt[i].anything_ok +- || (op_alt[i].matches != -1 +- && operands_match_p +- (recog_data.operand[i], +- recog_data.operand[op_alt[i].matches])) +- || reg_fits_class_p (op, op_alt[i].cl, offset, mode)) +- { +- win = true; +- break; +- } ++ mem = plus_constant (Pmode, save_area, ++ i * 16 + ix86_varargs_gpr_size); ++ mem = gen_rtx_MEM (smode, mem); ++ MEM_NOTRAP_P (mem) = 1; ++ set_mem_alias_set (mem, set); ++ set_mem_align (mem, GET_MODE_ALIGNMENT (smode)); ++ ++ emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i))); + } + +- if (!win) +- return false; ++ emit_label (label); + } +- +- return true; +-} +- +-/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ +- +-static unsigned HOST_WIDE_INT +-ix86_asan_shadow_offset (void) +-{ +- return TARGET_LP64 ? (TARGET_MACHO ? 
(HOST_WIDE_INT_1 << 44) +- : HOST_WIDE_INT_C (0x7fff8000)) +- : (HOST_WIDE_INT_1 << 29); + } +- +-/* Argument support functions. */ + +-/* Return true when register may be used to pass function parameters. */ +-bool +-ix86_function_arg_regno_p (int regno) ++static void ++setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) + { ++ alias_set_type set = get_varargs_alias_set (); + int i; +- enum calling_abi call_abi; +- const int *parm_regs; + +- if (!TARGET_64BIT) +- { +- if (TARGET_MACHO) +- return (regno < REGPARM_MAX +- || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); +- else +- return (regno < REGPARM_MAX +- || (TARGET_MMX && MMX_REGNO_P (regno) +- && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) +- || (TARGET_SSE && SSE_REGNO_P (regno) +- && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); +- } ++ /* Reset to zero, as there might be a sysv vaarg used ++ before. */ ++ ix86_varargs_gpr_size = 0; ++ ix86_varargs_fpr_size = 0; + +- if (TARGET_SSE && SSE_REGNO_P (regno) +- && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) +- return true; +- +- /* TODO: The function should depend on current function ABI but +- builtins.c would need updating then. Therefore we use the +- default ABI. */ +- call_abi = ix86_cfun_abi (); +- +- /* RAX is used as hidden argument to va_arg functions. */ +- if (call_abi == SYSV_ABI && regno == AX_REG) +- return true; +- +- if (call_abi == MS_ABI) +- parm_regs = x86_64_ms_abi_int_parameter_registers; +- else +- parm_regs = x86_64_int_parameter_registers; +- +- for (i = 0; i < (call_abi == MS_ABI +- ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) +- if (regno == parm_regs[i]) +- return true; +- return false; +-} +- +-/* Return if we do not know how to pass TYPE solely in registers. */ +- +-static bool +-ix86_must_pass_in_stack (machine_mode mode, const_tree type) +-{ +- if (must_pass_in_stack_var_size_or_pad (mode, type)) +- return true; +- +- /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! +- The layout_type routine is crafty and tries to trick us into passing +- currently unsupported vector types on the stack by using TImode. */ +- return (!TARGET_64BIT && mode == TImode +- && type && TREE_CODE (type) != VECTOR_TYPE); +-} ++ for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) ++ { ++ rtx reg, mem; + +-/* It returns the size, in bytes, of the area reserved for arguments passed +- in registers for the function represented by fndecl dependent to the used +- abi format. */ +-int +-ix86_reg_parm_stack_space (const_tree fndecl) +-{ +- enum calling_abi call_abi = SYSV_ABI; +- if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) +- call_abi = ix86_function_abi (fndecl); +- else +- call_abi = ix86_function_type_abi (fndecl); +- if (TARGET_64BIT && call_abi == MS_ABI) +- return 32; +- return 0; +-} ++ mem = gen_rtx_MEM (Pmode, ++ plus_constant (Pmode, virtual_incoming_args_rtx, ++ i * UNITS_PER_WORD)); ++ MEM_NOTRAP_P (mem) = 1; ++ set_mem_alias_set (mem, set); + +-/* We add this as a workaround in order to use libc_has_function +- hook in i386.md. */ +-bool +-ix86_libc_has_function (enum function_class fn_class) +-{ +- return targetm.libc_has_function (fn_class); ++ reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); ++ emit_move_insn (mem, reg); ++ } + } + +-/* Returns value SYSV_ABI, MS_ABI dependent on fntype, +- specifying the call abi used. 
*/ +-enum calling_abi +-ix86_function_type_abi (const_tree fntype) ++static void ++ix86_setup_incoming_varargs (cumulative_args_t cum_v, ++ const function_arg_info &arg, ++ int *, int no_rtl) + { +- enum calling_abi abi = ix86_abi; ++ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); ++ CUMULATIVE_ARGS next_cum; ++ tree fntype; + +- if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE) +- return abi; ++ /* This argument doesn't appear to be used anymore. Which is good, ++ because the old code here didn't suppress rtl generation. */ ++ gcc_assert (!no_rtl); + +- if (abi == SYSV_ABI +- && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) +- { +- static int warned; +- if (TARGET_X32 && !warned) +- { +- error ("X32 does not support ms_abi attribute"); +- warned = 1; +- } ++ if (!TARGET_64BIT) ++ return; + +- abi = MS_ABI; +- } +- else if (abi == MS_ABI +- && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) +- abi = SYSV_ABI; ++ fntype = TREE_TYPE (current_function_decl); + +- return abi; +-} ++ /* For varargs, we do not want to skip the dummy va_dcl argument. ++ For stdargs, we do want to skip the last named argument. */ ++ next_cum = *cum; ++ if (stdarg_p (fntype)) ++ ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg); + +-static enum calling_abi +-ix86_function_abi (const_tree fndecl) +-{ +- return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi; ++ if (cum->call_abi == MS_ABI) ++ setup_incoming_varargs_ms_64 (&next_cum); ++ else ++ setup_incoming_varargs_64 (&next_cum); + } + +-/* Returns value SYSV_ABI, MS_ABI dependent on cfun, +- specifying the call abi used. */ +-enum calling_abi +-ix86_cfun_abi (void) +-{ +- return cfun ? cfun->machine->call_abi : ix86_abi; +-} ++/* Checks if TYPE is of kind va_list char *. */ + + static bool +-ix86_function_ms_hook_prologue (const_tree fn) ++is_va_list_char_pointer (tree type) + { +- if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn))) +- { +- if (decl_function_context (fn) != NULL_TREE) +- error_at (DECL_SOURCE_LOCATION (fn), +- "ms_hook_prologue is not compatible with nested function"); +- else +- return true; +- } +- return false; +-} ++ tree canonic; + +-static bool +-ix86_function_naked (const_tree fn) +-{ +- if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn))) ++ /* For 32-bit it is always true. */ ++ if (!TARGET_64BIT) + return true; +- +- return false; ++ canonic = ix86_canonical_va_list_type (type); ++ return (canonic == ms_va_list_type_node ++ || (ix86_abi == MS_ABI && canonic == va_list_type_node)); + } + +-/* Write the extra assembler code needed to declare a function properly. */ ++/* Implement va_start. */ + +-void +-ix86_asm_output_function_label (FILE *asm_out_file, const char *fname, +- tree decl) ++static void ++ix86_va_start (tree valist, rtx nextarg) + { +- bool is_ms_hook = ix86_function_ms_hook_prologue (decl); ++ HOST_WIDE_INT words, n_gpr, n_fpr; ++ tree f_gpr, f_fpr, f_ovf, f_sav; ++ tree gpr, fpr, ovf, sav, t; ++ tree type; ++ rtx ovf_rtx; + +- if (is_ms_hook) ++ if (flag_split_stack ++ && cfun->machine->split_stack_varargs_pointer == NULL_RTX) + { +- int i, filler_count = (TARGET_64BIT ? 32 : 16); +- unsigned int filler_cc = 0xcccccccc; ++ unsigned int scratch_regno; + +- for (i = 0; i < filler_count; i += 4) +- fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc); +- } ++ /* When we are splitting the stack, we can't refer to the stack ++ arguments using internal_arg_pointer, because they may be on ++ the old stack. 
The split stack prologue will arrange to ++ leave a pointer to the old stack arguments in a scratch ++ register, which we here copy to a pseudo-register. The split ++ stack prologue can't set the pseudo-register directly because ++ it (the prologue) runs before any registers have been saved. */ + +-#ifdef SUBTARGET_ASM_UNWIND_INIT +- SUBTARGET_ASM_UNWIND_INIT (asm_out_file); +-#endif ++ scratch_regno = split_stack_prologue_scratch_regno (); ++ if (scratch_regno != INVALID_REGNUM) ++ { ++ rtx reg; ++ rtx_insn *seq; + +- ASM_OUTPUT_LABEL (asm_out_file, fname); ++ reg = gen_reg_rtx (Pmode); ++ cfun->machine->split_stack_varargs_pointer = reg; + +- /* Output magic byte marker, if hot-patch attribute is set. */ +- if (is_ms_hook) +- { +- if (TARGET_64BIT) +- { +- /* leaq [%rsp + 0], %rsp */ +- fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n", +- asm_out_file); ++ start_sequence (); ++ emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); ++ seq = get_insns (); ++ end_sequence (); ++ ++ push_topmost_sequence (); ++ emit_insn_after (seq, entry_of_function ()); ++ pop_topmost_sequence (); + } ++ } ++ ++ /* Only 64bit target needs something special. */ ++ if (is_va_list_char_pointer (TREE_TYPE (valist))) ++ { ++ if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) ++ std_expand_builtin_va_start (valist, nextarg); + else + { +- /* movl.s %edi, %edi +- push %ebp +- movl.s %esp, %ebp */ +- fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file); ++ rtx va_r, next; ++ ++ va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ next = expand_binop (ptr_mode, add_optab, ++ cfun->machine->split_stack_varargs_pointer, ++ crtl->args.arg_offset_rtx, ++ NULL_RTX, 0, OPTAB_LIB_WIDEN); ++ convert_move (va_r, next, 0); + } ++ return; + } +-} + +-/* Implementation of call abi switching target hook. Specific to FNDECL +- the specific call register sets are set. See also +- ix86_conditional_register_usage for more details. */ +-void +-ix86_call_abi_override (const_tree fndecl) +-{ +- cfun->machine->call_abi = ix86_function_abi (fndecl); +-} ++ f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); ++ f_fpr = DECL_CHAIN (f_gpr); ++ f_ovf = DECL_CHAIN (f_fpr); ++ f_sav = DECL_CHAIN (f_ovf); + +-/* Return 1 if pseudo register should be created and used to hold +- GOT address for PIC code. */ +-bool +-ix86_use_pseudo_pic_reg (void) +-{ +- if ((TARGET_64BIT +- && (ix86_cmodel == CM_SMALL_PIC +- || TARGET_PECOFF)) +- || !flag_pic) +- return false; +- return true; +-} ++ valist = build_simple_mem_ref (valist); ++ TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node); ++ /* The following should be folded into the MEM_REF offset. */ ++ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist), ++ f_gpr, NULL_TREE); ++ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), ++ f_fpr, NULL_TREE); ++ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), ++ f_ovf, NULL_TREE); ++ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), ++ f_sav, NULL_TREE); + +-/* Initialize large model PIC register. */ ++ /* Count number of gp and fp argument registers used. 
*/ ++ words = crtl->args.info.words; ++ n_gpr = crtl->args.info.regno; ++ n_fpr = crtl->args.info.sse_regno; + +-static void +-ix86_init_large_pic_reg (unsigned int tmp_regno) +-{ +- rtx_code_label *label; +- rtx tmp_reg; +- +- gcc_assert (Pmode == DImode); +- label = gen_label_rtx (); +- emit_label (label); +- LABEL_PRESERVE_P (label) = 1; +- tmp_reg = gen_rtx_REG (Pmode, tmp_regno); +- gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno); +- emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, +- label)); +- emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); +- emit_insn (ix86_gen_add3 (pic_offset_table_rtx, +- pic_offset_table_rtx, tmp_reg)); +- const char *name = LABEL_NAME (label); +- PUT_CODE (label, NOTE); +- NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL; +- NOTE_DELETED_LABEL_NAME (label) = name; +-} +- +-/* Create and initialize PIC register if required. */ +-static void +-ix86_init_pic_reg (void) +-{ +- edge entry_edge; +- rtx_insn *seq; +- +- if (!ix86_use_pseudo_pic_reg ()) +- return; +- +- start_sequence (); +- +- if (TARGET_64BIT) ++ if (cfun->va_list_gpr_size) + { +- if (ix86_cmodel == CM_LARGE_PIC) +- ix86_init_large_pic_reg (R11_REG); +- else +- emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); ++ type = TREE_TYPE (gpr); ++ t = build2 (MODIFY_EXPR, type, ++ gpr, build_int_cst (type, n_gpr * 8)); ++ TREE_SIDE_EFFECTS (t) = 1; ++ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } +- else ++ ++ if (TARGET_SSE && cfun->va_list_fpr_size) + { +- /* If there is future mcount call in the function it is more profitable +- to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */ +- rtx reg = crtl->profile +- ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM) +- : pic_offset_table_rtx; +- rtx_insn *insn = emit_insn (gen_set_got (reg)); +- RTX_FRAME_RELATED_P (insn) = 1; +- if (crtl->profile) +- emit_move_insn (pic_offset_table_rtx, reg); +- add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); ++ type = TREE_TYPE (fpr); ++ t = build2 (MODIFY_EXPR, type, fpr, ++ build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); ++ TREE_SIDE_EFFECTS (t) = 1; ++ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + +- seq = get_insns (); +- end_sequence (); ++ /* Find the overflow area. */ ++ type = TREE_TYPE (ovf); ++ if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) ++ ovf_rtx = crtl->args.internal_arg_pointer; ++ else ++ ovf_rtx = cfun->machine->split_stack_varargs_pointer; ++ t = make_tree (type, ovf_rtx); ++ if (words != 0) ++ t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD); + +- entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); +- insert_insn_on_edge (seq, entry_edge); +- commit_one_edge_insertion (entry_edge); ++ t = build2 (MODIFY_EXPR, type, ovf, t); ++ TREE_SIDE_EFFECTS (t) = 1; ++ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); ++ ++ if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) ++ { ++ /* Find the register save area. ++ Prologue of the function save it right above stack frame. */ ++ type = TREE_TYPE (sav); ++ t = make_tree (type, frame_pointer_rtx); ++ if (!ix86_varargs_gpr_size) ++ t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX); ++ ++ t = build2 (MODIFY_EXPR, type, sav, t); ++ TREE_SIDE_EFFECTS (t) = 1; ++ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); ++ } + } + +-/* Initialize a variable CUM of type CUMULATIVE_ARGS +- for a call to a function whose data type is FNTYPE. +- For a library call, FNTYPE is 0. */ ++/* Implement va_arg. 
*/ + +-void +-init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ +- tree fntype, /* tree ptr for function decl */ +- rtx libname, /* SYMBOL_REF of library name or 0 */ +- tree fndecl, +- int caller) ++static tree ++ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, ++ gimple_seq *post_p) + { +- struct cgraph_local_info *i = NULL; +- struct cgraph_node *target = NULL; ++ static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; ++ tree f_gpr, f_fpr, f_ovf, f_sav; ++ tree gpr, fpr, ovf, sav, t; ++ int size, rsize; ++ tree lab_false, lab_over = NULL_TREE; ++ tree addr, t2; ++ rtx container; ++ int indirect_p = 0; ++ tree ptrtype; ++ machine_mode nat_mode; ++ unsigned int arg_boundary; ++ unsigned int type_align; + +- memset (cum, 0, sizeof (*cum)); ++ /* Only 64bit target needs something special. */ ++ if (is_va_list_char_pointer (TREE_TYPE (valist))) ++ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); + +- if (fndecl) ++ f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); ++ f_fpr = DECL_CHAIN (f_gpr); ++ f_ovf = DECL_CHAIN (f_fpr); ++ f_sav = DECL_CHAIN (f_ovf); ++ ++ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), ++ valist, f_gpr, NULL_TREE); ++ ++ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); ++ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); ++ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); ++ ++ indirect_p = pass_va_arg_by_reference (type); ++ if (indirect_p) ++ type = build_pointer_type (type); ++ size = arg_int_size_in_bytes (type); ++ rsize = CEIL (size, UNITS_PER_WORD); ++ ++ nat_mode = type_natural_mode (type, NULL, false); ++ switch (nat_mode) + { +- target = cgraph_node::get (fndecl); +- if (target) ++ case E_V8SFmode: ++ case E_V8SImode: ++ case E_V32QImode: ++ case E_V16HImode: ++ case E_V4DFmode: ++ case E_V4DImode: ++ case E_V16SFmode: ++ case E_V16SImode: ++ case E_V64QImode: ++ case E_V32HImode: ++ case E_V8DFmode: ++ case E_V8DImode: ++ /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */ ++ if (!TARGET_64BIT_MS_ABI) + { +- target = target->function_symbol (); +- i = cgraph_node::local_info (target->decl); +- cum->call_abi = ix86_function_abi (target->decl); ++ container = NULL; ++ break; + } +- else +- cum->call_abi = ix86_function_abi (fndecl); ++ /* FALLTHRU */ ++ ++ default: ++ container = construct_container (nat_mode, TYPE_MODE (type), ++ type, 0, X86_64_REGPARM_MAX, ++ X86_64_SSE_REGPARM_MAX, intreg, ++ 0); ++ break; + } +- else +- cum->call_abi = ix86_function_type_abi (fntype); + +- cum->caller = caller; ++ /* Pull the value out of the saved registers. */ + +- /* Set up the number of registers to use for passing arguments. */ +- cum->nregs = ix86_regparm; +- if (TARGET_64BIT) +- { +- cum->nregs = (cum->call_abi == SYSV_ABI +- ? X86_64_REGPARM_MAX +- : X86_64_MS_REGPARM_MAX); +- } +- if (TARGET_SSE) ++ addr = create_tmp_var (ptr_type_node, "addr"); ++ type_align = TYPE_ALIGN (type); ++ ++ if (container) + { +- cum->sse_nregs = SSE_REGPARM_MAX; +- if (TARGET_64BIT) +- { +- cum->sse_nregs = (cum->call_abi == SYSV_ABI +- ? 
X86_64_SSE_REGPARM_MAX +- : X86_64_MS_SSE_REGPARM_MAX); +- } +- } +- if (TARGET_MMX) +- cum->mmx_nregs = MMX_REGPARM_MAX; +- cum->warn_avx512f = true; +- cum->warn_avx = true; +- cum->warn_sse = true; +- cum->warn_mmx = true; ++ int needed_intregs, needed_sseregs; ++ bool need_temp; ++ tree int_addr, sse_addr; + +- /* Because type might mismatch in between caller and callee, we need to +- use actual type of function for local calls. +- FIXME: cgraph_analyze can be told to actually record if function uses +- va_start so for local functions maybe_vaarg can be made aggressive +- helping K&R code. +- FIXME: once typesytem is fixed, we won't need this code anymore. */ +- if (i && i->local && i->can_change_signature) +- fntype = TREE_TYPE (target->decl); +- cum->stdarg = stdarg_p (fntype); +- cum->maybe_vaarg = (fntype +- ? (!prototype_p (fntype) || stdarg_p (fntype)) +- : !libname); ++ lab_false = create_artificial_label (UNKNOWN_LOCATION); ++ lab_over = create_artificial_label (UNKNOWN_LOCATION); + +- cum->decl = fndecl; ++ examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); + +- cum->warn_empty = !warn_abi || cum->stdarg; +- if (!cum->warn_empty && fntype) +- { +- function_args_iterator iter; +- tree argtype; +- bool seen_empty_type = false; +- FOREACH_FUNCTION_ARGS (fntype, argtype, iter) ++ need_temp = (!REG_P (container) ++ && ((needed_intregs && TYPE_ALIGN (type) > 64) ++ || TYPE_ALIGN (type) > 128)); ++ ++ /* In case we are passing structure, verify that it is consecutive block ++ on the register save area. If not we need to do moves. */ ++ if (!need_temp && !REG_P (container)) + { +- if (argtype == error_mark_node || VOID_TYPE_P (argtype)) +- break; +- if (TYPE_EMPTY_P (argtype)) +- seen_empty_type = true; +- else if (seen_empty_type) ++ /* Verify that all registers are strictly consecutive */ ++ if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) + { +- cum->warn_empty = true; +- break; +- } +- } +- } ++ int i; + +- if (!TARGET_64BIT) +- { +- /* If there are variable arguments, then we won't pass anything +- in registers in 32-bit mode. */ +- if (stdarg_p (fntype)) ++ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) ++ { ++ rtx slot = XVECEXP (container, 0, i); ++ if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i ++ || INTVAL (XEXP (slot, 1)) != i * 16) ++ need_temp = true; ++ } ++ } ++ else ++ { ++ int i; ++ ++ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) ++ { ++ rtx slot = XVECEXP (container, 0, i); ++ if (REGNO (XEXP (slot, 0)) != (unsigned int) i ++ || INTVAL (XEXP (slot, 1)) != i * 8) ++ need_temp = true; ++ } ++ } ++ } ++ if (!need_temp) + { +- cum->nregs = 0; +- /* Since in 32-bit, variable arguments are always passed on +- stack, there is scratch register available for indirect +- sibcall. */ +- cfun->machine->arg_reg_available = true; +- cum->sse_nregs = 0; +- cum->mmx_nregs = 0; +- cum->warn_avx512f = false; +- cum->warn_avx = false; +- cum->warn_sse = false; +- cum->warn_mmx = false; +- return; ++ int_addr = addr; ++ sse_addr = addr; ++ } ++ else ++ { ++ int_addr = create_tmp_var (ptr_type_node, "int_addr"); ++ sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); + } + +- /* Use ecx and edx registers if function has fastcall attribute, +- else look for regparm information. */ +- if (fntype) ++ /* First ensure that we fit completely in registers. 
*/ ++ if (needed_intregs) + { +- unsigned int ccvt = ix86_get_callcvt (fntype); +- if ((ccvt & IX86_CALLCVT_THISCALL) != 0) +- { +- cum->nregs = 1; +- cum->fastcall = 1; /* Same first register as in fastcall. */ +- } +- else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) +- { +- cum->nregs = 2; +- cum->fastcall = 1; +- } +- else +- cum->nregs = ix86_function_regparm (fntype, fndecl); ++ t = build_int_cst (TREE_TYPE (gpr), ++ (X86_64_REGPARM_MAX - needed_intregs + 1) * 8); ++ t = build2 (GE_EXPR, boolean_type_node, gpr, t); ++ t2 = build1 (GOTO_EXPR, void_type_node, lab_false); ++ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); ++ gimplify_and_add (t, pre_p); ++ } ++ if (needed_sseregs) ++ { ++ t = build_int_cst (TREE_TYPE (fpr), ++ (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16 ++ + X86_64_REGPARM_MAX * 8); ++ t = build2 (GE_EXPR, boolean_type_node, fpr, t); ++ t2 = build1 (GOTO_EXPR, void_type_node, lab_false); ++ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); ++ gimplify_and_add (t, pre_p); + } + +- /* Set up the number of SSE registers used for passing SFmode +- and DFmode arguments. Warn for mismatching ABI. */ +- cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); +- } ++ /* Compute index to start of area used for integer regs. */ ++ if (needed_intregs) ++ { ++ /* int_addr = gpr + sav; */ ++ t = fold_build_pointer_plus (sav, gpr); ++ gimplify_assign (int_addr, t, pre_p); ++ } ++ if (needed_sseregs) ++ { ++ /* sse_addr = fpr + sav; */ ++ t = fold_build_pointer_plus (sav, fpr); ++ gimplify_assign (sse_addr, t, pre_p); ++ } ++ if (need_temp) ++ { ++ int i, prev_size = 0; ++ tree temp = create_tmp_var (type, "va_arg_tmp"); + +- cfun->machine->arg_reg_available = (cum->nregs > 0); +-} ++ /* addr = &temp; */ ++ t = build1 (ADDR_EXPR, build_pointer_type (type), temp); ++ gimplify_assign (addr, t, pre_p); + +-/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. +- But in the case of vector types, it is some vector mode. ++ for (i = 0; i < XVECLEN (container, 0); i++) ++ { ++ rtx slot = XVECEXP (container, 0, i); ++ rtx reg = XEXP (slot, 0); ++ machine_mode mode = GET_MODE (reg); ++ tree piece_type; ++ tree addr_type; ++ tree daddr_type; ++ tree src_addr, src; ++ int src_offset; ++ tree dest_addr, dest; ++ int cur_size = GET_MODE_SIZE (mode); + +- When we have only some of our vector isa extensions enabled, then there +- are some modes for which vector_mode_supported_p is false. For these +- modes, the generic vector support in gcc will choose some non-vector mode +- in order to implement the type. By computing the natural mode, we'll +- select the proper ABI location for the operand and not depend on whatever +- the middle-end decides to do with these vector types. ++ gcc_assert (prev_size <= INTVAL (XEXP (slot, 1))); ++ prev_size = INTVAL (XEXP (slot, 1)); ++ if (prev_size + cur_size > size) ++ { ++ cur_size = size - prev_size; ++ unsigned int nbits = cur_size * BITS_PER_UNIT; ++ if (!int_mode_for_size (nbits, 1).exists (&mode)) ++ mode = QImode; ++ } ++ piece_type = lang_hooks.types.type_for_mode (mode, 1); ++ if (mode == GET_MODE (reg)) ++ addr_type = build_pointer_type (piece_type); ++ else ++ addr_type = build_pointer_type_for_mode (piece_type, ptr_mode, ++ true); ++ daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode, ++ true); + +- The midde-end can't deal with the vector types > 16 bytes. In this +- case, we return the original mode and warn ABI change if CUM isn't +- NULL. 
++ if (SSE_REGNO_P (REGNO (reg))) ++ { ++ src_addr = sse_addr; ++ src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; ++ } ++ else ++ { ++ src_addr = int_addr; ++ src_offset = REGNO (reg) * 8; ++ } ++ src_addr = fold_convert (addr_type, src_addr); ++ src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset); + +- If INT_RETURN is true, warn ABI change if the vector mode isn't +- available for function return value. */ ++ dest_addr = fold_convert (daddr_type, addr); ++ dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size); ++ if (cur_size == GET_MODE_SIZE (mode)) ++ { ++ src = build_va_arg_indirect_ref (src_addr); ++ dest = build_va_arg_indirect_ref (dest_addr); + +-static machine_mode +-type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, +- bool in_return) +-{ +- machine_mode mode = TYPE_MODE (type); ++ gimplify_assign (dest, src, pre_p); ++ } ++ else ++ { ++ tree copy ++ = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), ++ 3, dest_addr, src_addr, ++ size_int (cur_size)); ++ gimplify_and_add (copy, pre_p); ++ } ++ prev_size += cur_size; ++ } ++ } + +- if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) +- { +- HOST_WIDE_INT size = int_size_in_bytes (type); +- if ((size == 8 || size == 16 || size == 32 || size == 64) +- /* ??? Generic code allows us to create width 1 vectors. Ignore. */ +- && TYPE_VECTOR_SUBPARTS (type) > 1) ++ if (needed_intregs) + { +- machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); ++ t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, ++ build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); ++ gimplify_assign (gpr, t, pre_p); ++ /* The GPR save area guarantees only 8-byte alignment. */ ++ if (!need_temp) ++ type_align = MIN (type_align, 64); ++ } + +- /* There are no XFmode vector modes. */ +- if (innermode == XFmode) +- return mode; ++ if (needed_sseregs) ++ { ++ t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, ++ build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); ++ gimplify_assign (unshare_expr (fpr), t, pre_p); ++ } + +- if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) +- mode = MIN_MODE_VECTOR_FLOAT; +- else +- mode = MIN_MODE_VECTOR_INT; ++ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); + +- /* Get the mode which has this inner mode and number of units. */ +- FOR_EACH_MODE_FROM (mode, mode) +- if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) +- && GET_MODE_INNER (mode) == innermode) +- { +- if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU) +- { +- static bool warnedavx512f; +- static bool warnedavx512f_ret; ++ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); ++ } + +- if (cum && cum->warn_avx512f && !warnedavx512f) +- { +- if (warning (OPT_Wpsabi, "AVX512F vector argument " +- "without AVX512F enabled changes the ABI")) +- warnedavx512f = true; +- } +- else if (in_return && !warnedavx512f_ret) +- { +- if (warning (OPT_Wpsabi, "AVX512F vector return " +- "without AVX512F enabled changes the ABI")) +- warnedavx512f_ret = true; +- } ++ /* ... otherwise out of the overflow area. */ + +- return TYPE_MODE (type); +- } +- else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU) +- { +- static bool warnedavx; +- static bool warnedavx_ret; ++ /* When we align parameter on stack for caller, if the parameter ++ alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be ++ aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee ++ here with caller. 
*/ ++ arg_boundary = ix86_function_arg_boundary (VOIDmode, type); ++ if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) ++ arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; + +- if (cum && cum->warn_avx && !warnedavx) +- { +- if (warning (OPT_Wpsabi, "AVX vector argument " +- "without AVX enabled changes the ABI")) +- warnedavx = true; +- } +- else if (in_return && !warnedavx_ret) +- { +- if (warning (OPT_Wpsabi, "AVX vector return " +- "without AVX enabled changes the ABI")) +- warnedavx_ret = true; +- } ++ /* Care for on-stack alignment if needed. */ ++ if (arg_boundary <= 64 || size == 0) ++ t = ovf; ++ else ++ { ++ HOST_WIDE_INT align = arg_boundary / 8; ++ t = fold_build_pointer_plus_hwi (ovf, align - 1); ++ t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, ++ build_int_cst (TREE_TYPE (t), -align)); ++ } + +- return TYPE_MODE (type); +- } +- else if (((size == 8 && TARGET_64BIT) || size == 16) +- && !TARGET_SSE +- && !TARGET_IAMCU) +- { +- static bool warnedsse; +- static bool warnedsse_ret; ++ gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); ++ gimplify_assign (addr, t, pre_p); + +- if (cum && cum->warn_sse && !warnedsse) +- { +- if (warning (OPT_Wpsabi, "SSE vector argument " +- "without SSE enabled changes the ABI")) +- warnedsse = true; +- } +- else if (!TARGET_64BIT && in_return && !warnedsse_ret) +- { +- if (warning (OPT_Wpsabi, "SSE vector return " +- "without SSE enabled changes the ABI")) +- warnedsse_ret = true; +- } +- } +- else if ((size == 8 && !TARGET_64BIT) +- && (!cfun +- || cfun->machine->func_type == TYPE_NORMAL) +- && !TARGET_MMX +- && !TARGET_IAMCU) +- { +- static bool warnedmmx; +- static bool warnedmmx_ret; ++ t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD); ++ gimplify_assign (unshare_expr (ovf), t, pre_p); + +- if (cum && cum->warn_mmx && !warnedmmx) +- { +- if (warning (OPT_Wpsabi, "MMX vector argument " +- "without MMX enabled changes the ABI")) +- warnedmmx = true; +- } +- else if (in_return && !warnedmmx_ret) +- { +- if (warning (OPT_Wpsabi, "MMX vector return " +- "without MMX enabled changes the ABI")) +- warnedmmx_ret = true; +- } +- } +- return mode; +- } ++ if (container) ++ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); + +- gcc_unreachable (); +- } +- } ++ type = build_aligned_type (type, type_align); ++ ptrtype = build_pointer_type_for_mode (type, ptr_mode, true); ++ addr = fold_convert (ptrtype, addr); + +- return mode; ++ if (indirect_p) ++ addr = build_va_arg_indirect_ref (addr); ++ return build_va_arg_indirect_ref (addr); + } ++ ++/* Return true if OPNUM's MEM should be matched ++ in movabs* patterns. */ + +-/* We want to pass a value in REGNO whose "natural" mode is MODE. However, +- this may not agree with the mode that the type system has chosen for the +- register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can +- go ahead and use it. Otherwise we have to build a PARALLEL instead. 
*/ +- +-static rtx +-gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, +- unsigned int regno) ++bool ++ix86_check_movabs (rtx insn, int opnum) + { +- rtx tmp; ++ rtx set, mem; + +- if (orig_mode != BLKmode) +- tmp = gen_rtx_REG (orig_mode, regno); +- else ++ set = PATTERN (insn); ++ if (GET_CODE (set) == PARALLEL) ++ set = XVECEXP (set, 0, 0); ++ gcc_assert (GET_CODE (set) == SET); ++ mem = XEXP (set, opnum); ++ while (SUBREG_P (mem)) ++ mem = SUBREG_REG (mem); ++ gcc_assert (MEM_P (mem)); ++ return volatile_ok || !MEM_VOLATILE_P (mem); ++} ++ ++/* Return false if INSN contains a MEM with a non-default address space. */ ++bool ++ix86_check_no_addr_space (rtx insn) ++{ ++ subrtx_var_iterator::array_type array; ++ FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL) + { +- tmp = gen_rtx_REG (mode, regno); +- tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); +- tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); ++ rtx x = *iter; ++ if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))) ++ return false; + } +- +- return tmp; ++ return true; + } ++ ++/* Initialize the table of extra 80387 mathematical constants. */ + +-/* x86-64 register passing implementation. See x86-64 ABI for details. Goal +- of this code is to classify each 8bytes of incoming argument by the register +- class and assign registers accordingly. */ +- +-/* Return the union class of CLASS1 and CLASS2. +- See the x86-64 PS ABI for details. */ +- +-static enum x86_64_reg_class +-merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) ++static void ++init_ext_80387_constants (void) + { +- /* Rule #1: If both classes are equal, this is the resulting class. */ +- if (class1 == class2) +- return class1; +- +- /* Rule #2: If one of the classes is NO_CLASS, the resulting class is +- the other class. */ +- if (class1 == X86_64_NO_CLASS) +- return class2; +- if (class2 == X86_64_NO_CLASS) +- return class1; +- +- /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ +- if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) +- return X86_64_MEMORY_CLASS; +- +- /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ +- if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) +- || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) +- return X86_64_INTEGERSI_CLASS; +- if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS +- || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) +- return X86_64_INTEGER_CLASS; ++ static const char * cst[5] = ++ { ++ "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ ++ "0.6931471805599453094286904741849753009", /* 1: fldln2 */ ++ "1.4426950408889634073876517827983434472", /* 2: fldl2e */ ++ "3.3219280948873623478083405569094566090", /* 3: fldl2t */ ++ "3.1415926535897932385128089594061862044", /* 4: fldpi */ ++ }; ++ int i; + +- /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, +- MEMORY is used. */ +- if (class1 == X86_64_X87_CLASS +- || class1 == X86_64_X87UP_CLASS +- || class1 == X86_64_COMPLEX_X87_CLASS +- || class2 == X86_64_X87_CLASS +- || class2 == X86_64_X87UP_CLASS +- || class2 == X86_64_COMPLEX_X87_CLASS) +- return X86_64_MEMORY_CLASS; ++ for (i = 0; i < 5; i++) ++ { ++ real_from_string (&ext_80387_constants_table[i], cst[i]); ++ /* Ensure each constant is rounded to XFmode precision. 
*/ ++ real_convert (&ext_80387_constants_table[i], ++ XFmode, &ext_80387_constants_table[i]); ++ } + +- /* Rule #6: Otherwise class SSE is used. */ +- return X86_64_SSE_CLASS; ++ ext_80387_constants_init = 1; + } + +-/* Classify the argument of type TYPE and mode MODE. +- CLASSES will be filled by the register class used to pass each word +- of the operand. The number of words is returned. In case the parameter +- should be passed in memory, 0 is returned. As a special case for zero +- sized containers, classes[0] will be NO_CLASS and 1 is returned. ++/* Return non-zero if the constant is something that ++ can be loaded with a special instruction. */ + +- BIT_OFFSET is used internally for handling records and specifies offset +- of the offset in bits modulo 512 to avoid overflow cases. ++int ++standard_80387_constant_p (rtx x) ++{ ++ machine_mode mode = GET_MODE (x); + +- See the x86-64 PS ABI for details. +-*/ ++ const REAL_VALUE_TYPE *r; + +-static int +-classify_argument (machine_mode mode, const_tree type, +- enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) +-{ +- HOST_WIDE_INT bytes +- = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); +- int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); ++ if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode))) ++ return -1; + +- /* Variable sized entities are always passed/returned in memory. */ +- if (bytes < 0) +- return 0; ++ if (x == CONST0_RTX (mode)) ++ return 1; ++ if (x == CONST1_RTX (mode)) ++ return 2; + +- if (mode != VOIDmode +- && targetm.calls.must_pass_in_stack (mode, type)) +- return 0; ++ r = CONST_DOUBLE_REAL_VALUE (x); + +- if (type && AGGREGATE_TYPE_P (type)) ++ /* For XFmode constants, try to find a special 80387 instruction when ++ optimizing for size or on those CPUs that benefit from them. */ ++ if (mode == XFmode ++ && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)) + { + int i; +- tree field; +- enum x86_64_reg_class subclasses[MAX_CLASSES]; + +- /* On x86-64 we pass structures larger than 64 bytes on the stack. */ +- if (bytes > 64) +- return 0; ++ if (! ext_80387_constants_init) ++ init_ext_80387_constants (); + +- for (i = 0; i < words; i++) +- classes[i] = X86_64_NO_CLASS; ++ for (i = 0; i < 5; i++) ++ if (real_identical (r, &ext_80387_constants_table[i])) ++ return i + 3; ++ } + +- /* Zero sized arrays or structures are NO_CLASS. We return 0 to +- signalize memory class, so handle it as special case. */ +- if (!words) +- { +- classes[0] = X86_64_NO_CLASS; +- return 1; +- } ++ /* Load of the constant -0.0 or -1.0 will be split as ++ fldz;fchs or fld1;fchs sequence. */ ++ if (real_isnegzero (r)) ++ return 8; ++ if (real_identical (r, &dconstm1)) ++ return 9; + +- /* Classify each field of record and merge classes. */ +- switch (TREE_CODE (type)) +- { +- case RECORD_TYPE: +- /* And now merge the fields of structure. */ +- for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) +- { +- if (TREE_CODE (field) == FIELD_DECL) +- { +- int num; ++ return 0; ++} + +- if (TREE_TYPE (field) == error_mark_node) +- continue; ++/* Return the opcode of the special instruction to be used to load ++ the constant X. */ + +- /* Bitfields are always classified as integer. Handle them +- early, since later code would consider them to be +- misaligned integers. 
*/ +- if (DECL_BIT_FIELD (field)) +- { +- for (i = (int_bit_position (field) +- + (bit_offset % 64)) / 8 / 8; +- i < ((int_bit_position (field) + (bit_offset % 64)) +- + tree_to_shwi (DECL_SIZE (field)) +- + 63) / 8 / 8; i++) +- classes[i] +- = merge_classes (X86_64_INTEGER_CLASS, classes[i]); +- } +- else +- { +- int pos; ++const char * ++standard_80387_constant_opcode (rtx x) ++{ ++ switch (standard_80387_constant_p (x)) ++ { ++ case 1: ++ return "fldz"; ++ case 2: ++ return "fld1"; ++ case 3: ++ return "fldlg2"; ++ case 4: ++ return "fldln2"; ++ case 5: ++ return "fldl2e"; ++ case 6: ++ return "fldl2t"; ++ case 7: ++ return "fldpi"; ++ case 8: ++ case 9: ++ return "#"; ++ default: ++ gcc_unreachable (); ++ } ++} + +- type = TREE_TYPE (field); ++/* Return the CONST_DOUBLE representing the 80387 constant that is ++ loaded by the specified special instruction. The argument IDX ++ matches the return value from standard_80387_constant_p. */ + +- /* Flexible array member is ignored. */ +- if (TYPE_MODE (type) == BLKmode +- && TREE_CODE (type) == ARRAY_TYPE +- && TYPE_SIZE (type) == NULL_TREE +- && TYPE_DOMAIN (type) != NULL_TREE +- && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) +- == NULL_TREE)) +- { +- static bool warned; ++rtx ++standard_80387_constant_rtx (int idx) ++{ ++ int i; + +- if (!warned && warn_psabi) +- { +- warned = true; +- inform (input_location, +- "the ABI of passing struct with" +- " a flexible array member has" +- " changed in GCC 4.4"); +- } +- continue; +- } +- num = classify_argument (TYPE_MODE (type), type, +- subclasses, +- (int_bit_position (field) +- + bit_offset) % 512); +- if (!num) +- return 0; +- pos = (int_bit_position (field) +- + (bit_offset % 64)) / 8 / 8; +- for (i = 0; i < num && (i + pos) < words; i++) +- classes[i + pos] +- = merge_classes (subclasses[i], classes[i + pos]); +- } +- } +- } +- break; ++ if (! ext_80387_constants_init) ++ init_ext_80387_constants (); + +- case ARRAY_TYPE: +- /* Arrays are handled as small records. */ +- { +- int num; +- num = classify_argument (TYPE_MODE (TREE_TYPE (type)), +- TREE_TYPE (type), subclasses, bit_offset); +- if (!num) +- return 0; ++ switch (idx) ++ { ++ case 3: ++ case 4: ++ case 5: ++ case 6: ++ case 7: ++ i = idx - 3; ++ break; + +- /* The partial classes are now full classes. */ +- if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) +- subclasses[0] = X86_64_SSE_CLASS; +- if (subclasses[0] == X86_64_INTEGERSI_CLASS +- && !((bit_offset % 64) == 0 && bytes == 4)) +- subclasses[0] = X86_64_INTEGER_CLASS; ++ default: ++ gcc_unreachable (); ++ } + +- for (i = 0; i < words; i++) +- classes[i] = subclasses[i % num]; ++ return const_double_from_real_value (ext_80387_constants_table[i], ++ XFmode); ++} + +- break; +- } +- case UNION_TYPE: +- case QUAL_UNION_TYPE: +- /* Unions are similar to RECORD_TYPE but offset is always 0. +- */ +- for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) +- { +- if (TREE_CODE (field) == FIELD_DECL) +- { +- int num; ++/* Return 1 if X is all bits 0 and 2 if X is all bits 1 ++ in supported SSE/AVX vector mode. 
*/ + +- if (TREE_TYPE (field) == error_mark_node) +- continue; ++int ++standard_sse_constant_p (rtx x, machine_mode pred_mode) ++{ ++ machine_mode mode; + +- num = classify_argument (TYPE_MODE (TREE_TYPE (field)), +- TREE_TYPE (field), subclasses, +- bit_offset); +- if (!num) +- return 0; +- for (i = 0; i < num && i < words; i++) +- classes[i] = merge_classes (subclasses[i], classes[i]); +- } +- } +- break; ++ if (!TARGET_SSE) ++ return 0; + +- default: +- gcc_unreachable (); +- } ++ mode = GET_MODE (x); + +- if (words > 2) +- { +- /* When size > 16 bytes, if the first one isn't +- X86_64_SSE_CLASS or any other ones aren't +- X86_64_SSEUP_CLASS, everything should be passed in +- memory. */ +- if (classes[0] != X86_64_SSE_CLASS) +- return 0; ++ if (x == const0_rtx || const0_operand (x, mode)) ++ return 1; + +- for (i = 1; i < words; i++) +- if (classes[i] != X86_64_SSEUP_CLASS) +- return 0; +- } ++ if (x == constm1_rtx || vector_all_ones_operand (x, mode)) ++ { ++ /* VOIDmode integer constant, get mode from the predicate. */ ++ if (mode == VOIDmode) ++ mode = pred_mode; + +- /* Final merger cleanup. */ +- for (i = 0; i < words; i++) ++ switch (GET_MODE_SIZE (mode)) + { +- /* If one class is MEMORY, everything should be passed in +- memory. */ +- if (classes[i] == X86_64_MEMORY_CLASS) +- return 0; +- +- /* The X86_64_SSEUP_CLASS should be always preceded by +- X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ +- if (classes[i] == X86_64_SSEUP_CLASS +- && classes[i - 1] != X86_64_SSE_CLASS +- && classes[i - 1] != X86_64_SSEUP_CLASS) +- { +- /* The first one should never be X86_64_SSEUP_CLASS. */ +- gcc_assert (i != 0); +- classes[i] = X86_64_SSE_CLASS; +- } +- +- /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, +- everything should be passed in memory. */ +- if (classes[i] == X86_64_X87UP_CLASS +- && (classes[i - 1] != X86_64_X87_CLASS)) +- { +- static bool warned; +- +- /* The first one should never be X86_64_X87UP_CLASS. */ +- gcc_assert (i != 0); +- if (!warned && warn_psabi) +- { +- warned = true; +- inform (input_location, +- "the ABI of passing union with long double" +- " has changed in GCC 4.4"); +- } +- return 0; +- } ++ case 64: ++ if (TARGET_AVX512F) ++ return 2; ++ break; ++ case 32: ++ if (TARGET_AVX2) ++ return 2; ++ break; ++ case 16: ++ if (TARGET_SSE2) ++ return 2; ++ break; ++ case 0: ++ /* VOIDmode */ ++ gcc_unreachable (); ++ default: ++ break; + } +- return words; + } + +- /* Compute alignment needed. We align all types to natural boundaries with +- exception of XFmode that is aligned to 64bits. */ +- if (mode != VOIDmode && mode != BLKmode) +- { +- int mode_alignment = GET_MODE_BITSIZE (mode); +- +- if (mode == XFmode) +- mode_alignment = 128; +- else if (mode == XCmode) +- mode_alignment = 256; +- if (COMPLEX_MODE_P (mode)) +- mode_alignment /= 2; +- /* Misaligned fields are always returned in memory. */ +- if (bit_offset % mode_alignment) +- return 0; +- } ++ return 0; ++} + +- /* for V1xx modes, just use the base mode */ +- if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode +- && GET_MODE_UNIT_SIZE (mode) == bytes) +- mode = GET_MODE_INNER (mode); ++/* Return the opcode of the special instruction to be used to load ++ the constant operands[1] into operands[0]. */ + +- /* Classification of atomic types. 
*/ +- switch (mode) +- { +- case E_SDmode: +- case E_DDmode: +- classes[0] = X86_64_SSE_CLASS; +- return 1; +- case E_TDmode: +- classes[0] = X86_64_SSE_CLASS; +- classes[1] = X86_64_SSEUP_CLASS; +- return 2; +- case E_DImode: +- case E_SImode: +- case E_HImode: +- case E_QImode: +- case E_CSImode: +- case E_CHImode: +- case E_CQImode: +- { +- int size = bit_offset + (int) GET_MODE_BITSIZE (mode); ++const char * ++standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) ++{ ++ machine_mode mode; ++ rtx x = operands[1]; + +- /* Analyze last 128 bits only. */ +- size = (size - 1) & 0x7f; ++ gcc_assert (TARGET_SSE); + +- if (size < 32) +- { +- classes[0] = X86_64_INTEGERSI_CLASS; +- return 1; +- } +- else if (size < 64) +- { +- classes[0] = X86_64_INTEGER_CLASS; +- return 1; +- } +- else if (size < 64+32) +- { +- classes[0] = X86_64_INTEGER_CLASS; +- classes[1] = X86_64_INTEGERSI_CLASS; +- return 2; +- } +- else if (size < 64+64) +- { +- classes[0] = classes[1] = X86_64_INTEGER_CLASS; +- return 2; +- } +- else +- gcc_unreachable (); +- } +- case E_CDImode: +- case E_TImode: +- classes[0] = classes[1] = X86_64_INTEGER_CLASS; +- return 2; +- case E_COImode: +- case E_OImode: +- /* OImode shouldn't be used directly. */ +- gcc_unreachable (); +- case E_CTImode: +- return 0; +- case E_SFmode: +- if (!(bit_offset % 64)) +- classes[0] = X86_64_SSESF_CLASS; +- else +- classes[0] = X86_64_SSE_CLASS; +- return 1; +- case E_DFmode: +- classes[0] = X86_64_SSEDF_CLASS; +- return 1; +- case E_XFmode: +- classes[0] = X86_64_X87_CLASS; +- classes[1] = X86_64_X87UP_CLASS; +- return 2; +- case E_TFmode: +- classes[0] = X86_64_SSE_CLASS; +- classes[1] = X86_64_SSEUP_CLASS; +- return 2; +- case E_SCmode: +- classes[0] = X86_64_SSE_CLASS; +- if (!(bit_offset % 64)) +- return 1; +- else ++ mode = GET_MODE (x); ++ ++ if (x == const0_rtx || const0_operand (x, mode)) ++ { ++ switch (get_attr_mode (insn)) + { +- static bool warned; ++ case MODE_TI: ++ if (!EXT_REX_SSE_REG_P (operands[0])) ++ return "%vpxor\t%0, %d0"; ++ /* FALLTHRU */ ++ case MODE_XI: ++ case MODE_OI: ++ if (EXT_REX_SSE_REG_P (operands[0])) ++ return (TARGET_AVX512VL ++ ? "vpxord\t%x0, %x0, %x0" ++ : "vpxord\t%g0, %g0, %g0"); ++ return "vpxor\t%x0, %x0, %x0"; + +- if (!warned && warn_psabi) +- { +- warned = true; +- inform (input_location, +- "the ABI of passing structure with complex float" +- " member has changed in GCC 4.4"); +- } +- classes[1] = X86_64_SSESF_CLASS; +- return 2; ++ case MODE_V2DF: ++ if (!EXT_REX_SSE_REG_P (operands[0])) ++ return "%vxorpd\t%0, %d0"; ++ /* FALLTHRU */ ++ case MODE_V8DF: ++ case MODE_V4DF: ++ if (!EXT_REX_SSE_REG_P (operands[0])) ++ return "vxorpd\t%x0, %x0, %x0"; ++ else if (TARGET_AVX512DQ) ++ return (TARGET_AVX512VL ++ ? "vxorpd\t%x0, %x0, %x0" ++ : "vxorpd\t%g0, %g0, %g0"); ++ else ++ return (TARGET_AVX512VL ++ ? "vpxorq\t%x0, %x0, %x0" ++ : "vpxorq\t%g0, %g0, %g0"); ++ ++ case MODE_V4SF: ++ if (!EXT_REX_SSE_REG_P (operands[0])) ++ return "%vxorps\t%0, %d0"; ++ /* FALLTHRU */ ++ case MODE_V16SF: ++ case MODE_V8SF: ++ if (!EXT_REX_SSE_REG_P (operands[0])) ++ return "vxorps\t%x0, %x0, %x0"; ++ else if (TARGET_AVX512DQ) ++ return (TARGET_AVX512VL ++ ? "vxorps\t%x0, %x0, %x0" ++ : "vxorps\t%g0, %g0, %g0"); ++ else ++ return (TARGET_AVX512VL ++ ? 
"vpxord\t%x0, %x0, %x0" ++ : "vpxord\t%g0, %g0, %g0"); ++ ++ default: ++ gcc_unreachable (); + } +- case E_DCmode: +- classes[0] = X86_64_SSEDF_CLASS; +- classes[1] = X86_64_SSEDF_CLASS; +- return 2; +- case E_XCmode: +- classes[0] = X86_64_COMPLEX_X87_CLASS; +- return 1; +- case E_TCmode: +- /* This modes is larger than 16 bytes. */ +- return 0; +- case E_V8SFmode: +- case E_V8SImode: +- case E_V32QImode: +- case E_V16HImode: +- case E_V4DFmode: +- case E_V4DImode: +- classes[0] = X86_64_SSE_CLASS; +- classes[1] = X86_64_SSEUP_CLASS; +- classes[2] = X86_64_SSEUP_CLASS; +- classes[3] = X86_64_SSEUP_CLASS; +- return 4; +- case E_V8DFmode: +- case E_V16SFmode: +- case E_V8DImode: +- case E_V16SImode: +- case E_V32HImode: +- case E_V64QImode: +- classes[0] = X86_64_SSE_CLASS; +- classes[1] = X86_64_SSEUP_CLASS; +- classes[2] = X86_64_SSEUP_CLASS; +- classes[3] = X86_64_SSEUP_CLASS; +- classes[4] = X86_64_SSEUP_CLASS; +- classes[5] = X86_64_SSEUP_CLASS; +- classes[6] = X86_64_SSEUP_CLASS; +- classes[7] = X86_64_SSEUP_CLASS; +- return 8; +- case E_V4SFmode: +- case E_V4SImode: +- case E_V16QImode: +- case E_V8HImode: +- case E_V2DFmode: +- case E_V2DImode: +- classes[0] = X86_64_SSE_CLASS; +- classes[1] = X86_64_SSEUP_CLASS; +- return 2; +- case E_V1TImode: +- case E_V1DImode: +- case E_V2SFmode: +- case E_V2SImode: +- case E_V4HImode: +- case E_V8QImode: +- classes[0] = X86_64_SSE_CLASS; +- return 1; +- case E_BLKmode: +- case E_VOIDmode: +- return 0; +- default: +- gcc_assert (VECTOR_MODE_P (mode)); ++ } ++ else if (x == constm1_rtx || vector_all_ones_operand (x, mode)) ++ { ++ enum attr_mode insn_mode = get_attr_mode (insn); ++ ++ switch (insn_mode) ++ { ++ case MODE_XI: ++ case MODE_V8DF: ++ case MODE_V16SF: ++ gcc_assert (TARGET_AVX512F); ++ return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; + +- if (bytes > 16) +- return 0; ++ case MODE_OI: ++ case MODE_V4DF: ++ case MODE_V8SF: ++ gcc_assert (TARGET_AVX2); ++ /* FALLTHRU */ ++ case MODE_TI: ++ case MODE_V2DF: ++ case MODE_V4SF: ++ gcc_assert (TARGET_SSE2); ++ if (!EXT_REX_SSE_REG_P (operands[0])) ++ return (TARGET_AVX ++ ? "vpcmpeqd\t%0, %0, %0" ++ : "pcmpeqd\t%0, %0"); ++ else if (TARGET_AVX512VL) ++ return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}"; ++ else ++ return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; + +- gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); ++ default: ++ gcc_unreachable (); ++ } ++ } + +- if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) +- classes[0] = X86_64_INTEGERSI_CLASS; +- else +- classes[0] = X86_64_INTEGER_CLASS; +- classes[1] = X86_64_INTEGER_CLASS; +- return 1 + (bytes > 8); +- } ++ gcc_unreachable (); + } + +-/* Examine the argument and return set number of register required in each +- class. Return true iff parameter should be passed in memory. */ ++/* Returns true if INSN can be transformed from a memory load ++ to a supported FP constant load. 
*/ + +-static bool +-examine_argument (machine_mode mode, const_tree type, int in_return, +- int *int_nregs, int *sse_nregs) ++bool ++ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst) + { +- enum x86_64_reg_class regclass[MAX_CLASSES]; +- int n = classify_argument (mode, type, regclass, 0); ++ rtx src = find_constant_src (insn); + +- *int_nregs = 0; +- *sse_nregs = 0; ++ gcc_assert (REG_P (dst)); + +- if (!n) +- return true; +- for (n--; n >= 0; n--) +- switch (regclass[n]) +- { +- case X86_64_INTEGER_CLASS: +- case X86_64_INTEGERSI_CLASS: +- (*int_nregs)++; +- break; +- case X86_64_SSE_CLASS: +- case X86_64_SSESF_CLASS: +- case X86_64_SSEDF_CLASS: +- (*sse_nregs)++; +- break; +- case X86_64_NO_CLASS: +- case X86_64_SSEUP_CLASS: +- break; +- case X86_64_X87_CLASS: +- case X86_64_X87UP_CLASS: +- case X86_64_COMPLEX_X87_CLASS: +- if (!in_return) +- return true; +- break; +- case X86_64_MEMORY_CLASS: +- gcc_unreachable (); +- } ++ if (src == NULL ++ || (SSE_REGNO_P (REGNO (dst)) ++ && standard_sse_constant_p (src, GET_MODE (dst)) != 1) ++ || (STACK_REGNO_P (REGNO (dst)) ++ && standard_80387_constant_p (src) < 1)) ++ return false; + +- return false; ++ return true; + } + +-/* Construct container for the argument used by GCC interface. See +- FUNCTION_ARG for the detailed description. */ ++/* Returns true if OP contains a symbol reference */ + +-static rtx +-construct_container (machine_mode mode, machine_mode orig_mode, +- const_tree type, int in_return, int nintregs, int nsseregs, +- const int *intreg, int sse_regno) ++bool ++symbolic_reference_mentioned_p (rtx op) + { +- /* The following variables hold the static issued_error state. */ +- static bool issued_sse_arg_error; +- static bool issued_sse_ret_error; +- static bool issued_x87_ret_error; +- +- machine_mode tmpmode; +- int bytes +- = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); +- enum x86_64_reg_class regclass[MAX_CLASSES]; +- int n; ++ const char *fmt; + int i; +- int nexps = 0; +- int needed_sseregs, needed_intregs; +- rtx exp[MAX_CLASSES]; +- rtx ret; + +- n = classify_argument (mode, type, regclass, 0); +- if (!n) +- return NULL; +- if (examine_argument (mode, type, in_return, &needed_intregs, +- &needed_sseregs)) +- return NULL; +- if (needed_intregs > nintregs || needed_sseregs > nsseregs) +- return NULL; ++ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) ++ return true; + +- /* We allowed the user to turn off SSE for kernel mode. Don't crash if +- some less clueful developer tries to use floating-point anyway. */ +- if (needed_sseregs && !TARGET_SSE) ++ fmt = GET_RTX_FORMAT (GET_CODE (op)); ++ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { +- if (in_return) +- { +- if (!issued_sse_ret_error) +- { +- error ("SSE register return with SSE disabled"); +- issued_sse_ret_error = true; +- } +- } +- else if (!issued_sse_arg_error) ++ if (fmt[i] == 'E') + { +- error ("SSE register argument with SSE disabled"); +- issued_sse_arg_error = true; ++ int j; ++ ++ for (j = XVECLEN (op, i) - 1; j >= 0; j--) ++ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) ++ return true; + } +- return NULL; ++ ++ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) ++ return true; + } + +- /* Likewise, error if the ABI requires us to return values in the +- x87 registers and the user specified -mno-80387. 
*/ +- if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) +- for (i = 0; i < n; i++) +- if (regclass[i] == X86_64_X87_CLASS +- || regclass[i] == X86_64_X87UP_CLASS +- || regclass[i] == X86_64_COMPLEX_X87_CLASS) +- { +- if (!issued_x87_ret_error) +- { +- error ("x87 register return with x87 disabled"); +- issued_x87_ret_error = true; +- } +- return NULL; +- } ++ return false; ++} + +- /* First construct simple cases. Avoid SCmode, since we want to use +- single register to pass this type. */ +- if (n == 1 && mode != SCmode) +- switch (regclass[0]) +- { +- case X86_64_INTEGER_CLASS: +- case X86_64_INTEGERSI_CLASS: +- return gen_rtx_REG (mode, intreg[0]); +- case X86_64_SSE_CLASS: +- case X86_64_SSESF_CLASS: +- case X86_64_SSEDF_CLASS: +- if (mode != BLKmode) +- return gen_reg_or_parallel (mode, orig_mode, +- GET_SSE_REGNO (sse_regno)); +- break; +- case X86_64_X87_CLASS: +- case X86_64_COMPLEX_X87_CLASS: +- return gen_rtx_REG (mode, FIRST_STACK_REG); +- case X86_64_NO_CLASS: +- /* Zero sized array, struct or class. */ +- return NULL; +- default: +- gcc_unreachable (); +- } +- if (n == 2 +- && regclass[0] == X86_64_SSE_CLASS +- && regclass[1] == X86_64_SSEUP_CLASS +- && mode != BLKmode) +- return gen_reg_or_parallel (mode, orig_mode, +- GET_SSE_REGNO (sse_regno)); +- if (n == 4 +- && regclass[0] == X86_64_SSE_CLASS +- && regclass[1] == X86_64_SSEUP_CLASS +- && regclass[2] == X86_64_SSEUP_CLASS +- && regclass[3] == X86_64_SSEUP_CLASS +- && mode != BLKmode) +- return gen_reg_or_parallel (mode, orig_mode, +- GET_SSE_REGNO (sse_regno)); +- if (n == 8 +- && regclass[0] == X86_64_SSE_CLASS +- && regclass[1] == X86_64_SSEUP_CLASS +- && regclass[2] == X86_64_SSEUP_CLASS +- && regclass[3] == X86_64_SSEUP_CLASS +- && regclass[4] == X86_64_SSEUP_CLASS +- && regclass[5] == X86_64_SSEUP_CLASS +- && regclass[6] == X86_64_SSEUP_CLASS +- && regclass[7] == X86_64_SSEUP_CLASS +- && mode != BLKmode) +- return gen_reg_or_parallel (mode, orig_mode, +- GET_SSE_REGNO (sse_regno)); +- if (n == 2 +- && regclass[0] == X86_64_X87_CLASS +- && regclass[1] == X86_64_X87UP_CLASS) +- return gen_rtx_REG (XFmode, FIRST_STACK_REG); ++/* Return true if it is appropriate to emit `ret' instructions in the ++ body of a function. Do this only if the epilogue is simple, needing a ++ couple of insns. Prior to reloading, we can't tell how many registers ++ must be saved, so return false then. Return false if there is no frame ++ marker to de-allocate. */ + +- if (n == 2 +- && regclass[0] == X86_64_INTEGER_CLASS +- && regclass[1] == X86_64_INTEGER_CLASS +- && (mode == CDImode || mode == TImode || mode == BLKmode) +- && intreg[0] + 1 == intreg[1]) +- { +- if (mode == BLKmode) +- { +- /* Use TImode for BLKmode values in 2 integer registers. */ +- exp[0] = gen_rtx_EXPR_LIST (VOIDmode, +- gen_rtx_REG (TImode, intreg[0]), +- GEN_INT (0)); +- ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1)); +- XVECEXP (ret, 0, 0) = exp[0]; +- return ret; +- } +- else +- return gen_rtx_REG (mode, intreg[0]); +- } ++bool ++ix86_can_use_return_insn_p (void) ++{ ++ if (ix86_function_naked (current_function_decl)) ++ return false; + +- /* Otherwise figure out the entries of the PARALLEL. */ +- for (i = 0; i < n; i++) +- { +- int pos; ++ /* Don't use `ret' instruction in interrupt handler. */ ++ if (! 
reload_completed ++ || frame_pointer_needed ++ || cfun->machine->func_type != TYPE_NORMAL) ++ return 0; + +- switch (regclass[i]) +- { +- case X86_64_NO_CLASS: +- break; +- case X86_64_INTEGER_CLASS: +- case X86_64_INTEGERSI_CLASS: +- /* Merge TImodes on aligned occasions here too. */ +- if (i * 8 + 8 > bytes) +- { +- unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT; +- if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode)) +- /* We've requested 24 bytes we +- don't have mode for. Use DImode. */ +- tmpmode = DImode; +- } +- else if (regclass[i] == X86_64_INTEGERSI_CLASS) +- tmpmode = SImode; +- else +- tmpmode = DImode; +- exp [nexps++] +- = gen_rtx_EXPR_LIST (VOIDmode, +- gen_rtx_REG (tmpmode, *intreg), +- GEN_INT (i*8)); +- intreg++; +- break; +- case X86_64_SSESF_CLASS: +- exp [nexps++] +- = gen_rtx_EXPR_LIST (VOIDmode, +- gen_rtx_REG (SFmode, +- GET_SSE_REGNO (sse_regno)), +- GEN_INT (i*8)); +- sse_regno++; +- break; +- case X86_64_SSEDF_CLASS: +- exp [nexps++] +- = gen_rtx_EXPR_LIST (VOIDmode, +- gen_rtx_REG (DFmode, +- GET_SSE_REGNO (sse_regno)), +- GEN_INT (i*8)); +- sse_regno++; +- break; +- case X86_64_SSE_CLASS: +- pos = i; +- switch (n) +- { +- case 1: +- tmpmode = DImode; +- break; +- case 2: +- if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) +- { +- tmpmode = TImode; +- i++; +- } +- else +- tmpmode = DImode; +- break; +- case 4: +- gcc_assert (i == 0 +- && regclass[1] == X86_64_SSEUP_CLASS +- && regclass[2] == X86_64_SSEUP_CLASS +- && regclass[3] == X86_64_SSEUP_CLASS); +- tmpmode = OImode; +- i += 3; +- break; +- case 8: +- gcc_assert (i == 0 +- && regclass[1] == X86_64_SSEUP_CLASS +- && regclass[2] == X86_64_SSEUP_CLASS +- && regclass[3] == X86_64_SSEUP_CLASS +- && regclass[4] == X86_64_SSEUP_CLASS +- && regclass[5] == X86_64_SSEUP_CLASS +- && regclass[6] == X86_64_SSEUP_CLASS +- && regclass[7] == X86_64_SSEUP_CLASS); +- tmpmode = XImode; +- i += 7; +- break; +- default: +- gcc_unreachable (); +- } +- exp [nexps++] +- = gen_rtx_EXPR_LIST (VOIDmode, +- gen_rtx_REG (tmpmode, +- GET_SSE_REGNO (sse_regno)), +- GEN_INT (pos*8)); +- sse_regno++; +- break; +- default: +- gcc_unreachable (); +- } +- } ++ /* Don't allow more than 32k pop, since that's all we can do ++ with one instruction. */ ++ if (crtl->args.pops_args && crtl->args.size >= 32768) ++ return 0; + +- /* Empty aligned struct, union or class. */ +- if (nexps == 0) +- return NULL; ++ struct ix86_frame &frame = cfun->machine->frame; ++ return (frame.stack_pointer_offset == UNITS_PER_WORD ++ && (frame.nregs + frame.nsseregs) == 0); ++} ++ ++/* Value should be nonzero if functions must have frame pointers. ++ Zero means the frame pointer need not be set up (and parms may ++ be accessed via the stack pointer) in functions that seem suitable. */ + +- ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); +- for (i = 0; i < nexps; i++) +- XVECEXP (ret, 0, i) = exp [i]; +- return ret; ++static bool ++ix86_frame_pointer_required (void) ++{ ++ /* If we accessed previous frames, then the generated code expects ++ to be able to access the saved ebp value in our frame. */ ++ if (cfun->machine->accesses_prev_frame) ++ return true; ++ ++ /* Several x86 os'es need a frame pointer for other reasons, ++ usually pertaining to setjmp. */ ++ if (SUBTARGET_FRAME_POINTER_REQUIRED) ++ return true; ++ ++ /* For older 32-bit runtimes setjmp requires valid frame-pointer. */ ++ if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) ++ return true; ++ ++ /* Win64 SEH, very large frames need a frame-pointer as maximum stack ++ allocation is 4GB. 
*/ ++ if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) ++ return true; ++ ++ /* SSE saves require frame-pointer when stack is misaligned. */ ++ if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128) ++ return true; ++ ++ /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER ++ turns off the frame pointer by default. Turn it back on now if ++ we've not got a leaf function. */ ++ if (TARGET_OMIT_LEAF_FRAME_POINTER ++ && (!crtl->is_leaf ++ || ix86_current_function_calls_tls_descriptor)) ++ return true; ++ ++ if (crtl->profile && !flag_fentry) ++ return true; ++ ++ return false; + } + +-/* Update the data in CUM to advance over an argument of mode MODE +- and data type TYPE. (TYPE is null for libcalls where that information +- may not be available.) ++/* Record that the current function accesses previous call frames. */ + +- Return a number of integer regsiters advanced over. */ ++void ++ix86_setup_frame_addresses (void) ++{ ++ cfun->machine->accesses_prev_frame = 1; ++} ++ ++#ifndef USE_HIDDEN_LINKONCE ++# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0) ++# define USE_HIDDEN_LINKONCE 1 ++# else ++# define USE_HIDDEN_LINKONCE 0 ++# endif ++#endif + +-static int +-function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, +- const_tree type, HOST_WIDE_INT bytes, +- HOST_WIDE_INT words) ++/* Label count for call and return thunks. It is used to make unique ++ labels in call and return thunks. */ ++static int indirectlabelno; ++ ++/* True if call thunk function is needed. */ ++static bool indirect_thunk_needed = false; ++ ++/* Bit masks of integer registers, which contain branch target, used ++ by call thunk functions. */ ++static int indirect_thunks_used; ++ ++/* True if return thunk function is needed. */ ++static bool indirect_return_needed = false; ++ ++/* True if return thunk function via CX is needed. */ ++static bool indirect_return_via_cx; ++ ++#ifndef INDIRECT_LABEL ++# define INDIRECT_LABEL "LIND" ++#endif ++ ++/* Indicate what prefix is needed for an indirect branch. */ ++enum indirect_thunk_prefix + { +- int res = 0; +- bool error_p = false; ++ indirect_thunk_prefix_none, ++ indirect_thunk_prefix_nt ++}; + +- if (TARGET_IAMCU) ++/* Return the prefix needed for an indirect branch INSN. */ ++ ++enum indirect_thunk_prefix ++indirect_thunk_need_prefix (rtx_insn *insn) ++{ ++ enum indirect_thunk_prefix need_prefix; ++ if ((cfun->machine->indirect_branch_type ++ == indirect_branch_thunk_extern) ++ && ix86_notrack_prefixed_insn_p (insn)) + { +- /* Intel MCU psABI passes scalars and aggregates no larger than 8 +- bytes in registers. */ +- if (!VECTOR_MODE_P (mode) && bytes <= 8) +- goto pass_in_reg; +- return res; ++ /* NOTRACK prefix is only used with external thunk so that it ++ can be properly updated to support CET at run-time. */ ++ need_prefix = indirect_thunk_prefix_nt; + } ++ else ++ need_prefix = indirect_thunk_prefix_none; ++ return need_prefix; ++} + +- switch (mode) +- { +- default: +- break; ++/* Fills in the label name that should be used for the indirect thunk. 
*/ + +- case E_BLKmode: +- if (bytes < 0) +- break; +- /* FALLTHRU */ ++static void ++indirect_thunk_name (char name[32], unsigned int regno, ++ enum indirect_thunk_prefix need_prefix, ++ bool ret_p) ++{ ++ if (regno != INVALID_REGNUM && regno != CX_REG && ret_p) ++ gcc_unreachable (); + +- case E_DImode: +- case E_SImode: +- case E_HImode: +- case E_QImode: +-pass_in_reg: +- cum->words += words; +- cum->nregs -= words; +- cum->regno += words; +- if (cum->nregs >= 0) +- res = words; +- if (cum->nregs <= 0) ++ if (USE_HIDDEN_LINKONCE) ++ { ++ const char *prefix; ++ ++ if (need_prefix == indirect_thunk_prefix_nt ++ && regno != INVALID_REGNUM) + { +- cum->nregs = 0; +- cfun->machine->arg_reg_available = false; +- cum->regno = 0; ++ /* NOTRACK prefix is only used with external thunk via ++ register so that NOTRACK prefix can be added to indirect ++ branch via register to support CET at run-time. */ ++ prefix = "_nt"; + } +- break; +- +- case E_OImode: +- /* OImode shouldn't be used directly. */ +- gcc_unreachable (); ++ else ++ prefix = ""; + +- case E_DFmode: +- if (cum->float_in_sse == -1) +- error_p = true; +- if (cum->float_in_sse < 2) +- break; +- /* FALLTHRU */ +- case E_SFmode: +- if (cum->float_in_sse == -1) +- error_p = true; +- if (cum->float_in_sse < 1) +- break; +- /* FALLTHRU */ ++ const char *ret = ret_p ? "return" : "indirect"; + +- case E_V8SFmode: +- case E_V8SImode: +- case E_V64QImode: +- case E_V32HImode: +- case E_V16SImode: +- case E_V8DImode: +- case E_V16SFmode: +- case E_V8DFmode: +- case E_V32QImode: +- case E_V16HImode: +- case E_V4DFmode: +- case E_V4DImode: +- case E_TImode: +- case E_V16QImode: +- case E_V8HImode: +- case E_V4SImode: +- case E_V2DImode: +- case E_V4SFmode: +- case E_V2DFmode: +- if (!type || !AGGREGATE_TYPE_P (type)) ++ if (regno != INVALID_REGNUM) + { +- cum->sse_words += words; +- cum->sse_nregs -= 1; +- cum->sse_regno += 1; +- if (cum->sse_nregs <= 0) +- { +- cum->sse_nregs = 0; +- cum->sse_regno = 0; +- } +- } +- break; +- +- case E_V8QImode: +- case E_V4HImode: +- case E_V2SImode: +- case E_V2SFmode: +- case E_V1TImode: +- case E_V1DImode: +- if (!type || !AGGREGATE_TYPE_P (type)) +- { +- cum->mmx_words += words; +- cum->mmx_nregs -= 1; +- cum->mmx_regno += 1; +- if (cum->mmx_nregs <= 0) +- { +- cum->mmx_nregs = 0; +- cum->mmx_regno = 0; +- } ++ const char *reg_prefix; ++ if (LEGACY_INT_REGNO_P (regno)) ++ reg_prefix = TARGET_64BIT ? "r" : "e"; ++ else ++ reg_prefix = ""; ++ sprintf (name, "__x86_%s_thunk%s_%s%s", ++ ret, prefix, reg_prefix, reg_names[regno]); + } +- break; ++ else ++ sprintf (name, "__x86_%s_thunk%s", ret, prefix); + } +- if (error_p) ++ else + { +- cum->float_in_sse = 0; +- error ("calling %qD with SSE calling convention without " +- "SSE/SSE2 enabled", cum->decl); +- sorry ("this is a GCC bug that can be worked around by adding " +- "attribute used to function called"); ++ if (regno != INVALID_REGNUM) ++ ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno); ++ else ++ { ++ if (ret_p) ++ ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0); ++ else ++ ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0); ++ } + } +- +- return res; + } + +-static int +-function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, +- const_tree type, HOST_WIDE_INT words, bool named) +-{ +- int int_nregs, sse_nregs; ++/* Output a call and return thunk for indirect branch. If REGNO != -1, ++ the function address is in REGNO and the call and return thunk looks like: + +- /* Unnamed 512 and 256bit vector mode parameters are passed on stack. 
*/ +- if (!named && (VALID_AVX512F_REG_MODE (mode) +- || VALID_AVX256_REG_MODE (mode))) +- return 0; ++ call L2 ++ L1: ++ pause ++ lfence ++ jmp L1 ++ L2: ++ mov %REG, (%sp) ++ ret + +- if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) +- && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) +- { +- cum->nregs -= int_nregs; +- cum->sse_nregs -= sse_nregs; +- cum->regno += int_nregs; +- cum->sse_regno += sse_nregs; +- return int_nregs; +- } +- else +- { +- int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; +- cum->words = ROUND_UP (cum->words, align); +- cum->words += words; +- return 0; +- } +-} ++ Otherwise, the function address is on the top of stack and the ++ call and return thunk looks like: + +-static int +-function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, +- HOST_WIDE_INT words) ++ call L2 ++ L1: ++ pause ++ lfence ++ jmp L1 ++ L2: ++ lea WORD_SIZE(%sp), %sp ++ ret ++ */ ++ ++static void ++output_indirect_thunk (unsigned int regno) + { +- /* Otherwise, this should be passed indirect. */ +- gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); ++ char indirectlabel1[32]; ++ char indirectlabel2[32]; + +- cum->words += words; +- if (cum->nregs > 0) +- { +- cum->nregs -= 1; +- cum->regno += 1; +- return 1; +- } +- return 0; +-} ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL, ++ indirectlabelno++); ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL, ++ indirectlabelno++); + +-/* Update the data in CUM to advance over an argument of mode MODE and +- data type TYPE. (TYPE is null for libcalls where that information +- may not be available.) */ ++ /* Call */ ++ fputs ("\tcall\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel2); ++ fputc ('\n', asm_out_file); + +-static void +-ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, +- const_tree type, bool named) +-{ +- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); +- HOST_WIDE_INT bytes, words; +- int nregs; ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); + +- /* The argument of interrupt handler is a special case and is +- handled in ix86_function_arg. */ +- if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) +- return; ++ /* AMD and Intel CPUs prefer each a different instruction as loop filler. ++ Usage of both pause + lfence is compromise solution. */ ++ fprintf (asm_out_file, "\tpause\n\tlfence\n"); + +- if (mode == BLKmode) +- bytes = int_size_in_bytes (type); +- else +- bytes = GET_MODE_SIZE (mode); +- words = CEIL (bytes, UNITS_PER_WORD); ++ /* Jump. */ ++ fputs ("\tjmp\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel1); ++ fputc ('\n', asm_out_file); + +- if (type) +- mode = type_natural_mode (type, NULL, false); ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); + +- if (TARGET_64BIT) ++ /* The above call insn pushed a word to stack. Adjust CFI info. */ ++ if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ()) + { +- enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; ++ if (! 
dwarf2out_do_cfi_asm ()) ++ { ++ dw_cfi_ref xcfi = ggc_cleared_alloc (); ++ xcfi->dw_cfi_opc = DW_CFA_advance_loc4; ++ xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2); ++ vec_safe_push (cfun->fde->dw_fde_cfi, xcfi); ++ } ++ dw_cfi_ref xcfi = ggc_cleared_alloc (); ++ xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset; ++ xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD; ++ vec_safe_push (cfun->fde->dw_fde_cfi, xcfi); ++ dwarf2out_emit_cfi (xcfi); ++ } + +- if (call_abi == MS_ABI) +- nregs = function_arg_advance_ms_64 (cum, bytes, words); +- else +- nregs = function_arg_advance_64 (cum, mode, type, words, named); ++ if (regno != INVALID_REGNUM) ++ { ++ /* MOV. */ ++ rtx xops[2]; ++ xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx); ++ xops[1] = gen_rtx_REG (word_mode, regno); ++ output_asm_insn ("mov\t{%1, %0|%0, %1}", xops); + } + else +- nregs = function_arg_advance_32 (cum, mode, type, bytes, words); +- +- if (!nregs) + { +- /* Track if there are outgoing arguments on stack. */ +- if (cum->caller) +- cfun->machine->outgoing_args_on_stack = true; ++ /* LEA. */ ++ rtx xops[2]; ++ xops[0] = stack_pointer_rtx; ++ xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); ++ output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops); + } +-} + +-/* Define where to put the arguments to a function. +- Value is zero to push the argument on the stack, +- or a hard register in which to store the argument. ++ fputs ("\tret\n", asm_out_file); ++} + +- MODE is the argument's machine mode. +- TYPE is the data type of the argument (as a tree). +- This is null for libcalls where that information may +- not be available. +- CUM is a variable of type CUMULATIVE_ARGS which gives info about +- the preceding args and about the function being called. +- NAMED is nonzero if this argument is a named parameter +- (otherwise it is an extra parameter matching an ellipsis). */ ++/* Output a funtion with a call and return thunk for indirect branch. ++ If REGNO != INVALID_REGNUM, the function address is in REGNO. ++ Otherwise, the function address is on the top of stack. Thunk is ++ used for function return if RET_P is true. */ + +-static rtx +-function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode, +- machine_mode orig_mode, const_tree type, +- HOST_WIDE_INT bytes, HOST_WIDE_INT words) ++static void ++output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix, ++ unsigned int regno, bool ret_p) + { +- bool error_p = false; ++ char name[32]; ++ tree decl; + +- /* Avoid the AL settings for the Unix64 ABI. */ +- if (mode == VOIDmode) +- return constm1_rtx; ++ /* Create __x86_indirect_thunk. */ ++ indirect_thunk_name (name, regno, need_prefix, ret_p); ++ decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, ++ get_identifier (name), ++ build_function_type_list (void_type_node, NULL_TREE)); ++ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, ++ NULL_TREE, void_type_node); ++ TREE_PUBLIC (decl) = 1; ++ TREE_STATIC (decl) = 1; ++ DECL_IGNORED_P (decl) = 1; + +- if (TARGET_IAMCU) ++#if TARGET_MACHO ++ if (TARGET_MACHO) + { +- /* Intel MCU psABI passes scalars and aggregates no larger than 8 +- bytes in registers. 
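The added hunks above carry the retpoline machinery: indirect_thunk_name builds the __x86_{indirect,return}_thunk[_nt][_REG] symbol names and output_indirect_thunk emits the call/pause/lfence capture loop sketched in its comment; these are the helpers behind -mindirect-branch= and -mfunction-return=. A minimal naming sketch (illustrative C, not taken from the patch; the real code derives the register suffix from reg_names[] plus an "r"/"e" prefix via LEGACY_INT_REGNO_P):

    #include <stdio.h>

    /* Illustrative helper mirroring the sprintf formats used by
       indirect_thunk_name above in the USE_HIDDEN_LINKONCE case.  It takes
       a full register name directly instead of deriving the prefix.  */
    static void
    thunk_name (char name[32], const char *reg, int notrack, int ret_p)
    {
      const char *kind = ret_p ? "return" : "indirect";
      const char *prefix = notrack ? "_nt" : "";

      if (reg)
        snprintf (name, 32, "__x86_%s_thunk%s_%s", kind, prefix, reg);
      else
        snprintf (name, 32, "__x86_%s_thunk%s", kind, prefix);
    }

    int
    main (void)
    {
      char buf[32];

      thunk_name (buf, "rax", 0, 0);   /* __x86_indirect_thunk_rax */
      puts (buf);
      thunk_name (buf, NULL, 0, 0);    /* __x86_indirect_thunk (operand on stack) */
      puts (buf);
      thunk_name (buf, "rcx", 0, 1);   /* return thunk through %rcx */
      puts (buf);
      return 0;
    }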
*/ +- if (!VECTOR_MODE_P (mode) && bytes <= 8) +- goto pass_in_reg; +- return NULL_RTX; ++ switch_to_section (darwin_sections[picbase_thunk_section]); ++ fputs ("\t.weak_definition\t", asm_out_file); ++ assemble_name (asm_out_file, name); ++ fputs ("\n\t.private_extern\t", asm_out_file); ++ assemble_name (asm_out_file, name); ++ putc ('\n', asm_out_file); ++ ASM_OUTPUT_LABEL (asm_out_file, name); ++ DECL_WEAK (decl) = 1; + } ++ else ++#endif ++ if (USE_HIDDEN_LINKONCE) ++ { ++ cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); + +- switch (mode) +- { +- default: +- break; ++ targetm.asm_out.unique_section (decl, 0); ++ switch_to_section (get_named_section (decl, NULL, 0)); + +- case E_BLKmode: +- if (bytes < 0) +- break; +- /* FALLTHRU */ +- case E_DImode: +- case E_SImode: +- case E_HImode: +- case E_QImode: +-pass_in_reg: +- if (words <= cum->nregs) +- { +- int regno = cum->regno; ++ targetm.asm_out.globalize_label (asm_out_file, name); ++ fputs ("\t.hidden\t", asm_out_file); ++ assemble_name (asm_out_file, name); ++ putc ('\n', asm_out_file); ++ ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); ++ } ++ else ++ { ++ switch_to_section (text_section); ++ ASM_OUTPUT_LABEL (asm_out_file, name); ++ } + +- /* Fastcall allocates the first two DWORD (SImode) or +- smaller arguments to ECX and EDX if it isn't an +- aggregate type . */ +- if (cum->fastcall) +- { +- if (mode == BLKmode +- || mode == DImode +- || (type && AGGREGATE_TYPE_P (type))) +- break; ++ DECL_INITIAL (decl) = make_node (BLOCK); ++ current_function_decl = decl; ++ allocate_struct_function (decl, false); ++ init_function_start (decl); ++ /* We're about to hide the function body from callees of final_* by ++ emitting it directly; tell them we're a thunk, if they care. */ ++ cfun->is_thunk = true; ++ first_function_block_is_cold = false; ++ /* Make sure unwind info is emitted for the thunk if needed. */ ++ final_start_function (emit_barrier (), asm_out_file, 1); + +- /* ECX not EAX is the first allocated register. */ +- if (regno == AX_REG) +- regno = CX_REG; +- } +- return gen_rtx_REG (mode, regno); +- } +- break; ++ output_indirect_thunk (regno); + +- case E_DFmode: +- if (cum->float_in_sse == -1) +- error_p = true; +- if (cum->float_in_sse < 2) +- break; +- /* FALLTHRU */ +- case E_SFmode: +- if (cum->float_in_sse == -1) +- error_p = true; +- if (cum->float_in_sse < 1) +- break; +- /* FALLTHRU */ +- case E_TImode: +- /* In 32bit, we pass TImode in xmm registers. */ +- case E_V16QImode: +- case E_V8HImode: +- case E_V4SImode: +- case E_V2DImode: +- case E_V4SFmode: +- case E_V2DFmode: +- if (!type || !AGGREGATE_TYPE_P (type)) +- { +- if (cum->sse_nregs) +- return gen_reg_or_parallel (mode, orig_mode, +- cum->sse_regno + FIRST_SSE_REG); +- } +- break; ++ final_end_function (); ++ init_insn_lengths (); ++ free_after_compilation (cfun); ++ set_cfun (NULL); ++ current_function_decl = NULL; ++} + +- case E_OImode: +- case E_XImode: +- /* OImode and XImode shouldn't be used directly. 
*/ +- gcc_unreachable (); ++static int pic_labels_used; + +- case E_V64QImode: +- case E_V32HImode: +- case E_V16SImode: +- case E_V8DImode: +- case E_V16SFmode: +- case E_V8DFmode: +- case E_V8SFmode: +- case E_V8SImode: +- case E_V32QImode: +- case E_V16HImode: +- case E_V4DFmode: +- case E_V4DImode: +- if (!type || !AGGREGATE_TYPE_P (type)) +- { +- if (cum->sse_nregs) +- return gen_reg_or_parallel (mode, orig_mode, +- cum->sse_regno + FIRST_SSE_REG); +- } +- break; ++/* Fills in the label name that should be used for a pc thunk for ++ the given register. */ + +- case E_V8QImode: +- case E_V4HImode: +- case E_V2SImode: +- case E_V2SFmode: +- case E_V1TImode: +- case E_V1DImode: +- if (!type || !AGGREGATE_TYPE_P (type)) +- { +- if (cum->mmx_nregs) +- return gen_reg_or_parallel (mode, orig_mode, +- cum->mmx_regno + FIRST_MMX_REG); +- } +- break; +- } +- if (error_p) +- { +- cum->float_in_sse = 0; +- error ("calling %qD with SSE calling convention without " +- "SSE/SSE2 enabled", cum->decl); +- sorry ("this is a GCC bug that can be worked around by adding " +- "attribute used to function called"); +- } ++static void ++get_pc_thunk_name (char name[32], unsigned int regno) ++{ ++ gcc_assert (!TARGET_64BIT); + +- return NULL_RTX; ++ if (USE_HIDDEN_LINKONCE) ++ sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]); ++ else ++ ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); + } + +-static rtx +-function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, +- machine_mode orig_mode, const_tree type, bool named) ++ ++/* This function generates code for -fpic that loads %ebx with ++ the return address of the caller and then returns. */ ++ ++static void ++ix86_code_end (void) + { +- /* Handle a hidden AL argument containing number of registers +- for varargs x86-64 functions. */ +- if (mode == VOIDmode) +- return GEN_INT (cum->maybe_vaarg +- ? (cum->sse_nregs < 0 +- ? X86_64_SSE_REGPARM_MAX +- : cum->sse_regno) +- : -1); ++ rtx xops[2]; ++ unsigned int regno; + +- switch (mode) +- { +- default: +- break; ++ if (indirect_return_needed) ++ output_indirect_thunk_function (indirect_thunk_prefix_none, ++ INVALID_REGNUM, true); ++ if (indirect_return_via_cx) ++ output_indirect_thunk_function (indirect_thunk_prefix_none, ++ CX_REG, true); ++ if (indirect_thunk_needed) ++ output_indirect_thunk_function (indirect_thunk_prefix_none, ++ INVALID_REGNUM, false); + +- case E_V8SFmode: +- case E_V8SImode: +- case E_V32QImode: +- case E_V16HImode: +- case E_V4DFmode: +- case E_V4DImode: +- case E_V16SFmode: +- case E_V16SImode: +- case E_V64QImode: +- case E_V32HImode: +- case E_V8DFmode: +- case E_V8DImode: +- /* Unnamed 256 and 512bit vector mode parameters are passed on stack. 
*/ +- if (!named) +- return NULL; +- break; ++ for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++) ++ { ++ unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1; ++ if ((indirect_thunks_used & (1 << i))) ++ output_indirect_thunk_function (indirect_thunk_prefix_none, ++ regno, false); + } + +- return construct_container (mode, orig_mode, type, 0, cum->nregs, +- cum->sse_nregs, +- &x86_64_int_parameter_registers [cum->regno], +- cum->sse_regno); +-} ++ for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++) ++ { ++ char name[32]; ++ tree decl; + +-static rtx +-function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, +- machine_mode orig_mode, bool named, const_tree type, +- HOST_WIDE_INT bytes) +-{ +- unsigned int regno; ++ if ((indirect_thunks_used & (1 << regno))) ++ output_indirect_thunk_function (indirect_thunk_prefix_none, ++ regno, false); + +- /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call. +- We use value of -2 to specify that current function call is MSABI. */ +- if (mode == VOIDmode) +- return GEN_INT (-2); ++ if (!(pic_labels_used & (1 << regno))) ++ continue; + +- /* If we've run out of registers, it goes on the stack. */ +- if (cum->nregs == 0) +- return NULL_RTX; ++ get_pc_thunk_name (name, regno); + +- regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; ++ decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, ++ get_identifier (name), ++ build_function_type_list (void_type_node, NULL_TREE)); ++ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, ++ NULL_TREE, void_type_node); ++ TREE_PUBLIC (decl) = 1; ++ TREE_STATIC (decl) = 1; ++ DECL_IGNORED_P (decl) = 1; + +- /* Only floating point modes are passed in anything but integer regs. */ +- if (TARGET_SSE && (mode == SFmode || mode == DFmode)) +- { +- if (named) ++#if TARGET_MACHO ++ if (TARGET_MACHO) + { +- if (type == NULL_TREE || !AGGREGATE_TYPE_P (type)) +- regno = cum->regno + FIRST_SSE_REG; ++ switch_to_section (darwin_sections[picbase_thunk_section]); ++ fputs ("\t.weak_definition\t", asm_out_file); ++ assemble_name (asm_out_file, name); ++ fputs ("\n\t.private_extern\t", asm_out_file); ++ assemble_name (asm_out_file, name); ++ putc ('\n', asm_out_file); ++ ASM_OUTPUT_LABEL (asm_out_file, name); ++ DECL_WEAK (decl) = 1; + } + else ++#endif ++ if (USE_HIDDEN_LINKONCE) + { +- rtx t1, t2; ++ cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); + +- /* Unnamed floating parameters are passed in both the +- SSE and integer registers. */ +- t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); +- t2 = gen_rtx_REG (mode, regno); +- t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); +- t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); +- return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); +- } +- } +- /* Handle aggregated types passed in register. */ +- if (orig_mode == BLKmode) +- { +- if (bytes > 0 && bytes <= 8) +- mode = (bytes > 4 ? DImode : SImode); +- if (mode == BLKmode) +- mode = DImode; +- } +- +- return gen_reg_or_parallel (mode, orig_mode, regno); +-} +- +-/* Return where to put the arguments to a function. +- Return zero to push the argument on the stack, or a hard register in which to store the argument. +- +- MODE is the argument's machine mode. TYPE is the data type of the +- argument. It is null for libcalls where that information may not be +- available. CUM gives information about the preceding args and about +- the function being called. 
NAMED is nonzero if this argument is a +- named parameter (otherwise it is an extra parameter matching an +- ellipsis). */ +- +-static rtx +-ix86_function_arg (cumulative_args_t cum_v, machine_mode omode, +- const_tree type, bool named) +-{ +- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); +- machine_mode mode = omode; +- HOST_WIDE_INT bytes, words; +- rtx arg; ++ targetm.asm_out.unique_section (decl, 0); ++ switch_to_section (get_named_section (decl, NULL, 0)); + +- if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) +- { +- gcc_assert (type != NULL_TREE); +- if (POINTER_TYPE_P (type)) +- { +- /* This is the pointer argument. */ +- gcc_assert (TYPE_MODE (type) == Pmode); +- /* It is at -WORD(AP) in the current frame in interrupt and +- exception handlers. */ +- arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD); ++ targetm.asm_out.globalize_label (asm_out_file, name); ++ fputs ("\t.hidden\t", asm_out_file); ++ assemble_name (asm_out_file, name); ++ putc ('\n', asm_out_file); ++ ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); + } + else + { +- gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION +- && TREE_CODE (type) == INTEGER_TYPE +- && TYPE_MODE (type) == word_mode); +- /* The error code is the word-mode integer argument at +- -2 * WORD(AP) in the current frame of the exception +- handler. */ +- arg = gen_rtx_MEM (word_mode, +- plus_constant (Pmode, +- arg_pointer_rtx, +- -2 * UNITS_PER_WORD)); ++ switch_to_section (text_section); ++ ASM_OUTPUT_LABEL (asm_out_file, name); + } +- return arg; +- } + +- if (mode == BLKmode) +- bytes = int_size_in_bytes (type); +- else +- bytes = GET_MODE_SIZE (mode); +- words = CEIL (bytes, UNITS_PER_WORD); ++ DECL_INITIAL (decl) = make_node (BLOCK); ++ current_function_decl = decl; ++ allocate_struct_function (decl, false); ++ init_function_start (decl); ++ /* We're about to hide the function body from callees of final_* by ++ emitting it directly; tell them we're a thunk, if they care. */ ++ cfun->is_thunk = true; ++ first_function_block_is_cold = false; ++ /* Make sure unwind info is emitted for the thunk if needed. */ ++ final_start_function (emit_barrier (), asm_out_file, 1); + +- /* To simplify the code below, represent vector types with a vector mode +- even if MMX/SSE are not active. */ +- if (type && TREE_CODE (type) == VECTOR_TYPE) +- mode = type_natural_mode (type, cum, false); ++ /* Pad stack IP move with 4 instructions (two NOPs count ++ as one instruction). */ ++ if (TARGET_PAD_SHORT_FUNCTION) ++ { ++ int i = 8; + +- if (TARGET_64BIT) +- { +- enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; ++ while (i--) ++ fputs ("\tnop\n", asm_out_file); ++ } + +- if (call_abi == MS_ABI) +- arg = function_arg_ms_64 (cum, mode, omode, named, type, bytes); +- else +- arg = function_arg_64 (cum, mode, omode, type, named); ++ xops[0] = gen_rtx_REG (Pmode, regno); ++ xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); ++ output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); ++ output_asm_insn ("%!ret", NULL); ++ final_end_function (); ++ init_insn_lengths (); ++ free_after_compilation (cfun); ++ set_cfun (NULL); ++ current_function_decl = NULL; + } +- else +- arg = function_arg_32 (cum, mode, omode, type, bytes, words); + +- /* Track if there are outgoing arguments on stack. 
*/ +- if (arg == NULL_RTX && cum->caller) +- cfun->machine->outgoing_args_on_stack = true; +- +- return arg; ++ if (flag_split_stack) ++ file_end_indicate_split_stack (); + } + +-/* A C expression that indicates when an argument must be passed by +- reference. If nonzero for an argument, a copy of that argument is +- made in memory and a pointer to the argument is passed instead of +- the argument itself. The pointer is passed in whatever way is +- appropriate for passing a pointer to that type. */ ++/* Emit code for the SET_GOT patterns. */ + +-static bool +-ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode, +- const_tree type, bool) ++const char * ++output_set_got (rtx dest, rtx label) + { +- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); ++ rtx xops[3]; + +- if (TARGET_64BIT) ++ xops[0] = dest; ++ ++ if (TARGET_VXWORKS_RTP && flag_pic) + { +- enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; ++ /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ ++ xops[2] = gen_rtx_MEM (Pmode, ++ gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); ++ output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); + +- /* See Windows x64 Software Convention. */ +- if (call_abi == MS_ABI) +- { +- HOST_WIDE_INT msize = GET_MODE_SIZE (mode); ++ /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. ++ Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as ++ an unadorned address. */ ++ xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); ++ SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; ++ output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); ++ return ""; ++ } + +- if (type) +- { +- /* Arrays are passed by reference. */ +- if (TREE_CODE (type) == ARRAY_TYPE) +- return true; ++ xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); + +- if (RECORD_OR_UNION_TYPE_P (type)) +- { +- /* Structs/unions of sizes other than 8, 16, 32, or 64 bits +- are passed by reference. */ +- msize = int_size_in_bytes (type); +- } +- } ++ if (flag_pic) ++ { ++ char name[32]; ++ get_pc_thunk_name (name, REGNO (dest)); ++ pic_labels_used |= 1 << REGNO (dest); + +- /* __m128 is passed by reference. */ +- return msize != 1 && msize != 2 && msize != 4 && msize != 8; +- } +- else if (type && int_size_in_bytes (type) == -1) +- return true; ++ xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); ++ xops[2] = gen_rtx_MEM (QImode, xops[2]); ++ output_asm_insn ("%!call\t%X2", xops); ++ ++#if TARGET_MACHO ++ /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here. ++ This is what will be referenced by the Mach-O PIC subsystem. */ ++ if (machopic_should_output_picbase_label () || !label) ++ ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); ++ ++ /* When we are restoring the pic base at the site of a nonlocal label, ++ and we decided to emit the pic base above, we will still output a ++ local label used for calculating the correction offset (even though ++ the offset will be 0 in that case). */ ++ if (label) ++ targetm.asm_out.internal_label (asm_out_file, "L", ++ CODE_LABEL_NUMBER (label)); ++#endif ++ } ++ else ++ { ++ if (TARGET_MACHO) ++ /* We don't need a pic base, we're not producing pic. */ ++ gcc_unreachable (); ++ ++ xops[2] = gen_rtx_LABEL_REF (Pmode, label ? 
label : gen_label_rtx ()); ++ output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); ++ targetm.asm_out.internal_label (asm_out_file, "L", ++ CODE_LABEL_NUMBER (XEXP (xops[2], 0))); + } + +- return false; ++ if (!TARGET_MACHO) ++ output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); ++ ++ return ""; + } + +-/* Return true when TYPE should be 128bit aligned for 32bit argument +- passing ABI. XXX: This function is obsolete and is only used for +- checking psABI compatibility with previous versions of GCC. */ ++/* Generate an "push" pattern for input ARG. */ + +-static bool +-ix86_compat_aligned_value_p (const_tree type) ++rtx ++gen_push (rtx arg) + { +- machine_mode mode = TYPE_MODE (type); +- if (((TARGET_SSE && SSE_REG_MODE_P (mode)) +- || mode == TDmode +- || mode == TFmode +- || mode == TCmode) +- && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) +- return true; +- if (TYPE_ALIGN (type) < 128) +- return false; ++ struct machine_function *m = cfun->machine; + +- if (AGGREGATE_TYPE_P (type)) +- { +- /* Walk the aggregates recursively. */ +- switch (TREE_CODE (type)) +- { +- case RECORD_TYPE: +- case UNION_TYPE: +- case QUAL_UNION_TYPE: +- { +- tree field; ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ m->fs.cfa_offset += UNITS_PER_WORD; ++ m->fs.sp_offset += UNITS_PER_WORD; + +- /* Walk all the structure fields. */ +- for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) +- { +- if (TREE_CODE (field) == FIELD_DECL +- && ix86_compat_aligned_value_p (TREE_TYPE (field))) +- return true; +- } +- break; +- } ++ if (REG_P (arg) && GET_MODE (arg) != word_mode) ++ arg = gen_rtx_REG (word_mode, REGNO (arg)); + +- case ARRAY_TYPE: +- /* Just for use if some languages passes arrays by value. */ +- if (ix86_compat_aligned_value_p (TREE_TYPE (type))) +- return true; +- break; ++ return gen_rtx_SET (gen_rtx_MEM (word_mode, ++ gen_rtx_PRE_DEC (Pmode, ++ stack_pointer_rtx)), ++ arg); ++} + +- default: +- gcc_unreachable (); +- } +- } +- return false; ++/* Generate an "pop" pattern for input ARG. */ ++ ++rtx ++gen_pop (rtx arg) ++{ ++ if (REG_P (arg) && GET_MODE (arg) != word_mode) ++ arg = gen_rtx_REG (word_mode, REGNO (arg)); ++ ++ return gen_rtx_SET (arg, ++ gen_rtx_MEM (word_mode, ++ gen_rtx_POST_INC (Pmode, ++ stack_pointer_rtx))); + } + +-/* Return the alignment boundary for MODE and TYPE with alignment ALIGN. +- XXX: This function is obsolete and is only used for checking psABI +- compatibility with previous versions of GCC. */ ++/* Return >= 0 if there is an unused call-clobbered register available ++ for the entire function. */ + + static unsigned int +-ix86_compat_function_arg_boundary (machine_mode mode, +- const_tree type, unsigned int align) ++ix86_select_alt_pic_regnum (void) + { +- /* In 32bit, only _Decimal128 and __float128 are aligned to their +- natural boundaries. */ +- if (!TARGET_64BIT && mode != TDmode && mode != TFmode) +- { +- /* i386 ABI defines all arguments to be 4 byte aligned. We have to +- make an exception for SSE modes since these require 128bit +- alignment. +- +- The handling here differs from field_alignment. ICC aligns MMX +- arguments to 4 byte boundaries, while structure fields are aligned +- to 8 byte boundaries. */ +- if (!type) +- { +- if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) +- align = PARM_BOUNDARY; +- } ++ if (ix86_use_pseudo_pic_reg ()) ++ return INVALID_REGNUM; ++ ++ if (crtl->is_leaf ++ && !crtl->profile ++ && !ix86_current_function_calls_tls_descriptor) ++ { ++ int i, drap; ++ /* Can't use the same register for both PIC and DRAP. 
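For context on get_pc_thunk_name, the per-register thunks emitted in ix86_code_end, and output_set_got above: on 32-bit PIC the GOT pointer is materialized by calling a one-instruction thunk and then adding _GLOBAL_OFFSET_TABLE_ to the result. A small sketch of the naming, with the emitted sequence paraphrased from the asm templates in a comment (assumptions: USE_HIDDEN_LINKONCE naming, AT&T syntax, not literal compiler output):

    #include <stdio.h>

    /* Sketch of the 32-bit PIC helper naming from get_pc_thunk_name above.
       Per the output templates in ix86_code_end and output_set_got, the
       thunk body reduces to
           mov  (%esp), %REG
           ret
       and the call site that sets up the GOT pointer pairs it with
           call __x86.get_pc_thunk.REG
           add  $_GLOBAL_OFFSET_TABLE_, %REG                              */
    int
    main (void)
    {
      static const char *const regs[] = { "ax", "cx", "dx", "bx" };
      char name[32];

      for (unsigned int i = 0; i < sizeof regs / sizeof regs[0]; i++)
        {
          snprintf (name, sizeof name, "__x86.get_pc_thunk.%s", regs[i]);
          puts (name);
        }
      return 0;
    }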
*/ ++ if (crtl->drap_reg) ++ drap = REGNO (crtl->drap_reg); + else +- { +- if (!ix86_compat_aligned_value_p (type)) +- align = PARM_BOUNDARY; +- } ++ drap = -1; ++ for (i = 2; i >= 0; --i) ++ if (i != drap && !df_regs_ever_live_p (i)) ++ return i; + } +- if (align > BIGGEST_ALIGNMENT) +- align = BIGGEST_ALIGNMENT; +- return align; ++ ++ return INVALID_REGNUM; + } + +-/* Return true when TYPE should be 128bit aligned for 32bit argument +- passing ABI. */ ++/* Return true if REGNO is used by the epilogue. */ + +-static bool +-ix86_contains_aligned_value_p (const_tree type) ++bool ++ix86_epilogue_uses (int regno) + { +- machine_mode mode = TYPE_MODE (type); +- +- if (mode == XFmode || mode == XCmode) +- return false; +- +- if (TYPE_ALIGN (type) < 128) +- return false; +- +- if (AGGREGATE_TYPE_P (type)) +- { +- /* Walk the aggregates recursively. */ +- switch (TREE_CODE (type)) +- { +- case RECORD_TYPE: +- case UNION_TYPE: +- case QUAL_UNION_TYPE: +- { +- tree field; +- +- /* Walk all the structure fields. */ +- for (field = TYPE_FIELDS (type); +- field; +- field = DECL_CHAIN (field)) +- { +- if (TREE_CODE (field) == FIELD_DECL +- && ix86_contains_aligned_value_p (TREE_TYPE (field))) +- return true; +- } +- break; +- } +- +- case ARRAY_TYPE: +- /* Just for use if some languages passes arrays by value. */ +- if (ix86_contains_aligned_value_p (TREE_TYPE (type))) +- return true; +- break; ++ /* If there are no caller-saved registers, we preserve all registers, ++ except for MMX and x87 registers which aren't supported when saving ++ and restoring registers. Don't explicitly save SP register since ++ it is always preserved. */ ++ return (epilogue_completed ++ && cfun->machine->no_caller_saved_registers ++ && !fixed_regs[regno] ++ && !STACK_REGNO_P (regno) ++ && !MMX_REGNO_P (regno)); ++} + +- default: +- gcc_unreachable (); +- } +- } +- else +- return TYPE_ALIGN (type) >= 128; ++/* Return nonzero if register REGNO can be used as a scratch register ++ in peephole2. */ + +- return false; ++static bool ++ix86_hard_regno_scratch_ok (unsigned int regno) ++{ ++ /* If there are no caller-saved registers, we can't use any register ++ as a scratch register after epilogue and use REGNO as scratch ++ register only if it has been used before to avoid saving and ++ restoring it. */ ++ return (!cfun->machine->no_caller_saved_registers ++ || (!epilogue_completed ++ && df_regs_ever_live_p (regno))); + } + +-/* Gives the alignment boundary, in bits, of an argument with the +- specified mode and type. */ ++/* Return TRUE if we need to save REGNO. */ + +-static unsigned int +-ix86_function_arg_boundary (machine_mode mode, const_tree type) ++bool ++ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined) + { +- unsigned int align; +- if (type) ++ /* If there are no caller-saved registers, we preserve all registers, ++ except for MMX and x87 registers which aren't supported when saving ++ and restoring registers. Don't explicitly save SP register since ++ it is always preserved. */ ++ if (cfun->machine->no_caller_saved_registers) + { +- /* Since the main variant type is used for call, we convert it to +- the main variant type. */ +- type = TYPE_MAIN_VARIANT (type); +- align = TYPE_ALIGN (type); +- if (TYPE_EMPTY_P (type)) +- return PARM_BOUNDARY; ++ /* Don't preserve registers used for function return value. 
*/ ++ rtx reg = crtl->return_rtx; ++ if (reg) ++ { ++ unsigned int i = REGNO (reg); ++ unsigned int nregs = REG_NREGS (reg); ++ while (nregs-- > 0) ++ if ((i + nregs) == regno) ++ return false; ++ } ++ ++ return (df_regs_ever_live_p (regno) ++ && !fixed_regs[regno] ++ && !STACK_REGNO_P (regno) ++ && !MMX_REGNO_P (regno) ++ && (regno != HARD_FRAME_POINTER_REGNUM ++ || !frame_pointer_needed)); + } +- else +- align = GET_MODE_ALIGNMENT (mode); +- if (align < PARM_BOUNDARY) +- align = PARM_BOUNDARY; +- else +- { +- static bool warned; +- unsigned int saved_align = align; + +- if (!TARGET_64BIT) ++ if (regno == REAL_PIC_OFFSET_TABLE_REGNUM ++ && pic_offset_table_rtx) ++ { ++ if (ix86_use_pseudo_pic_reg ()) + { +- /* i386 ABI defines XFmode arguments to be 4 byte aligned. */ +- if (!type) +- { +- if (mode == XFmode || mode == XCmode) +- align = PARM_BOUNDARY; +- } +- else if (!ix86_contains_aligned_value_p (type)) +- align = PARM_BOUNDARY; +- +- if (align < 128) +- align = PARM_BOUNDARY; ++ /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to ++ _mcount in prologue. */ ++ if (!TARGET_64BIT && flag_pic && crtl->profile) ++ return true; + } ++ else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) ++ || crtl->profile ++ || crtl->calls_eh_return ++ || crtl->uses_const_pool ++ || cfun->has_nonlocal_label) ++ return ix86_select_alt_pic_regnum () == INVALID_REGNUM; ++ } + +- if (warn_psabi +- && !warned +- && align != ix86_compat_function_arg_boundary (mode, type, +- saved_align)) ++ if (crtl->calls_eh_return && maybe_eh_return) ++ { ++ unsigned i; ++ for (i = 0; ; i++) + { +- warned = true; +- inform (input_location, +- "the ABI for passing parameters with %d-byte" +- " alignment has changed in GCC 4.6", +- align / BITS_PER_UNIT); ++ unsigned test = EH_RETURN_DATA_REGNO (i); ++ if (test == INVALID_REGNUM) ++ break; ++ if (test == regno) ++ return true; + } + } + +- return align; +-} +- +-/* Return true if N is a possible register number of function value. */ +- +-static bool +-ix86_function_value_regno_p (const unsigned int regno) +-{ +- switch (regno) ++ if (ignore_outlined && cfun->machine->call_ms2sysv) + { +- case AX_REG: +- return true; +- case DX_REG: +- return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI); +- case DI_REG: +- case SI_REG: +- return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; +- +- /* Complex values are returned in %st(0)/%st(1) pair. */ +- case ST0_REG: +- case ST1_REG: +- /* TODO: The function should depend on current function ABI but +- builtins.c would need updating then. Therefore we use the +- default ABI. */ +- if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) +- return false; +- return TARGET_FLOAT_RETURNS_IN_80387; +- +- /* Complex values are returned in %xmm0/%xmm1 pair. */ +- case XMM0_REG: +- case XMM1_REG: +- return TARGET_SSE; +- +- case MM0_REG: +- if (TARGET_MACHO || TARGET_64BIT) ++ unsigned count = cfun->machine->call_ms2sysv_extra_regs ++ + xlogue_layout::MIN_REGS; ++ if (xlogue_layout::is_stub_managed_reg (regno, count)) + return false; +- return TARGET_MMX; + } + +- return false; ++ if (crtl->drap_reg ++ && regno == REGNO (crtl->drap_reg) ++ && !cfun->machine->no_drap_save_restore) ++ return true; ++ ++ return (df_regs_ever_live_p (regno) ++ && !call_used_or_fixed_reg_p (regno) ++ && !fixed_regs[regno] ++ && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); + } + +-/* Define how to find the value returned by a function. +- VALTYPE is the data type of the value (as a tree). 
+- If the precise function being called is known, FUNC is its FUNCTION_DECL; +- otherwise, FUNC is 0. */ ++/* Return number of saved general prupose registers. */ + +-static rtx +-function_value_32 (machine_mode orig_mode, machine_mode mode, +- const_tree fntype, const_tree fn) ++static int ++ix86_nsaved_regs (void) + { +- unsigned int regno; ++ int nregs = 0; ++ int regno; + +- /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where +- we normally prevent this case when mmx is not available. However +- some ABIs may require the result to be returned like DImode. */ +- if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) +- regno = FIRST_MMX_REG; +- +- /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where +- we prevent this case when sse is not available. However some ABIs +- may require the result to be returned like integer TImode. */ +- else if (mode == TImode +- || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) +- regno = FIRST_SSE_REG; +- +- /* 32-byte vector modes in %ymm0. */ +- else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) +- regno = FIRST_SSE_REG; ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) ++ nregs ++; ++ return nregs; ++} + +- /* 64-byte vector modes in %zmm0. */ +- else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) +- regno = FIRST_SSE_REG; ++/* Return number of saved SSE registers. */ + +- /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */ +- else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387) +- regno = FIRST_FLOAT_REG; +- else +- /* Most things go in %eax. */ +- regno = AX_REG; ++static int ++ix86_nsaved_sseregs (void) ++{ ++ int nregs = 0; ++ int regno; + +- /* Override FP return register with %xmm0 for local functions when +- SSE math is enabled or for functions with sseregparm attribute. */ +- if ((fn || fntype) && (mode == SFmode || mode == DFmode)) +- { +- int sse_level = ix86_function_sseregparm (fntype, fn, false); +- if (sse_level == -1) +- { +- error ("calling %qD with SSE calling convention without " +- "SSE/SSE2 enabled", fn); +- sorry ("this is a GCC bug that can be worked around by adding " +- "attribute used to function called"); +- } +- else if ((sse_level >= 1 && mode == SFmode) +- || (sse_level == 2 && mode == DFmode)) +- regno = FIRST_SSE_REG; +- } ++ if (!TARGET_64BIT_MS_ABI) ++ return 0; ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) ++ nregs ++; ++ return nregs; ++} + +- /* OImode shouldn't be used directly. */ +- gcc_assert (mode != OImode); ++/* Given FROM and TO register numbers, say whether this elimination is ++ allowed. If stack alignment is needed, we can only replace argument ++ pointer with hard frame pointer, or replace frame pointer with stack ++ pointer. Otherwise, frame pointer elimination is automatically ++ handled and all other eliminations are valid. */ + +- return gen_rtx_REG (orig_mode, regno); ++static bool ++ix86_can_eliminate (const int from, const int to) ++{ ++ if (stack_realign_fp) ++ return ((from == ARG_POINTER_REGNUM ++ && to == HARD_FRAME_POINTER_REGNUM) ++ || (from == FRAME_POINTER_REGNUM ++ && to == STACK_POINTER_REGNUM)); ++ else ++ return to == STACK_POINTER_REGNUM ? 
!frame_pointer_needed : true; + } + +-static rtx +-function_value_64 (machine_mode orig_mode, machine_mode mode, +- const_tree valtype) ++/* Return the offset between two registers, one to be eliminated, and the other ++ its replacement, at the start of a routine. */ ++ ++HOST_WIDE_INT ++ix86_initial_elimination_offset (int from, int to) + { +- rtx ret; ++ struct ix86_frame &frame = cfun->machine->frame; + +- /* Handle libcalls, which don't provide a type node. */ +- if (valtype == NULL) ++ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) ++ return frame.hard_frame_pointer_offset; ++ else if (from == FRAME_POINTER_REGNUM ++ && to == HARD_FRAME_POINTER_REGNUM) ++ return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; ++ else + { +- unsigned int regno; ++ gcc_assert (to == STACK_POINTER_REGNUM); + +- switch (mode) +- { +- case E_SFmode: +- case E_SCmode: +- case E_DFmode: +- case E_DCmode: +- case E_TFmode: +- case E_SDmode: +- case E_DDmode: +- case E_TDmode: +- regno = FIRST_SSE_REG; +- break; +- case E_XFmode: +- case E_XCmode: +- regno = FIRST_FLOAT_REG; +- break; +- case E_TCmode: +- return NULL; +- default: +- regno = AX_REG; +- } ++ if (from == ARG_POINTER_REGNUM) ++ return frame.stack_pointer_offset; + +- return gen_rtx_REG (mode, regno); +- } +- else if (POINTER_TYPE_P (valtype)) +- { +- /* Pointers are always returned in word_mode. */ +- mode = word_mode; ++ gcc_assert (from == FRAME_POINTER_REGNUM); ++ return frame.stack_pointer_offset - frame.frame_pointer_offset; + } +- +- ret = construct_container (mode, orig_mode, valtype, 1, +- X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, +- x86_64_int_return_registers, 0); +- +- /* For zero sized structures, construct_container returns NULL, but we +- need to keep rest of compiler happy by returning meaningful value. */ +- if (!ret) +- ret = gen_rtx_REG (orig_mode, AX_REG); +- +- return ret; + } + ++/* In a dynamically-aligned function, we can't know the offset from ++ stack pointer to frame pointer, so we must ensure that setjmp ++ eliminates fp against the hard fp (%ebp) rather than trying to ++ index from %esp up to the top of the frame across a gap that is ++ of unknown (at compile-time) size. */ + static rtx +-function_value_ms_32 (machine_mode orig_mode, machine_mode mode, +- const_tree fntype, const_tree fn, const_tree valtype) ++ix86_builtin_setjmp_frame_value (void) + { +- unsigned int regno; +- +- /* Floating point return values in %st(0) +- (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */ +- if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387 +- && (GET_MODE_SIZE (mode) > 8 +- || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype))) +- { +- regno = FIRST_FLOAT_REG; +- return gen_rtx_REG (orig_mode, regno); +- } +- else +- return function_value_32(orig_mode, mode, fntype,fn); ++ return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; + } + +-static rtx +-function_value_ms_64 (machine_mode orig_mode, machine_mode mode, +- const_tree valtype) ++/* Emits a warning for unsupported msabi to sysv pro/epilogues. 
*/ ++void warn_once_call_ms2sysv_xlogues (const char *feature) + { +- unsigned int regno = AX_REG; +- +- if (TARGET_SSE) ++ static bool warned_once = false; ++ if (!warned_once) + { +- switch (GET_MODE_SIZE (mode)) +- { +- case 16: +- if (valtype != NULL_TREE +- && !VECTOR_INTEGER_TYPE_P (valtype) +- && !VECTOR_INTEGER_TYPE_P (valtype) +- && !INTEGRAL_TYPE_P (valtype) +- && !VECTOR_FLOAT_TYPE_P (valtype)) +- break; +- if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) +- && !COMPLEX_MODE_P (mode)) +- regno = FIRST_SSE_REG; +- break; +- case 8: +- case 4: +- if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype)) +- break; +- if (mode == SFmode || mode == DFmode) +- regno = FIRST_SSE_REG; +- break; +- default: +- break; +- } ++ warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s", ++ feature); ++ warned_once = true; + } +- return gen_rtx_REG (orig_mode, regno); + } + +-static rtx +-ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, +- machine_mode orig_mode, machine_mode mode) +-{ +- const_tree fn, fntype; ++/* Return the probing interval for -fstack-clash-protection. */ + +- fn = NULL_TREE; +- if (fntype_or_decl && DECL_P (fntype_or_decl)) +- fn = fntype_or_decl; +- fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; +- +- if (ix86_function_type_abi (fntype) == MS_ABI) +- { +- if (TARGET_64BIT) +- return function_value_ms_64 (orig_mode, mode, valtype); +- else +- return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype); +- } +- else if (TARGET_64BIT) +- return function_value_64 (orig_mode, mode, valtype); ++static HOST_WIDE_INT ++get_probe_interval (void) ++{ ++ if (flag_stack_clash_protection) ++ return (HOST_WIDE_INT_1U ++ << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); + else +- return function_value_32 (orig_mode, mode, fntype, fn); ++ return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); + } + +-static rtx +-ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool) +-{ +- machine_mode mode, orig_mode; ++/* When using -fsplit-stack, the allocation routines set a field in ++ the TCB to the bottom of the stack plus this much space, measured ++ in bytes. */ + +- orig_mode = TYPE_MODE (valtype); +- mode = type_natural_mode (valtype, NULL, true); +- return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); +-} ++#define SPLIT_STACK_AVAILABLE 256 + +-/* Pointer function arguments and return values are promoted to +- word_mode for normal functions. */ ++/* Fill structure ix86_frame about frame of currently computed function. */ + +-static machine_mode +-ix86_promote_function_mode (const_tree type, machine_mode mode, +- int *punsignedp, const_tree fntype, +- int for_return) ++static void ++ix86_compute_frame_layout (void) + { +- if (cfun->machine->func_type == TYPE_NORMAL +- && type != NULL_TREE +- && POINTER_TYPE_P (type)) +- { +- *punsignedp = POINTERS_EXTEND_UNSIGNED; +- return word_mode; +- } +- return default_promote_function_mode (type, mode, punsignedp, fntype, +- for_return); +-} +- +-/* Return true if a structure, union or array with MODE containing FIELD +- should be accessed using BLKmode. */ +- +-static bool +-ix86_member_type_forces_blk (const_tree field, machine_mode mode) +-{ +- /* Union with XFmode must be in BLKmode. 
*/ +- return (mode == XFmode +- && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE +- || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE)); +-} +- +-rtx +-ix86_libcall_value (machine_mode mode) +-{ +- return ix86_function_value_1 (NULL, NULL, mode, mode); +-} +- +-/* Return true iff type is returned in memory. */ +- +-static bool +-ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) +-{ +-#ifdef SUBTARGET_RETURN_IN_MEMORY +- return SUBTARGET_RETURN_IN_MEMORY (type, fntype); +-#else +- const machine_mode mode = type_natural_mode (type, NULL, true); +- HOST_WIDE_INT size; ++ struct ix86_frame *frame = &cfun->machine->frame; ++ struct machine_function *m = cfun->machine; ++ unsigned HOST_WIDE_INT stack_alignment_needed; ++ HOST_WIDE_INT offset; ++ unsigned HOST_WIDE_INT preferred_alignment; ++ HOST_WIDE_INT size = get_frame_size (); ++ HOST_WIDE_INT to_allocate; + +- if (TARGET_64BIT) ++ /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit ++ * ms_abi functions that call a sysv function. We now need to prune away ++ * cases where it should be disabled. */ ++ if (TARGET_64BIT && m->call_ms2sysv) + { +- if (ix86_function_type_abi (fntype) == MS_ABI) +- { +- size = int_size_in_bytes (type); ++ gcc_assert (TARGET_64BIT_MS_ABI); ++ gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES); ++ gcc_assert (!TARGET_SEH); ++ gcc_assert (TARGET_SSE); ++ gcc_assert (!ix86_using_red_zone ()); + +- /* __m128 is returned in xmm0. */ +- if ((!type || VECTOR_INTEGER_TYPE_P (type) +- || INTEGRAL_TYPE_P (type) +- || VECTOR_FLOAT_TYPE_P (type)) +- && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) +- && !COMPLEX_MODE_P (mode) +- && (GET_MODE_SIZE (mode) == 16 || size == 16)) +- return false; ++ if (crtl->calls_eh_return) ++ { ++ gcc_assert (!reload_completed); ++ m->call_ms2sysv = false; ++ warn_once_call_ms2sysv_xlogues ("__builtin_eh_return"); ++ } + +- /* Otherwise, the size must be exactly in [1248]. */ +- return size != 1 && size != 2 && size != 4 && size != 8; ++ else if (ix86_static_chain_on_stack) ++ { ++ gcc_assert (!reload_completed); ++ m->call_ms2sysv = false; ++ warn_once_call_ms2sysv_xlogues ("static call chains"); + } ++ ++ /* Finally, compute which registers the stub will manage. */ + else + { +- int needed_intregs, needed_sseregs; +- +- return examine_argument (mode, type, 1, +- &needed_intregs, &needed_sseregs); ++ unsigned count = xlogue_layout::count_stub_managed_regs (); ++ m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS; ++ m->call_ms2sysv_pad_in = 0; + } + } +- else +- { +- size = int_size_in_bytes (type); +- +- /* Intel MCU psABI returns scalars and aggregates no larger than 8 +- bytes in registers. */ +- if (TARGET_IAMCU) +- return VECTOR_MODE_P (mode) || size < 0 || size > 8; +- +- if (mode == BLKmode) +- return true; +- +- if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) +- return false; + +- if (VECTOR_MODE_P (mode) || mode == TImode) +- { +- /* User-created vectors small enough to fit in EAX. */ +- if (size < 8) +- return false; ++ frame->nregs = ix86_nsaved_regs (); ++ frame->nsseregs = ix86_nsaved_sseregs (); + +- /* Unless ABI prescibes otherwise, +- MMX/3dNow values are returned in MM0 if available. */ +- +- if (size == 8) +- return TARGET_VECT8_RETURNS || !TARGET_MMX; ++ /* 64-bit MS ABI seem to require stack alignment to be always 16, ++ except for function prologues, leaf functions and when the defult ++ incoming stack boundary is overriden at command line or via ++ force_align_arg_pointer attribute. 
+ +- /* SSE values are returned in XMM0 if available. */ +- if (size == 16) +- return !TARGET_SSE; ++ Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants ++ at call sites, including profile function calls. ++ */ ++ if (((TARGET_64BIT_MS_ABI || TARGET_MACHO) ++ && crtl->preferred_stack_boundary < 128) ++ && (!crtl->is_leaf || cfun->calls_alloca != 0 ++ || ix86_current_function_calls_tls_descriptor ++ || (TARGET_MACHO && crtl->profile) ++ || ix86_incoming_stack_boundary < 128)) ++ { ++ crtl->preferred_stack_boundary = 128; ++ crtl->stack_alignment_needed = 128; ++ } + +- /* AVX values are returned in YMM0 if available. */ +- if (size == 32) +- return !TARGET_AVX; ++ stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; ++ preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; + +- /* AVX512F values are returned in ZMM0 if available. */ +- if (size == 64) +- return !TARGET_AVX512F; +- } ++ gcc_assert (!size || stack_alignment_needed); ++ gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); ++ gcc_assert (preferred_alignment <= stack_alignment_needed); + +- if (mode == XFmode) +- return false; ++ /* The only ABI saving SSE regs should be 64-bit ms_abi. */ ++ gcc_assert (TARGET_64BIT || !frame->nsseregs); ++ if (TARGET_64BIT && m->call_ms2sysv) ++ { ++ gcc_assert (stack_alignment_needed >= 16); ++ gcc_assert (!frame->nsseregs); ++ } + +- if (size > 12) +- return true; ++ /* For SEH we have to limit the amount of code movement into the prologue. ++ At present we do this via a BLOCKAGE, at which point there's very little ++ scheduling that can be done, which means that there's very little point ++ in doing anything except PUSHs. */ ++ if (TARGET_SEH) ++ m->use_fast_prologue_epilogue = false; ++ else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))) ++ { ++ int count = frame->nregs; ++ struct cgraph_node *node = cgraph_node::get (current_function_decl); + +- /* OImode shouldn't be used directly. */ +- gcc_assert (mode != OImode); ++ /* The fast prologue uses move instead of push to save registers. This ++ is significantly longer, but also executes faster as modern hardware ++ can execute the moves in parallel, but can't do that for push/pop. + +- return false; ++ Be careful about choosing what prologue to emit: When function takes ++ many instructions to execute we may use slow version as well as in ++ case function is known to be outside hot spot (this is known with ++ feedback only). Weight the size of function by number of registers ++ to save as it is cheap to use one or two push instructions but very ++ slow to use many of them. */ ++ if (count) ++ count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; ++ if (node->frequency < NODE_FREQUENCY_NORMAL ++ || (flag_branch_probabilities ++ && node->frequency < NODE_FREQUENCY_HOT)) ++ m->use_fast_prologue_epilogue = false; ++ else ++ m->use_fast_prologue_epilogue ++ = !expensive_function_p (count); + } +-#endif +-} + +- +-/* Create the va_list data type. */ ++ frame->save_regs_using_mov ++ = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue ++ /* If static stack checking is enabled and done with probes, ++ the registers need to be saved before allocating the frame. */ ++ && flag_stack_check != STATIC_BUILTIN_STACK_CHECK); + +-static tree +-ix86_build_builtin_va_list_64 (void) +-{ +- tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; ++ /* Skip return address and error code in exception handler. 
*/ ++ offset = INCOMING_FRAME_SP_OFFSET; + +- record = lang_hooks.types.make_type (RECORD_TYPE); +- type_decl = build_decl (BUILTINS_LOCATION, +- TYPE_DECL, get_identifier ("__va_list_tag"), record); ++ /* Skip pushed static chain. */ ++ if (ix86_static_chain_on_stack) ++ offset += UNITS_PER_WORD; + +- f_gpr = build_decl (BUILTINS_LOCATION, +- FIELD_DECL, get_identifier ("gp_offset"), +- unsigned_type_node); +- f_fpr = build_decl (BUILTINS_LOCATION, +- FIELD_DECL, get_identifier ("fp_offset"), +- unsigned_type_node); +- f_ovf = build_decl (BUILTINS_LOCATION, +- FIELD_DECL, get_identifier ("overflow_arg_area"), +- ptr_type_node); +- f_sav = build_decl (BUILTINS_LOCATION, +- FIELD_DECL, get_identifier ("reg_save_area"), +- ptr_type_node); ++ /* Skip saved base pointer. */ ++ if (frame_pointer_needed) ++ offset += UNITS_PER_WORD; ++ frame->hfp_save_offset = offset; + +- va_list_gpr_counter_field = f_gpr; +- va_list_fpr_counter_field = f_fpr; ++ /* The traditional frame pointer location is at the top of the frame. */ ++ frame->hard_frame_pointer_offset = offset; + +- DECL_FIELD_CONTEXT (f_gpr) = record; +- DECL_FIELD_CONTEXT (f_fpr) = record; +- DECL_FIELD_CONTEXT (f_ovf) = record; +- DECL_FIELD_CONTEXT (f_sav) = record; ++ /* Register save area */ ++ offset += frame->nregs * UNITS_PER_WORD; ++ frame->reg_save_offset = offset; + +- TYPE_STUB_DECL (record) = type_decl; +- TYPE_NAME (record) = type_decl; +- TYPE_FIELDS (record) = f_gpr; +- DECL_CHAIN (f_gpr) = f_fpr; +- DECL_CHAIN (f_fpr) = f_ovf; +- DECL_CHAIN (f_ovf) = f_sav; ++ /* On SEH target, registers are pushed just before the frame pointer ++ location. */ ++ if (TARGET_SEH) ++ frame->hard_frame_pointer_offset = offset; + +- layout_type (record); ++ /* Calculate the size of the va-arg area (not including padding, if any). */ ++ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; + +- TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"), +- NULL_TREE, TYPE_ATTRIBUTES (record)); ++ /* Also adjust stack_realign_offset for the largest alignment of ++ stack slot actually used. */ ++ if (stack_realign_fp ++ || (cfun->machine->max_used_stack_alignment != 0 ++ && (offset % cfun->machine->max_used_stack_alignment) != 0)) ++ { ++ /* We may need a 16-byte aligned stack for the remainder of the ++ register save area, but the stack frame for the local function ++ may require a greater alignment if using AVX/2/512. In order ++ to avoid wasting space, we first calculate the space needed for ++ the rest of the register saves, add that to the stack pointer, ++ and then realign the stack to the boundary of the start of the ++ frame for the local function. */ ++ HOST_WIDE_INT space_needed = 0; ++ HOST_WIDE_INT sse_reg_space_needed = 0; + +- /* The correct type is an array type of one element. */ +- return build_array_type (record, build_index_type (size_zero_node)); +-} ++ if (TARGET_64BIT) ++ { ++ if (m->call_ms2sysv) ++ { ++ m->call_ms2sysv_pad_in = 0; ++ space_needed = xlogue_layout::get_instance ().get_stack_space_used (); ++ } + +-/* Setup the builtin va_list data type and for 64-bit the additional +- calling convention specific va_list data types. */ ++ else if (frame->nsseregs) ++ /* The only ABI that has saved SSE registers (Win64) also has a ++ 16-byte aligned default stack. However, many programs violate ++ the ABI, and Wine64 forces stack realignment to compensate. 
*/ ++ space_needed = frame->nsseregs * 16; + +-static tree +-ix86_build_builtin_va_list (void) +-{ +- if (TARGET_64BIT) +- { +- /* Initialize ABI specific va_list builtin types. ++ sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16); + +- In lto1, we can encounter two va_list types: +- - one as a result of the type-merge across TUs, and +- - the one constructed here. +- These two types will not have the same TYPE_MAIN_VARIANT, and therefore +- a type identity check in canonical_va_list_type based on +- TYPE_MAIN_VARIANT (which we used to have) will not work. +- Instead, we tag each va_list_type_node with its unique attribute, and +- look for the attribute in the type identity check in +- canonical_va_list_type. ++ /* 64-bit frame->va_arg_size should always be a multiple of 16, but ++ rounding to be pedantic. */ ++ space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16); ++ } ++ else ++ space_needed = frame->va_arg_size; + +- Tagging sysv_va_list_type_node directly with the attribute is +- problematic since it's a array of one record, which will degrade into a +- pointer to record when used as parameter (see build_va_arg comments for +- an example), dropping the attribute in the process. So we tag the +- record instead. */ ++ /* Record the allocation size required prior to the realignment AND. */ ++ frame->stack_realign_allocate = space_needed; + +- /* For SYSV_ABI we use an array of one record. */ +- sysv_va_list_type_node = ix86_build_builtin_va_list_64 (); +- +- /* For MS_ABI we use plain pointer to argument area. */ +- tree char_ptr_type = build_pointer_type (char_type_node); +- tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE, +- TYPE_ATTRIBUTES (char_ptr_type)); +- ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr); ++ /* The re-aligned stack starts at frame->stack_realign_offset. Values ++ before this point are not directly comparable with values below ++ this point. Use sp_valid_at to determine if the stack pointer is ++ valid for a given offset, fp_valid_at for the frame pointer, or ++ choose_baseaddr to have a base register chosen for you. + +- return ((ix86_abi == MS_ABI) +- ? ms_va_list_type_node +- : sysv_va_list_type_node); ++ Note that the result of (frame->stack_realign_offset ++ & (stack_alignment_needed - 1)) may not equal zero. */ ++ offset = ROUND_UP (offset + space_needed, stack_alignment_needed); ++ frame->stack_realign_offset = offset - space_needed; ++ frame->sse_reg_save_offset = frame->stack_realign_offset ++ + sse_reg_space_needed; + } + else + { +- /* For i386 we use plain pointer to argument area. */ +- return build_pointer_type (char_type_node); ++ frame->stack_realign_offset = offset; ++ ++ if (TARGET_64BIT && m->call_ms2sysv) ++ { ++ m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD); ++ offset += xlogue_layout::get_instance ().get_stack_space_used (); ++ } ++ ++ /* Align and set SSE register save area. */ ++ else if (frame->nsseregs) ++ { ++ /* If the incoming stack boundary is at least 16 bytes, or DRAP is ++ required and the DRAP re-alignment boundary is at least 16 bytes, ++ then we want the SSE register save area properly aligned. */ ++ if (ix86_incoming_stack_boundary >= 128 ++ || (stack_realign_drap && stack_alignment_needed >= 16)) ++ offset = ROUND_UP (offset, 16); ++ offset += frame->nsseregs * 16; ++ } ++ frame->sse_reg_save_offset = offset; ++ offset += frame->va_arg_size; + } +-} + +-/* Worker function for TARGET_SETUP_INCOMING_VARARGS. 
*/ ++ /* Align start of frame for local function. When a function call ++ is removed, it may become a leaf function. But if argument may ++ be passed on stack, we need to align the stack when there is no ++ tail call. */ ++ if (m->call_ms2sysv ++ || frame->va_arg_size != 0 ++ || size != 0 ++ || !crtl->is_leaf ++ || (!crtl->tail_call_emit ++ && cfun->machine->outgoing_args_on_stack) ++ || cfun->calls_alloca ++ || ix86_current_function_calls_tls_descriptor) ++ offset = ROUND_UP (offset, stack_alignment_needed); + +-static void +-setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) +-{ +- rtx save_area, mem; +- alias_set_type set; +- int i, max; ++ /* Frame pointer points here. */ ++ frame->frame_pointer_offset = offset; + +- /* GPR size of varargs save area. */ +- if (cfun->va_list_gpr_size) +- ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; +- else +- ix86_varargs_gpr_size = 0; ++ offset += size; + +- /* FPR size of varargs save area. We don't need it if we don't pass +- anything in SSE registers. */ +- if (TARGET_SSE && cfun->va_list_fpr_size) +- ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; ++ /* Add outgoing arguments area. Can be skipped if we eliminated ++ all the function calls as dead code. ++ Skipping is however impossible when function calls alloca. Alloca ++ expander assumes that last crtl->outgoing_args_size ++ of stack frame are unused. */ ++ if (ACCUMULATE_OUTGOING_ARGS ++ && (!crtl->is_leaf || cfun->calls_alloca ++ || ix86_current_function_calls_tls_descriptor)) ++ { ++ offset += crtl->outgoing_args_size; ++ frame->outgoing_arguments_size = crtl->outgoing_args_size; ++ } + else +- ix86_varargs_fpr_size = 0; ++ frame->outgoing_arguments_size = 0; + +- if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size) +- return; ++ /* Align stack boundary. Only needed if we're calling another function ++ or using alloca. */ ++ if (!crtl->is_leaf || cfun->calls_alloca ++ || ix86_current_function_calls_tls_descriptor) ++ offset = ROUND_UP (offset, preferred_alignment); + +- save_area = frame_pointer_rtx; +- set = get_varargs_alias_set (); ++ /* We've reached end of stack frame. */ ++ frame->stack_pointer_offset = offset; + +- max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; +- if (max > X86_64_REGPARM_MAX) +- max = X86_64_REGPARM_MAX; ++ /* Size prologue needs to allocate. */ ++ to_allocate = offset - frame->sse_reg_save_offset; + +- for (i = cum->regno; i < max; i++) ++ if ((!to_allocate && frame->nregs <= 1) ++ || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) ++ /* If stack clash probing needs a loop, then it needs a ++ scratch register. But the returned register is only guaranteed ++ to be safe to use after register saves are complete. So if ++ stack clash protections are enabled and the allocated frame is ++ larger than the probe interval, then use pushes to save ++ callee saved registers. 
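Among the lines touched above is the classic SysV x86-64 varargs plumbing: ix86_build_builtin_va_list_64 lays out the four-field __va_list_tag record and setup_incoming_varargs_64 spills the unnamed-argument registers into the reg_save_area it points at (6 GPRs of 8 bytes plus 8 SSE registers of 16 bytes). A plain-C approximation of that record, as a sketch only (assumption: LP64 field sizes):

    #include <stdio.h>

    /* Approximation of the __va_list_tag record built by
       ix86_build_builtin_va_list_64 in the hunk above.  */
    struct va_list_tag_sketch
    {
      unsigned int gp_offset;     /* next unused GPR slot in reg_save_area   */
      unsigned int fp_offset;     /* next unused XMM slot in reg_save_area   */
      void *overflow_arg_area;    /* arguments that were passed on the stack */
      void *reg_save_area;        /* 6 * 8 + 8 * 16 = 176 bytes in this sketch */
    };

    int
    main (void)
    {
      printf ("va_list tag sketch: %zu bytes\n",
              sizeof (struct va_list_tag_sketch));   /* 24 on LP64 */
      return 0;
    }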
*/ ++ || (flag_stack_clash_protection && to_allocate > get_probe_interval ())) ++ frame->save_regs_using_mov = false; ++ ++ if (ix86_using_red_zone () ++ && crtl->sp_is_unchanging ++ && crtl->is_leaf ++ && !ix86_pc_thunk_call_expanded ++ && !ix86_current_function_calls_tls_descriptor) + { +- mem = gen_rtx_MEM (word_mode, +- plus_constant (Pmode, save_area, i * UNITS_PER_WORD)); +- MEM_NOTRAP_P (mem) = 1; +- set_mem_alias_set (mem, set); +- emit_move_insn (mem, +- gen_rtx_REG (word_mode, +- x86_64_int_parameter_registers[i])); ++ frame->red_zone_size = to_allocate; ++ if (frame->save_regs_using_mov) ++ frame->red_zone_size += frame->nregs * UNITS_PER_WORD; ++ if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) ++ frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; + } ++ else ++ frame->red_zone_size = 0; ++ frame->stack_pointer_offset -= frame->red_zone_size; + +- if (ix86_varargs_fpr_size) ++ /* The SEH frame pointer location is near the bottom of the frame. ++ This is enforced by the fact that the difference between the ++ stack pointer and the frame pointer is limited to 240 bytes in ++ the unwind data structure. */ ++ if (TARGET_SEH) + { +- machine_mode smode; +- rtx_code_label *label; +- rtx test; +- +- /* Now emit code to save SSE registers. The AX parameter contains number +- of SSE parameter registers used to call this function, though all we +- actually check here is the zero/non-zero status. */ +- +- label = gen_label_rtx (); +- test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx); +- emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1), +- label)); +- +- /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if +- we used movdqa (i.e. TImode) instead? Perhaps even better would +- be if we could determine the real mode of the data, via a hook +- into pass_stdarg. Ignore all that for now. */ +- smode = V4SFmode; +- if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode)) +- crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode); +- +- max = cum->sse_regno + cfun->va_list_fpr_size / 16; +- if (max > X86_64_SSE_REGPARM_MAX) +- max = X86_64_SSE_REGPARM_MAX; ++ HOST_WIDE_INT diff; + +- for (i = cum->sse_regno; i < max; ++i) ++ /* If we can leave the frame pointer where it is, do so. Also, returns ++ the establisher frame for __builtin_frame_address (0). */ ++ diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset; ++ if (diff <= SEH_MAX_FRAME_SIZE ++ && (diff > 240 || (diff & 15) != 0) ++ && !crtl->accesses_prior_frames) + { +- mem = plus_constant (Pmode, save_area, +- i * 16 + ix86_varargs_gpr_size); +- mem = gen_rtx_MEM (smode, mem); +- MEM_NOTRAP_P (mem) = 1; +- set_mem_alias_set (mem, set); +- set_mem_align (mem, GET_MODE_ALIGNMENT (smode)); +- +- emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i))); ++ /* Ideally we'd determine what portion of the local stack frame ++ (within the constraint of the lowest 240) is most heavily used. ++ But without that complication, simply bias the frame pointer ++ by 128 bytes so as to maximize the amount of the local stack ++ frame that is addressable with 8-bit offsets. 
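The frame-layout code above accumulates a single running offset from the CFA through the saved frame pointer, the GPR save area, the local frame, and the outgoing-argument area. A worked example with hypothetical values, as a sketch that simplifies away the SSE/va_arg areas, SEH, and the realignment path, and treats the preferred and needed alignments as both 16:

    #include <stdio.h>

    #define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

    /* Hypothetical 64-bit SysV function that makes further calls, mirroring
       the offset accumulation in ix86_compute_frame_layout above.  Offsets
       are measured down from the CFA.  */
    int
    main (void)
    {
      long offset = 8;        /* INCOMING_FRAME_SP_OFFSET: the return address */
      int frame_pointer_needed = 1;
      int nregs = 2;          /* call-saved GPRs to preserve, e.g. %rbx, %r12 */
      long size = 40;         /* get_frame_size (): local variables           */
      long align = 16;

      if (frame_pointer_needed)
        offset += 8;                          /* pushed %rbp                  */
      long hard_frame_pointer_offset = offset;

      offset += nregs * 8;                    /* GPR save area                */
      long reg_save_offset = offset;

      offset = ROUND_UP (offset, align);      /* align start of local frame   */
      long frame_pointer_offset = offset;

      offset += size;                         /* locals                       */
      offset = ROUND_UP (offset, align);      /* align outgoing stack boundary */
      long stack_pointer_offset = offset;

      printf ("hard FP saved at CFA-%ld, GPR save area ends at CFA-%ld,\n"
              "locals start at CFA-%ld, SP after prologue at CFA-%ld\n",
              hard_frame_pointer_offset, reg_save_offset,
              frame_pointer_offset, stack_pointer_offset);

      /* The values returned by ix86_initial_elimination_offset fall out of
         the same numbers, e.g. ARG_POINTER -> STACK_POINTER is
         stack_pointer_offset (80 here) and ARG_POINTER -> HARD_FRAME_POINTER
         is hard_frame_pointer_offset (16 here).  */
      return 0;
    }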
*/ ++ frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128; + } +- +- emit_label (label); + } + } + +-static void +-setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) +-{ +- alias_set_type set = get_varargs_alias_set (); +- int i; ++/* This is semi-inlined memory_address_length, but simplified ++ since we know that we're always dealing with reg+offset, and ++ to avoid having to create and discard all that rtl. */ + +- /* Reset to zero, as there might be a sysv vaarg used +- before. */ +- ix86_varargs_gpr_size = 0; +- ix86_varargs_fpr_size = 0; ++static inline int ++choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) ++{ ++ int len = 4; + +- for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) ++ if (offset == 0) + { +- rtx reg, mem; ++ /* EBP and R13 cannot be encoded without an offset. */ ++ len = (regno == BP_REG || regno == R13_REG); ++ } ++ else if (IN_RANGE (offset, -128, 127)) ++ len = 1; + +- mem = gen_rtx_MEM (Pmode, +- plus_constant (Pmode, virtual_incoming_args_rtx, +- i * UNITS_PER_WORD)); +- MEM_NOTRAP_P (mem) = 1; +- set_mem_alias_set (mem, set); ++ /* ESP and R12 must be encoded with a SIB byte. */ ++ if (regno == SP_REG || regno == R12_REG) ++ len++; + +- reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); +- emit_move_insn (mem, reg); ++ return len; ++} ++ ++/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in ++ the frame save area. The register is saved at CFA - CFA_OFFSET. */ ++ ++static bool ++sp_valid_at (HOST_WIDE_INT cfa_offset) ++{ ++ const struct machine_frame_state &fs = cfun->machine->fs; ++ if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset) ++ { ++ /* Validate that the cfa_offset isn't in a "no-man's land". */ ++ gcc_assert (cfa_offset <= fs.sp_realigned_fp_last); ++ return false; ++ } ++ return fs.sp_valid; ++} ++ ++/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in ++ the frame save area. The register is saved at CFA - CFA_OFFSET. */ ++ ++static inline bool ++fp_valid_at (HOST_WIDE_INT cfa_offset) ++{ ++ const struct machine_frame_state &fs = cfun->machine->fs; ++ if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last) ++ { ++ /* Validate that the cfa_offset isn't in a "no-man's land". */ ++ gcc_assert (cfa_offset >= fs.sp_realigned_offset); ++ return false; + } ++ return fs.fp_valid; + } + ++/* Choose a base register based upon alignment requested, speed and/or ++ size. */ ++ + static void +-ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, +- tree type, int *, int no_rtl) ++choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg, ++ HOST_WIDE_INT &base_offset, ++ unsigned int align_reqested, unsigned int *align) + { +- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); +- CUMULATIVE_ARGS next_cum; +- tree fntype; ++ const struct machine_function *m = cfun->machine; ++ unsigned int hfp_align; ++ unsigned int drap_align; ++ unsigned int sp_align; ++ bool hfp_ok = fp_valid_at (cfa_offset); ++ bool drap_ok = m->fs.drap_valid; ++ bool sp_ok = sp_valid_at (cfa_offset); + +- /* This argument doesn't appear to be used anymore. Which is good, +- because the old code here didn't suppress rtl generation. */ +- gcc_assert (!no_rtl); ++ hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY; + +- if (!TARGET_64BIT) +- return; ++ /* Filter out any registers that don't meet the requested alignment ++ criteria. 
*/ ++ if (align_reqested) ++ { ++ if (m->fs.realigned) ++ hfp_align = drap_align = sp_align = crtl->stack_alignment_needed; ++ /* SEH unwind code does do not currently support REG_CFA_EXPRESSION ++ notes (which we would need to use a realigned stack pointer), ++ so disable on SEH targets. */ ++ else if (m->fs.sp_realigned) ++ sp_align = crtl->stack_alignment_needed; + +- fntype = TREE_TYPE (current_function_decl); ++ hfp_ok = hfp_ok && hfp_align >= align_reqested; ++ drap_ok = drap_ok && drap_align >= align_reqested; ++ sp_ok = sp_ok && sp_align >= align_reqested; ++ } + +- /* For varargs, we do not want to skip the dummy va_dcl argument. +- For stdargs, we do want to skip the last named argument. */ +- next_cum = *cum; +- if (stdarg_p (fntype)) +- ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, +- true); ++ if (m->use_fast_prologue_epilogue) ++ { ++ /* Choose the base register most likely to allow the most scheduling ++ opportunities. Generally FP is valid throughout the function, ++ while DRAP must be reloaded within the epilogue. But choose either ++ over the SP due to increased encoding size. */ + +- if (cum->call_abi == MS_ABI) +- setup_incoming_varargs_ms_64 (&next_cum); ++ if (hfp_ok) ++ { ++ base_reg = hard_frame_pointer_rtx; ++ base_offset = m->fs.fp_offset - cfa_offset; ++ } ++ else if (drap_ok) ++ { ++ base_reg = crtl->drap_reg; ++ base_offset = 0 - cfa_offset; ++ } ++ else if (sp_ok) ++ { ++ base_reg = stack_pointer_rtx; ++ base_offset = m->fs.sp_offset - cfa_offset; ++ } ++ } + else +- setup_incoming_varargs_64 (&next_cum); ++ { ++ HOST_WIDE_INT toffset; ++ int len = 16, tlen; ++ ++ /* Choose the base register with the smallest address encoding. ++ With a tie, choose FP > DRAP > SP. */ ++ if (sp_ok) ++ { ++ base_reg = stack_pointer_rtx; ++ base_offset = m->fs.sp_offset - cfa_offset; ++ len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset); ++ } ++ if (drap_ok) ++ { ++ toffset = 0 - cfa_offset; ++ tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset); ++ if (tlen <= len) ++ { ++ base_reg = crtl->drap_reg; ++ base_offset = toffset; ++ len = tlen; ++ } ++ } ++ if (hfp_ok) ++ { ++ toffset = m->fs.fp_offset - cfa_offset; ++ tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset); ++ if (tlen <= len) ++ { ++ base_reg = hard_frame_pointer_rtx; ++ base_offset = toffset; ++ len = tlen; ++ } ++ } ++ } ++ ++ /* Set the align return value. */ ++ if (align) ++ { ++ if (base_reg == stack_pointer_rtx) ++ *align = sp_align; ++ else if (base_reg == crtl->drap_reg) ++ *align = drap_align; ++ else if (base_reg == hard_frame_pointer_rtx) ++ *align = hfp_align; ++ } + } + +-static void +-ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v, +- machine_mode mode, +- tree type, +- int *pretend_size ATTRIBUTE_UNUSED, +- int no_rtl) ++/* Return an RTX that points to CFA_OFFSET within the stack frame and ++ the alignment of address. If ALIGN is non-null, it should point to ++ an alignment value (in bits) that is preferred or zero and will ++ recieve the alignment of the base register that was selected, ++ irrespective of rather or not CFA_OFFSET is a multiple of that ++ alignment value. If it is possible for the base register offset to be ++ non-immediate then SCRATCH_REGNO should specify a scratch register to ++ use. ++ ++ The valid base registers are taken from CFUN->MACHINE->FS. 
*/ ++ ++static rtx ++choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align, ++ unsigned int scratch_regno = INVALID_REGNUM) + { +- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); +- CUMULATIVE_ARGS next_cum; +- tree fntype; ++ rtx base_reg = NULL; ++ HOST_WIDE_INT base_offset = 0; + +- gcc_assert (!no_rtl); ++ /* If a specific alignment is requested, try to get a base register ++ with that alignment first. */ ++ if (align && *align) ++ choose_basereg (cfa_offset, base_reg, base_offset, *align, align); + +- /* Do nothing if we use plain pointer to argument area. */ +- if (!TARGET_64BIT || cum->call_abi == MS_ABI) +- return; ++ if (!base_reg) ++ choose_basereg (cfa_offset, base_reg, base_offset, 0, align); + +- fntype = TREE_TYPE (current_function_decl); ++ gcc_assert (base_reg != NULL); + +- /* For varargs, we do not want to skip the dummy va_dcl argument. +- For stdargs, we do want to skip the last named argument. */ +- next_cum = *cum; +- if (stdarg_p (fntype)) +- ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, +- true); +-} ++ rtx base_offset_rtx = GEN_INT (base_offset); + ++ if (!x86_64_immediate_operand (base_offset_rtx, Pmode)) ++ { ++ gcc_assert (scratch_regno != INVALID_REGNUM); + +-/* Checks if TYPE is of kind va_list char *. */ ++ rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); ++ emit_move_insn (scratch_reg, base_offset_rtx); + +-static bool +-is_va_list_char_pointer (tree type) +-{ +- tree canonic; ++ return gen_rtx_PLUS (Pmode, base_reg, scratch_reg); ++ } + +- /* For 32-bit it is always true. */ +- if (!TARGET_64BIT) +- return true; +- canonic = ix86_canonical_va_list_type (type); +- return (canonic == ms_va_list_type_node +- || (ix86_abi == MS_ABI && canonic == va_list_type_node)); ++ return plus_constant (Pmode, base_reg, base_offset); + } + +-/* Implement va_start. */ ++/* Emit code to save registers in the prologue. */ + + static void +-ix86_va_start (tree valist, rtx nextarg) ++ix86_emit_save_regs (void) + { +- HOST_WIDE_INT words, n_gpr, n_fpr; +- tree f_gpr, f_fpr, f_ovf, f_sav; +- tree gpr, fpr, ovf, sav, t; +- tree type; +- rtx ovf_rtx; +- +- if (flag_split_stack +- && cfun->machine->split_stack_varargs_pointer == NULL_RTX) +- { +- unsigned int scratch_regno; ++ unsigned int regno; ++ rtx_insn *insn; + +- /* When we are splitting the stack, we can't refer to the stack +- arguments using internal_arg_pointer, because they may be on +- the old stack. The split stack prologue will arrange to +- leave a pointer to the old stack arguments in a scratch +- register, which we here copy to a pseudo-register. The split +- stack prologue can't set the pseudo-register directly because +- it (the prologue) runs before any registers have been saved. */ ++ for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) ++ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) ++ { ++ insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++} + +- scratch_regno = split_stack_prologue_scratch_regno (); +- if (scratch_regno != INVALID_REGNUM) +- { +- rtx reg; +- rtx_insn *seq; ++/* Emit a single register save at CFA - CFA_OFFSET. 
*/ + +- reg = gen_reg_rtx (Pmode); +- cfun->machine->split_stack_varargs_pointer = reg; ++static void ++ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, ++ HOST_WIDE_INT cfa_offset) ++{ ++ struct machine_function *m = cfun->machine; ++ rtx reg = gen_rtx_REG (mode, regno); ++ rtx mem, addr, base, insn; ++ unsigned int align = GET_MODE_ALIGNMENT (mode); + +- start_sequence (); +- emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); +- seq = get_insns (); +- end_sequence (); ++ addr = choose_baseaddr (cfa_offset, &align); ++ mem = gen_frame_mem (mode, addr); + +- push_topmost_sequence (); +- emit_insn_after (seq, entry_of_function ()); +- pop_topmost_sequence (); +- } +- } ++ /* The location aligment depends upon the base register. */ ++ align = MIN (GET_MODE_ALIGNMENT (mode), align); ++ gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); ++ set_mem_align (mem, align); + +- /* Only 64bit target needs something special. */ +- if (is_va_list_char_pointer (TREE_TYPE (valist))) ++ insn = emit_insn (gen_rtx_SET (mem, reg)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ base = addr; ++ if (GET_CODE (base) == PLUS) ++ base = XEXP (base, 0); ++ gcc_checking_assert (REG_P (base)); ++ ++ /* When saving registers into a re-aligned local stack frame, avoid ++ any tricky guessing by dwarf2out. */ ++ if (m->fs.realigned) + { +- if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) +- std_expand_builtin_va_start (valist, nextarg); ++ gcc_checking_assert (stack_realign_drap); ++ ++ if (regno == REGNO (crtl->drap_reg)) ++ { ++ /* A bit of a hack. We force the DRAP register to be saved in ++ the re-aligned stack frame, which provides us with a copy ++ of the CFA that will last past the prologue. Install it. */ ++ gcc_checking_assert (cfun->machine->fs.fp_valid); ++ addr = plus_constant (Pmode, hard_frame_pointer_rtx, ++ cfun->machine->fs.fp_offset - cfa_offset); ++ mem = gen_rtx_MEM (mode, addr); ++ add_reg_note (insn, REG_CFA_DEF_CFA, mem); ++ } + else + { +- rtx va_r, next; +- +- va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE); +- next = expand_binop (ptr_mode, add_optab, +- cfun->machine->split_stack_varargs_pointer, +- crtl->args.arg_offset_rtx, +- NULL_RTX, 0, OPTAB_LIB_WIDEN); +- convert_move (va_r, next, 0); ++ /* The frame pointer is a stable reference within the ++ aligned frame. Use it. */ ++ gcc_checking_assert (cfun->machine->fs.fp_valid); ++ addr = plus_constant (Pmode, hard_frame_pointer_rtx, ++ cfun->machine->fs.fp_offset - cfa_offset); ++ mem = gen_rtx_MEM (mode, addr); ++ add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); + } +- return; + } + +- f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); +- f_fpr = DECL_CHAIN (f_gpr); +- f_ovf = DECL_CHAIN (f_fpr); +- f_sav = DECL_CHAIN (f_ovf); +- +- valist = build_simple_mem_ref (valist); +- TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node); +- /* The following should be folded into the MEM_REF offset. */ +- gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist), +- f_gpr, NULL_TREE); +- fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), +- f_fpr, NULL_TREE); +- ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), +- f_ovf, NULL_TREE); +- sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), +- f_sav, NULL_TREE); +- +- /* Count number of gp and fp argument registers used. 
*/ +- words = crtl->args.info.words; +- n_gpr = crtl->args.info.regno; +- n_fpr = crtl->args.info.sse_regno; +- +- if (cfun->va_list_gpr_size) ++ else if (base == stack_pointer_rtx && m->fs.sp_realigned ++ && cfa_offset >= m->fs.sp_realigned_offset) + { +- type = TREE_TYPE (gpr); +- t = build2 (MODIFY_EXPR, type, +- gpr, build_int_cst (type, n_gpr * 8)); +- TREE_SIDE_EFFECTS (t) = 1; +- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); ++ gcc_checking_assert (stack_realign_fp); ++ add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); + } + +- if (TARGET_SSE && cfun->va_list_fpr_size) ++ /* The memory may not be relative to the current CFA register, ++ which means that we may need to generate a new pattern for ++ use by the unwind info. */ ++ else if (base != m->fs.cfa_reg) + { +- type = TREE_TYPE (fpr); +- t = build2 (MODIFY_EXPR, type, fpr, +- build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); +- TREE_SIDE_EFFECTS (t) = 1; +- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); ++ addr = plus_constant (Pmode, m->fs.cfa_reg, ++ m->fs.cfa_offset - cfa_offset); ++ mem = gen_rtx_MEM (mode, addr); ++ add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); + } ++} + +- /* Find the overflow area. */ +- type = TREE_TYPE (ovf); +- if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) +- ovf_rtx = crtl->args.internal_arg_pointer; +- else +- ovf_rtx = cfun->machine->split_stack_varargs_pointer; +- t = make_tree (type, ovf_rtx); +- if (words != 0) +- t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD); ++/* Emit code to save registers using MOV insns. ++ First register is stored at CFA - CFA_OFFSET. */ ++static void ++ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) ++{ ++ unsigned int regno; + +- t = build2 (MODIFY_EXPR, type, ovf, t); +- TREE_SIDE_EFFECTS (t) = 1; +- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) ++ { ++ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); ++ cfa_offset -= UNITS_PER_WORD; ++ } ++} + +- if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) +- { +- /* Find the register save area. +- Prologue of the function save it right above stack frame. */ +- type = TREE_TYPE (sav); +- t = make_tree (type, frame_pointer_rtx); +- if (!ix86_varargs_gpr_size) +- t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX); ++/* Emit code to save SSE registers using MOV insns. ++ First register is stored at CFA - CFA_OFFSET. */ ++static void ++ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) ++{ ++ unsigned int regno; + +- t = build2 (MODIFY_EXPR, type, sav, t); +- TREE_SIDE_EFFECTS (t) = 1; +- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); +- } ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) ++ { ++ ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); ++ cfa_offset -= GET_MODE_SIZE (V4SFmode); ++ } + } + +-/* Implement va_arg. */ ++static GTY(()) rtx queued_cfa_restores; + +-static tree +-ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, +- gimple_seq *post_p) ++/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack ++ manipulation insn. The value is on the stack at CFA - CFA_OFFSET. 
++ Don't add the note if the previously saved value will be left untouched ++ within stack red-zone till return, as unwinders can find the same value ++ in the register and on the stack. */ ++ ++static void ++ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset) + { +- static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; +- tree f_gpr, f_fpr, f_ovf, f_sav; +- tree gpr, fpr, ovf, sav, t; +- int size, rsize; +- tree lab_false, lab_over = NULL_TREE; +- tree addr, t2; +- rtx container; +- int indirect_p = 0; +- tree ptrtype; +- machine_mode nat_mode; +- unsigned int arg_boundary; +- unsigned int type_align; ++ if (!crtl->shrink_wrapped ++ && cfa_offset <= cfun->machine->fs.red_zone_offset) ++ return; + +- /* Only 64bit target needs something special. */ +- if (is_va_list_char_pointer (TREE_TYPE (valist))) +- return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); ++ if (insn) ++ { ++ add_reg_note (insn, REG_CFA_RESTORE, reg); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ else ++ queued_cfa_restores ++ = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); ++} + +- f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); +- f_fpr = DECL_CHAIN (f_gpr); +- f_ovf = DECL_CHAIN (f_fpr); +- f_sav = DECL_CHAIN (f_ovf); ++/* Add queued REG_CFA_RESTORE notes if any to INSN. */ + +- gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), +- valist, f_gpr, NULL_TREE); ++static void ++ix86_add_queued_cfa_restore_notes (rtx insn) ++{ ++ rtx last; ++ if (!queued_cfa_restores) ++ return; ++ for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) ++ ; ++ XEXP (last, 1) = REG_NOTES (insn); ++ REG_NOTES (insn) = queued_cfa_restores; ++ queued_cfa_restores = NULL_RTX; ++ RTX_FRAME_RELATED_P (insn) = 1; ++} + +- fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); +- ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); +- sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); ++/* Expand prologue or epilogue stack adjustment. ++ The pattern exist to put a dependency on all ebp-based memory accesses. ++ STYLE should be negative if instructions should be marked as frame related, ++ zero if %r11 register is live and cannot be freely used and positive ++ otherwise. */ + +- indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); +- if (indirect_p) +- type = build_pointer_type (type); +- size = arg_int_size_in_bytes (type); +- rsize = CEIL (size, UNITS_PER_WORD); ++static rtx ++pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, ++ int style, bool set_cfa) ++{ ++ struct machine_function *m = cfun->machine; ++ rtx insn; ++ bool add_frame_related_expr = false; + +- nat_mode = type_natural_mode (type, NULL, false); +- switch (nat_mode) ++ if (Pmode == SImode) ++ insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset); ++ else if (x86_64_immediate_operand (offset, DImode)) ++ insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset); ++ else + { +- case E_V8SFmode: +- case E_V8SImode: +- case E_V32QImode: +- case E_V16HImode: +- case E_V4DFmode: +- case E_V4DImode: +- case E_V16SFmode: +- case E_V16SImode: +- case E_V64QImode: +- case E_V32HImode: +- case E_V8DFmode: +- case E_V8DImode: +- /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */ +- if (!TARGET_64BIT_MS_ABI) ++ rtx tmp; ++ /* r11 is used by indirect sibcall return as well, set before the ++ epilogue and used after the epilogue. 
*/ ++ if (style) ++ tmp = gen_rtx_REG (DImode, R11_REG); ++ else + { +- container = NULL; +- break; ++ gcc_assert (src != hard_frame_pointer_rtx ++ && dest != hard_frame_pointer_rtx); ++ tmp = hard_frame_pointer_rtx; + } +- /* FALLTHRU */ ++ insn = emit_insn (gen_rtx_SET (tmp, offset)); ++ if (style < 0) ++ add_frame_related_expr = true; + +- default: +- container = construct_container (nat_mode, TYPE_MODE (type), +- type, 0, X86_64_REGPARM_MAX, +- X86_64_SSE_REGPARM_MAX, intreg, +- 0); +- break; ++ insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp); + } + +- /* Pull the value out of the saved registers. */ +- +- addr = create_tmp_var (ptr_type_node, "addr"); +- type_align = TYPE_ALIGN (type); ++ insn = emit_insn (insn); ++ if (style >= 0) ++ ix86_add_queued_cfa_restore_notes (insn); + +- if (container) ++ if (set_cfa) + { +- int needed_intregs, needed_sseregs; +- bool need_temp; +- tree int_addr, sse_addr; +- +- lab_false = create_artificial_label (UNKNOWN_LOCATION); +- lab_over = create_artificial_label (UNKNOWN_LOCATION); +- +- examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); ++ rtx r; + +- need_temp = (!REG_P (container) +- && ((needed_intregs && TYPE_ALIGN (type) > 64) +- || TYPE_ALIGN (type) > 128)); ++ gcc_assert (m->fs.cfa_reg == src); ++ m->fs.cfa_offset += INTVAL (offset); ++ m->fs.cfa_reg = dest; + +- /* In case we are passing structure, verify that it is consecutive block +- on the register save area. If not we need to do moves. */ +- if (!need_temp && !REG_P (container)) ++ r = gen_rtx_PLUS (Pmode, src, offset); ++ r = gen_rtx_SET (dest, r); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, r); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ else if (style < 0) ++ { ++ RTX_FRAME_RELATED_P (insn) = 1; ++ if (add_frame_related_expr) + { +- /* Verify that all registers are strictly consecutive */ +- if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) +- { +- int i; ++ rtx r = gen_rtx_PLUS (Pmode, src, offset); ++ r = gen_rtx_SET (dest, r); ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, r); ++ } ++ } + +- for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) +- { +- rtx slot = XVECEXP (container, 0, i); +- if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i +- || INTVAL (XEXP (slot, 1)) != i * 16) +- need_temp = true; +- } +- } +- else +- { +- int i; ++ if (dest == stack_pointer_rtx) ++ { ++ HOST_WIDE_INT ooffset = m->fs.sp_offset; ++ bool valid = m->fs.sp_valid; ++ bool realigned = m->fs.sp_realigned; + +- for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) +- { +- rtx slot = XVECEXP (container, 0, i); +- if (REGNO (XEXP (slot, 0)) != (unsigned int) i +- || INTVAL (XEXP (slot, 1)) != i * 8) +- need_temp = true; +- } +- } +- } +- if (!need_temp) +- { +- int_addr = addr; +- sse_addr = addr; +- } +- else ++ if (src == hard_frame_pointer_rtx) + { +- int_addr = create_tmp_var (ptr_type_node, "int_addr"); +- sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); ++ valid = m->fs.fp_valid; ++ realigned = false; ++ ooffset = m->fs.fp_offset; + } +- +- /* First ensure that we fit completely in registers. 
*/ +- if (needed_intregs) ++ else if (src == crtl->drap_reg) + { +- t = build_int_cst (TREE_TYPE (gpr), +- (X86_64_REGPARM_MAX - needed_intregs + 1) * 8); +- t = build2 (GE_EXPR, boolean_type_node, gpr, t); +- t2 = build1 (GOTO_EXPR, void_type_node, lab_false); +- t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); +- gimplify_and_add (t, pre_p); ++ valid = m->fs.drap_valid; ++ realigned = false; ++ ooffset = 0; + } +- if (needed_sseregs) ++ else + { +- t = build_int_cst (TREE_TYPE (fpr), +- (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16 +- + X86_64_REGPARM_MAX * 8); +- t = build2 (GE_EXPR, boolean_type_node, fpr, t); +- t2 = build1 (GOTO_EXPR, void_type_node, lab_false); +- t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); +- gimplify_and_add (t, pre_p); ++ /* Else there are two possibilities: SP itself, which we set ++ up as the default above. Or EH_RETURN_STACKADJ_RTX, which is ++ taken care of this by hand along the eh_return path. */ ++ gcc_checking_assert (src == stack_pointer_rtx ++ || offset == const0_rtx); + } + +- /* Compute index to start of area used for integer regs. */ +- if (needed_intregs) +- { +- /* int_addr = gpr + sav; */ +- t = fold_build_pointer_plus (sav, gpr); +- gimplify_assign (int_addr, t, pre_p); +- } +- if (needed_sseregs) +- { +- /* sse_addr = fpr + sav; */ +- t = fold_build_pointer_plus (sav, fpr); +- gimplify_assign (sse_addr, t, pre_p); +- } +- if (need_temp) +- { +- int i, prev_size = 0; +- tree temp = create_tmp_var (type, "va_arg_tmp"); ++ m->fs.sp_offset = ooffset - INTVAL (offset); ++ m->fs.sp_valid = valid; ++ m->fs.sp_realigned = realigned; ++ } ++ return insn; ++} + +- /* addr = &temp; */ +- t = build1 (ADDR_EXPR, build_pointer_type (type), temp); +- gimplify_assign (addr, t, pre_p); ++/* Find an available register to be used as dynamic realign argument ++ pointer regsiter. Such a register will be written in prologue and ++ used in begin of body, so it must not be ++ 1. parameter passing register. ++ 2. GOT pointer. ++ We reuse static-chain register if it is available. Otherwise, we ++ use DI for i386 and R13 for x86-64. We chose R13 since it has ++ shorter encoding. + +- for (i = 0; i < XVECLEN (container, 0); i++) +- { +- rtx slot = XVECEXP (container, 0, i); +- rtx reg = XEXP (slot, 0); +- machine_mode mode = GET_MODE (reg); +- tree piece_type; +- tree addr_type; +- tree daddr_type; +- tree src_addr, src; +- int src_offset; +- tree dest_addr, dest; +- int cur_size = GET_MODE_SIZE (mode); ++ Return: the regno of chosen register. 
*/ + +- gcc_assert (prev_size <= INTVAL (XEXP (slot, 1))); +- prev_size = INTVAL (XEXP (slot, 1)); +- if (prev_size + cur_size > size) +- { +- cur_size = size - prev_size; +- unsigned int nbits = cur_size * BITS_PER_UNIT; +- if (!int_mode_for_size (nbits, 1).exists (&mode)) +- mode = QImode; +- } +- piece_type = lang_hooks.types.type_for_mode (mode, 1); +- if (mode == GET_MODE (reg)) +- addr_type = build_pointer_type (piece_type); +- else +- addr_type = build_pointer_type_for_mode (piece_type, ptr_mode, +- true); +- daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode, +- true); ++static unsigned int ++find_drap_reg (void) ++{ ++ tree decl = cfun->decl; + +- if (SSE_REGNO_P (REGNO (reg))) +- { +- src_addr = sse_addr; +- src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; +- } +- else +- { +- src_addr = int_addr; +- src_offset = REGNO (reg) * 8; +- } +- src_addr = fold_convert (addr_type, src_addr); +- src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset); ++ /* Always use callee-saved register if there are no caller-saved ++ registers. */ ++ if (TARGET_64BIT) ++ { ++ /* Use R13 for nested function or function need static chain. ++ Since function with tail call may use any caller-saved ++ registers in epilogue, DRAP must not use caller-saved ++ register in such case. */ ++ if (DECL_STATIC_CHAIN (decl) ++ || cfun->machine->no_caller_saved_registers ++ || crtl->tail_call_emit) ++ return R13_REG; + +- dest_addr = fold_convert (daddr_type, addr); +- dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size); +- if (cur_size == GET_MODE_SIZE (mode)) +- { +- src = build_va_arg_indirect_ref (src_addr); +- dest = build_va_arg_indirect_ref (dest_addr); ++ return R10_REG; ++ } ++ else ++ { ++ /* Use DI for nested function or function need static chain. ++ Since function with tail call may use any caller-saved ++ registers in epilogue, DRAP must not use caller-saved ++ register in such case. */ ++ if (DECL_STATIC_CHAIN (decl) ++ || cfun->machine->no_caller_saved_registers ++ || crtl->tail_call_emit) ++ return DI_REG; + +- gimplify_assign (dest, src, pre_p); +- } +- else +- { +- tree copy +- = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), +- 3, dest_addr, src_addr, +- size_int (cur_size)); +- gimplify_and_add (copy, pre_p); +- } +- prev_size += cur_size; +- } +- } +- +- if (needed_intregs) +- { +- t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, +- build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); +- gimplify_assign (gpr, t, pre_p); +- /* The GPR save area guarantees only 8-byte alignment. */ +- if (!need_temp) +- type_align = MIN (type_align, 64); +- } +- +- if (needed_sseregs) ++ /* Reuse static chain register if it isn't used for parameter ++ passing. */ ++ if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2) + { +- t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, +- build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); +- gimplify_assign (unshare_expr (fpr), t, pre_p); ++ unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl)); ++ if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0) ++ return CX_REG; + } +- +- gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); +- +- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); +- } +- +- /* ... otherwise out of the overflow area. */ +- +- /* When we align parameter on stack for caller, if the parameter +- alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be +- aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee +- here with caller. 
*/ +- arg_boundary = ix86_function_arg_boundary (VOIDmode, type); +- if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) +- arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; +- +- /* Care for on-stack alignment if needed. */ +- if (arg_boundary <= 64 || size == 0) +- t = ovf; +- else +- { +- HOST_WIDE_INT align = arg_boundary / 8; +- t = fold_build_pointer_plus_hwi (ovf, align - 1); +- t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, +- build_int_cst (TREE_TYPE (t), -align)); ++ return DI_REG; + } ++} + +- gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); +- gimplify_assign (addr, t, pre_p); ++/* Return minimum incoming stack alignment. */ + +- t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD); +- gimplify_assign (unshare_expr (ovf), t, pre_p); ++static unsigned int ++ix86_minimum_incoming_stack_boundary (bool sibcall) ++{ ++ unsigned int incoming_stack_boundary; + +- if (container) +- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); ++ /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */ ++ if (cfun->machine->func_type != TYPE_NORMAL) ++ incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY; ++ /* Prefer the one specified at command line. */ ++ else if (ix86_user_incoming_stack_boundary) ++ incoming_stack_boundary = ix86_user_incoming_stack_boundary; ++ /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary ++ if -mstackrealign is used, it isn't used for sibcall check and ++ estimated stack alignment is 128bit. */ ++ else if (!sibcall ++ && ix86_force_align_arg_pointer ++ && crtl->stack_alignment_estimated == 128) ++ incoming_stack_boundary = MIN_STACK_BOUNDARY; ++ else ++ incoming_stack_boundary = ix86_default_incoming_stack_boundary; + +- type = build_aligned_type (type, type_align); +- ptrtype = build_pointer_type_for_mode (type, ptr_mode, true); +- addr = fold_convert (ptrtype, addr); ++ /* Incoming stack alignment can be changed on individual functions ++ via force_align_arg_pointer attribute. We use the smallest ++ incoming stack boundary. */ ++ if (incoming_stack_boundary > MIN_STACK_BOUNDARY ++ && lookup_attribute ("force_align_arg_pointer", ++ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) ++ incoming_stack_boundary = MIN_STACK_BOUNDARY; + +- if (indirect_p) +- addr = build_va_arg_indirect_ref (addr); +- return build_va_arg_indirect_ref (addr); +-} +- +-/* Return true if OPNUM's MEM should be matched +- in movabs* patterns. */ ++ /* The incoming stack frame has to be aligned at least at ++ parm_stack_boundary. */ ++ if (incoming_stack_boundary < crtl->parm_stack_boundary) ++ incoming_stack_boundary = crtl->parm_stack_boundary; + +-bool +-ix86_check_movabs (rtx insn, int opnum) +-{ +- rtx set, mem; ++ /* Stack at entrance of main is aligned by runtime. We use the ++ smallest incoming stack boundary. */ ++ if (incoming_stack_boundary > MAIN_STACK_BOUNDARY ++ && DECL_NAME (current_function_decl) ++ && MAIN_NAME_P (DECL_NAME (current_function_decl)) ++ && DECL_FILE_SCOPE_P (current_function_decl)) ++ incoming_stack_boundary = MAIN_STACK_BOUNDARY; + +- set = PATTERN (insn); +- if (GET_CODE (set) == PARALLEL) +- set = XVECEXP (set, 0, 0); +- gcc_assert (GET_CODE (set) == SET); +- mem = XEXP (set, opnum); +- while (SUBREG_P (mem)) +- mem = SUBREG_REG (mem); +- gcc_assert (MEM_P (mem)); +- return volatile_ok || !MEM_VOLATILE_P (mem); ++ return incoming_stack_boundary; + } + +-/* Return false if INSN contains a MEM with a non-default address space. 
*/ +-bool +-ix86_check_no_addr_space (rtx insn) +-{ +- subrtx_var_iterator::array_type array; +- FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL) +- { +- rtx x = *iter; +- if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))) +- return false; +- } +- return true; +-} +- +-/* Initialize the table of extra 80387 mathematical constants. */ ++/* Update incoming stack boundary and estimated stack alignment. */ + + static void +-init_ext_80387_constants (void) ++ix86_update_stack_boundary (void) + { +- static const char * cst[5] = +- { +- "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ +- "0.6931471805599453094286904741849753009", /* 1: fldln2 */ +- "1.4426950408889634073876517827983434472", /* 2: fldl2e */ +- "3.3219280948873623478083405569094566090", /* 3: fldl2t */ +- "3.1415926535897932385128089594061862044", /* 4: fldpi */ +- }; +- int i; ++ ix86_incoming_stack_boundary ++ = ix86_minimum_incoming_stack_boundary (false); + +- for (i = 0; i < 5; i++) +- { +- real_from_string (&ext_80387_constants_table[i], cst[i]); +- /* Ensure each constant is rounded to XFmode precision. */ +- real_convert (&ext_80387_constants_table[i], +- XFmode, &ext_80387_constants_table[i]); +- } ++ /* x86_64 vararg needs 16byte stack alignment for register save area. */ ++ if (TARGET_64BIT ++ && cfun->stdarg ++ && crtl->stack_alignment_estimated < 128) ++ crtl->stack_alignment_estimated = 128; + +- ext_80387_constants_init = 1; ++ /* __tls_get_addr needs to be called with 16-byte aligned stack. */ ++ if (ix86_tls_descriptor_calls_expanded_in_cfun ++ && crtl->preferred_stack_boundary < 128) ++ crtl->preferred_stack_boundary = 128; + } + +-/* Return non-zero if the constant is something that +- can be loaded with a special instruction. */ ++/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is ++ needed or an rtx for DRAP otherwise. */ + +-int +-standard_80387_constant_p (rtx x) ++static rtx ++ix86_get_drap_rtx (void) + { +- machine_mode mode = GET_MODE (x); +- +- const REAL_VALUE_TYPE *r; +- +- if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode))) +- return -1; +- +- if (x == CONST0_RTX (mode)) +- return 1; +- if (x == CONST1_RTX (mode)) +- return 2; +- +- r = CONST_DOUBLE_REAL_VALUE (x); ++ /* We must use DRAP if there are outgoing arguments on stack and ++ ACCUMULATE_OUTGOING_ARGS is false. */ ++ if (ix86_force_drap ++ || (cfun->machine->outgoing_args_on_stack ++ && !ACCUMULATE_OUTGOING_ARGS)) ++ crtl->need_drap = true; + +- /* For XFmode constants, try to find a special 80387 instruction when +- optimizing for size or on those CPUs that benefit from them. */ +- if (mode == XFmode +- && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)) ++ if (stack_realign_drap) + { +- int i; ++ /* Assign DRAP to vDRAP and returns vDRAP */ ++ unsigned int regno = find_drap_reg (); ++ rtx drap_vreg; ++ rtx arg_ptr; ++ rtx_insn *seq, *insn; + +- if (! 
ext_80387_constants_init) +- init_ext_80387_constants (); ++ arg_ptr = gen_rtx_REG (Pmode, regno); ++ crtl->drap_reg = arg_ptr; + +- for (i = 0; i < 5; i++) +- if (real_identical (r, &ext_80387_constants_table[i])) +- return i + 3; ++ start_sequence (); ++ drap_vreg = copy_to_reg (arg_ptr); ++ seq = get_insns (); ++ end_sequence (); ++ ++ insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); ++ if (!optimize) ++ { ++ add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ return drap_vreg; + } ++ else ++ return NULL; ++} + +- /* Load of the constant -0.0 or -1.0 will be split as +- fldz;fchs or fld1;fchs sequence. */ +- if (real_isnegzero (r)) +- return 8; +- if (real_identical (r, &dconstm1)) +- return 9; ++/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ + +- return 0; ++static rtx ++ix86_internal_arg_pointer (void) ++{ ++ return virtual_incoming_args_rtx; + } + +-/* Return the opcode of the special instruction to be used to load +- the constant X. */ ++struct scratch_reg { ++ rtx reg; ++ bool saved; ++}; + +-const char * +-standard_80387_constant_opcode (rtx x) ++/* Return a short-lived scratch register for use on function entry. ++ In 32-bit mode, it is valid only after the registers are saved ++ in the prologue. This register must be released by means of ++ release_scratch_register_on_entry once it is dead. */ ++ ++static void ++get_scratch_register_on_entry (struct scratch_reg *sr) + { +- switch (standard_80387_constant_p (x)) +- { +- case 1: +- return "fldz"; +- case 2: +- return "fld1"; +- case 3: +- return "fldlg2"; +- case 4: +- return "fldln2"; +- case 5: +- return "fldl2e"; +- case 6: +- return "fldl2t"; +- case 7: +- return "fldpi"; +- case 8: +- case 9: +- return "#"; +- default: +- gcc_unreachable (); +- } +-} +- +-/* Return the CONST_DOUBLE representing the 80387 constant that is +- loaded by the specified special instruction. The argument IDX +- matches the return value from standard_80387_constant_p. */ +- +-rtx +-standard_80387_constant_rtx (int idx) +-{ +- int i; ++ int regno; + +- if (! ext_80387_constants_init) +- init_ext_80387_constants (); ++ sr->saved = false; + +- switch (idx) ++ if (TARGET_64BIT) + { +- case 3: +- case 4: +- case 5: +- case 6: +- case 7: +- i = idx - 3; +- break; ++ /* We always use R11 in 64-bit mode. */ ++ regno = R11_REG; ++ } ++ else ++ { ++ tree decl = current_function_decl, fntype = TREE_TYPE (decl); ++ bool fastcall_p ++ = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; ++ bool thiscall_p ++ = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; ++ bool static_chain_p = DECL_STATIC_CHAIN (decl); ++ int regparm = ix86_function_regparm (fntype, decl); ++ int drap_regno ++ = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM; + +- default: +- gcc_unreachable (); ++ /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax ++ for the static chain register. */ ++ if ((regparm < 1 || (fastcall_p && !static_chain_p)) ++ && drap_regno != AX_REG) ++ regno = AX_REG; ++ /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx ++ for the static chain register. */ ++ else if (thiscall_p && !static_chain_p && drap_regno != AX_REG) ++ regno = AX_REG; ++ else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG) ++ regno = DX_REG; ++ /* ecx is the static chain register. 
*/ ++ else if (regparm < 3 && !fastcall_p && !thiscall_p ++ && !static_chain_p ++ && drap_regno != CX_REG) ++ regno = CX_REG; ++ else if (ix86_save_reg (BX_REG, true, false)) ++ regno = BX_REG; ++ /* esi is the static chain register. */ ++ else if (!(regparm == 3 && static_chain_p) ++ && ix86_save_reg (SI_REG, true, false)) ++ regno = SI_REG; ++ else if (ix86_save_reg (DI_REG, true, false)) ++ regno = DI_REG; ++ else ++ { ++ regno = (drap_regno == AX_REG ? DX_REG : AX_REG); ++ sr->saved = true; ++ } + } + +- return const_double_from_real_value (ext_80387_constants_table[i], +- XFmode); ++ sr->reg = gen_rtx_REG (Pmode, regno); ++ if (sr->saved) ++ { ++ rtx_insn *insn = emit_insn (gen_push (sr->reg)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + } + +-/* Return 1 if X is all bits 0 and 2 if X is all bits 1 +- in supported SSE/AVX vector mode. */ +- +-int +-standard_sse_constant_p (rtx x, machine_mode pred_mode) +-{ +- machine_mode mode; +- +- if (!TARGET_SSE) +- return 0; ++/* Release a scratch register obtained from the preceding function. + +- mode = GET_MODE (x); ++ If RELEASE_VIA_POP is true, we just pop the register off the stack ++ to release it. This is what non-Linux systems use with -fstack-check. + +- if (x == const0_rtx || const0_operand (x, mode)) +- return 1; ++ Otherwise we use OFFSET to locate the saved register and the ++ allocated stack space becomes part of the local frame and is ++ deallocated by the epilogue. */ + +- if (x == constm1_rtx || vector_all_ones_operand (x, mode)) ++static void ++release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset, ++ bool release_via_pop) ++{ ++ if (sr->saved) + { +- /* VOIDmode integer constant, get mode from the predicate. */ +- if (mode == VOIDmode) +- mode = pred_mode; ++ if (release_via_pop) ++ { ++ struct machine_function *m = cfun->machine; ++ rtx x, insn = emit_insn (gen_pop (sr->reg)); + +- switch (GET_MODE_SIZE (mode)) ++ /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */ ++ RTX_FRAME_RELATED_P (insn) = 1; ++ x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD)); ++ x = gen_rtx_SET (stack_pointer_rtx, x); ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); ++ m->fs.sp_offset -= UNITS_PER_WORD; ++ } ++ else + { +- case 64: +- if (TARGET_AVX512F) +- return 2; +- break; +- case 32: +- if (TARGET_AVX2) +- return 2; +- break; +- case 16: +- if (TARGET_SSE2) +- return 2; +- break; +- case 0: +- /* VOIDmode */ +- gcc_unreachable (); +- default: +- break; ++ rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); ++ x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x)); ++ emit_insn (x); + } + } +- +- return 0; + } + +-/* Return the opcode of the special instruction to be used to load +- the constant operands[1] into operands[0]. */ ++/* Emit code to adjust the stack pointer by SIZE bytes while probing it. + +-const char * +-standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) +-{ +- machine_mode mode; +- rtx x = operands[1]; ++ This differs from the next routine in that it tries hard to prevent ++ attacks that jump the stack guard. Thus it is never allowed to allocate ++ more than PROBE_INTERVAL bytes of stack space without a suitable ++ probe. + +- gcc_assert (TARGET_SSE); ++ INT_REGISTERS_SAVED is true if integer registers have already been ++ pushed on the stack. 
*/ + +- mode = GET_MODE (x); ++static void ++ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size, ++ const bool int_registers_saved) ++{ ++ struct machine_function *m = cfun->machine; + +- if (x == const0_rtx || const0_operand (x, mode)) ++ /* If this function does not statically allocate stack space, then ++ no probes are needed. */ ++ if (!size) + { +- switch (get_attr_mode (insn)) +- { +- case MODE_TI: +- if (!EXT_REX_SSE_REG_P (operands[0])) +- return "%vpxor\t%0, %d0"; +- /* FALLTHRU */ +- case MODE_XI: +- case MODE_OI: +- if (EXT_REX_SSE_REG_P (operands[0])) +- return (TARGET_AVX512VL +- ? "vpxord\t%x0, %x0, %x0" +- : "vpxord\t%g0, %g0, %g0"); +- return "vpxor\t%x0, %x0, %x0"; ++ /* However, the allocation of space via pushes for register ++ saves could be viewed as allocating space, but without the ++ need to probe. */ ++ if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed) ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ else ++ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); ++ return; ++ } + +- case MODE_V2DF: +- if (!EXT_REX_SSE_REG_P (operands[0])) +- return "%vxorpd\t%0, %d0"; +- /* FALLTHRU */ +- case MODE_V8DF: +- case MODE_V4DF: +- if (!EXT_REX_SSE_REG_P (operands[0])) +- return "vxorpd\t%x0, %x0, %x0"; +- else if (TARGET_AVX512DQ) +- return (TARGET_AVX512VL +- ? "vxorpd\t%x0, %x0, %x0" +- : "vxorpd\t%g0, %g0, %g0"); +- else +- return (TARGET_AVX512VL +- ? "vpxorq\t%x0, %x0, %x0" +- : "vpxorq\t%g0, %g0, %g0"); ++ /* If we are a noreturn function, then we have to consider the ++ possibility that we're called via a jump rather than a call. + +- case MODE_V4SF: +- if (!EXT_REX_SSE_REG_P (operands[0])) +- return "%vxorps\t%0, %d0"; +- /* FALLTHRU */ +- case MODE_V16SF: +- case MODE_V8SF: +- if (!EXT_REX_SSE_REG_P (operands[0])) +- return "vxorps\t%x0, %x0, %x0"; +- else if (TARGET_AVX512DQ) +- return (TARGET_AVX512VL +- ? "vxorps\t%x0, %x0, %x0" +- : "vxorps\t%g0, %g0, %g0"); +- else +- return (TARGET_AVX512VL +- ? "vpxord\t%x0, %x0, %x0" +- : "vpxord\t%g0, %g0, %g0"); ++ Thus we don't have the implicit probe generated by saving the ++ return address into the stack at the call. Thus, the stack ++ pointer could be anywhere in the guard page. The safe thing ++ to do is emit a probe now. + +- default: +- gcc_unreachable (); +- } +- } +- else if (x == constm1_rtx || vector_all_ones_operand (x, mode)) +- { +- enum attr_mode insn_mode = get_attr_mode (insn); +- +- switch (insn_mode) +- { +- case MODE_XI: +- case MODE_V8DF: +- case MODE_V16SF: +- gcc_assert (TARGET_AVX512F); +- return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; +- +- case MODE_OI: +- case MODE_V4DF: +- case MODE_V8SF: +- gcc_assert (TARGET_AVX2); +- /* FALLTHRU */ +- case MODE_TI: +- case MODE_V2DF: +- case MODE_V4SF: +- gcc_assert (TARGET_SSE2); +- if (!EXT_REX_SSE_REG_P (operands[0])) +- return (TARGET_AVX +- ? "vpcmpeqd\t%0, %0, %0" +- : "pcmpeqd\t%0, %0"); +- else if (TARGET_AVX512VL) +- return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}"; +- else +- return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; ++ The probe can be avoided if we have already emitted any callee ++ register saves into the stack or have a frame pointer (which will ++ have been saved as well). Those saves will function as implicit ++ probes. + +- default: +- gcc_unreachable (); ++ ?!? This should be revamped to work like aarch64 and s390 where ++ we track the offset from the most recent probe. Normally that ++ offset would be zero. 
For a noreturn function we would reset ++ it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then ++ we just probe when we cross PROBE_INTERVAL. */ ++ if (TREE_THIS_VOLATILE (cfun->decl) ++ && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)) ++ { ++ /* We can safely use any register here since we're just going to push ++ its value and immediately pop it back. But we do try and avoid ++ argument passing registers so as not to introduce dependencies in ++ the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */ ++ rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG); ++ rtx_insn *insn_push = emit_insn (gen_push (dummy_reg)); ++ rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg)); ++ m->fs.sp_offset -= UNITS_PER_WORD; ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ { ++ m->fs.cfa_offset -= UNITS_PER_WORD; ++ rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); ++ x = gen_rtx_SET (stack_pointer_rtx, x); ++ add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x); ++ RTX_FRAME_RELATED_P (insn_push) = 1; ++ x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); ++ x = gen_rtx_SET (stack_pointer_rtx, x); ++ add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x); ++ RTX_FRAME_RELATED_P (insn_pop) = 1; + } +- } +- +- gcc_unreachable (); +-} +- +-/* Returns true if INSN can be transformed from a memory load +- to a supported FP constant load. */ ++ emit_insn (gen_blockage ()); ++ } + +-bool +-ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst) +-{ +- rtx src = find_constant_src (insn); ++ /* If we allocate less than the size of the guard statically, ++ then no probing is necessary, but we do need to allocate ++ the stack. */ ++ if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE))) ++ { ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-size), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); ++ return; ++ } + +- gcc_assert (REG_P (dst)); ++ /* We're allocating a large enough stack frame that we need to ++ emit probes. Either emit them inline or in a loop depending ++ on the size. */ ++ HOST_WIDE_INT probe_interval = get_probe_interval (); ++ if (size <= 4 * probe_interval) ++ { ++ HOST_WIDE_INT i; ++ for (i = probe_interval; i <= size; i += probe_interval) ++ { ++ /* Allocate PROBE_INTERVAL bytes. */ ++ rtx insn ++ = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-probe_interval), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ add_reg_note (insn, REG_STACK_CHECK, const0_rtx); + +- if (src == NULL +- || (SSE_REGNO_P (REGNO (dst)) +- && standard_sse_constant_p (src, GET_MODE (dst)) != 1) +- || (STACK_REGNO_P (REGNO (dst)) +- && standard_80387_constant_p (src) < 1)) +- return false; ++ /* And probe at *sp. */ ++ emit_stack_probe (stack_pointer_rtx); ++ emit_insn (gen_blockage ()); ++ } + +- return true; +-} ++ /* We need to allocate space for the residual, but we do not need ++ to probe the residual. */ ++ HOST_WIDE_INT residual = (i - probe_interval - size); ++ if (residual) ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (residual), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ dump_stack_clash_frame_info (PROBE_INLINE, residual != 0); ++ } ++ else ++ { ++ /* We expect the GP registers to be saved when probes are used ++ as the probing sequences might need a scratch register and ++ the routine to allocate one assumes the integer registers ++ have already been saved. 
*/ ++ gcc_assert (int_registers_saved); + +-/* Returns true if OP contains a symbol reference */ ++ struct scratch_reg sr; ++ get_scratch_register_on_entry (&sr); + +-bool +-symbolic_reference_mentioned_p (rtx op) +-{ +- const char *fmt; +- int i; ++ /* If we needed to save a register, then account for any space ++ that was pushed (we are not going to pop the register when ++ we do the restore). */ ++ if (sr.saved) ++ size -= UNITS_PER_WORD; + +- if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) +- return true; ++ /* Step 1: round SIZE down to a multiple of the interval. */ ++ HOST_WIDE_INT rounded_size = size & -probe_interval; + +- fmt = GET_RTX_FORMAT (GET_CODE (op)); +- for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) +- { +- if (fmt[i] == 'E') ++ /* Step 2: compute final value of the loop counter. Use lea if ++ possible. */ ++ rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size); ++ rtx insn; ++ if (address_no_seg_operand (addr, Pmode)) ++ insn = emit_insn (gen_rtx_SET (sr.reg, addr)); ++ else + { +- int j; +- +- for (j = XVECLEN (op, i) - 1; j >= 0; j--) +- if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) +- return true; ++ emit_move_insn (sr.reg, GEN_INT (-rounded_size)); ++ insn = emit_insn (gen_rtx_SET (sr.reg, ++ gen_rtx_PLUS (Pmode, sr.reg, ++ stack_pointer_rtx))); ++ } ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ { ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, sr.reg, ++ m->fs.cfa_offset + rounded_size)); ++ RTX_FRAME_RELATED_P (insn) = 1; + } + +- else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) +- return true; +- } ++ /* Step 3: the loop. */ ++ rtx size_rtx = GEN_INT (rounded_size); ++ insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, ++ size_rtx)); ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ { ++ m->fs.cfa_offset += rounded_size; ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, stack_pointer_rtx, ++ m->fs.cfa_offset)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ m->fs.sp_offset += rounded_size; ++ emit_insn (gen_blockage ()); + +- return false; +-} ++ /* Step 4: adjust SP if we cannot assert at compile-time that SIZE ++ is equal to ROUNDED_SIZE. */ + +-/* Return true if it is appropriate to emit `ret' instructions in the +- body of a function. Do this only if the epilogue is simple, needing a +- couple of insns. Prior to reloading, we can't tell how many registers +- must be saved, so return false then. Return false if there is no frame +- marker to de-allocate. */ ++ if (size != rounded_size) ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (rounded_size - size), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); + +-bool +-ix86_can_use_return_insn_p (void) +-{ +- if (ix86_function_naked (current_function_decl)) +- return false; ++ /* This does not deallocate the space reserved for the scratch ++ register. That will be deallocated in the epilogue. */ ++ release_scratch_register_on_entry (&sr, size, false); ++ } + +- /* Don't use `ret' instruction in interrupt handler. */ +- if (! reload_completed +- || frame_pointer_needed +- || cfun->machine->func_type != TYPE_NORMAL) +- return 0; ++ /* Make sure nothing is scheduled before we are done. */ ++ emit_insn (gen_blockage ()); ++} + +- /* Don't allow more than 32k pop, since that's all we can do +- with one instruction. 
*/ +- if (crtl->args.pops_args && crtl->args.size >= 32768) +- return 0; ++/* Emit code to adjust the stack pointer by SIZE bytes while probing it. + +- struct ix86_frame &frame = cfun->machine->frame; +- return (frame.stack_pointer_offset == UNITS_PER_WORD +- && (frame.nregs + frame.nsseregs) == 0); +-} +- +-/* Value should be nonzero if functions must have frame pointers. +- Zero means the frame pointer need not be set up (and parms may +- be accessed via the stack pointer) in functions that seem suitable. */ ++ INT_REGISTERS_SAVED is true if integer registers have already been ++ pushed on the stack. */ + +-static bool +-ix86_frame_pointer_required (void) ++static void ++ix86_adjust_stack_and_probe (HOST_WIDE_INT size, ++ const bool int_registers_saved) + { +- /* If we accessed previous frames, then the generated code expects +- to be able to access the saved ebp value in our frame. */ +- if (cfun->machine->accesses_prev_frame) +- return true; ++ /* We skip the probe for the first interval + a small dope of 4 words and ++ probe that many bytes past the specified size to maintain a protection ++ area at the botton of the stack. */ ++ const int dope = 4 * UNITS_PER_WORD; ++ rtx size_rtx = GEN_INT (size), last; + +- /* Several x86 os'es need a frame pointer for other reasons, +- usually pertaining to setjmp. */ +- if (SUBTARGET_FRAME_POINTER_REQUIRED) +- return true; ++ /* See if we have a constant small number of probes to generate. If so, ++ that's the easy case. The run-time loop is made up of 9 insns in the ++ generic case while the compile-time loop is made up of 3+2*(n-1) insns ++ for n # of intervals. */ ++ if (size <= 4 * get_probe_interval ()) ++ { ++ HOST_WIDE_INT i, adjust; ++ bool first_probe = true; + +- /* For older 32-bit runtimes setjmp requires valid frame-pointer. */ +- if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) +- return true; ++ /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for ++ values of N from 1 until it exceeds SIZE. If only one probe is ++ needed, this will not generate any code. Then adjust and probe ++ to PROBE_INTERVAL + SIZE. */ ++ for (i = get_probe_interval (); i < size; i += get_probe_interval ()) ++ { ++ if (first_probe) ++ { ++ adjust = 2 * get_probe_interval () + dope; ++ first_probe = false; ++ } ++ else ++ adjust = get_probe_interval (); + +- /* Win64 SEH, very large frames need a frame-pointer as maximum stack +- allocation is 4GB. */ +- if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) +- return true; ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ -adjust))); ++ emit_stack_probe (stack_pointer_rtx); ++ } + +- /* SSE saves require frame-pointer when stack is misaligned. */ +- if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128) +- return true; +- +- /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER +- turns off the frame pointer by default. Turn it back on now if +- we've not got a leaf function. */ +- if (TARGET_OMIT_LEAF_FRAME_POINTER +- && (!crtl->is_leaf +- || ix86_current_function_calls_tls_descriptor)) +- return true; ++ if (first_probe) ++ adjust = size + get_probe_interval () + dope; ++ else ++ adjust = size + get_probe_interval () - i; + +- if (crtl->profile && !flag_fentry) +- return true; ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ -adjust))); ++ emit_stack_probe (stack_pointer_rtx); + +- return false; +-} ++ /* Adjust back to account for the additional first interval. 
*/ ++ last = emit_insn (gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ (get_probe_interval () ++ + dope)))); ++ } + +-/* Record that the current function accesses previous call frames. */ ++ /* Otherwise, do the same as above, but in a loop. Note that we must be ++ extra careful with variables wrapping around because we might be at ++ the very top (or the very bottom) of the address space and we have ++ to be able to handle this case properly; in particular, we use an ++ equality test for the loop condition. */ ++ else ++ { ++ /* We expect the GP registers to be saved when probes are used ++ as the probing sequences might need a scratch register and ++ the routine to allocate one assumes the integer registers ++ have already been saved. */ ++ gcc_assert (int_registers_saved); + +-void +-ix86_setup_frame_addresses (void) +-{ +- cfun->machine->accesses_prev_frame = 1; +-} +- +-#ifndef USE_HIDDEN_LINKONCE +-# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0) +-# define USE_HIDDEN_LINKONCE 1 +-# else +-# define USE_HIDDEN_LINKONCE 0 +-# endif +-#endif ++ HOST_WIDE_INT rounded_size; ++ struct scratch_reg sr; + +-/* Label count for call and return thunks. It is used to make unique +- labels in call and return thunks. */ +-static int indirectlabelno; ++ get_scratch_register_on_entry (&sr); + +-/* True if call thunk function is needed. */ +-static bool indirect_thunk_needed = false; ++ /* If we needed to save a register, then account for any space ++ that was pushed (we are not going to pop the register when ++ we do the restore). */ ++ if (sr.saved) ++ size -= UNITS_PER_WORD; + +-/* Bit masks of integer registers, which contain branch target, used +- by call thunk functions. */ +-static int indirect_thunks_used; ++ /* Step 1: round SIZE to the previous multiple of the interval. */ + +-/* True if return thunk function is needed. */ +-static bool indirect_return_needed = false; ++ rounded_size = ROUND_DOWN (size, get_probe_interval ()); + +-/* True if return thunk function via CX is needed. */ +-static bool indirect_return_via_cx; + +-#ifndef INDIRECT_LABEL +-# define INDIRECT_LABEL "LIND" +-#endif ++ /* Step 2: compute initial and final value of the loop counter. */ + +-/* Indicate what prefix is needed for an indirect branch. */ +-enum indirect_thunk_prefix +-{ +- indirect_thunk_prefix_none, +- indirect_thunk_prefix_nt +-}; ++ /* SP = SP_0 + PROBE_INTERVAL. */ ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ - (get_probe_interval () + dope)))); + +-/* Return the prefix needed for an indirect branch INSN. */ ++ /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */ ++ if (rounded_size <= (HOST_WIDE_INT_1 << 31)) ++ emit_insn (gen_rtx_SET (sr.reg, ++ plus_constant (Pmode, stack_pointer_rtx, ++ -rounded_size))); ++ else ++ { ++ emit_move_insn (sr.reg, GEN_INT (-rounded_size)); ++ emit_insn (gen_rtx_SET (sr.reg, ++ gen_rtx_PLUS (Pmode, sr.reg, ++ stack_pointer_rtx))); ++ } + +-enum indirect_thunk_prefix +-indirect_thunk_need_prefix (rtx_insn *insn) +-{ +- enum indirect_thunk_prefix need_prefix; +- if ((cfun->machine->indirect_branch_type +- == indirect_branch_thunk_extern) +- && ix86_notrack_prefixed_insn_p (insn)) +- { +- /* NOTRACK prefix is only used with external thunk so that it +- can be properly updated to support CET at run-time. 
*/ +- need_prefix = indirect_thunk_prefix_nt; +- } +- else +- need_prefix = indirect_thunk_prefix_none; +- return need_prefix; +-} + +-/* Fills in the label name that should be used for the indirect thunk. */ ++ /* Step 3: the loop + +-static void +-indirect_thunk_name (char name[32], unsigned int regno, +- enum indirect_thunk_prefix need_prefix, +- bool ret_p) +-{ +- if (regno != INVALID_REGNUM && regno != CX_REG && ret_p) +- gcc_unreachable (); ++ do ++ { ++ SP = SP + PROBE_INTERVAL ++ probe at SP ++ } ++ while (SP != LAST_ADDR) + +- if (USE_HIDDEN_LINKONCE) +- { +- const char *prefix; ++ adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for ++ values of N from 1 until it is equal to ROUNDED_SIZE. */ + +- if (need_prefix == indirect_thunk_prefix_nt +- && regno != INVALID_REGNUM) +- { +- /* NOTRACK prefix is only used with external thunk via +- register so that NOTRACK prefix can be added to indirect +- branch via register to support CET at run-time. */ +- prefix = "_nt"; +- } +- else +- prefix = ""; ++ emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx)); + +- const char *ret = ret_p ? "return" : "indirect"; + +- if (regno != INVALID_REGNUM) ++ /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot ++ assert at compile-time that SIZE is equal to ROUNDED_SIZE. */ ++ ++ if (size != rounded_size) + { +- const char *reg_prefix; +- if (LEGACY_INT_REGNO_P (regno)) +- reg_prefix = TARGET_64BIT ? "r" : "e"; +- else +- reg_prefix = ""; +- sprintf (name, "__x86_%s_thunk%s_%s%s", +- ret, prefix, reg_prefix, reg_names[regno]); ++ emit_insn (gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ rounded_size - size))); ++ emit_stack_probe (stack_pointer_rtx); + } +- else +- sprintf (name, "__x86_%s_thunk%s", ret, prefix); ++ ++ /* Adjust back to account for the additional first interval. */ ++ last = emit_insn (gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ (get_probe_interval () ++ + dope)))); ++ ++ /* This does not deallocate the space reserved for the scratch ++ register. That will be deallocated in the epilogue. */ ++ release_scratch_register_on_entry (&sr, size, false); + } +- else ++ ++ /* Even if the stack pointer isn't the CFA register, we need to correctly ++ describe the adjustments made to it, in particular differentiate the ++ frame-related ones from the frame-unrelated ones. */ ++ if (size > 0) + { +- if (regno != INVALID_REGNUM) +- ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno); +- else +- { +- if (ret_p) +- ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0); +- else +- ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0); +- } ++ rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2)); ++ XVECEXP (expr, 0, 0) ++ = gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, -size)); ++ XVECEXP (expr, 0, 1) ++ = gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ get_probe_interval () + dope + size)); ++ add_reg_note (last, REG_FRAME_RELATED_EXPR, expr); ++ RTX_FRAME_RELATED_P (last) = 1; ++ ++ cfun->machine->fs.sp_offset += size; + } ++ ++ /* Make sure nothing is scheduled before we are done. */ ++ emit_insn (gen_blockage ()); + } + +-/* Output a call and return thunk for indirect branch. If REGNO != -1, +- the function address is in REGNO and the call and return thunk looks like: ++/* Adjust the stack pointer up to REG while probing it. 
*/ + +- call L2 +- L1: +- pause +- lfence +- jmp L1 +- L2: +- mov %REG, (%sp) +- ret ++const char * ++output_adjust_stack_and_probe (rtx reg) ++{ ++ static int labelno = 0; ++ char loop_lab[32]; ++ rtx xops[2]; + +- Otherwise, the function address is on the top of stack and the +- call and return thunk looks like: ++ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + +- call L2 +- L1: +- pause +- lfence +- jmp L1 +- L2: +- lea WORD_SIZE(%sp), %sp +- ret +- */ ++ /* Loop. */ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + +-static void +-output_indirect_thunk (unsigned int regno) +-{ +- char indirectlabel1[32]; +- char indirectlabel2[32]; ++ /* SP = SP + PROBE_INTERVAL. */ ++ xops[0] = stack_pointer_rtx; ++ xops[1] = GEN_INT (get_probe_interval ()); ++ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); + +- ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL, +- indirectlabelno++); +- ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL, +- indirectlabelno++); ++ /* Probe at SP. */ ++ xops[1] = const0_rtx; ++ output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops); + +- /* Call */ +- fputs ("\tcall\t", asm_out_file); +- assemble_name_raw (asm_out_file, indirectlabel2); +- fputc ('\n', asm_out_file); ++ /* Test if SP == LAST_ADDR. */ ++ xops[0] = stack_pointer_rtx; ++ xops[1] = reg; ++ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); + +- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); ++ /* Branch. */ ++ fputs ("\tjne\t", asm_out_file); ++ assemble_name_raw (asm_out_file, loop_lab); ++ fputc ('\n', asm_out_file); + +- /* AMD and Intel CPUs prefer each a different instruction as loop filler. +- Usage of both pause + lfence is compromise solution. */ +- fprintf (asm_out_file, "\tpause\n\tlfence\n"); ++ return ""; ++} + +- /* Jump. */ +- fputs ("\tjmp\t", asm_out_file); +- assemble_name_raw (asm_out_file, indirectlabel1); +- fputc ('\n', asm_out_file); ++/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, ++ inclusive. These are offsets from the current stack pointer. + +- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); ++ INT_REGISTERS_SAVED is true if integer registers have already been ++ pushed on the stack. */ + +- /* The above call insn pushed a word to stack. Adjust CFI info. */ +- if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ()) ++static void ++ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, ++ const bool int_registers_saved) ++{ ++ /* See if we have a constant small number of probes to generate. If so, ++ that's the easy case. The run-time loop is made up of 6 insns in the ++ generic case while the compile-time loop is made up of n insns for n # ++ of intervals. */ ++ if (size <= 6 * get_probe_interval ()) + { +- if (! dwarf2out_do_cfi_asm ()) +- { +- dw_cfi_ref xcfi = ggc_cleared_alloc (); +- xcfi->dw_cfi_opc = DW_CFA_advance_loc4; +- xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2); +- vec_safe_push (cfun->fde->dw_fde_cfi, xcfi); +- } +- dw_cfi_ref xcfi = ggc_cleared_alloc (); +- xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset; +- xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD; +- vec_safe_push (cfun->fde->dw_fde_cfi, xcfi); +- dwarf2out_emit_cfi (xcfi); +- } ++ HOST_WIDE_INT i; + +- if (regno != INVALID_REGNUM) +- { +- /* MOV. 
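Note the contrast with the function above: ix86_emit_probe_stack_range only touches pages in a window below the current stack pointer and never moves the pointer itself; in the constant-count case it emits one probe per interval past FIRST plus a final probe at FIRST + SIZE. A throwaway sketch of the offsets that produces is shown below, with values chosen for illustration rather than taken from the patch.

#include <stdio.h>

#define PROBE_INTERVAL 4096

/* List the SP-relative offsets probed by the constant-count case:
   one probe per interval past FIRST, then a final probe at FIRST + SIZE.  */
static void print_probe_offsets (long first, long size)
{
  long i;

  for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
    printf ("probe at sp - %ld\n", first + i);

  printf ("probe at sp - %ld\n", first + size);
}

int main (void)
{
  /* e.g. skip a 16 KiB protection area, then probe a 20 KiB range.  */
  print_probe_offsets (4 * PROBE_INTERVAL, 5 * PROBE_INTERVAL);
  return 0;
}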
*/ +- rtx xops[2]; +- xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx); +- xops[1] = gen_rtx_REG (word_mode, regno); +- output_asm_insn ("mov\t{%1, %0|%0, %1}", xops); ++ /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until ++ it exceeds SIZE. If only one probe is needed, this will not ++ generate any code. Then probe at FIRST + SIZE. */ ++ for (i = get_probe_interval (); i < size; i += get_probe_interval ()) ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ -(first + i))); ++ ++ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, ++ -(first + size))); + } ++ ++ /* Otherwise, do the same as above, but in a loop. Note that we must be ++ extra careful with variables wrapping around because we might be at ++ the very top (or the very bottom) of the address space and we have ++ to be able to handle this case properly; in particular, we use an ++ equality test for the loop condition. */ + else + { +- /* LEA. */ +- rtx xops[2]; +- xops[0] = stack_pointer_rtx; +- xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); +- output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops); +- } +- +- fputs ("\tret\n", asm_out_file); +-} ++ /* We expect the GP registers to be saved when probes are used ++ as the probing sequences might need a scratch register and ++ the routine to allocate one assumes the integer registers ++ have already been saved. */ ++ gcc_assert (int_registers_saved); + +-/* Output a funtion with a call and return thunk for indirect branch. +- If REGNO != INVALID_REGNUM, the function address is in REGNO. +- Otherwise, the function address is on the top of stack. Thunk is +- used for function return if RET_P is true. */ ++ HOST_WIDE_INT rounded_size, last; ++ struct scratch_reg sr; + +-static void +-output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix, +- unsigned int regno, bool ret_p) +-{ +- char name[32]; +- tree decl; ++ get_scratch_register_on_entry (&sr); + +- /* Create __x86_indirect_thunk. */ +- indirect_thunk_name (name, regno, need_prefix, ret_p); +- decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, +- get_identifier (name), +- build_function_type_list (void_type_node, NULL_TREE)); +- DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, +- NULL_TREE, void_type_node); +- TREE_PUBLIC (decl) = 1; +- TREE_STATIC (decl) = 1; +- DECL_IGNORED_P (decl) = 1; + +-#if TARGET_MACHO +- if (TARGET_MACHO) +- { +- switch_to_section (darwin_sections[picbase_thunk_section]); +- fputs ("\t.weak_definition\t", asm_out_file); +- assemble_name (asm_out_file, name); +- fputs ("\n\t.private_extern\t", asm_out_file); +- assemble_name (asm_out_file, name); +- putc ('\n', asm_out_file); +- ASM_OUTPUT_LABEL (asm_out_file, name); +- DECL_WEAK (decl) = 1; +- } +- else +-#endif +- if (USE_HIDDEN_LINKONCE) +- { +- cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); ++ /* Step 1: round SIZE to the previous multiple of the interval. 
*/ + +- targetm.asm_out.unique_section (decl, 0); +- switch_to_section (get_named_section (decl, NULL, 0)); ++ rounded_size = ROUND_DOWN (size, get_probe_interval ()); + +- targetm.asm_out.globalize_label (asm_out_file, name); +- fputs ("\t.hidden\t", asm_out_file); +- assemble_name (asm_out_file, name); +- putc ('\n', asm_out_file); +- ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); +- } +- else +- { +- switch_to_section (text_section); +- ASM_OUTPUT_LABEL (asm_out_file, name); +- } + +- DECL_INITIAL (decl) = make_node (BLOCK); +- current_function_decl = decl; +- allocate_struct_function (decl, false); +- init_function_start (decl); +- /* We're about to hide the function body from callees of final_* by +- emitting it directly; tell them we're a thunk, if they care. */ +- cfun->is_thunk = true; +- first_function_block_is_cold = false; +- /* Make sure unwind info is emitted for the thunk if needed. */ +- final_start_function (emit_barrier (), asm_out_file, 1); ++ /* Step 2: compute initial and final value of the loop counter. */ + +- output_indirect_thunk (regno); ++ /* TEST_OFFSET = FIRST. */ ++ emit_move_insn (sr.reg, GEN_INT (-first)); + +- final_end_function (); +- init_insn_lengths (); +- free_after_compilation (cfun); +- set_cfun (NULL); +- current_function_decl = NULL; +-} ++ /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */ ++ last = first + rounded_size; + +-static int pic_labels_used; + +-/* Fills in the label name that should be used for a pc thunk for +- the given register. */ ++ /* Step 3: the loop + +-static void +-get_pc_thunk_name (char name[32], unsigned int regno) +-{ +- gcc_assert (!TARGET_64BIT); ++ do ++ { ++ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL ++ probe at TEST_ADDR ++ } ++ while (TEST_ADDR != LAST_ADDR) + +- if (USE_HIDDEN_LINKONCE) +- sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]); +- else +- ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); +-} ++ probes at FIRST + N * PROBE_INTERVAL for values of N from 1 ++ until it is equal to ROUNDED_SIZE. */ + ++ emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last))); + +-/* This function generates code for -fpic that loads %ebx with +- the return address of the caller and then returns. */ + +-static void +-ix86_code_end (void) +-{ +- rtx xops[2]; +- unsigned int regno; ++ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time ++ that SIZE is equal to ROUNDED_SIZE. */ + +- if (indirect_return_needed) +- output_indirect_thunk_function (indirect_thunk_prefix_none, +- INVALID_REGNUM, true); +- if (indirect_return_via_cx) +- output_indirect_thunk_function (indirect_thunk_prefix_none, +- CX_REG, true); +- if (indirect_thunk_needed) +- output_indirect_thunk_function (indirect_thunk_prefix_none, +- INVALID_REGNUM, false); ++ if (size != rounded_size) ++ emit_stack_probe (plus_constant (Pmode, ++ gen_rtx_PLUS (Pmode, ++ stack_pointer_rtx, ++ sr.reg), ++ rounded_size - size)); + +- for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++) +- { +- unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1; +- if ((indirect_thunks_used & (1 << i))) +- output_indirect_thunk_function (indirect_thunk_prefix_none, +- regno, false); ++ release_scratch_register_on_entry (&sr, size, true); + } + +- for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++) +- { +- char name[32]; +- tree decl; ++ /* Make sure nothing is scheduled before we are done. 
*/ ++ emit_insn (gen_blockage ()); ++} + +- if ((indirect_thunks_used & (1 << regno))) +- output_indirect_thunk_function (indirect_thunk_prefix_none, +- regno, false); +- +- if (!(pic_labels_used & (1 << regno))) +- continue; +- +- get_pc_thunk_name (name, regno); +- +- decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, +- get_identifier (name), +- build_function_type_list (void_type_node, NULL_TREE)); +- DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, +- NULL_TREE, void_type_node); +- TREE_PUBLIC (decl) = 1; +- TREE_STATIC (decl) = 1; +- DECL_IGNORED_P (decl) = 1; +- +-#if TARGET_MACHO +- if (TARGET_MACHO) +- { +- switch_to_section (darwin_sections[picbase_thunk_section]); +- fputs ("\t.weak_definition\t", asm_out_file); +- assemble_name (asm_out_file, name); +- fputs ("\n\t.private_extern\t", asm_out_file); +- assemble_name (asm_out_file, name); +- putc ('\n', asm_out_file); +- ASM_OUTPUT_LABEL (asm_out_file, name); +- DECL_WEAK (decl) = 1; +- } +- else +-#endif +- if (USE_HIDDEN_LINKONCE) +- { +- cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); +- +- targetm.asm_out.unique_section (decl, 0); +- switch_to_section (get_named_section (decl, NULL, 0)); +- +- targetm.asm_out.globalize_label (asm_out_file, name); +- fputs ("\t.hidden\t", asm_out_file); +- assemble_name (asm_out_file, name); +- putc ('\n', asm_out_file); +- ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); +- } +- else +- { +- switch_to_section (text_section); +- ASM_OUTPUT_LABEL (asm_out_file, name); +- } +- +- DECL_INITIAL (decl) = make_node (BLOCK); +- current_function_decl = decl; +- allocate_struct_function (decl, false); +- init_function_start (decl); +- /* We're about to hide the function body from callees of final_* by +- emitting it directly; tell them we're a thunk, if they care. */ +- cfun->is_thunk = true; +- first_function_block_is_cold = false; +- /* Make sure unwind info is emitted for the thunk if needed. */ +- final_start_function (emit_barrier (), asm_out_file, 1); +- +- /* Pad stack IP move with 4 instructions (two NOPs count +- as one instruction). */ +- if (TARGET_PAD_SHORT_FUNCTION) +- { +- int i = 8; +- +- while (i--) +- fputs ("\tnop\n", asm_out_file); +- } +- +- xops[0] = gen_rtx_REG (Pmode, regno); +- xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); +- output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); +- output_asm_insn ("%!ret", NULL); +- final_end_function (); +- init_insn_lengths (); +- free_after_compilation (cfun); +- set_cfun (NULL); +- current_function_decl = NULL; +- } +- +- if (flag_split_stack) +- file_end_indicate_split_stack (); +-} +- +-/* Emit code for the SET_GOT patterns. */ ++/* Probe a range of stack addresses from REG to END, inclusive. These are ++ offsets from the current stack pointer. */ + + const char * +-output_set_got (rtx dest, rtx label) ++output_probe_stack_range (rtx reg, rtx end) + { ++ static int labelno = 0; ++ char loop_lab[32]; + rtx xops[3]; + +- xops[0] = dest; +- +- if (TARGET_VXWORKS_RTP && flag_pic) +- { +- /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ +- xops[2] = gen_rtx_MEM (Pmode, +- gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); +- output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); +- +- /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. +- Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as +- an unadorned address. 
*/ +- xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); +- SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; +- output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); +- return ""; +- } +- +- xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); +- +- if (flag_pic) +- { +- char name[32]; +- get_pc_thunk_name (name, REGNO (dest)); +- pic_labels_used |= 1 << REGNO (dest); ++ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); + +- xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); +- xops[2] = gen_rtx_MEM (QImode, xops[2]); +- output_asm_insn ("%!call\t%X2", xops); ++ /* Loop. */ ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); + +-#if TARGET_MACHO +- /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here. +- This is what will be referenced by the Mach-O PIC subsystem. */ +- if (machopic_should_output_picbase_label () || !label) +- ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); ++ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ ++ xops[0] = reg; ++ xops[1] = GEN_INT (get_probe_interval ()); ++ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); + +- /* When we are restoring the pic base at the site of a nonlocal label, +- and we decided to emit the pic base above, we will still output a +- local label used for calculating the correction offset (even though +- the offset will be 0 in that case). */ +- if (label) +- targetm.asm_out.internal_label (asm_out_file, "L", +- CODE_LABEL_NUMBER (label)); +-#endif +- } +- else +- { +- if (TARGET_MACHO) +- /* We don't need a pic base, we're not producing pic. */ +- gcc_unreachable (); ++ /* Probe at TEST_ADDR. */ ++ xops[0] = stack_pointer_rtx; ++ xops[1] = reg; ++ xops[2] = const0_rtx; ++ output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops); + +- xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); +- output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); +- targetm.asm_out.internal_label (asm_out_file, "L", +- CODE_LABEL_NUMBER (XEXP (xops[2], 0))); +- } ++ /* Test if TEST_ADDR == LAST_ADDR. */ ++ xops[0] = reg; ++ xops[1] = end; ++ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); + +- if (!TARGET_MACHO) +- output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); ++ /* Branch. */ ++ fputs ("\tjne\t", asm_out_file); ++ assemble_name_raw (asm_out_file, loop_lab); ++ fputc ('\n', asm_out_file); + + return ""; + } + +-/* Generate an "push" pattern for input ARG. */ ++/* Return true if stack frame is required. Update STACK_ALIGNMENT ++ to the largest alignment, in bits, of stack slot used if stack ++ frame is required and CHECK_STACK_SLOT is true. */ + +-static rtx +-gen_push (rtx arg) ++static bool ++ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, ++ bool check_stack_slot) + { +- struct machine_function *m = cfun->machine; ++ HARD_REG_SET set_up_by_prologue, prologue_used; ++ basic_block bb; + +- if (m->fs.cfa_reg == stack_pointer_rtx) +- m->fs.cfa_offset += UNITS_PER_WORD; +- m->fs.sp_offset += UNITS_PER_WORD; ++ CLEAR_HARD_REG_SET (prologue_used); ++ CLEAR_HARD_REG_SET (set_up_by_prologue); ++ add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); ++ add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); ++ add_to_hard_reg_set (&set_up_by_prologue, Pmode, ++ HARD_FRAME_POINTER_REGNUM); + +- if (REG_P (arg) && GET_MODE (arg) != word_mode) +- arg = gen_rtx_REG (word_mode, REGNO (arg)); ++ /* The preferred stack alignment is the minimum stack alignment. 
*/ ++ if (stack_alignment > crtl->preferred_stack_boundary) ++ stack_alignment = crtl->preferred_stack_boundary; + +- return gen_rtx_SET (gen_rtx_MEM (word_mode, +- gen_rtx_PRE_DEC (Pmode, +- stack_pointer_rtx)), +- arg); +-} ++ bool require_stack_frame = false; + +-/* Generate an "pop" pattern for input ARG. */ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ rtx_insn *insn; ++ FOR_BB_INSNS (bb, insn) ++ if (NONDEBUG_INSN_P (insn) ++ && requires_stack_frame_p (insn, prologue_used, ++ set_up_by_prologue)) ++ { ++ require_stack_frame = true; + +-static rtx +-gen_pop (rtx arg) +-{ +- if (REG_P (arg) && GET_MODE (arg) != word_mode) +- arg = gen_rtx_REG (word_mode, REGNO (arg)); ++ if (check_stack_slot) ++ { ++ /* Find the maximum stack alignment. */ ++ subrtx_iterator::array_type array; ++ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) ++ if (MEM_P (*iter) ++ && (reg_mentioned_p (stack_pointer_rtx, ++ *iter) ++ || reg_mentioned_p (frame_pointer_rtx, ++ *iter))) ++ { ++ unsigned int alignment = MEM_ALIGN (*iter); ++ if (alignment > stack_alignment) ++ stack_alignment = alignment; ++ } ++ } ++ } ++ } + +- return gen_rtx_SET (arg, +- gen_rtx_MEM (word_mode, +- gen_rtx_POST_INC (Pmode, +- stack_pointer_rtx))); ++ return require_stack_frame; + } + +-/* Return >= 0 if there is an unused call-clobbered register available +- for the entire function. */ ++/* Finalize stack_realign_needed and frame_pointer_needed flags, which ++ will guide prologue/epilogue to be generated in correct form. */ + +-static unsigned int +-ix86_select_alt_pic_regnum (void) ++static void ++ix86_finalize_stack_frame_flags (void) + { +- if (ix86_use_pseudo_pic_reg ()) +- return INVALID_REGNUM; ++ /* Check if stack realign is really needed after reload, and ++ stores result in cfun */ ++ unsigned int incoming_stack_boundary ++ = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary ++ ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); ++ unsigned int stack_alignment ++ = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor ++ ? crtl->max_used_stack_slot_alignment ++ : crtl->stack_alignment_needed); ++ unsigned int stack_realign ++ = (incoming_stack_boundary < stack_alignment); ++ bool recompute_frame_layout_p = false; + +- if (crtl->is_leaf +- && !crtl->profile +- && !ix86_current_function_calls_tls_descriptor) ++ if (crtl->stack_realign_finalized) + { +- int i, drap; +- /* Can't use the same register for both PIC and DRAP. */ +- if (crtl->drap_reg) +- drap = REGNO (crtl->drap_reg); +- else +- drap = -1; +- for (i = 2; i >= 0; --i) +- if (i != drap && !df_regs_ever_live_p (i)) +- return i; ++ /* After stack_realign_needed is finalized, we can't no longer ++ change it. */ ++ gcc_assert (crtl->stack_realign_needed == stack_realign); ++ return; + } + +- return INVALID_REGNUM; +-} +- +-/* Return true if REGNO is used by the epilogue. */ ++ /* If the only reason for frame_pointer_needed is that we conservatively ++ assumed stack realignment might be needed or -fno-omit-frame-pointer ++ is used, but in the end nothing that needed the stack alignment had ++ been spilled nor stack access, clear frame_pointer_needed and say we ++ don't need stack realignment. */ ++ if ((stack_realign || (!flag_omit_frame_pointer && optimize)) ++ && frame_pointer_needed ++ && crtl->is_leaf ++ && crtl->sp_is_unchanging ++ && !ix86_current_function_calls_tls_descriptor ++ && !crtl->accesses_prior_frames ++ && !cfun->calls_alloca ++ && !crtl->calls_eh_return ++ /* See ira_setup_eliminable_regset for the rationale. 
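As a reading aid for the new helper above: ix86_find_max_used_stack_alignment reports whether any real insn forces a stack frame and, when asked, records the largest alignment of any stack- or frame-pointer-based memory access it encounters (the initial clamp to the preferred stack boundary happens once up front). Stripped of the RTL iterators, the core reduces to roughly the following; the mem_access records are a hypothetical stand-in, not a GCC data structure.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for one memory access found in an insn.  */
struct mem_access
{
  bool uses_sp_or_fp;        /* address mentions the stack/frame pointer */
  unsigned int align_bits;   /* known alignment of the access, in bits */
  bool needs_frame;          /* the insn containing it requires a frame */
};

/* Return whether a stack frame is required; if CHECK_SLOTS is true,
   raise *STACK_ALIGNMENT to the largest alignment actually used.  */
static bool
find_max_used_stack_alignment (const struct mem_access *accs, int n,
                               unsigned int *stack_alignment,
                               bool check_slots)
{
  bool require_frame = false;

  for (int i = 0; i < n; i++)
    if (accs[i].needs_frame)
      {
        require_frame = true;
        if (check_slots
            && accs[i].uses_sp_or_fp
            && accs[i].align_bits > *stack_alignment)
          *stack_alignment = accs[i].align_bits;
      }

  return require_frame;
}

int main (void)
{
  struct mem_access accs[] = {
    { true, 64, true }, { true, 256, true }, { false, 128, false },
  };
  unsigned int align = 64;
  bool frame = find_max_used_stack_alignment (accs, 3, &align, true);
  printf ("frame=%d max_align=%u bits\n", frame, align);
  return 0;
}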
*/ ++ && !(STACK_CHECK_MOVING_SP ++ && flag_stack_check ++ && flag_exceptions ++ && cfun->can_throw_non_call_exceptions) ++ && !ix86_frame_pointer_required () ++ && get_frame_size () == 0 ++ && ix86_nsaved_sseregs () == 0 ++ && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) ++ { ++ if (ix86_find_max_used_stack_alignment (stack_alignment, ++ stack_realign)) ++ { ++ /* Stack frame is required. If stack alignment needed is less ++ than incoming stack boundary, don't realign stack. */ ++ stack_realign = incoming_stack_boundary < stack_alignment; ++ if (!stack_realign) ++ { ++ crtl->max_used_stack_slot_alignment ++ = incoming_stack_boundary; ++ crtl->stack_alignment_needed ++ = incoming_stack_boundary; ++ /* Also update preferred_stack_boundary for leaf ++ functions. */ ++ crtl->preferred_stack_boundary ++ = incoming_stack_boundary; ++ } ++ } ++ else ++ { ++ /* If drap has been set, but it actually isn't live at the ++ start of the function, there is no reason to set it up. */ ++ if (crtl->drap_reg) ++ { ++ basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; ++ if (! REGNO_REG_SET_P (DF_LR_IN (bb), ++ REGNO (crtl->drap_reg))) ++ { ++ crtl->drap_reg = NULL_RTX; ++ crtl->need_drap = false; ++ } ++ } ++ else ++ cfun->machine->no_drap_save_restore = true; + +-bool +-ix86_epilogue_uses (int regno) +-{ +- /* If there are no caller-saved registers, we preserve all registers, +- except for MMX and x87 registers which aren't supported when saving +- and restoring registers. Don't explicitly save SP register since +- it is always preserved. */ +- return (epilogue_completed +- && cfun->machine->no_caller_saved_registers +- && !fixed_regs[regno] +- && !STACK_REGNO_P (regno) +- && !MMX_REGNO_P (regno)); +-} ++ frame_pointer_needed = false; ++ stack_realign = false; ++ crtl->max_used_stack_slot_alignment = incoming_stack_boundary; ++ crtl->stack_alignment_needed = incoming_stack_boundary; ++ crtl->stack_alignment_estimated = incoming_stack_boundary; ++ if (crtl->preferred_stack_boundary > incoming_stack_boundary) ++ crtl->preferred_stack_boundary = incoming_stack_boundary; ++ df_finish_pass (true); ++ df_scan_alloc (NULL); ++ df_scan_blocks (); ++ df_compute_regs_ever_live (true); ++ df_analyze (); + +-/* Return nonzero if register REGNO can be used as a scratch register +- in peephole2. */ ++ if (flag_var_tracking) ++ { ++ /* Since frame pointer is no longer available, replace it with ++ stack pointer - UNITS_PER_WORD in debug insns. */ ++ df_ref ref, next; ++ for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM); ++ ref; ref = next) ++ { ++ next = DF_REF_NEXT_REG (ref); ++ if (!DF_REF_INSN_INFO (ref)) ++ continue; + +-static bool +-ix86_hard_regno_scratch_ok (unsigned int regno) +-{ +- /* If there are no caller-saved registers, we can't use any register +- as a scratch register after epilogue and use REGNO as scratch +- register only if it has been used before to avoid saving and +- restoring it. */ +- return (!cfun->machine->no_caller_saved_registers +- || (!epilogue_completed +- && df_regs_ever_live_p (regno))); +-} ++ /* Make sure the next ref is for a different instruction, ++ so that we're not affected by the rescan. */ ++ rtx_insn *insn = DF_REF_INSN (ref); ++ while (next && DF_REF_INSN (next) == insn) ++ next = DF_REF_NEXT_REG (next); + +-/* Return TRUE if we need to save REGNO. 
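The long condition above gates the late attempt to drop a conservatively assumed frame pointer and stack realignment; only when every requirement holds is the body scanned and the dataflow information rebuilt as shown. Flattened to plain booleans, it looks roughly like the sketch below; this is a simplification in which the moving-SP stack-checking clause and the subsequent DRAP and df bookkeeping are omitted, and the field names are descriptive rather than GCC identifiers.

#include <stdbool.h>
#include <stdio.h>

/* Simplified inputs for the late frame-pointer-elimination gate.  */
struct frame_facts
{
  bool realign_assumed;         /* realignment or -fno-omit-frame-pointer */
  bool frame_pointer_needed;
  bool is_leaf;
  bool sp_is_unchanging;
  bool calls_tls_descriptor;
  bool accesses_prior_frames;
  bool calls_alloca;
  bool calls_eh_return;
  bool frame_pointer_required;  /* target says FP is mandatory */
  bool frame_size_is_zero;
  bool saves_sse_regs;
  bool has_varargs_save_area;
};

/* True when it is even worth scanning the body (with the helper above)
   to try to drop the frame pointer and stack realignment.  */
static bool
may_drop_frame_pointer (const struct frame_facts *f)
{
  return f->realign_assumed
         && f->frame_pointer_needed
         && f->is_leaf
         && f->sp_is_unchanging
         && !f->calls_tls_descriptor
         && !f->accesses_prior_frames
         && !f->calls_alloca
         && !f->calls_eh_return
         && !f->frame_pointer_required
         && f->frame_size_is_zero
         && !f->saves_sse_regs
         && !f->has_varargs_save_area;
}

int main (void)
{
  struct frame_facts f = { true, true, true, true,
                           false, false, false, false,
                           false, true, false, false };
  printf ("%s\n", may_drop_frame_pointer (&f) ? "drop" : "keep");
  return 0;
}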
*/ ++ if (DEBUG_INSN_P (insn)) ++ { ++ bool changed = false; ++ for (; ref != next; ref = DF_REF_NEXT_REG (ref)) ++ { ++ rtx *loc = DF_REF_LOC (ref); ++ if (*loc == hard_frame_pointer_rtx) ++ { ++ *loc = plus_constant (Pmode, ++ stack_pointer_rtx, ++ -UNITS_PER_WORD); ++ changed = true; ++ } ++ } ++ if (changed) ++ df_insn_rescan (insn); ++ } ++ } ++ } + +-static bool +-ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined) +-{ +- /* If there are no caller-saved registers, we preserve all registers, +- except for MMX and x87 registers which aren't supported when saving +- and restoring registers. Don't explicitly save SP register since +- it is always preserved. */ +- if (cfun->machine->no_caller_saved_registers) +- { +- /* Don't preserve registers used for function return value. */ +- rtx reg = crtl->return_rtx; +- if (reg) +- { +- unsigned int i = REGNO (reg); +- unsigned int nregs = REG_NREGS (reg); +- while (nregs-- > 0) +- if ((i + nregs) == regno) +- return false; ++ recompute_frame_layout_p = true; + } +- +- return (df_regs_ever_live_p (regno) +- && !fixed_regs[regno] +- && !STACK_REGNO_P (regno) +- && !MMX_REGNO_P (regno) +- && (regno != HARD_FRAME_POINTER_REGNUM +- || !frame_pointer_needed)); + } +- +- if (regno == REAL_PIC_OFFSET_TABLE_REGNUM +- && pic_offset_table_rtx) ++ else if (crtl->max_used_stack_slot_alignment >= 128) + { +- if (ix86_use_pseudo_pic_reg ()) +- { +- /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to +- _mcount in prologue. */ +- if (!TARGET_64BIT && flag_pic && crtl->profile) +- return true; +- } +- else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) +- || crtl->profile +- || crtl->calls_eh_return +- || crtl->uses_const_pool +- || cfun->has_nonlocal_label) +- return ix86_select_alt_pic_regnum () == INVALID_REGNUM; ++ /* We don't need to realign stack. max_used_stack_alignment is ++ used to decide how stack frame should be aligned. This is ++ independent of any psABIs nor 32-bit vs 64-bit. It is always ++ safe to compute max_used_stack_alignment. We compute it only ++ if 128-bit aligned load/store may be generated on misaligned ++ stack slot which will lead to segfault. */ ++ if (ix86_find_max_used_stack_alignment (stack_alignment, true)) ++ cfun->machine->max_used_stack_alignment ++ = stack_alignment / BITS_PER_UNIT; + } + +- if (crtl->calls_eh_return && maybe_eh_return) ++ if (crtl->stack_realign_needed != stack_realign) ++ recompute_frame_layout_p = true; ++ crtl->stack_realign_needed = stack_realign; ++ crtl->stack_realign_finalized = true; ++ if (recompute_frame_layout_p) ++ ix86_compute_frame_layout (); ++} ++ ++/* Delete SET_GOT right after entry block if it is allocated to reg. 
*/ ++ ++static void ++ix86_elim_entry_set_got (rtx reg) ++{ ++ basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; ++ rtx_insn *c_insn = BB_HEAD (bb); ++ if (!NONDEBUG_INSN_P (c_insn)) ++ c_insn = next_nonnote_nondebug_insn (c_insn); ++ if (c_insn && NONJUMP_INSN_P (c_insn)) + { +- unsigned i; +- for (i = 0; ; i++) ++ rtx pat = PATTERN (c_insn); ++ if (GET_CODE (pat) == PARALLEL) + { +- unsigned test = EH_RETURN_DATA_REGNO (i); +- if (test == INVALID_REGNUM) +- break; +- if (test == regno) +- return true; ++ rtx vec = XVECEXP (pat, 0, 0); ++ if (GET_CODE (vec) == SET ++ && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT ++ && REGNO (XEXP (vec, 0)) == REGNO (reg)) ++ delete_insn (c_insn); + } + } +- +- if (ignore_outlined && cfun->machine->call_ms2sysv) +- { +- unsigned count = cfun->machine->call_ms2sysv_extra_regs +- + xlogue_layout::MIN_REGS; +- if (xlogue_layout::is_stub_managed_reg (regno, count)) +- return false; +- } +- +- if (crtl->drap_reg +- && regno == REGNO (crtl->drap_reg) +- && !cfun->machine->no_drap_save_restore) +- return true; +- +- return (df_regs_ever_live_p (regno) +- && !call_used_regs[regno] +- && !fixed_regs[regno] +- && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); + } + +-/* Return number of saved general prupose registers. */ +- +-static int +-ix86_nsaved_regs (void) ++static rtx ++gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) + { +- int nregs = 0; +- int regno; ++ rtx addr, mem; + +- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) +- nregs ++; +- return nregs; ++ if (offset) ++ addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset)); ++ mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg); ++ return gen_rtx_SET (store ? mem : reg, store ? reg : mem); + } + +-/* Return number of saved SSE registers. */ +- +-static int +-ix86_nsaved_sseregs (void) ++static inline rtx ++gen_frame_load (rtx reg, rtx frame_reg, int offset) + { +- int nregs = 0; +- int regno; +- +- if (!TARGET_64BIT_MS_ABI) +- return 0; +- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) +- nregs ++; +- return nregs; ++ return gen_frame_set (reg, frame_reg, offset, false); + } + +-/* Given FROM and TO register numbers, say whether this elimination is +- allowed. If stack alignment is needed, we can only replace argument +- pointer with hard frame pointer, or replace frame pointer with stack +- pointer. Otherwise, frame pointer elimination is automatically +- handled and all other eliminations are valid. */ +- +-static bool +-ix86_can_eliminate (const int from, const int to) ++static inline rtx ++gen_frame_store (rtx reg, rtx frame_reg, int offset) + { +- if (stack_realign_fp) +- return ((from == ARG_POINTER_REGNUM +- && to == HARD_FRAME_POINTER_REGNUM) +- || (from == FRAME_POINTER_REGNUM +- && to == STACK_POINTER_REGNUM)); +- else +- return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true; ++ return gen_frame_set (reg, frame_reg, offset, true); + } + +-/* Return the offset between two registers, one to be eliminated, and the other +- its replacement, at the start of a routine. 
*/ +- +-HOST_WIDE_INT +-ix86_initial_elimination_offset (int from, int to) ++static void ++ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame) + { +- struct ix86_frame &frame = cfun->machine->frame; ++ struct machine_function *m = cfun->machine; ++ const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS ++ + m->call_ms2sysv_extra_regs; ++ rtvec v = rtvec_alloc (ncregs + 1); ++ unsigned int align, i, vi = 0; ++ rtx_insn *insn; ++ rtx sym, addr; ++ rtx rax = gen_rtx_REG (word_mode, AX_REG); ++ const struct xlogue_layout &xlogue = xlogue_layout::get_instance (); + +- if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) +- return frame.hard_frame_pointer_offset; +- else if (from == FRAME_POINTER_REGNUM +- && to == HARD_FRAME_POINTER_REGNUM) +- return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; +- else +- { +- gcc_assert (to == STACK_POINTER_REGNUM); ++ /* AL should only be live with sysv_abi. */ ++ gcc_assert (!ix86_eax_live_at_start_p ()); ++ gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset); + +- if (from == ARG_POINTER_REGNUM) +- return frame.stack_pointer_offset; ++ /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather ++ we've actually realigned the stack or not. */ ++ align = GET_MODE_ALIGNMENT (V4SFmode); ++ addr = choose_baseaddr (frame.stack_realign_offset ++ + xlogue.get_stub_ptr_offset (), &align, AX_REG); ++ gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode)); + +- gcc_assert (from == FRAME_POINTER_REGNUM); +- return frame.stack_pointer_offset - frame.frame_pointer_offset; +- } +-} ++ emit_insn (gen_rtx_SET (rax, addr)); + +-/* In a dynamically-aligned function, we can't know the offset from +- stack pointer to frame pointer, so we must ensure that setjmp +- eliminates fp against the hard fp (%ebp) rather than trying to +- index from %esp up to the top of the frame across a gap that is +- of unknown (at compile-time) size. */ +-static rtx +-ix86_builtin_setjmp_frame_value (void) +-{ +- return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; +-} ++ /* Get the stub symbol. */ ++ sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP ++ : XLOGUE_STUB_SAVE); ++ RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); + +-/* Emits a warning for unsupported msabi to sysv pro/epilogues. */ +-static void warn_once_call_ms2sysv_xlogues (const char *feature) +-{ +- static bool warned_once = false; +- if (!warned_once) ++ for (i = 0; i < ncregs; ++i) + { +- warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s", +- feature); +- warned_once = true; ++ const xlogue_layout::reginfo &r = xlogue.get_reginfo (i); ++ rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode), ++ r.regno); ++ RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset); + } +-} + +-/* Return the probing interval for -fstack-clash-protection. */ ++ gcc_assert (vi == (unsigned)GET_NUM_ELEM (v)); + +-static HOST_WIDE_INT +-get_probe_interval (void) +-{ +- if (flag_stack_clash_protection) +- return (HOST_WIDE_INT_1U +- << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); +- else +- return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); ++ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v)); ++ RTX_FRAME_RELATED_P (insn) = true; + } + +-/* When using -fsplit-stack, the allocation routines set a field in +- the TCB to the bottom of the stack plus this much space, measured +- in bytes. 
*/ +- +-#define SPLIT_STACK_AVAILABLE 256 +- +-/* Fill structure ix86_frame about frame of currently computed function. */ ++/* Expand the prologue into a bunch of separate insns. */ + +-static void +-ix86_compute_frame_layout (void) ++void ++ix86_expand_prologue (void) + { +- struct ix86_frame *frame = &cfun->machine->frame; + struct machine_function *m = cfun->machine; +- unsigned HOST_WIDE_INT stack_alignment_needed; +- HOST_WIDE_INT offset; +- unsigned HOST_WIDE_INT preferred_alignment; +- HOST_WIDE_INT size = get_frame_size (); +- HOST_WIDE_INT to_allocate; ++ rtx insn, t; ++ HOST_WIDE_INT allocate; ++ bool int_registers_saved; ++ bool sse_registers_saved; ++ bool save_stub_call_needed; ++ rtx static_chain = NULL_RTX; + +- /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit +- * ms_abi functions that call a sysv function. We now need to prune away +- * cases where it should be disabled. */ +- if (TARGET_64BIT && m->call_ms2sysv) +- { +- gcc_assert (TARGET_64BIT_MS_ABI); +- gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES); +- gcc_assert (!TARGET_SEH); +- gcc_assert (TARGET_SSE); +- gcc_assert (!ix86_using_red_zone ()); ++ if (ix86_function_naked (current_function_decl)) ++ return; + +- if (crtl->calls_eh_return) +- { +- gcc_assert (!reload_completed); +- m->call_ms2sysv = false; +- warn_once_call_ms2sysv_xlogues ("__builtin_eh_return"); +- } ++ ix86_finalize_stack_frame_flags (); + +- else if (ix86_static_chain_on_stack) +- { +- gcc_assert (!reload_completed); +- m->call_ms2sysv = false; +- warn_once_call_ms2sysv_xlogues ("static call chains"); +- } ++ /* DRAP should not coexist with stack_realign_fp */ ++ gcc_assert (!(crtl->drap_reg && stack_realign_fp)); + +- /* Finally, compute which registers the stub will manage. */ +- else +- { +- unsigned count = xlogue_layout::count_stub_managed_regs (); +- m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS; +- m->call_ms2sysv_pad_in = 0; +- } +- } ++ memset (&m->fs, 0, sizeof (m->fs)); + +- frame->nregs = ix86_nsaved_regs (); +- frame->nsseregs = ix86_nsaved_sseregs (); ++ /* Initialize CFA state for before the prologue. */ ++ m->fs.cfa_reg = stack_pointer_rtx; ++ m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; + +- /* 64-bit MS ABI seem to require stack alignment to be always 16, +- except for function prologues, leaf functions and when the defult +- incoming stack boundary is overriden at command line or via +- force_align_arg_pointer attribute. ++ /* Track SP offset to the CFA. We continue tracking this after we've ++ swapped the CFA register away from SP. In the case of re-alignment ++ this is fudged; we're interested to offsets within the local frame. */ ++ m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; ++ m->fs.sp_valid = true; ++ m->fs.sp_realigned = false; + +- Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants +- at call sites, including profile function calls. +- */ +- if (((TARGET_64BIT_MS_ABI || TARGET_MACHO) +- && crtl->preferred_stack_boundary < 128) +- && (!crtl->is_leaf || cfun->calls_alloca != 0 +- || ix86_current_function_calls_tls_descriptor +- || (TARGET_MACHO && crtl->profile) +- || ix86_incoming_stack_boundary < 128)) ++ const struct ix86_frame &frame = cfun->machine->frame; ++ ++ if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl)) + { +- crtl->preferred_stack_boundary = 128; +- crtl->stack_alignment_needed = 128; +- } ++ /* We should have already generated an error for any use of ++ ms_hook on a nested function. 
*/ ++ gcc_checking_assert (!ix86_static_chain_on_stack); + +- stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; +- preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; ++ /* Check if profiling is active and we shall use profiling before ++ prologue variant. If so sorry. */ ++ if (crtl->profile && flag_fentry != 0) ++ sorry ("% attribute is not compatible " ++ "with %<-mfentry%> for 32-bit"); + +- gcc_assert (!size || stack_alignment_needed); +- gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); +- gcc_assert (preferred_alignment <= stack_alignment_needed); ++ /* In ix86_asm_output_function_label we emitted: ++ 8b ff movl.s %edi,%edi ++ 55 push %ebp ++ 8b ec movl.s %esp,%ebp + +- /* The only ABI saving SSE regs should be 64-bit ms_abi. */ +- gcc_assert (TARGET_64BIT || !frame->nsseregs); +- if (TARGET_64BIT && m->call_ms2sysv) +- { +- gcc_assert (stack_alignment_needed >= 16); +- gcc_assert (!frame->nsseregs); +- } ++ This matches the hookable function prologue in Win32 API ++ functions in Microsoft Windows XP Service Pack 2 and newer. ++ Wine uses this to enable Windows apps to hook the Win32 API ++ functions provided by Wine. + +- /* For SEH we have to limit the amount of code movement into the prologue. +- At present we do this via a BLOCKAGE, at which point there's very little +- scheduling that can be done, which means that there's very little point +- in doing anything except PUSHs. */ +- if (TARGET_SEH) +- m->use_fast_prologue_epilogue = false; +- else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))) +- { +- int count = frame->nregs; +- struct cgraph_node *node = cgraph_node::get (current_function_decl); ++ What that means is that we've already set up the frame pointer. */ + +- /* The fast prologue uses move instead of push to save registers. This +- is significantly longer, but also executes faster as modern hardware +- can execute the moves in parallel, but can't do that for push/pop. ++ if (frame_pointer_needed ++ && !(crtl->drap_reg && crtl->stack_realign_needed)) ++ { ++ rtx push, mov; + +- Be careful about choosing what prologue to emit: When function takes +- many instructions to execute we may use slow version as well as in +- case function is known to be outside hot spot (this is known with +- feedback only). Weight the size of function by number of registers +- to save as it is cheap to use one or two push instructions but very +- slow to use many of them. */ +- if (count) +- count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; +- if (node->frequency < NODE_FREQUENCY_NORMAL +- || (flag_branch_probabilities +- && node->frequency < NODE_FREQUENCY_HOT)) +- m->use_fast_prologue_epilogue = false; +- else +- m->use_fast_prologue_epilogue +- = !expensive_function_p (count); +- } ++ /* We've decided to use the frame pointer already set up. ++ Describe this to the unwinder by pretending that both ++ push and mov insns happen right here. + +- frame->save_regs_using_mov +- = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue +- /* If static stack checking is enabled and done with probes, +- the registers need to be saved before allocating the frame. */ +- && flag_stack_check != STATIC_BUILTIN_STACK_CHECK); ++ Putting the unwind info here at the end of the ms_hook ++ is done so that we can make absolutely certain we get ++ the required byte sequence at the start of the function, ++ rather than relying on an assembler that can produce ++ the exact encoding required. 
+ +- /* Skip return address and error code in exception handler. */ +- offset = INCOMING_FRAME_SP_OFFSET; ++ However it does mean (in the unpatched case) that we have ++ a 1 insn window where the asynchronous unwind info is ++ incorrect. However, if we placed the unwind info at ++ its correct location we would have incorrect unwind info ++ in the patched case. Which is probably all moot since ++ I don't expect Wine generates dwarf2 unwind info for the ++ system libraries that use this feature. */ + +- /* Skip pushed static chain. */ +- if (ix86_static_chain_on_stack) +- offset += UNITS_PER_WORD; ++ insn = emit_insn (gen_blockage ()); + +- /* Skip saved base pointer. */ +- if (frame_pointer_needed) +- offset += UNITS_PER_WORD; +- frame->hfp_save_offset = offset; ++ push = gen_push (hard_frame_pointer_rtx); ++ mov = gen_rtx_SET (hard_frame_pointer_rtx, ++ stack_pointer_rtx); ++ RTX_FRAME_RELATED_P (push) = 1; ++ RTX_FRAME_RELATED_P (mov) = 1; + +- /* The traditional frame pointer location is at the top of the frame. */ +- frame->hard_frame_pointer_offset = offset; ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); + +- /* Register save area */ +- offset += frame->nregs * UNITS_PER_WORD; +- frame->reg_save_offset = offset; ++ /* Note that gen_push incremented m->fs.cfa_offset, even ++ though we didn't emit the push insn here. */ ++ m->fs.cfa_reg = hard_frame_pointer_rtx; ++ m->fs.fp_offset = m->fs.cfa_offset; ++ m->fs.fp_valid = true; ++ } ++ else ++ { ++ /* The frame pointer is not needed so pop %ebp again. ++ This leaves us with a pristine state. */ ++ emit_insn (gen_pop (hard_frame_pointer_rtx)); ++ } ++ } + +- /* On SEH target, registers are pushed just before the frame pointer +- location. */ +- if (TARGET_SEH) +- frame->hard_frame_pointer_offset = offset; ++ /* The first insn of a function that accepts its static chain on the ++ stack is to push the register that would be filled in by a direct ++ call. This insn will be skipped by the trampoline. */ ++ else if (ix86_static_chain_on_stack) ++ { ++ static_chain = ix86_static_chain (cfun->decl, false); ++ insn = emit_insn (gen_push (static_chain)); ++ emit_insn (gen_blockage ()); + +- /* Calculate the size of the va-arg area (not including padding, if any). */ +- frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; ++ /* We don't want to interpret this push insn as a register save, ++ only as a stack adjustment. The real copy of the register as ++ a save will be done later, if needed. */ ++ t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); ++ t = gen_rtx_SET (stack_pointer_rtx, t); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, t); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + +- /* Also adjust stack_realign_offset for the largest alignment of +- stack slot actually used. */ +- if (stack_realign_fp +- || (cfun->machine->max_used_stack_alignment != 0 +- && (offset % cfun->machine->max_used_stack_alignment) != 0)) ++ /* Emit prologue code to adjust stack alignment and setup DRAP, in case ++ of DRAP is needed and stack realignment is really needed after reload */ ++ if (stack_realign_drap) + { +- /* We may need a 16-byte aligned stack for the remainder of the +- register save area, but the stack frame for the local function +- may require a greater alignment if using AVX/2/512. 
In order +- to avoid wasting space, we first calculate the space needed for +- the rest of the register saves, add that to the stack pointer, +- and then realign the stack to the boundary of the start of the +- frame for the local function. */ +- HOST_WIDE_INT space_needed = 0; +- HOST_WIDE_INT sse_reg_space_needed = 0; ++ int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; + +- if (TARGET_64BIT) +- { +- if (m->call_ms2sysv) +- { +- m->call_ms2sysv_pad_in = 0; +- space_needed = xlogue_layout::get_instance ().get_stack_space_used (); +- } ++ /* Can't use DRAP in interrupt function. */ ++ if (cfun->machine->func_type != TYPE_NORMAL) ++ sorry ("Dynamic Realign Argument Pointer (DRAP) not supported " ++ "in interrupt service routine. This may be worked " ++ "around by avoiding functions with aggregate return."); + +- else if (frame->nsseregs) +- /* The only ABI that has saved SSE registers (Win64) also has a +- 16-byte aligned default stack. However, many programs violate +- the ABI, and Wine64 forces stack realignment to compensate. */ +- space_needed = frame->nsseregs * 16; ++ /* Only need to push parameter pointer reg if it is caller saved. */ ++ if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) ++ { ++ /* Push arg pointer reg */ ++ insn = emit_insn (gen_push (crtl->drap_reg)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + +- sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16); ++ /* Grab the argument pointer. */ ++ t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); ++ insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ m->fs.cfa_reg = crtl->drap_reg; ++ m->fs.cfa_offset = 0; + +- /* 64-bit frame->va_arg_size should always be a multiple of 16, but +- rounding to be pedantic. */ +- space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16); +- } +- else +- space_needed = frame->va_arg_size; ++ /* Align the stack. */ ++ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-align_bytes))); ++ RTX_FRAME_RELATED_P (insn) = 1; + +- /* Record the allocation size required prior to the realignment AND. */ +- frame->stack_realign_allocate = space_needed; ++ /* Replicate the return address on the stack so that return ++ address can be reached via (argp - 1) slot. This is needed ++ to implement macro RETURN_ADDR_RTX and intrinsic function ++ expand_builtin_return_addr etc. */ ++ t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); ++ t = gen_frame_mem (word_mode, t); ++ insn = emit_insn (gen_push (t)); ++ RTX_FRAME_RELATED_P (insn) = 1; + +- /* The re-aligned stack starts at frame->stack_realign_offset. Values +- before this point are not directly comparable with values below +- this point. Use sp_valid_at to determine if the stack pointer is +- valid for a given offset, fp_valid_at for the frame pointer, or +- choose_baseaddr to have a base register chosen for you. ++ /* For the purposes of frame and register save area addressing, ++ we've started over with a new frame. */ ++ m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; ++ m->fs.realigned = true; + +- Note that the result of (frame->stack_realign_offset +- & (stack_alignment_needed - 1)) may not equal zero. 
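Both the DRAP path here and the stack_realign_fp path further down align the stack pointer by AND-ing it with the negated alignment, which rounds the address down to the requested power-of-two boundary. The same trick in ordinary C, purely for illustration (align must be a power of two):

#include <stdint.h>
#include <stdio.h>

/* Round ADDR down to a multiple of ALIGN (a power of two), the same
   effect as the "and sp, -align_bytes" emitted in the prologue.  */
static uintptr_t
align_down (uintptr_t addr, uintptr_t align)
{
  return addr & ~(align - 1);     /* equivalently: addr & -align */
}

int main (void)
{
  uintptr_t sp = 0x7ffde39c;
  printf ("%#lx -> %#lx\n", (unsigned long) sp,
          (unsigned long) align_down (sp, 32));
  return 0;
}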
*/ +- offset = ROUND_UP (offset + space_needed, stack_alignment_needed); +- frame->stack_realign_offset = offset - space_needed; +- frame->sse_reg_save_offset = frame->stack_realign_offset +- + sse_reg_space_needed; ++ if (static_chain) ++ { ++ /* Replicate static chain on the stack so that static chain ++ can be reached via (argp - 2) slot. This is needed for ++ nested function with stack realignment. */ ++ insn = emit_insn (gen_push (static_chain)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + } +- else ++ ++ int_registers_saved = (frame.nregs == 0); ++ sse_registers_saved = (frame.nsseregs == 0); ++ save_stub_call_needed = (m->call_ms2sysv); ++ gcc_assert (sse_registers_saved || !save_stub_call_needed); ++ ++ if (frame_pointer_needed && !m->fs.fp_valid) + { +- frame->stack_realign_offset = offset; ++ /* Note: AT&T enter does NOT have reversed args. Enter is probably ++ slower on all targets. Also sdb didn't like it. */ ++ insn = emit_insn (gen_push (hard_frame_pointer_rtx)); ++ RTX_FRAME_RELATED_P (insn) = 1; + +- if (TARGET_64BIT && m->call_ms2sysv) ++ /* Push registers now, before setting the frame pointer ++ on SEH target. */ ++ if (!int_registers_saved ++ && TARGET_SEH ++ && !frame.save_regs_using_mov) + { +- m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD); +- offset += xlogue_layout::get_instance ().get_stack_space_used (); ++ ix86_emit_save_regs (); ++ int_registers_saved = true; ++ gcc_assert (m->fs.sp_offset == frame.reg_save_offset); + } + +- /* Align and set SSE register save area. */ +- else if (frame->nsseregs) ++ if (m->fs.sp_offset == frame.hard_frame_pointer_offset) + { +- /* If the incoming stack boundary is at least 16 bytes, or DRAP is +- required and the DRAP re-alignment boundary is at least 16 bytes, +- then we want the SSE register save area properly aligned. */ +- if (ix86_incoming_stack_boundary >= 128 +- || (stack_realign_drap && stack_alignment_needed >= 16)) +- offset = ROUND_UP (offset, 16); +- offset += frame->nsseregs * 16; ++ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ m->fs.cfa_reg = hard_frame_pointer_rtx; ++ m->fs.fp_offset = m->fs.sp_offset; ++ m->fs.fp_valid = true; + } +- frame->sse_reg_save_offset = offset; +- offset += frame->va_arg_size; + } + +- /* Align start of frame for local function. When a function call +- is removed, it may become a leaf function. But if argument may +- be passed on stack, we need to align the stack when there is no +- tail call. */ +- if (m->call_ms2sysv +- || frame->va_arg_size != 0 +- || size != 0 +- || !crtl->is_leaf +- || (!crtl->tail_call_emit +- && cfun->machine->outgoing_args_on_stack) +- || cfun->calls_alloca +- || ix86_current_function_calls_tls_descriptor) +- offset = ROUND_UP (offset, stack_alignment_needed); +- +- /* Frame pointer points here. */ +- frame->frame_pointer_offset = offset; +- +- offset += size; +- +- /* Add outgoing arguments area. Can be skipped if we eliminated +- all the function calls as dead code. +- Skipping is however impossible when function calls alloca. Alloca +- expander assumes that last crtl->outgoing_args_size +- of stack frame are unused. 
*/ +- if (ACCUMULATE_OUTGOING_ARGS +- && (!crtl->is_leaf || cfun->calls_alloca +- || ix86_current_function_calls_tls_descriptor)) ++ if (!int_registers_saved) + { +- offset += crtl->outgoing_args_size; +- frame->outgoing_arguments_size = crtl->outgoing_args_size; +- } +- else +- frame->outgoing_arguments_size = 0; ++ /* If saving registers via PUSH, do so now. */ ++ if (!frame.save_regs_using_mov) ++ { ++ ix86_emit_save_regs (); ++ int_registers_saved = true; ++ gcc_assert (m->fs.sp_offset == frame.reg_save_offset); ++ } + +- /* Align stack boundary. Only needed if we're calling another function +- or using alloca. */ +- if (!crtl->is_leaf || cfun->calls_alloca +- || ix86_current_function_calls_tls_descriptor) +- offset = ROUND_UP (offset, preferred_alignment); ++ /* When using red zone we may start register saving before allocating ++ the stack frame saving one cycle of the prologue. However, avoid ++ doing this if we have to probe the stack; at least on x86_64 the ++ stack probe can turn into a call that clobbers a red zone location. */ ++ else if (ix86_using_red_zone () ++ && (! TARGET_STACK_PROBE ++ || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) ++ { ++ ix86_emit_save_regs_using_mov (frame.reg_save_offset); ++ int_registers_saved = true; ++ } ++ } + +- /* We've reached end of stack frame. */ +- frame->stack_pointer_offset = offset; ++ if (stack_realign_fp) ++ { ++ int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; ++ gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); + +- /* Size prologue needs to allocate. */ +- to_allocate = offset - frame->sse_reg_save_offset; ++ /* Record last valid frame pointer offset. */ ++ m->fs.sp_realigned_fp_last = frame.reg_save_offset; + +- if ((!to_allocate && frame->nregs <= 1) +- || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) +- /* If stack clash probing needs a loop, then it needs a +- scratch register. But the returned register is only guaranteed +- to be safe to use after register saves are complete. So if +- stack clash protections are enabled and the allocated frame is +- larger than the probe interval, then use pushes to save +- callee saved registers. */ +- || (flag_stack_clash_protection && to_allocate > get_probe_interval ())) +- frame->save_regs_using_mov = false; ++ /* The computation of the size of the re-aligned stack frame means ++ that we must allocate the size of the register save area before ++ performing the actual alignment. Otherwise we cannot guarantee ++ that there's enough storage above the realignment point. */ ++ allocate = frame.reg_save_offset - m->fs.sp_offset ++ + frame.stack_realign_allocate; ++ if (allocate) ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-allocate), -1, false); + +- if (ix86_using_red_zone () +- && crtl->sp_is_unchanging +- && crtl->is_leaf +- && !ix86_pc_thunk_call_expanded +- && !ix86_current_function_calls_tls_descriptor) +- { +- frame->red_zone_size = to_allocate; +- if (frame->save_regs_using_mov) +- frame->red_zone_size += frame->nregs * UNITS_PER_WORD; +- if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) +- frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; +- } +- else +- frame->red_zone_size = 0; +- frame->stack_pointer_offset -= frame->red_zone_size; ++ /* Align the stack. 
*/ ++ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, ++ stack_pointer_rtx, ++ GEN_INT (-align_bytes))); ++ m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); ++ m->fs.sp_realigned_offset = m->fs.sp_offset ++ - frame.stack_realign_allocate; ++ /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset. ++ Beyond this point, stack access should be done via choose_baseaddr or ++ by using sp_valid_at and fp_valid_at to determine the correct base ++ register. Henceforth, any CFA offset should be thought of as logical ++ and not physical. */ ++ gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last); ++ gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset); ++ m->fs.sp_realigned = true; + +- /* The SEH frame pointer location is near the bottom of the frame. +- This is enforced by the fact that the difference between the +- stack pointer and the frame pointer is limited to 240 bytes in +- the unwind data structure. */ +- if (TARGET_SEH) +- { +- HOST_WIDE_INT diff; ++ /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which ++ is needed to describe where a register is saved using a realigned ++ stack pointer, so we need to invalidate the stack pointer for that ++ target. */ ++ if (TARGET_SEH) ++ m->fs.sp_valid = false; + +- /* If we can leave the frame pointer where it is, do so. Also, returns +- the establisher frame for __builtin_frame_address (0). */ +- diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset; +- if (diff <= SEH_MAX_FRAME_SIZE +- && (diff > 240 || (diff & 15) != 0) +- && !crtl->accesses_prior_frames) ++ /* If SP offset is non-immediate after allocation of the stack frame, ++ then emit SSE saves or stub call prior to allocating the rest of the ++ stack frame. This is less efficient for the out-of-line stub because ++ we can't combine allocations across the call barrier, but it's better ++ than using a scratch register. */ ++ else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset ++ - m->fs.sp_realigned_offset), ++ Pmode)) + { +- /* Ideally we'd determine what portion of the local stack frame +- (within the constraint of the lowest 240) is most heavily used. +- But without that complication, simply bias the frame pointer +- by 128 bytes so as to maximize the amount of the local stack +- frame that is addressable with 8-bit offsets. */ +- frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128; ++ if (!sse_registers_saved) ++ { ++ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); ++ sse_registers_saved = true; ++ } ++ else if (save_stub_call_needed) ++ { ++ ix86_emit_outlined_ms2sysv_save (frame); ++ save_stub_call_needed = false; ++ } + } + } +-} +- +-/* This is semi-inlined memory_address_length, but simplified +- since we know that we're always dealing with reg+offset, and +- to avoid having to create and discard all that rtl. */ + +-static inline int +-choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) +-{ +- int len = 4; ++ allocate = frame.stack_pointer_offset - m->fs.sp_offset; + +- if (offset == 0) ++ if (flag_stack_usage_info) + { +- /* EBP and R13 cannot be encoded without an offset. */ +- len = (regno == BP_REG || regno == R13_REG); +- } +- else if (IN_RANGE (offset, -128, 127)) +- len = 1; ++ /* We start to count from ARG_POINTER. */ ++ HOST_WIDE_INT stack_size = frame.stack_pointer_offset; + +- /* ESP and R12 must be encoded with a SIB byte. 
*/ +- if (regno == SP_REG || regno == R12_REG) +- len++; ++ /* If it was realigned, take into account the fake frame. */ ++ if (stack_realign_drap) ++ { ++ if (ix86_static_chain_on_stack) ++ stack_size += UNITS_PER_WORD; + +- return len; +-} ++ if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) ++ stack_size += UNITS_PER_WORD; + +-/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in +- the frame save area. The register is saved at CFA - CFA_OFFSET. */ ++ /* This over-estimates by 1 minimal-stack-alignment-unit but ++ mitigates that by counting in the new return address slot. */ ++ current_function_dynamic_stack_size ++ += crtl->stack_alignment_needed / BITS_PER_UNIT; ++ } + +-static bool +-sp_valid_at (HOST_WIDE_INT cfa_offset) +-{ +- const struct machine_frame_state &fs = cfun->machine->fs; +- if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset) +- { +- /* Validate that the cfa_offset isn't in a "no-man's land". */ +- gcc_assert (cfa_offset <= fs.sp_realigned_fp_last); +- return false; ++ current_function_static_stack_size = stack_size; + } +- return fs.sp_valid; +-} + +-/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in +- the frame save area. The register is saved at CFA - CFA_OFFSET. */ +- +-static inline bool +-fp_valid_at (HOST_WIDE_INT cfa_offset) +-{ +- const struct machine_frame_state &fs = cfun->machine->fs; +- if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last) ++ /* On SEH target with very large frame size, allocate an area to save ++ SSE registers (as the very large allocation won't be described). */ ++ if (TARGET_SEH ++ && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE ++ && !sse_registers_saved) + { +- /* Validate that the cfa_offset isn't in a "no-man's land". */ +- gcc_assert (cfa_offset >= fs.sp_realigned_offset); +- return false; +- } +- return fs.fp_valid; +-} +- +-/* Choose a base register based upon alignment requested, speed and/or +- size. */ +- +-static void +-choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg, +- HOST_WIDE_INT &base_offset, +- unsigned int align_reqested, unsigned int *align) +-{ +- const struct machine_function *m = cfun->machine; +- unsigned int hfp_align; +- unsigned int drap_align; +- unsigned int sp_align; +- bool hfp_ok = fp_valid_at (cfa_offset); +- bool drap_ok = m->fs.drap_valid; +- bool sp_ok = sp_valid_at (cfa_offset); +- +- hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY; ++ HOST_WIDE_INT sse_size ++ = frame.sse_reg_save_offset - frame.reg_save_offset; + +- /* Filter out any registers that don't meet the requested alignment +- criteria. */ +- if (align_reqested) +- { +- if (m->fs.realigned) +- hfp_align = drap_align = sp_align = crtl->stack_alignment_needed; +- /* SEH unwind code does do not currently support REG_CFA_EXPRESSION +- notes (which we would need to use a realigned stack pointer), +- so disable on SEH targets. */ +- else if (m->fs.sp_realigned) +- sp_align = crtl->stack_alignment_needed; ++ gcc_assert (int_registers_saved); + +- hfp_ok = hfp_ok && hfp_align >= align_reqested; +- drap_ok = drap_ok && drap_align >= align_reqested; +- sp_ok = sp_ok && sp_align >= align_reqested; ++ /* No need to do stack checking as the area will be immediately ++ written. 
*/ ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-sse_size), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ allocate -= sse_size; ++ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); ++ sse_registers_saved = true; + } + +- if (m->use_fast_prologue_epilogue) ++ /* The stack has already been decremented by the instruction calling us ++ so probe if the size is non-negative to preserve the protection area. */ ++ if (allocate >= 0 ++ && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK ++ || flag_stack_clash_protection)) + { +- /* Choose the base register most likely to allow the most scheduling +- opportunities. Generally FP is valid throughout the function, +- while DRAP must be reloaded within the epilogue. But choose either +- over the SP due to increased encoding size. */ +- +- if (hfp_ok) ++ if (flag_stack_clash_protection) + { +- base_reg = hard_frame_pointer_rtx; +- base_offset = m->fs.fp_offset - cfa_offset; ++ ix86_adjust_stack_and_probe_stack_clash (allocate, ++ int_registers_saved); ++ allocate = 0; + } +- else if (drap_ok) ++ else if (STACK_CHECK_MOVING_SP) + { +- base_reg = crtl->drap_reg; +- base_offset = 0 - cfa_offset; ++ if (!(crtl->is_leaf && !cfun->calls_alloca ++ && allocate <= get_probe_interval ())) ++ { ++ ix86_adjust_stack_and_probe (allocate, int_registers_saved); ++ allocate = 0; ++ } + } +- else if (sp_ok) ++ else + { +- base_reg = stack_pointer_rtx; +- base_offset = m->fs.sp_offset - cfa_offset; ++ HOST_WIDE_INT size = allocate; ++ ++ if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) ++ size = 0x80000000 - get_stack_check_protect () - 1; ++ ++ if (TARGET_STACK_PROBE) ++ { ++ if (crtl->is_leaf && !cfun->calls_alloca) ++ { ++ if (size > get_probe_interval ()) ++ ix86_emit_probe_stack_range (0, size, int_registers_saved); ++ } ++ else ++ ix86_emit_probe_stack_range (0, ++ size + get_stack_check_protect (), ++ int_registers_saved); ++ } ++ else ++ { ++ if (crtl->is_leaf && !cfun->calls_alloca) ++ { ++ if (size > get_probe_interval () ++ && size > get_stack_check_protect ()) ++ ix86_emit_probe_stack_range (get_stack_check_protect (), ++ (size ++ - get_stack_check_protect ()), ++ int_registers_saved); ++ } ++ else ++ ix86_emit_probe_stack_range (get_stack_check_protect (), size, ++ int_registers_saved); ++ } + } + } ++ ++ if (allocate == 0) ++ ; ++ else if (!ix86_target_stack_probe () ++ || frame.stack_pointer_offset < CHECK_STACK_LIMIT) ++ { ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-allocate), -1, ++ m->fs.cfa_reg == stack_pointer_rtx); ++ } + else + { +- HOST_WIDE_INT toffset; +- int len = 16, tlen; ++ rtx eax = gen_rtx_REG (Pmode, AX_REG); ++ rtx r10 = NULL; ++ rtx (*adjust_stack_insn)(rtx, rtx, rtx); ++ const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); ++ bool eax_live = ix86_eax_live_at_start_p (); ++ bool r10_live = false; + +- /* Choose the base register with the smallest address encoding. +- With a tie, choose FP > DRAP > SP. 
*/ +- if (sp_ok) +- { +- base_reg = stack_pointer_rtx; +- base_offset = m->fs.sp_offset - cfa_offset; +- len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset); +- } +- if (drap_ok) ++ if (TARGET_64BIT) ++ r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); ++ ++ if (eax_live) + { +- toffset = 0 - cfa_offset; +- tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset); +- if (tlen <= len) ++ insn = emit_insn (gen_push (eax)); ++ allocate -= UNITS_PER_WORD; ++ /* Note that SEH directives need to continue tracking the stack ++ pointer even after the frame pointer has been set up. */ ++ if (sp_is_cfa_reg || TARGET_SEH) + { +- base_reg = crtl->drap_reg; +- base_offset = toffset; +- len = tlen; ++ if (sp_is_cfa_reg) ++ m->fs.cfa_offset += UNITS_PER_WORD; ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ -UNITS_PER_WORD))); + } + } +- if (hfp_ok) ++ ++ if (r10_live) + { +- toffset = m->fs.fp_offset - cfa_offset; +- tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset); +- if (tlen <= len) ++ r10 = gen_rtx_REG (Pmode, R10_REG); ++ insn = emit_insn (gen_push (r10)); ++ allocate -= UNITS_PER_WORD; ++ if (sp_is_cfa_reg || TARGET_SEH) + { +- base_reg = hard_frame_pointer_rtx; +- base_offset = toffset; ++ if (sp_is_cfa_reg) ++ m->fs.cfa_offset += UNITS_PER_WORD; ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ -UNITS_PER_WORD))); + } + } +- } + +- /* Set the align return value. */ +- if (align) +- { +- if (base_reg == stack_pointer_rtx) +- *align = sp_align; +- else if (base_reg == crtl->drap_reg) +- *align = drap_align; +- else if (base_reg == hard_frame_pointer_rtx) +- *align = hfp_align; +- } +-} ++ emit_move_insn (eax, GEN_INT (allocate)); ++ emit_insn (ix86_gen_allocate_stack_worker (eax, eax)); + +-/* Return an RTX that points to CFA_OFFSET within the stack frame and +- the alignment of address. If ALIGN is non-null, it should point to +- an alignment value (in bits) that is preferred or zero and will +- recieve the alignment of the base register that was selected, +- irrespective of rather or not CFA_OFFSET is a multiple of that +- alignment value. If it is possible for the base register offset to be +- non-immediate then SCRATCH_REGNO should specify a scratch register to +- use. ++ /* Use the fact that AX still contains ALLOCATE. */ ++ adjust_stack_insn = (Pmode == DImode ++ ? gen_pro_epilogue_adjust_stack_di_sub ++ : gen_pro_epilogue_adjust_stack_si_sub); + +- The valid base registers are taken from CFUN->MACHINE->FS. */ ++ insn = emit_insn (adjust_stack_insn (stack_pointer_rtx, ++ stack_pointer_rtx, eax)); + +-static rtx +-choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align, +- unsigned int scratch_regno = INVALID_REGNUM) +-{ +- rtx base_reg = NULL; +- HOST_WIDE_INT base_offset = 0; ++ if (sp_is_cfa_reg || TARGET_SEH) ++ { ++ if (sp_is_cfa_reg) ++ m->fs.cfa_offset += allocate; ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ -allocate))); ++ } ++ m->fs.sp_offset += allocate; + +- /* If a specific alignment is requested, try to get a base register +- with that alignment first. 
*/ +- if (align && *align) +- choose_basereg (cfa_offset, base_reg, base_offset, *align, align); ++ /* Use stack_pointer_rtx for relative addressing so that code works for ++ realigned stack. But this means that we need a blockage to prevent ++ stores based on the frame pointer from being scheduled before. */ ++ if (r10_live && eax_live) ++ { ++ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); ++ emit_move_insn (gen_rtx_REG (word_mode, R10_REG), ++ gen_frame_mem (word_mode, t)); ++ t = plus_constant (Pmode, t, UNITS_PER_WORD); ++ emit_move_insn (gen_rtx_REG (word_mode, AX_REG), ++ gen_frame_mem (word_mode, t)); ++ emit_insn (gen_memory_blockage ()); ++ } ++ else if (eax_live || r10_live) ++ { ++ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); ++ emit_move_insn (gen_rtx_REG (word_mode, ++ (eax_live ? AX_REG : R10_REG)), ++ gen_frame_mem (word_mode, t)); ++ emit_insn (gen_memory_blockage ()); ++ } ++ } ++ gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); + +- if (!base_reg) +- choose_basereg (cfa_offset, base_reg, base_offset, 0, align); ++ /* If we havn't already set up the frame pointer, do so now. */ ++ if (frame_pointer_needed && !m->fs.fp_valid) ++ { ++ insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (frame.stack_pointer_offset ++ - frame.hard_frame_pointer_offset)); ++ insn = emit_insn (insn); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); + +- gcc_assert (base_reg != NULL); ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ m->fs.cfa_reg = hard_frame_pointer_rtx; ++ m->fs.fp_offset = frame.hard_frame_pointer_offset; ++ m->fs.fp_valid = true; ++ } + +- rtx base_offset_rtx = GEN_INT (base_offset); ++ if (!int_registers_saved) ++ ix86_emit_save_regs_using_mov (frame.reg_save_offset); ++ if (!sse_registers_saved) ++ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); ++ else if (save_stub_call_needed) ++ ix86_emit_outlined_ms2sysv_save (frame); + +- if (!x86_64_immediate_operand (base_offset_rtx, Pmode)) ++ /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT ++ in PROLOGUE. */ ++ if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) + { +- gcc_assert (scratch_regno != INVALID_REGNUM); +- +- rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); +- emit_move_insn (scratch_reg, base_offset_rtx); +- +- return gen_rtx_PLUS (Pmode, base_reg, scratch_reg); ++ rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); ++ insn = emit_insn (gen_set_got (pic)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); ++ emit_insn (gen_prologue_use (pic)); ++ /* Deleting already emmitted SET_GOT if exist and allocated to ++ REAL_PIC_OFFSET_TABLE_REGNUM. */ ++ ix86_elim_entry_set_got (pic); + } + +- return plus_constant (Pmode, base_reg, base_offset); +-} +- +-/* Emit code to save registers in the prologue. */ ++ if (crtl->drap_reg && !crtl->stack_realign_needed) ++ { ++ /* vDRAP is setup but after reload it turns out stack realign ++ isn't necessary, here we will emit prologue to setup DRAP ++ without stack realign adjustment */ ++ t = choose_baseaddr (0, NULL); ++ emit_insn (gen_rtx_SET (crtl->drap_reg, t)); ++ } + +-static void +-ix86_emit_save_regs (void) +-{ +- unsigned int regno; +- rtx_insn *insn; ++ /* Prevent instructions from being scheduled into register save push ++ sequence when access to the redzone area is done through frame pointer. 
++ The offset between the frame pointer and the stack pointer is calculated ++ relative to the value of the stack pointer at the end of the function ++ prologue, and moving instructions that access redzone area via frame ++ pointer inside push sequence violates this assumption. */ ++ if (frame_pointer_needed && frame.red_zone_size) ++ emit_insn (gen_memory_blockage ()); + +- for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) +- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) +- { +- insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); +- RTX_FRAME_RELATED_P (insn) = 1; +- } ++ /* SEH requires that the prologue end within 256 bytes of the start of ++ the function. Prevent instruction schedules that would extend that. ++ Further, prevent alloca modifications to the stack pointer from being ++ combined with prologue modifications. */ ++ if (TARGET_SEH) ++ emit_insn (gen_prologue_use (stack_pointer_rtx)); + } + +-/* Emit a single register save at CFA - CFA_OFFSET. */ ++/* Emit code to restore REG using a POP insn. */ + + static void +-ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, +- HOST_WIDE_INT cfa_offset) ++ix86_emit_restore_reg_using_pop (rtx reg) + { + struct machine_function *m = cfun->machine; +- rtx reg = gen_rtx_REG (mode, regno); +- rtx mem, addr, base, insn; +- unsigned int align = GET_MODE_ALIGNMENT (mode); +- +- addr = choose_baseaddr (cfa_offset, &align); +- mem = gen_frame_mem (mode, addr); ++ rtx_insn *insn = emit_insn (gen_pop (reg)); + +- /* The location aligment depends upon the base register. */ +- align = MIN (GET_MODE_ALIGNMENT (mode), align); +- gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); +- set_mem_align (mem, align); ++ ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); ++ m->fs.sp_offset -= UNITS_PER_WORD; + +- insn = emit_insn (gen_rtx_SET (mem, reg)); +- RTX_FRAME_RELATED_P (insn) = 1; ++ if (m->fs.cfa_reg == crtl->drap_reg ++ && REGNO (reg) == REGNO (crtl->drap_reg)) ++ { ++ /* Previously we'd represented the CFA as an expression ++ like *(%ebp - 8). We've just popped that value from ++ the stack, which means we need to reset the CFA to ++ the drap register. This will remain until we restore ++ the stack pointer. */ ++ add_reg_note (insn, REG_CFA_DEF_CFA, reg); ++ RTX_FRAME_RELATED_P (insn) = 1; + +- base = addr; +- if (GET_CODE (base) == PLUS) +- base = XEXP (base, 0); +- gcc_checking_assert (REG_P (base)); ++ /* This means that the DRAP register is valid for addressing too. */ ++ m->fs.drap_valid = true; ++ return; ++ } + +- /* When saving registers into a re-aligned local stack frame, avoid +- any tricky guessing by dwarf2out. */ +- if (m->fs.realigned) ++ if (m->fs.cfa_reg == stack_pointer_rtx) + { +- gcc_checking_assert (stack_realign_drap); ++ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); ++ x = gen_rtx_SET (stack_pointer_rtx, x); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); ++ RTX_FRAME_RELATED_P (insn) = 1; + +- if (regno == REGNO (crtl->drap_reg)) +- { +- /* A bit of a hack. We force the DRAP register to be saved in +- the re-aligned stack frame, which provides us with a copy +- of the CFA that will last past the prologue. Install it. */ +- gcc_checking_assert (cfun->machine->fs.fp_valid); +- addr = plus_constant (Pmode, hard_frame_pointer_rtx, +- cfun->machine->fs.fp_offset - cfa_offset); +- mem = gen_rtx_MEM (mode, addr); +- add_reg_note (insn, REG_CFA_DEF_CFA, mem); +- } +- else +- { +- /* The frame pointer is a stable reference within the +- aligned frame. 
Use it. */ +- gcc_checking_assert (cfun->machine->fs.fp_valid); +- addr = plus_constant (Pmode, hard_frame_pointer_rtx, +- cfun->machine->fs.fp_offset - cfa_offset); +- mem = gen_rtx_MEM (mode, addr); +- add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); +- } ++ m->fs.cfa_offset -= UNITS_PER_WORD; + } + +- else if (base == stack_pointer_rtx && m->fs.sp_realigned +- && cfa_offset >= m->fs.sp_realigned_offset) ++ /* When the frame pointer is the CFA, and we pop it, we are ++ swapping back to the stack pointer as the CFA. This happens ++ for stack frames that don't allocate other data, so we assume ++ the stack pointer is now pointing at the return address, i.e. ++ the function entry state, which makes the offset be 1 word. */ ++ if (reg == hard_frame_pointer_rtx) + { +- gcc_checking_assert (stack_realign_fp); +- add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); ++ m->fs.fp_valid = false; ++ if (m->fs.cfa_reg == hard_frame_pointer_rtx) ++ { ++ m->fs.cfa_reg = stack_pointer_rtx; ++ m->fs.cfa_offset -= UNITS_PER_WORD; ++ ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ gen_rtx_PLUS (Pmode, stack_pointer_rtx, ++ GEN_INT (m->fs.cfa_offset))); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } + } ++} + +- /* The memory may not be relative to the current CFA register, +- which means that we may need to generate a new pattern for +- use by the unwind info. */ +- else if (base != m->fs.cfa_reg) ++/* Emit code to restore saved registers using POP insns. */ ++ ++static void ++ix86_emit_restore_regs_using_pop (void) ++{ ++ unsigned int regno; ++ ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) ++ ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); ++} ++ ++/* Emit code and notes for the LEAVE instruction. If insn is non-null, ++ omits the emit and only attaches the notes. */ ++ ++static void ++ix86_emit_leave (rtx_insn *insn) ++{ ++ struct machine_function *m = cfun->machine; ++ if (!insn) ++ insn = emit_insn (ix86_gen_leave ()); ++ ++ ix86_add_queued_cfa_restore_notes (insn); ++ ++ gcc_assert (m->fs.fp_valid); ++ m->fs.sp_valid = true; ++ m->fs.sp_realigned = false; ++ m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD; ++ m->fs.fp_valid = false; ++ ++ if (m->fs.cfa_reg == hard_frame_pointer_rtx) + { +- addr = plus_constant (Pmode, m->fs.cfa_reg, +- m->fs.cfa_offset - cfa_offset); +- mem = gen_rtx_MEM (mode, addr); +- add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); ++ m->fs.cfa_reg = stack_pointer_rtx; ++ m->fs.cfa_offset = m->fs.sp_offset; ++ ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, stack_pointer_rtx, ++ m->fs.sp_offset)); ++ RTX_FRAME_RELATED_P (insn) = 1; + } ++ ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, ++ m->fs.fp_offset); + } + +-/* Emit code to save registers using MOV insns. +- First register is stored at CFA - CFA_OFFSET. */ ++/* Emit code to restore saved registers using MOV insns. ++ First register is restored from CFA - CFA_OFFSET. 
*/ + static void +-ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) ++ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, ++ bool maybe_eh_return) + { ++ struct machine_function *m = cfun->machine; + unsigned int regno; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) ++ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) + { +- ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); ++ rtx reg = gen_rtx_REG (word_mode, regno); ++ rtx mem; ++ rtx_insn *insn; ++ ++ mem = choose_baseaddr (cfa_offset, NULL); ++ mem = gen_frame_mem (word_mode, mem); ++ insn = emit_move_insn (reg, mem); ++ ++ if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) ++ { ++ /* Previously we'd represented the CFA as an expression ++ like *(%ebp - 8). We've just popped that value from ++ the stack, which means we need to reset the CFA to ++ the drap register. This will remain until we restore ++ the stack pointer. */ ++ add_reg_note (insn, REG_CFA_DEF_CFA, reg); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ /* This means that the DRAP register is valid for addressing. */ ++ m->fs.drap_valid = true; ++ } ++ else ++ ix86_add_cfa_restore_note (NULL, reg, cfa_offset); ++ + cfa_offset -= UNITS_PER_WORD; + } + } + +-/* Emit code to save SSE registers using MOV insns. +- First register is stored at CFA - CFA_OFFSET. */ ++/* Emit code to restore saved registers using MOV insns. ++ First register is restored from CFA - CFA_OFFSET. */ + static void +-ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) ++ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, ++ bool maybe_eh_return) + { + unsigned int regno; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) ++ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) + { +- ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); +- cfa_offset -= GET_MODE_SIZE (V4SFmode); +- } +-} ++ rtx reg = gen_rtx_REG (V4SFmode, regno); ++ rtx mem; ++ unsigned int align = GET_MODE_ALIGNMENT (V4SFmode); + +-static GTY(()) rtx queued_cfa_restores; ++ mem = choose_baseaddr (cfa_offset, &align); ++ mem = gen_rtx_MEM (V4SFmode, mem); + +-/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack +- manipulation insn. The value is on the stack at CFA - CFA_OFFSET. +- Don't add the note if the previously saved value will be left untouched +- within stack red-zone till return, as unwinders can find the same value +- in the register and on the stack. */ ++ /* The location aligment depends upon the base register. */ ++ align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align); ++ gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); ++ set_mem_align (mem, align); ++ emit_insn (gen_rtx_SET (reg, mem)); + +-static void +-ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset) +-{ +- if (!crtl->shrink_wrapped +- && cfa_offset <= cfun->machine->fs.red_zone_offset) +- return; ++ ix86_add_cfa_restore_note (NULL, reg, cfa_offset); + +- if (insn) +- { +- add_reg_note (insn, REG_CFA_RESTORE, reg); +- RTX_FRAME_RELATED_P (insn) = 1; +- } +- else +- queued_cfa_restores +- = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); ++ cfa_offset -= GET_MODE_SIZE (V4SFmode); ++ } + } + +-/* Add queued REG_CFA_RESTORE notes if any to INSN. 
*/ +- + static void +-ix86_add_queued_cfa_restore_notes (rtx insn) +-{ +- rtx last; +- if (!queued_cfa_restores) +- return; +- for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) +- ; +- XEXP (last, 1) = REG_NOTES (insn); +- REG_NOTES (insn) = queued_cfa_restores; +- queued_cfa_restores = NULL_RTX; +- RTX_FRAME_RELATED_P (insn) = 1; +-} +- +-/* Expand prologue or epilogue stack adjustment. +- The pattern exist to put a dependency on all ebp-based memory accesses. +- STYLE should be negative if instructions should be marked as frame related, +- zero if %r11 register is live and cannot be freely used and positive +- otherwise. */ +- +-static rtx +-pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, +- int style, bool set_cfa) ++ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame, ++ bool use_call, int style) + { + struct machine_function *m = cfun->machine; +- rtx insn; +- bool add_frame_related_expr = false; ++ const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS ++ + m->call_ms2sysv_extra_regs; ++ rtvec v; ++ unsigned int elems_needed, align, i, vi = 0; ++ rtx_insn *insn; ++ rtx sym, tmp; ++ rtx rsi = gen_rtx_REG (word_mode, SI_REG); ++ rtx r10 = NULL_RTX; ++ const struct xlogue_layout &xlogue = xlogue_layout::get_instance (); ++ HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset (); ++ HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset; ++ rtx rsi_frame_load = NULL_RTX; ++ HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1; ++ enum xlogue_stub stub; + +- if (Pmode == SImode) +- insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset); +- else if (x86_64_immediate_operand (offset, DImode)) +- insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset); +- else +- { +- rtx tmp; +- /* r11 is used by indirect sibcall return as well, set before the +- epilogue and used after the epilogue. */ +- if (style) +- tmp = gen_rtx_REG (DImode, R11_REG); +- else +- { +- gcc_assert (src != hard_frame_pointer_rtx +- && dest != hard_frame_pointer_rtx); +- tmp = hard_frame_pointer_rtx; +- } +- insn = emit_insn (gen_rtx_SET (tmp, offset)); +- if (style < 0) +- add_frame_related_expr = true; ++ gcc_assert (!m->fs.fp_valid || frame_pointer_needed); + +- insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp); +- } ++ /* If using a realigned stack, we should never start with padding. */ ++ gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ()); + +- insn = emit_insn (insn); +- if (style >= 0) +- ix86_add_queued_cfa_restore_notes (insn); ++ /* Setup RSI as the stub's base pointer. */ ++ align = GET_MODE_ALIGNMENT (V4SFmode); ++ tmp = choose_baseaddr (rsi_offset, &align, SI_REG); ++ gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode)); + +- if (set_cfa) +- { +- rtx r; ++ emit_insn (gen_rtx_SET (rsi, tmp)); + +- gcc_assert (m->fs.cfa_reg == src); +- m->fs.cfa_offset += INTVAL (offset); +- m->fs.cfa_reg = dest; ++ /* Get a symbol for the stub. */ ++ if (frame_pointer_needed) ++ stub = use_call ? XLOGUE_STUB_RESTORE_HFP ++ : XLOGUE_STUB_RESTORE_HFP_TAIL; ++ else ++ stub = use_call ? XLOGUE_STUB_RESTORE ++ : XLOGUE_STUB_RESTORE_TAIL; ++ sym = xlogue.get_stub_rtx (stub); + +- r = gen_rtx_PLUS (Pmode, src, offset); +- r = gen_rtx_SET (dest, r); +- add_reg_note (insn, REG_CFA_ADJUST_CFA, r); +- RTX_FRAME_RELATED_P (insn) = 1; +- } +- else if (style < 0) ++ elems_needed = ncregs; ++ if (use_call) ++ elems_needed += 1; ++ else ++ elems_needed += frame_pointer_needed ? 
5 : 3; ++ v = rtvec_alloc (elems_needed); ++ ++ /* We call the epilogue stub when we need to pop incoming args or we are ++ doing a sibling call as the tail. Otherwise, we will emit a jmp to the ++ epilogue stub and it is the tail-call. */ ++ if (use_call) ++ RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); ++ else + { +- RTX_FRAME_RELATED_P (insn) = 1; +- if (add_frame_related_expr) ++ RTVEC_ELT (v, vi++) = ret_rtx; ++ RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); ++ if (frame_pointer_needed) + { +- rtx r = gen_rtx_PLUS (Pmode, src, offset); +- r = gen_rtx_SET (dest, r); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, r); ++ rtx rbp = gen_rtx_REG (DImode, BP_REG); ++ gcc_assert (m->fs.fp_valid); ++ gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx); ++ ++ tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8)); ++ RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp); ++ RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp)); ++ tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)); ++ RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp); ++ } ++ else ++ { ++ /* If no hard frame pointer, we set R10 to the SP restore value. */ ++ gcc_assert (!m->fs.fp_valid); ++ gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); ++ gcc_assert (m->fs.sp_valid); ++ ++ r10 = gen_rtx_REG (DImode, R10_REG); ++ tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset)); ++ emit_insn (gen_rtx_SET (r10, tmp)); ++ ++ RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10); + } + } + +- if (dest == stack_pointer_rtx) ++ /* Generate frame load insns and restore notes. */ ++ for (i = 0; i < ncregs; ++i) + { +- HOST_WIDE_INT ooffset = m->fs.sp_offset; +- bool valid = m->fs.sp_valid; +- bool realigned = m->fs.sp_realigned; ++ const xlogue_layout::reginfo &r = xlogue.get_reginfo (i); ++ machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode; ++ rtx reg, frame_load; + +- if (src == hard_frame_pointer_rtx) +- { +- valid = m->fs.fp_valid; +- realigned = false; +- ooffset = m->fs.fp_offset; +- } +- else if (src == crtl->drap_reg) ++ reg = gen_rtx_REG (mode, r.regno); ++ frame_load = gen_frame_load (reg, rsi, r.offset); ++ ++ /* Save RSI frame load insn & note to add last. */ ++ if (r.regno == SI_REG) + { +- valid = m->fs.drap_valid; +- realigned = false; +- ooffset = 0; ++ gcc_assert (!rsi_frame_load); ++ rsi_frame_load = frame_load; ++ rsi_restore_offset = r.offset; + } + else + { +- /* Else there are two possibilities: SP itself, which we set +- up as the default above. Or EH_RETURN_STACKADJ_RTX, which is +- taken care of this by hand along the eh_return path. */ +- gcc_checking_assert (src == stack_pointer_rtx +- || offset == const0_rtx); ++ RTVEC_ELT (v, vi++) = frame_load; ++ ix86_add_cfa_restore_note (NULL, reg, r.offset); + } +- +- m->fs.sp_offset = ooffset - INTVAL (offset); +- m->fs.sp_valid = valid; +- m->fs.sp_realigned = realigned; + } +- return insn; +-} +- +-/* Find an available register to be used as dynamic realign argument +- pointer regsiter. Such a register will be written in prologue and +- used in begin of body, so it must not be +- 1. parameter passing register. +- 2. GOT pointer. +- We reuse static-chain register if it is available. Otherwise, we +- use DI for i386 and R13 for x86-64. We chose R13 since it has +- shorter encoding. +- +- Return: the regno of chosen register. */ + +-static unsigned int +-find_drap_reg (void) +-{ +- tree decl = cfun->decl; ++ /* Add RSI frame load & restore note at the end. 
*/ ++ gcc_assert (rsi_frame_load); ++ gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1); ++ RTVEC_ELT (v, vi++) = rsi_frame_load; ++ ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG), ++ rsi_restore_offset); + +- /* Always use callee-saved register if there are no caller-saved +- registers. */ +- if (TARGET_64BIT) ++ /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */ ++ if (!use_call && !frame_pointer_needed) + { +- /* Use R13 for nested function or function need static chain. +- Since function with tail call may use any caller-saved +- registers in epilogue, DRAP must not use caller-saved +- register in such case. */ +- if (DECL_STATIC_CHAIN (decl) +- || cfun->machine->no_caller_saved_registers +- || crtl->tail_call_emit) +- return R13_REG; ++ gcc_assert (m->fs.sp_valid); ++ gcc_assert (!m->fs.sp_realigned); + +- return R10_REG; ++ /* At this point, R10 should point to frame.stack_realign_offset. */ ++ if (m->fs.cfa_reg == stack_pointer_rtx) ++ m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset; ++ m->fs.sp_offset = frame.stack_realign_offset; + } ++ ++ gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v)); ++ tmp = gen_rtx_PARALLEL (VOIDmode, v); ++ if (use_call) ++ insn = emit_insn (tmp); + else + { +- /* Use DI for nested function or function need static chain. +- Since function with tail call may use any caller-saved +- registers in epilogue, DRAP must not use caller-saved +- register in such case. */ +- if (DECL_STATIC_CHAIN (decl) +- || cfun->machine->no_caller_saved_registers +- || crtl->tail_call_emit) +- return DI_REG; ++ insn = emit_jump_insn (tmp); ++ JUMP_LABEL (insn) = ret_rtx; + +- /* Reuse static chain register if it isn't used for parameter +- passing. */ +- if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2) ++ if (frame_pointer_needed) ++ ix86_emit_leave (insn); ++ else + { +- unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl)); +- if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0) +- return CX_REG; ++ /* Need CFA adjust note. */ ++ tmp = gen_rtx_SET (stack_pointer_rtx, r10); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp); + } +- return DI_REG; + } +-} + +-/* Handle a "force_align_arg_pointer" attribute. */ ++ RTX_FRAME_RELATED_P (insn) = true; ++ ix86_add_queued_cfa_restore_notes (insn); + +-static tree +-ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name, +- tree, int, bool *no_add_attrs) +-{ +- if (TREE_CODE (*node) != FUNCTION_TYPE +- && TREE_CODE (*node) != METHOD_TYPE +- && TREE_CODE (*node) != FIELD_DECL +- && TREE_CODE (*node) != TYPE_DECL) ++ /* If we're not doing a tail-call, we need to adjust the stack. */ ++ if (use_call && m->fs.sp_valid) + { +- warning (OPT_Wattributes, "%qE attribute only applies to functions", +- name); +- *no_add_attrs = true; ++ HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset; ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (dealloc), style, ++ m->fs.cfa_reg == stack_pointer_rtx); + } +- +- return NULL_TREE; + } + +-/* Return minimum incoming stack alignment. */ ++/* Restore function stack, frame, and registers. */ + +-static unsigned int +-ix86_minimum_incoming_stack_boundary (bool sibcall) ++void ++ix86_expand_epilogue (int style) + { +- unsigned int incoming_stack_boundary; +- +- /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */ +- if (cfun->machine->func_type != TYPE_NORMAL) +- incoming_stack_boundary = TARGET_64BIT ? 
128 : MIN_STACK_BOUNDARY; +- /* Prefer the one specified at command line. */ +- else if (ix86_user_incoming_stack_boundary) +- incoming_stack_boundary = ix86_user_incoming_stack_boundary; +- /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary +- if -mstackrealign is used, it isn't used for sibcall check and +- estimated stack alignment is 128bit. */ +- else if (!sibcall +- && ix86_force_align_arg_pointer +- && crtl->stack_alignment_estimated == 128) +- incoming_stack_boundary = MIN_STACK_BOUNDARY; +- else +- incoming_stack_boundary = ix86_default_incoming_stack_boundary; ++ struct machine_function *m = cfun->machine; ++ struct machine_frame_state frame_state_save = m->fs; ++ bool restore_regs_via_mov; ++ bool using_drap; ++ bool restore_stub_is_tail = false; + +- /* Incoming stack alignment can be changed on individual functions +- via force_align_arg_pointer attribute. We use the smallest +- incoming stack boundary. */ +- if (incoming_stack_boundary > MIN_STACK_BOUNDARY +- && lookup_attribute (ix86_force_align_arg_pointer_string, +- TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) +- incoming_stack_boundary = MIN_STACK_BOUNDARY; ++ if (ix86_function_naked (current_function_decl)) ++ { ++ /* The program should not reach this point. */ ++ emit_insn (gen_ud2 ()); ++ return; ++ } + +- /* The incoming stack frame has to be aligned at least at +- parm_stack_boundary. */ +- if (incoming_stack_boundary < crtl->parm_stack_boundary) +- incoming_stack_boundary = crtl->parm_stack_boundary; ++ ix86_finalize_stack_frame_flags (); ++ const struct ix86_frame &frame = cfun->machine->frame; + +- /* Stack at entrance of main is aligned by runtime. We use the +- smallest incoming stack boundary. */ +- if (incoming_stack_boundary > MAIN_STACK_BOUNDARY +- && DECL_NAME (current_function_decl) +- && MAIN_NAME_P (DECL_NAME (current_function_decl)) +- && DECL_FILE_SCOPE_P (current_function_decl)) +- incoming_stack_boundary = MAIN_STACK_BOUNDARY; ++ m->fs.sp_realigned = stack_realign_fp; ++ m->fs.sp_valid = stack_realign_fp ++ || !frame_pointer_needed ++ || crtl->sp_is_unchanging; ++ gcc_assert (!m->fs.sp_valid ++ || m->fs.sp_offset == frame.stack_pointer_offset); + +- return incoming_stack_boundary; +-} ++ /* The FP must be valid if the frame pointer is present. */ ++ gcc_assert (frame_pointer_needed == m->fs.fp_valid); ++ gcc_assert (!m->fs.fp_valid ++ || m->fs.fp_offset == frame.hard_frame_pointer_offset); + +-/* Update incoming stack boundary and estimated stack alignment. */ ++ /* We must have *some* valid pointer to the stack frame. */ ++ gcc_assert (m->fs.sp_valid || m->fs.fp_valid); + +-static void +-ix86_update_stack_boundary (void) +-{ +- ix86_incoming_stack_boundary +- = ix86_minimum_incoming_stack_boundary (false); ++ /* The DRAP is never valid at this point. */ ++ gcc_assert (!m->fs.drap_valid); + +- /* x86_64 vararg needs 16byte stack alignment for register save area. */ +- if (TARGET_64BIT +- && cfun->stdarg +- && crtl->stack_alignment_estimated < 128) +- crtl->stack_alignment_estimated = 128; ++ /* See the comment about red zone and frame ++ pointer usage in ix86_expand_prologue. */ ++ if (frame_pointer_needed && frame.red_zone_size) ++ emit_insn (gen_memory_blockage ()); + +- /* __tls_get_addr needs to be called with 16-byte aligned stack. 
*/ +- if (ix86_tls_descriptor_calls_expanded_in_cfun +- && crtl->preferred_stack_boundary < 128) +- crtl->preferred_stack_boundary = 128; +-} ++ using_drap = crtl->drap_reg && crtl->stack_realign_needed; ++ gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg); + +-/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is +- needed or an rtx for DRAP otherwise. */ ++ /* Determine the CFA offset of the end of the red-zone. */ ++ m->fs.red_zone_offset = 0; ++ if (ix86_using_red_zone () && crtl->args.pops_args < 65536) ++ { ++ /* The red-zone begins below return address and error code in ++ exception handler. */ ++ m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET; + +-static rtx +-ix86_get_drap_rtx (void) +-{ +- /* We must use DRAP if there are outgoing arguments on stack and +- ACCUMULATE_OUTGOING_ARGS is false. */ +- if (ix86_force_drap +- || (cfun->machine->outgoing_args_on_stack +- && !ACCUMULATE_OUTGOING_ARGS)) +- crtl->need_drap = true; ++ /* When the register save area is in the aligned portion of ++ the stack, determine the maximum runtime displacement that ++ matches up with the aligned frame. */ ++ if (stack_realign_drap) ++ m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT ++ + UNITS_PER_WORD); ++ } + +- if (stack_realign_drap) +- { +- /* Assign DRAP to vDRAP and returns vDRAP */ +- unsigned int regno = find_drap_reg (); +- rtx drap_vreg; +- rtx arg_ptr; +- rtx_insn *seq, *insn; ++ HOST_WIDE_INT reg_save_offset = frame.reg_save_offset; + +- arg_ptr = gen_rtx_REG (Pmode, regno); +- crtl->drap_reg = arg_ptr; ++ /* Special care must be taken for the normal return case of a function ++ using eh_return: the eax and edx registers are marked as saved, but ++ not restored along this path. Adjust the save location to match. */ ++ if (crtl->calls_eh_return && style != 2) ++ reg_save_offset -= 2 * UNITS_PER_WORD; + +- start_sequence (); +- drap_vreg = copy_to_reg (arg_ptr); +- seq = get_insns (); +- end_sequence (); ++ /* EH_RETURN requires the use of moves to function properly. */ ++ if (crtl->calls_eh_return) ++ restore_regs_via_mov = true; ++ /* SEH requires the use of pops to identify the epilogue. */ ++ else if (TARGET_SEH) ++ restore_regs_via_mov = false; ++ /* If we're only restoring one register and sp cannot be used then ++ using a move instruction to restore the register since it's ++ less work than reloading sp and popping the register. */ ++ else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1) ++ restore_regs_via_mov = true; ++ else if (TARGET_EPILOGUE_USING_MOVE ++ && cfun->machine->use_fast_prologue_epilogue ++ && (frame.nregs > 1 ++ || m->fs.sp_offset != reg_save_offset)) ++ restore_regs_via_mov = true; ++ else if (frame_pointer_needed ++ && !frame.nregs ++ && m->fs.sp_offset != reg_save_offset) ++ restore_regs_via_mov = true; ++ else if (frame_pointer_needed ++ && TARGET_USE_LEAVE ++ && cfun->machine->use_fast_prologue_epilogue ++ && frame.nregs == 1) ++ restore_regs_via_mov = true; ++ else ++ restore_regs_via_mov = false; + +- insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); +- if (!optimize) ++ if (restore_regs_via_mov || frame.nsseregs) ++ { ++ /* Ensure that the entire register save area is addressable via ++ the stack pointer, if we will restore SSE regs via sp. 
*/ ++ if (TARGET_64BIT ++ && m->fs.sp_offset > 0x7fffffff ++ && sp_valid_at (frame.stack_realign_offset + 1) ++ && (frame.nsseregs + frame.nregs) != 0) + { +- add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); +- RTX_FRAME_RELATED_P (insn) = 1; ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (m->fs.sp_offset ++ - frame.sse_reg_save_offset), ++ style, ++ m->fs.cfa_reg == stack_pointer_rtx); + } +- return drap_vreg; + } +- else +- return NULL; +-} +- +-/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ + +-static rtx +-ix86_internal_arg_pointer (void) +-{ +- return virtual_incoming_args_rtx; +-} ++ /* If there are any SSE registers to restore, then we have to do it ++ via moves, since there's obviously no pop for SSE regs. */ ++ if (frame.nsseregs) ++ ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset, ++ style == 2); + +-struct scratch_reg { +- rtx reg; +- bool saved; +-}; ++ if (m->call_ms2sysv) ++ { ++ int pop_incoming_args = crtl->args.pops_args && crtl->args.size; + +-/* Return a short-lived scratch register for use on function entry. +- In 32-bit mode, it is valid only after the registers are saved +- in the prologue. This register must be released by means of +- release_scratch_register_on_entry once it is dead. */ ++ /* We cannot use a tail-call for the stub if: ++ 1. We have to pop incoming args, ++ 2. We have additional int regs to restore, or ++ 3. A sibling call will be the tail-call, or ++ 4. We are emitting an eh_return_internal epilogue. + +-static void +-get_scratch_register_on_entry (struct scratch_reg *sr) +-{ +- int regno; ++ TODO: Item 4 has not yet tested! + +- sr->saved = false; ++ If any of the above are true, we will call the stub rather than ++ jump to it. */ ++ restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1); ++ ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style); ++ } + +- if (TARGET_64BIT) ++ /* If using out-of-line stub that is a tail-call, then...*/ ++ if (m->call_ms2sysv && restore_stub_is_tail) + { +- /* We always use R11 in 64-bit mode. */ +- regno = R11_REG; ++ /* TODO: parinoid tests. (remove eventually) */ ++ gcc_assert (m->fs.sp_valid); ++ gcc_assert (!m->fs.sp_realigned); ++ gcc_assert (!m->fs.fp_valid); ++ gcc_assert (!m->fs.realigned); ++ gcc_assert (m->fs.sp_offset == UNITS_PER_WORD); ++ gcc_assert (!crtl->drap_reg); ++ gcc_assert (!frame.nregs); + } +- else ++ else if (restore_regs_via_mov) + { +- tree decl = current_function_decl, fntype = TREE_TYPE (decl); +- bool fastcall_p +- = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; +- bool thiscall_p +- = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; +- bool static_chain_p = DECL_STATIC_CHAIN (decl); +- int regparm = ix86_function_regparm (fntype, decl); +- int drap_regno +- = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM; ++ rtx t; + +- /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax +- for the static chain register. */ +- if ((regparm < 1 || (fastcall_p && !static_chain_p)) +- && drap_regno != AX_REG) +- regno = AX_REG; +- /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx +- for the static chain register. */ +- else if (thiscall_p && !static_chain_p && drap_regno != AX_REG) +- regno = AX_REG; +- else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG) +- regno = DX_REG; +- /* ecx is the static chain register. 
*/ +- else if (regparm < 3 && !fastcall_p && !thiscall_p +- && !static_chain_p +- && drap_regno != CX_REG) +- regno = CX_REG; +- else if (ix86_save_reg (BX_REG, true, false)) +- regno = BX_REG; +- /* esi is the static chain register. */ +- else if (!(regparm == 3 && static_chain_p) +- && ix86_save_reg (SI_REG, true, false)) +- regno = SI_REG; +- else if (ix86_save_reg (DI_REG, true, false)) +- regno = DI_REG; +- else ++ if (frame.nregs) ++ ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2); ++ ++ /* eh_return epilogues need %ecx added to the stack pointer. */ ++ if (style == 2) + { +- regno = (drap_regno == AX_REG ? DX_REG : AX_REG); +- sr->saved = true; +- } +- } ++ rtx sa = EH_RETURN_STACKADJ_RTX; ++ rtx_insn *insn; + +- sr->reg = gen_rtx_REG (Pmode, regno); +- if (sr->saved) +- { +- rtx_insn *insn = emit_insn (gen_push (sr->reg)); +- RTX_FRAME_RELATED_P (insn) = 1; +- } +-} ++ /* %ecx can't be used for both DRAP register and eh_return. */ ++ if (crtl->drap_reg) ++ gcc_assert (REGNO (crtl->drap_reg) != CX_REG); + +-/* Release a scratch register obtained from the preceding function. ++ /* regparm nested functions don't work with eh_return. */ ++ gcc_assert (!ix86_static_chain_on_stack); + +- If RELEASE_VIA_POP is true, we just pop the register off the stack +- to release it. This is what non-Linux systems use with -fstack-check. ++ if (frame_pointer_needed) ++ { ++ t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); ++ t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD); ++ emit_insn (gen_rtx_SET (sa, t)); + +- Otherwise we use OFFSET to locate the saved register and the +- allocated stack space becomes part of the local frame and is +- deallocated by the epilogue. */ ++ t = gen_frame_mem (Pmode, hard_frame_pointer_rtx); ++ insn = emit_move_insn (hard_frame_pointer_rtx, t); + +-static void +-release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset, +- bool release_via_pop) +-{ +- if (sr->saved) ++ /* Note that we use SA as a temporary CFA, as the return ++ address is at the proper place relative to it. We ++ pretend this happens at the FP restore insn because ++ prior to this insn the FP would be stored at the wrong ++ offset relative to SA, and after this insn we have no ++ other reasonable register to use for the CFA. We don't ++ bother resetting the CFA to the SP for the duration of ++ the return insn, unless the control flow instrumentation ++ is done. In this case the SP is used later and we have ++ to reset CFA to SP. 
*/ ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, sa, UNITS_PER_WORD)); ++ ix86_add_queued_cfa_restore_notes (insn); ++ add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ m->fs.cfa_reg = sa; ++ m->fs.cfa_offset = UNITS_PER_WORD; ++ m->fs.fp_valid = false; ++ ++ pro_epilogue_adjust_stack (stack_pointer_rtx, sa, ++ const0_rtx, style, ++ flag_cf_protection); ++ } ++ else ++ { ++ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); ++ t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD); ++ insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t)); ++ ix86_add_queued_cfa_restore_notes (insn); ++ ++ gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); ++ if (m->fs.cfa_offset != UNITS_PER_WORD) ++ { ++ m->fs.cfa_offset = UNITS_PER_WORD; ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ plus_constant (Pmode, stack_pointer_rtx, ++ UNITS_PER_WORD)); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ } ++ m->fs.sp_offset = UNITS_PER_WORD; ++ m->fs.sp_valid = true; ++ m->fs.sp_realigned = false; ++ } ++ } ++ else + { +- if (release_via_pop) ++ /* SEH requires that the function end with (1) a stack adjustment ++ if necessary, (2) a sequence of pops, and (3) a return or ++ jump instruction. Prevent insns from the function body from ++ being scheduled into this sequence. */ ++ if (TARGET_SEH) + { +- struct machine_function *m = cfun->machine; +- rtx x, insn = emit_insn (gen_pop (sr->reg)); ++ /* Prevent a catch region from being adjacent to the standard ++ epilogue sequence. Unfortunately neither crtl->uses_eh_lsda ++ nor several other flags that would be interesting to test are ++ set up yet. */ ++ if (flag_non_call_exceptions) ++ emit_insn (gen_nops (const1_rtx)); ++ else ++ emit_insn (gen_blockage ()); ++ } + +- /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */ +- RTX_FRAME_RELATED_P (insn) = 1; +- x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD)); +- x = gen_rtx_SET (stack_pointer_rtx, x); +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); +- m->fs.sp_offset -= UNITS_PER_WORD; ++ /* First step is to deallocate the stack frame so that we can ++ pop the registers. If the stack pointer was realigned, it needs ++ to be restored now. Also do it on SEH target for very large ++ frame as the emitted instructions aren't allowed by the ABI ++ in epilogues. */ ++ if (!m->fs.sp_valid || m->fs.sp_realigned ++ || (TARGET_SEH ++ && (m->fs.sp_offset - reg_save_offset ++ >= SEH_MAX_FRAME_SIZE))) ++ { ++ pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx, ++ GEN_INT (m->fs.fp_offset ++ - reg_save_offset), ++ style, false); + } +- else ++ else if (m->fs.sp_offset != reg_save_offset) + { +- rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); +- x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x)); +- emit_insn (x); ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (m->fs.sp_offset ++ - reg_save_offset), ++ style, ++ m->fs.cfa_reg == stack_pointer_rtx); + } ++ ++ ix86_emit_restore_regs_using_pop (); + } +-} + +-/* Emit code to adjust the stack pointer by SIZE bytes while probing it. ++ /* If we used a stack pointer and haven't already got rid of it, ++ then do so now. */ ++ if (m->fs.fp_valid) ++ { ++ /* If the stack pointer is valid and pointing at the frame ++ pointer store address, then we only need a pop. 
*/ ++ if (sp_valid_at (frame.hfp_save_offset) ++ && m->fs.sp_offset == frame.hfp_save_offset) ++ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); ++ /* Leave results in shorter dependency chains on CPUs that are ++ able to grok it fast. */ ++ else if (TARGET_USE_LEAVE ++ || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) ++ || !cfun->machine->use_fast_prologue_epilogue) ++ ix86_emit_leave (NULL); ++ else ++ { ++ pro_epilogue_adjust_stack (stack_pointer_rtx, ++ hard_frame_pointer_rtx, ++ const0_rtx, style, !using_drap); ++ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); ++ } ++ } + +- This differs from the next routine in that it tries hard to prevent +- attacks that jump the stack guard. Thus it is never allowed to allocate +- more than PROBE_INTERVAL bytes of stack space without a suitable +- probe. ++ if (using_drap) ++ { ++ int param_ptr_offset = UNITS_PER_WORD; ++ rtx_insn *insn; + +- INT_REGISTERS_SAVED is true if integer registers have already been +- pushed on the stack. */ ++ gcc_assert (stack_realign_drap); + +-static void +-ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size, +- const bool int_registers_saved) +-{ +- struct machine_function *m = cfun->machine; ++ if (ix86_static_chain_on_stack) ++ param_ptr_offset += UNITS_PER_WORD; ++ if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) ++ param_ptr_offset += UNITS_PER_WORD; + +- /* If this function does not statically allocate stack space, then +- no probes are needed. */ +- if (!size) ++ insn = emit_insn (gen_rtx_SET ++ (stack_pointer_rtx, ++ gen_rtx_PLUS (Pmode, ++ crtl->drap_reg, ++ GEN_INT (-param_ptr_offset)))); ++ m->fs.cfa_reg = stack_pointer_rtx; ++ m->fs.cfa_offset = param_ptr_offset; ++ m->fs.sp_offset = param_ptr_offset; ++ m->fs.realigned = false; ++ ++ add_reg_note (insn, REG_CFA_DEF_CFA, ++ gen_rtx_PLUS (Pmode, stack_pointer_rtx, ++ GEN_INT (param_ptr_offset))); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) ++ ix86_emit_restore_reg_using_pop (crtl->drap_reg); ++ } ++ ++ /* At this point the stack pointer must be valid, and we must have ++ restored all of the registers. We may not have deallocated the ++ entire stack frame. We've delayed this until now because it may ++ be possible to merge the local stack deallocation with the ++ deallocation forced by ix86_static_chain_on_stack. */ ++ gcc_assert (m->fs.sp_valid); ++ gcc_assert (!m->fs.sp_realigned); ++ gcc_assert (!m->fs.fp_valid); ++ gcc_assert (!m->fs.realigned); ++ if (m->fs.sp_offset != UNITS_PER_WORD) + { +- /* However, the allocation of space via pushes for register +- saves could be viewed as allocating space, but without the +- need to probe. */ +- if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed) +- dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); +- else +- dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); +- return; ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), ++ style, true); + } ++ else ++ ix86_add_queued_cfa_restore_notes (get_last_insn ()); + +- /* If we are a noreturn function, then we have to consider the +- possibility that we're called via a jump rather than a call. ++ /* Sibcall epilogues don't want a return instruction. */ ++ if (style == 0) ++ { ++ m->fs = frame_state_save; ++ return; ++ } + +- Thus we don't have the implicit probe generated by saving the +- return address into the stack at the call. 
Thus, the stack +- pointer could be anywhere in the guard page. The safe thing +- to do is emit a probe now. ++ if (cfun->machine->func_type != TYPE_NORMAL) ++ emit_jump_insn (gen_interrupt_return ()); ++ else if (crtl->args.pops_args && crtl->args.size) ++ { ++ rtx popc = GEN_INT (crtl->args.pops_args); + +- The probe can be avoided if we have already emitted any callee +- register saves into the stack or have a frame pointer (which will +- have been saved as well). Those saves will function as implicit +- probes. ++ /* i386 can only pop 64K bytes. If asked to pop more, pop return ++ address, do explicit add, and jump indirectly to the caller. */ + +- ?!? This should be revamped to work like aarch64 and s390 where +- we track the offset from the most recent probe. Normally that +- offset would be zero. For a noreturn function we would reset +- it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then +- we just probe when we cross PROBE_INTERVAL. */ +- if (TREE_THIS_VOLATILE (cfun->decl) +- && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)) +- { +- /* We can safely use any register here since we're just going to push +- its value and immediately pop it back. But we do try and avoid +- argument passing registers so as not to introduce dependencies in +- the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */ +- rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG); +- rtx_insn *insn_push = emit_insn (gen_push (dummy_reg)); +- rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg)); +- m->fs.sp_offset -= UNITS_PER_WORD; +- if (m->fs.cfa_reg == stack_pointer_rtx) ++ if (crtl->args.pops_args >= 65536) + { +- m->fs.cfa_offset -= UNITS_PER_WORD; +- rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); +- x = gen_rtx_SET (stack_pointer_rtx, x); +- add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x); +- RTX_FRAME_RELATED_P (insn_push) = 1; +- x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); +- x = gen_rtx_SET (stack_pointer_rtx, x); +- add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x); +- RTX_FRAME_RELATED_P (insn_pop) = 1; +- } +- emit_insn (gen_blockage ()); +- } ++ rtx ecx = gen_rtx_REG (SImode, CX_REG); ++ rtx_insn *insn; + +- /* If we allocate less than the size of the guard statically, +- then no probing is necessary, but we do need to allocate +- the stack. */ +- if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE))) +- { +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-size), -1, +- m->fs.cfa_reg == stack_pointer_rtx); +- dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); +- return; +- } ++ /* There is no "pascal" calling convention in any 64bit ABI. */ ++ gcc_assert (!TARGET_64BIT); + +- /* We're allocating a large enough stack frame that we need to +- emit probes. Either emit them inline or in a loop depending +- on the size. */ +- HOST_WIDE_INT probe_interval = get_probe_interval (); +- if (size <= 4 * probe_interval) +- { +- HOST_WIDE_INT i; +- for (i = probe_interval; i <= size; i += probe_interval) +- { +- /* Allocate PROBE_INTERVAL bytes. */ +- rtx insn +- = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-probe_interval), -1, +- m->fs.cfa_reg == stack_pointer_rtx); +- add_reg_note (insn, REG_STACK_CHECK, const0_rtx); ++ insn = emit_insn (gen_pop (ecx)); ++ m->fs.cfa_offset -= UNITS_PER_WORD; ++ m->fs.sp_offset -= UNITS_PER_WORD; + +- /* And probe at *sp. 
*/ +- emit_stack_probe (stack_pointer_rtx); +- emit_insn (gen_blockage ()); +- } ++ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); ++ x = gen_rtx_SET (stack_pointer_rtx, x); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); ++ add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); ++ RTX_FRAME_RELATED_P (insn) = 1; + +- /* We need to allocate space for the residual, but we do not need +- to probe the residual. */ +- HOST_WIDE_INT residual = (i - probe_interval - size); +- if (residual) +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (residual), -1, +- m->fs.cfa_reg == stack_pointer_rtx); +- dump_stack_clash_frame_info (PROBE_INLINE, residual != 0); ++ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, ++ popc, -1, true); ++ emit_jump_insn (gen_simple_return_indirect_internal (ecx)); ++ } ++ else ++ emit_jump_insn (gen_simple_return_pop_internal (popc)); + } +- else ++ else if (!m->call_ms2sysv || !restore_stub_is_tail) + { +- /* We expect the GP registers to be saved when probes are used +- as the probing sequences might need a scratch register and +- the routine to allocate one assumes the integer registers +- have already been saved. */ +- gcc_assert (int_registers_saved); +- +- struct scratch_reg sr; +- get_scratch_register_on_entry (&sr); +- +- /* If we needed to save a register, then account for any space +- that was pushed (we are not going to pop the register when +- we do the restore). */ +- if (sr.saved) +- size -= UNITS_PER_WORD; ++ /* In case of return from EH a simple return cannot be used ++ as a return address will be compared with a shadow stack ++ return address. Use indirect jump instead. */ ++ if (style == 2 && flag_cf_protection) ++ { ++ /* Register used in indirect jump must be in word_mode. But ++ Pmode may not be the same as word_mode for x32. */ ++ rtx ecx = gen_rtx_REG (word_mode, CX_REG); ++ rtx_insn *insn; + +- /* Step 1: round SIZE down to a multiple of the interval. */ +- HOST_WIDE_INT rounded_size = size & -probe_interval; ++ insn = emit_insn (gen_pop (ecx)); ++ m->fs.cfa_offset -= UNITS_PER_WORD; ++ m->fs.sp_offset -= UNITS_PER_WORD; + +- /* Step 2: compute final value of the loop counter. Use lea if +- possible. */ +- rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size); +- rtx insn; +- if (address_no_seg_operand (addr, Pmode)) +- insn = emit_insn (gen_rtx_SET (sr.reg, addr)); +- else +- { +- emit_move_insn (sr.reg, GEN_INT (-rounded_size)); +- insn = emit_insn (gen_rtx_SET (sr.reg, +- gen_rtx_PLUS (Pmode, sr.reg, +- stack_pointer_rtx))); +- } +- if (m->fs.cfa_reg == stack_pointer_rtx) +- { +- add_reg_note (insn, REG_CFA_DEF_CFA, +- plus_constant (Pmode, sr.reg, +- m->fs.cfa_offset + rounded_size)); ++ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); ++ x = gen_rtx_SET (stack_pointer_rtx, x); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); ++ add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; +- } + +- /* Step 3: the loop. 
*/ +- rtx size_rtx = GEN_INT (rounded_size); +- insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, +- size_rtx)); +- if (m->fs.cfa_reg == stack_pointer_rtx) +- { +- m->fs.cfa_offset += rounded_size; +- add_reg_note (insn, REG_CFA_DEF_CFA, +- plus_constant (Pmode, stack_pointer_rtx, +- m->fs.cfa_offset)); +- RTX_FRAME_RELATED_P (insn) = 1; ++ emit_jump_insn (gen_simple_return_indirect_internal (ecx)); + } +- m->fs.sp_offset += rounded_size; +- emit_insn (gen_blockage ()); +- +- /* Step 4: adjust SP if we cannot assert at compile-time that SIZE +- is equal to ROUNDED_SIZE. */ +- +- if (size != rounded_size) +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (rounded_size - size), -1, +- m->fs.cfa_reg == stack_pointer_rtx); +- dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); +- +- /* This does not deallocate the space reserved for the scratch +- register. That will be deallocated in the epilogue. */ +- release_scratch_register_on_entry (&sr, size, false); ++ else ++ emit_jump_insn (gen_simple_return_internal ()); + } + +- /* Make sure nothing is scheduled before we are done. */ +- emit_insn (gen_blockage ()); ++ /* Restore the state back to the state from the prologue, ++ so that it's correct for the next epilogue. */ ++ m->fs = frame_state_save; + } + +-/* Emit code to adjust the stack pointer by SIZE bytes while probing it. +- +- INT_REGISTERS_SAVED is true if integer registers have already been +- pushed on the stack. */ ++/* Reset from the function's potential modifications. */ + + static void +-ix86_adjust_stack_and_probe (HOST_WIDE_INT size, +- const bool int_registers_saved) ++ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED) + { +- /* We skip the probe for the first interval + a small dope of 4 words and +- probe that many bytes past the specified size to maintain a protection +- area at the botton of the stack. */ +- const int dope = 4 * UNITS_PER_WORD; +- rtx size_rtx = GEN_INT (size), last; ++ if (pic_offset_table_rtx ++ && !ix86_use_pseudo_pic_reg ()) ++ SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); + +- /* See if we have a constant small number of probes to generate. If so, +- that's the easy case. The run-time loop is made up of 9 insns in the +- generic case while the compile-time loop is made up of 3+2*(n-1) insns +- for n # of intervals. */ +- if (size <= 4 * get_probe_interval ()) ++ if (TARGET_MACHO) + { +- HOST_WIDE_INT i, adjust; +- bool first_probe = true; ++ rtx_insn *insn = get_last_insn (); ++ rtx_insn *deleted_debug_label = NULL; + +- /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for +- values of N from 1 until it exceeds SIZE. If only one probe is +- needed, this will not generate any code. Then adjust and probe +- to PROBE_INTERVAL + SIZE. */ +- for (i = get_probe_interval (); i < size; i += get_probe_interval ()) ++ /* Mach-O doesn't support labels at the end of objects, so if ++ it looks like we might want one, take special action. ++ First, collect any sequence of deleted debug labels. 
*/ ++ while (insn ++ && NOTE_P (insn) ++ && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) + { +- if (first_probe) +- { +- adjust = 2 * get_probe_interval () + dope; +- first_probe = false; +- } +- else +- adjust = get_probe_interval (); +- +- emit_insn (gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -adjust))); +- emit_stack_probe (stack_pointer_rtx); ++ /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL ++ notes only, instead set their CODE_LABEL_NUMBER to -1, ++ otherwise there would be code generation differences ++ in between -g and -g0. */ ++ if (NOTE_P (insn) && NOTE_KIND (insn) ++ == NOTE_INSN_DELETED_DEBUG_LABEL) ++ deleted_debug_label = insn; ++ insn = PREV_INSN (insn); + } + +- if (first_probe) +- adjust = size + get_probe_interval () + dope; +- else +- adjust = size + get_probe_interval () - i; +- +- emit_insn (gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -adjust))); +- emit_stack_probe (stack_pointer_rtx); ++ /* If we have: ++ label: ++ barrier ++ then this needs to be detected, so skip past the barrier. */ + +- /* Adjust back to account for the additional first interval. */ +- last = emit_insn (gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- (get_probe_interval () +- + dope)))); +- } ++ if (insn && BARRIER_P (insn)) ++ insn = PREV_INSN (insn); + +- /* Otherwise, do the same as above, but in a loop. Note that we must be +- extra careful with variables wrapping around because we might be at +- the very top (or the very bottom) of the address space and we have +- to be able to handle this case properly; in particular, we use an +- equality test for the loop condition. */ +- else +- { +- /* We expect the GP registers to be saved when probes are used +- as the probing sequences might need a scratch register and +- the routine to allocate one assumes the integer registers +- have already been saved. */ +- gcc_assert (int_registers_saved); +- +- HOST_WIDE_INT rounded_size; +- struct scratch_reg sr; +- +- get_scratch_register_on_entry (&sr); +- +- /* If we needed to save a register, then account for any space +- that was pushed (we are not going to pop the register when +- we do the restore). */ +- if (sr.saved) +- size -= UNITS_PER_WORD; +- +- /* Step 1: round SIZE to the previous multiple of the interval. */ +- +- rounded_size = ROUND_DOWN (size, get_probe_interval ()); ++ /* Up to now we've only seen notes or barriers. */ ++ if (insn) ++ { ++ if (LABEL_P (insn) ++ || (NOTE_P (insn) ++ && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) ++ /* Trailing label. */ ++ fputs ("\tnop\n", file); ++ else if (cfun && ! cfun->is_thunk) ++ { ++ /* See if we have a completely empty function body, skipping ++ the special case of the picbase thunk emitted as asm. */ ++ while (insn && ! INSN_P (insn)) ++ insn = PREV_INSN (insn); ++ /* If we don't find any insns, we've got an empty function body; ++ I.e. completely empty - without a return or branch. This is ++ taken as the case where a function body has been removed ++ because it contains an inline __builtin_unreachable(). GCC ++ declares that reaching __builtin_unreachable() means UB so ++ we're not obliged to do anything special; however, we want ++ non-zero-sized function bodies. To meet this, and help the ++ user out, let's trap the case. 
*/ ++ if (insn == NULL) ++ fputs ("\tud2\n", file); ++ } ++ } ++ else if (deleted_debug_label) ++ for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) ++ if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) ++ CODE_LABEL_NUMBER (insn) = -1; ++ } ++} + ++/* Return a scratch register to use in the split stack prologue. The ++ split stack prologue is used for -fsplit-stack. It is the first ++ instructions in the function, even before the regular prologue. ++ The scratch register can be any caller-saved register which is not ++ used for parameters or for the static chain. */ + +- /* Step 2: compute initial and final value of the loop counter. */ ++static unsigned int ++split_stack_prologue_scratch_regno (void) ++{ ++ if (TARGET_64BIT) ++ return R11_REG; ++ else ++ { ++ bool is_fastcall, is_thiscall; ++ int regparm; + +- /* SP = SP_0 + PROBE_INTERVAL. */ +- emit_insn (gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- - (get_probe_interval () + dope)))); ++ is_fastcall = (lookup_attribute ("fastcall", ++ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) ++ != NULL); ++ is_thiscall = (lookup_attribute ("thiscall", ++ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) ++ != NULL); ++ regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); + +- /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */ +- if (rounded_size <= (HOST_WIDE_INT_1 << 31)) +- emit_insn (gen_rtx_SET (sr.reg, +- plus_constant (Pmode, stack_pointer_rtx, +- -rounded_size))); ++ if (is_fastcall) ++ { ++ if (DECL_STATIC_CHAIN (cfun->decl)) ++ { ++ sorry ("%<-fsplit-stack%> does not support fastcall with " ++ "nested function"); ++ return INVALID_REGNUM; ++ } ++ return AX_REG; ++ } ++ else if (is_thiscall) ++ { ++ if (!DECL_STATIC_CHAIN (cfun->decl)) ++ return DX_REG; ++ return AX_REG; ++ } ++ else if (regparm < 3) ++ { ++ if (!DECL_STATIC_CHAIN (cfun->decl)) ++ return CX_REG; ++ else ++ { ++ if (regparm >= 2) ++ { ++ sorry ("%<-fsplit-stack%> does not support 2 register " ++ "parameters for a nested function"); ++ return INVALID_REGNUM; ++ } ++ return DX_REG; ++ } ++ } + else + { +- emit_move_insn (sr.reg, GEN_INT (-rounded_size)); +- emit_insn (gen_rtx_SET (sr.reg, +- gen_rtx_PLUS (Pmode, sr.reg, +- stack_pointer_rtx))); ++ /* FIXME: We could make this work by pushing a register ++ around the addition and comparison. */ ++ sorry ("%<-fsplit-stack%> does not support 3 register parameters"); ++ return INVALID_REGNUM; + } ++ } ++} + ++/* A SYMBOL_REF for the function which allocates new stackspace for ++ -fsplit-stack. */ + +- /* Step 3: the loop +- +- do +- { +- SP = SP + PROBE_INTERVAL +- probe at SP +- } +- while (SP != LAST_ADDR) +- +- adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for +- values of N from 1 until it is equal to ROUNDED_SIZE. */ +- +- emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx)); ++static GTY(()) rtx split_stack_fn; + ++/* A SYMBOL_REF for the more stack function when using the large ++ model. */ + +- /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot +- assert at compile-time that SIZE is equal to ROUNDED_SIZE. */ ++static GTY(()) rtx split_stack_fn_large; + +- if (size != rounded_size) +- { +- emit_insn (gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- rounded_size - size))); +- emit_stack_probe (stack_pointer_rtx); +- } ++/* Return location of the stack guard value in the TLS block. */ + +- /* Adjust back to account for the additional first interval. 
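The removed ix86_adjust_stack_and_probe above skips the probe for the first interval plus a four-word "dope", probes that far past SIZE, and then adjusts the stack pointer back, so the net allocation is exactly SIZE. A standalone check of that arithmetic (not part of the patch; the 4096 and 32 values merely stand in for get_probe_interval () and 4 * UNITS_PER_WORD):

    #include <assert.h>
    #include <stdio.h>

    /* Simulate the SP adjustments of the small-size path and return the net
       number of bytes allocated once the final "adjust back" has happened.  */
    static long net_allocation (long size, long interval, long dope)
    {
      long i, adjust, sp = 0;
      int first_probe = 1;

      for (i = interval; i < size; i += interval)
        {
          adjust = first_probe ? 2 * interval + dope : interval;
          first_probe = 0;
          sp -= adjust;                 /* allocate and probe at sp */
        }

      adjust = first_probe ? size + interval + dope : size + interval - i;
      sp -= adjust;                     /* final allocation and probe */

      sp += interval + dope;            /* adjust back for the skipped interval */
      return -sp;
    }

    int main (void)
    {
      assert (net_allocation (4000, 4096, 32) == 4000);
      assert (net_allocation (15000, 4096, 32) == 15000);
      printf ("net allocation equals the requested size\n");
      return 0;
    }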
*/ +- last = emit_insn (gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- (get_probe_interval () +- + dope)))); ++rtx ++ix86_split_stack_guard (void) ++{ ++ int offset; ++ addr_space_t as = DEFAULT_TLS_SEG_REG; ++ rtx r; + +- /* This does not deallocate the space reserved for the scratch +- register. That will be deallocated in the epilogue. */ +- release_scratch_register_on_entry (&sr, size, false); +- } ++ gcc_assert (flag_split_stack); + +- /* Even if the stack pointer isn't the CFA register, we need to correctly +- describe the adjustments made to it, in particular differentiate the +- frame-related ones from the frame-unrelated ones. */ +- if (size > 0) +- { +- rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2)); +- XVECEXP (expr, 0, 0) +- = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, -size)); +- XVECEXP (expr, 0, 1) +- = gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- get_probe_interval () + dope + size)); +- add_reg_note (last, REG_FRAME_RELATED_EXPR, expr); +- RTX_FRAME_RELATED_P (last) = 1; ++#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET ++ offset = TARGET_THREAD_SPLIT_STACK_OFFSET; ++#else ++ gcc_unreachable (); ++#endif + +- cfun->machine->fs.sp_offset += size; +- } ++ r = GEN_INT (offset); ++ r = gen_const_mem (Pmode, r); ++ set_mem_addr_space (r, as); + +- /* Make sure nothing is scheduled before we are done. */ +- emit_insn (gen_blockage ()); ++ return r; + } + +-/* Adjust the stack pointer up to REG while probing it. */ ++/* Handle -fsplit-stack. These are the first instructions in the ++ function, even before the regular prologue. */ + +-const char * +-output_adjust_stack_and_probe (rtx reg) ++void ++ix86_expand_split_stack_prologue (void) + { +- static int labelno = 0; +- char loop_lab[32]; +- rtx xops[2]; +- +- ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); ++ HOST_WIDE_INT allocate; ++ unsigned HOST_WIDE_INT args_size; ++ rtx_code_label *label; ++ rtx limit, current, allocate_rtx, call_fusage; ++ rtx_insn *call_insn; ++ rtx scratch_reg = NULL_RTX; ++ rtx_code_label *varargs_label = NULL; ++ rtx fn; + +- /* Loop. */ +- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); ++ gcc_assert (flag_split_stack && reload_completed); + +- /* SP = SP + PROBE_INTERVAL. */ +- xops[0] = stack_pointer_rtx; +- xops[1] = GEN_INT (get_probe_interval ()); +- output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); ++ ix86_finalize_stack_frame_flags (); ++ struct ix86_frame &frame = cfun->machine->frame; ++ allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; + +- /* Probe at SP. */ +- xops[1] = const0_rtx; +- output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops); ++ /* This is the label we will branch to if we have enough stack ++ space. We expect the basic block reordering pass to reverse this ++ branch if optimizing, so that we branch in the unlikely case. */ ++ label = gen_label_rtx (); + +- /* Test if SP == LAST_ADDR. */ +- xops[0] = stack_pointer_rtx; +- xops[1] = reg; +- output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); ++ /* We need to compare the stack pointer minus the frame size with ++ the stack boundary in the TCB. The stack boundary always gives ++ us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we ++ can compare directly. Otherwise we need to do an addition. */ + +- /* Branch. 
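output_adjust_stack_and_probe above emits a sub/probe/cmp loop that terminates on inequality rather than on an ordering test; as the comments in the surrounding hunks note, the equality test is what keeps the loop correct if the probed region wraps around the address space. A standalone sketch of that point (not part of the patch; the 16-bit type is only used to make the wrap easy to trigger):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main (void)
    {
      const uint16_t interval = 0x100;
      uint16_t sp = 0x0200;                 /* near the bottom of the space */
      uint16_t last = sp - 0x0800;          /* wraps around to 0xfa00 */
      unsigned probes = 0;

      do
        {
          sp -= interval;                   /* sub; probe at (sp) */
          probes++;
        }
      while (sp != last);                   /* cmp; jne */

      assert (probes == 0x0800 / interval); /* exactly rounded_size / interval */
      printf ("%u probes, final sp = 0x%04x\n", probes, sp);

      /* An ordering test such as "while (sp > last)" would already be false
         after the first iteration here, because LAST wrapped above SP.  */
      return 0;
    }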
*/ +- fputs ("\tjne\t", asm_out_file); +- assemble_name_raw (asm_out_file, loop_lab); +- fputc ('\n', asm_out_file); ++ limit = ix86_split_stack_guard (); + +- return ""; +-} ++ if (allocate < SPLIT_STACK_AVAILABLE) ++ current = stack_pointer_rtx; ++ else ++ { ++ unsigned int scratch_regno; ++ rtx offset; + +-/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, +- inclusive. These are offsets from the current stack pointer. +- +- INT_REGISTERS_SAVED is true if integer registers have already been +- pushed on the stack. */ ++ /* We need a scratch register to hold the stack pointer minus ++ the required frame size. Since this is the very start of the ++ function, the scratch register can be any caller-saved ++ register which is not used for parameters. */ ++ offset = GEN_INT (- allocate); ++ scratch_regno = split_stack_prologue_scratch_regno (); ++ if (scratch_regno == INVALID_REGNUM) ++ return; ++ scratch_reg = gen_rtx_REG (Pmode, scratch_regno); ++ if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode)) ++ { ++ /* We don't use ix86_gen_add3 in this case because it will ++ want to split to lea, but when not optimizing the insn ++ will not be split after this point. */ ++ emit_insn (gen_rtx_SET (scratch_reg, ++ gen_rtx_PLUS (Pmode, stack_pointer_rtx, ++ offset))); ++ } ++ else ++ { ++ emit_move_insn (scratch_reg, offset); ++ emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg, ++ stack_pointer_rtx)); ++ } ++ current = scratch_reg; ++ } + +-static void +-ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, +- const bool int_registers_saved) +-{ +- /* See if we have a constant small number of probes to generate. If so, +- that's the easy case. The run-time loop is made up of 6 insns in the +- generic case while the compile-time loop is made up of n insns for n # +- of intervals. */ +- if (size <= 6 * get_probe_interval ()) +- { +- HOST_WIDE_INT i; ++ ix86_expand_branch (GEU, current, limit, label); ++ rtx_insn *jump_insn = get_last_insn (); ++ JUMP_LABEL (jump_insn) = label; + +- /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until +- it exceeds SIZE. If only one probe is needed, this will not +- generate any code. Then probe at FIRST + SIZE. */ +- for (i = get_probe_interval (); i < size; i += get_probe_interval ()) +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- -(first + i))); ++ /* Mark the jump as very likely to be taken. */ ++ add_reg_br_prob_note (jump_insn, profile_probability::very_likely ()); + +- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, +- -(first + size))); ++ if (split_stack_fn == NULL_RTX) ++ { ++ split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); ++ SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL; + } ++ fn = split_stack_fn; + +- /* Otherwise, do the same as above, but in a loop. Note that we must be +- extra careful with variables wrapping around because we might be at +- the very top (or the very bottom) of the address space and we have +- to be able to handle this case properly; in particular, we use an +- equality test for the loop condition. */ +- else ++ /* Get more stack space. We pass in the desired stack space and the ++ size of the arguments to copy to the new stack. In 32-bit mode ++ we push the parameters; __morestack will return on a new stack ++ anyhow. In 64-bit mode we pass the parameters in r10 and ++ r11. */ ++ allocate_rtx = GEN_INT (allocate); ++ args_size = crtl->args.size >= 0 ? 
(HOST_WIDE_INT) crtl->args.size : 0; ++ call_fusage = NULL_RTX; ++ rtx pop = NULL_RTX; ++ if (TARGET_64BIT) + { +- /* We expect the GP registers to be saved when probes are used +- as the probing sequences might need a scratch register and +- the routine to allocate one assumes the integer registers +- have already been saved. */ +- gcc_assert (int_registers_saved); ++ rtx reg10, reg11; + +- HOST_WIDE_INT rounded_size, last; +- struct scratch_reg sr; ++ reg10 = gen_rtx_REG (Pmode, R10_REG); ++ reg11 = gen_rtx_REG (Pmode, R11_REG); + +- get_scratch_register_on_entry (&sr); ++ /* If this function uses a static chain, it will be in %r10. ++ Preserve it across the call to __morestack. */ ++ if (DECL_STATIC_CHAIN (cfun->decl)) ++ { ++ rtx rax; + ++ rax = gen_rtx_REG (word_mode, AX_REG); ++ emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG)); ++ use_reg (&call_fusage, rax); ++ } + +- /* Step 1: round SIZE to the previous multiple of the interval. */ ++ if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) ++ && !TARGET_PECOFF) ++ { ++ HOST_WIDE_INT argval; + +- rounded_size = ROUND_DOWN (size, get_probe_interval ()); ++ gcc_assert (Pmode == DImode); ++ /* When using the large model we need to load the address ++ into a register, and we've run out of registers. So we ++ switch to a different calling convention, and we call a ++ different function: __morestack_large. We pass the ++ argument size in the upper 32 bits of r10 and pass the ++ frame size in the lower 32 bits. */ ++ gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate); ++ gcc_assert ((args_size & 0xffffffff) == args_size); + ++ if (split_stack_fn_large == NULL_RTX) ++ { ++ split_stack_fn_large ++ = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); ++ SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL; ++ } ++ if (ix86_cmodel == CM_LARGE_PIC) ++ { ++ rtx_code_label *label; ++ rtx x; + +- /* Step 2: compute initial and final value of the loop counter. */ ++ label = gen_label_rtx (); ++ emit_label (label); ++ LABEL_PRESERVE_P (label) = 1; ++ emit_insn (gen_set_rip_rex64 (reg10, label)); ++ emit_insn (gen_set_got_offset_rex64 (reg11, label)); ++ emit_insn (ix86_gen_add3 (reg10, reg10, reg11)); ++ x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large), ++ UNSPEC_GOT); ++ x = gen_rtx_CONST (Pmode, x); ++ emit_move_insn (reg11, x); ++ x = gen_rtx_PLUS (Pmode, reg10, reg11); ++ x = gen_const_mem (Pmode, x); ++ emit_move_insn (reg11, x); ++ } ++ else ++ emit_move_insn (reg11, split_stack_fn_large); + +- /* TEST_OFFSET = FIRST. */ +- emit_move_insn (sr.reg, GEN_INT (-first)); ++ fn = reg11; + +- /* LAST_OFFSET = FIRST + ROUNDED_SIZE. 
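When the large code model forces the __morestack_large_model convention described above, the argument size is passed in the upper 32 bits of r10 and the frame size in the lower 32 bits; the hunk packs them as ((args_size << 16) << 16) + allocate, which for a 64-bit operand is simply a 32-bit left shift. A standalone sketch of that packing and the matching unpacking (not part of the patch; the sample sizes are arbitrary):

    #include <assert.h>
    #include <stdint.h>

    int main (void)
    {
      uint64_t args_size = 48;       /* bytes of stack arguments to copy */
      uint64_t allocate  = 0x2000;   /* requested frame size */

      /* Both values must fit in 32 bits, as the hunk's gcc_asserts require.  */
      assert (args_size <= 0xffffffffu && allocate <= 0xffffffffu);

      uint64_t argval = ((args_size << 16) << 16) + allocate;

      assert (argval >> 32 == args_size);            /* upper half: arg size */
      assert ((argval & 0xffffffffu) == allocate);   /* lower half: frame size */
      return 0;
    }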
*/ +- last = first + rounded_size; ++ argval = ((args_size << 16) << 16) + allocate; ++ emit_move_insn (reg10, GEN_INT (argval)); ++ } ++ else ++ { ++ emit_move_insn (reg10, allocate_rtx); ++ emit_move_insn (reg11, GEN_INT (args_size)); ++ use_reg (&call_fusage, reg11); ++ } + ++ use_reg (&call_fusage, reg10); ++ } ++ else ++ { ++ rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size))); ++ add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD)); ++ insn = emit_insn (gen_push (allocate_rtx)); ++ add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD)); ++ pop = GEN_INT (2 * UNITS_PER_WORD); ++ } ++ call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn), ++ GEN_INT (UNITS_PER_WORD), constm1_rtx, ++ pop, false); ++ add_function_usage_to (call_insn, call_fusage); ++ if (!TARGET_64BIT) ++ add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0)); ++ /* Indicate that this function can't jump to non-local gotos. */ ++ make_reg_eh_region_note_nothrow_nononlocal (call_insn); + +- /* Step 3: the loop ++ /* In order to make call/return prediction work right, we now need ++ to execute a return instruction. See ++ libgcc/config/i386/morestack.S for the details on how this works. + +- do +- { +- TEST_ADDR = TEST_ADDR + PROBE_INTERVAL +- probe at TEST_ADDR +- } +- while (TEST_ADDR != LAST_ADDR) ++ For flow purposes gcc must not see this as a return ++ instruction--we need control flow to continue at the subsequent ++ label. Therefore, we use an unspec. */ ++ gcc_assert (crtl->args.pops_args < 65536); ++ rtx_insn *ret_insn ++ = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); + +- probes at FIRST + N * PROBE_INTERVAL for values of N from 1 +- until it is equal to ROUNDED_SIZE. */ ++ if ((flag_cf_protection & CF_BRANCH)) ++ { ++ /* Insert ENDBR since __morestack will jump back here via indirect ++ call. */ ++ rtx cet_eb = gen_nop_endbr (); ++ emit_insn_after (cet_eb, ret_insn); ++ } + +- emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last))); ++ /* If we are in 64-bit mode and this function uses a static chain, ++ we saved %r10 in %rax before calling _morestack. */ ++ if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl)) ++ emit_move_insn (gen_rtx_REG (word_mode, R10_REG), ++ gen_rtx_REG (word_mode, AX_REG)); + ++ /* If this function calls va_start, we need to store a pointer to ++ the arguments on the old stack, because they may not have been ++ all copied to the new stack. At this point the old stack can be ++ found at the frame pointer value used by __morestack, because ++ __morestack has set that up before calling back to us. Here we ++ store that pointer in a scratch register, and in ++ ix86_expand_prologue we store the scratch register in a stack ++ slot. */ ++ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) ++ { ++ unsigned int scratch_regno; ++ rtx frame_reg; ++ int words; + +- /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time +- that SIZE is equal to ROUNDED_SIZE. */ ++ scratch_regno = split_stack_prologue_scratch_regno (); ++ scratch_reg = gen_rtx_REG (Pmode, scratch_regno); ++ frame_reg = gen_rtx_REG (Pmode, BP_REG); + +- if (size != rounded_size) +- emit_stack_probe (plus_constant (Pmode, +- gen_rtx_PLUS (Pmode, +- stack_pointer_rtx, +- sr.reg), +- rounded_size - size)); ++ /* 64-bit: ++ fp -> old fp value ++ return address within this function ++ return address of caller of this function ++ stack arguments ++ So we add three words to get to the stack arguments. 
+ +- release_scratch_register_on_entry (&sr, size, true); +- } ++ 32-bit: ++ fp -> old fp value ++ return address within this function ++ first argument to __morestack ++ second argument to __morestack ++ return address of caller of this function ++ stack arguments ++ So we add five words to get to the stack arguments. ++ */ ++ words = TARGET_64BIT ? 3 : 5; ++ emit_insn (gen_rtx_SET (scratch_reg, ++ gen_rtx_PLUS (Pmode, frame_reg, ++ GEN_INT (words * UNITS_PER_WORD)))); + +- /* Make sure nothing is scheduled before we are done. */ +- emit_insn (gen_blockage ()); +-} ++ varargs_label = gen_label_rtx (); ++ emit_jump_insn (gen_jump (varargs_label)); ++ JUMP_LABEL (get_last_insn ()) = varargs_label; + +-/* Probe a range of stack addresses from REG to END, inclusive. These are +- offsets from the current stack pointer. */ ++ emit_barrier (); ++ } + +-const char * +-output_probe_stack_range (rtx reg, rtx end) +-{ +- static int labelno = 0; +- char loop_lab[32]; +- rtx xops[3]; +- +- ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); +- +- /* Loop. */ +- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); +- +- /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ +- xops[0] = reg; +- xops[1] = GEN_INT (get_probe_interval ()); +- output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); +- +- /* Probe at TEST_ADDR. */ +- xops[0] = stack_pointer_rtx; +- xops[1] = reg; +- xops[2] = const0_rtx; +- output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops); +- +- /* Test if TEST_ADDR == LAST_ADDR. */ +- xops[0] = reg; +- xops[1] = end; +- output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); ++ emit_label (label); ++ LABEL_NUSES (label) = 1; + +- /* Branch. */ +- fputs ("\tjne\t", asm_out_file); +- assemble_name_raw (asm_out_file, loop_lab); +- fputc ('\n', asm_out_file); ++ /* If this function calls va_start, we now have to set the scratch ++ register for the case where we do not call __morestack. In this ++ case we need to set it based on the stack pointer. */ ++ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) ++ { ++ emit_insn (gen_rtx_SET (scratch_reg, ++ gen_rtx_PLUS (Pmode, stack_pointer_rtx, ++ GEN_INT (UNITS_PER_WORD)))); + +- return ""; ++ emit_label (varargs_label); ++ LABEL_NUSES (varargs_label) = 1; ++ } + } + +-/* Return true if stack frame is required. Update STACK_ALIGNMENT +- to the largest alignment, in bits, of stack slot used if stack +- frame is required and CHECK_STACK_SLOT is true. */ ++/* We may have to tell the dataflow pass that the split stack prologue ++ is initializing a scratch register. */ + +-static bool +-ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, +- bool check_stack_slot) ++static void ++ix86_live_on_entry (bitmap regs) + { +- HARD_REG_SET set_up_by_prologue, prologue_used; +- basic_block bb; +- +- CLEAR_HARD_REG_SET (prologue_used); +- CLEAR_HARD_REG_SET (set_up_by_prologue); +- add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); +- add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); +- add_to_hard_reg_set (&set_up_by_prologue, Pmode, +- HARD_FRAME_POINTER_REGNUM); +- +- /* The preferred stack alignment is the minimum stack alignment. 
*/ +- if (stack_alignment > crtl->preferred_stack_boundary) +- stack_alignment = crtl->preferred_stack_boundary; +- +- bool require_stack_frame = false; +- +- FOR_EACH_BB_FN (bb, cfun) ++ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) + { +- rtx_insn *insn; +- FOR_BB_INSNS (bb, insn) +- if (NONDEBUG_INSN_P (insn) +- && requires_stack_frame_p (insn, prologue_used, +- set_up_by_prologue)) +- { +- require_stack_frame = true; +- +- if (check_stack_slot) +- { +- /* Find the maximum stack alignment. */ +- subrtx_iterator::array_type array; +- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) +- if (MEM_P (*iter) +- && (reg_mentioned_p (stack_pointer_rtx, +- *iter) +- || reg_mentioned_p (frame_pointer_rtx, +- *iter))) +- { +- unsigned int alignment = MEM_ALIGN (*iter); +- if (alignment > stack_alignment) +- stack_alignment = alignment; +- } +- } +- } ++ gcc_assert (flag_split_stack); ++ bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); + } +- +- return require_stack_frame; + } ++ ++/* Extract the parts of an RTL expression that is a valid memory address ++ for an instruction. Return 0 if the structure of the address is ++ grossly off. Return -1 if the address contains ASHIFT, so it is not ++ strictly valid, but still used for computing length of lea instruction. */ + +-/* Finalize stack_realign_needed and frame_pointer_needed flags, which +- will guide prologue/epilogue to be generated in correct form. */ +- +-static void +-ix86_finalize_stack_frame_flags (void) ++int ++ix86_decompose_address (rtx addr, struct ix86_address *out) + { +- /* Check if stack realign is really needed after reload, and +- stores result in cfun */ +- unsigned int incoming_stack_boundary +- = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary +- ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); +- unsigned int stack_alignment +- = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor +- ? crtl->max_used_stack_slot_alignment +- : crtl->stack_alignment_needed); +- unsigned int stack_realign +- = (incoming_stack_boundary < stack_alignment); +- bool recompute_frame_layout_p = false; ++ rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; ++ rtx base_reg, index_reg; ++ HOST_WIDE_INT scale = 1; ++ rtx scale_rtx = NULL_RTX; ++ rtx tmp; ++ int retval = 1; ++ addr_space_t seg = ADDR_SPACE_GENERIC; + +- if (crtl->stack_realign_finalized) ++ /* Allow zero-extended SImode addresses, ++ they will be emitted with addr32 prefix. */ ++ if (TARGET_64BIT && GET_MODE (addr) == DImode) + { +- /* After stack_realign_needed is finalized, we can't no longer +- change it. */ +- gcc_assert (crtl->stack_realign_needed == stack_realign); +- return; ++ if (GET_CODE (addr) == ZERO_EXTEND ++ && GET_MODE (XEXP (addr, 0)) == SImode) ++ { ++ addr = XEXP (addr, 0); ++ if (CONST_INT_P (addr)) ++ return 0; ++ } ++ else if (GET_CODE (addr) == AND ++ && const_32bit_mask (XEXP (addr, 1), DImode)) ++ { ++ addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode); ++ if (addr == NULL_RTX) ++ return 0; ++ ++ if (CONST_INT_P (addr)) ++ return 0; ++ } + } + +- /* If the only reason for frame_pointer_needed is that we conservatively +- assumed stack realignment might be needed or -fno-omit-frame-pointer +- is used, but in the end nothing that needed the stack alignment had +- been spilled nor stack access, clear frame_pointer_needed and say we +- don't need stack realignment. 
*/ +- if ((stack_realign || (!flag_omit_frame_pointer && optimize)) +- && frame_pointer_needed +- && crtl->is_leaf +- && crtl->sp_is_unchanging +- && !ix86_current_function_calls_tls_descriptor +- && !crtl->accesses_prior_frames +- && !cfun->calls_alloca +- && !crtl->calls_eh_return +- /* See ira_setup_eliminable_regset for the rationale. */ +- && !(STACK_CHECK_MOVING_SP +- && flag_stack_check +- && flag_exceptions +- && cfun->can_throw_non_call_exceptions) +- && !ix86_frame_pointer_required () +- && get_frame_size () == 0 +- && ix86_nsaved_sseregs () == 0 +- && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) ++ /* Allow SImode subregs of DImode addresses, ++ they will be emitted with addr32 prefix. */ ++ if (TARGET_64BIT && GET_MODE (addr) == SImode) + { +- if (ix86_find_max_used_stack_alignment (stack_alignment, +- stack_realign)) ++ if (SUBREG_P (addr) ++ && GET_MODE (SUBREG_REG (addr)) == DImode) + { +- /* Stack frame is required. If stack alignment needed is less +- than incoming stack boundary, don't realign stack. */ +- stack_realign = incoming_stack_boundary < stack_alignment; +- if (!stack_realign) +- { +- crtl->max_used_stack_slot_alignment +- = incoming_stack_boundary; +- crtl->stack_alignment_needed +- = incoming_stack_boundary; +- /* Also update preferred_stack_boundary for leaf +- functions. */ +- crtl->preferred_stack_boundary +- = incoming_stack_boundary; +- } ++ addr = SUBREG_REG (addr); ++ if (CONST_INT_P (addr)) ++ return 0; + } ++ } ++ ++ if (REG_P (addr)) ++ base = addr; ++ else if (SUBREG_P (addr)) ++ { ++ if (REG_P (SUBREG_REG (addr))) ++ base = addr; + else ++ return 0; ++ } ++ else if (GET_CODE (addr) == PLUS) ++ { ++ rtx addends[4], op; ++ int n = 0, i; ++ ++ op = addr; ++ do + { +- /* If drap has been set, but it actually isn't live at the +- start of the function, there is no reason to set it up. */ +- if (crtl->drap_reg) ++ if (n >= 4) ++ return 0; ++ addends[n++] = XEXP (op, 1); ++ op = XEXP (op, 0); ++ } ++ while (GET_CODE (op) == PLUS); ++ if (n >= 4) ++ return 0; ++ addends[n] = op; ++ ++ for (i = n; i >= 0; --i) ++ { ++ op = addends[i]; ++ switch (GET_CODE (op)) + { +- basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; +- if (! REGNO_REG_SET_P (DF_LR_IN (bb), +- REGNO (crtl->drap_reg))) +- { +- crtl->drap_reg = NULL_RTX; +- crtl->need_drap = false; +- } +- } +- else +- cfun->machine->no_drap_save_restore = true; ++ case MULT: ++ if (index) ++ return 0; ++ index = XEXP (op, 0); ++ scale_rtx = XEXP (op, 1); ++ break; + +- frame_pointer_needed = false; +- stack_realign = false; +- crtl->max_used_stack_slot_alignment = incoming_stack_boundary; +- crtl->stack_alignment_needed = incoming_stack_boundary; +- crtl->stack_alignment_estimated = incoming_stack_boundary; +- if (crtl->preferred_stack_boundary > incoming_stack_boundary) +- crtl->preferred_stack_boundary = incoming_stack_boundary; +- df_finish_pass (true); +- df_scan_alloc (NULL); +- df_scan_blocks (); +- df_compute_regs_ever_live (true); +- df_analyze (); ++ case ASHIFT: ++ if (index) ++ return 0; ++ index = XEXP (op, 0); ++ tmp = XEXP (op, 1); ++ if (!CONST_INT_P (tmp)) ++ return 0; ++ scale = INTVAL (tmp); ++ if ((unsigned HOST_WIDE_INT) scale > 3) ++ return 0; ++ scale = 1 << scale; ++ break; + +- if (flag_var_tracking) +- { +- /* Since frame pointer is no longer available, replace it with +- stack pointer - UNITS_PER_WORD in debug insns. 
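In the ASHIFT case above, an address term of the form (reg << n) is only accepted when the shift count is 0..3, which maps to the hardware scales 1, 2, 4 and 8 via scale = 1 << n. A minimal standalone version of that check (not part of the patch; the function name is an illustrative stand-in):

    #include <stdio.h>

    /* Return the address scale encoded by shift COUNT, or 0 when the shift
       cannot be expressed in an x86 addressing mode.  */
    static int shift_to_scale (unsigned long count)
    {
      if (count > 3)
        return 0;
      return 1 << count;
    }

    int main (void)
    {
      for (unsigned long c = 0; c <= 4; c++)
        printf ("shift by %lu -> scale %d\n", c, shift_to_scale (c));
      return 0;
    }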
*/ +- df_ref ref, next; +- for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM); +- ref; ref = next) +- { +- next = DF_REF_NEXT_REG (ref); +- if (!DF_REF_INSN_INFO (ref)) +- continue; ++ case ZERO_EXTEND: ++ op = XEXP (op, 0); ++ if (GET_CODE (op) != UNSPEC) ++ return 0; ++ /* FALLTHRU */ + +- /* Make sure the next ref is for a different instruction, +- so that we're not affected by the rescan. */ +- rtx_insn *insn = DF_REF_INSN (ref); +- while (next && DF_REF_INSN (next) == insn) +- next = DF_REF_NEXT_REG (next); ++ case UNSPEC: ++ if (XINT (op, 1) == UNSPEC_TP ++ && TARGET_TLS_DIRECT_SEG_REFS ++ && seg == ADDR_SPACE_GENERIC) ++ seg = DEFAULT_TLS_SEG_REG; ++ else ++ return 0; ++ break; + +- if (DEBUG_INSN_P (insn)) +- { +- bool changed = false; +- for (; ref != next; ref = DF_REF_NEXT_REG (ref)) +- { +- rtx *loc = DF_REF_LOC (ref); +- if (*loc == hard_frame_pointer_rtx) +- { +- *loc = plus_constant (Pmode, +- stack_pointer_rtx, +- -UNITS_PER_WORD); +- changed = true; +- } +- } +- if (changed) +- df_insn_rescan (insn); +- } +- } +- } ++ case SUBREG: ++ if (!REG_P (SUBREG_REG (op))) ++ return 0; ++ /* FALLTHRU */ + +- recompute_frame_layout_p = true; ++ case REG: ++ if (!base) ++ base = op; ++ else if (!index) ++ index = op; ++ else ++ return 0; ++ break; ++ ++ case CONST: ++ case CONST_INT: ++ case SYMBOL_REF: ++ case LABEL_REF: ++ if (disp) ++ return 0; ++ disp = op; ++ break; ++ ++ default: ++ return 0; ++ } + } + } +- else if (crtl->max_used_stack_slot_alignment >= 128) ++ else if (GET_CODE (addr) == MULT) + { +- /* We don't need to realign stack. max_used_stack_alignment is +- used to decide how stack frame should be aligned. This is +- independent of any psABIs nor 32-bit vs 64-bit. It is always +- safe to compute max_used_stack_alignment. We compute it only +- if 128-bit aligned load/store may be generated on misaligned +- stack slot which will lead to segfault. */ +- if (ix86_find_max_used_stack_alignment (stack_alignment, true)) +- cfun->machine->max_used_stack_alignment +- = stack_alignment / BITS_PER_UNIT; ++ index = XEXP (addr, 0); /* index*scale */ ++ scale_rtx = XEXP (addr, 1); + } ++ else if (GET_CODE (addr) == ASHIFT) ++ { ++ /* We're called for lea too, which implements ashift on occasion. */ ++ index = XEXP (addr, 0); ++ tmp = XEXP (addr, 1); ++ if (!CONST_INT_P (tmp)) ++ return 0; ++ scale = INTVAL (tmp); ++ if ((unsigned HOST_WIDE_INT) scale > 3) ++ return 0; ++ scale = 1 << scale; ++ retval = -1; ++ } ++ else ++ disp = addr; /* displacement */ + +- if (crtl->stack_realign_needed != stack_realign) +- recompute_frame_layout_p = true; +- crtl->stack_realign_needed = stack_realign; +- crtl->stack_realign_finalized = true; +- if (recompute_frame_layout_p) +- ix86_compute_frame_layout (); +-} +- +-/* Delete SET_GOT right after entry block if it is allocated to reg. 
*/ +- +-static void +-ix86_elim_entry_set_got (rtx reg) +-{ +- basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; +- rtx_insn *c_insn = BB_HEAD (bb); +- if (!NONDEBUG_INSN_P (c_insn)) +- c_insn = next_nonnote_nondebug_insn (c_insn); +- if (c_insn && NONJUMP_INSN_P (c_insn)) ++ if (index) + { +- rtx pat = PATTERN (c_insn); +- if (GET_CODE (pat) == PARALLEL) +- { +- rtx vec = XVECEXP (pat, 0, 0); +- if (GET_CODE (vec) == SET +- && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT +- && REGNO (XEXP (vec, 0)) == REGNO (reg)) +- delete_insn (c_insn); +- } ++ if (REG_P (index)) ++ ; ++ else if (SUBREG_P (index) ++ && REG_P (SUBREG_REG (index))) ++ ; ++ else ++ return 0; + } +-} + +-static rtx +-gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) +-{ +- rtx addr, mem; ++ /* Extract the integral value of scale. */ ++ if (scale_rtx) ++ { ++ if (!CONST_INT_P (scale_rtx)) ++ return 0; ++ scale = INTVAL (scale_rtx); ++ } + +- if (offset) +- addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset)); +- mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg); +- return gen_rtx_SET (store ? mem : reg, store ? reg : mem); +-} ++ base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base; ++ index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index; + +-static inline rtx +-gen_frame_load (rtx reg, rtx frame_reg, int offset) +-{ +- return gen_frame_set (reg, frame_reg, offset, false); +-} ++ /* Avoid useless 0 displacement. */ ++ if (disp == const0_rtx && (base || index)) ++ disp = NULL_RTX; + +-static inline rtx +-gen_frame_store (rtx reg, rtx frame_reg, int offset) +-{ +- return gen_frame_set (reg, frame_reg, offset, true); +-} ++ /* Allow arg pointer and stack pointer as index if there is not scaling. */ ++ if (base_reg && index_reg && scale == 1 ++ && (REGNO (index_reg) == ARG_POINTER_REGNUM ++ || REGNO (index_reg) == FRAME_POINTER_REGNUM ++ || REGNO (index_reg) == SP_REG)) ++ { ++ std::swap (base, index); ++ std::swap (base_reg, index_reg); ++ } + +-static void +-ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame) +-{ +- struct machine_function *m = cfun->machine; +- const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS +- + m->call_ms2sysv_extra_regs; +- rtvec v = rtvec_alloc (ncregs + 1); +- unsigned int align, i, vi = 0; +- rtx_insn *insn; +- rtx sym, addr; +- rtx rax = gen_rtx_REG (word_mode, AX_REG); +- const struct xlogue_layout &xlogue = xlogue_layout::get_instance (); ++ /* Special case: %ebp cannot be encoded as a base without a displacement. ++ Similarly %r13. */ ++ if (!disp && base_reg ++ && (REGNO (base_reg) == ARG_POINTER_REGNUM ++ || REGNO (base_reg) == FRAME_POINTER_REGNUM ++ || REGNO (base_reg) == BP_REG ++ || REGNO (base_reg) == R13_REG)) ++ disp = const0_rtx; + +- /* AL should only be live with sysv_abi. */ +- gcc_assert (!ix86_eax_live_at_start_p ()); +- gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset); ++ /* Special case: on K6, [%esi] makes the instruction vector decoded. ++ Avoid this by transforming to [%esi+0]. ++ Reload calls address legitimization without cfun defined, so we need ++ to test cfun for being non-NULL. */ ++ if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun) ++ && base_reg && !index_reg && !disp ++ && REGNO (base_reg) == SI_REG) ++ disp = const0_rtx; + +- /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather +- we've actually realigned the stack or not. 
*/ +- align = GET_MODE_ALIGNMENT (V4SFmode); +- addr = choose_baseaddr (frame.stack_realign_offset +- + xlogue.get_stub_ptr_offset (), &align, AX_REG); +- gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode)); ++ /* Special case: encode reg+reg instead of reg*2. */ ++ if (!base && index && scale == 2) ++ base = index, base_reg = index_reg, scale = 1; + +- emit_insn (gen_rtx_SET (rax, addr)); ++ /* Special case: scaling cannot be encoded without base or displacement. */ ++ if (!base && !disp && index && scale != 1) ++ disp = const0_rtx; + +- /* Get the stub symbol. */ +- sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP +- : XLOGUE_STUB_SAVE); +- RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); ++ out->base = base; ++ out->index = index; ++ out->disp = disp; ++ out->scale = scale; ++ out->seg = seg; + +- for (i = 0; i < ncregs; ++i) +- { +- const xlogue_layout::reginfo &r = xlogue.get_reginfo (i); +- rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode), +- r.regno); +- RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset); +- } +- +- gcc_assert (vi == (unsigned)GET_NUM_ELEM (v)); +- +- insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v)); +- RTX_FRAME_RELATED_P (insn) = true; ++ return retval; + } ++ ++/* Return cost of the memory address x. ++ For i386, it is better to use a complex address than let gcc copy ++ the address into a reg and make a new pseudo. But not if the address ++ requires to two regs - that would mean more pseudos with longer ++ lifetimes. */ ++static int ++ix86_address_cost (rtx x, machine_mode, addr_space_t, bool) ++{ ++ struct ix86_address parts; ++ int cost = 1; ++ int ok = ix86_decompose_address (x, &parts); + +-/* Expand the prologue into a bunch of separate insns. */ ++ gcc_assert (ok); + +-void +-ix86_expand_prologue (void) +-{ +- struct machine_function *m = cfun->machine; +- rtx insn, t; +- HOST_WIDE_INT allocate; +- bool int_registers_saved; +- bool sse_registers_saved; +- bool save_stub_call_needed; +- rtx static_chain = NULL_RTX; ++ if (parts.base && SUBREG_P (parts.base)) ++ parts.base = SUBREG_REG (parts.base); ++ if (parts.index && SUBREG_P (parts.index)) ++ parts.index = SUBREG_REG (parts.index); + +- if (ix86_function_naked (current_function_decl)) +- return; ++ /* Attempt to minimize number of registers in the address by increasing ++ address cost for each used register. We don't increase address cost ++ for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx" ++ is not invariant itself it most likely means that base or index is not ++ invariant. Therefore only "pic_offset_table_rtx" could be hoisted out, ++ which is not profitable for x86. */ ++ if (parts.base ++ && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) ++ && (current_pass->type == GIMPLE_PASS ++ || !pic_offset_table_rtx ++ || !REG_P (parts.base) ++ || REGNO (pic_offset_table_rtx) != REGNO (parts.base))) ++ cost++; + +- ix86_finalize_stack_frame_flags (); ++ if (parts.index ++ && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) ++ && (current_pass->type == GIMPLE_PASS ++ || !pic_offset_table_rtx ++ || !REG_P (parts.index) ++ || REGNO (pic_offset_table_rtx) != REGNO (parts.index))) ++ cost++; + +- /* DRAP should not coexist with stack_realign_fp */ +- gcc_assert (!(crtl->drap_reg && stack_realign_fp)); ++ /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, ++ since it's predecode logic can't detect the length of instructions ++ and it degenerates to vector decoded. 
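The tail of ix86_decompose_address above canonicalises the recovered parts so that they are actually encodable: with no scaling, the stack/frame/arg pointer is moved out of the index slot into the base slot, a lone reg*2 becomes reg+reg, and a scaled index with neither base nor displacement gets an explicit zero displacement. A standalone sketch of those three rewrites (not part of the patch; the struct, the register numbers and the single sp_or_frame_reg parameter are illustrative stand-ins for the rtx-based checks in the hunk):

    #include <stdbool.h>
    #include <stdio.h>

    struct addr_parts
    {
      int base;       /* register number, or -1 when absent */
      int index;      /* register number, or -1 when absent */
      int scale;      /* 1, 2, 4 or 8 */
      bool has_disp;  /* displacement present? */
    };

    static void canonicalize (struct addr_parts *p, int sp_or_frame_reg)
    {
      /* With no scaling, move the stack/frame/arg pointer out of the index
         slot and into the base slot.  */
      if (p->base >= 0 && p->index == sp_or_frame_reg && p->scale == 1)
        {
          int tmp = p->base;
          p->base = p->index;
          p->index = tmp;
        }

      /* reg*2 with no base is encoded more cheaply as reg+reg.  */
      if (p->base < 0 && p->index >= 0 && p->scale == 2)
        {
          p->base = p->index;
          p->scale = 1;
        }

      /* A scaled index cannot be encoded without a base or a displacement,
         so add an explicit zero displacement.  */
      if (p->base < 0 && !p->has_disp && p->index >= 0 && p->scale != 1)
        p->has_disp = true;
    }

    int main (void)
    {
      struct addr_parts p = { .base = -1, .index = 3, .scale = 2,
                              .has_disp = false };
      canonicalize (&p, 7);
      printf ("base=%d index=%d scale=%d disp=%d\n",
              p.base, p.index, p.scale, (int) p.has_disp);
      return 0;
    }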
Increase cost of such ++ addresses here. The penalty is minimally 2 cycles. It may be worthwhile ++ to split such addresses or even refuse such addresses at all. + +- memset (&m->fs, 0, sizeof (m->fs)); ++ Following addressing modes are affected: ++ [base+scale*index] ++ [scale*index+disp] ++ [base+index] + +- /* Initialize CFA state for before the prologue. */ +- m->fs.cfa_reg = stack_pointer_rtx; +- m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; ++ The first and last case may be avoidable by explicitly coding the zero in ++ memory address, but I don't have AMD-K6 machine handy to check this ++ theory. */ + +- /* Track SP offset to the CFA. We continue tracking this after we've +- swapped the CFA register away from SP. In the case of re-alignment +- this is fudged; we're interested to offsets within the local frame. */ +- m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; +- m->fs.sp_valid = true; +- m->fs.sp_realigned = false; ++ if (TARGET_K6 ++ && ((!parts.disp && parts.base && parts.index && parts.scale != 1) ++ || (parts.disp && !parts.base && parts.index && parts.scale != 1) ++ || (!parts.disp && parts.base && parts.index && parts.scale == 1))) ++ cost += 10; + +- const struct ix86_frame &frame = cfun->machine->frame; ++ return cost; ++} ++ ++/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as ++ this is used for to form addresses to local data when -fPIC is in ++ use. */ + +- if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl)) +- { +- /* We should have already generated an error for any use of +- ms_hook on a nested function. */ +- gcc_checking_assert (!ix86_static_chain_on_stack); ++static bool ++darwin_local_data_pic (rtx disp) ++{ ++ return (GET_CODE (disp) == UNSPEC ++ && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); ++} + +- /* Check if profiling is active and we shall use profiling before +- prologue variant. If so sorry. */ +- if (crtl->profile && flag_fentry != 0) +- sorry ("ms_hook_prologue attribute isn%'t compatible " +- "with %<-mfentry%> for 32-bit"); ++/* True if operand X should be loaded from GOT. */ + +- /* In ix86_asm_output_function_label we emitted: +- 8b ff movl.s %edi,%edi +- 55 push %ebp +- 8b ec movl.s %esp,%ebp ++bool ++ix86_force_load_from_GOT_p (rtx x) ++{ ++ return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X) ++ && !TARGET_PECOFF && !TARGET_MACHO ++ && !flag_pic ++ && ix86_cmodel != CM_LARGE ++ && GET_CODE (x) == SYMBOL_REF ++ && SYMBOL_REF_FUNCTION_P (x) ++ && (!flag_plt ++ || (SYMBOL_REF_DECL (x) ++ && lookup_attribute ("noplt", ++ DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))) ++ && !SYMBOL_REF_LOCAL_P (x)); ++} + +- This matches the hookable function prologue in Win32 API +- functions in Microsoft Windows XP Service Pack 2 and newer. +- Wine uses this to enable Windows apps to hook the Win32 API +- functions provided by Wine. ++/* Determine if a given RTX is a valid constant. We already know this ++ satisfies CONSTANT_P. */ + +- What that means is that we've already set up the frame pointer. */ ++static bool ++ix86_legitimate_constant_p (machine_mode mode, rtx x) ++{ ++ switch (GET_CODE (x)) ++ { ++ case CONST: ++ x = XEXP (x, 0); + +- if (frame_pointer_needed +- && !(crtl->drap_reg && crtl->stack_realign_needed)) ++ if (GET_CODE (x) == PLUS) + { +- rtx push, mov; ++ if (!CONST_INT_P (XEXP (x, 1))) ++ return false; ++ x = XEXP (x, 0); ++ } + +- /* We've decided to use the frame pointer already set up. +- Describe this to the unwinder by pretending that both +- push and mov insns happen right here. 
++ if (TARGET_MACHO && darwin_local_data_pic (x)) ++ return true; + +- Putting the unwind info here at the end of the ms_hook +- is done so that we can make absolutely certain we get +- the required byte sequence at the start of the function, +- rather than relying on an assembler that can produce +- the exact encoding required. ++ /* Only some unspecs are valid as "constants". */ ++ if (GET_CODE (x) == UNSPEC) ++ switch (XINT (x, 1)) ++ { ++ case UNSPEC_GOT: ++ case UNSPEC_GOTOFF: ++ case UNSPEC_PLTOFF: ++ return TARGET_64BIT; ++ case UNSPEC_TPOFF: ++ case UNSPEC_NTPOFF: ++ x = XVECEXP (x, 0, 0); ++ return (GET_CODE (x) == SYMBOL_REF ++ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); ++ case UNSPEC_DTPOFF: ++ x = XVECEXP (x, 0, 0); ++ return (GET_CODE (x) == SYMBOL_REF ++ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); ++ default: ++ return false; ++ } + +- However it does mean (in the unpatched case) that we have +- a 1 insn window where the asynchronous unwind info is +- incorrect. However, if we placed the unwind info at +- its correct location we would have incorrect unwind info +- in the patched case. Which is probably all moot since +- I don't expect Wine generates dwarf2 unwind info for the +- system libraries that use this feature. */ ++ /* We must have drilled down to a symbol. */ ++ if (GET_CODE (x) == LABEL_REF) ++ return true; ++ if (GET_CODE (x) != SYMBOL_REF) ++ return false; ++ /* FALLTHRU */ + +- insn = emit_insn (gen_blockage ()); ++ case SYMBOL_REF: ++ /* TLS symbols are never valid. */ ++ if (SYMBOL_REF_TLS_MODEL (x)) ++ return false; + +- push = gen_push (hard_frame_pointer_rtx); +- mov = gen_rtx_SET (hard_frame_pointer_rtx, +- stack_pointer_rtx); +- RTX_FRAME_RELATED_P (push) = 1; +- RTX_FRAME_RELATED_P (mov) = 1; ++ /* DLLIMPORT symbols are never valid. */ ++ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES ++ && SYMBOL_REF_DLLIMPORT_P (x)) ++ return false; + +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); ++#if TARGET_MACHO ++ /* mdynamic-no-pic */ ++ if (MACHO_DYNAMIC_NO_PIC_P) ++ return machopic_symbol_defined_p (x); ++#endif + +- /* Note that gen_push incremented m->fs.cfa_offset, even +- though we didn't emit the push insn here. */ +- m->fs.cfa_reg = hard_frame_pointer_rtx; +- m->fs.fp_offset = m->fs.cfa_offset; +- m->fs.fp_valid = true; +- } +- else ++ /* External function address should be loaded ++ via the GOT slot to avoid PLT. */ ++ if (ix86_force_load_from_GOT_p (x)) ++ return false; ++ ++ break; ++ ++ CASE_CONST_SCALAR_INT: ++ switch (mode) + { +- /* The frame pointer is not needed so pop %ebp again. +- This leaves us with a pristine state. */ +- emit_insn (gen_pop (hard_frame_pointer_rtx)); ++ case E_TImode: ++ if (TARGET_64BIT) ++ return true; ++ /* FALLTHRU */ ++ case E_OImode: ++ case E_XImode: ++ if (!standard_sse_constant_p (x, mode)) ++ return false; ++ default: ++ break; + } ++ break; ++ ++ case CONST_VECTOR: ++ if (!standard_sse_constant_p (x, mode)) ++ return false; ++ ++ default: ++ break; + } + +- /* The first insn of a function that accepts its static chain on the +- stack is to push the register that would be filled in by a direct +- call. This insn will be skipped by the trampoline. */ +- else if (ix86_static_chain_on_stack) +- { +- static_chain = ix86_static_chain (cfun->decl, false); +- insn = emit_insn (gen_push (static_chain)); +- emit_insn (gen_blockage ()); ++ /* Otherwise we handle everything else in the move patterns. 
*/ ++ return true; ++} + +- /* We don't want to interpret this push insn as a register save, +- only as a stack adjustment. The real copy of the register as +- a save will be done later, if needed. */ +- t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); +- t = gen_rtx_SET (stack_pointer_rtx, t); +- add_reg_note (insn, REG_CFA_ADJUST_CFA, t); +- RTX_FRAME_RELATED_P (insn) = 1; +- } ++/* Determine if it's legal to put X into the constant pool. This ++ is not possible for the address of thread-local symbols, which ++ is checked above. */ + +- /* Emit prologue code to adjust stack alignment and setup DRAP, in case +- of DRAP is needed and stack realignment is really needed after reload */ +- if (stack_realign_drap) ++static bool ++ix86_cannot_force_const_mem (machine_mode mode, rtx x) ++{ ++ /* We can put any immediate constant in memory. */ ++ switch (GET_CODE (x)) + { +- int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; ++ CASE_CONST_ANY: ++ return false; + +- /* Can't use DRAP in interrupt function. */ +- if (cfun->machine->func_type != TYPE_NORMAL) +- sorry ("Dynamic Realign Argument Pointer (DRAP) not supported " +- "in interrupt service routine. This may be worked " +- "around by avoiding functions with aggregate return."); ++ default: ++ break; ++ } + +- /* Only need to push parameter pointer reg if it is caller saved. */ +- if (!call_used_regs[REGNO (crtl->drap_reg)]) +- { +- /* Push arg pointer reg */ +- insn = emit_insn (gen_push (crtl->drap_reg)); +- RTX_FRAME_RELATED_P (insn) = 1; +- } ++ return !ix86_legitimate_constant_p (mode, x); ++} + +- /* Grab the argument pointer. */ +- t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); +- insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t)); +- RTX_FRAME_RELATED_P (insn) = 1; +- m->fs.cfa_reg = crtl->drap_reg; +- m->fs.cfa_offset = 0; ++/* Nonzero if the symbol is marked as dllimport, or as stub-variable, ++ otherwise zero. */ + +- /* Align the stack. */ +- insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (-align_bytes))); +- RTX_FRAME_RELATED_P (insn) = 1; ++static bool ++is_imported_p (rtx x) ++{ ++ if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES ++ || GET_CODE (x) != SYMBOL_REF) ++ return false; + +- /* Replicate the return address on the stack so that return +- address can be reached via (argp - 1) slot. This is needed +- to implement macro RETURN_ADDR_RTX and intrinsic function +- expand_builtin_return_addr etc. */ +- t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); +- t = gen_frame_mem (word_mode, t); +- insn = emit_insn (gen_push (t)); +- RTX_FRAME_RELATED_P (insn) = 1; ++ return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x); ++} + +- /* For the purposes of frame and register save area addressing, +- we've started over with a new frame. */ +- m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; +- m->fs.realigned = true; + +- if (static_chain) +- { +- /* Replicate static chain on the stack so that static chain +- can be reached via (argp - 2) slot. This is needed for +- nested function with stack realignment. */ +- insn = emit_insn (gen_push (static_chain)); +- RTX_FRAME_RELATED_P (insn) = 1; +- } +- } ++/* Nonzero if the constant value X is a legitimate general operand ++ when generating PIC code. It is given that flag_pic is on and ++ that X satisfies CONSTANT_P. 
*/ + +- int_registers_saved = (frame.nregs == 0); +- sse_registers_saved = (frame.nsseregs == 0); +- save_stub_call_needed = (m->call_ms2sysv); +- gcc_assert (sse_registers_saved || !save_stub_call_needed); ++bool ++legitimate_pic_operand_p (rtx x) ++{ ++ rtx inner; + +- if (frame_pointer_needed && !m->fs.fp_valid) ++ switch (GET_CODE (x)) + { +- /* Note: AT&T enter does NOT have reversed args. Enter is probably +- slower on all targets. Also sdb didn't like it. */ +- insn = emit_insn (gen_push (hard_frame_pointer_rtx)); +- RTX_FRAME_RELATED_P (insn) = 1; ++ case CONST: ++ inner = XEXP (x, 0); ++ if (GET_CODE (inner) == PLUS ++ && CONST_INT_P (XEXP (inner, 1))) ++ inner = XEXP (inner, 0); + +- /* Push registers now, before setting the frame pointer +- on SEH target. */ +- if (!int_registers_saved +- && TARGET_SEH +- && !frame.save_regs_using_mov) +- { +- ix86_emit_save_regs (); +- int_registers_saved = true; +- gcc_assert (m->fs.sp_offset == frame.reg_save_offset); +- } ++ /* Only some unspecs are valid as "constants". */ ++ if (GET_CODE (inner) == UNSPEC) ++ switch (XINT (inner, 1)) ++ { ++ case UNSPEC_GOT: ++ case UNSPEC_GOTOFF: ++ case UNSPEC_PLTOFF: ++ return TARGET_64BIT; ++ case UNSPEC_TPOFF: ++ x = XVECEXP (inner, 0, 0); ++ return (GET_CODE (x) == SYMBOL_REF ++ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); ++ case UNSPEC_MACHOPIC_OFFSET: ++ return legitimate_pic_address_disp_p (x); ++ default: ++ return false; ++ } ++ /* FALLTHRU */ + +- if (m->fs.sp_offset == frame.hard_frame_pointer_offset) +- { +- insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); +- RTX_FRAME_RELATED_P (insn) = 1; ++ case SYMBOL_REF: ++ case LABEL_REF: ++ return legitimate_pic_address_disp_p (x); + +- if (m->fs.cfa_reg == stack_pointer_rtx) +- m->fs.cfa_reg = hard_frame_pointer_rtx; +- m->fs.fp_offset = m->fs.sp_offset; +- m->fs.fp_valid = true; +- } ++ default: ++ return true; + } ++} + +- if (!int_registers_saved) +- { +- /* If saving registers via PUSH, do so now. */ +- if (!frame.save_regs_using_mov) +- { +- ix86_emit_save_regs (); +- int_registers_saved = true; +- gcc_assert (m->fs.sp_offset == frame.reg_save_offset); +- } ++/* Determine if a given CONST RTX is a valid memory displacement ++ in PIC mode. */ + +- /* When using red zone we may start register saving before allocating +- the stack frame saving one cycle of the prologue. However, avoid +- doing this if we have to probe the stack; at least on x86_64 the +- stack probe can turn into a call that clobbers a red zone location. */ +- else if (ix86_using_red_zone () +- && (! TARGET_STACK_PROBE +- || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) +- { +- ix86_emit_save_regs_using_mov (frame.reg_save_offset); +- int_registers_saved = true; +- } +- } ++bool ++legitimate_pic_address_disp_p (rtx disp) ++{ ++ bool saw_plus; + +- if (stack_realign_fp) ++ /* In 64bit mode we can allow direct addresses of symbols and labels ++ when they are not dynamic symbols. */ ++ if (TARGET_64BIT) + { +- int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; +- gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); ++ rtx op0 = disp, op1; + +- /* Record last valid frame pointer offset. */ +- m->fs.sp_realigned_fp_last = frame.reg_save_offset; ++ switch (GET_CODE (disp)) ++ { ++ case LABEL_REF: ++ return true; + +- /* The computation of the size of the re-aligned stack frame means +- that we must allocate the size of the register save area before +- performing the actual alignment. 
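For 64-bit PIC, the CONST handling in legitimate_pic_address_disp_p above only keeps considering a symbol-plus-constant displacement when the constant lies within +/-16 MB; it is one of several conditions, and an offset outside that window falls through and is rejected. A standalone version of just that range check (not part of the patch; the function name is an illustrative stand-in, and the bound is the 16*1024*1024 literal from the hunk):

    #include <stdbool.h>
    #include <stdio.h>

    /* True when OFFSET is small enough to be considered further as a
       symbol + constant displacement in 64-bit PIC code.  */
    static bool pic_disp_offset_in_range (long long offset)
    {
      return offset < 16 * 1024 * 1024 && offset >= -16 * 1024 * 1024;
    }

    int main (void)
    {
      printf ("%d %d %d\n",
              pic_disp_offset_in_range (0),
              pic_disp_offset_in_range (16 * 1024 * 1024 - 1),
              pic_disp_offset_in_range (16 * 1024 * 1024));  /* prints 1 1 0 */
      return 0;
    }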
Otherwise we cannot guarantee +- that there's enough storage above the realignment point. */ +- allocate = frame.reg_save_offset - m->fs.sp_offset +- + frame.stack_realign_allocate; +- if (allocate) +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-allocate), -1, false); ++ case CONST: ++ if (GET_CODE (XEXP (disp, 0)) != PLUS) ++ break; ++ op0 = XEXP (XEXP (disp, 0), 0); ++ op1 = XEXP (XEXP (disp, 0), 1); ++ if (!CONST_INT_P (op1)) ++ break; ++ if (GET_CODE (op0) == UNSPEC ++ && (XINT (op0, 1) == UNSPEC_DTPOFF ++ || XINT (op0, 1) == UNSPEC_NTPOFF) ++ && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1)) ++ return true; ++ if (INTVAL (op1) >= 16*1024*1024 ++ || INTVAL (op1) < -16*1024*1024) ++ break; ++ if (GET_CODE (op0) == LABEL_REF) ++ return true; ++ if (GET_CODE (op0) == CONST ++ && GET_CODE (XEXP (op0, 0)) == UNSPEC ++ && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL) ++ return true; ++ if (GET_CODE (op0) == UNSPEC ++ && XINT (op0, 1) == UNSPEC_PCREL) ++ return true; ++ if (GET_CODE (op0) != SYMBOL_REF) ++ break; ++ /* FALLTHRU */ + +- /* Align the stack. */ +- insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, +- stack_pointer_rtx, +- GEN_INT (-align_bytes))); +- m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); +- m->fs.sp_realigned_offset = m->fs.sp_offset +- - frame.stack_realign_allocate; +- /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset. +- Beyond this point, stack access should be done via choose_baseaddr or +- by using sp_valid_at and fp_valid_at to determine the correct base +- register. Henceforth, any CFA offset should be thought of as logical +- and not physical. */ +- gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last); +- gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset); +- m->fs.sp_realigned = true; ++ case SYMBOL_REF: ++ /* TLS references should always be enclosed in UNSPEC. ++ The dllimported symbol needs always to be resolved. */ ++ if (SYMBOL_REF_TLS_MODEL (op0) ++ || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0))) ++ return false; + +- /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which +- is needed to describe where a register is saved using a realigned +- stack pointer, so we need to invalidate the stack pointer for that +- target. */ +- if (TARGET_SEH) +- m->fs.sp_valid = false; +- +- /* If SP offset is non-immediate after allocation of the stack frame, +- then emit SSE saves or stub call prior to allocating the rest of the +- stack frame. This is less efficient for the out-of-line stub because +- we can't combine allocations across the call barrier, but it's better +- than using a scratch register. */ +- else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset +- - m->fs.sp_realigned_offset), +- Pmode)) +- { +- if (!sse_registers_saved) +- { +- ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); +- sse_registers_saved = true; +- } +- else if (save_stub_call_needed) ++ if (TARGET_PECOFF) + { +- ix86_emit_outlined_ms2sysv_save (frame); +- save_stub_call_needed = false; ++ if (is_imported_p (op0)) ++ return true; ++ ++ if (SYMBOL_REF_FAR_ADDR_P (op0) ++ || !SYMBOL_REF_LOCAL_P (op0)) ++ break; ++ ++ /* Function-symbols need to be resolved only for ++ large-model. ++ For the small-model we don't need to resolve anything ++ here. 
*/ ++ if ((ix86_cmodel != CM_LARGE_PIC ++ && SYMBOL_REF_FUNCTION_P (op0)) ++ || ix86_cmodel == CM_SMALL_PIC) ++ return true; ++ /* Non-external symbols don't need to be resolved for ++ large, and medium-model. */ ++ if ((ix86_cmodel == CM_LARGE_PIC ++ || ix86_cmodel == CM_MEDIUM_PIC) ++ && !SYMBOL_REF_EXTERNAL_P (op0)) ++ return true; + } ++ else if (!SYMBOL_REF_FAR_ADDR_P (op0) ++ && (SYMBOL_REF_LOCAL_P (op0) ++ || (HAVE_LD_PIE_COPYRELOC ++ && flag_pie ++ && !SYMBOL_REF_WEAK (op0) ++ && !SYMBOL_REF_FUNCTION_P (op0))) ++ && ix86_cmodel != CM_LARGE_PIC) ++ return true; ++ break; ++ ++ default: ++ break; + } + } ++ if (GET_CODE (disp) != CONST) ++ return false; ++ disp = XEXP (disp, 0); + +- allocate = frame.stack_pointer_offset - m->fs.sp_offset; +- +- if (flag_stack_usage_info) ++ if (TARGET_64BIT) + { +- /* We start to count from ARG_POINTER. */ +- HOST_WIDE_INT stack_size = frame.stack_pointer_offset; ++ /* We are unsafe to allow PLUS expressions. This limit allowed distance ++ of GOT tables. We should not need these anyway. */ ++ if (GET_CODE (disp) != UNSPEC ++ || (XINT (disp, 1) != UNSPEC_GOTPCREL ++ && XINT (disp, 1) != UNSPEC_GOTOFF ++ && XINT (disp, 1) != UNSPEC_PCREL ++ && XINT (disp, 1) != UNSPEC_PLTOFF)) ++ return false; + +- /* If it was realigned, take into account the fake frame. */ +- if (stack_realign_drap) +- { +- if (ix86_static_chain_on_stack) +- stack_size += UNITS_PER_WORD; ++ if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF ++ && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) ++ return false; ++ return true; ++ } + +- if (!call_used_regs[REGNO (crtl->drap_reg)]) +- stack_size += UNITS_PER_WORD; ++ saw_plus = false; ++ if (GET_CODE (disp) == PLUS) ++ { ++ if (!CONST_INT_P (XEXP (disp, 1))) ++ return false; ++ disp = XEXP (disp, 0); ++ saw_plus = true; ++ } + +- /* This over-estimates by 1 minimal-stack-alignment-unit but +- mitigates that by counting in the new return address slot. */ +- current_function_dynamic_stack_size +- += crtl->stack_alignment_needed / BITS_PER_UNIT; +- } ++ if (TARGET_MACHO && darwin_local_data_pic (disp)) ++ return true; + +- current_function_static_stack_size = stack_size; +- } ++ if (GET_CODE (disp) != UNSPEC) ++ return false; + +- /* On SEH target with very large frame size, allocate an area to save +- SSE registers (as the very large allocation won't be described). */ +- if (TARGET_SEH +- && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE +- && !sse_registers_saved) ++ switch (XINT (disp, 1)) + { +- HOST_WIDE_INT sse_size +- = frame.sse_reg_save_offset - frame.reg_save_offset; ++ case UNSPEC_GOT: ++ if (saw_plus) ++ return false; ++ /* We need to check for both symbols and labels because VxWorks loads ++ text labels with @GOT rather than @GOTOFF. See gotoff_operand for ++ details. */ ++ return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF ++ || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); ++ case UNSPEC_GOTOFF: ++ /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. ++ While ABI specify also 32bit relocation but we don't produce it in ++ small PIC model at all. 
*/ ++ if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF ++ || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) ++ && !TARGET_64BIT) ++ return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode); ++ return false; ++ case UNSPEC_GOTTPOFF: ++ case UNSPEC_GOTNTPOFF: ++ case UNSPEC_INDNTPOFF: ++ if (saw_plus) ++ return false; ++ disp = XVECEXP (disp, 0, 0); ++ return (GET_CODE (disp) == SYMBOL_REF ++ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); ++ case UNSPEC_NTPOFF: ++ disp = XVECEXP (disp, 0, 0); ++ return (GET_CODE (disp) == SYMBOL_REF ++ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); ++ case UNSPEC_DTPOFF: ++ disp = XVECEXP (disp, 0, 0); ++ return (GET_CODE (disp) == SYMBOL_REF ++ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); ++ } + +- gcc_assert (int_registers_saved); ++ return false; ++} + +- /* No need to do stack checking as the area will be immediately +- written. */ +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-sse_size), -1, +- m->fs.cfa_reg == stack_pointer_rtx); +- allocate -= sse_size; +- ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); +- sse_registers_saved = true; +- } ++/* Determine if op is suitable RTX for an address register. ++ Return naked register if a register or a register subreg is ++ found, otherwise return NULL_RTX. */ + +- /* The stack has already been decremented by the instruction calling us +- so probe if the size is non-negative to preserve the protection area. */ +- if (allocate >= 0 +- && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK +- || flag_stack_clash_protection)) ++static rtx ++ix86_validate_address_register (rtx op) ++{ ++ machine_mode mode = GET_MODE (op); ++ ++ /* Only SImode or DImode registers can form the address. 
*/ ++ if (mode != SImode && mode != DImode) ++ return NULL_RTX; ++ ++ if (REG_P (op)) ++ return op; ++ else if (SUBREG_P (op)) + { +- if (flag_stack_clash_protection) +- { +- ix86_adjust_stack_and_probe_stack_clash (allocate, +- int_registers_saved); +- allocate = 0; +- } +- else if (STACK_CHECK_MOVING_SP) +- { +- if (!(crtl->is_leaf && !cfun->calls_alloca +- && allocate <= get_probe_interval ())) +- { +- ix86_adjust_stack_and_probe (allocate, int_registers_saved); +- allocate = 0; +- } +- } +- else +- { +- HOST_WIDE_INT size = allocate; ++ rtx reg = SUBREG_REG (op); + +- if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) +- size = 0x80000000 - get_stack_check_protect () - 1; ++ if (!REG_P (reg)) ++ return NULL_RTX; + +- if (TARGET_STACK_PROBE) +- { +- if (crtl->is_leaf && !cfun->calls_alloca) +- { +- if (size > get_probe_interval ()) +- ix86_emit_probe_stack_range (0, size, int_registers_saved); +- } +- else +- ix86_emit_probe_stack_range (0, +- size + get_stack_check_protect (), +- int_registers_saved); +- } +- else +- { +- if (crtl->is_leaf && !cfun->calls_alloca) +- { +- if (size > get_probe_interval () +- && size > get_stack_check_protect ()) +- ix86_emit_probe_stack_range (get_stack_check_protect (), +- (size +- - get_stack_check_protect ()), +- int_registers_saved); +- } +- else +- ix86_emit_probe_stack_range (get_stack_check_protect (), size, +- int_registers_saved); +- } +- } +- } ++ mode = GET_MODE (reg); + +- if (allocate == 0) +- ; +- else if (!ix86_target_stack_probe () +- || frame.stack_pointer_offset < CHECK_STACK_LIMIT) +- { +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (-allocate), -1, +- m->fs.cfa_reg == stack_pointer_rtx); +- } +- else +- { +- rtx eax = gen_rtx_REG (Pmode, AX_REG); +- rtx r10 = NULL; +- rtx (*adjust_stack_insn)(rtx, rtx, rtx); +- const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); +- bool eax_live = ix86_eax_live_at_start_p (); +- bool r10_live = false; ++ /* Don't allow SUBREGs that span more than a word. It can ++ lead to spill failures when the register is one word out ++ of a two word structure. */ ++ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) ++ return NULL_RTX; + +- if (TARGET_64BIT) +- r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); ++ /* Allow only SUBREGs of non-eliminable hard registers. */ ++ if (register_no_elim_operand (reg, mode)) ++ return reg; ++ } + +- if (eax_live) +- { +- insn = emit_insn (gen_push (eax)); +- allocate -= UNITS_PER_WORD; +- /* Note that SEH directives need to continue tracking the stack +- pointer even after the frame pointer has been set up. */ +- if (sp_is_cfa_reg || TARGET_SEH) +- { +- if (sp_is_cfa_reg) +- m->fs.cfa_offset += UNITS_PER_WORD; +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -UNITS_PER_WORD))); +- } +- } +- +- if (r10_live) +- { +- r10 = gen_rtx_REG (Pmode, R10_REG); +- insn = emit_insn (gen_push (r10)); +- allocate -= UNITS_PER_WORD; +- if (sp_is_cfa_reg || TARGET_SEH) +- { +- if (sp_is_cfa_reg) +- m->fs.cfa_offset += UNITS_PER_WORD; +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -UNITS_PER_WORD))); +- } +- } ++ /* Op is not a register. 
*/ ++ return NULL_RTX; ++} + +- emit_move_insn (eax, GEN_INT (allocate)); +- emit_insn (ix86_gen_allocate_stack_worker (eax, eax)); ++/* Recognizes RTL expressions that are valid memory addresses for an ++ instruction. The MODE argument is the machine mode for the MEM ++ expression that wants to use this address. + +- /* Use the fact that AX still contains ALLOCATE. */ +- adjust_stack_insn = (Pmode == DImode +- ? gen_pro_epilogue_adjust_stack_di_sub +- : gen_pro_epilogue_adjust_stack_si_sub); ++ It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should ++ convert common non-canonical forms to canonical form so that they will ++ be recognized. */ + +- insn = emit_insn (adjust_stack_insn (stack_pointer_rtx, +- stack_pointer_rtx, eax)); ++static bool ++ix86_legitimate_address_p (machine_mode, rtx addr, bool strict) ++{ ++ struct ix86_address parts; ++ rtx base, index, disp; ++ HOST_WIDE_INT scale; ++ addr_space_t seg; + +- if (sp_is_cfa_reg || TARGET_SEH) +- { +- if (sp_is_cfa_reg) +- m->fs.cfa_offset += allocate; +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_FRAME_RELATED_EXPR, +- gen_rtx_SET (stack_pointer_rtx, +- plus_constant (Pmode, stack_pointer_rtx, +- -allocate))); +- } +- m->fs.sp_offset += allocate; ++ if (ix86_decompose_address (addr, &parts) <= 0) ++ /* Decomposition failed. */ ++ return false; + +- /* Use stack_pointer_rtx for relative addressing so that code works for +- realigned stack. But this means that we need a blockage to prevent +- stores based on the frame pointer from being scheduled before. */ +- if (r10_live && eax_live) +- { +- t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); +- emit_move_insn (gen_rtx_REG (word_mode, R10_REG), +- gen_frame_mem (word_mode, t)); +- t = plus_constant (Pmode, t, UNITS_PER_WORD); +- emit_move_insn (gen_rtx_REG (word_mode, AX_REG), +- gen_frame_mem (word_mode, t)); +- emit_insn (gen_memory_blockage ()); +- } +- else if (eax_live || r10_live) +- { +- t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); +- emit_move_insn (gen_rtx_REG (word_mode, +- (eax_live ? AX_REG : R10_REG)), +- gen_frame_mem (word_mode, t)); +- emit_insn (gen_memory_blockage ()); +- } +- } +- gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); ++ base = parts.base; ++ index = parts.index; ++ disp = parts.disp; ++ scale = parts.scale; ++ seg = parts.seg; + +- /* If we havn't already set up the frame pointer, do so now. */ +- if (frame_pointer_needed && !m->fs.fp_valid) ++ /* Validate base register. */ ++ if (base) + { +- insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx, +- GEN_INT (frame.stack_pointer_offset +- - frame.hard_frame_pointer_offset)); +- insn = emit_insn (insn); +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); +- +- if (m->fs.cfa_reg == stack_pointer_rtx) +- m->fs.cfa_reg = hard_frame_pointer_rtx; +- m->fs.fp_offset = frame.hard_frame_pointer_offset; +- m->fs.fp_valid = true; +- } ++ rtx reg = ix86_validate_address_register (base); + +- if (!int_registers_saved) +- ix86_emit_save_regs_using_mov (frame.reg_save_offset); +- if (!sse_registers_saved) +- ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); +- else if (save_stub_call_needed) +- ix86_emit_outlined_ms2sysv_save (frame); ++ if (reg == NULL_RTX) ++ return false; + +- /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT +- in PROLOGUE. 
*/ +- if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) +- { +- rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); +- insn = emit_insn (gen_set_got (pic)); +- RTX_FRAME_RELATED_P (insn) = 1; +- add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); +- emit_insn (gen_prologue_use (pic)); +- /* Deleting already emmitted SET_GOT if exist and allocated to +- REAL_PIC_OFFSET_TABLE_REGNUM. */ +- ix86_elim_entry_set_got (pic); ++ if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) ++ || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) ++ /* Base is not valid. */ ++ return false; + } + +- if (crtl->drap_reg && !crtl->stack_realign_needed) ++ /* Validate index register. */ ++ if (index) + { +- /* vDRAP is setup but after reload it turns out stack realign +- isn't necessary, here we will emit prologue to setup DRAP +- without stack realign adjustment */ +- t = choose_baseaddr (0, NULL); +- emit_insn (gen_rtx_SET (crtl->drap_reg, t)); +- } +- +- /* Prevent instructions from being scheduled into register save push +- sequence when access to the redzone area is done through frame pointer. +- The offset between the frame pointer and the stack pointer is calculated +- relative to the value of the stack pointer at the end of the function +- prologue, and moving instructions that access redzone area via frame +- pointer inside push sequence violates this assumption. */ +- if (frame_pointer_needed && frame.red_zone_size) +- emit_insn (gen_memory_blockage ()); ++ rtx reg = ix86_validate_address_register (index); + +- /* SEH requires that the prologue end within 256 bytes of the start of +- the function. Prevent instruction schedules that would extend that. +- Further, prevent alloca modifications to the stack pointer from being +- combined with prologue modifications. */ +- if (TARGET_SEH) +- emit_insn (gen_prologue_use (stack_pointer_rtx)); +-} ++ if (reg == NULL_RTX) ++ return false; + +-/* Emit code to restore REG using a POP insn. */ ++ if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) ++ || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) ++ /* Index is not valid. */ ++ return false; ++ } + +-static void +-ix86_emit_restore_reg_using_pop (rtx reg) +-{ +- struct machine_function *m = cfun->machine; +- rtx_insn *insn = emit_insn (gen_pop (reg)); ++ /* Index and base should have the same mode. */ ++ if (base && index ++ && GET_MODE (base) != GET_MODE (index)) ++ return false; + +- ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); +- m->fs.sp_offset -= UNITS_PER_WORD; ++ /* Address override works only on the (%reg) part of %fs:(%reg). */ ++ if (seg != ADDR_SPACE_GENERIC ++ && ((base && GET_MODE (base) != word_mode) ++ || (index && GET_MODE (index) != word_mode))) ++ return false; + +- if (m->fs.cfa_reg == crtl->drap_reg +- && REGNO (reg) == REGNO (crtl->drap_reg)) ++ /* Validate scale factor. */ ++ if (scale != 1) + { +- /* Previously we'd represented the CFA as an expression +- like *(%ebp - 8). We've just popped that value from +- the stack, which means we need to reset the CFA to +- the drap register. This will remain until we restore +- the stack pointer. */ +- add_reg_note (insn, REG_CFA_DEF_CFA, reg); +- RTX_FRAME_RELATED_P (insn) = 1; ++ if (!index) ++ /* Scale without index. */ ++ return false; + +- /* This means that the DRAP register is valid for addressing too. */ +- m->fs.drap_valid = true; +- return; ++ if (scale != 2 && scale != 4 && scale != 8) ++ /* Scale is not a valid multiplier. 
*/ ++ return false; + } + +- if (m->fs.cfa_reg == stack_pointer_rtx) ++ /* Validate displacement. */ ++ if (disp) + { +- rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); +- x = gen_rtx_SET (stack_pointer_rtx, x); +- add_reg_note (insn, REG_CFA_ADJUST_CFA, x); +- RTX_FRAME_RELATED_P (insn) = 1; +- +- m->fs.cfa_offset -= UNITS_PER_WORD; +- } ++ if (GET_CODE (disp) == CONST ++ && GET_CODE (XEXP (disp, 0)) == UNSPEC ++ && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) ++ switch (XINT (XEXP (disp, 0), 1)) ++ { ++ /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit ++ when used. While ABI specify also 32bit relocations, we ++ don't produce them at all and use IP relative instead. ++ Allow GOT in 32bit mode for both PIC and non-PIC if symbol ++ should be loaded via GOT. */ ++ case UNSPEC_GOT: ++ if (!TARGET_64BIT ++ && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) ++ goto is_legitimate_pic; ++ /* FALLTHRU */ ++ case UNSPEC_GOTOFF: ++ gcc_assert (flag_pic); ++ if (!TARGET_64BIT) ++ goto is_legitimate_pic; + +- /* When the frame pointer is the CFA, and we pop it, we are +- swapping back to the stack pointer as the CFA. This happens +- for stack frames that don't allocate other data, so we assume +- the stack pointer is now pointing at the return address, i.e. +- the function entry state, which makes the offset be 1 word. */ +- if (reg == hard_frame_pointer_rtx) +- { +- m->fs.fp_valid = false; +- if (m->fs.cfa_reg == hard_frame_pointer_rtx) +- { +- m->fs.cfa_reg = stack_pointer_rtx; +- m->fs.cfa_offset -= UNITS_PER_WORD; ++ /* 64bit address unspec. */ ++ return false; + +- add_reg_note (insn, REG_CFA_DEF_CFA, +- gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- GEN_INT (m->fs.cfa_offset))); +- RTX_FRAME_RELATED_P (insn) = 1; +- } +- } +-} +- +-/* Emit code to restore saved registers using POP insns. */ ++ case UNSPEC_GOTPCREL: ++ if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) ++ goto is_legitimate_pic; ++ /* FALLTHRU */ ++ case UNSPEC_PCREL: ++ gcc_assert (flag_pic); ++ goto is_legitimate_pic; + +-static void +-ix86_emit_restore_regs_using_pop (void) +-{ +- unsigned int regno; ++ case UNSPEC_GOTTPOFF: ++ case UNSPEC_GOTNTPOFF: ++ case UNSPEC_INDNTPOFF: ++ case UNSPEC_NTPOFF: ++ case UNSPEC_DTPOFF: ++ break; + +- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) +- ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); +-} ++ default: ++ /* Invalid address unspec. */ ++ return false; ++ } + +-/* Emit code and notes for the LEAVE instruction. If insn is non-null, +- omits the emit and only attaches the notes. */ ++ else if (SYMBOLIC_CONST (disp) ++ && (flag_pic ++ || (TARGET_MACHO ++#if TARGET_MACHO ++ && MACHOPIC_INDIRECT ++ && !machopic_operand_p (disp) ++#endif ++ ))) ++ { + +-static void +-ix86_emit_leave (rtx_insn *insn) +-{ +- struct machine_function *m = cfun->machine; +- if (!insn) +- insn = emit_insn (ix86_gen_leave ()); ++ is_legitimate_pic: ++ if (TARGET_64BIT && (index || base)) ++ { ++ /* foo@dtpoff(%rX) is ok. */ ++ if (GET_CODE (disp) != CONST ++ || GET_CODE (XEXP (disp, 0)) != PLUS ++ || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC ++ || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) ++ || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF ++ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) ++ /* Non-constant pic memory reference. */ ++ return false; ++ } ++ else if ((!TARGET_MACHO || flag_pic) ++ && ! 
legitimate_pic_address_disp_p (disp)) ++ /* Displacement is an invalid pic construct. */ ++ return false; ++#if TARGET_MACHO ++ else if (MACHO_DYNAMIC_NO_PIC_P ++ && !ix86_legitimate_constant_p (Pmode, disp)) ++ /* displacment must be referenced via non_lazy_pointer */ ++ return false; ++#endif + +- ix86_add_queued_cfa_restore_notes (insn); ++ /* This code used to verify that a symbolic pic displacement ++ includes the pic_offset_table_rtx register. + +- gcc_assert (m->fs.fp_valid); +- m->fs.sp_valid = true; +- m->fs.sp_realigned = false; +- m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD; +- m->fs.fp_valid = false; ++ While this is good idea, unfortunately these constructs may ++ be created by "adds using lea" optimization for incorrect ++ code like: + +- if (m->fs.cfa_reg == hard_frame_pointer_rtx) +- { +- m->fs.cfa_reg = stack_pointer_rtx; +- m->fs.cfa_offset = m->fs.sp_offset; ++ int a; ++ int foo(int i) ++ { ++ return *(&a+i); ++ } + +- add_reg_note (insn, REG_CFA_DEF_CFA, +- plus_constant (Pmode, stack_pointer_rtx, +- m->fs.sp_offset)); +- RTX_FRAME_RELATED_P (insn) = 1; ++ This code is nonsensical, but results in addressing ++ GOT table with pic_offset_table_rtx base. We can't ++ just refuse it easily, since it gets matched by ++ "addsi3" pattern, that later gets split to lea in the ++ case output register differs from input. While this ++ can be handled by separate addsi pattern for this case ++ that never results in lea, this seems to be easier and ++ correct fix for crash to disable this test. */ ++ } ++ else if (GET_CODE (disp) != LABEL_REF ++ && !CONST_INT_P (disp) ++ && (GET_CODE (disp) != CONST ++ || !ix86_legitimate_constant_p (Pmode, disp)) ++ && (GET_CODE (disp) != SYMBOL_REF ++ || !ix86_legitimate_constant_p (Pmode, disp))) ++ /* Displacement is not constant. */ ++ return false; ++ else if (TARGET_64BIT ++ && !x86_64_immediate_operand (disp, VOIDmode)) ++ /* Displacement is out of range. */ ++ return false; ++ /* In x32 mode, constant addresses are sign extended to 64bit, so ++ we have to prevent addresses from 0x80000000 to 0xffffffff. */ ++ else if (TARGET_X32 && !(index || base) ++ && CONST_INT_P (disp) ++ && val_signbit_known_set_p (SImode, INTVAL (disp))) ++ return false; + } +- ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, +- m->fs.fp_offset); +-} +- +-/* Emit code to restore saved registers using MOV insns. +- First register is restored from CFA - CFA_OFFSET. */ +-static void +-ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, +- bool maybe_eh_return) +-{ +- struct machine_function *m = cfun->machine; +- unsigned int regno; +- +- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) +- { +- rtx reg = gen_rtx_REG (word_mode, regno); +- rtx mem; +- rtx_insn *insn; +- +- mem = choose_baseaddr (cfa_offset, NULL); +- mem = gen_frame_mem (word_mode, mem); +- insn = emit_move_insn (reg, mem); + +- if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) +- { +- /* Previously we'd represented the CFA as an expression +- like *(%ebp - 8). We've just popped that value from +- the stack, which means we need to reset the CFA to +- the drap register. This will remain until we restore +- the stack pointer. */ +- add_reg_note (insn, REG_CFA_DEF_CFA, reg); +- RTX_FRAME_RELATED_P (insn) = 1; ++ /* Everything looks valid. */ ++ return true; ++} + +- /* This means that the DRAP register is valid for addressing. 
*/ +- m->fs.drap_valid = true; +- } +- else +- ix86_add_cfa_restore_note (NULL, reg, cfa_offset); ++/* Determine if a given RTX is a valid constant address. */ + +- cfa_offset -= UNITS_PER_WORD; +- } ++bool ++constant_address_p (rtx x) ++{ ++ return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1); + } ++ ++/* Return a unique alias set for the GOT. */ + +-/* Emit code to restore saved registers using MOV insns. +- First register is restored from CFA - CFA_OFFSET. */ +-static void +-ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, +- bool maybe_eh_return) ++alias_set_type ++ix86_GOT_alias_set (void) + { +- unsigned int regno; ++ static alias_set_type set = -1; ++ if (set == -1) ++ set = new_alias_set (); ++ return set; ++} + +- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) +- { +- rtx reg = gen_rtx_REG (V4SFmode, regno); +- rtx mem; +- unsigned int align = GET_MODE_ALIGNMENT (V4SFmode); ++/* Return a legitimate reference for ORIG (an address) using the ++ register REG. If REG is 0, a new pseudo is generated. + +- mem = choose_baseaddr (cfa_offset, &align); +- mem = gen_rtx_MEM (V4SFmode, mem); ++ There are two types of references that must be handled: + +- /* The location aligment depends upon the base register. */ +- align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align); +- gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); +- set_mem_align (mem, align); +- emit_insn (gen_rtx_SET (reg, mem)); ++ 1. Global data references must load the address from the GOT, via ++ the PIC reg. An insn is emitted to do this load, and the reg is ++ returned. + +- ix86_add_cfa_restore_note (NULL, reg, cfa_offset); ++ 2. Static data references, constant pool addresses, and code labels ++ compute the address as an offset from the GOT, whose base is in ++ the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to ++ differentiate them from global data objects. The returned ++ address is the PIC reg + an unspec constant. + +- cfa_offset -= GET_MODE_SIZE (V4SFmode); +- } +-} ++ TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC ++ reg also appears in the address. */ + +-static void +-ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame, +- bool use_call, int style) ++rtx ++legitimize_pic_address (rtx orig, rtx reg) + { +- struct machine_function *m = cfun->machine; +- const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS +- + m->call_ms2sysv_extra_regs; +- rtvec v; +- unsigned int elems_needed, align, i, vi = 0; +- rtx_insn *insn; +- rtx sym, tmp; +- rtx rsi = gen_rtx_REG (word_mode, SI_REG); +- rtx r10 = NULL_RTX; +- const struct xlogue_layout &xlogue = xlogue_layout::get_instance (); +- HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset (); +- HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset; +- rtx rsi_frame_load = NULL_RTX; +- HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1; +- enum xlogue_stub stub; ++ rtx addr = orig; ++ rtx new_rtx = orig; + +- gcc_assert (!m->fs.fp_valid || frame_pointer_needed); ++#if TARGET_MACHO ++ if (TARGET_MACHO && !TARGET_64BIT) ++ { ++ if (reg == 0) ++ reg = gen_reg_rtx (Pmode); ++ /* Use the generic Mach-O PIC machinery. */ ++ return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); ++ } ++#endif + +- /* If using a realigned stack, we should never start with padding. 
*/ +- gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ()); ++ if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES) ++ { ++ rtx tmp = legitimize_pe_coff_symbol (addr, true); ++ if (tmp) ++ return tmp; ++ } + +- /* Setup RSI as the stub's base pointer. */ +- align = GET_MODE_ALIGNMENT (V4SFmode); +- tmp = choose_baseaddr (rsi_offset, &align, SI_REG); +- gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode)); ++ if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) ++ new_rtx = addr; ++ else if ((!TARGET_64BIT ++ || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC) ++ && !TARGET_PECOFF ++ && gotoff_operand (addr, Pmode)) ++ { ++ /* This symbol may be referenced via a displacement ++ from the PIC base address (@GOTOFF). */ ++ if (GET_CODE (addr) == CONST) ++ addr = XEXP (addr, 0); + +- emit_insn (gen_rtx_SET (rsi, tmp)); ++ if (GET_CODE (addr) == PLUS) ++ { ++ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), ++ UNSPEC_GOTOFF); ++ new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); ++ } ++ else ++ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); + +- /* Get a symbol for the stub. */ +- if (frame_pointer_needed) +- stub = use_call ? XLOGUE_STUB_RESTORE_HFP +- : XLOGUE_STUB_RESTORE_HFP_TAIL; +- else +- stub = use_call ? XLOGUE_STUB_RESTORE +- : XLOGUE_STUB_RESTORE_TAIL; +- sym = xlogue.get_stub_rtx (stub); ++ new_rtx = gen_rtx_CONST (Pmode, new_rtx); + +- elems_needed = ncregs; +- if (use_call) +- elems_needed += 1; +- else +- elems_needed += frame_pointer_needed ? 5 : 3; +- v = rtvec_alloc (elems_needed); ++ if (TARGET_64BIT) ++ new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); + +- /* We call the epilogue stub when we need to pop incoming args or we are +- doing a sibling call as the tail. Otherwise, we will emit a jmp to the +- epilogue stub and it is the tail-call. */ +- if (use_call) +- RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); +- else +- { +- RTVEC_ELT (v, vi++) = ret_rtx; +- RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); +- if (frame_pointer_needed) ++ if (reg != 0) + { +- rtx rbp = gen_rtx_REG (DImode, BP_REG); +- gcc_assert (m->fs.fp_valid); +- gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx); +- +- tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8)); +- RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp); +- RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp)); +- tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)); +- RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp); +- } ++ gcc_assert (REG_P (reg)); ++ new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx, ++ new_rtx, reg, 1, OPTAB_DIRECT); ++ } + else +- { +- /* If no hard frame pointer, we set R10 to the SP restore value. */ +- gcc_assert (!m->fs.fp_valid); +- gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); +- gcc_assert (m->fs.sp_valid); +- +- r10 = gen_rtx_REG (DImode, R10_REG); +- tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset)); +- emit_insn (gen_rtx_SET (r10, tmp)); +- +- RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10); +- } ++ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); + } +- +- /* Generate frame load insns and restore notes. */ +- for (i = 0; i < ncregs; ++i) ++ else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) ++ /* We can't use @GOTOFF for text labels ++ on VxWorks, see gotoff_operand. 
*/ ++ || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) + { +- const xlogue_layout::reginfo &r = xlogue.get_reginfo (i); +- machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode; +- rtx reg, frame_load; +- +- reg = gen_rtx_REG (mode, r.regno); +- frame_load = gen_frame_load (reg, rsi, r.offset); ++ rtx tmp = legitimize_pe_coff_symbol (addr, true); ++ if (tmp) ++ return tmp; + +- /* Save RSI frame load insn & note to add last. */ +- if (r.regno == SI_REG) ++ /* For x64 PE-COFF there is no GOT table, ++ so we use address directly. */ ++ if (TARGET_64BIT && TARGET_PECOFF) + { +- gcc_assert (!rsi_frame_load); +- rsi_frame_load = frame_load; +- rsi_restore_offset = r.offset; ++ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL); ++ new_rtx = gen_rtx_CONST (Pmode, new_rtx); ++ } ++ else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC) ++ { ++ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ++ UNSPEC_GOTPCREL); ++ new_rtx = gen_rtx_CONST (Pmode, new_rtx); ++ new_rtx = gen_const_mem (Pmode, new_rtx); ++ set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); + } + else + { +- RTVEC_ELT (v, vi++) = frame_load; +- ix86_add_cfa_restore_note (NULL, reg, r.offset); ++ /* This symbol must be referenced via a load ++ from the Global Offset Table (@GOT). */ ++ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); ++ new_rtx = gen_rtx_CONST (Pmode, new_rtx); ++ if (TARGET_64BIT) ++ new_rtx = force_reg (Pmode, new_rtx); ++ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); ++ new_rtx = gen_const_mem (Pmode, new_rtx); ++ set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); + } +- } +- +- /* Add RSI frame load & restore note at the end. */ +- gcc_assert (rsi_frame_load); +- gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1); +- RTVEC_ELT (v, vi++) = rsi_frame_load; +- ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG), +- rsi_restore_offset); +- +- /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */ +- if (!use_call && !frame_pointer_needed) +- { +- gcc_assert (m->fs.sp_valid); +- gcc_assert (!m->fs.sp_realigned); + +- /* At this point, R10 should point to frame.stack_realign_offset. */ +- if (m->fs.cfa_reg == stack_pointer_rtx) +- m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset; +- m->fs.sp_offset = frame.stack_realign_offset; ++ new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); + } +- +- gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v)); +- tmp = gen_rtx_PARALLEL (VOIDmode, v); +- if (use_call) +- insn = emit_insn (tmp); + else + { +- insn = emit_jump_insn (tmp); +- JUMP_LABEL (insn) = ret_rtx; +- +- if (frame_pointer_needed) +- ix86_emit_leave (insn); +- else ++ if (CONST_INT_P (addr) ++ && !x86_64_immediate_operand (addr, VOIDmode)) ++ new_rtx = copy_to_suggested_reg (addr, reg, Pmode); ++ else if (GET_CODE (addr) == CONST) + { +- /* Need CFA adjust note. */ +- tmp = gen_rtx_SET (stack_pointer_rtx, r10); +- add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp); +- } +- } ++ addr = XEXP (addr, 0); + +- RTX_FRAME_RELATED_P (insn) = true; +- ix86_add_queued_cfa_restore_notes (insn); ++ /* We must match stuff we generate before. Assume the only ++ unspecs that can get here are ours. Not that we could do ++ anything with them anyway.... */ ++ if (GET_CODE (addr) == UNSPEC ++ || (GET_CODE (addr) == PLUS ++ && GET_CODE (XEXP (addr, 0)) == UNSPEC)) ++ return orig; ++ gcc_assert (GET_CODE (addr) == PLUS); ++ } + +- /* If we're not doing a tail-call, we need to adjust the stack. 
*/ +- if (use_call && m->fs.sp_valid) +- { +- HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset; +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (dealloc), style, +- m->fs.cfa_reg == stack_pointer_rtx); +- } +-} ++ if (GET_CODE (addr) == PLUS) ++ { ++ rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); + +-/* Restore function stack, frame, and registers. */ ++ /* Check first to see if this is a constant ++ offset from a @GOTOFF symbol reference. */ ++ if (!TARGET_PECOFF ++ && gotoff_operand (op0, Pmode) ++ && CONST_INT_P (op1)) ++ { ++ if (!TARGET_64BIT) ++ { ++ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), ++ UNSPEC_GOTOFF); ++ new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); ++ new_rtx = gen_rtx_CONST (Pmode, new_rtx); + +-void +-ix86_expand_epilogue (int style) +-{ +- struct machine_function *m = cfun->machine; +- struct machine_frame_state frame_state_save = m->fs; +- bool restore_regs_via_mov; +- bool using_drap; +- bool restore_stub_is_tail = false; ++ if (reg != 0) ++ { ++ gcc_assert (REG_P (reg)); ++ new_rtx = expand_simple_binop (Pmode, PLUS, ++ pic_offset_table_rtx, ++ new_rtx, reg, 1, ++ OPTAB_DIRECT); ++ } ++ else ++ new_rtx ++ = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); ++ } ++ else ++ { ++ if (INTVAL (op1) < -16*1024*1024 ++ || INTVAL (op1) >= 16*1024*1024) ++ { ++ if (!x86_64_immediate_operand (op1, Pmode)) ++ op1 = force_reg (Pmode, op1); + +- if (ix86_function_naked (current_function_decl)) +- { +- /* The program should not reach this point. */ +- emit_insn (gen_ud2 ()); +- return; +- } ++ new_rtx ++ = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); ++ } ++ } ++ } ++ else ++ { ++ rtx base = legitimize_pic_address (op0, reg); ++ machine_mode mode = GET_MODE (base); ++ new_rtx ++ = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg); + +- ix86_finalize_stack_frame_flags (); +- const struct ix86_frame &frame = cfun->machine->frame; ++ if (CONST_INT_P (new_rtx)) ++ { ++ if (INTVAL (new_rtx) < -16*1024*1024 ++ || INTVAL (new_rtx) >= 16*1024*1024) ++ { ++ if (!x86_64_immediate_operand (new_rtx, mode)) ++ new_rtx = force_reg (mode, new_rtx); + +- m->fs.sp_realigned = stack_realign_fp; +- m->fs.sp_valid = stack_realign_fp +- || !frame_pointer_needed +- || crtl->sp_is_unchanging; +- gcc_assert (!m->fs.sp_valid +- || m->fs.sp_offset == frame.stack_pointer_offset); ++ new_rtx ++ = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx); ++ } ++ else ++ new_rtx = plus_constant (mode, base, INTVAL (new_rtx)); ++ } ++ else ++ { ++ /* For %rip addressing, we have to use ++ just disp32, not base nor index. */ ++ if (TARGET_64BIT ++ && (GET_CODE (base) == SYMBOL_REF ++ || GET_CODE (base) == LABEL_REF)) ++ base = force_reg (mode, base); ++ if (GET_CODE (new_rtx) == PLUS ++ && CONSTANT_P (XEXP (new_rtx, 1))) ++ { ++ base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0)); ++ new_rtx = XEXP (new_rtx, 1); ++ } ++ new_rtx = gen_rtx_PLUS (mode, base, new_rtx); ++ } ++ } ++ } ++ } ++ return new_rtx; ++} ++ ++/* Load the thread pointer. If TO_REG is true, force it into a register. */ + +- /* The FP must be valid if the frame pointer is present. */ +- gcc_assert (frame_pointer_needed == m->fs.fp_valid); +- gcc_assert (!m->fs.fp_valid +- || m->fs.fp_offset == frame.hard_frame_pointer_offset); ++static rtx ++get_thread_pointer (machine_mode tp_mode, bool to_reg) ++{ ++ rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); + +- /* We must have *some* valid pointer to the stack frame. 
*/ +- gcc_assert (m->fs.sp_valid || m->fs.fp_valid); ++ if (GET_MODE (tp) != tp_mode) ++ { ++ gcc_assert (GET_MODE (tp) == SImode); ++ gcc_assert (tp_mode == DImode); + +- /* The DRAP is never valid at this point. */ +- gcc_assert (!m->fs.drap_valid); ++ tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); ++ } + +- /* See the comment about red zone and frame +- pointer usage in ix86_expand_prologue. */ +- if (frame_pointer_needed && frame.red_zone_size) +- emit_insn (gen_memory_blockage ()); ++ if (to_reg) ++ tp = copy_to_mode_reg (tp_mode, tp); + +- using_drap = crtl->drap_reg && crtl->stack_realign_needed; +- gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg); ++ return tp; ++} + +- /* Determine the CFA offset of the end of the red-zone. */ +- m->fs.red_zone_offset = 0; +- if (ix86_using_red_zone () && crtl->args.pops_args < 65536) ++/* Construct the SYMBOL_REF for the tls_get_addr function. */ ++ ++static GTY(()) rtx ix86_tls_symbol; ++ ++static rtx ++ix86_tls_get_addr (void) ++{ ++ if (!ix86_tls_symbol) + { +- /* The red-zone begins below return address and error code in +- exception handler. */ +- m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET; ++ const char *sym ++ = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT) ++ ? "___tls_get_addr" : "__tls_get_addr"); + +- /* When the register save area is in the aligned portion of +- the stack, determine the maximum runtime displacement that +- matches up with the aligned frame. */ +- if (stack_realign_drap) +- m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT +- + UNITS_PER_WORD); ++ ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym); + } + +- HOST_WIDE_INT reg_save_offset = frame.reg_save_offset; ++ if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF) ++ { ++ rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol), ++ UNSPEC_PLTOFF); ++ return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, ++ gen_rtx_CONST (Pmode, unspec)); ++ } + +- /* Special care must be taken for the normal return case of a function +- using eh_return: the eax and edx registers are marked as saved, but +- not restored along this path. Adjust the save location to match. */ +- if (crtl->calls_eh_return && style != 2) +- reg_save_offset -= 2 * UNITS_PER_WORD; ++ return ix86_tls_symbol; ++} + +- /* EH_RETURN requires the use of moves to function properly. */ +- if (crtl->calls_eh_return) +- restore_regs_via_mov = true; +- /* SEH requires the use of pops to identify the epilogue. */ +- else if (TARGET_SEH) +- restore_regs_via_mov = false; +- /* If we're only restoring one register and sp cannot be used then +- using a move instruction to restore the register since it's +- less work than reloading sp and popping the register. */ +- else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1) +- restore_regs_via_mov = true; +- else if (TARGET_EPILOGUE_USING_MOVE +- && cfun->machine->use_fast_prologue_epilogue +- && (frame.nregs > 1 +- || m->fs.sp_offset != reg_save_offset)) +- restore_regs_via_mov = true; +- else if (frame_pointer_needed +- && !frame.nregs +- && m->fs.sp_offset != reg_save_offset) +- restore_regs_via_mov = true; +- else if (frame_pointer_needed +- && TARGET_USE_LEAVE +- && cfun->machine->use_fast_prologue_epilogue +- && frame.nregs == 1) +- restore_regs_via_mov = true; +- else +- restore_regs_via_mov = false; ++/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. 
*/ + +- if (restore_regs_via_mov || frame.nsseregs) ++static GTY(()) rtx ix86_tls_module_base_symbol; ++ ++rtx ++ix86_tls_module_base (void) ++{ ++ if (!ix86_tls_module_base_symbol) + { +- /* Ensure that the entire register save area is addressable via +- the stack pointer, if we will restore SSE regs via sp. */ +- if (TARGET_64BIT +- && m->fs.sp_offset > 0x7fffffff +- && sp_valid_at (frame.stack_realign_offset + 1) +- && (frame.nsseregs + frame.nregs) != 0) +- { +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (m->fs.sp_offset +- - frame.sse_reg_save_offset), +- style, +- m->fs.cfa_reg == stack_pointer_rtx); +- } +- } ++ ix86_tls_module_base_symbol ++ = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); + +- /* If there are any SSE registers to restore, then we have to do it +- via moves, since there's obviously no pop for SSE regs. */ +- if (frame.nsseregs) +- ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset, +- style == 2); ++ SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) ++ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; ++ } + +- if (m->call_ms2sysv) +- { +- int pop_incoming_args = crtl->args.pops_args && crtl->args.size; ++ return ix86_tls_module_base_symbol; ++} + +- /* We cannot use a tail-call for the stub if: +- 1. We have to pop incoming args, +- 2. We have additional int regs to restore, or +- 3. A sibling call will be the tail-call, or +- 4. We are emitting an eh_return_internal epilogue. ++/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is ++ false if we expect this to be used for a memory address and true if ++ we expect to load the address into a register. */ + +- TODO: Item 4 has not yet tested! ++rtx ++legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) ++{ ++ rtx dest, base, off; ++ rtx pic = NULL_RTX, tp = NULL_RTX; ++ machine_mode tp_mode = Pmode; ++ int type; + +- If any of the above are true, we will call the stub rather than +- jump to it. */ +- restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1); +- ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style); +- } ++ /* Fall back to global dynamic model if tool chain cannot support local ++ dynamic. */ ++ if (TARGET_SUN_TLS && !TARGET_64BIT ++ && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM ++ && model == TLS_MODEL_LOCAL_DYNAMIC) ++ model = TLS_MODEL_GLOBAL_DYNAMIC; + +- /* If using out-of-line stub that is a tail-call, then...*/ +- if (m->call_ms2sysv && restore_stub_is_tail) +- { +- /* TODO: parinoid tests. (remove eventually) */ +- gcc_assert (m->fs.sp_valid); +- gcc_assert (!m->fs.sp_realigned); +- gcc_assert (!m->fs.fp_valid); +- gcc_assert (!m->fs.realigned); +- gcc_assert (m->fs.sp_offset == UNITS_PER_WORD); +- gcc_assert (!crtl->drap_reg); +- gcc_assert (!frame.nregs); +- } +- else if (restore_regs_via_mov) ++ switch (model) + { +- rtx t; ++ case TLS_MODEL_GLOBAL_DYNAMIC: ++ dest = gen_reg_rtx (Pmode); + +- if (frame.nregs) +- ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2); ++ if (!TARGET_64BIT) ++ { ++ if (flag_pic && !TARGET_PECOFF) ++ pic = pic_offset_table_rtx; ++ else ++ { ++ pic = gen_reg_rtx (Pmode); ++ emit_insn (gen_set_got (pic)); ++ } ++ } + +- /* eh_return epilogues need %ecx added to the stack pointer. 
*/ +- if (style == 2) ++ if (TARGET_GNU2_TLS) + { +- rtx sa = EH_RETURN_STACKADJ_RTX; +- rtx_insn *insn; ++ if (TARGET_64BIT) ++ emit_insn (gen_tls_dynamic_gnu2_64 (dest, x)); ++ else ++ emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic)); + +- /* %ecx can't be used for both DRAP register and eh_return. */ +- if (crtl->drap_reg) +- gcc_assert (REGNO (crtl->drap_reg) != CX_REG); ++ tp = get_thread_pointer (Pmode, true); ++ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); + +- /* regparm nested functions don't work with eh_return. */ +- gcc_assert (!ix86_static_chain_on_stack); ++ if (GET_MODE (x) != Pmode) ++ x = gen_rtx_ZERO_EXTEND (Pmode, x); + +- if (frame_pointer_needed) ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, x); ++ } ++ else ++ { ++ rtx caddr = ix86_tls_get_addr (); ++ ++ if (TARGET_64BIT) + { +- t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); +- t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD); +- emit_insn (gen_rtx_SET (sa, t)); ++ rtx rax = gen_rtx_REG (Pmode, AX_REG); ++ rtx_insn *insns; + +- t = gen_frame_mem (Pmode, hard_frame_pointer_rtx); +- insn = emit_move_insn (hard_frame_pointer_rtx, t); ++ start_sequence (); ++ emit_call_insn ++ (ix86_gen_tls_global_dynamic_64 (rax, x, caddr)); ++ insns = get_insns (); ++ end_sequence (); + +- /* Note that we use SA as a temporary CFA, as the return +- address is at the proper place relative to it. We +- pretend this happens at the FP restore insn because +- prior to this insn the FP would be stored at the wrong +- offset relative to SA, and after this insn we have no +- other reasonable register to use for the CFA. We don't +- bother resetting the CFA to the SP for the duration of +- the return insn, unless the control flow instrumentation +- is done. In this case the SP is used later and we have +- to reset CFA to SP. */ +- add_reg_note (insn, REG_CFA_DEF_CFA, +- plus_constant (Pmode, sa, UNITS_PER_WORD)); +- ix86_add_queued_cfa_restore_notes (insn); +- add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); +- RTX_FRAME_RELATED_P (insn) = 1; +- +- m->fs.cfa_reg = sa; +- m->fs.cfa_offset = UNITS_PER_WORD; +- m->fs.fp_valid = false; ++ if (GET_MODE (x) != Pmode) ++ x = gen_rtx_ZERO_EXTEND (Pmode, x); + +- pro_epilogue_adjust_stack (stack_pointer_rtx, sa, +- const0_rtx, style, +- flag_cf_protection); ++ RTL_CONST_CALL_P (insns) = 1; ++ emit_libcall_block (insns, dest, rax, x); + } + else +- { +- t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); +- t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD); +- insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t)); +- ix86_add_queued_cfa_restore_notes (insn); +- +- gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); +- if (m->fs.cfa_offset != UNITS_PER_WORD) +- { +- m->fs.cfa_offset = UNITS_PER_WORD; +- add_reg_note (insn, REG_CFA_DEF_CFA, +- plus_constant (Pmode, stack_pointer_rtx, +- UNITS_PER_WORD)); +- RTX_FRAME_RELATED_P (insn) = 1; +- } +- } +- m->fs.sp_offset = UNITS_PER_WORD; +- m->fs.sp_valid = true; +- m->fs.sp_realigned = false; ++ emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr)); + } +- } +- else +- { +- /* SEH requires that the function end with (1) a stack adjustment +- if necessary, (2) a sequence of pops, and (3) a return or +- jump instruction. Prevent insns from the function body from +- being scheduled into this sequence. 
*/ +- if (TARGET_SEH) ++ break; ++ ++ case TLS_MODEL_LOCAL_DYNAMIC: ++ base = gen_reg_rtx (Pmode); ++ ++ if (!TARGET_64BIT) + { +- /* Prevent a catch region from being adjacent to the standard +- epilogue sequence. Unfortunately neither crtl->uses_eh_lsda +- nor several other flags that would be interesting to test are +- set up yet. */ +- if (flag_non_call_exceptions) +- emit_insn (gen_nops (const1_rtx)); ++ if (flag_pic) ++ pic = pic_offset_table_rtx; + else +- emit_insn (gen_blockage ()); ++ { ++ pic = gen_reg_rtx (Pmode); ++ emit_insn (gen_set_got (pic)); ++ } + } + +- /* First step is to deallocate the stack frame so that we can +- pop the registers. If the stack pointer was realigned, it needs +- to be restored now. Also do it on SEH target for very large +- frame as the emitted instructions aren't allowed by the ABI +- in epilogues. */ +- if (!m->fs.sp_valid || m->fs.sp_realigned +- || (TARGET_SEH +- && (m->fs.sp_offset - reg_save_offset +- >= SEH_MAX_FRAME_SIZE))) +- { +- pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx, +- GEN_INT (m->fs.fp_offset +- - reg_save_offset), +- style, false); +- } +- else if (m->fs.sp_offset != reg_save_offset) ++ if (TARGET_GNU2_TLS) + { +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (m->fs.sp_offset +- - reg_save_offset), +- style, +- m->fs.cfa_reg == stack_pointer_rtx); +- } ++ rtx tmp = ix86_tls_module_base (); + +- ix86_emit_restore_regs_using_pop (); +- } ++ if (TARGET_64BIT) ++ emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp)); ++ else ++ emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic)); + +- /* If we used a stack pointer and haven't already got rid of it, +- then do so now. */ +- if (m->fs.fp_valid) +- { +- /* If the stack pointer is valid and pointing at the frame +- pointer store address, then we only need a pop. */ +- if (sp_valid_at (frame.hfp_save_offset) +- && m->fs.sp_offset == frame.hfp_save_offset) +- ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); +- /* Leave results in shorter dependency chains on CPUs that are +- able to grok it fast. 
*/ +- else if (TARGET_USE_LEAVE +- || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) +- || !cfun->machine->use_fast_prologue_epilogue) +- ix86_emit_leave (NULL); ++ tp = get_thread_pointer (Pmode, true); ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, ++ gen_rtx_MINUS (Pmode, tmp, tp)); ++ } + else +- { +- pro_epilogue_adjust_stack (stack_pointer_rtx, +- hard_frame_pointer_rtx, +- const0_rtx, style, !using_drap); +- ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); +- } +- } +- +- if (using_drap) +- { +- int param_ptr_offset = UNITS_PER_WORD; +- rtx_insn *insn; ++ { ++ rtx caddr = ix86_tls_get_addr (); + +- gcc_assert (stack_realign_drap); ++ if (TARGET_64BIT) ++ { ++ rtx rax = gen_rtx_REG (Pmode, AX_REG); ++ rtx_insn *insns; ++ rtx eqv; + +- if (ix86_static_chain_on_stack) +- param_ptr_offset += UNITS_PER_WORD; +- if (!call_used_regs[REGNO (crtl->drap_reg)]) +- param_ptr_offset += UNITS_PER_WORD; ++ start_sequence (); ++ emit_call_insn ++ (ix86_gen_tls_local_dynamic_base_64 (rax, caddr)); ++ insns = get_insns (); ++ end_sequence (); + +- insn = emit_insn (gen_rtx_SET +- (stack_pointer_rtx, +- gen_rtx_PLUS (Pmode, +- crtl->drap_reg, +- GEN_INT (-param_ptr_offset)))); +- m->fs.cfa_reg = stack_pointer_rtx; +- m->fs.cfa_offset = param_ptr_offset; +- m->fs.sp_offset = param_ptr_offset; +- m->fs.realigned = false; ++ /* Attach a unique REG_EQUAL, to allow the RTL optimizers to ++ share the LD_BASE result with other LD model accesses. */ ++ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), ++ UNSPEC_TLS_LD_BASE); + +- add_reg_note (insn, REG_CFA_DEF_CFA, +- gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- GEN_INT (param_ptr_offset))); +- RTX_FRAME_RELATED_P (insn) = 1; ++ RTL_CONST_CALL_P (insns) = 1; ++ emit_libcall_block (insns, base, rax, eqv); ++ } ++ else ++ emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr)); ++ } + +- if (!call_used_regs[REGNO (crtl->drap_reg)]) +- ix86_emit_restore_reg_using_pop (crtl->drap_reg); +- } ++ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); ++ off = gen_rtx_CONST (Pmode, off); + +- /* At this point the stack pointer must be valid, and we must have +- restored all of the registers. We may not have deallocated the +- entire stack frame. We've delayed this until now because it may +- be possible to merge the local stack deallocation with the +- deallocation forced by ix86_static_chain_on_stack. */ +- gcc_assert (m->fs.sp_valid); +- gcc_assert (!m->fs.sp_realigned); +- gcc_assert (!m->fs.fp_valid); +- gcc_assert (!m->fs.realigned); +- if (m->fs.sp_offset != UNITS_PER_WORD) +- { +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), +- style, true); +- } +- else +- ix86_add_queued_cfa_restore_notes (get_last_insn ()); ++ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); + +- /* Sibcall epilogues don't want a return instruction. */ +- if (style == 0) +- { +- m->fs = frame_state_save; +- return; +- } ++ if (TARGET_GNU2_TLS) ++ { ++ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); + +- if (cfun->machine->func_type != TYPE_NORMAL) +- emit_jump_insn (gen_interrupt_return ()); +- else if (crtl->args.pops_args && crtl->args.size) +- { +- rtx popc = GEN_INT (crtl->args.pops_args); ++ if (GET_MODE (x) != Pmode) ++ x = gen_rtx_ZERO_EXTEND (Pmode, x); + +- /* i386 can only pop 64K bytes. If asked to pop more, pop return +- address, do explicit add, and jump indirectly to the caller. 
*/ ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, x); ++ } ++ break; + +- if (crtl->args.pops_args >= 65536) ++ case TLS_MODEL_INITIAL_EXEC: ++ if (TARGET_64BIT) + { +- rtx ecx = gen_rtx_REG (SImode, CX_REG); +- rtx_insn *insn; +- +- /* There is no "pascal" calling convention in any 64bit ABI. */ +- gcc_assert (!TARGET_64BIT); +- +- insn = emit_insn (gen_pop (ecx)); +- m->fs.cfa_offset -= UNITS_PER_WORD; +- m->fs.sp_offset -= UNITS_PER_WORD; ++ if (TARGET_SUN_TLS && !TARGET_X32) ++ { ++ /* The Sun linker took the AMD64 TLS spec literally ++ and can only handle %rax as destination of the ++ initial executable code sequence. */ + +- rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); +- x = gen_rtx_SET (stack_pointer_rtx, x); +- add_reg_note (insn, REG_CFA_ADJUST_CFA, x); +- add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); +- RTX_FRAME_RELATED_P (insn) = 1; ++ dest = gen_reg_rtx (DImode); ++ emit_insn (gen_tls_initial_exec_64_sun (dest, x)); ++ return dest; ++ } + +- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, +- popc, -1, true); +- emit_jump_insn (gen_simple_return_indirect_internal (ecx)); ++ /* Generate DImode references to avoid %fs:(%reg32) ++ problems and linker IE->LE relaxation bug. */ ++ tp_mode = DImode; ++ pic = NULL; ++ type = UNSPEC_GOTNTPOFF; + } +- else +- emit_jump_insn (gen_simple_return_pop_internal (popc)); +- } +- else if (!m->call_ms2sysv || !restore_stub_is_tail) +- { +- /* In case of return from EH a simple return cannot be used +- as a return address will be compared with a shadow stack +- return address. Use indirect jump instead. */ +- if (style == 2 && flag_cf_protection) ++ else if (flag_pic) + { +- /* Register used in indirect jump must be in word_mode. But +- Pmode may not be the same as word_mode for x32. */ +- rtx ecx = gen_rtx_REG (word_mode, CX_REG); +- rtx_insn *insn; +- +- insn = emit_insn (gen_pop (ecx)); +- m->fs.cfa_offset -= UNITS_PER_WORD; +- m->fs.sp_offset -= UNITS_PER_WORD; +- +- rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); +- x = gen_rtx_SET (stack_pointer_rtx, x); +- add_reg_note (insn, REG_CFA_ADJUST_CFA, x); +- add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); +- RTX_FRAME_RELATED_P (insn) = 1; +- +- emit_jump_insn (gen_simple_return_indirect_internal (ecx)); ++ pic = pic_offset_table_rtx; ++ type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; ++ } ++ else if (!TARGET_ANY_GNU_TLS) ++ { ++ pic = gen_reg_rtx (Pmode); ++ emit_insn (gen_set_got (pic)); ++ type = UNSPEC_GOTTPOFF; + } + else +- emit_jump_insn (gen_simple_return_internal ()); +- } +- +- /* Restore the state back to the state from the prologue, +- so that it's correct for the next epilogue. */ +- m->fs = frame_state_save; +-} ++ { ++ pic = NULL; ++ type = UNSPEC_INDNTPOFF; ++ } + +-/* Reset from the function's potential modifications. 
*/ ++ off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type); ++ off = gen_rtx_CONST (tp_mode, off); ++ if (pic) ++ off = gen_rtx_PLUS (tp_mode, pic, off); ++ off = gen_const_mem (tp_mode, off); ++ set_mem_alias_set (off, ix86_GOT_alias_set ()); + +-static void +-ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED) +-{ +- if (pic_offset_table_rtx +- && !ix86_use_pseudo_pic_reg ()) +- SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); ++ if (TARGET_64BIT || TARGET_ANY_GNU_TLS) ++ { ++ base = get_thread_pointer (tp_mode, ++ for_mov || !TARGET_TLS_DIRECT_SEG_REFS); ++ off = force_reg (tp_mode, off); ++ dest = gen_rtx_PLUS (tp_mode, base, off); ++ if (tp_mode != Pmode) ++ dest = convert_to_mode (Pmode, dest, 1); ++ } ++ else ++ { ++ base = get_thread_pointer (Pmode, true); ++ dest = gen_reg_rtx (Pmode); ++ emit_insn (ix86_gen_sub3 (dest, base, off)); ++ } ++ break; + +- if (TARGET_MACHO) +- { +- rtx_insn *insn = get_last_insn (); +- rtx_insn *deleted_debug_label = NULL; ++ case TLS_MODEL_LOCAL_EXEC: ++ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), ++ (TARGET_64BIT || TARGET_ANY_GNU_TLS) ++ ? UNSPEC_NTPOFF : UNSPEC_TPOFF); ++ off = gen_rtx_CONST (Pmode, off); + +- /* Mach-O doesn't support labels at the end of objects, so if +- it looks like we might want one, take special action. +- First, collect any sequence of deleted debug labels. */ +- while (insn +- && NOTE_P (insn) +- && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) ++ if (TARGET_64BIT || TARGET_ANY_GNU_TLS) + { +- /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL +- notes only, instead set their CODE_LABEL_NUMBER to -1, +- otherwise there would be code generation differences +- in between -g and -g0. */ +- if (NOTE_P (insn) && NOTE_KIND (insn) +- == NOTE_INSN_DELETED_DEBUG_LABEL) +- deleted_debug_label = insn; +- insn = PREV_INSN (insn); ++ base = get_thread_pointer (Pmode, ++ for_mov || !TARGET_TLS_DIRECT_SEG_REFS); ++ return gen_rtx_PLUS (Pmode, base, off); ++ } ++ else ++ { ++ base = get_thread_pointer (Pmode, true); ++ dest = gen_reg_rtx (Pmode); ++ emit_insn (ix86_gen_sub3 (dest, base, off)); + } ++ break; + +- /* If we have: +- label: +- barrier +- then this needs to be detected, so skip past the barrier. */ ++ default: ++ gcc_unreachable (); ++ } + +- if (insn && BARRIER_P (insn)) +- insn = PREV_INSN (insn); ++ return dest; ++} + +- /* Up to now we've only seen notes or barriers. */ +- if (insn) ++/* Return true if OP refers to a TLS address. */ ++bool ++ix86_tls_address_pattern_p (rtx op) ++{ ++ subrtx_var_iterator::array_type array; ++ FOR_EACH_SUBRTX_VAR (iter, array, op, ALL) ++ { ++ rtx op = *iter; ++ if (MEM_P (op)) + { +- if (LABEL_P (insn) +- || (NOTE_P (insn) +- && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) +- /* Trailing label. */ +- fputs ("\tnop\n", file); +- else if (cfun && ! cfun->is_thunk) ++ rtx *x = &XEXP (op, 0); ++ while (GET_CODE (*x) == PLUS) + { +- /* See if we have a completely empty function body, skipping +- the special case of the picbase thunk emitted as asm. */ +- while (insn && ! INSN_P (insn)) +- insn = PREV_INSN (insn); +- /* If we don't find any insns, we've got an empty function body; +- I.e. completely empty - without a return or branch. This is +- taken as the case where a function body has been removed +- because it contains an inline __builtin_unreachable(). GCC +- declares that reaching __builtin_unreachable() means UB so +- we're not obliged to do anything special; however, we want +- non-zero-sized function bodies. 
To meet this, and help the +- user out, let's trap the case. */ +- if (insn == NULL) +- fputs ("\tud2\n", file); ++ int i; ++ for (i = 0; i < 2; i++) ++ { ++ rtx u = XEXP (*x, i); ++ if (GET_CODE (u) == ZERO_EXTEND) ++ u = XEXP (u, 0); ++ if (GET_CODE (u) == UNSPEC ++ && XINT (u, 1) == UNSPEC_TP) ++ return true; ++ } ++ x = &XEXP (*x, 0); + } ++ ++ iter.skip_subrtxes (); + } +- else if (deleted_debug_label) +- for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) +- if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) +- CODE_LABEL_NUMBER (insn) = -1; + } +-} + +-/* Return a scratch register to use in the split stack prologue. The +- split stack prologue is used for -fsplit-stack. It is the first +- instructions in the function, even before the regular prologue. +- The scratch register can be any caller-saved register which is not +- used for parameters or for the static chain. */ ++ return false; ++} + +-static unsigned int +-split_stack_prologue_scratch_regno (void) ++/* Rewrite *LOC so that it refers to a default TLS address space. */ ++void ++ix86_rewrite_tls_address_1 (rtx *loc) + { +- if (TARGET_64BIT) +- return R11_REG; +- else ++ subrtx_ptr_iterator::array_type array; ++ FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) + { +- bool is_fastcall, is_thiscall; +- int regparm; +- +- is_fastcall = (lookup_attribute ("fastcall", +- TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) +- != NULL); +- is_thiscall = (lookup_attribute ("thiscall", +- TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) +- != NULL); +- regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); +- +- if (is_fastcall) +- { +- if (DECL_STATIC_CHAIN (cfun->decl)) +- { +- sorry ("%<-fsplit-stack%> does not support fastcall with " +- "nested function"); +- return INVALID_REGNUM; +- } +- return AX_REG; +- } +- else if (is_thiscall) +- { +- if (!DECL_STATIC_CHAIN (cfun->decl)) +- return DX_REG; +- return AX_REG; +- } +- else if (regparm < 3) ++ rtx *loc = *iter; ++ if (MEM_P (*loc)) + { +- if (!DECL_STATIC_CHAIN (cfun->decl)) +- return CX_REG; +- else ++ rtx addr = XEXP (*loc, 0); ++ rtx *x = &addr; ++ while (GET_CODE (*x) == PLUS) + { +- if (regparm >= 2) ++ int i; ++ for (i = 0; i < 2; i++) + { +- sorry ("%<-fsplit-stack%> does not support 2 register " +- "parameters for a nested function"); +- return INVALID_REGNUM; ++ rtx u = XEXP (*x, i); ++ if (GET_CODE (u) == ZERO_EXTEND) ++ u = XEXP (u, 0); ++ if (GET_CODE (u) == UNSPEC ++ && XINT (u, 1) == UNSPEC_TP) ++ { ++ addr_space_t as = DEFAULT_TLS_SEG_REG; ++ ++ *x = XEXP (*x, 1 - i); ++ ++ *loc = replace_equiv_address_nv (*loc, addr, true); ++ set_mem_addr_space (*loc, as); ++ return; ++ } + } +- return DX_REG; ++ x = &XEXP (*x, 0); + } +- } +- else +- { +- /* FIXME: We could make this work by pushing a register +- around the addition and comparison. */ +- sorry ("%<-fsplit-stack%> does not support 3 register parameters"); +- return INVALID_REGNUM; ++ ++ iter.skip_subrtxes (); + } + } + } + +-/* A SYMBOL_REF for the function which allocates new stackspace for +- -fsplit-stack. */ +- +-static GTY(()) rtx split_stack_fn; +- +-/* A SYMBOL_REF for the more stack function when using the large +- model. */ +- +-static GTY(()) rtx split_stack_fn_large; ++/* Rewrite instruction pattern involvning TLS address ++ so that it refers to a default TLS address space. */ ++rtx ++ix86_rewrite_tls_address (rtx pattern) ++{ ++ pattern = copy_insn (pattern); ++ ix86_rewrite_tls_address_1 (&pattern); ++ return pattern; ++} + +-/* Return location of the stack guard value in the TLS block. 
*/ ++/* Create or return the unique __imp_DECL dllimport symbol corresponding ++ to symbol DECL if BEIMPORT is true. Otherwise create or return the ++ unique refptr-DECL symbol corresponding to symbol DECL. */ + +-rtx +-ix86_split_stack_guard (void) ++struct dllimport_hasher : ggc_cache_ptr_hash + { +- int offset; +- addr_space_t as = DEFAULT_TLS_SEG_REG; +- rtx r; ++ static inline hashval_t hash (tree_map *m) { return m->hash; } ++ static inline bool ++ equal (tree_map *a, tree_map *b) ++ { ++ return a->base.from == b->base.from; ++ } + +- gcc_assert (flag_split_stack); ++ static int ++ keep_cache_entry (tree_map *&m) ++ { ++ return ggc_marked_p (m->base.from); ++ } ++}; + +-#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET +- offset = TARGET_THREAD_SPLIT_STACK_OFFSET; +-#else +- gcc_unreachable (); ++static GTY((cache)) hash_table *dllimport_map; ++ ++static tree ++get_dllimport_decl (tree decl, bool beimport) ++{ ++ struct tree_map *h, in; ++ const char *name; ++ const char *prefix; ++ size_t namelen, prefixlen; ++ char *imp_name; ++ tree to; ++ rtx rtl; ++ ++ if (!dllimport_map) ++ dllimport_map = hash_table::create_ggc (512); ++ ++ in.hash = htab_hash_pointer (decl); ++ in.base.from = decl; ++ tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT); ++ h = *loc; ++ if (h) ++ return h->to; ++ ++ *loc = h = ggc_alloc (); ++ h->hash = in.hash; ++ h->base.from = decl; ++ h->to = to = build_decl (DECL_SOURCE_LOCATION (decl), ++ VAR_DECL, NULL, ptr_type_node); ++ DECL_ARTIFICIAL (to) = 1; ++ DECL_IGNORED_P (to) = 1; ++ DECL_EXTERNAL (to) = 1; ++ TREE_READONLY (to) = 1; ++ ++ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); ++ name = targetm.strip_name_encoding (name); ++ if (beimport) ++ prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0 ++ ? "*__imp_" : "*__imp__"; ++ else ++ prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr."; ++ namelen = strlen (name); ++ prefixlen = strlen (prefix); ++ imp_name = (char *) alloca (namelen + prefixlen + 1); ++ memcpy (imp_name, prefix, prefixlen); ++ memcpy (imp_name + prefixlen, name, namelen + 1); ++ ++ name = ggc_alloc_string (imp_name, namelen + prefixlen); ++ rtl = gen_rtx_SYMBOL_REF (Pmode, name); ++ SET_SYMBOL_REF_DECL (rtl, to); ++ SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR; ++ if (!beimport) ++ { ++ SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL; ++#ifdef SUB_TARGET_RECORD_STUB ++ SUB_TARGET_RECORD_STUB (name); + #endif ++ } + +- r = GEN_INT (offset); +- r = gen_const_mem (Pmode, r); +- set_mem_addr_space (r, as); ++ rtl = gen_const_mem (Pmode, rtl); ++ set_mem_alias_set (rtl, ix86_GOT_alias_set ()); + +- return r; ++ SET_DECL_RTL (to, rtl); ++ SET_DECL_ASSEMBLER_NAME (to, get_identifier (name)); ++ ++ return to; + } + +-/* Handle -fsplit-stack. These are the first instructions in the +- function, even before the regular prologue. */ ++/* Expand SYMBOL into its corresponding far-address symbol. ++ WANT_REG is true if we require the result be a register. 
*/ + +-void +-ix86_expand_split_stack_prologue (void) ++static rtx ++legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg) + { +- HOST_WIDE_INT allocate; +- unsigned HOST_WIDE_INT args_size; +- rtx_code_label *label; +- rtx limit, current, allocate_rtx, call_fusage; +- rtx_insn *call_insn; +- rtx scratch_reg = NULL_RTX; +- rtx_code_label *varargs_label = NULL; +- rtx fn; ++ tree imp_decl; ++ rtx x; + +- gcc_assert (flag_split_stack && reload_completed); ++ gcc_assert (SYMBOL_REF_DECL (symbol)); ++ imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false); + +- ix86_finalize_stack_frame_flags (); +- struct ix86_frame &frame = cfun->machine->frame; +- allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; ++ x = DECL_RTL (imp_decl); ++ if (want_reg) ++ x = force_reg (Pmode, x); ++ return x; ++} + +- /* This is the label we will branch to if we have enough stack +- space. We expect the basic block reordering pass to reverse this +- branch if optimizing, so that we branch in the unlikely case. */ +- label = gen_label_rtx (); ++/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is ++ true if we require the result be a register. */ + +- /* We need to compare the stack pointer minus the frame size with +- the stack boundary in the TCB. The stack boundary always gives +- us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we +- can compare directly. Otherwise we need to do an addition. */ ++static rtx ++legitimize_dllimport_symbol (rtx symbol, bool want_reg) ++{ ++ tree imp_decl; ++ rtx x; + +- limit = ix86_split_stack_guard (); ++ gcc_assert (SYMBOL_REF_DECL (symbol)); ++ imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true); + +- if (allocate < SPLIT_STACK_AVAILABLE) +- current = stack_pointer_rtx; +- else +- { +- unsigned int scratch_regno; +- rtx offset; ++ x = DECL_RTL (imp_decl); ++ if (want_reg) ++ x = force_reg (Pmode, x); ++ return x; ++} + +- /* We need a scratch register to hold the stack pointer minus +- the required frame size. Since this is the very start of the +- function, the scratch register can be any caller-saved +- register which is not used for parameters. */ +- offset = GEN_INT (- allocate); +- scratch_regno = split_stack_prologue_scratch_regno (); +- if (scratch_regno == INVALID_REGNUM) +- return; +- scratch_reg = gen_rtx_REG (Pmode, scratch_regno); +- if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode)) +- { +- /* We don't use ix86_gen_add3 in this case because it will +- want to split to lea, but when not optimizing the insn +- will not be split after this point. */ +- emit_insn (gen_rtx_SET (scratch_reg, +- gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- offset))); +- } +- else ++/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG ++ is true if we require the result be a register. 
*/ ++ ++rtx ++legitimize_pe_coff_symbol (rtx addr, bool inreg) ++{ ++ if (!TARGET_PECOFF) ++ return NULL_RTX; ++ ++ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) ++ { ++ if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) ++ return legitimize_dllimport_symbol (addr, inreg); ++ if (GET_CODE (addr) == CONST ++ && GET_CODE (XEXP (addr, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF ++ && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) + { +- emit_move_insn (scratch_reg, offset); +- emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg, +- stack_pointer_rtx)); ++ rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg); ++ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); + } +- current = scratch_reg; + } + +- ix86_expand_branch (GEU, current, limit, label); +- rtx_insn *jump_insn = get_last_insn (); +- JUMP_LABEL (jump_insn) = label; +- +- /* Mark the jump as very likely to be taken. */ +- add_reg_br_prob_note (jump_insn, profile_probability::very_likely ()); ++ if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC) ++ return NULL_RTX; ++ if (GET_CODE (addr) == SYMBOL_REF ++ && !is_imported_p (addr) ++ && SYMBOL_REF_EXTERNAL_P (addr) ++ && SYMBOL_REF_DECL (addr)) ++ return legitimize_pe_coff_extern_decl (addr, inreg); + +- if (split_stack_fn == NULL_RTX) ++ if (GET_CODE (addr) == CONST ++ && GET_CODE (XEXP (addr, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF ++ && !is_imported_p (XEXP (XEXP (addr, 0), 0)) ++ && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0)) ++ && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0))) + { +- split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); +- SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL; ++ rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg); ++ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); + } +- fn = split_stack_fn; ++ return NULL_RTX; ++} + +- /* Get more stack space. We pass in the desired stack space and the +- size of the arguments to copy to the new stack. In 32-bit mode +- we push the parameters; __morestack will return on a new stack +- anyhow. In 64-bit mode we pass the parameters in r10 and +- r11. */ +- allocate_rtx = GEN_INT (allocate); +- args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0; +- call_fusage = NULL_RTX; +- rtx pop = NULL_RTX; +- if (TARGET_64BIT) +- { +- rtx reg10, reg11; ++/* Try machine-dependent ways of modifying an illegitimate address ++ to be legitimate. If we find one, return the new, valid address. ++ This macro is used in only one place: `memory_address' in explow.c. + +- reg10 = gen_rtx_REG (Pmode, R10_REG); +- reg11 = gen_rtx_REG (Pmode, R11_REG); ++ OLDX is the address as it was before break_out_memory_refs was called. ++ In some cases it is useful to look at this to decide what needs to be done. + +- /* If this function uses a static chain, it will be in %r10. +- Preserve it across the call to __morestack. */ +- if (DECL_STATIC_CHAIN (cfun->decl)) +- { +- rtx rax; ++ It is always safe for this macro to do nothing. It exists to recognize ++ opportunities to optimize the output. + +- rax = gen_rtx_REG (word_mode, AX_REG); +- emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG)); +- use_reg (&call_fusage, rax); +- } +- +- if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) +- && !TARGET_PECOFF) +- { +- HOST_WIDE_INT argval; +- +- gcc_assert (Pmode == DImode); +- /* When using the large model we need to load the address +- into a register, and we've run out of registers. 
So we +- switch to a different calling convention, and we call a +- different function: __morestack_large. We pass the +- argument size in the upper 32 bits of r10 and pass the +- frame size in the lower 32 bits. */ +- gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate); +- gcc_assert ((args_size & 0xffffffff) == args_size); +- +- if (split_stack_fn_large == NULL_RTX) +- { +- split_stack_fn_large +- = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); +- SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL; +- } +- if (ix86_cmodel == CM_LARGE_PIC) +- { +- rtx_code_label *label; +- rtx x; +- +- label = gen_label_rtx (); +- emit_label (label); +- LABEL_PRESERVE_P (label) = 1; +- emit_insn (gen_set_rip_rex64 (reg10, label)); +- emit_insn (gen_set_got_offset_rex64 (reg11, label)); +- emit_insn (ix86_gen_add3 (reg10, reg10, reg11)); +- x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large), +- UNSPEC_GOT); +- x = gen_rtx_CONST (Pmode, x); +- emit_move_insn (reg11, x); +- x = gen_rtx_PLUS (Pmode, reg10, reg11); +- x = gen_const_mem (Pmode, x); +- emit_move_insn (reg11, x); +- } +- else +- emit_move_insn (reg11, split_stack_fn_large); ++ For the 80386, we handle X+REG by loading X into a register R and ++ using R+REG. R will go in a general reg and indexing will be used. ++ However, if REG is a broken-out memory address or multiplication, ++ nothing needs to be done because REG can certainly go in a general reg. + +- fn = reg11; ++ When -fpic is used, special handling is needed for symbolic references. ++ See comments by legitimize_pic_address in i386.c for details. */ + +- argval = ((args_size << 16) << 16) + allocate; +- emit_move_insn (reg10, GEN_INT (argval)); +- } +- else +- { +- emit_move_insn (reg10, allocate_rtx); +- emit_move_insn (reg11, GEN_INT (args_size)); +- use_reg (&call_fusage, reg11); +- } ++static rtx ++ix86_legitimize_address (rtx x, rtx, machine_mode mode) ++{ ++ bool changed = false; ++ unsigned log; + +- use_reg (&call_fusage, reg10); ++ log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; ++ if (log) ++ return legitimize_tls_address (x, (enum tls_model) log, false); ++ if (GET_CODE (x) == CONST ++ && GET_CODE (XEXP (x, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF ++ && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) ++ { ++ rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), ++ (enum tls_model) log, false); ++ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); + } +- else ++ ++ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) + { +- rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size))); +- add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD)); +- insn = emit_insn (gen_push (allocate_rtx)); +- add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD)); +- pop = GEN_INT (2 * UNITS_PER_WORD); ++ rtx tmp = legitimize_pe_coff_symbol (x, true); ++ if (tmp) ++ return tmp; + } +- call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn), +- GEN_INT (UNITS_PER_WORD), constm1_rtx, +- pop, false); +- add_function_usage_to (call_insn, call_fusage); +- if (!TARGET_64BIT) +- add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0)); +- /* Indicate that this function can't jump to non-local gotos. */ +- make_reg_eh_region_note_nothrow_nononlocal (call_insn); + +- /* In order to make call/return prediction work right, we now need +- to execute a return instruction. See +- libgcc/config/i386/morestack.S for the details on how this works. 
++ if (flag_pic && SYMBOLIC_CONST (x)) ++ return legitimize_pic_address (x, 0); + +- For flow purposes gcc must not see this as a return +- instruction--we need control flow to continue at the subsequent +- label. Therefore, we use an unspec. */ +- gcc_assert (crtl->args.pops_args < 65536); +- rtx_insn *ret_insn +- = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); ++#if TARGET_MACHO ++ if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x)) ++ return machopic_indirect_data_reference (x, 0); ++#endif + +- if ((flag_cf_protection & CF_BRANCH)) ++ /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ ++ if (GET_CODE (x) == ASHIFT ++ && CONST_INT_P (XEXP (x, 1)) ++ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) + { +- /* Insert ENDBR since __morestack will jump back here via indirect +- call. */ +- rtx cet_eb = gen_nop_endbr (); +- emit_insn_after (cet_eb, ret_insn); ++ changed = true; ++ log = INTVAL (XEXP (x, 1)); ++ x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), ++ GEN_INT (1 << log)); + } + +- /* If we are in 64-bit mode and this function uses a static chain, +- we saved %r10 in %rax before calling _morestack. */ +- if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl)) +- emit_move_insn (gen_rtx_REG (word_mode, R10_REG), +- gen_rtx_REG (word_mode, AX_REG)); +- +- /* If this function calls va_start, we need to store a pointer to +- the arguments on the old stack, because they may not have been +- all copied to the new stack. At this point the old stack can be +- found at the frame pointer value used by __morestack, because +- __morestack has set that up before calling back to us. Here we +- store that pointer in a scratch register, and in +- ix86_expand_prologue we store the scratch register in a stack +- slot. */ +- if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) ++ if (GET_CODE (x) == PLUS) + { +- unsigned int scratch_regno; +- rtx frame_reg; +- int words; ++ /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ + +- scratch_regno = split_stack_prologue_scratch_regno (); +- scratch_reg = gen_rtx_REG (Pmode, scratch_regno); +- frame_reg = gen_rtx_REG (Pmode, BP_REG); ++ if (GET_CODE (XEXP (x, 0)) == ASHIFT ++ && CONST_INT_P (XEXP (XEXP (x, 0), 1)) ++ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) ++ { ++ changed = true; ++ log = INTVAL (XEXP (XEXP (x, 0), 1)); ++ XEXP (x, 0) = gen_rtx_MULT (Pmode, ++ force_reg (Pmode, XEXP (XEXP (x, 0), 0)), ++ GEN_INT (1 << log)); ++ } + +- /* 64-bit: +- fp -> old fp value +- return address within this function +- return address of caller of this function +- stack arguments +- So we add three words to get to the stack arguments. ++ if (GET_CODE (XEXP (x, 1)) == ASHIFT ++ && CONST_INT_P (XEXP (XEXP (x, 1), 1)) ++ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) ++ { ++ changed = true; ++ log = INTVAL (XEXP (XEXP (x, 1), 1)); ++ XEXP (x, 1) = gen_rtx_MULT (Pmode, ++ force_reg (Pmode, XEXP (XEXP (x, 1), 0)), ++ GEN_INT (1 << log)); ++ } + +- 32-bit: +- fp -> old fp value +- return address within this function +- first argument to __morestack +- second argument to __morestack +- return address of caller of this function +- stack arguments +- So we add five words to get to the stack arguments. +- */ +- words = TARGET_64BIT ? 3 : 5; +- emit_insn (gen_rtx_SET (scratch_reg, +- gen_rtx_PLUS (Pmode, frame_reg, +- GEN_INT (words * UNITS_PER_WORD)))); ++ /* Put multiply first if it isn't already. 
*/ ++ if (GET_CODE (XEXP (x, 1)) == MULT) ++ { ++ std::swap (XEXP (x, 0), XEXP (x, 1)); ++ changed = true; ++ } + +- varargs_label = gen_label_rtx (); +- emit_jump_insn (gen_jump (varargs_label)); +- JUMP_LABEL (get_last_insn ()) = varargs_label; ++ /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) ++ into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be ++ created by virtual register instantiation, register elimination, and ++ similar optimizations. */ ++ if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) ++ { ++ changed = true; ++ x = gen_rtx_PLUS (Pmode, ++ gen_rtx_PLUS (Pmode, XEXP (x, 0), ++ XEXP (XEXP (x, 1), 0)), ++ XEXP (XEXP (x, 1), 1)); ++ } + +- emit_barrier (); +- } ++ /* Canonicalize ++ (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) ++ into (plus (plus (mult (reg) (const)) (reg)) (const)). */ ++ else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT ++ && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS ++ && CONSTANT_P (XEXP (x, 1))) ++ { ++ rtx constant; ++ rtx other = NULL_RTX; + +- emit_label (label); +- LABEL_NUSES (label) = 1; ++ if (CONST_INT_P (XEXP (x, 1))) ++ { ++ constant = XEXP (x, 1); ++ other = XEXP (XEXP (XEXP (x, 0), 1), 1); ++ } ++ else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1))) ++ { ++ constant = XEXP (XEXP (XEXP (x, 0), 1), 1); ++ other = XEXP (x, 1); ++ } ++ else ++ constant = 0; + +- /* If this function calls va_start, we now have to set the scratch +- register for the case where we do not call __morestack. In this +- case we need to set it based on the stack pointer. */ +- if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) +- { +- emit_insn (gen_rtx_SET (scratch_reg, +- gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- GEN_INT (UNITS_PER_WORD)))); ++ if (constant) ++ { ++ changed = true; ++ x = gen_rtx_PLUS (Pmode, ++ gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), ++ XEXP (XEXP (XEXP (x, 0), 1), 0)), ++ plus_constant (Pmode, other, ++ INTVAL (constant))); ++ } ++ } + +- emit_label (varargs_label); +- LABEL_NUSES (varargs_label) = 1; +- } +-} +- +-/* We may have to tell the dataflow pass that the split stack prologue +- is initializing a scratch register. */ +- +-static void +-ix86_live_on_entry (bitmap regs) +-{ +- if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) +- { +- gcc_assert (flag_split_stack); +- bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); +- } +-} +- +-/* Extract the parts of an RTL expression that is a valid memory address +- for an instruction. Return 0 if the structure of the address is +- grossly off. Return -1 if the address contains ASHIFT, so it is not +- strictly valid, but still used for computing length of lea instruction. */ +- +-int +-ix86_decompose_address (rtx addr, struct ix86_address *out) +-{ +- rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; +- rtx base_reg, index_reg; +- HOST_WIDE_INT scale = 1; +- rtx scale_rtx = NULL_RTX; +- rtx tmp; +- int retval = 1; +- addr_space_t seg = ADDR_SPACE_GENERIC; ++ if (changed && ix86_legitimate_address_p (mode, x, false)) ++ return x; + +- /* Allow zero-extended SImode addresses, +- they will be emitted with addr32 prefix. 
*/ +- if (TARGET_64BIT && GET_MODE (addr) == DImode) +- { +- if (GET_CODE (addr) == ZERO_EXTEND +- && GET_MODE (XEXP (addr, 0)) == SImode) +- { +- addr = XEXP (addr, 0); +- if (CONST_INT_P (addr)) +- return 0; +- } +- else if (GET_CODE (addr) == AND +- && const_32bit_mask (XEXP (addr, 1), DImode)) ++ if (GET_CODE (XEXP (x, 0)) == MULT) + { +- addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode); +- if (addr == NULL_RTX) +- return 0; +- +- if (CONST_INT_P (addr)) +- return 0; ++ changed = true; ++ XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0)); + } +- } + +- /* Allow SImode subregs of DImode addresses, +- they will be emitted with addr32 prefix. */ +- if (TARGET_64BIT && GET_MODE (addr) == SImode) +- { +- if (SUBREG_P (addr) +- && GET_MODE (SUBREG_REG (addr)) == DImode) ++ if (GET_CODE (XEXP (x, 1)) == MULT) + { +- addr = SUBREG_REG (addr); +- if (CONST_INT_P (addr)) +- return 0; ++ changed = true; ++ XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1)); + } +- } + +- if (REG_P (addr)) +- base = addr; +- else if (SUBREG_P (addr)) +- { +- if (REG_P (SUBREG_REG (addr))) +- base = addr; +- else +- return 0; +- } +- else if (GET_CODE (addr) == PLUS) +- { +- rtx addends[4], op; +- int n = 0, i; ++ if (changed ++ && REG_P (XEXP (x, 1)) ++ && REG_P (XEXP (x, 0))) ++ return x; + +- op = addr; +- do ++ if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) + { +- if (n >= 4) +- return 0; +- addends[n++] = XEXP (op, 1); +- op = XEXP (op, 0); ++ changed = true; ++ x = legitimize_pic_address (x, 0); + } +- while (GET_CODE (op) == PLUS); +- if (n >= 4) +- return 0; +- addends[n] = op; + +- for (i = n; i >= 0; --i) ++ if (changed && ix86_legitimate_address_p (mode, x, false)) ++ return x; ++ ++ if (REG_P (XEXP (x, 0))) + { +- op = addends[i]; +- switch (GET_CODE (op)) ++ rtx temp = gen_reg_rtx (Pmode); ++ rtx val = force_operand (XEXP (x, 1), temp); ++ if (val != temp) + { +- case MULT: +- if (index) +- return 0; +- index = XEXP (op, 0); +- scale_rtx = XEXP (op, 1); +- break; +- +- case ASHIFT: +- if (index) +- return 0; +- index = XEXP (op, 0); +- tmp = XEXP (op, 1); +- if (!CONST_INT_P (tmp)) +- return 0; +- scale = INTVAL (tmp); +- if ((unsigned HOST_WIDE_INT) scale > 3) +- return 0; +- scale = 1 << scale; +- break; +- +- case ZERO_EXTEND: +- op = XEXP (op, 0); +- if (GET_CODE (op) != UNSPEC) +- return 0; +- /* FALLTHRU */ +- +- case UNSPEC: +- if (XINT (op, 1) == UNSPEC_TP +- && TARGET_TLS_DIRECT_SEG_REFS +- && seg == ADDR_SPACE_GENERIC) +- seg = DEFAULT_TLS_SEG_REG; +- else +- return 0; +- break; +- +- case SUBREG: +- if (!REG_P (SUBREG_REG (op))) +- return 0; +- /* FALLTHRU */ +- +- case REG: +- if (!base) +- base = op; +- else if (!index) +- index = op; +- else +- return 0; +- break; ++ val = convert_to_mode (Pmode, val, 1); ++ emit_move_insn (temp, val); ++ } + +- case CONST: +- case CONST_INT: +- case SYMBOL_REF: +- case LABEL_REF: +- if (disp) +- return 0; +- disp = op; +- break; ++ XEXP (x, 1) = temp; ++ return x; ++ } + +- default: +- return 0; ++ else if (REG_P (XEXP (x, 1))) ++ { ++ rtx temp = gen_reg_rtx (Pmode); ++ rtx val = force_operand (XEXP (x, 0), temp); ++ if (val != temp) ++ { ++ val = convert_to_mode (Pmode, val, 1); ++ emit_move_insn (temp, val); + } ++ ++ XEXP (x, 0) = temp; ++ return x; + } + } +- else if (GET_CODE (addr) == MULT) +- { +- index = XEXP (addr, 0); /* index*scale */ +- scale_rtx = XEXP (addr, 1); +- } +- else if (GET_CODE (addr) == ASHIFT) +- { +- /* We're called for lea too, which implements ashift on occasion. 
*/ +- index = XEXP (addr, 0); +- tmp = XEXP (addr, 1); +- if (!CONST_INT_P (tmp)) +- return 0; +- scale = INTVAL (tmp); +- if ((unsigned HOST_WIDE_INT) scale > 3) +- return 0; +- scale = 1 << scale; +- retval = -1; +- } +- else +- disp = addr; /* displacement */ + +- if (index) +- { +- if (REG_P (index)) +- ; +- else if (SUBREG_P (index) +- && REG_P (SUBREG_REG (index))) +- ; +- else +- return 0; +- } ++ return x; ++} ++ ++/* Print an integer constant expression in assembler syntax. Addition ++ and subtraction are the only arithmetic that may appear in these ++ expressions. FILE is the stdio stream to write to, X is the rtx, and ++ CODE is the operand print code from the output string. */ + +- /* Extract the integral value of scale. */ +- if (scale_rtx) ++static void ++output_pic_addr_const (FILE *file, rtx x, int code) ++{ ++ char buf[256]; ++ ++ switch (GET_CODE (x)) + { +- if (!CONST_INT_P (scale_rtx)) +- return 0; +- scale = INTVAL (scale_rtx); +- } ++ case PC: ++ gcc_assert (flag_pic); ++ putc ('.', file); ++ break; + +- base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base; +- index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index; ++ case SYMBOL_REF: ++ if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS) ++ output_addr_const (file, x); ++ else ++ { ++ const char *name = XSTR (x, 0); + +- /* Avoid useless 0 displacement. */ +- if (disp == const0_rtx && (base || index)) +- disp = NULL_RTX; ++ /* Mark the decl as referenced so that cgraph will ++ output the function. */ ++ if (SYMBOL_REF_DECL (x)) ++ mark_decl_referenced (SYMBOL_REF_DECL (x)); + +- /* Allow arg pointer and stack pointer as index if there is not scaling. */ +- if (base_reg && index_reg && scale == 1 +- && (REGNO (index_reg) == ARG_POINTER_REGNUM +- || REGNO (index_reg) == FRAME_POINTER_REGNUM +- || REGNO (index_reg) == SP_REG)) +- { +- std::swap (base, index); +- std::swap (base_reg, index_reg); +- } +- +- /* Special case: %ebp cannot be encoded as a base without a displacement. +- Similarly %r13. */ +- if (!disp && base_reg +- && (REGNO (base_reg) == ARG_POINTER_REGNUM +- || REGNO (base_reg) == FRAME_POINTER_REGNUM +- || REGNO (base_reg) == BP_REG +- || REGNO (base_reg) == R13_REG)) +- disp = const0_rtx; +- +- /* Special case: on K6, [%esi] makes the instruction vector decoded. +- Avoid this by transforming to [%esi+0]. +- Reload calls address legitimization without cfun defined, so we need +- to test cfun for being non-NULL. */ +- if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun) +- && base_reg && !index_reg && !disp +- && REGNO (base_reg) == SI_REG) +- disp = const0_rtx; +- +- /* Special case: encode reg+reg instead of reg*2. */ +- if (!base && index && scale == 2) +- base = index, base_reg = index_reg, scale = 1; +- +- /* Special case: scaling cannot be encoded without base or displacement. */ +- if (!base && !disp && index && scale != 1) +- disp = const0_rtx; +- +- out->base = base; +- out->index = index; +- out->disp = disp; +- out->scale = scale; +- out->seg = seg; +- +- return retval; +-} +- +-/* Return cost of the memory address x. +- For i386, it is better to use a complex address than let gcc copy +- the address into a reg and make a new pseudo. But not if the address +- requires to two regs - that would mean more pseudos with longer +- lifetimes. 
*/ +-static int +-ix86_address_cost (rtx x, machine_mode, addr_space_t, bool) +-{ +- struct ix86_address parts; +- int cost = 1; +- int ok = ix86_decompose_address (x, &parts); +- +- gcc_assert (ok); ++#if TARGET_MACHO ++ if (MACHOPIC_INDIRECT ++ && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) ++ name = machopic_indirection_name (x, /*stub_p=*/true); ++#endif ++ assemble_name (file, name); ++ } ++ if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF) ++ && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) ++ fputs ("@PLT", file); ++ break; + +- if (parts.base && SUBREG_P (parts.base)) +- parts.base = SUBREG_REG (parts.base); +- if (parts.index && SUBREG_P (parts.index)) +- parts.index = SUBREG_REG (parts.index); ++ case LABEL_REF: ++ x = XEXP (x, 0); ++ /* FALLTHRU */ ++ case CODE_LABEL: ++ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); ++ assemble_name (asm_out_file, buf); ++ break; + +- /* Attempt to minimize number of registers in the address by increasing +- address cost for each used register. We don't increase address cost +- for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx" +- is not invariant itself it most likely means that base or index is not +- invariant. Therefore only "pic_offset_table_rtx" could be hoisted out, +- which is not profitable for x86. */ +- if (parts.base +- && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) +- && (current_pass->type == GIMPLE_PASS +- || !pic_offset_table_rtx +- || !REG_P (parts.base) +- || REGNO (pic_offset_table_rtx) != REGNO (parts.base))) +- cost++; ++ case CONST_INT: ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); ++ break; + +- if (parts.index +- && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) +- && (current_pass->type == GIMPLE_PASS +- || !pic_offset_table_rtx +- || !REG_P (parts.index) +- || REGNO (pic_offset_table_rtx) != REGNO (parts.index))) +- cost++; ++ case CONST: ++ /* This used to output parentheses around the expression, ++ but that does not work on the 386 (either ATT or BSD assembler). */ ++ output_pic_addr_const (file, XEXP (x, 0), code); ++ break; + +- /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, +- since it's predecode logic can't detect the length of instructions +- and it degenerates to vector decoded. Increase cost of such +- addresses here. The penalty is minimally 2 cycles. It may be worthwhile +- to split such addresses or even refuse such addresses at all. ++ case CONST_DOUBLE: ++ /* We can't handle floating point constants; ++ TARGET_PRINT_OPERAND must handle them. */ ++ output_operand_lossage ("floating constant misused"); ++ break; + +- Following addressing modes are affected: +- [base+scale*index] +- [scale*index+disp] +- [base+index] ++ case PLUS: ++ /* Some assemblers need integer constants to appear first. */ ++ if (CONST_INT_P (XEXP (x, 0))) ++ { ++ output_pic_addr_const (file, XEXP (x, 0), code); ++ putc ('+', file); ++ output_pic_addr_const (file, XEXP (x, 1), code); ++ } ++ else ++ { ++ gcc_assert (CONST_INT_P (XEXP (x, 1))); ++ output_pic_addr_const (file, XEXP (x, 1), code); ++ putc ('+', file); ++ output_pic_addr_const (file, XEXP (x, 0), code); ++ } ++ break; + +- The first and last case may be avoidable by explicitly coding the zero in +- memory address, but I don't have AMD-K6 machine handy to check this +- theory. */ ++ case MINUS: ++ if (!TARGET_MACHO) ++ putc (ASSEMBLER_DIALECT == ASM_INTEL ? 
'(' : '[', file); ++ output_pic_addr_const (file, XEXP (x, 0), code); ++ putc ('-', file); ++ output_pic_addr_const (file, XEXP (x, 1), code); ++ if (!TARGET_MACHO) ++ putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); ++ break; + +- if (TARGET_K6 +- && ((!parts.disp && parts.base && parts.index && parts.scale != 1) +- || (parts.disp && !parts.base && parts.index && parts.scale != 1) +- || (!parts.disp && parts.base && parts.index && parts.scale == 1))) +- cost += 10; ++ case UNSPEC: ++ gcc_assert (XVECLEN (x, 0) == 1); ++ output_pic_addr_const (file, XVECEXP (x, 0, 0), code); ++ switch (XINT (x, 1)) ++ { ++ case UNSPEC_GOT: ++ fputs ("@GOT", file); ++ break; ++ case UNSPEC_GOTOFF: ++ fputs ("@GOTOFF", file); ++ break; ++ case UNSPEC_PLTOFF: ++ fputs ("@PLTOFF", file); ++ break; ++ case UNSPEC_PCREL: ++ fputs (ASSEMBLER_DIALECT == ASM_ATT ? ++ "(%rip)" : "[rip]", file); ++ break; ++ case UNSPEC_GOTPCREL: ++ fputs (ASSEMBLER_DIALECT == ASM_ATT ? ++ "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); ++ break; ++ case UNSPEC_GOTTPOFF: ++ /* FIXME: This might be @TPOFF in Sun ld too. */ ++ fputs ("@gottpoff", file); ++ break; ++ case UNSPEC_TPOFF: ++ fputs ("@tpoff", file); ++ break; ++ case UNSPEC_NTPOFF: ++ if (TARGET_64BIT) ++ fputs ("@tpoff", file); ++ else ++ fputs ("@ntpoff", file); ++ break; ++ case UNSPEC_DTPOFF: ++ fputs ("@dtpoff", file); ++ break; ++ case UNSPEC_GOTNTPOFF: ++ if (TARGET_64BIT) ++ fputs (ASSEMBLER_DIALECT == ASM_ATT ? ++ "@gottpoff(%rip)": "@gottpoff[rip]", file); ++ else ++ fputs ("@gotntpoff", file); ++ break; ++ case UNSPEC_INDNTPOFF: ++ fputs ("@indntpoff", file); ++ break; ++#if TARGET_MACHO ++ case UNSPEC_MACHOPIC_OFFSET: ++ putc ('-', file); ++ machopic_output_function_base_name (file); ++ break; ++#endif ++ default: ++ output_operand_lossage ("invalid UNSPEC as operand"); ++ break; ++ } ++ break; + +- return cost; ++ default: ++ output_operand_lossage ("invalid expression as operand"); ++ } + } +- +-/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as +- this is used for to form addresses to local data when -fPIC is in +- use. */ + +-static bool +-darwin_local_data_pic (rtx disp) ++/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. ++ We need to emit DTP-relative relocations. */ ++ ++static void ATTRIBUTE_UNUSED ++i386_output_dwarf_dtprel (FILE *file, int size, rtx x) + { +- return (GET_CODE (disp) == UNSPEC +- && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); ++ fputs (ASM_LONG, file); ++ output_addr_const (file, x); ++ fputs ("@dtpoff", file); ++ switch (size) ++ { ++ case 4: ++ break; ++ case 8: ++ fputs (", 0", file); ++ break; ++ default: ++ gcc_unreachable (); ++ } + } + +-/* True if operand X should be loaded from GOT. */ ++/* Return true if X is a representation of the PIC register. This copes ++ with calls from ix86_find_base_term, where the register might have ++ been replaced by a cselib value. 
*/ + +-bool +-ix86_force_load_from_GOT_p (rtx x) ++static bool ++ix86_pic_register_p (rtx x) + { +- return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X) +- && !TARGET_PECOFF && !TARGET_MACHO +- && !flag_pic +- && ix86_cmodel != CM_LARGE +- && GET_CODE (x) == SYMBOL_REF +- && SYMBOL_REF_FUNCTION_P (x) +- && (!flag_plt +- || (SYMBOL_REF_DECL (x) +- && lookup_attribute ("noplt", +- DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))) +- && !SYMBOL_REF_LOCAL_P (x)); ++ if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) ++ return (pic_offset_table_rtx ++ && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); ++ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT) ++ return true; ++ else if (!REG_P (x)) ++ return false; ++ else if (pic_offset_table_rtx) ++ { ++ if (REGNO (x) == REGNO (pic_offset_table_rtx)) ++ return true; ++ if (HARD_REGISTER_P (x) ++ && !HARD_REGISTER_P (pic_offset_table_rtx) ++ && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) ++ return true; ++ return false; ++ } ++ else ++ return REGNO (x) == PIC_OFFSET_TABLE_REGNUM; + } + +-/* Determine if a given RTX is a valid constant. We already know this +- satisfies CONSTANT_P. */ ++/* Helper function for ix86_delegitimize_address. ++ Attempt to delegitimize TLS local-exec accesses. */ + +-static bool +-ix86_legitimate_constant_p (machine_mode mode, rtx x) ++static rtx ++ix86_delegitimize_tls_address (rtx orig_x) + { +- switch (GET_CODE (x)) +- { +- case CONST: +- x = XEXP (x, 0); +- +- if (GET_CODE (x) == PLUS) +- { +- if (!CONST_INT_P (XEXP (x, 1))) +- return false; +- x = XEXP (x, 0); +- } ++ rtx x = orig_x, unspec; ++ struct ix86_address addr; + +- if (TARGET_MACHO && darwin_local_data_pic (x)) +- return true; ++ if (!TARGET_TLS_DIRECT_SEG_REFS) ++ return orig_x; ++ if (MEM_P (x)) ++ x = XEXP (x, 0); ++ if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode) ++ return orig_x; ++ if (ix86_decompose_address (x, &addr) == 0 ++ || addr.seg != DEFAULT_TLS_SEG_REG ++ || addr.disp == NULL_RTX ++ || GET_CODE (addr.disp) != CONST) ++ return orig_x; ++ unspec = XEXP (addr.disp, 0); ++ if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1))) ++ unspec = XEXP (unspec, 0); ++ if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF) ++ return orig_x; ++ x = XVECEXP (unspec, 0, 0); ++ gcc_assert (GET_CODE (x) == SYMBOL_REF); ++ if (unspec != XEXP (addr.disp, 0)) ++ x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1)); ++ if (addr.index) ++ { ++ rtx idx = addr.index; ++ if (addr.scale != 1) ++ idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale)); ++ x = gen_rtx_PLUS (Pmode, idx, x); ++ } ++ if (addr.base) ++ x = gen_rtx_PLUS (Pmode, addr.base, x); ++ if (MEM_P (orig_x)) ++ x = replace_equiv_address_nv (orig_x, x); ++ return x; ++} + +- /* Only some unspecs are valid as "constants". */ +- if (GET_CODE (x) == UNSPEC) +- switch (XINT (x, 1)) +- { +- case UNSPEC_GOT: +- case UNSPEC_GOTOFF: +- case UNSPEC_PLTOFF: +- return TARGET_64BIT; +- case UNSPEC_TPOFF: +- case UNSPEC_NTPOFF: +- x = XVECEXP (x, 0, 0); +- return (GET_CODE (x) == SYMBOL_REF +- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); +- case UNSPEC_DTPOFF: +- x = XVECEXP (x, 0, 0); +- return (GET_CODE (x) == SYMBOL_REF +- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); +- default: +- return false; +- } ++/* In the name of slightly smaller debug output, and to cater to ++ general assembler lossage, recognize PIC+GOTOFF and turn it back ++ into a direct symbol reference. + +- /* We must have drilled down to a symbol. 
*/ +- if (GET_CODE (x) == LABEL_REF) +- return true; +- if (GET_CODE (x) != SYMBOL_REF) +- return false; +- /* FALLTHRU */ ++ On Darwin, this is necessary to avoid a crash, because Darwin ++ has a different PIC label for each routine but the DWARF debugging ++ information is not associated with any particular routine, so it's ++ necessary to remove references to the PIC label from RTL stored by ++ the DWARF output code. + +- case SYMBOL_REF: +- /* TLS symbols are never valid. */ +- if (SYMBOL_REF_TLS_MODEL (x)) +- return false; ++ This helper is used in the normal ix86_delegitimize_address ++ entrypoint (e.g. used in the target delegitimization hook) and ++ in ix86_find_base_term. As compile time memory optimization, we ++ avoid allocating rtxes that will not change anything on the outcome ++ of the callers (find_base_value and find_base_term). */ + +- /* DLLIMPORT symbols are never valid. */ +- if (TARGET_DLLIMPORT_DECL_ATTRIBUTES +- && SYMBOL_REF_DLLIMPORT_P (x)) +- return false; ++static inline rtx ++ix86_delegitimize_address_1 (rtx x, bool base_term_p) ++{ ++ rtx orig_x = delegitimize_mem_from_attrs (x); ++ /* addend is NULL or some rtx if x is something+GOTOFF where ++ something doesn't include the PIC register. */ ++ rtx addend = NULL_RTX; ++ /* reg_addend is NULL or a multiple of some register. */ ++ rtx reg_addend = NULL_RTX; ++ /* const_addend is NULL or a const_int. */ ++ rtx const_addend = NULL_RTX; ++ /* This is the result, or NULL. */ ++ rtx result = NULL_RTX; + +-#if TARGET_MACHO +- /* mdynamic-no-pic */ +- if (MACHO_DYNAMIC_NO_PIC_P) +- return machopic_symbol_defined_p (x); +-#endif ++ x = orig_x; + +- /* External function address should be loaded +- via the GOT slot to avoid PLT. */ +- if (ix86_force_load_from_GOT_p (x)) +- return false; ++ if (MEM_P (x)) ++ x = XEXP (x, 0); + +- break; ++ if (TARGET_64BIT) ++ { ++ if (GET_CODE (x) == CONST ++ && GET_CODE (XEXP (x, 0)) == PLUS ++ && GET_MODE (XEXP (x, 0)) == Pmode ++ && CONST_INT_P (XEXP (XEXP (x, 0), 1)) ++ && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC ++ && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL) ++ { ++ /* find_base_{value,term} only care about MEMs with arg_pointer_rtx ++ base. A CONST can't be arg_pointer_rtx based. */ ++ if (base_term_p && MEM_P (orig_x)) ++ return orig_x; ++ rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0); ++ x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2); ++ if (MEM_P (orig_x)) ++ x = replace_equiv_address_nv (orig_x, x); ++ return x; ++ } + +- CASE_CONST_SCALAR_INT: +- switch (mode) ++ if (GET_CODE (x) == CONST ++ && GET_CODE (XEXP (x, 0)) == UNSPEC ++ && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL ++ || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL) ++ && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)) + { +- case E_TImode: +- if (TARGET_64BIT) +- return true; +- /* FALLTHRU */ +- case E_OImode: +- case E_XImode: +- if (!standard_sse_constant_p (x, mode)) +- return false; +- default: +- break; ++ x = XVECEXP (XEXP (x, 0), 0, 0); ++ if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) ++ { ++ x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x)); ++ if (x == NULL_RTX) ++ return orig_x; ++ } ++ return x; + } +- break; + +- case CONST_VECTOR: +- if (!standard_sse_constant_p (x, mode)) +- return false; ++ if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC) ++ return ix86_delegitimize_tls_address (orig_x); + +- default: +- break; ++ /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic ++ and -mcmodel=medium -fpic. 
*/ + } + +- /* Otherwise we handle everything else in the move patterns. */ +- return true; +-} +- +-/* Determine if it's legal to put X into the constant pool. This +- is not possible for the address of thread-local symbols, which +- is checked above. */ ++ if (GET_CODE (x) != PLUS ++ || GET_CODE (XEXP (x, 1)) != CONST) ++ return ix86_delegitimize_tls_address (orig_x); + +-static bool +-ix86_cannot_force_const_mem (machine_mode mode, rtx x) +-{ +- /* We can put any immediate constant in memory. */ +- switch (GET_CODE (x)) ++ if (ix86_pic_register_p (XEXP (x, 0))) ++ /* %ebx + GOT/GOTOFF */ ++ ; ++ else if (GET_CODE (XEXP (x, 0)) == PLUS) + { +- CASE_CONST_ANY: +- return false; ++ /* %ebx + %reg * scale + GOT/GOTOFF */ ++ reg_addend = XEXP (x, 0); ++ if (ix86_pic_register_p (XEXP (reg_addend, 0))) ++ reg_addend = XEXP (reg_addend, 1); ++ else if (ix86_pic_register_p (XEXP (reg_addend, 1))) ++ reg_addend = XEXP (reg_addend, 0); ++ else ++ { ++ reg_addend = NULL_RTX; ++ addend = XEXP (x, 0); ++ } ++ } ++ else ++ addend = XEXP (x, 0); + +- default: +- break; ++ x = XEXP (XEXP (x, 1), 0); ++ if (GET_CODE (x) == PLUS ++ && CONST_INT_P (XEXP (x, 1))) ++ { ++ const_addend = XEXP (x, 1); ++ x = XEXP (x, 0); + } + +- return !ix86_legitimate_constant_p (mode, x); +-} ++ if (GET_CODE (x) == UNSPEC ++ && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) ++ || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)) ++ || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC ++ && !MEM_P (orig_x) && !addend))) ++ result = XVECEXP (x, 0, 0); + +-/* Nonzero if the symbol is marked as dllimport, or as stub-variable, +- otherwise zero. */ ++ if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x) ++ && !MEM_P (orig_x)) ++ result = XVECEXP (x, 0, 0); + +-static bool +-is_imported_p (rtx x) +-{ +- if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES +- || GET_CODE (x) != SYMBOL_REF) +- return false; ++ if (! result) ++ return ix86_delegitimize_tls_address (orig_x); + +- return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x); ++ /* For (PLUS something CONST_INT) both find_base_{value,term} just ++ recurse on the first operand. */ ++ if (const_addend && !base_term_p) ++ result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); ++ if (reg_addend) ++ result = gen_rtx_PLUS (Pmode, reg_addend, result); ++ if (addend) ++ { ++ /* If the rest of original X doesn't involve the PIC register, add ++ addend and subtract pic_offset_table_rtx. This can happen e.g. ++ for code like: ++ leal (%ebx, %ecx, 4), %ecx ++ ... ++ movl foo@GOTOFF(%ecx), %edx ++ in which case we return (%ecx - %ebx) + foo ++ or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg ++ and reload has completed. Don't do the latter for debug, ++ as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. 
*/ ++ if (pic_offset_table_rtx ++ && (!reload_completed || !ix86_use_pseudo_pic_reg ())) ++ result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend), ++ pic_offset_table_rtx), ++ result); ++ else if (base_term_p ++ && pic_offset_table_rtx ++ && !TARGET_MACHO ++ && !TARGET_VXWORKS_RTP) ++ { ++ rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); ++ tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp); ++ result = gen_rtx_PLUS (Pmode, tmp, result); ++ } ++ else ++ return orig_x; ++ } ++ if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x)) ++ { ++ result = lowpart_subreg (GET_MODE (orig_x), result, Pmode); ++ if (result == NULL_RTX) ++ return orig_x; ++ } ++ return result; + } + ++/* The normal instantiation of the above template. */ + +-/* Nonzero if the constant value X is a legitimate general operand +- when generating PIC code. It is given that flag_pic is on and +- that X satisfies CONSTANT_P. */ +- +-bool +-legitimate_pic_operand_p (rtx x) ++static rtx ++ix86_delegitimize_address (rtx x) + { +- rtx inner; +- +- switch (GET_CODE (x)) +- { +- case CONST: +- inner = XEXP (x, 0); +- if (GET_CODE (inner) == PLUS +- && CONST_INT_P (XEXP (inner, 1))) +- inner = XEXP (inner, 0); +- +- /* Only some unspecs are valid as "constants". */ +- if (GET_CODE (inner) == UNSPEC) +- switch (XINT (inner, 1)) +- { +- case UNSPEC_GOT: +- case UNSPEC_GOTOFF: +- case UNSPEC_PLTOFF: +- return TARGET_64BIT; +- case UNSPEC_TPOFF: +- x = XVECEXP (inner, 0, 0); +- return (GET_CODE (x) == SYMBOL_REF +- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); +- case UNSPEC_MACHOPIC_OFFSET: +- return legitimate_pic_address_disp_p (x); +- default: +- return false; +- } +- /* FALLTHRU */ +- +- case SYMBOL_REF: +- case LABEL_REF: +- return legitimate_pic_address_disp_p (x); +- +- default: +- return true; +- } ++ return ix86_delegitimize_address_1 (x, false); + } + +-/* Determine if a given CONST RTX is a valid memory displacement +- in PIC mode. */ ++/* If X is a machine specific address (i.e. a symbol or label being ++ referenced as a displacement from the GOT implemented using an ++ UNSPEC), then return the base term. Otherwise return X. */ + +-bool +-legitimate_pic_address_disp_p (rtx disp) ++rtx ++ix86_find_base_term (rtx x) + { +- bool saw_plus; ++ rtx term; + +- /* In 64bit mode we can allow direct addresses of symbols and labels +- when they are not dynamic symbols. 
*/ + if (TARGET_64BIT) + { +- rtx op0 = disp, op1; +- +- switch (GET_CODE (disp)) +- { +- case LABEL_REF: +- return true; ++ if (GET_CODE (x) != CONST) ++ return x; ++ term = XEXP (x, 0); ++ if (GET_CODE (term) == PLUS ++ && CONST_INT_P (XEXP (term, 1))) ++ term = XEXP (term, 0); ++ if (GET_CODE (term) != UNSPEC ++ || (XINT (term, 1) != UNSPEC_GOTPCREL ++ && XINT (term, 1) != UNSPEC_PCREL)) ++ return x; + +- case CONST: +- if (GET_CODE (XEXP (disp, 0)) != PLUS) +- break; +- op0 = XEXP (XEXP (disp, 0), 0); +- op1 = XEXP (XEXP (disp, 0), 1); +- if (!CONST_INT_P (op1)) +- break; +- if (GET_CODE (op0) == UNSPEC +- && (XINT (op0, 1) == UNSPEC_DTPOFF +- || XINT (op0, 1) == UNSPEC_NTPOFF) +- && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1)) +- return true; +- if (INTVAL (op1) >= 16*1024*1024 +- || INTVAL (op1) < -16*1024*1024) +- break; +- if (GET_CODE (op0) == LABEL_REF) +- return true; +- if (GET_CODE (op0) == CONST +- && GET_CODE (XEXP (op0, 0)) == UNSPEC +- && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL) +- return true; +- if (GET_CODE (op0) == UNSPEC +- && XINT (op0, 1) == UNSPEC_PCREL) +- return true; +- if (GET_CODE (op0) != SYMBOL_REF) +- break; +- /* FALLTHRU */ ++ return XVECEXP (term, 0, 0); ++ } + +- case SYMBOL_REF: +- /* TLS references should always be enclosed in UNSPEC. +- The dllimported symbol needs always to be resolved. */ +- if (SYMBOL_REF_TLS_MODEL (op0) +- || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0))) +- return false; ++ return ix86_delegitimize_address_1 (x, true); ++} + +- if (TARGET_PECOFF) +- { +- if (is_imported_p (op0)) +- return true; ++/* Return true if X shouldn't be emitted into the debug info. ++ Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_ ++ symbol easily into the .debug_info section, so we need not to ++ delegitimize, but instead assemble as @gotoff. ++ Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically ++ assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */ + +- if (SYMBOL_REF_FAR_ADDR_P (op0) +- || !SYMBOL_REF_LOCAL_P (op0)) +- break; ++static bool ++ix86_const_not_ok_for_debug_p (rtx x) ++{ ++ if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF) ++ return true; + +- /* Function-symbols need to be resolved only for +- large-model. +- For the small-model we don't need to resolve anything +- here. */ +- if ((ix86_cmodel != CM_LARGE_PIC +- && SYMBOL_REF_FUNCTION_P (op0)) +- || ix86_cmodel == CM_SMALL_PIC) +- return true; +- /* Non-external symbols don't need to be resolved for +- large, and medium-model. */ +- if ((ix86_cmodel == CM_LARGE_PIC +- || ix86_cmodel == CM_MEDIUM_PIC) +- && !SYMBOL_REF_EXTERNAL_P (op0)) +- return true; +- } +- else if (!SYMBOL_REF_FAR_ADDR_P (op0) +- && (SYMBOL_REF_LOCAL_P (op0) +- || (HAVE_LD_PIE_COPYRELOC +- && flag_pie +- && !SYMBOL_REF_WEAK (op0) +- && !SYMBOL_REF_FUNCTION_P (op0))) +- && ix86_cmodel != CM_LARGE_PIC) +- return true; +- break; ++ if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0) ++ return true; + +- default: +- break; +- } +- } +- if (GET_CODE (disp) != CONST) +- return false; +- disp = XEXP (disp, 0); ++ return false; ++} ++ ++static void ++put_condition_code (enum rtx_code code, machine_mode mode, bool reverse, ++ bool fp, FILE *file) ++{ ++ const char *suffix; + +- if (TARGET_64BIT) ++ if (mode == CCFPmode) + { +- /* We are unsafe to allow PLUS expressions. This limit allowed distance +- of GOT tables. We should not need these anyway. 
*/ +- if (GET_CODE (disp) != UNSPEC +- || (XINT (disp, 1) != UNSPEC_GOTPCREL +- && XINT (disp, 1) != UNSPEC_GOTOFF +- && XINT (disp, 1) != UNSPEC_PCREL +- && XINT (disp, 1) != UNSPEC_PLTOFF)) +- return false; +- +- if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF +- && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) +- return false; +- return true; ++ code = ix86_fp_compare_code_to_integer (code); ++ mode = CCmode; + } ++ if (reverse) ++ code = reverse_condition (code); + +- saw_plus = false; +- if (GET_CODE (disp) == PLUS) ++ switch (code) + { +- if (!CONST_INT_P (XEXP (disp, 1))) +- return false; +- disp = XEXP (disp, 0); +- saw_plus = true; +- } +- +- if (TARGET_MACHO && darwin_local_data_pic (disp)) +- return true; ++ case EQ: ++ gcc_assert (mode != CCGZmode); ++ switch (mode) ++ { ++ case E_CCAmode: ++ suffix = "a"; ++ break; ++ case E_CCCmode: ++ suffix = "c"; ++ break; ++ case E_CCOmode: ++ suffix = "o"; ++ break; ++ case E_CCPmode: ++ suffix = "p"; ++ break; ++ case E_CCSmode: ++ suffix = "s"; ++ break; ++ default: ++ suffix = "e"; ++ break; ++ } ++ break; ++ case NE: ++ gcc_assert (mode != CCGZmode); ++ switch (mode) ++ { ++ case E_CCAmode: ++ suffix = "na"; ++ break; ++ case E_CCCmode: ++ suffix = "nc"; ++ break; ++ case E_CCOmode: ++ suffix = "no"; ++ break; ++ case E_CCPmode: ++ suffix = "np"; ++ break; ++ case E_CCSmode: ++ suffix = "ns"; ++ break; ++ default: ++ suffix = "ne"; ++ break; ++ } ++ break; ++ case GT: ++ gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); ++ suffix = "g"; ++ break; ++ case GTU: ++ /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. ++ Those same assemblers have the same but opposite lossage on cmov. */ ++ if (mode == CCmode) ++ suffix = fp ? "nbe" : "a"; ++ else ++ gcc_unreachable (); ++ break; ++ case LT: ++ switch (mode) ++ { ++ case E_CCNOmode: ++ case E_CCGOCmode: ++ suffix = "s"; ++ break; + +- if (GET_CODE (disp) != UNSPEC) +- return false; ++ case E_CCmode: ++ case E_CCGCmode: ++ case E_CCGZmode: ++ suffix = "l"; ++ break; + +- switch (XINT (disp, 1)) +- { +- case UNSPEC_GOT: +- if (saw_plus) +- return false; +- /* We need to check for both symbols and labels because VxWorks loads +- text labels with @GOT rather than @GOTOFF. See gotoff_operand for +- details. */ +- return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF +- || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); +- case UNSPEC_GOTOFF: +- /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. +- While ABI specify also 32bit relocation but we don't produce it in +- small PIC model at all. */ +- if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF +- || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) +- && !TARGET_64BIT) +- return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode); +- return false; +- case UNSPEC_GOTTPOFF: +- case UNSPEC_GOTNTPOFF: +- case UNSPEC_INDNTPOFF: +- if (saw_plus) +- return false; +- disp = XVECEXP (disp, 0, 0); +- return (GET_CODE (disp) == SYMBOL_REF +- && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); +- case UNSPEC_NTPOFF: +- disp = XVECEXP (disp, 0, 0); +- return (GET_CODE (disp) == SYMBOL_REF +- && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); +- case UNSPEC_DTPOFF: +- disp = XVECEXP (disp, 0, 0); +- return (GET_CODE (disp) == SYMBOL_REF +- && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); +- } ++ default: ++ gcc_unreachable (); ++ } ++ break; ++ case LTU: ++ if (mode == CCmode || mode == CCGZmode) ++ suffix = "b"; ++ else if (mode == CCCmode) ++ suffix = fp ? 
"b" : "c"; ++ else ++ gcc_unreachable (); ++ break; ++ case GE: ++ switch (mode) ++ { ++ case E_CCNOmode: ++ case E_CCGOCmode: ++ suffix = "ns"; ++ break; + +- return false; ++ case E_CCmode: ++ case E_CCGCmode: ++ case E_CCGZmode: ++ suffix = "ge"; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ break; ++ case GEU: ++ if (mode == CCmode || mode == CCGZmode) ++ suffix = "nb"; ++ else if (mode == CCCmode) ++ suffix = fp ? "nb" : "nc"; ++ else ++ gcc_unreachable (); ++ break; ++ case LE: ++ gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); ++ suffix = "le"; ++ break; ++ case LEU: ++ if (mode == CCmode) ++ suffix = "be"; ++ else ++ gcc_unreachable (); ++ break; ++ case UNORDERED: ++ suffix = fp ? "u" : "p"; ++ break; ++ case ORDERED: ++ suffix = fp ? "nu" : "np"; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ fputs (suffix, file); + } + +-/* Determine if op is suitable RTX for an address register. +- Return naked register if a register or a register subreg is +- found, otherwise return NULL_RTX. */ ++/* Print the name of register X to FILE based on its machine mode and number. ++ If CODE is 'w', pretend the mode is HImode. ++ If CODE is 'b', pretend the mode is QImode. ++ If CODE is 'k', pretend the mode is SImode. ++ If CODE is 'q', pretend the mode is DImode. ++ If CODE is 'x', pretend the mode is V4SFmode. ++ If CODE is 't', pretend the mode is V8SFmode. ++ If CODE is 'g', pretend the mode is V16SFmode. ++ If CODE is 'h', pretend the reg is the 'high' byte register. ++ If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. ++ If CODE is 'd', duplicate the operand for AVX instruction. ++ If CODE is 'V', print naked full integer register name without %. ++ */ + +-static rtx +-ix86_validate_address_register (rtx op) ++void ++print_reg (rtx x, int code, FILE *file) + { +- machine_mode mode = GET_MODE (op); ++ const char *reg; ++ int msize; ++ unsigned int regno; ++ bool duplicated; + +- /* Only SImode or DImode registers can form the address. */ +- if (mode != SImode && mode != DImode) +- return NULL_RTX; ++ if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V') ++ putc ('%', file); + +- if (REG_P (op)) +- return op; +- else if (SUBREG_P (op)) ++ if (x == pc_rtx) + { +- rtx reg = SUBREG_REG (op); ++ gcc_assert (TARGET_64BIT); ++ fputs ("rip", file); ++ return; ++ } + +- if (!REG_P (reg)) +- return NULL_RTX; ++ if (code == 'y' && STACK_TOP_P (x)) ++ { ++ fputs ("st(0)", file); ++ return; ++ } + +- mode = GET_MODE (reg); ++ if (code == 'w') ++ msize = 2; ++ else if (code == 'b') ++ msize = 1; ++ else if (code == 'k') ++ msize = 4; ++ else if (code == 'q') ++ msize = 8; ++ else if (code == 'h') ++ msize = 0; ++ else if (code == 'x') ++ msize = 16; ++ else if (code == 't') ++ msize = 32; ++ else if (code == 'g') ++ msize = 64; ++ else ++ msize = GET_MODE_SIZE (GET_MODE (x)); + +- /* Don't allow SUBREGs that span more than a word. It can +- lead to spill failures when the register is one word out +- of a two word structure. */ +- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) +- return NULL_RTX; ++ regno = REGNO (x); + +- /* Allow only SUBREGs of non-eliminable hard registers. 
*/ +- if (register_no_elim_operand (reg, mode)) +- return reg; ++ if (regno == ARG_POINTER_REGNUM ++ || regno == FRAME_POINTER_REGNUM ++ || regno == FPSR_REG) ++ { ++ output_operand_lossage ++ ("invalid use of register '%s'", reg_names[regno]); ++ return; ++ } ++ else if (regno == FLAGS_REG) ++ { ++ output_operand_lossage ("invalid use of asm flag output"); ++ return; + } + +- /* Op is not a register. */ +- return NULL_RTX; +-} +- +-/* Recognizes RTL expressions that are valid memory addresses for an +- instruction. The MODE argument is the machine mode for the MEM +- expression that wants to use this address. +- +- It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should +- convert common non-canonical forms to canonical form so that they will +- be recognized. */ +- +-static bool +-ix86_legitimate_address_p (machine_mode, rtx addr, bool strict) +-{ +- struct ix86_address parts; +- rtx base, index, disp; +- HOST_WIDE_INT scale; +- addr_space_t seg; +- +- if (ix86_decompose_address (addr, &parts) <= 0) +- /* Decomposition failed. */ +- return false; ++ if (code == 'V') ++ { ++ if (GENERAL_REGNO_P (regno)) ++ msize = GET_MODE_SIZE (word_mode); ++ else ++ error ("% modifier on non-integer register"); ++ } + +- base = parts.base; +- index = parts.index; +- disp = parts.disp; +- scale = parts.scale; +- seg = parts.seg; ++ duplicated = code == 'd' && TARGET_AVX; + +- /* Validate base register. */ +- if (base) ++ switch (msize) + { +- rtx reg = ix86_validate_address_register (base); ++ case 16: ++ case 12: ++ case 8: ++ if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode)) ++ warning (0, "unsupported size for integer register"); ++ /* FALLTHRU */ ++ case 4: ++ if (LEGACY_INT_REGNO_P (regno)) ++ putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file); ++ /* FALLTHRU */ ++ case 2: ++ normal: ++ reg = hi_reg_name[regno]; ++ break; ++ case 1: ++ if (regno >= ARRAY_SIZE (qi_reg_name)) ++ goto normal; ++ if (!ANY_QI_REGNO_P (regno)) ++ error ("unsupported size for integer register"); ++ reg = qi_reg_name[regno]; ++ break; ++ case 0: ++ if (regno >= ARRAY_SIZE (qi_high_reg_name)) ++ goto normal; ++ reg = qi_high_reg_name[regno]; ++ break; ++ case 32: ++ case 64: ++ if (SSE_REGNO_P (regno)) ++ { ++ gcc_assert (!duplicated); ++ putc (msize == 32 ? 'y' : 'z', file); ++ reg = hi_reg_name[regno] + 1; ++ break; ++ } ++ goto normal; ++ default: ++ gcc_unreachable (); ++ } + +- if (reg == NULL_RTX) +- return false; ++ fputs (reg, file); + +- if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) +- || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) +- /* Base is not valid. */ +- return false; ++ /* Irritatingly, AMD extended registers use ++ different naming convention: "r%d[bwd]" */ ++ if (REX_INT_REGNO_P (regno)) ++ { ++ gcc_assert (TARGET_64BIT); ++ switch (msize) ++ { ++ case 0: ++ error ("extended registers have no high halves"); ++ break; ++ case 1: ++ putc ('b', file); ++ break; ++ case 2: ++ putc ('w', file); ++ break; ++ case 4: ++ putc ('d', file); ++ break; ++ case 8: ++ /* no suffix */ ++ break; ++ default: ++ error ("unsupported operand size for extended register"); ++ break; ++ } ++ return; + } + +- /* Validate index register. */ +- if (index) ++ if (duplicated) + { +- rtx reg = ix86_validate_address_register (index); +- +- if (reg == NULL_RTX) +- return false; +- +- if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) +- || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) +- /* Index is not valid. 
*/ +- return false; ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ fprintf (file, ", %%%s", reg); ++ else ++ fprintf (file, ", %s", reg); + } ++} + +- /* Index and base should have the same mode. */ +- if (base && index +- && GET_MODE (base) != GET_MODE (index)) +- return false; +- +- /* Address override works only on the (%reg) part of %fs:(%reg). */ +- if (seg != ADDR_SPACE_GENERIC +- && ((base && GET_MODE (base) != word_mode) +- || (index && GET_MODE (index) != word_mode))) +- return false; ++/* Meaning of CODE: ++ L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. ++ C -- print opcode suffix for set/cmov insn. ++ c -- like C, but print reversed condition ++ F,f -- likewise, but for floating-point. ++ O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", ++ otherwise nothing ++ R -- print embedded rounding and sae. ++ r -- print only sae. ++ z -- print the opcode suffix for the size of the current operand. ++ Z -- likewise, with special suffixes for x87 instructions. ++ * -- print a star (in certain assembler syntax) ++ A -- print an absolute memory reference. ++ E -- print address with DImode register names if TARGET_64BIT. ++ w -- print the operand as if it's a "word" (HImode) even if it isn't. ++ s -- print a shift double count, followed by the assemblers argument ++ delimiter. ++ b -- print the QImode name of the register for the indicated operand. ++ %b0 would print %al if operands[0] is reg 0. ++ w -- likewise, print the HImode name of the register. ++ k -- likewise, print the SImode name of the register. ++ q -- likewise, print the DImode name of the register. ++ x -- likewise, print the V4SFmode name of the register. ++ t -- likewise, print the V8SFmode name of the register. ++ g -- likewise, print the V16SFmode name of the register. ++ h -- print the QImode name for a "high" register, either ah, bh, ch or dh. ++ y -- print "st(0)" instead of "st" as a register. ++ d -- print duplicated register operand for AVX instruction. ++ D -- print condition for SSE cmp instruction. ++ P -- if PIC, print an @PLT suffix. ++ p -- print raw symbol name. ++ X -- don't print any sort of PIC '@' suffix for a symbol. ++ & -- print some in-use local-dynamic symbol name. ++ H -- print a memory address offset by 8; used for sse high-parts ++ Y -- print condition for XOP pcom* instruction. ++ V -- print naked full integer register name without %. ++ + -- print a branch hint as 'cs' or 'ds' prefix ++ ; -- print a semicolon (after prefixes due to bug in older gas). ++ ~ -- print "i" if TARGET_AVX2, "f" otherwise. ++ ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode ++ M -- print addr32 prefix for TARGET_X32 with VSIB address. ++ ! -- print NOTRACK prefix for jxx/call/ret instructions if required. ++ */ + +- /* Validate scale factor. */ +- if (scale != 1) ++void ++ix86_print_operand (FILE *file, rtx x, int code) ++{ ++ if (code) + { +- if (!index) +- /* Scale without index. */ +- return false; ++ switch (code) ++ { ++ case 'A': ++ switch (ASSEMBLER_DIALECT) ++ { ++ case ASM_ATT: ++ putc ('*', file); ++ break; + +- if (scale != 2 && scale != 4 && scale != 8) +- /* Scale is not a valid multiplier. */ +- return false; +- } ++ case ASM_INTEL: ++ /* Intel syntax. For absolute addresses, registers should not ++ be surrounded by braces. */ ++ if (!REG_P (x)) ++ { ++ putc ('[', file); ++ ix86_print_operand (file, x, 0); ++ putc (']', file); ++ return; ++ } ++ break; + +- /* Validate displacement. 
*/ +- if (disp) +- { +- if (GET_CODE (disp) == CONST +- && GET_CODE (XEXP (disp, 0)) == UNSPEC +- && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) +- switch (XINT (XEXP (disp, 0), 1)) +- { +- /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit +- when used. While ABI specify also 32bit relocations, we +- don't produce them at all and use IP relative instead. +- Allow GOT in 32bit mode for both PIC and non-PIC if symbol +- should be loaded via GOT. */ +- case UNSPEC_GOT: +- if (!TARGET_64BIT +- && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) +- goto is_legitimate_pic; +- /* FALLTHRU */ +- case UNSPEC_GOTOFF: +- gcc_assert (flag_pic); +- if (!TARGET_64BIT) +- goto is_legitimate_pic; ++ default: ++ gcc_unreachable (); ++ } + +- /* 64bit address unspec. */ +- return false; ++ ix86_print_operand (file, x, 0); ++ return; + +- case UNSPEC_GOTPCREL: +- if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) +- goto is_legitimate_pic; +- /* FALLTHRU */ +- case UNSPEC_PCREL: +- gcc_assert (flag_pic); +- goto is_legitimate_pic; ++ case 'E': ++ /* Wrap address in an UNSPEC to declare special handling. */ ++ if (TARGET_64BIT) ++ x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); + +- case UNSPEC_GOTTPOFF: +- case UNSPEC_GOTNTPOFF: +- case UNSPEC_INDNTPOFF: +- case UNSPEC_NTPOFF: +- case UNSPEC_DTPOFF: +- break; +- +- default: +- /* Invalid address unspec. */ +- return false; +- } ++ output_address (VOIDmode, x); ++ return; + +- else if (SYMBOLIC_CONST (disp) +- && (flag_pic +- || (TARGET_MACHO +-#if TARGET_MACHO +- && MACHOPIC_INDIRECT +- && !machopic_operand_p (disp) +-#endif +- ))) +- { ++ case 'L': ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('l', file); ++ return; + +- is_legitimate_pic: +- if (TARGET_64BIT && (index || base)) +- { +- /* foo@dtpoff(%rX) is ok. */ +- if (GET_CODE (disp) != CONST +- || GET_CODE (XEXP (disp, 0)) != PLUS +- || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC +- || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) +- || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF +- && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) +- /* Non-constant pic memory reference. */ +- return false; +- } +- else if ((!TARGET_MACHO || flag_pic) +- && ! legitimate_pic_address_disp_p (disp)) +- /* Displacement is an invalid pic construct. */ +- return false; +-#if TARGET_MACHO +- else if (MACHO_DYNAMIC_NO_PIC_P +- && !ix86_legitimate_constant_p (Pmode, disp)) +- /* displacment must be referenced via non_lazy_pointer */ +- return false; +-#endif ++ case 'W': ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('w', file); ++ return; + +- /* This code used to verify that a symbolic pic displacement +- includes the pic_offset_table_rtx register. ++ case 'B': ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('b', file); ++ return; + +- While this is good idea, unfortunately these constructs may +- be created by "adds using lea" optimization for incorrect +- code like: ++ case 'Q': ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('l', file); ++ return; + +- int a; +- int foo(int i) +- { +- return *(&a+i); +- } ++ case 'S': ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('s', file); ++ return; + +- This code is nonsensical, but results in addressing +- GOT table with pic_offset_table_rtx base. We can't +- just refuse it easily, since it gets matched by +- "addsi3" pattern, that later gets split to lea in the +- case output register differs from input. 
While this +- can be handled by separate addsi pattern for this case +- that never results in lea, this seems to be easier and +- correct fix for crash to disable this test. */ +- } +- else if (GET_CODE (disp) != LABEL_REF +- && !CONST_INT_P (disp) +- && (GET_CODE (disp) != CONST +- || !ix86_legitimate_constant_p (Pmode, disp)) +- && (GET_CODE (disp) != SYMBOL_REF +- || !ix86_legitimate_constant_p (Pmode, disp))) +- /* Displacement is not constant. */ +- return false; +- else if (TARGET_64BIT +- && !x86_64_immediate_operand (disp, VOIDmode)) +- /* Displacement is out of range. */ +- return false; +- /* In x32 mode, constant addresses are sign extended to 64bit, so +- we have to prevent addresses from 0x80000000 to 0xffffffff. */ +- else if (TARGET_X32 && !(index || base) +- && CONST_INT_P (disp) +- && val_signbit_known_set_p (SImode, INTVAL (disp))) +- return false; +- } ++ case 'T': ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('t', file); ++ return; + +- /* Everything looks valid. */ +- return true; +-} ++ case 'O': ++#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX ++ if (ASSEMBLER_DIALECT != ASM_ATT) ++ return; + +-/* Determine if a given RTX is a valid constant address. */ ++ switch (GET_MODE_SIZE (GET_MODE (x))) ++ { ++ case 2: ++ putc ('w', file); ++ break; ++ ++ case 4: ++ putc ('l', file); ++ break; + +-bool +-constant_address_p (rtx x) +-{ +- return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1); +-} +- +-/* Return a unique alias set for the GOT. */ ++ case 8: ++ putc ('q', file); ++ break; + +-static alias_set_type +-ix86_GOT_alias_set (void) +-{ +- static alias_set_type set = -1; +- if (set == -1) +- set = new_alias_set (); +- return set; +-} ++ default: ++ output_operand_lossage ("invalid operand size for operand " ++ "code 'O'"); ++ return; ++ } + +-/* Return a legitimate reference for ORIG (an address) using the +- register REG. If REG is 0, a new pseudo is generated. ++ putc ('.', file); ++#endif ++ return; + +- There are two types of references that must be handled: ++ case 'z': ++ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) ++ { ++ /* Opcodes don't get size suffixes if using Intel opcodes. */ ++ if (ASSEMBLER_DIALECT == ASM_INTEL) ++ return; + +- 1. Global data references must load the address from the GOT, via +- the PIC reg. An insn is emitted to do this load, and the reg is +- returned. ++ switch (GET_MODE_SIZE (GET_MODE (x))) ++ { ++ case 1: ++ putc ('b', file); ++ return; + +- 2. Static data references, constant pool addresses, and code labels +- compute the address as an offset from the GOT, whose base is in +- the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to +- differentiate them from global data objects. The returned +- address is the PIC reg + an unspec constant. ++ case 2: ++ putc ('w', file); ++ return; + +- TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC +- reg also appears in the address. */ ++ case 4: ++ putc ('l', file); ++ return; + +-static rtx +-legitimize_pic_address (rtx orig, rtx reg) +-{ +- rtx addr = orig; +- rtx new_rtx = orig; ++ case 8: ++ putc ('q', file); ++ return; + +-#if TARGET_MACHO +- if (TARGET_MACHO && !TARGET_64BIT) +- { +- if (reg == 0) +- reg = gen_reg_rtx (Pmode); +- /* Use the generic Mach-O PIC machinery. 
*/ +- return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); +- } +-#endif ++ default: ++ output_operand_lossage ("invalid operand size for operand " ++ "code 'z'"); ++ return; ++ } ++ } + +- if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES) +- { +- rtx tmp = legitimize_pe_coff_symbol (addr, true); +- if (tmp) +- return tmp; +- } ++ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) ++ warning (0, "non-integer operand used with operand code %"); ++ /* FALLTHRU */ + +- if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) +- new_rtx = addr; +- else if ((!TARGET_64BIT +- || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC) +- && !TARGET_PECOFF +- && gotoff_operand (addr, Pmode)) +- { +- /* This symbol may be referenced via a displacement +- from the PIC base address (@GOTOFF). */ +- if (GET_CODE (addr) == CONST) +- addr = XEXP (addr, 0); ++ case 'Z': ++ /* 387 opcodes don't get size suffixes if using Intel opcodes. */ ++ if (ASSEMBLER_DIALECT == ASM_INTEL) ++ return; + +- if (GET_CODE (addr) == PLUS) +- { +- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), +- UNSPEC_GOTOFF); +- new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); +- } +- else +- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); ++ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) ++ { ++ switch (GET_MODE_SIZE (GET_MODE (x))) ++ { ++ case 2: ++#ifdef HAVE_AS_IX86_FILDS ++ putc ('s', file); ++#endif ++ return; + +- new_rtx = gen_rtx_CONST (Pmode, new_rtx); ++ case 4: ++ putc ('l', file); ++ return; + +- if (TARGET_64BIT) +- new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); ++ case 8: ++#ifdef HAVE_AS_IX86_FILDQ ++ putc ('q', file); ++#else ++ fputs ("ll", file); ++#endif ++ return; + +- if (reg != 0) +- { +- gcc_assert (REG_P (reg)); +- new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx, +- new_rtx, reg, 1, OPTAB_DIRECT); +- } +- else +- new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); +- } +- else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) +- /* We can't use @GOTOFF for text labels +- on VxWorks, see gotoff_operand. */ +- || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) +- { +- rtx tmp = legitimize_pe_coff_symbol (addr, true); +- if (tmp) +- return tmp; +- +- /* For x64 PE-COFF there is no GOT table, +- so we use address directly. */ +- if (TARGET_64BIT && TARGET_PECOFF) +- { +- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL); +- new_rtx = gen_rtx_CONST (Pmode, new_rtx); +- } +- else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC) +- { +- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), +- UNSPEC_GOTPCREL); +- new_rtx = gen_rtx_CONST (Pmode, new_rtx); +- new_rtx = gen_const_mem (Pmode, new_rtx); +- set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); +- } +- else +- { +- /* This symbol must be referenced via a load +- from the Global Offset Table (@GOT). 
*/ +- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); +- new_rtx = gen_rtx_CONST (Pmode, new_rtx); +- if (TARGET_64BIT) +- new_rtx = force_reg (Pmode, new_rtx); +- new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); +- new_rtx = gen_const_mem (Pmode, new_rtx); +- set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); +- } +- +- new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); +- } +- else +- { +- if (CONST_INT_P (addr) +- && !x86_64_immediate_operand (addr, VOIDmode)) +- new_rtx = copy_to_suggested_reg (addr, reg, Pmode); +- else if (GET_CODE (addr) == CONST) +- { +- addr = XEXP (addr, 0); +- +- /* We must match stuff we generate before. Assume the only +- unspecs that can get here are ours. Not that we could do +- anything with them anyway.... */ +- if (GET_CODE (addr) == UNSPEC +- || (GET_CODE (addr) == PLUS +- && GET_CODE (XEXP (addr, 0)) == UNSPEC)) +- return orig; +- gcc_assert (GET_CODE (addr) == PLUS); +- } +- +- if (GET_CODE (addr) == PLUS) +- { +- rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); +- +- /* Check first to see if this is a constant +- offset from a @GOTOFF symbol reference. */ +- if (!TARGET_PECOFF +- && gotoff_operand (op0, Pmode) +- && CONST_INT_P (op1)) ++ default: ++ break; ++ } ++ } ++ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { +- if (!TARGET_64BIT) +- { +- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), +- UNSPEC_GOTOFF); +- new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); +- new_rtx = gen_rtx_CONST (Pmode, new_rtx); ++ /* 387 opcodes don't get size suffixes ++ if the operands are registers. */ ++ if (STACK_REG_P (x)) ++ return; + +- if (reg != 0) +- { +- gcc_assert (REG_P (reg)); +- new_rtx = expand_simple_binop (Pmode, PLUS, +- pic_offset_table_rtx, +- new_rtx, reg, 1, +- OPTAB_DIRECT); +- } +- else +- new_rtx +- = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); +- } +- else ++ switch (GET_MODE_SIZE (GET_MODE (x))) + { +- if (INTVAL (op1) < -16*1024*1024 +- || INTVAL (op1) >= 16*1024*1024) +- { +- if (!x86_64_immediate_operand (op1, Pmode)) +- op1 = force_reg (Pmode, op1); ++ case 4: ++ putc ('s', file); ++ return; + +- new_rtx +- = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); +- } ++ case 8: ++ putc ('l', file); ++ return; ++ ++ case 12: ++ case 16: ++ putc ('t', file); ++ return; ++ ++ default: ++ break; + } + } + else + { +- rtx base = legitimize_pic_address (op0, reg); +- machine_mode mode = GET_MODE (base); +- new_rtx +- = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg); +- +- if (CONST_INT_P (new_rtx)) +- { +- if (INTVAL (new_rtx) < -16*1024*1024 +- || INTVAL (new_rtx) >= 16*1024*1024) +- { +- if (!x86_64_immediate_operand (new_rtx, mode)) +- new_rtx = force_reg (mode, new_rtx); +- +- new_rtx +- = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx); +- } +- else +- new_rtx = plus_constant (mode, base, INTVAL (new_rtx)); +- } +- else +- { +- /* For %rip addressing, we have to use +- just disp32, not base nor index. */ +- if (TARGET_64BIT +- && (GET_CODE (base) == SYMBOL_REF +- || GET_CODE (base) == LABEL_REF)) +- base = force_reg (mode, base); +- if (GET_CODE (new_rtx) == PLUS +- && CONSTANT_P (XEXP (new_rtx, 1))) +- { +- base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0)); +- new_rtx = XEXP (new_rtx, 1); +- } +- new_rtx = gen_rtx_PLUS (mode, base, new_rtx); +- } ++ output_operand_lossage ("invalid operand type used with " ++ "operand code 'Z'"); ++ return; + } +- } +- } +- return new_rtx; +-} +- +-/* Load the thread pointer. If TO_REG is true, force it into a register. 
*/ +- +-static rtx +-get_thread_pointer (machine_mode tp_mode, bool to_reg) +-{ +- rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); +- +- if (GET_MODE (tp) != tp_mode) +- { +- gcc_assert (GET_MODE (tp) == SImode); +- gcc_assert (tp_mode == DImode); +- +- tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); +- } +- +- if (to_reg) +- tp = copy_to_mode_reg (tp_mode, tp); +- +- return tp; +-} +- +-/* Construct the SYMBOL_REF for the tls_get_addr function. */ +- +-static GTY(()) rtx ix86_tls_symbol; +- +-static rtx +-ix86_tls_get_addr (void) +-{ +- if (!ix86_tls_symbol) +- { +- const char *sym +- = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT) +- ? "___tls_get_addr" : "__tls_get_addr"); +- +- ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym); +- } +- +- if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF) +- { +- rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol), +- UNSPEC_PLTOFF); +- return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, +- gen_rtx_CONST (Pmode, unspec)); +- } +- +- return ix86_tls_symbol; +-} +- +-/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ + +-static GTY(()) rtx ix86_tls_module_base_symbol; +- +-rtx +-ix86_tls_module_base (void) +-{ +- if (!ix86_tls_module_base_symbol) +- { +- ix86_tls_module_base_symbol +- = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); ++ output_operand_lossage ("invalid operand size for operand code 'Z'"); ++ return; + +- SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) +- |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; +- } ++ case 'd': ++ case 'b': ++ case 'w': ++ case 'k': ++ case 'q': ++ case 'h': ++ case 't': ++ case 'g': ++ case 'y': ++ case 'x': ++ case 'X': ++ case 'P': ++ case 'p': ++ case 'V': ++ break; + +- return ix86_tls_module_base_symbol; +-} ++ case 's': ++ if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) ++ { ++ ix86_print_operand (file, x, 0); ++ fputs (", ", file); ++ } ++ return; + +-/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is +- false if we expect this to be used for a memory address and true if +- we expect to load the address into a register. */ +- +-static rtx +-legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) +-{ +- rtx dest, base, off; +- rtx pic = NULL_RTX, tp = NULL_RTX; +- machine_mode tp_mode = Pmode; +- int type; ++ case 'Y': ++ switch (GET_CODE (x)) ++ { ++ case NE: ++ fputs ("neq", file); ++ break; ++ case EQ: ++ fputs ("eq", file); ++ break; ++ case GE: ++ case GEU: ++ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); ++ break; ++ case GT: ++ case GTU: ++ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file); ++ break; ++ case LE: ++ case LEU: ++ fputs ("le", file); ++ break; ++ case LT: ++ case LTU: ++ fputs ("lt", file); ++ break; ++ case UNORDERED: ++ fputs ("unord", file); ++ break; ++ case ORDERED: ++ fputs ("ord", file); ++ break; ++ case UNEQ: ++ fputs ("ueq", file); ++ break; ++ case UNGE: ++ fputs ("nlt", file); ++ break; ++ case UNGT: ++ fputs ("nle", file); ++ break; ++ case UNLE: ++ fputs ("ule", file); ++ break; ++ case UNLT: ++ fputs ("ult", file); ++ break; ++ case LTGT: ++ fputs ("une", file); ++ break; ++ default: ++ output_operand_lossage ("operand is not a condition code, " ++ "invalid operand code 'Y'"); ++ return; ++ } ++ return; + +- /* Fall back to global dynamic model if tool chain cannot support local +- dynamic. 
*/ +- if (TARGET_SUN_TLS && !TARGET_64BIT +- && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM +- && model == TLS_MODEL_LOCAL_DYNAMIC) +- model = TLS_MODEL_GLOBAL_DYNAMIC; ++ case 'D': ++ /* Little bit of braindamage here. The SSE compare instructions ++ does use completely different names for the comparisons that the ++ fp conditional moves. */ ++ switch (GET_CODE (x)) ++ { ++ case UNEQ: ++ if (TARGET_AVX) ++ { ++ fputs ("eq_us", file); ++ break; ++ } ++ /* FALLTHRU */ ++ case EQ: ++ fputs ("eq", file); ++ break; ++ case UNLT: ++ if (TARGET_AVX) ++ { ++ fputs ("nge", file); ++ break; ++ } ++ /* FALLTHRU */ ++ case LT: ++ fputs ("lt", file); ++ break; ++ case UNLE: ++ if (TARGET_AVX) ++ { ++ fputs ("ngt", file); ++ break; ++ } ++ /* FALLTHRU */ ++ case LE: ++ fputs ("le", file); ++ break; ++ case UNORDERED: ++ fputs ("unord", file); ++ break; ++ case LTGT: ++ if (TARGET_AVX) ++ { ++ fputs ("neq_oq", file); ++ break; ++ } ++ /* FALLTHRU */ ++ case NE: ++ fputs ("neq", file); ++ break; ++ case GE: ++ if (TARGET_AVX) ++ { ++ fputs ("ge", file); ++ break; ++ } ++ /* FALLTHRU */ ++ case UNGE: ++ fputs ("nlt", file); ++ break; ++ case GT: ++ if (TARGET_AVX) ++ { ++ fputs ("gt", file); ++ break; ++ } ++ /* FALLTHRU */ ++ case UNGT: ++ fputs ("nle", file); ++ break; ++ case ORDERED: ++ fputs ("ord", file); ++ break; ++ default: ++ output_operand_lossage ("operand is not a condition code, " ++ "invalid operand code 'D'"); ++ return; ++ } ++ return; + +- switch (model) +- { +- case TLS_MODEL_GLOBAL_DYNAMIC: +- dest = gen_reg_rtx (Pmode); ++ case 'F': ++ case 'f': ++#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('.', file); ++ gcc_fallthrough (); ++#endif + +- if (!TARGET_64BIT) +- { +- if (flag_pic && !TARGET_PECOFF) +- pic = pic_offset_table_rtx; +- else ++ case 'C': ++ case 'c': ++ if (!COMPARISON_P (x)) + { +- pic = gen_reg_rtx (Pmode); +- emit_insn (gen_set_got (pic)); ++ output_operand_lossage ("operand is not a condition code, " ++ "invalid operand code '%c'", code); ++ return; + } +- } ++ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), ++ code == 'c' || code == 'f', ++ code == 'F' || code == 'f', ++ file); ++ return; + +- if (TARGET_GNU2_TLS) +- { +- if (TARGET_64BIT) +- emit_insn (gen_tls_dynamic_gnu2_64 (dest, x)); +- else +- emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic)); ++ case 'H': ++ if (!offsettable_memref_p (x)) ++ { ++ output_operand_lossage ("operand is not an offsettable memory " ++ "reference, invalid operand code 'H'"); ++ return; ++ } ++ /* It doesn't actually matter what mode we use here, as we're ++ only going to use this for printing. */ ++ x = adjust_address_nv (x, DImode, 8); ++ /* Output 'qword ptr' for intel assembler dialect. */ ++ if (ASSEMBLER_DIALECT == ASM_INTEL) ++ code = 'q'; ++ break; + +- tp = get_thread_pointer (Pmode, true); +- dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); ++ case 'K': ++ if (!CONST_INT_P (x)) ++ { ++ output_operand_lossage ("operand is not an integer, invalid " ++ "operand code 'K'"); ++ return; ++ } + +- if (GET_MODE (x) != Pmode) +- x = gen_rtx_ZERO_EXTEND (Pmode, x); ++ if (INTVAL (x) & IX86_HLE_ACQUIRE) ++#ifdef HAVE_AS_IX86_HLE ++ fputs ("xacquire ", file); ++#else ++ fputs ("\n" ASM_BYTE "0xf2\n\t", file); ++#endif ++ else if (INTVAL (x) & IX86_HLE_RELEASE) ++#ifdef HAVE_AS_IX86_HLE ++ fputs ("xrelease ", file); ++#else ++ fputs ("\n" ASM_BYTE "0xf3\n\t", file); ++#endif ++ /* We do not want to print value of the operand. 
*/ ++ return; + +- set_unique_reg_note (get_last_insn (), REG_EQUAL, x); +- } +- else +- { +- rtx caddr = ix86_tls_get_addr (); ++ case 'N': ++ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) ++ fputs ("{z}", file); ++ return; + +- if (TARGET_64BIT) ++ case 'r': ++ if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE) + { +- rtx rax = gen_rtx_REG (Pmode, AX_REG); +- rtx_insn *insns; ++ output_operand_lossage ("operand is not a specific integer, " ++ "invalid operand code 'r'"); ++ return; ++ } + +- start_sequence (); +- emit_call_insn +- (ix86_gen_tls_global_dynamic_64 (rax, x, caddr)); +- insns = get_insns (); +- end_sequence (); ++ if (ASSEMBLER_DIALECT == ASM_INTEL) ++ fputs (", ", file); + +- if (GET_MODE (x) != Pmode) +- x = gen_rtx_ZERO_EXTEND (Pmode, x); ++ fputs ("{sae}", file); + +- RTL_CONST_CALL_P (insns) = 1; +- emit_libcall_block (insns, dest, rax, x); +- } +- else +- emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr)); +- } +- break; ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ fputs (", ", file); + +- case TLS_MODEL_LOCAL_DYNAMIC: +- base = gen_reg_rtx (Pmode); ++ return; + +- if (!TARGET_64BIT) +- { +- if (flag_pic) +- pic = pic_offset_table_rtx; +- else ++ case 'R': ++ if (!CONST_INT_P (x)) + { +- pic = gen_reg_rtx (Pmode); +- emit_insn (gen_set_got (pic)); ++ output_operand_lossage ("operand is not an integer, invalid " ++ "operand code 'R'"); ++ return; + } +- } + +- if (TARGET_GNU2_TLS) +- { +- rtx tmp = ix86_tls_module_base (); ++ if (ASSEMBLER_DIALECT == ASM_INTEL) ++ fputs (", ", file); + +- if (TARGET_64BIT) +- emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp)); +- else +- emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic)); ++ switch (INTVAL (x)) ++ { ++ case ROUND_NEAREST_INT | ROUND_SAE: ++ fputs ("{rn-sae}", file); ++ break; ++ case ROUND_NEG_INF | ROUND_SAE: ++ fputs ("{rd-sae}", file); ++ break; ++ case ROUND_POS_INF | ROUND_SAE: ++ fputs ("{ru-sae}", file); ++ break; ++ case ROUND_ZERO | ROUND_SAE: ++ fputs ("{rz-sae}", file); ++ break; ++ default: ++ output_operand_lossage ("operand is not a specific integer, " ++ "invalid operand code 'R'"); ++ } + +- tp = get_thread_pointer (Pmode, true); +- set_unique_reg_note (get_last_insn (), REG_EQUAL, +- gen_rtx_MINUS (Pmode, tmp, tp)); +- } +- else +- { +- rtx caddr = ix86_tls_get_addr (); ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ fputs (", ", file); + +- if (TARGET_64BIT) +- { +- rtx rax = gen_rtx_REG (Pmode, AX_REG); +- rtx_insn *insns; +- rtx eqv; ++ return; + +- start_sequence (); +- emit_call_insn +- (ix86_gen_tls_local_dynamic_base_64 (rax, caddr)); +- insns = get_insns (); +- end_sequence (); ++ case '*': ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('*', file); ++ return; + +- /* Attach a unique REG_EQUAL, to allow the RTL optimizers to +- share the LD_BASE result with other LD model accesses. 
*/ +- eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), +- UNSPEC_TLS_LD_BASE); ++ case '&': ++ { ++ const char *name = get_some_local_dynamic_name (); ++ if (name == NULL) ++ output_operand_lossage ("'%%&' used without any " ++ "local dynamic TLS references"); ++ else ++ assemble_name (file, name); ++ return; ++ } + +- RTL_CONST_CALL_P (insns) = 1; +- emit_libcall_block (insns, base, rax, eqv); +- } +- else +- emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr)); +- } ++ case '+': ++ { ++ rtx x; + +- off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); +- off = gen_rtx_CONST (Pmode, off); ++ if (!optimize ++ || optimize_function_for_size_p (cfun) ++ || !TARGET_BRANCH_PREDICTION_HINTS) ++ return; + +- dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); ++ x = find_reg_note (current_output_insn, REG_BR_PROB, 0); ++ if (x) ++ { ++ int pred_val = profile_probability::from_reg_br_prob_note ++ (XINT (x, 0)).to_reg_br_prob_base (); + +- if (TARGET_GNU2_TLS) +- { +- dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); ++ if (pred_val < REG_BR_PROB_BASE * 45 / 100 ++ || pred_val > REG_BR_PROB_BASE * 55 / 100) ++ { ++ bool taken = pred_val > REG_BR_PROB_BASE / 2; ++ bool cputaken ++ = final_forward_branch_p (current_output_insn) == 0; + +- if (GET_MODE (x) != Pmode) +- x = gen_rtx_ZERO_EXTEND (Pmode, x); ++ /* Emit hints only in the case default branch prediction ++ heuristics would fail. */ ++ if (taken != cputaken) ++ { ++ /* We use 3e (DS) prefix for taken branches and ++ 2e (CS) prefix for not taken branches. */ ++ if (taken) ++ fputs ("ds ; ", file); ++ else ++ fputs ("cs ; ", file); ++ } ++ } ++ } ++ return; ++ } + +- set_unique_reg_note (get_last_insn (), REG_EQUAL, x); +- } +- break; ++ case ';': ++#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX ++ putc (';', file); ++#endif ++ return; + +- case TLS_MODEL_INITIAL_EXEC: +- if (TARGET_64BIT) +- { +- if (TARGET_SUN_TLS && !TARGET_X32) +- { +- /* The Sun linker took the AMD64 TLS spec literally +- and can only handle %rax as destination of the +- initial executable code sequence. */ ++ case '~': ++ putc (TARGET_AVX2 ? 'i' : 'f', file); ++ return; + +- dest = gen_reg_rtx (DImode); +- emit_insn (gen_tls_initial_exec_64_sun (dest, x)); +- return dest; ++ case 'M': ++ if (TARGET_X32) ++ { ++ /* NB: 32-bit indices in VSIB address are sign-extended ++ to 64 bits. In x32, if 32-bit address 0xf7fa3010 is ++ sign-extended to 0xfffffffff7fa3010 which is invalid ++ address. Add addr32 prefix if there is no base ++ register nor symbol. */ ++ bool ok; ++ struct ix86_address parts; ++ ok = ix86_decompose_address (x, &parts); ++ gcc_assert (ok && parts.index == NULL_RTX); ++ if (parts.base == NULL_RTX ++ && (parts.disp == NULL_RTX ++ || !symbolic_operand (parts.disp, ++ GET_MODE (parts.disp)))) ++ fputs ("addr32 ", file); + } ++ return; + +- /* Generate DImode references to avoid %fs:(%reg32) +- problems and linker IE->LE relaxation bug. */ +- tp_mode = DImode; +- pic = NULL; +- type = UNSPEC_GOTNTPOFF; +- } +- else if (flag_pic) +- { +- pic = pic_offset_table_rtx; +- type = TARGET_ANY_GNU_TLS ? 
UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; +- } +- else if (!TARGET_ANY_GNU_TLS) +- { +- pic = gen_reg_rtx (Pmode); +- emit_insn (gen_set_got (pic)); +- type = UNSPEC_GOTTPOFF; +- } +- else +- { +- pic = NULL; +- type = UNSPEC_INDNTPOFF; +- } ++ case '^': ++ if (TARGET_64BIT && Pmode != word_mode) ++ fputs ("addr32 ", file); ++ return; + +- off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type); +- off = gen_rtx_CONST (tp_mode, off); +- if (pic) +- off = gen_rtx_PLUS (tp_mode, pic, off); +- off = gen_const_mem (tp_mode, off); +- set_mem_alias_set (off, ix86_GOT_alias_set ()); ++ case '!': ++ if (ix86_notrack_prefixed_insn_p (current_output_insn)) ++ fputs ("notrack ", file); ++ return; + +- if (TARGET_64BIT || TARGET_ANY_GNU_TLS) +- { +- base = get_thread_pointer (tp_mode, +- for_mov || !TARGET_TLS_DIRECT_SEG_REFS); +- off = force_reg (tp_mode, off); +- dest = gen_rtx_PLUS (tp_mode, base, off); +- if (tp_mode != Pmode) +- dest = convert_to_mode (Pmode, dest, 1); +- } +- else +- { +- base = get_thread_pointer (Pmode, true); +- dest = gen_reg_rtx (Pmode); +- emit_insn (ix86_gen_sub3 (dest, base, off)); ++ default: ++ output_operand_lossage ("invalid operand code '%c'", code); + } +- break; ++ } + +- case TLS_MODEL_LOCAL_EXEC: +- off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), +- (TARGET_64BIT || TARGET_ANY_GNU_TLS) +- ? UNSPEC_NTPOFF : UNSPEC_TPOFF); +- off = gen_rtx_CONST (Pmode, off); ++ if (REG_P (x)) ++ print_reg (x, code, file); + +- if (TARGET_64BIT || TARGET_ANY_GNU_TLS) +- { +- base = get_thread_pointer (Pmode, +- for_mov || !TARGET_TLS_DIRECT_SEG_REFS); +- return gen_rtx_PLUS (Pmode, base, off); +- } +- else ++ else if (MEM_P (x)) ++ { ++ rtx addr = XEXP (x, 0); ++ ++ /* No `byte ptr' prefix for call instructions ... */ ++ if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') + { +- base = get_thread_pointer (Pmode, true); +- dest = gen_reg_rtx (Pmode); +- emit_insn (ix86_gen_sub3 (dest, base, off)); +- } +- break; ++ machine_mode mode = GET_MODE (x); ++ const char *size; + +- default: +- gcc_unreachable (); +- } ++ /* Check for explicit size override codes. */ ++ if (code == 'b') ++ size = "BYTE"; ++ else if (code == 'w') ++ size = "WORD"; ++ else if (code == 'k') ++ size = "DWORD"; ++ else if (code == 'q') ++ size = "QWORD"; ++ else if (code == 'x') ++ size = "XMMWORD"; ++ else if (code == 't') ++ size = "YMMWORD"; ++ else if (code == 'g') ++ size = "ZMMWORD"; ++ else if (mode == BLKmode) ++ /* ... or BLKmode operands, when not overridden. */ ++ size = NULL; ++ else ++ switch (GET_MODE_SIZE (mode)) ++ { ++ case 1: size = "BYTE"; break; ++ case 2: size = "WORD"; break; ++ case 4: size = "DWORD"; break; ++ case 8: size = "QWORD"; break; ++ case 12: size = "TBYTE"; break; ++ case 16: ++ if (mode == XFmode) ++ size = "TBYTE"; ++ else ++ size = "XMMWORD"; ++ break; ++ case 32: size = "YMMWORD"; break; ++ case 64: size = "ZMMWORD"; break; ++ default: ++ gcc_unreachable (); ++ } ++ if (size) ++ { ++ fputs (size, file); ++ fputs (" PTR ", file); ++ } ++ } + +- return dest; +-} ++ if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) ++ output_operand_lossage ("invalid constraints for operand"); ++ else ++ ix86_print_operand_address_as ++ (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P'); ++ } + +-/* Return true if OP refers to a TLS address. 
*/ +-bool +-ix86_tls_address_pattern_p (rtx op) +-{ +- subrtx_var_iterator::array_type array; +- FOR_EACH_SUBRTX_VAR (iter, array, op, ALL) ++ else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode) + { +- rtx op = *iter; +- if (MEM_P (op)) +- { +- rtx *x = &XEXP (op, 0); +- while (GET_CODE (*x) == PLUS) +- { +- int i; +- for (i = 0; i < 2; i++) +- { +- rtx u = XEXP (*x, i); +- if (GET_CODE (u) == ZERO_EXTEND) +- u = XEXP (u, 0); +- if (GET_CODE (u) == UNSPEC +- && XINT (u, 1) == UNSPEC_TP) +- return true; +- } +- x = &XEXP (*x, 0); +- } ++ long l; + +- iter.skip_subrtxes (); +- } ++ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); ++ ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('$', file); ++ /* Sign extend 32bit SFmode immediate to 8 bytes. */ ++ if (code == 'q') ++ fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x", ++ (unsigned long long) (int) l); ++ else ++ fprintf (file, "0x%08x", (unsigned int) l); + } + +- return false; +-} ++ else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode) ++ { ++ long l[2]; + +-/* Rewrite *LOC so that it refers to a default TLS address space. */ +-void +-ix86_rewrite_tls_address_1 (rtx *loc) +-{ +- subrtx_ptr_iterator::array_type array; +- FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) ++ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); ++ ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('$', file); ++ fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff); ++ } ++ ++ /* These float cases don't actually occur as immediate operands. */ ++ else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode) + { +- rtx *loc = *iter; +- if (MEM_P (*loc)) +- { +- rtx addr = XEXP (*loc, 0); +- rtx *x = &addr; +- while (GET_CODE (*x) == PLUS) +- { +- int i; +- for (i = 0; i < 2; i++) +- { +- rtx u = XEXP (*x, i); +- if (GET_CODE (u) == ZERO_EXTEND) +- u = XEXP (u, 0); +- if (GET_CODE (u) == UNSPEC +- && XINT (u, 1) == UNSPEC_TP) +- { +- addr_space_t as = DEFAULT_TLS_SEG_REG; ++ char dstr[30]; + +- *x = XEXP (*x, 1 - i); ++ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); ++ fputs (dstr, file); ++ } + +- *loc = replace_equiv_address_nv (*loc, addr, true); +- set_mem_addr_space (*loc, as); +- return; +- } +- } +- x = &XEXP (*x, 0); +- } ++ else ++ { ++ /* We have patterns that allow zero sets of memory, for instance. ++ In 64-bit mode, we should probably support all 8-byte vectors, ++ since we can in fact encode that into an immediate. */ ++ if (GET_CODE (x) == CONST_VECTOR) ++ { ++ if (x != CONST0_RTX (GET_MODE (x))) ++ output_operand_lossage ("invalid vector immediate"); ++ x = const0_rtx; ++ } + +- iter.skip_subrtxes (); ++ if (code != 'P' && code != 'p') ++ { ++ if (CONST_INT_P (x)) ++ { ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('$', file); ++ } ++ else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF ++ || GET_CODE (x) == LABEL_REF) ++ { ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('$', file); ++ else ++ fputs ("OFFSET FLAT:", file); ++ } + } ++ if (CONST_INT_P (x)) ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); ++ else if (flag_pic || MACHOPIC_INDIRECT) ++ output_pic_addr_const (file, x, code); ++ else ++ output_addr_const (file, x); + } + } + +-/* Rewrite instruction pattern involvning TLS address +- so that it refers to a default TLS address space. 
*/ +-rtx +-ix86_rewrite_tls_address (rtx pattern) ++static bool ++ix86_print_operand_punct_valid_p (unsigned char code) + { +- pattern = copy_insn (pattern); +- ix86_rewrite_tls_address_1 (&pattern); +- return pattern; ++ return (code == '*' || code == '+' || code == '&' || code == ';' ++ || code == '~' || code == '^' || code == '!'); + } ++ ++/* Print a memory operand whose address is ADDR. */ + +-/* Create or return the unique __imp_DECL dllimport symbol corresponding +- to symbol DECL if BEIMPORT is true. Otherwise create or return the +- unique refptr-DECL symbol corresponding to symbol DECL. */ +- +-struct dllimport_hasher : ggc_cache_ptr_hash ++static void ++ix86_print_operand_address_as (FILE *file, rtx addr, ++ addr_space_t as, bool no_rip) + { +- static inline hashval_t hash (tree_map *m) { return m->hash; } +- static inline bool +- equal (tree_map *a, tree_map *b) +- { +- return a->base.from == b->base.from; +- } +- +- static int +- keep_cache_entry (tree_map *&m) +- { +- return ggc_marked_p (m->base.from); +- } +-}; ++ struct ix86_address parts; ++ rtx base, index, disp; ++ int scale; ++ int ok; ++ bool vsib = false; ++ int code = 0; + +-static GTY((cache)) hash_table *dllimport_map; ++ if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR) ++ { ++ ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); ++ gcc_assert (parts.index == NULL_RTX); ++ parts.index = XVECEXP (addr, 0, 1); ++ parts.scale = INTVAL (XVECEXP (addr, 0, 2)); ++ addr = XVECEXP (addr, 0, 0); ++ vsib = true; ++ } ++ else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR) ++ { ++ gcc_assert (TARGET_64BIT); ++ ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); ++ code = 'q'; ++ } ++ else ++ ok = ix86_decompose_address (addr, &parts); + +-static tree +-get_dllimport_decl (tree decl, bool beimport) +-{ +- struct tree_map *h, in; +- const char *name; +- const char *prefix; +- size_t namelen, prefixlen; +- char *imp_name; +- tree to; +- rtx rtl; ++ gcc_assert (ok); + +- if (!dllimport_map) +- dllimport_map = hash_table::create_ggc (512); ++ base = parts.base; ++ index = parts.index; ++ disp = parts.disp; ++ scale = parts.scale; + +- in.hash = htab_hash_pointer (decl); +- in.base.from = decl; +- tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT); +- h = *loc; +- if (h) +- return h->to; ++ if (ADDR_SPACE_GENERIC_P (as)) ++ as = parts.seg; ++ else ++ gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg)); + +- *loc = h = ggc_alloc (); +- h->hash = in.hash; +- h->base.from = decl; +- h->to = to = build_decl (DECL_SOURCE_LOCATION (decl), +- VAR_DECL, NULL, ptr_type_node); +- DECL_ARTIFICIAL (to) = 1; +- DECL_IGNORED_P (to) = 1; +- DECL_EXTERNAL (to) = 1; +- TREE_READONLY (to) = 1; ++ if (!ADDR_SPACE_GENERIC_P (as)) ++ { ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ putc ('%', file); + +- name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); +- name = targetm.strip_name_encoding (name); +- if (beimport) +- prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0 +- ? "*__imp_" : "*__imp__"; +- else +- prefix = user_label_prefix[0] == 0 ? "*.refptr." 
: "*refptr."; +- namelen = strlen (name); +- prefixlen = strlen (prefix); +- imp_name = (char *) alloca (namelen + prefixlen + 1); +- memcpy (imp_name, prefix, prefixlen); +- memcpy (imp_name + prefixlen, name, namelen + 1); ++ switch (as) ++ { ++ case ADDR_SPACE_SEG_FS: ++ fputs ("fs:", file); ++ break; ++ case ADDR_SPACE_SEG_GS: ++ fputs ("gs:", file); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } + +- name = ggc_alloc_string (imp_name, namelen + prefixlen); +- rtl = gen_rtx_SYMBOL_REF (Pmode, name); +- SET_SYMBOL_REF_DECL (rtl, to); +- SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR; +- if (!beimport) ++ /* Use one byte shorter RIP relative addressing for 64bit mode. */ ++ if (TARGET_64BIT && !base && !index && !no_rip) + { +- SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL; +-#ifdef SUB_TARGET_RECORD_STUB +- SUB_TARGET_RECORD_STUB (name); +-#endif +- } ++ rtx symbol = disp; + +- rtl = gen_const_mem (Pmode, rtl); +- set_mem_alias_set (rtl, ix86_GOT_alias_set ()); ++ if (GET_CODE (disp) == CONST ++ && GET_CODE (XEXP (disp, 0)) == PLUS ++ && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) ++ symbol = XEXP (XEXP (disp, 0), 0); + +- SET_DECL_RTL (to, rtl); +- SET_DECL_ASSEMBLER_NAME (to, get_identifier (name)); ++ if (GET_CODE (symbol) == LABEL_REF ++ || (GET_CODE (symbol) == SYMBOL_REF ++ && SYMBOL_REF_TLS_MODEL (symbol) == 0)) ++ base = pc_rtx; ++ } + +- return to; +-} ++ if (!base && !index) ++ { ++ /* Displacement only requires special attention. */ ++ if (CONST_INT_P (disp)) ++ { ++ if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as)) ++ fputs ("ds:", file); ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); ++ } ++ /* Load the external function address via the GOT slot to avoid PLT. */ ++ else if (GET_CODE (disp) == CONST ++ && GET_CODE (XEXP (disp, 0)) == UNSPEC ++ && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL ++ || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT) ++ && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) ++ output_pic_addr_const (file, disp, 0); ++ else if (flag_pic) ++ output_pic_addr_const (file, disp, 0); ++ else ++ output_addr_const (file, disp); ++ } ++ else ++ { ++ /* Print SImode register names to force addr32 prefix. */ ++ if (SImode_address_operand (addr, VOIDmode)) ++ { ++ if (flag_checking) ++ { ++ gcc_assert (TARGET_64BIT); ++ switch (GET_CODE (addr)) ++ { ++ case SUBREG: ++ gcc_assert (GET_MODE (addr) == SImode); ++ gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode); ++ break; ++ case ZERO_EXTEND: ++ case AND: ++ gcc_assert (GET_MODE (addr) == DImode); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ gcc_assert (!code); ++ code = 'k'; ++ } ++ else if (code == 0 ++ && TARGET_X32 ++ && disp ++ && CONST_INT_P (disp) ++ && INTVAL (disp) < -16*1024*1024) ++ { ++ /* X32 runs in 64-bit mode, where displacement, DISP, in ++ address DISP(%r64), is encoded as 32-bit immediate sign- ++ extended from 32-bit to 64-bit. For -0x40000300(%r64), ++ address is %r64 + 0xffffffffbffffd00. When %r64 < ++ 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64, ++ which is invalid for x32. The correct address is %r64 ++ - 0x40000300 == 0xf7ffdd64. To properly encode ++ -0x40000300(%r64) for x32, we zero-extend negative ++ displacement by forcing addr32 prefix which truncates ++ 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should ++ zero-extend all negative displacements, including -1(%rsp). ++ However, for small negative displacements, sign-extension ++ won't cause overflow. 
We only zero-extend negative ++ displacements if they < -16*1024*1024, which is also used ++ to check legitimate address displacements for PIC. */ ++ code = 'k'; ++ } + +-/* Expand SYMBOL into its corresponding far-address symbol. +- WANT_REG is true if we require the result be a register. */ ++ /* Since the upper 32 bits of RSP are always zero for x32, ++ we can encode %esp as %rsp to avoid 0x67 prefix if ++ there is no index register. */ ++ if (TARGET_X32 && Pmode == SImode ++ && !index && base && REG_P (base) && REGNO (base) == SP_REG) ++ code = 'q'; + +-static rtx +-legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg) +-{ +- tree imp_decl; +- rtx x; ++ if (ASSEMBLER_DIALECT == ASM_ATT) ++ { ++ if (disp) ++ { ++ if (flag_pic) ++ output_pic_addr_const (file, disp, 0); ++ else if (GET_CODE (disp) == LABEL_REF) ++ output_asm_label (disp); ++ else ++ output_addr_const (file, disp); ++ } + +- gcc_assert (SYMBOL_REF_DECL (symbol)); +- imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false); ++ putc ('(', file); ++ if (base) ++ print_reg (base, code, file); ++ if (index) ++ { ++ putc (',', file); ++ print_reg (index, vsib ? 0 : code, file); ++ if (scale != 1 || vsib) ++ fprintf (file, ",%d", scale); ++ } ++ putc (')', file); ++ } ++ else ++ { ++ rtx offset = NULL_RTX; + +- x = DECL_RTL (imp_decl); +- if (want_reg) +- x = force_reg (Pmode, x); +- return x; +-} ++ if (disp) ++ { ++ /* Pull out the offset of a symbol; print any symbol itself. */ ++ if (GET_CODE (disp) == CONST ++ && GET_CODE (XEXP (disp, 0)) == PLUS ++ && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) ++ { ++ offset = XEXP (XEXP (disp, 0), 1); ++ disp = gen_rtx_CONST (VOIDmode, ++ XEXP (XEXP (disp, 0), 0)); ++ } + +-/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is +- true if we require the result be a register. */ ++ if (flag_pic) ++ output_pic_addr_const (file, disp, 0); ++ else if (GET_CODE (disp) == LABEL_REF) ++ output_asm_label (disp); ++ else if (CONST_INT_P (disp)) ++ offset = disp; ++ else ++ output_addr_const (file, disp); ++ } + +-static rtx +-legitimize_dllimport_symbol (rtx symbol, bool want_reg) +-{ +- tree imp_decl; +- rtx x; ++ putc ('[', file); ++ if (base) ++ { ++ print_reg (base, code, file); ++ if (offset) ++ { ++ if (INTVAL (offset) >= 0) ++ putc ('+', file); ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); ++ } ++ } ++ else if (offset) ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); ++ else ++ putc ('0', file); + +- gcc_assert (SYMBOL_REF_DECL (symbol)); +- imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true); ++ if (index) ++ { ++ putc ('+', file); ++ print_reg (index, vsib ? 0 : code, file); ++ if (scale != 1 || vsib) ++ fprintf (file, "*%d", scale); ++ } ++ putc (']', file); ++ } ++ } ++} + +- x = DECL_RTL (imp_decl); +- if (want_reg) +- x = force_reg (Pmode, x); +- return x; ++static void ++ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) ++{ ++ ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false); + } + +-/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG +- is true if we require the result be a register. */ ++/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. 
*/ + +-static rtx +-legitimize_pe_coff_symbol (rtx addr, bool inreg) ++static bool ++i386_asm_output_addr_const_extra (FILE *file, rtx x) + { +- if (!TARGET_PECOFF) +- return NULL_RTX; ++ rtx op; + +- if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) +- { +- if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) +- return legitimize_dllimport_symbol (addr, inreg); +- if (GET_CODE (addr) == CONST +- && GET_CODE (XEXP (addr, 0)) == PLUS +- && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF +- && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) +- { +- rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg); +- return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); +- } +- } +- +- if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC) +- return NULL_RTX; +- if (GET_CODE (addr) == SYMBOL_REF +- && !is_imported_p (addr) +- && SYMBOL_REF_EXTERNAL_P (addr) +- && SYMBOL_REF_DECL (addr)) +- return legitimize_pe_coff_extern_decl (addr, inreg); ++ if (GET_CODE (x) != UNSPEC) ++ return false; + +- if (GET_CODE (addr) == CONST +- && GET_CODE (XEXP (addr, 0)) == PLUS +- && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF +- && !is_imported_p (XEXP (XEXP (addr, 0), 0)) +- && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0)) +- && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0))) ++ op = XVECEXP (x, 0, 0); ++ switch (XINT (x, 1)) + { +- rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg); +- return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); ++ case UNSPEC_GOTOFF: ++ output_addr_const (file, op); ++ fputs ("@gotoff", file); ++ break; ++ case UNSPEC_GOTTPOFF: ++ output_addr_const (file, op); ++ /* FIXME: This might be @TPOFF in Sun ld. */ ++ fputs ("@gottpoff", file); ++ break; ++ case UNSPEC_TPOFF: ++ output_addr_const (file, op); ++ fputs ("@tpoff", file); ++ break; ++ case UNSPEC_NTPOFF: ++ output_addr_const (file, op); ++ if (TARGET_64BIT) ++ fputs ("@tpoff", file); ++ else ++ fputs ("@ntpoff", file); ++ break; ++ case UNSPEC_DTPOFF: ++ output_addr_const (file, op); ++ fputs ("@dtpoff", file); ++ break; ++ case UNSPEC_GOTNTPOFF: ++ output_addr_const (file, op); ++ if (TARGET_64BIT) ++ fputs (ASSEMBLER_DIALECT == ASM_ATT ? ++ "@gottpoff(%rip)" : "@gottpoff[rip]", file); ++ else ++ fputs ("@gotntpoff", file); ++ break; ++ case UNSPEC_INDNTPOFF: ++ output_addr_const (file, op); ++ fputs ("@indntpoff", file); ++ break; ++#if TARGET_MACHO ++ case UNSPEC_MACHOPIC_OFFSET: ++ output_addr_const (file, op); ++ putc ('-', file); ++ machopic_output_function_base_name (file); ++ break; ++#endif ++ ++ default: ++ return false; + } +- return NULL_RTX; +-} + +-/* Try machine-dependent ways of modifying an illegitimate address +- to be legitimate. If we find one, return the new, valid address. +- This macro is used in only one place: `memory_address' in explow.c. ++ return true; ++} ++ ++ ++/* Output code to perform a 387 binary operation in INSN, one of PLUS, ++ MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] ++ is the expression of the binary operation. The output may either be ++ emitted here, or returned to the caller, like all output_* functions. + +- OLDX is the address as it was before break_out_memory_refs was called. +- In some cases it is useful to look at this to decide what needs to be done. ++ There is no guarantee that the operands are the same mode, as they ++ might be within FLOAT or FLOAT_EXTEND expressions. */ + +- It is always safe for this macro to do nothing. It exists to recognize +- opportunities to optimize the output. 
++#ifndef SYSV386_COMPAT ++/* Set to 1 for compatibility with brain-damaged assemblers. No-one ++ wants to fix the assemblers because that causes incompatibility ++ with gcc. No-one wants to fix gcc because that causes ++ incompatibility with assemblers... You can use the option of ++ -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ ++#define SYSV386_COMPAT 1 ++#endif + +- For the 80386, we handle X+REG by loading X into a register R and +- using R+REG. R will go in a general reg and indexing will be used. +- However, if REG is a broken-out memory address or multiplication, +- nothing needs to be done because REG can certainly go in a general reg. ++const char * ++output_387_binary_op (rtx_insn *insn, rtx *operands) ++{ ++ static char buf[40]; ++ const char *p; ++ bool is_sse ++ = (SSE_REG_P (operands[0]) ++ || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2])); + +- When -fpic is used, special handling is needed for symbolic references. +- See comments by legitimize_pic_address in i386.c for details. */ ++ if (is_sse) ++ p = "%v"; ++ else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT ++ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) ++ p = "fi"; ++ else ++ p = "f"; + +-static rtx +-ix86_legitimize_address (rtx x, rtx, machine_mode mode) +-{ +- bool changed = false; +- unsigned log; ++ strcpy (buf, p); + +- log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; +- if (log) +- return legitimize_tls_address (x, (enum tls_model) log, false); +- if (GET_CODE (x) == CONST +- && GET_CODE (XEXP (x, 0)) == PLUS +- && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF +- && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) ++ switch (GET_CODE (operands[3])) + { +- rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), +- (enum tls_model) log, false); +- return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); ++ case PLUS: ++ p = "add"; break; ++ case MINUS: ++ p = "sub"; break; ++ case MULT: ++ p = "mul"; break; ++ case DIV: ++ p = "div"; break; ++ default: ++ gcc_unreachable (); + } + +- if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) +- { +- rtx tmp = legitimize_pe_coff_symbol (x, true); +- if (tmp) +- return tmp; +- } ++ strcat (buf, p); + +- if (flag_pic && SYMBOLIC_CONST (x)) +- return legitimize_pic_address (x, 0); ++ if (is_sse) ++ { ++ p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd"; ++ strcat (buf, p); + +-#if TARGET_MACHO +- if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x)) +- return machopic_indirect_data_reference (x, 0); +-#endif ++ if (TARGET_AVX) ++ p = "\t{%2, %1, %0|%0, %1, %2}"; ++ else ++ p = "\t{%2, %0|%0, %2}"; + +- /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ +- if (GET_CODE (x) == ASHIFT +- && CONST_INT_P (XEXP (x, 1)) +- && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) ++ strcat (buf, p); ++ return buf; ++ } ++ ++ /* Even if we do not want to check the inputs, this documents input ++ constraints. Which helps in understanding the following code. 
*/ ++ if (flag_checking) + { +- changed = true; +- log = INTVAL (XEXP (x, 1)); +- x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), +- GEN_INT (1 << log)); ++ if (STACK_REG_P (operands[0]) ++ && ((REG_P (operands[1]) ++ && REGNO (operands[0]) == REGNO (operands[1]) ++ && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) ++ || (REG_P (operands[2]) ++ && REGNO (operands[0]) == REGNO (operands[2]) ++ && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) ++ && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) ++ ; /* ok */ ++ else ++ gcc_unreachable (); + } + +- if (GET_CODE (x) == PLUS) ++ switch (GET_CODE (operands[3])) + { +- /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ ++ case MULT: ++ case PLUS: ++ if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) ++ std::swap (operands[1], operands[2]); + +- if (GET_CODE (XEXP (x, 0)) == ASHIFT +- && CONST_INT_P (XEXP (XEXP (x, 0), 1)) +- && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) ++ /* know operands[0] == operands[1]. */ ++ ++ if (MEM_P (operands[2])) + { +- changed = true; +- log = INTVAL (XEXP (XEXP (x, 0), 1)); +- XEXP (x, 0) = gen_rtx_MULT (Pmode, +- force_reg (Pmode, XEXP (XEXP (x, 0), 0)), +- GEN_INT (1 << log)); ++ p = "%Z2\t%2"; ++ break; + } + +- if (GET_CODE (XEXP (x, 1)) == ASHIFT +- && CONST_INT_P (XEXP (XEXP (x, 1), 1)) +- && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) ++ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) + { +- changed = true; +- log = INTVAL (XEXP (XEXP (x, 1), 1)); +- XEXP (x, 1) = gen_rtx_MULT (Pmode, +- force_reg (Pmode, XEXP (XEXP (x, 1), 0)), +- GEN_INT (1 << log)); ++ if (STACK_TOP_P (operands[0])) ++ /* How is it that we are storing to a dead operand[2]? ++ Well, presumably operands[1] is dead too. We can't ++ store the result to st(0) as st(0) gets popped on this ++ instruction. Instead store to operands[2] (which I ++ think has to be st(1)). st(1) will be popped later. ++ gcc <= 2.8.1 didn't have this check and generated ++ assembly code that the Unixware assembler rejected. */ ++ p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ ++ else ++ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ ++ break; + } + +- /* Put multiply first if it isn't already. */ +- if (GET_CODE (XEXP (x, 1)) == MULT) ++ if (STACK_TOP_P (operands[0])) ++ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ ++ else ++ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ ++ break; ++ ++ case MINUS: ++ case DIV: ++ if (MEM_P (operands[1])) + { +- std::swap (XEXP (x, 0), XEXP (x, 1)); +- changed = true; ++ p = "r%Z1\t%1"; ++ break; + } + +- /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) +- into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be +- created by virtual register instantiation, register elimination, and +- similar optimizations. */ +- if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) ++ if (MEM_P (operands[2])) + { +- changed = true; +- x = gen_rtx_PLUS (Pmode, +- gen_rtx_PLUS (Pmode, XEXP (x, 0), +- XEXP (XEXP (x, 1), 0)), +- XEXP (XEXP (x, 1), 1)); ++ p = "%Z2\t%2"; ++ break; + } + +- /* Canonicalize +- (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) +- into (plus (plus (mult (reg) (const)) (reg)) (const)). 
*/ +- else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS +- && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT +- && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS +- && CONSTANT_P (XEXP (x, 1))) ++ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) + { +- rtx constant; +- rtx other = NULL_RTX; +- +- if (CONST_INT_P (XEXP (x, 1))) +- { +- constant = XEXP (x, 1); +- other = XEXP (XEXP (XEXP (x, 0), 1), 1); +- } +- else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1))) +- { +- constant = XEXP (XEXP (XEXP (x, 0), 1), 1); +- other = XEXP (x, 1); +- } ++#if SYSV386_COMPAT ++ /* The SystemV/386 SVR3.2 assembler, and probably all AT&T ++ derived assemblers, confusingly reverse the direction of ++ the operation for fsub{r} and fdiv{r} when the ++ destination register is not st(0). The Intel assembler ++ doesn't have this brain damage. Read !SYSV386_COMPAT to ++ figure out what the hardware really does. */ ++ if (STACK_TOP_P (operands[0])) ++ p = "{p\t%0, %2|rp\t%2, %0}"; + else +- constant = 0; +- +- if (constant) +- { +- changed = true; +- x = gen_rtx_PLUS (Pmode, +- gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), +- XEXP (XEXP (XEXP (x, 0), 1), 0)), +- plus_constant (Pmode, other, +- INTVAL (constant))); +- } ++ p = "{rp\t%2, %0|p\t%0, %2}"; ++#else ++ if (STACK_TOP_P (operands[0])) ++ /* As above for fmul/fadd, we can't store to st(0). */ ++ p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ ++ else ++ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ ++#endif ++ break; + } + +- if (changed && ix86_legitimate_address_p (mode, x, false)) +- return x; +- +- if (GET_CODE (XEXP (x, 0)) == MULT) ++ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) + { +- changed = true; +- XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0)); ++#if SYSV386_COMPAT ++ if (STACK_TOP_P (operands[0])) ++ p = "{rp\t%0, %1|p\t%1, %0}"; ++ else ++ p = "{p\t%1, %0|rp\t%0, %1}"; ++#else ++ if (STACK_TOP_P (operands[0])) ++ p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ ++ else ++ p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ ++#endif ++ break; + } + +- if (GET_CODE (XEXP (x, 1)) == MULT) ++ if (STACK_TOP_P (operands[0])) + { +- changed = true; +- XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1)); ++ if (STACK_TOP_P (operands[1])) ++ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ ++ else ++ p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ ++ break; + } +- +- if (changed +- && REG_P (XEXP (x, 1)) +- && REG_P (XEXP (x, 0))) +- return x; +- +- if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) ++ else if (STACK_TOP_P (operands[1])) + { +- changed = true; +- x = legitimize_pic_address (x, 0); ++#if SYSV386_COMPAT ++ p = "{\t%1, %0|r\t%0, %1}"; ++#else ++ p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ ++#endif ++ } ++ else ++ { ++#if SYSV386_COMPAT ++ p = "{r\t%2, %0|\t%0, %2}"; ++#else ++ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ ++#endif + } ++ break; + +- if (changed && ix86_legitimate_address_p (mode, x, false)) +- return x; ++ default: ++ gcc_unreachable (); ++ } + +- if (REG_P (XEXP (x, 0))) +- { +- rtx temp = gen_reg_rtx (Pmode); +- rtx val = force_operand (XEXP (x, 1), temp); +- if (val != temp) +- { +- val = convert_to_mode (Pmode, val, 1); +- emit_move_insn (temp, val); +- } ++ strcat (buf, p); ++ return buf; ++} + +- XEXP (x, 1) = temp; +- return x; +- } ++/* Return needed mode for entity in optimize_mode_switching pass. 
*/ + +- else if (REG_P (XEXP (x, 1))) +- { +- rtx temp = gen_reg_rtx (Pmode); +- rtx val = force_operand (XEXP (x, 0), temp); +- if (val != temp) +- { +- val = convert_to_mode (Pmode, val, 1); +- emit_move_insn (temp, val); +- } ++static int ++ix86_dirflag_mode_needed (rtx_insn *insn) ++{ ++ if (CALL_P (insn)) ++ { ++ if (cfun->machine->func_type == TYPE_NORMAL) ++ return X86_DIRFLAG_ANY; ++ else ++ /* No need to emit CLD in interrupt handler for TARGET_CLD. */ ++ return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET; ++ } + +- XEXP (x, 0) = temp; +- return x; +- } ++ if (recog_memoized (insn) < 0) ++ return X86_DIRFLAG_ANY; ++ ++ if (get_attr_type (insn) == TYPE_STR) ++ { ++ /* Emit cld instruction if stringops are used in the function. */ ++ if (cfun->machine->func_type == TYPE_NORMAL) ++ return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY; ++ else ++ return X86_DIRFLAG_RESET; + } + +- return x; ++ return X86_DIRFLAG_ANY; + } +- +-/* Print an integer constant expression in assembler syntax. Addition +- and subtraction are the only arithmetic that may appear in these +- expressions. FILE is the stdio stream to write to, X is the rtx, and +- CODE is the operand print code from the output string. */ + +-static void +-output_pic_addr_const (FILE *file, rtx x, int code) ++/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ ++ ++static bool ++ix86_check_avx_upper_register (const_rtx exp) + { +- char buf[256]; ++ return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128; ++} + +- switch (GET_CODE (x)) ++/* Return needed mode for entity in optimize_mode_switching pass. */ ++ ++static int ++ix86_avx_u128_mode_needed (rtx_insn *insn) ++{ ++ if (CALL_P (insn)) + { +- case PC: +- gcc_assert (flag_pic); +- putc ('.', file); +- break; ++ rtx link; + +- case SYMBOL_REF: +- if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS) +- output_addr_const (file, x); +- else ++ /* Needed mode is set to AVX_U128_CLEAN if there are ++ no 256bit or 512bit modes used in function arguments. */ ++ for (link = CALL_INSN_FUNCTION_USAGE (insn); ++ link; ++ link = XEXP (link, 1)) + { +- const char *name = XSTR (x, 0); +- +- /* Mark the decl as referenced so that cgraph will +- output the function. */ +- if (SYMBOL_REF_DECL (x)) +- mark_decl_referenced (SYMBOL_REF_DECL (x)); ++ if (GET_CODE (XEXP (link, 0)) == USE) ++ { ++ rtx arg = XEXP (XEXP (link, 0), 0); + +-#if TARGET_MACHO +- if (MACHOPIC_INDIRECT +- && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) +- name = machopic_indirection_name (x, /*stub_p=*/true); +-#endif +- assemble_name (file, name); ++ if (ix86_check_avx_upper_register (arg)) ++ return AVX_U128_DIRTY; ++ } + } +- if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF) +- && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) +- fputs ("@PLT", file); +- break; + +- case LABEL_REF: +- x = XEXP (x, 0); +- /* FALLTHRU */ +- case CODE_LABEL: +- ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); +- assemble_name (asm_out_file, buf); +- break; ++ return AVX_U128_CLEAN; ++ } + +- case CONST_INT: +- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); +- break; ++ /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced. ++ Hardware changes state only when a 256bit register is written to, ++ but we need to prevent the compiler from moving optimal insertion ++ point above eventual read from 256bit or 512 bit register. 
*/ ++ subrtx_iterator::array_type array; ++ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) ++ if (ix86_check_avx_upper_register (*iter)) ++ return AVX_U128_DIRTY; + +- case CONST: +- /* This used to output parentheses around the expression, +- but that does not work on the 386 (either ATT or BSD assembler). */ +- output_pic_addr_const (file, XEXP (x, 0), code); +- break; ++ return AVX_U128_ANY; ++} + +- case CONST_DOUBLE: +- /* We can't handle floating point constants; +- TARGET_PRINT_OPERAND must handle them. */ +- output_operand_lossage ("floating constant misused"); +- break; ++/* Return mode that i387 must be switched into ++ prior to the execution of insn. */ + +- case PLUS: +- /* Some assemblers need integer constants to appear first. */ +- if (CONST_INT_P (XEXP (x, 0))) +- { +- output_pic_addr_const (file, XEXP (x, 0), code); +- putc ('+', file); +- output_pic_addr_const (file, XEXP (x, 1), code); +- } +- else +- { +- gcc_assert (CONST_INT_P (XEXP (x, 1))); +- output_pic_addr_const (file, XEXP (x, 1), code); +- putc ('+', file); +- output_pic_addr_const (file, XEXP (x, 0), code); +- } ++static int ++ix86_i387_mode_needed (int entity, rtx_insn *insn) ++{ ++ enum attr_i387_cw mode; ++ ++ /* The mode UNINITIALIZED is used to store control word after a ++ function call or ASM pattern. The mode ANY specify that function ++ has no requirements on the control word and make no changes in the ++ bits we are interested in. */ ++ ++ if (CALL_P (insn) ++ || (NONJUMP_INSN_P (insn) ++ && (asm_noperands (PATTERN (insn)) >= 0 ++ || GET_CODE (PATTERN (insn)) == ASM_INPUT))) ++ return I387_CW_UNINITIALIZED; ++ ++ if (recog_memoized (insn) < 0) ++ return I387_CW_ANY; ++ ++ mode = get_attr_i387_cw (insn); ++ ++ switch (entity) ++ { ++ case I387_TRUNC: ++ if (mode == I387_CW_TRUNC) ++ return mode; + break; + +- case MINUS: +- if (!TARGET_MACHO) +- putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); +- output_pic_addr_const (file, XEXP (x, 0), code); +- putc ('-', file); +- output_pic_addr_const (file, XEXP (x, 1), code); +- if (!TARGET_MACHO) +- putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); ++ case I387_FLOOR: ++ if (mode == I387_CW_FLOOR) ++ return mode; + break; + +- case UNSPEC: +- gcc_assert (XVECLEN (x, 0) == 1); +- output_pic_addr_const (file, XVECEXP (x, 0, 0), code); +- switch (XINT (x, 1)) +- { +- case UNSPEC_GOT: +- fputs ("@GOT", file); +- break; +- case UNSPEC_GOTOFF: +- fputs ("@GOTOFF", file); +- break; +- case UNSPEC_PLTOFF: +- fputs ("@PLTOFF", file); +- break; +- case UNSPEC_PCREL: +- fputs (ASSEMBLER_DIALECT == ASM_ATT ? +- "(%rip)" : "[rip]", file); +- break; +- case UNSPEC_GOTPCREL: +- fputs (ASSEMBLER_DIALECT == ASM_ATT ? +- "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); +- break; +- case UNSPEC_GOTTPOFF: +- /* FIXME: This might be @TPOFF in Sun ld too. */ +- fputs ("@gottpoff", file); +- break; +- case UNSPEC_TPOFF: +- fputs ("@tpoff", file); +- break; +- case UNSPEC_NTPOFF: +- if (TARGET_64BIT) +- fputs ("@tpoff", file); +- else +- fputs ("@ntpoff", file); +- break; +- case UNSPEC_DTPOFF: +- fputs ("@dtpoff", file); +- break; +- case UNSPEC_GOTNTPOFF: +- if (TARGET_64BIT) +- fputs (ASSEMBLER_DIALECT == ASM_ATT ? 
+- "@gottpoff(%rip)": "@gottpoff[rip]", file); +- else +- fputs ("@gotntpoff", file); +- break; +- case UNSPEC_INDNTPOFF: +- fputs ("@indntpoff", file); +- break; +-#if TARGET_MACHO +- case UNSPEC_MACHOPIC_OFFSET: +- putc ('-', file); +- machopic_output_function_base_name (file); +- break; +-#endif +- default: +- output_operand_lossage ("invalid UNSPEC as operand"); +- break; +- } +- break; ++ case I387_CEIL: ++ if (mode == I387_CW_CEIL) ++ return mode; ++ break; + + default: +- output_operand_lossage ("invalid expression as operand"); ++ gcc_unreachable (); + } ++ ++ return I387_CW_ANY; + } + +-/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. +- We need to emit DTP-relative relocations. */ ++/* Return mode that entity must be switched into ++ prior to the execution of insn. */ + +-static void ATTRIBUTE_UNUSED +-i386_output_dwarf_dtprel (FILE *file, int size, rtx x) ++static int ++ix86_mode_needed (int entity, rtx_insn *insn) + { +- fputs (ASM_LONG, file); +- output_addr_const (file, x); +- fputs ("@dtpoff", file); +- switch (size) ++ switch (entity) + { +- case 4: +- break; +- case 8: +- fputs (", 0", file); +- break; ++ case X86_DIRFLAG: ++ return ix86_dirflag_mode_needed (insn); ++ case AVX_U128: ++ return ix86_avx_u128_mode_needed (insn); ++ case I387_TRUNC: ++ case I387_FLOOR: ++ case I387_CEIL: ++ return ix86_i387_mode_needed (entity, insn); + default: + gcc_unreachable (); +- } ++ } ++ return 0; + } + +-/* Return true if X is a representation of the PIC register. This copes +- with calls from ix86_find_base_term, where the register might have +- been replaced by a cselib value. */ +- +-static bool +-ix86_pic_register_p (rtx x) +-{ +- if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) +- return (pic_offset_table_rtx +- && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); +- else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT) +- return true; +- else if (!REG_P (x)) +- return false; +- else if (pic_offset_table_rtx) ++/* Check if a 256bit or 512bit AVX register is referenced in stores. */ ++ ++static void ++ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) ++ { ++ if (ix86_check_avx_upper_register (dest)) + { +- if (REGNO (x) == REGNO (pic_offset_table_rtx)) +- return true; +- if (HARD_REGISTER_P (x) +- && !HARD_REGISTER_P (pic_offset_table_rtx) +- && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) +- return true; +- return false; ++ bool *used = (bool *) data; ++ *used = true; + } +- else +- return REGNO (x) == PIC_OFFSET_TABLE_REGNUM; +-} ++ } + +-/* Helper function for ix86_delegitimize_address. +- Attempt to delegitimize TLS local-exec accesses. */ ++/* Calculate mode of upper 128bit AVX registers after the insn. 
*/ + +-static rtx +-ix86_delegitimize_tls_address (rtx orig_x) ++static int ++ix86_avx_u128_mode_after (int mode, rtx_insn *insn) + { +- rtx x = orig_x, unspec; +- struct ix86_address addr; ++ rtx pat = PATTERN (insn); + +- if (!TARGET_TLS_DIRECT_SEG_REFS) +- return orig_x; +- if (MEM_P (x)) +- x = XEXP (x, 0); +- if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode) +- return orig_x; +- if (ix86_decompose_address (x, &addr) == 0 +- || addr.seg != DEFAULT_TLS_SEG_REG +- || addr.disp == NULL_RTX +- || GET_CODE (addr.disp) != CONST) +- return orig_x; +- unspec = XEXP (addr.disp, 0); +- if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1))) +- unspec = XEXP (unspec, 0); +- if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF) +- return orig_x; +- x = XVECEXP (unspec, 0, 0); +- gcc_assert (GET_CODE (x) == SYMBOL_REF); +- if (unspec != XEXP (addr.disp, 0)) +- x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1)); +- if (addr.index) ++ if (vzeroupper_pattern (pat, VOIDmode) ++ || vzeroall_pattern (pat, VOIDmode)) ++ return AVX_U128_CLEAN; ++ ++ /* We know that state is clean after CALL insn if there are no ++ 256bit or 512bit registers used in the function return register. */ ++ if (CALL_P (insn)) + { +- rtx idx = addr.index; +- if (addr.scale != 1) +- idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale)); +- x = gen_rtx_PLUS (Pmode, idx, x); ++ bool avx_upper_reg_found = false; ++ note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); ++ ++ return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; + } +- if (addr.base) +- x = gen_rtx_PLUS (Pmode, addr.base, x); +- if (MEM_P (orig_x)) +- x = replace_equiv_address_nv (orig_x, x); +- return x; +-} + +-/* In the name of slightly smaller debug output, and to cater to +- general assembler lossage, recognize PIC+GOTOFF and turn it back +- into a direct symbol reference. ++ /* Otherwise, return current mode. Remember that if insn ++ references AVX 256bit or 512bit registers, the mode was already ++ changed to DIRTY from MODE_NEEDED. */ ++ return mode; ++} + +- On Darwin, this is necessary to avoid a crash, because Darwin +- has a different PIC label for each routine but the DWARF debugging +- information is not associated with any particular routine, so it's +- necessary to remove references to the PIC label from RTL stored by +- the DWARF output code. ++/* Return the mode that an insn results in. */ + +- This helper is used in the normal ix86_delegitimize_address +- entrypoint (e.g. used in the target delegitimization hook) and +- in ix86_find_base_term. As compile time memory optimization, we +- avoid allocating rtxes that will not change anything on the outcome +- of the callers (find_base_value and find_base_term). */ ++static int ++ix86_mode_after (int entity, int mode, rtx_insn *insn) ++{ ++ switch (entity) ++ { ++ case X86_DIRFLAG: ++ return mode; ++ case AVX_U128: ++ return ix86_avx_u128_mode_after (mode, insn); ++ case I387_TRUNC: ++ case I387_FLOOR: ++ case I387_CEIL: ++ return mode; ++ default: ++ gcc_unreachable (); ++ } ++} + +-static inline rtx +-ix86_delegitimize_address_1 (rtx x, bool base_term_p) ++static int ++ix86_dirflag_mode_entry (void) + { +- rtx orig_x = delegitimize_mem_from_attrs (x); +- /* addend is NULL or some rtx if x is something+GOTOFF where +- something doesn't include the PIC register. */ +- rtx addend = NULL_RTX; +- /* reg_addend is NULL or a multiple of some register. */ +- rtx reg_addend = NULL_RTX; +- /* const_addend is NULL or a const_int. 
*/ +- rtx const_addend = NULL_RTX; +- /* This is the result, or NULL. */ +- rtx result = NULL_RTX; ++ /* For TARGET_CLD or in the interrupt handler we can't assume ++ direction flag state at function entry. */ ++ if (TARGET_CLD ++ || cfun->machine->func_type != TYPE_NORMAL) ++ return X86_DIRFLAG_ANY; + +- x = orig_x; ++ return X86_DIRFLAG_RESET; ++} + +- if (MEM_P (x)) +- x = XEXP (x, 0); ++static int ++ix86_avx_u128_mode_entry (void) ++{ ++ tree arg; + +- if (TARGET_64BIT) ++ /* Entry mode is set to AVX_U128_DIRTY if there are ++ 256bit or 512bit modes used in function arguments. */ ++ for (arg = DECL_ARGUMENTS (current_function_decl); arg; ++ arg = TREE_CHAIN (arg)) + { +- if (GET_CODE (x) == CONST +- && GET_CODE (XEXP (x, 0)) == PLUS +- && GET_MODE (XEXP (x, 0)) == Pmode +- && CONST_INT_P (XEXP (XEXP (x, 0), 1)) +- && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC +- && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL) +- { +- /* find_base_{value,term} only care about MEMs with arg_pointer_rtx +- base. A CONST can't be arg_pointer_rtx based. */ +- if (base_term_p && MEM_P (orig_x)) +- return orig_x; +- rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0); +- x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2); +- if (MEM_P (orig_x)) +- x = replace_equiv_address_nv (orig_x, x); +- return x; +- } +- +- if (GET_CODE (x) == CONST +- && GET_CODE (XEXP (x, 0)) == UNSPEC +- && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL +- || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL) +- && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)) +- { +- x = XVECEXP (XEXP (x, 0), 0, 0); +- if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) +- { +- x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x)); +- if (x == NULL_RTX) +- return orig_x; +- } +- return x; +- } +- +- if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC) +- return ix86_delegitimize_tls_address (orig_x); ++ rtx incoming = DECL_INCOMING_RTL (arg); + +- /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic +- and -mcmodel=medium -fpic. */ ++ if (incoming && ix86_check_avx_upper_register (incoming)) ++ return AVX_U128_DIRTY; + } + +- if (GET_CODE (x) != PLUS +- || GET_CODE (XEXP (x, 1)) != CONST) +- return ix86_delegitimize_tls_address (orig_x); ++ return AVX_U128_CLEAN; ++} + +- if (ix86_pic_register_p (XEXP (x, 0))) +- /* %ebx + GOT/GOTOFF */ +- ; +- else if (GET_CODE (XEXP (x, 0)) == PLUS) +- { +- /* %ebx + %reg * scale + GOT/GOTOFF */ +- reg_addend = XEXP (x, 0); +- if (ix86_pic_register_p (XEXP (reg_addend, 0))) +- reg_addend = XEXP (reg_addend, 1); +- else if (ix86_pic_register_p (XEXP (reg_addend, 1))) +- reg_addend = XEXP (reg_addend, 0); +- else +- { +- reg_addend = NULL_RTX; +- addend = XEXP (x, 0); +- } +- } +- else +- addend = XEXP (x, 0); ++/* Return a mode that ENTITY is assumed to be ++ switched to at function entry. 
*/ + +- x = XEXP (XEXP (x, 1), 0); +- if (GET_CODE (x) == PLUS +- && CONST_INT_P (XEXP (x, 1))) ++static int ++ix86_mode_entry (int entity) ++{ ++ switch (entity) + { +- const_addend = XEXP (x, 1); +- x = XEXP (x, 0); ++ case X86_DIRFLAG: ++ return ix86_dirflag_mode_entry (); ++ case AVX_U128: ++ return ix86_avx_u128_mode_entry (); ++ case I387_TRUNC: ++ case I387_FLOOR: ++ case I387_CEIL: ++ return I387_CW_ANY; ++ default: ++ gcc_unreachable (); + } ++} + +- if (GET_CODE (x) == UNSPEC +- && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) +- || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)) +- || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC +- && !MEM_P (orig_x) && !addend))) +- result = XVECEXP (x, 0, 0); ++static int ++ix86_avx_u128_mode_exit (void) ++{ ++ rtx reg = crtl->return_rtx; + +- if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x) +- && !MEM_P (orig_x)) +- result = XVECEXP (x, 0, 0); ++ /* Exit mode is set to AVX_U128_DIRTY if there are 256bit ++ or 512 bit modes used in the function return register. */ ++ if (reg && ix86_check_avx_upper_register (reg)) ++ return AVX_U128_DIRTY; + +- if (! result) +- return ix86_delegitimize_tls_address (orig_x); ++ /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit ++ modes used in function arguments, otherwise return AVX_U128_CLEAN. ++ */ ++ return ix86_avx_u128_mode_entry (); ++} + +- /* For (PLUS something CONST_INT) both find_base_{value,term} just +- recurse on the first operand. */ +- if (const_addend && !base_term_p) +- result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); +- if (reg_addend) +- result = gen_rtx_PLUS (Pmode, reg_addend, result); +- if (addend) +- { +- /* If the rest of original X doesn't involve the PIC register, add +- addend and subtract pic_offset_table_rtx. This can happen e.g. +- for code like: +- leal (%ebx, %ecx, 4), %ecx +- ... +- movl foo@GOTOFF(%ecx), %edx +- in which case we return (%ecx - %ebx) + foo +- or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg +- and reload has completed. Don't do the latter for debug, +- as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */ +- if (pic_offset_table_rtx +- && (!reload_completed || !ix86_use_pseudo_pic_reg ())) +- result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend), +- pic_offset_table_rtx), +- result); +- else if (base_term_p +- && pic_offset_table_rtx +- && !TARGET_MACHO +- && !TARGET_VXWORKS_RTP) +- { +- rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); +- tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp); +- result = gen_rtx_PLUS (Pmode, tmp, result); +- } +- else +- return orig_x; +- } +- if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x)) ++/* Return a mode that ENTITY is assumed to be ++ switched to at function exit. */ ++ ++static int ++ix86_mode_exit (int entity) ++{ ++ switch (entity) + { +- result = lowpart_subreg (GET_MODE (orig_x), result, Pmode); +- if (result == NULL_RTX) +- return orig_x; ++ case X86_DIRFLAG: ++ return X86_DIRFLAG_ANY; ++ case AVX_U128: ++ return ix86_avx_u128_mode_exit (); ++ case I387_TRUNC: ++ case I387_FLOOR: ++ case I387_CEIL: ++ return I387_CW_ANY; ++ default: ++ gcc_unreachable (); + } +- return result; + } + +-/* The normal instantiation of the above template. */ +- +-static rtx +-ix86_delegitimize_address (rtx x) ++static int ++ix86_mode_priority (int, int n) + { +- return ix86_delegitimize_address_1 (x, false); ++ return n; + } + +-/* If X is a machine specific address (i.e. 
a symbol or label being +- referenced as a displacement from the GOT implemented using an +- UNSPEC), then return the base term. Otherwise return X. */ ++/* Output code to initialize control word copies used by trunc?f?i and ++ rounding patterns. CURRENT_MODE is set to current control word, ++ while NEW_MODE is set to new control word. */ + +-rtx +-ix86_find_base_term (rtx x) ++static void ++emit_i387_cw_initialization (int mode) + { +- rtx term; ++ rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); ++ rtx new_mode; + +- if (TARGET_64BIT) +- { +- if (GET_CODE (x) != CONST) +- return x; +- term = XEXP (x, 0); +- if (GET_CODE (term) == PLUS +- && CONST_INT_P (XEXP (term, 1))) +- term = XEXP (term, 0); +- if (GET_CODE (term) != UNSPEC +- || (XINT (term, 1) != UNSPEC_GOTPCREL +- && XINT (term, 1) != UNSPEC_PCREL)) +- return x; ++ enum ix86_stack_slot slot; + +- return XVECEXP (term, 0, 0); +- } ++ rtx reg = gen_reg_rtx (HImode); + +- return ix86_delegitimize_address_1 (x, true); +-} +- +-/* Return true if X shouldn't be emitted into the debug info. +- Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_ +- symbol easily into the .debug_info section, so we need not to +- delegitimize, but instead assemble as @gotoff. +- Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically +- assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */ +- +-static bool +-ix86_const_not_ok_for_debug_p (rtx x) +-{ +- if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF) +- return true; +- +- if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0) +- return true; +- +- return false; +-} +- +-static void +-put_condition_code (enum rtx_code code, machine_mode mode, bool reverse, +- bool fp, FILE *file) +-{ +- const char *suffix; +- +- if (mode == CCFPmode) +- { +- code = ix86_fp_compare_code_to_integer (code); +- mode = CCmode; +- } +- if (reverse) +- code = reverse_condition (code); ++ emit_insn (gen_x86_fnstcw_1 (stored_mode)); ++ emit_move_insn (reg, copy_rtx (stored_mode)); + +- switch (code) ++ switch (mode) + { +- case EQ: +- gcc_assert (mode != CCGZmode); +- switch (mode) +- { +- case E_CCAmode: +- suffix = "a"; +- break; +- case E_CCCmode: +- suffix = "c"; +- break; +- case E_CCOmode: +- suffix = "o"; +- break; +- case E_CCPmode: +- suffix = "p"; +- break; +- case E_CCSmode: +- suffix = "s"; +- break; +- default: +- suffix = "e"; +- break; +- } +- break; +- case NE: +- gcc_assert (mode != CCGZmode); +- switch (mode) +- { +- case E_CCAmode: +- suffix = "na"; +- break; +- case E_CCCmode: +- suffix = "nc"; +- break; +- case E_CCOmode: +- suffix = "no"; +- break; +- case E_CCPmode: +- suffix = "np"; +- break; +- case E_CCSmode: +- suffix = "ns"; +- break; +- default: +- suffix = "ne"; +- break; +- } ++ case I387_CW_TRUNC: ++ /* round toward zero (truncate) */ ++ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); ++ slot = SLOT_CW_TRUNC; + break; +- case GT: +- gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); +- suffix = "g"; ++ ++ case I387_CW_FLOOR: ++ /* round down toward -oo */ ++ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); ++ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); ++ slot = SLOT_CW_FLOOR; + break; +- case GTU: +- /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. +- Those same assemblers have the same but opposite lossage on cmov. */ +- if (mode == CCmode) +- suffix = fp ? 
"nbe" : "a"; +- else +- gcc_unreachable (); ++ ++ case I387_CW_CEIL: ++ /* round up toward +oo */ ++ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); ++ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); ++ slot = SLOT_CW_CEIL; + break; +- case LT: +- switch (mode) +- { +- case E_CCNOmode: +- case E_CCGOCmode: +- suffix = "s"; +- break; + +- case E_CCmode: +- case E_CCGCmode: +- case E_CCGZmode: +- suffix = "l"; +- break; ++ default: ++ gcc_unreachable (); ++ } + +- default: +- gcc_unreachable (); +- } +- break; +- case LTU: +- if (mode == CCmode || mode == CCGZmode) +- suffix = "b"; +- else if (mode == CCCmode) +- suffix = fp ? "b" : "c"; +- else +- gcc_unreachable (); +- break; +- case GE: +- switch (mode) +- { +- case E_CCNOmode: +- case E_CCGOCmode: +- suffix = "ns"; +- break; ++ gcc_assert (slot < MAX_386_STACK_LOCALS); + +- case E_CCmode: +- case E_CCGCmode: +- case E_CCGZmode: +- suffix = "ge"; +- break; ++ new_mode = assign_386_stack_local (HImode, slot); ++ emit_move_insn (new_mode, reg); ++} + +- default: +- gcc_unreachable (); +- } +- break; +- case GEU: +- if (mode == CCmode || mode == CCGZmode) +- suffix = "nb"; +- else if (mode == CCCmode) +- suffix = fp ? "nb" : "nc"; +- else +- gcc_unreachable (); +- break; +- case LE: +- gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); +- suffix = "le"; +- break; +- case LEU: +- if (mode == CCmode) +- suffix = "be"; +- else +- gcc_unreachable (); ++/* Generate one or more insns to set ENTITY to MODE. */ ++ ++static void ++ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, ++ HARD_REG_SET regs_live ATTRIBUTE_UNUSED) ++{ ++ switch (entity) ++ { ++ case X86_DIRFLAG: ++ if (mode == X86_DIRFLAG_RESET) ++ emit_insn (gen_cld ()); + break; +- case UNORDERED: +- suffix = fp ? "u" : "p"; ++ case AVX_U128: ++ if (mode == AVX_U128_CLEAN) ++ emit_insn (gen_avx_vzeroupper ()); + break; +- case ORDERED: +- suffix = fp ? "nu" : "np"; ++ case I387_TRUNC: ++ case I387_FLOOR: ++ case I387_CEIL: ++ if (mode != I387_CW_ANY ++ && mode != I387_CW_UNINITIALIZED) ++ emit_i387_cw_initialization (mode); + break; + default: + gcc_unreachable (); + } +- fputs (suffix, file); + } + +-/* Print the name of register X to FILE based on its machine mode and number. +- If CODE is 'w', pretend the mode is HImode. +- If CODE is 'b', pretend the mode is QImode. +- If CODE is 'k', pretend the mode is SImode. +- If CODE is 'q', pretend the mode is DImode. +- If CODE is 'x', pretend the mode is V4SFmode. +- If CODE is 't', pretend the mode is V8SFmode. +- If CODE is 'g', pretend the mode is V16SFmode. +- If CODE is 'h', pretend the reg is the 'high' byte register. +- If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. +- If CODE is 'd', duplicate the operand for AVX instruction. +- If CODE is 'V', print naked full integer register name without %. +- */ ++/* Output code for INSN to convert a float to a signed int. OPERANDS ++ are the insn operands. The output may be [HSD]Imode and the input ++ operand may be [SDX]Fmode. 
*/ + +-void +-print_reg (rtx x, int code, FILE *file) ++const char * ++output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp) + { +- const char *reg; +- int msize; +- unsigned int regno; +- bool duplicated; ++ bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); ++ bool dimode_p = GET_MODE (operands[0]) == DImode; ++ int round_mode = get_attr_i387_cw (insn); + +- if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V') +- putc ('%', file); ++ static char buf[40]; ++ const char *p; + +- if (x == pc_rtx) +- { +- gcc_assert (TARGET_64BIT); +- fputs ("rip", file); +- return; +- } ++ /* Jump through a hoop or two for DImode, since the hardware has no ++ non-popping instruction. We used to do this a different way, but ++ that was somewhat fragile and broke with post-reload splitters. */ ++ if ((dimode_p || fisttp) && !stack_top_dies) ++ output_asm_insn ("fld\t%y1", operands); + +- if (code == 'y' && STACK_TOP_P (x)) +- { +- fputs ("st(0)", file); +- return; +- } ++ gcc_assert (STACK_TOP_P (operands[1])); ++ gcc_assert (MEM_P (operands[0])); ++ gcc_assert (GET_MODE (operands[1]) != TFmode); + +- if (code == 'w') +- msize = 2; +- else if (code == 'b') +- msize = 1; +- else if (code == 'k') +- msize = 4; +- else if (code == 'q') +- msize = 8; +- else if (code == 'h') +- msize = 0; +- else if (code == 'x') +- msize = 16; +- else if (code == 't') +- msize = 32; +- else if (code == 'g') +- msize = 64; +- else +- msize = GET_MODE_SIZE (GET_MODE (x)); +- +- regno = REGNO (x); +- +- if (regno == ARG_POINTER_REGNUM +- || regno == FRAME_POINTER_REGNUM +- || regno == FPSR_REG) +- { +- output_operand_lossage +- ("invalid use of register '%s'", reg_names[regno]); +- return; +- } +- else if (regno == FLAGS_REG) +- { +- output_operand_lossage ("invalid use of asm flag output"); +- return; +- } ++ if (fisttp) ++ return "fisttp%Z0\t%0"; + +- if (code == 'V') +- { +- if (GENERAL_REGNO_P (regno)) +- msize = GET_MODE_SIZE (word_mode); +- else +- error ("% modifier on non-integer register"); +- } ++ strcpy (buf, "fist"); + +- duplicated = code == 'd' && TARGET_AVX; ++ if (round_mode != I387_CW_ANY) ++ output_asm_insn ("fldcw\t%3", operands); + +- switch (msize) +- { +- case 16: +- case 12: +- case 8: +- if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode)) +- warning (0, "unsupported size for integer register"); +- /* FALLTHRU */ +- case 4: +- if (LEGACY_INT_REGNO_P (regno)) +- putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file); +- /* FALLTHRU */ +- case 2: +- normal: +- reg = hi_reg_name[regno]; +- break; +- case 1: +- if (regno >= ARRAY_SIZE (qi_reg_name)) +- goto normal; +- if (!ANY_QI_REGNO_P (regno)) +- error ("unsupported size for integer register"); +- reg = qi_reg_name[regno]; +- break; +- case 0: +- if (regno >= ARRAY_SIZE (qi_high_reg_name)) +- goto normal; +- reg = qi_high_reg_name[regno]; +- break; +- case 32: +- case 64: +- if (SSE_REGNO_P (regno)) +- { +- gcc_assert (!duplicated); +- putc (msize == 32 ? 
'y' : 'z', file); +- reg = hi_reg_name[regno] + 1; +- break; +- } +- goto normal; +- default: +- gcc_unreachable (); +- } ++ p = "p%Z0\t%0"; ++ strcat (buf, p + !(stack_top_dies || dimode_p)); + +- fputs (reg, file); ++ output_asm_insn (buf, operands); + +- /* Irritatingly, AMD extended registers use +- different naming convention: "r%d[bwd]" */ +- if (REX_INT_REGNO_P (regno)) +- { +- gcc_assert (TARGET_64BIT); +- switch (msize) +- { +- case 0: +- error ("extended registers have no high halves"); +- break; +- case 1: +- putc ('b', file); +- break; +- case 2: +- putc ('w', file); +- break; +- case 4: +- putc ('d', file); +- break; +- case 8: +- /* no suffix */ +- break; +- default: +- error ("unsupported operand size for extended register"); +- break; +- } +- return; +- } ++ if (round_mode != I387_CW_ANY) ++ output_asm_insn ("fldcw\t%2", operands); + +- if (duplicated) +- { +- if (ASSEMBLER_DIALECT == ASM_ATT) +- fprintf (file, ", %%%s", reg); +- else +- fprintf (file, ", %s", reg); +- } ++ return ""; + } + +-/* Meaning of CODE: +- L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. +- C -- print opcode suffix for set/cmov insn. +- c -- like C, but print reversed condition +- F,f -- likewise, but for floating-point. +- O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", +- otherwise nothing +- R -- print embedded rounding and sae. +- r -- print only sae. +- z -- print the opcode suffix for the size of the current operand. +- Z -- likewise, with special suffixes for x87 instructions. +- * -- print a star (in certain assembler syntax) +- A -- print an absolute memory reference. +- E -- print address with DImode register names if TARGET_64BIT. +- w -- print the operand as if it's a "word" (HImode) even if it isn't. +- s -- print a shift double count, followed by the assemblers argument +- delimiter. +- b -- print the QImode name of the register for the indicated operand. +- %b0 would print %al if operands[0] is reg 0. +- w -- likewise, print the HImode name of the register. +- k -- likewise, print the SImode name of the register. +- q -- likewise, print the DImode name of the register. +- x -- likewise, print the V4SFmode name of the register. +- t -- likewise, print the V8SFmode name of the register. +- g -- likewise, print the V16SFmode name of the register. +- h -- print the QImode name for a "high" register, either ah, bh, ch or dh. +- y -- print "st(0)" instead of "st" as a register. +- d -- print duplicated register operand for AVX instruction. +- D -- print condition for SSE cmp instruction. +- P -- if PIC, print an @PLT suffix. +- p -- print raw symbol name. +- X -- don't print any sort of PIC '@' suffix for a symbol. +- & -- print some in-use local-dynamic symbol name. +- H -- print a memory address offset by 8; used for sse high-parts +- Y -- print condition for XOP pcom* instruction. +- V -- print naked full integer register name without %. +- + -- print a branch hint as 'cs' or 'ds' prefix +- ; -- print a semicolon (after prefixes due to bug in older gas). +- ~ -- print "i" if TARGET_AVX2, "f" otherwise. +- ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode +- M -- print addr32 prefix for TARGET_X32 with VSIB address. +- ! -- print NOTRACK prefix for jxx/call/ret instructions if required. +- */ ++/* Output code for x87 ffreep insn. The OPNO argument, which may only ++ have the values zero or one, indicates the ffreep insn's operand ++ from the OPERANDS array. 
*/ + +-void +-ix86_print_operand (FILE *file, rtx x, int code) ++static const char * ++output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) + { +- if (code) ++ if (TARGET_USE_FFREEP) ++#ifdef HAVE_AS_IX86_FFREEP ++ return opno ? "ffreep\t%y1" : "ffreep\t%y0"; ++#else + { +- switch (code) +- { +- case 'A': +- switch (ASSEMBLER_DIALECT) +- { +- case ASM_ATT: +- putc ('*', file); +- break; +- +- case ASM_INTEL: +- /* Intel syntax. For absolute addresses, registers should not +- be surrounded by braces. */ +- if (!REG_P (x)) +- { +- putc ('[', file); +- ix86_print_operand (file, x, 0); +- putc (']', file); +- return; +- } +- break; ++ static char retval[32]; ++ int regno = REGNO (operands[opno]); + +- default: +- gcc_unreachable (); +- } ++ gcc_assert (STACK_REGNO_P (regno)); + +- ix86_print_operand (file, x, 0); +- return; ++ regno -= FIRST_STACK_REG; + +- case 'E': +- /* Wrap address in an UNSPEC to declare special handling. */ +- if (TARGET_64BIT) +- x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); ++ snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno); ++ return retval; ++ } ++#endif + +- output_address (VOIDmode, x); +- return; ++ return opno ? "fstp\t%y1" : "fstp\t%y0"; ++} + +- case 'L': +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('l', file); +- return; + +- case 'W': +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('w', file); +- return; ++/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi ++ should be used. UNORDERED_P is true when fucom should be used. */ + +- case 'B': +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('b', file); +- return; ++const char * ++output_fp_compare (rtx_insn *insn, rtx *operands, ++ bool eflags_p, bool unordered_p) ++{ ++ rtx *xops = eflags_p ? &operands[0] : &operands[1]; ++ bool stack_top_dies; + +- case 'Q': +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('l', file); +- return; ++ static char buf[40]; ++ const char *p; + +- case 'S': +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('s', file); +- return; ++ gcc_assert (STACK_TOP_P (xops[0])); + +- case 'T': +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('t', file); +- return; ++ stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); + +- case 'O': +-#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX +- if (ASSEMBLER_DIALECT != ASM_ATT) +- return; +- +- switch (GET_MODE_SIZE (GET_MODE (x))) +- { +- case 2: +- putc ('w', file); +- break; +- +- case 4: +- putc ('l', file); +- break; ++ if (eflags_p) ++ { ++ p = unordered_p ? "fucomi" : "fcomi"; ++ strcpy (buf, p); + +- case 8: +- putc ('q', file); +- break; ++ p = "p\t{%y1, %0|%0, %y1}"; ++ strcat (buf, p + !stack_top_dies); + +- default: +- output_operand_lossage ("invalid operand size for operand " +- "code 'O'"); +- return; +- } ++ return buf; ++ } + +- putc ('.', file); +-#endif +- return; ++ if (STACK_REG_P (xops[1]) ++ && stack_top_dies ++ && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1)) ++ { ++ gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1); + +- case 'z': +- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) +- { +- /* Opcodes don't get size suffixes if using Intel opcodes. */ +- if (ASSEMBLER_DIALECT == ASM_INTEL) +- return; ++ /* If both the top of the 387 stack die, and the other operand ++ is also a stack register that dies, then this must be a ++ `fcompp' float compare. */ ++ p = unordered_p ? 
"fucompp" : "fcompp"; ++ strcpy (buf, p); ++ } ++ else if (const0_operand (xops[1], VOIDmode)) ++ { ++ gcc_assert (!unordered_p); ++ strcpy (buf, "ftst"); ++ } ++ else ++ { ++ if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT) ++ { ++ gcc_assert (!unordered_p); ++ p = "ficom"; ++ } ++ else ++ p = unordered_p ? "fucom" : "fcom"; + +- switch (GET_MODE_SIZE (GET_MODE (x))) +- { +- case 1: +- putc ('b', file); +- return; ++ strcpy (buf, p); + +- case 2: +- putc ('w', file); +- return; ++ p = "p%Z2\t%y2"; ++ strcat (buf, p + !stack_top_dies); ++ } + +- case 4: +- putc ('l', file); +- return; ++ output_asm_insn (buf, operands); ++ return "fnstsw\t%0"; ++} + +- case 8: +- putc ('q', file); +- return; ++void ++ix86_output_addr_vec_elt (FILE *file, int value) ++{ ++ const char *directive = ASM_LONG; + +- default: +- output_operand_lossage ("invalid operand size for operand " +- "code 'z'"); +- return; +- } +- } ++#ifdef ASM_QUAD ++ if (TARGET_LP64) ++ directive = ASM_QUAD; ++#else ++ gcc_assert (!TARGET_64BIT); ++#endif + +- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) +- warning (0, "non-integer operand used with operand code %"); +- /* FALLTHRU */ ++ fprintf (file, "%s%s%d\n", directive, LPREFIX, value); ++} + +- case 'Z': +- /* 387 opcodes don't get size suffixes if using Intel opcodes. */ +- if (ASSEMBLER_DIALECT == ASM_INTEL) +- return; ++void ++ix86_output_addr_diff_elt (FILE *file, int value, int rel) ++{ ++ const char *directive = ASM_LONG; + +- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) +- { +- switch (GET_MODE_SIZE (GET_MODE (x))) +- { +- case 2: +-#ifdef HAVE_AS_IX86_FILDS +- putc ('s', file); ++#ifdef ASM_QUAD ++ if (TARGET_64BIT && CASE_VECTOR_MODE == DImode) ++ directive = ASM_QUAD; ++#else ++ gcc_assert (!TARGET_64BIT); + #endif +- return; ++ /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */ ++ if (TARGET_64BIT || TARGET_VXWORKS_RTP) ++ fprintf (file, "%s%s%d-%s%d\n", ++ directive, LPREFIX, value, LPREFIX, rel); ++#if TARGET_MACHO ++ else if (TARGET_MACHO) ++ { ++ fprintf (file, ASM_LONG "%s%d-", LPREFIX, value); ++ machopic_output_function_base_name (file); ++ putc ('\n', file); ++ } ++#endif ++ else if (HAVE_AS_GOTOFF_IN_DATA) ++ fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value); ++ else ++ asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n", ++ GOT_SYMBOL_NAME, LPREFIX, value); ++} ++ ++#define LEA_MAX_STALL (3) ++#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) + +- case 4: +- putc ('l', file); +- return; ++/* Increase given DISTANCE in half-cycles according to ++ dependencies between PREV and NEXT instructions. ++ Add 1 half-cycle if there is no dependency and ++ go to next cycle if there is some dependecy. */ + +- case 8: +-#ifdef HAVE_AS_IX86_FILDQ +- putc ('q', file); +-#else +- fputs ("ll", file); +-#endif +- return; ++static unsigned int ++increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance) ++{ ++ df_ref def, use; + +- default: +- break; +- } +- } +- else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) +- { +- /* 387 opcodes don't get size suffixes +- if the operands are registers. 
*/ +- if (STACK_REG_P (x)) +- return; ++ if (!prev || !next) ++ return distance + (distance & 1) + 2; + +- switch (GET_MODE_SIZE (GET_MODE (x))) +- { +- case 4: +- putc ('s', file); +- return; ++ if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) ++ return distance + 1; + +- case 8: +- putc ('l', file); +- return; ++ FOR_EACH_INSN_USE (use, next) ++ FOR_EACH_INSN_DEF (def, prev) ++ if (!DF_REF_IS_ARTIFICIAL (def) ++ && DF_REF_REGNO (use) == DF_REF_REGNO (def)) ++ return distance + (distance & 1) + 2; + +- case 12: +- case 16: +- putc ('t', file); +- return; ++ return distance + 1; ++} + +- default: +- break; +- } +- } +- else +- { +- output_operand_lossage ("invalid operand type used with " +- "operand code 'Z'"); +- return; +- } ++/* Function checks if instruction INSN defines register number ++ REGNO1 or REGNO2. */ + +- output_operand_lossage ("invalid operand size for operand code 'Z'"); +- return; ++bool ++insn_defines_reg (unsigned int regno1, unsigned int regno2, ++ rtx_insn *insn) ++{ ++ df_ref def; + +- case 'd': +- case 'b': +- case 'w': +- case 'k': +- case 'q': +- case 'h': +- case 't': +- case 'g': +- case 'y': +- case 'x': +- case 'X': +- case 'P': +- case 'p': +- case 'V': +- break; ++ FOR_EACH_INSN_DEF (def, insn) ++ if (DF_REF_REG_DEF_P (def) ++ && !DF_REF_IS_ARTIFICIAL (def) ++ && (regno1 == DF_REF_REGNO (def) ++ || regno2 == DF_REF_REGNO (def))) ++ return true; + +- case 's': +- if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) +- { +- ix86_print_operand (file, x, 0); +- fputs (", ", file); +- } +- return; ++ return false; ++} + +- case 'Y': +- switch (GET_CODE (x)) +- { +- case NE: +- fputs ("neq", file); +- break; +- case EQ: +- fputs ("eq", file); +- break; +- case GE: +- case GEU: +- fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); +- break; +- case GT: +- case GTU: +- fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file); +- break; +- case LE: +- case LEU: +- fputs ("le", file); +- break; +- case LT: +- case LTU: +- fputs ("lt", file); +- break; +- case UNORDERED: +- fputs ("unord", file); +- break; +- case ORDERED: +- fputs ("ord", file); +- break; +- case UNEQ: +- fputs ("ueq", file); +- break; +- case UNGE: +- fputs ("nlt", file); +- break; +- case UNGT: +- fputs ("nle", file); +- break; +- case UNLE: +- fputs ("ule", file); +- break; +- case UNLT: +- fputs ("ult", file); +- break; +- case LTGT: +- fputs ("une", file); +- break; +- default: +- output_operand_lossage ("operand is not a condition code, " +- "invalid operand code 'Y'"); +- return; +- } +- return; ++/* Function checks if instruction INSN uses register number ++ REGNO as a part of address expression. */ + +- case 'D': +- /* Little bit of braindamage here. The SSE compare instructions +- does use completely different names for the comparisons that the +- fp conditional moves. 
*/ +- switch (GET_CODE (x)) +- { +- case UNEQ: +- if (TARGET_AVX) +- { +- fputs ("eq_us", file); +- break; +- } +- /* FALLTHRU */ +- case EQ: +- fputs ("eq", file); +- break; +- case UNLT: +- if (TARGET_AVX) +- { +- fputs ("nge", file); +- break; +- } +- /* FALLTHRU */ +- case LT: +- fputs ("lt", file); +- break; +- case UNLE: +- if (TARGET_AVX) +- { +- fputs ("ngt", file); +- break; +- } +- /* FALLTHRU */ +- case LE: +- fputs ("le", file); +- break; +- case UNORDERED: +- fputs ("unord", file); +- break; +- case LTGT: +- if (TARGET_AVX) +- { +- fputs ("neq_oq", file); +- break; +- } +- /* FALLTHRU */ +- case NE: +- fputs ("neq", file); +- break; +- case GE: +- if (TARGET_AVX) +- { +- fputs ("ge", file); +- break; +- } +- /* FALLTHRU */ +- case UNGE: +- fputs ("nlt", file); +- break; +- case GT: +- if (TARGET_AVX) +- { +- fputs ("gt", file); +- break; +- } +- /* FALLTHRU */ +- case UNGT: +- fputs ("nle", file); +- break; +- case ORDERED: +- fputs ("ord", file); +- break; +- default: +- output_operand_lossage ("operand is not a condition code, " +- "invalid operand code 'D'"); +- return; +- } +- return; ++static bool ++insn_uses_reg_mem (unsigned int regno, rtx insn) ++{ ++ df_ref use; + +- case 'F': +- case 'f': +-#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('.', file); +- gcc_fallthrough (); +-#endif ++ FOR_EACH_INSN_USE (use, insn) ++ if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use)) ++ return true; + +- case 'C': +- case 'c': +- if (!COMPARISON_P (x)) +- { +- output_operand_lossage ("operand is not a condition code, " +- "invalid operand code '%c'", code); +- return; +- } +- put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), +- code == 'c' || code == 'f', +- code == 'F' || code == 'f', +- file); +- return; ++ return false; ++} + +- case 'H': +- if (!offsettable_memref_p (x)) +- { +- output_operand_lossage ("operand is not an offsettable memory " +- "reference, invalid operand code 'H'"); +- return; +- } +- /* It doesn't actually matter what mode we use here, as we're +- only going to use this for printing. */ +- x = adjust_address_nv (x, DImode, 8); +- /* Output 'qword ptr' for intel assembler dialect. */ +- if (ASSEMBLER_DIALECT == ASM_INTEL) +- code = 'q'; +- break; ++/* Search backward for non-agu definition of register number REGNO1 ++ or register number REGNO2 in basic block starting from instruction ++ START up to head of basic block or instruction INSN. + +- case 'K': +- if (!CONST_INT_P (x)) +- { +- output_operand_lossage ("operand is not an integer, invalid " +- "operand code 'K'"); +- return; +- } ++ Function puts true value into *FOUND var if definition was found ++ and false otherwise. + +- if (INTVAL (x) & IX86_HLE_ACQUIRE) +-#ifdef HAVE_AS_IX86_HLE +- fputs ("xacquire ", file); +-#else +- fputs ("\n" ASM_BYTE "0xf2\n\t", file); +-#endif +- else if (INTVAL (x) & IX86_HLE_RELEASE) +-#ifdef HAVE_AS_IX86_HLE +- fputs ("xrelease ", file); +-#else +- fputs ("\n" ASM_BYTE "0xf3\n\t", file); +-#endif +- /* We do not want to print value of the operand. */ +- return; ++ Distance in half-cycles between START and found instruction or head ++ of BB is added to DISTANCE and returned. */ + +- case 'N': +- if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) +- fputs ("{z}", file); +- return; ++static int ++distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2, ++ rtx_insn *insn, int distance, ++ rtx_insn *start, bool *found) ++{ ++ basic_block bb = start ? 
BLOCK_FOR_INSN (start) : NULL; ++ rtx_insn *prev = start; ++ rtx_insn *next = NULL; + +- case 'r': +- if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE) ++ *found = false; ++ ++ while (prev ++ && prev != insn ++ && distance < LEA_SEARCH_THRESHOLD) ++ { ++ if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev)) ++ { ++ distance = increase_distance (prev, next, distance); ++ if (insn_defines_reg (regno1, regno2, prev)) + { +- output_operand_lossage ("operand is not a specific integer, " +- "invalid operand code 'r'"); +- return; ++ if (recog_memoized (prev) < 0 ++ || get_attr_type (prev) != TYPE_LEA) ++ { ++ *found = true; ++ return distance; ++ } + } + +- if (ASSEMBLER_DIALECT == ASM_INTEL) +- fputs (", ", file); ++ next = prev; ++ } ++ if (prev == BB_HEAD (bb)) ++ break; + +- fputs ("{sae}", file); ++ prev = PREV_INSN (prev); ++ } + +- if (ASSEMBLER_DIALECT == ASM_ATT) +- fputs (", ", file); ++ return distance; ++} + +- return; ++/* Search backward for non-agu definition of register number REGNO1 ++ or register number REGNO2 in INSN's basic block until ++ 1. Pass LEA_SEARCH_THRESHOLD instructions, or ++ 2. Reach neighbor BBs boundary, or ++ 3. Reach agu definition. ++ Returns the distance between the non-agu definition point and INSN. ++ If no definition point, returns -1. */ + +- case 'R': +- if (!CONST_INT_P (x)) +- { +- output_operand_lossage ("operand is not an integer, invalid " +- "operand code 'R'"); +- return; +- } ++static int ++distance_non_agu_define (unsigned int regno1, unsigned int regno2, ++ rtx_insn *insn) ++{ ++ basic_block bb = BLOCK_FOR_INSN (insn); ++ int distance = 0; ++ bool found = false; + +- if (ASSEMBLER_DIALECT == ASM_INTEL) +- fputs (", ", file); ++ if (insn != BB_HEAD (bb)) ++ distance = distance_non_agu_define_in_bb (regno1, regno2, insn, ++ distance, PREV_INSN (insn), ++ &found); + +- switch (INTVAL (x)) ++ if (!found && distance < LEA_SEARCH_THRESHOLD) ++ { ++ edge e; ++ edge_iterator ei; ++ bool simple_loop = false; ++ ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ if (e->src == bb) ++ { ++ simple_loop = true; ++ break; ++ } ++ ++ if (simple_loop) ++ distance = distance_non_agu_define_in_bb (regno1, regno2, ++ insn, distance, ++ BB_END (bb), &found); ++ else ++ { ++ int shortest_dist = -1; ++ bool found_in_bb = false; ++ ++ FOR_EACH_EDGE (e, ei, bb->preds) + { +- case ROUND_NEAREST_INT | ROUND_SAE: +- fputs ("{rn-sae}", file); +- break; +- case ROUND_NEG_INF | ROUND_SAE: +- fputs ("{rd-sae}", file); +- break; +- case ROUND_POS_INF | ROUND_SAE: +- fputs ("{ru-sae}", file); +- break; +- case ROUND_ZERO | ROUND_SAE: +- fputs ("{rz-sae}", file); +- break; +- default: +- output_operand_lossage ("operand is not a specific integer, " +- "invalid operand code 'R'"); +- } ++ int bb_dist ++ = distance_non_agu_define_in_bb (regno1, regno2, ++ insn, distance, ++ BB_END (e->src), ++ &found_in_bb); ++ if (found_in_bb) ++ { ++ if (shortest_dist < 0) ++ shortest_dist = bb_dist; ++ else if (bb_dist > 0) ++ shortest_dist = MIN (bb_dist, shortest_dist); + +- if (ASSEMBLER_DIALECT == ASM_ATT) +- fputs (", ", file); ++ found = true; ++ } ++ } + +- return; ++ distance = shortest_dist; ++ } ++ } + +- case '*': +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('*', file); +- return; ++ /* get_attr_type may modify recog data. We want to make sure ++ that recog data is valid for instruction INSN, on which ++ distance_non_agu_define is called. INSN is unchanged here. 
*/ ++ extract_insn_cached (insn); + +- case '&': +- { +- const char *name = get_some_local_dynamic_name (); +- if (name == NULL) +- output_operand_lossage ("'%%&' used without any " +- "local dynamic TLS references"); +- else +- assemble_name (file, name); +- return; +- } ++ if (!found) ++ return -1; + +- case '+': +- { +- rtx x; ++ return distance >> 1; ++} + +- if (!optimize +- || optimize_function_for_size_p (cfun) +- || !TARGET_BRANCH_PREDICTION_HINTS) +- return; ++/* Return the distance in half-cycles between INSN and the next ++ insn that uses register number REGNO in memory address added ++ to DISTANCE. Return -1 if REGNO0 is set. + +- x = find_reg_note (current_output_insn, REG_BR_PROB, 0); +- if (x) +- { +- int pred_val = profile_probability::from_reg_br_prob_note +- (XINT (x, 0)).to_reg_br_prob_base (); ++ Put true value into *FOUND if register usage was found and ++ false otherwise. ++ Put true value into *REDEFINED if register redefinition was ++ found and false otherwise. */ + +- if (pred_val < REG_BR_PROB_BASE * 45 / 100 +- || pred_val > REG_BR_PROB_BASE * 55 / 100) +- { +- bool taken = pred_val > REG_BR_PROB_BASE / 2; +- bool cputaken +- = final_forward_branch_p (current_output_insn) == 0; ++static int ++distance_agu_use_in_bb (unsigned int regno, ++ rtx_insn *insn, int distance, rtx_insn *start, ++ bool *found, bool *redefined) ++{ ++ basic_block bb = NULL; ++ rtx_insn *next = start; ++ rtx_insn *prev = NULL; + +- /* Emit hints only in the case default branch prediction +- heuristics would fail. */ +- if (taken != cputaken) +- { +- /* We use 3e (DS) prefix for taken branches and +- 2e (CS) prefix for not taken branches. */ +- if (taken) +- fputs ("ds ; ", file); +- else +- fputs ("cs ; ", file); +- } +- } +- } +- return; +- } ++ *found = false; ++ *redefined = false; + +- case ';': +-#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX +- putc (';', file); +-#endif +- return; ++ if (start != NULL_RTX) ++ { ++ bb = BLOCK_FOR_INSN (start); ++ if (start != BB_HEAD (bb)) ++ /* If insn and start belong to the same bb, set prev to insn, ++ so the call to increase_distance will increase the distance ++ between insns by 1. */ ++ prev = insn; ++ } + +- case '~': +- putc (TARGET_AVX2 ? 'i' : 'f', file); +- return; ++ while (next ++ && next != insn ++ && distance < LEA_SEARCH_THRESHOLD) ++ { ++ if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next)) ++ { ++ distance = increase_distance(prev, next, distance); ++ if (insn_uses_reg_mem (regno, next)) ++ { ++ /* Return DISTANCE if OP0 is used in memory ++ address in NEXT. */ ++ *found = true; ++ return distance; ++ } + +- case 'M': +- if (TARGET_X32) ++ if (insn_defines_reg (regno, INVALID_REGNUM, next)) + { +- /* NB: 32-bit indices in VSIB address are sign-extended +- to 64 bits. In x32, if 32-bit address 0xf7fa3010 is +- sign-extended to 0xfffffffff7fa3010 which is invalid +- address. Add addr32 prefix if there is no base +- register nor symbol. */ +- bool ok; +- struct ix86_address parts; +- ok = ix86_decompose_address (x, &parts); +- gcc_assert (ok && parts.index == NULL_RTX); +- if (parts.base == NULL_RTX +- && (parts.disp == NULL_RTX +- || !symbolic_operand (parts.disp, +- GET_MODE (parts.disp)))) +- fputs ("addr32 ", file); ++ /* Return -1 if OP0 is set in NEXT. 
*/ ++ *redefined = true; ++ return -1; + } +- return; + +- case '^': +- if (TARGET_64BIT && Pmode != word_mode) +- fputs ("addr32 ", file); +- return; ++ prev = next; ++ } + +- case '!': +- if (ix86_notrack_prefixed_insn_p (current_output_insn)) +- fputs ("notrack ", file); +- return; ++ if (next == BB_END (bb)) ++ break; + +- default: +- output_operand_lossage ("invalid operand code '%c'", code); +- } ++ next = NEXT_INSN (next); + } + +- if (REG_P (x)) +- print_reg (x, code, file); ++ return distance; ++} + +- else if (MEM_P (x)) ++/* Return the distance between INSN and the next insn that uses ++ register number REGNO0 in memory address. Return -1 if no such ++ a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */ ++ ++static int ++distance_agu_use (unsigned int regno0, rtx_insn *insn) ++{ ++ basic_block bb = BLOCK_FOR_INSN (insn); ++ int distance = 0; ++ bool found = false; ++ bool redefined = false; ++ ++ if (insn != BB_END (bb)) ++ distance = distance_agu_use_in_bb (regno0, insn, distance, ++ NEXT_INSN (insn), ++ &found, &redefined); ++ ++ if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD) + { +- rtx addr = XEXP (x, 0); ++ edge e; ++ edge_iterator ei; ++ bool simple_loop = false; + +- /* No `byte ptr' prefix for call instructions ... */ +- if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') +- { +- machine_mode mode = GET_MODE (x); +- const char *size; ++ FOR_EACH_EDGE (e, ei, bb->succs) ++ if (e->dest == bb) ++ { ++ simple_loop = true; ++ break; ++ } + +- /* Check for explicit size override codes. */ +- if (code == 'b') +- size = "BYTE"; +- else if (code == 'w') +- size = "WORD"; +- else if (code == 'k') +- size = "DWORD"; +- else if (code == 'q') +- size = "QWORD"; +- else if (code == 'x') +- size = "XMMWORD"; +- else if (code == 't') +- size = "YMMWORD"; +- else if (code == 'g') +- size = "ZMMWORD"; +- else if (mode == BLKmode) +- /* ... or BLKmode operands, when not overridden. */ +- size = NULL; +- else +- switch (GET_MODE_SIZE (mode)) +- { +- case 1: size = "BYTE"; break; +- case 2: size = "WORD"; break; +- case 4: size = "DWORD"; break; +- case 8: size = "QWORD"; break; +- case 12: size = "TBYTE"; break; +- case 16: +- if (mode == XFmode) +- size = "TBYTE"; +- else +- size = "XMMWORD"; +- break; +- case 32: size = "YMMWORD"; break; +- case 64: size = "ZMMWORD"; break; +- default: +- gcc_unreachable (); +- } +- if (size) ++ if (simple_loop) ++ distance = distance_agu_use_in_bb (regno0, insn, ++ distance, BB_HEAD (bb), ++ &found, &redefined); ++ else ++ { ++ int shortest_dist = -1; ++ bool found_in_bb = false; ++ bool redefined_in_bb = false; ++ ++ FOR_EACH_EDGE (e, ei, bb->succs) + { +- fputs (size, file); +- fputs (" PTR ", file); ++ int bb_dist ++ = distance_agu_use_in_bb (regno0, insn, ++ distance, BB_HEAD (e->dest), ++ &found_in_bb, &redefined_in_bb); ++ if (found_in_bb) ++ { ++ if (shortest_dist < 0) ++ shortest_dist = bb_dist; ++ else if (bb_dist > 0) ++ shortest_dist = MIN (bb_dist, shortest_dist); ++ ++ found = true; ++ } + } +- } + +- if (this_is_asm_operands && ! 
address_operand (addr, VOIDmode)) +- output_operand_lossage ("invalid constraints for operand"); +- else +- ix86_print_operand_address_as +- (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P'); ++ distance = shortest_dist; ++ } + } + +- else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode) ++ if (!found || redefined) ++ return -1; ++ ++ return distance >> 1; ++} ++ ++/* Define this macro to tune LEA priority vs ADD, it take effect when ++ there is a dilemma of choicing LEA or ADD ++ Negative value: ADD is more preferred than LEA ++ Zero: Netrual ++ Positive value: LEA is more preferred than ADD*/ ++#define IX86_LEA_PRIORITY 0 ++ ++/* Return true if usage of lea INSN has performance advantage ++ over a sequence of instructions. Instructions sequence has ++ SPLIT_COST cycles higher latency than lea latency. */ ++ ++static bool ++ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1, ++ unsigned int regno2, int split_cost, bool has_scale) ++{ ++ int dist_define, dist_use; ++ ++ /* For Silvermont if using a 2-source or 3-source LEA for ++ non-destructive destination purposes, or due to wanting ++ ability to use SCALE, the use of LEA is justified. */ ++ if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS ++ || TARGET_TREMONT || TARGET_INTEL) + { +- long l; ++ if (has_scale) ++ return true; ++ if (split_cost < 1) ++ return false; ++ if (regno0 == regno1 || regno0 == regno2) ++ return false; ++ return true; ++ } + +- REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); ++ dist_define = distance_non_agu_define (regno1, regno2, insn); ++ dist_use = distance_agu_use (regno0, insn); + +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('$', file); +- /* Sign extend 32bit SFmode immediate to 8 bytes. */ +- if (code == 'q') +- fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x", +- (unsigned long long) (int) l); ++ if (dist_define < 0 || dist_define >= LEA_MAX_STALL) ++ { ++ /* If there is no non AGU operand definition, no AGU ++ operand usage and split cost is 0 then both lea ++ and non lea variants have same priority. Currently ++ we prefer lea for 64 bit code and non lea on 32 bit ++ code. */ ++ if (dist_use < 0 && split_cost == 0) ++ return TARGET_64BIT || IX86_LEA_PRIORITY; + else +- fprintf (file, "0x%08x", (unsigned int) l); ++ return true; + } + +- else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode) +- { +- long l[2]; ++ /* With longer definitions distance lea is more preferable. ++ Here we change it to take into account splitting cost and ++ lea priority. */ ++ dist_define += split_cost + IX86_LEA_PRIORITY; + +- REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); ++ /* If there is no use in memory addess then we just check ++ that split cost exceeds AGU stall. */ ++ if (dist_use < 0) ++ return dist_define > LEA_MAX_STALL; + +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('$', file); +- fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff); +- } ++ /* If this insn has both backward non-agu dependence and forward ++ agu dependence, the one with short distance takes effect. */ ++ return dist_define >= dist_use; ++} + +- /* These float cases don't actually occur as immediate operands. */ +- else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode) +- { +- char dstr[30]; ++/* Return true if it is legal to clobber flags by INSN and ++ false otherwise. 
*/ + +- real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); +- fputs (dstr, file); +- } ++static bool ++ix86_ok_to_clobber_flags (rtx_insn *insn) ++{ ++ basic_block bb = BLOCK_FOR_INSN (insn); ++ df_ref use; ++ bitmap live; + +- else ++ while (insn) + { +- /* We have patterns that allow zero sets of memory, for instance. +- In 64-bit mode, we should probably support all 8-byte vectors, +- since we can in fact encode that into an immediate. */ +- if (GET_CODE (x) == CONST_VECTOR) ++ if (NONDEBUG_INSN_P (insn)) + { +- if (x != CONST0_RTX (GET_MODE (x))) +- output_operand_lossage ("invalid vector immediate"); +- x = const0_rtx; +- } ++ FOR_EACH_INSN_USE (use, insn) ++ if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG) ++ return false; + +- if (code != 'P' && code != 'p') +- { +- if (CONST_INT_P (x)) +- { +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('$', file); +- } +- else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF +- || GET_CODE (x) == LABEL_REF) +- { +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('$', file); +- else +- fputs ("OFFSET FLAT:", file); +- } ++ if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn)) ++ return true; + } +- if (CONST_INT_P (x)) +- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); +- else if (flag_pic || MACHOPIC_INDIRECT) +- output_pic_addr_const (file, x, code); +- else +- output_addr_const (file, x); ++ ++ if (insn == BB_END (bb)) ++ break; ++ ++ insn = NEXT_INSN (insn); + } +-} + +-static bool +-ix86_print_operand_punct_valid_p (unsigned char code) +-{ +- return (code == '*' || code == '+' || code == '&' || code == ';' +- || code == '~' || code == '^' || code == '!'); ++ live = df_get_live_out(bb); ++ return !REGNO_REG_SET_P (live, FLAGS_REG); + } +- +-/* Print a memory operand whose address is ADDR. */ + +-static void +-ix86_print_operand_address_as (FILE *file, rtx addr, +- addr_space_t as, bool no_rip) ++/* Return true if we need to split op0 = op1 + op2 into a sequence of ++ move and add to avoid AGU stalls. */ ++ ++bool ++ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) + { +- struct ix86_address parts; +- rtx base, index, disp; +- int scale; +- int ok; +- bool vsib = false; +- int code = 0; ++ unsigned int regno0, regno1, regno2; + +- if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR) +- { +- ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); +- gcc_assert (parts.index == NULL_RTX); +- parts.index = XVECEXP (addr, 0, 1); +- parts.scale = INTVAL (XVECEXP (addr, 0, 2)); +- addr = XVECEXP (addr, 0, 0); +- vsib = true; +- } +- else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR) +- { +- gcc_assert (TARGET_64BIT); +- ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); +- code = 'q'; +- } +- else +- ok = ix86_decompose_address (addr, &parts); ++ /* Check if we need to optimize. */ ++ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) ++ return false; + +- gcc_assert (ok); ++ /* Check it is correct to split here. */ ++ if (!ix86_ok_to_clobber_flags(insn)) ++ return false; + +- base = parts.base; +- index = parts.index; +- disp = parts.disp; +- scale = parts.scale; ++ regno0 = true_regnum (operands[0]); ++ regno1 = true_regnum (operands[1]); ++ regno2 = true_regnum (operands[2]); + +- if (ADDR_SPACE_GENERIC_P (as)) +- as = parts.seg; ++ /* We need to split only adds with non destructive ++ destination operand. 
*/ ++ if (regno0 == regno1 || regno0 == regno2) ++ return false; + else +- gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg)); +- +- if (!ADDR_SPACE_GENERIC_P (as)) +- { +- if (ASSEMBLER_DIALECT == ASM_ATT) +- putc ('%', file); +- +- switch (as) +- { +- case ADDR_SPACE_SEG_FS: +- fputs ("fs:", file); +- break; +- case ADDR_SPACE_SEG_GS: +- fputs ("gs:", file); +- break; +- default: +- gcc_unreachable (); +- } +- } +- +- /* Use one byte shorter RIP relative addressing for 64bit mode. */ +- if (TARGET_64BIT && !base && !index && !no_rip) +- { +- rtx symbol = disp; ++ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false); ++} + +- if (GET_CODE (disp) == CONST +- && GET_CODE (XEXP (disp, 0)) == PLUS +- && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) +- symbol = XEXP (XEXP (disp, 0), 0); ++/* Return true if we should emit lea instruction instead of mov ++ instruction. */ + +- if (GET_CODE (symbol) == LABEL_REF +- || (GET_CODE (symbol) == SYMBOL_REF +- && SYMBOL_REF_TLS_MODEL (symbol) == 0)) +- base = pc_rtx; +- } ++bool ++ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) ++{ ++ unsigned int regno0, regno1; + +- if (!base && !index) +- { +- /* Displacement only requires special attention. */ +- if (CONST_INT_P (disp)) +- { +- if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as)) +- fputs ("ds:", file); +- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); +- } +- /* Load the external function address via the GOT slot to avoid PLT. */ +- else if (GET_CODE (disp) == CONST +- && GET_CODE (XEXP (disp, 0)) == UNSPEC +- && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL +- || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT) +- && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) +- output_pic_addr_const (file, disp, 0); +- else if (flag_pic) +- output_pic_addr_const (file, disp, 0); +- else +- output_addr_const (file, disp); +- } +- else +- { +- /* Print SImode register names to force addr32 prefix. */ +- if (SImode_address_operand (addr, VOIDmode)) +- { +- if (flag_checking) +- { +- gcc_assert (TARGET_64BIT); +- switch (GET_CODE (addr)) +- { +- case SUBREG: +- gcc_assert (GET_MODE (addr) == SImode); +- gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode); +- break; +- case ZERO_EXTEND: +- case AND: +- gcc_assert (GET_MODE (addr) == DImode); +- break; +- default: +- gcc_unreachable (); +- } +- } +- gcc_assert (!code); +- code = 'k'; +- } +- else if (code == 0 +- && TARGET_X32 +- && disp +- && CONST_INT_P (disp) +- && INTVAL (disp) < -16*1024*1024) +- { +- /* X32 runs in 64-bit mode, where displacement, DISP, in +- address DISP(%r64), is encoded as 32-bit immediate sign- +- extended from 32-bit to 64-bit. For -0x40000300(%r64), +- address is %r64 + 0xffffffffbffffd00. When %r64 < +- 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64, +- which is invalid for x32. The correct address is %r64 +- - 0x40000300 == 0xf7ffdd64. To properly encode +- -0x40000300(%r64) for x32, we zero-extend negative +- displacement by forcing addr32 prefix which truncates +- 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should +- zero-extend all negative displacements, including -1(%rsp). +- However, for small negative displacements, sign-extension +- won't cause overflow. We only zero-extend negative +- displacements if they < -16*1024*1024, which is also used +- to check legitimate address displacements for PIC. */ +- code = 'k'; +- } ++ /* Check if we need to optimize. 
*/ ++ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) ++ return false; + +- /* Since the upper 32 bits of RSP are always zero for x32, +- we can encode %esp as %rsp to avoid 0x67 prefix if +- there is no index register. */ +- if (TARGET_X32 && Pmode == SImode +- && !index && base && REG_P (base) && REGNO (base) == SP_REG) +- code = 'q'; ++ /* Use lea for reg to reg moves only. */ ++ if (!REG_P (operands[0]) || !REG_P (operands[1])) ++ return false; + +- if (ASSEMBLER_DIALECT == ASM_ATT) +- { +- if (disp) +- { +- if (flag_pic) +- output_pic_addr_const (file, disp, 0); +- else if (GET_CODE (disp) == LABEL_REF) +- output_asm_label (disp); +- else +- output_addr_const (file, disp); +- } ++ regno0 = true_regnum (operands[0]); ++ regno1 = true_regnum (operands[1]); + +- putc ('(', file); +- if (base) +- print_reg (base, code, file); +- if (index) +- { +- putc (',', file); +- print_reg (index, vsib ? 0 : code, file); +- if (scale != 1 || vsib) +- fprintf (file, ",%d", scale); +- } +- putc (')', file); +- } +- else +- { +- rtx offset = NULL_RTX; ++ return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false); ++} + +- if (disp) +- { +- /* Pull out the offset of a symbol; print any symbol itself. */ +- if (GET_CODE (disp) == CONST +- && GET_CODE (XEXP (disp, 0)) == PLUS +- && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) +- { +- offset = XEXP (XEXP (disp, 0), 1); +- disp = gen_rtx_CONST (VOIDmode, +- XEXP (XEXP (disp, 0), 0)); +- } ++/* Return true if we need to split lea into a sequence of ++ instructions to avoid AGU stalls. */ + +- if (flag_pic) +- output_pic_addr_const (file, disp, 0); +- else if (GET_CODE (disp) == LABEL_REF) +- output_asm_label (disp); +- else if (CONST_INT_P (disp)) +- offset = disp; +- else +- output_addr_const (file, disp); +- } ++bool ++ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[]) ++{ ++ unsigned int regno0, regno1, regno2; ++ int split_cost; ++ struct ix86_address parts; ++ int ok; + +- putc ('[', file); +- if (base) +- { +- print_reg (base, code, file); +- if (offset) +- { +- if (INTVAL (offset) >= 0) +- putc ('+', file); +- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); +- } +- } +- else if (offset) +- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); +- else +- putc ('0', file); ++ /* Check we need to optimize. */ ++ if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun)) ++ return false; + +- if (index) +- { +- putc ('+', file); +- print_reg (index, vsib ? 0 : code, file); +- if (scale != 1 || vsib) +- fprintf (file, "*%d", scale); +- } +- putc (']', file); +- } +- } +-} ++ /* The "at least two components" test below might not catch simple ++ move or zero extension insns if parts.base is non-NULL and parts.disp ++ is const0_rtx as the only components in the address, e.g. if the ++ register is %rbp or %r13. As this test is much cheaper and moves or ++ zero extensions are the common case, do this check first. */ ++ if (REG_P (operands[1]) ++ || (SImode_address_operand (operands[1], VOIDmode) ++ && REG_P (XEXP (operands[1], 0)))) ++ return false; + +-static void +-ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) +-{ +- ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false); +-} ++ /* Check if it is OK to split here. */ ++ if (!ix86_ok_to_clobber_flags (insn)) ++ return false; + +-/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. 
*/ ++ ok = ix86_decompose_address (operands[1], &parts); ++ gcc_assert (ok); + +-static bool +-i386_asm_output_addr_const_extra (FILE *file, rtx x) +-{ +- rtx op; ++ /* There should be at least two components in the address. */ ++ if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX) ++ + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2) ++ return false; + +- if (GET_CODE (x) != UNSPEC) ++ /* We should not split into add if non legitimate pic ++ operand is used as displacement. */ ++ if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp)) + return false; + +- op = XVECEXP (x, 0, 0); +- switch (XINT (x, 1)) +- { +- case UNSPEC_GOTOFF: +- output_addr_const (file, op); +- fputs ("@gotoff", file); +- break; +- case UNSPEC_GOTTPOFF: +- output_addr_const (file, op); +- /* FIXME: This might be @TPOFF in Sun ld. */ +- fputs ("@gottpoff", file); +- break; +- case UNSPEC_TPOFF: +- output_addr_const (file, op); +- fputs ("@tpoff", file); +- break; +- case UNSPEC_NTPOFF: +- output_addr_const (file, op); +- if (TARGET_64BIT) +- fputs ("@tpoff", file); +- else +- fputs ("@ntpoff", file); +- break; +- case UNSPEC_DTPOFF: +- output_addr_const (file, op); +- fputs ("@dtpoff", file); +- break; +- case UNSPEC_GOTNTPOFF: +- output_addr_const (file, op); +- if (TARGET_64BIT) +- fputs (ASSEMBLER_DIALECT == ASM_ATT ? +- "@gottpoff(%rip)" : "@gottpoff[rip]", file); +- else +- fputs ("@gotntpoff", file); +- break; +- case UNSPEC_INDNTPOFF: +- output_addr_const (file, op); +- fputs ("@indntpoff", file); +- break; +-#if TARGET_MACHO +- case UNSPEC_MACHOPIC_OFFSET: +- output_addr_const (file, op); +- putc ('-', file); +- machopic_output_function_base_name (file); +- break; +-#endif +- +- default: +- return false; +- } ++ regno0 = true_regnum (operands[0]) ; ++ regno1 = INVALID_REGNUM; ++ regno2 = INVALID_REGNUM; + +- return true; +-} +- +-/* Split one or more double-mode RTL references into pairs of half-mode +- references. The RTL can be REG, offsettable MEM, integer constant, or +- CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to +- split and "num" is its length. lo_half and hi_half are output arrays +- that parallel "operands". */ ++ if (parts.base) ++ regno1 = true_regnum (parts.base); ++ if (parts.index) ++ regno2 = true_regnum (parts.index); + +-void +-split_double_mode (machine_mode mode, rtx operands[], +- int num, rtx lo_half[], rtx hi_half[]) +-{ +- machine_mode half_mode; +- unsigned int byte; ++ split_cost = 0; + +- switch (mode) ++ /* Compute how many cycles we will add to execution time ++ if split lea into a sequence of instructions. */ ++ if (parts.base || parts.index) + { +- case E_TImode: +- half_mode = DImode; +- break; +- case E_DImode: +- half_mode = SImode; +- break; +- default: +- gcc_unreachable (); +- } +- +- byte = GET_MODE_SIZE (half_mode); ++ /* Have to use mov instruction if non desctructive ++ destination form is used. */ ++ if (regno1 != regno0 && regno2 != regno0) ++ split_cost += 1; + +- while (num--) +- { +- rtx op = operands[num]; ++ /* Have to add index to base if both exist. */ ++ if (parts.base && parts.index) ++ split_cost += 1; + +- /* simplify_subreg refuse to split volatile memory addresses, +- but we still have to handle it. */ +- if (MEM_P (op)) +- { +- lo_half[num] = adjust_address (op, half_mode, 0); +- hi_half[num] = adjust_address (op, half_mode, byte); +- } +- else ++ /* Have to use shift and adds if scale is 2 or greater. 
*/ ++ if (parts.scale > 1) + { +- lo_half[num] = simplify_gen_subreg (half_mode, op, +- GET_MODE (op) == VOIDmode +- ? mode : GET_MODE (op), 0); +- hi_half[num] = simplify_gen_subreg (half_mode, op, +- GET_MODE (op) == VOIDmode +- ? mode : GET_MODE (op), byte); ++ if (regno0 != regno1) ++ split_cost += 1; ++ else if (regno2 == regno0) ++ split_cost += 4; ++ else ++ split_cost += parts.scale; + } +- } +-} +- +-/* Output code to perform a 387 binary operation in INSN, one of PLUS, +- MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] +- is the expression of the binary operation. The output may either be +- emitted here, or returned to the caller, like all output_* functions. + +- There is no guarantee that the operands are the same mode, as they +- might be within FLOAT or FLOAT_EXTEND expressions. */ ++ /* Have to use add instruction with immediate if ++ disp is non zero. */ ++ if (parts.disp && parts.disp != const0_rtx) ++ split_cost += 1; + +-#ifndef SYSV386_COMPAT +-/* Set to 1 for compatibility with brain-damaged assemblers. No-one +- wants to fix the assemblers because that causes incompatibility +- with gcc. No-one wants to fix gcc because that causes +- incompatibility with assemblers... You can use the option of +- -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ +-#define SYSV386_COMPAT 1 +-#endif ++ /* Subtract the price of lea. */ ++ split_cost -= 1; ++ } + +-const char * +-output_387_binary_op (rtx_insn *insn, rtx *operands) +-{ +- static char buf[40]; +- const char *p; +- bool is_sse +- = (SSE_REG_P (operands[0]) +- || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2])); ++ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost, ++ parts.scale > 1); ++} + +- if (is_sse) +- p = "%v"; +- else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT +- || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) +- p = "fi"; +- else +- p = "f"; ++/* Return true if it is ok to optimize an ADD operation to LEA ++ operation to avoid flag register consumation. For most processors, ++ ADD is faster than LEA. For the processors like BONNELL, if the ++ destination register of LEA holds an actual address which will be ++ used soon, LEA is better and otherwise ADD is better. */ + +- strcpy (buf, p); ++bool ++ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[]) ++{ ++ unsigned int regno0 = true_regnum (operands[0]); ++ unsigned int regno1 = true_regnum (operands[1]); ++ unsigned int regno2 = true_regnum (operands[2]); + +- switch (GET_CODE (operands[3])) +- { +- case PLUS: +- p = "add"; break; +- case MINUS: +- p = "sub"; break; +- case MULT: +- p = "mul"; break; +- case DIV: +- p = "div"; break; +- default: +- gcc_unreachable (); +- } ++ /* If a = b + c, (a!=b && a!=c), must use lea form. */ ++ if (regno0 != regno1 && regno0 != regno2) ++ return true; + +- strcat (buf, p); ++ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) ++ return false; + +- if (is_sse) +- { +- p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd"; +- strcat (buf, p); ++ return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false); ++} + +- if (TARGET_AVX) +- p = "\t{%2, %1, %0|%0, %1, %2}"; +- else +- p = "\t{%2, %0|%0, %2}"; ++/* Return true if destination reg of SET_BODY is shift count of ++ USE_BODY. 
*/ + +- strcat (buf, p); +- return buf; +- } ++static bool ++ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body) ++{ ++ rtx set_dest; ++ rtx shift_rtx; ++ int i; + +- /* Even if we do not want to check the inputs, this documents input +- constraints. Which helps in understanding the following code. */ +- if (flag_checking) ++ /* Retrieve destination of SET_BODY. */ ++ switch (GET_CODE (set_body)) + { +- if (STACK_REG_P (operands[0]) +- && ((REG_P (operands[1]) +- && REGNO (operands[0]) == REGNO (operands[1]) +- && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) +- || (REG_P (operands[2]) +- && REGNO (operands[0]) == REGNO (operands[2]) +- && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) +- && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) +- ; /* ok */ +- else +- gcc_unreachable (); ++ case SET: ++ set_dest = SET_DEST (set_body); ++ if (!set_dest || !REG_P (set_dest)) ++ return false; ++ break; ++ case PARALLEL: ++ for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--) ++ if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i), ++ use_body)) ++ return true; ++ /* FALLTHROUGH */ ++ default: ++ return false; + } + +- switch (GET_CODE (operands[3])) ++ /* Retrieve shift count of USE_BODY. */ ++ switch (GET_CODE (use_body)) + { +- case MULT: +- case PLUS: +- if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) +- std::swap (operands[1], operands[2]); ++ case SET: ++ shift_rtx = XEXP (use_body, 1); ++ break; ++ case PARALLEL: ++ for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--) ++ if (ix86_dep_by_shift_count_body (set_body, ++ XVECEXP (use_body, 0, i))) ++ return true; ++ /* FALLTHROUGH */ ++ default: ++ return false; ++ } + +- /* know operands[0] == operands[1]. */ ++ if (shift_rtx ++ && (GET_CODE (shift_rtx) == ASHIFT ++ || GET_CODE (shift_rtx) == LSHIFTRT ++ || GET_CODE (shift_rtx) == ASHIFTRT ++ || GET_CODE (shift_rtx) == ROTATE ++ || GET_CODE (shift_rtx) == ROTATERT)) ++ { ++ rtx shift_count = XEXP (shift_rtx, 1); + +- if (MEM_P (operands[2])) ++ /* Return true if shift count is dest of SET_BODY. */ ++ if (REG_P (shift_count)) + { +- p = "%Z2\t%2"; +- break; ++ /* Add check since it can be invoked before register ++ allocation in pre-reload schedule. */ ++ if (reload_completed ++ && true_regnum (set_dest) == true_regnum (shift_count)) ++ return true; ++ else if (REGNO(set_dest) == REGNO(shift_count)) ++ return true; + } ++ } + +- if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) +- { +- if (STACK_TOP_P (operands[0])) +- /* How is it that we are storing to a dead operand[2]? +- Well, presumably operands[1] is dead too. We can't +- store the result to st(0) as st(0) gets popped on this +- instruction. Instead store to operands[2] (which I +- think has to be st(1)). st(1) will be popped later. +- gcc <= 2.8.1 didn't have this check and generated +- assembly code that the Unixware assembler rejected. 
*/ +- p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ +- else +- p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ +- break; +- } +- +- if (STACK_TOP_P (operands[0])) +- p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ +- else +- p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ +- break; +- +- case MINUS: +- case DIV: +- if (MEM_P (operands[1])) +- { +- p = "r%Z1\t%1"; +- break; +- } +- +- if (MEM_P (operands[2])) +- { +- p = "%Z2\t%2"; +- break; +- } +- +- if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) +- { +-#if SYSV386_COMPAT +- /* The SystemV/386 SVR3.2 assembler, and probably all AT&T +- derived assemblers, confusingly reverse the direction of +- the operation for fsub{r} and fdiv{r} when the +- destination register is not st(0). The Intel assembler +- doesn't have this brain damage. Read !SYSV386_COMPAT to +- figure out what the hardware really does. */ +- if (STACK_TOP_P (operands[0])) +- p = "{p\t%0, %2|rp\t%2, %0}"; +- else +- p = "{rp\t%2, %0|p\t%0, %2}"; +-#else +- if (STACK_TOP_P (operands[0])) +- /* As above for fmul/fadd, we can't store to st(0). */ +- p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ +- else +- p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ +-#endif +- break; +- } +- +- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) +- { +-#if SYSV386_COMPAT +- if (STACK_TOP_P (operands[0])) +- p = "{rp\t%0, %1|p\t%1, %0}"; +- else +- p = "{p\t%1, %0|rp\t%0, %1}"; +-#else +- if (STACK_TOP_P (operands[0])) +- p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ +- else +- p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ +-#endif +- break; +- } +- +- if (STACK_TOP_P (operands[0])) +- { +- if (STACK_TOP_P (operands[1])) +- p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ +- else +- p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ +- break; +- } +- else if (STACK_TOP_P (operands[1])) +- { +-#if SYSV386_COMPAT +- p = "{\t%1, %0|r\t%0, %1}"; +-#else +- p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ +-#endif +- } +- else +- { +-#if SYSV386_COMPAT +- p = "{r\t%2, %0|\t%0, %2}"; +-#else +- p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ +-#endif +- } +- break; +- +- default: +- gcc_unreachable (); +- } +- +- strcat (buf, p); +- return buf; ++ return false; + } + +-/* Return needed mode for entity in optimize_mode_switching pass. */ ++/* Return true if destination reg of SET_INSN is shift count of ++ USE_INSN. */ + +-static int +-ix86_dirflag_mode_needed (rtx_insn *insn) ++bool ++ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) + { +- if (CALL_P (insn)) +- { +- if (cfun->machine->func_type == TYPE_NORMAL) +- return X86_DIRFLAG_ANY; +- else +- /* No need to emit CLD in interrupt handler for TARGET_CLD. */ +- return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET; +- } +- +- if (recog_memoized (insn) < 0) +- return X86_DIRFLAG_ANY; ++ return ix86_dep_by_shift_count_body (PATTERN (set_insn), ++ PATTERN (use_insn)); ++} + +- if (get_attr_type (insn) == TYPE_STR) +- { +- /* Emit cld instruction if stringops are used in the function. */ +- if (cfun->machine->func_type == TYPE_NORMAL) +- return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY; +- else +- return X86_DIRFLAG_RESET; +- } ++/* Return TRUE or FALSE depending on whether the unary operator meets the ++ appropriate constraints. 
*/ + +- return X86_DIRFLAG_ANY; ++bool ++ix86_unary_operator_ok (enum rtx_code, ++ machine_mode, ++ rtx operands[2]) ++{ ++ /* If one of operands is memory, source and destination must match. */ ++ if ((MEM_P (operands[0]) ++ || MEM_P (operands[1])) ++ && ! rtx_equal_p (operands[0], operands[1])) ++ return false; ++ return true; + } + +-/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ ++/* Return TRUE if the operands to a vec_interleave_{high,low}v2df ++ are ok, keeping in mind the possible movddup alternative. */ + +-static bool +-ix86_check_avx_upper_register (const_rtx exp) ++bool ++ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) + { +- return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128; ++ if (MEM_P (operands[0])) ++ return rtx_equal_p (operands[0], operands[1 + high]); ++ if (MEM_P (operands[1]) && MEM_P (operands[2])) ++ return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]); ++ return true; + } + +-/* Return needed mode for entity in optimize_mode_switching pass. */ ++/* A subroutine of ix86_build_signbit_mask. If VECT is true, ++ then replicate the value for all elements of the vector ++ register. */ + +-static int +-ix86_avx_u128_mode_needed (rtx_insn *insn) ++rtx ++ix86_build_const_vector (machine_mode mode, bool vect, rtx value) + { +- if (CALL_P (insn)) +- { +- rtx link; ++ int i, n_elt; ++ rtvec v; ++ machine_mode scalar_mode; + +- /* Needed mode is set to AVX_U128_CLEAN if there are +- no 256bit or 512bit modes used in function arguments. */ +- for (link = CALL_INSN_FUNCTION_USAGE (insn); +- link; +- link = XEXP (link, 1)) +- { +- if (GET_CODE (XEXP (link, 0)) == USE) +- { +- rtx arg = XEXP (XEXP (link, 0), 0); ++ switch (mode) ++ { ++ case E_V64QImode: ++ case E_V32QImode: ++ case E_V16QImode: ++ case E_V32HImode: ++ case E_V16HImode: ++ case E_V8HImode: ++ case E_V16SImode: ++ case E_V8SImode: ++ case E_V4SImode: ++ case E_V8DImode: ++ case E_V4DImode: ++ case E_V2DImode: ++ gcc_assert (vect); ++ /* FALLTHRU */ ++ case E_V16SFmode: ++ case E_V8SFmode: ++ case E_V4SFmode: ++ case E_V8DFmode: ++ case E_V4DFmode: ++ case E_V2DFmode: ++ n_elt = GET_MODE_NUNITS (mode); ++ v = rtvec_alloc (n_elt); ++ scalar_mode = GET_MODE_INNER (mode); + +- if (ix86_check_avx_upper_register (arg)) +- return AVX_U128_DIRTY; +- } +- } ++ RTVEC_ELT (v, 0) = value; + +- return AVX_U128_CLEAN; +- } ++ for (i = 1; i < n_elt; ++i) ++ RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode); + +- /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced. +- Hardware changes state only when a 256bit register is written to, +- but we need to prevent the compiler from moving optimal insertion +- point above eventual read from 256bit or 512 bit register. */ +- subrtx_iterator::array_type array; +- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) +- if (ix86_check_avx_upper_register (*iter)) +- return AVX_U128_DIRTY; ++ return gen_rtx_CONST_VECTOR (mode, v); + +- return AVX_U128_ANY; ++ default: ++ gcc_unreachable (); ++ } + } + +-/* Return mode that i387 must be switched into +- prior to the execution of insn. */ ++/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders ++ and ix86_expand_int_vcond. Create a mask for the sign bit in MODE ++ for an SSE register. If VECT is true, then replicate the mask for ++ all elements of the vector register. If INVERT is true, then create ++ a mask excluding the sign bit. 
*/ + +-static int +-ix86_i387_mode_needed (int entity, rtx_insn *insn) ++rtx ++ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) + { +- enum attr_i387_cw mode; +- +- /* The mode UNINITIALIZED is used to store control word after a +- function call or ASM pattern. The mode ANY specify that function +- has no requirements on the control word and make no changes in the +- bits we are interested in. */ ++ machine_mode vec_mode, imode; ++ wide_int w; ++ rtx mask, v; + +- if (CALL_P (insn) +- || (NONJUMP_INSN_P (insn) +- && (asm_noperands (PATTERN (insn)) >= 0 +- || GET_CODE (PATTERN (insn)) == ASM_INPUT))) +- return I387_CW_UNINITIALIZED; +- +- if (recog_memoized (insn) < 0) +- return I387_CW_ANY; +- +- mode = get_attr_i387_cw (insn); +- +- switch (entity) ++ switch (mode) + { +- case I387_TRUNC: +- if (mode == I387_CW_TRUNC) +- return mode; ++ case E_V16SImode: ++ case E_V16SFmode: ++ case E_V8SImode: ++ case E_V4SImode: ++ case E_V8SFmode: ++ case E_V4SFmode: ++ vec_mode = mode; ++ imode = SImode; + break; + +- case I387_FLOOR: +- if (mode == I387_CW_FLOOR) +- return mode; ++ case E_V8DImode: ++ case E_V4DImode: ++ case E_V2DImode: ++ case E_V8DFmode: ++ case E_V4DFmode: ++ case E_V2DFmode: ++ vec_mode = mode; ++ imode = DImode; + break; + +- case I387_CEIL: +- if (mode == I387_CW_CEIL) +- return mode; ++ case E_TImode: ++ case E_TFmode: ++ vec_mode = VOIDmode; ++ imode = TImode; + break; + + default: + gcc_unreachable (); + } + +- return I387_CW_ANY; +-} ++ machine_mode inner_mode = GET_MODE_INNER (mode); ++ w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1, ++ GET_MODE_BITSIZE (inner_mode)); ++ if (invert) ++ w = wi::bit_not (w); + +-/* Return mode that entity must be switched into +- prior to the execution of insn. */ ++ /* Force this value into the low part of a fp vector constant. */ ++ mask = immed_wide_int_const (w, imode); ++ mask = gen_lowpart (inner_mode, mask); + +-static int +-ix86_mode_needed (int entity, rtx_insn *insn) +-{ +- switch (entity) +- { +- case X86_DIRFLAG: +- return ix86_dirflag_mode_needed (insn); +- case AVX_U128: +- return ix86_avx_u128_mode_needed (insn); +- case I387_TRUNC: +- case I387_FLOOR: +- case I387_CEIL: +- return ix86_i387_mode_needed (entity, insn); +- default: +- gcc_unreachable (); +- } +- return 0; +-} ++ if (vec_mode == VOIDmode) ++ return force_reg (inner_mode, mask); + +-/* Check if a 256bit or 512bit AVX register is referenced in stores. */ +- +-static void +-ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) +- { +- if (ix86_check_avx_upper_register (dest)) +- { +- bool *used = (bool *) data; +- *used = true; +- } +- } ++ v = ix86_build_const_vector (vec_mode, vect, mask); ++ return force_reg (vec_mode, v); ++} + +-/* Calculate mode of upper 128bit AVX registers after the insn. */ ++/* Return TRUE or FALSE depending on whether the first SET in INSN ++ has source and destination with matching CC modes, and that the ++ CC mode is at least as constrained as REQ_MODE. 
*/ + +-static int +-ix86_avx_u128_mode_after (int mode, rtx_insn *insn) ++bool ++ix86_match_ccmode (rtx insn, machine_mode req_mode) + { +- rtx pat = PATTERN (insn); ++ rtx set; ++ machine_mode set_mode; + +- if (vzeroupper_pattern (pat, VOIDmode) +- || vzeroall_pattern (pat, VOIDmode)) +- return AVX_U128_CLEAN; ++ set = PATTERN (insn); ++ if (GET_CODE (set) == PARALLEL) ++ set = XVECEXP (set, 0, 0); ++ gcc_assert (GET_CODE (set) == SET); ++ gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); + +- /* We know that state is clean after CALL insn if there are no +- 256bit or 512bit registers used in the function return register. */ +- if (CALL_P (insn)) ++ set_mode = GET_MODE (SET_DEST (set)); ++ switch (set_mode) + { +- bool avx_upper_reg_found = false; +- note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found); +- +- return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; +- } ++ case E_CCNOmode: ++ if (req_mode != CCNOmode ++ && (req_mode != CCmode ++ || XEXP (SET_SRC (set), 1) != const0_rtx)) ++ return false; ++ break; ++ case E_CCmode: ++ if (req_mode == CCGCmode) ++ return false; ++ /* FALLTHRU */ ++ case E_CCGCmode: ++ if (req_mode == CCGOCmode || req_mode == CCNOmode) ++ return false; ++ /* FALLTHRU */ ++ case E_CCGOCmode: ++ if (req_mode == CCZmode) ++ return false; ++ /* FALLTHRU */ ++ case E_CCZmode: ++ break; + +- /* Otherwise, return current mode. Remember that if insn +- references AVX 256bit or 512bit registers, the mode was already +- changed to DIRTY from MODE_NEEDED. */ +- return mode; +-} ++ case E_CCGZmode: + +-/* Return the mode that an insn results in. */ ++ case E_CCAmode: ++ case E_CCCmode: ++ case E_CCOmode: ++ case E_CCPmode: ++ case E_CCSmode: ++ if (set_mode != req_mode) ++ return false; ++ break; + +-static int +-ix86_mode_after (int entity, int mode, rtx_insn *insn) +-{ +- switch (entity) +- { +- case X86_DIRFLAG: +- return mode; +- case AVX_U128: +- return ix86_avx_u128_mode_after (mode, insn); +- case I387_TRUNC: +- case I387_FLOOR: +- case I387_CEIL: +- return mode; + default: + gcc_unreachable (); + } +-} +- +-static int +-ix86_dirflag_mode_entry (void) +-{ +- /* For TARGET_CLD or in the interrupt handler we can't assume +- direction flag state at function entry. */ +- if (TARGET_CLD +- || cfun->machine->func_type != TYPE_NORMAL) +- return X86_DIRFLAG_ANY; + +- return X86_DIRFLAG_RESET; ++ return GET_MODE (SET_SRC (set)) == set_mode; + } + +-static int +-ix86_avx_u128_mode_entry (void) ++machine_mode ++ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) + { +- tree arg; ++ machine_mode mode = GET_MODE (op0); + +- /* Entry mode is set to AVX_U128_DIRTY if there are +- 256bit or 512bit modes used in function arguments. */ +- for (arg = DECL_ARGUMENTS (current_function_decl); arg; +- arg = TREE_CHAIN (arg)) ++ if (SCALAR_FLOAT_MODE_P (mode)) + { +- rtx incoming = DECL_INCOMING_RTL (arg); +- +- if (incoming && ix86_check_avx_upper_register (incoming)) +- return AVX_U128_DIRTY; ++ gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); ++ return CCFPmode; + } + +- return AVX_U128_CLEAN; +-} +- +-/* Return a mode that ENTITY is assumed to be +- switched to at function entry. */ +- +-static int +-ix86_mode_entry (int entity) +-{ +- switch (entity) ++ switch (code) + { +- case X86_DIRFLAG: +- return ix86_dirflag_mode_entry (); +- case AVX_U128: +- return ix86_avx_u128_mode_entry (); +- case I387_TRUNC: +- case I387_FLOOR: +- case I387_CEIL: +- return I387_CW_ANY; ++ /* Only zero flag is needed. 
*/ ++ case EQ: /* ZF=0 */ ++ case NE: /* ZF!=0 */ ++ return CCZmode; ++ /* Codes needing carry flag. */ ++ case GEU: /* CF=0 */ ++ case LTU: /* CF=1 */ ++ /* Detect overflow checks. They need just the carry flag. */ ++ if (GET_CODE (op0) == PLUS ++ && (rtx_equal_p (op1, XEXP (op0, 0)) ++ || rtx_equal_p (op1, XEXP (op0, 1)))) ++ return CCCmode; ++ else ++ return CCmode; ++ case GTU: /* CF=0 & ZF=0 */ ++ case LEU: /* CF=1 | ZF=1 */ ++ return CCmode; ++ /* Codes possibly doable only with sign flag when ++ comparing against zero. */ ++ case GE: /* SF=OF or SF=0 */ ++ case LT: /* SF<>OF or SF=1 */ ++ if (op1 == const0_rtx) ++ return CCGOCmode; ++ else ++ /* For other cases Carry flag is not required. */ ++ return CCGCmode; ++ /* Codes doable only with sign flag when comparing ++ against zero, but we miss jump instruction for it ++ so we need to use relational tests against overflow ++ that thus needs to be zero. */ ++ case GT: /* ZF=0 & SF=OF */ ++ case LE: /* ZF=1 | SF<>OF */ ++ if (op1 == const0_rtx) ++ return CCNOmode; ++ else ++ return CCGCmode; ++ /* strcmp pattern do (use flags) and combine may ask us for proper ++ mode. */ ++ case USE: ++ return CCmode; + default: + gcc_unreachable (); + } + } + +-static int +-ix86_avx_u128_mode_exit (void) +-{ +- rtx reg = crtl->return_rtx; ++/* Return the fixed registers used for condition codes. */ + +- /* Exit mode is set to AVX_U128_DIRTY if there are 256bit +- or 512 bit modes used in the function return register. */ +- if (reg && ix86_check_avx_upper_register (reg)) +- return AVX_U128_DIRTY; +- +- /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit +- modes used in function arguments, otherwise return AVX_U128_CLEAN. +- */ +- return ix86_avx_u128_mode_entry (); +-} +- +-/* Return a mode that ENTITY is assumed to be +- switched to at function exit. */ +- +-static int +-ix86_mode_exit (int entity) +-{ +- switch (entity) +- { +- case X86_DIRFLAG: +- return X86_DIRFLAG_ANY; +- case AVX_U128: +- return ix86_avx_u128_mode_exit (); +- case I387_TRUNC: +- case I387_FLOOR: +- case I387_CEIL: +- return I387_CW_ANY; +- default: +- gcc_unreachable (); +- } +-} +- +-static int +-ix86_mode_priority (int, int n) ++static bool ++ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) + { +- return n; ++ *p1 = FLAGS_REG; ++ *p2 = INVALID_REGNUM; ++ return true; + } + +-/* Output code to initialize control word copies used by trunc?f?i and +- rounding patterns. CURRENT_MODE is set to current control word, +- while NEW_MODE is set to new control word. */ ++/* If two condition code modes are compatible, return a condition code ++ mode which is compatible with both. Otherwise, return ++ VOIDmode. 
*/ + +-static void +-emit_i387_cw_initialization (int mode) ++static machine_mode ++ix86_cc_modes_compatible (machine_mode m1, machine_mode m2) + { +- rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); +- rtx new_mode; ++ if (m1 == m2) ++ return m1; + +- enum ix86_stack_slot slot; ++ if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) ++ return VOIDmode; + +- rtx reg = gen_reg_rtx (HImode); ++ if ((m1 == CCGCmode && m2 == CCGOCmode) ++ || (m1 == CCGOCmode && m2 == CCGCmode)) ++ return CCGCmode; + +- emit_insn (gen_x86_fnstcw_1 (stored_mode)); +- emit_move_insn (reg, copy_rtx (stored_mode)); ++ if ((m1 == CCNOmode && m2 == CCGOCmode) ++ || (m1 == CCGOCmode && m2 == CCNOmode)) ++ return CCNOmode; + +- switch (mode) ++ if (m1 == CCZmode ++ && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode)) ++ return m2; ++ else if (m2 == CCZmode ++ && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode)) ++ return m1; ++ ++ switch (m1) + { +- case I387_CW_TRUNC: +- /* round toward zero (truncate) */ +- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); +- slot = SLOT_CW_TRUNC; +- break; ++ default: ++ gcc_unreachable (); + +- case I387_CW_FLOOR: +- /* round down toward -oo */ +- emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); +- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); +- slot = SLOT_CW_FLOOR; +- break; ++ case E_CCmode: ++ case E_CCGCmode: ++ case E_CCGOCmode: ++ case E_CCNOmode: ++ case E_CCAmode: ++ case E_CCCmode: ++ case E_CCOmode: ++ case E_CCPmode: ++ case E_CCSmode: ++ case E_CCZmode: ++ switch (m2) ++ { ++ default: ++ return VOIDmode; + +- case I387_CW_CEIL: +- /* round up toward +oo */ +- emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); +- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); +- slot = SLOT_CW_CEIL; +- break; ++ case E_CCmode: ++ case E_CCGCmode: ++ case E_CCGOCmode: ++ case E_CCNOmode: ++ case E_CCAmode: ++ case E_CCCmode: ++ case E_CCOmode: ++ case E_CCPmode: ++ case E_CCSmode: ++ case E_CCZmode: ++ return CCmode; ++ } + +- default: +- gcc_unreachable (); ++ case E_CCFPmode: ++ /* These are only compatible with themselves, which we already ++ checked above. */ ++ return VOIDmode; + } ++} + +- gcc_assert (slot < MAX_386_STACK_LOCALS); ++/* Return strategy to use for floating-point. We assume that fcomi is always ++ preferrable where available, since that is also true when looking at size ++ (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */ + +- new_mode = assign_386_stack_local (HImode, slot); +- emit_move_insn (new_mode, reg); ++enum ix86_fpcmp_strategy ++ix86_fp_comparison_strategy (enum rtx_code) ++{ ++ /* Do fcomi/sahf based test when profitable. */ ++ ++ if (TARGET_CMOVE) ++ return IX86_FPCMP_COMI; ++ ++ if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) ++ return IX86_FPCMP_SAHF; ++ ++ return IX86_FPCMP_ARITH; + } + +-/* Generate one or more insns to set ENTITY to MODE. */ ++/* Convert comparison codes we use to represent FP comparison to integer ++ code that will result in proper branch. Return UNKNOWN if no such code ++ is available. 
*/ + +-static void +-ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, +- HARD_REG_SET regs_live ATTRIBUTE_UNUSED) ++enum rtx_code ++ix86_fp_compare_code_to_integer (enum rtx_code code) + { +- switch (entity) ++ switch (code) + { +- case X86_DIRFLAG: +- if (mode == X86_DIRFLAG_RESET) +- emit_insn (gen_cld ()); +- break; +- case AVX_U128: +- if (mode == AVX_U128_CLEAN) +- emit_insn (gen_avx_vzeroupper ()); +- break; +- case I387_TRUNC: +- case I387_FLOOR: +- case I387_CEIL: +- if (mode != I387_CW_ANY +- && mode != I387_CW_UNINITIALIZED) +- emit_i387_cw_initialization (mode); +- break; ++ case GT: ++ return GTU; ++ case GE: ++ return GEU; ++ case ORDERED: ++ case UNORDERED: ++ return code; ++ case UNEQ: ++ return EQ; ++ case UNLT: ++ return LTU; ++ case UNLE: ++ return LEU; ++ case LTGT: ++ return NE; + default: +- gcc_unreachable (); ++ return UNKNOWN; + } + } + +-/* Output code for INSN to convert a float to a signed int. OPERANDS +- are the insn operands. The output may be [HSD]Imode and the input +- operand may be [SDX]Fmode. */ +- +-const char * +-output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp) ++/* Zero extend possibly SImode EXP to Pmode register. */ ++rtx ++ix86_zero_extend_to_Pmode (rtx exp) + { +- bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); +- bool dimode_p = GET_MODE (operands[0]) == DImode; +- int round_mode = get_attr_i387_cw (insn); +- +- static char buf[40]; +- const char *p; +- +- /* Jump through a hoop or two for DImode, since the hardware has no +- non-popping instruction. We used to do this a different way, but +- that was somewhat fragile and broke with post-reload splitters. */ +- if ((dimode_p || fisttp) && !stack_top_dies) +- output_asm_insn ("fld\t%y1", operands); +- +- gcc_assert (STACK_TOP_P (operands[1])); +- gcc_assert (MEM_P (operands[0])); +- gcc_assert (GET_MODE (operands[1]) != TFmode); +- +- if (fisttp) +- return "fisttp%Z0\t%0"; +- +- strcpy (buf, "fist"); ++ return force_reg (Pmode, convert_to_mode (Pmode, exp, 1)); ++} + +- if (round_mode != I387_CW_ANY) +- output_asm_insn ("fldcw\t%3", operands); ++/* Return true if the function being called was marked with attribute ++ "noplt" or using -fno-plt and we are compiling for non-PIC. We need ++ to handle the non-PIC case in the backend because there is no easy ++ interface for the front-end to force non-PLT calls to use the GOT. ++ This is currently used only with 64-bit or 32-bit GOT32X ELF targets ++ to call the function marked "noplt" indirectly. */ + +- p = "p%Z0\t%0"; +- strcat (buf, p + !(stack_top_dies || dimode_p)); ++static bool ++ix86_nopic_noplt_attribute_p (rtx call_op) ++{ ++ if (flag_pic || ix86_cmodel == CM_LARGE ++ || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X) ++ || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF ++ || SYMBOL_REF_LOCAL_P (call_op)) ++ return false; + +- output_asm_insn (buf, operands); ++ tree symbol_decl = SYMBOL_REF_DECL (call_op); + +- if (round_mode != I387_CW_ANY) +- output_asm_insn ("fldcw\t%2", operands); ++ if (!flag_plt ++ || (symbol_decl != NULL_TREE ++ && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))) ++ return true; + +- return ""; ++ return false; + } + +-/* Output code for x87 ffreep insn. The OPNO argument, which may only +- have the values zero or one, indicates the ffreep insn's operand +- from the OPERANDS array. */ +- +-static const char * +-output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) ++/* Helper to output the jmp/call. 
*/ ++static void ++ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno) + { +- if (TARGET_USE_FFREEP) +-#ifdef HAVE_AS_IX86_FFREEP +- return opno ? "ffreep\t%y1" : "ffreep\t%y0"; +-#else ++ if (thunk_name != NULL) + { +- static char retval[32]; +- int regno = REGNO (operands[opno]); +- +- gcc_assert (STACK_REGNO_P (regno)); +- +- regno -= FIRST_STACK_REG; +- +- snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno); +- return retval; ++ fprintf (asm_out_file, "\tjmp\t"); ++ assemble_name (asm_out_file, thunk_name); ++ putc ('\n', asm_out_file); + } +-#endif +- +- return opno ? "fstp\t%y1" : "fstp\t%y0"; ++ else ++ output_indirect_thunk (regno); + } + ++/* Output indirect branch via a call and return thunk. CALL_OP is a ++ register which contains the branch target. XASM is the assembly ++ template for CALL_OP. Branch is a tail call if SIBCALL_P is true. ++ A normal call is converted to: + +-/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi +- should be used. UNORDERED_P is true when fucom should be used. */ +- +-const char * +-output_fp_compare (rtx_insn *insn, rtx *operands, +- bool eflags_p, bool unordered_p) +-{ +- rtx *xops = eflags_p ? &operands[0] : &operands[1]; +- bool stack_top_dies; ++ call __x86_indirect_thunk_reg + +- static char buf[40]; +- const char *p; ++ and a tail call is converted to: + +- gcc_assert (STACK_TOP_P (xops[0])); ++ jmp __x86_indirect_thunk_reg ++ */ + +- stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); ++static void ++ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p) ++{ ++ char thunk_name_buf[32]; ++ char *thunk_name; ++ enum indirect_thunk_prefix need_prefix ++ = indirect_thunk_need_prefix (current_output_insn); ++ int regno = REGNO (call_op); + +- if (eflags_p) ++ if (cfun->machine->indirect_branch_type ++ != indirect_branch_thunk_inline) + { +- p = unordered_p ? "fucomi" : "fcomi"; +- strcpy (buf, p); +- +- p = "p\t{%y1, %0|%0, %y1}"; +- strcat (buf, p + !stack_top_dies); +- +- return buf; ++ if (cfun->machine->indirect_branch_type == indirect_branch_thunk) ++ { ++ int i = regno; ++ if (i >= FIRST_REX_INT_REG) ++ i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1); ++ indirect_thunks_used |= 1 << i; ++ } ++ indirect_thunk_name (thunk_name_buf, regno, need_prefix, false); ++ thunk_name = thunk_name_buf; + } ++ else ++ thunk_name = NULL; + +- if (STACK_REG_P (xops[1]) +- && stack_top_dies +- && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1)) +- { +- gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1); +- +- /* If both the top of the 387 stack die, and the other operand +- is also a stack register that dies, then this must be a +- `fcompp' float compare. */ +- p = unordered_p ? "fucompp" : "fcompp"; +- strcpy (buf, p); +- } +- else if (const0_operand (xops[1], VOIDmode)) +- { +- gcc_assert (!unordered_p); +- strcpy (buf, "ftst"); +- } ++ if (sibcall_p) ++ ix86_output_jmp_thunk_or_indirect (thunk_name, regno); + else + { +- if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT) ++ if (thunk_name != NULL) + { +- gcc_assert (!unordered_p); +- p = "ficom"; ++ fprintf (asm_out_file, "\tcall\t"); ++ assemble_name (asm_out_file, thunk_name); ++ putc ('\n', asm_out_file); ++ return; + } +- else +- p = unordered_p ? 
"fucom" : "fcom"; +- +- strcpy (buf, p); + +- p = "p%Z2\t%y2"; +- strcat (buf, p + !stack_top_dies); +- } ++ char indirectlabel1[32]; ++ char indirectlabel2[32]; + +- output_asm_insn (buf, operands); +- return "fnstsw\t%0"; +-} ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, ++ INDIRECT_LABEL, ++ indirectlabelno++); ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, ++ INDIRECT_LABEL, ++ indirectlabelno++); + +-void +-ix86_output_addr_vec_elt (FILE *file, int value) +-{ +- const char *directive = ASM_LONG; ++ /* Jump. */ ++ fputs ("\tjmp\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel2); ++ fputc ('\n', asm_out_file); + +-#ifdef ASM_QUAD +- if (TARGET_LP64) +- directive = ASM_QUAD; +-#else +- gcc_assert (!TARGET_64BIT); +-#endif ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); + +- fprintf (file, "%s%s%d\n", directive, LPREFIX, value); +-} ++ ix86_output_jmp_thunk_or_indirect (thunk_name, regno); + +-void +-ix86_output_addr_diff_elt (FILE *file, int value, int rel) +-{ +- const char *directive = ASM_LONG; ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); + +-#ifdef ASM_QUAD +- if (TARGET_64BIT && CASE_VECTOR_MODE == DImode) +- directive = ASM_QUAD; +-#else +- gcc_assert (!TARGET_64BIT); +-#endif +- /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */ +- if (TARGET_64BIT || TARGET_VXWORKS_RTP) +- fprintf (file, "%s%s%d-%s%d\n", +- directive, LPREFIX, value, LPREFIX, rel); +-#if TARGET_MACHO +- else if (TARGET_MACHO) +- { +- fprintf (file, ASM_LONG "%s%d-", LPREFIX, value); +- machopic_output_function_base_name (file); +- putc ('\n', file); ++ /* Call. */ ++ fputs ("\tcall\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel1); ++ fputc ('\n', asm_out_file); + } +-#endif +- else if (HAVE_AS_GOTOFF_IN_DATA) +- fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value); +- else +- asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n", +- GOT_SYMBOL_NAME, LPREFIX, value); + } +- +-/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate +- for the target. */ +- +-void +-ix86_expand_clear (rtx dest) +-{ +- rtx tmp; + +- /* We play register width games, which are only valid after reload. */ +- gcc_assert (reload_completed); ++/* Output indirect branch via a call and return thunk. CALL_OP is ++ the branch target. XASM is the assembly template for CALL_OP. ++ Branch is a tail call if SIBCALL_P is true. A normal call is ++ converted to: + +- /* Avoid HImode and its attendant prefix byte. 
*/ +- if (GET_MODE_SIZE (GET_MODE (dest)) < 4) +- dest = gen_rtx_REG (SImode, REGNO (dest)); +- tmp = gen_rtx_SET (dest, const0_rtx); ++ jmp L2 ++ L1: ++ push CALL_OP ++ jmp __x86_indirect_thunk ++ L2: ++ call L1 + +- if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ()) +- { +- rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); +- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); +- } ++ and a tail call is converted to: + +- emit_insn (tmp); +-} ++ push CALL_OP ++ jmp __x86_indirect_thunk ++ */ + +-void +-ix86_expand_move (machine_mode mode, rtx operands[]) ++static void ++ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm, ++ bool sibcall_p) + { +- rtx op0, op1; +- rtx tmp, addend = NULL_RTX; +- enum tls_model model; ++ char thunk_name_buf[32]; ++ char *thunk_name; ++ char push_buf[64]; ++ enum indirect_thunk_prefix need_prefix ++ = indirect_thunk_need_prefix (current_output_insn); ++ int regno = -1; ++ ++ if (cfun->machine->indirect_branch_type ++ != indirect_branch_thunk_inline) ++ { ++ if (cfun->machine->indirect_branch_type == indirect_branch_thunk) ++ indirect_thunk_needed = true; ++ indirect_thunk_name (thunk_name_buf, regno, need_prefix, false); ++ thunk_name = thunk_name_buf; ++ } ++ else ++ thunk_name = NULL; + +- op0 = operands[0]; +- op1 = operands[1]; ++ snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s", ++ TARGET_64BIT ? 'q' : 'l', xasm); + +- switch (GET_CODE (op1)) ++ if (sibcall_p) + { +- case CONST: +- tmp = XEXP (op1, 0); ++ output_asm_insn (push_buf, &call_op); ++ ix86_output_jmp_thunk_or_indirect (thunk_name, regno); ++ } ++ else ++ { ++ char indirectlabel1[32]; ++ char indirectlabel2[32]; + +- if (GET_CODE (tmp) != PLUS +- || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF) +- break; ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, ++ INDIRECT_LABEL, ++ indirectlabelno++); ++ ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, ++ INDIRECT_LABEL, ++ indirectlabelno++); + +- op1 = XEXP (tmp, 0); +- addend = XEXP (tmp, 1); +- /* FALLTHRU */ ++ /* Jump. */ ++ fputs ("\tjmp\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel2); ++ fputc ('\n', asm_out_file); + +- case SYMBOL_REF: +- model = SYMBOL_REF_TLS_MODEL (op1); ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); + +- if (model) +- op1 = legitimize_tls_address (op1, model, true); +- else if (ix86_force_load_from_GOT_p (op1)) +- { +- /* Load the external function address via GOT slot to avoid PLT. */ +- op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), +- (TARGET_64BIT +- ? UNSPEC_GOTPCREL +- : UNSPEC_GOT)); +- op1 = gen_rtx_CONST (Pmode, op1); +- op1 = gen_const_mem (Pmode, op1); +- set_mem_alias_set (op1, ix86_GOT_alias_set ()); +- } +- else ++ /* An external function may be called via GOT, instead of PLT. 
*/ ++ if (MEM_P (call_op)) + { +- tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX); +- if (tmp) +- { +- op1 = tmp; +- if (!addend) +- break; +- } +- else ++ struct ix86_address parts; ++ rtx addr = XEXP (call_op, 0); ++ if (ix86_decompose_address (addr, &parts) ++ && parts.base == stack_pointer_rtx) + { +- op1 = operands[1]; +- break; +- } +- } +- +- if (addend) +- { +- op1 = force_operand (op1, NULL_RTX); +- op1 = expand_simple_binop (Pmode, PLUS, op1, addend, +- op0, 1, OPTAB_DIRECT); +- } +- else +- op1 = force_operand (op1, op0); +- +- if (op1 == op0) +- return; +- +- op1 = convert_to_mode (mode, op1, 1); ++ /* Since call will adjust stack by -UNITS_PER_WORD, ++ we must convert "disp(stack, index, scale)" to ++ "disp+UNITS_PER_WORD(stack, index, scale)". */ ++ if (parts.index) ++ { ++ addr = gen_rtx_MULT (Pmode, parts.index, ++ GEN_INT (parts.scale)); ++ addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, ++ addr); ++ } ++ else ++ addr = stack_pointer_rtx; + +- default: +- break; +- } ++ rtx disp; ++ if (parts.disp != NULL_RTX) ++ disp = plus_constant (Pmode, parts.disp, ++ UNITS_PER_WORD); ++ else ++ disp = GEN_INT (UNITS_PER_WORD); + +- if ((flag_pic || MACHOPIC_INDIRECT) +- && symbolic_operand (op1, mode)) +- { +- if (TARGET_MACHO && !TARGET_64BIT) +- { +-#if TARGET_MACHO +- /* dynamic-no-pic */ +- if (MACHOPIC_INDIRECT) +- { +- rtx temp = (op0 && REG_P (op0) && mode == Pmode) +- ? op0 : gen_reg_rtx (Pmode); +- op1 = machopic_indirect_data_reference (op1, temp); +- if (MACHOPIC_PURE) +- op1 = machopic_legitimize_pic_address (op1, mode, +- temp == op1 ? 0 : temp); +- } +- if (op0 != op1 && GET_CODE (op0) != MEM) +- { +- rtx insn = gen_rtx_SET (op0, op1); +- emit_insn (insn); +- return; +- } +- if (GET_CODE (op0) == MEM) +- op1 = force_reg (Pmode, op1); +- else +- { +- rtx temp = op0; +- if (GET_CODE (temp) != REG) +- temp = gen_reg_rtx (Pmode); +- temp = legitimize_pic_address (op1, temp); +- if (temp == op0) +- return; +- op1 = temp; +- } +- /* dynamic-no-pic */ +-#endif +- } +- else +- { +- if (MEM_P (op0)) +- op1 = force_reg (mode, op1); +- else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode))) +- { +- rtx reg = can_create_pseudo_p () ? NULL_RTX : op0; +- op1 = legitimize_pic_address (op1, reg); +- if (op0 == op1) +- return; +- op1 = convert_to_mode (mode, op1, 1); +- } +- } +- } +- else +- { +- if (MEM_P (op0) +- && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) +- || !push_operand (op0, mode)) +- && MEM_P (op1)) +- op1 = force_reg (mode, op1); +- +- if (push_operand (op0, mode) +- && ! general_no_elim_operand (op1, mode)) +- op1 = copy_to_mode_reg (mode, op1); +- +- /* Force large constants in 64bit compilation into register +- to get them CSEed. */ +- if (can_create_pseudo_p () +- && (mode == DImode) && TARGET_64BIT +- && immediate_operand (op1, mode) +- && !x86_64_zext_immediate_operand (op1, VOIDmode) +- && !register_operand (op0, mode) +- && optimize) +- op1 = copy_to_mode_reg (mode, op1); +- +- if (can_create_pseudo_p () +- && CONST_DOUBLE_P (op1)) +- { +- /* If we are loading a floating point constant to a register, +- force the value to memory now, since we'll get better code +- out the back end. 
*/ +- +- op1 = validize_mem (force_const_mem (mode, op1)); +- if (!register_operand (op0, mode)) +- { +- rtx temp = gen_reg_rtx (mode); +- emit_insn (gen_rtx_SET (temp, op1)); +- emit_move_insn (op0, temp); +- return; ++ addr = gen_rtx_PLUS (Pmode, addr, disp); ++ call_op = gen_rtx_MEM (GET_MODE (call_op), addr); + } + } +- } +- +- emit_insn (gen_rtx_SET (op0, op1)); +-} +- +-void +-ix86_expand_vector_move (machine_mode mode, rtx operands[]) +-{ +- rtx op0 = operands[0], op1 = operands[1]; +- /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU +- psABI since the biggest alignment is 4 byte for IA MCU psABI. */ +- unsigned int align = (TARGET_IAMCU +- ? GET_MODE_BITSIZE (mode) +- : GET_MODE_ALIGNMENT (mode)); +- +- if (push_operand (op0, VOIDmode)) +- op0 = emit_move_resolve_push (mode, op0); +- +- /* Force constants other than zero into memory. We do not know how +- the instructions used to build constants modify the upper 64 bits +- of the register, once we have that information we may be able +- to handle some of them more efficiently. */ +- if (can_create_pseudo_p () +- && (CONSTANT_P (op1) +- || (SUBREG_P (op1) +- && CONSTANT_P (SUBREG_REG (op1)))) +- && ((register_operand (op0, mode) +- && !standard_sse_constant_p (op1, mode)) +- /* ix86_expand_vector_move_misalign() does not like constants. */ +- || (SSE_REG_MODE_P (mode) +- && MEM_P (op0) +- && MEM_ALIGN (op0) < align))) +- { +- if (SUBREG_P (op1)) +- { +- machine_mode imode = GET_MODE (SUBREG_REG (op1)); +- rtx r = force_const_mem (imode, SUBREG_REG (op1)); +- if (r) +- r = validize_mem (r); +- else +- r = force_reg (imode, SUBREG_REG (op1)); +- op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1)); +- } +- else +- op1 = validize_mem (force_const_mem (mode, op1)); +- } + +- /* We need to check memory alignment for SSE mode since attribute +- can make operands unaligned. */ +- if (can_create_pseudo_p () +- && SSE_REG_MODE_P (mode) +- && ((MEM_P (op0) && (MEM_ALIGN (op0) < align)) +- || (MEM_P (op1) && (MEM_ALIGN (op1) < align)))) +- { +- rtx tmp[2]; ++ output_asm_insn (push_buf, &call_op); + +- /* ix86_expand_vector_move_misalign() does not like both +- arguments in memory. */ +- if (!register_operand (op0, mode) +- && !register_operand (op1, mode)) +- op1 = force_reg (mode, op1); ++ ix86_output_jmp_thunk_or_indirect (thunk_name, regno); + +- tmp[0] = op0; tmp[1] = op1; +- ix86_expand_vector_move_misalign (mode, tmp); +- return; +- } ++ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); + +- /* Make operand1 a register if it isn't already. */ +- if (can_create_pseudo_p () +- && !register_operand (op0, mode) +- && !register_operand (op1, mode)) +- { +- emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); +- return; ++ /* Call. */ ++ fputs ("\tcall\t", asm_out_file); ++ assemble_name_raw (asm_out_file, indirectlabel1); ++ fputc ('\n', asm_out_file); + } +- +- emit_insn (gen_rtx_SET (op0, op1)); + } + +-/* Split 32-byte AVX unaligned load and store if needed. */ ++/* Output indirect branch via a call and return thunk. CALL_OP is ++ the branch target. XASM is the assembly template for CALL_OP. ++ Branch is a tail call if SIBCALL_P is true. 
*/ + + static void +-ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) ++ix86_output_indirect_branch (rtx call_op, const char *xasm, ++ bool sibcall_p) + { +- rtx m; +- rtx (*extract) (rtx, rtx, rtx); +- machine_mode mode; ++ if (REG_P (call_op)) ++ ix86_output_indirect_branch_via_reg (call_op, sibcall_p); ++ else ++ ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); ++} ++ ++/* Output indirect jump. CALL_OP is the jump target. */ + +- if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD) +- || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE)) ++const char * ++ix86_output_indirect_jmp (rtx call_op) ++{ ++ if (cfun->machine->indirect_branch_type != indirect_branch_keep) + { +- emit_insn (gen_rtx_SET (op0, op1)); +- return; ++ /* We can't have red-zone since "call" in the indirect thunk ++ pushes the return address onto stack, destroying red-zone. */ ++ if (ix86_red_zone_size != 0) ++ gcc_unreachable (); ++ ++ ix86_output_indirect_branch (call_op, "%0", true); ++ return ""; + } ++ else ++ return "%!jmp\t%A0"; ++} + +- rtx orig_op0 = NULL_RTX; +- mode = GET_MODE (op0); +- switch (GET_MODE_CLASS (mode)) ++/* Output return instrumentation for current function if needed. */ ++ ++static void ++output_return_instrumentation (void) ++{ ++ if (ix86_instrument_return != instrument_return_none ++ && flag_fentry ++ && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl)) + { +- case MODE_VECTOR_INT: +- case MODE_INT: +- if (mode != V32QImode) ++ if (ix86_flag_record_return) ++ fprintf (asm_out_file, "1:\n"); ++ switch (ix86_instrument_return) + { +- if (!MEM_P (op0)) +- { +- orig_op0 = op0; +- op0 = gen_reg_rtx (V32QImode); +- } +- else +- op0 = gen_lowpart (V32QImode, op0); +- op1 = gen_lowpart (V32QImode, op1); +- mode = V32QImode; ++ case instrument_return_call: ++ fprintf (asm_out_file, "\tcall\t__return__\n"); ++ break; ++ case instrument_return_nop5: ++ /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ ++ fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); ++ break; ++ case instrument_return_none: ++ break; + } +- break; +- case MODE_VECTOR_FLOAT: +- break; +- default: +- gcc_unreachable (); +- } +- +- switch (mode) +- { +- default: +- gcc_unreachable (); +- case E_V32QImode: +- extract = gen_avx_vextractf128v32qi; +- mode = V16QImode; +- break; +- case E_V8SFmode: +- extract = gen_avx_vextractf128v8sf; +- mode = V4SFmode; +- break; +- case E_V4DFmode: +- extract = gen_avx_vextractf128v4df; +- mode = V2DFmode; +- break; +- } + +- if (MEM_P (op1)) +- { +- rtx r = gen_reg_rtx (mode); +- m = adjust_address (op1, mode, 0); +- emit_move_insn (r, m); +- m = adjust_address (op1, mode, 16); +- r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m); +- emit_move_insn (op0, r); +- } +- else if (MEM_P (op0)) +- { +- m = adjust_address (op0, mode, 0); +- emit_insn (extract (m, op1, const0_rtx)); +- m = adjust_address (op0, mode, 16); +- emit_insn (extract (m, copy_rtx (op1), const1_rtx)); ++ if (ix86_flag_record_return) ++ { ++ fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n"); ++ fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); ++ fprintf (asm_out_file, "\t.previous\n"); ++ } + } +- else +- gcc_unreachable (); +- +- if (orig_op0) +- emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); + } + +-/* Implement the movmisalign patterns for SSE. Non-SSE modes go +- straight to ix86_expand_vector_move. 
*/ +-/* Code generation for scalar reg-reg moves of single and double precision data: +- if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true) +- movaps reg, reg +- else +- movss reg, reg +- if (x86_sse_partial_reg_dependency == true) +- movapd reg, reg +- else +- movsd reg, reg +- +- Code generation for scalar loads of double precision data: +- if (x86_sse_split_regs == true) +- movlpd mem, reg (gas syntax) +- else +- movsd mem, reg +- +- Code generation for unaligned packed loads of single precision data +- (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency): +- if (x86_sse_unaligned_move_optimal) +- movups mem, reg +- +- if (x86_sse_partial_reg_dependency == true) +- { +- xorps reg, reg +- movlps mem, reg +- movhps mem+8, reg +- } +- else +- { +- movlps mem, reg +- movhps mem+8, reg +- } +- +- Code generation for unaligned packed loads of double precision data +- (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs): +- if (x86_sse_unaligned_move_optimal) +- movupd mem, reg +- +- if (x86_sse_split_regs == true) +- { +- movlpd mem, reg +- movhpd mem+8, reg +- } +- else +- { +- movsd mem, reg +- movhpd mem+8, reg +- } +- */ ++/* Output function return. CALL_OP is the jump target. Add a REP ++ prefix to RET if LONG_P is true and function return is kept. */ + +-void +-ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) ++const char * ++ix86_output_function_return (bool long_p) + { +- rtx op0, op1, m; +- +- op0 = operands[0]; +- op1 = operands[1]; ++ output_return_instrumentation (); + +- /* Use unaligned load/store for AVX512 or when optimizing for size. */ +- if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ()) ++ if (cfun->machine->function_return_type != indirect_branch_keep) + { +- emit_insn (gen_rtx_SET (op0, op1)); +- return; +- } ++ char thunk_name[32]; ++ enum indirect_thunk_prefix need_prefix ++ = indirect_thunk_need_prefix (current_output_insn); + +- if (TARGET_AVX) +- { +- if (GET_MODE_SIZE (mode) == 32) +- ix86_avx256_split_vector_move_misalign (op0, op1); ++ if (cfun->machine->function_return_type ++ != indirect_branch_thunk_inline) ++ { ++ bool need_thunk = (cfun->machine->function_return_type ++ == indirect_branch_thunk); ++ indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix, ++ true); ++ indirect_return_needed |= need_thunk; ++ fprintf (asm_out_file, "\tjmp\t"); ++ assemble_name (asm_out_file, thunk_name); ++ putc ('\n', asm_out_file); ++ } + else +- /* Always use 128-bit mov_internal pattern for AVX. */ +- emit_insn (gen_rtx_SET (op0, op1)); +- return; +- } +- +- if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL +- || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) +- { +- emit_insn (gen_rtx_SET (op0, op1)); +- return; +- } ++ output_indirect_thunk (INVALID_REGNUM); + +- /* ??? If we have typed data, then it would appear that using +- movdqu is the only way to get unaligned data loaded with +- integer type. */ +- if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) +- { +- emit_insn (gen_rtx_SET (op0, op1)); +- return; ++ return ""; + } + +- if (MEM_P (op1)) +- { +- if (TARGET_SSE2 && mode == V2DFmode) +- { +- rtx zero; +- +- /* When SSE registers are split into halves, we can avoid +- writing to the top half twice. */ +- if (TARGET_SSE_SPLIT_REGS) +- { +- emit_clobber (op0); +- zero = op0; +- } +- else +- { +- /* ??? Not sure about the best option for the Intel chips. +- The following would seem to satisfy; the register is +- entirely cleared, breaking the dependency chain. 
We +- then store to the upper half, with a dependency depth +- of one. A rumor has it that Intel recommends two movsd +- followed by an unpacklpd, but this is unconfirmed. And +- given that the dependency depth of the unpacklpd would +- still be one, I'm not sure why this would be better. */ +- zero = CONST0_RTX (V2DFmode); +- } ++ if (!long_p) ++ return "%!ret"; + +- m = adjust_address (op1, DFmode, 0); +- emit_insn (gen_sse2_loadlpd (op0, zero, m)); +- m = adjust_address (op1, DFmode, 8); +- emit_insn (gen_sse2_loadhpd (op0, op0, m)); +- } +- else +- { +- rtx t; ++ return "rep%; ret"; ++} + +- if (mode != V4SFmode) +- t = gen_reg_rtx (V4SFmode); +- else +- t = op0; +- +- if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) +- emit_move_insn (t, CONST0_RTX (V4SFmode)); +- else +- emit_clobber (t); ++/* Output indirect function return. RET_OP is the function return ++ target. */ + +- m = adjust_address (op1, V2SFmode, 0); +- emit_insn (gen_sse_loadlps (t, t, m)); +- m = adjust_address (op1, V2SFmode, 8); +- emit_insn (gen_sse_loadhps (t, t, m)); +- if (mode != V4SFmode) +- emit_move_insn (op0, gen_lowpart (mode, t)); +- } +- } +- else if (MEM_P (op0)) ++const char * ++ix86_output_indirect_function_return (rtx ret_op) ++{ ++ if (cfun->machine->function_return_type != indirect_branch_keep) + { +- if (TARGET_SSE2 && mode == V2DFmode) ++ char thunk_name[32]; ++ enum indirect_thunk_prefix need_prefix ++ = indirect_thunk_need_prefix (current_output_insn); ++ unsigned int regno = REGNO (ret_op); ++ gcc_assert (regno == CX_REG); ++ ++ if (cfun->machine->function_return_type ++ != indirect_branch_thunk_inline) + { +- m = adjust_address (op0, DFmode, 0); +- emit_insn (gen_sse2_storelpd (m, op1)); +- m = adjust_address (op0, DFmode, 8); +- emit_insn (gen_sse2_storehpd (m, op1)); ++ bool need_thunk = (cfun->machine->function_return_type ++ == indirect_branch_thunk); ++ indirect_thunk_name (thunk_name, regno, need_prefix, true); ++ ++ if (need_thunk) ++ { ++ indirect_return_via_cx = true; ++ indirect_thunks_used |= 1 << CX_REG; ++ } ++ fprintf (asm_out_file, "\tjmp\t"); ++ assemble_name (asm_out_file, thunk_name); ++ putc ('\n', asm_out_file); + } + else +- { +- if (mode != V4SFmode) +- op1 = gen_lowpart (V4SFmode, op1); ++ output_indirect_thunk (regno); + +- m = adjust_address (op0, V2SFmode, 0); +- emit_insn (gen_sse_storelps (m, op1)); +- m = adjust_address (op0, V2SFmode, 8); +- emit_insn (gen_sse_storehps (m, copy_rtx (op1))); +- } ++ return ""; + } + else +- gcc_unreachable (); ++ return "%!jmp\t%A0"; + } + +-/* Helper function of ix86_fixup_binary_operands to canonicalize +- operand order. Returns true if the operands should be swapped. */ ++/* Output the assembly for a call instruction. */ + +-static bool +-ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode, +- rtx operands[]) ++const char * ++ix86_output_call_insn (rtx_insn *insn, rtx call_op) + { +- rtx dst = operands[0]; +- rtx src1 = operands[1]; +- rtx src2 = operands[2]; +- +- /* If the operation is not commutative, we can't do anything. */ +- if (GET_RTX_CLASS (code) != RTX_COMM_ARITH +- && GET_RTX_CLASS (code) != RTX_COMM_COMPARE) +- return false; +- +- /* Highest priority is that src1 should match dst. */ +- if (rtx_equal_p (dst, src1)) +- return false; +- if (rtx_equal_p (dst, src2)) +- return true; +- +- /* Next highest priority is that immediate constants come second. 
*/ +- if (immediate_operand (src2, mode)) +- return false; +- if (immediate_operand (src1, mode)) +- return true; +- +- /* Lowest priority is that memory references should come second. */ +- if (MEM_P (src2)) +- return false; +- if (MEM_P (src1)) +- return true; ++ bool direct_p = constant_call_address_operand (call_op, VOIDmode); ++ bool output_indirect_p ++ = (!TARGET_SEH ++ && cfun->machine->indirect_branch_type != indirect_branch_keep); ++ bool seh_nop_p = false; ++ const char *xasm; + +- return false; +-} ++ if (SIBLING_CALL_P (insn)) ++ { ++ output_return_instrumentation (); ++ if (direct_p) ++ { ++ if (ix86_nopic_noplt_attribute_p (call_op)) ++ { ++ direct_p = false; ++ if (TARGET_64BIT) ++ { ++ if (output_indirect_p) ++ xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; ++ else ++ xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; ++ } ++ else ++ { ++ if (output_indirect_p) ++ xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}"; ++ else ++ xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}"; ++ } ++ } ++ else ++ xasm = "%!jmp\t%P0"; ++ } ++ /* SEH epilogue detection requires the indirect branch case ++ to include REX.W. */ ++ else if (TARGET_SEH) ++ xasm = "%!rex.W jmp\t%A0"; ++ else ++ { ++ if (output_indirect_p) ++ xasm = "%0"; ++ else ++ xasm = "%!jmp\t%A0"; ++ } + ++ if (output_indirect_p && !direct_p) ++ ix86_output_indirect_branch (call_op, xasm, true); ++ else ++ output_asm_insn (xasm, &call_op); ++ return ""; ++ } + +-/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the +- destination to use for the operation. If different from the true +- destination in operands[0], a copy operation will be required. */ ++ /* SEH unwinding can require an extra nop to be emitted in several ++ circumstances. Determine if we have one of those. */ ++ if (TARGET_SEH) ++ { ++ rtx_insn *i; + +-rtx +-ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode, +- rtx operands[]) +-{ +- rtx dst = operands[0]; +- rtx src1 = operands[1]; +- rtx src2 = operands[2]; ++ for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i)) ++ { ++ /* Prevent a catch region from being adjacent to a jump that would ++ be interpreted as an epilogue sequence by the unwinder. */ ++ if (JUMP_P(i) && CROSSING_JUMP_P (i)) ++ { ++ seh_nop_p = true; ++ break; ++ } ++ ++ /* If we get to another real insn, we don't need the nop. */ ++ if (INSN_P (i)) ++ break; + +- /* Canonicalize operand order. */ +- if (ix86_swap_binary_operands_p (code, mode, operands)) +- { +- /* It is invalid to swap operands of different modes. */ +- gcc_assert (GET_MODE (src1) == GET_MODE (src2)); ++ /* If we get to the epilogue note, prevent a catch region from ++ being adjacent to the standard epilogue sequence. If non- ++ call-exceptions, we'll have done this during epilogue emission. */ ++ if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG ++ && !flag_non_call_exceptions ++ && !can_throw_internal (insn)) ++ { ++ seh_nop_p = true; ++ break; ++ } ++ } + +- std::swap (src1, src2); ++ /* If we didn't find a real insn following the call, prevent the ++ unwinder from looking into the next function. */ ++ if (i == NULL) ++ seh_nop_p = true; + } + +- /* Both source operands cannot be in memory. */ +- if (MEM_P (src1) && MEM_P (src2)) ++ if (direct_p) + { +- /* Optimization: Only read from memory once. 
*/ +- if (rtx_equal_p (src1, src2)) ++ if (ix86_nopic_noplt_attribute_p (call_op)) + { +- src2 = force_reg (mode, src2); +- src1 = src2; ++ direct_p = false; ++ if (TARGET_64BIT) ++ { ++ if (output_indirect_p) ++ xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; ++ else ++ xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; ++ } ++ else ++ { ++ if (output_indirect_p) ++ xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}"; ++ else ++ xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}"; ++ } + } +- else if (rtx_equal_p (dst, src1)) +- src2 = force_reg (mode, src2); + else +- src1 = force_reg (mode, src1); ++ xasm = "%!call\t%P0"; ++ } ++ else ++ { ++ if (output_indirect_p) ++ xasm = "%0"; ++ else ++ xasm = "%!call\t%A0"; + } + +- /* If the destination is memory, and we do not have matching source +- operands, do things in registers. */ +- if (MEM_P (dst) && !rtx_equal_p (dst, src1)) +- dst = gen_reg_rtx (mode); +- +- /* Source 1 cannot be a constant. */ +- if (CONSTANT_P (src1)) +- src1 = force_reg (mode, src1); +- +- /* Source 1 cannot be a non-matching memory. */ +- if (MEM_P (src1) && !rtx_equal_p (dst, src1)) +- src1 = force_reg (mode, src1); +- +- /* Improve address combine. */ +- if (code == PLUS +- && GET_MODE_CLASS (mode) == MODE_INT +- && MEM_P (src2)) +- src2 = force_reg (mode, src2); +- +- operands[1] = src1; +- operands[2] = src2; +- return dst; +-} ++ if (output_indirect_p && !direct_p) ++ ix86_output_indirect_branch (call_op, xasm, false); ++ else ++ output_asm_insn (xasm, &call_op); + +-/* Similarly, but assume that the destination has already been +- set up properly. */ ++ if (seh_nop_p) ++ return "nop"; + +-void +-ix86_fixup_binary_operands_no_copy (enum rtx_code code, +- machine_mode mode, rtx operands[]) +-{ +- rtx dst = ix86_fixup_binary_operands (code, mode, operands); +- gcc_assert (dst == operands[0]); ++ return ""; + } ++ ++/* Return a MEM corresponding to a stack slot with mode MODE. ++ Allocate a new slot if necessary. + +-/* Attempt to expand a binary operator. Make the expansion closer to the +- actual machine, then just general_operand, which will allow 3 separate +- memory references (one output, two input) in a single insn. */ ++ The RTL for a function can have several slots available: N is ++ which slot to use. */ + +-void +-ix86_expand_binary_operator (enum rtx_code code, machine_mode mode, +- rtx operands[]) ++rtx ++assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n) + { +- rtx src1, src2, dst, op, clob; +- +- dst = ix86_fixup_binary_operands (code, mode, operands); +- src1 = operands[1]; +- src2 = operands[2]; ++ struct stack_local_entry *s; + +- /* Emit the instruction. */ ++ gcc_assert (n < MAX_386_STACK_LOCALS); + +- op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2)); ++ for (s = ix86_stack_locals; s; s = s->next) ++ if (s->mode == mode && s->n == n) ++ return validize_mem (copy_rtx (s->rtl)); + +- if (reload_completed +- && code == PLUS +- && !rtx_equal_p (dst, src1)) +- { +- /* This is going to be an LEA; avoid splitting it later. */ +- emit_insn (op); +- } +- else +- { +- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); +- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); +- } ++ s = ggc_alloc (); ++ s->n = n; ++ s->mode = mode; ++ s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); + +- /* Fix up the destination if needed. 
*/ +- if (dst != operands[0]) +- emit_move_insn (operands[0], dst); ++ s->next = ix86_stack_locals; ++ ix86_stack_locals = s; ++ return validize_mem (copy_rtx (s->rtl)); + } + +-/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with +- the given OPERANDS. */ ++static void ++ix86_instantiate_decls (void) ++{ ++ struct stack_local_entry *s; + +-void +-ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode, +- rtx operands[]) +-{ +- rtx op1 = NULL_RTX, op2 = NULL_RTX; +- if (SUBREG_P (operands[1])) +- { +- op1 = operands[1]; +- op2 = operands[2]; +- } +- else if (SUBREG_P (operands[2])) +- { +- op1 = operands[2]; +- op2 = operands[1]; +- } +- /* Optimize (__m128i) d | (__m128i) e and similar code +- when d and e are float vectors into float vector logical +- insn. In C/C++ without using intrinsics there is no other way +- to express vector logical operation on float vectors than +- to cast them temporarily to integer vectors. */ +- if (op1 +- && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL +- && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR) +- && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT +- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode) +- && SUBREG_BYTE (op1) == 0 +- && (GET_CODE (op2) == CONST_VECTOR +- || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2)) +- && SUBREG_BYTE (op2) == 0)) +- && can_create_pseudo_p ()) +- { +- rtx dst; +- switch (GET_MODE (SUBREG_REG (op1))) +- { +- case E_V4SFmode: +- case E_V8SFmode: +- case E_V16SFmode: +- case E_V2DFmode: +- case E_V4DFmode: +- case E_V8DFmode: +- dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1))); +- if (GET_CODE (op2) == CONST_VECTOR) +- { +- op2 = gen_lowpart (GET_MODE (dst), op2); +- op2 = force_reg (GET_MODE (dst), op2); +- } +- else +- { +- op1 = operands[1]; +- op2 = SUBREG_REG (operands[2]); +- if (!vector_operand (op2, GET_MODE (dst))) +- op2 = force_reg (GET_MODE (dst), op2); +- } +- op1 = SUBREG_REG (op1); +- if (!vector_operand (op1, GET_MODE (dst))) +- op1 = force_reg (GET_MODE (dst), op1); +- emit_insn (gen_rtx_SET (dst, +- gen_rtx_fmt_ee (code, GET_MODE (dst), +- op1, op2))); +- emit_move_insn (operands[0], gen_lowpart (mode, dst)); +- return; +- default: +- break; +- } +- } +- if (!vector_operand (operands[1], mode)) +- operands[1] = force_reg (mode, operands[1]); +- if (!vector_operand (operands[2], mode)) +- operands[2] = force_reg (mode, operands[2]); +- ix86_fixup_binary_operands_no_copy (code, mode, operands); +- emit_insn (gen_rtx_SET (operands[0], +- gen_rtx_fmt_ee (code, mode, operands[1], +- operands[2]))); ++ for (s = ix86_stack_locals; s; s = s->next) ++ if (s->rtl != NULL_RTX) ++ instantiate_decl_rtl (s->rtl); + } +- +-/* Return TRUE or FALSE depending on whether the binary operator meets the +- appropriate constraints. */ ++ ++/* Check whether x86 address PARTS is a pc-relative address. */ + + bool +-ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, +- rtx operands[3]) ++ix86_rip_relative_addr_p (struct ix86_address *parts) + { +- rtx dst = operands[0]; +- rtx src1 = operands[1]; +- rtx src2 = operands[2]; +- +- /* Both source operands cannot be in memory. */ +- if (MEM_P (src1) && MEM_P (src2)) +- return false; +- +- /* Canonicalize operand order for commutative operators. */ +- if (ix86_swap_binary_operands_p (code, mode, operands)) +- std::swap (src1, src2); ++ rtx base, index, disp; + +- /* If the destination is memory, we must have a matching source operand. 
*/ +- if (MEM_P (dst) && !rtx_equal_p (dst, src1)) +- return false; ++ base = parts->base; ++ index = parts->index; ++ disp = parts->disp; + +- /* Source 1 cannot be a constant. */ +- if (CONSTANT_P (src1)) +- return false; ++ if (disp && !base && !index) ++ { ++ if (TARGET_64BIT) ++ { ++ rtx symbol = disp; + +- /* Source 1 cannot be a non-matching memory. */ +- if (MEM_P (src1) && !rtx_equal_p (dst, src1)) +- /* Support "andhi/andsi/anddi" as a zero-extending move. */ +- return (code == AND +- && (mode == HImode +- || mode == SImode +- || (TARGET_64BIT && mode == DImode)) +- && satisfies_constraint_L (src2)); ++ if (GET_CODE (disp) == CONST) ++ symbol = XEXP (disp, 0); ++ if (GET_CODE (symbol) == PLUS ++ && CONST_INT_P (XEXP (symbol, 1))) ++ symbol = XEXP (symbol, 0); + +- return true; ++ if (GET_CODE (symbol) == LABEL_REF ++ || (GET_CODE (symbol) == SYMBOL_REF ++ && SYMBOL_REF_TLS_MODEL (symbol) == 0) ++ || (GET_CODE (symbol) == UNSPEC ++ && (XINT (symbol, 1) == UNSPEC_GOTPCREL ++ || XINT (symbol, 1) == UNSPEC_PCREL ++ || XINT (symbol, 1) == UNSPEC_GOTNTPOFF))) ++ return true; ++ } ++ } ++ return false; + } + +-/* Attempt to expand a unary operator. Make the expansion closer to the +- actual machine, then just general_operand, which will allow 2 separate +- memory references (one output, one input) in a single insn. */ ++/* Calculate the length of the memory address in the instruction encoding. ++ Includes addr32 prefix, does not include the one-byte modrm, opcode, ++ or other prefixes. We never generate addr32 prefix for LEA insn. */ + +-void +-ix86_expand_unary_operator (enum rtx_code code, machine_mode mode, +- rtx operands[]) ++int ++memory_address_length (rtx addr, bool lea) + { +- bool matching_memory = false; +- rtx src, dst, op, clob; +- +- dst = operands[0]; +- src = operands[1]; ++ struct ix86_address parts; ++ rtx base, index, disp; ++ int len; ++ int ok; + +- /* If the destination is memory, and we do not have matching source +- operands, do things in registers. */ +- if (MEM_P (dst)) +- { +- if (rtx_equal_p (dst, src)) +- matching_memory = true; +- else +- dst = gen_reg_rtx (mode); +- } ++ if (GET_CODE (addr) == PRE_DEC ++ || GET_CODE (addr) == POST_INC ++ || GET_CODE (addr) == PRE_MODIFY ++ || GET_CODE (addr) == POST_MODIFY) ++ return 0; + +- /* When source operand is memory, destination must match. */ +- if (MEM_P (src) && !matching_memory) +- src = force_reg (mode, src); ++ ok = ix86_decompose_address (addr, &parts); ++ gcc_assert (ok); + +- /* Emit the instruction. */ ++ len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1; + +- op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src)); ++ /* If this is not LEA instruction, add the length of addr32 prefix. */ ++ if (TARGET_64BIT && !lea ++ && (SImode_address_operand (addr, VOIDmode) ++ || (parts.base && GET_MODE (parts.base) == SImode) ++ || (parts.index && GET_MODE (parts.index) == SImode))) ++ len++; + +- if (code == NOT) +- emit_insn (op); +- else +- { +- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); +- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); +- } ++ base = parts.base; ++ index = parts.index; ++ disp = parts.disp; + +- /* Fix up the destination if needed. */ +- if (dst != operands[0]) +- emit_move_insn (operands[0], dst); +-} ++ if (base && SUBREG_P (base)) ++ base = SUBREG_REG (base); ++ if (index && SUBREG_P (index)) ++ index = SUBREG_REG (index); + +-/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and +- divisor are within the range [0-255]. 
*/ ++ gcc_assert (base == NULL_RTX || REG_P (base)); ++ gcc_assert (index == NULL_RTX || REG_P (index)); + +-void +-ix86_split_idivmod (machine_mode mode, rtx operands[], +- bool signed_p) +-{ +- rtx_code_label *end_label, *qimode_label; +- rtx div, mod; +- rtx_insn *insn; +- rtx scratch, tmp0, tmp1, tmp2; +- rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx); +- rtx (*gen_zero_extend) (rtx, rtx); +- rtx (*gen_test_ccno_1) (rtx, rtx); ++ /* Rule of thumb: ++ - esp as the base always wants an index, ++ - ebp as the base always wants a displacement, ++ - r12 as the base always wants an index, ++ - r13 as the base always wants a displacement. */ + +- switch (mode) ++ /* Register Indirect. */ ++ if (base && !index && !disp) + { +- case E_SImode: +- if (GET_MODE (operands[0]) == SImode) +- { +- if (GET_MODE (operands[1]) == SImode) +- gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; +- else +- gen_divmod4_1 +- = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2; +- gen_zero_extend = gen_zero_extendqisi2; +- } +- else +- { +- gen_divmod4_1 +- = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1; +- gen_zero_extend = gen_zero_extendqidi2; +- } +- gen_test_ccno_1 = gen_testsi_ccno_1; +- break; +- case E_DImode: +- gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1; +- gen_test_ccno_1 = gen_testdi_ccno_1; +- gen_zero_extend = gen_zero_extendqidi2; +- break; +- default: +- gcc_unreachable (); ++ /* esp (for its index) and ebp (for its displacement) need ++ the two-byte modrm form. Similarly for r12 and r13 in 64-bit ++ code. */ ++ if (base == arg_pointer_rtx ++ || base == frame_pointer_rtx ++ || REGNO (base) == SP_REG ++ || REGNO (base) == BP_REG ++ || REGNO (base) == R12_REG ++ || REGNO (base) == R13_REG) ++ len++; + } + +- end_label = gen_label_rtx (); +- qimode_label = gen_label_rtx (); +- +- scratch = gen_reg_rtx (mode); +- +- /* Use 8bit unsigned divimod if dividend and divisor are within +- the range [0-255]. */ +- emit_move_insn (scratch, operands[2]); +- scratch = expand_simple_binop (mode, IOR, scratch, operands[3], +- scratch, 1, OPTAB_DIRECT); +- emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100))); +- tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG); +- tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx); +- tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0, +- gen_rtx_LABEL_REF (VOIDmode, qimode_label), +- pc_rtx); +- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0)); +- predict_jump (REG_BR_PROB_BASE * 50 / 100); +- JUMP_LABEL (insn) = qimode_label; +- +- /* Generate original signed/unsigned divimod. */ +- div = gen_divmod4_1 (operands[0], operands[1], +- operands[2], operands[3]); +- emit_insn (div); +- +- /* Branch to the end. */ +- emit_jump_insn (gen_jump (end_label)); +- emit_barrier (); +- +- /* Generate 8bit unsigned divide. */ +- emit_label (qimode_label); +- /* Don't use operands[0] for result of 8bit divide since not all +- registers support QImode ZERO_EXTRACT. */ +- tmp0 = lowpart_subreg (HImode, scratch, mode); +- tmp1 = lowpart_subreg (HImode, operands[2], mode); +- tmp2 = lowpart_subreg (QImode, operands[3], mode); +- emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2)); +- +- if (signed_p) ++ /* Direct Addressing. In 64-bit mode mod 00 r/m 5 ++ is not disp32, but disp32(%rip), so for disp32 ++ SIB byte is needed, unless print_operand_address ++ optimizes it into disp32(%rip) or (%rip) is implied ++ by UNSPEC. 
*/ ++ else if (disp && !base && !index) + { +- div = gen_rtx_DIV (mode, operands[2], operands[3]); +- mod = gen_rtx_MOD (mode, operands[2], operands[3]); ++ len += 4; ++ if (!ix86_rip_relative_addr_p (&parts)) ++ len++; + } + else + { +- div = gen_rtx_UDIV (mode, operands[2], operands[3]); +- mod = gen_rtx_UMOD (mode, operands[2], operands[3]); +- } +- if (mode == SImode) +- { +- if (GET_MODE (operands[0]) != SImode) +- div = gen_rtx_ZERO_EXTEND (DImode, div); +- if (GET_MODE (operands[1]) != SImode) +- mod = gen_rtx_ZERO_EXTEND (DImode, mod); +- } ++ /* Find the length of the displacement constant. */ ++ if (disp) ++ { ++ if (base && satisfies_constraint_K (disp)) ++ len += 1; ++ else ++ len += 4; ++ } ++ /* ebp always wants a displacement. Similarly r13. */ ++ else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG)) ++ len++; + +- /* Extract remainder from AH. */ +- tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]), +- tmp0, GEN_INT (8), GEN_INT (8)); +- if (REG_P (operands[1])) +- insn = emit_move_insn (operands[1], tmp1); +- else +- { +- /* Need a new scratch register since the old one has result +- of 8bit divide. */ +- scratch = gen_reg_rtx (GET_MODE (operands[1])); +- emit_move_insn (scratch, tmp1); +- insn = emit_move_insn (operands[1], scratch); ++ /* An index requires the two-byte modrm form.... */ ++ if (index ++ /* ...like esp (or r12), which always wants an index. */ ++ || base == arg_pointer_rtx ++ || base == frame_pointer_rtx ++ || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG))) ++ len++; + } +- set_unique_reg_note (insn, REG_EQUAL, mod); + +- /* Zero extend quotient from AL. */ +- tmp1 = gen_lowpart (QImode, tmp0); +- insn = emit_insn (gen_zero_extend (operands[0], tmp1)); +- set_unique_reg_note (insn, REG_EQUAL, div); +- +- emit_label (end_label); ++ return len; + } + +-#define LEA_MAX_STALL (3) +-#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) ++/* Compute default value for "length_immediate" attribute. When SHORTFORM ++ is set, expect that insn have 8bit immediate alternative. */ ++int ++ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform) ++{ ++ int len = 0; ++ int i; ++ extract_insn_cached (insn); ++ for (i = recog_data.n_operands - 1; i >= 0; --i) ++ if (CONSTANT_P (recog_data.operand[i])) ++ { ++ enum attr_mode mode = get_attr_mode (insn); + +-/* Increase given DISTANCE in half-cycles according to +- dependencies between PREV and NEXT instructions. +- Add 1 half-cycle if there is no dependency and +- go to next cycle if there is some dependecy. */ ++ gcc_assert (!len); ++ if (shortform && CONST_INT_P (recog_data.operand[i])) ++ { ++ HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]); ++ switch (mode) ++ { ++ case MODE_QI: ++ len = 1; ++ continue; ++ case MODE_HI: ++ ival = trunc_int_for_mode (ival, HImode); ++ break; ++ case MODE_SI: ++ ival = trunc_int_for_mode (ival, SImode); ++ break; ++ default: ++ break; ++ } ++ if (IN_RANGE (ival, -128, 127)) ++ { ++ len = 1; ++ continue; ++ } ++ } ++ switch (mode) ++ { ++ case MODE_QI: ++ len = 1; ++ break; ++ case MODE_HI: ++ len = 2; ++ break; ++ case MODE_SI: ++ len = 4; ++ break; ++ /* Immediates for DImode instructions are encoded ++ as 32bit sign extended values. */ ++ case MODE_DI: ++ len = 4; ++ break; ++ default: ++ fatal_insn ("unknown insn mode", insn); ++ } ++ } ++ return len; ++} + +-static unsigned int +-increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance) ++/* Compute default value for "length_address" attribute. 
*/ ++int ++ix86_attr_length_address_default (rtx_insn *insn) + { +- df_ref def, use; ++ int i; + +- if (!prev || !next) +- return distance + (distance & 1) + 2; ++ if (get_attr_type (insn) == TYPE_LEA) ++ { ++ rtx set = PATTERN (insn), addr; + +- if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) +- return distance + 1; ++ if (GET_CODE (set) == PARALLEL) ++ set = XVECEXP (set, 0, 0); + +- FOR_EACH_INSN_USE (use, next) +- FOR_EACH_INSN_DEF (def, prev) +- if (!DF_REF_IS_ARTIFICIAL (def) +- && DF_REF_REGNO (use) == DF_REF_REGNO (def)) +- return distance + (distance & 1) + 2; ++ gcc_assert (GET_CODE (set) == SET); + +- return distance + 1; +-} ++ addr = SET_SRC (set); + +-/* Function checks if instruction INSN defines register number +- REGNO1 or REGNO2. */ ++ return memory_address_length (addr, true); ++ } + +-static bool +-insn_defines_reg (unsigned int regno1, unsigned int regno2, +- rtx_insn *insn) +-{ +- df_ref def; ++ extract_insn_cached (insn); ++ for (i = recog_data.n_operands - 1; i >= 0; --i) ++ { ++ rtx op = recog_data.operand[i]; ++ if (MEM_P (op)) ++ { ++ constrain_operands_cached (insn, reload_completed); ++ if (which_alternative != -1) ++ { ++ const char *constraints = recog_data.constraints[i]; ++ int alt = which_alternative; + +- FOR_EACH_INSN_DEF (def, insn) +- if (DF_REF_REG_DEF_P (def) +- && !DF_REF_IS_ARTIFICIAL (def) +- && (regno1 == DF_REF_REGNO (def) +- || regno2 == DF_REF_REGNO (def))) +- return true; ++ while (*constraints == '=' || *constraints == '+') ++ constraints++; ++ while (alt-- > 0) ++ while (*constraints++ != ',') ++ ; ++ /* Skip ignored operands. */ ++ if (*constraints == 'X') ++ continue; ++ } + +- return false; ++ int len = memory_address_length (XEXP (op, 0), false); ++ ++ /* Account for segment prefix for non-default addr spaces. */ ++ if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op))) ++ len++; ++ ++ return len; ++ } ++ } ++ return 0; + } + +-/* Function checks if instruction INSN uses register number +- REGNO as a part of address expression. */ ++/* Compute default value for "length_vex" attribute. It includes ++ 2 or 3 byte VEX prefix and 1 opcode byte. */ + +-static bool +-insn_uses_reg_mem (unsigned int regno, rtx insn) ++int ++ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode, ++ bool has_vex_w) + { +- df_ref use; ++ int i; + +- FOR_EACH_INSN_USE (use, insn) +- if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use)) +- return true; ++ /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3 ++ byte VEX prefix. */ ++ if (!has_0f_opcode || has_vex_w) ++ return 3 + 1; + +- return false; +-} ++ /* We can always use 2 byte VEX prefix in 32bit. */ ++ if (!TARGET_64BIT) ++ return 2 + 1; + +-/* Search backward for non-agu definition of register number REGNO1 +- or register number REGNO2 in basic block starting from instruction +- START up to head of basic block or instruction INSN. ++ extract_insn_cached (insn); + +- Function puts true value into *FOUND var if definition was found +- and false otherwise. ++ for (i = recog_data.n_operands - 1; i >= 0; --i) ++ if (REG_P (recog_data.operand[i])) ++ { ++ /* REX.W bit uses 3 byte VEX prefix. */ ++ if (GET_MODE (recog_data.operand[i]) == DImode ++ && GENERAL_REG_P (recog_data.operand[i])) ++ return 3 + 1; ++ } ++ else ++ { ++ /* REX.X or REX.B bits use 3 byte VEX prefix. 
*/ ++ if (MEM_P (recog_data.operand[i]) ++ && x86_extended_reg_mentioned_p (recog_data.operand[i])) ++ return 3 + 1; ++ } + +- Distance in half-cycles between START and found instruction or head +- of BB is added to DISTANCE and returned. */ ++ return 2 + 1; ++} ++ + +-static int +-distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2, +- rtx_insn *insn, int distance, +- rtx_insn *start, bool *found) +-{ +- basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL; +- rtx_insn *prev = start; +- rtx_insn *next = NULL; ++static bool ++ix86_class_likely_spilled_p (reg_class_t); + +- *found = false; ++/* Returns true if lhs of insn is HW function argument register and set up ++ is_spilled to true if it is likely spilled HW register. */ ++static bool ++insn_is_function_arg (rtx insn, bool* is_spilled) ++{ ++ rtx dst; + +- while (prev +- && prev != insn +- && distance < LEA_SEARCH_THRESHOLD) ++ if (!NONDEBUG_INSN_P (insn)) ++ return false; ++ /* Call instructions are not movable, ignore it. */ ++ if (CALL_P (insn)) ++ return false; ++ insn = PATTERN (insn); ++ if (GET_CODE (insn) == PARALLEL) ++ insn = XVECEXP (insn, 0, 0); ++ if (GET_CODE (insn) != SET) ++ return false; ++ dst = SET_DEST (insn); ++ if (REG_P (dst) && HARD_REGISTER_P (dst) ++ && ix86_function_arg_regno_p (REGNO (dst))) + { +- if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev)) +- { +- distance = increase_distance (prev, next, distance); +- if (insn_defines_reg (regno1, regno2, prev)) +- { +- if (recog_memoized (prev) < 0 +- || get_attr_type (prev) != TYPE_LEA) +- { +- *found = true; +- return distance; +- } +- } +- +- next = prev; +- } +- if (prev == BB_HEAD (bb)) +- break; +- +- prev = PREV_INSN (prev); ++ /* Is it likely spilled HW register? */ ++ if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst)) ++ && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))) ++ *is_spilled = true; ++ return true; + } +- +- return distance; ++ return false; + } + +-/* Search backward for non-agu definition of register number REGNO1 +- or register number REGNO2 in INSN's basic block until +- 1. Pass LEA_SEARCH_THRESHOLD instructions, or +- 2. Reach neighbor BBs boundary, or +- 3. Reach agu definition. +- Returns the distance between the non-agu definition point and INSN. +- If no definition point, returns -1. */ +- +-static int +-distance_non_agu_define (unsigned int regno1, unsigned int regno2, +- rtx_insn *insn) ++/* Add output dependencies for chain of function adjacent arguments if only ++ there is a move to likely spilled HW register. Return first argument ++ if at least one dependence was added or NULL otherwise. */ ++static rtx_insn * ++add_parameter_dependencies (rtx_insn *call, rtx_insn *head) + { +- basic_block bb = BLOCK_FOR_INSN (insn); +- int distance = 0; +- bool found = false; ++ rtx_insn *insn; ++ rtx_insn *last = call; ++ rtx_insn *first_arg = NULL; ++ bool is_spilled = false; + +- if (insn != BB_HEAD (bb)) +- distance = distance_non_agu_define_in_bb (regno1, regno2, insn, +- distance, PREV_INSN (insn), +- &found); ++ head = PREV_INSN (head); + +- if (!found && distance < LEA_SEARCH_THRESHOLD) ++ /* Find nearest to call argument passing instruction. 
*/ ++ while (true) + { +- edge e; +- edge_iterator ei; +- bool simple_loop = false; +- +- FOR_EACH_EDGE (e, ei, bb->preds) +- if (e->src == bb) +- { +- simple_loop = true; +- break; +- } ++ last = PREV_INSN (last); ++ if (last == head) ++ return NULL; ++ if (!NONDEBUG_INSN_P (last)) ++ continue; ++ if (insn_is_function_arg (last, &is_spilled)) ++ break; ++ return NULL; ++ } + +- if (simple_loop) +- distance = distance_non_agu_define_in_bb (regno1, regno2, +- insn, distance, +- BB_END (bb), &found); +- else ++ first_arg = last; ++ while (true) ++ { ++ insn = PREV_INSN (last); ++ if (!INSN_P (insn)) ++ break; ++ if (insn == head) ++ break; ++ if (!NONDEBUG_INSN_P (insn)) + { +- int shortest_dist = -1; +- bool found_in_bb = false; +- +- FOR_EACH_EDGE (e, ei, bb->preds) +- { +- int bb_dist +- = distance_non_agu_define_in_bb (regno1, regno2, +- insn, distance, +- BB_END (e->src), +- &found_in_bb); +- if (found_in_bb) +- { +- if (shortest_dist < 0) +- shortest_dist = bb_dist; +- else if (bb_dist > 0) +- shortest_dist = MIN (bb_dist, shortest_dist); +- +- found = true; +- } +- } +- +- distance = shortest_dist; ++ last = insn; ++ continue; ++ } ++ if (insn_is_function_arg (insn, &is_spilled)) ++ { ++ /* Add output depdendence between two function arguments if chain ++ of output arguments contains likely spilled HW registers. */ ++ if (is_spilled) ++ add_dependence (first_arg, insn, REG_DEP_OUTPUT); ++ first_arg = last = insn; + } ++ else ++ break; + } +- +- /* get_attr_type may modify recog data. We want to make sure +- that recog data is valid for instruction INSN, on which +- distance_non_agu_define is called. INSN is unchanged here. */ +- extract_insn_cached (insn); +- +- if (!found) +- return -1; +- +- return distance >> 1; ++ if (!is_spilled) ++ return NULL; ++ return first_arg; + } + +-/* Return the distance in half-cycles between INSN and the next +- insn that uses register number REGNO in memory address added +- to DISTANCE. Return -1 if REGNO0 is set. +- +- Put true value into *FOUND if register usage was found and +- false otherwise. +- Put true value into *REDEFINED if register redefinition was +- found and false otherwise. */ +- +-static int +-distance_agu_use_in_bb (unsigned int regno, +- rtx_insn *insn, int distance, rtx_insn *start, +- bool *found, bool *redefined) ++/* Add output or anti dependency from insn to first_arg to restrict its code ++ motion. */ ++static void ++avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn) + { +- basic_block bb = NULL; +- rtx_insn *next = start; +- rtx_insn *prev = NULL; +- +- *found = false; +- *redefined = false; ++ rtx set; ++ rtx tmp; + +- if (start != NULL_RTX) ++ set = single_set (insn); ++ if (!set) ++ return; ++ tmp = SET_DEST (set); ++ if (REG_P (tmp)) + { +- bb = BLOCK_FOR_INSN (start); +- if (start != BB_HEAD (bb)) +- /* If insn and start belong to the same bb, set prev to insn, +- so the call to increase_distance will increase the distance +- between insns by 1. */ +- prev = insn; ++ /* Add output dependency to the first function argument. */ ++ add_dependence (first_arg, insn, REG_DEP_OUTPUT); ++ return; + } ++ /* Add anti dependency. */ ++ add_dependence (first_arg, insn, REG_DEP_ANTI); ++} + +- while (next +- && next != insn +- && distance < LEA_SEARCH_THRESHOLD) ++/* Avoid cross block motion of function argument through adding dependency ++ from the first non-jump instruction in bb. 
*/ ++static void ++add_dependee_for_func_arg (rtx_insn *arg, basic_block bb) ++{ ++ rtx_insn *insn = BB_END (bb); ++ ++ while (insn) + { +- if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next)) ++ if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn)) + { +- distance = increase_distance(prev, next, distance); +- if (insn_uses_reg_mem (regno, next)) +- { +- /* Return DISTANCE if OP0 is used in memory +- address in NEXT. */ +- *found = true; +- return distance; +- } +- +- if (insn_defines_reg (regno, INVALID_REGNUM, next)) ++ rtx set = single_set (insn); ++ if (set) + { +- /* Return -1 if OP0 is set in NEXT. */ +- *redefined = true; +- return -1; ++ avoid_func_arg_motion (arg, insn); ++ return; + } +- +- prev = next; + } +- +- if (next == BB_END (bb)) +- break; +- +- next = NEXT_INSN (next); ++ if (insn == BB_HEAD (bb)) ++ return; ++ insn = PREV_INSN (insn); + } +- +- return distance; + } + +-/* Return the distance between INSN and the next insn that uses +- register number REGNO0 in memory address. Return -1 if no such +- a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */ ++/* Hook for pre-reload schedule - avoid motion of function arguments ++ passed in likely spilled HW registers. */ ++static void ++ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) ++{ ++ rtx_insn *insn; ++ rtx_insn *first_arg = NULL; ++ if (reload_completed) ++ return; ++ while (head != tail && DEBUG_INSN_P (head)) ++ head = NEXT_INSN (head); ++ for (insn = tail; insn != head; insn = PREV_INSN (insn)) ++ if (INSN_P (insn) && CALL_P (insn)) ++ { ++ first_arg = add_parameter_dependencies (insn, head); ++ if (first_arg) ++ { ++ /* Add dependee for first argument to predecessors if only ++ region contains more than one block. */ ++ basic_block bb = BLOCK_FOR_INSN (insn); ++ int rgn = CONTAINING_RGN (bb->index); ++ int nr_blks = RGN_NR_BLOCKS (rgn); ++ /* Skip trivial regions and region head blocks that can have ++ predecessors outside of region. */ ++ if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0) ++ { ++ edge e; ++ edge_iterator ei; ++ ++ /* Regions are SCCs with the exception of selective ++ scheduling with pipelining of outer blocks enabled. ++ So also check that immediate predecessors of a non-head ++ block are in the same region. */ ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ { ++ /* Avoid creating of loop-carried dependencies through ++ using topological ordering in the region. */ ++ if (rgn == CONTAINING_RGN (e->src->index) ++ && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) ++ add_dependee_for_func_arg (first_arg, e->src); ++ } ++ } ++ insn = first_arg; ++ if (insn == head) ++ break; ++ } ++ } ++ else if (first_arg) ++ avoid_func_arg_motion (first_arg, insn); ++} + ++/* Hook for pre-reload schedule - set priority of moves from likely spilled ++ HW registers to maximum, to schedule them at soon as possible. These are ++ moves from function argument registers at the top of the function entry ++ and moves from function return value registers after call. 
*/ + static int +-distance_agu_use (unsigned int regno0, rtx_insn *insn) ++ix86_adjust_priority (rtx_insn *insn, int priority) + { +- basic_block bb = BLOCK_FOR_INSN (insn); +- int distance = 0; +- bool found = false; +- bool redefined = false; ++ rtx set; + +- if (insn != BB_END (bb)) +- distance = distance_agu_use_in_bb (regno0, insn, distance, +- NEXT_INSN (insn), +- &found, &redefined); ++ if (reload_completed) ++ return priority; + +- if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD) ++ if (!NONDEBUG_INSN_P (insn)) ++ return priority; ++ ++ set = single_set (insn); ++ if (set) + { +- edge e; +- edge_iterator ei; +- bool simple_loop = false; ++ rtx tmp = SET_SRC (set); ++ if (REG_P (tmp) ++ && HARD_REGISTER_P (tmp) ++ && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp)) ++ && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp)))) ++ return current_sched_info->sched_max_insns_priority; ++ } + +- FOR_EACH_EDGE (e, ei, bb->succs) +- if (e->dest == bb) +- { +- simple_loop = true; +- break; +- } ++ return priority; ++} + +- if (simple_loop) +- distance = distance_agu_use_in_bb (regno0, insn, +- distance, BB_HEAD (bb), +- &found, &redefined); +- else ++/* Prepare for scheduling pass. */ ++static void ++ix86_sched_init_global (FILE *, int, int) ++{ ++ /* Install scheduling hooks for current CPU. Some of these hooks are used ++ in time-critical parts of the scheduler, so we only set them up when ++ they are actually used. */ ++ switch (ix86_tune) ++ { ++ case PROCESSOR_CORE2: ++ case PROCESSOR_NEHALEM: ++ case PROCESSOR_SANDYBRIDGE: ++ case PROCESSOR_HASWELL: ++ case PROCESSOR_GENERIC: ++ /* Do not perform multipass scheduling for pre-reload schedule ++ to save compile time. */ ++ if (reload_completed) + { +- int shortest_dist = -1; +- bool found_in_bb = false; +- bool redefined_in_bb = false; +- +- FOR_EACH_EDGE (e, ei, bb->succs) +- { +- int bb_dist +- = distance_agu_use_in_bb (regno0, insn, +- distance, BB_HEAD (e->dest), +- &found_in_bb, &redefined_in_bb); +- if (found_in_bb) +- { +- if (shortest_dist < 0) +- shortest_dist = bb_dist; +- else if (bb_dist > 0) +- shortest_dist = MIN (bb_dist, shortest_dist); +- +- found = true; +- } +- } +- +- distance = shortest_dist; ++ ix86_core2i7_init_hooks (); ++ break; + } ++ /* Fall through. */ ++ default: ++ targetm.sched.dfa_post_advance_cycle = NULL; ++ targetm.sched.first_cycle_multipass_init = NULL; ++ targetm.sched.first_cycle_multipass_begin = NULL; ++ targetm.sched.first_cycle_multipass_issue = NULL; ++ targetm.sched.first_cycle_multipass_backtrack = NULL; ++ targetm.sched.first_cycle_multipass_end = NULL; ++ targetm.sched.first_cycle_multipass_fini = NULL; ++ break; + } +- +- if (!found || redefined) +- return -1; +- +- return distance >> 1; + } + +-/* Define this macro to tune LEA priority vs ADD, it take effect when +- there is a dilemma of choicing LEA or ADD +- Negative value: ADD is more preferred than LEA +- Zero: Netrual +- Positive value: LEA is more preferred than ADD*/ +-#define IX86_LEA_PRIORITY 0 +- +-/* Return true if usage of lea INSN has performance advantage +- over a sequence of instructions. Instructions sequence has +- SPLIT_COST cycles higher latency than lea latency. */ ++ ++/* Implement TARGET_STATIC_RTX_ALIGNMENT. 
*/ + +-static bool +-ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1, +- unsigned int regno2, int split_cost, bool has_scale) ++static HOST_WIDE_INT ++ix86_static_rtx_alignment (machine_mode mode) + { +- int dist_define, dist_use; +- +- /* For Silvermont if using a 2-source or 3-source LEA for +- non-destructive destination purposes, or due to wanting +- ability to use SCALE, the use of LEA is justified. */ +- if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS +- || TARGET_TREMONT || TARGET_INTEL) +- { +- if (has_scale) +- return true; +- if (split_cost < 1) +- return false; +- if (regno0 == regno1 || regno0 == regno2) +- return false; +- return true; +- } ++ if (mode == DFmode) ++ return 64; ++ if (ALIGN_MODE_128 (mode)) ++ return MAX (128, GET_MODE_ALIGNMENT (mode)); ++ return GET_MODE_ALIGNMENT (mode); ++} + +- dist_define = distance_non_agu_define (regno1, regno2, insn); +- dist_use = distance_agu_use (regno0, insn); ++/* Implement TARGET_CONSTANT_ALIGNMENT. */ + +- if (dist_define < 0 || dist_define >= LEA_MAX_STALL) ++static HOST_WIDE_INT ++ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align) ++{ ++ if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST ++ || TREE_CODE (exp) == INTEGER_CST) + { +- /* If there is no non AGU operand definition, no AGU +- operand usage and split cost is 0 then both lea +- and non lea variants have same priority. Currently +- we prefer lea for 64 bit code and non lea on 32 bit +- code. */ +- if (dist_use < 0 && split_cost == 0) +- return TARGET_64BIT || IX86_LEA_PRIORITY; +- else +- return true; ++ machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); ++ HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode); ++ return MAX (mode_align, align); + } ++ else if (!optimize_size && TREE_CODE (exp) == STRING_CST ++ && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) ++ return BITS_PER_WORD; + +- /* With longer definitions distance lea is more preferable. +- Here we change it to take into account splitting cost and +- lea priority. */ +- dist_define += split_cost + IX86_LEA_PRIORITY; ++ return align; ++} + +- /* If there is no use in memory addess then we just check +- that split cost exceeds AGU stall. */ +- if (dist_use < 0) +- return dist_define > LEA_MAX_STALL; ++/* Implement TARGET_EMPTY_RECORD_P. */ + +- /* If this insn has both backward non-agu dependence and forward +- agu dependence, the one with short distance takes effect. */ +- return dist_define >= dist_use; ++static bool ++ix86_is_empty_record (const_tree type) ++{ ++ if (!TARGET_64BIT) ++ return false; ++ return default_is_empty_record (type); + } + +-/* Return true if it is legal to clobber flags by INSN and +- false otherwise. */ ++/* Implement TARGET_WARN_PARAMETER_PASSING_ABI. 
*/ + +-static bool +-ix86_ok_to_clobber_flags (rtx_insn *insn) ++static void ++ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) + { +- basic_block bb = BLOCK_FOR_INSN (insn); +- df_ref use; +- bitmap live; ++ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + +- while (insn) +- { +- if (NONDEBUG_INSN_P (insn)) +- { +- FOR_EACH_INSN_USE (use, insn) +- if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG) +- return false; ++ if (!cum->warn_empty) ++ return; + +- if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn)) +- return true; +- } +- +- if (insn == BB_END (bb)) +- break; +- +- insn = NEXT_INSN (insn); +- } +- +- live = df_get_live_out(bb); +- return !REGNO_REG_SET_P (live, FLAGS_REG); +-} +- +-/* Return true if we need to split op0 = op1 + op2 into a sequence of +- move and add to avoid AGU stalls. */ ++ if (!TYPE_EMPTY_P (type)) ++ return; + +-bool +-ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) +-{ +- unsigned int regno0, regno1, regno2; ++ /* Don't warn if the function isn't visible outside of the TU. */ ++ if (cum->decl && !TREE_PUBLIC (cum->decl)) ++ return; + +- /* Check if we need to optimize. */ +- if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) +- return false; ++ const_tree ctx = get_ultimate_context (cum->decl); ++ if (ctx != NULL_TREE ++ && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) ++ return; + +- /* Check it is correct to split here. */ +- if (!ix86_ok_to_clobber_flags(insn)) +- return false; ++ /* If the actual size of the type is zero, then there is no change ++ in how objects of this size are passed. */ ++ if (int_size_in_bytes (type) == 0) ++ return; + +- regno0 = true_regnum (operands[0]); +- regno1 = true_regnum (operands[1]); +- regno2 = true_regnum (operands[2]); ++ warning (OPT_Wabi, "empty class %qT parameter passing ABI " ++ "changes in %<-fabi-version=12%> (GCC 8)", type); + +- /* We need to split only adds with non destructive +- destination operand. */ +- if (regno0 == regno1 || regno0 == regno2) +- return false; +- else +- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false); ++ /* Only warn once. */ ++ cum->warn_empty = false; + } + +-/* Return true if we should emit lea instruction instead of mov +- instruction. */ ++/* This hook returns name of multilib ABI. */ + +-bool +-ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) ++static const char * ++ix86_get_multilib_abi_name (void) + { +- unsigned int regno0, regno1; +- +- /* Check if we need to optimize. */ +- if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) +- return false; +- +- /* Use lea for reg to reg moves only. */ +- if (!REG_P (operands[0]) || !REG_P (operands[1])) +- return false; +- +- regno0 = true_regnum (operands[0]); +- regno1 = true_regnum (operands[1]); +- +- return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false); ++ if (!(TARGET_64BIT_P (ix86_isa_flags))) ++ return "i386"; ++ else if (TARGET_X32_P (ix86_isa_flags)) ++ return "x32"; ++ else ++ return "x86_64"; + } + +-/* Return true if we need to split lea into a sequence of +- instructions to avoid AGU stalls. */ ++/* Compute the alignment for a variable for Intel MCU psABI. TYPE is ++ the data type, and ALIGN is the alignment that the object would ++ ordinarily have. */ + +-bool +-ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[]) ++static int ++iamcu_alignment (tree type, int align) + { +- unsigned int regno0, regno1, regno2; +- int split_cost; +- struct ix86_address parts; +- int ok; +- +- /* Check we need to optimize. 
*/ +- if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun)) +- return false; +- +- /* The "at least two components" test below might not catch simple +- move or zero extension insns if parts.base is non-NULL and parts.disp +- is const0_rtx as the only components in the address, e.g. if the +- register is %rbp or %r13. As this test is much cheaper and moves or +- zero extensions are the common case, do this check first. */ +- if (REG_P (operands[1]) +- || (SImode_address_operand (operands[1], VOIDmode) +- && REG_P (XEXP (operands[1], 0)))) +- return false; +- +- /* Check if it is OK to split here. */ +- if (!ix86_ok_to_clobber_flags (insn)) +- return false; +- +- ok = ix86_decompose_address (operands[1], &parts); +- gcc_assert (ok); +- +- /* There should be at least two components in the address. */ +- if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX) +- + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2) +- return false; +- +- /* We should not split into add if non legitimate pic +- operand is used as displacement. */ +- if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp)) +- return false; +- +- regno0 = true_regnum (operands[0]) ; +- regno1 = INVALID_REGNUM; +- regno2 = INVALID_REGNUM; +- +- if (parts.base) +- regno1 = true_regnum (parts.base); +- if (parts.index) +- regno2 = true_regnum (parts.index); ++ machine_mode mode; + +- split_cost = 0; ++ if (align < 32 || TYPE_USER_ALIGN (type)) ++ return align; + +- /* Compute how many cycles we will add to execution time +- if split lea into a sequence of instructions. */ +- if (parts.base || parts.index) ++ /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4 ++ bytes. */ ++ mode = TYPE_MODE (strip_array_types (type)); ++ switch (GET_MODE_CLASS (mode)) + { +- /* Have to use mov instruction if non desctructive +- destination form is used. */ +- if (regno1 != regno0 && regno2 != regno0) +- split_cost += 1; +- +- /* Have to add index to base if both exist. */ +- if (parts.base && parts.index) +- split_cost += 1; +- +- /* Have to use shift and adds if scale is 2 or greater. */ +- if (parts.scale > 1) +- { +- if (regno0 != regno1) +- split_cost += 1; +- else if (regno2 == regno0) +- split_cost += 4; +- else +- split_cost += parts.scale; +- } +- +- /* Have to use add instruction with immediate if +- disp is non zero. */ +- if (parts.disp && parts.disp != const0_rtx) +- split_cost += 1; +- +- /* Subtract the price of lea. */ +- split_cost -= 1; ++ case MODE_INT: ++ case MODE_COMPLEX_INT: ++ case MODE_COMPLEX_FLOAT: ++ case MODE_FLOAT: ++ case MODE_DECIMAL_FLOAT: ++ return 32; ++ default: ++ return align; + } +- +- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost, +- parts.scale > 1); + } + +-/* Emit x86 binary operand CODE in mode MODE, where the first operand +- matches destination. RTX includes clobber of FLAGS_REG. */ ++/* Compute the alignment for a static variable. ++ TYPE is the data type, and ALIGN is the alignment that ++ the object would ordinarily have. The value of this function is used ++ instead of that alignment to align the object. */ + +-static void +-ix86_emit_binop (enum rtx_code code, machine_mode mode, +- rtx dst, rtx src) ++int ++ix86_data_alignment (tree type, unsigned int align, bool opt) + { +- rtx op, clob; ++ /* GCC 4.8 and earlier used to incorrectly assume this alignment even ++ for symbols from other compilation units or symbols that don't need ++ to bind locally. 
In order to preserve some ABI compatibility with ++ those compilers, ensure we don't decrease alignment from what we ++ used to assume. */ + +- op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src)); +- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); +- +- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); +-} ++ unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT); + +-/* Return true if regno1 def is nearest to the insn. */ ++ /* A data structure, equal or greater than the size of a cache line ++ (64 bytes in the Pentium 4 and other recent Intel processors, including ++ processors based on Intel Core microarchitecture) should be aligned ++ so that its base address is a multiple of a cache line size. */ + +-static bool +-find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2) +-{ +- rtx_insn *prev = insn; +- rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn)); ++ unsigned int max_align ++ = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT); + +- if (insn == start) +- return false; +- while (prev && prev != start) ++ if (max_align < BITS_PER_WORD) ++ max_align = BITS_PER_WORD; ++ ++ switch (ix86_align_data_type) + { +- if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev)) +- { +- prev = PREV_INSN (prev); +- continue; +- } +- if (insn_defines_reg (regno1, INVALID_REGNUM, prev)) +- return true; +- else if (insn_defines_reg (regno2, INVALID_REGNUM, prev)) +- return false; +- prev = PREV_INSN (prev); ++ case ix86_align_data_type_abi: opt = false; break; ++ case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break; ++ case ix86_align_data_type_cacheline: break; + } + +- /* None of the regs is defined in the bb. */ +- return false; +-} +- +-/* Split lea instructions into a sequence of instructions +- which are executed on ALU to avoid AGU stalls. +- It is assumed that it is allowed to clobber flags register +- at lea position. */ +- +-void +-ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode) +-{ +- unsigned int regno0, regno1, regno2; +- struct ix86_address parts; +- rtx target, tmp; +- int ok, adds; +- +- ok = ix86_decompose_address (operands[1], &parts); +- gcc_assert (ok); +- +- target = gen_lowpart (mode, operands[0]); +- +- regno0 = true_regnum (target); +- regno1 = INVALID_REGNUM; +- regno2 = INVALID_REGNUM; ++ if (TARGET_IAMCU) ++ align = iamcu_alignment (type, align); + +- if (parts.base) ++ if (opt ++ && AGGREGATE_TYPE_P (type) ++ && TYPE_SIZE (type) ++ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) + { +- parts.base = gen_lowpart (mode, parts.base); +- regno1 = true_regnum (parts.base); ++ if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat) ++ && align < max_align_compat) ++ align = max_align_compat; ++ if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align) ++ && align < max_align) ++ align = max_align; + } + +- if (parts.index) ++ /* x86-64 ABI requires arrays greater than 16 bytes to be aligned ++ to 16byte boundary. */ ++ if (TARGET_64BIT) + { +- parts.index = gen_lowpart (mode, parts.index); +- regno2 = true_regnum (parts.index); ++ if ((opt ? 
AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) ++ && TYPE_SIZE (type) ++ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST ++ && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128) ++ && align < 128) ++ return 128; + } + +- if (parts.disp) +- parts.disp = gen_lowpart (mode, parts.disp); ++ if (!opt) ++ return align; + +- if (parts.scale > 1) ++ if (TREE_CODE (type) == ARRAY_TYPE) ++ { ++ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) ++ return 64; ++ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) ++ return 128; ++ } ++ else if (TREE_CODE (type) == COMPLEX_TYPE) + { +- /* Case r1 = r1 + ... */ +- if (regno1 == regno0) +- { +- /* If we have a case r1 = r1 + C * r2 then we +- should use multiplication which is very +- expensive. Assume cost model is wrong if we +- have such case here. */ +- gcc_assert (regno2 != regno0); +- +- for (adds = parts.scale; adds > 0; adds--) +- ix86_emit_binop (PLUS, mode, target, parts.index); +- } +- else +- { +- /* r1 = r2 + r3 * C case. Need to move r3 into r1. */ +- if (regno0 != regno2) +- emit_insn (gen_rtx_SET (target, parts.index)); +- +- /* Use shift for scaling. */ +- ix86_emit_binop (ASHIFT, mode, target, +- GEN_INT (exact_log2 (parts.scale))); +- +- if (parts.base) +- ix86_emit_binop (PLUS, mode, target, parts.base); + +- if (parts.disp && parts.disp != const0_rtx) +- ix86_emit_binop (PLUS, mode, target, parts.disp); +- } ++ if (TYPE_MODE (type) == DCmode && align < 64) ++ return 64; ++ if ((TYPE_MODE (type) == XCmode ++ || TYPE_MODE (type) == TCmode) && align < 128) ++ return 128; + } +- else if (!parts.base && !parts.index) ++ else if ((TREE_CODE (type) == RECORD_TYPE ++ || TREE_CODE (type) == UNION_TYPE ++ || TREE_CODE (type) == QUAL_UNION_TYPE) ++ && TYPE_FIELDS (type)) + { +- gcc_assert(parts.disp); +- emit_insn (gen_rtx_SET (target, parts.disp)); ++ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) ++ return 64; ++ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) ++ return 128; + } +- else ++ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE ++ || TREE_CODE (type) == INTEGER_TYPE) + { +- if (!parts.base) +- { +- if (regno0 != regno2) +- emit_insn (gen_rtx_SET (target, parts.index)); +- } +- else if (!parts.index) +- { +- if (regno0 != regno1) +- emit_insn (gen_rtx_SET (target, parts.base)); +- } +- else +- { +- if (regno0 == regno1) +- tmp = parts.index; +- else if (regno0 == regno2) +- tmp = parts.base; +- else +- { +- rtx tmp1; ++ if (TYPE_MODE (type) == DFmode && align < 64) ++ return 64; ++ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) ++ return 128; ++ } + +- /* Find better operand for SET instruction, depending +- on which definition is farther from the insn. */ +- if (find_nearest_reg_def (insn, regno1, regno2)) +- tmp = parts.index, tmp1 = parts.base; +- else +- tmp = parts.base, tmp1 = parts.index; ++ return align; ++} + +- emit_insn (gen_rtx_SET (target, tmp)); ++/* Compute the alignment for a local variable or a stack slot. EXP is ++ the data type or decl itself, MODE is the widest mode available and ++ ALIGN is the alignment that the object would ordinarily have. The ++ value of this macro is used instead of that alignment to align the ++ object. 
*/ + +- if (parts.disp && parts.disp != const0_rtx) +- ix86_emit_binop (PLUS, mode, target, parts.disp); ++unsigned int ++ix86_local_alignment (tree exp, machine_mode mode, ++ unsigned int align) ++{ ++ tree type, decl; + +- ix86_emit_binop (PLUS, mode, target, tmp1); +- return; +- } ++ if (exp && DECL_P (exp)) ++ { ++ type = TREE_TYPE (exp); ++ decl = exp; ++ } ++ else ++ { ++ type = exp; ++ decl = NULL; ++ } + +- ix86_emit_binop (PLUS, mode, target, tmp); +- } ++ /* Don't do dynamic stack realignment for long long objects with ++ -mpreferred-stack-boundary=2. */ ++ if (!TARGET_64BIT ++ && align == 64 ++ && ix86_preferred_stack_boundary < 64 ++ && (mode == DImode || (type && TYPE_MODE (type) == DImode)) ++ && (!type || !TYPE_USER_ALIGN (type)) ++ && (!decl || !DECL_USER_ALIGN (decl))) ++ align = 32; + +- if (parts.disp && parts.disp != const0_rtx) +- ix86_emit_binop (PLUS, mode, target, parts.disp); ++ /* If TYPE is NULL, we are allocating a stack slot for caller-save ++ register in MODE. We will return the largest alignment of XF ++ and DF. */ ++ if (!type) ++ { ++ if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode)) ++ align = GET_MODE_ALIGNMENT (DFmode); ++ return align; + } +-} + +-/* Return true if it is ok to optimize an ADD operation to LEA +- operation to avoid flag register consumation. For most processors, +- ADD is faster than LEA. For the processors like BONNELL, if the +- destination register of LEA holds an actual address which will be +- used soon, LEA is better and otherwise ADD is better. */ ++ /* Don't increase alignment for Intel MCU psABI. */ ++ if (TARGET_IAMCU) ++ return align; + +-bool +-ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[]) +-{ +- unsigned int regno0 = true_regnum (operands[0]); +- unsigned int regno1 = true_regnum (operands[1]); +- unsigned int regno2 = true_regnum (operands[2]); ++ /* x86-64 ABI requires arrays greater than 16 bytes to be aligned ++ to 16byte boundary. Exact wording is: + +- /* If a = b + c, (a!=b && a!=c), must use lea form. */ +- if (regno0 != regno1 && regno0 != regno2) +- return true; ++ An array uses the same alignment as its elements, except that a local or ++ global array variable of length at least 16 bytes or ++ a C99 variable-length array variable always has alignment of at least 16 bytes. + +- if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) +- return false; ++ This was added to allow use of aligned SSE instructions at arrays. This ++ rule is meant for static storage (where compiler cannot do the analysis ++ by itself). We follow it for automatic variables only when convenient. ++ We fully control everything in the function compiled and functions from ++ other unit cannot rely on the alignment. + +- return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false); ++ Exclude va_list type. It is the common case of local array where ++ we cannot benefit from the alignment. ++ ++ TODO: Probably one should optimize for size only when var is not escaping. 
*/ ++ if (TARGET_64BIT && optimize_function_for_speed_p (cfun) ++ && TARGET_SSE) ++ { ++ if (AGGREGATE_TYPE_P (type) ++ && (va_list_type_node == NULL_TREE ++ || (TYPE_MAIN_VARIANT (type) ++ != TYPE_MAIN_VARIANT (va_list_type_node))) ++ && TYPE_SIZE (type) ++ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST ++ && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128) ++ && align < 128) ++ return 128; ++ } ++ if (TREE_CODE (type) == ARRAY_TYPE) ++ { ++ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) ++ return 64; ++ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) ++ return 128; ++ } ++ else if (TREE_CODE (type) == COMPLEX_TYPE) ++ { ++ if (TYPE_MODE (type) == DCmode && align < 64) ++ return 64; ++ if ((TYPE_MODE (type) == XCmode ++ || TYPE_MODE (type) == TCmode) && align < 128) ++ return 128; ++ } ++ else if ((TREE_CODE (type) == RECORD_TYPE ++ || TREE_CODE (type) == UNION_TYPE ++ || TREE_CODE (type) == QUAL_UNION_TYPE) ++ && TYPE_FIELDS (type)) ++ { ++ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) ++ return 64; ++ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) ++ return 128; ++ } ++ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE ++ || TREE_CODE (type) == INTEGER_TYPE) ++ { ++ ++ if (TYPE_MODE (type) == DFmode && align < 64) ++ return 64; ++ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) ++ return 128; ++ } ++ return align; + } + +-/* Return true if destination reg of SET_BODY is shift count of +- USE_BODY. */ ++/* Compute the minimum required alignment for dynamic stack realignment ++ purposes for a local variable, parameter or a stack slot. EXP is ++ the data type or decl itself, MODE is its mode and ALIGN is the ++ alignment that the object would ordinarily have. */ + +-static bool +-ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body) ++unsigned int ++ix86_minimum_alignment (tree exp, machine_mode mode, ++ unsigned int align) + { +- rtx set_dest; +- rtx shift_rtx; +- int i; ++ tree type, decl; + +- /* Retrieve destination of SET_BODY. */ +- switch (GET_CODE (set_body)) ++ if (exp && DECL_P (exp)) + { +- case SET: +- set_dest = SET_DEST (set_body); +- if (!set_dest || !REG_P (set_dest)) +- return false; +- break; +- case PARALLEL: +- for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--) +- if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i), +- use_body)) +- return true; +- /* FALLTHROUGH */ +- default: +- return false; ++ type = TREE_TYPE (exp); ++ decl = exp; + } +- +- /* Retrieve shift count of USE_BODY. */ +- switch (GET_CODE (use_body)) ++ else + { +- case SET: +- shift_rtx = XEXP (use_body, 1); +- break; +- case PARALLEL: +- for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--) +- if (ix86_dep_by_shift_count_body (set_body, +- XVECEXP (use_body, 0, i))) +- return true; +- /* FALLTHROUGH */ +- default: +- return false; ++ type = exp; ++ decl = NULL; + } + +- if (shift_rtx +- && (GET_CODE (shift_rtx) == ASHIFT +- || GET_CODE (shift_rtx) == LSHIFTRT +- || GET_CODE (shift_rtx) == ASHIFTRT +- || GET_CODE (shift_rtx) == ROTATE +- || GET_CODE (shift_rtx) == ROTATERT)) +- { +- rtx shift_count = XEXP (shift_rtx, 1); ++ if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) ++ return align; + +- /* Return true if shift count is dest of SET_BODY. */ +- if (REG_P (shift_count)) +- { +- /* Add check since it can be invoked before register +- allocation in pre-reload schedule. 
*/ +- if (reload_completed +- && true_regnum (set_dest) == true_regnum (shift_count)) +- return true; +- else if (REGNO(set_dest) == REGNO(shift_count)) +- return true; +- } ++ /* Don't do dynamic stack realignment for long long objects with ++ -mpreferred-stack-boundary=2. */ ++ if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) ++ && (!type || !TYPE_USER_ALIGN (type)) ++ && (!decl || !DECL_USER_ALIGN (decl))) ++ { ++ gcc_checking_assert (!TARGET_STV); ++ return 32; + } + +- return false; ++ return align; + } ++ ++/* Find a location for the static chain incoming to a nested function. ++ This is a register, unless all free registers are used by arguments. */ + +-/* Return true if destination reg of SET_INSN is shift count of +- USE_INSN. */ +- +-bool +-ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) ++static rtx ++ix86_static_chain (const_tree fndecl_or_type, bool incoming_p) + { +- return ix86_dep_by_shift_count_body (PATTERN (set_insn), +- PATTERN (use_insn)); +-} ++ unsigned regno; + +-/* Return TRUE or FALSE depending on whether the unary operator meets the +- appropriate constraints. */ ++ if (TARGET_64BIT) ++ { ++ /* We always use R10 in 64-bit mode. */ ++ regno = R10_REG; ++ } ++ else ++ { ++ const_tree fntype, fndecl; ++ unsigned int ccvt; + +-bool +-ix86_unary_operator_ok (enum rtx_code, +- machine_mode, +- rtx operands[2]) +-{ +- /* If one of operands is memory, source and destination must match. */ +- if ((MEM_P (operands[0]) +- || MEM_P (operands[1])) +- && ! rtx_equal_p (operands[0], operands[1])) +- return false; +- return true; +-} ++ /* By default in 32-bit mode we use ECX to pass the static chain. */ ++ regno = CX_REG; + +-/* Return TRUE if the operands to a vec_interleave_{high,low}v2df +- are ok, keeping in mind the possible movddup alternative. */ ++ if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL) ++ { ++ fntype = TREE_TYPE (fndecl_or_type); ++ fndecl = fndecl_or_type; ++ } ++ else ++ { ++ fntype = fndecl_or_type; ++ fndecl = NULL; ++ } + +-bool +-ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) +-{ +- if (MEM_P (operands[0])) +- return rtx_equal_p (operands[0], operands[1 + high]); +- if (MEM_P (operands[1]) && MEM_P (operands[2])) +- return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]); +- return true; ++ ccvt = ix86_get_callcvt (fntype); ++ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) ++ { ++ /* Fastcall functions use ecx/edx for arguments, which leaves ++ us with EAX for the static chain. ++ Thiscall functions use ecx for arguments, which also ++ leaves us with EAX for the static chain. */ ++ regno = AX_REG; ++ } ++ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) ++ { ++ /* Thiscall functions use ecx for arguments, which leaves ++ us with EAX and EDX for the static chain. ++ We are using for abi-compatibility EAX. */ ++ regno = AX_REG; ++ } ++ else if (ix86_function_regparm (fntype, fndecl) == 3) ++ { ++ /* For regparm 3, we have no free call-clobbered registers in ++ which to store the static chain. In order to implement this, ++ we have the trampoline push the static chain to the stack. ++ However, we can't push a value below the return address when ++ we call the nested function directly, so we have to use an ++ alternate entry point. For this we use ESI, and have the ++ alternate entry point push ESI, so that things appear the ++ same once we're executing the nested function. 
*/ ++ if (incoming_p) ++ { ++ if (fndecl == current_function_decl ++ && !ix86_static_chain_on_stack) ++ { ++ gcc_assert (!reload_completed); ++ ix86_static_chain_on_stack = true; ++ } ++ return gen_frame_mem (SImode, ++ plus_constant (Pmode, ++ arg_pointer_rtx, -8)); ++ } ++ regno = SI_REG; ++ } ++ } ++ ++ return gen_rtx_REG (Pmode, regno); + } + +-/* Post-reload splitter for converting an SF or DFmode value in an +- SSE register into an unsigned SImode. */ ++/* Emit RTL insns to initialize the variable parts of a trampoline. ++ FNDECL is the decl of the target address; M_TRAMP is a MEM for ++ the trampoline, and CHAIN_VALUE is an RTX for the static chain ++ to be passed to the target function. */ + +-void +-ix86_split_convert_uns_si_sse (rtx operands[]) ++static void ++ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) + { +- machine_mode vecmode; +- rtx value, large, zero_or_two31, input, two31, x; ++ rtx mem, fnaddr; ++ int opcode; ++ int offset = 0; ++ bool need_endbr = (flag_cf_protection & CF_BRANCH); + +- large = operands[1]; +- zero_or_two31 = operands[2]; +- input = operands[3]; +- two31 = operands[4]; +- vecmode = GET_MODE (large); +- value = gen_rtx_REG (vecmode, REGNO (operands[0])); ++ fnaddr = XEXP (DECL_RTL (fndecl), 0); + +- /* Load up the value into the low element. We must ensure that the other +- elements are valid floats -- zero is the easiest such value. */ +- if (MEM_P (input)) +- { +- if (vecmode == V4SFmode) +- emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input)); +- else +- emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input)); +- } +- else ++ if (TARGET_64BIT) + { +- input = gen_rtx_REG (vecmode, REGNO (input)); +- emit_move_insn (value, CONST0_RTX (vecmode)); +- if (vecmode == V4SFmode) +- emit_insn (gen_sse_movss (value, value, input)); +- else +- emit_insn (gen_sse2_movsd (value, value, input)); +- } +- +- emit_move_insn (large, two31); +- emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31); ++ int size; + +- x = gen_rtx_fmt_ee (LE, vecmode, large, value); +- emit_insn (gen_rtx_SET (large, x)); ++ if (need_endbr) ++ { ++ /* Insert ENDBR64. */ ++ mem = adjust_address (m_tramp, SImode, offset); ++ emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode)); ++ offset += 4; ++ } + +- x = gen_rtx_AND (vecmode, zero_or_two31, large); +- emit_insn (gen_rtx_SET (zero_or_two31, x)); ++ /* Load the function address to r11. Try to load address using ++ the shorter movl instead of movabs. We may want to support ++ movq for kernel mode, but kernel does not use trampolines at ++ the moment. FNADDR is a 32bit address and may not be in ++ DImode when ptr_mode == SImode. Always use movl in this ++ case. 
*/ ++ if (ptr_mode == SImode ++ || x86_64_zext_immediate_operand (fnaddr, VOIDmode)) ++ { ++ fnaddr = copy_addr_to_reg (fnaddr); + +- x = gen_rtx_MINUS (vecmode, value, zero_or_two31); +- emit_insn (gen_rtx_SET (value, x)); ++ mem = adjust_address (m_tramp, HImode, offset); ++ emit_move_insn (mem, gen_int_mode (0xbb41, HImode)); + +- large = gen_rtx_REG (V4SImode, REGNO (large)); +- emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31))); ++ mem = adjust_address (m_tramp, SImode, offset + 2); ++ emit_move_insn (mem, gen_lowpart (SImode, fnaddr)); ++ offset += 6; ++ } ++ else ++ { ++ mem = adjust_address (m_tramp, HImode, offset); ++ emit_move_insn (mem, gen_int_mode (0xbb49, HImode)); + +- x = gen_rtx_REG (V4SImode, REGNO (value)); +- if (vecmode == V4SFmode) +- emit_insn (gen_fix_truncv4sfv4si2 (x, value)); +- else +- emit_insn (gen_sse2_cvttpd2dq (x, value)); +- value = x; ++ mem = adjust_address (m_tramp, DImode, offset + 2); ++ emit_move_insn (mem, fnaddr); ++ offset += 10; ++ } + +- emit_insn (gen_xorv4si3 (value, value, large)); +-} ++ /* Load static chain using movabs to r10. Use the shorter movl ++ instead of movabs when ptr_mode == SImode. */ ++ if (ptr_mode == SImode) ++ { ++ opcode = 0xba41; ++ size = 6; ++ } ++ else ++ { ++ opcode = 0xba49; ++ size = 10; ++ } + +-/* Convert an unsigned DImode value into a DFmode, using only SSE. +- Expects the 64-bit DImode to be supplied in a pair of integral +- registers. Requires SSE2; will use SSE3 if available. For x86_32, +- -mfpmath=sse, !optimize_size only. */ ++ mem = adjust_address (m_tramp, HImode, offset); ++ emit_move_insn (mem, gen_int_mode (opcode, HImode)); + +-void +-ix86_expand_convert_uns_didf_sse (rtx target, rtx input) +-{ +- REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt; +- rtx int_xmm, fp_xmm; +- rtx biases, exponents; +- rtx x; ++ mem = adjust_address (m_tramp, ptr_mode, offset + 2); ++ emit_move_insn (mem, chain_value); ++ offset += size; + +- int_xmm = gen_reg_rtx (V4SImode); +- if (TARGET_INTER_UNIT_MOVES_TO_VEC) +- emit_insn (gen_movdi_to_sse (int_xmm, input)); +- else if (TARGET_SSE_SPLIT_REGS) +- { +- emit_clobber (int_xmm); +- emit_move_insn (gen_lowpart (DImode, int_xmm), input); ++ /* Jump to r11; the last (unused) byte is a nop, only there to ++ pad the write out to a single 32-bit store. */ ++ mem = adjust_address (m_tramp, SImode, offset); ++ emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode)); ++ offset += 4; + } + else + { +- x = gen_reg_rtx (V2DImode); +- ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0); +- emit_move_insn (int_xmm, gen_lowpart (V4SImode, x)); +- } +- +- x = gen_rtx_CONST_VECTOR (V4SImode, +- gen_rtvec (4, GEN_INT (0x43300000UL), +- GEN_INT (0x45300000UL), +- const0_rtx, const0_rtx)); +- exponents = validize_mem (force_const_mem (V4SImode, x)); +- +- /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ +- emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents)); +- +- /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) +- yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). +- Similarly (0x45300000UL ## fp_value_hi_xmm) yields +- (0x1.0p84 + double(fp_value_hi_xmm)). +- Note these exponents differ by 32. */ +- +- fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm)); +- +- /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values +- in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. 
*/ +- real_ldexp (&bias_lo_rvt, &dconst1, 52); +- real_ldexp (&bias_hi_rvt, &dconst1, 84); +- biases = const_double_from_real_value (bias_lo_rvt, DFmode); +- x = const_double_from_real_value (bias_hi_rvt, DFmode); +- biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x)); +- biases = validize_mem (force_const_mem (V2DFmode, biases)); +- emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases)); +- +- /* Add the upper and lower DFmode values together. */ +- if (TARGET_SSE3) +- emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm)); +- else +- { +- x = copy_to_mode_reg (V2DFmode, fp_xmm); +- emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm)); +- emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); +- } ++ rtx disp, chain; + +- ix86_expand_vector_extract (false, target, fp_xmm, 0); +-} ++ /* Depending on the static chain location, either load a register ++ with a constant, or push the constant to the stack. All of the ++ instructions are the same size. */ ++ chain = ix86_static_chain (fndecl, true); ++ if (REG_P (chain)) ++ { ++ switch (REGNO (chain)) ++ { ++ case AX_REG: ++ opcode = 0xb8; break; ++ case CX_REG: ++ opcode = 0xb9; break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ else ++ opcode = 0x68; + +-/* Not used, but eases macroization of patterns. */ +-void +-ix86_expand_convert_uns_sixf_sse (rtx, rtx) +-{ +- gcc_unreachable (); +-} ++ if (need_endbr) ++ { ++ /* Insert ENDBR32. */ ++ mem = adjust_address (m_tramp, SImode, offset); ++ emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode)); ++ offset += 4; ++ } + +-/* Convert an unsigned SImode value into a DFmode. Only currently used +- for SSE, but applicable anywhere. */ ++ mem = adjust_address (m_tramp, QImode, offset); ++ emit_move_insn (mem, gen_int_mode (opcode, QImode)); + +-void +-ix86_expand_convert_uns_sidf_sse (rtx target, rtx input) +-{ +- REAL_VALUE_TYPE TWO31r; +- rtx x, fp; ++ mem = adjust_address (m_tramp, SImode, offset + 1); ++ emit_move_insn (mem, chain_value); ++ offset += 5; + +- x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1), +- NULL, 1, OPTAB_DIRECT); ++ mem = adjust_address (m_tramp, QImode, offset); ++ emit_move_insn (mem, gen_int_mode (0xe9, QImode)); ++ ++ mem = adjust_address (m_tramp, SImode, offset + 1); + +- fp = gen_reg_rtx (DFmode); +- emit_insn (gen_floatsidf2 (fp, x)); ++ /* Compute offset from the end of the jmp to the target function. ++ In the case in which the trampoline stores the static chain on ++ the stack, we need to skip the first insn which pushes the ++ (call-saved) register static chain; this push is 1 byte. */ ++ offset += 5; ++ int skip = MEM_P (chain) ? 1 : 0; ++ /* Skip ENDBR32 at the entry of the target function. 
*/ ++ if (need_endbr ++ && !cgraph_node::get (fndecl)->only_called_directly_p ()) ++ skip += 4; ++ disp = expand_binop (SImode, sub_optab, fnaddr, ++ plus_constant (Pmode, XEXP (m_tramp, 0), ++ offset - skip), ++ NULL_RTX, 1, OPTAB_DIRECT); ++ emit_move_insn (mem, disp); ++ } + +- real_ldexp (&TWO31r, &dconst1, 31); +- x = const_double_from_real_value (TWO31r, DFmode); ++ gcc_assert (offset <= TRAMPOLINE_SIZE); + +- x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT); +- if (x != target) +- emit_move_insn (target, x); ++#ifdef HAVE_ENABLE_EXECUTE_STACK ++#ifdef CHECK_EXECUTE_STACK_ENABLED ++ if (CHECK_EXECUTE_STACK_ENABLED) ++#endif ++ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), ++ LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); ++#endif + } + +-/* Convert a signed DImode value into a DFmode. Only used for SSE in +- 32-bit mode; otherwise we have a direct convert instruction. */ +- +-void +-ix86_expand_convert_sign_didf_sse (rtx target, rtx input) ++static bool ++ix86_allocate_stack_slots_for_args (void) + { +- REAL_VALUE_TYPE TWO32r; +- rtx fp_lo, fp_hi, x; +- +- fp_lo = gen_reg_rtx (DFmode); +- fp_hi = gen_reg_rtx (DFmode); ++ /* Naked functions should not allocate stack slots for arguments. */ ++ return !ix86_function_naked (current_function_decl); ++} + +- emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input))); +- +- real_ldexp (&TWO32r, &dconst1, 32); +- x = const_double_from_real_value (TWO32r, DFmode); +- fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT); +- +- ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input)); +- +- x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target, +- 0, OPTAB_DIRECT); +- if (x != target) +- emit_move_insn (target, x); +-} +- +-/* Convert an unsigned SImode value into a SFmode, using only SSE. +- For x86_32, -mfpmath=sse, !optimize_size only. */ +-void +-ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) +-{ +- REAL_VALUE_TYPE ONE16r; +- rtx fp_hi, fp_lo, int_hi, int_lo, x; +- +- real_ldexp (&ONE16r, &dconst1, 16); +- x = const_double_from_real_value (ONE16r, SFmode); +- int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff), +- NULL, 0, OPTAB_DIRECT); +- int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16), +- NULL, 0, OPTAB_DIRECT); +- fp_hi = gen_reg_rtx (SFmode); +- fp_lo = gen_reg_rtx (SFmode); +- emit_insn (gen_floatsisf2 (fp_hi, int_hi)); +- emit_insn (gen_floatsisf2 (fp_lo, int_lo)); +- fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, +- 0, OPTAB_DIRECT); +- fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, +- 0, OPTAB_DIRECT); +- if (!rtx_equal_p (target, fp_hi)) +- emit_move_insn (target, fp_hi); +-} +- +-/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert +- a vector of unsigned ints VAL to vector of floats TARGET. 
*/ +- +-void +-ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val) +-{ +- rtx tmp[8]; +- REAL_VALUE_TYPE TWO16r; +- machine_mode intmode = GET_MODE (val); +- machine_mode fltmode = GET_MODE (target); +- rtx (*cvt) (rtx, rtx); +- +- if (intmode == V4SImode) +- cvt = gen_floatv4siv4sf2; +- else +- cvt = gen_floatv8siv8sf2; +- tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff)); +- tmp[0] = force_reg (intmode, tmp[0]); +- tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1, +- OPTAB_DIRECT); +- tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16), +- NULL_RTX, 1, OPTAB_DIRECT); +- tmp[3] = gen_reg_rtx (fltmode); +- emit_insn (cvt (tmp[3], tmp[1])); +- tmp[4] = gen_reg_rtx (fltmode); +- emit_insn (cvt (tmp[4], tmp[2])); +- real_ldexp (&TWO16r, &dconst1, 16); +- tmp[5] = const_double_from_real_value (TWO16r, SFmode); +- tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5])); +- tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1, +- OPTAB_DIRECT); +- tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1, +- OPTAB_DIRECT); +- if (tmp[7] != target) +- emit_move_insn (target, tmp[7]); +-} +- +-/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc* +- pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*. +- This is done by doing just signed conversion if < 0x1p31, and otherwise by +- subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */ +- +-rtx +-ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp) +-{ +- REAL_VALUE_TYPE TWO31r; +- rtx two31r, tmp[4]; +- machine_mode mode = GET_MODE (val); +- machine_mode scalarmode = GET_MODE_INNER (mode); +- machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode; +- rtx (*cmp) (rtx, rtx, rtx, rtx); +- int i; +- +- for (i = 0; i < 3; i++) +- tmp[i] = gen_reg_rtx (mode); +- real_ldexp (&TWO31r, &dconst1, 31); +- two31r = const_double_from_real_value (TWO31r, scalarmode); +- two31r = ix86_build_const_vector (mode, 1, two31r); +- two31r = force_reg (mode, two31r); +- switch (mode) +- { +- case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break; +- case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break; +- case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break; +- case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break; +- default: gcc_unreachable (); +- } +- tmp[3] = gen_rtx_LE (mode, two31r, val); +- emit_insn (cmp (tmp[0], two31r, val, tmp[3])); +- tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1], +- 0, OPTAB_DIRECT); +- if (intmode == V4SImode || TARGET_AVX2) +- *xorp = expand_simple_binop (intmode, ASHIFT, +- gen_lowpart (intmode, tmp[0]), +- GEN_INT (31), NULL_RTX, 0, +- OPTAB_DIRECT); +- else +- { +- rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31); +- two31 = ix86_build_const_vector (intmode, 1, two31); +- *xorp = expand_simple_binop (intmode, AND, +- gen_lowpart (intmode, tmp[0]), +- two31, NULL_RTX, 0, +- OPTAB_DIRECT); +- } +- return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2], +- 0, OPTAB_DIRECT); +-} +- +-/* A subroutine of ix86_build_signbit_mask. If VECT is true, +- then replicate the value for all elements of the vector +- register. 
*/ +- +-rtx +-ix86_build_const_vector (machine_mode mode, bool vect, rtx value) ++static bool ++ix86_warn_func_return (tree decl) + { +- int i, n_elt; +- rtvec v; +- machine_mode scalar_mode; +- +- switch (mode) +- { +- case E_V64QImode: +- case E_V32QImode: +- case E_V16QImode: +- case E_V32HImode: +- case E_V16HImode: +- case E_V8HImode: +- case E_V16SImode: +- case E_V8SImode: +- case E_V4SImode: +- case E_V8DImode: +- case E_V4DImode: +- case E_V2DImode: +- gcc_assert (vect); +- /* FALLTHRU */ +- case E_V16SFmode: +- case E_V8SFmode: +- case E_V4SFmode: +- case E_V8DFmode: +- case E_V4DFmode: +- case E_V2DFmode: +- n_elt = GET_MODE_NUNITS (mode); +- v = rtvec_alloc (n_elt); +- scalar_mode = GET_MODE_INNER (mode); +- +- RTVEC_ELT (v, 0) = value; +- +- for (i = 1; i < n_elt; ++i) +- RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode); +- +- return gen_rtx_CONST_VECTOR (mode, v); +- +- default: +- gcc_unreachable (); +- } ++ /* Naked functions are implemented entirely in assembly, including the ++ return sequence, so suppress warnings about this. */ ++ return !ix86_function_naked (decl); + } +- +-/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders +- and ix86_expand_int_vcond. Create a mask for the sign bit in MODE +- for an SSE register. If VECT is true, then replicate the mask for +- all elements of the vector register. If INVERT is true, then create +- a mask excluding the sign bit. */ +- +-rtx +-ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) ++ ++/* Return the shift count of a vector by scalar shift builtin second argument ++ ARG1. */ ++static tree ++ix86_vector_shift_count (tree arg1) + { +- machine_mode vec_mode, imode; +- wide_int w; +- rtx mask, v; +- +- switch (mode) ++ if (tree_fits_uhwi_p (arg1)) ++ return arg1; ++ else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8) + { +- case E_V16SImode: +- case E_V16SFmode: +- case E_V8SImode: +- case E_V4SImode: +- case E_V8SFmode: +- case E_V4SFmode: +- vec_mode = mode; +- imode = SImode; +- break; +- +- case E_V8DImode: +- case E_V4DImode: +- case E_V2DImode: +- case E_V8DFmode: +- case E_V4DFmode: +- case E_V2DFmode: +- vec_mode = mode; +- imode = DImode; +- break; +- +- case E_TImode: +- case E_TFmode: +- vec_mode = VOIDmode; +- imode = TImode; +- break; +- +- default: +- gcc_unreachable (); ++ /* The count argument is weird, passed in as various 128-bit ++ (or 64-bit) vectors, the low 64 bits from it are the count. */ ++ unsigned char buf[16]; ++ int len = native_encode_expr (arg1, buf, 16); ++ if (len == 0) ++ return NULL_TREE; ++ tree t = native_interpret_expr (uint64_type_node, buf, len); ++ if (t && tree_fits_uhwi_p (t)) ++ return t; + } +- +- machine_mode inner_mode = GET_MODE_INNER (mode); +- w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1, +- GET_MODE_BITSIZE (inner_mode)); +- if (invert) +- w = wi::bit_not (w); +- +- /* Force this value into the low part of a fp vector constant. */ +- mask = immed_wide_int_const (w, imode); +- mask = gen_lowpart (inner_mode, mask); +- +- if (vec_mode == VOIDmode) +- return force_reg (inner_mode, mask); +- +- v = ix86_build_const_vector (vec_mode, vect, mask); +- return force_reg (vec_mode, v); ++ return NULL_TREE; + } + +-/* Generate code for floating point ABS or NEG. 
*/ +- +-void +-ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, +- rtx operands[]) ++static tree ++ix86_fold_builtin (tree fndecl, int n_args, ++ tree *args, bool ignore ATTRIBUTE_UNUSED) + { +- rtx mask, set, dst, src; +- bool use_sse = false; +- bool vector_mode = VECTOR_MODE_P (mode); +- machine_mode vmode = mode; +- +- if (vector_mode) +- use_sse = true; +- else if (mode == TFmode) +- use_sse = true; +- else if (TARGET_SSE_MATH) +- { +- use_sse = SSE_FLOAT_MODE_P (mode); +- if (mode == SFmode) +- vmode = V4SFmode; +- else if (mode == DFmode) +- vmode = V2DFmode; +- } +- +- /* NEG and ABS performed with SSE use bitwise mask operations. +- Create the appropriate mask now. */ +- if (use_sse) +- mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS); +- else +- mask = NULL_RTX; +- +- dst = operands[0]; +- src = operands[1]; +- +- set = gen_rtx_fmt_e (code, mode, src); +- set = gen_rtx_SET (dst, set); +- +- if (mask) ++ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) + { +- rtx use, clob; +- rtvec par; ++ enum ix86_builtins fn_code ++ = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); ++ enum rtx_code rcode; ++ bool is_vshift; ++ unsigned HOST_WIDE_INT mask; + +- use = gen_rtx_USE (VOIDmode, mask); +- if (vector_mode) +- par = gen_rtvec (2, set, use); +- else ++ switch (fn_code) + { +- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); +- par = gen_rtvec (3, set, use, clob); +- } +- emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); +- } +- else +- emit_insn (set); +-} +- +-/* Expand a copysign operation. Special case operand 0 being a constant. */ +- +-void +-ix86_expand_copysign (rtx operands[]) +-{ +- machine_mode mode, vmode; +- rtx dest, op0, op1, mask, nmask; +- +- dest = operands[0]; +- op0 = operands[1]; +- op1 = operands[2]; +- +- mode = GET_MODE (dest); ++ case IX86_BUILTIN_CPU_IS: ++ case IX86_BUILTIN_CPU_SUPPORTS: ++ gcc_assert (n_args == 1); ++ return fold_builtin_cpu (fndecl, args); + +- if (mode == SFmode) +- vmode = V4SFmode; +- else if (mode == DFmode) +- vmode = V2DFmode; +- else +- vmode = mode; ++ case IX86_BUILTIN_NANQ: ++ case IX86_BUILTIN_NANSQ: ++ { ++ tree type = TREE_TYPE (TREE_TYPE (fndecl)); ++ const char *str = c_getstr (*args); ++ int quiet = fn_code == IX86_BUILTIN_NANQ; ++ REAL_VALUE_TYPE real; + +- if (CONST_DOUBLE_P (op0)) +- { +- rtx (*copysign_insn)(rtx, rtx, rtx, rtx); ++ if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) ++ return build_real (type, real); ++ return NULL_TREE; ++ } + +- if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) +- op0 = simplify_unary_operation (ABS, mode, op0, mode); ++ case IX86_BUILTIN_INFQ: ++ case IX86_BUILTIN_HUGE_VALQ: ++ { ++ tree type = TREE_TYPE (TREE_TYPE (fndecl)); ++ REAL_VALUE_TYPE inf; ++ real_inf (&inf); ++ return build_real (type, inf); ++ } + +- if (mode == SFmode || mode == DFmode) +- { +- if (op0 == CONST0_RTX (mode)) +- op0 = CONST0_RTX (vmode); +- else ++ case IX86_BUILTIN_TZCNT16: ++ case IX86_BUILTIN_CTZS: ++ case IX86_BUILTIN_TZCNT32: ++ case IX86_BUILTIN_TZCNT64: ++ gcc_assert (n_args == 1); ++ if (TREE_CODE (args[0]) == INTEGER_CST) + { +- rtx v = ix86_build_const_vector (vmode, false, op0); +- +- op0 = force_reg (vmode, v); ++ tree type = TREE_TYPE (TREE_TYPE (fndecl)); ++ tree arg = args[0]; ++ if (fn_code == IX86_BUILTIN_TZCNT16 ++ || fn_code == IX86_BUILTIN_CTZS) ++ arg = fold_convert (short_unsigned_type_node, arg); ++ if (integer_zerop (arg)) ++ return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); ++ else ++ return fold_const_call 
(CFN_CTZ, type, arg); + } +- } +- else if (op0 != CONST0_RTX (mode)) +- op0 = force_reg (mode, op0); +- +- mask = ix86_build_signbit_mask (vmode, 0, 0); +- +- if (mode == SFmode) +- copysign_insn = gen_copysignsf3_const; +- else if (mode == DFmode) +- copysign_insn = gen_copysigndf3_const; +- else +- copysign_insn = gen_copysigntf3_const; +- +- emit_insn (copysign_insn (dest, op0, op1, mask)); +- } +- else +- { +- rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx); +- +- nmask = ix86_build_signbit_mask (vmode, 0, 1); +- mask = ix86_build_signbit_mask (vmode, 0, 0); +- +- if (mode == SFmode) +- copysign_insn = gen_copysignsf3_var; +- else if (mode == DFmode) +- copysign_insn = gen_copysigndf3_var; +- else +- copysign_insn = gen_copysigntf3_var; +- +- emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask)); +- } +-} ++ break; + +-/* Deconstruct a copysign operation into bit masks. Operand 0 is known to +- be a constant, and so has already been expanded into a vector constant. */ ++ case IX86_BUILTIN_LZCNT16: ++ case IX86_BUILTIN_CLZS: ++ case IX86_BUILTIN_LZCNT32: ++ case IX86_BUILTIN_LZCNT64: ++ gcc_assert (n_args == 1); ++ if (TREE_CODE (args[0]) == INTEGER_CST) ++ { ++ tree type = TREE_TYPE (TREE_TYPE (fndecl)); ++ tree arg = args[0]; ++ if (fn_code == IX86_BUILTIN_LZCNT16 ++ || fn_code == IX86_BUILTIN_CLZS) ++ arg = fold_convert (short_unsigned_type_node, arg); ++ if (integer_zerop (arg)) ++ return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); ++ else ++ return fold_const_call (CFN_CLZ, type, arg); ++ } ++ break; + +-void +-ix86_split_copysign_const (rtx operands[]) +-{ +- machine_mode mode, vmode; +- rtx dest, op0, mask, x; +- +- dest = operands[0]; +- op0 = operands[1]; +- mask = operands[3]; +- +- mode = GET_MODE (dest); +- vmode = GET_MODE (mask); +- +- dest = lowpart_subreg (vmode, dest, mode); +- x = gen_rtx_AND (vmode, dest, mask); +- emit_insn (gen_rtx_SET (dest, x)); +- +- if (op0 != CONST0_RTX (vmode)) +- { +- x = gen_rtx_IOR (vmode, dest, op0); +- emit_insn (gen_rtx_SET (dest, x)); +- } +-} +- +-/* Deconstruct a copysign operation into bit masks. Operand 0 is variable, +- so we have to do two masks. */ +- +-void +-ix86_split_copysign_var (rtx operands[]) +-{ +- machine_mode mode, vmode; +- rtx dest, scratch, op0, op1, mask, nmask, x; +- +- dest = operands[0]; +- scratch = operands[1]; +- op0 = operands[2]; +- op1 = operands[3]; +- nmask = operands[4]; +- mask = operands[5]; +- +- mode = GET_MODE (dest); +- vmode = GET_MODE (mask); +- +- if (rtx_equal_p (op0, op1)) +- { +- /* Shouldn't happen often (it's useless, obviously), but when it does +- we'd generate incorrect code if we continue below. 
*/ +- emit_move_insn (dest, op0); +- return; +- } +- +- if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ +- { +- gcc_assert (REGNO (op1) == REGNO (scratch)); +- +- x = gen_rtx_AND (vmode, scratch, mask); +- emit_insn (gen_rtx_SET (scratch, x)); +- +- dest = mask; +- op0 = lowpart_subreg (vmode, op0, mode); +- x = gen_rtx_NOT (vmode, dest); +- x = gen_rtx_AND (vmode, x, op0); +- emit_insn (gen_rtx_SET (dest, x)); +- } +- else +- { +- if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ +- { +- x = gen_rtx_AND (vmode, scratch, mask); +- } +- else /* alternative 2,4 */ +- { +- gcc_assert (REGNO (mask) == REGNO (scratch)); +- op1 = lowpart_subreg (vmode, op1, mode); +- x = gen_rtx_AND (vmode, scratch, op1); +- } +- emit_insn (gen_rtx_SET (scratch, x)); +- +- if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ +- { +- dest = lowpart_subreg (vmode, op0, mode); +- x = gen_rtx_AND (vmode, dest, nmask); +- } +- else /* alternative 3,4 */ +- { +- gcc_assert (REGNO (nmask) == REGNO (dest)); +- dest = nmask; +- op0 = lowpart_subreg (vmode, op0, mode); +- x = gen_rtx_AND (vmode, dest, op0); +- } +- emit_insn (gen_rtx_SET (dest, x)); +- } +- +- x = gen_rtx_IOR (vmode, dest, scratch); +- emit_insn (gen_rtx_SET (dest, x)); +-} +- +-/* Expand an xorsign operation. */ +- +-void +-ix86_expand_xorsign (rtx operands[]) +-{ +- rtx (*xorsign_insn)(rtx, rtx, rtx, rtx); +- machine_mode mode, vmode; +- rtx dest, op0, op1, mask; +- +- dest = operands[0]; +- op0 = operands[1]; +- op1 = operands[2]; +- +- mode = GET_MODE (dest); +- +- if (mode == SFmode) +- { +- xorsign_insn = gen_xorsignsf3_1; +- vmode = V4SFmode; +- } +- else if (mode == DFmode) +- { +- xorsign_insn = gen_xorsigndf3_1; +- vmode = V2DFmode; +- } +- else +- gcc_unreachable (); +- +- mask = ix86_build_signbit_mask (vmode, 0, 0); +- +- emit_insn (xorsign_insn (dest, op0, op1, mask)); +-} +- +-/* Deconstruct an xorsign operation into bit masks. */ +- +-void +-ix86_split_xorsign (rtx operands[]) +-{ +- machine_mode mode, vmode; +- rtx dest, op0, mask, x; +- +- dest = operands[0]; +- op0 = operands[1]; +- mask = operands[3]; +- +- mode = GET_MODE (dest); +- vmode = GET_MODE (mask); +- +- dest = lowpart_subreg (vmode, dest, mode); +- x = gen_rtx_AND (vmode, dest, mask); +- emit_insn (gen_rtx_SET (dest, x)); +- +- op0 = lowpart_subreg (vmode, op0, mode); +- x = gen_rtx_XOR (vmode, dest, op0); +- emit_insn (gen_rtx_SET (dest, x)); +-} +- +-/* Return TRUE or FALSE depending on whether the first SET in INSN +- has source and destination with matching CC modes, and that the +- CC mode is at least as constrained as REQ_MODE. 
*/ +- +-bool +-ix86_match_ccmode (rtx insn, machine_mode req_mode) +-{ +- rtx set; +- machine_mode set_mode; +- +- set = PATTERN (insn); +- if (GET_CODE (set) == PARALLEL) +- set = XVECEXP (set, 0, 0); +- gcc_assert (GET_CODE (set) == SET); +- gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); +- +- set_mode = GET_MODE (SET_DEST (set)); +- switch (set_mode) +- { +- case E_CCNOmode: +- if (req_mode != CCNOmode +- && (req_mode != CCmode +- || XEXP (SET_SRC (set), 1) != const0_rtx)) +- return false; +- break; +- case E_CCmode: +- if (req_mode == CCGCmode) +- return false; +- /* FALLTHRU */ +- case E_CCGCmode: +- if (req_mode == CCGOCmode || req_mode == CCNOmode) +- return false; +- /* FALLTHRU */ +- case E_CCGOCmode: +- if (req_mode == CCZmode) +- return false; +- /* FALLTHRU */ +- case E_CCZmode: +- break; +- +- case E_CCGZmode: +- +- case E_CCAmode: +- case E_CCCmode: +- case E_CCOmode: +- case E_CCPmode: +- case E_CCSmode: +- if (set_mode != req_mode) +- return false; +- break; +- +- default: +- gcc_unreachable (); +- } +- +- return GET_MODE (SET_SRC (set)) == set_mode; +-} +- +-/* Generate insn patterns to do an integer compare of OPERANDS. */ +- +-static rtx +-ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) +-{ +- machine_mode cmpmode; +- rtx tmp, flags; +- +- cmpmode = SELECT_CC_MODE (code, op0, op1); +- flags = gen_rtx_REG (cmpmode, FLAGS_REG); +- +- /* This is very simple, but making the interface the same as in the +- FP case makes the rest of the code easier. */ +- tmp = gen_rtx_COMPARE (cmpmode, op0, op1); +- emit_insn (gen_rtx_SET (flags, tmp)); +- +- /* Return the test that should be put into the flags user, i.e. +- the bcc, scc, or cmov instruction. */ +- return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); +-} ++ case IX86_BUILTIN_BEXTR32: ++ case IX86_BUILTIN_BEXTR64: ++ case IX86_BUILTIN_BEXTRI32: ++ case IX86_BUILTIN_BEXTRI64: ++ gcc_assert (n_args == 2); ++ if (tree_fits_uhwi_p (args[1])) ++ { ++ unsigned HOST_WIDE_INT res = 0; ++ unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0])); ++ unsigned int start = tree_to_uhwi (args[1]); ++ unsigned int len = (start & 0xff00) >> 8; ++ start &= 0xff; ++ if (start >= prec || len == 0) ++ res = 0; ++ else if (!tree_fits_uhwi_p (args[0])) ++ break; ++ else ++ res = tree_to_uhwi (args[0]) >> start; ++ if (len > prec) ++ len = prec; ++ if (len < HOST_BITS_PER_WIDE_INT) ++ res &= (HOST_WIDE_INT_1U << len) - 1; ++ return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); ++ } ++ break; + +-/* Figure out whether to use unordered fp comparisons. 
*/ ++ case IX86_BUILTIN_BZHI32: ++ case IX86_BUILTIN_BZHI64: ++ gcc_assert (n_args == 2); ++ if (tree_fits_uhwi_p (args[1])) ++ { ++ unsigned int idx = tree_to_uhwi (args[1]) & 0xff; ++ if (idx >= TYPE_PRECISION (TREE_TYPE (args[0]))) ++ return args[0]; ++ if (idx == 0) ++ return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0); ++ if (!tree_fits_uhwi_p (args[0])) ++ break; ++ unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]); ++ res &= ~(HOST_WIDE_INT_M1U << idx); ++ return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); ++ } ++ break; + +-static bool +-ix86_unordered_fp_compare (enum rtx_code code) +-{ +- if (!TARGET_IEEE_FP) +- return false; ++ case IX86_BUILTIN_PDEP32: ++ case IX86_BUILTIN_PDEP64: ++ gcc_assert (n_args == 2); ++ if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) ++ { ++ unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); ++ unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); ++ unsigned HOST_WIDE_INT res = 0; ++ unsigned HOST_WIDE_INT m, k = 1; ++ for (m = 1; m; m <<= 1) ++ if ((mask & m) != 0) ++ { ++ if ((src & k) != 0) ++ res |= m; ++ k <<= 1; ++ } ++ return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); ++ } ++ break; + +- switch (code) +- { +- case GT: +- case GE: +- case LT: +- case LE: +- return false; ++ case IX86_BUILTIN_PEXT32: ++ case IX86_BUILTIN_PEXT64: ++ gcc_assert (n_args == 2); ++ if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) ++ { ++ unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); ++ unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); ++ unsigned HOST_WIDE_INT res = 0; ++ unsigned HOST_WIDE_INT m, k = 1; ++ for (m = 1; m; m <<= 1) ++ if ((mask & m) != 0) ++ { ++ if ((src & m) != 0) ++ res |= k; ++ k <<= 1; ++ } ++ return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); ++ } ++ break; + +- case EQ: +- case NE: ++ case IX86_BUILTIN_MOVMSKPS: ++ case IX86_BUILTIN_PMOVMSKB: ++ case IX86_BUILTIN_MOVMSKPD: ++ case IX86_BUILTIN_PMOVMSKB128: ++ case IX86_BUILTIN_MOVMSKPD256: ++ case IX86_BUILTIN_MOVMSKPS256: ++ case IX86_BUILTIN_PMOVMSKB256: ++ gcc_assert (n_args == 1); ++ if (TREE_CODE (args[0]) == VECTOR_CST) ++ { ++ HOST_WIDE_INT res = 0; ++ for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i) ++ { ++ tree e = VECTOR_CST_ELT (args[0], i); ++ if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e)) ++ { ++ if (wi::neg_p (wi::to_wide (e))) ++ res |= HOST_WIDE_INT_1 << i; ++ } ++ else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e)) ++ { ++ if (TREE_REAL_CST (e).sign) ++ res |= HOST_WIDE_INT_1 << i; ++ } ++ else ++ return NULL_TREE; ++ } ++ return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res); ++ } ++ break; + +- case LTGT: +- case UNORDERED: +- case ORDERED: +- case UNLT: +- case UNLE: +- case UNGT: +- case UNGE: +- case UNEQ: +- return true; +- +- default: +- gcc_unreachable (); +- } +-} +- +-machine_mode +-ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) +-{ +- machine_mode mode = GET_MODE (op0); +- +- if (SCALAR_FLOAT_MODE_P (mode)) +- { +- gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); +- return CCFPmode; +- } +- +- switch (code) +- { +- /* Only zero flag is needed. */ +- case EQ: /* ZF=0 */ +- case NE: /* ZF!=0 */ +- return CCZmode; +- /* Codes needing carry flag. */ +- case GEU: /* CF=0 */ +- case LTU: /* CF=1 */ +- /* Detect overflow checks. They need just the carry flag. 
*/ +- if (GET_CODE (op0) == PLUS +- && (rtx_equal_p (op1, XEXP (op0, 0)) +- || rtx_equal_p (op1, XEXP (op0, 1)))) +- return CCCmode; +- else +- return CCmode; +- case GTU: /* CF=0 & ZF=0 */ +- case LEU: /* CF=1 | ZF=1 */ +- return CCmode; +- /* Codes possibly doable only with sign flag when +- comparing against zero. */ +- case GE: /* SF=OF or SF=0 */ +- case LT: /* SF<>OF or SF=1 */ +- if (op1 == const0_rtx) +- return CCGOCmode; +- else +- /* For other cases Carry flag is not required. */ +- return CCGCmode; +- /* Codes doable only with sign flag when comparing +- against zero, but we miss jump instruction for it +- so we need to use relational tests against overflow +- that thus needs to be zero. */ +- case GT: /* ZF=0 & SF=OF */ +- case LE: /* ZF=1 | SF<>OF */ +- if (op1 == const0_rtx) +- return CCNOmode; +- else +- return CCGCmode; +- /* strcmp pattern do (use flags) and combine may ask us for proper +- mode. */ +- case USE: +- return CCmode; +- default: +- gcc_unreachable (); +- } +-} +- +-/* Return the fixed registers used for condition codes. */ +- +-static bool +-ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) +-{ +- *p1 = FLAGS_REG; +- *p2 = INVALID_REGNUM; +- return true; +-} +- +-/* If two condition code modes are compatible, return a condition code +- mode which is compatible with both. Otherwise, return +- VOIDmode. */ +- +-static machine_mode +-ix86_cc_modes_compatible (machine_mode m1, machine_mode m2) +-{ +- if (m1 == m2) +- return m1; +- +- if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) +- return VOIDmode; +- +- if ((m1 == CCGCmode && m2 == CCGOCmode) +- || (m1 == CCGOCmode && m2 == CCGCmode)) +- return CCGCmode; +- +- if ((m1 == CCNOmode && m2 == CCGOCmode) +- || (m1 == CCGOCmode && m2 == CCNOmode)) +- return CCNOmode; +- +- if (m1 == CCZmode +- && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode)) +- return m2; +- else if (m2 == CCZmode +- && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode)) +- return m1; +- +- switch (m1) +- { +- default: +- gcc_unreachable (); +- +- case E_CCmode: +- case E_CCGCmode: +- case E_CCGOCmode: +- case E_CCNOmode: +- case E_CCAmode: +- case E_CCCmode: +- case E_CCOmode: +- case E_CCPmode: +- case E_CCSmode: +- case E_CCZmode: +- switch (m2) +- { +- default: +- return VOIDmode; +- +- case E_CCmode: +- case E_CCGCmode: +- case E_CCGOCmode: +- case E_CCNOmode: +- case E_CCAmode: +- case E_CCCmode: +- case E_CCOmode: +- case E_CCPmode: +- case E_CCSmode: +- case E_CCZmode: +- return CCmode; +- } +- +- case E_CCFPmode: +- /* These are only compatible with themselves, which we already +- checked above. */ +- return VOIDmode; +- } +-} +- +- +-/* Return a comparison we can do and that it is equivalent to +- swap_condition (code) apart possibly from orderedness. +- But, never change orderedness if TARGET_IEEE_FP, returning +- UNKNOWN in that case if necessary. */ +- +-static enum rtx_code +-ix86_fp_swap_condition (enum rtx_code code) +-{ +- switch (code) +- { +- case GT: /* GTU - CF=0 & ZF=0 */ +- return TARGET_IEEE_FP ? UNKNOWN : UNLT; +- case GE: /* GEU - CF=0 */ +- return TARGET_IEEE_FP ? UNKNOWN : UNLE; +- case UNLT: /* LTU - CF=1 */ +- return TARGET_IEEE_FP ? UNKNOWN : GT; +- case UNLE: /* LEU - CF=1 | ZF=1 */ +- return TARGET_IEEE_FP ? UNKNOWN : GE; +- default: +- return swap_condition (code); +- } +-} +- +-/* Return cost of comparison CODE using the best strategy for performance. +- All following functions do use number of instructions as a cost metrics. 
+- In future this should be tweaked to compute bytes for optimize_size and +- take into account performance of various instructions on various CPUs. */ +- +-static int +-ix86_fp_comparison_cost (enum rtx_code code) +-{ +- int arith_cost; +- +- /* The cost of code using bit-twiddling on %ah. */ +- switch (code) +- { +- case UNLE: +- case UNLT: +- case LTGT: +- case GT: +- case GE: +- case UNORDERED: +- case ORDERED: +- case UNEQ: +- arith_cost = 4; +- break; +- case LT: +- case NE: +- case EQ: +- case UNGE: +- arith_cost = TARGET_IEEE_FP ? 5 : 4; +- break; +- case LE: +- case UNGT: +- arith_cost = TARGET_IEEE_FP ? 6 : 4; +- break; +- default: +- gcc_unreachable (); +- } +- +- switch (ix86_fp_comparison_strategy (code)) +- { +- case IX86_FPCMP_COMI: +- return arith_cost > 4 ? 3 : 2; +- case IX86_FPCMP_SAHF: +- return arith_cost > 4 ? 4 : 3; +- default: +- return arith_cost; +- } +-} +- +-/* Return strategy to use for floating-point. We assume that fcomi is always +- preferrable where available, since that is also true when looking at size +- (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */ +- +-enum ix86_fpcmp_strategy +-ix86_fp_comparison_strategy (enum rtx_code) +-{ +- /* Do fcomi/sahf based test when profitable. */ +- +- if (TARGET_CMOVE) +- return IX86_FPCMP_COMI; +- +- if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) +- return IX86_FPCMP_SAHF; +- +- return IX86_FPCMP_ARITH; +-} +- +-/* Swap, force into registers, or otherwise massage the two operands +- to a fp comparison. The operands are updated in place; the new +- comparison code is returned. */ +- +-static enum rtx_code +-ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) +-{ +- bool unordered_compare = ix86_unordered_fp_compare (code); +- rtx op0 = *pop0, op1 = *pop1; +- machine_mode op_mode = GET_MODE (op0); +- bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); +- +- /* All of the unordered compare instructions only work on registers. +- The same is true of the fcomi compare instructions. The XFmode +- compare instructions require registers except when comparing +- against zero or when converting operand 1 from fixed point to +- floating point. */ +- +- if (!is_sse +- && (unordered_compare +- || (op_mode == XFmode +- && ! (standard_80387_constant_p (op0) == 1 +- || standard_80387_constant_p (op1) == 1) +- && GET_CODE (op1) != FLOAT) +- || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI)) +- { +- op0 = force_reg (op_mode, op0); +- op1 = force_reg (op_mode, op1); +- } +- else +- { +- /* %%% We only allow op1 in memory; op0 must be st(0). So swap +- things around if they appear profitable, otherwise force op0 +- into a register. */ +- +- if (standard_80387_constant_p (op0) == 0 +- || (MEM_P (op0) +- && ! (standard_80387_constant_p (op1) == 0 +- || MEM_P (op1)))) +- { +- enum rtx_code new_code = ix86_fp_swap_condition (code); +- if (new_code != UNKNOWN) +- { +- std::swap (op0, op1); +- code = new_code; +- } +- } +- +- if (!REG_P (op0)) +- op0 = force_reg (op_mode, op0); +- +- if (CONSTANT_P (op1)) +- { +- int tmp = standard_80387_constant_p (op1); +- if (tmp == 0) +- op1 = validize_mem (force_const_mem (op_mode, op1)); +- else if (tmp == 1) +- { +- if (TARGET_CMOVE) +- op1 = force_reg (op_mode, op1); +- } +- else +- op1 = force_reg (op_mode, op1); +- } +- } +- +- /* Try to rearrange the comparison to make it cheaper. 
*/ +- if (ix86_fp_comparison_cost (code) +- > ix86_fp_comparison_cost (swap_condition (code)) +- && (REG_P (op1) || can_create_pseudo_p ())) +- { +- std::swap (op0, op1); +- code = swap_condition (code); +- if (!REG_P (op0)) +- op0 = force_reg (op_mode, op0); +- } +- +- *pop0 = op0; +- *pop1 = op1; +- return code; +-} +- +-/* Convert comparison codes we use to represent FP comparison to integer +- code that will result in proper branch. Return UNKNOWN if no such code +- is available. */ +- +-enum rtx_code +-ix86_fp_compare_code_to_integer (enum rtx_code code) +-{ +- switch (code) +- { +- case GT: +- return GTU; +- case GE: +- return GEU; +- case ORDERED: +- case UNORDERED: +- return code; +- case UNEQ: +- return EQ; +- case UNLT: +- return LTU; +- case UNLE: +- return LEU; +- case LTGT: +- return NE; +- default: +- return UNKNOWN; +- } +-} +- +-/* Generate insn patterns to do a floating point compare of OPERANDS. */ +- +-static rtx +-ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1) +-{ +- bool unordered_compare = ix86_unordered_fp_compare (code); +- machine_mode cmp_mode; +- rtx tmp, scratch; +- +- code = ix86_prepare_fp_compare_args (code, &op0, &op1); +- +- tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); +- if (unordered_compare) +- tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP); +- +- /* Do fcomi/sahf based test when profitable. */ +- switch (ix86_fp_comparison_strategy (code)) +- { +- case IX86_FPCMP_COMI: +- cmp_mode = CCFPmode; +- emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp)); +- break; +- +- case IX86_FPCMP_SAHF: +- cmp_mode = CCFPmode; +- tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); +- scratch = gen_reg_rtx (HImode); +- emit_insn (gen_rtx_SET (scratch, tmp)); +- emit_insn (gen_x86_sahf_1 (scratch)); +- break; +- +- case IX86_FPCMP_ARITH: +- cmp_mode = CCNOmode; +- tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); +- scratch = gen_reg_rtx (HImode); +- emit_insn (gen_rtx_SET (scratch, tmp)); +- +- /* In the unordered case, we have to check C2 for NaN's, which +- doesn't happen to work out to anything nice combination-wise. +- So do some bit twiddling on the value we've got in AH to come +- up with an appropriate set of condition codes. 
*/ +- +- switch (code) +- { +- case GT: +- case UNGT: +- if (code == GT || !TARGET_IEEE_FP) +- { +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); +- code = EQ; +- } +- else +- { +- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); +- emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); +- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); +- cmp_mode = CCmode; +- code = GEU; +- } +- break; +- case LT: +- case UNLT: +- if (code == LT && TARGET_IEEE_FP) +- { +- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); +- emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx)); +- cmp_mode = CCmode; +- code = EQ; +- } +- else +- { +- emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx)); +- code = NE; +- } +- break; +- case GE: +- case UNGE: +- if (code == GE || !TARGET_IEEE_FP) +- { +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05))); +- code = EQ; +- } +- else +- { +- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); +- emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx)); +- code = NE; +- } +- break; +- case LE: +- case UNLE: +- if (code == LE && TARGET_IEEE_FP) +- { +- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); +- emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); +- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); +- cmp_mode = CCmode; +- code = LTU; +- } +- else +- { +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); +- code = NE; +- } +- break; +- case EQ: +- case UNEQ: +- if (code == EQ && TARGET_IEEE_FP) +- { +- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); +- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); +- cmp_mode = CCmode; +- code = EQ; +- } +- else +- { +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); +- code = NE; +- } +- break; +- case NE: +- case LTGT: +- if (code == NE && TARGET_IEEE_FP) +- { +- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); +- emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, +- GEN_INT (0x40))); +- code = NE; +- } +- else +- { +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); +- code = EQ; +- } +- break; +- +- case UNORDERED: +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); +- code = NE; +- break; +- case ORDERED: +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); +- code = EQ; +- break; +- +- default: +- gcc_unreachable (); +- } +- break; +- +- default: +- gcc_unreachable(); +- } +- +- /* Return the test that should be put into the flags user, i.e. +- the bcc, scc, or cmov instruction. */ +- return gen_rtx_fmt_ee (code, VOIDmode, +- gen_rtx_REG (cmp_mode, FLAGS_REG), +- const0_rtx); +-} +- +-static rtx +-ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1) +-{ +- rtx ret; +- +- if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) +- ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); +- +- else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) +- { +- gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0))); +- ret = ix86_expand_fp_compare (code, op0, op1); +- } +- else +- ret = ix86_expand_int_compare (code, op0, op1); +- +- return ret; +-} +- +-void +-ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) +-{ +- machine_mode mode = GET_MODE (op0); +- rtx tmp; +- +- /* Handle special case - vector comparsion with boolean result, transform +- it using ptest instruction. */ +- if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) +- { +- rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG); +- machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? 
V4DImode : V2DImode; +- +- gcc_assert (code == EQ || code == NE); +- /* Generate XOR since we can't check that one operand is zero vector. */ +- tmp = gen_reg_rtx (mode); +- emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1))); +- tmp = gen_lowpart (p_mode, tmp); +- emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG), +- gen_rtx_UNSPEC (CCmode, +- gen_rtvec (2, tmp, tmp), +- UNSPEC_PTEST))); +- tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx); +- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, +- gen_rtx_LABEL_REF (VOIDmode, label), +- pc_rtx); +- emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); +- return; +- } +- +- switch (mode) +- { +- case E_SFmode: +- case E_DFmode: +- case E_XFmode: +- case E_QImode: +- case E_HImode: +- case E_SImode: +- simple: +- tmp = ix86_expand_compare (code, op0, op1); +- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, +- gen_rtx_LABEL_REF (VOIDmode, label), +- pc_rtx); +- emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); +- return; +- +- case E_DImode: +- if (TARGET_64BIT) +- goto simple; +- /* For 32-bit target DI comparison may be performed on +- SSE registers. To allow this we should avoid split +- to SI mode which is achieved by doing xor in DI mode +- and then comparing with zero (which is recognized by +- STV pass). We don't compare using xor when optimizing +- for size. */ +- if (!optimize_insn_for_size_p () +- && TARGET_STV +- && (code == EQ || code == NE)) +- { +- op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1)); +- op1 = const0_rtx; +- } +- /* FALLTHRU */ +- case E_TImode: +- /* Expand DImode branch into multiple compare+branch. */ +- { +- rtx lo[2], hi[2]; +- rtx_code_label *label2; +- enum rtx_code code1, code2, code3; +- machine_mode submode; +- +- if (CONSTANT_P (op0) && !CONSTANT_P (op1)) +- { +- std::swap (op0, op1); +- code = swap_condition (code); +- } +- +- split_double_mode (mode, &op0, 1, lo+0, hi+0); +- split_double_mode (mode, &op1, 1, lo+1, hi+1); +- +- submode = mode == DImode ? SImode : DImode; +- +- /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to +- avoid two branches. This costs one extra insn, so disable when +- optimizing for size. */ +- +- if ((code == EQ || code == NE) +- && (!optimize_insn_for_size_p () +- || hi[1] == const0_rtx || lo[1] == const0_rtx)) +- { +- rtx xor0, xor1; +- +- xor1 = hi[0]; +- if (hi[1] != const0_rtx) +- xor1 = expand_binop (submode, xor_optab, xor1, hi[1], +- NULL_RTX, 0, OPTAB_WIDEN); +- +- xor0 = lo[0]; +- if (lo[1] != const0_rtx) +- xor0 = expand_binop (submode, xor_optab, xor0, lo[1], +- NULL_RTX, 0, OPTAB_WIDEN); +- +- tmp = expand_binop (submode, ior_optab, xor1, xor0, +- NULL_RTX, 0, OPTAB_WIDEN); +- +- ix86_expand_branch (code, tmp, const0_rtx, label); +- return; +- } +- +- /* Otherwise, if we are doing less-than or greater-or-equal-than, +- op1 is a constant and the low word is zero, then we can just +- examine the high word. Similarly for low word -1 and +- less-or-equal-than or greater-than. */ +- +- if (CONST_INT_P (hi[1])) +- switch (code) +- { +- case LT: case LTU: case GE: case GEU: +- if (lo[1] == const0_rtx) +- { +- ix86_expand_branch (code, hi[0], hi[1], label); +- return; +- } +- break; +- case LE: case LEU: case GT: case GTU: +- if (lo[1] == constm1_rtx) +- { +- ix86_expand_branch (code, hi[0], hi[1], label); +- return; +- } +- break; +- default: +- break; +- } +- +- /* Emulate comparisons that do not depend on Zero flag with +- double-word subtraction. 
Note that only Overflow, Sign +- and Carry flags are valid, so swap arguments and condition +- of comparisons that would otherwise test Zero flag. */ +- +- switch (code) +- { +- case LE: case LEU: case GT: case GTU: +- std::swap (lo[0], lo[1]); +- std::swap (hi[0], hi[1]); +- code = swap_condition (code); +- /* FALLTHRU */ +- +- case LT: case LTU: case GE: case GEU: +- { +- rtx (*cmp_insn) (rtx, rtx); +- rtx (*sbb_insn) (rtx, rtx, rtx); +- bool uns = (code == LTU || code == GEU); +- +- if (TARGET_64BIT) +- { +- cmp_insn = gen_cmpdi_1; +- sbb_insn +- = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz; +- } +- else +- { +- cmp_insn = gen_cmpsi_1; +- sbb_insn +- = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz; +- } +- +- if (!nonimmediate_operand (lo[0], submode)) +- lo[0] = force_reg (submode, lo[0]); +- if (!x86_64_general_operand (lo[1], submode)) +- lo[1] = force_reg (submode, lo[1]); +- +- if (!register_operand (hi[0], submode)) +- hi[0] = force_reg (submode, hi[0]); +- if ((uns && !nonimmediate_operand (hi[1], submode)) +- || (!uns && !x86_64_general_operand (hi[1], submode))) +- hi[1] = force_reg (submode, hi[1]); +- +- emit_insn (cmp_insn (lo[0], lo[1])); +- emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1])); +- +- tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG); +- +- ix86_expand_branch (code, tmp, const0_rtx, label); +- return; +- } +- +- default: +- break; +- } +- +- /* Otherwise, we need two or three jumps. */ +- +- label2 = gen_label_rtx (); +- +- code1 = code; +- code2 = swap_condition (code); +- code3 = unsigned_condition (code); +- +- switch (code) +- { +- case LT: case GT: case LTU: case GTU: +- break; +- +- case LE: code1 = LT; code2 = GT; break; +- case GE: code1 = GT; code2 = LT; break; +- case LEU: code1 = LTU; code2 = GTU; break; +- case GEU: code1 = GTU; code2 = LTU; break; +- +- case EQ: code1 = UNKNOWN; code2 = NE; break; +- case NE: code2 = UNKNOWN; break; +- +- default: +- gcc_unreachable (); +- } +- +- /* +- * a < b => +- * if (hi(a) < hi(b)) goto true; +- * if (hi(a) > hi(b)) goto false; +- * if (lo(a) < lo(b)) goto true; +- * false: +- */ +- +- if (code1 != UNKNOWN) +- ix86_expand_branch (code1, hi[0], hi[1], label); +- if (code2 != UNKNOWN) +- ix86_expand_branch (code2, hi[0], hi[1], label2); +- +- ix86_expand_branch (code3, lo[0], lo[1], label); +- +- if (code2 != UNKNOWN) +- emit_label (label2); +- return; +- } +- +- default: +- gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC); +- goto simple; +- } +-} +- +-void +-ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1) +-{ +- rtx ret; +- +- gcc_assert (GET_MODE (dest) == QImode); +- +- ret = ix86_expand_compare (code, op0, op1); +- PUT_MODE (ret, QImode); +- emit_insn (gen_rtx_SET (dest, ret)); +-} +- +-/* Expand comparison setting or clearing carry flag. Return true when +- successful and set pop for the operation. */ +-static bool +-ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) +-{ +- machine_mode mode +- = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); +- +- /* Do not handle double-mode compares that go through special path. */ +- if (mode == (TARGET_64BIT ? TImode : DImode)) +- return false; +- +- if (SCALAR_FLOAT_MODE_P (mode)) +- { +- rtx compare_op; +- rtx_insn *compare_seq; +- +- gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); +- +- /* Shortcut: following common codes never translate +- into carry flag compares. 
*/ +- if (code == EQ || code == NE || code == UNEQ || code == LTGT +- || code == ORDERED || code == UNORDERED) +- return false; +- +- /* These comparisons require zero flag; swap operands so they won't. */ +- if ((code == GT || code == UNLE || code == LE || code == UNGT) +- && !TARGET_IEEE_FP) +- { +- std::swap (op0, op1); +- code = swap_condition (code); +- } +- +- /* Try to expand the comparison and verify that we end up with +- carry flag based comparison. This fails to be true only when +- we decide to expand comparison using arithmetic that is not +- too common scenario. */ +- start_sequence (); +- compare_op = ix86_expand_fp_compare (code, op0, op1); +- compare_seq = get_insns (); +- end_sequence (); +- +- if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode) +- code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); +- else +- code = GET_CODE (compare_op); +- +- if (code != LTU && code != GEU) +- return false; +- +- emit_insn (compare_seq); +- *pop = compare_op; +- return true; +- } +- +- if (!INTEGRAL_MODE_P (mode)) +- return false; +- +- switch (code) +- { +- case LTU: +- case GEU: +- break; +- +- /* Convert a==0 into (unsigned)a<1. */ +- case EQ: +- case NE: +- if (op1 != const0_rtx) +- return false; +- op1 = const1_rtx; +- code = (code == EQ ? LTU : GEU); +- break; +- +- /* Convert a>b into b=b-1. */ +- case GTU: +- case LEU: +- if (CONST_INT_P (op1)) +- { +- op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); +- /* Bail out on overflow. We still can swap operands but that +- would force loading of the constant into register. */ +- if (op1 == const0_rtx +- || !x86_64_immediate_operand (op1, GET_MODE (op1))) +- return false; +- code = (code == GTU ? GEU : LTU); +- } +- else +- { +- std::swap (op0, op1); +- code = (code == GTU ? LTU : GEU); +- } +- break; +- +- /* Convert a>=0 into (unsigned)a<0x80000000. */ +- case LT: +- case GE: +- if (mode == DImode || op1 != const0_rtx) +- return false; +- op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); +- code = (code == LT ? GEU : LTU); +- break; +- case LE: +- case GT: +- if (mode == DImode || op1 != constm1_rtx) +- return false; +- op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); +- code = (code == LE ? GEU : LTU); +- break; +- +- default: +- return false; +- } +- /* Swapping operands may cause constant to appear as first operand. */ +- if (!nonimmediate_operand (op0, VOIDmode)) +- { +- if (!can_create_pseudo_p ()) +- return false; +- op0 = force_reg (mode, op0); +- } +- *pop = ix86_expand_compare (code, op0, op1); +- gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); +- return true; +-} +- +-bool +-ix86_expand_int_movcc (rtx operands[]) +-{ +- enum rtx_code code = GET_CODE (operands[1]), compare_code; +- rtx_insn *compare_seq; +- rtx compare_op; +- machine_mode mode = GET_MODE (operands[0]); +- bool sign_bit_compare_p = false; +- rtx op0 = XEXP (operands[1], 0); +- rtx op1 = XEXP (operands[1], 1); +- +- if (GET_MODE (op0) == TImode +- || (GET_MODE (op0) == DImode +- && !TARGET_64BIT)) +- return false; +- +- start_sequence (); +- compare_op = ix86_expand_compare (code, op0, op1); +- compare_seq = get_insns (); +- end_sequence (); +- +- compare_code = GET_CODE (compare_op); +- +- if ((op1 == const0_rtx && (code == GE || code == LT)) +- || (op1 == constm1_rtx && (code == GT || code == LE))) +- sign_bit_compare_p = true; +- +- /* Don't attempt mode expansion here -- if we had to expand 5 or 6 +- HImode insns, we'd be swallowed in word prefix ops. 
*/ +- +- if ((mode != HImode || TARGET_FAST_PREFIX) +- && (mode != (TARGET_64BIT ? TImode : DImode)) +- && CONST_INT_P (operands[2]) +- && CONST_INT_P (operands[3])) +- { +- rtx out = operands[0]; +- HOST_WIDE_INT ct = INTVAL (operands[2]); +- HOST_WIDE_INT cf = INTVAL (operands[3]); +- HOST_WIDE_INT diff; +- +- diff = ct - cf; +- /* Sign bit compares are better done using shifts than we do by using +- sbb. */ +- if (sign_bit_compare_p +- || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) +- { +- /* Detect overlap between destination and compare sources. */ +- rtx tmp = out; +- +- if (!sign_bit_compare_p) +- { +- rtx flags; +- bool fpcmp = false; +- +- compare_code = GET_CODE (compare_op); +- +- flags = XEXP (compare_op, 0); +- +- if (GET_MODE (flags) == CCFPmode) +- { +- fpcmp = true; +- compare_code +- = ix86_fp_compare_code_to_integer (compare_code); +- } +- +- /* To simplify rest of code, restrict to the GEU case. */ +- if (compare_code == LTU) +- { +- std::swap (ct, cf); +- compare_code = reverse_condition (compare_code); +- code = reverse_condition (code); +- } +- else +- { +- if (fpcmp) +- PUT_CODE (compare_op, +- reverse_condition_maybe_unordered +- (GET_CODE (compare_op))); +- else +- PUT_CODE (compare_op, +- reverse_condition (GET_CODE (compare_op))); +- } +- diff = ct - cf; +- +- if (reg_overlap_mentioned_p (out, op0) +- || reg_overlap_mentioned_p (out, op1)) +- tmp = gen_reg_rtx (mode); +- +- if (mode == DImode) +- emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op)); +- else +- emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), +- flags, compare_op)); +- } +- else +- { +- if (code == GT || code == GE) +- code = reverse_condition (code); +- else +- { +- std::swap (ct, cf); +- diff = ct - cf; +- } +- tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1); +- } +- +- if (diff == 1) +- { +- /* +- * cmpl op0,op1 +- * sbbl dest,dest +- * [addl dest, ct] +- * +- * Size 5 - 8. +- */ +- if (ct) +- tmp = expand_simple_binop (mode, PLUS, +- tmp, GEN_INT (ct), +- copy_rtx (tmp), 1, OPTAB_DIRECT); +- } +- else if (cf == -1) +- { +- /* +- * cmpl op0,op1 +- * sbbl dest,dest +- * orl $ct, dest +- * +- * Size 8. +- */ +- tmp = expand_simple_binop (mode, IOR, +- tmp, GEN_INT (ct), +- copy_rtx (tmp), 1, OPTAB_DIRECT); +- } +- else if (diff == -1 && ct) +- { +- /* +- * cmpl op0,op1 +- * sbbl dest,dest +- * notl dest +- * [addl dest, cf] +- * +- * Size 8 - 11. +- */ +- tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); +- if (cf) +- tmp = expand_simple_binop (mode, PLUS, +- copy_rtx (tmp), GEN_INT (cf), +- copy_rtx (tmp), 1, OPTAB_DIRECT); +- } +- else +- { +- /* +- * cmpl op0,op1 +- * sbbl dest,dest +- * [notl dest] +- * andl cf - ct, dest +- * [addl dest, ct] +- * +- * Size 8 - 11. 
+- */ +- +- if (cf == 0) +- { +- cf = ct; +- ct = 0; +- tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); +- } +- +- tmp = expand_simple_binop (mode, AND, +- copy_rtx (tmp), +- gen_int_mode (cf - ct, mode), +- copy_rtx (tmp), 1, OPTAB_DIRECT); +- if (ct) +- tmp = expand_simple_binop (mode, PLUS, +- copy_rtx (tmp), GEN_INT (ct), +- copy_rtx (tmp), 1, OPTAB_DIRECT); +- } +- +- if (!rtx_equal_p (tmp, out)) +- emit_move_insn (copy_rtx (out), copy_rtx (tmp)); +- +- return true; +- } +- +- if (diff < 0) +- { +- machine_mode cmp_mode = GET_MODE (op0); +- enum rtx_code new_code; +- +- if (SCALAR_FLOAT_MODE_P (cmp_mode)) +- { +- gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); +- +- /* We may be reversing unordered compare to normal compare, that +- is not valid in general (we may convert non-trapping condition +- to trapping one), however on i386 we currently emit all +- comparisons unordered. */ +- new_code = reverse_condition_maybe_unordered (code); +- } +- else +- new_code = ix86_reverse_condition (code, cmp_mode); +- if (new_code != UNKNOWN) +- { +- std::swap (ct, cf); +- diff = -diff; +- code = new_code; +- } +- } +- +- compare_code = UNKNOWN; +- if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT +- && CONST_INT_P (op1)) +- { +- if (op1 == const0_rtx +- && (code == LT || code == GE)) +- compare_code = code; +- else if (op1 == constm1_rtx) +- { +- if (code == LE) +- compare_code = LT; +- else if (code == GT) +- compare_code = GE; +- } +- } +- +- /* Optimize dest = (op0 < 0) ? -1 : cf. */ +- if (compare_code != UNKNOWN +- && GET_MODE (op0) == GET_MODE (out) +- && (cf == -1 || ct == -1)) +- { +- /* If lea code below could be used, only optimize +- if it results in a 2 insn sequence. */ +- +- if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 +- || diff == 3 || diff == 5 || diff == 9) +- || (compare_code == LT && ct == -1) +- || (compare_code == GE && cf == -1)) +- { +- /* +- * notl op1 (if necessary) +- * sarl $31, op1 +- * orl cf, op1 +- */ +- if (ct != -1) +- { +- cf = ct; +- ct = -1; +- code = reverse_condition (code); +- } +- +- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); +- +- out = expand_simple_binop (mode, IOR, +- out, GEN_INT (cf), +- out, 1, OPTAB_DIRECT); +- if (out != operands[0]) +- emit_move_insn (operands[0], out); +- +- return true; +- } +- } +- +- +- if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 +- || diff == 3 || diff == 5 || diff == 9) +- && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) +- && (mode != DImode +- || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) +- { +- /* +- * xorl dest,dest +- * cmpl op1,op2 +- * setcc dest +- * lea cf(dest*(ct-cf)),dest +- * +- * Size 14. +- * +- * This also catches the degenerate setcc-only case. +- */ +- +- rtx tmp; +- int nops; +- +- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); +- +- nops = 0; +- /* On x86_64 the lea instruction operates on Pmode, so we need +- to get arithmetics done in proper mode to match. 
*/ +- if (diff == 1) +- tmp = copy_rtx (out); +- else +- { +- rtx out1; +- out1 = copy_rtx (out); +- tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); +- nops++; +- if (diff & 1) +- { +- tmp = gen_rtx_PLUS (mode, tmp, out1); +- nops++; +- } +- } +- if (cf != 0) +- { +- tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); +- nops++; +- } +- if (!rtx_equal_p (tmp, out)) +- { +- if (nops == 1) +- out = force_operand (tmp, copy_rtx (out)); +- else +- emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp))); +- } +- if (!rtx_equal_p (out, operands[0])) +- emit_move_insn (operands[0], copy_rtx (out)); +- +- return true; +- } +- +- /* +- * General case: Jumpful: +- * xorl dest,dest cmpl op1, op2 +- * cmpl op1, op2 movl ct, dest +- * setcc dest jcc 1f +- * decl dest movl cf, dest +- * andl (cf-ct),dest 1: +- * addl ct,dest +- * +- * Size 20. Size 14. +- * +- * This is reasonably steep, but branch mispredict costs are +- * high on modern cpus, so consider failing only if optimizing +- * for space. +- */ +- +- if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) +- && BRANCH_COST (optimize_insn_for_speed_p (), +- false) >= 2) +- { +- if (cf == 0) +- { +- machine_mode cmp_mode = GET_MODE (op0); +- enum rtx_code new_code; +- +- if (SCALAR_FLOAT_MODE_P (cmp_mode)) +- { +- gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); +- +- /* We may be reversing unordered compare to normal compare, +- that is not valid in general (we may convert non-trapping +- condition to trapping one), however on i386 we currently +- emit all comparisons unordered. */ +- new_code = reverse_condition_maybe_unordered (code); +- } +- else +- { +- new_code = ix86_reverse_condition (code, cmp_mode); +- if (compare_code != UNKNOWN && new_code != UNKNOWN) +- compare_code = reverse_condition (compare_code); +- } +- +- if (new_code != UNKNOWN) +- { +- cf = ct; +- ct = 0; +- code = new_code; +- } +- } +- +- if (compare_code != UNKNOWN) +- { +- /* notl op1 (if needed) +- sarl $31, op1 +- andl (cf-ct), op1 +- addl ct, op1 +- +- For x < 0 (resp. x <= -1) there will be no notl, +- so if possible swap the constants to get rid of the +- complement. +- True/false will be -1/0 while code below (store flag +- followed by decrement) is 0/-1, so the constants need +- to be exchanged once more. */ +- +- if (compare_code == GE || !cf) +- { +- code = reverse_condition (code); +- compare_code = LT; +- } +- else +- std::swap (ct, cf); +- +- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); +- } +- else +- { +- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); +- +- out = expand_simple_binop (mode, PLUS, copy_rtx (out), +- constm1_rtx, +- copy_rtx (out), 1, OPTAB_DIRECT); +- } +- +- out = expand_simple_binop (mode, AND, copy_rtx (out), +- gen_int_mode (cf - ct, mode), +- copy_rtx (out), 1, OPTAB_DIRECT); +- if (ct) +- out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), +- copy_rtx (out), 1, OPTAB_DIRECT); +- if (!rtx_equal_p (out, operands[0])) +- emit_move_insn (operands[0], copy_rtx (out)); +- +- return true; +- } +- } +- +- if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) +- { +- /* Try a few things more with specific constants and a variable. */ +- +- optab op; +- rtx var, orig_out, out, tmp; +- +- if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2) +- return false; +- +- /* If one of the two operands is an interesting constant, load a +- constant with the above and mask it in with a logical operation. 
*/ +- +- if (CONST_INT_P (operands[2])) +- { +- var = operands[3]; +- if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) +- operands[3] = constm1_rtx, op = and_optab; +- else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) +- operands[3] = const0_rtx, op = ior_optab; +- else +- return false; +- } +- else if (CONST_INT_P (operands[3])) +- { +- var = operands[2]; +- if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) +- operands[2] = constm1_rtx, op = and_optab; +- else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) +- operands[2] = const0_rtx, op = ior_optab; +- else +- return false; +- } +- else +- return false; +- +- orig_out = operands[0]; +- tmp = gen_reg_rtx (mode); +- operands[0] = tmp; +- +- /* Recurse to get the constant loaded. */ +- if (!ix86_expand_int_movcc (operands)) +- return false; +- +- /* Mask in the interesting variable. */ +- out = expand_binop (mode, op, var, tmp, orig_out, 0, +- OPTAB_WIDEN); +- if (!rtx_equal_p (out, orig_out)) +- emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); +- +- return true; +- } +- +- /* +- * For comparison with above, +- * +- * movl cf,dest +- * movl ct,tmp +- * cmpl op1,op2 +- * cmovcc tmp,dest +- * +- * Size 15. +- */ +- +- if (! nonimmediate_operand (operands[2], mode)) +- operands[2] = force_reg (mode, operands[2]); +- if (! nonimmediate_operand (operands[3], mode)) +- operands[3] = force_reg (mode, operands[3]); +- +- if (! register_operand (operands[2], VOIDmode) +- && (mode == QImode +- || ! register_operand (operands[3], VOIDmode))) +- operands[2] = force_reg (mode, operands[2]); +- +- if (mode == QImode +- && ! register_operand (operands[3], VOIDmode)) +- operands[3] = force_reg (mode, operands[3]); +- +- emit_insn (compare_seq); +- emit_insn (gen_rtx_SET (operands[0], +- gen_rtx_IF_THEN_ELSE (mode, +- compare_op, operands[2], +- operands[3]))); +- return true; +-} +- +-/* Swap, force into registers, or otherwise massage the two operands +- to an sse comparison with a mask result. Thus we differ a bit from +- ix86_prepare_fp_compare_args which expects to produce a flags result. +- +- The DEST operand exists to help determine whether to commute commutative +- operators. The POP0/POP1 operands are updated in place. The new +- comparison code is returned, or UNKNOWN if not implementable. */ +- +-static enum rtx_code +-ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, +- rtx *pop0, rtx *pop1) +-{ +- switch (code) +- { +- case LTGT: +- case UNEQ: +- /* AVX supports all the needed comparisons. */ +- if (TARGET_AVX) +- break; +- /* We have no LTGT as an operator. We could implement it with +- NE & ORDERED, but this requires an extra temporary. It's +- not clear that it's worth it. */ +- return UNKNOWN; +- +- case LT: +- case LE: +- case UNGT: +- case UNGE: +- /* These are supported directly. */ +- break; +- +- case EQ: +- case NE: +- case UNORDERED: +- case ORDERED: +- /* AVX has 3 operand comparisons, no need to swap anything. */ +- if (TARGET_AVX) +- break; +- /* For commutative operators, try to canonicalize the destination +- operand to be first in the comparison - this helps reload to +- avoid extra moves. */ +- if (!dest || !rtx_equal_p (dest, *pop1)) +- break; +- /* FALLTHRU */ +- +- case GE: +- case GT: +- case UNLE: +- case UNLT: +- /* These are not supported directly before AVX, and furthermore +- ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the +- comparison operands to transform into something that is +- supported. 
*/ +- std::swap (*pop0, *pop1); +- code = swap_condition (code); +- break; +- +- default: +- gcc_unreachable (); +- } +- +- return code; +-} +- +-/* Detect conditional moves that exactly match min/max operational +- semantics. Note that this is IEEE safe, as long as we don't +- interchange the operands. +- +- Returns FALSE if this conditional move doesn't match a MIN/MAX, +- and TRUE if the operation is successful and instructions are emitted. */ +- +-static bool +-ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, +- rtx cmp_op1, rtx if_true, rtx if_false) +-{ +- machine_mode mode; +- bool is_min; +- rtx tmp; +- +- if (code == LT) +- ; +- else if (code == UNGE) +- std::swap (if_true, if_false); +- else +- return false; +- +- if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) +- is_min = true; +- else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) +- is_min = false; +- else +- return false; +- +- mode = GET_MODE (dest); +- +- /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, +- but MODE may be a vector mode and thus not appropriate. */ +- if (!flag_finite_math_only || flag_signed_zeros) +- { +- int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; +- rtvec v; +- +- if_true = force_reg (mode, if_true); +- v = gen_rtvec (2, if_true, if_false); +- tmp = gen_rtx_UNSPEC (mode, v, u); +- } +- else +- { +- code = is_min ? SMIN : SMAX; +- if (MEM_P (if_true) && MEM_P (if_false)) +- if_true = force_reg (mode, if_true); +- tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); +- } +- +- emit_insn (gen_rtx_SET (dest, tmp)); +- return true; +-} +- +-/* Expand an SSE comparison. Return the register with the result. */ +- +-static rtx +-ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, +- rtx op_true, rtx op_false) +-{ +- machine_mode mode = GET_MODE (dest); +- machine_mode cmp_ops_mode = GET_MODE (cmp_op0); +- +- /* In general case result of comparison can differ from operands' type. */ +- machine_mode cmp_mode; +- +- /* In AVX512F the result of comparison is an integer mask. */ +- bool maskcmp = false; +- rtx x; +- +- if (GET_MODE_SIZE (cmp_ops_mode) == 64) +- { +- unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode); +- cmp_mode = int_mode_for_size (nbits, 0).require (); +- maskcmp = true; +- } +- else +- cmp_mode = cmp_ops_mode; +- +- cmp_op0 = force_reg (cmp_ops_mode, cmp_op0); +- +- int (*op1_predicate)(rtx, machine_mode) +- = VECTOR_MODE_P (cmp_ops_mode) ? vector_operand : nonimmediate_operand; +- +- if (!op1_predicate (cmp_op1, cmp_ops_mode)) +- cmp_op1 = force_reg (cmp_ops_mode, cmp_op1); +- +- if (optimize +- || (maskcmp && cmp_mode != mode) +- || (op_true && reg_overlap_mentioned_p (dest, op_true)) +- || (op_false && reg_overlap_mentioned_p (dest, op_false))) +- dest = gen_reg_rtx (maskcmp ? cmp_mode : mode); +- +- /* Compare patterns for int modes are unspec in AVX512F only. */ +- if (maskcmp && (code == GT || code == EQ)) +- { +- rtx (*gen)(rtx, rtx, rtx); +- +- switch (cmp_ops_mode) +- { +- case E_V64QImode: +- gcc_assert (TARGET_AVX512BW); +- gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1; +- break; +- case E_V32HImode: +- gcc_assert (TARGET_AVX512BW); +- gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1; +- break; +- case E_V16SImode: +- gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1; +- break; +- case E_V8DImode: +- gen = code == GT ? 
gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1; +- break; +- default: +- gen = NULL; +- } +- +- if (gen) +- { +- emit_insn (gen (dest, cmp_op0, cmp_op1)); +- return dest; +- } +- } +- x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1); +- +- if (cmp_mode != mode && !maskcmp) +- { +- x = force_reg (cmp_ops_mode, x); +- convert_move (dest, x, false); +- } +- else +- emit_insn (gen_rtx_SET (dest, x)); +- +- return dest; +-} +- +-/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical +- operations. This is used for both scalar and vector conditional moves. */ +- +-void +-ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) +-{ +- machine_mode mode = GET_MODE (dest); +- machine_mode cmpmode = GET_MODE (cmp); +- +- /* In AVX512F the result of comparison is an integer mask. */ +- bool maskcmp = (mode != cmpmode && TARGET_AVX512F); +- +- rtx t2, t3, x; +- +- /* If we have an integer mask and FP value then we need +- to cast mask to FP mode. */ +- if (mode != cmpmode && VECTOR_MODE_P (cmpmode)) +- { +- cmp = force_reg (cmpmode, cmp); +- cmp = gen_rtx_SUBREG (mode, cmp, 0); +- } +- +- if (maskcmp) +- { +- rtx (*gen) (rtx, rtx) = NULL; +- if ((op_true == CONST0_RTX (mode) +- && vector_all_ones_operand (op_false, mode)) +- || (op_false == CONST0_RTX (mode) +- && vector_all_ones_operand (op_true, mode))) +- switch (mode) +- { +- case E_V64QImode: +- if (TARGET_AVX512BW) +- gen = gen_avx512bw_cvtmask2bv64qi; +- break; +- case E_V32QImode: +- if (TARGET_AVX512VL && TARGET_AVX512BW) +- gen = gen_avx512vl_cvtmask2bv32qi; +- break; +- case E_V16QImode: +- if (TARGET_AVX512VL && TARGET_AVX512BW) +- gen = gen_avx512vl_cvtmask2bv16qi; +- break; +- case E_V32HImode: +- if (TARGET_AVX512BW) +- gen = gen_avx512bw_cvtmask2wv32hi; +- break; +- case E_V16HImode: +- if (TARGET_AVX512VL && TARGET_AVX512BW) +- gen = gen_avx512vl_cvtmask2wv16hi; +- break; +- case E_V8HImode: +- if (TARGET_AVX512VL && TARGET_AVX512BW) +- gen = gen_avx512vl_cvtmask2wv8hi; +- break; +- case E_V16SImode: +- if (TARGET_AVX512DQ) +- gen = gen_avx512f_cvtmask2dv16si; +- break; +- case E_V8SImode: +- if (TARGET_AVX512VL && TARGET_AVX512DQ) +- gen = gen_avx512vl_cvtmask2dv8si; +- break; +- case E_V4SImode: +- if (TARGET_AVX512VL && TARGET_AVX512DQ) +- gen = gen_avx512vl_cvtmask2dv4si; +- break; +- case E_V8DImode: +- if (TARGET_AVX512DQ) +- gen = gen_avx512f_cvtmask2qv8di; +- break; +- case E_V4DImode: +- if (TARGET_AVX512VL && TARGET_AVX512DQ) +- gen = gen_avx512vl_cvtmask2qv4di; +- break; +- case E_V2DImode: +- if (TARGET_AVX512VL && TARGET_AVX512DQ) +- gen = gen_avx512vl_cvtmask2qv2di; +- break; +- default: +- break; +- } +- if (gen && SCALAR_INT_MODE_P (cmpmode)) +- { +- cmp = force_reg (cmpmode, cmp); +- if (op_true == CONST0_RTX (mode)) +- { +- rtx (*gen_not) (rtx, rtx); +- switch (cmpmode) +- { +- case E_QImode: gen_not = gen_knotqi; break; +- case E_HImode: gen_not = gen_knothi; break; +- case E_SImode: gen_not = gen_knotsi; break; +- case E_DImode: gen_not = gen_knotdi; break; +- default: gcc_unreachable (); +- } +- rtx n = gen_reg_rtx (cmpmode); +- emit_insn (gen_not (n, cmp)); +- cmp = n; +- } +- emit_insn (gen (dest, cmp)); +- return; +- } +- } +- else if (vector_all_ones_operand (op_true, mode) +- && op_false == CONST0_RTX (mode)) +- { +- emit_insn (gen_rtx_SET (dest, cmp)); +- return; +- } +- else if (op_false == CONST0_RTX (mode)) +- { +- op_true = force_reg (mode, op_true); +- x = gen_rtx_AND (mode, cmp, op_true); +- emit_insn (gen_rtx_SET (dest, x)); +- return; +- } +- else if (op_true == CONST0_RTX 
(mode)) +- { +- op_false = force_reg (mode, op_false); +- x = gen_rtx_NOT (mode, cmp); +- x = gen_rtx_AND (mode, x, op_false); +- emit_insn (gen_rtx_SET (dest, x)); +- return; +- } +- else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)) +- { +- op_false = force_reg (mode, op_false); +- x = gen_rtx_IOR (mode, cmp, op_false); +- emit_insn (gen_rtx_SET (dest, x)); +- return; +- } +- else if (TARGET_XOP) +- { +- op_true = force_reg (mode, op_true); +- +- if (!nonimmediate_operand (op_false, mode)) +- op_false = force_reg (mode, op_false); +- +- emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp, +- op_true, +- op_false))); +- return; +- } +- +- rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; +- rtx d = dest; +- +- if (!vector_operand (op_true, mode)) +- op_true = force_reg (mode, op_true); +- +- op_false = force_reg (mode, op_false); +- +- switch (mode) +- { +- case E_V4SFmode: +- if (TARGET_SSE4_1) +- gen = gen_sse4_1_blendvps; +- break; +- case E_V2DFmode: +- if (TARGET_SSE4_1) +- gen = gen_sse4_1_blendvpd; +- break; +- case E_SFmode: +- if (TARGET_SSE4_1) +- { +- gen = gen_sse4_1_blendvss; +- op_true = force_reg (mode, op_true); +- } +- break; +- case E_DFmode: +- if (TARGET_SSE4_1) +- { +- gen = gen_sse4_1_blendvsd; +- op_true = force_reg (mode, op_true); +- } +- break; +- case E_V16QImode: +- case E_V8HImode: +- case E_V4SImode: +- case E_V2DImode: +- if (TARGET_SSE4_1) +- { +- gen = gen_sse4_1_pblendvb; +- if (mode != V16QImode) +- d = gen_reg_rtx (V16QImode); +- op_false = gen_lowpart (V16QImode, op_false); +- op_true = gen_lowpart (V16QImode, op_true); +- cmp = gen_lowpart (V16QImode, cmp); +- } +- break; +- case E_V8SFmode: +- if (TARGET_AVX) +- gen = gen_avx_blendvps256; +- break; +- case E_V4DFmode: +- if (TARGET_AVX) +- gen = gen_avx_blendvpd256; +- break; +- case E_V32QImode: +- case E_V16HImode: +- case E_V8SImode: +- case E_V4DImode: +- if (TARGET_AVX2) +- { +- gen = gen_avx2_pblendvb; +- if (mode != V32QImode) +- d = gen_reg_rtx (V32QImode); +- op_false = gen_lowpart (V32QImode, op_false); +- op_true = gen_lowpart (V32QImode, op_true); +- cmp = gen_lowpart (V32QImode, cmp); +- } +- break; +- +- case E_V64QImode: +- gen = gen_avx512bw_blendmv64qi; +- break; +- case E_V32HImode: +- gen = gen_avx512bw_blendmv32hi; +- break; +- case E_V16SImode: +- gen = gen_avx512f_blendmv16si; +- break; +- case E_V8DImode: +- gen = gen_avx512f_blendmv8di; +- break; +- case E_V8DFmode: +- gen = gen_avx512f_blendmv8df; +- break; +- case E_V16SFmode: +- gen = gen_avx512f_blendmv16sf; +- break; +- +- default: +- break; +- } +- +- if (gen != NULL) +- { +- emit_insn (gen (d, op_false, op_true, cmp)); +- if (d != dest) +- emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); +- } +- else +- { +- op_true = force_reg (mode, op_true); +- +- t2 = gen_reg_rtx (mode); +- if (optimize) +- t3 = gen_reg_rtx (mode); +- else +- t3 = dest; +- +- x = gen_rtx_AND (mode, op_true, cmp); +- emit_insn (gen_rtx_SET (t2, x)); +- +- x = gen_rtx_NOT (mode, cmp); +- x = gen_rtx_AND (mode, x, op_false); +- emit_insn (gen_rtx_SET (t3, x)); +- +- x = gen_rtx_IOR (mode, t3, t2); +- emit_insn (gen_rtx_SET (dest, x)); +- } +-} +- +-/* Expand a floating-point conditional move. Return true if successful. 
*/ +- +-bool +-ix86_expand_fp_movcc (rtx operands[]) +-{ +- machine_mode mode = GET_MODE (operands[0]); +- enum rtx_code code = GET_CODE (operands[1]); +- rtx tmp, compare_op; +- rtx op0 = XEXP (operands[1], 0); +- rtx op1 = XEXP (operands[1], 1); +- +- if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) +- { +- machine_mode cmode; +- +- /* Since we've no cmove for sse registers, don't force bad register +- allocation just to gain access to it. Deny movcc when the +- comparison mode doesn't match the move mode. */ +- cmode = GET_MODE (op0); +- if (cmode == VOIDmode) +- cmode = GET_MODE (op1); +- if (cmode != mode) +- return false; +- +- code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1); +- if (code == UNKNOWN) +- return false; +- +- if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1, +- operands[2], operands[3])) +- return true; +- +- tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1, +- operands[2], operands[3]); +- ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); +- return true; +- } +- +- if (GET_MODE (op0) == TImode +- || (GET_MODE (op0) == DImode +- && !TARGET_64BIT)) +- return false; +- +- /* The floating point conditional move instructions don't directly +- support conditions resulting from a signed integer comparison. */ +- +- compare_op = ix86_expand_compare (code, op0, op1); +- if (!fcmov_comparison_operator (compare_op, VOIDmode)) +- { +- tmp = gen_reg_rtx (QImode); +- ix86_expand_setcc (tmp, code, op0, op1); +- +- compare_op = ix86_expand_compare (NE, tmp, const0_rtx); +- } +- +- emit_insn (gen_rtx_SET (operands[0], +- gen_rtx_IF_THEN_ELSE (mode, compare_op, +- operands[2], operands[3]))); +- +- return true; +-} +- +-/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */ +- +-static int +-ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code) +-{ +- switch (code) +- { +- case EQ: +- return 0; +- case LT: +- case LTU: +- return 1; +- case LE: +- case LEU: +- return 2; +- case NE: +- return 4; +- case GE: +- case GEU: +- return 5; +- case GT: +- case GTU: +- return 6; +- default: +- gcc_unreachable (); +- } +-} +- +-/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */ +- +-static int +-ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code) +-{ +- switch (code) +- { +- case EQ: +- return 0x00; +- case NE: +- return 0x04; +- case GT: +- return 0x0e; +- case LE: +- return 0x02; +- case GE: +- return 0x0d; +- case LT: +- return 0x01; +- case UNLE: +- return 0x0a; +- case UNLT: +- return 0x09; +- case UNGE: +- return 0x05; +- case UNGT: +- return 0x06; +- case UNEQ: +- return 0x18; +- case LTGT: +- return 0x0c; +- case ORDERED: +- return 0x07; +- case UNORDERED: +- return 0x03; +- default: +- gcc_unreachable (); +- } +-} +- +-/* Return immediate value to be used in UNSPEC_PCMP +- for comparison CODE in MODE. */ +- +-static int +-ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode) +-{ +- if (FLOAT_MODE_P (mode)) +- return ix86_fp_cmp_code_to_pcmp_immediate (code); +- return ix86_int_cmp_code_to_pcmp_immediate (code); +-} +- +-/* Expand AVX-512 vector comparison. 
*/ +- +-bool +-ix86_expand_mask_vec_cmp (rtx operands[]) +-{ +- machine_mode mask_mode = GET_MODE (operands[0]); +- machine_mode cmp_mode = GET_MODE (operands[2]); +- enum rtx_code code = GET_CODE (operands[1]); +- rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode)); +- int unspec_code; +- rtx unspec; +- +- switch (code) +- { +- case LEU: +- case GTU: +- case GEU: +- case LTU: +- unspec_code = UNSPEC_UNSIGNED_PCMP; +- break; +- +- default: +- unspec_code = UNSPEC_PCMP; +- } +- +- unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2], +- operands[3], imm), +- unspec_code); +- emit_insn (gen_rtx_SET (operands[0], unspec)); +- +- return true; +-} +- +-/* Expand fp vector comparison. */ +- +-bool +-ix86_expand_fp_vec_cmp (rtx operands[]) +-{ +- enum rtx_code code = GET_CODE (operands[1]); +- rtx cmp; +- +- code = ix86_prepare_sse_fp_compare_args (operands[0], code, +- &operands[2], &operands[3]); +- if (code == UNKNOWN) +- { +- rtx temp; +- switch (GET_CODE (operands[1])) +- { +- case LTGT: +- temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2], +- operands[3], NULL, NULL); +- cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2], +- operands[3], NULL, NULL); +- code = AND; +- break; +- case UNEQ: +- temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2], +- operands[3], NULL, NULL); +- cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2], +- operands[3], NULL, NULL); +- code = IOR; +- break; +- default: +- gcc_unreachable (); +- } +- cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, +- OPTAB_DIRECT); +- } +- else +- cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3], +- operands[1], operands[2]); +- +- if (operands[0] != cmp) +- emit_move_insn (operands[0], cmp); +- +- return true; +-} +- +-static rtx +-ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, +- rtx op_true, rtx op_false, bool *negate) +-{ +- machine_mode data_mode = GET_MODE (dest); +- machine_mode mode = GET_MODE (cop0); +- rtx x; +- +- *negate = false; +- +- /* XOP supports all of the comparisons on all 128-bit vector int types. */ +- if (TARGET_XOP +- && (mode == V16QImode || mode == V8HImode +- || mode == V4SImode || mode == V2DImode)) +- ; +- else +- { +- /* Canonicalize the comparison to EQ, GT, GTU. */ +- switch (code) +- { +- case EQ: +- case GT: +- case GTU: +- break; +- +- case NE: +- case LE: +- case LEU: +- code = reverse_condition (code); +- *negate = true; +- break; +- +- case GE: +- case GEU: +- code = reverse_condition (code); +- *negate = true; +- /* FALLTHRU */ +- +- case LT: +- case LTU: +- std::swap (cop0, cop1); +- code = swap_condition (code); +- break; +- +- default: +- gcc_unreachable (); +- } +- +- /* Only SSE4.1/SSE4.2 supports V2DImode. */ +- if (mode == V2DImode) +- { +- switch (code) +- { +- case EQ: +- /* SSE4.1 supports EQ. */ +- if (!TARGET_SSE4_1) +- return NULL; +- break; +- +- case GT: +- case GTU: +- /* SSE4.2 supports GT/GTU. */ +- if (!TARGET_SSE4_2) +- return NULL; +- break; +- +- default: +- gcc_unreachable (); +- } +- } +- +- rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode); +- rtx opfalse = op_false ? op_false : CONST0_RTX (data_mode); +- if (*negate) +- std::swap (optrue, opfalse); +- +- /* Transform x > y ? 0 : -1 (i.e. x <= y ? -1 : 0 or x <= y) when +- not using integer masks into min (x, y) == x ? -1 : 0 (i.e. +- min (x, y) == x). While we add one instruction (the minimum), +- we remove the need for two instructions in the negation, as the +- result is done this way. 
+- When using masks, do it for SI/DImode element types, as it is shorter +- than the two subtractions. */ +- if ((code != EQ +- && GET_MODE_SIZE (mode) != 64 +- && vector_all_ones_operand (opfalse, data_mode) +- && optrue == CONST0_RTX (data_mode)) +- || (code == GTU +- && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4 +- /* Don't do it if not using integer masks and we'd end up with +- the right values in the registers though. */ +- && (GET_MODE_SIZE (mode) == 64 +- || !vector_all_ones_operand (optrue, data_mode) +- || opfalse != CONST0_RTX (data_mode)))) +- { +- rtx (*gen) (rtx, rtx, rtx) = NULL; +- +- switch (mode) +- { +- case E_V16SImode: +- gen = (code == GTU) ? gen_uminv16si3 : gen_sminv16si3; +- break; +- case E_V8DImode: +- gen = (code == GTU) ? gen_uminv8di3 : gen_sminv8di3; +- cop0 = force_reg (mode, cop0); +- cop1 = force_reg (mode, cop1); +- break; +- case E_V32QImode: +- if (TARGET_AVX2) +- gen = (code == GTU) ? gen_uminv32qi3 : gen_sminv32qi3; +- break; +- case E_V16HImode: +- if (TARGET_AVX2) +- gen = (code == GTU) ? gen_uminv16hi3 : gen_sminv16hi3; +- break; +- case E_V8SImode: +- if (TARGET_AVX2) +- gen = (code == GTU) ? gen_uminv8si3 : gen_sminv8si3; +- break; +- case E_V4DImode: +- if (TARGET_AVX512VL) +- { +- gen = (code == GTU) ? gen_uminv4di3 : gen_sminv4di3; +- cop0 = force_reg (mode, cop0); +- cop1 = force_reg (mode, cop1); +- } +- break; +- case E_V16QImode: +- if (code == GTU && TARGET_SSE2) +- gen = gen_uminv16qi3; +- else if (code == GT && TARGET_SSE4_1) +- gen = gen_sminv16qi3; +- break; +- case E_V8HImode: +- if (code == GTU && TARGET_SSE4_1) +- gen = gen_uminv8hi3; +- else if (code == GT && TARGET_SSE2) +- gen = gen_sminv8hi3; +- break; +- case E_V4SImode: +- if (TARGET_SSE4_1) +- gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3; +- break; +- case E_V2DImode: +- if (TARGET_AVX512VL) +- { +- gen = (code == GTU) ? gen_uminv2di3 : gen_sminv2di3; +- cop0 = force_reg (mode, cop0); +- cop1 = force_reg (mode, cop1); +- } +- break; +- default: +- break; +- } +- +- if (gen) +- { +- rtx tem = gen_reg_rtx (mode); +- if (!vector_operand (cop0, mode)) +- cop0 = force_reg (mode, cop0); +- if (!vector_operand (cop1, mode)) +- cop1 = force_reg (mode, cop1); +- *negate = !*negate; +- emit_insn (gen (tem, cop0, cop1)); +- cop1 = tem; +- code = EQ; +- } +- } +- +- /* Unsigned parallel compare is not supported by the hardware. +- Play some tricks to turn this into a signed comparison +- against 0. */ +- if (code == GTU) +- { +- cop0 = force_reg (mode, cop0); +- +- switch (mode) +- { +- case E_V16SImode: +- case E_V8DImode: +- case E_V8SImode: +- case E_V4DImode: +- case E_V4SImode: +- case E_V2DImode: +- { +- rtx t1, t2, mask; +- rtx (*gen_sub3) (rtx, rtx, rtx); +- +- switch (mode) +- { +- case E_V16SImode: gen_sub3 = gen_subv16si3; break; +- case E_V8DImode: gen_sub3 = gen_subv8di3; break; +- case E_V8SImode: gen_sub3 = gen_subv8si3; break; +- case E_V4DImode: gen_sub3 = gen_subv4di3; break; +- case E_V4SImode: gen_sub3 = gen_subv4si3; break; +- case E_V2DImode: gen_sub3 = gen_subv2di3; break; +- default: +- gcc_unreachable (); +- } +- /* Subtract (-(INT MAX) - 1) from both operands to make +- them signed. 
*/ +- mask = ix86_build_signbit_mask (mode, true, false); +- t1 = gen_reg_rtx (mode); +- emit_insn (gen_sub3 (t1, cop0, mask)); +- +- t2 = gen_reg_rtx (mode); +- emit_insn (gen_sub3 (t2, cop1, mask)); +- +- cop0 = t1; +- cop1 = t2; +- code = GT; +- } +- break; +- +- case E_V64QImode: +- case E_V32HImode: +- case E_V32QImode: +- case E_V16HImode: +- case E_V16QImode: +- case E_V8HImode: +- /* Perform a parallel unsigned saturating subtraction. */ +- x = gen_reg_rtx (mode); +- emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, +- cop1))); +- +- cop0 = x; +- cop1 = CONST0_RTX (mode); +- code = EQ; +- *negate = !*negate; +- break; +- +- default: +- gcc_unreachable (); +- } +- } +- } +- +- if (*negate) +- std::swap (op_true, op_false); +- +- /* Allow the comparison to be done in one mode, but the movcc to +- happen in another mode. */ +- if (data_mode == mode) +- { +- x = ix86_expand_sse_cmp (dest, code, cop0, cop1, +- op_true, op_false); +- } +- else +- { +- gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode)); +- x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1, +- op_true, op_false); +- if (GET_MODE (x) == mode) +- x = gen_lowpart (data_mode, x); +- } +- +- return x; +-} +- +-/* Expand integer vector comparison. */ +- +-bool +-ix86_expand_int_vec_cmp (rtx operands[]) +-{ +- rtx_code code = GET_CODE (operands[1]); +- bool negate = false; +- rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2], +- operands[3], NULL, NULL, &negate); +- +- if (!cmp) +- return false; +- +- if (negate) +- cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp, +- CONST0_RTX (GET_MODE (cmp)), +- NULL, NULL, &negate); +- +- gcc_assert (!negate); +- +- if (operands[0] != cmp) +- emit_move_insn (operands[0], cmp); +- +- return true; +-} +- +-/* Expand a floating-point vector conditional move; a vcond operation +- rather than a movcc operation. */ +- +-bool +-ix86_expand_fp_vcond (rtx operands[]) +-{ +- enum rtx_code code = GET_CODE (operands[3]); +- rtx cmp; +- +- code = ix86_prepare_sse_fp_compare_args (operands[0], code, +- &operands[4], &operands[5]); +- if (code == UNKNOWN) +- { +- rtx temp; +- switch (GET_CODE (operands[3])) +- { +- case LTGT: +- temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], +- operands[5], operands[0], operands[0]); +- cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], +- operands[5], operands[1], operands[2]); +- code = AND; +- break; +- case UNEQ: +- temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], +- operands[5], operands[0], operands[0]); +- cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], +- operands[5], operands[1], operands[2]); +- code = IOR; +- break; +- default: +- gcc_unreachable (); +- } +- cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, +- OPTAB_DIRECT); +- ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); +- return true; +- } +- +- if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], +- operands[5], operands[1], operands[2])) +- return true; +- +- cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], +- operands[1], operands[2]); +- ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); +- return true; +-} +- +-/* Expand a signed/unsigned integral vector conditional move. 
*/ +- +-bool +-ix86_expand_int_vcond (rtx operands[]) +-{ +- machine_mode data_mode = GET_MODE (operands[0]); +- machine_mode mode = GET_MODE (operands[4]); +- enum rtx_code code = GET_CODE (operands[3]); +- bool negate = false; +- rtx x, cop0, cop1; +- +- cop0 = operands[4]; +- cop1 = operands[5]; +- +- /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 +- and x < 0 ? 1 : 0 into (unsigned) x >> 31. */ +- if ((code == LT || code == GE) +- && data_mode == mode +- && cop1 == CONST0_RTX (mode) +- && operands[1 + (code == LT)] == CONST0_RTX (data_mode) +- && GET_MODE_UNIT_SIZE (data_mode) > 1 +- && GET_MODE_UNIT_SIZE (data_mode) <= 8 +- && (GET_MODE_SIZE (data_mode) == 16 +- || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32))) +- { +- rtx negop = operands[2 - (code == LT)]; +- int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1; +- if (negop == CONST1_RTX (data_mode)) +- { +- rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift), +- operands[0], 1, OPTAB_DIRECT); +- if (res != operands[0]) +- emit_move_insn (operands[0], res); +- return true; +- } +- else if (GET_MODE_INNER (data_mode) != DImode +- && vector_all_ones_operand (negop, data_mode)) +- { +- rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift), +- operands[0], 0, OPTAB_DIRECT); +- if (res != operands[0]) +- emit_move_insn (operands[0], res); +- return true; +- } +- } +- +- if (!nonimmediate_operand (cop1, mode)) +- cop1 = force_reg (mode, cop1); +- if (!general_operand (operands[1], data_mode)) +- operands[1] = force_reg (data_mode, operands[1]); +- if (!general_operand (operands[2], data_mode)) +- operands[2] = force_reg (data_mode, operands[2]); +- +- x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1, +- operands[1], operands[2], &negate); +- +- if (!x) +- return false; +- +- ix86_expand_sse_movcc (operands[0], x, operands[1+negate], +- operands[2-negate]); +- return true; +-} +- +-/* AVX512F does support 64-byte integer vector operations, +- thus the longest vector we are faced with is V64QImode. */ +-#define MAX_VECT_LEN 64 +- +-struct expand_vec_perm_d +-{ +- rtx target, op0, op1; +- unsigned char perm[MAX_VECT_LEN]; +- machine_mode vmode; +- unsigned char nelt; +- bool one_operand_p; +- bool testing_p; +-}; +- +-static bool +-ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1, +- struct expand_vec_perm_d *d) +-{ +- /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const +- expander, so args are either in d, or in op0, op1 etc. */ +- machine_mode mode = GET_MODE (d ? 
d->op0 : op0); +- machine_mode maskmode = mode; +- rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; +- +- switch (mode) +- { +- case E_V8HImode: +- if (TARGET_AVX512VL && TARGET_AVX512BW) +- gen = gen_avx512vl_vpermt2varv8hi3; +- break; +- case E_V16HImode: +- if (TARGET_AVX512VL && TARGET_AVX512BW) +- gen = gen_avx512vl_vpermt2varv16hi3; +- break; +- case E_V64QImode: +- if (TARGET_AVX512VBMI) +- gen = gen_avx512bw_vpermt2varv64qi3; +- break; +- case E_V32HImode: +- if (TARGET_AVX512BW) +- gen = gen_avx512bw_vpermt2varv32hi3; +- break; +- case E_V4SImode: +- if (TARGET_AVX512VL) +- gen = gen_avx512vl_vpermt2varv4si3; +- break; +- case E_V8SImode: +- if (TARGET_AVX512VL) +- gen = gen_avx512vl_vpermt2varv8si3; +- break; +- case E_V16SImode: +- if (TARGET_AVX512F) +- gen = gen_avx512f_vpermt2varv16si3; +- break; +- case E_V4SFmode: +- if (TARGET_AVX512VL) +- { +- gen = gen_avx512vl_vpermt2varv4sf3; +- maskmode = V4SImode; +- } +- break; +- case E_V8SFmode: +- if (TARGET_AVX512VL) +- { +- gen = gen_avx512vl_vpermt2varv8sf3; +- maskmode = V8SImode; +- } +- break; +- case E_V16SFmode: +- if (TARGET_AVX512F) +- { +- gen = gen_avx512f_vpermt2varv16sf3; +- maskmode = V16SImode; +- } +- break; +- case E_V2DImode: +- if (TARGET_AVX512VL) +- gen = gen_avx512vl_vpermt2varv2di3; +- break; +- case E_V4DImode: +- if (TARGET_AVX512VL) +- gen = gen_avx512vl_vpermt2varv4di3; +- break; +- case E_V8DImode: +- if (TARGET_AVX512F) +- gen = gen_avx512f_vpermt2varv8di3; +- break; +- case E_V2DFmode: +- if (TARGET_AVX512VL) +- { +- gen = gen_avx512vl_vpermt2varv2df3; +- maskmode = V2DImode; +- } +- break; +- case E_V4DFmode: +- if (TARGET_AVX512VL) +- { +- gen = gen_avx512vl_vpermt2varv4df3; +- maskmode = V4DImode; +- } +- break; +- case E_V8DFmode: +- if (TARGET_AVX512F) +- { +- gen = gen_avx512f_vpermt2varv8df3; +- maskmode = V8DImode; +- } +- break; +- default: +- break; +- } +- +- if (gen == NULL) +- return false; +- +- /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const +- expander, so args are either in d, or in op0, op1 etc. */ +- if (d) +- { +- rtx vec[64]; +- target = d->target; +- op0 = d->op0; +- op1 = d->op1; +- for (int i = 0; i < d->nelt; ++i) +- vec[i] = GEN_INT (d->perm[i]); +- mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); +- } +- +- emit_insn (gen (target, force_reg (maskmode, mask), op0, op1)); +- return true; +-} +- +-/* Expand a variable vector permutation. */ +- +-void +-ix86_expand_vec_perm (rtx operands[]) +-{ +- rtx target = operands[0]; +- rtx op0 = operands[1]; +- rtx op1 = operands[2]; +- rtx mask = operands[3]; +- rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32]; +- machine_mode mode = GET_MODE (op0); +- machine_mode maskmode = GET_MODE (mask); +- int w, e, i; +- bool one_operand_shuffle = rtx_equal_p (op0, op1); +- +- /* Number of elements in the vector. 
*/ +- w = GET_MODE_NUNITS (mode); +- e = GET_MODE_UNIT_SIZE (mode); +- gcc_assert (w <= 64); +- +- if (TARGET_AVX512F && one_operand_shuffle) +- { +- rtx (*gen) (rtx, rtx, rtx) = NULL; +- switch (mode) +- { +- case E_V16SImode: +- gen =gen_avx512f_permvarv16si; +- break; +- case E_V16SFmode: +- gen = gen_avx512f_permvarv16sf; +- break; +- case E_V8DImode: +- gen = gen_avx512f_permvarv8di; +- break; +- case E_V8DFmode: +- gen = gen_avx512f_permvarv8df; +- break; +- default: +- break; +- } +- if (gen != NULL) +- { +- emit_insn (gen (target, op0, mask)); +- return; +- } +- } +- +- if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL)) +- return; +- +- if (TARGET_AVX2) +- { +- if (mode == V4DImode || mode == V4DFmode || mode == V16HImode) +- { +- /* Unfortunately, the VPERMQ and VPERMPD instructions only support +- an constant shuffle operand. With a tiny bit of effort we can +- use VPERMD instead. A re-interpretation stall for V4DFmode is +- unfortunate but there's no avoiding it. +- Similarly for V16HImode we don't have instructions for variable +- shuffling, while for V32QImode we can use after preparing suitable +- masks vpshufb; vpshufb; vpermq; vpor. */ +- +- if (mode == V16HImode) +- { +- maskmode = mode = V32QImode; +- w = 32; +- e = 1; +- } +- else +- { +- maskmode = mode = V8SImode; +- w = 8; +- e = 4; +- } +- t1 = gen_reg_rtx (maskmode); +- +- /* Replicate the low bits of the V4DImode mask into V8SImode: +- mask = { A B C D } +- t1 = { A A B B C C D D }. */ +- for (i = 0; i < w / 2; ++i) +- vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2); +- vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); +- vt = force_reg (maskmode, vt); +- mask = gen_lowpart (maskmode, mask); +- if (maskmode == V8SImode) +- emit_insn (gen_avx2_permvarv8si (t1, mask, vt)); +- else +- emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt)); +- +- /* Multiply the shuffle indicies by two. */ +- t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1, +- OPTAB_DIRECT); +- +- /* Add one to the odd shuffle indicies: +- t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */ +- for (i = 0; i < w / 2; ++i) +- { +- vec[i * 2] = const0_rtx; +- vec[i * 2 + 1] = const1_rtx; +- } +- vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); +- vt = validize_mem (force_const_mem (maskmode, vt)); +- t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1, +- OPTAB_DIRECT); +- +- /* Continue as if V8SImode (resp. V32QImode) was used initially. */ +- operands[3] = mask = t1; +- target = gen_reg_rtx (mode); +- op0 = gen_lowpart (mode, op0); +- op1 = gen_lowpart (mode, op1); +- } +- +- switch (mode) +- { +- case E_V8SImode: +- /* The VPERMD and VPERMPS instructions already properly ignore +- the high bits of the shuffle elements. No need for us to +- perform an AND ourselves. 
*/ +- if (one_operand_shuffle) +- { +- emit_insn (gen_avx2_permvarv8si (target, op0, mask)); +- if (target != operands[0]) +- emit_move_insn (operands[0], +- gen_lowpart (GET_MODE (operands[0]), target)); +- } +- else +- { +- t1 = gen_reg_rtx (V8SImode); +- t2 = gen_reg_rtx (V8SImode); +- emit_insn (gen_avx2_permvarv8si (t1, op0, mask)); +- emit_insn (gen_avx2_permvarv8si (t2, op1, mask)); +- goto merge_two; +- } +- return; +- +- case E_V8SFmode: +- mask = gen_lowpart (V8SImode, mask); +- if (one_operand_shuffle) +- emit_insn (gen_avx2_permvarv8sf (target, op0, mask)); +- else +- { +- t1 = gen_reg_rtx (V8SFmode); +- t2 = gen_reg_rtx (V8SFmode); +- emit_insn (gen_avx2_permvarv8sf (t1, op0, mask)); +- emit_insn (gen_avx2_permvarv8sf (t2, op1, mask)); +- goto merge_two; +- } +- return; +- +- case E_V4SImode: +- /* By combining the two 128-bit input vectors into one 256-bit +- input vector, we can use VPERMD and VPERMPS for the full +- two-operand shuffle. */ +- t1 = gen_reg_rtx (V8SImode); +- t2 = gen_reg_rtx (V8SImode); +- emit_insn (gen_avx_vec_concatv8si (t1, op0, op1)); +- emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); +- emit_insn (gen_avx2_permvarv8si (t1, t1, t2)); +- emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx)); +- return; +- +- case E_V4SFmode: +- t1 = gen_reg_rtx (V8SFmode); +- t2 = gen_reg_rtx (V8SImode); +- mask = gen_lowpart (V4SImode, mask); +- emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1)); +- emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); +- emit_insn (gen_avx2_permvarv8sf (t1, t1, t2)); +- emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx)); +- return; +- +- case E_V32QImode: +- t1 = gen_reg_rtx (V32QImode); +- t2 = gen_reg_rtx (V32QImode); +- t3 = gen_reg_rtx (V32QImode); +- vt2 = GEN_INT (-128); +- vt = gen_const_vec_duplicate (V32QImode, vt2); +- vt = force_reg (V32QImode, vt); +- for (i = 0; i < 32; i++) +- vec[i] = i < 16 ? vt2 : const0_rtx; +- vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec)); +- vt2 = force_reg (V32QImode, vt2); +- /* From mask create two adjusted masks, which contain the same +- bits as mask in the low 7 bits of each vector element. +- The first mask will have the most significant bit clear +- if it requests element from the same 128-bit lane +- and MSB set if it requests element from the other 128-bit lane. +- The second mask will have the opposite values of the MSB, +- and additionally will have its 128-bit lanes swapped. +- E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have +- t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and +- t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ... +- stands for other 12 bytes. */ +- /* The bit whether element is from the same lane or the other +- lane is bit 4, so shift it up by 3 to the MSB position. */ +- t5 = gen_reg_rtx (V4DImode); +- emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask), +- GEN_INT (3))); +- /* Clear MSB bits from the mask just in case it had them set. */ +- emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask)); +- /* After this t1 will have MSB set for elements from other lane. */ +- emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2)); +- /* Clear bits other than MSB. */ +- emit_insn (gen_andv32qi3 (t1, t1, vt)); +- /* Or in the lower bits from mask into t3. */ +- emit_insn (gen_iorv32qi3 (t3, t1, t2)); +- /* And invert MSB bits in t1, so MSB is set for elements from the same +- lane. */ +- emit_insn (gen_xorv32qi3 (t1, t1, vt)); +- /* Swap 128-bit lanes in t3. 
*/ +- t6 = gen_reg_rtx (V4DImode); +- emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3), +- const2_rtx, GEN_INT (3), +- const0_rtx, const1_rtx)); +- /* And or in the lower bits from mask into t1. */ +- emit_insn (gen_iorv32qi3 (t1, t1, t2)); +- if (one_operand_shuffle) +- { +- /* Each of these shuffles will put 0s in places where +- element from the other 128-bit lane is needed, otherwise +- will shuffle in the requested value. */ +- emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, +- gen_lowpart (V32QImode, t6))); +- emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1)); +- /* For t3 the 128-bit lanes are swapped again. */ +- t7 = gen_reg_rtx (V4DImode); +- emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3), +- const2_rtx, GEN_INT (3), +- const0_rtx, const1_rtx)); +- /* And oring both together leads to the result. */ +- emit_insn (gen_iorv32qi3 (target, t1, +- gen_lowpart (V32QImode, t7))); +- if (target != operands[0]) +- emit_move_insn (operands[0], +- gen_lowpart (GET_MODE (operands[0]), target)); +- return; +- } +- +- t4 = gen_reg_rtx (V32QImode); +- /* Similarly to the above one_operand_shuffle code, +- just for repeated twice for each operand. merge_two: +- code will merge the two results together. */ +- emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, +- gen_lowpart (V32QImode, t6))); +- emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, +- gen_lowpart (V32QImode, t6))); +- emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1)); +- emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1)); +- t7 = gen_reg_rtx (V4DImode); +- emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4), +- const2_rtx, GEN_INT (3), +- const0_rtx, const1_rtx)); +- t8 = gen_reg_rtx (V4DImode); +- emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3), +- const2_rtx, GEN_INT (3), +- const0_rtx, const1_rtx)); +- emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7))); +- emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8))); +- t1 = t4; +- t2 = t3; +- goto merge_two; +- +- default: +- gcc_assert (GET_MODE_SIZE (mode) <= 16); +- break; +- } +- } +- +- if (TARGET_XOP) +- { +- /* The XOP VPPERM insn supports three inputs. By ignoring the +- one_operand_shuffle special case, we avoid creating another +- set of constant vectors in memory. */ +- one_operand_shuffle = false; +- +- /* mask = mask & {2*w-1, ...} */ +- vt = GEN_INT (2*w - 1); +- } +- else +- { +- /* mask = mask & {w-1, ...} */ +- vt = GEN_INT (w - 1); +- } +- +- vt = gen_const_vec_duplicate (maskmode, vt); +- mask = expand_simple_binop (maskmode, AND, mask, vt, +- NULL_RTX, 0, OPTAB_DIRECT); +- +- /* For non-QImode operations, convert the word permutation control +- into a byte permutation control. */ +- if (mode != V16QImode) +- { +- mask = expand_simple_binop (maskmode, ASHIFT, mask, +- GEN_INT (exact_log2 (e)), +- NULL_RTX, 0, OPTAB_DIRECT); +- +- /* Convert mask to vector of chars. */ +- mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask)); +- +- /* Replicate each of the input bytes into byte positions: +- (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8} +- (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12} +- (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. 
*/ +- for (i = 0; i < 16; ++i) +- vec[i] = GEN_INT (i/e * e); +- vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); +- vt = validize_mem (force_const_mem (V16QImode, vt)); +- if (TARGET_XOP) +- emit_insn (gen_xop_pperm (mask, mask, mask, vt)); +- else +- emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt)); +- +- /* Convert it into the byte positions by doing +- mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */ +- for (i = 0; i < 16; ++i) +- vec[i] = GEN_INT (i % e); +- vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); +- vt = validize_mem (force_const_mem (V16QImode, vt)); +- emit_insn (gen_addv16qi3 (mask, mask, vt)); +- } +- +- /* The actual shuffle operations all operate on V16QImode. */ +- op0 = gen_lowpart (V16QImode, op0); +- op1 = gen_lowpart (V16QImode, op1); +- +- if (TARGET_XOP) +- { +- if (GET_MODE (target) != V16QImode) +- target = gen_reg_rtx (V16QImode); +- emit_insn (gen_xop_pperm (target, op0, op1, mask)); +- if (target != operands[0]) +- emit_move_insn (operands[0], +- gen_lowpart (GET_MODE (operands[0]), target)); +- } +- else if (one_operand_shuffle) +- { +- if (GET_MODE (target) != V16QImode) +- target = gen_reg_rtx (V16QImode); +- emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask)); +- if (target != operands[0]) +- emit_move_insn (operands[0], +- gen_lowpart (GET_MODE (operands[0]), target)); +- } +- else +- { +- rtx xops[6]; +- bool ok; +- +- /* Shuffle the two input vectors independently. */ +- t1 = gen_reg_rtx (V16QImode); +- t2 = gen_reg_rtx (V16QImode); +- emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask)); +- emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask)); +- +- merge_two: +- /* Then merge them together. The key is whether any given control +- element contained a bit set that indicates the second word. */ +- mask = operands[3]; +- vt = GEN_INT (w); +- if (maskmode == V2DImode && !TARGET_SSE4_1) +- { +- /* Without SSE4.1, we don't have V2DImode EQ. Perform one +- more shuffle to convert the V2DI input mask into a V4SI +- input mask. At which point the masking that expand_int_vcond +- will work as desired. */ +- rtx t3 = gen_reg_rtx (V4SImode); +- emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask), +- const0_rtx, const0_rtx, +- const2_rtx, const2_rtx)); +- mask = t3; +- maskmode = V4SImode; +- e = w = 4; +- } +- +- vt = gen_const_vec_duplicate (maskmode, vt); +- vt = force_reg (maskmode, vt); +- mask = expand_simple_binop (maskmode, AND, mask, vt, +- NULL_RTX, 0, OPTAB_DIRECT); +- +- if (GET_MODE (target) != mode) +- target = gen_reg_rtx (mode); +- xops[0] = target; +- xops[1] = gen_lowpart (mode, t2); +- xops[2] = gen_lowpart (mode, t1); +- xops[3] = gen_rtx_EQ (maskmode, mask, vt); +- xops[4] = mask; +- xops[5] = vt; +- ok = ix86_expand_int_vcond (xops); +- gcc_assert (ok); +- if (target != operands[0]) +- emit_move_insn (operands[0], +- gen_lowpart (GET_MODE (operands[0]), target)); +- } +-} +- +-/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is +- true if we should do zero extension, else sign extension. HIGH_P is +- true if we want the N/2 high elements, else the low elements. 
*/ +- +-void +-ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) +-{ +- machine_mode imode = GET_MODE (src); +- rtx tmp; +- +- if (TARGET_SSE4_1) +- { +- rtx (*unpack)(rtx, rtx); +- rtx (*extract)(rtx, rtx) = NULL; +- machine_mode halfmode = BLKmode; +- +- switch (imode) +- { +- case E_V64QImode: +- if (unsigned_p) +- unpack = gen_avx512bw_zero_extendv32qiv32hi2; +- else +- unpack = gen_avx512bw_sign_extendv32qiv32hi2; +- halfmode = V32QImode; +- extract +- = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi; +- break; +- case E_V32QImode: +- if (unsigned_p) +- unpack = gen_avx2_zero_extendv16qiv16hi2; +- else +- unpack = gen_avx2_sign_extendv16qiv16hi2; +- halfmode = V16QImode; +- extract +- = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi; +- break; +- case E_V32HImode: +- if (unsigned_p) +- unpack = gen_avx512f_zero_extendv16hiv16si2; +- else +- unpack = gen_avx512f_sign_extendv16hiv16si2; +- halfmode = V16HImode; +- extract +- = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi; +- break; +- case E_V16HImode: +- if (unsigned_p) +- unpack = gen_avx2_zero_extendv8hiv8si2; +- else +- unpack = gen_avx2_sign_extendv8hiv8si2; +- halfmode = V8HImode; +- extract +- = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi; +- break; +- case E_V16SImode: +- if (unsigned_p) +- unpack = gen_avx512f_zero_extendv8siv8di2; +- else +- unpack = gen_avx512f_sign_extendv8siv8di2; +- halfmode = V8SImode; +- extract +- = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si; +- break; +- case E_V8SImode: +- if (unsigned_p) +- unpack = gen_avx2_zero_extendv4siv4di2; +- else +- unpack = gen_avx2_sign_extendv4siv4di2; +- halfmode = V4SImode; +- extract +- = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si; +- break; +- case E_V16QImode: +- if (unsigned_p) +- unpack = gen_sse4_1_zero_extendv8qiv8hi2; +- else +- unpack = gen_sse4_1_sign_extendv8qiv8hi2; +- break; +- case E_V8HImode: +- if (unsigned_p) +- unpack = gen_sse4_1_zero_extendv4hiv4si2; +- else +- unpack = gen_sse4_1_sign_extendv4hiv4si2; +- break; +- case E_V4SImode: +- if (unsigned_p) +- unpack = gen_sse4_1_zero_extendv2siv2di2; +- else +- unpack = gen_sse4_1_sign_extendv2siv2di2; +- break; +- default: +- gcc_unreachable (); +- } +- +- if (GET_MODE_SIZE (imode) >= 32) +- { +- tmp = gen_reg_rtx (halfmode); +- emit_insn (extract (tmp, src)); +- } +- else if (high_p) +- { +- /* Shift higher 8 bytes to lower 8 bytes. 
*/ +- tmp = gen_reg_rtx (V1TImode); +- emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src), +- GEN_INT (64))); +- tmp = gen_lowpart (imode, tmp); +- } +- else +- tmp = src; +- +- emit_insn (unpack (dest, tmp)); +- } +- else +- { +- rtx (*unpack)(rtx, rtx, rtx); +- +- switch (imode) +- { +- case E_V16QImode: +- if (high_p) +- unpack = gen_vec_interleave_highv16qi; +- else +- unpack = gen_vec_interleave_lowv16qi; +- break; +- case E_V8HImode: +- if (high_p) +- unpack = gen_vec_interleave_highv8hi; +- else +- unpack = gen_vec_interleave_lowv8hi; +- break; +- case E_V4SImode: +- if (high_p) +- unpack = gen_vec_interleave_highv4si; +- else +- unpack = gen_vec_interleave_lowv4si; +- break; +- default: +- gcc_unreachable (); +- } +- +- if (unsigned_p) +- tmp = force_reg (imode, CONST0_RTX (imode)); +- else +- tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), +- src, pc_rtx, pc_rtx); +- +- rtx tmp2 = gen_reg_rtx (imode); +- emit_insn (unpack (tmp2, src, tmp)); +- emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2)); +- } +-} +- +-/* Expand conditional increment or decrement using adb/sbb instructions. +- The default case using setcc followed by the conditional move can be +- done by generic code. */ +-bool +-ix86_expand_int_addcc (rtx operands[]) +-{ +- enum rtx_code code = GET_CODE (operands[1]); +- rtx flags; +- rtx (*insn)(rtx, rtx, rtx, rtx, rtx); +- rtx compare_op; +- rtx val = const0_rtx; +- bool fpcmp = false; +- machine_mode mode; +- rtx op0 = XEXP (operands[1], 0); +- rtx op1 = XEXP (operands[1], 1); +- +- if (operands[3] != const1_rtx +- && operands[3] != constm1_rtx) +- return false; +- if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) +- return false; +- code = GET_CODE (compare_op); +- +- flags = XEXP (compare_op, 0); +- +- if (GET_MODE (flags) == CCFPmode) +- { +- fpcmp = true; +- code = ix86_fp_compare_code_to_integer (code); +- } +- +- if (code != LTU) +- { +- val = constm1_rtx; +- if (fpcmp) +- PUT_CODE (compare_op, +- reverse_condition_maybe_unordered +- (GET_CODE (compare_op))); +- else +- PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); +- } +- +- mode = GET_MODE (operands[0]); +- +- /* Construct either adc or sbb insn. */ +- if ((code == LTU) == (operands[3] == constm1_rtx)) +- { +- switch (mode) +- { +- case E_QImode: +- insn = gen_subqi3_carry; +- break; +- case E_HImode: +- insn = gen_subhi3_carry; +- break; +- case E_SImode: +- insn = gen_subsi3_carry; +- break; +- case E_DImode: +- insn = gen_subdi3_carry; +- break; +- default: +- gcc_unreachable (); +- } +- } +- else +- { +- switch (mode) +- { +- case E_QImode: +- insn = gen_addqi3_carry; +- break; +- case E_HImode: +- insn = gen_addhi3_carry; +- break; +- case E_SImode: +- insn = gen_addsi3_carry; +- break; +- case E_DImode: +- insn = gen_adddi3_carry; +- break; +- default: +- gcc_unreachable (); +- } +- } +- emit_insn (insn (operands[0], operands[2], val, flags, compare_op)); +- +- return true; +-} +- +- +-/* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode, +- but works for floating pointer parameters and nonoffsetable memories. +- For pushes, it returns just stack offsets; the values will be saved +- in the right order. Maximally three parts are generated. */ +- +-static int +-ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode) +-{ +- int size; +- +- if (!TARGET_64BIT) +- size = mode==XFmode ? 
3 : GET_MODE_SIZE (mode) / 4; +- else +- size = (GET_MODE_SIZE (mode) + 4) / 8; +- +- gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand))); +- gcc_assert (size >= 2 && size <= 4); +- +- /* Optimize constant pool reference to immediates. This is used by fp +- moves, that force all constants to memory to allow combining. */ +- if (MEM_P (operand) && MEM_READONLY_P (operand)) +- operand = avoid_constant_pool_reference (operand); +- +- if (MEM_P (operand) && !offsettable_memref_p (operand)) +- { +- /* The only non-offsetable memories we handle are pushes. */ +- int ok = push_operand (operand, VOIDmode); +- +- gcc_assert (ok); +- +- operand = copy_rtx (operand); +- PUT_MODE (operand, word_mode); +- parts[0] = parts[1] = parts[2] = parts[3] = operand; +- return size; +- } +- +- if (GET_CODE (operand) == CONST_VECTOR) +- { +- scalar_int_mode imode = int_mode_for_mode (mode).require (); +- /* Caution: if we looked through a constant pool memory above, +- the operand may actually have a different mode now. That's +- ok, since we want to pun this all the way back to an integer. */ +- operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); +- gcc_assert (operand != NULL); +- mode = imode; +- } +- +- if (!TARGET_64BIT) +- { +- if (mode == DImode) +- split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); +- else +- { +- int i; +- +- if (REG_P (operand)) +- { +- gcc_assert (reload_completed); +- for (i = 0; i < size; i++) +- parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i); +- } +- else if (offsettable_memref_p (operand)) +- { +- operand = adjust_address (operand, SImode, 0); +- parts[0] = operand; +- for (i = 1; i < size; i++) +- parts[i] = adjust_address (operand, SImode, 4 * i); +- } +- else if (CONST_DOUBLE_P (operand)) +- { +- const REAL_VALUE_TYPE *r; +- long l[4]; +- +- r = CONST_DOUBLE_REAL_VALUE (operand); +- switch (mode) +- { +- case E_TFmode: +- real_to_target (l, r, mode); +- parts[3] = gen_int_mode (l[3], SImode); +- parts[2] = gen_int_mode (l[2], SImode); +- break; +- case E_XFmode: +- /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since +- long double may not be 80-bit. */ +- real_to_target (l, r, mode); +- parts[2] = gen_int_mode (l[2], SImode); +- break; +- case E_DFmode: +- REAL_VALUE_TO_TARGET_DOUBLE (*r, l); +- break; +- default: +- gcc_unreachable (); +- } +- parts[1] = gen_int_mode (l[1], SImode); +- parts[0] = gen_int_mode (l[0], SImode); +- } +- else +- gcc_unreachable (); +- } +- } +- else +- { +- if (mode == TImode) +- split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); +- if (mode == XFmode || mode == TFmode) +- { +- machine_mode upper_mode = mode==XFmode ? SImode : DImode; +- if (REG_P (operand)) +- { +- gcc_assert (reload_completed); +- parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); +- parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); +- } +- else if (offsettable_memref_p (operand)) +- { +- operand = adjust_address (operand, DImode, 0); +- parts[0] = operand; +- parts[1] = adjust_address (operand, upper_mode, 8); +- } +- else if (CONST_DOUBLE_P (operand)) +- { +- long l[4]; +- +- real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode); +- +- /* real_to_target puts 32-bit pieces in each long. 
*/ +- parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff)) +- | ((l[1] & HOST_WIDE_INT_C (0xffffffff)) +- << 32), DImode); +- +- if (upper_mode == SImode) +- parts[1] = gen_int_mode (l[2], SImode); +- else +- parts[1] +- = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff)) +- | ((l[3] & HOST_WIDE_INT_C (0xffffffff)) +- << 32), DImode); +- } +- else +- gcc_unreachable (); +- } +- } +- +- return size; +-} +- +-/* Emit insns to perform a move or push of DI, DF, XF, and TF values. +- Return false when normal moves are needed; true when all required +- insns have been emitted. Operands 2-4 contain the input values +- int the correct order; operands 5-7 contain the output values. */ +- +-void +-ix86_split_long_move (rtx operands[]) +-{ +- rtx part[2][4]; +- int nparts, i, j; +- int push = 0; +- int collisions = 0; +- machine_mode mode = GET_MODE (operands[0]); +- bool collisionparts[4]; +- +- /* The DFmode expanders may ask us to move double. +- For 64bit target this is single move. By hiding the fact +- here we simplify i386.md splitters. */ +- if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8) +- { +- /* Optimize constant pool reference to immediates. This is used by +- fp moves, that force all constants to memory to allow combining. */ +- +- if (MEM_P (operands[1]) +- && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF +- && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) +- operands[1] = get_pool_constant (XEXP (operands[1], 0)); +- if (push_operand (operands[0], VOIDmode)) +- { +- operands[0] = copy_rtx (operands[0]); +- PUT_MODE (operands[0], word_mode); +- } +- else +- operands[0] = gen_lowpart (DImode, operands[0]); +- operands[1] = gen_lowpart (DImode, operands[1]); +- emit_move_insn (operands[0], operands[1]); +- return; +- } +- +- /* The only non-offsettable memory we handle is push. */ +- if (push_operand (operands[0], VOIDmode)) +- push = 1; +- else +- gcc_assert (!MEM_P (operands[0]) +- || offsettable_memref_p (operands[0])); +- +- nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); +- ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); +- +- /* When emitting push, take care for source operands on the stack. */ +- if (push && MEM_P (operands[1]) +- && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) +- { +- rtx src_base = XEXP (part[1][nparts - 1], 0); +- +- /* Compensate for the stack decrement by 4. */ +- if (!TARGET_64BIT && nparts == 3 +- && mode == XFmode && TARGET_128BIT_LONG_DOUBLE) +- src_base = plus_constant (Pmode, src_base, 4); +- +- /* src_base refers to the stack pointer and is +- automatically decreased by emitted push. */ +- for (i = 0; i < nparts; i++) +- part[1][i] = change_address (part[1][i], +- GET_MODE (part[1][i]), src_base); +- } +- +- /* We need to do copy in the right order in case an address register +- of the source overlaps the destination. */ +- if (REG_P (part[0][0]) && MEM_P (part[1][0])) +- { +- rtx tmp; +- +- for (i = 0; i < nparts; i++) +- { +- collisionparts[i] +- = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0)); +- if (collisionparts[i]) +- collisions++; +- } +- +- /* Collision in the middle part can be handled by reordering. 
*/ +- if (collisions == 1 && nparts == 3 && collisionparts [1]) +- { +- std::swap (part[0][1], part[0][2]); +- std::swap (part[1][1], part[1][2]); +- } +- else if (collisions == 1 +- && nparts == 4 +- && (collisionparts [1] || collisionparts [2])) +- { +- if (collisionparts [1]) +- { +- std::swap (part[0][1], part[0][2]); +- std::swap (part[1][1], part[1][2]); +- } +- else +- { +- std::swap (part[0][2], part[0][3]); +- std::swap (part[1][2], part[1][3]); +- } +- } +- +- /* If there are more collisions, we can't handle it by reordering. +- Do an lea to the last part and use only one colliding move. */ +- else if (collisions > 1) +- { +- rtx base, addr; +- +- collisions = 1; +- +- base = part[0][nparts - 1]; +- +- /* Handle the case when the last part isn't valid for lea. +- Happens in 64-bit mode storing the 12-byte XFmode. */ +- if (GET_MODE (base) != Pmode) +- base = gen_rtx_REG (Pmode, REGNO (base)); +- +- addr = XEXP (part[1][0], 0); +- if (TARGET_TLS_DIRECT_SEG_REFS) +- { +- struct ix86_address parts; +- int ok = ix86_decompose_address (addr, &parts); +- gcc_assert (ok); +- /* It is not valid to use %gs: or %fs: in lea. */ +- gcc_assert (parts.seg == ADDR_SPACE_GENERIC); +- } +- emit_insn (gen_rtx_SET (base, addr)); +- part[1][0] = replace_equiv_address (part[1][0], base); +- for (i = 1; i < nparts; i++) +- { +- tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i); +- part[1][i] = replace_equiv_address (part[1][i], tmp); +- } +- } +- } +- +- if (push) +- { +- if (!TARGET_64BIT) +- { +- if (nparts == 3) +- { +- if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) +- emit_insn (ix86_gen_add3 (stack_pointer_rtx, +- stack_pointer_rtx, GEN_INT (-4))); +- emit_move_insn (part[0][2], part[1][2]); +- } +- else if (nparts == 4) +- { +- emit_move_insn (part[0][3], part[1][3]); +- emit_move_insn (part[0][2], part[1][2]); +- } +- } +- else +- { +- /* In 64bit mode we don't have 32bit push available. In case this is +- register, it is OK - we will just use larger counterpart. We also +- retype memory - these comes from attempt to avoid REX prefix on +- moving of second half of TFmode value. */ +- if (GET_MODE (part[1][1]) == SImode) +- { +- switch (GET_CODE (part[1][1])) +- { +- case MEM: +- part[1][1] = adjust_address (part[1][1], DImode, 0); +- break; +- +- case REG: +- part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); +- break; +- +- default: +- gcc_unreachable (); +- } +- +- if (GET_MODE (part[1][0]) == SImode) +- part[1][0] = part[1][1]; +- } +- } +- emit_move_insn (part[0][1], part[1][1]); +- emit_move_insn (part[0][0], part[1][0]); +- return; +- } +- +- /* Choose correct order to not overwrite the source before it is copied. */ +- if ((REG_P (part[0][0]) +- && REG_P (part[1][1]) +- && (REGNO (part[0][0]) == REGNO (part[1][1]) +- || (nparts == 3 +- && REGNO (part[0][0]) == REGNO (part[1][2])) +- || (nparts == 4 +- && REGNO (part[0][0]) == REGNO (part[1][3])))) +- || (collisions > 0 +- && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) +- { +- for (i = 0, j = nparts - 1; i < nparts; i++, j--) +- { +- operands[2 + i] = part[0][j]; +- operands[6 + i] = part[1][j]; +- } +- } +- else +- { +- for (i = 0; i < nparts; i++) +- { +- operands[2 + i] = part[0][i]; +- operands[6 + i] = part[1][i]; +- } +- } +- +- /* If optimizing for size, attempt to locally unCSE nonzero constants. 
*/ +- if (optimize_insn_for_size_p ()) +- { +- for (j = 0; j < nparts - 1; j++) +- if (CONST_INT_P (operands[6 + j]) +- && operands[6 + j] != const0_rtx +- && REG_P (operands[2 + j])) +- for (i = j; i < nparts - 1; i++) +- if (CONST_INT_P (operands[7 + i]) +- && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j])) +- operands[7 + i] = operands[2 + j]; +- } +- +- for (i = 0; i < nparts; i++) +- emit_move_insn (operands[2 + i], operands[6 + i]); +- +- return; +-} +- +-/* Helper function of ix86_split_ashl used to generate an SImode/DImode +- left shift by a constant, either using a single shift or +- a sequence of add instructions. */ +- +-static void +-ix86_expand_ashl_const (rtx operand, int count, machine_mode mode) +-{ +- rtx (*insn)(rtx, rtx, rtx); +- +- if (count == 1 +- || (count * ix86_cost->add <= ix86_cost->shift_const +- && !optimize_insn_for_size_p ())) +- { +- insn = mode == DImode ? gen_addsi3 : gen_adddi3; +- while (count-- > 0) +- emit_insn (insn (operand, operand, operand)); +- } +- else +- { +- insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3; +- emit_insn (insn (operand, operand, GEN_INT (count))); +- } +-} +- +-void +-ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode) +-{ +- rtx (*gen_ashl3)(rtx, rtx, rtx); +- rtx (*gen_shld)(rtx, rtx, rtx); +- int half_width = GET_MODE_BITSIZE (mode) >> 1; +- +- rtx low[2], high[2]; +- int count; +- +- if (CONST_INT_P (operands[2])) +- { +- split_double_mode (mode, operands, 2, low, high); +- count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); +- +- if (count >= half_width) +- { +- emit_move_insn (high[0], low[1]); +- emit_move_insn (low[0], const0_rtx); +- +- if (count > half_width) +- ix86_expand_ashl_const (high[0], count - half_width, mode); +- } +- else +- { +- gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; +- +- if (!rtx_equal_p (operands[0], operands[1])) +- emit_move_insn (operands[0], operands[1]); +- +- emit_insn (gen_shld (high[0], low[0], GEN_INT (count))); +- ix86_expand_ashl_const (low[0], count, mode); +- } +- return; +- } +- +- split_double_mode (mode, operands, 1, low, high); +- +- gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3; +- +- if (operands[1] == const1_rtx) +- { +- /* Assuming we've chosen a QImode capable registers, then 1 << N +- can be done with two 32/64-bit shifts, no branches, no cmoves. */ +- if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) +- { +- rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); +- +- ix86_expand_clear (low[0]); +- ix86_expand_clear (high[0]); +- emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width))); +- +- d = gen_lowpart (QImode, low[0]); +- d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); +- s = gen_rtx_EQ (QImode, flags, const0_rtx); +- emit_insn (gen_rtx_SET (d, s)); +- +- d = gen_lowpart (QImode, high[0]); +- d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); +- s = gen_rtx_NE (QImode, flags, const0_rtx); +- emit_insn (gen_rtx_SET (d, s)); +- } +- +- /* Otherwise, we can get the same results by manually performing +- a bit extract operation on bit 5/6, and then performing the two +- shifts. The two methods of getting 0/1 into low/high are exactly +- the same size. Avoiding the shift in the bit extract case helps +- pentium4 a bit; no one else seems to care much either way. 
*/ +- else +- { +- machine_mode half_mode; +- rtx (*gen_lshr3)(rtx, rtx, rtx); +- rtx (*gen_and3)(rtx, rtx, rtx); +- rtx (*gen_xor3)(rtx, rtx, rtx); +- HOST_WIDE_INT bits; +- rtx x; +- +- if (mode == DImode) +- { +- half_mode = SImode; +- gen_lshr3 = gen_lshrsi3; +- gen_and3 = gen_andsi3; +- gen_xor3 = gen_xorsi3; +- bits = 5; +- } +- else +- { +- half_mode = DImode; +- gen_lshr3 = gen_lshrdi3; +- gen_and3 = gen_anddi3; +- gen_xor3 = gen_xordi3; +- bits = 6; +- } +- +- if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) +- x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]); +- else +- x = gen_lowpart (half_mode, operands[2]); +- emit_insn (gen_rtx_SET (high[0], x)); +- +- emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits))); +- emit_insn (gen_and3 (high[0], high[0], const1_rtx)); +- emit_move_insn (low[0], high[0]); +- emit_insn (gen_xor3 (low[0], low[0], const1_rtx)); +- } +- +- emit_insn (gen_ashl3 (low[0], low[0], operands[2])); +- emit_insn (gen_ashl3 (high[0], high[0], operands[2])); +- return; +- } +- +- if (operands[1] == constm1_rtx) +- { +- /* For -1 << N, we can avoid the shld instruction, because we +- know that we're shifting 0...31/63 ones into a -1. */ +- emit_move_insn (low[0], constm1_rtx); +- if (optimize_insn_for_size_p ()) +- emit_move_insn (high[0], low[0]); +- else +- emit_move_insn (high[0], constm1_rtx); +- } +- else +- { +- gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; +- +- if (!rtx_equal_p (operands[0], operands[1])) +- emit_move_insn (operands[0], operands[1]); +- +- split_double_mode (mode, operands, 1, low, high); +- emit_insn (gen_shld (high[0], low[0], operands[2])); +- } +- +- emit_insn (gen_ashl3 (low[0], low[0], operands[2])); +- +- if (TARGET_CMOVE && scratch) +- { +- rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) +- = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; +- +- ix86_expand_clear (scratch); +- emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch)); +- } +- else +- { +- rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) +- = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; +- +- emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); +- } +-} +- +-void +-ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode) +-{ +- rtx (*gen_ashr3)(rtx, rtx, rtx) +- = mode == DImode ? gen_ashrsi3 : gen_ashrdi3; +- rtx (*gen_shrd)(rtx, rtx, rtx); +- int half_width = GET_MODE_BITSIZE (mode) >> 1; +- +- rtx low[2], high[2]; +- int count; +- +- if (CONST_INT_P (operands[2])) +- { +- split_double_mode (mode, operands, 2, low, high); +- count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); +- +- if (count == GET_MODE_BITSIZE (mode) - 1) +- { +- emit_move_insn (high[0], high[1]); +- emit_insn (gen_ashr3 (high[0], high[0], +- GEN_INT (half_width - 1))); +- emit_move_insn (low[0], high[0]); +- +- } +- else if (count >= half_width) +- { +- emit_move_insn (low[0], high[1]); +- emit_move_insn (high[0], low[0]); +- emit_insn (gen_ashr3 (high[0], high[0], +- GEN_INT (half_width - 1))); +- +- if (count > half_width) +- emit_insn (gen_ashr3 (low[0], low[0], +- GEN_INT (count - half_width))); +- } +- else +- { +- gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; +- +- if (!rtx_equal_p (operands[0], operands[1])) +- emit_move_insn (operands[0], operands[1]); +- +- emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); +- emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count))); +- } +- } +- else +- { +- gen_shrd = mode == DImode ? 
gen_x86_shrd : gen_x86_64_shrd; +- +- if (!rtx_equal_p (operands[0], operands[1])) +- emit_move_insn (operands[0], operands[1]); +- +- split_double_mode (mode, operands, 1, low, high); +- +- emit_insn (gen_shrd (low[0], high[0], operands[2])); +- emit_insn (gen_ashr3 (high[0], high[0], operands[2])); +- +- if (TARGET_CMOVE && scratch) +- { +- rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) +- = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; +- +- emit_move_insn (scratch, high[0]); +- emit_insn (gen_ashr3 (scratch, scratch, +- GEN_INT (half_width - 1))); +- emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], +- scratch)); +- } +- else +- { +- rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx) +- = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3; +- +- emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); +- } +- } +-} +- +-void +-ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode) +-{ +- rtx (*gen_lshr3)(rtx, rtx, rtx) +- = mode == DImode ? gen_lshrsi3 : gen_lshrdi3; +- rtx (*gen_shrd)(rtx, rtx, rtx); +- int half_width = GET_MODE_BITSIZE (mode) >> 1; +- +- rtx low[2], high[2]; +- int count; +- +- if (CONST_INT_P (operands[2])) +- { +- split_double_mode (mode, operands, 2, low, high); +- count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); +- +- if (count >= half_width) +- { +- emit_move_insn (low[0], high[1]); +- ix86_expand_clear (high[0]); +- +- if (count > half_width) +- emit_insn (gen_lshr3 (low[0], low[0], +- GEN_INT (count - half_width))); +- } +- else +- { +- gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; +- +- if (!rtx_equal_p (operands[0], operands[1])) +- emit_move_insn (operands[0], operands[1]); +- +- emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); +- emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count))); +- } +- } +- else +- { +- gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; +- +- if (!rtx_equal_p (operands[0], operands[1])) +- emit_move_insn (operands[0], operands[1]); +- +- split_double_mode (mode, operands, 1, low, high); +- +- emit_insn (gen_shrd (low[0], high[0], operands[2])); +- emit_insn (gen_lshr3 (high[0], high[0], operands[2])); +- +- if (TARGET_CMOVE && scratch) +- { +- rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) +- = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; +- +- ix86_expand_clear (scratch); +- emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], +- scratch)); +- } +- else +- { +- rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) +- = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; +- +- emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); +- } +- } +-} +- +-/* Predict just emitted jump instruction to be taken with probability PROB. */ +-static void +-predict_jump (int prob) +-{ +- rtx_insn *insn = get_last_insn (); +- gcc_assert (JUMP_P (insn)); +- add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob)); +-} +- +-/* Helper function for the string operations below. Dest VARIABLE whether +- it is aligned to VALUE bytes. If true, jump to the label. 
*/ +-static rtx_code_label * +-ix86_expand_aligntest (rtx variable, int value, bool epilogue) +-{ +- rtx_code_label *label = gen_label_rtx (); +- rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); +- if (GET_MODE (variable) == DImode) +- emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); +- else +- emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); +- emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), +- 1, label); +- if (epilogue) +- predict_jump (REG_BR_PROB_BASE * 50 / 100); +- else +- predict_jump (REG_BR_PROB_BASE * 90 / 100); +- return label; +-} +- +-/* Adjust COUNTER by the VALUE. */ +-static void +-ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) +-{ +- rtx (*gen_add)(rtx, rtx, rtx) +- = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3; +- +- emit_insn (gen_add (countreg, countreg, GEN_INT (-value))); +-} +- +-/* Zero extend possibly SImode EXP to Pmode register. */ +-rtx +-ix86_zero_extend_to_Pmode (rtx exp) +-{ +- return force_reg (Pmode, convert_to_mode (Pmode, exp, 1)); +-} +- +-/* Divide COUNTREG by SCALE. */ +-static rtx +-scale_counter (rtx countreg, int scale) +-{ +- rtx sc; +- +- if (scale == 1) +- return countreg; +- if (CONST_INT_P (countreg)) +- return GEN_INT (INTVAL (countreg) / scale); +- gcc_assert (REG_P (countreg)); +- +- sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg, +- GEN_INT (exact_log2 (scale)), +- NULL, 1, OPTAB_DIRECT); +- return sc; +-} +- +-/* Return mode for the memcpy/memset loop counter. Prefer SImode over +- DImode for constant loop counts. */ +- +-static machine_mode +-counter_mode (rtx count_exp) +-{ +- if (GET_MODE (count_exp) != VOIDmode) +- return GET_MODE (count_exp); +- if (!CONST_INT_P (count_exp)) +- return Pmode; +- if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff)) +- return DImode; +- return SImode; +-} +- +-/* Copy the address to a Pmode register. This is used for x32 to +- truncate DImode TLS address to a SImode register. */ +- +-static rtx +-ix86_copy_addr_to_reg (rtx addr) +-{ +- rtx reg; +- if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode) +- { +- reg = copy_addr_to_reg (addr); +- REG_POINTER (reg) = 1; +- return reg; +- } +- else +- { +- gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode); +- reg = copy_to_mode_reg (DImode, addr); +- REG_POINTER (reg) = 1; +- return gen_rtx_SUBREG (SImode, reg, 0); +- } +-} +- +-/* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR +- to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT +- specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set +- memory by VALUE (supposed to be in MODE). +- +- The size is rounded down to whole number of chunk size moved at once. +- SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. 
*/ +- +- +-static void +-expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem, +- rtx destptr, rtx srcptr, rtx value, +- rtx count, machine_mode mode, int unroll, +- int expected_size, bool issetmem) +-{ +- rtx_code_label *out_label, *top_label; +- rtx iter, tmp; +- machine_mode iter_mode = counter_mode (count); +- int piece_size_n = GET_MODE_SIZE (mode) * unroll; +- rtx piece_size = GEN_INT (piece_size_n); +- rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1)); +- rtx size; +- int i; +- +- top_label = gen_label_rtx (); +- out_label = gen_label_rtx (); +- iter = gen_reg_rtx (iter_mode); +- +- size = expand_simple_binop (iter_mode, AND, count, piece_size_mask, +- NULL, 1, OPTAB_DIRECT); +- /* Those two should combine. */ +- if (piece_size == const1_rtx) +- { +- emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode, +- true, out_label); +- predict_jump (REG_BR_PROB_BASE * 10 / 100); +- } +- emit_move_insn (iter, const0_rtx); +- +- emit_label (top_label); +- +- tmp = convert_modes (Pmode, iter_mode, iter, true); +- +- /* This assert could be relaxed - in this case we'll need to compute +- smallest power of two, containing in PIECE_SIZE_N and pass it to +- offset_address. */ +- gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0); +- destmem = offset_address (destmem, tmp, piece_size_n); +- destmem = adjust_address (destmem, mode, 0); +- +- if (!issetmem) +- { +- srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n); +- srcmem = adjust_address (srcmem, mode, 0); +- +- /* When unrolling for chips that reorder memory reads and writes, +- we can save registers by using single temporary. +- Also using 4 temporaries is overkill in 32bit mode. */ +- if (!TARGET_64BIT && 0) +- { +- for (i = 0; i < unroll; i++) +- { +- if (i) +- { +- destmem = adjust_address (copy_rtx (destmem), mode, +- GET_MODE_SIZE (mode)); +- srcmem = adjust_address (copy_rtx (srcmem), mode, +- GET_MODE_SIZE (mode)); +- } +- emit_move_insn (destmem, srcmem); +- } +- } +- else +- { +- rtx tmpreg[4]; +- gcc_assert (unroll <= 4); +- for (i = 0; i < unroll; i++) +- { +- tmpreg[i] = gen_reg_rtx (mode); +- if (i) +- srcmem = adjust_address (copy_rtx (srcmem), mode, +- GET_MODE_SIZE (mode)); +- emit_move_insn (tmpreg[i], srcmem); +- } +- for (i = 0; i < unroll; i++) +- { +- if (i) +- destmem = adjust_address (copy_rtx (destmem), mode, +- GET_MODE_SIZE (mode)); +- emit_move_insn (destmem, tmpreg[i]); +- } +- } +- } +- else +- for (i = 0; i < unroll; i++) +- { +- if (i) +- destmem = adjust_address (copy_rtx (destmem), mode, +- GET_MODE_SIZE (mode)); +- emit_move_insn (destmem, value); +- } +- +- tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter, +- true, OPTAB_LIB_WIDEN); +- if (tmp != iter) +- emit_move_insn (iter, tmp); +- +- emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, +- true, top_label); +- if (expected_size != -1) +- { +- expected_size /= GET_MODE_SIZE (mode) * unroll; +- if (expected_size == 0) +- predict_jump (0); +- else if (expected_size > REG_BR_PROB_BASE) +- predict_jump (REG_BR_PROB_BASE - 1); +- else +- predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) +- / expected_size); +- } +- else +- predict_jump (REG_BR_PROB_BASE * 80 / 100); +- iter = ix86_zero_extend_to_Pmode (iter); +- tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr, +- true, OPTAB_LIB_WIDEN); +- if (tmp != destptr) +- emit_move_insn (destptr, tmp); +- if (!issetmem) +- { +- tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr, +- true, 
OPTAB_LIB_WIDEN); +- if (tmp != srcptr) +- emit_move_insn (srcptr, tmp); +- } +- emit_label (out_label); +-} +- +-/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument. +- When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored. +- When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored. +- For setmem case, VALUE is a promoted to a wider size ORIG_VALUE. +- ORIG_VALUE is the original value passed to memset to fill the memory with. +- Other arguments have same meaning as for previous function. */ +- +-static void +-expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem, +- rtx destptr, rtx srcptr, rtx value, rtx orig_value, +- rtx count, +- machine_mode mode, bool issetmem) +-{ +- rtx destexp; +- rtx srcexp; +- rtx countreg; +- HOST_WIDE_INT rounded_count; +- +- /* If possible, it is shorter to use rep movs. +- TODO: Maybe it is better to move this logic to decide_alg. */ +- if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3) +- && (!issetmem || orig_value == const0_rtx)) +- mode = SImode; +- +- if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) +- destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); +- +- countreg = ix86_zero_extend_to_Pmode (scale_counter (count, +- GET_MODE_SIZE (mode))); +- if (mode != QImode) +- { +- destexp = gen_rtx_ASHIFT (Pmode, countreg, +- GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); +- destexp = gen_rtx_PLUS (Pmode, destexp, destptr); +- } +- else +- destexp = gen_rtx_PLUS (Pmode, destptr, countreg); +- if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count)) +- { +- rounded_count +- = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); +- destmem = shallow_copy_rtx (destmem); +- set_mem_size (destmem, rounded_count); +- } +- else if (MEM_SIZE_KNOWN_P (destmem)) +- clear_mem_size (destmem); +- +- if (issetmem) +- { +- value = force_reg (mode, gen_lowpart (mode, value)); +- emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp)); +- } +- else +- { +- if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode) +- srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0); +- if (mode != QImode) +- { +- srcexp = gen_rtx_ASHIFT (Pmode, countreg, +- GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); +- srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr); +- } +- else +- srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg); +- if (CONST_INT_P (count)) +- { +- rounded_count +- = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); +- srcmem = shallow_copy_rtx (srcmem); +- set_mem_size (srcmem, rounded_count); +- } +- else +- { +- if (MEM_SIZE_KNOWN_P (srcmem)) +- clear_mem_size (srcmem); +- } +- emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg, +- destexp, srcexp)); +- } +-} +- +-/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to +- DESTMEM. +- SRC is passed by pointer to be updated on return. +- Return value is updated DST. */ +-static rtx +-emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr, +- HOST_WIDE_INT size_to_move) +-{ +- rtx dst = destmem, src = *srcmem, adjust, tempreg; +- enum insn_code code; +- machine_mode move_mode; +- int piece_size, i; +- +- /* Find the widest mode in which we could perform moves. +- Start with the biggest power of 2 less than SIZE_TO_MOVE and half +- it until move of such size is supported. 
*/ +- piece_size = 1 << floor_log2 (size_to_move); +- while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode) +- || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing) +- { +- gcc_assert (piece_size > 1); +- piece_size >>= 1; +- } +- +- /* Find the corresponding vector mode with the same size as MOVE_MODE. +- MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ +- if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) +- { +- int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); +- if (!mode_for_vector (word_mode, nunits).exists (&move_mode) +- || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing) +- { +- move_mode = word_mode; +- piece_size = GET_MODE_SIZE (move_mode); +- code = optab_handler (mov_optab, move_mode); +- } +- } +- gcc_assert (code != CODE_FOR_nothing); +- +- dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); +- src = adjust_automodify_address_nv (src, move_mode, srcptr, 0); +- +- /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */ +- gcc_assert (size_to_move % piece_size == 0); +- adjust = GEN_INT (piece_size); +- for (i = 0; i < size_to_move; i += piece_size) +- { +- /* We move from memory to memory, so we'll need to do it via +- a temporary register. */ +- tempreg = gen_reg_rtx (move_mode); +- emit_insn (GEN_FCN (code) (tempreg, src)); +- emit_insn (GEN_FCN (code) (dst, tempreg)); +- +- emit_move_insn (destptr, +- gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust)); +- emit_move_insn (srcptr, +- gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust)); +- +- dst = adjust_automodify_address_nv (dst, move_mode, destptr, +- piece_size); +- src = adjust_automodify_address_nv (src, move_mode, srcptr, +- piece_size); +- } +- +- /* Update DST and SRC rtx. */ +- *srcmem = src; +- return dst; +-} +- +-/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */ +-static void +-expand_movmem_epilogue (rtx destmem, rtx srcmem, +- rtx destptr, rtx srcptr, rtx count, int max_size) +-{ +- rtx src, dest; +- if (CONST_INT_P (count)) +- { +- HOST_WIDE_INT countval = INTVAL (count); +- HOST_WIDE_INT epilogue_size = countval % max_size; +- int i; +- +- /* For now MAX_SIZE should be a power of 2. This assert could be +- relaxed, but it'll require a bit more complicated epilogue +- expanding. */ +- gcc_assert ((max_size & (max_size - 1)) == 0); +- for (i = max_size; i >= 1; i >>= 1) +- { +- if (epilogue_size & i) +- destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); +- } +- return; +- } +- if (max_size > 8) +- { +- count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1), +- count, 1, OPTAB_DIRECT); +- expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL, +- count, QImode, 1, 4, false); +- return; +- } +- +- /* When there are stringops, we can cheaply increase dest and src pointers. +- Otherwise we save code size by maintaining offset (zero is readily +- available from preceding rep operation) and using x86 addressing modes. 
+- */ +- if (TARGET_SINGLE_STRINGOP) +- { +- if (max_size > 4) +- { +- rtx_code_label *label = ix86_expand_aligntest (count, 4, true); +- src = change_address (srcmem, SImode, srcptr); +- dest = change_address (destmem, SImode, destptr); +- emit_insn (gen_strmov (destptr, dest, srcptr, src)); +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- if (max_size > 2) +- { +- rtx_code_label *label = ix86_expand_aligntest (count, 2, true); +- src = change_address (srcmem, HImode, srcptr); +- dest = change_address (destmem, HImode, destptr); +- emit_insn (gen_strmov (destptr, dest, srcptr, src)); +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- if (max_size > 1) +- { +- rtx_code_label *label = ix86_expand_aligntest (count, 1, true); +- src = change_address (srcmem, QImode, srcptr); +- dest = change_address (destmem, QImode, destptr); +- emit_insn (gen_strmov (destptr, dest, srcptr, src)); +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- } +- else +- { +- rtx offset = force_reg (Pmode, const0_rtx); +- rtx tmp; +- +- if (max_size > 4) +- { +- rtx_code_label *label = ix86_expand_aligntest (count, 4, true); +- src = change_address (srcmem, SImode, srcptr); +- dest = change_address (destmem, SImode, destptr); +- emit_move_insn (dest, src); +- tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL, +- true, OPTAB_LIB_WIDEN); +- if (tmp != offset) +- emit_move_insn (offset, tmp); +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- if (max_size > 2) +- { +- rtx_code_label *label = ix86_expand_aligntest (count, 2, true); +- tmp = gen_rtx_PLUS (Pmode, srcptr, offset); +- src = change_address (srcmem, HImode, tmp); +- tmp = gen_rtx_PLUS (Pmode, destptr, offset); +- dest = change_address (destmem, HImode, tmp); +- emit_move_insn (dest, src); +- tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp, +- true, OPTAB_LIB_WIDEN); +- if (tmp != offset) +- emit_move_insn (offset, tmp); +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- if (max_size > 1) +- { +- rtx_code_label *label = ix86_expand_aligntest (count, 1, true); +- tmp = gen_rtx_PLUS (Pmode, srcptr, offset); +- src = change_address (srcmem, QImode, tmp); +- tmp = gen_rtx_PLUS (Pmode, destptr, offset); +- dest = change_address (destmem, QImode, tmp); +- emit_move_insn (dest, src); +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- } +-} +- +-/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM +- with value PROMOTED_VAL. +- SRC is passed by pointer to be updated on return. +- Return value is updated DST. */ +-static rtx +-emit_memset (rtx destmem, rtx destptr, rtx promoted_val, +- HOST_WIDE_INT size_to_move) +-{ +- rtx dst = destmem, adjust; +- enum insn_code code; +- machine_mode move_mode; +- int piece_size, i; +- +- /* Find the widest mode in which we could perform moves. +- Start with the biggest power of 2 less than SIZE_TO_MOVE and half +- it until move of such size is supported. */ +- move_mode = GET_MODE (promoted_val); +- if (move_mode == VOIDmode) +- move_mode = QImode; +- if (size_to_move < GET_MODE_SIZE (move_mode)) +- { +- unsigned int move_bits = size_to_move * BITS_PER_UNIT; +- move_mode = int_mode_for_size (move_bits, 0).require (); +- promoted_val = gen_lowpart (move_mode, promoted_val); +- } +- piece_size = GET_MODE_SIZE (move_mode); +- code = optab_handler (mov_optab, move_mode); +- gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX); +- +- dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); +- +- /* Emit moves. 
We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */ +- gcc_assert (size_to_move % piece_size == 0); +- adjust = GEN_INT (piece_size); +- for (i = 0; i < size_to_move; i += piece_size) +- { +- if (piece_size <= GET_MODE_SIZE (word_mode)) +- { +- emit_insn (gen_strset (destptr, dst, promoted_val)); +- dst = adjust_automodify_address_nv (dst, move_mode, destptr, +- piece_size); +- continue; +- } +- +- emit_insn (GEN_FCN (code) (dst, promoted_val)); +- +- emit_move_insn (destptr, +- gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust)); +- +- dst = adjust_automodify_address_nv (dst, move_mode, destptr, +- piece_size); +- } +- +- /* Update DST rtx. */ +- return dst; +-} +-/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ +-static void +-expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value, +- rtx count, int max_size) +-{ +- count = expand_simple_binop (counter_mode (count), AND, count, +- GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT); +- expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL, +- gen_lowpart (QImode, value), count, QImode, +- 1, max_size / 2, true); +-} +- +-/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ +-static void +-expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, +- rtx count, int max_size) +-{ +- rtx dest; +- +- if (CONST_INT_P (count)) +- { +- HOST_WIDE_INT countval = INTVAL (count); +- HOST_WIDE_INT epilogue_size = countval % max_size; +- int i; +- +- /* For now MAX_SIZE should be a power of 2. This assert could be +- relaxed, but it'll require a bit more complicated epilogue +- expanding. */ +- gcc_assert ((max_size & (max_size - 1)) == 0); +- for (i = max_size; i >= 1; i >>= 1) +- { +- if (epilogue_size & i) +- { +- if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) +- destmem = emit_memset (destmem, destptr, vec_value, i); +- else +- destmem = emit_memset (destmem, destptr, value, i); +- } +- } +- return; +- } +- if (max_size > 32) +- { +- expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size); +- return; +- } +- if (max_size > 16) +- { +- rtx_code_label *label = ix86_expand_aligntest (count, 16, true); +- if (TARGET_64BIT) +- { +- dest = change_address (destmem, DImode, destptr); +- emit_insn (gen_strset (destptr, dest, value)); +- dest = adjust_automodify_address_nv (dest, DImode, destptr, 8); +- emit_insn (gen_strset (destptr, dest, value)); +- } +- else +- { +- dest = change_address (destmem, SImode, destptr); +- emit_insn (gen_strset (destptr, dest, value)); +- dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); +- emit_insn (gen_strset (destptr, dest, value)); +- dest = adjust_automodify_address_nv (dest, SImode, destptr, 8); +- emit_insn (gen_strset (destptr, dest, value)); +- dest = adjust_automodify_address_nv (dest, SImode, destptr, 12); +- emit_insn (gen_strset (destptr, dest, value)); +- } +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- if (max_size > 8) +- { +- rtx_code_label *label = ix86_expand_aligntest (count, 8, true); +- if (TARGET_64BIT) +- { +- dest = change_address (destmem, DImode, destptr); +- emit_insn (gen_strset (destptr, dest, value)); +- } +- else +- { +- dest = change_address (destmem, SImode, destptr); +- emit_insn (gen_strset (destptr, dest, value)); +- dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); +- emit_insn (gen_strset (destptr, dest, value)); +- } +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- if (max_size > 4) +- { +- rtx_code_label *label = 
ix86_expand_aligntest (count, 4, true); +- dest = change_address (destmem, SImode, destptr); +- emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- if (max_size > 2) +- { +- rtx_code_label *label = ix86_expand_aligntest (count, 2, true); +- dest = change_address (destmem, HImode, destptr); +- emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- if (max_size > 1) +- { +- rtx_code_label *label = ix86_expand_aligntest (count, 1, true); +- dest = change_address (destmem, QImode, destptr); +- emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +-} +- +-/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to +- DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN. +- Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are +- ignored. +- Return value is updated DESTMEM. */ +-static rtx +-expand_set_or_movmem_prologue (rtx destmem, rtx srcmem, +- rtx destptr, rtx srcptr, rtx value, +- rtx vec_value, rtx count, int align, +- int desired_alignment, bool issetmem) +-{ +- int i; +- for (i = 1; i < desired_alignment; i <<= 1) +- { +- if (align <= i) +- { +- rtx_code_label *label = ix86_expand_aligntest (destptr, i, false); +- if (issetmem) +- { +- if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) +- destmem = emit_memset (destmem, destptr, vec_value, i); +- else +- destmem = emit_memset (destmem, destptr, value, i); +- } +- else +- destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); +- ix86_adjust_counter (count, i); +- emit_label (label); +- LABEL_NUSES (label) = 1; +- set_mem_align (destmem, i * 2 * BITS_PER_UNIT); +- } +- } +- return destmem; +-} +- +-/* Test if COUNT&SIZE is nonzero and if so, expand movme +- or setmem sequence that is valid for SIZE..2*SIZE-1 bytes +- and jump to DONE_LABEL. */ +-static void +-expand_small_movmem_or_setmem (rtx destmem, rtx srcmem, +- rtx destptr, rtx srcptr, +- rtx value, rtx vec_value, +- rtx count, int size, +- rtx done_label, bool issetmem) +-{ +- rtx_code_label *label = ix86_expand_aligntest (count, size, false); +- machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk (); +- rtx modesize; +- int n; +- +- /* If we do not have vector value to copy, we must reduce size. */ +- if (issetmem) +- { +- if (!vec_value) +- { +- if (GET_MODE (value) == VOIDmode && size > 8) +- mode = Pmode; +- else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value))) +- mode = GET_MODE (value); +- } +- else +- mode = GET_MODE (vec_value), value = vec_value; +- } +- else +- { +- /* Choose appropriate vector mode. */ +- if (size >= 32) +- mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode; +- else if (size >= 16) +- mode = TARGET_SSE ? 
V16QImode : DImode; +- srcmem = change_address (srcmem, mode, srcptr); +- } +- destmem = change_address (destmem, mode, destptr); +- modesize = GEN_INT (GET_MODE_SIZE (mode)); +- gcc_assert (GET_MODE_SIZE (mode) <= size); +- for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) +- { +- if (issetmem) +- emit_move_insn (destmem, gen_lowpart (mode, value)); +- else +- { +- emit_move_insn (destmem, srcmem); +- srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); +- } +- destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); +- } +- +- destmem = offset_address (destmem, count, 1); +- destmem = offset_address (destmem, GEN_INT (-2 * size), +- GET_MODE_SIZE (mode)); +- if (!issetmem) +- { +- srcmem = offset_address (srcmem, count, 1); +- srcmem = offset_address (srcmem, GEN_INT (-2 * size), +- GET_MODE_SIZE (mode)); +- } +- for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) +- { +- if (issetmem) +- emit_move_insn (destmem, gen_lowpart (mode, value)); +- else +- { +- emit_move_insn (destmem, srcmem); +- srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); +- } +- destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); +- } +- emit_jump_insn (gen_jump (done_label)); +- emit_barrier (); +- +- emit_label (label); +- LABEL_NUSES (label) = 1; +-} +- +-/* Handle small memcpy (up to SIZE that is supposed to be small power of 2. +- and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN +- bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can +- proceed with an loop copying SIZE bytes at once. Do moves in MODE. +- DONE_LABEL is a label after the whole copying sequence. The label is created +- on demand if *DONE_LABEL is NULL. +- MIN_SIZE is minimal size of block copied. This value gets adjusted for new +- bounds after the initial copies. +- +- DESTMEM/SRCMEM are memory expressions pointing to the copies block, +- DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether +- we will dispatch to a library call for large blocks. +- +- In pseudocode we do: +- +- if (COUNT < SIZE) +- { +- Assume that SIZE is 4. 
Bigger sizes are handled analogously +- if (COUNT & 4) +- { +- copy 4 bytes from SRCPTR to DESTPTR +- copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4 +- goto done_label +- } +- if (!COUNT) +- goto done_label; +- copy 1 byte from SRCPTR to DESTPTR +- if (COUNT & 2) +- { +- copy 2 bytes from SRCPTR to DESTPTR +- copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2 +- } +- } +- else +- { +- copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR +- copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE +- +- OLD_DESPTR = DESTPTR; +- Align DESTPTR up to DESIRED_ALIGN +- SRCPTR += DESTPTR - OLD_DESTPTR +- COUNT -= DEST_PTR - OLD_DESTPTR +- if (DYNAMIC_CHECK) +- Round COUNT down to multiple of SIZE +- << optional caller supplied zero size guard is here >> +- << optional caller supplied dynamic check is here >> +- << caller supplied main copy loop is here >> +- } +- done_label: +- */ +-static void +-expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem, +- rtx *destptr, rtx *srcptr, +- machine_mode mode, +- rtx value, rtx vec_value, +- rtx *count, +- rtx_code_label **done_label, +- int size, +- int desired_align, +- int align, +- unsigned HOST_WIDE_INT *min_size, +- bool dynamic_check, +- bool issetmem) +-{ +- rtx_code_label *loop_label = NULL, *label; +- int n; +- rtx modesize; +- int prolog_size = 0; +- rtx mode_value; +- +- /* Chose proper value to copy. */ +- if (issetmem && VECTOR_MODE_P (mode)) +- mode_value = vec_value; +- else +- mode_value = value; +- gcc_assert (GET_MODE_SIZE (mode) <= size); +- +- /* See if block is big or small, handle small blocks. */ +- if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size) +- { +- int size2 = size; +- loop_label = gen_label_rtx (); +- +- if (!*done_label) +- *done_label = gen_label_rtx (); +- +- emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count), +- 1, loop_label); +- size2 >>= 1; +- +- /* Handle sizes > 3. */ +- for (;size2 > 2; size2 >>= 1) +- expand_small_movmem_or_setmem (destmem, srcmem, +- *destptr, *srcptr, +- value, vec_value, +- *count, +- size2, *done_label, issetmem); +- /* Nothing to copy? Jump to DONE_LABEL if so */ +- emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count), +- 1, *done_label); +- +- /* Do a byte copy. */ +- destmem = change_address (destmem, QImode, *destptr); +- if (issetmem) +- emit_move_insn (destmem, gen_lowpart (QImode, value)); +- else +- { +- srcmem = change_address (srcmem, QImode, *srcptr); +- emit_move_insn (destmem, srcmem); +- } +- +- /* Handle sizes 2 and 3. */ +- label = ix86_expand_aligntest (*count, 2, false); +- destmem = change_address (destmem, HImode, *destptr); +- destmem = offset_address (destmem, *count, 1); +- destmem = offset_address (destmem, GEN_INT (-2), 2); +- if (issetmem) +- emit_move_insn (destmem, gen_lowpart (HImode, value)); +- else +- { +- srcmem = change_address (srcmem, HImode, *srcptr); +- srcmem = offset_address (srcmem, *count, 1); +- srcmem = offset_address (srcmem, GEN_INT (-2), 2); +- emit_move_insn (destmem, srcmem); +- } +- +- emit_label (label); +- LABEL_NUSES (label) = 1; +- emit_jump_insn (gen_jump (*done_label)); +- emit_barrier (); +- } +- else +- gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size +- || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size); +- +- /* Start memcpy for COUNT >= SIZE. */ +- if (loop_label) +- { +- emit_label (loop_label); +- LABEL_NUSES (loop_label) = 1; +- } +- +- /* Copy first desired_align bytes. 
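The pseudocode above leans on a standard trick: a block whose length is known to lie in the range [SIZE, 2*SIZE-1] can be covered completely by two fixed-size moves, one starting at the first byte and one ending at the last byte, even though the two may overlap in the middle. A minimal standalone C sketch of the idea, with hypothetical names and plain memcpy standing in for the emitted moves:

    #include <stdio.h>
    #include <string.h>
    #include <stddef.h>

    /* Copy N bytes, where SIZE <= N <= 2*SIZE - 1, using exactly two
       moves of SIZE bytes each.  The destination ranges may overlap in
       the middle when N < 2*SIZE, but the overlapping bytes receive the
       same values from SRC either way, so the result is correct.  */
    static void copy_head_tail (char *dst, const char *src, size_t n, size_t size)
    {
      memcpy (dst, src, size);                        /* first SIZE bytes */
      memcpy (dst + n - size, src + n - size, size);  /* last SIZE bytes  */
    }

    int main (void)
    {
      char in[] = "0123456789", out[11] = { 0 };
      copy_head_tail (out, in, 7, 4);   /* moves at offsets 0 and 3 */
      puts (out);                       /* prints 0123456 */
      return 0;
    }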
*/ +- if (!issetmem) +- srcmem = change_address (srcmem, mode, *srcptr); +- destmem = change_address (destmem, mode, *destptr); +- modesize = GEN_INT (GET_MODE_SIZE (mode)); +- for (n = 0; prolog_size < desired_align - align; n++) +- { +- if (issetmem) +- emit_move_insn (destmem, mode_value); +- else +- { +- emit_move_insn (destmem, srcmem); +- srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); +- } +- destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); +- prolog_size += GET_MODE_SIZE (mode); +- } +- +- +- /* Copy last SIZE bytes. */ +- destmem = offset_address (destmem, *count, 1); +- destmem = offset_address (destmem, +- GEN_INT (-size - prolog_size), +- 1); +- if (issetmem) +- emit_move_insn (destmem, mode_value); +- else +- { +- srcmem = offset_address (srcmem, *count, 1); +- srcmem = offset_address (srcmem, +- GEN_INT (-size - prolog_size), +- 1); +- emit_move_insn (destmem, srcmem); +- } +- for (n = 1; n * GET_MODE_SIZE (mode) < size; n++) +- { +- destmem = offset_address (destmem, modesize, 1); +- if (issetmem) +- emit_move_insn (destmem, mode_value); +- else +- { +- srcmem = offset_address (srcmem, modesize, 1); +- emit_move_insn (destmem, srcmem); +- } +- } +- +- /* Align destination. */ +- if (desired_align > 1 && desired_align > align) +- { +- rtx saveddest = *destptr; +- +- gcc_assert (desired_align <= size); +- /* Align destptr up, place it to new register. */ +- *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr, +- GEN_INT (prolog_size), +- NULL_RTX, 1, OPTAB_DIRECT); +- if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest)) +- REG_POINTER (*destptr) = 1; +- *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr, +- GEN_INT (-desired_align), +- *destptr, 1, OPTAB_DIRECT); +- /* See how many bytes we skipped. */ +- saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest, +- *destptr, +- saveddest, 1, OPTAB_DIRECT); +- /* Adjust srcptr and count. */ +- if (!issetmem) +- *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, +- saveddest, *srcptr, 1, OPTAB_DIRECT); +- *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, +- saveddest, *count, 1, OPTAB_DIRECT); +- /* We copied at most size + prolog_size. */ +- if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size)) +- *min_size +- = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size); +- else +- *min_size = 0; +- +- /* Our loops always round down the block size, but for dispatch to +- library we need precise value. */ +- if (dynamic_check) +- *count = expand_simple_binop (GET_MODE (*count), AND, *count, +- GEN_INT (-size), *count, 1, OPTAB_DIRECT); +- } +- else +- { +- gcc_assert (prolog_size == 0); +- /* Decrease count, so we won't end up copying last word twice. */ +- if (!CONST_INT_P (*count)) +- *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, +- constm1_rtx, *count, 1, OPTAB_DIRECT); +- else +- *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1, +- (unsigned HOST_WIDE_INT)size)); +- if (*min_size) +- *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size); +- } +-} +- +- +-/* This function is like the previous one, except here we know how many bytes +- need to be copied. That allows us to update alignment not only of DST, which +- is returned, but also of SRC, which is passed as a pointer for that +- reason. 
*/ +-static rtx +-expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, +- rtx srcreg, rtx value, rtx vec_value, +- int desired_align, int align_bytes, +- bool issetmem) +-{ +- rtx src = NULL; +- rtx orig_dst = dst; +- rtx orig_src = NULL; +- int piece_size = 1; +- int copied_bytes = 0; +- +- if (!issetmem) +- { +- gcc_assert (srcp != NULL); +- src = *srcp; +- orig_src = src; +- } +- +- for (piece_size = 1; +- piece_size <= desired_align && copied_bytes < align_bytes; +- piece_size <<= 1) +- { +- if (align_bytes & piece_size) +- { +- if (issetmem) +- { +- if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value))) +- dst = emit_memset (dst, destreg, vec_value, piece_size); +- else +- dst = emit_memset (dst, destreg, value, piece_size); +- } +- else +- dst = emit_memmov (dst, &src, destreg, srcreg, piece_size); +- copied_bytes += piece_size; +- } +- } +- if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) +- set_mem_align (dst, desired_align * BITS_PER_UNIT); +- if (MEM_SIZE_KNOWN_P (orig_dst)) +- set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes); +- +- if (!issetmem) +- { +- int src_align_bytes = get_mem_align_offset (src, desired_align +- * BITS_PER_UNIT); +- if (src_align_bytes >= 0) +- src_align_bytes = desired_align - src_align_bytes; +- if (src_align_bytes >= 0) +- { +- unsigned int src_align; +- for (src_align = desired_align; src_align >= 2; src_align >>= 1) +- { +- if ((src_align_bytes & (src_align - 1)) +- == (align_bytes & (src_align - 1))) +- break; +- } +- if (src_align > (unsigned int) desired_align) +- src_align = desired_align; +- if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT) +- set_mem_align (src, src_align * BITS_PER_UNIT); +- } +- if (MEM_SIZE_KNOWN_P (orig_src)) +- set_mem_size (src, MEM_SIZE (orig_src) - align_bytes); +- *srcp = src; +- } +- +- return dst; +-} +- +-/* Return true if ALG can be used in current context. +- Assume we expand memset if MEMSET is true. */ +-static bool +-alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) +-{ +- if (alg == no_stringop) +- return false; +- if (alg == vector_loop) +- return TARGET_SSE || TARGET_AVX; +- /* Algorithms using the rep prefix want at least edi and ecx; +- additionally, memset wants eax and memcpy wants esi. Don't +- consider such algorithms if the user has appropriated those +- registers for their own purposes, or if we have a non-default +- address space, since some string insns cannot override the segment. */ +- if (alg == rep_prefix_1_byte +- || alg == rep_prefix_4_byte +- || alg == rep_prefix_8_byte) +- { +- if (have_as) +- return false; +- if (fixed_regs[CX_REG] +- || fixed_regs[DI_REG] +- || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])) +- return false; +- } +- return true; +-} +- +-/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */ +-static enum stringop_alg +-decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, +- unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, +- bool memset, bool zero_memset, bool have_as, +- int *dynamic_check, bool *noalign, bool recur) +-{ +- const struct stringop_algs *algs; +- bool optimize_for_speed; +- int max = 0; +- const struct processor_costs *cost; +- int i; +- bool any_alg_usable_p = false; +- +- *noalign = false; +- *dynamic_check = -1; +- +- /* Even if the string operation call is cold, we still might spend a lot +- of time processing large blocks. 
*/ +- if (optimize_function_for_size_p (cfun) +- || (optimize_insn_for_size_p () +- && (max_size < 256 +- || (expected_size != -1 && expected_size < 256)))) +- optimize_for_speed = false; +- else +- optimize_for_speed = true; +- +- cost = optimize_for_speed ? ix86_cost : &ix86_size_cost; +- if (memset) +- algs = &cost->memset[TARGET_64BIT != 0]; +- else +- algs = &cost->memcpy[TARGET_64BIT != 0]; +- +- /* See maximal size for user defined algorithm. */ +- for (i = 0; i < MAX_STRINGOP_ALGS; i++) +- { +- enum stringop_alg candidate = algs->size[i].alg; +- bool usable = alg_usable_p (candidate, memset, have_as); +- any_alg_usable_p |= usable; +- +- if (candidate != libcall && candidate && usable) +- max = algs->size[i].max; +- } +- +- /* If expected size is not known but max size is small enough +- so inline version is a win, set expected size into +- the range. */ +- if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1) +- && expected_size == -1) +- expected_size = min_size / 2 + max_size / 2; +- +- /* If user specified the algorithm, honor it if possible. */ +- if (ix86_stringop_alg != no_stringop +- && alg_usable_p (ix86_stringop_alg, memset, have_as)) +- return ix86_stringop_alg; +- /* rep; movq or rep; movl is the smallest variant. */ +- else if (!optimize_for_speed) +- { +- *noalign = true; +- if (!count || (count & 3) || (memset && !zero_memset)) +- return alg_usable_p (rep_prefix_1_byte, memset, have_as) +- ? rep_prefix_1_byte : loop_1_byte; +- else +- return alg_usable_p (rep_prefix_4_byte, memset, have_as) +- ? rep_prefix_4_byte : loop; +- } +- /* Very tiny blocks are best handled via the loop, REP is expensive to +- setup. */ +- else if (expected_size != -1 && expected_size < 4) +- return loop_1_byte; +- else if (expected_size != -1) +- { +- enum stringop_alg alg = libcall; +- bool alg_noalign = false; +- for (i = 0; i < MAX_STRINGOP_ALGS; i++) +- { +- /* We get here if the algorithms that were not libcall-based +- were rep-prefix based and we are unable to use rep prefixes +- based on global register usage. Break out of the loop and +- use the heuristic below. */ +- if (algs->size[i].max == 0) +- break; +- if (algs->size[i].max >= expected_size || algs->size[i].max == -1) +- { +- enum stringop_alg candidate = algs->size[i].alg; +- +- if (candidate != libcall +- && alg_usable_p (candidate, memset, have_as)) +- { +- alg = candidate; +- alg_noalign = algs->size[i].noalign; +- } +- /* Honor TARGET_INLINE_ALL_STRINGOPS by picking +- last non-libcall inline algorithm. */ +- if (TARGET_INLINE_ALL_STRINGOPS) +- { +- /* When the current size is best to be copied by a libcall, +- but we are still forced to inline, run the heuristic below +- that will pick code for medium sized blocks. */ +- if (alg != libcall) +- { +- *noalign = alg_noalign; +- return alg; +- } +- else if (!any_alg_usable_p) +- break; +- } +- else if (alg_usable_p (candidate, memset, have_as)) +- { +- *noalign = algs->size[i].noalign; +- return candidate; +- } +- } +- } +- } +- /* When asked to inline the call anyway, try to pick meaningful choice. +- We look for maximal size of block that is faster to copy by hand and +- take blocks of at most of that size guessing that average size will +- be roughly half of the block. +- +- If this turns out to be bad, we might simply specify the preferred +- choice in ix86_costs. 
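The loop above scans a per-size table taken from the tuned cost structures: entries are visited in order, the first usable entry whose upper bound covers the expected block size wins, a bound of -1 means "no limit", and a bound of 0 terminates the table. A simplified standalone C sketch of that lookup, with made-up strategy names and thresholds and without the usability or alignment checks:

    #include <stdio.h>

    enum strategy { BYTE_LOOP, UNROLLED_LOOP, REP_PREFIX, LIBCALL };
    struct size_entry { long max; enum strategy alg; };

    /* Return the first strategy whose MAX covers EXPECTED; -1 means
       unlimited, 0 ends the table.  */
    static enum strategy pick_strategy (const struct size_entry *tab, long expected)
    {
      for (int i = 0; tab[i].max != 0; i++)
        if (tab[i].max == -1 || tab[i].max >= expected)
          return tab[i].alg;
      return LIBCALL;
    }

    int main (void)
    {
      const struct size_entry tab[]
        = { { 16, BYTE_LOOP }, { 128, UNROLLED_LOOP },
            { -1, REP_PREFIX }, { 0, BYTE_LOOP } };
      printf ("%d %d %d\n", pick_strategy (tab, 8),
              pick_strategy (tab, 100), pick_strategy (tab, 100000));
      return 0;
    }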
*/ +- if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) +- && (algs->unknown_size == libcall +- || !alg_usable_p (algs->unknown_size, memset, have_as))) +- { +- enum stringop_alg alg; +- HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2; +- +- /* If there aren't any usable algorithms or if recursing already, +- then recursing on smaller sizes or same size isn't going to +- find anything. Just return the simple byte-at-a-time copy loop. */ +- if (!any_alg_usable_p || recur) +- { +- /* Pick something reasonable. */ +- if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur) +- *dynamic_check = 128; +- return loop_1_byte; +- } +- alg = decide_alg (count, new_expected_size, min_size, max_size, memset, +- zero_memset, have_as, dynamic_check, noalign, true); +- gcc_assert (*dynamic_check == -1); +- if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) +- *dynamic_check = max; +- else +- gcc_assert (alg != libcall); +- return alg; +- } +- return (alg_usable_p (algs->unknown_size, memset, have_as) +- ? algs->unknown_size : libcall); +-} +- +-/* Decide on alignment. We know that the operand is already aligned to ALIGN +- (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */ +-static int +-decide_alignment (int align, +- enum stringop_alg alg, +- int expected_size, +- machine_mode move_mode) +-{ +- int desired_align = 0; +- +- gcc_assert (alg != no_stringop); +- +- if (alg == libcall) +- return 0; +- if (move_mode == VOIDmode) +- return 0; +- +- desired_align = GET_MODE_SIZE (move_mode); +- /* PentiumPro has special logic triggering for 8 byte aligned blocks. +- copying whole cacheline at once. */ +- if (TARGET_PENTIUMPRO +- && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte)) +- desired_align = 8; +- +- if (optimize_size) +- desired_align = 1; +- if (desired_align < align) +- desired_align = align; +- if (expected_size != -1 && expected_size < 4) +- desired_align = align; +- +- return desired_align; +-} +- +- +-/* Helper function for memcpy. For QImode value 0xXY produce +- 0xXYXYXYXY of wide specified by MODE. This is essentially +- a * 0x10101010, but we can do slightly better than +- synth_mult by unwinding the sequence by hand on CPUs with +- slow multiply. */ +-static rtx +-promote_duplicated_reg (machine_mode mode, rtx val) +-{ +- machine_mode valmode = GET_MODE (val); +- rtx tmp; +- int nops = mode == DImode ? 3 : 2; +- +- gcc_assert (mode == SImode || mode == DImode || val == const0_rtx); +- if (val == const0_rtx) +- return copy_to_mode_reg (mode, CONST0_RTX (mode)); +- if (CONST_INT_P (val)) +- { +- HOST_WIDE_INT v = INTVAL (val) & 255; +- +- v |= v << 8; +- v |= v << 16; +- if (mode == DImode) +- v |= (v << 16) << 16; +- return copy_to_mode_reg (mode, gen_int_mode (v, mode)); +- } +- +- if (valmode == VOIDmode) +- valmode = QImode; +- if (valmode != QImode) +- val = gen_lowpart (QImode, val); +- if (mode == QImode) +- return val; +- if (!TARGET_PARTIAL_REG_STALL) +- nops--; +- if (ix86_cost->mult_init[mode == DImode ? 3 : 2] +- + ix86_cost->mult_bit * (mode == DImode ? 
8 : 4) +- <= (ix86_cost->shift_const + ix86_cost->add) * nops +- + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0))) +- { +- rtx reg = convert_modes (mode, QImode, val, true); +- tmp = promote_duplicated_reg (mode, const1_rtx); +- return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1, +- OPTAB_DIRECT); +- } +- else +- { +- rtx reg = convert_modes (mode, QImode, val, true); +- +- if (!TARGET_PARTIAL_REG_STALL) +- if (mode == SImode) +- emit_insn (gen_insvsi_1 (reg, reg)); +- else +- emit_insn (gen_insvdi_1 (reg, reg)); +- else +- { +- tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8), +- NULL, 1, OPTAB_DIRECT); +- reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, +- OPTAB_DIRECT); +- } +- tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16), +- NULL, 1, OPTAB_DIRECT); +- reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); +- if (mode == SImode) +- return reg; +- tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32), +- NULL, 1, OPTAB_DIRECT); +- reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); +- return reg; +- } +-} +- +-/* Duplicate value VAL using promote_duplicated_reg into maximal size that will +- be needed by main loop copying SIZE_NEEDED chunks and prologue getting +- alignment from ALIGN to DESIRED_ALIGN. */ +-static rtx +-promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, +- int align) +-{ +- rtx promoted_val; +- +- if (TARGET_64BIT +- && (size_needed > 4 || (desired_align > align && desired_align > 4))) +- promoted_val = promote_duplicated_reg (DImode, val); +- else if (size_needed > 2 || (desired_align > align && desired_align > 2)) +- promoted_val = promote_duplicated_reg (SImode, val); +- else if (size_needed > 1 || (desired_align > align && desired_align > 1)) +- promoted_val = promote_duplicated_reg (HImode, val); +- else +- promoted_val = val; +- +- return promoted_val; +-} +- +-/* Expand string move (memcpy) ot store (memset) operation. Use i386 string +- operations when profitable. The code depends upon architecture, block size +- and alignment, but always has one of the following overall structures: +- +- Aligned move sequence: +- +- 1) Prologue guard: Conditional that jumps up to epilogues for small +- blocks that can be handled by epilogue alone. This is faster +- but also needed for correctness, since prologue assume the block +- is larger than the desired alignment. +- +- Optional dynamic check for size and libcall for large +- blocks is emitted here too, with -minline-stringops-dynamically. +- +- 2) Prologue: copy first few bytes in order to get destination +- aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less +- than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be +- copied. We emit either a jump tree on power of two sized +- blocks, or a byte loop. +- +- 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks +- with specified algorithm. +- +- 4) Epilogue: code copying tail of the block that is too small to be +- handled by main body (or up to size guarded by prologue guard). 
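The promoted value built by promote_duplicated_reg above is simply the fill byte replicated into every byte of a register; the constant path assembles it with shifts and ORs rather than an actual multiply. In plain C, assuming a 64-bit value and hypothetical names, the same construction is:

    #include <stdint.h>
    #include <stdio.h>
    #include <inttypes.h>

    /* Replicate the low byte of VAL into every byte of a 64-bit word:
       0x00000000000000XY -> 0xXYXYXYXYXYXYXYXY.  Equivalent to multiplying
       the byte by 0x0101010101010101, spelled with shifts and ORs.  */
    static uint64_t splat_byte (uint64_t val)
    {
      uint64_t v = val & 0xff;
      v |= v << 8;
      v |= v << 16;
      v |= v << 32;
      return v;
    }

    int main (void)
    {
      printf ("0x%016" PRIx64 "\n", splat_byte (0xAB));  /* 0xabababababababab */
      return 0;
    }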
+- +- Misaligned move sequence +- +- 1) missaligned move prologue/epilogue containing: +- a) Prologue handling small memory blocks and jumping to done_label +- (skipped if blocks are known to be large enough) +- b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is +- needed by single possibly misaligned move +- (skipped if alignment is not needed) +- c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves +- +- 2) Zero size guard dispatching to done_label, if needed +- +- 3) dispatch to library call, if needed, +- +- 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks +- with specified algorithm. */ +-bool +-ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, +- rtx align_exp, rtx expected_align_exp, +- rtx expected_size_exp, rtx min_size_exp, +- rtx max_size_exp, rtx probable_max_size_exp, +- bool issetmem) +-{ +- rtx destreg; +- rtx srcreg = NULL; +- rtx_code_label *label = NULL; +- rtx tmp; +- rtx_code_label *jump_around_label = NULL; +- HOST_WIDE_INT align = 1; +- unsigned HOST_WIDE_INT count = 0; +- HOST_WIDE_INT expected_size = -1; +- int size_needed = 0, epilogue_size_needed; +- int desired_align = 0, align_bytes = 0; +- enum stringop_alg alg; +- rtx promoted_val = NULL; +- rtx vec_promoted_val = NULL; +- bool force_loopy_epilogue = false; +- int dynamic_check; +- bool need_zero_guard = false; +- bool noalign; +- machine_mode move_mode = VOIDmode; +- machine_mode wider_mode; +- int unroll_factor = 1; +- /* TODO: Once value ranges are available, fill in proper data. */ +- unsigned HOST_WIDE_INT min_size = 0; +- unsigned HOST_WIDE_INT max_size = -1; +- unsigned HOST_WIDE_INT probable_max_size = -1; +- bool misaligned_prologue_used = false; +- bool have_as; +- +- if (CONST_INT_P (align_exp)) +- align = INTVAL (align_exp); +- /* i386 can do misaligned access on reasonably increased cost. */ +- if (CONST_INT_P (expected_align_exp) +- && INTVAL (expected_align_exp) > align) +- align = INTVAL (expected_align_exp); +- /* ALIGN is the minimum of destination and source alignment, but we care here +- just about destination alignment. */ +- else if (!issetmem +- && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT) +- align = MEM_ALIGN (dst) / BITS_PER_UNIT; +- +- if (CONST_INT_P (count_exp)) +- { +- min_size = max_size = probable_max_size = count = expected_size +- = INTVAL (count_exp); +- /* When COUNT is 0, there is nothing to do. */ +- if (!count) +- return true; +- } +- else +- { +- if (min_size_exp) +- min_size = INTVAL (min_size_exp); +- if (max_size_exp) +- max_size = INTVAL (max_size_exp); +- if (probable_max_size_exp) +- probable_max_size = INTVAL (probable_max_size_exp); +- if (CONST_INT_P (expected_size_exp)) +- expected_size = INTVAL (expected_size_exp); +- } +- +- /* Make sure we don't need to care about overflow later on. */ +- if (count > (HOST_WIDE_INT_1U << 30)) +- return false; +- +- have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)); +- if (!issetmem) +- have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)); +- +- /* Step 0: Decide on preferred algorithm, desired alignment and +- size of chunks to be copied by main loop. 
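Steps 3 and 4 of the structure described above reduce to a main loop that moves SIZE_NEEDED bytes per iteration (the chunk size times the unroll factor) followed by an epilogue for the remaining bytes. A rough standalone C sketch of that overall shape, illustrative only since the expander emits RTL rather than library calls:

    #include <stdio.h>
    #include <string.h>
    #include <stddef.h>

    /* Main loop plus epilogue: whole chunks first, then the tail.  */
    static void chunked_copy (char *dst, const char *src,
                              size_t count, size_t size_needed)
    {
      size_t done = 0;
      for (; done + size_needed <= count; done += size_needed)
        memcpy (dst + done, src + done, size_needed);   /* main loop */
      memcpy (dst + done, src + done, count - done);    /* epilogue  */
    }

    int main (void)
    {
      char src[37], dst[37] = { 0 };
      for (int i = 0; i < 36; i++)
        src[i] = 'a' + i % 26;
      src[36] = 0;
      chunked_copy (dst, src, 36, 16);   /* two 16-byte iterations, 4-byte tail */
      puts (dst);
      return 0;
    }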
*/ +- alg = decide_alg (count, expected_size, min_size, probable_max_size, +- issetmem, +- issetmem && val_exp == const0_rtx, have_as, +- &dynamic_check, &noalign, false); +- +- if (dump_file) +- fprintf (dump_file, "Selected stringop expansion strategy: %s\n", +- stringop_alg_names[alg]); +- +- if (alg == libcall) +- return false; +- gcc_assert (alg != no_stringop); +- +- /* For now vector-version of memset is generated only for memory zeroing, as +- creating of promoted vector value is very cheap in this case. */ +- if (issetmem && alg == vector_loop && val_exp != const0_rtx) +- alg = unrolled_loop; +- +- if (!count) +- count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp); +- destreg = ix86_copy_addr_to_reg (XEXP (dst, 0)); +- if (!issetmem) +- srcreg = ix86_copy_addr_to_reg (XEXP (src, 0)); +- +- unroll_factor = 1; +- move_mode = word_mode; +- switch (alg) +- { +- case libcall: +- case no_stringop: +- case last_alg: +- gcc_unreachable (); +- case loop_1_byte: +- need_zero_guard = true; +- move_mode = QImode; +- break; +- case loop: +- need_zero_guard = true; +- break; +- case unrolled_loop: +- need_zero_guard = true; +- unroll_factor = (TARGET_64BIT ? 4 : 2); +- break; +- case vector_loop: +- need_zero_guard = true; +- unroll_factor = 4; +- /* Find the widest supported mode. */ +- move_mode = word_mode; +- while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode) +- && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing) +- move_mode = wider_mode; +- +- if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128) +- move_mode = TImode; +- +- /* Find the corresponding vector mode with the same size as MOVE_MODE. +- MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ +- if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) +- { +- int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); +- if (!mode_for_vector (word_mode, nunits).exists (&move_mode) +- || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing) +- move_mode = word_mode; +- } +- gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing); +- break; +- case rep_prefix_8_byte: +- move_mode = DImode; +- break; +- case rep_prefix_4_byte: +- move_mode = SImode; +- break; +- case rep_prefix_1_byte: +- move_mode = QImode; +- break; +- } +- size_needed = GET_MODE_SIZE (move_mode) * unroll_factor; +- epilogue_size_needed = size_needed; +- +- /* If we are going to call any library calls conditionally, make sure any +- pending stack adjustment happen before the first conditional branch, +- otherwise they will be emitted before the library call only and won't +- happen from the other branches. */ +- if (dynamic_check != -1) +- do_pending_stack_adjust (); +- +- desired_align = decide_alignment (align, alg, expected_size, move_mode); +- if (!TARGET_ALIGN_STRINGOPS || noalign) +- align = desired_align; +- +- /* Step 1: Prologue guard. */ +- +- /* Alignment code needs count to be in register. */ +- if (CONST_INT_P (count_exp) && desired_align > align) +- { +- if (INTVAL (count_exp) > desired_align +- && INTVAL (count_exp) > size_needed) +- { +- align_bytes +- = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); +- if (align_bytes <= 0) +- align_bytes = 0; +- else +- align_bytes = desired_align - align_bytes; +- } +- if (align_bytes == 0) +- count_exp = force_reg (counter_mode (count_exp), count_exp); +- } +- gcc_assert (desired_align >= 1 && align >= 1); +- +- /* Misaligned move sequences handle both prologue and epilogue at once. 
+- Default code generation results in a smaller code for large alignments +- and also avoids redundant job when sizes are known precisely. */ +- misaligned_prologue_used +- = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES +- && MAX (desired_align, epilogue_size_needed) <= 32 +- && desired_align <= epilogue_size_needed +- && ((desired_align > align && !align_bytes) +- || (!count && epilogue_size_needed > 1))); +- +- /* Do the cheap promotion to allow better CSE across the +- main loop and epilogue (ie one load of the big constant in the +- front of all code. +- For now the misaligned move sequences do not have fast path +- without broadcasting. */ +- if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) +- { +- if (alg == vector_loop) +- { +- gcc_assert (val_exp == const0_rtx); +- vec_promoted_val = promote_duplicated_reg (move_mode, val_exp); +- promoted_val = promote_duplicated_reg_to_size (val_exp, +- GET_MODE_SIZE (word_mode), +- desired_align, align); +- } +- else +- { +- promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, +- desired_align, align); +- } +- } +- /* Misaligned move sequences handles both prologues and epilogues at once. +- Default code generation results in smaller code for large alignments and +- also avoids redundant job when sizes are known precisely. */ +- if (misaligned_prologue_used) +- { +- /* Misaligned move prologue handled small blocks by itself. */ +- expand_set_or_movmem_prologue_epilogue_by_misaligned_moves +- (dst, src, &destreg, &srcreg, +- move_mode, promoted_val, vec_promoted_val, +- &count_exp, +- &jump_around_label, +- desired_align < align +- ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed, +- desired_align, align, &min_size, dynamic_check, issetmem); +- if (!issetmem) +- src = change_address (src, BLKmode, srcreg); +- dst = change_address (dst, BLKmode, destreg); +- set_mem_align (dst, desired_align * BITS_PER_UNIT); +- epilogue_size_needed = 0; +- if (need_zero_guard +- && min_size < (unsigned HOST_WIDE_INT) size_needed) +- { +- /* It is possible that we copied enough so the main loop will not +- execute. */ +- gcc_assert (size_needed > 1); +- if (jump_around_label == NULL_RTX) +- jump_around_label = gen_label_rtx (); +- emit_cmp_and_jump_insns (count_exp, +- GEN_INT (size_needed), +- LTU, 0, counter_mode (count_exp), 1, jump_around_label); +- if (expected_size == -1 +- || expected_size < (desired_align - align) / 2 + size_needed) +- predict_jump (REG_BR_PROB_BASE * 20 / 100); +- else +- predict_jump (REG_BR_PROB_BASE * 60 / 100); +- } +- } +- /* Ensure that alignment prologue won't copy past end of block. */ +- else if (size_needed > 1 || (desired_align > 1 && desired_align > align)) +- { +- epilogue_size_needed = MAX (size_needed - 1, desired_align - align); +- /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. +- Make sure it is power of 2. */ +- epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1); +- +- /* To improve performance of small blocks, we jump around the VAL +- promoting mode. This mean that if the promoted VAL is not constant, +- we might not use it in the epilogue and have to use byte +- loop variant. */ +- if (issetmem && epilogue_size_needed > 2 && !promoted_val) +- force_loopy_epilogue = true; +- if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed) +- || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) +- { +- /* If main algorithm works on QImode, no epilogue is needed. +- For small sizes just don't align anything. 
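The expression 1 << (floor_log2 (epilogue_size_needed) + 1) just above rounds the epilogue bound up to the smallest power of two strictly greater than its argument, so the tail byte count can later be obtained with a simple mask. A tiny standalone C illustration, with floor_log2 reimplemented as a stand-in for GCC's helper of the same name:

    #include <stdio.h>

    /* Smallest power of two strictly greater than X, i.e.
       1 << (floor_log2 (x) + 1): 3 -> 4, 5 -> 8, 8 -> 16.  */
    static unsigned next_pow2_above (unsigned x)
    {
      int lg = -1;
      while (x)
        {
          x >>= 1;
          lg++;
        }
      return 1u << (lg + 1);
    }

    int main (void)
    {
      printf ("%u %u %u\n", next_pow2_above (3), next_pow2_above (5),
              next_pow2_above (8));   /* prints 4 8 16 */
      return 0;
    }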
*/ +- if (size_needed == 1) +- desired_align = align; +- else +- goto epilogue; +- } +- else if (!count +- && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) +- { +- label = gen_label_rtx (); +- emit_cmp_and_jump_insns (count_exp, +- GEN_INT (epilogue_size_needed), +- LTU, 0, counter_mode (count_exp), 1, label); +- if (expected_size == -1 || expected_size < epilogue_size_needed) +- predict_jump (REG_BR_PROB_BASE * 60 / 100); +- else +- predict_jump (REG_BR_PROB_BASE * 20 / 100); +- } +- } +- +- /* Emit code to decide on runtime whether library call or inline should be +- used. */ +- if (dynamic_check != -1) +- { +- if (!issetmem && CONST_INT_P (count_exp)) +- { +- if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check) +- { +- emit_block_copy_via_libcall (dst, src, count_exp); +- count_exp = const0_rtx; +- goto epilogue; +- } +- } +- else +- { +- rtx_code_label *hot_label = gen_label_rtx (); +- if (jump_around_label == NULL_RTX) +- jump_around_label = gen_label_rtx (); +- emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), +- LEU, 0, counter_mode (count_exp), +- 1, hot_label); +- predict_jump (REG_BR_PROB_BASE * 90 / 100); +- if (issetmem) +- set_storage_via_libcall (dst, count_exp, val_exp); +- else +- emit_block_copy_via_libcall (dst, src, count_exp); +- emit_jump (jump_around_label); +- emit_label (hot_label); +- } +- } +- +- /* Step 2: Alignment prologue. */ +- /* Do the expensive promotion once we branched off the small blocks. */ +- if (issetmem && !promoted_val) +- promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, +- desired_align, align); +- +- if (desired_align > align && !misaligned_prologue_used) +- { +- if (align_bytes == 0) +- { +- /* Except for the first move in prologue, we no longer know +- constant offset in aliasing info. It don't seems to worth +- the pain to maintain it for the first move, so throw away +- the info early. */ +- dst = change_address (dst, BLKmode, destreg); +- if (!issetmem) +- src = change_address (src, BLKmode, srcreg); +- dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg, +- promoted_val, vec_promoted_val, +- count_exp, align, desired_align, +- issetmem); +- /* At most desired_align - align bytes are copied. */ +- if (min_size < (unsigned)(desired_align - align)) +- min_size = 0; +- else +- min_size -= desired_align - align; +- } +- else +- { +- /* If we know how many bytes need to be stored before dst is +- sufficiently aligned, maintain aliasing info accurately. */ +- dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg, +- srcreg, +- promoted_val, +- vec_promoted_val, +- desired_align, +- align_bytes, +- issetmem); +- +- count_exp = plus_constant (counter_mode (count_exp), +- count_exp, -align_bytes); +- count -= align_bytes; +- min_size -= align_bytes; +- max_size -= align_bytes; +- } +- if (need_zero_guard +- && min_size < (unsigned HOST_WIDE_INT) size_needed +- && (count < (unsigned HOST_WIDE_INT) size_needed +- || (align_bytes == 0 +- && count < ((unsigned HOST_WIDE_INT) size_needed +- + desired_align - align)))) +- { +- /* It is possible that we copied enough so the main loop will not +- execute. 
*/ +- gcc_assert (size_needed > 1); +- if (label == NULL_RTX) +- label = gen_label_rtx (); +- emit_cmp_and_jump_insns (count_exp, +- GEN_INT (size_needed), +- LTU, 0, counter_mode (count_exp), 1, label); +- if (expected_size == -1 +- || expected_size < (desired_align - align) / 2 + size_needed) +- predict_jump (REG_BR_PROB_BASE * 20 / 100); +- else +- predict_jump (REG_BR_PROB_BASE * 60 / 100); +- } +- } +- if (label && size_needed == 1) +- { +- emit_label (label); +- LABEL_NUSES (label) = 1; +- label = NULL; +- epilogue_size_needed = 1; +- if (issetmem) +- promoted_val = val_exp; +- } +- else if (label == NULL_RTX && !misaligned_prologue_used) +- epilogue_size_needed = size_needed; +- +- /* Step 3: Main loop. */ +- +- switch (alg) +- { +- case libcall: +- case no_stringop: +- case last_alg: +- gcc_unreachable (); +- case loop_1_byte: +- case loop: +- case unrolled_loop: +- expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val, +- count_exp, move_mode, unroll_factor, +- expected_size, issetmem); +- break; +- case vector_loop: +- expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, +- vec_promoted_val, count_exp, move_mode, +- unroll_factor, expected_size, issetmem); +- break; +- case rep_prefix_8_byte: +- case rep_prefix_4_byte: +- case rep_prefix_1_byte: +- expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val, +- val_exp, count_exp, move_mode, issetmem); +- break; +- } +- /* Adjust properly the offset of src and dest memory for aliasing. */ +- if (CONST_INT_P (count_exp)) +- { +- if (!issetmem) +- src = adjust_automodify_address_nv (src, BLKmode, srcreg, +- (count / size_needed) * size_needed); +- dst = adjust_automodify_address_nv (dst, BLKmode, destreg, +- (count / size_needed) * size_needed); +- } +- else +- { +- if (!issetmem) +- src = change_address (src, BLKmode, srcreg); +- dst = change_address (dst, BLKmode, destreg); +- } +- +- /* Step 4: Epilogue to copy the remaining bytes. */ +- epilogue: +- if (label) +- { +- /* When the main loop is done, COUNT_EXP might hold original count, +- while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. +- Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED +- bytes. Compensate if needed. */ +- +- if (size_needed < epilogue_size_needed) +- { +- tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp, +- GEN_INT (size_needed - 1), count_exp, 1, +- OPTAB_DIRECT); +- if (tmp != count_exp) +- emit_move_insn (count_exp, tmp); +- } +- emit_label (label); +- LABEL_NUSES (label) = 1; +- } +- +- if (count_exp != const0_rtx && epilogue_size_needed > 1) +- { +- if (force_loopy_epilogue) +- expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp, +- epilogue_size_needed); +- else +- { +- if (issetmem) +- expand_setmem_epilogue (dst, destreg, promoted_val, +- vec_promoted_val, count_exp, +- epilogue_size_needed); +- else +- expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp, +- epilogue_size_needed); +- } +- } +- if (jump_around_label) +- emit_label (jump_around_label); +- return true; +-} +- +- +-/* Expand the appropriate insns for doing strlen if not just doing +- repnz; scasb +- +- out = result, initialized with the start address +- align_rtx = alignment of the address. +- scratch = scratch register, initialized with the startaddress when +- not aligned, otherwise undefined +- +- This is just the body. It needs the initializations mentioned above and +- some address computing at the end. These things are done in i386.md. 
*/ +- +-static void +-ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) +-{ +- int align; +- rtx tmp; +- rtx_code_label *align_2_label = NULL; +- rtx_code_label *align_3_label = NULL; +- rtx_code_label *align_4_label = gen_label_rtx (); +- rtx_code_label *end_0_label = gen_label_rtx (); +- rtx mem; +- rtx tmpreg = gen_reg_rtx (SImode); +- rtx scratch = gen_reg_rtx (SImode); +- rtx cmp; +- +- align = 0; +- if (CONST_INT_P (align_rtx)) +- align = INTVAL (align_rtx); +- +- /* Loop to check 1..3 bytes for null to get an aligned pointer. */ +- +- /* Is there a known alignment and is it less than 4? */ +- if (align < 4) +- { +- rtx scratch1 = gen_reg_rtx (Pmode); +- emit_move_insn (scratch1, out); +- /* Is there a known alignment and is it not 2? */ +- if (align != 2) +- { +- align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ +- align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ +- +- /* Leave just the 3 lower bits. */ +- align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), +- NULL_RTX, 0, OPTAB_WIDEN); +- +- emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, +- Pmode, 1, align_4_label); +- emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, +- Pmode, 1, align_2_label); +- emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, +- Pmode, 1, align_3_label); +- } +- else +- { +- /* Since the alignment is 2, we have to check 2 or 0 bytes; +- check if is aligned to 4 - byte. */ +- +- align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, +- NULL_RTX, 0, OPTAB_WIDEN); +- +- emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, +- Pmode, 1, align_4_label); +- } +- +- mem = change_address (src, QImode, out); +- +- /* Now compare the bytes. */ +- +- /* Compare the first n unaligned byte on a byte per byte basis. */ +- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, +- QImode, 1, end_0_label); +- +- /* Increment the address. */ +- emit_insn (ix86_gen_add3 (out, out, const1_rtx)); +- +- /* Not needed with an alignment of 2 */ +- if (align != 2) +- { +- emit_label (align_2_label); +- +- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, +- end_0_label); +- +- emit_insn (ix86_gen_add3 (out, out, const1_rtx)); +- +- emit_label (align_3_label); +- } +- +- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, +- end_0_label); +- +- emit_insn (ix86_gen_add3 (out, out, const1_rtx)); +- } +- +- /* Generate loop to check 4 bytes at a time. It is not a good idea to +- align this loop. It gives only huge programs, but does not help to +- speed up. */ +- emit_label (align_4_label); +- +- mem = change_address (src, SImode, out); +- emit_move_insn (scratch, mem); +- emit_insn (ix86_gen_add3 (out, out, GEN_INT (4))); +- +- /* This formula yields a nonzero result iff one of the bytes is zero. +- This saves three branches inside loop and many cycles. */ +- +- emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); +- emit_insn (gen_one_cmplsi2 (scratch, scratch)); +- emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); +- emit_insn (gen_andsi3 (tmpreg, tmpreg, +- gen_int_mode (0x80808080, SImode))); +- emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, +- align_4_label); +- +- if (TARGET_CMOVE) +- { +- rtx reg = gen_reg_rtx (SImode); +- rtx reg2 = gen_reg_rtx (Pmode); +- emit_move_insn (reg, tmpreg); +- emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); +- +- /* If zero is not in the first two bytes, move two bytes forward. 
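The formula mentioned above is the classic zero-byte test: for a 32-bit word W, (W - 0x01010101) & ~W & 0x80808080 is nonzero exactly when at least one byte of W is zero, which is what lets the loop examine four bytes per iteration without per-byte branches. A standalone C version for reference, with hypothetical names:

    #include <stdint.h>
    #include <stdio.h>

    /* Nonzero iff some byte of W is zero: subtracting 1 from a zero byte
       borrows into its top bit, and the & ~W term discards bytes whose
       top bit was already set in W.  */
    static int has_zero_byte (uint32_t w)
    {
      return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
    }

    int main (void)
    {
      printf ("%d %d\n", has_zero_byte (0x41424344u),   /* no zero byte -> 0 */
              has_zero_byte (0x41420044u));             /* zero byte    -> 1 */
      return 0;
    }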
*/ +- emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); +- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); +- tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); +- emit_insn (gen_rtx_SET (tmpreg, +- gen_rtx_IF_THEN_ELSE (SImode, tmp, +- reg, +- tmpreg))); +- /* Emit lea manually to avoid clobbering of flags. */ +- emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx))); +- +- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); +- tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); +- emit_insn (gen_rtx_SET (out, +- gen_rtx_IF_THEN_ELSE (Pmode, tmp, +- reg2, +- out))); +- } +- else +- { +- rtx_code_label *end_2_label = gen_label_rtx (); +- /* Is zero in the first two bytes? */ +- +- emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); +- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); +- tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); +- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, +- gen_rtx_LABEL_REF (VOIDmode, end_2_label), +- pc_rtx); +- tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); +- JUMP_LABEL (tmp) = end_2_label; +- +- /* Not in the first two. Move two bytes forward. */ +- emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); +- emit_insn (ix86_gen_add3 (out, out, const2_rtx)); +- +- emit_label (end_2_label); +- +- } +- +- /* Avoid branch in fixing the byte. */ +- tmpreg = gen_lowpart (QImode, tmpreg); +- emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg)); +- tmp = gen_rtx_REG (CCmode, FLAGS_REG); +- cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); +- emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp)); +- +- emit_label (end_0_label); +-} +- +-/* Expand strlen. */ +- +-bool +-ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) +-{ +- rtx addr, scratch1, scratch2, scratch3, scratch4; +- +- /* The generic case of strlen expander is long. Avoid it's +- expanding unless TARGET_INLINE_ALL_STRINGOPS. */ +- +- if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 +- && !TARGET_INLINE_ALL_STRINGOPS +- && !optimize_insn_for_size_p () +- && (!CONST_INT_P (align) || INTVAL (align) < 4)) +- return false; +- +- addr = force_reg (Pmode, XEXP (src, 0)); +- scratch1 = gen_reg_rtx (Pmode); +- +- if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 +- && !optimize_insn_for_size_p ()) +- { +- /* Well it seems that some optimizer does not combine a call like +- foo(strlen(bar), strlen(bar)); +- when the move and the subtraction is done here. It does calculate +- the length just once when these instructions are done inside of +- output_strlen_unroll(). But I think since &bar[strlen(bar)] is +- often used and I use one fewer register for the lifetime of +- output_strlen_unroll() this is better. */ +- +- emit_move_insn (out, addr); +- +- ix86_expand_strlensi_unroll_1 (out, src, align); +- +- /* strlensi_unroll_1 returns the address of the zero at the end of +- the string, like memchr(), so compute the length by subtracting +- the start address. */ +- emit_insn (ix86_gen_sub3 (out, out, addr)); +- } +- else +- { +- rtx unspec; +- +- /* Can't use this if the user has appropriated eax, ecx, or edi. */ +- if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) +- return false; +- /* Can't use this for non-default address spaces. 
*/ +- if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))) +- return false; +- +- scratch2 = gen_reg_rtx (Pmode); +- scratch3 = gen_reg_rtx (Pmode); +- scratch4 = force_reg (Pmode, constm1_rtx); +- +- emit_move_insn (scratch3, addr); +- eoschar = force_reg (QImode, eoschar); +- +- src = replace_equiv_address_nv (src, scratch3); +- +- /* If .md starts supporting :P, this can be done in .md. */ +- unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, +- scratch4), UNSPEC_SCAS); +- emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); +- emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1)); +- emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx)); +- } +- return true; +-} +- +-/* For given symbol (function) construct code to compute address of it's PLT +- entry in large x86-64 PIC model. */ +-static rtx +-construct_plt_address (rtx symbol) +-{ +- rtx tmp, unspec; +- +- gcc_assert (GET_CODE (symbol) == SYMBOL_REF); +- gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF); +- gcc_assert (Pmode == DImode); +- +- tmp = gen_reg_rtx (Pmode); +- unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF); +- +- emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec)); +- emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx)); +- return tmp; +-} +- +-rtx_insn * +-ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, +- rtx callarg2, +- rtx pop, bool sibcall) +-{ +- rtx vec[3]; +- rtx use = NULL, call; +- unsigned int vec_len = 0; +- tree fndecl; +- +- if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) +- { +- fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0)); +- if (fndecl +- && (lookup_attribute ("interrupt", +- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))) +- error ("interrupt service routine can%'t be called directly"); +- } +- else +- fndecl = NULL_TREE; +- +- if (pop == const0_rtx) +- pop = NULL; +- gcc_assert (!TARGET_64BIT || !pop); +- +- if (TARGET_MACHO && !TARGET_64BIT) +- { +-#if TARGET_MACHO +- if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) +- fnaddr = machopic_indirect_call_target (fnaddr); +-#endif +- } +- else +- { +- /* Static functions and indirect calls don't need the pic register. Also, +- check if PLT was explicitly avoided via no-plt or "noplt" attribute, making +- it an indirect call. */ +- rtx addr = XEXP (fnaddr, 0); +- if (flag_pic +- && GET_CODE (addr) == SYMBOL_REF +- && !SYMBOL_REF_LOCAL_P (addr)) +- { +- if (flag_plt +- && (SYMBOL_REF_DECL (addr) == NULL_TREE +- || !lookup_attribute ("noplt", +- DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr))))) +- { +- if (!TARGET_64BIT +- || (ix86_cmodel == CM_LARGE_PIC +- && DEFAULT_ABI != MS_ABI)) +- { +- use_reg (&use, gen_rtx_REG (Pmode, +- REAL_PIC_OFFSET_TABLE_REGNUM)); +- if (ix86_use_pseudo_pic_reg ()) +- emit_move_insn (gen_rtx_REG (Pmode, +- REAL_PIC_OFFSET_TABLE_REGNUM), +- pic_offset_table_rtx); +- } +- } +- else if (!TARGET_PECOFF && !TARGET_MACHO) +- { +- if (TARGET_64BIT) +- { +- fnaddr = gen_rtx_UNSPEC (Pmode, +- gen_rtvec (1, addr), +- UNSPEC_GOTPCREL); +- fnaddr = gen_rtx_CONST (Pmode, fnaddr); +- } +- else +- { +- fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), +- UNSPEC_GOT); +- fnaddr = gen_rtx_CONST (Pmode, fnaddr); +- fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, +- fnaddr); +- } +- fnaddr = gen_const_mem (Pmode, fnaddr); +- /* Pmode may not be the same as word_mode for x32, which +- doesn't support indirect branch via 32-bit memory slot. +- Since x32 GOT slot is 64 bit with zero upper 32 bits, +- indirect branch via x32 GOT slot is OK. 
*/ +- if (GET_MODE (fnaddr) != word_mode) +- fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); +- fnaddr = gen_rtx_MEM (QImode, fnaddr); +- } +- } +- } +- +- /* Skip setting up RAX register for -mskip-rax-setup when there are no +- parameters passed in vector registers. */ +- if (TARGET_64BIT +- && (INTVAL (callarg2) > 0 +- || (INTVAL (callarg2) == 0 +- && (TARGET_SSE || !flag_skip_rax_setup)))) +- { +- rtx al = gen_rtx_REG (QImode, AX_REG); +- emit_move_insn (al, callarg2); +- use_reg (&use, al); +- } +- +- if (ix86_cmodel == CM_LARGE_PIC +- && !TARGET_PECOFF +- && MEM_P (fnaddr) +- && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF +- && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) +- fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); +- /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect +- branch via x32 GOT slot is OK. */ +- else if (!(TARGET_X32 +- && MEM_P (fnaddr) +- && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND +- && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode)) +- && (sibcall +- ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode) +- : !call_insn_operand (XEXP (fnaddr, 0), word_mode))) +- { +- fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1); +- fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr)); +- } +- +- call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); +- +- if (retval) +- call = gen_rtx_SET (retval, call); +- vec[vec_len++] = call; +- +- if (pop) +- { +- pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); +- pop = gen_rtx_SET (stack_pointer_rtx, pop); +- vec[vec_len++] = pop; +- } +- +- if (cfun->machine->no_caller_saved_registers +- && (!fndecl +- || (!TREE_THIS_VOLATILE (fndecl) +- && !lookup_attribute ("no_caller_saved_registers", +- TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))) +- { +- static const char ix86_call_used_regs[] = CALL_USED_REGISTERS; +- bool is_64bit_ms_abi = (TARGET_64BIT +- && ix86_function_abi (fndecl) == MS_ABI); +- char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi); +- +- /* If there are no caller-saved registers, add all registers +- that are clobbered by the call which returns. */ +- for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (!fixed_regs[i] +- && (ix86_call_used_regs[i] == 1 +- || (ix86_call_used_regs[i] & c_mask)) +- && !STACK_REGNO_P (i) +- && !MMX_REGNO_P (i)) +- clobber_reg (&use, +- gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i)); +- } +- else if (TARGET_64BIT_MS_ABI +- && (!callarg2 || INTVAL (callarg2) != -2)) +- { +- unsigned i; +- +- for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++) +- { +- int regno = x86_64_ms_sysv_extra_clobbered_registers[i]; +- machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode; +- +- clobber_reg (&use, gen_rtx_REG (mode, regno)); +- } +- +- /* Set here, but it may get cleared later. */ +- if (TARGET_CALL_MS2SYSV_XLOGUES) +- { +- if (!TARGET_SSE) +- ; +- +- /* Don't break hot-patched functions. */ +- else if (ix86_function_ms_hook_prologue (current_function_decl)) +- ; +- +- /* TODO: Cases not yet examined. 
*/ +- else if (flag_split_stack) +- warn_once_call_ms2sysv_xlogues ("-fsplit-stack"); +- +- else +- { +- gcc_assert (!reload_completed); +- cfun->machine->call_ms2sysv = true; +- } +- } +- } +- +- if (vec_len > 1) +- call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec)); +- rtx_insn *call_insn = emit_call_insn (call); +- if (use) +- CALL_INSN_FUNCTION_USAGE (call_insn) = use; +- +- return call_insn; +-} +- +-/* Return true if the function being called was marked with attribute +- "noplt" or using -fno-plt and we are compiling for non-PIC. We need +- to handle the non-PIC case in the backend because there is no easy +- interface for the front-end to force non-PLT calls to use the GOT. +- This is currently used only with 64-bit or 32-bit GOT32X ELF targets +- to call the function marked "noplt" indirectly. */ +- +-static bool +-ix86_nopic_noplt_attribute_p (rtx call_op) +-{ +- if (flag_pic || ix86_cmodel == CM_LARGE +- || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X) +- || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF +- || SYMBOL_REF_LOCAL_P (call_op)) +- return false; +- +- tree symbol_decl = SYMBOL_REF_DECL (call_op); +- +- if (!flag_plt +- || (symbol_decl != NULL_TREE +- && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))) +- return true; +- +- return false; +-} +- +-/* Helper to output the jmp/call. */ +-static void +-ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno) +-{ +- if (thunk_name != NULL) +- { +- fprintf (asm_out_file, "\tjmp\t"); +- assemble_name (asm_out_file, thunk_name); +- putc ('\n', asm_out_file); +- } +- else +- output_indirect_thunk (regno); +-} +- +-/* Output indirect branch via a call and return thunk. CALL_OP is a +- register which contains the branch target. XASM is the assembly +- template for CALL_OP. Branch is a tail call if SIBCALL_P is true. +- A normal call is converted to: +- +- call __x86_indirect_thunk_reg +- +- and a tail call is converted to: +- +- jmp __x86_indirect_thunk_reg +- */ +- +-static void +-ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p) +-{ +- char thunk_name_buf[32]; +- char *thunk_name; +- enum indirect_thunk_prefix need_prefix +- = indirect_thunk_need_prefix (current_output_insn); +- int regno = REGNO (call_op); +- +- if (cfun->machine->indirect_branch_type +- != indirect_branch_thunk_inline) +- { +- if (cfun->machine->indirect_branch_type == indirect_branch_thunk) +- { +- int i = regno; +- if (i >= FIRST_REX_INT_REG) +- i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1); +- indirect_thunks_used |= 1 << i; +- } +- indirect_thunk_name (thunk_name_buf, regno, need_prefix, false); +- thunk_name = thunk_name_buf; +- } +- else +- thunk_name = NULL; +- +- if (sibcall_p) +- ix86_output_jmp_thunk_or_indirect (thunk_name, regno); +- else +- { +- if (thunk_name != NULL) +- { +- fprintf (asm_out_file, "\tcall\t"); +- assemble_name (asm_out_file, thunk_name); +- putc ('\n', asm_out_file); +- return; +- } +- +- char indirectlabel1[32]; +- char indirectlabel2[32]; +- +- ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, +- INDIRECT_LABEL, +- indirectlabelno++); +- ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, +- INDIRECT_LABEL, +- indirectlabelno++); +- +- /* Jump. */ +- fputs ("\tjmp\t", asm_out_file); +- assemble_name_raw (asm_out_file, indirectlabel2); +- fputc ('\n', asm_out_file); +- +- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); +- +- ix86_output_jmp_thunk_or_indirect (thunk_name, regno); +- +- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); +- +- /* Call. 
*/ +- fputs ("\tcall\t", asm_out_file); +- assemble_name_raw (asm_out_file, indirectlabel1); +- fputc ('\n', asm_out_file); +- } +-} +- +-/* Output indirect branch via a call and return thunk. CALL_OP is +- the branch target. XASM is the assembly template for CALL_OP. +- Branch is a tail call if SIBCALL_P is true. A normal call is +- converted to: +- +- jmp L2 +- L1: +- push CALL_OP +- jmp __x86_indirect_thunk +- L2: +- call L1 +- +- and a tail call is converted to: +- +- push CALL_OP +- jmp __x86_indirect_thunk +- */ +- +-static void +-ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm, +- bool sibcall_p) +-{ +- char thunk_name_buf[32]; +- char *thunk_name; +- char push_buf[64]; +- enum indirect_thunk_prefix need_prefix +- = indirect_thunk_need_prefix (current_output_insn); +- int regno = -1; +- +- if (cfun->machine->indirect_branch_type +- != indirect_branch_thunk_inline) +- { +- if (cfun->machine->indirect_branch_type == indirect_branch_thunk) +- indirect_thunk_needed = true; +- indirect_thunk_name (thunk_name_buf, regno, need_prefix, false); +- thunk_name = thunk_name_buf; +- } +- else +- thunk_name = NULL; +- +- snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s", +- TARGET_64BIT ? 'q' : 'l', xasm); +- +- if (sibcall_p) +- { +- output_asm_insn (push_buf, &call_op); +- ix86_output_jmp_thunk_or_indirect (thunk_name, regno); +- } +- else +- { +- char indirectlabel1[32]; +- char indirectlabel2[32]; +- +- ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, +- INDIRECT_LABEL, +- indirectlabelno++); +- ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, +- INDIRECT_LABEL, +- indirectlabelno++); +- +- /* Jump. */ +- fputs ("\tjmp\t", asm_out_file); +- assemble_name_raw (asm_out_file, indirectlabel2); +- fputc ('\n', asm_out_file); +- +- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); +- +- /* An external function may be called via GOT, instead of PLT. */ +- if (MEM_P (call_op)) +- { +- struct ix86_address parts; +- rtx addr = XEXP (call_op, 0); +- if (ix86_decompose_address (addr, &parts) +- && parts.base == stack_pointer_rtx) +- { +- /* Since call will adjust stack by -UNITS_PER_WORD, +- we must convert "disp(stack, index, scale)" to +- "disp+UNITS_PER_WORD(stack, index, scale)". */ +- if (parts.index) +- { +- addr = gen_rtx_MULT (Pmode, parts.index, +- GEN_INT (parts.scale)); +- addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, +- addr); +- } +- else +- addr = stack_pointer_rtx; +- +- rtx disp; +- if (parts.disp != NULL_RTX) +- disp = plus_constant (Pmode, parts.disp, +- UNITS_PER_WORD); +- else +- disp = GEN_INT (UNITS_PER_WORD); +- +- addr = gen_rtx_PLUS (Pmode, addr, disp); +- call_op = gen_rtx_MEM (GET_MODE (call_op), addr); +- } +- } +- +- output_asm_insn (push_buf, &call_op); +- +- ix86_output_jmp_thunk_or_indirect (thunk_name, regno); +- +- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); +- +- /* Call. */ +- fputs ("\tcall\t", asm_out_file); +- assemble_name_raw (asm_out_file, indirectlabel1); +- fputc ('\n', asm_out_file); +- } +-} +- +-/* Output indirect branch via a call and return thunk. CALL_OP is +- the branch target. XASM is the assembly template for CALL_OP. +- Branch is a tail call if SIBCALL_P is true. */ +- +-static void +-ix86_output_indirect_branch (rtx call_op, const char *xasm, +- bool sibcall_p) +-{ +- if (REG_P (call_op)) +- ix86_output_indirect_branch_via_reg (call_op, sibcall_p); +- else +- ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); +-} +- +-/* Output indirect jump. CALL_OP is the jump target. 
*/ +- +-const char * +-ix86_output_indirect_jmp (rtx call_op) +-{ +- if (cfun->machine->indirect_branch_type != indirect_branch_keep) +- { +- /* We can't have red-zone since "call" in the indirect thunk +- pushes the return address onto stack, destroying red-zone. */ +- if (ix86_red_zone_size != 0) +- gcc_unreachable (); +- +- ix86_output_indirect_branch (call_op, "%0", true); +- return ""; +- } +- else +- return "%!jmp\t%A0"; +-} +- +-/* Output return instrumentation for current function if needed. */ +- +-static void +-output_return_instrumentation (void) +-{ +- if (ix86_instrument_return != instrument_return_none +- && flag_fentry +- && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl)) +- { +- if (ix86_flag_record_return) +- fprintf (asm_out_file, "1:\n"); +- switch (ix86_instrument_return) +- { +- case instrument_return_call: +- fprintf (asm_out_file, "\tcall\t__return__\n"); +- break; +- case instrument_return_nop5: +- /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ +- fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); +- break; +- case instrument_return_none: +- break; +- } +- +- if (ix86_flag_record_return) +- { +- fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n"); +- fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); +- fprintf (asm_out_file, "\t.previous\n"); +- } +- } +-} +- +-/* Output function return. CALL_OP is the jump target. Add a REP +- prefix to RET if LONG_P is true and function return is kept. */ +- +-const char * +-ix86_output_function_return (bool long_p) +-{ +- output_return_instrumentation (); +- +- if (cfun->machine->function_return_type != indirect_branch_keep) +- { +- char thunk_name[32]; +- enum indirect_thunk_prefix need_prefix +- = indirect_thunk_need_prefix (current_output_insn); +- +- if (cfun->machine->function_return_type +- != indirect_branch_thunk_inline) +- { +- bool need_thunk = (cfun->machine->function_return_type +- == indirect_branch_thunk); +- indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix, +- true); +- indirect_return_needed |= need_thunk; +- fprintf (asm_out_file, "\tjmp\t"); +- assemble_name (asm_out_file, thunk_name); +- putc ('\n', asm_out_file); +- } +- else +- output_indirect_thunk (INVALID_REGNUM); +- +- return ""; +- } +- +- if (!long_p) +- return "%!ret"; +- +- return "rep%; ret"; +-} +- +-/* Output indirect function return. RET_OP is the function return +- target. */ +- +-const char * +-ix86_output_indirect_function_return (rtx ret_op) +-{ +- if (cfun->machine->function_return_type != indirect_branch_keep) +- { +- char thunk_name[32]; +- enum indirect_thunk_prefix need_prefix +- = indirect_thunk_need_prefix (current_output_insn); +- unsigned int regno = REGNO (ret_op); +- gcc_assert (regno == CX_REG); +- +- if (cfun->machine->function_return_type +- != indirect_branch_thunk_inline) +- { +- bool need_thunk = (cfun->machine->function_return_type +- == indirect_branch_thunk); +- indirect_thunk_name (thunk_name, regno, need_prefix, true); +- +- if (need_thunk) +- { +- indirect_return_via_cx = true; +- indirect_thunks_used |= 1 << CX_REG; +- } +- fprintf (asm_out_file, "\tjmp\t"); +- assemble_name (asm_out_file, thunk_name); +- putc ('\n', asm_out_file); +- } +- else +- output_indirect_thunk (regno); +- +- return ""; +- } +- else +- return "%!jmp\t%A0"; +-} +- +-/* Split simple return with popping POPC bytes from stack to indirect +- branch with stack adjustment . 
*/ +- +-void +-ix86_split_simple_return_pop_internal (rtx popc) +-{ +- struct machine_function *m = cfun->machine; +- rtx ecx = gen_rtx_REG (SImode, CX_REG); +- rtx_insn *insn; +- +- /* There is no "pascal" calling convention in any 64bit ABI. */ +- gcc_assert (!TARGET_64BIT); +- +- insn = emit_insn (gen_pop (ecx)); +- m->fs.cfa_offset -= UNITS_PER_WORD; +- m->fs.sp_offset -= UNITS_PER_WORD; +- +- rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); +- x = gen_rtx_SET (stack_pointer_rtx, x); +- add_reg_note (insn, REG_CFA_ADJUST_CFA, x); +- add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); +- RTX_FRAME_RELATED_P (insn) = 1; +- +- x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc); +- x = gen_rtx_SET (stack_pointer_rtx, x); +- insn = emit_insn (x); +- add_reg_note (insn, REG_CFA_ADJUST_CFA, x); +- RTX_FRAME_RELATED_P (insn) = 1; +- +- /* Now return address is in ECX. */ +- emit_jump_insn (gen_simple_return_indirect_internal (ecx)); +-} +- +-/* Output the assembly for a call instruction. */ +- +-const char * +-ix86_output_call_insn (rtx_insn *insn, rtx call_op) +-{ +- bool direct_p = constant_call_address_operand (call_op, VOIDmode); +- bool output_indirect_p +- = (!TARGET_SEH +- && cfun->machine->indirect_branch_type != indirect_branch_keep); +- bool seh_nop_p = false; +- const char *xasm; +- +- if (SIBLING_CALL_P (insn)) +- { +- output_return_instrumentation (); +- if (direct_p) +- { +- if (ix86_nopic_noplt_attribute_p (call_op)) +- { +- direct_p = false; +- if (TARGET_64BIT) +- { +- if (output_indirect_p) +- xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; +- else +- xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; +- } +- else +- { +- if (output_indirect_p) +- xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}"; +- else +- xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}"; +- } +- } +- else +- xasm = "%!jmp\t%P0"; +- } +- /* SEH epilogue detection requires the indirect branch case +- to include REX.W. */ +- else if (TARGET_SEH) +- xasm = "%!rex.W jmp\t%A0"; +- else +- { +- if (output_indirect_p) +- xasm = "%0"; +- else +- xasm = "%!jmp\t%A0"; +- } +- +- if (output_indirect_p && !direct_p) +- ix86_output_indirect_branch (call_op, xasm, true); +- else +- output_asm_insn (xasm, &call_op); +- return ""; +- } +- +- /* SEH unwinding can require an extra nop to be emitted in several +- circumstances. Determine if we have one of those. */ +- if (TARGET_SEH) +- { +- rtx_insn *i; +- +- for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i)) +- { +- /* Prevent a catch region from being adjacent to a jump that would +- be interpreted as an epilogue sequence by the unwinder. */ +- if (JUMP_P(i) && CROSSING_JUMP_P (i)) +- { +- seh_nop_p = true; +- break; +- } +- +- /* If we get to another real insn, we don't need the nop. */ +- if (INSN_P (i)) +- break; +- +- /* If we get to the epilogue note, prevent a catch region from +- being adjacent to the standard epilogue sequence. If non- +- call-exceptions, we'll have done this during epilogue emission. */ +- if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG +- && !flag_non_call_exceptions +- && !can_throw_internal (insn)) +- { +- seh_nop_p = true; +- break; +- } +- } +- +- /* If we didn't find a real insn following the call, prevent the +- unwinder from looking into the next function. 
*/ +- if (i == NULL) +- seh_nop_p = true; +- } +- +- if (direct_p) +- { +- if (ix86_nopic_noplt_attribute_p (call_op)) +- { +- direct_p = false; +- if (TARGET_64BIT) +- { +- if (output_indirect_p) +- xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; +- else +- xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; +- } +- else +- { +- if (output_indirect_p) +- xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}"; +- else +- xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}"; +- } +- } +- else +- xasm = "%!call\t%P0"; +- } +- else +- { +- if (output_indirect_p) +- xasm = "%0"; +- else +- xasm = "%!call\t%A0"; +- } +- +- if (output_indirect_p && !direct_p) +- ix86_output_indirect_branch (call_op, xasm, false); +- else +- output_asm_insn (xasm, &call_op); +- +- if (seh_nop_p) +- return "nop"; +- +- return ""; +-} +- +-/* Clear stack slot assignments remembered from previous functions. +- This is called from INIT_EXPANDERS once before RTL is emitted for each +- function. */ +- +-static struct machine_function * +-ix86_init_machine_status (void) +-{ +- struct machine_function *f; +- +- f = ggc_cleared_alloc (); +- f->call_abi = ix86_abi; +- +- return f; +-} +- +-/* Return a MEM corresponding to a stack slot with mode MODE. +- Allocate a new slot if necessary. +- +- The RTL for a function can have several slots available: N is +- which slot to use. */ +- +-rtx +-assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n) +-{ +- struct stack_local_entry *s; +- +- gcc_assert (n < MAX_386_STACK_LOCALS); +- +- for (s = ix86_stack_locals; s; s = s->next) +- if (s->mode == mode && s->n == n) +- return validize_mem (copy_rtx (s->rtl)); +- +- s = ggc_alloc (); +- s->n = n; +- s->mode = mode; +- s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); +- +- s->next = ix86_stack_locals; +- ix86_stack_locals = s; +- return validize_mem (copy_rtx (s->rtl)); +-} +- +-static void +-ix86_instantiate_decls (void) +-{ +- struct stack_local_entry *s; +- +- for (s = ix86_stack_locals; s; s = s->next) +- if (s->rtl != NULL_RTX) +- instantiate_decl_rtl (s->rtl); +-} +- +-/* Check whether x86 address PARTS is a pc-relative address. */ +- +-bool +-ix86_rip_relative_addr_p (struct ix86_address *parts) +-{ +- rtx base, index, disp; +- +- base = parts->base; +- index = parts->index; +- disp = parts->disp; +- +- if (disp && !base && !index) +- { +- if (TARGET_64BIT) +- { +- rtx symbol = disp; +- +- if (GET_CODE (disp) == CONST) +- symbol = XEXP (disp, 0); +- if (GET_CODE (symbol) == PLUS +- && CONST_INT_P (XEXP (symbol, 1))) +- symbol = XEXP (symbol, 0); +- +- if (GET_CODE (symbol) == LABEL_REF +- || (GET_CODE (symbol) == SYMBOL_REF +- && SYMBOL_REF_TLS_MODEL (symbol) == 0) +- || (GET_CODE (symbol) == UNSPEC +- && (XINT (symbol, 1) == UNSPEC_GOTPCREL +- || XINT (symbol, 1) == UNSPEC_PCREL +- || XINT (symbol, 1) == UNSPEC_GOTNTPOFF))) +- return true; +- } +- } +- return false; +-} +- +-/* Calculate the length of the memory address in the instruction encoding. +- Includes addr32 prefix, does not include the one-byte modrm, opcode, +- or other prefixes. We never generate addr32 prefix for LEA insn. 
*/ +- +-int +-memory_address_length (rtx addr, bool lea) +-{ +- struct ix86_address parts; +- rtx base, index, disp; +- int len; +- int ok; +- +- if (GET_CODE (addr) == PRE_DEC +- || GET_CODE (addr) == POST_INC +- || GET_CODE (addr) == PRE_MODIFY +- || GET_CODE (addr) == POST_MODIFY) +- return 0; +- +- ok = ix86_decompose_address (addr, &parts); +- gcc_assert (ok); +- +- len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1; +- +- /* If this is not LEA instruction, add the length of addr32 prefix. */ +- if (TARGET_64BIT && !lea +- && (SImode_address_operand (addr, VOIDmode) +- || (parts.base && GET_MODE (parts.base) == SImode) +- || (parts.index && GET_MODE (parts.index) == SImode))) +- len++; +- +- base = parts.base; +- index = parts.index; +- disp = parts.disp; +- +- if (base && SUBREG_P (base)) +- base = SUBREG_REG (base); +- if (index && SUBREG_P (index)) +- index = SUBREG_REG (index); +- +- gcc_assert (base == NULL_RTX || REG_P (base)); +- gcc_assert (index == NULL_RTX || REG_P (index)); +- +- /* Rule of thumb: +- - esp as the base always wants an index, +- - ebp as the base always wants a displacement, +- - r12 as the base always wants an index, +- - r13 as the base always wants a displacement. */ +- +- /* Register Indirect. */ +- if (base && !index && !disp) +- { +- /* esp (for its index) and ebp (for its displacement) need +- the two-byte modrm form. Similarly for r12 and r13 in 64-bit +- code. */ +- if (base == arg_pointer_rtx +- || base == frame_pointer_rtx +- || REGNO (base) == SP_REG +- || REGNO (base) == BP_REG +- || REGNO (base) == R12_REG +- || REGNO (base) == R13_REG) +- len++; +- } +- +- /* Direct Addressing. In 64-bit mode mod 00 r/m 5 +- is not disp32, but disp32(%rip), so for disp32 +- SIB byte is needed, unless print_operand_address +- optimizes it into disp32(%rip) or (%rip) is implied +- by UNSPEC. */ +- else if (disp && !base && !index) +- { +- len += 4; +- if (!ix86_rip_relative_addr_p (&parts)) +- len++; +- } +- else +- { +- /* Find the length of the displacement constant. */ +- if (disp) +- { +- if (base && satisfies_constraint_K (disp)) +- len += 1; +- else +- len += 4; +- } +- /* ebp always wants a displacement. Similarly r13. */ +- else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG)) +- len++; +- +- /* An index requires the two-byte modrm form.... */ +- if (index +- /* ...like esp (or r12), which always wants an index. */ +- || base == arg_pointer_rtx +- || base == frame_pointer_rtx +- || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG))) +- len++; +- } +- +- return len; +-} +- +-/* Compute default value for "length_immediate" attribute. When SHORTFORM +- is set, expect that insn have 8bit immediate alternative. 
*/ +-int +-ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform) +-{ +- int len = 0; +- int i; +- extract_insn_cached (insn); +- for (i = recog_data.n_operands - 1; i >= 0; --i) +- if (CONSTANT_P (recog_data.operand[i])) +- { +- enum attr_mode mode = get_attr_mode (insn); +- +- gcc_assert (!len); +- if (shortform && CONST_INT_P (recog_data.operand[i])) +- { +- HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]); +- switch (mode) +- { +- case MODE_QI: +- len = 1; +- continue; +- case MODE_HI: +- ival = trunc_int_for_mode (ival, HImode); +- break; +- case MODE_SI: +- ival = trunc_int_for_mode (ival, SImode); +- break; +- default: +- break; +- } +- if (IN_RANGE (ival, -128, 127)) +- { +- len = 1; +- continue; +- } +- } +- switch (mode) +- { +- case MODE_QI: +- len = 1; +- break; +- case MODE_HI: +- len = 2; +- break; +- case MODE_SI: +- len = 4; +- break; +- /* Immediates for DImode instructions are encoded +- as 32bit sign extended values. */ +- case MODE_DI: +- len = 4; +- break; +- default: +- fatal_insn ("unknown insn mode", insn); +- } +- } +- return len; +-} +- +-/* Compute default value for "length_address" attribute. */ +-int +-ix86_attr_length_address_default (rtx_insn *insn) +-{ +- int i; +- +- if (get_attr_type (insn) == TYPE_LEA) +- { +- rtx set = PATTERN (insn), addr; +- +- if (GET_CODE (set) == PARALLEL) +- set = XVECEXP (set, 0, 0); +- +- gcc_assert (GET_CODE (set) == SET); +- +- addr = SET_SRC (set); +- +- return memory_address_length (addr, true); +- } +- +- extract_insn_cached (insn); +- for (i = recog_data.n_operands - 1; i >= 0; --i) +- { +- rtx op = recog_data.operand[i]; +- if (MEM_P (op)) +- { +- constrain_operands_cached (insn, reload_completed); +- if (which_alternative != -1) +- { +- const char *constraints = recog_data.constraints[i]; +- int alt = which_alternative; +- +- while (*constraints == '=' || *constraints == '+') +- constraints++; +- while (alt-- > 0) +- while (*constraints++ != ',') +- ; +- /* Skip ignored operands. */ +- if (*constraints == 'X') +- continue; +- } +- +- int len = memory_address_length (XEXP (op, 0), false); +- +- /* Account for segment prefix for non-default addr spaces. */ +- if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op))) +- len++; +- +- return len; +- } +- } +- return 0; +-} +- +-/* Compute default value for "length_vex" attribute. It includes +- 2 or 3 byte VEX prefix and 1 opcode byte. */ +- +-int +-ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode, +- bool has_vex_w) +-{ +- int i; +- +- /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3 +- byte VEX prefix. */ +- if (!has_0f_opcode || has_vex_w) +- return 3 + 1; +- +- /* We can always use 2 byte VEX prefix in 32bit. */ +- if (!TARGET_64BIT) +- return 2 + 1; +- +- extract_insn_cached (insn); +- +- for (i = recog_data.n_operands - 1; i >= 0; --i) +- if (REG_P (recog_data.operand[i])) +- { +- /* REX.W bit uses 3 byte VEX prefix. */ +- if (GET_MODE (recog_data.operand[i]) == DImode +- && GENERAL_REG_P (recog_data.operand[i])) +- return 3 + 1; +- } +- else +- { +- /* REX.X or REX.B bits use 3 byte VEX prefix. */ +- if (MEM_P (recog_data.operand[i]) +- && x86_extended_reg_mentioned_p (recog_data.operand[i])) +- return 3 + 1; +- } +- +- return 2 + 1; +-} +- +- +-static bool +-ix86_class_likely_spilled_p (reg_class_t); +- +-/* Returns true if lhs of insn is HW function argument register and set up +- is_spilled to true if it is likely spilled HW register. 
*/ +-static bool +-insn_is_function_arg (rtx insn, bool* is_spilled) +-{ +- rtx dst; +- +- if (!NONDEBUG_INSN_P (insn)) +- return false; +- /* Call instructions are not movable, ignore it. */ +- if (CALL_P (insn)) +- return false; +- insn = PATTERN (insn); +- if (GET_CODE (insn) == PARALLEL) +- insn = XVECEXP (insn, 0, 0); +- if (GET_CODE (insn) != SET) +- return false; +- dst = SET_DEST (insn); +- if (REG_P (dst) && HARD_REGISTER_P (dst) +- && ix86_function_arg_regno_p (REGNO (dst))) +- { +- /* Is it likely spilled HW register? */ +- if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst)) +- && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))) +- *is_spilled = true; +- return true; +- } +- return false; +-} +- +-/* Add output dependencies for chain of function adjacent arguments if only +- there is a move to likely spilled HW register. Return first argument +- if at least one dependence was added or NULL otherwise. */ +-static rtx_insn * +-add_parameter_dependencies (rtx_insn *call, rtx_insn *head) +-{ +- rtx_insn *insn; +- rtx_insn *last = call; +- rtx_insn *first_arg = NULL; +- bool is_spilled = false; +- +- head = PREV_INSN (head); +- +- /* Find nearest to call argument passing instruction. */ +- while (true) +- { +- last = PREV_INSN (last); +- if (last == head) +- return NULL; +- if (!NONDEBUG_INSN_P (last)) +- continue; +- if (insn_is_function_arg (last, &is_spilled)) +- break; +- return NULL; +- } +- +- first_arg = last; +- while (true) +- { +- insn = PREV_INSN (last); +- if (!INSN_P (insn)) +- break; +- if (insn == head) +- break; +- if (!NONDEBUG_INSN_P (insn)) +- { +- last = insn; +- continue; +- } +- if (insn_is_function_arg (insn, &is_spilled)) +- { +- /* Add output depdendence between two function arguments if chain +- of output arguments contains likely spilled HW registers. */ +- if (is_spilled) +- add_dependence (first_arg, insn, REG_DEP_OUTPUT); +- first_arg = last = insn; +- } +- else +- break; +- } +- if (!is_spilled) +- return NULL; +- return first_arg; +-} +- +-/* Add output or anti dependency from insn to first_arg to restrict its code +- motion. */ +-static void +-avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn) +-{ +- rtx set; +- rtx tmp; +- +- set = single_set (insn); +- if (!set) +- return; +- tmp = SET_DEST (set); +- if (REG_P (tmp)) +- { +- /* Add output dependency to the first function argument. */ +- add_dependence (first_arg, insn, REG_DEP_OUTPUT); +- return; +- } +- /* Add anti dependency. */ +- add_dependence (first_arg, insn, REG_DEP_ANTI); +-} +- +-/* Avoid cross block motion of function argument through adding dependency +- from the first non-jump instruction in bb. */ +-static void +-add_dependee_for_func_arg (rtx_insn *arg, basic_block bb) +-{ +- rtx_insn *insn = BB_END (bb); +- +- while (insn) +- { +- if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn)) +- { +- rtx set = single_set (insn); +- if (set) +- { +- avoid_func_arg_motion (arg, insn); +- return; +- } +- } +- if (insn == BB_HEAD (bb)) +- return; +- insn = PREV_INSN (insn); +- } +-} +- +-/* Hook for pre-reload schedule - avoid motion of function arguments +- passed in likely spilled HW registers. 
*/ +-static void +-ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) +-{ +- rtx_insn *insn; +- rtx_insn *first_arg = NULL; +- if (reload_completed) +- return; +- while (head != tail && DEBUG_INSN_P (head)) +- head = NEXT_INSN (head); +- for (insn = tail; insn != head; insn = PREV_INSN (insn)) +- if (INSN_P (insn) && CALL_P (insn)) +- { +- first_arg = add_parameter_dependencies (insn, head); +- if (first_arg) +- { +- /* Add dependee for first argument to predecessors if only +- region contains more than one block. */ +- basic_block bb = BLOCK_FOR_INSN (insn); +- int rgn = CONTAINING_RGN (bb->index); +- int nr_blks = RGN_NR_BLOCKS (rgn); +- /* Skip trivial regions and region head blocks that can have +- predecessors outside of region. */ +- if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0) +- { +- edge e; +- edge_iterator ei; +- +- /* Regions are SCCs with the exception of selective +- scheduling with pipelining of outer blocks enabled. +- So also check that immediate predecessors of a non-head +- block are in the same region. */ +- FOR_EACH_EDGE (e, ei, bb->preds) +- { +- /* Avoid creating of loop-carried dependencies through +- using topological ordering in the region. */ +- if (rgn == CONTAINING_RGN (e->src->index) +- && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) +- add_dependee_for_func_arg (first_arg, e->src); +- } +- } +- insn = first_arg; +- if (insn == head) +- break; +- } +- } +- else if (first_arg) +- avoid_func_arg_motion (first_arg, insn); +-} +- +-/* Hook for pre-reload schedule - set priority of moves from likely spilled +- HW registers to maximum, to schedule them at soon as possible. These are +- moves from function argument registers at the top of the function entry +- and moves from function return value registers after call. */ +-static int +-ix86_adjust_priority (rtx_insn *insn, int priority) +-{ +- rtx set; +- +- if (reload_completed) +- return priority; +- +- if (!NONDEBUG_INSN_P (insn)) +- return priority; +- +- set = single_set (insn); +- if (set) +- { +- rtx tmp = SET_SRC (set); +- if (REG_P (tmp) +- && HARD_REGISTER_P (tmp) +- && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp)) +- && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp)))) +- return current_sched_info->sched_max_insns_priority; +- } +- +- return priority; +-} +- +-/* Prepare for scheduling pass. */ +-static void +-ix86_sched_init_global (FILE *, int, int) +-{ +- /* Install scheduling hooks for current CPU. Some of these hooks are used +- in time-critical parts of the scheduler, so we only set them up when +- they are actually used. */ +- switch (ix86_tune) +- { +- case PROCESSOR_CORE2: +- case PROCESSOR_NEHALEM: +- case PROCESSOR_SANDYBRIDGE: +- case PROCESSOR_HASWELL: +- case PROCESSOR_GENERIC: +- /* Do not perform multipass scheduling for pre-reload schedule +- to save compile time. */ +- if (reload_completed) +- { +- ix86_core2i7_init_hooks (); +- break; +- } +- /* Fall through. */ +- default: +- targetm.sched.dfa_post_advance_cycle = NULL; +- targetm.sched.first_cycle_multipass_init = NULL; +- targetm.sched.first_cycle_multipass_begin = NULL; +- targetm.sched.first_cycle_multipass_issue = NULL; +- targetm.sched.first_cycle_multipass_backtrack = NULL; +- targetm.sched.first_cycle_multipass_end = NULL; +- targetm.sched.first_cycle_multipass_fini = NULL; +- break; +- } +-} +- +- +-/* Implement TARGET_STATIC_RTX_ALIGNMENT. 
*/ +- +-static HOST_WIDE_INT +-ix86_static_rtx_alignment (machine_mode mode) +-{ +- if (mode == DFmode) +- return 64; +- if (ALIGN_MODE_128 (mode)) +- return MAX (128, GET_MODE_ALIGNMENT (mode)); +- return GET_MODE_ALIGNMENT (mode); +-} +- +-/* Implement TARGET_CONSTANT_ALIGNMENT. */ +- +-static HOST_WIDE_INT +-ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align) +-{ +- if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST +- || TREE_CODE (exp) == INTEGER_CST) +- { +- machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); +- HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode); +- return MAX (mode_align, align); +- } +- else if (!optimize_size && TREE_CODE (exp) == STRING_CST +- && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) +- return BITS_PER_WORD; +- +- return align; +-} +- +-/* Implement TARGET_EMPTY_RECORD_P. */ +- +-static bool +-ix86_is_empty_record (const_tree type) +-{ +- if (!TARGET_64BIT) +- return false; +- return default_is_empty_record (type); +-} +- +-/* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */ +- +-static void +-ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) +-{ +- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); +- +- if (!cum->warn_empty) +- return; +- +- if (!TYPE_EMPTY_P (type)) +- return; +- +- /* Don't warn if the function isn't visible outside of the TU. */ +- if (cum->decl && !TREE_PUBLIC (cum->decl)) +- return; +- +- const_tree ctx = get_ultimate_context (cum->decl); +- if (ctx != NULL_TREE +- && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) +- return; +- +- /* If the actual size of the type is zero, then there is no change +- in how objects of this size are passed. */ +- if (int_size_in_bytes (type) == 0) +- return; +- +- warning (OPT_Wabi, "empty class %qT parameter passing ABI " +- "changes in %<-fabi-version=12%> (GCC 8)", type); +- +- /* Only warn once. */ +- cum->warn_empty = false; +-} +- +-/* This hook returns name of multilib ABI. */ +- +-static const char * +-ix86_get_multilib_abi_name (void) +-{ +- if (!(TARGET_64BIT_P (ix86_isa_flags))) +- return "i386"; +- else if (TARGET_X32_P (ix86_isa_flags)) +- return "x32"; +- else +- return "x86_64"; +-} +- +-/* Compute the alignment for a variable for Intel MCU psABI. TYPE is +- the data type, and ALIGN is the alignment that the object would +- ordinarily have. */ +- +-static int +-iamcu_alignment (tree type, int align) +-{ +- machine_mode mode; +- +- if (align < 32 || TYPE_USER_ALIGN (type)) +- return align; +- +- /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4 +- bytes. */ +- mode = TYPE_MODE (strip_array_types (type)); +- switch (GET_MODE_CLASS (mode)) +- { +- case MODE_INT: +- case MODE_COMPLEX_INT: +- case MODE_COMPLEX_FLOAT: +- case MODE_FLOAT: +- case MODE_DECIMAL_FLOAT: +- return 32; +- default: +- return align; +- } +-} +- +-/* Compute the alignment for a static variable. +- TYPE is the data type, and ALIGN is the alignment that +- the object would ordinarily have. The value of this function is used +- instead of that alignment to align the object. */ +- +-int +-ix86_data_alignment (tree type, int align, bool opt) +-{ +- /* GCC 4.8 and earlier used to incorrectly assume this alignment even +- for symbols from other compilation units or symbols that don't need +- to bind locally. In order to preserve some ABI compatibility with +- those compilers, ensure we don't decrease alignment from what we +- used to assume. 
*/ +- +- int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT); +- +- /* A data structure, equal or greater than the size of a cache line +- (64 bytes in the Pentium 4 and other recent Intel processors, including +- processors based on Intel Core microarchitecture) should be aligned +- so that its base address is a multiple of a cache line size. */ +- +- int max_align +- = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT); +- +- if (max_align < BITS_PER_WORD) +- max_align = BITS_PER_WORD; +- +- switch (ix86_align_data_type) +- { +- case ix86_align_data_type_abi: opt = false; break; +- case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break; +- case ix86_align_data_type_cacheline: break; +- } +- +- if (TARGET_IAMCU) +- align = iamcu_alignment (type, align); +- +- if (opt +- && AGGREGATE_TYPE_P (type) +- && TYPE_SIZE (type) +- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) +- { +- if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat) +- && align < max_align_compat) +- align = max_align_compat; +- if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align) +- && align < max_align) +- align = max_align; +- } +- +- /* x86-64 ABI requires arrays greater than 16 bytes to be aligned +- to 16byte boundary. */ +- if (TARGET_64BIT) +- { +- if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) +- && TYPE_SIZE (type) +- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST +- && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128) +- && align < 128) +- return 128; +- } +- +- if (!opt) +- return align; +- +- if (TREE_CODE (type) == ARRAY_TYPE) +- { +- if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) +- return 64; +- if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) +- return 128; +- } +- else if (TREE_CODE (type) == COMPLEX_TYPE) +- { +- +- if (TYPE_MODE (type) == DCmode && align < 64) +- return 64; +- if ((TYPE_MODE (type) == XCmode +- || TYPE_MODE (type) == TCmode) && align < 128) +- return 128; +- } +- else if ((TREE_CODE (type) == RECORD_TYPE +- || TREE_CODE (type) == UNION_TYPE +- || TREE_CODE (type) == QUAL_UNION_TYPE) +- && TYPE_FIELDS (type)) +- { +- if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) +- return 64; +- if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) +- return 128; +- } +- else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE +- || TREE_CODE (type) == INTEGER_TYPE) +- { +- if (TYPE_MODE (type) == DFmode && align < 64) +- return 64; +- if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) +- return 128; +- } +- +- return align; +-} +- +-/* Compute the alignment for a local variable or a stack slot. EXP is +- the data type or decl itself, MODE is the widest mode available and +- ALIGN is the alignment that the object would ordinarily have. The +- value of this macro is used instead of that alignment to align the +- object. */ +- +-unsigned int +-ix86_local_alignment (tree exp, machine_mode mode, +- unsigned int align) +-{ +- tree type, decl; +- +- if (exp && DECL_P (exp)) +- { +- type = TREE_TYPE (exp); +- decl = exp; +- } +- else +- { +- type = exp; +- decl = NULL; +- } +- +- /* Don't do dynamic stack realignment for long long objects with +- -mpreferred-stack-boundary=2. 
*/ +- if (!TARGET_64BIT +- && align == 64 +- && ix86_preferred_stack_boundary < 64 +- && (mode == DImode || (type && TYPE_MODE (type) == DImode)) +- && (!type || !TYPE_USER_ALIGN (type)) +- && (!decl || !DECL_USER_ALIGN (decl))) +- align = 32; +- +- /* If TYPE is NULL, we are allocating a stack slot for caller-save +- register in MODE. We will return the largest alignment of XF +- and DF. */ +- if (!type) +- { +- if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode)) +- align = GET_MODE_ALIGNMENT (DFmode); +- return align; +- } +- +- /* Don't increase alignment for Intel MCU psABI. */ +- if (TARGET_IAMCU) +- return align; +- +- /* x86-64 ABI requires arrays greater than 16 bytes to be aligned +- to 16byte boundary. Exact wording is: +- +- An array uses the same alignment as its elements, except that a local or +- global array variable of length at least 16 bytes or +- a C99 variable-length array variable always has alignment of at least 16 bytes. +- +- This was added to allow use of aligned SSE instructions at arrays. This +- rule is meant for static storage (where compiler cannot do the analysis +- by itself). We follow it for automatic variables only when convenient. +- We fully control everything in the function compiled and functions from +- other unit cannot rely on the alignment. +- +- Exclude va_list type. It is the common case of local array where +- we cannot benefit from the alignment. +- +- TODO: Probably one should optimize for size only when var is not escaping. */ +- if (TARGET_64BIT && optimize_function_for_speed_p (cfun) +- && TARGET_SSE) +- { +- if (AGGREGATE_TYPE_P (type) +- && (va_list_type_node == NULL_TREE +- || (TYPE_MAIN_VARIANT (type) +- != TYPE_MAIN_VARIANT (va_list_type_node))) +- && TYPE_SIZE (type) +- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST +- && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128) +- && align < 128) +- return 128; +- } +- if (TREE_CODE (type) == ARRAY_TYPE) +- { +- if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) +- return 64; +- if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) +- return 128; +- } +- else if (TREE_CODE (type) == COMPLEX_TYPE) +- { +- if (TYPE_MODE (type) == DCmode && align < 64) +- return 64; +- if ((TYPE_MODE (type) == XCmode +- || TYPE_MODE (type) == TCmode) && align < 128) +- return 128; +- } +- else if ((TREE_CODE (type) == RECORD_TYPE +- || TREE_CODE (type) == UNION_TYPE +- || TREE_CODE (type) == QUAL_UNION_TYPE) +- && TYPE_FIELDS (type)) +- { +- if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) +- return 64; +- if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) +- return 128; +- } +- else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE +- || TREE_CODE (type) == INTEGER_TYPE) +- { +- +- if (TYPE_MODE (type) == DFmode && align < 64) +- return 64; +- if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) +- return 128; +- } +- return align; +-} +- +-/* Compute the minimum required alignment for dynamic stack realignment +- purposes for a local variable, parameter or a stack slot. EXP is +- the data type or decl itself, MODE is its mode and ALIGN is the +- alignment that the object would ordinarily have. 
*/ +- +-unsigned int +-ix86_minimum_alignment (tree exp, machine_mode mode, +- unsigned int align) +-{ +- tree type, decl; +- +- if (exp && DECL_P (exp)) +- { +- type = TREE_TYPE (exp); +- decl = exp; +- } +- else +- { +- type = exp; +- decl = NULL; +- } +- +- if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) +- return align; +- +- /* Don't do dynamic stack realignment for long long objects with +- -mpreferred-stack-boundary=2. */ +- if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) +- && (!type || !TYPE_USER_ALIGN (type)) +- && (!decl || !DECL_USER_ALIGN (decl))) +- { +- gcc_checking_assert (!TARGET_STV); +- return 32; +- } +- +- return align; +-} +- +-/* Find a location for the static chain incoming to a nested function. +- This is a register, unless all free registers are used by arguments. */ +- +-static rtx +-ix86_static_chain (const_tree fndecl_or_type, bool incoming_p) +-{ +- unsigned regno; +- +- if (TARGET_64BIT) +- { +- /* We always use R10 in 64-bit mode. */ +- regno = R10_REG; +- } +- else +- { +- const_tree fntype, fndecl; +- unsigned int ccvt; +- +- /* By default in 32-bit mode we use ECX to pass the static chain. */ +- regno = CX_REG; +- +- if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL) +- { +- fntype = TREE_TYPE (fndecl_or_type); +- fndecl = fndecl_or_type; +- } +- else +- { +- fntype = fndecl_or_type; +- fndecl = NULL; +- } +- +- ccvt = ix86_get_callcvt (fntype); +- if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) +- { +- /* Fastcall functions use ecx/edx for arguments, which leaves +- us with EAX for the static chain. +- Thiscall functions use ecx for arguments, which also +- leaves us with EAX for the static chain. */ +- regno = AX_REG; +- } +- else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) +- { +- /* Thiscall functions use ecx for arguments, which leaves +- us with EAX and EDX for the static chain. +- We are using for abi-compatibility EAX. */ +- regno = AX_REG; +- } +- else if (ix86_function_regparm (fntype, fndecl) == 3) +- { +- /* For regparm 3, we have no free call-clobbered registers in +- which to store the static chain. In order to implement this, +- we have the trampoline push the static chain to the stack. +- However, we can't push a value below the return address when +- we call the nested function directly, so we have to use an +- alternate entry point. For this we use ESI, and have the +- alternate entry point push ESI, so that things appear the +- same once we're executing the nested function. */ +- if (incoming_p) +- { +- if (fndecl == current_function_decl +- && !ix86_static_chain_on_stack) +- { +- gcc_assert (!reload_completed); +- ix86_static_chain_on_stack = true; +- } +- return gen_frame_mem (SImode, +- plus_constant (Pmode, +- arg_pointer_rtx, -8)); +- } +- regno = SI_REG; +- } +- } +- +- return gen_rtx_REG (Pmode, regno); +-} +- +-/* Emit RTL insns to initialize the variable parts of a trampoline. +- FNDECL is the decl of the target address; M_TRAMP is a MEM for +- the trampoline, and CHAIN_VALUE is an RTX for the static chain +- to be passed to the target function. */ +- +-static void +-ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +-{ +- rtx mem, fnaddr; +- int opcode; +- int offset = 0; +- bool need_endbr = (flag_cf_protection & CF_BRANCH); +- +- fnaddr = XEXP (DECL_RTL (fndecl), 0); +- +- if (TARGET_64BIT) +- { +- int size; +- +- if (need_endbr) +- { +- /* Insert ENDBR64. 
*/ +- mem = adjust_address (m_tramp, SImode, offset); +- emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode)); +- offset += 4; +- } +- +- /* Load the function address to r11. Try to load address using +- the shorter movl instead of movabs. We may want to support +- movq for kernel mode, but kernel does not use trampolines at +- the moment. FNADDR is a 32bit address and may not be in +- DImode when ptr_mode == SImode. Always use movl in this +- case. */ +- if (ptr_mode == SImode +- || x86_64_zext_immediate_operand (fnaddr, VOIDmode)) +- { +- fnaddr = copy_addr_to_reg (fnaddr); +- +- mem = adjust_address (m_tramp, HImode, offset); +- emit_move_insn (mem, gen_int_mode (0xbb41, HImode)); +- +- mem = adjust_address (m_tramp, SImode, offset + 2); +- emit_move_insn (mem, gen_lowpart (SImode, fnaddr)); +- offset += 6; +- } +- else +- { +- mem = adjust_address (m_tramp, HImode, offset); +- emit_move_insn (mem, gen_int_mode (0xbb49, HImode)); +- +- mem = adjust_address (m_tramp, DImode, offset + 2); +- emit_move_insn (mem, fnaddr); +- offset += 10; +- } +- +- /* Load static chain using movabs to r10. Use the shorter movl +- instead of movabs when ptr_mode == SImode. */ +- if (ptr_mode == SImode) +- { +- opcode = 0xba41; +- size = 6; +- } +- else +- { +- opcode = 0xba49; +- size = 10; +- } +- +- mem = adjust_address (m_tramp, HImode, offset); +- emit_move_insn (mem, gen_int_mode (opcode, HImode)); +- +- mem = adjust_address (m_tramp, ptr_mode, offset + 2); +- emit_move_insn (mem, chain_value); +- offset += size; +- +- /* Jump to r11; the last (unused) byte is a nop, only there to +- pad the write out to a single 32-bit store. */ +- mem = adjust_address (m_tramp, SImode, offset); +- emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode)); +- offset += 4; +- } +- else +- { +- rtx disp, chain; +- +- /* Depending on the static chain location, either load a register +- with a constant, or push the constant to the stack. All of the +- instructions are the same size. */ +- chain = ix86_static_chain (fndecl, true); +- if (REG_P (chain)) +- { +- switch (REGNO (chain)) +- { +- case AX_REG: +- opcode = 0xb8; break; +- case CX_REG: +- opcode = 0xb9; break; +- default: +- gcc_unreachable (); +- } +- } +- else +- opcode = 0x68; +- +- if (need_endbr) +- { +- /* Insert ENDBR32. */ +- mem = adjust_address (m_tramp, SImode, offset); +- emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode)); +- offset += 4; +- } +- +- mem = adjust_address (m_tramp, QImode, offset); +- emit_move_insn (mem, gen_int_mode (opcode, QImode)); +- +- mem = adjust_address (m_tramp, SImode, offset + 1); +- emit_move_insn (mem, chain_value); +- offset += 5; +- +- mem = adjust_address (m_tramp, QImode, offset); +- emit_move_insn (mem, gen_int_mode (0xe9, QImode)); +- +- mem = adjust_address (m_tramp, SImode, offset + 1); +- +- /* Compute offset from the end of the jmp to the target function. +- In the case in which the trampoline stores the static chain on +- the stack, we need to skip the first insn which pushes the +- (call-saved) register static chain; this push is 1 byte. */ +- offset += 5; +- int skip = MEM_P (chain) ? 1 : 0; +- /* Skip ENDBR32 at the entry of the target function. 
*/ +- if (need_endbr +- && !cgraph_node::get (fndecl)->only_called_directly_p ()) +- skip += 4; +- disp = expand_binop (SImode, sub_optab, fnaddr, +- plus_constant (Pmode, XEXP (m_tramp, 0), +- offset - skip), +- NULL_RTX, 1, OPTAB_DIRECT); +- emit_move_insn (mem, disp); +- } +- +- gcc_assert (offset <= TRAMPOLINE_SIZE); +- +-#ifdef HAVE_ENABLE_EXECUTE_STACK +-#ifdef CHECK_EXECUTE_STACK_ENABLED +- if (CHECK_EXECUTE_STACK_ENABLED) +-#endif +- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), +- LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); +-#endif +-} +- +-static bool +-ix86_allocate_stack_slots_for_args (void) +-{ +- /* Naked functions should not allocate stack slots for arguments. */ +- return !ix86_function_naked (current_function_decl); +-} +- +-static bool +-ix86_warn_func_return (tree decl) +-{ +- /* Naked functions are implemented entirely in assembly, including the +- return sequence, so suppress warnings about this. */ +- return !ix86_function_naked (decl); +-} +- +-/* The following file contains several enumerations and data structures +- built from the definitions in i386-builtin-types.def. */ +- +-#include "i386-builtin-types.inc" +- +-/* Table for the ix86 builtin non-function types. */ +-static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1]; +- +-/* Retrieve an element from the above table, building some of +- the types lazily. */ +- +-static tree +-ix86_get_builtin_type (enum ix86_builtin_type tcode) +-{ +- unsigned int index; +- tree type, itype; +- +- gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab)); +- +- type = ix86_builtin_type_tab[(int) tcode]; +- if (type != NULL) +- return type; +- +- gcc_assert (tcode > IX86_BT_LAST_PRIM); +- if (tcode <= IX86_BT_LAST_VECT) +- { +- machine_mode mode; +- +- index = tcode - IX86_BT_LAST_PRIM - 1; +- itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]); +- mode = ix86_builtin_type_vect_mode[index]; +- +- type = build_vector_type_for_mode (itype, mode); +- } +- else +- { +- int quals; +- +- index = tcode - IX86_BT_LAST_VECT - 1; +- if (tcode <= IX86_BT_LAST_PTR) +- quals = TYPE_UNQUALIFIED; +- else +- quals = TYPE_QUAL_CONST; +- +- itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]); +- if (quals != TYPE_UNQUALIFIED) +- itype = build_qualified_type (itype, quals); +- +- type = build_pointer_type (itype); +- } +- +- ix86_builtin_type_tab[(int) tcode] = type; +- return type; +-} +- +-/* Table for the ix86 builtin function types. */ +-static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1]; +- +-/* Retrieve an element from the above table, building some of +- the types lazily. 
*/ +- +-static tree +-ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode) +-{ +- tree type; +- +- gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab)); +- +- type = ix86_builtin_func_type_tab[(int) tcode]; +- if (type != NULL) +- return type; +- +- if (tcode <= IX86_BT_LAST_FUNC) +- { +- unsigned start = ix86_builtin_func_start[(int) tcode]; +- unsigned after = ix86_builtin_func_start[(int) tcode + 1]; +- tree rtype, atype, args = void_list_node; +- unsigned i; +- +- rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]); +- for (i = after - 1; i > start; --i) +- { +- atype = ix86_get_builtin_type (ix86_builtin_func_args[i]); +- args = tree_cons (NULL, atype, args); +- } +- +- type = build_function_type (rtype, args); +- } +- else +- { +- unsigned index = tcode - IX86_BT_LAST_FUNC - 1; +- enum ix86_builtin_func_type icode; +- +- icode = ix86_builtin_func_alias_base[index]; +- type = ix86_get_builtin_func_type (icode); +- } +- +- ix86_builtin_func_type_tab[(int) tcode] = type; +- return type; +-} +- +- +-/* Codes for all the SSE/MMX builtins. Builtins not mentioned in any +- bdesc_* arrays below should come first, then builtins for each bdesc_* +- array in ascending order, so that we can use direct array accesses. */ +-enum ix86_builtins +-{ +- IX86_BUILTIN_MASKMOVQ, +- IX86_BUILTIN_LDMXCSR, +- IX86_BUILTIN_STMXCSR, +- IX86_BUILTIN_MASKMOVDQU, +- IX86_BUILTIN_PSLLDQ128, +- IX86_BUILTIN_CLFLUSH, +- IX86_BUILTIN_MONITOR, +- IX86_BUILTIN_MWAIT, +- IX86_BUILTIN_UMONITOR, +- IX86_BUILTIN_UMWAIT, +- IX86_BUILTIN_TPAUSE, +- IX86_BUILTIN_CLZERO, +- IX86_BUILTIN_CLDEMOTE, +- IX86_BUILTIN_VEC_INIT_V2SI, +- IX86_BUILTIN_VEC_INIT_V4HI, +- IX86_BUILTIN_VEC_INIT_V8QI, +- IX86_BUILTIN_VEC_EXT_V2DF, +- IX86_BUILTIN_VEC_EXT_V2DI, +- IX86_BUILTIN_VEC_EXT_V4SF, +- IX86_BUILTIN_VEC_EXT_V4SI, +- IX86_BUILTIN_VEC_EXT_V8HI, +- IX86_BUILTIN_VEC_EXT_V2SI, +- IX86_BUILTIN_VEC_EXT_V4HI, +- IX86_BUILTIN_VEC_EXT_V16QI, +- IX86_BUILTIN_VEC_SET_V2DI, +- IX86_BUILTIN_VEC_SET_V4SF, +- IX86_BUILTIN_VEC_SET_V4SI, +- IX86_BUILTIN_VEC_SET_V8HI, +- IX86_BUILTIN_VEC_SET_V4HI, +- IX86_BUILTIN_VEC_SET_V16QI, +- IX86_BUILTIN_GATHERSIV2DF, +- IX86_BUILTIN_GATHERSIV4DF, +- IX86_BUILTIN_GATHERDIV2DF, +- IX86_BUILTIN_GATHERDIV4DF, +- IX86_BUILTIN_GATHERSIV4SF, +- IX86_BUILTIN_GATHERSIV8SF, +- IX86_BUILTIN_GATHERDIV4SF, +- IX86_BUILTIN_GATHERDIV8SF, +- IX86_BUILTIN_GATHERSIV2DI, +- IX86_BUILTIN_GATHERSIV4DI, +- IX86_BUILTIN_GATHERDIV2DI, +- IX86_BUILTIN_GATHERDIV4DI, +- IX86_BUILTIN_GATHERSIV4SI, +- IX86_BUILTIN_GATHERSIV8SI, +- IX86_BUILTIN_GATHERDIV4SI, +- IX86_BUILTIN_GATHERDIV8SI, +- IX86_BUILTIN_GATHER3SIV8SF, +- IX86_BUILTIN_GATHER3SIV4SF, +- IX86_BUILTIN_GATHER3SIV4DF, +- IX86_BUILTIN_GATHER3SIV2DF, +- IX86_BUILTIN_GATHER3DIV8SF, +- IX86_BUILTIN_GATHER3DIV4SF, +- IX86_BUILTIN_GATHER3DIV4DF, +- IX86_BUILTIN_GATHER3DIV2DF, +- IX86_BUILTIN_GATHER3SIV8SI, +- IX86_BUILTIN_GATHER3SIV4SI, +- IX86_BUILTIN_GATHER3SIV4DI, +- IX86_BUILTIN_GATHER3SIV2DI, +- IX86_BUILTIN_GATHER3DIV8SI, +- IX86_BUILTIN_GATHER3DIV4SI, +- IX86_BUILTIN_GATHER3DIV4DI, +- IX86_BUILTIN_GATHER3DIV2DI, +- IX86_BUILTIN_SCATTERSIV8SF, +- IX86_BUILTIN_SCATTERSIV4SF, +- IX86_BUILTIN_SCATTERSIV4DF, +- IX86_BUILTIN_SCATTERSIV2DF, +- IX86_BUILTIN_SCATTERDIV8SF, +- IX86_BUILTIN_SCATTERDIV4SF, +- IX86_BUILTIN_SCATTERDIV4DF, +- IX86_BUILTIN_SCATTERDIV2DF, +- IX86_BUILTIN_SCATTERSIV8SI, +- IX86_BUILTIN_SCATTERSIV4SI, +- IX86_BUILTIN_SCATTERSIV4DI, +- IX86_BUILTIN_SCATTERSIV2DI, +- IX86_BUILTIN_SCATTERDIV8SI, +- IX86_BUILTIN_SCATTERDIV4SI, +- 
IX86_BUILTIN_SCATTERDIV4DI, +- IX86_BUILTIN_SCATTERDIV2DI, +- /* Alternate 4 and 8 element gather/scatter for the vectorizer +- where all operands are 32-byte or 64-byte wide respectively. */ +- IX86_BUILTIN_GATHERALTSIV4DF, +- IX86_BUILTIN_GATHERALTDIV8SF, +- IX86_BUILTIN_GATHERALTSIV4DI, +- IX86_BUILTIN_GATHERALTDIV8SI, +- IX86_BUILTIN_GATHER3ALTDIV16SF, +- IX86_BUILTIN_GATHER3ALTDIV16SI, +- IX86_BUILTIN_GATHER3ALTSIV4DF, +- IX86_BUILTIN_GATHER3ALTDIV8SF, +- IX86_BUILTIN_GATHER3ALTSIV4DI, +- IX86_BUILTIN_GATHER3ALTDIV8SI, +- IX86_BUILTIN_GATHER3ALTSIV8DF, +- IX86_BUILTIN_GATHER3ALTSIV8DI, +- IX86_BUILTIN_GATHER3DIV16SF, +- IX86_BUILTIN_GATHER3DIV16SI, +- IX86_BUILTIN_GATHER3DIV8DF, +- IX86_BUILTIN_GATHER3DIV8DI, +- IX86_BUILTIN_GATHER3SIV16SF, +- IX86_BUILTIN_GATHER3SIV16SI, +- IX86_BUILTIN_GATHER3SIV8DF, +- IX86_BUILTIN_GATHER3SIV8DI, +- IX86_BUILTIN_SCATTERALTSIV8DF, +- IX86_BUILTIN_SCATTERALTDIV16SF, +- IX86_BUILTIN_SCATTERALTSIV8DI, +- IX86_BUILTIN_SCATTERALTDIV16SI, +- IX86_BUILTIN_SCATTERALTSIV4DF, +- IX86_BUILTIN_SCATTERALTDIV8SF, +- IX86_BUILTIN_SCATTERALTSIV4DI, +- IX86_BUILTIN_SCATTERALTDIV8SI, +- IX86_BUILTIN_SCATTERALTSIV2DF, +- IX86_BUILTIN_SCATTERALTDIV4SF, +- IX86_BUILTIN_SCATTERALTSIV2DI, +- IX86_BUILTIN_SCATTERALTDIV4SI, +- IX86_BUILTIN_SCATTERDIV16SF, +- IX86_BUILTIN_SCATTERDIV16SI, +- IX86_BUILTIN_SCATTERDIV8DF, +- IX86_BUILTIN_SCATTERDIV8DI, +- IX86_BUILTIN_SCATTERSIV16SF, +- IX86_BUILTIN_SCATTERSIV16SI, +- IX86_BUILTIN_SCATTERSIV8DF, +- IX86_BUILTIN_SCATTERSIV8DI, +- IX86_BUILTIN_GATHERPFQPD, +- IX86_BUILTIN_GATHERPFDPS, +- IX86_BUILTIN_GATHERPFDPD, +- IX86_BUILTIN_GATHERPFQPS, +- IX86_BUILTIN_SCATTERPFDPD, +- IX86_BUILTIN_SCATTERPFDPS, +- IX86_BUILTIN_SCATTERPFQPD, +- IX86_BUILTIN_SCATTERPFQPS, +- IX86_BUILTIN_CLWB, +- IX86_BUILTIN_CLFLUSHOPT, +- IX86_BUILTIN_INFQ, +- IX86_BUILTIN_HUGE_VALQ, +- IX86_BUILTIN_NANQ, +- IX86_BUILTIN_NANSQ, +- IX86_BUILTIN_XABORT, +- IX86_BUILTIN_ADDCARRYX32, +- IX86_BUILTIN_ADDCARRYX64, +- IX86_BUILTIN_SBB32, +- IX86_BUILTIN_SBB64, +- IX86_BUILTIN_RDRAND16_STEP, +- IX86_BUILTIN_RDRAND32_STEP, +- IX86_BUILTIN_RDRAND64_STEP, +- IX86_BUILTIN_RDSEED16_STEP, +- IX86_BUILTIN_RDSEED32_STEP, +- IX86_BUILTIN_RDSEED64_STEP, +- IX86_BUILTIN_MONITORX, +- IX86_BUILTIN_MWAITX, +- IX86_BUILTIN_CFSTRING, +- IX86_BUILTIN_CPU_INIT, +- IX86_BUILTIN_CPU_IS, +- IX86_BUILTIN_CPU_SUPPORTS, +- IX86_BUILTIN_READ_FLAGS, +- IX86_BUILTIN_WRITE_FLAGS, +- +- /* All the remaining builtins are tracked in bdesc_* arrays in +- i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after +- this point. */ +-#define BDESC(mask, mask2, icode, name, code, comparison, flag) \ +- code, +-#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) \ +- code, \ +- IX86_BUILTIN__BDESC_##kindu##_FIRST = code, +-#define BDESC_END(kind, next_kind) +- +-#include "i386-builtin.def" +- +-#undef BDESC +-#undef BDESC_FIRST +-#undef BDESC_END +- +- IX86_BUILTIN_MAX, +- +- IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX, +- +- /* Now just the aliases for bdesc_* start/end. */ +-#define BDESC(mask, mask2, icode, name, code, comparison, flag) +-#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) +-#define BDESC_END(kind, next_kind) \ +- IX86_BUILTIN__BDESC_##kind##_LAST \ +- = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1, +- +-#include "i386-builtin.def" +- +-#undef BDESC +-#undef BDESC_FIRST +-#undef BDESC_END +- +- /* Just to make sure there is no comma after the last enumerator. 
*/ +- IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST +-}; +- +-/* Table for the ix86 builtin decls. */ +-static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; +- +-/* Table of all of the builtin functions that are possible with different ISA's +- but are waiting to be built until a function is declared to use that +- ISA. */ +-struct builtin_isa { +- HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */ +- HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */ +- const char *name; /* function name */ +- enum ix86_builtin_func_type tcode; /* type to use in the declaration */ +- unsigned char const_p:1; /* true if the declaration is constant */ +- unsigned char pure_p:1; /* true if the declaration has pure attribute */ +- bool set_and_not_built_p; +-}; +- +-static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; +- +-/* Bits that can still enable any inclusion of a builtin. */ +-static HOST_WIDE_INT deferred_isa_values = 0; +-static HOST_WIDE_INT deferred_isa_values2 = 0; +- +-/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the +- MASK and MASK2 of which isa_flags and ix86_isa_flags2 to use in the +- ix86_builtins_isa array. Stores the function decl in the ix86_builtins +- array. Returns the function decl or NULL_TREE, if the builtin was not +- added. +- +- If the front end has a special hook for builtin functions, delay adding +- builtin functions that aren't in the current ISA until the ISA is changed +- with function specific optimization. Doing so, can save about 300K for the +- default compiler. When the builtin is expanded, check at that time whether +- it is valid. +- +- If the front end doesn't have a special hook, record all builtins, even if +- it isn't an instruction set in the current ISA in case the user uses +- function specific options for a different ISA, so that we don't get scope +- errors if a builtin is added in the middle of a function scope. */ +- +-static inline tree +-def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, +- const char *name, +- enum ix86_builtin_func_type tcode, +- enum ix86_builtins code) +-{ +- tree decl = NULL_TREE; +- +- /* An instruction may be 64bit only regardless of ISAs. */ +- if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) +- { +- ix86_builtins_isa[(int) code].isa = mask; +- ix86_builtins_isa[(int) code].isa2 = mask2; +- +- mask &= ~OPTION_MASK_ISA_64BIT; +- +- /* Filter out the masks most often ored together with others. */ +- if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL) +- && mask != OPTION_MASK_ISA_AVX512VL) +- mask &= ~OPTION_MASK_ISA_AVX512VL; +- if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512BW) +- && mask != OPTION_MASK_ISA_AVX512BW) +- mask &= ~OPTION_MASK_ISA_AVX512BW; +- +- if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) +- && (mask == 0 || (mask & ix86_isa_flags) != 0)) +- || (lang_hooks.builtin_function +- == lang_hooks.builtin_function_ext_scope)) +- { +- tree type = ix86_get_builtin_func_type (tcode); +- decl = add_builtin_function (name, type, code, BUILT_IN_MD, +- NULL, NULL_TREE); +- ix86_builtins[(int) code] = decl; +- ix86_builtins_isa[(int) code].set_and_not_built_p = false; +- } +- else +- { +- /* Just MASK and MASK2 where set_and_not_built_p == true can potentially +- include a builtin. 
*/ +- deferred_isa_values |= mask; +- deferred_isa_values2 |= mask2; +- ix86_builtins[(int) code] = NULL_TREE; +- ix86_builtins_isa[(int) code].tcode = tcode; +- ix86_builtins_isa[(int) code].name = name; +- ix86_builtins_isa[(int) code].const_p = false; +- ix86_builtins_isa[(int) code].pure_p = false; +- ix86_builtins_isa[(int) code].set_and_not_built_p = true; +- } +- } +- +- return decl; +-} +- +-/* Like def_builtin, but also marks the function decl "const". */ +- +-static inline tree +-def_builtin_const (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name, +- enum ix86_builtin_func_type tcode, enum ix86_builtins code) +-{ +- tree decl = def_builtin (mask, mask2, name, tcode, code); +- if (decl) +- TREE_READONLY (decl) = 1; +- else +- ix86_builtins_isa[(int) code].const_p = true; +- +- return decl; +-} +- +-/* Like def_builtin, but also marks the function decl "pure". */ +- +-static inline tree +-def_builtin_pure (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name, +- enum ix86_builtin_func_type tcode, enum ix86_builtins code) +-{ +- tree decl = def_builtin (mask, mask2, name, tcode, code); +- if (decl) +- DECL_PURE_P (decl) = 1; +- else +- ix86_builtins_isa[(int) code].pure_p = true; +- +- return decl; +-} +- +-/* Add any new builtin functions for a given ISA that may not have been +- declared. This saves a bit of space compared to adding all of the +- declarations to the tree, even if we didn't use them. */ +- +-static void +-ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2) +-{ +- isa &= ~OPTION_MASK_ISA_64BIT; +- +- if ((isa & deferred_isa_values) == 0 +- && (isa2 & deferred_isa_values2) == 0) +- return; +- +- /* Bits in ISA value can be removed from potential isa values. */ +- deferred_isa_values &= ~isa; +- deferred_isa_values2 &= ~isa2; +- +- int i; +- tree saved_current_target_pragma = current_target_pragma; +- current_target_pragma = NULL_TREE; +- +- for (i = 0; i < (int)IX86_BUILTIN_MAX; i++) +- { +- if (((ix86_builtins_isa[i].isa & isa) != 0 +- || (ix86_builtins_isa[i].isa2 & isa2) != 0) +- && ix86_builtins_isa[i].set_and_not_built_p) +- { +- tree decl, type; +- +- /* Don't define the builtin again. */ +- ix86_builtins_isa[i].set_and_not_built_p = false; +- +- type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode); +- decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name, +- type, i, BUILT_IN_MD, NULL, +- NULL_TREE); +- +- ix86_builtins[i] = decl; +- if (ix86_builtins_isa[i].const_p) +- TREE_READONLY (decl) = 1; +- } +- } +- +- current_target_pragma = saved_current_target_pragma; +-} +- +-/* Bits for builtin_description.flag. */ +- +-/* Set when we don't support the comparison natively, and should +- swap_comparison in order to support it. 
*/ +-#define BUILTIN_DESC_SWAP_OPERANDS 1 +- +-struct builtin_description +-{ +- const HOST_WIDE_INT mask; +- const HOST_WIDE_INT mask2; +- const enum insn_code icode; +- const char *const name; +- const enum ix86_builtins code; +- const enum rtx_code comparison; +- const int flag; +-}; +- +-#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT +-#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT +-#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT +-#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT +-#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF +-#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF +-#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF +-#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF +-#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI +-#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI +-#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI +-#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI +-#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI +-#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI +-#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI +-#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI +-#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI +-#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI +-#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF +-#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF +-#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI +-#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI +-#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI +-#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI +-#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI +-#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI +-#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI +-#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI +-#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP +-#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP +-#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP +-#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP +-#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF +-#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF +-#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF +-#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF +-#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF +-#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF +-#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF +-#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF +-#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF +-#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF +-#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI +-#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI +-#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI +-#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI +-#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI +-#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI +-#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI +-#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI +-#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI +-#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI +- +-#define BDESC(mask, mask2, icode, name, code, comparison, flag) \ +- { mask, mask2, icode, name, code, comparison, flag }, +-#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) \ +-static const struct builtin_description bdesc_##kind[] = \ +-{ \ +- BDESC (mask, mask2, icode, name, code, comparison, flag) +-#define BDESC_END(kind, next_kind) \ +-}; +- +-#include "i386-builtin.def" +- +-#undef BDESC +-#undef BDESC_FIRST +-#undef BDESC_END +- +- +-/* TM vector builtins. */ +- +-/* Reuse the existing x86-specific `struct builtin_description' cause +- we're lazy. Add casts to make them fit. 
*/ +-static const struct builtin_description bdesc_tm[] = +-{ +- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, +- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, +- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, +- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, +- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, +- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, +- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, +- +- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, +- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, +- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, +- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, +- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, +- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, +- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, +- +- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, +- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, +- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, +- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, +- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, +- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, +- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, +- +- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID }, +- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID }, +- { OPTION_MASK_ISA_AVX, 0, 
CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID }, +-}; +- +-/* Initialize the transactional memory vector load/store builtins. */ +- +-static void +-ix86_init_tm_builtins (void) +-{ +- enum ix86_builtin_func_type ftype; +- const struct builtin_description *d; +- size_t i; +- tree decl; +- tree attrs_load, attrs_type_load, attrs_store, attrs_type_store; +- tree attrs_log, attrs_type_log; +- +- if (!flag_tm) +- return; +- +- /* If there are no builtins defined, we must be compiling in a +- language without trans-mem support. */ +- if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1)) +- return; +- +- /* Use whatever attributes a normal TM load has. */ +- decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1); +- attrs_load = DECL_ATTRIBUTES (decl); +- attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl)); +- /* Use whatever attributes a normal TM store has. */ +- decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1); +- attrs_store = DECL_ATTRIBUTES (decl); +- attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl)); +- /* Use whatever attributes a normal TM log has. */ +- decl = builtin_decl_explicit (BUILT_IN_TM_LOG); +- attrs_log = DECL_ATTRIBUTES (decl); +- attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl)); +- +- for (i = 0, d = bdesc_tm; +- i < ARRAY_SIZE (bdesc_tm); +- i++, d++) +- { +- if ((d->mask & ix86_isa_flags) != 0 +- || (lang_hooks.builtin_function +- == lang_hooks.builtin_function_ext_scope)) +- { +- tree type, attrs, attrs_type; +- enum built_in_function code = (enum built_in_function) d->code; +- +- ftype = (enum ix86_builtin_func_type) d->flag; +- type = ix86_get_builtin_func_type (ftype); +- +- if (BUILTIN_TM_LOAD_P (code)) +- { +- attrs = attrs_load; +- attrs_type = attrs_type_load; +- } +- else if (BUILTIN_TM_STORE_P (code)) +- { +- attrs = attrs_store; +- attrs_type = attrs_type_store; +- } +- else +- { +- attrs = attrs_log; +- attrs_type = attrs_type_log; +- } +- decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL, +- /* The builtin without the prefix for +- calling it directly. */ +- d->name + strlen ("__builtin_"), +- attrs); +- /* add_builtin_function() will set the DECL_ATTRIBUTES, now +- set the TYPE_ATTRIBUTES. */ +- decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN); +- +- set_builtin_decl (code, decl, false); +- } +- } +-} +- +-/* Macros for verification of enum ix86_builtins order. 
*/ +-#define BDESC_VERIFY(x, y, z) \ +- gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z))) +-#define BDESC_VERIFYS(x, y, z) \ +- STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z))) +- +-BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST, +- IX86_BUILTIN__BDESC_COMI_LAST, 1); +-BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST, +- IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1); +-BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, +- IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1); +-BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST, +- IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1); +-BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, +- IX86_BUILTIN__BDESC_ARGS_LAST, 1); +-BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, +- IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1); +-BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST, +- IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1); +-BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, +- IX86_BUILTIN__BDESC_CET_LAST, 1); +-BDESC_VERIFYS (IX86_BUILTIN_MAX, +- IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1); +- +-/* Set up all the MMX/SSE builtins, even builtins for instructions that are not +- in the current target ISA to allow the user to compile particular modules +- with different target specific options that differ from the command line +- options. */ +-static void +-ix86_init_mmx_sse_builtins (void) +-{ +- const struct builtin_description * d; +- enum ix86_builtin_func_type ftype; +- size_t i; +- +- /* Add all special builtins with variable number of operands. */ +- for (i = 0, d = bdesc_special_args; +- i < ARRAY_SIZE (bdesc_special_args); +- i++, d++) +- { +- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i); +- if (d->name == 0) +- continue; +- +- ftype = (enum ix86_builtin_func_type) d->flag; +- def_builtin (d->mask, d->mask2, d->name, ftype, d->code); +- } +- BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, +- IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, +- ARRAY_SIZE (bdesc_special_args) - 1); +- +- /* Add all builtins with variable number of operands. */ +- for (i = 0, d = bdesc_args; +- i < ARRAY_SIZE (bdesc_args); +- i++, d++) +- { +- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i); +- if (d->name == 0) +- continue; +- +- ftype = (enum ix86_builtin_func_type) d->flag; +- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); +- } +- BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST, +- IX86_BUILTIN__BDESC_ARGS_FIRST, +- ARRAY_SIZE (bdesc_args) - 1); +- +- /* Add all builtins with rounding. */ +- for (i = 0, d = bdesc_round_args; +- i < ARRAY_SIZE (bdesc_round_args); +- i++, d++) +- { +- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i); +- if (d->name == 0) +- continue; +- +- ftype = (enum ix86_builtin_func_type) d->flag; +- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); +- } +- BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, +- IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, +- ARRAY_SIZE (bdesc_round_args) - 1); +- +- /* pcmpestr[im] insns. */ +- for (i = 0, d = bdesc_pcmpestr; +- i < ARRAY_SIZE (bdesc_pcmpestr); +- i++, d++) +- { +- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i); +- if (d->code == IX86_BUILTIN_PCMPESTRM128) +- ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT; +- else +- ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT; +- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); +- } +- BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST, +- IX86_BUILTIN__BDESC_PCMPESTR_FIRST, +- ARRAY_SIZE (bdesc_pcmpestr) - 1); +- +- /* pcmpistr[im] insns. 
*/ +- for (i = 0, d = bdesc_pcmpistr; +- i < ARRAY_SIZE (bdesc_pcmpistr); +- i++, d++) +- { +- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i); +- if (d->code == IX86_BUILTIN_PCMPISTRM128) +- ftype = V16QI_FTYPE_V16QI_V16QI_INT; +- else +- ftype = INT_FTYPE_V16QI_V16QI_INT; +- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); +- } +- BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST, +- IX86_BUILTIN__BDESC_PCMPISTR_FIRST, +- ARRAY_SIZE (bdesc_pcmpistr) - 1); +- +- /* comi/ucomi insns. */ +- for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) +- { +- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i); +- if (d->mask == OPTION_MASK_ISA_SSE2) +- ftype = INT_FTYPE_V2DF_V2DF; +- else +- ftype = INT_FTYPE_V4SF_V4SF; +- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); +- } +- BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST, +- IX86_BUILTIN__BDESC_COMI_FIRST, +- ARRAY_SIZE (bdesc_comi) - 1); +- +- /* SSE */ +- def_builtin (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_ldmxcsr", +- VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR); +- def_builtin_pure (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_stmxcsr", +- UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR); +- +- /* SSE or 3DNow!A */ +- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A +- /* As it uses V4HImode, we have to require -mmmx too. */ +- | OPTION_MASK_ISA_MMX, 0, +- "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR, +- IX86_BUILTIN_MASKMOVQ); +- +- /* SSE2 */ +- def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_maskmovdqu", +- VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU); +- +- def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_clflush", +- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH); +- x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_mfence", +- VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE); +- +- /* SSE3. 
*/ +- def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_monitor", +- VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR); +- def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_mwait", +- VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT); +- +- /* AES */ +- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, +- "__builtin_ia32_aesenc128", +- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128); +- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, +- "__builtin_ia32_aesenclast128", +- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128); +- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, +- "__builtin_ia32_aesdec128", +- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128); +- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, +- "__builtin_ia32_aesdeclast128", +- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128); +- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, +- "__builtin_ia32_aesimc128", +- V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128); +- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, +- "__builtin_ia32_aeskeygenassist128", +- V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128); +- +- /* PCLMUL */ +- def_builtin_const (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2, 0, +- "__builtin_ia32_pclmulqdq128", +- V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128); +- +- /* RDRND */ +- def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand16_step", +- INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP); +- def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand32_step", +- INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP); +- def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, 0, +- "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG, +- IX86_BUILTIN_RDRAND64_STEP); +- +- /* AVX2 */ +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2df", +- V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT, +- IX86_BUILTIN_GATHERSIV2DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4df", +- V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT, +- IX86_BUILTIN_GATHERSIV4DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2df", +- V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT, +- IX86_BUILTIN_GATHERDIV2DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4df", +- V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT, +- IX86_BUILTIN_GATHERDIV4DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4sf", +- V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT, +- IX86_BUILTIN_GATHERSIV4SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8sf", +- V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT, +- IX86_BUILTIN_GATHERSIV8SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf", +- V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT, +- IX86_BUILTIN_GATHERDIV4SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf256", +- V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT, +- IX86_BUILTIN_GATHERDIV8SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2di", +- V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT, +- IX86_BUILTIN_GATHERSIV2DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4di", +- V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT, +- IX86_BUILTIN_GATHERSIV4DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2di", +- V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT, +- IX86_BUILTIN_GATHERDIV2DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 
0, "__builtin_ia32_gatherdiv4di", +- V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT, +- IX86_BUILTIN_GATHERDIV4DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4si", +- V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT, +- IX86_BUILTIN_GATHERSIV4SI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8si", +- V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT, +- IX86_BUILTIN_GATHERSIV8SI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si", +- V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT, +- IX86_BUILTIN_GATHERDIV4SI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si256", +- V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT, +- IX86_BUILTIN_GATHERDIV8SI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4df ", +- V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT, +- IX86_BUILTIN_GATHERALTSIV4DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8sf ", +- V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT, +- IX86_BUILTIN_GATHERALTDIV8SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4di ", +- V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT, +- IX86_BUILTIN_GATHERALTSIV4DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8si ", +- V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT, +- IX86_BUILTIN_GATHERALTDIV8SI); +- +- /* AVX512F */ +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16sf", +- V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT, +- IX86_BUILTIN_GATHER3SIV16SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8df", +- V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT, +- IX86_BUILTIN_GATHER3SIV8DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16sf", +- V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV16SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8df", +- V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV8DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16si", +- V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT, +- IX86_BUILTIN_GATHER3SIV16SI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8di", +- V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT, +- IX86_BUILTIN_GATHER3SIV8DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16si", +- V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV16SI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8di", +- V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV8DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8df ", +- V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, +- IX86_BUILTIN_GATHER3ALTSIV8DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16sf ", +- V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, +- IX86_BUILTIN_GATHER3ALTDIV16SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8di ", +- V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, +- IX86_BUILTIN_GATHER3ALTSIV8DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16si ", +- V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, +- IX86_BUILTIN_GATHER3ALTDIV16SI); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16sf", +- VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT, +- IX86_BUILTIN_SCATTERSIV16SF); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8df", +- VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT, +- 
IX86_BUILTIN_SCATTERSIV8DF); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16sf", +- VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT, +- IX86_BUILTIN_SCATTERDIV16SF); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8df", +- VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT, +- IX86_BUILTIN_SCATTERDIV8DF); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16si", +- VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT, +- IX86_BUILTIN_SCATTERSIV16SI); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8di", +- VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT, +- IX86_BUILTIN_SCATTERSIV8DI); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16si", +- VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT, +- IX86_BUILTIN_SCATTERDIV16SI); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8di", +- VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT, +- IX86_BUILTIN_SCATTERDIV8DI); +- +- /* AVX512VL */ +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2df", +- V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT, +- IX86_BUILTIN_GATHER3SIV2DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4df", +- V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT, +- IX86_BUILTIN_GATHER3SIV4DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2df", +- V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV2DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4df", +- V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV4DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4sf", +- V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT, +- IX86_BUILTIN_GATHER3SIV4SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8sf", +- V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT, +- IX86_BUILTIN_GATHER3SIV8SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4sf", +- V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV4SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8sf", +- V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV8SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2di", +- V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT, +- IX86_BUILTIN_GATHER3SIV2DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4di", +- V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT, +- IX86_BUILTIN_GATHER3SIV4DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2di", +- V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV2DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4di", +- V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV4DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4si", +- V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT, +- IX86_BUILTIN_GATHER3SIV4SI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8si", +- V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT, +- IX86_BUILTIN_GATHER3SIV8SI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4si", +- V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV4SI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8si", +- V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT, +- IX86_BUILTIN_GATHER3DIV8SI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4df ", +- 
V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT, +- IX86_BUILTIN_GATHER3ALTSIV4DF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8sf ", +- V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT, +- IX86_BUILTIN_GATHER3ALTDIV8SF); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4di ", +- V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT, +- IX86_BUILTIN_GATHER3ALTSIV4DI); +- +- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8si ", +- V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT, +- IX86_BUILTIN_GATHER3ALTDIV8SI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8sf", +- VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT, +- IX86_BUILTIN_SCATTERSIV8SF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4sf", +- VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT, +- IX86_BUILTIN_SCATTERSIV4SF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4df", +- VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT, +- IX86_BUILTIN_SCATTERSIV4DF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2df", +- VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT, +- IX86_BUILTIN_SCATTERSIV2DF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8sf", +- VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT, +- IX86_BUILTIN_SCATTERDIV8SF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4sf", +- VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT, +- IX86_BUILTIN_SCATTERDIV4SF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4df", +- VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT, +- IX86_BUILTIN_SCATTERDIV4DF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2df", +- VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT, +- IX86_BUILTIN_SCATTERDIV2DF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8si", +- VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT, +- IX86_BUILTIN_SCATTERSIV8SI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4si", +- VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT, +- IX86_BUILTIN_SCATTERSIV4SI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4di", +- VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT, +- IX86_BUILTIN_SCATTERSIV4DI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2di", +- VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT, +- IX86_BUILTIN_SCATTERSIV2DI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8si", +- VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT, +- IX86_BUILTIN_SCATTERDIV8SI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4si", +- VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT, +- IX86_BUILTIN_SCATTERDIV4SI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4di", +- VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT, +- IX86_BUILTIN_SCATTERDIV4DI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2di", +- VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT, +- IX86_BUILTIN_SCATTERDIV2DI); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8df ", +- VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT, +- IX86_BUILTIN_SCATTERALTSIV8DF); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16sf ", +- VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT, +- IX86_BUILTIN_SCATTERALTDIV16SF); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8di ", +- VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT, +- IX86_BUILTIN_SCATTERALTSIV8DI); +- +- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16si ", +- 
VOID_FTYPE_PINT_HI_V8DI_V16SI_INT, +- IX86_BUILTIN_SCATTERALTDIV16SI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4df ", +- VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT, +- IX86_BUILTIN_SCATTERALTSIV4DF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8sf ", +- VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT, +- IX86_BUILTIN_SCATTERALTDIV8SF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4di ", +- VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT, +- IX86_BUILTIN_SCATTERALTSIV4DI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8si ", +- VOID_FTYPE_PINT_QI_V4DI_V8SI_INT, +- IX86_BUILTIN_SCATTERALTDIV8SI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2df ", +- VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT, +- IX86_BUILTIN_SCATTERALTSIV2DF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4sf ", +- VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT, +- IX86_BUILTIN_SCATTERALTDIV4SF); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2di ", +- VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT, +- IX86_BUILTIN_SCATTERALTSIV2DI); +- +- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4si ", +- VOID_FTYPE_PINT_QI_V2DI_V4SI_INT, +- IX86_BUILTIN_SCATTERALTDIV4SI); +- +- /* AVX512PF */ +- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdpd", +- VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, +- IX86_BUILTIN_GATHERPFDPD); +- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdps", +- VOID_FTYPE_HI_V16SI_PCVOID_INT_INT, +- IX86_BUILTIN_GATHERPFDPS); +- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqpd", +- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, +- IX86_BUILTIN_GATHERPFQPD); +- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqps", +- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, +- IX86_BUILTIN_GATHERPFQPS); +- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdpd", +- VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, +- IX86_BUILTIN_SCATTERPFDPD); +- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdps", +- VOID_FTYPE_HI_V16SI_PCVOID_INT_INT, +- IX86_BUILTIN_SCATTERPFDPS); +- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqpd", +- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, +- IX86_BUILTIN_SCATTERPFQPD); +- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqps", +- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, +- IX86_BUILTIN_SCATTERPFQPS); +- +- /* SHA */ +- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg1", +- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1); +- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg2", +- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2); +- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1nexte", +- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE); +- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1rnds4", +- V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4); +- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg1", +- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1); +- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg2", +- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2); +- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256rnds2", +- V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2); +- +- /* RTM. 
*/ +- def_builtin (OPTION_MASK_ISA_RTM, 0, "__builtin_ia32_xabort", +- VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT); +- +- /* MMX access to the vec_init patterns. */ +- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v2si", +- V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI); +- +- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v4hi", +- V4HI_FTYPE_HI_HI_HI_HI, +- IX86_BUILTIN_VEC_INIT_V4HI); +- +- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v8qi", +- V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI, +- IX86_BUILTIN_VEC_INIT_V8QI); +- +- /* Access to the vec_extract patterns. */ +- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2df", +- DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF); +- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2di", +- DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI); +- def_builtin_const (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_vec_ext_v4sf", +- FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF); +- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v4si", +- SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI); +- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v8hi", +- HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI); +- +- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A +- /* As it uses V4HImode, we have to require -mmmx too. */ +- | OPTION_MASK_ISA_MMX, 0, +- "__builtin_ia32_vec_ext_v4hi", +- HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI); +- +- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_ext_v2si", +- SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI); +- +- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi", +- QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI); +- +- /* Access to the vec_set patterns. */ +- def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, 0, +- "__builtin_ia32_vec_set_v2di", +- V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI); +- +- def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4sf", +- V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF); +- +- def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4si", +- V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI); +- +- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_set_v8hi", +- V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI); +- +- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A +- /* As it uses V4HImode, we have to require -mmmx too. 
*/ +- | OPTION_MASK_ISA_MMX, 0, +- "__builtin_ia32_vec_set_v4hi", +- V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI); +- +- def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v16qi", +- V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI); +- +- /* RDSEED */ +- def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_hi_step", +- INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP); +- def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_si_step", +- INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP); +- def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT, 0, +- "__builtin_ia32_rdseed_di_step", +- INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP); +- +- /* ADCX */ +- def_builtin (0, 0, "__builtin_ia32_addcarryx_u32", +- UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32); +- def_builtin (OPTION_MASK_ISA_64BIT, 0, +- "__builtin_ia32_addcarryx_u64", +- UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, +- IX86_BUILTIN_ADDCARRYX64); +- +- /* SBB */ +- def_builtin (0, 0, "__builtin_ia32_sbb_u32", +- UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32); +- def_builtin (OPTION_MASK_ISA_64BIT, 0, +- "__builtin_ia32_sbb_u64", +- UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, +- IX86_BUILTIN_SBB64); +- +- /* Read/write FLAGS. */ +- if (TARGET_64BIT) +- { +- def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_readeflags_u64", +- UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); +- def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_writeeflags_u64", +- VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS); +- } +- else +- { +- def_builtin (0, 0, "__builtin_ia32_readeflags_u32", +- UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); +- def_builtin (0, 0, "__builtin_ia32_writeeflags_u32", +- VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS); +- } +- +- /* CLFLUSHOPT. */ +- def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, 0, "__builtin_ia32_clflushopt", +- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT); +- +- /* CLWB. */ +- def_builtin (OPTION_MASK_ISA_CLWB, 0, "__builtin_ia32_clwb", +- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB); +- +- /* MONITORX and MWAITX. */ +- def_builtin (0, OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx", +- VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX); +- def_builtin (0, OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx", +- VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX); +- +- /* CLZERO. */ +- def_builtin (0, OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero", +- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO); +- +- /* WAITPKG. */ +- def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umonitor", +- VOID_FTYPE_PVOID, IX86_BUILTIN_UMONITOR); +- def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umwait", +- UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_UMWAIT); +- def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_tpause", +- UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE); +- +- /* CLDEMOTE. 
*/ +- def_builtin (0, OPTION_MASK_ISA_CLDEMOTE, "__builtin_ia32_cldemote", +- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLDEMOTE); +- +- /* Add FMA4 multi-arg argument instructions */ +- for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) +- { +- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i); +- if (d->name == 0) +- continue; +- +- ftype = (enum ix86_builtin_func_type) d->flag; +- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); +- } +- BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST, +- IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, +- ARRAY_SIZE (bdesc_multi_arg) - 1); +- +- /* Add CET inrinsics. */ +- for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++) +- { +- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i); +- if (d->name == 0) +- continue; +- +- ftype = (enum ix86_builtin_func_type) d->flag; +- def_builtin (d->mask, d->mask2, d->name, ftype, d->code); +- } +- BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST, +- IX86_BUILTIN__BDESC_CET_FIRST, +- ARRAY_SIZE (bdesc_cet) - 1); +- +- for (i = 0, d = bdesc_cet_rdssp; +- i < ARRAY_SIZE (bdesc_cet_rdssp); +- i++, d++) +- { +- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i); +- if (d->name == 0) +- continue; +- +- ftype = (enum ix86_builtin_func_type) d->flag; +- def_builtin (d->mask, d->mask2, d->name, ftype, d->code); +- } +- BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST, +- IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, +- ARRAY_SIZE (bdesc_cet_rdssp) - 1); +-} +- +-#undef BDESC_VERIFY +-#undef BDESC_VERIFYS +- +-/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL +- to return a pointer to VERSION_DECL if the outcome of the expression +- formed by PREDICATE_CHAIN is true. This function will be called during +- version dispatch to decide which function version to execute. It returns +- the basic block at the end, to which more conditions can be added. 
*/ +- +-static basic_block +-add_condition_to_bb (tree function_decl, tree version_decl, +- tree predicate_chain, basic_block new_bb) +-{ +- gimple *return_stmt; +- tree convert_expr, result_var; +- gimple *convert_stmt; +- gimple *call_cond_stmt; +- gimple *if_else_stmt; +- +- basic_block bb1, bb2, bb3; +- edge e12, e23; +- +- tree cond_var, and_expr_var = NULL_TREE; +- gimple_seq gseq; +- +- tree predicate_decl, predicate_arg; +- +- push_cfun (DECL_STRUCT_FUNCTION (function_decl)); +- +- gcc_assert (new_bb != NULL); +- gseq = bb_seq (new_bb); +- +- +- convert_expr = build1 (CONVERT_EXPR, ptr_type_node, +- build_fold_addr_expr (version_decl)); +- result_var = create_tmp_var (ptr_type_node); +- convert_stmt = gimple_build_assign (result_var, convert_expr); +- return_stmt = gimple_build_return (result_var); +- +- if (predicate_chain == NULL_TREE) +- { +- gimple_seq_add_stmt (&gseq, convert_stmt); +- gimple_seq_add_stmt (&gseq, return_stmt); +- set_bb_seq (new_bb, gseq); +- gimple_set_bb (convert_stmt, new_bb); +- gimple_set_bb (return_stmt, new_bb); +- pop_cfun (); +- return new_bb; +- } +- +- while (predicate_chain != NULL) +- { +- cond_var = create_tmp_var (integer_type_node); +- predicate_decl = TREE_PURPOSE (predicate_chain); +- predicate_arg = TREE_VALUE (predicate_chain); +- call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg); +- gimple_call_set_lhs (call_cond_stmt, cond_var); +- +- gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl)); +- gimple_set_bb (call_cond_stmt, new_bb); +- gimple_seq_add_stmt (&gseq, call_cond_stmt); +- +- predicate_chain = TREE_CHAIN (predicate_chain); +- +- if (and_expr_var == NULL) +- and_expr_var = cond_var; +- else +- { +- gimple *assign_stmt; +- /* Use MIN_EXPR to check if any integer is zero?. +- and_expr_var = min_expr <cond_var, and_expr_var> */ +- assign_stmt = gimple_build_assign (and_expr_var, +- build2 (MIN_EXPR, integer_type_node, +- cond_var, and_expr_var)); +- +- gimple_set_block (assign_stmt, DECL_INITIAL (function_decl)); +- gimple_set_bb (assign_stmt, new_bb); +- gimple_seq_add_stmt (&gseq, assign_stmt); +- } +- } +- +- if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var, +- integer_zero_node, +- NULL_TREE, NULL_TREE); +- gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl)); +- gimple_set_bb (if_else_stmt, new_bb); +- gimple_seq_add_stmt (&gseq, if_else_stmt); +- +- gimple_seq_add_stmt (&gseq, convert_stmt); +- gimple_seq_add_stmt (&gseq, return_stmt); +- set_bb_seq (new_bb, gseq); +- +- bb1 = new_bb; +- e12 = split_block (bb1, if_else_stmt); +- bb2 = e12->dest; +- e12->flags &= ~EDGE_FALLTHRU; +- e12->flags |= EDGE_TRUE_VALUE; +- +- e23 = split_block (bb2, return_stmt); +- +- gimple_set_bb (convert_stmt, bb2); +- gimple_set_bb (return_stmt, bb2); +- +- bb3 = e23->dest; +- make_edge (bb1, bb3, EDGE_FALSE_VALUE); +- +- remove_edge (e23); +- make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); +- +- pop_cfun (); +- +- return bb3; +-} +- +-/* Priority of i386 features, greater value is higher priority. This is +- used to decide the order in which function dispatch must happen. For +- instance, a version specialized for SSE4.2 should be checked for dispatch +- before a version for SSE3, as SSE4.2 implies SSE3.
*/ +-enum feature_priority +-{ +- P_ZERO = 0, +- P_MMX, +- P_SSE, +- P_SSE2, +- P_SSE3, +- P_SSSE3, +- P_PROC_SSSE3, +- P_SSE4_A, +- P_PROC_SSE4_A, +- P_SSE4_1, +- P_SSE4_2, +- P_PROC_SSE4_2, +- P_POPCNT, +- P_AES, +- P_PCLMUL, +- P_AVX, +- P_PROC_AVX, +- P_BMI, +- P_PROC_BMI, +- P_FMA4, +- P_XOP, +- P_PROC_XOP, +- P_FMA, +- P_PROC_FMA, +- P_BMI2, +- P_AVX2, +- P_PROC_AVX2, +- P_AVX512F, +- P_PROC_AVX512F +-}; +- +-/* This is the order of bit-fields in __processor_features in cpuinfo.c */ +-enum processor_features +-{ +- F_CMOV = 0, +- F_MMX, +- F_POPCNT, +- F_SSE, +- F_SSE2, +- F_SSE3, +- F_SSSE3, +- F_SSE4_1, +- F_SSE4_2, +- F_AVX, +- F_AVX2, +- F_SSE4_A, +- F_FMA4, +- F_XOP, +- F_FMA, +- F_AVX512F, +- F_BMI, +- F_BMI2, +- F_AES, +- F_PCLMUL, +- F_AVX512VL, +- F_AVX512BW, +- F_AVX512DQ, +- F_AVX512CD, +- F_AVX512ER, +- F_AVX512PF, +- F_AVX512VBMI, +- F_AVX512IFMA, +- F_AVX5124VNNIW, +- F_AVX5124FMAPS, +- F_AVX512VPOPCNTDQ, +- F_AVX512VBMI2, +- F_GFNI, +- F_VPCLMULQDQ, +- F_AVX512VNNI, +- F_AVX512BITALG, +- F_MAX +-}; +- +-/* These are the values for vendor types and cpu types and subtypes +- in cpuinfo.c. Cpu types and subtypes should be subtracted by +- the corresponding start value. */ +-enum processor_model +-{ +- M_INTEL = 1, +- M_AMD, +- M_CPU_TYPE_START, +- M_INTEL_BONNELL, +- M_INTEL_CORE2, +- M_INTEL_COREI7, +- M_AMDFAM10H, +- M_AMDFAM15H, +- M_INTEL_SILVERMONT, +- M_INTEL_KNL, +- M_AMD_BTVER1, +- M_AMD_BTVER2, +- M_AMDFAM17H, +- M_INTEL_KNM, +- M_INTEL_GOLDMONT, +- M_INTEL_GOLDMONT_PLUS, +- M_INTEL_TREMONT, +- M_CPU_SUBTYPE_START, +- M_INTEL_COREI7_NEHALEM, +- M_INTEL_COREI7_WESTMERE, +- M_INTEL_COREI7_SANDYBRIDGE, +- M_AMDFAM10H_BARCELONA, +- M_AMDFAM10H_SHANGHAI, +- M_AMDFAM10H_ISTANBUL, +- M_AMDFAM15H_BDVER1, +- M_AMDFAM15H_BDVER2, +- M_AMDFAM15H_BDVER3, +- M_AMDFAM15H_BDVER4, +- M_AMDFAM17H_ZNVER1, +- M_INTEL_COREI7_IVYBRIDGE, +- M_INTEL_COREI7_HASWELL, +- M_INTEL_COREI7_BROADWELL, +- M_INTEL_COREI7_SKYLAKE, +- M_INTEL_COREI7_SKYLAKE_AVX512, +- M_INTEL_COREI7_CANNONLAKE, +- M_INTEL_COREI7_ICELAKE_CLIENT, +- M_INTEL_COREI7_ICELAKE_SERVER, +- M_AMDFAM17H_ZNVER2, +- M_INTEL_COREI7_CASCADELAKE +-}; +- +-struct _arch_names_table +-{ +- const char *const name; +- const enum processor_model model; +-}; +- +-static const _arch_names_table arch_names_table[] = +-{ +- {"amd", M_AMD}, +- {"intel", M_INTEL}, +- {"atom", M_INTEL_BONNELL}, +- {"slm", M_INTEL_SILVERMONT}, +- {"core2", M_INTEL_CORE2}, +- {"corei7", M_INTEL_COREI7}, +- {"nehalem", M_INTEL_COREI7_NEHALEM}, +- {"westmere", M_INTEL_COREI7_WESTMERE}, +- {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE}, +- {"ivybridge", M_INTEL_COREI7_IVYBRIDGE}, +- {"haswell", M_INTEL_COREI7_HASWELL}, +- {"broadwell", M_INTEL_COREI7_BROADWELL}, +- {"skylake", M_INTEL_COREI7_SKYLAKE}, +- {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512}, +- {"cannonlake", M_INTEL_COREI7_CANNONLAKE}, +- {"icelake-client", M_INTEL_COREI7_ICELAKE_CLIENT}, +- {"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER}, +- {"cascadelake", M_INTEL_COREI7_CASCADELAKE}, +- {"bonnell", M_INTEL_BONNELL}, +- {"silvermont", M_INTEL_SILVERMONT}, +- {"goldmont", M_INTEL_GOLDMONT}, +- {"goldmont-plus", M_INTEL_GOLDMONT_PLUS}, +- {"tremont", M_INTEL_TREMONT}, +- {"knl", M_INTEL_KNL}, +- {"knm", M_INTEL_KNM}, +- {"amdfam10h", M_AMDFAM10H}, +- {"barcelona", M_AMDFAM10H_BARCELONA}, +- {"shanghai", M_AMDFAM10H_SHANGHAI}, +- {"istanbul", M_AMDFAM10H_ISTANBUL}, +- {"btver1", M_AMD_BTVER1}, +- {"amdfam15h", M_AMDFAM15H}, +- {"bdver1", M_AMDFAM15H_BDVER1}, +- {"bdver2", M_AMDFAM15H_BDVER2}, +- 
{"bdver3", M_AMDFAM15H_BDVER3}, +- {"bdver4", M_AMDFAM15H_BDVER4}, +- {"btver2", M_AMD_BTVER2}, +- {"amdfam17h", M_AMDFAM17H}, +- {"znver1", M_AMDFAM17H_ZNVER1}, +- {"znver2", M_AMDFAM17H_ZNVER2}, +-}; +- +-/* These are the target attribute strings for which a dispatcher is +- available, from fold_builtin_cpu. */ +-struct _isa_names_table +-{ +- const char *const name; +- const enum processor_features feature; +- const enum feature_priority priority; +-}; +- +-static const _isa_names_table isa_names_table[] = +-{ +- {"cmov", F_CMOV, P_ZERO}, +- {"mmx", F_MMX, P_MMX}, +- {"popcnt", F_POPCNT, P_POPCNT}, +- {"sse", F_SSE, P_SSE}, +- {"sse2", F_SSE2, P_SSE2}, +- {"sse3", F_SSE3, P_SSE3}, +- {"ssse3", F_SSSE3, P_SSSE3}, +- {"sse4a", F_SSE4_A, P_SSE4_A}, +- {"sse4.1", F_SSE4_1, P_SSE4_1}, +- {"sse4.2", F_SSE4_2, P_SSE4_2}, +- {"avx", F_AVX, P_AVX}, +- {"fma4", F_FMA4, P_FMA4}, +- {"xop", F_XOP, P_XOP}, +- {"fma", F_FMA, P_FMA}, +- {"avx2", F_AVX2, P_AVX2}, +- {"avx512f", F_AVX512F, P_AVX512F}, +- {"bmi", F_BMI, P_BMI}, +- {"bmi2", F_BMI2, P_BMI2}, +- {"aes", F_AES, P_AES}, +- {"pclmul", F_PCLMUL, P_PCLMUL}, +- {"avx512vl",F_AVX512VL, P_ZERO}, +- {"avx512bw",F_AVX512BW, P_ZERO}, +- {"avx512dq",F_AVX512DQ, P_ZERO}, +- {"avx512cd",F_AVX512CD, P_ZERO}, +- {"avx512er",F_AVX512ER, P_ZERO}, +- {"avx512pf",F_AVX512PF, P_ZERO}, +- {"avx512vbmi",F_AVX512VBMI, P_ZERO}, +- {"avx512ifma",F_AVX512IFMA, P_ZERO}, +- {"avx5124vnniw",F_AVX5124VNNIW, P_ZERO}, +- {"avx5124fmaps",F_AVX5124FMAPS, P_ZERO}, +- {"avx512vpopcntdq",F_AVX512VPOPCNTDQ, P_ZERO}, +- {"avx512vbmi2", F_AVX512VBMI2, P_ZERO}, +- {"gfni", F_GFNI, P_ZERO}, +- {"vpclmulqdq", F_VPCLMULQDQ, P_ZERO}, +- {"avx512vnni", F_AVX512VNNI, P_ZERO}, +- {"avx512bitalg", F_AVX512BITALG, P_ZERO} +-}; +- +-/* This parses the attribute arguments to target in DECL and determines +- the right builtin to use to match the platform specification. +- It returns the priority value for this version decl. If PREDICATE_LIST +- is not NULL, it stores the list of cpu features that need to be checked +- before dispatching this function. */ +- +-static unsigned int +-get_builtin_code_for_version (tree decl, tree *predicate_list) +-{ +- tree attrs; +- struct cl_target_option cur_target; +- tree target_node; +- struct cl_target_option *new_target; +- const char *arg_str = NULL; +- const char *attrs_str = NULL; +- char *tok_str = NULL; +- char *token; +- +- enum feature_priority priority = P_ZERO; +- +- static unsigned int NUM_FEATURES +- = sizeof (isa_names_table) / sizeof (_isa_names_table); +- +- unsigned int i; +- +- tree predicate_chain = NULL_TREE; +- tree predicate_decl, predicate_arg; +- +- attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); +- gcc_assert (attrs != NULL); +- +- attrs = TREE_VALUE (TREE_VALUE (attrs)); +- +- gcc_assert (TREE_CODE (attrs) == STRING_CST); +- attrs_str = TREE_STRING_POINTER (attrs); +- +- /* Return priority zero for default function. */ +- if (strcmp (attrs_str, "default") == 0) +- return 0; +- +- /* Handle arch= if specified. For priority, set it to be 1 more than +- the best instruction set the processor can handle. For instance, if +- there is a version for atom and a version for ssse3 (the highest ISA +- priority for atom), the atom version must be checked for dispatch +- before the ssse3 version. 
*/ +- if (strstr (attrs_str, "arch=") != NULL) +- { +- cl_target_option_save (&cur_target, &global_options); +- target_node = ix86_valid_target_attribute_tree (attrs, &global_options, +- &global_options_set); +- +- gcc_assert (target_node); +- if (target_node == error_mark_node) +- return 0; +- new_target = TREE_TARGET_OPTION (target_node); +- gcc_assert (new_target); +- +- if (new_target->arch_specified && new_target->arch > 0) +- { +- switch (new_target->arch) +- { +- case PROCESSOR_CORE2: +- arg_str = "core2"; +- priority = P_PROC_SSSE3; +- break; +- case PROCESSOR_NEHALEM: +- if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_PCLMUL) +- { +- arg_str = "westmere"; +- priority = P_PCLMUL; +- } +- else +- { +- /* We translate "arch=corei7" and "arch=nehalem" to +- "corei7" so that it will be mapped to M_INTEL_COREI7 +- as cpu type to cover all M_INTEL_COREI7_XXXs. */ +- arg_str = "corei7"; +- priority = P_PROC_SSE4_2; +- } +- break; +- case PROCESSOR_SANDYBRIDGE: +- if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C) +- arg_str = "ivybridge"; +- else +- arg_str = "sandybridge"; +- priority = P_PROC_AVX; +- break; +- case PROCESSOR_HASWELL: +- if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX) +- arg_str = "broadwell"; +- else +- arg_str = "haswell"; +- priority = P_PROC_AVX2; +- break; +- case PROCESSOR_SKYLAKE: +- arg_str = "skylake"; +- priority = P_PROC_AVX2; +- break; +- case PROCESSOR_SKYLAKE_AVX512: +- arg_str = "skylake-avx512"; +- priority = P_PROC_AVX512F; +- break; +- case PROCESSOR_CANNONLAKE: +- arg_str = "cannonlake"; +- priority = P_PROC_AVX512F; +- break; +- case PROCESSOR_ICELAKE_CLIENT: +- arg_str = "icelake-client"; +- priority = P_PROC_AVX512F; +- break; +- case PROCESSOR_ICELAKE_SERVER: +- arg_str = "icelake-server"; +- priority = P_PROC_AVX512F; +- break; +- case PROCESSOR_CASCADELAKE: +- arg_str = "cascadelake"; +- priority = P_PROC_AVX512F; +- break; +- case PROCESSOR_BONNELL: +- arg_str = "bonnell"; +- priority = P_PROC_SSSE3; +- break; +- case PROCESSOR_KNL: +- arg_str = "knl"; +- priority = P_PROC_AVX512F; +- break; +- case PROCESSOR_KNM: +- arg_str = "knm"; +- priority = P_PROC_AVX512F; +- break; +- case PROCESSOR_SILVERMONT: +- arg_str = "silvermont"; +- priority = P_PROC_SSE4_2; +- break; +- case PROCESSOR_GOLDMONT: +- arg_str = "goldmont"; +- priority = P_PROC_SSE4_2; +- break; +- case PROCESSOR_GOLDMONT_PLUS: +- arg_str = "goldmont-plus"; +- priority = P_PROC_SSE4_2; +- break; +- case PROCESSOR_TREMONT: +- arg_str = "tremont"; +- priority = P_PROC_SSE4_2; +- break; +- case PROCESSOR_AMDFAM10: +- arg_str = "amdfam10h"; +- priority = P_PROC_SSE4_A; +- break; +- case PROCESSOR_BTVER1: +- arg_str = "btver1"; +- priority = P_PROC_SSE4_A; +- break; +- case PROCESSOR_BTVER2: +- arg_str = "btver2"; +- priority = P_PROC_BMI; +- break; +- case PROCESSOR_BDVER1: +- arg_str = "bdver1"; +- priority = P_PROC_XOP; +- break; +- case PROCESSOR_BDVER2: +- arg_str = "bdver2"; +- priority = P_PROC_FMA; +- break; +- case PROCESSOR_BDVER3: +- arg_str = "bdver3"; +- priority = P_PROC_FMA; +- break; +- case PROCESSOR_BDVER4: +- arg_str = "bdver4"; +- priority = P_PROC_AVX2; +- break; +- case PROCESSOR_ZNVER1: +- arg_str = "znver1"; +- priority = P_PROC_AVX2; +- break; +- case PROCESSOR_ZNVER2: +- arg_str = "znver2"; +- priority = P_PROC_AVX2; +- break; +- } +- } +- +- cl_target_option_restore (&global_options, &cur_target); +- +- if (predicate_list && arg_str == NULL) +- { +- error_at (DECL_SOURCE_LOCATION (decl), +- "no dispatcher found for the versioning attributes"); 
+- return 0; +- } +- +- if (predicate_list) +- { +- predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS]; +- /* For a C string literal the length includes the trailing NULL. */ +- predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str); +- predicate_chain = tree_cons (predicate_decl, predicate_arg, +- predicate_chain); +- } +- } +- +- /* Process feature name. */ +- tok_str = (char *) xmalloc (strlen (attrs_str) + 1); +- strcpy (tok_str, attrs_str); +- token = strtok (tok_str, ","); +- predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS]; +- +- while (token != NULL) +- { +- /* Do not process "arch=" */ +- if (strncmp (token, "arch=", 5) == 0) +- { +- token = strtok (NULL, ","); +- continue; +- } +- for (i = 0; i < NUM_FEATURES; ++i) +- { +- if (strcmp (token, isa_names_table[i].name) == 0) +- { +- if (predicate_list) +- { +- predicate_arg = build_string_literal ( +- strlen (isa_names_table[i].name) + 1, +- isa_names_table[i].name); +- predicate_chain = tree_cons (predicate_decl, predicate_arg, +- predicate_chain); +- } +- /* Find the maximum priority feature. */ +- if (isa_names_table[i].priority > priority) +- priority = isa_names_table[i].priority; +- +- break; +- } +- } +- if (predicate_list && priority == P_ZERO) +- { +- error_at (DECL_SOURCE_LOCATION (decl), +- "ISA %qs is not supported in %<target%> attribute, " +- "use %<arch=%> syntax", token); +- return 0; +- } +- token = strtok (NULL, ","); +- } +- free (tok_str); +- +- if (predicate_list && predicate_chain == NULL_TREE) +- { +- error_at (DECL_SOURCE_LOCATION (decl), +- "no dispatcher found for the versioning attributes: %s", +- attrs_str); +- return 0; +- } +- else if (predicate_list) +- { +- predicate_chain = nreverse (predicate_chain); +- *predicate_list = predicate_chain; +- } +- +- return priority; +-} +- +-/* This compares the priority of target features in function DECL1 +- and DECL2. It returns positive value if DECL1 is higher priority, +- negative value if DECL2 is higher priority and 0 if they are the +- same. */ +- +-static int +-ix86_compare_version_priority (tree decl1, tree decl2) +-{ +- unsigned int priority1 = get_builtin_code_for_version (decl1, NULL); +- unsigned int priority2 = get_builtin_code_for_version (decl2, NULL); +- +- return (int)priority1 - (int)priority2; +-} +- +-/* V1 and V2 point to function versions with different priorities +- based on the target ISA. This function compares their priorities. */ +- +-static int +-feature_compare (const void *v1, const void *v2) +-{ +- typedef struct _function_version_info +- { +- tree version_decl; +- tree predicate_chain; +- unsigned int dispatch_priority; +- } function_version_info; +- +- const function_version_info c1 = *(const function_version_info *)v1; +- const function_version_info c2 = *(const function_version_info *)v2; +- return (c2.dispatch_priority - c1.dispatch_priority); +-} +- +-/* This function generates the dispatch function for +- multi-versioned functions. DISPATCH_DECL is the function which will +- contain the dispatch logic. FNDECLS are the function choices for +- dispatch, and is a tree chain. EMPTY_BB is the basic block pointer +- in DISPATCH_DECL in which the dispatch code is generated.
*/ +- +-static int +-dispatch_function_versions (tree dispatch_decl, +- void *fndecls_p, +- basic_block *empty_bb) +-{ +- tree default_decl; +- gimple *ifunc_cpu_init_stmt; +- gimple_seq gseq; +- int ix; +- tree ele; +- vec<tree> *fndecls; +- unsigned int num_versions = 0; +- unsigned int actual_versions = 0; +- unsigned int i; +- +- struct _function_version_info +- { +- tree version_decl; +- tree predicate_chain; +- unsigned int dispatch_priority; +- }*function_version_info; +- +- gcc_assert (dispatch_decl != NULL +- && fndecls_p != NULL +- && empty_bb != NULL); +- +- /*fndecls_p is actually a vector. */ +- fndecls = static_cast<vec<tree> *> (fndecls_p); +- +- /* At least one more version other than the default. */ +- num_versions = fndecls->length (); +- gcc_assert (num_versions >= 2); +- +- function_version_info = (struct _function_version_info *) +- XNEWVEC (struct _function_version_info, (num_versions - 1)); +- +- /* The first version in the vector is the default decl. */ +- default_decl = (*fndecls)[0]; +- +- push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl)); +- +- gseq = bb_seq (*empty_bb); +- /* Function version dispatch is via IFUNC. IFUNC resolvers fire before +- constructors, so explicity call __builtin_cpu_init here. */ +- ifunc_cpu_init_stmt = gimple_build_call_vec ( +- ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL); +- gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt); +- gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb); +- set_bb_seq (*empty_bb, gseq); +- +- pop_cfun (); +- +- +- for (ix = 1; fndecls->iterate (ix, &ele); ++ix) +- { +- tree version_decl = ele; +- tree predicate_chain = NULL_TREE; +- unsigned int priority; +- /* Get attribute string, parse it and find the right predicate decl. +- The predicate function could be a lengthy combination of many +- features, like arch-type and various isa-variants. */ +- priority = get_builtin_code_for_version (version_decl, +- &predicate_chain); +- +- if (predicate_chain == NULL_TREE) +- continue; +- +- function_version_info [actual_versions].version_decl = version_decl; +- function_version_info [actual_versions].predicate_chain +- = predicate_chain; +- function_version_info [actual_versions].dispatch_priority = priority; +- actual_versions++; +- } +- +- /* Sort the versions according to descending order of dispatch priority. The +- priority is based on the ISA. This is not a perfect solution. There +- could still be ambiguity. If more than one function version is suitable +- to execute, which one should be dispatched? In future, allow the user +- to specify a dispatch priority next to the version. */ +- qsort (function_version_info, actual_versions, +- sizeof (struct _function_version_info), feature_compare); +- +- for (i = 0; i < actual_versions; ++i) +- *empty_bb = add_condition_to_bb (dispatch_decl, +- function_version_info[i].version_decl, +- function_version_info[i].predicate_chain, +- *empty_bb); +- +- /* dispatch default version at the end. */ +- *empty_bb = add_condition_to_bb (dispatch_decl, default_decl, +- NULL, *empty_bb); +- +- free (function_version_info); +- return 0; +-} +- +-/* This function changes the assembler name for functions that are +- versions. If DECL is a function version and has a "target" +- attribute, it appends the attribute string to its assembler name.
*/ +- +-static tree +-ix86_mangle_function_version_assembler_name (tree decl, tree id) +-{ +- tree version_attr; +- const char *orig_name, *version_string; +- char *attr_str, *assembler_name; +- +- if (DECL_DECLARED_INLINE_P (decl) +- && lookup_attribute ("gnu_inline", +- DECL_ATTRIBUTES (decl))) +- error_at (DECL_SOURCE_LOCATION (decl), +- "function versions cannot be marked as gnu_inline," +- " bodies have to be generated"); +- +- if (DECL_VIRTUAL_P (decl) +- || DECL_VINDEX (decl)) +- sorry ("virtual function multiversioning not supported"); +- +- version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); +- +- /* target attribute string cannot be NULL. */ +- gcc_assert (version_attr != NULL_TREE); +- +- orig_name = IDENTIFIER_POINTER (id); +- version_string +- = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr))); +- +- if (strcmp (version_string, "default") == 0) +- return id; +- +- attr_str = sorted_attr_string (TREE_VALUE (version_attr)); +- assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2); +- +- sprintf (assembler_name, "%s.%s", orig_name, attr_str); +- +- /* Allow assembler name to be modified if already set. */ +- if (DECL_ASSEMBLER_NAME_SET_P (decl)) +- SET_DECL_RTL (decl, NULL); +- +- tree ret = get_identifier (assembler_name); +- XDELETEVEC (attr_str); +- XDELETEVEC (assembler_name); +- return ret; +-} +- +- +-static tree +-ix86_mangle_decl_assembler_name (tree decl, tree id) +-{ +- /* For function version, add the target suffix to the assembler name. */ +- if (TREE_CODE (decl) == FUNCTION_DECL +- && DECL_FUNCTION_VERSIONED (decl)) +- id = ix86_mangle_function_version_assembler_name (decl, id); +-#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME +- id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id); +-#endif +- +- return id; +-} +- +-/* Make a dispatcher declaration for the multi-versioned function DECL. +- Calls to DECL function will be replaced with calls to the dispatcher +- by the front-end. Returns the decl of the dispatcher function. */ +- +-static tree +-ix86_get_function_versions_dispatcher (void *decl) +-{ +- tree fn = (tree) decl; +- struct cgraph_node *node = NULL; +- struct cgraph_node *default_node = NULL; +- struct cgraph_function_version_info *node_v = NULL; +- struct cgraph_function_version_info *first_v = NULL; +- +- tree dispatch_decl = NULL; +- +- struct cgraph_function_version_info *default_version_info = NULL; +- +- gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); +- +- node = cgraph_node::get (fn); +- gcc_assert (node != NULL); +- +- node_v = node->function_version (); +- gcc_assert (node_v != NULL); +- +- if (node_v->dispatcher_resolver != NULL) +- return node_v->dispatcher_resolver; +- +- /* Find the default version and make it the first node. */ +- first_v = node_v; +- /* Go to the beginning of the chain. */ +- while (first_v->prev != NULL) +- first_v = first_v->prev; +- default_version_info = first_v; +- while (default_version_info != NULL) +- { +- if (is_function_default_version +- (default_version_info->this_node->decl)) +- break; +- default_version_info = default_version_info->next; +- } +- +- /* If there is no default node, just return NULL. */ +- if (default_version_info == NULL) +- return NULL; +- +- /* Make default info the first node. 
*/ +- if (first_v != default_version_info) +- { +- default_version_info->prev->next = default_version_info->next; +- if (default_version_info->next) +- default_version_info->next->prev = default_version_info->prev; +- first_v->prev = default_version_info; +- default_version_info->next = first_v; +- default_version_info->prev = NULL; +- } +- +- default_node = default_version_info->this_node; +- +-#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) +- if (targetm.has_ifunc_p ()) +- { +- struct cgraph_function_version_info *it_v = NULL; +- struct cgraph_node *dispatcher_node = NULL; +- struct cgraph_function_version_info *dispatcher_version_info = NULL; +- +- /* Right now, the dispatching is done via ifunc. */ +- dispatch_decl = make_dispatcher_decl (default_node->decl); +- +- dispatcher_node = cgraph_node::get_create (dispatch_decl); +- gcc_assert (dispatcher_node != NULL); +- dispatcher_node->dispatcher_function = 1; +- dispatcher_version_info +- = dispatcher_node->insert_new_function_version (); +- dispatcher_version_info->next = default_version_info; +- dispatcher_node->definition = 1; +- +- /* Set the dispatcher for all the versions. */ +- it_v = default_version_info; +- while (it_v != NULL) +- { +- it_v->dispatcher_resolver = dispatch_decl; +- it_v = it_v->next; +- } +- } +- else +-#endif +- { +- error_at (DECL_SOURCE_LOCATION (default_node->decl), +- "multiversioning needs ifunc which is not supported " +- "on this target"); +- } +- +- return dispatch_decl; +-} +- +-/* Make the resolver function decl to dispatch the versions of +- a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is +- ifunc alias that will point to the created resolver. Create an +- empty basic block in the resolver and store the pointer in +- EMPTY_BB. Return the decl of the resolver function. */ +- +-static tree +-make_resolver_func (const tree default_decl, +- const tree ifunc_alias_decl, +- basic_block *empty_bb) +-{ +- char *resolver_name; +- tree decl, type, decl_name, t; +- +- /* IFUNC's have to be globally visible. So, if the default_decl is +- not, then the name of the IFUNC should be made unique. */ +- if (TREE_PUBLIC (default_decl) == 0) +- { +- char *ifunc_name = make_unique_name (default_decl, "ifunc", true); +- symtab->change_decl_assembler_name (ifunc_alias_decl, +- get_identifier (ifunc_name)); +- XDELETEVEC (ifunc_name); +- } +- +- resolver_name = make_unique_name (default_decl, "resolver", false); +- +- /* The resolver function should return a (void *). */ +- type = build_function_type_list (ptr_type_node, NULL_TREE); +- +- decl = build_fn_decl (resolver_name, type); +- decl_name = get_identifier (resolver_name); +- SET_DECL_ASSEMBLER_NAME (decl, decl_name); +- +- DECL_NAME (decl) = decl_name; +- TREE_USED (decl) = 1; +- DECL_ARTIFICIAL (decl) = 1; +- DECL_IGNORED_P (decl) = 1; +- TREE_PUBLIC (decl) = 0; +- DECL_UNINLINABLE (decl) = 1; +- +- /* Resolver is not external, body is generated. */ +- DECL_EXTERNAL (decl) = 0; +- DECL_EXTERNAL (ifunc_alias_decl) = 0; +- +- DECL_CONTEXT (decl) = NULL_TREE; +- DECL_INITIAL (decl) = make_node (BLOCK); +- DECL_STATIC_CONSTRUCTOR (decl) = 0; +- +- if (DECL_COMDAT_GROUP (default_decl) +- || TREE_PUBLIC (default_decl)) +- { +- /* In this case, each translation unit with a call to this +- versioned function will put out a resolver. Ensure it +- is comdat to keep just one copy. */ +- DECL_COMDAT (decl) = 1; +- make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); +- } +- /* Build result decl and add to function_decl. 
*/ +- t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node); +- DECL_CONTEXT (t) = decl; +- DECL_ARTIFICIAL (t) = 1; +- DECL_IGNORED_P (t) = 1; +- DECL_RESULT (decl) = t; +- +- gimplify_function_tree (decl); +- push_cfun (DECL_STRUCT_FUNCTION (decl)); +- *empty_bb = init_lowered_empty_function (decl, false, +- profile_count::uninitialized ()); +- +- cgraph_node::add_new_function (decl, true); +- symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl)); +- +- pop_cfun (); +- +- gcc_assert (ifunc_alias_decl != NULL); +- /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */ +- DECL_ATTRIBUTES (ifunc_alias_decl) +- = make_attribute ("ifunc", resolver_name, +- DECL_ATTRIBUTES (ifunc_alias_decl)); +- +- /* Create the alias for dispatch to resolver here. */ +- cgraph_node::create_same_body_alias (ifunc_alias_decl, decl); +- XDELETEVEC (resolver_name); +- return decl; +-} +- +-/* Generate the dispatching code body to dispatch multi-versioned function +- DECL. The target hook is called to process the "target" attributes and +- provide the code to dispatch the right function at run-time. NODE points +- to the dispatcher decl whose body will be created. */ +- +-static tree +-ix86_generate_version_dispatcher_body (void *node_p) +-{ +- tree resolver_decl; +- basic_block empty_bb; +- tree default_ver_decl; +- struct cgraph_node *versn; +- struct cgraph_node *node; +- +- struct cgraph_function_version_info *node_version_info = NULL; +- struct cgraph_function_version_info *versn_info = NULL; +- +- node = (cgraph_node *)node_p; +- +- node_version_info = node->function_version (); +- gcc_assert (node->dispatcher_function +- && node_version_info != NULL); +- +- if (node_version_info->dispatcher_resolver) +- return node_version_info->dispatcher_resolver; +- +- /* The first version in the chain corresponds to the default version. */ +- default_ver_decl = node_version_info->next->this_node->decl; +- +- /* node is going to be an alias, so remove the finalized bit. */ +- node->definition = false; +- +- resolver_decl = make_resolver_func (default_ver_decl, +- node->decl, &empty_bb); +- +- node_version_info->dispatcher_resolver = resolver_decl; +- +- push_cfun (DECL_STRUCT_FUNCTION (resolver_decl)); +- +- auto_vec fn_ver_vec; +- +- for (versn_info = node_version_info->next; versn_info; +- versn_info = versn_info->next) +- { +- versn = versn_info->this_node; +- /* Check for virtual functions here again, as by this time it should +- have been determined if this function needs a vtable index or +- not. This happens for methods in derived classes that override +- virtual methods in base classes but are not explicitly marked as +- virtual. */ +- if (DECL_VINDEX (versn->decl)) +- sorry ("virtual function multiversioning not supported"); +- +- fn_ver_vec.safe_push (versn->decl); +- } +- +- dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); +- cgraph_edge::rebuild_edges (); +- pop_cfun (); +- return resolver_decl; +-} +-/* This builds the processor_model struct type defined in +- libgcc/config/i386/cpuinfo.c */ +- +-static tree +-build_processor_model_struct (void) +-{ +- const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype", +- "__cpu_features"}; +- tree field = NULL_TREE, field_chain = NULL_TREE; +- int i; +- tree type = make_node (RECORD_TYPE); +- +- /* The first 3 fields are unsigned int. 
*/ +- for (i = 0; i < 3; ++i) +- { +- field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, +- get_identifier (field_name[i]), unsigned_type_node); +- if (field_chain != NULL_TREE) +- DECL_CHAIN (field) = field_chain; +- field_chain = field; +- } +- +- /* The last field is an array of unsigned integers of size one. */ +- field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, +- get_identifier (field_name[3]), +- build_array_type (unsigned_type_node, +- build_index_type (size_one_node))); +- if (field_chain != NULL_TREE) +- DECL_CHAIN (field) = field_chain; +- field_chain = field; +- +- finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE); +- return type; +-} +- +-/* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */ +- +-static tree +-make_var_decl (tree type, const char *name) +-{ +- tree new_decl; +- +- new_decl = build_decl (UNKNOWN_LOCATION, +- VAR_DECL, +- get_identifier(name), +- type); +- +- DECL_EXTERNAL (new_decl) = 1; +- TREE_STATIC (new_decl) = 1; +- TREE_PUBLIC (new_decl) = 1; +- DECL_INITIAL (new_decl) = 0; +- DECL_ARTIFICIAL (new_decl) = 0; +- DECL_PRESERVE_P (new_decl) = 1; +- +- make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); +- assemble_variable (new_decl, 0, 0, 0); +- +- return new_decl; +-} +- +-/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded +- into an integer defined in libgcc/config/i386/cpuinfo.c */ +- +-static tree +-fold_builtin_cpu (tree fndecl, tree *args) +-{ +- unsigned int i; +- enum ix86_builtins fn_code = (enum ix86_builtins) +- DECL_FUNCTION_CODE (fndecl); +- tree param_string_cst = NULL; +- +- tree __processor_model_type = build_processor_model_struct (); +- tree __cpu_model_var = make_var_decl (__processor_model_type, +- "__cpu_model"); +- +- +- varpool_node::add (__cpu_model_var); +- +- gcc_assert ((args != NULL) && (*args != NULL)); +- +- param_string_cst = *args; +- while (param_string_cst +- && TREE_CODE (param_string_cst) != STRING_CST) +- { +- /* *args must be a expr that can contain other EXPRS leading to a +- STRING_CST. */ +- if (!EXPR_P (param_string_cst)) +- { +- error ("parameter to builtin must be a string constant or literal"); +- return integer_zero_node; +- } +- param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0); +- } +- +- gcc_assert (param_string_cst); +- +- if (fn_code == IX86_BUILTIN_CPU_IS) +- { +- tree ref; +- tree field; +- tree final; +- +- unsigned int field_val = 0; +- unsigned int NUM_ARCH_NAMES +- = sizeof (arch_names_table) / sizeof (struct _arch_names_table); +- +- for (i = 0; i < NUM_ARCH_NAMES; i++) +- if (strcmp (arch_names_table[i].name, +- TREE_STRING_POINTER (param_string_cst)) == 0) +- break; +- +- if (i == NUM_ARCH_NAMES) +- { +- error ("parameter to builtin not valid: %s", +- TREE_STRING_POINTER (param_string_cst)); +- return integer_zero_node; +- } +- +- field = TYPE_FIELDS (__processor_model_type); +- field_val = arch_names_table[i].model; +- +- /* CPU types are stored in the next field. */ +- if (field_val > M_CPU_TYPE_START +- && field_val < M_CPU_SUBTYPE_START) +- { +- field = DECL_CHAIN (field); +- field_val -= M_CPU_TYPE_START; +- } +- +- /* CPU subtypes are stored in the next field. */ +- if (field_val > M_CPU_SUBTYPE_START) +- { +- field = DECL_CHAIN ( DECL_CHAIN (field)); +- field_val -= M_CPU_SUBTYPE_START; +- } +- +- /* Get the appropriate field in __cpu_model. */ +- ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, +- field, NULL_TREE); +- +- /* Check the value. 
*/ +- final = build2 (EQ_EXPR, unsigned_type_node, ref, +- build_int_cstu (unsigned_type_node, field_val)); +- return build1 (CONVERT_EXPR, integer_type_node, final); +- } +- else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS) +- { +- tree ref; +- tree array_elt; +- tree field; +- tree final; +- +- unsigned int field_val = 0; +- unsigned int NUM_ISA_NAMES +- = sizeof (isa_names_table) / sizeof (struct _isa_names_table); +- +- for (i = 0; i < NUM_ISA_NAMES; i++) +- if (strcmp (isa_names_table[i].name, +- TREE_STRING_POINTER (param_string_cst)) == 0) +- break; +- +- if (i == NUM_ISA_NAMES) +- { +- error ("parameter to builtin not valid: %s", +- TREE_STRING_POINTER (param_string_cst)); +- return integer_zero_node; +- } +- +- if (isa_names_table[i].feature >= 32) +- { +- tree __cpu_features2_var = make_var_decl (unsigned_type_node, +- "__cpu_features2"); +- +- varpool_node::add (__cpu_features2_var); +- field_val = (1U << (isa_names_table[i].feature - 32)); +- /* Return __cpu_features2 & field_val */ +- final = build2 (BIT_AND_EXPR, unsigned_type_node, +- __cpu_features2_var, +- build_int_cstu (unsigned_type_node, field_val)); +- return build1 (CONVERT_EXPR, integer_type_node, final); +- } +- +- field = TYPE_FIELDS (__processor_model_type); +- /* Get the last field, which is __cpu_features. */ +- while (DECL_CHAIN (field)) +- field = DECL_CHAIN (field); +- +- /* Get the appropriate field: __cpu_model.__cpu_features */ +- ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, +- field, NULL_TREE); +- +- /* Access the 0th element of __cpu_features array. */ +- array_elt = build4 (ARRAY_REF, unsigned_type_node, ref, +- integer_zero_node, NULL_TREE, NULL_TREE); +- +- field_val = (1U << isa_names_table[i].feature); +- /* Return __cpu_model.__cpu_features[0] & field_val */ +- final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt, +- build_int_cstu (unsigned_type_node, field_val)); +- return build1 (CONVERT_EXPR, integer_type_node, final); +- } +- gcc_unreachable (); +-} +- +-/* Return the shift count of a vector by scalar shift builtin second argument +- ARG1. */ +-static tree +-ix86_vector_shift_count (tree arg1) +-{ +- if (tree_fits_uhwi_p (arg1)) +- return arg1; +- else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8) +- { +- /* The count argument is weird, passed in as various 128-bit +- (or 64-bit) vectors, the low 64 bits from it are the count. 
*/ +- unsigned char buf[16]; +- int len = native_encode_expr (arg1, buf, 16); +- if (len == 0) +- return NULL_TREE; +- tree t = native_interpret_expr (uint64_type_node, buf, len); +- if (t && tree_fits_uhwi_p (t)) +- return t; +- } +- return NULL_TREE; +-} +- +-static tree +-ix86_fold_builtin (tree fndecl, int n_args, +- tree *args, bool ignore ATTRIBUTE_UNUSED) +-{ +- if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) +- { +- enum ix86_builtins fn_code = (enum ix86_builtins) +- DECL_FUNCTION_CODE (fndecl); +- enum rtx_code rcode; +- bool is_vshift; +- unsigned HOST_WIDE_INT mask; +- +- switch (fn_code) +- { +- case IX86_BUILTIN_CPU_IS: +- case IX86_BUILTIN_CPU_SUPPORTS: +- gcc_assert (n_args == 1); +- return fold_builtin_cpu (fndecl, args); +- +- case IX86_BUILTIN_NANQ: +- case IX86_BUILTIN_NANSQ: +- { +- tree type = TREE_TYPE (TREE_TYPE (fndecl)); +- const char *str = c_getstr (*args); +- int quiet = fn_code == IX86_BUILTIN_NANQ; +- REAL_VALUE_TYPE real; +- +- if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) +- return build_real (type, real); +- return NULL_TREE; +- } +- +- case IX86_BUILTIN_INFQ: +- case IX86_BUILTIN_HUGE_VALQ: +- { +- tree type = TREE_TYPE (TREE_TYPE (fndecl)); +- REAL_VALUE_TYPE inf; +- real_inf (&inf); +- return build_real (type, inf); +- } +- +- case IX86_BUILTIN_TZCNT16: +- case IX86_BUILTIN_CTZS: +- case IX86_BUILTIN_TZCNT32: +- case IX86_BUILTIN_TZCNT64: +- gcc_assert (n_args == 1); +- if (TREE_CODE (args[0]) == INTEGER_CST) +- { +- tree type = TREE_TYPE (TREE_TYPE (fndecl)); +- tree arg = args[0]; +- if (fn_code == IX86_BUILTIN_TZCNT16 +- || fn_code == IX86_BUILTIN_CTZS) +- arg = fold_convert (short_unsigned_type_node, arg); +- if (integer_zerop (arg)) +- return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); +- else +- return fold_const_call (CFN_CTZ, type, arg); +- } +- break; +- +- case IX86_BUILTIN_LZCNT16: +- case IX86_BUILTIN_CLZS: +- case IX86_BUILTIN_LZCNT32: +- case IX86_BUILTIN_LZCNT64: +- gcc_assert (n_args == 1); +- if (TREE_CODE (args[0]) == INTEGER_CST) +- { +- tree type = TREE_TYPE (TREE_TYPE (fndecl)); +- tree arg = args[0]; +- if (fn_code == IX86_BUILTIN_LZCNT16 +- || fn_code == IX86_BUILTIN_CLZS) +- arg = fold_convert (short_unsigned_type_node, arg); +- if (integer_zerop (arg)) +- return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); +- else +- return fold_const_call (CFN_CLZ, type, arg); +- } +- break; +- +- case IX86_BUILTIN_BEXTR32: +- case IX86_BUILTIN_BEXTR64: +- case IX86_BUILTIN_BEXTRI32: +- case IX86_BUILTIN_BEXTRI64: +- gcc_assert (n_args == 2); +- if (tree_fits_uhwi_p (args[1])) +- { +- unsigned HOST_WIDE_INT res = 0; +- unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0])); +- unsigned int start = tree_to_uhwi (args[1]); +- unsigned int len = (start & 0xff00) >> 8; +- start &= 0xff; +- if (start >= prec || len == 0) +- res = 0; +- else if (!tree_fits_uhwi_p (args[0])) +- break; +- else +- res = tree_to_uhwi (args[0]) >> start; +- if (len > prec) +- len = prec; +- if (len < HOST_BITS_PER_WIDE_INT) +- res &= (HOST_WIDE_INT_1U << len) - 1; +- return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); +- } +- break; +- +- case IX86_BUILTIN_BZHI32: +- case IX86_BUILTIN_BZHI64: +- gcc_assert (n_args == 2); +- if (tree_fits_uhwi_p (args[1])) +- { +- unsigned int idx = tree_to_uhwi (args[1]) & 0xff; +- if (idx >= TYPE_PRECISION (TREE_TYPE (args[0]))) +- return args[0]; +- if (idx == 0) +- return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0); +- if (!tree_fits_uhwi_p (args[0])) +- break; +- unsigned 
HOST_WIDE_INT res = tree_to_uhwi (args[0]); +- res &= ~(HOST_WIDE_INT_M1U << idx); +- return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); +- } +- break; +- +- case IX86_BUILTIN_PDEP32: +- case IX86_BUILTIN_PDEP64: +- gcc_assert (n_args == 2); +- if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) +- { +- unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); +- unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); +- unsigned HOST_WIDE_INT res = 0; +- unsigned HOST_WIDE_INT m, k = 1; +- for (m = 1; m; m <<= 1) +- if ((mask & m) != 0) +- { +- if ((src & k) != 0) +- res |= m; +- k <<= 1; +- } +- return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); +- } +- break; +- +- case IX86_BUILTIN_PEXT32: +- case IX86_BUILTIN_PEXT64: +- gcc_assert (n_args == 2); +- if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) +- { +- unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); +- unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); +- unsigned HOST_WIDE_INT res = 0; +- unsigned HOST_WIDE_INT m, k = 1; +- for (m = 1; m; m <<= 1) +- if ((mask & m) != 0) +- { +- if ((src & m) != 0) +- res |= k; +- k <<= 1; +- } +- return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); +- } +- break; +- +- case IX86_BUILTIN_MOVMSKPS: +- case IX86_BUILTIN_PMOVMSKB: +- case IX86_BUILTIN_MOVMSKPD: +- case IX86_BUILTIN_PMOVMSKB128: +- case IX86_BUILTIN_MOVMSKPD256: +- case IX86_BUILTIN_MOVMSKPS256: +- case IX86_BUILTIN_PMOVMSKB256: +- gcc_assert (n_args == 1); +- if (TREE_CODE (args[0]) == VECTOR_CST) +- { +- HOST_WIDE_INT res = 0; +- for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i) +- { +- tree e = VECTOR_CST_ELT (args[0], i); +- if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e)) +- { +- if (wi::neg_p (wi::to_wide (e))) +- res |= HOST_WIDE_INT_1 << i; +- } +- else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e)) +- { +- if (TREE_REAL_CST (e).sign) +- res |= HOST_WIDE_INT_1 << i; +- } +- else +- return NULL_TREE; +- } +- return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res); +- } +- break; +- +- case IX86_BUILTIN_PSLLD: +- case IX86_BUILTIN_PSLLD128: +- case IX86_BUILTIN_PSLLD128_MASK: +- case IX86_BUILTIN_PSLLD256: +- case IX86_BUILTIN_PSLLD256_MASK: +- case IX86_BUILTIN_PSLLD512: +- case IX86_BUILTIN_PSLLDI: +- case IX86_BUILTIN_PSLLDI128: +- case IX86_BUILTIN_PSLLDI128_MASK: +- case IX86_BUILTIN_PSLLDI256: +- case IX86_BUILTIN_PSLLDI256_MASK: +- case IX86_BUILTIN_PSLLDI512: +- case IX86_BUILTIN_PSLLQ: +- case IX86_BUILTIN_PSLLQ128: +- case IX86_BUILTIN_PSLLQ128_MASK: +- case IX86_BUILTIN_PSLLQ256: +- case IX86_BUILTIN_PSLLQ256_MASK: +- case IX86_BUILTIN_PSLLQ512: +- case IX86_BUILTIN_PSLLQI: +- case IX86_BUILTIN_PSLLQI128: +- case IX86_BUILTIN_PSLLQI128_MASK: +- case IX86_BUILTIN_PSLLQI256: +- case IX86_BUILTIN_PSLLQI256_MASK: +- case IX86_BUILTIN_PSLLQI512: +- case IX86_BUILTIN_PSLLW: +- case IX86_BUILTIN_PSLLW128: +- case IX86_BUILTIN_PSLLW128_MASK: +- case IX86_BUILTIN_PSLLW256: +- case IX86_BUILTIN_PSLLW256_MASK: +- case IX86_BUILTIN_PSLLW512_MASK: +- case IX86_BUILTIN_PSLLWI: +- case IX86_BUILTIN_PSLLWI128: +- case IX86_BUILTIN_PSLLWI128_MASK: +- case IX86_BUILTIN_PSLLWI256: +- case IX86_BUILTIN_PSLLWI256_MASK: +- case IX86_BUILTIN_PSLLWI512_MASK: +- rcode = ASHIFT; +- is_vshift = false; +- goto do_shift; +- case IX86_BUILTIN_PSRAD: +- case IX86_BUILTIN_PSRAD128: +- case IX86_BUILTIN_PSRAD128_MASK: +- case IX86_BUILTIN_PSRAD256: +- case IX86_BUILTIN_PSRAD256_MASK: +- case IX86_BUILTIN_PSRAD512: +- case IX86_BUILTIN_PSRADI: +- case IX86_BUILTIN_PSRADI128: +- 
case IX86_BUILTIN_PSRADI128_MASK: +- case IX86_BUILTIN_PSRADI256: +- case IX86_BUILTIN_PSRADI256_MASK: +- case IX86_BUILTIN_PSRADI512: +- case IX86_BUILTIN_PSRAQ128_MASK: +- case IX86_BUILTIN_PSRAQ256_MASK: +- case IX86_BUILTIN_PSRAQ512: +- case IX86_BUILTIN_PSRAQI128_MASK: +- case IX86_BUILTIN_PSRAQI256_MASK: +- case IX86_BUILTIN_PSRAQI512: +- case IX86_BUILTIN_PSRAW: +- case IX86_BUILTIN_PSRAW128: +- case IX86_BUILTIN_PSRAW128_MASK: +- case IX86_BUILTIN_PSRAW256: +- case IX86_BUILTIN_PSRAW256_MASK: +- case IX86_BUILTIN_PSRAW512: +- case IX86_BUILTIN_PSRAWI: +- case IX86_BUILTIN_PSRAWI128: +- case IX86_BUILTIN_PSRAWI128_MASK: +- case IX86_BUILTIN_PSRAWI256: +- case IX86_BUILTIN_PSRAWI256_MASK: +- case IX86_BUILTIN_PSRAWI512: +- rcode = ASHIFTRT; +- is_vshift = false; +- goto do_shift; +- case IX86_BUILTIN_PSRLD: +- case IX86_BUILTIN_PSRLD128: +- case IX86_BUILTIN_PSRLD128_MASK: +- case IX86_BUILTIN_PSRLD256: +- case IX86_BUILTIN_PSRLD256_MASK: +- case IX86_BUILTIN_PSRLD512: +- case IX86_BUILTIN_PSRLDI: +- case IX86_BUILTIN_PSRLDI128: +- case IX86_BUILTIN_PSRLDI128_MASK: +- case IX86_BUILTIN_PSRLDI256: +- case IX86_BUILTIN_PSRLDI256_MASK: +- case IX86_BUILTIN_PSRLDI512: +- case IX86_BUILTIN_PSRLQ: +- case IX86_BUILTIN_PSRLQ128: +- case IX86_BUILTIN_PSRLQ128_MASK: +- case IX86_BUILTIN_PSRLQ256: +- case IX86_BUILTIN_PSRLQ256_MASK: +- case IX86_BUILTIN_PSRLQ512: +- case IX86_BUILTIN_PSRLQI: +- case IX86_BUILTIN_PSRLQI128: +- case IX86_BUILTIN_PSRLQI128_MASK: +- case IX86_BUILTIN_PSRLQI256: +- case IX86_BUILTIN_PSRLQI256_MASK: +- case IX86_BUILTIN_PSRLQI512: +- case IX86_BUILTIN_PSRLW: +- case IX86_BUILTIN_PSRLW128: +- case IX86_BUILTIN_PSRLW128_MASK: +- case IX86_BUILTIN_PSRLW256: +- case IX86_BUILTIN_PSRLW256_MASK: +- case IX86_BUILTIN_PSRLW512: +- case IX86_BUILTIN_PSRLWI: +- case IX86_BUILTIN_PSRLWI128: +- case IX86_BUILTIN_PSRLWI128_MASK: +- case IX86_BUILTIN_PSRLWI256: +- case IX86_BUILTIN_PSRLWI256_MASK: +- case IX86_BUILTIN_PSRLWI512: +- rcode = LSHIFTRT; +- is_vshift = false; +- goto do_shift; +- case IX86_BUILTIN_PSLLVV16HI: +- case IX86_BUILTIN_PSLLVV16SI: +- case IX86_BUILTIN_PSLLVV2DI: +- case IX86_BUILTIN_PSLLVV2DI_MASK: +- case IX86_BUILTIN_PSLLVV32HI: +- case IX86_BUILTIN_PSLLVV4DI: +- case IX86_BUILTIN_PSLLVV4DI_MASK: +- case IX86_BUILTIN_PSLLVV4SI: +- case IX86_BUILTIN_PSLLVV4SI_MASK: +- case IX86_BUILTIN_PSLLVV8DI: +- case IX86_BUILTIN_PSLLVV8HI: +- case IX86_BUILTIN_PSLLVV8SI: +- case IX86_BUILTIN_PSLLVV8SI_MASK: +- rcode = ASHIFT; +- is_vshift = true; +- goto do_shift; +- case IX86_BUILTIN_PSRAVQ128: +- case IX86_BUILTIN_PSRAVQ256: +- case IX86_BUILTIN_PSRAVV16HI: +- case IX86_BUILTIN_PSRAVV16SI: +- case IX86_BUILTIN_PSRAVV32HI: +- case IX86_BUILTIN_PSRAVV4SI: +- case IX86_BUILTIN_PSRAVV4SI_MASK: +- case IX86_BUILTIN_PSRAVV8DI: +- case IX86_BUILTIN_PSRAVV8HI: +- case IX86_BUILTIN_PSRAVV8SI: +- case IX86_BUILTIN_PSRAVV8SI_MASK: +- rcode = ASHIFTRT; +- is_vshift = true; +- goto do_shift; +- case IX86_BUILTIN_PSRLVV16HI: +- case IX86_BUILTIN_PSRLVV16SI: +- case IX86_BUILTIN_PSRLVV2DI: +- case IX86_BUILTIN_PSRLVV2DI_MASK: +- case IX86_BUILTIN_PSRLVV32HI: +- case IX86_BUILTIN_PSRLVV4DI: +- case IX86_BUILTIN_PSRLVV4DI_MASK: +- case IX86_BUILTIN_PSRLVV4SI: +- case IX86_BUILTIN_PSRLVV4SI_MASK: +- case IX86_BUILTIN_PSRLVV8DI: +- case IX86_BUILTIN_PSRLVV8HI: +- case IX86_BUILTIN_PSRLVV8SI: +- case IX86_BUILTIN_PSRLVV8SI_MASK: +- rcode = LSHIFTRT; +- is_vshift = true; +- goto do_shift; +- +- do_shift: +- gcc_assert (n_args >= 2); +- if (TREE_CODE (args[0]) != VECTOR_CST) +- break; +- 
mask = HOST_WIDE_INT_M1U; +- if (n_args > 2) +- { +- /* This is masked shift. */ +- if (!tree_fits_uhwi_p (args[n_args - 1]) +- || TREE_SIDE_EFFECTS (args[n_args - 2])) +- break; +- mask = tree_to_uhwi (args[n_args - 1]); +- unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); +- mask |= HOST_WIDE_INT_M1U << elems; +- if (mask != HOST_WIDE_INT_M1U +- && TREE_CODE (args[n_args - 2]) != VECTOR_CST) +- break; +- if (mask == (HOST_WIDE_INT_M1U << elems)) +- return args[n_args - 2]; +- } +- if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST) +- break; +- if (tree tem = (is_vshift ? integer_one_node +- : ix86_vector_shift_count (args[1]))) +- { +- unsigned HOST_WIDE_INT count = tree_to_uhwi (tem); +- unsigned HOST_WIDE_INT prec +- = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))); +- if (count == 0 && mask == HOST_WIDE_INT_M1U) +- return args[0]; +- if (count >= prec) +- { +- if (rcode == ASHIFTRT) +- count = prec - 1; +- else if (mask == HOST_WIDE_INT_M1U) +- return build_zero_cst (TREE_TYPE (args[0])); +- } +- tree countt = NULL_TREE; +- if (!is_vshift) +- { +- if (count >= prec) +- countt = integer_zero_node; +- else +- countt = build_int_cst (integer_type_node, count); +- } +- tree_vector_builder builder; +- if (mask != HOST_WIDE_INT_M1U || is_vshift) +- builder.new_vector (TREE_TYPE (args[0]), +- TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])), +- 1); +- else +- builder.new_unary_operation (TREE_TYPE (args[0]), args[0], +- false); +- unsigned int cnt = builder.encoded_nelts (); +- for (unsigned int i = 0; i < cnt; ++i) +- { +- tree elt = VECTOR_CST_ELT (args[0], i); +- if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt)) +- return NULL_TREE; +- tree type = TREE_TYPE (elt); +- if (rcode == LSHIFTRT) +- elt = fold_convert (unsigned_type_for (type), elt); +- if (is_vshift) +- { +- countt = VECTOR_CST_ELT (args[1], i); +- if (TREE_CODE (countt) != INTEGER_CST +- || TREE_OVERFLOW (countt)) +- return NULL_TREE; +- if (wi::neg_p (wi::to_wide (countt)) +- || wi::to_widest (countt) >= prec) +- { +- if (rcode == ASHIFTRT) +- countt = build_int_cst (TREE_TYPE (countt), +- prec - 1); +- else +- { +- elt = build_zero_cst (TREE_TYPE (elt)); +- countt = build_zero_cst (TREE_TYPE (countt)); +- } +- } +- } +- else if (count >= prec) +- elt = build_zero_cst (TREE_TYPE (elt)); +- elt = const_binop (rcode == ASHIFT +- ? LSHIFT_EXPR : RSHIFT_EXPR, +- TREE_TYPE (elt), elt, countt); +- if (!elt || TREE_CODE (elt) != INTEGER_CST) +- return NULL_TREE; +- if (rcode == LSHIFTRT) +- elt = fold_convert (type, elt); +- if ((mask & (HOST_WIDE_INT_1U << i)) == 0) +- { +- elt = VECTOR_CST_ELT (args[n_args - 2], i); +- if (TREE_CODE (elt) != INTEGER_CST +- || TREE_OVERFLOW (elt)) +- return NULL_TREE; +- } +- builder.quick_push (elt); +- } +- return builder.build (); +- } +- break; +- +- default: +- break; +- } +- } +- +-#ifdef SUBTARGET_FOLD_BUILTIN +- return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); +-#endif +- +- return NULL_TREE; +-} +- +-/* Fold a MD builtin (use ix86_fold_builtin for folding into +- constant) in GIMPLE. 
*/ +- +-bool +-ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) +-{ +- gimple *stmt = gsi_stmt (*gsi); +- tree fndecl = gimple_call_fndecl (stmt); +- gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); +- int n_args = gimple_call_num_args (stmt); +- enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl); +- tree decl = NULL_TREE; +- tree arg0, arg1; +- enum rtx_code rcode; +- unsigned HOST_WIDE_INT count; +- bool is_vshift; +- +- switch (fn_code) +- { +- case IX86_BUILTIN_TZCNT32: +- decl = builtin_decl_implicit (BUILT_IN_CTZ); +- goto fold_tzcnt_lzcnt; +- +- case IX86_BUILTIN_TZCNT64: +- decl = builtin_decl_implicit (BUILT_IN_CTZLL); +- goto fold_tzcnt_lzcnt; +- +- case IX86_BUILTIN_LZCNT32: +- decl = builtin_decl_implicit (BUILT_IN_CLZ); +- goto fold_tzcnt_lzcnt; +- +- case IX86_BUILTIN_LZCNT64: +- decl = builtin_decl_implicit (BUILT_IN_CLZLL); +- goto fold_tzcnt_lzcnt; +- +- fold_tzcnt_lzcnt: +- gcc_assert (n_args == 1); +- arg0 = gimple_call_arg (stmt, 0); +- if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt)) +- { +- int prec = TYPE_PRECISION (TREE_TYPE (arg0)); +- /* If arg0 is provably non-zero, optimize into generic +- __builtin_c[tl]z{,ll} function the middle-end handles +- better. */ +- if (!expr_not_equal_to (arg0, wi::zero (prec))) +- return false; +- +- location_t loc = gimple_location (stmt); +- gimple *g = gimple_build_call (decl, 1, arg0); +- gimple_set_location (g, loc); +- tree lhs = make_ssa_name (integer_type_node); +- gimple_call_set_lhs (g, lhs); +- gsi_insert_before (gsi, g, GSI_SAME_STMT); +- g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs); +- gimple_set_location (g, loc); +- gsi_replace (gsi, g, false); +- return true; +- } +- break; +- +- case IX86_BUILTIN_BZHI32: +- case IX86_BUILTIN_BZHI64: +- gcc_assert (n_args == 2); +- arg1 = gimple_call_arg (stmt, 1); +- if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt)) +- { +- unsigned int idx = tree_to_uhwi (arg1) & 0xff; +- arg0 = gimple_call_arg (stmt, 0); +- if (idx < TYPE_PRECISION (TREE_TYPE (arg0))) +- break; +- location_t loc = gimple_location (stmt); +- gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); +- gimple_set_location (g, loc); +- gsi_replace (gsi, g, false); +- return true; +- } +- break; +- +- case IX86_BUILTIN_PDEP32: +- case IX86_BUILTIN_PDEP64: +- case IX86_BUILTIN_PEXT32: +- case IX86_BUILTIN_PEXT64: +- gcc_assert (n_args == 2); +- arg1 = gimple_call_arg (stmt, 1); +- if (integer_all_onesp (arg1) && gimple_call_lhs (stmt)) +- { +- location_t loc = gimple_location (stmt); +- arg0 = gimple_call_arg (stmt, 0); +- gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); +- gimple_set_location (g, loc); +- gsi_replace (gsi, g, false); +- return true; +- } +- break; +- +- case IX86_BUILTIN_PSLLD: +- case IX86_BUILTIN_PSLLD128: +- case IX86_BUILTIN_PSLLD128_MASK: +- case IX86_BUILTIN_PSLLD256: +- case IX86_BUILTIN_PSLLD256_MASK: +- case IX86_BUILTIN_PSLLD512: +- case IX86_BUILTIN_PSLLDI: +- case IX86_BUILTIN_PSLLDI128: +- case IX86_BUILTIN_PSLLDI128_MASK: +- case IX86_BUILTIN_PSLLDI256: +- case IX86_BUILTIN_PSLLDI256_MASK: +- case IX86_BUILTIN_PSLLDI512: +- case IX86_BUILTIN_PSLLQ: +- case IX86_BUILTIN_PSLLQ128: +- case IX86_BUILTIN_PSLLQ128_MASK: +- case IX86_BUILTIN_PSLLQ256: +- case IX86_BUILTIN_PSLLQ256_MASK: +- case IX86_BUILTIN_PSLLQ512: +- case IX86_BUILTIN_PSLLQI: +- case IX86_BUILTIN_PSLLQI128: +- case IX86_BUILTIN_PSLLQI128_MASK: +- case IX86_BUILTIN_PSLLQI256: +- case 
IX86_BUILTIN_PSLLQI256_MASK: +- case IX86_BUILTIN_PSLLQI512: +- case IX86_BUILTIN_PSLLW: +- case IX86_BUILTIN_PSLLW128: +- case IX86_BUILTIN_PSLLW128_MASK: +- case IX86_BUILTIN_PSLLW256: +- case IX86_BUILTIN_PSLLW256_MASK: +- case IX86_BUILTIN_PSLLW512_MASK: +- case IX86_BUILTIN_PSLLWI: +- case IX86_BUILTIN_PSLLWI128: +- case IX86_BUILTIN_PSLLWI128_MASK: +- case IX86_BUILTIN_PSLLWI256: +- case IX86_BUILTIN_PSLLWI256_MASK: +- case IX86_BUILTIN_PSLLWI512_MASK: +- rcode = ASHIFT; +- is_vshift = false; +- goto do_shift; +- case IX86_BUILTIN_PSRAD: +- case IX86_BUILTIN_PSRAD128: +- case IX86_BUILTIN_PSRAD128_MASK: +- case IX86_BUILTIN_PSRAD256: +- case IX86_BUILTIN_PSRAD256_MASK: +- case IX86_BUILTIN_PSRAD512: +- case IX86_BUILTIN_PSRADI: +- case IX86_BUILTIN_PSRADI128: +- case IX86_BUILTIN_PSRADI128_MASK: +- case IX86_BUILTIN_PSRADI256: +- case IX86_BUILTIN_PSRADI256_MASK: +- case IX86_BUILTIN_PSRADI512: +- case IX86_BUILTIN_PSRAQ128_MASK: +- case IX86_BUILTIN_PSRAQ256_MASK: +- case IX86_BUILTIN_PSRAQ512: +- case IX86_BUILTIN_PSRAQI128_MASK: +- case IX86_BUILTIN_PSRAQI256_MASK: +- case IX86_BUILTIN_PSRAQI512: +- case IX86_BUILTIN_PSRAW: +- case IX86_BUILTIN_PSRAW128: +- case IX86_BUILTIN_PSRAW128_MASK: +- case IX86_BUILTIN_PSRAW256: +- case IX86_BUILTIN_PSRAW256_MASK: +- case IX86_BUILTIN_PSRAW512: +- case IX86_BUILTIN_PSRAWI: +- case IX86_BUILTIN_PSRAWI128: +- case IX86_BUILTIN_PSRAWI128_MASK: +- case IX86_BUILTIN_PSRAWI256: +- case IX86_BUILTIN_PSRAWI256_MASK: +- case IX86_BUILTIN_PSRAWI512: +- rcode = ASHIFTRT; +- is_vshift = false; +- goto do_shift; +- case IX86_BUILTIN_PSRLD: +- case IX86_BUILTIN_PSRLD128: +- case IX86_BUILTIN_PSRLD128_MASK: +- case IX86_BUILTIN_PSRLD256: +- case IX86_BUILTIN_PSRLD256_MASK: +- case IX86_BUILTIN_PSRLD512: +- case IX86_BUILTIN_PSRLDI: +- case IX86_BUILTIN_PSRLDI128: +- case IX86_BUILTIN_PSRLDI128_MASK: +- case IX86_BUILTIN_PSRLDI256: +- case IX86_BUILTIN_PSRLDI256_MASK: +- case IX86_BUILTIN_PSRLDI512: +- case IX86_BUILTIN_PSRLQ: +- case IX86_BUILTIN_PSRLQ128: +- case IX86_BUILTIN_PSRLQ128_MASK: +- case IX86_BUILTIN_PSRLQ256: +- case IX86_BUILTIN_PSRLQ256_MASK: +- case IX86_BUILTIN_PSRLQ512: +- case IX86_BUILTIN_PSRLQI: +- case IX86_BUILTIN_PSRLQI128: +- case IX86_BUILTIN_PSRLQI128_MASK: +- case IX86_BUILTIN_PSRLQI256: +- case IX86_BUILTIN_PSRLQI256_MASK: +- case IX86_BUILTIN_PSRLQI512: +- case IX86_BUILTIN_PSRLW: +- case IX86_BUILTIN_PSRLW128: +- case IX86_BUILTIN_PSRLW128_MASK: +- case IX86_BUILTIN_PSRLW256: +- case IX86_BUILTIN_PSRLW256_MASK: +- case IX86_BUILTIN_PSRLW512: +- case IX86_BUILTIN_PSRLWI: +- case IX86_BUILTIN_PSRLWI128: +- case IX86_BUILTIN_PSRLWI128_MASK: +- case IX86_BUILTIN_PSRLWI256: +- case IX86_BUILTIN_PSRLWI256_MASK: +- case IX86_BUILTIN_PSRLWI512: +- rcode = LSHIFTRT; +- is_vshift = false; +- goto do_shift; +- case IX86_BUILTIN_PSLLVV16HI: +- case IX86_BUILTIN_PSLLVV16SI: +- case IX86_BUILTIN_PSLLVV2DI: +- case IX86_BUILTIN_PSLLVV2DI_MASK: +- case IX86_BUILTIN_PSLLVV32HI: +- case IX86_BUILTIN_PSLLVV4DI: +- case IX86_BUILTIN_PSLLVV4DI_MASK: +- case IX86_BUILTIN_PSLLVV4SI: +- case IX86_BUILTIN_PSLLVV4SI_MASK: +- case IX86_BUILTIN_PSLLVV8DI: +- case IX86_BUILTIN_PSLLVV8HI: +- case IX86_BUILTIN_PSLLVV8SI: +- case IX86_BUILTIN_PSLLVV8SI_MASK: +- rcode = ASHIFT; +- is_vshift = true; +- goto do_shift; +- case IX86_BUILTIN_PSRAVQ128: +- case IX86_BUILTIN_PSRAVQ256: +- case IX86_BUILTIN_PSRAVV16HI: +- case IX86_BUILTIN_PSRAVV16SI: +- case IX86_BUILTIN_PSRAVV32HI: +- case IX86_BUILTIN_PSRAVV4SI: +- case IX86_BUILTIN_PSRAVV4SI_MASK: +- case 
IX86_BUILTIN_PSRAVV8DI: +- case IX86_BUILTIN_PSRAVV8HI: +- case IX86_BUILTIN_PSRAVV8SI: +- case IX86_BUILTIN_PSRAVV8SI_MASK: +- rcode = ASHIFTRT; +- is_vshift = true; +- goto do_shift; +- case IX86_BUILTIN_PSRLVV16HI: +- case IX86_BUILTIN_PSRLVV16SI: +- case IX86_BUILTIN_PSRLVV2DI: +- case IX86_BUILTIN_PSRLVV2DI_MASK: +- case IX86_BUILTIN_PSRLVV32HI: +- case IX86_BUILTIN_PSRLVV4DI: +- case IX86_BUILTIN_PSRLVV4DI_MASK: +- case IX86_BUILTIN_PSRLVV4SI: +- case IX86_BUILTIN_PSRLVV4SI_MASK: +- case IX86_BUILTIN_PSRLVV8DI: +- case IX86_BUILTIN_PSRLVV8HI: +- case IX86_BUILTIN_PSRLVV8SI: +- case IX86_BUILTIN_PSRLVV8SI_MASK: +- rcode = LSHIFTRT; +- is_vshift = true; +- goto do_shift; +- +- do_shift: +- gcc_assert (n_args >= 2); +- arg0 = gimple_call_arg (stmt, 0); +- arg1 = gimple_call_arg (stmt, 1); +- if (n_args > 2) +- { +- /* This is masked shift. Only optimize if the mask is all ones. */ +- tree argl = gimple_call_arg (stmt, n_args - 1); +- if (!tree_fits_uhwi_p (argl)) +- break; +- unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl); +- unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); +- if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) +- break; +- } +- if (is_vshift) +- { +- if (TREE_CODE (arg1) != VECTOR_CST) +- break; +- count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))); +- if (integer_zerop (arg1)) +- count = 0; +- else if (rcode == ASHIFTRT) +- break; +- else +- for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i) +- { +- tree elt = VECTOR_CST_ELT (arg1, i); +- if (!wi::neg_p (wi::to_wide (elt)) +- && wi::to_widest (elt) < count) +- return false; +- } +- } +- else +- { +- arg1 = ix86_vector_shift_count (arg1); +- if (!arg1) +- break; +- count = tree_to_uhwi (arg1); +- } +- if (count == 0) +- { +- /* Just return the first argument for shift by 0. */ +- location_t loc = gimple_location (stmt); +- gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); +- gimple_set_location (g, loc); +- gsi_replace (gsi, g, false); +- return true; +- } +- if (rcode != ASHIFTRT +- && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)))) +- { +- /* For shift counts equal or greater than precision, except for +- arithmetic right shift the result is zero. */ +- location_t loc = gimple_location (stmt); +- gimple *g = gimple_build_assign (gimple_call_lhs (stmt), +- build_zero_cst (TREE_TYPE (arg0))); +- gimple_set_location (g, loc); +- gsi_replace (gsi, g, false); +- return true; +- } +- break; +- +- default: +- break; +- } +- +- return false; +-} +- +-/* Make builtins to detect cpu type and features supported. NAME is +- the builtin name, CODE is the builtin code, and FTYPE is the function +- type of the builtin. */ +- +-static void +-make_cpu_type_builtin (const char* name, int code, +- enum ix86_builtin_func_type ftype, bool is_const) +-{ +- tree decl; +- tree type; +- +- type = ix86_get_builtin_func_type (ftype); +- decl = add_builtin_function (name, type, code, BUILT_IN_MD, +- NULL, NULL_TREE); +- gcc_assert (decl != NULL_TREE); +- ix86_builtins[(int) code] = decl; +- TREE_READONLY (decl) = is_const; +-} +- +-/* Make builtins to get CPU type and features supported. 
The created +- builtins are : +- +- __builtin_cpu_init (), to detect cpu type and features, +- __builtin_cpu_is (""), to check if cpu is of type , +- __builtin_cpu_supports (""), to check if cpu supports +- */ +- +-static void +-ix86_init_platform_type_builtins (void) +-{ +- make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT, +- INT_FTYPE_VOID, false); +- make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS, +- INT_FTYPE_PCCHAR, true); +- make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS, +- INT_FTYPE_PCCHAR, true); +-} +- +-/* Internal method for ix86_init_builtins. */ +- +-static void +-ix86_init_builtins_va_builtins_abi (void) +-{ +- tree ms_va_ref, sysv_va_ref; +- tree fnvoid_va_end_ms, fnvoid_va_end_sysv; +- tree fnvoid_va_start_ms, fnvoid_va_start_sysv; +- tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv; +- tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE; +- +- if (!TARGET_64BIT) +- return; +- fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE); +- fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE); +- ms_va_ref = build_reference_type (ms_va_list_type_node); +- sysv_va_ref = build_pointer_type (TREE_TYPE (sysv_va_list_type_node)); +- +- fnvoid_va_end_ms = build_function_type_list (void_type_node, ms_va_ref, +- NULL_TREE); +- fnvoid_va_start_ms +- = build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE); +- fnvoid_va_end_sysv +- = build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE); +- fnvoid_va_start_sysv +- = build_varargs_function_type_list (void_type_node, sysv_va_ref, +- NULL_TREE); +- fnvoid_va_copy_ms +- = build_function_type_list (void_type_node, ms_va_ref, +- ms_va_list_type_node, NULL_TREE); +- fnvoid_va_copy_sysv +- = build_function_type_list (void_type_node, sysv_va_ref, +- sysv_va_ref, NULL_TREE); +- +- add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms, +- BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms); +- add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms, +- BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms); +- add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms, +- BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms); +- add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv, +- BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv); +- add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv, +- BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv); +- add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv, +- BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv); +-} +- +-static void +-ix86_init_builtin_types (void) +-{ +- tree float80_type_node, const_string_type_node; +- +- /* The __float80 type. */ +- float80_type_node = long_double_type_node; +- if (TYPE_MODE (float80_type_node) != XFmode) +- { +- if (float64x_type_node != NULL_TREE +- && TYPE_MODE (float64x_type_node) == XFmode) +- float80_type_node = float64x_type_node; +- else +- { +- /* The __float80 type. */ +- float80_type_node = make_node (REAL_TYPE); +- +- TYPE_PRECISION (float80_type_node) = 80; +- layout_type (float80_type_node); +- } +- } +- lang_hooks.types.register_builtin_type (float80_type_node, "__float80"); +- +- /* The __float128 type. The node has already been created as +- _Float128, so we only need to register the __float128 name for +- it. 
*/ +- lang_hooks.types.register_builtin_type (float128_type_node, "__float128"); +- +- const_string_type_node +- = build_pointer_type (build_qualified_type +- (char_type_node, TYPE_QUAL_CONST)); +- +- /* This macro is built by i386-builtin-types.awk. */ +- DEFINE_BUILTIN_PRIMITIVE_TYPES; +-} +- +-static void +-ix86_init_builtins (void) +-{ +- tree ftype, decl; +- +- ix86_init_builtin_types (); +- +- /* Builtins to get CPU type and features. */ +- ix86_init_platform_type_builtins (); +- +- /* TFmode support builtins. */ +- def_builtin_const (0, 0, "__builtin_infq", +- FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); +- def_builtin_const (0, 0, "__builtin_huge_valq", +- FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ); +- +- ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING); +- decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ, +- BUILT_IN_MD, "nanq", NULL_TREE); +- TREE_READONLY (decl) = 1; +- ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl; +- +- decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ, +- BUILT_IN_MD, "nansq", NULL_TREE); +- TREE_READONLY (decl) = 1; +- ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl; +- +- /* We will expand them to normal call if SSE isn't available since +- they are used by libgcc. */ +- ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128); +- decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ, +- BUILT_IN_MD, "__fabstf2", NULL_TREE); +- TREE_READONLY (decl) = 1; +- ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl; +- +- ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128); +- decl = add_builtin_function ("__builtin_copysignq", ftype, +- IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD, +- "__copysigntf3", NULL_TREE); +- TREE_READONLY (decl) = 1; +- ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; +- +- ix86_init_tm_builtins (); +- ix86_init_mmx_sse_builtins (); +- +- if (TARGET_LP64) +- ix86_init_builtins_va_builtins_abi (); +- +-#ifdef SUBTARGET_INIT_BUILTINS +- SUBTARGET_INIT_BUILTINS; +-#endif +-} +- +-/* Return the ix86 builtin for CODE. */ +- +-static tree +-ix86_builtin_decl (unsigned code, bool) +-{ +- if (code >= IX86_BUILTIN_MAX) +- return error_mark_node; +- +- return ix86_builtins[code]; +-} +- +-/* Errors in the source file can cause expand_expr to return const0_rtx +- where we expect a vector. To avoid crashing, use one of the vector +- clear instructions. */ +-static rtx +-safe_vector_operand (rtx x, machine_mode mode) +-{ +- if (x == const0_rtx) +- x = CONST0_RTX (mode); +- return x; +-} +- +-/* Fixup modeless constants to fit required mode. */ +-static rtx +-fixup_modeless_constant (rtx x, machine_mode mode) +-{ +- if (GET_MODE (x) == VOIDmode) +- x = convert_to_mode (mode, x, 1); +- return x; +-} +- +-/* Subroutine of ix86_expand_builtin to take care of binop insns. 
*/ +- +-static rtx +-ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) +-{ +- rtx pat; +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- tree arg1 = CALL_EXPR_ARG (exp, 1); +- rtx op0 = expand_normal (arg0); +- rtx op1 = expand_normal (arg1); +- machine_mode tmode = insn_data[icode].operand[0].mode; +- machine_mode mode0 = insn_data[icode].operand[1].mode; +- machine_mode mode1 = insn_data[icode].operand[2].mode; +- +- if (VECTOR_MODE_P (mode0)) +- op0 = safe_vector_operand (op0, mode0); +- if (VECTOR_MODE_P (mode1)) +- op1 = safe_vector_operand (op1, mode1); +- +- if (optimize || !target +- || GET_MODE (target) != tmode +- || !insn_data[icode].operand[0].predicate (target, tmode)) +- target = gen_reg_rtx (tmode); +- +- if (GET_MODE (op1) == SImode && mode1 == TImode) +- { +- rtx x = gen_reg_rtx (V4SImode); +- emit_insn (gen_sse2_loadd (x, op1)); +- op1 = gen_lowpart (TImode, x); +- } +- +- if (!insn_data[icode].operand[1].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- if (!insn_data[icode].operand[2].predicate (op1, mode1)) +- op1 = copy_to_mode_reg (mode1, op1); +- +- pat = GEN_FCN (icode) (target, op0, op1); +- if (! pat) +- return 0; +- +- emit_insn (pat); +- +- return target; +-} +- +-/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */ +- +-static rtx +-ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, +- enum ix86_builtin_func_type m_type, +- enum rtx_code sub_code) +-{ +- rtx pat; +- int i; +- int nargs; +- bool comparison_p = false; +- bool tf_p = false; +- bool last_arg_constant = false; +- int num_memory = 0; +- struct { +- rtx op; +- machine_mode mode; +- } args[4]; +- +- machine_mode tmode = insn_data[icode].operand[0].mode; +- +- switch (m_type) +- { +- case MULTI_ARG_4_DF2_DI_I: +- case MULTI_ARG_4_DF2_DI_I1: +- case MULTI_ARG_4_SF2_SI_I: +- case MULTI_ARG_4_SF2_SI_I1: +- nargs = 4; +- last_arg_constant = true; +- break; +- +- case MULTI_ARG_3_SF: +- case MULTI_ARG_3_DF: +- case MULTI_ARG_3_SF2: +- case MULTI_ARG_3_DF2: +- case MULTI_ARG_3_DI: +- case MULTI_ARG_3_SI: +- case MULTI_ARG_3_SI_DI: +- case MULTI_ARG_3_HI: +- case MULTI_ARG_3_HI_SI: +- case MULTI_ARG_3_QI: +- case MULTI_ARG_3_DI2: +- case MULTI_ARG_3_SI2: +- case MULTI_ARG_3_HI2: +- case MULTI_ARG_3_QI2: +- nargs = 3; +- break; +- +- case MULTI_ARG_2_SF: +- case MULTI_ARG_2_DF: +- case MULTI_ARG_2_DI: +- case MULTI_ARG_2_SI: +- case MULTI_ARG_2_HI: +- case MULTI_ARG_2_QI: +- nargs = 2; +- break; +- +- case MULTI_ARG_2_DI_IMM: +- case MULTI_ARG_2_SI_IMM: +- case MULTI_ARG_2_HI_IMM: +- case MULTI_ARG_2_QI_IMM: +- nargs = 2; +- last_arg_constant = true; +- break; +- +- case MULTI_ARG_1_SF: +- case MULTI_ARG_1_DF: +- case MULTI_ARG_1_SF2: +- case MULTI_ARG_1_DF2: +- case MULTI_ARG_1_DI: +- case MULTI_ARG_1_SI: +- case MULTI_ARG_1_HI: +- case MULTI_ARG_1_QI: +- case MULTI_ARG_1_SI_DI: +- case MULTI_ARG_1_HI_DI: +- case MULTI_ARG_1_HI_SI: +- case MULTI_ARG_1_QI_DI: +- case MULTI_ARG_1_QI_SI: +- case MULTI_ARG_1_QI_HI: +- nargs = 1; +- break; +- +- case MULTI_ARG_2_DI_CMP: +- case MULTI_ARG_2_SI_CMP: +- case MULTI_ARG_2_HI_CMP: +- case MULTI_ARG_2_QI_CMP: +- nargs = 2; +- comparison_p = true; +- break; +- +- case MULTI_ARG_2_SF_TF: +- case MULTI_ARG_2_DF_TF: +- case MULTI_ARG_2_DI_TF: +- case MULTI_ARG_2_SI_TF: +- case MULTI_ARG_2_HI_TF: +- case MULTI_ARG_2_QI_TF: +- nargs = 2; +- tf_p = true; +- break; +- +- default: +- gcc_unreachable (); +- } +- +- if (optimize || !target +- || GET_MODE (target) != tmode +- || 
!insn_data[icode].operand[0].predicate (target, tmode)) +- target = gen_reg_rtx (tmode); +- else if (memory_operand (target, tmode)) +- num_memory++; +- +- gcc_assert (nargs <= 4); +- +- for (i = 0; i < nargs; i++) +- { +- tree arg = CALL_EXPR_ARG (exp, i); +- rtx op = expand_normal (arg); +- int adjust = (comparison_p) ? 1 : 0; +- machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; +- +- if (last_arg_constant && i == nargs - 1) +- { +- if (!insn_data[icode].operand[i + 1].predicate (op, mode)) +- { +- enum insn_code new_icode = icode; +- switch (icode) +- { +- case CODE_FOR_xop_vpermil2v2df3: +- case CODE_FOR_xop_vpermil2v4sf3: +- case CODE_FOR_xop_vpermil2v4df3: +- case CODE_FOR_xop_vpermil2v8sf3: +- error ("the last argument must be a 2-bit immediate"); +- return gen_reg_rtx (tmode); +- case CODE_FOR_xop_rotlv2di3: +- new_icode = CODE_FOR_rotlv2di3; +- goto xop_rotl; +- case CODE_FOR_xop_rotlv4si3: +- new_icode = CODE_FOR_rotlv4si3; +- goto xop_rotl; +- case CODE_FOR_xop_rotlv8hi3: +- new_icode = CODE_FOR_rotlv8hi3; +- goto xop_rotl; +- case CODE_FOR_xop_rotlv16qi3: +- new_icode = CODE_FOR_rotlv16qi3; +- xop_rotl: +- if (CONST_INT_P (op)) +- { +- int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1; +- op = GEN_INT (INTVAL (op) & mask); +- gcc_checking_assert +- (insn_data[icode].operand[i + 1].predicate (op, mode)); +- } +- else +- { +- gcc_checking_assert +- (nargs == 2 +- && insn_data[new_icode].operand[0].mode == tmode +- && insn_data[new_icode].operand[1].mode == tmode +- && insn_data[new_icode].operand[2].mode == mode +- && insn_data[new_icode].operand[0].predicate +- == insn_data[icode].operand[0].predicate +- && insn_data[new_icode].operand[1].predicate +- == insn_data[icode].operand[1].predicate); +- icode = new_icode; +- goto non_constant; +- } +- break; +- default: +- gcc_unreachable (); +- } +- } +- } +- else +- { +- non_constant: +- if (VECTOR_MODE_P (mode)) +- op = safe_vector_operand (op, mode); +- +- /* If we aren't optimizing, only allow one memory operand to be +- generated. */ +- if (memory_operand (op, mode)) +- num_memory++; +- +- gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); +- +- if (optimize +- || !insn_data[icode].operand[i+adjust+1].predicate (op, mode) +- || num_memory > 1) +- op = force_reg (mode, op); +- } +- +- args[i].op = op; +- args[i].mode = mode; +- } +- +- switch (nargs) +- { +- case 1: +- pat = GEN_FCN (icode) (target, args[0].op); +- break; +- +- case 2: +- if (tf_p) +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, +- GEN_INT ((int)sub_code)); +- else if (! comparison_p) +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op); +- else +- { +- rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target), +- args[0].op, +- args[1].op); +- +- pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op); +- } +- break; +- +- case 3: +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); +- break; +- +- case 4: +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op); +- break; +- +- default: +- gcc_unreachable (); +- } +- +- if (! pat) +- return 0; +- +- emit_insn (pat); +- return target; +-} +- +-/* Subroutine of ix86_expand_args_builtin to take care of scalar unop +- insns with vec_merge. 
*/ +- +-static rtx +-ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, +- rtx target) +-{ +- rtx pat; +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- rtx op1, op0 = expand_normal (arg0); +- machine_mode tmode = insn_data[icode].operand[0].mode; +- machine_mode mode0 = insn_data[icode].operand[1].mode; +- +- if (optimize || !target +- || GET_MODE (target) != tmode +- || !insn_data[icode].operand[0].predicate (target, tmode)) +- target = gen_reg_rtx (tmode); +- +- if (VECTOR_MODE_P (mode0)) +- op0 = safe_vector_operand (op0, mode0); +- +- if ((optimize && !register_operand (op0, mode0)) +- || !insn_data[icode].operand[1].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- +- op1 = op0; +- if (!insn_data[icode].operand[2].predicate (op1, mode0)) +- op1 = copy_to_mode_reg (mode0, op1); +- +- pat = GEN_FCN (icode) (target, op0, op1); +- if (! pat) +- return 0; +- emit_insn (pat); +- return target; +-} +- +-/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ +- +-static rtx +-ix86_expand_sse_compare (const struct builtin_description *d, +- tree exp, rtx target, bool swap) +-{ +- rtx pat; +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- tree arg1 = CALL_EXPR_ARG (exp, 1); +- rtx op0 = expand_normal (arg0); +- rtx op1 = expand_normal (arg1); +- rtx op2; +- machine_mode tmode = insn_data[d->icode].operand[0].mode; +- machine_mode mode0 = insn_data[d->icode].operand[1].mode; +- machine_mode mode1 = insn_data[d->icode].operand[2].mode; +- enum rtx_code comparison = d->comparison; +- +- if (VECTOR_MODE_P (mode0)) +- op0 = safe_vector_operand (op0, mode0); +- if (VECTOR_MODE_P (mode1)) +- op1 = safe_vector_operand (op1, mode1); +- +- /* Swap operands if we have a comparison that isn't available in +- hardware. */ +- if (swap) +- std::swap (op0, op1); +- +- if (optimize || !target +- || GET_MODE (target) != tmode +- || !insn_data[d->icode].operand[0].predicate (target, tmode)) +- target = gen_reg_rtx (tmode); +- +- if ((optimize && !register_operand (op0, mode0)) +- || !insn_data[d->icode].operand[1].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- if ((optimize && !register_operand (op1, mode1)) +- || !insn_data[d->icode].operand[2].predicate (op1, mode1)) +- op1 = copy_to_mode_reg (mode1, op1); +- +- op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); +- pat = GEN_FCN (d->icode) (target, op0, op1, op2); +- if (! pat) +- return 0; +- emit_insn (pat); +- return target; +-} +- +-/* Subroutine of ix86_expand_builtin to take care of comi insns. */ +- +-static rtx +-ix86_expand_sse_comi (const struct builtin_description *d, tree exp, +- rtx target) +-{ +- rtx pat; +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- tree arg1 = CALL_EXPR_ARG (exp, 1); +- rtx op0 = expand_normal (arg0); +- rtx op1 = expand_normal (arg1); +- machine_mode mode0 = insn_data[d->icode].operand[0].mode; +- machine_mode mode1 = insn_data[d->icode].operand[1].mode; +- enum rtx_code comparison = d->comparison; +- +- if (VECTOR_MODE_P (mode0)) +- op0 = safe_vector_operand (op0, mode0); +- if (VECTOR_MODE_P (mode1)) +- op1 = safe_vector_operand (op1, mode1); +- +- /* Swap operands if we have a comparison that isn't available in +- hardware. 
*/ +- if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) +- std::swap (op0, op1); +- +- target = gen_reg_rtx (SImode); +- emit_move_insn (target, const0_rtx); +- target = gen_rtx_SUBREG (QImode, target, 0); +- +- if ((optimize && !register_operand (op0, mode0)) +- || !insn_data[d->icode].operand[0].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- if ((optimize && !register_operand (op1, mode1)) +- || !insn_data[d->icode].operand[1].predicate (op1, mode1)) +- op1 = copy_to_mode_reg (mode1, op1); +- +- pat = GEN_FCN (d->icode) (op0, op1); +- if (! pat) +- return 0; +- emit_insn (pat); +- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), +- gen_rtx_fmt_ee (comparison, QImode, +- SET_DEST (pat), +- const0_rtx))); +- +- return SUBREG_REG (target); +-} +- +-/* Subroutines of ix86_expand_args_builtin to take care of round insns. */ +- +-static rtx +-ix86_expand_sse_round (const struct builtin_description *d, tree exp, +- rtx target) +-{ +- rtx pat; +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- rtx op1, op0 = expand_normal (arg0); +- machine_mode tmode = insn_data[d->icode].operand[0].mode; +- machine_mode mode0 = insn_data[d->icode].operand[1].mode; +- +- if (optimize || target == 0 +- || GET_MODE (target) != tmode +- || !insn_data[d->icode].operand[0].predicate (target, tmode)) +- target = gen_reg_rtx (tmode); +- +- if (VECTOR_MODE_P (mode0)) +- op0 = safe_vector_operand (op0, mode0); +- +- if ((optimize && !register_operand (op0, mode0)) +- || !insn_data[d->icode].operand[0].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- +- op1 = GEN_INT (d->comparison); +- +- pat = GEN_FCN (d->icode) (target, op0, op1); +- if (! pat) +- return 0; +- emit_insn (pat); +- return target; +-} +- +-static rtx +-ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d, +- tree exp, rtx target) +-{ +- rtx pat; +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- tree arg1 = CALL_EXPR_ARG (exp, 1); +- rtx op0 = expand_normal (arg0); +- rtx op1 = expand_normal (arg1); +- rtx op2; +- machine_mode tmode = insn_data[d->icode].operand[0].mode; +- machine_mode mode0 = insn_data[d->icode].operand[1].mode; +- machine_mode mode1 = insn_data[d->icode].operand[2].mode; +- +- if (optimize || target == 0 +- || GET_MODE (target) != tmode +- || !insn_data[d->icode].operand[0].predicate (target, tmode)) +- target = gen_reg_rtx (tmode); +- +- op0 = safe_vector_operand (op0, mode0); +- op1 = safe_vector_operand (op1, mode1); +- +- if ((optimize && !register_operand (op0, mode0)) +- || !insn_data[d->icode].operand[0].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- if ((optimize && !register_operand (op1, mode1)) +- || !insn_data[d->icode].operand[1].predicate (op1, mode1)) +- op1 = copy_to_mode_reg (mode1, op1); +- +- op2 = GEN_INT (d->comparison); +- +- pat = GEN_FCN (d->icode) (target, op0, op1, op2); +- if (! pat) +- return 0; +- emit_insn (pat); +- return target; +-} +- +-/* Subroutine of ix86_expand_builtin to take care of ptest insns. 
*/ +- +-static rtx +-ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, +- rtx target) +-{ +- rtx pat; +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- tree arg1 = CALL_EXPR_ARG (exp, 1); +- rtx op0 = expand_normal (arg0); +- rtx op1 = expand_normal (arg1); +- machine_mode mode0 = insn_data[d->icode].operand[0].mode; +- machine_mode mode1 = insn_data[d->icode].operand[1].mode; +- enum rtx_code comparison = d->comparison; +- +- if (VECTOR_MODE_P (mode0)) +- op0 = safe_vector_operand (op0, mode0); +- if (VECTOR_MODE_P (mode1)) +- op1 = safe_vector_operand (op1, mode1); +- +- target = gen_reg_rtx (SImode); +- emit_move_insn (target, const0_rtx); +- target = gen_rtx_SUBREG (QImode, target, 0); +- +- if ((optimize && !register_operand (op0, mode0)) +- || !insn_data[d->icode].operand[0].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- if ((optimize && !register_operand (op1, mode1)) +- || !insn_data[d->icode].operand[1].predicate (op1, mode1)) +- op1 = copy_to_mode_reg (mode1, op1); +- +- pat = GEN_FCN (d->icode) (op0, op1); +- if (! pat) +- return 0; +- emit_insn (pat); +- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), +- gen_rtx_fmt_ee (comparison, QImode, +- SET_DEST (pat), +- const0_rtx))); +- +- return SUBREG_REG (target); +-} +- +-/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */ +- +-static rtx +-ix86_expand_sse_pcmpestr (const struct builtin_description *d, +- tree exp, rtx target) +-{ +- rtx pat; +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- tree arg1 = CALL_EXPR_ARG (exp, 1); +- tree arg2 = CALL_EXPR_ARG (exp, 2); +- tree arg3 = CALL_EXPR_ARG (exp, 3); +- tree arg4 = CALL_EXPR_ARG (exp, 4); +- rtx scratch0, scratch1; +- rtx op0 = expand_normal (arg0); +- rtx op1 = expand_normal (arg1); +- rtx op2 = expand_normal (arg2); +- rtx op3 = expand_normal (arg3); +- rtx op4 = expand_normal (arg4); +- machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; +- +- tmode0 = insn_data[d->icode].operand[0].mode; +- tmode1 = insn_data[d->icode].operand[1].mode; +- modev2 = insn_data[d->icode].operand[2].mode; +- modei3 = insn_data[d->icode].operand[3].mode; +- modev4 = insn_data[d->icode].operand[4].mode; +- modei5 = insn_data[d->icode].operand[5].mode; +- modeimm = insn_data[d->icode].operand[6].mode; +- +- if (VECTOR_MODE_P (modev2)) +- op0 = safe_vector_operand (op0, modev2); +- if (VECTOR_MODE_P (modev4)) +- op2 = safe_vector_operand (op2, modev4); +- +- if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) +- op0 = copy_to_mode_reg (modev2, op0); +- if (!insn_data[d->icode].operand[3].predicate (op1, modei3)) +- op1 = copy_to_mode_reg (modei3, op1); +- if ((optimize && !register_operand (op2, modev4)) +- || !insn_data[d->icode].operand[4].predicate (op2, modev4)) +- op2 = copy_to_mode_reg (modev4, op2); +- if (!insn_data[d->icode].operand[5].predicate (op3, modei5)) +- op3 = copy_to_mode_reg (modei5, op3); +- +- if (!insn_data[d->icode].operand[6].predicate (op4, modeimm)) +- { +- error ("the fifth argument must be an 8-bit immediate"); +- return const0_rtx; +- } +- +- if (d->code == IX86_BUILTIN_PCMPESTRI128) +- { +- if (optimize || !target +- || GET_MODE (target) != tmode0 +- || !insn_data[d->icode].operand[0].predicate (target, tmode0)) +- target = gen_reg_rtx (tmode0); +- +- scratch1 = gen_reg_rtx (tmode1); +- +- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4); +- } +- else if (d->code == IX86_BUILTIN_PCMPESTRM128) +- { +- if (optimize || !target +- || GET_MODE (target) != tmode1 
+- || !insn_data[d->icode].operand[1].predicate (target, tmode1)) +- target = gen_reg_rtx (tmode1); +- +- scratch0 = gen_reg_rtx (tmode0); +- +- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4); +- } +- else +- { +- gcc_assert (d->flag); +- +- scratch0 = gen_reg_rtx (tmode0); +- scratch1 = gen_reg_rtx (tmode1); +- +- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4); +- } +- +- if (! pat) +- return 0; +- +- emit_insn (pat); +- +- if (d->flag) +- { +- target = gen_reg_rtx (SImode); +- emit_move_insn (target, const0_rtx); +- target = gen_rtx_SUBREG (QImode, target, 0); +- +- emit_insn +- (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), +- gen_rtx_fmt_ee (EQ, QImode, +- gen_rtx_REG ((machine_mode) d->flag, +- FLAGS_REG), +- const0_rtx))); +- return SUBREG_REG (target); +- } +- else +- return target; +-} +- +- +-/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */ +- +-static rtx +-ix86_expand_sse_pcmpistr (const struct builtin_description *d, +- tree exp, rtx target) +-{ +- rtx pat; +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- tree arg1 = CALL_EXPR_ARG (exp, 1); +- tree arg2 = CALL_EXPR_ARG (exp, 2); +- rtx scratch0, scratch1; +- rtx op0 = expand_normal (arg0); +- rtx op1 = expand_normal (arg1); +- rtx op2 = expand_normal (arg2); +- machine_mode tmode0, tmode1, modev2, modev3, modeimm; +- +- tmode0 = insn_data[d->icode].operand[0].mode; +- tmode1 = insn_data[d->icode].operand[1].mode; +- modev2 = insn_data[d->icode].operand[2].mode; +- modev3 = insn_data[d->icode].operand[3].mode; +- modeimm = insn_data[d->icode].operand[4].mode; +- +- if (VECTOR_MODE_P (modev2)) +- op0 = safe_vector_operand (op0, modev2); +- if (VECTOR_MODE_P (modev3)) +- op1 = safe_vector_operand (op1, modev3); +- +- if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) +- op0 = copy_to_mode_reg (modev2, op0); +- if ((optimize && !register_operand (op1, modev3)) +- || !insn_data[d->icode].operand[3].predicate (op1, modev3)) +- op1 = copy_to_mode_reg (modev3, op1); +- +- if (!insn_data[d->icode].operand[4].predicate (op2, modeimm)) +- { +- error ("the third argument must be an 8-bit immediate"); +- return const0_rtx; +- } +- +- if (d->code == IX86_BUILTIN_PCMPISTRI128) +- { +- if (optimize || !target +- || GET_MODE (target) != tmode0 +- || !insn_data[d->icode].operand[0].predicate (target, tmode0)) +- target = gen_reg_rtx (tmode0); +- +- scratch1 = gen_reg_rtx (tmode1); +- +- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2); +- } +- else if (d->code == IX86_BUILTIN_PCMPISTRM128) +- { +- if (optimize || !target +- || GET_MODE (target) != tmode1 +- || !insn_data[d->icode].operand[1].predicate (target, tmode1)) +- target = gen_reg_rtx (tmode1); +- +- scratch0 = gen_reg_rtx (tmode0); +- +- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2); +- } +- else +- { +- gcc_assert (d->flag); +- +- scratch0 = gen_reg_rtx (tmode0); +- scratch1 = gen_reg_rtx (tmode1); +- +- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2); +- } +- +- if (! 
pat) +- return 0; +- +- emit_insn (pat); +- +- if (d->flag) +- { +- target = gen_reg_rtx (SImode); +- emit_move_insn (target, const0_rtx); +- target = gen_rtx_SUBREG (QImode, target, 0); +- +- emit_insn +- (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), +- gen_rtx_fmt_ee (EQ, QImode, +- gen_rtx_REG ((machine_mode) d->flag, +- FLAGS_REG), +- const0_rtx))); +- return SUBREG_REG (target); +- } +- else +- return target; +-} +- +-/* Subroutine of ix86_expand_builtin to take care of insns with +- variable number of operands. */ +- +-static rtx +-ix86_expand_args_builtin (const struct builtin_description *d, +- tree exp, rtx target) +-{ +- rtx pat, real_target; +- unsigned int i, nargs; +- unsigned int nargs_constant = 0; +- unsigned int mask_pos = 0; +- int num_memory = 0; +- struct +- { +- rtx op; +- machine_mode mode; +- } args[6]; +- bool second_arg_count = false; +- enum insn_code icode = d->icode; +- const struct insn_data_d *insn_p = &insn_data[icode]; +- machine_mode tmode = insn_p->operand[0].mode; +- machine_mode rmode = VOIDmode; +- bool swap = false; +- enum rtx_code comparison = d->comparison; +- +- switch ((enum ix86_builtin_func_type) d->flag) +- { +- case V2DF_FTYPE_V2DF_ROUND: +- case V4DF_FTYPE_V4DF_ROUND: +- case V8DF_FTYPE_V8DF_ROUND: +- case V4SF_FTYPE_V4SF_ROUND: +- case V8SF_FTYPE_V8SF_ROUND: +- case V16SF_FTYPE_V16SF_ROUND: +- case V4SI_FTYPE_V4SF_ROUND: +- case V8SI_FTYPE_V8SF_ROUND: +- case V16SI_FTYPE_V16SF_ROUND: +- return ix86_expand_sse_round (d, exp, target); +- case V4SI_FTYPE_V2DF_V2DF_ROUND: +- case V8SI_FTYPE_V4DF_V4DF_ROUND: +- case V16SI_FTYPE_V8DF_V8DF_ROUND: +- return ix86_expand_sse_round_vec_pack_sfix (d, exp, target); +- case INT_FTYPE_V8SF_V8SF_PTEST: +- case INT_FTYPE_V4DI_V4DI_PTEST: +- case INT_FTYPE_V4DF_V4DF_PTEST: +- case INT_FTYPE_V4SF_V4SF_PTEST: +- case INT_FTYPE_V2DI_V2DI_PTEST: +- case INT_FTYPE_V2DF_V2DF_PTEST: +- return ix86_expand_sse_ptest (d, exp, target); +- case FLOAT128_FTYPE_FLOAT128: +- case FLOAT_FTYPE_FLOAT: +- case INT_FTYPE_INT: +- case UINT_FTYPE_UINT: +- case UINT16_FTYPE_UINT16: +- case UINT64_FTYPE_INT: +- case UINT64_FTYPE_UINT64: +- case INT64_FTYPE_INT64: +- case INT64_FTYPE_V4SF: +- case INT64_FTYPE_V2DF: +- case INT_FTYPE_V16QI: +- case INT_FTYPE_V8QI: +- case INT_FTYPE_V8SF: +- case INT_FTYPE_V4DF: +- case INT_FTYPE_V4SF: +- case INT_FTYPE_V2DF: +- case INT_FTYPE_V32QI: +- case V16QI_FTYPE_V16QI: +- case V8SI_FTYPE_V8SF: +- case V8SI_FTYPE_V4SI: +- case V8HI_FTYPE_V8HI: +- case V8HI_FTYPE_V16QI: +- case V8QI_FTYPE_V8QI: +- case V8SF_FTYPE_V8SF: +- case V8SF_FTYPE_V8SI: +- case V8SF_FTYPE_V4SF: +- case V8SF_FTYPE_V8HI: +- case V4SI_FTYPE_V4SI: +- case V4SI_FTYPE_V16QI: +- case V4SI_FTYPE_V4SF: +- case V4SI_FTYPE_V8SI: +- case V4SI_FTYPE_V8HI: +- case V4SI_FTYPE_V4DF: +- case V4SI_FTYPE_V2DF: +- case V4HI_FTYPE_V4HI: +- case V4DF_FTYPE_V4DF: +- case V4DF_FTYPE_V4SI: +- case V4DF_FTYPE_V4SF: +- case V4DF_FTYPE_V2DF: +- case V4SF_FTYPE_V4SF: +- case V4SF_FTYPE_V4SI: +- case V4SF_FTYPE_V8SF: +- case V4SF_FTYPE_V4DF: +- case V4SF_FTYPE_V8HI: +- case V4SF_FTYPE_V2DF: +- case V2DI_FTYPE_V2DI: +- case V2DI_FTYPE_V16QI: +- case V2DI_FTYPE_V8HI: +- case V2DI_FTYPE_V4SI: +- case V2DF_FTYPE_V2DF: +- case V2DF_FTYPE_V4SI: +- case V2DF_FTYPE_V4DF: +- case V2DF_FTYPE_V4SF: +- case V2DF_FTYPE_V2SI: +- case V2SI_FTYPE_V2SI: +- case V2SI_FTYPE_V4SF: +- case V2SI_FTYPE_V2SF: +- case V2SI_FTYPE_V2DF: +- case V2SF_FTYPE_V2SF: +- case V2SF_FTYPE_V2SI: +- case V32QI_FTYPE_V32QI: +- case V32QI_FTYPE_V16QI: +- case V16HI_FTYPE_V16HI: 
+- case V16HI_FTYPE_V8HI: +- case V8SI_FTYPE_V8SI: +- case V16HI_FTYPE_V16QI: +- case V8SI_FTYPE_V16QI: +- case V4DI_FTYPE_V16QI: +- case V8SI_FTYPE_V8HI: +- case V4DI_FTYPE_V8HI: +- case V4DI_FTYPE_V4SI: +- case V4DI_FTYPE_V2DI: +- case UQI_FTYPE_UQI: +- case UHI_FTYPE_UHI: +- case USI_FTYPE_USI: +- case USI_FTYPE_UQI: +- case USI_FTYPE_UHI: +- case UDI_FTYPE_UDI: +- case UHI_FTYPE_V16QI: +- case USI_FTYPE_V32QI: +- case UDI_FTYPE_V64QI: +- case V16QI_FTYPE_UHI: +- case V32QI_FTYPE_USI: +- case V64QI_FTYPE_UDI: +- case V8HI_FTYPE_UQI: +- case V16HI_FTYPE_UHI: +- case V32HI_FTYPE_USI: +- case V4SI_FTYPE_UQI: +- case V8SI_FTYPE_UQI: +- case V4SI_FTYPE_UHI: +- case V8SI_FTYPE_UHI: +- case UQI_FTYPE_V8HI: +- case UHI_FTYPE_V16HI: +- case USI_FTYPE_V32HI: +- case UQI_FTYPE_V4SI: +- case UQI_FTYPE_V8SI: +- case UHI_FTYPE_V16SI: +- case UQI_FTYPE_V2DI: +- case UQI_FTYPE_V4DI: +- case UQI_FTYPE_V8DI: +- case V16SI_FTYPE_UHI: +- case V2DI_FTYPE_UQI: +- case V4DI_FTYPE_UQI: +- case V16SI_FTYPE_INT: +- case V16SF_FTYPE_V8SF: +- case V16SI_FTYPE_V8SI: +- case V16SF_FTYPE_V4SF: +- case V16SI_FTYPE_V4SI: +- case V16SI_FTYPE_V16SF: +- case V16SI_FTYPE_V16SI: +- case V64QI_FTYPE_V64QI: +- case V32HI_FTYPE_V32HI: +- case V16SF_FTYPE_V16SF: +- case V8DI_FTYPE_UQI: +- case V8DI_FTYPE_V8DI: +- case V8DF_FTYPE_V4DF: +- case V8DF_FTYPE_V2DF: +- case V8DF_FTYPE_V8DF: +- case V4DI_FTYPE_V4DI: +- nargs = 1; +- break; +- case V4SF_FTYPE_V4SF_VEC_MERGE: +- case V2DF_FTYPE_V2DF_VEC_MERGE: +- return ix86_expand_unop_vec_merge_builtin (icode, exp, target); +- case FLOAT128_FTYPE_FLOAT128_FLOAT128: +- case V16QI_FTYPE_V16QI_V16QI: +- case V16QI_FTYPE_V8HI_V8HI: +- case V16SF_FTYPE_V16SF_V16SF: +- case V8QI_FTYPE_V8QI_V8QI: +- case V8QI_FTYPE_V4HI_V4HI: +- case V8HI_FTYPE_V8HI_V8HI: +- case V8HI_FTYPE_V16QI_V16QI: +- case V8HI_FTYPE_V4SI_V4SI: +- case V8SF_FTYPE_V8SF_V8SF: +- case V8SF_FTYPE_V8SF_V8SI: +- case V8DF_FTYPE_V8DF_V8DF: +- case V4SI_FTYPE_V4SI_V4SI: +- case V4SI_FTYPE_V8HI_V8HI: +- case V4SI_FTYPE_V2DF_V2DF: +- case V4HI_FTYPE_V4HI_V4HI: +- case V4HI_FTYPE_V8QI_V8QI: +- case V4HI_FTYPE_V2SI_V2SI: +- case V4DF_FTYPE_V4DF_V4DF: +- case V4DF_FTYPE_V4DF_V4DI: +- case V4SF_FTYPE_V4SF_V4SF: +- case V4SF_FTYPE_V4SF_V4SI: +- case V4SF_FTYPE_V4SF_V2SI: +- case V4SF_FTYPE_V4SF_V2DF: +- case V4SF_FTYPE_V4SF_UINT: +- case V4SF_FTYPE_V4SF_DI: +- case V4SF_FTYPE_V4SF_SI: +- case V2DI_FTYPE_V2DI_V2DI: +- case V2DI_FTYPE_V16QI_V16QI: +- case V2DI_FTYPE_V4SI_V4SI: +- case V2DI_FTYPE_V2DI_V16QI: +- case V2SI_FTYPE_V2SI_V2SI: +- case V2SI_FTYPE_V4HI_V4HI: +- case V2SI_FTYPE_V2SF_V2SF: +- case V2DF_FTYPE_V2DF_V2DF: +- case V2DF_FTYPE_V2DF_V4SF: +- case V2DF_FTYPE_V2DF_V2DI: +- case V2DF_FTYPE_V2DF_DI: +- case V2DF_FTYPE_V2DF_SI: +- case V2DF_FTYPE_V2DF_UINT: +- case V2SF_FTYPE_V2SF_V2SF: +- case V1DI_FTYPE_V1DI_V1DI: +- case V1DI_FTYPE_V8QI_V8QI: +- case V1DI_FTYPE_V2SI_V2SI: +- case V32QI_FTYPE_V16HI_V16HI: +- case V16HI_FTYPE_V8SI_V8SI: +- case V64QI_FTYPE_V64QI_V64QI: +- case V32QI_FTYPE_V32QI_V32QI: +- case V16HI_FTYPE_V32QI_V32QI: +- case V16HI_FTYPE_V16HI_V16HI: +- case V8SI_FTYPE_V4DF_V4DF: +- case V8SI_FTYPE_V8SI_V8SI: +- case V8SI_FTYPE_V16HI_V16HI: +- case V4DI_FTYPE_V4DI_V4DI: +- case V4DI_FTYPE_V8SI_V8SI: +- case V8DI_FTYPE_V64QI_V64QI: +- if (comparison == UNKNOWN) +- return ix86_expand_binop_builtin (icode, exp, target); +- nargs = 2; +- break; +- case V4SF_FTYPE_V4SF_V4SF_SWAP: +- case V2DF_FTYPE_V2DF_V2DF_SWAP: +- gcc_assert (comparison != UNKNOWN); +- nargs = 2; +- swap = true; +- break; +- case 
V16HI_FTYPE_V16HI_V8HI_COUNT: +- case V16HI_FTYPE_V16HI_SI_COUNT: +- case V8SI_FTYPE_V8SI_V4SI_COUNT: +- case V8SI_FTYPE_V8SI_SI_COUNT: +- case V4DI_FTYPE_V4DI_V2DI_COUNT: +- case V4DI_FTYPE_V4DI_INT_COUNT: +- case V8HI_FTYPE_V8HI_V8HI_COUNT: +- case V8HI_FTYPE_V8HI_SI_COUNT: +- case V4SI_FTYPE_V4SI_V4SI_COUNT: +- case V4SI_FTYPE_V4SI_SI_COUNT: +- case V4HI_FTYPE_V4HI_V4HI_COUNT: +- case V4HI_FTYPE_V4HI_SI_COUNT: +- case V2DI_FTYPE_V2DI_V2DI_COUNT: +- case V2DI_FTYPE_V2DI_SI_COUNT: +- case V2SI_FTYPE_V2SI_V2SI_COUNT: +- case V2SI_FTYPE_V2SI_SI_COUNT: +- case V1DI_FTYPE_V1DI_V1DI_COUNT: +- case V1DI_FTYPE_V1DI_SI_COUNT: +- nargs = 2; +- second_arg_count = true; +- break; +- case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT: +- case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT: +- case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT: +- case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT: +- case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT: +- case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT: +- case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT: +- case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT: +- case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT: +- case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT: +- case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT: +- case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT: +- case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT: +- case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT: +- case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT: +- case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT: +- case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT: +- case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT: +- nargs = 4; +- second_arg_count = true; +- break; +- case UINT64_FTYPE_UINT64_UINT64: +- case UINT_FTYPE_UINT_UINT: +- case UINT_FTYPE_UINT_USHORT: +- case UINT_FTYPE_UINT_UCHAR: +- case UINT16_FTYPE_UINT16_INT: +- case UINT8_FTYPE_UINT8_INT: +- case UQI_FTYPE_UQI_UQI: +- case UHI_FTYPE_UHI_UHI: +- case USI_FTYPE_USI_USI: +- case UDI_FTYPE_UDI_UDI: +- case V16SI_FTYPE_V8DF_V8DF: +- nargs = 2; +- break; +- case V2DI_FTYPE_V2DI_INT_CONVERT: +- nargs = 2; +- rmode = V1TImode; +- nargs_constant = 1; +- break; +- case V4DI_FTYPE_V4DI_INT_CONVERT: +- nargs = 2; +- rmode = V2TImode; +- nargs_constant = 1; +- break; +- case V8DI_FTYPE_V8DI_INT_CONVERT: +- nargs = 2; +- rmode = V4TImode; +- nargs_constant = 1; +- break; +- case V8HI_FTYPE_V8HI_INT: +- case V8HI_FTYPE_V8SF_INT: +- case V16HI_FTYPE_V16SF_INT: +- case V8HI_FTYPE_V4SF_INT: +- case V8SF_FTYPE_V8SF_INT: +- case V4SF_FTYPE_V16SF_INT: +- case V16SF_FTYPE_V16SF_INT: +- case V4SI_FTYPE_V4SI_INT: +- case V4SI_FTYPE_V8SI_INT: +- case V4HI_FTYPE_V4HI_INT: +- case V4DF_FTYPE_V4DF_INT: +- case V4DF_FTYPE_V8DF_INT: +- case V4SF_FTYPE_V4SF_INT: +- case V4SF_FTYPE_V8SF_INT: +- case V2DI_FTYPE_V2DI_INT: +- case V2DF_FTYPE_V2DF_INT: +- case V2DF_FTYPE_V4DF_INT: +- case V16HI_FTYPE_V16HI_INT: +- case V8SI_FTYPE_V8SI_INT: +- case V16SI_FTYPE_V16SI_INT: +- case V4SI_FTYPE_V16SI_INT: +- case V4DI_FTYPE_V4DI_INT: +- case V2DI_FTYPE_V4DI_INT: +- case V4DI_FTYPE_V8DI_INT: +- case QI_FTYPE_V4SF_INT: +- case QI_FTYPE_V2DF_INT: +- case UQI_FTYPE_UQI_UQI_CONST: +- case UHI_FTYPE_UHI_UQI: +- case USI_FTYPE_USI_UQI: +- case UDI_FTYPE_UDI_UQI: +- nargs = 2; +- nargs_constant = 1; +- break; +- case V16QI_FTYPE_V16QI_V16QI_V16QI: +- case V8SF_FTYPE_V8SF_V8SF_V8SF: +- case V4DF_FTYPE_V4DF_V4DF_V4DF: +- case V4SF_FTYPE_V4SF_V4SF_V4SF: +- case V2DF_FTYPE_V2DF_V2DF_V2DF: +- case V32QI_FTYPE_V32QI_V32QI_V32QI: +- case UHI_FTYPE_V16SI_V16SI_UHI: +- case UQI_FTYPE_V8DI_V8DI_UQI: +- case V16HI_FTYPE_V16SI_V16HI_UHI: +- case V16QI_FTYPE_V16SI_V16QI_UHI: +- case V16QI_FTYPE_V8DI_V16QI_UQI: +- case 
V16SF_FTYPE_V16SF_V16SF_UHI: +- case V16SF_FTYPE_V4SF_V16SF_UHI: +- case V16SI_FTYPE_SI_V16SI_UHI: +- case V16SI_FTYPE_V16HI_V16SI_UHI: +- case V16SI_FTYPE_V16QI_V16SI_UHI: +- case V8SF_FTYPE_V4SF_V8SF_UQI: +- case V4DF_FTYPE_V2DF_V4DF_UQI: +- case V8SI_FTYPE_V4SI_V8SI_UQI: +- case V8SI_FTYPE_SI_V8SI_UQI: +- case V4SI_FTYPE_V4SI_V4SI_UQI: +- case V4SI_FTYPE_SI_V4SI_UQI: +- case V4DI_FTYPE_V2DI_V4DI_UQI: +- case V4DI_FTYPE_DI_V4DI_UQI: +- case V2DI_FTYPE_V2DI_V2DI_UQI: +- case V2DI_FTYPE_DI_V2DI_UQI: +- case V64QI_FTYPE_V64QI_V64QI_UDI: +- case V64QI_FTYPE_V16QI_V64QI_UDI: +- case V64QI_FTYPE_QI_V64QI_UDI: +- case V32QI_FTYPE_V32QI_V32QI_USI: +- case V32QI_FTYPE_V16QI_V32QI_USI: +- case V32QI_FTYPE_QI_V32QI_USI: +- case V16QI_FTYPE_V16QI_V16QI_UHI: +- case V16QI_FTYPE_QI_V16QI_UHI: +- case V32HI_FTYPE_V8HI_V32HI_USI: +- case V32HI_FTYPE_HI_V32HI_USI: +- case V16HI_FTYPE_V8HI_V16HI_UHI: +- case V16HI_FTYPE_HI_V16HI_UHI: +- case V8HI_FTYPE_V8HI_V8HI_UQI: +- case V8HI_FTYPE_HI_V8HI_UQI: +- case V8SF_FTYPE_V8HI_V8SF_UQI: +- case V4SF_FTYPE_V8HI_V4SF_UQI: +- case V8SI_FTYPE_V8SF_V8SI_UQI: +- case V4SI_FTYPE_V4SF_V4SI_UQI: +- case V4DI_FTYPE_V4SF_V4DI_UQI: +- case V2DI_FTYPE_V4SF_V2DI_UQI: +- case V4SF_FTYPE_V4DI_V4SF_UQI: +- case V4SF_FTYPE_V2DI_V4SF_UQI: +- case V4DF_FTYPE_V4DI_V4DF_UQI: +- case V2DF_FTYPE_V2DI_V2DF_UQI: +- case V16QI_FTYPE_V8HI_V16QI_UQI: +- case V16QI_FTYPE_V16HI_V16QI_UHI: +- case V16QI_FTYPE_V4SI_V16QI_UQI: +- case V16QI_FTYPE_V8SI_V16QI_UQI: +- case V8HI_FTYPE_V4SI_V8HI_UQI: +- case V8HI_FTYPE_V8SI_V8HI_UQI: +- case V16QI_FTYPE_V2DI_V16QI_UQI: +- case V16QI_FTYPE_V4DI_V16QI_UQI: +- case V8HI_FTYPE_V2DI_V8HI_UQI: +- case V8HI_FTYPE_V4DI_V8HI_UQI: +- case V4SI_FTYPE_V2DI_V4SI_UQI: +- case V4SI_FTYPE_V4DI_V4SI_UQI: +- case V32QI_FTYPE_V32HI_V32QI_USI: +- case UHI_FTYPE_V16QI_V16QI_UHI: +- case USI_FTYPE_V32QI_V32QI_USI: +- case UDI_FTYPE_V64QI_V64QI_UDI: +- case UQI_FTYPE_V8HI_V8HI_UQI: +- case UHI_FTYPE_V16HI_V16HI_UHI: +- case USI_FTYPE_V32HI_V32HI_USI: +- case UQI_FTYPE_V4SI_V4SI_UQI: +- case UQI_FTYPE_V8SI_V8SI_UQI: +- case UQI_FTYPE_V2DI_V2DI_UQI: +- case UQI_FTYPE_V4DI_V4DI_UQI: +- case V4SF_FTYPE_V2DF_V4SF_UQI: +- case V4SF_FTYPE_V4DF_V4SF_UQI: +- case V16SI_FTYPE_V16SI_V16SI_UHI: +- case V16SI_FTYPE_V4SI_V16SI_UHI: +- case V2DI_FTYPE_V4SI_V2DI_UQI: +- case V2DI_FTYPE_V8HI_V2DI_UQI: +- case V2DI_FTYPE_V16QI_V2DI_UQI: +- case V4DI_FTYPE_V4DI_V4DI_UQI: +- case V4DI_FTYPE_V4SI_V4DI_UQI: +- case V4DI_FTYPE_V8HI_V4DI_UQI: +- case V4DI_FTYPE_V16QI_V4DI_UQI: +- case V4DI_FTYPE_V4DF_V4DI_UQI: +- case V2DI_FTYPE_V2DF_V2DI_UQI: +- case V4SI_FTYPE_V4DF_V4SI_UQI: +- case V4SI_FTYPE_V2DF_V4SI_UQI: +- case V4SI_FTYPE_V8HI_V4SI_UQI: +- case V4SI_FTYPE_V16QI_V4SI_UQI: +- case V4DI_FTYPE_V4DI_V4DI_V4DI: +- case V8DF_FTYPE_V2DF_V8DF_UQI: +- case V8DF_FTYPE_V4DF_V8DF_UQI: +- case V8DF_FTYPE_V8DF_V8DF_UQI: +- case V8SF_FTYPE_V8SF_V8SF_UQI: +- case V8SF_FTYPE_V8SI_V8SF_UQI: +- case V4DF_FTYPE_V4DF_V4DF_UQI: +- case V4SF_FTYPE_V4SF_V4SF_UQI: +- case V2DF_FTYPE_V2DF_V2DF_UQI: +- case V2DF_FTYPE_V4SF_V2DF_UQI: +- case V2DF_FTYPE_V4SI_V2DF_UQI: +- case V4SF_FTYPE_V4SI_V4SF_UQI: +- case V4DF_FTYPE_V4SF_V4DF_UQI: +- case V4DF_FTYPE_V4SI_V4DF_UQI: +- case V8SI_FTYPE_V8SI_V8SI_UQI: +- case V8SI_FTYPE_V8HI_V8SI_UQI: +- case V8SI_FTYPE_V16QI_V8SI_UQI: +- case V8DF_FTYPE_V8SI_V8DF_UQI: +- case V8DI_FTYPE_DI_V8DI_UQI: +- case V16SF_FTYPE_V8SF_V16SF_UHI: +- case V16SI_FTYPE_V8SI_V16SI_UHI: +- case V16HI_FTYPE_V16HI_V16HI_UHI: +- case V8HI_FTYPE_V16QI_V8HI_UQI: +- case V16HI_FTYPE_V16QI_V16HI_UHI: +- 
case V32HI_FTYPE_V32HI_V32HI_USI: +- case V32HI_FTYPE_V32QI_V32HI_USI: +- case V8DI_FTYPE_V16QI_V8DI_UQI: +- case V8DI_FTYPE_V2DI_V8DI_UQI: +- case V8DI_FTYPE_V4DI_V8DI_UQI: +- case V8DI_FTYPE_V8DI_V8DI_UQI: +- case V8DI_FTYPE_V8HI_V8DI_UQI: +- case V8DI_FTYPE_V8SI_V8DI_UQI: +- case V8HI_FTYPE_V8DI_V8HI_UQI: +- case V8SI_FTYPE_V8DI_V8SI_UQI: +- case V4SI_FTYPE_V4SI_V4SI_V4SI: +- case V16SI_FTYPE_V16SI_V16SI_V16SI: +- case V8DI_FTYPE_V8DI_V8DI_V8DI: +- case V32HI_FTYPE_V32HI_V32HI_V32HI: +- case V2DI_FTYPE_V2DI_V2DI_V2DI: +- case V16HI_FTYPE_V16HI_V16HI_V16HI: +- case V8SI_FTYPE_V8SI_V8SI_V8SI: +- case V8HI_FTYPE_V8HI_V8HI_V8HI: +- nargs = 3; +- break; +- case V32QI_FTYPE_V32QI_V32QI_INT: +- case V16HI_FTYPE_V16HI_V16HI_INT: +- case V16QI_FTYPE_V16QI_V16QI_INT: +- case V4DI_FTYPE_V4DI_V4DI_INT: +- case V8HI_FTYPE_V8HI_V8HI_INT: +- case V8SI_FTYPE_V8SI_V8SI_INT: +- case V8SI_FTYPE_V8SI_V4SI_INT: +- case V8SF_FTYPE_V8SF_V8SF_INT: +- case V8SF_FTYPE_V8SF_V4SF_INT: +- case V4SI_FTYPE_V4SI_V4SI_INT: +- case V4DF_FTYPE_V4DF_V4DF_INT: +- case V16SF_FTYPE_V16SF_V16SF_INT: +- case V16SF_FTYPE_V16SF_V4SF_INT: +- case V16SI_FTYPE_V16SI_V4SI_INT: +- case V4DF_FTYPE_V4DF_V2DF_INT: +- case V4SF_FTYPE_V4SF_V4SF_INT: +- case V2DI_FTYPE_V2DI_V2DI_INT: +- case V4DI_FTYPE_V4DI_V2DI_INT: +- case V2DF_FTYPE_V2DF_V2DF_INT: +- case UQI_FTYPE_V8DI_V8UDI_INT: +- case UQI_FTYPE_V8DF_V8DF_INT: +- case UQI_FTYPE_V2DF_V2DF_INT: +- case UQI_FTYPE_V4SF_V4SF_INT: +- case UHI_FTYPE_V16SI_V16SI_INT: +- case UHI_FTYPE_V16SF_V16SF_INT: +- case V64QI_FTYPE_V64QI_V64QI_INT: +- case V32HI_FTYPE_V32HI_V32HI_INT: +- case V16SI_FTYPE_V16SI_V16SI_INT: +- case V8DI_FTYPE_V8DI_V8DI_INT: +- nargs = 3; +- nargs_constant = 1; +- break; +- case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT: +- nargs = 3; +- rmode = V4DImode; +- nargs_constant = 1; +- break; +- case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT: +- nargs = 3; +- rmode = V2DImode; +- nargs_constant = 1; +- break; +- case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT: +- nargs = 3; +- rmode = DImode; +- nargs_constant = 1; +- break; +- case V2DI_FTYPE_V2DI_UINT_UINT: +- nargs = 3; +- nargs_constant = 2; +- break; +- case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT: +- nargs = 3; +- rmode = V8DImode; +- nargs_constant = 1; +- break; +- case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT: +- nargs = 5; +- rmode = V8DImode; +- mask_pos = 2; +- nargs_constant = 1; +- break; +- case QI_FTYPE_V8DF_INT_UQI: +- case QI_FTYPE_V4DF_INT_UQI: +- case QI_FTYPE_V2DF_INT_UQI: +- case HI_FTYPE_V16SF_INT_UHI: +- case QI_FTYPE_V8SF_INT_UQI: +- case QI_FTYPE_V4SF_INT_UQI: +- case V4SI_FTYPE_V4SI_V4SI_UHI: +- case V8SI_FTYPE_V8SI_V8SI_UHI: +- nargs = 3; +- mask_pos = 1; +- nargs_constant = 1; +- break; +- case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT: +- nargs = 5; +- rmode = V4DImode; +- mask_pos = 2; +- nargs_constant = 1; +- break; +- case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT: +- nargs = 5; +- rmode = V2DImode; +- mask_pos = 2; +- nargs_constant = 1; +- break; +- case V32QI_FTYPE_V32QI_V32QI_V32QI_USI: +- case V32HI_FTYPE_V32HI_V32HI_V32HI_USI: +- case V32HI_FTYPE_V64QI_V64QI_V32HI_USI: +- case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI: +- case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI: +- case V32HI_FTYPE_V32HI_V8HI_V32HI_USI: +- case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI: +- case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI: +- case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI: +- case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI: +- case V32QI_FTYPE_V16HI_V16HI_V32QI_USI: +- case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI: +- case V32HI_FTYPE_V16SI_V16SI_V32HI_USI: +- case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI: +- 
case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI: +- case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI: +- case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI: +- case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI: +- case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI: +- case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI: +- case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI: +- case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI: +- case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI: +- case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI: +- case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI: +- case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI: +- case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI: +- case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI: +- case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI: +- case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI: +- case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI: +- case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI: +- case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI: +- case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI: +- case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI: +- case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI: +- case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI: +- case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI: +- case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI: +- case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI: +- case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI: +- case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI: +- case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI: +- case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI: +- case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI: +- case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI: +- case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI: +- case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI: +- case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI: +- case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI: +- case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI: +- nargs = 4; +- break; +- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: +- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: +- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: +- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: +- case V16SF_FTYPE_V16SF_V16SF_V16SI_INT: +- nargs = 4; +- nargs_constant = 1; +- break; +- case UQI_FTYPE_V4DI_V4DI_INT_UQI: +- case UQI_FTYPE_V8SI_V8SI_INT_UQI: +- case QI_FTYPE_V4DF_V4DF_INT_UQI: +- case QI_FTYPE_V8SF_V8SF_INT_UQI: +- case UQI_FTYPE_V2DI_V2DI_INT_UQI: +- case UQI_FTYPE_V4SI_V4SI_INT_UQI: +- case UQI_FTYPE_V2DF_V2DF_INT_UQI: +- case UQI_FTYPE_V4SF_V4SF_INT_UQI: +- case UDI_FTYPE_V64QI_V64QI_INT_UDI: +- case USI_FTYPE_V32QI_V32QI_INT_USI: +- case UHI_FTYPE_V16QI_V16QI_INT_UHI: +- case USI_FTYPE_V32HI_V32HI_INT_USI: +- case UHI_FTYPE_V16HI_V16HI_INT_UHI: +- case UQI_FTYPE_V8HI_V8HI_INT_UQI: +- case V32HI_FTYPE_V32HI_V32HI_V32HI_INT: +- case V16HI_FTYPE_V16HI_V16HI_V16HI_INT: +- case V8HI_FTYPE_V8HI_V8HI_V8HI_INT: +- case V8SI_FTYPE_V8SI_V8SI_V8SI_INT: +- case V4DI_FTYPE_V4DI_V4DI_V4DI_INT: +- case V8DI_FTYPE_V8DI_V8DI_V8DI_INT: +- case V16SI_FTYPE_V16SI_V16SI_V16SI_INT: +- case V2DI_FTYPE_V2DI_V2DI_V2DI_INT: +- case V4SI_FTYPE_V4SI_V4SI_V4SI_INT: +- nargs = 4; +- mask_pos = 1; +- nargs_constant = 1; +- break; +- case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: +- nargs = 4; +- nargs_constant = 2; +- break; +- case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED: +- case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG: +- nargs = 4; +- break; +- case UQI_FTYPE_V8DI_V8DI_INT_UQI: +- case UHI_FTYPE_V16SI_V16SI_INT_UHI: +- mask_pos = 1; +- nargs = 4; +- nargs_constant = 1; +- break; +- case V8SF_FTYPE_V8SF_INT_V8SF_UQI: +- case V4SF_FTYPE_V4SF_INT_V4SF_UQI: +- case V2DF_FTYPE_V4DF_INT_V2DF_UQI: +- case V2DI_FTYPE_V4DI_INT_V2DI_UQI: +- case V8SF_FTYPE_V16SF_INT_V8SF_UQI: +- case V8SI_FTYPE_V16SI_INT_V8SI_UQI: +- case V2DF_FTYPE_V8DF_INT_V2DF_UQI: +- case V2DI_FTYPE_V8DI_INT_V2DI_UQI: +- case V4SF_FTYPE_V8SF_INT_V4SF_UQI: +- case V4SI_FTYPE_V8SI_INT_V4SI_UQI: +- case V8HI_FTYPE_V8SF_INT_V8HI_UQI: +- case V8HI_FTYPE_V4SF_INT_V8HI_UQI: +- case V32HI_FTYPE_V32HI_INT_V32HI_USI: 
+- case V16HI_FTYPE_V16HI_INT_V16HI_UHI: +- case V8HI_FTYPE_V8HI_INT_V8HI_UQI: +- case V4DI_FTYPE_V4DI_INT_V4DI_UQI: +- case V2DI_FTYPE_V2DI_INT_V2DI_UQI: +- case V8SI_FTYPE_V8SI_INT_V8SI_UQI: +- case V4SI_FTYPE_V4SI_INT_V4SI_UQI: +- case V4DF_FTYPE_V4DF_INT_V4DF_UQI: +- case V2DF_FTYPE_V2DF_INT_V2DF_UQI: +- case V8DF_FTYPE_V8DF_INT_V8DF_UQI: +- case V16SF_FTYPE_V16SF_INT_V16SF_UHI: +- case V16HI_FTYPE_V16SF_INT_V16HI_UHI: +- case V16SI_FTYPE_V16SI_INT_V16SI_UHI: +- case V4SI_FTYPE_V16SI_INT_V4SI_UQI: +- case V4DI_FTYPE_V8DI_INT_V4DI_UQI: +- case V4DF_FTYPE_V8DF_INT_V4DF_UQI: +- case V4SF_FTYPE_V16SF_INT_V4SF_UQI: +- case V8DI_FTYPE_V8DI_INT_V8DI_UQI: +- nargs = 4; +- mask_pos = 2; +- nargs_constant = 1; +- break; +- case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI: +- case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI: +- case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI: +- case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI: +- case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI: +- case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI: +- case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI: +- case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI: +- case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI: +- case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI: +- case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI: +- case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI: +- case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI: +- case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI: +- case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI: +- case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI: +- case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI: +- case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI: +- case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI: +- case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI: +- case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI: +- case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI: +- case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI: +- case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI: +- case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI: +- case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI: +- case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI: +- nargs = 5; +- mask_pos = 2; +- nargs_constant = 1; +- break; +- case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI: +- case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI: +- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI: +- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI: +- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI: +- case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI: +- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI: +- case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI: +- case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI: +- case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI: +- nargs = 5; +- mask_pos = 1; +- nargs_constant = 1; +- break; +- case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI: +- case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI: +- case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI: +- case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT: +- case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT: +- case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT: +- case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT: +- case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT: +- case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT: +- case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT: +- case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT: +- case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT: +- nargs = 5; +- mask_pos = 1; +- nargs_constant = 2; +- break; +- +- default: +- gcc_unreachable (); +- } +- +- gcc_assert (nargs <= ARRAY_SIZE (args)); +- +- if (comparison != UNKNOWN) +- { +- gcc_assert (nargs == 2); +- return ix86_expand_sse_compare (d, exp, target, swap); +- } +- +- if (rmode == VOIDmode || rmode == tmode) +- { +- if (optimize +- || target == 0 +- || GET_MODE (target) != tmode +- || !insn_p->operand[0].predicate (target, tmode)) +- target = gen_reg_rtx (tmode); +- else if (memory_operand (target, tmode)) +- 
num_memory++; +- real_target = target; +- } +- else +- { +- real_target = gen_reg_rtx (tmode); +- target = lowpart_subreg (rmode, real_target, tmode); +- } +- +- for (i = 0; i < nargs; i++) +- { +- tree arg = CALL_EXPR_ARG (exp, i); +- rtx op = expand_normal (arg); +- machine_mode mode = insn_p->operand[i + 1].mode; +- bool match = insn_p->operand[i + 1].predicate (op, mode); +- +- if (second_arg_count && i == 1) +- { +- /* SIMD shift insns take either an 8-bit immediate or +- register as count. But builtin functions take int as +- count. If count doesn't match, we put it in register. +- The instructions are using 64-bit count, if op is just +- 32-bit, zero-extend it, as negative shift counts +- are undefined behavior and zero-extension is more +- efficient. */ +- if (!match) +- { +- if (SCALAR_INT_MODE_P (GET_MODE (op))) +- op = convert_modes (mode, GET_MODE (op), op, 1); +- else +- op = lowpart_subreg (mode, op, GET_MODE (op)); +- if (!insn_p->operand[i + 1].predicate (op, mode)) +- op = copy_to_reg (op); +- } +- } +- else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || +- (!mask_pos && (nargs - i) <= nargs_constant)) +- { +- if (!match) +- switch (icode) +- { +- case CODE_FOR_avx_vinsertf128v4di: +- case CODE_FOR_avx_vextractf128v4di: +- error ("the last argument must be an 1-bit immediate"); +- return const0_rtx; +- +- case CODE_FOR_avx512f_cmpv8di3_mask: +- case CODE_FOR_avx512f_cmpv16si3_mask: +- case CODE_FOR_avx512f_ucmpv8di3_mask: +- case CODE_FOR_avx512f_ucmpv16si3_mask: +- case CODE_FOR_avx512vl_cmpv4di3_mask: +- case CODE_FOR_avx512vl_cmpv8si3_mask: +- case CODE_FOR_avx512vl_ucmpv4di3_mask: +- case CODE_FOR_avx512vl_ucmpv8si3_mask: +- case CODE_FOR_avx512vl_cmpv2di3_mask: +- case CODE_FOR_avx512vl_cmpv4si3_mask: +- case CODE_FOR_avx512vl_ucmpv2di3_mask: +- case CODE_FOR_avx512vl_ucmpv4si3_mask: +- error ("the last argument must be a 3-bit immediate"); +- return const0_rtx; +- +- case CODE_FOR_sse4_1_roundsd: +- case CODE_FOR_sse4_1_roundss: +- +- case CODE_FOR_sse4_1_roundpd: +- case CODE_FOR_sse4_1_roundps: +- case CODE_FOR_avx_roundpd256: +- case CODE_FOR_avx_roundps256: +- +- case CODE_FOR_sse4_1_roundpd_vec_pack_sfix: +- case CODE_FOR_sse4_1_roundps_sfix: +- case CODE_FOR_avx_roundpd_vec_pack_sfix256: +- case CODE_FOR_avx_roundps_sfix256: +- +- case CODE_FOR_sse4_1_blendps: +- case CODE_FOR_avx_blendpd256: +- case CODE_FOR_avx_vpermilv4df: +- case CODE_FOR_avx_vpermilv4df_mask: +- case CODE_FOR_avx512f_getmantv8df_mask: +- case CODE_FOR_avx512f_getmantv16sf_mask: +- case CODE_FOR_avx512vl_getmantv8sf_mask: +- case CODE_FOR_avx512vl_getmantv4df_mask: +- case CODE_FOR_avx512vl_getmantv4sf_mask: +- case CODE_FOR_avx512vl_getmantv2df_mask: +- case CODE_FOR_avx512dq_rangepv8df_mask_round: +- case CODE_FOR_avx512dq_rangepv16sf_mask_round: +- case CODE_FOR_avx512dq_rangepv4df_mask: +- case CODE_FOR_avx512dq_rangepv8sf_mask: +- case CODE_FOR_avx512dq_rangepv2df_mask: +- case CODE_FOR_avx512dq_rangepv4sf_mask: +- case CODE_FOR_avx_shufpd256_mask: +- error ("the last argument must be a 4-bit immediate"); +- return const0_rtx; +- +- case CODE_FOR_sha1rnds4: +- case CODE_FOR_sse4_1_blendpd: +- case CODE_FOR_avx_vpermilv2df: +- case CODE_FOR_avx_vpermilv2df_mask: +- case CODE_FOR_xop_vpermil2v2df3: +- case CODE_FOR_xop_vpermil2v4sf3: +- case CODE_FOR_xop_vpermil2v4df3: +- case CODE_FOR_xop_vpermil2v8sf3: +- case CODE_FOR_avx512f_vinsertf32x4_mask: +- case CODE_FOR_avx512f_vinserti32x4_mask: +- case CODE_FOR_avx512f_vextractf32x4_mask: +- case 
CODE_FOR_avx512f_vextracti32x4_mask: +- case CODE_FOR_sse2_shufpd: +- case CODE_FOR_sse2_shufpd_mask: +- case CODE_FOR_avx512dq_shuf_f64x2_mask: +- case CODE_FOR_avx512dq_shuf_i64x2_mask: +- case CODE_FOR_avx512vl_shuf_i32x4_mask: +- case CODE_FOR_avx512vl_shuf_f32x4_mask: +- error ("the last argument must be a 2-bit immediate"); +- return const0_rtx; +- +- case CODE_FOR_avx_vextractf128v4df: +- case CODE_FOR_avx_vextractf128v8sf: +- case CODE_FOR_avx_vextractf128v8si: +- case CODE_FOR_avx_vinsertf128v4df: +- case CODE_FOR_avx_vinsertf128v8sf: +- case CODE_FOR_avx_vinsertf128v8si: +- case CODE_FOR_avx512f_vinsertf64x4_mask: +- case CODE_FOR_avx512f_vinserti64x4_mask: +- case CODE_FOR_avx512f_vextractf64x4_mask: +- case CODE_FOR_avx512f_vextracti64x4_mask: +- case CODE_FOR_avx512dq_vinsertf32x8_mask: +- case CODE_FOR_avx512dq_vinserti32x8_mask: +- case CODE_FOR_avx512vl_vinsertv4df: +- case CODE_FOR_avx512vl_vinsertv4di: +- case CODE_FOR_avx512vl_vinsertv8sf: +- case CODE_FOR_avx512vl_vinsertv8si: +- error ("the last argument must be a 1-bit immediate"); +- return const0_rtx; +- +- case CODE_FOR_avx_vmcmpv2df3: +- case CODE_FOR_avx_vmcmpv4sf3: +- case CODE_FOR_avx_cmpv2df3: +- case CODE_FOR_avx_cmpv4sf3: +- case CODE_FOR_avx_cmpv4df3: +- case CODE_FOR_avx_cmpv8sf3: +- case CODE_FOR_avx512f_cmpv8df3_mask: +- case CODE_FOR_avx512f_cmpv16sf3_mask: +- case CODE_FOR_avx512f_vmcmpv2df3_mask: +- case CODE_FOR_avx512f_vmcmpv4sf3_mask: +- error ("the last argument must be a 5-bit immediate"); +- return const0_rtx; +- +- default: +- switch (nargs_constant) +- { +- case 2: +- if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || +- (!mask_pos && (nargs - i) == nargs_constant)) +- { +- error ("the next to last argument must be an 8-bit immediate"); +- break; +- } +- /* FALLTHRU */ +- case 1: +- error ("the last argument must be an 8-bit immediate"); +- break; +- default: +- gcc_unreachable (); +- } +- return const0_rtx; +- } +- } +- else +- { +- if (VECTOR_MODE_P (mode)) +- op = safe_vector_operand (op, mode); +- +- /* If we aren't optimizing, only allow one memory operand to +- be generated. */ +- if (memory_operand (op, mode)) +- num_memory++; +- +- op = fixup_modeless_constant (op, mode); +- +- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) +- { +- if (optimize || !match || num_memory > 1) +- op = copy_to_mode_reg (mode, op); +- } +- else +- { +- op = copy_to_reg (op); +- op = lowpart_subreg (mode, op, GET_MODE (op)); +- } +- } +- +- args[i].op = op; +- args[i].mode = mode; +- } +- +- switch (nargs) +- { +- case 1: +- pat = GEN_FCN (icode) (real_target, args[0].op); +- break; +- case 2: +- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op); +- break; +- case 3: +- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, +- args[2].op); +- break; +- case 4: +- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, +- args[2].op, args[3].op); +- break; +- case 5: +- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, +- args[2].op, args[3].op, args[4].op); +- break; +- case 6: +- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, +- args[2].op, args[3].op, args[4].op, +- args[5].op); +- break; +- default: +- gcc_unreachable (); +- } +- +- if (! 
pat) +- return 0; +- +- emit_insn (pat); +- return target; +-} +- +-/* Transform pattern of following layout: +- (set A +- (unspec [B C] UNSPEC_EMBEDDED_ROUNDING)) +- ) +- into: +- (set (A B)) */ +- +-static rtx +-ix86_erase_embedded_rounding (rtx pat) +-{ +- if (GET_CODE (pat) == INSN) +- pat = PATTERN (pat); +- +- gcc_assert (GET_CODE (pat) == SET); +- rtx src = SET_SRC (pat); +- gcc_assert (XVECLEN (src, 0) == 2); +- rtx p0 = XVECEXP (src, 0, 0); +- gcc_assert (GET_CODE (src) == UNSPEC +- && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING); +- rtx res = gen_rtx_SET (SET_DEST (pat), p0); +- return res; +-} +- +-/* Subroutine of ix86_expand_round_builtin to take care of comi insns +- with rounding. */ +-static rtx +-ix86_expand_sse_comi_round (const struct builtin_description *d, +- tree exp, rtx target) +-{ +- rtx pat, set_dst; +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- tree arg1 = CALL_EXPR_ARG (exp, 1); +- tree arg2 = CALL_EXPR_ARG (exp, 2); +- tree arg3 = CALL_EXPR_ARG (exp, 3); +- rtx op0 = expand_normal (arg0); +- rtx op1 = expand_normal (arg1); +- rtx op2 = expand_normal (arg2); +- rtx op3 = expand_normal (arg3); +- enum insn_code icode = d->icode; +- const struct insn_data_d *insn_p = &insn_data[icode]; +- machine_mode mode0 = insn_p->operand[0].mode; +- machine_mode mode1 = insn_p->operand[1].mode; +- enum rtx_code comparison = UNEQ; +- bool need_ucomi = false; +- +- /* See avxintrin.h for values. */ +- enum rtx_code comi_comparisons[32] = +- { +- UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT, +- UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE, +- UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT +- }; +- bool need_ucomi_values[32] = +- { +- true, false, false, true, true, false, false, true, +- true, false, false, true, true, false, false, true, +- false, true, true, false, false, true, true, false, +- false, true, true, false, false, true, true, false +- }; +- +- if (!CONST_INT_P (op2)) +- { +- error ("the third argument must be comparison constant"); +- return const0_rtx; +- } +- if (INTVAL (op2) < 0 || INTVAL (op2) >= 32) +- { +- error ("incorrect comparison mode"); +- return const0_rtx; +- } +- +- if (!insn_p->operand[2].predicate (op3, SImode)) +- { +- error ("incorrect rounding operand"); +- return const0_rtx; +- } +- +- comparison = comi_comparisons[INTVAL (op2)]; +- need_ucomi = need_ucomi_values[INTVAL (op2)]; +- +- if (VECTOR_MODE_P (mode0)) +- op0 = safe_vector_operand (op0, mode0); +- if (VECTOR_MODE_P (mode1)) +- op1 = safe_vector_operand (op1, mode1); +- +- target = gen_reg_rtx (SImode); +- emit_move_insn (target, const0_rtx); +- target = gen_rtx_SUBREG (QImode, target, 0); +- +- if ((optimize && !register_operand (op0, mode0)) +- || !insn_p->operand[0].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- if ((optimize && !register_operand (op1, mode1)) +- || !insn_p->operand[1].predicate (op1, mode1)) +- op1 = copy_to_mode_reg (mode1, op1); +- +- if (need_ucomi) +- icode = icode == CODE_FOR_sse_comi_round +- ? CODE_FOR_sse_ucomi_round +- : CODE_FOR_sse2_ucomi_round; +- +- pat = GEN_FCN (icode) (op0, op1, op3); +- if (! pat) +- return 0; +- +- /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */ +- if (INTVAL (op3) == NO_ROUND) +- { +- pat = ix86_erase_embedded_rounding (pat); +- if (! 
pat) +- return 0; +- +- set_dst = SET_DEST (pat); +- } +- else +- { +- gcc_assert (GET_CODE (pat) == SET); +- set_dst = SET_DEST (pat); +- } +- +- emit_insn (pat); +- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), +- gen_rtx_fmt_ee (comparison, QImode, +- set_dst, +- const0_rtx))); +- +- return SUBREG_REG (target); +-} +- +-static rtx +-ix86_expand_round_builtin (const struct builtin_description *d, +- tree exp, rtx target) +-{ +- rtx pat; +- unsigned int i, nargs; +- struct +- { +- rtx op; +- machine_mode mode; +- } args[6]; +- enum insn_code icode = d->icode; +- const struct insn_data_d *insn_p = &insn_data[icode]; +- machine_mode tmode = insn_p->operand[0].mode; +- unsigned int nargs_constant = 0; +- unsigned int redundant_embed_rnd = 0; +- +- switch ((enum ix86_builtin_func_type) d->flag) +- { +- case UINT64_FTYPE_V2DF_INT: +- case UINT64_FTYPE_V4SF_INT: +- case UINT_FTYPE_V2DF_INT: +- case UINT_FTYPE_V4SF_INT: +- case INT64_FTYPE_V2DF_INT: +- case INT64_FTYPE_V4SF_INT: +- case INT_FTYPE_V2DF_INT: +- case INT_FTYPE_V4SF_INT: +- nargs = 2; +- break; +- case V4SF_FTYPE_V4SF_UINT_INT: +- case V4SF_FTYPE_V4SF_UINT64_INT: +- case V2DF_FTYPE_V2DF_UINT64_INT: +- case V4SF_FTYPE_V4SF_INT_INT: +- case V4SF_FTYPE_V4SF_INT64_INT: +- case V2DF_FTYPE_V2DF_INT64_INT: +- case V4SF_FTYPE_V4SF_V4SF_INT: +- case V2DF_FTYPE_V2DF_V2DF_INT: +- case V4SF_FTYPE_V4SF_V2DF_INT: +- case V2DF_FTYPE_V2DF_V4SF_INT: +- nargs = 3; +- break; +- case V8SF_FTYPE_V8DF_V8SF_QI_INT: +- case V8DF_FTYPE_V8DF_V8DF_QI_INT: +- case V8SI_FTYPE_V8DF_V8SI_QI_INT: +- case V8DI_FTYPE_V8DF_V8DI_QI_INT: +- case V8SF_FTYPE_V8DI_V8SF_QI_INT: +- case V8DF_FTYPE_V8DI_V8DF_QI_INT: +- case V16SF_FTYPE_V16SF_V16SF_HI_INT: +- case V8DI_FTYPE_V8SF_V8DI_QI_INT: +- case V16SF_FTYPE_V16SI_V16SF_HI_INT: +- case V16SI_FTYPE_V16SF_V16SI_HI_INT: +- case V8DF_FTYPE_V8SF_V8DF_QI_INT: +- case V16SF_FTYPE_V16HI_V16SF_HI_INT: +- case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: +- case V4SF_FTYPE_V4SF_V4SF_V4SF_INT: +- nargs = 4; +- break; +- case V4SF_FTYPE_V4SF_V4SF_INT_INT: +- case V2DF_FTYPE_V2DF_V2DF_INT_INT: +- nargs_constant = 2; +- nargs = 4; +- break; +- case INT_FTYPE_V4SF_V4SF_INT_INT: +- case INT_FTYPE_V2DF_V2DF_INT_INT: +- return ix86_expand_sse_comi_round (d, exp, target); +- case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT: +- case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT: +- case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT: +- case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: +- case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: +- case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: +- case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT: +- case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT: +- nargs = 5; +- break; +- case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT: +- case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT: +- nargs_constant = 4; +- nargs = 5; +- break; +- case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT: +- case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT: +- case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT: +- case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT: +- nargs_constant = 3; +- nargs = 5; +- break; +- case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT: +- case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT: +- case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT: +- case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT: +- case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT: +- case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT: +- nargs = 6; +- nargs_constant = 4; +- break; +- case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT: +- case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT: +- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT: +- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT: +- nargs = 6; +- nargs_constant = 3; +- break; +- 
default: +- gcc_unreachable (); +- } +- gcc_assert (nargs <= ARRAY_SIZE (args)); +- +- if (optimize +- || target == 0 +- || GET_MODE (target) != tmode +- || !insn_p->operand[0].predicate (target, tmode)) +- target = gen_reg_rtx (tmode); +- +- for (i = 0; i < nargs; i++) +- { +- tree arg = CALL_EXPR_ARG (exp, i); +- rtx op = expand_normal (arg); +- machine_mode mode = insn_p->operand[i + 1].mode; +- bool match = insn_p->operand[i + 1].predicate (op, mode); +- +- if (i == nargs - nargs_constant) +- { +- if (!match) +- { +- switch (icode) +- { +- case CODE_FOR_avx512f_getmantv8df_mask_round: +- case CODE_FOR_avx512f_getmantv16sf_mask_round: +- case CODE_FOR_avx512f_vgetmantv2df_round: +- case CODE_FOR_avx512f_vgetmantv2df_mask_round: +- case CODE_FOR_avx512f_vgetmantv4sf_round: +- case CODE_FOR_avx512f_vgetmantv4sf_mask_round: +- error ("the immediate argument must be a 4-bit immediate"); +- return const0_rtx; +- case CODE_FOR_avx512f_cmpv8df3_mask_round: +- case CODE_FOR_avx512f_cmpv16sf3_mask_round: +- case CODE_FOR_avx512f_vmcmpv2df3_mask_round: +- case CODE_FOR_avx512f_vmcmpv4sf3_mask_round: +- error ("the immediate argument must be a 5-bit immediate"); +- return const0_rtx; +- default: +- error ("the immediate argument must be an 8-bit immediate"); +- return const0_rtx; +- } +- } +- } +- else if (i == nargs-1) +- { +- if (!insn_p->operand[nargs].predicate (op, SImode)) +- { +- error ("incorrect rounding operand"); +- return const0_rtx; +- } +- +- /* If there is no rounding use normal version of the pattern. */ +- if (INTVAL (op) == NO_ROUND) +- redundant_embed_rnd = 1; +- } +- else +- { +- if (VECTOR_MODE_P (mode)) +- op = safe_vector_operand (op, mode); +- +- op = fixup_modeless_constant (op, mode); +- +- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) +- { +- if (optimize || !match) +- op = copy_to_mode_reg (mode, op); +- } +- else +- { +- op = copy_to_reg (op); +- op = lowpart_subreg (mode, op, GET_MODE (op)); +- } +- } +- +- args[i].op = op; +- args[i].mode = mode; +- } +- +- switch (nargs) +- { +- case 1: +- pat = GEN_FCN (icode) (target, args[0].op); +- break; +- case 2: +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op); +- break; +- case 3: +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, +- args[2].op); +- break; +- case 4: +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, +- args[2].op, args[3].op); +- break; +- case 5: +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, +- args[2].op, args[3].op, args[4].op); +- break; +- case 6: +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, +- args[2].op, args[3].op, args[4].op, +- args[5].op); +- break; +- default: +- gcc_unreachable (); +- } +- +- if (!pat) +- return 0; +- +- if (redundant_embed_rnd) +- pat = ix86_erase_embedded_rounding (pat); +- +- emit_insn (pat); +- return target; +-} +- +-/* Subroutine of ix86_expand_builtin to take care of special insns +- with variable number of operands. 
*/ +- +-static rtx +-ix86_expand_special_args_builtin (const struct builtin_description *d, +- tree exp, rtx target) +-{ +- tree arg; +- rtx pat, op; +- unsigned int i, nargs, arg_adjust, memory; +- bool aligned_mem = false; +- struct +- { +- rtx op; +- machine_mode mode; +- } args[3]; +- enum insn_code icode = d->icode; +- bool last_arg_constant = false; +- const struct insn_data_d *insn_p = &insn_data[icode]; +- machine_mode tmode = insn_p->operand[0].mode; +- enum { load, store } klass; +- +- switch ((enum ix86_builtin_func_type) d->flag) +- { +- case VOID_FTYPE_VOID: +- emit_insn (GEN_FCN (icode) (target)); +- return 0; +- case VOID_FTYPE_UINT64: +- case VOID_FTYPE_UNSIGNED: +- nargs = 0; +- klass = store; +- memory = 0; +- break; +- +- case INT_FTYPE_VOID: +- case USHORT_FTYPE_VOID: +- case UINT64_FTYPE_VOID: +- case UINT_FTYPE_VOID: +- case UNSIGNED_FTYPE_VOID: +- nargs = 0; +- klass = load; +- memory = 0; +- break; +- case UINT64_FTYPE_PUNSIGNED: +- case V2DI_FTYPE_PV2DI: +- case V4DI_FTYPE_PV4DI: +- case V32QI_FTYPE_PCCHAR: +- case V16QI_FTYPE_PCCHAR: +- case V8SF_FTYPE_PCV4SF: +- case V8SF_FTYPE_PCFLOAT: +- case V4SF_FTYPE_PCFLOAT: +- case V4DF_FTYPE_PCV2DF: +- case V4DF_FTYPE_PCDOUBLE: +- case V2DF_FTYPE_PCDOUBLE: +- case VOID_FTYPE_PVOID: +- case V8DI_FTYPE_PV8DI: +- nargs = 1; +- klass = load; +- memory = 0; +- switch (icode) +- { +- case CODE_FOR_sse4_1_movntdqa: +- case CODE_FOR_avx2_movntdqa: +- case CODE_FOR_avx512f_movntdqa: +- aligned_mem = true; +- break; +- default: +- break; +- } +- break; +- case VOID_FTYPE_PV2SF_V4SF: +- case VOID_FTYPE_PV8DI_V8DI: +- case VOID_FTYPE_PV4DI_V4DI: +- case VOID_FTYPE_PV2DI_V2DI: +- case VOID_FTYPE_PCHAR_V32QI: +- case VOID_FTYPE_PCHAR_V16QI: +- case VOID_FTYPE_PFLOAT_V16SF: +- case VOID_FTYPE_PFLOAT_V8SF: +- case VOID_FTYPE_PFLOAT_V4SF: +- case VOID_FTYPE_PDOUBLE_V8DF: +- case VOID_FTYPE_PDOUBLE_V4DF: +- case VOID_FTYPE_PDOUBLE_V2DF: +- case VOID_FTYPE_PLONGLONG_LONGLONG: +- case VOID_FTYPE_PULONGLONG_ULONGLONG: +- case VOID_FTYPE_PUNSIGNED_UNSIGNED: +- case VOID_FTYPE_PINT_INT: +- nargs = 1; +- klass = store; +- /* Reserve memory operand for target. */ +- memory = ARRAY_SIZE (args); +- switch (icode) +- { +- /* These builtins and instructions require the memory +- to be properly aligned. 
*/ +- case CODE_FOR_avx_movntv4di: +- case CODE_FOR_sse2_movntv2di: +- case CODE_FOR_avx_movntv8sf: +- case CODE_FOR_sse_movntv4sf: +- case CODE_FOR_sse4a_vmmovntv4sf: +- case CODE_FOR_avx_movntv4df: +- case CODE_FOR_sse2_movntv2df: +- case CODE_FOR_sse4a_vmmovntv2df: +- case CODE_FOR_sse2_movntidi: +- case CODE_FOR_sse_movntq: +- case CODE_FOR_sse2_movntisi: +- case CODE_FOR_avx512f_movntv16sf: +- case CODE_FOR_avx512f_movntv8df: +- case CODE_FOR_avx512f_movntv8di: +- aligned_mem = true; +- break; +- default: +- break; +- } +- break; +- case VOID_FTYPE_PVOID_PCVOID: +- nargs = 1; +- klass = store; +- memory = 0; +- +- break; +- case V4SF_FTYPE_V4SF_PCV2SF: +- case V2DF_FTYPE_V2DF_PCDOUBLE: +- nargs = 2; +- klass = load; +- memory = 1; +- break; +- case V8SF_FTYPE_PCV8SF_V8SI: +- case V4DF_FTYPE_PCV4DF_V4DI: +- case V4SF_FTYPE_PCV4SF_V4SI: +- case V2DF_FTYPE_PCV2DF_V2DI: +- case V8SI_FTYPE_PCV8SI_V8SI: +- case V4DI_FTYPE_PCV4DI_V4DI: +- case V4SI_FTYPE_PCV4SI_V4SI: +- case V2DI_FTYPE_PCV2DI_V2DI: +- case VOID_FTYPE_INT_INT64: +- nargs = 2; +- klass = load; +- memory = 0; +- break; +- case VOID_FTYPE_PV8DF_V8DF_UQI: +- case VOID_FTYPE_PV4DF_V4DF_UQI: +- case VOID_FTYPE_PV2DF_V2DF_UQI: +- case VOID_FTYPE_PV16SF_V16SF_UHI: +- case VOID_FTYPE_PV8SF_V8SF_UQI: +- case VOID_FTYPE_PV4SF_V4SF_UQI: +- case VOID_FTYPE_PV8DI_V8DI_UQI: +- case VOID_FTYPE_PV4DI_V4DI_UQI: +- case VOID_FTYPE_PV2DI_V2DI_UQI: +- case VOID_FTYPE_PV16SI_V16SI_UHI: +- case VOID_FTYPE_PV8SI_V8SI_UQI: +- case VOID_FTYPE_PV4SI_V4SI_UQI: +- case VOID_FTYPE_PV64QI_V64QI_UDI: +- case VOID_FTYPE_PV32HI_V32HI_USI: +- case VOID_FTYPE_PV32QI_V32QI_USI: +- case VOID_FTYPE_PV16QI_V16QI_UHI: +- case VOID_FTYPE_PV16HI_V16HI_UHI: +- case VOID_FTYPE_PV8HI_V8HI_UQI: +- switch (icode) +- { +- /* These builtins and instructions require the memory +- to be properly aligned. 
*/ +- case CODE_FOR_avx512f_storev16sf_mask: +- case CODE_FOR_avx512f_storev16si_mask: +- case CODE_FOR_avx512f_storev8df_mask: +- case CODE_FOR_avx512f_storev8di_mask: +- case CODE_FOR_avx512vl_storev8sf_mask: +- case CODE_FOR_avx512vl_storev8si_mask: +- case CODE_FOR_avx512vl_storev4df_mask: +- case CODE_FOR_avx512vl_storev4di_mask: +- case CODE_FOR_avx512vl_storev4sf_mask: +- case CODE_FOR_avx512vl_storev4si_mask: +- case CODE_FOR_avx512vl_storev2df_mask: +- case CODE_FOR_avx512vl_storev2di_mask: +- aligned_mem = true; +- break; +- default: +- break; +- } +- /* FALLTHRU */ +- case VOID_FTYPE_PV8SF_V8SI_V8SF: +- case VOID_FTYPE_PV4DF_V4DI_V4DF: +- case VOID_FTYPE_PV4SF_V4SI_V4SF: +- case VOID_FTYPE_PV2DF_V2DI_V2DF: +- case VOID_FTYPE_PV8SI_V8SI_V8SI: +- case VOID_FTYPE_PV4DI_V4DI_V4DI: +- case VOID_FTYPE_PV4SI_V4SI_V4SI: +- case VOID_FTYPE_PV2DI_V2DI_V2DI: +- case VOID_FTYPE_PV8SI_V8DI_UQI: +- case VOID_FTYPE_PV8HI_V8DI_UQI: +- case VOID_FTYPE_PV16HI_V16SI_UHI: +- case VOID_FTYPE_PV16QI_V8DI_UQI: +- case VOID_FTYPE_PV16QI_V16SI_UHI: +- case VOID_FTYPE_PV4SI_V4DI_UQI: +- case VOID_FTYPE_PV4SI_V2DI_UQI: +- case VOID_FTYPE_PV8HI_V4DI_UQI: +- case VOID_FTYPE_PV8HI_V2DI_UQI: +- case VOID_FTYPE_PV8HI_V8SI_UQI: +- case VOID_FTYPE_PV8HI_V4SI_UQI: +- case VOID_FTYPE_PV16QI_V4DI_UQI: +- case VOID_FTYPE_PV16QI_V2DI_UQI: +- case VOID_FTYPE_PV16QI_V8SI_UQI: +- case VOID_FTYPE_PV16QI_V4SI_UQI: +- case VOID_FTYPE_PCHAR_V64QI_UDI: +- case VOID_FTYPE_PCHAR_V32QI_USI: +- case VOID_FTYPE_PCHAR_V16QI_UHI: +- case VOID_FTYPE_PSHORT_V32HI_USI: +- case VOID_FTYPE_PSHORT_V16HI_UHI: +- case VOID_FTYPE_PSHORT_V8HI_UQI: +- case VOID_FTYPE_PINT_V16SI_UHI: +- case VOID_FTYPE_PINT_V8SI_UQI: +- case VOID_FTYPE_PINT_V4SI_UQI: +- case VOID_FTYPE_PINT64_V8DI_UQI: +- case VOID_FTYPE_PINT64_V4DI_UQI: +- case VOID_FTYPE_PINT64_V2DI_UQI: +- case VOID_FTYPE_PDOUBLE_V8DF_UQI: +- case VOID_FTYPE_PDOUBLE_V4DF_UQI: +- case VOID_FTYPE_PDOUBLE_V2DF_UQI: +- case VOID_FTYPE_PFLOAT_V16SF_UHI: +- case VOID_FTYPE_PFLOAT_V8SF_UQI: +- case VOID_FTYPE_PFLOAT_V4SF_UQI: +- case VOID_FTYPE_PV32QI_V32HI_USI: +- case VOID_FTYPE_PV16QI_V16HI_UHI: +- case VOID_FTYPE_PV8QI_V8HI_UQI: +- nargs = 2; +- klass = store; +- /* Reserve memory operand for target. */ +- memory = ARRAY_SIZE (args); +- break; +- case V4SF_FTYPE_PCV4SF_V4SF_UQI: +- case V8SF_FTYPE_PCV8SF_V8SF_UQI: +- case V16SF_FTYPE_PCV16SF_V16SF_UHI: +- case V4SI_FTYPE_PCV4SI_V4SI_UQI: +- case V8SI_FTYPE_PCV8SI_V8SI_UQI: +- case V16SI_FTYPE_PCV16SI_V16SI_UHI: +- case V2DF_FTYPE_PCV2DF_V2DF_UQI: +- case V4DF_FTYPE_PCV4DF_V4DF_UQI: +- case V8DF_FTYPE_PCV8DF_V8DF_UQI: +- case V2DI_FTYPE_PCV2DI_V2DI_UQI: +- case V4DI_FTYPE_PCV4DI_V4DI_UQI: +- case V8DI_FTYPE_PCV8DI_V8DI_UQI: +- case V64QI_FTYPE_PCV64QI_V64QI_UDI: +- case V32HI_FTYPE_PCV32HI_V32HI_USI: +- case V32QI_FTYPE_PCV32QI_V32QI_USI: +- case V16QI_FTYPE_PCV16QI_V16QI_UHI: +- case V16HI_FTYPE_PCV16HI_V16HI_UHI: +- case V8HI_FTYPE_PCV8HI_V8HI_UQI: +- switch (icode) +- { +- /* These builtins and instructions require the memory +- to be properly aligned. 
*/ +- case CODE_FOR_avx512f_loadv16sf_mask: +- case CODE_FOR_avx512f_loadv16si_mask: +- case CODE_FOR_avx512f_loadv8df_mask: +- case CODE_FOR_avx512f_loadv8di_mask: +- case CODE_FOR_avx512vl_loadv8sf_mask: +- case CODE_FOR_avx512vl_loadv8si_mask: +- case CODE_FOR_avx512vl_loadv4df_mask: +- case CODE_FOR_avx512vl_loadv4di_mask: +- case CODE_FOR_avx512vl_loadv4sf_mask: +- case CODE_FOR_avx512vl_loadv4si_mask: +- case CODE_FOR_avx512vl_loadv2df_mask: +- case CODE_FOR_avx512vl_loadv2di_mask: +- case CODE_FOR_avx512bw_loadv64qi_mask: +- case CODE_FOR_avx512vl_loadv32qi_mask: +- case CODE_FOR_avx512vl_loadv16qi_mask: +- case CODE_FOR_avx512bw_loadv32hi_mask: +- case CODE_FOR_avx512vl_loadv16hi_mask: +- case CODE_FOR_avx512vl_loadv8hi_mask: +- aligned_mem = true; +- break; +- default: +- break; +- } +- /* FALLTHRU */ +- case V64QI_FTYPE_PCCHAR_V64QI_UDI: +- case V32QI_FTYPE_PCCHAR_V32QI_USI: +- case V16QI_FTYPE_PCCHAR_V16QI_UHI: +- case V32HI_FTYPE_PCSHORT_V32HI_USI: +- case V16HI_FTYPE_PCSHORT_V16HI_UHI: +- case V8HI_FTYPE_PCSHORT_V8HI_UQI: +- case V16SI_FTYPE_PCINT_V16SI_UHI: +- case V8SI_FTYPE_PCINT_V8SI_UQI: +- case V4SI_FTYPE_PCINT_V4SI_UQI: +- case V8DI_FTYPE_PCINT64_V8DI_UQI: +- case V4DI_FTYPE_PCINT64_V4DI_UQI: +- case V2DI_FTYPE_PCINT64_V2DI_UQI: +- case V8DF_FTYPE_PCDOUBLE_V8DF_UQI: +- case V4DF_FTYPE_PCDOUBLE_V4DF_UQI: +- case V2DF_FTYPE_PCDOUBLE_V2DF_UQI: +- case V16SF_FTYPE_PCFLOAT_V16SF_UHI: +- case V8SF_FTYPE_PCFLOAT_V8SF_UQI: +- case V4SF_FTYPE_PCFLOAT_V4SF_UQI: +- nargs = 3; +- klass = load; +- memory = 0; +- break; +- case VOID_FTYPE_UINT_UINT_UINT: +- case VOID_FTYPE_UINT64_UINT_UINT: +- case UCHAR_FTYPE_UINT_UINT_UINT: +- case UCHAR_FTYPE_UINT64_UINT_UINT: +- nargs = 3; +- klass = load; +- memory = ARRAY_SIZE (args); +- last_arg_constant = true; +- break; +- default: +- gcc_unreachable (); +- } +- +- gcc_assert (nargs <= ARRAY_SIZE (args)); +- +- if (klass == store) +- { +- arg = CALL_EXPR_ARG (exp, 0); +- op = expand_normal (arg); +- gcc_assert (target == 0); +- if (memory) +- { +- op = ix86_zero_extend_to_Pmode (op); +- target = gen_rtx_MEM (tmode, op); +- /* target at this point has just BITS_PER_UNIT MEM_ALIGN +- on it. Try to improve it using get_pointer_alignment, +- and if the special builtin is one that requires strict +- mode alignment, also from it's GET_MODE_ALIGNMENT. +- Failure to do so could lead to ix86_legitimate_combined_insn +- rejecting all changes to such insns. 
*/ +- unsigned int align = get_pointer_alignment (arg); +- if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode)) +- align = GET_MODE_ALIGNMENT (tmode); +- if (MEM_ALIGN (target) < align) +- set_mem_align (target, align); +- } +- else +- target = force_reg (tmode, op); +- arg_adjust = 1; +- } +- else +- { +- arg_adjust = 0; +- if (optimize +- || target == 0 +- || !register_operand (target, tmode) +- || GET_MODE (target) != tmode) +- target = gen_reg_rtx (tmode); +- } +- +- for (i = 0; i < nargs; i++) +- { +- machine_mode mode = insn_p->operand[i + 1].mode; +- bool match; +- +- arg = CALL_EXPR_ARG (exp, i + arg_adjust); +- op = expand_normal (arg); +- match = insn_p->operand[i + 1].predicate (op, mode); +- +- if (last_arg_constant && (i + 1) == nargs) +- { +- if (!match) +- { +- if (icode == CODE_FOR_lwp_lwpvalsi3 +- || icode == CODE_FOR_lwp_lwpinssi3 +- || icode == CODE_FOR_lwp_lwpvaldi3 +- || icode == CODE_FOR_lwp_lwpinsdi3) +- error ("the last argument must be a 32-bit immediate"); +- else +- error ("the last argument must be an 8-bit immediate"); +- return const0_rtx; +- } +- } +- else +- { +- if (i == memory) +- { +- /* This must be the memory operand. */ +- op = ix86_zero_extend_to_Pmode (op); +- op = gen_rtx_MEM (mode, op); +- /* op at this point has just BITS_PER_UNIT MEM_ALIGN +- on it. Try to improve it using get_pointer_alignment, +- and if the special builtin is one that requires strict +- mode alignment, also from it's GET_MODE_ALIGNMENT. +- Failure to do so could lead to ix86_legitimate_combined_insn +- rejecting all changes to such insns. */ +- unsigned int align = get_pointer_alignment (arg); +- if (aligned_mem && align < GET_MODE_ALIGNMENT (mode)) +- align = GET_MODE_ALIGNMENT (mode); +- if (MEM_ALIGN (op) < align) +- set_mem_align (op, align); +- } +- else +- { +- /* This must be register. */ +- if (VECTOR_MODE_P (mode)) +- op = safe_vector_operand (op, mode); +- +- op = fixup_modeless_constant (op, mode); +- +- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) +- op = copy_to_mode_reg (mode, op); +- else +- { +- op = copy_to_reg (op); +- op = lowpart_subreg (mode, op, GET_MODE (op)); +- } +- } +- } +- +- args[i].op = op; +- args[i].mode = mode; +- } +- +- switch (nargs) +- { +- case 0: +- pat = GEN_FCN (icode) (target); +- break; +- case 1: +- pat = GEN_FCN (icode) (target, args[0].op); +- break; +- case 2: +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op); +- break; +- case 3: +- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); +- break; +- default: +- gcc_unreachable (); +- } +- +- if (! pat) +- return 0; +- emit_insn (pat); +- return klass == store ? 0 : target; +-} +- +-/* Return the integer constant in ARG. Constrain it to be in the range +- of the subparts of VEC_TYPE; issue an error if not. */ +- +-static int +-get_element_number (tree vec_type, tree arg) +-{ +- unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; +- +- if (!tree_fits_uhwi_p (arg) +- || (elt = tree_to_uhwi (arg), elt > max)) +- { +- error ("selector must be an integer constant in the range 0..%wi", max); +- return 0; +- } +- +- return elt; +-} +- +-/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around +- ix86_expand_vector_init. We DO have language-level syntax for this, in +- the form of (type){ init-list }. Except that since we can't place emms +- instructions from inside the compiler, we can't allow the use of MMX +- registers unless the user explicitly asks for it. 
So we do *not* define +- vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead +- we have builtins invoked by mmintrin.h that gives us license to emit +- these sorts of instructions. */ +- +-static rtx +-ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) +-{ +- machine_mode tmode = TYPE_MODE (type); +- machine_mode inner_mode = GET_MODE_INNER (tmode); +- int i, n_elt = GET_MODE_NUNITS (tmode); +- rtvec v = rtvec_alloc (n_elt); +- +- gcc_assert (VECTOR_MODE_P (tmode)); +- gcc_assert (call_expr_nargs (exp) == n_elt); +- +- for (i = 0; i < n_elt; ++i) +- { +- rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); +- RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); +- } +- +- if (!target || !register_operand (target, tmode)) +- target = gen_reg_rtx (tmode); +- +- ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); +- return target; +-} +- +-/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around +- ix86_expand_vector_extract. They would be redundant (for non-MMX) if we +- had a language-level syntax for referencing vector elements. */ +- +-static rtx +-ix86_expand_vec_ext_builtin (tree exp, rtx target) +-{ +- machine_mode tmode, mode0; +- tree arg0, arg1; +- int elt; +- rtx op0; +- +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- +- op0 = expand_normal (arg0); +- elt = get_element_number (TREE_TYPE (arg0), arg1); +- +- tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); +- mode0 = TYPE_MODE (TREE_TYPE (arg0)); +- gcc_assert (VECTOR_MODE_P (mode0)); +- +- op0 = force_reg (mode0, op0); +- +- if (optimize || !target || !register_operand (target, tmode)) +- target = gen_reg_rtx (tmode); +- +- ix86_expand_vector_extract (true, target, op0, elt); +- +- return target; +-} +- +-/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around +- ix86_expand_vector_set. They would be redundant (for non-MMX) if we had +- a language-level syntax for referencing vector elements. */ +- +-static rtx +-ix86_expand_vec_set_builtin (tree exp) +-{ +- machine_mode tmode, mode1; +- tree arg0, arg1, arg2; +- int elt; +- rtx op0, op1, target; +- +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- arg2 = CALL_EXPR_ARG (exp, 2); +- +- tmode = TYPE_MODE (TREE_TYPE (arg0)); +- mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); +- gcc_assert (VECTOR_MODE_P (tmode)); +- +- op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); +- op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); +- elt = get_element_number (TREE_TYPE (arg0), arg2); +- +- if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) +- op1 = convert_modes (mode1, GET_MODE (op1), op1, true); +- +- op0 = force_reg (tmode, op0); +- op1 = force_reg (mode1, op1); +- +- /* OP0 is the source of these builtin functions and shouldn't be +- modified. Create a copy, use it and return it as target. */ +- target = gen_reg_rtx (tmode); +- emit_move_insn (target, op0); +- ix86_expand_vector_set (true, target, op1, elt); +- +- return target; +-} +- +-/* Expand an expression EXP that calls a built-in function, +- with result going to TARGET if that's convenient +- (and in mode MODE if that's convenient). +- SUBTARGET may be used as the target for computing one of EXP's operands. +- IGNORE is nonzero if the value is to be ignored. 
*/ +- +-static rtx +-ix86_expand_builtin (tree exp, rtx target, rtx subtarget, +- machine_mode mode, int ignore) +-{ +- size_t i; +- enum insn_code icode, icode2; +- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); +- tree arg0, arg1, arg2, arg3, arg4; +- rtx op0, op1, op2, op3, op4, pat, pat2, insn; +- machine_mode mode0, mode1, mode2, mode3, mode4; +- unsigned int fcode = DECL_FUNCTION_CODE (fndecl); +- +- /* For CPU builtins that can be folded, fold first and expand the fold. */ +- switch (fcode) +- { +- case IX86_BUILTIN_CPU_INIT: +- { +- /* Make it call __cpu_indicator_init in libgcc. */ +- tree call_expr, fndecl, type; +- type = build_function_type_list (integer_type_node, NULL_TREE); +- fndecl = build_fn_decl ("__cpu_indicator_init", type); +- call_expr = build_call_expr (fndecl, 0); +- return expand_expr (call_expr, target, mode, EXPAND_NORMAL); +- } +- case IX86_BUILTIN_CPU_IS: +- case IX86_BUILTIN_CPU_SUPPORTS: +- { +- tree arg0 = CALL_EXPR_ARG (exp, 0); +- tree fold_expr = fold_builtin_cpu (fndecl, &arg0); +- gcc_assert (fold_expr != NULL_TREE); +- return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); +- } +- } +- +- HOST_WIDE_INT isa = ix86_isa_flags; +- HOST_WIDE_INT isa2 = ix86_isa_flags2; +- HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa; +- HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2; +- /* The general case is we require all the ISAs specified in bisa{,2} +- to be enabled. +- The exceptions are: +- OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A +- OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 +- OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 +- where for each this pair it is sufficient if either of the ISAs is +- enabled, plus if it is ored with other options also those others. */ +- if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) +- == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) +- && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0) +- isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A); +- if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) +- == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) +- && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0) +- isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32); +- if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) +- == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) +- && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0) +- isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4); +- if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2) +- { +- bool add_abi_p = bisa & OPTION_MASK_ISA_64BIT; +- if (TARGET_ABI_X32) +- bisa |= OPTION_MASK_ABI_X32; +- else +- bisa |= OPTION_MASK_ABI_64; +- char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL, +- (enum fpmath_unit) 0, false, add_abi_p); +- if (!opts) +- error ("%qE needs unknown isa option", fndecl); +- else +- { +- gcc_assert (opts != NULL); +- error ("%qE needs isa option %s", fndecl, opts); +- free (opts); +- } +- return expand_call (exp, target, ignore); +- } +- +- switch (fcode) +- { +- case IX86_BUILTIN_MASKMOVQ: +- case IX86_BUILTIN_MASKMOVDQU: +- icode = (fcode == IX86_BUILTIN_MASKMOVQ +- ? CODE_FOR_mmx_maskmovq +- : CODE_FOR_sse2_maskmovdqu); +- /* Note the arg order is different from the operand order. 
*/ +- arg1 = CALL_EXPR_ARG (exp, 0); +- arg2 = CALL_EXPR_ARG (exp, 1); +- arg0 = CALL_EXPR_ARG (exp, 2); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- op2 = expand_normal (arg2); +- mode0 = insn_data[icode].operand[0].mode; +- mode1 = insn_data[icode].operand[1].mode; +- mode2 = insn_data[icode].operand[2].mode; +- +- op0 = ix86_zero_extend_to_Pmode (op0); +- op0 = gen_rtx_MEM (mode1, op0); +- +- if (!insn_data[icode].operand[0].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- if (!insn_data[icode].operand[1].predicate (op1, mode1)) +- op1 = copy_to_mode_reg (mode1, op1); +- if (!insn_data[icode].operand[2].predicate (op2, mode2)) +- op2 = copy_to_mode_reg (mode2, op2); +- pat = GEN_FCN (icode) (op0, op1, op2); +- if (! pat) +- return 0; +- emit_insn (pat); +- return 0; +- +- case IX86_BUILTIN_LDMXCSR: +- op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); +- target = assign_386_stack_local (SImode, SLOT_TEMP); +- emit_move_insn (target, op0); +- emit_insn (gen_sse_ldmxcsr (target)); +- return 0; +- +- case IX86_BUILTIN_STMXCSR: +- target = assign_386_stack_local (SImode, SLOT_TEMP); +- emit_insn (gen_sse_stmxcsr (target)); +- return copy_to_mode_reg (SImode, target); +- +- case IX86_BUILTIN_CLFLUSH: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- icode = CODE_FOR_sse2_clflush; +- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) +- op0 = ix86_zero_extend_to_Pmode (op0); +- +- emit_insn (gen_sse2_clflush (op0)); +- return 0; +- +- case IX86_BUILTIN_CLWB: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- icode = CODE_FOR_clwb; +- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) +- op0 = ix86_zero_extend_to_Pmode (op0); +- +- emit_insn (gen_clwb (op0)); +- return 0; +- +- case IX86_BUILTIN_CLFLUSHOPT: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- icode = CODE_FOR_clflushopt; +- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) +- op0 = ix86_zero_extend_to_Pmode (op0); +- +- emit_insn (gen_clflushopt (op0)); +- return 0; +- +- case IX86_BUILTIN_MONITOR: +- case IX86_BUILTIN_MONITORX: +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- arg2 = CALL_EXPR_ARG (exp, 2); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- op2 = expand_normal (arg2); +- if (!REG_P (op0)) +- op0 = ix86_zero_extend_to_Pmode (op0); +- if (!REG_P (op1)) +- op1 = copy_to_mode_reg (SImode, op1); +- if (!REG_P (op2)) +- op2 = copy_to_mode_reg (SImode, op2); +- +- emit_insn (fcode == IX86_BUILTIN_MONITOR +- ? 
ix86_gen_monitor (op0, op1, op2) +- : ix86_gen_monitorx (op0, op1, op2)); +- return 0; +- +- case IX86_BUILTIN_MWAIT: +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- if (!REG_P (op0)) +- op0 = copy_to_mode_reg (SImode, op0); +- if (!REG_P (op1)) +- op1 = copy_to_mode_reg (SImode, op1); +- emit_insn (gen_sse3_mwait (op0, op1)); +- return 0; +- +- case IX86_BUILTIN_MWAITX: +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- arg2 = CALL_EXPR_ARG (exp, 2); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- op2 = expand_normal (arg2); +- if (!REG_P (op0)) +- op0 = copy_to_mode_reg (SImode, op0); +- if (!REG_P (op1)) +- op1 = copy_to_mode_reg (SImode, op1); +- if (!REG_P (op2)) +- op2 = copy_to_mode_reg (SImode, op2); +- emit_insn (gen_mwaitx (op0, op1, op2)); +- return 0; +- +- case IX86_BUILTIN_UMONITOR: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- +- op0 = ix86_zero_extend_to_Pmode (op0); +- +- insn = (TARGET_64BIT +- ? gen_umonitor_di (op0) +- : gen_umonitor_si (op0)); +- +- emit_insn (insn); +- return 0; +- +- case IX86_BUILTIN_UMWAIT: +- case IX86_BUILTIN_TPAUSE: +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- +- if (!REG_P (op0)) +- op0 = copy_to_mode_reg (SImode, op0); +- +- op1 = force_reg (DImode, op1); +- +- if (TARGET_64BIT) +- { +- op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), +- NULL, 1, OPTAB_DIRECT); +- switch (fcode) +- { +- case IX86_BUILTIN_UMWAIT: +- icode = CODE_FOR_umwait_rex64; +- break; +- case IX86_BUILTIN_TPAUSE: +- icode = CODE_FOR_tpause_rex64; +- break; +- default: +- gcc_unreachable (); +- } +- +- op2 = gen_lowpart (SImode, op2); +- op1 = gen_lowpart (SImode, op1); +- pat = GEN_FCN (icode) (op0, op1, op2); +- } +- else +- { +- switch (fcode) +- { +- case IX86_BUILTIN_UMWAIT: +- icode = CODE_FOR_umwait; +- break; +- case IX86_BUILTIN_TPAUSE: +- icode = CODE_FOR_tpause; +- break; +- default: +- gcc_unreachable (); +- } +- pat = GEN_FCN (icode) (op0, op1); +- } +- +- if (!pat) +- return 0; +- +- emit_insn (pat); +- +- if (target == 0 +- || !register_operand (target, QImode)) +- target = gen_reg_rtx (QImode); +- +- pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), +- const0_rtx); +- emit_insn (gen_rtx_SET (target, pat)); +- +- return target; +- +- case IX86_BUILTIN_CLZERO: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- if (!REG_P (op0)) +- op0 = ix86_zero_extend_to_Pmode (op0); +- emit_insn (ix86_gen_clzero (op0)); +- return 0; +- +- case IX86_BUILTIN_CLDEMOTE: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- icode = CODE_FOR_cldemote; +- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) +- op0 = ix86_zero_extend_to_Pmode (op0); +- +- emit_insn (gen_cldemote (op0)); +- return 0; +- +- case IX86_BUILTIN_VEC_INIT_V2SI: +- case IX86_BUILTIN_VEC_INIT_V4HI: +- case IX86_BUILTIN_VEC_INIT_V8QI: +- return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); +- +- case IX86_BUILTIN_VEC_EXT_V2DF: +- case IX86_BUILTIN_VEC_EXT_V2DI: +- case IX86_BUILTIN_VEC_EXT_V4SF: +- case IX86_BUILTIN_VEC_EXT_V4SI: +- case IX86_BUILTIN_VEC_EXT_V8HI: +- case IX86_BUILTIN_VEC_EXT_V2SI: +- case IX86_BUILTIN_VEC_EXT_V4HI: +- case IX86_BUILTIN_VEC_EXT_V16QI: +- return ix86_expand_vec_ext_builtin (exp, target); +- +- case IX86_BUILTIN_VEC_SET_V2DI: +- case IX86_BUILTIN_VEC_SET_V4SF: +- case 
IX86_BUILTIN_VEC_SET_V4SI: +- case IX86_BUILTIN_VEC_SET_V8HI: +- case IX86_BUILTIN_VEC_SET_V4HI: +- case IX86_BUILTIN_VEC_SET_V16QI: +- return ix86_expand_vec_set_builtin (exp); +- +- case IX86_BUILTIN_NANQ: +- case IX86_BUILTIN_NANSQ: +- return expand_call (exp, target, ignore); +- +- case IX86_BUILTIN_RDPID: +- +- op0 = gen_reg_rtx (word_mode); +- +- if (TARGET_64BIT) +- { +- insn = gen_rdpid_rex64 (op0); +- op0 = convert_to_mode (SImode, op0, 1); +- } +- else +- insn = gen_rdpid (op0); +- +- emit_insn (insn); +- +- if (target == 0 +- || !register_operand (target, SImode)) +- target = gen_reg_rtx (SImode); +- +- emit_move_insn (target, op0); +- return target; +- +- case IX86_BUILTIN_RDPMC: +- case IX86_BUILTIN_RDTSC: +- case IX86_BUILTIN_RDTSCP: +- case IX86_BUILTIN_XGETBV: +- +- op0 = gen_reg_rtx (DImode); +- op1 = gen_reg_rtx (DImode); +- +- if (fcode == IX86_BUILTIN_RDPMC) +- { +- arg0 = CALL_EXPR_ARG (exp, 0); +- op2 = expand_normal (arg0); +- if (!register_operand (op2, SImode)) +- op2 = copy_to_mode_reg (SImode, op2); +- +- insn = (TARGET_64BIT +- ? gen_rdpmc_rex64 (op0, op1, op2) +- : gen_rdpmc (op0, op2)); +- emit_insn (insn); +- } +- else if (fcode == IX86_BUILTIN_XGETBV) +- { +- arg0 = CALL_EXPR_ARG (exp, 0); +- op2 = expand_normal (arg0); +- if (!register_operand (op2, SImode)) +- op2 = copy_to_mode_reg (SImode, op2); +- +- insn = (TARGET_64BIT +- ? gen_xgetbv_rex64 (op0, op1, op2) +- : gen_xgetbv (op0, op2)); +- emit_insn (insn); +- } +- else if (fcode == IX86_BUILTIN_RDTSC) +- { +- insn = (TARGET_64BIT +- ? gen_rdtsc_rex64 (op0, op1) +- : gen_rdtsc (op0)); +- emit_insn (insn); +- } +- else +- { +- op2 = gen_reg_rtx (SImode); +- +- insn = (TARGET_64BIT +- ? gen_rdtscp_rex64 (op0, op1, op2) +- : gen_rdtscp (op0, op2)); +- emit_insn (insn); +- +- arg0 = CALL_EXPR_ARG (exp, 0); +- op4 = expand_normal (arg0); +- if (!address_operand (op4, VOIDmode)) +- { +- op4 = convert_memory_address (Pmode, op4); +- op4 = copy_addr_to_reg (op4); +- } +- emit_move_insn (gen_rtx_MEM (SImode, op4), op2); +- } +- +- if (target == 0 +- || !register_operand (target, DImode)) +- target = gen_reg_rtx (DImode); +- +- if (TARGET_64BIT) +- { +- op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32), +- op1, 1, OPTAB_DIRECT); +- op0 = expand_simple_binop (DImode, IOR, op0, op1, +- op0, 1, OPTAB_DIRECT); +- } +- +- emit_move_insn (target, op0); +- return target; +- +- case IX86_BUILTIN_MOVDIR64B: +- +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- +- op0 = ix86_zero_extend_to_Pmode (op0); +- if (!address_operand (op1, VOIDmode)) +- { +- op1 = convert_memory_address (Pmode, op1); +- op1 = copy_addr_to_reg (op1); +- } +- op1 = gen_rtx_MEM (XImode, op1); +- +- insn = (TARGET_64BIT +- ? 
gen_movdir64b_di (op0, op1) +- : gen_movdir64b_si (op0, op1)); +- emit_insn (insn); +- return 0; +- +- case IX86_BUILTIN_FXSAVE: +- case IX86_BUILTIN_FXRSTOR: +- case IX86_BUILTIN_FXSAVE64: +- case IX86_BUILTIN_FXRSTOR64: +- case IX86_BUILTIN_FNSTENV: +- case IX86_BUILTIN_FLDENV: +- mode0 = BLKmode; +- switch (fcode) +- { +- case IX86_BUILTIN_FXSAVE: +- icode = CODE_FOR_fxsave; +- break; +- case IX86_BUILTIN_FXRSTOR: +- icode = CODE_FOR_fxrstor; +- break; +- case IX86_BUILTIN_FXSAVE64: +- icode = CODE_FOR_fxsave64; +- break; +- case IX86_BUILTIN_FXRSTOR64: +- icode = CODE_FOR_fxrstor64; +- break; +- case IX86_BUILTIN_FNSTENV: +- icode = CODE_FOR_fnstenv; +- break; +- case IX86_BUILTIN_FLDENV: +- icode = CODE_FOR_fldenv; +- break; +- default: +- gcc_unreachable (); +- } +- +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- +- if (!address_operand (op0, VOIDmode)) +- { +- op0 = convert_memory_address (Pmode, op0); +- op0 = copy_addr_to_reg (op0); +- } +- op0 = gen_rtx_MEM (mode0, op0); +- +- pat = GEN_FCN (icode) (op0); +- if (pat) +- emit_insn (pat); +- return 0; +- +- case IX86_BUILTIN_XSETBV: +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- +- if (!REG_P (op0)) +- op0 = copy_to_mode_reg (SImode, op0); +- +- op1 = force_reg (DImode, op1); +- +- if (TARGET_64BIT) +- { +- op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), +- NULL, 1, OPTAB_DIRECT); +- +- icode = CODE_FOR_xsetbv_rex64; +- +- op2 = gen_lowpart (SImode, op2); +- op1 = gen_lowpart (SImode, op1); +- pat = GEN_FCN (icode) (op0, op1, op2); +- } +- else +- { +- icode = CODE_FOR_xsetbv; +- +- pat = GEN_FCN (icode) (op0, op1); +- } +- if (pat) +- emit_insn (pat); +- return 0; +- +- case IX86_BUILTIN_XSAVE: +- case IX86_BUILTIN_XRSTOR: +- case IX86_BUILTIN_XSAVE64: +- case IX86_BUILTIN_XRSTOR64: +- case IX86_BUILTIN_XSAVEOPT: +- case IX86_BUILTIN_XSAVEOPT64: +- case IX86_BUILTIN_XSAVES: +- case IX86_BUILTIN_XRSTORS: +- case IX86_BUILTIN_XSAVES64: +- case IX86_BUILTIN_XRSTORS64: +- case IX86_BUILTIN_XSAVEC: +- case IX86_BUILTIN_XSAVEC64: +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- +- if (!address_operand (op0, VOIDmode)) +- { +- op0 = convert_memory_address (Pmode, op0); +- op0 = copy_addr_to_reg (op0); +- } +- op0 = gen_rtx_MEM (BLKmode, op0); +- +- op1 = force_reg (DImode, op1); +- +- if (TARGET_64BIT) +- { +- op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), +- NULL, 1, OPTAB_DIRECT); +- switch (fcode) +- { +- case IX86_BUILTIN_XSAVE: +- icode = CODE_FOR_xsave_rex64; +- break; +- case IX86_BUILTIN_XRSTOR: +- icode = CODE_FOR_xrstor_rex64; +- break; +- case IX86_BUILTIN_XSAVE64: +- icode = CODE_FOR_xsave64; +- break; +- case IX86_BUILTIN_XRSTOR64: +- icode = CODE_FOR_xrstor64; +- break; +- case IX86_BUILTIN_XSAVEOPT: +- icode = CODE_FOR_xsaveopt_rex64; +- break; +- case IX86_BUILTIN_XSAVEOPT64: +- icode = CODE_FOR_xsaveopt64; +- break; +- case IX86_BUILTIN_XSAVES: +- icode = CODE_FOR_xsaves_rex64; +- break; +- case IX86_BUILTIN_XRSTORS: +- icode = CODE_FOR_xrstors_rex64; +- break; +- case IX86_BUILTIN_XSAVES64: +- icode = CODE_FOR_xsaves64; +- break; +- case IX86_BUILTIN_XRSTORS64: +- icode = CODE_FOR_xrstors64; +- break; +- case IX86_BUILTIN_XSAVEC: +- icode = CODE_FOR_xsavec_rex64; +- break; +- case IX86_BUILTIN_XSAVEC64: +- icode = CODE_FOR_xsavec64; +- break; +- default: +- gcc_unreachable (); +- } +- +- op2 = 
gen_lowpart (SImode, op2); +- op1 = gen_lowpart (SImode, op1); +- pat = GEN_FCN (icode) (op0, op1, op2); +- } +- else +- { +- switch (fcode) +- { +- case IX86_BUILTIN_XSAVE: +- icode = CODE_FOR_xsave; +- break; +- case IX86_BUILTIN_XRSTOR: +- icode = CODE_FOR_xrstor; +- break; +- case IX86_BUILTIN_XSAVEOPT: +- icode = CODE_FOR_xsaveopt; +- break; +- case IX86_BUILTIN_XSAVES: +- icode = CODE_FOR_xsaves; +- break; +- case IX86_BUILTIN_XRSTORS: +- icode = CODE_FOR_xrstors; +- break; +- case IX86_BUILTIN_XSAVEC: +- icode = CODE_FOR_xsavec; +- break; +- default: +- gcc_unreachable (); +- } +- pat = GEN_FCN (icode) (op0, op1); +- } +- +- if (pat) +- emit_insn (pat); +- return 0; +- +- case IX86_BUILTIN_LLWPCB: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- icode = CODE_FOR_lwp_llwpcb; +- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) +- op0 = ix86_zero_extend_to_Pmode (op0); +- emit_insn (gen_lwp_llwpcb (op0)); +- return 0; +- +- case IX86_BUILTIN_SLWPCB: +- icode = CODE_FOR_lwp_slwpcb; +- if (!target +- || !insn_data[icode].operand[0].predicate (target, Pmode)) +- target = gen_reg_rtx (Pmode); +- emit_insn (gen_lwp_slwpcb (target)); +- return target; +- +- case IX86_BUILTIN_BEXTRI32: +- case IX86_BUILTIN_BEXTRI64: +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- icode = (fcode == IX86_BUILTIN_BEXTRI32 +- ? CODE_FOR_tbm_bextri_si +- : CODE_FOR_tbm_bextri_di); +- if (!CONST_INT_P (op1)) +- { +- error ("last argument must be an immediate"); +- return const0_rtx; +- } +- else +- { +- unsigned char length = (INTVAL (op1) >> 8) & 0xFF; +- unsigned char lsb_index = INTVAL (op1) & 0xFF; +- op1 = GEN_INT (length); +- op2 = GEN_INT (lsb_index); +- +- mode1 = insn_data[icode].operand[1].mode; +- if (!insn_data[icode].operand[1].predicate (op0, mode1)) +- op0 = copy_to_mode_reg (mode1, op0); +- +- mode0 = insn_data[icode].operand[0].mode; +- if (target == 0 +- || !register_operand (target, mode0)) +- target = gen_reg_rtx (mode0); +- +- pat = GEN_FCN (icode) (target, op0, op1, op2); +- if (pat) +- emit_insn (pat); +- return target; +- } +- +- case IX86_BUILTIN_RDRAND16_STEP: +- icode = CODE_FOR_rdrandhi_1; +- mode0 = HImode; +- goto rdrand_step; +- +- case IX86_BUILTIN_RDRAND32_STEP: +- icode = CODE_FOR_rdrandsi_1; +- mode0 = SImode; +- goto rdrand_step; +- +- case IX86_BUILTIN_RDRAND64_STEP: +- icode = CODE_FOR_rdranddi_1; +- mode0 = DImode; +- +-rdrand_step: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op1 = expand_normal (arg0); +- if (!address_operand (op1, VOIDmode)) +- { +- op1 = convert_memory_address (Pmode, op1); +- op1 = copy_addr_to_reg (op1); +- } +- +- op0 = gen_reg_rtx (mode0); +- emit_insn (GEN_FCN (icode) (op0)); +- +- emit_move_insn (gen_rtx_MEM (mode0, op1), op0); +- +- op1 = gen_reg_rtx (SImode); +- emit_move_insn (op1, CONST1_RTX (SImode)); +- +- /* Emit SImode conditional move. 
*/ +- if (mode0 == HImode) +- { +- if (TARGET_ZERO_EXTEND_WITH_AND +- && optimize_function_for_speed_p (cfun)) +- { +- op2 = force_reg (SImode, const0_rtx); +- +- emit_insn (gen_movstricthi +- (gen_lowpart (HImode, op2), op0)); +- } +- else +- { +- op2 = gen_reg_rtx (SImode); +- +- emit_insn (gen_zero_extendhisi2 (op2, op0)); +- } +- } +- else if (mode0 == SImode) +- op2 = op0; +- else +- op2 = gen_rtx_SUBREG (SImode, op0, 0); +- +- if (target == 0 +- || !register_operand (target, SImode)) +- target = gen_reg_rtx (SImode); +- +- pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG), +- const0_rtx); +- emit_insn (gen_rtx_SET (target, +- gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1))); +- return target; +- +- case IX86_BUILTIN_RDSEED16_STEP: +- icode = CODE_FOR_rdseedhi_1; +- mode0 = HImode; +- goto rdseed_step; +- +- case IX86_BUILTIN_RDSEED32_STEP: +- icode = CODE_FOR_rdseedsi_1; +- mode0 = SImode; +- goto rdseed_step; +- +- case IX86_BUILTIN_RDSEED64_STEP: +- icode = CODE_FOR_rdseeddi_1; +- mode0 = DImode; +- +-rdseed_step: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op1 = expand_normal (arg0); +- if (!address_operand (op1, VOIDmode)) +- { +- op1 = convert_memory_address (Pmode, op1); +- op1 = copy_addr_to_reg (op1); +- } +- +- op0 = gen_reg_rtx (mode0); +- emit_insn (GEN_FCN (icode) (op0)); +- +- emit_move_insn (gen_rtx_MEM (mode0, op1), op0); +- +- op2 = gen_reg_rtx (QImode); +- +- pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), +- const0_rtx); +- emit_insn (gen_rtx_SET (op2, pat)); +- +- if (target == 0 +- || !register_operand (target, SImode)) +- target = gen_reg_rtx (SImode); +- +- emit_insn (gen_zero_extendqisi2 (target, op2)); +- return target; +- +- case IX86_BUILTIN_SBB32: +- icode = CODE_FOR_subborrowsi; +- icode2 = CODE_FOR_subborrowsi_0; +- mode0 = SImode; +- mode1 = DImode; +- mode2 = CCmode; +- goto handlecarry; +- +- case IX86_BUILTIN_SBB64: +- icode = CODE_FOR_subborrowdi; +- icode2 = CODE_FOR_subborrowdi_0; +- mode0 = DImode; +- mode1 = TImode; +- mode2 = CCmode; +- goto handlecarry; +- +- case IX86_BUILTIN_ADDCARRYX32: +- icode = CODE_FOR_addcarrysi; +- icode2 = CODE_FOR_addcarrysi_0; +- mode0 = SImode; +- mode1 = DImode; +- mode2 = CCCmode; +- goto handlecarry; +- +- case IX86_BUILTIN_ADDCARRYX64: +- icode = CODE_FOR_addcarrydi; +- icode2 = CODE_FOR_addcarrydi_0; +- mode0 = DImode; +- mode1 = TImode; +- mode2 = CCCmode; +- +- handlecarry: +- arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */ +- arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */ +- arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */ +- arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */ +- +- op1 = expand_normal (arg0); +- if (!integer_zerop (arg0)) +- op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); +- +- op2 = expand_normal (arg1); +- if (!register_operand (op2, mode0)) +- op2 = copy_to_mode_reg (mode0, op2); +- +- op3 = expand_normal (arg2); +- if (!register_operand (op3, mode0)) +- op3 = copy_to_mode_reg (mode0, op3); +- +- op4 = expand_normal (arg3); +- if (!address_operand (op4, VOIDmode)) +- { +- op4 = convert_memory_address (Pmode, op4); +- op4 = copy_addr_to_reg (op4); +- } +- +- op0 = gen_reg_rtx (mode0); +- if (integer_zerop (arg0)) +- { +- /* If arg0 is 0, optimize right away into add or sub +- instruction that sets CCCmode flags. */ +- op1 = gen_rtx_REG (mode2, FLAGS_REG); +- emit_insn (GEN_FCN (icode2) (op0, op2, op3)); +- } +- else +- { +- /* Generate CF from input operand. 
*/ +- emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); +- +- /* Generate instruction that consumes CF. */ +- op1 = gen_rtx_REG (CCCmode, FLAGS_REG); +- pat = gen_rtx_LTU (mode1, op1, const0_rtx); +- pat2 = gen_rtx_LTU (mode0, op1, const0_rtx); +- emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2)); +- } +- +- /* Return current CF value. */ +- if (target == 0) +- target = gen_reg_rtx (QImode); +- +- pat = gen_rtx_LTU (QImode, op1, const0_rtx); +- emit_insn (gen_rtx_SET (target, pat)); +- +- /* Store the result. */ +- emit_move_insn (gen_rtx_MEM (mode0, op4), op0); +- +- return target; +- +- case IX86_BUILTIN_READ_FLAGS: +- emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG))); +- +- if (optimize +- || target == NULL_RTX +- || !nonimmediate_operand (target, word_mode) +- || GET_MODE (target) != word_mode) +- target = gen_reg_rtx (word_mode); +- +- emit_insn (gen_pop (target)); +- return target; +- +- case IX86_BUILTIN_WRITE_FLAGS: +- +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- if (!general_no_elim_operand (op0, word_mode)) +- op0 = copy_to_mode_reg (word_mode, op0); +- +- emit_insn (gen_push (op0)); +- emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG))); +- return 0; +- +- case IX86_BUILTIN_KTESTC8: +- icode = CODE_FOR_ktestqi; +- mode3 = CCCmode; +- goto kortest; +- +- case IX86_BUILTIN_KTESTZ8: +- icode = CODE_FOR_ktestqi; +- mode3 = CCZmode; +- goto kortest; +- +- case IX86_BUILTIN_KTESTC16: +- icode = CODE_FOR_ktesthi; +- mode3 = CCCmode; +- goto kortest; +- +- case IX86_BUILTIN_KTESTZ16: +- icode = CODE_FOR_ktesthi; +- mode3 = CCZmode; +- goto kortest; +- +- case IX86_BUILTIN_KTESTC32: +- icode = CODE_FOR_ktestsi; +- mode3 = CCCmode; +- goto kortest; +- +- case IX86_BUILTIN_KTESTZ32: +- icode = CODE_FOR_ktestsi; +- mode3 = CCZmode; +- goto kortest; +- +- case IX86_BUILTIN_KTESTC64: +- icode = CODE_FOR_ktestdi; +- mode3 = CCCmode; +- goto kortest; +- +- case IX86_BUILTIN_KTESTZ64: +- icode = CODE_FOR_ktestdi; +- mode3 = CCZmode; +- goto kortest; +- +- case IX86_BUILTIN_KORTESTC8: +- icode = CODE_FOR_kortestqi; +- mode3 = CCCmode; +- goto kortest; +- +- case IX86_BUILTIN_KORTESTZ8: +- icode = CODE_FOR_kortestqi; +- mode3 = CCZmode; +- goto kortest; +- +- case IX86_BUILTIN_KORTESTC16: +- icode = CODE_FOR_kortesthi; +- mode3 = CCCmode; +- goto kortest; +- +- case IX86_BUILTIN_KORTESTZ16: +- icode = CODE_FOR_kortesthi; +- mode3 = CCZmode; +- goto kortest; +- +- case IX86_BUILTIN_KORTESTC32: +- icode = CODE_FOR_kortestsi; +- mode3 = CCCmode; +- goto kortest; +- +- case IX86_BUILTIN_KORTESTZ32: +- icode = CODE_FOR_kortestsi; +- mode3 = CCZmode; +- goto kortest; +- +- case IX86_BUILTIN_KORTESTC64: +- icode = CODE_FOR_kortestdi; +- mode3 = CCCmode; +- goto kortest; +- +- case IX86_BUILTIN_KORTESTZ64: +- icode = CODE_FOR_kortestdi; +- mode3 = CCZmode; +- +- kortest: +- arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */ +- arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. 
*/ +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- +- mode0 = insn_data[icode].operand[0].mode; +- mode1 = insn_data[icode].operand[1].mode; +- +- if (GET_MODE (op0) != VOIDmode) +- op0 = force_reg (GET_MODE (op0), op0); +- +- op0 = gen_lowpart (mode0, op0); +- +- if (!insn_data[icode].operand[0].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- +- if (GET_MODE (op1) != VOIDmode) +- op1 = force_reg (GET_MODE (op1), op1); +- +- op1 = gen_lowpart (mode1, op1); +- +- if (!insn_data[icode].operand[1].predicate (op1, mode1)) +- op1 = copy_to_mode_reg (mode1, op1); +- +- target = gen_reg_rtx (QImode); +- +- /* Emit kortest. */ +- emit_insn (GEN_FCN (icode) (op0, op1)); +- /* And use setcc to return result from flags. */ +- ix86_expand_setcc (target, EQ, +- gen_rtx_REG (mode3, FLAGS_REG), const0_rtx); +- return target; +- +- case IX86_BUILTIN_GATHERSIV2DF: +- icode = CODE_FOR_avx2_gathersiv2df; +- goto gather_gen; +- case IX86_BUILTIN_GATHERSIV4DF: +- icode = CODE_FOR_avx2_gathersiv4df; +- goto gather_gen; +- case IX86_BUILTIN_GATHERDIV2DF: +- icode = CODE_FOR_avx2_gatherdiv2df; +- goto gather_gen; +- case IX86_BUILTIN_GATHERDIV4DF: +- icode = CODE_FOR_avx2_gatherdiv4df; +- goto gather_gen; +- case IX86_BUILTIN_GATHERSIV4SF: +- icode = CODE_FOR_avx2_gathersiv4sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHERSIV8SF: +- icode = CODE_FOR_avx2_gathersiv8sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHERDIV4SF: +- icode = CODE_FOR_avx2_gatherdiv4sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHERDIV8SF: +- icode = CODE_FOR_avx2_gatherdiv8sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHERSIV2DI: +- icode = CODE_FOR_avx2_gathersiv2di; +- goto gather_gen; +- case IX86_BUILTIN_GATHERSIV4DI: +- icode = CODE_FOR_avx2_gathersiv4di; +- goto gather_gen; +- case IX86_BUILTIN_GATHERDIV2DI: +- icode = CODE_FOR_avx2_gatherdiv2di; +- goto gather_gen; +- case IX86_BUILTIN_GATHERDIV4DI: +- icode = CODE_FOR_avx2_gatherdiv4di; +- goto gather_gen; +- case IX86_BUILTIN_GATHERSIV4SI: +- icode = CODE_FOR_avx2_gathersiv4si; +- goto gather_gen; +- case IX86_BUILTIN_GATHERSIV8SI: +- icode = CODE_FOR_avx2_gathersiv8si; +- goto gather_gen; +- case IX86_BUILTIN_GATHERDIV4SI: +- icode = CODE_FOR_avx2_gatherdiv4si; +- goto gather_gen; +- case IX86_BUILTIN_GATHERDIV8SI: +- icode = CODE_FOR_avx2_gatherdiv8si; +- goto gather_gen; +- case IX86_BUILTIN_GATHERALTSIV4DF: +- icode = CODE_FOR_avx2_gathersiv4df; +- goto gather_gen; +- case IX86_BUILTIN_GATHERALTDIV8SF: +- icode = CODE_FOR_avx2_gatherdiv8sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHERALTSIV4DI: +- icode = CODE_FOR_avx2_gathersiv4di; +- goto gather_gen; +- case IX86_BUILTIN_GATHERALTDIV8SI: +- icode = CODE_FOR_avx2_gatherdiv8si; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV16SF: +- icode = CODE_FOR_avx512f_gathersiv16sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV8DF: +- icode = CODE_FOR_avx512f_gathersiv8df; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV16SF: +- icode = CODE_FOR_avx512f_gatherdiv16sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV8DF: +- icode = CODE_FOR_avx512f_gatherdiv8df; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV16SI: +- icode = CODE_FOR_avx512f_gathersiv16si; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV8DI: +- icode = CODE_FOR_avx512f_gathersiv8di; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV16SI: +- icode = CODE_FOR_avx512f_gatherdiv16si; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV8DI: +- icode = CODE_FOR_avx512f_gatherdiv8di; +- goto gather_gen; +- case 
IX86_BUILTIN_GATHER3ALTSIV8DF: +- icode = CODE_FOR_avx512f_gathersiv8df; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3ALTDIV16SF: +- icode = CODE_FOR_avx512f_gatherdiv16sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3ALTSIV8DI: +- icode = CODE_FOR_avx512f_gathersiv8di; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3ALTDIV16SI: +- icode = CODE_FOR_avx512f_gatherdiv16si; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV2DF: +- icode = CODE_FOR_avx512vl_gathersiv2df; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV4DF: +- icode = CODE_FOR_avx512vl_gathersiv4df; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV2DF: +- icode = CODE_FOR_avx512vl_gatherdiv2df; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV4DF: +- icode = CODE_FOR_avx512vl_gatherdiv4df; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV4SF: +- icode = CODE_FOR_avx512vl_gathersiv4sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV8SF: +- icode = CODE_FOR_avx512vl_gathersiv8sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV4SF: +- icode = CODE_FOR_avx512vl_gatherdiv4sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV8SF: +- icode = CODE_FOR_avx512vl_gatherdiv8sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV2DI: +- icode = CODE_FOR_avx512vl_gathersiv2di; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV4DI: +- icode = CODE_FOR_avx512vl_gathersiv4di; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV2DI: +- icode = CODE_FOR_avx512vl_gatherdiv2di; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV4DI: +- icode = CODE_FOR_avx512vl_gatherdiv4di; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV4SI: +- icode = CODE_FOR_avx512vl_gathersiv4si; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3SIV8SI: +- icode = CODE_FOR_avx512vl_gathersiv8si; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV4SI: +- icode = CODE_FOR_avx512vl_gatherdiv4si; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3DIV8SI: +- icode = CODE_FOR_avx512vl_gatherdiv8si; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3ALTSIV4DF: +- icode = CODE_FOR_avx512vl_gathersiv4df; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3ALTDIV8SF: +- icode = CODE_FOR_avx512vl_gatherdiv8sf; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3ALTSIV4DI: +- icode = CODE_FOR_avx512vl_gathersiv4di; +- goto gather_gen; +- case IX86_BUILTIN_GATHER3ALTDIV8SI: +- icode = CODE_FOR_avx512vl_gatherdiv8si; +- goto gather_gen; +- case IX86_BUILTIN_SCATTERSIV16SF: +- icode = CODE_FOR_avx512f_scattersiv16sf; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV8DF: +- icode = CODE_FOR_avx512f_scattersiv8df; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV16SF: +- icode = CODE_FOR_avx512f_scatterdiv16sf; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV8DF: +- icode = CODE_FOR_avx512f_scatterdiv8df; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV16SI: +- icode = CODE_FOR_avx512f_scattersiv16si; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV8DI: +- icode = CODE_FOR_avx512f_scattersiv8di; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV16SI: +- icode = CODE_FOR_avx512f_scatterdiv16si; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV8DI: +- icode = CODE_FOR_avx512f_scatterdiv8di; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV8SF: +- icode = CODE_FOR_avx512vl_scattersiv8sf; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV4SF: +- icode = CODE_FOR_avx512vl_scattersiv4sf; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV4DF: +- icode = CODE_FOR_avx512vl_scattersiv4df; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV2DF: +- icode = 
CODE_FOR_avx512vl_scattersiv2df; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV8SF: +- icode = CODE_FOR_avx512vl_scatterdiv8sf; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV4SF: +- icode = CODE_FOR_avx512vl_scatterdiv4sf; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV4DF: +- icode = CODE_FOR_avx512vl_scatterdiv4df; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV2DF: +- icode = CODE_FOR_avx512vl_scatterdiv2df; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV8SI: +- icode = CODE_FOR_avx512vl_scattersiv8si; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV4SI: +- icode = CODE_FOR_avx512vl_scattersiv4si; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV4DI: +- icode = CODE_FOR_avx512vl_scattersiv4di; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERSIV2DI: +- icode = CODE_FOR_avx512vl_scattersiv2di; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV8SI: +- icode = CODE_FOR_avx512vl_scatterdiv8si; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV4SI: +- icode = CODE_FOR_avx512vl_scatterdiv4si; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV4DI: +- icode = CODE_FOR_avx512vl_scatterdiv4di; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERDIV2DI: +- icode = CODE_FOR_avx512vl_scatterdiv2di; +- goto scatter_gen; +- case IX86_BUILTIN_GATHERPFDPD: +- icode = CODE_FOR_avx512pf_gatherpfv8sidf; +- goto vec_prefetch_gen; +- case IX86_BUILTIN_SCATTERALTSIV8DF: +- icode = CODE_FOR_avx512f_scattersiv8df; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTDIV16SF: +- icode = CODE_FOR_avx512f_scatterdiv16sf; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTSIV8DI: +- icode = CODE_FOR_avx512f_scattersiv8di; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTDIV16SI: +- icode = CODE_FOR_avx512f_scatterdiv16si; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTSIV4DF: +- icode = CODE_FOR_avx512vl_scattersiv4df; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTDIV8SF: +- icode = CODE_FOR_avx512vl_scatterdiv8sf; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTSIV4DI: +- icode = CODE_FOR_avx512vl_scattersiv4di; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTDIV8SI: +- icode = CODE_FOR_avx512vl_scatterdiv8si; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTSIV2DF: +- icode = CODE_FOR_avx512vl_scattersiv2df; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTDIV4SF: +- icode = CODE_FOR_avx512vl_scatterdiv4sf; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTSIV2DI: +- icode = CODE_FOR_avx512vl_scattersiv2di; +- goto scatter_gen; +- case IX86_BUILTIN_SCATTERALTDIV4SI: +- icode = CODE_FOR_avx512vl_scatterdiv4si; +- goto scatter_gen; +- case IX86_BUILTIN_GATHERPFDPS: +- icode = CODE_FOR_avx512pf_gatherpfv16sisf; +- goto vec_prefetch_gen; +- case IX86_BUILTIN_GATHERPFQPD: +- icode = CODE_FOR_avx512pf_gatherpfv8didf; +- goto vec_prefetch_gen; +- case IX86_BUILTIN_GATHERPFQPS: +- icode = CODE_FOR_avx512pf_gatherpfv8disf; +- goto vec_prefetch_gen; +- case IX86_BUILTIN_SCATTERPFDPD: +- icode = CODE_FOR_avx512pf_scatterpfv8sidf; +- goto vec_prefetch_gen; +- case IX86_BUILTIN_SCATTERPFDPS: +- icode = CODE_FOR_avx512pf_scatterpfv16sisf; +- goto vec_prefetch_gen; +- case IX86_BUILTIN_SCATTERPFQPD: +- icode = CODE_FOR_avx512pf_scatterpfv8didf; +- goto vec_prefetch_gen; +- case IX86_BUILTIN_SCATTERPFQPS: +- icode = CODE_FOR_avx512pf_scatterpfv8disf; +- goto vec_prefetch_gen; +- +- gather_gen: +- rtx half; +- rtx (*gen) (rtx, rtx); +- +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- arg2 = CALL_EXPR_ARG (exp, 2); +- arg3 = CALL_EXPR_ARG 
(exp, 3); +- arg4 = CALL_EXPR_ARG (exp, 4); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- op2 = expand_normal (arg2); +- op3 = expand_normal (arg3); +- op4 = expand_normal (arg4); +- /* Note the arg order is different from the operand order. */ +- mode0 = insn_data[icode].operand[1].mode; +- mode2 = insn_data[icode].operand[3].mode; +- mode3 = insn_data[icode].operand[4].mode; +- mode4 = insn_data[icode].operand[5].mode; +- +- if (target == NULL_RTX +- || GET_MODE (target) != insn_data[icode].operand[0].mode +- || !insn_data[icode].operand[0].predicate (target, +- GET_MODE (target))) +- subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode); +- else +- subtarget = target; +- +- switch (fcode) +- { +- case IX86_BUILTIN_GATHER3ALTSIV8DF: +- case IX86_BUILTIN_GATHER3ALTSIV8DI: +- half = gen_reg_rtx (V8SImode); +- if (!nonimmediate_operand (op2, V16SImode)) +- op2 = copy_to_mode_reg (V16SImode, op2); +- emit_insn (gen_vec_extract_lo_v16si (half, op2)); +- op2 = half; +- break; +- case IX86_BUILTIN_GATHER3ALTSIV4DF: +- case IX86_BUILTIN_GATHER3ALTSIV4DI: +- case IX86_BUILTIN_GATHERALTSIV4DF: +- case IX86_BUILTIN_GATHERALTSIV4DI: +- half = gen_reg_rtx (V4SImode); +- if (!nonimmediate_operand (op2, V8SImode)) +- op2 = copy_to_mode_reg (V8SImode, op2); +- emit_insn (gen_vec_extract_lo_v8si (half, op2)); +- op2 = half; +- break; +- case IX86_BUILTIN_GATHER3ALTDIV16SF: +- case IX86_BUILTIN_GATHER3ALTDIV16SI: +- half = gen_reg_rtx (mode0); +- if (mode0 == V8SFmode) +- gen = gen_vec_extract_lo_v16sf; +- else +- gen = gen_vec_extract_lo_v16si; +- if (!nonimmediate_operand (op0, GET_MODE (op0))) +- op0 = copy_to_mode_reg (GET_MODE (op0), op0); +- emit_insn (gen (half, op0)); +- op0 = half; +- op3 = lowpart_subreg (QImode, op3, HImode); +- break; +- case IX86_BUILTIN_GATHER3ALTDIV8SF: +- case IX86_BUILTIN_GATHER3ALTDIV8SI: +- case IX86_BUILTIN_GATHERALTDIV8SF: +- case IX86_BUILTIN_GATHERALTDIV8SI: +- half = gen_reg_rtx (mode0); +- if (mode0 == V4SFmode) +- gen = gen_vec_extract_lo_v8sf; +- else +- gen = gen_vec_extract_lo_v8si; +- if (!nonimmediate_operand (op0, GET_MODE (op0))) +- op0 = copy_to_mode_reg (GET_MODE (op0), op0); +- emit_insn (gen (half, op0)); +- op0 = half; +- if (VECTOR_MODE_P (GET_MODE (op3))) +- { +- half = gen_reg_rtx (mode0); +- if (!nonimmediate_operand (op3, GET_MODE (op3))) +- op3 = copy_to_mode_reg (GET_MODE (op3), op3); +- emit_insn (gen (half, op3)); +- op3 = half; +- } +- break; +- default: +- break; +- } +- +- /* Force memory operand only with base register here. But we +- don't want to do it on memory operand for other builtin +- functions. */ +- op1 = ix86_zero_extend_to_Pmode (op1); +- +- if (!insn_data[icode].operand[1].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- if (!insn_data[icode].operand[2].predicate (op1, Pmode)) +- op1 = copy_to_mode_reg (Pmode, op1); +- if (!insn_data[icode].operand[3].predicate (op2, mode2)) +- op2 = copy_to_mode_reg (mode2, op2); +- +- op3 = fixup_modeless_constant (op3, mode3); +- +- if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode) +- { +- if (!insn_data[icode].operand[4].predicate (op3, mode3)) +- op3 = copy_to_mode_reg (mode3, op3); +- } +- else +- { +- op3 = copy_to_reg (op3); +- op3 = lowpart_subreg (mode3, op3, GET_MODE (op3)); +- } +- if (!insn_data[icode].operand[5].predicate (op4, mode4)) +- { +- error ("the last argument must be scale 1, 2, 4, 8"); +- return const0_rtx; +- } +- +- /* Optimize. 
If mask is known to have all high bits set, +- replace op0 with pc_rtx to signal that the instruction +- overwrites the whole destination and doesn't use its +- previous contents. */ +- if (optimize) +- { +- if (TREE_CODE (arg3) == INTEGER_CST) +- { +- if (integer_all_onesp (arg3)) +- op0 = pc_rtx; +- } +- else if (TREE_CODE (arg3) == VECTOR_CST) +- { +- unsigned int negative = 0; +- for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i) +- { +- tree cst = VECTOR_CST_ELT (arg3, i); +- if (TREE_CODE (cst) == INTEGER_CST +- && tree_int_cst_sign_bit (cst)) +- negative++; +- else if (TREE_CODE (cst) == REAL_CST +- && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst))) +- negative++; +- } +- if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3))) +- op0 = pc_rtx; +- } +- else if (TREE_CODE (arg3) == SSA_NAME +- && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE) +- { +- /* Recognize also when mask is like: +- __v2df src = _mm_setzero_pd (); +- __v2df mask = _mm_cmpeq_pd (src, src); +- or +- __v8sf src = _mm256_setzero_ps (); +- __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ); +- as that is a cheaper way to load all ones into +- a register than having to load a constant from +- memory. */ +- gimple *def_stmt = SSA_NAME_DEF_STMT (arg3); +- if (is_gimple_call (def_stmt)) +- { +- tree fndecl = gimple_call_fndecl (def_stmt); +- if (fndecl +- && fndecl_built_in_p (fndecl, BUILT_IN_MD)) +- switch ((unsigned int) DECL_FUNCTION_CODE (fndecl)) +- { +- case IX86_BUILTIN_CMPPD: +- case IX86_BUILTIN_CMPPS: +- case IX86_BUILTIN_CMPPD256: +- case IX86_BUILTIN_CMPPS256: +- if (!integer_zerop (gimple_call_arg (def_stmt, 2))) +- break; +- /* FALLTHRU */ +- case IX86_BUILTIN_CMPEQPD: +- case IX86_BUILTIN_CMPEQPS: +- if (initializer_zerop (gimple_call_arg (def_stmt, 0)) +- && initializer_zerop (gimple_call_arg (def_stmt, +- 1))) +- op0 = pc_rtx; +- break; +- default: +- break; +- } +- } +- } +- } +- +- pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); +- if (! pat) +- return const0_rtx; +- emit_insn (pat); +- +- switch (fcode) +- { +- case IX86_BUILTIN_GATHER3DIV16SF: +- if (target == NULL_RTX) +- target = gen_reg_rtx (V8SFmode); +- emit_insn (gen_vec_extract_lo_v16sf (target, subtarget)); +- break; +- case IX86_BUILTIN_GATHER3DIV16SI: +- if (target == NULL_RTX) +- target = gen_reg_rtx (V8SImode); +- emit_insn (gen_vec_extract_lo_v16si (target, subtarget)); +- break; +- case IX86_BUILTIN_GATHER3DIV8SF: +- case IX86_BUILTIN_GATHERDIV8SF: +- if (target == NULL_RTX) +- target = gen_reg_rtx (V4SFmode); +- emit_insn (gen_vec_extract_lo_v8sf (target, subtarget)); +- break; +- case IX86_BUILTIN_GATHER3DIV8SI: +- case IX86_BUILTIN_GATHERDIV8SI: +- if (target == NULL_RTX) +- target = gen_reg_rtx (V4SImode); +- emit_insn (gen_vec_extract_lo_v8si (target, subtarget)); +- break; +- default: +- target = subtarget; +- break; +- } +- return target; +- +- scatter_gen: +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- arg2 = CALL_EXPR_ARG (exp, 2); +- arg3 = CALL_EXPR_ARG (exp, 3); +- arg4 = CALL_EXPR_ARG (exp, 4); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- op2 = expand_normal (arg2); +- op3 = expand_normal (arg3); +- op4 = expand_normal (arg4); +- mode1 = insn_data[icode].operand[1].mode; +- mode2 = insn_data[icode].operand[2].mode; +- mode3 = insn_data[icode].operand[3].mode; +- mode4 = insn_data[icode].operand[4].mode; +- +- /* Scatter instruction stores operand op3 to memory with +- indices from op2 and scale from op4 under writemask op1. 
+- If index operand op2 has more elements then source operand +- op3 one need to use only its low half. And vice versa. */ +- switch (fcode) +- { +- case IX86_BUILTIN_SCATTERALTSIV8DF: +- case IX86_BUILTIN_SCATTERALTSIV8DI: +- half = gen_reg_rtx (V8SImode); +- if (!nonimmediate_operand (op2, V16SImode)) +- op2 = copy_to_mode_reg (V16SImode, op2); +- emit_insn (gen_vec_extract_lo_v16si (half, op2)); +- op2 = half; +- break; +- case IX86_BUILTIN_SCATTERALTDIV16SF: +- case IX86_BUILTIN_SCATTERALTDIV16SI: +- half = gen_reg_rtx (mode3); +- if (mode3 == V8SFmode) +- gen = gen_vec_extract_lo_v16sf; +- else +- gen = gen_vec_extract_lo_v16si; +- if (!nonimmediate_operand (op3, GET_MODE (op3))) +- op3 = copy_to_mode_reg (GET_MODE (op3), op3); +- emit_insn (gen (half, op3)); +- op3 = half; +- break; +- case IX86_BUILTIN_SCATTERALTSIV4DF: +- case IX86_BUILTIN_SCATTERALTSIV4DI: +- half = gen_reg_rtx (V4SImode); +- if (!nonimmediate_operand (op2, V8SImode)) +- op2 = copy_to_mode_reg (V8SImode, op2); +- emit_insn (gen_vec_extract_lo_v8si (half, op2)); +- op2 = half; +- break; +- case IX86_BUILTIN_SCATTERALTDIV8SF: +- case IX86_BUILTIN_SCATTERALTDIV8SI: +- half = gen_reg_rtx (mode3); +- if (mode3 == V4SFmode) +- gen = gen_vec_extract_lo_v8sf; +- else +- gen = gen_vec_extract_lo_v8si; +- if (!nonimmediate_operand (op3, GET_MODE (op3))) +- op3 = copy_to_mode_reg (GET_MODE (op3), op3); +- emit_insn (gen (half, op3)); +- op3 = half; +- break; +- case IX86_BUILTIN_SCATTERALTSIV2DF: +- case IX86_BUILTIN_SCATTERALTSIV2DI: +- if (!nonimmediate_operand (op2, V4SImode)) +- op2 = copy_to_mode_reg (V4SImode, op2); +- break; +- case IX86_BUILTIN_SCATTERALTDIV4SF: +- case IX86_BUILTIN_SCATTERALTDIV4SI: +- if (!nonimmediate_operand (op3, GET_MODE (op3))) +- op3 = copy_to_mode_reg (GET_MODE (op3), op3); +- break; +- default: +- break; +- } +- +- /* Force memory operand only with base register here. But we +- don't want to do it on memory operand for other builtin +- functions. */ +- op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); +- +- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) +- op0 = copy_to_mode_reg (Pmode, op0); +- +- op1 = fixup_modeless_constant (op1, mode1); +- +- if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) +- { +- if (!insn_data[icode].operand[1].predicate (op1, mode1)) +- op1 = copy_to_mode_reg (mode1, op1); +- } +- else +- { +- op1 = copy_to_reg (op1); +- op1 = lowpart_subreg (mode1, op1, GET_MODE (op1)); +- } +- +- if (!insn_data[icode].operand[2].predicate (op2, mode2)) +- op2 = copy_to_mode_reg (mode2, op2); +- +- if (!insn_data[icode].operand[3].predicate (op3, mode3)) +- op3 = copy_to_mode_reg (mode3, op3); +- +- if (!insn_data[icode].operand[4].predicate (op4, mode4)) +- { +- error ("the last argument must be scale 1, 2, 4, 8"); +- return const0_rtx; +- } +- +- pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); +- if (! 
pat) +- return const0_rtx; +- +- emit_insn (pat); +- return 0; +- +- vec_prefetch_gen: +- arg0 = CALL_EXPR_ARG (exp, 0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- arg2 = CALL_EXPR_ARG (exp, 2); +- arg3 = CALL_EXPR_ARG (exp, 3); +- arg4 = CALL_EXPR_ARG (exp, 4); +- op0 = expand_normal (arg0); +- op1 = expand_normal (arg1); +- op2 = expand_normal (arg2); +- op3 = expand_normal (arg3); +- op4 = expand_normal (arg4); +- mode0 = insn_data[icode].operand[0].mode; +- mode1 = insn_data[icode].operand[1].mode; +- mode3 = insn_data[icode].operand[3].mode; +- mode4 = insn_data[icode].operand[4].mode; +- +- op0 = fixup_modeless_constant (op0, mode0); +- +- if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) +- { +- if (!insn_data[icode].operand[0].predicate (op0, mode0)) +- op0 = copy_to_mode_reg (mode0, op0); +- } +- else +- { +- op0 = copy_to_reg (op0); +- op0 = lowpart_subreg (mode0, op0, GET_MODE (op0)); +- } +- +- if (!insn_data[icode].operand[1].predicate (op1, mode1)) +- op1 = copy_to_mode_reg (mode1, op1); +- +- /* Force memory operand only with base register here. But we +- don't want to do it on memory operand for other builtin +- functions. */ +- op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1)); +- +- if (!insn_data[icode].operand[2].predicate (op2, Pmode)) +- op2 = copy_to_mode_reg (Pmode, op2); +- +- if (!insn_data[icode].operand[3].predicate (op3, mode3)) +- { +- error ("the forth argument must be scale 1, 2, 4, 8"); +- return const0_rtx; +- } +- +- if (!insn_data[icode].operand[4].predicate (op4, mode4)) +- { +- error ("incorrect hint operand"); +- return const0_rtx; +- } +- +- pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); +- if (! pat) +- return const0_rtx; +- +- emit_insn (pat); +- +- return 0; +- +- case IX86_BUILTIN_XABORT: +- icode = CODE_FOR_xabort; +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- mode0 = insn_data[icode].operand[0].mode; +- if (!insn_data[icode].operand[0].predicate (op0, mode0)) +- { +- error ("the argument to %<xabort%> intrinsic must " +- "be an 8-bit immediate"); +- return const0_rtx; +- } +- emit_insn (gen_xabort (op0)); +- return 0; +- +- case IX86_BUILTIN_RSTORSSP: +- case IX86_BUILTIN_CLRSSBSY: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- icode = (fcode == IX86_BUILTIN_RSTORSSP +- ?
CODE_FOR_rstorssp +- : CODE_FOR_clrssbsy); +- if (!address_operand (op0, VOIDmode)) +- { +- op1 = convert_memory_address (Pmode, op0); +- op0 = copy_addr_to_reg (op1); +- } +- emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0))); +- return 0; +- +- case IX86_BUILTIN_WRSSD: +- case IX86_BUILTIN_WRSSQ: +- case IX86_BUILTIN_WRUSSD: +- case IX86_BUILTIN_WRUSSQ: +- arg0 = CALL_EXPR_ARG (exp, 0); +- op0 = expand_normal (arg0); +- arg1 = CALL_EXPR_ARG (exp, 1); +- op1 = expand_normal (arg1); +- switch (fcode) +- { +- case IX86_BUILTIN_WRSSD: +- icode = CODE_FOR_wrsssi; +- mode = SImode; +- break; +- case IX86_BUILTIN_WRSSQ: +- icode = CODE_FOR_wrssdi; +- mode = DImode; +- break; +- case IX86_BUILTIN_WRUSSD: +- icode = CODE_FOR_wrusssi; +- mode = SImode; +- break; +- case IX86_BUILTIN_WRUSSQ: +- icode = CODE_FOR_wrussdi; +- mode = DImode; +- break; +- } +- op0 = force_reg (mode, op0); +- if (!address_operand (op1, VOIDmode)) +- { +- op2 = convert_memory_address (Pmode, op1); +- op1 = copy_addr_to_reg (op2); +- } +- emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1))); +- return 0; +- +- default: +- break; +- } +- +- if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST +- && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST) +- { +- i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST; +- return ix86_expand_special_args_builtin (bdesc_special_args + i, exp, +- target); +- } +- +- if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST +- && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST) +- { +- i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST; +- rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL; +- rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx); +- rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx); +- int masked = 1; +- machine_mode mode, wide_mode, nar_mode; +- +- nar_mode = V4SFmode; +- mode = V16SFmode; +- wide_mode = V64SFmode; +- fcn_mask = gen_avx5124fmaddps_4fmaddps_mask; +- fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz; +- +- switch (fcode) +- { +- case IX86_BUILTIN_4FMAPS: +- fcn = gen_avx5124fmaddps_4fmaddps; +- masked = 0; +- goto v4fma_expand; +- +- case IX86_BUILTIN_4DPWSSD: +- nar_mode = V4SImode; +- mode = V16SImode; +- wide_mode = V64SImode; +- fcn = gen_avx5124vnniw_vp4dpwssd; +- masked = 0; +- goto v4fma_expand; +- +- case IX86_BUILTIN_4DPWSSDS: +- nar_mode = V4SImode; +- mode = V16SImode; +- wide_mode = V64SImode; +- fcn = gen_avx5124vnniw_vp4dpwssds; +- masked = 0; +- goto v4fma_expand; +- +- case IX86_BUILTIN_4FNMAPS: +- fcn = gen_avx5124fmaddps_4fnmaddps; +- masked = 0; +- goto v4fma_expand; +- +- case IX86_BUILTIN_4FNMAPS_MASK: +- fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask; +- fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz; +- goto v4fma_expand; +- +- case IX86_BUILTIN_4DPWSSD_MASK: +- nar_mode = V4SImode; +- mode = V16SImode; +- wide_mode = V64SImode; +- fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask; +- fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz; +- goto v4fma_expand; +- +- case IX86_BUILTIN_4DPWSSDS_MASK: +- nar_mode = V4SImode; +- mode = V16SImode; +- wide_mode = V64SImode; +- fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask; +- fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz; +- goto v4fma_expand; +- +- case IX86_BUILTIN_4FMAPS_MASK: +- { +- tree args[4]; +- rtx ops[4]; +- rtx wide_reg; +- rtx accum; +- rtx addr; +- rtx mem; +- +-v4fma_expand: +- wide_reg = gen_reg_rtx (wide_mode); +- for (i = 0; i < 4; i++) +- { +- args[i] = CALL_EXPR_ARG (exp, i); +- ops[i] = expand_normal (args[i]); +- +- emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64), +- ops[i]); +- } +- +- accum = expand_normal (CALL_EXPR_ARG (exp, 4)); 
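/* [Editorial aside, not part of the quoted patch.]  The hunk above picks
   between the _maskz and _mask variants of the AVX512_4FMAPS / 4VNNIW
   expanders by inspecting the merge operand: a constant-zero merge selects
   zero-masking, a merge identical to the accumulator argument selects
   in-place merge-masking, and anything else is first copied into a fresh
   register and merge-masked there.  Below is a minimal, self-contained C
   model of that per-lane masking policy; the names (pick_variant,
   apply_mask, merge_policy) are invented for illustration only and do not
   exist in GCC.  */
#include <stdint.h>
#include <stdio.h>

enum merge_policy { MASKZ, MASK_INPLACE, MASK_COPY };

/* Mirror of the const0_operand / same-argument checks in the hunk above:
   decide which masked variant the expander would emit.  */
static enum merge_policy
pick_variant (int merge_is_zero_constant, int merge_is_accumulator)
{
  if (merge_is_zero_constant)
    return MASKZ;            /* z-masked: inactive lanes become 0.  */
  if (merge_is_accumulator)
    return MASK_INPLACE;     /* merge-masked into the accumulator.  */
  return MASK_COPY;          /* merge-masked into a copy of merge.  */
}

/* Behaviour of a 16-lane write-mask: active lanes take the computed
   result, inactive lanes take zero (MASKZ) or the merge value.  */
static void
apply_mask (float dst[16], const float result[16], const float merge[16],
            uint16_t mask, enum merge_policy policy)
{
  for (int lane = 0; lane < 16; lane++)
    {
      if (mask & (1u << lane))
        dst[lane] = result[lane];
      else
        dst[lane] = (policy == MASKZ) ? 0.0f : merge[lane];
    }
}

int
main (void)
{
  float result[16], merge[16], dst[16];
  for (int i = 0; i < 16; i++)
    {
      result[i] = (float) i;
      merge[i] = -1.0f;
    }
  /* Neither zero nor the accumulator: the MASK_COPY path is modelled.  */
  apply_mask (dst, result, merge, 0x00ff, pick_variant (0, 0));
  printf ("lane 0 = %g, lane 15 = %g\n", dst[0], dst[15]);
  return 0;
}
/* [End of editorial aside; the quoted patch hunk continues below.]  */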
+- accum = force_reg (mode, accum); +- +- addr = expand_normal (CALL_EXPR_ARG (exp, 5)); +- addr = force_reg (Pmode, addr); +- +- mem = gen_rtx_MEM (nar_mode, addr); +- +- target = gen_reg_rtx (mode); +- +- emit_move_insn (target, accum); +- +- if (! masked) +- emit_insn (fcn (target, accum, wide_reg, mem)); +- else +- { +- rtx merge, mask; +- merge = expand_normal (CALL_EXPR_ARG (exp, 6)); +- +- mask = expand_normal (CALL_EXPR_ARG (exp, 7)); +- +- if (CONST_INT_P (mask)) +- mask = fixup_modeless_constant (mask, HImode); +- +- mask = force_reg (HImode, mask); +- +- if (GET_MODE (mask) != HImode) +- mask = gen_rtx_SUBREG (HImode, mask, 0); +- +- /* If merge is 0 then we're about to emit z-masked variant. */ +- if (const0_operand (merge, mode)) +- emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask)); +- /* If merge is the same as accum then emit merge-masked variant. */ +- else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4)) +- { +- merge = force_reg (mode, merge); +- emit_insn (fcn_mask (target, wide_reg, mem, merge, mask)); +- } +- /* Merge with something unknown might happen if we z-mask w/ -O0. */ +- else +- { +- target = gen_reg_rtx (mode); +- emit_move_insn (target, merge); +- emit_insn (fcn_mask (target, wide_reg, mem, target, mask)); +- } +- } +- return target; +- } +- +- case IX86_BUILTIN_4FNMASS: +- fcn = gen_avx5124fmaddps_4fnmaddss; +- masked = 0; +- goto s4fma_expand; +- +- case IX86_BUILTIN_4FMASS: +- fcn = gen_avx5124fmaddps_4fmaddss; +- masked = 0; +- goto s4fma_expand; +- +- case IX86_BUILTIN_4FNMASS_MASK: +- fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask; +- fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz; +- goto s4fma_expand; +- +- case IX86_BUILTIN_4FMASS_MASK: +- { +- tree args[4]; +- rtx ops[4]; +- rtx wide_reg; +- rtx accum; +- rtx addr; +- rtx mem; +- +- fcn_mask = gen_avx5124fmaddps_4fmaddss_mask; +- fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz; +- +-s4fma_expand: +- mode = V4SFmode; +- wide_reg = gen_reg_rtx (V64SFmode); +- for (i = 0; i < 4; i++) +- { +- rtx tmp; +- args[i] = CALL_EXPR_ARG (exp, i); +- ops[i] = expand_normal (args[i]); +- +- tmp = gen_reg_rtx (SFmode); +- emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0)); +- +- emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64), +- gen_rtx_SUBREG (V16SFmode, tmp, 0)); +- } +- +- accum = expand_normal (CALL_EXPR_ARG (exp, 4)); +- accum = force_reg (V4SFmode, accum); +- +- addr = expand_normal (CALL_EXPR_ARG (exp, 5)); +- addr = force_reg (Pmode, addr); +- +- mem = gen_rtx_MEM (V4SFmode, addr); +- +- target = gen_reg_rtx (V4SFmode); +- +- emit_move_insn (target, accum); +- +- if (! masked) +- emit_insn (fcn (target, accum, wide_reg, mem)); +- else +- { +- rtx merge, mask; +- merge = expand_normal (CALL_EXPR_ARG (exp, 6)); +- +- mask = expand_normal (CALL_EXPR_ARG (exp, 7)); +- +- if (CONST_INT_P (mask)) +- mask = fixup_modeless_constant (mask, QImode); +- +- mask = force_reg (QImode, mask); +- +- if (GET_MODE (mask) != QImode) +- mask = gen_rtx_SUBREG (QImode, mask, 0); +- +- /* If merge is 0 then we're about to emit z-masked variant. */ +- if (const0_operand (merge, mode)) +- emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask)); +- /* If merge is the same as accum then emit merge-masked +- variant. */ +- else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4)) +- { +- merge = force_reg (mode, merge); +- emit_insn (fcn_mask (target, wide_reg, mem, merge, mask)); +- } +- /* Merge with something unknown might happen if we z-mask +- w/ -O0. 
*/ +- else +- { +- target = gen_reg_rtx (mode); +- emit_move_insn (target, merge); +- emit_insn (fcn_mask (target, wide_reg, mem, target, mask)); +- } +- } +- return target; +- } +- case IX86_BUILTIN_RDPID: +- return ix86_expand_special_args_builtin (bdesc_args + i, exp, +- target); +- case IX86_BUILTIN_FABSQ: +- case IX86_BUILTIN_COPYSIGNQ: +- if (!TARGET_SSE) +- /* Emit a normal call if SSE isn't available. */ +- return expand_call (exp, target, ignore); +- /* FALLTHRU */ +- default: +- return ix86_expand_args_builtin (bdesc_args + i, exp, target); +- } +- } +- +- if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST +- && fcode <= IX86_BUILTIN__BDESC_COMI_LAST) +- { +- i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST; +- return ix86_expand_sse_comi (bdesc_comi + i, exp, target); +- } +- +- if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST +- && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST) +- { +- i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST; +- return ix86_expand_round_builtin (bdesc_round_args + i, exp, target); +- } +- +- if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST +- && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST) +- { +- i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST; +- return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target); +- } +- +- if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST +- && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST) +- { +- i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST; +- return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target); +- } +- +- if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST +- && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST) +- { +- i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST; +- const struct builtin_description *d = bdesc_multi_arg + i; +- return ix86_expand_multi_arg_builtin (d->icode, exp, target, +- (enum ix86_builtin_func_type) +- d->flag, d->comparison); +- } +- +- if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST +- && fcode <= IX86_BUILTIN__BDESC_CET_LAST) +- { +- i = fcode - IX86_BUILTIN__BDESC_CET_FIRST; +- return ix86_expand_special_args_builtin (bdesc_cet + i, exp, +- target); +- } +- +- if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST +- && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST) +- { +- i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST; +- return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp, +- target); +- } +- +- gcc_unreachable (); +-} +- +-/* This returns the target-specific builtin with code CODE if +- current_function_decl has visibility on this builtin, which is checked +- using isa flags. Returns NULL_TREE otherwise. */ +- +-static tree ix86_get_builtin (enum ix86_builtins code) +-{ +- struct cl_target_option *opts; +- tree target_tree = NULL_TREE; +- +- /* Determine the isa flags of current_function_decl. */ +- +- if (current_function_decl) +- target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl); +- +- if (target_tree == NULL) +- target_tree = target_option_default_node; +- +- opts = TREE_TARGET_OPTION (target_tree); +- +- if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags) +- || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2)) +- return ix86_builtin_decl (code, true); +- else +- return NULL_TREE; +-} +- +-/* Returns a function decl for a vectorized version of the combined function +- with combined_fn code FN and the result vector type TYPE, or NULL_TREE +- if it is not available. 
*/ +- +-static tree +-ix86_builtin_vectorized_function (unsigned int fn, tree type_out, +- tree type_in) +-{ +- machine_mode in_mode, out_mode; +- int in_n, out_n; +- +- if (TREE_CODE (type_out) != VECTOR_TYPE +- || TREE_CODE (type_in) != VECTOR_TYPE) +- return NULL_TREE; +- +- out_mode = TYPE_MODE (TREE_TYPE (type_out)); +- out_n = TYPE_VECTOR_SUBPARTS (type_out); +- in_mode = TYPE_MODE (TREE_TYPE (type_in)); +- in_n = TYPE_VECTOR_SUBPARTS (type_in); +- +- switch (fn) +- { +- CASE_CFN_EXP2: +- if (out_mode == SFmode && in_mode == SFmode) +- { +- if (out_n == 16 && in_n == 16) +- return ix86_get_builtin (IX86_BUILTIN_EXP2PS); +- } +- break; +- +- CASE_CFN_IFLOOR: +- CASE_CFN_LFLOOR: +- CASE_CFN_LLFLOOR: +- /* The round insn does not trap on denormals. */ +- if (flag_trapping_math || !TARGET_SSE4_1) +- break; +- +- if (out_mode == SImode && in_mode == DFmode) +- { +- if (out_n == 4 && in_n == 2) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX); +- else if (out_n == 8 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256); +- else if (out_n == 16 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512); +- } +- if (out_mode == SImode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256); +- else if (out_n == 16 && in_n == 16) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512); +- } +- break; +- +- CASE_CFN_ICEIL: +- CASE_CFN_LCEIL: +- CASE_CFN_LLCEIL: +- /* The round insn does not trap on denormals. */ +- if (flag_trapping_math || !TARGET_SSE4_1) +- break; +- +- if (out_mode == SImode && in_mode == DFmode) +- { +- if (out_n == 4 && in_n == 2) +- return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX); +- else if (out_n == 8 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256); +- else if (out_n == 16 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512); +- } +- if (out_mode == SImode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256); +- else if (out_n == 16 && in_n == 16) +- return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512); +- } +- break; +- +- CASE_CFN_IRINT: +- CASE_CFN_LRINT: +- CASE_CFN_LLRINT: +- if (out_mode == SImode && in_mode == DFmode) +- { +- if (out_n == 4 && in_n == 2) +- return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX); +- else if (out_n == 8 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256); +- else if (out_n == 16 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512); +- } +- if (out_mode == SImode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256); +- else if (out_n == 16 && in_n == 16) +- return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512); +- } +- break; +- +- CASE_CFN_IROUND: +- CASE_CFN_LROUND: +- CASE_CFN_LLROUND: +- /* The round insn does not trap on denormals. 
*/ +- if (flag_trapping_math || !TARGET_SSE4_1) +- break; +- +- if (out_mode == SImode && in_mode == DFmode) +- { +- if (out_n == 4 && in_n == 2) +- return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX); +- else if (out_n == 8 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256); +- else if (out_n == 16 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512); +- } +- if (out_mode == SImode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256); +- else if (out_n == 16 && in_n == 16) +- return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512); +- } +- break; +- +- CASE_CFN_FLOOR: +- /* The round insn does not trap on denormals. */ +- if (flag_trapping_math || !TARGET_SSE4_1) +- break; +- +- if (out_mode == DFmode && in_mode == DFmode) +- { +- if (out_n == 2 && in_n == 2) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPD); +- else if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPD256); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPD512); +- } +- if (out_mode == SFmode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPS); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPS256); +- else if (out_n == 16 && in_n == 16) +- return ix86_get_builtin (IX86_BUILTIN_FLOORPS512); +- } +- break; +- +- CASE_CFN_CEIL: +- /* The round insn does not trap on denormals. */ +- if (flag_trapping_math || !TARGET_SSE4_1) +- break; +- +- if (out_mode == DFmode && in_mode == DFmode) +- { +- if (out_n == 2 && in_n == 2) +- return ix86_get_builtin (IX86_BUILTIN_CEILPD); +- else if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_CEILPD256); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_CEILPD512); +- } +- if (out_mode == SFmode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_CEILPS); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_CEILPS256); +- else if (out_n == 16 && in_n == 16) +- return ix86_get_builtin (IX86_BUILTIN_CEILPS512); +- } +- break; +- +- CASE_CFN_TRUNC: +- /* The round insn does not trap on denormals. */ +- if (flag_trapping_math || !TARGET_SSE4_1) +- break; +- +- if (out_mode == DFmode && in_mode == DFmode) +- { +- if (out_n == 2 && in_n == 2) +- return ix86_get_builtin (IX86_BUILTIN_TRUNCPD); +- else if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512); +- } +- if (out_mode == SFmode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_TRUNCPS); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256); +- else if (out_n == 16 && in_n == 16) +- return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512); +- } +- break; +- +- CASE_CFN_RINT: +- /* The round insn does not trap on denormals. 
*/ +- if (flag_trapping_math || !TARGET_SSE4_1) +- break; +- +- if (out_mode == DFmode && in_mode == DFmode) +- { +- if (out_n == 2 && in_n == 2) +- return ix86_get_builtin (IX86_BUILTIN_RINTPD); +- else if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_RINTPD256); +- } +- if (out_mode == SFmode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_RINTPS); +- else if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_RINTPS256); +- } +- break; +- +- CASE_CFN_FMA: +- if (out_mode == DFmode && in_mode == DFmode) +- { +- if (out_n == 2 && in_n == 2) +- return ix86_get_builtin (IX86_BUILTIN_VFMADDPD); +- if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256); +- } +- if (out_mode == SFmode && in_mode == SFmode) +- { +- if (out_n == 4 && in_n == 4) +- return ix86_get_builtin (IX86_BUILTIN_VFMADDPS); +- if (out_n == 8 && in_n == 8) +- return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256); +- } +- break; +- +- default: +- break; +- } +- +- /* Dispatch to a handler for a vectorization library. */ +- if (ix86_veclib_handler) +- return ix86_veclib_handler (combined_fn (fn), type_out, type_in); +- +- return NULL_TREE; +-} +- +-/* Handler for an SVML-style interface to +- a library with vectorized intrinsics. */ +- +-static tree +-ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in) +-{ +- char name[20]; +- tree fntype, new_fndecl, args; +- unsigned arity; +- const char *bname; +- machine_mode el_mode, in_mode; +- int n, in_n; +- +- /* The SVML is suitable for unsafe math only. */ +- if (!flag_unsafe_math_optimizations) +- return NULL_TREE; +- +- el_mode = TYPE_MODE (TREE_TYPE (type_out)); +- n = TYPE_VECTOR_SUBPARTS (type_out); +- in_mode = TYPE_MODE (TREE_TYPE (type_in)); +- in_n = TYPE_VECTOR_SUBPARTS (type_in); +- if (el_mode != in_mode +- || n != in_n) +- return NULL_TREE; +- +- switch (fn) +- { +- CASE_CFN_EXP: +- CASE_CFN_LOG: +- CASE_CFN_LOG10: +- CASE_CFN_POW: +- CASE_CFN_TANH: +- CASE_CFN_TAN: +- CASE_CFN_ATAN: +- CASE_CFN_ATAN2: +- CASE_CFN_ATANH: +- CASE_CFN_CBRT: +- CASE_CFN_SINH: +- CASE_CFN_SIN: +- CASE_CFN_ASINH: +- CASE_CFN_ASIN: +- CASE_CFN_COSH: +- CASE_CFN_COS: +- CASE_CFN_ACOSH: +- CASE_CFN_ACOS: +- if ((el_mode != DFmode || n != 2) +- && (el_mode != SFmode || n != 4)) +- return NULL_TREE; +- break; +- +- default: +- return NULL_TREE; +- } +- +- tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); +- bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); +- +- if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF) +- strcpy (name, "vmlsLn4"); +- else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG) +- strcpy (name, "vmldLn2"); +- else if (n == 4) +- { +- sprintf (name, "vmls%s", bname+10); +- name[strlen (name)-1] = '4'; +- } +- else +- sprintf (name, "vmld%s2", bname+10); +- +- /* Convert to uppercase. */ +- name[4] &= ~0x20; +- +- arity = 0; +- for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) +- arity++; +- +- if (arity == 1) +- fntype = build_function_type_list (type_out, type_in, NULL); +- else +- fntype = build_function_type_list (type_out, type_in, type_in, NULL); +- +- /* Build a function declaration for the vectorized function. 
*/ +- new_fndecl = build_decl (BUILTINS_LOCATION, +- FUNCTION_DECL, get_identifier (name), fntype); +- TREE_PUBLIC (new_fndecl) = 1; +- DECL_EXTERNAL (new_fndecl) = 1; +- DECL_IS_NOVOPS (new_fndecl) = 1; +- TREE_READONLY (new_fndecl) = 1; +- +- return new_fndecl; +-} +- +-/* Handler for an ACML-style interface to +- a library with vectorized intrinsics. */ +- +-static tree +-ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in) +-{ +- char name[20] = "__vr.._"; +- tree fntype, new_fndecl, args; +- unsigned arity; +- const char *bname; +- machine_mode el_mode, in_mode; +- int n, in_n; +- +- /* The ACML is 64bits only and suitable for unsafe math only as +- it does not correctly support parts of IEEE with the required +- precision such as denormals. */ +- if (!TARGET_64BIT +- || !flag_unsafe_math_optimizations) +- return NULL_TREE; +- +- el_mode = TYPE_MODE (TREE_TYPE (type_out)); +- n = TYPE_VECTOR_SUBPARTS (type_out); +- in_mode = TYPE_MODE (TREE_TYPE (type_in)); +- in_n = TYPE_VECTOR_SUBPARTS (type_in); +- if (el_mode != in_mode +- || n != in_n) +- return NULL_TREE; +- +- switch (fn) +- { +- CASE_CFN_SIN: +- CASE_CFN_COS: +- CASE_CFN_EXP: +- CASE_CFN_LOG: +- CASE_CFN_LOG2: +- CASE_CFN_LOG10: +- if (el_mode == DFmode && n == 2) +- { +- name[4] = 'd'; +- name[5] = '2'; +- } +- else if (el_mode == SFmode && n == 4) +- { +- name[4] = 's'; +- name[5] = '4'; +- } +- else +- return NULL_TREE; +- break; +- +- default: +- return NULL_TREE; +- } +- +- tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); +- bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); +- sprintf (name + 7, "%s", bname+10); +- +- arity = 0; +- for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) +- arity++; +- +- if (arity == 1) +- fntype = build_function_type_list (type_out, type_in, NULL); +- else +- fntype = build_function_type_list (type_out, type_in, type_in, NULL); +- +- /* Build a function declaration for the vectorized function. */ +- new_fndecl = build_decl (BUILTINS_LOCATION, +- FUNCTION_DECL, get_identifier (name), fntype); +- TREE_PUBLIC (new_fndecl) = 1; +- DECL_EXTERNAL (new_fndecl) = 1; +- DECL_IS_NOVOPS (new_fndecl) = 1; +- TREE_READONLY (new_fndecl) = 1; +- +- return new_fndecl; +-} +- +-/* Returns a decl of a function that implements gather load with +- memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE. +- Return NULL_TREE if it is not available. */ +- +-static tree +-ix86_vectorize_builtin_gather (const_tree mem_vectype, +- const_tree index_type, int scale) +-{ +- bool si; +- enum ix86_builtins code; +- +- if (! TARGET_AVX2 || !TARGET_USE_GATHER) +- return NULL_TREE; +- +- if ((TREE_CODE (index_type) != INTEGER_TYPE +- && !POINTER_TYPE_P (index_type)) +- || (TYPE_MODE (index_type) != SImode +- && TYPE_MODE (index_type) != DImode)) +- return NULL_TREE; +- +- if (TYPE_PRECISION (index_type) > POINTER_SIZE) +- return NULL_TREE; +- +- /* v*gather* insn sign extends index to pointer mode. */ +- if (TYPE_PRECISION (index_type) < POINTER_SIZE +- && TYPE_UNSIGNED (index_type)) +- return NULL_TREE; +- +- if (scale <= 0 +- || scale > 8 +- || (scale & (scale - 1)) != 0) +- return NULL_TREE; +- +- si = TYPE_MODE (index_type) == SImode; +- switch (TYPE_MODE (mem_vectype)) +- { +- case E_V2DFmode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF; +- else +- code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF; +- break; +- case E_V4DFmode: +- if (TARGET_AVX512VL) +- code = si ? 
IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF; +- else +- code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF; +- break; +- case E_V2DImode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI; +- else +- code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI; +- break; +- case E_V4DImode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI; +- else +- code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI; +- break; +- case E_V4SFmode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF; +- else +- code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF; +- break; +- case E_V8SFmode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF; +- else +- code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF; +- break; +- case E_V4SImode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI; +- else +- code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI; +- break; +- case E_V8SImode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI; +- else +- code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI; +- break; +- case E_V8DFmode: +- if (TARGET_AVX512F) +- code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF; +- else +- return NULL_TREE; +- break; +- case E_V8DImode: +- if (TARGET_AVX512F) +- code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI; +- else +- return NULL_TREE; +- break; +- case E_V16SFmode: +- if (TARGET_AVX512F) +- code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF; +- else +- return NULL_TREE; +- break; +- case E_V16SImode: +- if (TARGET_AVX512F) +- code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI; +- else +- return NULL_TREE; +- break; +- default: +- return NULL_TREE; +- } +- +- return ix86_get_builtin (code); +-} +- +-/* Returns a decl of a function that implements scatter store with +- register type VECTYPE and index type INDEX_TYPE and SCALE. +- Return NULL_TREE if it is not available. */ +- +-static tree +-ix86_vectorize_builtin_scatter (const_tree vectype, +- const_tree index_type, int scale) +-{ +- bool si; +- enum ix86_builtins code; +- +- if (!TARGET_AVX512F) +- return NULL_TREE; +- +- if ((TREE_CODE (index_type) != INTEGER_TYPE +- && !POINTER_TYPE_P (index_type)) +- || (TYPE_MODE (index_type) != SImode +- && TYPE_MODE (index_type) != DImode)) +- return NULL_TREE; +- +- if (TYPE_PRECISION (index_type) > POINTER_SIZE) +- return NULL_TREE; +- +- /* v*scatter* insn sign extends index to pointer mode. */ +- if (TYPE_PRECISION (index_type) < POINTER_SIZE +- && TYPE_UNSIGNED (index_type)) +- return NULL_TREE; +- +- /* Scale can be 1, 2, 4 or 8. */ +- if (scale <= 0 +- || scale > 8 +- || (scale & (scale - 1)) != 0) +- return NULL_TREE; +- +- si = TYPE_MODE (index_type) == SImode; +- switch (TYPE_MODE (vectype)) +- { +- case E_V8DFmode: +- code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF; +- break; +- case E_V8DImode: +- code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI; +- break; +- case E_V16SFmode: +- code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF; +- break; +- case E_V16SImode: +- code = si ? 
IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI; +- break; +- case E_V4DFmode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF; +- else +- return NULL_TREE; +- break; +- case E_V4DImode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI; +- else +- return NULL_TREE; +- break; +- case E_V8SFmode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF; +- else +- return NULL_TREE; +- break; +- case E_V8SImode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI; +- else +- return NULL_TREE; +- break; +- case E_V2DFmode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF; +- else +- return NULL_TREE; +- break; +- case E_V2DImode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI; +- else +- return NULL_TREE; +- break; +- case E_V4SFmode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF; +- else +- return NULL_TREE; +- break; +- case E_V4SImode: +- if (TARGET_AVX512VL) +- code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI; +- else +- return NULL_TREE; +- break; +- default: +- return NULL_TREE; +- } +- +- return ix86_builtins[code]; +-} +- +-/* Return true if it is safe to use the rsqrt optabs to optimize +- 1.0/sqrt. */ +- +-static bool +-use_rsqrt_p () +-{ +- return (TARGET_SSE && TARGET_SSE_MATH +- && flag_finite_math_only +- && !flag_trapping_math +- && flag_unsafe_math_optimizations); +-} +- +-/* Returns a code for a target-specific builtin that implements +- reciprocal of the function, or NULL_TREE if not available. */ +- +-static tree +-ix86_builtin_reciprocal (tree fndecl) +-{ +- switch (DECL_FUNCTION_CODE (fndecl)) +- { +- /* Vectorized version of sqrt to rsqrt conversion. */ +- case IX86_BUILTIN_SQRTPS_NR: +- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR); +- +- case IX86_BUILTIN_SQRTPS_NR256: +- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256); +- +- default: +- return NULL_TREE; +- } +-} +- +-/* Helper for avx_vpermilps256_operand et al. This is also used by +- the expansion functions to turn the parallel back into a mask. +- The return value is 0 for no match and the imm8+1 for a match. */ +- +-int +-avx_vpermilp_parallel (rtx par, machine_mode mode) +-{ +- unsigned i, nelt = GET_MODE_NUNITS (mode); +- unsigned mask = 0; +- unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */ +- +- if (XVECLEN (par, 0) != (int) nelt) +- return 0; +- +- /* Validate that all of the elements are constants, and not totally +- out of range. Copy the data into an integral array to make the +- subsequent checks easier. */ +- for (i = 0; i < nelt; ++i) +- { +- rtx er = XVECEXP (par, 0, i); +- unsigned HOST_WIDE_INT ei; +- +- if (!CONST_INT_P (er)) +- return 0; +- ei = INTVAL (er); +- if (ei >= nelt) +- return 0; +- ipar[i] = ei; +- } +- +- switch (mode) +- { +- case E_V8DFmode: +- /* In the 512-bit DFmode case, we can only move elements within +- a 128-bit lane. First fill the second part of the mask, +- then fallthru. 
*/ +- for (i = 4; i < 6; ++i) +- { +- if (ipar[i] < 4 || ipar[i] >= 6) +- return 0; +- mask |= (ipar[i] - 4) << i; +- } +- for (i = 6; i < 8; ++i) +- { +- if (ipar[i] < 6) +- return 0; +- mask |= (ipar[i] - 6) << i; +- } +- /* FALLTHRU */ +- +- case E_V4DFmode: +- /* In the 256-bit DFmode case, we can only move elements within +- a 128-bit lane. */ +- for (i = 0; i < 2; ++i) +- { +- if (ipar[i] >= 2) +- return 0; +- mask |= ipar[i] << i; +- } +- for (i = 2; i < 4; ++i) +- { +- if (ipar[i] < 2) +- return 0; +- mask |= (ipar[i] - 2) << i; +- } +- break; +- +- case E_V16SFmode: +- /* In 512 bit SFmode case, permutation in the upper 256 bits +- must mirror the permutation in the lower 256-bits. */ +- for (i = 0; i < 8; ++i) +- if (ipar[i] + 8 != ipar[i + 8]) +- return 0; +- /* FALLTHRU */ +- +- case E_V8SFmode: +- /* In 256 bit SFmode case, we have full freedom of +- movement within the low 128-bit lane, but the high 128-bit +- lane must mirror the exact same pattern. */ +- for (i = 0; i < 4; ++i) +- if (ipar[i] + 4 != ipar[i + 4]) +- return 0; +- nelt = 4; +- /* FALLTHRU */ +- +- case E_V2DFmode: +- case E_V4SFmode: +- /* In the 128-bit case, we've full freedom in the placement of +- the elements from the source operand. */ +- for (i = 0; i < nelt; ++i) +- mask |= ipar[i] << (i * (nelt / 2)); +- break; +- +- default: +- gcc_unreachable (); +- } +- +- /* Make sure success has a non-zero value by adding one. */ +- return mask + 1; +-} +- +-/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by +- the expansion functions to turn the parallel back into a mask. +- The return value is 0 for no match and the imm8+1 for a match. */ +- +-int +-avx_vperm2f128_parallel (rtx par, machine_mode mode) +-{ +- unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2; +- unsigned mask = 0; +- unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */ +- +- if (XVECLEN (par, 0) != (int) nelt) +- return 0; +- +- /* Validate that all of the elements are constants, and not totally +- out of range. Copy the data into an integral array to make the +- subsequent checks easier. */ +- for (i = 0; i < nelt; ++i) +- { +- rtx er = XVECEXP (par, 0, i); +- unsigned HOST_WIDE_INT ei; +- +- if (!CONST_INT_P (er)) +- return 0; +- ei = INTVAL (er); +- if (ei >= 2 * nelt) +- return 0; +- ipar[i] = ei; +- } +- +- /* Validate that the halves of the permute are halves. */ +- for (i = 0; i < nelt2 - 1; ++i) +- if (ipar[i] + 1 != ipar[i + 1]) +- return 0; +- for (i = nelt2; i < nelt - 1; ++i) +- if (ipar[i] + 1 != ipar[i + 1]) +- return 0; +- +- /* Reconstruct the mask. */ +- for (i = 0; i < 2; ++i) +- { +- unsigned e = ipar[i * nelt2]; +- if (e % nelt2) +- return 0; +- e /= nelt2; +- mask |= e << (i * 4); +- } +- +- /* Make sure success has a non-zero value by adding one. */ +- return mask + 1; +-} +- +-/* Return a register priority for hard reg REGNO. */ +-static int +-ix86_register_priority (int hard_regno) +-{ +- /* ebp and r13 as the base always wants a displacement, r12 as the +- base always wants an index. So discourage their usage in an +- address. */ +- if (hard_regno == R12_REG || hard_regno == R13_REG) +- return 0; +- if (hard_regno == BP_REG) +- return 1; +- /* New x86-64 int registers result in bigger code size. Discourage +- them. */ +- if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG)) +- return 2; +- /* New x86-64 SSE registers result in bigger code size. Discourage +- them. 
*/ +- if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG)) +- return 2; +- if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG)) +- return 1; +- /* Usage of AX register results in smaller code. Prefer it. */ +- if (hard_regno == AX_REG) +- return 4; +- return 3; +-} +- +-/* Implement TARGET_PREFERRED_RELOAD_CLASS. +- +- Put float CONST_DOUBLE in the constant pool instead of fp regs. +- QImode must go into class Q_REGS. +- Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and +- movdf to do mem-to-mem moves through integer regs. */ +- +-static reg_class_t +-ix86_preferred_reload_class (rtx x, reg_class_t regclass) +-{ +- machine_mode mode = GET_MODE (x); +- +- /* We're only allowed to return a subclass of CLASS. Many of the +- following checks fail for NO_REGS, so eliminate that early. */ +- if (regclass == NO_REGS) +- return NO_REGS; +- +- /* All classes can load zeros. */ +- if (x == CONST0_RTX (mode)) +- return regclass; +- +- /* Force constants into memory if we are loading a (nonzero) constant into +- an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK +- instructions to load from a constant. */ +- if (CONSTANT_P (x) +- && (MAYBE_MMX_CLASS_P (regclass) +- || MAYBE_SSE_CLASS_P (regclass) +- || MAYBE_MASK_CLASS_P (regclass))) +- return NO_REGS; +- +- /* Floating-point constants need more complex checks. */ +- if (CONST_DOUBLE_P (x)) +- { +- /* General regs can load everything. */ +- if (INTEGER_CLASS_P (regclass)) +- return regclass; +- +- /* Floats can load 0 and 1 plus some others. Note that we eliminated +- zero above. We only want to wind up preferring 80387 registers if +- we plan on doing computation with them. */ +- if (IS_STACK_MODE (mode) +- && standard_80387_constant_p (x) > 0) +- { +- /* Limit class to FP regs. */ +- if (FLOAT_CLASS_P (regclass)) +- return FLOAT_REGS; +- } +- +- return NO_REGS; +- } +- +- /* Prefer SSE regs only, if we can use them for math. */ +- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) +- return SSE_CLASS_P (regclass) ? regclass : NO_REGS; +- +- /* Generally when we see PLUS here, it's the function invariant +- (plus soft-fp const_int). Which can only be computed into general +- regs. */ +- if (GET_CODE (x) == PLUS) +- return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS; +- +- /* QImode constants are easy to load, but non-constant QImode data +- must go into Q_REGS. */ +- if (GET_MODE (x) == QImode && !CONSTANT_P (x)) +- { +- if (Q_CLASS_P (regclass)) +- return regclass; +- else if (reg_class_subset_p (Q_REGS, regclass)) +- return Q_REGS; +- else +- return NO_REGS; +- } +- +- return regclass; +-} +- +-/* Discourage putting floating-point values in SSE registers unless +- SSE math is being used, and likewise for the 387 registers. */ +-static reg_class_t +-ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) +-{ +- /* Restrict the output reload class to the register bank that we are doing +- math on. If we would like not to return a subset of CLASS, reject this +- alternative: if reload cannot do this, it will still use its choice. */ +- machine_mode mode = GET_MODE (x); +- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) +- return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; +- +- if (IS_STACK_MODE (mode)) +- return FLOAT_CLASS_P (regclass) ? 
regclass : NO_REGS; +- +- return regclass; +-} +- +-static reg_class_t +-ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, +- machine_mode mode, secondary_reload_info *sri) +-{ +- /* Double-word spills from general registers to non-offsettable memory +- references (zero-extended addresses) require special handling. */ +- if (TARGET_64BIT +- && MEM_P (x) +- && GET_MODE_SIZE (mode) > UNITS_PER_WORD +- && INTEGER_CLASS_P (rclass) +- && !offsettable_memref_p (x)) +- { +- sri->icode = (in_p +- ? CODE_FOR_reload_noff_load +- : CODE_FOR_reload_noff_store); +- /* Add the cost of moving address to a temporary. */ +- sri->extra_cost = 1; +- +- return NO_REGS; +- } +- +- /* QImode spills from non-QI registers require +- intermediate register on 32bit targets. */ +- if (mode == QImode +- && ((!TARGET_64BIT && !in_p +- && INTEGER_CLASS_P (rclass) +- && MAYBE_NON_Q_CLASS_P (rclass)) +- || (!TARGET_AVX512DQ +- && MAYBE_MASK_CLASS_P (rclass)))) +- { +- int regno = true_regnum (x); +- +- /* Return Q_REGS if the operand is in memory. */ +- if (regno == -1) +- return Q_REGS; +- +- return NO_REGS; +- } +- +- /* This condition handles corner case where an expression involving +- pointers gets vectorized. We're trying to use the address of a +- stack slot as a vector initializer. +- +- (set (reg:V2DI 74 [ vect_cst_.2 ]) +- (vec_duplicate:V2DI (reg/f:DI 20 frame))) +- +- Eventually frame gets turned into sp+offset like this: +- +- (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) +- (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) +- (const_int 392 [0x188])))) +- +- That later gets turned into: +- +- (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) +- (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) +- (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])))) +- +- We'll have the following reload recorded: +- +- Reload 0: reload_in (DI) = +- (plus:DI (reg/f:DI 7 sp) +- (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) +- reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) +- SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine +- reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188])) +- reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) +- reload_reg_rtx: (reg:V2DI 22 xmm1) +- +- Which isn't going to work since SSE instructions can't handle scalar +- additions. Returning GENERAL_REGS forces the addition into integer +- register and reload can handle subsequent reloads without problems. */ +- +- if (in_p && GET_CODE (x) == PLUS +- && SSE_CLASS_P (rclass) +- && SCALAR_INT_MODE_P (mode)) +- return GENERAL_REGS; +- +- return NO_REGS; +-} +- +-/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ +- +-static bool +-ix86_class_likely_spilled_p (reg_class_t rclass) +-{ +- switch (rclass) +- { +- case AREG: +- case DREG: +- case CREG: +- case BREG: +- case AD_REGS: +- case SIREG: +- case DIREG: +- case SSE_FIRST_REG: +- case FP_TOP_REG: +- case FP_SECOND_REG: +- return true; +- +- default: +- break; +- } +- +- return false; +-} +- +-/* If we are copying between registers from different register sets +- (e.g. FP and integer), we may need a memory location. +- +- The function can't work reliably when one of the CLASSES is a class +- containing registers from multiple sets. We avoid this by never combining +- different sets in a single alternative in the machine description. +- Ensure that this constraint holds to avoid unexpected surprises. +- +- When STRICT is false, we are being called from REGISTER_MOVE_COST, +- so do not enforce these sanity checks. 
+- +- To optimize register_move_cost performance, define inline variant. */ +- +-static inline bool +-inline_secondary_memory_needed (machine_mode mode, reg_class_t class1, +- reg_class_t class2, int strict) +-{ +- if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) +- return false; +- +- if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) +- || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) +- || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) +- || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) +- || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) +- || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2) +- || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1) +- || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2)) +- { +- gcc_assert (!strict || lra_in_progress); +- return true; +- } +- +- if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) +- return true; +- +- /* Between mask and general, we have moves no larger than word size. */ +- if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) +- && (GET_MODE_SIZE (mode) > UNITS_PER_WORD)) +- return true; +- +- /* ??? This is a lie. We do have moves between mmx/general, and for +- mmx/sse2. But by saying we need secondary memory we discourage the +- register allocator from using the mmx registers unless needed. */ +- if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) +- return true; +- +- if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) +- { +- /* SSE1 doesn't have any direct moves from other classes. */ +- if (!TARGET_SSE2) +- return true; +- +- /* If the target says that inter-unit moves are more expensive +- than moving through memory, then don't generate them. */ +- if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC) +- || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC)) +- return true; +- +- /* Between SSE and general, we have moves no larger than word size. */ +- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) +- return true; +- } +- +- return false; +-} +- +-/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */ +- +-static bool +-ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1, +- reg_class_t class2) +-{ +- return inline_secondary_memory_needed (mode, class1, class2, true); +-} +- +-/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. +- +- get_secondary_mem widens integral modes to BITS_PER_WORD. +- There is no need to emit full 64 bit move on 64 bit targets +- for integral modes that can be moved using 32 bit move. */ +- +-static machine_mode +-ix86_secondary_memory_needed_mode (machine_mode mode) +-{ +- if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode)) +- return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); +- return mode; +-} +- +-/* Implement the TARGET_CLASS_MAX_NREGS hook. +- +- On the 80386, this is the size of MODE in words, +- except in the FP regs, where a single reg is always enough. */ +- +-static unsigned char +-ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) +-{ +- if (MAYBE_INTEGER_CLASS_P (rclass)) +- { +- if (mode == XFmode) +- return (TARGET_64BIT ? 2 : 3); +- else if (mode == XCmode) +- return (TARGET_64BIT ? 4 : 6); +- else +- return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); +- } +- else +- { +- if (COMPLEX_MODE_P (mode)) +- return 2; +- else +- return 1; +- } +-} +- +-/* Implement TARGET_CAN_CHANGE_MODE_CLASS. 
*/ +- +-static bool +-ix86_can_change_mode_class (machine_mode from, machine_mode to, +- reg_class_t regclass) +-{ +- if (from == to) +- return true; +- +- /* x87 registers can't do subreg at all, as all values are reformatted +- to extended precision. */ +- if (MAYBE_FLOAT_CLASS_P (regclass)) +- return false; +- +- if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) +- { +- /* Vector registers do not support QI or HImode loads. If we don't +- disallow a change to these modes, reload will assume it's ok to +- drop the subreg from (subreg:SI (reg:HI 100) 0). This affects +- the vec_dupv4hi pattern. */ +- if (GET_MODE_SIZE (from) < 4) +- return false; +- } +- +- return true; +-} +- +-/* Return index of MODE in the sse load/store tables. */ +- +-static inline int +-sse_store_index (machine_mode mode) +-{ +- switch (GET_MODE_SIZE (mode)) +- { +- case 4: +- return 0; +- case 8: +- return 1; +- case 16: +- return 2; +- case 32: +- return 3; +- case 64: +- return 4; +- default: +- return -1; +- } +-} +- +-/* Return the cost of moving data of mode M between a +- register and memory. A value of 2 is the default; this cost is +- relative to those in `REGISTER_MOVE_COST'. +- +- This function is used extensively by register_move_cost that is used to +- build tables at startup. Make it inline in this case. +- When IN is 2, return maximum of in and out move cost. +- +- If moving between registers and memory is more expensive than +- between two registers, you should define this macro to express the +- relative cost. +- +- Model also increased moving costs of QImode registers in non +- Q_REGS classes. +- */ +-static inline int +-inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) +-{ +- int cost; +- if (FLOAT_CLASS_P (regclass)) +- { +- int index; +- switch (mode) +- { +- case E_SFmode: +- index = 0; +- break; +- case E_DFmode: +- index = 1; +- break; +- case E_XFmode: +- index = 2; +- break; +- default: +- return 100; +- } +- if (in == 2) +- return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]); +- return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; +- } +- if (SSE_CLASS_P (regclass)) +- { +- int index = sse_store_index (mode); +- if (index == -1) +- return 100; +- if (in == 2) +- return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]); +- return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; +- } +- if (MMX_CLASS_P (regclass)) +- { +- int index; +- switch (GET_MODE_SIZE (mode)) +- { +- case 4: +- index = 0; +- break; +- case 8: +- index = 1; +- break; +- default: +- return 100; +- } +- if (in == 2) +- return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]); +- return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; +- } +- switch (GET_MODE_SIZE (mode)) +- { +- case 1: +- if (Q_CLASS_P (regclass) || TARGET_64BIT) +- { +- if (!in) +- return ix86_cost->int_store[0]; +- if (TARGET_PARTIAL_REG_DEPENDENCY +- && optimize_function_for_speed_p (cfun)) +- cost = ix86_cost->movzbl_load; +- else +- cost = ix86_cost->int_load[0]; +- if (in == 2) +- return MAX (cost, ix86_cost->int_store[0]); +- return cost; +- } +- else +- { +- if (in == 2) +- return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4); +- if (in) +- return ix86_cost->movzbl_load; +- else +- return ix86_cost->int_store[0] + 4; +- } +- break; +- case 2: +- if (in == 2) +- return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]); +- return in ? 
ix86_cost->int_load[1] : ix86_cost->int_store[1]; +- default: +- if (in == 2) +- cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]); +- else if (in) +- cost = ix86_cost->int_load[2]; +- else +- cost = ix86_cost->int_store[2]; +- /* Multiply with the number of GPR moves needed. */ +- return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD); +- } +-} +- +-static int +-ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) +-{ +- return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0); +-} +- +- +-/* Return the cost of moving data from a register in class CLASS1 to +- one in class CLASS2. +- +- It is not required that the cost always equal 2 when FROM is the same as TO; +- on some machines it is expensive to move between registers if they are not +- general registers. */ +- +-static int +-ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, +- reg_class_t class2_i) +-{ +- enum reg_class class1 = (enum reg_class) class1_i; +- enum reg_class class2 = (enum reg_class) class2_i; +- +- /* In case we require secondary memory, compute cost of the store followed +- by load. In order to avoid bad register allocation choices, we need +- for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ +- +- if (inline_secondary_memory_needed (mode, class1, class2, false)) +- { +- int cost = 1; +- +- cost += inline_memory_move_cost (mode, class1, 2); +- cost += inline_memory_move_cost (mode, class2, 2); +- +- /* In case of copying from general_purpose_register we may emit multiple +- stores followed by single load causing memory size mismatch stall. +- Count this as arbitrarily high cost of 20. */ +- if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD +- && TARGET_MEMORY_MISMATCH_STALL +- && targetm.class_max_nregs (class1, mode) +- > targetm.class_max_nregs (class2, mode)) +- cost += 20; +- +- /* In the case of FP/MMX moves, the registers actually overlap, and we +- have to switch modes in order to treat them differently. */ +- if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) +- || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) +- cost += 20; +- +- return cost; +- } +- +- /* Moves between SSE/MMX and integer unit are expensive. */ +- if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) +- || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) +- +- /* ??? By keeping returned value relatively high, we limit the number +- of moves between integer and MMX/SSE registers for all targets. +- Additionally, high value prevents problem with x86_modes_tieable_p(), +- where integer modes in MMX/SSE registers are not tieable +- because of missing QImode and HImode moves to, from or between +- MMX/SSE registers. */ +- return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2) +- ? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer); +- +- if (MAYBE_FLOAT_CLASS_P (class1)) +- return ix86_cost->fp_move; +- if (MAYBE_SSE_CLASS_P (class1)) +- { +- if (GET_MODE_BITSIZE (mode) <= 128) +- return ix86_cost->xmm_move; +- if (GET_MODE_BITSIZE (mode) <= 256) +- return ix86_cost->ymm_move; +- return ix86_cost->zmm_move; +- } +- if (MAYBE_MMX_CLASS_P (class1)) +- return ix86_cost->mmx_move; +- return 2; +-} +- +-/* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in +- words of a value of mode MODE but can be less for certain modes in +- special long registers. +- +- Actually there are no two word move instructions for consecutive +- registers. And only registers 0-3 may have mov byte instructions +- applied to them. 
*/ +- +-static unsigned int +-ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) +-{ +- if (GENERAL_REGNO_P (regno)) +- { +- if (mode == XFmode) +- return TARGET_64BIT ? 2 : 3; +- if (mode == XCmode) +- return TARGET_64BIT ? 4 : 6; +- return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); +- } +- if (COMPLEX_MODE_P (mode)) +- return 2; +- if (mode == V64SFmode || mode == V64SImode) +- return 4; +- return 1; +-} +- +-/* Implement TARGET_HARD_REGNO_MODE_OK. */ +- +-static bool +-ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) +-{ +- /* Flags and only flags can only hold CCmode values. */ +- if (CC_REGNO_P (regno)) +- return GET_MODE_CLASS (mode) == MODE_CC; +- if (GET_MODE_CLASS (mode) == MODE_CC +- || GET_MODE_CLASS (mode) == MODE_RANDOM +- || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) +- return false; +- if (STACK_REGNO_P (regno)) +- return VALID_FP_MODE_P (mode); +- if (MASK_REGNO_P (regno)) +- return (VALID_MASK_REG_MODE (mode) +- || (TARGET_AVX512BW +- && VALID_MASK_AVX512BW_MODE (mode))); +- if (SSE_REGNO_P (regno)) +- { +- /* We implement the move patterns for all vector modes into and +- out of SSE registers, even when no operation instructions +- are available. */ +- +- /* For AVX-512 we allow, regardless of regno: +- - XI mode +- - any of 512-bit wide vector mode +- - any scalar mode. */ +- if (TARGET_AVX512F +- && (mode == XImode +- || VALID_AVX512F_REG_MODE (mode) +- || VALID_AVX512F_SCALAR_MODE (mode))) +- return true; +- +- /* For AVX-5124FMAPS or AVX-5124VNNIW +- allow V64SF and V64SI modes for special regnos. */ +- if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW) +- && (mode == V64SFmode || mode == V64SImode) +- && MOD4_SSE_REGNO_P (regno)) +- return true; +- +- /* TODO check for QI/HI scalars. */ +- /* AVX512VL allows sse regs16+ for 128/256 bit modes. */ +- if (TARGET_AVX512VL +- && (mode == OImode +- || mode == TImode +- || VALID_AVX256_REG_MODE (mode) +- || VALID_AVX512VL_128_REG_MODE (mode))) +- return true; +- +- /* xmm16-xmm31 are only available for AVX-512. */ +- if (EXT_REX_SSE_REGNO_P (regno)) +- return false; +- +- /* OImode and AVX modes are available only when AVX is enabled. */ +- return ((TARGET_AVX +- && VALID_AVX256_REG_OR_OI_MODE (mode)) +- || VALID_SSE_REG_MODE (mode) +- || VALID_SSE2_REG_MODE (mode) +- || VALID_MMX_REG_MODE (mode) +- || VALID_MMX_REG_MODE_3DNOW (mode)); +- } +- if (MMX_REGNO_P (regno)) +- { +- /* We implement the move patterns for 3DNOW modes even in MMX mode, +- so if the register is available at all, then we can move data of +- the given mode into or out of it. */ +- return (VALID_MMX_REG_MODE (mode) +- || VALID_MMX_REG_MODE_3DNOW (mode)); +- } +- +- if (mode == QImode) +- { +- /* Take care for QImode values - they can be in non-QI regs, +- but then they do cause partial register stalls. */ +- if (ANY_QI_REGNO_P (regno)) +- return true; +- if (!TARGET_PARTIAL_REG_STALL) +- return true; +- /* LRA checks if the hard register is OK for the given mode. +- QImode values can live in non-QI regs, so we allow all +- registers here. */ +- if (lra_in_progress) +- return true; +- return !can_create_pseudo_p (); +- } +- /* We handle both integer and floats in the general purpose registers. */ +- else if (VALID_INT_MODE_P (mode)) +- return true; +- else if (VALID_FP_MODE_P (mode)) +- return true; +- else if (VALID_DFP_MODE_P (mode)) +- return true; +- /* Lots of MMX code casts 8 byte vector modes to DImode. 
If we then go +- on to use that value in smaller contexts, this can easily force a +- pseudo to be allocated to GENERAL_REGS. Since this is no worse than +- supporting DImode, allow it. */ +- else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) +- return true; +- +- return false; +-} +- +-/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that +- saves SSE registers across calls is Win64 (thus no need to check the +- current ABI here), and with AVX enabled Win64 only guarantees that +- the low 16 bytes are saved. */ +- +-static bool +-ix86_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED, +- unsigned int regno, machine_mode mode) +-{ +- return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16; +-} +- +-/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a +- tieable integer mode. */ +- +-static bool +-ix86_tieable_integer_mode_p (machine_mode mode) +-{ +- switch (mode) +- { +- case E_HImode: +- case E_SImode: +- return true; +- +- case E_QImode: +- return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; +- +- case E_DImode: +- return TARGET_64BIT; +- +- default: +- return false; +- } +-} +- +-/* Implement TARGET_MODES_TIEABLE_P. +- +- Return true if MODE1 is accessible in a register that can hold MODE2 +- without copying. That is, all register classes that can hold MODE2 +- can also hold MODE1. */ +- +-static bool +-ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) +-{ +- if (mode1 == mode2) +- return true; +- +- if (ix86_tieable_integer_mode_p (mode1) +- && ix86_tieable_integer_mode_p (mode2)) +- return true; +- +- /* MODE2 being XFmode implies fp stack or general regs, which means we +- can tie any smaller floating point modes to it. Note that we do not +- tie this with TFmode. */ +- if (mode2 == XFmode) +- return mode1 == SFmode || mode1 == DFmode; +- +- /* MODE2 being DFmode implies fp stack, general or sse regs, which means +- that we can tie it with SFmode. */ +- if (mode2 == DFmode) +- return mode1 == SFmode; +- +- /* If MODE2 is only appropriate for an SSE register, then tie with +- any other mode acceptable to SSE registers. */ +- if (GET_MODE_SIZE (mode2) == 64 +- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) +- return (GET_MODE_SIZE (mode1) == 64 +- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); +- if (GET_MODE_SIZE (mode2) == 32 +- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) +- return (GET_MODE_SIZE (mode1) == 32 +- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); +- if (GET_MODE_SIZE (mode2) == 16 +- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) +- return (GET_MODE_SIZE (mode1) == 16 +- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); +- +- /* If MODE2 is appropriate for an MMX register, then tie +- with any other mode acceptable to MMX registers. */ +- if (GET_MODE_SIZE (mode2) == 8 +- && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) +- return (GET_MODE_SIZE (mode1) == 8 +- && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1)); +- +- return false; +-} +- +-/* Return the cost of moving between two registers of mode MODE. 
*/ +- +-static int +-ix86_set_reg_reg_cost (machine_mode mode) +-{ +- unsigned int units = UNITS_PER_WORD; +- +- switch (GET_MODE_CLASS (mode)) +- { +- default: +- break; +- +- case MODE_CC: +- units = GET_MODE_SIZE (CCmode); +- break; +- +- case MODE_FLOAT: +- if ((TARGET_SSE && mode == TFmode) +- || (TARGET_80387 && mode == XFmode) +- || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode) +- || ((TARGET_80387 || TARGET_SSE) && mode == SFmode)) +- units = GET_MODE_SIZE (mode); +- break; +- +- case MODE_COMPLEX_FLOAT: +- if ((TARGET_SSE && mode == TCmode) +- || (TARGET_80387 && mode == XCmode) +- || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode) +- || ((TARGET_80387 || TARGET_SSE) && mode == SCmode)) +- units = GET_MODE_SIZE (mode); +- break; +- +- case MODE_VECTOR_INT: +- case MODE_VECTOR_FLOAT: +- if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) +- || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) +- || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) +- || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) +- || (TARGET_MMX && VALID_MMX_REG_MODE (mode))) +- units = GET_MODE_SIZE (mode); +- } +- +- /* Return the cost of moving between two registers of mode MODE, +- assuming that the move will be in pieces of at most UNITS bytes. */ +- return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units)); +-} +- +-/* Return cost of vector operation in MODE given that scalar version has +- COST. */ +- +-static int +-ix86_vec_cost (machine_mode mode, int cost) +-{ +- if (!VECTOR_MODE_P (mode)) +- return cost; +- +- if (GET_MODE_BITSIZE (mode) == 128 +- && TARGET_SSE_SPLIT_REGS) +- return cost * 2; +- if (GET_MODE_BITSIZE (mode) > 128 +- && TARGET_AVX128_OPTIMAL) +- return cost * GET_MODE_BITSIZE (mode) / 128; +- return cost; +-} +- +-/* Return cost of multiplication in MODE. */ +- +-static int +-ix86_multiplication_cost (const struct processor_costs *cost, +- enum machine_mode mode) +-{ +- machine_mode inner_mode = mode; +- if (VECTOR_MODE_P (mode)) +- inner_mode = GET_MODE_INNER (mode); +- +- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) +- return inner_mode == DFmode ? cost->mulsd : cost->mulss; +- else if (X87_FLOAT_MODE_P (mode)) +- return cost->fmul; +- else if (FLOAT_MODE_P (mode)) +- return ix86_vec_cost (mode, +- inner_mode == DFmode ? cost->mulsd : cost->mulss); +- else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) +- { +- /* vpmullq is used in this case. No emulation is needed. */ +- if (TARGET_AVX512DQ) +- return ix86_vec_cost (mode, cost->mulss); +- +- /* V*QImode is emulated with 7-13 insns. */ +- if (mode == V16QImode || mode == V32QImode) +- { +- int extra = 11; +- if (TARGET_XOP && mode == V16QImode) +- extra = 5; +- else if (TARGET_SSSE3) +- extra = 6; +- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra); +- } +- /* V*DImode is emulated with 5-8 insns. */ +- else if (mode == V2DImode || mode == V4DImode) +- { +- if (TARGET_XOP && mode == V2DImode) +- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3); +- else +- return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5); +- } +- /* Without sse4.1, we don't have PMULLD; it's emulated with 7 +- insns, including two PMULUDQ. */ +- else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX)) +- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5); +- else +- return ix86_vec_cost (mode, cost->mulss); +- } +- else +- return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7); +-} +- +-/* Return cost of multiplication in MODE. 
*/ +- +-static int +-ix86_division_cost (const struct processor_costs *cost, +- enum machine_mode mode) +-{ +- machine_mode inner_mode = mode; +- if (VECTOR_MODE_P (mode)) +- inner_mode = GET_MODE_INNER (mode); +- +- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) +- return inner_mode == DFmode ? cost->divsd : cost->divss; +- else if (X87_FLOAT_MODE_P (mode)) +- return cost->fdiv; +- else if (FLOAT_MODE_P (mode)) +- return ix86_vec_cost (mode, +- inner_mode == DFmode ? cost->divsd : cost->divss); +- else +- return cost->divide[MODE_INDEX (mode)]; +-} +- +-/* Return cost of shift in MODE. +- If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL. +- AND_IN_OP1 specify in op1 is result of and and SHIFT_AND_TRUNCATE +- if op1 is a result of subreg. +- +- SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */ +- +-static int +-ix86_shift_rotate_cost (const struct processor_costs *cost, +- enum machine_mode mode, bool constant_op1, +- HOST_WIDE_INT op1_val, +- bool speed, +- bool and_in_op1, +- bool shift_and_truncate, +- bool *skip_op0, bool *skip_op1) +-{ +- if (skip_op0) +- *skip_op0 = *skip_op1 = false; +- if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) +- { +- /* V*QImode is emulated with 1-11 insns. */ +- if (mode == V16QImode || mode == V32QImode) +- { +- int count = 11; +- if (TARGET_XOP && mode == V16QImode) +- { +- /* For XOP we use vpshab, which requires a broadcast of the +- value to the variable shift insn. For constants this +- means a V16Q const in mem; even when we can perform the +- shift with one insn set the cost to prefer paddb. */ +- if (constant_op1) +- { +- if (skip_op1) +- *skip_op1 = true; +- return ix86_vec_cost (mode, +- cost->sse_op +- + (speed +- ? 2 +- : COSTS_N_BYTES +- (GET_MODE_UNIT_SIZE (mode)))); +- } +- count = 3; +- } +- else if (TARGET_SSSE3) +- count = 7; +- return ix86_vec_cost (mode, cost->sse_op * count); +- } +- else +- return ix86_vec_cost (mode, cost->sse_op); +- } +- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) +- { +- if (constant_op1) +- { +- if (op1_val > 32) +- return cost->shift_const + COSTS_N_INSNS (2); +- else +- return cost->shift_const * 2; +- } +- else +- { +- if (and_in_op1) +- return cost->shift_var * 2; +- else +- return cost->shift_var * 6 + COSTS_N_INSNS (2); +- } +- } +- else +- { +- if (constant_op1) +- return cost->shift_const; +- else if (shift_and_truncate) +- { +- if (skip_op0) +- *skip_op0 = *skip_op1 = true; +- /* Return the cost after shift-and truncation. */ +- return cost->shift_var; +- } +- else +- return cost->shift_var; +- } +- return cost->shift_const; +-} +- +-/* Compute a (partial) cost for rtx X. Return true if the complete +- cost has been computed, and false if subexpressions should be +- scanned. In either case, *TOTAL contains the cost result. */ +- +-static bool +-ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, +- int *total, bool speed) +-{ +- rtx mask; +- enum rtx_code code = GET_CODE (x); +- enum rtx_code outer_code = (enum rtx_code) outer_code_i; +- const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost; +- int src_cost; +- +- switch (code) +- { +- case SET: +- if (register_operand (SET_DEST (x), VOIDmode) +- && register_operand (SET_SRC (x), VOIDmode)) +- { +- *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x))); +- return true; +- } +- +- if (register_operand (SET_SRC (x), VOIDmode)) +- /* Avoid potentially incorrect high cost from rtx_costs +- for non-tieable SUBREGs. 
*/ +- src_cost = 0; +- else +- { +- src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed); +- +- if (CONSTANT_P (SET_SRC (x))) +- /* Constant costs assume a base value of COSTS_N_INSNS (1) and add +- a small value, possibly zero for cheap constants. */ +- src_cost += COSTS_N_INSNS (1); +- } +- +- *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed); +- return true; +- +- case CONST_INT: +- case CONST: +- case LABEL_REF: +- case SYMBOL_REF: +- if (x86_64_immediate_operand (x, VOIDmode)) +- *total = 0; +- else +- *total = 1; +- return true; +- +- case CONST_DOUBLE: +- if (IS_STACK_MODE (mode)) +- switch (standard_80387_constant_p (x)) +- { +- case -1: +- case 0: +- break; +- case 1: /* 0.0 */ +- *total = 1; +- return true; +- default: /* Other constants */ +- *total = 2; +- return true; +- } +- /* FALLTHRU */ +- +- case CONST_VECTOR: +- switch (standard_sse_constant_p (x, mode)) +- { +- case 0: +- break; +- case 1: /* 0: xor eliminates false dependency */ +- *total = 0; +- return true; +- default: /* -1: cmp contains false dependency */ +- *total = 1; +- return true; +- } +- /* FALLTHRU */ +- +- case CONST_WIDE_INT: +- /* Fall back to (MEM (SYMBOL_REF)), since that's where +- it'll probably end up. Add a penalty for size. */ +- *total = (COSTS_N_INSNS (1) +- + (!TARGET_64BIT && flag_pic) +- + (GET_MODE_SIZE (mode) <= 4 +- ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2)); +- return true; +- +- case ZERO_EXTEND: +- /* The zero extensions is often completely free on x86_64, so make +- it as cheap as possible. */ +- if (TARGET_64BIT && mode == DImode +- && GET_MODE (XEXP (x, 0)) == SImode) +- *total = 1; +- else if (TARGET_ZERO_EXTEND_WITH_AND) +- *total = cost->add; +- else +- *total = cost->movzx; +- return false; +- +- case SIGN_EXTEND: +- *total = cost->movsx; +- return false; +- +- case ASHIFT: +- if (SCALAR_INT_MODE_P (mode) +- && GET_MODE_SIZE (mode) < UNITS_PER_WORD +- && CONST_INT_P (XEXP (x, 1))) +- { +- HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); +- if (value == 1) +- { +- *total = cost->add; +- return false; +- } +- if ((value == 2 || value == 3) +- && cost->lea <= cost->shift_const) +- { +- *total = cost->lea; +- return false; +- } +- } +- /* FALLTHRU */ +- +- case ROTATE: +- case ASHIFTRT: +- case LSHIFTRT: +- case ROTATERT: +- bool skip_op0, skip_op1; +- *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)), +- CONST_INT_P (XEXP (x, 1)) +- ? INTVAL (XEXP (x, 1)) : -1, +- speed, +- GET_CODE (XEXP (x, 1)) == AND, +- SUBREG_P (XEXP (x, 1)) +- && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND, +- &skip_op0, &skip_op1); +- if (skip_op0 || skip_op1) +- { +- if (!skip_op0) +- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); +- if (!skip_op1) +- *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed); +- return true; +- } +- return false; +- +- case FMA: +- { +- rtx sub; +- +- gcc_assert (FLOAT_MODE_P (mode)); +- gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); +- +- *total = ix86_vec_cost (mode, +- GET_MODE_INNER (mode) == SFmode +- ? cost->fmass : cost->fmasd); +- *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); +- +- /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. 
*/ +- sub = XEXP (x, 0); +- if (GET_CODE (sub) == NEG) +- sub = XEXP (sub, 0); +- *total += rtx_cost (sub, mode, FMA, 0, speed); +- +- sub = XEXP (x, 2); +- if (GET_CODE (sub) == NEG) +- sub = XEXP (sub, 0); +- *total += rtx_cost (sub, mode, FMA, 2, speed); +- return true; +- } +- +- case MULT: +- if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode)) +- { +- rtx op0 = XEXP (x, 0); +- rtx op1 = XEXP (x, 1); +- int nbits; +- if (CONST_INT_P (XEXP (x, 1))) +- { +- unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); +- for (nbits = 0; value != 0; value &= value - 1) +- nbits++; +- } +- else +- /* This is arbitrary. */ +- nbits = 7; +- +- /* Compute costs correctly for widening multiplication. */ +- if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) +- && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 +- == GET_MODE_SIZE (mode)) +- { +- int is_mulwiden = 0; +- machine_mode inner_mode = GET_MODE (op0); +- +- if (GET_CODE (op0) == GET_CODE (op1)) +- is_mulwiden = 1, op1 = XEXP (op1, 0); +- else if (CONST_INT_P (op1)) +- { +- if (GET_CODE (op0) == SIGN_EXTEND) +- is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) +- == INTVAL (op1); +- else +- is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); +- } +- +- if (is_mulwiden) +- op0 = XEXP (op0, 0), mode = GET_MODE (op0); +- } +- +- *total = (cost->mult_init[MODE_INDEX (mode)] +- + nbits * cost->mult_bit +- + rtx_cost (op0, mode, outer_code, opno, speed) +- + rtx_cost (op1, mode, outer_code, opno, speed)); +- +- return true; +- } +- *total = ix86_multiplication_cost (cost, mode); +- return false; +- +- case DIV: +- case UDIV: +- case MOD: +- case UMOD: +- *total = ix86_division_cost (cost, mode); +- return false; +- +- case PLUS: +- if (GET_MODE_CLASS (mode) == MODE_INT +- && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) +- { +- if (GET_CODE (XEXP (x, 0)) == PLUS +- && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT +- && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) +- && CONSTANT_P (XEXP (x, 1))) +- { +- HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); +- if (val == 2 || val == 4 || val == 8) +- { +- *total = cost->lea; +- *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, +- outer_code, opno, speed); +- *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, +- outer_code, opno, speed); +- *total += rtx_cost (XEXP (x, 1), mode, +- outer_code, opno, speed); +- return true; +- } +- } +- else if (GET_CODE (XEXP (x, 0)) == MULT +- && CONST_INT_P (XEXP (XEXP (x, 0), 1))) +- { +- HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); +- if (val == 2 || val == 4 || val == 8) +- { +- *total = cost->lea; +- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, +- outer_code, opno, speed); +- *total += rtx_cost (XEXP (x, 1), mode, +- outer_code, opno, speed); +- return true; +- } +- } +- else if (GET_CODE (XEXP (x, 0)) == PLUS) +- { +- /* Add with carry, ignore the cost of adding a carry flag. */ +- if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode)) +- *total = cost->add; +- else +- { +- *total = cost->lea; +- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, +- outer_code, opno, speed); +- } +- +- *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, +- outer_code, opno, speed); +- *total += rtx_cost (XEXP (x, 1), mode, +- outer_code, opno, speed); +- return true; +- } +- } +- /* FALLTHRU */ +- +- case MINUS: +- /* Subtract with borrow, ignore the cost of subtracting a carry flag. 
*/ +- if (GET_MODE_CLASS (mode) == MODE_INT +- && GET_MODE_SIZE (mode) <= UNITS_PER_WORD +- && GET_CODE (XEXP (x, 0)) == MINUS +- && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)) +- { +- *total = cost->add; +- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, +- outer_code, opno, speed); +- *total += rtx_cost (XEXP (x, 1), mode, +- outer_code, opno, speed); +- return true; +- } +- +- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) +- { +- *total = cost->addss; +- return false; +- } +- else if (X87_FLOAT_MODE_P (mode)) +- { +- *total = cost->fadd; +- return false; +- } +- else if (FLOAT_MODE_P (mode)) +- { +- *total = ix86_vec_cost (mode, cost->addss); +- return false; +- } +- /* FALLTHRU */ +- +- case AND: +- case IOR: +- case XOR: +- if (GET_MODE_CLASS (mode) == MODE_INT +- && GET_MODE_SIZE (mode) > UNITS_PER_WORD) +- { +- *total = (cost->add * 2 +- + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) +- << (GET_MODE (XEXP (x, 0)) != DImode)) +- + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) +- << (GET_MODE (XEXP (x, 1)) != DImode))); +- return true; +- } +- /* FALLTHRU */ +- +- case NEG: +- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) +- { +- *total = cost->sse_op; +- return false; +- } +- else if (X87_FLOAT_MODE_P (mode)) +- { +- *total = cost->fchs; +- return false; +- } +- else if (FLOAT_MODE_P (mode)) +- { +- *total = ix86_vec_cost (mode, cost->sse_op); +- return false; +- } +- /* FALLTHRU */ +- +- case NOT: +- if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) +- *total = ix86_vec_cost (mode, cost->sse_op); +- else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) +- *total = cost->add * 2; +- else +- *total = cost->add; +- return false; +- +- case COMPARE: +- if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT +- && XEXP (XEXP (x, 0), 1) == const1_rtx +- && CONST_INT_P (XEXP (XEXP (x, 0), 2)) +- && XEXP (x, 1) == const0_rtx) +- { +- /* This kind of construct is implemented using test[bwl]. +- Treat it as if we had an AND. */ +- mode = GET_MODE (XEXP (XEXP (x, 0), 0)); +- *total = (cost->add +- + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code, +- opno, speed) +- + rtx_cost (const1_rtx, mode, outer_code, opno, speed)); +- return true; +- } +- +- /* The embedded comparison operand is completely free. */ +- if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))) +- && XEXP (x, 1) == const0_rtx) +- *total = 0; +- +- return false; +- +- case FLOAT_EXTEND: +- if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) +- *total = 0; +- else +- *total = ix86_vec_cost (mode, cost->addss); +- return false; +- +- case FLOAT_TRUNCATE: +- if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) +- *total = cost->fadd; +- else +- *total = ix86_vec_cost (mode, cost->addss); +- return false; +- +- case ABS: +- /* SSE requires memory load for the constant operand. It may make +- sense to account for this. Of course the constant operand may or +- may not be reused. */ +- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) +- *total = cost->sse_op; +- else if (X87_FLOAT_MODE_P (mode)) +- *total = cost->fabs; +- else if (FLOAT_MODE_P (mode)) +- *total = ix86_vec_cost (mode, cost->sse_op); +- return false; +- +- case SQRT: +- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) +- *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; +- else if (X87_FLOAT_MODE_P (mode)) +- *total = cost->fsqrt; +- else if (FLOAT_MODE_P (mode)) +- *total = ix86_vec_cost (mode, +- mode == SFmode ? 
cost->sqrtss : cost->sqrtsd); +- return false; +- +- case UNSPEC: +- if (XINT (x, 1) == UNSPEC_TP) +- *total = 0; +- return false; +- +- case VEC_SELECT: +- case VEC_CONCAT: +- case VEC_DUPLICATE: +- /* ??? Assume all of these vector manipulation patterns are +- recognizable. In which case they all pretty much have the +- same cost. */ +- *total = cost->sse_op; +- return true; +- case VEC_MERGE: +- mask = XEXP (x, 2); +- /* This is masked instruction, assume the same cost, +- as nonmasked variant. */ +- if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) +- *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); +- else +- *total = cost->sse_op; +- return true; +- +- default: +- return false; +- } +-} +- +-#if TARGET_MACHO +- +-static int current_machopic_label_num; +- +-/* Given a symbol name and its associated stub, write out the +- definition of the stub. */ +- +-void +-machopic_output_stub (FILE *file, const char *symb, const char *stub) +-{ +- unsigned int length; +- char *binder_name, *symbol_name, lazy_ptr_name[32]; +- int label = ++current_machopic_label_num; +- +- /* For 64-bit we shouldn't get here. */ +- gcc_assert (!TARGET_64BIT); +- +- /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ +- symb = targetm.strip_name_encoding (symb); +- +- length = strlen (stub); +- binder_name = XALLOCAVEC (char, length + 32); +- GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); +- +- length = strlen (symb); +- symbol_name = XALLOCAVEC (char, length + 32); +- GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); +- +- sprintf (lazy_ptr_name, "L%d$lz", label); +- +- if (MACHOPIC_ATT_STUB) +- switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]); +- else if (MACHOPIC_PURE) +- switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); +- else +- switch_to_section (darwin_sections[machopic_symbol_stub_section]); +- +- fprintf (file, "%s:\n", stub); +- fprintf (file, "\t.indirect_symbol %s\n", symbol_name); +- +- if (MACHOPIC_ATT_STUB) +- { +- fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n"); +- } +- else if (MACHOPIC_PURE) +- { +- /* PIC stub. */ +- /* 25-byte PIC stub using "CALL get_pc_thunk". */ +- rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); +- output_set_got (tmp, NULL_RTX); /* "CALL ___.get_pc_thunk.cx". */ +- fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", +- label, lazy_ptr_name, label); +- fprintf (file, "\tjmp\t*%%ecx\n"); +- } +- else +- fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); +- +- /* The AT&T-style ("self-modifying") stub is not lazily bound, thus +- it needs no stub-binding-helper. */ +- if (MACHOPIC_ATT_STUB) +- return; +- +- fprintf (file, "%s:\n", binder_name); +- +- if (MACHOPIC_PURE) +- { +- fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name); +- fprintf (file, "\tpushl\t%%ecx\n"); +- } +- else +- fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); +- +- fputs ("\tjmp\tdyld_stub_binding_helper\n", file); +- +- /* N.B. Keep the correspondence of these +- 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the +- old-pic/new-pic/non-pic stubs; altering this will break +- compatibility with existing dylibs. */ +- if (MACHOPIC_PURE) +- { +- /* 25-byte PIC stub using "CALL get_pc_thunk". */ +- switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); +- } +- else +- /* 16-byte -mdynamic-no-pic stub. 
*/ +- switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]); +- +- fprintf (file, "%s:\n", lazy_ptr_name); +- fprintf (file, "\t.indirect_symbol %s\n", symbol_name); +- fprintf (file, ASM_LONG "%s\n", binder_name); +-} +-#endif /* TARGET_MACHO */ +- +-/* Order the registers for register allocator. */ +- +-void +-x86_order_regs_for_local_alloc (void) +-{ +- int pos = 0; +- int i; +- +- /* First allocate the local general purpose registers. */ +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (GENERAL_REGNO_P (i) && call_used_regs[i]) +- reg_alloc_order [pos++] = i; +- +- /* Global general purpose registers. */ +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (GENERAL_REGNO_P (i) && !call_used_regs[i]) +- reg_alloc_order [pos++] = i; +- +- /* x87 registers come first in case we are doing FP math +- using them. */ +- if (!TARGET_SSE_MATH) +- for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) +- reg_alloc_order [pos++] = i; +- +- /* SSE registers. */ +- for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) +- reg_alloc_order [pos++] = i; +- for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) +- reg_alloc_order [pos++] = i; +- +- /* Extended REX SSE registers. */ +- for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) +- reg_alloc_order [pos++] = i; +- +- /* Mask register. */ +- for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) +- reg_alloc_order [pos++] = i; +- +- /* x87 registers. */ +- if (TARGET_SSE_MATH) +- for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) +- reg_alloc_order [pos++] = i; +- +- for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) +- reg_alloc_order [pos++] = i; +- +- /* Initialize the rest of array as we do not allocate some registers +- at all. */ +- while (pos < FIRST_PSEUDO_REGISTER) +- reg_alloc_order [pos++] = 0; +-} +- +-/* Handle a "callee_pop_aggregate_return" attribute; arguments as +- in struct attribute_spec handler. */ +-static tree +-ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, int, +- bool *no_add_attrs) +-{ +- if (TREE_CODE (*node) != FUNCTION_TYPE +- && TREE_CODE (*node) != METHOD_TYPE +- && TREE_CODE (*node) != FIELD_DECL +- && TREE_CODE (*node) != TYPE_DECL) +- { +- warning (OPT_Wattributes, "%qE attribute only applies to functions", +- name); +- *no_add_attrs = true; +- return NULL_TREE; +- } +- if (TARGET_64BIT) +- { +- warning (OPT_Wattributes, "%qE attribute only available for 32-bit", +- name); +- *no_add_attrs = true; +- return NULL_TREE; +- } +- if (is_attribute_p ("callee_pop_aggregate_return", name)) +- { +- tree cst; +- +- cst = TREE_VALUE (args); +- if (TREE_CODE (cst) != INTEGER_CST) +- { +- warning (OPT_Wattributes, +- "%qE attribute requires an integer constant argument", +- name); +- *no_add_attrs = true; +- } +- else if (compare_tree_int (cst, 0) != 0 +- && compare_tree_int (cst, 1) != 0) +- { +- warning (OPT_Wattributes, +- "argument to %qE attribute is neither zero, nor one", +- name); +- *no_add_attrs = true; +- } +- +- return NULL_TREE; +- } +- +- return NULL_TREE; +-} +- +-/* Handle a "ms_abi" or "sysv" attribute; arguments as in +- struct attribute_spec.handler. 
*/ +-static tree +-ix86_handle_abi_attribute (tree *node, tree name, tree, int, +- bool *no_add_attrs) +-{ +- if (TREE_CODE (*node) != FUNCTION_TYPE +- && TREE_CODE (*node) != METHOD_TYPE +- && TREE_CODE (*node) != FIELD_DECL +- && TREE_CODE (*node) != TYPE_DECL) +- { +- warning (OPT_Wattributes, "%qE attribute only applies to functions", +- name); +- *no_add_attrs = true; +- return NULL_TREE; +- } +- +- /* Can combine regparm with all attributes but fastcall. */ +- if (is_attribute_p ("ms_abi", name)) +- { +- if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node))) +- { +- error ("ms_abi and sysv_abi attributes are not compatible"); +- } +- +- return NULL_TREE; +- } +- else if (is_attribute_p ("sysv_abi", name)) +- { +- if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node))) +- { +- error ("ms_abi and sysv_abi attributes are not compatible"); +- } +- +- return NULL_TREE; +- } +- +- return NULL_TREE; +-} +- +-/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in +- struct attribute_spec.handler. */ +-static tree +-ix86_handle_struct_attribute (tree *node, tree name, tree, int, +- bool *no_add_attrs) +-{ +- tree *type = NULL; +- if (DECL_P (*node)) +- { +- if (TREE_CODE (*node) == TYPE_DECL) +- type = &TREE_TYPE (*node); +- } +- else +- type = node; +- +- if (!(type && RECORD_OR_UNION_TYPE_P (*type))) +- { +- warning (OPT_Wattributes, "%qE attribute ignored", +- name); +- *no_add_attrs = true; +- } +- +- else if ((is_attribute_p ("ms_struct", name) +- && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) +- || ((is_attribute_p ("gcc_struct", name) +- && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) +- { +- warning (OPT_Wattributes, "%qE incompatible attribute ignored", +- name); +- *no_add_attrs = true; +- } +- +- return NULL_TREE; +-} +- +-static tree +-ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int, +- bool *no_add_attrs) +-{ +- if (TREE_CODE (*node) != FUNCTION_DECL) +- { +- warning (OPT_Wattributes, "%qE attribute only applies to functions", +- name); +- *no_add_attrs = true; +- } +- +- if (is_attribute_p ("indirect_branch", name)) +- { +- tree cst = TREE_VALUE (args); +- if (TREE_CODE (cst) != STRING_CST) +- { +- warning (OPT_Wattributes, +- "%qE attribute requires a string constant argument", +- name); +- *no_add_attrs = true; +- } +- else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 +- && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 +- && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 +- && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) +- { +- warning (OPT_Wattributes, +- "argument to %qE attribute is not " +- "(keep|thunk|thunk-inline|thunk-extern)", name); +- *no_add_attrs = true; +- } +- } +- +- if (is_attribute_p ("function_return", name)) +- { +- tree cst = TREE_VALUE (args); +- if (TREE_CODE (cst) != STRING_CST) +- { +- warning (OPT_Wattributes, +- "%qE attribute requires a string constant argument", +- name); +- *no_add_attrs = true; +- } +- else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 +- && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 +- && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 +- && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) +- { +- warning (OPT_Wattributes, +- "argument to %qE attribute is not " +- "(keep|thunk|thunk-inline|thunk-extern)", name); +- *no_add_attrs = true; +- } +- } +- +- return NULL_TREE; +-} +- +-static tree +-ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree, +- int, bool *) +-{ +- return NULL_TREE; +-} 
+- +-static tree +-ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *) +-{ +- /* DECL_RESULT and DECL_ARGUMENTS do not exist there yet, +- but the function type contains args and return type data. */ +- tree func_type = *node; +- tree return_type = TREE_TYPE (func_type); +- +- int nargs = 0; +- tree current_arg_type = TYPE_ARG_TYPES (func_type); +- while (current_arg_type +- && ! VOID_TYPE_P (TREE_VALUE (current_arg_type))) +- { +- if (nargs == 0) +- { +- if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type))) +- error ("interrupt service routine should have a pointer " +- "as the first argument"); +- } +- else if (nargs == 1) +- { +- if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE +- || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode) +- error ("interrupt service routine should have %qs " +- "as the second argument", +- TARGET_64BIT +- ? (TARGET_X32 ? "unsigned long long int" +- : "unsigned long int") +- : "unsigned int"); +- } +- nargs++; +- current_arg_type = TREE_CHAIN (current_arg_type); +- } +- if (!nargs || nargs > 2) +- error ("interrupt service routine can only have a pointer argument " +- "and an optional integer argument"); +- if (! VOID_TYPE_P (return_type)) +- error ("interrupt service routine can%'t have non-void return value"); +- +- return NULL_TREE; +-} +- +-static bool +-ix86_ms_bitfield_layout_p (const_tree record_type) +-{ +- return ((TARGET_MS_BITFIELD_LAYOUT +- && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) +- || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type))); +-} +- +-/* Returns an expression indicating where the this parameter is +- located on entry to the FUNCTION. */ +- +-static rtx +-x86_this_parameter (tree function) +-{ +- tree type = TREE_TYPE (function); +- bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; +- int nregs; +- +- if (TARGET_64BIT) +- { +- const int *parm_regs; +- +- if (ix86_function_type_abi (type) == MS_ABI) +- parm_regs = x86_64_ms_abi_int_parameter_registers; +- else +- parm_regs = x86_64_int_parameter_registers; +- return gen_rtx_REG (Pmode, parm_regs[aggr]); +- } +- +- nregs = ix86_function_regparm (type, function); +- +- if (nregs > 0 && !stdarg_p (type)) +- { +- int regno; +- unsigned int ccvt = ix86_get_callcvt (type); +- +- if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) +- regno = aggr ? DX_REG : CX_REG; +- else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) +- { +- regno = CX_REG; +- if (aggr) +- return gen_rtx_MEM (SImode, +- plus_constant (Pmode, stack_pointer_rtx, 4)); +- } +- else +- { +- regno = AX_REG; +- if (aggr) +- { +- regno = DX_REG; +- if (nregs == 1) +- return gen_rtx_MEM (SImode, +- plus_constant (Pmode, +- stack_pointer_rtx, 4)); +- } +- } +- return gen_rtx_REG (SImode, regno); +- } +- +- return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, +- aggr ? 8 : 4)); +-} +- +-/* Determine whether x86_output_mi_thunk can succeed. */ +- +-static bool +-x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, +- const_tree function) +-{ +- /* 64-bit can handle anything. */ +- if (TARGET_64BIT) +- return true; +- +- /* For 32-bit, everything's fine if we have one free register. */ +- if (ix86_function_regparm (TREE_TYPE (function), function) < 3) +- return true; +- +- /* Need a free register for vcall_offset. */ +- if (vcall_offset) +- return false; +- +- /* Need a free register for GOT references. */ +- if (flag_pic && !targetm.binds_local_p (function)) +- return false; +- +- /* Otherwise ok. 
*/ +- return true; +-} +- +-/* Output the assembler code for a thunk function. THUNK_DECL is the +- declaration for the thunk function itself, FUNCTION is the decl for +- the target function. DELTA is an immediate constant offset to be +- added to THIS. If VCALL_OFFSET is nonzero, the word at +- *(*this + vcall_offset) should be added to THIS. */ +- +-static void +-x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta, +- HOST_WIDE_INT vcall_offset, tree function) +-{ +- rtx this_param = x86_this_parameter (function); +- rtx this_reg, tmp, fnaddr; +- unsigned int tmp_regno; +- rtx_insn *insn; +- +- if (TARGET_64BIT) +- tmp_regno = R10_REG; +- else +- { +- unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function)); +- if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) +- tmp_regno = AX_REG; +- else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) +- tmp_regno = DX_REG; +- else +- tmp_regno = CX_REG; +- } +- +- emit_note (NOTE_INSN_PROLOGUE_END); +- +- /* CET is enabled, insert EB instruction. */ +- if ((flag_cf_protection & CF_BRANCH)) +- emit_insn (gen_nop_endbr ()); +- +- /* If VCALL_OFFSET, we'll need THIS in a register. Might as well +- pull it in now and let DELTA benefit. */ +- if (REG_P (this_param)) +- this_reg = this_param; +- else if (vcall_offset) +- { +- /* Put the this parameter into %eax. */ +- this_reg = gen_rtx_REG (Pmode, AX_REG); +- emit_move_insn (this_reg, this_param); +- } +- else +- this_reg = NULL_RTX; +- +- /* Adjust the this parameter by a fixed constant. */ +- if (delta) +- { +- rtx delta_rtx = GEN_INT (delta); +- rtx delta_dst = this_reg ? this_reg : this_param; +- +- if (TARGET_64BIT) +- { +- if (!x86_64_general_operand (delta_rtx, Pmode)) +- { +- tmp = gen_rtx_REG (Pmode, tmp_regno); +- emit_move_insn (tmp, delta_rtx); +- delta_rtx = tmp; +- } +- } +- +- ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx); +- } +- +- /* Adjust the this parameter by a value stored in the vtable. */ +- if (vcall_offset) +- { +- rtx vcall_addr, vcall_mem, this_mem; +- +- tmp = gen_rtx_REG (Pmode, tmp_regno); +- +- this_mem = gen_rtx_MEM (ptr_mode, this_reg); +- if (Pmode != ptr_mode) +- this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem); +- emit_move_insn (tmp, this_mem); +- +- /* Adjust the this parameter. */ +- vcall_addr = plus_constant (Pmode, tmp, vcall_offset); +- if (TARGET_64BIT +- && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true)) +- { +- rtx tmp2 = gen_rtx_REG (Pmode, R11_REG); +- emit_move_insn (tmp2, GEN_INT (vcall_offset)); +- vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2); +- } +- +- vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr); +- if (Pmode != ptr_mode) +- emit_insn (gen_addsi_1_zext (this_reg, +- gen_rtx_REG (ptr_mode, +- REGNO (this_reg)), +- vcall_mem)); +- else +- ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem); +- } +- +- /* If necessary, drop THIS back to its stack slot. 
*/ +- if (this_reg && this_reg != this_param) +- emit_move_insn (this_param, this_reg); +- +- fnaddr = XEXP (DECL_RTL (function), 0); +- if (TARGET_64BIT) +- { +- if (!flag_pic || targetm.binds_local_p (function) +- || TARGET_PECOFF) +- ; +- else +- { +- tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL); +- tmp = gen_rtx_CONST (Pmode, tmp); +- fnaddr = gen_const_mem (Pmode, tmp); +- } +- } +- else +- { +- if (!flag_pic || targetm.binds_local_p (function)) +- ; +-#if TARGET_MACHO +- else if (TARGET_MACHO) +- { +- fnaddr = machopic_indirect_call_target (DECL_RTL (function)); +- fnaddr = XEXP (fnaddr, 0); +- } +-#endif /* TARGET_MACHO */ +- else +- { +- tmp = gen_rtx_REG (Pmode, CX_REG); +- output_set_got (tmp, NULL_RTX); +- +- fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT); +- fnaddr = gen_rtx_CONST (Pmode, fnaddr); +- fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr); +- fnaddr = gen_const_mem (Pmode, fnaddr); +- } +- } +- +- /* Our sibling call patterns do not allow memories, because we have no +- predicate that can distinguish between frame and non-frame memory. +- For our purposes here, we can get away with (ab)using a jump pattern, +- because we're going to do no optimization. */ +- if (MEM_P (fnaddr)) +- { +- if (sibcall_insn_operand (fnaddr, word_mode)) +- { +- fnaddr = XEXP (DECL_RTL (function), 0); +- tmp = gen_rtx_MEM (QImode, fnaddr); +- tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); +- tmp = emit_call_insn (tmp); +- SIBLING_CALL_P (tmp) = 1; +- } +- else +- emit_jump_insn (gen_indirect_jump (fnaddr)); +- } +- else +- { +- if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr)) +- { +- // CM_LARGE_PIC always uses pseudo PIC register which is +- // uninitialized. Since FUNCTION is local and calling it +- // doesn't go through PLT, we use scratch register %r11 as +- // PIC register and initialize it here. +- pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG); +- ix86_init_large_pic_reg (tmp_regno); +- fnaddr = legitimize_pic_address (fnaddr, +- gen_rtx_REG (Pmode, tmp_regno)); +- } +- +- if (!sibcall_insn_operand (fnaddr, word_mode)) +- { +- tmp = gen_rtx_REG (word_mode, tmp_regno); +- if (GET_MODE (fnaddr) != word_mode) +- fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); +- emit_move_insn (tmp, fnaddr); +- fnaddr = tmp; +- } +- +- tmp = gen_rtx_MEM (QImode, fnaddr); +- tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); +- tmp = emit_call_insn (tmp); +- SIBLING_CALL_P (tmp) = 1; +- } +- emit_barrier (); +- +- /* Emit just enough of rest_of_compilation to get the insns emitted. +- Note that use_thunk calls assemble_start_function et al. 
*/ +- insn = get_insns (); +- shorten_branches (insn); +- final_start_function (insn, file, 1); +- final (insn, file, 1); +- final_end_function (); +-} +- +-static void +-x86_file_start (void) +-{ +- default_file_start (); +- if (TARGET_16BIT) +- fputs ("\t.code16gcc\n", asm_out_file); +-#if TARGET_MACHO +- darwin_file_start (); +-#endif +- if (X86_FILE_START_VERSION_DIRECTIVE) +- fputs ("\t.version\t\"01.01\"\n", asm_out_file); +- if (X86_FILE_START_FLTUSED) +- fputs ("\t.global\t__fltused\n", asm_out_file); +- if (ix86_asm_dialect == ASM_INTEL) +- fputs ("\t.intel_syntax noprefix\n", asm_out_file); +-} +- +-int +-x86_field_alignment (tree type, int computed) +-{ +- machine_mode mode; +- +- if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) +- return computed; +- if (TARGET_IAMCU) +- return iamcu_alignment (type, computed); +- mode = TYPE_MODE (strip_array_types (type)); +- if (mode == DFmode || mode == DCmode +- || GET_MODE_CLASS (mode) == MODE_INT +- || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) +- return MIN (32, computed); +- return computed; +-} +- +-/* Print call to TARGET to FILE. */ +- +-static void +-x86_print_call_or_nop (FILE *file, const char *target) +-{ +- if (flag_nop_mcount || !strcmp (target, "nop")) +- /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ +- fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); +- else +- fprintf (file, "1:\tcall\t%s\n", target); +-} +- +-static bool +-current_fentry_name (const char **name) +-{ +- tree attr = lookup_attribute ("fentry_name", +- DECL_ATTRIBUTES (current_function_decl)); +- if (!attr) +- return false; +- *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); +- return true; +-} +- +-static bool +-current_fentry_section (const char **name) +-{ +- tree attr = lookup_attribute ("fentry_section", +- DECL_ATTRIBUTES (current_function_decl)); +- if (!attr) +- return false; +- *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); +- return true; +-} +- +-/* Output assembler code to FILE to increment profiler label # LABELNO +- for profiling a function entry. */ +-void +-x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) +-{ +- if (cfun->machine->endbr_queued_at_entrance) +- fprintf (file, "\t%s\n", TARGET_64BIT ? 
"endbr64" : "endbr32"); +- +- const char *mcount_name = MCOUNT_NAME; +- +- if (current_fentry_name (&mcount_name)) +- ; +- else if (fentry_name) +- mcount_name = fentry_name; +- else if (flag_fentry) +- mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE; +- +- if (TARGET_64BIT) +- { +-#ifndef NO_PROFILE_COUNTERS +- fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno); +-#endif +- +- if (!TARGET_PECOFF && flag_pic) +- fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name); +- else +- x86_print_call_or_nop (file, mcount_name); +- } +- else if (flag_pic) +- { +-#ifndef NO_PROFILE_COUNTERS +- fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n", +- LPREFIX, labelno); +-#endif +- fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); +- } +- else +- { +-#ifndef NO_PROFILE_COUNTERS +- fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n", +- LPREFIX, labelno); +-#endif +- x86_print_call_or_nop (file, mcount_name); +- } +- +- if (flag_record_mcount +- || lookup_attribute ("fentry_section", +- DECL_ATTRIBUTES (current_function_decl))) +- { +- const char *sname = "__mcount_loc"; +- +- if (current_fentry_section (&sname)) +- ; +- else if (fentry_section) +- sname = fentry_section; +- +- fprintf (file, "\t.section %s, \"a\",@progbits\n", sname); +- fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); +- fprintf (file, "\t.previous\n"); +- } +-} +- +-/* We don't have exact information about the insn sizes, but we may assume +- quite safely that we are informed about all 1 byte insns and memory +- address sizes. This is enough to eliminate unnecessary padding in +- 99% of cases. */ +- +-int +-ix86_min_insn_size (rtx_insn *insn) +-{ +- int l = 0, len; +- +- if (!INSN_P (insn) || !active_insn_p (insn)) +- return 0; +- +- /* Discard alignments we've emit and jump instructions. */ +- if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE +- && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) +- return 0; +- +- /* Important case - calls are always 5 bytes. +- It is common to have many calls in the row. */ +- if (CALL_P (insn) +- && symbolic_reference_mentioned_p (PATTERN (insn)) +- && !SIBLING_CALL_P (insn)) +- return 5; +- len = get_attr_length (insn); +- if (len <= 1) +- return 1; +- +- /* For normal instructions we rely on get_attr_length being exact, +- with a few exceptions. */ +- if (!JUMP_P (insn)) +- { +- enum attr_type type = get_attr_type (insn); +- +- switch (type) +- { +- case TYPE_MULTI: +- if (GET_CODE (PATTERN (insn)) == ASM_INPUT +- || asm_noperands (PATTERN (insn)) >= 0) +- return 0; +- break; +- case TYPE_OTHER: +- case TYPE_FCMP: +- break; +- default: +- /* Otherwise trust get_attr_length. */ +- return len; +- } +- +- l = get_attr_length_address (insn); +- if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) +- l = 4; +- } +- if (l) +- return 1+l; +- else +- return 2; +-} +- +-#ifdef ASM_OUTPUT_MAX_SKIP_PAD +- +-/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte +- window. */ +- +-static void +-ix86_avoid_jump_mispredicts (void) +-{ +- rtx_insn *insn, *start = get_insns (); +- int nbytes = 0, njumps = 0; +- bool isjump = false; +- +- /* Look for all minimal intervals of instructions containing 4 jumps. +- The intervals are bounded by START and INSN. NBYTES is the total +- size of instructions in the interval including INSN and not including +- START. When the NBYTES is smaller than 16 bytes, it is possible +- that the end of START and INSN ends up in the same 16byte page. 
+- +- The smallest offset in the page INSN can start is the case where START +- ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). +- We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN). +- +- Don't consider asm goto as jump, while it can contain a jump, it doesn't +- have to, control transfer to label(s) can be performed through other +- means, and also we estimate minimum length of all asm stmts as 0. */ +- for (insn = start; insn; insn = NEXT_INSN (insn)) +- { +- int min_size; +- +- if (LABEL_P (insn)) +- { +- align_flags alignment = label_to_alignment (insn); +- int align = alignment.levels[0].log; +- int max_skip = alignment.levels[0].maxskip; +- +- if (max_skip > 15) +- max_skip = 15; +- /* If align > 3, only up to 16 - max_skip - 1 bytes can be +- already in the current 16 byte page, because otherwise +- ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer +- bytes to reach 16 byte boundary. */ +- if (align <= 0 +- || (align <= 3 && max_skip != (1 << align) - 1)) +- max_skip = 0; +- if (dump_file) +- fprintf (dump_file, "Label %i with max_skip %i\n", +- INSN_UID (insn), max_skip); +- if (max_skip) +- { +- while (nbytes + max_skip >= 16) +- { +- start = NEXT_INSN (start); +- if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) +- || CALL_P (start)) +- njumps--, isjump = true; +- else +- isjump = false; +- nbytes -= ix86_min_insn_size (start); +- } +- } +- continue; +- } +- +- min_size = ix86_min_insn_size (insn); +- nbytes += min_size; +- if (dump_file) +- fprintf (dump_file, "Insn %i estimated to %i bytes\n", +- INSN_UID (insn), min_size); +- if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0) +- || CALL_P (insn)) +- njumps++; +- else +- continue; +- +- while (njumps > 3) +- { +- start = NEXT_INSN (start); +- if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) +- || CALL_P (start)) +- njumps--, isjump = true; +- else +- isjump = false; +- nbytes -= ix86_min_insn_size (start); +- } +- gcc_assert (njumps >= 0); +- if (dump_file) +- fprintf (dump_file, "Interval %i to %i has %i bytes\n", +- INSN_UID (start), INSN_UID (insn), nbytes); +- +- if (njumps == 3 && isjump && nbytes < 16) +- { +- int padsize = 15 - nbytes + ix86_min_insn_size (insn); +- +- if (dump_file) +- fprintf (dump_file, "Padding insn %i by %i bytes!\n", +- INSN_UID (insn), padsize); +- emit_insn_before (gen_pad (GEN_INT (padsize)), insn); +- } +- } +-} +-#endif +- +-/* AMD Athlon works faster +- when RET is not destination of conditional jump or directly preceded +- by other jump instruction. We avoid the penalty by inserting NOP just +- before the RET instructions in such cases. 
*/ +-static void +-ix86_pad_returns (void) +-{ +- edge e; +- edge_iterator ei; +- +- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) +- { +- basic_block bb = e->src; +- rtx_insn *ret = BB_END (bb); +- rtx_insn *prev; +- bool replace = false; +- +- if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret)) +- || optimize_bb_for_size_p (bb)) +- continue; +- for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) +- if (active_insn_p (prev) || LABEL_P (prev)) +- break; +- if (prev && LABEL_P (prev)) +- { +- edge e; +- edge_iterator ei; +- +- FOR_EACH_EDGE (e, ei, bb->preds) +- if (EDGE_FREQUENCY (e) && e->src->index >= 0 +- && !(e->flags & EDGE_FALLTHRU)) +- { +- replace = true; +- break; +- } +- } +- if (!replace) +- { +- prev = prev_active_insn (ret); +- if (prev +- && ((JUMP_P (prev) && any_condjump_p (prev)) +- || CALL_P (prev))) +- replace = true; +- /* Empty functions get branch mispredict even when +- the jump destination is not visible to us. */ +- if (!prev && !optimize_function_for_size_p (cfun)) +- replace = true; +- } +- if (replace) +- { +- emit_jump_insn_before (gen_simple_return_internal_long (), ret); +- delete_insn (ret); +- } +- } +-} +- +-/* Count the minimum number of instructions in BB. Return 4 if the +- number of instructions >= 4. */ +- +-static int +-ix86_count_insn_bb (basic_block bb) +-{ +- rtx_insn *insn; +- int insn_count = 0; +- +- /* Count number of instructions in this block. Return 4 if the number +- of instructions >= 4. */ +- FOR_BB_INSNS (bb, insn) +- { +- /* Only happen in exit blocks. */ +- if (JUMP_P (insn) +- && ANY_RETURN_P (PATTERN (insn))) +- break; +- +- if (NONDEBUG_INSN_P (insn) +- && GET_CODE (PATTERN (insn)) != USE +- && GET_CODE (PATTERN (insn)) != CLOBBER) +- { +- insn_count++; +- if (insn_count >= 4) +- return insn_count; +- } +- } +- +- return insn_count; +-} +- +- +-/* Count the minimum number of instructions in code path in BB. +- Return 4 if the number of instructions >= 4. */ +- +-static int +-ix86_count_insn (basic_block bb) +-{ +- edge e; +- edge_iterator ei; +- int min_prev_count; +- +- /* Only bother counting instructions along paths with no +- more than 2 basic blocks between entry and exit. Given +- that BB has an edge to exit, determine if a predecessor +- of BB has an edge from entry. If so, compute the number +- of instructions in the predecessor block. If there +- happen to be multiple such blocks, compute the minimum. */ +- min_prev_count = 4; +- FOR_EACH_EDGE (e, ei, bb->preds) +- { +- edge prev_e; +- edge_iterator prev_ei; +- +- if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) +- { +- min_prev_count = 0; +- break; +- } +- FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) +- { +- if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) +- { +- int count = ix86_count_insn_bb (e->src); +- if (count < min_prev_count) +- min_prev_count = count; +- break; +- } +- } +- } +- +- if (min_prev_count < 4) +- min_prev_count += ix86_count_insn_bb (bb); +- +- return min_prev_count; +-} +- +-/* Pad short function to 4 instructions. */ +- +-static void +-ix86_pad_short_function (void) +-{ +- edge e; +- edge_iterator ei; +- +- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) +- { +- rtx_insn *ret = BB_END (e->src); +- if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret))) +- { +- int insn_count = ix86_count_insn (e->src); +- +- /* Pad short function. */ +- if (insn_count < 4) +- { +- rtx_insn *insn = ret; +- +- /* Find epilogue. 
*/ +- while (insn +- && (!NOTE_P (insn) +- || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)) +- insn = PREV_INSN (insn); +- +- if (!insn) +- insn = ret; +- +- /* Two NOPs count as one instruction. */ +- insn_count = 2 * (4 - insn_count); +- emit_insn_before (gen_nops (GEN_INT (insn_count)), insn); +- } +- } +- } +-} +- +-/* Fix up a Windows system unwinder issue. If an EH region falls through into +- the epilogue, the Windows system unwinder will apply epilogue logic and +- produce incorrect offsets. This can be avoided by adding a nop between +- the last insn that can throw and the first insn of the epilogue. */ +- +-static void +-ix86_seh_fixup_eh_fallthru (void) +-{ +- edge e; +- edge_iterator ei; +- +- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) +- { +- rtx_insn *insn, *next; +- +- /* Find the beginning of the epilogue. */ +- for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn)) +- if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) +- break; +- if (insn == NULL) +- continue; +- +- /* We only care about preceding insns that can throw. */ +- insn = prev_active_insn (insn); +- if (insn == NULL || !can_throw_internal (insn)) +- continue; +- +- /* Do not separate calls from their debug information. */ +- for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next)) +- if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION) +- insn = next; +- else +- break; +- +- emit_insn_after (gen_nops (const1_rtx), insn); +- } +-} +- +-/* Implement machine specific optimizations. We implement padding of returns +- for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ +-static void +-ix86_reorg (void) +-{ +- /* We are freeing block_for_insn in the toplev to keep compatibility +- with old MDEP_REORGS that are not CFG based. Recompute it now. */ +- compute_bb_for_insn (); +- +- if (TARGET_SEH && current_function_has_exception_handlers ()) +- ix86_seh_fixup_eh_fallthru (); +- +- if (optimize && optimize_function_for_speed_p (cfun)) +- { +- if (TARGET_PAD_SHORT_FUNCTION) +- ix86_pad_short_function (); +- else if (TARGET_PAD_RETURNS) +- ix86_pad_returns (); +-#ifdef ASM_OUTPUT_MAX_SKIP_PAD +- if (TARGET_FOUR_JUMP_LIMIT) +- ix86_avoid_jump_mispredicts (); +-#endif +- } +-} +- +-/* Return nonzero when QImode register that must be represented via REX prefix +- is used. */ +-bool +-x86_extended_QIreg_mentioned_p (rtx_insn *insn) +-{ +- int i; +- extract_insn_cached (insn); +- for (i = 0; i < recog_data.n_operands; i++) +- if (GENERAL_REG_P (recog_data.operand[i]) +- && !QI_REGNO_P (REGNO (recog_data.operand[i]))) +- return true; +- return false; +-} +- +-/* Return true when INSN mentions register that must be encoded using REX +- prefix. */ +-bool +-x86_extended_reg_mentioned_p (rtx insn) +-{ +- subrtx_iterator::array_type array; +- FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) +- { +- const_rtx x = *iter; +- if (REG_P (x) +- && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x)))) +- return true; +- } +- return false; +-} +- +-/* If profitable, negate (without causing overflow) integer constant +- of mode MODE at location LOC. Return true in this case. */ +-bool +-x86_maybe_negate_const_int (rtx *loc, machine_mode mode) +-{ +- HOST_WIDE_INT val; +- +- if (!CONST_INT_P (*loc)) +- return false; +- +- switch (mode) +- { +- case E_DImode: +- /* DImode x86_64 constants must fit in 32 bits. 
*/ +- gcc_assert (x86_64_immediate_operand (*loc, mode)); +- +- mode = SImode; +- break; +- +- case E_SImode: +- case E_HImode: +- case E_QImode: +- break; +- +- default: +- gcc_unreachable (); +- } +- +- /* Avoid overflows. */ +- if (mode_signbit_p (mode, *loc)) +- return false; +- +- val = INTVAL (*loc); +- +- /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. +- Exceptions: -128 encodes smaller than 128, so swap sign and op. */ +- if ((val < 0 && val != -128) +- || val == 128) +- { +- *loc = GEN_INT (-val); +- return true; +- } +- +- return false; +-} +- +-/* Generate an unsigned DImode/SImode to FP conversion. This is the same code +- optabs would emit if we didn't have TFmode patterns. */ +- +-void +-x86_emit_floatuns (rtx operands[2]) +-{ +- rtx_code_label *neglab, *donelab; +- rtx i0, i1, f0, in, out; +- machine_mode mode, inmode; +- +- inmode = GET_MODE (operands[1]); +- gcc_assert (inmode == SImode || inmode == DImode); +- +- out = operands[0]; +- in = force_reg (inmode, operands[1]); +- mode = GET_MODE (out); +- neglab = gen_label_rtx (); +- donelab = gen_label_rtx (); +- f0 = gen_reg_rtx (mode); +- +- emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab); +- +- expand_float (out, in, 0); +- +- emit_jump_insn (gen_jump (donelab)); +- emit_barrier (); +- +- emit_label (neglab); +- +- i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, +- 1, OPTAB_DIRECT); +- i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, +- 1, OPTAB_DIRECT); +- i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); +- +- expand_float (f0, i0, 0); +- +- emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); +- +- emit_label (donelab); +-} +- +-static bool canonicalize_perm (struct expand_vec_perm_d *d); +-static bool expand_vec_perm_1 (struct expand_vec_perm_d *d); +-static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d); +-static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool); +- +-/* Get a vector mode of the same size as the original but with elements +- twice as wide. This is only guaranteed to apply to integral vectors. */ +- +-static inline machine_mode +-get_mode_wider_vector (machine_mode o) +-{ +- /* ??? Rely on the ordering that genmodes.c gives to vectors. */ +- machine_mode n = GET_MODE_WIDER_MODE (o).require (); +- gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2); +- gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n)); +- return n; +-} +- +-/* A subroutine of ix86_expand_vector_init_duplicate. Tries to +- fill target with val via vec_duplicate. */ +- +-static bool +-ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) +-{ +- bool ok; +- rtx_insn *insn; +- rtx dup; +- +- /* First attempt to recognize VAL as-is. */ +- dup = gen_vec_duplicate (mode, val); +- insn = emit_insn (gen_rtx_SET (target, dup)); +- if (recog_memoized (insn) < 0) +- { +- rtx_insn *seq; +- machine_mode innermode = GET_MODE_INNER (mode); +- rtx reg; +- +- /* If that fails, force VAL into a register. */ +- +- start_sequence (); +- reg = force_reg (innermode, val); +- if (GET_MODE (reg) != innermode) +- reg = gen_lowpart (innermode, reg); +- SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg); +- seq = get_insns (); +- end_sequence (); +- if (seq) +- emit_insn_before (seq, insn); +- +- ok = recog_memoized (insn) >= 0; +- gcc_assert (ok); +- } +- return true; +-} +- +-/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector +- with all elements equal to VAR. 
Return true if successful. */ +- +-static bool +-ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, +- rtx target, rtx val) +-{ +- bool ok; +- +- switch (mode) +- { +- case E_V2SImode: +- case E_V2SFmode: +- if (!mmx_ok) +- return false; +- /* FALLTHRU */ +- +- case E_V4DFmode: +- case E_V4DImode: +- case E_V8SFmode: +- case E_V8SImode: +- case E_V2DFmode: +- case E_V2DImode: +- case E_V4SFmode: +- case E_V4SImode: +- case E_V16SImode: +- case E_V8DImode: +- case E_V16SFmode: +- case E_V8DFmode: +- return ix86_vector_duplicate_value (mode, target, val); +- +- case E_V4HImode: +- if (!mmx_ok) +- return false; +- if (TARGET_SSE || TARGET_3DNOW_A) +- { +- rtx x; +- +- val = gen_lowpart (SImode, val); +- x = gen_rtx_TRUNCATE (HImode, val); +- x = gen_rtx_VEC_DUPLICATE (mode, x); +- emit_insn (gen_rtx_SET (target, x)); +- return true; +- } +- goto widen; +- +- case E_V8QImode: +- if (!mmx_ok) +- return false; +- goto widen; +- +- case E_V8HImode: +- if (TARGET_AVX2) +- return ix86_vector_duplicate_value (mode, target, val); +- +- if (TARGET_SSE2) +- { +- struct expand_vec_perm_d dperm; +- rtx tmp1, tmp2; +- +- permute: +- memset (&dperm, 0, sizeof (dperm)); +- dperm.target = target; +- dperm.vmode = mode; +- dperm.nelt = GET_MODE_NUNITS (mode); +- dperm.op0 = dperm.op1 = gen_reg_rtx (mode); +- dperm.one_operand_p = true; +- +- /* Extend to SImode using a paradoxical SUBREG. */ +- tmp1 = gen_reg_rtx (SImode); +- emit_move_insn (tmp1, gen_lowpart (SImode, val)); +- +- /* Insert the SImode value as low element of a V4SImode vector. */ +- tmp2 = gen_reg_rtx (V4SImode); +- emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); +- emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2)); +- +- ok = (expand_vec_perm_1 (&dperm) +- || expand_vec_perm_broadcast_1 (&dperm)); +- gcc_assert (ok); +- return ok; +- } +- goto widen; +- +- case E_V16QImode: +- if (TARGET_AVX2) +- return ix86_vector_duplicate_value (mode, target, val); +- +- if (TARGET_SSE2) +- goto permute; +- goto widen; +- +- widen: +- /* Replicate the value once into the next wider mode and recurse. */ +- { +- machine_mode smode, wsmode, wvmode; +- rtx x; +- +- smode = GET_MODE_INNER (mode); +- wvmode = get_mode_wider_vector (mode); +- wsmode = GET_MODE_INNER (wvmode); +- +- val = convert_modes (wsmode, smode, val, true); +- x = expand_simple_binop (wsmode, ASHIFT, val, +- GEN_INT (GET_MODE_BITSIZE (smode)), +- NULL_RTX, 1, OPTAB_LIB_WIDEN); +- val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); +- +- x = gen_reg_rtx (wvmode); +- ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val); +- gcc_assert (ok); +- emit_move_insn (target, gen_lowpart (GET_MODE (target), x)); +- return ok; +- } +- +- case E_V16HImode: +- case E_V32QImode: +- if (TARGET_AVX2) +- return ix86_vector_duplicate_value (mode, target, val); +- else +- { +- machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode); +- rtx x = gen_reg_rtx (hvmode); +- +- ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); +- gcc_assert (ok); +- +- x = gen_rtx_VEC_CONCAT (mode, x, x); +- emit_insn (gen_rtx_SET (target, x)); +- } +- return true; +- +- case E_V64QImode: +- case E_V32HImode: +- if (TARGET_AVX512BW) +- return ix86_vector_duplicate_value (mode, target, val); +- else +- { +- machine_mode hvmode = (mode == V32HImode ? 
V16HImode : V32QImode); +- rtx x = gen_reg_rtx (hvmode); +- +- ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); +- gcc_assert (ok); +- +- x = gen_rtx_VEC_CONCAT (mode, x, x); +- emit_insn (gen_rtx_SET (target, x)); +- } +- return true; +- +- default: +- return false; +- } +-} +- +-/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector +- whose ONE_VAR element is VAR, and other elements are zero. Return true +- if successful. */ +- +-static bool +-ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, +- rtx target, rtx var, int one_var) +-{ +- machine_mode vsimode; +- rtx new_target; +- rtx x, tmp; +- bool use_vector_set = false; +- rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; +- +- switch (mode) +- { +- case E_V2DImode: +- /* For SSE4.1, we normally use vector set. But if the second +- element is zero and inter-unit moves are OK, we use movq +- instead. */ +- use_vector_set = (TARGET_64BIT && TARGET_SSE4_1 +- && !(TARGET_INTER_UNIT_MOVES_TO_VEC +- && one_var == 0)); +- break; +- case E_V16QImode: +- case E_V4SImode: +- case E_V4SFmode: +- use_vector_set = TARGET_SSE4_1; +- break; +- case E_V8HImode: +- use_vector_set = TARGET_SSE2; +- break; +- case E_V4HImode: +- use_vector_set = TARGET_SSE || TARGET_3DNOW_A; +- break; +- case E_V32QImode: +- case E_V16HImode: +- use_vector_set = TARGET_AVX; +- break; +- case E_V8SImode: +- use_vector_set = TARGET_AVX; +- gen_vec_set_0 = gen_vec_setv8si_0; +- break; +- case E_V8SFmode: +- use_vector_set = TARGET_AVX; +- gen_vec_set_0 = gen_vec_setv8sf_0; +- break; +- case E_V4DFmode: +- use_vector_set = TARGET_AVX; +- gen_vec_set_0 = gen_vec_setv4df_0; +- break; +- case E_V4DImode: +- /* Use ix86_expand_vector_set in 64bit mode only. */ +- use_vector_set = TARGET_AVX && TARGET_64BIT; +- gen_vec_set_0 = gen_vec_setv4di_0; +- break; +- case E_V16SImode: +- use_vector_set = TARGET_AVX512F && one_var == 0; +- gen_vec_set_0 = gen_vec_setv16si_0; +- break; +- case E_V16SFmode: +- use_vector_set = TARGET_AVX512F && one_var == 0; +- gen_vec_set_0 = gen_vec_setv16sf_0; +- break; +- case E_V8DFmode: +- use_vector_set = TARGET_AVX512F && one_var == 0; +- gen_vec_set_0 = gen_vec_setv8df_0; +- break; +- case E_V8DImode: +- /* Use ix86_expand_vector_set in 64bit mode only. 
*/ +- use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0; +- gen_vec_set_0 = gen_vec_setv8di_0; +- break; +- default: +- break; +- } +- +- if (use_vector_set) +- { +- if (gen_vec_set_0 && one_var == 0) +- { +- var = force_reg (GET_MODE_INNER (mode), var); +- emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var)); +- return true; +- } +- emit_insn (gen_rtx_SET (target, CONST0_RTX (mode))); +- var = force_reg (GET_MODE_INNER (mode), var); +- ix86_expand_vector_set (mmx_ok, target, var, one_var); +- return true; +- } +- +- switch (mode) +- { +- case E_V2SFmode: +- case E_V2SImode: +- if (!mmx_ok) +- return false; +- /* FALLTHRU */ +- +- case E_V2DFmode: +- case E_V2DImode: +- if (one_var != 0) +- return false; +- var = force_reg (GET_MODE_INNER (mode), var); +- x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); +- emit_insn (gen_rtx_SET (target, x)); +- return true; +- +- case E_V4SFmode: +- case E_V4SImode: +- if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) +- new_target = gen_reg_rtx (mode); +- else +- new_target = target; +- var = force_reg (GET_MODE_INNER (mode), var); +- x = gen_rtx_VEC_DUPLICATE (mode, var); +- x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); +- emit_insn (gen_rtx_SET (new_target, x)); +- if (one_var != 0) +- { +- /* We need to shuffle the value to the correct position, so +- create a new pseudo to store the intermediate result. */ +- +- /* With SSE2, we can use the integer shuffle insns. */ +- if (mode != V4SFmode && TARGET_SSE2) +- { +- emit_insn (gen_sse2_pshufd_1 (new_target, new_target, +- const1_rtx, +- GEN_INT (one_var == 1 ? 0 : 1), +- GEN_INT (one_var == 2 ? 0 : 1), +- GEN_INT (one_var == 3 ? 0 : 1))); +- if (target != new_target) +- emit_move_insn (target, new_target); +- return true; +- } +- +- /* Otherwise convert the intermediate result to V4SFmode and +- use the SSE1 shuffle instructions. */ +- if (mode != V4SFmode) +- { +- tmp = gen_reg_rtx (V4SFmode); +- emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); +- } +- else +- tmp = new_target; +- +- emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, +- const1_rtx, +- GEN_INT (one_var == 1 ? 0 : 1), +- GEN_INT (one_var == 2 ? 0+4 : 1+4), +- GEN_INT (one_var == 3 ? 0+4 : 1+4))); +- +- if (mode != V4SFmode) +- emit_move_insn (target, gen_lowpart (V4SImode, tmp)); +- else if (tmp != target) +- emit_move_insn (target, tmp); +- } +- else if (target != new_target) +- emit_move_insn (target, new_target); +- return true; +- +- case E_V8HImode: +- case E_V16QImode: +- vsimode = V4SImode; +- goto widen; +- case E_V4HImode: +- case E_V8QImode: +- if (!mmx_ok) +- return false; +- vsimode = V2SImode; +- goto widen; +- widen: +- if (one_var != 0) +- return false; +- +- /* Zero extend the variable element to SImode and recurse. */ +- var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); +- +- x = gen_reg_rtx (vsimode); +- if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, +- var, one_var)) +- gcc_unreachable (); +- +- emit_move_insn (target, gen_lowpart (mode, x)); +- return true; +- +- default: +- return false; +- } +-} +- +-/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector +- consisting of the values in VALS. It is known that all elements +- except ONE_VAR are constants. Return true if successful. 
*/ +- +-static bool +-ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, +- rtx target, rtx vals, int one_var) +-{ +- rtx var = XVECEXP (vals, 0, one_var); +- machine_mode wmode; +- rtx const_vec, x; +- +- const_vec = copy_rtx (vals); +- XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); +- const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); +- +- switch (mode) +- { +- case E_V2DFmode: +- case E_V2DImode: +- case E_V2SFmode: +- case E_V2SImode: +- /* For the two element vectors, it's just as easy to use +- the general case. */ +- return false; +- +- case E_V4DImode: +- /* Use ix86_expand_vector_set in 64bit mode only. */ +- if (!TARGET_64BIT) +- return false; +- /* FALLTHRU */ +- case E_V4DFmode: +- case E_V8SFmode: +- case E_V8SImode: +- case E_V16HImode: +- case E_V32QImode: +- case E_V4SFmode: +- case E_V4SImode: +- case E_V8HImode: +- case E_V4HImode: +- break; +- +- case E_V16QImode: +- if (TARGET_SSE4_1) +- break; +- wmode = V8HImode; +- goto widen; +- case E_V8QImode: +- wmode = V4HImode; +- goto widen; +- widen: +- /* There's no way to set one QImode entry easily. Combine +- the variable value with its adjacent constant value, and +- promote to an HImode set. */ +- x = XVECEXP (vals, 0, one_var ^ 1); +- if (one_var & 1) +- { +- var = convert_modes (HImode, QImode, var, true); +- var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), +- NULL_RTX, 1, OPTAB_LIB_WIDEN); +- x = GEN_INT (INTVAL (x) & 0xff); +- } +- else +- { +- var = convert_modes (HImode, QImode, var, true); +- x = gen_int_mode (UINTVAL (x) << 8, HImode); +- } +- if (x != const0_rtx) +- var = expand_simple_binop (HImode, IOR, var, x, var, +- 1, OPTAB_LIB_WIDEN); +- +- x = gen_reg_rtx (wmode); +- emit_move_insn (x, gen_lowpart (wmode, const_vec)); +- ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); +- +- emit_move_insn (target, gen_lowpart (mode, x)); +- return true; +- +- default: +- return false; +- } +- +- emit_move_insn (target, const_vec); +- ix86_expand_vector_set (mmx_ok, target, var, one_var); +- return true; +-} +- +-/* A subroutine of ix86_expand_vector_init_general. Use vector +- concatenate to handle the most general case: all values variable, +- and none identical. 
*/ +- +-static void +-ix86_expand_vector_init_concat (machine_mode mode, +- rtx target, rtx *ops, int n) +-{ +- machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode; +- rtx first[16], second[8], third[4]; +- rtvec v; +- int i, j; +- +- switch (n) +- { +- case 2: +- switch (mode) +- { +- case E_V16SImode: +- cmode = V8SImode; +- break; +- case E_V16SFmode: +- cmode = V8SFmode; +- break; +- case E_V8DImode: +- cmode = V4DImode; +- break; +- case E_V8DFmode: +- cmode = V4DFmode; +- break; +- case E_V8SImode: +- cmode = V4SImode; +- break; +- case E_V8SFmode: +- cmode = V4SFmode; +- break; +- case E_V4DImode: +- cmode = V2DImode; +- break; +- case E_V4DFmode: +- cmode = V2DFmode; +- break; +- case E_V4SImode: +- cmode = V2SImode; +- break; +- case E_V4SFmode: +- cmode = V2SFmode; +- break; +- case E_V2DImode: +- cmode = DImode; +- break; +- case E_V2SImode: +- cmode = SImode; +- break; +- case E_V2DFmode: +- cmode = DFmode; +- break; +- case E_V2SFmode: +- cmode = SFmode; +- break; +- default: +- gcc_unreachable (); +- } +- +- if (!register_operand (ops[1], cmode)) +- ops[1] = force_reg (cmode, ops[1]); +- if (!register_operand (ops[0], cmode)) +- ops[0] = force_reg (cmode, ops[0]); +- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0], +- ops[1]))); +- break; +- +- case 4: +- switch (mode) +- { +- case E_V4DImode: +- cmode = V2DImode; +- break; +- case E_V4DFmode: +- cmode = V2DFmode; +- break; +- case E_V4SImode: +- cmode = V2SImode; +- break; +- case E_V4SFmode: +- cmode = V2SFmode; +- break; +- default: +- gcc_unreachable (); +- } +- goto half; +- +- case 8: +- switch (mode) +- { +- case E_V8DImode: +- cmode = V2DImode; +- hmode = V4DImode; +- break; +- case E_V8DFmode: +- cmode = V2DFmode; +- hmode = V4DFmode; +- break; +- case E_V8SImode: +- cmode = V2SImode; +- hmode = V4SImode; +- break; +- case E_V8SFmode: +- cmode = V2SFmode; +- hmode = V4SFmode; +- break; +- default: +- gcc_unreachable (); +- } +- goto half; +- +- case 16: +- switch (mode) +- { +- case E_V16SImode: +- cmode = V2SImode; +- hmode = V4SImode; +- gmode = V8SImode; +- break; +- case E_V16SFmode: +- cmode = V2SFmode; +- hmode = V4SFmode; +- gmode = V8SFmode; +- break; +- default: +- gcc_unreachable (); +- } +- goto half; +- +-half: +- /* FIXME: We process inputs backward to help RA. PR 36222. */ +- i = n - 1; +- j = (n >> 1) - 1; +- for (; i > 0; i -= 2, j--) +- { +- first[j] = gen_reg_rtx (cmode); +- v = gen_rtvec (2, ops[i - 1], ops[i]); +- ix86_expand_vector_init (false, first[j], +- gen_rtx_PARALLEL (cmode, v)); +- } +- +- n >>= 1; +- if (n > 4) +- { +- gcc_assert (hmode != VOIDmode); +- gcc_assert (gmode != VOIDmode); +- for (i = j = 0; i < n; i += 2, j++) +- { +- second[j] = gen_reg_rtx (hmode); +- ix86_expand_vector_init_concat (hmode, second [j], +- &first [i], 2); +- } +- n >>= 1; +- for (i = j = 0; i < n; i += 2, j++) +- { +- third[j] = gen_reg_rtx (gmode); +- ix86_expand_vector_init_concat (gmode, third[j], +- &second[i], 2); +- } +- n >>= 1; +- ix86_expand_vector_init_concat (mode, target, third, n); +- } +- else if (n > 2) +- { +- gcc_assert (hmode != VOIDmode); +- for (i = j = 0; i < n; i += 2, j++) +- { +- second[j] = gen_reg_rtx (hmode); +- ix86_expand_vector_init_concat (hmode, second [j], +- &first [i], 2); +- } +- n >>= 1; +- ix86_expand_vector_init_concat (mode, target, second, n); +- } +- else +- ix86_expand_vector_init_concat (mode, target, first, n); +- break; +- +- default: +- gcc_unreachable (); +- } +-} +- +-/* A subroutine of ix86_expand_vector_init_general. 
Use vector +- interleave to handle the most general case: all values variable, +- and none identical. */ +- +-static void +-ix86_expand_vector_init_interleave (machine_mode mode, +- rtx target, rtx *ops, int n) +-{ +- machine_mode first_imode, second_imode, third_imode, inner_mode; +- int i, j; +- rtx op0, op1; +- rtx (*gen_load_even) (rtx, rtx, rtx); +- rtx (*gen_interleave_first_low) (rtx, rtx, rtx); +- rtx (*gen_interleave_second_low) (rtx, rtx, rtx); +- +- switch (mode) +- { +- case E_V8HImode: +- gen_load_even = gen_vec_setv8hi; +- gen_interleave_first_low = gen_vec_interleave_lowv4si; +- gen_interleave_second_low = gen_vec_interleave_lowv2di; +- inner_mode = HImode; +- first_imode = V4SImode; +- second_imode = V2DImode; +- third_imode = VOIDmode; +- break; +- case E_V16QImode: +- gen_load_even = gen_vec_setv16qi; +- gen_interleave_first_low = gen_vec_interleave_lowv8hi; +- gen_interleave_second_low = gen_vec_interleave_lowv4si; +- inner_mode = QImode; +- first_imode = V8HImode; +- second_imode = V4SImode; +- third_imode = V2DImode; +- break; +- default: +- gcc_unreachable (); +- } +- +- for (i = 0; i < n; i++) +- { +- /* Extend the odd elment to SImode using a paradoxical SUBREG. */ +- op0 = gen_reg_rtx (SImode); +- emit_move_insn (op0, gen_lowpart (SImode, ops [i + i])); +- +- /* Insert the SImode value as low element of V4SImode vector. */ +- op1 = gen_reg_rtx (V4SImode); +- op0 = gen_rtx_VEC_MERGE (V4SImode, +- gen_rtx_VEC_DUPLICATE (V4SImode, +- op0), +- CONST0_RTX (V4SImode), +- const1_rtx); +- emit_insn (gen_rtx_SET (op1, op0)); +- +- /* Cast the V4SImode vector back to a vector in orignal mode. */ +- op0 = gen_reg_rtx (mode); +- emit_move_insn (op0, gen_lowpart (mode, op1)); +- +- /* Load even elements into the second position. */ +- emit_insn (gen_load_even (op0, +- force_reg (inner_mode, +- ops [i + i + 1]), +- const1_rtx)); +- +- /* Cast vector to FIRST_IMODE vector. */ +- ops[i] = gen_reg_rtx (first_imode); +- emit_move_insn (ops[i], gen_lowpart (first_imode, op0)); +- } +- +- /* Interleave low FIRST_IMODE vectors. */ +- for (i = j = 0; i < n; i += 2, j++) +- { +- op0 = gen_reg_rtx (first_imode); +- emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1])); +- +- /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */ +- ops[j] = gen_reg_rtx (second_imode); +- emit_move_insn (ops[j], gen_lowpart (second_imode, op0)); +- } +- +- /* Interleave low SECOND_IMODE vectors. */ +- switch (second_imode) +- { +- case E_V4SImode: +- for (i = j = 0; i < n / 2; i += 2, j++) +- { +- op0 = gen_reg_rtx (second_imode); +- emit_insn (gen_interleave_second_low (op0, ops[i], +- ops[i + 1])); +- +- /* Cast the SECOND_IMODE vector to the THIRD_IMODE +- vector. */ +- ops[j] = gen_reg_rtx (third_imode); +- emit_move_insn (ops[j], gen_lowpart (third_imode, op0)); +- } +- second_imode = V2DImode; +- gen_interleave_second_low = gen_vec_interleave_lowv2di; +- /* FALLTHRU */ +- +- case E_V2DImode: +- op0 = gen_reg_rtx (second_imode); +- emit_insn (gen_interleave_second_low (op0, ops[0], +- ops[1])); +- +- /* Cast the SECOND_IMODE vector back to a vector on original +- mode. */ +- emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0))); +- break; +- +- default: +- gcc_unreachable (); +- } +-} +- +-/* A subroutine of ix86_expand_vector_init. Handle the most general case: +- all values variable, and none identical. 
*/ +- +-static void +-ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, +- rtx target, rtx vals) +-{ +- rtx ops[64], op0, op1, op2, op3, op4, op5; +- machine_mode half_mode = VOIDmode; +- machine_mode quarter_mode = VOIDmode; +- int n, i; +- +- switch (mode) +- { +- case E_V2SFmode: +- case E_V2SImode: +- if (!mmx_ok && !TARGET_SSE) +- break; +- /* FALLTHRU */ +- +- case E_V16SImode: +- case E_V16SFmode: +- case E_V8DFmode: +- case E_V8DImode: +- case E_V8SFmode: +- case E_V8SImode: +- case E_V4DFmode: +- case E_V4DImode: +- case E_V4SFmode: +- case E_V4SImode: +- case E_V2DFmode: +- case E_V2DImode: +- n = GET_MODE_NUNITS (mode); +- for (i = 0; i < n; i++) +- ops[i] = XVECEXP (vals, 0, i); +- ix86_expand_vector_init_concat (mode, target, ops, n); +- return; +- +- case E_V2TImode: +- for (i = 0; i < 2; i++) +- ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i)); +- op0 = gen_reg_rtx (V4DImode); +- ix86_expand_vector_init_concat (V4DImode, op0, ops, 2); +- emit_move_insn (target, gen_lowpart (GET_MODE (target), op0)); +- return; +- +- case E_V4TImode: +- for (i = 0; i < 4; i++) +- ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i)); +- ops[4] = gen_reg_rtx (V4DImode); +- ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2); +- ops[5] = gen_reg_rtx (V4DImode); +- ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2); +- op0 = gen_reg_rtx (V8DImode); +- ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2); +- emit_move_insn (target, gen_lowpart (GET_MODE (target), op0)); +- return; +- +- case E_V32QImode: +- half_mode = V16QImode; +- goto half; +- +- case E_V16HImode: +- half_mode = V8HImode; +- goto half; +- +-half: +- n = GET_MODE_NUNITS (mode); +- for (i = 0; i < n; i++) +- ops[i] = XVECEXP (vals, 0, i); +- op0 = gen_reg_rtx (half_mode); +- op1 = gen_reg_rtx (half_mode); +- ix86_expand_vector_init_interleave (half_mode, op0, ops, +- n >> 2); +- ix86_expand_vector_init_interleave (half_mode, op1, +- &ops [n >> 1], n >> 2); +- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1))); +- return; +- +- case E_V64QImode: +- quarter_mode = V16QImode; +- half_mode = V32QImode; +- goto quarter; +- +- case E_V32HImode: +- quarter_mode = V8HImode; +- half_mode = V16HImode; +- goto quarter; +- +-quarter: +- n = GET_MODE_NUNITS (mode); +- for (i = 0; i < n; i++) +- ops[i] = XVECEXP (vals, 0, i); +- op0 = gen_reg_rtx (quarter_mode); +- op1 = gen_reg_rtx (quarter_mode); +- op2 = gen_reg_rtx (quarter_mode); +- op3 = gen_reg_rtx (quarter_mode); +- op4 = gen_reg_rtx (half_mode); +- op5 = gen_reg_rtx (half_mode); +- ix86_expand_vector_init_interleave (quarter_mode, op0, ops, +- n >> 3); +- ix86_expand_vector_init_interleave (quarter_mode, op1, +- &ops [n >> 2], n >> 3); +- ix86_expand_vector_init_interleave (quarter_mode, op2, +- &ops [n >> 1], n >> 3); +- ix86_expand_vector_init_interleave (quarter_mode, op3, +- &ops [(n >> 1) | (n >> 2)], n >> 3); +- emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1))); +- emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3))); +- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5))); +- return; +- +- case E_V16QImode: +- if (!TARGET_SSE4_1) +- break; +- /* FALLTHRU */ +- +- case E_V8HImode: +- if (!TARGET_SSE2) +- break; +- +- /* Don't use ix86_expand_vector_init_interleave if we can't +- move from GPR to SSE register directly. 
*/ +- if (!TARGET_INTER_UNIT_MOVES_TO_VEC) +- break; +- +- n = GET_MODE_NUNITS (mode); +- for (i = 0; i < n; i++) +- ops[i] = XVECEXP (vals, 0, i); +- ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); +- return; +- +- case E_V4HImode: +- case E_V8QImode: +- break; +- +- default: +- gcc_unreachable (); +- } +- +- { +- int i, j, n_elts, n_words, n_elt_per_word; +- machine_mode inner_mode; +- rtx words[4], shift; +- +- inner_mode = GET_MODE_INNER (mode); +- n_elts = GET_MODE_NUNITS (mode); +- n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; +- n_elt_per_word = n_elts / n_words; +- shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); +- +- for (i = 0; i < n_words; ++i) +- { +- rtx word = NULL_RTX; +- +- for (j = 0; j < n_elt_per_word; ++j) +- { +- rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); +- elt = convert_modes (word_mode, inner_mode, elt, true); +- +- if (j == 0) +- word = elt; +- else +- { +- word = expand_simple_binop (word_mode, ASHIFT, word, shift, +- word, 1, OPTAB_LIB_WIDEN); +- word = expand_simple_binop (word_mode, IOR, word, elt, +- word, 1, OPTAB_LIB_WIDEN); +- } +- } +- +- words[i] = word; +- } +- +- if (n_words == 1) +- emit_move_insn (target, gen_lowpart (mode, words[0])); +- else if (n_words == 2) +- { +- rtx tmp = gen_reg_rtx (mode); +- emit_clobber (tmp); +- emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); +- emit_move_insn (gen_highpart (word_mode, tmp), words[1]); +- emit_move_insn (target, tmp); +- } +- else if (n_words == 4) +- { +- rtx tmp = gen_reg_rtx (V4SImode); +- gcc_assert (word_mode == SImode); +- vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); +- ix86_expand_vector_init_general (false, V4SImode, tmp, vals); +- emit_move_insn (target, gen_lowpart (mode, tmp)); +- } +- else +- gcc_unreachable (); +- } +-} +- +-/* Initialize vector TARGET via VALS. Suppress the use of MMX +- instructions unless MMX_OK is true. */ +- +-void +-ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) +-{ +- machine_mode mode = GET_MODE (target); +- machine_mode inner_mode = GET_MODE_INNER (mode); +- int n_elts = GET_MODE_NUNITS (mode); +- int n_var = 0, one_var = -1; +- bool all_same = true, all_const_zero = true; +- int i; +- rtx x; +- +- /* Handle first initialization from vector elts. */ +- if (n_elts != XVECLEN (vals, 0)) +- { +- rtx subtarget = target; +- x = XVECEXP (vals, 0, 0); +- gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode); +- if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts) +- { +- rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; +- if (inner_mode == QImode || inner_mode == HImode) +- { +- unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); +- mode = mode_for_vector (SImode, n_bits / 4).require (); +- inner_mode = mode_for_vector (SImode, n_bits / 8).require (); +- ops[0] = gen_lowpart (inner_mode, ops[0]); +- ops[1] = gen_lowpart (inner_mode, ops[1]); +- subtarget = gen_reg_rtx (mode); +- } +- ix86_expand_vector_init_concat (mode, subtarget, ops, 2); +- if (subtarget != target) +- emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget)); +- return; +- } +- gcc_unreachable (); +- } +- +- for (i = 0; i < n_elts; ++i) +- { +- x = XVECEXP (vals, 0, i); +- if (!(CONST_SCALAR_INT_P (x) +- || CONST_DOUBLE_P (x) +- || CONST_FIXED_P (x))) +- n_var++, one_var = i; +- else if (x != CONST0_RTX (inner_mode)) +- all_const_zero = false; +- if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) +- all_same = false; +- } +- +- /* Constants are best loaded from the constant pool. 
*/ +- if (n_var == 0) +- { +- emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); +- return; +- } +- +- /* If all values are identical, broadcast the value. */ +- if (all_same +- && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, +- XVECEXP (vals, 0, 0))) +- return; +- +- /* Values where only one field is non-constant are best loaded from +- the pool and overwritten via move later. */ +- if (n_var == 1) +- { +- if (all_const_zero +- && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, +- XVECEXP (vals, 0, one_var), +- one_var)) +- return; +- +- if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) +- return; +- } +- +- ix86_expand_vector_init_general (mmx_ok, mode, target, vals); +-} +- +-void +-ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) +-{ +- machine_mode mode = GET_MODE (target); +- machine_mode inner_mode = GET_MODE_INNER (mode); +- machine_mode half_mode; +- bool use_vec_merge = false; +- rtx tmp; +- static rtx (*gen_extract[6][2]) (rtx, rtx) +- = { +- { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi }, +- { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi }, +- { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si }, +- { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di }, +- { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf }, +- { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df } +- }; +- static rtx (*gen_insert[6][2]) (rtx, rtx, rtx) +- = { +- { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi }, +- { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi }, +- { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si }, +- { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di }, +- { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf }, +- { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } +- }; +- int i, j, n; +- machine_mode mmode = VOIDmode; +- rtx (*gen_blendm) (rtx, rtx, rtx, rtx); +- +- switch (mode) +- { +- case E_V2SFmode: +- case E_V2SImode: +- if (mmx_ok) +- { +- tmp = gen_reg_rtx (GET_MODE_INNER (mode)); +- ix86_expand_vector_extract (true, tmp, target, 1 - elt); +- if (elt == 0) +- tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); +- else +- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); +- emit_insn (gen_rtx_SET (target, tmp)); +- return; +- } +- break; +- +- case E_V2DImode: +- use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT; +- if (use_vec_merge) +- break; +- +- tmp = gen_reg_rtx (GET_MODE_INNER (mode)); +- ix86_expand_vector_extract (false, tmp, target, 1 - elt); +- if (elt == 0) +- tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); +- else +- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); +- emit_insn (gen_rtx_SET (target, tmp)); +- return; +- +- case E_V2DFmode: +- { +- rtx op0, op1; +- +- /* For the two element vectors, we implement a VEC_CONCAT with +- the extraction of the other element. 
*/ +- +- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); +- tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); +- +- if (elt == 0) +- op0 = val, op1 = tmp; +- else +- op0 = tmp, op1 = val; +- +- tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); +- emit_insn (gen_rtx_SET (target, tmp)); +- } +- return; +- +- case E_V4SFmode: +- use_vec_merge = TARGET_SSE4_1; +- if (use_vec_merge) +- break; +- +- switch (elt) +- { +- case 0: +- use_vec_merge = true; +- break; +- +- case 1: +- /* tmp = target = A B C D */ +- tmp = copy_to_reg (target); +- /* target = A A B B */ +- emit_insn (gen_vec_interleave_lowv4sf (target, target, target)); +- /* target = X A B B */ +- ix86_expand_vector_set (false, target, val, 0); +- /* target = A X C D */ +- emit_insn (gen_sse_shufps_v4sf (target, target, tmp, +- const1_rtx, const0_rtx, +- GEN_INT (2+4), GEN_INT (3+4))); +- return; +- +- case 2: +- /* tmp = target = A B C D */ +- tmp = copy_to_reg (target); +- /* tmp = X B C D */ +- ix86_expand_vector_set (false, tmp, val, 0); +- /* target = A B X D */ +- emit_insn (gen_sse_shufps_v4sf (target, target, tmp, +- const0_rtx, const1_rtx, +- GEN_INT (0+4), GEN_INT (3+4))); +- return; +- +- case 3: +- /* tmp = target = A B C D */ +- tmp = copy_to_reg (target); +- /* tmp = X B C D */ +- ix86_expand_vector_set (false, tmp, val, 0); +- /* target = A B X D */ +- emit_insn (gen_sse_shufps_v4sf (target, target, tmp, +- const0_rtx, const1_rtx, +- GEN_INT (2+4), GEN_INT (0+4))); +- return; +- +- default: +- gcc_unreachable (); +- } +- break; +- +- case E_V4SImode: +- use_vec_merge = TARGET_SSE4_1; +- if (use_vec_merge) +- break; +- +- /* Element 0 handled by vec_merge below. */ +- if (elt == 0) +- { +- use_vec_merge = true; +- break; +- } +- +- if (TARGET_SSE2) +- { +- /* With SSE2, use integer shuffles to swap element 0 and ELT, +- store into element 0, then shuffle them back. */ +- +- rtx order[4]; +- +- order[0] = GEN_INT (elt); +- order[1] = const1_rtx; +- order[2] = const2_rtx; +- order[3] = GEN_INT (3); +- order[elt] = const0_rtx; +- +- emit_insn (gen_sse2_pshufd_1 (target, target, order[0], +- order[1], order[2], order[3])); +- +- ix86_expand_vector_set (false, target, val, 0); +- +- emit_insn (gen_sse2_pshufd_1 (target, target, order[0], +- order[1], order[2], order[3])); +- } +- else +- { +- /* For SSE1, we have to reuse the V4SF code. */ +- rtx t = gen_reg_rtx (V4SFmode); +- emit_move_insn (t, gen_lowpart (V4SFmode, target)); +- ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt); +- emit_move_insn (target, gen_lowpart (mode, t)); +- } +- return; +- +- case E_V8HImode: +- use_vec_merge = TARGET_SSE2; +- break; +- case E_V4HImode: +- use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); +- break; +- +- case E_V16QImode: +- use_vec_merge = TARGET_SSE4_1; +- break; +- +- case E_V8QImode: +- break; +- +- case E_V32QImode: +- half_mode = V16QImode; +- j = 0; +- n = 16; +- goto half; +- +- case E_V16HImode: +- half_mode = V8HImode; +- j = 1; +- n = 8; +- goto half; +- +- case E_V8SImode: +- half_mode = V4SImode; +- j = 2; +- n = 4; +- goto half; +- +- case E_V4DImode: +- half_mode = V2DImode; +- j = 3; +- n = 2; +- goto half; +- +- case E_V8SFmode: +- half_mode = V4SFmode; +- j = 4; +- n = 4; +- goto half; +- +- case E_V4DFmode: +- half_mode = V2DFmode; +- j = 5; +- n = 2; +- goto half; +- +-half: +- /* Compute offset. */ +- i = elt / n; +- elt %= n; +- +- gcc_assert (i <= 1); +- +- /* Extract the half. 
*/ +- tmp = gen_reg_rtx (half_mode); +- emit_insn (gen_extract[j][i] (tmp, target)); +- +- /* Put val in tmp at elt. */ +- ix86_expand_vector_set (false, tmp, val, elt); +- +- /* Put it back. */ +- emit_insn (gen_insert[j][i] (target, target, tmp)); +- return; +- +- case E_V8DFmode: +- if (TARGET_AVX512F) +- { +- mmode = QImode; +- gen_blendm = gen_avx512f_blendmv8df; +- } +- break; +- +- case E_V8DImode: +- if (TARGET_AVX512F) +- { +- mmode = QImode; +- gen_blendm = gen_avx512f_blendmv8di; +- } +- break; +- +- case E_V16SFmode: +- if (TARGET_AVX512F) +- { +- mmode = HImode; +- gen_blendm = gen_avx512f_blendmv16sf; +- } +- break; +- +- case E_V16SImode: +- if (TARGET_AVX512F) +- { +- mmode = HImode; +- gen_blendm = gen_avx512f_blendmv16si; +- } +- break; +- +- case E_V32HImode: +- if (TARGET_AVX512BW) +- { +- mmode = SImode; +- gen_blendm = gen_avx512bw_blendmv32hi; +- } +- else if (TARGET_AVX512F) +- { +- half_mode = E_V8HImode; +- n = 8; +- goto quarter; +- } +- break; +- +- case E_V64QImode: +- if (TARGET_AVX512BW) +- { +- mmode = DImode; +- gen_blendm = gen_avx512bw_blendmv64qi; +- } +- else if (TARGET_AVX512F) +- { +- half_mode = E_V16QImode; +- n = 16; +- goto quarter; +- } +- break; +- +-quarter: +- /* Compute offset. */ +- i = elt / n; +- elt %= n; +- +- gcc_assert (i <= 3); +- +- { +- /* Extract the quarter. */ +- tmp = gen_reg_rtx (V4SImode); +- rtx tmp2 = gen_lowpart (V16SImode, target); +- rtx mask = gen_reg_rtx (QImode); +- +- emit_move_insn (mask, constm1_rtx); +- emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i), +- tmp, mask)); +- +- tmp2 = gen_reg_rtx (half_mode); +- emit_move_insn (tmp2, gen_lowpart (half_mode, tmp)); +- tmp = tmp2; +- +- /* Put val in tmp at elt. */ +- ix86_expand_vector_set (false, tmp, val, elt); +- +- /* Put it back. */ +- tmp2 = gen_reg_rtx (V16SImode); +- rtx tmp3 = gen_lowpart (V16SImode, target); +- mask = gen_reg_rtx (HImode); +- emit_move_insn (mask, constm1_rtx); +- tmp = gen_lowpart (V4SImode, tmp); +- emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i), +- tmp3, mask)); +- emit_move_insn (target, gen_lowpart (mode, tmp2)); +- } +- return; +- +- default: +- break; +- } +- +- if (mmode != VOIDmode) +- { +- tmp = gen_reg_rtx (mode); +- emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val))); +- /* The avx512*_blendm expanders have different operand order +- from VEC_MERGE. In VEC_MERGE, the first input operand is used for +- elements where the mask is set and second input operand otherwise, +- in {sse,avx}*_*blend* the first input operand is used for elements +- where the mask is clear and second input operand otherwise. 
*/ +- emit_insn (gen_blendm (target, target, tmp, +- force_reg (mmode, +- gen_int_mode (HOST_WIDE_INT_1U << elt, +- mmode)))); +- } +- else if (use_vec_merge) +- { +- tmp = gen_rtx_VEC_DUPLICATE (mode, val); +- tmp = gen_rtx_VEC_MERGE (mode, tmp, target, +- GEN_INT (HOST_WIDE_INT_1U << elt)); +- emit_insn (gen_rtx_SET (target, tmp)); +- } +- else +- { +- rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); +- +- emit_move_insn (mem, target); +- +- tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode)); +- emit_move_insn (tmp, val); +- +- emit_move_insn (target, mem); +- } +-} +- +-void +-ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) +-{ +- machine_mode mode = GET_MODE (vec); +- machine_mode inner_mode = GET_MODE_INNER (mode); +- bool use_vec_extr = false; +- rtx tmp; +- +- switch (mode) +- { +- case E_V2SImode: +- case E_V2SFmode: +- if (!mmx_ok) +- break; +- /* FALLTHRU */ +- +- case E_V2DFmode: +- case E_V2DImode: +- case E_V2TImode: +- case E_V4TImode: +- use_vec_extr = true; +- break; +- +- case E_V4SFmode: +- use_vec_extr = TARGET_SSE4_1; +- if (use_vec_extr) +- break; +- +- switch (elt) +- { +- case 0: +- tmp = vec; +- break; +- +- case 1: +- case 3: +- tmp = gen_reg_rtx (mode); +- emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec, +- GEN_INT (elt), GEN_INT (elt), +- GEN_INT (elt+4), GEN_INT (elt+4))); +- break; +- +- case 2: +- tmp = gen_reg_rtx (mode); +- emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec)); +- break; +- +- default: +- gcc_unreachable (); +- } +- vec = tmp; +- use_vec_extr = true; +- elt = 0; +- break; +- +- case E_V4SImode: +- use_vec_extr = TARGET_SSE4_1; +- if (use_vec_extr) +- break; +- +- if (TARGET_SSE2) +- { +- switch (elt) +- { +- case 0: +- tmp = vec; +- break; +- +- case 1: +- case 3: +- tmp = gen_reg_rtx (mode); +- emit_insn (gen_sse2_pshufd_1 (tmp, vec, +- GEN_INT (elt), GEN_INT (elt), +- GEN_INT (elt), GEN_INT (elt))); +- break; +- +- case 2: +- tmp = gen_reg_rtx (mode); +- emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec)); +- break; +- +- default: +- gcc_unreachable (); +- } +- vec = tmp; +- use_vec_extr = true; +- elt = 0; +- } +- else +- { +- /* For SSE1, we have to reuse the V4SF code. 
*/ +- ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), +- gen_lowpart (V4SFmode, vec), elt); +- return; +- } +- break; +- +- case E_V8HImode: +- use_vec_extr = TARGET_SSE2; +- break; +- case E_V4HImode: +- use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); +- break; +- +- case E_V16QImode: +- use_vec_extr = TARGET_SSE4_1; +- break; +- +- case E_V8SFmode: +- if (TARGET_AVX) +- { +- tmp = gen_reg_rtx (V4SFmode); +- if (elt < 4) +- emit_insn (gen_vec_extract_lo_v8sf (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v8sf (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 3); +- return; +- } +- break; +- +- case E_V4DFmode: +- if (TARGET_AVX) +- { +- tmp = gen_reg_rtx (V2DFmode); +- if (elt < 2) +- emit_insn (gen_vec_extract_lo_v4df (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v4df (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 1); +- return; +- } +- break; +- +- case E_V32QImode: +- if (TARGET_AVX) +- { +- tmp = gen_reg_rtx (V16QImode); +- if (elt < 16) +- emit_insn (gen_vec_extract_lo_v32qi (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v32qi (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 15); +- return; +- } +- break; +- +- case E_V16HImode: +- if (TARGET_AVX) +- { +- tmp = gen_reg_rtx (V8HImode); +- if (elt < 8) +- emit_insn (gen_vec_extract_lo_v16hi (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v16hi (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 7); +- return; +- } +- break; +- +- case E_V8SImode: +- if (TARGET_AVX) +- { +- tmp = gen_reg_rtx (V4SImode); +- if (elt < 4) +- emit_insn (gen_vec_extract_lo_v8si (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v8si (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 3); +- return; +- } +- break; +- +- case E_V4DImode: +- if (TARGET_AVX) +- { +- tmp = gen_reg_rtx (V2DImode); +- if (elt < 2) +- emit_insn (gen_vec_extract_lo_v4di (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v4di (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 1); +- return; +- } +- break; +- +- case E_V32HImode: +- if (TARGET_AVX512BW) +- { +- tmp = gen_reg_rtx (V16HImode); +- if (elt < 16) +- emit_insn (gen_vec_extract_lo_v32hi (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v32hi (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 15); +- return; +- } +- break; +- +- case E_V64QImode: +- if (TARGET_AVX512BW) +- { +- tmp = gen_reg_rtx (V32QImode); +- if (elt < 32) +- emit_insn (gen_vec_extract_lo_v64qi (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v64qi (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 31); +- return; +- } +- break; +- +- case E_V16SFmode: +- tmp = gen_reg_rtx (V8SFmode); +- if (elt < 8) +- emit_insn (gen_vec_extract_lo_v16sf (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v16sf (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 7); +- return; +- +- case E_V8DFmode: +- tmp = gen_reg_rtx (V4DFmode); +- if (elt < 4) +- emit_insn (gen_vec_extract_lo_v8df (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v8df (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 3); +- return; +- +- case E_V16SImode: +- tmp = gen_reg_rtx (V8SImode); +- if (elt < 8) +- emit_insn (gen_vec_extract_lo_v16si (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v16si (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 7); +- return; +- +- case E_V8DImode: +- tmp = gen_reg_rtx (V4DImode); 
+- if (elt < 4) +- emit_insn (gen_vec_extract_lo_v8di (tmp, vec)); +- else +- emit_insn (gen_vec_extract_hi_v8di (tmp, vec)); +- ix86_expand_vector_extract (false, target, tmp, elt & 3); +- return; +- +- case E_V8QImode: +- /* ??? Could extract the appropriate HImode element and shift. */ +- default: +- break; +- } +- +- if (use_vec_extr) +- { +- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); +- tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); +- +- /* Let the rtl optimizers know about the zero extension performed. */ +- if (inner_mode == QImode || inner_mode == HImode) +- { +- tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); +- target = gen_lowpart (SImode, target); +- } +- +- emit_insn (gen_rtx_SET (target, tmp)); +- } +- else +- { +- rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); +- +- emit_move_insn (mem, vec); +- +- tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); +- emit_move_insn (target, tmp); +- } +-} +- +-/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC +- to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode. +- The upper bits of DEST are undefined, though they shouldn't cause +- exceptions (some bits from src or all zeros are ok). */ +- +-static void +-emit_reduc_half (rtx dest, rtx src, int i) +-{ +- rtx tem, d = dest; +- switch (GET_MODE (src)) +- { +- case E_V4SFmode: +- if (i == 128) +- tem = gen_sse_movhlps (dest, src, src); +- else +- tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx, +- GEN_INT (1 + 4), GEN_INT (1 + 4)); +- break; +- case E_V2DFmode: +- tem = gen_vec_interleave_highv2df (dest, src, src); +- break; +- case E_V16QImode: +- case E_V8HImode: +- case E_V4SImode: +- case E_V2DImode: +- d = gen_reg_rtx (V1TImode); +- tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src), +- GEN_INT (i / 2)); +- break; +- case E_V8SFmode: +- if (i == 256) +- tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx); +- else +- tem = gen_avx_shufps256 (dest, src, src, +- GEN_INT (i == 128 ? 2 + (3 << 2) : 1)); +- break; +- case E_V4DFmode: +- if (i == 256) +- tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx); +- else +- tem = gen_avx_shufpd256 (dest, src, src, const1_rtx); +- break; +- case E_V32QImode: +- case E_V16HImode: +- case E_V8SImode: +- case E_V4DImode: +- if (i == 256) +- { +- if (GET_MODE (dest) != V4DImode) +- d = gen_reg_rtx (V4DImode); +- tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src), +- gen_lowpart (V4DImode, src), +- const1_rtx); +- } +- else +- { +- d = gen_reg_rtx (V2TImode); +- tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src), +- GEN_INT (i / 2)); +- } +- break; +- case E_V64QImode: +- case E_V32HImode: +- case E_V16SImode: +- case E_V16SFmode: +- case E_V8DImode: +- case E_V8DFmode: +- if (i > 128) +- tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest), +- gen_lowpart (V16SImode, src), +- gen_lowpart (V16SImode, src), +- GEN_INT (0x4 + (i == 512 ? 4 : 0)), +- GEN_INT (0x5 + (i == 512 ? 4 : 0)), +- GEN_INT (0x6 + (i == 512 ? 4 : 0)), +- GEN_INT (0x7 + (i == 512 ? 4 : 0)), +- GEN_INT (0xC), GEN_INT (0xD), +- GEN_INT (0xE), GEN_INT (0xF), +- GEN_INT (0x10), GEN_INT (0x11), +- GEN_INT (0x12), GEN_INT (0x13), +- GEN_INT (0x14), GEN_INT (0x15), +- GEN_INT (0x16), GEN_INT (0x17)); +- else +- tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest), +- gen_lowpart (V16SImode, src), +- GEN_INT (i == 128 ? 0x2 : 0x1), +- GEN_INT (0x3), +- GEN_INT (0x3), +- GEN_INT (0x3), +- GEN_INT (i == 128 ? 
0x6 : 0x5), +- GEN_INT (0x7), +- GEN_INT (0x7), +- GEN_INT (0x7), +- GEN_INT (i == 128 ? 0xA : 0x9), +- GEN_INT (0xB), +- GEN_INT (0xB), +- GEN_INT (0xB), +- GEN_INT (i == 128 ? 0xE : 0xD), +- GEN_INT (0xF), +- GEN_INT (0xF), +- GEN_INT (0xF)); +- break; +- default: +- gcc_unreachable (); +- } +- emit_insn (tem); +- if (d != dest) +- emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); +-} +- +-/* Expand a vector reduction. FN is the binary pattern to reduce; +- DEST is the destination; IN is the input vector. */ +- +-void +-ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) +-{ +- rtx half, dst, vec = in; +- machine_mode mode = GET_MODE (in); +- int i; +- +- /* SSE4 has a special instruction for V8HImode UMIN reduction. */ +- if (TARGET_SSE4_1 +- && mode == V8HImode +- && fn == gen_uminv8hi3) +- { +- emit_insn (gen_sse4_1_phminposuw (dest, in)); +- return; +- } +- +- for (i = GET_MODE_BITSIZE (mode); +- i > GET_MODE_UNIT_BITSIZE (mode); +- i >>= 1) +- { +- half = gen_reg_rtx (mode); +- emit_reduc_half (half, vec, i); +- if (i == GET_MODE_UNIT_BITSIZE (mode) * 2) +- dst = dest; +- else +- dst = gen_reg_rtx (mode); +- emit_insn (fn (dst, half, vec)); +- vec = dst; +- } +-} +- +-/* Target hook for scalar_mode_supported_p. */ +-static bool +-ix86_scalar_mode_supported_p (scalar_mode mode) +-{ +- if (DECIMAL_FLOAT_MODE_P (mode)) +- return default_decimal_float_supported_p (); +- else if (mode == TFmode) +- return true; +- else +- return default_scalar_mode_supported_p (mode); +-} +- +-/* Implements target hook vector_mode_supported_p. */ +-static bool +-ix86_vector_mode_supported_p (machine_mode mode) +-{ +- if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) +- return true; +- if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) +- return true; +- if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) +- return true; +- if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) +- return true; +- if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) +- return true; +- if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) +- return true; +- return false; +-} +- +-/* Target hook for c_mode_for_suffix. */ +-static machine_mode +-ix86_c_mode_for_suffix (char suffix) +-{ +- if (suffix == 'q') +- return TFmode; +- if (suffix == 'w') +- return XFmode; +- +- return VOIDmode; +-} +- +-/* Worker function for TARGET_MD_ASM_ADJUST. +- +- We implement asm flag outputs, and maintain source compatibility +- with the old cc0-based compiler. 
*/ +- +-static rtx_insn * +-ix86_md_asm_adjust (vec &outputs, vec &/*inputs*/, +- vec &constraints, +- vec &clobbers, HARD_REG_SET &clobbered_regs) +-{ +- bool saw_asm_flag = false; +- +- start_sequence (); +- for (unsigned i = 0, n = outputs.length (); i < n; ++i) +- { +- const char *con = constraints[i]; +- if (strncmp (con, "=@cc", 4) != 0) +- continue; +- con += 4; +- if (strchr (con, ',') != NULL) +- { +- error ("alternatives not allowed in asm flag output"); +- continue; +- } +- +- bool invert = false; +- if (con[0] == 'n') +- invert = true, con++; +- +- machine_mode mode = CCmode; +- rtx_code code = UNKNOWN; +- +- switch (con[0]) +- { +- case 'a': +- if (con[1] == 0) +- mode = CCAmode, code = EQ; +- else if (con[1] == 'e' && con[2] == 0) +- mode = CCCmode, code = NE; +- break; +- case 'b': +- if (con[1] == 0) +- mode = CCCmode, code = EQ; +- else if (con[1] == 'e' && con[2] == 0) +- mode = CCAmode, code = NE; +- break; +- case 'c': +- if (con[1] == 0) +- mode = CCCmode, code = EQ; +- break; +- case 'e': +- if (con[1] == 0) +- mode = CCZmode, code = EQ; +- break; +- case 'g': +- if (con[1] == 0) +- mode = CCGCmode, code = GT; +- else if (con[1] == 'e' && con[2] == 0) +- mode = CCGCmode, code = GE; +- break; +- case 'l': +- if (con[1] == 0) +- mode = CCGCmode, code = LT; +- else if (con[1] == 'e' && con[2] == 0) +- mode = CCGCmode, code = LE; +- break; +- case 'o': +- if (con[1] == 0) +- mode = CCOmode, code = EQ; +- break; +- case 'p': +- if (con[1] == 0) +- mode = CCPmode, code = EQ; +- break; +- case 's': +- if (con[1] == 0) +- mode = CCSmode, code = EQ; +- break; +- case 'z': +- if (con[1] == 0) +- mode = CCZmode, code = EQ; +- break; +- } +- if (code == UNKNOWN) +- { +- error ("unknown asm flag output %qs", constraints[i]); +- continue; +- } +- if (invert) +- code = reverse_condition (code); +- +- rtx dest = outputs[i]; +- if (!saw_asm_flag) +- { +- /* This is the first asm flag output. Here we put the flags +- register in as the real output and adjust the condition to +- allow it. */ +- constraints[i] = "=Bf"; +- outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG); +- saw_asm_flag = true; +- } +- else +- { +- /* We don't need the flags register as output twice. */ +- constraints[i] = "=X"; +- outputs[i] = gen_rtx_SCRATCH (SImode); +- } +- +- rtx x = gen_rtx_REG (mode, FLAGS_REG); +- x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx); +- +- machine_mode dest_mode = GET_MODE (dest); +- if (!SCALAR_INT_MODE_P (dest_mode)) +- { +- error ("invalid type for asm flag output"); +- continue; +- } +- +- if (dest_mode == DImode && !TARGET_64BIT) +- dest_mode = SImode; +- +- if (dest_mode != QImode) +- { +- rtx destqi = gen_reg_rtx (QImode); +- emit_insn (gen_rtx_SET (destqi, x)); +- +- if (TARGET_ZERO_EXTEND_WITH_AND +- && optimize_function_for_speed_p (cfun)) +- { +- x = force_reg (dest_mode, const0_rtx); +- +- emit_insn (gen_movstrictqi (gen_lowpart (QImode, x), destqi)); +- } +- else +- { +- x = gen_rtx_ZERO_EXTEND (dest_mode, destqi); +- if (dest_mode == GET_MODE (dest) +- && !register_operand (dest, GET_MODE (dest))) +- x = force_reg (dest_mode, x); +- } +- } +- +- if (dest_mode != GET_MODE (dest)) +- { +- rtx tmp = gen_reg_rtx (SImode); +- +- emit_insn (gen_rtx_SET (tmp, x)); +- emit_insn (gen_zero_extendsidi2 (dest, tmp)); +- } +- else +- emit_insn (gen_rtx_SET (dest, x)); +- } +- rtx_insn *seq = get_insns (); +- end_sequence (); +- +- if (saw_asm_flag) +- return seq; +- else +- { +- /* If we had no asm flag outputs, clobber the flags. 
*/ +- clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG)); +- SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG); +- return NULL; +- } +-} +- +-/* Implements target vector targetm.asm.encode_section_info. */ +- +-static void ATTRIBUTE_UNUSED +-ix86_encode_section_info (tree decl, rtx rtl, int first) +-{ +- default_encode_section_info (decl, rtl, first); +- +- if (ix86_in_large_data_p (decl)) +- SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; +-} +- +-/* Worker function for REVERSE_CONDITION. */ +- +-enum rtx_code +-ix86_reverse_condition (enum rtx_code code, machine_mode mode) +-{ +- return (mode == CCFPmode +- ? reverse_condition_maybe_unordered (code) +- : reverse_condition (code)); +-} +- +-/* Output code to perform an x87 FP register move, from OPERANDS[1] +- to OPERANDS[0]. */ +- +-const char * +-output_387_reg_move (rtx_insn *insn, rtx *operands) +-{ +- if (REG_P (operands[0])) +- { +- if (REG_P (operands[1]) +- && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) +- { +- if (REGNO (operands[0]) == FIRST_STACK_REG) +- return output_387_ffreep (operands, 0); +- return "fstp\t%y0"; +- } +- if (STACK_TOP_P (operands[0])) +- return "fld%Z1\t%y1"; +- return "fst\t%y0"; +- } +- else if (MEM_P (operands[0])) +- { +- gcc_assert (REG_P (operands[1])); +- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) +- return "fstp%Z0\t%y0"; +- else +- { +- /* There is no non-popping store to memory for XFmode. +- So if we need one, follow the store with a load. */ +- if (GET_MODE (operands[0]) == XFmode) +- return "fstp%Z0\t%y0\n\tfld%Z0\t%y0"; +- else +- return "fst%Z0\t%y0"; +- } +- } +- else +- gcc_unreachable(); +-} +- +-/* Output code to perform a conditional jump to LABEL, if C2 flag in +- FP status register is set. */ +- +-void +-ix86_emit_fp_unordered_jump (rtx label) +-{ +- rtx reg = gen_reg_rtx (HImode); +- rtx_insn *insn; +- rtx temp; +- +- emit_insn (gen_x86_fnstsw_1 (reg)); +- +- if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) +- { +- emit_insn (gen_x86_sahf_1 (reg)); +- +- temp = gen_rtx_REG (CCmode, FLAGS_REG); +- temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); +- } +- else +- { +- emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04))); +- +- temp = gen_rtx_REG (CCNOmode, FLAGS_REG); +- temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); +- } +- +- temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, +- gen_rtx_LABEL_REF (VOIDmode, label), +- pc_rtx); +- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, temp)); +- predict_jump (REG_BR_PROB_BASE * 10 / 100); +- JUMP_LABEL (insn) = label; +-} +- +-/* Output code to perform an sinh XFmode calculation. 
*/ +- +-void ix86_emit_i387_sinh (rtx op0, rtx op1) +-{ +- rtx e1 = gen_reg_rtx (XFmode); +- rtx e2 = gen_reg_rtx (XFmode); +- rtx scratch = gen_reg_rtx (HImode); +- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); +- rtx half = const_double_from_real_value (dconsthalf, XFmode); +- rtx cst1, tmp; +- rtx_code_label *jump_label = gen_label_rtx (); +- rtx_insn *insn; +- +- /* scratch = fxam (op1) */ +- emit_insn (gen_fxamxf2_i387 (scratch, op1)); +- +- /* e1 = expm1 (|op1|) */ +- emit_insn (gen_absxf2 (e2, op1)); +- emit_insn (gen_expm1xf2 (e1, e2)); +- +- /* e2 = e1 / (e1 + 1.0) + e1 */ +- cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); +- emit_insn (gen_addxf3 (e2, e1, cst1)); +- emit_insn (gen_divxf3 (e2, e1, e2)); +- emit_insn (gen_addxf3 (e2, e2, e1)); +- +- /* flags = signbit (op1) */ +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); +- +- /* if (flags) then e2 = -e2 */ +- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, +- gen_rtx_EQ (VOIDmode, flags, const0_rtx), +- gen_rtx_LABEL_REF (VOIDmode, jump_label), +- pc_rtx); +- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); +- predict_jump (REG_BR_PROB_BASE * 50 / 100); +- JUMP_LABEL (insn) = jump_label; +- +- emit_insn (gen_negxf2 (e2, e2)); +- +- emit_label (jump_label); +- LABEL_NUSES (jump_label) = 1; +- +- /* op0 = 0.5 * e2 */ +- half = force_reg (XFmode, half); +- emit_insn (gen_mulxf3 (op0, e2, half)); +-} +- +-/* Output code to perform an cosh XFmode calculation. */ +- +-void ix86_emit_i387_cosh (rtx op0, rtx op1) +-{ +- rtx e1 = gen_reg_rtx (XFmode); +- rtx e2 = gen_reg_rtx (XFmode); +- rtx half = const_double_from_real_value (dconsthalf, XFmode); +- rtx cst1; +- +- /* e1 = exp (op1) */ +- emit_insn (gen_expxf2 (e1, op1)); +- +- /* e2 = e1 + 1.0 / e1 */ +- cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); +- emit_insn (gen_divxf3 (e2, cst1, e1)); +- emit_insn (gen_addxf3 (e2, e1, e2)); +- +- /* op0 = 0.5 * e2 */ +- half = force_reg (XFmode, half); +- emit_insn (gen_mulxf3 (op0, e2, half)); +-} +- +-/* Output code to perform an tanh XFmode calculation. */ +- +-void ix86_emit_i387_tanh (rtx op0, rtx op1) +-{ +- rtx e1 = gen_reg_rtx (XFmode); +- rtx e2 = gen_reg_rtx (XFmode); +- rtx scratch = gen_reg_rtx (HImode); +- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); +- rtx cst2, tmp; +- rtx_code_label *jump_label = gen_label_rtx (); +- rtx_insn *insn; +- +- /* scratch = fxam (op1) */ +- emit_insn (gen_fxamxf2_i387 (scratch, op1)); +- +- /* e1 = expm1 (-|2 * op1|) */ +- emit_insn (gen_addxf3 (e2, op1, op1)); +- emit_insn (gen_absxf2 (e2, e2)); +- emit_insn (gen_negxf2 (e2, e2)); +- emit_insn (gen_expm1xf2 (e1, e2)); +- +- /* e2 = e1 / (e1 + 2.0) */ +- cst2 = force_reg (XFmode, CONST2_RTX (XFmode)); +- emit_insn (gen_addxf3 (e2, e1, cst2)); +- emit_insn (gen_divxf3 (e2, e1, e2)); +- +- /* flags = signbit (op1) */ +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); +- +- /* if (!flags) then e2 = -e2 */ +- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, +- gen_rtx_NE (VOIDmode, flags, const0_rtx), +- gen_rtx_LABEL_REF (VOIDmode, jump_label), +- pc_rtx); +- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); +- predict_jump (REG_BR_PROB_BASE * 50 / 100); +- JUMP_LABEL (insn) = jump_label; +- +- emit_insn (gen_negxf2 (e2, e2)); +- +- emit_label (jump_label); +- LABEL_NUSES (jump_label) = 1; +- +- emit_move_insn (op0, e2); +-} +- +-/* Output code to perform an asinh XFmode calculation. 
*/ +- +-void ix86_emit_i387_asinh (rtx op0, rtx op1) +-{ +- rtx e1 = gen_reg_rtx (XFmode); +- rtx e2 = gen_reg_rtx (XFmode); +- rtx scratch = gen_reg_rtx (HImode); +- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); +- rtx cst1, tmp; +- rtx_code_label *jump_label = gen_label_rtx (); +- rtx_insn *insn; +- +- /* e2 = sqrt (op1^2 + 1.0) + 1.0 */ +- emit_insn (gen_mulxf3 (e1, op1, op1)); +- cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); +- emit_insn (gen_addxf3 (e2, e1, cst1)); +- emit_insn (gen_sqrtxf2 (e2, e2)); +- emit_insn (gen_addxf3 (e2, e2, cst1)); +- +- /* e1 = e1 / e2 */ +- emit_insn (gen_divxf3 (e1, e1, e2)); +- +- /* scratch = fxam (op1) */ +- emit_insn (gen_fxamxf2_i387 (scratch, op1)); +- +- /* e1 = e1 + |op1| */ +- emit_insn (gen_absxf2 (e2, op1)); +- emit_insn (gen_addxf3 (e1, e1, e2)); +- +- /* e2 = log1p (e1) */ +- ix86_emit_i387_log1p (e2, e1); +- +- /* flags = signbit (op1) */ +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); +- +- /* if (flags) then e2 = -e2 */ +- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, +- gen_rtx_EQ (VOIDmode, flags, const0_rtx), +- gen_rtx_LABEL_REF (VOIDmode, jump_label), +- pc_rtx); +- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); +- predict_jump (REG_BR_PROB_BASE * 50 / 100); +- JUMP_LABEL (insn) = jump_label; +- +- emit_insn (gen_negxf2 (e2, e2)); +- +- emit_label (jump_label); +- LABEL_NUSES (jump_label) = 1; +- +- emit_move_insn (op0, e2); +-} +- +-/* Output code to perform an acosh XFmode calculation. */ +- +-void ix86_emit_i387_acosh (rtx op0, rtx op1) +-{ +- rtx e1 = gen_reg_rtx (XFmode); +- rtx e2 = gen_reg_rtx (XFmode); +- rtx cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); +- +- /* e2 = sqrt (op1 + 1.0) */ +- emit_insn (gen_addxf3 (e2, op1, cst1)); +- emit_insn (gen_sqrtxf2 (e2, e2)); +- +- /* e1 = sqrt (op1 - 1.0) */ +- emit_insn (gen_subxf3 (e1, op1, cst1)); +- emit_insn (gen_sqrtxf2 (e1, e1)); +- +- /* e1 = e1 * e2 */ +- emit_insn (gen_mulxf3 (e1, e1, e2)); +- +- /* e1 = e1 + op1 */ +- emit_insn (gen_addxf3 (e1, e1, op1)); +- +- /* op0 = log (e1) */ +- emit_insn (gen_logxf2 (op0, e1)); +-} +- +-/* Output code to perform an atanh XFmode calculation. 
*/ +- +-void ix86_emit_i387_atanh (rtx op0, rtx op1) +-{ +- rtx e1 = gen_reg_rtx (XFmode); +- rtx e2 = gen_reg_rtx (XFmode); +- rtx scratch = gen_reg_rtx (HImode); +- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); +- rtx half = const_double_from_real_value (dconsthalf, XFmode); +- rtx cst1, tmp; +- rtx_code_label *jump_label = gen_label_rtx (); +- rtx_insn *insn; +- +- /* scratch = fxam (op1) */ +- emit_insn (gen_fxamxf2_i387 (scratch, op1)); +- +- /* e2 = |op1| */ +- emit_insn (gen_absxf2 (e2, op1)); +- +- /* e1 = -(e2 + e2) / (e2 + 1.0) */ +- cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); +- emit_insn (gen_addxf3 (e1, e2, cst1)); +- emit_insn (gen_addxf3 (e2, e2, e2)); +- emit_insn (gen_negxf2 (e2, e2)); +- emit_insn (gen_divxf3 (e1, e2, e1)); +- +- /* e2 = log1p (e1) */ +- ix86_emit_i387_log1p (e2, e1); +- +- /* flags = signbit (op1) */ +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); +- +- /* if (!flags) then e2 = -e2 */ +- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, +- gen_rtx_NE (VOIDmode, flags, const0_rtx), +- gen_rtx_LABEL_REF (VOIDmode, jump_label), +- pc_rtx); +- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); +- predict_jump (REG_BR_PROB_BASE * 50 / 100); +- JUMP_LABEL (insn) = jump_label; +- +- emit_insn (gen_negxf2 (e2, e2)); +- +- emit_label (jump_label); +- LABEL_NUSES (jump_label) = 1; +- +- /* op0 = 0.5 * e2 */ +- half = force_reg (XFmode, half); +- emit_insn (gen_mulxf3 (op0, e2, half)); +-} +- +-/* Output code to perform a log1p XFmode calculation. */ +- +-void ix86_emit_i387_log1p (rtx op0, rtx op1) +-{ +- rtx_code_label *label1 = gen_label_rtx (); +- rtx_code_label *label2 = gen_label_rtx (); +- +- rtx tmp = gen_reg_rtx (XFmode); +- rtx res = gen_reg_rtx (XFmode); +- rtx cst, cstln2, cst1; +- rtx_insn *insn; +- +- cst = const_double_from_real_value +- (REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), XFmode); +- cstln2 = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */ +- +- emit_insn (gen_absxf2 (tmp, op1)); +- +- cst = force_reg (XFmode, cst); +- ix86_expand_branch (GE, tmp, cst, label1); +- predict_jump (REG_BR_PROB_BASE * 10 / 100); +- insn = get_last_insn (); +- JUMP_LABEL (insn) = label1; +- +- emit_insn (gen_fyl2xp1xf3_i387 (res, op1, cstln2)); +- emit_jump (label2); +- +- emit_label (label1); +- LABEL_NUSES (label1) = 1; +- +- cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); +- emit_insn (gen_rtx_SET (tmp, gen_rtx_PLUS (XFmode, op1, cst1))); +- emit_insn (gen_fyl2xxf3_i387 (res, tmp, cstln2)); +- +- emit_label (label2); +- LABEL_NUSES (label2) = 1; +- +- emit_move_insn (op0, res); +-} +- +-/* Emit code for round calculation. 
*/ +-void ix86_emit_i387_round (rtx op0, rtx op1) +-{ +- machine_mode inmode = GET_MODE (op1); +- machine_mode outmode = GET_MODE (op0); +- rtx e1 = gen_reg_rtx (XFmode); +- rtx e2 = gen_reg_rtx (XFmode); +- rtx scratch = gen_reg_rtx (HImode); +- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); +- rtx half = const_double_from_real_value (dconsthalf, XFmode); +- rtx res = gen_reg_rtx (outmode); +- rtx_code_label *jump_label = gen_label_rtx (); +- rtx (*floor_insn) (rtx, rtx); +- rtx (*neg_insn) (rtx, rtx); +- rtx_insn *insn; +- rtx tmp; +- +- switch (inmode) +- { +- case E_SFmode: +- case E_DFmode: +- tmp = gen_reg_rtx (XFmode); +- +- emit_insn (gen_rtx_SET (tmp, gen_rtx_FLOAT_EXTEND (XFmode, op1))); +- op1 = tmp; +- break; +- case E_XFmode: +- break; +- default: +- gcc_unreachable (); +- } +- +- switch (outmode) +- { +- case E_SFmode: +- floor_insn = gen_frndintxf2_floor; +- neg_insn = gen_negsf2; +- break; +- case E_DFmode: +- floor_insn = gen_frndintxf2_floor; +- neg_insn = gen_negdf2; +- break; +- case E_XFmode: +- floor_insn = gen_frndintxf2_floor; +- neg_insn = gen_negxf2; +- break; +- case E_HImode: +- floor_insn = gen_lfloorxfhi2; +- neg_insn = gen_neghi2; +- break; +- case E_SImode: +- floor_insn = gen_lfloorxfsi2; +- neg_insn = gen_negsi2; +- break; +- case E_DImode: +- floor_insn = gen_lfloorxfdi2; +- neg_insn = gen_negdi2; +- break; +- default: +- gcc_unreachable (); +- } +- +- /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */ +- +- /* scratch = fxam(op1) */ +- emit_insn (gen_fxamxf2_i387 (scratch, op1)); +- +- /* e1 = fabs(op1) */ +- emit_insn (gen_absxf2 (e1, op1)); +- +- /* e2 = e1 + 0.5 */ +- half = force_reg (XFmode, half); +- emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (XFmode, e1, half))); +- +- /* res = floor(e2) */ +- switch (outmode) +- { +- case E_SFmode: +- case E_DFmode: +- { +- tmp = gen_reg_rtx (XFmode); +- +- emit_insn (floor_insn (tmp, e2)); +- emit_insn (gen_rtx_SET (res, +- gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp), +- UNSPEC_TRUNC_NOOP))); +- } +- break; +- default: +- emit_insn (floor_insn (res, e2)); +- } +- +- /* flags = signbit(a) */ +- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); +- +- /* if (flags) then res = -res */ +- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, +- gen_rtx_EQ (VOIDmode, flags, const0_rtx), +- gen_rtx_LABEL_REF (VOIDmode, jump_label), +- pc_rtx); +- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); +- predict_jump (REG_BR_PROB_BASE * 50 / 100); +- JUMP_LABEL (insn) = jump_label; +- +- emit_insn (neg_insn (res, res)); +- +- emit_label (jump_label); +- LABEL_NUSES (jump_label) = 1; +- +- emit_move_insn (op0, res); +-} +- +-/* Output code to perform a Newton-Rhapson approximation of a single precision +- floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. 
*/ +- +-void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) +-{ +- rtx x0, x1, e0, e1; +- +- x0 = gen_reg_rtx (mode); +- e0 = gen_reg_rtx (mode); +- e1 = gen_reg_rtx (mode); +- x1 = gen_reg_rtx (mode); +- +- /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ +- +- b = force_reg (mode, b); +- +- /* x0 = rcp(b) estimate */ +- if (mode == V16SFmode || mode == V8DFmode) +- { +- if (TARGET_AVX512ER) +- { +- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), +- UNSPEC_RCP28))); +- /* res = a * x0 */ +- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0))); +- return; +- } +- else +- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), +- UNSPEC_RCP14))); +- } +- else +- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), +- UNSPEC_RCP))); +- +- /* e0 = x0 * b */ +- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); +- +- /* e0 = x0 * e0 */ +- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); +- +- /* e1 = x0 + x0 */ +- emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); +- +- /* x1 = e1 - e0 */ +- emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); +- +- /* res = a * x1 */ +- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); +-} +- +-/* Output code to perform a Newton-Rhapson approximation of a +- single precision floating point [reciprocal] square root. */ +- +-void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) +-{ +- rtx x0, e0, e1, e2, e3, mthree, mhalf; +- REAL_VALUE_TYPE r; +- int unspec; +- +- x0 = gen_reg_rtx (mode); +- e0 = gen_reg_rtx (mode); +- e1 = gen_reg_rtx (mode); +- e2 = gen_reg_rtx (mode); +- e3 = gen_reg_rtx (mode); +- +- if (TARGET_AVX512ER && mode == V16SFmode) +- { +- if (recip) +- /* res = rsqrt28(a) estimate */ +- emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), +- UNSPEC_RSQRT28))); +- else +- { +- /* x0 = rsqrt28(a) estimate */ +- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), +- UNSPEC_RSQRT28))); +- /* res = rcp28(x0) estimate */ +- emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0), +- UNSPEC_RCP28))); +- } +- return; +- } +- +- real_from_integer (&r, VOIDmode, -3, SIGNED); +- mthree = const_double_from_real_value (r, SFmode); +- +- real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL); +- mhalf = const_double_from_real_value (r, SFmode); +- unspec = UNSPEC_RSQRT; +- +- if (VECTOR_MODE_P (mode)) +- { +- mthree = ix86_build_const_vector (mode, true, mthree); +- mhalf = ix86_build_const_vector (mode, true, mhalf); +- /* There is no 512-bit rsqrt. There is however rsqrt14. */ +- if (GET_MODE_SIZE (mode) == 64) +- unspec = UNSPEC_RSQRT14; +- } +- +- /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) +- rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */ +- +- a = force_reg (mode, a); +- +- /* x0 = rsqrt(a) estimate */ +- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), +- unspec))); +- +- /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */ +- if (!recip) +- { +- rtx zero = force_reg (mode, CONST0_RTX(mode)); +- rtx mask; +- +- /* Handle masked compare. */ +- if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) +- { +- mask = gen_reg_rtx (HImode); +- /* Imm value 0x4 corresponds to not-equal comparison. 
*/ +- emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4))); +- emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask)); +- } +- else +- { +- mask = gen_reg_rtx (mode); +- emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a))); +- emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask))); +- } +- } +- +- /* e0 = x0 * a */ +- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); +- /* e1 = e0 * x0 */ +- emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); +- +- /* e2 = e1 - 3. */ +- mthree = force_reg (mode, mthree); +- emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree))); +- +- mhalf = force_reg (mode, mhalf); +- if (recip) +- /* e3 = -.5 * x0 */ +- emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf))); +- else +- /* e3 = -.5 * e0 */ +- emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf))); +- /* ret = e2 * e3 */ +- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3))); +-} +- +-#ifdef TARGET_SOLARIS +-/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ ++ case IX86_BUILTIN_PSLLD: ++ case IX86_BUILTIN_PSLLD128: ++ case IX86_BUILTIN_PSLLD128_MASK: ++ case IX86_BUILTIN_PSLLD256: ++ case IX86_BUILTIN_PSLLD256_MASK: ++ case IX86_BUILTIN_PSLLD512: ++ case IX86_BUILTIN_PSLLDI: ++ case IX86_BUILTIN_PSLLDI128: ++ case IX86_BUILTIN_PSLLDI128_MASK: ++ case IX86_BUILTIN_PSLLDI256: ++ case IX86_BUILTIN_PSLLDI256_MASK: ++ case IX86_BUILTIN_PSLLDI512: ++ case IX86_BUILTIN_PSLLQ: ++ case IX86_BUILTIN_PSLLQ128: ++ case IX86_BUILTIN_PSLLQ128_MASK: ++ case IX86_BUILTIN_PSLLQ256: ++ case IX86_BUILTIN_PSLLQ256_MASK: ++ case IX86_BUILTIN_PSLLQ512: ++ case IX86_BUILTIN_PSLLQI: ++ case IX86_BUILTIN_PSLLQI128: ++ case IX86_BUILTIN_PSLLQI128_MASK: ++ case IX86_BUILTIN_PSLLQI256: ++ case IX86_BUILTIN_PSLLQI256_MASK: ++ case IX86_BUILTIN_PSLLQI512: ++ case IX86_BUILTIN_PSLLW: ++ case IX86_BUILTIN_PSLLW128: ++ case IX86_BUILTIN_PSLLW128_MASK: ++ case IX86_BUILTIN_PSLLW256: ++ case IX86_BUILTIN_PSLLW256_MASK: ++ case IX86_BUILTIN_PSLLW512_MASK: ++ case IX86_BUILTIN_PSLLWI: ++ case IX86_BUILTIN_PSLLWI128: ++ case IX86_BUILTIN_PSLLWI128_MASK: ++ case IX86_BUILTIN_PSLLWI256: ++ case IX86_BUILTIN_PSLLWI256_MASK: ++ case IX86_BUILTIN_PSLLWI512_MASK: ++ rcode = ASHIFT; ++ is_vshift = false; ++ goto do_shift; ++ case IX86_BUILTIN_PSRAD: ++ case IX86_BUILTIN_PSRAD128: ++ case IX86_BUILTIN_PSRAD128_MASK: ++ case IX86_BUILTIN_PSRAD256: ++ case IX86_BUILTIN_PSRAD256_MASK: ++ case IX86_BUILTIN_PSRAD512: ++ case IX86_BUILTIN_PSRADI: ++ case IX86_BUILTIN_PSRADI128: ++ case IX86_BUILTIN_PSRADI128_MASK: ++ case IX86_BUILTIN_PSRADI256: ++ case IX86_BUILTIN_PSRADI256_MASK: ++ case IX86_BUILTIN_PSRADI512: ++ case IX86_BUILTIN_PSRAQ128_MASK: ++ case IX86_BUILTIN_PSRAQ256_MASK: ++ case IX86_BUILTIN_PSRAQ512: ++ case IX86_BUILTIN_PSRAQI128_MASK: ++ case IX86_BUILTIN_PSRAQI256_MASK: ++ case IX86_BUILTIN_PSRAQI512: ++ case IX86_BUILTIN_PSRAW: ++ case IX86_BUILTIN_PSRAW128: ++ case IX86_BUILTIN_PSRAW128_MASK: ++ case IX86_BUILTIN_PSRAW256: ++ case IX86_BUILTIN_PSRAW256_MASK: ++ case IX86_BUILTIN_PSRAW512: ++ case IX86_BUILTIN_PSRAWI: ++ case IX86_BUILTIN_PSRAWI128: ++ case IX86_BUILTIN_PSRAWI128_MASK: ++ case IX86_BUILTIN_PSRAWI256: ++ case IX86_BUILTIN_PSRAWI256_MASK: ++ case IX86_BUILTIN_PSRAWI512: ++ rcode = ASHIFTRT; ++ is_vshift = false; ++ goto do_shift; ++ case IX86_BUILTIN_PSRLD: ++ case IX86_BUILTIN_PSRLD128: ++ case IX86_BUILTIN_PSRLD128_MASK: ++ case IX86_BUILTIN_PSRLD256: ++ case IX86_BUILTIN_PSRLD256_MASK: ++ case IX86_BUILTIN_PSRLD512: ++ case IX86_BUILTIN_PSRLDI: ++ 
case IX86_BUILTIN_PSRLDI128: ++ case IX86_BUILTIN_PSRLDI128_MASK: ++ case IX86_BUILTIN_PSRLDI256: ++ case IX86_BUILTIN_PSRLDI256_MASK: ++ case IX86_BUILTIN_PSRLDI512: ++ case IX86_BUILTIN_PSRLQ: ++ case IX86_BUILTIN_PSRLQ128: ++ case IX86_BUILTIN_PSRLQ128_MASK: ++ case IX86_BUILTIN_PSRLQ256: ++ case IX86_BUILTIN_PSRLQ256_MASK: ++ case IX86_BUILTIN_PSRLQ512: ++ case IX86_BUILTIN_PSRLQI: ++ case IX86_BUILTIN_PSRLQI128: ++ case IX86_BUILTIN_PSRLQI128_MASK: ++ case IX86_BUILTIN_PSRLQI256: ++ case IX86_BUILTIN_PSRLQI256_MASK: ++ case IX86_BUILTIN_PSRLQI512: ++ case IX86_BUILTIN_PSRLW: ++ case IX86_BUILTIN_PSRLW128: ++ case IX86_BUILTIN_PSRLW128_MASK: ++ case IX86_BUILTIN_PSRLW256: ++ case IX86_BUILTIN_PSRLW256_MASK: ++ case IX86_BUILTIN_PSRLW512: ++ case IX86_BUILTIN_PSRLWI: ++ case IX86_BUILTIN_PSRLWI128: ++ case IX86_BUILTIN_PSRLWI128_MASK: ++ case IX86_BUILTIN_PSRLWI256: ++ case IX86_BUILTIN_PSRLWI256_MASK: ++ case IX86_BUILTIN_PSRLWI512: ++ rcode = LSHIFTRT; ++ is_vshift = false; ++ goto do_shift; ++ case IX86_BUILTIN_PSLLVV16HI: ++ case IX86_BUILTIN_PSLLVV16SI: ++ case IX86_BUILTIN_PSLLVV2DI: ++ case IX86_BUILTIN_PSLLVV2DI_MASK: ++ case IX86_BUILTIN_PSLLVV32HI: ++ case IX86_BUILTIN_PSLLVV4DI: ++ case IX86_BUILTIN_PSLLVV4DI_MASK: ++ case IX86_BUILTIN_PSLLVV4SI: ++ case IX86_BUILTIN_PSLLVV4SI_MASK: ++ case IX86_BUILTIN_PSLLVV8DI: ++ case IX86_BUILTIN_PSLLVV8HI: ++ case IX86_BUILTIN_PSLLVV8SI: ++ case IX86_BUILTIN_PSLLVV8SI_MASK: ++ rcode = ASHIFT; ++ is_vshift = true; ++ goto do_shift; ++ case IX86_BUILTIN_PSRAVQ128: ++ case IX86_BUILTIN_PSRAVQ256: ++ case IX86_BUILTIN_PSRAVV16HI: ++ case IX86_BUILTIN_PSRAVV16SI: ++ case IX86_BUILTIN_PSRAVV32HI: ++ case IX86_BUILTIN_PSRAVV4SI: ++ case IX86_BUILTIN_PSRAVV4SI_MASK: ++ case IX86_BUILTIN_PSRAVV8DI: ++ case IX86_BUILTIN_PSRAVV8HI: ++ case IX86_BUILTIN_PSRAVV8SI: ++ case IX86_BUILTIN_PSRAVV8SI_MASK: ++ rcode = ASHIFTRT; ++ is_vshift = true; ++ goto do_shift; ++ case IX86_BUILTIN_PSRLVV16HI: ++ case IX86_BUILTIN_PSRLVV16SI: ++ case IX86_BUILTIN_PSRLVV2DI: ++ case IX86_BUILTIN_PSRLVV2DI_MASK: ++ case IX86_BUILTIN_PSRLVV32HI: ++ case IX86_BUILTIN_PSRLVV4DI: ++ case IX86_BUILTIN_PSRLVV4DI_MASK: ++ case IX86_BUILTIN_PSRLVV4SI: ++ case IX86_BUILTIN_PSRLVV4SI_MASK: ++ case IX86_BUILTIN_PSRLVV8DI: ++ case IX86_BUILTIN_PSRLVV8HI: ++ case IX86_BUILTIN_PSRLVV8SI: ++ case IX86_BUILTIN_PSRLVV8SI_MASK: ++ rcode = LSHIFTRT; ++ is_vshift = true; ++ goto do_shift; + +-static void +-i386_solaris_elf_named_section (const char *name, unsigned int flags, +- tree decl) +-{ +- /* With Binutils 2.15, the "@unwind" marker must be specified on +- every occurrence of the ".eh_frame" section, not just the first +- one. */ +- if (TARGET_64BIT +- && strcmp (name, ".eh_frame") == 0) +- { +- fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, +- flags & SECTION_WRITE ? "aw" : "a"); +- return; +- } ++ do_shift: ++ gcc_assert (n_args >= 2); ++ if (TREE_CODE (args[0]) != VECTOR_CST) ++ break; ++ mask = HOST_WIDE_INT_M1U; ++ if (n_args > 2) ++ { ++ /* This is masked shift. 
*/ ++ if (!tree_fits_uhwi_p (args[n_args - 1]) ++ || TREE_SIDE_EFFECTS (args[n_args - 2])) ++ break; ++ mask = tree_to_uhwi (args[n_args - 1]); ++ unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); ++ mask |= HOST_WIDE_INT_M1U << elems; ++ if (mask != HOST_WIDE_INT_M1U ++ && TREE_CODE (args[n_args - 2]) != VECTOR_CST) ++ break; ++ if (mask == (HOST_WIDE_INT_M1U << elems)) ++ return args[n_args - 2]; ++ } ++ if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST) ++ break; ++ if (tree tem = (is_vshift ? integer_one_node ++ : ix86_vector_shift_count (args[1]))) ++ { ++ unsigned HOST_WIDE_INT count = tree_to_uhwi (tem); ++ unsigned HOST_WIDE_INT prec ++ = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))); ++ if (count == 0 && mask == HOST_WIDE_INT_M1U) ++ return args[0]; ++ if (count >= prec) ++ { ++ if (rcode == ASHIFTRT) ++ count = prec - 1; ++ else if (mask == HOST_WIDE_INT_M1U) ++ return build_zero_cst (TREE_TYPE (args[0])); ++ } ++ tree countt = NULL_TREE; ++ if (!is_vshift) ++ { ++ if (count >= prec) ++ countt = integer_zero_node; ++ else ++ countt = build_int_cst (integer_type_node, count); ++ } ++ tree_vector_builder builder; ++ if (mask != HOST_WIDE_INT_M1U || is_vshift) ++ builder.new_vector (TREE_TYPE (args[0]), ++ TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])), ++ 1); ++ else ++ builder.new_unary_operation (TREE_TYPE (args[0]), args[0], ++ false); ++ unsigned int cnt = builder.encoded_nelts (); ++ for (unsigned int i = 0; i < cnt; ++i) ++ { ++ tree elt = VECTOR_CST_ELT (args[0], i); ++ if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt)) ++ return NULL_TREE; ++ tree type = TREE_TYPE (elt); ++ if (rcode == LSHIFTRT) ++ elt = fold_convert (unsigned_type_for (type), elt); ++ if (is_vshift) ++ { ++ countt = VECTOR_CST_ELT (args[1], i); ++ if (TREE_CODE (countt) != INTEGER_CST ++ || TREE_OVERFLOW (countt)) ++ return NULL_TREE; ++ if (wi::neg_p (wi::to_wide (countt)) ++ || wi::to_widest (countt) >= prec) ++ { ++ if (rcode == ASHIFTRT) ++ countt = build_int_cst (TREE_TYPE (countt), ++ prec - 1); ++ else ++ { ++ elt = build_zero_cst (TREE_TYPE (elt)); ++ countt = build_zero_cst (TREE_TYPE (countt)); ++ } ++ } ++ } ++ else if (count >= prec) ++ elt = build_zero_cst (TREE_TYPE (elt)); ++ elt = const_binop (rcode == ASHIFT ++ ? LSHIFT_EXPR : RSHIFT_EXPR, ++ TREE_TYPE (elt), elt, countt); ++ if (!elt || TREE_CODE (elt) != INTEGER_CST) ++ return NULL_TREE; ++ if (rcode == LSHIFTRT) ++ elt = fold_convert (type, elt); ++ if ((mask & (HOST_WIDE_INT_1U << i)) == 0) ++ { ++ elt = VECTOR_CST_ELT (args[n_args - 2], i); ++ if (TREE_CODE (elt) != INTEGER_CST ++ || TREE_OVERFLOW (elt)) ++ return NULL_TREE; ++ } ++ builder.quick_push (elt); ++ } ++ return builder.build (); ++ } ++ break; + +-#ifndef USE_GAS +- if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) +- { +- solaris_elf_asm_comdat_section (name, flags, decl); +- return; ++ default: ++ break; ++ } + } + +- /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the +- SPARC assembler. One cannot mix single-letter flags and #exclude, so +- only emit the latter here. */ +- if (flags & SECTION_EXCLUDE) +- { +- fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name); +- return; +- } ++#ifdef SUBTARGET_FOLD_BUILTIN ++ return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); + #endif + +- default_elf_asm_named_section (name, flags, decl); ++ return NULL_TREE; + } +-#endif /* TARGET_SOLARIS */ + +-/* Return the mangling of TYPE if it is an extended fundamental type. 
*/ ++/* Fold a MD builtin (use ix86_fold_builtin for folding into ++ constant) in GIMPLE. */ + +-static const char * +-ix86_mangle_type (const_tree type) ++bool ++ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) + { +- type = TYPE_MAIN_VARIANT (type); +- +- if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE +- && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) +- return NULL; ++ gimple *stmt = gsi_stmt (*gsi); ++ tree fndecl = gimple_call_fndecl (stmt); ++ gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); ++ int n_args = gimple_call_num_args (stmt); ++ enum ix86_builtins fn_code ++ = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); ++ tree decl = NULL_TREE; ++ tree arg0, arg1, arg2; ++ enum rtx_code rcode; ++ unsigned HOST_WIDE_INT count; ++ bool is_vshift; + +- switch (TYPE_MODE (type)) ++ switch (fn_code) + { +- case E_TFmode: +- /* __float128 is "g". */ +- return "g"; +- case E_XFmode: +- /* "long double" or __float80 is "e". */ +- return "e"; +- default: +- return NULL; +- } +-} ++ case IX86_BUILTIN_TZCNT32: ++ decl = builtin_decl_implicit (BUILT_IN_CTZ); ++ goto fold_tzcnt_lzcnt; + +-static GTY(()) tree ix86_tls_stack_chk_guard_decl; ++ case IX86_BUILTIN_TZCNT64: ++ decl = builtin_decl_implicit (BUILT_IN_CTZLL); ++ goto fold_tzcnt_lzcnt; + +-static tree +-ix86_stack_protect_guard (void) +-{ +- if (TARGET_SSP_TLS_GUARD) +- { +- tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1); +- int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg); +- tree type = build_qualified_type (type_node, qual); +- tree t; ++ case IX86_BUILTIN_LZCNT32: ++ decl = builtin_decl_implicit (BUILT_IN_CLZ); ++ goto fold_tzcnt_lzcnt; ++ ++ case IX86_BUILTIN_LZCNT64: ++ decl = builtin_decl_implicit (BUILT_IN_CLZLL); ++ goto fold_tzcnt_lzcnt; ++ ++ fold_tzcnt_lzcnt: ++ gcc_assert (n_args == 1); ++ arg0 = gimple_call_arg (stmt, 0); ++ if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt)) ++ { ++ int prec = TYPE_PRECISION (TREE_TYPE (arg0)); ++ /* If arg0 is provably non-zero, optimize into generic ++ __builtin_c[tl]z{,ll} function the middle-end handles ++ better. 
*/ ++ if (!expr_not_equal_to (arg0, wi::zero (prec))) ++ return false; ++ ++ location_t loc = gimple_location (stmt); ++ gimple *g = gimple_build_call (decl, 1, arg0); ++ gimple_set_location (g, loc); ++ tree lhs = make_ssa_name (integer_type_node); ++ gimple_call_set_lhs (g, lhs); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs); ++ gimple_set_location (g, loc); ++ gsi_replace (gsi, g, false); ++ return true; ++ } ++ break; ++ ++ case IX86_BUILTIN_BZHI32: ++ case IX86_BUILTIN_BZHI64: ++ gcc_assert (n_args == 2); ++ arg1 = gimple_call_arg (stmt, 1); ++ if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt)) ++ { ++ unsigned int idx = tree_to_uhwi (arg1) & 0xff; ++ arg0 = gimple_call_arg (stmt, 0); ++ if (idx < TYPE_PRECISION (TREE_TYPE (arg0))) ++ break; ++ location_t loc = gimple_location (stmt); ++ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); ++ gimple_set_location (g, loc); ++ gsi_replace (gsi, g, false); ++ return true; ++ } ++ break; ++ ++ case IX86_BUILTIN_PDEP32: ++ case IX86_BUILTIN_PDEP64: ++ case IX86_BUILTIN_PEXT32: ++ case IX86_BUILTIN_PEXT64: ++ gcc_assert (n_args == 2); ++ arg1 = gimple_call_arg (stmt, 1); ++ if (integer_all_onesp (arg1) && gimple_call_lhs (stmt)) ++ { ++ location_t loc = gimple_location (stmt); ++ arg0 = gimple_call_arg (stmt, 0); ++ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); ++ gimple_set_location (g, loc); ++ gsi_replace (gsi, g, false); ++ return true; ++ } ++ break; ++ ++ case IX86_BUILTIN_PSLLD: ++ case IX86_BUILTIN_PSLLD128: ++ case IX86_BUILTIN_PSLLD128_MASK: ++ case IX86_BUILTIN_PSLLD256: ++ case IX86_BUILTIN_PSLLD256_MASK: ++ case IX86_BUILTIN_PSLLD512: ++ case IX86_BUILTIN_PSLLDI: ++ case IX86_BUILTIN_PSLLDI128: ++ case IX86_BUILTIN_PSLLDI128_MASK: ++ case IX86_BUILTIN_PSLLDI256: ++ case IX86_BUILTIN_PSLLDI256_MASK: ++ case IX86_BUILTIN_PSLLDI512: ++ case IX86_BUILTIN_PSLLQ: ++ case IX86_BUILTIN_PSLLQ128: ++ case IX86_BUILTIN_PSLLQ128_MASK: ++ case IX86_BUILTIN_PSLLQ256: ++ case IX86_BUILTIN_PSLLQ256_MASK: ++ case IX86_BUILTIN_PSLLQ512: ++ case IX86_BUILTIN_PSLLQI: ++ case IX86_BUILTIN_PSLLQI128: ++ case IX86_BUILTIN_PSLLQI128_MASK: ++ case IX86_BUILTIN_PSLLQI256: ++ case IX86_BUILTIN_PSLLQI256_MASK: ++ case IX86_BUILTIN_PSLLQI512: ++ case IX86_BUILTIN_PSLLW: ++ case IX86_BUILTIN_PSLLW128: ++ case IX86_BUILTIN_PSLLW128_MASK: ++ case IX86_BUILTIN_PSLLW256: ++ case IX86_BUILTIN_PSLLW256_MASK: ++ case IX86_BUILTIN_PSLLW512_MASK: ++ case IX86_BUILTIN_PSLLWI: ++ case IX86_BUILTIN_PSLLWI128: ++ case IX86_BUILTIN_PSLLWI128_MASK: ++ case IX86_BUILTIN_PSLLWI256: ++ case IX86_BUILTIN_PSLLWI256_MASK: ++ case IX86_BUILTIN_PSLLWI512_MASK: ++ rcode = ASHIFT; ++ is_vshift = false; ++ goto do_shift; ++ case IX86_BUILTIN_PSRAD: ++ case IX86_BUILTIN_PSRAD128: ++ case IX86_BUILTIN_PSRAD128_MASK: ++ case IX86_BUILTIN_PSRAD256: ++ case IX86_BUILTIN_PSRAD256_MASK: ++ case IX86_BUILTIN_PSRAD512: ++ case IX86_BUILTIN_PSRADI: ++ case IX86_BUILTIN_PSRADI128: ++ case IX86_BUILTIN_PSRADI128_MASK: ++ case IX86_BUILTIN_PSRADI256: ++ case IX86_BUILTIN_PSRADI256_MASK: ++ case IX86_BUILTIN_PSRADI512: ++ case IX86_BUILTIN_PSRAQ128_MASK: ++ case IX86_BUILTIN_PSRAQ256_MASK: ++ case IX86_BUILTIN_PSRAQ512: ++ case IX86_BUILTIN_PSRAQI128_MASK: ++ case IX86_BUILTIN_PSRAQI256_MASK: ++ case IX86_BUILTIN_PSRAQI512: ++ case IX86_BUILTIN_PSRAW: ++ case IX86_BUILTIN_PSRAW128: ++ case IX86_BUILTIN_PSRAW128_MASK: ++ case IX86_BUILTIN_PSRAW256: ++ case IX86_BUILTIN_PSRAW256_MASK: ++ case 
IX86_BUILTIN_PSRAW512: ++ case IX86_BUILTIN_PSRAWI: ++ case IX86_BUILTIN_PSRAWI128: ++ case IX86_BUILTIN_PSRAWI128_MASK: ++ case IX86_BUILTIN_PSRAWI256: ++ case IX86_BUILTIN_PSRAWI256_MASK: ++ case IX86_BUILTIN_PSRAWI512: ++ rcode = ASHIFTRT; ++ is_vshift = false; ++ goto do_shift; ++ case IX86_BUILTIN_PSRLD: ++ case IX86_BUILTIN_PSRLD128: ++ case IX86_BUILTIN_PSRLD128_MASK: ++ case IX86_BUILTIN_PSRLD256: ++ case IX86_BUILTIN_PSRLD256_MASK: ++ case IX86_BUILTIN_PSRLD512: ++ case IX86_BUILTIN_PSRLDI: ++ case IX86_BUILTIN_PSRLDI128: ++ case IX86_BUILTIN_PSRLDI128_MASK: ++ case IX86_BUILTIN_PSRLDI256: ++ case IX86_BUILTIN_PSRLDI256_MASK: ++ case IX86_BUILTIN_PSRLDI512: ++ case IX86_BUILTIN_PSRLQ: ++ case IX86_BUILTIN_PSRLQ128: ++ case IX86_BUILTIN_PSRLQ128_MASK: ++ case IX86_BUILTIN_PSRLQ256: ++ case IX86_BUILTIN_PSRLQ256_MASK: ++ case IX86_BUILTIN_PSRLQ512: ++ case IX86_BUILTIN_PSRLQI: ++ case IX86_BUILTIN_PSRLQI128: ++ case IX86_BUILTIN_PSRLQI128_MASK: ++ case IX86_BUILTIN_PSRLQI256: ++ case IX86_BUILTIN_PSRLQI256_MASK: ++ case IX86_BUILTIN_PSRLQI512: ++ case IX86_BUILTIN_PSRLW: ++ case IX86_BUILTIN_PSRLW128: ++ case IX86_BUILTIN_PSRLW128_MASK: ++ case IX86_BUILTIN_PSRLW256: ++ case IX86_BUILTIN_PSRLW256_MASK: ++ case IX86_BUILTIN_PSRLW512: ++ case IX86_BUILTIN_PSRLWI: ++ case IX86_BUILTIN_PSRLWI128: ++ case IX86_BUILTIN_PSRLWI128_MASK: ++ case IX86_BUILTIN_PSRLWI256: ++ case IX86_BUILTIN_PSRLWI256_MASK: ++ case IX86_BUILTIN_PSRLWI512: ++ rcode = LSHIFTRT; ++ is_vshift = false; ++ goto do_shift; ++ case IX86_BUILTIN_PSLLVV16HI: ++ case IX86_BUILTIN_PSLLVV16SI: ++ case IX86_BUILTIN_PSLLVV2DI: ++ case IX86_BUILTIN_PSLLVV2DI_MASK: ++ case IX86_BUILTIN_PSLLVV32HI: ++ case IX86_BUILTIN_PSLLVV4DI: ++ case IX86_BUILTIN_PSLLVV4DI_MASK: ++ case IX86_BUILTIN_PSLLVV4SI: ++ case IX86_BUILTIN_PSLLVV4SI_MASK: ++ case IX86_BUILTIN_PSLLVV8DI: ++ case IX86_BUILTIN_PSLLVV8HI: ++ case IX86_BUILTIN_PSLLVV8SI: ++ case IX86_BUILTIN_PSLLVV8SI_MASK: ++ rcode = ASHIFT; ++ is_vshift = true; ++ goto do_shift; ++ case IX86_BUILTIN_PSRAVQ128: ++ case IX86_BUILTIN_PSRAVQ256: ++ case IX86_BUILTIN_PSRAVV16HI: ++ case IX86_BUILTIN_PSRAVV16SI: ++ case IX86_BUILTIN_PSRAVV32HI: ++ case IX86_BUILTIN_PSRAVV4SI: ++ case IX86_BUILTIN_PSRAVV4SI_MASK: ++ case IX86_BUILTIN_PSRAVV8DI: ++ case IX86_BUILTIN_PSRAVV8HI: ++ case IX86_BUILTIN_PSRAVV8SI: ++ case IX86_BUILTIN_PSRAVV8SI_MASK: ++ rcode = ASHIFTRT; ++ is_vshift = true; ++ goto do_shift; ++ case IX86_BUILTIN_PSRLVV16HI: ++ case IX86_BUILTIN_PSRLVV16SI: ++ case IX86_BUILTIN_PSRLVV2DI: ++ case IX86_BUILTIN_PSRLVV2DI_MASK: ++ case IX86_BUILTIN_PSRLVV32HI: ++ case IX86_BUILTIN_PSRLVV4DI: ++ case IX86_BUILTIN_PSRLVV4DI_MASK: ++ case IX86_BUILTIN_PSRLVV4SI: ++ case IX86_BUILTIN_PSRLVV4SI_MASK: ++ case IX86_BUILTIN_PSRLVV8DI: ++ case IX86_BUILTIN_PSRLVV8HI: ++ case IX86_BUILTIN_PSRLVV8SI: ++ case IX86_BUILTIN_PSRLVV8SI_MASK: ++ rcode = LSHIFTRT; ++ is_vshift = true; ++ goto do_shift; + +- if (global_options_set.x_ix86_stack_protector_guard_symbol_str) ++ do_shift: ++ gcc_assert (n_args >= 2); ++ arg0 = gimple_call_arg (stmt, 0); ++ arg1 = gimple_call_arg (stmt, 1); ++ if (n_args > 2) + { +- t = ix86_tls_stack_chk_guard_decl; +- +- if (t == NULL) +- { +- rtx x; +- +- t = build_decl +- (UNKNOWN_LOCATION, VAR_DECL, +- get_identifier (ix86_stack_protector_guard_symbol_str), +- type); +- TREE_STATIC (t) = 1; +- TREE_PUBLIC (t) = 1; +- DECL_EXTERNAL (t) = 1; +- TREE_USED (t) = 1; +- TREE_THIS_VOLATILE (t) = 1; +- DECL_ARTIFICIAL (t) = 1; +- DECL_IGNORED_P (t) = 1; +- +- /* Do not share RTL 
as the declaration is visible outside of +- current function. */ +- x = DECL_RTL (t); +- RTX_FLAG (x, used) = 1; +- +- ix86_tls_stack_chk_guard_decl = t; +- } ++ /* This is masked shift. Only optimize if the mask is all ones. */ ++ tree argl = gimple_call_arg (stmt, n_args - 1); ++ if (!tree_fits_uhwi_p (argl)) ++ break; ++ unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl); ++ unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); ++ if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) ++ break; + } +- else ++ if (is_vshift) + { +- tree asptrtype = build_pointer_type (type); +- +- t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset); +- t = build2 (MEM_REF, asptrtype, t, +- build_int_cst (asptrtype, 0)); +- TREE_THIS_VOLATILE (t) = 1; ++ if (TREE_CODE (arg1) != VECTOR_CST) ++ break; ++ count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))); ++ if (integer_zerop (arg1)) ++ count = 0; ++ else if (rcode == ASHIFTRT) ++ break; ++ else ++ for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i) ++ { ++ tree elt = VECTOR_CST_ELT (arg1, i); ++ if (!wi::neg_p (wi::to_wide (elt)) ++ && wi::to_widest (elt) < count) ++ return false; ++ } + } +- +- return t; +- } +- +- return default_stack_protect_guard (); +-} +- +-/* For 32-bit code we can save PIC register setup by using +- __stack_chk_fail_local hidden function instead of calling +- __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC +- register, so it is better to call __stack_chk_fail directly. */ +- +-static tree ATTRIBUTE_UNUSED +-ix86_stack_protect_fail (void) +-{ +- return TARGET_64BIT +- ? default_external_stack_protect_fail () +- : default_hidden_stack_protect_fail (); +-} +- +-/* Select a format to encode pointers in exception handling data. CODE +- is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is +- true if the symbol may be affected by dynamic relocations. +- +- ??? All x86 object file formats are capable of representing this. +- After all, the relocation needed is the same as for the call insn. +- Whether or not a particular assembler allows us to enter such, I +- guess we'll have to see. */ +-int +-asm_preferred_eh_data_format (int code, int global) +-{ +- if (flag_pic) +- { +- int type = DW_EH_PE_sdata8; +- if (!TARGET_64BIT +- || ix86_cmodel == CM_SMALL_PIC +- || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) +- type = DW_EH_PE_sdata4; +- return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; +- } +- if (ix86_cmodel == CM_SMALL +- || (ix86_cmodel == CM_MEDIUM && code)) +- return DW_EH_PE_udata4; +- return DW_EH_PE_absptr; +-} +- +-/* Expand copysign from SIGN to the positive value ABS_VALUE +- storing in RESULT. If MASK is non-null, it shall be a mask to mask out +- the sign-bit. */ +-static void +-ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask) +-{ +- machine_mode mode = GET_MODE (sign); +- rtx sgn = gen_reg_rtx (mode); +- if (mask == NULL_RTX) +- { +- machine_mode vmode; +- +- if (mode == SFmode) +- vmode = V4SFmode; +- else if (mode == DFmode) +- vmode = V2DFmode; + else +- vmode = mode; +- +- mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false); +- if (!VECTOR_MODE_P (mode)) + { +- /* We need to generate a scalar mode mask in this case. 
*/ +- rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); +- tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); +- mask = gen_reg_rtx (mode); +- emit_insn (gen_rtx_SET (mask, tmp)); ++ arg1 = ix86_vector_shift_count (arg1); ++ if (!arg1) ++ break; ++ count = tree_to_uhwi (arg1); + } +- } +- else +- mask = gen_rtx_NOT (mode, mask); +- emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign))); +- emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn))); +-} ++ if (count == 0) ++ { ++ /* Just return the first argument for shift by 0. */ ++ location_t loc = gimple_location (stmt); ++ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); ++ gimple_set_location (g, loc); ++ gsi_replace (gsi, g, false); ++ return true; ++ } ++ if (rcode != ASHIFTRT ++ && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)))) ++ { ++ /* For shift counts equal or greater than precision, except for ++ arithmetic right shift the result is zero. */ ++ location_t loc = gimple_location (stmt); ++ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), ++ build_zero_cst (TREE_TYPE (arg0))); ++ gimple_set_location (g, loc); ++ gsi_replace (gsi, g, false); ++ return true; ++ } ++ break; + +-/* Expand fabs (OP0) and return a new rtx that holds the result. The +- mask for masking out the sign-bit is stored in *SMASK, if that is +- non-null. */ +-static rtx +-ix86_expand_sse_fabs (rtx op0, rtx *smask) +-{ +- machine_mode vmode, mode = GET_MODE (op0); +- rtx xa, mask; ++ case IX86_BUILTIN_SHUFPD: ++ arg2 = gimple_call_arg (stmt, 2); ++ if (TREE_CODE (arg2) == INTEGER_CST) ++ { ++ location_t loc = gimple_location (stmt); ++ unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2); ++ arg0 = gimple_call_arg (stmt, 0); ++ arg1 = gimple_call_arg (stmt, 1); ++ tree itype = long_long_integer_type_node; ++ tree vtype = build_vector_type (itype, 2); /* V2DI */ ++ tree_vector_builder elts (vtype, 2, 1); ++ /* Ignore bits other than the lowest 2. */ ++ elts.quick_push (build_int_cst (itype, imask & 1)); ++ imask >>= 1; ++ elts.quick_push (build_int_cst (itype, 2 + (imask & 1))); ++ tree omask = elts.build (); ++ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), ++ VEC_PERM_EXPR, ++ arg0, arg1, omask); ++ gimple_set_location (g, loc); ++ gsi_replace (gsi, g, false); ++ return true; ++ } ++ // Do not error yet, the constant could be propagated later? ++ break; + +- xa = gen_reg_rtx (mode); +- if (mode == SFmode) +- vmode = V4SFmode; +- else if (mode == DFmode) +- vmode = V2DFmode; +- else +- vmode = mode; +- mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true); +- if (!VECTOR_MODE_P (mode)) +- { +- /* We need to generate a scalar mode mask in this case. */ +- rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); +- tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); +- mask = gen_reg_rtx (mode); +- emit_insn (gen_rtx_SET (mask, tmp)); ++ default: ++ break; + } +- emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask))); +- +- if (smask) +- *smask = mask; +- +- return xa; +-} +- +-/* Expands a comparison of OP0 with OP1 using comparison code CODE, +- swapping the operands if SWAP_OPERANDS is true. The expanded +- code is a forward jump to a newly created label in case the +- comparison is true. The generated label rtx is returned. 
*/ +-static rtx_code_label * +-ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1, +- bool swap_operands) +-{ +- bool unordered_compare = ix86_unordered_fp_compare (code); +- rtx_code_label *label; +- rtx tmp, reg; +- +- if (swap_operands) +- std::swap (op0, op1); +- +- label = gen_label_rtx (); +- tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); +- if (unordered_compare) +- tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP); +- reg = gen_rtx_REG (CCFPmode, FLAGS_REG); +- emit_insn (gen_rtx_SET (reg, tmp)); +- tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx); +- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, +- gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); +- tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); +- JUMP_LABEL (tmp) = label; +- +- return label; +-} +- +-/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1 +- using comparison code CODE. Operands are swapped for the comparison if +- SWAP_OPERANDS is true. Returns a rtx for the generated mask. */ +-static rtx +-ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, +- bool swap_operands) +-{ +- rtx (*insn)(rtx, rtx, rtx, rtx); +- machine_mode mode = GET_MODE (op0); +- rtx mask = gen_reg_rtx (mode); +- +- if (swap_operands) +- std::swap (op0, op1); +- +- insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse; +- +- emit_insn (insn (mask, op0, op1, +- gen_rtx_fmt_ee (code, mode, op0, op1))); +- return mask; +-} +- +-/* Generate and return a rtx of mode MODE for 2**n where n is the number +- of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */ +-static rtx +-ix86_gen_TWO52 (machine_mode mode) +-{ +- REAL_VALUE_TYPE TWO52r; +- rtx TWO52; +- +- real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23); +- TWO52 = const_double_from_real_value (TWO52r, mode); +- TWO52 = force_reg (mode, TWO52); +- +- return TWO52; +-} +- +-/* Expand SSE sequence for computing lround from OP1 storing +- into OP0. */ +-void +-ix86_expand_lround (rtx op0, rtx op1) +-{ +- /* C code for the stuff we're doing below: +- tmp = op1 + copysign (nextafter (0.5, 0.0), op1) +- return (long)tmp; +- */ +- machine_mode mode = GET_MODE (op1); +- const struct real_format *fmt; +- REAL_VALUE_TYPE pred_half, half_minus_pred_half; +- rtx adj; +- +- /* load nextafter (0.5, 0.0) */ +- fmt = REAL_MODE_FORMAT (mode); +- real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); +- real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); + +- /* adj = copysign (0.5, op1) */ +- adj = force_reg (mode, const_double_from_real_value (pred_half, mode)); +- ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX); +- +- /* adj = op1 + adj */ +- adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT); +- +- /* op0 = (imode)adj */ +- expand_fix (op0, adj, 0); +-} +- +-/* Expand SSE2 sequence for computing lround from OPERAND1 storing +- into OPERAND0. */ +-void +-ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) +-{ +- /* C code for the stuff we're doing below (for do_floor): +- xi = (long)op1; +- xi -= (double)xi > op1 ? 1 : 0; +- return xi; +- */ +- machine_mode fmode = GET_MODE (op1); +- machine_mode imode = GET_MODE (op0); +- rtx ireg, freg, tmp; +- rtx_code_label *label; +- +- /* reg = (long)op1 */ +- ireg = gen_reg_rtx (imode); +- expand_fix (ireg, op1, 0); +- +- /* freg = (double)reg */ +- freg = gen_reg_rtx (fmode); +- expand_float (freg, ireg, 0); +- +- /* ireg = (freg > op1) ? 
ireg - 1 : ireg */ +- label = ix86_expand_sse_compare_and_jump (UNLE, +- freg, op1, !do_floor); +- tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS, +- ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT); +- emit_move_insn (ireg, tmp); +- +- emit_label (label); +- LABEL_NUSES (label) = 1; +- +- emit_move_insn (op0, ireg); ++ return false; + } + +-/* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */ +-void +-ix86_expand_rint (rtx operand0, rtx operand1) +-{ +- /* C code for the stuff we're doing below: +- xa = fabs (operand1); +- if (!isless (xa, 2**52)) +- return operand1; +- two52 = 2**52; +- if (flag_rounding_math) +- { +- two52 = copysign (two52, operand1); +- xa = operand1; +- } +- xa = xa + two52 - two52; +- return copysign (xa, operand1); +- */ +- machine_mode mode = GET_MODE (operand0); +- rtx res, xa, TWO52, two52, mask; +- rtx_code_label *label; +- +- res = gen_reg_rtx (mode); +- emit_move_insn (res, operand1); +- +- /* xa = abs (operand1) */ +- xa = ix86_expand_sse_fabs (res, &mask); +- +- /* if (!isless (xa, TWO52)) goto label; */ +- TWO52 = ix86_gen_TWO52 (mode); +- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); +- +- two52 = TWO52; +- if (flag_rounding_math) +- { +- two52 = gen_reg_rtx (mode); +- ix86_sse_copysign_to_positive (two52, TWO52, res, mask); +- xa = res; +- } +- +- xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT); +- xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT); +- +- ix86_sse_copysign_to_positive (res, xa, res, mask); +- +- emit_label (label); +- LABEL_NUSES (label) = 1; ++/* Handler for an SVML-style interface to ++ a library with vectorized intrinsics. */ + +- emit_move_insn (operand0, res); +-} ++tree ++ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in) ++{ ++ char name[20]; ++ tree fntype, new_fndecl, args; ++ unsigned arity; ++ const char *bname; ++ machine_mode el_mode, in_mode; ++ int n, in_n; + +-/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing +- into OPERAND0. */ +-void +-ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) +-{ +- /* C code for the stuff we expand below. +- double xa = fabs (x), x2; +- if (!isless (xa, TWO52)) +- return x; +- xa = xa + TWO52 - TWO52; +- x2 = copysign (xa, x); +- Compensate. Floor: +- if (x2 > x) +- x2 -= 1; +- Compensate. Ceil: +- if (x2 < x) +- x2 += 1; +- if (HONOR_SIGNED_ZEROS (mode)) +- x2 = copysign (x2, x); +- return x2; +- */ +- machine_mode mode = GET_MODE (operand0); +- rtx xa, TWO52, tmp, one, res, mask; +- rtx_code_label *label; ++ /* The SVML is suitable for unsafe math only. */ ++ if (!flag_unsafe_math_optimizations) ++ return NULL_TREE; + +- TWO52 = ix86_gen_TWO52 (mode); ++ el_mode = TYPE_MODE (TREE_TYPE (type_out)); ++ n = TYPE_VECTOR_SUBPARTS (type_out); ++ in_mode = TYPE_MODE (TREE_TYPE (type_in)); ++ in_n = TYPE_VECTOR_SUBPARTS (type_in); ++ if (el_mode != in_mode ++ || n != in_n) ++ return NULL_TREE; + +- /* Temporary for holding the result, initialized to the input +- operand to ease control flow. 
*/ +- res = gen_reg_rtx (mode); +- emit_move_insn (res, operand1); ++ switch (fn) ++ { ++ CASE_CFN_EXP: ++ CASE_CFN_LOG: ++ CASE_CFN_LOG10: ++ CASE_CFN_POW: ++ CASE_CFN_TANH: ++ CASE_CFN_TAN: ++ CASE_CFN_ATAN: ++ CASE_CFN_ATAN2: ++ CASE_CFN_ATANH: ++ CASE_CFN_CBRT: ++ CASE_CFN_SINH: ++ CASE_CFN_SIN: ++ CASE_CFN_ASINH: ++ CASE_CFN_ASIN: ++ CASE_CFN_COSH: ++ CASE_CFN_COS: ++ CASE_CFN_ACOSH: ++ CASE_CFN_ACOS: ++ if ((el_mode != DFmode || n != 2) ++ && (el_mode != SFmode || n != 4)) ++ return NULL_TREE; ++ break; + +- /* xa = abs (operand1) */ +- xa = ix86_expand_sse_fabs (res, &mask); ++ default: ++ return NULL_TREE; ++ } + +- /* if (!isless (xa, TWO52)) goto label; */ +- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); ++ bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + +- /* xa = xa + TWO52 - TWO52; */ +- xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); +- xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); ++ if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF) ++ strcpy (name, "vmlsLn4"); ++ else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG) ++ strcpy (name, "vmldLn2"); ++ else if (n == 4) ++ { ++ sprintf (name, "vmls%s", bname+10); ++ name[strlen (name)-1] = '4'; ++ } ++ else ++ sprintf (name, "vmld%s2", bname+10); + +- /* xa = copysign (xa, operand1) */ +- ix86_sse_copysign_to_positive (xa, xa, res, mask); ++ /* Convert to uppercase. */ ++ name[4] &= ~0x20; + +- /* generate 1.0 */ +- one = force_reg (mode, const_double_from_real_value (dconst1, mode)); ++ arity = 0; ++ for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) ++ arity++; + +- /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ +- tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); +- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); +- tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, +- xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); +- if (!do_floor && HONOR_SIGNED_ZEROS (mode)) +- ix86_sse_copysign_to_positive (tmp, tmp, res, mask); +- emit_move_insn (res, tmp); ++ if (arity == 1) ++ fntype = build_function_type_list (type_out, type_in, NULL); ++ else ++ fntype = build_function_type_list (type_out, type_in, type_in, NULL); + +- emit_label (label); +- LABEL_NUSES (label) = 1; ++ /* Build a function declaration for the vectorized function. */ ++ new_fndecl = build_decl (BUILTINS_LOCATION, ++ FUNCTION_DECL, get_identifier (name), fntype); ++ TREE_PUBLIC (new_fndecl) = 1; ++ DECL_EXTERNAL (new_fndecl) = 1; ++ DECL_IS_NOVOPS (new_fndecl) = 1; ++ TREE_READONLY (new_fndecl) = 1; + +- emit_move_insn (operand0, res); ++ return new_fndecl; + } + +-/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing +- into OPERAND0. */ +-void +-ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) +-{ +- /* C code for the stuff we expand below. +- double xa = fabs (x), x2; +- if (!isless (xa, TWO52)) +- return x; +- x2 = (double)(long)x; +- Compensate. Floor: +- if (x2 > x) +- x2 -= 1; +- Compensate. Ceil: +- if (x2 < x) +- x2 += 1; +- if (HONOR_SIGNED_ZEROS (mode)) +- return copysign (x2, x); +- return x2; +- */ +- machine_mode mode = GET_MODE (operand0); +- rtx xa, xi, TWO52, tmp, one, res, mask; +- rtx_code_label *label; ++/* Handler for an ACML-style interface to ++ a library with vectorized intrinsics. 
*/ + +- TWO52 = ix86_gen_TWO52 (mode); ++tree ++ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in) ++{ ++ char name[20] = "__vr.._"; ++ tree fntype, new_fndecl, args; ++ unsigned arity; ++ const char *bname; ++ machine_mode el_mode, in_mode; ++ int n, in_n; + +- /* Temporary for holding the result, initialized to the input +- operand to ease control flow. */ +- res = gen_reg_rtx (mode); +- emit_move_insn (res, operand1); ++ /* The ACML is 64bits only and suitable for unsafe math only as ++ it does not correctly support parts of IEEE with the required ++ precision such as denormals. */ ++ if (!TARGET_64BIT ++ || !flag_unsafe_math_optimizations) ++ return NULL_TREE; + +- /* xa = abs (operand1) */ +- xa = ix86_expand_sse_fabs (res, &mask); ++ el_mode = TYPE_MODE (TREE_TYPE (type_out)); ++ n = TYPE_VECTOR_SUBPARTS (type_out); ++ in_mode = TYPE_MODE (TREE_TYPE (type_in)); ++ in_n = TYPE_VECTOR_SUBPARTS (type_in); ++ if (el_mode != in_mode ++ || n != in_n) ++ return NULL_TREE; + +- /* if (!isless (xa, TWO52)) goto label; */ +- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ switch (fn) ++ { ++ CASE_CFN_SIN: ++ CASE_CFN_COS: ++ CASE_CFN_EXP: ++ CASE_CFN_LOG: ++ CASE_CFN_LOG2: ++ CASE_CFN_LOG10: ++ if (el_mode == DFmode && n == 2) ++ { ++ name[4] = 'd'; ++ name[5] = '2'; ++ } ++ else if (el_mode == SFmode && n == 4) ++ { ++ name[4] = 's'; ++ name[5] = '4'; ++ } ++ else ++ return NULL_TREE; ++ break; + +- /* xa = (double)(long)x */ +- xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); +- expand_fix (xi, res, 0); +- expand_float (xa, xi, 0); ++ default: ++ return NULL_TREE; ++ } + +- /* generate 1.0 */ +- one = force_reg (mode, const_double_from_real_value (dconst1, mode)); ++ tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); ++ bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); ++ sprintf (name + 7, "%s", bname+10); + +- /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ +- tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); +- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); +- tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, +- xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); +- emit_move_insn (res, tmp); ++ arity = 0; ++ for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) ++ arity++; + +- if (HONOR_SIGNED_ZEROS (mode)) +- ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); ++ if (arity == 1) ++ fntype = build_function_type_list (type_out, type_in, NULL); ++ else ++ fntype = build_function_type_list (type_out, type_in, type_in, NULL); + +- emit_label (label); +- LABEL_NUSES (label) = 1; ++ /* Build a function declaration for the vectorized function. */ ++ new_fndecl = build_decl (BUILTINS_LOCATION, ++ FUNCTION_DECL, get_identifier (name), fntype); ++ TREE_PUBLIC (new_fndecl) = 1; ++ DECL_EXTERNAL (new_fndecl) = 1; ++ DECL_IS_NOVOPS (new_fndecl) = 1; ++ TREE_READONLY (new_fndecl) = 1; + +- emit_move_insn (operand0, res); ++ return new_fndecl; + } + +-/* Expand SSE sequence for computing round from OPERAND1 storing +- into OPERAND0. Sequence that works without relying on DImode truncation +- via cvttsd2siq that is only available on 64bit targets. */ +-void +-ix86_expand_rounddf_32 (rtx operand0, rtx operand1) +-{ +- /* C code for the stuff we expand below. +- double xa = fabs (x), xa2, x2; +- if (!isless (xa, TWO52)) +- return x; +- Using the absolute value and copying back sign makes +- -0.0 -> -0.0 correct. +- xa2 = xa + TWO52 - TWO52; +- Compensate. 
+- dxa = xa2 - xa; +- if (dxa <= -0.5) +- xa2 += 1; +- else if (dxa > 0.5) +- xa2 -= 1; +- x2 = copysign (xa2, x); +- return x2; +- */ +- machine_mode mode = GET_MODE (operand0); +- rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask; +- rtx_code_label *label; +- +- TWO52 = ix86_gen_TWO52 (mode); +- +- /* Temporary for holding the result, initialized to the input +- operand to ease control flow. */ +- res = gen_reg_rtx (mode); +- emit_move_insn (res, operand1); +- +- /* xa = abs (operand1) */ +- xa = ix86_expand_sse_fabs (res, &mask); ++/* Returns a decl of a function that implements scatter store with ++ register type VECTYPE and index type INDEX_TYPE and SCALE. ++ Return NULL_TREE if it is not available. */ + +- /* if (!isless (xa, TWO52)) goto label; */ +- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++static tree ++ix86_vectorize_builtin_scatter (const_tree vectype, ++ const_tree index_type, int scale) ++{ ++ bool si; ++ enum ix86_builtins code; + +- /* xa2 = xa + TWO52 - TWO52; */ +- xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); +- xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT); ++ if (!TARGET_AVX512F) ++ return NULL_TREE; + +- /* dxa = xa2 - xa; */ +- dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT); ++ if ((TREE_CODE (index_type) != INTEGER_TYPE ++ && !POINTER_TYPE_P (index_type)) ++ || (TYPE_MODE (index_type) != SImode ++ && TYPE_MODE (index_type) != DImode)) ++ return NULL_TREE; + +- /* generate 0.5, 1.0 and -0.5 */ +- half = force_reg (mode, const_double_from_real_value (dconsthalf, mode)); +- one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT); +- mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX, +- 0, OPTAB_DIRECT); ++ if (TYPE_PRECISION (index_type) > POINTER_SIZE) ++ return NULL_TREE; + +- /* Compensate. */ +- /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */ +- tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); +- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one))); +- xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); +- /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */ +- tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); +- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one))); +- xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); ++ /* v*scatter* insn sign extends index to pointer mode. */ ++ if (TYPE_PRECISION (index_type) < POINTER_SIZE ++ && TYPE_UNSIGNED (index_type)) ++ return NULL_TREE; + +- /* res = copysign (xa2, operand1) */ +- ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask); ++ /* Scale can be 1, 2, 4 or 8. */ ++ if (scale <= 0 ++ || scale > 8 ++ || (scale & (scale - 1)) != 0) ++ return NULL_TREE; + +- emit_label (label); +- LABEL_NUSES (label) = 1; ++ si = TYPE_MODE (index_type) == SImode; ++ switch (TYPE_MODE (vectype)) ++ { ++ case E_V8DFmode: ++ code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF; ++ break; ++ case E_V8DImode: ++ code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI; ++ break; ++ case E_V16SFmode: ++ code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF; ++ break; ++ case E_V16SImode: ++ code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI; ++ break; ++ case E_V4DFmode: ++ if (TARGET_AVX512VL) ++ code = si ? 
IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF; ++ else ++ return NULL_TREE; ++ break; ++ case E_V4DImode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI; ++ else ++ return NULL_TREE; ++ break; ++ case E_V8SFmode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF; ++ else ++ return NULL_TREE; ++ break; ++ case E_V8SImode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI; ++ else ++ return NULL_TREE; ++ break; ++ case E_V2DFmode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF; ++ else ++ return NULL_TREE; ++ break; ++ case E_V2DImode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI; ++ else ++ return NULL_TREE; ++ break; ++ case E_V4SFmode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF; ++ else ++ return NULL_TREE; ++ break; ++ case E_V4SImode: ++ if (TARGET_AVX512VL) ++ code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI; ++ else ++ return NULL_TREE; ++ break; ++ default: ++ return NULL_TREE; ++ } + +- emit_move_insn (operand0, res); ++ return get_ix86_builtin (code); + } + +-/* Expand SSE sequence for computing trunc from OPERAND1 storing +- into OPERAND0. */ +-void +-ix86_expand_trunc (rtx operand0, rtx operand1) +-{ +- /* C code for SSE variant we expand below. +- double xa = fabs (x), x2; +- if (!isless (xa, TWO52)) +- return x; +- x2 = (double)(long)x; +- if (HONOR_SIGNED_ZEROS (mode)) +- return copysign (x2, x); +- return x2; +- */ +- machine_mode mode = GET_MODE (operand0); +- rtx xa, xi, TWO52, res, mask; +- rtx_code_label *label; ++/* Return true if it is safe to use the rsqrt optabs to optimize ++ 1.0/sqrt. */ + +- TWO52 = ix86_gen_TWO52 (mode); ++static bool ++use_rsqrt_p () ++{ ++ return (TARGET_SSE && TARGET_SSE_MATH ++ && flag_finite_math_only ++ && !flag_trapping_math ++ && flag_unsafe_math_optimizations); ++} ++ ++/* Helper for avx_vpermilps256_operand et al. This is also used by ++ the expansion functions to turn the parallel back into a mask. ++ The return value is 0 for no match and the imm8+1 for a match. */ + +- /* Temporary for holding the result, initialized to the input +- operand to ease control flow. */ +- res = gen_reg_rtx (mode); +- emit_move_insn (res, operand1); ++int ++avx_vpermilp_parallel (rtx par, machine_mode mode) ++{ ++ unsigned i, nelt = GET_MODE_NUNITS (mode); ++ unsigned mask = 0; ++ unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */ + +- /* xa = abs (operand1) */ +- xa = ix86_expand_sse_fabs (res, &mask); ++ if (XVECLEN (par, 0) != (int) nelt) ++ return 0; + +- /* if (!isless (xa, TWO52)) goto label; */ +- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ /* Validate that all of the elements are constants, and not totally ++ out of range. Copy the data into an integral array to make the ++ subsequent checks easier. */ ++ for (i = 0; i < nelt; ++i) ++ { ++ rtx er = XVECEXP (par, 0, i); ++ unsigned HOST_WIDE_INT ei; + +- /* x = (double)(long)x */ +- xi = gen_reg_rtx (mode == DFmode ? 
DImode : SImode); +- expand_fix (xi, res, 0); +- expand_float (res, xi, 0); ++ if (!CONST_INT_P (er)) ++ return 0; ++ ei = INTVAL (er); ++ if (ei >= nelt) ++ return 0; ++ ipar[i] = ei; ++ } + +- if (HONOR_SIGNED_ZEROS (mode)) +- ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); ++ switch (mode) ++ { ++ case E_V8DFmode: ++ /* In the 512-bit DFmode case, we can only move elements within ++ a 128-bit lane. First fill the second part of the mask, ++ then fallthru. */ ++ for (i = 4; i < 6; ++i) ++ { ++ if (ipar[i] < 4 || ipar[i] >= 6) ++ return 0; ++ mask |= (ipar[i] - 4) << i; ++ } ++ for (i = 6; i < 8; ++i) ++ { ++ if (ipar[i] < 6) ++ return 0; ++ mask |= (ipar[i] - 6) << i; ++ } ++ /* FALLTHRU */ + +- emit_label (label); +- LABEL_NUSES (label) = 1; ++ case E_V4DFmode: ++ /* In the 256-bit DFmode case, we can only move elements within ++ a 128-bit lane. */ ++ for (i = 0; i < 2; ++i) ++ { ++ if (ipar[i] >= 2) ++ return 0; ++ mask |= ipar[i] << i; ++ } ++ for (i = 2; i < 4; ++i) ++ { ++ if (ipar[i] < 2) ++ return 0; ++ mask |= (ipar[i] - 2) << i; ++ } ++ break; + +- emit_move_insn (operand0, res); +-} ++ case E_V16SFmode: ++ /* In 512 bit SFmode case, permutation in the upper 256 bits ++ must mirror the permutation in the lower 256-bits. */ ++ for (i = 0; i < 8; ++i) ++ if (ipar[i] + 8 != ipar[i + 8]) ++ return 0; ++ /* FALLTHRU */ + +-/* Expand SSE sequence for computing trunc from OPERAND1 storing +- into OPERAND0. */ +-void +-ix86_expand_truncdf_32 (rtx operand0, rtx operand1) +-{ +- machine_mode mode = GET_MODE (operand0); +- rtx xa, mask, TWO52, one, res, smask, tmp; +- rtx_code_label *label; ++ case E_V8SFmode: ++ /* In 256 bit SFmode case, we have full freedom of ++ movement within the low 128-bit lane, but the high 128-bit ++ lane must mirror the exact same pattern. */ ++ for (i = 0; i < 4; ++i) ++ if (ipar[i] + 4 != ipar[i + 4]) ++ return 0; ++ nelt = 4; ++ /* FALLTHRU */ + +- /* C code for SSE variant we expand below. +- double xa = fabs (x), x2; +- if (!isless (xa, TWO52)) +- return x; +- xa2 = xa + TWO52 - TWO52; +- Compensate: +- if (xa2 > xa) +- xa2 -= 1.0; +- x2 = copysign (xa2, x); +- return x2; +- */ ++ case E_V2DFmode: ++ case E_V4SFmode: ++ /* In the 128-bit case, we've full freedom in the placement of ++ the elements from the source operand. */ ++ for (i = 0; i < nelt; ++i) ++ mask |= ipar[i] << (i * (nelt / 2)); ++ break; + +- TWO52 = ix86_gen_TWO52 (mode); ++ default: ++ gcc_unreachable (); ++ } + +- /* Temporary for holding the result, initialized to the input +- operand to ease control flow. */ +- res = gen_reg_rtx (mode); +- emit_move_insn (res, operand1); ++ /* Make sure success has a non-zero value by adding one. */ ++ return mask + 1; ++} + +- /* xa = abs (operand1) */ +- xa = ix86_expand_sse_fabs (res, &smask); ++/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by ++ the expansion functions to turn the parallel back into a mask. ++ The return value is 0 for no match and the imm8+1 for a match. */ + +- /* if (!isless (xa, TWO52)) goto label; */ +- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++int ++avx_vperm2f128_parallel (rtx par, machine_mode mode) ++{ ++ unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2; ++ unsigned mask = 0; ++ unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. 
*/ + +- /* res = xa + TWO52 - TWO52; */ +- tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); +- tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT); +- emit_move_insn (res, tmp); ++ if (XVECLEN (par, 0) != (int) nelt) ++ return 0; + +- /* generate 1.0 */ +- one = force_reg (mode, const_double_from_real_value (dconst1, mode)); ++ /* Validate that all of the elements are constants, and not totally ++ out of range. Copy the data into an integral array to make the ++ subsequent checks easier. */ ++ for (i = 0; i < nelt; ++i) ++ { ++ rtx er = XVECEXP (par, 0, i); ++ unsigned HOST_WIDE_INT ei; + +- /* Compensate: res = xa2 - (res > xa ? 1 : 0) */ +- mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false); +- emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one))); +- tmp = expand_simple_binop (mode, MINUS, +- res, mask, NULL_RTX, 0, OPTAB_DIRECT); +- emit_move_insn (res, tmp); ++ if (!CONST_INT_P (er)) ++ return 0; ++ ei = INTVAL (er); ++ if (ei >= 2 * nelt) ++ return 0; ++ ipar[i] = ei; ++ } + +- /* res = copysign (res, operand1) */ +- ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask); ++ /* Validate that the halves of the permute are halves. */ ++ for (i = 0; i < nelt2 - 1; ++i) ++ if (ipar[i] + 1 != ipar[i + 1]) ++ return 0; ++ for (i = nelt2; i < nelt - 1; ++i) ++ if (ipar[i] + 1 != ipar[i + 1]) ++ return 0; + +- emit_label (label); +- LABEL_NUSES (label) = 1; ++ /* Reconstruct the mask. */ ++ for (i = 0; i < 2; ++i) ++ { ++ unsigned e = ipar[i * nelt2]; ++ if (e % nelt2) ++ return 0; ++ e /= nelt2; ++ mask |= e << (i * 4); ++ } + +- emit_move_insn (operand0, res); ++ /* Make sure success has a non-zero value by adding one. */ ++ return mask + 1; ++} ++ ++/* Return a register priority for hard reg REGNO. */ ++static int ++ix86_register_priority (int hard_regno) ++{ ++ /* ebp and r13 as the base always wants a displacement, r12 as the ++ base always wants an index. So discourage their usage in an ++ address. */ ++ if (hard_regno == R12_REG || hard_regno == R13_REG) ++ return 0; ++ if (hard_regno == BP_REG) ++ return 1; ++ /* New x86-64 int registers result in bigger code size. Discourage ++ them. */ ++ if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG)) ++ return 2; ++ /* New x86-64 SSE registers result in bigger code size. Discourage ++ them. */ ++ if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG)) ++ return 2; ++ if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG)) ++ return 1; ++ /* Usage of AX register results in smaller code. Prefer it. */ ++ if (hard_regno == AX_REG) ++ return 4; ++ return 3; + } + +-/* Expand SSE sequence for computing round from OPERAND1 storing +- into OPERAND0. */ +-void +-ix86_expand_round (rtx operand0, rtx operand1) +-{ +- /* C code for the stuff we're doing below: +- double xa = fabs (x); +- if (!isless (xa, TWO52)) +- return x; +- xa = (double)(long)(xa + nextafter (0.5, 0.0)); +- return copysign (xa, x); +- */ +- machine_mode mode = GET_MODE (operand0); +- rtx res, TWO52, xa, xi, half, mask; +- rtx_code_label *label; +- const struct real_format *fmt; +- REAL_VALUE_TYPE pred_half, half_minus_pred_half; ++/* Implement TARGET_PREFERRED_RELOAD_CLASS. ++ ++ Put float CONST_DOUBLE in the constant pool instead of fp regs. ++ QImode must go into class Q_REGS. ++ Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and ++ movdf to do mem-to-mem moves through integer regs. 
*/ + +- /* Temporary for holding the result, initialized to the input +- operand to ease control flow. */ +- res = gen_reg_rtx (mode); +- emit_move_insn (res, operand1); ++static reg_class_t ++ix86_preferred_reload_class (rtx x, reg_class_t regclass) ++{ ++ machine_mode mode = GET_MODE (x); + +- TWO52 = ix86_gen_TWO52 (mode); +- xa = ix86_expand_sse_fabs (res, &mask); +- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); ++ /* We're only allowed to return a subclass of CLASS. Many of the ++ following checks fail for NO_REGS, so eliminate that early. */ ++ if (regclass == NO_REGS) ++ return NO_REGS; + +- /* load nextafter (0.5, 0.0) */ +- fmt = REAL_MODE_FORMAT (mode); +- real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); +- real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); ++ /* All classes can load zeros. */ ++ if (x == CONST0_RTX (mode)) ++ return regclass; + +- /* xa = xa + 0.5 */ +- half = force_reg (mode, const_double_from_real_value (pred_half, mode)); +- xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT); ++ /* Force constants into memory if we are loading a (nonzero) constant into ++ an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK ++ instructions to load from a constant. */ ++ if (CONSTANT_P (x) ++ && (MAYBE_MMX_CLASS_P (regclass) ++ || MAYBE_SSE_CLASS_P (regclass) ++ || MAYBE_MASK_CLASS_P (regclass))) ++ return NO_REGS; + +- /* xa = (double)(int64_t)xa */ +- xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); +- expand_fix (xi, xa, 0); +- expand_float (xa, xi, 0); ++ /* Floating-point constants need more complex checks. */ ++ if (CONST_DOUBLE_P (x)) ++ { ++ /* General regs can load everything. */ ++ if (INTEGER_CLASS_P (regclass)) ++ return regclass; + +- /* res = copysign (xa, operand1) */ +- ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask); ++ /* Floats can load 0 and 1 plus some others. Note that we eliminated ++ zero above. We only want to wind up preferring 80387 registers if ++ we plan on doing computation with them. */ ++ if (IS_STACK_MODE (mode) ++ && standard_80387_constant_p (x) > 0) ++ { ++ /* Limit class to FP regs. */ ++ if (FLOAT_CLASS_P (regclass)) ++ return FLOAT_REGS; ++ } + +- emit_label (label); +- LABEL_NUSES (label) = 1; ++ return NO_REGS; ++ } + +- emit_move_insn (operand0, res); +-} ++ /* Prefer SSE regs only, if we can use them for math. */ ++ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) ++ return SSE_CLASS_P (regclass) ? regclass : NO_REGS; + +-/* Expand SSE sequence for computing round +- from OP1 storing into OP0 using sse4 round insn. */ +-void +-ix86_expand_round_sse4 (rtx op0, rtx op1) +-{ +- machine_mode mode = GET_MODE (op0); +- rtx e1, e2, res, half; +- const struct real_format *fmt; +- REAL_VALUE_TYPE pred_half, half_minus_pred_half; +- rtx (*gen_copysign) (rtx, rtx, rtx); +- rtx (*gen_round) (rtx, rtx, rtx); ++ /* Generally when we see PLUS here, it's the function invariant ++ (plus soft-fp const_int). Which can only be computed into general ++ regs. */ ++ if (GET_CODE (x) == PLUS) ++ return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS; + +- switch (mode) ++ /* QImode constants are easy to load, but non-constant QImode data ++ must go into Q_REGS. 
*/ ++ if (GET_MODE (x) == QImode && !CONSTANT_P (x)) + { +- case E_SFmode: +- gen_copysign = gen_copysignsf3; +- gen_round = gen_sse4_1_roundsf2; +- break; +- case E_DFmode: +- gen_copysign = gen_copysigndf3; +- gen_round = gen_sse4_1_rounddf2; +- break; +- default: +- gcc_unreachable (); ++ if (Q_CLASS_P (regclass)) ++ return regclass; ++ else if (reg_class_subset_p (Q_REGS, regclass)) ++ return Q_REGS; ++ else ++ return NO_REGS; + } + +- /* round (a) = trunc (a + copysign (0.5, a)) */ +- +- /* load nextafter (0.5, 0.0) */ +- fmt = REAL_MODE_FORMAT (mode); +- real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); +- real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); +- half = const_double_from_real_value (pred_half, mode); ++ return regclass; ++} + +- /* e1 = copysign (0.5, op1) */ +- e1 = gen_reg_rtx (mode); +- emit_insn (gen_copysign (e1, half, op1)); ++/* Discourage putting floating-point values in SSE registers unless ++ SSE math is being used, and likewise for the 387 registers. */ ++static reg_class_t ++ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) ++{ ++ machine_mode mode = GET_MODE (x); + +- /* e2 = op1 + e1 */ +- e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT); ++ /* Restrict the output reload class to the register bank that we are doing ++ math on. If we would like not to return a subset of CLASS, reject this ++ alternative: if reload cannot do this, it will still use its choice. */ ++ mode = GET_MODE (x); ++ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) ++ return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; + +- /* res = trunc (e2) */ +- res = gen_reg_rtx (mode); +- emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC))); ++ if (IS_STACK_MODE (mode)) ++ return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; + +- emit_move_insn (op0, res); ++ return regclass; + } + +-/* Handle fentry_name / fentry_section attribute. */ +- +-static tree +-ix86_handle_fentry_name (tree *node, tree name, tree args, +- int, bool *no_add_attrs) ++static reg_class_t ++ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, ++ machine_mode mode, secondary_reload_info *sri) + { +- if (TREE_CODE (*node) == FUNCTION_DECL +- && TREE_CODE (TREE_VALUE (args)) == STRING_CST) +- /* Do nothing else, just set the attribute. We'll get at +- it later with lookup_attribute. */ +- ; +- else ++ /* Double-word spills from general registers to non-offsettable memory ++ references (zero-extended addresses) require special handling. */ ++ if (TARGET_64BIT ++ && MEM_P (x) ++ && GET_MODE_SIZE (mode) > UNITS_PER_WORD ++ && INTEGER_CLASS_P (rclass) ++ && !offsettable_memref_p (x)) + { +- warning (OPT_Wattributes, "%qE attribute ignored", name); +- *no_add_attrs = true; +- } +- +- return NULL_TREE; +-} +- +- +-/* Table of valid machine attributes. */ +-static const struct attribute_spec ix86_attribute_table[] = +-{ +- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, +- affects_type_identity, handler, exclude } */ +- /* Stdcall attribute says callee is responsible for popping arguments +- if they are not variable. */ +- { "stdcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, +- NULL }, +- /* Fastcall attribute says callee is responsible for popping arguments +- if they are not variable. */ +- { "fastcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, +- NULL }, +- /* Thiscall attribute says callee is responsible for popping arguments +- if they are not variable. 
*/ +- { "thiscall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, +- NULL }, +- /* Cdecl attribute says the callee is a normal C declaration */ +- { "cdecl", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, +- NULL }, +- /* Regparm attribute specifies how many integer arguments are to be +- passed in registers. */ +- { "regparm", 1, 1, false, true, true, true, ix86_handle_cconv_attribute, +- NULL }, +- /* Sseregparm attribute says we are using x86_64 calling conventions +- for FP arguments. */ +- { "sseregparm", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, +- NULL }, +- /* The transactional memory builtins are implicitly regparm or fastcall +- depending on the ABI. Override the generic do-nothing attribute that +- these builtins were declared with. */ +- { "*tm regparm", 0, 0, false, true, true, true, +- ix86_handle_tm_regparm_attribute, NULL }, +- /* force_align_arg_pointer says this function realigns the stack at entry. */ +- { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, +- false, true, true, false, ix86_handle_force_align_arg_pointer_attribute, +- NULL }, +-#if TARGET_DLLIMPORT_DECL_ATTRIBUTES +- { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute, +- NULL }, +- { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute, +- NULL }, +- { "shared", 0, 0, true, false, false, false, +- ix86_handle_shared_attribute, NULL }, +-#endif +- { "ms_struct", 0, 0, false, false, false, false, +- ix86_handle_struct_attribute, NULL }, +- { "gcc_struct", 0, 0, false, false, false, false, +- ix86_handle_struct_attribute, NULL }, +-#ifdef SUBTARGET_ATTRIBUTE_TABLE +- SUBTARGET_ATTRIBUTE_TABLE, +-#endif +- /* ms_abi and sysv_abi calling convention function attributes. */ +- { "ms_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, NULL }, +- { "sysv_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, +- NULL }, +- { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL }, +- { "sysv_abi va_list", 0, 0, false, false, false, false, NULL, NULL }, +- { "ms_hook_prologue", 0, 0, true, false, false, false, +- ix86_handle_fndecl_attribute, NULL }, +- { "callee_pop_aggregate_return", 1, 1, false, true, true, true, +- ix86_handle_callee_pop_aggregate_return, NULL }, +- { "interrupt", 0, 0, false, true, true, false, +- ix86_handle_interrupt_attribute, NULL }, +- { "no_caller_saved_registers", 0, 0, false, true, true, false, +- ix86_handle_no_caller_saved_registers_attribute, NULL }, +- { "naked", 0, 0, true, false, false, false, +- ix86_handle_fndecl_attribute, NULL }, +- { "indirect_branch", 1, 1, true, false, false, false, +- ix86_handle_fndecl_attribute, NULL }, +- { "function_return", 1, 1, true, false, false, false, +- ix86_handle_fndecl_attribute, NULL }, +- { "indirect_return", 0, 0, false, true, true, false, +- NULL, NULL }, +- { "fentry_name", 1, 1, true, false, false, false, +- ix86_handle_fentry_name, NULL }, +- { "fentry_section", 1, 1, true, false, false, false, +- ix86_handle_fentry_name, NULL }, +- { "cf_check", 0, 0, true, false, false, false, +- ix86_handle_fndecl_attribute, NULL }, +- +- /* End element. */ +- { NULL, 0, 0, false, false, false, false, NULL, NULL } +-}; ++ sri->icode = (in_p ++ ? CODE_FOR_reload_noff_load ++ : CODE_FOR_reload_noff_store); ++ /* Add the cost of moving address to a temporary. */ ++ sri->extra_cost = 1; + +-/* Implement targetm.vectorize.builtin_vectorization_cost. 
*/ +-static int +-ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, +- tree vectype, int) +-{ +- bool fp = false; +- machine_mode mode = TImode; +- int index; +- if (vectype != NULL) +- { +- fp = FLOAT_TYPE_P (vectype); +- mode = TYPE_MODE (vectype); ++ return NO_REGS; + } + +- switch (type_of_cost) ++ /* QImode spills from non-QI registers require ++ intermediate register on 32bit targets. */ ++ if (mode == QImode ++ && ((!TARGET_64BIT && !in_p ++ && INTEGER_CLASS_P (rclass) ++ && MAYBE_NON_Q_CLASS_P (rclass)) ++ || (!TARGET_AVX512DQ ++ && MAYBE_MASK_CLASS_P (rclass)))) + { +- case scalar_stmt: +- return fp ? ix86_cost->addss : COSTS_N_INSNS (1); ++ int regno = true_regnum (x); + +- case scalar_load: +- /* load/store costs are relative to register move which is 2. Recompute +- it to COSTS_N_INSNS so everything have same base. */ +- return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0] +- : ix86_cost->int_load [2]) / 2; ++ /* Return Q_REGS if the operand is in memory. */ ++ if (regno == -1) ++ return Q_REGS; + +- case scalar_store: +- return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0] +- : ix86_cost->int_store [2]) / 2; ++ return NO_REGS; ++ } + +- case vector_stmt: +- return ix86_vec_cost (mode, +- fp ? ix86_cost->addss : ix86_cost->sse_op); ++ /* This condition handles corner case where an expression involving ++ pointers gets vectorized. We're trying to use the address of a ++ stack slot as a vector initializer. + +- case vector_load: +- index = sse_store_index (mode); +- /* See PR82713 - we may end up being called on non-vector type. */ +- if (index < 0) +- index = 2; +- return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2; ++ (set (reg:V2DI 74 [ vect_cst_.2 ]) ++ (vec_duplicate:V2DI (reg/f:DI 20 frame))) + +- case vector_store: +- index = sse_store_index (mode); +- /* See PR82713 - we may end up being called on non-vector type. */ +- if (index < 0) +- index = 2; +- return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2; ++ Eventually frame gets turned into sp+offset like this: + +- case vec_to_scalar: +- case scalar_to_vec: +- return ix86_vec_cost (mode, ix86_cost->sse_op); ++ (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) ++ (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) ++ (const_int 392 [0x188])))) + +- /* We should have separate costs for unaligned loads and gather/scatter. +- Do that incrementally. */ +- case unaligned_load: +- index = sse_store_index (mode); +- /* See PR82713 - we may end up being called on non-vector type. */ +- if (index < 0) +- index = 2; +- return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2; ++ That later gets turned into: + +- case unaligned_store: +- index = sse_store_index (mode); +- /* See PR82713 - we may end up being called on non-vector type. 
*/ +- if (index < 0) +- index = 2; +- return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2; ++ (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) ++ (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) ++ (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])))) + +- case vector_gather_load: +- return ix86_vec_cost (mode, +- COSTS_N_INSNS +- (ix86_cost->gather_static +- + ix86_cost->gather_per_elt +- * TYPE_VECTOR_SUBPARTS (vectype)) / 2); ++ We'll have the following reload recorded: + +- case vector_scatter_store: +- return ix86_vec_cost (mode, +- COSTS_N_INSNS +- (ix86_cost->scatter_static +- + ix86_cost->scatter_per_elt +- * TYPE_VECTOR_SUBPARTS (vectype)) / 2); ++ Reload 0: reload_in (DI) = ++ (plus:DI (reg/f:DI 7 sp) ++ (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) ++ reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) ++ SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine ++ reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188])) ++ reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) ++ reload_reg_rtx: (reg:V2DI 22 xmm1) + +- case cond_branch_taken: +- return ix86_cost->cond_taken_branch_cost; ++ Which isn't going to work since SSE instructions can't handle scalar ++ additions. Returning GENERAL_REGS forces the addition into integer ++ register and reload can handle subsequent reloads without problems. */ + +- case cond_branch_not_taken: +- return ix86_cost->cond_not_taken_branch_cost; ++ if (in_p && GET_CODE (x) == PLUS ++ && SSE_CLASS_P (rclass) ++ && SCALAR_INT_MODE_P (mode)) ++ return GENERAL_REGS; + +- case vec_perm: +- case vec_promote_demote: +- return ix86_vec_cost (mode, ix86_cost->sse_op); ++ return NO_REGS; ++} + +- case vec_construct: +- { +- /* N element inserts into SSE vectors. */ +- int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op; +- /* One vinserti128 for combining two SSE vectors for AVX256. */ +- if (GET_MODE_BITSIZE (mode) == 256) +- cost += ix86_vec_cost (mode, ix86_cost->addss); +- /* One vinserti64x4 and two vinserti128 for combining SSE +- and AVX256 vectors to AVX512. */ +- else if (GET_MODE_BITSIZE (mode) == 512) +- cost += 3 * ix86_vec_cost (mode, ix86_cost->addss); +- return cost; +- } ++/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ ++ ++static bool ++ix86_class_likely_spilled_p (reg_class_t rclass) ++{ ++ switch (rclass) ++ { ++ case AREG: ++ case DREG: ++ case CREG: ++ case BREG: ++ case AD_REGS: ++ case SIREG: ++ case DIREG: ++ case SSE_FIRST_REG: ++ case FP_TOP_REG: ++ case FP_SECOND_REG: ++ return true; + + default: +- gcc_unreachable (); ++ break; + } ++ ++ return false; + } + +-/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel []))) +- insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh +- insn every time. */ ++/* If we are copying between registers from different register sets ++ (e.g. FP and integer), we may need a memory location. ++ ++ The function can't work reliably when one of the CLASSES is a class ++ containing registers from multiple sets. We avoid this by never combining ++ different sets in a single alternative in the machine description. ++ Ensure that this constraint holds to avoid unexpected surprises. + +-static GTY(()) rtx_insn *vselect_insn; ++ When STRICT is false, we are being called from REGISTER_MOVE_COST, ++ so do not enforce these sanity checks. + +-/* Initialize vselect_insn. */ ++ To optimize register_move_cost performance, define inline variant. 
*/ + +-static void +-init_vselect_insn (void) ++static inline bool ++inline_secondary_memory_needed (machine_mode mode, reg_class_t class1, ++ reg_class_t class2, int strict) + { +- unsigned i; +- rtx x; ++ if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) ++ return false; + +- x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN)); +- for (i = 0; i < MAX_VECT_LEN; ++i) +- XVECEXP (x, 0, i) = const0_rtx; +- x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx, +- const0_rtx), x); +- x = gen_rtx_SET (const0_rtx, x); +- start_sequence (); +- vselect_insn = emit_insn (x); +- end_sequence (); +-} ++ if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) ++ || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) ++ || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) ++ || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) ++ || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) ++ || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2) ++ || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1) ++ || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2)) ++ { ++ gcc_assert (!strict || lra_in_progress); ++ return true; ++ } + +-/* Construct (set target (vec_select op0 (parallel perm))) and +- return true if that's a valid instruction in the active ISA. */ ++ if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) ++ return true; + +-static bool +-expand_vselect (rtx target, rtx op0, const unsigned char *perm, +- unsigned nelt, bool testing_p) +-{ +- unsigned int i; +- rtx x, save_vconcat; +- int icode; ++ /* Between mask and general, we have moves no larger than word size. */ ++ if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) ++ && (GET_MODE_SIZE (mode) > UNITS_PER_WORD)) ++ return true; + +- if (vselect_insn == NULL_RTX) +- init_vselect_insn (); ++ /* ??? This is a lie. We do have moves between mmx/general, and for ++ mmx/sse2. But by saying we need secondary memory we discourage the ++ register allocator from using the mmx registers unless needed. */ ++ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) ++ return true; + +- x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1); +- PUT_NUM_ELEM (XVEC (x, 0), nelt); +- for (i = 0; i < nelt; ++i) +- XVECEXP (x, 0, i) = GEN_INT (perm[i]); +- save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0); +- XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0; +- PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target)); +- SET_DEST (PATTERN (vselect_insn)) = target; +- icode = recog_memoized (vselect_insn); ++ if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) ++ { ++ /* SSE1 doesn't have any direct moves from other classes. */ ++ if (!TARGET_SSE2) ++ return true; + +- if (icode >= 0 && !testing_p) +- emit_insn (copy_rtx (PATTERN (vselect_insn))); ++ /* If the target says that inter-unit moves are more expensive ++ than moving through memory, then don't generate them. */ ++ if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC) ++ || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC)) ++ return true; + +- SET_DEST (PATTERN (vselect_insn)) = const0_rtx; +- XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat; +- INSN_CODE (vselect_insn) = -1; ++ /* Between SSE and general, we have moves no larger than word size. */ ++ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) ++ return true; ++ } + +- return icode >= 0; ++ return false; + } + +-/* Similar, but generate a vec_concat from op0 and op1 as well. */ ++/* Implement TARGET_SECONDARY_MEMORY_NEEDED. 
*/ + + static bool +-expand_vselect_vconcat (rtx target, rtx op0, rtx op1, +- const unsigned char *perm, unsigned nelt, +- bool testing_p) ++ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1, ++ reg_class_t class2) + { +- machine_mode v2mode; +- rtx x; +- bool ok; +- +- if (vselect_insn == NULL_RTX) +- init_vselect_insn (); ++ return inline_secondary_memory_needed (mode, class1, class2, true); ++} + +- if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) +- return false; +- x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0); +- PUT_MODE (x, v2mode); +- XEXP (x, 0) = op0; +- XEXP (x, 1) = op1; +- ok = expand_vselect (target, x, perm, nelt, testing_p); +- XEXP (x, 0) = const0_rtx; +- XEXP (x, 1) = const0_rtx; +- return ok; +-} +- +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D +- using movss or movsd. */ +-static bool +-expand_vec_perm_movs (struct expand_vec_perm_d *d) +-{ +- machine_mode vmode = d->vmode; +- unsigned i, nelt = d->nelt; +- rtx x; ++/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. + +- if (d->one_operand_p) +- return false; ++ get_secondary_mem widens integral modes to BITS_PER_WORD. ++ There is no need to emit full 64 bit move on 64 bit targets ++ for integral modes that can be moved using 32 bit move. */ + +- if (!(TARGET_SSE && vmode == V4SFmode) +- && !(TARGET_SSE2 && vmode == V2DFmode)) +- return false; ++static machine_mode ++ix86_secondary_memory_needed_mode (machine_mode mode) ++{ ++ if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode)) ++ return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); ++ return mode; ++} + +- /* Only the first element is changed. */ +- if (d->perm[0] != nelt && d->perm[0] != 0) +- return false; +- for (i = 1; i < nelt; ++i) +- if (d->perm[i] != i + nelt - d->perm[0]) +- return false; ++/* Implement the TARGET_CLASS_MAX_NREGS hook. + +- if (d->testing_p) +- return true; ++ On the 80386, this is the size of MODE in words, ++ except in the FP regs, where a single reg is always enough. */ + +- if (d->perm[0] == nelt) +- x = gen_rtx_VEC_MERGE (vmode, d->op1, d->op0, GEN_INT (1)); ++static unsigned char ++ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) ++{ ++ if (MAYBE_INTEGER_CLASS_P (rclass)) ++ { ++ if (mode == XFmode) ++ return (TARGET_64BIT ? 2 : 3); ++ else if (mode == XCmode) ++ return (TARGET_64BIT ? 4 : 6); ++ else ++ return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); ++ } + else +- x = gen_rtx_VEC_MERGE (vmode, d->op0, d->op1, GEN_INT (1)); +- +- emit_insn (gen_rtx_SET (d->target, x)); +- +- return true; ++ { ++ if (COMPLEX_MODE_P (mode)) ++ return 2; ++ else ++ return 1; ++ } + } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D +- in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */ ++/* Implement TARGET_CAN_CHANGE_MODE_CLASS. 
*/ + + static bool +-expand_vec_perm_blend (struct expand_vec_perm_d *d) ++ix86_can_change_mode_class (machine_mode from, machine_mode to, ++ reg_class_t regclass) + { +- machine_mode mmode, vmode = d->vmode; +- unsigned i, nelt = d->nelt; +- unsigned HOST_WIDE_INT mask; +- rtx target, op0, op1, maskop, x; +- rtx rperm[32], vperm; ++ if (from == to) ++ return true; + +- if (d->one_operand_p) +- return false; +- if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64 +- && (TARGET_AVX512BW +- || GET_MODE_UNIT_SIZE (vmode) >= 4)) +- ; +- else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) +- ; +- else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) +- ; +- else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) +- ; +- else ++ /* x87 registers can't do subreg at all, as all values are reformatted ++ to extended precision. */ ++ if (MAYBE_FLOAT_CLASS_P (regclass)) + return false; + +- /* This is a blend, not a permute. Elements must stay in their +- respective lanes. */ +- for (i = 0; i < nelt; ++i) ++ if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) + { +- unsigned e = d->perm[i]; +- if (!(e == i || e == i + nelt)) ++ /* Vector registers do not support QI or HImode loads. If we don't ++ disallow a change to these modes, reload will assume it's ok to ++ drop the subreg from (subreg:SI (reg:HI 100) 0). This affects ++ the vec_dupv4hi pattern. */ ++ if (GET_MODE_SIZE (from) < 4) + return false; + } + +- if (d->testing_p) +- return true; +- +- /* ??? Without SSE4.1, we could implement this with and/andn/or. This +- decision should be extracted elsewhere, so that we only try that +- sequence once all budget==3 options have been tried. */ +- target = d->target; +- op0 = d->op0; +- op1 = d->op1; +- mask = 0; +- +- switch (vmode) +- { +- case E_V8DFmode: +- case E_V16SFmode: +- case E_V4DFmode: +- case E_V8SFmode: +- case E_V2DFmode: +- case E_V4SFmode: +- case E_V8HImode: +- case E_V8SImode: +- case E_V32HImode: +- case E_V64QImode: +- case E_V16SImode: +- case E_V8DImode: +- for (i = 0; i < nelt; ++i) +- mask |= ((unsigned HOST_WIDE_INT) (d->perm[i] >= nelt)) << i; +- break; +- +- case E_V2DImode: +- for (i = 0; i < 2; ++i) +- mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4); +- vmode = V8HImode; +- goto do_subreg; +- +- case E_V4SImode: +- for (i = 0; i < 4; ++i) +- mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2); +- vmode = V8HImode; +- goto do_subreg; ++ return true; ++} + +- case E_V16QImode: +- /* See if bytes move in pairs so we can use pblendw with +- an immediate argument, rather than pblendvb with a vector +- argument. */ +- for (i = 0; i < 16; i += 2) +- if (d->perm[i] + 1 != d->perm[i + 1]) +- { +- use_pblendvb: +- for (i = 0; i < nelt; ++i) +- rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx); ++/* Return index of MODE in the sse load/store tables. 
*/ + +- finish_pblendvb: +- vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); +- vperm = force_reg (vmode, vperm); ++static inline int ++sse_store_index (machine_mode mode) ++{ ++ switch (GET_MODE_SIZE (mode)) ++ { ++ case 4: ++ return 0; ++ case 8: ++ return 1; ++ case 16: ++ return 2; ++ case 32: ++ return 3; ++ case 64: ++ return 4; ++ default: ++ return -1; ++ } ++} + +- if (GET_MODE_SIZE (vmode) == 16) +- emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm)); +- else +- emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm)); +- if (target != d->target) +- emit_move_insn (d->target, gen_lowpart (d->vmode, target)); +- return true; +- } ++/* Return the cost of moving data of mode M between a ++ register and memory. A value of 2 is the default; this cost is ++ relative to those in `REGISTER_MOVE_COST'. + +- for (i = 0; i < 8; ++i) +- mask |= (d->perm[i * 2] >= 16) << i; +- vmode = V8HImode; +- /* FALLTHRU */ ++ This function is used extensively by register_move_cost that is used to ++ build tables at startup. Make it inline in this case. ++ When IN is 2, return maximum of in and out move cost. + +- do_subreg: +- target = gen_reg_rtx (vmode); +- op0 = gen_lowpart (vmode, op0); +- op1 = gen_lowpart (vmode, op1); +- break; ++ If moving between registers and memory is more expensive than ++ between two registers, you should define this macro to express the ++ relative cost. + +- case E_V32QImode: +- /* See if bytes move in pairs. If not, vpblendvb must be used. */ +- for (i = 0; i < 32; i += 2) +- if (d->perm[i] + 1 != d->perm[i + 1]) +- goto use_pblendvb; +- /* See if bytes move in quadruplets. If yes, vpblendd +- with immediate can be used. */ +- for (i = 0; i < 32; i += 4) +- if (d->perm[i] + 2 != d->perm[i + 2]) +- break; +- if (i < 32) ++ Model also increased moving costs of QImode registers in non ++ Q_REGS classes. ++ */ ++static inline int ++inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) ++{ ++ int cost; ++ if (FLOAT_CLASS_P (regclass)) ++ { ++ int index; ++ switch (mode) + { +- /* See if bytes move the same in both lanes. If yes, +- vpblendw with immediate can be used. */ +- for (i = 0; i < 16; i += 2) +- if (d->perm[i] + 16 != d->perm[i + 16]) +- goto use_pblendvb; +- +- /* Use vpblendw. */ +- for (i = 0; i < 16; ++i) +- mask |= (d->perm[i * 2] >= 32) << i; +- vmode = V16HImode; +- goto do_subreg; ++ case E_SFmode: ++ index = 0; ++ break; ++ case E_DFmode: ++ index = 1; ++ break; ++ case E_XFmode: ++ index = 2; ++ break; ++ default: ++ return 100; + } +- +- /* Use vpblendd. */ +- for (i = 0; i < 8; ++i) +- mask |= (d->perm[i * 4] >= 32) << i; +- vmode = V8SImode; +- goto do_subreg; +- +- case E_V16HImode: +- /* See if words move in pairs. If yes, vpblendd can be used. */ +- for (i = 0; i < 16; i += 2) +- if (d->perm[i] + 1 != d->perm[i + 1]) +- break; +- if (i < 16) ++ if (in == 2) ++ return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]); ++ return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; ++ } ++ if (SSE_CLASS_P (regclass)) ++ { ++ int index = sse_store_index (mode); ++ if (index == -1) ++ return 100; ++ if (in == 2) ++ return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]); ++ return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; ++ } ++ if (MMX_CLASS_P (regclass)) ++ { ++ int index; ++ switch (GET_MODE_SIZE (mode)) + { +- /* See if words move the same in both lanes. If not, +- vpblendvb must be used. 
*/ +- for (i = 0; i < 8; i++) +- if (d->perm[i] + 8 != d->perm[i + 8]) +- { +- /* Use vpblendvb. */ +- for (i = 0; i < 32; ++i) +- rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx); +- +- vmode = V32QImode; +- nelt = 32; +- target = gen_reg_rtx (vmode); +- op0 = gen_lowpart (vmode, op0); +- op1 = gen_lowpart (vmode, op1); +- goto finish_pblendvb; +- } +- +- /* Use vpblendw. */ +- for (i = 0; i < 16; ++i) +- mask |= (d->perm[i] >= 16) << i; +- break; ++ case 4: ++ index = 0; ++ break; ++ case 8: ++ index = 1; ++ break; ++ default: ++ return 100; + } +- +- /* Use vpblendd. */ +- for (i = 0; i < 8; ++i) +- mask |= (d->perm[i * 2] >= 16) << i; +- vmode = V8SImode; +- goto do_subreg; +- +- case E_V4DImode: +- /* Use vpblendd. */ +- for (i = 0; i < 4; ++i) +- mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2); +- vmode = V8SImode; +- goto do_subreg; +- +- default: +- gcc_unreachable (); ++ if (in == 2) ++ return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]); ++ return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; + } +- +- switch (vmode) ++ switch (GET_MODE_SIZE (mode)) + { +- case E_V8DFmode: +- case E_V8DImode: +- mmode = QImode; +- break; +- case E_V16SFmode: +- case E_V16SImode: +- mmode = HImode; +- break; +- case E_V32HImode: +- mmode = SImode; +- break; +- case E_V64QImode: +- mmode = DImode; +- break; +- default: +- mmode = VOIDmode; ++ case 1: ++ if (Q_CLASS_P (regclass) || TARGET_64BIT) ++ { ++ if (!in) ++ return ix86_cost->int_store[0]; ++ if (TARGET_PARTIAL_REG_DEPENDENCY ++ && optimize_function_for_speed_p (cfun)) ++ cost = ix86_cost->movzbl_load; ++ else ++ cost = ix86_cost->int_load[0]; ++ if (in == 2) ++ return MAX (cost, ix86_cost->int_store[0]); ++ return cost; ++ } ++ else ++ { ++ if (in == 2) ++ return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4); ++ if (in) ++ return ix86_cost->movzbl_load; ++ else ++ return ix86_cost->int_store[0] + 4; ++ } ++ break; ++ case 2: ++ if (in == 2) ++ return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]); ++ return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; ++ default: ++ if (in == 2) ++ cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]); ++ else if (in) ++ cost = ix86_cost->int_load[2]; ++ else ++ cost = ix86_cost->int_store[2]; ++ /* Multiply with the number of GPR moves needed. */ ++ return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD); + } ++} + +- if (mmode != VOIDmode) +- maskop = force_reg (mmode, gen_int_mode (mask, mmode)); +- else +- maskop = GEN_INT (mask); +- +- /* This matches five different patterns with the different modes. */ +- x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop); +- x = gen_rtx_SET (target, x); +- emit_insn (x); +- if (target != d->target) +- emit_move_insn (d->target, gen_lowpart (d->vmode, target)); +- +- return true; ++static int ++ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) ++{ ++ return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0); + } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D +- in terms of the variable form of vpermilps. + +- Note that we will have already failed the immediate input vpermilps, +- which requires that the high and low part shuffle be identical; the +- variable form doesn't require that. */ ++/* Return the cost of moving data from a register in class CLASS1 to ++ one in class CLASS2. 
+ +-static bool +-expand_vec_perm_vpermil (struct expand_vec_perm_d *d) ++ It is not required that the cost always equal 2 when FROM is the same as TO; ++ on some machines it is expensive to move between registers if they are not ++ general registers. */ ++ ++static int ++ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, ++ reg_class_t class2_i) + { +- rtx rperm[8], vperm; +- unsigned i; ++ enum reg_class class1 = (enum reg_class) class1_i; ++ enum reg_class class2 = (enum reg_class) class2_i; + +- if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p) +- return false; ++ /* In case we require secondary memory, compute cost of the store followed ++ by load. In order to avoid bad register allocation choices, we need ++ for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ + +- /* We can only permute within the 128-bit lane. */ +- for (i = 0; i < 8; ++i) ++ if (inline_secondary_memory_needed (mode, class1, class2, false)) + { +- unsigned e = d->perm[i]; +- if (i < 4 ? e >= 4 : e < 4) +- return false; +- } ++ int cost = 1; + +- if (d->testing_p) +- return true; ++ cost += inline_memory_move_cost (mode, class1, 2); ++ cost += inline_memory_move_cost (mode, class2, 2); + +- for (i = 0; i < 8; ++i) +- { +- unsigned e = d->perm[i]; ++ /* In case of copying from general_purpose_register we may emit multiple ++ stores followed by single load causing memory size mismatch stall. ++ Count this as arbitrarily high cost of 20. */ ++ if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD ++ && TARGET_MEMORY_MISMATCH_STALL ++ && targetm.class_max_nregs (class1, mode) ++ > targetm.class_max_nregs (class2, mode)) ++ cost += 20; + +- /* Within each 128-bit lane, the elements of op0 are numbered +- from 0 and the elements of op1 are numbered from 4. */ +- if (e >= 8 + 4) +- e -= 8; +- else if (e >= 4) +- e -= 4; ++ /* In the case of FP/MMX moves, the registers actually overlap, and we ++ have to switch modes in order to treat them differently. */ ++ if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) ++ || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) ++ cost += 20; + +- rperm[i] = GEN_INT (e); ++ return cost; + } + +- vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm)); +- vperm = force_reg (V8SImode, vperm); +- emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm)); +- +- return true; +-} +- +-/* Return true if permutation D can be performed as VMODE permutation +- instead. */ ++ /* Moves between SSE/MMX and integer unit are expensive. */ ++ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) ++ || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) + +-static bool +-valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d) +-{ +- unsigned int i, j, chunk; ++ /* ??? By keeping returned value relatively high, we limit the number ++ of moves between integer and MMX/SSE registers for all targets. ++ Additionally, high value prevents problem with x86_modes_tieable_p(), ++ where integer modes in MMX/SSE registers are not tieable ++ because of missing QImode and HImode moves to, from or between ++ MMX/SSE registers. */ ++ return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2) ++ ? 
ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer); + +- if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT +- || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT +- || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode)) +- return false; ++ if (MAYBE_FLOAT_CLASS_P (class1)) ++ return ix86_cost->fp_move; ++ if (MAYBE_SSE_CLASS_P (class1)) ++ { ++ if (GET_MODE_BITSIZE (mode) <= 128) ++ return ix86_cost->xmm_move; ++ if (GET_MODE_BITSIZE (mode) <= 256) ++ return ix86_cost->ymm_move; ++ return ix86_cost->zmm_move; ++ } ++ if (MAYBE_MMX_CLASS_P (class1)) ++ return ix86_cost->mmx_move; ++ return 2; ++} + +- if (GET_MODE_NUNITS (vmode) >= d->nelt) +- return true; ++/* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in ++ words of a value of mode MODE but can be less for certain modes in ++ special long registers. + +- chunk = d->nelt / GET_MODE_NUNITS (vmode); +- for (i = 0; i < d->nelt; i += chunk) +- if (d->perm[i] & (chunk - 1)) +- return false; +- else +- for (j = 1; j < chunk; ++j) +- if (d->perm[i] + j != d->perm[i + j]) +- return false; ++ Actually there are no two word move instructions for consecutive ++ registers. And only registers 0-3 may have mov byte instructions ++ applied to them. */ + +- return true; ++static unsigned int ++ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) ++{ ++ if (GENERAL_REGNO_P (regno)) ++ { ++ if (mode == XFmode) ++ return TARGET_64BIT ? 2 : 3; ++ if (mode == XCmode) ++ return TARGET_64BIT ? 4 : 6; ++ return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); ++ } ++ if (COMPLEX_MODE_P (mode)) ++ return 2; ++ if (mode == V64SFmode || mode == V64SImode) ++ return 4; ++ return 1; + } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D +- in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */ ++/* Implement TARGET_HARD_REGNO_MODE_OK. */ + + static bool +-expand_vec_perm_pshufb (struct expand_vec_perm_d *d) ++ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) + { +- unsigned i, nelt, eltsz, mask; +- unsigned char perm[64]; +- machine_mode vmode = V16QImode; +- rtx rperm[64], vperm, target, op0, op1; +- +- nelt = d->nelt; +- +- if (!d->one_operand_p) +- { +- if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16) +- { +- if (TARGET_AVX2 +- && valid_perm_using_mode_p (V2TImode, d)) +- { +- if (d->testing_p) +- return true; +- +- /* Use vperm2i128 insn. The pattern uses +- V4DImode instead of V2TImode. */ +- target = d->target; +- if (d->vmode != V4DImode) +- target = gen_reg_rtx (V4DImode); +- op0 = gen_lowpart (V4DImode, d->op0); +- op1 = gen_lowpart (V4DImode, d->op1); +- rperm[0] +- = GEN_INT ((d->perm[0] / (nelt / 2)) +- | ((d->perm[nelt / 2] / (nelt / 2)) * 16)); +- emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0])); +- if (target != d->target) +- emit_move_insn (d->target, gen_lowpart (d->vmode, target)); +- return true; +- } +- return false; +- } +- } +- else ++ /* Flags and only flags can only hold CCmode values. 
*/ ++ if (CC_REGNO_P (regno)) ++ return GET_MODE_CLASS (mode) == MODE_CC; ++ if (GET_MODE_CLASS (mode) == MODE_CC ++ || GET_MODE_CLASS (mode) == MODE_RANDOM ++ || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) ++ return false; ++ if (STACK_REGNO_P (regno)) ++ return VALID_FP_MODE_P (mode); ++ if (MASK_REGNO_P (regno)) ++ return (VALID_MASK_REG_MODE (mode) ++ || (TARGET_AVX512BW ++ && VALID_MASK_AVX512BW_MODE (mode))); ++ if (SSE_REGNO_P (regno)) + { +- if (GET_MODE_SIZE (d->vmode) == 16) +- { +- if (!TARGET_SSSE3) +- return false; +- } +- else if (GET_MODE_SIZE (d->vmode) == 32) +- { +- if (!TARGET_AVX2) +- return false; +- +- /* V4DImode should be already handled through +- expand_vselect by vpermq instruction. */ +- gcc_assert (d->vmode != V4DImode); +- +- vmode = V32QImode; +- if (d->vmode == V8SImode +- || d->vmode == V16HImode +- || d->vmode == V32QImode) +- { +- /* First see if vpermq can be used for +- V8SImode/V16HImode/V32QImode. */ +- if (valid_perm_using_mode_p (V4DImode, d)) +- { +- for (i = 0; i < 4; i++) +- perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3; +- if (d->testing_p) +- return true; +- target = gen_reg_rtx (V4DImode); +- if (expand_vselect (target, gen_lowpart (V4DImode, d->op0), +- perm, 4, false)) +- { +- emit_move_insn (d->target, +- gen_lowpart (d->vmode, target)); +- return true; +- } +- return false; +- } +- +- /* Next see if vpermd can be used. */ +- if (valid_perm_using_mode_p (V8SImode, d)) +- vmode = V8SImode; +- } +- /* Or if vpermps can be used. */ +- else if (d->vmode == V8SFmode) +- vmode = V8SImode; ++ /* We implement the move patterns for all vector modes into and ++ out of SSE registers, even when no operation instructions ++ are available. */ + +- if (vmode == V32QImode) +- { +- /* vpshufb only works intra lanes, it is not +- possible to shuffle bytes in between the lanes. */ +- for (i = 0; i < nelt; ++i) +- if ((d->perm[i] ^ i) & (nelt / 2)) +- return false; +- } +- } +- else if (GET_MODE_SIZE (d->vmode) == 64) +- { +- if (!TARGET_AVX512BW) +- return false; ++ /* For AVX-512 we allow, regardless of regno: ++ - XI mode ++ - any of 512-bit wide vector mode ++ - any scalar mode. */ ++ if (TARGET_AVX512F ++ && (mode == XImode ++ || VALID_AVX512F_REG_MODE (mode) ++ || VALID_AVX512F_SCALAR_MODE (mode))) ++ return true; + +- /* If vpermq didn't work, vpshufb won't work either. */ +- if (d->vmode == V8DFmode || d->vmode == V8DImode) +- return false; ++ /* For AVX-5124FMAPS or AVX-5124VNNIW ++ allow V64SF and V64SI modes for special regnos. */ ++ if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW) ++ && (mode == V64SFmode || mode == V64SImode) ++ && MOD4_SSE_REGNO_P (regno)) ++ return true; + +- vmode = V64QImode; +- if (d->vmode == V16SImode +- || d->vmode == V32HImode +- || d->vmode == V64QImode) +- { +- /* First see if vpermq can be used for +- V16SImode/V32HImode/V64QImode. */ +- if (valid_perm_using_mode_p (V8DImode, d)) +- { +- for (i = 0; i < 8; i++) +- perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7; +- if (d->testing_p) +- return true; +- target = gen_reg_rtx (V8DImode); +- if (expand_vselect (target, gen_lowpart (V8DImode, d->op0), +- perm, 8, false)) +- { +- emit_move_insn (d->target, +- gen_lowpart (d->vmode, target)); +- return true; +- } +- return false; +- } ++ /* TODO check for QI/HI scalars. */ ++ /* AVX512VL allows sse regs16+ for 128/256 bit modes. 
*/ ++ if (TARGET_AVX512VL ++ && (mode == OImode ++ || mode == TImode ++ || VALID_AVX256_REG_MODE (mode) ++ || VALID_AVX512VL_128_REG_MODE (mode))) ++ return true; + +- /* Next see if vpermd can be used. */ +- if (valid_perm_using_mode_p (V16SImode, d)) +- vmode = V16SImode; +- } +- /* Or if vpermps can be used. */ +- else if (d->vmode == V16SFmode) +- vmode = V16SImode; +- if (vmode == V64QImode) +- { +- /* vpshufb only works intra lanes, it is not +- possible to shuffle bytes in between the lanes. */ +- for (i = 0; i < nelt; ++i) +- if ((d->perm[i] ^ i) & (nelt / 4)) +- return false; +- } +- } +- else ++ /* xmm16-xmm31 are only available for AVX-512. */ ++ if (EXT_REX_SSE_REGNO_P (regno)) + return false; +- } +- +- if (d->testing_p) +- return true; + +- if (vmode == V8SImode) +- for (i = 0; i < 8; ++i) +- rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7); +- else if (vmode == V16SImode) +- for (i = 0; i < 16; ++i) +- rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15); +- else ++ /* OImode and AVX modes are available only when AVX is enabled. */ ++ return ((TARGET_AVX ++ && VALID_AVX256_REG_OR_OI_MODE (mode)) ++ || VALID_SSE_REG_MODE (mode) ++ || VALID_SSE2_REG_MODE (mode) ++ || VALID_MMX_REG_MODE (mode) ++ || VALID_MMX_REG_MODE_3DNOW (mode)); ++ } ++ if (MMX_REGNO_P (regno)) + { +- eltsz = GET_MODE_UNIT_SIZE (d->vmode); +- if (!d->one_operand_p) +- mask = 2 * nelt - 1; +- else if (vmode == V16QImode) +- mask = nelt - 1; +- else if (vmode == V64QImode) +- mask = nelt / 4 - 1; +- else +- mask = nelt / 2 - 1; +- +- for (i = 0; i < nelt; ++i) +- { +- unsigned j, e = d->perm[i] & mask; +- for (j = 0; j < eltsz; ++j) +- rperm[i * eltsz + j] = GEN_INT (e * eltsz + j); +- } +- } +- +- vperm = gen_rtx_CONST_VECTOR (vmode, +- gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm)); +- vperm = force_reg (vmode, vperm); +- +- target = d->target; +- if (d->vmode != vmode) +- target = gen_reg_rtx (vmode); +- op0 = gen_lowpart (vmode, d->op0); +- if (d->one_operand_p) +- { +- if (vmode == V16QImode) +- emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); +- else if (vmode == V32QImode) +- emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); +- else if (vmode == V64QImode) +- emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm)); +- else if (vmode == V8SFmode) +- emit_insn (gen_avx2_permvarv8sf (target, op0, vperm)); +- else if (vmode == V8SImode) +- emit_insn (gen_avx2_permvarv8si (target, op0, vperm)); +- else if (vmode == V16SFmode) +- emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm)); +- else if (vmode == V16SImode) +- emit_insn (gen_avx512f_permvarv16si (target, op0, vperm)); +- else +- gcc_unreachable (); ++ /* We implement the move patterns for 3DNOW modes even in MMX mode, ++ so if the register is available at all, then we can move data of ++ the given mode into or out of it. */ ++ return (VALID_MMX_REG_MODE (mode) ++ || VALID_MMX_REG_MODE_3DNOW (mode)); + } +- else ++ ++ if (mode == QImode) + { +- op1 = gen_lowpart (vmode, d->op1); +- emit_insn (gen_xop_pperm (target, op0, op1, vperm)); ++ /* Take care for QImode values - they can be in non-QI regs, ++ but then they do cause partial register stalls. */ ++ if (ANY_QI_REGNO_P (regno)) ++ return true; ++ if (!TARGET_PARTIAL_REG_STALL) ++ return true; ++ /* LRA checks if the hard register is OK for the given mode. ++ QImode values can live in non-QI regs, so we allow all ++ registers here. 
*/ ++ if (lra_in_progress) ++ return true; ++ return !can_create_pseudo_p (); + } +- if (target != d->target) +- emit_move_insn (d->target, gen_lowpart (d->vmode, target)); ++ /* We handle both integer and floats in the general purpose registers. */ ++ else if (VALID_INT_MODE_P (mode)) ++ return true; ++ else if (VALID_FP_MODE_P (mode)) ++ return true; ++ else if (VALID_DFP_MODE_P (mode)) ++ return true; ++ /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go ++ on to use that value in smaller contexts, this can easily force a ++ pseudo to be allocated to GENERAL_REGS. Since this is no worse than ++ supporting DImode, allow it. */ ++ else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) ++ return true; + +- return true; ++ return false; + } + +-/* For V*[QHS]Imode permutations, check if the same permutation +- can't be performed in a 2x, 4x or 8x wider inner mode. */ ++/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that ++ saves SSE registers across calls is Win64 (thus no need to check the ++ current ABI here), and with AVX enabled Win64 only guarantees that ++ the low 16 bytes are saved. */ + + static bool +-canonicalize_vector_int_perm (const struct expand_vec_perm_d *d, +- struct expand_vec_perm_d *nd) ++ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno, ++ machine_mode mode) + { +- int i; +- machine_mode mode = VOIDmode; +- +- switch (d->vmode) +- { +- case E_V16QImode: mode = V8HImode; break; +- case E_V32QImode: mode = V16HImode; break; +- case E_V64QImode: mode = V32HImode; break; +- case E_V8HImode: mode = V4SImode; break; +- case E_V16HImode: mode = V8SImode; break; +- case E_V32HImode: mode = V16SImode; break; +- case E_V4SImode: mode = V2DImode; break; +- case E_V8SImode: mode = V4DImode; break; +- case E_V16SImode: mode = V8DImode; break; +- default: return false; +- } +- for (i = 0; i < d->nelt; i += 2) +- if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1) +- return false; +- nd->vmode = mode; +- nd->nelt = d->nelt / 2; +- for (i = 0; i < nd->nelt; i++) +- nd->perm[i] = d->perm[2 * i] / 2; +- if (GET_MODE_INNER (mode) != DImode) +- canonicalize_vector_int_perm (nd, nd); +- if (nd != d) +- { +- nd->one_operand_p = d->one_operand_p; +- nd->testing_p = d->testing_p; +- if (d->op0 == d->op1) +- nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0); +- else +- { +- nd->op0 = gen_lowpart (nd->vmode, d->op0); +- nd->op1 = gen_lowpart (nd->vmode, d->op1); +- } +- if (d->testing_p) +- nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1); +- else +- nd->target = gen_reg_rtx (nd->vmode); +- } +- return true; ++ return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16; + } + +-/* Try to expand one-operand permutation with constant mask. */ ++/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a ++ tieable integer mode. 
*/ + + static bool +-ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d) ++ix86_tieable_integer_mode_p (machine_mode mode) + { +- machine_mode mode = GET_MODE (d->op0); +- machine_mode maskmode = mode; +- rtx (*gen) (rtx, rtx, rtx) = NULL; +- rtx target, op0, mask; +- rtx vec[64]; ++ switch (mode) ++ { ++ case E_HImode: ++ case E_SImode: ++ return true; + +- if (!rtx_equal_p (d->op0, d->op1)) +- return false; ++ case E_QImode: ++ return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; + +- if (!TARGET_AVX512F) +- return false; ++ case E_DImode: ++ return TARGET_64BIT; + +- switch (mode) +- { +- case E_V16SImode: +- gen = gen_avx512f_permvarv16si; +- break; +- case E_V16SFmode: +- gen = gen_avx512f_permvarv16sf; +- maskmode = V16SImode; +- break; +- case E_V8DImode: +- gen = gen_avx512f_permvarv8di; +- break; +- case E_V8DFmode: +- gen = gen_avx512f_permvarv8df; +- maskmode = V8DImode; +- break; + default: + return false; + } +- +- target = d->target; +- op0 = d->op0; +- for (int i = 0; i < d->nelt; ++i) +- vec[i] = GEN_INT (d->perm[i]); +- mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); +- emit_insn (gen (target, op0, force_reg (maskmode, mask))); +- return true; + } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D +- in a single instruction. */ ++/* Implement TARGET_MODES_TIEABLE_P. ++ ++ Return true if MODE1 is accessible in a register that can hold MODE2 ++ without copying. That is, all register classes that can hold MODE2 ++ can also hold MODE1. */ + + static bool +-expand_vec_perm_1 (struct expand_vec_perm_d *d) ++ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) + { +- unsigned i, nelt = d->nelt; +- struct expand_vec_perm_d nd; +- +- /* Check plain VEC_SELECT first, because AVX has instructions that could +- match both SEL and SEL+CONCAT, but the plain SEL will allow a memory +- input where SEL+CONCAT may not. */ +- if (d->one_operand_p) +- { +- int mask = nelt - 1; +- bool identity_perm = true; +- bool broadcast_perm = true; +- +- for (i = 0; i < nelt; i++) +- { +- nd.perm[i] = d->perm[i] & mask; +- if (nd.perm[i] != i) +- identity_perm = false; +- if (nd.perm[i]) +- broadcast_perm = false; +- } ++ if (mode1 == mode2) ++ return true; + +- if (identity_perm) +- { +- if (!d->testing_p) +- emit_move_insn (d->target, d->op0); +- return true; +- } +- else if (broadcast_perm && TARGET_AVX2) +- { +- /* Use vpbroadcast{b,w,d}. */ +- rtx (*gen) (rtx, rtx) = NULL; +- switch (d->vmode) +- { +- case E_V64QImode: +- if (TARGET_AVX512BW) +- gen = gen_avx512bw_vec_dupv64qi_1; +- break; +- case E_V32QImode: +- gen = gen_avx2_pbroadcastv32qi_1; +- break; +- case E_V32HImode: +- if (TARGET_AVX512BW) +- gen = gen_avx512bw_vec_dupv32hi_1; +- break; +- case E_V16HImode: +- gen = gen_avx2_pbroadcastv16hi_1; +- break; +- case E_V16SImode: +- if (TARGET_AVX512F) +- gen = gen_avx512f_vec_dupv16si_1; +- break; +- case E_V8SImode: +- gen = gen_avx2_pbroadcastv8si_1; +- break; +- case E_V16QImode: +- gen = gen_avx2_pbroadcastv16qi; +- break; +- case E_V8HImode: +- gen = gen_avx2_pbroadcastv8hi; +- break; +- case E_V16SFmode: +- if (TARGET_AVX512F) +- gen = gen_avx512f_vec_dupv16sf_1; +- break; +- case E_V8SFmode: +- gen = gen_avx2_vec_dupv8sf_1; +- break; +- case E_V8DFmode: +- if (TARGET_AVX512F) +- gen = gen_avx512f_vec_dupv8df_1; +- break; +- case E_V8DImode: +- if (TARGET_AVX512F) +- gen = gen_avx512f_vec_dupv8di_1; +- break; +- /* For other modes prefer other shuffles this function creates. 
*/ +- default: break; +- } +- if (gen != NULL) +- { +- if (!d->testing_p) +- emit_insn (gen (d->target, d->op0)); +- return true; +- } +- } ++ if (ix86_tieable_integer_mode_p (mode1) ++ && ix86_tieable_integer_mode_p (mode2)) ++ return true; + +- if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p)) +- return true; ++ /* MODE2 being XFmode implies fp stack or general regs, which means we ++ can tie any smaller floating point modes to it. Note that we do not ++ tie this with TFmode. */ ++ if (mode2 == XFmode) ++ return mode1 == SFmode || mode1 == DFmode; + +- /* There are plenty of patterns in sse.md that are written for +- SEL+CONCAT and are not replicated for a single op. Perhaps +- that should be changed, to avoid the nastiness here. */ ++ /* MODE2 being DFmode implies fp stack, general or sse regs, which means ++ that we can tie it with SFmode. */ ++ if (mode2 == DFmode) ++ return mode1 == SFmode; + +- /* Recognize interleave style patterns, which means incrementing +- every other permutation operand. */ +- for (i = 0; i < nelt; i += 2) +- { +- nd.perm[i] = d->perm[i] & mask; +- nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt; +- } +- if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, +- d->testing_p)) +- return true; ++ /* If MODE2 is only appropriate for an SSE register, then tie with ++ any other mode acceptable to SSE registers. */ ++ if (GET_MODE_SIZE (mode2) == 64 ++ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) ++ return (GET_MODE_SIZE (mode1) == 64 ++ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); ++ if (GET_MODE_SIZE (mode2) == 32 ++ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) ++ return (GET_MODE_SIZE (mode1) == 32 ++ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); ++ if (GET_MODE_SIZE (mode2) == 16 ++ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) ++ return (GET_MODE_SIZE (mode1) == 16 ++ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); + +- /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */ +- if (nelt >= 4) +- { +- for (i = 0; i < nelt; i += 4) +- { +- nd.perm[i + 0] = d->perm[i + 0] & mask; +- nd.perm[i + 1] = d->perm[i + 1] & mask; +- nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt; +- nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt; +- } ++ /* If MODE2 is appropriate for an MMX register, then tie ++ with any other mode acceptable to MMX registers. */ ++ if (GET_MODE_SIZE (mode2) == 8 ++ && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) ++ return (GET_MODE_SIZE (mode1) == 8 ++ && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1)); + +- if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, +- d->testing_p)) +- return true; +- } +- } ++ return false; ++} + +- /* Try movss/movsd instructions. */ +- if (expand_vec_perm_movs (d)) +- return true; ++/* Return the cost of moving between two registers of mode MODE. */ + +- /* Finally, try the fully general two operand permute. */ +- if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt, +- d->testing_p)) +- return true; ++static int ++ix86_set_reg_reg_cost (machine_mode mode) ++{ ++ unsigned int units = UNITS_PER_WORD; + +- /* Recognize interleave style patterns with reversed operands. 
*/ +- if (!d->one_operand_p) ++ switch (GET_MODE_CLASS (mode)) + { +- for (i = 0; i < nelt; ++i) +- { +- unsigned e = d->perm[i]; +- if (e >= nelt) +- e -= nelt; +- else +- e += nelt; +- nd.perm[i] = e; +- } ++ default: ++ break; + +- if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt, +- d->testing_p)) +- return true; +- } ++ case MODE_CC: ++ units = GET_MODE_SIZE (CCmode); ++ break; + +- /* Try the SSE4.1 blend variable merge instructions. */ +- if (expand_vec_perm_blend (d)) +- return true; ++ case MODE_FLOAT: ++ if ((TARGET_SSE && mode == TFmode) ++ || (TARGET_80387 && mode == XFmode) ++ || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode) ++ || ((TARGET_80387 || TARGET_SSE) && mode == SFmode)) ++ units = GET_MODE_SIZE (mode); ++ break; + +- /* Try one of the AVX vpermil variable permutations. */ +- if (expand_vec_perm_vpermil (d)) +- return true; ++ case MODE_COMPLEX_FLOAT: ++ if ((TARGET_SSE && mode == TCmode) ++ || (TARGET_80387 && mode == XCmode) ++ || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode) ++ || ((TARGET_80387 || TARGET_SSE) && mode == SCmode)) ++ units = GET_MODE_SIZE (mode); ++ break; + +- /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128, +- vpshufb, vpermd, vpermps or vpermq variable permutation. */ +- if (expand_vec_perm_pshufb (d)) +- return true; ++ case MODE_VECTOR_INT: ++ case MODE_VECTOR_FLOAT: ++ if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) ++ || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) ++ || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) ++ || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) ++ || (TARGET_MMX && VALID_MMX_REG_MODE (mode))) ++ units = GET_MODE_SIZE (mode); ++ } + +- /* Try the AVX2 vpalignr instruction. */ +- if (expand_vec_perm_palignr (d, true)) +- return true; ++ /* Return the cost of moving between two registers of mode MODE, ++ assuming that the move will be in pieces of at most UNITS bytes. */ ++ return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units)); ++} + +- /* Try the AVX512F vperm{s,d} instructions. */ +- if (ix86_expand_vec_one_operand_perm_avx512 (d)) +- return true; ++/* Return cost of vector operation in MODE given that scalar version has ++ COST. */ + +- /* Try the AVX512F vpermt2/vpermi2 instructions. */ +- if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) +- return true; ++static int ++ix86_vec_cost (machine_mode mode, int cost) ++{ ++ if (!VECTOR_MODE_P (mode)) ++ return cost; + +- /* See if we can get the same permutation in different vector integer +- mode. */ +- if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) +- { +- if (!d->testing_p) +- emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); +- return true; +- } +- return false; ++ if (GET_MODE_BITSIZE (mode) == 128 ++ && TARGET_SSE_SPLIT_REGS) ++ return cost * 2; ++ if (GET_MODE_BITSIZE (mode) > 128 ++ && TARGET_AVX128_OPTIMAL) ++ return cost * GET_MODE_BITSIZE (mode) / 128; ++ return cost; + } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D +- in terms of a pair of pshuflw + pshufhw instructions. */ ++/* Return cost of multiplication in MODE. 
*/ + +-static bool +-expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d) ++static int ++ix86_multiplication_cost (const struct processor_costs *cost, ++ enum machine_mode mode) + { +- unsigned char perm2[MAX_VECT_LEN]; +- unsigned i; +- bool ok; +- +- if (d->vmode != V8HImode || !d->one_operand_p) +- return false; ++ machine_mode inner_mode = mode; ++ if (VECTOR_MODE_P (mode)) ++ inner_mode = GET_MODE_INNER (mode); + +- /* The two permutations only operate in 64-bit lanes. */ +- for (i = 0; i < 4; ++i) +- if (d->perm[i] >= 4) +- return false; +- for (i = 4; i < 8; ++i) +- if (d->perm[i] < 4) +- return false; ++ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) ++ return inner_mode == DFmode ? cost->mulsd : cost->mulss; ++ else if (X87_FLOAT_MODE_P (mode)) ++ return cost->fmul; ++ else if (FLOAT_MODE_P (mode)) ++ return ix86_vec_cost (mode, ++ inner_mode == DFmode ? cost->mulsd : cost->mulss); ++ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) ++ { ++ /* vpmullq is used in this case. No emulation is needed. */ ++ if (TARGET_AVX512DQ) ++ return ix86_vec_cost (mode, cost->mulss); + +- if (d->testing_p) +- return true; ++ /* V*QImode is emulated with 7-13 insns. */ ++ if (mode == V16QImode || mode == V32QImode) ++ { ++ int extra = 11; ++ if (TARGET_XOP && mode == V16QImode) ++ extra = 5; ++ else if (TARGET_SSSE3) ++ extra = 6; ++ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra); ++ } ++ /* V*DImode is emulated with 5-8 insns. */ ++ else if (mode == V2DImode || mode == V4DImode) ++ { ++ if (TARGET_XOP && mode == V2DImode) ++ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3); ++ else ++ return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5); ++ } ++ /* Without sse4.1, we don't have PMULLD; it's emulated with 7 ++ insns, including two PMULUDQ. */ ++ else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX)) ++ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5); ++ else ++ return ix86_vec_cost (mode, cost->mulss); ++ } ++ else ++ return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7); ++} + +- /* Emit the pshuflw. */ +- memcpy (perm2, d->perm, 4); +- for (i = 4; i < 8; ++i) +- perm2[i] = i; +- ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p); +- gcc_assert (ok); ++/* Return cost of multiplication in MODE. */ + +- /* Emit the pshufhw. */ +- memcpy (perm2 + 4, d->perm + 4, 4); +- for (i = 0; i < 4; ++i) +- perm2[i] = i; +- ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p); +- gcc_assert (ok); ++static int ++ix86_division_cost (const struct processor_costs *cost, ++ enum machine_mode mode) ++{ ++ machine_mode inner_mode = mode; ++ if (VECTOR_MODE_P (mode)) ++ inner_mode = GET_MODE_INNER (mode); + +- return true; ++ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) ++ return inner_mode == DFmode ? cost->divsd : cost->divss; ++ else if (X87_FLOAT_MODE_P (mode)) ++ return cost->fdiv; ++ else if (FLOAT_MODE_P (mode)) ++ return ix86_vec_cost (mode, ++ inner_mode == DFmode ? cost->divsd : cost->divss); ++ else ++ return cost->divide[MODE_INDEX (mode)]; + } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify +- the permutation using the SSSE3 palignr instruction. This succeeds +- when all of the elements in PERM fit within one vector and we merely +- need to shift them down so that a single vector permutation has a +- chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only +- the vpalignr instruction itself can perform the requested permutation. 
*/ +- +-static bool +-expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p) +-{ +- unsigned i, nelt = d->nelt; +- unsigned min, max, minswap, maxswap; +- bool in_order, ok, swap = false; +- rtx shift, target; +- struct expand_vec_perm_d dcopy; +- +- /* Even with AVX, palignr only operates on 128-bit vectors, +- in AVX2 palignr operates on both 128-bit lanes. */ +- if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) +- && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32)) +- return false; +- +- min = 2 * nelt; +- max = 0; +- minswap = 2 * nelt; +- maxswap = 0; +- for (i = 0; i < nelt; ++i) +- { +- unsigned e = d->perm[i]; +- unsigned eswap = d->perm[i] ^ nelt; +- if (GET_MODE_SIZE (d->vmode) == 32) +- { +- e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1); +- eswap = e ^ (nelt / 2); +- } +- if (e < min) +- min = e; +- if (e > max) +- max = e; +- if (eswap < minswap) +- minswap = eswap; +- if (eswap > maxswap) +- maxswap = eswap; +- } +- if (min == 0 +- || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt)) +- { +- if (d->one_operand_p +- || minswap == 0 +- || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32 +- ? nelt / 2 : nelt)) +- return false; +- swap = true; +- min = minswap; +- max = maxswap; +- } ++#define COSTS_N_BYTES(N) ((N) * 2) + +- /* Given that we have SSSE3, we know we'll be able to implement the +- single operand permutation after the palignr with pshufb for +- 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed +- first. */ +- if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p) +- return true; ++/* Return cost of shift in MODE. ++ If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL. ++ AND_IN_OP1 specify in op1 is result of and and SHIFT_AND_TRUNCATE ++ if op1 is a result of subreg. + +- dcopy = *d; +- if (swap) +- { +- dcopy.op0 = d->op1; +- dcopy.op1 = d->op0; +- for (i = 0; i < nelt; ++i) +- dcopy.perm[i] ^= nelt; +- } ++ SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */ + +- in_order = true; +- for (i = 0; i < nelt; ++i) ++static int ++ix86_shift_rotate_cost (const struct processor_costs *cost, ++ enum machine_mode mode, bool constant_op1, ++ HOST_WIDE_INT op1_val, ++ bool speed, ++ bool and_in_op1, ++ bool shift_and_truncate, ++ bool *skip_op0, bool *skip_op1) ++{ ++ if (skip_op0) ++ *skip_op0 = *skip_op1 = false; ++ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) + { +- unsigned e = dcopy.perm[i]; +- if (GET_MODE_SIZE (d->vmode) == 32 +- && e >= nelt +- && (e & (nelt / 2 - 1)) < min) +- e = e - min - (nelt / 2); ++ /* V*QImode is emulated with 1-11 insns. */ ++ if (mode == V16QImode || mode == V32QImode) ++ { ++ int count = 11; ++ if (TARGET_XOP && mode == V16QImode) ++ { ++ /* For XOP we use vpshab, which requires a broadcast of the ++ value to the variable shift insn. For constants this ++ means a V16Q const in mem; even when we can perform the ++ shift with one insn set the cost to prefer paddb. */ ++ if (constant_op1) ++ { ++ if (skip_op1) ++ *skip_op1 = true; ++ return ix86_vec_cost (mode, ++ cost->sse_op ++ + (speed ++ ? 2 ++ : COSTS_N_BYTES ++ (GET_MODE_UNIT_SIZE (mode)))); ++ } ++ count = 3; ++ } ++ else if (TARGET_SSSE3) ++ count = 7; ++ return ix86_vec_cost (mode, cost->sse_op * count); ++ } + else +- e = e - min; +- if (e != i) +- in_order = false; +- dcopy.perm[i] = e; +- } +- dcopy.one_operand_p = true; +- +- if (single_insn_only_p && !in_order) +- return false; +- +- /* For AVX2, test whether we can permute the result in one instruction. 
*/ +- if (d->testing_p) +- { +- if (in_order) +- return true; +- dcopy.op1 = dcopy.op0; +- return expand_vec_perm_1 (&dcopy); ++ return ix86_vec_cost (mode, cost->sse_op); + } +- +- shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode)); +- if (GET_MODE_SIZE (d->vmode) == 16) ++ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + { +- target = gen_reg_rtx (TImode); +- emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1), +- gen_lowpart (TImode, dcopy.op0), shift)); ++ if (constant_op1) ++ { ++ if (op1_val > 32) ++ return cost->shift_const + COSTS_N_INSNS (2); ++ else ++ return cost->shift_const * 2; ++ } ++ else ++ { ++ if (and_in_op1) ++ return cost->shift_var * 2; ++ else ++ return cost->shift_var * 6 + COSTS_N_INSNS (2); ++ } + } + else + { +- target = gen_reg_rtx (V2TImode); +- emit_insn (gen_avx2_palignrv2ti (target, +- gen_lowpart (V2TImode, dcopy.op1), +- gen_lowpart (V2TImode, dcopy.op0), +- shift)); +- } +- +- dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target); +- +- /* Test for the degenerate case where the alignment by itself +- produces the desired permutation. */ +- if (in_order) +- { +- emit_move_insn (d->target, dcopy.op0); +- return true; ++ if (constant_op1) ++ return cost->shift_const; ++ else if (shift_and_truncate) ++ { ++ if (skip_op0) ++ *skip_op0 = *skip_op1 = true; ++ /* Return the cost after shift-and truncation. */ ++ return cost->shift_var; ++ } ++ else ++ return cost->shift_var; + } +- +- ok = expand_vec_perm_1 (&dcopy); +- gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32); +- +- return ok; ++ return cost->shift_const; + } + +-/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify +- the permutation using the SSE4_1 pblendv instruction. Potentially +- reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */ ++/* Compute a (partial) cost for rtx X. Return true if the complete ++ cost has been computed, and false if subexpressions should be ++ scanned. In either case, *TOTAL contains the cost result. */ + + static bool +-expand_vec_perm_pblendv (struct expand_vec_perm_d *d) ++ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, ++ int *total, bool speed) + { +- unsigned i, which, nelt = d->nelt; +- struct expand_vec_perm_d dcopy, dcopy1; +- machine_mode vmode = d->vmode; +- bool ok; +- +- /* Use the same checks as in expand_vec_perm_blend. */ +- if (d->one_operand_p) +- return false; +- if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) +- ; +- else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) +- ; +- else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) +- ; +- else +- return false; +- +- /* Figure out where permutation elements stay not in their +- respective lanes. */ +- for (i = 0, which = 0; i < nelt; ++i) +- { +- unsigned e = d->perm[i]; +- if (e != i) +- which |= (e < nelt ? 1 : 2); +- } +- /* We can pblend the part where elements stay not in their +- respective lanes only when these elements are all in one +- half of a permutation. +- {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective +- lanes, but both 8 and 9 >= 8 +- {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their +- respective lanes and 8 >= 8, but 2 not. */ +- if (which != 1 && which != 2) +- return false; +- if (d->testing_p && GET_MODE_SIZE (vmode) == 16) +- return true; +- +- /* First we apply one operand permutation to the part where +- elements stay not in their respective lanes. 
*/ +- dcopy = *d; +- if (which == 2) +- dcopy.op0 = dcopy.op1 = d->op1; +- else +- dcopy.op0 = dcopy.op1 = d->op0; +- if (!d->testing_p) +- dcopy.target = gen_reg_rtx (vmode); +- dcopy.one_operand_p = true; +- +- for (i = 0; i < nelt; ++i) +- dcopy.perm[i] = d->perm[i] & (nelt - 1); +- +- ok = expand_vec_perm_1 (&dcopy); +- if (GET_MODE_SIZE (vmode) != 16 && !ok) +- return false; +- else +- gcc_assert (ok); +- if (d->testing_p) +- return true; +- +- /* Next we put permuted elements into their positions. */ +- dcopy1 = *d; +- if (which == 2) +- dcopy1.op1 = dcopy.target; +- else +- dcopy1.op0 = dcopy.target; +- +- for (i = 0; i < nelt; ++i) +- dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i); ++ rtx mask; ++ enum rtx_code code = GET_CODE (x); ++ enum rtx_code outer_code = (enum rtx_code) outer_code_i; ++ const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost; ++ int src_cost; + +- ok = expand_vec_perm_blend (&dcopy1); +- gcc_assert (ok); ++ switch (code) ++ { ++ case SET: ++ if (register_operand (SET_DEST (x), VOIDmode) ++ && register_operand (SET_SRC (x), VOIDmode)) ++ { ++ *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x))); ++ return true; ++ } + +- return true; +-} ++ if (register_operand (SET_SRC (x), VOIDmode)) ++ /* Avoid potentially incorrect high cost from rtx_costs ++ for non-tieable SUBREGs. */ ++ src_cost = 0; ++ else ++ { ++ src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed); + +-static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d); ++ if (CONSTANT_P (SET_SRC (x))) ++ /* Constant costs assume a base value of COSTS_N_INSNS (1) and add ++ a small value, possibly zero for cheap constants. */ ++ src_cost += COSTS_N_INSNS (1); ++ } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify +- a two vector permutation into a single vector permutation by using +- an interleave operation to merge the vectors. */ ++ *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed); ++ return true; + +-static bool +-expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) +-{ +- struct expand_vec_perm_d dremap, dfinal; +- unsigned i, nelt = d->nelt, nelt2 = nelt / 2; +- unsigned HOST_WIDE_INT contents; +- unsigned char remap[2 * MAX_VECT_LEN]; +- rtx_insn *seq; +- bool ok, same_halves = false; ++ case CONST_INT: ++ case CONST: ++ case LABEL_REF: ++ case SYMBOL_REF: ++ if (x86_64_immediate_operand (x, VOIDmode)) ++ *total = 0; ++ else ++ *total = 1; ++ return true; + +- if (GET_MODE_SIZE (d->vmode) == 16) +- { +- if (d->one_operand_p) +- return false; +- } +- else if (GET_MODE_SIZE (d->vmode) == 32) +- { +- if (!TARGET_AVX) +- return false; +- /* For 32-byte modes allow even d->one_operand_p. +- The lack of cross-lane shuffling in some instructions +- might prevent a single insn shuffle. */ +- dfinal = *d; +- dfinal.testing_p = true; +- /* If expand_vec_perm_interleave3 can expand this into +- a 3 insn sequence, give up and let it be expanded as +- 3 insn sequence. While that is one insn longer, +- it doesn't need a memory operand and in the common +- case that both interleave low and high permutations +- with the same operands are adjacent needs 4 insns +- for both after CSE. 
*/ +- if (expand_vec_perm_interleave3 (&dfinal)) +- return false; +- } +- else +- return false; ++ case CONST_DOUBLE: ++ if (IS_STACK_MODE (mode)) ++ switch (standard_80387_constant_p (x)) ++ { ++ case -1: ++ case 0: ++ break; ++ case 1: /* 0.0 */ ++ *total = 1; ++ return true; ++ default: /* Other constants */ ++ *total = 2; ++ return true; ++ } ++ /* FALLTHRU */ + +- /* Examine from whence the elements come. */ +- contents = 0; +- for (i = 0; i < nelt; ++i) +- contents |= HOST_WIDE_INT_1U << d->perm[i]; ++ case CONST_VECTOR: ++ switch (standard_sse_constant_p (x, mode)) ++ { ++ case 0: ++ break; ++ case 1: /* 0: xor eliminates false dependency */ ++ *total = 0; ++ return true; ++ default: /* -1: cmp contains false dependency */ ++ *total = 1; ++ return true; ++ } ++ /* FALLTHRU */ + +- memset (remap, 0xff, sizeof (remap)); +- dremap = *d; ++ case CONST_WIDE_INT: ++ /* Fall back to (MEM (SYMBOL_REF)), since that's where ++ it'll probably end up. Add a penalty for size. */ ++ *total = (COSTS_N_INSNS (1) ++ + (!TARGET_64BIT && flag_pic) ++ + (GET_MODE_SIZE (mode) <= 4 ++ ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2)); ++ return true; + +- if (GET_MODE_SIZE (d->vmode) == 16) +- { +- unsigned HOST_WIDE_INT h1, h2, h3, h4; ++ case ZERO_EXTEND: ++ /* The zero extensions is often completely free on x86_64, so make ++ it as cheap as possible. */ ++ if (TARGET_64BIT && mode == DImode ++ && GET_MODE (XEXP (x, 0)) == SImode) ++ *total = 1; ++ else if (TARGET_ZERO_EXTEND_WITH_AND) ++ *total = cost->add; ++ else ++ *total = cost->movzx; ++ return false; + +- /* Split the two input vectors into 4 halves. */ +- h1 = (HOST_WIDE_INT_1U << nelt2) - 1; +- h2 = h1 << nelt2; +- h3 = h2 << nelt2; +- h4 = h3 << nelt2; ++ case SIGN_EXTEND: ++ *total = cost->movsx; ++ return false; + +- /* If the elements from the low halves use interleave low, and similarly +- for interleave high. If the elements are from mis-matched halves, we +- can use shufps for V4SF/V4SI or do a DImode shuffle. 
*/ +- if ((contents & (h1 | h3)) == contents) +- { +- /* punpckl* */ +- for (i = 0; i < nelt2; ++i) +- { +- remap[i] = i * 2; +- remap[i + nelt] = i * 2 + 1; +- dremap.perm[i * 2] = i; +- dremap.perm[i * 2 + 1] = i + nelt; +- } +- if (!TARGET_SSE2 && d->vmode == V4SImode) +- dremap.vmode = V4SFmode; +- } +- else if ((contents & (h2 | h4)) == contents) +- { +- /* punpckh* */ +- for (i = 0; i < nelt2; ++i) +- { +- remap[i + nelt2] = i * 2; +- remap[i + nelt + nelt2] = i * 2 + 1; +- dremap.perm[i * 2] = i + nelt2; +- dremap.perm[i * 2 + 1] = i + nelt + nelt2; +- } +- if (!TARGET_SSE2 && d->vmode == V4SImode) +- dremap.vmode = V4SFmode; +- } +- else if ((contents & (h1 | h4)) == contents) ++ case ASHIFT: ++ if (SCALAR_INT_MODE_P (mode) ++ && GET_MODE_SIZE (mode) < UNITS_PER_WORD ++ && CONST_INT_P (XEXP (x, 1))) + { +- /* shufps */ +- for (i = 0; i < nelt2; ++i) ++ HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); ++ if (value == 1) + { +- remap[i] = i; +- remap[i + nelt + nelt2] = i + nelt2; +- dremap.perm[i] = i; +- dremap.perm[i + nelt2] = i + nelt + nelt2; ++ *total = cost->add; ++ return false; + } +- if (nelt != 4) ++ if ((value == 2 || value == 3) ++ && cost->lea <= cost->shift_const) + { +- /* shufpd */ +- dremap.vmode = V2DImode; +- dremap.nelt = 2; +- dremap.perm[0] = 0; +- dremap.perm[1] = 3; ++ *total = cost->lea; ++ return false; + } + } +- else if ((contents & (h2 | h3)) == contents) ++ /* FALLTHRU */ ++ ++ case ROTATE: ++ case ASHIFTRT: ++ case LSHIFTRT: ++ case ROTATERT: ++ bool skip_op0, skip_op1; ++ *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)), ++ CONST_INT_P (XEXP (x, 1)) ++ ? INTVAL (XEXP (x, 1)) : -1, ++ speed, ++ GET_CODE (XEXP (x, 1)) == AND, ++ SUBREG_P (XEXP (x, 1)) ++ && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND, ++ &skip_op0, &skip_op1); ++ if (skip_op0 || skip_op1) + { +- /* shufps */ +- for (i = 0; i < nelt2; ++i) +- { +- remap[i + nelt2] = i; +- remap[i + nelt] = i + nelt2; +- dremap.perm[i] = i + nelt2; +- dremap.perm[i + nelt2] = i + nelt; +- } +- if (nelt != 4) +- { +- /* shufpd */ +- dremap.vmode = V2DImode; +- dremap.nelt = 2; +- dremap.perm[0] = 1; +- dremap.perm[1] = 2; +- } ++ if (!skip_op0) ++ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); ++ if (!skip_op1) ++ *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed); ++ return true; + } +- else +- return false; +- } +- else +- { +- unsigned int nelt4 = nelt / 4, nzcnt = 0; +- unsigned HOST_WIDE_INT q[8]; +- unsigned int nonzero_halves[4]; ++ return false; + +- /* Split the two input vectors into 8 quarters. */ +- q[0] = (HOST_WIDE_INT_1U << nelt4) - 1; +- for (i = 1; i < 8; ++i) +- q[i] = q[0] << (nelt4 * i); +- for (i = 0; i < 4; ++i) +- if (((q[2 * i] | q[2 * i + 1]) & contents) != 0) +- { +- nonzero_halves[nzcnt] = i; +- ++nzcnt; +- } ++ case FMA: ++ { ++ rtx sub; + +- if (nzcnt == 1) +- { +- gcc_assert (d->one_operand_p); +- nonzero_halves[1] = nonzero_halves[0]; +- same_halves = true; +- } +- else if (d->one_operand_p) +- { +- gcc_assert (nonzero_halves[0] == 0); +- gcc_assert (nonzero_halves[1] == 1); +- } ++ gcc_assert (FLOAT_MODE_P (mode)); ++ gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); ++ ++ *total = ix86_vec_cost (mode, ++ GET_MODE_INNER (mode) == SFmode ++ ? cost->fmass : cost->fmasd); ++ *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); ++ ++ /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. 
*/ ++ sub = XEXP (x, 0); ++ if (GET_CODE (sub) == NEG) ++ sub = XEXP (sub, 0); ++ *total += rtx_cost (sub, mode, FMA, 0, speed); ++ ++ sub = XEXP (x, 2); ++ if (GET_CODE (sub) == NEG) ++ sub = XEXP (sub, 0); ++ *total += rtx_cost (sub, mode, FMA, 2, speed); ++ return true; ++ } + +- if (nzcnt <= 2) ++ case MULT: ++ if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode)) + { +- if (d->perm[0] / nelt2 == nonzero_halves[1]) ++ rtx op0 = XEXP (x, 0); ++ rtx op1 = XEXP (x, 1); ++ int nbits; ++ if (CONST_INT_P (XEXP (x, 1))) + { +- /* Attempt to increase the likelihood that dfinal +- shuffle will be intra-lane. */ +- std::swap (nonzero_halves[0], nonzero_halves[1]); ++ unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); ++ for (nbits = 0; value != 0; value &= value - 1) ++ nbits++; + } ++ else ++ /* This is arbitrary. */ ++ nbits = 7; + +- /* vperm2f128 or vperm2i128. */ +- for (i = 0; i < nelt2; ++i) ++ /* Compute costs correctly for widening multiplication. */ ++ if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) ++ && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 ++ == GET_MODE_SIZE (mode)) + { +- remap[i + nonzero_halves[1] * nelt2] = i + nelt2; +- remap[i + nonzero_halves[0] * nelt2] = i; +- dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2; +- dremap.perm[i] = i + nonzero_halves[0] * nelt2; ++ int is_mulwiden = 0; ++ machine_mode inner_mode = GET_MODE (op0); ++ ++ if (GET_CODE (op0) == GET_CODE (op1)) ++ is_mulwiden = 1, op1 = XEXP (op1, 0); ++ else if (CONST_INT_P (op1)) ++ { ++ if (GET_CODE (op0) == SIGN_EXTEND) ++ is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) ++ == INTVAL (op1); ++ else ++ is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); ++ } ++ ++ if (is_mulwiden) ++ op0 = XEXP (op0, 0), mode = GET_MODE (op0); + } + +- if (d->vmode != V8SFmode +- && d->vmode != V4DFmode +- && d->vmode != V8SImode) ++ *total = (cost->mult_init[MODE_INDEX (mode)] ++ + nbits * cost->mult_bit ++ + rtx_cost (op0, mode, outer_code, opno, speed) ++ + rtx_cost (op1, mode, outer_code, opno, speed)); ++ ++ return true; ++ } ++ *total = ix86_multiplication_cost (cost, mode); ++ return false; ++ ++ case DIV: ++ case UDIV: ++ case MOD: ++ case UMOD: ++ *total = ix86_division_cost (cost, mode); ++ return false; ++ ++ case PLUS: ++ if (GET_MODE_CLASS (mode) == MODE_INT ++ && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) ++ { ++ if (GET_CODE (XEXP (x, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT ++ && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) ++ && CONSTANT_P (XEXP (x, 1))) + { +- dremap.vmode = V8SImode; +- dremap.nelt = 8; +- for (i = 0; i < 4; ++i) ++ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); ++ if (val == 2 || val == 4 || val == 8) + { +- dremap.perm[i] = i + nonzero_halves[0] * 4; +- dremap.perm[i + 4] = i + nonzero_halves[1] * 4; ++ *total = cost->lea; ++ *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, ++ outer_code, opno, speed); ++ *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, ++ outer_code, opno, speed); ++ *total += rtx_cost (XEXP (x, 1), mode, ++ outer_code, opno, speed); ++ return true; + } + } +- } +- else if (d->one_operand_p) +- return false; +- else if (TARGET_AVX2 +- && (contents & (q[0] | q[2] | q[4] | q[6])) == contents) +- { +- /* vpunpckl* */ +- for (i = 0; i < nelt4; ++i) ++ else if (GET_CODE (XEXP (x, 0)) == MULT ++ && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { +- remap[i] = i * 2; +- remap[i + nelt] = i * 2 + 1; +- remap[i + nelt2] = i * 2 + nelt2; +- remap[i + nelt + nelt2] = i * 2 + nelt2 + 1; +- 
dremap.perm[i * 2] = i; +- dremap.perm[i * 2 + 1] = i + nelt; +- dremap.perm[i * 2 + nelt2] = i + nelt2; +- dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2; ++ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); ++ if (val == 2 || val == 4 || val == 8) ++ { ++ *total = cost->lea; ++ *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ++ outer_code, opno, speed); ++ *total += rtx_cost (XEXP (x, 1), mode, ++ outer_code, opno, speed); ++ return true; ++ } + } +- } +- else if (TARGET_AVX2 +- && (contents & (q[1] | q[3] | q[5] | q[7])) == contents) +- { +- /* vpunpckh* */ +- for (i = 0; i < nelt4; ++i) ++ else if (GET_CODE (XEXP (x, 0)) == PLUS) + { +- remap[i + nelt4] = i * 2; +- remap[i + nelt + nelt4] = i * 2 + 1; +- remap[i + nelt2 + nelt4] = i * 2 + nelt2; +- remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1; +- dremap.perm[i * 2] = i + nelt4; +- dremap.perm[i * 2 + 1] = i + nelt + nelt4; +- dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4; +- dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4; ++ /* Add with carry, ignore the cost of adding a carry flag. */ ++ if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode)) ++ *total = cost->add; ++ else ++ { ++ *total = cost->lea; ++ *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ++ outer_code, opno, speed); ++ } ++ ++ *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, ++ outer_code, opno, speed); ++ *total += rtx_cost (XEXP (x, 1), mode, ++ outer_code, opno, speed); ++ return true; + } + } +- else +- return false; +- } ++ /* FALLTHRU */ + +- /* Use the remapping array set up above to move the elements from their +- swizzled locations into their final destinations. */ +- dfinal = *d; +- for (i = 0; i < nelt; ++i) +- { +- unsigned e = remap[d->perm[i]]; +- gcc_assert (e < nelt); +- /* If same_halves is true, both halves of the remapped vector are the +- same. Avoid cross-lane accesses if possible. */ +- if (same_halves && i >= nelt2) ++ case MINUS: ++ /* Subtract with borrow, ignore the cost of subtracting a carry flag. */ ++ if (GET_MODE_CLASS (mode) == MODE_INT ++ && GET_MODE_SIZE (mode) <= UNITS_PER_WORD ++ && GET_CODE (XEXP (x, 0)) == MINUS ++ && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)) + { +- gcc_assert (e < nelt2); +- dfinal.perm[i] = e + nelt2; ++ *total = cost->add; ++ *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ++ outer_code, opno, speed); ++ *total += rtx_cost (XEXP (x, 1), mode, ++ outer_code, opno, speed); ++ return true; + } +- else +- dfinal.perm[i] = e; +- } +- if (!d->testing_p) +- { +- dremap.target = gen_reg_rtx (dremap.vmode); +- dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target); +- } +- dfinal.op1 = dfinal.op0; +- dfinal.one_operand_p = true; + +- /* Test if the final remap can be done with a single insn. For V4SFmode or +- V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */ +- start_sequence (); +- ok = expand_vec_perm_1 (&dfinal); +- seq = get_insns (); +- end_sequence (); +- +- if (!ok) +- return false; +- +- if (d->testing_p) +- return true; +- +- if (dremap.vmode != dfinal.vmode) +- { +- dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0); +- dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1); +- } +- +- ok = expand_vec_perm_1 (&dremap); +- gcc_assert (ok); +- +- emit_insn (seq); +- return true; +-} +- +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify +- a single vector cross-lane permutation into vpermq followed +- by any of the single insn permutations. 
*/ +- +-static bool +-expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d) +-{ +- struct expand_vec_perm_d dremap, dfinal; +- unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4; +- unsigned contents[2]; +- bool ok; +- +- if (!(TARGET_AVX2 +- && (d->vmode == V32QImode || d->vmode == V16HImode) +- && d->one_operand_p)) +- return false; +- +- contents[0] = 0; +- contents[1] = 0; +- for (i = 0; i < nelt2; ++i) +- { +- contents[0] |= 1u << (d->perm[i] / nelt4); +- contents[1] |= 1u << (d->perm[i + nelt2] / nelt4); +- } +- +- for (i = 0; i < 2; ++i) +- { +- unsigned int cnt = 0; +- for (j = 0; j < 4; ++j) +- if ((contents[i] & (1u << j)) != 0 && ++cnt > 2) ++ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) ++ { ++ *total = cost->addss; + return false; +- } +- +- if (d->testing_p) +- return true; +- +- dremap = *d; +- dremap.vmode = V4DImode; +- dremap.nelt = 4; +- dremap.target = gen_reg_rtx (V4DImode); +- dremap.op0 = gen_lowpart (V4DImode, d->op0); +- dremap.op1 = dremap.op0; +- dremap.one_operand_p = true; +- for (i = 0; i < 2; ++i) +- { +- unsigned int cnt = 0; +- for (j = 0; j < 4; ++j) +- if ((contents[i] & (1u << j)) != 0) +- dremap.perm[2 * i + cnt++] = j; +- for (; cnt < 2; ++cnt) +- dremap.perm[2 * i + cnt] = 0; +- } +- +- dfinal = *d; +- dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target); +- dfinal.op1 = dfinal.op0; +- dfinal.one_operand_p = true; +- for (i = 0, j = 0; i < nelt; ++i) +- { +- if (i == nelt2) +- j = 2; +- dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0); +- if ((d->perm[i] / nelt4) == dremap.perm[j]) +- ; +- else if ((d->perm[i] / nelt4) == dremap.perm[j + 1]) +- dfinal.perm[i] |= nelt4; +- else +- gcc_unreachable (); +- } +- +- ok = expand_vec_perm_1 (&dremap); +- gcc_assert (ok); +- +- ok = expand_vec_perm_1 (&dfinal); +- gcc_assert (ok); +- +- return true; +-} +- +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand +- a vector permutation using two instructions, vperm2f128 resp. +- vperm2i128 followed by any single in-lane permutation. */ +- +-static bool +-expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d) +-{ +- struct expand_vec_perm_d dfirst, dsecond; +- unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm; +- bool ok; ++ } ++ else if (X87_FLOAT_MODE_P (mode)) ++ { ++ *total = cost->fadd; ++ return false; ++ } ++ else if (FLOAT_MODE_P (mode)) ++ { ++ *total = ix86_vec_cost (mode, cost->addss); ++ return false; ++ } ++ /* FALLTHRU */ + +- if (!TARGET_AVX +- || GET_MODE_SIZE (d->vmode) != 32 +- || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2)) +- return false; ++ case AND: ++ case IOR: ++ case XOR: ++ if (GET_MODE_CLASS (mode) == MODE_INT ++ && GET_MODE_SIZE (mode) > UNITS_PER_WORD) ++ { ++ *total = (cost->add * 2 ++ + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) ++ << (GET_MODE (XEXP (x, 0)) != DImode)) ++ + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) ++ << (GET_MODE (XEXP (x, 1)) != DImode))); ++ return true; ++ } ++ /* FALLTHRU */ + +- dsecond = *d; +- dsecond.one_operand_p = false; +- dsecond.testing_p = true; +- +- /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128 +- immediate. For perm < 16 the second permutation uses +- d->op0 as first operand, for perm >= 16 it uses d->op1 +- as first operand. The second operand is the result of +- vperm2[fi]128. */ +- for (perm = 0; perm < 32; perm++) +- { +- /* Ignore permutations which do not move anything cross-lane. */ +- if (perm < 16) +- { +- /* The second shuffle for e.g. V4DFmode has +- 0123 and ABCD operands. 
+- Ignore AB23, as 23 is already in the second lane +- of the first operand. */ +- if ((perm & 0xc) == (1 << 2)) continue; +- /* And 01CD, as 01 is in the first lane of the first +- operand. */ +- if ((perm & 3) == 0) continue; +- /* And 4567, as then the vperm2[fi]128 doesn't change +- anything on the original 4567 second operand. */ +- if ((perm & 0xf) == ((3 << 2) | 2)) continue; ++ case NEG: ++ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) ++ { ++ *total = cost->sse_op; ++ return false; + } +- else ++ else if (X87_FLOAT_MODE_P (mode)) + { +- /* The second shuffle for e.g. V4DFmode has +- 4567 and ABCD operands. +- Ignore AB67, as 67 is already in the second lane +- of the first operand. */ +- if ((perm & 0xc) == (3 << 2)) continue; +- /* And 45CD, as 45 is in the first lane of the first +- operand. */ +- if ((perm & 3) == 2) continue; +- /* And 0123, as then the vperm2[fi]128 doesn't change +- anything on the original 0123 first operand. */ +- if ((perm & 0xf) == (1 << 2)) continue; +- } +- +- for (i = 0; i < nelt; i++) +- { +- j = d->perm[i] / nelt2; +- if (j == ((perm >> (2 * (i >= nelt2))) & 3)) +- dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1)); +- else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16)) +- dsecond.perm[i] = d->perm[i] & (nelt - 1); +- else +- break; ++ *total = cost->fchs; ++ return false; + } +- +- if (i == nelt) ++ else if (FLOAT_MODE_P (mode)) + { +- start_sequence (); +- ok = expand_vec_perm_1 (&dsecond); +- end_sequence (); ++ *total = ix86_vec_cost (mode, cost->sse_op); ++ return false; + } ++ /* FALLTHRU */ ++ ++ case NOT: ++ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) ++ *total = ix86_vec_cost (mode, cost->sse_op); ++ else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) ++ *total = cost->add * 2; + else +- ok = false; ++ *total = cost->add; ++ return false; + +- if (ok) ++ case COMPARE: ++ if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT ++ && XEXP (XEXP (x, 0), 1) == const1_rtx ++ && CONST_INT_P (XEXP (XEXP (x, 0), 2)) ++ && XEXP (x, 1) == const0_rtx) + { +- if (d->testing_p) +- return true; +- +- /* Found a usable second shuffle. dfirst will be +- vperm2f128 on d->op0 and d->op1. */ +- dsecond.testing_p = false; +- dfirst = *d; +- dfirst.target = gen_reg_rtx (d->vmode); +- for (i = 0; i < nelt; i++) +- dfirst.perm[i] = (i & (nelt2 - 1)) +- + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2; +- +- canonicalize_perm (&dfirst); +- ok = expand_vec_perm_1 (&dfirst); +- gcc_assert (ok); +- +- /* And dsecond is some single insn shuffle, taking +- d->op0 and result of vperm2f128 (if perm < 16) or +- d->op1 and result of vperm2f128 (otherwise). */ +- if (perm >= 16) +- dsecond.op0 = dsecond.op1; +- dsecond.op1 = dfirst.target; +- +- ok = expand_vec_perm_1 (&dsecond); +- gcc_assert (ok); +- ++ /* This kind of construct is implemented using test[bwl]. ++ Treat it as if we had an AND. */ ++ mode = GET_MODE (XEXP (XEXP (x, 0), 0)); ++ *total = (cost->add ++ + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code, ++ opno, speed) ++ + rtx_cost (const1_rtx, mode, outer_code, opno, speed)); + return true; + } + +- /* For one operand, the only useful vperm2f128 permutation is 0x01 +- aka lanes swap. */ +- if (d->one_operand_p) +- return false; +- } ++ /* The embedded comparison operand is completely free. */ ++ if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))) ++ && XEXP (x, 1) == const0_rtx) ++ *total = 0; + +- return false; +-} ++ return false; + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. 
Try to simplify +- a two vector permutation using 2 intra-lane interleave insns +- and cross-lane shuffle for 32-byte vectors. */ ++ case FLOAT_EXTEND: ++ if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) ++ *total = 0; ++ else ++ *total = ix86_vec_cost (mode, cost->addss); ++ return false; + +-static bool +-expand_vec_perm_interleave3 (struct expand_vec_perm_d *d) +-{ +- unsigned i, nelt; +- rtx (*gen) (rtx, rtx, rtx); ++ case FLOAT_TRUNCATE: ++ if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) ++ *total = cost->fadd; ++ else ++ *total = ix86_vec_cost (mode, cost->addss); ++ return false; + +- if (d->one_operand_p) +- return false; +- if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32) +- ; +- else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode)) +- ; +- else +- return false; ++ case ABS: ++ /* SSE requires memory load for the constant operand. It may make ++ sense to account for this. Of course the constant operand may or ++ may not be reused. */ ++ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) ++ *total = cost->sse_op; ++ else if (X87_FLOAT_MODE_P (mode)) ++ *total = cost->fabs; ++ else if (FLOAT_MODE_P (mode)) ++ *total = ix86_vec_cost (mode, cost->sse_op); ++ return false; + +- nelt = d->nelt; +- if (d->perm[0] != 0 && d->perm[0] != nelt / 2) +- return false; +- for (i = 0; i < nelt; i += 2) +- if (d->perm[i] != d->perm[0] + i / 2 +- || d->perm[i + 1] != d->perm[0] + i / 2 + nelt) ++ case SQRT: ++ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) ++ *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; ++ else if (X87_FLOAT_MODE_P (mode)) ++ *total = cost->fsqrt; ++ else if (FLOAT_MODE_P (mode)) ++ *total = ix86_vec_cost (mode, ++ mode == SFmode ? cost->sqrtss : cost->sqrtsd); + return false; + +- if (d->testing_p) +- return true; ++ case UNSPEC: ++ if (XINT (x, 1) == UNSPEC_TP) ++ *total = 0; ++ return false; + +- switch (d->vmode) +- { +- case E_V32QImode: +- if (d->perm[0]) +- gen = gen_vec_interleave_highv32qi; +- else +- gen = gen_vec_interleave_lowv32qi; +- break; +- case E_V16HImode: +- if (d->perm[0]) +- gen = gen_vec_interleave_highv16hi; +- else +- gen = gen_vec_interleave_lowv16hi; +- break; +- case E_V8SImode: +- if (d->perm[0]) +- gen = gen_vec_interleave_highv8si; +- else +- gen = gen_vec_interleave_lowv8si; +- break; +- case E_V4DImode: +- if (d->perm[0]) +- gen = gen_vec_interleave_highv4di; +- else +- gen = gen_vec_interleave_lowv4di; +- break; +- case E_V8SFmode: +- if (d->perm[0]) +- gen = gen_vec_interleave_highv8sf; +- else +- gen = gen_vec_interleave_lowv8sf; +- break; +- case E_V4DFmode: +- if (d->perm[0]) +- gen = gen_vec_interleave_highv4df; ++ case VEC_SELECT: ++ case VEC_CONCAT: ++ case VEC_DUPLICATE: ++ /* ??? Assume all of these vector manipulation patterns are ++ recognizable. In which case they all pretty much have the ++ same cost. */ ++ *total = cost->sse_op; ++ return true; ++ case VEC_MERGE: ++ mask = XEXP (x, 2); ++ /* This is masked instruction, assume the same cost, ++ as nonmasked variant. */ ++ if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) ++ *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); + else +- gen = gen_vec_interleave_lowv4df; +- break; ++ *total = cost->sse_op; ++ return true; ++ + default: +- gcc_unreachable (); ++ return false; + } +- +- emit_insn (gen (d->target, d->op0, d->op1)); +- return true; + } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. 
Try to implement +- a single vector permutation using a single intra-lane vector +- permutation, vperm2f128 swapping the lanes and vblend* insn blending +- the non-swapped and swapped vectors together. */ +- +-static bool +-expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d) +-{ +- struct expand_vec_perm_d dfirst, dsecond; +- unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2; +- rtx_insn *seq; +- bool ok; +- rtx (*blend) (rtx, rtx, rtx, rtx) = NULL; +- +- if (!TARGET_AVX +- || TARGET_AVX2 +- || (d->vmode != V8SFmode && d->vmode != V4DFmode) +- || !d->one_operand_p) +- return false; +- +- dfirst = *d; +- for (i = 0; i < nelt; i++) +- dfirst.perm[i] = 0xff; +- for (i = 0, msk = 0; i < nelt; i++) +- { +- j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2; +- if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i]) +- return false; +- dfirst.perm[j] = d->perm[i]; +- if (j != i) +- msk |= (1 << i); +- } +- for (i = 0; i < nelt; i++) +- if (dfirst.perm[i] == 0xff) +- dfirst.perm[i] = i; +- +- if (!d->testing_p) +- dfirst.target = gen_reg_rtx (dfirst.vmode); +- +- start_sequence (); +- ok = expand_vec_perm_1 (&dfirst); +- seq = get_insns (); +- end_sequence (); +- +- if (!ok) +- return false; +- +- if (d->testing_p) +- return true; +- +- emit_insn (seq); +- +- dsecond = *d; +- dsecond.op0 = dfirst.target; +- dsecond.op1 = dfirst.target; +- dsecond.one_operand_p = true; +- dsecond.target = gen_reg_rtx (dsecond.vmode); +- for (i = 0; i < nelt; i++) +- dsecond.perm[i] = i ^ nelt2; +- +- ok = expand_vec_perm_1 (&dsecond); +- gcc_assert (ok); ++#if TARGET_MACHO + +- blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256; +- emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk))); +- return true; +-} ++static int current_machopic_label_num; + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF +- permutation using two vperm2f128, followed by a vshufpd insn blending +- the two vectors together. */ ++/* Given a symbol name and its associated stub, write out the ++ definition of the stub. */ + +-static bool +-expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d) ++void ++machopic_output_stub (FILE *file, const char *symb, const char *stub) + { +- struct expand_vec_perm_d dfirst, dsecond, dthird; +- bool ok; +- +- if (!TARGET_AVX || (d->vmode != V4DFmode)) +- return false; +- +- if (d->testing_p) +- return true; +- +- dfirst = *d; +- dsecond = *d; +- dthird = *d; +- +- dfirst.perm[0] = (d->perm[0] & ~1); +- dfirst.perm[1] = (d->perm[0] & ~1) + 1; +- dfirst.perm[2] = (d->perm[2] & ~1); +- dfirst.perm[3] = (d->perm[2] & ~1) + 1; +- dsecond.perm[0] = (d->perm[1] & ~1); +- dsecond.perm[1] = (d->perm[1] & ~1) + 1; +- dsecond.perm[2] = (d->perm[3] & ~1); +- dsecond.perm[3] = (d->perm[3] & ~1) + 1; +- dthird.perm[0] = (d->perm[0] % 2); +- dthird.perm[1] = (d->perm[1] % 2) + 4; +- dthird.perm[2] = (d->perm[2] % 2) + 2; +- dthird.perm[3] = (d->perm[3] % 2) + 6; +- +- dfirst.target = gen_reg_rtx (dfirst.vmode); +- dsecond.target = gen_reg_rtx (dsecond.vmode); +- dthird.op0 = dfirst.target; +- dthird.op1 = dsecond.target; +- dthird.one_operand_p = false; +- +- canonicalize_perm (&dfirst); +- canonicalize_perm (&dsecond); +- +- ok = expand_vec_perm_1 (&dfirst) +- && expand_vec_perm_1 (&dsecond) +- && expand_vec_perm_1 (&dthird); ++ unsigned int length; ++ char *binder_name, *symbol_name, lazy_ptr_name[32]; ++ int label = ++current_machopic_label_num; + +- gcc_assert (ok); ++ /* For 64-bit we shouldn't get here. 
*/ ++ gcc_assert (!TARGET_64BIT); + +- return true; +-} ++ /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ ++ symb = targetm.strip_name_encoding (symb); + +-/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word +- permutation with two pshufb insns and an ior. We should have already +- failed all two instruction sequences. */ ++ length = strlen (stub); ++ binder_name = XALLOCAVEC (char, length + 32); ++ GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); + +-static bool +-expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d) +-{ +- rtx rperm[2][16], vperm, l, h, op, m128; +- unsigned int i, nelt, eltsz; ++ length = strlen (symb); ++ symbol_name = XALLOCAVEC (char, length + 32); ++ GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); + +- if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) +- return false; +- gcc_assert (!d->one_operand_p); ++ sprintf (lazy_ptr_name, "L%d$lz", label); + +- if (d->testing_p) +- return true; ++ if (MACHOPIC_ATT_STUB) ++ switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]); ++ else if (MACHOPIC_PURE) ++ switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); ++ else ++ switch_to_section (darwin_sections[machopic_symbol_stub_section]); + +- nelt = d->nelt; +- eltsz = GET_MODE_UNIT_SIZE (d->vmode); ++ fprintf (file, "%s:\n", stub); ++ fprintf (file, "\t.indirect_symbol %s\n", symbol_name); + +- /* Generate two permutation masks. If the required element is within +- the given vector it is shuffled into the proper lane. If the required +- element is in the other vector, force a zero into the lane by setting +- bit 7 in the permutation mask. */ +- m128 = GEN_INT (-128); +- for (i = 0; i < nelt; ++i) ++ if (MACHOPIC_ATT_STUB) + { +- unsigned j, e = d->perm[i]; +- unsigned which = (e >= nelt); +- if (e >= nelt) +- e -= nelt; +- +- for (j = 0; j < eltsz; ++j) +- { +- rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j); +- rperm[1-which][i*eltsz + j] = m128; +- } ++ fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n"); + } +- +- vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0])); +- vperm = force_reg (V16QImode, vperm); +- +- l = gen_reg_rtx (V16QImode); +- op = gen_lowpart (V16QImode, d->op0); +- emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm)); +- +- vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1])); +- vperm = force_reg (V16QImode, vperm); +- +- h = gen_reg_rtx (V16QImode); +- op = gen_lowpart (V16QImode, d->op1); +- emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm)); +- +- op = d->target; +- if (d->vmode != V16QImode) +- op = gen_reg_rtx (V16QImode); +- emit_insn (gen_iorv16qi3 (op, l, h)); +- if (op != d->target) +- emit_move_insn (d->target, gen_lowpart (d->vmode, op)); +- +- return true; +-} +- +-/* Implement arbitrary permutation of one V32QImode and V16QImode operand +- with two vpshufb insns, vpermq and vpor. We should have already failed +- all two or three instruction sequences. */ +- +-static bool +-expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d) +-{ +- rtx rperm[2][32], vperm, l, h, hp, op, m128; +- unsigned int i, nelt, eltsz; +- +- if (!TARGET_AVX2 +- || !d->one_operand_p +- || (d->vmode != V32QImode && d->vmode != V16HImode)) +- return false; +- +- if (d->testing_p) +- return true; +- +- nelt = d->nelt; +- eltsz = GET_MODE_UNIT_SIZE (d->vmode); +- +- /* Generate two permutation masks. If the required element is within +- the same lane, it is shuffled in. 
If the required element from the +- other lane, force a zero by setting bit 7 in the permutation mask. +- In the other mask the mask has non-negative elements if element +- is requested from the other lane, but also moved to the other lane, +- so that the result of vpshufb can have the two V2TImode halves +- swapped. */ +- m128 = GEN_INT (-128); +- for (i = 0; i < nelt; ++i) ++ else if (MACHOPIC_PURE) + { +- unsigned j, e = d->perm[i] & (nelt / 2 - 1); +- unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz; +- +- for (j = 0; j < eltsz; ++j) +- { +- rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j); +- rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128; +- } ++ /* PIC stub. */ ++ /* 25-byte PIC stub using "CALL get_pc_thunk". */ ++ rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); ++ output_set_got (tmp, NULL_RTX); /* "CALL ___.get_pc_thunk.cx". */ ++ fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", ++ label, lazy_ptr_name, label); ++ fprintf (file, "\tjmp\t*%%ecx\n"); + } ++ else ++ fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); + +- vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1])); +- vperm = force_reg (V32QImode, vperm); +- +- h = gen_reg_rtx (V32QImode); +- op = gen_lowpart (V32QImode, d->op0); +- emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); ++ /* The AT&T-style ("self-modifying") stub is not lazily bound, thus ++ it needs no stub-binding-helper. */ ++ if (MACHOPIC_ATT_STUB) ++ return; + +- /* Swap the 128-byte lanes of h into hp. */ +- hp = gen_reg_rtx (V4DImode); +- op = gen_lowpart (V4DImode, h); +- emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx, +- const1_rtx)); ++ fprintf (file, "%s:\n", binder_name); + +- vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0])); +- vperm = force_reg (V32QImode, vperm); ++ if (MACHOPIC_PURE) ++ { ++ fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name); ++ fprintf (file, "\tpushl\t%%ecx\n"); ++ } ++ else ++ fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); + +- l = gen_reg_rtx (V32QImode); +- op = gen_lowpart (V32QImode, d->op0); +- emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm)); ++ fputs ("\tjmp\tdyld_stub_binding_helper\n", file); + +- op = d->target; +- if (d->vmode != V32QImode) +- op = gen_reg_rtx (V32QImode); +- emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp))); +- if (op != d->target) +- emit_move_insn (d->target, gen_lowpart (d->vmode, op)); ++ /* N.B. Keep the correspondence of these ++ 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the ++ old-pic/new-pic/non-pic stubs; altering this will break ++ compatibility with existing dylibs. */ ++ if (MACHOPIC_PURE) ++ { ++ /* 25-byte PIC stub using "CALL get_pc_thunk". */ ++ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); ++ } ++ else ++ /* 16-byte -mdynamic-no-pic stub. */ ++ switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]); + +- return true; ++ fprintf (file, "%s:\n", lazy_ptr_name); ++ fprintf (file, "\t.indirect_symbol %s\n", symbol_name); ++ fprintf (file, ASM_LONG "%s\n", binder_name); + } ++#endif /* TARGET_MACHO */ + +-/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even +- and extract-odd permutations of two V32QImode and V16QImode operand +- with two vpshufb insns, vpor and vpermq. We should have already +- failed all two or three instruction sequences. */ ++/* Order the registers for register allocator. 
*/ + +-static bool +-expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d) ++void ++x86_order_regs_for_local_alloc (void) + { +- rtx rperm[2][32], vperm, l, h, ior, op, m128; +- unsigned int i, nelt, eltsz; +- +- if (!TARGET_AVX2 +- || d->one_operand_p +- || (d->vmode != V32QImode && d->vmode != V16HImode)) +- return false; +- +- for (i = 0; i < d->nelt; ++i) +- if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2)) +- return false; +- +- if (d->testing_p) +- return true; ++ int pos = 0; ++ int i; + +- nelt = d->nelt; +- eltsz = GET_MODE_UNIT_SIZE (d->vmode); +- +- /* Generate two permutation masks. In the first permutation mask +- the first quarter will contain indexes for the first half +- of the op0, the second quarter will contain bit 7 set, third quarter +- will contain indexes for the second half of the op0 and the +- last quarter bit 7 set. In the second permutation mask +- the first quarter will contain bit 7 set, the second quarter +- indexes for the first half of the op1, the third quarter bit 7 set +- and last quarter indexes for the second half of the op1. +- I.e. the first mask e.g. for V32QImode extract even will be: +- 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128 +- (all values masked with 0xf except for -128) and second mask +- for extract even will be +- -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */ +- m128 = GEN_INT (-128); +- for (i = 0; i < nelt; ++i) +- { +- unsigned j, e = d->perm[i] & (nelt / 2 - 1); +- unsigned which = d->perm[i] >= nelt; +- unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0; ++ /* First allocate the local general purpose registers. */ ++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i)) ++ reg_alloc_order [pos++] = i; + +- for (j = 0; j < eltsz; ++j) +- { +- rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j); +- rperm[1 - which][(i * eltsz + j) ^ xorv] = m128; +- } +- } ++ /* Global general purpose registers. */ ++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i)) ++ reg_alloc_order [pos++] = i; + +- vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0])); +- vperm = force_reg (V32QImode, vperm); ++ /* x87 registers come first in case we are doing FP math ++ using them. */ ++ if (!TARGET_SSE_MATH) ++ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) ++ reg_alloc_order [pos++] = i; + +- l = gen_reg_rtx (V32QImode); +- op = gen_lowpart (V32QImode, d->op0); +- emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm)); ++ /* SSE registers. */ ++ for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) ++ reg_alloc_order [pos++] = i; ++ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) ++ reg_alloc_order [pos++] = i; + +- vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1])); +- vperm = force_reg (V32QImode, vperm); ++ /* Extended REX SSE registers. */ ++ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) ++ reg_alloc_order [pos++] = i; + +- h = gen_reg_rtx (V32QImode); +- op = gen_lowpart (V32QImode, d->op1); +- emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); ++ /* Mask register. */ ++ for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) ++ reg_alloc_order [pos++] = i; + +- ior = gen_reg_rtx (V32QImode); +- emit_insn (gen_iorv32qi3 (ior, l, h)); ++ /* x87 registers. 
*/ ++ if (TARGET_SSE_MATH) ++ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) ++ reg_alloc_order [pos++] = i; + +- /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */ +- op = gen_reg_rtx (V4DImode); +- ior = gen_lowpart (V4DImode, ior); +- emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx, +- const1_rtx, GEN_INT (3))); +- emit_move_insn (d->target, gen_lowpart (d->vmode, op)); ++ for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) ++ reg_alloc_order [pos++] = i; + +- return true; ++ /* Initialize the rest of array as we do not allocate some registers ++ at all. */ ++ while (pos < FIRST_PSEUDO_REGISTER) ++ reg_alloc_order [pos++] = 0; + } + +-/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even +- and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands +- with two "and" and "pack" or two "shift" and "pack" insns. We should +- have already failed all two instruction sequences. */ +- + static bool +-expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d) ++ix86_ms_bitfield_layout_p (const_tree record_type) + { +- rtx op, dop0, dop1, t; +- unsigned i, odd, c, s, nelt = d->nelt; +- bool end_perm = false; +- machine_mode half_mode; +- rtx (*gen_and) (rtx, rtx, rtx); +- rtx (*gen_pack) (rtx, rtx, rtx); +- rtx (*gen_shift) (rtx, rtx, rtx); ++ return ((TARGET_MS_BITFIELD_LAYOUT ++ && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) ++ || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type))); ++} + +- if (d->one_operand_p) +- return false; ++/* Returns an expression indicating where the this parameter is ++ located on entry to the FUNCTION. */ + +- switch (d->vmode) +- { +- case E_V8HImode: +- /* Required for "pack". */ +- if (!TARGET_SSE4_1) +- return false; +- c = 0xffff; +- s = 16; +- half_mode = V4SImode; +- gen_and = gen_andv4si3; +- gen_pack = gen_sse4_1_packusdw; +- gen_shift = gen_lshrv4si3; +- break; +- case E_V16QImode: +- /* No check as all instructions are SSE2. */ +- c = 0xff; +- s = 8; +- half_mode = V8HImode; +- gen_and = gen_andv8hi3; +- gen_pack = gen_sse2_packuswb; +- gen_shift = gen_lshrv8hi3; +- break; +- case E_V16HImode: +- if (!TARGET_AVX2) +- return false; +- c = 0xffff; +- s = 16; +- half_mode = V8SImode; +- gen_and = gen_andv8si3; +- gen_pack = gen_avx2_packusdw; +- gen_shift = gen_lshrv8si3; +- end_perm = true; +- break; +- case E_V32QImode: +- if (!TARGET_AVX2) +- return false; +- c = 0xff; +- s = 8; +- half_mode = V16HImode; +- gen_and = gen_andv16hi3; +- gen_pack = gen_avx2_packuswb; +- gen_shift = gen_lshrv16hi3; +- end_perm = true; +- break; +- default: +- /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than +- general shuffles. */ +- return false; +- } ++static rtx ++x86_this_parameter (tree function) ++{ ++ tree type = TREE_TYPE (function); ++ bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; ++ int nregs; + +- /* Check that permutation is even or odd. 
*/ +- odd = d->perm[0]; +- if (odd > 1) +- return false; ++ if (TARGET_64BIT) ++ { ++ const int *parm_regs; + +- for (i = 1; i < nelt; ++i) +- if (d->perm[i] != 2 * i + odd) +- return false; ++ if (ix86_function_type_abi (type) == MS_ABI) ++ parm_regs = x86_64_ms_abi_int_parameter_registers; ++ else ++ parm_regs = x86_64_int_parameter_registers; ++ return gen_rtx_REG (Pmode, parm_regs[aggr]); ++ } + +- if (d->testing_p) +- return true; ++ nregs = ix86_function_regparm (type, function); + +- dop0 = gen_reg_rtx (half_mode); +- dop1 = gen_reg_rtx (half_mode); +- if (odd == 0) +- { +- t = gen_const_vec_duplicate (half_mode, GEN_INT (c)); +- t = force_reg (half_mode, t); +- emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0))); +- emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1))); +- } +- else ++ if (nregs > 0 && !stdarg_p (type)) + { +- emit_insn (gen_shift (dop0, +- gen_lowpart (half_mode, d->op0), +- GEN_INT (s))); +- emit_insn (gen_shift (dop1, +- gen_lowpart (half_mode, d->op1), +- GEN_INT (s))); +- } +- /* In AVX2 for 256 bit case we need to permute pack result. */ +- if (TARGET_AVX2 && end_perm) +- { +- op = gen_reg_rtx (d->vmode); +- t = gen_reg_rtx (V4DImode); +- emit_insn (gen_pack (op, dop0, dop1)); +- emit_insn (gen_avx2_permv4di_1 (t, +- gen_lowpart (V4DImode, op), +- const0_rtx, +- const2_rtx, +- const1_rtx, +- GEN_INT (3))); +- emit_move_insn (d->target, gen_lowpart (d->vmode, t)); ++ int regno; ++ unsigned int ccvt = ix86_get_callcvt (type); ++ ++ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) ++ regno = aggr ? DX_REG : CX_REG; ++ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) ++ { ++ regno = CX_REG; ++ if (aggr) ++ return gen_rtx_MEM (SImode, ++ plus_constant (Pmode, stack_pointer_rtx, 4)); ++ } ++ else ++ { ++ regno = AX_REG; ++ if (aggr) ++ { ++ regno = DX_REG; ++ if (nregs == 1) ++ return gen_rtx_MEM (SImode, ++ plus_constant (Pmode, ++ stack_pointer_rtx, 4)); ++ } ++ } ++ return gen_rtx_REG (SImode, regno); + } +- else +- emit_insn (gen_pack (d->target, dop0, dop1)); + +- return true; ++ return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, ++ aggr ? 8 : 4)); + } + +-/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even +- and extract-odd permutations of two V64QI operands +- with two "shifts", two "truncs" and one "concat" insns for "odd" +- and two "truncs" and one concat insn for "even." +- Have already failed all two instruction sequences. */ ++/* Determine whether x86_output_mi_thunk can succeed. */ + + static bool +-expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d) ++x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, ++ const_tree function) + { +- rtx t1, t2, t3, t4; +- unsigned i, odd, nelt = d->nelt; +- +- if (!TARGET_AVX512BW +- || d->one_operand_p +- || d->vmode != V64QImode) +- return false; +- +- /* Check that permutation is even or odd. */ +- odd = d->perm[0]; +- if (odd > 1) +- return false; +- +- for (i = 1; i < nelt; ++i) +- if (d->perm[i] != 2 * i + odd) +- return false; +- +- if (d->testing_p) ++ /* 64-bit can handle anything. */ ++ if (TARGET_64BIT) + return true; + ++ /* For 32-bit, everything's fine if we have one free register. 
*/ ++ if (ix86_function_regparm (TREE_TYPE (function), function) < 3) ++ return true; + +- if (odd) +- { +- t1 = gen_reg_rtx (V32HImode); +- t2 = gen_reg_rtx (V32HImode); +- emit_insn (gen_lshrv32hi3 (t1, +- gen_lowpart (V32HImode, d->op0), +- GEN_INT (8))); +- emit_insn (gen_lshrv32hi3 (t2, +- gen_lowpart (V32HImode, d->op1), +- GEN_INT (8))); +- } +- else +- { +- t1 = gen_lowpart (V32HImode, d->op0); +- t2 = gen_lowpart (V32HImode, d->op1); +- } ++ /* Need a free register for vcall_offset. */ ++ if (vcall_offset) ++ return false; + +- t3 = gen_reg_rtx (V32QImode); +- t4 = gen_reg_rtx (V32QImode); +- emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1)); +- emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2)); +- emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4)); ++ /* Need a free register for GOT references. */ ++ if (flag_pic && !targetm.binds_local_p (function)) ++ return false; + ++ /* Otherwise ok. */ + return true; + } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even +- and extract-odd permutations. */ ++/* Output the assembler code for a thunk function. THUNK_DECL is the ++ declaration for the thunk function itself, FUNCTION is the decl for ++ the target function. DELTA is an immediate constant offset to be ++ added to THIS. If VCALL_OFFSET is nonzero, the word at ++ *(*this + vcall_offset) should be added to THIS. */ + +-static bool +-expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) ++static void ++x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta, ++ HOST_WIDE_INT vcall_offset, tree function) + { +- rtx t1, t2, t3, t4, t5; ++ const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); ++ rtx this_param = x86_this_parameter (function); ++ rtx this_reg, tmp, fnaddr; ++ unsigned int tmp_regno; ++ rtx_insn *insn; + +- switch (d->vmode) ++ if (TARGET_64BIT) ++ tmp_regno = R10_REG; ++ else + { +- case E_V4DFmode: +- if (d->testing_p) +- break; +- t1 = gen_reg_rtx (V4DFmode); +- t2 = gen_reg_rtx (V4DFmode); +- +- /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ +- emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20))); +- emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31))); +- +- /* Now an unpck[lh]pd will produce the result required. */ +- if (odd) +- t3 = gen_avx_unpckhpd256 (d->target, t1, t2); ++ unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function)); ++ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) ++ tmp_regno = AX_REG; ++ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) ++ tmp_regno = DX_REG; + else +- t3 = gen_avx_unpcklpd256 (d->target, t1, t2); +- emit_insn (t3); +- break; ++ tmp_regno = CX_REG; ++ } + +- case E_V8SFmode: +- { +- int mask = odd ? 0xdd : 0x88; ++ emit_note (NOTE_INSN_PROLOGUE_END); + +- if (d->testing_p) +- break; +- t1 = gen_reg_rtx (V8SFmode); +- t2 = gen_reg_rtx (V8SFmode); +- t3 = gen_reg_rtx (V8SFmode); +- +- /* Shuffle within the 128-bit lanes to produce: +- { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */ +- emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1, +- GEN_INT (mask))); +- +- /* Shuffle the lanes around to produce: +- { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */ +- emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1, +- GEN_INT (0x3))); +- +- /* Shuffle within the 128-bit lanes to produce: +- { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */ +- emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44))); +- +- /* Shuffle within the 128-bit lanes to produce: +- { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. 
*/ +- emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee))); +- +- /* Shuffle the lanes around to produce: +- { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */ +- emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2, +- GEN_INT (0x20))); +- } +- break; ++ /* CET is enabled, insert EB instruction. */ ++ if ((flag_cf_protection & CF_BRANCH)) ++ emit_insn (gen_nop_endbr ()); + +- case E_V2DFmode: +- case E_V4SFmode: +- case E_V2DImode: +- case E_V4SImode: +- /* These are always directly implementable by expand_vec_perm_1. */ +- gcc_unreachable (); ++ /* If VCALL_OFFSET, we'll need THIS in a register. Might as well ++ pull it in now and let DELTA benefit. */ ++ if (REG_P (this_param)) ++ this_reg = this_param; ++ else if (vcall_offset) ++ { ++ /* Put the this parameter into %eax. */ ++ this_reg = gen_rtx_REG (Pmode, AX_REG); ++ emit_move_insn (this_reg, this_param); ++ } ++ else ++ this_reg = NULL_RTX; + +- case E_V8HImode: +- if (TARGET_SSE4_1) +- return expand_vec_perm_even_odd_pack (d); +- else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB) +- return expand_vec_perm_pshufb2 (d); +- else ++ /* Adjust the this parameter by a fixed constant. */ ++ if (delta) ++ { ++ rtx delta_rtx = GEN_INT (delta); ++ rtx delta_dst = this_reg ? this_reg : this_param; ++ ++ if (TARGET_64BIT) + { +- if (d->testing_p) +- break; +- /* We need 2*log2(N)-1 operations to achieve odd/even +- with interleave. */ +- t1 = gen_reg_rtx (V8HImode); +- t2 = gen_reg_rtx (V8HImode); +- emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1)); +- emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1)); +- emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1)); +- emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1)); +- if (odd) +- t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2); +- else +- t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2); +- emit_insn (t3); ++ if (!x86_64_general_operand (delta_rtx, Pmode)) ++ { ++ tmp = gen_rtx_REG (Pmode, tmp_regno); ++ emit_move_insn (tmp, delta_rtx); ++ delta_rtx = tmp; ++ } + } +- break; + +- case E_V16QImode: +- return expand_vec_perm_even_odd_pack (d); ++ ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx); ++ } ++ ++ /* Adjust the this parameter by a value stored in the vtable. */ ++ if (vcall_offset) ++ { ++ rtx vcall_addr, vcall_mem, this_mem; + +- case E_V16HImode: +- case E_V32QImode: +- return expand_vec_perm_even_odd_pack (d); ++ tmp = gen_rtx_REG (Pmode, tmp_regno); + +- case E_V64QImode: +- return expand_vec_perm_even_odd_trunc (d); ++ this_mem = gen_rtx_MEM (ptr_mode, this_reg); ++ if (Pmode != ptr_mode) ++ this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem); ++ emit_move_insn (tmp, this_mem); + +- case E_V4DImode: +- if (!TARGET_AVX2) ++ /* Adjust the this parameter. 
*/ ++ vcall_addr = plus_constant (Pmode, tmp, vcall_offset); ++ if (TARGET_64BIT ++ && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true)) + { +- struct expand_vec_perm_d d_copy = *d; +- d_copy.vmode = V4DFmode; +- if (d->testing_p) +- d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1); +- else +- d_copy.target = gen_reg_rtx (V4DFmode); +- d_copy.op0 = gen_lowpart (V4DFmode, d->op0); +- d_copy.op1 = gen_lowpart (V4DFmode, d->op1); +- if (expand_vec_perm_even_odd_1 (&d_copy, odd)) +- { +- if (!d->testing_p) +- emit_move_insn (d->target, +- gen_lowpart (V4DImode, d_copy.target)); +- return true; +- } +- return false; ++ rtx tmp2 = gen_rtx_REG (Pmode, R11_REG); ++ emit_move_insn (tmp2, GEN_INT (vcall_offset)); ++ vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2); + } + +- if (d->testing_p) +- break; +- +- t1 = gen_reg_rtx (V4DImode); +- t2 = gen_reg_rtx (V4DImode); ++ vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr); ++ if (Pmode != ptr_mode) ++ emit_insn (gen_addsi_1_zext (this_reg, ++ gen_rtx_REG (ptr_mode, ++ REGNO (this_reg)), ++ vcall_mem)); ++ else ++ ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem); ++ } + +- /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ +- emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20))); +- emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31))); ++ /* If necessary, drop THIS back to its stack slot. */ ++ if (this_reg && this_reg != this_param) ++ emit_move_insn (this_param, this_reg); + +- /* Now an vpunpck[lh]qdq will produce the result required. */ +- if (odd) +- t3 = gen_avx2_interleave_highv4di (d->target, t1, t2); ++ fnaddr = XEXP (DECL_RTL (function), 0); ++ if (TARGET_64BIT) ++ { ++ if (!flag_pic || targetm.binds_local_p (function) ++ || TARGET_PECOFF) ++ ; + else +- t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2); +- emit_insn (t3); +- break; +- +- case E_V8SImode: +- if (!TARGET_AVX2) + { +- struct expand_vec_perm_d d_copy = *d; +- d_copy.vmode = V8SFmode; +- if (d->testing_p) +- d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1); +- else +- d_copy.target = gen_reg_rtx (V8SFmode); +- d_copy.op0 = gen_lowpart (V8SFmode, d->op0); +- d_copy.op1 = gen_lowpart (V8SFmode, d->op1); +- if (expand_vec_perm_even_odd_1 (&d_copy, odd)) +- { +- if (!d->testing_p) +- emit_move_insn (d->target, +- gen_lowpart (V8SImode, d_copy.target)); +- return true; +- } +- return false; ++ tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL); ++ tmp = gen_rtx_CONST (Pmode, tmp); ++ fnaddr = gen_const_mem (Pmode, tmp); ++ } ++ } ++ else ++ { ++ if (!flag_pic || targetm.binds_local_p (function)) ++ ; ++#if TARGET_MACHO ++ else if (TARGET_MACHO) ++ { ++ fnaddr = machopic_indirect_call_target (DECL_RTL (function)); ++ fnaddr = XEXP (fnaddr, 0); + } ++#endif /* TARGET_MACHO */ ++ else ++ { ++ tmp = gen_rtx_REG (Pmode, CX_REG); ++ output_set_got (tmp, NULL_RTX); + +- if (d->testing_p) +- break; ++ fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT); ++ fnaddr = gen_rtx_CONST (Pmode, fnaddr); ++ fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr); ++ fnaddr = gen_const_mem (Pmode, fnaddr); ++ } ++ } + +- t1 = gen_reg_rtx (V8SImode); +- t2 = gen_reg_rtx (V8SImode); +- t3 = gen_reg_rtx (V4DImode); +- t4 = gen_reg_rtx (V4DImode); +- t5 = gen_reg_rtx (V4DImode); +- +- /* Shuffle the lanes around into +- { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. 
*/ +- emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0), +- gen_lowpart (V4DImode, d->op1), +- GEN_INT (0x20))); +- emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0), +- gen_lowpart (V4DImode, d->op1), +- GEN_INT (0x31))); +- +- /* Swap the 2nd and 3rd position in each lane into +- { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */ +- emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3), +- GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); +- emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4), +- GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); +- +- /* Now an vpunpck[lh]qdq will produce +- { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */ +- if (odd) +- t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1), +- gen_lowpart (V4DImode, t2)); ++ /* Our sibling call patterns do not allow memories, because we have no ++ predicate that can distinguish between frame and non-frame memory. ++ For our purposes here, we can get away with (ab)using a jump pattern, ++ because we're going to do no optimization. */ ++ if (MEM_P (fnaddr)) ++ { ++ if (sibcall_insn_operand (fnaddr, word_mode)) ++ { ++ fnaddr = XEXP (DECL_RTL (function), 0); ++ tmp = gen_rtx_MEM (QImode, fnaddr); ++ tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); ++ tmp = emit_call_insn (tmp); ++ SIBLING_CALL_P (tmp) = 1; ++ } + else +- t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1), +- gen_lowpart (V4DImode, t2)); +- emit_insn (t3); +- emit_move_insn (d->target, gen_lowpart (V8SImode, t5)); +- break; ++ emit_jump_insn (gen_indirect_jump (fnaddr)); ++ } ++ else ++ { ++ if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr)) ++ { ++ // CM_LARGE_PIC always uses pseudo PIC register which is ++ // uninitialized. Since FUNCTION is local and calling it ++ // doesn't go through PLT, we use scratch register %r11 as ++ // PIC register and initialize it here. ++ pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG); ++ ix86_init_large_pic_reg (tmp_regno); ++ fnaddr = legitimize_pic_address (fnaddr, ++ gen_rtx_REG (Pmode, tmp_regno)); ++ } + +- default: +- gcc_unreachable (); ++ if (!sibcall_insn_operand (fnaddr, word_mode)) ++ { ++ tmp = gen_rtx_REG (word_mode, tmp_regno); ++ if (GET_MODE (fnaddr) != word_mode) ++ fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); ++ emit_move_insn (tmp, fnaddr); ++ fnaddr = tmp; ++ } ++ ++ tmp = gen_rtx_MEM (QImode, fnaddr); ++ tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); ++ tmp = emit_call_insn (tmp); ++ SIBLING_CALL_P (tmp) = 1; + } ++ emit_barrier (); + +- return true; ++ /* Emit just enough of rest_of_compilation to get the insns emitted. ++ Note that use_thunk calls assemble_start_function et al. */ ++ insn = get_insns (); ++ shorten_branches (insn); ++ assemble_start_function (thunk_fndecl, fnname); ++ final_start_function (insn, file, 1); ++ final (insn, file, 1); ++ final_end_function (); ++ assemble_end_function (thunk_fndecl, fnname); + } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match +- extract-even and extract-odd permutations. 
*/ ++static void ++x86_file_start (void) ++{ ++ default_file_start (); ++ if (TARGET_16BIT) ++ fputs ("\t.code16gcc\n", asm_out_file); ++#if TARGET_MACHO ++ darwin_file_start (); ++#endif ++ if (X86_FILE_START_VERSION_DIRECTIVE) ++ fputs ("\t.version\t\"01.01\"\n", asm_out_file); ++ if (X86_FILE_START_FLTUSED) ++ fputs ("\t.global\t__fltused\n", asm_out_file); ++ if (ix86_asm_dialect == ASM_INTEL) ++ fputs ("\t.intel_syntax noprefix\n", asm_out_file); ++} + +-static bool +-expand_vec_perm_even_odd (struct expand_vec_perm_d *d) ++int ++x86_field_alignment (tree type, int computed) + { +- unsigned i, odd, nelt = d->nelt; ++ machine_mode mode; + +- odd = d->perm[0]; +- if (odd != 0 && odd != 1) +- return false; ++ if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) ++ return computed; ++ if (TARGET_IAMCU) ++ return iamcu_alignment (type, computed); ++ mode = TYPE_MODE (strip_array_types (type)); ++ if (mode == DFmode || mode == DCmode ++ || GET_MODE_CLASS (mode) == MODE_INT ++ || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) ++ return MIN (32, computed); ++ return computed; ++} + +- for (i = 1; i < nelt; ++i) +- if (d->perm[i] != 2 * i + odd) +- return false; ++/* Print call to TARGET to FILE. */ + +- return expand_vec_perm_even_odd_1 (d, odd); ++static void ++x86_print_call_or_nop (FILE *file, const char *target) ++{ ++ if (flag_nop_mcount || !strcmp (target, "nop")) ++ /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ ++ fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); ++ else ++ fprintf (file, "1:\tcall\t%s\n", target); + } + +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast +- permutations. We assume that expand_vec_perm_1 has already failed. */ +- + static bool +-expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) ++current_fentry_name (const char **name) + { +- unsigned elt = d->perm[0], nelt2 = d->nelt / 2; +- machine_mode vmode = d->vmode; +- unsigned char perm2[4]; +- rtx op0 = d->op0, dest; +- bool ok; +- +- switch (vmode) +- { +- case E_V4DFmode: +- case E_V8SFmode: +- /* These are special-cased in sse.md so that we can optionally +- use the vbroadcast instruction. They expand to two insns +- if the input happens to be in a register. */ +- gcc_unreachable (); +- +- case E_V2DFmode: +- case E_V2DImode: +- case E_V4SFmode: +- case E_V4SImode: +- /* These are always implementable using standard shuffle patterns. */ +- gcc_unreachable (); ++ tree attr = lookup_attribute ("fentry_name", ++ DECL_ATTRIBUTES (current_function_decl)); ++ if (!attr) ++ return false; ++ *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); ++ return true; ++} + +- case E_V8HImode: +- case E_V16QImode: +- /* These can be implemented via interleave. We save one insn by +- stopping once we have promoted to V4SImode and then use pshufd. */ +- if (d->testing_p) +- return true; +- do +- { +- rtx dest; +- rtx (*gen) (rtx, rtx, rtx) +- = vmode == V16QImode ? gen_vec_interleave_lowv16qi +- : gen_vec_interleave_lowv8hi; ++static bool ++current_fentry_section (const char **name) ++{ ++ tree attr = lookup_attribute ("fentry_section", ++ DECL_ATTRIBUTES (current_function_decl)); ++ if (!attr) ++ return false; ++ *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); ++ return true; ++} + +- if (elt >= nelt2) +- { +- gen = vmode == V16QImode ? gen_vec_interleave_highv16qi +- : gen_vec_interleave_highv8hi; +- elt -= nelt2; +- } +- nelt2 /= 2; ++/* Output assembler code to FILE to increment profiler label # LABELNO ++ for profiling a function entry. 
*/ ++void ++x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) ++{ ++ if (cfun->machine->endbr_queued_at_entrance) ++ fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32"); + +- dest = gen_reg_rtx (vmode); +- emit_insn (gen (dest, op0, op0)); +- vmode = get_mode_wider_vector (vmode); +- op0 = gen_lowpart (vmode, dest); +- } +- while (vmode != V4SImode); ++ const char *mcount_name = MCOUNT_NAME; + +- memset (perm2, elt, 4); +- dest = gen_reg_rtx (V4SImode); +- ok = expand_vselect (dest, op0, perm2, 4, d->testing_p); +- gcc_assert (ok); +- if (!d->testing_p) +- emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); +- return true; ++ if (current_fentry_name (&mcount_name)) ++ ; ++ else if (fentry_name) ++ mcount_name = fentry_name; ++ else if (flag_fentry) ++ mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE; + +- case E_V64QImode: +- case E_V32QImode: +- case E_V16HImode: +- case E_V8SImode: +- case E_V4DImode: +- /* For AVX2 broadcasts of the first element vpbroadcast* or +- vpermq should be used by expand_vec_perm_1. */ +- gcc_assert (!TARGET_AVX2 || d->perm[0]); +- return false; ++ if (TARGET_64BIT) ++ { ++#ifndef NO_PROFILE_COUNTERS ++ fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno); ++#endif + +- default: +- gcc_unreachable (); ++ if (!TARGET_PECOFF && flag_pic) ++ fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name); ++ else ++ x86_print_call_or_nop (file, mcount_name); ++ } ++ else if (flag_pic) ++ { ++#ifndef NO_PROFILE_COUNTERS ++ fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n", ++ LPREFIX, labelno); ++#endif ++ fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); ++ } ++ else ++ { ++#ifndef NO_PROFILE_COUNTERS ++ fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n", ++ LPREFIX, labelno); ++#endif ++ x86_print_call_or_nop (file, mcount_name); + } +-} +- +-/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match +- broadcast permutations. */ + +-static bool +-expand_vec_perm_broadcast (struct expand_vec_perm_d *d) +-{ +- unsigned i, elt, nelt = d->nelt; +- +- if (!d->one_operand_p) +- return false; ++ if (flag_record_mcount ++ || lookup_attribute ("fentry_section", ++ DECL_ATTRIBUTES (current_function_decl))) ++ { ++ const char *sname = "__mcount_loc"; + +- elt = d->perm[0]; +- for (i = 1; i < nelt; ++i) +- if (d->perm[i] != elt) +- return false; ++ if (current_fentry_section (&sname)) ++ ; ++ else if (fentry_section) ++ sname = fentry_section; + +- return expand_vec_perm_broadcast_1 (d); ++ fprintf (file, "\t.section %s, \"a\",@progbits\n", sname); ++ fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); ++ fprintf (file, "\t.previous\n"); ++ } + } + +-/* Implement arbitrary permutations of two V64QImode operands +- with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. */ +-static bool +-expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d) ++/* We don't have exact information about the insn sizes, but we may assume ++ quite safely that we are informed about all 1 byte insns and memory ++ address sizes. This is enough to eliminate unnecessary padding in ++ 99% of cases. 
*/ ++ ++int ++ix86_min_insn_size (rtx_insn *insn) + { +- if (!TARGET_AVX512BW || !(d->vmode == V64QImode)) +- return false; ++ int l = 0, len; + +- if (d->testing_p) +- return true; ++ if (!INSN_P (insn) || !active_insn_p (insn)) ++ return 0; + +- struct expand_vec_perm_d ds[2]; +- rtx rperm[128], vperm, target0, target1; +- unsigned int i, nelt; +- machine_mode vmode; ++ /* Discard alignments we've emit and jump instructions. */ ++ if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE ++ && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) ++ return 0; + +- nelt = d->nelt; +- vmode = V64QImode; ++ /* Important case - calls are always 5 bytes. ++ It is common to have many calls in the row. */ ++ if (CALL_P (insn) ++ && symbolic_reference_mentioned_p (PATTERN (insn)) ++ && !SIBLING_CALL_P (insn)) ++ return 5; ++ len = get_attr_length (insn); ++ if (len <= 1) ++ return 1; + +- for (i = 0; i < 2; i++) ++ /* For normal instructions we rely on get_attr_length being exact, ++ with a few exceptions. */ ++ if (!JUMP_P (insn)) + { +- ds[i] = *d; +- ds[i].vmode = V32HImode; +- ds[i].nelt = 32; +- ds[i].target = gen_reg_rtx (V32HImode); +- ds[i].op0 = gen_lowpart (V32HImode, d->op0); +- ds[i].op1 = gen_lowpart (V32HImode, d->op1); +- } +- +- /* Prepare permutations such that the first one takes care of +- putting the even bytes into the right positions or one higher +- positions (ds[0]) and the second one takes care of +- putting the odd bytes into the right positions or one below +- (ds[1]). */ ++ enum attr_type type = get_attr_type (insn); + +- for (i = 0; i < nelt; i++) +- { +- ds[i & 1].perm[i / 2] = d->perm[i] / 2; +- if (i & 1) +- { +- rperm[i] = constm1_rtx; +- rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1)); +- } +- else ++ switch (type) + { +- rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1)); +- rperm[i + 64] = constm1_rtx; ++ case TYPE_MULTI: ++ if (GET_CODE (PATTERN (insn)) == ASM_INPUT ++ || asm_noperands (PATTERN (insn)) >= 0) ++ return 0; ++ break; ++ case TYPE_OTHER: ++ case TYPE_FCMP: ++ break; ++ default: ++ /* Otherwise trust get_attr_length. */ ++ return len; + } +- } +- +- bool ok = expand_vec_perm_1 (&ds[0]); +- gcc_assert (ok); +- ds[0].target = gen_lowpart (V64QImode, ds[0].target); +- +- ok = expand_vec_perm_1 (&ds[1]); +- gcc_assert (ok); +- ds[1].target = gen_lowpart (V64QImode, ds[1].target); +- +- vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm)); +- vperm = force_reg (vmode, vperm); +- target0 = gen_reg_rtx (V64QImode); +- emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm)); + +- vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64)); +- vperm = force_reg (vmode, vperm); +- target1 = gen_reg_rtx (V64QImode); +- emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm)); +- +- emit_insn (gen_iorv64qi3 (d->target, target0, target1)); +- return true; ++ l = get_attr_length_address (insn); ++ if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) ++ l = 4; ++ } ++ if (l) ++ return 1+l; ++ else ++ return 2; + } + +-/* Implement arbitrary permutation of two V32QImode and V16QImode operands +- with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed +- all the shorter instruction sequences. */ ++#ifdef ASM_OUTPUT_MAX_SKIP_PAD + +-static bool +-expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d) ++/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte ++ window. 
*/ ++ ++static void ++ix86_avoid_jump_mispredicts (void) + { +- rtx rperm[4][32], vperm, l[2], h[2], op, m128; +- unsigned int i, nelt, eltsz; +- bool used[4]; ++ rtx_insn *insn, *start = get_insns (); ++ int nbytes = 0, njumps = 0; ++ bool isjump = false; + +- if (!TARGET_AVX2 +- || d->one_operand_p +- || (d->vmode != V32QImode && d->vmode != V16HImode)) +- return false; ++ /* Look for all minimal intervals of instructions containing 4 jumps. ++ The intervals are bounded by START and INSN. NBYTES is the total ++ size of instructions in the interval including INSN and not including ++ START. When the NBYTES is smaller than 16 bytes, it is possible ++ that the end of START and INSN ends up in the same 16byte page. + +- if (d->testing_p) +- return true; ++ The smallest offset in the page INSN can start is the case where START ++ ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). ++ We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN). + +- nelt = d->nelt; +- eltsz = GET_MODE_UNIT_SIZE (d->vmode); +- +- /* Generate 4 permutation masks. If the required element is within +- the same lane, it is shuffled in. If the required element from the +- other lane, force a zero by setting bit 7 in the permutation mask. +- In the other mask the mask has non-negative elements if element +- is requested from the other lane, but also moved to the other lane, +- so that the result of vpshufb can have the two V2TImode halves +- swapped. */ +- m128 = GEN_INT (-128); +- for (i = 0; i < 32; ++i) +- { +- rperm[0][i] = m128; +- rperm[1][i] = m128; +- rperm[2][i] = m128; +- rperm[3][i] = m128; +- } +- used[0] = false; +- used[1] = false; +- used[2] = false; +- used[3] = false; +- for (i = 0; i < nelt; ++i) ++ Don't consider asm goto as jump, while it can contain a jump, it doesn't ++ have to, control transfer to label(s) can be performed through other ++ means, and also we estimate minimum length of all asm stmts as 0. */ ++ for (insn = start; insn; insn = NEXT_INSN (insn)) + { +- unsigned j, e = d->perm[i] & (nelt / 2 - 1); +- unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz; +- unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0); +- +- for (j = 0; j < eltsz; ++j) +- rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j); +- used[which] = true; +- } ++ int min_size; + +- for (i = 0; i < 2; ++i) +- { +- if (!used[2 * i + 1]) ++ if (LABEL_P (insn)) + { +- h[i] = NULL_RTX; ++ align_flags alignment = label_to_alignment (insn); ++ int align = alignment.levels[0].log; ++ int max_skip = alignment.levels[0].maxskip; ++ ++ if (max_skip > 15) ++ max_skip = 15; ++ /* If align > 3, only up to 16 - max_skip - 1 bytes can be ++ already in the current 16 byte page, because otherwise ++ ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer ++ bytes to reach 16 byte boundary. */ ++ if (align <= 0 ++ || (align <= 3 && max_skip != (1 << align) - 1)) ++ max_skip = 0; ++ if (dump_file) ++ fprintf (dump_file, "Label %i with max_skip %i\n", ++ INSN_UID (insn), max_skip); ++ if (max_skip) ++ { ++ while (nbytes + max_skip >= 16) ++ { ++ start = NEXT_INSN (start); ++ if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) ++ || CALL_P (start)) ++ njumps--, isjump = true; ++ else ++ isjump = false; ++ nbytes -= ix86_min_insn_size (start); ++ } ++ } + continue; + } +- vperm = gen_rtx_CONST_VECTOR (V32QImode, +- gen_rtvec_v (32, rperm[2 * i + 1])); +- vperm = force_reg (V32QImode, vperm); +- h[i] = gen_reg_rtx (V32QImode); +- op = gen_lowpart (V32QImode, i ? 
d->op1 : d->op0); +- emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm)); +- } + +- /* Swap the 128-byte lanes of h[X]. */ +- for (i = 0; i < 2; ++i) +- { +- if (h[i] == NULL_RTX) +- continue; +- op = gen_reg_rtx (V4DImode); +- emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]), +- const2_rtx, GEN_INT (3), const0_rtx, +- const1_rtx)); +- h[i] = gen_lowpart (V32QImode, op); +- } ++ min_size = ix86_min_insn_size (insn); ++ nbytes += min_size; ++ if (dump_file) ++ fprintf (dump_file, "Insn %i estimated to %i bytes\n", ++ INSN_UID (insn), min_size); ++ if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0) ++ || CALL_P (insn)) ++ njumps++; ++ else ++ continue; + +- for (i = 0; i < 2; ++i) +- { +- if (!used[2 * i]) ++ while (njumps > 3) + { +- l[i] = NULL_RTX; +- continue; ++ start = NEXT_INSN (start); ++ if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) ++ || CALL_P (start)) ++ njumps--, isjump = true; ++ else ++ isjump = false; ++ nbytes -= ix86_min_insn_size (start); + } +- vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i])); +- vperm = force_reg (V32QImode, vperm); +- l[i] = gen_reg_rtx (V32QImode); +- op = gen_lowpart (V32QImode, i ? d->op1 : d->op0); +- emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm)); +- } ++ gcc_assert (njumps >= 0); ++ if (dump_file) ++ fprintf (dump_file, "Interval %i to %i has %i bytes\n", ++ INSN_UID (start), INSN_UID (insn), nbytes); + +- for (i = 0; i < 2; ++i) +- { +- if (h[i] && l[i]) ++ if (njumps == 3 && isjump && nbytes < 16) + { +- op = gen_reg_rtx (V32QImode); +- emit_insn (gen_iorv32qi3 (op, l[i], h[i])); +- l[i] = op; ++ int padsize = 15 - nbytes + ix86_min_insn_size (insn); ++ ++ if (dump_file) ++ fprintf (dump_file, "Padding insn %i by %i bytes!\n", ++ INSN_UID (insn), padsize); ++ emit_insn_before (gen_pad (GEN_INT (padsize)), insn); + } +- else if (h[i]) +- l[i] = h[i]; + } +- +- gcc_assert (l[0] && l[1]); +- op = d->target; +- if (d->vmode != V32QImode) +- op = gen_reg_rtx (V32QImode); +- emit_insn (gen_iorv32qi3 (op, l[0], l[1])); +- if (op != d->target) +- emit_move_insn (d->target, gen_lowpart (d->vmode, op)); +- return true; + } ++#endif + +-/* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits +- taken care of, perform the expansion in D and return true on success. */ +- +-static bool +-ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) ++/* AMD Athlon works faster ++ when RET is not destination of conditional jump or directly preceded ++ by other jump instruction. We avoid the penalty by inserting NOP just ++ before the RET instructions in such cases. */ ++static void ++ix86_pad_returns (void) + { +- /* Try a single instruction expansion. */ +- if (expand_vec_perm_1 (d)) +- return true; +- +- /* Try sequences of two instructions. */ +- +- if (expand_vec_perm_pshuflw_pshufhw (d)) +- return true; +- +- if (expand_vec_perm_palignr (d, false)) +- return true; +- +- if (expand_vec_perm_interleave2 (d)) +- return true; +- +- if (expand_vec_perm_broadcast (d)) +- return true; +- +- if (expand_vec_perm_vpermq_perm_1 (d)) +- return true; +- +- if (expand_vec_perm_vperm2f128 (d)) +- return true; +- +- if (expand_vec_perm_pblendv (d)) +- return true; +- +- /* Try sequences of three instructions. 
*/ +- +- if (expand_vec_perm_even_odd_pack (d)) +- return true; +- +- if (expand_vec_perm_2vperm2f128_vshuf (d)) +- return true; +- +- if (expand_vec_perm_pshufb2 (d)) +- return true; +- +- if (expand_vec_perm_interleave3 (d)) +- return true; +- +- if (expand_vec_perm_vperm2f128_vblend (d)) +- return true; +- +- /* Try sequences of four instructions. */ +- +- if (expand_vec_perm_even_odd_trunc (d)) +- return true; +- if (expand_vec_perm_vpshufb2_vpermq (d)) +- return true; +- +- if (expand_vec_perm_vpshufb2_vpermq_even_odd (d)) +- return true; +- +- if (expand_vec_perm_vpermt2_vpshub2 (d)) +- return true; ++ edge e; ++ edge_iterator ei; + +- /* ??? Look for narrow permutations whose element orderings would +- allow the promotion to a wider mode. */ ++ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) ++ { ++ basic_block bb = e->src; ++ rtx_insn *ret = BB_END (bb); ++ rtx_insn *prev; ++ bool replace = false; + +- /* ??? Look for sequences of interleave or a wider permute that place +- the data into the correct lanes for a half-vector shuffle like +- pshuf[lh]w or vpermilps. */ ++ if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret)) ++ || optimize_bb_for_size_p (bb)) ++ continue; ++ for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) ++ if (active_insn_p (prev) || LABEL_P (prev)) ++ break; ++ if (prev && LABEL_P (prev)) ++ { ++ edge e; ++ edge_iterator ei; + +- /* ??? Look for sequences of interleave that produce the desired results. +- The combinatorics of punpck[lh] get pretty ugly... */ ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ if (EDGE_FREQUENCY (e) && e->src->index >= 0 ++ && !(e->flags & EDGE_FALLTHRU)) ++ { ++ replace = true; ++ break; ++ } ++ } ++ if (!replace) ++ { ++ prev = prev_active_insn (ret); ++ if (prev ++ && ((JUMP_P (prev) && any_condjump_p (prev)) ++ || CALL_P (prev))) ++ replace = true; ++ /* Empty functions get branch mispredict even when ++ the jump destination is not visible to us. */ ++ if (!prev && !optimize_function_for_size_p (cfun)) ++ replace = true; ++ } ++ if (replace) ++ { ++ emit_jump_insn_before (gen_simple_return_internal_long (), ret); ++ delete_insn (ret); ++ } ++ } ++} + +- if (expand_vec_perm_even_odd (d)) +- return true; ++/* Count the minimum number of instructions in BB. Return 4 if the ++ number of instructions >= 4. */ + +- /* Even longer sequences. */ +- if (expand_vec_perm_vpshufb4_vpermq2 (d)) +- return true; ++static int ++ix86_count_insn_bb (basic_block bb) ++{ ++ rtx_insn *insn; ++ int insn_count = 0; + +- /* See if we can get the same permutation in different vector integer +- mode. */ +- struct expand_vec_perm_d nd; +- if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) ++ /* Count number of instructions in this block. Return 4 if the number ++ of instructions >= 4. */ ++ FOR_BB_INSNS (bb, insn) + { +- if (!d->testing_p) +- emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); +- return true; ++ /* Only happen in exit blocks. */ ++ if (JUMP_P (insn) ++ && ANY_RETURN_P (PATTERN (insn))) ++ break; ++ ++ if (NONDEBUG_INSN_P (insn) ++ && GET_CODE (PATTERN (insn)) != USE ++ && GET_CODE (PATTERN (insn)) != CLOBBER) ++ { ++ insn_count++; ++ if (insn_count >= 4) ++ return insn_count; ++ } + } + +- return false; ++ return insn_count; + } + +-/* If a permutation only uses one operand, make it clear. Returns true +- if the permutation references both operands. 
*/ + +-static bool +-canonicalize_perm (struct expand_vec_perm_d *d) +-{ +- int i, which, nelt = d->nelt; ++/* Count the minimum number of instructions in code path in BB. ++ Return 4 if the number of instructions >= 4. */ + +- for (i = which = 0; i < nelt; ++i) +- which |= (d->perm[i] < nelt ? 1 : 2); ++static int ++ix86_count_insn (basic_block bb) ++{ ++ edge e; ++ edge_iterator ei; ++ int min_prev_count; + +- d->one_operand_p = true; +- switch (which) ++ /* Only bother counting instructions along paths with no ++ more than 2 basic blocks between entry and exit. Given ++ that BB has an edge to exit, determine if a predecessor ++ of BB has an edge from entry. If so, compute the number ++ of instructions in the predecessor block. If there ++ happen to be multiple such blocks, compute the minimum. */ ++ min_prev_count = 4; ++ FOR_EACH_EDGE (e, ei, bb->preds) + { +- default: +- gcc_unreachable(); ++ edge prev_e; ++ edge_iterator prev_ei; + +- case 3: +- if (!rtx_equal_p (d->op0, d->op1)) +- { +- d->one_operand_p = false; ++ if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) ++ { ++ min_prev_count = 0; + break; +- } +- /* The elements of PERM do not suggest that only the first operand +- is used, but both operands are identical. Allow easier matching +- of the permutation by folding the permutation into the single +- input vector. */ +- /* FALLTHRU */ +- +- case 2: +- for (i = 0; i < nelt; ++i) +- d->perm[i] &= nelt - 1; +- d->op0 = d->op1; +- break; +- +- case 1: +- d->op1 = d->op0; +- break; ++ } ++ FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) ++ { ++ if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) ++ { ++ int count = ix86_count_insn_bb (e->src); ++ if (count < min_prev_count) ++ min_prev_count = count; ++ break; ++ } ++ } + } + +- return (which == 3); ++ if (min_prev_count < 4) ++ min_prev_count += ix86_count_insn_bb (bb); ++ ++ return min_prev_count; + } + +-/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ ++/* Pad short function to 4 instructions. */ + +-static bool +-ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, +- rtx op1, const vec_perm_indices &sel) ++static void ++ix86_pad_short_function (void) + { +- struct expand_vec_perm_d d; +- unsigned char perm[MAX_VECT_LEN]; +- unsigned int i, nelt, which; +- bool two_args; ++ edge e; ++ edge_iterator ei; + +- d.target = target; +- d.op0 = op0; +- d.op1 = op1; ++ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) ++ { ++ rtx_insn *ret = BB_END (e->src); ++ if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret))) ++ { ++ int insn_count = ix86_count_insn (e->src); + +- d.vmode = vmode; +- gcc_assert (VECTOR_MODE_P (d.vmode)); +- d.nelt = nelt = GET_MODE_NUNITS (d.vmode); +- d.testing_p = !target; ++ /* Pad short function. */ ++ if (insn_count < 4) ++ { ++ rtx_insn *insn = ret; + +- gcc_assert (sel.length () == nelt); +- gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); ++ /* Find epilogue. */ ++ while (insn ++ && (!NOTE_P (insn) ++ || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)) ++ insn = PREV_INSN (insn); + +- /* Given sufficient ISA support we can just return true here +- for selected vector modes. */ +- switch (d.vmode) +- { +- case E_V16SFmode: +- case E_V16SImode: +- case E_V8DImode: +- case E_V8DFmode: +- if (!TARGET_AVX512F) +- return false; +- /* All implementable with a single vperm[it]2 insn. */ +- if (d.testing_p) +- return true; +- break; +- case E_V32HImode: +- if (!TARGET_AVX512BW) +- return false; +- if (d.testing_p) +- /* All implementable with a single vperm[it]2 insn. 
*/ +- return true; +- break; +- case E_V64QImode: +- if (!TARGET_AVX512BW) +- return false; +- if (d.testing_p) +- /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */ +- return true; +- break; +- case E_V8SImode: +- case E_V8SFmode: +- case E_V4DFmode: +- case E_V4DImode: +- if (!TARGET_AVX) +- return false; +- if (d.testing_p && TARGET_AVX512VL) +- /* All implementable with a single vperm[it]2 insn. */ +- return true; +- break; +- case E_V16HImode: +- if (!TARGET_SSE2) +- return false; +- if (d.testing_p && TARGET_AVX2) +- /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ +- return true; +- break; +- case E_V32QImode: +- if (!TARGET_SSE2) +- return false; +- if (d.testing_p && TARGET_AVX2) +- /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ +- return true; +- break; +- case E_V8HImode: +- case E_V16QImode: +- if (!TARGET_SSE2) +- return false; +- /* Fall through. */ +- case E_V4SImode: +- case E_V4SFmode: +- if (!TARGET_SSE) +- return false; +- /* All implementable with a single vpperm insn. */ +- if (d.testing_p && TARGET_XOP) +- return true; +- /* All implementable with 2 pshufb + 1 ior. */ +- if (d.testing_p && TARGET_SSSE3) +- return true; +- break; +- case E_V2DImode: +- case E_V2DFmode: +- if (!TARGET_SSE) +- return false; +- /* All implementable with shufpd or unpck[lh]pd. */ +- if (d.testing_p) +- return true; +- break; +- default: +- return false; +- } ++ if (!insn) ++ insn = ret; + +- for (i = which = 0; i < nelt; ++i) +- { +- unsigned char e = sel[i]; +- gcc_assert (e < 2 * nelt); +- d.perm[i] = e; +- perm[i] = e; +- which |= (e < nelt ? 1 : 2); ++ /* Two NOPs count as one instruction. */ ++ insn_count = 2 * (4 - insn_count); ++ emit_insn_before (gen_nops (GEN_INT (insn_count)), insn); ++ } ++ } + } ++} + +- if (d.testing_p) +- { +- /* For all elements from second vector, fold the elements to first. */ +- if (which == 2) +- for (i = 0; i < nelt; ++i) +- d.perm[i] -= nelt; +- +- /* Check whether the mask can be applied to the vector type. */ +- d.one_operand_p = (which != 3); +- +- /* Implementable with shufps or pshufd. */ +- if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode)) +- return true; ++/* Fix up a Windows system unwinder issue. If an EH region falls through into ++ the epilogue, the Windows system unwinder will apply epilogue logic and ++ produce incorrect offsets. This can be avoided by adding a nop between ++ the last insn that can throw and the first insn of the epilogue. */ + +- /* Otherwise we have to go through the motions and see if we can +- figure out how to generate the requested permutation. */ +- d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); +- d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); +- if (!d.one_operand_p) +- d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); ++static void ++ix86_seh_fixup_eh_fallthru (void) ++{ ++ edge e; ++ edge_iterator ei; + +- start_sequence (); +- bool ret = ix86_expand_vec_perm_const_1 (&d); +- end_sequence (); ++ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) ++ { ++ rtx_insn *insn, *next; + +- return ret; +- } ++ /* Find the beginning of the epilogue. */ ++ for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn)) ++ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) ++ break; ++ if (insn == NULL) ++ continue; + +- two_args = canonicalize_perm (&d); ++ /* We only care about preceding insns that can throw. 
*/ ++ insn = prev_active_insn (insn); ++ if (insn == NULL || !can_throw_internal (insn)) ++ continue; + +- if (ix86_expand_vec_perm_const_1 (&d)) +- return true; ++ /* Do not separate calls from their debug information. */ ++ for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next)) ++ if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION) ++ insn = next; ++ else ++ break; + +- /* If the selector says both arguments are needed, but the operands are the +- same, the above tried to expand with one_operand_p and flattened selector. +- If that didn't work, retry without one_operand_p; we succeeded with that +- during testing. */ +- if (two_args && d.one_operand_p) +- { +- d.one_operand_p = false; +- memcpy (d.perm, perm, sizeof (perm)); +- return ix86_expand_vec_perm_const_1 (&d); ++ emit_insn_after (gen_nops (const1_rtx), insn); + } +- +- return false; + } + +-void +-ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd) ++/* Implement machine specific optimizations. We implement padding of returns ++ for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ ++static void ++ix86_reorg (void) + { +- struct expand_vec_perm_d d; +- unsigned i, nelt; +- +- d.target = targ; +- d.op0 = op0; +- d.op1 = op1; +- d.vmode = GET_MODE (targ); +- d.nelt = nelt = GET_MODE_NUNITS (d.vmode); +- d.one_operand_p = false; +- d.testing_p = false; ++ /* We are freeing block_for_insn in the toplev to keep compatibility ++ with old MDEP_REORGS that are not CFG based. Recompute it now. */ ++ compute_bb_for_insn (); + +- for (i = 0; i < nelt; ++i) +- d.perm[i] = i * 2 + odd; ++ if (TARGET_SEH && current_function_has_exception_handlers ()) ++ ix86_seh_fixup_eh_fallthru (); + +- /* We'll either be able to implement the permutation directly... */ +- if (expand_vec_perm_1 (&d)) +- return; ++ if (optimize && optimize_function_for_speed_p (cfun)) ++ { ++ if (TARGET_PAD_SHORT_FUNCTION) ++ ix86_pad_short_function (); ++ else if (TARGET_PAD_RETURNS) ++ ix86_pad_returns (); ++#ifdef ASM_OUTPUT_MAX_SKIP_PAD ++ if (TARGET_FOUR_JUMP_LIMIT) ++ ix86_avoid_jump_mispredicts (); ++#endif ++ } ++} + +- /* ... or we use the special-case patterns. */ +- expand_vec_perm_even_odd_1 (&d, odd); ++/* Return nonzero when QImode register that must be represented via REX prefix ++ is used. */ ++bool ++x86_extended_QIreg_mentioned_p (rtx_insn *insn) ++{ ++ int i; ++ extract_insn_cached (insn); ++ for (i = 0; i < recog_data.n_operands; i++) ++ if (GENERAL_REG_P (recog_data.operand[i]) ++ && !QI_REGNO_P (REGNO (recog_data.operand[i]))) ++ return true; ++ return false; + } + +-static void +-ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p) ++/* Return true when INSN mentions register that must be encoded using REX ++ prefix. */ ++bool ++x86_extended_reg_mentioned_p (rtx insn) + { +- struct expand_vec_perm_d d; +- unsigned i, nelt, base; +- bool ok; +- +- d.target = targ; +- d.op0 = op0; +- d.op1 = op1; +- d.vmode = GET_MODE (targ); +- d.nelt = nelt = GET_MODE_NUNITS (d.vmode); +- d.one_operand_p = false; +- d.testing_p = false; +- +- base = high_p ? nelt / 2 : 0; +- for (i = 0; i < nelt / 2; ++i) ++ subrtx_iterator::array_type array; ++ FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) + { +- d.perm[i * 2] = i + base; +- d.perm[i * 2 + 1] = i + base + nelt; ++ const_rtx x = *iter; ++ if (REG_P (x) ++ && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x)))) ++ return true; + } +- +- /* Note that for AVX this isn't one instruction. 
*/ +- ok = ix86_expand_vec_perm_const_1 (&d); +- gcc_assert (ok); ++ return false; + } + ++/* If profitable, negate (without causing overflow) integer constant ++ of mode MODE at location LOC. Return true in this case. */ ++bool ++x86_maybe_negate_const_int (rtx *loc, machine_mode mode) ++{ ++ HOST_WIDE_INT val; + +-/* Expand a vector operation CODE for a V*QImode in terms of the +- same operation on V*HImode. */ +- +-void +-ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2) +-{ +- machine_mode qimode = GET_MODE (dest); +- machine_mode himode; +- rtx (*gen_il) (rtx, rtx, rtx); +- rtx (*gen_ih) (rtx, rtx, rtx); +- rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h; +- struct expand_vec_perm_d d; +- bool ok, full_interleave; +- bool uns_p = false; +- int i; ++ if (!CONST_INT_P (*loc)) ++ return false; + +- switch (qimode) ++ switch (mode) + { +- case E_V16QImode: +- himode = V8HImode; +- gen_il = gen_vec_interleave_lowv16qi; +- gen_ih = gen_vec_interleave_highv16qi; +- break; +- case E_V32QImode: +- himode = V16HImode; +- gen_il = gen_avx2_interleave_lowv32qi; +- gen_ih = gen_avx2_interleave_highv32qi; +- break; +- case E_V64QImode: +- himode = V32HImode; +- gen_il = gen_avx512bw_interleave_lowv64qi; +- gen_ih = gen_avx512bw_interleave_highv64qi; +- break; +- default: +- gcc_unreachable (); +- } ++ case E_DImode: ++ /* DImode x86_64 constants must fit in 32 bits. */ ++ gcc_assert (x86_64_immediate_operand (*loc, mode)); + +- op2_l = op2_h = op2; +- switch (code) +- { +- case MULT: +- /* Unpack data such that we've got a source byte in each low byte of +- each word. We don't care what goes into the high byte of each word. +- Rather than trying to get zero in there, most convenient is to let +- it be a copy of the low byte. */ +- op2_l = gen_reg_rtx (qimode); +- op2_h = gen_reg_rtx (qimode); +- emit_insn (gen_il (op2_l, op2, op2)); +- emit_insn (gen_ih (op2_h, op2, op2)); +- +- op1_l = gen_reg_rtx (qimode); +- op1_h = gen_reg_rtx (qimode); +- emit_insn (gen_il (op1_l, op1, op1)); +- emit_insn (gen_ih (op1_h, op1, op1)); +- full_interleave = qimode == V16QImode; ++ mode = SImode; + break; + +- case ASHIFT: +- case LSHIFTRT: +- uns_p = true; +- /* FALLTHRU */ +- case ASHIFTRT: +- op1_l = gen_reg_rtx (himode); +- op1_h = gen_reg_rtx (himode); +- ix86_expand_sse_unpack (op1_l, op1, uns_p, false); +- ix86_expand_sse_unpack (op1_h, op1, uns_p, true); +- full_interleave = true; ++ case E_SImode: ++ case E_HImode: ++ case E_QImode: + break; ++ + default: + gcc_unreachable (); + } + +- /* Perform the operation. */ +- res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX, +- 1, OPTAB_DIRECT); +- res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX, +- 1, OPTAB_DIRECT); +- gcc_assert (res_l && res_h); ++ /* Avoid overflows. */ ++ if (mode_signbit_p (mode, *loc)) ++ return false; + +- /* Merge the data back into the right place. */ +- d.target = dest; +- d.op0 = gen_lowpart (qimode, res_l); +- d.op1 = gen_lowpart (qimode, res_h); +- d.vmode = qimode; +- d.nelt = GET_MODE_NUNITS (qimode); +- d.one_operand_p = false; +- d.testing_p = false; ++ val = INTVAL (*loc); + +- if (full_interleave) ++ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. ++ Exceptions: -128 encodes smaller than 128, so swap sign and op. */ ++ if ((val < 0 && val != -128) ++ || val == 128) + { +- /* For SSE2, we used an full interleave, so the desired +- results are in the even elements. 
*/ +- for (i = 0; i < d.nelt; ++i) +- d.perm[i] = i * 2; ++ *loc = GEN_INT (-val); ++ return true; + } +- else +- { +- /* For AVX, the interleave used above was not cross-lane. So the +- extraction is evens but with the second and third quarter swapped. +- Happily, that is even one insn shorter than even extraction. +- For AVX512BW we have 4 lanes. We extract evens from within a lane, +- always first from the first and then from the second source operand, +- the index bits above the low 4 bits remains the same. +- Thus, for d.nelt == 32 we want permutation +- 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62 +- and for d.nelt == 64 we want permutation +- 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94, +- 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */ +- for (i = 0; i < d.nelt; ++i) +- d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15); +- } +- +- ok = ix86_expand_vec_perm_const_1 (&d); +- gcc_assert (ok); + +- set_unique_reg_note (get_last_insn (), REG_EQUAL, +- gen_rtx_fmt_ee (code, qimode, op1, op2)); ++ return false; + } + +-/* Helper function of ix86_expand_mul_widen_evenodd. Return true +- if op is CONST_VECTOR with all odd elements equal to their +- preceding element. */ +- +-static bool +-const_vector_equal_evenodd_p (rtx op) +-{ +- machine_mode mode = GET_MODE (op); +- int i, nunits = GET_MODE_NUNITS (mode); +- if (GET_CODE (op) != CONST_VECTOR +- || nunits != CONST_VECTOR_NUNITS (op)) +- return false; +- for (i = 0; i < nunits; i += 2) +- if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1)) +- return false; +- return true; +-} ++/* Generate an unsigned DImode/SImode to FP conversion. This is the same code ++ optabs would emit if we didn't have TFmode patterns. */ + + void +-ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2, +- bool uns_p, bool odd_p) ++x86_emit_floatuns (rtx operands[2]) + { +- machine_mode mode = GET_MODE (op1); +- machine_mode wmode = GET_MODE (dest); +- rtx x; +- rtx orig_op1 = op1, orig_op2 = op2; +- +- if (!nonimmediate_operand (op1, mode)) +- op1 = force_reg (mode, op1); +- if (!nonimmediate_operand (op2, mode)) +- op2 = force_reg (mode, op2); ++ rtx_code_label *neglab, *donelab; ++ rtx i0, i1, f0, in, out; ++ machine_mode mode, inmode; + +- /* We only play even/odd games with vectors of SImode. */ +- gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode); ++ inmode = GET_MODE (operands[1]); ++ gcc_assert (inmode == SImode || inmode == DImode); + +- /* If we're looking for the odd results, shift those members down to +- the even slots. For some cpus this is faster than a PSHUFD. */ +- if (odd_p) +- { +- /* For XOP use vpmacsdqh, but only for smult, as it is only +- signed. 
*/ +- if (TARGET_XOP && mode == V4SImode && !uns_p) +- { +- x = force_reg (wmode, CONST0_RTX (wmode)); +- emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x)); +- return; +- } ++ out = operands[0]; ++ in = force_reg (inmode, operands[1]); ++ mode = GET_MODE (out); ++ neglab = gen_label_rtx (); ++ donelab = gen_label_rtx (); ++ f0 = gen_reg_rtx (mode); + +- x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode)); +- if (!const_vector_equal_evenodd_p (orig_op1)) +- op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1), +- x, NULL, 1, OPTAB_DIRECT); +- if (!const_vector_equal_evenodd_p (orig_op2)) +- op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2), +- x, NULL, 1, OPTAB_DIRECT); +- op1 = gen_lowpart (mode, op1); +- op2 = gen_lowpart (mode, op2); +- } ++ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab); + +- if (mode == V16SImode) +- { +- if (uns_p) +- x = gen_vec_widen_umult_even_v16si (dest, op1, op2); +- else +- x = gen_vec_widen_smult_even_v16si (dest, op1, op2); +- } +- else if (mode == V8SImode) +- { +- if (uns_p) +- x = gen_vec_widen_umult_even_v8si (dest, op1, op2); +- else +- x = gen_vec_widen_smult_even_v8si (dest, op1, op2); +- } +- else if (uns_p) +- x = gen_vec_widen_umult_even_v4si (dest, op1, op2); +- else if (TARGET_SSE4_1) +- x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2); +- else +- { +- rtx s1, s2, t0, t1, t2; ++ expand_float (out, in, 0); + +- /* The easiest way to implement this without PMULDQ is to go through +- the motions as if we are performing a full 64-bit multiply. With +- the exception that we need to do less shuffling of the elements. */ ++ emit_jump_insn (gen_jump (donelab)); ++ emit_barrier (); + +- /* Compute the sign-extension, aka highparts, of the two operands. */ +- s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode), +- op1, pc_rtx, pc_rtx); +- s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode), +- op2, pc_rtx, pc_rtx); ++ emit_label (neglab); + +- /* Multiply LO(A) * HI(B), and vice-versa. */ +- t1 = gen_reg_rtx (wmode); +- t2 = gen_reg_rtx (wmode); +- emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2)); +- emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1)); ++ i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, ++ 1, OPTAB_DIRECT); ++ i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, ++ 1, OPTAB_DIRECT); ++ i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); + +- /* Multiply LO(A) * LO(B). */ +- t0 = gen_reg_rtx (wmode); +- emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2)); ++ expand_float (f0, i0, 0); + +- /* Combine and shift the highparts into place. */ +- t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT); +- t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1, +- 1, OPTAB_DIRECT); ++ emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); + +- /* Combine high and low parts. */ +- force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT); +- return; +- } +- emit_insn (x); ++ emit_label (donelab); + } +- +-void +-ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2, +- bool uns_p, bool high_p) ++ ++/* Target hook for scalar_mode_supported_p. 
*/ ++static bool ++ix86_scalar_mode_supported_p (scalar_mode mode) + { +- machine_mode wmode = GET_MODE (dest); +- machine_mode mode = GET_MODE (op1); +- rtx t1, t2, t3, t4, mask; +- +- switch (mode) +- { +- case E_V4SImode: +- t1 = gen_reg_rtx (mode); +- t2 = gen_reg_rtx (mode); +- if (TARGET_XOP && !uns_p) +- { +- /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case, +- shuffle the elements once so that all elements are in the right +- place for immediate use: { A C B D }. */ +- emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx, +- const1_rtx, GEN_INT (3))); +- emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx, +- const1_rtx, GEN_INT (3))); +- } +- else +- { +- /* Put the elements into place for the multiply. */ +- ix86_expand_vec_interleave (t1, op1, op1, high_p); +- ix86_expand_vec_interleave (t2, op2, op2, high_p); +- high_p = false; +- } +- ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p); +- break; +- +- case E_V8SImode: +- /* Shuffle the elements between the lanes. After this we +- have { A B E F | C D G H } for each operand. */ +- t1 = gen_reg_rtx (V4DImode); +- t2 = gen_reg_rtx (V4DImode); +- emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1), +- const0_rtx, const2_rtx, +- const1_rtx, GEN_INT (3))); +- emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2), +- const0_rtx, const2_rtx, +- const1_rtx, GEN_INT (3))); +- +- /* Shuffle the elements within the lanes. After this we +- have { A A B B | C C D D } or { E E F F | G G H H }. */ +- t3 = gen_reg_rtx (V8SImode); +- t4 = gen_reg_rtx (V8SImode); +- mask = GEN_INT (high_p +- ? 2 + (2 << 2) + (3 << 4) + (3 << 6) +- : 0 + (0 << 2) + (1 << 4) + (1 << 6)); +- emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask)); +- emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask)); +- +- ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false); +- break; +- +- case E_V8HImode: +- case E_V16HImode: +- t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX, +- uns_p, OPTAB_DIRECT); +- t2 = expand_binop (mode, +- uns_p ? umul_highpart_optab : smul_highpart_optab, +- op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT); +- gcc_assert (t1 && t2); +- +- t3 = gen_reg_rtx (mode); +- ix86_expand_vec_interleave (t3, t1, t2, high_p); +- emit_move_insn (dest, gen_lowpart (wmode, t3)); +- break; +- +- case E_V16QImode: +- case E_V32QImode: +- case E_V32HImode: +- case E_V16SImode: +- case E_V64QImode: +- t1 = gen_reg_rtx (wmode); +- t2 = gen_reg_rtx (wmode); +- ix86_expand_sse_unpack (t1, op1, uns_p, high_p); +- ix86_expand_sse_unpack (t2, op2, uns_p, high_p); +- +- emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2))); +- break; +- +- default: +- gcc_unreachable (); +- } ++ if (DECIMAL_FLOAT_MODE_P (mode)) ++ return default_decimal_float_supported_p (); ++ else if (mode == TFmode) ++ return true; ++ else ++ return default_scalar_mode_supported_p (mode); + } + +-void +-ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2) ++/* Implements target hook vector_mode_supported_p. */ ++static bool ++ix86_vector_mode_supported_p (machine_mode mode) + { +- rtx res_1, res_2, res_3, res_4; +- +- res_1 = gen_reg_rtx (V4SImode); +- res_2 = gen_reg_rtx (V4SImode); +- res_3 = gen_reg_rtx (V2DImode); +- res_4 = gen_reg_rtx (V2DImode); +- ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false); +- ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true); +- +- /* Move the results in element 2 down to element 1; we don't care +- what goes in elements 2 and 3. 
Then we can merge the parts +- back together with an interleave. +- +- Note that two other sequences were tried: +- (1) Use interleaves at the start instead of psrldq, which allows +- us to use a single shufps to merge things back at the end. +- (2) Use shufps here to combine the two vectors, then pshufd to +- put the elements in the correct order. +- In both cases the cost of the reformatting stall was too high +- and the overall sequence slower. */ +- +- emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3), +- const0_rtx, const2_rtx, +- const0_rtx, const0_rtx)); +- emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4), +- const0_rtx, const2_rtx, +- const0_rtx, const0_rtx)); +- res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2)); +- +- set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2)); ++ if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) ++ return true; ++ if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) ++ return true; ++ if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) ++ return true; ++ if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) ++ return true; ++ if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) ++ return true; ++ if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) ++ return true; ++ return false; + } + +-void +-ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) ++/* Target hook for c_mode_for_suffix. */ ++static machine_mode ++ix86_c_mode_for_suffix (char suffix) + { +- machine_mode mode = GET_MODE (op0); +- rtx t1, t2, t3, t4, t5, t6; ++ if (suffix == 'q') ++ return TFmode; ++ if (suffix == 'w') ++ return XFmode; + +- if (TARGET_AVX512DQ && mode == V8DImode) +- emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2)); +- else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode) +- emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); +- else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode) +- emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2)); +- else if (TARGET_XOP && mode == V2DImode) +- { +- /* op1: A,B,C,D, op2: E,F,G,H */ +- op1 = gen_lowpart (V4SImode, op1); +- op2 = gen_lowpart (V4SImode, op2); ++ return VOIDmode; ++} + +- t1 = gen_reg_rtx (V4SImode); +- t2 = gen_reg_rtx (V4SImode); +- t3 = gen_reg_rtx (V2DImode); +- t4 = gen_reg_rtx (V2DImode); ++/* Worker function for TARGET_MD_ASM_ADJUST. + +- /* t1: B,A,D,C */ +- emit_insn (gen_sse2_pshufd_1 (t1, op1, +- GEN_INT (1), +- GEN_INT (0), +- GEN_INT (3), +- GEN_INT (2))); ++ We implement asm flag outputs, and maintain source compatibility ++ with the old cc0-based compiler. 
*/ + +- /* t2: (B*E),(A*F),(D*G),(C*H) */ +- emit_insn (gen_mulv4si3 (t2, t1, op2)); ++static rtx_insn * ++ix86_md_asm_adjust (vec &outputs, vec &/*inputs*/, ++ vec &constraints, ++ vec &clobbers, HARD_REG_SET &clobbered_regs) ++{ ++ bool saw_asm_flag = false; + +- /* t3: (B*E)+(A*F), (D*G)+(C*H) */ +- emit_insn (gen_xop_phadddq (t3, t2)); ++ start_sequence (); ++ for (unsigned i = 0, n = outputs.length (); i < n; ++i) ++ { ++ const char *con = constraints[i]; ++ if (strncmp (con, "=@cc", 4) != 0) ++ continue; ++ con += 4; ++ if (strchr (con, ',') != NULL) ++ { ++ error ("alternatives not allowed in % flag output"); ++ continue; ++ } + +- /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ +- emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32))); ++ bool invert = false; ++ if (con[0] == 'n') ++ invert = true, con++; + +- /* Multiply lower parts and add all */ +- t5 = gen_reg_rtx (V2DImode); +- emit_insn (gen_vec_widen_umult_even_v4si (t5, +- gen_lowpart (V4SImode, op1), +- gen_lowpart (V4SImode, op2))); +- op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT); ++ machine_mode mode = CCmode; ++ rtx_code code = UNKNOWN; + +- } +- else +- { +- machine_mode nmode; +- rtx (*umul) (rtx, rtx, rtx); ++ switch (con[0]) ++ { ++ case 'a': ++ if (con[1] == 0) ++ mode = CCAmode, code = EQ; ++ else if (con[1] == 'e' && con[2] == 0) ++ mode = CCCmode, code = NE; ++ break; ++ case 'b': ++ if (con[1] == 0) ++ mode = CCCmode, code = EQ; ++ else if (con[1] == 'e' && con[2] == 0) ++ mode = CCAmode, code = NE; ++ break; ++ case 'c': ++ if (con[1] == 0) ++ mode = CCCmode, code = EQ; ++ break; ++ case 'e': ++ if (con[1] == 0) ++ mode = CCZmode, code = EQ; ++ break; ++ case 'g': ++ if (con[1] == 0) ++ mode = CCGCmode, code = GT; ++ else if (con[1] == 'e' && con[2] == 0) ++ mode = CCGCmode, code = GE; ++ break; ++ case 'l': ++ if (con[1] == 0) ++ mode = CCGCmode, code = LT; ++ else if (con[1] == 'e' && con[2] == 0) ++ mode = CCGCmode, code = LE; ++ break; ++ case 'o': ++ if (con[1] == 0) ++ mode = CCOmode, code = EQ; ++ break; ++ case 'p': ++ if (con[1] == 0) ++ mode = CCPmode, code = EQ; ++ break; ++ case 's': ++ if (con[1] == 0) ++ mode = CCSmode, code = EQ; ++ break; ++ case 'z': ++ if (con[1] == 0) ++ mode = CCZmode, code = EQ; ++ break; ++ } ++ if (code == UNKNOWN) ++ { ++ error ("unknown % flag output %qs", constraints[i]); ++ continue; ++ } ++ if (invert) ++ code = reverse_condition (code); + +- if (mode == V2DImode) ++ rtx dest = outputs[i]; ++ if (!saw_asm_flag) + { +- umul = gen_vec_widen_umult_even_v4si; +- nmode = V4SImode; ++ /* This is the first asm flag output. Here we put the flags ++ register in as the real output and adjust the condition to ++ allow it. */ ++ constraints[i] = "=Bf"; ++ outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG); ++ saw_asm_flag = true; + } +- else if (mode == V4DImode) ++ else + { +- umul = gen_vec_widen_umult_even_v8si; +- nmode = V8SImode; ++ /* We don't need the flags register as output twice. */ ++ constraints[i] = "=X"; ++ outputs[i] = gen_rtx_SCRATCH (SImode); + } +- else if (mode == V8DImode) ++ ++ rtx x = gen_rtx_REG (mode, FLAGS_REG); ++ x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx); ++ ++ machine_mode dest_mode = GET_MODE (dest); ++ if (!SCALAR_INT_MODE_P (dest_mode)) + { +- umul = gen_vec_widen_umult_even_v16si; +- nmode = V16SImode; ++ error ("invalid type for % flag output"); ++ continue; + } +- else +- gcc_unreachable (); + ++ if (dest_mode == DImode && !TARGET_64BIT) ++ dest_mode = SImode; + +- /* Multiply low parts. 
*/ +- t1 = gen_reg_rtx (mode); +- emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2))); ++ if (dest_mode != QImode) ++ { ++ rtx destqi = gen_reg_rtx (QImode); ++ emit_insn (gen_rtx_SET (destqi, x)); + +- /* Shift input vectors right 32 bits so we can multiply high parts. */ +- t6 = GEN_INT (32); +- t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT); +- t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT); ++ if (TARGET_ZERO_EXTEND_WITH_AND ++ && optimize_function_for_speed_p (cfun)) ++ { ++ x = force_reg (dest_mode, const0_rtx); + +- /* Multiply high parts by low parts. */ +- t4 = gen_reg_rtx (mode); +- t5 = gen_reg_rtx (mode); +- emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2))); +- emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1))); ++ emit_insn (gen_movstrictqi (gen_lowpart (QImode, x), destqi)); ++ } ++ else ++ { ++ x = gen_rtx_ZERO_EXTEND (dest_mode, destqi); ++ if (dest_mode == GET_MODE (dest) ++ && !register_operand (dest, GET_MODE (dest))) ++ x = force_reg (dest_mode, x); ++ } ++ } + +- /* Combine and shift the highparts back. */ +- t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT); +- t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT); ++ if (dest_mode != GET_MODE (dest)) ++ { ++ rtx tmp = gen_reg_rtx (SImode); + +- /* Combine high and low parts. */ +- force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT); ++ emit_insn (gen_rtx_SET (tmp, x)); ++ emit_insn (gen_zero_extendsidi2 (dest, tmp)); ++ } ++ else ++ emit_insn (gen_rtx_SET (dest, x)); + } ++ rtx_insn *seq = get_insns (); ++ end_sequence (); + +- set_unique_reg_note (get_last_insn (), REG_EQUAL, +- gen_rtx_MULT (mode, op1, op2)); ++ if (saw_asm_flag) ++ return seq; ++ else ++ { ++ /* If we had no asm flag outputs, clobber the flags. */ ++ clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG)); ++ SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG); ++ return NULL; ++ } + } + +-/* Return 1 if control tansfer instruction INSN +- should be encoded with notrack prefix. */ ++/* Implements target vector targetm.asm.encode_section_info. */ + +-static bool +-ix86_notrack_prefixed_insn_p (rtx insn) ++static void ATTRIBUTE_UNUSED ++ix86_encode_section_info (tree decl, rtx rtl, int first) + { +- if (!insn || !((flag_cf_protection & CF_BRANCH))) +- return false; +- +- if (CALL_P (insn)) +- { +- rtx call = get_call_rtx_from (insn); +- gcc_assert (call != NULL_RTX); +- rtx addr = XEXP (call, 0); ++ default_encode_section_info (decl, rtl, first); + +- /* Do not emit 'notrack' if it's not an indirect call. */ +- if (MEM_P (addr) +- && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF) +- return false; +- else +- return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0); +- } ++ if (ix86_in_large_data_p (decl)) ++ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; ++} + +- if (JUMP_P (insn) && !flag_cet_switch) +- { +- rtx target = JUMP_LABEL (insn); +- if (target == NULL_RTX || ANY_RETURN_P (target)) +- return false; ++/* Worker function for REVERSE_CONDITION. */ + +- /* Check the jump is a switch table. */ +- rtx_insn *label = as_a (target); +- rtx_insn *table = next_insn (label); +- if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table)) +- return false; +- else +- return true; +- } +- return false; ++enum rtx_code ++ix86_reverse_condition (enum rtx_code code, machine_mode mode) ++{ ++ return (mode == CCFPmode ++ ? 
reverse_condition_maybe_unordered (code) ++ : reverse_condition (code)); + } + +-/* Calculate integer abs() using only SSE2 instructions. */ ++/* Output code to perform an x87 FP register move, from OPERANDS[1] ++ to OPERANDS[0]. */ + +-void +-ix86_expand_sse2_abs (rtx target, rtx input) ++const char * ++output_387_reg_move (rtx_insn *insn, rtx *operands) + { +- machine_mode mode = GET_MODE (target); +- rtx tmp0, tmp1, x; +- +- switch (mode) ++ if (REG_P (operands[0])) + { +- case E_V2DImode: +- case E_V4DImode: +- /* For 64-bit signed integer X, with SSE4.2 use +- pxor t0, t0; pcmpgtq X, t0; pxor t0, X; psubq t0, X. +- Otherwise handle it similarly to V4SImode, except use 64 as W instead of +- 32 and use logical instead of arithmetic right shift (which is +- unimplemented) and subtract. */ +- if (TARGET_SSE4_2) +- { +- tmp0 = gen_reg_rtx (mode); +- tmp1 = gen_reg_rtx (mode); +- emit_move_insn (tmp1, CONST0_RTX (mode)); +- if (mode == E_V2DImode) +- emit_insn (gen_sse4_2_gtv2di3 (tmp0, tmp1, input)); +- else +- emit_insn (gen_avx2_gtv4di3 (tmp0, tmp1, input)); ++ if (REG_P (operands[1]) ++ && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) ++ { ++ if (REGNO (operands[0]) == FIRST_STACK_REG) ++ return output_387_ffreep (operands, 0); ++ return "fstp\t%y0"; + } ++ if (STACK_TOP_P (operands[0])) ++ return "fld%Z1\t%y1"; ++ return "fst\t%y0"; ++ } ++ else if (MEM_P (operands[0])) ++ { ++ gcc_assert (REG_P (operands[1])); ++ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) ++ return "fstp%Z0\t%y0"; + else + { +- tmp0 = expand_simple_binop (mode, LSHIFTRT, input, +- GEN_INT (GET_MODE_UNIT_BITSIZE (mode) +- - 1), NULL, 0, OPTAB_DIRECT); +- tmp0 = expand_simple_unop (mode, NEG, tmp0, NULL, false); ++ /* There is no non-popping store to memory for XFmode. ++ So if we need one, follow the store with a load. */ ++ if (GET_MODE (operands[0]) == XFmode) ++ return "fstp%Z0\t%y0\n\tfld%Z0\t%y0"; ++ else ++ return "fst%Z0\t%y0"; + } +- +- tmp1 = expand_simple_binop (mode, XOR, tmp0, input, +- NULL, 0, OPTAB_DIRECT); +- x = expand_simple_binop (mode, MINUS, tmp1, tmp0, +- target, 0, OPTAB_DIRECT); +- break; +- +- case E_V4SImode: +- /* For 32-bit signed integer X, the best way to calculate the absolute +- value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */ +- tmp0 = expand_simple_binop (mode, ASHIFTRT, input, +- GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1), +- NULL, 0, OPTAB_DIRECT); +- tmp1 = expand_simple_binop (mode, XOR, tmp0, input, +- NULL, 0, OPTAB_DIRECT); +- x = expand_simple_binop (mode, MINUS, tmp1, tmp0, +- target, 0, OPTAB_DIRECT); +- break; +- +- case E_V8HImode: +- /* For 16-bit signed integer X, the best way to calculate the absolute +- value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */ +- tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); +- +- x = expand_simple_binop (mode, SMAX, tmp0, input, +- target, 0, OPTAB_DIRECT); +- break; +- +- case E_V16QImode: +- /* For 8-bit signed integer X, the best way to calculate the absolute +- value of X is min ((unsigned char) X, (unsigned char) (-X)), +- as SSE2 provides the PMINUB insn. */ +- tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); +- +- x = expand_simple_binop (V16QImode, UMIN, tmp0, input, +- target, 0, OPTAB_DIRECT); +- break; +- +- default: +- gcc_unreachable (); + } +- +- if (x != target) +- emit_move_insn (target, x); ++ else ++ gcc_unreachable(); + } ++#ifdef TARGET_SOLARIS ++/* Solaris implementation of TARGET_ASM_NAMED_SECTION. 
*/ + +-/* Expand an extract from a vector register through pextr insn. +- Return true if successful. */ +- +-bool +-ix86_expand_pextr (rtx *operands) ++static void ++i386_solaris_elf_named_section (const char *name, unsigned int flags, ++ tree decl) + { +- rtx dst = operands[0]; +- rtx src = operands[1]; +- +- unsigned int size = INTVAL (operands[2]); +- unsigned int pos = INTVAL (operands[3]); +- +- if (SUBREG_P (dst)) ++ /* With Binutils 2.15, the "@unwind" marker must be specified on ++ every occurrence of the ".eh_frame" section, not just the first ++ one. */ ++ if (TARGET_64BIT ++ && strcmp (name, ".eh_frame") == 0) + { +- /* Reject non-lowpart subregs. */ +- if (SUBREG_BYTE (dst) > 0) +- return false; +- dst = SUBREG_REG (dst); ++ fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, ++ flags & SECTION_WRITE ? "aw" : "a"); ++ return; + } +- +- if (SUBREG_P (src)) ++ ++#ifndef USE_GAS ++ if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) + { +- pos += SUBREG_BYTE (src) * BITS_PER_UNIT; +- src = SUBREG_REG (src); ++ solaris_elf_asm_comdat_section (name, flags, decl); ++ return; + } + +- switch (GET_MODE (src)) ++ /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the ++ SPARC assembler. One cannot mix single-letter flags and #exclude, so ++ only emit the latter here. */ ++ if (flags & SECTION_EXCLUDE) + { +- case E_V16QImode: +- case E_V8HImode: +- case E_V4SImode: +- case E_V2DImode: +- case E_V1TImode: +- case E_TImode: +- { +- machine_mode srcmode, dstmode; +- rtx d, pat; ++ fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name); ++ return; ++ } ++#endif + +- if (!int_mode_for_size (size, 0).exists (&dstmode)) +- return false; ++ default_elf_asm_named_section (name, flags, decl); ++} ++#endif /* TARGET_SOLARIS */ + +- switch (dstmode) +- { +- case E_QImode: +- if (!TARGET_SSE4_1) +- return false; +- srcmode = V16QImode; +- break; ++/* Return the mangling of TYPE if it is an extended fundamental type. */ + +- case E_HImode: +- if (!TARGET_SSE2) +- return false; +- srcmode = V8HImode; +- break; ++static const char * ++ix86_mangle_type (const_tree type) ++{ ++ type = TYPE_MAIN_VARIANT (type); + +- case E_SImode: +- if (!TARGET_SSE4_1) +- return false; +- srcmode = V4SImode; +- break; ++ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE ++ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) ++ return NULL; + +- case E_DImode: +- gcc_assert (TARGET_64BIT); +- if (!TARGET_SSE4_1) +- return false; +- srcmode = V2DImode; +- break; ++ switch (TYPE_MODE (type)) ++ { ++ case E_TFmode: ++ /* __float128 is "g". */ ++ return "g"; ++ case E_XFmode: ++ /* "long double" or __float80 is "e". */ ++ return "e"; ++ default: ++ return NULL; ++ } ++} + +- default: +- return false; +- } ++static GTY(()) tree ix86_tls_stack_chk_guard_decl; ++ ++static tree ++ix86_stack_protect_guard (void) ++{ ++ if (TARGET_SSP_TLS_GUARD) ++ { ++ tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1); ++ int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg); ++ tree type = build_qualified_type (type_node, qual); ++ tree t; + +- /* Reject extractions from misaligned positions. */ +- if (pos & (size-1)) +- return false; ++ if (global_options_set.x_ix86_stack_protector_guard_symbol_str) ++ { ++ t = ix86_tls_stack_chk_guard_decl; + +- if (GET_MODE (dst) == dstmode) +- d = dst; +- else +- d = gen_reg_rtx (dstmode); ++ if (t == NULL) ++ { ++ rtx x; + +- /* Construct insn pattern. 
*/ +- pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size))); +- pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat); ++ t = build_decl ++ (UNKNOWN_LOCATION, VAR_DECL, ++ get_identifier (ix86_stack_protector_guard_symbol_str), ++ type); ++ TREE_STATIC (t) = 1; ++ TREE_PUBLIC (t) = 1; ++ DECL_EXTERNAL (t) = 1; ++ TREE_USED (t) = 1; ++ TREE_THIS_VOLATILE (t) = 1; ++ DECL_ARTIFICIAL (t) = 1; ++ DECL_IGNORED_P (t) = 1; + +- /* Let the rtl optimizers know about the zero extension performed. */ +- if (dstmode == QImode || dstmode == HImode) +- { +- pat = gen_rtx_ZERO_EXTEND (SImode, pat); +- d = gen_lowpart (SImode, d); +- } ++ /* Do not share RTL as the declaration is visible outside of ++ current function. */ ++ x = DECL_RTL (t); ++ RTX_FLAG (x, used) = 1; + +- emit_insn (gen_rtx_SET (d, pat)); ++ ix86_tls_stack_chk_guard_decl = t; ++ } ++ } ++ else ++ { ++ tree asptrtype = build_pointer_type (type); + +- if (d != dst) +- emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); +- return true; +- } ++ t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset); ++ t = build2 (MEM_REF, asptrtype, t, ++ build_int_cst (asptrtype, 0)); ++ TREE_THIS_VOLATILE (t) = 1; ++ } + +- default: +- return false; ++ return t; + } ++ ++ return default_stack_protect_guard (); + } + +-/* Expand an insert into a vector register through pinsr insn. +- Return true if successful. */ ++/* For 32-bit code we can save PIC register setup by using ++ __stack_chk_fail_local hidden function instead of calling ++ __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC ++ register, so it is better to call __stack_chk_fail directly. */ + +-bool +-ix86_expand_pinsr (rtx *operands) ++static tree ATTRIBUTE_UNUSED ++ix86_stack_protect_fail (void) + { +- rtx dst = operands[0]; +- rtx src = operands[3]; ++ return TARGET_64BIT ++ ? default_external_stack_protect_fail () ++ : default_hidden_stack_protect_fail (); ++} + +- unsigned int size = INTVAL (operands[1]); +- unsigned int pos = INTVAL (operands[2]); ++/* Select a format to encode pointers in exception handling data. CODE ++ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is ++ true if the symbol may be affected by dynamic relocations. + +- if (SUBREG_P (dst)) ++ ??? All x86 object file formats are capable of representing this. ++ After all, the relocation needed is the same as for the call insn. ++ Whether or not a particular assembler allows us to enter such, I ++ guess we'll have to see. */ ++int ++asm_preferred_eh_data_format (int code, int global) ++{ ++ if (flag_pic) ++ { ++ int type = DW_EH_PE_sdata8; ++ if (!TARGET_64BIT ++ || ix86_cmodel == CM_SMALL_PIC ++ || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) ++ type = DW_EH_PE_sdata4; ++ return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; ++ } ++ if (ix86_cmodel == CM_SMALL ++ || (ix86_cmodel == CM_MEDIUM && code)) ++ return DW_EH_PE_udata4; ++ return DW_EH_PE_absptr; ++} ++ ++/* Implement targetm.vectorize.builtin_vectorization_cost. 
*/ ++static int ++ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, ++ tree vectype, int) ++{ ++ bool fp = false; ++ machine_mode mode = TImode; ++ int index; ++ if (vectype != NULL) + { +- pos += SUBREG_BYTE (dst) * BITS_PER_UNIT; +- dst = SUBREG_REG (dst); ++ fp = FLOAT_TYPE_P (vectype); ++ mode = TYPE_MODE (vectype); + } + +- switch (GET_MODE (dst)) ++ switch (type_of_cost) + { +- case E_V16QImode: +- case E_V8HImode: +- case E_V4SImode: +- case E_V2DImode: +- case E_V1TImode: +- case E_TImode: +- { +- machine_mode srcmode, dstmode; +- rtx (*pinsr)(rtx, rtx, rtx, rtx); +- rtx d; ++ case scalar_stmt: ++ return fp ? ix86_cost->addss : COSTS_N_INSNS (1); + +- if (!int_mode_for_size (size, 0).exists (&srcmode)) +- return false; ++ case scalar_load: ++ /* load/store costs are relative to register move which is 2. Recompute ++ it to COSTS_N_INSNS so everything have same base. */ ++ return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0] ++ : ix86_cost->int_load [2]) / 2; + +- switch (srcmode) +- { +- case E_QImode: +- if (!TARGET_SSE4_1) +- return false; +- dstmode = V16QImode; +- pinsr = gen_sse4_1_pinsrb; +- break; ++ case scalar_store: ++ return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0] ++ : ix86_cost->int_store [2]) / 2; + +- case E_HImode: +- if (!TARGET_SSE2) +- return false; +- dstmode = V8HImode; +- pinsr = gen_sse2_pinsrw; +- break; ++ case vector_stmt: ++ return ix86_vec_cost (mode, ++ fp ? ix86_cost->addss : ix86_cost->sse_op); + +- case E_SImode: +- if (!TARGET_SSE4_1) +- return false; +- dstmode = V4SImode; +- pinsr = gen_sse4_1_pinsrd; +- break; ++ case vector_load: ++ index = sse_store_index (mode); ++ /* See PR82713 - we may end up being called on non-vector type. */ ++ if (index < 0) ++ index = 2; ++ return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2; + +- case E_DImode: +- gcc_assert (TARGET_64BIT); +- if (!TARGET_SSE4_1) +- return false; +- dstmode = V2DImode; +- pinsr = gen_sse4_1_pinsrq; +- break; ++ case vector_store: ++ index = sse_store_index (mode); ++ /* See PR82713 - we may end up being called on non-vector type. */ ++ if (index < 0) ++ index = 2; ++ return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2; + +- default: +- return false; +- } ++ case vec_to_scalar: ++ case scalar_to_vec: ++ return ix86_vec_cost (mode, ix86_cost->sse_op); + +- /* Reject insertions to misaligned positions. */ +- if (pos & (size-1)) +- return false; ++ /* We should have separate costs for unaligned loads and gather/scatter. ++ Do that incrementally. */ ++ case unaligned_load: ++ index = sse_store_index (mode); ++ /* See PR82713 - we may end up being called on non-vector type. */ ++ if (index < 0) ++ index = 2; ++ return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2; + +- if (SUBREG_P (src)) +- { +- unsigned int srcpos = SUBREG_BYTE (src); ++ case unaligned_store: ++ index = sse_store_index (mode); ++ /* See PR82713 - we may end up being called on non-vector type. 
*/ ++ if (index < 0) ++ index = 2; ++ return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2; + +- if (srcpos > 0) +- { +- rtx extr_ops[4]; ++ case vector_gather_load: ++ return ix86_vec_cost (mode, ++ COSTS_N_INSNS ++ (ix86_cost->gather_static ++ + ix86_cost->gather_per_elt ++ * TYPE_VECTOR_SUBPARTS (vectype)) / 2); + +- extr_ops[0] = gen_reg_rtx (srcmode); +- extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src)); +- extr_ops[2] = GEN_INT (size); +- extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT); ++ case vector_scatter_store: ++ return ix86_vec_cost (mode, ++ COSTS_N_INSNS ++ (ix86_cost->scatter_static ++ + ix86_cost->scatter_per_elt ++ * TYPE_VECTOR_SUBPARTS (vectype)) / 2); + +- if (!ix86_expand_pextr (extr_ops)) +- return false; ++ case cond_branch_taken: ++ return ix86_cost->cond_taken_branch_cost; + +- src = extr_ops[0]; +- } +- else +- src = gen_lowpart (srcmode, SUBREG_REG (src)); +- } ++ case cond_branch_not_taken: ++ return ix86_cost->cond_not_taken_branch_cost; + +- if (GET_MODE (dst) == dstmode) +- d = dst; +- else +- d = gen_reg_rtx (dstmode); ++ case vec_perm: ++ case vec_promote_demote: ++ return ix86_vec_cost (mode, ix86_cost->sse_op); + +- emit_insn (pinsr (d, gen_lowpart (dstmode, dst), +- gen_lowpart (srcmode, src), +- GEN_INT (1 << (pos / size)))); +- if (d != dst) +- emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); +- return true; +- } ++ case vec_construct: ++ { ++ /* N element inserts into SSE vectors. */ ++ int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op; ++ /* One vinserti128 for combining two SSE vectors for AVX256. */ ++ if (GET_MODE_BITSIZE (mode) == 256) ++ cost += ix86_vec_cost (mode, ix86_cost->addss); ++ /* One vinserti64x4 and two vinserti128 for combining SSE ++ and AVX256 vectors to AVX512. */ ++ else if (GET_MODE_BITSIZE (mode) == 512) ++ cost += 3 * ix86_vec_cost (mode, ix86_cost->addss); ++ return cost; ++ } + +- default: +- return false; ++ default: ++ gcc_unreachable (); + } + } ++ + + /* This function returns the calling abi specific va_list type node. + It returns the FNDECL specific va_list type. */ +@@ -50192,39 +21332,6 @@ ix86_preferred_simd_mode (scalar_mode mode) + } + } + +-/* All CPUs prefer to avoid cross-lane operations so perform reductions +- upper against lower halves up to SSE reg size. */ +- +-static machine_mode +-ix86_split_reduction (machine_mode mode) +-{ +- /* Reduce lowpart against highpart until we reach SSE reg width to +- avoid cross-lane operations. */ +- switch (mode) +- { +- case E_V8DImode: +- case E_V4DImode: +- return V2DImode; +- case E_V16SImode: +- case E_V8SImode: +- return V4SImode; +- case E_V32HImode: +- case E_V16HImode: +- return V8HImode; +- case E_V64QImode: +- case E_V32QImode: +- return V16QImode; +- case E_V16SFmode: +- case E_V8SFmode: +- return V4SFmode; +- case E_V8DFmode: +- case E_V4DFmode: +- return V2DFmode; +- default: +- return mode; +- } +-} +- + /* If AVX is enabled then try vectorizing with both 256bit and 128bit + vectors. If AVX512F is enabled then try vectorizing with 512bit, + 256bit and 128bit vectors. 
*/ +@@ -50596,13 +21703,15 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) + if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong)) + { + warning (OPT_Winvalid_memory_model, +- "HLE_ACQUIRE not used with ACQUIRE or stronger memory model"); ++ "% not used with % or stronger " ++ "memory model"); + return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE; + } + if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong)) + { + warning (OPT_Winvalid_memory_model, +- "HLE_RELEASE not used with RELEASE or stronger memory model"); ++ "% not used with % or stronger " ++ "memory model"); + return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE; + } + return val; +@@ -50760,50 +21869,6 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, + return ret; + } + +-/* Add target attribute to SIMD clone NODE if needed. */ +- +-static void +-ix86_simd_clone_adjust (struct cgraph_node *node) +-{ +- const char *str = NULL; +- +- /* Attributes need to be adjusted for definitions, not declarations. */ +- if (!node->definition) +- return; +- +- gcc_assert (node->decl == cfun->decl); +- switch (node->simdclone->vecsize_mangle) +- { +- case 'b': +- if (!TARGET_SSE2) +- str = "sse2"; +- break; +- case 'c': +- if (!TARGET_AVX) +- str = "avx"; +- break; +- case 'd': +- if (!TARGET_AVX2) +- str = "avx2"; +- break; +- case 'e': +- if (!TARGET_AVX512F) +- str = "avx512f"; +- break; +- default: +- gcc_unreachable (); +- } +- if (str == NULL) +- return; +- push_cfun (NULL); +- tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str)); +- bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); +- gcc_assert (ok); +- pop_cfun (); +- ix86_reset_previous_fndecl (); +- ix86_set_current_function (node->decl); +-} +- + /* If SIMD clone NODE can't be used in a vectorized loop + in current function, return -1, otherwise return a badness of using it + (0 if it is most desirable from vecsize_mangle point of view, 1 +@@ -50912,10 +21977,10 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) + tree fenv_ptr = build_pointer_type (fenv_type); + tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var); + fenv_addr = fold_convert (ptr_type_node, fenv_addr); +- tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV]; +- tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV]; +- tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW]; +- tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX]; ++ tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV); ++ tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV); ++ tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW); ++ tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX); + tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr); + tree hold_fnclex = build_call_expr (fnclex, 0); + fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv, +@@ -50939,8 +22004,8 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) + { + tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node); + tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node); +- tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR]; +- tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR]; ++ tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR); ++ tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR); + tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0); + tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node, + mxcsr_orig_var, stmxcsr_hold_call); +@@ -51183,22 +22248,6 @@ ix86_init_libfuncs (void) + #endif + } + +-/* Generate call to 
__divmoddi4. */ +- +-static void +-ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode, +- rtx op0, rtx op1, +- rtx *quot_p, rtx *rem_p) +-{ +- rtx rem = assign_386_stack_local (mode, SLOT_TEMP); +- +- rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL, +- mode, op0, mode, op1, mode, +- XEXP (rem, 0), Pmode); +- *quot_p = quot; +- *rem_p = rem; +-} +- + /* Set the value of FLT_EVAL_METHOD in float.h. When using only the + FPU, assume that the fpcw is set to extended precision; when using + only SSE, rounding is correct; when using both SSE and the FPU, +@@ -51970,9 +23019,6 @@ ix86_run_selftests (void) + #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS + #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true + +-#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS +-#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds +- + #undef TARGET_OFFLOAD_OPTIONS + #define TARGET_OFFLOAD_OPTIONS \ + ix86_offload_options +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index 14e5a392f..187e52a5b 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -1891,7 +1891,7 @@ typedef struct ix86_args { + ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD) + + /* If a memory-to-memory move would take MOVE_RATIO or more simple +- move-instruction pairs, we will do a movmem or libcall instead. ++ move-instruction pairs, we will do a cpymem or libcall instead. + Increasing the value will always make code faster, but eventually + incurs high cost in increased code size. + +@@ -2784,6 +2784,9 @@ struct GTY(()) machine_function { + /* During SEH output, this is non-null. */ + struct seh_frame_state * GTY((skip(""))) seh; + }; ++ ++extern GTY(()) tree sysv_va_list_type_node; ++extern GTY(()) tree ms_va_list_type_node; + #endif + + #define ix86_stack_locals (cfun->machine->stack_locals) +@@ -2881,6 +2884,12 @@ extern void debug_dispatch_window (int); + + #define TARGET_SUPPORTS_WIDE_INT 1 + ++#if !defined(GENERATOR_FILE) && !defined(IN_LIBGCC2) ++extern enum attr_cpu ix86_schedule; ++ ++#define NUM_X86_64_MS_CLOBBERED_REGS 12 ++#endif ++ + /* + Local variables: + version-control: t +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index 698c31a0a..861248899 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -16731,7 +16731,7 @@ + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + +-(define_expand "movmem" ++(define_expand "cpymem" + [(use (match_operand:BLK 0 "memory_operand")) + (use (match_operand:BLK 1 "memory_operand")) + (use (match_operand:SWI48 2 "nonmemory_operand")) +@@ -16743,7 +16743,7 @@ + (use (match_operand:SI 8 ""))] + "" + { +- if (ix86_expand_set_or_movmem (operands[0], operands[1], ++ if (ix86_expand_set_or_cpymem (operands[0], operands[1], + operands[2], NULL, operands[3], + operands[4], operands[5], + operands[6], operands[7], +@@ -16958,7 +16958,7 @@ + (use (match_operand:SI 8 ""))] + "" + { +- if (ix86_expand_set_or_movmem (operands[0], NULL, ++ if (ix86_expand_set_or_cpymem (operands[0], NULL, + operands[1], operands[2], + operands[3], operands[4], + operands[5], operands[6], +diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md +index 865947deb..4135159ac 100644 +--- a/gcc/config/i386/predicates.md ++++ b/gcc/config/i386/predicates.md +@@ -683,7 +683,7 @@ + if (GET_CODE (op) == PLUS && REG_P (XEXP (op, 0))) + { + int regno = REGNO (XEXP (op, 0)); +- if (!HARD_REGISTER_NUM_P (regno) || call_used_regs[regno]) ++ if (!HARD_REGISTER_NUM_P (regno) || 
call_used_or_fixed_reg_p (regno)) + { + op = XEXP (op, 1); + if (GOT32_symbol_operand (op, VOIDmode)) +diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386 +index 0dac80fbc..50caf2c69 100644 +--- a/gcc/config/i386/t-i386 ++++ b/gcc/config/i386/t-i386 +@@ -44,6 +44,22 @@ i386-d.o: $(srcdir)/config/i386/i386-d.c + $(COMPILE) $< + $(POSTCOMPILE) + ++i386-options.o: $(srcdir)/config/i386/i386-options.c ++ $(COMPILE) $< ++ $(POSTCOMPILE) ++ ++i386-builtins.o: $(srcdir)/config/i386/i386-builtins.c ++ $(COMPILE) $< ++ $(POSTCOMPILE) ++ ++i386-expand.o: $(srcdir)/config/i386/i386-expand.c ++ $(COMPILE) $< ++ $(POSTCOMPILE) ++ ++i386-features.o: $(srcdir)/config/i386/i386-features.c ++ $(COMPILE) $< ++ $(POSTCOMPILE) ++ + i386.o: i386-builtin-types.inc + + i386-builtin-types.inc: s-i386-bt ; @true +diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c +index e8d905e22..d09e49637 100644 +--- a/gcc/config/ia64/ia64.c ++++ b/gcc/config/ia64/ia64.c +@@ -5147,7 +5147,7 @@ ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) + { + /* Variable sized types are passed by reference. */ +- if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) ++ if (pass_va_arg_by_reference (type)) + { + tree ptrtype = build_pointer_type (type); + tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p); +diff --git a/gcc/config/lm32/lm32.md b/gcc/config/lm32/lm32.md +index c09052c62..91a5fe1e0 100644 +--- a/gcc/config/lm32/lm32.md ++++ b/gcc/config/lm32/lm32.md +@@ -216,7 +216,7 @@ + } + }") + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "")) + (use (match_operand:SI 2 "" "")) +diff --git a/gcc/config/m32c/blkmov.md b/gcc/config/m32c/blkmov.md +index d7da439c2..e5cdc801f 100644 +--- a/gcc/config/m32c/blkmov.md ++++ b/gcc/config/m32c/blkmov.md +@@ -40,14 +40,14 @@ + ;; 1 = source (mem:BLK ...) 
+ ;; 2 = count + ;; 3 = alignment +-(define_expand "movmemhi" ++(define_expand "cpymemhi" + [(match_operand 0 "ap_operand" "") + (match_operand 1 "ap_operand" "") + (match_operand 2 "m32c_r3_operand" "") + (match_operand 3 "" "") + ] + "" +- "if (m32c_expand_movmemhi(operands)) DONE; FAIL;" ++ "if (m32c_expand_cpymemhi(operands)) DONE; FAIL;" + ) + + ;; We can't use mode iterators for these because M16C uses r1h to extend +@@ -60,7 +60,7 @@ + ;; 3 = dest (in) + ;; 4 = src (in) + ;; 5 = count (in) +-(define_insn "movmemhi_bhi_op" ++(define_insn "cpymemhi_bhi_op" + [(set (mem:QI (match_operand:HI 3 "ap_operand" "0")) + (mem:QI (match_operand:HI 4 "ap_operand" "1"))) + (set (match_operand:HI 2 "m32c_r3_operand" "=R3w") +@@ -75,7 +75,7 @@ + "TARGET_A16" + "mov.b:q\t#0,r1h\n\tsmovf.b\t; %0[0..%2-1]=r1h%1[]" + ) +-(define_insn "movmemhi_bpsi_op" ++(define_insn "cpymemhi_bpsi_op" + [(set (mem:QI (match_operand:PSI 3 "ap_operand" "0")) + (mem:QI (match_operand:PSI 4 "ap_operand" "1"))) + (set (match_operand:HI 2 "m32c_r3_operand" "=R3w") +@@ -89,7 +89,7 @@ + "TARGET_A24" + "smovf.b\t; %0[0..%2-1]=%1[]" + ) +-(define_insn "movmemhi_whi_op" ++(define_insn "cpymemhi_whi_op" + [(set (mem:HI (match_operand:HI 3 "ap_operand" "0")) + (mem:HI (match_operand:HI 4 "ap_operand" "1"))) + (set (match_operand:HI 2 "m32c_r3_operand" "=R3w") +@@ -104,7 +104,7 @@ + "TARGET_A16" + "mov.b:q\t#0,r1h\n\tsmovf.w\t; %0[0..%2-1]=r1h%1[]" + ) +-(define_insn "movmemhi_wpsi_op" ++(define_insn "cpymemhi_wpsi_op" + [(set (mem:HI (match_operand:PSI 3 "ap_operand" "0")) + (mem:HI (match_operand:PSI 4 "ap_operand" "1"))) + (set (match_operand:HI 2 "m32c_r3_operand" "=R3w") +diff --git a/gcc/config/m32c/m32c-protos.h b/gcc/config/m32c/m32c-protos.h +index 7d4d478fd..fe926fd50 100644 +--- a/gcc/config/m32c/m32c-protos.h ++++ b/gcc/config/m32c/m32c-protos.h +@@ -43,7 +43,7 @@ void m32c_emit_eh_epilogue (rtx); + int m32c_expand_cmpstr (rtx *); + int m32c_expand_insv (rtx *); + int m32c_expand_movcc (rtx *); +-int m32c_expand_movmemhi (rtx *); ++int m32c_expand_cpymemhi (rtx *); + int m32c_expand_movstr (rtx *); + void m32c_expand_neg_mulpsi3 (rtx *); + int m32c_expand_setmemhi (rtx *); +diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c +index 1a0d0c681..d0d24bb5f 100644 +--- a/gcc/config/m32c/m32c.c ++++ b/gcc/config/m32c/m32c.c +@@ -3592,7 +3592,7 @@ m32c_expand_setmemhi(rtx *operands) + addresses, not [mem] syntax. $0 is the destination (MEM:BLK), $1 + is the source (MEM:BLK), and $2 the count (HI). 
*/ + int +-m32c_expand_movmemhi(rtx *operands) ++m32c_expand_cpymemhi(rtx *operands) + { + rtx desta, srca, count; + rtx desto, srco, counto; +@@ -3620,9 +3620,9 @@ m32c_expand_movmemhi(rtx *operands) + { + count = copy_to_mode_reg (HImode, GEN_INT (INTVAL (count) / 2)); + if (TARGET_A16) +- emit_insn (gen_movmemhi_whi_op (desto, srco, counto, desta, srca, count)); ++ emit_insn (gen_cpymemhi_whi_op (desto, srco, counto, desta, srca, count)); + else +- emit_insn (gen_movmemhi_wpsi_op (desto, srco, counto, desta, srca, count)); ++ emit_insn (gen_cpymemhi_wpsi_op (desto, srco, counto, desta, srca, count)); + return 1; + } + +@@ -3632,9 +3632,9 @@ m32c_expand_movmemhi(rtx *operands) + count = copy_to_mode_reg (HImode, count); + + if (TARGET_A16) +- emit_insn (gen_movmemhi_bhi_op (desto, srco, counto, desta, srca, count)); ++ emit_insn (gen_cpymemhi_bhi_op (desto, srco, counto, desta, srca, count)); + else +- emit_insn (gen_movmemhi_bpsi_op (desto, srco, counto, desta, srca, count)); ++ emit_insn (gen_cpymemhi_bpsi_op (desto, srco, counto, desta, srca, count)); + + return 1; + } +diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c +index 6e79b2aec..ac18aa286 100644 +--- a/gcc/config/m32r/m32r.c ++++ b/gcc/config/m32r/m32r.c +@@ -2598,7 +2598,7 @@ m32r_expand_block_move (rtx operands[]) + to the word after the end of the source block, and dst_reg to point + to the last word of the destination block, provided that the block + is MAX_MOVE_BYTES long. */ +- emit_insn (gen_movmemsi_internal (dst_reg, src_reg, at_a_time, ++ emit_insn (gen_cpymemsi_internal (dst_reg, src_reg, at_a_time, + new_dst_reg, new_src_reg)); + emit_move_insn (dst_reg, new_dst_reg); + emit_move_insn (src_reg, new_src_reg); +@@ -2612,7 +2612,7 @@ m32r_expand_block_move (rtx operands[]) + } + + if (leftover) +- emit_insn (gen_movmemsi_internal (dst_reg, src_reg, GEN_INT (leftover), ++ emit_insn (gen_cpymemsi_internal (dst_reg, src_reg, GEN_INT (leftover), + gen_reg_rtx (SImode), + gen_reg_rtx (SImode))); + return 1; +diff --git a/gcc/config/m32r/m32r.md b/gcc/config/m32r/m32r.md +index be5739763..e944363fd 100644 +--- a/gcc/config/m32r/m32r.md ++++ b/gcc/config/m32r/m32r.md +@@ -2195,7 +2195,7 @@ + ;; Argument 2 is the length + ;; Argument 3 is the alignment + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "")) + (use (match_operand:SI 2 "immediate_operand" "")) +@@ -2214,7 +2214,7 @@ + + ;; Insn generated by block moves + +-(define_insn "movmemsi_internal" ++(define_insn "cpymemsi_internal" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "r")) ;; destination + (mem:BLK (match_operand:SI 1 "register_operand" "r"))) ;; source + (use (match_operand:SI 2 "m32r_block_immediate_operand" "J"));; # bytes to move +diff --git a/gcc/config/mcore/mcore.md b/gcc/config/mcore/mcore.md +index cc84e342b..c6893518d 100644 +--- a/gcc/config/mcore/mcore.md ++++ b/gcc/config/mcore/mcore.md +@@ -2552,7 +2552,7 @@ + ;; Block move - adapted from m88k.md + ;; ------------------------------------------------------------------------ + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (mem:BLK (match_operand:BLK 0 "" "")) + (mem:BLK (match_operand:BLK 1 "" ""))) + (use (match_operand:SI 2 "general_operand" "")) +diff --git a/gcc/config/microblaze/microblaze.c b/gcc/config/microblaze/microblaze.c +index 55c1becf9..07dd0bc6f 100644 +--- a/gcc/config/microblaze/microblaze.c ++++ 
b/gcc/config/microblaze/microblaze.c +@@ -1250,7 +1250,7 @@ microblaze_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length) + microblaze_block_move_straight (dest, src, leftover); + } + +-/* Expand a movmemsi instruction. */ ++/* Expand a cpymemsi instruction. */ + + bool + microblaze_expand_block_move (rtx dest, rtx src, rtx length, rtx align_rtx) +diff --git a/gcc/config/microblaze/microblaze.md b/gcc/config/microblaze/microblaze.md +index 183afff37..1509e4318 100644 +--- a/gcc/config/microblaze/microblaze.md ++++ b/gcc/config/microblaze/microblaze.md +@@ -1144,7 +1144,7 @@ + ;; Argument 2 is the length + ;; Argument 3 is the alignment + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (match_operand:BLK 0 "general_operand") + (match_operand:BLK 1 "general_operand")) + (use (match_operand:SI 2 "")) +diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c +index 100894720..3c95636bf 100644 +--- a/gcc/config/mips/mips.c ++++ b/gcc/config/mips/mips.c +@@ -6780,7 +6780,7 @@ mips_std_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + unsigned HOST_WIDE_INT align, boundary; + bool indirect; + +- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); ++ indirect = pass_va_arg_by_reference (type); + if (indirect) + type = build_pointer_type (type); + +@@ -6867,7 +6867,7 @@ mips_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + tree addr; + bool indirect_p; + +- indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, 0); ++ indirect_p = pass_va_arg_by_reference (type); + if (indirect_p) + type = build_pointer_type (type); + +@@ -7938,15 +7938,15 @@ mips_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, + { + if (op == STORE_BY_PIECES) + return mips_store_by_pieces_p (size, align); +- if (op == MOVE_BY_PIECES && HAVE_movmemsi) ++ if (op == MOVE_BY_PIECES && HAVE_cpymemsi) + { +- /* movmemsi is meant to generate code that is at least as good as +- move_by_pieces. However, movmemsi effectively uses a by-pieces ++ /* cpymemsi is meant to generate code that is at least as good as ++ move_by_pieces. However, cpymemsi effectively uses a by-pieces + implementation both for moves smaller than a word and for + word-aligned moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT + bytes. We should allow the tree-level optimisers to do such + moves by pieces, as it often exposes other optimization +- opportunities. We might as well continue to use movmemsi at ++ opportunities. We might as well continue to use cpymemsi at + the rtl level though, as it produces better code when + scheduling is disabled (such as at -O). */ + if (currently_expanding_to_rtl) +@@ -8165,7 +8165,7 @@ mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + emit_insn (gen_nop ()); + } + +-/* Expand a movmemsi instruction, which copies LENGTH bytes from ++/* Expand a cpymemsi instruction, which copies LENGTH bytes from + memory reference SRC to memory reference DEST. */ + + bool +diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h +index 953d82e85..a5be7fa39 100644 +--- a/gcc/config/mips/mips.h ++++ b/gcc/config/mips/mips.h +@@ -3099,12 +3099,12 @@ while (0) + #define MIPS_MIN_MOVE_MEM_ALIGN 16 + + /* The maximum number of bytes that can be copied by one iteration of +- a movmemsi loop; see mips_block_move_loop. */ ++ a cpymemsi loop; see mips_block_move_loop. 
*/ + #define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \ + (UNITS_PER_WORD * 4) + + /* The maximum number of bytes that can be copied by a straight-line +- implementation of movmemsi; see mips_block_move_straight. We want ++ implementation of cpymemsi; see mips_block_move_straight. We want + to make sure that any loop-based implementation will iterate at + least twice. */ + #define MIPS_MAX_MOVE_BYTES_STRAIGHT \ +@@ -3119,11 +3119,11 @@ while (0) + + #define MIPS_CALL_RATIO 8 + +-/* Any loop-based implementation of movmemsi will have at least ++/* Any loop-based implementation of cpymemsi will have at least + MIPS_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory + moves, so allow individual copies of fewer elements. + +- When movmemsi is not available, use a value approximating ++ When cpymemsi is not available, use a value approximating + the length of a memcpy call sequence, so that move_by_pieces + will generate inline code if it is shorter than a function call. + Since move_by_pieces_ninsns counts memory-to-memory moves, but +@@ -3131,7 +3131,7 @@ while (0) + value of MIPS_CALL_RATIO to take that into account. */ + + #define MOVE_RATIO(speed) \ +- (HAVE_movmemsi \ ++ (HAVE_cpymemsi \ + ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \ + : MIPS_CALL_RATIO / 2) + +diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md +index 3cfb1a751..a9abb6fdd 100644 +--- a/gcc/config/mips/mips.md ++++ b/gcc/config/mips/mips.md +@@ -5638,7 +5638,7 @@ + ;; Argument 2 is the length + ;; Argument 3 is the alignment + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (match_operand:BLK 0 "general_operand") + (match_operand:BLK 1 "general_operand")) + (use (match_operand:SI 2 "")) +diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c +index 020e980b8..3ce649648 100644 +--- a/gcc/config/msp430/msp430.c ++++ b/gcc/config/msp430/msp430.c +@@ -1457,7 +1457,7 @@ msp430_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + unsigned HOST_WIDE_INT align, boundary; + bool indirect; + +- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); ++ indirect = pass_va_arg_by_reference (type); + if (indirect) + type = build_pointer_type (type); + +diff --git a/gcc/config/nds32/nds32-memory-manipulation.c b/gcc/config/nds32/nds32-memory-manipulation.c +index 71b75dca5..b3f2cd698 100644 +--- a/gcc/config/nds32/nds32-memory-manipulation.c ++++ b/gcc/config/nds32/nds32-memory-manipulation.c +@@ -1,4 +1,4 @@ +-/* Auxiliary functions for expand movmem, setmem, cmpmem, load_multiple ++/* Auxiliary functions for expand cpymem, setmem, cmpmem, load_multiple + and store_multiple pattern of Andes NDS32 cpu for GNU compiler + Copyright (C) 2012-2019 Free Software Foundation, Inc. + Contributed by Andes Technology Corporation. +@@ -120,14 +120,14 @@ nds32_emit_mem_move_block (int base_regno, int count, + + /* ------------------------------------------------------------------------ */ + +-/* Auxiliary function for expand movmem pattern. */ ++/* Auxiliary function for expand cpymem pattern. */ + + static bool +-nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem, ++nds32_expand_cpymemsi_loop_unknown_size (rtx dstmem, rtx srcmem, + rtx size, + rtx alignment) + { +- /* Emit loop version of movmem. ++ /* Emit loop version of cpymem. 
+ + andi $size_least_3_bit, $size, #~7 + add $dst_end, $dst, $size +@@ -254,7 +254,7 @@ nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem, + } + + static bool +-nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, ++nds32_expand_cpymemsi_loop_known_size (rtx dstmem, rtx srcmem, + rtx size, rtx alignment) + { + rtx dst_base_reg, src_base_reg; +@@ -288,7 +288,7 @@ nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, + + if (total_bytes < 8) + { +- /* Emit total_bytes less than 8 loop version of movmem. ++ /* Emit total_bytes less than 8 loop version of cpymem. + add $dst_end, $dst, $size + move $dst_itr, $dst + .Lbyte_mode_loop: +@@ -321,7 +321,7 @@ nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, + } + else if (total_bytes % 8 == 0) + { +- /* Emit multiple of 8 loop version of movmem. ++ /* Emit multiple of 8 loop version of cpymem. + + add $dst_end, $dst, $size + move $dst_itr, $dst +@@ -370,7 +370,7 @@ nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, + else + { + /* Handle size greater than 8, and not a multiple of 8. */ +- return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, ++ return nds32_expand_cpymemsi_loop_unknown_size (dstmem, srcmem, + size, alignment); + } + +@@ -378,19 +378,19 @@ nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, + } + + static bool +-nds32_expand_movmemsi_loop (rtx dstmem, rtx srcmem, ++nds32_expand_cpymemsi_loop (rtx dstmem, rtx srcmem, + rtx size, rtx alignment) + { + if (CONST_INT_P (size)) +- return nds32_expand_movmemsi_loop_known_size (dstmem, srcmem, ++ return nds32_expand_cpymemsi_loop_known_size (dstmem, srcmem, + size, alignment); + else +- return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, ++ return nds32_expand_cpymemsi_loop_unknown_size (dstmem, srcmem, + size, alignment); + } + + static bool +-nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem, ++nds32_expand_cpymemsi_unroll (rtx dstmem, rtx srcmem, + rtx total_bytes, rtx alignment) + { + rtx dst_base_reg, src_base_reg; +@@ -533,13 +533,13 @@ nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem, + This is auxiliary extern function to help create rtx template. + Check nds32-multiple.md file for the patterns. */ + bool +-nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) ++nds32_expand_cpymemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) + { +- if (nds32_expand_movmemsi_unroll (dstmem, srcmem, total_bytes, alignment)) ++ if (nds32_expand_cpymemsi_unroll (dstmem, srcmem, total_bytes, alignment)) + return true; + + if (!optimize_size && optimize > 2) +- return nds32_expand_movmemsi_loop (dstmem, srcmem, total_bytes, alignment); ++ return nds32_expand_cpymemsi_loop (dstmem, srcmem, total_bytes, alignment); + + return false; + } +diff --git a/gcc/config/nds32/nds32-multiple.md b/gcc/config/nds32/nds32-multiple.md +index a1e10c055..98d9508c0 100644 +--- a/gcc/config/nds32/nds32-multiple.md ++++ b/gcc/config/nds32/nds32-multiple.md +@@ -3751,14 +3751,14 @@ + ;; operands[3] is the known shared alignment. 
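The mechanical movmem-to-cpymem renames running through these hunks mirror the optab split in upstream GCC: a cpymem expansion is only used for non-overlapping, memcpy-style copies, which leaves the movmem name free for possibly-overlapping moves. A minimal stand-alone C sketch of that contract (illustrative only, independent of any backend touched by this patch):

#include <stddef.h>
#include <stdio.h>

/* Forward byte copy: valid only when dst and src do not overlap,
   which is exactly what a cpymem-style expansion may assume.  */
static void copy_no_overlap (unsigned char *dst, const unsigned char *src,
                             size_t n)
{
  for (size_t i = 0; i < n; i++)
    dst[i] = src[i];
}

/* Overlap-safe move: must copy backwards when dst lies inside [src, src+n).  */
static void move_maybe_overlap (unsigned char *dst, const unsigned char *src,
                                size_t n)
{
  if (dst > src && dst < src + n)
    for (size_t i = n; i-- > 0; )
      dst[i] = src[i];
  else
    copy_no_overlap (dst, src, n);
}

int main (void)
{
  unsigned char buf[8] = "abcdefg";
  move_maybe_overlap (buf + 2, buf, 5);   /* overlapping: takes the backward path */
  printf ("%.7s\n", (const char *) buf);  /* prints "ababcde", as memmove would */
  return 0;
}

Because the existing expanders already assumed non-overlapping operands, the hunks only rename patterns and helpers; none of the copy logic itself changes.
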
+ + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(match_operand:BLK 0 "general_operand" "") + (match_operand:BLK 1 "general_operand" "") + (match_operand:SI 2 "nds32_reg_constant_operand" "") + (match_operand:SI 3 "const_int_operand" "")] + "" + { +- if (nds32_expand_movmemsi (operands[0], ++ if (nds32_expand_cpymemsi (operands[0], + operands[1], + operands[2], + operands[3])) +diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h +index aaa65d6f0..7ae1954d0 100644 +--- a/gcc/config/nds32/nds32-protos.h ++++ b/gcc/config/nds32/nds32-protos.h +@@ -78,7 +78,7 @@ extern rtx nds32_di_low_part_subreg(rtx); + + extern rtx nds32_expand_load_multiple (int, int, rtx, rtx, bool, rtx *); + extern rtx nds32_expand_store_multiple (int, int, rtx, rtx, bool, rtx *); +-extern bool nds32_expand_movmemsi (rtx, rtx, rtx, rtx); ++extern bool nds32_expand_cpymemsi (rtx, rtx, rtx, rtx); + extern bool nds32_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx); + extern bool nds32_expand_strlen (rtx, rtx, rtx, rtx); + +diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c +index 84a8cae22..73109c6f9 100644 +--- a/gcc/config/pa/pa.c ++++ b/gcc/config/pa/pa.c +@@ -107,7 +107,7 @@ static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx, + static bool forward_branch_p (rtx_insn *); + static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *); + static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *); +-static int compute_movmem_length (rtx_insn *); ++static int compute_cpymem_length (rtx_insn *); + static int compute_clrmem_length (rtx_insn *); + static bool pa_assemble_integer (rtx, unsigned int, int); + static void remove_useless_addtr_insns (int); +@@ -2986,7 +2986,7 @@ pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) + count insns rather than emit them. */ + + static int +-compute_movmem_length (rtx_insn *insn) ++compute_cpymem_length (rtx_insn *insn) + { + rtx pat = PATTERN (insn); + unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0)); +@@ -5061,7 +5061,7 @@ pa_adjust_insn_length (rtx_insn *insn, int length) + && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM + && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode + && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) +- length += compute_movmem_length (insn) - 4; ++ length += compute_cpymem_length (insn) - 4; + /* Block clear pattern. */ + else if (NONJUMP_INSN_P (insn) + && GET_CODE (pat) == PARALLEL +@@ -6378,7 +6378,7 @@ hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + unsigned int size, ofs; + bool indirect; + +- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0); ++ indirect = pass_va_arg_by_reference (type); + if (indirect) + { + type = ptr; +diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md +index 18f8e127d..a37989032 100644 +--- a/gcc/config/pa/pa.md ++++ b/gcc/config/pa/pa.md +@@ -3162,9 +3162,9 @@ + + ;; The definition of this insn does not really explain what it does, + ;; but it should suffice that anything generated as this insn will be +-;; recognized as a movmemsi operation, and that it will not successfully ++;; recognized as a cpymemsi operation, and that it will not successfully + ;; combine with anything. +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (clobber (match_dup 4)) +@@ -3244,7 +3244,7 @@ + ;; operands 0 and 1 are both equivalent to symbolic MEMs. 
Thus, we are + ;; forced to internally copy operands 0 and 1 to operands 7 and 8, + ;; respectively. We then split or peephole optimize after reload. +-(define_insn "movmemsi_prereload" ++(define_insn "cpymemsi_prereload" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r")) + (mem:BLK (match_operand:SI 1 "register_operand" "r,r"))) + (clobber (match_operand:SI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp +@@ -3337,7 +3337,7 @@ + } + }") + +-(define_insn "movmemsi_postreload" ++(define_insn "cpymemsi_postreload" + [(set (mem:BLK (match_operand:SI 0 "register_operand" "+r,r")) + (mem:BLK (match_operand:SI 1 "register_operand" "+r,r"))) + (clobber (match_operand:SI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp +@@ -3352,7 +3352,7 @@ + "* return pa_output_block_move (operands, !which_alternative);" + [(set_attr "type" "multi,multi")]) + +-(define_expand "movmemdi" ++(define_expand "cpymemdi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (clobber (match_dup 4)) +@@ -3432,7 +3432,7 @@ + ;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are + ;; forced to internally copy operands 0 and 1 to operands 7 and 8, + ;; respectively. We then split or peephole optimize after reload. +-(define_insn "movmemdi_prereload" ++(define_insn "cpymemdi_prereload" + [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r")) + (mem:BLK (match_operand:DI 1 "register_operand" "r,r"))) + (clobber (match_operand:DI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp +@@ -3525,7 +3525,7 @@ + } + }") + +-(define_insn "movmemdi_postreload" ++(define_insn "cpymemdi_postreload" + [(set (mem:BLK (match_operand:DI 0 "register_operand" "+r,r")) + (mem:BLK (match_operand:DI 1 "register_operand" "+r,r"))) + (clobber (match_operand:DI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp +diff --git a/gcc/config/pdp11/pdp11.md b/gcc/config/pdp11/pdp11.md +index ce781db06..be5ddc4c3 100644 +--- a/gcc/config/pdp11/pdp11.md ++++ b/gcc/config/pdp11/pdp11.md +@@ -26,7 +26,7 @@ + UNSPECV_BLOCKAGE + UNSPECV_SETD + UNSPECV_SETI +- UNSPECV_MOVMEM ++ UNSPECV_CPYMEM + ]) + + (define_constants +@@ -664,8 +664,8 @@ + [(set_attr "length" "2,2,4,4,2")]) + + ;; Expand a block move. We turn this into a move loop. +-(define_expand "movmemhi" +- [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_MOVMEM) ++(define_expand "cpymemhi" ++ [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_CPYMEM) + (match_operand:BLK 0 "general_operand" "=g") + (match_operand:BLK 1 "general_operand" "g") + (match_operand:HI 2 "immediate_operand" "i") +@@ -694,8 +694,8 @@ + }") + + ;; Expand a block move. We turn this into a move loop. 
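Several of the loop-based expanders touched in the surrounding hunks (m32r, mips and nds32 above; riscv and the pdp11 expander immediately below) share the same overall shape: copy a fixed number of word-sized chunks per loop iteration, then finish the leftover bytes with one short straight-line copy. A stand-alone C model of that shape (the chunk size and the helper name are illustrative only, not taken from any backend):

#include <stddef.h>
#include <string.h>

/* Illustrative chunk size.  The per-target constants play this role in
   the hunks above, e.g. MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER and
   RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER (both four words).  */
#define BYTES_PER_ITER (4 * sizeof (long))

/* Loop-based block copy: whole chunks in a loop, then one short
   straight-line copy for whatever is left over.  */
static void block_copy_model (unsigned char *dst, const unsigned char *src,
                              size_t length)
{
  while (length >= BYTES_PER_ITER)
    {
      memcpy (dst, src, BYTES_PER_ITER);  /* one loop iteration's worth */
      dst += BYTES_PER_ITER;
      src += BYTES_PER_ITER;
      length -= BYTES_PER_ITER;
    }
  if (length != 0)
    memcpy (dst, src, length);            /* leftover, smaller than one chunk */
}

The m32r hunk above emits the same structure at the RTL level: one cpymemsi_internal per full chunk, then one more for the leftover.
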
+-(define_insn_and_split "movmemhi1" +- [(unspec_volatile [(const_int 0)] UNSPECV_MOVMEM) ++(define_insn_and_split "cpymemhi1" ++ [(unspec_volatile [(const_int 0)] UNSPECV_CPYMEM) + (match_operand:HI 0 "register_operand" "+r") + (match_operand:HI 1 "register_operand" "+r") + (match_operand:HI 2 "register_operand" "+r") +@@ -707,7 +707,7 @@ + "" + "#" + "reload_completed" +- [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_MOVMEM) ++ [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_CPYMEM) + (match_dup 0) + (match_dup 1) + (match_dup 2) +@@ -719,8 +719,8 @@ + (clobber (reg:CC CC_REGNUM))])] + "") + +-(define_insn "movmemhi_nocc" +- [(unspec_volatile [(const_int 0)] UNSPECV_MOVMEM) ++(define_insn "cpymemhi_nocc" ++ [(unspec_volatile [(const_int 0)] UNSPECV_CPYMEM) + (match_operand:HI 0 "register_operand" "+r") + (match_operand:HI 1 "register_operand" "+r") + (match_operand:HI 2 "register_operand" "+r") +diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c +index b3297a381..49383d857 100644 +--- a/gcc/config/riscv/riscv.c ++++ b/gcc/config/riscv/riscv.c +@@ -3024,7 +3024,7 @@ riscv_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, + emit_insn(gen_nop ()); + } + +-/* Expand a movmemsi instruction, which copies LENGTH bytes from ++/* Expand a cpymemsi instruction, which copies LENGTH bytes from + memory reference SRC to memory reference DEST. */ + + bool +diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h +index 5130dc826..7e3612641 100644 +--- a/gcc/config/riscv/riscv.h ++++ b/gcc/config/riscv/riscv.h +@@ -829,20 +829,20 @@ while (0) + #undef PTRDIFF_TYPE + #define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int") + +-/* The maximum number of bytes copied by one iteration of a movmemsi loop. */ ++/* The maximum number of bytes copied by one iteration of a cpymemsi loop. */ + + #define RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4) + + /* The maximum number of bytes that can be copied by a straight-line +- movmemsi implementation. */ ++ cpymemsi implementation. */ + + #define RISCV_MAX_MOVE_BYTES_STRAIGHT (RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER * 3) + + /* If a memory-to-memory move would take MOVE_RATIO or more simple +- move-instruction pairs, we will do a movmem or libcall instead. ++ move-instruction pairs, we will do a cpymem or libcall instead. + Do not use move_by_pieces at all when strict alignment is not + in effect but the target has slow unaligned accesses; in this +- case, movmem or libcall is more efficient. */ ++ case, cpymem or libcall is more efficient. */ + + #define MOVE_RATIO(speed) \ + (!STRICT_ALIGNMENT && riscv_slow_unaligned_access_p ? 1 : \ +diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md +index e40535c9e..cfb5fdd6a 100644 +--- a/gcc/config/riscv/riscv.md ++++ b/gcc/config/riscv/riscv.md +@@ -1503,7 +1503,7 @@ + DONE; + }) + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (match_operand:BLK 0 "general_operand") + (match_operand:BLK 1 "general_operand")) + (use (match_operand:SI 2 "")) +diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c +index 8f046de42..ee07aa9df 100644 +--- a/gcc/config/rs6000/rs6000.c ++++ b/gcc/config/rs6000/rs6000.c +@@ -33472,7 +33472,7 @@ get_prev_label (tree function_name) + return NULL_TREE; + } + +-/* Generate PIC and indirect symbol stubs. */ ++/* Generate external symbol indirection stubs (PIC and non-PIC). 
*/ + + void + machopic_output_stub (FILE *file, const char *symb, const char *stub) +@@ -38392,7 +38392,8 @@ rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg, + if ((cookie_val & CALL_LONG) != 0 + && GET_CODE (func_desc) == SYMBOL_REF) + { +- /* FIXME: the longcall opt should not hang off picsymbol stubs. */ ++ /* FIXME: the longcall opt should not hang off this flag, it is most ++ likely incorrect for kernel-mode code-generation. */ + if (darwin_symbol_stubs && TARGET_32BIT) + make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */ + else +diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md +index 2790882c9..9df73e6ef 100644 +--- a/gcc/config/rx/rx.md ++++ b/gcc/config/rx/rx.md +@@ -46,7 +46,7 @@ + (UNSPEC_CONST 13) + + (UNSPEC_MOVSTR 20) +- (UNSPEC_MOVMEM 21) ++ (UNSPEC_CPYMEM 21) + (UNSPEC_SETMEM 22) + (UNSPEC_STRLEN 23) + (UNSPEC_CMPSTRN 24) +@@ -2449,13 +2449,13 @@ + (set_attr "timings" "1111")] ;; The timing is a guesstimate. + ) + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel + [(set (match_operand:BLK 0 "memory_operand") ;; Dest + (match_operand:BLK 1 "memory_operand")) ;; Source + (use (match_operand:SI 2 "register_operand")) ;; Length in bytes + (match_operand 3 "immediate_operand") ;; Align +- (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_MOVMEM)] ++ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_CPYMEM)] + )] + "rx_allow_string_insns" + { +@@ -2486,16 +2486,16 @@ + emit_move_insn (len, force_operand (operands[2], NULL_RTX)); + operands[0] = replace_equiv_address_nv (operands[0], addr1); + operands[1] = replace_equiv_address_nv (operands[1], addr2); +- emit_insn (gen_rx_movmem ()); ++ emit_insn (gen_rx_cpymem ()); + DONE; + } + ) + +-(define_insn "rx_movmem" ++(define_insn "rx_cpymem" + [(set (mem:BLK (reg:SI 1)) + (mem:BLK (reg:SI 2))) + (use (reg:SI 3)) +- (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_MOVMEM) ++ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_CPYMEM) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI 3))] +diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h +index aa04479ec..b162b26b3 100644 +--- a/gcc/config/s390/s390-protos.h ++++ b/gcc/config/s390/s390-protos.h +@@ -104,7 +104,7 @@ extern void s390_reload_symref_address (rtx , rtx , rtx , bool); + extern void s390_expand_plus_operand (rtx, rtx, rtx); + extern void emit_symbolic_move (rtx *); + extern void s390_load_address (rtx, rtx); +-extern bool s390_expand_movmem (rtx, rtx, rtx); ++extern bool s390_expand_cpymem (rtx, rtx, rtx); + extern void s390_expand_setmem (rtx, rtx, rtx); + extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx); + extern void s390_expand_vec_strlen (rtx, rtx, rtx); +diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c +index c35666dec..2959f6423 100644 +--- a/gcc/config/s390/s390.c ++++ b/gcc/config/s390/s390.c +@@ -5400,7 +5400,7 @@ legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED, + /* Emit code to move LEN bytes from DST to SRC. */ + + bool +-s390_expand_movmem (rtx dst, rtx src, rtx len) ++s390_expand_cpymem (rtx dst, rtx src, rtx len) + { + /* When tuning for z10 or higher we rely on the Glibc functions to + do the right thing. 
Only for constant lengths below 64k we will +@@ -5425,14 +5425,14 @@ s390_expand_movmem (rtx dst, rtx src, rtx len) + { + rtx newdst = adjust_address (dst, BLKmode, o); + rtx newsrc = adjust_address (src, BLKmode, o); +- emit_insn (gen_movmem_short (newdst, newsrc, ++ emit_insn (gen_cpymem_short (newdst, newsrc, + GEN_INT (l > 256 ? 255 : l - 1))); + } + } + + else if (TARGET_MVCLE) + { +- emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1))); ++ emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1))); + } + + else +@@ -5494,7 +5494,7 @@ s390_expand_movmem (rtx dst, rtx src, rtx len) + emit_insn (prefetch); + } + +- emit_insn (gen_movmem_short (dst, src, GEN_INT (255))); ++ emit_insn (gen_cpymem_short (dst, src, GEN_INT (255))); + s390_load_address (dst_addr, + gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256))); + s390_load_address (src_addr, +@@ -5511,7 +5511,7 @@ s390_expand_movmem (rtx dst, rtx src, rtx len) + emit_jump (loop_start_label); + emit_label (loop_end_label); + +- emit_insn (gen_movmem_short (dst, src, ++ emit_insn (gen_cpymem_short (dst, src, + convert_to_mode (Pmode, count, 1))); + emit_label (end_label); + } +@@ -5563,7 +5563,7 @@ s390_expand_setmem (rtx dst, rtx len, rtx val) + if (l > 1) + { + rtx newdstp1 = adjust_address (dst, BLKmode, o + 1); +- emit_insn (gen_movmem_short (newdstp1, newdst, ++ emit_insn (gen_cpymem_short (newdstp1, newdst, + GEN_INT (l > 257 ? 255 : l - 2))); + } + } +@@ -5670,7 +5670,7 @@ s390_expand_setmem (rtx dst, rtx len, rtx val) + /* Set the first byte in the block to the value and use an + overlapping mvc for the block. */ + emit_move_insn (adjust_address (dst, QImode, 0), val); +- emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254))); ++ emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254))); + } + s390_load_address (dst_addr, + gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256))); +@@ -5694,7 +5694,7 @@ s390_expand_setmem (rtx dst, rtx len, rtx val) + emit_move_insn (adjust_address (dst, QImode, 0), val); + /* execute only uses the lowest 8 bits of count that's + exactly what we need here. */ +- emit_insn (gen_movmem_short (dstp1, dst, ++ emit_insn (gen_cpymem_short (dstp1, dst, + convert_to_mode (Pmode, count, 1))); + } + +@@ -6336,7 +6336,7 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src) + + dest = adjust_address (dest, BLKmode, 0); + set_mem_size (dest, size); +- s390_expand_movmem (dest, src_mem, GEN_INT (size)); ++ s390_expand_cpymem (dest, src_mem, GEN_INT (size)); + return true; + } + +@@ -12408,7 +12408,7 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + + s390_check_type_for_vector_abi (type, true, false); + +- if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) ++ if (pass_va_arg_by_reference (type)) + { + if (TARGET_DEBUG_ARG) + { +diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md +index 5a3496ac9..8dc3c12df 100644 +--- a/gcc/config/s390/s390.md ++++ b/gcc/config/s390/s390.md +@@ -3196,17 +3196,17 @@ + + + ; +-; movmemM instruction pattern(s). ++; cpymemM instruction pattern(s). 
+ ; + +-(define_expand "movmem" ++(define_expand "cpymem" + [(set (match_operand:BLK 0 "memory_operand" "") ; destination + (match_operand:BLK 1 "memory_operand" "")) ; source + (use (match_operand:GPR 2 "general_operand" "")) ; count + (match_operand 3 "" "")] + "" + { +- if (s390_expand_movmem (operands[0], operands[1], operands[2])) ++ if (s390_expand_cpymem (operands[0], operands[1], operands[2])) + DONE; + else + FAIL; +@@ -3215,7 +3215,7 @@ + ; Move a block that is up to 256 bytes in length. + ; The block length is taken as (operands[2] % 256) + 1. + +-(define_expand "movmem_short" ++(define_expand "cpymem_short" + [(parallel + [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) +@@ -3225,7 +3225,7 @@ + "" + "operands[3] = gen_rtx_SCRATCH (Pmode);") + +-(define_insn "*movmem_short" ++(define_insn "*cpymem_short" + [(set (match_operand:BLK 0 "memory_operand" "=Q,Q,Q,Q") + (match_operand:BLK 1 "memory_operand" "Q,Q,Q,Q")) + (use (match_operand 2 "nonmemory_operand" "n,a,a,a")) +@@ -3293,7 +3293,7 @@ + + ; Move a block of arbitrary length. + +-(define_expand "movmem_long" ++(define_expand "cpymem_long" + [(parallel + [(clobber (match_dup 2)) + (clobber (match_dup 3)) +@@ -3327,7 +3327,7 @@ + operands[3] = reg1; + }) + +-(define_insn "*movmem_long" ++(define_insn "*cpymem_long" + [(clobber (match_operand: 0 "register_operand" "=d")) + (clobber (match_operand: 1 "register_operand" "=d")) + (set (mem:BLK (subreg:P (match_operand: 2 "register_operand" "0") 0)) +@@ -3340,7 +3340,7 @@ + [(set_attr "length" "8") + (set_attr "type" "vs")]) + +-(define_insn "*movmem_long_31z" ++(define_insn "*cpymem_long_31z" + [(clobber (match_operand:TI 0 "register_operand" "=d")) + (clobber (match_operand:TI 1 "register_operand" "=d")) + (set (mem:BLK (subreg:SI (match_operand:TI 2 "register_operand" "0") 4)) +diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md +index fdb80d5d9..e687cf22a 100644 +--- a/gcc/config/sh/sh.md ++++ b/gcc/config/sh/sh.md +@@ -8906,7 +8906,7 @@ + + ;; String/block move insn. + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (mem:BLK (match_operand:BLK 0)) + (mem:BLK (match_operand:BLK 1))) + (use (match_operand:SI 2 "nonmemory_operand")) +diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c +index a993aab76..02966fd03 100644 +--- a/gcc/config/sparc/sparc.c ++++ b/gcc/config/sparc/sparc.c +@@ -7965,7 +7965,7 @@ sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + bool indirect; + tree ptrtype = build_pointer_type (type); + +- if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) ++ if (pass_va_arg_by_reference (type)) + { + indirect = true; + size = rsize = UNITS_PER_WORD; +diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h +index 4b09fc86b..8807a56f4 100644 +--- a/gcc/config/sparc/sparc.h ++++ b/gcc/config/sparc/sparc.h +@@ -1419,7 +1419,7 @@ do { \ + #define MOVE_MAX 8 + + /* If a memory-to-memory move would take MOVE_RATIO or more simple +- move-instruction pairs, we will do a movmem or libcall instead. */ ++ move-instruction pairs, we will do a cpymem or libcall instead. */ + + #define MOVE_RATIO(speed) ((speed) ? 8 : 3) + +diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c +index 8d7439e69..ecc767bfa 100644 +--- a/gcc/config/spu/spu.c ++++ b/gcc/config/spu/spu.c +@@ -4053,8 +4053,7 @@ spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p, + + /* if an object is dynamically sized, a pointer to it is passed + instead of the object itself. 
*/ +- pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type, +- false); ++ pass_by_reference_p = pass_va_arg_by_reference (type); + if (pass_by_reference_p) + type = build_pointer_type (type); + size = int_size_in_bytes (type); +diff --git a/gcc/config/tilegx/tilegx.c b/gcc/config/tilegx/tilegx.c +index 82226da3a..d12f1a99d 100644 +--- a/gcc/config/tilegx/tilegx.c ++++ b/gcc/config/tilegx/tilegx.c +@@ -471,8 +471,7 @@ tilegx_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + + /* If an object is dynamically sized, a pointer to it is passed + instead of the object itself. */ +- pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type, +- false); ++ pass_by_reference_p = pass_va_arg_by_reference (type); + + if (pass_by_reference_p) + type = build_pointer_type (type); +diff --git a/gcc/config/tilepro/tilepro.c b/gcc/config/tilepro/tilepro.c +index c8d69d32f..f1a0df0ad 100644 +--- a/gcc/config/tilepro/tilepro.c ++++ b/gcc/config/tilepro/tilepro.c +@@ -419,8 +419,7 @@ tilepro_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p, + + /* if an object is dynamically sized, a pointer to it is passed + instead of the object itself. */ +- pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type, +- false); ++ pass_by_reference_p = pass_va_arg_by_reference (type); + + if (pass_by_reference_p) + type = build_pointer_type (type); +diff --git a/gcc/config/vax/vax-protos.h b/gcc/config/vax/vax-protos.h +index a76cf0239..a85cf3611 100644 +--- a/gcc/config/vax/vax-protos.h ++++ b/gcc/config/vax/vax-protos.h +@@ -31,7 +31,6 @@ extern void vax_expand_addsub_di_operands (rtx *, enum rtx_code); + extern const char * vax_output_int_move (rtx, rtx *, machine_mode); + extern const char * vax_output_int_add (rtx_insn *, rtx *, machine_mode); + extern const char * vax_output_int_subtract (rtx_insn *, rtx *, machine_mode); +-extern const char * vax_output_movmemsi (rtx, rtx *); + #endif /* RTX_CODE */ + + #ifdef REAL_VALUE_TYPE +diff --git a/gcc/config/vax/vax.h b/gcc/config/vax/vax.h +index a6a8227f7..e7137dc09 100644 +--- a/gcc/config/vax/vax.h ++++ b/gcc/config/vax/vax.h +@@ -430,7 +430,7 @@ enum reg_class { NO_REGS, ALL_REGS, LIM_REG_CLASSES }; + #define MOVE_MAX 8 + + /* If a memory-to-memory move would take MOVE_RATIO or more simple +- move-instruction pairs, we will do a movmem or libcall instead. */ ++ move-instruction pairs, we will do a cpymem or libcall instead. */ + #define MOVE_RATIO(speed) ((speed) ? 6 : 3) + #define CLEAR_RATIO(speed) ((speed) ? 6 : 2) + +diff --git a/gcc/config/vax/vax.md b/gcc/config/vax/vax.md +index bfeae7f80..298f3393d 100644 +--- a/gcc/config/vax/vax.md ++++ b/gcc/config/vax/vax.md +@@ -206,8 +206,8 @@ + }") + + ;; This is here to accept 4 arguments and pass the first 3 along +-;; to the movmemhi1 pattern that really does the work. +-(define_expand "movmemhi" ++;; to the cpymemhi1 pattern that really does the work. ++(define_expand "cpymemhi" + [(set (match_operand:BLK 0 "general_operand" "=g") + (match_operand:BLK 1 "general_operand" "g")) + (use (match_operand:HI 2 "general_operand" "g")) +@@ -215,7 +215,7 @@ + "" + " + { +- emit_insn (gen_movmemhi1 (operands[0], operands[1], operands[2])); ++ emit_insn (gen_cpymemhi1 (operands[0], operands[1], operands[2])); + DONE; + }") + +@@ -224,7 +224,7 @@ + ;; that anything generated as this insn will be recognized as one + ;; and that it won't successfully combine with anything. 
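A second theme in these hunks, separate from the block-copy renames, is the repeated one-line change in the va_arg gimplifiers (pa, mips, msp430, s390, sparc, spu, tilegx and tilepro above; visium and xtensa below): the open-coded call pass_by_reference (NULL, TYPE_MODE (type), type, false) becomes pass_va_arg_by_reference (type). The helper's upstream definition is not part of this patch; behaviorally it stands in for the old four-argument expression, roughly as in the sketch below (GCC-internal types, hypothetical _equiv name, not compilable on its own):

/* Sketch only: models the substitution visible at every call site in
   these hunks.  'tree' and pass_by_reference are GCC internals; the
   _equiv name is made up for illustration, and the body simply restates
   the expression being replaced rather than the upstream implementation.  */
static inline bool
pass_va_arg_by_reference_equiv (tree type)
{
  return pass_by_reference (NULL, TYPE_MODE (type), type, /*named=*/false);
}
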
+ +-(define_insn "movmemhi1" ++(define_insn "cpymemhi1" + [(set (match_operand:BLK 0 "memory_operand" "=o") + (match_operand:BLK 1 "memory_operand" "o")) + (use (match_operand:HI 2 "general_operand" "g")) +diff --git a/gcc/config/visium/visium.c b/gcc/config/visium/visium.c +index 431f64cfc..4ff331362 100644 +--- a/gcc/config/visium/visium.c ++++ b/gcc/config/visium/visium.c +@@ -1637,8 +1637,7 @@ visium_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + tree f_ovfl, f_gbase, f_fbase, f_gbytes, f_fbytes; + tree ovfl, base, bytes; + HOST_WIDE_INT size, rsize; +- const bool by_reference_p +- = pass_by_reference (NULL, TYPE_MODE (type), type, false); ++ const bool by_reference_p = pass_va_arg_by_reference (type); + const bool float_reg_arg_p + = (TARGET_FPU && !by_reference_p + && ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT +diff --git a/gcc/config/visium/visium.h b/gcc/config/visium/visium.h +index 817e7dc70..c9376b28f 100644 +--- a/gcc/config/visium/visium.h ++++ b/gcc/config/visium/visium.h +@@ -1138,8 +1138,8 @@ do \ + always make code faster, but eventually incurs high cost in + increased code size. + +- Since we have a movmemsi pattern, the default MOVE_RATIO is 2, which +- is too low given that movmemsi will invoke a libcall. */ ++ Since we have a cpymemsi pattern, the default MOVE_RATIO is 2, which ++ is too low given that cpymemsi will invoke a libcall. */ + #define MOVE_RATIO(speed) ((speed) ? 9 : 3) + + /* `CLEAR_RATIO (SPEED)` +diff --git a/gcc/config/visium/visium.md b/gcc/config/visium/visium.md +index f53544134..e146b89d1 100644 +--- a/gcc/config/visium/visium.md ++++ b/gcc/config/visium/visium.md +@@ -3006,7 +3006,7 @@ + ;; Argument 2 is the length + ;; Argument 3 is the alignment + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (match_operand:BLK 0 "memory_operand" "") + (match_operand:BLK 1 "memory_operand" "")) + (use (match_operand:SI 2 "general_operand" "")) +diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c +index ee5612441..b275deafa 100644 +--- a/gcc/config/xtensa/xtensa.c ++++ b/gcc/config/xtensa/xtensa.c +@@ -3252,7 +3252,7 @@ xtensa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + tree lab_false, lab_over, lab_false2; + bool indirect; + +- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); ++ indirect = pass_va_arg_by_reference (type); + if (indirect) + type = build_pointer_type (type); + +diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md +index 362e5ff3c..d1448a02f 100644 +--- a/gcc/config/xtensa/xtensa.md ++++ b/gcc/config/xtensa/xtensa.md +@@ -1026,7 +1026,7 @@ + + ;; Block moves + +-(define_expand "movmemsi" ++(define_expand "cpymemsi" + [(parallel [(set (match_operand:BLK 0 "" "") + (match_operand:BLK 1 "" "")) + (use (match_operand:SI 2 "arith_operand" "")) +diff --git a/gcc/coretypes.h b/gcc/coretypes.h +index 2f6b8599d..88fe8a3f9 100644 +--- a/gcc/coretypes.h ++++ b/gcc/coretypes.h +@@ -153,6 +153,14 @@ struct cl_option_handlers; + struct diagnostic_context; + struct pretty_printer; + ++template struct array_traits; ++ ++/* Provides a read-only bitmap view of a single integer bitmask or an ++ array of integer bitmasks, or of a wrapper around such bitmasks. */ ++template, ++ bool has_constant_size = Traits::has_constant_size> ++struct bitmap_view; ++ + /* Address space number for named address space support. 
*/ + typedef unsigned char addr_space_t; + +@@ -332,6 +340,7 @@ namespace gcc { + } + + typedef std::pair tree_pair; ++typedef std::pair string_int_pair; + + /* Define a name->value mapping. */ + template +diff --git a/gcc/coverage.c b/gcc/coverage.c +index 1ffefd5f4..a63cb94e9 100644 +--- a/gcc/coverage.c ++++ b/gcc/coverage.c +@@ -643,7 +643,7 @@ coverage_begin_function (unsigned lineno_checksum, unsigned cfg_checksum) + (DECL_ASSEMBLER_NAME (current_function_decl))); + gcov_write_unsigned (DECL_ARTIFICIAL (current_function_decl) + && !DECL_FUNCTION_VERSIONED (current_function_decl) +- && !DECL_LAMBDA_FUNCTION (current_function_decl)); ++ && !DECL_LAMBDA_FUNCTION_P (current_function_decl)); + gcov_write_filename (xloc.file); + gcov_write_unsigned (xloc.line); + gcov_write_unsigned (xloc.column); +diff --git a/gcc/cp/call.c b/gcc/cp/call.c +index 23a54f3c3..3a821de7a 100644 +--- a/gcc/cp/call.c ++++ b/gcc/cp/call.c +@@ -9166,12 +9166,14 @@ maybe_warn_class_memaccess (location_t loc, tree fndecl, + } + + /* Build and return a call to FN, using NARGS arguments in ARGARRAY. ++ If FN is the result of resolving an overloaded target built-in, ++ ORIG_FNDECL is the original function decl, otherwise it is null. + This function performs no overload resolution, conversion, or other + high-level operations. */ + + tree + build_cxx_call (tree fn, int nargs, tree *argarray, +- tsubst_flags_t complain) ++ tsubst_flags_t complain, tree orig_fndecl) + { + tree fndecl; + +@@ -9181,11 +9183,13 @@ build_cxx_call (tree fn, int nargs, tree *argarray, + SET_EXPR_LOCATION (fn, loc); + + fndecl = get_callee_fndecl (fn); ++ if (!orig_fndecl) ++ orig_fndecl = fndecl; + + /* Check that arguments to builtin functions match the expectations. */ + if (fndecl + && !processing_template_decl +- && fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)) ++ && fndecl_built_in_p (fndecl)) + { + int i; + +@@ -9195,7 +9199,7 @@ build_cxx_call (tree fn, int nargs, tree *argarray, + argarray[i] = maybe_constant_value (argarray[i]); + + if (!check_builtin_function_arguments (EXPR_LOCATION (fn), vNULL, fndecl, +- nargs, argarray)) ++ orig_fndecl, nargs, argarray)) + return error_mark_node; + } + +diff --git a/gcc/cp/cp-objcp-common.h b/gcc/cp/cp-objcp-common.h +index 89a889a7d..e5d34f180 100644 +--- a/gcc/cp/cp-objcp-common.h ++++ b/gcc/cp/cp-objcp-common.h +@@ -35,6 +35,8 @@ extern tree cp_get_global_decls (); + extern tree cp_pushdecl (tree); + extern void cp_register_dumps (gcc::dump_manager *); + extern tree cxx_make_type_hook (tree_code); ++extern tree cxx_simulate_enum_decl (location_t, const char *, ++ vec); + + /* Lang hooks that are shared between C++ and ObjC++ are defined here. 
Hooks + specific to C++ or ObjC++ go in cp/cp-lang.c and objcp/objcp-lang.c, +@@ -100,6 +102,9 @@ extern tree cxx_make_type_hook (tree_code); + #define LANG_HOOKS_BUILTIN_FUNCTION cxx_builtin_function + #undef LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE + #define LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE cxx_builtin_function_ext_scope ++#undef LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL ++#define LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL \ ++ cxx_simulate_builtin_function_decl + #undef LANG_HOOKS_TYPE_HASH_EQ + #define LANG_HOOKS_TYPE_HASH_EQ cxx_type_hash_eq + #undef LANG_HOOKS_COPY_LANG_QUALIFIERS +@@ -128,6 +133,8 @@ extern tree cxx_make_type_hook (tree_code); + + #undef LANG_HOOKS_MAKE_TYPE + #define LANG_HOOKS_MAKE_TYPE cxx_make_type_hook ++#undef LANG_HOOKS_SIMULATE_ENUM_DECL ++#define LANG_HOOKS_SIMULATE_ENUM_DECL cxx_simulate_enum_decl + #undef LANG_HOOKS_TYPE_FOR_MODE + #define LANG_HOOKS_TYPE_FOR_MODE c_common_type_for_mode + #undef LANG_HOOKS_TYPE_FOR_SIZE +diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h +index f7c3eea4c..4bba1887f 100644 +--- a/gcc/cp/cp-tree.h ++++ b/gcc/cp/cp-tree.h +@@ -6245,7 +6245,8 @@ extern tree perform_direct_initialization_if_possible (tree, tree, bool, + tsubst_flags_t); + extern tree in_charge_arg_for_name (tree); + extern tree build_cxx_call (tree, int, tree *, +- tsubst_flags_t); ++ tsubst_flags_t, ++ tree = NULL_TREE); + extern bool is_std_init_list (tree); + extern bool is_list_ctor (tree); + extern void validate_conversion_obstack (void); +@@ -6451,6 +6452,7 @@ extern tmpl_spec_kind current_tmpl_spec_kind (int); + extern tree cp_fname_init (const char *, tree *); + extern tree cxx_builtin_function (tree decl); + extern tree cxx_builtin_function_ext_scope (tree decl); ++extern tree cxx_simulate_builtin_function_decl (tree); + extern tree check_elaborated_type_specifier (enum tag_types, tree, bool); + extern void warn_extern_redeclared_static (tree, tree); + extern tree cxx_comdat_group (tree); +@@ -7386,7 +7388,8 @@ extern tree get_member_function_from_ptrfunc (tree *, tree, tsubst_flags_t); + extern tree cp_build_function_call_nary (tree, tsubst_flags_t, ...) + ATTRIBUTE_SENTINEL; + extern tree cp_build_function_call_vec (tree, vec **, +- tsubst_flags_t); ++ tsubst_flags_t, ++ tree = NULL_TREE); + extern tree build_x_binary_op (const op_location_t &, + enum tree_code, tree, + enum tree_code, tree, +diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c +index 5c82c2272..928ac3f21 100644 +--- a/gcc/cp/decl.c ++++ b/gcc/cp/decl.c +@@ -2273,7 +2273,8 @@ next_arg:; + DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (newdecl) + |= DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (olddecl); + DECL_NO_LIMIT_STACK (newdecl) |= DECL_NO_LIMIT_STACK (olddecl); +- DECL_IS_OPERATOR_NEW (newdecl) |= DECL_IS_OPERATOR_NEW (olddecl); ++ if (DECL_IS_OPERATOR_NEW_P (olddecl)) ++ DECL_SET_IS_OPERATOR_NEW (newdecl, true); + DECL_LOOPING_CONST_OR_PURE_P (newdecl) + |= DECL_LOOPING_CONST_OR_PURE_P (olddecl); + +@@ -2520,8 +2521,7 @@ next_arg:; + if (fndecl_built_in_p (olddecl) + && (new_defines_function ? GNU_INLINE_P (newdecl) : types_match)) + { +- DECL_BUILT_IN_CLASS (newdecl) = DECL_BUILT_IN_CLASS (olddecl); +- DECL_FUNCTION_CODE (newdecl) = DECL_FUNCTION_CODE (olddecl); ++ copy_decl_built_in_function (newdecl, olddecl); + /* If we're keeping the built-in definition, keep the rtl, + regardless of declaration matches. 
*/ + COPY_DECL_RTL (olddecl, newdecl); +@@ -4335,10 +4335,10 @@ cxx_init_decl_processing (void) + deltype = build_exception_variant (deltype, empty_except_spec); + tree opnew = push_cp_library_fn (NEW_EXPR, newtype, 0); + DECL_IS_MALLOC (opnew) = 1; +- DECL_IS_OPERATOR_NEW (opnew) = 1; ++ DECL_SET_IS_OPERATOR_NEW (opnew, true); + opnew = push_cp_library_fn (VEC_NEW_EXPR, newtype, 0); + DECL_IS_MALLOC (opnew) = 1; +- DECL_IS_OPERATOR_NEW (opnew) = 1; ++ DECL_SET_IS_OPERATOR_NEW (opnew, true); + push_cp_library_fn (DELETE_EXPR, deltype, ECF_NOTHROW); + push_cp_library_fn (VEC_DELETE_EXPR, deltype, ECF_NOTHROW); + if (flag_sized_deallocation) +@@ -4371,10 +4371,10 @@ cxx_init_decl_processing (void) + newtype = build_exception_variant (newtype, new_eh_spec); + opnew = push_cp_library_fn (NEW_EXPR, newtype, 0); + DECL_IS_MALLOC (opnew) = 1; +- DECL_IS_OPERATOR_NEW (opnew) = 1; ++ DECL_SET_IS_OPERATOR_NEW (opnew, true); + opnew = push_cp_library_fn (VEC_NEW_EXPR, newtype, 0); + DECL_IS_MALLOC (opnew) = 1; +- DECL_IS_OPERATOR_NEW (opnew) = 1; ++ DECL_SET_IS_OPERATOR_NEW (opnew, true); + + /* operator delete (void *, align_val_t); */ + deltype = build_function_type_list (void_type_node, ptr_type_node, +@@ -4614,6 +4614,19 @@ cxx_builtin_function_ext_scope (tree decl) + return builtin_function_1 (decl, NULL_TREE, true); + } + ++/* Implement LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL. */ ++ ++tree ++cxx_simulate_builtin_function_decl (tree decl) ++{ ++ retrofit_lang_decl (decl); ++ ++ DECL_ARTIFICIAL (decl) = 1; ++ SET_DECL_LANGUAGE (decl, lang_cplusplus); ++ DECL_CONTEXT (decl) = FROB_CONTEXT (current_namespace); ++ return pushdecl (decl); ++} ++ + /* Generate a FUNCTION_DECL with the typical flags for a runtime library + function. Not called directly. */ + +@@ -13570,7 +13583,7 @@ grok_op_properties (tree decl, bool complain) + coerce_delete_type (decl, loc); + else + { +- DECL_IS_OPERATOR_NEW (decl) = 1; ++ DECL_SET_IS_OPERATOR_NEW (decl, true); + TREE_TYPE (decl) = coerce_new_type (TREE_TYPE (decl), loc); + } + +@@ -15119,6 +15132,40 @@ lookup_enumerator (tree enumtype, tree name) + return e? TREE_VALUE (e) : NULL_TREE; + } + ++/* Implement LANG_HOOKS_SIMULATE_ENUM_DECL. */ ++ ++tree ++cxx_simulate_enum_decl (location_t loc, const char *name, ++ vec values) ++{ ++ location_t saved_loc = input_location; ++ input_location = loc; ++ ++ tree enumtype = start_enum (get_identifier (name), NULL_TREE, NULL_TREE, ++ NULL_TREE, false, NULL); ++ if (!OPAQUE_ENUM_P (enumtype)) ++ { ++ error_at (loc, "multiple definition of %q#T", enumtype); ++ inform (DECL_SOURCE_LOCATION (TYPE_MAIN_DECL (enumtype)), ++ "previous definition here"); ++ return enumtype; ++ } ++ SET_OPAQUE_ENUM_P (enumtype, false); ++ DECL_SOURCE_LOCATION (TYPE_NAME (enumtype)) = loc; ++ ++ string_int_pair *value; ++ unsigned int i; ++ FOR_EACH_VEC_ELT (values, i, value) ++ build_enumerator (get_identifier (value->first), ++ build_int_cst (integer_type_node, value->second), ++ enumtype, NULL_TREE, loc); ++ ++ finish_enum_value_list (enumtype); ++ finish_enum (enumtype); ++ ++ input_location = saved_loc; ++ return enumtype; ++} + + /* We're defining DECL. Make sure that its type is OK. */ + +diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c +index 60fe58e03..6fc6ed4e3 100644 +--- a/gcc/cp/parser.c ++++ b/gcc/cp/parser.c +@@ -10977,7 +10977,7 @@ cp_parser_lambda_declarator_opt (cp_parser* parser, tree lambda_expr) + DECL_ARTIFICIAL (fco) = 1; + /* Give the object parameter a different name. 
*/ + DECL_NAME (DECL_ARGUMENTS (fco)) = closure_identifier; +- DECL_LAMBDA_FUNCTION (fco) = 1; ++ DECL_SET_LAMBDA_FUNCTION (fco, true); + } + if (template_param_list) + { +diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c +index ff7921533..bd6df79a4 100644 +--- a/gcc/cp/pt.c ++++ b/gcc/cp/pt.c +@@ -28431,9 +28431,8 @@ declare_integer_pack (void) + NULL_TREE), + NULL_TREE, ECF_CONST); + DECL_DECLARED_CONSTEXPR_P (ipfn) = true; +- DECL_BUILT_IN_CLASS (ipfn) = BUILT_IN_FRONTEND; +- DECL_FUNCTION_CODE (ipfn) +- = (enum built_in_function) (int) CP_BUILT_IN_INTEGER_PACK; ++ set_decl_built_in_function (ipfn, BUILT_IN_FRONTEND, ++ CP_BUILT_IN_INTEGER_PACK); + } + + /* Set up the hash tables for template instantiations. */ +diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c +index c42fd731c..82f7bb0bd 100644 +--- a/gcc/cp/typeck.c ++++ b/gcc/cp/typeck.c +@@ -3738,11 +3738,11 @@ build_function_call (location_t /*loc*/, + tree + build_function_call_vec (location_t /*loc*/, vec /*arg_loc*/, + tree function, vec *params, +- vec * /*origtypes*/) ++ vec * /*origtypes*/, tree orig_function) + { + vec *orig_params = params; + tree ret = cp_build_function_call_vec (function, ¶ms, +- tf_warning_or_error); ++ tf_warning_or_error, orig_function); + + /* cp_build_function_call_vec can reallocate PARAMS by adding + default arguments. That should never happen here. Verify +@@ -3787,13 +3787,15 @@ cp_build_function_call_nary (tree function, tsubst_flags_t complain, ...) + return ret; + } + +-/* Build a function call using a vector of arguments. PARAMS may be +- NULL if there are no parameters. This changes the contents of +- PARAMS. */ ++/* Build a function call using a vector of arguments. ++ If FUNCTION is the result of resolving an overloaded target built-in, ++ ORIG_FNDECL is the original function decl, otherwise it is null. ++ PARAMS may be NULL if there are no parameters. This changes the ++ contents of PARAMS. */ + + tree + cp_build_function_call_vec (tree function, vec **params, +- tsubst_flags_t complain) ++ tsubst_flags_t complain, tree orig_fndecl) + { + tree fntype, fndecl; + int is_method; +@@ -3918,7 +3920,7 @@ cp_build_function_call_vec (tree function, vec **params, + bool warned_p = check_function_arguments (input_location, fndecl, fntype, + nargs, argarray, NULL); + +- ret = build_cxx_call (function, nargs, argarray, complain); ++ ret = build_cxx_call (function, nargs, argarray, complain, orig_fndecl); + + if (warned_p) + { +diff --git a/gcc/cse.c b/gcc/cse.c +index 6c9cda16a..18eb8dfbb 100644 +--- a/gcc/cse.c ++++ b/gcc/cse.c +@@ -559,7 +559,6 @@ static struct table_elt *insert_with_costs (rtx, struct table_elt *, unsigned, + static struct table_elt *insert (rtx, struct table_elt *, unsigned, + machine_mode); + static void merge_equiv_classes (struct table_elt *, struct table_elt *); +-static void invalidate_reg (rtx, bool); + static void invalidate (rtx, machine_mode); + static void remove_invalid_refs (unsigned int); + static void remove_invalid_subreg_refs (unsigned int, poly_uint64, +@@ -1821,12 +1820,10 @@ check_dependence (const_rtx x, rtx exp, machine_mode mode, rtx addr) + } + + /* Remove from the hash table, or mark as invalid, all expressions whose +- values could be altered by storing in register X. +- +- CLOBBER_HIGH is set if X was part of a CLOBBER_HIGH expression. */ ++ values could be altered by storing in register X. 
*/ + + static void +-invalidate_reg (rtx x, bool clobber_high) ++invalidate_reg (rtx x) + { + gcc_assert (GET_CODE (x) == REG); + +@@ -1851,10 +1848,7 @@ invalidate_reg (rtx x, bool clobber_high) + SUBREG_TICKED (regno) = -1; + + if (regno >= FIRST_PSEUDO_REGISTER) +- { +- gcc_assert (!clobber_high); +- remove_pseudo_from_table (x, hash); +- } ++ remove_pseudo_from_table (x, hash); + else + { + HOST_WIDE_INT in_table = TEST_HARD_REG_BIT (hard_regs_in_table, regno); +@@ -1882,18 +1876,10 @@ invalidate_reg (rtx x, bool clobber_high) + if (!REG_P (p->exp) || REGNO (p->exp) >= FIRST_PSEUDO_REGISTER) + continue; + +- if (clobber_high) +- { +- if (reg_is_clobbered_by_clobber_high (p->exp, x)) +- remove_from_table (p, hash); +- } +- else +- { +- unsigned int tregno = REGNO (p->exp); +- unsigned int tendregno = END_REGNO (p->exp); +- if (tendregno > regno && tregno < endregno) +- remove_from_table (p, hash); +- } ++ unsigned int tregno = REGNO (p->exp); ++ unsigned int tendregno = END_REGNO (p->exp); ++ if (tendregno > regno && tregno < endregno) ++ remove_from_table (p, hash); + } + } + } +@@ -1920,7 +1906,7 @@ invalidate (rtx x, machine_mode full_mode) + switch (GET_CODE (x)) + { + case REG: +- invalidate_reg (x, false); ++ invalidate_reg (x); + return; + + case SUBREG: +@@ -4420,8 +4406,6 @@ canonicalize_insn (rtx_insn *insn, struct set **psets, int n_sets) + if (MEM_P (XEXP (x, 0))) + canon_reg (XEXP (x, 0), insn); + } +- else if (GET_CODE (x) == CLOBBER_HIGH) +- gcc_assert (REG_P (XEXP (x, 0))); + else if (GET_CODE (x) == USE + && ! (REG_P (XEXP (x, 0)) + && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER)) +@@ -4453,8 +4437,6 @@ canonicalize_insn (rtx_insn *insn, struct set **psets, int n_sets) + if (MEM_P (XEXP (y, 0))) + canon_reg (XEXP (y, 0), insn); + } +- else if (GET_CODE (y) == CLOBBER_HIGH) +- gcc_assert (REG_P (XEXP (y, 0))); + else if (GET_CODE (y) == USE + && ! 
(REG_P (XEXP (y, 0)) + && REGNO (XEXP (y, 0)) < FIRST_PSEUDO_REGISTER)) +@@ -6155,12 +6137,6 @@ invalidate_from_clobbers (rtx_insn *insn) + invalidate (XEXP (ref, 0), GET_MODE (ref)); + } + } +- if (GET_CODE (x) == CLOBBER_HIGH) +- { +- rtx ref = XEXP (x, 0); +- gcc_assert (REG_P (ref)); +- invalidate_reg (ref, true); +- } + else if (GET_CODE (x) == PARALLEL) + { + int i; +@@ -6177,12 +6153,6 @@ invalidate_from_clobbers (rtx_insn *insn) + || GET_CODE (ref) == ZERO_EXTRACT) + invalidate (XEXP (ref, 0), GET_MODE (ref)); + } +- else if (GET_CODE (y) == CLOBBER_HIGH) +- { +- rtx ref = XEXP (y, 0); +- gcc_assert (REG_P (ref)); +- invalidate_reg (ref, true); +- } + } + } + } +@@ -6204,12 +6174,6 @@ invalidate_from_sets_and_clobbers (rtx_insn *insn) + rtx temx = XEXP (tem, 0); + if (GET_CODE (temx) == CLOBBER) + invalidate (SET_DEST (temx), VOIDmode); +- else if (GET_CODE (temx) == CLOBBER_HIGH) +- { +- rtx temref = XEXP (temx, 0); +- gcc_assert (REG_P (temref)); +- invalidate_reg (temref, true); +- } + } + } + +@@ -6237,12 +6201,6 @@ invalidate_from_sets_and_clobbers (rtx_insn *insn) + || GET_CODE (clobbered) == ZERO_EXTRACT) + invalidate (XEXP (clobbered, 0), GET_MODE (clobbered)); + } +- else if (GET_CODE (y) == CLOBBER_HIGH) +- { +- rtx ref = XEXP (y, 0); +- gcc_assert (REG_P (ref)); +- invalidate_reg (ref, true); +- } + else if (GET_CODE (y) == SET && GET_CODE (SET_SRC (y)) == CALL) + invalidate (SET_DEST (y), VOIDmode); + } +@@ -6902,10 +6860,6 @@ count_reg_usage (rtx x, int *counts, rtx dest, int incr) + count_reg_usage (XEXP (XEXP (x, 0), 0), counts, NULL_RTX, incr); + return; + +- case CLOBBER_HIGH: +- gcc_assert (REG_P ((XEXP (x, 0)))); +- return; +- + case SET: + /* Unless we are setting a REG, count everything in SET_DEST. */ + if (!REG_P (SET_DEST (x))) +@@ -6958,8 +6912,7 @@ count_reg_usage (rtx x, int *counts, rtx dest, int incr) + || (REG_NOTE_KIND (x) != REG_NONNEG && GET_CODE (XEXP (x,0)) == USE) + /* FUNCTION_USAGE expression lists may include (CLOBBER (mem /u)), + involving registers in the address. */ +- || GET_CODE (XEXP (x, 0)) == CLOBBER +- || GET_CODE (XEXP (x, 0)) == CLOBBER_HIGH) ++ || GET_CODE (XEXP (x, 0)) == CLOBBER) + count_reg_usage (XEXP (x, 0), counts, NULL_RTX, incr); + + count_reg_usage (XEXP (x, 1), counts, NULL_RTX, incr); +@@ -7043,9 +6996,7 @@ insn_live_p (rtx_insn *insn, int *counts) + if (set_live_p (elt, insn, counts)) + return true; + } +- else if (GET_CODE (elt) != CLOBBER +- && GET_CODE (elt) != CLOBBER_HIGH +- && GET_CODE (elt) != USE) ++ else if (GET_CODE (elt) != CLOBBER && GET_CODE (elt) != USE) + return true; + } + return false; +@@ -7158,7 +7109,7 @@ delete_trivially_dead_insns (rtx_insn *insns, int nreg) + else if (INSN_P (insn)) + { + count_reg_usage (insn, counts, NULL_RTX, 1); +- note_stores (PATTERN (insn), count_stores, counts + nreg * 2); ++ note_stores (insn, count_stores, counts + nreg * 2); + } + /* If there can be debug insns, COUNTS are 3 consecutive arrays. + First one counts how many times each pseudo is used outside +diff --git a/gcc/cselib.c b/gcc/cselib.c +index 108b2588c..e3408bb38 100644 +--- a/gcc/cselib.c ++++ b/gcc/cselib.c +@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3. If not see + #include "dumpfile.h" + #include "cselib.h" + #include "params.h" ++#include "function-abi.h" + + /* A list of cselib_val structures. 
*/ + struct elt_list +@@ -54,8 +55,7 @@ static unsigned int cselib_hash_rtx (rtx, int, machine_mode); + static cselib_val *new_cselib_val (unsigned int, machine_mode, rtx); + static void add_mem_for_addr (cselib_val *, cselib_val *, rtx); + static cselib_val *cselib_lookup_mem (rtx, int); +-static void cselib_invalidate_regno (unsigned int, machine_mode, +- const_rtx = NULL); ++static void cselib_invalidate_regno (unsigned int, machine_mode); + static void cselib_invalidate_mem (rtx); + static void cselib_record_set (rtx, cselib_val *, cselib_val *); + static void cselib_record_sets (rtx_insn *); +@@ -1662,7 +1662,6 @@ cselib_expand_value_rtx_1 (rtx orig, struct expand_value_data *evd, + /* SCRATCH must be shared because they represent distinct values. */ + return orig; + case CLOBBER: +- case CLOBBER_HIGH: + if (REG_P (XEXP (orig, 0)) && HARD_REGISTER_NUM_P (REGNO (XEXP (orig, 0)))) + return orig; + break; +@@ -2165,8 +2164,7 @@ cselib_lookup (rtx x, machine_mode mode, + invalidating call clobbered registers across a call. */ + + static void +-cselib_invalidate_regno (unsigned int regno, machine_mode mode, +- const_rtx setter) ++cselib_invalidate_regno (unsigned int regno, machine_mode mode) + { + unsigned int endregno; + unsigned int i; +@@ -2189,9 +2187,6 @@ cselib_invalidate_regno (unsigned int regno, machine_mode mode, + i = regno - max_value_regs; + + endregno = end_hard_regno (mode, regno); +- +- if (setter && GET_CODE (setter) == CLOBBER_HIGH) +- gcc_assert (endregno == regno + 1); + } + else + { +@@ -2224,19 +2219,6 @@ cselib_invalidate_regno (unsigned int regno, machine_mode mode, + continue; + } + +- /* Ignore if clobber high and the register isn't clobbered. */ +- if (setter && GET_CODE (setter) == CLOBBER_HIGH) +- { +- gcc_assert (endregno == regno + 1); +- const_rtx x = XEXP (setter, 0); +- if (!reg_is_clobbered_by_clobber_high (i, GET_MODE (v->val_rtx), +- x)) +- { +- l = &(*l)->next; +- continue; +- } +- } +- + /* We have an overlap. */ + if (*l == REG_VALUES (i)) + { +@@ -2371,10 +2353,10 @@ cselib_invalidate_mem (rtx mem_rtx) + *vp = &dummy_val; + } + +-/* Invalidate DEST, which is being assigned to or clobbered by SETTER. */ ++/* Invalidate DEST. */ + + void +-cselib_invalidate_rtx (rtx dest, const_rtx setter) ++cselib_invalidate_rtx (rtx dest) + { + while (GET_CODE (dest) == SUBREG + || GET_CODE (dest) == ZERO_EXTRACT +@@ -2382,7 +2364,7 @@ cselib_invalidate_rtx (rtx dest, const_rtx setter) + dest = XEXP (dest, 0); + + if (REG_P (dest)) +- cselib_invalidate_regno (REGNO (dest), GET_MODE (dest), setter); ++ cselib_invalidate_regno (REGNO (dest), GET_MODE (dest)); + else if (MEM_P (dest)) + cselib_invalidate_mem (dest); + } +@@ -2390,10 +2372,10 @@ cselib_invalidate_rtx (rtx dest, const_rtx setter) + /* A wrapper for cselib_invalidate_rtx to be called via note_stores. */ + + static void +-cselib_invalidate_rtx_note_stores (rtx dest, const_rtx setter, ++cselib_invalidate_rtx_note_stores (rtx dest, const_rtx, + void *data ATTRIBUTE_UNUSED) + { +- cselib_invalidate_rtx (dest, setter); ++ cselib_invalidate_rtx (dest); + } + + /* Record the result of a SET instruction. DEST is being set; the source +@@ -2659,7 +2641,7 @@ cselib_record_sets (rtx_insn *insn) + /* Invalidate all locations written by this insn. Note that the elts we + looked up in the previous loop aren't affected, just some of their + locations may go away. 
*/ +- note_stores (body, cselib_invalidate_rtx_note_stores, NULL); ++ note_pattern_stores (body, cselib_invalidate_rtx_note_stores, NULL); + + for (i = n_sets_before_autoinc; i < n_sets; i++) + cselib_invalidate_rtx (sets[i].dest); +@@ -2765,11 +2747,13 @@ cselib_process_insn (rtx_insn *insn) + memory. */ + if (CALL_P (insn)) + { ++ function_abi callee_abi = insn_callee_abi (insn); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (call_used_regs[i] ++ if (call_used_or_fixed_reg_p (i) + || (REG_VALUES (i) && REG_VALUES (i)->elt + && (targetm.hard_regno_call_part_clobbered +- (insn, i, GET_MODE (REG_VALUES (i)->elt->val_rtx))))) ++ (callee_abi.id (), i, ++ GET_MODE (REG_VALUES (i)->elt->val_rtx))))) + cselib_invalidate_regno (i, reg_raw_mode[i]); + + /* Since it is not clear how cselib is going to be used, be +@@ -2794,11 +2778,9 @@ cselib_process_insn (rtx_insn *insn) + if (CALL_P (insn)) + { + for (x = CALL_INSN_FUNCTION_USAGE (insn); x; x = XEXP (x, 1)) +- { +- gcc_assert (GET_CODE (XEXP (x, 0)) != CLOBBER_HIGH); +- if (GET_CODE (XEXP (x, 0)) == CLOBBER) +- cselib_invalidate_rtx (XEXP (XEXP (x, 0), 0)); +- } ++ if (GET_CODE (XEXP (x, 0)) == CLOBBER) ++ cselib_invalidate_rtx (XEXP (XEXP (x, 0), 0)); ++ + /* Flush everything on setjmp. */ + if (cselib_preserve_constants + && find_reg_note (insn, REG_SETJMP, NULL)) +diff --git a/gcc/cselib.h b/gcc/cselib.h +index 8b8d3e8d5..b5854aedc 100644 +--- a/gcc/cselib.h ++++ b/gcc/cselib.h +@@ -92,7 +92,7 @@ extern bool cselib_dummy_expand_value_rtx_cb (rtx, bitmap, int, + cselib_expand_callback, void *); + extern rtx cselib_subst_to_values (rtx, machine_mode); + extern rtx cselib_subst_to_values_from_insn (rtx, machine_mode, rtx_insn *); +-extern void cselib_invalidate_rtx (rtx, const_rtx = NULL); ++extern void cselib_invalidate_rtx (rtx); + + extern void cselib_reset_table (unsigned int); + extern unsigned int cselib_get_next_uid (void); +diff --git a/gcc/d/intrinsics.cc b/gcc/d/intrinsics.cc +index 4bd321b2d..56eab522e 100644 +--- a/gcc/d/intrinsics.cc ++++ b/gcc/d/intrinsics.cc +@@ -134,10 +134,7 @@ maybe_set_intrinsic (FuncDeclaration *decl) + /* If there is no function body, then the implementation is always + provided by the compiler. */ + if (!decl->fbody) +- { +- DECL_BUILT_IN_CLASS (decl->csym) = BUILT_IN_FRONTEND; +- DECL_FUNCTION_CODE (decl->csym) = (built_in_function) code; +- } ++ set_decl_built_in_function (decl->csym, BUILT_IN_FRONTEND, code); + + /* Infer whether the intrinsic can be used for CTFE, let the + front-end know that it can be evaluated at compile-time. */ +diff --git a/gcc/dce.c b/gcc/dce.c +index 68d3713b0..2894fa57b 100644 +--- a/gcc/dce.c ++++ b/gcc/dce.c +@@ -174,7 +174,6 @@ deletable_insn_p (rtx_insn *insn, bool fast, bitmap arg_stores) + return false; + + case CLOBBER: +- case CLOBBER_HIGH: + if (fast) + { + /* A CLOBBER of a dead pseudo register serves no purpose. 
+@@ -244,10 +243,7 @@ static void + mark_nonreg_stores_1 (rtx dest, const_rtx pattern, void *data) + { + if (GET_CODE (pattern) != CLOBBER && !REG_P (dest)) +- { +- gcc_checking_assert (GET_CODE (pattern) != CLOBBER_HIGH); +- mark_insn ((rtx_insn *) data, true); +- } ++ mark_insn ((rtx_insn *) data, true); + } + + +@@ -258,22 +254,19 @@ static void + mark_nonreg_stores_2 (rtx dest, const_rtx pattern, void *data) + { + if (GET_CODE (pattern) != CLOBBER && !REG_P (dest)) +- { +- gcc_checking_assert (GET_CODE (pattern) != CLOBBER_HIGH); +- mark_insn ((rtx_insn *) data, false); +- } ++ mark_insn ((rtx_insn *) data, false); + } + + +-/* Mark INSN if BODY stores to a non-register destination. */ ++/* Mark INSN if it stores to a non-register destination. */ + + static void +-mark_nonreg_stores (rtx body, rtx_insn *insn, bool fast) ++mark_nonreg_stores (rtx_insn *insn, bool fast) + { + if (fast) +- note_stores (body, mark_nonreg_stores_1, insn); ++ note_stores (insn, mark_nonreg_stores_1, insn); + else +- note_stores (body, mark_nonreg_stores_2, insn); ++ note_stores (insn, mark_nonreg_stores_2, insn); + } + + +@@ -691,7 +684,7 @@ prescan_insns_for_dce (bool fast) + if (arg_stores && bitmap_bit_p (arg_stores, INSN_UID (insn))) + continue; + if (deletable_insn_p (insn, fast, arg_stores)) +- mark_nonreg_stores (PATTERN (insn), insn, fast); ++ mark_nonreg_stores (insn, fast); + else + mark_insn (insn, fast); + } +diff --git a/gcc/ddg.c b/gcc/ddg.c +index 82554ed96..47a50d8ea 100644 +--- a/gcc/ddg.c ++++ b/gcc/ddg.c +@@ -84,7 +84,7 @@ static bool + mem_write_insn_p (rtx_insn *insn) + { + mem_ref_p = false; +- note_stores (PATTERN (insn), mark_mem_store, NULL); ++ note_stores (insn, mark_mem_store, NULL); + return mem_ref_p; + } + +diff --git a/gcc/defaults.h b/gcc/defaults.h +index b75342561..72d4fba11 100644 +--- a/gcc/defaults.h ++++ b/gcc/defaults.h +@@ -1318,10 +1318,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #endif + + /* If a memory-to-memory move would take MOVE_RATIO or more simple +- move-instruction sequences, we will do a movmem or libcall instead. */ ++ move-instruction sequences, we will do a cpymem or libcall instead. */ + + #ifndef MOVE_RATIO +-#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti) ++#if defined (HAVE_cpymemqi) || defined (HAVE_cpymemhi) || defined (HAVE_cpymemsi) || defined (HAVE_cpymemdi) || defined (HAVE_cpymemti) + #define MOVE_RATIO(speed) 2 + #else + /* If we are optimizing for space (-Os), cut down the default move ratio. */ +@@ -1342,7 +1342,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #endif + + /* If a memory set (to value other than zero) operation would take +- SET_RATIO or more simple move-instruction sequences, we will do a movmem ++ SET_RATIO or more simple move-instruction sequences, we will do a setmem + or libcall instead. */ + #ifndef SET_RATIO + #define SET_RATIO(speed) MOVE_RATIO (speed) +@@ -1459,4 +1459,18 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #define DWARF_GNAT_ENCODINGS_DEFAULT DWARF_GNAT_ENCODINGS_GDB + #endif + ++#ifndef USED_FOR_TARGET ++/* Done this way to keep gengtype happy. */ ++#if BITS_PER_UNIT == 8 ++#define TARGET_UNIT uint8_t ++#elif BITS_PER_UNIT == 16 ++#define TARGET_UNIT uint16_t ++#elif BITS_PER_UNIT == 32 ++#define TARGET_UNIT uint32_t ++#else ++#error Unknown BITS_PER_UNIT ++#endif ++typedef TARGET_UNIT target_unit; ++#endif ++ + #endif /* ! 
GCC_DEFAULTS_H */ +diff --git a/gcc/df-core.c b/gcc/df-core.c +index b19ba289d..2181ff131 100644 +--- a/gcc/df-core.c ++++ b/gcc/df-core.c +@@ -2052,7 +2052,7 @@ debug_regset (regset r) + This is part of making a debugging dump. */ + + void +-df_print_regset (FILE *file, bitmap r) ++df_print_regset (FILE *file, const_bitmap r) + { + unsigned int i; + bitmap_iterator bi; +@@ -2077,7 +2077,7 @@ df_print_regset (FILE *file, bitmap r) + debugging dump. */ + + void +-df_print_word_regset (FILE *file, bitmap r) ++df_print_word_regset (FILE *file, const_bitmap r) + { + unsigned int max_reg = max_reg_num (); + +diff --git a/gcc/df-problems.c b/gcc/df-problems.c +index a9dfa6203..3c7aeceb2 100644 +--- a/gcc/df-problems.c ++++ b/gcc/df-problems.c +@@ -388,7 +388,6 @@ df_rd_local_compute (bitmap all_blocks) + { + unsigned int bb_index; + bitmap_iterator bi; +- unsigned int regno; + struct df_rd_problem_data *problem_data + = (struct df_rd_problem_data *) df_rd->problem_data; + bitmap sparse_invalidated = &problem_data->sparse_invalidated_by_call; +@@ -405,10 +404,9 @@ df_rd_local_compute (bitmap all_blocks) + } + + /* Set up the knockout bit vectors to be applied across EH_EDGES. */ +- EXECUTE_IF_SET_IN_BITMAP (regs_invalidated_by_call_regset, 0, regno, bi) +- { +- if (! HARD_REGISTER_NUM_P (regno) +- || !(df->changeable_flags & DF_NO_HARD_REGS)) ++ if (!(df->changeable_flags & DF_NO_HARD_REGS)) ++ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) ++ if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) + { + if (DF_DEFS_COUNT (regno) > DF_SPARSE_THRESHOLD) + bitmap_set_bit (sparse_invalidated, regno); +@@ -417,7 +415,6 @@ df_rd_local_compute (bitmap all_blocks) + DF_DEFS_BEGIN (regno), + DF_DEFS_COUNT (regno)); + } +- } + + bitmap_release (&seen_in_block); + bitmap_release (&seen_in_insn); +@@ -982,7 +979,10 @@ df_lr_confluence_n (edge e) + /* ??? Abnormal call edges ignored for the moment, as this gets + confused by sibling call edges, which crashes reg-stack. */ + if (e->flags & EDGE_EH) +- changed = bitmap_ior_and_compl_into (op1, op2, regs_invalidated_by_call_regset); ++ { ++ bitmap_view eh_kills (regs_invalidated_by_call); ++ changed = bitmap_ior_and_compl_into (op1, op2, eh_kills); ++ } + else + changed = bitmap_ior_into (op1, op2); + +@@ -4093,8 +4093,7 @@ can_move_insns_across (rtx_insn *from, rtx_insn *to, + if (volatile_insn_p (PATTERN (insn))) + return false; + memrefs_in_across |= find_memory (insn); +- note_stores (PATTERN (insn), find_memory_stores, +- &mem_sets_in_across); ++ note_stores (insn, find_memory_stores, &mem_sets_in_across); + /* This is used just to find sets of the stack pointer. */ + memrefs_in_across |= mem_sets_in_across; + trapping_insns_in_across |= may_trap_p (PATTERN (insn)); +@@ -4173,7 +4172,7 @@ can_move_insns_across (rtx_insn *from, rtx_insn *to, + { + int mem_ref_flags = 0; + int mem_set_flags = 0; +- note_stores (PATTERN (insn), find_memory_stores, &mem_set_flags); ++ note_stores (insn, find_memory_stores, &mem_set_flags); + mem_ref_flags = find_memory (insn); + /* Catch sets of the stack pointer. 
*/ + mem_ref_flags |= mem_set_flags; +@@ -4635,8 +4634,10 @@ df_md_confluence_n (edge e) + return false; + + if (e->flags & EDGE_EH) +- return bitmap_ior_and_compl_into (op1, op2, +- regs_invalidated_by_call_regset); ++ { ++ bitmap_view eh_kills (regs_invalidated_by_call); ++ return bitmap_ior_and_compl_into (op1, op2, eh_kills); ++ } + else + return bitmap_ior_into (op1, op2); + } +diff --git a/gcc/df-scan.c b/gcc/df-scan.c +index 84c2e54c8..ea149c6cc 100644 +--- a/gcc/df-scan.c ++++ b/gcc/df-scan.c +@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3. If not see + #include "emit-rtl.h" /* FIXME: Can go away once crtl is moved to rtl.h. */ + #include "dumpfile.h" + #include "calls.h" +- ++#include "function-abi.h" + + /* The set of hard registers in eliminables[i].from. */ + +@@ -312,7 +312,7 @@ df_scan_start_dump (FILE *file ATTRIBUTE_UNUSED) + rtx_insn *insn; + + fprintf (file, ";; invalidated by call \t"); +- df_print_regset (file, regs_invalidated_by_call_regset); ++ df_print_regset (file, bitmap_view (regs_invalidated_by_call)); + fprintf (file, ";; hardware regs used \t"); + df_print_regset (file, &df->hardware_regs_used); + fprintf (file, ";; regular block artificial uses \t"); +@@ -2773,7 +2773,6 @@ df_find_hard_reg_defs (rtx x, HARD_REG_SET *defs) + break; + + case CLOBBER: +- case CLOBBER_HIGH: + df_find_hard_reg_defs_1 (XEXP (x, 0), defs); + break; + +@@ -2833,10 +2832,6 @@ df_uses_record (struct df_collection_rec *collection_rec, + /* If we're clobbering a REG then we have a def so ignore. */ + return; + +- case CLOBBER_HIGH: +- gcc_assert (REG_P (XEXP (x, 0))); +- return; +- + case MEM: + df_uses_record (collection_rec, + &XEXP (x, 0), DF_REF_REG_MEM_LOAD, +@@ -3087,13 +3082,11 @@ df_get_call_refs (struct df_collection_rec *collection_rec, + bool is_sibling_call; + unsigned int i; + HARD_REG_SET defs_generated; +- HARD_REG_SET fn_reg_set_usage; + + CLEAR_HARD_REG_SET (defs_generated); + df_find_hard_reg_defs (PATTERN (insn_info->insn), &defs_generated); + is_sibling_call = SIBLING_CALL_P (insn_info->insn); +- get_call_reg_set_usage (insn_info->insn, &fn_reg_set_usage, +- regs_invalidated_by_call); ++ function_abi callee_abi = insn_callee_abi (insn_info->insn); + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { +@@ -3117,7 +3110,7 @@ df_get_call_refs (struct df_collection_rec *collection_rec, + NULL, bb, insn_info, DF_REF_REG_DEF, flags); + } + } +- else if (TEST_HARD_REG_BIT (fn_reg_set_usage, i) ++ else if (callee_abi.clobbers_full_reg_p (i) + /* no clobbers for regs that are the result of the call */ + && !TEST_HARD_REG_BIT (defs_generated, i) + && (!is_sibling_call +@@ -3133,7 +3126,6 @@ df_get_call_refs (struct df_collection_rec *collection_rec, + for (note = CALL_INSN_FUNCTION_USAGE (insn_info->insn); note; + note = XEXP (note, 1)) + { +- gcc_assert (GET_CODE (XEXP (note, 0)) != CLOBBER_HIGH); + if (GET_CODE (XEXP (note, 0)) == USE) + df_uses_record (collection_rec, &XEXP (XEXP (note, 0), 0), + DF_REF_REG_USE, bb, insn_info, flags); +@@ -3499,7 +3491,9 @@ df_get_entry_block_def_set (bitmap entry_block_defs) + /* Defs for the callee saved registers are inserted so that the + pushes have some defining location. 
*/ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if ((call_used_regs[i] == 0) && (df_regs_ever_live_p (i))) ++ if (!crtl->abi->clobbers_full_reg_p (i) ++ && !fixed_regs[i] ++ && df_regs_ever_live_p (i)) + bitmap_set_bit (entry_block_defs, i); + } + +@@ -3682,8 +3676,9 @@ df_get_exit_block_use_set (bitmap exit_block_uses) + { + /* Mark all call-saved registers that we actually used. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (df_regs_ever_live_p (i) && !LOCAL_REGNO (i) +- && !TEST_HARD_REG_BIT (regs_invalidated_by_call, i)) ++ if (df_regs_ever_live_p (i) ++ && !LOCAL_REGNO (i) ++ && !crtl->abi->clobbers_full_reg_p (i)) + bitmap_set_bit (exit_block_uses, i); + } + +diff --git a/gcc/df.h b/gcc/df.h +index d76d31baa..241812235 100644 +--- a/gcc/df.h ++++ b/gcc/df.h +@@ -984,8 +984,8 @@ extern bool df_reg_defined (rtx_insn *, rtx); + extern df_ref df_find_use (rtx_insn *, rtx); + extern bool df_reg_used (rtx_insn *, rtx); + extern void df_worklist_dataflow (struct dataflow *,bitmap, int *, int); +-extern void df_print_regset (FILE *file, bitmap r); +-extern void df_print_word_regset (FILE *file, bitmap r); ++extern void df_print_regset (FILE *file, const_bitmap r); ++extern void df_print_word_regset (FILE *file, const_bitmap r); + extern void df_dump (FILE *); + extern void df_dump_region (FILE *); + extern void df_dump_start (FILE *); +diff --git a/gcc/diagnostic-color.c b/gcc/diagnostic-color.c +index 69e759ff6..abc919f63 100644 +--- a/gcc/diagnostic-color.c ++++ b/gcc/diagnostic-color.c +@@ -19,6 +19,7 @@ + #include "config.h" + #include "system.h" + #include "diagnostic-color.h" ++#include "diagnostic-url.h" + + #ifdef __MINGW32__ + # include +@@ -236,3 +237,22 @@ colorize_init (diagnostic_color_rule_t rule) + gcc_unreachable (); + } + } ++ ++/* Determine if URLs should be enabled, based on RULE. ++ This reuses the logic for colorization. */ ++ ++bool ++diagnostic_urls_enabled_p (diagnostic_url_rule_t rule) ++{ ++ switch (rule) ++ { ++ case DIAGNOSTICS_URL_NO: ++ return false; ++ case DIAGNOSTICS_URL_YES: ++ return true; ++ case DIAGNOSTICS_URL_AUTO: ++ return should_colorize (); ++ default: ++ gcc_unreachable (); ++ } ++} +diff --git a/gcc/diagnostic-url.h b/gcc/diagnostic-url.h +new file mode 100644 +index 000000000..ce0de459f +--- /dev/null ++++ b/gcc/diagnostic-url.h +@@ -0,0 +1,36 @@ ++/* Copyright (C) 2019 Free Software Foundation, Inc. ++ Contributed by David Malcolm . ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef GCC_DIAGNOSTIC_URL_H ++#define GCC_DIAGNOSTIC_URL_H ++ ++/* Whether to add URLs to diagnostics: ++ - DIAGNOSTICS_URL_NO: never ++ - DIAGNOSTICS_URL_YES: always ++ - DIAGNOSTICS_URL_AUTO: depending on the output stream. */ ++typedef enum ++{ ++ DIAGNOSTICS_URL_NO = 0, ++ DIAGNOSTICS_URL_YES = 1, ++ DIAGNOSTICS_URL_AUTO = 2 ++} diagnostic_url_rule_t; ++ ++extern bool diagnostic_urls_enabled_p (diagnostic_url_rule_t); ++ ++#endif /* ! 
GCC_DIAGNOSTIC_URL_H */ +diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c +index be6b65722..a9acda7cc 100644 +--- a/gcc/diagnostic.c ++++ b/gcc/diagnostic.c +@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see + #include "backtrace.h" + #include "diagnostic.h" + #include "diagnostic-color.h" ++#include "diagnostic-url.h" + #include "edit-context.h" + #include "selftest.h" + #include "selftest-diagnostic.h" +@@ -238,6 +239,18 @@ diagnostic_color_init (diagnostic_context *context, int value /*= -1 */) + = colorize_init ((diagnostic_color_rule_t) value); + } + ++/* Initialize URL support within CONTEXT based on VALUE, handling "auto". */ ++ ++void ++diagnostic_urls_init (diagnostic_context *context, int value /*= -1 */) ++{ ++ if (value < 0) ++ value = DIAGNOSTICS_COLOR_DEFAULT; ++ ++ context->printer->show_urls ++ = diagnostic_urls_enabled_p ((diagnostic_url_rule_t) value); ++} ++ + /* Do any cleaning up required after the last diagnostic is emitted. */ + + void +diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h +index 46c3b50a5..5daf4f288 100644 +--- a/gcc/diagnostic.h ++++ b/gcc/diagnostic.h +@@ -328,6 +328,7 @@ diagnostic_override_option_index (diagnostic_info *info, int optidx) + /* Diagnostic related functions. */ + extern void diagnostic_initialize (diagnostic_context *, int); + extern void diagnostic_color_init (diagnostic_context *, int value = -1); ++extern void diagnostic_urls_init (diagnostic_context *, int value = -1); + extern void diagnostic_finish (diagnostic_context *); + extern void diagnostic_report_current_module (diagnostic_context *, location_t); + extern void diagnostic_show_locus (diagnostic_context *, +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 9c87792ff..e366ab923 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -271,6 +271,7 @@ Objective-C and Objective-C++ Dialects}. + @gccoptlist{-fmessage-length=@var{n} @gol + -fdiagnostics-show-location=@r{[}once@r{|}every-line@r{]} @gol + -fdiagnostics-color=@r{[}auto@r{|}never@r{|}always@r{]} @gol ++-fdiagnostics-urls=@r{[}auto@r{|}never@r{|}always@r{]} @gol + -fdiagnostics-format=@r{[}text@r{|}json@r{]} @gol + -fno-diagnostics-show-option -fno-diagnostics-show-caret @gol + -fno-diagnostics-show-labels -fno-diagnostics-show-line-numbers @gol +@@ -403,8 +404,7 @@ Objective-C and Objective-C++ Dialects}. + -fallow-store-data-races @gol + -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol + -fauto-inc-dec -fbranch-probabilities @gol +--fbranch-target-load-optimize -fbranch-target-load-optimize2 @gol +--fbtr-bb-exclusive -fcaller-saves @gol ++-fcaller-saves @gol + -fcombine-stack-adjustments -fconserve-stack @gol + -fcompare-elim -fcprop-registers -fcrossjumping @gol + -fcse-follow-jumps -fcse-skip-blocks -fcx-fortran-rules @gol +@@ -636,11 +636,13 @@ Objective-C and Objective-C++ Dialects}. 
+ -mlow-precision-recip-sqrt -mlow-precision-sqrt -mlow-precision-div @gol + -mpc-relative-literal-loads @gol + -msign-return-address=@var{scope} @gol +--mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}]|@var{bti} @gol ++-mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf} +++@var{b-key}]|@var{bti} @gol + -march=@var{name} -mcpu=@var{name} -mtune=@var{name} @gol + -moverride=@var{string} -mverbose-cost-dump @gol + -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg} @gol +--mstack-protector-guard-offset=@var{offset} -mtrack-speculation } ++-mstack-protector-guard-offset=@var{offset} -mtrack-speculation @gol ++-moutline-atomics } + + @emph{Adapteva Epiphany Options} + @gccoptlist{-mhalf-reg-file -mprefer-short-insn-regs @gol +@@ -3885,6 +3887,18 @@ SGR substring for highlighting mismatching types within template + arguments in the C++ frontend. + @end table + ++@item -fdiagnostics-urls[=@var{WHEN}] ++@opindex fdiagnostics-urls ++@cindex urls ++Use escape sequences to embed URLs in diagnostics. For example, when ++@option{-fdiagnostics-show-option} emits text showing the command-line ++option controlling a diagnostic, embed a URL for documentation of that ++option. ++ ++@var{WHEN} is @samp{never}, @samp{always}, or @samp{auto}. ++The default is @samp{auto}, which means to use URL escape sequences only ++when the standard error is a terminal. ++ + @item -fno-diagnostics-show-option + @opindex fno-diagnostics-show-option + @opindex fdiagnostics-show-option +@@ -8295,6 +8309,7 @@ also turns on the following optimization flags: + -ffinite-loops @gol + -fgcse -fgcse-lm @gol + -fhoist-adjacent-loads @gol ++-finline-functions @gol + -finline-small-functions @gol + -findirect-inlining @gol + -fipa-bit-cp -fipa-cp -fipa-icf @gol +@@ -8328,7 +8343,6 @@ by @option{-O2} and also turns on the following optimization flags: + + @c Please keep the following list alphabetized! + @gccoptlist{-fgcse-after-reload @gol +--finline-functions @gol + -fipa-cp-clone + -floop-interchange @gol + -floop-unroll-and-jam @gol +@@ -8386,10 +8400,10 @@ no effect. Otherwise @option{-Og} enables all @option{-O1} + optimization flags except for those that may interfere with debugging: + + @gccoptlist{-fbranch-count-reg -fdelayed-branch @gol +--fif-conversion -fif-conversion2 @gol ++-fdse -fif-conversion -fif-conversion2 @gol + -finline-functions-called-once @gol + -fmove-loop-invariants -fssa-phiopt @gol +--ftree-bit-ccp -ftree-pta -ftree-sra} ++-ftree-bit-ccp -ftree-dse -ftree-pta -ftree-sra} + + @end table + +@@ -8508,7 +8522,7 @@ If all calls to a given function are integrated, and the function is + declared @code{static}, then the function is normally not output as + assembler code in its own right. + +-Enabled at levels @option{-O3}, @option{-Os}. Also enabled ++Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}. Also enabled + by @option{-fprofile-use} and @option{-fauto-profile}. + + @item -finline-functions-called-once +@@ -10986,24 +11000,6 @@ locations inside a translation unit since the locations are unknown until + link time. An example of such an optimization is relaxing calls to short call + instructions. + +-@item -fbranch-target-load-optimize +-@opindex fbranch-target-load-optimize +-Perform branch target register load optimization before prologue / epilogue +-threading. 
+-The use of target registers can typically be exposed only during reload, +-thus hoisting loads out of loops and doing inter-block scheduling needs +-a separate optimization pass. +- +-@item -fbranch-target-load-optimize2 +-@opindex fbranch-target-load-optimize2 +-Perform branch target register load optimization after prologue / epilogue +-threading. +- +-@item -fbtr-bb-exclusive +-@opindex fbtr-bb-exclusive +-When performing branch target register load optimization, don't reuse +-branch target registers within any basic block. +- + @item -fstdarg-opt + @opindex fstdarg-opt + Optimize the prologue of variadic argument functions with respect to usage of +@@ -11154,19 +11150,30 @@ when modulo scheduling a loop. Larger values can exponentially increase + compilation time. + + @item max-inline-insns-single +-Several parameters control the tree inliner used in GCC@. +-This number sets the maximum number of instructions (counted in GCC's +-internal representation) in a single function that the tree inliner +-considers for inlining. This only affects functions declared +-inline and methods implemented in a class declaration (C++). ++@item max-inline-insns-single-O2 ++Several parameters control the tree inliner used in GCC@. This number sets the ++maximum number of instructions (counted in GCC's internal representation) in a ++single function that the tree inliner considers for inlining. This only ++affects functions declared inline and methods implemented in a class ++declaration (C++). ++ ++For functions compiled with optimization levels ++@option{-O3} and @option{-Ofast} parameter @option{max-inline-insns-single} is ++applied. In other cases @option{max-inline-insns-single-O2} is applied. ++ + + @item max-inline-insns-auto ++@item max-inline-insns-auto-O2 + When you use @option{-finline-functions} (included in @option{-O3}), + a lot of functions that would otherwise not be considered for inlining + by the compiler are investigated. To those functions, a different + (more restrictive) limit compared to functions declared inline can + be applied. + ++For functions compiled with optimization levels ++@option{-O3} and @option{-Ofast} parameter @option{max-inline-insns-auto} is ++applied. In other cases @option{max-inline-insns-auto-O2} is applied. ++ + @item max-inline-insns-small + This is bound applied to calls which are considered relevant with + @option{-finline-small-functions}. +@@ -11189,11 +11196,16 @@ Same as @option{--param uninlined-function-insns} and + @option{--param uninlined-function-time} but applied to function thunks + + @item inline-min-speedup ++@item inline-min-speedup-O2 + When estimated performance improvement of caller + callee runtime exceeds this + threshold (in percent), the function can be inlined regardless of the limit on + @option{--param max-inline-insns-single} and @option{--param + max-inline-insns-auto}. + ++For functions compiled with optimization levels ++@option{-O3} and @option{-Ofast} parameter @option{inline-min-speedup} is ++applied. In other cases @option{inline-min-speedup-O2} is applied. ++ + @item large-function-insns + The limit specifying really large functions. For functions larger than this + limit after inlining, inlining is constrained by +@@ -11271,9 +11283,14 @@ via a given call expression. This parameter limits inlining only to call + expressions whose probability exceeds the given threshold (in percents). + + @item early-inlining-insns ++@item early-inlining-insns-O2 + Specify growth that the early inliner can make. 
In effect it increases + the amount of inlining for code having a large abstraction penalty. + ++For functions compiled with optimization levels ++@option{-O3} and @option{-Ofast} parameter @option{early-inlining-insns} is ++applied. In other cases @option{early-inlining-insns-O2} is applied. ++ + @item max-early-inliner-iterations + Limit of iterations of the early inliner. This basically bounds + the number of nested indirect calls the early inliner can resolve. +@@ -15816,31 +15833,38 @@ be used by the compiler when expanding calls to + @code{__builtin_speculation_safe_copy} to permit a more efficient code + sequence to be generated. + ++@item -moutline-atomics ++@itemx -mno-outline-atomics ++Enable or disable calls to out-of-line helpers to implement atomic operations. ++These helpers will, at runtime, determine if the LSE instructions from ++ARMv8.1-A can be used; if not, they will use the load/store-exclusive ++instructions that are present in the base ARMv8.0 ISA. ++ ++This option is only applicable when compiling for the base ARMv8.0 ++instruction set. If using a later revision, e.g. @option{-march=armv8.1-a} ++or @option{-march=armv8-a+lse}, the ARMv8.1-Atomics instructions will be ++used directly. The same applies when using @option{-mcpu=} when the ++selected cpu supports the @samp{lse} feature. ++ + @item -march=@var{name} + @opindex march + Specify the name of the target architecture and, optionally, one or + more feature modifiers. This option has the form + @option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}. + +-The permissible values for @var{arch} are @samp{armv8-a}, +-@samp{armv8.1-a}, @samp{armv8.2-a}, @samp{armv8.3-a}, @samp{armv8.4-a}, +-@samp{armv8.5-a} or @var{native}. +- +-The value @samp{armv8.5-a} implies @samp{armv8.4-a} and enables compiler +-support for the ARMv8.5-A architecture extensions. +- +-The value @samp{armv8.4-a} implies @samp{armv8.3-a} and enables compiler +-support for the ARMv8.4-A architecture extensions. +- +-The value @samp{armv8.3-a} implies @samp{armv8.2-a} and enables compiler +-support for the ARMv8.3-A architecture extensions. +- +-The value @samp{armv8.2-a} implies @samp{armv8.1-a} and enables compiler +-support for the ARMv8.2-A architecture extensions. +- +-The value @samp{armv8.1-a} implies @samp{armv8-a} and enables compiler +-support for the ARMv8.1-A architecture extension. In particular, it +-enables the @samp{+crc}, @samp{+lse}, and @samp{+rdma} features. ++The table below summarizes the permissible values for @var{arch} ++and the features that they enable by default: ++ ++@multitable @columnfractions 0.20 0.20 0.60 ++@headitem @var{arch} value @tab Architecture @tab Includes by default ++@item @samp{armv8-a} @tab Armv8-A @tab @samp{+fp}, @samp{+simd} ++@item @samp{armv8.1-a} @tab Armv8.1-A @tab @samp{armv8-a}, @samp{+crc}, @samp{+lse}, @samp{+rdma} ++@item @samp{armv8.2-a} @tab Armv8.2-A @tab @samp{armv8.1-a} ++@item @samp{armv8.3-a} @tab Armv8.3-A @tab @samp{armv8.2-a} ++@item @samp{armv8.4-a} @tab Armv8.4-A @tab @samp{armv8.3-a}, @samp{+fp16fml}, @samp{+dotprod} ++@item @samp{armv8.5-a} @tab Armv8.5-A @tab @samp{armv8.4-a}, @samp{+sb}, @samp{+ssbs}, @samp{+predres} ++@item @samp{armv8.6-a} @tab Armv8.6-A @tab @samp{armv8.5-a}, @samp{+bf16}, @samp{+i8mm} ++@end multitable + + The value @samp{native} is available on native AArch64 GNU/Linux and + causes the compiler to pick the architecture of the host system. 
This +@@ -15864,7 +15888,9 @@ Specify the name of the target processor for which GCC should tune the + performance of the code. Permissible values for this option are: + @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55}, + @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75}, +-@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, ++@samp{cortex-a76}, @samp{cortex-a76ae}, @samp{cortex-a77}, ++@samp{cortex-a65}, @samp{cortex-a65ae}, @samp{cortex-a34}, ++@samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, + @samp{neoverse-e1},@samp{neoverse-n1},@samp{qdf24xx}, @samp{saphira}, + @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx}, + @samp{octeontx81}, @samp{octeontx83}, @samp{thunderx}, @samp{thunderxt88}, +@@ -15941,7 +15967,7 @@ functions, and @samp{all}, which enables pointer signing for all functions. The + default value is @samp{none}. This option has been deprecated by + -mbranch-protection. + +-@item -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}]|@var{bti} ++@item -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}+@var{b-key}]|@var{bti} + @opindex mbranch-protection + Select the branch protection features to use. + @samp{none} is the default and turns off all types of branch protection. +@@ -15952,7 +15978,8 @@ level. + level: signing functions that save the return address to memory (non-leaf + functions will practically always do this) using the a-key. The optional + argument @samp{leaf} can be used to extend the signing to include leaf +-functions. ++functions. The optional argument @samp{b-key} can be used to sign the functions ++with the B-key instead of the A-key. + @samp{bti} turns on branch target identification mechanism. + + @item -msve-vector-bits=@var{bits} +@@ -16054,6 +16081,37 @@ Enable the Armv8-a Execution and Data Prediction Restriction instructions. + This option is only to enable the extension at the assembler level and does + not affect code generation. This option is enabled by default for + @option{-march=armv8.5-a}. ++@item sve2 ++Enable the Armv8-a Scalable Vector Extension 2. This also enables SVE ++instructions. ++@item sve2-bitperm ++Enable SVE2 bitperm instructions. This also enables SVE2 instructions. ++@item sve2-sm4 ++Enable SVE2 sm4 instructions. This also enables SVE2 instructions. ++@item sve2-aes ++Enable SVE2 aes instructions. This also enables SVE2 instructions. ++@item sve2-sha3 ++Enable SVE2 sha3 instructions. This also enables SVE2 instructions. ++@item tme ++Enable the Transactional Memory Extension. ++@item i8mm ++Enable 8-bit Integer Matrix Multiply instructions. This also enables ++Advanced SIMD and floating-point instructions. This option is enabled by ++default for @option{-march=armv8.6-a}. Use of this option with architectures ++prior to Armv8.2-A is not supported. ++@item f32mm ++Enable 32-bit Floating point Matrix Multiply instructions. This also enables ++SVE instructions. Use of this option with architectures prior to Armv8.2-A is ++not supported. ++@item f64mm ++Enable 64-bit Floating point Matrix Multiply instructions. This also enables ++SVE instructions. Use of this option with architectures prior to Armv8.2-A is ++not supported. ++@item bf16 ++Enable brain half-precision floating-point instructions. This also enables ++Advanced SIMD and floating-point instructions. This option is enabled by ++default for @option{-march=armv8.6-a}. 
Use of this option with architectures ++prior to Armv8.2-A is not supported. + + @end table + +@@ -28567,8 +28625,9 @@ By default GCC inlines string operations only when the destination is + known to be aligned to least a 4-byte boundary. + This enables more inlining and increases code + size, but may improve performance of code that depends on fast +-@code{memcpy}, @code{strlen}, +-and @code{memset} for short lengths. ++@code{memcpy} and @code{memset} for short lengths. ++The option enables inline expansion of @code{strlen} for all ++pointer alignments. + + @item -minline-stringops-dynamically + @opindex minline-stringops-dynamically +diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi +index 50e13124b..75482d7a2 100644 +--- a/gcc/doc/md.texi ++++ b/gcc/doc/md.texi +@@ -1748,6 +1748,12 @@ The stack pointer register (@code{SP}) + @item w + Floating point register, Advanced SIMD vector register or SVE vector register + ++@item x ++Like @code{w}, but restricted to registers 0 to 15 inclusive. ++ ++@item y ++Like @code{w}, but restricted to registers 0 to 7 inclusive. ++ + @item Upl + One of the low eight SVE predicate registers (@code{P0} to @code{P7}) + +@@ -5470,6 +5476,11 @@ mode @var{m} and the scalars have the mode appropriate for one + element of @var{m}. The operation is strictly in-order: there is + no reassociation. + ++@cindex @code{mask_fold_left_plus_@var{m}} instruction pattern ++@item @code{mask_fold_left_plus_@var{m}} ++Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand ++(operand 3) that specifies which elements of the source vector should be added. ++ + @cindex @code{sdot_prod@var{m}} instruction pattern + @item @samp{sdot_prod@var{m}} + @cindex @code{udot_prod@var{m}} instruction pattern +@@ -5499,6 +5510,44 @@ operand 1. Add operand 1 to operand 2 and place the widened result in + operand 0. (This is used express accumulation of elements into an accumulator + of a wider mode.) + ++@cindex @code{smulhs@var{m3}} instruction pattern ++@item @samp{smulhs@var{m3}} ++@cindex @code{umulhs@var{m3}} instruction pattern ++@itemx @samp{umulhs@var{m3}} ++Signed/unsigned multiply high with scale. This is equivalent to the C code: ++@smallexample ++narrow op0, op1, op2; ++@dots{} ++op0 = (narrow) (((wide) op1 * (wide) op2) >> (N / 2 - 1)); ++@end smallexample ++where the sign of @samp{narrow} determines whether this is a signed ++or unsigned operation, and @var{N} is the size of @samp{wide} in bits. ++ ++@cindex @code{smulhrs@var{m3}} instruction pattern ++@item @samp{smulhrs@var{m3}} ++@cindex @code{umulhrs@var{m3}} instruction pattern ++@itemx @samp{umulhrs@var{m3}} ++Signed/unsigned multiply high with round and scale. This is ++equivalent to the C code: ++@smallexample ++narrow op0, op1, op2; ++@dots{} ++op0 = (narrow) (((((wide) op1 * (wide) op2) >> (N / 2 - 2)) + 1) >> 1); ++@end smallexample ++where the sign of @samp{narrow} determines whether this is a signed ++or unsigned operation, and @var{N} is the size of @samp{wide} in bits. ++ ++@cindex @code{sdiv_pow2@var{m3}} instruction pattern ++@item @samp{sdiv_pow2@var{m3}} ++@cindex @code{sdiv_pow2@var{m3}} instruction pattern ++@itemx @samp{sdiv_pow2@var{m3}} ++Signed division by power-of-2 immediate. 
Equivalent to: ++@smallexample ++signed op0, op1; ++@dots{} ++op0 = op1 / (1 << imm); ++@end smallexample ++ + @cindex @code{vec_shl_insert_@var{m}} instruction pattern + @item @samp{vec_shl_insert_@var{m}} + Shift the elements in vector input operand 1 left one element (i.e.@: +@@ -6240,13 +6289,13 @@ This pattern is not allowed to @code{FAIL}. + @item @samp{one_cmpl@var{m}2} + Store the bitwise-complement of operand 1 into operand 0. + +-@cindex @code{movmem@var{m}} instruction pattern +-@item @samp{movmem@var{m}} +-Block move instruction. The destination and source blocks of memory ++@cindex @code{cpymem@var{m}} instruction pattern ++@item @samp{cpymem@var{m}} ++Block copy instruction. The destination and source blocks of memory + are the first two operands, and both are @code{mem:BLK}s with an + address in mode @code{Pmode}. + +-The number of bytes to move is the third operand, in mode @var{m}. ++The number of bytes to copy is the third operand, in mode @var{m}. + Usually, you specify @code{Pmode} for @var{m}. However, if you can + generate better code knowing the range of valid lengths is smaller than + those representable in a full Pmode pointer, you should provide +@@ -6266,14 +6315,16 @@ in a way that the blocks are not required to be aligned according to it in + all cases. This expected alignment is also in bytes, just like operand 4. + Expected size, when unknown, is set to @code{(const_int -1)}. + +-Descriptions of multiple @code{movmem@var{m}} patterns can only be ++Descriptions of multiple @code{cpymem@var{m}} patterns can only be + beneficial if the patterns for smaller modes have fewer restrictions + on their first, second and fourth operands. Note that the mode @var{m} +-in @code{movmem@var{m}} does not impose any restriction on the mode of +-individually moved data units in the block. ++in @code{cpymem@var{m}} does not impose any restriction on the mode of ++individually copied data units in the block. + +-These patterns need not give special consideration to the possibility +-that the source and destination strings might overlap. ++The @code{cpymem@var{m}} patterns need not give special consideration ++to the possibility that the source and destination strings might ++overlap. These patterns are used to do inline expansion of ++@code{__builtin_memcpy}. + + @cindex @code{movstr} instruction pattern + @item @samp{movstr} +@@ -6294,7 +6345,7 @@ given as a @code{mem:BLK} whose address is in mode @code{Pmode}. The + number of bytes to set is the second operand, in mode @var{m}. The value to + initialize the memory with is the third operand. Targets that only support the + clearing of memory should reject any value that is not the constant 0. See +-@samp{movmem@var{m}} for a discussion of the choice of mode. ++@samp{cpymem@var{m}} for a discussion of the choice of mode. + + The fourth operand is the known alignment of the destination, in the form + of a @code{const_int} rtx. Thus, if the compiler knows that the +@@ -6312,13 +6363,13 @@ Operand 9 is the probable maximal size (i.e.@: we cannot rely on it for + correctness, but it can be used for choosing proper code sequence for a + given size). + +-The use for multiple @code{setmem@var{m}} is as for @code{movmem@var{m}}. ++The use for multiple @code{setmem@var{m}} is as for @code{cpymem@var{m}}. + + @cindex @code{cmpstrn@var{m}} instruction pattern + @item @samp{cmpstrn@var{m}} + String compare instruction, with five operands. Operand 0 is the output; + it has mode @var{m}. 
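To make the @code{smulhs}, @code{smulhrs} and @code{sdiv_pow2} semantics above concrete, here is a scalar C model of a single 16-bit lane (illustrative only, not part of the patch; the function names are invented). With a 32-bit @samp{wide} type, @var{N} is 32, so the plain high multiply shifts by 15 while the rounding form shifts by 14 and then rounds:

    #include <stdint.h>

    /* One lane of smulhs for 16-bit elements: shift the widened product
       right by N/2 - 1 = 15 (assumes arithmetic right shift of negative
       values, as GCC provides).  */
    static int16_t
    smulhs16 (int16_t a, int16_t b)
    {
      return (int16_t) (((int32_t) a * (int32_t) b) >> 15);
    }

    /* One lane of smulhrs: shift right by N/2 - 2 = 14, add 1, then shift
       right once more, giving round-to-nearest behaviour.  */
    static int16_t
    smulhrs16 (int16_t a, int16_t b)
    {
      int32_t prod = (int32_t) a * (int32_t) b;
      return (int16_t) (((prod >> 14) + 1) >> 1);
    }

    /* One lane of sdiv_pow2 with an immediate of 3: ordinary C signed
       division by 8, which rounds towards zero.  */
    static int32_t
    sdiv_by_8 (int32_t a)
    {
      return a / (1 << 3);
    }
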
The remaining four operands are like the operands +-of @samp{movmem@var{m}}. The two memory blocks specified are compared ++of @samp{cpymem@var{m}}. The two memory blocks specified are compared + byte by byte in lexicographic order starting at the beginning of each + string. The instruction is not allowed to prefetch more than one byte + at a time since either string may end in the first byte and reading past +@@ -8537,6 +8588,119 @@ functionality as two separate @code{define_insn} and @code{define_split} + patterns. It exists for compactness, and as a maintenance tool to prevent + having to ensure the two patterns' templates match. + ++@findex define_insn_and_rewrite ++It is sometimes useful to have a @code{define_insn_and_split} ++that replaces specific operands of an instruction but leaves the ++rest of the instruction pattern unchanged. You can do this directly ++with a @code{define_insn_and_split}, but it requires a ++@var{new-insn-pattern-1} that repeats most of the original @var{insn-pattern}. ++There is also the complication that an implicit @code{parallel} in ++@var{insn-pattern} must become an explicit @code{parallel} in ++@var{new-insn-pattern-1}, which is easy to overlook. ++A simpler alternative is to use @code{define_insn_and_rewrite}, which ++is a form of @code{define_insn_and_split} that automatically generates ++@var{new-insn-pattern-1} by replacing each @code{match_operand} ++in @var{insn-pattern} with a corresponding @code{match_dup}, and each ++@code{match_operator} in the pattern with a corresponding @code{match_op_dup}. ++The arguments are otherwise identical to @code{define_insn_and_split}: ++ ++@smallexample ++(define_insn_and_rewrite ++ [@var{insn-pattern}] ++ "@var{condition}" ++ "@var{output-template}" ++ "@var{split-condition}" ++ "@var{preparation-statements}" ++ [@var{insn-attributes}]) ++@end smallexample ++ ++The @code{match_dup}s and @code{match_op_dup}s in the new ++instruction pattern use any new operand values that the ++@var{preparation-statements} store in the @code{operands} array, ++as for a normal @code{define_insn_and_split}. @var{preparation-statements} ++can also emit additional instructions before the new instruction. ++They can even emit an entirely different sequence of instructions and ++use @code{DONE} to avoid emitting a new form of the original ++instruction. ++ ++The split in a @code{define_insn_and_rewrite} is only intended ++to apply to existing instructions that match @var{insn-pattern}. ++@var{split-condition} must therefore start with @code{&&}, ++so that the split condition applies on top of @var{condition}. ++ ++Here is an example from the AArch64 SVE port, in which operand 1 is ++known to be equivalent to an all-true constant and isn't used by the ++output template: ++ ++@smallexample ++(define_insn_and_rewrite "*while_ult_cc" ++ [(set (reg:CC CC_REGNUM) ++ (compare:CC ++ (unspec:SI [(match_operand:PRED_ALL 1) ++ (unspec:PRED_ALL ++ [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") ++ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] ++ UNSPEC_WHILE_LO)] ++ UNSPEC_PTEST_PTRUE) ++ (const_int 0))) ++ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (unspec:PRED_ALL [(match_dup 2) ++ (match_dup 3)] ++ UNSPEC_WHILE_LO))] ++ "TARGET_SVE" ++ "whilelo\t%0., %2, %3" ++ ;; Force the compiler to drop the unused predicate operand, so that we ++ ;; don't have an unnecessary PTRUE. 
++ "&& !CONSTANT_P (operands[1])" ++ @{ ++ operands[1] = CONSTM1_RTX (mode); ++ @} ++) ++@end smallexample ++ ++The splitter in this case simply replaces operand 1 with the constant ++value that it is known to have. The equivalent @code{define_insn_and_split} ++would be: ++ ++@smallexample ++(define_insn_and_split "*while_ult_cc" ++ [(set (reg:CC CC_REGNUM) ++ (compare:CC ++ (unspec:SI [(match_operand:PRED_ALL 1) ++ (unspec:PRED_ALL ++ [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") ++ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] ++ UNSPEC_WHILE_LO)] ++ UNSPEC_PTEST_PTRUE) ++ (const_int 0))) ++ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") ++ (unspec:PRED_ALL [(match_dup 2) ++ (match_dup 3)] ++ UNSPEC_WHILE_LO))] ++ "TARGET_SVE" ++ "whilelo\t%0., %2, %3" ++ ;; Force the compiler to drop the unused predicate operand, so that we ++ ;; don't have an unnecessary PTRUE. ++ "&& !CONSTANT_P (operands[1])" ++ [(parallel ++ [(set (reg:CC CC_REGNUM) ++ (compare:CC ++ (unspec:SI [(match_dup 1) ++ (unspec:PRED_ALL [(match_dup 2) ++ (match_dup 3)] ++ UNSPEC_WHILE_LO)] ++ UNSPEC_PTEST_PTRUE) ++ (const_int 0))) ++ (set (match_dup 0) ++ (unspec:PRED_ALL [(match_dup 2) ++ (match_dup 3)] ++ UNSPEC_WHILE_LO))])] ++ @{ ++ operands[1] = CONSTM1_RTX (mode); ++ @} ++) ++@end smallexample ++ + @end ifset + @ifset INTERNALS + @node Including Patterns +@@ -10979,6 +11143,27 @@ Other attributes are defined using: + (define_code_attr @var{name} [(@var{code1} "@var{value1}") @dots{} (@var{coden} "@var{valuen}")]) + @end smallexample + ++Instruction patterns can use code attributes as rtx codes, which can be ++useful if two sets of codes act in tandem. For example, the following ++@code{define_insn} defines two patterns, one calculating a signed absolute ++difference and another calculating an unsigned absolute difference: ++ ++@smallexample ++(define_code_iterator any_max [smax umax]) ++(define_code_attr paired_min [(smax "smin") (umax "umin")]) ++(define_insn @dots{} ++ [(set (match_operand:SI 0 @dots{}) ++ (minus:SI (any_max:SI (match_operand:SI 1 @dots{}) ++ (match_operand:SI 2 @dots{})) ++ (:SI (match_dup 1) (match_dup 2))))] ++ @dots{}) ++@end smallexample ++ ++The signed version of the instruction uses @code{smax} and @code{smin} ++while the unsigned version uses @code{umax} and @code{umin}. There ++are no versions that pair @code{smax} with @code{umin} or @code{umax} ++with @code{smin}. ++ + Here's an example of code iterators in action, taken from the MIPS port: + + @smallexample +@@ -11249,4 +11434,13 @@ name and same types of iterator. For example: + would produce a single set of functions that handles both + @code{INTEGER_MODES} and @code{FLOAT_MODES}. + ++It is also possible for these @samp{@@} patterns to have different ++numbers of operands from each other. For example, patterns with ++a binary rtl code might take three operands (one output and two inputs) ++while patterns with a ternary rtl code might take four operands (one ++output and three inputs). This combination would produce separate ++@samp{maybe_gen_@var{name}} and @samp{gen_@var{name}} functions for ++each operand count, but it would still produce a single ++@samp{maybe_code_for_@var{name}} and a single @samp{code_for_@var{name}}. ++ + @end ifset +diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi +index f5f2de756..3df798216 100644 +--- a/gcc/doc/rtl.texi ++++ b/gcc/doc/rtl.texi +@@ -3295,18 +3295,6 @@ There is one other known use for clobbering a pseudo register in a + clobbered by the insn. 
In this case, using the same pseudo register in + the clobber and elsewhere in the insn produces the expected results. + +-@findex clobber_high +-@item (clobber_high @var{x}) +-Represents the storing or possible storing of an unpredictable, +-undescribed value into the upper parts of @var{x}. The mode of the expression +-represents the lower parts of the register which will not be overwritten. +-@code{reg} must be a reg expression. +- +-One place this is used is when calling into functions where the registers are +-preserved, but only up to a given number of bits. For example when using +-Aarch64 SVE, calling a TLS descriptor will cause only the lower 128 bits of +-each of the vector registers to be preserved. +- + @findex use + @item (use @var{x}) + Represents the use of the value of @var{x}. It indicates that the +@@ -3341,7 +3329,7 @@ that the register is live. You should think twice before adding + instead. The @code{use} RTX is most commonly useful to describe that + a fixed register is implicitly used in an insn. It is also safe to use + in patterns where the compiler knows for other reasons that the result +-of the whole pattern is variable, such as @samp{movmem@var{m}} or ++of the whole pattern is variable, such as @samp{cpymem@var{m}} or + @samp{call} patterns. + + During the reload phase, an insn that has a @code{use} as pattern +@@ -3360,8 +3348,7 @@ Represents several side effects performed in parallel. The square + brackets stand for a vector; the operand of @code{parallel} is a + vector of expressions. @var{x0}, @var{x1} and so on are individual + side effect expressions---expressions of code @code{set}, @code{call}, +-@code{return}, @code{simple_return}, @code{clobber} @code{use} or +-@code{clobber_high}. ++@code{return}, @code{simple_return}, @code{clobber} or @code{use}. + + ``In parallel'' means that first all the values used in the individual + side-effects are computed, and second all the actual side-effects are +diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi +index 546af7f72..62245c2b3 100644 +--- a/gcc/doc/sourcebuild.texi ++++ b/gcc/doc/sourcebuild.texi +@@ -1439,6 +1439,14 @@ vector alignment. + Target supports both signed and unsigned averaging operations on vectors + of bytes. + ++@item vect_mulhrs_hi ++Target supports both signed and unsigned multiply-high-with-round-and-scale ++operations on vectors of half-words. ++ ++@item vect_sdiv_pow2_si ++Target supports signed division by constant power-of-2 operations ++on vectors of 4-byte integers. ++ + @item vect_condition + Target supports vector conditional operations. + +@@ -1854,6 +1862,16 @@ ARM target supports extensions to generate the @code{VFMAL} and @code{VFMLS} + half-precision floating-point instructions available from ARMv8.2-A and + onwards. Some multilibs may be incompatible with these options. + ++@item arm_v8_2a_bf16_neon_ok ++ARM target supports options to generate instructions from ARMv8.2-A with ++the BFloat16 extension (bf16). Some multilibs may be incompatible with these ++options. ++ ++@item arm_v8_2a_i8mm_ok ++ARM target supports options to generate instructions from ARMv8.2-A with ++the 8-Bit Integer Matrix Multiply extension (i8mm). Some multilibs may be ++incompatible with these options. ++ + @item arm_prefer_ldrd_strd + ARM target prefers @code{LDRD} and @code{STRD} instructions over + @code{LDM} and @code{STM} instructions. +@@ -2663,6 +2681,91 @@ assembly output. 
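The new @code{vect_mulhrs_hi} and @code{vect_sdiv_pow2_si} effective-target keywords above are intended for use in DejaGnu selectors. A hypothetical test, sketched here for illustration only (the function name, options and dump string are not taken from the patch), could look like:

    /* { dg-do compile } */
    /* { dg-additional-options "-O3 -fdump-tree-vect-details" } */

    void
    halve_all (int *x, int n)
    {
      for (int i = 0; i < n; i++)
        x[i] /= 16;            /* signed division by a power of 2 */
    }

    /* Only expect the loop to be vectorized where the target supports
       vector signed division by a power of 2.  */
    /* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target vect_sdiv_pow2_si } } } */
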
+ @item scan-not-hidden @var{symbol} [@{ target/xfail @var{selector} @}] + Passes if @var{symbol} is not defined as a hidden symbol in the test's + assembly output. ++ ++@item check-function-bodies @var{prefix} @var{terminator} [@var{option} [@{ target/xfail @var{selector} @}]] ++Looks through the source file for comments that give the expected assembly ++output for selected functions. Each line of expected output starts with the ++prefix string @var{prefix} and the expected output for a function as a whole ++is followed by a line that starts with the string @var{terminator}. ++Specifying an empty terminator is equivalent to specifying @samp{"*/"}. ++ ++If @var{option} is specified, the test only applies to command lines ++that contain @var{option}. This can be useful if a source file is compiled ++both with and without optimization, since it is rarely useful to check the ++assembly output for unoptimized code. ++ ++The first line of the expected output for a function @var{fn} has the form: ++ ++@smallexample ++@var{prefix} @var{fn}: [@{ target/xfail @var{selector} @}] ++@end smallexample ++ ++Subsequent lines of the expected output also start with @var{prefix}. ++In both cases, whitespace after @var{prefix} is not significant. ++ ++The test discards assembly directives such as @code{.cfi_startproc} ++and local label definitions such as @code{.LFB0} from the compiler's ++assembly output. It then matches the result against the expected ++output for a function as a single regular expression. This means that ++later lines can use backslashes to refer back to @samp{(@dots{})} ++captures on earlier lines. For example: ++ ++@smallexample ++/* @{ dg-final @{ check-function-bodies "**" "" "-DCHECK_ASM" @} @} */ ++@dots{} ++/* ++** add_w0_s8_m: ++** mov (z[0-9]+\.b), w0 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++svint8_t add_w0_s8_m (@dots{}) @{ @dots{} @} ++@dots{} ++/* ++** add_b0_s8_m: ++** mov (z[0-9]+\.b), b0 ++** add z1\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++svint8_t add_b0_s8_m (@dots{}) @{ @dots{} @} ++@end smallexample ++ ++checks whether the implementations of @code{add_w0_s8_m} and ++@code{add_b0_s8_m} match the regular expressions given. The test only ++runs when @samp{-DCHECK_ASM} is passed on the command line. ++ ++It is possible to create non-capturing multi-line regular expression ++groups of the form @samp{(@var{a}|@var{b}|@dots{})} by putting the ++@samp{(}, @samp{|} and @samp{)} on separate lines (each still using ++@var{prefix}). For example: ++ ++@smallexample ++/* ++** cmple_f16_tied: ++** ( ++** fcmge p0\.h, p0/z, z1\.h, z0\.h ++** | ++** fcmle p0\.h, p0/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++svbool_t cmple_f16_tied (@dots{}) @{ @dots{} @} ++@end smallexample ++ ++checks whether @code{cmple_f16_tied} is implemented by the ++@code{fcmge} instruction followed by @code{ret} or by the ++@code{fcmle} instruction followed by @code{ret}. The test is ++still a single regular rexpression. ++ ++A line containing just: ++ ++@smallexample ++@var{prefix} ... ++@end smallexample ++ ++stands for zero or more unmatched lines; the whitespace after ++@var{prefix} is again not significant. ++ + @end table + + @subsubsection Scan optimization dump files +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 73db70867..3f22bb1f6 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -1878,6 +1878,9 @@ function calls. 
+ If a register has 0 in @code{CALL_USED_REGISTERS}, the compiler + automatically saves it on function entry and restores it on function + exit, if the register is used within the function. ++ ++Exactly one of @code{CALL_USED_REGISTERS} and @code{CALL_REALLY_USED_REGISTERS} ++must be defined. Modern ports should define @code{CALL_REALLY_USED_REGISTERS}. + @end defmac + + @defmac CALL_REALLY_USED_REGISTERS +@@ -1887,48 +1890,55 @@ exit, if the register is used within the function. + Like @code{CALL_USED_REGISTERS} except this macro doesn't require + that the entire set of @code{FIXED_REGISTERS} be included. + (@code{CALL_USED_REGISTERS} must be a superset of @code{FIXED_REGISTERS}). +-This macro is optional. If not specified, it defaults to the value +-of @code{CALL_USED_REGISTERS}. ++ ++Exactly one of @code{CALL_USED_REGISTERS} and @code{CALL_REALLY_USED_REGISTERS} ++must be defined. Modern ports should define @code{CALL_REALLY_USED_REGISTERS}. + @end defmac + + @cindex call-used register + @cindex call-clobbered register + @cindex call-saved register +-@deftypefn {Target Hook} bool TARGET_HARD_REGNO_CALL_PART_CLOBBERED (rtx_insn *@var{insn}, unsigned int @var{regno}, machine_mode @var{mode}) +-This hook should return true if @var{regno} is partly call-saved and +-partly call-clobbered, and if a value of mode @var{mode} would be partly +-clobbered by call instruction @var{insn}. If @var{insn} is NULL then it +-should return true if any call could partly clobber the register. +-For example, if the low 32 bits of @var{regno} are preserved across a call +-but higher bits are clobbered, this hook should return true for a 64-bit +-mode but false for a 32-bit mode. +- +-The default implementation returns false, which is correct +-for targets that don't have partly call-clobbered registers. ++@deftypefn {Target Hook} {const predefined_function_abi &} TARGET_FNTYPE_ABI (const_tree @var{type}) ++Return the ABI used by a function with type @var{type}; see the ++definition of @code{predefined_function_abi} for details of the ABI ++descriptor. Targets only need to define this hook if they support ++interoperability between several ABIs in the same translation unit. + @end deftypefn + +-@deftypefn {Target Hook} void TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS (rtx_insn *@var{insn}, HARD_REG_SET *@var{used_regs}) +-This hook removes registers from the set of call-clobbered registers +- in @var{used_regs} if, contrary to the default rules, something guarantees +- that @samp{insn} preserves those registers. For example, some targets +- support variant ABIs in which functions preserve more registers than +- normal functions would. Removing those extra registers from @var{used_regs} +- can lead to better register allocation. +- +- The default implementation does nothing, which is always safe. +- Defining the hook is purely an optimization. ++@deftypefn {Target Hook} {const predefined_function_abi &} TARGET_INSN_CALLEE_ABI (const rtx_insn *@var{insn}) ++This hook returns a description of the ABI used by the target of ++call instruction @var{insn}; see the definition of ++@code{predefined_function_abi} for details of the ABI descriptor. ++Only the global function @code{insn_callee_abi} should call this hook ++directly. ++ ++Targets only need to define this hook if they support ++interoperability between several ABIs in the same translation unit. 
+ @end deftypefn + +-@deftypefn {Target Hook} {rtx_insn *} TARGET_RETURN_CALL_WITH_MAX_CLOBBERS (rtx_insn *@var{call_1}, rtx_insn *@var{call_2}) +-This hook returns a pointer to the call that partially clobbers the +-most registers. If a platform supports multiple ABIs where the registers +-that are partially clobbered may vary, this function compares two +-calls and returns a pointer to the one that clobbers the most registers. +-If both calls clobber the same registers, @var{call_1} must be returned. ++@cindex call-used register ++@cindex call-clobbered register ++@cindex call-saved register ++@deftypefn {Target Hook} bool TARGET_HARD_REGNO_CALL_PART_CLOBBERED (unsigned int @var{abi_id}, unsigned int @var{regno}, machine_mode @var{mode}) ++ABIs usually specify that calls must preserve the full contents ++of a particular register, or that calls can alter any part of a ++particular register. This information is captured by the target macro ++@code{CALL_REALLY_USED_REGISTERS}. However, some ABIs specify that calls ++must preserve certain bits of a particular register but can alter others. ++This hook should return true if this applies to at least one of the ++registers in @samp{(reg:@var{mode} @var{regno})}, and if as a result the ++call would alter part of the @var{mode} value. For example, if a call ++preserves the low 32 bits of a 64-bit hard register @var{regno} but can ++clobber the upper 32 bits, this hook should return true for a 64-bit mode ++but false for a 32-bit mode. ++ ++The value of @var{abi_id} comes from the @code{predefined_function_abi} ++structure that describes the ABI of the call; see the definition of the ++structure for more details. If (as is usual) the target uses the same ABI ++for all functions in a translation unit, @var{abi_id} is always 0. + +-The registers clobbered in different ABIs must be a proper subset or +-superset of all other ABIs. @var{call_1} must always be a call insn, +-call_2 may be NULL or a call insn. ++The default implementation returns false, which is correct ++for targets that don't have partly call-clobbered registers. + @end deftypefn + + @deftypefn {Target Hook} {const char *} TARGET_GET_MULTILIB_ABI_NAME (void) +@@ -3961,18 +3971,10 @@ This section describes the macros which let you control how various + types of arguments are passed in registers or how they are arranged in + the stack. + +-@deftypefn {Target Hook} rtx TARGET_FUNCTION_ARG (cumulative_args_t @var{ca}, machine_mode @var{mode}, const_tree @var{type}, bool @var{named}) +-Return an RTX indicating whether a function argument is passed in a +-register and if so, which register. +- +-The arguments are @var{ca}, which summarizes all the previous +-arguments; @var{mode}, the machine mode of the argument; @var{type}, +-the data type of the argument as a tree node or 0 if that is not known +-(which happens for C support library functions); and @var{named}, +-which is @code{true} for an ordinary argument and @code{false} for +-nameless arguments that correspond to @samp{@dots{}} in the called +-function's prototype. @var{type} can be an incomplete type if a +-syntax error has previously occurred. ++@deftypefn {Target Hook} rtx TARGET_FUNCTION_ARG (cumulative_args_t @var{ca}, const function_arg_info @var{&arg}) ++Return an RTX indicating whether function argument @var{arg} is passed ++in a register and if so, which register. Argument @var{ca} summarizes all ++the previous arguments. 
+ + The return value is usually either a @code{reg} RTX for the hard + register in which to pass the argument, or zero to pass the argument +@@ -4020,14 +4022,14 @@ defined, the argument will be computed in the stack and then loaded into + a register. + @end deftypefn + +-@deftypefn {Target Hook} bool TARGET_MUST_PASS_IN_STACK (machine_mode @var{mode}, const_tree @var{type}) +-This target hook should return @code{true} if we should not pass @var{type} ++@deftypefn {Target Hook} bool TARGET_MUST_PASS_IN_STACK (const function_arg_info @var{&arg}) ++This target hook should return @code{true} if we should not pass @var{arg} + solely in registers. The file @file{expr.h} defines a + definition that is usually appropriate, refer to @file{expr.h} for additional + documentation. + @end deftypefn + +-@deftypefn {Target Hook} rtx TARGET_FUNCTION_INCOMING_ARG (cumulative_args_t @var{ca}, machine_mode @var{mode}, const_tree @var{type}, bool @var{named}) ++@deftypefn {Target Hook} rtx TARGET_FUNCTION_INCOMING_ARG (cumulative_args_t @var{ca}, const function_arg_info @var{&arg}) + Define this hook if the caller and callee on the target have different + views of where arguments are passed. Also define this hook if there are + functions that are never directly called, but are invoked by the hardware +@@ -4057,7 +4059,7 @@ Perform a target dependent initialization of pic_offset_table_rtx. + This hook is called at the start of register allocation. + @end deftypefn + +-@deftypefn {Target Hook} int TARGET_ARG_PARTIAL_BYTES (cumulative_args_t @var{cum}, machine_mode @var{mode}, tree @var{type}, bool @var{named}) ++@deftypefn {Target Hook} int TARGET_ARG_PARTIAL_BYTES (cumulative_args_t @var{cum}, const function_arg_info @var{&arg}) + This target hook returns the number of bytes at the beginning of an + argument that must be put in registers. The value must be zero for + arguments that are passed entirely in registers or that are entirely +@@ -4076,11 +4078,11 @@ register to be used by the caller for this argument; likewise + @code{TARGET_FUNCTION_INCOMING_ARG}, for the called function. + @end deftypefn + +-@deftypefn {Target Hook} bool TARGET_PASS_BY_REFERENCE (cumulative_args_t @var{cum}, machine_mode @var{mode}, const_tree @var{type}, bool @var{named}) +-This target hook should return @code{true} if an argument at the ++@deftypefn {Target Hook} bool TARGET_PASS_BY_REFERENCE (cumulative_args_t @var{cum}, const function_arg_info @var{&arg}) ++This target hook should return @code{true} if argument @var{arg} at the + position indicated by @var{cum} should be passed by reference. This + predicate is queried after target independent reasons for being +-passed by reference, such as @code{TREE_ADDRESSABLE (type)}. ++passed by reference, such as @code{TREE_ADDRESSABLE (@var{arg}.type)}. + + If the hook returns true, a copy of that argument is made in memory and a + pointer to the argument is passed instead of the argument itself. +@@ -4088,7 +4090,7 @@ The pointer is passed in whatever way is appropriate for passing a pointer + to that type. + @end deftypefn + +-@deftypefn {Target Hook} bool TARGET_CALLEE_COPIES (cumulative_args_t @var{cum}, machine_mode @var{mode}, const_tree @var{type}, bool @var{named}) ++@deftypefn {Target Hook} bool TARGET_CALLEE_COPIES (cumulative_args_t @var{cum}, const function_arg_info @var{&arg}) + The function argument described by the parameters to this hook is + known to be passed by reference. 
The hook should return true if the + function argument should be copied by the callee instead of copied +@@ -4167,10 +4169,9 @@ argument @var{libname} exists for symmetry with + @c --mew 5feb93 i switched the order of the sentences. --mew 10feb93 + @end defmac + +-@deftypefn {Target Hook} void TARGET_FUNCTION_ARG_ADVANCE (cumulative_args_t @var{ca}, machine_mode @var{mode}, const_tree @var{type}, bool @var{named}) ++@deftypefn {Target Hook} void TARGET_FUNCTION_ARG_ADVANCE (cumulative_args_t @var{ca}, const function_arg_info @var{&arg}) + This hook updates the summarizer variable pointed to by @var{ca} to +-advance past an argument in the argument list. The values @var{mode}, +-@var{type} and @var{named} describe that argument. Once this is done, ++advance past argument @var{arg} in the argument list. Once this is done, + the variable @var{cum} is suitable for analyzing the @emph{following} + argument with @code{TARGET_FUNCTION_ARG}, etc. + +@@ -4331,6 +4332,27 @@ insns involving vector mode @var{mode}. At the very least, it + must have move patterns for this mode. + @end deftypefn + ++@deftypefn {Target Hook} bool TARGET_COMPATIBLE_VECTOR_TYPES_P (const_tree @var{type1}, const_tree @var{type2}) ++Return true if there is no target-specific reason for treating ++vector types @var{type1} and @var{type2} as distinct types. The caller ++has already checked for target-independent reasons, meaning that the ++types are known to have the same mode, to have the same number of elements, ++and to have what the caller considers to be compatible element types. ++ ++The main reason for defining this hook is to reject pairs of types ++that are handled differently by the target's calling convention. ++For example, when a new @var{N}-bit vector architecture is added ++to a target, the target may want to handle normal @var{N}-bit ++@code{VECTOR_TYPE} arguments and return values in the same way as ++before, to maintain backwards compatibility. However, it may also ++provide new, architecture-specific @code{VECTOR_TYPE}s that are passed ++and returned in a more efficient way. It is then important to maintain ++a distinction between the ``normal'' @code{VECTOR_TYPE}s and the new ++architecture-specific ones. ++ ++The default implementation returns true, which is correct for most targets. ++@end deftypefn ++ + @deftypefn {Target Hook} opt_machine_mode TARGET_ARRAY_MODE (machine_mode @var{mode}, unsigned HOST_WIDE_INT @var{nelems}) + Return the mode that GCC should use for an array that has + @var{nelems} elements, with each element having mode @var{mode}. +@@ -5202,7 +5224,7 @@ return value of this function should be an RTX that contains the value + to use as the return of @code{__builtin_saveregs}. + @end deftypefn + +-@deftypefn {Target Hook} void TARGET_SETUP_INCOMING_VARARGS (cumulative_args_t @var{args_so_far}, machine_mode @var{mode}, tree @var{type}, int *@var{pretend_args_size}, int @var{second_time}) ++@deftypefn {Target Hook} void TARGET_SETUP_INCOMING_VARARGS (cumulative_args_t @var{args_so_far}, const function_arg_info @var{&arg}, int *@var{pretend_args_size}, int @var{second_time}) + This target hook offers an alternative to using + @code{__builtin_saveregs} and defining the hook + @code{TARGET_EXPAND_BUILTIN_SAVEREGS}. Use it to store the anonymous +@@ -5213,8 +5235,8 @@ pass all their arguments on the stack. + + The argument @var{args_so_far} points to the @code{CUMULATIVE_ARGS} data + structure, containing the values that are obtained after processing the +-named arguments. 
The arguments @var{mode} and @var{type} describe the +-last named argument---its machine mode and its data type as a tree node. ++named arguments. The argument @var{arg} describes the last of these named ++arguments. + + The target hook should do two things: first, push onto the stack all the + argument registers @emph{not} used for the named arguments, and second, +@@ -5314,12 +5336,6 @@ This hook is used by expand pass to emit insn to store @var{bounds} + returned by function call into @var{slot}. + @end deftypefn + +-@deftypefn {Target Hook} void TARGET_SETUP_INCOMING_VARARG_BOUNDS (cumulative_args_t @var{args_so_far}, machine_mode @var{mode}, tree @var{type}, int *@var{pretend_args_size}, int @var{second_time}) +-Use it to store bounds for anonymous register arguments stored +-into the stack. Arguments meaning is similar to +-@code{TARGET_SETUP_INCOMING_VARARGS}. +-@end deftypefn +- + @node Trampolines + @section Support for Nested Functions + @cindex support for nested functions +@@ -5967,18 +5983,6 @@ instruction pattern. There is no need for the hook to handle these two + implementation approaches itself. + @end deftypefn + +-@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_CONVERSION (unsigned @var{code}, tree @var{dest_type}, tree @var{src_type}) +-This hook should return the DECL of a function that implements conversion of the +-input vector of type @var{src_type} to type @var{dest_type}. +-The value of @var{code} is one of the enumerators in @code{enum tree_code} and +-specifies how the conversion is to be applied +-(truncation, rounding, etc.). +- +-If this hook is defined, the autovectorizer will use the +-@code{TARGET_VECTORIZE_BUILTIN_CONVERSION} target hook when vectorizing +-conversion. Otherwise, it will return @code{NULL_TREE}. +-@end deftypefn +- + @deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (unsigned @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in}) + This hook should return the decl of a function that implements the + vectorized variant of the function with the @code{combined_fn} code +@@ -6698,7 +6702,7 @@ two areas of memory, or to set, clear or store to memory, for example + when copying a @code{struct}. The @code{by_pieces} infrastructure + implements such memory operations as a sequence of load, store or move + insns. Alternate strategies are to expand the +-@code{movmem} or @code{setmem} optabs, to emit a library call, or to emit ++@code{cpymem} or @code{setmem} optabs, to emit a library call, or to emit + unit-by-unit, loop-based operations. + + This target hook should return true if, for a memory operation with a +@@ -6717,7 +6721,7 @@ optimized for speed rather than size. + + Returning true for higher values of @var{size} can improve code generation + for speed if the target does not provide an implementation of the +-@code{movmem} or @code{setmem} standard names, if the @code{movmem} or ++@code{cpymem} or @code{setmem} standard names, if the @code{cpymem} or + @code{setmem} implementation would be more expensive than a sequence of + insns, or if the overhead of a library call would dominate that of + the body of the memory operation. +@@ -11607,6 +11611,21 @@ another @code{CALL_EXPR}. 
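As a user-level illustration of the @code{by_pieces} versus @code{cpymem} discussion above (not part of the patch; the struct and function names are invented), a small fixed-size copy such as the one below is a typical candidate for inline expansion rather than a call to the @code{memcpy} library routine. Which mechanism is chosen depends on the size, the alignment and the target's cost hooks:

    #include <string.h>

    struct packet
    {
      unsigned char header[8];
      unsigned int payload;
    };

    void
    copy_packet (struct packet *dst, const struct packet *src)
    {
      /* A short, known-size copy: GCC may expand this through the
         by_pieces infrastructure or the target's cpymem pattern instead
         of emitting a call to memcpy.  */
      memcpy (dst, src, sizeof *dst);
    }
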
+ @var{arglist} really has type @samp{VEC(tree,gc)*} + @end deftypefn + ++@deftypefn {Target Hook} bool TARGET_CHECK_BUILTIN_CALL (location_t @var{loc}, vec @var{arg_loc}, tree @var{fndecl}, tree @var{orig_fndecl}, unsigned int @var{nargs}, tree *@var{args}) ++Perform semantic checking on a call to a machine-specific built-in ++function after its arguments have been constrained to the function ++signature. Return true if the call is valid, otherwise report an error ++and return false. ++ ++This hook is called after @code{TARGET_RESOLVE_OVERLOADED_BUILTIN}. ++The call was originally to built-in function @var{orig_fndecl}, ++but after the optional @code{TARGET_RESOLVE_OVERLOADED_BUILTIN} ++step is now to built-in function @var{fndecl}. @var{loc} is the ++location of the call and @var{args} is an array of function arguments, ++of which there are @var{nargs}. @var{arg_loc} specifies the location ++of each argument. ++@end deftypefn ++ + @deftypefn {Target Hook} tree TARGET_FOLD_BUILTIN (tree @var{fndecl}, int @var{n_args}, tree *@var{argp}, bool @var{ignore}) + Fold a call to a machine specific built-in function that was set up by + @samp{TARGET_INIT_BUILTINS}. @var{fndecl} is the declaration of the +@@ -11791,28 +11810,6 @@ cannot_modify_jumps_past_reload_p () + @end smallexample + @end deftypefn + +-@deftypefn {Target Hook} reg_class_t TARGET_BRANCH_TARGET_REGISTER_CLASS (void) +-This target hook returns a register class for which branch target register +-optimizations should be applied. All registers in this class should be +-usable interchangeably. After reload, registers in this class will be +-re-allocated and loads will be hoisted out of loops and be subjected +-to inter-block scheduling. +-@end deftypefn +- +-@deftypefn {Target Hook} bool TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED (bool @var{after_prologue_epilogue_gen}) +-Branch target register optimization will by default exclude callee-saved +-registers +-that are not already live during the current function; if this target hook +-returns true, they will be included. The target code must than make sure +-that all target registers in the class returned by +-@samp{TARGET_BRANCH_TARGET_REGISTER_CLASS} that might need saving are +-saved. @var{after_prologue_epilogue_gen} indicates if prologues and +-epilogues have already been generated. Note, even if you only return +-true when @var{after_prologue_epilogue_gen} is false, you still are likely +-to have to make special provisions in @code{INITIAL_ELIMINATION_OFFSET} +-to reserve space for caller-saved target registers. +-@end deftypefn +- + @deftypefn {Target Hook} bool TARGET_HAVE_CONDITIONAL_EXECUTION (void) + This target hook returns true if the target supports conditional execution. + This target hook is required only when the target has several different +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index bc362dca0..89cfb5253 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -1689,6 +1689,9 @@ function calls. + If a register has 0 in @code{CALL_USED_REGISTERS}, the compiler + automatically saves it on function entry and restores it on function + exit, if the register is used within the function. ++ ++Exactly one of @code{CALL_USED_REGISTERS} and @code{CALL_REALLY_USED_REGISTERS} ++must be defined. Modern ports should define @code{CALL_REALLY_USED_REGISTERS}. + @end defmac + + @defmac CALL_REALLY_USED_REGISTERS +@@ -1698,18 +1701,22 @@ exit, if the register is used within the function. 
+ Like @code{CALL_USED_REGISTERS} except this macro doesn't require + that the entire set of @code{FIXED_REGISTERS} be included. + (@code{CALL_USED_REGISTERS} must be a superset of @code{FIXED_REGISTERS}). +-This macro is optional. If not specified, it defaults to the value +-of @code{CALL_USED_REGISTERS}. ++ ++Exactly one of @code{CALL_USED_REGISTERS} and @code{CALL_REALLY_USED_REGISTERS} ++must be defined. Modern ports should define @code{CALL_REALLY_USED_REGISTERS}. + @end defmac + + @cindex call-used register + @cindex call-clobbered register + @cindex call-saved register +-@hook TARGET_HARD_REGNO_CALL_PART_CLOBBERED ++@hook TARGET_FNTYPE_ABI + +-@hook TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS ++@hook TARGET_INSN_CALLEE_ABI + +-@hook TARGET_RETURN_CALL_WITH_MAX_CLOBBERS ++@cindex call-used register ++@cindex call-clobbered register ++@cindex call-saved register ++@hook TARGET_HARD_REGNO_CALL_PART_CLOBBERED + + @hook TARGET_GET_MULTILIB_ABI_NAME + +@@ -3362,6 +3369,8 @@ stack. + + @hook TARGET_VECTOR_MODE_SUPPORTED_P + ++@hook TARGET_COMPATIBLE_VECTOR_TYPES_P ++ + @hook TARGET_ARRAY_MODE + + @hook TARGET_ARRAY_MODE_SUPPORTED_P +@@ -3785,8 +3794,6 @@ These machine description macros help implement varargs: + + @hook TARGET_STORE_RETURNED_BOUNDS + +-@hook TARGET_SETUP_INCOMING_VARARG_BOUNDS +- + @node Trampolines + @section Support for Nested Functions + @cindex support for nested functions +@@ -4160,8 +4167,6 @@ address; but often a machine-dependent strategy can generate better code. + + @hook TARGET_VECTORIZE_VEC_PERM_CONST + +-@hook TARGET_VECTORIZE_BUILTIN_CONVERSION +- + @hook TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION + + @hook TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION +@@ -7934,6 +7939,8 @@ to by @var{ce_info}. + + @hook TARGET_RESOLVE_OVERLOADED_BUILTIN + ++@hook TARGET_CHECK_BUILTIN_CALL ++ + @hook TARGET_FOLD_BUILTIN + + @hook TARGET_GIMPLE_FOLD_BUILTIN +@@ -7999,10 +8006,6 @@ build_type_attribute_variant (@var{mdecl}, + + @hook TARGET_CANNOT_MODIFY_JUMPS_P + +-@hook TARGET_BRANCH_TARGET_REGISTER_CLASS +- +-@hook TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED +- + @hook TARGET_HAVE_CONDITIONAL_EXECUTION + + @hook TARGET_GEN_CCMP_FIRST +diff --git a/gcc/dse.c b/gcc/dse.c +index 4becdcf1c..874ff507c 100644 +--- a/gcc/dse.c ++++ b/gcc/dse.c +@@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. If not see + #include "params.h" + #include "rtl-iter.h" + #include "cfgcleanup.h" ++#include "calls.h" + + /* This file contains three techniques for performing Dead Store + Elimination (dse). 
+@@ -819,7 +820,7 @@ emit_inc_dec_insn_before (rtx mem ATTRIBUTE_UNUSED, + for (cur = new_insn; cur; cur = NEXT_INSN (cur)) + { + info.current = cur; +- note_stores (PATTERN (cur), note_add_store, &info); ++ note_stores (cur, note_add_store, &info); + } + + /* If a failure was flagged above, return 1 so that for_each_inc_dec will +@@ -1976,7 +1977,7 @@ replace_read (store_info *store_info, insn_info_t store_insn, + bitmap regs_set = BITMAP_ALLOC (®_obstack); + + for (this_insn = insns; this_insn != NULL_RTX; this_insn = NEXT_INSN (this_insn)) +- note_stores (PATTERN (this_insn), look_for_hardregs, regs_set); ++ note_stores (this_insn, look_for_hardregs, regs_set); + + bitmap_and_into (regs_set, regs_live); + if (!bitmap_empty_p (regs_set)) +@@ -2341,7 +2342,8 @@ get_call_args (rtx call_insn, tree fn, rtx *args, int nargs) + if (!is_int_mode (TYPE_MODE (TREE_VALUE (arg)), &mode)) + return false; + +- reg = targetm.calls.function_arg (args_so_far, mode, NULL_TREE, true); ++ function_arg_info arg (mode, /*named=*/true); ++ reg = targetm.calls.function_arg (args_so_far, arg); + if (!reg || !REG_P (reg) || GET_MODE (reg) != mode) + return false; + +@@ -2373,7 +2375,7 @@ get_call_args (rtx call_insn, tree fn, rtx *args, int nargs) + if (tmp) + args[idx] = tmp; + +- targetm.calls.function_arg_advance (args_so_far, mode, NULL_TREE, true); ++ targetm.calls.function_arg_advance (args_so_far, arg); + } + if (arg != void_list_node || idx != nargs) + return false; +@@ -2388,7 +2390,7 @@ copy_fixed_regs (const_bitmap in) + bitmap ret; + + ret = ALLOC_REG_SET (NULL); +- bitmap_and (ret, in, fixed_reg_set_regset); ++ bitmap_and (ret, in, bitmap_view (fixed_reg_set)); + return ret; + } + +diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c +index 30c4c7007..a219d7fc3 100644 +--- a/gcc/dwarf2out.c ++++ b/gcc/dwarf2out.c +@@ -16428,7 +16428,6 @@ mem_loc_descriptor (rtx rtl, machine_mode mode, + case CONST_FIXED: + case CLRSB: + case CLOBBER: +- case CLOBBER_HIGH: + break; + + case CONST_STRING: +@@ -18566,6 +18565,24 @@ loc_list_from_tree_1 (tree loc, int want_address, + } + break; + ++ case POLY_INT_CST: ++ { ++ if (want_address) ++ { ++ expansion_failed (loc, NULL_RTX, ++ "constant address with a runtime component"); ++ return 0; ++ } ++ poly_int64 value; ++ if (!poly_int_tree_p (loc, &value)) ++ { ++ expansion_failed (loc, NULL_RTX, "constant too big"); ++ return 0; ++ } ++ ret = int_loc_descriptor (value); ++ } ++ break; ++ + case CONSTRUCTOR: + case REAL_CST: + case STRING_CST: +@@ -19682,6 +19699,7 @@ add_const_value_attribute (dw_die_ref die, rtx rtl) + case MINUS: + case SIGN_EXTEND: + case ZERO_EXTEND: ++ case CONST_POLY_INT: + return false; + + case MEM: +diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c +index 78104603c..d6636ccb0 100644 +--- a/gcc/emit-rtl.c ++++ b/gcc/emit-rtl.c +@@ -2865,7 +2865,6 @@ verify_rtx_sharing (rtx orig, rtx insn) + /* SCRATCH must be shared because they represent distinct values. */ + return; + case CLOBBER: +- case CLOBBER_HIGH: + /* Share clobbers of hard registers (like cc0), but do not share pseudo reg + clobbers or clobbers of hard registers that originated as pseudos. + This is needed to allow safe register renaming. */ +@@ -3119,7 +3118,6 @@ repeat: + /* SCRATCH must be shared because they represent distinct values. */ + return; + case CLOBBER: +- case CLOBBER_HIGH: + /* Share clobbers of hard registers (like cc0), but do not share pseudo reg + clobbers or clobbers of hard registers that originated as pseudos. + This is needed to allow safe register renaming. 
*/ +@@ -5693,7 +5691,6 @@ copy_insn_1 (rtx orig) + case SIMPLE_RETURN: + return orig; + case CLOBBER: +- case CLOBBER_HIGH: + /* Share clobbers of hard registers (like cc0), but do not share pseudo reg + clobbers or clobbers of hard registers that originated as pseudos. + This is needed to allow safe register renaming. */ +@@ -6505,21 +6502,6 @@ gen_hard_reg_clobber (machine_mode mode, unsigned int regno) + gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno))); + } + +-static GTY((deletable)) rtx +-hard_reg_clobbers_high[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; +- +-/* Return a CLOBBER_HIGH expression for register REGNO that clobbers MODE, +- caching into HARD_REG_CLOBBERS_HIGH. */ +-rtx +-gen_hard_reg_clobber_high (machine_mode mode, unsigned int regno) +-{ +- if (hard_reg_clobbers_high[mode][regno]) +- return hard_reg_clobbers_high[mode][regno]; +- else +- return (hard_reg_clobbers_high[mode][regno] +- = gen_rtx_CLOBBER_HIGH (VOIDmode, gen_rtx_REG (mode, regno))); +-} +- + location_t prologue_location; + location_t epilogue_location; + +diff --git a/gcc/emit-rtl.h b/gcc/emit-rtl.h +index 7b1cecd3c..573140e84 100644 +--- a/gcc/emit-rtl.h ++++ b/gcc/emit-rtl.h +@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see + + struct temp_slot; + typedef struct temp_slot *temp_slot_p; ++struct predefined_function_abi; + + /* Information mainlined about RTL representation of incoming arguments. */ + struct GTY(()) incoming_args { +@@ -64,6 +65,14 @@ struct GTY(()) rtl_data { + struct function_subsections subsections; + struct rtl_eh eh; + ++ /* The ABI of the function, i.e. the interface it presents to its callers. ++ This is the ABI that should be queried to see which registers the ++ function needs to save before it uses them. ++ ++ Other functions (including those called by this function) might use ++ different ABIs. */ ++ const predefined_function_abi *GTY((skip)) abi; ++ + /* For function.c */ + + /* # of bytes of outgoing arguments. If ACCUMULATE_OUTGOING_ARGS is +diff --git a/gcc/expr.c b/gcc/expr.c +index 650be8dad..b77f0409e 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -73,7 +73,7 @@ along with GCC; see the file COPYING3. 
If not see + int cse_not_expected; + + static bool block_move_libcall_safe_for_call_parm (void); +-static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT, ++static bool emit_block_move_via_cpymem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT, + unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT); + static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned); +@@ -1645,7 +1645,7 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, + + if (CONST_INT_P (size) && can_move_by_pieces (INTVAL (size), align)) + move_by_pieces (x, y, INTVAL (size), align, RETURN_BEGIN); +- else if (emit_block_move_via_movmem (x, y, size, align, ++ else if (emit_block_move_via_cpymem (x, y, size, align, + expected_align, expected_size, + min_size, max_size, probable_max_size)) + ; +@@ -1723,31 +1723,28 @@ block_move_libcall_safe_for_call_parm (void) + for ( ; arg != void_list_node ; arg = TREE_CHAIN (arg)) + { + machine_mode mode = TYPE_MODE (TREE_VALUE (arg)); +- rtx tmp = targetm.calls.function_arg (args_so_far, mode, +- NULL_TREE, true); ++ function_arg_info arg_info (mode, /*named=*/true); ++ rtx tmp = targetm.calls.function_arg (args_so_far, arg_info); + if (!tmp || !REG_P (tmp)) + return false; +- if (targetm.calls.arg_partial_bytes (args_so_far, mode, NULL, 1)) ++ if (targetm.calls.arg_partial_bytes (args_so_far, arg_info)) + return false; +- targetm.calls.function_arg_advance (args_so_far, mode, +- NULL_TREE, true); ++ targetm.calls.function_arg_advance (args_so_far, arg_info); + } + } + return true; + } + +-/* A subroutine of emit_block_move. Expand a movmem pattern; ++/* A subroutine of emit_block_move. Expand a cpymem pattern; + return true if successful. */ + + static bool +-emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align, ++emit_block_move_via_cpymem (rtx x, rtx y, rtx size, unsigned int align, + unsigned int expected_align, HOST_WIDE_INT expected_size, + unsigned HOST_WIDE_INT min_size, + unsigned HOST_WIDE_INT max_size, + unsigned HOST_WIDE_INT probable_max_size) + { +- int save_volatile_ok = volatile_ok; +- + if (expected_align < align) + expected_align = align; + if (expected_size != -1) +@@ -1759,7 +1756,7 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align, + } + + /* Since this is a move insn, we don't care about volatility. 
*/ +- volatile_ok = 1; ++ temporary_volatile_ok v (true); + + /* Try the most limited insn first, because there's no point + including more than one in the machine description unless +@@ -1769,7 +1766,7 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align, + FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) + { + scalar_int_mode mode = mode_iter.require (); +- enum insn_code code = direct_optab_handler (movmem_optab, mode); ++ enum insn_code code = direct_optab_handler (cpymem_optab, mode); + + if (code != CODE_FOR_nothing + /* We don't need MODE to be narrower than BITS_PER_HOST_WIDE_INT +@@ -1823,14 +1820,10 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align, + create_fixed_operand (&ops[8], NULL); + } + if (maybe_expand_insn (code, nops, ops)) +- { +- volatile_ok = save_volatile_ok; +- return true; +- } ++ return true; + } + } + +- volatile_ok = save_volatile_ok; + return false; + } + +@@ -5841,7 +5834,8 @@ store_expr (tree exp, rtx target, int call_param_p, + copy_blkmode_from_reg (target, temp, TREE_TYPE (exp)); + else + store_bit_field (target, +- INTVAL (expr_size (exp)) * BITS_PER_UNIT, ++ rtx_to_poly_int64 (expr_size (exp)) ++ * BITS_PER_UNIT, + 0, 0, 0, GET_MODE (temp), temp, reverse); + } + else +diff --git a/gcc/final.c b/gcc/final.c +index fefc4874b..7cf9ef1ef 100644 +--- a/gcc/final.c ++++ b/gcc/final.c +@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see + #include "asan.h" + #include "rtl-iter.h" + #include "print-rtl.h" ++#include "function-abi.h" + + #ifdef XCOFF_DEBUGGING_INFO + #include "xcoffout.h" /* Needed for external data declarations. */ +@@ -230,7 +231,6 @@ static int alter_cond (rtx); + #endif + static int align_fuzz (rtx, rtx, int, unsigned); + static void collect_fn_hard_reg_usage (void); +-static tree get_call_fndecl (rtx_insn *); + + /* Initialize data in final at the beginning of a compilation. */ + +@@ -4994,7 +4994,16 @@ collect_fn_hard_reg_usage (void) + if (!targetm.call_fusage_contains_non_callee_clobbers) + return; + +- CLEAR_HARD_REG_SET (function_used_regs); ++ /* Be conservative - mark fixed and global registers as used. */ ++ function_used_regs = fixed_reg_set; ++ ++#ifdef STACK_REGS ++ /* Handle STACK_REGS conservatively, since the df-framework does not ++ provide accurate information for them. */ ++ ++ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) ++ SET_HARD_REG_BIT (function_used_regs, i); ++#endif + + for (insn = get_insns (); insn != NULL_RTX; insn = next_insn (insn)) + { +@@ -5005,97 +5014,23 @@ collect_fn_hard_reg_usage (void) + + if (CALL_P (insn) + && !self_recursive_call_p (insn)) +- { +- if (!get_call_reg_set_usage (insn, &insn_used_regs, +- call_used_reg_set)) +- return; +- +- IOR_HARD_REG_SET (function_used_regs, insn_used_regs); +- } ++ function_used_regs ++ |= insn_callee_abi (insn).full_and_partial_reg_clobbers (); + + find_all_hard_reg_sets (insn, &insn_used_regs, false); +- IOR_HARD_REG_SET (function_used_regs, insn_used_regs); +- } ++ function_used_regs |= insn_used_regs; + +- /* Be conservative - mark fixed and global registers as used. */ +- IOR_HARD_REG_SET (function_used_regs, fixed_reg_set); +- +-#ifdef STACK_REGS +- /* Handle STACK_REGS conservatively, since the df-framework does not +- provide accurate information for them. 
*/ +- +- for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) +- SET_HARD_REG_BIT (function_used_regs, i); +-#endif ++ if (hard_reg_set_subset_p (crtl->abi->full_and_partial_reg_clobbers (), ++ function_used_regs)) ++ return; ++ } + +- /* The information we have gathered is only interesting if it exposes a +- register from the call_used_regs that is not used in this function. */ +- if (hard_reg_set_subset_p (call_used_reg_set, function_used_regs)) +- return; ++ /* Mask out fully-saved registers, so that they don't affect equality ++ comparisons between function_abis. */ ++ function_used_regs &= crtl->abi->full_and_partial_reg_clobbers (); + + node = cgraph_node::rtl_info (current_function_decl); + gcc_assert (node != NULL); + +- COPY_HARD_REG_SET (node->function_used_regs, function_used_regs); +- node->function_used_regs_valid = 1; +-} +- +-/* Get the declaration of the function called by INSN. */ +- +-static tree +-get_call_fndecl (rtx_insn *insn) +-{ +- rtx note, datum; +- +- note = find_reg_note (insn, REG_CALL_DECL, NULL_RTX); +- if (note == NULL_RTX) +- return NULL_TREE; +- +- datum = XEXP (note, 0); +- if (datum != NULL_RTX) +- return SYMBOL_REF_DECL (datum); +- +- return NULL_TREE; +-} +- +-/* Return the cgraph_rtl_info of the function called by INSN. Returns NULL for +- call targets that can be overwritten. */ +- +-static struct cgraph_rtl_info * +-get_call_cgraph_rtl_info (rtx_insn *insn) +-{ +- tree fndecl; +- +- if (insn == NULL_RTX) +- return NULL; +- +- fndecl = get_call_fndecl (insn); +- if (fndecl == NULL_TREE +- || !decl_binds_to_current_def_p (fndecl)) +- return NULL; +- +- return cgraph_node::rtl_info (fndecl); +-} +- +-/* Find hard registers used by function call instruction INSN, and return them +- in REG_SET. Return DEFAULT_SET in REG_SET if not found. */ +- +-bool +-get_call_reg_set_usage (rtx_insn *insn, HARD_REG_SET *reg_set, +- HARD_REG_SET default_set) +-{ +- if (flag_ipa_ra) +- { +- struct cgraph_rtl_info *node = get_call_cgraph_rtl_info (insn); +- if (node != NULL +- && node->function_used_regs_valid) +- { +- COPY_HARD_REG_SET (*reg_set, node->function_used_regs); +- AND_HARD_REG_SET (*reg_set, default_set); +- return true; +- } +- } +- COPY_HARD_REG_SET (*reg_set, default_set); +- targetm.remove_extra_call_preserved_regs (insn, reg_set); +- return false; ++ node->function_used_regs = function_used_regs; + } +diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c +index 702c8b405..e21d8e110 100644 +--- a/gcc/fold-const-call.c ++++ b/gcc/fold-const-call.c +@@ -689,6 +689,36 @@ fold_const_vec_convert (tree ret_type, tree arg) + return elts.build (); + } + ++/* Try to evaluate: ++ ++ IFN_WHILE_ULT (ARG0, ARG1, (TYPE) { ... }) ++ ++ Return the value on success and null on failure. 
*/ ++ ++static tree ++fold_while_ult (tree type, poly_uint64 arg0, poly_uint64 arg1) ++{ ++ if (known_ge (arg0, arg1)) ++ return build_zero_cst (type); ++ ++ if (maybe_ge (arg0, arg1)) ++ return NULL_TREE; ++ ++ poly_uint64 diff = arg1 - arg0; ++ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); ++ if (known_ge (diff, nelts)) ++ return build_all_ones_cst (type); ++ ++ unsigned HOST_WIDE_INT const_diff; ++ if (known_le (diff, nelts) && diff.is_constant (&const_diff)) ++ { ++ tree minus_one = build_minus_one_cst (TREE_TYPE (type)); ++ tree zero = build_zero_cst (TREE_TYPE (type)); ++ return build_vector_a_then_b (type, const_diff, minus_one, zero); ++ } ++ return NULL_TREE; ++} ++ + /* Try to evaluate: + + *RESULT = FN (*ARG) +@@ -1782,6 +1812,14 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1, tree arg2) + } + return NULL_TREE; + ++ case CFN_WHILE_ULT: ++ { ++ poly_uint64 parg0, parg1; ++ if (poly_int_tree_p (arg0, &parg0) && poly_int_tree_p (arg1, &parg1)) ++ return fold_while_ult (type, parg0, parg1); ++ return NULL_TREE; ++ } ++ + default: + return fold_const_call_1 (fn, type, arg0, arg1, arg2); + } +diff --git a/gcc/fold-const.c b/gcc/fold-const.c +index c717f2450..ffc2669a7 100644 +--- a/gcc/fold-const.c ++++ b/gcc/fold-const.c +@@ -3477,7 +3477,8 @@ operand_equal_p (const_tree arg0, const_tree arg1, unsigned int flags) + return (TREE_CODE (arg0) == FUNCTION_DECL + && fndecl_built_in_p (arg0) && fndecl_built_in_p (arg1) + && DECL_BUILT_IN_CLASS (arg0) == DECL_BUILT_IN_CLASS (arg1) +- && DECL_FUNCTION_CODE (arg0) == DECL_FUNCTION_CODE (arg1)); ++ && (DECL_UNCHECKED_FUNCTION_CODE (arg0) ++ == DECL_UNCHECKED_FUNCTION_CODE (arg1))); + + case tcc_exceptional: + if (TREE_CODE (arg0) == CONSTRUCTOR) +@@ -7380,22 +7381,18 @@ native_encode_complex (const_tree expr, unsigned char *ptr, int len, int off) + return rsize + isize; + } + +- +-/* Subroutine of native_encode_expr. Encode the VECTOR_CST +- specified by EXPR into the buffer PTR of length LEN bytes. +- Return the number of bytes placed in the buffer, or zero +- upon failure. */ ++/* Like native_encode_vector, but only encode the first COUNT elements. ++ The other arguments are as for native_encode_vector. */ + + static int +-native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) ++native_encode_vector_part (const_tree expr, unsigned char *ptr, int len, ++ int off, unsigned HOST_WIDE_INT count) + { +- unsigned HOST_WIDE_INT i, count; ++ unsigned HOST_WIDE_INT i; + int size, offset; + tree itype, elem; + + offset = 0; +- if (!VECTOR_CST_NELTS (expr).is_constant (&count)) +- return 0; + itype = TREE_TYPE (TREE_TYPE (expr)); + size = GET_MODE_SIZE (SCALAR_TYPE_MODE (itype)); + for (i = 0; i < count; i++) +@@ -7419,6 +7416,20 @@ native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) + return offset; + } + ++/* Subroutine of native_encode_expr. Encode the VECTOR_CST ++ specified by EXPR into the buffer PTR of length LEN bytes. ++ Return the number of bytes placed in the buffer, or zero ++ upon failure. */ ++ ++static int ++native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) ++{ ++ unsigned HOST_WIDE_INT count; ++ if (!VECTOR_CST_NELTS (expr).is_constant (&count)) ++ return 0; ++ return native_encode_vector_part (expr, ptr, len, off, count); ++} ++ + + /* Subroutine of native_encode_expr. Encode the STRING_CST + specified by EXPR into the buffer PTR of length LEN bytes. 
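As a quick illustration of what the new CFN_WHILE_ULT folding above computes, here is a minimal scalar sketch (written for this note, not part of the patch): lane I of the result is all-ones exactly when ARG0 + I < ARG1, so for constant operands the result is a run of active low lanes.

static unsigned int
while_ult_lane_mask (unsigned int arg0, unsigned int arg1,
                     unsigned int nelts)
{
  /* Model fold_while_ult for fully constant inputs: the first
     MIN (arg1 - arg0, nelts) lanes are active, the rest are zero;
     if arg0 >= arg1 no lane is active at all.  */
  unsigned int mask = 0;
  for (unsigned int i = 0; i < nelts; ++i)
    if (arg0 + i < arg1)
      mask |= 1u << i;
  return mask;
}

For example, while_ult_lane_mask (2, 7, 8) is 0x1f, matching the { -1, -1, -1, -1, -1, 0, 0, 0 } vector that fold_while_ult builds with build_vector_a_then_b.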
+@@ -7714,6 +7725,113 @@ can_native_interpret_type_p (tree type) + } + } + ++/* Read a vector of type TYPE from the target memory image given by BYTES, ++ starting at byte FIRST_BYTE. The vector is known to be encodable using ++ NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each, ++ and BYTES is known to have enough bytes to supply NPATTERNS * ++ NELTS_PER_PATTERN vector elements. Each element of BYTES contains ++ BITS_PER_UNIT bits and the bytes are in target memory order. ++ ++ Return the vector on success, otherwise return null. */ ++ ++static tree ++native_decode_vector_tree (tree type, vec bytes, ++ unsigned int first_byte, unsigned int npatterns, ++ unsigned int nelts_per_pattern) ++{ ++ tree_vector_builder builder (type, npatterns, nelts_per_pattern); ++ tree elt_type = TREE_TYPE (type); ++ unsigned int elt_bits = tree_to_uhwi (TYPE_SIZE (elt_type)); ++ if (VECTOR_BOOLEAN_TYPE_P (type) && elt_bits <= BITS_PER_UNIT) ++ { ++ /* This is the only case in which elements can be smaller than a byte. ++ Element 0 is always in the lsb of the containing byte. */ ++ elt_bits = TYPE_PRECISION (elt_type); ++ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) ++ { ++ unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits; ++ unsigned int byte_index = bit_index / BITS_PER_UNIT; ++ unsigned int lsb = bit_index % BITS_PER_UNIT; ++ builder.quick_push (bytes[byte_index] & (1 << lsb) ++ ? build_all_ones_cst (elt_type) ++ : build_zero_cst (elt_type)); ++ } ++ } ++ else ++ { ++ unsigned int elt_bytes = elt_bits / BITS_PER_UNIT; ++ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) ++ { ++ tree elt = native_interpret_expr (elt_type, &bytes[first_byte], ++ elt_bytes); ++ if (!elt) ++ return NULL_TREE; ++ builder.quick_push (elt); ++ first_byte += elt_bytes; ++ } ++ } ++ return builder.build (); ++} ++ ++/* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating ++ directly on the VECTOR_CST encoding, in a way that works for variable- ++ length vectors. Return the resulting VECTOR_CST on success or null ++ on failure. */ ++ ++static tree ++fold_view_convert_vector_encoding (tree type, tree expr) ++{ ++ tree expr_type = TREE_TYPE (expr); ++ poly_uint64 type_bits, expr_bits; ++ if (!poly_int_tree_p (TYPE_SIZE (type), &type_bits) ++ || !poly_int_tree_p (TYPE_SIZE (expr_type), &expr_bits)) ++ return NULL_TREE; ++ ++ poly_uint64 type_units = TYPE_VECTOR_SUBPARTS (type); ++ poly_uint64 expr_units = TYPE_VECTOR_SUBPARTS (expr_type); ++ unsigned int type_elt_bits = vector_element_size (type_bits, type_units); ++ unsigned int expr_elt_bits = vector_element_size (expr_bits, expr_units); ++ ++ /* We can only preserve the semantics of a stepped pattern if the new ++ vector element is an integer of the same size. */ ++ if (VECTOR_CST_STEPPED_P (expr) ++ && (!INTEGRAL_TYPE_P (type) || type_elt_bits != expr_elt_bits)) ++ return NULL_TREE; ++ ++ /* The number of bits needed to encode one element from every pattern ++ of the original vector. */ ++ unsigned int expr_sequence_bits ++ = VECTOR_CST_NPATTERNS (expr) * expr_elt_bits; ++ ++ /* The number of bits needed to encode one element from every pattern ++ of the result. */ ++ unsigned int type_sequence_bits ++ = least_common_multiple (expr_sequence_bits, type_elt_bits); ++ ++ /* Don't try to read more bytes than are available, which can happen ++ for constant-sized vectors if TYPE has larger elements than EXPR_TYPE. 
++ The general VIEW_CONVERT handling can cope with that case, so there's ++ no point complicating things here. */ ++ unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (expr); ++ unsigned int buffer_bytes = CEIL (nelts_per_pattern * type_sequence_bits, ++ BITS_PER_UNIT); ++ unsigned int buffer_bits = buffer_bytes * BITS_PER_UNIT; ++ if (known_gt (buffer_bits, expr_bits)) ++ return NULL_TREE; ++ ++ /* Get enough bytes of EXPR to form the new encoding. */ ++ auto_vec buffer (buffer_bytes); ++ buffer.quick_grow (buffer_bytes); ++ if (native_encode_vector_part (expr, buffer.address (), buffer_bytes, 0, ++ buffer_bits / expr_elt_bits) ++ != (int) buffer_bytes) ++ return NULL_TREE; ++ ++ /* Reencode the bytes as TYPE. */ ++ unsigned int type_npatterns = type_sequence_bits / type_elt_bits; ++ return native_decode_vector_tree (type, buffer, 0, type_npatterns, ++ nelts_per_pattern); ++} + + /* Fold a VIEW_CONVERT_EXPR of a constant expression EXPR to type + TYPE at compile-time. If we're unable to perform the conversion +@@ -7730,6 +7848,10 @@ fold_view_convert_expr (tree type, tree expr) + if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) + return NULL_TREE; + ++ if (VECTOR_TYPE_P (type) && TREE_CODE (expr) == VECTOR_CST) ++ if (tree res = fold_view_convert_vector_encoding (type, expr)) ++ return res; ++ + len = native_encode_expr (expr, buffer, sizeof (buffer)); + if (len == 0) + return NULL_TREE; +@@ -9030,7 +9152,7 @@ vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) + selector. Return the folded VECTOR_CST or CONSTRUCTOR if successful, + NULL_TREE otherwise. */ + +-static tree ++tree + fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel) + { + unsigned int i; +@@ -9254,7 +9376,7 @@ tree_expr_nonzero_warnv_p (tree t, bool *strict_overflow_p) + tree fndecl = get_callee_fndecl (t); + if (!fndecl) return false; + if (flag_delete_null_pointer_checks && !flag_check_new +- && DECL_IS_OPERATOR_NEW (fndecl) ++ && DECL_IS_OPERATOR_NEW_P (fndecl) + && !TREE_NOTHROW (fndecl)) + return true; + if (flag_delete_null_pointer_checks +@@ -11778,7 +11900,10 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, + return NULL_TREE; + + case VEC_PERM_EXPR: +- if (TREE_CODE (arg2) == VECTOR_CST) ++ /* Perform constant folding of BIT_INSERT_EXPR. */ ++ if (TREE_CODE (arg2) == VECTOR_CST ++ && TREE_CODE (op0) == VECTOR_CST ++ && TREE_CODE (op1) == VECTOR_CST) + { + /* Build a vector of integers from the tree mask. */ + vec_perm_builder builder; +@@ -11789,61 +11914,7 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); + bool single_arg = (op0 == op1); + vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); +- +- /* Check for cases that fold to OP0 or OP1 in their original +- element order. */ +- if (sel.series_p (0, 1, 0, 1)) +- return op0; +- if (sel.series_p (0, 1, nelts, 1)) +- return op1; +- +- if (!single_arg) +- { +- if (sel.all_from_input_p (0)) +- op1 = op0; +- else if (sel.all_from_input_p (1)) +- { +- op0 = op1; +- sel.rotate_inputs (1); +- } +- } +- +- if ((TREE_CODE (op0) == VECTOR_CST +- || TREE_CODE (op0) == CONSTRUCTOR) +- && (TREE_CODE (op1) == VECTOR_CST +- || TREE_CODE (op1) == CONSTRUCTOR)) +- { +- tree t = fold_vec_perm (type, op0, op1, sel); +- if (t != NULL_TREE) +- return t; +- } +- +- bool changed = (op0 == op1 && !single_arg); +- +- /* Generate a canonical form of the selector. 
*/ +- if (arg2 == op2 && sel.encoding () != builder) +- { +- /* Some targets are deficient and fail to expand a single +- argument permutation while still allowing an equivalent +- 2-argument version. */ +- if (sel.ninputs () == 2 +- || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) +- op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); +- else +- { +- vec_perm_indices sel2 (builder, 2, nelts); +- if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) +- op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2); +- else +- /* Not directly supported with either encoding, +- so use the preferred form. */ +- op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); +- } +- changed = true; +- } +- +- if (changed) +- return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2); ++ return fold_vec_perm (type, op0, op1, sel); + } + return NULL_TREE; + +diff --git a/gcc/fold-const.h b/gcc/fold-const.h +index e2e662463..1d94e2894 100644 +--- a/gcc/fold-const.h ++++ b/gcc/fold-const.h +@@ -100,6 +100,9 @@ extern tree fold_bit_and_mask (tree, tree, enum tree_code, + tree, enum tree_code, tree, tree, + tree, enum tree_code, tree, tree, tree *); + extern tree fold_read_from_constant_string (tree); ++#if GCC_VEC_PERN_INDICES_H ++extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &); ++#endif + extern bool wide_int_binop (wide_int &res, enum tree_code, + const wide_int &arg1, const wide_int &arg2, + signop, wi::overflow_type *); +diff --git a/gcc/function-abi.cc b/gcc/function-abi.cc +new file mode 100644 +index 000000000..b4a183963 +--- /dev/null ++++ b/gcc/function-abi.cc +@@ -0,0 +1,260 @@ ++/* Information about fuunction binary interfaces. ++ Copyright (C) 2019 Free Software Foundation, Inc. ++ ++This file is part of GCC ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "rtl.h" ++#include "tree.h" ++#include "regs.h" ++#include "function-abi.h" ++#include "varasm.h" ++#include "cgraph.h" ++ ++target_function_abi_info default_target_function_abi_info; ++#if SWITCHABLE_TARGET ++target_function_abi_info *this_target_function_abi_info ++ = &default_target_function_abi_info; ++#endif ++ ++/* Initialize a predefined function ABI with the given values of ++ ID and FULL_REG_CLOBBERS. */ ++ ++void ++predefined_function_abi::initialize (unsigned int id, ++ const_hard_reg_set full_reg_clobbers) ++{ ++ m_id = id; ++ m_initialized = true; ++ m_full_reg_clobbers = full_reg_clobbers; ++ ++ /* Set up the value of m_full_and_partial_reg_clobbers. ++ ++ If the ABI specifies that part of a hard register R is call-clobbered, ++ we should be able to find a single-register mode M for which ++ targetm.hard_regno_call_part_clobbered (m_id, R, M) is true. ++ In other words, it shouldn't be the case that R can hold all ++ single-register modes across a call, but can't hold part of ++ a multi-register mode. 
++ ++ If that assumption doesn't hold for a future target, we would need ++ to change the interface of TARGET_HARD_REGNO_CALL_PART_CLOBBERED so ++ that it tells us which registers in a multi-register value are ++ actually clobbered. */ ++ m_full_and_partial_reg_clobbers = full_reg_clobbers; ++ for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) ++ { ++ machine_mode mode = (machine_mode) i; ++ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) ++ if (targetm.hard_regno_mode_ok (regno, mode) ++ && hard_regno_nregs (regno, mode) == 1 ++ && targetm.hard_regno_call_part_clobbered (m_id, regno, mode)) ++ SET_HARD_REG_BIT (m_full_and_partial_reg_clobbers, regno); ++ } ++ ++ /* For each mode MODE, work out which registers are unable to hold ++ any part of a MODE value across a call, i.e. those for which no ++ overlapping call-preserved (reg:MODE REGNO) exists. ++ ++ We assume that this can be flipped around to say that a call ++ preserves (reg:MODE REGNO) unless the register overlaps this set. ++ The usual reason for this being true is that if (reg:MODE REGNO) ++ contains a part-clobbered register, that register would be ++ part-clobbered regardless of which part of MODE it holds. ++ For example, if (reg:M 2) occupies two registers and if the ++ register 3 portion of it is part-clobbered, (reg:M 3) is usually ++ either invalid or also part-clobbered. */ ++ for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) ++ { ++ machine_mode mode = (machine_mode) i; ++ m_mode_clobbers[i] = m_full_and_partial_reg_clobbers; ++ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) ++ if (targetm.hard_regno_mode_ok (regno, mode) ++ && !overlaps_hard_reg_set_p (m_full_reg_clobbers, mode, regno) ++ && !targetm.hard_regno_call_part_clobbered (m_id, regno, mode)) ++ remove_from_hard_reg_set (&m_mode_clobbers[i], mode, regno); ++ } ++ ++ /* Check that the assumptions above actually hold, i.e. that testing ++ for single-register modes makes sense, and that overlap tests for ++ mode_clobbers work as expected. */ ++ if (flag_checking) ++ for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) ++ { ++ machine_mode mode = (machine_mode) i; ++ const_hard_reg_set all_clobbers = m_full_and_partial_reg_clobbers; ++ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) ++ if (targetm.hard_regno_mode_ok (regno, mode) ++ && !overlaps_hard_reg_set_p (m_full_reg_clobbers, mode, regno) ++ && targetm.hard_regno_call_part_clobbered (m_id, regno, mode)) ++ gcc_assert (overlaps_hard_reg_set_p (all_clobbers, mode, regno) ++ && overlaps_hard_reg_set_p (m_mode_clobbers[i], ++ mode, regno)); ++ } ++} ++ ++/* If the ABI has been initialized, add REGNO to the set of registers ++ that can be completely altered by a call. */ ++ ++void ++predefined_function_abi::add_full_reg_clobber (unsigned int regno) ++{ ++ if (!m_initialized) ++ return; ++ ++ SET_HARD_REG_BIT (m_full_reg_clobbers, regno); ++ SET_HARD_REG_BIT (m_full_and_partial_reg_clobbers, regno); ++ for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) ++ SET_HARD_REG_BIT (m_mode_clobbers[i], regno); ++} ++ ++/* Return the set of registers that the caller of the recorded functions must ++ save in order to honor the requirements of CALLER_ABI. 
*/ ++ ++HARD_REG_SET ++function_abi_aggregator:: ++caller_save_regs (const function_abi &caller_abi) const ++{ ++ HARD_REG_SET result; ++ CLEAR_HARD_REG_SET (result); ++ for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id) ++ { ++ const predefined_function_abi &callee_abi = function_abis[abi_id]; ++ ++ /* Skip cases that clearly aren't problematic. */ ++ if (abi_id == caller_abi.id () ++ || hard_reg_set_empty_p (m_abi_clobbers[abi_id])) ++ continue; ++ ++ /* Collect the set of registers that can be "more clobbered" by ++ CALLEE_ABI than by CALLER_ABI. */ ++ HARD_REG_SET extra_clobbers; ++ CLEAR_HARD_REG_SET (extra_clobbers); ++ for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) ++ { ++ machine_mode mode = (machine_mode) i; ++ extra_clobbers |= (callee_abi.mode_clobbers (mode) ++ & ~caller_abi.mode_clobbers (mode)); ++ } ++ ++ /* Restrict it to the set of registers that we actually saw ++ clobbers for (e.g. taking -fipa-ra into account). */ ++ result |= (extra_clobbers & m_abi_clobbers[abi_id]); ++ } ++ return result; ++} ++ ++/* Return the set of registers that cannot be used to hold a value of ++ mode MODE across the calls in a region described by ABIS and MASK, where: ++ ++ * Bit ID of ABIS is set if the region contains a call with ++ function_abi identifier ID. ++ ++ * MASK contains all the registers that are fully or partially ++ clobbered by calls in the region. ++ ++ This is not quite as accurate as testing each individual call, ++ but it's a close and conservatively-correct approximation. ++ It's much better for some targets than just using MASK. */ ++ ++HARD_REG_SET ++call_clobbers_in_region (unsigned int abis, const_hard_reg_set mask, ++ machine_mode mode) ++{ ++ HARD_REG_SET result; ++ CLEAR_HARD_REG_SET (result); ++ for (unsigned int id = 0; abis; abis >>= 1, ++id) ++ if (abis & 1) ++ result |= function_abis[id].mode_clobbers (mode); ++ return result & mask; ++} ++ ++/* Return the predefined ABI used by functions with type TYPE. */ ++ ++const predefined_function_abi & ++fntype_abi (const_tree type) ++{ ++ gcc_assert (FUNC_OR_METHOD_TYPE_P (type)); ++ if (targetm.calls.fntype_abi) ++ return targetm.calls.fntype_abi (type); ++ return default_function_abi; ++} ++ ++/* Return the ABI of function decl FNDECL. */ ++ ++function_abi ++fndecl_abi (const_tree fndecl) ++{ ++ gcc_assert (TREE_CODE (fndecl) == FUNCTION_DECL); ++ const predefined_function_abi &base_abi = fntype_abi (TREE_TYPE (fndecl)); ++ ++ if (flag_ipa_ra && decl_binds_to_current_def_p (fndecl)) ++ if (cgraph_rtl_info *info = cgraph_node::rtl_info (fndecl)) ++ return function_abi (base_abi, info->function_used_regs); ++ ++ return base_abi; ++} ++ ++/* Return the ABI of the function called by INSN. */ ++ ++function_abi ++insn_callee_abi (const rtx_insn *insn) ++{ ++ gcc_assert (insn && CALL_P (insn)); ++ ++ if (flag_ipa_ra) ++ if (tree fndecl = get_call_fndecl (insn)) ++ return fndecl_abi (fndecl); ++ ++ if (targetm.calls.insn_callee_abi) ++ return targetm.calls.insn_callee_abi (insn); ++ ++ return default_function_abi; ++} ++ ++/* Return the ABI of the function called by CALL_EXPR EXP. Return the ++ default ABI for erroneous calls. 
*/ ++ ++function_abi ++expr_callee_abi (const_tree exp) ++{ ++ gcc_assert (TREE_CODE (exp) == CALL_EXPR); ++ ++ if (tree fndecl = get_callee_fndecl (exp)) ++ return fndecl_abi (fndecl); ++ ++ tree callee = CALL_EXPR_FN (exp); ++ if (callee == error_mark_node) ++ return default_function_abi; ++ ++ tree type = TREE_TYPE (callee); ++ if (type == error_mark_node) ++ return default_function_abi; ++ ++ if (POINTER_TYPE_P (type)) ++ { ++ type = TREE_TYPE (type); ++ if (type == error_mark_node) ++ return default_function_abi; ++ } ++ ++ return fntype_abi (type); ++} +diff --git a/gcc/function-abi.h b/gcc/function-abi.h +new file mode 100644 +index 000000000..96a49dfbe +--- /dev/null ++++ b/gcc/function-abi.h +@@ -0,0 +1,320 @@ ++/* Information about fuunction binary interfaces. ++ Copyright (C) 2019 Free Software Foundation, Inc. ++ ++This file is part of GCC ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef GCC_FUNCTION_ABI_H ++#define GCC_FUNCTION_ABI_H ++ ++/* Most targets use the same ABI for all functions in a translation ++ unit, but some targets support interoperability between several ABIs. ++ Each such ABI has a unique 0-based identifier, with 0 always being ++ the default choice of ABI. ++ ++ NUM_ABI_IDS is the maximum number of such ABIs that GCC can handle at once. ++ A bitfield with this number of bits can represent any combinaion of the ++ supported ABIs. */ ++const size_t NUM_ABI_IDS = 8; ++ ++/* Information about one of the target's predefined ABIs. */ ++class predefined_function_abi ++{ ++public: ++ /* A target-specific identifier for this ABI. The value must be in ++ the range [0, NUM_ABI_IDS - 1]. */ ++ unsigned int id () const { return m_id; } ++ ++ /* True if this ABI has been initialized. */ ++ bool initialized_p () const { return m_initialized; } ++ ++ /* Return true if a function call is allowed to alter every bit of ++ register REGNO, so that the register contains an arbitrary value ++ on return. If so, the register cannot hold any part of a value ++ that is live across a call. */ ++ bool ++ clobbers_full_reg_p (unsigned int regno) const ++ { ++ return TEST_HARD_REG_BIT (m_full_reg_clobbers, regno); ++ } ++ ++ /* Return true if a function call is allowed to alter some or all bits ++ of register REGNO. ++ ++ This is true whenever clobbers_full_reg_p (REGNO) is true. It is ++ also true if, for example, the ABI says that a call must preserve the ++ low 32 or 64 bits of REGNO, but can clobber the upper bits of REGNO. ++ In the latter case, it is possible for REGNO to hold values that ++ are live across a call, provided that the value occupies only the ++ call-preserved part of the register. */ ++ bool ++ clobbers_at_least_part_of_reg_p (unsigned int regno) const ++ { ++ return TEST_HARD_REG_BIT (m_full_and_partial_reg_clobbers, regno); ++ } ++ ++ /* Return true if a function call is allowed to clobber at least part ++ of (reg:MODE REGNO). If so, it is not possible for the register ++ as a whole to be live across a call. 
*/ ++ bool ++ clobbers_reg_p (machine_mode mode, unsigned int regno) const ++ { ++ return overlaps_hard_reg_set_p (m_mode_clobbers[mode], mode, regno); ++ } ++ ++ /* Return the set of registers that a function call is allowed to ++ alter completely, so that the registers contain arbitrary values ++ on return. This doesn't include registers that a call can only ++ partly clobber (as per TARGET_HARD_REGNO_CALL_PART_CLOBBERED). ++ ++ These registers cannot hold any part of a value that is live across ++ a call. */ ++ HARD_REG_SET full_reg_clobbers () const { return m_full_reg_clobbers; } ++ ++ /* Return the set of registers that a function call is allowed to alter ++ to some degree. For example, if an ABI says that a call must preserve ++ the low 32 or 64 bits of a register R, but can clobber the upper bits ++ of R, R would be in this set but not in full_reg_clobbers (). ++ ++ This set is a superset of full_reg_clobbers (). It is possible for a ++ register in full_and_partial_reg_clobbers () & ~full_reg_clobbers () ++ to contain values that are live across a call, provided that the live ++ value only occupies the call-preserved part of the register. */ ++ HARD_REG_SET ++ full_and_partial_reg_clobbers () const ++ { ++ return m_full_and_partial_reg_clobbers; ++ } ++ ++ /* Return the set of registers that cannot be used to hold a value of ++ mode MODE across a function call. That is: ++ ++ (reg:REGNO MODE) ++ ++ might be clobbered by a call whenever: ++ ++ overlaps_hard_reg_set (mode_clobbers (MODE), MODE, REGNO) ++ ++ In allocation terms, the registers in the returned set conflict ++ with any value of mode MODE that is live across a call. */ ++ HARD_REG_SET ++ mode_clobbers (machine_mode mode) const ++ { ++ return m_mode_clobbers[mode]; ++ } ++ ++ void initialize (unsigned int, const_hard_reg_set); ++ void add_full_reg_clobber (unsigned int); ++ ++private: ++ unsigned int m_id : NUM_ABI_IDS; ++ unsigned int m_initialized : 1; ++ HARD_REG_SET m_full_reg_clobbers; ++ HARD_REG_SET m_full_and_partial_reg_clobbers; ++ HARD_REG_SET m_mode_clobbers[NUM_MACHINE_MODES]; ++}; ++ ++/* Describes either a predefined ABI or the ABI of a particular function. ++ In the latter case, the ABI might make use of extra function-specific ++ information, such as for -fipa-ra. */ ++class function_abi ++{ ++public: ++ /* Initialize the structure for a general function with the given ABI. */ ++ function_abi (const predefined_function_abi &base_abi) ++ : m_base_abi (&base_abi), ++ m_mask (base_abi.full_and_partial_reg_clobbers ()) {} ++ ++ /* Initialize the structure for a function that has the given ABI and ++ that is known not to clobber registers outside MASK. */ ++ function_abi (const predefined_function_abi &base_abi, ++ const_hard_reg_set mask) ++ : m_base_abi (&base_abi), m_mask (mask) {} ++ ++ /* The predefined ABI from which this ABI is derived. */ ++ const predefined_function_abi &base_abi () const { return *m_base_abi; } ++ ++ /* The target-specific identifier of the predefined ABI. */ ++ unsigned int id () const { return m_base_abi->id (); } ++ ++ /* See the corresponding predefined_function_abi functions for ++ details about the following functions. 
*/ ++ ++ HARD_REG_SET ++ full_reg_clobbers () const ++ { ++ return m_mask & m_base_abi->full_reg_clobbers (); ++ } ++ ++ HARD_REG_SET ++ full_and_partial_reg_clobbers () const ++ { ++ return m_mask & m_base_abi->full_and_partial_reg_clobbers (); ++ } ++ ++ HARD_REG_SET ++ mode_clobbers (machine_mode mode) const ++ { ++ return m_mask & m_base_abi->mode_clobbers (mode); ++ } ++ ++ bool ++ clobbers_full_reg_p (unsigned int regno) const ++ { ++ return (TEST_HARD_REG_BIT (m_mask, regno) ++ & m_base_abi->clobbers_full_reg_p (regno)); ++ } ++ ++ bool ++ clobbers_at_least_part_of_reg_p (unsigned int regno) const ++ { ++ return (TEST_HARD_REG_BIT (m_mask, regno) ++ & m_base_abi->clobbers_at_least_part_of_reg_p (regno)); ++ } ++ ++ bool ++ clobbers_reg_p (machine_mode mode, unsigned int regno) const ++ { ++ return overlaps_hard_reg_set_p (mode_clobbers (mode), mode, regno); ++ } ++ ++ bool ++ operator== (const function_abi &other) const ++ { ++ return m_base_abi == other.m_base_abi && m_mask == other.m_mask; ++ } ++ ++ bool ++ operator!= (const function_abi &other) const ++ { ++ return !operator== (other); ++ } ++ ++protected: ++ const predefined_function_abi *m_base_abi; ++ HARD_REG_SET m_mask; ++}; ++ ++/* This class collects information about the ABIs of functions that are ++ called in a particular region of code. It is mostly intended to be ++ used as a local variable during an IR walk. */ ++class function_abi_aggregator ++{ ++public: ++ function_abi_aggregator () : m_abi_clobbers () {} ++ ++ /* Record that the code region calls a function with the given ABI. */ ++ void ++ note_callee_abi (const function_abi &abi) ++ { ++ m_abi_clobbers[abi.id ()] |= abi.full_and_partial_reg_clobbers (); ++ } ++ ++ HARD_REG_SET caller_save_regs (const function_abi &) const; ++ ++private: ++ HARD_REG_SET m_abi_clobbers[NUM_ABI_IDS]; ++}; ++ ++struct target_function_abi_info ++{ ++ /* An array of all the target ABIs that are available in this ++ translation unit. Not all entries are used for all targets, ++ but the structures are relatively small, and using a fixed-size ++ array avoids extra indirection. ++ ++ There are various ways of getting an ABI descriptor: ++ ++ * fndecl_abi (FNDECL) is the ABI of function FNDECL. ++ ++ * fntype_abi (FNTYPE) is the ABI of a function with type FNTYPE. ++ ++ * crtl->abi is the ABI of the function that we are currently ++ compiling to rtl. ++ ++ * insn_callee_abi (INSN) is the ABI used by the target of call insn INSN. ++ ++ * eh_edge_abi is the "ABI" used when taking an EH edge from an ++ exception-throwing statement to an exception handler. Catching ++ exceptions from calls can be treated as an abnormal return from ++ those calls, and this ABI therefore describes the ABI of functions ++ on such an abnormal return. Statements that throw non-call ++ exceptions can be treated as being implicitly wrapped in a call ++ that has such an abnormal return. ++ ++ At present, no target needs to support more than one EH ABI. ++ ++ * function_abis[N] is the ABI with identifier N. This can be useful ++ when referring back to ABIs that have been collected by number in ++ a bitmask, such as after walking function calls in a particular ++ region of code. ++ ++ * default_function_abi refers specifically to the target's default ++ choice of ABI, regardless of which (if any) functions actually ++ use it. This ABI and data derived from it do *not* provide ++ globally conservatively-correct information, so it is only ++ useful in very specific circumstances. 
*/ ++ predefined_function_abi x_function_abis[NUM_ABI_IDS]; ++}; ++ ++extern target_function_abi_info default_target_function_abi_info; ++#if SWITCHABLE_TARGET ++extern target_function_abi_info *this_target_function_abi_info; ++#else ++#define this_target_function_abi_info (&default_target_function_abi_info) ++#endif ++ ++/* See the comment above x_function_abis for when these macros should be used. ++ At present, eh_edge_abi is always the default ABI, but that could change ++ in future if a target needs it to. */ ++#define function_abis \ ++ (this_target_function_abi_info->x_function_abis) ++#define default_function_abi \ ++ (this_target_function_abi_info->x_function_abis[0]) ++#define eh_edge_abi default_function_abi ++ ++extern HARD_REG_SET call_clobbers_in_region (unsigned int, const_hard_reg_set, ++ machine_mode mode); ++ ++/* Return true if (reg:MODE REGNO) might be clobbered by one of the ++ calls in a region described by ABIS and MASK, where: ++ ++ * Bit ID of ABIS is set if the region contains a call with ++ function_abi identifier ID. ++ ++ * MASK contains all the registers that are fully or partially ++ clobbered by calls in the region. ++ ++ This is not quite as accurate as testing each individual call, ++ but it's a close and conservatively-correct approximation. ++ It's much better for some targets than: ++ ++ overlaps_hard_reg_set_p (MASK, MODE, REGNO). */ ++ ++inline bool ++call_clobbered_in_region_p (unsigned int abis, const_hard_reg_set mask, ++ machine_mode mode, unsigned int regno) ++{ ++ HARD_REG_SET clobbers = call_clobbers_in_region (abis, mask, mode); ++ return overlaps_hard_reg_set_p (clobbers, mode, regno); ++} ++ ++extern const predefined_function_abi &fntype_abi (const_tree); ++extern function_abi fndecl_abi (const_tree); ++extern function_abi insn_callee_abi (const rtx_insn *); ++extern function_abi expr_callee_abi (const_tree); ++ ++#endif +diff --git a/gcc/function.c b/gcc/function.c +index acf9f9e60..6d5574244 100644 +--- a/gcc/function.c ++++ b/gcc/function.c +@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see + #include "attribs.h" + #include "gimple.h" + #include "options.h" ++#include "function-abi.h" + + /* So we can assign to cfun in this file. */ + #undef cfun +@@ -2121,7 +2122,7 @@ aggregate_value_p (const_tree exp, const_tree fntype) + regno = REGNO (reg); + nregs = hard_regno_nregs (regno, TYPE_MODE (type)); + for (i = 0; i < nregs; i++) +- if (! call_used_regs[regno + i]) ++ if (! call_used_or_fixed_reg_p (regno + i)) + return 1; + + return 0; +@@ -2454,13 +2455,15 @@ assign_parm_find_data_types (struct assign_parm_data_all *all, tree parm, + passed_type = TREE_TYPE (first_field (passed_type)); + + /* See if this arg was passed by invisible reference. */ +- if (pass_by_reference (&all->args_so_far_v, passed_mode, +- passed_type, data->named_arg)) +- { +- passed_type = nominal_type = build_pointer_type (passed_type); +- data->passed_pointer = true; +- passed_mode = nominal_mode = TYPE_MODE (nominal_type); +- } ++ { ++ function_arg_info arg (passed_type, passed_mode, data->named_arg); ++ if (apply_pass_by_reference_rules (&all->args_so_far_v, arg)) ++ { ++ passed_type = nominal_type = arg.type; ++ data->passed_pointer = true; ++ passed_mode = nominal_mode = arg.mode; ++ } ++ } + + /* Find mode as it is passed by the ABI. 
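For orientation, the sketch below (invented for this note, not taken from the patch) shows how a pass would consult the interface declared in the new function-abi.h instead of the old call_used_regs / get_call_reg_set_usage combination; the helper name is hypothetical, but each call matches a declaration in the header above.

static bool
value_survives_call_p (rtx_insn *call, machine_mode mode, unsigned int regno)
{
  /* Ask the callee's ABI directly; insn_callee_abi folds in -fipa-ra
     information via fndecl_abi when the callee binds locally.  */
  gcc_assert (CALL_P (call));
  function_abi abi = insn_callee_abi (call);

  /* (reg:MODE REGNO) can stay live across CALL only if no part of it
     is clobbered under that ABI.  */
  return !abi.clobbers_reg_p (mode, regno);
}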
*/ + unsignedp = TYPE_UNSIGNED (passed_type); +@@ -2483,9 +2486,9 @@ assign_parms_setup_varargs (struct assign_parm_data_all *all, + { + int varargs_pretend_bytes = 0; + +- targetm.calls.setup_incoming_varargs (all->args_so_far, +- data->promoted_mode, +- data->passed_type, ++ function_arg_info last_named_arg (data->passed_type, data->promoted_mode, ++ /*named=*/true); ++ targetm.calls.setup_incoming_varargs (all->args_so_far, last_named_arg, + &varargs_pretend_bytes, no_rtl); + + /* If the back-end has requested extra stack space, record how much is +@@ -2515,11 +2518,9 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all *all, + targetm.calls.warn_parameter_passing_abi (all->args_so_far, + data->passed_type); + +- entry_parm = targetm.calls.function_incoming_arg (all->args_so_far, +- data->promoted_mode, +- data->passed_type, +- data->named_arg); +- ++ function_arg_info arg (data->passed_type, data->promoted_mode, ++ data->named_arg); ++ entry_parm = targetm.calls.function_incoming_arg (all->args_so_far, arg); + if (entry_parm == 0) + data->promoted_mode = data->passed_mode; + +@@ -2542,27 +2543,26 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all *all, + if (targetm.calls.pretend_outgoing_varargs_named (all->args_so_far)) + { + rtx tem; ++ function_arg_info named_arg (data->passed_type, data->promoted_mode, ++ /*named=*/true); + tem = targetm.calls.function_incoming_arg (all->args_so_far, +- data->promoted_mode, +- data->passed_type, true); ++ named_arg); + in_regs = tem != NULL; + } + } + + /* If this parameter was passed both in registers and in the stack, use + the copy on the stack. */ +- if (targetm.calls.must_pass_in_stack (data->promoted_mode, +- data->passed_type)) ++ if (targetm.calls.must_pass_in_stack (arg)) + entry_parm = 0; + + if (entry_parm) + { + int partial; + +- partial = targetm.calls.arg_partial_bytes (all->args_so_far, +- data->promoted_mode, +- data->passed_type, +- data->named_arg); ++ function_arg_info arg (data->passed_type, data->promoted_mode, ++ data->named_arg); ++ partial = targetm.calls.arg_partial_bytes (all->args_so_far, arg); + data->partial = partial; + + /* The caller might already have allocated stack space for the +@@ -3226,8 +3226,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm, + for (insn = insns; insn && moved; insn = NEXT_INSN (insn)) + { + if (INSN_P (insn)) +- note_stores (PATTERN (insn), record_hard_reg_sets, +- &hardregs); ++ note_stores (insn, record_hard_reg_sets, &hardregs); + if (!hard_reg_set_empty_p (hardregs)) + moved = false; + } +@@ -3647,8 +3646,9 @@ assign_parms (tree fndecl) + assign_parms_setup_varargs (&all, &data, false); + + /* Update info on where next arg arrives in registers. */ +- targetm.calls.function_arg_advance (all.args_so_far, data.promoted_mode, +- data.passed_type, data.named_arg); ++ function_arg_info arg (data.passed_type, data.promoted_mode, ++ data.named_arg); ++ targetm.calls.function_arg_advance (all.args_so_far, arg); + } + + if (targetm.calls.split_complex_arg) +@@ -3835,8 +3835,9 @@ gimplify_parameters (gimple_seq *cleanup) + continue; + + /* Update info on where next arg arrives in registers. */ +- targetm.calls.function_arg_advance (all.args_so_far, data.promoted_mode, +- data.passed_type, data.named_arg); ++ function_arg_info arg (data.passed_type, data.promoted_mode, ++ data.named_arg); ++ targetm.calls.function_arg_advance (all.args_so_far, arg); + + /* ??? 
Once upon a time variable_size stuffed parameter list + SAVE_EXPRs (amongst others) onto a pending sizes list. This +@@ -3854,8 +3855,8 @@ gimplify_parameters (gimple_seq *cleanup) + if (data.passed_pointer) + { + tree type = TREE_TYPE (data.passed_type); +- if (reference_callee_copied (&all.args_so_far_v, TYPE_MODE (type), +- type, data.named_arg)) ++ function_arg_info orig_arg (type, data.named_arg); ++ if (reference_callee_copied (&all.args_so_far_v, orig_arg)) + { + tree local, t; + +@@ -4823,6 +4824,12 @@ static void + prepare_function_start (void) + { + gcc_assert (!get_last_insn ()); ++ ++ if (in_dummy_function) ++ crtl->abi = &default_function_abi; ++ else ++ crtl->abi = &fndecl_abi (cfun->decl).base_abi (); ++ + init_temp_slots (); + init_emit (); + init_varasm_status (); +diff --git a/gcc/fwprop.c b/gcc/fwprop.c +index f2966fada..e6f375271 100644 +--- a/gcc/fwprop.c ++++ b/gcc/fwprop.c +@@ -740,7 +740,7 @@ propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx, + || CONSTANT_P (new_rtx) + || (GET_CODE (new_rtx) == SUBREG + && REG_P (SUBREG_REG (new_rtx)) +- && !paradoxical_subreg_p (mode, GET_MODE (SUBREG_REG (new_rtx))))) ++ && !paradoxical_subreg_p (new_rtx))) + flags |= PR_CAN_APPEAR; + if (!varying_mem_p (new_rtx)) + flags |= PR_HANDLE_MEM; +diff --git a/gcc/gcc.c b/gcc/gcc.c +index 4f57765b0..1a5ad7db3 100644 +--- a/gcc/gcc.c ++++ b/gcc/gcc.c +@@ -4041,6 +4041,10 @@ driver_handle_option (struct gcc_options *opts, + diagnostic_color_init (dc, value); + break; + ++ case OPT_fdiagnostics_urls_: ++ diagnostic_urls_init (dc, value); ++ break; ++ + case OPT_fdiagnostics_format_: + diagnostic_output_format_init (dc, + (enum diagnostics_output_format)value); +@@ -7438,6 +7442,7 @@ driver::global_initializations () + + diagnostic_initialize (global_dc, 0); + diagnostic_color_init (global_dc); ++ diagnostic_urls_init (global_dc); + + #ifdef GCC_DRIVER_HOST_INITIALIZATION + /* Perform host dependent initialization when needed. */ +diff --git a/gcc/gcse-common.c b/gcc/gcse-common.c +index e6e4b642b..55148623f 100644 +--- a/gcc/gcse-common.c ++++ b/gcc/gcse-common.c +@@ -89,7 +89,7 @@ record_last_mem_set_info_common (rtx_insn *insn, + struct gcse_note_stores_info data; + data.insn = insn; + data.canon_mem_list = canon_modify_mem_list; +- note_stores (PATTERN (insn), canon_list_insert, (void*) &data); ++ note_stores (insn, canon_list_insert, (void*) &data); + } + } + +diff --git a/gcc/gcse.c b/gcc/gcse.c +index 7fbdd6750..373ba7a16 100644 +--- a/gcc/gcse.c ++++ b/gcc/gcse.c +@@ -1049,7 +1049,7 @@ load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x, + note_stores to examine each hunk of memory that is modified. */ + mci.mem = x; + mci.conflict = false; +- note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci); ++ note_stores (setter, mems_conflict_for_gcse_p, &mci); + if (mci.conflict) + return 1; + } +@@ -1537,7 +1537,7 @@ compute_hash_table_work (struct gcse_hash_table_d *table) + record_last_mem_set_info (insn); + } + +- note_stores (PATTERN (insn), record_last_set_info, insn); ++ note_stores (insn, record_last_set_info, insn); + } + + /* The next pass builds the hash table. */ +@@ -2415,7 +2415,7 @@ single_set_gcse (rtx_insn *insn) + + s.insn = insn; + s.nsets = 0; +- note_stores (pattern, record_set_data, &s); ++ note_pattern_stores (pattern, record_set_data, &s); + + /* Considered invariant insns have exactly one set. 
*/ + gcc_assert (s.nsets == 1); +diff --git a/gcc/genconfig.c b/gcc/genconfig.c +index 194fe950d..6f914b1e4 100644 +--- a/gcc/genconfig.c ++++ b/gcc/genconfig.c +@@ -72,7 +72,6 @@ walk_insn_part (rtx part, int recog_p, int non_pc_set_src) + switch (code) + { + case CLOBBER: +- case CLOBBER_HIGH: + clobbers_seen_this_insn++; + break; + +diff --git a/gcc/genemit.c b/gcc/genemit.c +index 83f86a35c..e03af01f2 100644 +--- a/gcc/genemit.c ++++ b/gcc/genemit.c +@@ -169,15 +169,6 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, md_rtx_info *info) + return; + } + break; +- case CLOBBER_HIGH: +- if (!REG_P (XEXP (x, 0))) +- error ("CLOBBER_HIGH argument is not a register expr, at %s:%d", +- info->loc.filename, info->loc.lineno); +- printf ("gen_hard_reg_clobber_high (%smode, %i)", +- GET_MODE_NAME (GET_MODE (XEXP (x, 0))), +- REGNO (XEXP (x, 0))); +- return; +- break; + case CC0: + printf ("cc0_rtx"); + return; +@@ -343,8 +334,7 @@ gen_insn (md_rtx_info *info) + + for (i = XVECLEN (insn, 1) - 1; i > 0; i--) + { +- if (GET_CODE (XVECEXP (insn, 1, i)) != CLOBBER +- && GET_CODE (XVECEXP (insn, 1, i)) != CLOBBER_HIGH) ++ if (GET_CODE (XVECEXP (insn, 1, i)) != CLOBBER) + break; + + if (REG_P (XEXP (XVECEXP (insn, 1, i), 0))) +@@ -811,42 +801,45 @@ handle_overloaded_code_for (overloaded_name *oname) + static void + handle_overloaded_gen (overloaded_name *oname) + { ++ unsigned HOST_WIDE_INT seen = 0; + /* All patterns must have the same number of operands. */ +- pattern_stats stats; +- get_pattern_stats (&stats, XVEC (oname->first_instance->insn, 1)); + for (overloaded_instance *instance = oname->first_instance->next; + instance; instance = instance->next) + { +- pattern_stats stats2; +- get_pattern_stats (&stats2, XVEC (instance->insn, 1)); +- if (stats.num_generator_args != stats2.num_generator_args) +- fatal_at (get_file_location (instance->insn), +- "inconsistent number of operands for '%s'; " +- "this instance has %d, but previous instances had %d", +- oname->name, stats2.num_generator_args, +- stats.num_generator_args); ++ pattern_stats stats; ++ get_pattern_stats (&stats, XVEC (instance->insn, 1)); ++ unsigned HOST_WIDE_INT mask ++ = HOST_WIDE_INT_1U << stats.num_generator_args; ++ if (seen & mask) ++ continue; ++ ++ seen |= mask; ++ ++ /* Print the function prototype. */ ++ printf ("\nrtx\nmaybe_gen_%s (", oname->name); ++ print_overload_arguments (oname); ++ for (int i = 0; i < stats.num_generator_args; ++i) ++ printf (", rtx x%d", i); ++ printf (")\n{\n"); ++ ++ /* Use maybe_code_for_*, instead of duplicating the selection ++ logic here. */ ++ printf (" insn_code code = maybe_code_for_%s (", oname->name); ++ for (unsigned int i = 0; i < oname->arg_types.length (); ++i) ++ printf ("%sarg%d", i == 0 ? "" : ", ", i); ++ printf (");\n" ++ " if (code != CODE_FOR_nothing)\n" ++ " {\n" ++ " gcc_assert (insn_data[code].n_generator_args == %d);\n" ++ " return GEN_FCN (code) (", stats.num_generator_args); ++ for (int i = 0; i < stats.num_generator_args; ++i) ++ printf ("%sx%d", i == 0 ? "" : ", ", i); ++ printf (");\n" ++ " }\n" ++ " else\n" ++ " return NULL_RTX;\n" ++ "}\n"); + } +- +- /* Print the function prototype. */ +- printf ("\nrtx\nmaybe_gen_%s (", oname->name); +- print_overload_arguments (oname); +- for (int i = 0; i < stats.num_generator_args; ++i) +- printf (", rtx x%d", i); +- printf (")\n{\n"); +- +- /* Use maybe_code_for_*, instead of duplicating the selection logic here. 
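To make the genemit.c change above concrete: for a hypothetical overloaded pattern `foo' whose instances take one mode argument and three operands, the printf calls in the new loop emit a wrapper of roughly the following shape (reconstructed from the format strings; the name and operand count are illustrative only).

rtx
maybe_gen_foo (machine_mode arg0, rtx x0, rtx x1, rtx x2)
{
  insn_code code = maybe_code_for_foo (arg0);
  if (code != CODE_FOR_nothing)
    {
      /* New with this patch: check that the selected instance really
         takes the operand count this overload was generated for.  */
      gcc_assert (insn_data[code].n_generator_args == 3);
      return GEN_FCN (code) (x0, x1, x2);
    }
  else
    return NULL_RTX;
}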
*/ +- printf (" insn_code code = maybe_code_for_%s (", oname->name); +- for (unsigned int i = 0; i < oname->arg_types.length (); ++i) +- printf ("%sarg%d", i == 0 ? "" : ", ", i); +- printf (");\n" +- " if (code != CODE_FOR_nothing)\n" +- " return GEN_FCN (code) ("); +- for (int i = 0; i < stats.num_generator_args; ++i) +- printf ("%sx%d", i == 0 ? "" : ", ", i); +- printf (");\n" +- " else\n" +- " return NULL_RTX;\n" +- "}\n"); + } + + int +diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c +index 3478cf59f..e9ef343c9 100644 +--- a/gcc/generic-match-head.c ++++ b/gcc/generic-match-head.c +@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. If not see + #include "gimple.h" + #include "ssa.h" + #include "cgraph.h" ++#include "vec-perm-indices.h" + #include "fold-const.h" + #include "fold-const-call.h" + #include "stor-layout.h" +diff --git a/gcc/genmodes.c b/gcc/genmodes.c +index f33eefa24..95522d6b5 100644 +--- a/gcc/genmodes.c ++++ b/gcc/genmodes.c +@@ -53,6 +53,7 @@ struct mode_data + + const char *name; /* printable mode name -- SI, not SImode */ + enum mode_class cl; /* this mode class */ ++ unsigned int order; /* top-level sorting order */ + unsigned int precision; /* size in bits, equiv to TYPE_PRECISION */ + unsigned int bytesize; /* storage size in addressable units */ + unsigned int ncomponents; /* number of subunits */ +@@ -85,7 +86,7 @@ static struct mode_data *void_mode; + + static const struct mode_data blank_mode = { + 0, "", MAX_MODE_CLASS, +- -1U, -1U, -1U, -1U, ++ 0, -1U, -1U, -1U, -1U, + 0, 0, 0, 0, 0, 0, + "", 0, 0, 0, 0, false, false, 0 + }; +@@ -484,14 +485,15 @@ make_complex_modes (enum mode_class cl, + } + } + +-/* For all modes in class CL, construct vector modes of width +- WIDTH, having as many components as necessary. */ +-#define VECTOR_MODES_WITH_PREFIX(PREFIX, C, W) \ +- make_vector_modes (MODE_##C, #PREFIX, W, __FILE__, __LINE__) +-#define VECTOR_MODES(C, W) VECTOR_MODES_WITH_PREFIX (V, C, W) ++/* For all modes in class CL, construct vector modes of width WIDTH, ++ having as many components as necessary. ORDER is the sorting order ++ of the mode, with smaller numbers indicating a higher priority. 
*/ ++#define VECTOR_MODES_WITH_PREFIX(PREFIX, C, W, ORDER) \ ++ make_vector_modes (MODE_##C, #PREFIX, W, ORDER, __FILE__, __LINE__) ++#define VECTOR_MODES(C, W) VECTOR_MODES_WITH_PREFIX (V, C, W, 0) + static void ATTRIBUTE_UNUSED + make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width, +- const char *file, unsigned int line) ++ unsigned int order, const char *file, unsigned int line) + { + struct mode_data *m; + struct mode_data *v; +@@ -530,6 +532,7 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width, + } + + v = new_mode (vclass, xstrdup (buf), file, line); ++ v->order = order; + v->component = m; + v->ncomponents = ncomponents; + } +@@ -832,6 +835,11 @@ cmp_modes (const void *a, const void *b) + const struct mode_data *const m = *(const struct mode_data *const*)a; + const struct mode_data *const n = *(const struct mode_data *const*)b; + ++ if (m->order > n->order) ++ return 1; ++ else if (m->order < n->order) ++ return -1; ++ + if (m->bytesize > n->bytesize) + return 1; + else if (m->bytesize < n->bytesize) +diff --git a/gcc/genopinit.c b/gcc/genopinit.c +index ea4c3ce01..1dd1d82d0 100644 +--- a/gcc/genopinit.c ++++ b/gcc/genopinit.c +@@ -134,31 +134,43 @@ handle_overloaded_code_for (FILE *file, overloaded_name *oname) + static void + handle_overloaded_gen (FILE *file, overloaded_name *oname) + { +- pattern_stats stats; +- get_pattern_stats (&stats, XVEC (oname->first_instance->insn, 1)); +- +- fprintf (file, "\nextern rtx maybe_gen_%s (", oname->name); +- for (unsigned int i = 0; i < oname->arg_types.length (); ++i) +- fprintf (file, "%s%s", i == 0 ? "" : ", ", oname->arg_types[i]); +- for (int i = 0; i < stats.num_generator_args; ++i) +- fprintf (file, ", rtx"); +- fprintf (file, ");\n"); +- +- fprintf (file, "inline rtx\ngen_%s (", oname->name); +- for (unsigned int i = 0; i < oname->arg_types.length (); ++i) +- fprintf (file, "%s%s arg%d", i == 0 ? "" : ", ", oname->arg_types[i], i); +- for (int i = 0; i < stats.num_generator_args; ++i) +- fprintf (file, ", rtx x%d", i); +- fprintf (file, ")\n{\n rtx res = maybe_gen_%s (", oname->name); +- for (unsigned int i = 0; i < oname->arg_types.length (); ++i) +- fprintf (file, "%sarg%d", i == 0 ? "" : ", ", i); +- for (int i = 0; i < stats.num_generator_args; ++i) +- fprintf (file, ", x%d", i); +- fprintf (file, +- ");\n" +- " gcc_assert (res);\n" +- " return res;\n" +- "}\n"); ++ unsigned HOST_WIDE_INT seen = 0; ++ for (overloaded_instance *instance = oname->first_instance->next; ++ instance; instance = instance->next) ++ { ++ pattern_stats stats; ++ get_pattern_stats (&stats, XVEC (instance->insn, 1)); ++ unsigned HOST_WIDE_INT mask ++ = HOST_WIDE_INT_1U << stats.num_generator_args; ++ if (seen & mask) ++ continue; ++ ++ seen |= mask; ++ ++ fprintf (file, "\nextern rtx maybe_gen_%s (", oname->name); ++ for (unsigned int i = 0; i < oname->arg_types.length (); ++i) ++ fprintf (file, "%s%s", i == 0 ? "" : ", ", oname->arg_types[i]); ++ for (int i = 0; i < stats.num_generator_args; ++i) ++ fprintf (file, ", rtx"); ++ fprintf (file, ");\n"); ++ ++ fprintf (file, "inline rtx\ngen_%s (", oname->name); ++ for (unsigned int i = 0; i < oname->arg_types.length (); ++i) ++ fprintf (file, "%s%s arg%d", i == 0 ? 
"" : ", ", ++ oname->arg_types[i], i); ++ for (int i = 0; i < stats.num_generator_args; ++i) ++ fprintf (file, ", rtx x%d", i); ++ fprintf (file, ")\n{\n rtx res = maybe_gen_%s (", oname->name); ++ for (unsigned int i = 0; i < oname->arg_types.length (); ++i) ++ fprintf (file, "%sarg%d", i == 0 ? "" : ", ", i); ++ for (int i = 0; i < stats.num_generator_args; ++i) ++ fprintf (file, ", x%d", i); ++ fprintf (file, ++ ");\n" ++ " gcc_assert (res);\n" ++ " return res;\n" ++ "}\n"); ++ } + } + + int +diff --git a/gcc/genrecog.c b/gcc/genrecog.c +index 90e2508fa..ec921702a 100644 +--- a/gcc/genrecog.c ++++ b/gcc/genrecog.c +@@ -718,7 +718,6 @@ validate_pattern (rtx pattern, md_rtx_info *info, rtx set, int set_code) + } + + case CLOBBER: +- case CLOBBER_HIGH: + validate_pattern (SET_DEST (pattern), info, pattern, '='); + return; + +@@ -5295,7 +5294,7 @@ remove_clobbers (acceptance_type *acceptance_ptr, rtx *pattern_ptr) + for (i = XVECLEN (pattern, 0); i > 0; i--) + { + rtx x = XVECEXP (pattern, 0, i - 1); +- if ((GET_CODE (x) != CLOBBER && GET_CODE (x) != CLOBBER_HIGH) ++ if (GET_CODE (x) != CLOBBER + || (!REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 0)) != MATCH_SCRATCH)) + break; +diff --git a/gcc/gensupport.c b/gcc/gensupport.c +index 31a67d5ad..ab6a523dd 100644 +--- a/gcc/gensupport.c ++++ b/gcc/gensupport.c +@@ -70,8 +70,8 @@ struct queue_elem + rtx data; + file_location loc; + struct queue_elem *next; +- /* In a DEFINE_INSN that came from a DEFINE_INSN_AND_SPLIT, SPLIT +- points to the generated DEFINE_SPLIT. */ ++ /* In a DEFINE_INSN that came from a DEFINE_INSN_AND_SPLIT or ++ DEFINE_INSN_AND_REWRITE, SPLIT points to the generated DEFINE_SPLIT. */ + struct queue_elem *split; + }; + +@@ -485,6 +485,65 @@ remove_constraints (rtx part) + } + } + ++/* Recursively replace MATCH_OPERANDs with MATCH_DUPs and MATCH_OPERATORs ++ with MATCH_OP_DUPs in X. */ ++ ++static rtx ++replace_operands_with_dups (rtx x) ++{ ++ if (x == 0) ++ return x; ++ ++ rtx newx; ++ if (GET_CODE (x) == MATCH_OPERAND) ++ { ++ newx = rtx_alloc (MATCH_DUP); ++ XINT (newx, 0) = XINT (x, 0); ++ x = newx; ++ } ++ else if (GET_CODE (x) == MATCH_OPERATOR) ++ { ++ newx = rtx_alloc (MATCH_OP_DUP); ++ XINT (newx, 0) = XINT (x, 0); ++ XVEC (newx, 1) = XVEC (x, 2); ++ x = newx; ++ } ++ else ++ newx = shallow_copy_rtx (x); ++ ++ const char *format_ptr = GET_RTX_FORMAT (GET_CODE (x)); ++ for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (x)); i++) ++ switch (*format_ptr++) ++ { ++ case 'e': ++ case 'u': ++ XEXP (newx, i) = replace_operands_with_dups (XEXP (x, i)); ++ break; ++ case 'E': ++ if (XVEC (x, i) != NULL) ++ { ++ XVEC (newx, i) = rtvec_alloc (XVECLEN (x, i)); ++ for (int j = 0; j < XVECLEN (x, i); j++) ++ XVECEXP (newx, i, j) ++ = replace_operands_with_dups (XVECEXP (x, i, j)); ++ } ++ break; ++ } ++ return newx; ++} ++ ++/* Convert matching pattern VEC from a DEFINE_INSN_AND_REWRITE into ++ a sequence that should be generated by the splitter. */ ++ ++static rtvec ++gen_rewrite_sequence (rtvec vec) ++{ ++ rtvec new_vec = rtvec_alloc (1); ++ rtx x = add_implicit_parallel (vec); ++ RTVEC_ELT (new_vec, 0) = replace_operands_with_dups (x); ++ return new_vec; ++} ++ + /* Process a top level rtx in some way, queuing as appropriate. 
*/ + + static void +@@ -527,6 +586,7 @@ process_rtx (rtx desc, file_location loc) + break; + + case DEFINE_INSN_AND_SPLIT: ++ case DEFINE_INSN_AND_REWRITE: + { + const char *split_cond; + rtx split; +@@ -534,6 +594,7 @@ process_rtx (rtx desc, file_location loc) + int i; + struct queue_elem *insn_elem; + struct queue_elem *split_elem; ++ int split_code = (GET_CODE (desc) == DEFINE_INSN_AND_REWRITE ? 5 : 6); + + /* Create a split with values from the insn_and_split. */ + split = rtx_alloc (DEFINE_SPLIT); +@@ -555,12 +616,17 @@ process_rtx (rtx desc, file_location loc) + split_cond = rtx_reader_ptr->join_c_conditions (XSTR (desc, 2), + split_cond + 2); + } ++ else if (GET_CODE (desc) == DEFINE_INSN_AND_REWRITE) ++ error_at (loc, "the rewrite condition must start with `&&'"); + XSTR (split, 1) = split_cond; +- XVEC (split, 2) = XVEC (desc, 5); +- XSTR (split, 3) = XSTR (desc, 6); ++ if (GET_CODE (desc) == DEFINE_INSN_AND_REWRITE) ++ XVEC (split, 2) = gen_rewrite_sequence (XVEC (desc, 1)); ++ else ++ XVEC (split, 2) = XVEC (desc, 5); ++ XSTR (split, 3) = XSTR (desc, split_code); + + /* Fix up the DEFINE_INSN. */ +- attr = XVEC (desc, 7); ++ attr = XVEC (desc, split_code + 1); + PUT_CODE (desc, DEFINE_INSN); + XVEC (desc, 4) = attr; + +diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c +index b0c9f9b67..4ba194ff4 100644 +--- a/gcc/gimple-expr.c ++++ b/gcc/gimple-expr.c +@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see + #include "tree-pass.h" + #include "stringpool.h" + #include "attribs.h" ++#include "target.h" + + /* ----- Type related ----- */ + +@@ -147,10 +148,12 @@ useless_type_conversion_p (tree outer_type, tree inner_type) + + /* Recurse for vector types with the same number of subparts. */ + else if (TREE_CODE (inner_type) == VECTOR_TYPE +- && TREE_CODE (outer_type) == VECTOR_TYPE +- && TYPE_PRECISION (inner_type) == TYPE_PRECISION (outer_type)) +- return useless_type_conversion_p (TREE_TYPE (outer_type), +- TREE_TYPE (inner_type)); ++ && TREE_CODE (outer_type) == VECTOR_TYPE) ++ return (known_eq (TYPE_VECTOR_SUBPARTS (inner_type), ++ TYPE_VECTOR_SUBPARTS (outer_type)) ++ && useless_type_conversion_p (TREE_TYPE (outer_type), ++ TREE_TYPE (inner_type)) ++ && targetm.compatible_vector_types_p (inner_type, outer_type)); + + else if (TREE_CODE (inner_type) == ARRAY_TYPE + && TREE_CODE (outer_type) == ARRAY_TYPE) +diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c +index d33d93242..bbee8eb46 100644 +--- a/gcc/gimple-fold.c ++++ b/gcc/gimple-fold.c +@@ -631,14 +631,7 @@ replace_call_with_call_and_fold (gimple_stmt_iterator *gsi, gimple *repl) + gimple *stmt = gsi_stmt (*gsi); + gimple_call_set_lhs (repl, gimple_call_lhs (stmt)); + gimple_set_location (repl, gimple_location (stmt)); +- if (gimple_vdef (stmt) +- && TREE_CODE (gimple_vdef (stmt)) == SSA_NAME) +- { +- gimple_set_vdef (repl, gimple_vdef (stmt)); +- SSA_NAME_DEF_STMT (gimple_vdef (repl)) = repl; +- } +- if (gimple_vuse (stmt)) +- gimple_set_vuse (repl, gimple_vuse (stmt)); ++ gimple_move_vops (repl, stmt); + gsi_replace (gsi, repl, false); + fold_stmt (gsi); + } +@@ -822,11 +815,7 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi, + = gimple_build_assign (fold_build2 (MEM_REF, desttype, + dest, off0), + srcmem); +- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); +- if (gimple_vdef (new_stmt) +- && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) +- SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; ++ gimple_move_vops (new_stmt, stmt); + if (!lhs) + 
{ + gsi_replace (gsi, new_stmt, false); +@@ -1087,11 +1076,7 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi, + = gimple_build_assign (fold_build2 (MEM_REF, desttype, dest, off0), + fold_build2 (MEM_REF, srctype, src, off0)); + set_vop_and_replace: +- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); +- if (gimple_vdef (new_stmt) +- && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) +- SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; ++ gimple_move_vops (new_stmt, stmt); + if (!lhs) + { + gsi_replace (gsi, new_stmt, false); +@@ -1264,13 +1249,7 @@ gimple_fold_builtin_memset (gimple_stmt_iterator *gsi, tree c, tree len) + + var = fold_build2 (MEM_REF, etype, dest, build_int_cst (ptr_type_node, 0)); + gimple *store = gimple_build_assign (var, build_int_cst_type (etype, cval)); +- gimple_set_vuse (store, gimple_vuse (stmt)); +- tree vdef = gimple_vdef (stmt); +- if (vdef && TREE_CODE (vdef) == SSA_NAME) +- { +- gimple_set_vdef (store, gimple_vdef (stmt)); +- SSA_NAME_DEF_STMT (gimple_vdef (stmt)) = store; +- } ++ gimple_move_vops (store, stmt); + gsi_insert_before (gsi, store, GSI_SAME_STMT); + if (gimple_call_lhs (stmt)) + { +@@ -2979,11 +2958,7 @@ gimple_fold_builtin_stpcpy (gimple_stmt_iterator *gsi) + tem, build_int_cst (size_type_node, 1)); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + gcall *repl = gimple_build_call (fn, 3, dest, src, lenp1); +- gimple_set_vuse (repl, gimple_vuse (stmt)); +- gimple_set_vdef (repl, gimple_vdef (stmt)); +- if (gimple_vdef (repl) +- && TREE_CODE (gimple_vdef (repl)) == SSA_NAME) +- SSA_NAME_DEF_STMT (gimple_vdef (repl)) = repl; ++ gimple_move_vops (repl, stmt); + gsi_insert_before (gsi, repl, GSI_SAME_STMT); + /* Replace the result with dest + len. */ + stmts = NULL; +@@ -4135,9 +4110,7 @@ fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi) + gimple_call_arg (stmt, 5)); + tree lhs = make_ssa_name (ctype); + gimple_call_set_lhs (g, lhs); +- gimple_set_vdef (g, gimple_vdef (stmt)); +- gimple_set_vuse (g, gimple_vuse (stmt)); +- SSA_NAME_DEF_STMT (gimple_vdef (g)) = g; ++ gimple_move_vops (g, stmt); + tree oldlhs = gimple_call_lhs (stmt); + if (stmt_can_throw_internal (cfun, stmt)) + { +@@ -4316,8 +4289,7 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace) + SSA_NAME_DEF_STMT (lhs) = gimple_build_nop (); + set_ssa_default_def (cfun, var, lhs); + } +- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); ++ gimple_move_vops (new_stmt, stmt); + gsi_replace (gsi, new_stmt, false); + return true; + } +diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c +index bbbc0f2c2..f83f22561 100644 +--- a/gcc/gimple-match-head.c ++++ b/gcc/gimple-match-head.c +@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. 
If not see + #include "gimple.h" + #include "ssa.h" + #include "cgraph.h" ++#include "vec-perm-indices.h" + #include "fold-const.h" + #include "fold-const-call.h" + #include "stor-layout.h" +diff --git a/gcc/gimple.c b/gcc/gimple.c +index bf362dbe5..763c8e7e1 100644 +--- a/gcc/gimple.c ++++ b/gcc/gimple.c +@@ -1564,7 +1564,7 @@ gimple_call_nonnull_result_p (gcall *call) + if (!fndecl) + return false; + if (flag_delete_null_pointer_checks && !flag_check_new +- && DECL_IS_OPERATOR_NEW (fndecl) ++ && DECL_IS_OPERATOR_NEW_P (fndecl) + && !TREE_NOTHROW (fndecl)) + return true; + +@@ -2034,6 +2034,18 @@ gimple_copy (gimple *stmt) + return copy; + } + ++/* Move OLD_STMT's vuse and vdef operands to NEW_STMT, on the assumption ++ that OLD_STMT is about to be removed. */ ++ ++void ++gimple_move_vops (gimple *new_stmt, gimple *old_stmt) ++{ ++ tree vdef = gimple_vdef (old_stmt); ++ gimple_set_vuse (new_stmt, gimple_vuse (old_stmt)); ++ gimple_set_vdef (new_stmt, vdef); ++ if (vdef && TREE_CODE (vdef) == SSA_NAME) ++ SSA_NAME_DEF_STMT (vdef) = new_stmt; ++} + + /* Return true if statement S has side-effects. We consider a + statement to have side effects if: +diff --git a/gcc/gimple.h b/gcc/gimple.h +index 8b5c9e219..f91c6db4d 100644 +--- a/gcc/gimple.h ++++ b/gcc/gimple.h +@@ -1509,6 +1509,7 @@ void gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *, enum tree_code, + tree gimple_get_lhs (const gimple *); + void gimple_set_lhs (gimple *, tree); + gimple *gimple_copy (gimple *); ++void gimple_move_vops (gimple *, gimple *); + bool gimple_has_side_effects (const gimple *); + bool gimple_could_trap_p_1 (gimple *, bool, bool); + bool gimple_could_trap_p (gimple *); +diff --git a/gcc/gimplify.c b/gcc/gimplify.c +index bd8bd6d7e..b23680f96 100644 +--- a/gcc/gimplify.c ++++ b/gcc/gimplify.c +@@ -1699,11 +1699,12 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq *seq_p) + tree init = DECL_INITIAL (decl); + bool is_vla = false; + +- if (TREE_CODE (DECL_SIZE_UNIT (decl)) != INTEGER_CST ++ poly_uint64 size; ++ if (!poly_int_tree_p (DECL_SIZE_UNIT (decl), &size) + || (!TREE_STATIC (decl) + && flag_stack_check == GENERIC_STACK_CHECK +- && compare_tree_int (DECL_SIZE_UNIT (decl), +- STACK_CHECK_MAX_VAR_SIZE) > 0)) ++ && maybe_gt (size, ++ (unsigned HOST_WIDE_INT) STACK_CHECK_MAX_VAR_SIZE))) + { + gimplify_vla_decl (decl, seq_p); + is_vla = true; +diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c +index 5025aae42..33a77542a 100644 +--- a/gcc/haifa-sched.c ++++ b/gcc/haifa-sched.c +@@ -529,9 +529,6 @@ haifa_classify_rtx (const_rtx x) + /* Test if it is a 'store'. */ + tmp_class = may_trap_exp (XEXP (x, 0), 1); + break; +- case CLOBBER_HIGH: +- gcc_assert (REG_P (XEXP (x, 0))); +- break; + case SET: + /* Test if it is a store. */ + tmp_class = may_trap_exp (SET_DEST (x), 1); +@@ -7207,7 +7204,7 @@ alloc_global_sched_pressure_data (void) + fixed_regs_num[cl] = 0; + + for (int i = 0; i < ira_class_hard_regs_num[cl]; ++i) +- if (!call_used_regs[ira_class_hard_regs[cl][i]]) ++ if (!call_used_or_fixed_reg_p (ira_class_hard_regs[cl][i])) + ++call_saved_regs_num[cl]; + else if (fixed_regs[ira_class_hard_regs[cl][i]]) + ++fixed_regs_num[cl]; +diff --git a/gcc/hard-reg-set.h b/gcc/hard-reg-set.h +index a72819662..51c9e72bb 100644 +--- a/gcc/hard-reg-set.h ++++ b/gcc/hard-reg-set.h +@@ -20,6 +20,8 @@ along with GCC; see the file COPYING3. If not see + #ifndef GCC_HARD_REG_SET_H + #define GCC_HARD_REG_SET_H + ++#include "array-traits.h" ++ + /* Define the type of a set of hard registers. 
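
The gimple_move_vops hunk above replaces a four-step sequence that several gimple-fold.c and IPA callers used to open-code: copy the vuse, copy the vdef, then re-point the vdef's defining statement at the replacement. The sketch below models only that bookkeeping with stand-in types; stmt, ssa_def_stmt and move_vops are hypothetical names for illustration, not GCC internals.

// Minimal model of the gimple_move_vops refactoring (stand-in types only).
#include <cassert>
#include <map>
#include <string>

struct stmt
{
  std::string vuse;   // virtual use operand, empty if none
  std::string vdef;   // virtual definition operand, empty if none
};

// Plays the role of SSA_NAME_DEF_STMT: maps each virtual SSA name to the
// statement that defines it.
static std::map<std::string, stmt *> ssa_def_stmt;

// Move OLD_STMT's virtual operands to NEW_STMT and keep the def-stmt table
// consistent, the job the new helper performs for every converted caller.
static void
move_vops (stmt *new_stmt, stmt *old_stmt)
{
  new_stmt->vuse = old_stmt->vuse;
  new_stmt->vdef = old_stmt->vdef;
  if (!old_stmt->vdef.empty ())
    ssa_def_stmt[old_stmt->vdef] = new_stmt;
}

int
main ()
{
  stmt old_call { ".MEM_1", ".MEM_2" };
  stmt repl;
  ssa_def_stmt[".MEM_2"] = &old_call;
  move_vops (&repl, &old_call);
  assert (ssa_def_stmt[".MEM_2"] == &repl);
  return 0;
}
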
*/ + + /* HARD_REG_ELT_TYPE is a typedef of the unsigned integral type which +@@ -42,14 +44,88 @@ typedef unsigned HOST_WIDEST_FAST_INT HARD_REG_ELT_TYPE; + + #if FIRST_PSEUDO_REGISTER <= HOST_BITS_PER_WIDEST_FAST_INT + +-#define HARD_REG_SET HARD_REG_ELT_TYPE ++typedef HARD_REG_ELT_TYPE HARD_REG_SET; ++typedef const HARD_REG_SET const_hard_reg_set; + + #else + + #define HARD_REG_SET_LONGS \ + ((FIRST_PSEUDO_REGISTER + HOST_BITS_PER_WIDEST_FAST_INT - 1) \ + / HOST_BITS_PER_WIDEST_FAST_INT) +-typedef HARD_REG_ELT_TYPE HARD_REG_SET[HARD_REG_SET_LONGS]; ++ ++struct HARD_REG_SET ++{ ++ HARD_REG_SET ++ operator~ () const ++ { ++ HARD_REG_SET res; ++ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) ++ res.elts[i] = ~elts[i]; ++ return res; ++ } ++ ++ HARD_REG_SET ++ operator& (const HARD_REG_SET &other) const ++ { ++ HARD_REG_SET res; ++ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) ++ res.elts[i] = elts[i] & other.elts[i]; ++ return res; ++ } ++ ++ HARD_REG_SET & ++ operator&= (const HARD_REG_SET &other) ++ { ++ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) ++ elts[i] &= other.elts[i]; ++ return *this; ++ } ++ ++ HARD_REG_SET ++ operator| (const HARD_REG_SET &other) const ++ { ++ HARD_REG_SET res; ++ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) ++ res.elts[i] = elts[i] | other.elts[i]; ++ return res; ++ } ++ ++ HARD_REG_SET & ++ operator|= (const HARD_REG_SET &other) ++ { ++ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) ++ elts[i] |= other.elts[i]; ++ return *this; ++ } ++ ++ bool ++ operator== (const HARD_REG_SET &other) const ++ { ++ HARD_REG_ELT_TYPE bad = 0; ++ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) ++ bad |= (elts[i] ^ other.elts[i]); ++ return bad == 0; ++ } ++ ++ bool ++ operator!= (const HARD_REG_SET &other) const ++ { ++ return !operator== (other); ++ } ++ ++ HARD_REG_ELT_TYPE elts[HARD_REG_SET_LONGS]; ++}; ++typedef const HARD_REG_SET &const_hard_reg_set; ++ ++template<> ++struct array_traits ++{ ++ typedef HARD_REG_ELT_TYPE element_type; ++ static const bool has_constant_size = true; ++ static const size_t constant_size = HARD_REG_SET_LONGS; ++ static const element_type *base (const HARD_REG_SET &x) { return x.elts; } ++ static size_t size (const HARD_REG_SET &) { return HARD_REG_SET_LONGS; } ++}; + + #endif + +@@ -77,28 +153,15 @@ struct hard_reg_set_container + CLEAR_HARD_REG_SET and SET_HARD_REG_SET. + These take just one argument. + +- Also define macros for copying hard reg sets: +- COPY_HARD_REG_SET and COMPL_HARD_REG_SET. +- These take two arguments TO and FROM; they read from FROM +- and store into TO. COMPL_HARD_REG_SET complements each bit. +- +- Also define macros for combining hard reg sets: +- IOR_HARD_REG_SET and AND_HARD_REG_SET. +- These take two arguments TO and FROM; they read from FROM +- and combine bitwise into TO. Define also two variants +- IOR_COMPL_HARD_REG_SET and AND_COMPL_HARD_REG_SET +- which use the complement of the set FROM. +- + Also define: + + hard_reg_set_subset_p (X, Y), which returns true if X is a subset of Y. +- hard_reg_set_equal_p (X, Y), which returns true if X and Y are equal. + hard_reg_set_intersect_p (X, Y), which returns true if X and Y intersect. + hard_reg_set_empty_p (X), which returns true if X is empty. 
*/ + + #define UHOST_BITS_PER_WIDE_INT ((unsigned) HOST_BITS_PER_WIDEST_FAST_INT) + +-#ifdef HARD_REG_SET ++#if FIRST_PSEUDO_REGISTER <= HOST_BITS_PER_WIDEST_FAST_INT + + #define SET_HARD_REG_BIT(SET, BIT) \ + ((SET) |= HARD_CONST (1) << (BIT)) +@@ -110,404 +173,87 @@ struct hard_reg_set_container + #define CLEAR_HARD_REG_SET(TO) ((TO) = HARD_CONST (0)) + #define SET_HARD_REG_SET(TO) ((TO) = ~ HARD_CONST (0)) + +-#define COPY_HARD_REG_SET(TO, FROM) ((TO) = (FROM)) +-#define COMPL_HARD_REG_SET(TO, FROM) ((TO) = ~(FROM)) +- +-#define IOR_HARD_REG_SET(TO, FROM) ((TO) |= (FROM)) +-#define IOR_COMPL_HARD_REG_SET(TO, FROM) ((TO) |= ~ (FROM)) +-#define AND_HARD_REG_SET(TO, FROM) ((TO) &= (FROM)) +-#define AND_COMPL_HARD_REG_SET(TO, FROM) ((TO) &= ~ (FROM)) +- + static inline bool +-hard_reg_set_subset_p (const HARD_REG_SET x, const HARD_REG_SET y) ++hard_reg_set_subset_p (const_hard_reg_set x, const_hard_reg_set y) + { + return (x & ~y) == HARD_CONST (0); + } + + static inline bool +-hard_reg_set_equal_p (const HARD_REG_SET x, const HARD_REG_SET y) +-{ +- return x == y; +-} +- +-static inline bool +-hard_reg_set_intersect_p (const HARD_REG_SET x, const HARD_REG_SET y) ++hard_reg_set_intersect_p (const_hard_reg_set x, const_hard_reg_set y) + { + return (x & y) != HARD_CONST (0); + } + + static inline bool +-hard_reg_set_empty_p (const HARD_REG_SET x) ++hard_reg_set_empty_p (const_hard_reg_set x) + { + return x == HARD_CONST (0); + } + + #else + +-#define SET_HARD_REG_BIT(SET, BIT) \ +- ((SET)[(BIT) / UHOST_BITS_PER_WIDE_INT] \ +- |= HARD_CONST (1) << ((BIT) % UHOST_BITS_PER_WIDE_INT)) +- +-#define CLEAR_HARD_REG_BIT(SET, BIT) \ +- ((SET)[(BIT) / UHOST_BITS_PER_WIDE_INT] \ +- &= ~(HARD_CONST (1) << ((BIT) % UHOST_BITS_PER_WIDE_INT))) +- +-#define TEST_HARD_REG_BIT(SET, BIT) \ +- (!!((SET)[(BIT) / UHOST_BITS_PER_WIDE_INT] \ +- & (HARD_CONST (1) << ((BIT) % UHOST_BITS_PER_WIDE_INT)))) +- +-#if FIRST_PSEUDO_REGISTER <= 2*HOST_BITS_PER_WIDEST_FAST_INT +-#define CLEAR_HARD_REG_SET(TO) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- scan_tp_[0] = 0; \ +- scan_tp_[1] = 0; } while (0) +- +-#define SET_HARD_REG_SET(TO) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- scan_tp_[0] = -1; \ +- scan_tp_[1] = -1; } while (0) +- +-#define COPY_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] = scan_fp_[0]; \ +- scan_tp_[1] = scan_fp_[1]; } while (0) +- +-#define COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] = ~ scan_fp_[0]; \ +- scan_tp_[1] = ~ scan_fp_[1]; } while (0) +- +-#define AND_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] &= scan_fp_[0]; \ +- scan_tp_[1] &= scan_fp_[1]; } while (0) +- +-#define AND_COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] &= ~ scan_fp_[0]; \ +- scan_tp_[1] &= ~ scan_fp_[1]; } while (0) +- +-#define IOR_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] |= scan_fp_[0]; \ +- scan_tp_[1] |= scan_fp_[1]; } while (0) +- +-#define IOR_COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] |= ~ scan_fp_[0]; \ +- scan_tp_[1] |= ~ scan_fp_[1]; } while (0) +- +-static 
inline bool +-hard_reg_set_subset_p (const HARD_REG_SET x, const HARD_REG_SET y) +-{ +- return (x[0] & ~y[0]) == 0 && (x[1] & ~y[1]) == 0; +-} +- +-static inline bool +-hard_reg_set_equal_p (const HARD_REG_SET x, const HARD_REG_SET y) ++inline void ++SET_HARD_REG_BIT (HARD_REG_SET &set, unsigned int bit) + { +- return x[0] == y[0] && x[1] == y[1]; ++ set.elts[bit / UHOST_BITS_PER_WIDE_INT] ++ |= HARD_CONST (1) << (bit % UHOST_BITS_PER_WIDE_INT); + } + +-static inline bool +-hard_reg_set_intersect_p (const HARD_REG_SET x, const HARD_REG_SET y) ++inline void ++CLEAR_HARD_REG_BIT (HARD_REG_SET &set, unsigned int bit) + { +- return (x[0] & y[0]) != 0 || (x[1] & y[1]) != 0; ++ set.elts[bit / UHOST_BITS_PER_WIDE_INT] ++ &= ~(HARD_CONST (1) << (bit % UHOST_BITS_PER_WIDE_INT)); + } + +-static inline bool +-hard_reg_set_empty_p (const HARD_REG_SET x) ++inline bool ++TEST_HARD_REG_BIT (const_hard_reg_set set, unsigned int bit) + { +- return x[0] == 0 && x[1] == 0; ++ return (set.elts[bit / UHOST_BITS_PER_WIDE_INT] ++ & (HARD_CONST (1) << (bit % UHOST_BITS_PER_WIDE_INT))); + } + +-#else +-#if FIRST_PSEUDO_REGISTER <= 3*HOST_BITS_PER_WIDEST_FAST_INT +-#define CLEAR_HARD_REG_SET(TO) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- scan_tp_[0] = 0; \ +- scan_tp_[1] = 0; \ +- scan_tp_[2] = 0; } while (0) +- +-#define SET_HARD_REG_SET(TO) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- scan_tp_[0] = -1; \ +- scan_tp_[1] = -1; \ +- scan_tp_[2] = -1; } while (0) +- +-#define COPY_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] = scan_fp_[0]; \ +- scan_tp_[1] = scan_fp_[1]; \ +- scan_tp_[2] = scan_fp_[2]; } while (0) +- +-#define COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] = ~ scan_fp_[0]; \ +- scan_tp_[1] = ~ scan_fp_[1]; \ +- scan_tp_[2] = ~ scan_fp_[2]; } while (0) +- +-#define AND_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] &= scan_fp_[0]; \ +- scan_tp_[1] &= scan_fp_[1]; \ +- scan_tp_[2] &= scan_fp_[2]; } while (0) +- +-#define AND_COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] &= ~ scan_fp_[0]; \ +- scan_tp_[1] &= ~ scan_fp_[1]; \ +- scan_tp_[2] &= ~ scan_fp_[2]; } while (0) +- +-#define IOR_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] |= scan_fp_[0]; \ +- scan_tp_[1] |= scan_fp_[1]; \ +- scan_tp_[2] |= scan_fp_[2]; } while (0) +- +-#define IOR_COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] |= ~ scan_fp_[0]; \ +- scan_tp_[1] |= ~ scan_fp_[1]; \ +- scan_tp_[2] |= ~ scan_fp_[2]; } while (0) +- +-static inline bool +-hard_reg_set_subset_p (const HARD_REG_SET x, const HARD_REG_SET y) ++inline void ++CLEAR_HARD_REG_SET (HARD_REG_SET &set) + { +- return ((x[0] & ~y[0]) == 0 +- && (x[1] & ~y[1]) == 0 +- && (x[2] & ~y[2]) == 0); ++ for (unsigned int i = 0; i < ARRAY_SIZE (set.elts); ++i) ++ set.elts[i] = 0; + } + +-static inline bool +-hard_reg_set_equal_p (const HARD_REG_SET x, const HARD_REG_SET y) ++inline void ++SET_HARD_REG_SET (HARD_REG_SET &set) + { +- return x[0] == y[0] && x[1] == y[1] && x[2] == y[2]; ++ for (unsigned int i = 0; i < ARRAY_SIZE (set.elts); ++i) 
++ set.elts[i] = -1; + } + + static inline bool +-hard_reg_set_intersect_p (const HARD_REG_SET x, const HARD_REG_SET y) +-{ +- return ((x[0] & y[0]) != 0 +- || (x[1] & y[1]) != 0 +- || (x[2] & y[2]) != 0); +-} +- +-static inline bool +-hard_reg_set_empty_p (const HARD_REG_SET x) +-{ +- return x[0] == 0 && x[1] == 0 && x[2] == 0; +-} +- +-#else +-#if FIRST_PSEUDO_REGISTER <= 4*HOST_BITS_PER_WIDEST_FAST_INT +-#define CLEAR_HARD_REG_SET(TO) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- scan_tp_[0] = 0; \ +- scan_tp_[1] = 0; \ +- scan_tp_[2] = 0; \ +- scan_tp_[3] = 0; } while (0) +- +-#define SET_HARD_REG_SET(TO) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- scan_tp_[0] = -1; \ +- scan_tp_[1] = -1; \ +- scan_tp_[2] = -1; \ +- scan_tp_[3] = -1; } while (0) +- +-#define COPY_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] = scan_fp_[0]; \ +- scan_tp_[1] = scan_fp_[1]; \ +- scan_tp_[2] = scan_fp_[2]; \ +- scan_tp_[3] = scan_fp_[3]; } while (0) +- +-#define COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] = ~ scan_fp_[0]; \ +- scan_tp_[1] = ~ scan_fp_[1]; \ +- scan_tp_[2] = ~ scan_fp_[2]; \ +- scan_tp_[3] = ~ scan_fp_[3]; } while (0) +- +-#define AND_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] &= scan_fp_[0]; \ +- scan_tp_[1] &= scan_fp_[1]; \ +- scan_tp_[2] &= scan_fp_[2]; \ +- scan_tp_[3] &= scan_fp_[3]; } while (0) +- +-#define AND_COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] &= ~ scan_fp_[0]; \ +- scan_tp_[1] &= ~ scan_fp_[1]; \ +- scan_tp_[2] &= ~ scan_fp_[2]; \ +- scan_tp_[3] &= ~ scan_fp_[3]; } while (0) +- +-#define IOR_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] |= scan_fp_[0]; \ +- scan_tp_[1] |= scan_fp_[1]; \ +- scan_tp_[2] |= scan_fp_[2]; \ +- scan_tp_[3] |= scan_fp_[3]; } while (0) +- +-#define IOR_COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- scan_tp_[0] |= ~ scan_fp_[0]; \ +- scan_tp_[1] |= ~ scan_fp_[1]; \ +- scan_tp_[2] |= ~ scan_fp_[2]; \ +- scan_tp_[3] |= ~ scan_fp_[3]; } while (0) +- +-static inline bool +-hard_reg_set_subset_p (const HARD_REG_SET x, const HARD_REG_SET y) ++hard_reg_set_subset_p (const_hard_reg_set x, const_hard_reg_set y) + { +- return ((x[0] & ~y[0]) == 0 +- && (x[1] & ~y[1]) == 0 +- && (x[2] & ~y[2]) == 0 +- && (x[3] & ~y[3]) == 0); ++ HARD_REG_ELT_TYPE bad = 0; ++ for (unsigned int i = 0; i < ARRAY_SIZE (x.elts); ++i) ++ bad |= (x.elts[i] & ~y.elts[i]); ++ return bad == 0; + } + + static inline bool +-hard_reg_set_equal_p (const HARD_REG_SET x, const HARD_REG_SET y) ++hard_reg_set_intersect_p (const_hard_reg_set x, const_hard_reg_set y) + { +- return x[0] == y[0] && x[1] == y[1] && x[2] == y[2] && x[3] == y[3]; ++ HARD_REG_ELT_TYPE good = 0; ++ for (unsigned int i = 0; i < ARRAY_SIZE (x.elts); ++i) ++ good |= (x.elts[i] & y.elts[i]); ++ return good != 0; + } + + static inline bool +-hard_reg_set_intersect_p (const HARD_REG_SET x, const HARD_REG_SET y) ++hard_reg_set_empty_p (const_hard_reg_set x) + { +- return ((x[0] & y[0]) != 0 +- || (x[1] & y[1]) != 0 +- || (x[2] & y[2]) != 0 +- || (x[3] & y[3]) != 0); ++ 
HARD_REG_ELT_TYPE bad = 0; ++ for (unsigned int i = 0; i < ARRAY_SIZE (x.elts); ++i) ++ bad |= x.elts[i]; ++ return bad == 0; + } +- +-static inline bool +-hard_reg_set_empty_p (const HARD_REG_SET x) +-{ +- return x[0] == 0 && x[1] == 0 && x[2] == 0 && x[3] == 0; +-} +- +-#else /* FIRST_PSEUDO_REGISTER > 4*HOST_BITS_PER_WIDEST_FAST_INT */ +- +-#define CLEAR_HARD_REG_SET(TO) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- int i; \ +- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ +- *scan_tp_++ = 0; } while (0) +- +-#define SET_HARD_REG_SET(TO) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- int i; \ +- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ +- *scan_tp_++ = -1; } while (0) +- +-#define COPY_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- int i; \ +- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ +- *scan_tp_++ = *scan_fp_++; } while (0) +- +-#define COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- int i; \ +- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ +- *scan_tp_++ = ~ *scan_fp_++; } while (0) +- +-#define AND_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- int i; \ +- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ +- *scan_tp_++ &= *scan_fp_++; } while (0) +- +-#define AND_COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- int i; \ +- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ +- *scan_tp_++ &= ~ *scan_fp_++; } while (0) +- +-#define IOR_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- int i; \ +- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ +- *scan_tp_++ |= *scan_fp_++; } while (0) +- +-#define IOR_COMPL_HARD_REG_SET(TO, FROM) \ +-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ +- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ +- int i; \ +- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ +- *scan_tp_++ |= ~ *scan_fp_++; } while (0) +- +-static inline bool +-hard_reg_set_subset_p (const HARD_REG_SET x, const HARD_REG_SET y) +-{ +- int i; +- +- for (i = 0; i < HARD_REG_SET_LONGS; i++) +- if ((x[i] & ~y[i]) != 0) +- return false; +- return true; +-} +- +-static inline bool +-hard_reg_set_equal_p (const HARD_REG_SET x, const HARD_REG_SET y) +-{ +- int i; +- +- for (i = 0; i < HARD_REG_SET_LONGS; i++) +- if (x[i] != y[i]) +- return false; +- return true; +-} +- +-static inline bool +-hard_reg_set_intersect_p (const HARD_REG_SET x, const HARD_REG_SET y) +-{ +- int i; +- +- for (i = 0; i < HARD_REG_SET_LONGS; i++) +- if ((x[i] & y[i]) != 0) +- return true; +- return false; +-} +- +-static inline bool +-hard_reg_set_empty_p (const HARD_REG_SET x) +-{ +- int i; +- +- for (i = 0; i < HARD_REG_SET_LONGS; i++) +- if (x[i] != 0) +- return false; +- return true; +-} +- +-#endif +-#endif +-#endif + #endif + + /* Iterator for hard register sets. */ +@@ -515,7 +261,7 @@ hard_reg_set_empty_p (const HARD_REG_SET x) + struct hard_reg_set_iterator + { + /* Pointer to the current element. */ +- HARD_REG_ELT_TYPE *pelt; ++ const HARD_REG_ELT_TYPE *pelt; + + /* The length of the set. */ + unsigned short length; +@@ -534,11 +280,11 @@ struct hard_reg_set_iterator + /* The implementation of the iterator functions is fully analogous to + the bitmap iterators. 
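
The hard-reg-set.h rewrite above drops the per-width COPY/AND/IOR/COMPL macro families in favour of a struct whose bitwise operators loop over however many words FIRST_PSEUDO_REGISTER needs, so callers can simply write a |= b or a & ~b. The self-contained sketch below mirrors that design for an assumed two-word set; reg_set and subset_p are hypothetical stand-ins for HARD_REG_SET and hard_reg_set_subset_p, not the real definitions.

// Miniature operator-based register set, two 64-bit words by assumption.
#include <cassert>
#include <cstddef>
#include <cstdint>

struct reg_set
{
  uint64_t elts[2];

  reg_set operator~ () const
  {
    reg_set r;
    for (size_t i = 0; i < 2; ++i)
      r.elts[i] = ~elts[i];
    return r;
  }

  reg_set operator& (const reg_set &o) const
  {
    reg_set r;
    for (size_t i = 0; i < 2; ++i)
      r.elts[i] = elts[i] & o.elts[i];
    return r;
  }

  reg_set &operator|= (const reg_set &o)
  {
    for (size_t i = 0; i < 2; ++i)
      elts[i] |= o.elts[i];
    return *this;
  }

  bool operator== (const reg_set &o) const
  {
    uint64_t bad = 0;
    for (size_t i = 0; i < 2; ++i)
      bad |= elts[i] ^ o.elts[i];
    return bad == 0;
  }
};

// True if every register in X is also in Y, accumulated without early exits
// in the same style as the patch's hard_reg_set_subset_p.
static bool
subset_p (const reg_set &x, const reg_set &y)
{
  uint64_t bad = 0;
  for (size_t i = 0; i < 2; ++i)
    bad |= x.elts[i] & ~y.elts[i];
  return bad == 0;
}

int
main ()
{
  reg_set a = {{0x5, 0x0}};
  reg_set b = {{0xf, 0x1}};
  assert (subset_p (a, b));
  a |= b;               // where IOR_HARD_REG_SET (a, b) was used before
  assert (a == b);
  reg_set c = a & ~b;   // where COPY + AND_COMPL_HARD_REG_SET were used
  assert (subset_p (c, a));
  return 0;
}
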
*/ + static inline void +-hard_reg_set_iter_init (hard_reg_set_iterator *iter, HARD_REG_SET set, ++hard_reg_set_iter_init (hard_reg_set_iterator *iter, const_hard_reg_set set, + unsigned min, unsigned *regno) + { + #ifdef HARD_REG_SET_LONGS +- iter->pelt = set; ++ iter->pelt = set.elts; + iter->length = HARD_REG_SET_LONGS; + #else + iter->pelt = &set; +@@ -649,16 +395,15 @@ struct target_hard_regs { + a pseudo reg whose life crosses calls. */ + char x_call_used_regs[FIRST_PSEUDO_REGISTER]; + +- char x_call_really_used_regs[FIRST_PSEUDO_REGISTER]; +- +- /* The same info as a HARD_REG_SET. */ +- HARD_REG_SET x_call_used_reg_set; ++ /* For targets that use reload rather than LRA, this is the set ++ of registers that we are able to save and restore around calls ++ (i.e. those for which we know a suitable mode and set of ++ load/store instructions exist). For LRA targets it contains ++ all registers. + +- /* Contains registers that are fixed use -- i.e. in fixed_reg_set -- or +- a function value return register or TARGET_STRUCT_VALUE_RTX or +- STATIC_CHAIN_REGNUM. These are the registers that cannot hold quantities +- across calls even if we are willing to save and restore them. */ +- HARD_REG_SET x_call_fixed_reg_set; ++ This is legacy information and should be removed if all targets ++ switch to LRA. */ ++ HARD_REG_SET x_savable_regs; + + /* Contains registers that are fixed use -- i.e. in fixed_reg_set -- but + only if they are not merely part of that set because they are global +@@ -674,10 +419,6 @@ struct target_hard_regs { + with the local stack frame are safe, but scant others. */ + HARD_REG_SET x_regs_invalidated_by_call; + +- /* Call used hard registers which cannot be saved because there is no +- insn for this. */ +- HARD_REG_SET x_no_caller_save_reg_set; +- + /* Table of register numbers in the order in which to try to use them. */ + int x_reg_alloc_order[FIRST_PSEUDO_REGISTER]; + +@@ -730,18 +471,16 @@ extern struct target_hard_regs *this_target_hard_regs; + (this_target_hard_regs->x_fixed_reg_set) + #define fixed_nonglobal_reg_set \ + (this_target_hard_regs->x_fixed_nonglobal_reg_set) ++#ifdef IN_TARGET_CODE + #define call_used_regs \ + (this_target_hard_regs->x_call_used_regs) +-#define call_really_used_regs \ +- (this_target_hard_regs->x_call_really_used_regs) +-#define call_used_reg_set \ +- (this_target_hard_regs->x_call_used_reg_set) +-#define call_fixed_reg_set \ +- (this_target_hard_regs->x_call_fixed_reg_set) ++#endif ++#define savable_regs \ ++ (this_target_hard_regs->x_savable_regs) + #define regs_invalidated_by_call \ + (this_target_hard_regs->x_regs_invalidated_by_call) +-#define no_caller_save_reg_set \ +- (this_target_hard_regs->x_no_caller_save_reg_set) ++#define call_used_or_fixed_regs \ ++ (regs_invalidated_by_call | fixed_reg_set) + #define reg_alloc_order \ + (this_target_hard_regs->x_reg_alloc_order) + #define inv_reg_alloc_order \ +@@ -770,4 +509,13 @@ extern const char * reg_class_names[]; + #define REG_CAN_CHANGE_MODE_P(REGN, FROM, TO) \ + (targetm.can_change_mode_class (FROM, TO, REGNO_REG_CLASS (REGN))) + ++/* Return true if register REGNO is either fixed or call-used ++ (aka call-clobbered). */ ++ ++inline bool ++call_used_or_fixed_reg_p (unsigned int regno) ++{ ++ return fixed_regs[regno] || this_target_hard_regs->x_call_used_regs[regno]; ++} ++ + #endif /* ! 
GCC_HARD_REG_SET_H */ +diff --git a/gcc/hooks.c b/gcc/hooks.c +index f95659b38..98038860e 100644 +--- a/gcc/hooks.c ++++ b/gcc/hooks.c +@@ -140,9 +140,8 @@ hook_bool_puint64_puint64_true (poly_uint64, poly_uint64) + return true; + } + +-/* Generic hook that takes (unsigned int, machine_mode) and returns false. */ + bool +-hook_bool_insn_uint_mode_false (rtx_insn *, unsigned int, machine_mode) ++hook_bool_uint_uint_mode_false (unsigned int, unsigned int, machine_mode) + { + return false; + } +@@ -313,6 +312,12 @@ hook_bool_const_tree_false (const_tree) + return false; + } + ++bool ++hook_bool_const_tree_const_tree_true (const_tree, const_tree) ++{ ++ return true; ++} ++ + bool + hook_bool_tree_true (tree) + { +diff --git a/gcc/hooks.h b/gcc/hooks.h +index 0bc8117c2..b398d13ce 100644 +--- a/gcc/hooks.h ++++ b/gcc/hooks.h +@@ -40,11 +40,12 @@ extern bool hook_bool_const_rtx_insn_const_rtx_insn_true (const rtx_insn *, + extern bool hook_bool_mode_uhwi_false (machine_mode, + unsigned HOST_WIDE_INT); + extern bool hook_bool_puint64_puint64_true (poly_uint64, poly_uint64); +-extern bool hook_bool_insn_uint_mode_false (rtx_insn *, unsigned int, ++extern bool hook_bool_uint_uint_mode_false (unsigned int, unsigned int, + machine_mode); + extern bool hook_bool_uint_mode_true (unsigned int, machine_mode); + extern bool hook_bool_tree_false (tree); + extern bool hook_bool_const_tree_false (const_tree); ++extern bool hook_bool_const_tree_const_tree_true (const_tree, const_tree); + extern bool hook_bool_tree_true (tree); + extern bool hook_bool_const_tree_true (const_tree); + extern bool hook_bool_gsiptr_false (gimple_stmt_iterator *); +diff --git a/gcc/hw-doloop.c b/gcc/hw-doloop.c +index 2decece62..3ee0b4098 100644 +--- a/gcc/hw-doloop.c ++++ b/gcc/hw-doloop.c +@@ -141,7 +141,7 @@ scan_loop (hwloop_info loop) + CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg)); + else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn))) + loop->iter_reg_used = true; +- IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn); ++ loop->regs_set_in_loop |= set_this_insn; + } + } + } +@@ -581,7 +581,7 @@ optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks) + inner_depth = inner->depth; + /* The set of registers may be changed while optimizing the inner + loop. */ +- IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop); ++ loop->regs_set_in_loop |= inner->regs_set_in_loop; + } + + loop->depth = inner_depth + 1; +diff --git a/gcc/int-vector-builder.h b/gcc/int-vector-builder.h +index adf0904c5..dc9651021 100644 +--- a/gcc/int-vector-builder.h ++++ b/gcc/int-vector-builder.h +@@ -26,10 +26,11 @@ along with GCC; see the file COPYING3. If not see + encoding as tree and rtx constants. See vector_builder for more + details. */ + template +-class int_vector_builder : public vector_builder > ++class int_vector_builder : public vector_builder > + { +- typedef vector_builder parent; +- friend class vector_builder; ++ typedef vector_builder parent; ++ friend class vector_builder; + + public: + int_vector_builder () {} +@@ -45,6 +46,8 @@ private: + T apply_step (T, unsigned int, T) const; + bool can_elide_p (T) const { return true; } + void note_representative (T *, T) {} ++ ++ static poly_uint64 shape_nelts (poly_uint64 x) { return x; } + }; + + /* Create a new builder for a vector with FULL_NELTS elements. 
+diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c +index 21ecd5667..9753a12f3 100644 +--- a/gcc/internal-fn.c ++++ b/gcc/internal-fn.c +@@ -117,6 +117,7 @@ init_internal_fns () + #define while_direct { 0, 2, false } + #define fold_extract_direct { 2, 2, false } + #define fold_left_direct { 1, 1, false } ++#define mask_fold_left_direct { 1, 1, false } + + const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = { + #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct, +@@ -3005,6 +3006,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) + #define expand_fold_left_optab_fn(FN, STMT, OPTAB) \ + expand_direct_optab_fn (FN, STMT, OPTAB, 2) + ++#define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \ ++ expand_direct_optab_fn (FN, STMT, OPTAB, 3) ++ + /* RETURN_TYPE and ARGS are a return type and argument list that are + in principle compatible with FN (which satisfies direct_internal_fn_p). + Return the types that should be used to determine whether the +@@ -3093,6 +3097,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, + #define direct_while_optab_supported_p convert_optab_supported_p + #define direct_fold_extract_optab_supported_p direct_optab_supported_p + #define direct_fold_left_optab_supported_p direct_optab_supported_p ++#define direct_mask_fold_left_optab_supported_p direct_optab_supported_p + + /* Return the optab used by internal function FN. */ + +@@ -3210,6 +3215,8 @@ first_commutative_argument (internal_fn fn) + case IFN_FNMS: + case IFN_AVG_FLOOR: + case IFN_AVG_CEIL: ++ case IFN_MULHS: ++ case IFN_MULHRS: + case IFN_FMIN: + case IFN_FMAX: + return 0; +@@ -3286,7 +3293,9 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = { + T (MAX_EXPR, IFN_COND_MAX) \ + T (BIT_AND_EXPR, IFN_COND_AND) \ + T (BIT_IOR_EXPR, IFN_COND_IOR) \ +- T (BIT_XOR_EXPR, IFN_COND_XOR) ++ T (BIT_XOR_EXPR, IFN_COND_XOR) \ ++ T (LSHIFT_EXPR, IFN_COND_SHL) \ ++ T (RSHIFT_EXPR, IFN_COND_SHR) + + /* Return a function that only performs CODE when a certain condition is met + and that uses a given fallback value otherwise. 
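
The COND_SHL and COND_SHR entries added here follow the contract the comment above describes for every IFN_COND_* function: apply the operation lanewise where the mask is set and pass a fallback value through elsewhere, with COND_SHR expanding to an arithmetic or logical shift according to signedness. A scalar-loop sketch of that contract, using the hypothetical name cond_shl rather than GCC's vector representation, looks like this:

// Lanewise model of the conditional-shift contract:
// result[i] = mask[i] ? a[i] << shift[i] : fallback[i].
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint32_t>
cond_shl (const std::vector<bool> &mask,
          const std::vector<uint32_t> &a,
          const std::vector<uint32_t> &shift,
          const std::vector<uint32_t> &fallback)
{
  std::vector<uint32_t> r (a.size ());
  for (size_t i = 0; i < a.size (); ++i)
    r[i] = mask[i] ? a[i] << shift[i] : fallback[i];
  return r;
}

int
main ()
{
  std::vector<bool> m = {true, false, true};
  std::vector<uint32_t> a = {1, 2, 3}, s = {4, 5, 6}, f = {9, 9, 9};
  std::vector<uint32_t> r = cond_shl (m, a, s, f);
  assert (r[0] == 16 && r[1] == 9 && r[2] == 192);
  return 0;
}
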
For example, if CODE is +diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def +index e370eaa84..ae32fc7bd 100644 +--- a/gcc/internal-fn.def ++++ b/gcc/internal-fn.def +@@ -140,6 +140,8 @@ DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) + DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW, + vec_shl_insert, binary) + ++DEF_INTERNAL_OPTAB_FN (DIV_POW2, ECF_CONST | ECF_NOTHROW, sdiv_pow2, binary) ++ + DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary) + DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary) + DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary) +@@ -149,6 +151,11 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first, + DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first, + savg_ceil, uavg_ceil, binary) + ++DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first, ++ smulhs, umulhs, binary) ++DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, ++ smulhrs, umulhrs, binary) ++ + DEF_INTERNAL_OPTAB_FN (COND_ADD, ECF_CONST, cond_add, cond_binary) + DEF_INTERNAL_OPTAB_FN (COND_SUB, ECF_CONST, cond_sub, cond_binary) + DEF_INTERNAL_OPTAB_FN (COND_MUL, ECF_CONST, cond_smul, cond_binary) +@@ -167,6 +174,10 @@ DEF_INTERNAL_OPTAB_FN (COND_IOR, ECF_CONST | ECF_NOTHROW, + cond_ior, cond_binary) + DEF_INTERNAL_OPTAB_FN (COND_XOR, ECF_CONST | ECF_NOTHROW, + cond_xor, cond_binary) ++DEF_INTERNAL_OPTAB_FN (COND_SHL, ECF_CONST | ECF_NOTHROW, ++ cond_ashl, cond_binary) ++DEF_INTERNAL_SIGNED_OPTAB_FN (COND_SHR, ECF_CONST | ECF_NOTHROW, first, ++ cond_ashr, cond_lshr, cond_binary) + + DEF_INTERNAL_OPTAB_FN (COND_FMA, ECF_CONST, cond_fma, cond_ternary) + DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary) +@@ -199,6 +210,9 @@ DEF_INTERNAL_OPTAB_FN (FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, + DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW, + fold_left_plus, fold_left) + ++DEF_INTERNAL_OPTAB_FN (MASK_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW, ++ mask_fold_left_plus, mask_fold_left) ++ + /* Unary math functions. 
*/ + DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary) + DEF_INTERNAL_FLT_FN (ACOSH, ECF_CONST, acosh, unary) +@@ -217,6 +231,7 @@ DEF_INTERNAL_FLT_FN (LOG10, ECF_CONST, log10, unary) + DEF_INTERNAL_FLT_FN (LOG1P, ECF_CONST, log1p, unary) + DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary) + DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary) ++DEF_INTERNAL_FLT_FN (SIGNBIT, ECF_CONST, signbit, unary) + DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary) + DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary) + DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary) +diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c +index 8988a4e49..b9e2ef450 100644 +--- a/gcc/ipa-cp.c ++++ b/gcc/ipa-cp.c +@@ -2862,8 +2862,7 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie, + if (can_refer) + { + if (!target +- || (TREE_CODE (TREE_TYPE (target)) == FUNCTION_TYPE +- && DECL_FUNCTION_CODE (target) == BUILT_IN_UNREACHABLE) ++ || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE) + || !possible_polymorphic_call_target_p + (ie, cgraph_node::get (target))) + { +diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c +index 2d8a0b383..df1ea21b4 100644 +--- a/gcc/ipa-devirt.c ++++ b/gcc/ipa-devirt.c +@@ -3576,12 +3576,10 @@ possible_polymorphic_call_target_p (tree otr_type, + { + vec targets; + unsigned int i; +- enum built_in_function fcode; + bool final; + +- if (TREE_CODE (TREE_TYPE (n->decl)) == FUNCTION_TYPE +- && ((fcode = DECL_FUNCTION_CODE (n->decl)) == BUILT_IN_UNREACHABLE +- || fcode == BUILT_IN_TRAP)) ++ if (fndecl_built_in_p (n->decl, BUILT_IN_UNREACHABLE) ++ || fndecl_built_in_p (n->decl, BUILT_IN_TRAP)) + return true; + + if (is_cxa_pure_virtual_p (n->decl)) +diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c +index 568c6a452..8b6961486 100644 +--- a/gcc/ipa-icf.c ++++ b/gcc/ipa-icf.c +@@ -351,8 +351,8 @@ sem_item::compare_referenced_symbol_properties (symtab_node *used_by, + return return_false_with_msg ("inline attributes are different"); + } + +- if (DECL_IS_OPERATOR_NEW (n1->decl) +- != DECL_IS_OPERATOR_NEW (n2->decl)) ++ if (DECL_IS_OPERATOR_NEW_P (n1->decl) ++ != DECL_IS_OPERATOR_NEW_P (n2->decl)) + return return_false_with_msg ("operator new flags are different"); + } + +@@ -416,7 +416,7 @@ sem_item::hash_referenced_symbol_properties (symtab_node *ref, + hstate.add_flag (DECL_DISREGARD_INLINE_LIMITS (ref->decl)); + hstate.add_flag (DECL_DECLARED_INLINE_P (ref->decl)); + } +- hstate.add_flag (DECL_IS_OPERATOR_NEW (ref->decl)); ++ hstate.add_flag (DECL_IS_OPERATOR_NEW_P (ref->decl)); + } + else if (is_a (ref)) + { +diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c +index a2fb20320..7c627eff8 100644 +--- a/gcc/ipa-inline.c ++++ b/gcc/ipa-inline.c +@@ -390,6 +390,28 @@ can_inline_edge_p (struct cgraph_edge *e, bool report, + return inlinable; + } + ++/* Return inlining_insns_single limit for function N */ ++ ++static int ++inline_insns_single (cgraph_node *n) ++{ ++ if (opt_for_fn (n->decl, optimize >= 3)) ++ return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SINGLE); ++ else ++ return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SINGLE_O2); ++} ++ ++/* Return inlining_insns_auto limit for function N */ ++ ++static int ++inline_insns_auto (cgraph_node *n) ++{ ++ if (opt_for_fn (n->decl, optimize >= 3)) ++ return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_AUTO); ++ else ++ return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_AUTO_O2); ++} ++ + /* Decide if we can inline the edge and possibly update + inline_failed reason. 
+ We check whether inlining is possible at all and whether +@@ -532,8 +554,8 @@ can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report, + int growth = estimate_edge_growth (e); + if (growth > PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE) + && (!DECL_DECLARED_INLINE_P (callee->decl) +- && growth >= MAX (MAX_INLINE_INSNS_SINGLE, +- MAX_INLINE_INSNS_AUTO))) ++ && growth >= MAX (inline_insns_single (caller), ++ inline_insns_auto (caller)))) + { + e->inline_failed = CIF_OPTIMIZATION_MISMATCH; + inlinable = false; +@@ -641,6 +663,10 @@ want_early_inline_function_p (struct cgraph_edge *e) + { + int growth = estimate_edge_growth (e); + int n; ++ int early_inlining_insns = opt_for_fn (e->caller->decl, optimize) >= 3 ++ ? PARAM_VALUE (PARAM_EARLY_INLINING_INSNS) ++ : PARAM_VALUE (PARAM_EARLY_INLINING_INSNS_O2); ++ + + if (growth <= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE)) + ; +@@ -654,26 +680,28 @@ want_early_inline_function_p (struct cgraph_edge *e) + growth); + want_inline = false; + } +- else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS)) ++ else if (growth > early_inlining_insns) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt, + " will not early inline: %C->%C, " +- "growth %i exceeds --param early-inlining-insns\n", +- e->caller, callee, +- growth); ++ "growth %i exceeds --param early-inlining-insns%s\n", ++ e->caller, callee, growth, ++ opt_for_fn (e->caller->decl, optimize) >= 3 ++ ? "" : "-O2"); + want_inline = false; + } + else if ((n = num_calls (callee)) != 0 +- && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS)) ++ && growth * (n + 1) > early_inlining_insns) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt, + " will not early inline: %C->%C, " +- "growth %i exceeds --param early-inlining-insns " ++ "growth %i exceeds --param early-inlining-insns%s " + "divided by number of calls\n", +- e->caller, callee, +- growth); ++ e->caller, callee, growth, ++ opt_for_fn (e->caller->decl, optimize) >= 3 ++ ? "" : "-O2"); + want_inline = false; + } + } +@@ -739,9 +767,14 @@ big_speedup_p (struct cgraph_edge *e) + sreal spec_time = estimate_edge_time (e, &unspec_time); + sreal time = compute_uninlined_call_time (e, unspec_time); + sreal inlined_time = compute_inlined_call_time (e, spec_time); ++ cgraph_node *caller = (e->caller->inlined_to ++ ? e->caller->inlined_to ++ : e->caller); ++ int limit = opt_for_fn (caller->decl, optimize) >= 3 ++ ? 
PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP) ++ : PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP_O2); + +- if ((time - inlined_time) * 100 +- > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP))) ++ if ((time - inlined_time) * 100 > time * limit) + return true; + return false; + } +@@ -775,20 +808,29 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) + && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ())) + && ipa_fn_summaries->get (callee)->min_size + - ipa_call_summaries->get (e)->call_stmt_size +- > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO)) ++ > MAX (inline_insns_single (e->caller), ++ inline_insns_auto (e->caller))) + { +- e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; ++ if (opt_for_fn (e->caller->decl, optimize) >= 3) ++ e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; ++ else ++ e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT; + want_inline = false; + } + else if ((DECL_DECLARED_INLINE_P (callee->decl) + || e->count.ipa ().nonzero_p ()) + && ipa_fn_summaries->get (callee)->min_size + - ipa_call_summaries->get (e)->call_stmt_size +- > 16 * MAX_INLINE_INSNS_SINGLE) ++ > 16 * inline_insns_single (e->caller)) + { +- e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl) +- ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT +- : CIF_MAX_INLINE_INSNS_AUTO_LIMIT); ++ if (opt_for_fn (e->caller->decl, optimize) >= 3) ++ e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl) ++ ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT ++ : CIF_MAX_INLINE_INSNS_AUTO_LIMIT); ++ else ++ e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl) ++ ? CIF_MAX_INLINE_INSNS_SINGLE_O2_LIMIT ++ : CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT); + want_inline = false; + } + else +@@ -802,15 +844,18 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) + /* Apply MAX_INLINE_INSNS_SINGLE limit. Do not do so when + hints suggests that inlining given function is very profitable. */ + else if (DECL_DECLARED_INLINE_P (callee->decl) +- && growth >= MAX_INLINE_INSNS_SINGLE +- && (growth >= MAX_INLINE_INSNS_SINGLE * 16 ++ && growth >= inline_insns_single (e->caller) ++ && (growth >= inline_insns_single (e->caller) * 16 + || (!(hints & (INLINE_HINT_indirect_call + | INLINE_HINT_known_hot + | INLINE_HINT_loop_iterations + | INLINE_HINT_loop_stride)) + && !(big_speedup = big_speedup_p (e))))) + { +- e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT; ++ if (opt_for_fn (e->caller->decl, optimize) >= 3) ++ e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT; ++ else ++ e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_O2_LIMIT; + want_inline = false; + } + else if (!DECL_DECLARED_INLINE_P (callee->decl) +@@ -818,7 +863,7 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) + && growth >= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SMALL)) + { + /* growth_likely_positive is expensive, always test it last. */ +- if (growth >= MAX_INLINE_INSNS_SINGLE ++ if (growth >= inline_insns_single (e->caller) + || growth_likely_positive (callee, growth)) + { + e->inline_failed = CIF_NOT_DECLARED_INLINED; +@@ -833,22 +878,25 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) + && growth >= ((hints & (INLINE_HINT_indirect_call + | INLINE_HINT_loop_iterations + | INLINE_HINT_loop_stride)) +- ? MAX (MAX_INLINE_INSNS_AUTO, +- MAX_INLINE_INSNS_SINGLE) +- : MAX_INLINE_INSNS_AUTO) ++ ? MAX (inline_insns_auto (e->caller), ++ inline_insns_single (e->caller)) ++ : inline_insns_auto (e->caller)) + && !(big_speedup == -1 ? 
big_speedup_p (e) : big_speedup)) + { + /* growth_likely_positive is expensive, always test it last. */ +- if (growth >= MAX_INLINE_INSNS_SINGLE ++ if (growth >= inline_insns_single (e->caller) + || growth_likely_positive (callee, growth)) + { +- e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; ++ if (opt_for_fn (e->caller->decl, optimize) >= 3) ++ e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; ++ else ++ e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT; + want_inline = false; + } + } + /* If call is cold, do not inline when function body would grow. */ + else if (!e->maybe_hot_p () +- && (growth >= MAX_INLINE_INSNS_SINGLE ++ && (growth >= inline_insns_single (e->caller) + || growth_likely_positive (callee, growth))) + { + e->inline_failed = CIF_UNLIKELY_CALL; +@@ -1157,7 +1205,7 @@ edge_badness (struct cgraph_edge *edge, bool dump) + && caller_info->inlinable + && ipa_size_summaries->get (caller)->size + < (DECL_DECLARED_INLINE_P (caller->decl) +- ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO)) ++ ? inline_insns_single (caller) : inline_insns_auto (caller))) + { + if (dump) + fprintf (dump_file, +diff --git a/gcc/ipa-param-manipulation.c b/gcc/ipa-param-manipulation.c +index 037253a87..1af6d050c 100644 +--- a/gcc/ipa-param-manipulation.c ++++ b/gcc/ipa-param-manipulation.c +@@ -219,10 +219,7 @@ ipa_modify_formal_parameters (tree fndecl, ipa_parm_adjustment_vec adjustments) + + /* When signature changes, we need to clear builtin info. */ + if (fndecl_built_in_p (fndecl)) +- { +- DECL_BUILT_IN_CLASS (fndecl) = NOT_BUILT_IN; +- DECL_FUNCTION_CODE (fndecl) = (enum built_in_function) 0; +- } ++ set_decl_built_in_function (fndecl, NOT_BUILT_IN, 0); + + TREE_TYPE (fndecl) = new_type; + DECL_VIRTUAL_P (fndecl) = 0; +@@ -452,14 +449,7 @@ ipa_modify_call_arguments (struct cgraph_edge *cs, gcall *stmt, + gimple_call_set_chain (new_stmt, gimple_call_chain (stmt)); + gimple_call_copy_flags (new_stmt, stmt); + if (gimple_in_ssa_p (cfun)) +- { +- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- if (gimple_vdef (stmt)) +- { +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); +- SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; +- } +- } ++ gimple_move_vops (new_stmt, stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c +index 0439ce0c5..a70319505 100644 +--- a/gcc/ipa-prop.c ++++ b/gcc/ipa-prop.c +@@ -3685,8 +3685,7 @@ try_make_edge_direct_virtual_call (struct cgraph_edge *ie, + if (can_refer) + { + if (!t +- || (TREE_CODE (TREE_TYPE (t)) == FUNCTION_TYPE +- && DECL_FUNCTION_CODE (t) == BUILT_IN_UNREACHABLE) ++ || fndecl_built_in_p (t, BUILT_IN_UNREACHABLE) + || !possible_polymorphic_call_target_p + (ie, cgraph_node::get (t))) + { +diff --git a/gcc/ipa-split.c b/gcc/ipa-split.c +index 5eaf8257f..aef2fa53c 100644 +--- a/gcc/ipa-split.c ++++ b/gcc/ipa-split.c +@@ -1348,10 +1348,7 @@ split_function (basic_block return_bb, struct split_point *split_point, + changes. For partial inlining we however cannot expect the part + of builtin implementation to have same semantic as the whole. */ + if (fndecl_built_in_p (node->decl)) +- { +- DECL_BUILT_IN_CLASS (node->decl) = NOT_BUILT_IN; +- DECL_FUNCTION_CODE (node->decl) = (enum built_in_function) 0; +- } ++ set_decl_built_in_function (node->decl, NOT_BUILT_IN, 0); + + /* If return_bb contains any clobbers that refer to SSA_NAMEs + set in the split part, remove them. 
Also reset debug stmts that +diff --git a/gcc/ira-build.c b/gcc/ira-build.c +index 83caa3a8e..55c552679 100644 +--- a/gcc/ira-build.c ++++ b/gcc/ira-build.c +@@ -456,12 +456,10 @@ ira_create_object (ira_allocno_t a, int subword) + OBJECT_CONFLICT_VEC_P (obj) = false; + OBJECT_CONFLICT_ARRAY (obj) = NULL; + OBJECT_NUM_CONFLICTS (obj) = 0; +- COPY_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), ira_no_alloc_regs); +- COPY_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), ira_no_alloc_regs); +- IOR_COMPL_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), +- reg_class_contents[aclass]); +- IOR_COMPL_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- reg_class_contents[aclass]); ++ OBJECT_CONFLICT_HARD_REGS (obj) = ira_no_alloc_regs; ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) = ira_no_alloc_regs; ++ OBJECT_CONFLICT_HARD_REGS (obj) |= ~reg_class_contents[aclass]; ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= ~reg_class_contents[aclass]; + OBJECT_MIN (obj) = INT_MAX; + OBJECT_MAX (obj) = -1; + OBJECT_LIVE_RANGES (obj) = NULL; +@@ -549,10 +547,8 @@ ira_set_allocno_class (ira_allocno_t a, enum reg_class aclass) + ALLOCNO_CLASS (a) = aclass; + FOR_EACH_ALLOCNO_OBJECT (a, obj, oi) + { +- IOR_COMPL_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), +- reg_class_contents[aclass]); +- IOR_COMPL_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- reg_class_contents[aclass]); ++ OBJECT_CONFLICT_HARD_REGS (obj) |= ~reg_class_contents[aclass]; ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= ~reg_class_contents[aclass]; + } + } + +@@ -602,10 +598,10 @@ merge_hard_reg_conflicts (ira_allocno_t from, ira_allocno_t to, + ira_object_t to_obj = ALLOCNO_OBJECT (to, i); + + if (!total_only) +- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (to_obj), +- OBJECT_CONFLICT_HARD_REGS (from_obj)); +- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (to_obj), +- OBJECT_TOTAL_CONFLICT_HARD_REGS (from_obj)); ++ OBJECT_CONFLICT_HARD_REGS (to_obj) ++ |= OBJECT_CONFLICT_HARD_REGS (from_obj); ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (to_obj) ++ |= OBJECT_TOTAL_CONFLICT_HARD_REGS (from_obj); + } + #ifdef STACK_REGS + if (!total_only && ALLOCNO_NO_STACK_REG_P (from)) +@@ -618,15 +614,15 @@ merge_hard_reg_conflicts (ira_allocno_t from, ira_allocno_t to, + /* Update hard register conflict information for all objects associated with + A to include the regs in SET. 
*/ + void +-ior_hard_reg_conflicts (ira_allocno_t a, HARD_REG_SET *set) ++ior_hard_reg_conflicts (ira_allocno_t a, const_hard_reg_set set) + { + ira_allocno_object_iterator i; + ira_object_t obj; + + FOR_EACH_ALLOCNO_OBJECT (a, obj, i) + { +- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), *set); +- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), *set); ++ OBJECT_CONFLICT_HARD_REGS (obj) |= set; ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= set; + } + } + +@@ -907,8 +903,9 @@ create_cap_allocno (ira_allocno_t a) + + ALLOCNO_CALLS_CROSSED_NUM (cap) = ALLOCNO_CALLS_CROSSED_NUM (a); + ALLOCNO_CHEAP_CALLS_CROSSED_NUM (cap) = ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a); +- IOR_HARD_REG_SET (ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (cap), +- ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a)); ++ ALLOCNO_CROSSED_CALLS_ABIS (cap) = ALLOCNO_CROSSED_CALLS_ABIS (a); ++ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (cap) ++ = ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a); + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + { + fprintf (ira_dump_file, " Creating cap "); +@@ -1876,11 +1873,6 @@ create_insn_allocnos (rtx x, rtx outer, bool output_p) + create_insn_allocnos (XEXP (x, 0), NULL, true); + return; + } +- else if (code == CLOBBER_HIGH) +- { +- gcc_assert (REG_P (XEXP (x, 0)) && HARD_REGISTER_P (XEXP (x, 0))); +- return; +- } + else if (code == MEM) + { + create_insn_allocnos (XEXP (x, 0), NULL, false); +@@ -2036,8 +2028,10 @@ propagate_allocno_info (void) + += ALLOCNO_CALLS_CROSSED_NUM (a); + ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a) + += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a); +- IOR_HARD_REG_SET (ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a), +- ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a)); ++ ALLOCNO_CROSSED_CALLS_ABIS (parent_a) ++ |= ALLOCNO_CROSSED_CALLS_ABIS (a); ++ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a) ++ |= ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a); + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a) + += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); + aclass = ALLOCNO_CLASS (a); +@@ -2419,8 +2413,9 @@ propagate_some_info_from_allocno (ira_allocno_t a, ira_allocno_t from_a) + ALLOCNO_CALLS_CROSSED_NUM (a) += ALLOCNO_CALLS_CROSSED_NUM (from_a); + ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a) + += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (from_a); +- IOR_HARD_REG_SET (ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a), +- ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (from_a)); ++ ALLOCNO_CROSSED_CALLS_ABIS (a) |= ALLOCNO_CROSSED_CALLS_ABIS (from_a); ++ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a) ++ |= ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (from_a); + + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a) + += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (from_a); +@@ -2569,8 +2564,8 @@ remove_low_level_allocnos (void) + ALLOCNO_NEXT_REGNO_ALLOCNO (a) = NULL; + ALLOCNO_CAP_MEMBER (a) = NULL; + FOR_EACH_ALLOCNO_OBJECT (a, obj, oi) +- COPY_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), +- OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)); ++ OBJECT_CONFLICT_HARD_REGS (obj) ++ = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj); + #ifdef STACK_REGS + if (ALLOCNO_TOTAL_NO_STACK_REG_P (a)) + ALLOCNO_NO_STACK_REG_P (a) = true; +@@ -3060,8 +3055,10 @@ copy_info_to_removed_store_destinations (int regno) + += ALLOCNO_CALLS_CROSSED_NUM (a); + ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a) + += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a); +- IOR_HARD_REG_SET (ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a), +- ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a)); ++ ALLOCNO_CROSSED_CALLS_ABIS (parent_a) ++ |= ALLOCNO_CROSSED_CALLS_ABIS (a); ++ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a) ++ |= 
ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a); + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a) + += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); + merged_p = true; +@@ -3108,8 +3105,8 @@ ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit) + flattening. */ + continue; + FOR_EACH_ALLOCNO_OBJECT (a, obj, oi) +- COPY_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- OBJECT_CONFLICT_HARD_REGS (obj)); ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) ++ = OBJECT_CONFLICT_HARD_REGS (obj); + #ifdef STACK_REGS + ALLOCNO_TOTAL_NO_STACK_REG_P (a) = ALLOCNO_NO_STACK_REG_P (a); + #endif +@@ -3159,6 +3156,9 @@ ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit) + -= ALLOCNO_CALLS_CROSSED_NUM (a); + ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a) + -= ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a); ++ /* Assume that ALLOCNO_CROSSED_CALLS_ABIS and ++ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS stay the same. ++ We'd need to rebuild the IR to do better. */ + ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a) + -= ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); + ira_assert (ALLOCNO_CALLS_CROSSED_NUM (parent_a) >= 0 +@@ -3466,7 +3466,7 @@ ira_build (void) + allocno crossing calls. */ + FOR_EACH_ALLOCNO (a, ai) + if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) +- ior_hard_reg_conflicts (a, &call_used_reg_set); ++ ior_hard_reg_conflicts (a, ira_need_caller_save_regs (a)); + } + if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) + print_copies (ira_dump_file); +diff --git a/gcc/ira-color.c b/gcc/ira-color.c +index 8a90ae1b4..62499be91 100644 +--- a/gcc/ira-color.c ++++ b/gcc/ira-color.c +@@ -218,7 +218,7 @@ inline bool + allocno_hard_regs_hasher::equal (const allocno_hard_regs *hv1, + const allocno_hard_regs *hv2) + { +- return hard_reg_set_equal_p (hv1->set, hv2->set); ++ return hv1->set == hv2->set; + } + + /* Hash table of unique allocno hard registers. */ +@@ -261,14 +261,14 @@ add_allocno_hard_regs (HARD_REG_SET set, int64_t cost) + allocno_hard_regs_t hv; + + gcc_assert (! 
hard_reg_set_empty_p (set)); +- COPY_HARD_REG_SET (temp.set, set); ++ temp.set = set; + if ((hv = find_hard_regs (&temp)) != NULL) + hv->cost += cost; + else + { + hv = ((struct allocno_hard_regs *) + ira_allocate (sizeof (struct allocno_hard_regs))); +- COPY_HARD_REG_SET (hv->set, set); ++ hv->set = set; + hv->cost = cost; + allocno_hard_regs_vec.safe_push (hv); + insert_hard_regs (hv); +@@ -371,7 +371,7 @@ add_allocno_hard_regs_to_forest (allocno_hard_regs_node_t *roots, + start = hard_regs_node_vec.length (); + for (node = *roots; node != NULL; node = node->next) + { +- if (hard_reg_set_equal_p (hv->set, node->hard_regs->set)) ++ if (hv->set == node->hard_regs->set) + return; + if (hard_reg_set_subset_p (hv->set, node->hard_regs->set)) + { +@@ -382,8 +382,7 @@ add_allocno_hard_regs_to_forest (allocno_hard_regs_node_t *roots, + hard_regs_node_vec.safe_push (node); + else if (hard_reg_set_intersect_p (hv->set, node->hard_regs->set)) + { +- COPY_HARD_REG_SET (temp_set, hv->set); +- AND_HARD_REG_SET (temp_set, node->hard_regs->set); ++ temp_set = hv->set & node->hard_regs->set; + hv2 = add_allocno_hard_regs (temp_set, hv->cost); + add_allocno_hard_regs_to_forest (&node->first, hv2); + } +@@ -398,7 +397,7 @@ add_allocno_hard_regs_to_forest (allocno_hard_regs_node_t *roots, + i++) + { + node = hard_regs_node_vec[i]; +- IOR_HARD_REG_SET (temp_set, node->hard_regs->set); ++ temp_set |= node->hard_regs->set; + } + hv = add_allocno_hard_regs (temp_set, hv->cost); + new_node = create_new_allocno_hard_regs_node (hv); +@@ -717,8 +716,7 @@ form_allocno_hard_regs_nodes_forest (void) + (allocno_data->profitable_hard_regs, + ALLOCNO_MEMORY_COST (a) - ALLOCNO_CLASS_COST (a))); + } +- SET_HARD_REG_SET (temp); +- AND_COMPL_HARD_REG_SET (temp, ira_no_alloc_regs); ++ temp = ~ira_no_alloc_regs; + add_allocno_hard_regs (temp, 0); + qsort (allocno_hard_regs_vec.address () + start, + allocno_hard_regs_vec.length () - start, +@@ -833,10 +831,10 @@ setup_left_conflict_sizes_p (ira_allocno_t a) + nobj = ALLOCNO_NUM_OBJECTS (a); + data = ALLOCNO_COLOR_DATA (a); + subnodes = allocno_hard_regs_subnodes + data->hard_regs_subnodes_start; +- COPY_HARD_REG_SET (profitable_hard_regs, data->profitable_hard_regs); ++ profitable_hard_regs = data->profitable_hard_regs; + node = data->hard_regs_node; + node_preorder_num = node->preorder_num; +- COPY_HARD_REG_SET (node_set, node->hard_regs->set); ++ node_set = node->hard_regs->set; + node_check_tick++; + for (k = 0; k < nobj; k++) + { +@@ -859,7 +857,7 @@ setup_left_conflict_sizes_p (ira_allocno_t a) + ->profitable_hard_regs)) + continue; + conflict_node = conflict_data->hard_regs_node; +- COPY_HARD_REG_SET (conflict_node_set, conflict_node->hard_regs->set); ++ conflict_node_set = conflict_node->hard_regs->set; + if (hard_reg_set_subset_p (node_set, conflict_node_set)) + temp_node = node; + else +@@ -897,8 +895,7 @@ setup_left_conflict_sizes_p (ira_allocno_t a) + int j, n, hard_regno; + enum reg_class aclass; + +- COPY_HARD_REG_SET (temp_set, temp_node->hard_regs->set); +- AND_HARD_REG_SET (temp_set, profitable_hard_regs); ++ temp_set = temp_node->hard_regs->set & profitable_hard_regs; + aclass = ALLOCNO_CLASS (a); + for (n = 0, j = ira_class_hard_regs_num[aclass] - 1; j >= 0; j--) + { +@@ -1042,15 +1039,15 @@ setup_profitable_hard_regs (void) + else + { + mode = ALLOCNO_MODE (a); +- COPY_HARD_REG_SET (data->profitable_hard_regs, +- ira_useful_class_mode_regs[aclass][mode]); ++ data->profitable_hard_regs ++ = ira_useful_class_mode_regs[aclass][mode]; + nobj = ALLOCNO_NUM_OBJECTS 
(a); + for (k = 0; k < nobj; k++) + { + ira_object_t obj = ALLOCNO_OBJECT (a, k); + +- AND_COMPL_HARD_REG_SET (data->profitable_hard_regs, +- OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)); ++ data->profitable_hard_regs ++ &= ~OBJECT_TOTAL_CONFLICT_HARD_REGS (obj); + } + } + } +@@ -1091,9 +1088,8 @@ setup_profitable_hard_regs (void) + hard_regno + num); + } + else +- AND_COMPL_HARD_REG_SET +- (ALLOCNO_COLOR_DATA (conflict_a)->profitable_hard_regs, +- ira_reg_mode_hard_regset[hard_regno][mode]); ++ ALLOCNO_COLOR_DATA (conflict_a)->profitable_hard_regs ++ &= ~ira_reg_mode_hard_regset[hard_regno][mode]; + } + } + } +@@ -1589,20 +1585,15 @@ get_conflict_and_start_profitable_regs (ira_allocno_t a, bool retry_p, + for (i = 0; i < nwords; i++) + { + obj = ALLOCNO_OBJECT (a, i); +- COPY_HARD_REG_SET (conflict_regs[i], +- OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)); ++ conflict_regs[i] = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj); + } + if (retry_p) +- { +- COPY_HARD_REG_SET (*start_profitable_regs, +- reg_class_contents[ALLOCNO_CLASS (a)]); +- AND_COMPL_HARD_REG_SET (*start_profitable_regs, +- ira_prohibited_class_mode_regs +- [ALLOCNO_CLASS (a)][ALLOCNO_MODE (a)]); +- } ++ *start_profitable_regs ++ = (reg_class_contents[ALLOCNO_CLASS (a)] ++ &~ (ira_prohibited_class_mode_regs ++ [ALLOCNO_CLASS (a)][ALLOCNO_MODE (a)])); + else +- COPY_HARD_REG_SET (*start_profitable_regs, +- ALLOCNO_COLOR_DATA (a)->profitable_hard_regs); ++ *start_profitable_regs = ALLOCNO_COLOR_DATA (a)->profitable_hard_regs; + } + + /* Return true if HARD_REGNO is ok for assigning to allocno A with +@@ -1659,7 +1650,7 @@ calculate_saved_nregs (int hard_regno, machine_mode mode) + ira_assert (hard_regno >= 0); + for (i = hard_regno_nregs (hard_regno, mode) - 1; i >= 0; i--) + if (!allocated_hardreg_p[hard_regno + i] +- && !TEST_HARD_REG_BIT (call_used_reg_set, hard_regno + i) ++ && !crtl->abi->clobbers_full_reg_p (hard_regno + i) + && !LOCAL_REGNO (hard_regno + i)) + nregs++; + return nregs; +@@ -1803,9 +1794,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p) + hard_regno + num); + } + else +- IOR_HARD_REG_SET +- (conflicting_regs[word], +- ira_reg_mode_hard_regset[hard_regno][mode]); ++ conflicting_regs[word] ++ |= ira_reg_mode_hard_regset[hard_regno][mode]; + if (hard_reg_set_subset_p (profitable_hard_regs, + conflicting_regs[word])) + goto fail; +@@ -2698,8 +2688,7 @@ setup_allocno_available_regs_num (ira_allocno_t a) + reg_class_names[aclass], ira_class_hard_regs_num[aclass], n); + print_hard_reg_set (ira_dump_file, data->profitable_hard_regs, false); + fprintf (ira_dump_file, ", %snode: ", +- hard_reg_set_equal_p (data->profitable_hard_regs, +- data->hard_regs_node->hard_regs->set) ++ data->profitable_hard_regs == data->hard_regs_node->hard_regs->set + ? "" : "^"); + print_hard_reg_set (ira_dump_file, + data->hard_regs_node->hard_regs->set, false); +@@ -4387,11 +4376,10 @@ allocno_reload_assign (ira_allocno_t a, HARD_REG_SET forbidden_regs) + for (i = 0; i < n; i++) + { + ira_object_t obj = ALLOCNO_OBJECT (a, i); +- COPY_HARD_REG_SET (saved[i], OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)); +- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), forbidden_regs); ++ saved[i] = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj); ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= forbidden_regs; + if (! 
flag_caller_saves && ALLOCNO_CALLS_CROSSED_NUM (a) != 0) +- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- call_used_reg_set); ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= ira_need_caller_save_regs (a); + } + ALLOCNO_ASSIGNED_P (a) = false; + aclass = ALLOCNO_CLASS (a); +@@ -4410,9 +4398,7 @@ allocno_reload_assign (ira_allocno_t a, HARD_REG_SET forbidden_regs) + ? ALLOCNO_CLASS_COST (a) + : ALLOCNO_HARD_REG_COSTS (a)[ira_class_hard_reg_index + [aclass][hard_regno]])); +- if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0 +- && ira_hard_reg_set_intersection_p (hard_regno, ALLOCNO_MODE (a), +- call_used_reg_set)) ++ if (ira_need_caller_save_p (a, regno)) + { + ira_assert (flag_caller_saves); + caller_save_needed = 1; +@@ -4434,7 +4420,7 @@ allocno_reload_assign (ira_allocno_t a, HARD_REG_SET forbidden_regs) + for (i = 0; i < n; i++) + { + ira_object_t obj = ALLOCNO_OBJECT (a, i); +- COPY_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), saved[i]); ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) = saved[i]; + } + return reg_renumber[regno] >= 0; + } +@@ -4519,9 +4505,9 @@ ira_reassign_pseudos (int *spilled_pseudo_regs, int num, + for (i = 0; i < num; i++) + { + regno = spilled_pseudo_regs[i]; +- COPY_HARD_REG_SET (forbidden_regs, bad_spill_regs); +- IOR_HARD_REG_SET (forbidden_regs, pseudo_forbidden_regs[regno]); +- IOR_HARD_REG_SET (forbidden_regs, pseudo_previous_regs[regno]); ++ forbidden_regs = (bad_spill_regs ++ | pseudo_forbidden_regs[regno] ++ | pseudo_previous_regs[regno]); + gcc_assert (reg_renumber[regno] < 0); + a = ira_regno_allocno_map[regno]; + ira_mark_allocation_change (regno); +@@ -4699,16 +4685,16 @@ ira_mark_new_stack_slot (rtx x, int regno, poly_uint64 total_size) + given IN and OUT for INSN. Return also number points (through + EXCESS_PRESSURE_LIVE_LENGTH) where the pseudo-register lives and + the register pressure is high, number of references of the +- pseudo-registers (through NREFS), number of callee-clobbered +- hard-registers occupied by the pseudo-registers (through +- CALL_USED_COUNT), and the first hard regno occupied by the ++ pseudo-registers (through NREFS), the number of psuedo registers ++ whose allocated register wouldn't need saving in the prologue ++ (through CALL_USED_COUNT), and the first hard regno occupied by the + pseudo-registers (through FIRST_HARD_REGNO). */ + static int + calculate_spill_cost (int *regnos, rtx in, rtx out, rtx_insn *insn, + int *excess_pressure_live_length, + int *nrefs, int *call_used_count, int *first_hard_regno) + { +- int i, cost, regno, hard_regno, j, count, saved_cost, nregs; ++ int i, cost, regno, hard_regno, count, saved_cost; + bool in_p, out_p; + int length; + ira_allocno_t a; +@@ -4725,11 +4711,8 @@ calculate_spill_cost (int *regnos, rtx in, rtx out, rtx_insn *insn, + a = ira_regno_allocno_map[regno]; + length += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a) / ALLOCNO_NUM_OBJECTS (a); + cost += ALLOCNO_MEMORY_COST (a) - ALLOCNO_CLASS_COST (a); +- nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (a)); +- for (j = 0; j < nregs; j++) +- if (! 
TEST_HARD_REG_BIT (call_used_reg_set, hard_regno + j)) +- break; +- if (j == nregs) ++ if (in_hard_reg_set_p (crtl->abi->full_reg_clobbers (), ++ ALLOCNO_MODE (a), hard_regno)) + count++; + in_p = in && REG_P (in) && (int) REGNO (in) == hard_regno; + out_p = out && REG_P (out) && (int) REGNO (out) == hard_regno; +@@ -4886,11 +4869,10 @@ fast_allocation (void) + for (l = 0; l < nr; l++) + { + ira_object_t obj = ALLOCNO_OBJECT (a, l); +- IOR_HARD_REG_SET (conflict_hard_regs, +- OBJECT_CONFLICT_HARD_REGS (obj)); ++ conflict_hard_regs |= OBJECT_CONFLICT_HARD_REGS (obj); + for (r = OBJECT_LIVE_RANGES (obj); r != NULL; r = r->next) + for (j = r->start; j <= r->finish; j++) +- IOR_HARD_REG_SET (conflict_hard_regs, used_hard_regs[j]); ++ conflict_hard_regs |= used_hard_regs[j]; + } + aclass = ALLOCNO_CLASS (a); + ALLOCNO_ASSIGNED_P (a) = true; +@@ -4938,8 +4920,7 @@ fast_allocation (void) + ira_object_t obj = ALLOCNO_OBJECT (a, l); + for (r = OBJECT_LIVE_RANGES (obj); r != NULL; r = r->next) + for (k = r->start; k <= r->finish; k++) +- IOR_HARD_REG_SET (used_hard_regs[k], +- ira_reg_mode_hard_regset[hard_regno][mode]); ++ used_hard_regs[k] |= ira_reg_mode_hard_regset[hard_regno][mode]; + } + } + ira_free (sorted_allocnos); +diff --git a/gcc/ira-conflicts.c b/gcc/ira-conflicts.c +index 9a3e3811d..a0aefaa05 100644 +--- a/gcc/ira-conflicts.c ++++ b/gcc/ira-conflicts.c +@@ -325,12 +325,37 @@ process_regs_for_copy (rtx reg1, rtx reg2, bool constraint_p, + return true; + } + +-/* Process all of the output registers of the current insn which are +- not bound (BOUND_P) and the input register REG (its operand number ++/* Return true if output operand OUTPUT and input operand INPUT of ++ INSN can use the same register class for at least one alternative. ++ INSN is already described in recog_data and recog_op_alt. */ ++static bool ++can_use_same_reg_p (rtx_insn *insn, int output, int input) ++{ ++ alternative_mask preferred = get_preferred_alternatives (insn); ++ for (int nalt = 0; nalt < recog_data.n_alternatives; nalt++) ++ { ++ if (!TEST_BIT (preferred, nalt)) ++ continue; ++ ++ const operand_alternative *op_alt ++ = &recog_op_alt[nalt * recog_data.n_operands]; ++ if (op_alt[input].matches == output) ++ return true; ++ ++ if (ira_reg_class_intersect[op_alt[input].cl][op_alt[output].cl] ++ != NO_REGS) ++ return true; ++ } ++ return false; ++} ++ ++/* Process all of the output registers of the current insn (INSN) which ++ are not bound (BOUND_P) and the input register REG (its operand number + OP_NUM) which dies in the insn as if there were a move insn between + them with frequency FREQ. 
*/ + static void +-process_reg_shuffles (rtx reg, int op_num, int freq, bool *bound_p) ++process_reg_shuffles (rtx_insn *insn, rtx reg, int op_num, int freq, ++ bool *bound_p) + { + int i; + rtx another_reg; +@@ -342,7 +367,13 @@ process_reg_shuffles (rtx reg, int op_num, int freq, bool *bound_p) + + if (!REG_SUBREG_P (another_reg) || op_num == i + || recog_data.operand_type[i] != OP_OUT +- || bound_p[i]) ++ || bound_p[i] ++ || (!can_use_same_reg_p (insn, i, op_num) ++ && (recog_data.constraints[op_num][0] != '%' ++ || !can_use_same_reg_p (insn, i, op_num + 1)) ++ && (op_num == 0 ++ || recog_data.constraints[op_num - 1][0] != '%' ++ || !can_use_same_reg_p (insn, i, op_num - 1)))) + continue; + + process_regs_for_copy (reg, another_reg, false, NULL, freq); +@@ -358,7 +389,7 @@ add_insn_allocno_copies (rtx_insn *insn) + rtx set, operand, dup; + bool bound_p[MAX_RECOG_OPERANDS]; + int i, n, freq; +- HARD_REG_SET alts; ++ alternative_mask alts; + + freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)); + if (freq == 0) +@@ -379,7 +410,7 @@ add_insn_allocno_copies (rtx_insn *insn) + there are no dead registers, there will be no such copies. */ + if (! find_reg_note (insn, REG_DEAD, NULL_RTX)) + return; +- ira_setup_alts (insn, alts); ++ alts = ira_setup_alts (insn); + for (i = 0; i < recog_data.n_operands; i++) + bound_p[i] = false; + for (i = 0; i < recog_data.n_operands; i++) +@@ -412,7 +443,8 @@ add_insn_allocno_copies (rtx_insn *insn) + the corresponding allocno copies. The cost will not + correspond to a real move insn cost, so make the frequency + smaller. */ +- process_reg_shuffles (operand, i, freq < 8 ? 1 : freq / 8, bound_p); ++ process_reg_shuffles (insn, operand, i, freq < 8 ? 1 : freq / 8, ++ bound_p); + } + } + +@@ -660,17 +692,15 @@ print_allocno_conflicts (FILE * file, bool reg_p, ira_allocno_t a) + putc (')', file); + } + } +- COPY_HARD_REG_SET (conflicting_hard_regs, OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)); +- AND_COMPL_HARD_REG_SET (conflicting_hard_regs, ira_no_alloc_regs); +- AND_HARD_REG_SET (conflicting_hard_regs, +- reg_class_contents[ALLOCNO_CLASS (a)]); ++ conflicting_hard_regs = (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) ++ & ~ira_no_alloc_regs ++ & reg_class_contents[ALLOCNO_CLASS (a)]); + print_hard_reg_set (file, "\n;; total conflict hard regs:", + conflicting_hard_regs); + +- COPY_HARD_REG_SET (conflicting_hard_regs, OBJECT_CONFLICT_HARD_REGS (obj)); +- AND_COMPL_HARD_REG_SET (conflicting_hard_regs, ira_no_alloc_regs); +- AND_HARD_REG_SET (conflicting_hard_regs, +- reg_class_contents[ALLOCNO_CLASS (a)]); ++ conflicting_hard_regs = (OBJECT_CONFLICT_HARD_REGS (obj) ++ & ~ira_no_alloc_regs ++ & reg_class_contents[ALLOCNO_CLASS (a)]); + print_hard_reg_set (file, ";; conflict hard regs:", + conflicting_hard_regs); + putc ('\n', file); +@@ -740,11 +770,7 @@ ira_build_conflicts (void) + if (! targetm.class_likely_spilled_p (base)) + CLEAR_HARD_REG_SET (temp_hard_reg_set); + else +- { +- COPY_HARD_REG_SET (temp_hard_reg_set, reg_class_contents[base]); +- AND_COMPL_HARD_REG_SET (temp_hard_reg_set, ira_no_alloc_regs); +- AND_HARD_REG_SET (temp_hard_reg_set, call_used_reg_set); +- } ++ temp_hard_reg_set = reg_class_contents[base] & ~ira_no_alloc_regs; + FOR_EACH_ALLOCNO (a, ai) + { + int i, n = ALLOCNO_NUM_OBJECTS (a); +@@ -752,33 +778,28 @@ ira_build_conflicts (void) + for (i = 0; i < n; i++) + { + ira_object_t obj = ALLOCNO_OBJECT (a, i); +- machine_mode obj_mode = obj->allocno->mode; + rtx allocno_reg = regno_reg_rtx [ALLOCNO_REGNO (a)]; + +- if ((! 
flag_caller_saves && ALLOCNO_CALLS_CROSSED_NUM (a) != 0) +- /* For debugging purposes don't put user defined variables in +- callee-clobbered registers. However, do allow parameters +- in callee-clobbered registers to improve debugging. This +- is a bit of a fragile hack. */ +- || (optimize == 0 +- && REG_USERVAR_P (allocno_reg) +- && ! reg_is_parm_p (allocno_reg))) ++ /* For debugging purposes don't put user defined variables in ++ callee-clobbered registers. However, do allow parameters ++ in callee-clobbered registers to improve debugging. This ++ is a bit of a fragile hack. */ ++ if (optimize == 0 ++ && REG_USERVAR_P (allocno_reg) ++ && ! reg_is_parm_p (allocno_reg)) + { +- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- call_used_reg_set); +- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), +- call_used_reg_set); ++ HARD_REG_SET new_conflict_regs = crtl->abi->full_reg_clobbers (); ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= new_conflict_regs; ++ OBJECT_CONFLICT_HARD_REGS (obj) |= new_conflict_regs; + } +- else if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) ++ ++ if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) + { +- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- no_caller_save_reg_set); +- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- temp_hard_reg_set); +- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), +- no_caller_save_reg_set); +- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), +- temp_hard_reg_set); ++ HARD_REG_SET new_conflict_regs = ira_need_caller_save_regs (a); ++ if (flag_caller_saves) ++ new_conflict_regs &= (~savable_regs | temp_hard_reg_set); ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= new_conflict_regs; ++ OBJECT_CONFLICT_HARD_REGS (obj) |= new_conflict_regs; + } + + /* Now we deal with paradoxical subreg cases where certain registers +@@ -805,23 +826,6 @@ ira_build_conflicts (void) + } + } + } +- +- if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) +- { +- int regno; +- +- /* Allocnos bigger than the saved part of call saved +- regs must conflict with them. */ +- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (!TEST_HARD_REG_BIT (call_used_reg_set, regno) +- && targetm.hard_regno_call_part_clobbered (NULL, regno, +- obj_mode)) +- { +- SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (obj), regno); +- SET_HARD_REG_BIT (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- regno); +- } +- } + } + } + if (optimize && ira_conflicts_p +diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c +index c7feaba37..baf7261dd 100644 +--- a/gcc/ira-costs.c ++++ b/gcc/ira-costs.c +@@ -237,7 +237,7 @@ setup_cost_classes (cost_classes_t from) + allocated. */ + static cost_classes_t + restrict_cost_classes (cost_classes_t full, machine_mode mode, +- const HARD_REG_SET ®s) ++ const_hard_reg_set regs) + { + static struct cost_classes narrow; + int map[N_REG_CLASSES]; +@@ -254,12 +254,9 @@ restrict_cost_classes (cost_classes_t full, machine_mode mode, + + /* Calculate the set of registers in CL that belong to REGS and + are valid for MODE. 
*/ +- HARD_REG_SET valid_for_cl; +- COPY_HARD_REG_SET (valid_for_cl, reg_class_contents[cl]); +- AND_HARD_REG_SET (valid_for_cl, regs); +- AND_COMPL_HARD_REG_SET (valid_for_cl, +- ira_prohibited_class_mode_regs[cl][mode]); +- AND_COMPL_HARD_REG_SET (valid_for_cl, ira_no_alloc_regs); ++ HARD_REG_SET valid_for_cl = reg_class_contents[cl] & regs; ++ valid_for_cl &= ~(ira_prohibited_class_mode_regs[cl][mode] ++ | ira_no_alloc_regs); + if (hard_reg_set_empty_p (valid_for_cl)) + continue; + +@@ -343,8 +340,7 @@ setup_regno_cost_classes_by_aclass (int regno, enum reg_class aclass) + + if ((classes_ptr = cost_classes_aclass_cache[aclass]) == NULL) + { +- COPY_HARD_REG_SET (temp, reg_class_contents[aclass]); +- AND_COMPL_HARD_REG_SET (temp, ira_no_alloc_regs); ++ temp = reg_class_contents[aclass] & ~ira_no_alloc_regs; + /* We exclude classes from consideration which are subsets of + ACLASS only if ACLASS is an uniform class. */ + exclude_p = ira_uniform_class_p[aclass]; +@@ -356,8 +352,7 @@ setup_regno_cost_classes_by_aclass (int regno, enum reg_class aclass) + { + /* Exclude non-uniform classes which are subsets of + ACLASS. */ +- COPY_HARD_REG_SET (temp2, reg_class_contents[cl]); +- AND_COMPL_HARD_REG_SET (temp2, ira_no_alloc_regs); ++ temp2 = reg_class_contents[cl] & ~ira_no_alloc_regs; + if (hard_reg_set_subset_p (temp2, temp) && cl != aclass) + continue; + } +@@ -1482,13 +1477,6 @@ scan_one_insn (rtx_insn *insn) + return insn; + } + +- if (pat_code == CLOBBER_HIGH) +- { +- gcc_assert (REG_P (XEXP (PATTERN (insn), 0)) +- && HARD_REGISTER_P (XEXP (PATTERN (insn), 0))); +- return insn; +- } +- + counted_mem = false; + set = single_set (insn); + extract_insn (insn); +@@ -2345,7 +2333,6 @@ ira_tune_allocno_costs (void) + ira_allocno_object_iterator oi; + ira_object_t obj; + bool skip_p; +- HARD_REG_SET *crossed_calls_clobber_regs; + + FOR_EACH_ALLOCNO (a, ai) + { +@@ -2380,14 +2367,7 @@ ira_tune_allocno_costs (void) + continue; + rclass = REGNO_REG_CLASS (regno); + cost = 0; +- crossed_calls_clobber_regs +- = &(ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a)); +- if (ira_hard_reg_set_intersection_p (regno, mode, +- *crossed_calls_clobber_regs) +- && (ira_hard_reg_set_intersection_p (regno, mode, +- call_used_reg_set) +- || targetm.hard_regno_call_part_clobbered (NULL, regno, +- mode))) ++ if (ira_need_caller_save_p (a, regno)) + cost += (ALLOCNO_CALL_FREQ (a) + * (ira_memory_move_cost[mode][rclass][0] + + ira_memory_move_cost[mode][rclass][1])); +diff --git a/gcc/ira-emit.c b/gcc/ira-emit.c +index 51bf9c8bc..f44a0d199 100644 +--- a/gcc/ira-emit.c ++++ b/gcc/ira-emit.c +@@ -1115,8 +1115,8 @@ add_range_and_copies_from_move_list (move_t list, ira_loop_tree_node_t node, + ira_allocate_object_conflicts (to_obj, n); + } + } +- ior_hard_reg_conflicts (from, &hard_regs_live); +- ior_hard_reg_conflicts (to, &hard_regs_live); ++ ior_hard_reg_conflicts (from, hard_regs_live); ++ ior_hard_reg_conflicts (to, hard_regs_live); + + update_costs (from, true, freq); + update_costs (to, false, freq); +diff --git a/gcc/ira-int.h b/gcc/ira-int.h +index 3c7fe4e64..a2529ff81 100644 +--- a/gcc/ira-int.h ++++ b/gcc/ira-int.h +@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see + #define GCC_IRA_INT_H + + #include "recog.h" ++#include "function-abi.h" + + /* To provide consistency in naming, all IRA external variables, + functions, common typedefs start with prefix ira_. */ +@@ -287,6 +288,9 @@ struct ira_allocno + /* Register class which should be used for allocation for given + allocno. 
NO_REGS means that we should use memory. */ + ENUM_BITFIELD (reg_class) aclass : 16; ++ /* A bitmask of the ABIs used by calls that occur while the allocno ++ is live. */ ++ unsigned int crossed_calls_abis : NUM_ABI_IDS; + /* During the reload, value TRUE means that we should not reassign a + hard register to the allocno got memory earlier. It is set up + when we removed memory-memory move insn before each iteration of +@@ -423,6 +427,7 @@ struct ira_allocno + #define ALLOCNO_CALL_FREQ(A) ((A)->call_freq) + #define ALLOCNO_CALLS_CROSSED_NUM(A) ((A)->calls_crossed_num) + #define ALLOCNO_CHEAP_CALLS_CROSSED_NUM(A) ((A)->cheap_calls_crossed_num) ++#define ALLOCNO_CROSSED_CALLS_ABIS(A) ((A)->crossed_calls_abis) + #define ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS(A) \ + ((A)->crossed_calls_clobbered_regs) + #define ALLOCNO_MEM_OPTIMIZED_DEST(A) ((A)->mem_optimized_dest) +@@ -963,8 +968,8 @@ extern void ira_print_disposition (FILE *); + extern void ira_debug_disposition (void); + extern void ira_debug_allocno_classes (void); + extern void ira_init_register_move_cost (machine_mode); +-extern void ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts); +-extern int ira_get_dup_out_num (int op_num, HARD_REG_SET &alts); ++extern alternative_mask ira_setup_alts (rtx_insn *); ++extern int ira_get_dup_out_num (int, alternative_mask); + + /* ira-build.c */ + +@@ -996,7 +1001,7 @@ extern void ira_set_allocno_class (ira_allocno_t, enum reg_class); + extern bool ira_conflict_vector_profitable_p (ira_object_t, int); + extern void ira_allocate_conflict_vec (ira_object_t, int); + extern void ira_allocate_object_conflicts (ira_object_t, int); +-extern void ior_hard_reg_conflicts (ira_allocno_t, HARD_REG_SET *); ++extern void ior_hard_reg_conflicts (ira_allocno_t, const_hard_reg_set); + extern void ira_print_expanded_allocno (ira_allocno_t); + extern void ira_add_live_range_to_object (ira_object_t, int, int); + extern live_range_t ira_create_live_range (ira_object_t, int, int, +@@ -1508,4 +1513,28 @@ ira_allocate_and_set_or_copy_costs (int **vec, enum reg_class aclass, + extern rtx ira_create_new_reg (rtx); + extern int first_moveable_pseudo, last_moveable_pseudo; + ++/* Return the set of registers that would need a caller save if allocno A ++ overlapped them. */ ++ ++inline HARD_REG_SET ++ira_need_caller_save_regs (ira_allocno_t a) ++{ ++ return call_clobbers_in_region (ALLOCNO_CROSSED_CALLS_ABIS (a), ++ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a), ++ ALLOCNO_MODE (a)); ++} ++ ++/* Return true if we would need to save allocno A around a call if we ++ assigned hard register REGNO. */ ++ ++inline bool ++ira_need_caller_save_p (ira_allocno_t a, unsigned int regno) ++{ ++ if (ALLOCNO_CALLS_CROSSED_NUM (a) == 0) ++ return false; ++ return call_clobbered_in_region_p (ALLOCNO_CROSSED_CALLS_ABIS (a), ++ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a), ++ ALLOCNO_MODE (a), regno); ++} ++ + #endif /* GCC_IRA_INT_H */ +diff --git a/gcc/ira-lives.c b/gcc/ira-lives.c +index faadf08b0..b933dff16 100644 +--- a/gcc/ira-lives.c ++++ b/gcc/ira-lives.c +@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see + #include "ira.h" + #include "ira-int.h" + #include "sparseset.h" ++#include "function-abi.h" + + /* The code in this file is similar to one in global but the code + works on the allocno basis and creates live ranges instead of +@@ -80,8 +81,9 @@ static int last_call_num; + /* The number of last call at which given allocno was saved. 
*/ + static int *allocno_saved_at_call; + +-/* The value of get_preferred_alternatives for the current instruction, +- supplemental to recog_data. */ ++/* The value returned by ira_setup_alts for the current instruction; ++ i.e. the set of alternatives that we should consider to be likely ++ candidates during reloading. */ + static alternative_mask preferred_alternatives; + + /* If non-NULL, the source operand of a register to register copy for which +@@ -187,8 +189,8 @@ make_object_dead (ira_object_t obj) + } + } + +- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), hard_regs_live); +- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), hard_regs_live); ++ OBJECT_CONFLICT_HARD_REGS (obj) |= hard_regs_live; ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= hard_regs_live; + + /* If IGNORE_REG_FOR_CONFLICTS did not already conflict with OBJ, make + sure it still doesn't. */ +@@ -989,10 +991,8 @@ process_single_reg_class_operands (bool in_p, int freq) + /* We could increase costs of A instead of making it + conflicting with the hard register. But it works worse + because it will be spilled in reload in anyway. */ +- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), +- reg_class_contents[cl]); +- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- reg_class_contents[cl]); ++ OBJECT_CONFLICT_HARD_REGS (obj) |= reg_class_contents[cl]; ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= reg_class_contents[cl]; + } + } + } +@@ -1130,8 +1130,7 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) + reg_live_out = df_get_live_out (bb); + sparseset_clear (objects_live); + REG_SET_TO_HARD_REG_SET (hard_regs_live, reg_live_out); +- AND_COMPL_HARD_REG_SET (hard_regs_live, eliminable_regset); +- AND_COMPL_HARD_REG_SET (hard_regs_live, ira_no_alloc_regs); ++ hard_regs_live &= ~(eliminable_regset | ira_no_alloc_regs); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (TEST_HARD_REG_BIT (hard_regs_live, i)) + { +@@ -1236,9 +1235,7 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) + } + } + +- extract_insn (insn); +- preferred_alternatives = get_preferred_alternatives (insn); +- preprocess_constraints (insn); ++ preferred_alternatives = ira_setup_alts (insn); + process_single_reg_class_operands (false, freq); + + /* See which defined values die here. */ +@@ -1263,10 +1260,7 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) + ira_object_t obj = ira_object_id_map[i]; + a = OBJECT_ALLOCNO (obj); + int num = ALLOCNO_NUM (a); +- HARD_REG_SET this_call_used_reg_set; +- +- get_call_reg_set_usage (insn, &this_call_used_reg_set, +- call_used_reg_set); ++ function_abi callee_abi = insn_callee_abi (insn); + + /* Don't allocate allocnos that cross setjmps or any + call, if this function receives a nonlocal +@@ -1281,10 +1275,10 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) + } + if (can_throw_internal (insn)) + { +- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), +- this_call_used_reg_set); +- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- this_call_used_reg_set); ++ OBJECT_CONFLICT_HARD_REGS (obj) ++ |= callee_abi.mode_clobbers (ALLOCNO_MODE (a)); ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) ++ |= callee_abi.mode_clobbers (ALLOCNO_MODE (a)); + } + + if (sparseset_bit_p (allocnos_processed, num)) +@@ -1301,8 +1295,9 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) + /* Mark it as saved at the next call. 
*/ + allocno_saved_at_call[num] = last_call_num + 1; + ALLOCNO_CALLS_CROSSED_NUM (a)++; +- IOR_HARD_REG_SET (ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a), +- this_call_used_reg_set); ++ ALLOCNO_CROSSED_CALLS_ABIS (a) |= 1 << callee_abi.id (); ++ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a) ++ |= callee_abi.full_and_partial_reg_clobbers (); + if (cheap_reg != NULL_RTX + && ALLOCNO_REGNO (a) == (int) REGNO (cheap_reg)) + ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a)++; +@@ -1355,10 +1350,11 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) + } + + /* Allocnos can't go in stack regs at the start of a basic block +- that is reached by an abnormal edge. Likewise for call +- clobbered regs, because caller-save, fixup_abnormal_edges and +- possibly the table driven EH machinery are not quite ready to +- handle such allocnos live across such edges. */ ++ that is reached by an abnormal edge. Likewise for registers ++ that are at least partly call clobbered, because caller-save, ++ fixup_abnormal_edges and possibly the table driven EH machinery ++ are not quite ready to handle such allocnos live across such ++ edges. */ + if (bb_has_abnormal_pred (bb)) + { + #ifdef STACK_REGS +@@ -1378,7 +1374,7 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) + if (!cfun->has_nonlocal_label + && has_abnormal_call_or_eh_pred_edge_p (bb)) + for (px = 0; px < FIRST_PSEUDO_REGISTER; px++) +- if (call_used_regs[px] ++ if (eh_edge_abi.clobbers_at_least_part_of_reg_p (px) + #ifdef REAL_PIC_OFFSET_TABLE_REGNUM + /* We should create a conflict of PIC pseudo with + PIC hard reg as PIC hard reg can have a wrong +diff --git a/gcc/ira.c b/gcc/ira.c +index 4262e5cf3..a985dddaf 100644 +--- a/gcc/ira.c ++++ b/gcc/ira.c +@@ -471,8 +471,7 @@ setup_class_hard_regs (void) + ira_assert (SHRT_MAX >= FIRST_PSEUDO_REGISTER); + for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--) + { +- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); ++ temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs; + CLEAR_HARD_REG_SET (processed_hard_reg_set); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { +@@ -514,7 +513,7 @@ setup_alloc_regs (bool use_hard_frame_p) + #ifdef ADJUST_REG_ALLOC_ORDER + ADJUST_REG_ALLOC_ORDER; + #endif +- COPY_HARD_REG_SET (no_unit_alloc_regs, fixed_nonglobal_reg_set); ++ no_unit_alloc_regs = fixed_nonglobal_reg_set; + if (! use_hard_frame_p) + SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM); + setup_class_hard_regs (); +@@ -541,8 +540,7 @@ setup_reg_subclasses (void) + if (i == (int) NO_REGS) + continue; + +- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[i]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); ++ temp_hard_regset = reg_class_contents[i] & ~no_unit_alloc_regs; + if (hard_reg_set_empty_p (temp_hard_regset)) + continue; + for (j = 0; j < N_REG_CLASSES; j++) +@@ -550,8 +548,7 @@ setup_reg_subclasses (void) + { + enum reg_class *p; + +- COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[j]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); ++ temp_hard_regset2 = reg_class_contents[j] & ~no_unit_alloc_regs; + if (! 
hard_reg_set_subset_p (temp_hard_regset, + temp_hard_regset2)) + continue; +@@ -605,10 +602,8 @@ setup_class_subset_and_memory_move_costs (void) + for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--) + for (cl2 = (int) N_REG_CLASSES - 1; cl2 >= 0; cl2--) + { +- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); +- COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl2]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); ++ temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs; ++ temp_hard_regset2 = reg_class_contents[cl2] & ~no_unit_alloc_regs; + ira_class_subset_p[cl][cl2] + = hard_reg_set_subset_p (temp_hard_regset, temp_hard_regset2); + if (! hard_reg_set_empty_p (temp_hard_regset2) +@@ -757,8 +752,7 @@ setup_stack_reg_pressure_class (void) + for (i = 0; i < ira_pressure_classes_num; i++) + { + cl = ira_pressure_classes[i]; +- COPY_HARD_REG_SET (temp_hard_regset2, temp_hard_regset); +- AND_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl]); ++ temp_hard_regset2 = temp_hard_regset & reg_class_contents[cl]; + size = hard_reg_set_size (temp_hard_regset2); + if (best < size) + { +@@ -816,10 +810,10 @@ setup_pressure_classes (void) + register pressure class. */ + for (m = 0; m < NUM_MACHINE_MODES; m++) + { +- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, +- ira_prohibited_class_mode_regs[cl][m]); ++ temp_hard_regset ++ = (reg_class_contents[cl] ++ & ~(no_unit_alloc_regs ++ | ira_prohibited_class_mode_regs[cl][m])); + if (hard_reg_set_empty_p (temp_hard_regset)) + continue; + ira_init_register_move_cost_if_necessary ((machine_mode) m); +@@ -833,8 +827,7 @@ setup_pressure_classes (void) + } + curr = 0; + insert_p = true; +- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); ++ temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs; + /* Remove so far added pressure classes which are subset of the + current candidate class. Prefer GENERAL_REGS as a pressure + register class to another class containing the same +@@ -845,11 +838,10 @@ setup_pressure_classes (void) + for (i = 0; i < n; i++) + { + cl2 = pressure_classes[i]; +- COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl2]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); ++ temp_hard_regset2 = (reg_class_contents[cl2] ++ & ~no_unit_alloc_regs); + if (hard_reg_set_subset_p (temp_hard_regset, temp_hard_regset2) +- && (! hard_reg_set_equal_p (temp_hard_regset, +- temp_hard_regset2) ++ && (temp_hard_regset != temp_hard_regset2 + || cl2 == (int) GENERAL_REGS)) + { + pressure_classes[curr++] = (enum reg_class) cl2; +@@ -857,11 +849,10 @@ setup_pressure_classes (void) + continue; + } + if (hard_reg_set_subset_p (temp_hard_regset2, temp_hard_regset) +- && (! hard_reg_set_equal_p (temp_hard_regset2, +- temp_hard_regset) ++ && (temp_hard_regset2 != temp_hard_regset + || cl == (int) GENERAL_REGS)) + continue; +- if (hard_reg_set_equal_p (temp_hard_regset2, temp_hard_regset)) ++ if (temp_hard_regset2 == temp_hard_regset) + insert_p = false; + pressure_classes[curr++] = (enum reg_class) cl2; + } +@@ -882,7 +873,7 @@ setup_pressure_classes (void) + registers available for the allocation. 
*/ + CLEAR_HARD_REG_SET (temp_hard_regset); + CLEAR_HARD_REG_SET (temp_hard_regset2); +- COPY_HARD_REG_SET (ignore_hard_regs, no_unit_alloc_regs); ++ ignore_hard_regs = no_unit_alloc_regs; + for (cl = 0; cl < LIM_REG_CLASSES; cl++) + { + /* For some targets (like MIPS with MD_REGS), there are some +@@ -893,23 +884,23 @@ setup_pressure_classes (void) + break; + if (m >= NUM_MACHINE_MODES) + { +- IOR_HARD_REG_SET (ignore_hard_regs, reg_class_contents[cl]); ++ ignore_hard_regs |= reg_class_contents[cl]; + continue; + } + for (i = 0; i < n; i++) + if ((int) pressure_classes[i] == cl) + break; +- IOR_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl]); ++ temp_hard_regset2 |= reg_class_contents[cl]; + if (i < n) +- IOR_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); ++ temp_hard_regset |= reg_class_contents[cl]; + } + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + /* Some targets (like SPARC with ICC reg) have allocatable regs + for which no reg class is defined. */ + if (REGNO_REG_CLASS (i) == NO_REGS) + SET_HARD_REG_BIT (ignore_hard_regs, i); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, ignore_hard_regs); +- AND_COMPL_HARD_REG_SET (temp_hard_regset2, ignore_hard_regs); ++ temp_hard_regset &= ~ignore_hard_regs; ++ temp_hard_regset2 &= ~ignore_hard_regs; + ira_assert (hard_reg_set_subset_p (temp_hard_regset2, temp_hard_regset)); + } + #endif +@@ -1001,16 +992,12 @@ setup_allocno_and_important_classes (void) + same set of hard registers. */ + for (i = 0; i < LIM_REG_CLASSES; i++) + { +- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[i]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); ++ temp_hard_regset = reg_class_contents[i] & ~no_unit_alloc_regs; + for (j = 0; j < n; j++) + { + cl = classes[j]; +- COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset2, +- no_unit_alloc_regs); +- if (hard_reg_set_equal_p (temp_hard_regset, +- temp_hard_regset2)) ++ temp_hard_regset2 = reg_class_contents[cl] & ~no_unit_alloc_regs; ++ if (temp_hard_regset == temp_hard_regset2) + break; + } + if (j >= n || targetm.additional_allocno_class_p (i)) +@@ -1037,14 +1024,12 @@ setup_allocno_and_important_classes (void) + for (cl = 0; cl < N_REG_CLASSES; cl++) + if (ira_class_hard_regs_num[cl] > 0) + { +- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); ++ temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs; + set_p = false; + for (j = 0; j < ira_allocno_classes_num; j++) + { +- COPY_HARD_REG_SET (temp_hard_regset2, +- reg_class_contents[ira_allocno_classes[j]]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); ++ temp_hard_regset2 = (reg_class_contents[ira_allocno_classes[j]] ++ & ~no_unit_alloc_regs); + if ((enum reg_class) cl == ira_allocno_classes[j]) + break; + else if (hard_reg_set_subset_p (temp_hard_regset, +@@ -1118,10 +1103,9 @@ setup_class_translate_array (enum reg_class *class_translate, + for (i = 0; i < classes_num; i++) + { + aclass = classes[i]; +- COPY_HARD_REG_SET (temp_hard_regset, +- reg_class_contents[aclass]); +- AND_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); ++ temp_hard_regset = (reg_class_contents[aclass] ++ & reg_class_contents[cl] ++ & ~no_unit_alloc_regs); + if (! 
hard_reg_set_empty_p (temp_hard_regset)) + { + min_cost = INT_MAX; +@@ -1223,10 +1207,8 @@ setup_reg_class_relations (void) + ira_reg_classes_intersect_p[cl1][cl2] = false; + ira_reg_class_intersect[cl1][cl2] = NO_REGS; + ira_reg_class_subset[cl1][cl2] = NO_REGS; +- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl1]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); +- COPY_HARD_REG_SET (temp_set2, reg_class_contents[cl2]); +- AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); ++ temp_hard_regset = reg_class_contents[cl1] & ~no_unit_alloc_regs; ++ temp_set2 = reg_class_contents[cl2] & ~no_unit_alloc_regs; + if (hard_reg_set_empty_p (temp_hard_regset) + && hard_reg_set_empty_p (temp_set2)) + { +@@ -1264,16 +1246,14 @@ setup_reg_class_relations (void) + } + ira_reg_class_subunion[cl1][cl2] = NO_REGS; + ira_reg_class_superunion[cl1][cl2] = NO_REGS; +- COPY_HARD_REG_SET (intersection_set, reg_class_contents[cl1]); +- AND_HARD_REG_SET (intersection_set, reg_class_contents[cl2]); +- AND_COMPL_HARD_REG_SET (intersection_set, no_unit_alloc_regs); +- COPY_HARD_REG_SET (union_set, reg_class_contents[cl1]); +- IOR_HARD_REG_SET (union_set, reg_class_contents[cl2]); +- AND_COMPL_HARD_REG_SET (union_set, no_unit_alloc_regs); ++ intersection_set = (reg_class_contents[cl1] ++ & reg_class_contents[cl2] ++ & ~no_unit_alloc_regs); ++ union_set = ((reg_class_contents[cl1] | reg_class_contents[cl2]) ++ & ~no_unit_alloc_regs); + for (cl3 = 0; cl3 < N_REG_CLASSES; cl3++) + { +- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl3]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); ++ temp_hard_regset = reg_class_contents[cl3] & ~no_unit_alloc_regs; + if (hard_reg_set_subset_p (temp_hard_regset, intersection_set)) + { + /* CL3 allocatable hard register set is inside of +@@ -1281,17 +1261,16 @@ setup_reg_class_relations (void) + of CL1 and CL2. */ + if (important_class_p[cl3]) + { +- COPY_HARD_REG_SET +- (temp_set2, +- reg_class_contents +- [(int) ira_reg_class_intersect[cl1][cl2]]); +- AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); ++ temp_set2 ++ = (reg_class_contents ++ [ira_reg_class_intersect[cl1][cl2]]); ++ temp_set2 &= ~no_unit_alloc_regs; + if (! hard_reg_set_subset_p (temp_hard_regset, temp_set2) + /* If the allocatable hard register sets are + the same, prefer GENERAL_REGS or the + smallest class for debugging + purposes. */ +- || (hard_reg_set_equal_p (temp_hard_regset, temp_set2) ++ || (temp_hard_regset == temp_set2 + && (cl3 == GENERAL_REGS + || ((ira_reg_class_intersect[cl1][cl2] + != GENERAL_REGS) +@@ -1302,14 +1281,13 @@ setup_reg_class_relations (void) + ira_reg_class_intersect[cl1][cl2]]))))) + ira_reg_class_intersect[cl1][cl2] = (enum reg_class) cl3; + } +- COPY_HARD_REG_SET +- (temp_set2, +- reg_class_contents[(int) ira_reg_class_subset[cl1][cl2]]); +- AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); ++ temp_set2 ++ = (reg_class_contents[ira_reg_class_subset[cl1][cl2]] ++ & ~no_unit_alloc_regs); + if (! hard_reg_set_subset_p (temp_hard_regset, temp_set2) + /* Ignore unavailable hard registers and prefer + smallest class for debugging purposes. */ +- || (hard_reg_set_equal_p (temp_hard_regset, temp_set2) ++ || (temp_hard_regset == temp_set2 + && hard_reg_set_subset_p + (reg_class_contents[cl3], + reg_class_contents +@@ -1322,15 +1300,13 @@ setup_reg_class_relations (void) + /* CL3 allocatable hard register set is inside of + union of allocatable hard register sets of CL1 + and CL2. 
*/ +- COPY_HARD_REG_SET +- (temp_set2, +- reg_class_contents[(int) ira_reg_class_subunion[cl1][cl2]]); +- AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); ++ temp_set2 ++ = (reg_class_contents[ira_reg_class_subunion[cl1][cl2]] ++ & ~no_unit_alloc_regs); + if (ira_reg_class_subunion[cl1][cl2] == NO_REGS + || (hard_reg_set_subset_p (temp_set2, temp_hard_regset) + +- && (! hard_reg_set_equal_p (temp_set2, +- temp_hard_regset) ++ && (temp_set2 != temp_hard_regset + || cl3 == GENERAL_REGS + /* If the allocatable hard register sets are the + same, prefer GENERAL_REGS or the smallest +@@ -1347,15 +1323,13 @@ setup_reg_class_relations (void) + /* CL3 allocatable hard register set contains union + of allocatable hard register sets of CL1 and + CL2. */ +- COPY_HARD_REG_SET +- (temp_set2, +- reg_class_contents[(int) ira_reg_class_superunion[cl1][cl2]]); +- AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); ++ temp_set2 ++ = (reg_class_contents[ira_reg_class_superunion[cl1][cl2]] ++ & ~no_unit_alloc_regs); + if (ira_reg_class_superunion[cl1][cl2] == NO_REGS + || (hard_reg_set_subset_p (temp_hard_regset, temp_set2) + +- && (! hard_reg_set_equal_p (temp_set2, +- temp_hard_regset) ++ && (temp_set2 != temp_hard_regset + || cl3 == GENERAL_REGS + /* If the allocatable hard register sets are the + same, prefer GENERAL_REGS or the smallest +@@ -1499,8 +1473,7 @@ setup_prohibited_class_mode_regs (void) + + for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--) + { +- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); +- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); ++ temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs; + for (j = 0; j < NUM_MACHINE_MODES; j++) + { + count = 0; +@@ -1784,68 +1757,59 @@ setup_prohibited_mode_move_regs (void) + + + +-/* Setup possible alternatives in ALTS for INSN. */ +-void +-ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) ++/* Extract INSN and return the set of alternatives that we should consider. ++ This excludes any alternatives whose constraints are obviously impossible ++ to meet (e.g. because the constraint requires a constant and the operand ++ is nonconstant). It also excludes alternatives that are bound to need ++ a spill or reload, as long as we have other alternatives that match ++ exactly. */ ++alternative_mask ++ira_setup_alts (rtx_insn *insn) + { +- /* MAP nalt * nop -> start of constraints for given operand and +- alternative. */ +- static vec insn_constraints; + int nop, nalt; + bool curr_swapped; + const char *p; + int commutative = -1; + + extract_insn (insn); ++ preprocess_constraints (insn); + alternative_mask preferred = get_preferred_alternatives (insn); +- CLEAR_HARD_REG_SET (alts); +- insn_constraints.release (); +- insn_constraints.safe_grow_cleared (recog_data.n_operands +- * recog_data.n_alternatives + 1); ++ alternative_mask alts = 0; ++ alternative_mask exact_alts = 0; + /* Check that the hard reg set is enough for holding all + alternatives. It is hard to imagine the situation when the + assertion is wrong. */ + ira_assert (recog_data.n_alternatives + <= (int) MAX (sizeof (HARD_REG_ELT_TYPE) * CHAR_BIT, + FIRST_PSEUDO_REGISTER)); ++ for (nop = 0; nop < recog_data.n_operands; nop++) ++ if (recog_data.constraints[nop][0] == '%') ++ { ++ commutative = nop; ++ break; ++ } + for (curr_swapped = false;; curr_swapped = true) + { +- /* Calculate some data common for all alternatives to speed up the +- function. 
*/ +- for (nop = 0; nop < recog_data.n_operands; nop++) +- { +- for (nalt = 0, p = recog_data.constraints[nop]; +- nalt < recog_data.n_alternatives; +- nalt++) +- { +- insn_constraints[nop * recog_data.n_alternatives + nalt] = p; +- while (*p && *p != ',') +- { +- /* We only support one commutative marker, the first +- one. We already set commutative above. */ +- if (*p == '%' && commutative < 0) +- commutative = nop; +- p++; +- } +- if (*p) +- p++; +- } +- } + for (nalt = 0; nalt < recog_data.n_alternatives; nalt++) + { +- if (!TEST_BIT (preferred, nalt) +- || TEST_HARD_REG_BIT (alts, nalt)) ++ if (!TEST_BIT (preferred, nalt) || TEST_BIT (exact_alts, nalt)) + continue; + ++ const operand_alternative *op_alt ++ = &recog_op_alt[nalt * recog_data.n_operands]; ++ int this_reject = 0; + for (nop = 0; nop < recog_data.n_operands; nop++) + { + int c, len; + ++ this_reject += op_alt[nop].reject; ++ + rtx op = recog_data.operand[nop]; +- p = insn_constraints[nop * recog_data.n_alternatives + nalt]; ++ p = op_alt[nop].constraint; + if (*p == 0 || *p == ',') + continue; +- ++ ++ bool win_p = false; + do + switch (c = *p, len = CONSTRAINT_LEN (c, p), c) + { +@@ -1863,7 +1827,14 @@ ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': +- goto op_success; ++ { ++ rtx other = recog_data.operand[c - '0']; ++ if (MEM_P (other) ++ ? rtx_equal_p (other, op) ++ : REG_P (op) || SUBREG_P (op)) ++ goto op_success; ++ win_p = true; ++ } + break; + + case 'g': +@@ -1877,7 +1848,11 @@ ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) + { + case CT_REGISTER: + if (reg_class_for_constraint (cn) != NO_REGS) +- goto op_success; ++ { ++ if (REG_P (op) || SUBREG_P (op)) ++ goto op_success; ++ win_p = true; ++ } + break; + + case CT_CONST_INT: +@@ -1888,9 +1863,14 @@ ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) + break; + + case CT_ADDRESS: ++ goto op_success; ++ + case CT_MEMORY: + case CT_SPECIAL_MEMORY: +- goto op_success; ++ if (MEM_P (op)) ++ goto op_success; ++ win_p = true; ++ break; + + case CT_FIXED_FORM: + if (constraint_satisfied_p (op, cn)) +@@ -1901,12 +1881,22 @@ ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) + } + } + while (p += len, c); +- break; ++ if (!win_p) ++ break; ++ /* We can make the alternative match by spilling a register ++ to memory or loading something into a register. Count a ++ cost of one reload (the equivalent of the '?' constraint). */ ++ this_reject += 6; + op_success: + ; + } ++ + if (nop >= recog_data.n_operands) +- SET_HARD_REG_BIT (alts, nalt); ++ { ++ alts |= ALTERNATIVE_BIT (nalt); ++ if (this_reject == 0) ++ exact_alts |= ALTERNATIVE_BIT (nalt); ++ } + } + if (commutative < 0) + break; +@@ -1916,14 +1906,15 @@ ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) + if (curr_swapped) + break; + } ++ return exact_alts ? exact_alts : alts; + } + + /* Return the number of the output non-early clobber operand which + should be the same in any case as operand with number OP_NUM (or +- negative value if there is no such operand). The function takes +- only really possible alternatives into consideration. */ ++ negative value if there is no such operand). ALTS is the mask ++ of alternatives that we should consider. 
*/ + int +-ira_get_dup_out_num (int op_num, HARD_REG_SET &alts) ++ira_get_dup_out_num (int op_num, alternative_mask alts) + { + int curr_alt, c, original, dup; + bool ignore_p, use_commut_op_p; +@@ -1940,7 +1931,7 @@ ira_get_dup_out_num (int op_num, HARD_REG_SET &alts) + { + rtx op = recog_data.operand[op_num]; + +- for (curr_alt = 0, ignore_p = !TEST_HARD_REG_BIT (alts, curr_alt), ++ for (curr_alt = 0, ignore_p = !TEST_BIT (alts, curr_alt), + original = -1;;) + { + c = *str; +@@ -1951,7 +1942,7 @@ ira_get_dup_out_num (int op_num, HARD_REG_SET &alts) + else if (c == ',') + { + curr_alt++; +- ignore_p = !TEST_HARD_REG_BIT (alts, curr_alt); ++ ignore_p = !TEST_BIT (alts, curr_alt); + } + else if (! ignore_p) + switch (c) +@@ -1981,26 +1972,8 @@ ira_get_dup_out_num (int op_num, HARD_REG_SET &alts) + } + if (original == -1) + goto fail; +- dup = -1; +- for (ignore_p = false, str = recog_data.constraints[original - '0']; +- *str != 0; +- str++) +- if (ignore_p) +- { +- if (*str == ',') +- ignore_p = false; +- } +- else if (*str == '#') +- ignore_p = true; +- else if (! ignore_p) +- { +- if (*str == '=') +- dup = original - '0'; +- /* It is better ignore an alternative with early clobber. */ +- else if (*str == '&') +- goto fail; +- } +- if (dup >= 0) ++ dup = original - '0'; ++ if (recog_data.operand_type[dup] == OP_OUT) + return dup; + fail: + if (use_commut_op_p) +@@ -2305,7 +2278,7 @@ ira_setup_eliminable_regset (void) + if (frame_pointer_needed) + df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true); + +- COPY_HARD_REG_SET (ira_no_alloc_regs, no_unit_alloc_regs); ++ ira_no_alloc_regs = no_unit_alloc_regs; + CLEAR_HARD_REG_SET (eliminable_regset); + + compute_regs_asm_clobbered (); +@@ -2326,7 +2299,7 @@ ira_setup_eliminable_regset (void) + SET_HARD_REG_BIT (ira_no_alloc_regs, eliminables[i].from); + } + else if (cannot_elim) +- error ("%s cannot be used in asm here", ++ error ("%s cannot be used in % here", + reg_names[eliminables[i].from]); + else + df_set_regs_ever_live (eliminables[i].from, true); +@@ -2340,7 +2313,7 @@ ira_setup_eliminable_regset (void) + SET_HARD_REG_BIT (ira_no_alloc_regs, HARD_FRAME_POINTER_REGNUM); + } + else if (frame_pointer_needed) +- error ("%s cannot be used in asm here", ++ error ("%s cannot be used in % here", + reg_names[HARD_FRAME_POINTER_REGNUM]); + else + df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true); +@@ -2392,12 +2365,10 @@ setup_reg_renumber (void) + for (i = 0; i < nwords; i++) + { + obj = ALLOCNO_OBJECT (a, i); +- IOR_COMPL_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), +- reg_class_contents[pclass]); ++ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) ++ |= ~reg_class_contents[pclass]; + } +- if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0 +- && ira_hard_reg_set_intersection_p (hard_regno, ALLOCNO_MODE (a), +- call_used_reg_set)) ++ if (ira_need_caller_save_p (a, hard_regno)) + { + ira_assert (!optimize || flag_caller_saves + || (ALLOCNO_CALLS_CROSSED_NUM (a) +@@ -3004,7 +2975,7 @@ validate_equiv_mem (rtx_insn *start, rtx reg, rtx memref) + return valid_none; + } + +- note_stores (PATTERN (insn), validate_equiv_mem_from_store, &info); ++ note_stores (insn, validate_equiv_mem_from_store, &info); + if (info.equiv_mem_modified) + return valid_none; + +@@ -3092,7 +3063,6 @@ equiv_init_movable_p (rtx x, int regno) + + case CC0: + case CLOBBER: +- case CLOBBER_HIGH: + return 0; + + case PRE_INC: +@@ -3199,7 +3169,6 @@ memref_referenced_p (rtx memref, rtx x, bool read_p) + return memref_referenced_p (memref, SET_SRC (x), true); + + case CLOBBER: +- case 
CLOBBER_HIGH: + if (process_set_for_memref_referenced_p (memref, XEXP (x, 0))) + return true; + +@@ -3391,6 +3360,37 @@ def_dominates_uses (int regno) + return true; + } + ++/* Scan the instructions before update_equiv_regs. Record which registers ++ are referenced as paradoxical subregs. Also check for cases in which ++ the current function needs to save a register that one of its call ++ instructions clobbers. ++ ++ These things are logically unrelated, but it's more efficient to do ++ them together. */ ++ ++static void ++update_equiv_regs_prescan (void) ++{ ++ basic_block bb; ++ rtx_insn *insn; ++ function_abi_aggregator callee_abis; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ FOR_BB_INSNS (bb, insn) ++ if (NONDEBUG_INSN_P (insn)) ++ { ++ set_paradoxical_subreg (insn); ++ if (CALL_P (insn)) ++ callee_abis.note_callee_abi (insn_callee_abi (insn)); ++ } ++ ++ HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi); ++ if (!hard_reg_set_empty_p (extra_caller_saves)) ++ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) ++ if (TEST_HARD_REG_BIT (extra_caller_saves, regno)) ++ df_set_regs_ever_live (regno, true); ++} ++ + /* Find registers that are equivalent to a single value throughout the + compilation (either because they can be referenced in memory or are + set once from a single constant). Lower their priority for a +@@ -3407,15 +3407,6 @@ update_equiv_regs (void) + rtx_insn *insn; + basic_block bb; + +- /* Scan insns and set pdx_subregs if the reg is used in a +- paradoxical subreg. Don't set such reg equivalent to a mem, +- because lra will not substitute such equiv memory in order to +- prevent access beyond allocated memory for paradoxical memory subreg. */ +- FOR_EACH_BB_FN (bb, cfun) +- FOR_BB_INSNS (bb, insn) +- if (NONDEBUG_INSN_P (insn)) +- set_paradoxical_subreg (insn); +- + /* Scan the insns and find which registers have equivalences. Do this + in a separate scan of the insns because (due to -fcse-follow-jumps) + a register can be set below its use. */ +@@ -3447,7 +3438,7 @@ update_equiv_regs (void) + if (set == NULL_RTX + || side_effects_p (SET_SRC (set))) + { +- note_stores (PATTERN (insn), no_equiv, NULL); ++ note_pattern_stores (PATTERN (insn), no_equiv, NULL); + continue; + } + else if (GET_CODE (PATTERN (insn)) == PARALLEL) +@@ -3458,7 +3449,7 @@ update_equiv_regs (void) + { + rtx part = XVECEXP (PATTERN (insn), 0, i); + if (part != set) +- note_stores (part, no_equiv, NULL); ++ note_pattern_stores (part, no_equiv, NULL); + } + } + +@@ -3516,7 +3507,7 @@ update_equiv_regs (void) + { + /* This might be setting a SUBREG of a pseudo, a pseudo that is + also set somewhere else to a constant. */ +- note_stores (set, no_equiv, NULL); ++ note_pattern_stores (set, no_equiv, NULL); + continue; + } + +@@ -3524,7 +3515,7 @@ update_equiv_regs (void) + equivalent to a mem. 
*/ + if (MEM_P (src) && reg_equiv[regno].pdx_subregs) + { +- note_stores (set, no_equiv, NULL); ++ note_pattern_stores (set, no_equiv, NULL); + continue; + } + +@@ -4458,7 +4449,6 @@ rtx_moveable_p (rtx *loc, enum op_type type) + && rtx_moveable_p (&XEXP (x, 2), OP_IN)); + + case CLOBBER: +- case CLOBBER_HIGH: + return rtx_moveable_p (&SET_DEST (x), OP_OUT); + + case UNSPEC_VOLATILE: +@@ -4911,9 +4901,7 @@ interesting_dest_for_shprep (rtx_insn *insn, basic_block call_dom) + for (int i = 0; i < XVECLEN (pat, 0); i++) + { + rtx sub = XVECEXP (pat, 0, i); +- if (GET_CODE (sub) == USE +- || GET_CODE (sub) == CLOBBER +- || GET_CODE (sub) == CLOBBER_HIGH) ++ if (GET_CODE (sub) == USE || GET_CODE (sub) == CLOBBER) + continue; + if (GET_CODE (sub) != SET + || side_effects_p (sub)) +@@ -5305,6 +5293,7 @@ ira (FILE *f) + init_alias_analysis (); + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); + reg_equiv = XCNEWVEC (struct equivalence, max_reg_num ()); ++ update_equiv_regs_prescan (); + update_equiv_regs (); + + /* Don't move insns if live range shrinkage or register +@@ -5616,7 +5605,9 @@ do_reload (void) + poly_int64 size = get_frame_size () + STACK_CHECK_FIXED_FRAME_SIZE; + + for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (df_regs_ever_live_p (i) && !fixed_regs[i] && call_used_regs[i]) ++ if (df_regs_ever_live_p (i) ++ && !fixed_regs[i] ++ && !crtl->abi->clobbers_full_reg_p (i)) + size += UNITS_PER_WORD; + + if (constant_lower_bound (size) > STACK_CHECK_MAX_FRAME_SIZE) +diff --git a/gcc/jit/jit-playback.c b/gcc/jit/jit-playback.c +index b74495c58..8b16e81d5 100644 +--- a/gcc/jit/jit-playback.c ++++ b/gcc/jit/jit-playback.c +@@ -399,12 +399,11 @@ new_function (location *loc, + + if (builtin_id) + { +- DECL_FUNCTION_CODE (fndecl) = builtin_id; + gcc_assert (loc == NULL); + DECL_SOURCE_LOCATION (fndecl) = BUILTINS_LOCATION; + +- DECL_BUILT_IN_CLASS (fndecl) = +- builtins_manager::get_class (builtin_id); ++ built_in_class fclass = builtins_manager::get_class (builtin_id); ++ set_decl_built_in_function (fndecl, fclass, builtin_id); + set_builtin_decl (builtin_id, fndecl, + builtins_manager::implicit_p (builtin_id)); + +diff --git a/gcc/jump.c b/gcc/jump.c +index ce5cee523..17642a95b 100644 +--- a/gcc/jump.c ++++ b/gcc/jump.c +@@ -1094,7 +1094,6 @@ mark_jump_label_1 (rtx x, rtx_insn *insn, bool in_mem, bool is_target) + case CC0: + case REG: + case CLOBBER: +- case CLOBBER_HIGH: + case CALL: + return; + +diff --git a/gcc/langhooks-def.h b/gcc/langhooks-def.h +index a059841b3..842f6a502 100644 +--- a/gcc/langhooks-def.h ++++ b/gcc/langhooks-def.h +@@ -122,6 +122,7 @@ extern int lhd_type_dwarf_attribute (const_tree, int); + #define LANG_HOOKS_TYPES_COMPATIBLE_P lhd_types_compatible_p + #define LANG_HOOKS_BUILTIN_FUNCTION lhd_builtin_function + #define LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE LANG_HOOKS_BUILTIN_FUNCTION ++#define LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL LANG_HOOKS_BUILTIN_FUNCTION + #define LANG_HOOKS_EXPR_TO_DECL lhd_expr_to_decl + #define LANG_HOOKS_TO_TARGET_CHARSET lhd_to_target_charset + #define LANG_HOOKS_INIT_TS lhd_do_nothing +@@ -170,6 +171,7 @@ extern tree lhd_make_node (enum tree_code); + extern tree lhd_unit_size_without_reusable_padding (tree); + + #define LANG_HOOKS_MAKE_TYPE lhd_make_node ++#define LANG_HOOKS_SIMULATE_ENUM_DECL NULL + #define LANG_HOOKS_CLASSIFY_RECORD NULL + #define LANG_HOOKS_TYPE_FOR_SIZE lhd_type_for_size + #define LANG_HOOKS_INCOMPLETE_TYPE_ERROR lhd_incomplete_type_error +@@ -203,6 +205,7 @@ extern tree lhd_unit_size_without_reusable_padding 
(tree); + + #define LANG_HOOKS_FOR_TYPES_INITIALIZER { \ + LANG_HOOKS_MAKE_TYPE, \ ++ LANG_HOOKS_SIMULATE_ENUM_DECL, \ + LANG_HOOKS_CLASSIFY_RECORD, \ + LANG_HOOKS_TYPE_FOR_MODE, \ + LANG_HOOKS_TYPE_FOR_SIZE, \ +@@ -338,6 +341,7 @@ extern void lhd_end_section (void); + LANG_HOOKS_GIMPLIFY_EXPR, \ + LANG_HOOKS_BUILTIN_FUNCTION, \ + LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE, \ ++ LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL, \ + LANG_HOOKS_INIT_TS, \ + LANG_HOOKS_EXPR_TO_DECL, \ + LANG_HOOKS_EH_PERSONALITY, \ +diff --git a/gcc/langhooks.c b/gcc/langhooks.c +index 2df97f2b6..fd8f43312 100644 +--- a/gcc/langhooks.c ++++ b/gcc/langhooks.c +@@ -599,28 +599,21 @@ lhd_omp_mappable_type (tree type) + return true; + } + +-/* Common function for add_builtin_function and +- add_builtin_function_ext_scope. */ ++/* Common function for add_builtin_function, add_builtin_function_ext_scope ++ and simulate_builtin_function_decl. */ ++ + static tree +-add_builtin_function_common (const char *name, +- tree type, +- int function_code, +- enum built_in_class cl, +- const char *library_name, +- tree attrs, +- tree (*hook) (tree)) ++build_builtin_function (location_t location, const char *name, tree type, ++ int function_code, enum built_in_class cl, ++ const char *library_name, tree attrs) + { + tree id = get_identifier (name); +- tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, id, type); ++ tree decl = build_decl (location, FUNCTION_DECL, id, type); + + TREE_PUBLIC (decl) = 1; + DECL_EXTERNAL (decl) = 1; +- DECL_BUILT_IN_CLASS (decl) = cl; +- +- DECL_FUNCTION_CODE (decl) = (enum built_in_function) function_code; + +- /* DECL_FUNCTION_CODE is a bitfield; verify that the value fits. */ +- gcc_assert (DECL_FUNCTION_CODE (decl) == function_code); ++ set_decl_built_in_function (decl, cl, function_code); + + if (library_name) + { +@@ -636,8 +629,7 @@ add_builtin_function_common (const char *name, + else + decl_attributes (&decl, NULL_TREE, 0); + +- return hook (decl); +- ++ return decl; + } + + /* Create a builtin function. */ +@@ -650,9 +642,9 @@ add_builtin_function (const char *name, + const char *library_name, + tree attrs) + { +- return add_builtin_function_common (name, type, function_code, cl, +- library_name, attrs, +- lang_hooks.builtin_function); ++ tree decl = build_builtin_function (BUILTINS_LOCATION, name, type, ++ function_code, cl, library_name, attrs); ++ return lang_hooks.builtin_function (decl); + } + + /* Like add_builtin_function, but make sure the scope is the external scope. +@@ -670,9 +662,40 @@ add_builtin_function_ext_scope (const char *name, + const char *library_name, + tree attrs) + { +- return add_builtin_function_common (name, type, function_code, cl, +- library_name, attrs, +- lang_hooks.builtin_function_ext_scope); ++ tree decl = build_builtin_function (BUILTINS_LOCATION, name, type, ++ function_code, cl, library_name, attrs); ++ return lang_hooks.builtin_function_ext_scope (decl); ++} ++ ++/* Simulate a declaration of a target-specific built-in function at ++ location LOCATION, as though it had been declared directly in the ++ source language. NAME is the name of the function, TYPE is its function ++ type, FUNCTION_CODE is the target-specific function code, LIBRARY_NAME ++ is the name of the underlying library function (NULL if none) and ++ ATTRS is a list of function attributes. ++ ++ Return the decl of the declared function. 
*/ ++ ++tree ++simulate_builtin_function_decl (location_t location, const char *name, ++ tree type, int function_code, ++ const char *library_name, tree attrs) ++{ ++ tree decl = build_builtin_function (location, name, type, ++ function_code, BUILT_IN_MD, ++ library_name, attrs); ++ tree new_decl = lang_hooks.simulate_builtin_function_decl (decl); ++ ++ /* Give the front end a chance to create a new decl if necessary, ++ but if the front end discards the decl in favour of a conflicting ++ (erroneous) previous definition, return the decl that we tried but ++ failed to add. This allows the caller to process the returned decl ++ normally, even though the source code won't be able to use it. */ ++ if (TREE_CODE (new_decl) == FUNCTION_DECL ++ && fndecl_built_in_p (new_decl, function_code, BUILT_IN_MD)) ++ return new_decl; ++ ++ return decl; + } + + tree +diff --git a/gcc/langhooks.h b/gcc/langhooks.h +index a45579b33..b8cee93f5 100644 +--- a/gcc/langhooks.h ++++ b/gcc/langhooks.h +@@ -64,6 +64,10 @@ struct lang_hooks_for_types + language-specific processing is required. */ + tree (*make_type) (enum tree_code); + ++ /* Make an enum type with the given name and values, associating ++ them all with the given source location. */ ++ tree (*simulate_enum_decl) (location_t, const char *, vec); ++ + /* Return what kind of RECORD_TYPE this is, mainly for purposes of + debug information. If not defined, record types are assumed to + be structures. */ +@@ -494,6 +498,15 @@ struct lang_hooks + backend must add all of the builtins at program initialization time. */ + tree (*builtin_function_ext_scope) (tree decl); + ++ /* Do language-specific processing for target-specific built-in ++ function DECL, so that it is defined in the global scope (only) ++ and is available without needing to be explicitly declared. ++ ++ This is intended for targets that want to inject declarations of ++ built-in functions into the source language (such as in response ++ to a pragma) rather than providing them in the source language itself. */ ++ tree (*simulate_builtin_function_decl) (tree decl); ++ + /* Used to set up the tree_contains_structure array for a frontend. */ + void (*init_ts) (void); + +@@ -562,6 +575,8 @@ extern tree add_builtin_function_ext_scope (const char *name, tree type, + enum built_in_class cl, + const char *library_name, + tree attrs); ++extern tree simulate_builtin_function_decl (location_t, const char *, tree, ++ int, const char *, tree); + extern tree add_builtin_type (const char *name, tree type); + + /* Language helper functions. */ +diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c +index 89714be76..732687dba 100644 +--- a/gcc/loop-doloop.c ++++ b/gcc/loop-doloop.c +@@ -731,7 +731,7 @@ doloop_optimize (struct loop *loop) + bitmap modified = BITMAP_ALLOC (NULL); + + for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i)) +- note_stores (PATTERN (i), record_reg_sets, modified); ++ note_stores (i, record_reg_sets, modified); + + basic_block loop_end = desc->out_edge->src; + bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified); +diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c +index b880ead3d..1af88876c 100644 +--- a/gcc/loop-invariant.c ++++ b/gcc/loop-invariant.c +@@ -2170,7 +2170,7 @@ calculate_loop_reg_pressure (void) + + mark_ref_regs (PATTERN (insn)); + n_regs_set = 0; +- note_stores (PATTERN (insn), mark_reg_clobber, NULL); ++ note_stores (insn, mark_reg_clobber, NULL); + + /* Mark any registers dead after INSN as dead now. 
*/ + +@@ -2183,7 +2183,7 @@ calculate_loop_reg_pressure (void) + Clobbers are processed again, so they conflict with + the registers that are set. */ + +- note_stores (PATTERN (insn), mark_reg_store, NULL); ++ note_stores (insn, mark_reg_store, NULL); + + if (AUTO_INC_DEC) + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) +diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c +index 340045ce8..1dc3bc74d 100644 +--- a/gcc/loop-iv.c ++++ b/gcc/loop-iv.c +@@ -1967,16 +1967,10 @@ simplify_using_initial_values (struct loop *loop, enum rtx_code op, rtx *expr) + continue; + + CLEAR_REG_SET (this_altered); +- note_stores (PATTERN (insn), mark_altered, this_altered); ++ note_stores (insn, mark_altered, this_altered); + if (CALL_P (insn)) +- { +- /* Kill all call clobbered registers. */ +- unsigned int i; +- hard_reg_set_iterator hrsi; +- EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call, +- 0, i, hrsi) +- SET_REGNO_REG_SET (this_altered, i); +- } ++ /* Kill all call clobbered registers. */ ++ IOR_REG_SET_HRS (this_altered, regs_invalidated_by_call); + + if (suitable_set_for_replacement (insn, &dest, &src)) + { +diff --git a/gcc/lra-assigns.c b/gcc/lra-assigns.c +index 5c5c73293..a35fc41ac 100644 +--- a/gcc/lra-assigns.c ++++ b/gcc/lra-assigns.c +@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3. If not see + #include "params.h" + #include "lra.h" + #include "lra-int.h" ++#include "function-abi.h" + + /* Current iteration number of the pass and current iteration number + of the pass after the latest spill pass when any former reload +@@ -493,18 +494,15 @@ find_hard_regno_for_1 (int regno, int *cost, int try_only_hard_regno, + HARD_REG_SET impossible_start_hard_regs, available_regs; + + if (hard_reg_set_empty_p (regno_set)) +- COPY_HARD_REG_SET (conflict_set, lra_no_alloc_regs); ++ conflict_set = lra_no_alloc_regs; + else +- { +- COMPL_HARD_REG_SET (conflict_set, regno_set); +- IOR_HARD_REG_SET (conflict_set, lra_no_alloc_regs); +- } ++ conflict_set = ~regno_set | lra_no_alloc_regs; + rclass = regno_allocno_class_array[regno]; + rclass_intersect_p = ira_reg_classes_intersect_p[rclass]; + curr_hard_regno_costs_check++; + sparseset_clear (conflict_reload_and_inheritance_pseudos); + sparseset_clear (live_range_hard_reg_pseudos); +- IOR_HARD_REG_SET (conflict_set, lra_reg_info[regno].conflict_hard_regs); ++ conflict_set |= lra_reg_info[regno].conflict_hard_regs; + biggest_mode = lra_reg_info[regno].biggest_mode; + for (r = lra_reg_info[regno].live_ranges; r != NULL; r = r->next) + { +@@ -614,7 +612,7 @@ find_hard_regno_for_1 (int regno, int *cost, int try_only_hard_regno, + } + /* Make sure that all registers in a multi-word pseudo belong to the + required class. 
*/ +- IOR_COMPL_HARD_REG_SET (conflict_set, reg_class_contents[rclass]); ++ conflict_set |= ~reg_class_contents[rclass]; + lra_assert (rclass != NO_REGS); + rclass_size = ira_class_hard_regs_num[rclass]; + best_hard_regno = -1; +@@ -622,8 +620,7 @@ find_hard_regno_for_1 (int regno, int *cost, int try_only_hard_regno, + biggest_nregs = hard_regno_nregs (hard_regno, biggest_mode); + nregs_diff = (biggest_nregs + - hard_regno_nregs (hard_regno, PSEUDO_REGNO_MODE (regno))); +- COPY_HARD_REG_SET (available_regs, reg_class_contents[rclass]); +- AND_COMPL_HARD_REG_SET (available_regs, lra_no_alloc_regs); ++ available_regs = reg_class_contents[rclass] & ~lra_no_alloc_regs; + for (i = 0; i < rclass_size; i++) + { + if (try_only_hard_regno >= 0) +@@ -658,7 +655,7 @@ find_hard_regno_for_1 (int regno, int *cost, int try_only_hard_regno, + for (j = 0; + j < hard_regno_nregs (hard_regno, PSEUDO_REGNO_MODE (regno)); + j++) +- if (! TEST_HARD_REG_BIT (call_used_reg_set, hard_regno + j) ++ if (! crtl->abi->clobbers_full_reg_p (hard_regno + j) + && ! df_regs_ever_live_p (hard_regno + j)) + /* It needs save restore. */ + hard_regno_costs[hard_regno] +@@ -1219,8 +1216,8 @@ setup_live_pseudos_and_spill_after_risky_transforms (bitmap + sparseset_set_bit (live_range_hard_reg_pseudos, r2->regno); + } + } +- COPY_HARD_REG_SET (conflict_set, lra_no_alloc_regs); +- IOR_HARD_REG_SET (conflict_set, lra_reg_info[regno].conflict_hard_regs); ++ conflict_set = lra_no_alloc_regs; ++ conflict_set |= lra_reg_info[regno].conflict_hard_regs; + val = lra_reg_info[regno].val; + offset = lra_reg_info[regno].offset; + EXECUTE_IF_SET_IN_SPARSESET (live_range_hard_reg_pseudos, conflict_regno) +@@ -1640,14 +1637,14 @@ lra_assign (bool &fails_p) + bitmap_initialize (&all_spilled_pseudos, ®_obstack); + create_live_range_start_chains (); + setup_live_pseudos_and_spill_after_risky_transforms (&all_spilled_pseudos); +- if (! lra_asm_error_p && flag_checking && !flag_ipa_ra) ++ if (! lra_asm_error_p && flag_checking) + /* Check correctness of allocation for call-crossed pseudos but + only when there are no asm errors as in the case of errors the + asm is removed and it can result in incorrect allocation. */ + for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) +- if (lra_reg_info[i].nrefs != 0 && reg_renumber[i] >= 0 +- && lra_reg_info[i].call_insn +- && overlaps_hard_reg_set_p (call_used_reg_set, ++ if (lra_reg_info[i].nrefs != 0 ++ && reg_renumber[i] >= 0 ++ && overlaps_hard_reg_set_p (lra_reg_info[i].conflict_hard_regs, + PSEUDO_REGNO_MODE (i), reg_renumber[i])) + gcc_unreachable (); + /* Setup insns to process on the next constraint pass. */ +diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c +index f0a2f0491..b34aec227 100644 +--- a/gcc/lra-constraints.c ++++ b/gcc/lra-constraints.c +@@ -131,6 +131,7 @@ + #include "lra.h" + #include "lra-int.h" + #include "print-rtl.h" ++#include "function-abi.h" + + /* Value of LRA_CURR_RELOAD_NUM at the beginning of BB of the current + insn. Remember that LRA_CURR_RELOAD_NUM is the number of emitted +@@ -394,11 +395,24 @@ address_eliminator::~address_eliminator () + *m_index_loc = m_index_reg; + } + +-/* Return true if the eliminated form of AD is a legitimate target address. */ ++/* Return true if the eliminated form of AD is a legitimate target address. ++ If OP is a MEM, AD is the address within OP, otherwise OP should be ++ ignored. CONSTRAINT is one constraint that the operand may need ++ to meet. 
*/ + static bool +-valid_address_p (struct address_info *ad) ++valid_address_p (rtx op, struct address_info *ad, ++ enum constraint_num constraint) + { + address_eliminator eliminator (ad); ++ ++ /* Allow a memory OP if it matches CONSTRAINT, even if CONSTRAINT is more ++ forgiving than "m". */ ++ if (MEM_P (op) ++ && (insn_extra_memory_constraint (constraint) ++ || insn_extra_special_memory_constraint (constraint)) ++ && constraint_satisfied_p (op, constraint)) ++ return true; ++ + return valid_address_p (ad->mode, *ad->outer, ad->as); + } + +@@ -1888,8 +1902,7 @@ prohibited_class_reg_set_mode_p (enum reg_class rclass, + HARD_REG_SET temp; + + lra_assert (hard_reg_set_subset_p (reg_class_contents[rclass], set)); +- COPY_HARD_REG_SET (temp, set); +- AND_COMPL_HARD_REG_SET (temp, lra_no_alloc_regs); ++ temp = set & ~lra_no_alloc_regs; + return (hard_reg_set_subset_p + (temp, ira_prohibited_class_mode_regs[rclass][mode])); + } +@@ -1900,11 +1913,12 @@ prohibited_class_reg_set_mode_p (enum reg_class rclass, + alternative. */ + static unsigned int curr_small_class_check = 0; + +-/* Update number of used inputs of class OP_CLASS for operand NOP. +- Return true if we have more such class operands than the number of +- available regs. */ ++/* Update number of used inputs of class OP_CLASS for operand NOP ++ of alternative NALT. Return true if we have more such class operands ++ than the number of available regs. */ + static bool +-update_and_check_small_class_inputs (int nop, enum reg_class op_class) ++update_and_check_small_class_inputs (int nop, int nalt, ++ enum reg_class op_class) + { + static unsigned int small_class_check[LIM_REG_CLASSES]; + static int small_class_input_nums[LIM_REG_CLASSES]; +@@ -1915,7 +1929,7 @@ update_and_check_small_class_inputs (int nop, enum reg_class op_class) + && hard_reg_set_intersect_p (reg_class_contents[op_class], + ira_no_alloc_regs) + && (curr_static_id->operand[nop].type != OP_OUT +- || curr_static_id->operand[nop].early_clobber)) ++ || TEST_BIT (curr_static_id->operand[nop].early_clobber_alts, nalt))) + { + if (small_class_check[op_class] == curr_small_class_check) + small_class_input_nums[op_class]++; +@@ -2184,7 +2198,8 @@ process_alt_operands (int only_alternative) + /* We should reject matching of an early + clobber operand if the matching operand is + not dying in the insn. */ +- if (! curr_static_id->operand[m].early_clobber ++ if (!TEST_BIT (curr_static_id->operand[m] ++ .early_clobber_alts, nalt) + || operand_reg[nop] == NULL_RTX + || (find_regno_note (curr_insn, REG_DEAD, + REGNO (op)) +@@ -2251,7 +2266,8 @@ process_alt_operands (int only_alternative) + it results in less hard regs required for + the insn than a non-matching earlyclobber + alternative. */ +- if (curr_static_id->operand[m].early_clobber) ++ if (TEST_BIT (curr_static_id->operand[m] ++ .early_clobber_alts, nalt)) + { + if (lra_dump_file != NULL) + fprintf +@@ -2302,7 +2318,7 @@ process_alt_operands (int only_alternative) + reloads. 
*/ + badop = false; + this_alternative = curr_alt[m]; +- COPY_HARD_REG_SET (this_alternative_set, curr_alt_set[m]); ++ this_alternative_set = curr_alt_set[m]; + winreg = this_alternative != NO_REGS; + break; + } +@@ -2387,14 +2403,12 @@ process_alt_operands (int only_alternative) + if (mode == BLKmode) + break; + this_alternative = reg_class_subunion[this_alternative][cl]; +- IOR_HARD_REG_SET (this_alternative_set, +- reg_class_contents[cl]); ++ this_alternative_set |= reg_class_contents[cl]; + if (costly_p) + { + this_costly_alternative + = reg_class_subunion[this_costly_alternative][cl]; +- IOR_HARD_REG_SET (this_costly_alternative_set, +- reg_class_contents[cl]); ++ this_costly_alternative_set |= reg_class_contents[cl]; + } + winreg = true; + if (REG_P (op)) +@@ -2529,14 +2543,11 @@ process_alt_operands (int only_alternative) + + if (this_alternative != NO_REGS) + { +- HARD_REG_SET available_regs; +- +- COPY_HARD_REG_SET (available_regs, +- reg_class_contents[this_alternative]); +- AND_COMPL_HARD_REG_SET +- (available_regs, +- ira_prohibited_class_mode_regs[this_alternative][mode]); +- AND_COMPL_HARD_REG_SET (available_regs, lra_no_alloc_regs); ++ HARD_REG_SET available_regs ++ = (reg_class_contents[this_alternative] ++ & ~((ira_prohibited_class_mode_regs ++ [this_alternative][mode]) ++ | lra_no_alloc_regs)); + if (hard_reg_set_empty_p (available_regs)) + { + /* There are no hard regs holding a value of given +@@ -2892,7 +2903,8 @@ process_alt_operands (int only_alternative) + goto fail; + } + +- if (update_and_check_small_class_inputs (nop, this_alternative)) ++ if (update_and_check_small_class_inputs (nop, nalt, ++ this_alternative)) + { + if (lra_dump_file != NULL) + fprintf (lra_dump_file, +@@ -2901,7 +2913,7 @@ process_alt_operands (int only_alternative) + goto fail; + } + curr_alt[nop] = this_alternative; +- COPY_HARD_REG_SET (curr_alt_set[nop], this_alternative_set); ++ curr_alt_set[nop] = this_alternative_set; + curr_alt_win[nop] = this_alternative_win; + curr_alt_match_win[nop] = this_alternative_match_win; + curr_alt_offmemok[nop] = this_alternative_offmemok; +@@ -3416,7 +3428,7 @@ process_address_1 (int nop, bool check_only_p, + + All these cases involve a non-autoinc address, so there is no + point revalidating other types. */ +- if (ad.autoinc_p || valid_address_p (&ad)) ++ if (ad.autoinc_p || valid_address_p (op, &ad, cn)) + return change_p; + + /* Any index existed before LRA started, so we can assume that the +@@ -3445,7 +3457,7 @@ process_address_1 (int nop, bool check_only_p, + if (code >= 0) + { + *ad.inner = gen_rtx_LO_SUM (Pmode, new_reg, addr); +- if (! valid_address_p (ad.mode, *ad.outer, ad.as)) ++ if (!valid_address_p (op, &ad, cn)) + { + /* Try to put lo_sum into register. */ + insn = emit_insn (gen_rtx_SET +@@ -3455,7 +3467,7 @@ process_address_1 (int nop, bool check_only_p, + if (code >= 0) + { + *ad.inner = new_reg; +- if (! 
valid_address_p (ad.mode, *ad.outer, ad.as)) ++ if (!valid_address_p (op, &ad, cn)) + { + *ad.inner = addr; + code = -1; +@@ -3550,7 +3562,7 @@ process_address_1 (int nop, bool check_only_p, + && CONSTANT_P (XEXP (SET_SRC (set), 1))) + { + *ad.inner = SET_SRC (set); +- if (valid_address_p (ad.mode, *ad.outer, ad.as)) ++ if (valid_address_p (op, &ad, cn)) + { + *ad.base_term = XEXP (SET_SRC (set), 0); + *ad.disp_term = XEXP (SET_SRC (set), 1); +@@ -4573,7 +4585,7 @@ contains_reg_p (rtx x, bool hard_reg_p, bool spilled_p) + regno = lra_get_regno_hard_regno (regno); + if (regno < 0) + return false; +- COMPL_HARD_REG_SET (alloc_regs, lra_no_alloc_regs); ++ alloc_regs = ~lra_no_alloc_regs; + return overlaps_hard_reg_set_p (alloc_regs, GET_MODE (x), regno); + } + else +@@ -5165,6 +5177,14 @@ static int reloads_num; + /* Number of calls passed so far in current EBB. */ + static int calls_num; + ++/* Index ID is the CALLS_NUM associated the last call we saw with ++ ABI identifier ID. */ ++static int last_call_for_abi[NUM_ABI_IDS]; ++ ++/* Which registers have been fully or partially clobbered by a call ++ since they were last used. */ ++static HARD_REG_SET full_and_partial_call_clobbers; ++ + /* Current reload pseudo check for validity of elements in + USAGE_INSNS. */ + static int curr_usage_insns_check; +@@ -5208,6 +5228,10 @@ setup_next_usage_insn (int regno, rtx insn, int reloads_num, bool after_p) + usage_insns[regno].reloads_num = reloads_num; + usage_insns[regno].calls_num = calls_num; + usage_insns[regno].after_p = after_p; ++ if (regno >= FIRST_PSEUDO_REGISTER && reg_renumber[regno] >= 0) ++ remove_from_hard_reg_set (&full_and_partial_call_clobbers, ++ PSEUDO_REGNO_MODE (regno), ++ reg_renumber[regno]); + } + + /* The function is used to form list REGNO usages which consists of +@@ -5453,16 +5477,19 @@ static inline bool + need_for_call_save_p (int regno) + { + lra_assert (regno >= FIRST_PSEUDO_REGISTER && reg_renumber[regno] >= 0); +- return (usage_insns[regno].calls_num < calls_num +- && (overlaps_hard_reg_set_p +- ((flag_ipa_ra && +- ! hard_reg_set_empty_p (lra_reg_info[regno].actual_call_used_reg_set)) +- ? lra_reg_info[regno].actual_call_used_reg_set +- : call_used_reg_set, +- PSEUDO_REGNO_MODE (regno), reg_renumber[regno]) +- || (targetm.hard_regno_call_part_clobbered +- (lra_reg_info[regno].call_insn, +- reg_renumber[regno], PSEUDO_REGNO_MODE (regno))))); ++ if (usage_insns[regno].calls_num < calls_num) ++ { ++ unsigned int abis = 0; ++ for (unsigned int i = 0; i < NUM_ABI_IDS; ++i) ++ if (last_call_for_abi[i] > usage_insns[regno].calls_num) ++ abis |= 1 << i; ++ gcc_assert (abis); ++ if (call_clobbered_in_region_p (abis, full_and_partial_call_clobbers, ++ PSEUDO_REGNO_MODE (regno), ++ reg_renumber[regno])) ++ return true; ++ } ++ return false; + } + + /* Global registers occurring in the current EBB. */ +@@ -5502,8 +5529,7 @@ need_for_split_p (HARD_REG_SET potential_reload_hard_regs, int regno) + true) the assign pass assumes that all pseudos living + through calls are assigned to call saved hard regs. */ + && (regno >= FIRST_PSEUDO_REGISTER +- || ! TEST_HARD_REG_BIT (call_used_reg_set, regno) +- || usage_insns[regno].calls_num == calls_num) ++ || !TEST_HARD_REG_BIT (full_and_partial_call_clobbers, regno)) + /* We need at least 2 reloads to make pseudo splitting + profitable. 
We should provide hard regno splitting in + any case to solve 1st insn scheduling problem when +@@ -6255,12 +6281,14 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) + curr_usage_insns_check++; + clear_invariants (); + reloads_num = calls_num = 0; ++ for (unsigned int i = 0; i < NUM_ABI_IDS; ++i) ++ last_call_for_abi[i] = 0; ++ CLEAR_HARD_REG_SET (full_and_partial_call_clobbers); + bitmap_clear (&check_only_regs); + bitmap_clear (&invalid_invariant_regs); + last_processed_bb = NULL; + CLEAR_HARD_REG_SET (potential_reload_hard_regs); +- COPY_HARD_REG_SET (live_hard_regs, eliminable_regset); +- IOR_HARD_REG_SET (live_hard_regs, lra_no_alloc_regs); ++ live_hard_regs = eliminable_regset | lra_no_alloc_regs; + /* We don't process new insns generated in the loop. */ + for (curr_insn = tail; curr_insn != PREV_INSN (head); curr_insn = prev_insn) + { +@@ -6330,8 +6358,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) + else + setup_next_usage_insn (src_regno, curr_insn, reloads_num, false); + if (hard_reg_set_subset_p (reg_class_contents[cl], live_hard_regs)) +- IOR_HARD_REG_SET (potential_reload_hard_regs, +- reg_class_contents[cl]); ++ potential_reload_hard_regs |= reg_class_contents[cl]; + } + else if (src_regno < 0 + && dst_regno >= lra_constraint_new_regno_start +@@ -6348,8 +6375,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) + if (process_invariant_for_inheritance (SET_DEST (curr_set), SET_SRC (curr_set))) + change_p = true; + if (hard_reg_set_subset_p (reg_class_contents[cl], live_hard_regs)) +- IOR_HARD_REG_SET (potential_reload_hard_regs, +- reg_class_contents[cl]); ++ potential_reload_hard_regs |= reg_class_contents[cl]; + } + else if (src_regno >= lra_constraint_new_regno_start + && dst_regno < lra_constraint_new_regno_start +@@ -6371,8 +6397,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) + /* Invalidate. */ + usage_insns[dst_regno].check = 0; + if (hard_reg_set_subset_p (reg_class_contents[cl], live_hard_regs)) +- IOR_HARD_REG_SET (potential_reload_hard_regs, +- reg_class_contents[cl]); ++ potential_reload_hard_regs |= reg_class_contents[cl]; + } + else if (INSN_P (curr_insn)) + { +@@ -6427,8 +6452,8 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) + else + add_to_hard_reg_set (&s, PSEUDO_REGNO_MODE (dst_regno), + reg_renumber[dst_regno]); +- AND_COMPL_HARD_REG_SET (live_hard_regs, s); +- AND_COMPL_HARD_REG_SET (potential_reload_hard_regs, s); ++ live_hard_regs &= ~s; ++ potential_reload_hard_regs &= ~s; + } + /* We should invalidate potential inheritance or + splitting for the current insn usages to the next +@@ -6472,6 +6497,10 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) + int regno, hard_regno; + + calls_num++; ++ function_abi callee_abi = insn_callee_abi (curr_insn); ++ last_call_for_abi[callee_abi.id ()] = calls_num; ++ full_and_partial_call_clobbers ++ |= callee_abi.full_and_partial_reg_clobbers (); + if ((cheap = find_reg_note (curr_insn, + REG_RETURNED, NULL_RTX)) != NULL_RTX + && ((cheap = XEXP (cheap, 0)), true) +@@ -6481,7 +6510,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) + /* If there are pending saves/restores, the + optimization is not worth. 
*/ + && usage_insns[regno].calls_num == calls_num - 1 +- && TEST_HARD_REG_BIT (call_used_reg_set, hard_regno)) ++ && callee_abi.clobbers_reg_p (GET_MODE (cheap), hard_regno)) + { + /* Restore the pseudo from the call result as + REG_RETURNED note says that the pseudo value is +@@ -6504,6 +6533,9 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) + /* We don't need to save/restore of the pseudo from + this call. */ + usage_insns[regno].calls_num = calls_num; ++ remove_from_hard_reg_set ++ (&full_and_partial_call_clobbers, ++ GET_MODE (cheap), hard_regno); + bitmap_set_bit (&check_only_regs, regno); + } + } +@@ -6607,8 +6639,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) + if (ira_class_hard_regs_num[cl] <= max_small_class_regs_num) + reloads_num++; + if (hard_reg_set_subset_p (reg_class_contents[cl], live_hard_regs)) +- IOR_HARD_REG_SET (potential_reload_hard_regs, +- reg_class_contents[cl]); ++ potential_reload_hard_regs |= reg_class_contents[cl]; + } + } + if (NONDEBUG_INSN_P (curr_insn)) +diff --git a/gcc/lra-eliminations.c b/gcc/lra-eliminations.c +index 7a345a52a..9568c13cb 100644 +--- a/gcc/lra-eliminations.c ++++ b/gcc/lra-eliminations.c +@@ -654,7 +654,6 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, + return x; + + case CLOBBER: +- case CLOBBER_HIGH: + case SET: + gcc_unreachable (); + +@@ -807,16 +806,6 @@ mark_not_eliminable (rtx x, machine_mode mem_mode) + setup_can_eliminate (ep, false); + return; + +- case CLOBBER_HIGH: +- gcc_assert (REG_P (XEXP (x, 0))); +- gcc_assert (REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER); +- for (ep = reg_eliminate; +- ep < ®_eliminate[NUM_ELIMINABLE_REGS]; +- ep++) +- if (reg_is_clobbered_by_clobber_high (ep->to_rtx, XEXP (x, 0))) +- setup_can_eliminate (ep, false); +- return; +- + case SET: + if (SET_DEST (x) == stack_pointer_rtx + && GET_CODE (SET_SRC (x)) == PLUS +@@ -1180,7 +1169,7 @@ spill_pseudos (HARD_REG_SET set) + reg_renumber[i] = -1; + bitmap_ior_into (&to_process, &lra_reg_info[i].insn_bitmap); + } +- IOR_HARD_REG_SET (lra_no_alloc_regs, set); ++ lra_no_alloc_regs |= set; + for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn)) + if (bitmap_bit_p (&to_process, INSN_UID (insn))) + { +@@ -1293,8 +1282,8 @@ update_reg_eliminate (bitmap insns_with_changed_offsets) + result = true; + } + } +- IOR_HARD_REG_SET (lra_no_alloc_regs, temp_hard_reg_set); +- AND_COMPL_HARD_REG_SET (eliminable_regset, temp_hard_reg_set); ++ lra_no_alloc_regs |= temp_hard_reg_set; ++ eliminable_regset &= ~temp_hard_reg_set; + spill_pseudos (temp_hard_reg_set); + return result; + } +diff --git a/gcc/lra-int.h b/gcc/lra-int.h +index 253ae1e6c..5671e2e65 100644 +--- a/gcc/lra-int.h ++++ b/gcc/lra-int.h +@@ -72,10 +72,6 @@ struct lra_reg + /* The following fields are defined only for pseudos. */ + /* Hard registers with which the pseudo conflicts. */ + HARD_REG_SET conflict_hard_regs; +- /* Call used registers with which the pseudo conflicts, taking into account +- the registers used by functions called from calls which cross the +- pseudo. */ +- HARD_REG_SET actual_call_used_reg_set; + /* We assign hard registers to reload pseudos which can occur in few + places. So two hard register preferences are enough for them. + The following fields define the preferred hard registers. If +@@ -103,8 +99,6 @@ struct lra_reg + int val; + /* Offset from relative eliminate register to pesudo reg. */ + poly_int64 offset; +- /* Call instruction, if any, that may affect this psuedo reg. 
*/ +- rtx_insn *call_insn; + /* These members are set up in lra-lives.c and updated in + lra-coalesce.c. */ + /* The biggest size mode in which each pseudo reg is referred in +@@ -141,10 +135,6 @@ struct lra_operand_data + unsigned int strict_low : 1; + /* True if the operand is an operator. */ + unsigned int is_operator : 1; +- /* True if there is an early clobber alternative for this operand. +- This field is set up every time when corresponding +- operand_alternative in lra_static_insn_data is set up. */ +- unsigned int early_clobber : 1; + /* True if the operand is an address. */ + unsigned int is_address : 1; + }; +@@ -163,11 +153,6 @@ struct lra_insn_reg + /* True if the reg is accessed through a subreg and the subreg is + just a part of the register. */ + unsigned int subreg_p : 1; +- /* True if there is an early clobber alternative for this +- operand. */ +- unsigned int early_clobber : 1; +- /* True if the reg is clobber highed by the operand. */ +- unsigned int clobber_high : 1; + /* The corresponding regno of the register. */ + int regno; + /* Next reg info of the same insn. */ +diff --git a/gcc/lra-lives.c b/gcc/lra-lives.c +index 55b2adc2a..bce123d73 100644 +--- a/gcc/lra-lives.c ++++ b/gcc/lra-lives.c +@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see + #include "sparseset.h" + #include "lra-int.h" + #include "target.h" ++#include "function-abi.h" + + /* Program points are enumerated by numbers from range + 0..LRA_LIVE_MAX_POINT-1. There are approximately two times more +@@ -327,7 +328,7 @@ static void + mark_pseudo_dead (int regno) + { + lra_assert (!HARD_REGISTER_NUM_P (regno)); +- IOR_HARD_REG_SET (lra_reg_info[regno].conflict_hard_regs, hard_regs_live); ++ lra_reg_info[regno].conflict_hard_regs |= hard_regs_live; + if (!sparseset_bit_p (pseudos_live, regno)) + return; + +@@ -574,41 +575,21 @@ lra_setup_reload_pseudo_preferenced_hard_reg (int regno, + } + } + +-/* Check that REGNO living through calls and setjumps, set up conflict +- regs using LAST_CALL_USED_REG_SET, and clear corresponding bits in +- PSEUDOS_LIVE_THROUGH_CALLS and PSEUDOS_LIVE_THROUGH_SETJUMPS. +- CALL_INSN is a call that is representative of all calls in the region +- described by the PSEUDOS_LIVE_THROUGH_* sets, in terms of the registers +- that it preserves and clobbers. */ ++/* Check whether REGNO lives through calls and setjmps and clear ++ the corresponding bits in PSEUDOS_LIVE_THROUGH_CALLS and ++ PSEUDOS_LIVE_THROUGH_SETJUMPS. All calls in the region described ++ by PSEUDOS_LIVE_THROUGH_CALLS have the given ABI. */ + + static inline void +-check_pseudos_live_through_calls (int regno, +- HARD_REG_SET last_call_used_reg_set, +- rtx_insn *call_insn) ++check_pseudos_live_through_calls (int regno, const function_abi &abi) + { +- int hr; +- rtx_insn *old_call_insn; +- + if (! 
sparseset_bit_p (pseudos_live_through_calls, regno)) + return; + +- gcc_assert (call_insn && CALL_P (call_insn)); +- old_call_insn = lra_reg_info[regno].call_insn; +- if (!old_call_insn +- || (targetm.return_call_with_max_clobbers +- && targetm.return_call_with_max_clobbers (old_call_insn, call_insn) +- == call_insn)) +- lra_reg_info[regno].call_insn = call_insn; ++ machine_mode mode = PSEUDO_REGNO_MODE (regno); + + sparseset_clear_bit (pseudos_live_through_calls, regno); +- IOR_HARD_REG_SET (lra_reg_info[regno].conflict_hard_regs, +- last_call_used_reg_set); +- +- for (hr = 0; HARD_REGISTER_NUM_P (hr); hr++) +- if (targetm.hard_regno_call_part_clobbered (call_insn, hr, +- PSEUDO_REGNO_MODE (regno))) +- add_to_hard_reg_set (&lra_reg_info[regno].conflict_hard_regs, +- PSEUDO_REGNO_MODE (regno), hr); ++ lra_reg_info[regno].conflict_hard_regs |= abi.mode_clobbers (mode); + if (! sparseset_bit_p (pseudos_live_through_setjumps, regno)) + return; + sparseset_clear_bit (pseudos_live_through_setjumps, regno); +@@ -623,23 +604,10 @@ check_pseudos_live_through_calls (int regno, + static inline bool + reg_early_clobber_p (const struct lra_insn_reg *reg, int n_alt) + { +- return (reg->early_clobber +- && (n_alt == LRA_UNKNOWN_ALT +- || (n_alt != LRA_NON_CLOBBERED_ALT +- && TEST_BIT (reg->early_clobber_alts, n_alt)))); +-} +- +-/* Return true if call instructions CALL1 and CALL2 use ABIs that +- preserve the same set of registers. */ +- +-static bool +-calls_have_same_clobbers_p (rtx_insn *call1, rtx_insn *call2) +-{ +- if (!targetm.return_call_with_max_clobbers) +- return false; +- +- return (targetm.return_call_with_max_clobbers (call1, call2) == call1 +- && targetm.return_call_with_max_clobbers (call2, call1) == call2); ++ return (n_alt == LRA_UNKNOWN_ALT ++ ? reg->early_clobber_alts != 0 ++ : (n_alt != LRA_NON_CLOBBERED_ALT ++ && TEST_BIT (reg->early_clobber_alts, n_alt))); + } + + /* Process insns of the basic block BB to update pseudo live ranges, +@@ -661,17 +629,15 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + rtx_insn *next; + rtx link, *link_loc; + bool need_curr_point_incr; +- HARD_REG_SET last_call_used_reg_set; +- rtx_insn *call_insn = NULL; +- rtx_insn *last_call_insn = NULL; ++ /* Only has a meaningful value once we've seen a call. 
*/ ++ function_abi last_call_abi = default_function_abi; + + reg_live_out = df_get_live_out (bb); + sparseset_clear (pseudos_live); + sparseset_clear (pseudos_live_through_calls); + sparseset_clear (pseudos_live_through_setjumps); +- CLEAR_HARD_REG_SET (last_call_used_reg_set); + REG_SET_TO_HARD_REG_SET (hard_regs_live, reg_live_out); +- AND_COMPL_HARD_REG_SET (hard_regs_live, eliminable_regset); ++ hard_regs_live &= ~eliminable_regset; + EXECUTE_IF_SET_IN_BITMAP (reg_live_out, FIRST_PSEUDO_REGISTER, j, bi) + { + update_pseudo_point (j, curr_point, USE_POINT); +@@ -701,7 +667,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + bool call_p; + int n_alt, dst_regno, src_regno; + rtx set; +- struct lra_insn_reg *reg, *hr; ++ struct lra_insn_reg *reg; + + if (!NONDEBUG_INSN_P (curr_insn)) + continue; +@@ -733,7 +699,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + break; + } + for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next) +- if (reg->type != OP_IN && !reg->clobber_high) ++ if (reg->type != OP_IN) + { + remove_p = false; + break; +@@ -870,24 +836,13 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + unused values because they still conflict with quantities + that are live at the time of the definition. */ + for (reg = curr_id->regs; reg != NULL; reg = reg->next) +- { +- if (reg->type != OP_IN) +- { +- update_pseudo_point (reg->regno, curr_point, USE_POINT); +- mark_regno_live (reg->regno, reg->biggest_mode); +- check_pseudos_live_through_calls (reg->regno, +- last_call_used_reg_set, +- call_insn); +- } +- +- if (!HARD_REGISTER_NUM_P (reg->regno)) +- for (hr = curr_static_id->hard_regs; hr != NULL; hr = hr->next) +- if (hr->clobber_high +- && maybe_gt (GET_MODE_SIZE (PSEUDO_REGNO_MODE (reg->regno)), +- GET_MODE_SIZE (hr->biggest_mode))) +- SET_HARD_REG_BIT (lra_reg_info[reg->regno].conflict_hard_regs, +- hr->regno); +- } ++ if (reg->type != OP_IN) ++ { ++ update_pseudo_point (reg->regno, curr_point, USE_POINT); ++ mark_regno_live (reg->regno, reg->biggest_mode); ++ /* ??? Should be a no-op for unused registers. */ ++ check_pseudos_live_through_calls (reg->regno, last_call_abi); ++ } + + for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next) + if (reg->type != OP_IN) +@@ -926,35 +881,13 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + + if (call_p) + { +- call_insn = curr_insn; +- if (! flag_ipa_ra && ! targetm.return_call_with_max_clobbers) +- COPY_HARD_REG_SET(last_call_used_reg_set, call_used_reg_set); +- else +- { +- HARD_REG_SET this_call_used_reg_set; +- get_call_reg_set_usage (curr_insn, &this_call_used_reg_set, +- call_used_reg_set); +- +- bool flush = (! hard_reg_set_empty_p (last_call_used_reg_set) +- && ( ! hard_reg_set_equal_p (last_call_used_reg_set, +- this_call_used_reg_set))) +- || (last_call_insn && ! 
calls_have_same_clobbers_p +- (call_insn, +- last_call_insn)); +- +- EXECUTE_IF_SET_IN_SPARSESET (pseudos_live, j) +- { +- IOR_HARD_REG_SET (lra_reg_info[j].actual_call_used_reg_set, +- this_call_used_reg_set); ++ function_abi call_abi = insn_callee_abi (curr_insn); + +- if (flush) +- check_pseudos_live_through_calls (j, +- last_call_used_reg_set, +- last_call_insn); +- } +- COPY_HARD_REG_SET(last_call_used_reg_set, this_call_used_reg_set); +- last_call_insn = call_insn; +- } ++ if (last_call_abi != call_abi) ++ EXECUTE_IF_SET_IN_SPARSESET (pseudos_live, j) ++ check_pseudos_live_through_calls (j, last_call_abi); ++ ++ last_call_abi = call_abi; + + sparseset_ior (pseudos_live_through_calls, + pseudos_live_through_calls, pseudos_live); +@@ -992,9 +925,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + if (reg->type == OP_IN) + update_pseudo_point (reg->regno, curr_point, USE_POINT); + mark_regno_live (reg->regno, reg->biggest_mode); +- check_pseudos_live_through_calls (reg->regno, +- last_call_used_reg_set, +- call_insn); ++ check_pseudos_live_through_calls (reg->regno, last_call_abi); + } + + for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next) +@@ -1088,10 +1019,10 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + } + + /* Pseudos can't go in stack regs at the start of a basic block that +- is reached by an abnormal edge. Likewise for call clobbered regs, +- because caller-save, fixup_abnormal_edges and possibly the table +- driven EH machinery are not quite ready to handle such pseudos +- live across such edges. */ ++ is reached by an abnormal edge. Likewise for registers that are at ++ least partly call clobbered, because caller-save, fixup_abnormal_edges ++ and possibly the table driven EH machinery are not quite ready to ++ handle such pseudos live across such edges. */ + if (bb_has_abnormal_pred (bb)) + { + #ifdef STACK_REGS +@@ -1106,7 +1037,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + if (!cfun->has_nonlocal_label + && has_abnormal_call_or_eh_pred_edge_p (bb)) + for (px = 0; HARD_REGISTER_NUM_P (px); px++) +- if (call_used_regs[px] ++ if (eh_edge_abi.clobbers_at_least_part_of_reg_p (px) + #ifdef REAL_PIC_OFFSET_TABLE_REGNUM + /* We should create a conflict of PIC pseudo with PIC + hard reg as PIC hard reg can have a wrong value after +@@ -1163,7 +1094,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) + if (sparseset_cardinality (pseudos_live_through_calls) == 0) + break; + if (sparseset_bit_p (pseudos_live_through_calls, j)) +- check_pseudos_live_through_calls (j, last_call_used_reg_set, call_insn); ++ check_pseudos_live_through_calls (j, last_call_abi); + } + + for (i = 0; HARD_REGISTER_NUM_P (i); ++i) +@@ -1397,7 +1328,6 @@ lra_create_live_ranges_1 (bool all_p, bool dead_insn_p) + lra_reg_info[i].biggest_mode = GET_MODE (regno_reg_rtx[i]); + else + lra_reg_info[i].biggest_mode = VOIDmode; +- lra_reg_info[i].call_insn = NULL; + if (!HARD_REGISTER_NUM_P (i) + && lra_reg_info[i].nrefs != 0) + { +diff --git a/gcc/lra-remat.c b/gcc/lra-remat.c +index 69209b2a1..914f5e2ce 100644 +--- a/gcc/lra-remat.c ++++ b/gcc/lra-remat.c +@@ -65,16 +65,11 @@ along with GCC; see the file COPYING3. If not see + #include "recog.h" + #include "lra.h" + #include "lra-int.h" ++#include "function-abi.h" + + /* Number of candidates for rematerialization. 
*/ + static unsigned int cands_num; + +-/* The following is used for representation of call_used_reg_set in +- form array whose elements are hard register numbers with nonzero bit +- in CALL_USED_REG_SET. */ +-static int call_used_regs_arr_len; +-static int call_used_regs_arr[FIRST_PSEUDO_REGISTER]; +- + /* Bitmap used for different calculations. */ + static bitmap_head temp_bitmap; + +@@ -632,9 +627,12 @@ set_bb_regs (basic_block bb, rtx_insn *insn) + bitmap_set_bit (&subreg_regs, regno); + } + if (CALL_P (insn)) +- for (int i = 0; i < call_used_regs_arr_len; i++) +- bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs, +- call_used_regs_arr[i]); ++ { ++ /* Partially-clobbered registers might still be live. */ ++ HARD_REG_SET clobbers = insn_callee_abi (insn).full_reg_clobbers (); ++ bitmap_ior_into (&get_remat_bb_data (bb)->dead_regs, ++ bitmap_view (clobbers)); ++ } + } + + /* Calculate changed_regs and dead_regs for each BB. */ +@@ -697,7 +695,7 @@ reg_overlap_for_remat_p (lra_insn_reg *reg, rtx_insn *insn) + + /* Return true if a call used register is an input operand of INSN. */ + static bool +-call_used_input_regno_present_p (rtx_insn *insn) ++call_used_input_regno_present_p (const function_abi &abi, rtx_insn *insn) + { + int iter; + lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); +@@ -708,8 +706,9 @@ call_used_input_regno_present_p (rtx_insn *insn) + for (reg = (iter == 0 ? id->regs : static_id->hard_regs); + reg != NULL; + reg = reg->next) +- if (reg->type == OP_IN && reg->regno < FIRST_PSEUDO_REGISTER +- && TEST_HARD_REG_BIT (call_used_reg_set, reg->regno)) ++ if (reg->type == OP_IN ++ && reg->regno < FIRST_PSEUDO_REGISTER ++ && abi.clobbers_reg_p (reg->biggest_mode, reg->regno)) + return true; + return false; + } +@@ -798,18 +797,21 @@ calculate_gen_cands (void) + } + + if (CALL_P (insn)) +- EXECUTE_IF_SET_IN_BITMAP (gen_insns, 0, uid, bi) +- { +- rtx_insn *insn2 = lra_insn_recog_data[uid]->insn; ++ { ++ function_abi callee_abi = insn_callee_abi (insn); ++ EXECUTE_IF_SET_IN_BITMAP (gen_insns, 0, uid, bi) ++ { ++ rtx_insn *insn2 = lra_insn_recog_data[uid]->insn; + +- cand = insn_to_cand[INSN_UID (insn2)]; +- gcc_assert (cand != NULL); +- if (call_used_input_regno_present_p (insn2)) +- { +- bitmap_clear_bit (gen_cands, cand->index); +- bitmap_set_bit (&temp_bitmap, uid); +- } +- } ++ cand = insn_to_cand[INSN_UID (insn2)]; ++ gcc_assert (cand != NULL); ++ if (call_used_input_regno_present_p (callee_abi, insn2)) ++ { ++ bitmap_clear_bit (gen_cands, cand->index); ++ bitmap_set_bit (&temp_bitmap, uid); ++ } ++ } ++ } + bitmap_and_compl_into (gen_insns, &temp_bitmap); + + cand = insn_to_cand[INSN_UID (insn)]; +@@ -1204,13 +1206,16 @@ do_remat (void) + } + + if (CALL_P (insn)) +- EXECUTE_IF_SET_IN_BITMAP (avail_cands, 0, cid, bi) +- { +- cand = all_cands[cid]; ++ { ++ function_abi callee_abi = insn_callee_abi (insn); ++ EXECUTE_IF_SET_IN_BITMAP (avail_cands, 0, cid, bi) ++ { ++ cand = all_cands[cid]; + +- if (call_used_input_regno_present_p (cand->insn)) +- bitmap_set_bit (&temp_bitmap, cand->index); +- } ++ if (call_used_input_regno_present_p (callee_abi, cand->insn)) ++ bitmap_set_bit (&temp_bitmap, cand->index); ++ } ++ } + + bitmap_and_compl_into (avail_cands, &temp_bitmap); + +@@ -1306,10 +1311,6 @@ lra_remat (void) + insn_to_cand_activation = XCNEWVEC (cand_t, get_max_uid ()); + regno_cands = XCNEWVEC (cand_t, max_regno); + all_cands.create (8000); +- call_used_regs_arr_len = 0; +- for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (call_used_regs[i]) +- 
call_used_regs_arr[call_used_regs_arr_len++] = i; + initiate_cand_table (); + create_remat_bb_data (); + bitmap_initialize (&temp_bitmap, ®_obstack); +diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c +index c0f61c119..d4163eb75 100644 +--- a/gcc/lra-spills.c ++++ b/gcc/lra-spills.c +@@ -242,7 +242,7 @@ assign_spill_hard_regs (int *pseudo_regnos, int n) + /* Set up reserved hard regs for every program point. */ + reserved_hard_regs = XNEWVEC (HARD_REG_SET, lra_live_max_point); + for (p = 0; p < lra_live_max_point; p++) +- COPY_HARD_REG_SET (reserved_hard_regs[p], lra_no_alloc_regs); ++ reserved_hard_regs[p] = lra_no_alloc_regs; + for (i = FIRST_PSEUDO_REGISTER; i < regs_num; i++) + if (lra_reg_info[i].nrefs != 0 + && (hard_regno = lra_get_regno_hard_regno (i)) >= 0) +@@ -273,11 +273,10 @@ assign_spill_hard_regs (int *pseudo_regnos, int n) + continue; + } + lra_assert (spill_class != NO_REGS); +- COPY_HARD_REG_SET (conflict_hard_regs, +- lra_reg_info[regno].conflict_hard_regs); ++ conflict_hard_regs = lra_reg_info[regno].conflict_hard_regs; + for (r = lra_reg_info[regno].live_ranges; r != NULL; r = r->next) + for (p = r->start; p <= r->finish; p++) +- IOR_HARD_REG_SET (conflict_hard_regs, reserved_hard_regs[p]); ++ conflict_hard_regs |= reserved_hard_regs[p]; + spill_class_size = ira_class_hard_regs_num[spill_class]; + mode = lra_reg_info[regno].biggest_mode; + for (k = 0; k < spill_class_size; k++) +diff --git a/gcc/lra.c b/gcc/lra.c +index 10b85340f..db2f82fb1 100644 +--- a/gcc/lra.c ++++ b/gcc/lra.c +@@ -121,6 +121,7 @@ along with GCC; see the file COPYING3. If not see + #include "lra.h" + #include "lra-int.h" + #include "print-rtl.h" ++#include "function-abi.h" + + /* Dump bitmap SET with TITLE and BB INDEX. */ + void +@@ -536,18 +537,15 @@ object_allocator lra_insn_reg_pool ("insn regs"); + + /* Create LRA insn related info about a reference to REGNO in INSN + with TYPE (in/out/inout), biggest reference mode MODE, flag that it +- is reference through subreg (SUBREG_P), flag that is early +- clobbered in the insn (EARLY_CLOBBER), and reference to the next ++ is reference through subreg (SUBREG_P), and reference to the next + insn reg info (NEXT). If REGNO can be early clobbered, + alternatives in which it can be early clobbered are given by +- EARLY_CLOBBER_ALTS. CLOBBER_HIGH marks if reference is a clobber +- high. */ ++ EARLY_CLOBBER_ALTS. */ + static struct lra_insn_reg * + new_insn_reg (rtx_insn *insn, int regno, enum op_type type, +- machine_mode mode, +- bool subreg_p, bool early_clobber, ++ machine_mode mode, bool subreg_p, + alternative_mask early_clobber_alts, +- struct lra_insn_reg *next, bool clobber_high) ++ struct lra_insn_reg *next) + { + lra_insn_reg *ir = lra_insn_reg_pool.allocate (); + ir->type = type; +@@ -556,9 +554,7 @@ new_insn_reg (rtx_insn *insn, int regno, enum op_type type, + && partial_subreg_p (lra_reg_info[regno].biggest_mode, mode)) + lra_reg_info[regno].biggest_mode = mode; + ir->subreg_p = subreg_p; +- ir->early_clobber = early_clobber; + ir->early_clobber_alts = early_clobber_alts; +- ir->clobber_high = clobber_high; + ir->regno = regno; + ir->next = next; + return ir; +@@ -605,7 +601,7 @@ static struct lra_operand_data debug_operand_data = + 0, /* early_clobber_alts */ + E_VOIDmode, /* We are not interesting in the operand mode. 
*/ + OP_IN, +- 0, 0, 0, 0 ++ 0, 0, 0 + }; + + /* The following data are used as static insn data for all debug +@@ -801,7 +797,6 @@ setup_operand_alternative (lra_insn_recog_data_t data, + for (i = 0; i < nop; i++) + { + static_data->operand[i].early_clobber_alts = 0; +- static_data->operand[i].early_clobber = false; + static_data->operand[i].is_address = false; + if (static_data->operand[i].constraint[0] == '%') + { +@@ -817,7 +812,6 @@ setup_operand_alternative (lra_insn_recog_data_t data, + for (j = 0; j < nalt; j++) + for (i = 0; i < nop; i++, op_alt++) + { +- static_data->operand[i].early_clobber |= op_alt->earlyclobber; + if (op_alt->earlyclobber) + static_data->operand[i].early_clobber_alts |= (alternative_mask) 1 << j; + static_data->operand[i].is_address |= op_alt->is_address; +@@ -828,13 +822,12 @@ setup_operand_alternative (lra_insn_recog_data_t data, + not the insn operands, in X with TYPE (in/out/inout) and flag that + it is early clobbered in the insn (EARLY_CLOBBER) and add the info + to LIST. X is a part of insn given by DATA. Return the result +- list. CLOBBER_HIGH marks if X is a clobber high. */ ++ list. */ + static struct lra_insn_reg * + collect_non_operand_hard_regs (rtx_insn *insn, rtx *x, + lra_insn_recog_data_t data, + struct lra_insn_reg *list, +- enum op_type type, bool early_clobber, +- bool clobber_high) ++ enum op_type type, bool early_clobber) + { + int i, j, regno, last; + bool subreg_p; +@@ -878,10 +871,7 @@ collect_non_operand_hard_regs (rtx_insn *insn, rtx *x, + if (curr->type != type) + curr->type = OP_INOUT; + if (early_clobber) +- { +- curr->early_clobber = true; +- curr->early_clobber_alts = ALL_ALTERNATIVES; +- } ++ curr->early_clobber_alts = ALL_ALTERNATIVES; + break; + } + if (curr == NULL) +@@ -897,9 +887,7 @@ collect_non_operand_hard_regs (rtx_insn *insn, rtx *x, + && regno <= LAST_STACK_REG)); + #endif + list = new_insn_reg (data->insn, regno, type, mode, subreg_p, +- early_clobber, +- early_clobber ? ALL_ALTERNATIVES : 0, list, +- clobber_high); ++ early_clobber ? ALL_ALTERNATIVES : 0, list); + } + } + return list; +@@ -908,31 +896,24 @@ collect_non_operand_hard_regs (rtx_insn *insn, rtx *x, + { + case SET: + list = collect_non_operand_hard_regs (insn, &SET_DEST (op), data, +- list, OP_OUT, false, false); ++ list, OP_OUT, false); + list = collect_non_operand_hard_regs (insn, &SET_SRC (op), data, +- list, OP_IN, false, false); ++ list, OP_IN, false); + break; + case CLOBBER: + /* We treat clobber of non-operand hard registers as early clobber. */ + list = collect_non_operand_hard_regs (insn, &XEXP (op, 0), data, +- list, OP_OUT, true, false); +- break; +- case CLOBBER_HIGH: +- /* Clobber high should always span exactly one register. */ +- gcc_assert (REG_NREGS (XEXP (op, 0)) == 1); +- /* We treat clobber of non-operand hard registers as early clobber. 
*/ +- list = collect_non_operand_hard_regs (insn, &XEXP (op, 0), data, +- list, OP_OUT, true, true); ++ list, OP_OUT, true); + break; + case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: + list = collect_non_operand_hard_regs (insn, &XEXP (op, 0), data, +- list, OP_INOUT, false, false); ++ list, OP_INOUT, false); + break; + case PRE_MODIFY: case POST_MODIFY: + list = collect_non_operand_hard_regs (insn, &XEXP (op, 0), data, +- list, OP_INOUT, false, false); ++ list, OP_INOUT, false); + list = collect_non_operand_hard_regs (insn, &XEXP (op, 1), data, +- list, OP_IN, false, false); ++ list, OP_IN, false); + break; + default: + fmt = GET_RTX_FORMAT (code); +@@ -940,12 +921,11 @@ collect_non_operand_hard_regs (rtx_insn *insn, rtx *x, + { + if (fmt[i] == 'e') + list = collect_non_operand_hard_regs (insn, &XEXP (op, i), data, +- list, OP_IN, false, false); ++ list, OP_IN, false); + else if (fmt[i] == 'E') + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + list = collect_non_operand_hard_regs (insn, &XVECEXP (op, i, j), +- data, list, OP_IN, false, +- false); ++ data, list, OP_IN, false); + } + } + return list; +@@ -1094,7 +1074,7 @@ lra_set_insn_recog_data (rtx_insn *insn) + else + insn_static_data->hard_regs + = collect_non_operand_hard_regs (insn, &PATTERN (insn), data, +- NULL, OP_IN, false, false); ++ NULL, OP_IN, false); + data->arg_hard_regs = NULL; + if (CALL_P (insn)) + { +@@ -1120,10 +1100,6 @@ lra_set_insn_recog_data (rtx_insn *insn) + arg_hard_regs[n_hard_regs++] + = regno + i + (use_p ? 0 : FIRST_PSEUDO_REGISTER); + } +- else if (GET_CODE (XEXP (link, 0)) == CLOBBER_HIGH) +- /* We could support CLOBBER_HIGH and treat it in the same way as +- HARD_REGNO_CALL_PART_CLOBBERED, but no port needs that yet. */ +- gcc_unreachable (); + + if (n_hard_regs != 0) + { +@@ -1332,7 +1308,6 @@ initialize_lra_reg_info_element (int i) + lra_reg_info[i].no_stack_p = false; + #endif + CLEAR_HARD_REG_SET (lra_reg_info[i].conflict_hard_regs); +- CLEAR_HARD_REG_SET (lra_reg_info[i].actual_call_used_reg_set); + lra_reg_info[i].preferred_hard_regno1 = -1; + lra_reg_info[i].preferred_hard_regno2 = -1; + lra_reg_info[i].preferred_hard_regno_profit1 = 0; +@@ -1345,7 +1320,6 @@ initialize_lra_reg_info_element (int i) + lra_reg_info[i].val = get_new_reg_value (); + lra_reg_info[i].offset = 0; + lra_reg_info[i].copies = NULL; +- lra_reg_info[i].call_insn = NULL; + } + + /* Initialize common reg info and copies. */ +@@ -1449,15 +1423,13 @@ lra_get_copy (int n) + /* This page contains code dealing with info about registers in + insns. */ + +-/* Process X of INSN recursively and add info (operand type is +- given by TYPE, flag of that it is early clobber is EARLY_CLOBBER) +- about registers in X to the insn DATA. If X can be early clobbered, +- alternatives in which it can be early clobbered are given by +- EARLY_CLOBBER_ALTS. */ ++/* Process X of INSN recursively and add info (operand type is given ++ by TYPE) about registers in X to the insn DATA. If X can be early ++ clobbered, alternatives in which it can be early clobbered are given ++ by EARLY_CLOBBER_ALTS. 
*/ + static void + add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, +- rtx_insn *insn, +- enum op_type type, bool early_clobber, ++ rtx_insn *insn, enum op_type type, + alternative_mask early_clobber_alts) + { + int i, j, regno; +@@ -1487,8 +1459,7 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, + if (bitmap_set_bit (&lra_reg_info[regno].insn_bitmap, INSN_UID (insn))) + { + data->regs = new_insn_reg (data->insn, regno, type, mode, subreg_p, +- early_clobber, early_clobber_alts, +- data->regs, false); ++ early_clobber_alts, data->regs); + return; + } + else +@@ -1500,15 +1471,12 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, + /* The info cannot be integrated into the found + structure. */ + data->regs = new_insn_reg (data->insn, regno, type, mode, +- subreg_p, early_clobber, +- early_clobber_alts, data->regs, +- false); ++ subreg_p, early_clobber_alts, ++ data->regs); + else + { + if (curr->type != type) + curr->type = OP_INOUT; +- if (curr->early_clobber != early_clobber) +- curr->early_clobber = true; + curr->early_clobber_alts |= early_clobber_alts; + } + return; +@@ -1520,23 +1488,21 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, + switch (code) + { + case SET: +- add_regs_to_insn_regno_info (data, SET_DEST (x), insn, OP_OUT, false, 0); +- add_regs_to_insn_regno_info (data, SET_SRC (x), insn, OP_IN, false, 0); ++ add_regs_to_insn_regno_info (data, SET_DEST (x), insn, OP_OUT, 0); ++ add_regs_to_insn_regno_info (data, SET_SRC (x), insn, OP_IN, 0); + break; + case CLOBBER: + /* We treat clobber of non-operand hard registers as early + clobber. */ + add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_OUT, +- true, ALL_ALTERNATIVES); ++ ALL_ALTERNATIVES); + break; +- case CLOBBER_HIGH: +- gcc_unreachable (); + case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: +- add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_INOUT, false, 0); ++ add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_INOUT, 0); + break; + case PRE_MODIFY: case POST_MODIFY: +- add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_INOUT, false, 0); +- add_regs_to_insn_regno_info (data, XEXP (x, 1), insn, OP_IN, false, 0); ++ add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_INOUT, 0); ++ add_regs_to_insn_regno_info (data, XEXP (x, 1), insn, OP_IN, 0); + break; + default: + if ((code != PARALLEL && code != EXPR_LIST) || type != OP_OUT) +@@ -1557,12 +1523,12 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, + for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') +- add_regs_to_insn_regno_info (data, XEXP (x, i), insn, type, false, 0); ++ add_regs_to_insn_regno_info (data, XEXP (x, i), insn, type, 0); + else if (fmt[i] == 'E') + { + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + add_regs_to_insn_regno_info (data, XVECEXP (x, i, j), insn, +- type, false, 0); ++ type, 0); + } + } + } +@@ -1652,11 +1618,10 @@ lra_update_insn_regno_info (rtx_insn *insn) + for (i = static_data->n_operands - 1; i >= 0; i--) + add_regs_to_insn_regno_info (data, *data->operand_loc[i], insn, + static_data->operand[i].type, +- static_data->operand[i].early_clobber, + static_data->operand[i].early_clobber_alts); + if ((code = GET_CODE (PATTERN (insn))) == CLOBBER || code == USE) + add_regs_to_insn_regno_info (data, XEXP (PATTERN (insn), 0), insn, +- code == USE ? OP_IN : OP_OUT, false, 0); ++ code == USE ? 
OP_IN : OP_OUT, 0); + if (CALL_P (insn)) + /* On some targets call insns can refer to pseudos in memory in + CALL_INSN_FUNCTION_USAGE list. Process them in order to +@@ -1667,13 +1632,10 @@ lra_update_insn_regno_info (rtx_insn *insn) + link = XEXP (link, 1)) + { + code = GET_CODE (XEXP (link, 0)); +- /* We could support CLOBBER_HIGH and treat it in the same way as +- HARD_REGNO_CALL_PART_CLOBBERED, but no port needs that yet. */ +- gcc_assert (code != CLOBBER_HIGH); + if ((code == USE || code == CLOBBER) + && MEM_P (XEXP (XEXP (link, 0), 0))) + add_regs_to_insn_regno_info (data, XEXP (XEXP (link, 0), 0), insn, +- code == USE ? OP_IN : OP_OUT, false, 0); ++ code == USE ? OP_IN : OP_OUT, 0); + } + if (NONDEBUG_INSN_P (insn)) + setup_insn_reg_info (data, freq); +@@ -2400,7 +2362,7 @@ lra (FILE *f) + need it. */ + emit_note (NOTE_INSN_DELETED); + +- COPY_HARD_REG_SET (lra_no_alloc_regs, ira_no_alloc_regs); ++ lra_no_alloc_regs = ira_no_alloc_regs; + + init_reg_info (); + expand_reg_info (); +@@ -2436,7 +2398,9 @@ lra (FILE *f) + + if (crtl->saves_all_registers) + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (! call_used_regs[i] && ! fixed_regs[i] && ! LOCAL_REGNO (i)) ++ if (!crtl->abi->clobbers_full_reg_p (i) ++ && !fixed_regs[i] ++ && !LOCAL_REGNO (i)) + df_set_regs_ever_live (i, true); + + /* We don't DF from now and avoid its using because it is to +@@ -2494,19 +2458,7 @@ lra (FILE *f) + } + /* Do inheritance only for regular algorithms. */ + if (! lra_simple_p) +- { +- if (flag_ipa_ra) +- { +- if (live_p) +- lra_clear_live_ranges (); +- /* As a side-effect of lra_create_live_ranges, we calculate +- actual_call_used_reg_set, which is needed during +- lra_inheritance. */ +- lra_create_live_ranges (true, true); +- live_p = true; +- } +- lra_inheritance (); +- } ++ lra_inheritance (); + if (live_p) + lra_clear_live_ranges (); + bool fails_p; +diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c +index c1b160237..f47ac5b76 100644 +--- a/gcc/lto-streamer-out.c ++++ b/gcc/lto-streamer-out.c +@@ -1122,12 +1122,12 @@ hash_tree (struct streamer_tree_cache_d *cache, hash_map *map, + hstate.add_int (DECL_BUILT_IN_CLASS (t)); + hstate.add_flag (DECL_STATIC_CONSTRUCTOR (t)); + hstate.add_flag (DECL_STATIC_DESTRUCTOR (t)); ++ hstate.add_flag (FUNCTION_DECL_DECL_TYPE (t)); + hstate.add_flag (DECL_UNINLINABLE (t)); + hstate.add_flag (DECL_POSSIBLY_INLINED (t)); + hstate.add_flag (DECL_IS_NOVOPS (t)); + hstate.add_flag (DECL_IS_RETURNS_TWICE (t)); + hstate.add_flag (DECL_IS_MALLOC (t)); +- hstate.add_flag (DECL_IS_OPERATOR_NEW (t)); + hstate.add_flag (DECL_DECLARED_INLINE_P (t)); + hstate.add_flag (DECL_STATIC_CHAIN (t)); + hstate.add_flag (DECL_NO_INLINE_WARNING_P (t)); +@@ -1138,7 +1138,7 @@ hash_tree (struct streamer_tree_cache_d *cache, hash_map *map, + hstate.add_flag (DECL_LOOPING_CONST_OR_PURE_P (t)); + hstate.commit_flag (); + if (DECL_BUILT_IN_CLASS (t) != NOT_BUILT_IN) +- hstate.add_int (DECL_FUNCTION_CODE (t)); ++ hstate.add_int (DECL_UNCHECKED_FUNCTION_CODE (t)); + } + + if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) +diff --git a/gcc/lto/Make-lang.in b/gcc/lto/Make-lang.in +index 1b856d6d4..b7ed96eac 100644 +--- a/gcc/lto/Make-lang.in ++++ b/gcc/lto/Make-lang.in +@@ -22,7 +22,7 @@ + # The name of the LTO compiler. + LTO_EXE = lto1$(exeext) + # The LTO-specific object files inclued in $(LTO_EXE). 
+-LTO_OBJS = lto/lto-lang.o lto/lto.o lto/lto-object.o attribs.o lto/lto-partition.o lto/lto-symtab.o ++LTO_OBJS = lto/lto-lang.o lto/lto.o lto/lto-object.o attribs.o lto/lto-partition.o lto/lto-symtab.o lto/lto-common.o + lto_OBJS = $(LTO_OBJS) + + # this is only useful in a LTO bootstrap, but this does not work right +diff --git a/gcc/lto/config-lang.in b/gcc/lto/config-lang.in +index de9712504..07214365f 100644 +--- a/gcc/lto/config-lang.in ++++ b/gcc/lto/config-lang.in +@@ -20,7 +20,7 @@ + language="lto" + compilers="lto1\$(exeext)" + +-gtfiles="\$(srcdir)/lto/lto-tree.h \$(srcdir)/lto/lto-lang.c \$(srcdir)/lto/lto.c \$(srcdir)/lto/lto.h" ++gtfiles="\$(srcdir)/lto/lto-tree.h \$(srcdir)/lto/lto-lang.c \$(srcdir)/lto/lto.c \$(srcdir)/lto/lto.h \$(srcdir)/lto/lto-common.h \$(srcdir)/lto/lto-common.c" + + # LTO is a special front end. From a user's perspective it is not + # really a language, but a middle end feature. However, the GIMPLE +diff --git a/gcc/lto/lto-common.c b/gcc/lto/lto-common.c +new file mode 100644 +index 000000000..daf7f7b47 +--- /dev/null ++++ b/gcc/lto/lto-common.c +@@ -0,0 +1,2837 @@ ++/* Top-level LTO routines. ++ Copyright (C) 2009-2018 Free Software Foundation, Inc. ++ Contributed by CodeSourcery, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "function.h" ++#include "bitmap.h" ++#include "basic-block.h" ++#include "tree.h" ++#include "gimple.h" ++#include "cfghooks.h" ++#include "alloc-pool.h" ++#include "tree-pass.h" ++#include "tree-streamer.h" ++#include "cgraph.h" ++#include "opts.h" ++#include "toplev.h" ++#include "stor-layout.h" ++#include "symbol-summary.h" ++#include "tree-vrp.h" ++#include "ipa-prop.h" ++#include "common.h" ++#include "debug.h" ++#include "lto.h" ++#include "lto-section-names.h" ++#include "splay-tree.h" ++#include "lto-partition.h" ++#include "context.h" ++#include "pass_manager.h" ++#include "ipa-fnsummary.h" ++#include "params.h" ++#include "ipa-utils.h" ++#include "gomp-constants.h" ++#include "lto-symtab.h" ++#include "stringpool.h" ++#include "fold-const.h" ++#include "attribs.h" ++#include "builtins.h" ++#include "lto-common.h" ++ ++GTY(()) tree first_personality_decl; ++ ++GTY(()) const unsigned char *lto_mode_identity_table; ++ ++/* Returns a hash code for P. */ ++ ++static hashval_t ++hash_name (const void *p) ++{ ++ const struct lto_section_slot *ds = (const struct lto_section_slot *) p; ++ return (hashval_t) htab_hash_string (ds->name); ++} ++ ++ ++/* Returns nonzero if P1 and P2 are equal. 
*/ ++ ++static int ++eq_name (const void *p1, const void *p2) ++{ ++ const struct lto_section_slot *s1 = ++ (const struct lto_section_slot *) p1; ++ const struct lto_section_slot *s2 = ++ (const struct lto_section_slot *) p2; ++ ++ return strcmp (s1->name, s2->name) == 0; ++} ++ ++/* Free lto_section_slot */ ++ ++static void ++free_with_string (void *arg) ++{ ++ struct lto_section_slot *s = (struct lto_section_slot *)arg; ++ ++ free (CONST_CAST (char *, s->name)); ++ free (arg); ++} ++ ++/* Create section hash table */ ++ ++htab_t ++lto_obj_create_section_hash_table (void) ++{ ++ return htab_create (37, hash_name, eq_name, free_with_string); ++} ++ ++/* Delete an allocated integer KEY in the splay tree. */ ++ ++static void ++lto_splay_tree_delete_id (splay_tree_key key) ++{ ++ free ((void *) key); ++} ++ ++/* Compare splay tree node ids A and B. */ ++ ++static int ++lto_splay_tree_compare_ids (splay_tree_key a, splay_tree_key b) ++{ ++ unsigned HOST_WIDE_INT ai; ++ unsigned HOST_WIDE_INT bi; ++ ++ ai = *(unsigned HOST_WIDE_INT *) a; ++ bi = *(unsigned HOST_WIDE_INT *) b; ++ ++ if (ai < bi) ++ return -1; ++ else if (ai > bi) ++ return 1; ++ return 0; ++} ++ ++/* Look up splay tree node by ID in splay tree T. */ ++ ++static splay_tree_node ++lto_splay_tree_lookup (splay_tree t, unsigned HOST_WIDE_INT id) ++{ ++ return splay_tree_lookup (t, (splay_tree_key) &id); ++} ++ ++/* Check if KEY has ID. */ ++ ++static bool ++lto_splay_tree_id_equal_p (splay_tree_key key, unsigned HOST_WIDE_INT id) ++{ ++ return *(unsigned HOST_WIDE_INT *) key == id; ++} ++ ++/* Insert a splay tree node into tree T with ID as key and FILE_DATA as value. ++ The ID is allocated separately because we need HOST_WIDE_INTs which may ++ be wider than a splay_tree_key. */ ++ ++static void ++lto_splay_tree_insert (splay_tree t, unsigned HOST_WIDE_INT id, ++ struct lto_file_decl_data *file_data) ++{ ++ unsigned HOST_WIDE_INT *idp = XCNEW (unsigned HOST_WIDE_INT); ++ *idp = id; ++ splay_tree_insert (t, (splay_tree_key) idp, (splay_tree_value) file_data); ++} ++ ++/* Create a splay tree. */ ++ ++static splay_tree ++lto_splay_tree_new (void) ++{ ++ return splay_tree_new (lto_splay_tree_compare_ids, ++ lto_splay_tree_delete_id, ++ NULL); ++} ++ ++/* Decode the content of memory pointed to by DATA in the in decl ++ state object STATE. DATA_IN points to a data_in structure for ++ decoding. Return the address after the decoded object in the ++ input. */ ++ ++static const uint32_t * ++lto_read_in_decl_state (struct data_in *data_in, const uint32_t *data, ++ struct lto_in_decl_state *state) ++{ ++ uint32_t ix; ++ tree decl; ++ uint32_t i, j; ++ ++ ix = *data++; ++ state->compressed = ix & 1; ++ ix /= 2; ++ decl = streamer_tree_cache_get_tree (data_in->reader_cache, ix); ++ if (!VAR_OR_FUNCTION_DECL_P (decl)) ++ { ++ gcc_assert (decl == void_type_node); ++ decl = NULL_TREE; ++ } ++ state->fn_decl = decl; ++ ++ for (i = 0; i < LTO_N_DECL_STREAMS; i++) ++ { ++ uint32_t size = *data++; ++ vec *decls = NULL; ++ vec_alloc (decls, size); ++ ++ for (j = 0; j < size; j++) ++ vec_safe_push (decls, ++ streamer_tree_cache_get_tree (data_in->reader_cache, ++ data[j])); ++ ++ state->streams[i] = decls; ++ data += size; ++ } ++ ++ return data; ++} ++ ++ ++/* Global canonical type table. 
*/ ++static htab_t gimple_canonical_types; ++static hash_map *canonical_type_hash_cache; ++static unsigned long num_canonical_type_hash_entries; ++static unsigned long num_canonical_type_hash_queries; ++ ++static void iterative_hash_canonical_type (tree type, inchash::hash &hstate); ++static hashval_t gimple_canonical_type_hash (const void *p); ++static void gimple_register_canonical_type_1 (tree t, hashval_t hash); ++ ++/* Returning a hash value for gimple type TYPE. ++ ++ The hash value returned is equal for types considered compatible ++ by gimple_canonical_types_compatible_p. */ ++ ++static hashval_t ++hash_canonical_type (tree type) ++{ ++ inchash::hash hstate; ++ enum tree_code code; ++ ++ /* We compute alias sets only for types that needs them. ++ Be sure we do not recurse to something else as we cannot hash incomplete ++ types in a way they would have same hash value as compatible complete ++ types. */ ++ gcc_checking_assert (type_with_alias_set_p (type)); ++ ++ /* Combine a few common features of types so that types are grouped into ++ smaller sets; when searching for existing matching types to merge, ++ only existing types having the same features as the new type will be ++ checked. */ ++ code = tree_code_for_canonical_type_merging (TREE_CODE (type)); ++ hstate.add_int (code); ++ hstate.add_int (TYPE_MODE (type)); ++ ++ /* Incorporate common features of numerical types. */ ++ if (INTEGRAL_TYPE_P (type) ++ || SCALAR_FLOAT_TYPE_P (type) ++ || FIXED_POINT_TYPE_P (type) ++ || TREE_CODE (type) == OFFSET_TYPE ++ || POINTER_TYPE_P (type)) ++ { ++ hstate.add_int (TYPE_PRECISION (type)); ++ if (!type_with_interoperable_signedness (type)) ++ hstate.add_int (TYPE_UNSIGNED (type)); ++ } ++ ++ if (VECTOR_TYPE_P (type)) ++ { ++ hstate.add_poly_int (TYPE_VECTOR_SUBPARTS (type)); ++ hstate.add_int (TYPE_UNSIGNED (type)); ++ } ++ ++ if (TREE_CODE (type) == COMPLEX_TYPE) ++ hstate.add_int (TYPE_UNSIGNED (type)); ++ ++ /* Fortran's C_SIGNED_CHAR is !TYPE_STRING_FLAG but needs to be ++ interoperable with "signed char". Unless all frontends are revisited to ++ agree on these types, we must ignore the flag completely. */ ++ ++ /* Fortran standard define C_PTR type that is compatible with every ++ C pointer. For this reason we need to glob all pointers into one. ++ Still pointers in different address spaces are not compatible. */ ++ if (POINTER_TYPE_P (type)) ++ hstate.add_int (TYPE_ADDR_SPACE (TREE_TYPE (type))); ++ ++ /* For array types hash the domain bounds and the string flag. */ ++ if (TREE_CODE (type) == ARRAY_TYPE && TYPE_DOMAIN (type)) ++ { ++ hstate.add_int (TYPE_STRING_FLAG (type)); ++ /* OMP lowering can introduce error_mark_node in place of ++ random local decls in types. */ ++ if (TYPE_MIN_VALUE (TYPE_DOMAIN (type)) != error_mark_node) ++ inchash::add_expr (TYPE_MIN_VALUE (TYPE_DOMAIN (type)), hstate); ++ if (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) != error_mark_node) ++ inchash::add_expr (TYPE_MAX_VALUE (TYPE_DOMAIN (type)), hstate); ++ } ++ ++ /* Recurse for aggregates with a single element type. */ ++ if (TREE_CODE (type) == ARRAY_TYPE ++ || TREE_CODE (type) == COMPLEX_TYPE ++ || TREE_CODE (type) == VECTOR_TYPE) ++ iterative_hash_canonical_type (TREE_TYPE (type), hstate); ++ ++ /* Incorporate function return and argument types. 
*/ ++ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) ++ { ++ unsigned na; ++ tree p; ++ ++ iterative_hash_canonical_type (TREE_TYPE (type), hstate); ++ ++ for (p = TYPE_ARG_TYPES (type), na = 0; p; p = TREE_CHAIN (p)) ++ { ++ iterative_hash_canonical_type (TREE_VALUE (p), hstate); ++ na++; ++ } ++ ++ hstate.add_int (na); ++ } ++ ++ if (RECORD_OR_UNION_TYPE_P (type)) ++ { ++ unsigned nf; ++ tree f; ++ ++ for (f = TYPE_FIELDS (type), nf = 0; f; f = TREE_CHAIN (f)) ++ if (TREE_CODE (f) == FIELD_DECL ++ && (! DECL_SIZE (f) ++ || ! integer_zerop (DECL_SIZE (f)))) ++ { ++ iterative_hash_canonical_type (TREE_TYPE (f), hstate); ++ nf++; ++ } ++ ++ hstate.add_int (nf); ++ } ++ ++ return hstate.end(); ++} ++ ++/* Returning a hash value for gimple type TYPE combined with VAL. */ ++ ++static void ++iterative_hash_canonical_type (tree type, inchash::hash &hstate) ++{ ++ hashval_t v; ++ ++ /* All type variants have same TYPE_CANONICAL. */ ++ type = TYPE_MAIN_VARIANT (type); ++ ++ if (!canonical_type_used_p (type)) ++ v = hash_canonical_type (type); ++ /* An already processed type. */ ++ else if (TYPE_CANONICAL (type)) ++ { ++ type = TYPE_CANONICAL (type); ++ v = gimple_canonical_type_hash (type); ++ } ++ else ++ { ++ /* Canonical types should not be able to form SCCs by design, this ++ recursion is just because we do not register canonical types in ++ optimal order. To avoid quadratic behavior also register the ++ type here. */ ++ v = hash_canonical_type (type); ++ gimple_register_canonical_type_1 (type, v); ++ } ++ hstate.add_int (v); ++} ++ ++/* Returns the hash for a canonical type P. */ ++ ++static hashval_t ++gimple_canonical_type_hash (const void *p) ++{ ++ num_canonical_type_hash_queries++; ++ hashval_t *slot = canonical_type_hash_cache->get ((const_tree) p); ++ gcc_assert (slot != NULL); ++ return *slot; ++} ++ ++ ++ ++/* Returns nonzero if P1 and P2 are equal. */ ++ ++static int ++gimple_canonical_type_eq (const void *p1, const void *p2) ++{ ++ const_tree t1 = (const_tree) p1; ++ const_tree t2 = (const_tree) p2; ++ return gimple_canonical_types_compatible_p (CONST_CAST_TREE (t1), ++ CONST_CAST_TREE (t2)); ++} ++ ++/* Main worker for gimple_register_canonical_type. */ ++ ++static void ++gimple_register_canonical_type_1 (tree t, hashval_t hash) ++{ ++ void **slot; ++ ++ gcc_checking_assert (TYPE_P (t) && !TYPE_CANONICAL (t) ++ && type_with_alias_set_p (t) ++ && canonical_type_used_p (t)); ++ ++ slot = htab_find_slot_with_hash (gimple_canonical_types, t, hash, INSERT); ++ if (*slot) ++ { ++ tree new_type = (tree)(*slot); ++ gcc_checking_assert (new_type != t); ++ TYPE_CANONICAL (t) = new_type; ++ } ++ else ++ { ++ TYPE_CANONICAL (t) = t; ++ *slot = (void *) t; ++ /* Cache the just computed hash value. */ ++ num_canonical_type_hash_entries++; ++ bool existed_p = canonical_type_hash_cache->put (t, hash); ++ gcc_assert (!existed_p); ++ } ++} ++ ++/* Register type T in the global type table gimple_types and set ++ TYPE_CANONICAL of T accordingly. ++ This is used by LTO to merge structurally equivalent types for ++ type-based aliasing purposes across different TUs and languages. ++ ++ ??? This merging does not exactly match how the tree.c middle-end ++ functions will assign TYPE_CANONICAL when new types are created ++ during optimization (which at least happens for pointer and array ++ types). 
*/ ++ ++static void ++gimple_register_canonical_type (tree t) ++{ ++ if (TYPE_CANONICAL (t) || !type_with_alias_set_p (t) ++ || !canonical_type_used_p (t)) ++ return; ++ ++ /* Canonical types are same among all complete variants. */ ++ if (TYPE_CANONICAL (TYPE_MAIN_VARIANT (t))) ++ TYPE_CANONICAL (t) = TYPE_CANONICAL (TYPE_MAIN_VARIANT (t)); ++ else ++ { ++ gimple_register_canonical_type_1 (TYPE_MAIN_VARIANT (t), ++ hash_canonical_type (TYPE_MAIN_VARIANT (t))); ++ TYPE_CANONICAL (t) = TYPE_CANONICAL (TYPE_MAIN_VARIANT (t)); ++ } ++} ++ ++/* Re-compute TYPE_CANONICAL for NODE and related types. */ ++ ++static void ++lto_register_canonical_types (tree node, bool first_p) ++{ ++ if (!node ++ || !TYPE_P (node)) ++ return; ++ ++ if (first_p) ++ TYPE_CANONICAL (node) = NULL_TREE; ++ ++ if (POINTER_TYPE_P (node) ++ || TREE_CODE (node) == COMPLEX_TYPE ++ || TREE_CODE (node) == ARRAY_TYPE) ++ lto_register_canonical_types (TREE_TYPE (node), first_p); ++ ++ if (!first_p) ++ gimple_register_canonical_type (node); ++} ++ ++ ++/* Remember trees that contains references to declarations. */ ++vec *tree_with_vars; ++ ++#define CHECK_VAR(tt) \ ++ do \ ++ { \ ++ if ((tt) && VAR_OR_FUNCTION_DECL_P (tt) \ ++ && (TREE_PUBLIC (tt) || DECL_EXTERNAL (tt))) \ ++ return true; \ ++ } while (0) ++ ++#define CHECK_NO_VAR(tt) \ ++ gcc_checking_assert (!(tt) || !VAR_OR_FUNCTION_DECL_P (tt)) ++ ++/* Check presence of pointers to decls in fields of a tree_typed T. */ ++ ++static inline bool ++mentions_vars_p_typed (tree t) ++{ ++ CHECK_NO_VAR (TREE_TYPE (t)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of a tree_common T. */ ++ ++static inline bool ++mentions_vars_p_common (tree t) ++{ ++ if (mentions_vars_p_typed (t)) ++ return true; ++ CHECK_NO_VAR (TREE_CHAIN (t)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of a decl_minimal T. */ ++ ++static inline bool ++mentions_vars_p_decl_minimal (tree t) ++{ ++ if (mentions_vars_p_common (t)) ++ return true; ++ CHECK_NO_VAR (DECL_NAME (t)); ++ CHECK_VAR (DECL_CONTEXT (t)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of a decl_common T. */ ++ ++static inline bool ++mentions_vars_p_decl_common (tree t) ++{ ++ if (mentions_vars_p_decl_minimal (t)) ++ return true; ++ CHECK_VAR (DECL_SIZE (t)); ++ CHECK_VAR (DECL_SIZE_UNIT (t)); ++ CHECK_VAR (DECL_INITIAL (t)); ++ CHECK_NO_VAR (DECL_ATTRIBUTES (t)); ++ CHECK_VAR (DECL_ABSTRACT_ORIGIN (t)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of a decl_with_vis T. */ ++ ++static inline bool ++mentions_vars_p_decl_with_vis (tree t) ++{ ++ if (mentions_vars_p_decl_common (t)) ++ return true; ++ ++ /* Accessor macro has side-effects, use field-name here. */ ++ CHECK_NO_VAR (DECL_ASSEMBLER_NAME_RAW (t)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of a decl_non_common T. */ ++ ++static inline bool ++mentions_vars_p_decl_non_common (tree t) ++{ ++ if (mentions_vars_p_decl_with_vis (t)) ++ return true; ++ CHECK_NO_VAR (DECL_RESULT_FLD (t)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of a decl_non_common T. */ ++ ++static bool ++mentions_vars_p_function (tree t) ++{ ++ if (mentions_vars_p_decl_non_common (t)) ++ return true; ++ CHECK_NO_VAR (DECL_ARGUMENTS (t)); ++ CHECK_NO_VAR (DECL_VINDEX (t)); ++ CHECK_VAR (DECL_FUNCTION_PERSONALITY (t)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of a field_decl T. 
*/ ++ ++static bool ++mentions_vars_p_field_decl (tree t) ++{ ++ if (mentions_vars_p_decl_common (t)) ++ return true; ++ CHECK_VAR (DECL_FIELD_OFFSET (t)); ++ CHECK_NO_VAR (DECL_BIT_FIELD_TYPE (t)); ++ CHECK_NO_VAR (DECL_QUALIFIER (t)); ++ CHECK_NO_VAR (DECL_FIELD_BIT_OFFSET (t)); ++ CHECK_NO_VAR (DECL_FCONTEXT (t)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of a type T. */ ++ ++static bool ++mentions_vars_p_type (tree t) ++{ ++ if (mentions_vars_p_common (t)) ++ return true; ++ CHECK_NO_VAR (TYPE_CACHED_VALUES (t)); ++ CHECK_VAR (TYPE_SIZE (t)); ++ CHECK_VAR (TYPE_SIZE_UNIT (t)); ++ CHECK_NO_VAR (TYPE_ATTRIBUTES (t)); ++ CHECK_NO_VAR (TYPE_NAME (t)); ++ ++ CHECK_VAR (TYPE_MIN_VALUE_RAW (t)); ++ CHECK_VAR (TYPE_MAX_VALUE_RAW (t)); ++ ++ /* Accessor is for derived node types only. */ ++ CHECK_NO_VAR (TYPE_LANG_SLOT_1 (t)); ++ ++ CHECK_VAR (TYPE_CONTEXT (t)); ++ CHECK_NO_VAR (TYPE_CANONICAL (t)); ++ CHECK_NO_VAR (TYPE_MAIN_VARIANT (t)); ++ CHECK_NO_VAR (TYPE_NEXT_VARIANT (t)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of a BINFO T. */ ++ ++static bool ++mentions_vars_p_binfo (tree t) ++{ ++ unsigned HOST_WIDE_INT i, n; ++ ++ if (mentions_vars_p_common (t)) ++ return true; ++ CHECK_VAR (BINFO_VTABLE (t)); ++ CHECK_NO_VAR (BINFO_OFFSET (t)); ++ CHECK_NO_VAR (BINFO_VIRTUALS (t)); ++ CHECK_NO_VAR (BINFO_VPTR_FIELD (t)); ++ n = vec_safe_length (BINFO_BASE_ACCESSES (t)); ++ for (i = 0; i < n; i++) ++ CHECK_NO_VAR (BINFO_BASE_ACCESS (t, i)); ++ /* Do not walk BINFO_INHERITANCE_CHAIN, BINFO_SUBVTT_INDEX ++ and BINFO_VPTR_INDEX; these are used by C++ FE only. */ ++ n = BINFO_N_BASE_BINFOS (t); ++ for (i = 0; i < n; i++) ++ CHECK_NO_VAR (BINFO_BASE_BINFO (t, i)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of a CONSTRUCTOR T. */ ++ ++static bool ++mentions_vars_p_constructor (tree t) ++{ ++ unsigned HOST_WIDE_INT idx; ++ constructor_elt *ce; ++ ++ if (mentions_vars_p_typed (t)) ++ return true; ++ ++ for (idx = 0; vec_safe_iterate (CONSTRUCTOR_ELTS (t), idx, &ce); idx++) ++ { ++ CHECK_NO_VAR (ce->index); ++ CHECK_VAR (ce->value); ++ } ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of an expression tree T. */ ++ ++static bool ++mentions_vars_p_expr (tree t) ++{ ++ int i; ++ if (mentions_vars_p_typed (t)) ++ return true; ++ for (i = TREE_OPERAND_LENGTH (t) - 1; i >= 0; --i) ++ CHECK_VAR (TREE_OPERAND (t, i)); ++ return false; ++} ++ ++/* Check presence of pointers to decls in fields of an OMP_CLAUSE T. */ ++ ++static bool ++mentions_vars_p_omp_clause (tree t) ++{ ++ int i; ++ if (mentions_vars_p_common (t)) ++ return true; ++ for (i = omp_clause_num_ops[OMP_CLAUSE_CODE (t)] - 1; i >= 0; --i) ++ CHECK_VAR (OMP_CLAUSE_OPERAND (t, i)); ++ return false; ++} ++ ++/* Check presence of pointers to decls that needs later fixup in T. 
*/ ++ ++static bool ++mentions_vars_p (tree t) ++{ ++ switch (TREE_CODE (t)) ++ { ++ case IDENTIFIER_NODE: ++ break; ++ ++ case TREE_LIST: ++ CHECK_VAR (TREE_VALUE (t)); ++ CHECK_VAR (TREE_PURPOSE (t)); ++ CHECK_NO_VAR (TREE_CHAIN (t)); ++ break; ++ ++ case FIELD_DECL: ++ return mentions_vars_p_field_decl (t); ++ ++ case LABEL_DECL: ++ case CONST_DECL: ++ case PARM_DECL: ++ case RESULT_DECL: ++ case IMPORTED_DECL: ++ case NAMESPACE_DECL: ++ case NAMELIST_DECL: ++ return mentions_vars_p_decl_common (t); ++ ++ case VAR_DECL: ++ return mentions_vars_p_decl_with_vis (t); ++ ++ case TYPE_DECL: ++ return mentions_vars_p_decl_non_common (t); ++ ++ case FUNCTION_DECL: ++ return mentions_vars_p_function (t); ++ ++ case TREE_BINFO: ++ return mentions_vars_p_binfo (t); ++ ++ case PLACEHOLDER_EXPR: ++ return mentions_vars_p_common (t); ++ ++ case BLOCK: ++ case TRANSLATION_UNIT_DECL: ++ case OPTIMIZATION_NODE: ++ case TARGET_OPTION_NODE: ++ break; ++ ++ case CONSTRUCTOR: ++ return mentions_vars_p_constructor (t); ++ ++ case OMP_CLAUSE: ++ return mentions_vars_p_omp_clause (t); ++ ++ default: ++ if (TYPE_P (t)) ++ { ++ if (mentions_vars_p_type (t)) ++ return true; ++ } ++ else if (EXPR_P (t)) ++ { ++ if (mentions_vars_p_expr (t)) ++ return true; ++ } ++ else if (CONSTANT_CLASS_P (t)) ++ CHECK_NO_VAR (TREE_TYPE (t)); ++ else ++ gcc_unreachable (); ++ } ++ return false; ++} ++ ++ ++/* Return the resolution for the decl with index INDEX from DATA_IN. */ ++ ++static enum ld_plugin_symbol_resolution ++get_resolution (struct data_in *data_in, unsigned index) ++{ ++ if (data_in->globals_resolution.exists ()) ++ { ++ ld_plugin_symbol_resolution_t ret; ++ /* We can have references to not emitted functions in ++ DECL_FUNCTION_PERSONALITY at least. So we can and have ++ to indeed return LDPR_UNKNOWN in some cases. */ ++ if (data_in->globals_resolution.length () <= index) ++ return LDPR_UNKNOWN; ++ ret = data_in->globals_resolution[index]; ++ return ret; ++ } ++ else ++ /* Delay resolution finding until decl merging. */ ++ return LDPR_UNKNOWN; ++} ++ ++/* We need to record resolutions until symbol table is read. */ ++static void ++register_resolution (struct lto_file_decl_data *file_data, tree decl, ++ enum ld_plugin_symbol_resolution resolution) ++{ ++ bool existed; ++ if (resolution == LDPR_UNKNOWN) ++ return; ++ if (!file_data->resolution_map) ++ file_data->resolution_map ++ = new hash_map; ++ ld_plugin_symbol_resolution_t &res ++ = file_data->resolution_map->get_or_insert (decl, &existed); ++ if (!existed ++ || resolution == LDPR_PREVAILING_DEF_IRONLY ++ || resolution == LDPR_PREVAILING_DEF ++ || resolution == LDPR_PREVAILING_DEF_IRONLY_EXP) ++ res = resolution; ++} ++ ++/* Register DECL with the global symbol table and change its ++ name if necessary to avoid name clashes for static globals across ++ different files. */ ++ ++static void ++lto_register_var_decl_in_symtab (struct data_in *data_in, tree decl, ++ unsigned ix) ++{ ++ tree context; ++ ++ /* Variable has file scope, not local. */ ++ if (!TREE_PUBLIC (decl) ++ && !((context = decl_function_context (decl)) ++ && auto_var_in_fn_p (decl, context))) ++ rest_of_decl_compilation (decl, 1, 0); ++ ++ /* If this variable has already been declared, queue the ++ declaration for merging. */ ++ if (TREE_PUBLIC (decl)) ++ register_resolution (data_in->file_data, ++ decl, get_resolution (data_in, ix)); ++} ++ ++ ++/* Register DECL with the global symbol table and change its ++ name if necessary to avoid name clashes for static globals across ++ different files. 
DATA_IN contains descriptors and tables for the ++ file being read. */ ++ ++static void ++lto_register_function_decl_in_symtab (struct data_in *data_in, tree decl, ++ unsigned ix) ++{ ++ /* If this variable has already been declared, queue the ++ declaration for merging. */ ++ if (TREE_PUBLIC (decl) && !DECL_ABSTRACT_P (decl)) ++ register_resolution (data_in->file_data, ++ decl, get_resolution (data_in, ix)); ++} ++ ++/* Check if T is a decl and needs register its resolution info. */ ++ ++static void ++lto_maybe_register_decl (struct data_in *data_in, tree t, unsigned ix) ++{ ++ if (TREE_CODE (t) == VAR_DECL) ++ lto_register_var_decl_in_symtab (data_in, t, ix); ++ else if (TREE_CODE (t) == FUNCTION_DECL ++ && !fndecl_built_in_p (t)) ++ lto_register_function_decl_in_symtab (data_in, t, ix); ++} ++ ++ ++/* For the type T re-materialize it in the type variant list and ++ the pointer/reference-to chains. */ ++ ++static void ++lto_fixup_prevailing_type (tree t) ++{ ++ /* The following re-creates proper variant lists while fixing up ++ the variant leaders. We do not stream TYPE_NEXT_VARIANT so the ++ variant list state before fixup is broken. */ ++ ++ /* If we are not our own variant leader link us into our new leaders ++ variant list. */ ++ if (TYPE_MAIN_VARIANT (t) != t) ++ { ++ tree mv = TYPE_MAIN_VARIANT (t); ++ TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (mv); ++ TYPE_NEXT_VARIANT (mv) = t; ++ } ++ ++ /* The following reconstructs the pointer chains ++ of the new pointed-to type if we are a main variant. We do ++ not stream those so they are broken before fixup. */ ++ if (TREE_CODE (t) == POINTER_TYPE ++ && TYPE_MAIN_VARIANT (t) == t) ++ { ++ TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (TREE_TYPE (t)); ++ TYPE_POINTER_TO (TREE_TYPE (t)) = t; ++ } ++ else if (TREE_CODE (t) == REFERENCE_TYPE ++ && TYPE_MAIN_VARIANT (t) == t) ++ { ++ TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (TREE_TYPE (t)); ++ TYPE_REFERENCE_TO (TREE_TYPE (t)) = t; ++ } ++} ++ ++ ++/* We keep prevailing tree SCCs in a hashtable with manual collision ++ handling (in case all hashes compare the same) and keep the colliding ++ entries in the tree_scc->next chain. */ ++ ++struct tree_scc ++{ ++ tree_scc *next; ++ /* Hash of the whole SCC. */ ++ hashval_t hash; ++ /* Number of trees in the SCC. */ ++ unsigned len; ++ /* Number of possible entries into the SCC (tree nodes [0..entry_len-1] ++ which share the same individual tree hash). */ ++ unsigned entry_len; ++ /* The members of the SCC. ++ We only need to remember the first entry node candidate for prevailing ++ SCCs (but of course have access to all entries for SCCs we are ++ processing). ++ ??? For prevailing SCCs we really only need hash and the first ++ entry candidate, but that's too awkward to implement. 
*/ ++ tree entries[1]; ++}; ++ ++struct tree_scc_hasher : nofree_ptr_hash ++{ ++ static inline hashval_t hash (const tree_scc *); ++ static inline bool equal (const tree_scc *, const tree_scc *); ++}; ++ ++hashval_t ++tree_scc_hasher::hash (const tree_scc *scc) ++{ ++ return scc->hash; ++} ++ ++bool ++tree_scc_hasher::equal (const tree_scc *scc1, const tree_scc *scc2) ++{ ++ if (scc1->hash != scc2->hash ++ || scc1->len != scc2->len ++ || scc1->entry_len != scc2->entry_len) ++ return false; ++ return true; ++} ++ ++static hash_table *tree_scc_hash; ++static struct obstack tree_scc_hash_obstack; ++ ++static unsigned long num_merged_types; ++static unsigned long num_prevailing_types; ++static unsigned long num_type_scc_trees; ++static unsigned long total_scc_size; ++static unsigned long num_sccs_read; ++static unsigned long total_scc_size_merged; ++static unsigned long num_sccs_merged; ++static unsigned long num_scc_compares; ++static unsigned long num_scc_compare_collisions; ++ ++ ++/* Compare the two entries T1 and T2 of two SCCs that are possibly equal, ++ recursing through in-SCC tree edges. Returns true if the SCCs entered ++ through T1 and T2 are equal and fills in *MAP with the pairs of ++ SCC entries we visited, starting with (*MAP)[0] = T1 and (*MAP)[1] = T2. */ ++ ++static bool ++compare_tree_sccs_1 (tree t1, tree t2, tree **map) ++{ ++ enum tree_code code; ++ ++ /* Mark already visited nodes. */ ++ TREE_ASM_WRITTEN (t2) = 1; ++ ++ /* Push the pair onto map. */ ++ (*map)[0] = t1; ++ (*map)[1] = t2; ++ *map = *map + 2; ++ ++ /* Compare value-fields. */ ++#define compare_values(X) \ ++ do { \ ++ if (X(t1) != X(t2)) \ ++ return false; \ ++ } while (0) ++ ++ compare_values (TREE_CODE); ++ code = TREE_CODE (t1); ++ ++ if (!TYPE_P (t1)) ++ { ++ compare_values (TREE_SIDE_EFFECTS); ++ compare_values (TREE_CONSTANT); ++ compare_values (TREE_READONLY); ++ compare_values (TREE_PUBLIC); ++ } ++ compare_values (TREE_ADDRESSABLE); ++ compare_values (TREE_THIS_VOLATILE); ++ if (DECL_P (t1)) ++ compare_values (DECL_UNSIGNED); ++ else if (TYPE_P (t1)) ++ compare_values (TYPE_UNSIGNED); ++ if (TYPE_P (t1)) ++ compare_values (TYPE_ARTIFICIAL); ++ else ++ compare_values (TREE_NO_WARNING); ++ compare_values (TREE_NOTHROW); ++ compare_values (TREE_STATIC); ++ if (code != TREE_BINFO) ++ compare_values (TREE_PRIVATE); ++ compare_values (TREE_PROTECTED); ++ compare_values (TREE_DEPRECATED); ++ if (TYPE_P (t1)) ++ { ++ if (AGGREGATE_TYPE_P (t1)) ++ compare_values (TYPE_REVERSE_STORAGE_ORDER); ++ else ++ compare_values (TYPE_SATURATING); ++ compare_values (TYPE_ADDR_SPACE); ++ } ++ else if (code == SSA_NAME) ++ compare_values (SSA_NAME_IS_DEFAULT_DEF); ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_INT_CST)) ++ { ++ if (wi::to_wide (t1) != wi::to_wide (t2)) ++ return false; ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_REAL_CST)) ++ { ++ /* ??? No suitable compare routine available. 
*/ ++ REAL_VALUE_TYPE r1 = TREE_REAL_CST (t1); ++ REAL_VALUE_TYPE r2 = TREE_REAL_CST (t2); ++ if (r1.cl != r2.cl ++ || r1.decimal != r2.decimal ++ || r1.sign != r2.sign ++ || r1.signalling != r2.signalling ++ || r1.canonical != r2.canonical ++ || r1.uexp != r2.uexp) ++ return false; ++ for (unsigned i = 0; i < SIGSZ; ++i) ++ if (r1.sig[i] != r2.sig[i]) ++ return false; ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_FIXED_CST)) ++ if (!fixed_compare (EQ_EXPR, ++ TREE_FIXED_CST_PTR (t1), TREE_FIXED_CST_PTR (t2))) ++ return false; ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_VECTOR)) ++ { ++ compare_values (VECTOR_CST_LOG2_NPATTERNS); ++ compare_values (VECTOR_CST_NELTS_PER_PATTERN); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) ++ { ++ compare_values (DECL_MODE); ++ compare_values (DECL_NONLOCAL); ++ compare_values (DECL_VIRTUAL_P); ++ compare_values (DECL_IGNORED_P); ++ compare_values (DECL_ABSTRACT_P); ++ compare_values (DECL_ARTIFICIAL); ++ compare_values (DECL_USER_ALIGN); ++ compare_values (DECL_PRESERVE_P); ++ compare_values (DECL_EXTERNAL); ++ compare_values (DECL_GIMPLE_REG_P); ++ compare_values (DECL_ALIGN); ++ if (code == LABEL_DECL) ++ { ++ compare_values (EH_LANDING_PAD_NR); ++ compare_values (LABEL_DECL_UID); ++ } ++ else if (code == FIELD_DECL) ++ { ++ compare_values (DECL_PACKED); ++ compare_values (DECL_NONADDRESSABLE_P); ++ compare_values (DECL_PADDING_P); ++ compare_values (DECL_OFFSET_ALIGN); ++ } ++ else if (code == VAR_DECL) ++ { ++ compare_values (DECL_HAS_DEBUG_EXPR_P); ++ compare_values (DECL_NONLOCAL_FRAME); ++ } ++ if (code == RESULT_DECL ++ || code == PARM_DECL ++ || code == VAR_DECL) ++ { ++ compare_values (DECL_BY_REFERENCE); ++ if (code == VAR_DECL ++ || code == PARM_DECL) ++ compare_values (DECL_HAS_VALUE_EXPR_P); ++ } ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_DECL_WRTL)) ++ compare_values (DECL_REGISTER); ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) ++ { ++ compare_values (DECL_COMMON); ++ compare_values (DECL_DLLIMPORT_P); ++ compare_values (DECL_WEAK); ++ compare_values (DECL_SEEN_IN_BIND_EXPR_P); ++ compare_values (DECL_COMDAT); ++ compare_values (DECL_VISIBILITY); ++ compare_values (DECL_VISIBILITY_SPECIFIED); ++ if (code == VAR_DECL) ++ { ++ compare_values (DECL_HARD_REGISTER); ++ /* DECL_IN_TEXT_SECTION is set during final asm output only. 
*/ ++ compare_values (DECL_IN_CONSTANT_POOL); ++ } ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) ++ { ++ compare_values (DECL_BUILT_IN_CLASS); ++ compare_values (DECL_STATIC_CONSTRUCTOR); ++ compare_values (DECL_STATIC_DESTRUCTOR); ++ compare_values (DECL_UNINLINABLE); ++ compare_values (DECL_POSSIBLY_INLINED); ++ compare_values (DECL_IS_NOVOPS); ++ compare_values (DECL_IS_RETURNS_TWICE); ++ compare_values (DECL_IS_MALLOC); ++ compare_values (DECL_IS_OPERATOR_NEW_P); ++ compare_values (DECL_DECLARED_INLINE_P); ++ compare_values (DECL_STATIC_CHAIN); ++ compare_values (DECL_NO_INLINE_WARNING_P); ++ compare_values (DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT); ++ compare_values (DECL_NO_LIMIT_STACK); ++ compare_values (DECL_DISREGARD_INLINE_LIMITS); ++ compare_values (DECL_PURE_P); ++ compare_values (DECL_LOOPING_CONST_OR_PURE_P); ++ compare_values (DECL_FINAL_P); ++ compare_values (DECL_CXX_CONSTRUCTOR_P); ++ compare_values (DECL_CXX_DESTRUCTOR_P); ++ if (DECL_BUILT_IN_CLASS (t1) != NOT_BUILT_IN) ++ compare_values (DECL_UNCHECKED_FUNCTION_CODE); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) ++ { ++ compare_values (TYPE_MODE); ++ compare_values (TYPE_STRING_FLAG); ++ compare_values (TYPE_NEEDS_CONSTRUCTING); ++ if (RECORD_OR_UNION_TYPE_P (t1)) ++ { ++ compare_values (TYPE_TRANSPARENT_AGGR); ++ compare_values (TYPE_FINAL_P); ++ } ++ else if (code == ARRAY_TYPE) ++ compare_values (TYPE_NONALIASED_COMPONENT); ++ if (AGGREGATE_TYPE_P (t1)) ++ compare_values (TYPE_TYPELESS_STORAGE); ++ compare_values (TYPE_EMPTY_P); ++ compare_values (TYPE_PACKED); ++ compare_values (TYPE_RESTRICT); ++ compare_values (TYPE_USER_ALIGN); ++ compare_values (TYPE_READONLY); ++ compare_values (TYPE_PRECISION); ++ compare_values (TYPE_ALIGN); ++ /* Do not compare TYPE_ALIAS_SET. Doing so introduce ordering issues ++ with calls to get_alias_set which may initialize it for streamed ++ in types. */ ++ } ++ ++ /* We don't want to compare locations, so there is nothing do compare ++ for TS_EXP. */ ++ ++ /* BLOCKs are function local and we don't merge anything there, so ++ simply refuse to merge. 
*/ ++ if (CODE_CONTAINS_STRUCT (code, TS_BLOCK)) ++ return false; ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_TRANSLATION_UNIT_DECL)) ++ if (strcmp (TRANSLATION_UNIT_LANGUAGE (t1), ++ TRANSLATION_UNIT_LANGUAGE (t2)) != 0) ++ return false; ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION)) ++ if (!cl_target_option_eq (TREE_TARGET_OPTION (t1), TREE_TARGET_OPTION (t2))) ++ return false; ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION)) ++ if (!cl_optimization_option_eq (TREE_OPTIMIZATION (t1), ++ TREE_OPTIMIZATION (t2))) ++ return false; ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_BINFO)) ++ if (vec_safe_length (BINFO_BASE_ACCESSES (t1)) ++ != vec_safe_length (BINFO_BASE_ACCESSES (t2))) ++ return false; ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) ++ compare_values (CONSTRUCTOR_NELTS); ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_IDENTIFIER)) ++ if (IDENTIFIER_LENGTH (t1) != IDENTIFIER_LENGTH (t2) ++ || memcmp (IDENTIFIER_POINTER (t1), IDENTIFIER_POINTER (t2), ++ IDENTIFIER_LENGTH (t1)) != 0) ++ return false; ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_STRING)) ++ if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2) ++ || memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2), ++ TREE_STRING_LENGTH (t1)) != 0) ++ return false; ++ ++ if (code == OMP_CLAUSE) ++ { ++ compare_values (OMP_CLAUSE_CODE); ++ switch (OMP_CLAUSE_CODE (t1)) ++ { ++ case OMP_CLAUSE_DEFAULT: ++ compare_values (OMP_CLAUSE_DEFAULT_KIND); ++ break; ++ case OMP_CLAUSE_SCHEDULE: ++ compare_values (OMP_CLAUSE_SCHEDULE_KIND); ++ break; ++ case OMP_CLAUSE_DEPEND: ++ compare_values (OMP_CLAUSE_DEPEND_KIND); ++ break; ++ case OMP_CLAUSE_MAP: ++ compare_values (OMP_CLAUSE_MAP_KIND); ++ break; ++ case OMP_CLAUSE_PROC_BIND: ++ compare_values (OMP_CLAUSE_PROC_BIND_KIND); ++ break; ++ case OMP_CLAUSE_REDUCTION: ++ compare_values (OMP_CLAUSE_REDUCTION_CODE); ++ compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_INIT); ++ compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_MERGE); ++ break; ++ default: ++ break; ++ } ++ } ++ ++#undef compare_values ++ ++ ++ /* Compare pointer fields. */ ++ ++ /* Recurse. Search & Replaced from DFS_write_tree_body. ++ Folding the early checks into the compare_tree_edges recursion ++ macro makes debugging way quicker as you are able to break on ++ compare_tree_sccs_1 and simply finish until a call returns false ++ to spot the SCC members with the difference. */ ++#define compare_tree_edges(E1, E2) \ ++ do { \ ++ tree t1_ = (E1), t2_ = (E2); \ ++ if (t1_ != t2_ \ ++ && (!t1_ || !t2_ \ ++ || !TREE_VISITED (t2_) \ ++ || (!TREE_ASM_WRITTEN (t2_) \ ++ && !compare_tree_sccs_1 (t1_, t2_, map)))) \ ++ return false; \ ++ /* Only non-NULL trees outside of the SCC may compare equal. */ \ ++ gcc_checking_assert (t1_ != t2_ || (!t2_ || !TREE_VISITED (t2_))); \ ++ } while (0) ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_TYPED)) ++ { ++ if (code != IDENTIFIER_NODE) ++ compare_tree_edges (TREE_TYPE (t1), TREE_TYPE (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_VECTOR)) ++ { ++ /* Note that the number of elements for EXPR has already been emitted ++ in EXPR's header (see streamer_write_tree_header). 
*/ ++ unsigned int count = vector_cst_encoded_nelts (t1); ++ for (unsigned int i = 0; i < count; ++i) ++ compare_tree_edges (VECTOR_CST_ENCODED_ELT (t1, i), ++ VECTOR_CST_ENCODED_ELT (t2, i)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_COMPLEX)) ++ { ++ compare_tree_edges (TREE_REALPART (t1), TREE_REALPART (t2)); ++ compare_tree_edges (TREE_IMAGPART (t1), TREE_IMAGPART (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_DECL_MINIMAL)) ++ { ++ compare_tree_edges (DECL_NAME (t1), DECL_NAME (t2)); ++ /* ??? Global decls from different TUs have non-matching ++ TRANSLATION_UNIT_DECLs. Only consider a small set of ++ decls equivalent, we should not end up merging others. */ ++ if ((code == TYPE_DECL ++ || code == NAMESPACE_DECL ++ || code == IMPORTED_DECL ++ || code == CONST_DECL ++ || (VAR_OR_FUNCTION_DECL_P (t1) ++ && (TREE_PUBLIC (t1) || DECL_EXTERNAL (t1)))) ++ && DECL_FILE_SCOPE_P (t1) && DECL_FILE_SCOPE_P (t2)) ++ ; ++ else ++ compare_tree_edges (DECL_CONTEXT (t1), DECL_CONTEXT (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) ++ { ++ compare_tree_edges (DECL_SIZE (t1), DECL_SIZE (t2)); ++ compare_tree_edges (DECL_SIZE_UNIT (t1), DECL_SIZE_UNIT (t2)); ++ compare_tree_edges (DECL_ATTRIBUTES (t1), DECL_ATTRIBUTES (t2)); ++ compare_tree_edges (DECL_ABSTRACT_ORIGIN (t1), DECL_ABSTRACT_ORIGIN (t2)); ++ if ((code == VAR_DECL ++ || code == PARM_DECL) ++ && DECL_HAS_VALUE_EXPR_P (t1)) ++ compare_tree_edges (DECL_VALUE_EXPR (t1), DECL_VALUE_EXPR (t2)); ++ if (code == VAR_DECL ++ && DECL_HAS_DEBUG_EXPR_P (t1)) ++ compare_tree_edges (DECL_DEBUG_EXPR (t1), DECL_DEBUG_EXPR (t2)); ++ /* LTO specific edges. */ ++ if (code != FUNCTION_DECL ++ && code != TRANSLATION_UNIT_DECL) ++ compare_tree_edges (DECL_INITIAL (t1), DECL_INITIAL (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON)) ++ { ++ if (code == FUNCTION_DECL) ++ { ++ tree a1, a2; ++ for (a1 = DECL_ARGUMENTS (t1), a2 = DECL_ARGUMENTS (t2); ++ a1 || a2; ++ a1 = TREE_CHAIN (a1), a2 = TREE_CHAIN (a2)) ++ compare_tree_edges (a1, a2); ++ compare_tree_edges (DECL_RESULT (t1), DECL_RESULT (t2)); ++ } ++ else if (code == TYPE_DECL) ++ compare_tree_edges (DECL_ORIGINAL_TYPE (t1), DECL_ORIGINAL_TYPE (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) ++ { ++ /* Make sure we don't inadvertently set the assembler name. 
*/ ++ if (DECL_ASSEMBLER_NAME_SET_P (t1)) ++ compare_tree_edges (DECL_ASSEMBLER_NAME (t1), ++ DECL_ASSEMBLER_NAME (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL)) ++ { ++ compare_tree_edges (DECL_FIELD_OFFSET (t1), DECL_FIELD_OFFSET (t2)); ++ compare_tree_edges (DECL_BIT_FIELD_TYPE (t1), DECL_BIT_FIELD_TYPE (t2)); ++ compare_tree_edges (DECL_BIT_FIELD_REPRESENTATIVE (t1), ++ DECL_BIT_FIELD_REPRESENTATIVE (t2)); ++ compare_tree_edges (DECL_FIELD_BIT_OFFSET (t1), ++ DECL_FIELD_BIT_OFFSET (t2)); ++ compare_tree_edges (DECL_FCONTEXT (t1), DECL_FCONTEXT (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) ++ { ++ compare_tree_edges (DECL_FUNCTION_PERSONALITY (t1), ++ DECL_FUNCTION_PERSONALITY (t2)); ++ compare_tree_edges (DECL_VINDEX (t1), DECL_VINDEX (t2)); ++ compare_tree_edges (DECL_FUNCTION_SPECIFIC_TARGET (t1), ++ DECL_FUNCTION_SPECIFIC_TARGET (t2)); ++ compare_tree_edges (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (t1), ++ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) ++ { ++ compare_tree_edges (TYPE_SIZE (t1), TYPE_SIZE (t2)); ++ compare_tree_edges (TYPE_SIZE_UNIT (t1), TYPE_SIZE_UNIT (t2)); ++ compare_tree_edges (TYPE_ATTRIBUTES (t1), TYPE_ATTRIBUTES (t2)); ++ compare_tree_edges (TYPE_NAME (t1), TYPE_NAME (t2)); ++ /* Do not compare TYPE_POINTER_TO or TYPE_REFERENCE_TO. They will be ++ reconstructed during fixup. */ ++ /* Do not compare TYPE_NEXT_VARIANT, we reconstruct the variant lists ++ during fixup. */ ++ compare_tree_edges (TYPE_MAIN_VARIANT (t1), TYPE_MAIN_VARIANT (t2)); ++ /* ??? Global types from different TUs have non-matching ++ TRANSLATION_UNIT_DECLs. Still merge them if they are otherwise ++ equal. */ ++ if (TYPE_FILE_SCOPE_P (t1) && TYPE_FILE_SCOPE_P (t2)) ++ ; ++ else ++ compare_tree_edges (TYPE_CONTEXT (t1), TYPE_CONTEXT (t2)); ++ /* TYPE_CANONICAL is re-computed during type merging, so do not ++ compare it here. */ ++ compare_tree_edges (TYPE_STUB_DECL (t1), TYPE_STUB_DECL (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_TYPE_NON_COMMON)) ++ { ++ if (code == ENUMERAL_TYPE) ++ compare_tree_edges (TYPE_VALUES (t1), TYPE_VALUES (t2)); ++ else if (code == ARRAY_TYPE) ++ compare_tree_edges (TYPE_DOMAIN (t1), TYPE_DOMAIN (t2)); ++ else if (RECORD_OR_UNION_TYPE_P (t1)) ++ { ++ tree f1, f2; ++ for (f1 = TYPE_FIELDS (t1), f2 = TYPE_FIELDS (t2); ++ f1 || f2; ++ f1 = TREE_CHAIN (f1), f2 = TREE_CHAIN (f2)) ++ compare_tree_edges (f1, f2); ++ } ++ else if (code == FUNCTION_TYPE ++ || code == METHOD_TYPE) ++ compare_tree_edges (TYPE_ARG_TYPES (t1), TYPE_ARG_TYPES (t2)); ++ ++ if (!POINTER_TYPE_P (t1)) ++ compare_tree_edges (TYPE_MIN_VALUE_RAW (t1), TYPE_MIN_VALUE_RAW (t2)); ++ compare_tree_edges (TYPE_MAX_VALUE_RAW (t1), TYPE_MAX_VALUE_RAW (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_LIST)) ++ { ++ compare_tree_edges (TREE_PURPOSE (t1), TREE_PURPOSE (t2)); ++ compare_tree_edges (TREE_VALUE (t1), TREE_VALUE (t2)); ++ compare_tree_edges (TREE_CHAIN (t1), TREE_CHAIN (t2)); ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_VEC)) ++ for (int i = 0; i < TREE_VEC_LENGTH (t1); i++) ++ compare_tree_edges (TREE_VEC_ELT (t1, i), TREE_VEC_ELT (t2, i)); ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_EXP)) ++ { ++ for (int i = 0; i < TREE_OPERAND_LENGTH (t1); i++) ++ compare_tree_edges (TREE_OPERAND (t1, i), ++ TREE_OPERAND (t2, i)); ++ ++ /* BLOCKs are function local and we don't merge anything there. 
*/ ++ if (TREE_BLOCK (t1) || TREE_BLOCK (t2)) ++ return false; ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_BINFO)) ++ { ++ unsigned i; ++ tree t; ++ /* Lengths have already been compared above. */ ++ FOR_EACH_VEC_ELT (*BINFO_BASE_BINFOS (t1), i, t) ++ compare_tree_edges (t, BINFO_BASE_BINFO (t2, i)); ++ FOR_EACH_VEC_SAFE_ELT (BINFO_BASE_ACCESSES (t1), i, t) ++ compare_tree_edges (t, BINFO_BASE_ACCESS (t2, i)); ++ compare_tree_edges (BINFO_OFFSET (t1), BINFO_OFFSET (t2)); ++ compare_tree_edges (BINFO_VTABLE (t1), BINFO_VTABLE (t2)); ++ compare_tree_edges (BINFO_VPTR_FIELD (t1), BINFO_VPTR_FIELD (t2)); ++ /* Do not walk BINFO_INHERITANCE_CHAIN, BINFO_SUBVTT_INDEX ++ and BINFO_VPTR_INDEX; these are used by C++ FE only. */ ++ } ++ ++ if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) ++ { ++ unsigned i; ++ tree index, value; ++ /* Lengths have already been compared above. */ ++ FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (t1), i, index, value) ++ { ++ compare_tree_edges (index, CONSTRUCTOR_ELT (t2, i)->index); ++ compare_tree_edges (value, CONSTRUCTOR_ELT (t2, i)->value); ++ } ++ } ++ ++ if (code == OMP_CLAUSE) ++ { ++ int i; ++ ++ for (i = 0; i < omp_clause_num_ops[OMP_CLAUSE_CODE (t1)]; i++) ++ compare_tree_edges (OMP_CLAUSE_OPERAND (t1, i), ++ OMP_CLAUSE_OPERAND (t2, i)); ++ compare_tree_edges (OMP_CLAUSE_CHAIN (t1), OMP_CLAUSE_CHAIN (t2)); ++ } ++ ++#undef compare_tree_edges ++ ++ return true; ++} ++ ++/* Compare the tree scc SCC to the prevailing candidate PSCC, filling ++ out MAP if they are equal. */ ++ ++static bool ++compare_tree_sccs (tree_scc *pscc, tree_scc *scc, ++ tree *map) ++{ ++ /* Assume SCC entry hashes are sorted after their cardinality. Which ++ means we can simply take the first n-tuple of equal hashes ++ (which is recorded as entry_len) and do n SCC entry candidate ++ comparisons. */ ++ for (unsigned i = 0; i < pscc->entry_len; ++i) ++ { ++ tree *mapp = map; ++ num_scc_compare_collisions++; ++ if (compare_tree_sccs_1 (pscc->entries[0], scc->entries[i], &mapp)) ++ { ++ /* Equal - no need to reset TREE_VISITED or TREE_ASM_WRITTEN ++ on the scc as all trees will be freed. */ ++ return true; ++ } ++ /* Reset TREE_ASM_WRITTEN on scc for the next compare or in case ++ the SCC prevails. */ ++ for (unsigned j = 0; j < scc->len; ++j) ++ TREE_ASM_WRITTEN (scc->entries[j]) = 0; ++ } ++ ++ return false; ++} ++ ++/* QSort sort function to sort a map of two pointers after the 2nd ++ pointer. */ ++ ++static int ++cmp_tree (const void *p1_, const void *p2_) ++{ ++ tree *p1 = (tree *)(const_cast(p1_)); ++ tree *p2 = (tree *)(const_cast(p2_)); ++ if (p1[1] == p2[1]) ++ return 0; ++ return ((uintptr_t)p1[1] < (uintptr_t)p2[1]) ? -1 : 1; ++} ++ ++/* Try to unify the SCC with nodes FROM to FROM + LEN in CACHE and ++ hash value SCC_HASH with an already recorded SCC. Return true if ++ that was successful, otherwise return false. */ ++ ++static bool ++unify_scc (struct data_in *data_in, unsigned from, ++ unsigned len, unsigned scc_entry_len, hashval_t scc_hash) ++{ ++ bool unified_p = false; ++ struct streamer_tree_cache_d *cache = data_in->reader_cache; ++ tree_scc *scc ++ = (tree_scc *) alloca (sizeof (tree_scc) + (len - 1) * sizeof (tree)); ++ scc->next = NULL; ++ scc->hash = scc_hash; ++ scc->len = len; ++ scc->entry_len = scc_entry_len; ++ for (unsigned i = 0; i < len; ++i) ++ { ++ tree t = streamer_tree_cache_get_tree (cache, from + i); ++ scc->entries[i] = t; ++ /* Do not merge SCCs with local entities inside them. Also do ++ not merge TRANSLATION_UNIT_DECLs. 
*/ ++ if (TREE_CODE (t) == TRANSLATION_UNIT_DECL ++ || (VAR_OR_FUNCTION_DECL_P (t) ++ && !(TREE_PUBLIC (t) || DECL_EXTERNAL (t))) ++ || TREE_CODE (t) == LABEL_DECL) ++ { ++ /* Avoid doing any work for these cases and do not worry to ++ record the SCCs for further merging. */ ++ return false; ++ } ++ } ++ ++ /* Look for the list of candidate SCCs to compare against. */ ++ tree_scc **slot; ++ slot = tree_scc_hash->find_slot_with_hash (scc, scc_hash, INSERT); ++ if (*slot) ++ { ++ /* Try unifying against each candidate. */ ++ num_scc_compares++; ++ ++ /* Set TREE_VISITED on the scc so we can easily identify tree nodes ++ outside of the scc when following tree edges. Make sure ++ that TREE_ASM_WRITTEN is unset so we can use it as 2nd bit ++ to track whether we visited the SCC member during the compare. ++ We cannot use TREE_VISITED on the pscc members as the extended ++ scc and pscc can overlap. */ ++ for (unsigned i = 0; i < scc->len; ++i) ++ { ++ TREE_VISITED (scc->entries[i]) = 1; ++ gcc_checking_assert (!TREE_ASM_WRITTEN (scc->entries[i])); ++ } ++ ++ tree *map = XALLOCAVEC (tree, 2 * len); ++ for (tree_scc *pscc = *slot; pscc; pscc = pscc->next) ++ { ++ if (!compare_tree_sccs (pscc, scc, map)) ++ continue; ++ ++ /* Found an equal SCC. */ ++ unified_p = true; ++ num_scc_compare_collisions--; ++ num_sccs_merged++; ++ total_scc_size_merged += len; ++ ++ if (flag_checking) ++ for (unsigned i = 0; i < len; ++i) ++ { ++ tree t = map[2*i+1]; ++ enum tree_code code = TREE_CODE (t); ++ /* IDENTIFIER_NODEs should be singletons and are merged by the ++ streamer. The others should be singletons, too, and we ++ should not merge them in any way. */ ++ gcc_assert (code != TRANSLATION_UNIT_DECL ++ && code != IDENTIFIER_NODE); ++ } ++ ++ /* Fixup the streamer cache with the prevailing nodes according ++ to the tree node mapping computed by compare_tree_sccs. */ ++ if (len == 1) ++ { ++ /* If we got a debug reference queued, see if the prevailing ++ tree has a debug reference and if not, register the one ++ for the tree we are about to throw away. */ ++ if (dref_queue.length () == 1) ++ { ++ dref_entry e = dref_queue.pop (); ++ gcc_assert (e.decl ++ == streamer_tree_cache_get_tree (cache, from)); ++ const char *sym; ++ unsigned HOST_WIDE_INT off; ++ if (!debug_hooks->die_ref_for_decl (pscc->entries[0], &sym, ++ &off)) ++ debug_hooks->register_external_die (pscc->entries[0], ++ e.sym, e.off); ++ } ++ lto_maybe_register_decl (data_in, pscc->entries[0], from); ++ streamer_tree_cache_replace_tree (cache, pscc->entries[0], from); ++ } ++ else ++ { ++ tree *map2 = XALLOCAVEC (tree, 2 * len); ++ for (unsigned i = 0; i < len; ++i) ++ { ++ map2[i*2] = (tree)(uintptr_t)(from + i); ++ map2[i*2+1] = scc->entries[i]; ++ } ++ qsort (map2, len, 2 * sizeof (tree), cmp_tree); ++ qsort (map, len, 2 * sizeof (tree), cmp_tree); ++ for (unsigned i = 0; i < len; ++i) ++ { ++ lto_maybe_register_decl (data_in, map[2*i], ++ (uintptr_t)map2[2*i]); ++ streamer_tree_cache_replace_tree (cache, map[2*i], ++ (uintptr_t)map2[2*i]); ++ } ++ } ++ ++ /* Free the tree nodes from the read SCC. */ ++ data_in->location_cache.revert_location_cache (); ++ for (unsigned i = 0; i < len; ++i) ++ { ++ if (TYPE_P (scc->entries[i])) ++ num_merged_types++; ++ free_node (scc->entries[i]); ++ } ++ ++ /* Drop DIE references. ++ ??? Do as in the size-one SCC case which involves sorting ++ the queue. */ ++ dref_queue.truncate (0); ++ ++ break; ++ } ++ ++ /* Reset TREE_VISITED if we didn't unify the SCC with another. 
*/ ++ if (!unified_p) ++ for (unsigned i = 0; i < scc->len; ++i) ++ TREE_VISITED (scc->entries[i]) = 0; ++ } ++ ++ /* If we didn't unify it to any candidate duplicate the relevant ++ pieces to permanent storage and link it into the chain. */ ++ if (!unified_p) ++ { ++ tree_scc *pscc ++ = XOBNEWVAR (&tree_scc_hash_obstack, tree_scc, sizeof (tree_scc)); ++ memcpy (pscc, scc, sizeof (tree_scc)); ++ pscc->next = (*slot); ++ *slot = pscc; ++ } ++ return unified_p; ++} ++ ++ ++/* Read all the symbols from buffer DATA, using descriptors in DECL_DATA. ++ RESOLUTIONS is the set of symbols picked by the linker (read from the ++ resolution file when the linker plugin is being used). */ ++ ++static void ++lto_read_decls (struct lto_file_decl_data *decl_data, const void *data, ++ vec resolutions) ++{ ++ const struct lto_decl_header *header = (const struct lto_decl_header *) data; ++ const int decl_offset = sizeof (struct lto_decl_header); ++ const int main_offset = decl_offset + header->decl_state_size; ++ const int string_offset = main_offset + header->main_size; ++ struct data_in *data_in; ++ unsigned int i; ++ const uint32_t *data_ptr, *data_end; ++ uint32_t num_decl_states; ++ ++ lto_input_block ib_main ((const char *) data + main_offset, ++ header->main_size, decl_data->mode_table); ++ ++ data_in = lto_data_in_create (decl_data, (const char *) data + string_offset, ++ header->string_size, resolutions); ++ ++ /* We do not uniquify the pre-loaded cache entries, those are middle-end ++ internal types that should not be merged. */ ++ ++ /* Read the global declarations and types. */ ++ while (ib_main.p < ib_main.len) ++ { ++ tree t; ++ unsigned from = data_in->reader_cache->nodes.length (); ++ /* Read and uniquify SCCs as in the input stream. */ ++ enum LTO_tags tag = streamer_read_record_start (&ib_main); ++ if (tag == LTO_tree_scc) ++ { ++ unsigned len_; ++ unsigned scc_entry_len; ++ hashval_t scc_hash = lto_input_scc (&ib_main, data_in, &len_, ++ &scc_entry_len); ++ unsigned len = data_in->reader_cache->nodes.length () - from; ++ gcc_assert (len == len_); ++ ++ total_scc_size += len; ++ num_sccs_read++; ++ ++ /* We have the special case of size-1 SCCs that are pre-merged ++ by means of identifier and string sharing for example. ++ ??? Maybe we should avoid streaming those as SCCs. */ ++ tree first = streamer_tree_cache_get_tree (data_in->reader_cache, ++ from); ++ if (len == 1 ++ && (TREE_CODE (first) == IDENTIFIER_NODE ++ || (TREE_CODE (first) == INTEGER_CST ++ && !TREE_OVERFLOW (first)))) ++ continue; ++ ++ /* Try to unify the SCC with already existing ones. */ ++ if (!flag_ltrans ++ && unify_scc (data_in, from, ++ len, scc_entry_len, scc_hash)) ++ continue; ++ ++ /* Tree merging failed, mark entries in location cache as ++ permanent. */ ++ data_in->location_cache.accept_location_cache (); ++ ++ bool seen_type = false; ++ for (unsigned i = 0; i < len; ++i) ++ { ++ tree t = streamer_tree_cache_get_tree (data_in->reader_cache, ++ from + i); ++ /* Reconstruct the type variant and pointer-to/reference-to ++ chains. */ ++ if (TYPE_P (t)) ++ { ++ seen_type = true; ++ num_prevailing_types++; ++ lto_fixup_prevailing_type (t); ++ ++ /* Compute the canonical type of all types. ++ Because SCC components are streamed in random (hash) order ++ we may have encountered the type before while registering ++ type canonical of a derived type in the same SCC. 
*/ ++ if (!TYPE_CANONICAL (t)) ++ gimple_register_canonical_type (t); ++ if (TYPE_MAIN_VARIANT (t) == t && odr_type_p (t)) ++ register_odr_type (t); ++ } ++ /* Link shared INTEGER_CSTs into TYPE_CACHED_VALUEs of its ++ type which is also member of this SCC. */ ++ if (TREE_CODE (t) == INTEGER_CST ++ && !TREE_OVERFLOW (t)) ++ cache_integer_cst (t); ++ if (!flag_ltrans) ++ { ++ lto_maybe_register_decl (data_in, t, from + i); ++ /* Scan the tree for references to global functions or ++ variables and record those for later fixup. */ ++ if (mentions_vars_p (t)) ++ vec_safe_push (tree_with_vars, t); ++ } ++ } ++ ++ /* Register DECLs with the debuginfo machinery. */ ++ while (!dref_queue.is_empty ()) ++ { ++ dref_entry e = dref_queue.pop (); ++ debug_hooks->register_external_die (e.decl, e.sym, e.off); ++ } ++ ++ if (seen_type) ++ num_type_scc_trees += len; ++ } ++ else ++ { ++ /* Pickle stray references. */ ++ t = lto_input_tree_1 (&ib_main, data_in, tag, 0); ++ gcc_assert (t && data_in->reader_cache->nodes.length () == from); ++ } ++ } ++ data_in->location_cache.apply_location_cache (); ++ ++ /* Read in lto_in_decl_state objects. */ ++ data_ptr = (const uint32_t *) ((const char*) data + decl_offset); ++ data_end = ++ (const uint32_t *) ((const char*) data_ptr + header->decl_state_size); ++ num_decl_states = *data_ptr++; ++ ++ gcc_assert (num_decl_states > 0); ++ decl_data->global_decl_state = lto_new_in_decl_state (); ++ data_ptr = lto_read_in_decl_state (data_in, data_ptr, ++ decl_data->global_decl_state); ++ ++ /* Read in per-function decl states and enter them in hash table. */ ++ decl_data->function_decl_states = ++ hash_table::create_ggc (37); ++ ++ for (i = 1; i < num_decl_states; i++) ++ { ++ struct lto_in_decl_state *state = lto_new_in_decl_state (); ++ ++ data_ptr = lto_read_in_decl_state (data_in, data_ptr, state); ++ lto_in_decl_state **slot ++ = decl_data->function_decl_states->find_slot (state, INSERT); ++ gcc_assert (*slot == NULL); ++ *slot = state; ++ } ++ ++ if (data_ptr != data_end) ++ internal_error ("bytecode stream: garbage at the end of symbols section"); ++ ++ /* Set the current decl state to be the global state. */ ++ decl_data->current_decl_state = decl_data->global_decl_state; ++ ++ lto_data_in_delete (data_in); ++} ++ ++/* Custom version of strtoll, which is not portable. */ ++ ++static int64_t ++lto_parse_hex (const char *p) ++{ ++ int64_t ret = 0; ++ ++ for (; *p != '\0'; ++p) ++ { ++ char c = *p; ++ unsigned char part; ++ ret <<= 4; ++ if (c >= '0' && c <= '9') ++ part = c - '0'; ++ else if (c >= 'a' && c <= 'f') ++ part = c - 'a' + 10; ++ else if (c >= 'A' && c <= 'F') ++ part = c - 'A' + 10; ++ else ++ internal_error ("could not parse hex number"); ++ ret |= part; ++ } ++ ++ return ret; ++} ++ ++/* Read resolution for file named FILE_NAME. The resolution is read from ++ RESOLUTION. */ ++ ++static void ++lto_resolution_read (splay_tree file_ids, FILE *resolution, lto_file *file) ++{ ++ /* We require that objects in the resolution file are in the same ++ order as the lto1 command line. */ ++ unsigned int name_len; ++ char *obj_name; ++ unsigned int num_symbols; ++ unsigned int i; ++ struct lto_file_decl_data *file_data; ++ splay_tree_node nd = NULL; ++ ++ if (!resolution) ++ return; ++ ++ name_len = strlen (file->filename); ++ obj_name = XNEWVEC (char, name_len + 1); ++ fscanf (resolution, " "); /* Read white space. 
*/ ++ ++ fread (obj_name, sizeof (char), name_len, resolution); ++ obj_name[name_len] = '\0'; ++ if (filename_cmp (obj_name, file->filename) != 0) ++ internal_error ("unexpected file name %s in linker resolution file. " ++ "Expected %s", obj_name, file->filename); ++ if (file->offset != 0) ++ { ++ int t; ++ char offset_p[17]; ++ int64_t offset; ++ t = fscanf (resolution, "@0x%16s", offset_p); ++ if (t != 1) ++ internal_error ("could not parse file offset"); ++ offset = lto_parse_hex (offset_p); ++ if (offset != file->offset) ++ internal_error ("unexpected offset"); ++ } ++ ++ free (obj_name); ++ ++ fscanf (resolution, "%u", &num_symbols); ++ ++ for (i = 0; i < num_symbols; i++) ++ { ++ int t; ++ unsigned index; ++ unsigned HOST_WIDE_INT id; ++ char r_str[27]; ++ enum ld_plugin_symbol_resolution r = (enum ld_plugin_symbol_resolution) 0; ++ unsigned int j; ++ unsigned int lto_resolution_str_len = ++ sizeof (lto_resolution_str) / sizeof (char *); ++ res_pair rp; ++ ++ t = fscanf (resolution, "%u " HOST_WIDE_INT_PRINT_HEX_PURE " %26s %*[^\n]\n", ++ &index, &id, r_str); ++ if (t != 3) ++ internal_error ("invalid line in the resolution file"); ++ ++ for (j = 0; j < lto_resolution_str_len; j++) ++ { ++ if (strcmp (lto_resolution_str[j], r_str) == 0) ++ { ++ r = (enum ld_plugin_symbol_resolution) j; ++ break; ++ } ++ } ++ if (j == lto_resolution_str_len) ++ internal_error ("invalid resolution in the resolution file"); ++ ++ if (!(nd && lto_splay_tree_id_equal_p (nd->key, id))) ++ { ++ nd = lto_splay_tree_lookup (file_ids, id); ++ if (nd == NULL) ++ internal_error ("resolution sub id %wx not in object file", id); ++ } ++ ++ file_data = (struct lto_file_decl_data *)nd->value; ++ /* The indexes are very sparse. To save memory save them in a compact ++ format that is only unpacked later when the subfile is processed. */ ++ rp.res = r; ++ rp.index = index; ++ file_data->respairs.safe_push (rp); ++ if (file_data->max_index < index) ++ file_data->max_index = index; ++ } ++} ++ ++/* List of file_decl_datas */ ++struct file_data_list ++ { ++ struct lto_file_decl_data *first, *last; ++ }; ++ ++/* Is the name for a id'ed LTO section? */ ++ ++static int ++lto_section_with_id (const char *name, unsigned HOST_WIDE_INT *id) ++{ ++ const char *s; ++ ++ if (strncmp (name, section_name_prefix, strlen (section_name_prefix))) ++ return 0; ++ s = strrchr (name, '.'); ++ if (!s) ++ return 0; ++ /* If the section is not suffixed with an ID return. */ ++ if ((size_t)(s - name) == strlen (section_name_prefix)) ++ return 0; ++ return sscanf (s, "." 
HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1; ++} ++ ++/* Create file_data of each sub file id */ ++ ++static int ++create_subid_section_table (struct lto_section_slot *ls, splay_tree file_ids, ++ struct file_data_list *list) ++{ ++ struct lto_section_slot s_slot, *new_slot; ++ unsigned HOST_WIDE_INT id; ++ splay_tree_node nd; ++ void **hash_slot; ++ char *new_name; ++ struct lto_file_decl_data *file_data; ++ ++ if (!lto_section_with_id (ls->name, &id)) ++ return 1; ++ ++ /* Find hash table of sub module id */ ++ nd = lto_splay_tree_lookup (file_ids, id); ++ if (nd != NULL) ++ { ++ file_data = (struct lto_file_decl_data *)nd->value; ++ } ++ else ++ { ++ file_data = ggc_alloc (); ++ memset(file_data, 0, sizeof (struct lto_file_decl_data)); ++ file_data->id = id; ++ file_data->section_hash_table = lto_obj_create_section_hash_table (); ++ lto_splay_tree_insert (file_ids, id, file_data); ++ ++ /* Maintain list in linker order */ ++ if (!list->first) ++ list->first = file_data; ++ if (list->last) ++ list->last->next = file_data; ++ list->last = file_data; ++ } ++ ++ /* Copy section into sub module hash table */ ++ new_name = XDUPVEC (char, ls->name, strlen (ls->name) + 1); ++ s_slot.name = new_name; ++ hash_slot = htab_find_slot (file_data->section_hash_table, &s_slot, INSERT); ++ gcc_assert (*hash_slot == NULL); ++ ++ new_slot = XDUP (struct lto_section_slot, ls); ++ new_slot->name = new_name; ++ *hash_slot = new_slot; ++ return 1; ++} ++ ++/* Read declarations and other initializations for a FILE_DATA. */ ++ ++static void ++lto_file_finalize (struct lto_file_decl_data *file_data, lto_file *file) ++{ ++ const char *data; ++ size_t len; ++ vec ++ resolutions = vNULL; ++ int i; ++ res_pair *rp; ++ ++ /* Create vector for fast access of resolution. We do this lazily ++ to save memory. */ ++ resolutions.safe_grow_cleared (file_data->max_index + 1); ++ for (i = 0; file_data->respairs.iterate (i, &rp); i++) ++ resolutions[rp->index] = rp->res; ++ file_data->respairs.release (); ++ ++ file_data->renaming_hash_table = lto_create_renaming_table (); ++ file_data->file_name = file->filename; ++#ifdef ACCEL_COMPILER ++ lto_input_mode_table (file_data); ++#else ++ file_data->mode_table = lto_mode_identity_table; ++#endif ++ data = lto_get_section_data (file_data, LTO_section_decls, NULL, &len); ++ if (data == NULL) ++ { ++ internal_error ("cannot read LTO decls from %s", file_data->file_name); ++ return; ++ } ++ /* Frees resolutions */ ++ lto_read_decls (file_data, data, resolutions); ++ lto_free_section_data (file_data, LTO_section_decls, NULL, data, len); ++} ++ ++/* Finalize FILE_DATA in FILE and increase COUNT. */ ++ ++static int ++lto_create_files_from_ids (lto_file *file, struct lto_file_decl_data *file_data, ++ int *count) ++{ ++ lto_file_finalize (file_data, file); ++ if (symtab->dump_file) ++ fprintf (symtab->dump_file, ++ "Creating file %s with sub id " HOST_WIDE_INT_PRINT_HEX "\n", ++ file_data->file_name, file_data->id); ++ (*count)++; ++ return 0; ++} ++ ++/* Generate a TREE representation for all types and external decls ++ entities in FILE. ++ ++ Read all of the globals out of the file. Then read the cgraph ++ and process the .o index into the cgraph nodes so that it can open ++ the .o file to load the functions and ipa information. 
*/ ++ ++static struct lto_file_decl_data * ++lto_file_read (lto_file *file, FILE *resolution_file, int *count) ++{ ++ struct lto_file_decl_data *file_data = NULL; ++ splay_tree file_ids; ++ htab_t section_hash_table; ++ struct lto_section_slot *section; ++ struct file_data_list file_list; ++ struct lto_section_list section_list; ++ ++ memset (§ion_list, 0, sizeof (struct lto_section_list)); ++ section_hash_table = lto_obj_build_section_table (file, §ion_list); ++ ++ /* Find all sub modules in the object and put their sections into new hash ++ tables in a splay tree. */ ++ file_ids = lto_splay_tree_new (); ++ memset (&file_list, 0, sizeof (struct file_data_list)); ++ for (section = section_list.first; section != NULL; section = section->next) ++ create_subid_section_table (section, file_ids, &file_list); ++ ++ /* Add resolutions to file ids */ ++ lto_resolution_read (file_ids, resolution_file, file); ++ ++ /* Finalize each lto file for each submodule in the merged object */ ++ for (file_data = file_list.first; file_data != NULL; file_data = file_data->next) ++ lto_create_files_from_ids (file, file_data, count); ++ ++ splay_tree_delete (file_ids); ++ htab_delete (section_hash_table); ++ ++ return file_list.first; ++} ++ ++#if HAVE_MMAP_FILE && HAVE_SYSCONF && defined _SC_PAGE_SIZE ++#define LTO_MMAP_IO 1 ++#endif ++ ++#if LTO_MMAP_IO ++/* Page size of machine is used for mmap and munmap calls. */ ++static size_t page_mask; ++#endif ++ ++/* Get the section data of length LEN from FILENAME starting at ++ OFFSET. The data segment must be freed by the caller when the ++ caller is finished. Returns NULL if all was not well. */ ++ ++static char * ++lto_read_section_data (struct lto_file_decl_data *file_data, ++ intptr_t offset, size_t len) ++{ ++ char *result; ++ static int fd = -1; ++ static char *fd_name; ++#if LTO_MMAP_IO ++ intptr_t computed_len; ++ intptr_t computed_offset; ++ intptr_t diff; ++#endif ++ ++ /* Keep a single-entry file-descriptor cache. The last file we ++ touched will get closed at exit. ++ ??? Eventually we want to add a more sophisticated larger cache ++ or rather fix function body streaming to not stream them in ++ practically random order. */ ++ if (fd != -1 ++ && filename_cmp (fd_name, file_data->file_name) != 0) ++ { ++ free (fd_name); ++ close (fd); ++ fd = -1; ++ } ++ if (fd == -1) ++ { ++ fd = open (file_data->file_name, O_RDONLY|O_BINARY); ++ if (fd == -1) ++ { ++ fatal_error (input_location, "Cannot open %s", file_data->file_name); ++ return NULL; ++ } ++ fd_name = xstrdup (file_data->file_name); ++ } ++ ++#if LTO_MMAP_IO ++ if (!page_mask) ++ { ++ size_t page_size = sysconf (_SC_PAGE_SIZE); ++ page_mask = ~(page_size - 1); ++ } ++ ++ computed_offset = offset & page_mask; ++ diff = offset - computed_offset; ++ computed_len = len + diff; ++ ++ result = (char *) mmap (NULL, computed_len, PROT_READ, MAP_PRIVATE, ++ fd, computed_offset); ++ if (result == MAP_FAILED) ++ { ++ fatal_error (input_location, "Cannot map %s", file_data->file_name); ++ return NULL; ++ } ++ ++ return result + diff; ++#else ++ result = (char *) xmalloc (len); ++ if (lseek (fd, offset, SEEK_SET) != offset ++ || read (fd, result, len) != (ssize_t) len) ++ { ++ free (result); ++ fatal_error (input_location, "Cannot read %s", file_data->file_name); ++ result = NULL; ++ } ++#ifdef __MINGW32__ ++ /* Native windows doesn't supports delayed unlink on opened file. So ++ we close file here again. This produces higher I/O load, but at least ++ it prevents to have dangling file handles preventing unlink. 
*/ ++ free (fd_name); ++ fd_name = NULL; ++ close (fd); ++ fd = -1; ++#endif ++ return result; ++#endif ++} ++ ++ ++/* Get the section data from FILE_DATA of SECTION_TYPE with NAME. ++ NAME will be NULL unless the section type is for a function ++ body. */ ++ ++static const char * ++get_section_data (struct lto_file_decl_data *file_data, ++ enum lto_section_type section_type, ++ const char *name, ++ size_t *len) ++{ ++ htab_t section_hash_table = file_data->section_hash_table; ++ struct lto_section_slot *f_slot; ++ struct lto_section_slot s_slot; ++ const char *section_name = lto_get_section_name (section_type, name, file_data); ++ char *data = NULL; ++ ++ *len = 0; ++ s_slot.name = section_name; ++ f_slot = (struct lto_section_slot *) htab_find (section_hash_table, &s_slot); ++ if (f_slot) ++ { ++ data = lto_read_section_data (file_data, f_slot->start, f_slot->len); ++ *len = f_slot->len; ++ } ++ ++ free (CONST_CAST (char *, section_name)); ++ return data; ++} ++ ++ ++/* Free the section data from FILE_DATA of SECTION_TYPE with NAME that ++ starts at OFFSET and has LEN bytes. */ ++ ++static void ++free_section_data (struct lto_file_decl_data *file_data ATTRIBUTE_UNUSED, ++ enum lto_section_type section_type ATTRIBUTE_UNUSED, ++ const char *name ATTRIBUTE_UNUSED, ++ const char *offset, size_t len ATTRIBUTE_UNUSED) ++{ ++#if LTO_MMAP_IO ++ intptr_t computed_len; ++ intptr_t computed_offset; ++ intptr_t diff; ++#endif ++ ++#if LTO_MMAP_IO ++ computed_offset = ((intptr_t) offset) & page_mask; ++ diff = (intptr_t) offset - computed_offset; ++ computed_len = len + diff; ++ ++ munmap ((caddr_t) computed_offset, computed_len); ++#else ++ free (CONST_CAST(char *, offset)); ++#endif ++} ++ ++static lto_file *current_lto_file; ++ ++/* If TT is a variable or function decl replace it with its ++ prevailing variant. */ ++#define LTO_SET_PREVAIL(tt) \ ++ do {\ ++ if ((tt) && VAR_OR_FUNCTION_DECL_P (tt) \ ++ && (TREE_PUBLIC (tt) || DECL_EXTERNAL (tt))) \ ++ { \ ++ tt = lto_symtab_prevailing_decl (tt); \ ++ fixed = true; \ ++ } \ ++ } while (0) ++ ++/* Ensure that TT isn't a replacable var of function decl. */ ++#define LTO_NO_PREVAIL(tt) \ ++ gcc_checking_assert (!(tt) || !VAR_OR_FUNCTION_DECL_P (tt)) ++ ++/* Given a tree T replace all fields referring to variables or functions ++ with their prevailing variant. */ ++static void ++lto_fixup_prevailing_decls (tree t) ++{ ++ enum tree_code code = TREE_CODE (t); ++ bool fixed = false; ++ ++ gcc_checking_assert (code != TREE_BINFO); ++ LTO_NO_PREVAIL (TREE_TYPE (t)); ++ if (CODE_CONTAINS_STRUCT (code, TS_COMMON) ++ /* lto_symtab_prevail_decl use TREE_CHAIN to link to the prevailing decl. ++ in the case T is a prevailed declaration we would ICE here. 
*/ ++ && !VAR_OR_FUNCTION_DECL_P (t)) ++ LTO_NO_PREVAIL (TREE_CHAIN (t)); ++ if (DECL_P (t)) ++ { ++ LTO_NO_PREVAIL (DECL_NAME (t)); ++ LTO_SET_PREVAIL (DECL_CONTEXT (t)); ++ if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) ++ { ++ LTO_SET_PREVAIL (DECL_SIZE (t)); ++ LTO_SET_PREVAIL (DECL_SIZE_UNIT (t)); ++ LTO_SET_PREVAIL (DECL_INITIAL (t)); ++ LTO_NO_PREVAIL (DECL_ATTRIBUTES (t)); ++ LTO_SET_PREVAIL (DECL_ABSTRACT_ORIGIN (t)); ++ } ++ if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) ++ { ++ LTO_NO_PREVAIL (DECL_ASSEMBLER_NAME_RAW (t)); ++ } ++ if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON)) ++ { ++ LTO_NO_PREVAIL (DECL_RESULT_FLD (t)); ++ } ++ if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) ++ { ++ LTO_NO_PREVAIL (DECL_ARGUMENTS (t)); ++ LTO_SET_PREVAIL (DECL_FUNCTION_PERSONALITY (t)); ++ LTO_NO_PREVAIL (DECL_VINDEX (t)); ++ } ++ if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL)) ++ { ++ LTO_SET_PREVAIL (DECL_FIELD_OFFSET (t)); ++ LTO_NO_PREVAIL (DECL_BIT_FIELD_TYPE (t)); ++ LTO_NO_PREVAIL (DECL_QUALIFIER (t)); ++ LTO_NO_PREVAIL (DECL_FIELD_BIT_OFFSET (t)); ++ LTO_NO_PREVAIL (DECL_FCONTEXT (t)); ++ } ++ } ++ else if (TYPE_P (t)) ++ { ++ LTO_NO_PREVAIL (TYPE_CACHED_VALUES (t)); ++ LTO_SET_PREVAIL (TYPE_SIZE (t)); ++ LTO_SET_PREVAIL (TYPE_SIZE_UNIT (t)); ++ LTO_NO_PREVAIL (TYPE_ATTRIBUTES (t)); ++ LTO_NO_PREVAIL (TYPE_NAME (t)); ++ ++ LTO_SET_PREVAIL (TYPE_MIN_VALUE_RAW (t)); ++ LTO_SET_PREVAIL (TYPE_MAX_VALUE_RAW (t)); ++ LTO_NO_PREVAIL (TYPE_LANG_SLOT_1 (t)); ++ ++ LTO_SET_PREVAIL (TYPE_CONTEXT (t)); ++ ++ LTO_NO_PREVAIL (TYPE_CANONICAL (t)); ++ LTO_NO_PREVAIL (TYPE_MAIN_VARIANT (t)); ++ LTO_NO_PREVAIL (TYPE_NEXT_VARIANT (t)); ++ } ++ else if (EXPR_P (t)) ++ { ++ int i; ++ for (i = TREE_OPERAND_LENGTH (t) - 1; i >= 0; --i) ++ LTO_SET_PREVAIL (TREE_OPERAND (t, i)); ++ } ++ else if (TREE_CODE (t) == CONSTRUCTOR) ++ { ++ unsigned i; ++ tree val; ++ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (t), i, val) ++ LTO_SET_PREVAIL (val); ++ } ++ else ++ { ++ switch (code) ++ { ++ case TREE_LIST: ++ LTO_SET_PREVAIL (TREE_VALUE (t)); ++ LTO_SET_PREVAIL (TREE_PURPOSE (t)); ++ LTO_NO_PREVAIL (TREE_PURPOSE (t)); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ /* If we fixed nothing, then we missed something seen by ++ mentions_vars_p. */ ++ gcc_checking_assert (fixed); ++} ++#undef LTO_SET_PREVAIL ++#undef LTO_NO_PREVAIL ++ ++/* Helper function of lto_fixup_decls. Walks the var and fn streams in STATE, ++ replaces var and function decls with the corresponding prevailing def. */ ++ ++static void ++lto_fixup_state (struct lto_in_decl_state *state) ++{ ++ unsigned i, si; ++ ++ /* Although we only want to replace FUNCTION_DECLs and VAR_DECLs, ++ we still need to walk from all DECLs to find the reachable ++ FUNCTION_DECLs and VAR_DECLs. */ ++ for (si = 0; si < LTO_N_DECL_STREAMS; si++) ++ { ++ vec *trees = state->streams[si]; ++ for (i = 0; i < vec_safe_length (trees); i++) ++ { ++ tree t = (*trees)[i]; ++ if (flag_checking && TYPE_P (t)) ++ verify_type (t); ++ if (VAR_OR_FUNCTION_DECL_P (t) ++ && (TREE_PUBLIC (t) || DECL_EXTERNAL (t))) ++ (*trees)[i] = lto_symtab_prevailing_decl (t); ++ } ++ } ++} ++ ++/* Fix the decls from all FILES. Replaces each decl with the corresponding ++ prevailing one. 
*/ ++ ++static void ++lto_fixup_decls (struct lto_file_decl_data **files) ++{ ++ unsigned int i; ++ tree t; ++ ++ if (tree_with_vars) ++ FOR_EACH_VEC_ELT ((*tree_with_vars), i, t) ++ lto_fixup_prevailing_decls (t); ++ ++ for (i = 0; files[i]; i++) ++ { ++ struct lto_file_decl_data *file = files[i]; ++ struct lto_in_decl_state *state = file->global_decl_state; ++ lto_fixup_state (state); ++ ++ hash_table::iterator iter; ++ lto_in_decl_state *elt; ++ FOR_EACH_HASH_TABLE_ELEMENT (*file->function_decl_states, elt, ++ lto_in_decl_state *, iter) ++ lto_fixup_state (elt); ++ } ++} ++ ++static GTY((length ("lto_stats.num_input_files + 1"))) struct lto_file_decl_data **all_file_decl_data; ++ ++/* Turn file datas for sub files into a single array, so that they look ++ like separate files for further passes. */ ++ ++static void ++lto_flatten_files (struct lto_file_decl_data **orig, int count, int last_file_ix) ++{ ++ struct lto_file_decl_data *n, *next; ++ int i, k; ++ ++ lto_stats.num_input_files = count; ++ all_file_decl_data ++ = ggc_cleared_vec_alloc (count + 1); ++ /* Set the hooks so that all of the ipa passes can read in their data. */ ++ lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); ++ for (i = 0, k = 0; i < last_file_ix; i++) ++ { ++ for (n = orig[i]; n != NULL; n = next) ++ { ++ all_file_decl_data[k++] = n; ++ next = n->next; ++ n->next = NULL; ++ } ++ } ++ all_file_decl_data[k] = NULL; ++ gcc_assert (k == count); ++} ++ ++/* Input file data before flattening (i.e. splitting them to subfiles to support ++ incremental linking. */ ++static int real_file_count; ++static GTY((length ("real_file_count + 1"))) struct lto_file_decl_data **real_file_decl_data; ++ ++/* Read all the symbols from the input files FNAMES. NFILES is the ++ number of files requested in the command line. Instantiate a ++ global call graph by aggregating all the sub-graphs found in each ++ file. */ ++ ++void ++read_cgraph_and_symbols (unsigned nfiles, const char **fnames) ++{ ++ unsigned int i, last_file_ix; ++ FILE *resolution; ++ int count = 0; ++ struct lto_file_decl_data **decl_data; ++ symtab_node *snode; ++ ++ symtab->initialize (); ++ ++ timevar_push (TV_IPA_LTO_DECL_IN); ++ ++#ifdef ACCEL_COMPILER ++ section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX; ++ lto_stream_offload_p = true; ++#endif ++ ++ real_file_decl_data ++ = decl_data = ggc_cleared_vec_alloc (nfiles + 1); ++ real_file_count = nfiles; ++ ++ /* Read the resolution file. */ ++ resolution = NULL; ++ if (resolution_file_name) ++ { ++ int t; ++ unsigned num_objects; ++ ++ resolution = fopen (resolution_file_name, "r"); ++ if (resolution == NULL) ++ fatal_error (input_location, ++ "could not open symbol resolution file: %m"); ++ ++ t = fscanf (resolution, "%u", &num_objects); ++ gcc_assert (t == 1); ++ ++ /* True, since the plugin splits the archives. */ ++ gcc_assert (num_objects == nfiles); ++ } ++ symtab->state = LTO_STREAMING; ++ ++ canonical_type_hash_cache = new hash_map (251); ++ gimple_canonical_types = htab_create (16381, gimple_canonical_type_hash, ++ gimple_canonical_type_eq, NULL); ++ gcc_obstack_init (&tree_scc_hash_obstack); ++ tree_scc_hash = new hash_table (4096); ++ ++ /* Register the common node types with the canonical type machinery so ++ we properly share alias-sets across languages and TUs. Do not ++ expose the common nodes as type merge target - those that should be ++ are already exposed so by pre-loading the LTO streamer caches. ++ Do two passes - first clear TYPE_CANONICAL and then re-compute it. 
*/ ++ for (i = 0; i < itk_none; ++i) ++ lto_register_canonical_types (integer_types[i], true); ++ for (i = 0; i < stk_type_kind_last; ++i) ++ lto_register_canonical_types (sizetype_tab[i], true); ++ for (i = 0; i < TI_MAX; ++i) ++ lto_register_canonical_types (global_trees[i], true); ++ for (i = 0; i < itk_none; ++i) ++ lto_register_canonical_types (integer_types[i], false); ++ for (i = 0; i < stk_type_kind_last; ++i) ++ lto_register_canonical_types (sizetype_tab[i], false); ++ for (i = 0; i < TI_MAX; ++i) ++ lto_register_canonical_types (global_trees[i], false); ++ ++ if (!quiet_flag) ++ fprintf (stderr, "Reading object files:"); ++ ++ /* Read all of the object files specified on the command line. */ ++ for (i = 0, last_file_ix = 0; i < nfiles; ++i) ++ { ++ struct lto_file_decl_data *file_data = NULL; ++ if (!quiet_flag) ++ { ++ fprintf (stderr, " %s", fnames[i]); ++ fflush (stderr); ++ } ++ ++ current_lto_file = lto_obj_file_open (fnames[i], false); ++ if (!current_lto_file) ++ break; ++ ++ file_data = lto_file_read (current_lto_file, resolution, &count); ++ if (!file_data) ++ { ++ lto_obj_file_close (current_lto_file); ++ free (current_lto_file); ++ current_lto_file = NULL; ++ break; ++ } ++ ++ decl_data[last_file_ix++] = file_data; ++ ++ lto_obj_file_close (current_lto_file); ++ free (current_lto_file); ++ current_lto_file = NULL; ++ } ++ ++ lto_flatten_files (decl_data, count, last_file_ix); ++ lto_stats.num_input_files = count; ++ ggc_free(decl_data); ++ real_file_decl_data = NULL; ++ ++ if (resolution_file_name) ++ fclose (resolution); ++ ++ /* Show the LTO report before launching LTRANS. */ ++ if (flag_lto_report || (flag_wpa && flag_lto_report_wpa)) ++ print_lto_report_1 (); ++ ++ /* Free gimple type merging datastructures. */ ++ delete tree_scc_hash; ++ tree_scc_hash = NULL; ++ obstack_free (&tree_scc_hash_obstack, NULL); ++ htab_delete (gimple_canonical_types); ++ gimple_canonical_types = NULL; ++ delete canonical_type_hash_cache; ++ canonical_type_hash_cache = NULL; ++ ++ /* At this stage we know that majority of GGC memory is reachable. ++ Growing the limits prevents unnecesary invocation of GGC. */ ++ ggc_grow (); ++ ggc_collect (); ++ ++ /* Set the hooks so that all of the ipa passes can read in their data. */ ++ lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); ++ ++ timevar_pop (TV_IPA_LTO_DECL_IN); ++ ++ if (!quiet_flag) ++ fprintf (stderr, "\nReading the callgraph\n"); ++ ++ timevar_push (TV_IPA_LTO_CGRAPH_IO); ++ /* Read the symtab. */ ++ input_symtab (); ++ ++ input_offload_tables (!flag_ltrans); ++ ++ /* Store resolutions into the symbol table. 
*/ ++ ++ FOR_EACH_SYMBOL (snode) ++ if (snode->externally_visible && snode->real_symbol_p () ++ && snode->lto_file_data && snode->lto_file_data->resolution_map ++ && !(TREE_CODE (snode->decl) == FUNCTION_DECL ++ && fndecl_built_in_p (snode->decl)) ++ && !(VAR_P (snode->decl) && DECL_HARD_REGISTER (snode->decl))) ++ { ++ ld_plugin_symbol_resolution_t *res; ++ ++ res = snode->lto_file_data->resolution_map->get (snode->decl); ++ if (!res || *res == LDPR_UNKNOWN) ++ { ++ if (snode->output_to_lto_symbol_table_p ()) ++ fatal_error (input_location, "missing resolution data for %s", ++ IDENTIFIER_POINTER ++ (DECL_ASSEMBLER_NAME (snode->decl))); ++ } ++ else ++ snode->resolution = *res; ++ } ++ for (i = 0; all_file_decl_data[i]; i++) ++ if (all_file_decl_data[i]->resolution_map) ++ { ++ delete all_file_decl_data[i]->resolution_map; ++ all_file_decl_data[i]->resolution_map = NULL; ++ } ++ ++ timevar_pop (TV_IPA_LTO_CGRAPH_IO); ++ ++ if (!quiet_flag) ++ fprintf (stderr, "Merging declarations\n"); ++ ++ timevar_push (TV_IPA_LTO_DECL_MERGE); ++ /* Merge global decls. In ltrans mode we read merged cgraph, we do not ++ need to care about resolving symbols again, we only need to replace ++ duplicated declarations read from the callgraph and from function ++ sections. */ ++ if (!flag_ltrans) ++ { ++ lto_symtab_merge_decls (); ++ ++ /* If there were errors during symbol merging bail out, we have no ++ good way to recover here. */ ++ if (seen_error ()) ++ fatal_error (input_location, ++ "errors during merging of translation units"); ++ ++ /* Fixup all decls. */ ++ lto_fixup_decls (all_file_decl_data); ++ } ++ if (tree_with_vars) ++ ggc_free (tree_with_vars); ++ tree_with_vars = NULL; ++ ggc_collect (); ++ ++ timevar_pop (TV_IPA_LTO_DECL_MERGE); ++ /* Each pass will set the appropriate timer. */ ++ ++ if (!quiet_flag) ++ fprintf (stderr, "Reading summaries\n"); ++ ++ /* Read the IPA summary data. */ ++ if (flag_ltrans) ++ ipa_read_optimization_summaries (); ++ else ++ ipa_read_summaries (); ++ ++ for (i = 0; all_file_decl_data[i]; i++) ++ { ++ gcc_assert (all_file_decl_data[i]->symtab_node_encoder); ++ lto_symtab_encoder_delete (all_file_decl_data[i]->symtab_node_encoder); ++ all_file_decl_data[i]->symtab_node_encoder = NULL; ++ lto_free_function_in_decl_state (all_file_decl_data[i]->global_decl_state); ++ all_file_decl_data[i]->global_decl_state = NULL; ++ all_file_decl_data[i]->current_decl_state = NULL; ++ } ++ ++ if (!flag_ltrans) ++ { ++ /* Finally merge the cgraph according to the decl merging decisions. */ ++ timevar_push (TV_IPA_LTO_CGRAPH_MERGE); ++ ++ gcc_assert (!dump_file); ++ dump_file = dump_begin (lto_link_dump_id, NULL); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Before merging:\n"); ++ symtab->dump (dump_file); ++ } ++ lto_symtab_merge_symbols (); ++ /* Removal of unreachable symbols is needed to make verify_symtab to pass; ++ we are still having duplicated comdat groups containing local statics. ++ We could also just remove them while merging. */ ++ symtab->remove_unreachable_nodes (dump_file); ++ ggc_collect (); ++ ++ if (dump_file) ++ dump_end (lto_link_dump_id, dump_file); ++ dump_file = NULL; ++ timevar_pop (TV_IPA_LTO_CGRAPH_MERGE); ++ } ++ symtab->state = IPA_SSA; ++ /* All node removals happening here are useless, because ++ WPA should not stream them. 
Still always perform remove_unreachable_nodes ++ because we may reshape clone tree, get rid of dead masters of inline ++ clones and remove symbol entries for read-only variables we keep around ++ only to be able to constant fold them. */ ++ if (flag_ltrans) ++ { ++ if (symtab->dump_file) ++ symtab->dump (symtab->dump_file); ++ symtab->remove_unreachable_nodes (symtab->dump_file); ++ } ++ ++ /* Indicate that the cgraph is built and ready. */ ++ symtab->function_flags_ready = true; ++ ++ ggc_free (all_file_decl_data); ++ all_file_decl_data = NULL; ++} ++ ++ ++ ++/* Show various memory usage statistics related to LTO. */ ++void ++print_lto_report_1 (void) ++{ ++ const char *pfx = (flag_lto) ? "LTO" : (flag_wpa) ? "WPA" : "LTRANS"; ++ fprintf (stderr, "%s statistics\n", pfx); ++ ++ fprintf (stderr, "[%s] read %lu SCCs of average size %f\n", ++ pfx, num_sccs_read, total_scc_size / (double)num_sccs_read); ++ fprintf (stderr, "[%s] %lu tree bodies read in total\n", pfx, total_scc_size); ++ if (flag_wpa && tree_scc_hash) ++ { ++ fprintf (stderr, "[%s] tree SCC table: size %ld, %ld elements, " ++ "collision ratio: %f\n", pfx, ++ (long) tree_scc_hash->size (), ++ (long) tree_scc_hash->elements (), ++ tree_scc_hash->collisions ()); ++ hash_table::iterator hiter; ++ tree_scc *scc, *max_scc = NULL; ++ unsigned max_length = 0; ++ FOR_EACH_HASH_TABLE_ELEMENT (*tree_scc_hash, scc, x, hiter) ++ { ++ unsigned length = 0; ++ tree_scc *s = scc; ++ for (; s; s = s->next) ++ length++; ++ if (length > max_length) ++ { ++ max_length = length; ++ max_scc = scc; ++ } ++ } ++ fprintf (stderr, "[%s] tree SCC max chain length %u (size %u)\n", ++ pfx, max_length, max_scc->len); ++ fprintf (stderr, "[%s] Compared %lu SCCs, %lu collisions (%f)\n", pfx, ++ num_scc_compares, num_scc_compare_collisions, ++ num_scc_compare_collisions / (double) num_scc_compares); ++ fprintf (stderr, "[%s] Merged %lu SCCs\n", pfx, num_sccs_merged); ++ fprintf (stderr, "[%s] Merged %lu tree bodies\n", pfx, ++ total_scc_size_merged); ++ fprintf (stderr, "[%s] Merged %lu types\n", pfx, num_merged_types); ++ fprintf (stderr, "[%s] %lu types prevailed (%lu associated trees)\n", ++ pfx, num_prevailing_types, num_type_scc_trees); ++ fprintf (stderr, "[%s] GIMPLE canonical type table: size %ld, " ++ "%ld elements, %ld searches, %ld collisions (ratio: %f)\n", pfx, ++ (long) htab_size (gimple_canonical_types), ++ (long) htab_elements (gimple_canonical_types), ++ (long) gimple_canonical_types->searches, ++ (long) gimple_canonical_types->collisions, ++ htab_collisions (gimple_canonical_types)); ++ fprintf (stderr, "[%s] GIMPLE canonical type pointer-map: " ++ "%lu elements, %ld searches\n", pfx, ++ num_canonical_type_hash_entries, ++ num_canonical_type_hash_queries); ++ } ++ ++ print_lto_report (pfx); ++} ++ ++GTY(()) tree lto_eh_personality_decl; ++ ++/* Return the LTO personality function decl. */ ++ ++tree ++lto_eh_personality (void) ++{ ++ if (!lto_eh_personality_decl) ++ { ++ /* Use the first personality DECL for our personality if we don't ++ support multiple ones. This ensures that we don't artificially ++ create the need for them in a single-language program. */ ++ if (first_personality_decl && !dwarf2out_do_cfi_asm ()) ++ lto_eh_personality_decl = first_personality_decl; ++ else ++ lto_eh_personality_decl = lhd_gcc_personality (); ++ } ++ ++ return lto_eh_personality_decl; ++} ++ ++/* Set the process name based on the LTO mode. 
*/
++
++static void
++lto_process_name (void)
++{
++  if (flag_lto)
++    setproctitle (flag_incremental_link == INCREMENTAL_LINK_LTO
++                  ? "lto1-inclink" : "lto1-lto");
++  if (flag_wpa)
++    setproctitle ("lto1-wpa");
++  if (flag_ltrans)
++    setproctitle ("lto1-ltrans");
++}
++
++
++/* Initialize the LTO front end. */
++
++void
++lto_fe_init (void)
++{
++  lto_process_name ();
++  lto_streamer_hooks_init ();
++  lto_reader_init ();
++  lto_set_in_hooks (NULL, get_section_data, free_section_data);
++  memset (&lto_stats, 0, sizeof (lto_stats));
++  bitmap_obstack_initialize (NULL);
++  gimple_register_cfg_hooks ();
++#ifndef ACCEL_COMPILER
++  unsigned char *table
++    = ggc_vec_alloc<unsigned char> (MAX_MACHINE_MODE);
++  for (int m = 0; m < MAX_MACHINE_MODE; m++)
++    table[m] = m;
++  lto_mode_identity_table = table;
++#endif
++}
++
++#include "gt-lto-lto-common.h"
+diff --git a/gcc/lto/lto-common.h b/gcc/lto/lto-common.h
+new file mode 100644
+index 000000000..b1209a3a3
+--- /dev/null
++++ b/gcc/lto/lto-common.h
+@@ -0,0 +1,33 @@
++/* LTO common functions between lto.c and lto-dump.c header file.
++   Copyright (C) 2018 Free Software Foundation, Inc.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it under
++the terms of the GNU General Public License as published by the Free
++Software Foundation; either version 3, or (at your option) any later
++version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT ANY
++WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3. If not see
++<http://www.gnu.org/licenses/>. */
++
++#ifndef LTO_COMMON_H
++#define LTO_COMMON_H
++
++void lto_fe_init (void);
++void read_cgraph_and_symbols (unsigned, const char **);
++void print_lto_report_1 (void);
++
++extern tree lto_eh_personality_decl;
++extern GTY(()) vec<tree, va_gc> *tree_with_vars;
++extern const unsigned char *lto_mode_identity_table;
++extern tree first_personality_decl;
++
++#endif
++
+diff --git a/gcc/lto/lto-lang.c b/gcc/lto/lto-lang.c
+index 4ef228fcb..1d35db11e 100644
+--- a/gcc/lto/lto-lang.c
++++ b/gcc/lto/lto-lang.c
+@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
+ #include "debug.h"
+ #include "lto-tree.h"
+ #include "lto.h"
++#include "lto-common.h"
+ #include "stringpool.h"
+ #include "attribs.h"
+
+diff --git a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c
+index 63a633302..2fd5b1e8f 100644
+--- a/gcc/lto/lto-symtab.c
++++ b/gcc/lto/lto-symtab.c
+@@ -556,7 +556,8 @@ lto_symtab_merge_p (tree prevailing, tree decl)
+     }
+   if (fndecl_built_in_p (prevailing)
+       && (DECL_BUILT_IN_CLASS (prevailing) != DECL_BUILT_IN_CLASS (decl)
+-          || DECL_FUNCTION_CODE (prevailing) != DECL_FUNCTION_CODE (decl)))
++          || (DECL_UNCHECKED_FUNCTION_CODE (prevailing)
++              != DECL_UNCHECKED_FUNCTION_CODE (decl))))
+     {
+       if (dump_file)
+         fprintf (dump_file, "Not merging decls; "
+diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
+index 4db156fdf..c44e034a2 100644
+--- a/gcc/lto/lto.c
++++ b/gcc/lto/lto.c
+@@ -38,7 +38,6 @@ along with GCC; see the file COPYING3. If not see
+ #include "symbol-summary.h"
+ #include "tree-vrp.h"
+ #include "ipa-prop.h"
+-#include "common.h"
+ #include "debug.h"
+ #include "lto.h"
+ #include "lto-section-names.h"
+@@ -55,122 +54,12 @@ along with GCC; see the file COPYING3.
If not see + #include "fold-const.h" + #include "attribs.h" + #include "builtins.h" ++#include "lto-common.h" + + + /* Number of parallel tasks to run, -1 if we want to use GNU Make jobserver. */ + static int lto_parallelism; + +-static GTY(()) tree first_personality_decl; +- +-static GTY(()) const unsigned char *lto_mode_identity_table; +- +-/* Returns a hash code for P. */ +- +-static hashval_t +-hash_name (const void *p) +-{ +- const struct lto_section_slot *ds = (const struct lto_section_slot *) p; +- return (hashval_t) htab_hash_string (ds->name); +-} +- +- +-/* Returns nonzero if P1 and P2 are equal. */ +- +-static int +-eq_name (const void *p1, const void *p2) +-{ +- const struct lto_section_slot *s1 = +- (const struct lto_section_slot *) p1; +- const struct lto_section_slot *s2 = +- (const struct lto_section_slot *) p2; +- +- return strcmp (s1->name, s2->name) == 0; +-} +- +-/* Free lto_section_slot */ +- +-static void +-free_with_string (void *arg) +-{ +- struct lto_section_slot *s = (struct lto_section_slot *)arg; +- +- free (CONST_CAST (char *, s->name)); +- free (arg); +-} +- +-/* Create section hash table */ +- +-htab_t +-lto_obj_create_section_hash_table (void) +-{ +- return htab_create (37, hash_name, eq_name, free_with_string); +-} +- +-/* Delete an allocated integer KEY in the splay tree. */ +- +-static void +-lto_splay_tree_delete_id (splay_tree_key key) +-{ +- free ((void *) key); +-} +- +-/* Compare splay tree node ids A and B. */ +- +-static int +-lto_splay_tree_compare_ids (splay_tree_key a, splay_tree_key b) +-{ +- unsigned HOST_WIDE_INT ai; +- unsigned HOST_WIDE_INT bi; +- +- ai = *(unsigned HOST_WIDE_INT *) a; +- bi = *(unsigned HOST_WIDE_INT *) b; +- +- if (ai < bi) +- return -1; +- else if (ai > bi) +- return 1; +- return 0; +-} +- +-/* Look up splay tree node by ID in splay tree T. */ +- +-static splay_tree_node +-lto_splay_tree_lookup (splay_tree t, unsigned HOST_WIDE_INT id) +-{ +- return splay_tree_lookup (t, (splay_tree_key) &id); +-} +- +-/* Check if KEY has ID. */ +- +-static bool +-lto_splay_tree_id_equal_p (splay_tree_key key, unsigned HOST_WIDE_INT id) +-{ +- return *(unsigned HOST_WIDE_INT *) key == id; +-} +- +-/* Insert a splay tree node into tree T with ID as key and FILE_DATA as value. +- The ID is allocated separately because we need HOST_WIDE_INTs which may +- be wider than a splay_tree_key. */ +- +-static void +-lto_splay_tree_insert (splay_tree t, unsigned HOST_WIDE_INT id, +- struct lto_file_decl_data *file_data) +-{ +- unsigned HOST_WIDE_INT *idp = XCNEW (unsigned HOST_WIDE_INT); +- *idp = id; +- splay_tree_insert (t, (splay_tree_key) idp, (splay_tree_value) file_data); +-} +- +-/* Create a splay tree. */ +- +-static splay_tree +-lto_splay_tree_new (void) +-{ +- return splay_tree_new (lto_splay_tree_compare_ids, +- lto_splay_tree_delete_id, +- NULL); +-} +- + /* Return true when NODE has a clone that is analyzed (i.e. we need + to load its body even if the node itself is not needed). */ + +@@ -224,2083 +113,45 @@ lto_materialize_function (struct cgraph_node *node) + rest_of_decl_compilation (decl, 1, 0); + } + +- +-/* Decode the content of memory pointed to by DATA in the in decl +- state object STATE. DATA_IN points to a data_in structure for +- decoding. Return the address after the decoded object in the +- input. 
*/ +- +-static const uint32_t * +-lto_read_in_decl_state (struct data_in *data_in, const uint32_t *data, +- struct lto_in_decl_state *state) +-{ +- uint32_t ix; +- tree decl; +- uint32_t i, j; +- +- ix = *data++; +- state->compressed = ix & 1; +- ix /= 2; +- decl = streamer_tree_cache_get_tree (data_in->reader_cache, ix); +- if (!VAR_OR_FUNCTION_DECL_P (decl)) +- { +- gcc_assert (decl == void_type_node); +- decl = NULL_TREE; +- } +- state->fn_decl = decl; +- +- for (i = 0; i < LTO_N_DECL_STREAMS; i++) +- { +- uint32_t size = *data++; +- vec *decls = NULL; +- vec_alloc (decls, size); +- +- for (j = 0; j < size; j++) +- vec_safe_push (decls, +- streamer_tree_cache_get_tree (data_in->reader_cache, +- data[j])); +- +- state->streams[i] = decls; +- data += size; +- } +- +- return data; +-} +- +- +-/* Global canonical type table. */ +-static htab_t gimple_canonical_types; +-static hash_map *canonical_type_hash_cache; +-static unsigned long num_canonical_type_hash_entries; +-static unsigned long num_canonical_type_hash_queries; +- +-static void iterative_hash_canonical_type (tree type, inchash::hash &hstate); +-static hashval_t gimple_canonical_type_hash (const void *p); +-static void gimple_register_canonical_type_1 (tree t, hashval_t hash); +- +-/* Returning a hash value for gimple type TYPE. +- +- The hash value returned is equal for types considered compatible +- by gimple_canonical_types_compatible_p. */ +- +-static hashval_t +-hash_canonical_type (tree type) +-{ +- inchash::hash hstate; +- enum tree_code code; +- +- /* We compute alias sets only for types that needs them. +- Be sure we do not recurse to something else as we cannot hash incomplete +- types in a way they would have same hash value as compatible complete +- types. */ +- gcc_checking_assert (type_with_alias_set_p (type)); +- +- /* Combine a few common features of types so that types are grouped into +- smaller sets; when searching for existing matching types to merge, +- only existing types having the same features as the new type will be +- checked. */ +- code = tree_code_for_canonical_type_merging (TREE_CODE (type)); +- hstate.add_int (code); +- hstate.add_int (TYPE_MODE (type)); +- +- /* Incorporate common features of numerical types. */ +- if (INTEGRAL_TYPE_P (type) +- || SCALAR_FLOAT_TYPE_P (type) +- || FIXED_POINT_TYPE_P (type) +- || TREE_CODE (type) == OFFSET_TYPE +- || POINTER_TYPE_P (type)) +- { +- hstate.add_int (TYPE_PRECISION (type)); +- if (!type_with_interoperable_signedness (type)) +- hstate.add_int (TYPE_UNSIGNED (type)); +- } +- +- if (VECTOR_TYPE_P (type)) +- { +- hstate.add_poly_int (TYPE_VECTOR_SUBPARTS (type)); +- hstate.add_int (TYPE_UNSIGNED (type)); +- } +- +- if (TREE_CODE (type) == COMPLEX_TYPE) +- hstate.add_int (TYPE_UNSIGNED (type)); +- +- /* Fortran's C_SIGNED_CHAR is !TYPE_STRING_FLAG but needs to be +- interoperable with "signed char". Unless all frontends are revisited to +- agree on these types, we must ignore the flag completely. */ +- +- /* Fortran standard define C_PTR type that is compatible with every +- C pointer. For this reason we need to glob all pointers into one. +- Still pointers in different address spaces are not compatible. */ +- if (POINTER_TYPE_P (type)) +- hstate.add_int (TYPE_ADDR_SPACE (TREE_TYPE (type))); +- +- /* For array types hash the domain bounds and the string flag. 
*/ +- if (TREE_CODE (type) == ARRAY_TYPE && TYPE_DOMAIN (type)) +- { +- hstate.add_int (TYPE_STRING_FLAG (type)); +- /* OMP lowering can introduce error_mark_node in place of +- random local decls in types. */ +- if (TYPE_MIN_VALUE (TYPE_DOMAIN (type)) != error_mark_node) +- inchash::add_expr (TYPE_MIN_VALUE (TYPE_DOMAIN (type)), hstate); +- if (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) != error_mark_node) +- inchash::add_expr (TYPE_MAX_VALUE (TYPE_DOMAIN (type)), hstate); +- } +- +- /* Recurse for aggregates with a single element type. */ +- if (TREE_CODE (type) == ARRAY_TYPE +- || TREE_CODE (type) == COMPLEX_TYPE +- || TREE_CODE (type) == VECTOR_TYPE) +- iterative_hash_canonical_type (TREE_TYPE (type), hstate); +- +- /* Incorporate function return and argument types. */ +- if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) +- { +- unsigned na; +- tree p; +- +- iterative_hash_canonical_type (TREE_TYPE (type), hstate); +- +- for (p = TYPE_ARG_TYPES (type), na = 0; p; p = TREE_CHAIN (p)) +- { +- iterative_hash_canonical_type (TREE_VALUE (p), hstate); +- na++; +- } +- +- hstate.add_int (na); +- } +- +- if (RECORD_OR_UNION_TYPE_P (type)) +- { +- unsigned nf; +- tree f; +- +- for (f = TYPE_FIELDS (type), nf = 0; f; f = TREE_CHAIN (f)) +- if (TREE_CODE (f) == FIELD_DECL +- && (! DECL_SIZE (f) +- || ! integer_zerop (DECL_SIZE (f)))) +- { +- iterative_hash_canonical_type (TREE_TYPE (f), hstate); +- nf++; +- } +- +- hstate.add_int (nf); +- } +- +- return hstate.end(); +-} +- +-/* Returning a hash value for gimple type TYPE combined with VAL. */ +- +-static void +-iterative_hash_canonical_type (tree type, inchash::hash &hstate) +-{ +- hashval_t v; +- +- /* All type variants have same TYPE_CANONICAL. */ +- type = TYPE_MAIN_VARIANT (type); +- +- if (!canonical_type_used_p (type)) +- v = hash_canonical_type (type); +- /* An already processed type. */ +- else if (TYPE_CANONICAL (type)) +- { +- type = TYPE_CANONICAL (type); +- v = gimple_canonical_type_hash (type); +- } +- else +- { +- /* Canonical types should not be able to form SCCs by design, this +- recursion is just because we do not register canonical types in +- optimal order. To avoid quadratic behavior also register the +- type here. */ +- v = hash_canonical_type (type); +- gimple_register_canonical_type_1 (type, v); +- } +- hstate.add_int (v); +-} +- +-/* Returns the hash for a canonical type P. */ +- +-static hashval_t +-gimple_canonical_type_hash (const void *p) +-{ +- num_canonical_type_hash_queries++; +- hashval_t *slot = canonical_type_hash_cache->get ((const_tree) p); +- gcc_assert (slot != NULL); +- return *slot; +-} +- +- +- +-/* Returns nonzero if P1 and P2 are equal. */ +- +-static int +-gimple_canonical_type_eq (const void *p1, const void *p2) +-{ +- const_tree t1 = (const_tree) p1; +- const_tree t2 = (const_tree) p2; +- return gimple_canonical_types_compatible_p (CONST_CAST_TREE (t1), +- CONST_CAST_TREE (t2)); +-} +- +-/* Main worker for gimple_register_canonical_type. */ +- +-static void +-gimple_register_canonical_type_1 (tree t, hashval_t hash) +-{ +- void **slot; +- +- gcc_checking_assert (TYPE_P (t) && !TYPE_CANONICAL (t) +- && type_with_alias_set_p (t) +- && canonical_type_used_p (t)); +- +- slot = htab_find_slot_with_hash (gimple_canonical_types, t, hash, INSERT); +- if (*slot) +- { +- tree new_type = (tree)(*slot); +- gcc_checking_assert (new_type != t); +- TYPE_CANONICAL (t) = new_type; +- } +- else +- { +- TYPE_CANONICAL (t) = t; +- *slot = (void *) t; +- /* Cache the just computed hash value. 
*/ +- num_canonical_type_hash_entries++; +- bool existed_p = canonical_type_hash_cache->put (t, hash); +- gcc_assert (!existed_p); +- } +-} +- +-/* Register type T in the global type table gimple_types and set +- TYPE_CANONICAL of T accordingly. +- This is used by LTO to merge structurally equivalent types for +- type-based aliasing purposes across different TUs and languages. +- +- ??? This merging does not exactly match how the tree.c middle-end +- functions will assign TYPE_CANONICAL when new types are created +- during optimization (which at least happens for pointer and array +- types). */ +- +-static void +-gimple_register_canonical_type (tree t) +-{ +- if (TYPE_CANONICAL (t) || !type_with_alias_set_p (t) +- || !canonical_type_used_p (t)) +- return; +- +- /* Canonical types are same among all complete variants. */ +- if (TYPE_CANONICAL (TYPE_MAIN_VARIANT (t))) +- TYPE_CANONICAL (t) = TYPE_CANONICAL (TYPE_MAIN_VARIANT (t)); +- else +- { +- gimple_register_canonical_type_1 (TYPE_MAIN_VARIANT (t), +- hash_canonical_type (TYPE_MAIN_VARIANT (t))); +- TYPE_CANONICAL (t) = TYPE_CANONICAL (TYPE_MAIN_VARIANT (t)); +- } +-} +- +-/* Re-compute TYPE_CANONICAL for NODE and related types. */ ++/* Materialize all the bodies for all the nodes in the callgraph. */ + + static void +-lto_register_canonical_types (tree node, bool first_p) +-{ +- if (!node +- || !TYPE_P (node)) +- return; +- +- if (first_p) +- TYPE_CANONICAL (node) = NULL_TREE; +- +- if (POINTER_TYPE_P (node) +- || TREE_CODE (node) == COMPLEX_TYPE +- || TREE_CODE (node) == ARRAY_TYPE) +- lto_register_canonical_types (TREE_TYPE (node), first_p); +- +- if (!first_p) +- gimple_register_canonical_type (node); +-} +- +- +-/* Remember trees that contains references to declarations. */ +-static GTY(()) vec *tree_with_vars; +- +-#define CHECK_VAR(tt) \ +- do \ +- { \ +- if ((tt) && VAR_OR_FUNCTION_DECL_P (tt) \ +- && (TREE_PUBLIC (tt) || DECL_EXTERNAL (tt))) \ +- return true; \ +- } while (0) +- +-#define CHECK_NO_VAR(tt) \ +- gcc_checking_assert (!(tt) || !VAR_OR_FUNCTION_DECL_P (tt)) +- +-/* Check presence of pointers to decls in fields of a tree_typed T. */ +- +-static inline bool +-mentions_vars_p_typed (tree t) +-{ +- CHECK_NO_VAR (TREE_TYPE (t)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of a tree_common T. */ +- +-static inline bool +-mentions_vars_p_common (tree t) +-{ +- if (mentions_vars_p_typed (t)) +- return true; +- CHECK_NO_VAR (TREE_CHAIN (t)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of a decl_minimal T. */ +- +-static inline bool +-mentions_vars_p_decl_minimal (tree t) +-{ +- if (mentions_vars_p_common (t)) +- return true; +- CHECK_NO_VAR (DECL_NAME (t)); +- CHECK_VAR (DECL_CONTEXT (t)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of a decl_common T. */ +- +-static inline bool +-mentions_vars_p_decl_common (tree t) +-{ +- if (mentions_vars_p_decl_minimal (t)) +- return true; +- CHECK_VAR (DECL_SIZE (t)); +- CHECK_VAR (DECL_SIZE_UNIT (t)); +- CHECK_VAR (DECL_INITIAL (t)); +- CHECK_NO_VAR (DECL_ATTRIBUTES (t)); +- CHECK_VAR (DECL_ABSTRACT_ORIGIN (t)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of a decl_with_vis T. */ +- +-static inline bool +-mentions_vars_p_decl_with_vis (tree t) +-{ +- if (mentions_vars_p_decl_common (t)) +- return true; +- +- /* Accessor macro has side-effects, use field-name here. 
*/ +- CHECK_NO_VAR (DECL_ASSEMBLER_NAME_RAW (t)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of a decl_non_common T. */ +- +-static inline bool +-mentions_vars_p_decl_non_common (tree t) +-{ +- if (mentions_vars_p_decl_with_vis (t)) +- return true; +- CHECK_NO_VAR (DECL_RESULT_FLD (t)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of a decl_non_common T. */ +- +-static bool +-mentions_vars_p_function (tree t) +-{ +- if (mentions_vars_p_decl_non_common (t)) +- return true; +- CHECK_NO_VAR (DECL_ARGUMENTS (t)); +- CHECK_NO_VAR (DECL_VINDEX (t)); +- CHECK_VAR (DECL_FUNCTION_PERSONALITY (t)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of a field_decl T. */ +- +-static bool +-mentions_vars_p_field_decl (tree t) +-{ +- if (mentions_vars_p_decl_common (t)) +- return true; +- CHECK_VAR (DECL_FIELD_OFFSET (t)); +- CHECK_NO_VAR (DECL_BIT_FIELD_TYPE (t)); +- CHECK_NO_VAR (DECL_QUALIFIER (t)); +- CHECK_NO_VAR (DECL_FIELD_BIT_OFFSET (t)); +- CHECK_NO_VAR (DECL_FCONTEXT (t)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of a type T. */ +- +-static bool +-mentions_vars_p_type (tree t) +-{ +- if (mentions_vars_p_common (t)) +- return true; +- CHECK_NO_VAR (TYPE_CACHED_VALUES (t)); +- CHECK_VAR (TYPE_SIZE (t)); +- CHECK_VAR (TYPE_SIZE_UNIT (t)); +- CHECK_NO_VAR (TYPE_ATTRIBUTES (t)); +- CHECK_NO_VAR (TYPE_NAME (t)); +- +- CHECK_VAR (TYPE_MIN_VALUE_RAW (t)); +- CHECK_VAR (TYPE_MAX_VALUE_RAW (t)); +- +- /* Accessor is for derived node types only. */ +- CHECK_NO_VAR (TYPE_LANG_SLOT_1 (t)); +- +- CHECK_VAR (TYPE_CONTEXT (t)); +- CHECK_NO_VAR (TYPE_CANONICAL (t)); +- CHECK_NO_VAR (TYPE_MAIN_VARIANT (t)); +- CHECK_NO_VAR (TYPE_NEXT_VARIANT (t)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of a BINFO T. */ +- +-static bool +-mentions_vars_p_binfo (tree t) +-{ +- unsigned HOST_WIDE_INT i, n; +- +- if (mentions_vars_p_common (t)) +- return true; +- CHECK_VAR (BINFO_VTABLE (t)); +- CHECK_NO_VAR (BINFO_OFFSET (t)); +- CHECK_NO_VAR (BINFO_VIRTUALS (t)); +- CHECK_NO_VAR (BINFO_VPTR_FIELD (t)); +- n = vec_safe_length (BINFO_BASE_ACCESSES (t)); +- for (i = 0; i < n; i++) +- CHECK_NO_VAR (BINFO_BASE_ACCESS (t, i)); +- /* Do not walk BINFO_INHERITANCE_CHAIN, BINFO_SUBVTT_INDEX +- and BINFO_VPTR_INDEX; these are used by C++ FE only. */ +- n = BINFO_N_BASE_BINFOS (t); +- for (i = 0; i < n; i++) +- CHECK_NO_VAR (BINFO_BASE_BINFO (t, i)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of a CONSTRUCTOR T. */ +- +-static bool +-mentions_vars_p_constructor (tree t) +-{ +- unsigned HOST_WIDE_INT idx; +- constructor_elt *ce; +- +- if (mentions_vars_p_typed (t)) +- return true; +- +- for (idx = 0; vec_safe_iterate (CONSTRUCTOR_ELTS (t), idx, &ce); idx++) +- { +- CHECK_NO_VAR (ce->index); +- CHECK_VAR (ce->value); +- } +- return false; +-} +- +-/* Check presence of pointers to decls in fields of an expression tree T. */ +- +-static bool +-mentions_vars_p_expr (tree t) +-{ +- int i; +- if (mentions_vars_p_typed (t)) +- return true; +- for (i = TREE_OPERAND_LENGTH (t) - 1; i >= 0; --i) +- CHECK_VAR (TREE_OPERAND (t, i)); +- return false; +-} +- +-/* Check presence of pointers to decls in fields of an OMP_CLAUSE T. 
*/ +- +-static bool +-mentions_vars_p_omp_clause (tree t) +-{ +- int i; +- if (mentions_vars_p_common (t)) +- return true; +- for (i = omp_clause_num_ops[OMP_CLAUSE_CODE (t)] - 1; i >= 0; --i) +- CHECK_VAR (OMP_CLAUSE_OPERAND (t, i)); +- return false; +-} +- +-/* Check presence of pointers to decls that needs later fixup in T. */ +- +-static bool +-mentions_vars_p (tree t) ++materialize_cgraph (void) + { +- switch (TREE_CODE (t)) +- { +- case IDENTIFIER_NODE: +- break; +- +- case TREE_LIST: +- CHECK_VAR (TREE_VALUE (t)); +- CHECK_VAR (TREE_PURPOSE (t)); +- CHECK_NO_VAR (TREE_CHAIN (t)); +- break; +- +- case FIELD_DECL: +- return mentions_vars_p_field_decl (t); +- +- case LABEL_DECL: +- case CONST_DECL: +- case PARM_DECL: +- case RESULT_DECL: +- case IMPORTED_DECL: +- case NAMESPACE_DECL: +- case NAMELIST_DECL: +- return mentions_vars_p_decl_common (t); +- +- case VAR_DECL: +- return mentions_vars_p_decl_with_vis (t); +- +- case TYPE_DECL: +- return mentions_vars_p_decl_non_common (t); +- +- case FUNCTION_DECL: +- return mentions_vars_p_function (t); +- +- case TREE_BINFO: +- return mentions_vars_p_binfo (t); +- +- case PLACEHOLDER_EXPR: +- return mentions_vars_p_common (t); +- +- case BLOCK: +- case TRANSLATION_UNIT_DECL: +- case OPTIMIZATION_NODE: +- case TARGET_OPTION_NODE: +- break; +- +- case CONSTRUCTOR: +- return mentions_vars_p_constructor (t); +- +- case OMP_CLAUSE: +- return mentions_vars_p_omp_clause (t); +- +- default: +- if (TYPE_P (t)) +- { +- if (mentions_vars_p_type (t)) +- return true; +- } +- else if (EXPR_P (t)) +- { +- if (mentions_vars_p_expr (t)) +- return true; +- } +- else if (CONSTANT_CLASS_P (t)) +- CHECK_NO_VAR (TREE_TYPE (t)); +- else +- gcc_unreachable (); +- } +- return false; +-} +- +- +-/* Return the resolution for the decl with index INDEX from DATA_IN. */ +- +-static enum ld_plugin_symbol_resolution +-get_resolution (struct data_in *data_in, unsigned index) +-{ +- if (data_in->globals_resolution.exists ()) +- { +- ld_plugin_symbol_resolution_t ret; +- /* We can have references to not emitted functions in +- DECL_FUNCTION_PERSONALITY at least. So we can and have +- to indeed return LDPR_UNKNOWN in some cases. */ +- if (data_in->globals_resolution.length () <= index) +- return LDPR_UNKNOWN; +- ret = data_in->globals_resolution[index]; +- return ret; +- } +- else +- /* Delay resolution finding until decl merging. */ +- return LDPR_UNKNOWN; +-} +- +-/* We need to record resolutions until symbol table is read. */ +-static void +-register_resolution (struct lto_file_decl_data *file_data, tree decl, +- enum ld_plugin_symbol_resolution resolution) +-{ +- bool existed; +- if (resolution == LDPR_UNKNOWN) +- return; +- if (!file_data->resolution_map) +- file_data->resolution_map +- = new hash_map; +- ld_plugin_symbol_resolution_t &res +- = file_data->resolution_map->get_or_insert (decl, &existed); +- if (!existed +- || resolution == LDPR_PREVAILING_DEF_IRONLY +- || resolution == LDPR_PREVAILING_DEF +- || resolution == LDPR_PREVAILING_DEF_IRONLY_EXP) +- res = resolution; +-} +- +-/* Register DECL with the global symbol table and change its +- name if necessary to avoid name clashes for static globals across +- different files. */ +- +-static void +-lto_register_var_decl_in_symtab (struct data_in *data_in, tree decl, +- unsigned ix) +-{ +- tree context; +- +- /* Variable has file scope, not local. 
*/ +- if (!TREE_PUBLIC (decl) +- && !((context = decl_function_context (decl)) +- && auto_var_in_fn_p (decl, context))) +- rest_of_decl_compilation (decl, 1, 0); +- +- /* If this variable has already been declared, queue the +- declaration for merging. */ +- if (TREE_PUBLIC (decl)) +- register_resolution (data_in->file_data, +- decl, get_resolution (data_in, ix)); +-} +- +- +-/* Register DECL with the global symbol table and change its +- name if necessary to avoid name clashes for static globals across +- different files. DATA_IN contains descriptors and tables for the +- file being read. */ +- +-static void +-lto_register_function_decl_in_symtab (struct data_in *data_in, tree decl, +- unsigned ix) +-{ +- /* If this variable has already been declared, queue the +- declaration for merging. */ +- if (TREE_PUBLIC (decl) && !DECL_ABSTRACT_P (decl)) +- register_resolution (data_in->file_data, +- decl, get_resolution (data_in, ix)); +-} +- +-/* Check if T is a decl and needs register its resolution info. */ +- +-static void +-lto_maybe_register_decl (struct data_in *data_in, tree t, unsigned ix) +-{ +- if (TREE_CODE (t) == VAR_DECL) +- lto_register_var_decl_in_symtab (data_in, t, ix); +- else if (TREE_CODE (t) == FUNCTION_DECL +- && !fndecl_built_in_p (t)) +- lto_register_function_decl_in_symtab (data_in, t, ix); +-} +- +- +-/* For the type T re-materialize it in the type variant list and +- the pointer/reference-to chains. */ +- +-static void +-lto_fixup_prevailing_type (tree t) +-{ +- /* The following re-creates proper variant lists while fixing up +- the variant leaders. We do not stream TYPE_NEXT_VARIANT so the +- variant list state before fixup is broken. */ +- +- /* If we are not our own variant leader link us into our new leaders +- variant list. */ +- if (TYPE_MAIN_VARIANT (t) != t) +- { +- tree mv = TYPE_MAIN_VARIANT (t); +- TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (mv); +- TYPE_NEXT_VARIANT (mv) = t; +- } +- +- /* The following reconstructs the pointer chains +- of the new pointed-to type if we are a main variant. We do +- not stream those so they are broken before fixup. */ +- if (TREE_CODE (t) == POINTER_TYPE +- && TYPE_MAIN_VARIANT (t) == t) +- { +- TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (TREE_TYPE (t)); +- TYPE_POINTER_TO (TREE_TYPE (t)) = t; +- } +- else if (TREE_CODE (t) == REFERENCE_TYPE +- && TYPE_MAIN_VARIANT (t) == t) +- { +- TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (TREE_TYPE (t)); +- TYPE_REFERENCE_TO (TREE_TYPE (t)) = t; +- } +-} +- +- +-/* We keep prevailing tree SCCs in a hashtable with manual collision +- handling (in case all hashes compare the same) and keep the colliding +- entries in the tree_scc->next chain. */ +- +-struct tree_scc +-{ +- tree_scc *next; +- /* Hash of the whole SCC. */ +- hashval_t hash; +- /* Number of trees in the SCC. */ +- unsigned len; +- /* Number of possible entries into the SCC (tree nodes [0..entry_len-1] +- which share the same individual tree hash). */ +- unsigned entry_len; +- /* The members of the SCC. +- We only need to remember the first entry node candidate for prevailing +- SCCs (but of course have access to all entries for SCCs we are +- processing). +- ??? For prevailing SCCs we really only need hash and the first +- entry candidate, but that's too awkward to implement. 
*/ +- tree entries[1]; +-}; +- +-struct tree_scc_hasher : nofree_ptr_hash +-{ +- static inline hashval_t hash (const tree_scc *); +- static inline bool equal (const tree_scc *, const tree_scc *); +-}; +- +-hashval_t +-tree_scc_hasher::hash (const tree_scc *scc) +-{ +- return scc->hash; +-} +- +-bool +-tree_scc_hasher::equal (const tree_scc *scc1, const tree_scc *scc2) +-{ +- if (scc1->hash != scc2->hash +- || scc1->len != scc2->len +- || scc1->entry_len != scc2->entry_len) +- return false; +- return true; +-} +- +-static hash_table *tree_scc_hash; +-static struct obstack tree_scc_hash_obstack; +- +-static unsigned long num_merged_types; +-static unsigned long num_prevailing_types; +-static unsigned long num_type_scc_trees; +-static unsigned long total_scc_size; +-static unsigned long num_sccs_read; +-static unsigned long total_scc_size_merged; +-static unsigned long num_sccs_merged; +-static unsigned long num_scc_compares; +-static unsigned long num_scc_compare_collisions; +- +- +-/* Compare the two entries T1 and T2 of two SCCs that are possibly equal, +- recursing through in-SCC tree edges. Returns true if the SCCs entered +- through T1 and T2 are equal and fills in *MAP with the pairs of +- SCC entries we visited, starting with (*MAP)[0] = T1 and (*MAP)[1] = T2. */ +- +-static bool +-compare_tree_sccs_1 (tree t1, tree t2, tree **map) +-{ +- enum tree_code code; +- +- /* Mark already visited nodes. */ +- TREE_ASM_WRITTEN (t2) = 1; +- +- /* Push the pair onto map. */ +- (*map)[0] = t1; +- (*map)[1] = t2; +- *map = *map + 2; +- +- /* Compare value-fields. */ +-#define compare_values(X) \ +- do { \ +- if (X(t1) != X(t2)) \ +- return false; \ +- } while (0) +- +- compare_values (TREE_CODE); +- code = TREE_CODE (t1); +- +- if (!TYPE_P (t1)) +- { +- compare_values (TREE_SIDE_EFFECTS); +- compare_values (TREE_CONSTANT); +- compare_values (TREE_READONLY); +- compare_values (TREE_PUBLIC); +- } +- compare_values (TREE_ADDRESSABLE); +- compare_values (TREE_THIS_VOLATILE); +- if (DECL_P (t1)) +- compare_values (DECL_UNSIGNED); +- else if (TYPE_P (t1)) +- compare_values (TYPE_UNSIGNED); +- if (TYPE_P (t1)) +- compare_values (TYPE_ARTIFICIAL); +- else +- compare_values (TREE_NO_WARNING); +- compare_values (TREE_NOTHROW); +- compare_values (TREE_STATIC); +- if (code != TREE_BINFO) +- compare_values (TREE_PRIVATE); +- compare_values (TREE_PROTECTED); +- compare_values (TREE_DEPRECATED); +- if (TYPE_P (t1)) +- { +- if (AGGREGATE_TYPE_P (t1)) +- compare_values (TYPE_REVERSE_STORAGE_ORDER); +- else +- compare_values (TYPE_SATURATING); +- compare_values (TYPE_ADDR_SPACE); +- } +- else if (code == SSA_NAME) +- compare_values (SSA_NAME_IS_DEFAULT_DEF); +- +- if (CODE_CONTAINS_STRUCT (code, TS_INT_CST)) +- { +- if (wi::to_wide (t1) != wi::to_wide (t2)) +- return false; +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_REAL_CST)) +- { +- /* ??? No suitable compare routine available. 
*/ +- REAL_VALUE_TYPE r1 = TREE_REAL_CST (t1); +- REAL_VALUE_TYPE r2 = TREE_REAL_CST (t2); +- if (r1.cl != r2.cl +- || r1.decimal != r2.decimal +- || r1.sign != r2.sign +- || r1.signalling != r2.signalling +- || r1.canonical != r2.canonical +- || r1.uexp != r2.uexp) +- return false; +- for (unsigned i = 0; i < SIGSZ; ++i) +- if (r1.sig[i] != r2.sig[i]) +- return false; +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_FIXED_CST)) +- if (!fixed_compare (EQ_EXPR, +- TREE_FIXED_CST_PTR (t1), TREE_FIXED_CST_PTR (t2))) +- return false; +- +- if (CODE_CONTAINS_STRUCT (code, TS_VECTOR)) +- { +- compare_values (VECTOR_CST_LOG2_NPATTERNS); +- compare_values (VECTOR_CST_NELTS_PER_PATTERN); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) +- { +- compare_values (DECL_MODE); +- compare_values (DECL_NONLOCAL); +- compare_values (DECL_VIRTUAL_P); +- compare_values (DECL_IGNORED_P); +- compare_values (DECL_ABSTRACT_P); +- compare_values (DECL_ARTIFICIAL); +- compare_values (DECL_USER_ALIGN); +- compare_values (DECL_PRESERVE_P); +- compare_values (DECL_EXTERNAL); +- compare_values (DECL_GIMPLE_REG_P); +- compare_values (DECL_ALIGN); +- if (code == LABEL_DECL) +- { +- compare_values (EH_LANDING_PAD_NR); +- compare_values (LABEL_DECL_UID); +- } +- else if (code == FIELD_DECL) +- { +- compare_values (DECL_PACKED); +- compare_values (DECL_NONADDRESSABLE_P); +- compare_values (DECL_PADDING_P); +- compare_values (DECL_OFFSET_ALIGN); +- } +- else if (code == VAR_DECL) +- { +- compare_values (DECL_HAS_DEBUG_EXPR_P); +- compare_values (DECL_NONLOCAL_FRAME); +- } +- if (code == RESULT_DECL +- || code == PARM_DECL +- || code == VAR_DECL) +- { +- compare_values (DECL_BY_REFERENCE); +- if (code == VAR_DECL +- || code == PARM_DECL) +- compare_values (DECL_HAS_VALUE_EXPR_P); +- } +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_DECL_WRTL)) +- compare_values (DECL_REGISTER); +- +- if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) +- { +- compare_values (DECL_COMMON); +- compare_values (DECL_DLLIMPORT_P); +- compare_values (DECL_WEAK); +- compare_values (DECL_SEEN_IN_BIND_EXPR_P); +- compare_values (DECL_COMDAT); +- compare_values (DECL_VISIBILITY); +- compare_values (DECL_VISIBILITY_SPECIFIED); +- if (code == VAR_DECL) +- { +- compare_values (DECL_HARD_REGISTER); +- /* DECL_IN_TEXT_SECTION is set during final asm output only. 
*/ +- compare_values (DECL_IN_CONSTANT_POOL); +- } +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) +- { +- compare_values (DECL_BUILT_IN_CLASS); +- compare_values (DECL_STATIC_CONSTRUCTOR); +- compare_values (DECL_STATIC_DESTRUCTOR); +- compare_values (DECL_UNINLINABLE); +- compare_values (DECL_POSSIBLY_INLINED); +- compare_values (DECL_IS_NOVOPS); +- compare_values (DECL_IS_RETURNS_TWICE); +- compare_values (DECL_IS_MALLOC); +- compare_values (DECL_IS_OPERATOR_NEW); +- compare_values (DECL_DECLARED_INLINE_P); +- compare_values (DECL_STATIC_CHAIN); +- compare_values (DECL_NO_INLINE_WARNING_P); +- compare_values (DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT); +- compare_values (DECL_NO_LIMIT_STACK); +- compare_values (DECL_DISREGARD_INLINE_LIMITS); +- compare_values (DECL_PURE_P); +- compare_values (DECL_LOOPING_CONST_OR_PURE_P); +- compare_values (DECL_FINAL_P); +- compare_values (DECL_CXX_CONSTRUCTOR_P); +- compare_values (DECL_CXX_DESTRUCTOR_P); +- if (DECL_BUILT_IN_CLASS (t1) != NOT_BUILT_IN) +- compare_values (DECL_FUNCTION_CODE); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) +- { +- compare_values (TYPE_MODE); +- compare_values (TYPE_STRING_FLAG); +- compare_values (TYPE_NEEDS_CONSTRUCTING); +- if (RECORD_OR_UNION_TYPE_P (t1)) +- { +- compare_values (TYPE_TRANSPARENT_AGGR); +- compare_values (TYPE_FINAL_P); +- } +- else if (code == ARRAY_TYPE) +- compare_values (TYPE_NONALIASED_COMPONENT); +- if (AGGREGATE_TYPE_P (t1)) +- compare_values (TYPE_TYPELESS_STORAGE); +- compare_values (TYPE_EMPTY_P); +- compare_values (TYPE_PACKED); +- compare_values (TYPE_RESTRICT); +- compare_values (TYPE_USER_ALIGN); +- compare_values (TYPE_READONLY); +- compare_values (TYPE_PRECISION); +- compare_values (TYPE_ALIGN); +- /* Do not compare TYPE_ALIAS_SET. Doing so introduce ordering issues +- with calls to get_alias_set which may initialize it for streamed +- in types. */ +- } +- +- /* We don't want to compare locations, so there is nothing do compare +- for TS_EXP. */ +- +- /* BLOCKs are function local and we don't merge anything there, so +- simply refuse to merge. 
*/ +- if (CODE_CONTAINS_STRUCT (code, TS_BLOCK)) +- return false; +- +- if (CODE_CONTAINS_STRUCT (code, TS_TRANSLATION_UNIT_DECL)) +- if (strcmp (TRANSLATION_UNIT_LANGUAGE (t1), +- TRANSLATION_UNIT_LANGUAGE (t2)) != 0) +- return false; +- +- if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION)) +- if (!cl_target_option_eq (TREE_TARGET_OPTION (t1), TREE_TARGET_OPTION (t2))) +- return false; +- +- if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION)) +- if (!cl_optimization_option_eq (TREE_OPTIMIZATION (t1), +- TREE_OPTIMIZATION (t2))) +- return false; +- +- if (CODE_CONTAINS_STRUCT (code, TS_BINFO)) +- if (vec_safe_length (BINFO_BASE_ACCESSES (t1)) +- != vec_safe_length (BINFO_BASE_ACCESSES (t2))) +- return false; +- +- if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) +- compare_values (CONSTRUCTOR_NELTS); +- +- if (CODE_CONTAINS_STRUCT (code, TS_IDENTIFIER)) +- if (IDENTIFIER_LENGTH (t1) != IDENTIFIER_LENGTH (t2) +- || memcmp (IDENTIFIER_POINTER (t1), IDENTIFIER_POINTER (t2), +- IDENTIFIER_LENGTH (t1)) != 0) +- return false; +- +- if (CODE_CONTAINS_STRUCT (code, TS_STRING)) +- if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2) +- || memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2), +- TREE_STRING_LENGTH (t1)) != 0) +- return false; +- +- if (code == OMP_CLAUSE) +- { +- compare_values (OMP_CLAUSE_CODE); +- switch (OMP_CLAUSE_CODE (t1)) +- { +- case OMP_CLAUSE_DEFAULT: +- compare_values (OMP_CLAUSE_DEFAULT_KIND); +- break; +- case OMP_CLAUSE_SCHEDULE: +- compare_values (OMP_CLAUSE_SCHEDULE_KIND); +- break; +- case OMP_CLAUSE_DEPEND: +- compare_values (OMP_CLAUSE_DEPEND_KIND); +- break; +- case OMP_CLAUSE_MAP: +- compare_values (OMP_CLAUSE_MAP_KIND); +- break; +- case OMP_CLAUSE_PROC_BIND: +- compare_values (OMP_CLAUSE_PROC_BIND_KIND); +- break; +- case OMP_CLAUSE_REDUCTION: +- compare_values (OMP_CLAUSE_REDUCTION_CODE); +- compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_INIT); +- compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_MERGE); +- break; +- default: +- break; +- } +- } +- +-#undef compare_values +- +- +- /* Compare pointer fields. */ +- +- /* Recurse. Search & Replaced from DFS_write_tree_body. +- Folding the early checks into the compare_tree_edges recursion +- macro makes debugging way quicker as you are able to break on +- compare_tree_sccs_1 and simply finish until a call returns false +- to spot the SCC members with the difference. */ +-#define compare_tree_edges(E1, E2) \ +- do { \ +- tree t1_ = (E1), t2_ = (E2); \ +- if (t1_ != t2_ \ +- && (!t1_ || !t2_ \ +- || !TREE_VISITED (t2_) \ +- || (!TREE_ASM_WRITTEN (t2_) \ +- && !compare_tree_sccs_1 (t1_, t2_, map)))) \ +- return false; \ +- /* Only non-NULL trees outside of the SCC may compare equal. */ \ +- gcc_checking_assert (t1_ != t2_ || (!t2_ || !TREE_VISITED (t2_))); \ +- } while (0) +- +- if (CODE_CONTAINS_STRUCT (code, TS_TYPED)) +- { +- if (code != IDENTIFIER_NODE) +- compare_tree_edges (TREE_TYPE (t1), TREE_TYPE (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_VECTOR)) +- { +- /* Note that the number of elements for EXPR has already been emitted +- in EXPR's header (see streamer_write_tree_header). 
*/ +- unsigned int count = vector_cst_encoded_nelts (t1); +- for (unsigned int i = 0; i < count; ++i) +- compare_tree_edges (VECTOR_CST_ENCODED_ELT (t1, i), +- VECTOR_CST_ENCODED_ELT (t2, i)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_COMPLEX)) +- { +- compare_tree_edges (TREE_REALPART (t1), TREE_REALPART (t2)); +- compare_tree_edges (TREE_IMAGPART (t1), TREE_IMAGPART (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_DECL_MINIMAL)) +- { +- compare_tree_edges (DECL_NAME (t1), DECL_NAME (t2)); +- /* ??? Global decls from different TUs have non-matching +- TRANSLATION_UNIT_DECLs. Only consider a small set of +- decls equivalent, we should not end up merging others. */ +- if ((code == TYPE_DECL +- || code == NAMESPACE_DECL +- || code == IMPORTED_DECL +- || code == CONST_DECL +- || (VAR_OR_FUNCTION_DECL_P (t1) +- && (TREE_PUBLIC (t1) || DECL_EXTERNAL (t1)))) +- && DECL_FILE_SCOPE_P (t1) && DECL_FILE_SCOPE_P (t2)) +- ; +- else +- compare_tree_edges (DECL_CONTEXT (t1), DECL_CONTEXT (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) +- { +- compare_tree_edges (DECL_SIZE (t1), DECL_SIZE (t2)); +- compare_tree_edges (DECL_SIZE_UNIT (t1), DECL_SIZE_UNIT (t2)); +- compare_tree_edges (DECL_ATTRIBUTES (t1), DECL_ATTRIBUTES (t2)); +- compare_tree_edges (DECL_ABSTRACT_ORIGIN (t1), DECL_ABSTRACT_ORIGIN (t2)); +- if ((code == VAR_DECL +- || code == PARM_DECL) +- && DECL_HAS_VALUE_EXPR_P (t1)) +- compare_tree_edges (DECL_VALUE_EXPR (t1), DECL_VALUE_EXPR (t2)); +- if (code == VAR_DECL +- && DECL_HAS_DEBUG_EXPR_P (t1)) +- compare_tree_edges (DECL_DEBUG_EXPR (t1), DECL_DEBUG_EXPR (t2)); +- /* LTO specific edges. */ +- if (code != FUNCTION_DECL +- && code != TRANSLATION_UNIT_DECL) +- compare_tree_edges (DECL_INITIAL (t1), DECL_INITIAL (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON)) +- { +- if (code == FUNCTION_DECL) +- { +- tree a1, a2; +- for (a1 = DECL_ARGUMENTS (t1), a2 = DECL_ARGUMENTS (t2); +- a1 || a2; +- a1 = TREE_CHAIN (a1), a2 = TREE_CHAIN (a2)) +- compare_tree_edges (a1, a2); +- compare_tree_edges (DECL_RESULT (t1), DECL_RESULT (t2)); +- } +- else if (code == TYPE_DECL) +- compare_tree_edges (DECL_ORIGINAL_TYPE (t1), DECL_ORIGINAL_TYPE (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) +- { +- /* Make sure we don't inadvertently set the assembler name. 
*/ +- if (DECL_ASSEMBLER_NAME_SET_P (t1)) +- compare_tree_edges (DECL_ASSEMBLER_NAME (t1), +- DECL_ASSEMBLER_NAME (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL)) +- { +- compare_tree_edges (DECL_FIELD_OFFSET (t1), DECL_FIELD_OFFSET (t2)); +- compare_tree_edges (DECL_BIT_FIELD_TYPE (t1), DECL_BIT_FIELD_TYPE (t2)); +- compare_tree_edges (DECL_BIT_FIELD_REPRESENTATIVE (t1), +- DECL_BIT_FIELD_REPRESENTATIVE (t2)); +- compare_tree_edges (DECL_FIELD_BIT_OFFSET (t1), +- DECL_FIELD_BIT_OFFSET (t2)); +- compare_tree_edges (DECL_FCONTEXT (t1), DECL_FCONTEXT (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) +- { +- compare_tree_edges (DECL_FUNCTION_PERSONALITY (t1), +- DECL_FUNCTION_PERSONALITY (t2)); +- compare_tree_edges (DECL_VINDEX (t1), DECL_VINDEX (t2)); +- compare_tree_edges (DECL_FUNCTION_SPECIFIC_TARGET (t1), +- DECL_FUNCTION_SPECIFIC_TARGET (t2)); +- compare_tree_edges (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (t1), +- DECL_FUNCTION_SPECIFIC_OPTIMIZATION (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) +- { +- compare_tree_edges (TYPE_SIZE (t1), TYPE_SIZE (t2)); +- compare_tree_edges (TYPE_SIZE_UNIT (t1), TYPE_SIZE_UNIT (t2)); +- compare_tree_edges (TYPE_ATTRIBUTES (t1), TYPE_ATTRIBUTES (t2)); +- compare_tree_edges (TYPE_NAME (t1), TYPE_NAME (t2)); +- /* Do not compare TYPE_POINTER_TO or TYPE_REFERENCE_TO. They will be +- reconstructed during fixup. */ +- /* Do not compare TYPE_NEXT_VARIANT, we reconstruct the variant lists +- during fixup. */ +- compare_tree_edges (TYPE_MAIN_VARIANT (t1), TYPE_MAIN_VARIANT (t2)); +- /* ??? Global types from different TUs have non-matching +- TRANSLATION_UNIT_DECLs. Still merge them if they are otherwise +- equal. */ +- if (TYPE_FILE_SCOPE_P (t1) && TYPE_FILE_SCOPE_P (t2)) +- ; +- else +- compare_tree_edges (TYPE_CONTEXT (t1), TYPE_CONTEXT (t2)); +- /* TYPE_CANONICAL is re-computed during type merging, so do not +- compare it here. */ +- compare_tree_edges (TYPE_STUB_DECL (t1), TYPE_STUB_DECL (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_TYPE_NON_COMMON)) +- { +- if (code == ENUMERAL_TYPE) +- compare_tree_edges (TYPE_VALUES (t1), TYPE_VALUES (t2)); +- else if (code == ARRAY_TYPE) +- compare_tree_edges (TYPE_DOMAIN (t1), TYPE_DOMAIN (t2)); +- else if (RECORD_OR_UNION_TYPE_P (t1)) +- { +- tree f1, f2; +- for (f1 = TYPE_FIELDS (t1), f2 = TYPE_FIELDS (t2); +- f1 || f2; +- f1 = TREE_CHAIN (f1), f2 = TREE_CHAIN (f2)) +- compare_tree_edges (f1, f2); +- } +- else if (code == FUNCTION_TYPE +- || code == METHOD_TYPE) +- compare_tree_edges (TYPE_ARG_TYPES (t1), TYPE_ARG_TYPES (t2)); +- +- if (!POINTER_TYPE_P (t1)) +- compare_tree_edges (TYPE_MIN_VALUE_RAW (t1), TYPE_MIN_VALUE_RAW (t2)); +- compare_tree_edges (TYPE_MAX_VALUE_RAW (t1), TYPE_MAX_VALUE_RAW (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_LIST)) +- { +- compare_tree_edges (TREE_PURPOSE (t1), TREE_PURPOSE (t2)); +- compare_tree_edges (TREE_VALUE (t1), TREE_VALUE (t2)); +- compare_tree_edges (TREE_CHAIN (t1), TREE_CHAIN (t2)); +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_VEC)) +- for (int i = 0; i < TREE_VEC_LENGTH (t1); i++) +- compare_tree_edges (TREE_VEC_ELT (t1, i), TREE_VEC_ELT (t2, i)); +- +- if (CODE_CONTAINS_STRUCT (code, TS_EXP)) +- { +- for (int i = 0; i < TREE_OPERAND_LENGTH (t1); i++) +- compare_tree_edges (TREE_OPERAND (t1, i), +- TREE_OPERAND (t2, i)); +- +- /* BLOCKs are function local and we don't merge anything there. 
*/ +- if (TREE_BLOCK (t1) || TREE_BLOCK (t2)) +- return false; +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_BINFO)) +- { +- unsigned i; +- tree t; +- /* Lengths have already been compared above. */ +- FOR_EACH_VEC_ELT (*BINFO_BASE_BINFOS (t1), i, t) +- compare_tree_edges (t, BINFO_BASE_BINFO (t2, i)); +- FOR_EACH_VEC_SAFE_ELT (BINFO_BASE_ACCESSES (t1), i, t) +- compare_tree_edges (t, BINFO_BASE_ACCESS (t2, i)); +- compare_tree_edges (BINFO_OFFSET (t1), BINFO_OFFSET (t2)); +- compare_tree_edges (BINFO_VTABLE (t1), BINFO_VTABLE (t2)); +- compare_tree_edges (BINFO_VPTR_FIELD (t1), BINFO_VPTR_FIELD (t2)); +- /* Do not walk BINFO_INHERITANCE_CHAIN, BINFO_SUBVTT_INDEX +- and BINFO_VPTR_INDEX; these are used by C++ FE only. */ +- } +- +- if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) +- { +- unsigned i; +- tree index, value; +- /* Lengths have already been compared above. */ +- FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (t1), i, index, value) +- { +- compare_tree_edges (index, CONSTRUCTOR_ELT (t2, i)->index); +- compare_tree_edges (value, CONSTRUCTOR_ELT (t2, i)->value); +- } +- } +- +- if (code == OMP_CLAUSE) +- { +- int i; +- +- for (i = 0; i < omp_clause_num_ops[OMP_CLAUSE_CODE (t1)]; i++) +- compare_tree_edges (OMP_CLAUSE_OPERAND (t1, i), +- OMP_CLAUSE_OPERAND (t2, i)); +- compare_tree_edges (OMP_CLAUSE_CHAIN (t1), OMP_CLAUSE_CHAIN (t2)); +- } +- +-#undef compare_tree_edges +- +- return true; +-} +- +-/* Compare the tree scc SCC to the prevailing candidate PSCC, filling +- out MAP if they are equal. */ +- +-static bool +-compare_tree_sccs (tree_scc *pscc, tree_scc *scc, +- tree *map) +-{ +- /* Assume SCC entry hashes are sorted after their cardinality. Which +- means we can simply take the first n-tuple of equal hashes +- (which is recorded as entry_len) and do n SCC entry candidate +- comparisons. */ +- for (unsigned i = 0; i < pscc->entry_len; ++i) +- { +- tree *mapp = map; +- num_scc_compare_collisions++; +- if (compare_tree_sccs_1 (pscc->entries[0], scc->entries[i], &mapp)) +- { +- /* Equal - no need to reset TREE_VISITED or TREE_ASM_WRITTEN +- on the scc as all trees will be freed. */ +- return true; +- } +- /* Reset TREE_ASM_WRITTEN on scc for the next compare or in case +- the SCC prevails. */ +- for (unsigned j = 0; j < scc->len; ++j) +- TREE_ASM_WRITTEN (scc->entries[j]) = 0; +- } +- +- return false; +-} +- +-/* QSort sort function to sort a map of two pointers after the 2nd +- pointer. */ +- +-static int +-cmp_tree (const void *p1_, const void *p2_) +-{ +- tree *p1 = (tree *)(const_cast(p1_)); +- tree *p2 = (tree *)(const_cast(p2_)); +- if (p1[1] == p2[1]) +- return 0; +- return ((uintptr_t)p1[1] < (uintptr_t)p2[1]) ? -1 : 1; +-} +- +-/* Try to unify the SCC with nodes FROM to FROM + LEN in CACHE and +- hash value SCC_HASH with an already recorded SCC. Return true if +- that was successful, otherwise return false. */ +- +-static bool +-unify_scc (struct data_in *data_in, unsigned from, +- unsigned len, unsigned scc_entry_len, hashval_t scc_hash) +-{ +- bool unified_p = false; +- struct streamer_tree_cache_d *cache = data_in->reader_cache; +- tree_scc *scc +- = (tree_scc *) alloca (sizeof (tree_scc) + (len - 1) * sizeof (tree)); +- scc->next = NULL; +- scc->hash = scc_hash; +- scc->len = len; +- scc->entry_len = scc_entry_len; +- for (unsigned i = 0; i < len; ++i) +- { +- tree t = streamer_tree_cache_get_tree (cache, from + i); +- scc->entries[i] = t; +- /* Do not merge SCCs with local entities inside them. Also do +- not merge TRANSLATION_UNIT_DECLs. 
*/ +- if (TREE_CODE (t) == TRANSLATION_UNIT_DECL +- || (VAR_OR_FUNCTION_DECL_P (t) +- && !(TREE_PUBLIC (t) || DECL_EXTERNAL (t))) +- || TREE_CODE (t) == LABEL_DECL) +- { +- /* Avoid doing any work for these cases and do not worry to +- record the SCCs for further merging. */ +- return false; +- } +- } +- +- /* Look for the list of candidate SCCs to compare against. */ +- tree_scc **slot; +- slot = tree_scc_hash->find_slot_with_hash (scc, scc_hash, INSERT); +- if (*slot) +- { +- /* Try unifying against each candidate. */ +- num_scc_compares++; +- +- /* Set TREE_VISITED on the scc so we can easily identify tree nodes +- outside of the scc when following tree edges. Make sure +- that TREE_ASM_WRITTEN is unset so we can use it as 2nd bit +- to track whether we visited the SCC member during the compare. +- We cannot use TREE_VISITED on the pscc members as the extended +- scc and pscc can overlap. */ +- for (unsigned i = 0; i < scc->len; ++i) +- { +- TREE_VISITED (scc->entries[i]) = 1; +- gcc_checking_assert (!TREE_ASM_WRITTEN (scc->entries[i])); +- } +- +- tree *map = XALLOCAVEC (tree, 2 * len); +- for (tree_scc *pscc = *slot; pscc; pscc = pscc->next) +- { +- if (!compare_tree_sccs (pscc, scc, map)) +- continue; +- +- /* Found an equal SCC. */ +- unified_p = true; +- num_scc_compare_collisions--; +- num_sccs_merged++; +- total_scc_size_merged += len; +- +- if (flag_checking) +- for (unsigned i = 0; i < len; ++i) +- { +- tree t = map[2*i+1]; +- enum tree_code code = TREE_CODE (t); +- /* IDENTIFIER_NODEs should be singletons and are merged by the +- streamer. The others should be singletons, too, and we +- should not merge them in any way. */ +- gcc_assert (code != TRANSLATION_UNIT_DECL +- && code != IDENTIFIER_NODE); +- } +- +- /* Fixup the streamer cache with the prevailing nodes according +- to the tree node mapping computed by compare_tree_sccs. */ +- if (len == 1) +- { +- /* If we got a debug reference queued, see if the prevailing +- tree has a debug reference and if not, register the one +- for the tree we are about to throw away. */ +- if (dref_queue.length () == 1) +- { +- dref_entry e = dref_queue.pop (); +- gcc_assert (e.decl +- == streamer_tree_cache_get_tree (cache, from)); +- const char *sym; +- unsigned HOST_WIDE_INT off; +- if (!debug_hooks->die_ref_for_decl (pscc->entries[0], &sym, +- &off)) +- debug_hooks->register_external_die (pscc->entries[0], +- e.sym, e.off); +- } +- lto_maybe_register_decl (data_in, pscc->entries[0], from); +- streamer_tree_cache_replace_tree (cache, pscc->entries[0], from); +- } +- else +- { +- tree *map2 = XALLOCAVEC (tree, 2 * len); +- for (unsigned i = 0; i < len; ++i) +- { +- map2[i*2] = (tree)(uintptr_t)(from + i); +- map2[i*2+1] = scc->entries[i]; +- } +- qsort (map2, len, 2 * sizeof (tree), cmp_tree); +- qsort (map, len, 2 * sizeof (tree), cmp_tree); +- for (unsigned i = 0; i < len; ++i) +- { +- lto_maybe_register_decl (data_in, map[2*i], +- (uintptr_t)map2[2*i]); +- streamer_tree_cache_replace_tree (cache, map[2*i], +- (uintptr_t)map2[2*i]); +- } +- } +- +- /* Free the tree nodes from the read SCC. */ +- data_in->location_cache.revert_location_cache (); +- for (unsigned i = 0; i < len; ++i) +- { +- if (TYPE_P (scc->entries[i])) +- num_merged_types++; +- free_node (scc->entries[i]); +- } +- +- /* Drop DIE references. +- ??? Do as in the size-one SCC case which involves sorting +- the queue. */ +- dref_queue.truncate (0); +- +- break; +- } +- +- /* Reset TREE_VISITED if we didn't unify the SCC with another. 
*/ +- if (!unified_p) +- for (unsigned i = 0; i < scc->len; ++i) +- TREE_VISITED (scc->entries[i]) = 0; +- } +- +- /* If we didn't unify it to any candidate duplicate the relevant +- pieces to permanent storage and link it into the chain. */ +- if (!unified_p) +- { +- tree_scc *pscc +- = XOBNEWVAR (&tree_scc_hash_obstack, tree_scc, sizeof (tree_scc)); +- memcpy (pscc, scc, sizeof (tree_scc)); +- pscc->next = (*slot); +- *slot = pscc; +- } +- return unified_p; +-} +- +- +-/* Read all the symbols from buffer DATA, using descriptors in DECL_DATA. +- RESOLUTIONS is the set of symbols picked by the linker (read from the +- resolution file when the linker plugin is being used). */ +- +-static void +-lto_read_decls (struct lto_file_decl_data *decl_data, const void *data, +- vec resolutions) +-{ +- const struct lto_decl_header *header = (const struct lto_decl_header *) data; +- const int decl_offset = sizeof (struct lto_decl_header); +- const int main_offset = decl_offset + header->decl_state_size; +- const int string_offset = main_offset + header->main_size; +- struct data_in *data_in; +- unsigned int i; +- const uint32_t *data_ptr, *data_end; +- uint32_t num_decl_states; +- +- lto_input_block ib_main ((const char *) data + main_offset, +- header->main_size, decl_data->mode_table); +- +- data_in = lto_data_in_create (decl_data, (const char *) data + string_offset, +- header->string_size, resolutions); +- +- /* We do not uniquify the pre-loaded cache entries, those are middle-end +- internal types that should not be merged. */ +- +- /* Read the global declarations and types. */ +- while (ib_main.p < ib_main.len) +- { +- tree t; +- unsigned from = data_in->reader_cache->nodes.length (); +- /* Read and uniquify SCCs as in the input stream. */ +- enum LTO_tags tag = streamer_read_record_start (&ib_main); +- if (tag == LTO_tree_scc) +- { +- unsigned len_; +- unsigned scc_entry_len; +- hashval_t scc_hash = lto_input_scc (&ib_main, data_in, &len_, +- &scc_entry_len); +- unsigned len = data_in->reader_cache->nodes.length () - from; +- gcc_assert (len == len_); +- +- total_scc_size += len; +- num_sccs_read++; +- +- /* We have the special case of size-1 SCCs that are pre-merged +- by means of identifier and string sharing for example. +- ??? Maybe we should avoid streaming those as SCCs. */ +- tree first = streamer_tree_cache_get_tree (data_in->reader_cache, +- from); +- if (len == 1 +- && (TREE_CODE (first) == IDENTIFIER_NODE +- || (TREE_CODE (first) == INTEGER_CST +- && !TREE_OVERFLOW (first)))) +- continue; +- +- /* Try to unify the SCC with already existing ones. */ +- if (!flag_ltrans +- && unify_scc (data_in, from, +- len, scc_entry_len, scc_hash)) +- continue; +- +- /* Tree merging failed, mark entries in location cache as +- permanent. */ +- data_in->location_cache.accept_location_cache (); +- +- bool seen_type = false; +- for (unsigned i = 0; i < len; ++i) +- { +- tree t = streamer_tree_cache_get_tree (data_in->reader_cache, +- from + i); +- /* Reconstruct the type variant and pointer-to/reference-to +- chains. */ +- if (TYPE_P (t)) +- { +- seen_type = true; +- num_prevailing_types++; +- lto_fixup_prevailing_type (t); +- +- /* Compute the canonical type of all types. +- Because SCC components are streamed in random (hash) order +- we may have encountered the type before while registering +- type canonical of a derived type in the same SCC. 
*/ +- if (!TYPE_CANONICAL (t)) +- gimple_register_canonical_type (t); +- if (TYPE_MAIN_VARIANT (t) == t && odr_type_p (t)) +- register_odr_type (t); +- } +- /* Link shared INTEGER_CSTs into TYPE_CACHED_VALUEs of its +- type which is also member of this SCC. */ +- if (TREE_CODE (t) == INTEGER_CST +- && !TREE_OVERFLOW (t)) +- cache_integer_cst (t); +- if (!flag_ltrans) +- { +- lto_maybe_register_decl (data_in, t, from + i); +- /* Scan the tree for references to global functions or +- variables and record those for later fixup. */ +- if (mentions_vars_p (t)) +- vec_safe_push (tree_with_vars, t); +- } +- } +- +- /* Register DECLs with the debuginfo machinery. */ +- while (!dref_queue.is_empty ()) +- { +- dref_entry e = dref_queue.pop (); +- debug_hooks->register_external_die (e.decl, e.sym, e.off); +- } +- +- if (seen_type) +- num_type_scc_trees += len; +- } +- else +- { +- /* Pickle stray references. */ +- t = lto_input_tree_1 (&ib_main, data_in, tag, 0); +- gcc_assert (t && data_in->reader_cache->nodes.length () == from); +- } +- } +- data_in->location_cache.apply_location_cache (); +- +- /* Read in lto_in_decl_state objects. */ +- data_ptr = (const uint32_t *) ((const char*) data + decl_offset); +- data_end = +- (const uint32_t *) ((const char*) data_ptr + header->decl_state_size); +- num_decl_states = *data_ptr++; +- +- gcc_assert (num_decl_states > 0); +- decl_data->global_decl_state = lto_new_in_decl_state (); +- data_ptr = lto_read_in_decl_state (data_in, data_ptr, +- decl_data->global_decl_state); +- +- /* Read in per-function decl states and enter them in hash table. */ +- decl_data->function_decl_states = +- hash_table::create_ggc (37); +- +- for (i = 1; i < num_decl_states; i++) +- { +- struct lto_in_decl_state *state = lto_new_in_decl_state (); +- +- data_ptr = lto_read_in_decl_state (data_in, data_ptr, state); +- lto_in_decl_state **slot +- = decl_data->function_decl_states->find_slot (state, INSERT); +- gcc_assert (*slot == NULL); +- *slot = state; +- } +- +- if (data_ptr != data_end) +- internal_error ("bytecode stream: garbage at the end of symbols section"); +- +- /* Set the current decl state to be the global state. */ +- decl_data->current_decl_state = decl_data->global_decl_state; +- +- lto_data_in_delete (data_in); +-} +- +-/* Custom version of strtoll, which is not portable. */ +- +-static int64_t +-lto_parse_hex (const char *p) +-{ +- int64_t ret = 0; +- +- for (; *p != '\0'; ++p) +- { +- char c = *p; +- unsigned char part; +- ret <<= 4; +- if (c >= '0' && c <= '9') +- part = c - '0'; +- else if (c >= 'a' && c <= 'f') +- part = c - 'a' + 10; +- else if (c >= 'A' && c <= 'F') +- part = c - 'A' + 10; +- else +- internal_error ("could not parse hex number"); +- ret |= part; +- } +- +- return ret; +-} +- +-/* Read resolution for file named FILE_NAME. The resolution is read from +- RESOLUTION. */ +- +-static void +-lto_resolution_read (splay_tree file_ids, FILE *resolution, lto_file *file) +-{ +- /* We require that objects in the resolution file are in the same +- order as the lto1 command line. */ +- unsigned int name_len; +- char *obj_name; +- unsigned int num_symbols; +- unsigned int i; +- struct lto_file_decl_data *file_data; +- splay_tree_node nd = NULL; +- +- if (!resolution) +- return; +- +- name_len = strlen (file->filename); +- obj_name = XNEWVEC (char, name_len + 1); +- fscanf (resolution, " "); /* Read white space. 
*/ +- +- fread (obj_name, sizeof (char), name_len, resolution); +- obj_name[name_len] = '\0'; +- if (filename_cmp (obj_name, file->filename) != 0) +- internal_error ("unexpected file name %s in linker resolution file. " +- "Expected %s", obj_name, file->filename); +- if (file->offset != 0) +- { +- int t; +- char offset_p[17]; +- int64_t offset; +- t = fscanf (resolution, "@0x%16s", offset_p); +- if (t != 1) +- internal_error ("could not parse file offset"); +- offset = lto_parse_hex (offset_p); +- if (offset != file->offset) +- internal_error ("unexpected offset"); +- } +- +- free (obj_name); +- +- fscanf (resolution, "%u", &num_symbols); +- +- for (i = 0; i < num_symbols; i++) +- { +- int t; +- unsigned index; +- unsigned HOST_WIDE_INT id; +- char r_str[27]; +- enum ld_plugin_symbol_resolution r = (enum ld_plugin_symbol_resolution) 0; +- unsigned int j; +- unsigned int lto_resolution_str_len = +- sizeof (lto_resolution_str) / sizeof (char *); +- res_pair rp; +- +- t = fscanf (resolution, "%u " HOST_WIDE_INT_PRINT_HEX_PURE " %26s %*[^\n]\n", +- &index, &id, r_str); +- if (t != 3) +- internal_error ("invalid line in the resolution file"); +- +- for (j = 0; j < lto_resolution_str_len; j++) +- { +- if (strcmp (lto_resolution_str[j], r_str) == 0) +- { +- r = (enum ld_plugin_symbol_resolution) j; +- break; +- } +- } +- if (j == lto_resolution_str_len) +- internal_error ("invalid resolution in the resolution file"); +- +- if (!(nd && lto_splay_tree_id_equal_p (nd->key, id))) +- { +- nd = lto_splay_tree_lookup (file_ids, id); +- if (nd == NULL) +- internal_error ("resolution sub id %wx not in object file", id); +- } +- +- file_data = (struct lto_file_decl_data *)nd->value; +- /* The indexes are very sparse. To save memory save them in a compact +- format that is only unpacked later when the subfile is processed. */ +- rp.res = r; +- rp.index = index; +- file_data->respairs.safe_push (rp); +- if (file_data->max_index < index) +- file_data->max_index = index; +- } +-} +- +-/* List of file_decl_datas */ +-struct file_data_list +- { +- struct lto_file_decl_data *first, *last; +- }; +- +-/* Is the name for a id'ed LTO section? */ +- +-static int +-lto_section_with_id (const char *name, unsigned HOST_WIDE_INT *id) +-{ +- const char *s; +- +- if (strncmp (name, section_name_prefix, strlen (section_name_prefix))) +- return 0; +- s = strrchr (name, '.'); +- if (!s) +- return 0; +- /* If the section is not suffixed with an ID return. */ +- if ((size_t)(s - name) == strlen (section_name_prefix)) +- return 0; +- return sscanf (s, "." 
HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1; +-} +- +-/* Create file_data of each sub file id */ +- +-static int +-create_subid_section_table (struct lto_section_slot *ls, splay_tree file_ids, +- struct file_data_list *list) +-{ +- struct lto_section_slot s_slot, *new_slot; +- unsigned HOST_WIDE_INT id; +- splay_tree_node nd; +- void **hash_slot; +- char *new_name; +- struct lto_file_decl_data *file_data; +- +- if (!lto_section_with_id (ls->name, &id)) +- return 1; +- +- /* Find hash table of sub module id */ +- nd = lto_splay_tree_lookup (file_ids, id); +- if (nd != NULL) +- { +- file_data = (struct lto_file_decl_data *)nd->value; +- } +- else +- { +- file_data = ggc_alloc (); +- memset(file_data, 0, sizeof (struct lto_file_decl_data)); +- file_data->id = id; +- file_data->section_hash_table = lto_obj_create_section_hash_table (); +- lto_splay_tree_insert (file_ids, id, file_data); +- +- /* Maintain list in linker order */ +- if (!list->first) +- list->first = file_data; +- if (list->last) +- list->last->next = file_data; +- list->last = file_data; +- } +- +- /* Copy section into sub module hash table */ +- new_name = XDUPVEC (char, ls->name, strlen (ls->name) + 1); +- s_slot.name = new_name; +- hash_slot = htab_find_slot (file_data->section_hash_table, &s_slot, INSERT); +- gcc_assert (*hash_slot == NULL); +- +- new_slot = XDUP (struct lto_section_slot, ls); +- new_slot->name = new_name; +- *hash_slot = new_slot; +- return 1; +-} +- +-/* Read declarations and other initializations for a FILE_DATA. */ +- +-static void +-lto_file_finalize (struct lto_file_decl_data *file_data, lto_file *file) +-{ +- const char *data; +- size_t len; +- vec +- resolutions = vNULL; +- int i; +- res_pair *rp; +- +- /* Create vector for fast access of resolution. We do this lazily +- to save memory. */ +- resolutions.safe_grow_cleared (file_data->max_index + 1); +- for (i = 0; file_data->respairs.iterate (i, &rp); i++) +- resolutions[rp->index] = rp->res; +- file_data->respairs.release (); +- +- file_data->renaming_hash_table = lto_create_renaming_table (); +- file_data->file_name = file->filename; +-#ifdef ACCEL_COMPILER +- lto_input_mode_table (file_data); +-#else +- file_data->mode_table = lto_mode_identity_table; +-#endif +- data = lto_get_section_data (file_data, LTO_section_decls, NULL, &len); +- if (data == NULL) +- { +- internal_error ("cannot read LTO decls from %s", file_data->file_name); +- return; +- } +- /* Frees resolutions */ +- lto_read_decls (file_data, data, resolutions); +- lto_free_section_data (file_data, LTO_section_decls, NULL, data, len); +-} +- +-/* Finalize FILE_DATA in FILE and increase COUNT. */ +- +-static int +-lto_create_files_from_ids (lto_file *file, struct lto_file_decl_data *file_data, +- int *count) +-{ +- lto_file_finalize (file_data, file); +- if (symtab->dump_file) +- fprintf (symtab->dump_file, +- "Creating file %s with sub id " HOST_WIDE_INT_PRINT_HEX "\n", +- file_data->file_name, file_data->id); +- (*count)++; +- return 0; +-} +- +-/* Generate a TREE representation for all types and external decls +- entities in FILE. +- +- Read all of the globals out of the file. Then read the cgraph +- and process the .o index into the cgraph nodes so that it can open +- the .o file to load the functions and ipa information. 
*/ +- +-static struct lto_file_decl_data * +-lto_file_read (lto_file *file, FILE *resolution_file, int *count) +-{ +- struct lto_file_decl_data *file_data = NULL; +- splay_tree file_ids; +- htab_t section_hash_table; +- struct lto_section_slot *section; +- struct file_data_list file_list; +- struct lto_section_list section_list; +- +- memset (&section_list, 0, sizeof (struct lto_section_list)); +- section_hash_table = lto_obj_build_section_table (file, &section_list); +- +- /* Find all sub modules in the object and put their sections into new hash +- tables in a splay tree. */ +- file_ids = lto_splay_tree_new (); +- memset (&file_list, 0, sizeof (struct file_data_list)); +- for (section = section_list.first; section != NULL; section = section->next) +- create_subid_section_table (section, file_ids, &file_list); +- +- /* Add resolutions to file ids */ +- lto_resolution_read (file_ids, resolution_file, file); +- +- /* Finalize each lto file for each submodule in the merged object */ +- for (file_data = file_list.first; file_data != NULL; file_data = file_data->next) +- lto_create_files_from_ids (file, file_data, count); +- +- splay_tree_delete (file_ids); +- htab_delete (section_hash_table); +- +- return file_list.first; +-} +- +-#if HAVE_MMAP_FILE && HAVE_SYSCONF && defined _SC_PAGE_SIZE +-#define LTO_MMAP_IO 1 +-#endif +- +-#if LTO_MMAP_IO +-/* Page size of machine is used for mmap and munmap calls. */ +-static size_t page_mask; +-#endif +- +-/* Get the section data of length LEN from FILENAME starting at +- OFFSET. The data segment must be freed by the caller when the +- caller is finished. Returns NULL if all was not well. */ +- +-static char * +-lto_read_section_data (struct lto_file_decl_data *file_data, +- intptr_t offset, size_t len) +-{ +- char *result; +- static int fd = -1; +- static char *fd_name; +-#if LTO_MMAP_IO +- intptr_t computed_len; +- intptr_t computed_offset; +- intptr_t diff; +-#endif +- +- /* Keep a single-entry file-descriptor cache. The last file we +- touched will get closed at exit. +- ??? Eventually we want to add a more sophisticated larger cache +- or rather fix function body streaming to not stream them in +- practically random order. */ +- if (fd != -1 +- && filename_cmp (fd_name, file_data->file_name) != 0) +- { +- free (fd_name); +- close (fd); +- fd = -1; +- } +- if (fd == -1) +- { +- fd = open (file_data->file_name, O_RDONLY|O_BINARY); +- if (fd == -1) +- { +- fatal_error (input_location, "Cannot open %s", file_data->file_name); +- return NULL; +- } +- fd_name = xstrdup (file_data->file_name); +- } +- +-#if LTO_MMAP_IO +- if (!page_mask) +- { +- size_t page_size = sysconf (_SC_PAGE_SIZE); +- page_mask = ~(page_size - 1); +- } +- +- computed_offset = offset & page_mask; +- diff = offset - computed_offset; +- computed_len = len + diff; +- +- result = (char *) mmap (NULL, computed_len, PROT_READ, MAP_PRIVATE, +- fd, computed_offset); +- if (result == MAP_FAILED) +- { +- fatal_error (input_location, "Cannot map %s", file_data->file_name); +- return NULL; +- } +- +- return result + diff; +-#else +- result = (char *) xmalloc (len); +- if (lseek (fd, offset, SEEK_SET) != offset +- || read (fd, result, len) != (ssize_t) len) +- { +- free (result); +- fatal_error (input_location, "Cannot read %s", file_data->file_name); +- result = NULL; +- } +-#ifdef __MINGW32__ +- /* Native windows doesn't supports delayed unlink on opened file. So +- we close file here again. This produces higher I/O load, but at least +- it prevents to have dangling file handles preventing unlink. 
*/ +- free (fd_name); +- fd_name = NULL; +- close (fd); +- fd = -1; +-#endif +- return result; +-#endif +-} ++ struct cgraph_node *node; ++ timevar_id_t lto_timer; + ++ if (!quiet_flag) ++ fprintf (stderr, ++ flag_wpa ? "Materializing decls:" : "Reading function bodies:"); + +-/* Get the section data from FILE_DATA of SECTION_TYPE with NAME. +- NAME will be NULL unless the section type is for a function +- body. */ + +-static const char * +-get_section_data (struct lto_file_decl_data *file_data, +- enum lto_section_type section_type, +- const char *name, +- size_t *len) +-{ +- htab_t section_hash_table = file_data->section_hash_table; +- struct lto_section_slot *f_slot; +- struct lto_section_slot s_slot; +- const char *section_name = lto_get_section_name (section_type, name, file_data); +- char *data = NULL; +- +- *len = 0; +- s_slot.name = section_name; +- f_slot = (struct lto_section_slot *) htab_find (section_hash_table, &s_slot); +- if (f_slot) ++ FOR_EACH_FUNCTION (node) + { +- data = lto_read_section_data (file_data, f_slot->start, f_slot->len); +- *len = f_slot->len; ++ if (node->lto_file_data) ++ { ++ lto_materialize_function (node); ++ lto_stats.num_input_cgraph_nodes++; ++ } + } + +- free (CONST_CAST (char *, section_name)); +- return data; +-} +- + +-/* Free the section data from FILE_DATA of SECTION_TYPE with NAME that +- starts at OFFSET and has LEN bytes. */ ++ /* Start the appropriate timer depending on the mode that we are ++ operating in. */ ++ lto_timer = (flag_wpa) ? TV_WHOPR_WPA ++ : (flag_ltrans) ? TV_WHOPR_LTRANS ++ : TV_LTO; ++ timevar_push (lto_timer); + +-static void +-free_section_data (struct lto_file_decl_data *file_data ATTRIBUTE_UNUSED, +- enum lto_section_type section_type ATTRIBUTE_UNUSED, +- const char *name ATTRIBUTE_UNUSED, +- const char *offset, size_t len ATTRIBUTE_UNUSED) +-{ +-#if LTO_MMAP_IO +- intptr_t computed_len; +- intptr_t computed_offset; +- intptr_t diff; +-#endif ++ current_function_decl = NULL; ++ set_cfun (NULL); + +-#if LTO_MMAP_IO +- computed_offset = ((intptr_t) offset) & page_mask; +- diff = (intptr_t) offset - computed_offset; +- computed_len = len + diff; ++ if (!quiet_flag) ++ fprintf (stderr, "\n"); + +- munmap ((caddr_t) computed_offset, computed_len); +-#else +- free (CONST_CAST(char *, offset)); +-#endif ++ timevar_pop (lto_timer); + } + +-static lto_file *current_lto_file; +- + /* Actually stream out ENCODER into TEMP_FILENAME. */ + + static void +@@ -2560,581 +411,6 @@ lto_wpa_write_files (void) + timevar_pop (TV_WHOPR_WPA_IO); + } + +- +-/* If TT is a variable or function decl replace it with its +- prevailing variant. */ +-#define LTO_SET_PREVAIL(tt) \ +- do {\ +- if ((tt) && VAR_OR_FUNCTION_DECL_P (tt) \ +- && (TREE_PUBLIC (tt) || DECL_EXTERNAL (tt))) \ +- { \ +- tt = lto_symtab_prevailing_decl (tt); \ +- fixed = true; \ +- } \ +- } while (0) +- +-/* Ensure that TT isn't a replacable var of function decl. */ +-#define LTO_NO_PREVAIL(tt) \ +- gcc_checking_assert (!(tt) || !VAR_OR_FUNCTION_DECL_P (tt)) +- +-/* Given a tree T replace all fields referring to variables or functions +- with their prevailing variant. */ +-static void +-lto_fixup_prevailing_decls (tree t) +-{ +- enum tree_code code = TREE_CODE (t); +- bool fixed = false; +- +- gcc_checking_assert (code != TREE_BINFO); +- LTO_NO_PREVAIL (TREE_TYPE (t)); +- if (CODE_CONTAINS_STRUCT (code, TS_COMMON) +- /* lto_symtab_prevail_decl use TREE_CHAIN to link to the prevailing decl. +- in the case T is a prevailed declaration we would ICE here. 
*/ +- && !VAR_OR_FUNCTION_DECL_P (t)) +- LTO_NO_PREVAIL (TREE_CHAIN (t)); +- if (DECL_P (t)) +- { +- LTO_NO_PREVAIL (DECL_NAME (t)); +- LTO_SET_PREVAIL (DECL_CONTEXT (t)); +- if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) +- { +- LTO_SET_PREVAIL (DECL_SIZE (t)); +- LTO_SET_PREVAIL (DECL_SIZE_UNIT (t)); +- LTO_SET_PREVAIL (DECL_INITIAL (t)); +- LTO_NO_PREVAIL (DECL_ATTRIBUTES (t)); +- LTO_SET_PREVAIL (DECL_ABSTRACT_ORIGIN (t)); +- } +- if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) +- { +- LTO_NO_PREVAIL (DECL_ASSEMBLER_NAME_RAW (t)); +- } +- if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON)) +- { +- LTO_NO_PREVAIL (DECL_RESULT_FLD (t)); +- } +- if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) +- { +- LTO_NO_PREVAIL (DECL_ARGUMENTS (t)); +- LTO_SET_PREVAIL (DECL_FUNCTION_PERSONALITY (t)); +- LTO_NO_PREVAIL (DECL_VINDEX (t)); +- } +- if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL)) +- { +- LTO_SET_PREVAIL (DECL_FIELD_OFFSET (t)); +- LTO_NO_PREVAIL (DECL_BIT_FIELD_TYPE (t)); +- LTO_NO_PREVAIL (DECL_QUALIFIER (t)); +- LTO_NO_PREVAIL (DECL_FIELD_BIT_OFFSET (t)); +- LTO_NO_PREVAIL (DECL_FCONTEXT (t)); +- } +- } +- else if (TYPE_P (t)) +- { +- LTO_NO_PREVAIL (TYPE_CACHED_VALUES (t)); +- LTO_SET_PREVAIL (TYPE_SIZE (t)); +- LTO_SET_PREVAIL (TYPE_SIZE_UNIT (t)); +- LTO_NO_PREVAIL (TYPE_ATTRIBUTES (t)); +- LTO_NO_PREVAIL (TYPE_NAME (t)); +- +- LTO_SET_PREVAIL (TYPE_MIN_VALUE_RAW (t)); +- LTO_SET_PREVAIL (TYPE_MAX_VALUE_RAW (t)); +- LTO_NO_PREVAIL (TYPE_LANG_SLOT_1 (t)); +- +- LTO_SET_PREVAIL (TYPE_CONTEXT (t)); +- +- LTO_NO_PREVAIL (TYPE_CANONICAL (t)); +- LTO_NO_PREVAIL (TYPE_MAIN_VARIANT (t)); +- LTO_NO_PREVAIL (TYPE_NEXT_VARIANT (t)); +- } +- else if (EXPR_P (t)) +- { +- int i; +- for (i = TREE_OPERAND_LENGTH (t) - 1; i >= 0; --i) +- LTO_SET_PREVAIL (TREE_OPERAND (t, i)); +- } +- else if (TREE_CODE (t) == CONSTRUCTOR) +- { +- unsigned i; +- tree val; +- FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (t), i, val) +- LTO_SET_PREVAIL (val); +- } +- else +- { +- switch (code) +- { +- case TREE_LIST: +- LTO_SET_PREVAIL (TREE_VALUE (t)); +- LTO_SET_PREVAIL (TREE_PURPOSE (t)); +- LTO_NO_PREVAIL (TREE_PURPOSE (t)); +- break; +- default: +- gcc_unreachable (); +- } +- } +- /* If we fixed nothing, then we missed something seen by +- mentions_vars_p. */ +- gcc_checking_assert (fixed); +-} +-#undef LTO_SET_PREVAIL +-#undef LTO_NO_PREVAIL +- +-/* Helper function of lto_fixup_decls. Walks the var and fn streams in STATE, +- replaces var and function decls with the corresponding prevailing def. */ +- +-static void +-lto_fixup_state (struct lto_in_decl_state *state) +-{ +- unsigned i, si; +- +- /* Although we only want to replace FUNCTION_DECLs and VAR_DECLs, +- we still need to walk from all DECLs to find the reachable +- FUNCTION_DECLs and VAR_DECLs. */ +- for (si = 0; si < LTO_N_DECL_STREAMS; si++) +- { +- vec *trees = state->streams[si]; +- for (i = 0; i < vec_safe_length (trees); i++) +- { +- tree t = (*trees)[i]; +- if (flag_checking && TYPE_P (t)) +- verify_type (t); +- if (VAR_OR_FUNCTION_DECL_P (t) +- && (TREE_PUBLIC (t) || DECL_EXTERNAL (t))) +- (*trees)[i] = lto_symtab_prevailing_decl (t); +- } +- } +-} +- +-/* Fix the decls from all FILES. Replaces each decl with the corresponding +- prevailing one. 
*/ +- +-static void +-lto_fixup_decls (struct lto_file_decl_data **files) +-{ +- unsigned int i; +- tree t; +- +- if (tree_with_vars) +- FOR_EACH_VEC_ELT ((*tree_with_vars), i, t) +- lto_fixup_prevailing_decls (t); +- +- for (i = 0; files[i]; i++) +- { +- struct lto_file_decl_data *file = files[i]; +- struct lto_in_decl_state *state = file->global_decl_state; +- lto_fixup_state (state); +- +- hash_table::iterator iter; +- lto_in_decl_state *elt; +- FOR_EACH_HASH_TABLE_ELEMENT (*file->function_decl_states, elt, +- lto_in_decl_state *, iter) +- lto_fixup_state (elt); +- } +-} +- +-static GTY((length ("lto_stats.num_input_files + 1"))) struct lto_file_decl_data **all_file_decl_data; +- +-/* Turn file datas for sub files into a single array, so that they look +- like separate files for further passes. */ +- +-static void +-lto_flatten_files (struct lto_file_decl_data **orig, int count, int last_file_ix) +-{ +- struct lto_file_decl_data *n, *next; +- int i, k; +- +- lto_stats.num_input_files = count; +- all_file_decl_data +- = ggc_cleared_vec_alloc (count + 1); +- /* Set the hooks so that all of the ipa passes can read in their data. */ +- lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); +- for (i = 0, k = 0; i < last_file_ix; i++) +- { +- for (n = orig[i]; n != NULL; n = next) +- { +- all_file_decl_data[k++] = n; +- next = n->next; +- n->next = NULL; +- } +- } +- all_file_decl_data[k] = NULL; +- gcc_assert (k == count); +-} +- +-/* Input file data before flattening (i.e. splitting them to subfiles to support +- incremental linking. */ +-static int real_file_count; +-static GTY((length ("real_file_count + 1"))) struct lto_file_decl_data **real_file_decl_data; +- +-static void print_lto_report_1 (void); +- +-/* Read all the symbols from the input files FNAMES. NFILES is the +- number of files requested in the command line. Instantiate a +- global call graph by aggregating all the sub-graphs found in each +- file. */ +- +-static void +-read_cgraph_and_symbols (unsigned nfiles, const char **fnames) +-{ +- unsigned int i, last_file_ix; +- FILE *resolution; +- int count = 0; +- struct lto_file_decl_data **decl_data; +- symtab_node *snode; +- +- symtab->initialize (); +- +- timevar_push (TV_IPA_LTO_DECL_IN); +- +-#ifdef ACCEL_COMPILER +- section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX; +- lto_stream_offload_p = true; +-#endif +- +- real_file_decl_data +- = decl_data = ggc_cleared_vec_alloc (nfiles + 1); +- real_file_count = nfiles; +- +- /* Read the resolution file. */ +- resolution = NULL; +- if (resolution_file_name) +- { +- int t; +- unsigned num_objects; +- +- resolution = fopen (resolution_file_name, "r"); +- if (resolution == NULL) +- fatal_error (input_location, +- "could not open symbol resolution file: %m"); +- +- t = fscanf (resolution, "%u", &num_objects); +- gcc_assert (t == 1); +- +- /* True, since the plugin splits the archives. */ +- gcc_assert (num_objects == nfiles); +- } +- symtab->state = LTO_STREAMING; +- +- canonical_type_hash_cache = new hash_map (251); +- gimple_canonical_types = htab_create (16381, gimple_canonical_type_hash, +- gimple_canonical_type_eq, NULL); +- gcc_obstack_init (&tree_scc_hash_obstack); +- tree_scc_hash = new hash_table (4096); +- +- /* Register the common node types with the canonical type machinery so +- we properly share alias-sets across languages and TUs. Do not +- expose the common nodes as type merge target - those that should be +- are already exposed so by pre-loading the LTO streamer caches. 
+- Do two passes - first clear TYPE_CANONICAL and then re-compute it. */ +- for (i = 0; i < itk_none; ++i) +- lto_register_canonical_types (integer_types[i], true); +- for (i = 0; i < stk_type_kind_last; ++i) +- lto_register_canonical_types (sizetype_tab[i], true); +- for (i = 0; i < TI_MAX; ++i) +- lto_register_canonical_types (global_trees[i], true); +- for (i = 0; i < itk_none; ++i) +- lto_register_canonical_types (integer_types[i], false); +- for (i = 0; i < stk_type_kind_last; ++i) +- lto_register_canonical_types (sizetype_tab[i], false); +- for (i = 0; i < TI_MAX; ++i) +- lto_register_canonical_types (global_trees[i], false); +- +- if (!quiet_flag) +- fprintf (stderr, "Reading object files:"); +- +- /* Read all of the object files specified on the command line. */ +- for (i = 0, last_file_ix = 0; i < nfiles; ++i) +- { +- struct lto_file_decl_data *file_data = NULL; +- if (!quiet_flag) +- { +- fprintf (stderr, " %s", fnames[i]); +- fflush (stderr); +- } +- +- current_lto_file = lto_obj_file_open (fnames[i], false); +- if (!current_lto_file) +- break; +- +- file_data = lto_file_read (current_lto_file, resolution, &count); +- if (!file_data) +- { +- lto_obj_file_close (current_lto_file); +- free (current_lto_file); +- current_lto_file = NULL; +- break; +- } +- +- decl_data[last_file_ix++] = file_data; +- +- lto_obj_file_close (current_lto_file); +- free (current_lto_file); +- current_lto_file = NULL; +- } +- +- lto_flatten_files (decl_data, count, last_file_ix); +- lto_stats.num_input_files = count; +- ggc_free(decl_data); +- real_file_decl_data = NULL; +- +- if (resolution_file_name) +- fclose (resolution); +- +- /* Show the LTO report before launching LTRANS. */ +- if (flag_lto_report || (flag_wpa && flag_lto_report_wpa)) +- print_lto_report_1 (); +- +- /* Free gimple type merging datastructures. */ +- delete tree_scc_hash; +- tree_scc_hash = NULL; +- obstack_free (&tree_scc_hash_obstack, NULL); +- htab_delete (gimple_canonical_types); +- gimple_canonical_types = NULL; +- delete canonical_type_hash_cache; +- canonical_type_hash_cache = NULL; +- +- /* At this stage we know that majority of GGC memory is reachable. +- Growing the limits prevents unnecesary invocation of GGC. */ +- ggc_grow (); +- ggc_collect (); +- +- /* Set the hooks so that all of the ipa passes can read in their data. */ +- lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); +- +- timevar_pop (TV_IPA_LTO_DECL_IN); +- +- if (!quiet_flag) +- fprintf (stderr, "\nReading the callgraph\n"); +- +- timevar_push (TV_IPA_LTO_CGRAPH_IO); +- /* Read the symtab. */ +- input_symtab (); +- +- input_offload_tables (!flag_ltrans); +- +- /* Store resolutions into the symbol table. 
*/ +- +- FOR_EACH_SYMBOL (snode) +- if (snode->externally_visible && snode->real_symbol_p () +- && snode->lto_file_data && snode->lto_file_data->resolution_map +- && !(TREE_CODE (snode->decl) == FUNCTION_DECL +- && fndecl_built_in_p (snode->decl)) +- && !(VAR_P (snode->decl) && DECL_HARD_REGISTER (snode->decl))) +- { +- ld_plugin_symbol_resolution_t *res; +- +- res = snode->lto_file_data->resolution_map->get (snode->decl); +- if (!res || *res == LDPR_UNKNOWN) +- { +- if (snode->output_to_lto_symbol_table_p ()) +- fatal_error (input_location, "missing resolution data for %s", +- IDENTIFIER_POINTER +- (DECL_ASSEMBLER_NAME (snode->decl))); +- } +- else +- snode->resolution = *res; +- } +- for (i = 0; all_file_decl_data[i]; i++) +- if (all_file_decl_data[i]->resolution_map) +- { +- delete all_file_decl_data[i]->resolution_map; +- all_file_decl_data[i]->resolution_map = NULL; +- } +- +- timevar_pop (TV_IPA_LTO_CGRAPH_IO); +- +- if (!quiet_flag) +- fprintf (stderr, "Merging declarations\n"); +- +- timevar_push (TV_IPA_LTO_DECL_MERGE); +- /* Merge global decls. In ltrans mode we read merged cgraph, we do not +- need to care about resolving symbols again, we only need to replace +- duplicated declarations read from the callgraph and from function +- sections. */ +- if (!flag_ltrans) +- { +- lto_symtab_merge_decls (); +- +- /* If there were errors during symbol merging bail out, we have no +- good way to recover here. */ +- if (seen_error ()) +- fatal_error (input_location, +- "errors during merging of translation units"); +- +- /* Fixup all decls. */ +- lto_fixup_decls (all_file_decl_data); +- } +- if (tree_with_vars) +- ggc_free (tree_with_vars); +- tree_with_vars = NULL; +- ggc_collect (); +- +- timevar_pop (TV_IPA_LTO_DECL_MERGE); +- /* Each pass will set the appropriate timer. */ +- +- if (!quiet_flag) +- fprintf (stderr, "Reading summaries\n"); +- +- /* Read the IPA summary data. */ +- if (flag_ltrans) +- ipa_read_optimization_summaries (); +- else +- ipa_read_summaries (); +- +- for (i = 0; all_file_decl_data[i]; i++) +- { +- gcc_assert (all_file_decl_data[i]->symtab_node_encoder); +- lto_symtab_encoder_delete (all_file_decl_data[i]->symtab_node_encoder); +- all_file_decl_data[i]->symtab_node_encoder = NULL; +- lto_free_function_in_decl_state (all_file_decl_data[i]->global_decl_state); +- all_file_decl_data[i]->global_decl_state = NULL; +- all_file_decl_data[i]->current_decl_state = NULL; +- } +- +- if (!flag_ltrans) +- { +- /* Finally merge the cgraph according to the decl merging decisions. */ +- timevar_push (TV_IPA_LTO_CGRAPH_MERGE); +- +- gcc_assert (!dump_file); +- dump_file = dump_begin (lto_link_dump_id, NULL); +- +- if (dump_file) +- { +- fprintf (dump_file, "Before merging:\n"); +- symtab->dump (dump_file); +- } +- lto_symtab_merge_symbols (); +- /* Removal of unreachable symbols is needed to make verify_symtab to pass; +- we are still having duplicated comdat groups containing local statics. +- We could also just remove them while merging. */ +- symtab->remove_unreachable_nodes (dump_file); +- ggc_collect (); +- +- if (dump_file) +- dump_end (lto_link_dump_id, dump_file); +- dump_file = NULL; +- timevar_pop (TV_IPA_LTO_CGRAPH_MERGE); +- } +- symtab->state = IPA_SSA; +- /* All node removals happening here are useless, because +- WPA should not stream them. 
Still always perform remove_unreachable_nodes +- because we may reshape clone tree, get rid of dead masters of inline +- clones and remove symbol entries for read-only variables we keep around +- only to be able to constant fold them. */ +- if (flag_ltrans) +- { +- if (symtab->dump_file) +- symtab->dump (symtab->dump_file); +- symtab->remove_unreachable_nodes (symtab->dump_file); +- } +- +- /* Indicate that the cgraph is built and ready. */ +- symtab->function_flags_ready = true; +- +- ggc_free (all_file_decl_data); +- all_file_decl_data = NULL; +-} +- +- +-/* Materialize all the bodies for all the nodes in the callgraph. */ +- +-static void +-materialize_cgraph (void) +-{ +- struct cgraph_node *node; +- timevar_id_t lto_timer; +- +- if (!quiet_flag) +- fprintf (stderr, +- flag_wpa ? "Materializing decls:" : "Reading function bodies:"); +- +- +- FOR_EACH_FUNCTION (node) +- { +- if (node->lto_file_data) +- { +- lto_materialize_function (node); +- lto_stats.num_input_cgraph_nodes++; +- } +- } +- +- +- /* Start the appropriate timer depending on the mode that we are +- operating in. */ +- lto_timer = (flag_wpa) ? TV_WHOPR_WPA +- : (flag_ltrans) ? TV_WHOPR_LTRANS +- : TV_LTO; +- timevar_push (lto_timer); +- +- current_function_decl = NULL; +- set_cfun (NULL); +- +- if (!quiet_flag) +- fprintf (stderr, "\n"); +- +- timevar_pop (lto_timer); +-} +- +- +-/* Show various memory usage statistics related to LTO. */ +-static void +-print_lto_report_1 (void) +-{ +- const char *pfx = (flag_lto) ? "LTO" : (flag_wpa) ? "WPA" : "LTRANS"; +- fprintf (stderr, "%s statistics\n", pfx); +- +- fprintf (stderr, "[%s] read %lu SCCs of average size %f\n", +- pfx, num_sccs_read, total_scc_size / (double)num_sccs_read); +- fprintf (stderr, "[%s] %lu tree bodies read in total\n", pfx, total_scc_size); +- if (flag_wpa && tree_scc_hash) +- { +- fprintf (stderr, "[%s] tree SCC table: size %ld, %ld elements, " +- "collision ratio: %f\n", pfx, +- (long) tree_scc_hash->size (), +- (long) tree_scc_hash->elements (), +- tree_scc_hash->collisions ()); +- hash_table::iterator hiter; +- tree_scc *scc, *max_scc = NULL; +- unsigned max_length = 0; +- FOR_EACH_HASH_TABLE_ELEMENT (*tree_scc_hash, scc, x, hiter) +- { +- unsigned length = 0; +- tree_scc *s = scc; +- for (; s; s = s->next) +- length++; +- if (length > max_length) +- { +- max_length = length; +- max_scc = scc; +- } +- } +- fprintf (stderr, "[%s] tree SCC max chain length %u (size %u)\n", +- pfx, max_length, max_scc->len); +- fprintf (stderr, "[%s] Compared %lu SCCs, %lu collisions (%f)\n", pfx, +- num_scc_compares, num_scc_compare_collisions, +- num_scc_compare_collisions / (double) num_scc_compares); +- fprintf (stderr, "[%s] Merged %lu SCCs\n", pfx, num_sccs_merged); +- fprintf (stderr, "[%s] Merged %lu tree bodies\n", pfx, +- total_scc_size_merged); +- fprintf (stderr, "[%s] Merged %lu types\n", pfx, num_merged_types); +- fprintf (stderr, "[%s] %lu types prevailed (%lu associated trees)\n", +- pfx, num_prevailing_types, num_type_scc_trees); +- fprintf (stderr, "[%s] GIMPLE canonical type table: size %ld, " +- "%ld elements, %ld searches, %ld collisions (ratio: %f)\n", pfx, +- (long) htab_size (gimple_canonical_types), +- (long) htab_elements (gimple_canonical_types), +- (long) gimple_canonical_types->searches, +- (long) gimple_canonical_types->collisions, +- htab_collisions (gimple_canonical_types)); +- fprintf (stderr, "[%s] GIMPLE canonical type pointer-map: " +- "%lu elements, %ld searches\n", pfx, +- num_canonical_type_hash_entries, +- 
num_canonical_type_hash_queries); +- } +- +- print_lto_report (pfx); +-} +- + /* Perform whole program analysis (WPA) on the callgraph and write out the + optimization plan. */ + +@@ -3262,64 +538,6 @@ do_whole_program_analysis (void) + dump_memory_report (true); + } + +- +-static GTY(()) tree lto_eh_personality_decl; +- +-/* Return the LTO personality function decl. */ +- +-tree +-lto_eh_personality (void) +-{ +- if (!lto_eh_personality_decl) +- { +- /* Use the first personality DECL for our personality if we don't +- support multiple ones. This ensures that we don't artificially +- create the need for them in a single-language program. */ +- if (first_personality_decl && !dwarf2out_do_cfi_asm ()) +- lto_eh_personality_decl = first_personality_decl; +- else +- lto_eh_personality_decl = lhd_gcc_personality (); +- } +- +- return lto_eh_personality_decl; +-} +- +-/* Set the process name based on the LTO mode. */ +- +-static void +-lto_process_name (void) +-{ +- if (flag_lto) +- setproctitle (flag_incremental_link == INCREMENTAL_LINK_LTO +- ? "lto1-inclink" : "lto1-lto"); +- if (flag_wpa) +- setproctitle ("lto1-wpa"); +- if (flag_ltrans) +- setproctitle ("lto1-ltrans"); +-} +- +- +-/* Initialize the LTO front end. */ +- +-static void +-lto_init (void) +-{ +- lto_process_name (); +- lto_streamer_hooks_init (); +- lto_reader_init (); +- lto_set_in_hooks (NULL, get_section_data, free_section_data); +- memset (&lto_stats, 0, sizeof (lto_stats)); +- bitmap_obstack_initialize (NULL); +- gimple_register_cfg_hooks (); +-#ifndef ACCEL_COMPILER +- unsigned char *table +- = ggc_vec_alloc (MAX_MACHINE_MODE); +- for (int m = 0; m < MAX_MACHINE_MODE; m++) +- table[m] = m; +- lto_mode_identity_table = table; +-#endif +-} +- + /* Create artificial pointers for "omp declare target link" vars. */ + + static void +@@ -3351,7 +569,6 @@ offload_handle_link_vars (void) + #endif + } + +- + /* Main entry point for the GIMPLE front end. This front end has + three main personalities: + +@@ -3386,7 +603,7 @@ lto_main (void) + timevar_start (TV_PHASE_SETUP); + + /* Initialize the LTO front end. */ +- lto_init (); ++ lto_fe_init (); + + timevar_stop (TV_PHASE_SETUP); + timevar_start (TV_PHASE_STREAM_IN); +@@ -3439,5 +656,3 @@ lto_main (void) + timevar_start (TV_PHASE_PARSING); + timevar_push (TV_PARSE_GLOBAL); + } +- +-#include "gt-lto-lto.h" +diff --git a/gcc/machmode.h b/gcc/machmode.h +index d564f9c64..a507ed66c 100644 +--- a/gcc/machmode.h ++++ b/gcc/machmode.h +@@ -244,14 +244,15 @@ class opt_mode + public: + enum from_int { dummy = MAX_MACHINE_MODE }; + +- ALWAYS_INLINE opt_mode () : m_mode (E_VOIDmode) {} +- ALWAYS_INLINE opt_mode (const T &m) : m_mode (m) {} ++ ALWAYS_INLINE CONSTEXPR opt_mode () : m_mode (E_VOIDmode) {} ++ ALWAYS_INLINE CONSTEXPR opt_mode (const T &m) : m_mode (m) {} + template +- ALWAYS_INLINE opt_mode (const U &m) : m_mode (T (m)) {} +- ALWAYS_INLINE opt_mode (from_int m) : m_mode (machine_mode (m)) {} ++ ALWAYS_INLINE CONSTEXPR opt_mode (const U &m) : m_mode (T (m)) {} ++ ALWAYS_INLINE CONSTEXPR opt_mode (from_int m) : m_mode (machine_mode (m)) {} + + machine_mode else_void () const; +- machine_mode else_blk () const; ++ machine_mode else_blk () const { return else_mode (BLKmode); } ++ machine_mode else_mode (machine_mode) const; + T require () const; + + bool exists () const; +@@ -274,13 +275,13 @@ opt_mode::else_void () const + return m_mode; + } + +-/* If the T exists, return its enum value, otherwise return E_BLKmode. 
*/ ++/* If the T exists, return its enum value, otherwise return FALLBACK. */ + + template + inline machine_mode +-opt_mode::else_blk () const ++opt_mode::else_mode (machine_mode fallback) const + { +- return m_mode == E_VOIDmode ? E_BLKmode : m_mode; ++ return m_mode == E_VOIDmode ? fallback : m_mode; + } + + /* Assert that the object contains a T and return it. */ +@@ -326,8 +327,12 @@ struct pod_mode + typedef typename T::measurement_type measurement_type; + + machine_mode m_mode; +- ALWAYS_INLINE operator machine_mode () const { return m_mode; } +- ALWAYS_INLINE operator T () const { return from_int (m_mode); } ++ ALWAYS_INLINE CONSTEXPR ++ operator machine_mode () const { return m_mode; } ++ ++ ALWAYS_INLINE CONSTEXPR ++ operator T () const { return from_int (m_mode); } ++ + ALWAYS_INLINE pod_mode &operator = (const T &m) { m_mode = m; return *this; } + }; + +@@ -405,8 +410,11 @@ public: + typedef unsigned short measurement_type; + + ALWAYS_INLINE scalar_int_mode () {} +- ALWAYS_INLINE scalar_int_mode (from_int m) : m_mode (machine_mode (m)) {} +- ALWAYS_INLINE operator machine_mode () const { return m_mode; } ++ ++ ALWAYS_INLINE CONSTEXPR ++ scalar_int_mode (from_int m) : m_mode (machine_mode (m)) {} ++ ++ ALWAYS_INLINE CONSTEXPR operator machine_mode () const { return m_mode; } + + static bool includes_p (machine_mode); + +@@ -430,8 +438,11 @@ public: + typedef unsigned short measurement_type; + + ALWAYS_INLINE scalar_float_mode () {} +- ALWAYS_INLINE scalar_float_mode (from_int m) : m_mode (machine_mode (m)) {} +- ALWAYS_INLINE operator machine_mode () const { return m_mode; } ++ ++ ALWAYS_INLINE CONSTEXPR ++ scalar_float_mode (from_int m) : m_mode (machine_mode (m)) {} ++ ++ ALWAYS_INLINE CONSTEXPR operator machine_mode () const { return m_mode; } + + static bool includes_p (machine_mode); + +@@ -455,11 +466,20 @@ public: + typedef unsigned short measurement_type; + + ALWAYS_INLINE scalar_mode () {} +- ALWAYS_INLINE scalar_mode (from_int m) : m_mode (machine_mode (m)) {} +- ALWAYS_INLINE scalar_mode (const scalar_int_mode &m) : m_mode (m) {} +- ALWAYS_INLINE scalar_mode (const scalar_float_mode &m) : m_mode (m) {} +- ALWAYS_INLINE scalar_mode (const scalar_int_mode_pod &m) : m_mode (m) {} +- ALWAYS_INLINE operator machine_mode () const { return m_mode; } ++ ++ ALWAYS_INLINE CONSTEXPR ++ scalar_mode (from_int m) : m_mode (machine_mode (m)) {} ++ ++ ALWAYS_INLINE CONSTEXPR ++ scalar_mode (const scalar_int_mode &m) : m_mode (m) {} ++ ++ ALWAYS_INLINE CONSTEXPR ++ scalar_mode (const scalar_float_mode &m) : m_mode (m) {} ++ ++ ALWAYS_INLINE CONSTEXPR ++ scalar_mode (const scalar_int_mode_pod &m) : m_mode (m) {} ++ ++ ALWAYS_INLINE CONSTEXPR operator machine_mode () const { return m_mode; } + + static bool includes_p (machine_mode); + +@@ -496,8 +516,11 @@ public: + typedef unsigned short measurement_type; + + ALWAYS_INLINE complex_mode () {} +- ALWAYS_INLINE complex_mode (from_int m) : m_mode (machine_mode (m)) {} +- ALWAYS_INLINE operator machine_mode () const { return m_mode; } ++ ++ ALWAYS_INLINE CONSTEXPR ++ complex_mode (from_int m) : m_mode (machine_mode (m)) {} ++ ++ ALWAYS_INLINE CONSTEXPR operator machine_mode () const { return m_mode; } + + static bool includes_p (machine_mode); + +@@ -766,14 +789,29 @@ public: + typedef unsigned short measurement_type; + + ALWAYS_INLINE fixed_size_mode () {} +- ALWAYS_INLINE fixed_size_mode (from_int m) : m_mode (machine_mode (m)) {} +- ALWAYS_INLINE fixed_size_mode (const scalar_mode &m) : m_mode (m) {} +- ALWAYS_INLINE fixed_size_mode 
(const scalar_int_mode &m) : m_mode (m) {} +- ALWAYS_INLINE fixed_size_mode (const scalar_float_mode &m) : m_mode (m) {} +- ALWAYS_INLINE fixed_size_mode (const scalar_mode_pod &m) : m_mode (m) {} +- ALWAYS_INLINE fixed_size_mode (const scalar_int_mode_pod &m) : m_mode (m) {} +- ALWAYS_INLINE fixed_size_mode (const complex_mode &m) : m_mode (m) {} +- ALWAYS_INLINE operator machine_mode () const { return m_mode; } ++ ++ ALWAYS_INLINE CONSTEXPR ++ fixed_size_mode (from_int m) : m_mode (machine_mode (m)) {} ++ ++ ALWAYS_INLINE CONSTEXPR ++ fixed_size_mode (const scalar_mode &m) : m_mode (m) {} ++ ++ ALWAYS_INLINE CONSTEXPR ++ fixed_size_mode (const scalar_int_mode &m) : m_mode (m) {} ++ ++ ALWAYS_INLINE CONSTEXPR ++ fixed_size_mode (const scalar_float_mode &m) : m_mode (m) {} ++ ++ ALWAYS_INLINE CONSTEXPR ++ fixed_size_mode (const scalar_mode_pod &m) : m_mode (m) {} ++ ++ ALWAYS_INLINE CONSTEXPR ++ fixed_size_mode (const scalar_int_mode_pod &m) : m_mode (m) {} ++ ++ ALWAYS_INLINE CONSTEXPR ++ fixed_size_mode (const complex_mode &m) : m_mode (m) {} ++ ++ ALWAYS_INLINE CONSTEXPR operator machine_mode () const { return m_mode; } + + static bool includes_p (machine_mode); + +diff --git a/gcc/match.pd b/gcc/match.pd +index f7e192d9b..facc43387 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -82,12 +82,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + plus minus + mult trunc_div trunc_mod rdiv + min max +- bit_and bit_ior bit_xor) ++ bit_and bit_ior bit_xor ++ lshift rshift) + (define_operator_list COND_BINARY + IFN_COND_ADD IFN_COND_SUB + IFN_COND_MUL IFN_COND_DIV IFN_COND_MOD IFN_COND_RDIV + IFN_COND_MIN IFN_COND_MAX +- IFN_COND_AND IFN_COND_IOR IFN_COND_XOR) ++ IFN_COND_AND IFN_COND_IOR IFN_COND_XOR ++ IFN_COND_SHL IFN_COND_SHR) + + /* Same for ternary operations. */ + (define_operator_list UNCOND_TERNARY +@@ -5378,3 +5380,86 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (bit_and:elt_type + (BIT_FIELD_REF:elt_type @0 { size; } { pos; }) + { elt; }))))))) ++ ++(simplify ++ (vec_perm @0 @1 VECTOR_CST@2) ++ (with ++ { ++ tree op0 = @0, op1 = @1, op2 = @2; ++ ++ /* Build a vector of integers from the tree mask. */ ++ vec_perm_builder builder; ++ if (!tree_to_vec_perm_builder (&builder, op2)) ++ return NULL_TREE; ++ ++ /* Create a vec_perm_indices for the integer vector. */ ++ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); ++ bool single_arg = (op0 == op1); ++ vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); ++ } ++ (if (sel.series_p (0, 1, 0, 1)) ++ { op0; } ++ (if (sel.series_p (0, 1, nelts, 1)) ++ { op1; } ++ (with ++ { ++ if (!single_arg) ++ { ++ if (sel.all_from_input_p (0)) ++ op1 = op0; ++ else if (sel.all_from_input_p (1)) ++ { ++ op0 = op1; ++ sel.rotate_inputs (1); ++ } ++ } ++ gassign *def; ++ tree cop0 = op0, cop1 = op1; ++ if (TREE_CODE (op0) == SSA_NAME ++ && (def = dyn_cast (SSA_NAME_DEF_STMT (op0))) ++ && gimple_assign_rhs_code (def) == CONSTRUCTOR) ++ cop0 = gimple_assign_rhs1 (def); ++ if (TREE_CODE (op1) == SSA_NAME ++ && (def = dyn_cast (SSA_NAME_DEF_STMT (op1))) ++ && gimple_assign_rhs_code (def) == CONSTRUCTOR) ++ cop1 = gimple_assign_rhs1 (def); ++ ++ tree t; ++ } ++ (if ((TREE_CODE (cop0) == VECTOR_CST ++ || TREE_CODE (cop0) == CONSTRUCTOR) ++ && (TREE_CODE (cop1) == VECTOR_CST ++ || TREE_CODE (cop1) == CONSTRUCTOR) ++ && (t = fold_vec_perm (type, cop0, cop1, sel))) ++ { t; } ++ (with ++ { ++ bool changed = (op0 == op1 && !single_arg); ++ ++ /* Generate a canonical form of the selector. 
*/ ++ if (sel.encoding () != builder) ++ { ++ /* Some targets are deficient and fail to expand a single ++ argument permutation while still allowing an equivalent ++ 2-argument version. */ ++ tree oldop2 = op2; ++ if (sel.ninputs () == 2 ++ || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) ++ op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); ++ else ++ { ++ vec_perm_indices sel2 (builder, 2, nelts); ++ if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) ++ op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2); ++ else ++ /* Not directly supported with either encoding, ++ so use the preferred form. */ ++ op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); ++ } ++ /* Differences in the encoder do not necessarily mean ++ differences in the resulting vector. */ ++ changed = !operand_equal_p (op2, oldop2, 0); ++ } ++ } ++ (if (changed) ++ (vec_perm { op0; } { op1; } { op2; }))))))))) +diff --git a/gcc/mode-switching.c b/gcc/mode-switching.c +index 2ff21a400..4a34d4a2b 100644 +--- a/gcc/mode-switching.c ++++ b/gcc/mode-switching.c +@@ -165,7 +165,7 @@ new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live) + ptr->insn_ptr = insn; + ptr->bbnum = bb; + ptr->next = NULL; +- COPY_HARD_REG_SET (ptr->regs_live, regs_live); ++ ptr->regs_live = regs_live; + return ptr; + } + +@@ -637,7 +637,7 @@ optimize_mode_switching (void) + if (REG_NOTE_KIND (link) == REG_DEAD) + reg_dies (XEXP (link, 0), &live_now); + +- note_stores (PATTERN (insn), reg_becomes_live, &live_now); ++ note_stores (insn, reg_becomes_live, &live_now); + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_UNUSED) + reg_dies (XEXP (link, 0), &live_now); +diff --git a/gcc/omp-simd-clone.c b/gcc/omp-simd-clone.c +index 10490f34f..d884514cc 100644 +--- a/gcc/omp-simd-clone.c ++++ b/gcc/omp-simd-clone.c +@@ -461,8 +461,7 @@ simd_clone_create (struct cgraph_node *old_node) + if (new_node == NULL) + return new_node; + +- DECL_BUILT_IN_CLASS (new_node->decl) = NOT_BUILT_IN; +- DECL_FUNCTION_CODE (new_node->decl) = (enum built_in_function) 0; ++ set_decl_built_in_function (new_node->decl, NOT_BUILT_IN, 0); + TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl); + DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl); + DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl); +diff --git a/gcc/opt-suggestions.c b/gcc/opt-suggestions.c +index a820c78ff..1ec94203c 100644 +--- a/gcc/opt-suggestions.c ++++ b/gcc/opt-suggestions.c +@@ -307,7 +307,6 @@ test_completion_valid_options (option_proposer &proposer) + "-Wassign-intercept", + "-Wno-format-security", + "-fno-sched-stalled-insns", +- "-fbtr-bb-exclusive", + "-fno-tree-tail-merge", + "-Wlong-long", + "-Wno-unused-but-set-parameter", +diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c +index 341e02bd5..7bad9c87b 100644 +--- a/gcc/optabs-tree.c ++++ b/gcc/optabs-tree.c +@@ -267,20 +267,16 @@ optab_for_tree_code (enum tree_code code, const_tree type, + + Convert operations we currently support directly are FIX_TRUNC and FLOAT. + This function checks if these operations are supported +- by the target platform either directly (via vector tree-codes), or via +- target builtins. ++ by the target platform directly (via vector tree-codes). + + Output: + - CODE1 is code of vector operation to be used when +- vectorizing the operation, if available. +- - DECL is decl of target builtin functions to be used +- when vectorizing the operation, if available. In this case, +- CODE1 is CALL_EXPR. 
*/ ++ vectorizing the operation, if available. */ + + bool + supportable_convert_operation (enum tree_code code, + tree vectype_out, tree vectype_in, +- tree *decl, enum tree_code *code1) ++ enum tree_code *code1) + { + machine_mode m1,m2; + bool truncp; +@@ -314,15 +310,6 @@ supportable_convert_operation (enum tree_code code, + return true; + } + +- /* Now check for builtin. */ +- if (targetm.vectorize.builtin_conversion +- && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in)) +- { +- *code1 = CALL_EXPR; +- *decl = targetm.vectorize.builtin_conversion (code, vectype_out, +- vectype_in); +- return true; +- } + return false; + } + +diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h +index 5e4848997..dac350142 100644 +--- a/gcc/optabs-tree.h ++++ b/gcc/optabs-tree.h +@@ -36,7 +36,7 @@ enum optab_subtype + the second argument. The third argument distinguishes between the types of + vector shifts and rotates. */ + optab optab_for_tree_code (enum tree_code, const_tree, enum optab_subtype); +-bool supportable_convert_operation (enum tree_code, tree, tree, tree *, ++bool supportable_convert_operation (enum tree_code, tree, tree, + enum tree_code *); + bool expand_vec_cmp_expr_p (tree, tree, enum tree_code); + bool expand_vec_cond_expr_p (tree, tree, enum tree_code); +diff --git a/gcc/optabs.c b/gcc/optabs.c +index c2c1274eb..d9788d248 100644 +--- a/gcc/optabs.c ++++ b/gcc/optabs.c +@@ -3727,7 +3727,7 @@ emit_libcall_block_1 (rtx_insn *insns, rtx target, rtx result, rtx equiv, + data.first = insns; + data.insn = insn; + data.must_stay = 0; +- note_stores (PATTERN (insn), no_conflict_move_test, &data); ++ note_stores (insn, no_conflict_move_test, &data); + if (! data.must_stay) + { + if (PREV_INSN (insn)) +@@ -6428,7 +6428,7 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, + /* Otherwise, work out if the compare-and-swap succeeded. */ + cc_reg = NULL_RTX; + if (have_insn_for (COMPARE, CCmode)) +- note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg); ++ note_stores (get_last_insn (), find_cc_set, &cc_reg); + if (cc_reg) + { + target_bool = emit_store_flag_force (target_bool, EQ, cc_reg, +@@ -7181,18 +7181,16 @@ static bool + maybe_legitimize_operand (enum insn_code icode, unsigned int opno, + struct expand_operand *op) + { +- machine_mode mode, imode; +- bool old_volatile_ok, result; ++ machine_mode mode, imode, tmode; + + mode = op->mode; + switch (op->type) + { + case EXPAND_FIXED: +- old_volatile_ok = volatile_ok; +- volatile_ok = true; +- result = maybe_legitimize_operand_same_code (icode, opno, op); +- volatile_ok = old_volatile_ok; +- return result; ++ { ++ temporary_volatile_ok v (true); ++ return maybe_legitimize_operand_same_code (icode, opno, op); ++ } + + case EXPAND_OUTPUT: + gcc_assert (mode != VOIDmode); +@@ -7230,9 +7228,17 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno, + gcc_assert (mode != VOIDmode); + + imode = insn_data[(int) icode].operand[opno].mode; ++ tmode = (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode) ++ ? 
GET_MODE_INNER (imode) : imode); ++ if (tmode != VOIDmode && tmode != mode) ++ { ++ op->value = convert_modes (tmode, mode, op->value, op->unsigned_p); ++ mode = tmode; ++ } + if (imode != VOIDmode && imode != mode) + { +- op->value = convert_modes (imode, mode, op->value, op->unsigned_p); ++ gcc_assert (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode)); ++ op->value = expand_vector_broadcast (imode, op->value); + mode = imode; + } + goto input; +diff --git a/gcc/optabs.def b/gcc/optabs.def +index 8af3a2f43..912766656 100644 +--- a/gcc/optabs.def ++++ b/gcc/optabs.def +@@ -230,6 +230,9 @@ OPTAB_D (cond_umod_optab, "cond_umod$a") + OPTAB_D (cond_and_optab, "cond_and$a") + OPTAB_D (cond_ior_optab, "cond_ior$a") + OPTAB_D (cond_xor_optab, "cond_xor$a") ++OPTAB_D (cond_ashl_optab, "cond_ashl$a") ++OPTAB_D (cond_ashr_optab, "cond_ashr$a") ++OPTAB_D (cond_lshr_optab, "cond_lshr$a") + OPTAB_D (cond_smin_optab, "cond_smin$a") + OPTAB_D (cond_smax_optab, "cond_smax$a") + OPTAB_D (cond_umin_optab, "cond_umin$a") +@@ -256,7 +259,7 @@ OPTAB_D (umul_highpart_optab, "umul$a3_highpart") + OPTAB_D (cmpmem_optab, "cmpmem$a") + OPTAB_D (cmpstr_optab, "cmpstr$a") + OPTAB_D (cmpstrn_optab, "cmpstrn$a") +-OPTAB_D (movmem_optab, "movmem$a") ++OPTAB_D (cpymem_optab, "cpymem$a") + OPTAB_D (setmem_optab, "setmem$a") + OPTAB_D (strlen_optab, "strlen$a") + +@@ -323,6 +326,7 @@ OPTAB_D (reduc_and_scal_optab, "reduc_and_scal_$a") + OPTAB_D (reduc_ior_scal_optab, "reduc_ior_scal_$a") + OPTAB_D (reduc_xor_scal_optab, "reduc_xor_scal_$a") + OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a") ++OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a") + + OPTAB_D (extract_last_optab, "extract_last_$a") + OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a") +@@ -337,6 +341,11 @@ OPTAB_D (udot_prod_optab, "udot_prod$I$a") + OPTAB_D (usum_widen_optab, "widen_usum$I$a3") + OPTAB_D (usad_optab, "usad$I$a") + OPTAB_D (ssad_optab, "ssad$I$a") ++OPTAB_D (smulhs_optab, "smulhs$a3") ++OPTAB_D (smulhrs_optab, "smulhrs$a3") ++OPTAB_D (umulhs_optab, "umulhs$a3") ++OPTAB_D (umulhrs_optab, "umulhrs$a3") ++OPTAB_D (sdiv_pow2_optab, "sdiv_pow2$a3") + OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a") + OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a") + OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a") +diff --git a/gcc/optabs.h b/gcc/optabs.h +index 17b5dfb67..18dec50f5 100644 +--- a/gcc/optabs.h ++++ b/gcc/optabs.h +@@ -128,7 +128,11 @@ create_convert_operand_to (struct expand_operand *op, rtx value, + /* Make OP describe an input operand that should have the same value + as VALUE, after any mode conversion that the backend might request. + If VALUE is a CONST_INT, it should be treated as having mode MODE. +- UNSIGNED_P says whether VALUE is unsigned. */ ++ UNSIGNED_P says whether VALUE is unsigned. ++ ++ The conversion of VALUE can include a combination of numerical ++ conversion (as for convert_modes) and duplicating a scalar to fill ++ a vector (if VALUE is a scalar but the operand is a vector). */ + + static inline void + create_convert_operand_from (struct expand_operand *op, rtx value, +diff --git a/gcc/opts-global.c b/gcc/opts-global.c +index 4f8aac7e9..6e4f2d528 100644 +--- a/gcc/opts-global.c ++++ b/gcc/opts-global.c +@@ -255,6 +255,7 @@ init_options_once (void) + construct their pretty-printers means that all previous settings + are overriden. 
*/ + diagnostic_color_init (global_dc); ++ diagnostic_urls_init (global_dc); + } + + /* Decode command-line options to an array, like +diff --git a/gcc/opts.c b/gcc/opts.c +index 494be7a9f..a8db491b5 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -465,7 +465,6 @@ static const struct default_options default_options_table[] = + { OPT_LEVELS_1_PLUS, OPT_ftree_copy_prop, NULL, 1 }, + { OPT_LEVELS_1_PLUS, OPT_ftree_dce, NULL, 1 }, + { OPT_LEVELS_1_PLUS, OPT_ftree_dominator_opts, NULL, 1 }, +- { OPT_LEVELS_1_PLUS, OPT_ftree_dse, NULL, 1 }, + { OPT_LEVELS_1_PLUS, OPT_ftree_fre, NULL, 1 }, + { OPT_LEVELS_1_PLUS, OPT_ftree_sink, NULL, 1 }, + { OPT_LEVELS_1_PLUS, OPT_ftree_slsr, NULL, 1 }, +@@ -476,14 +475,16 @@ static const struct default_options default_options_table[] = + #if DELAY_SLOTS + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fdelayed_branch, NULL, 1 }, + #endif ++ { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fdse, NULL, 1 }, + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fif_conversion, NULL, 1 }, + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fif_conversion2, NULL, 1 }, + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 }, + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fmove_loop_invariants, NULL, 1 }, + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fssa_phiopt, NULL, 1 }, + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_bit_ccp, NULL, 1 }, +- { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 }, ++ { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_dse, NULL, 1 }, + { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_pta, NULL, 1 }, ++ { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 }, + + /* -O2 and -Os optimizations. */ + { OPT_LEVELS_2_PLUS, OPT_fcaller_saves, NULL, 1 }, +@@ -521,6 +522,7 @@ static const struct default_options default_options_table[] = + { OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_ftree_vrp, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP }, ++ { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, + + /* -O2 and -Os optimizations. */ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, +@@ -536,9 +538,6 @@ static const struct default_options default_options_table[] = + #endif + + /* -O3 and -Os optimizations. */ +- /* Inlining of functions reducing size is a good idea with -Os +- regardless of them being declared inline. */ +- { OPT_LEVELS_3_PLUS_AND_SIZE, OPT_finline_functions, NULL, 1 }, + + /* -O3 optimizations. */ + { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 }, +@@ -2400,6 +2399,10 @@ common_handle_option (struct gcc_options *opts, + diagnostic_color_init (dc, value); + break; + ++ case OPT_fdiagnostics_urls_: ++ diagnostic_urls_init (dc, value); ++ break; ++ + case OPT_fdiagnostics_format_: + diagnostic_output_format_init (dc, + (enum diagnostics_output_format)value); +diff --git a/gcc/params.def b/gcc/params.def +index 08c709636..0ef092214 100644 +--- a/gcc/params.def ++++ b/gcc/params.def +@@ -61,8 +61,13 @@ DEFPARAM (PARAM_PREDICTABLE_BRANCH_OUTCOME, + + DEFPARAM (PARAM_INLINE_MIN_SPEEDUP, + "inline-min-speedup", ++ "The minimal estimated speedup allowing inliner to ignore inline-insns-single and inline-insns-auto with -O3 and -Ofast.", ++ 15, 0, 100) ++ ++DEFPARAM (PARAM_INLINE_MIN_SPEEDUP_O2, ++ "inline-min-speedup-O2", + "The minimal estimated speedup allowing inliner to ignore inline-insns-single and inline-insns-auto.", +- 15, 0, 0) ++ 30, 0, 100) + + /* The single function inlining limit. 
This is the maximum size + of a function counted in internal gcc instructions (not in +@@ -77,9 +82,14 @@ DEFPARAM (PARAM_INLINE_MIN_SPEEDUP, + gets decreased. */ + DEFPARAM (PARAM_MAX_INLINE_INSNS_SINGLE, + "max-inline-insns-single", +- "The maximum number of instructions in a single function eligible for inlining.", ++ "The maximum number of instructions in a single function eligible for inlining with -O3 and -Ofast.", + 200, 0, 0) + ++DEFPARAM (PARAM_MAX_INLINE_INSNS_SINGLE_O2, ++ "max-inline-insns-single-O2", ++ "The maximum number of instructions in a single function eligible for inlining.", ++ 30, 0, 0) ++ + /* The single function inlining limit for functions that are + inlined by virtue of -finline-functions (-O3). + This limit should be chosen to be below or equal to the limit +@@ -89,9 +99,14 @@ DEFPARAM (PARAM_MAX_INLINE_INSNS_SINGLE, + The default value is 30. */ + DEFPARAM (PARAM_MAX_INLINE_INSNS_AUTO, + "max-inline-insns-auto", +- "The maximum number of instructions when automatically inlining.", ++ "The maximum number of instructions when automatically inlining with -O3 and -Ofast.", + 30, 0, 0) + ++DEFPARAM (PARAM_MAX_INLINE_INSNS_AUTO_O2, ++ "max-inline-insns-auto-O2", ++ "The maximum number of instructions when automatically inlining.", ++ 15, 0, 0) ++ + DEFPARAM (PARAM_MAX_INLINE_INSNS_SMALL, + "max-inline-insns-small", + "The maximum number of instructions when automatically inlining small functions.", +@@ -243,8 +258,12 @@ DEFPARAM(PARAM_IPCP_UNIT_GROWTH, + 10, 0, 0) + DEFPARAM(PARAM_EARLY_INLINING_INSNS, + "early-inlining-insns", +- "Maximal estimated growth of function body caused by early inlining of single call.", ++ "Maximal estimated growth of function body caused by early inlining of single call with -O3 and -Ofast.", + 14, 0, 0) ++DEFPARAM(PARAM_EARLY_INLINING_INSNS_O2, ++ "early-inlining-insns-O2", ++ "Maximal estimated growth of function body caused by early inlining of single call with -O1 and -O2.", ++ 6, 0, 0) + DEFPARAM(PARAM_LARGE_STACK_FRAME, + "large-stack-frame", + "The size of stack frame to be considered large.", +diff --git a/gcc/passes.def b/gcc/passes.def +index 901dbef93..a03685500 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -459,7 +459,6 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_split_after_reload); + NEXT_PASS (pass_ree); + NEXT_PASS (pass_compare_elim_after_reload); +- NEXT_PASS (pass_branch_target_load_optimize1); + NEXT_PASS (pass_thread_prologue_and_epilogue); + NEXT_PASS (pass_rtl_dse2); + NEXT_PASS (pass_stack_adjustments); +@@ -472,7 +471,6 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_cprop_hardreg); + NEXT_PASS (pass_fast_rtl_dce); + NEXT_PASS (pass_reorder_blocks); +- NEXT_PASS (pass_branch_target_load_optimize2); + NEXT_PASS (pass_leaf_regs); + NEXT_PASS (pass_split_before_sched2); + NEXT_PASS (pass_sched2); +diff --git a/gcc/postreload-gcse.c b/gcc/postreload-gcse.c +index a165351ca..bc2e8fc91 100644 +--- a/gcc/postreload-gcse.c ++++ b/gcc/postreload-gcse.c +@@ -672,7 +672,7 @@ load_killed_in_block_p (int uid_limit, rtx x, bool after_insn) + It will set mems_conflict_p to nonzero if there may be a + conflict between X and SETTER. */ + mems_conflict_p = 0; +- note_stores (PATTERN (setter), find_mem_conflicts, x); ++ note_stores (setter, find_mem_conflicts, x); + if (mems_conflict_p) + return 1; + +@@ -774,7 +774,7 @@ record_opr_changes (rtx_insn *insn) + rtx note; + + /* Find all stores and record them. 
*/ +- note_stores (PATTERN (insn), record_last_set_info, insn); ++ note_stores (insn, record_last_set_info, insn); + + /* Also record autoincremented REGs for this insn as changed. */ + for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) +@@ -785,25 +785,10 @@ record_opr_changes (rtx_insn *insn) + if (CALL_P (insn)) + { + unsigned int regno; +- rtx link, x; + hard_reg_set_iterator hrsi; + EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call, 0, regno, hrsi) + record_last_reg_set_info_regno (insn, regno); + +- for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) +- { +- gcc_assert (GET_CODE (XEXP (link, 0)) != CLOBBER_HIGH); +- if (GET_CODE (XEXP (link, 0)) == CLOBBER) +- { +- x = XEXP (XEXP (link, 0), 0); +- if (REG_P (x)) +- { +- gcc_assert (HARD_REGISTER_P (x)); +- record_last_reg_set_info (insn, x); +- } +- } +- } +- + if (! RTL_CONST_OR_PURE_CALL_P (insn)) + record_last_mem_set_info (insn); + } +diff --git a/gcc/postreload.c b/gcc/postreload.c +index b76c7b0b7..ee0dc6ae8 100644 +--- a/gcc/postreload.c ++++ b/gcc/postreload.c +@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see + #include "cselib.h" + #include "tree-pass.h" + #include "dbgcnt.h" ++#include "function-abi.h" + + static int reload_cse_noop_set_p (rtx); + static bool reload_cse_simplify (rtx_insn *, rtx); +@@ -133,8 +134,6 @@ reload_cse_simplify (rtx_insn *insn, rtx testreg) + for (i = XVECLEN (body, 0) - 1; i >= 0; --i) + { + rtx part = XVECEXP (body, 0, i); +- /* asms can only have full clobbers, not clobber_highs. */ +- gcc_assert (GET_CODE (part) != CLOBBER_HIGH); + if (GET_CODE (part) == CLOBBER && REG_P (XEXP (part, 0))) + cselib_invalidate_rtx (XEXP (part, 0)); + } +@@ -157,9 +156,7 @@ reload_cse_simplify (rtx_insn *insn, rtx testreg) + value = SET_DEST (part); + } + } +- else if (GET_CODE (part) != CLOBBER +- && GET_CODE (part) != CLOBBER_HIGH +- && GET_CODE (part) != USE) ++ else if (GET_CODE (part) != CLOBBER && GET_CODE (part) != USE) + break; + } + +@@ -1139,7 +1136,7 @@ reload_combine_recognize_pattern (rtx_insn *insn) + if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], i) + && reg_state[i].use_index == RELOAD_COMBINE_MAX_USES + && reg_state[i].store_ruid <= reg_state[regno].use_ruid +- && (call_used_regs[i] || df_regs_ever_live_p (i)) ++ && (call_used_or_fixed_reg_p (i) || df_regs_ever_live_p (i)) + && (!frame_pointer_needed || i != HARD_FRAME_POINTER_REGNUM) + && !fixed_regs[i] && !global_regs[i] + && hard_regno_nregs (i, GET_MODE (reg)) == 1 +@@ -1271,8 +1268,8 @@ reload_combine (void) + + REG_SET_TO_HARD_REG_SET (live, live_in); + compute_use_by_pseudos (&live, live_in); +- COPY_HARD_REG_SET (LABEL_LIVE (insn), live); +- IOR_HARD_REG_SET (ever_live_at_start, live); ++ LABEL_LIVE (insn) = live; ++ ever_live_at_start |= live; + } + } + +@@ -1329,14 +1326,15 @@ reload_combine (void) + || reload_combine_recognize_pattern (insn)) + continue; + +- note_stores (PATTERN (insn), reload_combine_note_store, NULL); ++ note_stores (insn, reload_combine_note_store, NULL); + + if (CALL_P (insn)) + { + rtx link; +- HARD_REG_SET used_regs; +- +- get_call_reg_set_usage (insn, &used_regs, call_used_reg_set); ++ HARD_REG_SET used_regs = insn_callee_abi (insn).full_reg_clobbers (); ++ /* ??? This preserves traditional behavior; it might not be ++ needed. 
*/ ++ used_regs |= fixed_reg_set; + + for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) + if (TEST_HARD_REG_BIT (used_regs, r)) +@@ -1350,22 +1348,12 @@ reload_combine (void) + { + rtx setuse = XEXP (link, 0); + rtx usage_rtx = XEXP (setuse, 0); +- /* We could support CLOBBER_HIGH and treat it in the same way as +- HARD_REGNO_CALL_PART_CLOBBERED, but no port needs that yet. */ +- gcc_assert (GET_CODE (setuse) != CLOBBER_HIGH); + +- if ((GET_CODE (setuse) == USE || GET_CODE (setuse) == CLOBBER) +- && REG_P (usage_rtx)) ++ if (GET_CODE (setuse) == USE && REG_P (usage_rtx)) + { + unsigned int end_regno = END_REGNO (usage_rtx); + for (unsigned int i = REGNO (usage_rtx); i < end_regno; ++i) +- if (GET_CODE (XEXP (link, 0)) == CLOBBER) +- { +- reg_state[i].use_index = RELOAD_COMBINE_MAX_USES; +- reg_state[i].store_ruid = reload_combine_ruid; +- } +- else +- reg_state[i].use_index = -1; ++ reg_state[i].use_index = -1; + } + } + } +@@ -1529,10 +1517,6 @@ reload_combine_note_use (rtx *xp, rtx_insn *insn, int ruid, rtx containing_mem) + } + break; + +- case CLOBBER_HIGH: +- gcc_assert (REG_P (SET_DEST (x))); +- return; +- + case PLUS: + /* We are interested in (plus (reg) (const_int)) . */ + if (!REG_P (XEXP (x, 0)) +@@ -2108,7 +2092,7 @@ reload_cse_move2add (rtx_insn *first) + } + } + } +- note_stores (PATTERN (insn), move2add_note_store, insn); ++ note_stores (insn, move2add_note_store, insn); + + /* If INSN is a conditional branch, we try to extract an + implicit set out of it. */ +@@ -2138,32 +2122,12 @@ reload_cse_move2add (rtx_insn *first) + unknown values. */ + if (CALL_P (insn)) + { +- rtx link; +- + for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--) + { +- if (call_used_regs[i]) ++ if (call_used_or_fixed_reg_p (i)) + /* Reset the information about this register. */ + reg_mode[i] = VOIDmode; + } +- +- for (link = CALL_INSN_FUNCTION_USAGE (insn); link; +- link = XEXP (link, 1)) +- { +- rtx setuse = XEXP (link, 0); +- rtx usage_rtx = XEXP (setuse, 0); +- /* CALL_INSN_FUNCTION_USAGEs can only have full clobbers, not +- clobber_highs. */ +- gcc_assert (GET_CODE (setuse) != CLOBBER_HIGH); +- if (GET_CODE (setuse) == CLOBBER +- && REG_P (usage_rtx)) +- { +- unsigned int end_regno = END_REGNO (usage_rtx); +- for (unsigned int r = REGNO (usage_rtx); r < end_regno; ++r) +- /* Reset the information about this register. */ +- reg_mode[r] = VOIDmode; +- } +- } + } + } + return changed; +@@ -2317,13 +2281,6 @@ move2add_note_store (rtx dst, const_rtx set, void *data) + + move2add_record_mode (dst); + } +- else if (GET_CODE (set) == CLOBBER_HIGH) +- { +- /* Only invalidate if actually clobbered. 
*/ +- if (reg_mode[regno] == BLKmode +- || reg_is_clobbered_by_clobber_high (regno, reg_mode[regno], dst)) +- goto invalidate; +- } + else + { + invalidate: +diff --git a/gcc/predict.c b/gcc/predict.c +index eaab47f99..03dd4ddfa 100644 +--- a/gcc/predict.c ++++ b/gcc/predict.c +@@ -2450,7 +2450,7 @@ expr_expected_value_1 (tree type, tree op0, enum tree_code code, + return NULL; + } + +- if (DECL_IS_MALLOC (decl) || DECL_IS_OPERATOR_NEW (decl)) ++ if (DECL_IS_MALLOC (decl) || DECL_IS_OPERATOR_NEW_P (decl)) + { + if (predictor) + *predictor = PRED_MALLOC_NONNULL; +diff --git a/gcc/pretty-print.c b/gcc/pretty-print.c +index 6948971ce..5af7ca764 100644 +--- a/gcc/pretty-print.c ++++ b/gcc/pretty-print.c +@@ -1579,7 +1579,8 @@ pretty_printer::pretty_printer (int maximum_length) + emitted_prefix (), + need_newline (), + translate_identifiers (true), +- show_color () ++ show_color (), ++ show_urls (false) + { + pp_line_cutoff (this) = maximum_length; + /* By default, we emit prefixes once per message. */ +@@ -2028,6 +2029,41 @@ identifier_to_locale (const char *ident) + } + } + ++/* Support for encoding URLs. ++ See egmontkob/Hyperlinks_in_Terminal_Emulators.md ++ ( https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda ). ++ ++ > A hyperlink is opened upon encountering an OSC 8 escape sequence with ++ > the target URI. The syntax is ++ > ++ > OSC 8 ; params ; URI ST ++ > ++ > A hyperlink is closed with the same escape sequence, omitting the ++ > parameters and the URI but keeping the separators: ++ > ++ > OSC 8 ; ; ST ++ > ++ > OSC (operating system command) is typically ESC ]. */ ++ ++/* If URL-printing is enabled, write an "open URL" escape sequence to PP ++ for the given URL. */ ++ ++void ++pp_begin_url (pretty_printer *pp, const char *url) ++{ ++ if (pp->show_urls) ++ pp_printf (pp, "\33]8;;%s\33\\", url); ++} ++ ++/* If URL-printing is enabled, write a "close URL" escape sequence to PP. */ ++ ++void ++pp_end_url (pretty_printer *pp) ++{ ++ if (pp->show_urls) ++ pp_string (pp, "\33]8;;\33\\"); ++} ++ + #if CHECKING_P + + namespace selftest { +@@ -2312,6 +2348,32 @@ test_prefixes_and_wrapping () + + } + ++/* Verify that URL-printing works as expected. */ ++ ++void ++test_urls () ++{ ++ { ++ pretty_printer pp; ++ pp.show_urls = false; ++ pp_begin_url (&pp, "http://example.com"); ++ pp_string (&pp, "This is a link"); ++ pp_end_url (&pp); ++ ASSERT_STREQ ("This is a link", ++ pp_formatted_text (&pp)); ++ } ++ ++ { ++ pretty_printer pp; ++ pp.show_urls = true; ++ pp_begin_url (&pp, "http://example.com"); ++ pp_string (&pp, "This is a link"); ++ pp_end_url (&pp); ++ ASSERT_STREQ ("\33]8;;http://example.com\33\\This is a link\33]8;;\33\\", ++ pp_formatted_text (&pp)); ++ } ++} ++ + /* Run all of the selftests within this file. */ + + void +@@ -2320,6 +2382,7 @@ pretty_print_c_tests () + test_basic_printing (); + test_pp_format (); + test_prefixes_and_wrapping (); ++ test_urls (); + } + + } // namespace selftest +diff --git a/gcc/pretty-print.h b/gcc/pretty-print.h +index e4df65907..07cd39176 100644 +--- a/gcc/pretty-print.h ++++ b/gcc/pretty-print.h +@@ -271,6 +271,9 @@ struct pretty_printer + + /* Nonzero means that text should be colorized. */ + bool show_color; ++ ++ /* Nonzero means that URLs should be emitted. 
*/ ++ bool show_urls; + }; + + static inline const char * +@@ -391,6 +394,9 @@ extern void pp_maybe_space (pretty_printer *); + extern void pp_begin_quote (pretty_printer *, bool); + extern void pp_end_quote (pretty_printer *, bool); + ++extern void pp_begin_url (pretty_printer *pp, const char *url); ++extern void pp_end_url (pretty_printer *pp); ++ + /* Switch into verbatim mode and return the old mode. */ + static inline pp_wrapping_mode_t + pp_set_verbatim_wrapping_ (pretty_printer *pp) +diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c +index fbb108568..01f281604 100644 +--- a/gcc/print-rtl.c ++++ b/gcc/print-rtl.c +@@ -1756,7 +1756,6 @@ print_pattern (pretty_printer *pp, const_rtx x, int verbose) + print_exp (pp, x, verbose); + break; + case CLOBBER: +- case CLOBBER_HIGH: + case USE: + pp_printf (pp, "%s ", GET_RTX_NAME (GET_CODE (x))); + print_value (pp, XEXP (x, 0), verbose); +diff --git a/gcc/print-tree.c b/gcc/print-tree.c +index 81b66a189..7c0d05548 100644 +--- a/gcc/print-tree.c ++++ b/gcc/print-tree.c +@@ -517,7 +517,11 @@ print_node (FILE *file, const char *prefix, tree node, int indent, + if (code == FUNCTION_DECL && fndecl_built_in_p (node)) + { + if (DECL_BUILT_IN_CLASS (node) == BUILT_IN_MD) +- fprintf (file, " built-in: BUILT_IN_MD:%d", DECL_FUNCTION_CODE (node)); ++ fprintf (file, " built-in: BUILT_IN_MD:%d", ++ DECL_MD_FUNCTION_CODE (node)); ++ else if (DECL_BUILT_IN_CLASS (node) == BUILT_IN_FRONTEND) ++ fprintf (file, " built-in: BUILT_IN_FRONTEND:%d", ++ DECL_FE_FUNCTION_CODE (node)); + else + fprintf (file, " built-in: %s:%s", + built_in_class_names[(int) DECL_BUILT_IN_CLASS (node)], +diff --git a/gcc/read-md.h b/gcc/read-md.h +index 18426f71d..327f378ea 100644 +--- a/gcc/read-md.h ++++ b/gcc/read-md.h +@@ -337,6 +337,7 @@ class rtx_reader : public md_reader + ~rtx_reader (); + + bool read_rtx (const char *rtx_name, vec *rtxen); ++ rtx rtx_alloc_for_name (const char *); + rtx read_rtx_code (const char *code_name); + virtual rtx read_rtx_operand (rtx return_rtx, int idx); + rtx read_nested_rtx (); +diff --git a/gcc/read-rtl-function.c b/gcc/read-rtl-function.c +index 53f7a94c1..ded407737 100644 +--- a/gcc/read-rtl-function.c ++++ b/gcc/read-rtl-function.c +@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3. If not see + #include "read-rtl-function.h" + #include "selftest.h" + #include "selftest-rtl.h" ++#include "regs.h" ++#include "function-abi.h" + + /* Forward decls. */ + class function_reader; +@@ -1610,6 +1612,7 @@ bool + read_rtl_function_body (const char *path) + { + initialize_rtl (); ++ crtl->abi = &default_function_abi; + init_emit (); + init_varasm_status (); + +@@ -1643,6 +1646,7 @@ read_rtl_function_body_from_file_range (location_t start_loc, + } + + initialize_rtl (); ++ crtl->abi = &fndecl_abi (cfun->decl).base_abi (); + init_emit (); + init_varasm_status (); + +diff --git a/gcc/read-rtl.c b/gcc/read-rtl.c +index 1af51f686..6b1b811cb 100644 +--- a/gcc/read-rtl.c ++++ b/gcc/read-rtl.c +@@ -194,22 +194,31 @@ static const compact_insn_name compact_insn_names[] = { + { NOTE, "cnote" } + }; + +-/* Implementations of the iterator_group callbacks for codes. */ ++/* Return the rtx code for NAME, or UNKNOWN if NAME isn't a valid rtx code. 
*/ + +-static int +-find_code (const char *name) ++static rtx_code ++maybe_find_code (const char *name) + { +- int i; +- +- for (i = 0; i < NUM_RTX_CODE; i++) ++ for (int i = 0; i < NUM_RTX_CODE; i++) + if (strcmp (GET_RTX_NAME (i), name) == 0) +- return i; ++ return (rtx_code) i; + +- for (i = 0; i < (signed)ARRAY_SIZE (compact_insn_names); i++) ++ for (int i = 0; i < (signed)ARRAY_SIZE (compact_insn_names); i++) + if (strcmp (compact_insn_names[i].name, name) == 0) + return compact_insn_names[i].code; + +- fatal_with_file_and_line ("unknown rtx code `%s'", name); ++ return UNKNOWN; ++} ++ ++/* Implementations of the iterator_group callbacks for codes. */ ++ ++static int ++find_code (const char *name) ++{ ++ rtx_code code = maybe_find_code (name); ++ if (code == UNKNOWN) ++ fatal_with_file_and_line ("unknown rtx code `%s'", name); ++ return code; + } + + static void +@@ -277,9 +286,11 @@ apply_subst_iterator (rtx rt, unsigned int, int value) + return; + gcc_assert (GET_CODE (rt) == DEFINE_INSN + || GET_CODE (rt) == DEFINE_INSN_AND_SPLIT ++ || GET_CODE (rt) == DEFINE_INSN_AND_REWRITE + || GET_CODE (rt) == DEFINE_EXPAND); + +- int attrs = GET_CODE (rt) == DEFINE_INSN_AND_SPLIT ? 7 : 4; ++ int attrs = (GET_CODE (rt) == DEFINE_INSN_AND_SPLIT ? 7 ++ : GET_CODE (rt) == DEFINE_INSN_AND_REWRITE ? 6 : 4); + attrs_vec = XVEC (rt, attrs); + + /* If we've already added attribute 'current_iterator_name', then we +@@ -540,6 +551,7 @@ add_condition_to_rtx (rtx x, const char *extra) + break; + + case DEFINE_INSN_AND_SPLIT: ++ case DEFINE_INSN_AND_REWRITE: + XSTR (x, 2) = add_condition_to_string (XSTR (x, 2), extra); + XSTR (x, 4) = add_condition_to_string (XSTR (x, 4), extra); + break; +@@ -623,6 +635,7 @@ named_rtx_p (rtx x) + case DEFINE_EXPAND: + case DEFINE_INSN: + case DEFINE_INSN_AND_SPLIT: ++ case DEFINE_INSN_AND_REWRITE: + return true; + + default: +@@ -1306,7 +1319,37 @@ check_code_iterator (struct mapping *iterator) + for (v = iterator->values->next; v != 0; v = v->next) + if (strcmp (GET_RTX_FORMAT (bellwether), GET_RTX_FORMAT (v->number)) != 0) + fatal_with_file_and_line ("code iterator `%s' combines " +- "different rtx formats", iterator->name); ++ "`%s' and `%s', which have different " ++ "rtx formats", iterator->name, ++ GET_RTX_NAME (bellwether), ++ GET_RTX_NAME (v->number)); ++} ++ ++/* Check that all values of attribute ATTR are rtx codes that have a ++ consistent format. Return a representative code. */ ++ ++static rtx_code ++check_code_attribute (mapping *attr) ++{ ++ rtx_code bellwether = UNKNOWN; ++ for (map_value *v = attr->values; v != 0; v = v->next) ++ { ++ rtx_code code = maybe_find_code (v->string); ++ if (code == UNKNOWN) ++ fatal_with_file_and_line ("code attribute `%s' contains " ++ "unrecognized rtx code `%s'", ++ attr->name, v->string); ++ if (bellwether == UNKNOWN) ++ bellwether = code; ++ else if (strcmp (GET_RTX_FORMAT (bellwether), ++ GET_RTX_FORMAT (code)) != 0) ++ fatal_with_file_and_line ("code attribute `%s' combines " ++ "`%s' and `%s', which have different " ++ "rtx formats", attr->name, ++ GET_RTX_NAME (bellwether), ++ GET_RTX_NAME (code)); ++ } ++ return bellwether; + } + + /* Read an rtx-related declaration from the MD file, given that it +@@ -1467,6 +1510,54 @@ parse_reg_note_name (const char *string) + fatal_with_file_and_line ("unrecognized REG_NOTE name: `%s'", string); + } + ++/* Allocate an rtx for code NAME. If NAME is a code iterator or code ++ attribute, record its use for later and use one of its possible ++ values as an interim rtx code. 
*/ ++ ++rtx ++rtx_reader::rtx_alloc_for_name (const char *name) ++{ ++#ifdef GENERATOR_FILE ++ size_t len = strlen (name); ++ if (name[0] == '<' && name[len - 1] == '>') ++ { ++ /* Copy the attribute string into permanent storage, without the ++ angle brackets around it. */ ++ obstack *strings = get_string_obstack (); ++ obstack_grow0 (strings, name + 1, len - 2); ++ char *deferred_name = XOBFINISH (strings, char *); ++ ++ /* Find the name of the attribute. */ ++ const char *attr = strchr (deferred_name, ':'); ++ if (!attr) ++ attr = deferred_name; ++ ++ /* Find the attribute itself. */ ++ mapping *m = (mapping *) htab_find (codes.attrs, &attr); ++ if (!m) ++ fatal_with_file_and_line ("unknown code attribute `%s'", attr); ++ ++ /* Pick the first possible code for now, and record the attribute ++ use for later. */ ++ rtx x = rtx_alloc (check_code_attribute (m)); ++ record_attribute_use (&codes, x, 0, deferred_name); ++ return x; ++ } ++ ++ mapping *iterator = (mapping *) htab_find (codes.iterators, &name); ++ if (iterator != 0) ++ { ++ /* Pick the first possible code for now, and record the iterator ++ use for later. */ ++ rtx x = rtx_alloc (rtx_code (iterator->values->number)); ++ record_iterator_use (iterator, x, 0); ++ return x; ++ } ++#endif ++ ++ return rtx_alloc (rtx_code (codes.find_builtin (name))); ++} ++ + /* Subroutine of read_rtx and read_nested_rtx. CODE_NAME is the name of + either an rtx code or a code iterator. Parse the rest of the rtx and + return it. */ +@@ -1475,7 +1566,6 @@ rtx + rtx_reader::read_rtx_code (const char *code_name) + { + RTX_CODE code; +- struct mapping *iterator = NULL; + const char *format_ptr; + struct md_name name; + rtx return_rtx; +@@ -1509,20 +1599,9 @@ rtx_reader::read_rtx_code (const char *code_name) + return return_rtx; + } + +- /* If this code is an iterator, build the rtx using the iterator's +- first value. */ +-#ifdef GENERATOR_FILE +- iterator = (struct mapping *) htab_find (codes.iterators, &code_name); +- if (iterator != 0) +- code = (enum rtx_code) iterator->values->number; +- else +- code = (enum rtx_code) codes.find_builtin (code_name); +-#else +- code = (enum rtx_code) codes.find_builtin (code_name); +-#endif +- + /* If we end up with an insn expression then we free this space below. */ +- return_rtx = rtx_alloc (code); ++ return_rtx = rtx_alloc_for_name (code_name); ++ code = GET_CODE (return_rtx); + format_ptr = GET_RTX_FORMAT (code); + memset (return_rtx, 0, RTX_CODE_SIZE (code)); + PUT_CODE (return_rtx, code); +@@ -1534,9 +1613,6 @@ rtx_reader::read_rtx_code (const char *code_name) + m_reuse_rtx_by_id[reuse_id] = return_rtx; + } + +- if (iterator) +- record_iterator_use (iterator, return_rtx, 0); +- + /* Check for flags. */ + read_flags (return_rtx); + +@@ -1765,8 +1841,8 @@ rtx_reader::read_rtx_operand (rtx return_rtx, int idx) + break; + } + +- /* The output template slot of a DEFINE_INSN, +- DEFINE_INSN_AND_SPLIT, or DEFINE_PEEPHOLE automatically ++ /* The output template slot of a DEFINE_INSN, DEFINE_INSN_AND_SPLIT, ++ DEFINE_INSN_AND_REWRITE or DEFINE_PEEPHOLE automatically + gets a star inserted as its first character, if it is + written with a brace block instead of a string constant. 
*/ + star_if_braced = (format_ptr[idx] == 'T'); +@@ -1783,7 +1859,8 @@ rtx_reader::read_rtx_operand (rtx return_rtx, int idx) + if (*stringbuf == '\0' + && idx == 0 + && (GET_CODE (return_rtx) == DEFINE_INSN +- || GET_CODE (return_rtx) == DEFINE_INSN_AND_SPLIT)) ++ || GET_CODE (return_rtx) == DEFINE_INSN_AND_SPLIT ++ || GET_CODE (return_rtx) == DEFINE_INSN_AND_REWRITE)) + { + struct obstack *string_obstack = get_string_obstack (); + char line_name[20]; +diff --git a/gcc/real.c b/gcc/real.c +index 0164f097a..a2bd37a9e 100644 +--- a/gcc/real.c ++++ b/gcc/real.c +@@ -4799,6 +4799,116 @@ decode_ieee_half (const struct real_format *fmt, REAL_VALUE_TYPE *r, + } + } + ++/* Encode arm_bfloat types. */ ++static void ++encode_arm_bfloat_half (const struct real_format *fmt, long *buf, ++ const REAL_VALUE_TYPE *r) ++{ ++ unsigned long image, sig, exp; ++ unsigned long sign = r->sign; ++ bool denormal = (r->sig[SIGSZ-1] & SIG_MSB) == 0; ++ ++ image = sign << 15; ++ sig = (r->sig[SIGSZ-1] >> (HOST_BITS_PER_LONG - 8)) & 0x7f; ++ ++ switch (r->cl) ++ { ++ case rvc_zero: ++ break; ++ ++ case rvc_inf: ++ if (fmt->has_inf) ++ image |= 255 << 7; ++ else ++ image |= 0x7fff; ++ break; ++ ++ case rvc_nan: ++ if (fmt->has_nans) ++ { ++ if (r->canonical) ++ sig = (fmt->canonical_nan_lsbs_set ? (1 << 6) - 1 : 0); ++ if (r->signalling == fmt->qnan_msb_set) ++ sig &= ~(1 << 6); ++ else ++ sig |= 1 << 6; ++ if (sig == 0) ++ sig = 1 << 5; ++ ++ image |= 255 << 7; ++ image |= sig; ++ } ++ else ++ image |= 0x7fff; ++ break; ++ ++ case rvc_normal: ++ if (denormal) ++ exp = 0; ++ else ++ exp = REAL_EXP (r) + 127 - 1; ++ image |= exp << 7; ++ image |= sig; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ buf[0] = image; ++} ++ ++/* Decode arm_bfloat types. */ ++static void ++decode_arm_bfloat_half (const struct real_format *fmt, REAL_VALUE_TYPE *r, ++ const long *buf) ++{ ++ unsigned long image = buf[0] & 0xffff; ++ bool sign = (image >> 15) & 1; ++ int exp = (image >> 7) & 0xff; ++ ++ memset (r, 0, sizeof (*r)); ++ image <<= HOST_BITS_PER_LONG - 8; ++ image &= ~SIG_MSB; ++ ++ if (exp == 0) ++ { ++ if (image && fmt->has_denorm) ++ { ++ r->cl = rvc_normal; ++ r->sign = sign; ++ SET_REAL_EXP (r, -126); ++ r->sig[SIGSZ-1] = image << 1; ++ normalize (r); ++ } ++ else if (fmt->has_signed_zero) ++ r->sign = sign; ++ } ++ else if (exp == 255 && (fmt->has_nans || fmt->has_inf)) ++ { ++ if (image) ++ { ++ r->cl = rvc_nan; ++ r->sign = sign; ++ r->signalling = (((image >> (HOST_BITS_PER_LONG - 2)) & 1) ++ ^ fmt->qnan_msb_set); ++ r->sig[SIGSZ-1] = image; ++ } ++ else ++ { ++ r->cl = rvc_inf; ++ r->sign = sign; ++ } ++ } ++ else ++ { ++ r->cl = rvc_normal; ++ r->sign = sign; ++ SET_REAL_EXP (r, exp - 127 + 1); ++ r->sig[SIGSZ-1] = image | SIG_MSB; ++ } ++} ++ + /* Half-precision format, as specified in IEEE 754R. */ + const struct real_format ieee_half_format = + { +@@ -4848,6 +4958,33 @@ const struct real_format arm_half_format = + false, + "arm_half" + }; ++ ++/* ARM Bfloat half-precision format. This format resembles a truncated ++ (16-bit) version of the 32-bit IEEE 754 single-precision floating-point ++ format. */ ++const struct real_format arm_bfloat_half_format = ++ { ++ encode_arm_bfloat_half, ++ decode_arm_bfloat_half, ++ 2, ++ 8, ++ 8, ++ -125, ++ 128, ++ 15, ++ 15, ++ 0, ++ false, ++ true, ++ true, ++ true, ++ true, ++ true, ++ true, ++ false, ++ "arm_bfloat_half" ++ }; ++ + + /* A synthetic "format" for internal arithmetic. It's the size of the + internal significand minus the two bits needed for proper rounding. 
+diff --git a/gcc/real.h b/gcc/real.h +index 95b9db83d..d1b79f804 100644 +--- a/gcc/real.h ++++ b/gcc/real.h +@@ -361,6 +361,7 @@ extern const struct real_format decimal_double_format; + extern const struct real_format decimal_quad_format; + extern const struct real_format ieee_half_format; + extern const struct real_format arm_half_format; ++extern const struct real_format arm_bfloat_half_format; + + + /* ====================================================================== */ +diff --git a/gcc/recog.c b/gcc/recog.c +index a9f584bc0..b12eba33a 100644 +--- a/gcc/recog.c ++++ b/gcc/recog.c +@@ -3227,7 +3227,8 @@ peep2_find_free_register (int from, int to, const char *class_str, + break; + } + /* And that we don't create an extra save/restore. */ +- if (! call_used_regs[regno + j] && ! df_regs_ever_live_p (regno + j)) ++ if (! call_used_or_fixed_reg_p (regno + j) ++ && ! df_regs_ever_live_p (regno + j)) + { + success = 0; + break; +@@ -3724,8 +3725,7 @@ store_data_bypass_p_1 (rtx_insn *out_insn, rtx in_set) + { + rtx out_exp = XVECEXP (out_pat, 0, i); + +- if (GET_CODE (out_exp) == CLOBBER || GET_CODE (out_exp) == USE +- || GET_CODE (out_exp) == CLOBBER_HIGH) ++ if (GET_CODE (out_exp) == CLOBBER || GET_CODE (out_exp) == USE) + continue; + + gcc_assert (GET_CODE (out_exp) == SET); +@@ -3756,8 +3756,7 @@ store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) + { + rtx in_exp = XVECEXP (in_pat, 0, i); + +- if (GET_CODE (in_exp) == CLOBBER || GET_CODE (in_exp) == USE +- || GET_CODE (in_exp) == CLOBBER_HIGH) ++ if (GET_CODE (in_exp) == CLOBBER || GET_CODE (in_exp) == USE) + continue; + + gcc_assert (GET_CODE (in_exp) == SET); +@@ -3809,7 +3808,7 @@ if_test_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) + { + rtx exp = XVECEXP (out_pat, 0, i); + +- if (GET_CODE (exp) == CLOBBER || GET_CODE (exp) == CLOBBER_HIGH) ++ if (GET_CODE (exp) == CLOBBER) + continue; + + gcc_assert (GET_CODE (exp) == SET); +diff --git a/gcc/recog.h b/gcc/recog.h +index 75cbbdc10..71d88e3e3 100644 +--- a/gcc/recog.h ++++ b/gcc/recog.h +@@ -142,7 +142,7 @@ extern void preprocess_constraints (rtx_insn *); + extern rtx_insn *peep2_next_insn (int); + extern int peep2_regno_dead_p (int, int); + extern int peep2_reg_dead_p (int, rtx); +-#ifdef CLEAR_HARD_REG_SET ++#ifdef HARD_CONST + extern rtx peep2_find_free_register (int, int, const char *, + machine_mode, HARD_REG_SET *); + #endif +@@ -186,6 +186,23 @@ skip_alternative (const char *p) + /* Nonzero means volatile operands are recognized. */ + extern int volatile_ok; + ++/* RAII class for temporarily setting volatile_ok. */ ++ ++class temporary_volatile_ok ++{ ++public: ++ temporary_volatile_ok (int value) : save_volatile_ok (volatile_ok) ++ { ++ volatile_ok = value; ++ } ++ ++ ~temporary_volatile_ok () { volatile_ok = save_volatile_ok; } ++ ++private: ++ temporary_volatile_ok (const temporary_volatile_ok &); ++ int save_volatile_ok; ++}; ++ + /* Set by constrain_operands to the number of the alternative that + matched. 
*/ + extern int which_alternative; +diff --git a/gcc/reg-stack.c b/gcc/reg-stack.c +index 033c978a1..b464f493f 100644 +--- a/gcc/reg-stack.c ++++ b/gcc/reg-stack.c +@@ -368,7 +368,7 @@ straighten_stack (rtx_insn *insn, stack_ptr regstack) + if (regstack->top <= 0) + return; + +- COPY_HARD_REG_SET (temp_stack.reg_set, regstack->reg_set); ++ temp_stack.reg_set = regstack->reg_set; + + for (top = temp_stack.top = regstack->top; top >= 0; top--) + temp_stack.reg[top] = FIRST_STACK_REG + temp_stack.top - top; +@@ -568,7 +568,7 @@ check_asm_stack_operands (rtx_insn *insn) + + if (i != LAST_STACK_REG + 1) + { +- error_for_asm (insn, "output regs must be grouped at top of stack"); ++ error_for_asm (insn, "output registers must be grouped at top of stack"); + malformed_asm = 1; + } + +@@ -625,7 +625,8 @@ check_asm_stack_operands (rtx_insn *insn) + if (i != LAST_STACK_REG + 1) + { + error_for_asm (insn, +- "explicitly used regs must be grouped at top of stack"); ++ "explicitly used registers must be grouped " ++ "at top of stack"); + malformed_asm = 1; + } + +@@ -2640,7 +2641,7 @@ change_stack (rtx_insn *insn, stack_ptr old, stack_ptr new_stack, + /* By now, the only difference should be the order of the stack, + not their depth or liveliness. */ + +- gcc_assert (hard_reg_set_equal_p (old->reg_set, new_stack->reg_set)); ++ gcc_assert (old->reg_set == new_stack->reg_set); + gcc_assert (old->top == new_stack->top); + + /* If the stack is not empty (new_stack->top != -1), loop here emitting +@@ -3154,8 +3155,7 @@ convert_regs_1 (basic_block block) + asms, we zapped the instruction itself, but that didn't produce the + same pattern of register kills as before. */ + +- gcc_assert (hard_reg_set_equal_p (regstack.reg_set, bi->out_reg_set) +- || any_malformed_asm); ++ gcc_assert (regstack.reg_set == bi->out_reg_set || any_malformed_asm); + bi->stack_out = regstack; + bi->done = true; + +diff --git a/gcc/regcprop.c b/gcc/regcprop.c +index 4842ce922..675111db8 100644 +--- a/gcc/regcprop.c ++++ b/gcc/regcprop.c +@@ -35,6 +35,7 @@ + #include "rtl-iter.h" + #include "cfgrtl.h" + #include "target.h" ++#include "function-abi.h" + + /* The following code does forward propagation of hard register copies. 
+ The object is to eliminate as many dependencies as possible, so that +@@ -237,11 +238,8 @@ static void + kill_clobbered_value (rtx x, const_rtx set, void *data) + { + struct value_data *const vd = (struct value_data *) data; +- gcc_assert (GET_CODE (set) != CLOBBER_HIGH || REG_P (x)); + +- if (GET_CODE (set) == CLOBBER +- || (GET_CODE (set) == CLOBBER_HIGH +- && reg_is_clobbered_by_clobber_high (x, XEXP (set, 0)))) ++ if (GET_CODE (set) == CLOBBER) + kill_value (x, vd); + } + +@@ -262,8 +260,7 @@ kill_set_value (rtx x, const_rtx set, void *data) + if (rtx_equal_p (x, ksvd->ignore_set_reg)) + return; + +- gcc_assert (GET_CODE (set) != CLOBBER_HIGH || REG_P (x)); +- if (GET_CODE (set) != CLOBBER && GET_CODE (set) != CLOBBER_HIGH) ++ if (GET_CODE (set) != CLOBBER) + { + kill_value (x, ksvd->vd); + if (REG_P (x)) +@@ -728,19 +725,7 @@ cprop_find_used_regs (rtx *loc, void *data) + static void + kill_clobbered_values (rtx_insn *insn, struct value_data *vd) + { +- note_stores (PATTERN (insn), kill_clobbered_value, vd); +- +- if (CALL_P (insn)) +- { +- rtx exp; +- +- for (exp = CALL_INSN_FUNCTION_USAGE (insn); exp; exp = XEXP (exp, 1)) +- { +- rtx x = XEXP (exp, 0); +- if (GET_CODE (x) == CLOBBER) +- kill_value (SET_DEST (x), vd); +- } +- } ++ note_stores (insn, kill_clobbered_value, vd); + } + + /* Perform the forward copy propagation on basic block BB. */ +@@ -1047,7 +1032,6 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) + unsigned int set_nregs = 0; + unsigned int regno; + rtx exp; +- HARD_REG_SET regs_invalidated_by_this_call; + + for (exp = CALL_INSN_FUNCTION_USAGE (insn); exp; exp = XEXP (exp, 1)) + { +@@ -1065,13 +1049,11 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) + } + } + +- get_call_reg_set_usage (insn, +- ®s_invalidated_by_this_call, +- regs_invalidated_by_call); ++ function_abi callee_abi = insn_callee_abi (insn); + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if ((TEST_HARD_REG_BIT (regs_invalidated_by_this_call, regno) ++ if ((callee_abi.clobbers_full_reg_p (regno) + || (targetm.hard_regno_call_part_clobbered +- (insn, regno, vd->e[regno].mode))) ++ (callee_abi.id (), regno, vd->e[regno].mode))) + && (regno < set_regno || regno >= set_regno + set_nregs)) + kill_value_regno (regno, 1, vd); + +@@ -1109,7 +1091,7 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) + if (!noop_p) + { + /* Notice stores. */ +- note_stores (PATTERN (insn), kill_set_value, &ksvd); ++ note_stores (insn, kill_set_value, &ksvd); + + /* Notice copies. */ + if (copy_p) +diff --git a/gcc/reginfo.c b/gcc/reginfo.c +index 315c5ecab..4f07e968e 100644 +--- a/gcc/reginfo.c ++++ b/gcc/reginfo.c +@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see + #include "reload.h" + #include "output.h" + #include "tree-pass.h" ++#include "function-abi.h" + + /* Maximum register number used in this function, plus one. */ + +@@ -65,21 +66,20 @@ struct target_hard_regs *this_target_hard_regs = &default_target_hard_regs; + struct target_regs *this_target_regs = &default_target_regs; + #endif + ++#define call_used_regs \ ++ (this_target_hard_regs->x_call_used_regs) ++ + /* Data for initializing fixed_regs. */ + static const char initial_fixed_regs[] = FIXED_REGISTERS; + + /* Data for initializing call_used_regs. */ +-static const char initial_call_used_regs[] = CALL_USED_REGISTERS; +- + #ifdef CALL_REALLY_USED_REGISTERS +-/* Data for initializing call_really_used_regs. 
*/ +-static const char initial_call_really_used_regs[] = CALL_REALLY_USED_REGISTERS; ++#ifdef CALL_USED_REGISTERS ++#error CALL_USED_REGISTERS and CALL_REALLY_USED_REGISTERS are both defined + #endif +- +-#ifdef CALL_REALLY_USED_REGISTERS +-#define CALL_REALLY_USED_REGNO_P(X) call_really_used_regs[X] ++static const char initial_call_used_regs[] = CALL_REALLY_USED_REGISTERS; + #else +-#define CALL_REALLY_USED_REGNO_P(X) call_used_regs[X] ++static const char initial_call_used_regs[] = CALL_USED_REGISTERS; + #endif + + /* Indexed by hard register number, contains 1 for registers +@@ -91,17 +91,6 @@ char global_regs[FIRST_PSEUDO_REGISTER]; + /* Declaration for the global register. */ + tree global_regs_decl[FIRST_PSEUDO_REGISTER]; + +-/* Same information as REGS_INVALIDATED_BY_CALL but in regset form to be used +- in dataflow more conveniently. */ +-regset regs_invalidated_by_call_regset; +- +-/* Same information as FIXED_REG_SET but in regset form. */ +-regset fixed_reg_set_regset; +- +-/* The bitmap_obstack is used to hold some static variables that +- should not be reset after each function is compiled. */ +-static bitmap_obstack persistent_obstack; +- + /* Used to initialize reg_alloc_order. */ + #ifdef REG_ALLOC_ORDER + static int initial_reg_alloc_order[FIRST_PSEUDO_REGISTER] = REG_ALLOC_ORDER; +@@ -171,10 +160,6 @@ init_reg_sets (void) + CALL_USED_REGISTERS had the right number of initializers. */ + gcc_assert (sizeof fixed_regs == sizeof initial_fixed_regs); + gcc_assert (sizeof call_used_regs == sizeof initial_call_used_regs); +-#ifdef CALL_REALLY_USED_REGISTERS +- gcc_assert (sizeof call_really_used_regs +- == sizeof initial_call_really_used_regs); +-#endif + #ifdef REG_ALLOC_ORDER + gcc_assert (sizeof reg_alloc_order == sizeof initial_reg_alloc_order); + #endif +@@ -182,10 +167,6 @@ init_reg_sets (void) + + memcpy (fixed_regs, initial_fixed_regs, sizeof fixed_regs); + memcpy (call_used_regs, initial_call_used_regs, sizeof call_used_regs); +-#ifdef CALL_REALLY_USED_REGISTERS +- memcpy (call_really_used_regs, initial_call_really_used_regs, +- sizeof call_really_used_regs); +-#endif + #ifdef REG_ALLOC_ORDER + memcpy (reg_alloc_order, initial_reg_alloc_order, sizeof reg_alloc_order); + #endif +@@ -200,9 +181,6 @@ init_reg_sets (void) + subsequent back-end reinitialization. */ + static char saved_fixed_regs[FIRST_PSEUDO_REGISTER]; + static char saved_call_used_regs[FIRST_PSEUDO_REGISTER]; +-#ifdef CALL_REALLY_USED_REGISTERS +-static char saved_call_really_used_regs[FIRST_PSEUDO_REGISTER]; +-#endif + static const char *saved_reg_names[FIRST_PSEUDO_REGISTER]; + static HARD_REG_SET saved_accessible_reg_set; + static HARD_REG_SET saved_operand_reg_set; +@@ -218,19 +196,11 @@ save_register_info (void) + memcpy (saved_fixed_regs, fixed_regs, sizeof fixed_regs); + memcpy (saved_call_used_regs, call_used_regs, sizeof call_used_regs); + +- /* Likewise for call_really_used_regs. */ +-#ifdef CALL_REALLY_USED_REGISTERS +- gcc_assert (sizeof call_really_used_regs +- == sizeof saved_call_really_used_regs); +- memcpy (saved_call_really_used_regs, call_really_used_regs, +- sizeof call_really_used_regs); +-#endif +- + /* And similarly for reg_names. 
*/ + gcc_assert (sizeof reg_names == sizeof saved_reg_names); + memcpy (saved_reg_names, reg_names, sizeof reg_names); +- COPY_HARD_REG_SET (saved_accessible_reg_set, accessible_reg_set); +- COPY_HARD_REG_SET (saved_operand_reg_set, operand_reg_set); ++ saved_accessible_reg_set = accessible_reg_set; ++ saved_operand_reg_set = operand_reg_set; + } + + /* Restore the register information. */ +@@ -240,14 +210,9 @@ restore_register_info (void) + memcpy (fixed_regs, saved_fixed_regs, sizeof fixed_regs); + memcpy (call_used_regs, saved_call_used_regs, sizeof call_used_regs); + +-#ifdef CALL_REALLY_USED_REGISTERS +- memcpy (call_really_used_regs, saved_call_really_used_regs, +- sizeof call_really_used_regs); +-#endif +- + memcpy (reg_names, saved_reg_names, sizeof reg_names); +- COPY_HARD_REG_SET (accessible_reg_set, saved_accessible_reg_set); +- COPY_HARD_REG_SET (operand_reg_set, saved_operand_reg_set); ++ accessible_reg_set = saved_accessible_reg_set; ++ operand_reg_set = saved_operand_reg_set; + } + + /* After switches have been processed, which perhaps alter +@@ -297,8 +262,7 @@ init_reg_sets_1 (void) + HARD_REG_SET c; + int k; + +- COPY_HARD_REG_SET (c, reg_class_contents[i]); +- IOR_HARD_REG_SET (c, reg_class_contents[j]); ++ c = reg_class_contents[i] | reg_class_contents[j]; + for (k = 0; k < N_REG_CLASSES; k++) + if (hard_reg_set_subset_p (reg_class_contents[k], c) + && !hard_reg_set_subset_p (reg_class_contents[k], +@@ -320,8 +284,7 @@ init_reg_sets_1 (void) + HARD_REG_SET c; + int k; + +- COPY_HARD_REG_SET (c, reg_class_contents[i]); +- IOR_HARD_REG_SET (c, reg_class_contents[j]); ++ c = reg_class_contents[i] | reg_class_contents[j]; + for (k = 0; k < N_REG_CLASSES; k++) + if (hard_reg_set_subset_p (c, reg_class_contents[k])) + break; +@@ -362,22 +325,9 @@ init_reg_sets_1 (void) + /* Initialize "constant" tables. */ + + CLEAR_HARD_REG_SET (fixed_reg_set); +- CLEAR_HARD_REG_SET (call_used_reg_set); +- CLEAR_HARD_REG_SET (call_fixed_reg_set); + CLEAR_HARD_REG_SET (regs_invalidated_by_call); +- if (!regs_invalidated_by_call_regset) +- { +- bitmap_obstack_initialize (&persistent_obstack); +- regs_invalidated_by_call_regset = ALLOC_REG_SET (&persistent_obstack); +- } +- else +- CLEAR_REG_SET (regs_invalidated_by_call_regset); +- if (!fixed_reg_set_regset) +- fixed_reg_set_regset = ALLOC_REG_SET (&persistent_obstack); +- else +- CLEAR_REG_SET (fixed_reg_set_regset); + +- AND_HARD_REG_SET (operand_reg_set, accessible_reg_set); ++ operand_reg_set &= accessible_reg_set; + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + { + /* As a special exception, registers whose class is NO_REGS are +@@ -393,26 +343,10 @@ init_reg_sets_1 (void) + /* If a register is too limited to be treated as a register operand, + then it should never be allocated to a pseudo. */ + if (!TEST_HARD_REG_BIT (operand_reg_set, i)) +- { +- fixed_regs[i] = 1; +- call_used_regs[i] = 1; +- } +- +- /* call_used_regs must include fixed_regs. */ +- gcc_assert (!fixed_regs[i] || call_used_regs[i]); +-#ifdef CALL_REALLY_USED_REGISTERS +- /* call_used_regs must include call_really_used_regs. 
*/ +- gcc_assert (!call_really_used_regs[i] || call_used_regs[i]); +-#endif ++ fixed_regs[i] = 1; + + if (fixed_regs[i]) +- { +- SET_HARD_REG_BIT (fixed_reg_set, i); +- SET_REGNO_REG_SET (fixed_reg_set_regset, i); +- } +- +- if (call_used_regs[i]) +- SET_HARD_REG_BIT (call_used_reg_set, i); ++ SET_HARD_REG_BIT (fixed_reg_set, i); + + /* There are a couple of fixed registers that we know are safe to + exclude from being clobbered by calls: +@@ -427,10 +361,7 @@ init_reg_sets_1 (void) + if (i == STACK_POINTER_REGNUM) + ; + else if (global_regs[i]) +- { +- SET_HARD_REG_BIT (regs_invalidated_by_call, i); +- SET_REGNO_REG_SET (regs_invalidated_by_call_regset, i); +- } ++ SET_HARD_REG_BIT (regs_invalidated_by_call, i); + else if (i == FRAME_POINTER_REGNUM) + ; + else if (!HARD_FRAME_POINTER_IS_FRAME_POINTER +@@ -442,15 +373,12 @@ init_reg_sets_1 (void) + else if (!PIC_OFFSET_TABLE_REG_CALL_CLOBBERED + && i == (unsigned) PIC_OFFSET_TABLE_REGNUM && fixed_regs[i]) + ; +- else if (CALL_REALLY_USED_REGNO_P (i)) +- { +- SET_HARD_REG_BIT (regs_invalidated_by_call, i); +- SET_REGNO_REG_SET (regs_invalidated_by_call_regset, i); +- } ++ else if (call_used_regs[i]) ++ SET_HARD_REG_BIT (regs_invalidated_by_call, i); + } + +- COPY_HARD_REG_SET (call_fixed_reg_set, fixed_reg_set); +- COPY_HARD_REG_SET (fixed_nonglobal_reg_set, fixed_reg_set); ++ SET_HARD_REG_SET (savable_regs); ++ fixed_nonglobal_reg_set = fixed_reg_set; + + /* Preserve global registers if called more than once. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +@@ -459,8 +387,6 @@ init_reg_sets_1 (void) + { + fixed_regs[i] = call_used_regs[i] = 1; + SET_HARD_REG_BIT (fixed_reg_set, i); +- SET_HARD_REG_BIT (call_used_reg_set, i); +- SET_HARD_REG_BIT (call_fixed_reg_set, i); + } + } + +@@ -493,6 +419,8 @@ init_reg_sets_1 (void) + } + } + } ++ ++ default_function_abi.initialize (0, regs_invalidated_by_call); + } + + /* Compute the table of register modes. 
+@@ -639,7 +567,7 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED, + if (hard_regno_nregs (regno, mode) == nregs + && targetm.hard_regno_mode_ok (regno, mode) + && (!call_saved +- || !targetm.hard_regno_call_part_clobbered (NULL, regno, mode)) ++ || !targetm.hard_regno_call_part_clobbered (0, regno, mode)) + && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (found_mode))) + found_mode = mode; + +@@ -647,7 +575,7 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED, + if (hard_regno_nregs (regno, mode) == nregs + && targetm.hard_regno_mode_ok (regno, mode) + && (!call_saved +- || !targetm.hard_regno_call_part_clobbered (NULL, regno, mode)) ++ || !targetm.hard_regno_call_part_clobbered (0, regno, mode)) + && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (found_mode))) + found_mode = mode; + +@@ -655,7 +583,7 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED, + if (hard_regno_nregs (regno, mode) == nregs + && targetm.hard_regno_mode_ok (regno, mode) + && (!call_saved +- || !targetm.hard_regno_call_part_clobbered (NULL, regno, mode)) ++ || !targetm.hard_regno_call_part_clobbered (0, regno, mode)) + && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (found_mode))) + found_mode = mode; + +@@ -663,7 +591,7 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED, + if (hard_regno_nregs (regno, mode) == nregs + && targetm.hard_regno_mode_ok (regno, mode) + && (!call_saved +- || !targetm.hard_regno_call_part_clobbered (NULL, regno, mode)) ++ || !targetm.hard_regno_call_part_clobbered (0, regno, mode)) + && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (found_mode))) + found_mode = mode; + +@@ -677,7 +605,7 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED, + if (hard_regno_nregs (regno, mode) == nregs + && targetm.hard_regno_mode_ok (regno, mode) + && (!call_saved +- || !targetm.hard_regno_call_part_clobbered (NULL, regno, mode))) ++ || !targetm.hard_regno_call_part_clobbered (0, regno, mode))) + return mode; + } + +@@ -749,10 +677,11 @@ fix_register (const char *name, int fixed, int call_used) + else + { + fixed_regs[i] = fixed; +- call_used_regs[i] = call_used; + #ifdef CALL_REALLY_USED_REGISTERS + if (fixed == 0) +- call_really_used_regs[i] = call_used; ++ call_used_regs[i] = call_used; ++#else ++ call_used_regs[i] = call_used; + #endif + } + } +@@ -803,7 +732,8 @@ globalize_reg (tree decl, int i) + if (i != STACK_POINTER_REGNUM) + { + SET_HARD_REG_BIT (regs_invalidated_by_call, i); +- SET_REGNO_REG_SET (regs_invalidated_by_call_regset, i); ++ for (unsigned int i = 0; i < NUM_ABI_IDS; ++i) ++ function_abis[i].add_full_reg_clobber (i); + } + + /* If already fixed, nothing else to do. */ +@@ -811,13 +741,8 @@ globalize_reg (tree decl, int i) + return; + + fixed_regs[i] = call_used_regs[i] = 1; +-#ifdef CALL_REALLY_USED_REGISTERS +- call_really_used_regs[i] = 1; +-#endif + + SET_HARD_REG_BIT (fixed_reg_set, i); +- SET_HARD_REG_BIT (call_used_reg_set, i); +- SET_HARD_REG_BIT (call_fixed_reg_set, i); + + reinit_regs (); + } +@@ -1101,10 +1026,6 @@ reg_scan_mark_refs (rtx x, rtx_insn *insn) + reg_scan_mark_refs (XEXP (XEXP (x, 0), 0), insn); + break; + +- case CLOBBER_HIGH: +- gcc_assert (!(MEM_P (XEXP (x, 0)))); +- break; +- + case SET: + /* Count a set of the destination if it is a register. 
*/ + for (dest = SET_DEST (x); +@@ -1316,14 +1237,12 @@ record_subregs_of_mode (rtx subreg, bool partial_def) + } + + if (valid_mode_changes[regno]) +- AND_HARD_REG_SET (*valid_mode_changes[regno], +- simplifiable_subregs (shape)); ++ *valid_mode_changes[regno] &= simplifiable_subregs (shape); + else + { + valid_mode_changes[regno] + = XOBNEW (&valid_mode_changes_obstack, HARD_REG_SET); +- COPY_HARD_REG_SET (*valid_mode_changes[regno], +- simplifiable_subregs (shape)); ++ *valid_mode_changes[regno] = simplifiable_subregs (shape); + } + } + +diff --git a/gcc/regrename.c b/gcc/regrename.c +index 5259d565e..6f7fe0a6d 100644 +--- a/gcc/regrename.c ++++ b/gcc/regrename.c +@@ -33,6 +33,7 @@ + #include "addresses.h" + #include "cfganal.h" + #include "tree-pass.h" ++#include "function-abi.h" + #include "regrename.h" + + /* This file implements the RTL register renaming pass of the compiler. It is +@@ -253,7 +254,7 @@ create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc, + CLEAR_HARD_REG_BIT (live_hard_regs, head->regno + nregs); + } + +- COPY_HARD_REG_SET (head->hard_conflicts, live_hard_regs); ++ head->hard_conflicts = live_hard_regs; + bitmap_set_bit (&open_chains_set, head->id); + + open_chains = head; +@@ -292,7 +293,7 @@ merge_overlapping_regs (HARD_REG_SET *pset, struct du_head *head) + { + bitmap_iterator bi; + unsigned i; +- IOR_HARD_REG_SET (*pset, head->hard_conflicts); ++ *pset |= head->hard_conflicts; + EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi) + { + du_head_p other = regrename_chain_from_id (i); +@@ -303,6 +304,18 @@ merge_overlapping_regs (HARD_REG_SET *pset, struct du_head *head) + } + } + ++/* Return true if (reg:MODE REGNO) would be clobbered by a call covered ++ by THIS_HEAD. */ ++ ++static bool ++call_clobbered_in_chain_p (du_head *this_head, machine_mode mode, ++ unsigned int regno) ++{ ++ return call_clobbered_in_region_p (this_head->call_abis, ++ this_head->call_clobber_mask, ++ mode, regno); ++} ++ + /* Check if NEW_REG can be the candidate register to rename for + REG in THIS_HEAD chain. THIS_UNAVAILABLE is a set of unavailable hard + registers. */ +@@ -322,7 +335,7 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg, + || global_regs[new_reg + i] + /* Can't use regs which aren't saved by the prologue. */ + || (! df_regs_ever_live_p (new_reg + i) +- && ! call_used_regs[new_reg + i]) ++ && ! crtl->abi->clobbers_full_reg_p (new_reg + i)) + #ifdef LEAF_REGISTERS + /* We can't use a non-leaf register if we're in a + leaf function. */ +@@ -337,11 +350,8 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg, + for (tmp = this_head->first; tmp; tmp = tmp->next_use) + if ((!targetm.hard_regno_mode_ok (new_reg, GET_MODE (*tmp->loc)) + && ! DEBUG_INSN_P (tmp->insn)) +- || (this_head->need_caller_save_reg +- && ! (targetm.hard_regno_call_part_clobbered +- (NULL, reg, GET_MODE (*tmp->loc))) +- && (targetm.hard_regno_call_part_clobbered +- (NULL, new_reg, GET_MODE (*tmp->loc))))) ++ || call_clobbered_in_chain_p (this_head, GET_MODE (*tmp->loc), ++ new_reg)) + return false; + + return true; +@@ -363,12 +373,6 @@ find_rename_reg (du_head_p this_head, enum reg_class super_class, + int pass; + int best_new_reg = old_reg; + +- /* Further narrow the set of registers we can use for renaming. +- If the chain needs a call-saved register, mark the call-used +- registers as unavailable. */ +- if (this_head->need_caller_save_reg) +- IOR_HARD_REG_SET (*unavailable, call_used_reg_set); +- + /* Mark registers that overlap this chain's lifetime as unavailable. 
*/ + merge_overlapping_regs (unavailable, this_head); + +@@ -441,8 +445,7 @@ regrename_find_superclass (du_head_p head, int *pn_uses, + if (DEBUG_INSN_P (tmp->insn)) + continue; + n_uses++; +- IOR_COMPL_HARD_REG_SET (*punavailable, +- reg_class_contents[tmp->cl]); ++ *punavailable |= ~reg_class_contents[tmp->cl]; + super_class + = reg_class_superunion[(int) super_class][(int) tmp->cl]; + } +@@ -486,7 +489,7 @@ rename_chains (void) + && reg == FRAME_POINTER_REGNUM)) + continue; + +- COPY_HARD_REG_SET (this_unavailable, unavailable); ++ this_unavailable = unavailable; + + reg_class super_class = regrename_find_superclass (this_head, &n_uses, + &this_unavailable); +@@ -500,7 +503,7 @@ rename_chains (void) + { + fprintf (dump_file, "Register %s in insn %d", + reg_names[reg], INSN_UID (this_head->first->insn)); +- if (this_head->need_caller_save_reg) ++ if (this_head->call_abis) + fprintf (dump_file, " crosses a call"); + } + +@@ -677,10 +680,11 @@ merge_chains (du_head_p c1, du_head_p c2) + c2->first = c2->last = NULL; + c2->id = c1->id; + +- IOR_HARD_REG_SET (c1->hard_conflicts, c2->hard_conflicts); ++ c1->hard_conflicts |= c2->hard_conflicts; + bitmap_ior_into (&c1->conflicts, &c2->conflicts); + +- c1->need_caller_save_reg |= c2->need_caller_save_reg; ++ c1->call_clobber_mask |= c2->call_clobber_mask; ++ c1->call_abis |= c2->call_abis; + c1->cannot_rename |= c2->cannot_rename; + } + +@@ -1740,7 +1744,7 @@ build_def_use (basic_block bb) + outside an operand, as live. */ + hide_operands (n_ops, old_operands, old_dups, untracked_operands, + false); +- note_stores (PATTERN (insn), note_sets_clobbers, &clobber_code); ++ note_stores (insn, note_sets_clobbers, &clobber_code); + restore_operands (insn, n_ops, old_operands, old_dups); + + /* Step 1b: Begin new chains for earlyclobbered writes inside +@@ -1834,9 +1838,15 @@ build_def_use (basic_block bb) + requires a caller-saved reg. */ + if (CALL_P (insn)) + { ++ function_abi callee_abi = insn_callee_abi (insn); + struct du_head *p; + for (p = open_chains; p; p = p->next_chain) +- p->need_caller_save_reg = 1; ++ { ++ p->call_abis |= (1 << callee_abi.id ()); ++ p->call_clobber_mask ++ |= callee_abi.full_and_partial_reg_clobbers (); ++ p->hard_conflicts |= callee_abi.full_reg_clobbers (); ++ } + } + + /* Step 5: Close open chains that overlap writes. Similar to +@@ -1856,7 +1866,7 @@ build_def_use (basic_block bb) + outside an operand, as live. */ + hide_operands (n_ops, old_operands, old_dups, untracked_operands, + false); +- note_stores (PATTERN (insn), note_sets_clobbers, &set_code); ++ note_stores (insn, note_sets_clobbers, &set_code); + restore_operands (insn, n_ops, old_operands, old_dups); + + /* Step 6b: Begin new chains for writes inside operands. */ +diff --git a/gcc/regrename.h b/gcc/regrename.h +index 37f5e398d..1bbf78fda 100644 +--- a/gcc/regrename.h ++++ b/gcc/regrename.h +@@ -40,9 +40,12 @@ struct du_head + bitmap_head conflicts; + /* Conflicts with untracked hard registers. */ + HARD_REG_SET hard_conflicts; ++ /* Which registers are fully or partially clobbered by the calls that ++ the chain crosses. */ ++ HARD_REG_SET call_clobber_mask; + +- /* Nonzero if the chain crosses a call. */ +- unsigned int need_caller_save_reg:1; ++ /* A bitmask of ABIs used by the calls that the chain crosses. */ ++ unsigned int call_abis : NUM_ABI_IDS; + /* Nonzero if the register is used in a way that prevents renaming, + such as the SET_DEST of a CALL_INSN or an asm operand that used + to be a hard register. 
*/ +diff --git a/gcc/regs.h b/gcc/regs.h +index 48b2e7081..821979ec6 100644 +--- a/gcc/regs.h ++++ b/gcc/regs.h +@@ -298,7 +298,7 @@ remove_from_hard_reg_set (HARD_REG_SET *regs, machine_mode mode, + /* Return true if REGS contains the whole of (reg:MODE REGNO). */ + + static inline bool +-in_hard_reg_set_p (const HARD_REG_SET regs, machine_mode mode, ++in_hard_reg_set_p (const_hard_reg_set regs, machine_mode mode, + unsigned int regno) + { + unsigned int end_regno; +@@ -323,7 +323,7 @@ in_hard_reg_set_p (const HARD_REG_SET regs, machine_mode mode, + /* Return true if (reg:MODE REGNO) includes an element of REGS. */ + + static inline bool +-overlaps_hard_reg_set_p (const HARD_REG_SET regs, machine_mode mode, ++overlaps_hard_reg_set_p (const_hard_reg_set regs, machine_mode mode, + unsigned int regno) + { + unsigned int end_regno; +@@ -363,7 +363,7 @@ remove_range_from_hard_reg_set (HARD_REG_SET *regs, unsigned int regno, + /* Like overlaps_hard_reg_set_p, but use a REGNO/NREGS range instead of + REGNO and MODE. */ + static inline bool +-range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, ++range_overlaps_hard_reg_set_p (const_hard_reg_set set, unsigned regno, + int nregs) + { + while (nregs-- > 0) +@@ -375,7 +375,7 @@ range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, + /* Like in_hard_reg_set_p, but use a REGNO/NREGS range instead of + REGNO and MODE. */ + static inline bool +-range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs) ++range_in_hard_reg_set_p (const_hard_reg_set set, unsigned regno, int nregs) + { + while (nregs-- > 0) + if (!TEST_HARD_REG_BIT (set, regno + nregs)) +@@ -383,8 +383,4 @@ range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs) + return true; + } + +-/* Get registers used by given function call instruction. */ +-extern bool get_call_reg_set_usage (rtx_insn *insn, HARD_REG_SET *reg_set, +- HARD_REG_SET default_set); +- + #endif /* GCC_REGS_H */ +diff --git a/gcc/regset.h b/gcc/regset.h +index 34a9eb457..72ff45891 100644 +--- a/gcc/regset.h ++++ b/gcc/regset.h +@@ -64,6 +64,10 @@ typedef bitmap regset; + /* Inclusive or a register set with a second register set. */ + #define IOR_REG_SET(TO, FROM) bitmap_ior_into (TO, FROM) + ++/* Same, but with FROM being a HARD_REG_SET. */ ++#define IOR_REG_SET_HRS(TO, FROM) \ ++ bitmap_ior_into (TO, bitmap_view (FROM)) ++ + /* Exclusive or a register set with a second register set. */ + #define XOR_REG_SET(TO, FROM) bitmap_xor_into (TO, FROM) + +@@ -107,14 +111,6 @@ typedef bitmap_iterator reg_set_iterator; + #define EXECUTE_IF_AND_IN_REG_SET(REGSET1, REGSET2, MIN, REGNUM, RSI) \ + EXECUTE_IF_AND_IN_BITMAP (REGSET1, REGSET2, MIN, REGNUM, RSI) \ + +-/* Same information as REGS_INVALIDATED_BY_CALL but in regset form to be used +- in dataflow more conveniently. */ +- +-extern regset regs_invalidated_by_call_regset; +- +-/* Same information as FIXED_REG_SET but in regset form. */ +-extern regset fixed_reg_set_regset; +- + /* An obstack for regsets. 
*/ + extern bitmap_obstack reg_obstack; + +diff --git a/gcc/reload.c b/gcc/reload.c +index 72cc38a0e..b7601307f 100644 +--- a/gcc/reload.c ++++ b/gcc/reload.c +@@ -6911,15 +6911,15 @@ find_equiv_reg (rtx goal, rtx_insn *insn, enum reg_class rclass, int other, + + if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER) + for (i = 0; i < nregs; ++i) +- if (call_used_regs[regno + i] +- || targetm.hard_regno_call_part_clobbered (NULL, regno + i, ++ if (call_used_or_fixed_reg_p (regno + i) ++ || targetm.hard_regno_call_part_clobbered (0, regno + i, + mode)) + return 0; + + if (valueno >= 0 && valueno < FIRST_PSEUDO_REGISTER) + for (i = 0; i < valuenregs; ++i) +- if (call_used_regs[valueno + i] +- || targetm.hard_regno_call_part_clobbered (NULL, valueno + i, ++ if (call_used_or_fixed_reg_p (valueno + i) ++ || targetm.hard_regno_call_part_clobbered (0, valueno + i, + mode)) + return 0; + } +diff --git a/gcc/reload.h b/gcc/reload.h +index 813075b6f..fef6aa9da 100644 +--- a/gcc/reload.h ++++ b/gcc/reload.h +@@ -274,7 +274,7 @@ extern int reload_first_uid; + + extern int num_not_at_initial_offset; + +-#if defined SET_HARD_REG_BIT && defined CLEAR_REG_SET ++#if defined HARD_CONST && defined CLEAR_REG_SET + /* This structure describes instructions which are relevant for reload. + Apart from all regular insns, this also includes CODE_LABELs, since they + must be examined for register elimination. */ +@@ -325,7 +325,7 @@ extern struct insn_chain *reload_insn_chain; + extern struct insn_chain *new_insn_chain (void); + #endif + +-#if defined SET_HARD_REG_BIT ++#if defined HARD_CONST + extern void compute_use_by_pseudos (HARD_REG_SET *, bitmap); + #endif + +diff --git a/gcc/reload1.c b/gcc/reload1.c +index bb112d817..d36ebec60 100644 +--- a/gcc/reload1.c ++++ b/gcc/reload1.c +@@ -795,7 +795,9 @@ reload (rtx_insn *first, int global) + + if (crtl->saves_all_registers) + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (! call_used_regs[i] && ! fixed_regs[i] && ! LOCAL_REGNO (i)) ++ if (! call_used_or_fixed_reg_p (i) ++ && ! fixed_regs[i] ++ && ! LOCAL_REGNO (i)) + df_set_regs_ever_live (i, true); + + /* Find all the pseudo registers that didn't get hard regs +@@ -843,7 +845,7 @@ reload (rtx_insn *first, int global) + cannot be done. */ + for (insn = first; insn && num_eliminable; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) +- note_stores (PATTERN (insn), mark_not_eliminable, NULL); ++ note_pattern_stores (PATTERN (insn), mark_not_eliminable, NULL); + + maybe_fix_stack_asms (); + +@@ -1339,8 +1341,6 @@ maybe_fix_stack_asms (void) + rtx t = XVECEXP (pat, 0, i); + if (GET_CODE (t) == CLOBBER && STACK_REG_P (XEXP (t, 0))) + SET_HARD_REG_BIT (clobbered, REGNO (XEXP (t, 0))); +- /* CLOBBER_HIGH is only supported for LRA. */ +- gcc_assert (GET_CODE (t) != CLOBBER_HIGH); + } + + /* Get the operand values and constraints out of the insn. */ +@@ -1364,7 +1364,7 @@ maybe_fix_stack_asms (void) + { + /* End of one alternative - mark the regs in the current + class, and reset the class. */ +- IOR_HARD_REG_SET (allowed, reg_class_contents[cls]); ++ allowed |= reg_class_contents[cls]; + cls = NO_REGS; + p++; + if (c == '#') +@@ -1399,7 +1399,7 @@ maybe_fix_stack_asms (void) + /* Those of the registers which are clobbered, but allowed by the + constraints, must be usable as reload registers. So clear them + out of the life information. 
*/ +- AND_HARD_REG_SET (allowed, clobbered); ++ allowed &= clobbered; + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (TEST_HARD_REG_BIT (allowed, i)) + { +@@ -1732,7 +1732,7 @@ order_regs_for_reload (struct insn_chain *chain) + HARD_REG_SET used_by_pseudos2; + reg_set_iterator rsi; + +- COPY_HARD_REG_SET (bad_spill_regs, fixed_reg_set); ++ bad_spill_regs = fixed_reg_set; + + memset (spill_cost, 0, sizeof spill_cost); + memset (spill_add_cost, 0, sizeof spill_add_cost); +@@ -1745,8 +1745,8 @@ order_regs_for_reload (struct insn_chain *chain) + + REG_SET_TO_HARD_REG_SET (used_by_pseudos, &chain->live_throughout); + REG_SET_TO_HARD_REG_SET (used_by_pseudos2, &chain->dead_or_set); +- IOR_HARD_REG_SET (bad_spill_regs, used_by_pseudos); +- IOR_HARD_REG_SET (bad_spill_regs, used_by_pseudos2); ++ bad_spill_regs |= used_by_pseudos; ++ bad_spill_regs |= used_by_pseudos2; + + /* Now find out which pseudos are allocated to it, and update + hard_reg_n_uses. */ +@@ -1823,9 +1823,9 @@ find_reg (struct insn_chain *chain, int order) + static int regno_pseudo_regs[FIRST_PSEUDO_REGISTER]; + static int best_regno_pseudo_regs[FIRST_PSEUDO_REGISTER]; + +- COPY_HARD_REG_SET (not_usable, bad_spill_regs); +- IOR_HARD_REG_SET (not_usable, bad_spill_regs_global); +- IOR_COMPL_HARD_REG_SET (not_usable, reg_class_contents[rl->rclass]); ++ not_usable = (bad_spill_regs ++ | bad_spill_regs_global ++ | ~reg_class_contents[rl->rclass]); + + CLEAR_HARD_REG_SET (used_by_other_reload); + for (k = 0; k < order; k++) +@@ -1906,8 +1906,8 @@ find_reg (struct insn_chain *chain, int order) + && (inv_reg_alloc_order[regno] + < inv_reg_alloc_order[best_reg]) + #else +- && call_used_regs[regno] +- && ! call_used_regs[best_reg] ++ && call_used_or_fixed_reg_p (regno) ++ && ! call_used_or_fixed_reg_p (best_reg) + #endif + )) + { +@@ -2007,8 +2007,8 @@ find_reload_regs (struct insn_chain *chain) + } + } + +- COPY_HARD_REG_SET (chain->used_spill_regs, used_spill_regs_local); +- IOR_HARD_REG_SET (used_spill_regs, used_spill_regs_local); ++ chain->used_spill_regs = used_spill_regs_local; ++ used_spill_regs |= used_spill_regs_local; + + memcpy (chain->rld, rld, n_reloads * sizeof (struct reload)); + } +@@ -2881,7 +2881,6 @@ eliminate_regs_1 (rtx x, machine_mode mem_mode, rtx insn, + return x; + + case CLOBBER: +- case CLOBBER_HIGH: + case ASM_OPERANDS: + gcc_assert (insn && DEBUG_INSN_P (insn)); + break; +@@ -3092,10 +3091,6 @@ elimination_effects (rtx x, machine_mode mem_mode) + elimination_effects (XEXP (x, 0), mem_mode); + return; + +- case CLOBBER_HIGH: +- /* CLOBBER_HIGH is only supported for LRA. */ +- return; +- + case SET: + /* Check for setting a register that we know about. */ + if (REG_P (SET_DEST (x))) +@@ -3817,9 +3812,6 @@ mark_not_eliminable (rtx dest, const_rtx x, void *data ATTRIBUTE_UNUSED) + if (dest == hard_frame_pointer_rtx) + return; + +- /* CLOBBER_HIGH is only supported for LRA. 
*/ +- gcc_assert (GET_CODE (x) != CLOBBER_HIGH); +- + for (i = 0; i < NUM_ELIMINABLE_REGS; i++) + if (reg_eliminate[i].can_eliminate && dest == reg_eliminate[i].to_rtx + && (GET_CODE (x) != SET +@@ -4020,7 +4012,7 @@ update_eliminables_and_spill (void) + HARD_REG_SET to_spill; + CLEAR_HARD_REG_SET (to_spill); + update_eliminables (&to_spill); +- AND_COMPL_HARD_REG_SET (used_spill_regs, to_spill); ++ used_spill_regs &= ~to_spill; + + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (TEST_HARD_REG_BIT (to_spill, i)) +@@ -4346,14 +4338,12 @@ finish_spills (int global) + EXECUTE_IF_SET_IN_REG_SET + (&chain->live_throughout, FIRST_PSEUDO_REGISTER, i, rsi) + { +- IOR_HARD_REG_SET (pseudo_forbidden_regs[i], +- chain->used_spill_regs); ++ pseudo_forbidden_regs[i] |= chain->used_spill_regs; + } + EXECUTE_IF_SET_IN_REG_SET + (&chain->dead_or_set, FIRST_PSEUDO_REGISTER, i, rsi) + { +- IOR_HARD_REG_SET (pseudo_forbidden_regs[i], +- chain->used_spill_regs); ++ pseudo_forbidden_regs[i] |= chain->used_spill_regs; + } + } + +@@ -4397,7 +4387,7 @@ finish_spills (int global) + { + REG_SET_TO_HARD_REG_SET (used_by_pseudos, &chain->live_throughout); + REG_SET_TO_HARD_REG_SET (used_by_pseudos2, &chain->dead_or_set); +- IOR_HARD_REG_SET (used_by_pseudos, used_by_pseudos2); ++ used_by_pseudos |= used_by_pseudos2; + + compute_use_by_pseudos (&used_by_pseudos, &chain->live_throughout); + compute_use_by_pseudos (&used_by_pseudos, &chain->dead_or_set); +@@ -4405,8 +4395,7 @@ finish_spills (int global) + may be not included in the value calculated here because + of possible removing caller-saves insns (see function + delete_caller_save_insns. */ +- COMPL_HARD_REG_SET (chain->used_spill_regs, used_by_pseudos); +- AND_HARD_REG_SET (chain->used_spill_regs, used_spill_regs); ++ chain->used_spill_regs = ~used_by_pseudos & used_spill_regs; + } + } + +@@ -4455,7 +4444,6 @@ scan_paradoxical_subregs (rtx x) + case PC: + case USE: + case CLOBBER: +- case CLOBBER_HIGH: + return; + + case SUBREG: +@@ -4589,7 +4577,7 @@ reload_as_needed (int live_known) + { + regset_head regs_to_forget; + INIT_REG_SET (®s_to_forget); +- note_stores (PATTERN (insn), forget_old_reloads_1, ®s_to_forget); ++ note_stores (insn, forget_old_reloads_1, ®s_to_forget); + + /* If this is a USE and CLOBBER of a MEM, ensure that any + references to eliminable registers have been removed. */ +@@ -4716,7 +4704,7 @@ reload_as_needed (int live_known) + between INSN and NEXT and use them to forget old reloads. */ + for (rtx_insn *x = NEXT_INSN (insn); x != old_next; x = NEXT_INSN (x)) + if (NONJUMP_INSN_P (x) && GET_CODE (PATTERN (x)) == CLOBBER) +- note_stores (PATTERN (x), forget_old_reloads_1, NULL); ++ note_stores (x, forget_old_reloads_1, NULL); + + #if AUTO_INC_DEC + /* Likewise for regs altered by auto-increment in this insn. +@@ -4882,8 +4870,8 @@ reload_as_needed (int live_known) + be partially clobbered by the call. */ + else if (CALL_P (insn)) + { +- AND_COMPL_HARD_REG_SET (reg_reloaded_valid, call_used_reg_set); +- AND_COMPL_HARD_REG_SET (reg_reloaded_valid, reg_reloaded_call_part_clobbered); ++ reg_reloaded_valid &= ~(call_used_or_fixed_regs ++ | reg_reloaded_call_part_clobbered); + + /* If this is a call to a setjmp-type function, we must not + reuse any reload reg contents across the call; that will +@@ -4910,8 +4898,7 @@ reload_as_needed (int live_known) + to be forgotten later. 
*/ + + static void +-forget_old_reloads_1 (rtx x, const_rtx setter, +- void *data) ++forget_old_reloads_1 (rtx x, const_rtx, void *data) + { + unsigned int regno; + unsigned int nr; +@@ -4930,9 +4917,6 @@ forget_old_reloads_1 (rtx x, const_rtx setter, + if (!REG_P (x)) + return; + +- /* CLOBBER_HIGH is only supported for LRA. */ +- gcc_assert (setter == NULL_RTX || GET_CODE (setter) != CLOBBER_HIGH); +- + regno = REGNO (x); + + if (regno >= FIRST_PSEUDO_REGISTER) +@@ -6335,9 +6319,9 @@ choose_reload_regs_init (struct insn_chain *chain, rtx *save_reload_reg_rtx) + { + HARD_REG_SET tmp; + REG_SET_TO_HARD_REG_SET (tmp, &chain->live_throughout); +- IOR_HARD_REG_SET (reg_used_in_insn, tmp); ++ reg_used_in_insn |= tmp; + REG_SET_TO_HARD_REG_SET (tmp, &chain->dead_or_set); +- IOR_HARD_REG_SET (reg_used_in_insn, tmp); ++ reg_used_in_insn |= tmp; + compute_use_by_pseudos (®_used_in_insn, &chain->live_throughout); + compute_use_by_pseudos (®_used_in_insn, &chain->dead_or_set); + } +@@ -6352,7 +6336,7 @@ choose_reload_regs_init (struct insn_chain *chain, rtx *save_reload_reg_rtx) + CLEAR_HARD_REG_SET (reload_reg_used_in_outaddr_addr[i]); + } + +- COMPL_HARD_REG_SET (reload_reg_unavailable, chain->used_spill_regs); ++ reload_reg_unavailable = ~chain->used_spill_regs; + + CLEAR_HARD_REG_SET (reload_reg_used_for_inherit); + +@@ -7797,7 +7781,7 @@ emit_output_reload_insns (struct insn_chain *chain, struct reload *rl, + clear any memory of reloaded copies of the pseudo reg. + If this output reload comes from a spill reg, + reg_has_output_reload will make this do nothing. */ +- note_stores (pat, forget_old_reloads_1, NULL); ++ note_stores (p, forget_old_reloads_1, NULL); + + if (reg_mentioned_p (rl_reg_rtx, pat)) + { +@@ -8289,8 +8273,7 @@ emit_reload_insns (struct insn_chain *chain) + : out_regno + k); + reg_reloaded_insn[regno + k] = insn; + SET_HARD_REG_BIT (reg_reloaded_valid, regno + k); +- if (targetm.hard_regno_call_part_clobbered (NULL, +- regno + k, ++ if (targetm.hard_regno_call_part_clobbered (0, regno + k, + mode)) + SET_HARD_REG_BIT (reg_reloaded_call_part_clobbered, + regno + k); +@@ -8370,8 +8353,7 @@ emit_reload_insns (struct insn_chain *chain) + : in_regno + k); + reg_reloaded_insn[regno + k] = insn; + SET_HARD_REG_BIT (reg_reloaded_valid, regno + k); +- if (targetm.hard_regno_call_part_clobbered (NULL, +- regno + k, ++ if (targetm.hard_regno_call_part_clobbered (0, regno + k, + mode)) + SET_HARD_REG_BIT (reg_reloaded_call_part_clobbered, + regno + k); +@@ -8487,7 +8469,7 @@ emit_reload_insns (struct insn_chain *chain) + CLEAR_HARD_REG_BIT (reg_reloaded_dead, src_regno + k); + SET_HARD_REG_BIT (reg_reloaded_valid, src_regno + k); + if (targetm.hard_regno_call_part_clobbered +- (NULL, src_regno + k, mode)) ++ (0, src_regno + k, mode)) + SET_HARD_REG_BIT (reg_reloaded_call_part_clobbered, + src_regno + k); + else +@@ -8516,7 +8498,7 @@ emit_reload_insns (struct insn_chain *chain) + } + } + } +- IOR_HARD_REG_SET (reg_reloaded_dead, reg_reloaded_died); ++ reg_reloaded_dead |= reg_reloaded_died; + } + + /* Go through the motions to emit INSN and test if it is strictly valid. 
+diff --git a/gcc/reorg.c b/gcc/reorg.c +index bdfcf8851..cba183e9c 100644 +--- a/gcc/reorg.c ++++ b/gcc/reorg.c +@@ -410,8 +410,7 @@ find_end_label (rtx kind) + while (NOTE_P (insn) + || (NONJUMP_INSN_P (insn) + && (GET_CODE (PATTERN (insn)) == USE +- || GET_CODE (PATTERN (insn)) == CLOBBER +- || GET_CODE (PATTERN (insn)) == CLOBBER_HIGH))) ++ || GET_CODE (PATTERN (insn)) == CLOBBER))) + insn = PREV_INSN (insn); + + /* When a target threads its epilogue we might already have a +@@ -1311,8 +1310,7 @@ try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread) + + /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */ + if (NONJUMP_INSN_P (trial) +- && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER +- || GET_CODE (pat) == CLOBBER_HIGH)) ++ && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)) + continue; + + if (GET_CODE (next_to_match) == GET_CODE (trial) +@@ -1506,8 +1504,7 @@ redundant_insn (rtx insn, rtx_insn *target, const vec &delay_list) + --insns_to_search; + + pat = PATTERN (trial); +- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER +- || GET_CODE (pat) == CLOBBER_HIGH) ++ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) + continue; + + if (GET_CODE (trial) == DEBUG_INSN) +@@ -1575,7 +1572,7 @@ redundant_insn (rtx insn, rtx_insn *target, const vec &delay_list) + /* Insns we pass may not set either NEEDED or SET, so merge them for + simpler tests. */ + needed.memory |= set.memory; +- IOR_HARD_REG_SET (needed.regs, set.regs); ++ needed.regs |= set.regs; + + /* This insn isn't redundant if it conflicts with an insn that either is + or will be in a delay slot of TARGET. */ +@@ -1605,8 +1602,7 @@ redundant_insn (rtx insn, rtx_insn *target, const vec &delay_list) + --insns_to_search; + + pat = PATTERN (trial); +- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER +- || GET_CODE (pat) == CLOBBER_HIGH) ++ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) + continue; + + if (GET_CODE (trial) == DEBUG_INSN) +@@ -1718,8 +1714,7 @@ own_thread_p (rtx thread, rtx label, int allow_fallthrough) + || LABEL_P (insn) + || (NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) != USE +- && GET_CODE (PATTERN (insn)) != CLOBBER +- && GET_CODE (PATTERN (insn)) != CLOBBER_HIGH)) ++ && GET_CODE (PATTERN (insn)) != CLOBBER)) + return 0; + + return 1; +@@ -2042,8 +2037,7 @@ fill_simple_delay_slots (int non_jumps_p) + pat = PATTERN (trial); + + /* Stand-alone USE and CLOBBER are just for flow. */ +- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER +- || GET_CODE (pat) == CLOBBER_HIGH) ++ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) + continue; + + /* And DEBUG_INSNs never go into delay slots. */ +@@ -2169,8 +2163,7 @@ fill_simple_delay_slots (int non_jumps_p) + pat = PATTERN (trial); + + /* Stand-alone USE and CLOBBER are just for flow. */ +- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER +- || GET_CODE (pat) == CLOBBER_HIGH) ++ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) + continue; + + /* And DEBUG_INSNs do not go in delay slots. */ +@@ -2438,8 +2431,7 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition, + } + + pat = PATTERN (trial); +- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER +- || GET_CODE (pat) == CLOBBER_HIGH) ++ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) + continue; + + if (GET_CODE (trial) == DEBUG_INSN) +@@ -3833,8 +3825,7 @@ dbr_schedule (rtx_insn *first) + if (! 
insn->deleted () + && NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) != USE +- && GET_CODE (PATTERN (insn)) != CLOBBER +- && GET_CODE (PATTERN (insn)) != CLOBBER_HIGH) ++ && GET_CODE (PATTERN (insn)) != CLOBBER) + { + if (GET_CODE (PATTERN (insn)) == SEQUENCE) + { +diff --git a/gcc/resource.c b/gcc/resource.c +index c4bcfd7dc..bf2d6beaf 100644 +--- a/gcc/resource.c ++++ b/gcc/resource.c +@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see + #include "resource.h" + #include "insn-attr.h" + #include "params.h" ++#include "function-abi.h" + + /* This structure is used to record liveness information at the targets or + fallthrough insns of branches. We will most likely need the information +@@ -108,11 +109,6 @@ update_live_status (rtx dest, const_rtx x, void *data ATTRIBUTE_UNUSED) + if (GET_CODE (x) == CLOBBER) + for (i = first_regno; i < last_regno; i++) + CLEAR_HARD_REG_BIT (current_live_regs, i); +- else if (GET_CODE (x) == CLOBBER_HIGH) +- /* No current target supports both branch delay slots and CLOBBER_HIGH. +- We'd need more elaborate liveness tracking to handle that +- combination. */ +- gcc_unreachable (); + else + for (i = first_regno; i < last_regno; i++) + { +@@ -298,7 +294,6 @@ mark_referenced_resources (rtx x, struct resources *res, + return; + + case CLOBBER: +- case CLOBBER_HIGH: + return; + + case CALL_INSN: +@@ -450,8 +445,8 @@ find_dead_or_set_registers (rtx_insn *target, struct resources *res, + case CODE_LABEL: + /* After a label, any pending dead registers that weren't yet + used can be made dead. */ +- AND_COMPL_HARD_REG_SET (pending_dead_regs, needed.regs); +- AND_COMPL_HARD_REG_SET (res->regs, pending_dead_regs); ++ pending_dead_regs &= ~needed.regs; ++ res->regs &= ~pending_dead_regs; + CLEAR_HARD_REG_SET (pending_dead_regs); + + continue; +@@ -565,14 +560,12 @@ find_dead_or_set_registers (rtx_insn *target, struct resources *res, + } + + target_res = *res; +- COPY_HARD_REG_SET (scratch, target_set.regs); +- AND_COMPL_HARD_REG_SET (scratch, needed.regs); +- AND_COMPL_HARD_REG_SET (target_res.regs, scratch); ++ scratch = target_set.regs & ~needed.regs; ++ target_res.regs &= ~scratch; + + fallthrough_res = *res; +- COPY_HARD_REG_SET (scratch, set.regs); +- AND_COMPL_HARD_REG_SET (scratch, needed.regs); +- AND_COMPL_HARD_REG_SET (fallthrough_res.regs, scratch); ++ scratch = set.regs & ~needed.regs; ++ fallthrough_res.regs &= ~scratch; + + if (!ANY_RETURN_P (this_jump_insn->jump_label ())) + find_dead_or_set_registers +@@ -581,8 +574,8 @@ find_dead_or_set_registers (rtx_insn *target, struct resources *res, + find_dead_or_set_registers (next_insn, + &fallthrough_res, 0, jump_count, + set, needed); +- IOR_HARD_REG_SET (fallthrough_res.regs, target_res.regs); +- AND_HARD_REG_SET (res->regs, fallthrough_res.regs); ++ fallthrough_res.regs |= target_res.regs; ++ res->regs &= fallthrough_res.regs; + break; + } + else +@@ -601,9 +594,8 @@ find_dead_or_set_registers (rtx_insn *target, struct resources *res, + mark_referenced_resources (insn, &needed, true); + mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL); + +- COPY_HARD_REG_SET (scratch, set.regs); +- AND_COMPL_HARD_REG_SET (scratch, needed.regs); +- AND_COMPL_HARD_REG_SET (res->regs, scratch); ++ scratch = set.regs & ~needed.regs; ++ res->regs &= ~scratch; + } + + return jump_insn; +@@ -665,24 +657,16 @@ mark_set_resources (rtx x, struct resources *res, int in_dest, + { + rtx_call_insn *call_insn = as_a (x); + rtx link; +- HARD_REG_SET regs; + + res->cc = res->memory = 1; + +- get_call_reg_set_usage 
(call_insn, ®s, regs_invalidated_by_call); +- IOR_HARD_REG_SET (res->regs, regs); ++ res->regs |= insn_callee_abi (call_insn).full_reg_clobbers (); + + for (link = CALL_INSN_FUNCTION_USAGE (call_insn); + link; link = XEXP (link, 1)) +- { +- /* We could support CLOBBER_HIGH and treat it in the same way as +- HARD_REGNO_CALL_PART_CLOBBERED, but no port needs that +- yet. */ +- gcc_assert (GET_CODE (XEXP (link, 0)) != CLOBBER_HIGH); +- if (GET_CODE (XEXP (link, 0)) == CLOBBER) +- mark_set_resources (SET_DEST (XEXP (link, 0)), res, 1, +- MARK_SRC_DEST); +- } ++ if (GET_CODE (XEXP (link, 0)) == CLOBBER) ++ mark_set_resources (SET_DEST (XEXP (link, 0)), res, 1, ++ MARK_SRC_DEST); + + /* Check for a REG_SETJMP. If it exists, then we must + assume that this call can clobber any register. */ +@@ -725,12 +709,6 @@ mark_set_resources (rtx x, struct resources *res, int in_dest, + mark_set_resources (XEXP (x, 0), res, 1, MARK_SRC_DEST); + return; + +- case CLOBBER_HIGH: +- /* No current target supports both branch delay slots and CLOBBER_HIGH. +- We'd need more elaborate liveness tracking to handle that +- combination. */ +- gcc_unreachable (); +- + case SEQUENCE: + { + rtx_sequence *seq = as_a (x); +@@ -960,7 +938,7 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource + update it below. */ + if (b == tinfo->block && b != -1 && tinfo->bb_tick == bb_ticks[b]) + { +- COPY_HARD_REG_SET (res->regs, tinfo->live_regs); ++ res->regs = tinfo->live_regs; + return; + } + } +@@ -1041,15 +1019,12 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource + predicated instruction, or if the CALL is NORETURN. */ + if (GET_CODE (PATTERN (real_insn)) != COND_EXEC) + { +- HARD_REG_SET regs_invalidated_by_this_call; +- get_call_reg_set_usage (real_insn, +- ®s_invalidated_by_this_call, +- regs_invalidated_by_call); ++ HARD_REG_SET regs_invalidated_by_this_call ++ = insn_callee_abi (real_insn).full_reg_clobbers (); + /* CALL clobbers all call-used regs that aren't fixed except + sp, ap, and fp. Do this before setting the result of the + call live. */ +- AND_COMPL_HARD_REG_SET (current_live_regs, +- regs_invalidated_by_this_call); ++ current_live_regs &= ~regs_invalidated_by_this_call; + } + + /* A CALL_INSN sets any global register live, since it may +@@ -1078,7 +1053,7 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource + GET_MODE (XEXP (link, 0)), + REGNO (XEXP (link, 0))); + +- note_stores (PATTERN (real_insn), update_live_status, NULL); ++ note_stores (real_insn, update_live_status, NULL); + + /* If any registers were unused after this insn, kill them. + These notes will always be accurate. */ +@@ -1097,7 +1072,7 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource + + /* A label clobbers the pending dead registers since neither + reload nor jump will propagate a value across a label. 
*/ +- AND_COMPL_HARD_REG_SET (current_live_regs, pending_dead_regs); ++ current_live_regs &= ~pending_dead_regs; + CLEAR_HARD_REG_SET (pending_dead_regs); + + /* We must conservatively assume that all registers that used +@@ -1109,7 +1084,7 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource + HARD_REG_SET extra_live; + + REG_SET_TO_HARD_REG_SET (extra_live, DF_LR_IN (bb)); +- IOR_HARD_REG_SET (current_live_regs, extra_live); ++ current_live_regs |= extra_live; + } + } + +@@ -1118,10 +1093,10 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource + are implicitly required at that point. */ + else if (NOTE_P (real_insn) + && NOTE_KIND (real_insn) == NOTE_INSN_EPILOGUE_BEG) +- IOR_HARD_REG_SET (current_live_regs, start_of_epilogue_needs.regs); ++ current_live_regs |= start_of_epilogue_needs.regs; + } + +- COPY_HARD_REG_SET (res->regs, current_live_regs); ++ res->regs = current_live_regs; + if (tinfo != NULL) + { + tinfo->block = b; +@@ -1160,20 +1135,17 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource + { + mark_referenced_resources (insn, &needed, true); + +- COPY_HARD_REG_SET (scratch, needed.regs); +- AND_COMPL_HARD_REG_SET (scratch, set.regs); +- IOR_HARD_REG_SET (new_resources.regs, scratch); ++ scratch = needed.regs & ~set.regs; ++ new_resources.regs |= scratch; + + mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL); + } + +- IOR_HARD_REG_SET (res->regs, new_resources.regs); ++ res->regs |= new_resources.regs; + } + + if (tinfo != NULL) +- { +- COPY_HARD_REG_SET (tinfo->live_regs, res->regs); +- } ++ tinfo->live_regs = res->regs; + } + + /* Initialize the resources required by mark_target_live_regs (). +diff --git a/gcc/rtl.c b/gcc/rtl.c +index d7b8e9877..ec65fbb37 100644 +--- a/gcc/rtl.c ++++ b/gcc/rtl.c +@@ -315,10 +315,6 @@ copy_rtx (rtx orig) + return orig; + break; + +- case CLOBBER_HIGH: +- gcc_assert (REG_P (XEXP (orig, 0))); +- return orig; +- + case CONST: + if (shared_const_p (orig)) + return orig; +diff --git a/gcc/rtl.def b/gcc/rtl.def +index f4c9d946c..edb34c5ac 100644 +--- a/gcc/rtl.def ++++ b/gcc/rtl.def +@@ -312,16 +312,6 @@ DEF_RTL_EXPR(USE, "use", "e", RTX_EXTRA) + is considered undeletable before reload. */ + DEF_RTL_EXPR(CLOBBER, "clobber", "e", RTX_EXTRA) + +-/* Indicate that the upper parts of something are clobbered in a way that we +- don't want to explain. The MODE references the lower bits that will be +- preserved. Anything above that size will be clobbered. +- +- CLOBBER_HIGH only occurs as the operand of a PARALLEL rtx. It cannot appear +- in other contexts, and unlike CLOBBER, it cannot appear on its own. +- CLOBBER_HIGH can only be used with fixed register rtxes. */ +- +-DEF_RTL_EXPR(CLOBBER_HIGH, "clobber_high", "e", RTX_EXTRA) +- + /* Call a subroutine. + Operand 1 is the address to call. + Operand 2 is the number of arguments. */ +@@ -936,6 +926,12 @@ DEF_RTL_EXPR(DEFINE_SPLIT, "define_split", "EsES", RTX_EXTRA) + 7: optionally, a vector of attributes for this insn. */ + DEF_RTL_EXPR(DEFINE_INSN_AND_SPLIT, "define_insn_and_split", "sEsTsESV", RTX_EXTRA) + ++/* A form of define_insn_and_split in which the split insn pattern (operand 5) ++ is determined automatically by replacing match_operands with match_dups ++ and match_operators with match_op_dups. The operands are the same as ++ define_insn_and_split but with operand 5 removed. 
*/ ++DEF_RTL_EXPR(DEFINE_INSN_AND_REWRITE, "define_insn_and_rewrite", "sEsTsSV", RTX_EXTRA) ++ + /* Definition of an RTL peephole operation. + Follows the same arguments as define_split. */ + DEF_RTL_EXPR(DEFINE_PEEPHOLE2, "define_peephole2", "EsES", RTX_EXTRA) +diff --git a/gcc/rtl.h b/gcc/rtl.h +index b4a906f91..6093d42c0 100644 +--- a/gcc/rtl.h ++++ b/gcc/rtl.h +@@ -1623,11 +1623,17 @@ extern const char * const reg_note_name[]; + #define GET_REG_NOTE_NAME(MODE) (reg_note_name[(int) (MODE)]) + + /* This field is only present on CALL_INSNs. It holds a chain of EXPR_LIST of +- USE and CLOBBER expressions. ++ USE, CLOBBER and SET expressions. + USE expressions list the registers filled with arguments that + are passed to the function. + CLOBBER expressions document the registers explicitly clobbered + by this CALL_INSN. ++ SET expressions say that the return value of the call (the SET_DEST) ++ is equivalent to a value available before the call (the SET_SRC). ++ This kind of SET is used when the return value is predictable in ++ advance. It is purely an optimisation hint; unlike USEs and CLOBBERs, ++ it does not affect register liveness. ++ + Pseudo registers cannot be mentioned in this list. */ + #define CALL_INSN_FUNCTION_USAGE(INSN) XEXP(INSN, 7) + +@@ -2392,12 +2398,30 @@ extern int rtx_cost (rtx, machine_mode, enum rtx_code, int, bool); + extern int address_cost (rtx, machine_mode, addr_space_t, bool); + extern void get_full_rtx_cost (rtx, machine_mode, enum rtx_code, int, + struct full_rtx_costs *); ++extern bool native_encode_rtx (machine_mode, rtx, vec &, ++ unsigned int, unsigned int); ++extern rtx native_decode_rtx (machine_mode, vec, ++ unsigned int); ++extern rtx native_decode_vector_rtx (machine_mode, vec, ++ unsigned int, unsigned int, unsigned int); + extern poly_uint64 subreg_lsb (const_rtx); +-extern poly_uint64 subreg_lsb_1 (machine_mode, machine_mode, poly_uint64); ++extern poly_uint64 subreg_size_lsb (poly_uint64, poly_uint64, poly_uint64); + extern poly_uint64 subreg_size_offset_from_lsb (poly_uint64, poly_uint64, + poly_uint64); + extern bool read_modify_subreg_p (const_rtx); + ++/* Given a subreg's OUTER_MODE, INNER_MODE, and SUBREG_BYTE, return the ++ bit offset at which the subreg begins (counting from the least significant ++ bit of the operand). */ ++ ++inline poly_uint64 ++subreg_lsb_1 (machine_mode outer_mode, machine_mode inner_mode, ++ poly_uint64 subreg_byte) ++{ ++ return subreg_size_lsb (GET_MODE_SIZE (outer_mode), ++ GET_MODE_SIZE (inner_mode), subreg_byte); ++} ++ + /* Return the subreg byte offset for a subreg whose outer mode is + OUTER_MODE, whose inner mode is INNER_MODE, and where there are + LSB_SHIFT *bits* between the lsb of the outer value and the lsb of +@@ -2645,7 +2669,7 @@ do { \ + + /* For a SET rtx, SET_DEST is the place that is set + and SET_SRC is the value it is set to. 
*/ +-#define SET_DEST(RTX) XC3EXP (RTX, 0, SET, CLOBBER, CLOBBER_HIGH) ++#define SET_DEST(RTX) XC2EXP (RTX, 0, SET, CLOBBER) + #define SET_SRC(RTX) XCEXP (RTX, 1, SET) + #define SET_IS_RETURN_P(RTX) \ + (RTL_FLAG_CHECK1 ("SET_IS_RETURN_P", (RTX), SET)->jump) +@@ -3369,8 +3393,7 @@ extern bool val_signbit_known_clear_p (machine_mode, + unsigned HOST_WIDE_INT); + + /* In reginfo.c */ +-extern machine_mode choose_hard_reg_mode (unsigned int, unsigned int, +- bool); ++extern machine_mode choose_hard_reg_mode (unsigned int, unsigned int, bool); + extern const HARD_REG_SET &simplifiable_subregs (const subreg_shape &); + + /* In emit-rtl.c */ +@@ -3407,6 +3430,7 @@ extern int rtx_unstable_p (const_rtx); + extern bool rtx_varies_p (const_rtx, bool); + extern bool rtx_addr_varies_p (const_rtx, bool); + extern rtx get_call_rtx_from (rtx); ++extern tree get_call_fndecl (const rtx_insn *); + extern HOST_WIDE_INT get_integer_term (const_rtx); + extern rtx get_related_value (const_rtx); + extern bool offset_within_block_p (const_rtx, HOST_WIDE_INT); +@@ -3435,7 +3459,10 @@ extern void record_hard_reg_sets (rtx, const_rtx, void *); + extern void record_hard_reg_uses (rtx *, void *); + extern void find_all_hard_regs (const_rtx, HARD_REG_SET *); + extern void find_all_hard_reg_sets (const rtx_insn *, HARD_REG_SET *, bool); +-extern void note_stores (const_rtx, void (*) (rtx, const_rtx, void *), void *); ++extern void note_pattern_stores (const_rtx, ++ void (*) (rtx, const_rtx, void *), void *); ++extern void note_stores (const rtx_insn *, ++ void (*) (rtx, const_rtx, void *), void *); + extern void note_uses (rtx *, void (*) (rtx *, void *), void *); + extern int dead_or_set_p (const rtx_insn *, const_rtx); + extern int dead_or_set_regno_p (const rtx_insn *, unsigned int); +@@ -3476,16 +3503,6 @@ extern bool tablejump_p (const rtx_insn *, rtx_insn **, rtx_jump_table_data **); + extern int computed_jump_p (const rtx_insn *); + extern bool tls_referenced_p (const_rtx); + extern bool contains_mem_rtx_p (rtx x); +-extern bool reg_is_clobbered_by_clobber_high (unsigned int, machine_mode, +- const_rtx); +- +-/* Convenient wrapper for reg_is_clobbered_by_clobber_high. */ +-inline bool +-reg_is_clobbered_by_clobber_high (const_rtx x, const_rtx clobber_high_op) +-{ +- return reg_is_clobbered_by_clobber_high (REGNO (x), GET_MODE (x), +- clobber_high_op); +-} + + /* Overload for refers_to_regno_p for checking a single register. */ + inline bool +@@ -4279,7 +4296,6 @@ extern void vt_equate_reg_base_value (const_rtx, const_rtx); + extern bool memory_modified_in_insn_p (const_rtx, const_rtx); + extern bool may_be_sp_based_p (rtx); + extern rtx gen_hard_reg_clobber (machine_mode, unsigned int); +-extern rtx gen_hard_reg_clobber_high (machine_mode, unsigned int); + extern rtx get_reg_known_value (unsigned int); + extern bool get_reg_known_equiv_p (unsigned int); + extern rtx get_reg_base_value (unsigned int); +@@ -4353,14 +4369,11 @@ extern tree GTY(()) global_regs_decl[FIRST_PSEUDO_REGISTER]; + Available only for functions that has been already assembled. */ + + struct GTY(()) cgraph_rtl_info { +- unsigned int preferred_incoming_stack_boundary; ++ unsigned int preferred_incoming_stack_boundary; + +- /* Call unsaved hard registers really used by the corresponding +- function (including ones used by functions called by the +- function). */ ++ /* Which registers the function clobbers, either directly or by ++ calling another function. */ + HARD_REG_SET function_used_regs; +- /* Set if function_used_regs is valid. 
*/ +- unsigned function_used_regs_valid: 1; + }; + + /* If loads from memories of mode MODE always sign or zero extend, +diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c +index 01af063a2..553d71c1c 100644 +--- a/gcc/rtlanal.c ++++ b/gcc/rtlanal.c +@@ -823,6 +823,24 @@ get_call_rtx_from (rtx x) + return x; + return NULL_RTX; + } ++ ++/* Get the declaration of the function called by INSN. */ ++ ++tree ++get_call_fndecl (const rtx_insn *insn) ++{ ++ rtx note, datum; ++ ++ note = find_reg_note (insn, REG_CALL_DECL, NULL_RTX); ++ if (note == NULL_RTX) ++ return NULL_TREE; ++ ++ datum = XEXP (note, 0); ++ if (datum != NULL_RTX) ++ return SYMBOL_REF_DECL (datum); ++ ++ return NULL_TREE; ++} + + /* Return the value of the integer term in X, if one is apparent; + otherwise return 0. +@@ -1198,10 +1216,6 @@ reg_referenced_p (const_rtx x, const_rtx body) + return 1; + return 0; + +- case CLOBBER_HIGH: +- gcc_assert (REG_P (XEXP (body, 0))); +- return 0; +- + case COND_EXEC: + if (reg_overlap_mentioned_p (x, COND_EXEC_TEST (body))) + return 1; +@@ -1424,11 +1438,7 @@ set_of_1 (rtx x, const_rtx pat, void *data1) + { + struct set_of_data *const data = (struct set_of_data *) (data1); + if (rtx_equal_p (x, data->pat) +- || (GET_CODE (pat) == CLOBBER_HIGH +- && REGNO(data->pat) == REGNO(XEXP (pat, 0)) +- && reg_is_clobbered_by_clobber_high (data->pat, XEXP (pat, 0))) +- || (GET_CODE (pat) != CLOBBER_HIGH && !MEM_P (x) +- && reg_overlap_mentioned_p (data->pat, x))) ++ || (!MEM_P (x) && reg_overlap_mentioned_p (data->pat, x))) + data->found = pat; + } + +@@ -1440,7 +1450,7 @@ set_of (const_rtx pat, const_rtx insn) + struct set_of_data data; + data.found = NULL_RTX; + data.pat = pat; +- note_stores (INSN_P (insn) ? PATTERN (insn) : insn, set_of_1, &data); ++ note_pattern_stores (INSN_P (insn) ? PATTERN (insn) : insn, set_of_1, &data); + return data.found; + } + +@@ -1476,15 +1486,9 @@ find_all_hard_reg_sets (const rtx_insn *insn, HARD_REG_SET *pset, bool implicit) + rtx link; + + CLEAR_HARD_REG_SET (*pset); +- note_stores (PATTERN (insn), record_hard_reg_sets, pset); +- if (CALL_P (insn)) +- { +- if (implicit) +- IOR_HARD_REG_SET (*pset, call_used_reg_set); +- +- for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) +- record_hard_reg_sets (XEXP (link, 0), NULL, pset); +- } ++ note_stores (insn, record_hard_reg_sets, pset); ++ if (CALL_P (insn) && implicit) ++ *pset |= call_used_or_fixed_regs; + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_INC) + record_hard_reg_sets (XEXP (link, 0), NULL, pset); +@@ -1517,7 +1521,6 @@ single_set_2 (const rtx_insn *insn, const_rtx pat) + { + case USE: + case CLOBBER: +- case CLOBBER_HIGH: + break; + + case SET: +@@ -1671,9 +1674,7 @@ noop_move_p (const rtx_insn *insn) + { + rtx tem = XVECEXP (pat, 0, i); + +- if (GET_CODE (tem) == USE +- || GET_CODE (tem) == CLOBBER +- || GET_CODE (tem) == CLOBBER_HIGH) ++ if (GET_CODE (tem) == USE || GET_CODE (tem) == CLOBBER) + continue; + + if (GET_CODE (tem) != SET || ! set_noop_p (tem)) +@@ -1899,16 +1900,15 @@ reg_overlap_mentioned_p (const_rtx x, const_rtx in) + the SUBREG will be passed. 
*/ + + void +-note_stores (const_rtx x, void (*fun) (rtx, const_rtx, void *), void *data) ++note_pattern_stores (const_rtx x, ++ void (*fun) (rtx, const_rtx, void *), void *data) + { + int i; + + if (GET_CODE (x) == COND_EXEC) + x = COND_EXEC_CODE (x); + +- if (GET_CODE (x) == SET +- || GET_CODE (x) == CLOBBER +- || GET_CODE (x) == CLOBBER_HIGH) ++ if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER) + { + rtx dest = SET_DEST (x); + +@@ -1933,7 +1933,22 @@ note_stores (const_rtx x, void (*fun) (rtx, const_rtx, void *), void *data) + + else if (GET_CODE (x) == PARALLEL) + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) +- note_stores (XVECEXP (x, 0, i), fun, data); ++ note_pattern_stores (XVECEXP (x, 0, i), fun, data); ++} ++ ++/* Same, but for an instruction. If the instruction is a call, include ++ any CLOBBERs in its CALL_INSN_FUNCTION_USAGE. */ ++ ++void ++note_stores (const rtx_insn *insn, ++ void (*fun) (rtx, const_rtx, void *), void *data) ++{ ++ if (CALL_P (insn)) ++ for (rtx link = CALL_INSN_FUNCTION_USAGE (insn); ++ link; link = XEXP (link, 1)) ++ if (GET_CODE (XEXP (link, 0)) == CLOBBER) ++ note_pattern_stores (XEXP (link, 0), fun, data); ++ note_pattern_stores (PATTERN (insn), fun, data); + } + + /* Like notes_stores, but call FUN for each expression that is being +@@ -3611,23 +3626,31 @@ loc_mentioned_in_p (rtx *loc, const_rtx in) + return 0; + } + +-/* Helper function for subreg_lsb. Given a subreg's OUTER_MODE, INNER_MODE, +- and SUBREG_BYTE, return the bit offset where the subreg begins +- (counting from the least significant bit of the operand). */ ++/* Reinterpret a subreg as a bit extraction from an integer and return ++ the position of the least significant bit of the extracted value. ++ In other words, if the extraction were performed as a shift right ++ and mask, return the number of bits to shift right. ++ ++ The outer value of the subreg has OUTER_BYTES bytes and starts at ++ byte offset SUBREG_BYTE within an inner value of INNER_BYTES bytes. */ + + poly_uint64 +-subreg_lsb_1 (machine_mode outer_mode, +- machine_mode inner_mode, +- poly_uint64 subreg_byte) ++subreg_size_lsb (poly_uint64 outer_bytes, ++ poly_uint64 inner_bytes, ++ poly_uint64 subreg_byte) + { + poly_uint64 subreg_end, trailing_bytes, byte_pos; + + /* A paradoxical subreg begins at bit position 0. */ +- if (paradoxical_subreg_p (outer_mode, inner_mode)) +- return 0; ++ gcc_checking_assert (ordered_p (outer_bytes, inner_bytes)); ++ if (maybe_gt (outer_bytes, inner_bytes)) ++ { ++ gcc_checking_assert (known_eq (subreg_byte, 0U)); ++ return 0; ++ } + +- subreg_end = subreg_byte + GET_MODE_SIZE (outer_mode); +- trailing_bytes = GET_MODE_SIZE (inner_mode) - subreg_end; ++ subreg_end = subreg_byte + outer_bytes; ++ trailing_bytes = inner_bytes - subreg_end; + if (WORDS_BIG_ENDIAN && BYTES_BIG_ENDIAN) + byte_pos = trailing_bytes; + else if (!WORDS_BIG_ENDIAN && !BYTES_BIG_ENDIAN) +@@ -4123,7 +4146,7 @@ find_first_parameter_load (rtx_insn *call_insn, rtx_insn *boundary) + if (INSN_P (before)) + { + int nregs_old = parm.nregs; +- note_stores (PATTERN (before), parms_set, &parm); ++ note_stores (before, parms_set, &parm); + /* If we found something that did not set a parameter reg, + we're done. Do not keep going, as that might result + in hoisting an insn before the setting of a pseudo +@@ -6601,32 +6624,3 @@ tls_referenced_p (const_rtx x) + return true; + return false; + } +- +-/* Return true if reg REGNO with mode REG_MODE would be clobbered by the +- clobber_high operand in CLOBBER_HIGH_OP. 
*/ +- +-bool +-reg_is_clobbered_by_clobber_high (unsigned int regno, machine_mode reg_mode, +- const_rtx clobber_high_op) +-{ +- unsigned int clobber_regno = REGNO (clobber_high_op); +- machine_mode clobber_mode = GET_MODE (clobber_high_op); +- unsigned char regno_nregs = hard_regno_nregs (regno, reg_mode); +- +- /* Clobber high should always span exactly one register. */ +- gcc_assert (REG_NREGS (clobber_high_op) == 1); +- +- /* Clobber high needs to match with one of the registers in X. */ +- if (clobber_regno < regno || clobber_regno >= regno + regno_nregs) +- return false; +- +- gcc_assert (reg_mode != BLKmode && clobber_mode != BLKmode); +- +- if (reg_mode == VOIDmode) +- return clobber_mode != VOIDmode; +- +- /* Clobber high will clobber if its size might be greater than the size of +- register regno. */ +- return maybe_gt (exact_div (GET_MODE_SIZE (reg_mode), regno_nregs), +- GET_MODE_SIZE (clobber_mode)); +-} +diff --git a/gcc/rtx-vector-builder.h b/gcc/rtx-vector-builder.h +index d5950e2b8..08b55dd36 100644 +--- a/gcc/rtx-vector-builder.h ++++ b/gcc/rtx-vector-builder.h +@@ -24,10 +24,11 @@ along with GCC; see the file COPYING3. If not see + + /* This class is used to build VECTOR_CSTs from a sequence of elements. + See vector_builder for more details. */ +-class rtx_vector_builder : public vector_builder ++class rtx_vector_builder : public vector_builder + { +- typedef vector_builder parent; +- friend class vector_builder; ++ typedef vector_builder parent; ++ friend class vector_builder; + + public: + rtx_vector_builder () : m_mode (VOIDmode) {} +@@ -48,6 +49,15 @@ private: + bool can_elide_p (rtx) const { return true; } + void note_representative (rtx *, rtx) {} + ++ static poly_uint64 shape_nelts (machine_mode mode) ++ { return GET_MODE_NUNITS (mode); } ++ static poly_uint64 nelts_of (const_rtx x) ++ { return CONST_VECTOR_NUNITS (x); } ++ static unsigned int npatterns_of (const_rtx x) ++ { return CONST_VECTOR_NPATTERNS (x); } ++ static unsigned int nelts_per_pattern_of (const_rtx x) ++ { return CONST_VECTOR_NELTS_PER_PATTERN (x); } ++ + rtx find_cached_value (); + + machine_mode m_mode; +diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c +index 28b9d38ab..fe447d16a 100644 +--- a/gcc/sched-deps.c ++++ b/gcc/sched-deps.c +@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see + #include "sched-int.h" + #include "params.h" + #include "cselib.h" ++#include "function-abi.h" + + #ifdef INSN_SCHEDULING + +@@ -2203,9 +2204,9 @@ init_insn_reg_pressure_info (rtx_insn *insn) + reg_pressure_info[cl].change = 0; + } + +- note_stores (PATTERN (insn), mark_insn_reg_clobber, insn); ++ note_stores (insn, mark_insn_reg_clobber, insn); + +- note_stores (PATTERN (insn), mark_insn_reg_store, insn); ++ note_stores (insn, mark_insn_reg_store, insn); + + if (AUTO_INC_DEC) + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) +@@ -2319,13 +2320,6 @@ sched_analyze_reg (struct deps_desc *deps, int regno, machine_mode mode, + while (--i >= 0) + note_reg_use (regno + i); + } +- else if (ref == CLOBBER_HIGH) +- { +- gcc_assert (i == 1); +- /* We don't know the current state of the register, so have to treat +- the clobber high as a full clobber. */ +- note_reg_clobber (regno); +- } + else + { + while (--i >= 0) +@@ -2349,8 +2343,6 @@ sched_analyze_reg (struct deps_desc *deps, int regno, machine_mode mode, + else if (ref == USE) + note_reg_use (regno); + else +- /* For CLOBBER_HIGH, we don't know the current state of the register, +- so have to treat it as a full clobber. 
*/ + note_reg_clobber (regno); + + /* Pseudos that are REG_EQUIV to something may be replaced +@@ -2885,7 +2877,7 @@ get_implicit_reg_pending_clobbers (HARD_REG_SET *temp, rtx_insn *insn) + preprocess_constraints (insn); + alternative_mask preferred = get_preferred_alternatives (insn); + ira_implicitly_set_insn_hard_regs (temp, preferred); +- AND_COMPL_HARD_REG_SET (*temp, ira_no_alloc_regs); ++ *temp &= ~ira_no_alloc_regs; + } + + /* Analyze an INSN with pattern X to find all dependencies. */ +@@ -2901,7 +2893,7 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn) + { + HARD_REG_SET temp; + get_implicit_reg_pending_clobbers (&temp, insn); +- IOR_HARD_REG_SET (implicit_reg_pending_clobbers, temp); ++ implicit_reg_pending_clobbers |= temp; + } + + can_start_lhs_rhs_p = (NONJUMP_INSN_P (insn) +@@ -2973,7 +2965,7 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn) + sub = COND_EXEC_CODE (sub); + code = GET_CODE (sub); + } +- else if (code == SET || code == CLOBBER || code == CLOBBER_HIGH) ++ else if (code == SET || code == CLOBBER) + sched_analyze_1 (deps, sub, insn); + else + sched_analyze_2 (deps, sub, insn); +@@ -2989,10 +2981,6 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn) + { + if (GET_CODE (XEXP (link, 0)) == CLOBBER) + sched_analyze_1 (deps, XEXP (link, 0), insn); +- else if (GET_CODE (XEXP (link, 0)) == CLOBBER_HIGH) +- /* We could support CLOBBER_HIGH and treat it in the same way as +- HARD_REGNO_CALL_PART_CLOBBERED, but no port needs that yet. */ +- gcc_unreachable (); + else if (GET_CODE (XEXP (link, 0)) != SET) + sched_analyze_2 (deps, XEXP (link, 0), insn); + } +@@ -3332,10 +3320,9 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn) + IOR_REG_SET (&deps->reg_last_in_use, reg_pending_uses); + IOR_REG_SET (&deps->reg_last_in_use, reg_pending_clobbers); + IOR_REG_SET (&deps->reg_last_in_use, reg_pending_sets); +- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) +- if (TEST_HARD_REG_BIT (implicit_reg_pending_uses, i) +- || TEST_HARD_REG_BIT (implicit_reg_pending_clobbers, i)) +- SET_REGNO_REG_SET (&deps->reg_last_in_use, i); ++ IOR_REG_SET_HRS (&deps->reg_last_in_use, ++ implicit_reg_pending_uses ++ | implicit_reg_pending_clobbers); + + /* Set up the pending barrier found. */ + deps->last_reg_pending_barrier = reg_pending_barrier; +@@ -3724,6 +3711,7 @@ deps_analyze_insn (struct deps_desc *deps, rtx_insn *insn) + } + else + { ++ function_abi callee_abi = insn_callee_abi (insn); + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + /* A call may read and modify global register variables. */ + if (global_regs[i]) +@@ -3735,8 +3723,8 @@ deps_analyze_insn (struct deps_desc *deps, rtx_insn *insn) + Since we only have a choice between 'might be clobbered' + and 'definitely not clobbered', we must include all + partly call-clobbered registers here. 
*/ +- else if (targetm.hard_regno_call_part_clobbered (insn, i, +- reg_raw_mode[i]) ++ else if (targetm.hard_regno_call_part_clobbered ++ (callee_abi.id (), i, reg_raw_mode[i]) + || TEST_HARD_REG_BIT (regs_invalidated_by_call, i)) + SET_REGNO_REG_SET (reg_pending_clobbers, i); + /* We don't know what set of fixed registers might be used +diff --git a/gcc/sched-rgn.c b/gcc/sched-rgn.c +index 83688b3c9..c5ee33bf5 100644 +--- a/gcc/sched-rgn.c ++++ b/gcc/sched-rgn.c +@@ -2409,7 +2409,7 @@ static bool + sets_likely_spilled (rtx pat) + { + bool ret = false; +- note_stores (pat, sets_likely_spilled_1, &ret); ++ note_pattern_stores (pat, sets_likely_spilled_1, &ret); + return ret; + } + +diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c +index 6dec1beaa..f8f1d8238 100644 +--- a/gcc/sel-sched-ir.c ++++ b/gcc/sel-sched-ir.c +@@ -2661,12 +2661,9 @@ setup_id_implicit_regs (idata_t id, insn_t insn) + return; + + HARD_REG_SET temp; +- unsigned regno; +- hard_reg_set_iterator hrsi; + + get_implicit_reg_pending_clobbers (&temp, insn); +- EXECUTE_IF_SET_IN_HARD_REG_SET (temp, 0, regno, hrsi) +- SET_REGNO_REG_SET (IDATA_REG_SETS (id), regno); ++ IOR_REG_SET_HRS (IDATA_REG_SETS (id), temp); + } + + /* Setup register sets describing INSN in ID. */ +diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c +index f127ff745..bf370b5a5 100644 +--- a/gcc/sel-sched.c ++++ b/gcc/sel-sched.c +@@ -1102,7 +1102,7 @@ init_regs_for_mode (machine_mode mode) + if (i >= 0) + continue; + +- if (targetm.hard_regno_call_part_clobbered (NULL, cur_reg, mode)) ++ if (targetm.hard_regno_call_part_clobbered (0, cur_reg, mode)) + SET_HARD_REG_BIT (sel_hrd.regs_for_call_clobbered[mode], + cur_reg); + +@@ -1123,7 +1123,7 @@ init_hard_regs_data (void) + + CLEAR_HARD_REG_SET (sel_hrd.regs_ever_used); + for (cur_reg = 0; cur_reg < FIRST_PSEUDO_REGISTER; cur_reg++) +- if (df_regs_ever_live_p (cur_reg) || call_used_regs[cur_reg]) ++ if (df_regs_ever_live_p (cur_reg) || call_used_or_fixed_reg_p (cur_reg)) + SET_HARD_REG_BIT (sel_hrd.regs_ever_used, cur_reg); + + /* Initialize registers that are valid based on mode when this is +@@ -1221,15 +1221,13 @@ mark_unavailable_hard_regs (def_t def, struct reg_rename *reg_rename_p, + The HARD_REGNO_RENAME_OK covers other cases in condition below. */ + if (IN_RANGE (REGNO (orig_dest), FIRST_STACK_REG, LAST_STACK_REG) + && REGNO_REG_SET_P (used_regs, FIRST_STACK_REG)) +- IOR_HARD_REG_SET (reg_rename_p->unavailable_hard_regs, +- sel_hrd.stack_regs); ++ reg_rename_p->unavailable_hard_regs |= sel_hrd.stack_regs; + #endif + +- /* If there's a call on this path, make regs from call_used_reg_set ++ /* If there's a call on this path, make regs from call_used_or_fixed_regs + unavailable. */ + if (def->crosses_call) +- IOR_HARD_REG_SET (reg_rename_p->unavailable_hard_regs, +- call_used_reg_set); ++ reg_rename_p->unavailable_hard_regs |= call_used_or_fixed_regs; + + /* Stop here before reload: we need FRAME_REGS, STACK_REGS, and crosses_call, + but not register classes. */ +@@ -1238,22 +1236,20 @@ mark_unavailable_hard_regs (def_t def, struct reg_rename *reg_rename_p, + + /* Leave regs as 'available' only from the current + register class. */ +- COPY_HARD_REG_SET (reg_rename_p->available_for_renaming, +- reg_class_contents[cl]); ++ reg_rename_p->available_for_renaming = reg_class_contents[cl]; + + mode = GET_MODE (orig_dest); + + /* Leave only registers available for this mode. 
*/ + if (!sel_hrd.regs_for_mode_ok[mode]) + init_regs_for_mode (mode); +- AND_HARD_REG_SET (reg_rename_p->available_for_renaming, +- sel_hrd.regs_for_mode[mode]); ++ reg_rename_p->available_for_renaming &= sel_hrd.regs_for_mode[mode]; + + /* Exclude registers that are partially call clobbered. */ + if (def->crosses_call +- && !targetm.hard_regno_call_part_clobbered (NULL, regno, mode)) +- AND_COMPL_HARD_REG_SET (reg_rename_p->available_for_renaming, +- sel_hrd.regs_for_call_clobbered[mode]); ++ && !targetm.hard_regno_call_part_clobbered (0, regno, mode)) ++ reg_rename_p->available_for_renaming ++ &= ~sel_hrd.regs_for_call_clobbered[mode]; + + /* Leave only those that are ok to rename. */ + EXECUTE_IF_SET_IN_HARD_REG_SET (reg_rename_p->available_for_renaming, +@@ -1274,8 +1270,7 @@ mark_unavailable_hard_regs (def_t def, struct reg_rename *reg_rename_p, + cur_reg); + } + +- AND_COMPL_HARD_REG_SET (reg_rename_p->available_for_renaming, +- reg_rename_p->unavailable_hard_regs); ++ reg_rename_p->available_for_renaming &= ~reg_rename_p->unavailable_hard_regs; + + /* Regno is always ok from the renaming part of view, but it really + could be in *unavailable_hard_regs already, so set it here instead +@@ -1686,8 +1681,7 @@ find_best_reg_for_expr (expr_t expr, blist_t bnds, bool *is_orig_reg_p) + + /* Join hard registers unavailable due to register class + restrictions and live range intersection. */ +- IOR_HARD_REG_SET (hard_regs_used, +- reg_rename_data.unavailable_hard_regs); ++ hard_regs_used |= reg_rename_data.unavailable_hard_regs; + + best_reg = choose_best_reg (hard_regs_used, ®_rename_data, + original_insns, is_orig_reg_p); +@@ -2110,7 +2104,7 @@ implicit_clobber_conflict_p (insn_t through_insn, expr_t expr) + preprocess_constraints (insn); + alternative_mask prefrred = get_preferred_alternatives (insn); + ira_implicitly_set_insn_hard_regs (&temp, prefrred); +- AND_COMPL_HARD_REG_SET (temp, ira_no_alloc_regs); ++ temp &= ~ira_no_alloc_regs; + + /* If any implicit clobber registers intersect with regular ones in + through_insn, we have a dependency and thus bail out. 
*/ +diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c +index 57124db92..018696637 100644 +--- a/gcc/shrink-wrap.c ++++ b/gcc/shrink-wrap.c +@@ -76,7 +76,7 @@ requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used, + } + if (hard_reg_set_intersect_p (hardregs, prologue_used)) + return true; +- AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set); ++ hardregs &= ~call_used_or_fixed_regs; + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) + if (TEST_HARD_REG_BIT (hardregs, regno) + && df_regs_ever_live_p (regno)) +@@ -151,8 +151,8 @@ live_edge_for_reg (basic_block bb, int regno, int end_regno) + + static bool + move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn, +- const HARD_REG_SET uses, +- const HARD_REG_SET defs, ++ const_hard_reg_set uses, ++ const_hard_reg_set defs, + bool *split_p, + struct dead_debug_local *debug) + { +@@ -687,9 +687,9 @@ try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq) + HARD_REG_SET this_used; + CLEAR_HARD_REG_SET (this_used); + note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used); +- AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered); +- IOR_HARD_REG_SET (prologue_used, this_used); +- note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered); ++ this_used &= ~prologue_clobbered; ++ prologue_used |= this_used; ++ note_stores (insn, record_hard_reg_sets, &prologue_clobbered); + } + CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM); + if (frame_pointer_needed) +diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c +index bdbd1b98e..612d21b72 100644 +--- a/gcc/simplify-rtx.c ++++ b/gcc/simplify-rtx.c +@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see + #include "flags.h" + #include "selftest.h" + #include "selftest-rtl.h" ++#include "rtx-vector-builder.h" + + /* Simplification and canonicalization of RTL. */ + +@@ -45,7 +46,6 @@ along with GCC; see the file COPYING3. If not see + #define HWI_SIGN_EXTEND(low) \ + ((((HOST_WIDE_INT) low) < 0) ? HOST_WIDE_INT_M1 : HOST_WIDE_INT_0) + +-static rtx neg_const_int (machine_mode, const_rtx); + static bool plus_minus_operand_p (const_rtx); + static rtx simplify_plus_minus (enum rtx_code, machine_mode, rtx, rtx); + static rtx simplify_associative_operation (enum rtx_code, machine_mode, +@@ -56,17 +56,12 @@ static rtx simplify_unary_operation_1 (enum rtx_code, machine_mode, rtx); + static rtx simplify_binary_operation_1 (enum rtx_code, machine_mode, + rtx, rtx, rtx, rtx); + +-/* Negate a CONST_INT rtx. */ ++/* Negate I, which satisfies poly_int_rtx_p. MODE is the mode of I. 
*/ ++ + static rtx +-neg_const_int (machine_mode mode, const_rtx i) ++neg_poly_int_rtx (machine_mode mode, const_rtx i) + { +- unsigned HOST_WIDE_INT val = -UINTVAL (i); +- +- if (!HWI_COMPUTABLE_MODE_P (mode) +- && val == UINTVAL (i)) +- return simplify_const_unary_operation (NEG, mode, CONST_CAST_RTX (i), +- mode); +- return gen_int_mode (val, mode); ++ return immed_wide_int_const (-wi::to_poly_wide (i, mode), mode); + } + + /* Test whether expression, X, is an immediate constant that represents +@@ -1504,12 +1499,12 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) + && CONST_INT_P (XEXP (op, 1)) + && XEXP (XEXP (op, 0), 1) == XEXP (op, 1) + && (op_mode = as_a (GET_MODE (op)), +- GET_MODE_BITSIZE (op_mode) > INTVAL (XEXP (op, 1)))) ++ GET_MODE_PRECISION (op_mode) > INTVAL (XEXP (op, 1)))) + { + scalar_int_mode tmode; +- gcc_assert (GET_MODE_BITSIZE (int_mode) +- > GET_MODE_BITSIZE (op_mode)); +- if (int_mode_for_size (GET_MODE_BITSIZE (op_mode) ++ gcc_assert (GET_MODE_PRECISION (int_mode) ++ > GET_MODE_PRECISION (op_mode)); ++ if (int_mode_for_size (GET_MODE_PRECISION (op_mode) + - INTVAL (XEXP (op, 1)), 1).exists (&tmode)) + { + rtx inner = +@@ -1735,45 +1730,42 @@ simplify_const_unary_operation (enum rtx_code code, machine_mode mode, + } + if (CONST_SCALAR_INT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op)) + return gen_const_vec_duplicate (mode, op); +- unsigned int n_elts; + if (GET_CODE (op) == CONST_VECTOR +- && GET_MODE_NUNITS (mode).is_constant (&n_elts)) +- { +- /* This must be constant if we're duplicating it to a constant +- number of elements. */ +- unsigned int in_n_elts = CONST_VECTOR_NUNITS (op).to_constant (); +- gcc_assert (in_n_elts < n_elts); +- gcc_assert ((n_elts % in_n_elts) == 0); +- rtvec v = rtvec_alloc (n_elts); +- for (unsigned i = 0; i < n_elts; i++) +- RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op, i % in_n_elts); +- return gen_rtx_CONST_VECTOR (mode, v); ++ && (CONST_VECTOR_DUPLICATE_P (op) ++ || CONST_VECTOR_NUNITS (op).is_constant ())) ++ { ++ unsigned int npatterns = (CONST_VECTOR_DUPLICATE_P (op) ++ ? 
CONST_VECTOR_NPATTERNS (op) ++ : CONST_VECTOR_NUNITS (op).to_constant ()); ++ gcc_assert (multiple_p (GET_MODE_NUNITS (mode), npatterns)); ++ rtx_vector_builder builder (mode, npatterns, 1); ++ for (unsigned i = 0; i < npatterns; i++) ++ builder.quick_push (CONST_VECTOR_ELT (op, i)); ++ return builder.build (); + } + } + +- if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR) ++ if (VECTOR_MODE_P (mode) ++ && GET_CODE (op) == CONST_VECTOR ++ && known_eq (GET_MODE_NUNITS (mode), CONST_VECTOR_NUNITS (op))) + { +- unsigned int n_elts; +- if (!CONST_VECTOR_NUNITS (op).is_constant (&n_elts)) +- return NULL_RTX; ++ gcc_assert (GET_MODE (op) == op_mode); + +- machine_mode opmode = GET_MODE (op); +- gcc_assert (known_eq (GET_MODE_NUNITS (mode), n_elts)); +- gcc_assert (known_eq (GET_MODE_NUNITS (opmode), n_elts)); +- +- rtvec v = rtvec_alloc (n_elts); +- unsigned int i; ++ rtx_vector_builder builder; ++ if (!builder.new_unary_operation (mode, op, false)) ++ return 0; + +- for (i = 0; i < n_elts; i++) ++ unsigned int count = builder.encoded_nelts (); ++ for (unsigned int i = 0; i < count; i++) + { + rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode), + CONST_VECTOR_ELT (op, i), +- GET_MODE_INNER (opmode)); ++ GET_MODE_INNER (op_mode)); + if (!x || !valid_for_const_vector_p (mode, x)) + return 0; +- RTVEC_ELT (v, i) = x; ++ builder.quick_push (x); + } +- return gen_rtx_CONST_VECTOR (mode, v); ++ return builder.build (); + } + + /* The order of these tests is critical so that, for example, we don't +@@ -2549,10 +2541,10 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, + return plus_constant (mode, op0, trunc_int_for_mode (-offset, mode)); + + /* Don't let a relocatable value get a negative coeff. */ +- if (CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode) ++ if (poly_int_rtx_p (op1) && GET_MODE (op0) != VOIDmode) + return simplify_gen_binary (PLUS, mode, + op0, +- neg_const_int (mode, op1)); ++ neg_poly_int_rtx (mode, op1)); + + /* (x - (x & y)) -> (x & ~y) */ + if (INTEGRAL_MODE_P (mode) && GET_CODE (op1) == AND) +@@ -4071,6 +4063,27 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, + return 0; + } + ++/* Return true if binary operation OP distributes over addition in operand ++ OPNO, with the other operand being held constant. OPNO counts from 1. */ ++ ++static bool ++distributes_over_addition_p (rtx_code op, int opno) ++{ ++ switch (op) ++ { ++ case PLUS: ++ case MINUS: ++ case MULT: ++ return true; ++ ++ case ASHIFT: ++ return opno == 1; ++ ++ default: ++ return false; ++ } ++} ++ + rtx + simplify_const_binary_operation (enum rtx_code code, machine_mode mode, + rtx op0, rtx op1) +@@ -4080,26 +4093,45 @@ simplify_const_binary_operation (enum rtx_code code, machine_mode mode, + && GET_CODE (op0) == CONST_VECTOR + && GET_CODE (op1) == CONST_VECTOR) + { +- unsigned int n_elts; +- if (!CONST_VECTOR_NUNITS (op0).is_constant (&n_elts)) +- return NULL_RTX; +- +- gcc_assert (known_eq (n_elts, CONST_VECTOR_NUNITS (op1))); +- gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode))); +- rtvec v = rtvec_alloc (n_elts); +- unsigned int i; ++ bool step_ok_p; ++ if (CONST_VECTOR_STEPPED_P (op0) ++ && CONST_VECTOR_STEPPED_P (op1)) ++ /* We can operate directly on the encoding if: ++ ++ a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1 ++ implies ++ (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1) ++ ++ Addition and subtraction are the supported operators ++ for which this is true. 
*/ ++ step_ok_p = (code == PLUS || code == MINUS); ++ else if (CONST_VECTOR_STEPPED_P (op0)) ++ /* We can operate directly on stepped encodings if: ++ ++ a3 - a2 == a2 - a1 ++ implies: ++ (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c) ++ ++ which is true if (x -> x op c) distributes over addition. */ ++ step_ok_p = distributes_over_addition_p (code, 1); ++ else ++ /* Similarly in reverse. */ ++ step_ok_p = distributes_over_addition_p (code, 2); ++ rtx_vector_builder builder; ++ if (!builder.new_binary_operation (mode, op0, op1, step_ok_p)) ++ return 0; + +- for (i = 0; i < n_elts; i++) ++ unsigned int count = builder.encoded_nelts (); ++ for (unsigned int i = 0; i < count; i++) + { + rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode), + CONST_VECTOR_ELT (op0, i), + CONST_VECTOR_ELT (op1, i)); + if (!x || !valid_for_const_vector_p (mode, x)) + return 0; +- RTVEC_ELT (v, i) = x; ++ builder.quick_push (x); + } +- +- return gen_rtx_CONST_VECTOR (mode, v); ++ return builder.build (); + } + + if (VECTOR_MODE_P (mode) +@@ -4593,11 +4625,12 @@ simplify_plus_minus (enum rtx_code code, machine_mode mode, rtx op0, + } + break; + +- case CONST_INT: ++ CASE_CONST_SCALAR_INT: ++ case CONST_POLY_INT: + n_constants++; + if (this_neg) + { +- ops[i].op = neg_const_int (mode, this_op); ++ ops[i].op = neg_poly_int_rtx (mode, this_op); + ops[i].neg = 0; + changed = 1; + canonicalized = 1; +@@ -4722,8 +4755,8 @@ simplify_plus_minus (enum rtx_code code, machine_mode mode, rtx op0, + lneg &= rneg; + if (GET_CODE (tem) == NEG) + tem = XEXP (tem, 0), lneg = !lneg; +- if (CONST_INT_P (tem) && lneg) +- tem = neg_const_int (mode, tem), lneg = 0; ++ if (poly_int_rtx_p (tem) && lneg) ++ tem = neg_poly_int_rtx (mode, tem), lneg = 0; + + ops[i].op = tem; + ops[i].neg = lneg; +@@ -4782,12 +4815,12 @@ simplify_plus_minus (enum rtx_code code, machine_mode mode, rtx op0, + in the array and that any other constant will be next-to-last. */ + + if (n_ops > 1 +- && CONST_INT_P (ops[n_ops - 1].op) ++ && poly_int_rtx_p (ops[n_ops - 1].op) + && CONSTANT_P (ops[n_ops - 2].op)) + { + rtx value = ops[n_ops - 1].op; + if (ops[n_ops - 1].neg ^ ops[n_ops - 2].neg) +- value = neg_const_int (mode, value); ++ value = neg_poly_int_rtx (mode, value); + if (CONST_INT_P (value)) + { + ops[n_ops - 2].op = plus_constant (mode, ops[n_ops - 2].op, +@@ -6104,342 +6137,466 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, + return 0; + } + +-/* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE +- or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or +- CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR. ++/* Try to calculate NUM_BYTES bytes of the target memory image of X, ++ starting at byte FIRST_BYTE. Return true on success and add the ++ bytes to BYTES, such that each byte has BITS_PER_UNIT bits and such ++ that the bytes follow target memory order. Leave BYTES unmodified ++ on failure. + +- Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values +- represented as a little-endian array of 'unsigned char', selecting by BYTE, +- and then repacking them again for OUTERMODE. If OP is a CONST_VECTOR, +- FIRST_ELEM is the number of the first element to extract, otherwise +- FIRST_ELEM is ignored. */ ++ MODE is the mode of X. The caller must reserve NUM_BYTES bytes in ++ BYTES before calling this function. 
*/ + +-static rtx +-simplify_immed_subreg (fixed_size_mode outermode, rtx op, +- machine_mode innermode, unsigned int byte, +- unsigned int first_elem, unsigned int inner_bytes) ++bool ++native_encode_rtx (machine_mode mode, rtx x, vec &bytes, ++ unsigned int first_byte, unsigned int num_bytes) + { +- enum { +- value_bit = 8, +- value_mask = (1 << value_bit) - 1 +- }; +- unsigned char value[MAX_BITSIZE_MODE_ANY_MODE / value_bit]; +- int value_start; +- int i; +- int elem; +- +- int num_elem; +- rtx * elems; +- int elem_bitsize; +- rtx result_s = NULL; +- rtvec result_v = NULL; +- enum mode_class outer_class; +- scalar_mode outer_submode; +- int max_bitsize; ++ /* Check the mode is sensible. */ ++ gcc_assert (GET_MODE (x) == VOIDmode ++ ? is_a (mode) ++ : mode == GET_MODE (x)); + +- /* Some ports misuse CCmode. */ +- if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (op)) +- return op; ++ if (GET_CODE (x) == CONST_VECTOR) ++ { ++ /* CONST_VECTOR_ELT follows target memory order, so no shuffling ++ is necessary. The only complication is that MODE_VECTOR_BOOL ++ vectors can have several elements per byte. */ ++ unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode), ++ GET_MODE_NUNITS (mode)); ++ unsigned int elt = first_byte * BITS_PER_UNIT / elt_bits; ++ if (elt_bits < BITS_PER_UNIT) ++ { ++ /* This is the only case in which elements can be smaller than ++ a byte. */ ++ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); ++ for (unsigned int i = 0; i < num_bytes; ++i) ++ { ++ target_unit value = 0; ++ for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits) ++ { ++ value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j; ++ elt += 1; ++ } ++ bytes.quick_push (value); ++ } ++ return true; ++ } + +- /* We have no way to represent a complex constant at the rtl level. */ +- if (COMPLEX_MODE_P (outermode)) +- return NULL_RTX; ++ unsigned int start = bytes.length (); ++ unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mode); ++ /* Make FIRST_BYTE relative to ELT. */ ++ first_byte %= elt_bytes; ++ while (num_bytes > 0) ++ { ++ /* Work out how many bytes we want from element ELT. */ ++ unsigned int chunk_bytes = MIN (num_bytes, elt_bytes - first_byte); ++ if (!native_encode_rtx (GET_MODE_INNER (mode), ++ CONST_VECTOR_ELT (x, elt), bytes, ++ first_byte, chunk_bytes)) ++ { ++ bytes.truncate (start); ++ return false; ++ } ++ elt += 1; ++ first_byte = 0; ++ num_bytes -= chunk_bytes; ++ } ++ return true; ++ } + +- /* We support any size mode. */ +- max_bitsize = MAX (GET_MODE_BITSIZE (outermode), +- inner_bytes * BITS_PER_UNIT); ++ /* All subsequent cases are limited to scalars. */ ++ scalar_mode smode; ++ if (!is_a (mode, &smode)) ++ return false; + +- /* Unpack the value. */ ++ /* Make sure that the region is in range. */ ++ unsigned int end_byte = first_byte + num_bytes; ++ unsigned int mode_bytes = GET_MODE_SIZE (smode); ++ gcc_assert (end_byte <= mode_bytes); + +- if (GET_CODE (op) == CONST_VECTOR) ++ if (CONST_SCALAR_INT_P (x)) + { +- num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)); +- elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode); ++ /* The target memory layout is affected by both BYTES_BIG_ENDIAN ++ and WORDS_BIG_ENDIAN. Use the subreg machinery to get the lsb ++ position of each byte. */ ++ rtx_mode_t value (x, smode); ++ wide_int_ref value_wi (value); ++ for (unsigned int byte = first_byte; byte < end_byte; ++byte) ++ { ++ /* Always constant because the inputs are. 
*/ ++ unsigned int lsb ++ = subreg_size_lsb (1, mode_bytes, byte).to_constant (); ++ /* Operate directly on the encoding rather than using ++ wi::extract_uhwi, so that we preserve the sign or zero ++ extension for modes that are not a whole number of bits in ++ size. (Zero extension is only used for the combination of ++ innermode == BImode && STORE_FLAG_VALUE == 1). */ ++ unsigned int elt = lsb / HOST_BITS_PER_WIDE_INT; ++ unsigned int shift = lsb % HOST_BITS_PER_WIDE_INT; ++ unsigned HOST_WIDE_INT uhwi = value_wi.elt (elt); ++ bytes.quick_push (uhwi >> shift); ++ } ++ return true; + } +- else ++ ++ if (CONST_DOUBLE_P (x)) + { +- num_elem = 1; +- elem_bitsize = max_bitsize; ++ /* real_to_target produces an array of integers in target memory order. ++ All integers before the last one have 32 bits; the last one may ++ have 32 bits or fewer, depending on whether the mode bitsize ++ is divisible by 32. Each of these integers is then laid out ++ in target memory as any other integer would be. */ ++ long el32[MAX_BITSIZE_MODE_ANY_MODE / 32]; ++ real_to_target (el32, CONST_DOUBLE_REAL_VALUE (x), smode); ++ ++ /* The (maximum) number of target bytes per element of el32. */ ++ unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT; ++ gcc_assert (bytes_per_el32 != 0); ++ ++ /* Build up the integers in a similar way to the CONST_SCALAR_INT_P ++ handling above. */ ++ for (unsigned int byte = first_byte; byte < end_byte; ++byte) ++ { ++ unsigned int index = byte / bytes_per_el32; ++ unsigned int subbyte = byte % bytes_per_el32; ++ unsigned int int_bytes = MIN (bytes_per_el32, ++ mode_bytes - index * bytes_per_el32); ++ /* Always constant because the inputs are. */ ++ unsigned int lsb ++ = subreg_size_lsb (1, int_bytes, subbyte).to_constant (); ++ bytes.quick_push ((unsigned long) el32[index] >> lsb); ++ } ++ return true; + } +- /* If this asserts, it is too complicated; reducing value_bit may help. */ +- gcc_assert (BITS_PER_UNIT % value_bit == 0); +- /* I don't know how to handle endianness of sub-units. */ +- gcc_assert (elem_bitsize % BITS_PER_UNIT == 0); + +- for (elem = 0; elem < num_elem; elem++) ++ if (GET_CODE (x) == CONST_FIXED) + { +- unsigned char * vp; +- rtx el = (GET_CODE (op) == CONST_VECTOR +- ? CONST_VECTOR_ELT (op, first_elem + elem) +- : op); ++ for (unsigned int byte = first_byte; byte < end_byte; ++byte) ++ { ++ /* Always constant because the inputs are. */ ++ unsigned int lsb ++ = subreg_size_lsb (1, mode_bytes, byte).to_constant (); ++ unsigned HOST_WIDE_INT piece = CONST_FIXED_VALUE_LOW (x); ++ if (lsb >= HOST_BITS_PER_WIDE_INT) ++ { ++ lsb -= HOST_BITS_PER_WIDE_INT; ++ piece = CONST_FIXED_VALUE_HIGH (x); ++ } ++ bytes.quick_push (piece >> lsb); ++ } ++ return true; ++ } + +- /* Vectors are kept in target memory order. (This is probably +- a mistake.) */ +- { +- unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT; +- unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize) +- / BITS_PER_UNIT); +- unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte; +- unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte; +- unsigned bytele = (subword_byte % UNITS_PER_WORD +- + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD); +- vp = value + (bytele * BITS_PER_UNIT) / value_bit; +- } ++ return false; ++} + +- switch (GET_CODE (el)) +- { +- case CONST_INT: +- for (i = 0; +- i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize; +- i += value_bit) +- *vp++ = INTVAL (el) >> i; +- /* CONST_INTs are always logically sign-extended. */ +- for (; i < elem_bitsize; i += value_bit) +- *vp++ = INTVAL (el) < 0 ? 
-1 : 0; +- break; ++/* Read a vector of mode MODE from the target memory image given by BYTES, ++ starting at byte FIRST_BYTE. The vector is known to be encodable using ++ NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each, ++ and BYTES is known to have enough bytes to supply NPATTERNS * ++ NELTS_PER_PATTERN vector elements. Each element of BYTES contains ++ BITS_PER_UNIT bits and the bytes are in target memory order. + +- case CONST_WIDE_INT: +- { +- rtx_mode_t val = rtx_mode_t (el, GET_MODE_INNER (innermode)); +- unsigned char extend = wi::sign_mask (val); +- int prec = wi::get_precision (val); +- +- for (i = 0; i < prec && i < elem_bitsize; i += value_bit) +- *vp++ = wi::extract_uhwi (val, i, value_bit); +- for (; i < elem_bitsize; i += value_bit) +- *vp++ = extend; +- } +- break; ++ Return the vector on success, otherwise return NULL_RTX. */ + +- case CONST_DOUBLE: +- if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (el) == VOIDmode) +- { +- unsigned char extend = 0; +- /* If this triggers, someone should have generated a +- CONST_INT instead. */ +- gcc_assert (elem_bitsize > HOST_BITS_PER_WIDE_INT); +- +- for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit) +- *vp++ = CONST_DOUBLE_LOW (el) >> i; +- while (i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize) +- { +- *vp++ +- = CONST_DOUBLE_HIGH (el) >> (i - HOST_BITS_PER_WIDE_INT); +- i += value_bit; +- } ++rtx ++native_decode_vector_rtx (machine_mode mode, vec bytes, ++ unsigned int first_byte, unsigned int npatterns, ++ unsigned int nelts_per_pattern) ++{ ++ rtx_vector_builder builder (mode, npatterns, nelts_per_pattern); + +- if (CONST_DOUBLE_HIGH (el) >> (HOST_BITS_PER_WIDE_INT - 1)) +- extend = -1; +- for (; i < elem_bitsize; i += value_bit) +- *vp++ = extend; +- } +- else +- { +- /* This is big enough for anything on the platform. */ +- long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32]; +- scalar_float_mode el_mode; ++ unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode), ++ GET_MODE_NUNITS (mode)); ++ if (elt_bits < BITS_PER_UNIT) ++ { ++ /* This is the only case in which elements can be smaller than a byte. ++ Element 0 is always in the lsb of the containing byte. */ ++ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); ++ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) ++ { ++ unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits; ++ unsigned int byte_index = bit_index / BITS_PER_UNIT; ++ unsigned int lsb = bit_index % BITS_PER_UNIT; ++ builder.quick_push (bytes[byte_index] & (1 << lsb) ++ ? CONST1_RTX (BImode) ++ : CONST0_RTX (BImode)); ++ } ++ } ++ else ++ { ++ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) ++ { ++ rtx x = native_decode_rtx (GET_MODE_INNER (mode), bytes, first_byte); ++ if (!x) ++ return NULL_RTX; ++ builder.quick_push (x); ++ first_byte += elt_bits / BITS_PER_UNIT; ++ } ++ } ++ return builder.build (); ++} + +- el_mode = as_a (GET_MODE (el)); +- int bitsize = GET_MODE_BITSIZE (el_mode); ++/* Read an rtx of mode MODE from the target memory image given by BYTES, ++ starting at byte FIRST_BYTE. Each element of BYTES contains BITS_PER_UNIT ++ bits and the bytes are in target memory order. The image has enough ++ values to specify all bytes of MODE. + +- gcc_assert (bitsize <= elem_bitsize); +- gcc_assert (bitsize % value_bit == 0); ++ Return the rtx on success, otherwise return NULL_RTX. 
*/ + +- real_to_target (tmp, CONST_DOUBLE_REAL_VALUE (el), +- GET_MODE (el)); ++rtx ++native_decode_rtx (machine_mode mode, vec bytes, ++ unsigned int first_byte) ++{ ++ if (VECTOR_MODE_P (mode)) ++ { ++ /* If we know at compile time how many elements there are, ++ pull each element directly from BYTES. */ ++ unsigned int nelts; ++ if (GET_MODE_NUNITS (mode).is_constant (&nelts)) ++ return native_decode_vector_rtx (mode, bytes, first_byte, nelts, 1); ++ return NULL_RTX; ++ } + +- /* real_to_target produces its result in words affected by +- FLOAT_WORDS_BIG_ENDIAN. However, we ignore this, +- and use WORDS_BIG_ENDIAN instead; see the documentation +- of SUBREG in rtl.texi. */ +- for (i = 0; i < bitsize; i += value_bit) +- { +- int ibase; +- if (WORDS_BIG_ENDIAN) +- ibase = bitsize - 1 - i; +- else +- ibase = i; +- *vp++ = tmp[ibase / 32] >> i % 32; +- } ++ scalar_int_mode imode; ++ if (is_a (mode, &imode) ++ && GET_MODE_PRECISION (imode) <= MAX_BITSIZE_MODE_ANY_INT) ++ { ++ /* Pull the bytes msb first, so that we can use simple ++ shift-and-insert wide_int operations. */ ++ unsigned int size = GET_MODE_SIZE (imode); ++ wide_int result (wi::zero (GET_MODE_PRECISION (imode))); ++ for (unsigned int i = 0; i < size; ++i) ++ { ++ unsigned int lsb = (size - i - 1) * BITS_PER_UNIT; ++ /* Always constant because the inputs are. */ ++ unsigned int subbyte ++ = subreg_size_offset_from_lsb (1, size, lsb).to_constant (); ++ result <<= BITS_PER_UNIT; ++ result |= bytes[first_byte + subbyte]; ++ } ++ return immed_wide_int_const (result, imode); ++ } + +- /* It shouldn't matter what's done here, so fill it with +- zero. */ +- for (; i < elem_bitsize; i += value_bit) +- *vp++ = 0; +- } +- break; ++ scalar_float_mode fmode; ++ if (is_a (mode, &fmode)) ++ { ++ /* We need to build an array of integers in target memory order. ++ All integers before the last one have 32 bits; the last one may ++ have 32 bits or fewer, depending on whether the mode bitsize ++ is divisible by 32. */ ++ long el32[MAX_BITSIZE_MODE_ANY_MODE / 32]; ++ unsigned int num_el32 = CEIL (GET_MODE_BITSIZE (fmode), 32); ++ memset (el32, 0, num_el32 * sizeof (long)); ++ ++ /* The (maximum) number of target bytes per element of el32. */ ++ unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT; ++ gcc_assert (bytes_per_el32 != 0); ++ ++ unsigned int mode_bytes = GET_MODE_SIZE (fmode); ++ for (unsigned int byte = 0; byte < mode_bytes; ++byte) ++ { ++ unsigned int index = byte / bytes_per_el32; ++ unsigned int subbyte = byte % bytes_per_el32; ++ unsigned int int_bytes = MIN (bytes_per_el32, ++ mode_bytes - index * bytes_per_el32); ++ /* Always constant because the inputs are. */ ++ unsigned int lsb ++ = subreg_size_lsb (1, int_bytes, subbyte).to_constant (); ++ el32[index] |= (unsigned long) bytes[first_byte + byte] << lsb; ++ } ++ REAL_VALUE_TYPE r; ++ real_from_target (&r, el32, fmode); ++ return const_double_from_real_value (r, fmode); ++ } + +- case CONST_FIXED: +- if (elem_bitsize <= HOST_BITS_PER_WIDE_INT) +- { +- for (i = 0; i < elem_bitsize; i += value_bit) +- *vp++ = CONST_FIXED_VALUE_LOW (el) >> i; +- } ++ if (ALL_SCALAR_FIXED_POINT_MODE_P (mode)) ++ { ++ scalar_mode smode = as_a (mode); ++ FIXED_VALUE_TYPE f; ++ f.data.low = 0; ++ f.data.high = 0; ++ f.mode = smode; ++ ++ unsigned int mode_bytes = GET_MODE_SIZE (smode); ++ for (unsigned int byte = 0; byte < mode_bytes; ++byte) ++ { ++ /* Always constant because the inputs are. 
*/ ++ unsigned int lsb ++ = subreg_size_lsb (1, mode_bytes, byte).to_constant (); ++ unsigned HOST_WIDE_INT unit = bytes[first_byte + byte]; ++ if (lsb >= HOST_BITS_PER_WIDE_INT) ++ f.data.high |= unit << (lsb - HOST_BITS_PER_WIDE_INT); + else +- { +- for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit) +- *vp++ = CONST_FIXED_VALUE_LOW (el) >> i; +- for (; i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize; +- i += value_bit) +- *vp++ = CONST_FIXED_VALUE_HIGH (el) +- >> (i - HOST_BITS_PER_WIDE_INT); +- for (; i < elem_bitsize; i += value_bit) +- *vp++ = 0; +- } +- break; +- +- default: +- gcc_unreachable (); ++ f.data.low |= unit << lsb; + } ++ return CONST_FIXED_FROM_FIXED_VALUE (f, mode); + } + +- /* Now, pick the right byte to start with. */ +- /* Renumber BYTE so that the least-significant byte is byte 0. A special +- case is paradoxical SUBREGs, which shouldn't be adjusted since they +- will already have offset 0. */ +- if (inner_bytes >= GET_MODE_SIZE (outermode)) ++ return NULL_RTX; ++} ++ ++/* Simplify a byte offset BYTE into CONST_VECTOR X. The main purpose ++ is to convert a runtime BYTE value into a constant one. */ ++ ++static poly_uint64 ++simplify_const_vector_byte_offset (rtx x, poly_uint64 byte) ++{ ++ /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */ ++ machine_mode mode = GET_MODE (x); ++ unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode), ++ GET_MODE_NUNITS (mode)); ++ /* The number of bits needed to encode one element from each pattern. */ ++ unsigned int sequence_bits = CONST_VECTOR_NPATTERNS (x) * elt_bits; ++ ++ /* Identify the start point in terms of a sequence number and a byte offset ++ within that sequence. */ ++ poly_uint64 first_sequence; ++ unsigned HOST_WIDE_INT subbit; ++ if (can_div_trunc_p (byte * BITS_PER_UNIT, sequence_bits, ++ &first_sequence, &subbit)) + { +- unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte; +- unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte; +- unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte; +- byte = (subword_byte % UNITS_PER_WORD +- + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD); ++ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); ++ if (nelts_per_pattern == 1) ++ /* This is a duplicated vector, so the value of FIRST_SEQUENCE ++ doesn't matter. */ ++ byte = subbit / BITS_PER_UNIT; ++ else if (nelts_per_pattern == 2 && known_gt (first_sequence, 0U)) ++ { ++ /* The subreg drops the first element from each pattern and ++ only uses the second element. Find the first sequence ++ that starts on a byte boundary. */ ++ subbit += least_common_multiple (sequence_bits, BITS_PER_UNIT); ++ byte = subbit / BITS_PER_UNIT; ++ } + } ++ return byte; ++} ++ ++/* Subroutine of simplify_subreg in which: + +- /* BYTE should still be inside OP. (Note that BYTE is unsigned, +- so if it's become negative it will instead be very large.) */ +- gcc_assert (byte < inner_bytes); ++ - X is known to be a CONST_VECTOR ++ - OUTERMODE is known to be a vector mode + +- /* Convert from bytes to chunks of size value_bit. */ +- value_start = byte * (BITS_PER_UNIT / value_bit); ++ Try to handle the subreg by operating on the CONST_VECTOR encoding ++ rather than on each individual element of the CONST_VECTOR. + +- /* Re-pack the value. */ +- num_elem = GET_MODE_NUNITS (outermode); ++ Return the simplified subreg on success, otherwise return NULL_RTX. 
*/ + +- if (VECTOR_MODE_P (outermode)) ++static rtx ++simplify_const_vector_subreg (machine_mode outermode, rtx x, ++ machine_mode innermode, unsigned int first_byte) ++{ ++ /* Paradoxical subregs of vectors have dubious semantics. */ ++ if (paradoxical_subreg_p (outermode, innermode)) ++ return NULL_RTX; ++ ++ /* We can only preserve the semantics of a stepped pattern if the new ++ vector element is the same as the original one. */ ++ if (CONST_VECTOR_STEPPED_P (x) ++ && GET_MODE_INNER (outermode) != GET_MODE_INNER (innermode)) ++ return NULL_RTX; ++ ++ /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */ ++ unsigned int x_elt_bits ++ = vector_element_size (GET_MODE_BITSIZE (innermode), ++ GET_MODE_NUNITS (innermode)); ++ unsigned int out_elt_bits ++ = vector_element_size (GET_MODE_BITSIZE (outermode), ++ GET_MODE_NUNITS (outermode)); ++ ++ /* The number of bits needed to encode one element from every pattern ++ of the original vector. */ ++ unsigned int x_sequence_bits = CONST_VECTOR_NPATTERNS (x) * x_elt_bits; ++ ++ /* The number of bits needed to encode one element from every pattern ++ of the result. */ ++ unsigned int out_sequence_bits ++ = least_common_multiple (x_sequence_bits, out_elt_bits); ++ ++ /* Work out the number of interleaved patterns in the output vector ++ and the number of encoded elements per pattern. */ ++ unsigned int out_npatterns = out_sequence_bits / out_elt_bits; ++ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); ++ ++ /* The encoding scheme requires the number of elements to be a multiple ++ of the number of patterns, so that each pattern appears at least once ++ and so that the same number of elements appear from each pattern. */ ++ bool ok_p = multiple_p (GET_MODE_NUNITS (outermode), out_npatterns); ++ unsigned int const_nunits; ++ if (GET_MODE_NUNITS (outermode).is_constant (&const_nunits) ++ && (!ok_p || out_npatterns * nelts_per_pattern > const_nunits)) + { +- result_v = rtvec_alloc (num_elem); +- elems = &RTVEC_ELT (result_v, 0); ++ /* Either the encoding is invalid, or applying it would give us ++ more elements than we need. Just encode each element directly. */ ++ out_npatterns = const_nunits; ++ nelts_per_pattern = 1; + } +- else +- elems = &result_s; ++ else if (!ok_p) ++ return NULL_RTX; + +- outer_submode = GET_MODE_INNER (outermode); +- outer_class = GET_MODE_CLASS (outer_submode); +- elem_bitsize = GET_MODE_BITSIZE (outer_submode); ++ /* Get enough bytes of X to form the new encoding. */ ++ unsigned int buffer_bits = out_npatterns * nelts_per_pattern * out_elt_bits; ++ unsigned int buffer_bytes = CEIL (buffer_bits, BITS_PER_UNIT); ++ auto_vec buffer (buffer_bytes); ++ if (!native_encode_rtx (innermode, x, buffer, first_byte, buffer_bytes)) ++ return NULL_RTX; + +- gcc_assert (elem_bitsize % value_bit == 0); +- gcc_assert (elem_bitsize + value_start * value_bit <= max_bitsize); ++ /* Reencode the bytes as OUTERMODE. */ ++ return native_decode_vector_rtx (outermode, buffer, 0, out_npatterns, ++ nelts_per_pattern); ++} + +- for (elem = 0; elem < num_elem; elem++) +- { +- unsigned char *vp; ++/* Try to simplify a subreg of a constant by encoding the subreg region ++ as a sequence of target bytes and reading them back in the new mode. ++ Return the new value on success, otherwise return null. + +- /* Vectors are stored in target memory order. (This is probably +- a mistake.) 
*/ +- { +- unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT; +- unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize) +- / BITS_PER_UNIT); +- unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte; +- unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte; +- unsigned bytele = (subword_byte % UNITS_PER_WORD +- + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD); +- vp = value + value_start + (bytele * BITS_PER_UNIT) / value_bit; +- } ++ The subreg has outer mode OUTERMODE, inner mode INNERMODE, inner value X ++ and byte offset FIRST_BYTE. */ + +- switch (outer_class) +- { +- case MODE_INT: +- case MODE_PARTIAL_INT: +- { +- int u; +- int base = 0; +- int units +- = (GET_MODE_BITSIZE (outer_submode) + HOST_BITS_PER_WIDE_INT - 1) +- / HOST_BITS_PER_WIDE_INT; +- HOST_WIDE_INT tmp[MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT]; +- wide_int r; +- +- if (GET_MODE_PRECISION (outer_submode) > MAX_BITSIZE_MODE_ANY_INT) +- return NULL_RTX; +- for (u = 0; u < units; u++) +- { +- unsigned HOST_WIDE_INT buf = 0; +- for (i = 0; +- i < HOST_BITS_PER_WIDE_INT && base + i < elem_bitsize; +- i += value_bit) +- buf |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i; +- +- tmp[u] = buf; +- base += HOST_BITS_PER_WIDE_INT; +- } +- r = wide_int::from_array (tmp, units, +- GET_MODE_PRECISION (outer_submode)); +-#if TARGET_SUPPORTS_WIDE_INT == 0 +- /* Make sure r will fit into CONST_INT or CONST_DOUBLE. */ +- if (wi::min_precision (r, SIGNED) > HOST_BITS_PER_DOUBLE_INT) +- return NULL_RTX; +-#endif +- elems[elem] = immed_wide_int_const (r, outer_submode); +- } +- break; ++static rtx ++simplify_immed_subreg (fixed_size_mode outermode, rtx x, ++ machine_mode innermode, unsigned int first_byte) ++{ ++ unsigned int buffer_bytes = GET_MODE_SIZE (outermode); ++ auto_vec buffer (buffer_bytes); + +- case MODE_FLOAT: +- case MODE_DECIMAL_FLOAT: +- { +- REAL_VALUE_TYPE r; +- long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32] = { 0 }; +- +- /* real_from_target wants its input in words affected by +- FLOAT_WORDS_BIG_ENDIAN. However, we ignore this, +- and use WORDS_BIG_ENDIAN instead; see the documentation +- of SUBREG in rtl.texi. */ +- for (i = 0; i < elem_bitsize; i += value_bit) +- { +- int ibase; +- if (WORDS_BIG_ENDIAN) +- ibase = elem_bitsize - 1 - i; +- else +- ibase = i; +- tmp[ibase / 32] |= (*vp++ & value_mask) << i % 32; +- } ++ /* Some ports misuse CCmode. */ ++ if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (x)) ++ return x; + +- real_from_target (&r, tmp, outer_submode); +- elems[elem] = const_double_from_real_value (r, outer_submode); +- } +- break; ++ /* Paradoxical subregs read undefined values for bytes outside of the ++ inner value. However, we have traditionally always sign-extended ++ integer constants and zero-extended others. 
*/ ++ unsigned int inner_bytes = buffer_bytes; ++ if (paradoxical_subreg_p (outermode, innermode)) ++ { ++ if (!GET_MODE_SIZE (innermode).is_constant (&inner_bytes)) ++ return NULL_RTX; + +- case MODE_FRACT: +- case MODE_UFRACT: +- case MODE_ACCUM: +- case MODE_UACCUM: +- { +- FIXED_VALUE_TYPE f; +- f.data.low = 0; +- f.data.high = 0; +- f.mode = outer_submode; +- +- for (i = 0; +- i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize; +- i += value_bit) +- f.data.low |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i; +- for (; i < elem_bitsize; i += value_bit) +- f.data.high |= ((unsigned HOST_WIDE_INT)(*vp++ & value_mask) +- << (i - HOST_BITS_PER_WIDE_INT)); +- +- elems[elem] = CONST_FIXED_FROM_FIXED_VALUE (f, outer_submode); +- } +- break; ++ target_unit filler = 0; ++ if (CONST_SCALAR_INT_P (x) && wi::neg_p (rtx_mode_t (x, innermode))) ++ filler = -1; + +- default: +- gcc_unreachable (); +- } ++ /* Add any leading bytes due to big-endian layout. The number of ++ bytes must be constant because both modes have constant size. */ ++ unsigned int leading_bytes ++ = -byte_lowpart_offset (outermode, innermode).to_constant (); ++ for (unsigned int i = 0; i < leading_bytes; ++i) ++ buffer.quick_push (filler); ++ ++ if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes)) ++ return NULL_RTX; ++ ++ /* Add any trailing bytes due to little-endian layout. */ ++ while (buffer.length () < buffer_bytes) ++ buffer.quick_push (filler); + } +- if (VECTOR_MODE_P (outermode)) +- return gen_rtx_CONST_VECTOR (outermode, result_v); + else +- return result_s; ++ { ++ if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes)) ++ return NULL_RTX; ++ } ++ return native_decode_rtx (outermode, buffer, 0); + } + + /* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE) +@@ -6468,6 +6625,9 @@ simplify_subreg (machine_mode outermode, rtx op, + if (outermode == innermode && known_eq (byte, 0U)) + return op; + ++ if (GET_CODE (op) == CONST_VECTOR) ++ byte = simplify_const_vector_byte_offset (op, byte); ++ + if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode))) + { + rtx elt; +@@ -6487,30 +6647,21 @@ simplify_subreg (machine_mode outermode, rtx op, + || CONST_FIXED_P (op) + || GET_CODE (op) == CONST_VECTOR) + { +- /* simplify_immed_subreg deconstructs OP into bytes and constructs +- the result from bytes, so it only works if the sizes of the modes +- and the value of the offset are known at compile time. Cases that +- that apply to general modes and offsets should be handled here +- before calling simplify_immed_subreg. */ +- fixed_size_mode fs_outermode, fs_innermode; + unsigned HOST_WIDE_INT cbyte; +- if (is_a (outermode, &fs_outermode) +- && is_a (innermode, &fs_innermode) +- && byte.is_constant (&cbyte)) +- return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte, +- 0, GET_MODE_SIZE (fs_innermode)); +- +- /* Handle constant-sized outer modes and variable-sized inner modes. 
*/ +- unsigned HOST_WIDE_INT first_elem; +- if (GET_CODE (op) == CONST_VECTOR +- && is_a (outermode, &fs_outermode) +- && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode), +- &first_elem)) +- return simplify_immed_subreg (fs_outermode, op, innermode, 0, +- first_elem, +- GET_MODE_SIZE (fs_outermode)); ++ if (byte.is_constant (&cbyte)) ++ { ++ if (GET_CODE (op) == CONST_VECTOR && VECTOR_MODE_P (outermode)) ++ { ++ rtx tmp = simplify_const_vector_subreg (outermode, op, ++ innermode, cbyte); ++ if (tmp) ++ return tmp; ++ } + +- return NULL_RTX; ++ fixed_size_mode fs_outermode; ++ if (is_a (outermode, &fs_outermode)) ++ return simplify_immed_subreg (fs_outermode, op, innermode, cbyte); ++ } + } + + /* Changing mode twice with SUBREG => just change it once, +@@ -6952,6 +7103,18 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg) + && mode_for_vector (inner_mode, 2).exists (&narrower_mode) + && VECTOR_MODE_P (narrower_mode)) + { ++ /* Test VEC_DUPLICATE of a vector. */ ++ rtx_vector_builder nbuilder (narrower_mode, 2, 1); ++ nbuilder.quick_push (const0_rtx); ++ nbuilder.quick_push (const1_rtx); ++ rtx_vector_builder builder (mode, 2, 1); ++ builder.quick_push (const0_rtx); ++ builder.quick_push (const1_rtx); ++ ASSERT_RTX_EQ (builder.build (), ++ simplify_unary_operation (VEC_DUPLICATE, mode, ++ nbuilder.build (), ++ narrower_mode)); ++ + /* Test VEC_SELECT of a vector. */ + rtx vec_par + = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, const1_rtx, const0_rtx)); +@@ -7024,6 +7187,58 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg) + ASSERT_RTX_EQ (series_0_m1, + simplify_binary_operation (VEC_SERIES, mode, const0_rtx, + constm1_rtx)); ++ ++ /* Test NEG on constant vector series. */ ++ ASSERT_RTX_EQ (series_0_m1, ++ simplify_unary_operation (NEG, mode, series_0_1, mode)); ++ ASSERT_RTX_EQ (series_0_1, ++ simplify_unary_operation (NEG, mode, series_0_m1, mode)); ++ ++ /* Test PLUS and MINUS on constant vector series. */ ++ rtx scalar2 = gen_int_mode (2, inner_mode); ++ rtx scalar3 = gen_int_mode (3, inner_mode); ++ rtx series_1_1 = gen_const_vec_series (mode, const1_rtx, const1_rtx); ++ rtx series_0_2 = gen_const_vec_series (mode, const0_rtx, scalar2); ++ rtx series_1_3 = gen_const_vec_series (mode, const1_rtx, scalar3); ++ ASSERT_RTX_EQ (series_1_1, ++ simplify_binary_operation (PLUS, mode, series_0_1, ++ CONST1_RTX (mode))); ++ ASSERT_RTX_EQ (series_0_m1, ++ simplify_binary_operation (PLUS, mode, CONST0_RTX (mode), ++ series_0_m1)); ++ ASSERT_RTX_EQ (series_1_3, ++ simplify_binary_operation (PLUS, mode, series_1_1, ++ series_0_2)); ++ ASSERT_RTX_EQ (series_0_1, ++ simplify_binary_operation (MINUS, mode, series_1_1, ++ CONST1_RTX (mode))); ++ ASSERT_RTX_EQ (series_1_1, ++ simplify_binary_operation (MINUS, mode, CONST1_RTX (mode), ++ series_0_m1)); ++ ASSERT_RTX_EQ (series_1_1, ++ simplify_binary_operation (MINUS, mode, series_1_3, ++ series_0_2)); ++ ++ /* Test MULT between constant vectors. 
*/ ++ rtx vec2 = gen_const_vec_duplicate (mode, scalar2); ++ rtx vec3 = gen_const_vec_duplicate (mode, scalar3); ++ rtx scalar9 = gen_int_mode (9, inner_mode); ++ rtx series_3_9 = gen_const_vec_series (mode, scalar3, scalar9); ++ ASSERT_RTX_EQ (series_0_2, ++ simplify_binary_operation (MULT, mode, series_0_1, vec2)); ++ ASSERT_RTX_EQ (series_3_9, ++ simplify_binary_operation (MULT, mode, vec3, series_1_3)); ++ if (!GET_MODE_NUNITS (mode).is_constant ()) ++ ASSERT_FALSE (simplify_binary_operation (MULT, mode, series_0_1, ++ series_0_1)); ++ ++ /* Test ASHIFT between constant vectors. */ ++ ASSERT_RTX_EQ (series_0_2, ++ simplify_binary_operation (ASHIFT, mode, series_0_1, ++ CONST1_RTX (mode))); ++ if (!GET_MODE_NUNITS (mode).is_constant ()) ++ ASSERT_FALSE (simplify_binary_operation (ASHIFT, mode, CONST1_RTX (mode), ++ series_0_1)); + } + + /* Verify simplify_merge_mask works correctly. */ +@@ -7089,6 +7304,165 @@ test_vec_merge (machine_mode mode) + simplify_rtx (nvm)); + } + ++/* Test subregs of integer vector constant X, trying elements in ++ the range [ELT_BIAS, ELT_BIAS + constant_lower_bound (NELTS)), ++ where NELTS is the number of elements in X. Subregs involving ++ elements [ELT_BIAS, ELT_BIAS + FIRST_VALID) are expected to fail. */ ++ ++static void ++test_vector_subregs_modes (rtx x, poly_uint64 elt_bias = 0, ++ unsigned int first_valid = 0) ++{ ++ machine_mode inner_mode = GET_MODE (x); ++ scalar_mode int_mode = GET_MODE_INNER (inner_mode); ++ ++ for (unsigned int modei = 0; modei < NUM_MACHINE_MODES; ++modei) ++ { ++ machine_mode outer_mode = (machine_mode) modei; ++ if (!VECTOR_MODE_P (outer_mode)) ++ continue; ++ ++ unsigned int outer_nunits; ++ if (GET_MODE_INNER (outer_mode) == int_mode ++ && GET_MODE_NUNITS (outer_mode).is_constant (&outer_nunits) ++ && multiple_p (GET_MODE_NUNITS (inner_mode), outer_nunits)) ++ { ++ /* Test subregs in which the outer mode is a smaller, ++ constant-sized vector of the same element type. */ ++ unsigned int limit ++ = constant_lower_bound (GET_MODE_NUNITS (inner_mode)); ++ for (unsigned int elt = 0; elt < limit; elt += outer_nunits) ++ { ++ rtx expected = NULL_RTX; ++ if (elt >= first_valid) ++ { ++ rtx_vector_builder builder (outer_mode, outer_nunits, 1); ++ for (unsigned int i = 0; i < outer_nunits; ++i) ++ builder.quick_push (CONST_VECTOR_ELT (x, elt + i)); ++ expected = builder.build (); ++ } ++ poly_uint64 byte = (elt_bias + elt) * GET_MODE_SIZE (int_mode); ++ ASSERT_RTX_EQ (expected, ++ simplify_subreg (outer_mode, x, ++ inner_mode, byte)); ++ } ++ } ++ else if (known_eq (GET_MODE_SIZE (outer_mode), ++ GET_MODE_SIZE (inner_mode)) ++ && known_eq (elt_bias, 0U) ++ && (GET_MODE_CLASS (outer_mode) != MODE_VECTOR_BOOL ++ || known_eq (GET_MODE_BITSIZE (outer_mode), ++ GET_MODE_NUNITS (outer_mode))) ++ && (!FLOAT_MODE_P (outer_mode) ++ || (FLOAT_MODE_FORMAT (outer_mode)->ieee_bits ++ == GET_MODE_UNIT_PRECISION (outer_mode))) ++ && (GET_MODE_SIZE (inner_mode).is_constant () ++ || !CONST_VECTOR_STEPPED_P (x))) ++ { ++ /* Try converting to OUTER_MODE and back. */ ++ rtx outer_x = simplify_subreg (outer_mode, x, inner_mode, 0); ++ ASSERT_TRUE (outer_x != NULL_RTX); ++ ASSERT_RTX_EQ (x, simplify_subreg (inner_mode, outer_x, ++ outer_mode, 0)); ++ } ++ } ++ ++ if (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN) ++ { ++ /* Test each byte in the element range. 
*/ ++ unsigned int limit ++ = constant_lower_bound (GET_MODE_SIZE (inner_mode)); ++ for (unsigned int i = 0; i < limit; ++i) ++ { ++ unsigned int elt = i / GET_MODE_SIZE (int_mode); ++ rtx expected = NULL_RTX; ++ if (elt >= first_valid) ++ { ++ unsigned int byte_shift = i % GET_MODE_SIZE (int_mode); ++ if (BYTES_BIG_ENDIAN) ++ byte_shift = GET_MODE_SIZE (int_mode) - byte_shift - 1; ++ rtx_mode_t vec_elt (CONST_VECTOR_ELT (x, elt), int_mode); ++ wide_int shifted_elt ++ = wi::lrshift (vec_elt, byte_shift * BITS_PER_UNIT); ++ expected = immed_wide_int_const (shifted_elt, QImode); ++ } ++ poly_uint64 byte = elt_bias * GET_MODE_SIZE (int_mode) + i; ++ ASSERT_RTX_EQ (expected, ++ simplify_subreg (QImode, x, inner_mode, byte)); ++ } ++ } ++} ++ ++/* Test constant subregs of integer vector mode INNER_MODE, using 1 ++ element per pattern. */ ++ ++static void ++test_vector_subregs_repeating (machine_mode inner_mode) ++{ ++ poly_uint64 nunits = GET_MODE_NUNITS (inner_mode); ++ unsigned int min_nunits = constant_lower_bound (nunits); ++ scalar_mode int_mode = GET_MODE_INNER (inner_mode); ++ unsigned int count = gcd (min_nunits, 8); ++ ++ rtx_vector_builder builder (inner_mode, count, 1); ++ for (unsigned int i = 0; i < count; ++i) ++ builder.quick_push (gen_int_mode (8 - i, int_mode)); ++ rtx x = builder.build (); ++ ++ test_vector_subregs_modes (x); ++ if (!nunits.is_constant ()) ++ test_vector_subregs_modes (x, nunits - min_nunits); ++} ++ ++/* Test constant subregs of integer vector mode INNER_MODE, using 2 ++ elements per pattern. */ ++ ++static void ++test_vector_subregs_fore_back (machine_mode inner_mode) ++{ ++ poly_uint64 nunits = GET_MODE_NUNITS (inner_mode); ++ unsigned int min_nunits = constant_lower_bound (nunits); ++ scalar_mode int_mode = GET_MODE_INNER (inner_mode); ++ unsigned int count = gcd (min_nunits, 4); ++ ++ rtx_vector_builder builder (inner_mode, count, 2); ++ for (unsigned int i = 0; i < count; ++i) ++ builder.quick_push (gen_int_mode (i, int_mode)); ++ for (unsigned int i = 0; i < count; ++i) ++ builder.quick_push (gen_int_mode (-(int) i, int_mode)); ++ rtx x = builder.build (); ++ ++ test_vector_subregs_modes (x); ++ if (!nunits.is_constant ()) ++ test_vector_subregs_modes (x, nunits - min_nunits, count); ++} ++ ++/* Test constant subregs of integer vector mode INNER_MODE, using 3 ++ elements per pattern. */ ++ ++static void ++test_vector_subregs_stepped (machine_mode inner_mode) ++{ ++ /* Build { 0, 1, 2, 3, ... }. */ ++ scalar_mode int_mode = GET_MODE_INNER (inner_mode); ++ rtx_vector_builder builder (inner_mode, 1, 3); ++ for (unsigned int i = 0; i < 3; ++i) ++ builder.quick_push (gen_int_mode (i, int_mode)); ++ rtx x = builder.build (); ++ ++ test_vector_subregs_modes (x); ++} ++ ++/* Test constant subregs of integer vector mode INNER_MODE. */ ++ ++static void ++test_vector_subregs (machine_mode inner_mode) ++{ ++ test_vector_subregs_repeating (inner_mode); ++ test_vector_subregs_fore_back (inner_mode); ++ test_vector_subregs_stepped (inner_mode); ++} ++ + /* Verify some simplifications involving vectors. 
*/ + + static void +@@ -7103,7 +7477,10 @@ test_vector_ops () + test_vector_ops_duplicate (mode, scalar_reg); + if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT + && maybe_gt (GET_MODE_NUNITS (mode), 2)) +- test_vector_ops_series (mode, scalar_reg); ++ { ++ test_vector_ops_series (mode, scalar_reg); ++ test_vector_subregs (mode); ++ } + test_vec_merge (mode); + } + } +diff --git a/gcc/stack-ptr-mod.c b/gcc/stack-ptr-mod.c +index a10d59b61..5cb95e712 100644 +--- a/gcc/stack-ptr-mod.c ++++ b/gcc/stack-ptr-mod.c +@@ -91,9 +91,7 @@ pass_stack_ptr_mod::execute (function *fun) + if (INSN_P (insn)) + { + /* Check if insn modifies the stack pointer. */ +- note_stores (PATTERN (insn), +- notice_stack_pointer_modification_1, +- NULL); ++ note_stores (insn, notice_stack_pointer_modification_1, NULL); + if (! crtl->sp_is_unchanging) + return 0; + } +diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c +index a054b7887..7d1917f82 100644 +--- a/gcc/stor-layout.c ++++ b/gcc/stor-layout.c +@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see + #include "gimplify.h" + #include "attribs.h" + #include "debug.h" ++#include "calls.h" + + /* Data type for the expressions representing sizes of data types. + It is the first integer type laid out. */ +@@ -1835,7 +1836,8 @@ compute_record_mode (tree type) + line. */ + SET_TYPE_MODE (type, BLKmode); + +- if (! tree_fits_uhwi_p (TYPE_SIZE (type))) ++ poly_uint64 type_size; ++ if (!poly_int_tree_p (TYPE_SIZE (type), &type_size)) + return; + + /* A record which has any BLKmode members must itself be +@@ -1846,20 +1848,21 @@ compute_record_mode (tree type) + if (TREE_CODE (field) != FIELD_DECL) + continue; + ++ poly_uint64 field_size; + if (TREE_CODE (TREE_TYPE (field)) == ERROR_MARK + || (TYPE_MODE (TREE_TYPE (field)) == BLKmode + && ! TYPE_NO_FORCE_BLK (TREE_TYPE (field)) + && !(TYPE_SIZE (TREE_TYPE (field)) != 0 + && integer_zerop (TYPE_SIZE (TREE_TYPE (field))))) +- || ! tree_fits_uhwi_p (bit_position (field)) ++ || !tree_fits_poly_uint64_p (bit_position (field)) + || DECL_SIZE (field) == 0 +- || ! tree_fits_uhwi_p (DECL_SIZE (field))) ++ || !poly_int_tree_p (DECL_SIZE (field), &field_size)) + return; + + /* If this field is the whole struct, remember its mode so + that, say, we can put a double in a class into a DF + register instead of forcing it to live in the stack. */ +- if (simple_cst_equal (TYPE_SIZE (type), DECL_SIZE (field)) ++ if (known_eq (field_size, type_size) + /* Partial int types (e.g. __int20) may have TYPE_SIZE equal to + wider types (e.g. int32), despite precision being less. Ensure + that the TYPE_MODE of the struct does not get set to the partial +@@ -1879,15 +1882,14 @@ compute_record_mode (tree type) + For UNION_TYPE, if the widest field is MODE_INT then use that mode. + If the widest field is MODE_PARTIAL_INT, and the union will be passed + by reference, then use that mode. 
*/ +- poly_uint64 type_size; + if ((TREE_CODE (type) == RECORD_TYPE + || (TREE_CODE (type) == UNION_TYPE + && (GET_MODE_CLASS (mode) == MODE_INT + || (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT +- && targetm.calls.pass_by_reference (pack_cumulative_args (0), +- mode, type, 0))))) ++ && (targetm.calls.pass_by_reference ++ (pack_cumulative_args (0), ++ function_arg_info (type, mode, /*named=*/false))))))) + && mode != VOIDmode +- && poly_int_tree_p (TYPE_SIZE (type), &type_size) + && known_eq (GET_MODE_BITSIZE (mode), type_size)) + ; + else +diff --git a/gcc/target-globals.c b/gcc/target-globals.c +index 94a465c91..00bbda69c 100644 +--- a/gcc/target-globals.c ++++ b/gcc/target-globals.c +@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see + #include "gcse.h" + #include "bb-reorder.h" + #include "lower-subreg.h" ++#include "function-abi.h" + + #if SWITCHABLE_TARGET + struct target_globals default_target_globals = { +@@ -48,6 +49,7 @@ struct target_globals default_target_globals = { + &default_target_rtl, + &default_target_recog, + &default_target_hard_regs, ++ &default_target_function_abi_info, + &default_target_reload, + &default_target_expmed, + &default_target_optabs, +@@ -70,6 +72,7 @@ save_target_globals (void) + g->rtl = ggc_cleared_alloc (); + g->recog = XCNEW (struct target_recog); + g->hard_regs = XCNEW (struct target_hard_regs); ++ g->function_abi_info = XCNEW (struct target_function_abi_info); + g->reload = XCNEW (struct target_reload); + g->expmed = XCNEW (struct target_expmed); + g->optabs = XCNEW (struct target_optabs); +@@ -127,6 +130,7 @@ target_globals::~target_globals () + XDELETE (regs); + XDELETE (recog); + XDELETE (hard_regs); ++ XDELETE (function_abi_info); + XDELETE (reload); + XDELETE (expmed); + XDELETE (optabs); +diff --git a/gcc/target-globals.h b/gcc/target-globals.h +index 5af846c9f..f21580be6 100644 +--- a/gcc/target-globals.h ++++ b/gcc/target-globals.h +@@ -26,6 +26,7 @@ extern struct target_regs *this_target_regs; + extern struct target_rtl *this_target_rtl; + extern struct target_recog *this_target_recog; + extern struct target_hard_regs *this_target_hard_regs; ++extern struct target_function_abi_info *this_target_function_abi_info; + extern struct target_reload *this_target_reload; + extern struct target_expmed *this_target_expmed; + extern struct target_optabs *this_target_optabs; +@@ -47,6 +48,7 @@ struct GTY(()) target_globals { + struct target_rtl *rtl; + struct target_recog *GTY((skip)) recog; + struct target_hard_regs *GTY((skip)) hard_regs; ++ struct target_function_abi_info *GTY((skip)) function_abi_info; + struct target_reload *GTY((skip)) reload; + struct target_expmed *GTY((skip)) expmed; + struct target_optabs *GTY((skip)) optabs; +@@ -74,6 +76,7 @@ restore_target_globals (struct target_globals *g) + this_target_rtl = g->rtl; + this_target_recog = g->recog; + this_target_hard_regs = g->hard_regs; ++ this_target_function_abi_info = g->function_abi_info; + this_target_reload = g->reload; + this_target_expmed = g->expmed; + this_target_optabs = g->optabs; +diff --git a/gcc/target.def b/gcc/target.def +index f998470ff..05389cdd1 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -1782,22 +1782,6 @@ return type of the vectorized function shall be of vector type\n\ + tree, (tree fndecl, tree vec_type_out, tree vec_type_in), + default_builtin_md_vectorized_function) + +-/* Returns a function declaration for a builtin that realizes the +- vector conversion, or NULL_TREE if not available. 
*/ +-DEFHOOK +-(builtin_conversion, +- "This hook should return the DECL of a function that implements conversion of the\n\ +-input vector of type @var{src_type} to type @var{dest_type}.\n\ +-The value of @var{code} is one of the enumerators in @code{enum tree_code} and\n\ +-specifies how the conversion is to be applied\n\ +-(truncation, rounding, etc.).\n\ +-\n\ +-If this hook is defined, the autovectorizer will use the\n\ +-@code{TARGET_VECTORIZE_BUILTIN_CONVERSION} target hook when vectorizing\n\ +-conversion. Otherwise, it will return @code{NULL_TREE}.", +- tree, (unsigned code, tree dest_type, tree src_type), +- default_builtin_vectorized_conversion) +- + /* Cost of different vector/scalar statements in vectorization cost + model. In case of misaligned vector loads and stores the cost depends + on the data type and misalignment value. */ +@@ -2431,6 +2415,24 @@ another @code{CALL_EXPR}.\n\ + @var{arglist} really has type @samp{VEC(tree,gc)*}", + tree, (unsigned int /*location_t*/ loc, tree fndecl, void *arglist), NULL) + ++DEFHOOK ++(check_builtin_call, ++ "Perform semantic checking on a call to a machine-specific built-in\n\ ++function after its arguments have been constrained to the function\n\ ++signature. Return true if the call is valid, otherwise report an error\n\ ++and return false.\n\ ++\n\ ++This hook is called after @code{TARGET_RESOLVE_OVERLOADED_BUILTIN}.\n\ ++The call was originally to built-in function @var{orig_fndecl},\n\ ++but after the optional @code{TARGET_RESOLVE_OVERLOADED_BUILTIN}\n\ ++step is now to built-in function @var{fndecl}. @var{loc} is the\n\ ++location of the call and @var{args} is an array of function arguments,\n\ ++of which there are @var{nargs}. @var{arg_loc} specifies the location\n\ ++of each argument.", ++ bool, (location_t loc, vec arg_loc, tree fndecl, ++ tree orig_fndecl, unsigned int nargs, tree *args), ++ NULL) ++ + /* Fold a target-specific builtin to a tree valid for both GIMPLE + and GENERIC. */ + DEFHOOK +@@ -2624,38 +2626,6 @@ DEFHOOK + bool, (const rtx_insn *follower, const rtx_insn *followee), + hook_bool_const_rtx_insn_const_rtx_insn_true) + +-/* Return a register class for which branch target register +- optimizations should be applied. */ +-DEFHOOK +-(branch_target_register_class, +- "This target hook returns a register class for which branch target register\n\ +-optimizations should be applied. All registers in this class should be\n\ +-usable interchangeably. After reload, registers in this class will be\n\ +-re-allocated and loads will be hoisted out of loops and be subjected\n\ +-to inter-block scheduling.", +- reg_class_t, (void), +- default_branch_target_register_class) +- +-/* Return true if branch target register optimizations should include +- callee-saved registers that are not already live during the current +- function. AFTER_PE_GEN is true if prologues and epilogues have +- already been generated. */ +-DEFHOOK +-(branch_target_register_callee_saved, +- "Branch target register optimization will by default exclude callee-saved\n\ +-registers\n\ +-that are not already live during the current function; if this target hook\n\ +-returns true, they will be included. The target code must than make sure\n\ +-that all target registers in the class returned by\n\ +-@samp{TARGET_BRANCH_TARGET_REGISTER_CLASS} that might need saving are\n\ +-saved. @var{after_prologue_epilogue_gen} indicates if prologues and\n\ +-epilogues have already been generated. 
Note, even if you only return\n\ +-true when @var{after_prologue_epilogue_gen} is false, you still are likely\n\ +-to have to make special provisions in @code{INITIAL_ELIMINATION_OFFSET}\n\ +-to reserve space for caller-saved target registers.", +- bool, (bool after_prologue_epilogue_gen), +- hook_bool_bool_false) +- + /* Return true if the target supports conditional execution. */ + DEFHOOK + (have_conditional_execution, +@@ -3407,6 +3377,29 @@ must have move patterns for this mode.", + bool, (machine_mode mode), + hook_bool_mode_false) + ++DEFHOOK ++(compatible_vector_types_p, ++ "Return true if there is no target-specific reason for treating\n\ ++vector types @var{type1} and @var{type2} as distinct types. The caller\n\ ++has already checked for target-independent reasons, meaning that the\n\ ++types are known to have the same mode, to have the same number of elements,\n\ ++and to have what the caller considers to be compatible element types.\n\ ++\n\ ++The main reason for defining this hook is to reject pairs of types\n\ ++that are handled differently by the target's calling convention.\n\ ++For example, when a new @var{N}-bit vector architecture is added\n\ ++to a target, the target may want to handle normal @var{N}-bit\n\ ++@code{VECTOR_TYPE} arguments and return values in the same way as\n\ ++before, to maintain backwards compatibility. However, it may also\n\ ++provide new, architecture-specific @code{VECTOR_TYPE}s that are passed\n\ ++and returned in a more efficient way. It is then important to maintain\n\ ++a distinction between the ``normal'' @code{VECTOR_TYPE}s and the new\n\ ++architecture-specific ones.\n\ ++\n\ ++The default implementation returns true, which is correct for most targets.", ++ bool, (const_tree type1, const_tree type2), ++ hook_bool_const_tree_const_tree_true) ++ + DEFHOOK + (vector_alignment, + "This hook can be used to define the alignment for a vector of type\n\ +@@ -3569,7 +3562,7 @@ two areas of memory, or to set, clear or store to memory, for example\n\ + when copying a @code{struct}. The @code{by_pieces} infrastructure\n\ + implements such memory operations as a sequence of load, store or move\n\ + insns. Alternate strategies are to expand the\n\ +-@code{movmem} or @code{setmem} optabs, to emit a library call, or to emit\n\ ++@code{cpymem} or @code{setmem} optabs, to emit a library call, or to emit\n\ + unit-by-unit, loop-based operations.\n\ + \n\ + This target hook should return true if, for a memory operation with a\n\ +@@ -3588,7 +3581,7 @@ optimized for speed rather than size.\n\ + \n\ + Returning true for higher values of @var{size} can improve code generation\n\ + for speed if the target does not provide an implementation of the\n\ +-@code{movmem} or @code{setmem} standard names, if the @code{movmem} or\n\ ++@code{cpymem} or @code{setmem} standard names, if the @code{cpymem} or\n\ + @code{setmem} implementation would be more expensive than a sequence of\n\ + insns, or if the overhead of a library call would dominate that of\n\ + the body of the memory operation.\n\ +@@ -4479,18 +4472,18 @@ or 3-byte structure is returned at the most significant end of a\n\ + from __builtin_va_arg. */ + DEFHOOK + (pass_by_reference, +- "This target hook should return @code{true} if an argument at the\n\ ++ "This target hook should return @code{true} if argument @var{arg} at the\n\ + position indicated by @var{cum} should be passed by reference. 
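The TARGET_COMPATIBLE_VECTOR_TYPES_P documentation above suggests implementations of roughly the following shape; example_abi_vector_type_p is a hypothetical predicate that recognises the target's own ABI-specific vector types, so this is only a sketch of the intended pattern, not a definition this patch supplies.

  /* Hypothetical helper, assumed to exist elsewhere in the port: true if
     TYPE is one of the target's own ABI-specific vector types.  */
  extern bool example_abi_vector_type_p (const_tree type);

  static bool
  example_compatible_vector_types_p (const_tree type1, const_tree type2)
  {
    /* Two vector types are interchangeable only if they are either both
       ABI-specific or both generic GNU vector types; same-mode pairs that
       straddle the two groups stay distinct.  */
    return (example_abi_vector_type_p (type1)
            == example_abi_vector_type_p (type2));
  }

  #define TARGET_COMPATIBLE_VECTOR_TYPES_P example_compatible_vector_types_p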
This\n\ + predicate is queried after target independent reasons for being\n\ +-passed by reference, such as @code{TREE_ADDRESSABLE (type)}.\n\ ++passed by reference, such as @code{TREE_ADDRESSABLE (@var{arg}.type)}.\n\ + \n\ + If the hook returns true, a copy of that argument is made in memory and a\n\ + pointer to the argument is passed instead of the argument itself.\n\ + The pointer is passed in whatever way is appropriate for passing a pointer\n\ + to that type.", + bool, +- (cumulative_args_t cum, machine_mode mode, const_tree type, bool named), +- hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false) ++ (cumulative_args_t cum, const function_arg_info &arg), ++ hook_bool_CUMULATIVE_ARGS_arg_info_false) + + DEFHOOK + (expand_builtin_saveregs, +@@ -4515,8 +4508,8 @@ pass all their arguments on the stack.\n\ + \n\ + The argument @var{args_so_far} points to the @code{CUMULATIVE_ARGS} data\n\ + structure, containing the values that are obtained after processing the\n\ +-named arguments. The arguments @var{mode} and @var{type} describe the\n\ +-last named argument---its machine mode and its data type as a tree node.\n\ ++named arguments. The argument @var{arg} describes the last of these named\n\ ++arguments.\n\ + \n\ + The target hook should do two things: first, push onto the stack all the\n\ + argument registers @emph{not} used for the named arguments, and second,\n\ +@@ -4536,7 +4529,7 @@ arguments of the function are being analyzed for the second time. This\n\ + happens for an inline function, which is not actually compiled until the\n\ + end of the source file. The hook @code{TARGET_SETUP_INCOMING_VARARGS} should\n\ + not generate any instructions in this case.", +- void, (cumulative_args_t args_so_far, machine_mode mode, tree type, ++ void, (cumulative_args_t args_so_far, const function_arg_info &arg, + int *pretend_args_size, int second_time), + default_setup_incoming_varargs) + +@@ -4579,15 +4572,6 @@ returned by function call into @var{slot}.", + void, (rtx slot, rtx bounds), + default_store_returned_bounds) + +-DEFHOOK +-(setup_incoming_vararg_bounds, +- "Use it to store bounds for anonymous register arguments stored\n\ +-into the stack. Arguments meaning is similar to\n\ +-@code{TARGET_SETUP_INCOMING_VARARGS}.", +- void, (cumulative_args_t args_so_far, machine_mode mode, tree type, +- int *pretend_args_size, int second_time), +- default_setup_incoming_vararg_bounds) +- + DEFHOOK + (call_args, + "While generating RTL for a function call, this target hook is invoked once\n\ +@@ -4668,11 +4652,11 @@ false.", + Need audit to verify that this is the case. */ + DEFHOOK + (must_pass_in_stack, +- "This target hook should return @code{true} if we should not pass @var{type}\n\ ++ "This target hook should return @code{true} if we should not pass @var{arg}\n\ + solely in registers. 
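With the converted TARGET_PASS_BY_REFERENCE interface above, a port-side implementation now reads the mode, type and named flag from the function_arg_info object instead of separate parameters. A minimal sketch, assuming an invented rule of passing variable-sized and larger-than-16-byte aggregates by reference:

  static bool
  example_pass_by_reference (cumulative_args_t,
                             const function_arg_info &arg)
  {
    /* Hypothetical rule: pass variable-sized and large types by
       reference; everything else is passed by value.  */
    if (arg.type == NULL_TREE)
      return false;
    HOST_WIDE_INT size = int_size_in_bytes (arg.type);
    return size == -1 || size > 16;
  }

  #define TARGET_PASS_BY_REFERENCE example_pass_by_reference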
The file @file{expr.h} defines a\n\ + definition that is usually appropriate, refer to @file{expr.h} for additional\n\ + documentation.", +- bool, (machine_mode mode, const_tree type), ++ bool, (const function_arg_info &arg), + must_pass_in_stack_var_size_or_pad) + + /* Return true if type TYPE, mode MODE, which is passed by reference, +@@ -4691,8 +4675,8 @@ not be generated.\n\ + \n\ + The default version of this hook always returns false.", + bool, +- (cumulative_args_t cum, machine_mode mode, const_tree type, bool named), +- hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false) ++ (cumulative_args_t cum, const function_arg_info &arg), ++ hook_bool_CUMULATIVE_ARGS_arg_info_false) + + /* Return zero for arguments passed entirely on the stack or entirely + in registers. If passed in both, return the number of bytes passed +@@ -4715,8 +4699,8 @@ compiler when this occurs, and how many bytes should go in registers.\n\ + @code{TARGET_FUNCTION_ARG} for these arguments should return the first\n\ + register to be used by the caller for this argument; likewise\n\ + @code{TARGET_FUNCTION_INCOMING_ARG}, for the called function.", +- int, (cumulative_args_t cum, machine_mode mode, tree type, bool named), +- hook_int_CUMULATIVE_ARGS_mode_tree_bool_0) ++ int, (cumulative_args_t cum, const function_arg_info &arg), ++ hook_int_CUMULATIVE_ARGS_arg_info_0) + + /* Update the state in CA to advance past an argument in the + argument list. The values MODE, TYPE, and NAMED describe that +@@ -4724,8 +4708,7 @@ register to be used by the caller for this argument; likewise\n\ + DEFHOOK + (function_arg_advance, + "This hook updates the summarizer variable pointed to by @var{ca} to\n\ +-advance past an argument in the argument list. The values @var{mode},\n\ +-@var{type} and @var{named} describe that argument. Once this is done,\n\ ++advance past argument @var{arg} in the argument list. Once this is done,\n\ + the variable @var{cum} is suitable for analyzing the @emph{following}\n\ + argument with @code{TARGET_FUNCTION_ARG}, etc.\n\ + \n\ +@@ -4733,7 +4716,7 @@ This hook need not do anything if the argument in question was passed\n\ + on the stack. The compiler knows how to track the amount of stack space\n\ + used for arguments without any special help.", + void, +- (cumulative_args_t ca, machine_mode mode, const_tree type, bool named), ++ (cumulative_args_t ca, const function_arg_info &arg), + default_function_arg_advance) + + DEFHOOK +@@ -4770,17 +4753,9 @@ constant size shorter than an @code{int}, and upward otherwise.", + argument. */ + DEFHOOK + (function_arg, +- "Return an RTX indicating whether a function argument is passed in a\n\ +-register and if so, which register.\n\ +-\n\ +-The arguments are @var{ca}, which summarizes all the previous\n\ +-arguments; @var{mode}, the machine mode of the argument; @var{type},\n\ +-the data type of the argument as a tree node or 0 if that is not known\n\ +-(which happens for C support library functions); and @var{named},\n\ +-which is @code{true} for an ordinary argument and @code{false} for\n\ +-nameless arguments that correspond to @samp{@dots{}} in the called\n\ +-function's prototype. @var{type} can be an incomplete type if a\n\ +-syntax error has previously occurred.\n\ ++ "Return an RTX indicating whether function argument @var{arg} is passed\n\ ++in a register and if so, which register. 
Argument @var{ca} summarizes all\n\ ++the previous arguments.\n\ + \n\ + The return value is usually either a @code{reg} RTX for the hard\n\ + register in which to pass the argument, or zero to pass the argument\n\ +@@ -4826,8 +4801,7 @@ is not defined and @code{TARGET_FUNCTION_ARG} returns nonzero for such an\n\ + argument, the compiler will abort. If @code{REG_PARM_STACK_SPACE} is\n\ + defined, the argument will be computed in the stack and then loaded into\n\ + a register.", +- rtx, (cumulative_args_t ca, machine_mode mode, const_tree type, +- bool named), ++ rtx, (cumulative_args_t ca, const function_arg_info &arg), + default_function_arg) + + DEFHOOK +@@ -4849,8 +4823,7 @@ so that it can be used to pass special arguments.\n\ + \n\ + If @code{TARGET_FUNCTION_INCOMING_ARG} is not defined,\n\ + @code{TARGET_FUNCTION_ARG} serves both purposes.", +- rtx, (cumulative_args_t ca, machine_mode mode, const_tree type, +- bool named), ++ rtx, (cumulative_args_t ca, const function_arg_info &arg), + default_function_incoming_arg) + + DEFHOOK +@@ -4962,6 +4935,28 @@ If this hook is not defined, then FUNCTION_VALUE_REGNO_P will be used.", + bool, (const unsigned int regno), + default_function_value_regno_p) + ++DEFHOOK ++(fntype_abi, ++ "Return the ABI used by a function with type @var{type}; see the\n\ ++definition of @code{predefined_function_abi} for details of the ABI\n\ ++descriptor. Targets only need to define this hook if they support\n\ ++interoperability between several ABIs in the same translation unit.", ++ const predefined_function_abi &, (const_tree type), ++ NULL) ++ ++DEFHOOK ++(insn_callee_abi, ++ "This hook returns a description of the ABI used by the target of\n\ ++call instruction @var{insn}; see the definition of\n\ ++@code{predefined_function_abi} for details of the ABI descriptor.\n\ ++Only the global function @code{insn_callee_abi} should call this hook\n\ ++directly.\n\ ++\n\ ++Targets only need to define this hook if they support\n\ ++interoperability between several ABIs in the same translation unit.", ++ const predefined_function_abi &, (const rtx_insn *insn), ++ NULL) ++ + /* ??? Documenting this hook requires a GFDL license grant. */ + DEFHOOK_UNDOC + (internal_arg_pointer, +@@ -5811,32 +5806,27 @@ The default version of this hook always returns @code{true}.", + + DEFHOOK + (hard_regno_call_part_clobbered, +- "This hook should return true if @var{regno} is partly call-saved and\n\ +-partly call-clobbered, and if a value of mode @var{mode} would be partly\n\ +-clobbered by call instruction @var{insn}. If @var{insn} is NULL then it\n\ +-should return true if any call could partly clobber the register.\n\ +-For example, if the low 32 bits of @var{regno} are preserved across a call\n\ +-but higher bits are clobbered, this hook should return true for a 64-bit\n\ +-mode but false for a 32-bit mode.\n\ ++ "ABIs usually specify that calls must preserve the full contents\n\ ++of a particular register, or that calls can alter any part of a\n\ ++particular register. This information is captured by the target macro\n\ ++@code{CALL_REALLY_USED_REGISTERS}. However, some ABIs specify that calls\n\ ++must preserve certain bits of a particular register but can alter others.\n\ ++This hook should return true if this applies to at least one of the\n\ ++registers in @samp{(reg:@var{mode} @var{regno})}, and if as a result the\n\ ++call would alter part of the @var{mode} value. 
For example, if a call\n\ ++preserves the low 32 bits of a 64-bit hard register @var{regno} but can\n\ ++clobber the upper 32 bits, this hook should return true for a 64-bit mode\n\ ++but false for a 32-bit mode.\n\ ++\n\ ++The value of @var{abi_id} comes from the @code{predefined_function_abi}\n\ ++structure that describes the ABI of the call; see the definition of the\n\ ++structure for more details. If (as is usual) the target uses the same ABI\n\ ++for all functions in a translation unit, @var{abi_id} is always 0.\n\ + \n\ + The default implementation returns false, which is correct\n\ + for targets that don't have partly call-clobbered registers.", +- bool, (rtx_insn *insn, unsigned int regno, machine_mode mode), +- hook_bool_insn_uint_mode_false) +- +-DEFHOOK +-(return_call_with_max_clobbers, +- "This hook returns a pointer to the call that partially clobbers the\n\ +-most registers. If a platform supports multiple ABIs where the registers\n\ +-that are partially clobbered may vary, this function compares two\n\ +-calls and returns a pointer to the one that clobbers the most registers.\n\ +-If both calls clobber the same registers, @var{call_1} must be returned.\n\ +-\n\ +-The registers clobbered in different ABIs must be a proper subset or\n\ +-superset of all other ABIs. @var{call_1} must always be a call insn,\n\ +-call_2 may be NULL or a call insn.", +- rtx_insn *, (rtx_insn *call_1, rtx_insn *call_2), +- NULL) ++ bool, (unsigned int abi_id, unsigned int regno, machine_mode mode), ++ hook_bool_uint_uint_mode_false) + + DEFHOOK + (get_multilib_abi_name, +@@ -5844,20 +5834,6 @@ DEFHOOK + const char *, (void), + hook_constcharptr_void_null) + +-DEFHOOK +-(remove_extra_call_preserved_regs, +- "This hook removes registers from the set of call-clobbered registers\n\ +- in @var{used_regs} if, contrary to the default rules, something guarantees\n\ +- that @samp{insn} preserves those registers. For example, some targets\n\ +- support variant ABIs in which functions preserve more registers than\n\ +- normal functions would. Removing those extra registers from @var{used_regs}\n\ +- can lead to better register allocation.\n\ +- \n\ +- The default implementation does nothing, which is always safe.\n\ +- Defining the hook is purely an optimization.", +- void, (rtx_insn *insn, HARD_REG_SET *used_regs), +- default_remove_extra_call_preserved_regs) +- + /* Return the smallest number of different values for which it is best to + use a jump-table instead of a tree of conditional branches. */ + DEFHOOK +diff --git a/gcc/target.h b/gcc/target.h +index 057e6ae87..964629669 100644 +--- a/gcc/target.h ++++ b/gcc/target.h +@@ -149,6 +149,12 @@ struct ao_ref; + /* This is defined in tree-vectorizer.h. */ + struct _stmt_vec_info; + ++/* This is defined in calls.h. */ ++struct function_arg_info; ++ ++/* This is defined in function-abi.h. */ ++struct predefined_function_abi; ++ + /* These are defined in tree-vect-stmts.c. 
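Continuing the same hypothetical target, the abi_id parameter of TARGET_HARD_REGNO_CALL_PART_CLOBBERED lets the hook distinguish the two conventions. In this sketch the base ABI preserves only the low 8 bytes of each vector register, while the variant ABI preserves them in full; EXAMPLE_VREG_P and EXAMPLE_VECTOR_ABI_ID are invented names.

  static bool
  example_hard_regno_call_part_clobbered (unsigned int abi_id,
                                          unsigned int regno,
                                          machine_mode mode)
  {
    /* Under the base ABI only the low 8 bytes of a vector register
       survive a call, so wider modes are partially clobbered.  The
       hypothetical variant ABI preserves the whole register.  */
    if (EXAMPLE_VREG_P (regno) && abi_id != EXAMPLE_VECTOR_ABI_ID)
      return maybe_gt (GET_MODE_SIZE (mode), 8);
    return false;
  }

  #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
    example_hard_regno_call_part_clobbered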
*/ + extern tree stmt_vectype (struct _stmt_vec_info *); + extern bool stmt_in_inner_loop_p (struct _stmt_vec_info *); +diff --git a/gcc/targhooks.c b/gcc/targhooks.c +index 6396f6f4b..6f54de0d5 100644 +--- a/gcc/targhooks.c ++++ b/gcc/targhooks.c +@@ -193,11 +193,8 @@ default_expand_builtin_saveregs (void) + } + + void +-default_setup_incoming_varargs (cumulative_args_t ca ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- tree type ATTRIBUTE_UNUSED, +- int *pretend_arg_size ATTRIBUTE_UNUSED, +- int second_time ATTRIBUTE_UNUSED) ++default_setup_incoming_varargs (cumulative_args_t, ++ const function_arg_info &, int *, int) + { + } + +@@ -323,22 +320,19 @@ default_cxx_get_cookie_size (tree type) + of the TARGET_PASS_BY_REFERENCE hook uses just MUST_PASS_IN_STACK. */ + + bool +-hook_pass_by_reference_must_pass_in_stack (cumulative_args_t c ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, const_tree type ATTRIBUTE_UNUSED, +- bool named_arg ATTRIBUTE_UNUSED) ++hook_pass_by_reference_must_pass_in_stack (cumulative_args_t, ++ const function_arg_info &arg) + { +- return targetm.calls.must_pass_in_stack (mode, type); ++ return targetm.calls.must_pass_in_stack (arg); + } + + /* Return true if a parameter follows callee copies conventions. This + version of the hook is true for all named arguments. */ + + bool +-hook_callee_copies_named (cumulative_args_t ca ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- const_tree type ATTRIBUTE_UNUSED, bool named) ++hook_callee_copies_named (cumulative_args_t, const function_arg_info &arg) + { +- return named; ++ return arg.named; + } + + /* Emit to STREAM the assembler syntax for insn operand X. */ +@@ -681,16 +675,6 @@ default_builtin_md_vectorized_function (tree, tree, tree) + return NULL_TREE; + } + +-/* Vectorized conversion. */ +- +-tree +-default_builtin_vectorized_conversion (unsigned int code ATTRIBUTE_UNUSED, +- tree dest_type ATTRIBUTE_UNUSED, +- tree src_type ATTRIBUTE_UNUSED) +-{ +- return NULL_TREE; +-} +- + /* Default vectorizer cost model values. 
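The default hooks below show the target-facing side of the function_arg_info conversion; the caller-facing side bundles type, mode and named-ness into one object before querying the hooks. A sketch of that usage, with the constructor choice taken as an assumption about calls.h rather than something these hunks show directly:

  /* Return true if an argument of type TYPE (named if NAMED) cannot be
     passed purely in registers; CUM summarizes the preceding arguments.  */
  static bool
  example_arg_needs_stack_p (cumulative_args_t cum, tree type, bool named)
  {
    function_arg_info arg (type, named);

    /* If the target wants the argument by reference, what is actually
       passed is a pointer, so query the stack requirement for that.  */
    if (targetm.calls.pass_by_reference (cum, arg))
      arg = function_arg_info (build_pointer_type (type), named);

    return targetm.calls.must_pass_in_stack (arg);
  }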
*/ + + int +@@ -737,28 +721,22 @@ default_builtin_reciprocal (tree) + } + + bool +-hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false ( +- cumulative_args_t ca ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- const_tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED) ++hook_bool_CUMULATIVE_ARGS_arg_info_false (cumulative_args_t, ++ const function_arg_info &) + { + return false; + } + + bool +-hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true ( +- cumulative_args_t ca ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- const_tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED) ++hook_bool_CUMULATIVE_ARGS_arg_info_true (cumulative_args_t, ++ const function_arg_info &) + { + return true; + } + + int +-hook_int_CUMULATIVE_ARGS_mode_tree_bool_0 ( +- cumulative_args_t ca ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED) ++hook_int_CUMULATIVE_ARGS_arg_info_0 (cumulative_args_t, ++ const function_arg_info &) + { + return 0; + } +@@ -770,10 +748,7 @@ hook_void_CUMULATIVE_ARGS_tree (cumulative_args_t ca ATTRIBUTE_UNUSED, + } + + void +-default_function_arg_advance (cumulative_args_t ca ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- const_tree type ATTRIBUTE_UNUSED, +- bool named ATTRIBUTE_UNUSED) ++default_function_arg_advance (cumulative_args_t, const function_arg_info &) + { + gcc_unreachable (); + } +@@ -814,19 +789,13 @@ default_function_arg_padding (machine_mode mode, const_tree type) + } + + rtx +-default_function_arg (cumulative_args_t ca ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- const_tree type ATTRIBUTE_UNUSED, +- bool named ATTRIBUTE_UNUSED) ++default_function_arg (cumulative_args_t, const function_arg_info &) + { + gcc_unreachable (); + } + + rtx +-default_function_incoming_arg (cumulative_args_t ca ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- const_tree type ATTRIBUTE_UNUSED, +- bool named ATTRIBUTE_UNUSED) ++default_function_incoming_arg (cumulative_args_t, const function_arg_info &) + { + gcc_unreachable (); + } +@@ -1061,12 +1030,6 @@ default_return_pops_args (tree, tree, poly_int64) + return 0; + } + +-reg_class_t +-default_branch_target_register_class (void) +-{ +- return NO_REGS; +-} +- + reg_class_t + default_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED, + reg_class_t cl, +@@ -1732,9 +1695,9 @@ get_move_ratio (bool speed_p ATTRIBUTE_UNUSED) + #ifdef MOVE_RATIO + move_ratio = (unsigned int) MOVE_RATIO (speed_p); + #else +-#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti) ++#if defined (HAVE_cpymemqi) || defined (HAVE_cpymemhi) || defined (HAVE_cpymemsi) || defined (HAVE_cpymemdi) || defined (HAVE_cpymemti) + move_ratio = 2; +-#else /* No movmem patterns, pick a default. */ ++#else /* No cpymem patterns, pick a default. */ + move_ratio = ((speed_p) ? 15 : 3); + #endif + #endif +@@ -1742,7 +1705,7 @@ get_move_ratio (bool speed_p ATTRIBUTE_UNUSED) + } + + /* Return TRUE if the move_by_pieces/set_by_pieces infrastructure should be +- used; return FALSE if the movmem/setmem optab should be expanded, or ++ used; return FALSE if the cpymem/setmem optab should be expanded, or + a call to memcpy emitted. 
*/ + + bool +@@ -1941,7 +1904,7 @@ default_dwarf_frame_reg_mode (int regno) + { + machine_mode save_mode = reg_raw_mode[regno]; + +- if (targetm.hard_regno_call_part_clobbered (NULL, regno, save_mode)) ++ if (targetm.hard_regno_call_part_clobbered (0, regno, save_mode)) + save_mode = choose_hard_reg_mode (regno, 1, true); + return save_mode; + } +@@ -2163,7 +2126,7 @@ std_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + if (ARGS_GROW_DOWNWARD) + gcc_unreachable (); + +- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); ++ indirect = pass_va_arg_by_reference (type); + if (indirect) + type = build_pointer_type (type); + +@@ -2260,15 +2223,6 @@ std_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, + return build_va_arg_indirect_ref (addr); + } + +-void +-default_setup_incoming_vararg_bounds (cumulative_args_t ca ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- tree type ATTRIBUTE_UNUSED, +- int *pretend_arg_size ATTRIBUTE_UNUSED, +- int second_time ATTRIBUTE_UNUSED) +-{ +-} +- + /* An implementation of TARGET_CAN_USE_DOLOOP_P for targets that do + not support nested low-overhead loops. */ + +@@ -2385,9 +2339,4 @@ default_speculation_safe_value (machine_mode mode ATTRIBUTE_UNUSED, + return result; + } + +-void +-default_remove_extra_call_preserved_regs (rtx_insn *, HARD_REG_SET *) +-{ +-} +- + #include "gt-targhooks.h" +diff --git a/gcc/targhooks.h b/gcc/targhooks.h +index 2d5991908..e5e803c33 100644 +--- a/gcc/targhooks.h ++++ b/gcc/targhooks.h +@@ -40,7 +40,9 @@ extern machine_mode default_cc_modes_compatible (machine_mode, + extern bool default_return_in_memory (const_tree, const_tree); + + extern rtx default_expand_builtin_saveregs (void); +-extern void default_setup_incoming_varargs (cumulative_args_t, machine_mode, tree, int *, int); ++extern void default_setup_incoming_varargs (cumulative_args_t, ++ const function_arg_info &, ++ int *, int); + extern rtx default_builtin_setjmp_frame_value (void); + extern bool default_pretend_outgoing_varargs_named (cumulative_args_t); + +@@ -63,9 +65,9 @@ extern tree default_cxx_guard_type (void); + extern tree default_cxx_get_cookie_size (tree); + + extern bool hook_pass_by_reference_must_pass_in_stack +- (cumulative_args_t, machine_mode mode, const_tree, bool); ++ (cumulative_args_t, const function_arg_info &); + extern bool hook_callee_copies_named +- (cumulative_args_t ca, machine_mode, const_tree, bool); ++ (cumulative_args_t ca, const function_arg_info &); + + extern void default_print_operand (FILE *, rtx, int); + extern void default_print_operand_address (FILE *, machine_mode, rtx); +@@ -90,8 +92,6 @@ extern const char * default_invalid_within_doloop (const rtx_insn *); + extern tree default_builtin_vectorized_function (unsigned int, tree, tree); + extern tree default_builtin_md_vectorized_function (tree, tree, tree); + +-extern tree default_builtin_vectorized_conversion (unsigned int, tree, tree); +- + extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int); + + extern tree default_builtin_reciprocal (tree); +@@ -135,24 +135,23 @@ extern void default_goacc_reduction (gcall *); + extern bool hook_bool_CUMULATIVE_ARGS_false (cumulative_args_t); + extern bool hook_bool_CUMULATIVE_ARGS_true (cumulative_args_t); + +-extern bool hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false +- (cumulative_args_t, machine_mode, const_tree, bool); +-extern bool hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true +- (cumulative_args_t, machine_mode, const_tree, bool); +-extern int 
hook_int_CUMULATIVE_ARGS_mode_tree_bool_0 +- (cumulative_args_t, machine_mode, tree, bool); ++extern bool hook_bool_CUMULATIVE_ARGS_arg_info_false ++ (cumulative_args_t, const function_arg_info &); ++extern bool hook_bool_CUMULATIVE_ARGS_arg_info_true ++ (cumulative_args_t, const function_arg_info &); ++extern int hook_int_CUMULATIVE_ARGS_arg_info_0 ++ (cumulative_args_t, const function_arg_info &); + extern void hook_void_CUMULATIVE_ARGS_tree + (cumulative_args_t, tree); + extern const char *hook_invalid_arg_for_unprototyped_fn + (const_tree, const_tree, const_tree); + extern void default_function_arg_advance +- (cumulative_args_t, machine_mode, const_tree, bool); ++ (cumulative_args_t, const function_arg_info &); + extern HOST_WIDE_INT default_function_arg_offset (machine_mode, const_tree); + extern pad_direction default_function_arg_padding (machine_mode, const_tree); +-extern rtx default_function_arg +- (cumulative_args_t, machine_mode, const_tree, bool); +-extern rtx default_function_incoming_arg +- (cumulative_args_t, machine_mode, const_tree, bool); ++extern rtx default_function_arg (cumulative_args_t, const function_arg_info &); ++extern rtx default_function_incoming_arg (cumulative_args_t, ++ const function_arg_info &); + extern unsigned int default_function_arg_boundary (machine_mode, + const_tree); + extern unsigned int default_function_arg_round_boundary (machine_mode, +@@ -165,7 +164,6 @@ extern rtx default_internal_arg_pointer (void); + extern rtx default_static_chain (const_tree, bool); + extern void default_trampoline_init (rtx, tree, rtx); + extern poly_int64 default_return_pops_args (tree, tree, poly_int64); +-extern reg_class_t default_branch_target_register_class (void); + extern reg_class_t default_ira_change_pseudo_allocno_class (int, reg_class_t, + reg_class_t); + extern bool default_lra_p (void); +@@ -266,11 +264,6 @@ extern rtx default_load_bounds_for_arg (rtx, rtx, rtx); + extern void default_store_bounds_for_arg (rtx, rtx, rtx, rtx); + extern rtx default_load_returned_bounds (rtx); + extern void default_store_returned_bounds (rtx,rtx); +-extern void default_setup_incoming_vararg_bounds (cumulative_args_t ca ATTRIBUTE_UNUSED, +- machine_mode mode ATTRIBUTE_UNUSED, +- tree type ATTRIBUTE_UNUSED, +- int *pretend_arg_size ATTRIBUTE_UNUSED, +- int second_time ATTRIBUTE_UNUSED); + extern bool default_optab_supported_p (int, machine_mode, machine_mode, + optimization_type); + extern unsigned int default_max_noce_ifcvt_seq_cost (edge); +@@ -287,7 +280,5 @@ extern tree default_preferred_else_value (unsigned, tree, unsigned, tree *); + extern bool default_have_speculation_safe_value (bool); + extern bool speculation_safe_value_not_needed (bool); + extern rtx default_speculation_safe_value (machine_mode, rtx, rtx, rtx); +-extern void default_remove_extra_call_preserved_regs (rtx_insn *, +- HARD_REG_SET *); + + #endif /* GCC_TARGHOOKS_H */ +diff --git a/gcc/testsuite/c-c++-common/guality/Og-dce-1.c b/gcc/testsuite/c-c++-common/guality/Og-dce-1.c +new file mode 100644 +index 000000000..a859e3252 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/guality/Og-dce-1.c +@@ -0,0 +1,14 @@ ++/* { dg-do run } */ ++/* { dg-options "-g" } */ ++ ++int *__attribute__((noipa)) consume (int *ptr) { return ptr; } ++ ++int ++main (void) ++{ ++ int x; ++ int *volatile ptr = consume (&x); ++ x = 0; ++ x = 1; /* { dg-final { gdb-test . "*ptr" "0" } } */ ++ return 0; /* { dg-final { gdb-test . 
"*ptr" "1" } } */ ++} +diff --git a/gcc/testsuite/c-c++-common/guality/Og-dce-2.c b/gcc/testsuite/c-c++-common/guality/Og-dce-2.c +new file mode 100644 +index 000000000..3df2c7921 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/guality/Og-dce-2.c +@@ -0,0 +1,19 @@ ++/* { dg-do run } */ ++/* { dg-options "-g" } */ ++ ++struct s { int a, b, c, d; }; ++ ++struct s gs1 = { 1, 2, 3, 4 }; ++struct s gs2 = { 5, 6, 7, 8 }; ++ ++struct s *__attribute__((noipa)) consume (struct s *ptr) { return ptr; } ++ ++int ++main (void) ++{ ++ struct s x; ++ struct s *volatile ptr = consume (&x); ++ x = gs1; ++ x = gs2; /* { dg-final { gdb-test . "ptr->a" "1" } } */ ++ return 0; /* { dg-final { gdb-test . "ptr->a" "5" } } */ ++} +diff --git a/gcc/testsuite/c-c++-common/guality/Og-dce-3.c b/gcc/testsuite/c-c++-common/guality/Og-dce-3.c +new file mode 100644 +index 000000000..fa6186a73 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/guality/Og-dce-3.c +@@ -0,0 +1,29 @@ ++/* { dg-do run } */ ++/* { dg-options "-g" } */ ++ ++volatile int amount = 10; ++ ++void __attribute__((noipa)) ++do_something (int *ptr) ++{ ++ *ptr += 10; ++} ++ ++int __attribute__((noipa)) ++foo (int count) ++{ ++ int x = 1; ++ for (int i = 0; i < count; ++i) ++ do_something (&x); /* { dg-final { gdb-test . "x" "1" } } */ ++ int res = x; /* { dg-final { gdb-test . "x" "101" } } */ ++ x = res + 1; ++ return res; /* { dg-final { gdb-test . "x" "102" } } */ ++ ++} ++ ++int ++main (void) ++{ ++ foo (10); ++ return 0; ++} +diff --git a/gcc/testsuite/c-c++-common/guality/Og-global-dse-1.c b/gcc/testsuite/c-c++-common/guality/Og-global-dse-1.c +new file mode 100644 +index 000000000..3d4b4e60e +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/guality/Og-global-dse-1.c +@@ -0,0 +1,17 @@ ++/* { dg-do run } */ ++/* { dg-options "-g" } */ ++ ++struct s { int i, j; }; ++struct s gs1, gs2 = { 3, 4 }; ++ ++void __attribute__((noipa)) consume (void) {}; ++ ++int ++main (void) ++{ ++ gs1.i = 1; ++ gs1.j = 2; /* { dg-final { gdb-test . "gs1.i" "1" } } */ ++ gs1 = gs2; /* { dg-final { gdb-test . "gs1.j" "2" } } */ ++ consume (); /* { dg-final { gdb-test . "gs1.i" "3" } } */ ++ return 0; /* { dg-final { gdb-test . "gs1.j" "4" } } */ ++} +diff --git a/gcc/testsuite/c-c++-common/guality/Og-static-wo-1.c b/gcc/testsuite/c-c++-common/guality/Og-static-wo-1.c +new file mode 100644 +index 000000000..a4c7f3067 +--- /dev/null ++++ b/gcc/testsuite/c-c++-common/guality/Og-static-wo-1.c +@@ -0,0 +1,15 @@ ++/* { dg-do run } */ ++/* { dg-options "-g" } */ ++ ++#include "../../gcc.dg/nop.h" ++ ++static int x = 0; ++ ++int ++main (void) ++{ ++ asm volatile (NOP); /* { dg-final { gdb-test . "x" "0" } } */ ++ x = 1; ++ asm volatile (NOP); /* { dg-final { gdb-test . 
"x" "1" } } */ ++ return 0; ++} +diff --git a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C +index 5740c0281..50c1452ed 100644 +--- a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C ++++ b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C +@@ -14,6 +14,7 @@ void f4 (uint16x4_t a) {} + void f5 (uint32x2_t a) {} + void f23 (uint64x1_t a) {} + void f61 (float16x4_t a) {} ++void f62 (bfloat16x4_t a) {} + void f6 (float32x2_t a) {} + void f7 (poly8x8_t a) {} + void f8 (poly16x4_t a) {} +@@ -27,6 +28,7 @@ void f14 (uint16x8_t a) {} + void f15 (uint32x4_t a) {} + void f16 (uint64x2_t a) {} + void f171 (float16x8_t a) {} ++void f172 (bfloat16x8_t a) {} + void f17 (float32x4_t a) {} + void f18 (float64x2_t a) {} + void f19 (poly8x16_t a) {} +@@ -45,6 +47,7 @@ void g1 (int8x16_t, int8x16_t) {} + // { dg-final { scan-assembler "_Z2f512__Uint32x2_t:" } } + // { dg-final { scan-assembler "_Z3f2312__Uint64x1_t:" } } + // { dg-final { scan-assembler "_Z3f6113__Float16x4_t:" } } ++// { dg-final { scan-assembler "_Z3f6214__Bfloat16x4_t:" } } + // { dg-final { scan-assembler "_Z2f613__Float32x2_t:" } } + // { dg-final { scan-assembler "_Z2f711__Poly8x8_t:" } } + // { dg-final { scan-assembler "_Z2f812__Poly16x4_t:" } } +@@ -57,6 +60,7 @@ void g1 (int8x16_t, int8x16_t) {} + // { dg-final { scan-assembler "_Z3f1512__Uint32x4_t:" } } + // { dg-final { scan-assembler "_Z3f1612__Uint64x2_t:" } } + // { dg-final { scan-assembler "_Z4f17113__Float16x8_t:" } } ++// { dg-final { scan-assembler "_Z4f17214__Bfloat16x8_t:" } } + // { dg-final { scan-assembler "_Z3f1713__Float32x4_t:" } } + // { dg-final { scan-assembler "_Z3f1813__Float64x2_t:" } } + // { dg-final { scan-assembler "_Z3f1912__Poly8x16_t:" } } +diff --git a/gcc/testsuite/g++.dg/diagnostic/aka4.C b/gcc/testsuite/g++.dg/diagnostic/aka4.C +new file mode 100644 +index 000000000..da8c57964 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/diagnostic/aka4.C +@@ -0,0 +1,9 @@ ++typedef unsigned int myvec __attribute__((vector_size (16))); ++ ++void f (float x) ++{ ++ myvec y = x; // { dg-error {cannot convert 'float' to 'myvec' {aka '__vector\([48]\) unsigned int'} in initialization} } ++ myvec *ptr = &x; // { dg-error {cannot convert 'float\*' to 'myvec\*' {aka '__vector\([48]\) unsigned int\*'} in initialization} } ++ const myvec *const_ptr = &x; // { dg-error {cannot convert 'float\*' to 'const myvec\*' {aka 'const __vector\([48]\) unsigned int\*'} in initialization} } ++ volatile myvec *volatile_ptr = &x; // { dg-error {cannot convert 'float\*' to 'volatile myvec\*' {aka 'volatile __vector\([48]\) unsigned int\*'} in initialization} } ++} +diff --git a/gcc/testsuite/g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C b/gcc/testsuite/g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C +new file mode 100644 +index 000000000..5426a1814 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C +@@ -0,0 +1,13 @@ ++/* { dg-do compile { target aarch64*-*-* } } */ ++ ++/* Test mangling */ ++ ++/* { dg-final { scan-assembler "\t.global\t_Z1fPu6__bf16" } } */ ++void f (__bf16 *x) { } ++ ++/* { dg-final { scan-assembler "\t.global\t_Z1gPu6__bf16S_" } } */ ++void g (__bf16 *x, __bf16 *y) { } ++ ++/* { dg-final { scan-assembler "\t.global\t_ZN1SIu6__bf16u6__bf16E1iE" } } */ ++template struct S { static int i; }; ++template <> int S<__bf16, __bf16>::i = 3; +diff --git a/gcc/testsuite/g++.dg/guality/guality.exp b/gcc/testsuite/g++.dg/guality/guality.exp +index 757b20b61..33571f1f2 100644 +--- 
a/gcc/testsuite/g++.dg/guality/guality.exp ++++ b/gcc/testsuite/g++.dg/guality/guality.exp +@@ -65,8 +65,22 @@ if {[check_guality " + return 0; + } + "]} { +- gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.C]] "" "" +- gcc-dg-runtest [lsort [glob $srcdir/c-c++-common/guality/*.c]] "" "" ++ set general [list] ++ set Og [list] ++ foreach file [lsort [glob $srcdir/c-c++-common/guality/*.c]] { ++ switch -glob -- [file tail $file] { ++ Og-* { lappend Og $file } ++ * { lappend general $file } ++ } ++ } ++ ++ gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.C]] "" "" ++ gcc-dg-runtest $general "" "" ++ set-torture-options \ ++ [list "-O0" "-Og"] \ ++ [list {}] \ ++ [list "-Og -flto"] ++ gcc-dg-runtest $Og "" "" + } + + if [info exists guality_gdb_name] { +diff --git a/gcc/testsuite/g++.dg/ipa/pr93763.C b/gcc/testsuite/g++.dg/ipa/pr93763.C +index 61117108e..13ab2d57f 100644 +--- a/gcc/testsuite/g++.dg/ipa/pr93763.C ++++ b/gcc/testsuite/g++.dg/ipa/pr93763.C +@@ -1,4 +1,4 @@ +-/* { dg-do compile } */ ++/* { dg-do compile { target c++11 } } */ + /* { dg-options "-O3" } */ + + struct search_param { +diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr53844.C b/gcc/testsuite/g++.dg/tree-ssa/pr53844.C +index 954cc71b4..ab9879f6a 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/pr53844.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/pr53844.C +@@ -1,5 +1,5 @@ + // { dg-do compile } +-// { dg-options "-O2 -fdump-tree-optimized-vops" } ++// { dg-options "-O2 -fdump-tree-optimized-vops -fno-inline-functions --param max-inline-insns-single-O2=200" } + + struct VBase; + +diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C +index 870b23721..2e3dfecac 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C +@@ -1,5 +1,5 @@ + // { dg-do compile } +-// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized -fdelete-null-pointer-checks" } ++// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized -fdelete-null-pointer-checks --param early-inlining-insns-O2=14" } + + #define assume(x) if(!(x))__builtin_unreachable() + +diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr8781.C b/gcc/testsuite/g++.dg/tree-ssa/pr8781.C +index 1f115b2b2..5bc1ef035 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/pr8781.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/pr8781.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O -fno-tree-sra -fdump-tree-fre1" } */ ++/* { dg-options "-O -fno-tree-sra -fdump-tree-fre1 --param early-inlining-insns-O2=14" } */ + + int f(); + +diff --git a/gcc/testsuite/g++.dg/warn/Wstringop-truncation-1.C b/gcc/testsuite/g++.dg/warn/Wstringop-truncation-1.C +index 830660197..49dde0a65 100644 +--- a/gcc/testsuite/g++.dg/warn/Wstringop-truncation-1.C ++++ b/gcc/testsuite/g++.dg/warn/Wstringop-truncation-1.C +@@ -1,7 +1,7 @@ + /* PR/tree-optimization/84480 - bogus -Wstringop-truncation despite + assignment with an inlined string literal + { dg-do compile } +- { dg-options "-O2 -Wstringop-truncation" } */ ++ { dg-options "-O2 -Wstringop-truncation --param early-inlining-insns-O2=14" } */ + + #include + +diff --git a/gcc/testsuite/g++.target/aarch64/bfloat_cpp_typecheck.C b/gcc/testsuite/g++.target/aarch64/bfloat_cpp_typecheck.C +new file mode 100644 +index 000000000..9203d91f8 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/bfloat_cpp_typecheck.C +@@ -0,0 +1,14 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { 
dg-additional-options "-O3 --save-temps" } */ ++ ++#include ++ ++void foo (void) ++{ ++ bfloat16_t (); /* { dg-bogus {invalid conversion to type 'bfloat16_t'} "" { xfail *-*-* } } */ ++ bfloat16_t a = bfloat16_t(); /* { dg-bogus {invalid conversion to type 'bfloat16_t'} "" { xfail *-*-* } } */ ++ bfloat16_t (0x1234); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t (0.1); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp +new file mode 100644 +index 000000000..e9d624ff8 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp +@@ -0,0 +1,83 @@ ++# Assembly-based regression-test driver for the SVE ACLE ++# Copyright (C) 2009-2019 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . */ ++ ++# GCC testsuite that uses the `dg.exp' driver. ++ ++# Exit immediately if this isn't an AArch64 target. ++if { ![istarget aarch64*-*-*] } { ++ return ++} ++ ++# Load support procs. ++load_lib g++-dg.exp ++ ++# Initialize `dg'. ++dg-init ++ ++# Force SVE if we're not testing it already. ++if { [check_effective_target_aarch64_sve] } { ++ set sve_flags "" ++} else { ++ set sve_flags "-march=armv8.2-a+sve" ++} ++ ++global gcc_runtest_parallelize_limit_minor ++if { [info exists gcc_runtest_parallelize_limit_minor] } { ++ set old_limit_minor $gcc_runtest_parallelize_limit_minor ++ set gcc_runtest_parallelize_limit_minor 1 ++} ++ ++torture-init ++set-torture-options { ++ "-std=c++98 -O0 -g" ++ "-std=c++98 -O1 -g" ++ "-std=c++11 -O2 -g" ++ "-std=c++14 -O3 -g" ++ "-std=c++17 -Og -g" ++ "-std=c++2a -Os -g" ++ "-std=gnu++98 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps" ++ "-std=gnu++11 -Ofast -g" ++ "-std=gnu++17 -O3 -g" ++ "-std=gnu++2a -O0 -g" ++} { ++ "-DTEST_FULL" ++ "-DTEST_OVERLOADS" ++} ++ ++# Main loop. ++set gcc_subdir [string replace $subdir 0 2 gcc] ++set files [glob -nocomplain $srcdir/$gcc_subdir/asm/*.c] ++set save-dg-do-what-default ${dg-do-what-default} ++if { [check_effective_target_aarch64_asm_sve_ok] ++ && [check_effective_target_aarch64_variant_pcs] } { ++ set dg-do-what-default assemble ++} else { ++ set dg-do-what-default compile ++} ++gcc-dg-runtest [lsort $files] "" "$sve_flags -fno-ipa-icf" ++set dg-do-what-default ${save-dg-do-what-default} ++ ++torture-finish ++ ++if { [info exists gcc_runtest_parallelize_limit_minor] } { ++ set gcc_runtest_parallelize_limit_minor $old_limit_minor ++} ++ ++# All done. ++dg-finish +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle.exp b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle.exp +new file mode 100644 +index 000000000..54c43a3ac +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle.exp +@@ -0,0 +1,55 @@ ++# Specific regression driver for AArch64 SVE. 
++# Copyright (C) 2009-2019 Free Software Foundation, Inc. ++# Contributed by ARM Ltd. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . */ ++ ++# GCC testsuite that uses the `dg.exp' driver. ++ ++# Exit immediately if this isn't an AArch64 target. ++if {![istarget aarch64*-*-*] } { ++ return ++} ++ ++# Load support procs. ++load_lib g++-dg.exp ++ ++# If a testcase doesn't have special options, use these. ++global DEFAULT_CXXFLAGS ++if ![info exists DEFAULT_CXXFLAGS] then { ++ set DEFAULT_CXXFLAGS " -pedantic-errors -Wno-long-long" ++} ++ ++# Initialize `dg'. ++dg-init ++ ++# Force SVE if we're not testing it already. ++if { [check_effective_target_aarch64_sve] } { ++ set sve_flags "" ++} else { ++ set sve_flags "-march=armv8.2-a+sve" ++} ++ ++# Main loop. ++set gcc_subdir [string replace $subdir 0 2 gcc] ++set files [glob -nocomplain \ ++ "$srcdir/$gcc_subdir/general/*.c" \ ++ "$srcdir/$subdir/general-c++/*.\[cC\]"] ++dg-runtest [lsort $files] "$sve_flags" $DEFAULT_CXXFLAGS ++ ++# All done. ++dg-finish +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.C +new file mode 100644 +index 000000000..44aa10e20 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.C +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++ ++#include "add_1.h" ++ ++svuint8_t ++f1 (svbool_t pg, svuint8_t x, svint8_t y) ++{ ++ return svadd_u8_x (pg, x, y); /* { dg-error "cannot convert 'svint8_t' to 'svuint8_t'" } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.h b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.h +new file mode 100644 +index 000000000..d441328a3 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.h +@@ -0,0 +1,2 @@ ++#pragma GCC system_header ++#pragma GCC aarch64 "arm_sve.h" /* { dg-message "initializing argument 3" } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.C +new file mode 100644 +index 000000000..fcfb0f489 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.C +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++ ++#include "add_2.h" ++ ++void ++f1 (svbool_t pg, svuint8_t x, svint8_t y) ++{ ++ svadd_x (pg, x); /* { dg-error {no matching function for call to 'svadd_x\(svbool_t&, svuint8_t&\)'} } */ ++ svadd_x (pg, x, x, x); /* { dg-error {no matching function for call to 'svadd_x\(svbool_t&, svuint8_t&, svuint8_t&, svuint8_t&\)'} } */ ++ svadd_x (x, x, x); /* { dg-error {no matching function for call to 'svadd_x\(svuint8_t&, svuint8_t&, svuint8_t&\)'} } */ ++ svadd_x (pg, pg, pg); /* { dg-error {no matching function for call to 'svadd_x\(svbool_t&, svbool_t&, svbool_t&\)'} } */ ++ svadd_x (pg, 1, x); /* { dg-error {no matching function for call to 'svadd_x\(svbool_t&, int, svuint8_t&\)'} } */ ++ svadd_x (pg, x, 
y); /* { dg-error {no matching function for call to 'svadd_x\(svbool_t&, svuint8_t&, svint8_t&\)'} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.h b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.h +new file mode 100644 +index 000000000..2b3a520d3 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.h +@@ -0,0 +1,9 @@ ++#pragma GCC system_header ++#pragma GCC aarch64 "arm_sve.h" ++/* { dg-message {note: candidate: 'svfloat16_t svadd_x\(svbool_t, svfloat16_t, svfloat16_t\)'} "" { target *-*-* } 3 } */ ++/* { dg-message {note: *candidate expects 3 arguments, 2 provided} "" { target *-*-* } 3 } */ ++/* { dg-message {note: *candidate expects 3 arguments, 4 provided} "" { target *-*-* } 3 } */ ++/* { dg-message {note: *no known conversion for argument 1 from 'svuint8_t' to 'svbool_t'} "" { target *-*-* } 3 } */ ++/* { dg-message {note: *no known conversion for argument 2 from 'svbool_t' to 'svfloat16_t'} "" { target *-*-* } 3 } */ ++/* { dg-message {note: *no known conversion for argument 2 from 'int' to 'svfloat16_t'} "" { target *-*-* } 3 } */ ++/* { dg-message {note: *no known conversion for argument 2 from 'svuint8_t' to 'svfloat16_t'} "" { target *-*-* } 3 } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_3.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_3.C +new file mode 100644 +index 000000000..1d811fc76 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_3.C +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized -fnon-call-exceptions" } */ ++ ++#include ++ ++svint8_t ++foo (svbool_t pg, svint8_t a, svint8_t b) ++{ ++ try ++ { ++ a = svadd_m (pg, a, b); ++ } ++ catch (...) 
++ { ++ a = b; ++ } ++ return a; ++} ++ ++/* { dg-final { scan-tree-dump-not {__cxa_begin_catch} "optimized" } } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_1.C +new file mode 100644 +index 000000000..a73934f56 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_1.C +@@ -0,0 +1,39 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_add (uint64_t a, uint64_t b) { return a + b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16, ++ svint32_t s32, svint64_t s64, int x) ++{ ++ const int one = 1; ++ u8 = svasrd_x (pg, u8, 1); /* { dg-error {no matching function for call to 'svasrd_x\(svbool_t&, svuint8_t&, [^)]*\)'} } */ ++ s8 = svasrd_x (pg, s8, x); /* { dg-error "argument 3 of 'svasrd_x' must be an integer constant expression" } */ ++ s8 = svasrd_x (pg, s8, one); ++ s8 = svasrd_x (pg, s8, 0.4); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_x (pg, s8, 1.0); ++ s8 = svasrd_x (pg, s8, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_x (pg, s8, 1); ++ s8 = svasrd_x (pg, s8, 1 + 1); ++ s8 = svasrd_x (pg, s8, const_add (1, 1)); ++ s8 = svasrd_x (pg, s8, add (1, 1)); /* { dg-error "argument 3 of 'svasrd_x' must be an integer constant expression" } */ ++ s8 = svasrd_x (pg, s8, 8); ++ s8 = svasrd_x (pg, s8, 9); /* { dg-error {passing 9 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_x (pg, s8, (uint64_t (1) << 62) + 1); /* { dg-error {passing [^ ]* to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ ++ s16 = svasrd_x (pg, s16, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} } */ ++ s16 = svasrd_x (pg, s16, 1); ++ s16 = svasrd_x (pg, s16, 16); ++ s16 = svasrd_x (pg, s16, 17); /* { dg-error {passing 17 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} } */ ++ s32 = svasrd_x (pg, s32, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 32\]} } */ ++ s32 = svasrd_x (pg, s32, 1); ++ s32 = svasrd_x (pg, s32, 32); ++ s32 = svasrd_x (pg, s32, 33); /* { dg-error {passing 33 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 32\]} } */ ++ s64 = svasrd_x (pg, s64, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} } */ ++ s64 = svasrd_x (pg, s64, 1); ++ s64 = svasrd_x (pg, s64, 64); ++ s64 = svasrd_x (pg, s64, 65); /* { dg-error {passing 65 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_2.C +new file mode 100644 +index 000000000..bbe7ba72b +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_2.C +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_add (uint64_t a, uint64_t b) { return a + b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++void ++f1 (svbool_t pg, svint8_t s8, svint16_t s16, svint32_t s32, 
svint64_t s64, ++ int x) ++{ ++ const int one = 1; ++ s8 = svasrd_n_s8_x (pg, s8, x); /* { dg-error "argument 3 of 'svasrd_n_s8_x' must be an integer constant expression" } */ ++ s8 = svasrd_n_s8_x (pg, s8, one); ++ s8 = svasrd_n_s8_x (pg, s8, 0.4); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_n_s8_x (pg, s8, 1.0); ++ s8 = svasrd_n_s8_x (pg, s8, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_n_s8_x (pg, s8, 1); ++ s8 = svasrd_n_s8_x (pg, s8, 1 + 1); ++ s8 = svasrd_n_s8_x (pg, s8, const_add (1, 1)); ++ s8 = svasrd_n_s8_x (pg, s8, add (1, 1)); /* { dg-error "argument 3 of 'svasrd_n_s8_x' must be an integer constant expression" } */ ++ s8 = svasrd_n_s8_x (pg, s8, 8); ++ s8 = svasrd_n_s8_x (pg, s8, 9); /* { dg-error {passing 9 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_n_s8_x (pg, s8, (uint64_t (1) << 62) + 1); /* { dg-error {passing [^ ]* to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ ++ s16 = svasrd_n_s16_x (pg, s16, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s16_x', which expects a value in the range \[1, 16\]} } */ ++ s16 = svasrd_n_s16_x (pg, s16, 1); ++ s16 = svasrd_n_s16_x (pg, s16, 16); ++ s16 = svasrd_n_s16_x (pg, s16, 17); /* { dg-error {passing 17 to argument 3 of 'svasrd_n_s16_x', which expects a value in the range \[1, 16\]} } */ ++ s32 = svasrd_n_s32_x (pg, s32, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s32_x', which expects a value in the range \[1, 32\]} } */ ++ s32 = svasrd_n_s32_x (pg, s32, 1); ++ s32 = svasrd_n_s32_x (pg, s32, 32); ++ s32 = svasrd_n_s32_x (pg, s32, 33); /* { dg-error {passing 33 to argument 3 of 'svasrd_n_s32_x', which expects a value in the range \[1, 32\]} } */ ++ s64 = svasrd_n_s64_x (pg, s64, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s64_x', which expects a value in the range \[1, 64\]} } */ ++ s64 = svasrd_n_s64_x (pg, s64, 1); ++ s64 = svasrd_n_s64_x (pg, s64, 64); ++ s64 = svasrd_n_s64_x (pg, s64, 65); /* { dg-error {passing 65 to argument 3 of 'svasrd_n_s64_x', which expects a value in the range \[1, 64\]} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_3.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_3.C +new file mode 100644 +index 000000000..5ebd770b2 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_3.C +@@ -0,0 +1,51 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_add (uint64_t a, uint64_t b) { return a + b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++template ++T shift (svbool_t pg, T v) { return svasrd_x (pg, v, N); } ++/* { dg-error {no matching function for call to 'svasrd_x\(svbool_t&,} "" { target *-*-* } .-1 } */ ++/* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} "" { target *-*-* } .-2 } */ ++/* { dg-error {passing 9 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} "" { target *-*-* } .-3 } */ ++/* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} "" { target *-*-* } .-4 } */ ++/* { dg-error {passing 17 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} "" { target *-*-* } .-5 } */ ++/* { dg-error {passing 0 to argument 3 
of 'svasrd_x', which expects a value in the range \[1, 32\]} "" { target *-*-* } .-6 } */ ++/* { dg-error {passing 33 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 32\]} "" { target *-*-* } .-7 } */ ++/* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} "" { target *-*-* } .-8 } */ ++/* { dg-error {passing 65 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} "" { target *-*-* } .-9 } */ ++ ++template ++T shift1 (svbool_t pg, T v, uint64_t n) { return svasrd_x (pg, v, n); } ++ ++template ++T shift2 (svbool_t pg, T v, uint64_t n) { return svasrd_x (pg, v, n); } ++/* { dg-error {argument 3 of 'svasrd_x' must be an integer constant expression} "" { target *-*-* } .-1 } */ ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16, ++ svint32_t s32, svint64_t s64) ++{ ++ u8 = shift <1> (pg, u8); ++ s8 = shift <0> (pg, s8); ++ s8 = shift <1> (pg, s8); ++ s8 = shift <8> (pg, s8); ++ s8 = shift <9> (pg, s8); ++ s16 = shift <0> (pg, s16); ++ s16 = shift <1> (pg, s16); ++ s16 = shift <16> (pg, s16); ++ s16 = shift <17> (pg, s16); ++ s32 = shift <0> (pg, s32); ++ s32 = shift <1> (pg, s32); ++ s32 = shift <32> (pg, s32); ++ s32 = shift <33> (pg, s32); ++ s64 = shift <0> (pg, s64); ++ s64 = shift <1> (pg, s64); ++ s64 = shift <64> (pg, s64); ++ s64 = shift <65> (pg, s64); ++ ++ s8 = shift2 (pg, s8, 1); ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/cntb_pat.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/cntb_pat.c +new file mode 100644 +index 000000000..bbc9f9010 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/cntb_pat.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++test (svpattern pat, int i) ++{ ++ svcntb_pat (pat); /* { dg-error "argument 1 of 'svcntb_pat' must be an integer constant expression" } */ ++ svcntb_pat (i); /* { dg-error "invalid conversion from 'int' to 'svpattern'" } */ ++ /* { dg-error "argument 1 of 'svcntb_pat' must be an integer constant expression" "" { target *-*-* } .-1 } */ ++ svcntb_pat ((svpattern) -1); /* { dg-error "passing 4294967295 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 0); ++ svcntb_pat ((svpattern) 1); ++ svcntb_pat ((svpattern) 2); ++ svcntb_pat ((svpattern) 3); ++ svcntb_pat ((svpattern) 4); ++ svcntb_pat ((svpattern) 5); ++ svcntb_pat ((svpattern) 6); ++ svcntb_pat ((svpattern) 7); ++ svcntb_pat ((svpattern) 8); ++ svcntb_pat ((svpattern) 9); ++ svcntb_pat ((svpattern) 10); ++ svcntb_pat ((svpattern) 11); ++ svcntb_pat ((svpattern) 12); ++ svcntb_pat ((svpattern) 13); ++ svcntb_pat ((svpattern) 14); /* { dg-error "passing 14 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 15); /* { dg-error "passing 15 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 16); /* { dg-error "passing 16 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 17); /* { dg-error "passing 17 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 18); /* { dg-error "passing 18 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 19); /* { dg-error "passing 19 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 20); /* { dg-error 
"passing 20 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 21); /* { dg-error "passing 21 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 22); /* { dg-error "passing 22 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 23); /* { dg-error "passing 23 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 24); /* { dg-error "passing 24 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 25); /* { dg-error "passing 25 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 26); /* { dg-error "passing 26 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 27); /* { dg-error "passing 27 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 28); /* { dg-error "passing 28 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++ svcntb_pat ((svpattern) 29); ++ svcntb_pat ((svpattern) 30); ++ svcntb_pat ((svpattern) 31); ++ svcntb_pat ((svpattern) 32); /* { dg-error "passing 32 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/conversion_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/conversion_1.C +new file mode 100644 +index 000000000..1b939cdf7 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/conversion_1.C +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++template ++struct S ++{ ++ S(T); ++ operator T() const; ++ void *base; ++}; ++ ++void f(svbool_t pg, const S &u8a, const S &u8b, ++ const S &s8a) ++{ ++ svadd_x(pg, u8a, u8b); ++ svadd_x(pg, u8a, 1); ++ svadd_x(pg, s8a, u8b); // { dg-error "no matching function for call" } ++ svadd_x(pg, s8a, 1); ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_1.C +new file mode 100644 +index 000000000..247fd85ec +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_1.C +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, ++ svuint8x2_t u8x2) ++{ ++ *ptr = svcreate2 (u8); /* { dg-error {no matching function for call to 'svcreate2\(svuint8_t\&\)'} } */ ++ *ptr = svcreate2 (u8, u8, u8); /* { dg-error {no matching function for call to 'svcreate2\(svuint8_t\&, svuint8_t\&, svuint8_t\&\)'} } */ ++ *ptr = svcreate2 (u8x2, u8x2); /* { dg-error {no matching function for call to 'svcreate2\(svuint8x2_t\&, svuint8x2_t\&\)'} } */ ++ *ptr = svcreate2 (u8, f64); /* { dg-error {no matching function for call to 'svcreate2\(svuint8_t\&, svfloat64_t\&\)'} } */ ++ *ptr = svcreate2 (u8, pg); /* { dg-error {no matching function for call to 'svcreate2\(svuint8_t\&, svbool_t\&\)'} } */ ++ *ptr = svcreate2 (u8, u8); ++ *ptr = svcreate2 (f64, f64); /* { dg-error {cannot convert 'svfloat64x2_t' to 'svuint8x2_t' in assignment} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_2.C +new file mode 100644 +index 000000000..10f3231fa +--- /dev/null 
++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_2.C +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, ++ svuint8x2_t u8x2) ++{ ++ *ptr = svcreate2_u8 (u8); /* { dg-error {too few arguments to function '[^']*'} } */ ++ *ptr = svcreate2_u8 (u8, u8, u8); /* { dg-error {too many arguments to function '[^']*'} } */ ++ *ptr = svcreate2_u8 (u8x2, u8x2); /* { dg-error {cannot convert 'svuint8x2_t' to 'svuint8_t'} } */ ++ *ptr = svcreate2_u8 (u8, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svuint8_t'} } */ ++ *ptr = svcreate2_u8 (pg, u8); /* { dg-error {cannot convert 'svbool_t' to 'svuint8_t'} } */ ++ *ptr = svcreate2_u8 (u8, u8); ++ *ptr = svcreate2_f64 (f64, f64); /* { dg-error {cannot convert 'svfloat64x2_t' to 'svuint8x2_t' in assignment} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_1.C +new file mode 100644 +index 000000000..ff013634d +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_1.C +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, ++ svfloat16x3_t f16x3) ++{ ++ *ptr = svcreate3 (f16); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16_t\&\)'} } */ ++ *ptr = svcreate3 (f16, f16); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16_t\&, svfloat16_t\&\)'} } */ ++ *ptr = svcreate3 (f16, f16, f16, f16); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16_t\&, svfloat16_t\&, svfloat16_t\&, svfloat16_t\&\)'} } */ ++ *ptr = svcreate3 (f16x3, f16x3, f16x3); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16x3_t\&, svfloat16x3_t\&, svfloat16x3_t\&\)'} } */ ++ *ptr = svcreate3 (f16, f16, f64); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16_t\&, svfloat16_t\&, svfloat64_t\&\)'} } */ ++ *ptr = svcreate3 (f16, pg, f16); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16_t\&, svbool_t\&, svfloat16_t\&\)'} } */ ++ *ptr = svcreate3 (f16, f16, f16); ++ *ptr = svcreate3 (f64, f64, f64); /* { dg-error {cannot convert 'svfloat64x3_t' to 'svfloat16x3_t' in assignment} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_2.C +new file mode 100644 +index 000000000..07a72b1e2 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_2.C +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, ++ svfloat16x3_t f16x3) ++{ ++ *ptr = svcreate3_f16 (f16); /* { dg-error {too few arguments to function '[^']*'} } */ ++ *ptr = svcreate3_f16 (f16, f16); /* { dg-error {too few arguments to function '[^']*'} } */ ++ *ptr = svcreate3_f16 (f16, f16, f16, f16); /* { dg-error {too many arguments to function '[^']*'} } */ ++ *ptr = svcreate3_f16 (f16x3, f16x3, f16x3); /* { dg-error {cannot convert 'svfloat16x3_t' to 'svfloat16_t'} } */ ++ *ptr = svcreate3_f16 (f16, f16, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svfloat16_t'} } */ ++ *ptr = svcreate3_f16 (f16, pg, f16); /* { dg-error {cannot convert 
'svbool_t' to 'svfloat16_t'} } */ ++ *ptr = svcreate3_f16 (f16, f16, f16); ++ *ptr = svcreate3_f64 (f64, f64, f64); /* { dg-error {cannot convert 'svfloat64x3_t' to 'svfloat16x3_t' in assignment} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_1.C +new file mode 100644 +index 000000000..2785d9011 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_1.C +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64, ++ svint32x4_t s32x4) ++{ ++ *ptr = svcreate4 (s32); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&\)'} } */ ++ *ptr = svcreate4 (s32, s32); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&, svint32_t\&\)'} } */ ++ *ptr = svcreate4 (s32, s32, s32); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&, svint32_t\&, svint32_t\&\)'} } */ ++ *ptr = svcreate4 (s32, s32, s32, s32, s32); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&, svint32_t\&, svint32_t\&, svint32_t\&, svint32_t\&\)'} } */ ++ *ptr = svcreate4 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {no matching function for call to 'svcreate4\(svint32x4_t\&, svint32x4_t\&, svint32x4_t\&, svint32x4_t\&\)'} } */ ++ *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&, svint32_t\&, svint32_t\&, svfloat64_t\&\)'} } */ ++ *ptr = svcreate4 (s32, pg, s32, s32); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&, svbool_t\&, svint32_t\&, svint32_t\&\)'} } */ ++ *ptr = svcreate4 (s32, s32, s32, s32); ++ *ptr = svcreate4 (f64, f64, f64, f64); /* { dg-error {cannot convert 'svfloat64x4_t' to 'svint32x4_t' in assignment} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_2.C +new file mode 100644 +index 000000000..68f21a1d4 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_2.C +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64, ++ svint32x4_t s32x4) ++{ ++ *ptr = svcreate4_s32 (s32); /* { dg-error {too few arguments to function '[^']*'} } */ ++ *ptr = svcreate4_s32 (s32, s32); /* { dg-error {too few arguments to function '[^']*'} } */ ++ *ptr = svcreate4_s32 (s32, s32, s32); /* { dg-error {too few arguments to function '[^']*'} } */ ++ *ptr = svcreate4_s32 (s32, s32, s32, s32, s32); /* { dg-error {too many arguments to function '[^']*'} } */ ++ *ptr = svcreate4_s32 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {cannot convert 'svint32x4_t' to 'svint32_t'} } */ ++ *ptr = svcreate4_s32 (s32, s32, s32, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svint32_t'} } */ ++ *ptr = svcreate4_s32 (s32, pg, s32, s32); /* { dg-error {cannot convert 'svbool_t' to 'svint32_t'} } */ ++ *ptr = svcreate4_s32 (s32, s32, s32, s32); ++ *ptr = svcreate4_f64 (f64, f64, f64, f64); /* { dg-error {cannot convert 'svfloat64x4_t' to 'svint32x4_t' in assignment} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.C +new file mode 100644 +index 000000000..93397c82f +--- /dev/null 
++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.C +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++ ++#include "dot_1.h" ++ ++svuint32_t ++f1 (svuint32_t x, svint8_t y, svuint8_t z) ++{ ++ return svdot_u32 (x, y, z); /* { dg-error "cannot convert 'svint8_t' to 'svuint8_t'" } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.h b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.h +new file mode 100644 +index 000000000..aef02f20b +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.h +@@ -0,0 +1,2 @@ ++#pragma GCC system_header ++#pragma GCC aarch64 "arm_sve.h" /* { dg-message "initializing argument 2" } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.C +new file mode 100644 +index 000000000..2084ed828 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.C +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++ ++#include "dot_2.h" ++ ++void ++f1 (svuint32_t x, svint8_t y, svuint8_t z) ++{ ++ svdot (x, y); /* { dg-error {no matching function for call to 'svdot\(svuint32_t&, svint8_t&\)'} } */ ++ svdot (x, x, x); /* { dg-error {no matching function for call to 'svdot\(svuint32_t&, svuint32_t&, svuint32_t&\)'} } */ ++ svdot (1, z, z); /* { dg-error {no matching function for call to 'svdot\(int, svuint8_t&, svuint8_t&\)'} } */ ++ svdot (x, y, z); /* { dg-error {no matching function for call to 'svdot\(svuint32_t&, svint8_t&, svuint8_t&\)'} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.h b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.h +new file mode 100644 +index 000000000..3e4a9c794 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.h +@@ -0,0 +1,7 @@ ++#pragma GCC system_header ++#pragma GCC aarch64 "arm_sve.h" ++/* { dg-message {note: candidate: 'svuint32_t svdot\(svuint32_t, svuint8_t, svuint8_t\)'} "" { target *-*-* } 3 } */ ++/* { dg-message {note: *candidate expects 3 arguments, 2 provided} "" { target *-*-* } 3 } */ ++/* { dg-message {note: *no known conversion for argument 2 from 'svuint32_t' to 'svuint8_t'} "" { target *-*-* } 3 } */ ++/* { dg-message {note: *no known conversion for argument 1 from 'int' to 'svuint32_t'} "" { target *-*-* } 3 } */ ++/* { dg-message {note: *no known conversion for argument 2 from 'svint8_t' to 'svuint8_t'} "" { target *-*-* } 3 } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_1.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_1.c +new file mode 100644 +index 000000000..8f18810c0 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_1.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svadd_n_u8_x; /* { dg-message "note: previous declaration 'int svadd_n_u8_x'" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svuint8_t svadd_n_u8_x\(svbool_t, svuint8_t, [^)\n]*\)' redeclared as different kind of entity} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_2.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_2.c +new file mode 100644 +index 000000000..a67f9f756 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_2.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svadd_n_u8_x = 1; /* { dg-message "note: previous declaration 'int svadd_n_u8_x'" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { 
dg-error {'svuint8_t svadd_n_u8_x\(svbool_t, svuint8_t, [^)\n]*\)' redeclared as different kind of entity} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_3.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_3.c +new file mode 100644 +index 000000000..74b820fe6 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_3.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++ ++/* Although not supported, there's nothing to stop the user overloading ++ the sv* functions. */ ++extern __SVInt8_t svadd_u8_x (__SVBool_t, __SVInt8_t, __SVInt8_t); ++ ++#pragma GCC aarch64 "arm_sve.h" +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c +new file mode 100644 +index 000000000..9591e3d01 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++ ++/* Although somewhat suspect, this isn't actively wrong, and doesn't need ++ to be diagnosed. Any attempt to call the function before including ++ arm_sve.h will lead to a link failure. (Same for taking its address, ++ etc.) */ ++extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t); ++ ++#pragma GCC aarch64 "arm_sve.h" +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c +new file mode 100644 +index 000000000..f87201984 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++__SVUint8_t ++svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) ++{ ++ return x; ++} ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svuint8_t ++f (svbool_t pg, svuint8_t x, svuint8_t y) ++{ ++ return svadd_u8_x (pg, x, y); ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_6.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_6.c +new file mode 100644 +index 000000000..a65e0d65c +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_6.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++typedef int svadd_u8_x; /* { dg-message "note: previous declaration 'typedef int svadd_u8_x'" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svuint8_t svadd_u8_x\(svbool_t, svuint8_t, svuint8_t\)' redeclared as different kind of entity} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c +new file mode 100644 +index 000000000..1f2e4bf66 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++__SVUint8_t ++svadd_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) ++{ ++ return x; ++} ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svuint8_t ++f (svbool_t pg, svuint8_t x, svuint8_t y) ++{ ++ return svadd_x (pg, x, y); ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_1.C +new file mode 100644 +index 000000000..8d6bb2307 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_1.C +@@ -0,0 +1,39 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } 
++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ u8 = svget2 (u8x2); /* { dg-error {no matching function for call to 'svget2\(svuint8x2_t\&\)'} } */ ++ u8 = svget2 (u8x2, 1, 2); /* { dg-error {no matching function for call to 'svget2\(svuint8x2_t\&, int, int\)'} } */ ++ u8 = svget2 (u8, 0); /* { dg-error {no matching function for call to 'svget2\(svuint8_t\&, int\)'} } */ ++ u8 = svget2 (u8x3, 0); /* { dg-error {no matching function for call to 'svget2\(svuint8x3_t\&, int\)'} } */ ++ u8 = svget2 (pg, 0); /* { dg-error {no matching function for call to 'svget2\(svbool_t\&, int\)'} } */ ++ u8 = svget2 (u8x2, x); /* { dg-error "argument 2 of 'svget2' must be an integer constant expression" } */ ++ u8 = svget2 (u8x2, 0); ++ f64 = svget2 (u8x2, 0); /* { dg-error "cannot convert 'svuint8_t' to 'svfloat64_t' in assignment" } */ ++ u8 = svget2 (u8x2, 1); ++ u8 = svget2 (u8x2, 2); /* { dg-error {passing 2 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2 (u8x2, 3); /* { dg-error {passing 3 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2 (u8x2, 4); /* { dg-error {passing 4 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2 (u8x2, 5); /* { dg-error {passing 5 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2 (u8x2, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2 (u8x2, one); ++ u8 = svget2 (u8x2, 3 - 2); ++ u8 = svget2 (u8x2, 1.0); ++ u8 = svget2 (u8x2, const_sub (5, 4)); ++ u8 = svget2 (u8x2, const_sub (6, 4)); /* { dg-error {passing 2 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2 (u8x2, const_sub (7, 4)); /* { dg-error {passing 3 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2 (u8x2, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2 (u8x2, add (0, 0)); /* { dg-error "argument 2 of 'svget2' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_2.C +new file mode 100644 +index 000000000..9c7674be1 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_2.C +@@ -0,0 +1,39 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ u8 = svget2_u8 (u8x2); /* { dg-error {too few arguments to function '[^']*'} } */ ++ u8 = svget2_u8 (u8x2, 1, 2); /* { dg-error {too many arguments to function '[^']*'} } */ ++ u8 = svget2_u8 (u8, 0); /* { dg-error {cannot convert 'svuint8_t' to 'svuint8x2_t'} } */ ++ u8 = svget2_u8 (u8x3, 0); /* { dg-error {cannot convert 'svuint8x3_t' to 'svuint8x2_t'} } */ ++ u8 = svget2_u8 (pg, 0); /* { dg-error {cannot convert 'svbool_t' to 'svuint8x2_t'} } */ ++ u8 = svget2_u8 (u8x2, x); /* { dg-error "argument 2 of 'svget2_u8' 
must be an integer constant expression" } */ ++ u8 = svget2_u8 (u8x2, 0); ++ f64 = svget2_u8 (u8x2, 0); /* { dg-error "cannot convert 'svuint8_t' to 'svfloat64_t' in assignment" } */ ++ u8 = svget2_u8 (u8x2, 1); ++ u8 = svget2_u8 (u8x2, 2); /* { dg-error {passing 2 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2_u8 (u8x2, 3); /* { dg-error {passing 3 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2_u8 (u8x2, 4); /* { dg-error {passing 4 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2_u8 (u8x2, 5); /* { dg-error {passing 5 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2_u8 (u8x2, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2_u8 (u8x2, one); ++ u8 = svget2_u8 (u8x2, 3 - 2); ++ u8 = svget2_u8 (u8x2, 1.0); ++ u8 = svget2_u8 (u8x2, const_sub (5, 4)); ++ u8 = svget2_u8 (u8x2, const_sub (6, 4)); /* { dg-error {passing 2 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2_u8 (u8x2, const_sub (7, 4)); /* { dg-error {passing 3 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2_u8 (u8x2, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8 = svget2_u8 (u8x2, add (0, 0)); /* { dg-error "argument 2 of 'svget2_u8' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_1.C +new file mode 100644 +index 000000000..bd8808a8b +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_1.C +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4, ++ int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ f16 = svget3 (f16x3); /* { dg-error {no matching function for call to 'svget3\(svfloat16x3_t\&\)'} } */ ++ f16 = svget3 (f16x3, 1, 2); /* { dg-error {no matching function for call to 'svget3\(svfloat16x3_t\&, int, int\)'} } */ ++ f16 = svget3 (f16, 0); /* { dg-error {no matching function for call to 'svget3\(svfloat16_t\&, int\)'} } */ ++ f16 = svget3 (f16x4, 0); /* { dg-error {no matching function for call to 'svget3\(svfloat16x4_t\&, int\)'} } */ ++ f16 = svget3 (pg, 0); /* { dg-error {no matching function for call to 'svget3\(svbool_t\&, int\)'} } */ ++ f16 = svget3 (f16x3, x); /* { dg-error "argument 2 of 'svget3' must be an integer constant expression" } */ ++ f16 = svget3 (f16x3, 0); ++ f64 = svget3 (f16x3, 0); /* { dg-error "cannot convert 'svfloat16_t' to 'svfloat64_t' in assignment" } */ ++ f16 = svget3 (f16x3, 1); ++ f16 = svget3 (f16x3, 2); ++ f16 = svget3 (f16x3, 3); /* { dg-error {passing 3 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3 (f16x3, 4); /* { dg-error {passing 4 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3 (f16x3, 5); /* { dg-error {passing 5 to argument 2 of 'svget3', which expects a value in the range \[0, 
2\]} } */ ++ f16 = svget3 (f16x3, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3 (f16x3, one); ++ f16 = svget3 (f16x3, 3 - 2); ++ f16 = svget3 (f16x3, 1.0); ++ f16 = svget3 (f16x3, const_sub (5, 4)); ++ f16 = svget3 (f16x3, const_sub (6, 4)); ++ f16 = svget3 (f16x3, const_sub (7, 4)); /* { dg-error {passing 3 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3 (f16x3, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3 (f16x3, add (0, 0)); /* { dg-error "argument 2 of 'svget3' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_2.C +new file mode 100644 +index 000000000..d526947d1 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_2.C +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4, ++ int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ f16 = svget3_f16 (f16x3); /* { dg-error {too few arguments to function '[^']*'} } */ ++ f16 = svget3_f16 (f16x3, 1, 2); /* { dg-error {too many arguments to function '[^']*'} } */ ++ f16 = svget3_f16 (f16, 0); /* { dg-error {cannot convert 'svfloat16_t' to 'svfloat16x3_t'} } */ ++ f16 = svget3_f16 (f16x4, 0); /* { dg-error {cannot convert 'svfloat16x4_t' to 'svfloat16x3_t'} } */ ++ f16 = svget3_f16 (pg, 0); /* { dg-error {cannot convert 'svbool_t' to 'svfloat16x3_t'} } */ ++ f16 = svget3_f16 (f16x3, x); /* { dg-error "argument 2 of 'svget3_f16' must be an integer constant expression" } */ ++ f16 = svget3_f16 (f16x3, 0); ++ f64 = svget3_f16 (f16x3, 0); /* { dg-error "cannot convert 'svfloat16_t' to 'svfloat64_t' in assignment" } */ ++ f16 = svget3_f16 (f16x3, 1); ++ f16 = svget3_f16 (f16x3, 2); ++ f16 = svget3_f16 (f16x3, 3); /* { dg-error {passing 3 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3_f16 (f16x3, 4); /* { dg-error {passing 4 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3_f16 (f16x3, 5); /* { dg-error {passing 5 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3_f16 (f16x3, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3_f16 (f16x3, one); ++ f16 = svget3_f16 (f16x3, 3 - 2); ++ f16 = svget3_f16 (f16x3, 1.0); ++ f16 = svget3_f16 (f16x3, const_sub (5, 4)); ++ f16 = svget3_f16 (f16x3, const_sub (6, 4)); ++ f16 = svget3_f16 (f16x3, const_sub (7, 4)); /* { dg-error {passing 3 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3_f16 (f16x3, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16 = svget3_f16 (f16x3, add (0, 0)); /* { dg-error "argument 2 of 'svget3_f16' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_1.C 
b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_1.C +new file mode 100644 +index 000000000..19853dece +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_1.C +@@ -0,0 +1,39 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ s32 = svget4 (s32x4); /* { dg-error {no matching function for call to 'svget4\(svint32x4_t\&\)'} } */ ++ s32 = svget4 (s32x4, 1, 2); /* { dg-error {no matching function for call to 'svget4\(svint32x4_t\&, int, int\)'} } */ ++ s32 = svget4 (s32, 0); /* { dg-error {no matching function for call to 'svget4\(svint32_t\&, int\)'} } */ ++ s32 = svget4 (s32x2, 0); /* { dg-error {no matching function for call to 'svget4\(svint32x2_t\&, int\)'} } */ ++ s32 = svget4 (pg, 0); /* { dg-error {no matching function for call to 'svget4\(svbool_t\&, int\)'} } */ ++ s32 = svget4 (s32x4, x); /* { dg-error "argument 2 of 'svget4' must be an integer constant expression" } */ ++ s32 = svget4 (s32x4, 0); ++ f64 = svget4 (s32x4, 0); /* { dg-error "cannot convert 'svint32_t' to 'svfloat64_t' in assignment" } */ ++ s32 = svget4 (s32x4, 1); ++ s32 = svget4 (s32x4, 2); ++ s32 = svget4 (s32x4, 3); ++ s32 = svget4 (s32x4, 4); /* { dg-error {passing 4 to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ ++ s32 = svget4 (s32x4, 5); /* { dg-error {passing 5 to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ ++ s32 = svget4 (s32x4, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ ++ s32 = svget4 (s32x4, one); ++ s32 = svget4 (s32x4, 3 - 2); ++ s32 = svget4 (s32x4, 1.0); ++ s32 = svget4 (s32x4, const_sub (5, 4)); ++ s32 = svget4 (s32x4, const_sub (6, 4)); ++ s32 = svget4 (s32x4, const_sub (7, 4)); ++ s32 = svget4 (s32x4, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ ++ s32 = svget4 (s32x4, add (0, 0)); /* { dg-error "argument 2 of 'svget4' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_2.C +new file mode 100644 +index 000000000..7a0979225 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_2.C +@@ -0,0 +1,39 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ s32 = svget4_s32 (s32x4); /* { dg-error {too few arguments to function '[^']*'} } */ ++ s32 = svget4_s32 (s32x4, 1, 2); /* { dg-error {too many arguments to function '[^']*'} } */ ++ s32 = svget4_s32 (s32, 0); /* { dg-error {cannot convert 'svint32_t' to 'svint32x4_t'} } */ ++ s32 = svget4_s32 (s32x2, 0); /* { dg-error {cannot convert 'svint32x2_t' to 'svint32x4_t'} } */ ++ s32 = svget4_s32 (pg, 0); /* { dg-error {cannot convert 'svbool_t' to 'svint32x4_t'} } */ ++ s32 = svget4_s32 
(s32x4, x); /* { dg-error "argument 2 of 'svget4_s32' must be an integer constant expression" } */ ++ s32 = svget4_s32 (s32x4, 0); ++ f64 = svget4_s32 (s32x4, 0); /* { dg-error "cannot convert 'svint32_t' to 'svfloat64_t' in assignment" } */ ++ s32 = svget4_s32 (s32x4, 1); ++ s32 = svget4_s32 (s32x4, 2); ++ s32 = svget4_s32 (s32x4, 3); ++ s32 = svget4_s32 (s32x4, 4); /* { dg-error {passing 4 to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32 = svget4_s32 (s32x4, 5); /* { dg-error {passing 5 to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32 = svget4_s32 (s32x4, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32 = svget4_s32 (s32x4, one); ++ s32 = svget4_s32 (s32x4, 3 - 2); ++ s32 = svget4_s32 (s32x4, 1.0); ++ s32 = svget4_s32 (s32x4, const_sub (5, 4)); ++ s32 = svget4_s32 (s32x4, const_sub (6, 4)); ++ s32 = svget4_s32 (s32x4, const_sub (7, 4)); ++ s32 = svget4_s32 (s32x4, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32 = svget4_s32 (s32x4, add (0, 0)); /* { dg-error "argument 2 of 'svget4_s32' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_1.C +new file mode 100644 +index 000000000..fb31e947d +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_1.C +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++svuint8_t ++f1 (svbool_t pg, svuint8_t x, svint8_t w, svuint64_t y) ++{ ++ svlsl_wide_u8_x (pg, x, x); /* { dg-error "cannot convert 'svuint8_t' to 'svuint64_t'" } */ ++ svlsl_wide_u8_x (pg, x); /* { dg-error {too few arguments to function 'svuint8_t svlsl_wide_u8_x\(svbool_t, svuint8_t, svuint64_t\)'} } */ ++ svlsl_wide_u8_x (pg, x, y, x); /* { dg-error {too many arguments to function 'svuint8_t svlsl_wide_u8_x\(svbool_t, svuint8_t, svuint64_t\)'} } */ ++ return svlsl_wide_s8_x (pg, w, y); /* { dg-error {cannot convert 'svint8_t' to 'svuint8_t' in return} } */ ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_2.C +new file mode 100644 +index 000000000..95d341dc5 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_2.C +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++void ++f1 (svbool_t pg, svuint8_t x, svuint64_t y) ++{ ++ svlsl_wide_x (pg, x); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svbool_t&, svuint8_t&\)'} } */ ++ svlsl_wide_x (pg, x, x, x, x); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svbool_t&, svuint8_t&, svuint8_t&, svuint8_t&, svuint8_t&\)'} } */ ++ svlsl_wide_x (x, x, y); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svuint8_t&, svuint8_t&, svuint64_t&\)'} } */ ++ svlsl_wide_x (pg, 1, y); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svbool_t&, int, svuint64_t&\)'} } */ ++ svlsl_wide_x (pg, x, x); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svbool_t&, svuint8_t&, svuint8_t&\)'} } */ ++ svlsl_wide_x (pg, y, y); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svbool_t&, svuint64_t&, svuint64_t&\)'} } */ ++} +diff --git 
a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_1.C +new file mode 100644 +index 000000000..1a1712485 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_1.C +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void f1(svbool_t) {} ++void f2(svint8_t) {} ++void f3(svint16_t) {} ++void f4(svint32_t) {} ++void f5(svint64_t) {} ++void f6(svuint8_t) {} ++void f7(svuint16_t) {} ++void f8(svuint32_t) {} ++void f9(svuint64_t) {} ++void f10(svfloat16_t) {} ++void f11(svfloat32_t) {} ++void f12(svfloat64_t) {} ++void f13(svbfloat16_t) {} ++ ++/* { dg-final { scan-assembler "_Z2f110__SVBool_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f210__SVInt8_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f311__SVInt16_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f411__SVInt32_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f511__SVInt64_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f611__SVUint8_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f712__SVUint16_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f812__SVUint32_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f912__SVUint64_t:" } } */ ++/* { dg-final { scan-assembler "_Z3f1013__SVFloat16_t:" } } */ ++/* { dg-final { scan-assembler "_Z3f1113__SVFloat32_t:" } } */ ++/* { dg-final { scan-assembler "_Z3f1213__SVFloat64_t:" } } */ ++/* { dg-final { scan-assembler "_Z3f1314__SVBfloat16_t:" } } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_2.C +new file mode 100644 +index 000000000..6792b8a31 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_2.C +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++ ++void f1(__SVBool_t) {} ++void f2(__SVInt8_t) {} ++void f3(__SVInt16_t) {} ++void f4(__SVInt32_t) {} ++void f5(__SVInt64_t) {} ++void f6(__SVUint8_t) {} ++void f7(__SVUint16_t) {} ++void f8(__SVUint32_t) {} ++void f9(__SVUint64_t) {} ++void f10(__SVFloat16_t) {} ++void f11(__SVFloat32_t) {} ++void f12(__SVFloat64_t) {} ++void f13(__SVBfloat16_t) {} ++ ++/* { dg-final { scan-assembler "_Z2f110__SVBool_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f210__SVInt8_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f311__SVInt16_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f411__SVInt32_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f511__SVInt64_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f611__SVUint8_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f712__SVUint16_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f812__SVUint32_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f912__SVUint64_t:" } } */ ++/* { dg-final { scan-assembler "_Z3f1013__SVFloat16_t:" } } */ ++/* { dg-final { scan-assembler "_Z3f1113__SVFloat32_t:" } } */ ++/* { dg-final { scan-assembler "_Z3f1213__SVFloat64_t:" } } */ ++/* { dg-final { scan-assembler "_Z3f1314__SVBfloat16_t:" } } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_3.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_3.C +new file mode 100644 +index 000000000..8f64f7c2e +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_3.C +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-msve-vector-bits=256" } */ ++ ++#include ++ ++typedef __SVInt8_t t1; ++typedef svint8_t t2; ++/* Distinct from svint8_t, but compatible with it. 
*/ ++typedef int8_t t3 __attribute__((vector_size(32))); ++ ++void f1(t1) {} ++void f2(t2) {} ++void f3(t3) {} ++void f4(t1 &a, t2 &b, t3 &c) { a = b = c; } ++ ++/* { dg-final { scan-assembler "_Z2f110__SVInt8_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f210__SVInt8_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f3Dv32_a:" } } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_4.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_4.C +new file mode 100644 +index 000000000..7cdc6cb0c +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_4.C +@@ -0,0 +1,75 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void f1(svint8x2_t) {} ++void f2(svint16x2_t) {} ++void f3(svint32x2_t) {} ++void f4(svint64x2_t) {} ++void f5(svuint8x2_t) {} ++void f6(svuint16x2_t) {} ++void f7(svuint32x2_t) {} ++void f8(svuint64x2_t) {} ++void f9(svfloat16x2_t) {} ++void f10(svfloat32x2_t) {} ++void f11(svfloat64x2_t) {} ++ ++void g1(svint8x3_t) {} ++void g2(svint16x3_t) {} ++void g3(svint32x3_t) {} ++void g4(svint64x3_t) {} ++void g5(svuint8x3_t) {} ++void g6(svuint16x3_t) {} ++void g7(svuint32x3_t) {} ++void g8(svuint64x3_t) {} ++void g9(svfloat16x3_t) {} ++void g10(svfloat32x3_t) {} ++void g11(svfloat64x3_t) {} ++ ++void h1(svint8x4_t) {} ++void h2(svint16x4_t) {} ++void h3(svint32x4_t) {} ++void h4(svint64x4_t) {} ++void h5(svuint8x4_t) {} ++void h6(svuint16x4_t) {} ++void h7(svuint32x4_t) {} ++void h8(svuint64x4_t) {} ++void h9(svfloat16x4_t) {} ++void h10(svfloat32x4_t) {} ++void h11(svfloat64x4_t) {} ++ ++/* { dg-final { scan-assembler "_Z2f110svint8x2_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f211svint16x2_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f311svint32x2_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f411svint64x2_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f511svuint8x2_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f612svuint16x2_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f712svuint32x2_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f812svuint64x2_t:" } } */ ++/* { dg-final { scan-assembler "_Z2f913svfloat16x2_t:" } } */ ++/* { dg-final { scan-assembler "_Z3f1013svfloat32x2_t:" } } */ ++/* { dg-final { scan-assembler "_Z3f1113svfloat64x2_t:" } } */ ++ ++/* { dg-final { scan-assembler "_Z2g110svint8x3_t:" } } */ ++/* { dg-final { scan-assembler "_Z2g211svint16x3_t:" } } */ ++/* { dg-final { scan-assembler "_Z2g311svint32x3_t:" } } */ ++/* { dg-final { scan-assembler "_Z2g411svint64x3_t:" } } */ ++/* { dg-final { scan-assembler "_Z2g511svuint8x3_t:" } } */ ++/* { dg-final { scan-assembler "_Z2g612svuint16x3_t:" } } */ ++/* { dg-final { scan-assembler "_Z2g712svuint32x3_t:" } } */ ++/* { dg-final { scan-assembler "_Z2g812svuint64x3_t:" } } */ ++/* { dg-final { scan-assembler "_Z2g913svfloat16x3_t:" } } */ ++/* { dg-final { scan-assembler "_Z3g1013svfloat32x3_t:" } } */ ++/* { dg-final { scan-assembler "_Z3g1113svfloat64x3_t:" } } */ ++ ++/* { dg-final { scan-assembler "_Z2h110svint8x4_t:" } } */ ++/* { dg-final { scan-assembler "_Z2h211svint16x4_t:" } } */ ++/* { dg-final { scan-assembler "_Z2h311svint32x4_t:" } } */ ++/* { dg-final { scan-assembler "_Z2h411svint64x4_t:" } } */ ++/* { dg-final { scan-assembler "_Z2h511svuint8x4_t:" } } */ ++/* { dg-final { scan-assembler "_Z2h612svuint16x4_t:" } } */ ++/* { dg-final { scan-assembler "_Z2h712svuint32x4_t:" } } */ ++/* { dg-final { scan-assembler "_Z2h812svuint64x4_t:" } } */ ++/* { dg-final { scan-assembler "_Z2h913svfloat16x4_t:" } } */ ++/* { dg-final { 
scan-assembler "_Z3h1013svfloat32x4_t:" } } */ ++/* { dg-final { scan-assembler "_Z3h1113svfloat64x4_t:" } } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_1.C +new file mode 100644 +index 000000000..80c3ad74f +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_1.C +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svint8x2_t s8x2, ++ svuint8x3_t u8x3, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ u8x2 = svset2 (u8x2); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&\)'} } */ ++ u8x2 = svset2 (u8x2, 1); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&, int\)'} } */ ++ u8x2 = svset2 (u8x2, 1, u8, 2); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&, int, svuint8_t\&, int\)'} } */ ++ u8x2 = svset2 (u8, 0, u8); /* { dg-error {no matching function for call to 'svset2\(svuint8_t\&, int, svuint8_t\&\)'} } */ ++ u8x2 = svset2 (s8x2, 0, u8); /* { dg-error {no matching function for call to 'svset2\(svint8x2_t\&, int, svuint8_t\&\)'} } */ ++ u8x2 = svset2 (u8x3, 0, u8); /* { dg-error {no matching function for call to 'svset2\(svuint8x3_t\&, int, svuint8_t\&\)'} } */ ++ u8x2 = svset2 (pg, 0, u8); /* { dg-error {no matching function for call to 'svset2\(svbool_t\&, int, svuint8_t\&\)'} } */ ++ u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&, int, svfloat64_t\&\)'} } */ ++ u8x2 = svset2 (u8x2, 0, u8x2); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&, int, svuint8x2_t\&\)'} } */ ++ u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&, int, svbool_t\&\)'} } */ ++ u8x2 = svset2 (u8x2, x, u8); /* { dg-error "argument 2 of 'svset2' must be an integer constant expression" } */ ++ u8x2 = svset2 (u8x2, 0, u8); ++ s8x2 = svset2 (u8x2, 0, u8); /* { dg-error {cannot convert 'svuint8x2_t' to 'svint8x2_t' in assignment} } */ ++ u8x2 = svset2 (u8x2, 1, u8); ++ u8x2 = svset2 (u8x2, 2, u8); /* { dg-error {passing 2 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, 3, u8); /* { dg-error {passing 3 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, 4, u8); /* { dg-error {passing 4 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, 5, u8); /* { dg-error {passing 5 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, ~0U, u8); /* { dg-error {passing [^ ]* to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, one, u8); ++ u8x2 = svset2 (u8x2, 3 - 2, u8); ++ u8x2 = svset2 (u8x2, 1.0, u8); ++ u8x2 = svset2 (u8x2, const_sub (5, 4), u8); ++ u8x2 = svset2 (u8x2, const_sub (6, 4), u8); /* { dg-error {passing 2 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, const_sub (7, 4), u8); /* { dg-error {passing 3 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, const_sub (8, 4), u8); /* { dg-error {passing 4 
to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, add (0, 0), u8); /* { dg-error "argument 2 of 'svset2' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_2.C +new file mode 100644 +index 000000000..1433b78ba +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_2.C +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svint8x2_t s8x2, ++ svuint8x3_t u8x3, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ u8x2 = svset2_u8 (u8x2); /* { dg-error {too few arguments to function '[^']*'} } */ ++ u8x2 = svset2_u8 (u8x2, 1); /* { dg-error {too few arguments to function '[^']*'} } */ ++ u8x2 = svset2_u8 (u8x2, 1, u8, 2); /* { dg-error {too many arguments to function '[^']*'} } */ ++ u8x2 = svset2_u8 (u8, 0, u8); /* { dg-error {cannot convert 'svuint8_t' to 'svuint8x2_t'} } */ ++ u8x2 = svset2_u8 (s8x2, 0, u8); /* { dg-error {cannot convert 'svint8x2_t' to 'svuint8x2_t'} } */ ++ u8x2 = svset2_u8 (u8x3, 0, u8); /* { dg-error {cannot convert 'svuint8x3_t' to 'svuint8x2_t'} } */ ++ u8x2 = svset2_u8 (pg, 0, u8); /* { dg-error {cannot convert 'svbool_t' to 'svuint8x2_t'} } */ ++ u8x2 = svset2_u8 (u8x2, 0, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svuint8_t'} } */ ++ u8x2 = svset2_u8 (u8x2, 0, u8x2); /* { dg-error {cannot convert 'svuint8x2_t' to 'svuint8_t'} } */ ++ u8x2 = svset2_u8 (u8x2, 0, pg); /* { dg-error {cannot convert 'svbool_t' to 'svuint8_t'} } */ ++ u8x2 = svset2_u8 (u8x2, x, u8); /* { dg-error "argument 2 of 'svset2_u8' must be an integer constant expression" } */ ++ u8x2 = svset2_u8 (u8x2, 0, u8); ++ s8x2 = svset2_u8 (u8x2, 0, u8); /* { dg-error {cannot convert 'svuint8x2_t' to 'svint8x2_t' in assignment} } */ ++ u8x2 = svset2_u8 (u8x2, 1, u8); ++ u8x2 = svset2_u8 (u8x2, 2, u8); /* { dg-error {passing 2 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, 3, u8); /* { dg-error {passing 3 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, 4, u8); /* { dg-error {passing 4 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, 5, u8); /* { dg-error {passing 5 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, ~0U, u8); /* { dg-error {passing [^ ]* to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, one, u8); ++ u8x2 = svset2_u8 (u8x2, 3 - 2, u8); ++ u8x2 = svset2_u8 (u8x2, 1.0, u8); ++ u8x2 = svset2_u8 (u8x2, const_sub (5, 4), u8); ++ u8x2 = svset2_u8 (u8x2, const_sub (6, 4), u8); /* { dg-error {passing 2 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, const_sub (7, 4), u8); /* { dg-error {passing 3 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, const_sub (8, 4), u8); /* { dg-error {passing 4 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 
(u8x2, add (0, 0), u8); /* { dg-error "argument 2 of 'svset2_u8' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_1.C +new file mode 100644 +index 000000000..9bb4f7a04 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_1.C +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svuint16x3_t u16x3, ++ svfloat16x4_t f16x4, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ f16x3 = svset3 (f16x3); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&\)'} } */ ++ f16x3 = svset3 (f16x3, 1); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&, int\)'} } */ ++ f16x3 = svset3 (f16x3, 1, f16, 2); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&, int, svfloat16_t\&, int\)'} } */ ++ f16x3 = svset3 (f16, 0, f16); /* { dg-error {no matching function for call to 'svset3\(svfloat16_t\&, int, svfloat16_t\&\)'} } */ ++ f16x3 = svset3 (u16x3, 0, f16); /* { dg-error {no matching function for call to 'svset3\(svuint16x3_t\&, int, svfloat16_t\&\)'} } */ ++ f16x3 = svset3 (f16x4, 0, f16); /* { dg-error {no matching function for call to 'svset3\(svfloat16x4_t\&, int, svfloat16_t\&\)'} } */ ++ f16x3 = svset3 (pg, 0, f16); /* { dg-error {no matching function for call to 'svset3\(svbool_t\&, int, svfloat16_t\&\)'} } */ ++ f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&, int, svfloat64_t\&\)'} } */ ++ f16x3 = svset3 (f16x3, 0, f16x3); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&, int, svfloat16x3_t\&\)'} } */ ++ f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&, int, svbool_t\&\)'} } */ ++ f16x3 = svset3 (f16x3, x, f16); /* { dg-error "argument 2 of 'svset3' must be an integer constant expression" } */ ++ f16x3 = svset3 (f16x3, 0, f16); ++ u16x3 = svset3 (f16x3, 0, f16); /* { dg-error {cannot convert 'svfloat16x3_t' to 'svuint16x3_t' in assignment} } */ ++ f16x3 = svset3 (f16x3, 1, f16); ++ f16x3 = svset3 (f16x3, 2, f16); ++ f16x3 = svset3 (f16x3, 3, f16); /* { dg-error {passing 3 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3 (f16x3, 4, f16); /* { dg-error {passing 4 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3 (f16x3, 5, f16); /* { dg-error {passing 5 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3 (f16x3, ~0U, f16); /* { dg-error {passing [^ ]* to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3 (f16x3, one, f16); ++ f16x3 = svset3 (f16x3, 3 - 2, f16); ++ f16x3 = svset3 (f16x3, 1.0, f16); ++ f16x3 = svset3 (f16x3, const_sub (5, 4), f16); ++ f16x3 = svset3 (f16x3, const_sub (6, 4), f16); ++ f16x3 = svset3 (f16x3, const_sub (7, 4), f16); /* { dg-error {passing 3 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3 (f16x3, const_sub (8, 4), f16); /* { dg-error {passing 4 to argument 2 of 'svset3', which expects a value in the range 
\[0, 2\]} } */ ++ f16x3 = svset3 (f16x3, add (0, 0), f16); /* { dg-error "argument 2 of 'svset3' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_2.C +new file mode 100644 +index 000000000..0bb604924 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_2.C +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svuint16x3_t u16x3, ++ svfloat16x4_t f16x4, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ f16x3 = svset3_f16 (f16x3); /* { dg-error {too few arguments to function '[^']*'} } */ ++ f16x3 = svset3_f16 (f16x3, 1); /* { dg-error {too few arguments to function '[^']*'} } */ ++ f16x3 = svset3_f16 (f16x3, 1, f16, 2); /* { dg-error {too many arguments to function '[^']*'} } */ ++ f16x3 = svset3_f16 (f16, 0, f16); /* { dg-error {cannot convert 'svfloat16_t' to 'svfloat16x3_t'} } */ ++ f16x3 = svset3_f16 (u16x3, 0, f16); /* { dg-error {cannot convert 'svuint16x3_t' to 'svfloat16x3_t'} } */ ++ f16x3 = svset3_f16 (f16x4, 0, f16); /* { dg-error {cannot convert 'svfloat16x4_t' to 'svfloat16x3_t'} } */ ++ f16x3 = svset3_f16 (pg, 0, f16); /* { dg-error {cannot convert 'svbool_t' to 'svfloat16x3_t'} } */ ++ f16x3 = svset3_f16 (f16x3, 0, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svfloat16_t'} } */ ++ f16x3 = svset3_f16 (f16x3, 0, f16x3); /* { dg-error {cannot convert 'svfloat16x3_t' to 'svfloat16_t'} } */ ++ f16x3 = svset3_f16 (f16x3, 0, pg); /* { dg-error {cannot convert 'svbool_t' to 'svfloat16_t'} } */ ++ f16x3 = svset3_f16 (f16x3, x, f16); /* { dg-error "argument 2 of 'svset3_f16' must be an integer constant expression" } */ ++ f16x3 = svset3_f16 (f16x3, 0, f16); ++ u16x3 = svset3_f16 (f16x3, 0, f16); /* { dg-error {cannot convert 'svfloat16x3_t' to 'svuint16x3_t' in assignment} } */ ++ f16x3 = svset3_f16 (f16x3, 1, f16); ++ f16x3 = svset3_f16 (f16x3, 2, f16); ++ f16x3 = svset3_f16 (f16x3, 3, f16); /* { dg-error {passing 3 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3_f16 (f16x3, 4, f16); /* { dg-error {passing 4 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3_f16 (f16x3, 5, f16); /* { dg-error {passing 5 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3_f16 (f16x3, ~0U, f16); /* { dg-error {passing [^ ]* to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3_f16 (f16x3, one, f16); ++ f16x3 = svset3_f16 (f16x3, 3 - 2, f16); ++ f16x3 = svset3_f16 (f16x3, 1.0, f16); ++ f16x3 = svset3_f16 (f16x3, const_sub (5, 4), f16); ++ f16x3 = svset3_f16 (f16x3, const_sub (6, 4), f16); ++ f16x3 = svset3_f16 (f16x3, const_sub (7, 4), f16); /* { dg-error {passing 3 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3_f16 (f16x3, const_sub (8, 4), f16); /* { dg-error {passing 4 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3_f16 (f16x3, add (0, 0), f16); /* { dg-error "argument 2 of 'svset3_f16' must be an integer constant expression" } */ ++ ++ return 
f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_1.C +new file mode 100644 +index 000000000..dc5dae872 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_1.C +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svfloat32x4_t f32x4, ++ svint32x2_t s32x2, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ s32x4 = svset4 (s32x4); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&\)'} } */ ++ s32x4 = svset4 (s32x4, 1); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&, int\)'} } */ ++ s32x4 = svset4 (s32x4, 1, s32, 2); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&, int, svint32_t\&, int\)'} } */ ++ s32x4 = svset4 (s32, 0, s32); /* { dg-error {no matching function for call to 'svset4\(svint32_t\&, int, svint32_t\&\)'} } */ ++ s32x4 = svset4 (f32x4, 0, s32); /* { dg-error {no matching function for call to 'svset4\(svfloat32x4_t\&, int, svint32_t\&\)'} } */ ++ s32x4 = svset4 (s32x2, 0, s32); /* { dg-error {no matching function for call to 'svset4\(svint32x2_t\&, int, svint32_t\&\)'} } */ ++ s32x4 = svset4 (pg, 0, s32); /* { dg-error {no matching function for call to 'svset4\(svbool_t\&, int, svint32_t\&\)'} } */ ++ s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&, int, svfloat64_t\&\)'} } */ ++ s32x4 = svset4 (s32x4, 0, s32x4); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&, int, svint32x4_t\&\)'} } */ ++ s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&, int, svbool_t\&\)'} } */ ++ s32x4 = svset4 (s32x4, x, s32); /* { dg-error "argument 2 of 'svset4' must be an integer constant expression" } */ ++ s32x4 = svset4 (s32x4, 0, s32); ++ f32x4 = svset4 (s32x4, 0, s32); /* { dg-error {cannot convert 'svint32x4_t' to 'svfloat32x4_t' in assignment} } */ ++ s32x4 = svset4 (s32x4, 1, s32); ++ s32x4 = svset4 (s32x4, 2, s32); ++ s32x4 = svset4 (s32x4, 3, s32); ++ s32x4 = svset4 (s32x4, 4, s32); /* { dg-error {passing 4 to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4 (s32x4, 5, s32); /* { dg-error {passing 5 to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4 (s32x4, ~0U, s32); /* { dg-error {passing [^ ]* to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4 (s32x4, one, s32); ++ s32x4 = svset4 (s32x4, 3 - 2, s32); ++ s32x4 = svset4 (s32x4, 1.0, s32); ++ s32x4 = svset4 (s32x4, const_sub (5, 4), s32); ++ s32x4 = svset4 (s32x4, const_sub (6, 4), s32); ++ s32x4 = svset4 (s32x4, const_sub (7, 4), s32); ++ s32x4 = svset4 (s32x4, const_sub (8, 4), s32); /* { dg-error {passing 4 to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4 (s32x4, add (0, 0), s32); /* { dg-error "argument 2 of 'svset4' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_2.C +new file mode 100644 +index 000000000..762a6db74 +--- 
/dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_2.C +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ ++ ++#include <arm_sve.h> ++ ++constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } ++uint64_t add (uint64_t a, uint64_t b) { return a + b; } ++ ++svfloat64_t ++f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svfloat32x4_t f32x4, ++ svint32x2_t s32x2, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ s32x4 = svset4_s32 (s32x4); /* { dg-error {too few arguments to function '[^']*'} } */ ++ s32x4 = svset4_s32 (s32x4, 1); /* { dg-error {too few arguments to function '[^']*'} } */ ++ s32x4 = svset4_s32 (s32x4, 1, s32, 2); /* { dg-error {too many arguments to function '[^']*'} } */ ++ s32x4 = svset4_s32 (s32, 0, s32); /* { dg-error {cannot convert 'svint32_t' to 'svint32x4_t'} } */ ++ s32x4 = svset4_s32 (f32x4, 0, s32); /* { dg-error {cannot convert 'svfloat32x4_t' to 'svint32x4_t'} } */ ++ s32x4 = svset4_s32 (s32x2, 0, s32); /* { dg-error {cannot convert 'svint32x2_t' to 'svint32x4_t'} } */ ++ s32x4 = svset4_s32 (pg, 0, s32); /* { dg-error {cannot convert 'svbool_t' to 'svint32x4_t'} } */ ++ s32x4 = svset4_s32 (s32x4, 0, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svint32_t'} } */ ++ s32x4 = svset4_s32 (s32x4, 0, s32x4); /* { dg-error {cannot convert 'svint32x4_t' to 'svint32_t'} } */ ++ s32x4 = svset4_s32 (s32x4, 0, pg); /* { dg-error {cannot convert 'svbool_t' to 'svint32_t'} } */ ++ s32x4 = svset4_s32 (s32x4, x, s32); /* { dg-error "argument 2 of 'svset4_s32' must be an integer constant expression" } */ ++ s32x4 = svset4_s32 (s32x4, 0, s32); ++ f32x4 = svset4_s32 (s32x4, 0, s32); /* { dg-error {cannot convert 'svint32x4_t' to 'svfloat32x4_t' in assignment} } */ ++ s32x4 = svset4_s32 (s32x4, 1, s32); ++ s32x4 = svset4_s32 (s32x4, 2, s32); ++ s32x4 = svset4_s32 (s32x4, 3, s32); ++ s32x4 = svset4_s32 (s32x4, 4, s32); /* { dg-error {passing 4 to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4_s32 (s32x4, 5, s32); /* { dg-error {passing 5 to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4_s32 (s32x4, ~0U, s32); /* { dg-error {passing [^ ]* to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4_s32 (s32x4, one, s32); ++ s32x4 = svset4_s32 (s32x4, 3 - 2, s32); ++ s32x4 = svset4_s32 (s32x4, 1.0, s32); ++ s32x4 = svset4_s32 (s32x4, const_sub (5, 4), s32); ++ s32x4 = svset4_s32 (s32x4, const_sub (6, 4), s32); ++ s32x4 = svset4_s32 (s32x4, const_sub (7, 4), s32); ++ s32x4 = svset4_s32 (s32x4, const_sub (8, 4), s32); /* { dg-error {passing 4 to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4_s32 (s32x4, add (0, 0), s32); /* { dg-error "argument 2 of 'svset4_s32' must be an integer constant expression" } */ ++ ++ return f64; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_1.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_1.c +new file mode 100644 +index 000000000..ff2590032 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_1.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svbool_t; /* { dg-message "note: previous declaration 'int svbool_t'" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'typedef [^'\n]* svbool_t' redeclared as different kind of entity} } */ +diff --git 
a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_10.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_10.c +new file mode 100644 +index 000000000..86d87fa37 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_10.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++typedef int svint8x2_t; ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration 'typedef struct svint8x2_t svint8x2_t'} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_11.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_11.c +new file mode 100644 +index 000000000..741d10eaf +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_11.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++ ++struct svint8x2_t; ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration 'typedef struct svint8x2_t svint8x2_t'} } */ ++ ++svint8_t f (svint8x2_t x) { return x.__val[0]; } /* { dg-error {'x' has incomplete type} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_12.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_12.c +new file mode 100644 +index 000000000..fc6a07ac6 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_12.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++ ++typedef struct svint8x2_t svint8x2_t; ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration 'typedef struct svint8x2_t svint8x2_t'} } */ ++ ++svint8_t f (svint8x2_t x) { return x.__val[0]; } /* { dg-error {'x' has incomplete type} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_13.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_13.c +new file mode 100644 +index 000000000..161aacb7b +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_13.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++struct svint8x2_t {}; ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration 'typedef struct svint8x2_t svint8x2_t'} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_14.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_14.c +new file mode 100644 +index 000000000..83191118f +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_14.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++enum svpattern { FOO }; /* { dg-message "note: previous definition here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error "multiple definition of 'enum svpattern'" } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_15.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_15.c +new file mode 100644 +index 000000000..71e35a4eb +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_15.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++enum svpattern { FOO }; /* { dg-error "multiple definition of 'enum svpattern'" } */ ++enum foo { SV_ALL }; /* { dg-error "'SV_ALL' conflicts with a previous declaration" } */ ++typedef int SV_POW2; /* { dg-error "'typedef int SV_POW2' redeclared as different kind of entity" } */ ++int SV_VL3; /* { dg-error "'int SV_VL3' redeclared as different kind of entity" } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_16.c 
b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_16.c +new file mode 100644 +index 000000000..277064d31 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_16.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++struct svpattern { int x; }; ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error "'svpattern' referred to as enum" } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_17.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_17.c +new file mode 100644 +index 000000000..e4bcda6fb +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_17.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++struct svpattern { int x; }; /* { dg-error "'svpattern' referred to as 'struct'" } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_18.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_18.c +new file mode 100644 +index 000000000..b6706150b +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_18.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svpattern; /* OK in C. */ ++ ++#pragma GCC aarch64 "arm_sve.h" +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_19.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_19.c +new file mode 100644 +index 000000000..c6379f762 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_19.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++int svpattern; /* OK in C. */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_2.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_2.c +new file mode 100644 +index 000000000..5baf59932 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_2.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svint8_t; /* { dg-message "note: previous declaration 'int svint8_t" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'typedef [^'\n]* svint8_t' redeclared as different kind of entity} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_20.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_20.c +new file mode 100644 +index 000000000..3ba19f596 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_20.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++ ++enum foo { SV_VL4 }; ++typedef int SV_POW2; ++int SV_ALL; ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error "'SV_VL4' conflicts with a previous declaration" } */ ++/* { dg-error "'SV_POW2' redeclared as different kind of entity" "" { target *-*-* } .-1 } */ ++/* { dg-error "'SV_ALL' redeclared as different kind of entity" "" { target *-*-* } .-2 } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_3.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_3.c +new file mode 100644 +index 000000000..a8d7bdcc7 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_3.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svuint16_t; /* { dg-message "note: previous declaration 'int svuint16_t'" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'typedef [^'\n]* svuint16_t' redeclared as different kind of entity} } */ +diff --git 
a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_4.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_4.c +new file mode 100644 +index 000000000..c0563d0ee +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_4.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svfloat32_t; /* { dg-message "note: previous declaration 'int svfloat32_t'" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'typedef [^'\n]* svfloat32_t' redeclared as different kind of entity} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_5.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_5.c +new file mode 100644 +index 000000000..ee28e9527 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_5.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++typedef int svbool_t; /* { dg-message "note: previous declaration as 'typedef int svbool_t'" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration '[^'\n]* svbool_t'} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_6.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_6.c +new file mode 100644 +index 000000000..85c17eab6 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_6.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++ ++typedef __SVBool_t svbool_t; ++ ++#pragma GCC aarch64 "arm_sve.h" ++ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_7.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_7.c +new file mode 100644 +index 000000000..3a0dfb1c0 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_7.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++ ++int svint8x2_t; ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'typedef struct svint8x2_t svint8x2_t' redeclared as different kind of entity} } */ ++ ++void f (struct svint8x2_t) {} /* { dg-error {incomplete type} } */ ++void g () { int &x = svint8x2_t; } +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_8.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_8.c +new file mode 100644 +index 000000000..9b0df9137 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_8.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++ ++struct svint8x2_t; ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration 'typedef struct svint8x2_t svint8x2_t'} } */ ++ ++void f (svint8x2_t) {} /* { dg-error {incomplete type} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_9.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_9.c +new file mode 100644 +index 000000000..43068da78 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_9.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++int svint8x2_t; /* { dg-error {'int svint8x2_t' redeclared as different kind of entity} } */ ++ ++void f (struct svint8x2_t) {} /* { dg-error {using typedef-name 'svint8x2_t' after 'struct'} } */ ++void g () { int &x = svint8x2_t; } /* { dg-error {expected primary-expression before ';' token} } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/whilele_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/whilele_1.C +new file mode 100644 +index 000000000..9571e668b +--- /dev/null ++++ 
b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/whilele_1.C +@@ -0,0 +1,81 @@ ++// { dg-do compile } ++ ++#include ++ ++enum foo { A, B }; ++ ++void ++test (int8_t s8, int16_t s16, int32_t s32, int64_t s64, ++ uint8_t u8, uint16_t u16, uint32_t u32, uint64_t u64, ++ bool b, foo e, int *ptr, float f32, svbool_t pg, ++ svint32_t vec) ++{ ++ svwhilele_b8 (s32); // { dg-error {no matching function for call to 'svwhilele_b8\(int32_t&\)'} } ++ svwhilele_b8 (s32, s32, s32); // { dg-error {no matching function for call to 'svwhilele_b8\(int32_t&, int32_t&, int32_t&\)'} } ++ ++ svwhilele_b8 (b, b); ++ svwhilele_b8 (e, e); ++ svwhilele_b8 (s8, s8); ++ svwhilele_b8 (u8, u8); ++ svwhilele_b8 (s16, s16); ++ svwhilele_b8 (u16, u16); ++ svwhilele_b8 (ptr, ptr); // { dg-error {no matching function for call to 'svwhilele_b8\(int\*&, int\*&\)'} } ++ // { dg-error {invalid conversion from 'int\*' to '[^']*'} "" { target *-*-* } .-1 } ++ svwhilele_b8 (f32, f32); // { dg-error {call of overloaded 'svwhilele_b8\(float&, float&\)' is ambiguous} } ++ svwhilele_b8 (pg, pg); // { dg-error {no matching function for call to 'svwhilele_b8\(svbool_t&, svbool_t&\)'} } ++ svwhilele_b8 (vec, vec); // { dg-error {no matching function for call to 'svwhilele_b8\(svint32_t&, svint32_t&\)'} } ++ ++ svwhilele_b8 (s32, b); ++ svwhilele_b8 (s32, e); ++ svwhilele_b8 (s32, s8); ++ svwhilele_b8 (s32, u8); ++ svwhilele_b8 (s32, s16); ++ svwhilele_b8 (s32, u16); ++ ++ svwhilele_b8 (u32, b); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, bool&\)' is ambiguous} } ++ svwhilele_b8 (u32, e); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, foo&\)' is ambiguous} } ++ svwhilele_b8 (u32, s8); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, int8_t&\)' is ambiguous} } ++ svwhilele_b8 (u32, u8); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, uint8_t&\)' is ambiguous} } ++ svwhilele_b8 (u32, s16); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, int16_t&\)' is ambiguous} } ++ svwhilele_b8 (u32, u16); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, uint16_t&\)' is ambiguous} } ++ ++ svwhilele_b8 (s32, s32); ++ svwhilele_b8 (s32, u32); // { dg-error {call of overloaded 'svwhilele_b8\(int32_t&, uint32_t&\)' is ambiguous} } ++ svwhilele_b8 (s32, s64); // { dg-error {call of overloaded 'svwhilele_b8\(int32_t&, int64_t&\)' is ambiguous} } ++ svwhilele_b8 (s32, u64); // { dg-error {call of overloaded 'svwhilele_b8\(int32_t&, uint64_t&\)' is ambiguous} } ++ ++ svwhilele_b8 (u32, s32); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, int32_t&\)' is ambiguous} } ++ svwhilele_b8 (u32, u32); ++ svwhilele_b8 (u32, s64); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, int64_t&\)' is ambiguous} } ++ svwhilele_b8 (u32, u64); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, uint64_t&\)' is ambiguous} } ++ ++ svwhilele_b8 (s64, s32); // { dg-error {call of overloaded 'svwhilele_b8\(int64_t&, int32_t&\)' is ambiguous} } ++ svwhilele_b8 (s64, u32); // { dg-error {call of overloaded 'svwhilele_b8\(int64_t&, uint32_t&\)' is ambiguous} } ++ svwhilele_b8 (s64, s64); ++ svwhilele_b8 (s64, u64); // { dg-error {call of overloaded 'svwhilele_b8\(int64_t&, uint64_t&\)' is ambiguous} } ++ ++ svwhilele_b8 (u64, s32); // { dg-error {call of overloaded 'svwhilele_b8\(uint64_t&, int32_t&\)' is ambiguous} } ++ svwhilele_b8 (u64, u32); // { dg-error {call of overloaded 'svwhilele_b8\(uint64_t&, uint32_t&\)' is ambiguous} } ++ svwhilele_b8 (u64, s64); // { dg-error {call 
of overloaded 'svwhilele_b8\(uint64_t&, int64_t&\)' is ambiguous} } ++ svwhilele_b8 (u64, u64); ++ ++ svwhilele_b8 (0, s32); ++ svwhilele_b8 (0, u32); // { dg-error {call of overloaded 'svwhilele_b8\(int, uint32_t&\)' is ambiguous} } ++ svwhilele_b8 (0, s64); // { dg-error {call of overloaded 'svwhilele_b8\(int, int64_t&\)' is ambiguous} } ++ svwhilele_b8 (0, u64); // { dg-error {call of overloaded 'svwhilele_b8\(int, uint64_t&\)' is ambiguous} } ++ ++ svwhilele_b8 (s32, 0); ++ svwhilele_b8 (u32, 0); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, int\)' is ambiguous} } ++ svwhilele_b8 (s64, 0); // { dg-error {call of overloaded 'svwhilele_b8\(int64_t&, int\)' is ambiguous} } ++ svwhilele_b8 (u64, 0); // { dg-error {call of overloaded 'svwhilele_b8\(uint64_t&, int\)' is ambiguous} } ++ ++ svwhilele_b8 (0U, s32); // { dg-error {call of overloaded 'svwhilele_b8\(unsigned int, int32_t&\)' is ambiguous} } ++ svwhilele_b8 (0U, u32); ++ svwhilele_b8 (0U, s64); // { dg-error {call of overloaded 'svwhilele_b8\(unsigned int, int64_t&\)' is ambiguous} } ++ svwhilele_b8 (0U, u64); // { dg-error {call of overloaded 'svwhilele_b8\(unsigned int, uint64_t&\)' is ambiguous} } ++ ++ svwhilele_b8 (s32, 0U); // { dg-error {call of overloaded 'svwhilele_b8\(int32_t&, unsigned int\)' is ambiguous} } ++ svwhilele_b8 (u32, 0U); ++ svwhilele_b8 (s64, 0U); // { dg-error {call of overloaded 'svwhilele_b8\(int64_t&, unsigned int\)' is ambiguous} } ++ svwhilele_b8 (u64, 0U); // { dg-error {call of overloaded 'svwhilele_b8\(uint64_t&, unsigned int\)' is ambiguous} } ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/catch_7.C b/gcc/testsuite/g++.target/aarch64/sve/catch_7.C +new file mode 100644 +index 000000000..ac10b6984 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/catch_7.C +@@ -0,0 +1,38 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O" } */ ++ ++#include ++ ++void __attribute__ ((noipa)) ++f1 (void) ++{ ++ throw 1; ++} ++ ++void __attribute__ ((noipa)) ++f2 (svbool_t) ++{ ++ register svint8_t z8 asm ("z8") = svindex_s8 (11, 1); ++ asm volatile ("" :: "w" (z8)); ++ f1 (); ++} ++ ++void __attribute__ ((noipa)) ++f3 (int n) ++{ ++ register double d8 asm ("v8") = 42.0; ++ for (int i = 0; i < n; ++i) ++ { ++ asm volatile ("" : "=w" (d8) : "w" (d8)); ++ try { f2 (svptrue_b8 ()); } catch (int) { break; } ++ } ++ if (d8 != 42.0) ++ __builtin_abort (); ++} ++ ++int ++main (void) ++{ ++ f3 (100); ++ return 0; ++} +diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_1.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_1.C +new file mode 100644 +index 000000000..a59862cf9 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_1.C +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size(32))); ++ ++void ++foo (int32_t val) ++{ ++ register vnx4si x asm ("z0"); ++ register vnx4si y asm ("z0"); ++ asm volatile ("" : "=w" (y)); ++ val += 1; ++ vnx4si z = { val, val, val, val, val, val, val, val }; ++ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? 
z : y; ++ asm volatile ("" :: "w" (x)); ++} ++ ++/* { dg-final { scan-assembler {\tmov\tz0\.s, p[0-7]/m, w[0-9]+\n} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_2.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_2.C +new file mode 100644 +index 000000000..47aad2d58 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_2.C +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size(32))); ++ ++void ++foo (int32_t val) ++{ ++ register vnx4si x asm ("z0"); ++ register vnx4si y asm ("z1"); ++ asm volatile ("" : "=w" (y)); ++ val += 1; ++ vnx4si z = { val, val, val, val, val, val, val, val }; ++ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? z : y; ++ asm volatile ("" :: "w" (x)); ++} ++ ++/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\tmov\tz0\.s, p[0-7]/m, w[0-9]+\n} } } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_3.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_3.C +new file mode 100644 +index 000000000..e8ec6f8b4 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_3.C +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size(32))); ++typedef float vnx4sf __attribute__((vector_size(32))); ++ ++void ++foo (float val) ++{ ++ register vnx4sf x asm ("z0"); ++ register vnx4sf y asm ("z0"); ++ asm volatile ("" : "=w" (y)); ++ vnx4sf z = { val, val, val, val, val, val, val, val }; ++ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? z : y; ++ asm volatile ("" :: "w" (x)); ++} ++ ++/* { dg-final { scan-assembler {\tmov\tz0\.s, p[0-7]/m, s[0-9]+\n} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_4.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_4.C +new file mode 100644 +index 000000000..32ca59439 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_4.C +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size(32))); ++typedef float vnx4sf __attribute__((vector_size(32))); ++ ++void ++foo (float val) ++{ ++ register vnx4sf x asm ("z0"); ++ register vnx4sf y asm ("z1"); ++ asm volatile ("" : "=w" (y)); ++ vnx4sf z = { val, val, val, val, val, val, val, val }; ++ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? z : y; ++ asm volatile ("" :: "w" (x)); ++} ++ ++/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\tmov\tz0\.s, p[0-7]/m, s[0-9]+\n} } } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_5.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_5.C +new file mode 100644 +index 000000000..2fb903a91 +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_5.C +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size(32))); ++ ++void ++foo (int32_t val) ++{ ++ register vnx4si x asm ("z0"); ++ val += 1; ++ vnx4si y = { val, val, val, val, val, val, val, val }; ++ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? 
y : (vnx4si) { 0 }; ++ asm volatile ("" :: "w" (x)); ++} ++ ++/* { dg-final { scan-assembler {\tmovprfx\tz0\.s, p[0-7]/z, z0\.s\n\tmov\tz0\.s, p[0-7]/m, w[0-9]+\n} } } */ +diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_6.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_6.C +new file mode 100644 +index 000000000..f2b0181bb +--- /dev/null ++++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_6.C +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size(32))); ++typedef float vnx4sf __attribute__((vector_size(32))); ++ ++void ++foo (float val) ++{ ++ register vnx4sf x asm ("z0"); ++ vnx4sf y = { val, val, val, val, val, val, val, val }; ++ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? y : (vnx4sf) { 0 }; ++ asm volatile ("" :: "w" (x)); ++} ++ ++/* { dg-final { scan-assembler {\tmovprfx\tz0\.s, p[0-7]/z, z0\.s\n\tmov\tz0\.s, p[0-7]/m, s[0-9]+\n} } } */ +diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp b/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp +index acb9eacb4..3560a1ff2 100644 +--- a/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp ++++ b/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp +@@ -37,7 +37,7 @@ load_lib c-torture.exp + torture-init + set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS + +-set additional_flags "-fno-tree-loop-distribute-patterns -fno-tracer -fno-ipa-ra" ++set additional_flags "-fno-tree-loop-distribute-patterns -fno-tracer -fno-ipa-ra -fno-inline-functions" + if [istarget "powerpc-*-darwin*"] { + lappend additional_flags "-Wl,-multiply_defined,suppress" + } +diff --git a/gcc/testsuite/gcc.dg/diag-aka-3.c b/gcc/testsuite/gcc.dg/diag-aka-3.c +new file mode 100644 +index 000000000..a3778ed7d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/diag-aka-3.c +@@ -0,0 +1,9 @@ ++typedef unsigned int myvec __attribute__((vector_size (16))); ++ ++void f (float x) ++{ ++ myvec y = x; /* { dg-error {incompatible types when initializing type 'myvec' {aka '__vector\([48]\) unsigned int'} using type 'float'} } */ ++ myvec *ptr = &x; /* { dg-error {initialization of 'myvec \*' {aka '__vector\([48]\) unsigned int \*'} from incompatible pointer type 'float \*'} } */ ++ const myvec *const_ptr = &x; /* { dg-error {initialization of 'const myvec \*' {aka 'const __vector\([48]\) unsigned int \*'} from incompatible pointer type 'float \*'} } */ ++ volatile myvec *volatile_ptr = &x; /* { dg-error {initialization of 'volatile myvec \*' {aka 'volatile __vector\([48]\) unsigned int \*'} from incompatible pointer type 'float \*'} } */ ++} +diff --git a/gcc/testsuite/gcc.dg/enum-redef-1.c b/gcc/testsuite/gcc.dg/enum-redef-1.c +new file mode 100644 +index 000000000..b3fa6cbf8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/enum-redef-1.c +@@ -0,0 +1,29 @@ ++enum a { A }; ++enum a { B }; /* { dg-bogus "nested redefinition" } */ ++/* { dg-error "redeclaration of 'enum a'" "" { target *-*-* } .-1 } */ ++ ++enum empty {}; /* { dg-error "empty enum is invalid" } */ ++enum empty {}; /* { dg-bogus "nested redefinition" } */ ++/* { dg-error "empty enum is invalid" "" { target *-*-* } .-1 } */ ++ ++enum nested_first { ++ C1 = sizeof(enum nested_first { C1a }), /* { dg-error "nested redefinition of 'enum nested_first" } */ ++ C2 = sizeof(enum nested_first { C2a }) /* { dg-error "redeclaration of 'enum nested_first'" "" } */ ++}; ++ ++enum nested_second { ++ D1, ++ D2 = sizeof(enum nested_second { D2a }), /* { dg-error "nested 
redefinition of 'enum nested_second" } */ ++ D3 = sizeof(enum nested_second { D3a }) /* { dg-error "redeclaration of 'enum nested_second'" "" } */ ++}; ++ ++enum nested_repeat { E }; ++enum nested_repeat { /* { dg-error "redeclaration of 'enum nested_repeat'" "" } */ ++ F = sizeof(enum nested_repeat { Fa }) /* { dg-error "nested redefinition of 'enum nested_repeat" } */ ++}; ++ ++enum nested_empty { ++ G1 = sizeof(enum nested_empty {}), /* { dg-error "nested redefinition of 'enum nested_empty" } */ ++ /* { dg-error "empty enum is invalid" "" { target *-*-* } .-1 } */ ++ G2 = sizeof(enum nested_empty { G2a }) ++}; +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-1.c b/gcc/testsuite/gcc.dg/graphite/interchange-1.c +index b65d4861e..65a569e71 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-1.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-1.c +@@ -48,10 +48,3 @@ main (void) + + return 0; + } +- +-/*FIXME: Between isl 0.12 and isl 0.15 the schedule optimizer needs to print +-something canonical so that it can be checked in the test. The final code +-generated by both are same in this case but the messaged printed are +-not consistent. */ +- +-/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-10.c b/gcc/testsuite/gcc.dg/graphite/interchange-10.c +index a955644de..45c248db8 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-10.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-10.c +@@ -45,5 +45,3 @@ main (void) + + return 0; + } +- +-/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-11.c b/gcc/testsuite/gcc.dg/graphite/interchange-11.c +index 61028225f..6ba6907a5 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-11.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-11.c +@@ -45,5 +45,3 @@ main (void) + + return 0; + } +- +-/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-3.c b/gcc/testsuite/gcc.dg/graphite/interchange-3.c +index 4aec82418..e8539e2d3 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-3.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-3.c +@@ -46,5 +46,3 @@ main (void) + + return 0; + } +- +-/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-4.c b/gcc/testsuite/gcc.dg/graphite/interchange-4.c +index 463ecb5a6..1370d5f9d 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-4.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-4.c +@@ -45,5 +45,3 @@ main (void) + + return 0; + } +- +-/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-7.c b/gcc/testsuite/gcc.dg/graphite/interchange-7.c +index 50f7dd7f8..b2696dbec 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-7.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-7.c +@@ -46,5 +46,3 @@ main (void) + + return 0; + } +- +-/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-9.c b/gcc/testsuite/gcc.dg/graphite/interchange-9.c +index 88a357893..506b5001f 100644 +--- a/gcc/testsuite/gcc.dg/graphite/interchange-9.c ++++ b/gcc/testsuite/gcc.dg/graphite/interchange-9.c +@@ -43,5 +43,3 @@ main (void) + + return 0; + } +- +-/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c +index cc108c2bb..a89578032 100644 
+--- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c ++++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c +@@ -44,5 +44,3 @@ main (void) + + return 0; + } +- +-/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ +diff --git a/gcc/testsuite/gcc.dg/guality/guality.exp b/gcc/testsuite/gcc.dg/guality/guality.exp +index ca77a446f..89cd896d0 100644 +--- a/gcc/testsuite/gcc.dg/guality/guality.exp ++++ b/gcc/testsuite/gcc.dg/guality/guality.exp +@@ -80,8 +80,22 @@ if {[check_guality " + return 0; + } + "]} { +- gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.c]] "" "" +- gcc-dg-runtest [lsort [glob $srcdir/c-c++-common/guality/*.c]] "" "-Wc++-compat" ++ set general [list] ++ set Og [list] ++ foreach file [lsort [glob $srcdir/c-c++-common/guality/*.c]] { ++ switch -glob -- [file tail $file] { ++ Og-* { lappend Og $file } ++ * { lappend general $file } ++ } ++ } ++ ++ gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.c]] "" "" ++ gcc-dg-runtest $general "" "-Wc++-compat" ++ set-torture-options \ ++ [list "-O0" "-Og"] \ ++ [list {}] \ ++ [list "-Og -flto"] ++ gcc-dg-runtest $Og "" "-Wc++-compat" + } + + if [info exists guality_gdb_name] { +diff --git a/gcc/testsuite/gcc.dg/guality/pr59776.c b/gcc/testsuite/gcc.dg/guality/pr59776.c +index 382abb622..6c1c8165b 100644 +--- a/gcc/testsuite/gcc.dg/guality/pr59776.c ++++ b/gcc/testsuite/gcc.dg/guality/pr59776.c +@@ -12,11 +12,11 @@ foo (struct S *p) + struct S s1, s2; /* { dg-final { gdb-test pr59776.c:17 "s1.f" "5.0" } } */ + s1 = *p; /* { dg-final { gdb-test pr59776.c:17 "s1.g" "6.0" } } */ + s2 = s1; /* { dg-final { gdb-test pr59776.c:17 "s2.f" "0.0" } } */ +- *(int *) &s2.f = 0; /* { dg-final { gdb-test pr59776.c:17 "s2.g" "6.0" } } */ ++ *(int *) &s2.f = 0; /* { dg-final { gdb-test pr59776.c:17 "s2.g" "6.0" { xfail *-*-* } } } */ + asm volatile (NOP : : : "memory"); /* { dg-final { gdb-test pr59776.c:20 "s1.f" "5.0" } } */ + asm volatile (NOP : : : "memory"); /* { dg-final { gdb-test pr59776.c:20 "s1.g" "6.0" } } */ + s2 = s1; /* { dg-final { gdb-test pr59776.c:20 "s2.f" "5.0" } } */ +- asm volatile (NOP : : : "memory"); /* { dg-final { gdb-test pr59776.c:20 "s2.g" "6.0" } } */ ++ asm volatile (NOP : : : "memory"); /* { dg-final { gdb-test pr59776.c:20 "s2.g" "6.0" { xfail *-*-* } } } */ + asm volatile (NOP : : : "memory"); + } + +diff --git a/gcc/testsuite/gcc.dg/ipa/inline-7.c b/gcc/testsuite/gcc.dg/ipa/inline-7.c +index 7dabb14f6..7c6491141 100644 +--- a/gcc/testsuite/gcc.dg/ipa/inline-7.c ++++ b/gcc/testsuite/gcc.dg/ipa/inline-7.c +@@ -1,6 +1,6 @@ + /* Check that early inliner works out that a is empty of parameter 0. 
*/ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-einline-optimized -fopt-info-inline -fno-partial-inlining" } */ ++/* { dg-options "-O2 -fdump-tree-einline-optimized -fopt-info-inline -fno-partial-inlining -fno-inline-functions" } */ + void t(void); + int a (int b) + { +diff --git a/gcc/testsuite/gcc.dg/ipa/pr63416.c b/gcc/testsuite/gcc.dg/ipa/pr63416.c +index b5374c51f..5873954fb 100644 +--- a/gcc/testsuite/gcc.dg/ipa/pr63416.c ++++ b/gcc/testsuite/gcc.dg/ipa/pr63416.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-optimized" } */ ++/* { dg-options "-O2 -fdump-tree-optimized --param early-inlining-insns-O2=14" } */ + #define _UNUSED_ __attribute__((__unused__)) + + typedef int TEST_F30 (int *v); +diff --git a/gcc/testsuite/gcc.dg/ipa/pr93763.c b/gcc/testsuite/gcc.dg/ipa/pr93763.c +index d11705932..aa2e60c5f 100644 +--- a/gcc/testsuite/gcc.dg/ipa/pr93763.c ++++ b/gcc/testsuite/gcc.dg/ipa/pr93763.c +@@ -3,44 +3,48 @@ + + typedef struct a a; + struct a { +- a *b ++ a *b; + } d; +-e, k, ah, al; +-f(aa) { ++int e, k, ah, al; ++void h(void); ++void ++f(aa) int aa; { + if (aa & 1) + goto g; + f(aa | 2); + g: + h(); + } ++void i(); ++void + l() { +- { + f(072); + i(e, d, 92); +- } + } ++void + ag() { +- { i(e, d, 36); } ++ i(e, d, 36); + } ++void j(); ++void + ai(a *m, a *n, unsigned aa) { + f(aa); + j(k, l, ah, 1); + } ++void + j(int c, a m, int aj, int aa) { + int ak = aa; +- { i(e, d, ak); } ++ i(e, d, ak); + } ++void + i(int c, a *m, unsigned aa) { +- { +- { i(c, (*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*( ++ i(c, (*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*( + *(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*( + *(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*m).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) + .b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) + .b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) + .b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) + .b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b, 0); +- } +- } + int am = aa; +- ai(ag, al, am); ++ ai((a *) (void *) ag, (a *) (__INTPTR_TYPE__) al, am); + } +diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c +index 5819fd719..b4d8b9a8d 100644 +--- a/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c ++++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c +@@ -1,6 +1,6 @@ + /* { dg-do compile } */ + /* { dg-require-effective-target bswap } */ +-/* { dg-options "-O2 -fdump-tree-bswap" } */ ++/* { dg-options "-O2 -fdump-tree-bswap 
-fno-inline-functions" } */ + /* { dg-additional-options "-march=z900" { target s390-*-* } } */ + + struct L { unsigned int l[2]; }; +diff --git a/gcc/testsuite/gcc.dg/pr79983.c b/gcc/testsuite/gcc.dg/pr79983.c +index 84aae6913..1e292d421 100644 +--- a/gcc/testsuite/gcc.dg/pr79983.c ++++ b/gcc/testsuite/gcc.dg/pr79983.c +@@ -8,7 +8,7 @@ struct S { int i, j; }; /* { dg-error "redefinition of 'struct S'" } */ + + enum E; + enum E { A, B, C }; /* { dg-message "originally defined here" } */ +-enum E { D, F }; /* { dg-error "nested redefinition of 'enum E'|redeclaration of 'enum E'" } */ ++enum E { D, F }; /* { dg-error "redeclaration of 'enum E'" } */ + + union U; + union U { int i; }; /* { dg-message "originally defined here" } */ +diff --git a/gcc/testsuite/gcc.dg/struct-ret-1.c b/gcc/testsuite/gcc.dg/struct-ret-1.c +index 23c9e9813..330c76ab8 100644 +--- a/gcc/testsuite/gcc.dg/struct-ret-1.c ++++ b/gcc/testsuite/gcc.dg/struct-ret-1.c +@@ -1,5 +1,5 @@ +-/* { dg-do run { target hppa*-*-* } } */ +-/* { dg-options { -O2 } { target hppa*-*-* } } */ ++/* { dg-do run } */ ++/* { dg-options { -O2 } } */ + extern void abort (void); + extern void exit (int); + typedef struct { +diff --git a/gcc/testsuite/gcc.dg/torture/pr90395.c b/gcc/testsuite/gcc.dg/torture/pr90395.c +new file mode 100644 +index 000000000..eba8750ef +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr90395.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-fexceptions -fnon-call-exceptions" } */ ++ ++typedef int v16si __attribute__ ((__vector_size__ (64))); ++ ++void ++rl (int uq) ++{ ++ v16si qw[1]; ++ ++ qw[uq] = (v16si) { uq }; ++} +diff --git a/gcc/testsuite/gcc.dg/torture/pr92690.c b/gcc/testsuite/gcc.dg/torture/pr92690.c +new file mode 100644 +index 000000000..b49f184fc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr92690.c +@@ -0,0 +1,38 @@ ++/* { dg-do run { target *-*-*gnu* } } */ ++/* { dg-additional-options "-D_GNU_SOURCE" } */ ++/* { dg-require-effective-target fenv_exceptions } */ ++ ++#include ++ ++typedef int v4si __attribute__((vector_size(16))); ++typedef float v4sf __attribute__((vector_size(16))); ++ ++void __attribute__((noipa)) ++foo (v4si *dstp, v4sf *srcp) ++{ ++ v4sf src = *srcp; ++ *dstp = (v4si) { src[0], src[1], 3, 4 }; ++} ++ ++void __attribute__((noipa)) ++bar (v4sf *dstp, v4si *srcp) ++{ ++ v4si src = *srcp; ++ *dstp = (v4sf) { src[0], src[1], 3.5, 4.5 }; ++} ++ ++int ++main() ++{ ++ feenableexcept (FE_INVALID|FE_INEXACT); ++ v4sf x = (v4sf) { 1, 2, __builtin_nanf (""), 3.5 }; ++ v4si y; ++ foo (&y, &x); ++ if (y[0] != 1 || y[1] != 2 || y[2] != 3 || y[3] != 4) ++ __builtin_abort (); ++ y = (v4si) { 0, 1, __INT_MAX__, -__INT_MAX__ }; ++ bar (&x, &y); ++ if (x[0] != 0 || x[1] != 1 || x[2] != 3.5 || x[3] != 4.5) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.dg/torture/pr92715.c b/gcc/testsuite/gcc.dg/torture/pr92715.c +new file mode 100644 +index 000000000..170179c20 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr92715.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } */ ++ ++typedef double v4si __attribute__((vector_size(32))); ++typedef double v2si __attribute__((vector_size(16))); ++ ++void foo (v4si *dstp, v2si *srcp) ++{ ++ v2si src = *srcp; ++ *dstp = (v4si) { src[0], src[1], src[0], src[1] }; ++} ++ ++void bar (v4si *dstp, v2si *srcp) ++{ ++ v2si src = *srcp; ++ *dstp = (v4si) { src[0], src[0], src[0], src[0] }; ++} +diff --git 
a/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c b/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c +new file mode 100644 +index 000000000..ba90b56fe +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-fre3" } */ ++struct foo ++{ ++ int val; ++} *fooptr; ++struct bar ++{ ++ struct foo foo; ++ int val2; ++} *barptr; ++int ++test () ++{ ++ struct foo foo = { 0 }; ++ barptr->val2 = 123; ++ *fooptr = foo; ++ return barptr->val2; ++} ++ ++/* { dg-final { scan-tree-dump-times "return 123" 1 "fre3"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-35.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-35.c +index d55197bce..24e633869 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-35.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-35.c +@@ -16,4 +16,5 @@ v4sf vec_cast_perm(v4si f) + } + + /* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "cddce1" { target { i?86-*-* x86_64-*-* } } } } */ +-/* { dg-final { scan-tree-dump-times "\\\(v4sf\\\) " 2 "cddce1" { target { i?86-*-* x86_64-*-* } } } } */ ++/* Catch (v4sf) and (vector(4) float). */ ++/* { dg-final { scan-tree-dump-times " = \\\(v" 2 "cddce1" { target { i?86-*-* x86_64-*-* } } } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr92706-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92706-2.c +new file mode 100644 +index 000000000..37ab9765d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92706-2.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-esra" } */ ++ ++typedef __UINT64_TYPE__ uint64_t; ++typedef __UINT32_TYPE__ uint32_t; ++struct S { uint32_t i[2]; } __attribute__((aligned(__alignof__(uint64_t)))); ++typedef uint64_t my_int64 __attribute__((may_alias)); ++uint64_t load (void *p) ++{ ++ struct S u, v, w; ++ uint64_t tem; ++ tem = *(my_int64 *)p; ++ *(my_int64 *)&v = tem; ++ u = v; ++ w = u; ++ return *(my_int64 *)&w; ++} ++ ++/* { dg-final { scan-tree-dump "Created a replacement for v" "esra" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c +index 32d63899b..836a8092a 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-dse1-details -fno-short-enums" } */ ++/* { dg-options "-O2 -fdump-tree-dse1-details -fno-short-enums -fno-tree-fre" } */ + /* { dg-skip-if "temporary variable for constraint_expr is never used" { msp430-*-* } } */ + + enum constraint_expr_type +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-31.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-31.c +index 6402c81e6..3d429ab15 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-31.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-31.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O -fdump-tree-fre1-details" } */ ++/* { dg-options "-O -fdump-tree-fre1-details -fno-tree-forwprop" } */ + /* { dg-additional-options "-fno-common" { target hppa*-*-hpux* } } */ + + typedef double d128 __attribute__((vector_size(16))); +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c +index 67526762f..fff731e8c 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details -fno-finite-loops" } */ ++/* 
{ dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details -fno-finite-loops --param early-inlining-insns-O2=14 -fno-inline-functions" } */ + /* { dg-final { scan-tree-dump "FSM" "thread2" } } */ + /* { dg-final { scan-tree-dump "FSM" "thread3" } } */ + /* { dg-final { scan-tree-dump "FSM" "thread4" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr66142.c b/gcc/testsuite/gcc.dg/vect/pr66142.c +index 8c79f2907..a0316f1f0 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr66142.c ++++ b/gcc/testsuite/gcc.dg/vect/pr66142.c +@@ -1,6 +1,6 @@ + /* PR middle-end/66142 */ + /* { dg-do compile } */ +-/* { dg-additional-options "-ffast-math -fopenmp-simd" } */ ++/* { dg-additional-options "-ffast-math -fopenmp-simd --param early-inlining-insns-O2=14" } */ + /* { dg-additional-options "-mavx" { target avx_runtime } } */ + + struct A { float x, y; }; +diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c +new file mode 100644 +index 000000000..739b98f59 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c +@@ -0,0 +1,60 @@ ++/* { dg-require-effective-target scalar_all_fma } */ ++/* { dg-additional-options "-fdump-tree-optimized -ffp-contract=fast" } */ ++ ++#include "tree-vect.h" ++ ++#define N (VECTOR_BITS * 11 / 64 + 3) ++ ++#define DEF(INV) \ ++ void __attribute__ ((noipa)) \ ++ f_##INV (double *restrict a, double *restrict b, \ ++ double *restrict c, double *restrict d) \ ++ { \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ double mb = (INV & 1 ? -b[i] : b[i]); \ ++ double mc = c[i]; \ ++ double md = (INV & 2 ? -d[i] : d[i]); \ ++ a[i] = b[i] < 10 ? mb * mc + md : 10.0; \ ++ } \ ++ } ++ ++#define TEST(INV) \ ++ { \ ++ f_##INV (a, b, c, d); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ double mb = (INV & 1 ? -b[i] : b[i]); \ ++ double mc = c[i]; \ ++ double md = (INV & 2 ? -d[i] : d[i]); \ ++ double fma = __builtin_fma (mb, mc, md); \ ++ if (a[i] != (i % 17 < 10 ? 
fma : 10.0)) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++#define FOR_EACH_INV(T) \ ++ T (0) T (1) T (2) T (3) ++ ++FOR_EACH_INV (DEF) ++ ++int ++main (void) ++{ ++ double a[N], b[N], c[N], d[N]; ++ for (int i = 0; i < N; ++i) ++ { ++ b[i] = i % 17; ++ c[i] = i % 9 + 11; ++ d[i] = i % 13 + 14; ++ asm volatile ("" ::: "memory"); ++ } ++ FOR_EACH_INV (TEST) ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times { = \.COND_FMA } 1 "optimized" { target vect_double_cond_arith } } } */ ++/* { dg-final { scan-tree-dump-times { = \.COND_FMS } 1 "optimized" { target vect_double_cond_arith } } } */ ++/* { dg-final { scan-tree-dump-times { = \.COND_FNMA } 1 "optimized" { target vect_double_cond_arith } } } */ ++/* { dg-final { scan-tree-dump-times { = \.COND_FNMS } 1 "optimized" { target vect_double_cond_arith } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-mulhrs-1.c b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-1.c +new file mode 100644 +index 000000000..8e46ff6b0 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-1.c +@@ -0,0 +1,49 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++#ifndef SIGNEDNESS ++#define SIGNEDNESS signed ++#endif ++#ifndef BIAS ++#define BIAS 0 ++#endif ++ ++#define HRS(x) ((((x) >> (15 - BIAS)) + BIAS) >> BIAS) ++ ++void __attribute__ ((noipa)) ++f (SIGNEDNESS short *restrict a, SIGNEDNESS short *restrict b, ++ SIGNEDNESS short *restrict c, __INTPTR_TYPE__ n) ++{ ++ for (__INTPTR_TYPE__ i = 0; i < n; ++i) ++ a[i] = HRS((SIGNEDNESS int) b[i] * (SIGNEDNESS int) c[i]); ++} ++ ++#define N 50 ++#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4) ++#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26) ++#define CONST1 0x01AB ++#define CONST2 0x01CD ++ ++int ++main (void) ++{ ++ check_vect (); ++ ++ SIGNEDNESS short a[N], b[N], c[N]; ++ for (int i = 0; i < N; ++i) ++ { ++ b[i] = BASE1 + i * CONST1; ++ c[i] = BASE2 + i * CONST2; ++ asm volatile ("" ::: "memory"); ++ } ++ f (a, b, c, N); ++ for (int i = 0; i < N; ++i) ++ if (a[i] != HRS(BASE1 * BASE2 + i * i * (CONST1 * CONST2) ++ + i * (BASE1 * CONST2 + BASE2 * CONST1))) ++ __builtin_abort (); ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */ ++/* { dg-final { scan-tree-dump {\.MULHS} "vect" { target vect_mulhrs_hi } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-mulhrs-2.c b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-2.c +new file mode 100644 +index 000000000..a16e71c6a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-2.c +@@ -0,0 +1,9 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#define SIGNEDNESS unsigned ++ ++#include "vect-mulhrs-1.c" ++ ++/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */ ++/* { dg-final { scan-tree-dump {\.MULHS} "vect" { target vect_mulhrs_hi } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-mulhrs-3.c b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-3.c +new file mode 100644 +index 000000000..e7d44d75d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-3.c +@@ -0,0 +1,9 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#define BIAS 1 ++ ++#include "vect-mulhrs-1.c" ++ ++/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */ ++/* { dg-final { scan-tree-dump {\.MULHRS} "vect" { target 
vect_mulhrs_hi } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-mulhrs-4.c b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-4.c +new file mode 100644 +index 000000000..e12176335 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-4.c +@@ -0,0 +1,10 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#define SIGNEDNESS unsigned ++#define BIAS 1 ++ ++#include "vect-mulhrs-1.c" ++ ++/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */ ++/* { dg-final { scan-tree-dump {\.MULHRS} "vect" { target vect_mulhrs_hi } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c b/gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c +new file mode 100644 +index 000000000..be70bc6c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c +@@ -0,0 +1,79 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++ ++#define DIV(x,y) ((x)/(y)) ++#define MOD(x,y) ((x)%(y)) ++ ++#define TEMPLATE(PO2,OP) \ ++void __attribute__ ((noipa)) \ ++f_##PO2##_##OP (int *restrict a, int *restrict b, __INTPTR_TYPE__ n) \ ++{ \ ++ for (__INTPTR_TYPE__ i = 0; i < n; ++i) \ ++ a[i] = OP (b[i], (1 << PO2)); \ ++} ++#define TEMPLATES(PO2) \ ++TEMPLATE (PO2,DIV); \ ++TEMPLATE (PO2,MOD); ++ ++TEMPLATES (1); ++TEMPLATES (2); ++TEMPLATES (3); ++TEMPLATES (7); ++TEMPLATES (8); ++TEMPLATES (10); ++TEMPLATES (15); ++TEMPLATES (16); ++TEMPLATES (20); ++ ++typedef void (*func_t) (int *, int *, __INTPTR_TYPE__); ++typedef struct { ++ int po2; ++ func_t div; ++ func_t mod; ++} fn_t; ++const fn_t fns[] = { ++#define FN_PAIR(PO2) { PO2, f_##PO2##_DIV, f_##PO2##_MOD } ++ FN_PAIR (1), ++ FN_PAIR (2), ++ FN_PAIR (3), ++ FN_PAIR (7), ++ FN_PAIR (8), ++ FN_PAIR (10), ++ FN_PAIR (15), ++ FN_PAIR (16), ++ FN_PAIR (20), ++}; ++ ++int __attribute__ ((noipa, noinline)) ++power2 (int x) ++{ ++ return 1 << x; ++} ++ ++#define N 50 ++ ++int ++main (void) ++{ ++ int a[N], b[N], c[N]; ++ ++ for (int i = 0; i < (sizeof(fns)/sizeof(fns[0])); i++) ++ { ++ int p = power2 (fns[i].po2); ++ for (int j = 0; j < N; j++) ++ a[j] = ((p << 4) * j) / (N - 1) - (p << 5); ++ ++ fns[i].div (b, a, N); ++ fns[i].mod (c, a, N); ++ ++ for (int j = 0; j < N; j++) ++ if (a[j] != (b[j] * p + c[j])) ++ __builtin_abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump {\.DIV_POW2} "vect" { target vect_sdiv_pow2_si } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 18 "vect" { target vect_sdiv_pow2_si } } } */ +diff --git a/gcc/testsuite/gcc.dg/winline-3.c b/gcc/testsuite/gcc.dg/winline-3.c +index 7b7c8c5b9..7043a2760 100644 +--- a/gcc/testsuite/gcc.dg/winline-3.c ++++ b/gcc/testsuite/gcc.dg/winline-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-Winline -O2 --param max-inline-insns-single=1 --param inline-min-speedup=100 -fgnu89-inline" } */ ++/* { dg-options "-Winline -O2 --param max-inline-insns-single-O2=1 --param inline-min-speedup-O2=100 -fgnu89-inline" } */ + + void big (void); + inline int q(void) /* { dg-warning "max-inline-insns-single" } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/acle/jcvt_1.c b/gcc/testsuite/gcc.target/aarch64/acle/jcvt_1.c +new file mode 100644 +index 000000000..0c900b1b5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/acle/jcvt_1.c +@@ -0,0 +1,15 @@ ++/* Test the __jcvt ACLE intrinsic. 
*/ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8.3-a" } */ ++ ++#include <arm_acle.h> ++ ++#ifdef __ARM_FEATURE_JCVT ++int32_t ++test_jcvt (double a) ++{ ++ return __jcvt (a); ++} ++#endif ++ ++/* { dg-final { scan-assembler-times "fjcvtzs\tw\[0-9\]+, d\[0-9\]+\n" 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rintnzx_1.c b/gcc/testsuite/gcc.target/aarch64/acle/rintnzx_1.c +new file mode 100644 +index 000000000..125720848 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/acle/rintnzx_1.c +@@ -0,0 +1,73 @@ ++/* Test the __rint[32,64][z,x] intrinsics. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8.5-a" } */ ++ ++#include <arm_acle.h> ++ ++#ifdef __ARM_FEATURE_FRINT ++float ++foo_32z_f32_scal (float a) ++{ ++ return __rint32zf (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32z\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */ ++ ++double ++foo_32z_f64_scal (double a) ++{ ++ return __rint32z (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ ++ ++float ++foo_32x_f32_scal (float a) ++{ ++ return __rint32xf (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32x\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */ ++ ++double ++foo_32x_f64_scal (double a) ++{ ++ return __rint32x (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ ++ ++float ++foo_64z_f32_scal (float a) ++{ ++ return __rint64zf (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64z\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */ ++ ++double ++foo_64z_f64_scal (double a) ++{ ++ return __rint64z (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ ++ ++float ++foo_64x_f32_scal (float a) ++{ ++ return __rint64xf (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64x\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */ ++ ++double ++foo_64x_f64_scal (double a) ++{ ++ return __rint64x (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ ++ ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rng_1.c b/gcc/testsuite/gcc.target/aarch64/acle/rng_1.c +new file mode 100644 +index 000000000..1fbdb6276 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/acle/rng_1.c +@@ -0,0 +1,53 @@ ++/* Test the __rndr ACLE intrinsic. */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8.5-a+rng" } */ ++ ++#include <arm_acle.h> ++ ++#ifdef __ARM_FEATURE_RNG ++/* Check that instruction is generated when status result is unused. */ ++uint64_t ++test_rndr_no_stat (void) ++{ ++ uint64_t res; ++ __rndr (&res); ++ return res; ++} ++ ++/* Check that instruction is generated when random number result ++ is unused. */ ++int ++test_rndr_error_check (void) ++{ ++ uint64_t res; ++ int fail = __rndr (&res); ++ if (fail) ++ return 0; ++ return -1; ++} ++ ++/* { dg-final { scan-assembler-times "mrs\tx..?, RNDR\n" 2 } } */ ++ ++/* Check that instruction is generated when status result is unused. */ ++uint64_t ++test_rndrrs_no_stat (void) ++{ ++ uint64_t res; ++ __rndrrs (&res); ++ return res; ++} ++ ++/* Check that instruction is generated when random number result ++ is unused. 
*/ ++int ++test_rndrrs_error_check (void) ++{ ++ uint64_t res; ++ int fail = __rndrrs (&res); ++ if (fail) ++ return 0; ++ return -1; ++} ++ ++/* { dg-final { scan-assembler-times "mrs\tx..?, RNDRRS\n" 2 } } */ ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/acle/tme.c b/gcc/testsuite/gcc.target/aarch64/acle/tme.c +new file mode 100644 +index 000000000..5df93b1dc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/acle/tme.c +@@ -0,0 +1,34 @@ ++/* Test the TME intrinsics. */ ++ ++/* { dg-do compile } */ ++/* { dg-options "-save-temps -O2 -march=armv8-a+tme" } */ ++ ++#include "arm_acle.h" ++ ++#define tcancel_reason 0x234 ++ ++unsigned ++check_tme (void) ++{ ++ unsigned status = __tstart (); ++ if (status == 0) ++ { ++ if (__ttest () == 2) ++ { ++ __tcancel (tcancel_reason & _TMFAILURE_REASON); ++ return tcancel_reason; ++ } ++ ++ __tcommit (); ++ return 0; ++ } ++ else if (status & _TMFAILURE_NEST) ++ return _TMFAILURE_NEST; ++ else if (status & _TMFAILURE_TRIVIAL) ++ return _TMFAILURE_TRIVIAL; ++} ++ ++/* { dg-final { scan-assembler "tstart\tx..?\n" } } */ ++/* { dg-final { scan-assembler "tcancel\t#564\n" } } */ ++/* { dg-final { scan-assembler "ttest\tx..?\n" } } */ ++/* { dg-final { scan-assembler "tcommit\n" } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c +new file mode 100644 +index 000000000..c42c7acbb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c +@@ -0,0 +1,85 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-options "-O2" } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-save-temps" } */ ++ ++#include ++ ++float32x2_t test_vcreate (float32x2_t r, uint64_t a, uint64_t b) ++{ ++ bfloat16x4_t _a = vcreate_bf16(a); ++ bfloat16x4_t _b = vcreate_bf16(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++/* { dg-final { scan-assembler {bfdot\tv[0-9]+.2s, v[0-9]+.4h, v[0-9]+.4h} } } */ ++ ++bfloat16x4_t test_vset_lane_bf16 (bfloat16_t a, bfloat16x4_t b) ++{ ++ return vset_lane_bf16 (a, b, 3); ++} ++ ++bfloat16x8_t test_vsetq_lane_bf16 (bfloat16_t a, bfloat16x8_t b) ++{ ++ return vsetq_lane_bf16 (a, b, 7); ++} ++/* { dg-final { scan-assembler-times "ins\\t" 2 } } */ ++ ++bfloat16x4_t vdup_test (bfloat16_t a) ++{ ++ return vdup_n_bf16 (a); ++} ++/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.4h, v\[0-9\]+.h\\\[0\\\]" } } */ ++ ++bfloat16x8_t vdupq_test (bfloat16_t a) ++{ ++ return vdupq_n_bf16 (a); ++} ++ ++bfloat16x8_t test_vdupq_lane_bf16 (bfloat16x4_t a) ++{ ++ return vdupq_lane_bf16 (a, 1); ++} ++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+.h\\\[0\\\]" 2 } } */ ++ ++bfloat16_t test_vget_lane_bf16 (bfloat16x4_t a) ++{ ++ return vget_lane_bf16 (a, 1); ++} ++/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */ ++ ++bfloat16x4_t test_vdup_lane_bf16 (bfloat16x4_t a) ++{ ++ return vdup_lane_bf16 (a, 1); ++} ++/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" } } */ ++ ++bfloat16x4_t test_vdup_laneq_bf16 (bfloat16x8_t a) ++{ ++ return vdup_laneq_bf16 (a, 7); ++} ++/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[7\\\]" } } */ ++ ++bfloat16x8_t test_vdupq_laneq_bf16 (bfloat16x8_t a) ++{ ++ return vdupq_laneq_bf16 (a, 5); ++} ++/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[5\\\]" } } */ ++ ++bfloat16_t 
test_vduph_lane_bf16 (bfloat16x4_t a) ++{ ++ return vduph_lane_bf16 (a, 3); ++} ++/* { dg-final { scan-assembler "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[3\\\]" } } */ ++ ++bfloat16_t test_vgetq_lane_bf16 (bfloat16x8_t a) ++{ ++ return vgetq_lane_bf16 (a, 7); ++} ++ ++bfloat16_t test_vduph_laneq_bf16 (bfloat16x8_t a) ++{ ++ return vduph_laneq_bf16 (a, 7); ++} ++/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[7\\\]" 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_reinterpret.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_reinterpret.c +new file mode 100644 +index 000000000..f5adf40c6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_reinterpret.c +@@ -0,0 +1,466 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-save-temps" } */ ++ ++#include ++ ++float32x2_t ++test_vbfdot_f32_s8 (float32x2_t r, int8x8_t a, int8x8_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_s8(a); ++ bfloat16x4_t _b = vreinterpret_bf16_s8(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_s16 (float32x2_t r, int16x4_t a, int16x4_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_s16(a); ++ bfloat16x4_t _b = vreinterpret_bf16_s16(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_s32 (float32x2_t r, int32x2_t a, int32x2_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_s32(a); ++ bfloat16x4_t _b = vreinterpret_bf16_s32(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_s64 (float32x2_t r, int64x1_t a, int64x1_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_s64(a); ++ bfloat16x4_t _b = vreinterpret_bf16_s64(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_u8 (float32x2_t r, uint8x8_t a, uint8x8_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_u8(a); ++ bfloat16x4_t _b = vreinterpret_bf16_u8(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_u16 (float32x2_t r, uint16x4_t a, uint16x4_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_u16(a); ++ bfloat16x4_t _b = vreinterpret_bf16_u16(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_u32 (float32x2_t r, uint32x2_t a, uint32x2_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_u32(a); ++ bfloat16x4_t _b = vreinterpret_bf16_u32(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_u64 (float32x2_t r, uint64x1_t a, uint64x1_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_u64(a); ++ bfloat16x4_t _b = vreinterpret_bf16_u64(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_p8 (float32x2_t r, poly8x8_t a, poly8x8_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_p8(a); ++ bfloat16x4_t _b = vreinterpret_bf16_p8(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_p16 (float32x2_t r, poly16x4_t a, poly16x4_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_p16(a); ++ bfloat16x4_t _b = vreinterpret_bf16_p16(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_p64 (float32x2_t r, poly64x1_t a, poly64x1_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_p64(a); ++ bfloat16x4_t _b = vreinterpret_bf16_p64(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_f16 (float32x2_t r, float16x4_t a, float16x4_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_f16(a); ++ 
bfloat16x4_t _b = vreinterpret_bf16_f16(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_f32 (float32x2_t r, float32x2_t a, float32x2_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_f32(a); ++ bfloat16x4_t _b = vreinterpret_bf16_f32(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x2_t ++test_vbfdot_f32_f64 (float32x2_t r, float64x1_t a, float64x1_t b) ++{ ++ bfloat16x4_t _a = vreinterpret_bf16_f64(a); ++ bfloat16x4_t _b = vreinterpret_bf16_f64(b); ++ ++ return vbfdot_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_s8 (float32x4_t r, int8x16_t a, int8x16_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_s8(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_s8(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_s16 (float32x4_t r, int16x8_t a, int16x8_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_s16(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_s16(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_s32 (float32x4_t r, int32x4_t a, int32x4_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_s32(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_s32(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_s64 (float32x4_t r, int64x2_t a, int64x2_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_s64(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_s64(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_u8 (float32x4_t r, uint8x16_t a, uint8x16_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_u8(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_u8(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_u16 (float32x4_t r, uint16x8_t a, uint16x8_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_u16(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_u16(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_u32 (float32x4_t r, uint32x4_t a, uint32x4_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_u32(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_u32(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_u64 (float32x4_t r, uint64x2_t a, uint64x2_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_u64(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_u64(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_p8 (float32x4_t r, poly8x16_t a, poly8x16_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_p8(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_p8(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_p16 (float32x4_t r, poly16x8_t a, poly16x8_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_p16(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_p16(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_p64 (float32x4_t r, poly64x2_t a, poly64x2_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_p64(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_p64(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_p128 (float32x4_t r, poly128_t a, poly128_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_p128(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_p128(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_f16 (float32x4_t r, float16x8_t a, float16x8_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_f16(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_f16(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_f32 (float32x4_t r, float32x4_t a, float32x4_t b) ++{ ++ 
bfloat16x8_t _a = vreinterpretq_bf16_f32(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_f32(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++float32x4_t ++test_vbfdotq_f32_f64 (float32x4_t r, float64x2_t a, float64x2_t b) ++{ ++ bfloat16x8_t _a = vreinterpretq_bf16_f64(a); ++ bfloat16x8_t _b = vreinterpretq_bf16_f64(b); ++ ++ return vbfdotq_f32 (r, _a, _b); ++} ++ ++/* { dg-final { scan-assembler-times {bfdot\tv[0-9]+.2s, v[0-9]+.4h, v[0-9]+.4h} 14 } } */ ++/* { dg-final { scan-assembler-times {bfdot\tv[0-9]+.4s, v[0-9]+.8h, v[0-9]+.8h} 15 } } */ ++ ++int8x8_t test_vreinterpret_s8_bf16 (bfloat16x4_t a, int8x8_t b) ++{ ++ int8x8_t _a = vreinterpret_s8_bf16 (a); ++ return vadd_s8 (_a, b); ++} ++ ++int16x4_t test_vreinterpret_s16_bf16 (bfloat16x4_t a, int16x4_t b) ++{ ++ int16x4_t _a = vreinterpret_s16_bf16 (a); ++ return vadd_s16 (_a, b); ++} ++ ++int32x2_t test_vreinterpret_s32_bf16 (bfloat16x4_t a, int32x2_t b) ++{ ++ int32x2_t _a = vreinterpret_s32_bf16 (a); ++ return vadd_s32 (_a, b); ++} ++ ++int64x1_t test_vreinterpret_s64_bf16 (bfloat16x4_t a, int64x1_t b) ++{ ++ int64x1_t _a = vreinterpret_s64_bf16 (a); ++ return vrshl_s64 (_a, b); ++} ++ ++uint8x8_t test_vreinterpret_u8_bf16 (bfloat16x4_t a, uint8x8_t b) ++{ ++ uint8x8_t _a = vreinterpret_u8_bf16 (a); ++ return vadd_u8 (_a, b); ++} ++ ++uint16x4_t test_vreinterpret_u16_bf16 (bfloat16x4_t a, uint16x4_t b) ++{ ++ uint16x4_t _a = vreinterpret_u16_bf16 (a); ++ return vadd_u16 (_a, b); ++} ++ ++uint32x2_t test_vreinterpret_u32_bf16 (bfloat16x4_t a, uint32x2_t b) ++{ ++ uint32x2_t _a = vreinterpret_u32_bf16 (a); ++ return vadd_u32 (_a, b); ++} ++ ++uint64x1_t test_vreinterpret_u64_bf16 (bfloat16x4_t a, int64x1_t b) ++{ ++ uint64x1_t _a = vreinterpret_u64_bf16 (a); ++ return vrshl_u64 (_a, b); ++} ++ ++poly8x8_t test_vreinterpret_p8_bf16 (bfloat16x4_t a, poly8x8_t b) ++{ ++ poly8x8_t _a = vreinterpret_p8_bf16 (a); ++ return vzip1_p8 (_a, b); ++} ++ ++poly16x4_t test_vreinterpret_p16_bf16 (bfloat16x4_t a, poly16x4_t b) ++{ ++ poly16x4_t _a = vreinterpret_p16_bf16 (a); ++ return vzip1_p16 (_a, b); ++} ++ ++poly64x1_t test_vreinterpret_p64_bf16 (bfloat16x4_t a, poly64x1_t b) ++{ ++ poly64x1_t _a = vreinterpret_p64_bf16 (a); ++ return vsli_n_p64 (_a, b, 3); ++} ++ ++float32x2_t test_vreinterpret_f32_bf16 (bfloat16x4_t a, float32x2_t b) ++{ ++ float32x2_t _a = vreinterpret_f32_bf16 (a); ++ return vsub_f32 (_a, b); ++} ++ ++float64x1_t test_vreinterpret_f64_bf16 (bfloat16x4_t a, float64x1_t b) ++{ ++ float64x1_t _a = vreinterpret_f64_bf16 (a); ++ return vsub_f64 (_a, b); ++} ++ ++int8x16_t test_vreinterpretq_s8_bf16 (bfloat16x8_t a, int8x16_t b) ++{ ++ int8x16_t _a = vreinterpretq_s8_bf16 (a); ++ return vaddq_s8 (_a, b); ++} ++ ++int16x8_t test_vreinterpretq_s16_bf16 (bfloat16x8_t a, int16x8_t b) ++{ ++ int16x8_t _a = vreinterpretq_s16_bf16 (a); ++ return vaddq_s16 (_a, b); ++} ++ ++int32x4_t test_vreinterpretq_s32_bf16 (bfloat16x8_t a, int32x4_t b) ++{ ++ int32x4_t _a = vreinterpretq_s32_bf16 (a); ++ return vaddq_s32 (_a, b); ++} ++ ++int64x2_t test_vreinterpretq_s64_bf16 (bfloat16x8_t a, int64x2_t b) ++{ ++ int64x2_t _a = vreinterpretq_s64_bf16 (a); ++ return vaddq_s64 (_a, b); ++} ++ ++uint8x16_t test_vreinterpretq_u8_bf16 (bfloat16x8_t a, uint8x16_t b) ++{ ++ uint8x16_t _a = vreinterpretq_u8_bf16 (a); ++ return vaddq_u8 (_a, b); ++} ++ ++uint16x8_t test_vreinterpretq_u16_bf16 (bfloat16x8_t a, uint16x8_t b) ++{ ++ uint16x8_t _a = vreinterpretq_u16_bf16 (a); ++ return vaddq_u16 (_a, b); ++} ++ ++uint32x4_t test_vreinterpretq_u32_bf16 
(bfloat16x8_t a, uint32x4_t b) ++{ ++ uint32x4_t _a = vreinterpretq_u32_bf16 (a); ++ return vaddq_u32 (_a, b); ++} ++ ++uint64x2_t test_vreinterpretq_u64_bf16 (bfloat16x8_t a, uint64x2_t b) ++{ ++ uint64x2_t _a = vreinterpretq_u64_bf16 (a); ++ return vaddq_u64 (_a, b); ++} ++ ++poly8x16_t test_vreinterpretq_p8_bf16 (bfloat16x8_t a, poly8x16_t b) ++{ ++ poly8x16_t _a = vreinterpretq_p8_bf16 (a); ++ return vzip1q_p8 (_a, b); ++} ++ ++poly16x8_t test_vreinterpretq_p16_bf16 (bfloat16x8_t a, poly16x8_t b) ++{ ++ poly16x8_t _a = vreinterpretq_p16_bf16 (a); ++ return vzip1q_p16 (_a, b); ++} ++ ++poly64x2_t test_vreinterpretq_p64_bf16 (bfloat16x8_t a, poly64x2_t b) ++{ ++ poly64x2_t _a = vreinterpretq_p64_bf16 (a); ++ return vsliq_n_p64 (_a, b, 3); ++} ++ ++poly128_t test_vreinterpretq_p128_bf16 (bfloat16x8_t a, poly16x8_t b) ++{ ++ poly128_t _a = vreinterpretq_p128_bf16 (a); ++ return _a; ++} ++ ++float32x4_t test_vreinterpretq_f32_bf16 (bfloat16x8_t a, float32x4_t b) ++{ ++ float32x4_t _a = vreinterpretq_f32_bf16 (a); ++ return vsubq_f32 (_a, b); ++} ++ ++float64x2_t test_vreinterpretq_f64_bf16 (bfloat16x8_t a, float64x2_t b) ++{ ++ float64x2_t _a = vreinterpretq_f64_bf16 (a); ++ return vsubq_f64 (_a, b); ++} ++ ++float16x4_t test_vreinterpret_f16_bf16 (bfloat16x4_t a) ++{ ++ return vreinterpret_f16_bf16 (a); ++} ++ ++float16x8_t test_vreinterpretq_f16_bf16 (bfloat16x8_t a) ++{ ++ return vreinterpretq_f16_bf16 (a); ++} ++ ++/* { dg-final { scan-assembler-times {add\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s} 2 } } */ ++/* { dg-final { scan-assembler-times {add\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h} 2 } } */ ++/* { dg-final { scan-assembler-times {add\tv[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {add\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s} 2 } } */ ++/* { dg-final { scan-assembler-times {add\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h} 2 } } */ ++/* { dg-final { scan-assembler-times {add\tv[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} 2 } } */ ++ ++/* { dg-final { scan-assembler {fsub\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s} } } */ ++/* { dg-final { scan-assembler {fsub\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s} } } */ ++/* { dg-final { scan-assembler {fsub\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d} } } */ ++/* { dg-final { scan-assembler {fsub\td[0-9]+, d[0-9]+, d[0-9]+} } } */ ++ ++/* { dg-final { scan-assembler {zip1\tv[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b} } } */ ++/* { dg-final { scan-assembler {zip1\tv[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} } } */ ++/* { dg-final { scan-assembler {zip1\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h} } } */ ++/* { dg-final { scan-assembler {zip1\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h} } } */ ++ ++/* { dg-final { scan-assembler {sli\tv[0-9]+.2d, v[0-9]+.2d, 3} } } */ ++/* { dg-final { scan-assembler {sli\td[0-9]+, d[0-9]+, 3} } } */ ++ ++/* { dg-final { scan-assembler {urshl\td[0-9]+, d[0-9]+, d[0-9]+} } } */ ++/* { dg-final { scan-assembler {srshl\td[0-9]+, d[0-9]+, d[0-9]+} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldn.c +new file mode 100644 +index 000000000..cf245091a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldn.c +@@ -0,0 +1,150 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++ ++#include ++ ++bfloat16x4_t ++test_vld1_dup_bf16 (bfloat16_t * ptr) ++{ ++ 
return vld1_dup_bf16 (ptr); ++} ++ ++bfloat16x8_t ++test_vld1q_dup_bf16 (bfloat16_t * ptr) ++{ ++ return vld1q_dup_bf16 (ptr); ++} ++ ++bfloat16x4_t ++test_vld1_lane_bf16 (bfloat16_t * ptr, bfloat16x4_t src) ++{ ++ return vld1_lane_bf16 (ptr, src, 3); ++} ++ ++bfloat16x8_t ++test_vld1q_lane_bf16 (bfloat16_t * ptr, bfloat16x8_t src) ++{ ++ return vld1q_lane_bf16 (ptr, src, 7); ++} ++ ++bfloat16x4_t ++test_vld1_bf16 (bfloat16_t * ptr) ++{ ++ return vld1_bf16 (ptr); ++} ++ ++bfloat16x8_t ++test_vld1q_bf16 (bfloat16_t * ptr) ++{ ++ return vld1q_bf16 (ptr); ++} ++ ++bfloat16x4x2_t ++test_vld1_bf16_x2 (bfloat16_t * ptr) ++{ ++ return vld1_bf16_x2 (ptr); ++} ++ ++bfloat16x8x2_t ++test_vld1q_bf16_x2 (bfloat16_t * ptr) ++{ ++ return vld1q_bf16_x2 (ptr); ++} ++ ++bfloat16x4x3_t ++test_vld1_bf16_x3 (bfloat16_t * ptr) ++{ ++ return vld1_bf16_x3 (ptr); ++} ++ ++bfloat16x8x3_t ++test_vld1q_bf16_x3 (bfloat16_t * ptr) ++{ ++ return vld1q_bf16_x3 (ptr); ++} ++ ++bfloat16x4x4_t ++test_vld1_bf16_x4 (bfloat16_t * ptr) ++{ ++ return vld1_bf16_x4 (ptr); ++} ++ ++bfloat16x8x4_t ++test_vld1q_bf16_x4 (bfloat16_t * ptr) ++{ ++ return vld1q_bf16_x4 (ptr); ++} ++ ++bfloat16x4x2_t ++test_vld2_bf16 (bfloat16_t * ptr) ++{ ++ return vld2_bf16 (ptr); ++} ++ ++bfloat16x8x2_t ++test_vld2q_bf16 (bfloat16_t * ptr) ++{ ++ return vld2q_bf16 (ptr); ++} ++ ++bfloat16x4x2_t ++test_vld2_dup_bf16 (bfloat16_t * ptr) ++{ ++ return vld2_dup_bf16 (ptr); ++} ++ ++bfloat16x8x2_t ++test_vld2q_dup_bf16 (bfloat16_t * ptr) ++{ ++ return vld2q_dup_bf16 (ptr); ++} ++ ++bfloat16x4x3_t ++test_vld3_bf16 (bfloat16_t * ptr) ++{ ++ return vld3_bf16 (ptr); ++} ++ ++bfloat16x8x3_t ++test_vld3q_bf16 (bfloat16_t * ptr) ++{ ++ return vld3q_bf16 (ptr); ++} ++ ++bfloat16x4x3_t ++test_vld3_dup_bf16 (bfloat16_t * ptr) ++{ ++ return vld3_dup_bf16 (ptr); ++} ++ ++bfloat16x8x3_t ++test_vld3q_dup_bf16 (bfloat16_t * ptr) ++{ ++ return vld3q_dup_bf16 (ptr); ++} ++ ++bfloat16x4x4_t ++test_vld4_bf16 (bfloat16_t * ptr) ++{ ++ return vld4_bf16 (ptr); ++} ++ ++bfloat16x8x4_t ++test_vld4q_bf16 (bfloat16_t * ptr) ++{ ++ return vld4q_bf16 (ptr); ++} ++ ++bfloat16x4x4_t ++test_vld4_dup_bf16 (bfloat16_t * ptr) ++{ ++ return vld4_dup_bf16 (ptr); ++} ++ ++bfloat16x8x4_t ++test_vld4q_dup_bf16 (bfloat16_t * ptr) ++{ ++ return vld4q_dup_bf16 (ptr); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vstn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vstn.c +new file mode 100644 +index 000000000..162b3ee36 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vstn.c +@@ -0,0 +1,107 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++ ++#include ++ ++void ++test_vst1_bf16_x2 (bfloat16_t *ptr, bfloat16x4x2_t val) ++{ ++ vst1_bf16_x2 (ptr, val); ++} ++ ++void ++test_vst1q_bf16_x2 (bfloat16_t *ptr, bfloat16x8x2_t val) ++{ ++ vst1q_bf16_x2 (ptr, val); ++} ++ ++void ++test_vst1_bf16_x3 (bfloat16_t *ptr, bfloat16x4x3_t val) ++{ ++ vst1_bf16_x3 (ptr, val); ++} ++ ++void ++test_vst1q_bf16_x3 (bfloat16_t *ptr, bfloat16x8x3_t val) ++{ ++ vst1q_bf16_x3 (ptr, val); ++} ++ ++void ++test_vst1_bf16_x4 (bfloat16_t *ptr, bfloat16x4x4_t val) ++{ ++ vst1_bf16_x4 (ptr, val); ++} ++ ++void ++test_vst1q_bf16_x4 (bfloat16_t *ptr, bfloat16x8x4_t val) ++{ ++ vst1q_bf16_x4 (ptr, val); ++} ++ ++void ++test_vst1_lane_bf16 (bfloat16_t *ptr, bfloat16x4_t val) ++{ ++ 
vst1_lane_bf16 (ptr, val, 3); ++} ++ ++void ++test_vst1q_lane_bf16 (bfloat16_t *ptr, bfloat16x8_t val) ++{ ++ vst1q_lane_bf16 (ptr, val, 7); ++} ++ ++void ++test_vst1_bf16 (bfloat16_t *ptr, bfloat16x4_t val) ++{ ++ vst1_bf16 (ptr, val); ++} ++ ++void ++test_vst1q_bf16 (bfloat16_t *ptr, bfloat16x8_t val) ++{ ++ vst1q_bf16 (ptr, val); ++} ++ ++void ++test_vst2_bf16 (bfloat16_t *ptr, bfloat16x4x2_t val) ++{ ++ vst2_bf16 (ptr, val); ++} ++ ++void ++test_vst2q_bf16 (bfloat16_t *ptr, bfloat16x8x2_t val) ++{ ++ vst2q_bf16 (ptr, val); ++} ++ ++void ++test_vst3_bf16 (bfloat16_t *ptr, bfloat16x4x3_t val) ++{ ++ vst3_bf16 (ptr, val); ++} ++ ++void ++test_vst3q_bf16 (bfloat16_t *ptr, bfloat16x8x3_t val) ++{ ++ vst3q_bf16 (ptr, val); ++} ++ ++void ++test_vst4_bf16 (bfloat16_t *ptr, bfloat16x4x4_t val) ++{ ++ vst4_bf16 (ptr, val); ++} ++ ++void ++test_vst4q_bf16 (bfloat16_t *ptr, bfloat16x8x4_t val) ++{ ++ vst4q_bf16 (ptr, val); ++} ++ ++int main() ++{ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c +new file mode 100644 +index 000000000..bbea630b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c +@@ -0,0 +1,48 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++ ++#include ++ ++/* ++**test_bfcvtn: ++** bfcvtn v0.4h, v0.4s ++** ret ++*/ ++bfloat16x4_t test_bfcvtn (float32x4_t a) ++{ ++ return vcvt_bf16_f32 (a); ++} ++ ++/* ++**test_bfcvtnq: ++** bfcvtn v0.4h, v0.4s ++** ret ++*/ ++bfloat16x8_t test_bfcvtnq (float32x4_t a) ++{ ++ return vcvtq_low_bf16_f32 (a); ++} ++ ++/* ++**test_bfcvtnq2: ++** bfcvtn2 v0.8h, v1.4s ++** ret ++*/ ++bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a) ++{ ++ return vcvtq_high_bf16_f32 (inactive, a); ++} ++ ++/* ++**test_bfcvt: ++** bfcvt h0, s0 ++** ret ++*/ ++bfloat16_t test_bfcvt (float32_t a) ++{ ++ return vcvth_bf16_f32 (a); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c +new file mode 100644 +index 000000000..9904d65f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c +@@ -0,0 +1,10 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-additional-options "-march=armv8.2-a+nobf16" } */ ++ ++#include ++ ++bfloat16_t test_bfcvt (float32_t a) ++{ ++ /* { dg-error "inlining failed .* 'vcvth_bf16_f32" "" { target *-*-* } 0 } */ ++ return vcvth_bf16_f32 (a); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c +new file mode 100644 +index 000000000..a91468093 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c +@@ -0,0 +1,17 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-require-effective-target aarch64_asm_bf16_ok } */ ++/* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+nosimd" } */ ++/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ ++ ++#include ++ ++/* ++**test_bfcvt: ++** bfcvt h0, s0 ++** ret ++*/ ++bfloat16_t 
test_bfcvt (float32_t a) ++{ ++ return vcvth_bf16_f32 (a); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c +new file mode 100644 +index 000000000..b3b6db123 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c +@@ -0,0 +1,10 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-additional-options "-march=armv8.2-a+nobf16" } */ ++ ++#include ++ ++bfloat16x4_t test_bfcvtn (float32x4_t a) ++{ ++ /* { dg-error "inlining failed .* 'vcvt_bf16_f32" "" { target *-*-* } 0 } */ ++ return vcvt_bf16_f32 (a); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c +new file mode 100644 +index 000000000..4b730e39d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c +@@ -0,0 +1,20 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++ ++#include ++ ++/* ++**test_bfcvtnq2_untied: ++** mov v0.16b, v1.16b ++** bfcvtn2 v0.8h, v2.4s ++** ret ++*/ ++bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive, ++ float32x4_t a) ++{ ++ return vcvtq_high_bf16_f32 (inactive, a); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c +new file mode 100755 +index 000000000..ad5150773 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c +@@ -0,0 +1,91 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++ ++#include ++ ++/* ++**ufoo: ++** bfdot v0.2s, (v1.4h, v2.4h|v2.4h, v1.4h) ++** ret ++*/ ++float32x2_t ufoo(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) ++{ ++ return vbfdot_f32 (r, x, y); ++} ++ ++/* ++**ufooq: ++** bfdot v0.4s, (v1.8h, v2.8h|v2.8h, v1.8h) ++** ret ++*/ ++float32x4_t ufooq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) ++{ ++ return vbfdotq_f32 (r, x, y); ++} ++ ++/* ++**ufoo_lane: ++** bfdot v0.2s, v1.4h, v2.2h\[0\] ++** ret ++*/ ++float32x2_t ufoo_lane(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) ++{ ++ return vbfdot_lane_f32 (r, x, y, 0); ++} ++ ++/* ++**ufooq_laneq: ++** bfdot v0.4s, v1.8h, v2.2h\[2\] ++** ret ++*/ ++float32x4_t ufooq_laneq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) ++{ ++ return vbfdotq_laneq_f32 (r, x, y, 2); ++} ++ ++/* ++**ufoo_laneq: ++** bfdot v0.2s, v1.4h, v2.2h\[3\] ++** ret ++*/ ++float32x2_t ufoo_laneq(float32x2_t r, bfloat16x4_t x, bfloat16x8_t y) ++{ ++ return vbfdot_laneq_f32 (r, x, y, 3); ++} ++ ++/* ++**ufooq_lane: ++** bfdot v0.4s, v1.8h, v2.2h\[1\] ++** ret ++*/ ++float32x4_t ufooq_lane(float32x4_t r, bfloat16x8_t x, bfloat16x4_t y) ++{ ++ return vbfdotq_lane_f32 (r, x, y, 1); ++} ++ ++/* ++**ufoo_untied: ++** mov v0.8b, v1.8b ++** bfdot v0.2s, (v2.4h, v3.4h|v3.4h, v2.4h) ++** ret ++*/ ++float32x2_t ufoo_untied(float32x4_t unused, float32x2_t r, 
bfloat16x4_t x, bfloat16x4_t y) ++{ ++ return vbfdot_f32 (r, x, y); ++} ++ ++/* ++**ufooq_lane_untied: ++** mov v0.16b, v1.16b ++** bfdot v0.4s, v2.8h, v3.2h\[1\] ++** ret ++*/ ++float32x4_t ufooq_lane_untied(float32x4_t unused, float32x4_t r, bfloat16x8_t x, bfloat16x4_t y) ++{ ++ return vbfdotq_lane_f32 (r, x, y, 1); ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c +new file mode 100755 +index 000000000..58bdee5ac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c +@@ -0,0 +1,91 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-mbig-endian --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++ ++#include ++ ++/* ++**ufoo: ++** bfdot v0.2s, (v1.4h, v2.4h|v2.4h, v1.4h) ++** ret ++*/ ++float32x2_t ufoo(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) ++{ ++ return vbfdot_f32 (r, x, y); ++} ++ ++/* ++**ufooq: ++** bfdot v0.4s, (v1.8h, v2.8h|v2.8h, v1.8h) ++** ret ++*/ ++float32x4_t ufooq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) ++{ ++ return vbfdotq_f32 (r, x, y); ++} ++ ++/* ++**ufoo_lane: ++** bfdot v0.2s, v1.4h, v2.2h\[0\] ++** ret ++*/ ++float32x2_t ufoo_lane(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) ++{ ++ return vbfdot_lane_f32 (r, x, y, 0); ++} ++ ++/* ++**ufooq_laneq: ++** bfdot v0.4s, v1.8h, v2.2h\[2\] ++** ret ++*/ ++float32x4_t ufooq_laneq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) ++{ ++ return vbfdotq_laneq_f32 (r, x, y, 2); ++} ++ ++/* ++**ufoo_laneq: ++** bfdot v0.2s, v1.4h, v2.2h\[3\] ++** ret ++*/ ++float32x2_t ufoo_laneq(float32x2_t r, bfloat16x4_t x, bfloat16x8_t y) ++{ ++ return vbfdot_laneq_f32 (r, x, y, 3); ++} ++ ++/* ++**ufooq_lane: ++** bfdot v0.4s, v1.8h, v2.2h\[1\] ++** ret ++*/ ++float32x4_t ufooq_lane(float32x4_t r, bfloat16x8_t x, bfloat16x4_t y) ++{ ++ return vbfdotq_lane_f32 (r, x, y, 1); ++} ++ ++/* ++**ufoo_untied: ++** mov v0.8b, v1.8b ++** bfdot v0.2s, (v2.4h, v3.4h|v3.4h, v2.4h) ++** ret ++*/ ++float32x2_t ufoo_untied(float32x4_t unused, float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) ++{ ++ return vbfdot_f32 (r, x, y); ++} ++ ++/* ++**ufooq_lane_untied: ++** mov v0.16b, v1.16b ++** bfdot v0.4s, v2.8h, v3.2h\[1\] ++** ret ++*/ ++float32x4_t ufooq_lane_untied(float32x4_t unused, float32x4_t r, bfloat16x8_t x, bfloat16x4_t y) ++{ ++ return vbfdotq_lane_f32 (r, x, y, 1); ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-3.c +new file mode 100755 +index 000000000..607126203 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-3.c +@@ -0,0 +1,28 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "--save-temps" } */ ++ ++#include ++ ++float32x2_t ufoo_lane(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) ++{ ++ return vbfdot_lane_f32 (r, x, y, -1); /* { dg-error {lane -1 out of range 0 - 1} "" { target *-*-* } 0 } */ ++} ++ ++float32x4_t ufooq_laneq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) ++{ ++ return vbfdotq_laneq_f32 (r, x, y, -1); /* { dg-error {lane 
-1 out of range 0 - 3} "" { target *-*-* } 0 } */ ++} ++ ++float32x2_t ufoo_laneq(float32x2_t r, bfloat16x4_t x, bfloat16x8_t y) ++{ ++ return vbfdot_laneq_f32 (r, x, y, 4); /* { dg-error {lane 4 out of range 0 - 3} "" { target *-*-* } 0 } */ ++} ++ ++float32x4_t ufooq_lane(float32x4_t r, bfloat16x8_t x, bfloat16x4_t y) ++{ ++ return vbfdotq_lane_f32 (r, x, y, 2); /* { dg-error {lane 2 out of range 0 - 1} "" { target *-*-* } 0 } */ ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c +new file mode 100644 +index 000000000..9810e4ba3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c +@@ -0,0 +1,67 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include ++ ++/* ++**test_bfmlalb: ++** bfmlalb v0.4s, v1.8h, v2.8h ++** ret ++*/ ++float32x4_t test_bfmlalb (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) ++{ ++ return vbfmlalbq_f32 (r, a, b); ++} ++ ++/* ++**test_bfmlalt: ++** bfmlalt v0.4s, v1.8h, v2.8h ++** ret ++*/ ++float32x4_t test_bfmlalt (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) ++{ ++ return vbfmlaltq_f32 (r, a, b); ++} ++ ++/* ++**test_bfmlalb_lane: ++** bfmlalb v0.4s, v1.8h, v2.h[0] ++** ret ++*/ ++float32x4_t test_bfmlalb_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) ++{ ++ return vbfmlalbq_lane_f32 (r, a, b, 0); ++} ++ ++/* ++**test_bfmlalt_lane: ++** bfmlalt v0.4s, v1.8h, v2.h[2] ++** ret ++*/ ++float32x4_t test_bfmlalt_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) ++{ ++ return vbfmlaltq_lane_f32 (r, a, b, 2); ++} ++ ++/* ++**test_bfmlalb_laneq: ++** bfmlalb v0.4s, v1.8h, v2.h[4] ++** ret ++*/ ++float32x4_t test_bfmlalb_laneq (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) ++{ ++ return vbfmlalbq_laneq_f32 (r, a, b, 4); ++} ++ ++/* ++**test_bfmlalt_laneq: ++** bfmlalt v0.4s, v1.8h, v2.h[7] ++** ret ++*/ ++float32x4_t test_bfmlalt_laneq (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) ++{ ++ return vbfmlaltq_laneq_f32 (r, a, b, 7); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c +new file mode 100644 +index 000000000..0aaa69f00 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c +@@ -0,0 +1,18 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include ++ ++ ++/* ++**test_bfmmla: ++** bfmmla v0.4s, v1.8h, v2.8h ++** ret ++*/ ++float32x4_t test_bfmmla (float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) ++{ ++ return vbfmmlaq_f32 (r, x, y); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c +new file mode 100644 +index 000000000..4d50ba3a3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c +@@ -0,0 +1,46 @@ ++/* { dg-do compile { target { aarch64*-*-* } } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" 
} } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++ ++#include ++ ++void ++f_vbfmlaltq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) ++{ ++ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ ++ vbfmlaltq_lane_f32 (r, a, b, -1); ++ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ ++ vbfmlaltq_lane_f32 (r, a, b, 4); ++ return; ++} ++ ++void ++f_vbfmlaltq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) ++{ ++ /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */ ++ vbfmlaltq_laneq_f32 (r, a, b, -1); ++ /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */ ++ vbfmlaltq_laneq_f32 (r, a, b, 8); ++ return; ++} ++ ++void ++f_vbfmlalbq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) ++{ ++ /* { dg-error "lane -2 out of range 0 - 3" "" { target *-*-* } 0 } */ ++ vbfmlalbq_lane_f32 (r, a, b, -2); ++ /* { dg-error "lane 5 out of range 0 - 3" "" { target *-*-* } 0 } */ ++ vbfmlalbq_lane_f32 (r, a, b, 5); ++ return; ++} ++ ++void ++f_vbfmlalbq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) ++{ ++ /* { dg-error "lane -2 out of range 0 - 7" "" { target *-*-* } 0 } */ ++ vbfmlalbq_laneq_f32 (r, a, b, -2); ++ /* { dg-error "lane 9 out of range 0 - 7" "" { target *-*-* } 0 } */ ++ vbfmlalbq_laneq_f32 (r, a, b, 9); ++ return; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c +new file mode 100755 +index 000000000..ac4f821e7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c +@@ -0,0 +1,136 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ ++/* { dg-add-options arm_v8_2a_i8mm } */ ++/* { dg-additional-options "-save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++ ++#include ++ ++/* Unsigned-Signed Dot Product instructions. */ ++ ++/* ++**ufoo: ++** usdot v0\.2s, v1\.8b, v2\.8b ++** ret ++*/ ++int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y) ++{ ++ return vusdot_s32 (r, x, y); ++} ++ ++/* ++**ufooq: ++** usdot v0\.4s, v1\.16b, v2\.16b ++** ret ++*/ ++int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y) ++{ ++ return vusdotq_s32 (r, x, y); ++} ++ ++/* ++**ufoo_lane: ++** usdot v0\.2s, v1\.8b, v2\.4b\[0\] ++** ret ++*/ ++int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y) ++{ ++ return vusdot_lane_s32 (r, x, y, 0); ++} ++ ++/* ++**ufoo_laneq: ++** usdot v0\.2s, v1\.8b, v2\.4b\[2\] ++** ret ++*/ ++int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y) ++{ ++ return vusdot_laneq_s32 (r, x, y, 2); ++} ++ ++/* ++**ufooq_lane: ++** usdot v0\.4s, v1\.16b, v2\.4b\[1\] ++** ret ++*/ ++int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y) ++{ ++ return vusdotq_lane_s32 (r, x, y, 1); ++} ++ ++/* ++**ufooq_laneq: ++** usdot v0\.4s, v1\.16b, v2\.4b\[3\] ++** ret ++*/ ++int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y) ++{ ++ return vusdotq_laneq_s32 (r, x, y, 3); ++} ++ ++ ++/* Signed-Unsigned Dot Product instructions. 
*/ ++ ++/* ++**sfoo_lane: ++** sudot v0\.2s, v1\.8b, v2\.4b\[0\] ++** ret ++*/ ++int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y) ++{ ++ return vsudot_lane_s32 (r, x, y, 0); ++} ++ ++/* ++**sfoo_laneq: ++** sudot v0\.2s, v1\.8b, v2\.4b\[2\] ++** ret ++*/ ++int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y) ++{ ++ return vsudot_laneq_s32 (r, x, y, 2); ++} ++ ++/* ++**sfooq_lane: ++** sudot v0\.4s, v1\.16b, v2\.4b\[1\] ++** ret ++*/ ++int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y) ++{ ++ return vsudotq_lane_s32 (r, x, y, 1); ++} ++ ++/* ++**sfooq_laneq: ++** sudot v0\.4s, v1\.16b, v2\.4b\[3\] ++** ret ++*/ ++int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y) ++{ ++ return vsudotq_laneq_s32 (r, x, y, 3); ++} ++ ++/* ++**ufoo_untied: ++** mov v0\.8b, v1\.8b ++** usdot v0\.2s, v2\.8b, v3\.8b ++** ret ++*/ ++int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y) ++{ ++ return vusdot_s32 (r, x, y); ++} ++ ++/* ++**ufooq_laneq_untied: ++** mov v0\.16b, v1\.16b ++** usdot v0\.4s, v2\.16b, v3\.4b\[3\] ++** ret ++*/ ++int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y) ++{ ++ return vusdotq_laneq_s32 (r, x, y, 3); ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c +new file mode 100755 +index 000000000..96bca2356 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c +@@ -0,0 +1,137 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ ++/* { dg-add-options arm_v8_2a_i8mm } */ ++/* { dg-additional-options "-mbig-endian -save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++ ++#include ++ ++/* Unsigned-Signed Dot Product instructions. */ ++ ++/* ++**ufoo: ++** usdot v0\.2s, v1\.8b, v2\.8b ++** ret ++*/ ++int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y) ++{ ++ return vusdot_s32 (r, x, y); ++} ++ ++/* ++**ufooq: ++** usdot v0\.4s, v1\.16b, v2\.16b ++** ret ++*/ ++int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y) ++{ ++ return vusdotq_s32 (r, x, y); ++} ++ ++/* ++**ufoo_lane: ++** usdot v0\.2s, v1\.8b, v2\.4b\[0\] ++** ret ++*/ ++int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y) ++{ ++ return vusdot_lane_s32 (r, x, y, 0); ++} ++ ++/* ++**ufoo_laneq: ++** usdot v0\.2s, v1\.8b, v2\.4b\[2\] ++** ret ++*/ ++int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y) ++{ ++ return vusdot_laneq_s32 (r, x, y, 2); ++} ++ ++/* ++**ufooq_lane: ++** usdot v0\.4s, v1\.16b, v2\.4b\[1\] ++** ret ++*/ ++int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y) ++{ ++ return vusdotq_lane_s32 (r, x, y, 1); ++} ++ ++/* ++**ufooq_laneq: ++** usdot v0\.4s, v1\.16b, v2\.4b\[3\] ++** ret ++*/ ++int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y) ++{ ++ return vusdotq_laneq_s32 (r, x, y, 3); ++} ++ ++ ++/* Signed-Unsigned Dot Product instructions. 
*/ ++ ++/* ++**sfoo_lane: ++** sudot v0\.2s, v1\.8b, v2\.4b\[0\] ++** ret ++*/ ++int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y) ++{ ++ return vsudot_lane_s32 (r, x, y, 0); ++} ++ ++/* ++**sfoo_laneq: ++** sudot v0\.2s, v1\.8b, v2\.4b\[2\] ++** ret ++*/ ++int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y) ++{ ++ return vsudot_laneq_s32 (r, x, y, 2); ++} ++ ++/* ++**sfooq_lane: ++** sudot v0\.4s, v1\.16b, v2\.4b\[1\] ++** ret ++*/ ++int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y) ++{ ++ return vsudotq_lane_s32 (r, x, y, 1); ++} ++ ++/* ++**sfooq_laneq: ++** sudot v0\.4s, v1\.16b, v2\.4b\[3\] ++** ret ++*/ ++int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y) ++{ ++ return vsudotq_laneq_s32 (r, x, y, 3); ++} ++ ++/* ++**ufoo_untied: ++** mov v0\.8b, v1\.8b ++** usdot v0\.2s, v2\.8b, v3\.8b ++** ret ++*/ ++int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y) ++{ ++ return vusdot_s32 (r, x, y); ++} ++ ++/* ++**ufooq_laneq_untied: ++** mov v0\.16b, v1\.16b ++** usdot v0\.4s, v2\.16b, v3\.4b\[3\] ++** ret ++*/ ++int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y) ++{ ++ return vusdotq_laneq_s32 (r, x, y, 3); ++} ++ ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c +new file mode 100755 +index 000000000..18ecabef8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c +@@ -0,0 +1,31 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ ++/* { dg-add-options arm_v8_2a_i8mm } */ ++/* { dg-additional-options "--save-temps" } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++ ++#include ++ ++int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y) ++{ ++ /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */ ++ return vusdot_lane_s32 (r, x, y, -1); ++} ++ ++int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y) ++{ ++ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ ++ return vusdot_laneq_s32 (r, x, y, -1); ++} ++ ++int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */ ++ return vusdotq_lane_s32 (r, x, y, 2); ++} ++ ++int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ ++ return vusdotq_laneq_s32 (r, x, y, 4); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c +new file mode 100755 +index 000000000..66c87d486 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c +@@ -0,0 +1,31 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ ++/* { dg-add-options arm_v8_2a_i8mm } */ ++/* { dg-additional-options "--save-temps" } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++ ++#include ++ ++int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y) ++{ ++ /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */ ++ return vsudot_lane_s32 (r, x, y, -1); ++} ++ ++int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y) ++{ ++ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ ++ return vsudot_laneq_s32 (r, x, y, -1); ++} ++ ++int32x4_t sfooq_lane (int32x4_t 
r, int8x16_t x, uint8x8_t y) ++{ ++ /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */ ++ return vsudotq_lane_s32 (r, x, y, 2); ++} ++ ++int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y) ++{ ++ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ ++ return vsudotq_laneq_s32 (r, x, y, 4); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c +new file mode 100644 +index 000000000..451a0afc6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c +@@ -0,0 +1,83 @@ ++/* We haven't implemented these intrinsics for arm yet. */ ++/* { dg-xfail-if "" { arm*-*-* } } */ ++/* { dg-do run } */ ++/* { dg-options "-O3" } */ ++ ++#include ++#include "arm-neon-ref.h" ++ ++extern void abort (void); ++ ++#define TESTMETH(BASE, ELTS, SUFFIX) \ ++int __attribute__ ((noinline)) \ ++test_vld1##SUFFIX##_x4 () \ ++{ \ ++ BASE##_t data[ELTS * 4]; \ ++ BASE##_t temp[ELTS * 4]; \ ++ BASE##x##ELTS##x##4##_t vectors; \ ++ int i,j; \ ++ for (i = 0; i < ELTS * 4; i++) \ ++ data [i] = (BASE##_t) 4*i; \ ++ asm volatile ("" : : : "memory"); \ ++ vectors = vld1##SUFFIX##_x4 (data); \ ++ vst1##SUFFIX (temp, vectors.val[0]); \ ++ vst1##SUFFIX (&temp[ELTS], vectors.val[1]); \ ++ vst1##SUFFIX (&temp[ELTS * 2], vectors.val[2]); \ ++ vst1##SUFFIX (&temp[ELTS * 3], vectors.val[3]); \ ++ asm volatile ("" : : : "memory"); \ ++ for (j = 0; j < ELTS * 4; j++) \ ++ if (temp[j] != data[j]) \ ++ return 1; \ ++ return 0; \ ++} ++ ++#define VARIANTS_1(VARIANT) \ ++VARIANT (uint8, 8, _u8) \ ++VARIANT (uint16, 4, _u16) \ ++VARIANT (uint32, 2, _u32) \ ++VARIANT (uint64, 1, _u64) \ ++VARIANT (int8, 8, _s8) \ ++VARIANT (int16, 4, _s16) \ ++VARIANT (int32, 2, _s32) \ ++VARIANT (int64, 1, _s64) \ ++VARIANT (poly8, 8, _p8) \ ++VARIANT (poly16, 4, _p16) \ ++VARIANT (poly64, 1, _p64) \ ++VARIANT (float16, 4, _f16) \ ++VARIANT (float32, 2, _f32) \ ++VARIANT (uint8, 16, q_u8) \ ++VARIANT (uint16, 8, q_u16) \ ++VARIANT (uint32, 4, q_u32) \ ++VARIANT (uint64, 2, q_u64) \ ++VARIANT (int8, 16, q_s8) \ ++VARIANT (int16, 8, q_s16) \ ++VARIANT (int32, 4, q_s32) \ ++VARIANT (int64, 2, q_s64) \ ++VARIANT (poly8, 16, q_p8) \ ++VARIANT (poly16, 8, q_p16) \ ++VARIANT (poly64, 2, q_p64) \ ++VARIANT (float16, 8, q_f16) \ ++VARIANT (float32, 4, q_f32) ++ ++#ifdef __aarch64__ ++#define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \ ++VARIANT (float64, 1, _f64) \ ++VARIANT (float64, 2, q_f64) ++#else ++#define VARIANTS(VARIANT) VARIANTS_1(VARIANT) ++#endif ++ ++/* Tests of vld1_x4 and vld1q_x4. */ ++VARIANTS (TESTMETH) ++ ++#define CHECKS(BASE, ELTS, SUFFIX) \ ++ if (test_vld1##SUFFIX##_x4 () != 0) \ ++ fprintf (stderr, "test_vld1##SUFFIX##_x4"); ++ ++int ++main (int argc, char **argv) ++{ ++ VARIANTS (CHECKS) ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c +new file mode 100644 +index 000000000..1f17b5342 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c +@@ -0,0 +1,83 @@ ++/* We haven't implemented these intrinsics for arm yet. 
*/ ++/* { dg-xfail-if "" { arm*-*-* } } */ ++/* { dg-do run } */ ++/* { dg-options "-O3" } */ ++ ++#include ++#include "arm-neon-ref.h" ++ ++extern void abort (void); ++ ++#define TESTMETH(BASE, ELTS, SUFFIX) \ ++int __attribute__ ((noinline)) \ ++test_vst1##SUFFIX##_x4 () \ ++{ \ ++ BASE##_t data[ELTS * 4]; \ ++ BASE##_t temp[ELTS * 4]; \ ++ BASE##x##ELTS##x##4##_t vectors; \ ++ int i,j; \ ++ for (i = 0; i < ELTS * 4; i++) \ ++ data [i] = (BASE##_t) 4*i; \ ++ asm volatile ("" : : : "memory"); \ ++ vectors.val[0] = vld1##SUFFIX (data); \ ++ vectors.val[1] = vld1##SUFFIX (&data[ELTS]); \ ++ vectors.val[2] = vld1##SUFFIX (&data[ELTS * 2]); \ ++ vectors.val[3] = vld1##SUFFIX (&data[ELTS * 3]); \ ++ vst1##SUFFIX##_x4 (temp, vectors); \ ++ asm volatile ("" : : : "memory"); \ ++ for (j = 0; j < ELTS * 4; j++) \ ++ if (temp[j] != data[j]) \ ++ return 1; \ ++ return 0; \ ++} ++ ++#define VARIANTS_1(VARIANT) \ ++VARIANT (uint8, 8, _u8) \ ++VARIANT (uint16, 4, _u16) \ ++VARIANT (uint32, 2, _u32) \ ++VARIANT (uint64, 1, _u64) \ ++VARIANT (int8, 8, _s8) \ ++VARIANT (int16, 4, _s16) \ ++VARIANT (int32, 2, _s32) \ ++VARIANT (int64, 1, _s64) \ ++VARIANT (poly8, 8, _p8) \ ++VARIANT (poly16, 4, _p16) \ ++VARIANT (poly64, 1, _p64) \ ++VARIANT (float16, 4, _f16) \ ++VARIANT (float32, 2, _f32) \ ++VARIANT (uint8, 16, q_u8) \ ++VARIANT (uint16, 8, q_u16) \ ++VARIANT (uint32, 4, q_u32) \ ++VARIANT (uint64, 2, q_u64) \ ++VARIANT (int8, 16, q_s8) \ ++VARIANT (int16, 8, q_s16) \ ++VARIANT (int32, 4, q_s32) \ ++VARIANT (int64, 2, q_s64) \ ++VARIANT (poly8, 16, q_p8) \ ++VARIANT (poly16, 8, q_p16) \ ++VARIANT (poly64, 2, q_p64) \ ++VARIANT (float16, 8, q_f16) \ ++VARIANT (float32, 4, q_f32) ++ ++#ifdef __aarch64__ ++#define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \ ++VARIANT (float64, 1, _f64) \ ++VARIANT (float64, 2, q_f64) ++#else ++#define VARIANTS(VARIANT) VARIANTS_1(VARIANT) ++#endif ++ ++/* Tests of vst1_x4 and vst1q_x4. 
*/ ++VARIANTS (TESTMETH) ++ ++#define CHECKS(BASE, ELTS, SUFFIX) \ ++ if (test_vst1##SUFFIX##_x4 () != 0) \ ++ fprintf (stderr, "test_vst1##SUFFIX##_x4"); ++ ++int ++main (int argc, char **argv) ++{ ++ VARIANTS (CHECKS) ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c +similarity index 51% +rename from gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c +rename to gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c +index d8adc8946..d7b4f8991 100644 +--- a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c ++++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c +@@ -1,45 +1,66 @@ + /* { dg-do compile } */ + /* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */ ++/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/ + + #include + + #define AESE(r, v, key) (r = vaeseq_u8 ((v), (key))); + #define AESMC(r, i) (r = vaesmcq_u8 (i)) + ++const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; ++ + uint8x16_t dummy; + uint8x16_t a; + uint8x16_t b; + uint8x16_t c; + uint8x16_t d; +-uint8x16_t e; ++uint8x16_t x; ++uint8x16_t y; ++uint8x16_t k; ++ ++void foo (void) + +-void +-foo (void) + { +- AESE (a, a, e); ++ AESE (a, a, k); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); +- AESE (b, b, e); ++ AESE (b, b, k); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); +- AESE (c, c, e); ++ AESE (c, c, k); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); +- AESE (d, d, e); ++ AESE (d, d, k); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + +- AESMC (a, a); ++ x = x ^ k; ++ AESE (x, x, zero); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); +- AESMC (b, b); ++ y = y ^ k; ++ AESE (y, y, zero); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ ++ AESMC (d, d); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); + AESMC (c, c); + dummy = vaddq_u8 (dummy, dummy); + dummy = vaddq_u8 (dummy, dummy); +- AESMC (d, d); +-} ++ AESMC (b, b); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ AESMC (a, a); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); + +-/* { dg-final { scan-assembler-times "crypto_aese_fused" 4 } } */ ++ AESMC (y, y); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ AESMC (x, x); ++} + ++/* { dg-final { scan-assembler-times "crypto_aese_fused" 6 } } */ ++/* { dg-final { scan-assembler-not "veor" } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c b/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c +new file mode 100644 +index 000000000..dfe01b03a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c +@@ -0,0 +1,65 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */ ++/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/ ++ ++#include ++ ++#define AESD(r, v, key) (r = vaesdq_u8 ((v), (key))); ++#define AESIMC(r, i) (r = vaesimcq_u8 (i)) ++ ++const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; ++ ++uint8x16_t dummy; ++uint8x16_t a; ++uint8x16_t b; ++uint8x16_t c; ++uint8x16_t d; ++uint8x16_t x; ++uint8x16_t y; ++uint8x16_t k; ++ ++void foo (void) ++{ ++ AESD (a, a, k); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ AESD (b, b, k); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ AESD (c, c, k); ++ dummy = 
vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ AESD (d, d, k); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ ++ x = x ^ k; ++ AESD (x, x, zero); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ y = y ^ k; ++ AESD (y, y, zero); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ ++ AESIMC (d, d); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ AESIMC (c, c); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ AESIMC (b, b); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ AESIMC (a, a); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ ++ AESIMC (y, y); ++ dummy = vaddq_u8 (dummy, dummy); ++ dummy = vaddq_u8 (dummy, dummy); ++ AESIMC (x, x); ++} ++ ++/* { dg-final { scan-assembler-times "crypto_aesd_fused" 6 } } */ ++/* { dg-final { scan-assembler-not "veor" } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/asm-x-constraint-1.c b/gcc/testsuite/gcc.target/aarch64/asm-x-constraint-1.c +new file mode 100644 +index 000000000..a71043be5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/asm-x-constraint-1.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++void ++f (void) ++{ ++ register float s0 asm ("s0"); ++ register float s7 asm ("s7"); ++ register float s8 asm ("s8"); ++ register float s15 asm ("s15"); ++ register float s16 asm ("s16"); ++ register float s31 asm ("s31"); ++ asm volatile ("// s0 out: %s0" : "=w" (s0)); ++ asm volatile ("// s0 in: %s0" :: "x" (s0)); ++ asm volatile ("// s7 out: %s0" : "=w" (s7)); ++ asm volatile ("// s7 in: %s0" :: "x" (s7)); ++ asm volatile ("// s8 out: %s0" : "=w" (s8)); ++ asm volatile ("// s8 in: %s0" :: "x" (s8)); ++ asm volatile ("// s15 out: %s0" : "=w" (s15)); ++ asm volatile ("// s15 in: %s0" :: "x" (s15)); ++ asm volatile ("// s16 out: %s0" : "=w" (s16)); ++ asm volatile ("// s16 in: %s0" :: "x" (s16)); ++ asm volatile ("// s31 out: %s0" : "=w" (s31)); ++ asm volatile ("// s31 in: %s0" :: "x" (s31)); ++} ++ ++/* { dg-final { scan-assembler {\t// s0 out: s0\n.*[/]/ s0 in: s0\n} } } */ ++/* { dg-final { scan-assembler {\t// s7 out: s7\n.*[/]/ s7 in: s7\n} } } */ ++/* { dg-final { scan-assembler {\t// s8 out: s8\n.*[/]/ s8 in: s8\n} } } */ ++/* { dg-final { scan-assembler {\t// s15 out: s15\n.*[/]/ s15 in: s15\n} } } */ ++/* { dg-final { scan-assembler {\t// s16 out: s16\n.*\tfmov\t(s[0-7]), s16\n.*[/]/ s16 in: \1\n} } } */ ++/* { dg-final { scan-assembler {\t// s31 out: s31\n.*\tfmov\t(s[0-7]), s31\n.*[/]/ s31 in: \1\n} } } */ ++/* { dg-final { scan-assembler-not {\t// s16 in: s16\n} } } */ ++/* { dg-final { scan-assembler-not {\t// s31 in: s31\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/asm-y-constraint-1.c b/gcc/testsuite/gcc.target/aarch64/asm-y-constraint-1.c +new file mode 100644 +index 000000000..4a3fcac56 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/asm-y-constraint-1.c +@@ -0,0 +1,36 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++ ++void ++f (void) ++{ ++ register float s0 asm ("s0"); ++ register float s7 asm ("s7"); ++ register float s8 asm ("s8"); ++ register float s15 asm ("s15"); ++ register float s16 asm ("s16"); ++ register float s31 asm ("s31"); ++ asm volatile ("// s0 out: %s0" : "=w" (s0)); ++ asm volatile ("// s0 in: %s0" :: "y" (s0)); ++ asm volatile ("// s7 out: %s0" : "=w" (s7)); ++ asm volatile ("// s7 in: %s0" :: "y" (s7)); ++ asm volatile ("// s8 out: %s0" : "=w" 
(s8)); ++ asm volatile ("// s8 in: %s0" :: "y" (s8)); ++ asm volatile ("// s15 out: %s0" : "=w" (s15)); ++ asm volatile ("// s15 in: %s0" :: "y" (s15)); ++ asm volatile ("// s16 out: %s0" : "=w" (s16)); ++ asm volatile ("// s16 in: %s0" :: "y" (s16)); ++ asm volatile ("// s31 out: %s0" : "=w" (s31)); ++ asm volatile ("// s31 in: %s0" :: "y" (s31)); ++} ++ ++/* { dg-final { scan-assembler {\t// s0 out: s0\n.*[/]/ s0 in: s0\n} } } */ ++/* { dg-final { scan-assembler {\t// s7 out: s7\n.*[/]/ s7 in: s7\n} } } */ ++/* { dg-final { scan-assembler {\t// s8 out: s8\n.*\tfmov\t(s[0-7]), s8\n.*[/]/ s8 in: \1\n} } } */ ++/* { dg-final { scan-assembler {\t// s15 out: s15\n.*\tfmov\t(s[0-7]), s15\n.*[/]/ s15 in: \1\n} } } */ ++/* { dg-final { scan-assembler {\t// s16 out: s16\n.*\tfmov\t(s[0-7]), s16\n.*[/]/ s16 in: \1\n} } } */ ++/* { dg-final { scan-assembler {\t// s31 out: s31\n.*\tfmov\t(s[0-7]), s31\n.*[/]/ s31 in: \1\n} } } */ ++/* { dg-final { scan-assembler-not {\t// s8 in: s8\n} } } */ ++/* { dg-final { scan-assembler-not {\t// s15 in: s15\n} } } */ ++/* { dg-final { scan-assembler-not {\t// s16 in: s16\n} } } */ ++/* { dg-final { scan-assembler-not {\t// s31 in: s31\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c +index 49ca5d0d0..a828a72aa 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */ + + #include "atomic-comp-swap-release-acquire.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c +index 74f26348e..6823ce381 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "atomic-op-acq_rel.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c +index 66c1b1efe..87937de37 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "atomic-op-acquire.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c +index c09d0434e..60955e57d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "atomic-op-char.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c +index 5783ab84f..16cb11aee 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" 
} */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "atomic-op-consume.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c +index 18b8f0b04..bcab4e481 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + int v = 0; + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c +index 8520f0839..040e4a8d1 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "atomic-op-int.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c +index d011f8c5c..fc88b92cd 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + long v = 0; + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c +index ed96bfdb9..503d62b02 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "atomic-op-relaxed.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c +index fc4be17de..efe14aea7 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "atomic-op-release.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c +index 613000fe4..09973bf82 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "atomic-op-seq_cst.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c +index e82c8118e..e1dcebb0f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "atomic-op-short.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c +index f2a21ddf2..29246979b 100644 +--- 
a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -march=armv8-a+nolse" } */ ++/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */ + /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */ + + int +diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c +index 8d2ae67df..6daf9b08f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -march=armv8-a+nolse" } */ ++/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */ + /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */ + + int +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_1.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_1.c +new file mode 100644 +index 000000000..ef4376649 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_1.c +@@ -0,0 +1,102 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-O3 --save-temps -std=gnu90" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++/* ++**stacktest1: ++** sub sp, sp, #16 ++** str h0, \[sp, 14\] ++** ldr h0, \[sp, 14\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16_t stacktest1 (bfloat16_t __a) ++{ ++ volatile bfloat16_t b = __a; ++ return b; ++} ++ ++/* ++**bfloat_mov_ww: ++** mov v1.h\[0\], v2.h\[0\] ++** ret ++*/ ++void bfloat_mov_ww (void) ++{ ++ register bfloat16_t x asm ("h2"); ++ register bfloat16_t y asm ("h1"); ++ asm volatile ("" : "=w" (x)); ++ y = x; ++ asm volatile ("" :: "w" (y)); ++} ++ ++/* ++**bfloat_mov_rw: ++** dup v1.4h, w1 ++** ret ++*/ ++void bfloat_mov_rw (void) ++{ ++ register bfloat16_t x asm ("w1"); ++ register bfloat16_t y asm ("h1"); ++ asm volatile ("" : "=r" (x)); ++ y = x; ++ asm volatile ("" :: "w" (y)); ++} ++ ++/* ++**bfloat_mov_wr: ++** umov w1, v1.h\[0\] ++** ret ++*/ ++void bfloat_mov_wr (void) ++{ ++ register bfloat16_t x asm ("h1"); ++ register bfloat16_t y asm ("w1"); ++ asm volatile ("" : "=w" (x)); ++ y = x; ++ asm volatile ("" :: "r" (y)); ++} ++ ++/* ++**bfloat_mov_rr: ++** mov w1, w2 ++** ret ++*/ ++void bfloat_mov_rr (void) ++{ ++ register bfloat16_t x asm ("w2"); ++ register bfloat16_t y asm ("w1"); ++ asm volatile ("" : "=r" (x)); ++ y = x; ++ asm volatile ("" :: "r" (y)); ++} ++ ++/* ++**bfloat_mov_rm: ++** strh w2, \[x0\] ++** ret ++*/ ++void bfloat_mov_rm (bfloat16_t *ptr) ++{ ++ register bfloat16_t x asm ("w2"); ++ asm volatile ("" : "=r" (x)); ++ *ptr = x; ++} ++ ++/* ++**bfloat_mov_mr: ++** ldrh w2, \[x0\] ++** ret ++*/ ++void bfloat_mov_mr (bfloat16_t *ptr) ++{ ++ register bfloat16_t y asm ("w2"); ++ y = *ptr; ++ asm volatile ("" :: "r" (y)); ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_2.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_2.c +new file mode 100644 +index 000000000..df8e7518c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_2.c +@@ -0,0 +1,106 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-additional-options "-march=armv8.2-a -O3 
--save-temps -std=gnu90" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++#pragma GCC push_options ++#pragma GCC target ("+bf16") ++ ++/* ++**stacktest1: ++** sub sp, sp, #16 ++** str h0, \[sp, 14\] ++** ldr h0, \[sp, 14\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16_t stacktest1 (bfloat16_t __a) ++{ ++ volatile bfloat16_t b = __a; ++ return b; ++} ++ ++/* ++**bfloat_mov_ww: ++** mov v1.h\[0\], v2.h\[0\] ++** ret ++*/ ++void bfloat_mov_ww (void) ++{ ++ register bfloat16_t x asm ("h2"); ++ register bfloat16_t y asm ("h1"); ++ asm volatile ("" : "=w" (x)); ++ y = x; ++ asm volatile ("" :: "w" (y)); ++} ++ ++/* ++**bfloat_mov_rw: ++** dup v1.4h, w1 ++** ret ++*/ ++void bfloat_mov_rw (void) ++{ ++ register bfloat16_t x asm ("w1"); ++ register bfloat16_t y asm ("h1"); ++ asm volatile ("" : "=r" (x)); ++ y = x; ++ asm volatile ("" :: "w" (y)); ++} ++ ++/* ++**bfloat_mov_wr: ++** umov w1, v1.h\[0\] ++** ret ++*/ ++void bfloat_mov_wr (void) ++{ ++ register bfloat16_t x asm ("h1"); ++ register bfloat16_t y asm ("w1"); ++ asm volatile ("" : "=w" (x)); ++ y = x; ++ asm volatile ("" :: "r" (y)); ++} ++ ++/* ++**bfloat_mov_rr: ++** mov w1, w2 ++** ret ++*/ ++void bfloat_mov_rr (void) ++{ ++ register bfloat16_t x asm ("w2"); ++ register bfloat16_t y asm ("w1"); ++ asm volatile ("" : "=r" (x)); ++ y = x; ++ asm volatile ("" :: "r" (y)); ++} ++ ++/* ++**bfloat_mov_rm: ++** strh w2, \[x0\] ++** ret ++*/ ++void bfloat_mov_rm (bfloat16_t *ptr) ++{ ++ register bfloat16_t x asm ("w2"); ++ asm volatile ("" : "=r" (x)); ++ *ptr = x; ++} ++ ++/* ++**bfloat_mov_mr: ++** ldrh w2, \[x0\] ++** ret ++*/ ++void bfloat_mov_mr (bfloat16_t *ptr) ++{ ++ register bfloat16_t y asm ("w2"); ++ y = *ptr; ++ asm volatile ("" :: "r" (y)); ++} ++ ++#pragma GCC pop_options ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_3.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_3.c +new file mode 100644 +index 000000000..5d7a4317c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_3.c +@@ -0,0 +1,101 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps -std=gnu90" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++/* ++**stacktest1: ++** sub sp, sp, #16 ++** str h0, \[sp, 14\] ++** ldr h0, \[sp, 14\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16_t stacktest1 (bfloat16_t __a) ++{ ++ volatile bfloat16_t b = __a; ++ return b; ++} ++ ++/* ++**bfloat_mov_ww: ++** mov v1.h\[0\], v2.h\[0\] ++** ret ++*/ ++void bfloat_mov_ww (void) ++{ ++ register bfloat16_t x asm ("h2"); ++ register bfloat16_t y asm ("h1"); ++ asm volatile ("" : "=w" (x)); ++ y = x; ++ asm volatile ("" :: "w" (y)); ++} ++ ++/* ++**bfloat_mov_rw: ++** dup v1.4h, w1 ++** ret ++*/ ++void bfloat_mov_rw (void) ++{ ++ register bfloat16_t x asm ("w1"); ++ register bfloat16_t y asm ("h1"); ++ asm volatile ("" : "=r" (x)); ++ y = x; ++ asm volatile ("" :: "w" (y)); ++} ++ ++/* ++**bfloat_mov_wr: ++** umov w1, v1.h\[0\] ++** ret ++*/ ++void bfloat_mov_wr (void) ++{ ++ register bfloat16_t x asm ("h1"); ++ register bfloat16_t y asm ("w1"); ++ asm volatile ("" : "=w" (x)); ++ y = x; ++ asm volatile ("" :: "r" (y)); ++} ++ ++/* ++**bfloat_mov_rr: ++** mov w1, w2 ++** ret ++*/ ++void bfloat_mov_rr (void) ++{ ++ register bfloat16_t x asm ("w2"); ++ register bfloat16_t y asm ("w1"); ++ asm volatile ("" : "=r" (x)); ++ y = x; ++ asm volatile ("" :: "r" (y)); ++} ++ ++/* 
++**bfloat_mov_rm: ++** strh w2, \[x0\] ++** ret ++*/ ++void bfloat_mov_rm (bfloat16_t *ptr) ++{ ++ register bfloat16_t x asm ("w2"); ++ asm volatile ("" : "=r" (x)); ++ *ptr = x; ++} ++ ++/* ++**bfloat_mov_mr: ++** ldrh w2, \[x0\] ++** ret ++*/ ++void bfloat_mov_mr (bfloat16_t *ptr) ++{ ++ register bfloat16_t y asm ("w2"); ++ y = *ptr; ++ asm volatile ("" :: "r" (y)); ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_4.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_4.c +new file mode 100644 +index 000000000..b812011c2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_4.c +@@ -0,0 +1,16 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-std=c99 -pedantic-errors -O3 --save-temps" } */ ++ ++#include ++ ++_Complex bfloat16_t stacktest1 (_Complex bfloat16_t __a) ++{ ++ volatile _Complex bfloat16_t b = __a; ++ return b; ++} ++ ++/* { dg-error {ISO C does not support plain 'complex' meaning 'double complex'} "" { target *-*-* } 8 } */ ++/* { dg-error {expected '=', ',', ';', 'asm' or '__attribute__' before 'stacktest1'} "" { target *-*-* } 8 } */ ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c +new file mode 100644 +index 000000000..7c9188cf2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c +@@ -0,0 +1,219 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-Wno-pedantic -O3 --save-temps" } */ ++ ++#include ++ ++bfloat16_t glob_bfloat; ++ ++int is_an_int; ++short is_a_short_int; ++float is_a_float; ++float is_a_float16; ++double is_a_double; ++ ++float *float_ptr; ++ ++bfloat16_t foo1 (void) { return (bfloat16_t) 0x1234; } /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++bfloat16_t foo2 (void) { return (bfloat16_t) (short) 0x1234; } /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ ++bfloat16_t footest (bfloat16_t scalar0) ++{ ++ ++ /* Initialisation */ ++ ++ bfloat16_t scalar1_1; ++ bfloat16_t scalar1_2 = glob_bfloat; ++ bfloat16_t scalar1_3 = 0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar1_4 = 0.1; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar1_5 = is_a_float; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar1_6 = is_an_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar1_7 = is_a_float16; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar1_8 = is_a_double; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar1_9 = is_a_short_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ ++ int initi_1_1 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ float initi_1_2 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ float16_t initi_1_3 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ short initi_1_4 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ double initi_1_5 = glob_bfloat; /* { dg-error {invalid conversion from 
type 'bfloat16_t'} } */ ++ ++ bfloat16_t scalar2_1 = {}; /* { dg-error {empty scalar initializer} } */ ++ bfloat16_t scalar2_2 = { glob_bfloat }; ++ bfloat16_t scalar2_3 = { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar2_4 = { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar2_5 = { is_a_float }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar2_6 = { is_an_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar2_7 = { is_a_float16 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar2_8 = { is_a_double }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16_t scalar2_9 = { is_a_short_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ ++ int initi_2_1 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ float initi_2_2 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ float16_t initi_2_3 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ short initi_2_4 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ double initi_2_5 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ ++ /* Assignments. */ ++ ++ glob_bfloat = glob_bfloat; ++ glob_bfloat = 0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ glob_bfloat = 0.1; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ glob_bfloat = is_a_float; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ glob_bfloat = is_an_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ glob_bfloat = is_a_float16; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ glob_bfloat = is_a_double; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ glob_bfloat = is_a_short_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ ++ is_an_int = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ is_a_float = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ is_a_float16 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ is_a_double = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ is_a_short_int = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ ++ /* Casting. */ ++ ++ (void) glob_bfloat; ++ (bfloat16_t) glob_bfloat; ++ ++ (int) glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ (float) glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ (float16_t) glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ (double) glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ (short) glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ ++ (bfloat16_t) is_an_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16_t) is_a_float; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16_t) is_a_float16; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16_t) is_a_double; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16_t) is_a_short_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ ++ /* Compound literals. 
*/ ++ ++ (bfloat16_t) {}; /* { dg-error {empty scalar initializer} } */ ++ (bfloat16_t) { glob_bfloat }; ++ (bfloat16_t) { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16_t) { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16_t) { is_a_float }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16_t) { is_an_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16_t) { is_a_float16 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16_t) { is_a_double }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16_t) { is_a_short_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ ++ (int) { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ (float) { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ (float16_t) { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ (double) { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ (short) { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ ++ /* Arrays and Structs. */ ++ ++ typedef bfloat16_t array_type[2]; ++ extern bfloat16_t extern_array[]; ++ ++ bfloat16_t array[2]; ++ bfloat16_t zero_length_array[0]; ++ bfloat16_t empty_init_array[] = {}; ++ typedef bfloat16_t some_other_type[is_an_int]; ++ ++ struct struct1 { ++ bfloat16_t a; ++ }; ++ ++ union union1 { ++ bfloat16_t a; ++ }; ++ ++ /* Addressing and dereferencing. */ ++ ++ bfloat16_t *bfloat_ptr = &scalar0; ++ scalar0 = *bfloat_ptr; ++ ++ /* Pointer assignment. */ ++ ++ bfloat16_t *bfloat_ptr2 = bfloat_ptr; ++ bfloat16_t *bfloat_ptr3 = array; ++ ++ /* Pointer arithmetic. */ ++ ++ ++bfloat_ptr; ++ --bfloat_ptr; ++ bfloat_ptr++; ++ bfloat_ptr--; ++ bfloat_ptr += 1; ++ bfloat_ptr -= 1; ++ bfloat_ptr - bfloat_ptr2; ++ bfloat_ptr = &bfloat_ptr3[0]; ++ bfloat_ptr = &bfloat_ptr3[1]; ++ ++ /* Simple comparison. */ ++ scalar0 > glob_bfloat; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ glob_bfloat == scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0 > is_a_float; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ is_a_float == scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0 > 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ 0 == scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0 > 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ 0.1 == scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0 > is_an_int; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ is_an_int == scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++ /* Pointer comparison. */ ++ ++ bfloat_ptr == &scalar0; ++ bfloat_ptr != &scalar0; ++ bfloat_ptr < &scalar0; ++ bfloat_ptr <= &scalar0; ++ bfloat_ptr > &scalar0; ++ bfloat_ptr >= &scalar0; ++ bfloat_ptr == bfloat_ptr2; ++ bfloat_ptr != bfloat_ptr2; ++ bfloat_ptr < bfloat_ptr2; ++ bfloat_ptr <= bfloat_ptr2; ++ bfloat_ptr > bfloat_ptr2; ++ bfloat_ptr >= bfloat_ptr2; ++ ++ /* Conditional expressions. */ ++ ++ 0 ? scalar0 : scalar0; ++ 0 ? scalar0 : is_a_float; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ 0 ? 
is_a_float : scalar0; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ 0 ? scalar0 : 0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ 0 ? 0 : scalar0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ 0 ? 0.1 : scalar0; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ 0 ? scalar0 : 0.1; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ 0 ? bfloat_ptr : bfloat_ptr2; ++ 0 ? bfloat_ptr : float_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ ++ 0 ? float_ptr : bfloat_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ ++ ++ scalar0 ? scalar0 : scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0 ? is_a_float : scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0 ? scalar0 : is_a_float; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0 ? is_a_float : is_a_float; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++ /* Unary operators. */ ++ ++ +scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ -scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ~scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ !scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ *scalar0; /* { dg-error {invalid type argument of unary '\*'} } */ ++ __real scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ __imag scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ --scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0++; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0--; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++ /* Binary arithmetic operations. 
*/ ++ ++ scalar0 = glob_bfloat + *bfloat_ptr; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0 = glob_bfloat + 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0 = glob_bfloat + 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ scalar0 = glob_bfloat + is_a_float; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++ return scalar0; ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_1.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_1.c +new file mode 100644 +index 000000000..6cad557eb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_1.c +@@ -0,0 +1,93 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-O3 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++/* ++**stacktest1: ++** sub sp, sp, #16 ++** str h0, \[sp, 14\] ++** ldr h0, \[sp, 14\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16_t stacktest1 (bfloat16_t __a) ++{ ++ volatile bfloat16_t b = __a; ++ return b; ++} ++ ++/* ++**stacktest2: ++** sub sp, sp, #16 ++** str d0, \[sp, 8\] ++** ldr d0, \[sp, 8\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16x4_t stacktest2 (bfloat16x4_t __a) ++{ ++ volatile bfloat16x4_t b = __a; ++ return b; ++} ++ ++/* ++**stacktest3: ++** sub sp, sp, #16 ++** str q0, \[sp\] ++** ldr q0, \[sp\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16x8_t stacktest3 (bfloat16x8_t __a) ++{ ++ volatile bfloat16x8_t b = __a; ++ return b; ++} ++ ++/* Test compilation of __attribute__ vectors of 8, 16, 32, etc. BFloats. */ ++typedef bfloat16_t v8bf __attribute__((vector_size(16))); ++typedef bfloat16_t v16bf __attribute__((vector_size(32))); ++typedef bfloat16_t v32bf __attribute__((vector_size(64))); ++typedef bfloat16_t v64bf __attribute__((vector_size(128))); ++typedef bfloat16_t v128bf __attribute__((vector_size(256))); ++ ++v8bf stacktest4 (v8bf __a) ++{ ++ volatile v8bf b = __a; ++ return b; ++} ++ ++v16bf stacktest5 (v16bf __a) ++{ ++ volatile v16bf b = __a; ++ return b; ++} ++ ++v32bf stacktest6 (v32bf __a) ++{ ++ volatile v32bf b = __a; ++ return b; ++} ++ ++v64bf stacktest7 (v64bf __a) ++{ ++ volatile v64bf b = __a; ++ return b; ++} ++ ++v128bf stacktest8 (v128bf __a) ++{ ++ volatile v128bf b = __a; ++ return b; ++} ++ ++/* Test use of constant values to assign values to vectors. 
*/ ++ ++typedef bfloat16_t v2bf __attribute__((vector_size(4))); ++v2bf c2 (void) { return (v2bf) 0x12345678; } ++ ++bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; } +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_2.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_2.c +new file mode 100644 +index 000000000..3891dcfc9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_2.c +@@ -0,0 +1,97 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++#pragma GCC push_options ++#pragma GCC target ("+bf16") ++ ++/* ++**stacktest1: ++** sub sp, sp, #16 ++** str h0, \[sp, 14\] ++** ldr h0, \[sp, 14\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16_t stacktest1 (bfloat16_t __a) ++{ ++ volatile bfloat16_t b = __a; ++ return b; ++} ++ ++/* ++**stacktest2: ++** sub sp, sp, #16 ++** str d0, \[sp, 8\] ++** ldr d0, \[sp, 8\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16x4_t stacktest2 (bfloat16x4_t __a) ++{ ++ volatile bfloat16x4_t b = __a; ++ return b; ++} ++ ++/* ++**stacktest3: ++** sub sp, sp, #16 ++** str q0, \[sp\] ++** ldr q0, \[sp\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16x8_t stacktest3 (bfloat16x8_t __a) ++{ ++ volatile bfloat16x8_t b = __a; ++ return b; ++} ++ ++/* Test compilation of __attribute__ vectors of 8, 16, 32, etc. BFloats. */ ++typedef bfloat16_t v8bf __attribute__((vector_size(16))); ++typedef bfloat16_t v16bf __attribute__((vector_size(32))); ++typedef bfloat16_t v32bf __attribute__((vector_size(64))); ++typedef bfloat16_t v64bf __attribute__((vector_size(128))); ++typedef bfloat16_t v128bf __attribute__((vector_size(256))); ++ ++v8bf stacktest4 (v8bf __a) ++{ ++ volatile v8bf b = __a; ++ return b; ++} ++ ++v16bf stacktest5 (v16bf __a) ++{ ++ volatile v16bf b = __a; ++ return b; ++} ++ ++v32bf stacktest6 (v32bf __a) ++{ ++ volatile v32bf b = __a; ++ return b; ++} ++ ++v64bf stacktest7 (v64bf __a) ++{ ++ volatile v64bf b = __a; ++ return b; ++} ++ ++v128bf stacktest8 (v128bf __a) ++{ ++ volatile v128bf b = __a; ++ return b; ++} ++ ++/* Test use of constant values to assign values to vectors. 
*/ ++ ++typedef bfloat16_t v2bf __attribute__((vector_size(4))); ++v2bf c2 (void) { return (v2bf) 0x12345678; } ++ ++bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; } ++ ++#pragma GCC pop_options +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_3.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_3.c +new file mode 100644 +index 000000000..b35f5e527 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_3.c +@@ -0,0 +1,92 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++/* ++**stacktest1: ++** sub sp, sp, #16 ++** str h0, \[sp, 14\] ++** ldr h0, \[sp, 14\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16_t stacktest1 (bfloat16_t __a) ++{ ++ volatile bfloat16_t b = __a; ++ return b; ++} ++ ++/* ++**stacktest2: ++** sub sp, sp, #16 ++** str d0, \[sp, 8\] ++** ldr d0, \[sp, 8\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16x4_t stacktest2 (bfloat16x4_t __a) ++{ ++ volatile bfloat16x4_t b = __a; ++ return b; ++} ++ ++/* ++**stacktest3: ++** sub sp, sp, #16 ++** str q0, \[sp\] ++** ldr q0, \[sp\] ++** add sp, sp, 16 ++** ret ++*/ ++bfloat16x8_t stacktest3 (bfloat16x8_t __a) ++{ ++ volatile bfloat16x8_t b = __a; ++ return b; ++} ++ ++/* Test compilation of __attribute__ vectors of 8, 16, 32, etc. BFloats. */ ++typedef bfloat16_t v8bf __attribute__((vector_size(16))); ++typedef bfloat16_t v16bf __attribute__((vector_size(32))); ++typedef bfloat16_t v32bf __attribute__((vector_size(64))); ++typedef bfloat16_t v64bf __attribute__((vector_size(128))); ++typedef bfloat16_t v128bf __attribute__((vector_size(256))); ++ ++v8bf stacktest4 (v8bf __a) ++{ ++ volatile v8bf b = __a; ++ return b; ++} ++ ++v16bf stacktest5 (v16bf __a) ++{ ++ volatile v16bf b = __a; ++ return b; ++} ++ ++v32bf stacktest6 (v32bf __a) ++{ ++ volatile v32bf b = __a; ++ return b; ++} ++ ++v64bf stacktest7 (v64bf __a) ++{ ++ volatile v64bf b = __a; ++ return b; ++} ++ ++v128bf stacktest8 (v128bf __a) ++{ ++ volatile v128bf b = __a; ++ return b; ++} ++ ++/* Test use of constant values to assign values to vectors. */ ++ ++typedef bfloat16_t v2bf __attribute__((vector_size(4))); ++v2bf c2 (void) { return (v2bf) 0x12345678; } ++ ++bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; } +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_1.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_1.c +new file mode 100644 +index 000000000..4af3d295f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_1.c +@@ -0,0 +1,262 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-O3 --save-temps -Wno-pedantic" } */ ++#include ++ ++bfloat16_t glob_bfloat; ++bfloat16x4_t glob_bfloat_vec; ++ ++float32x4_t is_a_float_vec; ++float32x2_t is_a_float_pair; ++ ++float16x4_t *float_ptr; ++float16x4_t is_a_float16_vec; ++ ++int32x4_t is_an_int_vec; ++int32x2_t is_an_int_pair; ++int16x4_t is_a_short_vec; ++ ++int is_an_int; ++short is_a_short_int; ++float is_a_float; ++float is_a_float16; ++double is_a_double; ++ ++/* Create a vector of 2 bfloat16_t. 
*/ ++typedef bfloat16_t v2bf __attribute__((vector_size(4))); ++v2bf foo1 (void) { return (v2bf) 0x12345678; } ++bfloat16x4_t foo2 (void) { return (bfloat16x4_t) 0x1234567812345678; } ++ ++bfloat16x4_t footest (bfloat16x4_t vector0) ++{ ++ /* Initialisation */ ++ ++ bfloat16x4_t vector1_1; ++ bfloat16x4_t vector1_2 = glob_bfloat_vec; ++ bfloat16x4_t vector1_3 = is_a_float_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'float32x4_t'} } */ ++ bfloat16x4_t vector1_4 = is_an_int_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'int32x4_t'} } */ ++ bfloat16x4_t vector1_5 = is_a_float16_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'float16x4_t'} } */ ++ bfloat16x4_t vector1_6 = is_a_float_pair; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'float32x2_t'} } */ ++ bfloat16x4_t vector1_7 = is_an_int_pair; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'int32x2_t'} } */ ++ bfloat16x4_t vector1_8 = is_a_short_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'int16x4_t'} } */ ++ ++ int32x4_t initi_1_1 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int32x4_t' using type 'bfloat16x4_t'} } */ ++ float32x4_t initi_1_2 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float32x4_t' using type 'bfloat16x4_t'} } */ ++ float16x4_t initi_1_3 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float16x4_t' using type 'bfloat16x4_t'} } */ ++ float32x2_t initi_1_4 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float32x2_t' using type 'bfloat16x4_t'} } */ ++ int32x2_t initi_1_5 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int32x2_t' using type 'bfloat16x4_t'} } */ ++ int16x4_t initi_1_6 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int16x4_t' using type 'bfloat16x4_t'} } */ ++ ++ bfloat16x4_t vector2_1 = {}; ++ bfloat16x4_t vector2_2 = { glob_bfloat }; ++ bfloat16x4_t vector2_3 = { glob_bfloat, glob_bfloat, glob_bfloat, glob_bfloat }; ++ bfloat16x4_t vector2_4 = { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x4_t vector2_5 = { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x4_t vector2_6 = { is_a_float16 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x4_t vector2_7 = { is_a_float }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x4_t vector2_8 = { is_an_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x4_t vector2_9 = { is_a_short_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x4_t vector2_10 = { 0.0, 0, is_a_short_int, is_a_float }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ ++ int32x4_t initi_2_1 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ float32x4_t initi_2_2 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ float16x4_t initi_2_3 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ float32x2_t initi_2_4 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ int32x2_t initi_2_5 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ int16x4_t 
initi_2_6 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ ++ /* Assignments to/from vectors. */ ++ ++ glob_bfloat_vec = glob_bfloat_vec; ++ glob_bfloat_vec = 0; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'int'} } */ ++ glob_bfloat_vec = 0.1; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'double'} } */ ++ glob_bfloat_vec = is_a_float_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'float32x4_t'} } */ ++ glob_bfloat_vec = is_an_int_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'int32x4_t'} } */ ++ glob_bfloat_vec = is_a_float16_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'float16x4_t'} } */ ++ glob_bfloat_vec = is_a_float_pair; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'float32x2_t'} } */ ++ glob_bfloat_vec = is_an_int_pair; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'int32x2_t'} } */ ++ glob_bfloat_vec = is_a_short_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'int16x4_t'} } */ ++ ++ is_an_int_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'int32x4_t' from type 'bfloat16x4_t'} } */ ++ is_a_float_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float32x4_t' from type 'bfloat16x4_t'} } */ ++ is_a_float16_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float16x4_t' from type 'bfloat16x4_t'} } */ ++ is_a_float_pair = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float32x2_t' from type 'bfloat16x4_t'} } */ ++ is_an_int_pair = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'int32x2_t' from type 'bfloat16x4_t'} } */ ++ is_a_short_vec = glob_bfloat_vec;/* { dg-error {incompatible types when assigning to type 'int16x4_t' from type 'bfloat16x4_t'} } */ ++ ++ /* Assignments to/from elements. */ ++ ++ vector2_3[0] = glob_bfloat; ++ vector2_3[0] = is_an_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ vector2_3[0] = is_a_short_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ vector2_3[0] = is_a_float; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ vector2_3[0] = is_a_float16; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ vector2_3[0] = 0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ vector2_3[0] = 0.1; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ ++ glob_bfloat = vector2_3[0]; ++ is_an_int = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ is_a_short_int = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ is_a_float = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ is_a_float16 = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ ++ /* Compound literals. 
*/ ++ ++ (bfloat16x4_t) {}; ++ ++ (bfloat16x4_t) { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16x4_t) { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16x4_t) { is_a_float_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float32x4_t'} } */ ++ (bfloat16x4_t) { is_an_int_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int32x4_t'} } */ ++ (bfloat16x4_t) { is_a_float_pair }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float32x2_t'} } */ ++ (bfloat16x4_t) { is_an_int_pair }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int32x2_t'} } */ ++ (bfloat16x4_t) { is_a_float16_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float16x4_t'} } */ ++ (bfloat16x4_t) { is_a_short_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int16x4_t'} } */ ++ ++ (bfloat16x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'bfloat16x4_t'} } */ ++ (int32x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'int' using type 'bfloat16x4_t'} } */ ++ (float32x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'float' using type 'bfloat16x4_t'} } */ ++ (int32x2_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'int' using type 'bfloat16x4_t'} } */ ++ (float16x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type '__fp16' using type 'bfloat16x4_t'} } */ ++ (int16x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'short int' using type 'bfloat16x4_t'} } */ ++ ++ /* Casting. 
*/ ++ ++ (void) glob_bfloat_vec; ++ (bfloat16x4_t) glob_bfloat_vec; ++ ++ (bfloat16_t) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ ++ (short) glob_bfloat_vec; /* { dg-error {can't convert a vector of type 'bfloat16x4_t' to type 'short int' which has different size} } */ ++ (int) glob_bfloat_vec; /* { dg-error {can't convert a vector of type 'bfloat16x4_t' to type 'int' which has different size} } */ ++ (float16_t) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ ++ (float) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ ++ (double) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ ++ ++ (int32x4_t) glob_bfloat_vec; /* { dg-error {can't convert a value of type 'bfloat16x4_t' to vector type '__Int32x4_t' which has different size} } */ ++ (float32x4_t) glob_bfloat_vec; /* { dg-error {can't convert a value of type 'bfloat16x4_t' to vector type '__Float32x4_t' which has different size} } */ ++ (float16x4_t) glob_bfloat_vec; ++ (int32x2_t) glob_bfloat_vec; ++ (float32x2_t) glob_bfloat_vec; ++ (int16x4_t) glob_bfloat_vec; ++ ++ (bfloat16x4_t) is_an_int_vec; /* { dg-error {can't convert a value of type 'int32x4_t' to vector type '__Bfloat16x4_t' which has different size} } */ ++ (bfloat16x4_t) is_a_float_vec; /* { dg-error {can't convert a value of type 'float32x4_t' to vector type '__Bfloat16x4_t' which has different size} } */ ++ (bfloat16x4_t) is_a_float16_vec; ++ (bfloat16x4_t) is_an_int_pair; ++ (bfloat16x4_t) is_a_float_pair; ++ (bfloat16x4_t) is_a_short_vec; ++ (bfloat16x4_t) is_a_double; /* { dg-error {can't convert value to a vector} } */ ++ ++ /* Arrays and Structs. */ ++ ++ typedef bfloat16x4_t array_type[2]; ++ extern bfloat16x4_t extern_array[]; ++ ++ bfloat16x4_t array[2]; ++ bfloat16x4_t zero_length_array[0]; ++ bfloat16x4_t empty_init_array[] = {}; ++ typedef bfloat16x4_t some_other_type[is_an_int]; ++ ++ struct struct1 { ++ bfloat16x4_t a; ++ }; ++ ++ union union1 { ++ bfloat16x4_t a; ++ }; ++ ++ /* Addressing and dereferencing. */ ++ ++ bfloat16x4_t *bfloat_ptr = &vector0; ++ vector0 = *bfloat_ptr; ++ ++ /* Pointer assignment. */ ++ ++ bfloat16x4_t *bfloat_ptr2 = bfloat_ptr; ++ bfloat16x4_t *bfloat_ptr3 = array; ++ ++ /* Pointer arithmetic. */ ++ ++ ++bfloat_ptr; ++ --bfloat_ptr; ++ bfloat_ptr++; ++ bfloat_ptr--; ++ bfloat_ptr += 1; ++ bfloat_ptr -= 1; ++ bfloat_ptr - bfloat_ptr2; ++ bfloat_ptr = &bfloat_ptr3[0]; ++ bfloat_ptr = &bfloat_ptr3[1]; ++ ++ /* Simple comparison. */ ++ vector0 > glob_bfloat_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ glob_bfloat_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 > is_a_float_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ is_a_float_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 > 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ 0 == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 > 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ 0.1 == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 > is_an_int_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ is_an_int_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++ /* Pointer comparison. 
*/ ++ ++ bfloat_ptr == &vector0; ++ bfloat_ptr != &vector0; ++ bfloat_ptr < &vector0; ++ bfloat_ptr <= &vector0; ++ bfloat_ptr > &vector0; ++ bfloat_ptr >= &vector0; ++ bfloat_ptr == bfloat_ptr2; ++ bfloat_ptr != bfloat_ptr2; ++ bfloat_ptr < bfloat_ptr2; ++ bfloat_ptr <= bfloat_ptr2; ++ bfloat_ptr > bfloat_ptr2; ++ bfloat_ptr >= bfloat_ptr2; ++ ++ /* Conditional expressions. */ ++ ++ 0 ? vector0 : vector0; ++ 0 ? vector0 : is_a_float_vec; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? is_a_float_vec : vector0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? vector0 : is_a_float16_vec; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? is_a_float16_vec : vector0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? vector0 : 0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? 0 : vector0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? 0.1 : vector0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? vector0 : 0.1; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? bfloat_ptr : bfloat_ptr2; ++ 0 ? bfloat_ptr : float_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ ++ 0 ? float_ptr : bfloat_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ ++ ++ vector0 ? vector0 : vector0; /* { dg-error {used vector type where scalar is required} } */ ++ vector0 ? is_a_float16_vec : vector0; /* { dg-error {used vector type where scalar is required} } */ ++ vector0 ? vector0 : is_a_float16_vec; /* { dg-error {used vector type where scalar is required} } */ ++ vector0 ? is_a_float16_vec : is_a_float16_vec; /* { dg-error {used vector type where scalar is required} } */ ++ ++ /* Unary operators. */ ++ ++ +vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ -vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ~vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ !vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ *vector0; /* { dg-error {invalid type argument of unary '\*'} } */ ++ __real vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ __imag vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ --vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0++; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0--; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++ /* Binary arithmetic operations. 
*/ ++ ++ vector0 = glob_bfloat_vec + *bfloat_ptr; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 = glob_bfloat_vec + 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 = glob_bfloat_vec + 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 = glob_bfloat_vec + is_a_float_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++ return vector0; ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_2.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_2.c +new file mode 100644 +index 000000000..99c499ce8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_2.c +@@ -0,0 +1,260 @@ ++/* { dg-do assemble { target { aarch64*-*-* } } } */ ++/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ ++/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ ++/* { dg-add-options arm_v8_2a_bf16_neon } */ ++/* { dg-additional-options "-O3 --save-temps -Wno-pedantic" } */ ++#include ++ ++bfloat16_t glob_bfloat; ++bfloat16x8_t glob_bfloat_vec; ++ ++float32x4_t is_a_float_vec; ++float64x2_t is_a_double_pair; ++ ++float16x8_t *float_ptr; ++float16x8_t is_a_float16_vec; ++ ++int32x4_t is_an_int_vec; ++int64x2_t is_a_long_int_pair; ++int16x8_t is_a_short_vec; ++ ++int is_an_int; ++short is_a_short_int; ++float is_a_float; ++float is_a_float16; ++double is_a_double; ++ ++bfloat16x8_t foo3 (void) { return (bfloat16x8_t) 0x12345678123456781234567812345678; } ++ /* { dg-error {integer constant is too large for its type} "" {target *-*-*} 27 } */ ++ /* { dg-error {can't convert a value of type 'long int' to vector type '__Bfloat16x8_t' which has different size} "" {target *-*-*} 27 } */ ++ ++bfloat16x8_t footest (bfloat16x8_t vector0) ++{ ++ /* Initialisation */ ++ ++ bfloat16x8_t vector1_1; ++ bfloat16x8_t vector1_2 = glob_bfloat_vec; ++ bfloat16x8_t vector1_3 = is_a_float_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'float32x4_t'} } */ ++ bfloat16x8_t vector1_4 = is_an_int_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'int32x4_t'} } */ ++ bfloat16x8_t vector1_5 = is_a_float16_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'float16x8_t'} } */ ++ bfloat16x8_t vector1_6 = is_a_double_pair; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'float64x2_t'} } */ ++ bfloat16x8_t vector1_7 = is_a_long_int_pair; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'int64x2_t'} } */ ++ bfloat16x8_t vector1_8 = is_a_short_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'int16x8_t'} } */ ++ ++ int32x4_t initi_1_1 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int32x4_t' using type 'bfloat16x8_t'} } */ ++ float32x4_t initi_1_2 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float32x4_t' using type 'bfloat16x8_t'} } */ ++ float16x8_t initi_1_3 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float16x8_t' using type 'bfloat16x8_t'} } */ ++ float64x2_t initi_1_4 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float64x2_t' using type 'bfloat16x8_t'} } */ ++ int64x2_t initi_1_5 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int64x2_t' using type 'bfloat16x8_t'} } */ ++ int16x8_t 
initi_1_6 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int16x8_t' using type 'bfloat16x8_t'} } */ ++ ++ bfloat16x8_t vector2_1 = {}; ++ bfloat16x8_t vector2_2 = { glob_bfloat }; ++ bfloat16x8_t vector2_3 = { glob_bfloat, glob_bfloat, glob_bfloat, glob_bfloat }; ++ bfloat16x8_t vector2_4 = { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x8_t vector2_5 = { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x8_t vector2_6 = { is_a_float16 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x8_t vector2_7 = { is_a_float }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x8_t vector2_8 = { is_an_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x8_t vector2_9 = { is_a_short_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ bfloat16x8_t vector2_10 = { 0.0, 0, is_a_short_int, is_a_float }; /* { dg-error "invalid conversion to type 'bfloat16_t'" } */ ++ ++ int32x4_t initi_2_1 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ float32x4_t initi_2_2 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ float16x8_t initi_2_3 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ float64x2_t initi_2_4 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ int64x2_t initi_2_5 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ int16x8_t initi_2_6 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ ++ /* Assignments to/from vectors. */ ++ ++ glob_bfloat_vec = glob_bfloat_vec; ++ glob_bfloat_vec = 0; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'int'} } */ ++ glob_bfloat_vec = 0.1; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'double'} } */ ++ glob_bfloat_vec = is_a_float_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'float32x4_t'} } */ ++ glob_bfloat_vec = is_an_int_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'int32x4_t'} } */ ++ glob_bfloat_vec = is_a_float16_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'float16x8_t'} } */ ++ glob_bfloat_vec = is_a_double_pair; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'float64x2_t'} } */ ++ glob_bfloat_vec = is_a_long_int_pair; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'int64x2_t'} } */ ++ glob_bfloat_vec = is_a_short_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'int16x8_t'} } */ ++ ++ is_an_int_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'int32x4_t' from type 'bfloat16x8_t'} } */ ++ is_a_float_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float32x4_t' from type 'bfloat16x8_t'} } */ ++ is_a_float16_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float16x8_t' from type 'bfloat16x8_t'} } */ ++ is_a_double_pair = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float64x2_t' from type 'bfloat16x8_t'} } */ ++ is_a_long_int_pair = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'int64x2_t' from type 'bfloat16x8_t'} } */ ++ 
is_a_short_vec = glob_bfloat_vec;/* { dg-error {incompatible types when assigning to type 'int16x8_t' from type 'bfloat16x8_t'} } */ ++ ++ /* Assignments to/from elements. */ ++ ++ vector2_3[0] = glob_bfloat; ++ vector2_3[0] = is_an_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ vector2_3[0] = is_a_short_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ vector2_3[0] = is_a_float; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ vector2_3[0] = is_a_float16; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ vector2_3[0] = 0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ vector2_3[0] = 0.1; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ ++ glob_bfloat = vector2_3[0]; ++ is_an_int = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ is_a_short_int = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ is_a_float = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ is_a_float16 = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ ++ ++ /* Compound literals. */ ++ ++ (bfloat16x8_t) {}; ++ ++ (bfloat16x8_t) { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16x8_t) { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ (bfloat16x8_t) { is_a_float_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float32x4_t'} } */ ++ (bfloat16x8_t) { is_an_int_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int32x4_t'} } */ ++ (bfloat16x8_t) { is_a_double_pair }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float64x2_t'} } */ ++ (bfloat16x8_t) { is_a_long_int_pair }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int64x2_t'} } */ ++ (bfloat16x8_t) { is_a_float16_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float16x8_t'} } */ ++ (bfloat16x8_t) { is_a_short_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int16x8_t'} } */ ++ ++ (bfloat16x8_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'bfloat16x8_t'} } */ ++ (int32x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'int' using type 'bfloat16x8_t'} } */ ++ (float32x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'float' using type 'bfloat16x8_t'} } */ ++ (int64x2_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'long int' using type 'bfloat16x8_t'} } */ ++ (float16x8_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type '__fp16' using type 'bfloat16x8_t'} } */ ++ (int16x8_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'short int' using type 'bfloat16x8_t'} } */ ++ ++ /* Casting. 
*/ ++ ++ (void) glob_bfloat_vec; ++ (bfloat16x8_t) glob_bfloat_vec; ++ ++ (bfloat16_t) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ ++ (short) glob_bfloat_vec; /* { dg-error {can't convert a vector of type 'bfloat16x8_t' to type 'short int' which has different size} } */ ++ (int) glob_bfloat_vec; /* { dg-error {can't convert a vector of type 'bfloat16x8_t' to type 'int' which has different size} } */ ++ (float16_t) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ ++ (float) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ ++ (double) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ ++ ++ (int32x4_t) glob_bfloat_vec; ++ (float32x4_t) glob_bfloat_vec; ++ (float16x8_t) glob_bfloat_vec; ++ (int64x2_t) glob_bfloat_vec; ++ (float64x2_t) glob_bfloat_vec; ++ (int16x8_t) glob_bfloat_vec; ++ ++ (bfloat16x8_t) is_an_int_vec; ++ (bfloat16x8_t) is_a_float_vec; ++ (bfloat16x8_t) is_a_float16_vec; ++ (bfloat16x8_t) is_a_long_int_pair; ++ (bfloat16x8_t) is_a_double_pair; ++ (bfloat16x8_t) is_a_short_vec; ++ ++ /* Arrays and Structs. */ ++ ++ typedef bfloat16x8_t array_type[2]; ++ extern bfloat16x8_t extern_array[]; ++ ++ bfloat16x8_t array[2]; ++ bfloat16x8_t zero_length_array[0]; ++ bfloat16x8_t empty_init_array[] = {}; ++ typedef bfloat16x8_t some_other_type[is_an_int]; ++ ++ struct struct1 { ++ bfloat16x8_t a; ++ }; ++ ++ union union1 { ++ bfloat16x8_t a; ++ }; ++ ++ /* Addressing and dereferencing. */ ++ ++ bfloat16x8_t *bfloat_ptr = &vector0; ++ vector0 = *bfloat_ptr; ++ ++ /* Pointer assignment. */ ++ ++ bfloat16x8_t *bfloat_ptr2 = bfloat_ptr; ++ bfloat16x8_t *bfloat_ptr3 = array; ++ ++ /* Pointer arithmetic. */ ++ ++ ++bfloat_ptr; ++ --bfloat_ptr; ++ bfloat_ptr++; ++ bfloat_ptr--; ++ bfloat_ptr += 1; ++ bfloat_ptr -= 1; ++ bfloat_ptr - bfloat_ptr2; ++ bfloat_ptr = &bfloat_ptr3[0]; ++ bfloat_ptr = &bfloat_ptr3[1]; ++ ++ /* Simple comparison. */ ++ vector0 > glob_bfloat_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ glob_bfloat_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 > is_a_float_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ is_a_float_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 > 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ 0 == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 > 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ 0.1 == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 > is_an_int_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ is_an_int_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++ /* Pointer comparison. */ ++ ++ bfloat_ptr == &vector0; ++ bfloat_ptr != &vector0; ++ bfloat_ptr < &vector0; ++ bfloat_ptr <= &vector0; ++ bfloat_ptr > &vector0; ++ bfloat_ptr >= &vector0; ++ bfloat_ptr == bfloat_ptr2; ++ bfloat_ptr != bfloat_ptr2; ++ bfloat_ptr < bfloat_ptr2; ++ bfloat_ptr <= bfloat_ptr2; ++ bfloat_ptr > bfloat_ptr2; ++ bfloat_ptr >= bfloat_ptr2; ++ ++ /* Conditional expressions. */ ++ ++ 0 ? vector0 : vector0; ++ 0 ? vector0 : is_a_float_vec; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? 
is_a_float_vec : vector0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? vector0 : is_a_float16_vec; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? is_a_float16_vec : vector0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? vector0 : 0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? 0 : vector0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? 0.1 : vector0; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? vector0 : 0.1; /* { dg-error {type mismatch in conditional expression} } */ ++ 0 ? bfloat_ptr : bfloat_ptr2; ++ 0 ? bfloat_ptr : float_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ ++ 0 ? float_ptr : bfloat_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ ++ ++ vector0 ? vector0 : vector0; /* { dg-error {used vector type where scalar is required} } */ ++ vector0 ? is_a_float16_vec : vector0; /* { dg-error {used vector type where scalar is required} } */ ++ vector0 ? vector0 : is_a_float16_vec; /* { dg-error {used vector type where scalar is required} } */ ++ vector0 ? is_a_float16_vec : is_a_float16_vec; /* { dg-error {used vector type where scalar is required} } */ ++ ++ /* Unary operators. */ ++ ++ +vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ -vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ~vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ !vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ *vector0; /* { dg-error {invalid type argument of unary '\*'} } */ ++ __real vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ __imag vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ --vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0++; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0--; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++ /* Binary arithmetic operations. 
*/ ++ ++ vector0 = glob_bfloat_vec + *bfloat_ptr; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 = glob_bfloat_vec + 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 = glob_bfloat_vec + 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ vector0 = glob_bfloat_vec + is_a_float_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ ++ ++ return vector0; ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c b/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c +deleted file mode 100644 +index b12df2d3e..000000000 +--- a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c ++++ /dev/null +@@ -1,45 +0,0 @@ +-/* { dg-do compile } */ +-/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */ +- +-#include +- +-#define AESE(r, v, key) (r = vaesdq_u8 ((v), (key))); +-#define AESMC(r, i) (r = vaesimcq_u8 (i)) +- +-uint8x16_t dummy; +-uint8x16_t a; +-uint8x16_t b; +-uint8x16_t c; +-uint8x16_t d; +-uint8x16_t e; +- +-void +-foo (void) +-{ +- AESE (a, a, e); +- dummy = vaddq_u8 (dummy, dummy); +- dummy = vaddq_u8 (dummy, dummy); +- AESE (b, b, e); +- dummy = vaddq_u8 (dummy, dummy); +- dummy = vaddq_u8 (dummy, dummy); +- AESE (c, c, e); +- dummy = vaddq_u8 (dummy, dummy); +- dummy = vaddq_u8 (dummy, dummy); +- AESE (d, d, e); +- dummy = vaddq_u8 (dummy, dummy); +- dummy = vaddq_u8 (dummy, dummy); +- +- AESMC (a, a); +- dummy = vaddq_u8 (dummy, dummy); +- dummy = vaddq_u8 (dummy, dummy); +- AESMC (b, b); +- dummy = vaddq_u8 (dummy, dummy); +- dummy = vaddq_u8 (dummy, dummy); +- AESMC (c, c); +- dummy = vaddq_u8 (dummy, dummy); +- dummy = vaddq_u8 (dummy, dummy); +- AESMC (d, d); +-} +- +-/* { dg-final { scan-assembler-times "crypto_aesd_fused" 4 } } */ +- +diff --git a/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c b/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c +new file mode 100644 +index 000000000..59e24f48b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c +@@ -0,0 +1,14 @@ ++#include ++ ++typedef int16x4_t myvec; ++ ++void f (float x) ++{ ++ __Int8x8_t y1 = x; /* { dg-error {incompatible types when initializing type '__Int8x8_t' using type 'float'} } */ ++ __Int8x8_t *ptr1 = &x; /* { dg-error {initialization of '__Int8x8_t \*' from incompatible pointer type 'float \*'} } */ ++ int8x8_t y2 = x; /* { dg-error {incompatible types when initializing type 'int8x8_t' using type 'float'} } */ ++ int8x8_t *ptr2 = &x; /* { dg-error {initialization of 'int8x8_t \*' from incompatible pointer type 'float \*'} } */ ++ /* ??? For these it would be better to print an aka for 'int16x4_t'. 
*/ ++ myvec y3 = x; /* { dg-error {incompatible types when initializing type 'myvec' using type 'float'} } */ ++ myvec *ptr3 = &x; /* { dg-error {initialization of 'myvec \*' from incompatible pointer type 'float \*'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c +new file mode 100644 +index 000000000..8bfe06ac3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c +@@ -0,0 +1,140 @@ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -O2 -fno-inline" } */ ++ ++#define FUNC_DEFS(__a) \ ++float \ ++fsfoo##__a (int x) \ ++{ \ ++ return ((float) x)/(1lu << __a); \ ++} \ ++float \ ++fusfoo##__a (unsigned int x) \ ++{ \ ++ return ((float) x)/(1lu << __a); \ ++} \ ++float \ ++fslfoo##__a (long long x) \ ++{ \ ++ return ((float) x)/(1lu << __a); \ ++} \ ++float \ ++fulfoo##__a (unsigned long long x) \ ++{ \ ++ return ((float) x)/(1lu << __a); \ ++} \ ++ ++#define FUNC_DEFD(__a) \ ++double \ ++dsfoo##__a (int x) \ ++{ \ ++ return ((double) x)/(1lu << __a); \ ++} \ ++double \ ++dusfoo##__a (unsigned int x) \ ++{ \ ++ return ((double) x)/(1lu << __a); \ ++} \ ++double \ ++dslfoo##__a (long long x) \ ++{ \ ++ return ((double) x)/(1lu << __a); \ ++} \ ++double \ ++dulfoo##__a (unsigned long long x) \ ++{ \ ++ return ((double) x)/(1lu << __a); \ ++} ++ ++FUNC_DEFS (4) ++ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */ ++ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */ ++ ++FUNC_DEFD (4) ++ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */ ++ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */ ++ ++FUNC_DEFS (8) ++ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */ ++ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */ ++ ++FUNC_DEFD (8) ++ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */ ++ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */ ++ ++FUNC_DEFS (16) ++ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */ ++ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */ ++ ++FUNC_DEFD (16) ++ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */ ++ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */ ++ ++FUNC_DEFS (32) ++ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 } } */ ++ /* { 
dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 } } */ ++ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 } } */ ++ ++FUNC_DEFD (32) ++ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#32" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#32" 1 } } */ ++ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#32" 1 } } */ ++ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#32" 1 } } */ ++ ++#define FUNC_TESTS(__a, __b) \ ++do \ ++{ \ ++ if (fsfoo##__a (__b) != ((int) i) * (1.0f/(1lu << __a)) ) \ ++ __builtin_abort (); \ ++ if (fusfoo##__a (__b) != ((int) i) * (1.0f/(1lu << __a)) ) \ ++ __builtin_abort (); \ ++ if (fslfoo##__a (__b) != ((int) i) * (1.0f/(1lu << __a)) ) \ ++ __builtin_abort (); \ ++ if (fulfoo##__a (__b) != ((int) i) * (1.0f/(1lu << __a)) ) \ ++ __builtin_abort (); \ ++} while (0) ++ ++#define FUNC_TESTD(__a, __b) \ ++do \ ++{ \ ++ if (dsfoo##__a (__b) != ((int) i) * (1.0d/(1lu << __a)) ) \ ++ __builtin_abort (); \ ++ if (dusfoo##__a (__b) != ((int) i) * (1.0d/(1lu << __a)) ) \ ++ __builtin_abort (); \ ++ if (dslfoo##__a (__b) != ((int) i) * (1.0d/(1lu << __a)) ) \ ++ __builtin_abort (); \ ++ if (dulfoo##__a (__b) != ((int) i) * (1.0d/(1lu << __a)) ) \ ++ __builtin_abort (); \ ++} while (0) ++ ++int ++main (void) ++{ ++ int i; ++ ++ for (i = 0; i < 32; i ++) ++ { ++ FUNC_TESTS (4, i); ++ FUNC_TESTS (8, i); ++ FUNC_TESTS (16, i); ++ FUNC_TESTS (32, i); ++ ++ FUNC_TESTD (4, i); ++ FUNC_TESTD (8, i); ++ FUNC_TESTD (16, i); ++ FUNC_TESTD (32, i); ++ } ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/pr88834.c b/gcc/testsuite/gcc.target/aarch64/pr88834.c +new file mode 100644 +index 000000000..ea00967ef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/pr88834.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */ ++ ++void ++f (int *restrict x, int *restrict y, int *restrict z, int n) ++{ ++ for (int i = 0; i < n; i += 2) ++ { ++ x[i] = y[i] + z[i]; ++ x[i + 1] = y[i + 1] - z[i + 1]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c +new file mode 100644 +index 000000000..fa2267598 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c +@@ -0,0 +1,215 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8-a") ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8-a+tme") ++#ifndef __ARM_FEATURE_TME ++#error "__ARM_FEATURE_TME is not defined but should be!" ++#endif ++ ++#pragma GCC pop_options ++ ++#ifdef __ARM_FEATURE_TME ++#error "__ARM_FEATURE_TME is defined but should not be!" ++#endif ++ ++/* Test Armv8.6-A features. */ ++ ++#ifdef __ARM_FEATURE_MATMUL_INT8 ++#error "__ARM_FEATURE_MATMUL_INT8 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_INT8 ++#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 ++#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" 
++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 ++#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" ++#endif ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.6-a") ++#ifndef __ARM_FEATURE_MATMUL_INT8 ++#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!" ++#endif ++#ifdef __ARM_FEATURE_SVE ++#error "__ARM_FEATURE_SVE is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_INT8 ++#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 ++#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 ++#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" ++#endif ++#pragma GCC pop_options ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.6-a+sve") ++#ifndef __ARM_FEATURE_MATMUL_INT8 ++#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!" ++#endif ++#ifndef __ARM_FEATURE_SVE ++#error "__ARM_FEATURE_SVE is not defined but should be!" ++#endif ++#ifndef __ARM_FEATURE_SVE_MATMUL_INT8 ++#error "__ARM_FEATURE_SVE_MATMUL_INT8 is not defined but should be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 ++#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 ++#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" ++#endif ++#pragma GCC pop_pragma ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.2-a+i8mm") ++#ifndef __ARM_FEATURE_MATMUL_INT8 ++#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!" ++#endif ++#ifdef __ARM_FEATURE_SVE ++#error "__ARM_FEATURE_SVE is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_INT8 ++#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!" ++#endif ++#pragma GCC pop_options ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.2-a+i8mm+sve") ++#ifndef __ARM_FEATURE_MATMUL_INT8 ++#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!" ++#endif ++#ifndef __ARM_FEATURE_SVE ++#error "__ARM_FEATURE_SVE is not defined but should be!" ++#endif ++#ifndef __ARM_FEATURE_SVE_MATMUL_INT8 ++#error "__ARM_FEATURE_SVE_MATMUL_INT8 is not defined but should be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 ++#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 ++#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" ++#endif ++#pragma GCC pop_options ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.2-a+f32mm") ++#ifndef __ARM_FEATURE_SVE ++#error "__ARM_FEATURE_SVE is not defined but should be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_INT8 ++#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!" ++#endif ++#ifndef __ARM_FEATURE_SVE_MATMUL_FP32 ++#error "__ARM_FEATURE_SVE_MATMUL_FP32 is not defined but should be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 ++#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" ++#endif ++#pragma GCC pop_pragma ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.2-a+f64mm") ++#ifndef __ARM_FEATURE_SVE ++#error "__ARM_FEATURE_SVE is not defined but should be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_INT8 ++#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 ++#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" 
++#endif ++#ifndef __ARM_FEATURE_SVE_MATMUL_FP64 ++#error "__ARM_FEATURE_SVE_MATMUL_FP64 is not defined but should be!" ++#endif ++#pragma GCC pop_options ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.6-a+nosimd") ++#ifdef __ARM_FEATURE_MATMUL_INT8 ++#error "__ARM_FEATURE_MATMUL_INT8 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 ++#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 ++#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" ++#endif ++#pragma GCC pop_options ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.6-a+nofp") ++#ifdef __ARM_FEATURE_MATMUL_INT8 ++#error "__ARM_FEATURE_MATMUL_INT8 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 ++#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 ++#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" ++#endif ++#pragma GCC pop_options ++ ++#ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC ++#error "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC ++#error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC is defined but should not be!" ++#endif ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.6-a") ++#ifndef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC ++#error "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC is not defined but should be!" ++#endif ++#ifndef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC ++#error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC is not defined but should be!" ++#endif ++#pragma GCC pop_options ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.2-a+bf16") ++#ifndef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC ++#error "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC is not defined but should be!" ++#endif ++#ifndef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC ++#error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC is not defined but should be!" ++#endif ++#pragma GCC pop_options ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.2-a+bf16+nosimd") ++#ifndef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC ++#error "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC is not defined but should be!" ++#endif ++#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC ++#error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC is defined but should not be!" ++#endif ++#pragma GCC pop_options ++ ++#pragma GCC push_options ++#pragma GCC target ("arch=armv8.6-a+nofp") ++#ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC ++#error "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC is defined but should not be!" ++#endif ++#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC ++#error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC is defined but should not be!" 
++#endif ++#pragma GCC pop_options ++ ++#pragma GCC pop_options ++ ++int ++foo (int a) ++{ ++ return a; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c +new file mode 100644 +index 000000000..2587bfedd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c +@@ -0,0 +1,40 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-O3 --save-temps" } */ ++ ++extern void abort (); ++ ++#define N 8 ++float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0}; ++int out[N]; ++ ++void ++foo (int *i, float *f) ++{ ++ i[0] = __builtin_signbit (f[0]); ++ i[1] = __builtin_signbit (f[1]); ++} ++ ++/* { dg-final { scan-assembler-not {-2147483648} } } */ ++/* { dg-final { scan-assembler {\tushr\tv[0-9]+.2s, v[0-9]+.2s, 31} } } */ ++ ++int ++main () ++{ ++ int i; ++ ++ foo (out, in); ++ foo (out + 2, in + 2); ++ foo (out + 4, in + 4); ++ foo (out + 6, in + 6); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (in[i] >= 0.0 && out[i]) ++ abort (); ++ if (in[i] < 0.0 && !out[i]) ++ abort (); ++ } ++ ++ return 0; ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c +new file mode 100644 +index 000000000..18cffdc7d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c +@@ -0,0 +1,38 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-O3 --save-temps" } */ ++ ++extern void abort (); ++ ++#define N 1024 ++float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0}; ++int out[N]; ++ ++void ++foo () ++{ ++ int i; ++ for (i = 0; i < N; i++) ++ out[i] = __builtin_signbit (in[i]); ++} ++ ++/* { dg-final { scan-assembler-not {-2147483648} } } */ ++/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */ ++ ++int ++main () ++{ ++ int i; ++ ++ foo (); ++ ++ for (i = 0; i < N; i++) ++ { ++ if (in[i] >= 0.0 && out[i]) ++ abort (); ++ if (in[i] < 0.0 && !out[i]) ++ abort (); ++ } ++ ++ return 0; ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ssra.c b/gcc/testsuite/gcc.target/aarch64/simd/ssra.c +new file mode 100644 +index 000000000..e9c2e04c0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/simd/ssra.c +@@ -0,0 +1,36 @@ ++/* { dg-do compile { target aarch64*-*-* } } */ ++/* { dg-options "-O3" } */ ++/* { dg-skip-if "" { *-*-* } {"*sve*"} {""} } */ ++ ++#include ++ ++#define SSRA(func, vtype, n) \ ++ void func () \ ++ { \ ++ int i; \ ++ for (i = 0; i < n; i++) \ ++ { \ ++ s1##vtype[i] += s2##vtype[i] >> 2; \ ++ } \ ++ } ++ ++#define TEST_VDQ_I_MODES(FUNC) \ ++ FUNC (test_v8qi_v16qi, _char, 16) \ ++ FUNC (test_v4hi_v8h1, _short, 8) \ ++ FUNC (test_v2si_v4si, _int, 4) \ ++ FUNC (test_v2di, _ll, 2) \ ++ ++int8_t s1_char[16], s2_char[16]; ++int16_t s1_short[8], s2_short[8]; ++int32_t s1_int[4], s2_int[4]; ++int64_t s1_ll[2], s2_ll[2]; ++ ++TEST_VDQ_I_MODES(SSRA) ++ ++/* { dg-final { scan-assembler "ssra" } } */ ++/* { dg-final { scan-assembler-not "sshr" } } */ ++ ++/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.16b, v[0-9]+\.16b, [0-9]+} 1 } } */ ++/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.8h, v[0-9]+\.8h, [0-9]+} 1 } } */ ++/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.4s, v[0-9]+\.4s, [0-9]+} 1 } } */ ++/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.2d, v[0-9]+\.2d, [0-9]+} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/simd/usra.c b/gcc/testsuite/gcc.target/aarch64/simd/usra.c +new file mode 100644 +index 000000000..4e7446dfa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/simd/usra.c +@@ 
-0,0 +1,36 @@ ++/* { dg-do compile { target aarch64*-*-* } } */ ++/* { dg-options "-O3" } */ ++/* { dg-skip-if "" { *-*-* } {"*sve*"} {""} } */ ++ ++#include ++ ++#define USRA(func, vtype, n) \ ++ void func () \ ++ { \ ++ int i; \ ++ for (i = 0; i < n; i++) \ ++ { \ ++ u1##vtype[i] += u2##vtype[i] >> 2; \ ++ } \ ++ } ++ ++#define TEST_VDQ_I_MODES(FUNC) \ ++ FUNC (test_v8qi_v16qi, _char, 16) \ ++ FUNC (test_v4hi_v8h1, _short, 8) \ ++ FUNC (test_v2si_v4si, _int, 4) \ ++ FUNC (test_v2di, _ll, 2) \ ++ ++uint8_t u1_char[16], u2_char[16]; ++uint16_t u1_short[8], u2_short[8]; ++uint32_t u1_int[4], u2_int[4]; ++uint64_t u1_ll[2], u2_ll[2]; ++ ++TEST_VDQ_I_MODES(USRA) ++ ++/* { dg-final { scan-assembler "usra" } } */ ++/* { dg-final { scan-assembler-not "ushr" } } */ ++ ++/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.16b, v[0-9]+\.16b, [0-9]+} 1 } } */ ++/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.8h, v[0-9]+\.8h, [0-9]+} 1 } } */ ++/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.4s, v[0-9]+\.4s, [0-9]+} 1 } } */ ++/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.2d, v[0-9]+\.2d, [0-9]+} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c +new file mode 100644 +index 000000000..5eec2b5cf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c +@@ -0,0 +1,27 @@ ++/* { dg-do assemble} */ ++/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+i8mm" } */ ++ ++#include "arm_neon.h" ++ ++int32x4_t ++test_vmmlaq_s32 (int32x4_t r, int8x16_t a, int8x16_t b) ++{ ++ return vmmlaq_s32 (r, a, b); ++} ++ ++uint32x4_t ++test_vmmlaq_u32 (uint32x4_t r, uint8x16_t a, uint8x16_t b) ++{ ++ return vmmlaq_u32 (r, a, b); ++} ++ ++int32x4_t ++test_vusmmlaq_s32 (int32x4_t r, uint8x16_t a, int8x16_t b) ++{ ++ return vusmmlaq_s32 (r, a, b); ++} ++ ++/* { dg-final { scan-assembler-times {\tsmmla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */ ++/* { dg-final { scan-assembler-times {\tummla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */ ++/* { dg-final { scan-assembler-times {\tusmmla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vrndnzx_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vrndnzx_1.c +new file mode 100644 +index 000000000..0399b838d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/simd/vrndnzx_1.c +@@ -0,0 +1,137 @@ ++/* Test the vrnd[32,64][z,x] intrinsics. 
*/ ++ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=armv8.5-a" } */ ++ ++#include "arm_neon.h" ++ ++#ifdef __ARM_FEATURE_FRINT ++ ++float32x2_t ++foo_32z (float32x2_t a) ++{ ++ return vrnd32z_f32 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */ ++ ++float32x4_t ++foo_32z_q (float32x4_t a) ++{ ++ return vrnd32zq_f32 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */ ++ ++float64x1_t ++foo_32z_f64 (float64x1_t a) ++{ ++ return vrnd32z_f64 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ ++ ++float64x2_t ++foo_32z_q_f64 (float64x2_t a) ++{ ++ return vrnd32zq_f64 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */ ++ ++float32x2_t ++foo_32x (float32x2_t a) ++{ ++ return vrnd32x_f32 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */ ++ ++float32x4_t ++foo_32x_q (float32x4_t a) ++{ ++ return vrnd32xq_f32 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */ ++ ++float64x1_t ++foo_32x_f64 (float64x1_t a) ++{ ++ return vrnd32x_f64 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ ++ ++float64x2_t ++foo_32x_q_f64 (float64x2_t a) ++{ ++ return vrnd32xq_f64 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */ ++ ++float32x2_t ++foo_64z (float32x2_t a) ++{ ++ return vrnd64z_f32 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */ ++ ++float32x4_t ++foo_64z_q (float32x4_t a) ++{ ++ return vrnd64zq_f32 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */ ++ ++float64x1_t ++foo_64z_f64 (float64x1_t a) ++{ ++ return vrnd64z_f64 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ ++ ++float64x2_t ++foo_64z_q_f64 (float64x2_t a) ++{ ++ return vrnd64zq_f64 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */ ++ ++float32x2_t ++foo_64x (float32x2_t a) ++{ ++ return vrnd64x_f32 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */ ++ ++float32x4_t ++foo_64x_q (float32x4_t a) ++{ ++ return vrnd64xq_f32 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */ ++ ++float64x1_t ++foo_64x_f64 (float64x1_t a) ++{ ++ return vrnd64x_f64 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ ++ ++float64x2_t ++foo_64x_q_f64 (float64x2_t a) ++{ ++ return vrnd64xq_f64 (a); ++} ++ ++/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */ ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/ssadv16qi-dotprod.c b/gcc/testsuite/gcc.target/aarch64/ssadv16qi-dotprod.c +new file mode 100644 +index 000000000..08b6831cf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/ssadv16qi-dotprod.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_v8_2a_dotprod_neon_ok } */ ++/* { dg-add-options arm_v8_2a_dotprod_neon } */ ++/* { dg-additional-options "-O3" } */ ++ ++#pragma GCC target "+nosve" ++ ++#define N 1024 ++ ++signed char pix1[N], pix2[N]; ++ ++int foo (void) ++{ ++ int i_sum = 0; ++ int i; ++ ++ for (i = 0; i < N; i++) ++ i_sum += 
__builtin_abs (pix1[i] - pix2[i]); ++ ++ return i_sum; ++} ++ ++/* { dg-final { scan-assembler-not {\tsshll\t} } } */ ++/* { dg-final { scan-assembler-not {\tsshll2\t} } } */ ++/* { dg-final { scan-assembler-not {\tssubl\t} } } */ ++/* { dg-final { scan-assembler-not {\tssubl2\t} } } */ ++/* { dg-final { scan-assembler-not {\tabs\t} } } */ ++ ++/* { dg-final { scan-assembler {\tsabd\t} } } */ ++/* { dg-final { scan-assembler {\tudot\t} } } */ ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c b/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c +index 40b288436..85a867a11 100644 +--- a/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c ++++ b/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O3" } */ + +-#pragma GCC target "+nosve" ++#pragma GCC target "+nosve+nodotprod" + + #define N 1024 + +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp +new file mode 100644 +index 000000000..7ce85a414 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp +@@ -0,0 +1,79 @@ ++# Assembly-based regression-test driver for the SVE ACLE ++# Copyright (C) 2009-2019 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . */ ++ ++# GCC testsuite that uses the `dg.exp' driver. ++ ++# Exit immediately if this isn't an AArch64 target. ++if {![istarget aarch64*-*-*] } { ++ return ++} ++ ++# Load support procs. ++load_lib gcc-dg.exp ++ ++# Initialize `dg'. ++dg-init ++ ++# Force SVE if we're not testing it already. ++if { [check_effective_target_aarch64_sve] } { ++ set sve_flags "" ++} else { ++ set sve_flags "-march=armv8.2-a+sve" ++} ++ ++global gcc_runtest_parallelize_limit_minor ++if { [info exists gcc_runtest_parallelize_limit_minor] } { ++ set old_limit_minor $gcc_runtest_parallelize_limit_minor ++ set gcc_runtest_parallelize_limit_minor 1 ++} ++ ++torture-init ++set-torture-options { ++ "-std=c90 -O0 -g" ++ "-std=c90 -O1 -g" ++ "-std=c99 -O2 -g" ++ "-std=c11 -O3 -g" ++ "-std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps" ++ "-std=gnu99 -Ofast -g" ++ "-std=gnu11 -Os -g" ++} { ++ "-DTEST_FULL" ++ "-DTEST_OVERLOADS" ++} ++ ++# Main loop. ++set files [glob -nocomplain $srcdir/$subdir/asm/*.c] ++set save-dg-do-what-default ${dg-do-what-default} ++if { [check_effective_target_aarch64_asm_sve_ok] ++ && [check_effective_target_aarch64_variant_pcs] } { ++ set dg-do-what-default assemble ++} else { ++ set dg-do-what-default compile ++} ++gcc-dg-runtest [lsort $files] "" "$sve_flags -fno-ipa-icf" ++set dg-do-what-default ${save-dg-do-what-default} ++ ++torture-finish ++ ++if { [info exists gcc_runtest_parallelize_limit_minor] } { ++ set gcc_runtest_parallelize_limit_minor $old_limit_minor ++} ++ ++# All done. 
++dg-finish +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp +new file mode 100644 +index 000000000..34d9dfd43 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp +@@ -0,0 +1,54 @@ ++# Specific regression driver for AArch64 SVE. ++# Copyright (C) 2009-2019 Free Software Foundation, Inc. ++# Contributed by ARM Ltd. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . */ ++ ++# GCC testsuite that uses the `dg.exp' driver. ++ ++# Exit immediately if this isn't an AArch64 target. ++if {![istarget aarch64*-*-*] } { ++ return ++} ++ ++# Load support procs. ++load_lib gcc-dg.exp ++ ++# If a testcase doesn't have special options, use these. ++global DEFAULT_CFLAGS ++if ![info exists DEFAULT_CFLAGS] then { ++ set DEFAULT_CFLAGS " -ansi -pedantic-errors" ++} ++ ++# Initialize `dg'. ++dg-init ++ ++# Force SVE if we're not testing it already. ++if { [check_effective_target_aarch64_sve] } { ++ set sve_flags "" ++} else { ++ set sve_flags "-march=armv8.2-a+sve" ++} ++ ++# Main loop. ++# FIXME: This should include general/*.c too, but leave that until the ++# C frontend allows initialization of SVE vectors. ++set files [glob -nocomplain $srcdir/$subdir/general-c/*.c] ++dg-runtest [lsort $files] "$sve_flags" $DEFAULT_CFLAGS ++ ++# All done. 
++dg-finish +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f16.c +new file mode 100644 +index 000000000..c019f248d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f16.c +@@ -0,0 +1,552 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_f16_m_tied1: ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f16_m_tied1, svfloat16_t, ++ z0 = svabd_f16_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fabd z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f16_m_tied2, svfloat16_t, ++ z0 = svabd_f16_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_f16_m_untied: ++** movprfx z0, z1 ++** fabd z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f16_m_untied, svfloat16_t, ++ z0 = svabd_f16_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svabd_n_f16_m (p0, z0, d4), ++ z0 = svabd_m (p0, z0, d4)) ++ ++/* ++** abd_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svabd_n_f16_m (p0, z1, d4), ++ z0 = svabd_m (p0, z1, d4)) ++ ++/* ++** abd_1_f16_m_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f16_m_tied1, svfloat16_t, ++ z0 = svabd_n_f16_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** abd_1_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** movprfx z0, z1 ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f16_m_untied, svfloat16_t, ++ z0 = svabd_n_f16_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f16_z_tied1, svfloat16_t, ++ z0 = svabd_f16_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f16_z_tied2, svfloat16_t, ++ z0 = svabd_f16_z (p0, z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fabd z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f16_z_untied, svfloat16_t, ++ z0 = svabd_f16_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svabd_n_f16_z (p0, z0, d4), ++ z0 = svabd_z (p0, z0, d4)) ++ ++/* ++** abd_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svabd_n_f16_z (p0, z1, d4), ++ z0 = svabd_z (p0, z1, d4)) ++ ++/* ++** abd_1_f16_z_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** movprfx z0\.h, p0/z, z0\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f16_z_tied1, svfloat16_t, ++ z0 = svabd_n_f16_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_f16_z_untied: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f16_z_untied, svfloat16_t, ++ z0 = svabd_n_f16_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_0p5_f16_z_tied1: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** movprfx z0\.h, p0/z, z0\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svabd_n_f16_z (p0, z0, 0.5), ++ z0 = svabd_z (p0, z0, 0.5)) ++ ++/* ++** abd_0p5_f16_z_untied: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f16_z_untied, svfloat16_t, ++ z0 = svabd_n_f16_z (p0, z1, 0.5), ++ z0 = svabd_z (p0, z1, 0.5)) ++ ++/* ++** abd_m1_f16_z_tied1: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f16_z_tied1, svfloat16_t, ++ z0 = svabd_n_f16_z (p0, z0, -1), ++ z0 = svabd_z (p0, z0, -1)) ++ ++/* ++** abd_m1_f16_z_untied: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f16_z_untied, svfloat16_t, ++ z0 = svabd_n_f16_z (p0, z1, -1), ++ z0 = svabd_z (p0, z1, -1)) ++ ++/* ++** abd_m0p5_f16_z_tied1: ++** fmov (z[0-9]+\.h), #-(?:0\.5|5\.0e-1) ++** movprfx z0\.h, p0/z, z0\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f16_z_tied1, svfloat16_t, ++ z0 = svabd_n_f16_z (p0, z0, -0.5), ++ z0 = svabd_z (p0, z0, -0.5)) ++ ++/* ++** abd_m0p5_f16_z_untied: ++** fmov (z[0-9]+\.h), #-(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f16_z_untied, svfloat16_t, ++ z0 = svabd_n_f16_z (p0, z1, -0.5), ++ z0 = svabd_z (p0, z1, -0.5)) ++ ++/* ++** abd_m2_f16_z: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? 
++** movprfx z0\.h, p0/z, z0\.h ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m2_f16_z, svfloat16_t, ++ z0 = svabd_n_f16_z (p0, z0, -2), ++ z0 = svabd_z (p0, z0, -2)) ++ ++/* ++** abd_f16_x_tied1: ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f16_x_tied1, svfloat16_t, ++ z0 = svabd_f16_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_f16_x_tied2: ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f16_x_tied2, svfloat16_t, ++ z0 = svabd_f16_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fabd z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f16_x_untied, svfloat16_t, ++ z0 = svabd_f16_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svabd_n_f16_x (p0, z0, d4), ++ z0 = svabd_x (p0, z0, d4)) ++ ++/* ++** abd_h4_f16_x_untied: ++** mov z0\.h, h4 ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svabd_n_f16_x (p0, z1, d4), ++ z0 = svabd_x (p0, z1, d4)) ++ ++/* ++** abd_1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f16_x_tied1, svfloat16_t, ++ z0 = svabd_n_f16_x (p0, z0, 1), ++ z0 = svabd_x (p0, z0, 1)) ++ ++/* ++** abd_1_f16_x_untied: ++** fmov z0\.h, #1\.0(?:e\+0)? ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f16_x_untied, svfloat16_t, ++ z0 = svabd_n_f16_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) ++ ++/* ++** abd_0p5_f16_x_tied1: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svabd_n_f16_x (p0, z0, 0.5), ++ z0 = svabd_x (p0, z0, 0.5)) ++ ++/* ++** abd_0p5_f16_x_untied: ++** fmov z0\.h, #(?:0\.5|5\.0e-1) ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f16_x_untied, svfloat16_t, ++ z0 = svabd_n_f16_x (p0, z1, 0.5), ++ z0 = svabd_x (p0, z1, 0.5)) ++ ++/* ++** abd_m1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f16_x_tied1, svfloat16_t, ++ z0 = svabd_n_f16_x (p0, z0, -1), ++ z0 = svabd_x (p0, z0, -1)) ++ ++/* ++** abd_m1_f16_x_untied: ++** fmov z0\.h, #-1\.0(?:e\+0)? ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f16_x_untied, svfloat16_t, ++ z0 = svabd_n_f16_x (p0, z1, -1), ++ z0 = svabd_x (p0, z1, -1)) ++ ++/* ++** abd_m0p5_f16_x_tied1: ++** fmov (z[0-9]+\.h), #-(?:0\.5|5\.0e-1) ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f16_x_tied1, svfloat16_t, ++ z0 = svabd_n_f16_x (p0, z0, -0.5), ++ z0 = svabd_x (p0, z0, -0.5)) ++ ++/* ++** abd_m0p5_f16_x_untied: ++** fmov z0\.h, #-(?:0\.5|5\.0e-1) ++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f16_x_untied, svfloat16_t, ++ z0 = svabd_n_f16_x (p0, z1, -0.5), ++ z0 = svabd_x (p0, z1, -0.5)) ++ ++/* ++** abd_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_2_f16_x_tied1, svfloat16_t, ++ z0 = svabd_n_f16_x (p0, z0, 2), ++ z0 = svabd_x (p0, z0, 2)) ++ ++/* ++** abd_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? 
++** fabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_2_f16_x_untied, svfloat16_t, ++ z0 = svabd_n_f16_x (p0, z1, 2), ++ z0 = svabd_x (p0, z1, 2)) ++ ++/* ++** ptrue_abd_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_f16_x_tied1, svfloat16_t, ++ z0 = svabd_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svabd_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_abd_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_f16_x_tied2, svfloat16_t, ++ z0 = svabd_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svabd_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_abd_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_f16_x_untied, svfloat16_t, ++ z0 = svabd_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svabd_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_abd_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_1_f16_x_tied1, svfloat16_t, ++ z0 = svabd_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svabd_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_abd_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_1_f16_x_untied, svfloat16_t, ++ z0 = svabd_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svabd_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_abd_0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svabd_n_f16_x (svptrue_b16 (), z0, 0.5), ++ z0 = svabd_x (svptrue_b16 (), z0, 0.5)) ++ ++/* ++** ptrue_abd_0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_0p5_f16_x_untied, svfloat16_t, ++ z0 = svabd_n_f16_x (svptrue_b16 (), z1, 0.5), ++ z0 = svabd_x (svptrue_b16 (), z1, 0.5)) ++ ++/* ++** ptrue_abd_m1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m1_f16_x_tied1, svfloat16_t, ++ z0 = svabd_n_f16_x (svptrue_b16 (), z0, -1), ++ z0 = svabd_x (svptrue_b16 (), z0, -1)) ++ ++/* ++** ptrue_abd_m1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m1_f16_x_untied, svfloat16_t, ++ z0 = svabd_n_f16_x (svptrue_b16 (), z1, -1), ++ z0 = svabd_x (svptrue_b16 (), z1, -1)) ++ ++/* ++** ptrue_abd_m0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m0p5_f16_x_tied1, svfloat16_t, ++ z0 = svabd_n_f16_x (svptrue_b16 (), z0, -0.5), ++ z0 = svabd_x (svptrue_b16 (), z0, -0.5)) ++ ++/* ++** ptrue_abd_m0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m0p5_f16_x_untied, svfloat16_t, ++ z0 = svabd_n_f16_x (svptrue_b16 (), z1, -0.5), ++ z0 = svabd_x (svptrue_b16 (), z1, -0.5)) ++ ++/* ++** ptrue_abd_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_2_f16_x_tied1, svfloat16_t, ++ z0 = svabd_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svabd_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_abd_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_2_f16_x_untied, svfloat16_t, ++ z0 = svabd_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svabd_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f32.c +new file mode 100644 +index 000000000..bff37580c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f32.c +@@ -0,0 +1,552 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_f32_m_tied1: ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f32_m_tied1, svfloat32_t, ++ z0 = svabd_f32_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fabd z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f32_m_tied2, svfloat32_t, ++ z0 = svabd_f32_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_f32_m_untied: ++** movprfx z0, z1 ++** fabd z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f32_m_untied, svfloat32_t, ++ z0 = svabd_f32_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svabd_n_f32_m (p0, z0, d4), ++ z0 = svabd_m (p0, z0, d4)) ++ ++/* ++** abd_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svabd_n_f32_m (p0, z1, d4), ++ z0 = svabd_m (p0, z1, d4)) ++ ++/* ++** abd_1_f32_m_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f32_m_tied1, svfloat32_t, ++ z0 = svabd_n_f32_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** abd_1_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f32_m_untied, svfloat32_t, ++ z0 = svabd_n_f32_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f32_z_tied1, svfloat32_t, ++ z0 = svabd_f32_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f32_z_tied2, svfloat32_t, ++ z0 = svabd_f32_z (p0, z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fabd z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f32_z_untied, svfloat32_t, ++ z0 = svabd_f32_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svabd_n_f32_z (p0, z0, d4), ++ z0 = svabd_z (p0, z0, d4)) ++ ++/* ++** abd_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svabd_n_f32_z (p0, z1, d4), ++ z0 = svabd_z (p0, z1, d4)) ++ ++/* ++** abd_1_f32_z_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f32_z_tied1, svfloat32_t, ++ z0 = svabd_n_f32_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_f32_z_untied: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f32_z_untied, svfloat32_t, ++ z0 = svabd_n_f32_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_0p5_f32_z_tied1: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** movprfx z0\.s, p0/z, z0\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svabd_n_f32_z (p0, z0, 0.5), ++ z0 = svabd_z (p0, z0, 0.5)) ++ ++/* ++** abd_0p5_f32_z_untied: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f32_z_untied, svfloat32_t, ++ z0 = svabd_n_f32_z (p0, z1, 0.5), ++ z0 = svabd_z (p0, z1, 0.5)) ++ ++/* ++** abd_m1_f32_z_tied1: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f32_z_tied1, svfloat32_t, ++ z0 = svabd_n_f32_z (p0, z0, -1), ++ z0 = svabd_z (p0, z0, -1)) ++ ++/* ++** abd_m1_f32_z_untied: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f32_z_untied, svfloat32_t, ++ z0 = svabd_n_f32_z (p0, z1, -1), ++ z0 = svabd_z (p0, z1, -1)) ++ ++/* ++** abd_m0p5_f32_z_tied1: ++** fmov (z[0-9]+\.s), #-(?:0\.5|5\.0e-1) ++** movprfx z0\.s, p0/z, z0\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f32_z_tied1, svfloat32_t, ++ z0 = svabd_n_f32_z (p0, z0, -0.5), ++ z0 = svabd_z (p0, z0, -0.5)) ++ ++/* ++** abd_m0p5_f32_z_untied: ++** fmov (z[0-9]+\.s), #-(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f32_z_untied, svfloat32_t, ++ z0 = svabd_n_f32_z (p0, z1, -0.5), ++ z0 = svabd_z (p0, z1, -0.5)) ++ ++/* ++** abd_m2_f32_z: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m2_f32_z, svfloat32_t, ++ z0 = svabd_n_f32_z (p0, z0, -2), ++ z0 = svabd_z (p0, z0, -2)) ++ ++/* ++** abd_f32_x_tied1: ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f32_x_tied1, svfloat32_t, ++ z0 = svabd_f32_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_f32_x_tied2: ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f32_x_tied2, svfloat32_t, ++ z0 = svabd_f32_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fabd z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f32_x_untied, svfloat32_t, ++ z0 = svabd_f32_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svabd_n_f32_x (p0, z0, d4), ++ z0 = svabd_x (p0, z0, d4)) ++ ++/* ++** abd_s4_f32_x_untied: ++** mov z0\.s, s4 ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svabd_n_f32_x (p0, z1, d4), ++ z0 = svabd_x (p0, z1, d4)) ++ ++/* ++** abd_1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f32_x_tied1, svfloat32_t, ++ z0 = svabd_n_f32_x (p0, z0, 1), ++ z0 = svabd_x (p0, z0, 1)) ++ ++/* ++** abd_1_f32_x_untied: ++** fmov z0\.s, #1\.0(?:e\+0)? ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f32_x_untied, svfloat32_t, ++ z0 = svabd_n_f32_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) ++ ++/* ++** abd_0p5_f32_x_tied1: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svabd_n_f32_x (p0, z0, 0.5), ++ z0 = svabd_x (p0, z0, 0.5)) ++ ++/* ++** abd_0p5_f32_x_untied: ++** fmov z0\.s, #(?:0\.5|5\.0e-1) ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f32_x_untied, svfloat32_t, ++ z0 = svabd_n_f32_x (p0, z1, 0.5), ++ z0 = svabd_x (p0, z1, 0.5)) ++ ++/* ++** abd_m1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f32_x_tied1, svfloat32_t, ++ z0 = svabd_n_f32_x (p0, z0, -1), ++ z0 = svabd_x (p0, z0, -1)) ++ ++/* ++** abd_m1_f32_x_untied: ++** fmov z0\.s, #-1\.0(?:e\+0)? ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f32_x_untied, svfloat32_t, ++ z0 = svabd_n_f32_x (p0, z1, -1), ++ z0 = svabd_x (p0, z1, -1)) ++ ++/* ++** abd_m0p5_f32_x_tied1: ++** fmov (z[0-9]+\.s), #-(?:0\.5|5\.0e-1) ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f32_x_tied1, svfloat32_t, ++ z0 = svabd_n_f32_x (p0, z0, -0.5), ++ z0 = svabd_x (p0, z0, -0.5)) ++ ++/* ++** abd_m0p5_f32_x_untied: ++** fmov z0\.s, #-(?:0\.5|5\.0e-1) ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f32_x_untied, svfloat32_t, ++ z0 = svabd_n_f32_x (p0, z1, -0.5), ++ z0 = svabd_x (p0, z1, -0.5)) ++ ++/* ++** abd_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_2_f32_x_tied1, svfloat32_t, ++ z0 = svabd_n_f32_x (p0, z0, 2), ++ z0 = svabd_x (p0, z0, 2)) ++ ++/* ++** abd_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_2_f32_x_untied, svfloat32_t, ++ z0 = svabd_n_f32_x (p0, z1, 2), ++ z0 = svabd_x (p0, z1, 2)) ++ ++/* ++** ptrue_abd_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_f32_x_tied1, svfloat32_t, ++ z0 = svabd_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svabd_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_abd_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_f32_x_tied2, svfloat32_t, ++ z0 = svabd_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svabd_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_abd_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_f32_x_untied, svfloat32_t, ++ z0 = svabd_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svabd_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_abd_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_1_f32_x_tied1, svfloat32_t, ++ z0 = svabd_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svabd_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_abd_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_1_f32_x_untied, svfloat32_t, ++ z0 = svabd_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svabd_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_abd_0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svabd_n_f32_x (svptrue_b32 (), z0, 0.5), ++ z0 = svabd_x (svptrue_b32 (), z0, 0.5)) ++ ++/* ++** ptrue_abd_0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_0p5_f32_x_untied, svfloat32_t, ++ z0 = svabd_n_f32_x (svptrue_b32 (), z1, 0.5), ++ z0 = svabd_x (svptrue_b32 (), z1, 0.5)) ++ ++/* ++** ptrue_abd_m1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m1_f32_x_tied1, svfloat32_t, ++ z0 = svabd_n_f32_x (svptrue_b32 (), z0, -1), ++ z0 = svabd_x (svptrue_b32 (), z0, -1)) ++ ++/* ++** ptrue_abd_m1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m1_f32_x_untied, svfloat32_t, ++ z0 = svabd_n_f32_x (svptrue_b32 (), z1, -1), ++ z0 = svabd_x (svptrue_b32 (), z1, -1)) ++ ++/* ++** ptrue_abd_m0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m0p5_f32_x_tied1, svfloat32_t, ++ z0 = svabd_n_f32_x (svptrue_b32 (), z0, -0.5), ++ z0 = svabd_x (svptrue_b32 (), z0, -0.5)) ++ ++/* ++** ptrue_abd_m0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m0p5_f32_x_untied, svfloat32_t, ++ z0 = svabd_n_f32_x (svptrue_b32 (), z1, -0.5), ++ z0 = svabd_x (svptrue_b32 (), z1, -0.5)) ++ ++/* ++** ptrue_abd_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_2_f32_x_tied1, svfloat32_t, ++ z0 = svabd_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svabd_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_abd_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_2_f32_x_untied, svfloat32_t, ++ z0 = svabd_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svabd_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f64.c +new file mode 100644 +index 000000000..c1e5f14e6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f64.c +@@ -0,0 +1,552 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_f64_m_tied1: ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f64_m_tied1, svfloat64_t, ++ z0 = svabd_f64_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f64_m_tied2, svfloat64_t, ++ z0 = svabd_f64_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_f64_m_untied: ++** movprfx z0, z1 ++** fabd z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f64_m_untied, svfloat64_t, ++ z0 = svabd_f64_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svabd_n_f64_m (p0, z0, d4), ++ z0 = svabd_m (p0, z0, d4)) ++ ++/* ++** abd_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svabd_n_f64_m (p0, z1, d4), ++ z0 = svabd_m (p0, z1, d4)) ++ ++/* ++** abd_1_f64_m_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f64_m_tied1, svfloat64_t, ++ z0 = svabd_n_f64_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** abd_1_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f64_m_untied, svfloat64_t, ++ z0 = svabd_n_f64_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f64_z_tied1, svfloat64_t, ++ z0 = svabd_f64_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f64_z_tied2, svfloat64_t, ++ z0 = svabd_f64_z (p0, z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fabd z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f64_z_untied, svfloat64_t, ++ z0 = svabd_f64_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svabd_n_f64_z (p0, z0, d4), ++ z0 = svabd_z (p0, z0, d4)) ++ ++/* ++** abd_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svabd_n_f64_z (p0, z1, d4), ++ z0 = svabd_z (p0, z1, d4)) ++ ++/* ++** abd_1_f64_z_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f64_z_tied1, svfloat64_t, ++ z0 = svabd_n_f64_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_f64_z_untied: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f64_z_untied, svfloat64_t, ++ z0 = svabd_n_f64_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_0p5_f64_z_tied1: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** movprfx z0\.d, p0/z, z0\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svabd_n_f64_z (p0, z0, 0.5), ++ z0 = svabd_z (p0, z0, 0.5)) ++ ++/* ++** abd_0p5_f64_z_untied: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f64_z_untied, svfloat64_t, ++ z0 = svabd_n_f64_z (p0, z1, 0.5), ++ z0 = svabd_z (p0, z1, 0.5)) ++ ++/* ++** abd_m1_f64_z_tied1: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f64_z_tied1, svfloat64_t, ++ z0 = svabd_n_f64_z (p0, z0, -1), ++ z0 = svabd_z (p0, z0, -1)) ++ ++/* ++** abd_m1_f64_z_untied: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f64_z_untied, svfloat64_t, ++ z0 = svabd_n_f64_z (p0, z1, -1), ++ z0 = svabd_z (p0, z1, -1)) ++ ++/* ++** abd_m0p5_f64_z_tied1: ++** fmov (z[0-9]+\.d), #-(?:0\.5|5\.0e-1) ++** movprfx z0\.d, p0/z, z0\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f64_z_tied1, svfloat64_t, ++ z0 = svabd_n_f64_z (p0, z0, -0.5), ++ z0 = svabd_z (p0, z0, -0.5)) ++ ++/* ++** abd_m0p5_f64_z_untied: ++** fmov (z[0-9]+\.d), #-(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f64_z_untied, svfloat64_t, ++ z0 = svabd_n_f64_z (p0, z1, -0.5), ++ z0 = svabd_z (p0, z1, -0.5)) ++ ++/* ++** abd_m2_f64_z: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m2_f64_z, svfloat64_t, ++ z0 = svabd_n_f64_z (p0, z0, -2), ++ z0 = svabd_z (p0, z0, -2)) ++ ++/* ++** abd_f64_x_tied1: ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f64_x_tied1, svfloat64_t, ++ z0 = svabd_f64_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_f64_x_tied2: ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f64_x_tied2, svfloat64_t, ++ z0 = svabd_f64_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fabd z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_f64_x_untied, svfloat64_t, ++ z0 = svabd_f64_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svabd_n_f64_x (p0, z0, d4), ++ z0 = svabd_x (p0, z0, d4)) ++ ++/* ++** abd_d4_f64_x_untied: ++** mov z0\.d, d4 ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (abd_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svabd_n_f64_x (p0, z1, d4), ++ z0 = svabd_x (p0, z1, d4)) ++ ++/* ++** abd_1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f64_x_tied1, svfloat64_t, ++ z0 = svabd_n_f64_x (p0, z0, 1), ++ z0 = svabd_x (p0, z0, 1)) ++ ++/* ++** abd_1_f64_x_untied: ++** fmov z0\.d, #1\.0(?:e\+0)? ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_f64_x_untied, svfloat64_t, ++ z0 = svabd_n_f64_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) ++ ++/* ++** abd_0p5_f64_x_tied1: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svabd_n_f64_x (p0, z0, 0.5), ++ z0 = svabd_x (p0, z0, 0.5)) ++ ++/* ++** abd_0p5_f64_x_untied: ++** fmov z0\.d, #(?:0\.5|5\.0e-1) ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_0p5_f64_x_untied, svfloat64_t, ++ z0 = svabd_n_f64_x (p0, z1, 0.5), ++ z0 = svabd_x (p0, z1, 0.5)) ++ ++/* ++** abd_m1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f64_x_tied1, svfloat64_t, ++ z0 = svabd_n_f64_x (p0, z0, -1), ++ z0 = svabd_x (p0, z0, -1)) ++ ++/* ++** abd_m1_f64_x_untied: ++** fmov z0\.d, #-1\.0(?:e\+0)? ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m1_f64_x_untied, svfloat64_t, ++ z0 = svabd_n_f64_x (p0, z1, -1), ++ z0 = svabd_x (p0, z1, -1)) ++ ++/* ++** abd_m0p5_f64_x_tied1: ++** fmov (z[0-9]+\.d), #-(?:0\.5|5\.0e-1) ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f64_x_tied1, svfloat64_t, ++ z0 = svabd_n_f64_x (p0, z0, -0.5), ++ z0 = svabd_x (p0, z0, -0.5)) ++ ++/* ++** abd_m0p5_f64_x_untied: ++** fmov z0\.d, #-(?:0\.5|5\.0e-1) ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_m0p5_f64_x_untied, svfloat64_t, ++ z0 = svabd_n_f64_x (p0, z1, -0.5), ++ z0 = svabd_x (p0, z1, -0.5)) ++ ++/* ++** abd_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_2_f64_x_tied1, svfloat64_t, ++ z0 = svabd_n_f64_x (p0, z0, 2), ++ z0 = svabd_x (p0, z0, 2)) ++ ++/* ++** abd_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_2_f64_x_untied, svfloat64_t, ++ z0 = svabd_n_f64_x (p0, z1, 2), ++ z0 = svabd_x (p0, z1, 2)) ++ ++/* ++** ptrue_abd_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_f64_x_tied1, svfloat64_t, ++ z0 = svabd_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svabd_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_abd_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_f64_x_tied2, svfloat64_t, ++ z0 = svabd_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svabd_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_abd_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_f64_x_untied, svfloat64_t, ++ z0 = svabd_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svabd_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_abd_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_1_f64_x_tied1, svfloat64_t, ++ z0 = svabd_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svabd_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_abd_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_1_f64_x_untied, svfloat64_t, ++ z0 = svabd_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svabd_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_abd_0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svabd_n_f64_x (svptrue_b64 (), z0, 0.5), ++ z0 = svabd_x (svptrue_b64 (), z0, 0.5)) ++ ++/* ++** ptrue_abd_0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_0p5_f64_x_untied, svfloat64_t, ++ z0 = svabd_n_f64_x (svptrue_b64 (), z1, 0.5), ++ z0 = svabd_x (svptrue_b64 (), z1, 0.5)) ++ ++/* ++** ptrue_abd_m1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m1_f64_x_tied1, svfloat64_t, ++ z0 = svabd_n_f64_x (svptrue_b64 (), z0, -1), ++ z0 = svabd_x (svptrue_b64 (), z0, -1)) ++ ++/* ++** ptrue_abd_m1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m1_f64_x_untied, svfloat64_t, ++ z0 = svabd_n_f64_x (svptrue_b64 (), z1, -1), ++ z0 = svabd_x (svptrue_b64 (), z1, -1)) ++ ++/* ++** ptrue_abd_m0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m0p5_f64_x_tied1, svfloat64_t, ++ z0 = svabd_n_f64_x (svptrue_b64 (), z0, -0.5), ++ z0 = svabd_x (svptrue_b64 (), z0, -0.5)) ++ ++/* ++** ptrue_abd_m0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_m0p5_f64_x_untied, svfloat64_t, ++ z0 = svabd_n_f64_x (svptrue_b64 (), z1, -0.5), ++ z0 = svabd_x (svptrue_b64 (), z1, -0.5)) ++ ++/* ++** ptrue_abd_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_2_f64_x_tied1, svfloat64_t, ++ z0 = svabd_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svabd_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_abd_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abd_2_f64_x_untied, svfloat64_t, ++ z0 = svabd_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svabd_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s16.c +new file mode 100644 +index 000000000..e2d0c0fb7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s16.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_s16_m_tied1: ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s16_m_tied1, svint16_t, ++ z0 = svabd_s16_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sabd z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s16_m_tied2, svint16_t, ++ z0 = svabd_s16_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_s16_m_untied: ++** movprfx z0, z1 ++** sabd z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s16_m_untied, svint16_t, ++ z0 = svabd_s16_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** sabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svabd_n_s16_m (p0, z0, x0), ++ z0 = svabd_m (p0, z0, x0)) ++ ++/* ++** abd_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** sabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svabd_n_s16_m (p0, z1, x0), ++ z0 = svabd_m (p0, z1, x0)) ++ ++/* ++** abd_1_s16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** sabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s16_m_tied1, svint16_t, ++ z0 = svabd_n_s16_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** abd_1_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** sabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s16_m_untied, svint16_t, ++ z0 = svabd_n_s16_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s16_z_tied1, svint16_t, ++ z0 = svabd_s16_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s16_z_tied2, svint16_t, ++ z0 = svabd_s16_z (p0, 
z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** sabd z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s16_z_untied, svint16_t, ++ z0 = svabd_s16_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** sabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svabd_n_s16_z (p0, z0, x0), ++ z0 = svabd_z (p0, z0, x0)) ++ ++/* ++** abd_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** sabd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svabd_n_s16_z (p0, z1, x0), ++ z0 = svabd_z (p0, z1, x0)) ++ ++/* ++** abd_1_s16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** sabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s16_z_tied1, svint16_t, ++ z0 = svabd_n_s16_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_s16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** sabd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s16_z_untied, svint16_t, ++ z0 = svabd_n_s16_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_s16_x_tied1: ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s16_x_tied1, svint16_t, ++ z0 = svabd_s16_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_s16_x_tied2: ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s16_x_tied2, svint16_t, ++ z0 = svabd_s16_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** sabd z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s16_x_untied, svint16_t, ++ z0 = svabd_s16_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** sabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svabd_n_s16_x (p0, z0, x0), ++ z0 = svabd_x (p0, z0, x0)) ++ ++/* ++** abd_w0_s16_x_untied: ++** mov z0\.h, w0 ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svabd_n_s16_x (p0, z1, x0), ++ z0 = svabd_x (p0, z1, x0)) ++ ++/* ++** abd_1_s16_x_tied1: ++** mov (z[0-9]+\.h), #1 ++** sabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s16_x_tied1, svint16_t, ++ z0 = svabd_n_s16_x (p0, z0, 1), ++ z0 = svabd_x (p0, z0, 1)) ++ ++/* ++** abd_1_s16_x_untied: ++** mov z0\.h, #1 ++** sabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s16_x_untied, svint16_t, ++ z0 = svabd_n_s16_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s32.c +new file mode 100644 +index 000000000..5c95ec04d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s32.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_s32_m_tied1: ++** sabd z0\.s, p0/m, z0\.s, z1\.s 
++** ret ++*/ ++TEST_UNIFORM_Z (abd_s32_m_tied1, svint32_t, ++ z0 = svabd_s32_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sabd z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s32_m_tied2, svint32_t, ++ z0 = svabd_s32_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_s32_m_untied: ++** movprfx z0, z1 ++** sabd z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s32_m_untied, svint32_t, ++ z0 = svabd_s32_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** sabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svabd_n_s32_m (p0, z0, x0), ++ z0 = svabd_m (p0, z0, x0)) ++ ++/* ++** abd_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** sabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svabd_n_s32_m (p0, z1, x0), ++ z0 = svabd_m (p0, z1, x0)) ++ ++/* ++** abd_1_s32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** sabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s32_m_tied1, svint32_t, ++ z0 = svabd_n_s32_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** abd_1_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** sabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s32_m_untied, svint32_t, ++ z0 = svabd_n_s32_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** sabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s32_z_tied1, svint32_t, ++ z0 = svabd_s32_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** sabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s32_z_tied2, svint32_t, ++ z0 = svabd_s32_z (p0, z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sabd z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** sabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s32_z_untied, svint32_t, ++ z0 = svabd_s32_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** sabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svabd_n_s32_z (p0, z0, x0), ++ z0 = svabd_z (p0, z0, x0)) ++ ++/* ++** abd_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sabd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** sabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svabd_n_s32_z (p0, z1, x0), ++ z0 = svabd_z (p0, z1, x0)) ++ ++/* ++** abd_1_s32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** sabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s32_z_tied1, svint32_t, ++ z0 = svabd_n_s32_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_s32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sabd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** sabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s32_z_untied, svint32_t, ++ z0 = svabd_n_s32_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_s32_x_tied1: ++** sabd z0\.s, p0/m, z0\.s, z1\.s 
++** ret ++*/ ++TEST_UNIFORM_Z (abd_s32_x_tied1, svint32_t, ++ z0 = svabd_s32_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_s32_x_tied2: ++** sabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s32_x_tied2, svint32_t, ++ z0 = svabd_s32_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** sabd z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** sabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s32_x_untied, svint32_t, ++ z0 = svabd_s32_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** sabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svabd_n_s32_x (p0, z0, x0), ++ z0 = svabd_x (p0, z0, x0)) ++ ++/* ++** abd_w0_s32_x_untied: ++** mov z0\.s, w0 ++** sabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svabd_n_s32_x (p0, z1, x0), ++ z0 = svabd_x (p0, z1, x0)) ++ ++/* ++** abd_1_s32_x_tied1: ++** mov (z[0-9]+\.s), #1 ++** sabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s32_x_tied1, svint32_t, ++ z0 = svabd_n_s32_x (p0, z0, 1), ++ z0 = svabd_x (p0, z0, 1)) ++ ++/* ++** abd_1_s32_x_untied: ++** mov z0\.s, #1 ++** sabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s32_x_untied, svint32_t, ++ z0 = svabd_n_s32_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s64.c +new file mode 100644 +index 000000000..2402ecf29 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s64.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_s64_m_tied1: ++** sabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s64_m_tied1, svint64_t, ++ z0 = svabd_s64_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** sabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s64_m_tied2, svint64_t, ++ z0 = svabd_s64_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_s64_m_untied: ++** movprfx z0, z1 ++** sabd z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s64_m_untied, svint64_t, ++ z0 = svabd_s64_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** sabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svabd_n_s64_m (p0, z0, x0), ++ z0 = svabd_m (p0, z0, x0)) ++ ++/* ++** abd_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** sabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svabd_n_s64_m (p0, z1, x0), ++ z0 = svabd_m (p0, z1, x0)) ++ ++/* ++** abd_1_s64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** sabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s64_m_tied1, svint64_t, ++ z0 = svabd_n_s64_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** abd_1_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** sabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s64_m_untied, svint64_t, ++ z0 = svabd_n_s64_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** sabd z0\.d, p0/m, 
z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s64_z_tied1, svint64_t, ++ z0 = svabd_s64_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** sabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s64_z_tied2, svint64_t, ++ z0 = svabd_s64_z (p0, z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sabd z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** sabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s64_z_untied, svint64_t, ++ z0 = svabd_s64_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** sabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svabd_n_s64_z (p0, z0, x0), ++ z0 = svabd_z (p0, z0, x0)) ++ ++/* ++** abd_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sabd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** sabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svabd_n_s64_z (p0, z1, x0), ++ z0 = svabd_z (p0, z1, x0)) ++ ++/* ++** abd_1_s64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** sabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s64_z_tied1, svint64_t, ++ z0 = svabd_n_s64_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_s64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sabd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** sabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s64_z_untied, svint64_t, ++ z0 = svabd_n_s64_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_s64_x_tied1: ++** sabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s64_x_tied1, svint64_t, ++ z0 = svabd_s64_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_s64_x_tied2: ++** sabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s64_x_tied2, svint64_t, ++ z0 = svabd_s64_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** sabd z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** sabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s64_x_untied, svint64_t, ++ z0 = svabd_s64_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** sabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svabd_n_s64_x (p0, z0, x0), ++ z0 = svabd_x (p0, z0, x0)) ++ ++/* ++** abd_x0_s64_x_untied: ++** mov z0\.d, x0 ++** sabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svabd_n_s64_x (p0, z1, x0), ++ z0 = svabd_x (p0, z1, x0)) ++ ++/* ++** abd_1_s64_x_tied1: ++** mov (z[0-9]+\.d), #1 ++** sabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s64_x_tied1, svint64_t, ++ z0 = svabd_n_s64_x (p0, z0, 1), ++ z0 = svabd_x (p0, z0, 1)) ++ ++/* ++** abd_1_s64_x_untied: ++** mov z0\.d, #1 ++** sabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s64_x_untied, svint64_t, ++ z0 = svabd_n_s64_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s8.c +new file mode 100644 +index 000000000..49a2cc388 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s8.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_s8_m_tied1: ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s8_m_tied1, svint8_t, ++ z0 = svabd_s8_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sabd z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s8_m_tied2, svint8_t, ++ z0 = svabd_s8_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_s8_m_untied: ++** movprfx z0, z1 ++** sabd z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s8_m_untied, svint8_t, ++ z0 = svabd_s8_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** sabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svabd_n_s8_m (p0, z0, x0), ++ z0 = svabd_m (p0, z0, x0)) ++ ++/* ++** abd_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** sabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svabd_n_s8_m (p0, z1, x0), ++ z0 = svabd_m (p0, z1, x0)) ++ ++/* ++** abd_1_s8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** sabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s8_m_tied1, svint8_t, ++ z0 = svabd_n_s8_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** abd_1_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** sabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s8_m_untied, svint8_t, ++ z0 = svabd_n_s8_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s8_z_tied1, svint8_t, ++ z0 = svabd_s8_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s8_z_tied2, svint8_t, ++ z0 = svabd_s8_z (p0, z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** sabd z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s8_z_untied, svint8_t, ++ z0 = svabd_s8_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** sabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svabd_n_s8_z (p0, z0, x0), ++ z0 = svabd_z (p0, z0, x0)) ++ ++/* ++** abd_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** sabd z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svabd_n_s8_z (p0, z1, x0), ++ z0 = svabd_z (p0, z1, x0)) ++ ++/* ++** abd_1_s8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** sabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s8_z_tied1, svint8_t, ++ z0 = svabd_n_s8_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_s8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** 
movprfx z0\.b, p0/z, z1\.b ++** sabd z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s8_z_untied, svint8_t, ++ z0 = svabd_n_s8_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_s8_x_tied1: ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s8_x_tied1, svint8_t, ++ z0 = svabd_s8_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_s8_x_tied2: ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s8_x_tied2, svint8_t, ++ z0 = svabd_s8_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** sabd z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_s8_x_untied, svint8_t, ++ z0 = svabd_s8_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** sabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svabd_n_s8_x (p0, z0, x0), ++ z0 = svabd_x (p0, z0, x0)) ++ ++/* ++** abd_w0_s8_x_untied: ++** mov z0\.b, w0 ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svabd_n_s8_x (p0, z1, x0), ++ z0 = svabd_x (p0, z1, x0)) ++ ++/* ++** abd_1_s8_x_tied1: ++** mov (z[0-9]+\.b), #1 ++** sabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s8_x_tied1, svint8_t, ++ z0 = svabd_n_s8_x (p0, z0, 1), ++ z0 = svabd_x (p0, z0, 1)) ++ ++/* ++** abd_1_s8_x_untied: ++** mov z0\.b, #1 ++** sabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_s8_x_untied, svint8_t, ++ z0 = svabd_n_s8_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u16.c +new file mode 100644 +index 000000000..60aa9429e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u16.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_u16_m_tied1: ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u16_m_tied1, svuint16_t, ++ z0 = svabd_u16_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** uabd z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u16_m_tied2, svuint16_t, ++ z0 = svabd_u16_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_u16_m_untied: ++** movprfx z0, z1 ++** uabd z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u16_m_untied, svuint16_t, ++ z0 = svabd_u16_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** uabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svabd_n_u16_m (p0, z0, x0), ++ z0 = svabd_m (p0, z0, x0)) ++ ++/* ++** abd_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** uabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svabd_n_u16_m (p0, z1, x0), ++ z0 = svabd_m (p0, z1, x0)) ++ ++/* ++** abd_1_u16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** uabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u16_m_tied1, svuint16_t, ++ z0 = svabd_n_u16_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** 
abd_1_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** uabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u16_m_untied, svuint16_t, ++ z0 = svabd_n_u16_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u16_z_tied1, svuint16_t, ++ z0 = svabd_u16_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u16_z_tied2, svuint16_t, ++ z0 = svabd_u16_z (p0, z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** uabd z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u16_z_untied, svuint16_t, ++ z0 = svabd_u16_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** uabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svabd_n_u16_z (p0, z0, x0), ++ z0 = svabd_z (p0, z0, x0)) ++ ++/* ++** abd_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** uabd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svabd_n_u16_z (p0, z1, x0), ++ z0 = svabd_z (p0, z1, x0)) ++ ++/* ++** abd_1_u16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** uabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u16_z_tied1, svuint16_t, ++ z0 = svabd_n_u16_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_u16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** uabd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u16_z_untied, svuint16_t, ++ z0 = svabd_n_u16_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_u16_x_tied1: ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u16_x_tied1, svuint16_t, ++ z0 = svabd_u16_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_u16_x_tied2: ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u16_x_tied2, svuint16_t, ++ z0 = svabd_u16_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** uabd z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u16_x_untied, svuint16_t, ++ z0 = svabd_u16_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** uabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svabd_n_u16_x (p0, z0, x0), ++ z0 = svabd_x (p0, z0, x0)) ++ ++/* ++** abd_w0_u16_x_untied: ++** mov z0\.h, w0 ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svabd_n_u16_x (p0, z1, x0), ++ z0 = svabd_x (p0, z1, x0)) ++ ++/* ++** abd_1_u16_x_tied1: ++** mov (z[0-9]+\.h), #1 ++** uabd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u16_x_tied1, svuint16_t, ++ z0 = svabd_n_u16_x (p0, z0, 1), ++ z0 = svabd_x (p0, 
z0, 1)) ++ ++/* ++** abd_1_u16_x_untied: ++** mov z0\.h, #1 ++** uabd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u16_x_untied, svuint16_t, ++ z0 = svabd_n_u16_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u32.c +new file mode 100644 +index 000000000..bc2410783 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u32.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_u32_m_tied1: ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u32_m_tied1, svuint32_t, ++ z0 = svabd_u32_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** uabd z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u32_m_tied2, svuint32_t, ++ z0 = svabd_u32_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_u32_m_untied: ++** movprfx z0, z1 ++** uabd z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u32_m_untied, svuint32_t, ++ z0 = svabd_u32_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** uabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svabd_n_u32_m (p0, z0, x0), ++ z0 = svabd_m (p0, z0, x0)) ++ ++/* ++** abd_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** uabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svabd_n_u32_m (p0, z1, x0), ++ z0 = svabd_m (p0, z1, x0)) ++ ++/* ++** abd_1_u32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** uabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u32_m_tied1, svuint32_t, ++ z0 = svabd_n_u32_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** abd_1_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** uabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u32_m_untied, svuint32_t, ++ z0 = svabd_n_u32_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u32_z_tied1, svuint32_t, ++ z0 = svabd_u32_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u32_z_tied2, svuint32_t, ++ z0 = svabd_u32_z (p0, z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** uabd z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u32_z_untied, svuint32_t, ++ z0 = svabd_u32_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** uabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svabd_n_u32_z (p0, z0, x0), ++ z0 = svabd_z (p0, z0, x0)) ++ ++/* ++** abd_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** uabd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svabd_n_u32_z (p0, z1, x0), 
++ z0 = svabd_z (p0, z1, x0)) ++ ++/* ++** abd_1_u32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** uabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u32_z_tied1, svuint32_t, ++ z0 = svabd_n_u32_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_u32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** uabd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u32_z_untied, svuint32_t, ++ z0 = svabd_n_u32_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_u32_x_tied1: ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u32_x_tied1, svuint32_t, ++ z0 = svabd_u32_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_u32_x_tied2: ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u32_x_tied2, svuint32_t, ++ z0 = svabd_u32_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** uabd z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u32_x_untied, svuint32_t, ++ z0 = svabd_u32_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** uabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svabd_n_u32_x (p0, z0, x0), ++ z0 = svabd_x (p0, z0, x0)) ++ ++/* ++** abd_w0_u32_x_untied: ++** mov z0\.s, w0 ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svabd_n_u32_x (p0, z1, x0), ++ z0 = svabd_x (p0, z1, x0)) ++ ++/* ++** abd_1_u32_x_tied1: ++** mov (z[0-9]+\.s), #1 ++** uabd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u32_x_tied1, svuint32_t, ++ z0 = svabd_n_u32_x (p0, z0, 1), ++ z0 = svabd_x (p0, z0, 1)) ++ ++/* ++** abd_1_u32_x_untied: ++** mov z0\.s, #1 ++** uabd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u32_x_untied, svuint32_t, ++ z0 = svabd_n_u32_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u64.c +new file mode 100644 +index 000000000..d2cdaa06a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u64.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_u64_m_tied1: ++** uabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u64_m_tied1, svuint64_t, ++ z0 = svabd_u64_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** uabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u64_m_tied2, svuint64_t, ++ z0 = svabd_u64_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_u64_m_untied: ++** movprfx z0, z1 ++** uabd z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u64_m_untied, svuint64_t, ++ z0 = svabd_u64_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** uabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svabd_n_u64_m (p0, z0, x0), ++ z0 = svabd_m (p0, z0, x0)) ++ ++/* ++** abd_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** uabd z0\.d, p0/m, z0\.d, \1 
++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svabd_n_u64_m (p0, z1, x0), ++ z0 = svabd_m (p0, z1, x0)) ++ ++/* ++** abd_1_u64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** uabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u64_m_tied1, svuint64_t, ++ z0 = svabd_n_u64_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** abd_1_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** uabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u64_m_untied, svuint64_t, ++ z0 = svabd_n_u64_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** uabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u64_z_tied1, svuint64_t, ++ z0 = svabd_u64_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** uabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u64_z_tied2, svuint64_t, ++ z0 = svabd_u64_z (p0, z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** uabd z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** uabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u64_z_untied, svuint64_t, ++ z0 = svabd_u64_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** uabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svabd_n_u64_z (p0, z0, x0), ++ z0 = svabd_z (p0, z0, x0)) ++ ++/* ++** abd_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** uabd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** uabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svabd_n_u64_z (p0, z1, x0), ++ z0 = svabd_z (p0, z1, x0)) ++ ++/* ++** abd_1_u64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** uabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u64_z_tied1, svuint64_t, ++ z0 = svabd_n_u64_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_u64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** uabd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** uabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u64_z_untied, svuint64_t, ++ z0 = svabd_n_u64_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_u64_x_tied1: ++** uabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u64_x_tied1, svuint64_t, ++ z0 = svabd_u64_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_u64_x_tied2: ++** uabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u64_x_tied2, svuint64_t, ++ z0 = svabd_u64_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** uabd z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** uabd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u64_x_untied, svuint64_t, ++ z0 = svabd_u64_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** uabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svabd_n_u64_x (p0, z0, x0), ++ z0 = svabd_x (p0, z0, x0)) ++ ++/* ++** abd_x0_u64_x_untied: ++** mov z0\.d, x0 ++** uabd 
z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svabd_n_u64_x (p0, z1, x0), ++ z0 = svabd_x (p0, z1, x0)) ++ ++/* ++** abd_1_u64_x_tied1: ++** mov (z[0-9]+\.d), #1 ++** uabd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u64_x_tied1, svuint64_t, ++ z0 = svabd_n_u64_x (p0, z0, 1), ++ z0 = svabd_x (p0, z0, 1)) ++ ++/* ++** abd_1_u64_x_untied: ++** mov z0\.d, #1 ++** uabd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u64_x_untied, svuint64_t, ++ z0 = svabd_n_u64_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u8.c +new file mode 100644 +index 000000000..454ef153c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u8.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abd_u8_m_tied1: ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u8_m_tied1, svuint8_t, ++ z0 = svabd_u8_m (p0, z0, z1), ++ z0 = svabd_m (p0, z0, z1)) ++ ++/* ++** abd_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** uabd z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u8_m_tied2, svuint8_t, ++ z0 = svabd_u8_m (p0, z1, z0), ++ z0 = svabd_m (p0, z1, z0)) ++ ++/* ++** abd_u8_m_untied: ++** movprfx z0, z1 ++** uabd z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u8_m_untied, svuint8_t, ++ z0 = svabd_u8_m (p0, z1, z2), ++ z0 = svabd_m (p0, z1, z2)) ++ ++/* ++** abd_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** uabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svabd_n_u8_m (p0, z0, x0), ++ z0 = svabd_m (p0, z0, x0)) ++ ++/* ++** abd_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** uabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svabd_n_u8_m (p0, z1, x0), ++ z0 = svabd_m (p0, z1, x0)) ++ ++/* ++** abd_1_u8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** uabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u8_m_tied1, svuint8_t, ++ z0 = svabd_n_u8_m (p0, z0, 1), ++ z0 = svabd_m (p0, z0, 1)) ++ ++/* ++** abd_1_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** uabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u8_m_untied, svuint8_t, ++ z0 = svabd_n_u8_m (p0, z1, 1), ++ z0 = svabd_m (p0, z1, 1)) ++ ++/* ++** abd_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u8_z_tied1, svuint8_t, ++ z0 = svabd_u8_z (p0, z0, z1), ++ z0 = svabd_z (p0, z0, z1)) ++ ++/* ++** abd_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u8_z_tied2, svuint8_t, ++ z0 = svabd_u8_z (p0, z1, z0), ++ z0 = svabd_z (p0, z1, z0)) ++ ++/* ++** abd_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** uabd z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u8_z_untied, svuint8_t, ++ z0 = svabd_u8_z (p0, z1, z2), ++ z0 = svabd_z (p0, z1, z2)) ++ ++/* ++** abd_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** uabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svabd_n_u8_z (p0, z0, x0), ++ z0 = 
svabd_z (p0, z0, x0)) ++ ++/* ++** abd_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** uabd z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svabd_n_u8_z (p0, z1, x0), ++ z0 = svabd_z (p0, z1, x0)) ++ ++/* ++** abd_1_u8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** uabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u8_z_tied1, svuint8_t, ++ z0 = svabd_n_u8_z (p0, z0, 1), ++ z0 = svabd_z (p0, z0, 1)) ++ ++/* ++** abd_1_u8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** uabd z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u8_z_untied, svuint8_t, ++ z0 = svabd_n_u8_z (p0, z1, 1), ++ z0 = svabd_z (p0, z1, 1)) ++ ++/* ++** abd_u8_x_tied1: ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u8_x_tied1, svuint8_t, ++ z0 = svabd_u8_x (p0, z0, z1), ++ z0 = svabd_x (p0, z0, z1)) ++ ++/* ++** abd_u8_x_tied2: ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u8_x_tied2, svuint8_t, ++ z0 = svabd_u8_x (p0, z1, z0), ++ z0 = svabd_x (p0, z1, z0)) ++ ++/* ++** abd_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** uabd z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (abd_u8_x_untied, svuint8_t, ++ z0 = svabd_u8_x (p0, z1, z2), ++ z0 = svabd_x (p0, z1, z2)) ++ ++/* ++** abd_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** uabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svabd_n_u8_x (p0, z0, x0), ++ z0 = svabd_x (p0, z0, x0)) ++ ++/* ++** abd_w0_u8_x_untied: ++** mov z0\.b, w0 ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (abd_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svabd_n_u8_x (p0, z1, x0), ++ z0 = svabd_x (p0, z1, x0)) ++ ++/* ++** abd_1_u8_x_tied1: ++** mov (z[0-9]+\.b), #1 ++** uabd z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u8_x_tied1, svuint8_t, ++ z0 = svabd_n_u8_x (p0, z0, 1), ++ z0 = svabd_x (p0, z0, 1)) ++ ++/* ++** abd_1_u8_x_untied: ++** mov z0\.b, #1 ++** uabd z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abd_1_u8_x_untied, svuint8_t, ++ z0 = svabd_n_u8_x (p0, z1, 1), ++ z0 = svabd_x (p0, z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f16.c +new file mode 100644 +index 000000000..2aa8736e6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abs_f16_m_tied12: ++** fabs z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f16_m_tied12, svfloat16_t, ++ z0 = svabs_f16_m (z0, p0, z0), ++ z0 = svabs_m (z0, p0, z0)) ++ ++/* ++** abs_f16_m_tied1: ++** fabs z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f16_m_tied1, svfloat16_t, ++ z0 = svabs_f16_m (z0, p0, z1), ++ z0 = svabs_m (z0, p0, z1)) ++ ++/* ++** abs_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fabs z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f16_m_tied2, svfloat16_t, ++ z0 = svabs_f16_m (z1, p0, z0), ++ z0 = svabs_m (z1, p0, z0)) ++ ++/* ++** abs_f16_m_untied: ++** movprfx z0, z2 ++** fabs z0\.h, p0/m, z1\.h 
++** ret ++*/ ++TEST_UNIFORM_Z (abs_f16_m_untied, svfloat16_t, ++ z0 = svabs_f16_m (z2, p0, z1), ++ z0 = svabs_m (z2, p0, z1)) ++ ++/* ++** abs_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** fabs z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f16_z_tied1, svfloat16_t, ++ z0 = svabs_f16_z (p0, z0), ++ z0 = svabs_z (p0, z0)) ++ ++/* ++** abs_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fabs z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f16_z_untied, svfloat16_t, ++ z0 = svabs_f16_z (p0, z1), ++ z0 = svabs_z (p0, z1)) ++ ++/* ++** abs_f16_x_tied1: ++** fabs z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f16_x_tied1, svfloat16_t, ++ z0 = svabs_f16_x (p0, z0), ++ z0 = svabs_x (p0, z0)) ++ ++/* ++** abs_f16_x_untied: ++** fabs z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f16_x_untied, svfloat16_t, ++ z0 = svabs_f16_x (p0, z1), ++ z0 = svabs_x (p0, z1)) ++ ++/* ++** ptrue_abs_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abs_f16_x_tied1, svfloat16_t, ++ z0 = svabs_f16_x (svptrue_b16 (), z0), ++ z0 = svabs_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_abs_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abs_f16_x_untied, svfloat16_t, ++ z0 = svabs_f16_x (svptrue_b16 (), z1), ++ z0 = svabs_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f32.c +new file mode 100644 +index 000000000..30286afc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abs_f32_m_tied12: ++** fabs z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f32_m_tied12, svfloat32_t, ++ z0 = svabs_f32_m (z0, p0, z0), ++ z0 = svabs_m (z0, p0, z0)) ++ ++/* ++** abs_f32_m_tied1: ++** fabs z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f32_m_tied1, svfloat32_t, ++ z0 = svabs_f32_m (z0, p0, z1), ++ z0 = svabs_m (z0, p0, z1)) ++ ++/* ++** abs_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fabs z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f32_m_tied2, svfloat32_t, ++ z0 = svabs_f32_m (z1, p0, z0), ++ z0 = svabs_m (z1, p0, z0)) ++ ++/* ++** abs_f32_m_untied: ++** movprfx z0, z2 ++** fabs z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f32_m_untied, svfloat32_t, ++ z0 = svabs_f32_m (z2, p0, z1), ++ z0 = svabs_m (z2, p0, z1)) ++ ++/* ++** abs_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** fabs z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f32_z_tied1, svfloat32_t, ++ z0 = svabs_f32_z (p0, z0), ++ z0 = svabs_z (p0, z0)) ++ ++/* ++** abs_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fabs z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f32_z_untied, svfloat32_t, ++ z0 = svabs_f32_z (p0, z1), ++ z0 = svabs_z (p0, z1)) ++ ++/* ++** abs_f32_x_tied1: ++** fabs z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f32_x_tied1, svfloat32_t, ++ z0 = svabs_f32_x (p0, z0), ++ z0 = svabs_x (p0, z0)) ++ ++/* ++** abs_f32_x_untied: ++** fabs z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f32_x_untied, svfloat32_t, ++ z0 = svabs_f32_x (p0, z1), ++ z0 = svabs_x (p0, z1)) ++ ++/* ++** ptrue_abs_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abs_f32_x_tied1, svfloat32_t, ++ z0 = svabs_f32_x (svptrue_b32 (), z0), ++ z0 = svabs_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_abs_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abs_f32_x_untied, svfloat32_t, ++ z0 = svabs_f32_x (svptrue_b32 (), z1), ++ z0 = svabs_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f64.c +new file mode 100644 +index 000000000..28ef9fbba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abs_f64_m_tied12: ++** fabs z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f64_m_tied12, svfloat64_t, ++ z0 = svabs_f64_m (z0, p0, z0), ++ z0 = svabs_m (z0, p0, z0)) ++ ++/* ++** abs_f64_m_tied1: ++** fabs z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f64_m_tied1, svfloat64_t, ++ z0 = svabs_f64_m (z0, p0, z1), ++ z0 = svabs_m (z0, p0, z1)) ++ ++/* ++** abs_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fabs z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f64_m_tied2, svfloat64_t, ++ z0 = svabs_f64_m (z1, p0, z0), ++ z0 = svabs_m (z1, p0, z0)) ++ ++/* ++** abs_f64_m_untied: ++** movprfx z0, z2 ++** fabs z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f64_m_untied, svfloat64_t, ++ z0 = svabs_f64_m (z2, p0, z1), ++ z0 = svabs_m (z2, p0, z1)) ++ ++/* ++** abs_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** fabs z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f64_z_tied1, svfloat64_t, ++ z0 = svabs_f64_z (p0, z0), ++ z0 = svabs_z (p0, z0)) ++ ++/* ++** abs_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fabs z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f64_z_untied, svfloat64_t, ++ z0 = svabs_f64_z (p0, z1), ++ z0 = svabs_z (p0, z1)) ++ ++/* ++** abs_f64_x_tied1: ++** fabs z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f64_x_tied1, svfloat64_t, ++ z0 = svabs_f64_x (p0, z0), ++ z0 = svabs_x (p0, z0)) ++ ++/* ++** abs_f64_x_untied: ++** fabs z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_f64_x_untied, svfloat64_t, ++ z0 = svabs_f64_x (p0, z1), ++ z0 = svabs_x (p0, z1)) ++ ++/* ++** ptrue_abs_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abs_f64_x_tied1, svfloat64_t, ++ z0 = svabs_f64_x (svptrue_b64 (), z0), ++ z0 = svabs_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_abs_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_abs_f64_x_untied, svfloat64_t, ++ z0 = svabs_f64_x (svptrue_b64 (), z1), ++ z0 = svabs_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s16.c +new file mode 100644 +index 000000000..3b16a9c4f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abs_s16_m_tied12: ++** abs z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s16_m_tied12, svint16_t, ++ z0 = svabs_s16_m (z0, p0, z0), ++ z0 = svabs_m (z0, p0, z0)) ++ ++/* ++** abs_s16_m_tied1: ++** abs z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s16_m_tied1, svint16_t, ++ z0 = svabs_s16_m (z0, p0, z1), ++ z0 = svabs_m (z0, p0, z1)) ++ ++/* ++** abs_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** abs z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s16_m_tied2, svint16_t, ++ z0 = svabs_s16_m (z1, p0, z0), ++ z0 = svabs_m (z1, p0, z0)) ++ ++/* ++** abs_s16_m_untied: ++** movprfx z0, z2 ++** abs z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s16_m_untied, svint16_t, ++ z0 = svabs_s16_m (z2, p0, z1), ++ z0 = svabs_m (z2, p0, z1)) ++ ++/* ++** abs_s16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** abs z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s16_z_tied1, svint16_t, ++ z0 = svabs_s16_z (p0, z0), ++ z0 = svabs_z (p0, z0)) ++ ++/* ++** abs_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** abs z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s16_z_untied, svint16_t, ++ z0 = svabs_s16_z (p0, z1), ++ z0 = svabs_z (p0, z1)) ++ ++/* ++** abs_s16_x_tied1: ++** abs z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s16_x_tied1, svint16_t, ++ z0 = svabs_s16_x (p0, z0), ++ z0 = svabs_x (p0, z0)) ++ ++/* ++** abs_s16_x_untied: ++** abs z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s16_x_untied, svint16_t, ++ z0 = svabs_s16_x (p0, z1), ++ z0 = svabs_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s32.c +new file mode 100644 +index 000000000..14bcbd50c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abs_s32_m_tied12: ++** abs z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s32_m_tied12, svint32_t, ++ z0 = svabs_s32_m (z0, p0, z0), ++ z0 = svabs_m (z0, p0, z0)) ++ ++/* ++** abs_s32_m_tied1: ++** abs z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s32_m_tied1, svint32_t, ++ z0 = svabs_s32_m (z0, p0, z1), ++ z0 = svabs_m (z0, p0, z1)) ++ ++/* ++** abs_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** abs z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s32_m_tied2, svint32_t, ++ z0 = svabs_s32_m (z1, p0, z0), ++ z0 = svabs_m (z1, p0, z0)) ++ ++/* ++** abs_s32_m_untied: ++** movprfx z0, z2 ++** abs z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s32_m_untied, svint32_t, ++ z0 = svabs_s32_m (z2, p0, z1), ++ z0 = svabs_m (z2, p0, z1)) ++ ++/* ++** abs_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** abs z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s32_z_tied1, svint32_t, ++ z0 = svabs_s32_z (p0, z0), ++ z0 = svabs_z (p0, z0)) ++ ++/* ++** 
abs_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** abs z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s32_z_untied, svint32_t, ++ z0 = svabs_s32_z (p0, z1), ++ z0 = svabs_z (p0, z1)) ++ ++/* ++** abs_s32_x_tied1: ++** abs z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s32_x_tied1, svint32_t, ++ z0 = svabs_s32_x (p0, z0), ++ z0 = svabs_x (p0, z0)) ++ ++/* ++** abs_s32_x_untied: ++** abs z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s32_x_untied, svint32_t, ++ z0 = svabs_s32_x (p0, z1), ++ z0 = svabs_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s64.c +new file mode 100644 +index 000000000..c7b60ff48 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abs_s64_m_tied12: ++** abs z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s64_m_tied12, svint64_t, ++ z0 = svabs_s64_m (z0, p0, z0), ++ z0 = svabs_m (z0, p0, z0)) ++ ++/* ++** abs_s64_m_tied1: ++** abs z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s64_m_tied1, svint64_t, ++ z0 = svabs_s64_m (z0, p0, z1), ++ z0 = svabs_m (z0, p0, z1)) ++ ++/* ++** abs_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** abs z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s64_m_tied2, svint64_t, ++ z0 = svabs_s64_m (z1, p0, z0), ++ z0 = svabs_m (z1, p0, z0)) ++ ++/* ++** abs_s64_m_untied: ++** movprfx z0, z2 ++** abs z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s64_m_untied, svint64_t, ++ z0 = svabs_s64_m (z2, p0, z1), ++ z0 = svabs_m (z2, p0, z1)) ++ ++/* ++** abs_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** abs z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s64_z_tied1, svint64_t, ++ z0 = svabs_s64_z (p0, z0), ++ z0 = svabs_z (p0, z0)) ++ ++/* ++** abs_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** abs z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s64_z_untied, svint64_t, ++ z0 = svabs_s64_z (p0, z1), ++ z0 = svabs_z (p0, z1)) ++ ++/* ++** abs_s64_x_tied1: ++** abs z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s64_x_tied1, svint64_t, ++ z0 = svabs_s64_x (p0, z0), ++ z0 = svabs_x (p0, z0)) ++ ++/* ++** abs_s64_x_untied: ++** abs z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s64_x_untied, svint64_t, ++ z0 = svabs_s64_x (p0, z1), ++ z0 = svabs_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s8.c +new file mode 100644 +index 000000000..0bc64c078 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s8.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** abs_s8_m_tied12: ++** abs z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s8_m_tied12, svint8_t, ++ z0 = svabs_s8_m (z0, p0, z0), ++ z0 = svabs_m (z0, p0, z0)) ++ ++/* ++** abs_s8_m_tied1: ++** abs z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s8_m_tied1, svint8_t, ++ z0 = svabs_s8_m (z0, p0, z1), ++ z0 = svabs_m (z0, p0, z1)) ++ ++/* ++** abs_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** abs z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s8_m_tied2, svint8_t, ++ z0 = svabs_s8_m (z1, p0, z0), ++ z0 = svabs_m (z1, p0, z0)) ++ ++/* ++** abs_s8_m_untied: ++** movprfx z0, z2 ++** abs z0\.b, p0/m, z1\.b ++** ret ++*/ 
++TEST_UNIFORM_Z (abs_s8_m_untied, svint8_t, ++ z0 = svabs_s8_m (z2, p0, z1), ++ z0 = svabs_m (z2, p0, z1)) ++ ++/* ++** abs_s8_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, \1\.b ++** abs z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s8_z_tied1, svint8_t, ++ z0 = svabs_s8_z (p0, z0), ++ z0 = svabs_z (p0, z0)) ++ ++/* ++** abs_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** abs z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s8_z_untied, svint8_t, ++ z0 = svabs_s8_z (p0, z1), ++ z0 = svabs_z (p0, z1)) ++ ++/* ++** abs_s8_x_tied1: ++** abs z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s8_x_tied1, svint8_t, ++ z0 = svabs_s8_x (p0, z0), ++ z0 = svabs_x (p0, z0)) ++ ++/* ++** abs_s8_x_untied: ++** abs z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (abs_s8_x_untied, svint8_t, ++ z0 = svabs_s8_x (p0, z1), ++ z0 = svabs_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f16.c +new file mode 100644 +index 000000000..acef17309 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f16.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** acge_f16_tied: ++** ( ++** facge p0\.h, p0/z, z0\.h, z1\.h ++** | ++** facle p0\.h, p0/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_f16_tied, svfloat16_t, ++ p0 = svacge_f16 (p0, z0, z1), ++ p0 = svacge (p0, z0, z1)) ++ ++/* ++** acge_f16_untied: ++** ( ++** facge p0\.h, p1/z, z0\.h, z1\.h ++** | ++** facle p0\.h, p1/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_f16_untied, svfloat16_t, ++ p0 = svacge_f16 (p1, z0, z1), ++ p0 = svacge (p1, z0, z1)) ++ ++/* ++** acge_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** ( ++** facge p0\.h, p1/z, z0\.h, \1 ++** | ++** facle p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (acge_h4_f16, svfloat16_t, float16_t, ++ p0 = svacge_n_f16 (p1, z0, d4), ++ p0 = svacge (p1, z0, d4)) ++ ++/* ++** acge_0_f16: ++** mov (z[0-9]+\.h), #0 ++** ( ++** facge p0\.h, p1/z, z0\.h, \1 ++** | ++** facle p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_0_f16, svfloat16_t, ++ p0 = svacge_n_f16 (p1, z0, 0), ++ p0 = svacge (p1, z0, 0)) ++ ++/* ++** acge_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** ( ++** facge p0\.h, p1/z, z0\.h, \1 ++** | ++** facle p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_1_f16, svfloat16_t, ++ p0 = svacge_n_f16 (p1, z0, 1), ++ p0 = svacge (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f32.c +new file mode 100644 +index 000000000..c3d195ab8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f32.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** acge_f32_tied: ++** ( ++** facge p0\.s, p0/z, z0\.s, z1\.s ++** | ++** facle p0\.s, p0/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_f32_tied, svfloat32_t, ++ p0 = svacge_f32 (p0, z0, z1), ++ p0 = svacge (p0, z0, z1)) ++ ++/* ++** acge_f32_untied: ++** ( ++** facge p0\.s, p1/z, z0\.s, z1\.s ++** | ++** facle p0\.s, p1/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_f32_untied, svfloat32_t, ++ p0 = svacge_f32 (p1, z0, z1), ++ p0 = svacge (p1, z0, z1)) ++ ++/* ++** acge_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** ( ++** facge p0\.s, p1/z, z0\.s, \1 ++** | ++** facle p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (acge_s4_f32, svfloat32_t, float32_t, ++ p0 = svacge_n_f32 (p1, z0, d4), ++ p0 = svacge (p1, z0, d4)) ++ ++/* ++** acge_0_f32: ++** mov (z[0-9]+\.s), #0 ++** ( ++** facge p0\.s, p1/z, z0\.s, \1 ++** | ++** facle p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_0_f32, svfloat32_t, ++ p0 = svacge_n_f32 (p1, z0, 0), ++ p0 = svacge (p1, z0, 0)) ++ ++/* ++** acge_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** ( ++** facge p0\.s, p1/z, z0\.s, \1 ++** | ++** facle p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_1_f32, svfloat32_t, ++ p0 = svacge_n_f32 (p1, z0, 1), ++ p0 = svacge (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f64.c +new file mode 100644 +index 000000000..207ce93a2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f64.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** acge_f64_tied: ++** ( ++** facge p0\.d, p0/z, z0\.d, z1\.d ++** | ++** facle p0\.d, p0/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_f64_tied, svfloat64_t, ++ p0 = svacge_f64 (p0, z0, z1), ++ p0 = svacge (p0, z0, z1)) ++ ++/* ++** acge_f64_untied: ++** ( ++** facge p0\.d, p1/z, z0\.d, z1\.d ++** | ++** facle p0\.d, p1/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_f64_untied, svfloat64_t, ++ p0 = svacge_f64 (p1, z0, z1), ++ p0 = svacge (p1, z0, z1)) ++ ++/* ++** acge_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** ( ++** facge p0\.d, p1/z, z0\.d, \1 ++** | ++** facle p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (acge_d4_f64, svfloat64_t, float64_t, ++ p0 = svacge_n_f64 (p1, z0, d4), ++ p0 = svacge (p1, z0, d4)) ++ ++/* ++** acge_0_f64: ++** mov (z[0-9]+\.d), #0 ++** ( ++** facge p0\.d, p1/z, z0\.d, \1 ++** | ++** facle p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_0_f64, svfloat64_t, ++ p0 = svacge_n_f64 (p1, z0, 0), ++ p0 = svacge (p1, z0, 0)) ++ ++/* ++** acge_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** ( ++** facge p0\.d, p1/z, z0\.d, \1 ++** | ++** facle p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acge_1_f64, svfloat64_t, ++ p0 = svacge_n_f64 (p1, z0, 1), ++ p0 = svacge (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f16.c +new file mode 100644 +index 000000000..53c63351c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f16.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** acgt_f16_tied: ++** ( ++** facgt p0\.h, p0/z, z0\.h, z1\.h ++** | ++** faclt p0\.h, p0/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_f16_tied, svfloat16_t, ++ p0 = svacgt_f16 (p0, z0, z1), ++ p0 = svacgt (p0, z0, z1)) ++ ++/* ++** acgt_f16_untied: ++** ( ++** facgt p0\.h, p1/z, z0\.h, z1\.h ++** | ++** faclt p0\.h, p1/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_f16_untied, svfloat16_t, ++ p0 = svacgt_f16 (p1, z0, z1), ++ p0 = svacgt (p1, z0, z1)) ++ ++/* ++** acgt_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** ( ++** facgt p0\.h, p1/z, z0\.h, \1 ++** | ++** faclt p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (acgt_h4_f16, svfloat16_t, float16_t, ++ p0 = svacgt_n_f16 (p1, z0, d4), ++ p0 = svacgt (p1, z0, d4)) ++ ++/* ++** acgt_0_f16: ++** mov (z[0-9]+\.h), #0 ++** ( ++** facgt p0\.h, p1/z, z0\.h, \1 ++** | ++** faclt p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_0_f16, svfloat16_t, ++ p0 = svacgt_n_f16 (p1, z0, 0), ++ p0 = svacgt (p1, z0, 0)) ++ ++/* ++** acgt_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** ( ++** facgt p0\.h, p1/z, z0\.h, \1 ++** | ++** faclt p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_1_f16, svfloat16_t, ++ p0 = svacgt_n_f16 (p1, z0, 1), ++ p0 = svacgt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f32.c +new file mode 100644 +index 000000000..d71c84ea6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f32.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** acgt_f32_tied: ++** ( ++** facgt p0\.s, p0/z, z0\.s, z1\.s ++** | ++** faclt p0\.s, p0/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_f32_tied, svfloat32_t, ++ p0 = svacgt_f32 (p0, z0, z1), ++ p0 = svacgt (p0, z0, z1)) ++ ++/* ++** acgt_f32_untied: ++** ( ++** facgt p0\.s, p1/z, z0\.s, z1\.s ++** | ++** faclt p0\.s, p1/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_f32_untied, svfloat32_t, ++ p0 = svacgt_f32 (p1, z0, z1), ++ p0 = svacgt (p1, z0, z1)) ++ ++/* ++** acgt_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** ( ++** facgt p0\.s, p1/z, z0\.s, \1 ++** | ++** faclt p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (acgt_s4_f32, svfloat32_t, float32_t, ++ p0 = svacgt_n_f32 (p1, z0, d4), ++ p0 = svacgt (p1, z0, d4)) ++ ++/* ++** acgt_0_f32: ++** mov (z[0-9]+\.s), #0 ++** ( ++** facgt p0\.s, p1/z, z0\.s, \1 ++** | ++** faclt p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_0_f32, svfloat32_t, ++ p0 = svacgt_n_f32 (p1, z0, 0), ++ p0 = svacgt (p1, z0, 0)) ++ ++/* ++** acgt_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** ( ++** facgt p0\.s, p1/z, z0\.s, \1 ++** | ++** faclt p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_1_f32, svfloat32_t, ++ p0 = svacgt_n_f32 (p1, z0, 1), ++ p0 = svacgt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f64.c +new file mode 100644 +index 000000000..15d549e18 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f64.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** acgt_f64_tied: ++** ( ++** facgt p0\.d, p0/z, z0\.d, z1\.d ++** | ++** faclt p0\.d, p0/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_f64_tied, svfloat64_t, ++ p0 = svacgt_f64 (p0, z0, z1), ++ p0 = svacgt (p0, z0, z1)) ++ ++/* ++** acgt_f64_untied: ++** ( ++** facgt p0\.d, p1/z, z0\.d, z1\.d ++** | ++** faclt p0\.d, p1/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_f64_untied, svfloat64_t, ++ p0 = svacgt_f64 (p1, z0, z1), ++ p0 = svacgt (p1, z0, z1)) ++ ++/* ++** acgt_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** ( ++** facgt p0\.d, p1/z, z0\.d, \1 ++** | ++** faclt p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (acgt_d4_f64, svfloat64_t, float64_t, ++ p0 = svacgt_n_f64 (p1, z0, d4), ++ p0 = svacgt (p1, z0, d4)) ++ ++/* ++** acgt_0_f64: ++** mov (z[0-9]+\.d), #0 ++** ( ++** facgt p0\.d, p1/z, z0\.d, \1 ++** | ++** faclt p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_0_f64, svfloat64_t, ++ p0 = svacgt_n_f64 (p1, z0, 0), ++ p0 = svacgt (p1, z0, 0)) ++ ++/* ++** acgt_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** ( ++** facgt p0\.d, p1/z, z0\.d, \1 ++** | ++** faclt p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acgt_1_f64, svfloat64_t, ++ p0 = svacgt_n_f64 (p1, z0, 1), ++ p0 = svacgt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f16.c +new file mode 100644 +index 000000000..ed6721d57 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f16.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** acle_f16_tied: ++** ( ++** facge p0\.h, p0/z, z1\.h, z0\.h ++** | ++** facle p0\.h, p0/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_f16_tied, svfloat16_t, ++ p0 = svacle_f16 (p0, z0, z1), ++ p0 = svacle (p0, z0, z1)) ++ ++/* ++** acle_f16_untied: ++** ( ++** facge p0\.h, p1/z, z1\.h, z0\.h ++** | ++** facle p0\.h, p1/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_f16_untied, svfloat16_t, ++ p0 = svacle_f16 (p1, z0, z1), ++ p0 = svacle (p1, z0, z1)) ++ ++/* ++** acle_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** ( ++** facge p0\.h, p1/z, \1, z0\.h ++** | ++** facle p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (acle_h4_f16, svfloat16_t, float16_t, ++ p0 = svacle_n_f16 (p1, z0, d4), ++ p0 = svacle (p1, z0, d4)) ++ ++/* ++** acle_0_f16: ++** mov (z[0-9]+\.h), #0 ++** ( ++** facge p0\.h, p1/z, \1, z0\.h ++** | ++** facle p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_0_f16, svfloat16_t, ++ p0 = svacle_n_f16 (p1, z0, 0), ++ p0 = svacle (p1, z0, 0)) ++ ++/* ++** acle_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** ( ++** facge p0\.h, p1/z, \1, z0\.h ++** | ++** facle p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_1_f16, svfloat16_t, ++ p0 = svacle_n_f16 (p1, z0, 1), ++ p0 = svacle (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f32.c +new file mode 100644 +index 000000000..7fc9da701 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f32.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** acle_f32_tied: ++** ( ++** facge p0\.s, p0/z, z1\.s, z0\.s ++** | ++** facle p0\.s, p0/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_f32_tied, svfloat32_t, ++ p0 = svacle_f32 (p0, z0, z1), ++ p0 = svacle (p0, z0, z1)) ++ ++/* ++** acle_f32_untied: ++** ( ++** facge p0\.s, p1/z, z1\.s, z0\.s ++** | ++** facle p0\.s, p1/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_f32_untied, svfloat32_t, ++ p0 = svacle_f32 (p1, z0, z1), ++ p0 = svacle (p1, z0, z1)) ++ ++/* ++** acle_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** ( ++** facge p0\.s, p1/z, \1, z0\.s ++** | ++** facle p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (acle_s4_f32, svfloat32_t, float32_t, ++ p0 = svacle_n_f32 (p1, z0, d4), ++ p0 = svacle (p1, z0, d4)) ++ ++/* ++** acle_0_f32: ++** mov (z[0-9]+\.s), #0 ++** ( ++** facge p0\.s, p1/z, \1, z0\.s ++** | ++** facle p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_0_f32, svfloat32_t, ++ p0 = svacle_n_f32 (p1, z0, 0), ++ p0 = svacle (p1, z0, 0)) ++ ++/* ++** acle_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** ( ++** facge p0\.s, p1/z, \1, z0\.s ++** | ++** facle p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_1_f32, svfloat32_t, ++ p0 = svacle_n_f32 (p1, z0, 1), ++ p0 = svacle (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f64.c +new file mode 100644 +index 000000000..ecbb8e500 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f64.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** acle_f64_tied: ++** ( ++** facge p0\.d, p0/z, z1\.d, z0\.d ++** | ++** facle p0\.d, p0/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_f64_tied, svfloat64_t, ++ p0 = svacle_f64 (p0, z0, z1), ++ p0 = svacle (p0, z0, z1)) ++ ++/* ++** acle_f64_untied: ++** ( ++** facge p0\.d, p1/z, z1\.d, z0\.d ++** | ++** facle p0\.d, p1/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_f64_untied, svfloat64_t, ++ p0 = svacle_f64 (p1, z0, z1), ++ p0 = svacle (p1, z0, z1)) ++ ++/* ++** acle_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** ( ++** facge p0\.d, p1/z, \1, z0\.d ++** | ++** facle p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (acle_d4_f64, svfloat64_t, float64_t, ++ p0 = svacle_n_f64 (p1, z0, d4), ++ p0 = svacle (p1, z0, d4)) ++ ++/* ++** acle_0_f64: ++** mov (z[0-9]+\.d), #0 ++** ( ++** facge p0\.d, p1/z, \1, z0\.d ++** | ++** facle p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_0_f64, svfloat64_t, ++ p0 = svacle_n_f64 (p1, z0, 0), ++ p0 = svacle (p1, z0, 0)) ++ ++/* ++** acle_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** ( ++** facge p0\.d, p1/z, \1, z0\.d ++** | ++** facle p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (acle_1_f64, svfloat64_t, ++ p0 = svacle_n_f64 (p1, z0, 1), ++ p0 = svacle (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f16.c +new file mode 100644 +index 000000000..e5f5040c7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f16.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** aclt_f16_tied: ++** ( ++** facgt p0\.h, p0/z, z1\.h, z0\.h ++** | ++** faclt p0\.h, p0/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_f16_tied, svfloat16_t, ++ p0 = svaclt_f16 (p0, z0, z1), ++ p0 = svaclt (p0, z0, z1)) ++ ++/* ++** aclt_f16_untied: ++** ( ++** facgt p0\.h, p1/z, z1\.h, z0\.h ++** | ++** faclt p0\.h, p1/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_f16_untied, svfloat16_t, ++ p0 = svaclt_f16 (p1, z0, z1), ++ p0 = svaclt (p1, z0, z1)) ++ ++/* ++** aclt_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** ( ++** facgt p0\.h, p1/z, \1, z0\.h ++** | ++** faclt p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (aclt_h4_f16, svfloat16_t, float16_t, ++ p0 = svaclt_n_f16 (p1, z0, d4), ++ p0 = svaclt (p1, z0, d4)) ++ ++/* ++** aclt_0_f16: ++** mov (z[0-9]+\.h), #0 ++** ( ++** facgt p0\.h, p1/z, \1, z0\.h ++** | ++** faclt p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_0_f16, svfloat16_t, ++ p0 = svaclt_n_f16 (p1, z0, 0), ++ p0 = svaclt (p1, z0, 0)) ++ ++/* ++** aclt_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** ( ++** facgt p0\.h, p1/z, \1, z0\.h ++** | ++** faclt p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_1_f16, svfloat16_t, ++ p0 = svaclt_n_f16 (p1, z0, 1), ++ p0 = svaclt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f32.c +new file mode 100644 +index 000000000..f40826445 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f32.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** aclt_f32_tied: ++** ( ++** facgt p0\.s, p0/z, z1\.s, z0\.s ++** | ++** faclt p0\.s, p0/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_f32_tied, svfloat32_t, ++ p0 = svaclt_f32 (p0, z0, z1), ++ p0 = svaclt (p0, z0, z1)) ++ ++/* ++** aclt_f32_untied: ++** ( ++** facgt p0\.s, p1/z, z1\.s, z0\.s ++** | ++** faclt p0\.s, p1/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_f32_untied, svfloat32_t, ++ p0 = svaclt_f32 (p1, z0, z1), ++ p0 = svaclt (p1, z0, z1)) ++ ++/* ++** aclt_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** ( ++** facgt p0\.s, p1/z, \1, z0\.s ++** | ++** faclt p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (aclt_s4_f32, svfloat32_t, float32_t, ++ p0 = svaclt_n_f32 (p1, z0, d4), ++ p0 = svaclt (p1, z0, d4)) ++ ++/* ++** aclt_0_f32: ++** mov (z[0-9]+\.s), #0 ++** ( ++** facgt p0\.s, p1/z, \1, z0\.s ++** | ++** faclt p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_0_f32, svfloat32_t, ++ p0 = svaclt_n_f32 (p1, z0, 0), ++ p0 = svaclt (p1, z0, 0)) ++ ++/* ++** aclt_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** ( ++** facgt p0\.s, p1/z, \1, z0\.s ++** | ++** faclt p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_1_f32, svfloat32_t, ++ p0 = svaclt_n_f32 (p1, z0, 1), ++ p0 = svaclt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f64.c +new file mode 100644 +index 000000000..0170b3307 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f64.c +@@ -0,0 +1,71 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** aclt_f64_tied: ++** ( ++** facgt p0\.d, p0/z, z1\.d, z0\.d ++** | ++** faclt p0\.d, p0/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_f64_tied, svfloat64_t, ++ p0 = svaclt_f64 (p0, z0, z1), ++ p0 = svaclt (p0, z0, z1)) ++ ++/* ++** aclt_f64_untied: ++** ( ++** facgt p0\.d, p1/z, z1\.d, z0\.d ++** | ++** faclt p0\.d, p1/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_f64_untied, svfloat64_t, ++ p0 = svaclt_f64 (p1, z0, z1), ++ p0 = svaclt (p1, z0, z1)) ++ ++/* ++** aclt_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** ( ++** facgt p0\.d, p1/z, \1, z0\.d ++** | ++** faclt p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (aclt_d4_f64, svfloat64_t, float64_t, ++ p0 = svaclt_n_f64 (p1, z0, d4), ++ p0 = svaclt (p1, z0, d4)) ++ ++/* ++** aclt_0_f64: ++** mov (z[0-9]+\.d), #0 ++** ( ++** facgt p0\.d, p1/z, \1, z0\.d ++** | ++** faclt p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_0_f64, svfloat64_t, ++ p0 = svaclt_n_f64 (p1, z0, 0), ++ p0 = svaclt (p1, z0, 0)) ++ ++/* ++** aclt_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** ( ++** facgt p0\.d, p1/z, \1, z0\.d ++** | ++** faclt p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (aclt_1_f64, svfloat64_t, ++ p0 = svaclt_n_f64 (p1, z0, 1), ++ p0 = svaclt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16.c +new file mode 100644 +index 000000000..7228e5dd5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16.c +@@ -0,0 +1,577 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_f16_m_tied1: ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_m_tied1, svfloat16_t, ++ z0 = svadd_f16_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_m_tied2, svfloat16_t, ++ z0 = svadd_f16_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_f16_m_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_m_untied, svfloat16_t, ++ z0 = svadd_f16_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_m (p0, z0, d4), ++ z0 = svadd_m (p0, z0, d4)) ++ ++/* ++** add_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_m (p0, z1, d4), ++ z0 = svadd_m (p0, z1, d4)) ++ ++/* ++** add_1_f16_m_tied1: ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_m_tied1, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, 
z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_f16_m_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_m_untied, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_0p5_f16_m_tied1: ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_m_tied1, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z0, 0.5), ++ z0 = svadd_m (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f16_m_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_m_untied, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z1, 0.5), ++ z0 = svadd_m (p0, z1, 0.5)) ++ ++/* ++** add_m1_f16_m_tied1: ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_m_tied1, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z0, -1), ++ z0 = svadd_m (p0, z0, -1)) ++ ++/* ++** add_m1_f16_m_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_m_untied, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z1, -1), ++ z0 = svadd_m (p0, z1, -1)) ++ ++/* ++** add_m0p5_f16_m_tied1: ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_m_tied1, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z0, -0.5), ++ z0 = svadd_m (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f16_m_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_m_untied, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z1, -0.5), ++ z0 = svadd_m (p0, z1, -0.5)) ++ ++/* ++** add_m2_f16_m: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f16_m, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_z_tied1, svfloat16_t, ++ z0 = svadd_f16_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_z_tied2, svfloat16_t, ++ z0 = svadd_f16_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_z_untied, svfloat16_t, ++ z0 = svadd_f16_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_z (p0, z0, d4), ++ z0 = svadd_z (p0, z0, d4)) ++ ++/* ++** add_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_z (p0, z1, d4), ++ z0 = svadd_z (p0, z1, d4)) ++ ++/* ++** add_1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_z_tied1, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_z_untied, svfloat16_t, ++ z0 = 
svadd_n_f16_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z0, 0.5), ++ z0 = svadd_z (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_z_untied, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z1, 0.5), ++ z0 = svadd_z (p0, z1, 0.5)) ++ ++/* ++** add_m1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_z_tied1, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z0, -1), ++ z0 = svadd_z (p0, z0, -1)) ++ ++/* ++** add_m1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_z_untied, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z1, -1), ++ z0 = svadd_z (p0, z1, -1)) ++ ++/* ++** add_m0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_z_tied1, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z0, -0.5), ++ z0 = svadd_z (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_z_untied, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z1, -0.5), ++ z0 = svadd_z (p0, z1, -0.5)) ++ ++/* ++** add_m2_f16_z: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f16_z, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z0, -2), ++ z0 = svadd_z (p0, z0, -2)) ++ ++/* ++** add_f16_x_tied1: ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_x_tied1, svfloat16_t, ++ z0 = svadd_f16_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_f16_x_tied2: ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_x_tied2, svfloat16_t, ++ z0 = svadd_f16_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_x_untied, svfloat16_t, ++ z0 = svadd_f16_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_x (p0, z0, d4), ++ z0 = svadd_x (p0, z0, d4)) ++ ++/* ++** add_h4_f16_x_untied: ++** mov z0\.h, h4 ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_x (p0, z1, d4), ++ z0 = svadd_x (p0, z1, d4)) ++ ++/* ++** add_1_f16_x_tied1: ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_f16_x_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_0p5_f16_x_tied1: ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z0, 0.5), ++ z0 = svadd_x (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f16_x_untied: ++** movprfx z0, z1 ++** fadd 
z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z1, 0.5), ++ z0 = svadd_x (p0, z1, 0.5)) ++ ++/* ++** add_m1_f16_x_tied1: ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m1_f16_x_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z1, -1), ++ z0 = svadd_x (p0, z1, -1)) ++ ++/* ++** add_m0p5_f16_x_tied1: ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z0, -0.5), ++ z0 = svadd_x (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f16_x_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z1, -0.5), ++ z0 = svadd_x (p0, z1, -0.5)) ++ ++/* ++** add_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z0, 2), ++ z0 = svadd_x (p0, z0, 2)) ++ ++/* ++** add_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z1, 2), ++ z0 = svadd_x (p0, z1, 2)) ++ ++/* ++** ptrue_add_f16_x_tied1: ++** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f16_x_tied1, svfloat16_t, ++ z0 = svadd_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svadd_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_add_f16_x_tied2: ++** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f16_x_tied2, svfloat16_t, ++ z0 = svadd_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svadd_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_add_f16_x_untied: ++** fadd z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f16_x_untied, svfloat16_t, ++ z0 = svadd_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svadd_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_add_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svadd_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_add_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svadd_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_add_0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 0.5), ++ z0 = svadd_x (svptrue_b16 (), z0, 0.5)) ++ ++/* ++** ptrue_add_0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 0.5), ++ z0 = svadd_x (svptrue_b16 (), z1, 0.5)) ++ ++/* ++** ptrue_add_m1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z0, -1), ++ z0 = svadd_x (svptrue_b16 (), z0, -1)) ++ ++/* ++** ptrue_add_m1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z1, -1), ++ z0 = svadd_x (svptrue_b16 (), z1, -1)) ++ ++/* ++** ptrue_add_m0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z0, -0.5), ++ z0 = svadd_x (svptrue_b16 (), z0, -0.5)) ++ ++/* ++** ptrue_add_m0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z1, -0.5), ++ z0 = svadd_x (svptrue_b16 (), z1, -0.5)) ++ ++/* ++** ptrue_add_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svadd_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_add_2_f16_x_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fadd z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svadd_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16_notrap.c +new file mode 100644 +index 000000000..f6330acee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16_notrap.c +@@ -0,0 +1,572 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_f16_m_tied1: ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_m_tied1, svfloat16_t, ++ z0 = svadd_f16_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_m_tied2, svfloat16_t, ++ z0 = svadd_f16_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_f16_m_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_m_untied, svfloat16_t, ++ z0 = svadd_f16_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_m (p0, z0, d4), ++ z0 = svadd_m (p0, z0, d4)) ++ ++/* ++** add_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_m (p0, z1, d4), ++ z0 = svadd_m (p0, z1, d4)) ++ ++/* ++** add_1_f16_m_tied1: ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_m_tied1, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_f16_m_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_m_untied, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_0p5_f16_m_tied1: ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_m_tied1, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z0, 0.5), ++ z0 = svadd_m (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f16_m_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z 
(add_0p5_f16_m_untied, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z1, 0.5), ++ z0 = svadd_m (p0, z1, 0.5)) ++ ++/* ++** add_m1_f16_m_tied1: ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_m_tied1, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z0, -1), ++ z0 = svadd_m (p0, z0, -1)) ++ ++/* ++** add_m1_f16_m_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_m_untied, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z1, -1), ++ z0 = svadd_m (p0, z1, -1)) ++ ++/* ++** add_m0p5_f16_m_tied1: ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_m_tied1, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z0, -0.5), ++ z0 = svadd_m (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f16_m_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_m_untied, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z1, -0.5), ++ z0 = svadd_m (p0, z1, -0.5)) ++ ++/* ++** add_m2_f16_m: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f16_m, svfloat16_t, ++ z0 = svadd_n_f16_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_z_tied1, svfloat16_t, ++ z0 = svadd_f16_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_z_tied2, svfloat16_t, ++ z0 = svadd_f16_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_z_untied, svfloat16_t, ++ z0 = svadd_f16_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_z (p0, z0, d4), ++ z0 = svadd_z (p0, z0, d4)) ++ ++/* ++** add_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_z (p0, z1, d4), ++ z0 = svadd_z (p0, z1, d4)) ++ ++/* ++** add_1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_z_tied1, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_z_untied, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z0, 0.5), ++ z0 = svadd_z (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_z_untied, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z1, 0.5), ++ z0 = svadd_z (p0, z1, 0.5)) ++ ++/* ++** add_m1_f16_z_tied1: ++** movprfx z0\.h, 
p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_z_tied1, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z0, -1), ++ z0 = svadd_z (p0, z0, -1)) ++ ++/* ++** add_m1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_z_untied, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z1, -1), ++ z0 = svadd_z (p0, z1, -1)) ++ ++/* ++** add_m0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_z_tied1, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z0, -0.5), ++ z0 = svadd_z (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_z_untied, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z1, -0.5), ++ z0 = svadd_z (p0, z1, -0.5)) ++ ++/* ++** add_m2_f16_z: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f16_z, svfloat16_t, ++ z0 = svadd_n_f16_z (p0, z0, -2), ++ z0 = svadd_z (p0, z0, -2)) ++ ++/* ++** add_f16_x_tied1: ++** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_x_tied1, svfloat16_t, ++ z0 = svadd_f16_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_f16_x_tied2: ++** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_x_tied2, svfloat16_t, ++ z0 = svadd_f16_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_f16_x_untied: ++** fadd z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f16_x_untied, svfloat16_t, ++ z0 = svadd_f16_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_x (p0, z0, d4), ++ z0 = svadd_x (p0, z0, d4)) ++ ++/* ++** add_h4_f16_x_untied: ++** mov (z[0-9]+\.h), h4 ++** fadd z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svadd_n_f16_x (p0, z1, d4), ++ z0 = svadd_x (p0, z1, d4)) ++ ++/* ++** add_1_f16_x_tied1: ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_f16_x_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_0p5_f16_x_tied1: ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z0, 0.5), ++ z0 = svadd_x (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f16_x_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z1, 0.5), ++ z0 = svadd_x (p0, z1, 0.5)) ++ ++/* ++** add_m1_f16_x_tied1: ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m1_f16_x_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z1, -1), ++ z0 = svadd_x (p0, z1, -1)) ++ ++/* ++** add_m0p5_f16_x_tied1: ++** fsub z0\.h, 
p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z0, -0.5), ++ z0 = svadd_x (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f16_x_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z1, -0.5), ++ z0 = svadd_x (p0, z1, -0.5)) ++ ++/* ++** add_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z0, 2), ++ z0 = svadd_x (p0, z0, 2)) ++ ++/* ++** add_2_f16_x_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fadd z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (p0, z1, 2), ++ z0 = svadd_x (p0, z1, 2)) ++ ++/* ++** ptrue_add_f16_x_tied1: ++** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f16_x_tied1, svfloat16_t, ++ z0 = svadd_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svadd_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_add_f16_x_tied2: ++** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f16_x_tied2, svfloat16_t, ++ z0 = svadd_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svadd_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_add_f16_x_untied: ++** fadd z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f16_x_untied, svfloat16_t, ++ z0 = svadd_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svadd_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_add_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svadd_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_add_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svadd_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_add_0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 0.5), ++ z0 = svadd_x (svptrue_b16 (), z0, 0.5)) ++ ++/* ++** ptrue_add_0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 0.5), ++ z0 = svadd_x (svptrue_b16 (), z1, 0.5)) ++ ++/* ++** ptrue_add_m1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z0, -1), ++ z0 = svadd_x (svptrue_b16 (), z0, -1)) ++ ++/* ++** ptrue_add_m1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z1, -1), ++ z0 = svadd_x (svptrue_b16 (), z1, -1)) ++ ++/* ++** ptrue_add_m0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z0, -0.5), ++ z0 = svadd_x (svptrue_b16 (), z0, -0.5)) ++ ++/* ++** ptrue_add_m0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z1, -0.5), ++ z0 = svadd_x (svptrue_b16 (), z1, -0.5)) ++ ++/* ++** ptrue_add_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f16_x_tied1, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svadd_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_add_2_f16_x_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fadd z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f16_x_untied, svfloat16_t, ++ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svadd_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32.c +new file mode 100644 +index 000000000..b5f4e9623 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32.c +@@ -0,0 +1,577 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_f32_m_tied1: ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_m_tied1, svfloat32_t, ++ z0 = svadd_f32_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_m_tied2, svfloat32_t, ++ z0 = svadd_f32_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_f32_m_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_m_untied, svfloat32_t, ++ z0 = svadd_f32_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svadd_n_f32_m (p0, z0, d4), ++ z0 = svadd_m (p0, z0, d4)) ++ ++/* ++** add_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svadd_n_f32_m (p0, z1, d4), ++ z0 = svadd_m (p0, z1, d4)) ++ ++/* ++** add_1_f32_m_tied1: ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_m_tied1, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_f32_m_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_m_untied, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_0p5_f32_m_tied1: ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_m_tied1, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z0, 0.5), ++ z0 = svadd_m (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f32_m_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_m_untied, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z1, 0.5), ++ z0 = svadd_m (p0, z1, 0.5)) ++ ++/* ++** add_m1_f32_m_tied1: ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_m_tied1, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z0, -1), ++ z0 = svadd_m (p0, z0, -1)) ++ ++/* ++** add_m1_f32_m_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_m_untied, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z1, -1), ++ z0 = svadd_m (p0, z1, -1)) ++ ++/* ++** add_m0p5_f32_m_tied1: ++** fsub z0\.s, p0/m, 
z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_m_tied1, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z0, -0.5), ++ z0 = svadd_m (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f32_m_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_m_untied, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z1, -0.5), ++ z0 = svadd_m (p0, z1, -0.5)) ++ ++/* ++** add_m2_f32_m: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f32_m, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_z_tied1, svfloat32_t, ++ z0 = svadd_f32_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_z_tied2, svfloat32_t, ++ z0 = svadd_f32_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_z_untied, svfloat32_t, ++ z0 = svadd_f32_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svadd_n_f32_z (p0, z0, d4), ++ z0 = svadd_z (p0, z0, d4)) ++ ++/* ++** add_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svadd_n_f32_z (p0, z1, d4), ++ z0 = svadd_z (p0, z1, d4)) ++ ++/* ++** add_1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_z_tied1, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_z_untied, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z0, 0.5), ++ z0 = svadd_z (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_z_untied, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z1, 0.5), ++ z0 = svadd_z (p0, z1, 0.5)) ++ ++/* ++** add_m1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_z_tied1, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z0, -1), ++ z0 = svadd_z (p0, z0, -1)) ++ ++/* ++** add_m1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_z_untied, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z1, -1), ++ z0 = svadd_z (p0, z1, -1)) ++ ++/* ++** add_m0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_z_tied1, svfloat32_t, ++ 
z0 = svadd_n_f32_z (p0, z0, -0.5), ++ z0 = svadd_z (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_z_untied, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z1, -0.5), ++ z0 = svadd_z (p0, z1, -0.5)) ++ ++/* ++** add_m2_f32_z: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f32_z, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z0, -2), ++ z0 = svadd_z (p0, z0, -2)) ++ ++/* ++** add_f32_x_tied1: ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_x_tied1, svfloat32_t, ++ z0 = svadd_f32_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_f32_x_tied2: ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_x_tied2, svfloat32_t, ++ z0 = svadd_f32_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_x_untied, svfloat32_t, ++ z0 = svadd_f32_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svadd_n_f32_x (p0, z0, d4), ++ z0 = svadd_x (p0, z0, d4)) ++ ++/* ++** add_s4_f32_x_untied: ++** mov z0\.s, s4 ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svadd_n_f32_x (p0, z1, d4), ++ z0 = svadd_x (p0, z1, d4)) ++ ++/* ++** add_1_f32_x_tied1: ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_f32_x_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_0p5_f32_x_tied1: ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z0, 0.5), ++ z0 = svadd_x (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f32_x_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z1, 0.5), ++ z0 = svadd_x (p0, z1, 0.5)) ++ ++/* ++** add_m1_f32_x_tied1: ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m1_f32_x_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z1, -1), ++ z0 = svadd_x (p0, z1, -1)) ++ ++/* ++** add_m0p5_f32_x_tied1: ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z0, -0.5), ++ z0 = svadd_x (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f32_x_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z1, -0.5), ++ z0 = svadd_x (p0, z1, -0.5)) ++ ++/* ++** add_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z0, 2), ++ z0 = svadd_x (p0, z0, 2)) ++ ++/* ++** add_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z1, 2), ++ z0 = svadd_x (p0, z1, 2)) ++ ++/* ++** ptrue_add_f32_x_tied1: ++** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f32_x_tied1, svfloat32_t, ++ z0 = svadd_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svadd_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_add_f32_x_tied2: ++** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f32_x_tied2, svfloat32_t, ++ z0 = svadd_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svadd_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_add_f32_x_untied: ++** fadd z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f32_x_untied, svfloat32_t, ++ z0 = svadd_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svadd_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_add_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svadd_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_add_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svadd_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_add_0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 0.5), ++ z0 = svadd_x (svptrue_b32 (), z0, 0.5)) ++ ++/* ++** ptrue_add_0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 0.5), ++ z0 = svadd_x (svptrue_b32 (), z1, 0.5)) ++ ++/* ++** ptrue_add_m1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z0, -1), ++ z0 = svadd_x (svptrue_b32 (), z0, -1)) ++ ++/* ++** ptrue_add_m1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z1, -1), ++ z0 = svadd_x (svptrue_b32 (), z1, -1)) ++ ++/* ++** ptrue_add_m0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z0, -0.5), ++ z0 = svadd_x (svptrue_b32 (), z0, -0.5)) ++ ++/* ++** ptrue_add_m0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z1, -0.5), ++ z0 = svadd_x (svptrue_b32 (), z1, -0.5)) ++ ++/* ++** ptrue_add_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svadd_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_add_2_f32_x_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** fadd z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svadd_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32_notrap.c +new file mode 100644 +index 000000000..062e5fd67 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32_notrap.c +@@ -0,0 +1,572 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_f32_m_tied1: ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_m_tied1, svfloat32_t, ++ z0 = svadd_f32_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_m_tied2, svfloat32_t, ++ z0 = svadd_f32_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_f32_m_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_m_untied, svfloat32_t, ++ z0 = svadd_f32_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svadd_n_f32_m (p0, z0, d4), ++ z0 = svadd_m (p0, z0, d4)) ++ ++/* ++** add_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svadd_n_f32_m (p0, z1, d4), ++ z0 = svadd_m (p0, z1, d4)) ++ ++/* ++** add_1_f32_m_tied1: ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_m_tied1, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_f32_m_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_m_untied, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_0p5_f32_m_tied1: ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_m_tied1, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z0, 0.5), ++ z0 = svadd_m (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f32_m_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_m_untied, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z1, 0.5), ++ z0 = svadd_m (p0, z1, 0.5)) ++ ++/* ++** add_m1_f32_m_tied1: ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_m_tied1, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z0, -1), ++ z0 = svadd_m (p0, z0, -1)) ++ ++/* ++** add_m1_f32_m_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_m_untied, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z1, -1), ++ z0 = svadd_m (p0, z1, -1)) ++ ++/* ++** add_m0p5_f32_m_tied1: ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_m_tied1, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z0, -0.5), ++ z0 = svadd_m (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f32_m_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_m_untied, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z1, -0.5), ++ z0 = svadd_m (p0, z1, -0.5)) ++ ++/* ++** add_m2_f32_m: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? 
++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f32_m, svfloat32_t, ++ z0 = svadd_n_f32_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_z_tied1, svfloat32_t, ++ z0 = svadd_f32_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_z_tied2, svfloat32_t, ++ z0 = svadd_f32_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_z_untied, svfloat32_t, ++ z0 = svadd_f32_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svadd_n_f32_z (p0, z0, d4), ++ z0 = svadd_z (p0, z0, d4)) ++ ++/* ++** add_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svadd_n_f32_z (p0, z1, d4), ++ z0 = svadd_z (p0, z1, d4)) ++ ++/* ++** add_1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_z_tied1, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_z_untied, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z0, 0.5), ++ z0 = svadd_z (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_z_untied, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z1, 0.5), ++ z0 = svadd_z (p0, z1, 0.5)) ++ ++/* ++** add_m1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_z_tied1, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z0, -1), ++ z0 = svadd_z (p0, z0, -1)) ++ ++/* ++** add_m1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_z_untied, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z1, -1), ++ z0 = svadd_z (p0, z1, -1)) ++ ++/* ++** add_m0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_z_tied1, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z0, -0.5), ++ z0 = svadd_z (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_z_untied, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z1, -0.5), ++ z0 = svadd_z (p0, z1, -0.5)) ++ ++/* ++** add_m2_f32_z: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? 
++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f32_z, svfloat32_t, ++ z0 = svadd_n_f32_z (p0, z0, -2), ++ z0 = svadd_z (p0, z0, -2)) ++ ++/* ++** add_f32_x_tied1: ++** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_x_tied1, svfloat32_t, ++ z0 = svadd_f32_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_f32_x_tied2: ++** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_x_tied2, svfloat32_t, ++ z0 = svadd_f32_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_f32_x_untied: ++** fadd z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f32_x_untied, svfloat32_t, ++ z0 = svadd_f32_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svadd_n_f32_x (p0, z0, d4), ++ z0 = svadd_x (p0, z0, d4)) ++ ++/* ++** add_s4_f32_x_untied: ++** mov (z[0-9]+\.s), s4 ++** fadd z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svadd_n_f32_x (p0, z1, d4), ++ z0 = svadd_x (p0, z1, d4)) ++ ++/* ++** add_1_f32_x_tied1: ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_f32_x_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_0p5_f32_x_tied1: ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z0, 0.5), ++ z0 = svadd_x (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f32_x_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z1, 0.5), ++ z0 = svadd_x (p0, z1, 0.5)) ++ ++/* ++** add_m1_f32_x_tied1: ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m1_f32_x_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z1, -1), ++ z0 = svadd_x (p0, z1, -1)) ++ ++/* ++** add_m0p5_f32_x_tied1: ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z0, -0.5), ++ z0 = svadd_x (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f32_x_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z1, -0.5), ++ z0 = svadd_x (p0, z1, -0.5)) ++ ++/* ++** add_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z0, 2), ++ z0 = svadd_x (p0, z0, 2)) ++ ++/* ++** add_2_f32_x_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** fadd z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (p0, z1, 2), ++ z0 = svadd_x (p0, z1, 2)) ++ ++/* ++** ptrue_add_f32_x_tied1: ++** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f32_x_tied1, svfloat32_t, ++ z0 = svadd_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svadd_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_add_f32_x_tied2: ++** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f32_x_tied2, svfloat32_t, ++ z0 = svadd_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svadd_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_add_f32_x_untied: ++** fadd z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f32_x_untied, svfloat32_t, ++ z0 = svadd_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svadd_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_add_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svadd_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_add_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svadd_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_add_0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 0.5), ++ z0 = svadd_x (svptrue_b32 (), z0, 0.5)) ++ ++/* ++** ptrue_add_0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 0.5), ++ z0 = svadd_x (svptrue_b32 (), z1, 0.5)) ++ ++/* ++** ptrue_add_m1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z0, -1), ++ z0 = svadd_x (svptrue_b32 (), z0, -1)) ++ ++/* ++** ptrue_add_m1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z1, -1), ++ z0 = svadd_x (svptrue_b32 (), z1, -1)) ++ ++/* ++** ptrue_add_m0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z0, -0.5), ++ z0 = svadd_x (svptrue_b32 (), z0, -0.5)) ++ ++/* ++** ptrue_add_m0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z1, -0.5), ++ z0 = svadd_x (svptrue_b32 (), z1, -0.5)) ++ ++/* ++** ptrue_add_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f32_x_tied1, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svadd_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_add_2_f32_x_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** fadd z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f32_x_untied, svfloat32_t, ++ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svadd_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64.c +new file mode 100644 +index 000000000..7185f3acf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64.c +@@ -0,0 +1,577 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_f64_m_tied1: ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_m_tied1, svfloat64_t, ++ z0 = svadd_f64_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_m_tied2, svfloat64_t, ++ z0 = svadd_f64_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_f64_m_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_m_untied, svfloat64_t, ++ z0 = svadd_f64_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svadd_n_f64_m (p0, z0, d4), ++ z0 = svadd_m (p0, z0, d4)) ++ ++/* ++** add_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svadd_n_f64_m (p0, z1, d4), ++ z0 = svadd_m (p0, z1, d4)) ++ ++/* ++** add_1_f64_m_tied1: ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_m_tied1, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_f64_m_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_m_untied, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_0p5_f64_m_tied1: ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_m_tied1, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z0, 0.5), ++ z0 = svadd_m (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f64_m_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_m_untied, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z1, 0.5), ++ z0 = svadd_m (p0, z1, 0.5)) ++ ++/* ++** add_m1_f64_m_tied1: ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_m_tied1, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z0, -1), ++ z0 = svadd_m (p0, z0, -1)) ++ ++/* ++** add_m1_f64_m_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_m_untied, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z1, -1), ++ z0 = svadd_m (p0, z1, -1)) ++ ++/* ++** add_m0p5_f64_m_tied1: ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_m_tied1, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z0, -0.5), ++ z0 = svadd_m (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f64_m_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_m_untied, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z1, -0.5), ++ z0 = svadd_m (p0, z1, -0.5)) ++ ++/* ++** add_m2_f64_m: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f64_m, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_z_tied1, svfloat64_t, ++ z0 = svadd_f64_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_z_tied2, svfloat64_t, ++ z0 = svadd_f64_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_z_untied, svfloat64_t, ++ z0 = svadd_f64_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svadd_n_f64_z (p0, z0, d4), ++ z0 = svadd_z (p0, z0, d4)) ++ ++/* ++** add_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svadd_n_f64_z (p0, z1, d4), ++ z0 = svadd_z (p0, z1, d4)) ++ ++/* ++** add_1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_z_tied1, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_z_untied, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z0, 0.5), ++ z0 = svadd_z (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_z_untied, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z1, 0.5), ++ z0 = svadd_z (p0, z1, 0.5)) ++ ++/* ++** add_m1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_z_tied1, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z0, -1), ++ z0 = svadd_z (p0, z0, -1)) ++ ++/* ++** add_m1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_z_untied, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z1, -1), ++ z0 = svadd_z (p0, z1, -1)) ++ ++/* ++** add_m0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_z_tied1, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z0, -0.5), ++ z0 = svadd_z (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_z_untied, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z1, -0.5), ++ z0 = svadd_z (p0, z1, -0.5)) ++ ++/* ++** add_m2_f64_z: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f64_z, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z0, -2), ++ z0 = svadd_z (p0, z0, -2)) ++ ++/* ++** add_f64_x_tied1: ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_x_tied1, svfloat64_t, ++ z0 = svadd_f64_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_f64_x_tied2: ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_x_tied2, svfloat64_t, ++ z0 = svadd_f64_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_x_untied, svfloat64_t, ++ z0 = svadd_f64_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svadd_n_f64_x (p0, z0, d4), ++ z0 = svadd_x (p0, z0, d4)) ++ ++/* ++** add_d4_f64_x_untied: ++** mov z0\.d, d4 ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svadd_n_f64_x (p0, z1, d4), ++ z0 = svadd_x (p0, z1, d4)) ++ ++/* ++** add_1_f64_x_tied1: ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_f64_x_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_0p5_f64_x_tied1: ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z0, 0.5), ++ z0 = svadd_x (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f64_x_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z1, 0.5), ++ z0 = svadd_x (p0, z1, 0.5)) ++ ++/* ++** add_m1_f64_x_tied1: ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m1_f64_x_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z1, -1), ++ z0 = svadd_x (p0, z1, -1)) ++ ++/* ++** add_m0p5_f64_x_tied1: ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z0, -0.5), ++ z0 = svadd_x (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f64_x_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z1, -0.5), ++ z0 = svadd_x (p0, z1, -0.5)) ++ ++/* ++** add_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z0, 2), ++ z0 = svadd_x (p0, z0, 2)) ++ ++/* ++** add_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? 
++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z1, 2), ++ z0 = svadd_x (p0, z1, 2)) ++ ++/* ++** ptrue_add_f64_x_tied1: ++** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f64_x_tied1, svfloat64_t, ++ z0 = svadd_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svadd_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_add_f64_x_tied2: ++** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f64_x_tied2, svfloat64_t, ++ z0 = svadd_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svadd_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_add_f64_x_untied: ++** fadd z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f64_x_untied, svfloat64_t, ++ z0 = svadd_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svadd_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_add_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svadd_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_add_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svadd_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_add_0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 0.5), ++ z0 = svadd_x (svptrue_b64 (), z0, 0.5)) ++ ++/* ++** ptrue_add_0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 0.5), ++ z0 = svadd_x (svptrue_b64 (), z1, 0.5)) ++ ++/* ++** ptrue_add_m1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z0, -1), ++ z0 = svadd_x (svptrue_b64 (), z0, -1)) ++ ++/* ++** ptrue_add_m1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z1, -1), ++ z0 = svadd_x (svptrue_b64 (), z1, -1)) ++ ++/* ++** ptrue_add_m0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z0, -0.5), ++ z0 = svadd_x (svptrue_b64 (), z0, -0.5)) ++ ++/* ++** ptrue_add_m0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z1, -0.5), ++ z0 = svadd_x (svptrue_b64 (), z1, -0.5)) ++ ++/* ++** ptrue_add_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svadd_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_add_2_f64_x_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** fadd z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svadd_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64_notrap.c +new file mode 100644 +index 000000000..6d095b507 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64_notrap.c +@@ -0,0 +1,572 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_f64_m_tied1: ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_m_tied1, svfloat64_t, ++ z0 = svadd_f64_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_m_tied2, svfloat64_t, ++ z0 = svadd_f64_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_f64_m_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_m_untied, svfloat64_t, ++ z0 = svadd_f64_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svadd_n_f64_m (p0, z0, d4), ++ z0 = svadd_m (p0, z0, d4)) ++ ++/* ++** add_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svadd_n_f64_m (p0, z1, d4), ++ z0 = svadd_m (p0, z1, d4)) ++ ++/* ++** add_1_f64_m_tied1: ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_m_tied1, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_f64_m_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_m_untied, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_0p5_f64_m_tied1: ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_m_tied1, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z0, 0.5), ++ z0 = svadd_m (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f64_m_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_m_untied, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z1, 0.5), ++ z0 = svadd_m (p0, z1, 0.5)) ++ ++/* ++** add_m1_f64_m_tied1: ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_m_tied1, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z0, -1), ++ z0 = svadd_m (p0, z0, -1)) ++ ++/* ++** add_m1_f64_m_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_m_untied, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z1, -1), ++ z0 = svadd_m (p0, z1, -1)) ++ ++/* ++** add_m0p5_f64_m_tied1: ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_m_tied1, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z0, -0.5), ++ z0 = svadd_m (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f64_m_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_m_untied, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z1, -0.5), ++ z0 = svadd_m (p0, z1, -0.5)) ++ ++/* ++** add_m2_f64_m: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f64_m, svfloat64_t, ++ z0 = svadd_n_f64_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_z_tied1, svfloat64_t, ++ z0 = svadd_f64_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_z_tied2, svfloat64_t, ++ z0 = svadd_f64_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_z_untied, svfloat64_t, ++ z0 = svadd_f64_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svadd_n_f64_z (p0, z0, d4), ++ z0 = svadd_z (p0, z0, d4)) ++ ++/* ++** add_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svadd_n_f64_z (p0, z1, d4), ++ z0 = svadd_z (p0, z1, d4)) ++ ++/* ++** add_1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_z_tied1, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_z_untied, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z0, 0.5), ++ z0 = svadd_z (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_z_untied, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z1, 0.5), ++ z0 = svadd_z (p0, z1, 0.5)) ++ ++/* ++** add_m1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_z_tied1, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z0, -1), ++ z0 = svadd_z (p0, z0, -1)) ++ ++/* ++** add_m1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_z_untied, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z1, -1), ++ z0 = svadd_z (p0, z1, -1)) ++ ++/* ++** add_m0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_z_tied1, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z0, -0.5), ++ z0 = svadd_z (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_z_untied, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z1, -0.5), ++ z0 = svadd_z (p0, z1, -0.5)) ++ ++/* ++** add_m2_f64_z: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_f64_z, svfloat64_t, ++ z0 = svadd_n_f64_z (p0, z0, -2), ++ z0 = svadd_z (p0, z0, -2)) ++ ++/* ++** add_f64_x_tied1: ++** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_x_tied1, svfloat64_t, ++ z0 = svadd_f64_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_f64_x_tied2: ++** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_x_tied2, svfloat64_t, ++ z0 = svadd_f64_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_f64_x_untied: ++** fadd z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_f64_x_untied, svfloat64_t, ++ z0 = svadd_f64_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svadd_n_f64_x (p0, z0, d4), ++ z0 = svadd_x (p0, z0, d4)) ++ ++/* ++** add_d4_f64_x_untied: ++** mov (z[0-9]+\.d), d4 ++** fadd z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZD (add_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svadd_n_f64_x (p0, z1, d4), ++ z0 = svadd_x (p0, z1, d4)) ++ ++/* ++** add_1_f64_x_tied1: ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_f64_x_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_0p5_f64_x_tied1: ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z0, 0.5), ++ z0 = svadd_x (p0, z0, 0.5)) ++ ++/* ++** add_0p5_f64_x_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_0p5_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z1, 0.5), ++ z0 = svadd_x (p0, z1, 0.5)) ++ ++/* ++** add_m1_f64_x_tied1: ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m1_f64_x_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z1, -1), ++ z0 = svadd_x (p0, z1, -1)) ++ ++/* ++** add_m0p5_f64_x_tied1: ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z0, -0.5), ++ z0 = svadd_x (p0, z0, -0.5)) ++ ++/* ++** add_m0p5_f64_x_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m0p5_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z1, -0.5), ++ z0 = svadd_x (p0, z1, -0.5)) ++ ++/* ++** add_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z0, 2), ++ z0 = svadd_x (p0, z0, 2)) ++ ++/* ++** add_2_f64_x_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** fadd z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_2_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (p0, z1, 2), ++ z0 = svadd_x (p0, z1, 2)) ++ ++/* ++** ptrue_add_f64_x_tied1: ++** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f64_x_tied1, svfloat64_t, ++ z0 = svadd_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svadd_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_add_f64_x_tied2: ++** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f64_x_tied2, svfloat64_t, ++ z0 = svadd_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svadd_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_add_f64_x_untied: ++** fadd z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_f64_x_untied, svfloat64_t, ++ z0 = svadd_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svadd_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_add_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svadd_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_add_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_1_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svadd_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_add_0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 0.5), ++ z0 = svadd_x (svptrue_b64 (), z0, 0.5)) ++ ++/* ++** ptrue_add_0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_0p5_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 0.5), ++ z0 = svadd_x (svptrue_b64 (), z1, 0.5)) ++ ++/* ++** ptrue_add_m1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z0, -1), ++ z0 = svadd_x (svptrue_b64 (), z0, -1)) ++ ++/* ++** ptrue_add_m1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m1_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z1, -1), ++ z0 = svadd_x (svptrue_b64 (), z1, -1)) ++ ++/* ++** ptrue_add_m0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z0, -0.5), ++ z0 = svadd_x (svptrue_b64 (), z0, -0.5)) ++ ++/* ++** ptrue_add_m0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_m0p5_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z1, -0.5), ++ z0 = svadd_x (svptrue_b64 (), z1, -0.5)) ++ ++/* ++** ptrue_add_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f64_x_tied1, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svadd_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_add_2_f64_x_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** fadd z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_add_2_f64_x_untied, svfloat64_t, ++ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svadd_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s16.c +new file mode 100644 +index 000000000..c0883edf9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s16.c +@@ -0,0 +1,377 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_s16_m_tied1: ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_s16_m_tied1, svint16_t, ++ z0 = svadd_s16_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** add z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_s16_m_tied2, svint16_t, ++ z0 = svadd_s16_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_s16_m_untied: ++** movprfx z0, z1 ++** add z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_s16_m_untied, svint16_t, ++ z0 = svadd_s16_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svadd_n_s16_m (p0, z0, x0), ++ z0 = svadd_m (p0, z0, x0)) ++ ++/* ++** add_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svadd_n_s16_m (p0, z1, x0), ++ z0 = svadd_m (p0, z1, x0)) ++ ++/* ++** add_1_s16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s16_m_tied1, svint16_t, ++ z0 = svadd_n_s16_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s16_m_untied, svint16_t, ++ z0 = svadd_n_s16_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_m2_s16_m: ++** mov (z[0-9]+\.h), #-2 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_s16_m, svint16_t, ++ z0 = svadd_n_s16_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_s16_z_tied1, svint16_t, ++ z0 = svadd_s16_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_s16_z_tied2, svint16_t, ++ z0 = svadd_s16_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** add z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s16_z_untied, svint16_t, ++ z0 = svadd_s16_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svadd_n_s16_z (p0, z0, x0), ++ z0 = svadd_z (p0, z0, x0)) ++ ++/* ++** add_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** add z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx 
z0\.h, p0/z, \1 ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svadd_n_s16_z (p0, z1, x0), ++ z0 = svadd_z (p0, z1, x0)) ++ ++/* ++** add_1_s16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s16_z_tied1, svint16_t, ++ z0 = svadd_n_s16_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_s16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** add z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s16_z_untied, svint16_t, ++ z0 = svadd_n_s16_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_s16_x_tied1: ++** add z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s16_x_tied1, svint16_t, ++ z0 = svadd_s16_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_s16_x_tied2: ++** add z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s16_x_tied2, svint16_t, ++ z0 = svadd_s16_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_s16_x_untied: ++** add z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s16_x_untied, svint16_t, ++ z0 = svadd_s16_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** add z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svadd_n_s16_x (p0, z0, x0), ++ z0 = svadd_x (p0, z0, x0)) ++ ++/* ++** add_w0_s16_x_untied: ++** mov (z[0-9]+\.h), w0 ++** add z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svadd_n_s16_x (p0, z1, x0), ++ z0 = svadd_x (p0, z1, x0)) ++ ++/* ++** add_1_s16_x_tied1: ++** add z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s16_x_tied1, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_s16_x_untied: ++** movprfx z0, z1 ++** add z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s16_x_untied, svint16_t, ++ z0 = svadd_n_s16_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_127_s16_x: ++** add z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_127_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, 127), ++ z0 = svadd_x (p0, z0, 127)) ++ ++/* ++** add_128_s16_x: ++** add z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_128_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, 128), ++ z0 = svadd_x (p0, z0, 128)) ++ ++/* ++** add_255_s16_x: ++** add z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_255_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, 255), ++ z0 = svadd_x (p0, z0, 255)) ++ ++/* ++** add_256_s16_x: ++** add z0\.h, z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (add_256_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, 256), ++ z0 = svadd_x (p0, z0, 256)) ++ ++/* ++** add_257_s16_x: ++** mov (z[0-9]+)\.b, #1 ++** add z0\.h, (z0\.h, \1\.h|\1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_257_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, 257), ++ z0 = svadd_x (p0, z0, 257)) ++ ++/* ++** add_512_s16_x: ++** add z0\.h, z0\.h, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (add_512_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, 512), ++ z0 = svadd_x (p0, z0, 512)) ++ ++/* ++** add_65280_s16_x: ++** add z0\.h, z0\.h, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_65280_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, 0xff00), ++ z0 = svadd_x 
(p0, z0, 0xff00)) ++ ++/* ++** add_m1_s16_x: ++** sub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m127_s16_x: ++** sub z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m127_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, -127), ++ z0 = svadd_x (p0, z0, -127)) ++ ++/* ++** add_m128_s16_x: ++** sub z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m128_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, -128), ++ z0 = svadd_x (p0, z0, -128)) ++ ++/* ++** add_m255_s16_x: ++** sub z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m255_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, -255), ++ z0 = svadd_x (p0, z0, -255)) ++ ++/* ++** add_m256_s16_x: ++** add z0\.h, z0\.h, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m256_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, -256), ++ z0 = svadd_x (p0, z0, -256)) ++ ++/* ++** add_m257_s16_x: ++** mov (z[0-9]+\.h), #-257 ++** add z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m257_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, -257), ++ z0 = svadd_x (p0, z0, -257)) ++ ++/* ++** add_m512_s16_x: ++** add z0\.h, z0\.h, #65024 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m512_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, -512), ++ z0 = svadd_x (p0, z0, -512)) ++ ++/* ++** add_m32768_s16_x: ++** add z0\.h, z0\.h, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m32768_s16_x, svint16_t, ++ z0 = svadd_n_s16_x (p0, z0, -0x8000), ++ z0 = svadd_x (p0, z0, -0x8000)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s32.c +new file mode 100644 +index 000000000..887038ba3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s32.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_s32_m_tied1: ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_s32_m_tied1, svint32_t, ++ z0 = svadd_s32_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** add z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_s32_m_tied2, svint32_t, ++ z0 = svadd_s32_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_s32_m_untied: ++** movprfx z0, z1 ++** add z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_s32_m_untied, svint32_t, ++ z0 = svadd_s32_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svadd_n_s32_m (p0, z0, x0), ++ z0 = svadd_m (p0, z0, x0)) ++ ++/* ++** add_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svadd_n_s32_m (p0, z1, x0), ++ z0 = svadd_m (p0, z1, x0)) ++ ++/* ++** add_1_s32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s32_m_tied1, svint32_t, ++ z0 = svadd_n_s32_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s32_m_untied, svint32_t, ++ z0 = svadd_n_s32_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_m2_s32_m: ++** mov 
(z[0-9]+\.s), #-2 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_s32_m, svint32_t, ++ z0 = svadd_n_s32_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_s32_z_tied1, svint32_t, ++ z0 = svadd_s32_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_s32_z_tied2, svint32_t, ++ z0 = svadd_s32_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** add z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s32_z_untied, svint32_t, ++ z0 = svadd_s32_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svadd_n_s32_z (p0, z0, x0), ++ z0 = svadd_z (p0, z0, x0)) ++ ++/* ++** add_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** add z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svadd_n_s32_z (p0, z1, x0), ++ z0 = svadd_z (p0, z1, x0)) ++ ++/* ++** add_1_s32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s32_z_tied1, svint32_t, ++ z0 = svadd_n_s32_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_s32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** add z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s32_z_untied, svint32_t, ++ z0 = svadd_n_s32_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_s32_x_tied1: ++** add z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s32_x_tied1, svint32_t, ++ z0 = svadd_s32_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_s32_x_tied2: ++** add z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s32_x_tied2, svint32_t, ++ z0 = svadd_s32_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_s32_x_untied: ++** add z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s32_x_untied, svint32_t, ++ z0 = svadd_s32_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svadd_n_s32_x (p0, z0, x0), ++ z0 = svadd_x (p0, z0, x0)) ++ ++/* ++** add_w0_s32_x_untied: ++** mov (z[0-9]+\.s), w0 ++** add z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svadd_n_s32_x (p0, z1, x0), ++ z0 = svadd_x (p0, z1, x0)) ++ ++/* ++** add_1_s32_x_tied1: ++** add z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s32_x_tied1, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_s32_x_untied: ++** movprfx z0, z1 ++** add z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s32_x_untied, svint32_t, ++ z0 = svadd_n_s32_x (p0, z1, 1), ++ 
z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_127_s32_x: ++** add z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_127_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, 127), ++ z0 = svadd_x (p0, z0, 127)) ++ ++/* ++** add_128_s32_x: ++** add z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_128_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, 128), ++ z0 = svadd_x (p0, z0, 128)) ++ ++/* ++** add_255_s32_x: ++** add z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_255_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, 255), ++ z0 = svadd_x (p0, z0, 255)) ++ ++/* ++** add_256_s32_x: ++** add z0\.s, z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (add_256_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, 256), ++ z0 = svadd_x (p0, z0, 256)) ++ ++/* ++** add_511_s32_x: ++** mov (z[0-9]+\.s), #511 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_511_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, 511), ++ z0 = svadd_x (p0, z0, 511)) ++ ++/* ++** add_512_s32_x: ++** add z0\.s, z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (add_512_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, 512), ++ z0 = svadd_x (p0, z0, 512)) ++ ++/* ++** add_65280_s32_x: ++** add z0\.s, z0\.s, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_65280_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, 0xff00), ++ z0 = svadd_x (p0, z0, 0xff00)) ++ ++/* ++** add_65535_s32_x: ++** mov (z[0-9]+\.s), #65535 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_65535_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, 65535), ++ z0 = svadd_x (p0, z0, 65535)) ++ ++/* ++** add_65536_s32_x: ++** mov (z[0-9]+\.s), #65536 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_65536_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, 65536), ++ z0 = svadd_x (p0, z0, 65536)) ++ ++/* ++** add_m1_s32_x: ++** sub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m127_s32_x: ++** sub z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m127_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -127), ++ z0 = svadd_x (p0, z0, -127)) ++ ++/* ++** add_m128_s32_x: ++** sub z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m128_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -128), ++ z0 = svadd_x (p0, z0, -128)) ++ ++/* ++** add_m255_s32_x: ++** sub z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m255_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -255), ++ z0 = svadd_x (p0, z0, -255)) ++ ++/* ++** add_m256_s32_x: ++** sub z0\.s, z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m256_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -256), ++ z0 = svadd_x (p0, z0, -256)) ++ ++/* ++** add_m511_s32_x: ++** mov (z[0-9]+\.s), #-511 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m511_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -511), ++ z0 = svadd_x (p0, z0, -511)) ++ ++/* ++** add_m512_s32_x: ++** sub z0\.s, z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m512_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -512), ++ z0 = svadd_x (p0, z0, -512)) ++ ++/* ++** add_m32768_s32_x: ++** sub z0\.s, z0\.s, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m32768_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -0x8000), ++ z0 = svadd_x (p0, z0, -0x8000)) ++ ++/* ++** add_m65280_s32_x: ++** sub z0\.s, z0\.s, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65280_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -0xff00), ++ z0 = svadd_x (p0, z0, -0xff00)) ++ ++/* ++** 
add_m65535_s32_x: ++** mov (z[0-9]+\.s), #-65535 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65535_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -65535), ++ z0 = svadd_x (p0, z0, -65535)) ++ ++/* ++** add_m65536_s32_x: ++** mov (z[0-9]+\.s), #-65536 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65536_s32_x, svint32_t, ++ z0 = svadd_n_s32_x (p0, z0, -65536), ++ z0 = svadd_x (p0, z0, -65536)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s64.c +new file mode 100644 +index 000000000..aab63ef62 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s64.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_s64_m_tied1: ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_s64_m_tied1, svint64_t, ++ z0 = svadd_s64_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_s64_m_tied2, svint64_t, ++ z0 = svadd_s64_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_s64_m_untied: ++** movprfx z0, z1 ++** add z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_s64_m_untied, svint64_t, ++ z0 = svadd_s64_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svadd_n_s64_m (p0, z0, x0), ++ z0 = svadd_m (p0, z0, x0)) ++ ++/* ++** add_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svadd_n_s64_m (p0, z1, x0), ++ z0 = svadd_m (p0, z1, x0)) ++ ++/* ++** add_1_s64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s64_m_tied1, svint64_t, ++ z0 = svadd_n_s64_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s64_m_untied, svint64_t, ++ z0 = svadd_n_s64_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_m2_s64_m: ++** mov (z[0-9]+\.d), #-2 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_s64_m, svint64_t, ++ z0 = svadd_n_s64_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_s64_z_tied1, svint64_t, ++ z0 = svadd_s64_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_s64_z_tied2, svint64_t, ++ z0 = svadd_s64_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** add z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s64_z_untied, svint64_t, ++ z0 = svadd_s64_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX 
(add_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svadd_n_s64_z (p0, z0, x0), ++ z0 = svadd_z (p0, z0, x0)) ++ ++/* ++** add_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** add z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svadd_n_s64_z (p0, z1, x0), ++ z0 = svadd_z (p0, z1, x0)) ++ ++/* ++** add_1_s64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s64_z_tied1, svint64_t, ++ z0 = svadd_n_s64_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_s64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** add z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s64_z_untied, svint64_t, ++ z0 = svadd_n_s64_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_s64_x_tied1: ++** add z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s64_x_tied1, svint64_t, ++ z0 = svadd_s64_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_s64_x_tied2: ++** add z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s64_x_tied2, svint64_t, ++ z0 = svadd_s64_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_s64_x_untied: ++** add z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s64_x_untied, svint64_t, ++ z0 = svadd_s64_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svadd_n_s64_x (p0, z0, x0), ++ z0 = svadd_x (p0, z0, x0)) ++ ++/* ++** add_x0_s64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** add z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svadd_n_s64_x (p0, z1, x0), ++ z0 = svadd_x (p0, z1, x0)) ++ ++/* ++** add_1_s64_x_tied1: ++** add z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s64_x_tied1, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_s64_x_untied: ++** movprfx z0, z1 ++** add z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s64_x_untied, svint64_t, ++ z0 = svadd_n_s64_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_127_s64_x: ++** add z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_127_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, 127), ++ z0 = svadd_x (p0, z0, 127)) ++ ++/* ++** add_128_s64_x: ++** add z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_128_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, 128), ++ z0 = svadd_x (p0, z0, 128)) ++ ++/* ++** add_255_s64_x: ++** add z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_255_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, 255), ++ z0 = svadd_x (p0, z0, 255)) ++ ++/* ++** add_256_s64_x: ++** add z0\.d, z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (add_256_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, 256), ++ z0 = svadd_x (p0, z0, 256)) ++ ++/* ++** add_511_s64_x: ++** mov (z[0-9]+\.d), #511 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_511_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, 511), ++ z0 = svadd_x (p0, z0, 511)) ++ ++/* ++** add_512_s64_x: ++** add z0\.d, z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (add_512_s64_x, 
svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, 512), ++ z0 = svadd_x (p0, z0, 512)) ++ ++/* ++** add_65280_s64_x: ++** add z0\.d, z0\.d, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_65280_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, 0xff00), ++ z0 = svadd_x (p0, z0, 0xff00)) ++ ++/* ++** add_65535_s64_x: ++** mov (z[0-9]+\.d), #65535 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_65535_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, 65535), ++ z0 = svadd_x (p0, z0, 65535)) ++ ++/* ++** add_65536_s64_x: ++** mov (z[0-9]+\.d), #65536 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_65536_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, 65536), ++ z0 = svadd_x (p0, z0, 65536)) ++ ++/* ++** add_m1_s64_x: ++** sub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m127_s64_x: ++** sub z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m127_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -127), ++ z0 = svadd_x (p0, z0, -127)) ++ ++/* ++** add_m128_s64_x: ++** sub z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m128_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -128), ++ z0 = svadd_x (p0, z0, -128)) ++ ++/* ++** add_m255_s64_x: ++** sub z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m255_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -255), ++ z0 = svadd_x (p0, z0, -255)) ++ ++/* ++** add_m256_s64_x: ++** sub z0\.d, z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m256_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -256), ++ z0 = svadd_x (p0, z0, -256)) ++ ++/* ++** add_m511_s64_x: ++** mov (z[0-9]+\.d), #-511 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m511_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -511), ++ z0 = svadd_x (p0, z0, -511)) ++ ++/* ++** add_m512_s64_x: ++** sub z0\.d, z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m512_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -512), ++ z0 = svadd_x (p0, z0, -512)) ++ ++/* ++** add_m32768_s64_x: ++** sub z0\.d, z0\.d, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m32768_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -0x8000), ++ z0 = svadd_x (p0, z0, -0x8000)) ++ ++/* ++** add_m65280_s64_x: ++** sub z0\.d, z0\.d, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65280_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -0xff00), ++ z0 = svadd_x (p0, z0, -0xff00)) ++ ++/* ++** add_m65535_s64_x: ++** mov (z[0-9]+\.d), #-65535 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65535_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -65535), ++ z0 = svadd_x (p0, z0, -65535)) ++ ++/* ++** add_m65536_s64_x: ++** mov (z[0-9]+\.d), #-65536 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65536_s64_x, svint64_t, ++ z0 = svadd_n_s64_x (p0, z0, -65536), ++ z0 = svadd_x (p0, z0, -65536)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s8.c +new file mode 100644 +index 000000000..0889c189d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s8.c +@@ -0,0 +1,294 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_s8_m_tied1: ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (add_s8_m_tied1, svint8_t, ++ z0 = svadd_s8_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** 
movprfx z0, z1 ++** add z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (add_s8_m_tied2, svint8_t, ++ z0 = svadd_s8_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_s8_m_untied: ++** movprfx z0, z1 ++** add z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (add_s8_m_untied, svint8_t, ++ z0 = svadd_s8_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svadd_n_s8_m (p0, z0, x0), ++ z0 = svadd_m (p0, z0, x0)) ++ ++/* ++** add_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svadd_n_s8_m (p0, z1, x0), ++ z0 = svadd_m (p0, z1, x0)) ++ ++/* ++** add_1_s8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s8_m_tied1, svint8_t, ++ z0 = svadd_n_s8_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s8_m_untied, svint8_t, ++ z0 = svadd_n_s8_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_m1_s8_m: ++** mov (z[0-9]+\.b), #-1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_s8_m, svint8_t, ++ z0 = svadd_n_s8_m (p0, z0, -1), ++ z0 = svadd_m (p0, z0, -1)) ++ ++/* ++** add_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (add_s8_z_tied1, svint8_t, ++ z0 = svadd_s8_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (add_s8_z_tied2, svint8_t, ++ z0 = svadd_s8_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** add z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s8_z_untied, svint8_t, ++ z0 = svadd_s8_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svadd_n_s8_z (p0, z0, x0), ++ z0 = svadd_z (p0, z0, x0)) ++ ++/* ++** add_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svadd_n_s8_z (p0, z1, x0), ++ z0 = svadd_z (p0, z1, x0)) ++ ++/* ++** add_1_s8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s8_z_tied1, svint8_t, ++ z0 = svadd_n_s8_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_s8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s8_z_untied, svint8_t, ++ z0 = svadd_n_s8_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_s8_x_tied1: ++** add z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z 
(add_s8_x_tied1, svint8_t, ++ z0 = svadd_s8_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_s8_x_tied2: ++** add z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s8_x_tied2, svint8_t, ++ z0 = svadd_s8_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_s8_x_untied: ++** add z0\.b, (z1\.b, z2\.b|z2\.b, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (add_s8_x_untied, svint8_t, ++ z0 = svadd_s8_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** add z0\.b, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svadd_n_s8_x (p0, z0, x0), ++ z0 = svadd_x (p0, z0, x0)) ++ ++/* ++** add_w0_s8_x_untied: ++** mov (z[0-9]+\.b), w0 ++** add z0\.b, (z1\.b, \1|\1, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svadd_n_s8_x (p0, z1, x0), ++ z0 = svadd_x (p0, z1, x0)) ++ ++/* ++** add_1_s8_x_tied1: ++** add z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s8_x_tied1, svint8_t, ++ z0 = svadd_n_s8_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_s8_x_untied: ++** movprfx z0, z1 ++** add z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_s8_x_untied, svint8_t, ++ z0 = svadd_n_s8_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_127_s8_x: ++** add z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_127_s8_x, svint8_t, ++ z0 = svadd_n_s8_x (p0, z0, 127), ++ z0 = svadd_x (p0, z0, 127)) ++ ++/* ++** add_128_s8_x: ++** add z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_128_s8_x, svint8_t, ++ z0 = svadd_n_s8_x (p0, z0, 128), ++ z0 = svadd_x (p0, z0, 128)) ++ ++/* ++** add_255_s8_x: ++** add z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_255_s8_x, svint8_t, ++ z0 = svadd_n_s8_x (p0, z0, 255), ++ z0 = svadd_x (p0, z0, 255)) ++ ++/* ++** add_m1_s8_x: ++** add z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_s8_x, svint8_t, ++ z0 = svadd_n_s8_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m127_s8_x: ++** add z0\.b, z0\.b, #129 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m127_s8_x, svint8_t, ++ z0 = svadd_n_s8_x (p0, z0, -127), ++ z0 = svadd_x (p0, z0, -127)) ++ ++/* ++** add_m128_s8_x: ++** add z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m128_s8_x, svint8_t, ++ z0 = svadd_n_s8_x (p0, z0, -128), ++ z0 = svadd_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u16.c +new file mode 100644 +index 000000000..25cb90353 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u16.c +@@ -0,0 +1,377 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_u16_m_tied1: ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_u16_m_tied1, svuint16_t, ++ z0 = svadd_u16_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** add z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_u16_m_tied2, svuint16_t, ++ z0 = svadd_u16_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_u16_m_untied: ++** movprfx z0, z1 ++** add z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_u16_m_untied, svuint16_t, ++ z0 = svadd_u16_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX 
(add_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svadd_n_u16_m (p0, z0, x0), ++ z0 = svadd_m (p0, z0, x0)) ++ ++/* ++** add_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svadd_n_u16_m (p0, z1, x0), ++ z0 = svadd_m (p0, z1, x0)) ++ ++/* ++** add_1_u16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u16_m_tied1, svuint16_t, ++ z0 = svadd_n_u16_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u16_m_untied, svuint16_t, ++ z0 = svadd_n_u16_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_m2_u16_m: ++** mov (z[0-9]+\.h), #-2 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_u16_m, svuint16_t, ++ z0 = svadd_n_u16_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_u16_z_tied1, svuint16_t, ++ z0 = svadd_u16_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (add_u16_z_tied2, svuint16_t, ++ z0 = svadd_u16_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** add z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u16_z_untied, svuint16_t, ++ z0 = svadd_u16_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svadd_n_u16_z (p0, z0, x0), ++ z0 = svadd_z (p0, z0, x0)) ++ ++/* ++** add_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** add z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svadd_n_u16_z (p0, z1, x0), ++ z0 = svadd_z (p0, z1, x0)) ++ ++/* ++** add_1_u16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u16_z_tied1, svuint16_t, ++ z0 = svadd_n_u16_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_u16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** add z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u16_z_untied, svuint16_t, ++ z0 = svadd_n_u16_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_u16_x_tied1: ++** add z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u16_x_tied1, svuint16_t, ++ z0 = svadd_u16_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_u16_x_tied2: ++** add z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u16_x_tied2, svuint16_t, ++ z0 = svadd_u16_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_u16_x_untied: ++** add z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u16_x_untied, svuint16_t, ++ 
z0 = svadd_u16_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** add z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svadd_n_u16_x (p0, z0, x0), ++ z0 = svadd_x (p0, z0, x0)) ++ ++/* ++** add_w0_u16_x_untied: ++** mov (z[0-9]+\.h), w0 ++** add z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svadd_n_u16_x (p0, z1, x0), ++ z0 = svadd_x (p0, z1, x0)) ++ ++/* ++** add_1_u16_x_tied1: ++** add z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u16_x_tied1, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_u16_x_untied: ++** movprfx z0, z1 ++** add z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u16_x_untied, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_127_u16_x: ++** add z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_127_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, 127), ++ z0 = svadd_x (p0, z0, 127)) ++ ++/* ++** add_128_u16_x: ++** add z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_128_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, 128), ++ z0 = svadd_x (p0, z0, 128)) ++ ++/* ++** add_255_u16_x: ++** add z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_255_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, 255), ++ z0 = svadd_x (p0, z0, 255)) ++ ++/* ++** add_256_u16_x: ++** add z0\.h, z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (add_256_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, 256), ++ z0 = svadd_x (p0, z0, 256)) ++ ++/* ++** add_257_u16_x: ++** mov (z[0-9]+)\.b, #1 ++** add z0\.h, (z0\.h, \1\.h|\1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_257_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, 257), ++ z0 = svadd_x (p0, z0, 257)) ++ ++/* ++** add_512_u16_x: ++** add z0\.h, z0\.h, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (add_512_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, 512), ++ z0 = svadd_x (p0, z0, 512)) ++ ++/* ++** add_65280_u16_x: ++** add z0\.h, z0\.h, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_65280_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, 0xff00), ++ z0 = svadd_x (p0, z0, 0xff00)) ++ ++/* ++** add_m1_u16_x: ++** sub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m127_u16_x: ++** sub z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m127_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, -127), ++ z0 = svadd_x (p0, z0, -127)) ++ ++/* ++** add_m128_u16_x: ++** sub z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m128_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, -128), ++ z0 = svadd_x (p0, z0, -128)) ++ ++/* ++** add_m255_u16_x: ++** sub z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m255_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, -255), ++ z0 = svadd_x (p0, z0, -255)) ++ ++/* ++** add_m256_u16_x: ++** add z0\.h, z0\.h, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m256_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, -256), ++ z0 = svadd_x (p0, z0, -256)) ++ ++/* ++** add_m257_u16_x: ++** mov (z[0-9]+\.h), #-257 ++** add z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m257_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, -257), ++ z0 = svadd_x (p0, z0, -257)) ++ ++/* ++** add_m512_u16_x: ++** add z0\.h, z0\.h, #65024 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m512_u16_x, svuint16_t, ++ z0 = 
svadd_n_u16_x (p0, z0, -512), ++ z0 = svadd_x (p0, z0, -512)) ++ ++/* ++** add_m32768_u16_x: ++** add z0\.h, z0\.h, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m32768_u16_x, svuint16_t, ++ z0 = svadd_n_u16_x (p0, z0, -0x8000), ++ z0 = svadd_x (p0, z0, -0x8000)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u32.c +new file mode 100644 +index 000000000..ee979489b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u32.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_u32_m_tied1: ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_u32_m_tied1, svuint32_t, ++ z0 = svadd_u32_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** add z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_u32_m_tied2, svuint32_t, ++ z0 = svadd_u32_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_u32_m_untied: ++** movprfx z0, z1 ++** add z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_u32_m_untied, svuint32_t, ++ z0 = svadd_u32_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svadd_n_u32_m (p0, z0, x0), ++ z0 = svadd_m (p0, z0, x0)) ++ ++/* ++** add_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svadd_n_u32_m (p0, z1, x0), ++ z0 = svadd_m (p0, z1, x0)) ++ ++/* ++** add_1_u32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u32_m_tied1, svuint32_t, ++ z0 = svadd_n_u32_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u32_m_untied, svuint32_t, ++ z0 = svadd_n_u32_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_m2_u32_m: ++** mov (z[0-9]+\.s), #-2 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_u32_m, svuint32_t, ++ z0 = svadd_n_u32_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_u32_z_tied1, svuint32_t, ++ z0 = svadd_u32_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (add_u32_z_tied2, svuint32_t, ++ z0 = svadd_u32_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** add z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u32_z_untied, svuint32_t, ++ z0 = svadd_u32_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svadd_n_u32_z (p0, z0, x0), ++ z0 = svadd_z (p0, z0, x0)) ++ ++/* ++** add_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, 
p0/z, z1\.s ++** add z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svadd_n_u32_z (p0, z1, x0), ++ z0 = svadd_z (p0, z1, x0)) ++ ++/* ++** add_1_u32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u32_z_tied1, svuint32_t, ++ z0 = svadd_n_u32_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_u32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** add z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u32_z_untied, svuint32_t, ++ z0 = svadd_n_u32_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_u32_x_tied1: ++** add z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u32_x_tied1, svuint32_t, ++ z0 = svadd_u32_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_u32_x_tied2: ++** add z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u32_x_tied2, svuint32_t, ++ z0 = svadd_u32_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_u32_x_untied: ++** add z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u32_x_untied, svuint32_t, ++ z0 = svadd_u32_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svadd_n_u32_x (p0, z0, x0), ++ z0 = svadd_x (p0, z0, x0)) ++ ++/* ++** add_w0_u32_x_untied: ++** mov (z[0-9]+\.s), w0 ++** add z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svadd_n_u32_x (p0, z1, x0), ++ z0 = svadd_x (p0, z1, x0)) ++ ++/* ++** add_1_u32_x_tied1: ++** add z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u32_x_tied1, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_u32_x_untied: ++** movprfx z0, z1 ++** add z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u32_x_untied, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_127_u32_x: ++** add z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_127_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, 127), ++ z0 = svadd_x (p0, z0, 127)) ++ ++/* ++** add_128_u32_x: ++** add z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_128_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, 128), ++ z0 = svadd_x (p0, z0, 128)) ++ ++/* ++** add_255_u32_x: ++** add z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_255_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, 255), ++ z0 = svadd_x (p0, z0, 255)) ++ ++/* ++** add_256_u32_x: ++** add z0\.s, z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (add_256_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, 256), ++ z0 = svadd_x (p0, z0, 256)) ++ ++/* ++** add_511_u32_x: ++** mov (z[0-9]+\.s), #511 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_511_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, 511), ++ z0 = svadd_x (p0, z0, 511)) ++ ++/* ++** add_512_u32_x: ++** add z0\.s, z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (add_512_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, 512), ++ z0 = svadd_x (p0, z0, 512)) ++ ++/* ++** add_65280_u32_x: ++** add z0\.s, z0\.s, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z 
(add_65280_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, 0xff00), ++ z0 = svadd_x (p0, z0, 0xff00)) ++ ++/* ++** add_65535_u32_x: ++** mov (z[0-9]+\.s), #65535 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_65535_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, 65535), ++ z0 = svadd_x (p0, z0, 65535)) ++ ++/* ++** add_65536_u32_x: ++** mov (z[0-9]+\.s), #65536 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_65536_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, 65536), ++ z0 = svadd_x (p0, z0, 65536)) ++ ++/* ++** add_m1_u32_x: ++** sub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m127_u32_x: ++** sub z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m127_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -127), ++ z0 = svadd_x (p0, z0, -127)) ++ ++/* ++** add_m128_u32_x: ++** sub z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m128_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -128), ++ z0 = svadd_x (p0, z0, -128)) ++ ++/* ++** add_m255_u32_x: ++** sub z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m255_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -255), ++ z0 = svadd_x (p0, z0, -255)) ++ ++/* ++** add_m256_u32_x: ++** sub z0\.s, z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m256_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -256), ++ z0 = svadd_x (p0, z0, -256)) ++ ++/* ++** add_m511_u32_x: ++** mov (z[0-9]+\.s), #-511 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m511_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -511), ++ z0 = svadd_x (p0, z0, -511)) ++ ++/* ++** add_m512_u32_x: ++** sub z0\.s, z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m512_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -512), ++ z0 = svadd_x (p0, z0, -512)) ++ ++/* ++** add_m32768_u32_x: ++** sub z0\.s, z0\.s, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m32768_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -0x8000), ++ z0 = svadd_x (p0, z0, -0x8000)) ++ ++/* ++** add_m65280_u32_x: ++** sub z0\.s, z0\.s, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65280_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -0xff00), ++ z0 = svadd_x (p0, z0, -0xff00)) ++ ++/* ++** add_m65535_u32_x: ++** mov (z[0-9]+\.s), #-65535 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65535_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -65535), ++ z0 = svadd_x (p0, z0, -65535)) ++ ++/* ++** add_m65536_u32_x: ++** mov (z[0-9]+\.s), #-65536 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65536_u32_x, svuint32_t, ++ z0 = svadd_n_u32_x (p0, z0, -65536), ++ z0 = svadd_x (p0, z0, -65536)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u64.c +new file mode 100644 +index 000000000..25d2972a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u64.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_u64_m_tied1: ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_u64_m_tied1, svuint64_t, ++ z0 = svadd_u64_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_u64_m_tied2, svuint64_t, ++ z0 = svadd_u64_m (p0, z1, z0), ++ z0 = 
svadd_m (p0, z1, z0)) ++ ++/* ++** add_u64_m_untied: ++** movprfx z0, z1 ++** add z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_u64_m_untied, svuint64_t, ++ z0 = svadd_u64_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svadd_n_u64_m (p0, z0, x0), ++ z0 = svadd_m (p0, z0, x0)) ++ ++/* ++** add_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svadd_n_u64_m (p0, z1, x0), ++ z0 = svadd_m (p0, z1, x0)) ++ ++/* ++** add_1_u64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u64_m_tied1, svuint64_t, ++ z0 = svadd_n_u64_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u64_m_untied, svuint64_t, ++ z0 = svadd_n_u64_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_m2_u64_m: ++** mov (z[0-9]+\.d), #-2 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m2_u64_m, svuint64_t, ++ z0 = svadd_n_u64_m (p0, z0, -2), ++ z0 = svadd_m (p0, z0, -2)) ++ ++/* ++** add_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_u64_z_tied1, svuint64_t, ++ z0 = svadd_u64_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (add_u64_z_tied2, svuint64_t, ++ z0 = svadd_u64_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** add z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u64_z_untied, svuint64_t, ++ z0 = svadd_u64_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svadd_n_u64_z (p0, z0, x0), ++ z0 = svadd_z (p0, z0, x0)) ++ ++/* ++** add_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** add z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svadd_n_u64_z (p0, z1, x0), ++ z0 = svadd_z (p0, z1, x0)) ++ ++/* ++** add_1_u64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u64_z_tied1, svuint64_t, ++ z0 = svadd_n_u64_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_u64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** add z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u64_z_untied, svuint64_t, ++ z0 = svadd_n_u64_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_u64_x_tied1: ++** add z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u64_x_tied1, svuint64_t, ++ z0 = svadd_u64_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, 
z1)) ++ ++/* ++** add_u64_x_tied2: ++** add z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u64_x_tied2, svuint64_t, ++ z0 = svadd_u64_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_u64_x_untied: ++** add z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u64_x_untied, svuint64_t, ++ z0 = svadd_u64_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svadd_n_u64_x (p0, z0, x0), ++ z0 = svadd_x (p0, z0, x0)) ++ ++/* ++** add_x0_u64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** add z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svadd_n_u64_x (p0, z1, x0), ++ z0 = svadd_x (p0, z1, x0)) ++ ++/* ++** add_1_u64_x_tied1: ++** add z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u64_x_tied1, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_u64_x_untied: ++** movprfx z0, z1 ++** add z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u64_x_untied, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_127_u64_x: ++** add z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_127_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, 127), ++ z0 = svadd_x (p0, z0, 127)) ++ ++/* ++** add_128_u64_x: ++** add z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_128_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, 128), ++ z0 = svadd_x (p0, z0, 128)) ++ ++/* ++** add_255_u64_x: ++** add z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_255_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, 255), ++ z0 = svadd_x (p0, z0, 255)) ++ ++/* ++** add_256_u64_x: ++** add z0\.d, z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (add_256_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, 256), ++ z0 = svadd_x (p0, z0, 256)) ++ ++/* ++** add_511_u64_x: ++** mov (z[0-9]+\.d), #511 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_511_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, 511), ++ z0 = svadd_x (p0, z0, 511)) ++ ++/* ++** add_512_u64_x: ++** add z0\.d, z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (add_512_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, 512), ++ z0 = svadd_x (p0, z0, 512)) ++ ++/* ++** add_65280_u64_x: ++** add z0\.d, z0\.d, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_65280_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, 0xff00), ++ z0 = svadd_x (p0, z0, 0xff00)) ++ ++/* ++** add_65535_u64_x: ++** mov (z[0-9]+\.d), #65535 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_65535_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, 65535), ++ z0 = svadd_x (p0, z0, 65535)) ++ ++/* ++** add_65536_u64_x: ++** mov (z[0-9]+\.d), #65536 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_65536_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, 65536), ++ z0 = svadd_x (p0, z0, 65536)) ++ ++/* ++** add_m1_u64_x: ++** sub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m127_u64_x: ++** sub z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m127_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, -127), ++ z0 = svadd_x (p0, z0, -127)) ++ ++/* ++** add_m128_u64_x: ++** sub z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m128_u64_x, svuint64_t, ++ z0 
= svadd_n_u64_x (p0, z0, -128), ++ z0 = svadd_x (p0, z0, -128)) ++ ++/* ++** add_m255_u64_x: ++** sub z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m255_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, -255), ++ z0 = svadd_x (p0, z0, -255)) ++ ++/* ++** add_m256_u64_x: ++** sub z0\.d, z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m256_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, -256), ++ z0 = svadd_x (p0, z0, -256)) ++ ++/* ++** add_m511_u64_x: ++** mov (z[0-9]+\.d), #-511 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m511_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, -511), ++ z0 = svadd_x (p0, z0, -511)) ++ ++/* ++** add_m512_u64_x: ++** sub z0\.d, z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m512_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, -512), ++ z0 = svadd_x (p0, z0, -512)) ++ ++/* ++** add_m32768_u64_x: ++** sub z0\.d, z0\.d, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m32768_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, -0x8000), ++ z0 = svadd_x (p0, z0, -0x8000)) ++ ++/* ++** add_m65280_u64_x: ++** sub z0\.d, z0\.d, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65280_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, -0xff00), ++ z0 = svadd_x (p0, z0, -0xff00)) ++ ++/* ++** add_m65535_u64_x: ++** mov (z[0-9]+\.d), #-65535 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65535_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, -65535), ++ z0 = svadd_x (p0, z0, -65535)) ++ ++/* ++** add_m65536_u64_x: ++** mov (z[0-9]+\.d), #-65536 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (add_m65536_u64_x, svuint64_t, ++ z0 = svadd_n_u64_x (p0, z0, -65536), ++ z0 = svadd_x (p0, z0, -65536)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u8.c +new file mode 100644 +index 000000000..06b68c97c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u8.c +@@ -0,0 +1,294 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** add_u8_m_tied1: ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (add_u8_m_tied1, svuint8_t, ++ z0 = svadd_u8_m (p0, z0, z1), ++ z0 = svadd_m (p0, z0, z1)) ++ ++/* ++** add_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** add z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (add_u8_m_tied2, svuint8_t, ++ z0 = svadd_u8_m (p0, z1, z0), ++ z0 = svadd_m (p0, z1, z0)) ++ ++/* ++** add_u8_m_untied: ++** movprfx z0, z1 ++** add z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (add_u8_m_untied, svuint8_t, ++ z0 = svadd_u8_m (p0, z1, z2), ++ z0 = svadd_m (p0, z1, z2)) ++ ++/* ++** add_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svadd_n_u8_m (p0, z0, x0), ++ z0 = svadd_m (p0, z0, x0)) ++ ++/* ++** add_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svadd_n_u8_m (p0, z1, x0), ++ z0 = svadd_m (p0, z1, x0)) ++ ++/* ++** add_1_u8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u8_m_tied1, svuint8_t, ++ z0 = svadd_n_u8_m (p0, z0, 1), ++ z0 = svadd_m (p0, z0, 1)) ++ ++/* ++** add_1_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** add z0\.b, p0/m, z0\.b, \1 
++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u8_m_untied, svuint8_t, ++ z0 = svadd_n_u8_m (p0, z1, 1), ++ z0 = svadd_m (p0, z1, 1)) ++ ++/* ++** add_m1_u8_m: ++** mov (z[0-9]+\.b), #-1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_u8_m, svuint8_t, ++ z0 = svadd_n_u8_m (p0, z0, -1), ++ z0 = svadd_m (p0, z0, -1)) ++ ++/* ++** add_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (add_u8_z_tied1, svuint8_t, ++ z0 = svadd_u8_z (p0, z0, z1), ++ z0 = svadd_z (p0, z0, z1)) ++ ++/* ++** add_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (add_u8_z_tied2, svuint8_t, ++ z0 = svadd_u8_z (p0, z1, z0), ++ z0 = svadd_z (p0, z1, z0)) ++ ++/* ++** add_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** add z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u8_z_untied, svuint8_t, ++ z0 = svadd_u8_z (p0, z1, z2), ++ z0 = svadd_z (p0, z1, z2)) ++ ++/* ++** add_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svadd_n_u8_z (p0, z0, x0), ++ z0 = svadd_z (p0, z0, x0)) ++ ++/* ++** add_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svadd_n_u8_z (p0, z1, x0), ++ z0 = svadd_z (p0, z1, x0)) ++ ++/* ++** add_1_u8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u8_z_tied1, svuint8_t, ++ z0 = svadd_n_u8_z (p0, z0, 1), ++ z0 = svadd_z (p0, z0, 1)) ++ ++/* ++** add_1_u8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u8_z_untied, svuint8_t, ++ z0 = svadd_n_u8_z (p0, z1, 1), ++ z0 = svadd_z (p0, z1, 1)) ++ ++/* ++** add_u8_x_tied1: ++** add z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u8_x_tied1, svuint8_t, ++ z0 = svadd_u8_x (p0, z0, z1), ++ z0 = svadd_x (p0, z0, z1)) ++ ++/* ++** add_u8_x_tied2: ++** add z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u8_x_tied2, svuint8_t, ++ z0 = svadd_u8_x (p0, z1, z0), ++ z0 = svadd_x (p0, z1, z0)) ++ ++/* ++** add_u8_x_untied: ++** add z0\.b, (z1\.b, z2\.b|z2\.b, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (add_u8_x_untied, svuint8_t, ++ z0 = svadd_u8_x (p0, z1, z2), ++ z0 = svadd_x (p0, z1, z2)) ++ ++/* ++** add_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** add z0\.b, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svadd_n_u8_x (p0, z0, x0), ++ z0 = svadd_x (p0, z0, x0)) ++ ++/* ++** add_w0_u8_x_untied: ++** mov (z[0-9]+\.b), w0 ++** add z0\.b, (z1\.b, \1|\1, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_ZX (add_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svadd_n_u8_x (p0, z1, x0), ++ z0 = svadd_x (p0, z1, x0)) ++ ++/* ++** add_1_u8_x_tied1: ++** add z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u8_x_tied1, svuint8_t, ++ z0 = svadd_n_u8_x (p0, z0, 1), ++ z0 = svadd_x (p0, z0, 1)) ++ ++/* ++** add_1_u8_x_untied: ++** movprfx z0, z1 ++** add 
z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (add_1_u8_x_untied, svuint8_t, ++ z0 = svadd_n_u8_x (p0, z1, 1), ++ z0 = svadd_x (p0, z1, 1)) ++ ++/* ++** add_127_u8_x: ++** add z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (add_127_u8_x, svuint8_t, ++ z0 = svadd_n_u8_x (p0, z0, 127), ++ z0 = svadd_x (p0, z0, 127)) ++ ++/* ++** add_128_u8_x: ++** add z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_128_u8_x, svuint8_t, ++ z0 = svadd_n_u8_x (p0, z0, 128), ++ z0 = svadd_x (p0, z0, 128)) ++ ++/* ++** add_255_u8_x: ++** add z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_255_u8_x, svuint8_t, ++ z0 = svadd_n_u8_x (p0, z0, 255), ++ z0 = svadd_x (p0, z0, 255)) ++ ++/* ++** add_m1_u8_x: ++** add z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m1_u8_x, svuint8_t, ++ z0 = svadd_n_u8_x (p0, z0, -1), ++ z0 = svadd_x (p0, z0, -1)) ++ ++/* ++** add_m127_u8_x: ++** add z0\.b, z0\.b, #129 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m127_u8_x, svuint8_t, ++ z0 = svadd_n_u8_x (p0, z0, -127), ++ z0 = svadd_x (p0, z0, -127)) ++ ++/* ++** add_m128_u8_x: ++** add z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (add_m128_u8_x, svuint8_t, ++ z0 = svadd_n_u8_x (p0, z0, -128), ++ z0 = svadd_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c +new file mode 100644 +index 000000000..6c6bfa1c2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** adda_d0_f16: ++** fadda h0, p0, h0, z2\.h ++** ret ++*/ ++TEST_FOLD_LEFT_D (adda_d0_f16, float16_t, svfloat16_t, ++ d0 = svadda_f16 (p0, d0, z2), ++ d0 = svadda (p0, d0, z2)) ++ ++/* ++** adda_d1_f16: ++** mov v0\.h\[0\], v1\.h\[0\] ++** fadda h0, p0, h0, z2\.h ++** ret ++*/ ++TEST_FOLD_LEFT_D (adda_d1_f16, float16_t, svfloat16_t, ++ d0 = svadda_f16 (p0, d1, z2), ++ d0 = svadda (p0, d1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c +new file mode 100644 +index 000000000..8b2a1dd1c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** adda_d0_f32: ++** fadda s0, p0, s0, z2\.s ++** ret ++*/ ++TEST_FOLD_LEFT_D (adda_d0_f32, float32_t, svfloat32_t, ++ d0 = svadda_f32 (p0, d0, z2), ++ d0 = svadda (p0, d0, z2)) ++ ++/* ++** adda_d1_f32: ++** fmov s0, s1 ++** fadda s0, p0, s0, z2\.s ++** ret ++*/ ++TEST_FOLD_LEFT_D (adda_d1_f32, float32_t, svfloat32_t, ++ d0 = svadda_f32 (p0, d1, z2), ++ d0 = svadda (p0, d1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c +new file mode 100644 +index 000000000..90a56420a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** adda_d0_f64: ++** fadda d0, p0, d0, z2\.d ++** ret ++*/ ++TEST_FOLD_LEFT_D (adda_d0_f64, float64_t, svfloat64_t, ++ d0 = svadda_f64 (p0, d0, z2), ++ d0 = svadda (p0, d0, z2)) ++ ++/* ++** adda_d1_f64: ++** fmov d0, d1 ++** fadda d0, p0, d0, z2\.d ++** ret ++*/ ++TEST_FOLD_LEFT_D (adda_d1_f64, float64_t, svfloat64_t, ++ d0 = svadda_f64 (p0, d1, z2), ++ d0 = svadda (p0, d1, z2)) +diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f16.c +new file mode 100644 +index 000000000..7bb0c1de4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_d0_f16_tied: ++** faddv h0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_D (addv_d0_f16_tied, float16_t, svfloat16_t, ++ d0 = svaddv_f16 (p0, z0), ++ d0 = svaddv (p0, z0)) ++ ++/* ++** addv_d0_f16_untied: ++** faddv h0, p0, z1\.h ++** ret ++*/ ++TEST_REDUCTION_D (addv_d0_f16_untied, float16_t, svfloat16_t, ++ d0 = svaddv_f16 (p0, z1), ++ d0 = svaddv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f32.c +new file mode 100644 +index 000000000..51c621910 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_d0_f32_tied: ++** faddv s0, p0, z0\.s ++** ret ++*/ ++TEST_REDUCTION_D (addv_d0_f32_tied, float32_t, svfloat32_t, ++ d0 = svaddv_f32 (p0, z0), ++ d0 = svaddv (p0, z0)) ++ ++/* ++** addv_d0_f32_untied: ++** faddv s0, p0, z1\.s ++** ret ++*/ ++TEST_REDUCTION_D (addv_d0_f32_untied, float32_t, svfloat32_t, ++ d0 = svaddv_f32 (p0, z1), ++ d0 = svaddv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f64.c +new file mode 100644 +index 000000000..882866210 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_d0_f64_tied: ++** faddv d0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_D (addv_d0_f64_tied, float64_t, svfloat64_t, ++ d0 = svaddv_f64 (p0, z0), ++ d0 = svaddv (p0, z0)) ++ ++/* ++** addv_d0_f64_untied: ++** faddv d0, p0, z1\.d ++** ret ++*/ ++TEST_REDUCTION_D (addv_d0_f64_untied, float64_t, svfloat64_t, ++ d0 = svaddv_f64 (p0, z1), ++ d0 = svaddv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s16.c +new file mode 100644 +index 000000000..05429a47e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_x0_s16: ++** saddv (d[0-9]+), p0, z0\.h ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (addv_x0_s16, int64_t, svint16_t, ++ x0 = svaddv_s16 (p0, z0), ++ x0 = svaddv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s32.c +new file mode 100644 +index 000000000..5f7789a9a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_x0_s32: ++** saddv (d[0-9]+), p0, z0\.s ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (addv_x0_s32, int64_t, svint32_t, ++ x0 = svaddv_s32 (p0, z0), ++ x0 = svaddv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s64.c +new file mode 
100644 +index 000000000..76c480091 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_x0_s64: ++** uaddv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (addv_x0_s64, int64_t, svint64_t, ++ x0 = svaddv_s64 (p0, z0), ++ x0 = svaddv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s8.c +new file mode 100644 +index 000000000..8ccb2bf4f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_x0_s8: ++** saddv (d[0-9]+), p0, z0\.b ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (addv_x0_s8, int64_t, svint8_t, ++ x0 = svaddv_s8 (p0, z0), ++ x0 = svaddv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u16.c +new file mode 100644 +index 000000000..6371921fe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_x0_u16: ++** uaddv (d[0-9]+), p0, z0\.h ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (addv_x0_u16, uint64_t, svuint16_t, ++ x0 = svaddv_u16 (p0, z0), ++ x0 = svaddv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u32.c +new file mode 100644 +index 000000000..bdd0ed1f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_x0_u32: ++** uaddv (d[0-9]+), p0, z0\.s ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (addv_x0_u32, uint64_t, svuint32_t, ++ x0 = svaddv_u32 (p0, z0), ++ x0 = svaddv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u64.c +new file mode 100644 +index 000000000..7b1995d3f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_x0_u64: ++** uaddv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (addv_x0_u64, uint64_t, svuint64_t, ++ x0 = svaddv_u64 (p0, z0), ++ x0 = svaddv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u8.c +new file mode 100644 +index 000000000..0e972f093 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** addv_x0_u8: ++** uaddv (d[0-9]+), p0, z0\.b ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (addv_x0_u8, uint64_t, svuint8_t, ++ x0 = svaddv_u8 (p0, z0), ++ x0 = svaddv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c +new file mode 100644 +index 000000000..a61eec971 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c +@@ -0,0 +1,57 
@@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** adrb_u32base_s32offset: ++** adr z0\.s, \[z0\.s, z1\.s\] ++** ret ++*/ ++TEST_ADR (adrb_u32base_s32offset, svuint32_t, svint32_t, ++ z0 = svadrb_u32base_s32offset (z0, z1), ++ z0 = svadrb_offset (z0, z1)) ++ ++/* ++** adrb_u32base_u32offset: ++** adr z0\.s, \[z0\.s, z1\.s\] ++** ret ++*/ ++TEST_ADR (adrb_u32base_u32offset, svuint32_t, svuint32_t, ++ z0 = svadrb_u32base_u32offset (z0, z1), ++ z0 = svadrb_offset (z0, z1)) ++ ++/* ++** adrb_u64base_s64offset: ++** adr z0\.d, \[z0\.d, z1\.d\] ++** ret ++*/ ++TEST_ADR (adrb_u64base_s64offset, svuint64_t, svint64_t, ++ z0 = svadrb_u64base_s64offset (z0, z1), ++ z0 = svadrb_offset (z0, z1)) ++ ++/* ++** adrb_ext_u64base_s64offset: ++** adr z0\.d, \[z0\.d, z1\.d, sxtw\] ++** ret ++*/ ++TEST_ADR (adrb_ext_u64base_s64offset, svuint64_t, svint64_t, ++ z0 = svadrb_u64base_s64offset (z0, svextw_s64_x (svptrue_b64 (), z1)), ++ z0 = svadrb_offset (z0, svextw_x (svptrue_b64 (), z1))) ++ ++/* ++** adrb_u64base_u64offset: ++** adr z0\.d, \[z0\.d, z1\.d\] ++** ret ++*/ ++TEST_ADR (adrb_u64base_u64offset, svuint64_t, svuint64_t, ++ z0 = svadrb_u64base_u64offset (z0, z1), ++ z0 = svadrb_offset (z0, z1)) ++ ++/* ++** adrb_ext_u64base_u64offset: ++** adr z0\.d, \[z0\.d, z1\.d, uxtw\] ++** ret ++*/ ++TEST_ADR (adrb_ext_u64base_u64offset, svuint64_t, svuint64_t, ++ z0 = svadrb_u64base_u64offset (z0, svextw_u64_x (svptrue_b64 (), z1)), ++ z0 = svadrb_offset (z0, svextw_x (svptrue_b64 (), z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c +new file mode 100644 +index 000000000..970485bd6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c +@@ -0,0 +1,57 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** adrd_u32base_s32index: ++** adr z0\.s, \[z0\.s, z1\.s, lsl 3\] ++** ret ++*/ ++TEST_ADR (adrd_u32base_s32index, svuint32_t, svint32_t, ++ z0 = svadrd_u32base_s32index (z0, z1), ++ z0 = svadrd_index (z0, z1)) ++ ++/* ++** adrd_u32base_u32index: ++** adr z0\.s, \[z0\.s, z1\.s, lsl 3\] ++** ret ++*/ ++TEST_ADR (adrd_u32base_u32index, svuint32_t, svuint32_t, ++ z0 = svadrd_u32base_u32index (z0, z1), ++ z0 = svadrd_index (z0, z1)) ++ ++/* ++** adrd_u64base_s64index: ++** adr z0\.d, \[z0\.d, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_ADR (adrd_u64base_s64index, svuint64_t, svint64_t, ++ z0 = svadrd_u64base_s64index (z0, z1), ++ z0 = svadrd_index (z0, z1)) ++ ++/* ++** adrd_ext_u64base_s64index: ++** adr z0\.d, \[z0\.d, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_ADR (adrd_ext_u64base_s64index, svuint64_t, svint64_t, ++ z0 = svadrd_u64base_s64index (z0, svextw_s64_x (svptrue_b64 (), z1)), ++ z0 = svadrd_index (z0, svextw_x (svptrue_b64 (), z1))) ++ ++/* ++** adrd_u64base_u64index: ++** adr z0\.d, \[z0\.d, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_ADR (adrd_u64base_u64index, svuint64_t, svuint64_t, ++ z0 = svadrd_u64base_u64index (z0, z1), ++ z0 = svadrd_index (z0, z1)) ++ ++/* ++** adrd_ext_u64base_u64index: ++** adr z0\.d, \[z0\.d, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_ADR (adrd_ext_u64base_u64index, svuint64_t, svuint64_t, ++ z0 = svadrd_u64base_u64index (z0, svextw_u64_x (svptrue_b64 (), z1)), ++ z0 = svadrd_index (z0, svextw_x (svptrue_b64 (), z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c +new file mode 100644 +index 
000000000..d06f51fe3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c +@@ -0,0 +1,57 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** adrh_u32base_s32index: ++** adr z0\.s, \[z0\.s, z1\.s, lsl 1\] ++** ret ++*/ ++TEST_ADR (adrh_u32base_s32index, svuint32_t, svint32_t, ++ z0 = svadrh_u32base_s32index (z0, z1), ++ z0 = svadrh_index (z0, z1)) ++ ++/* ++** adrh_u32base_u32index: ++** adr z0\.s, \[z0\.s, z1\.s, lsl 1\] ++** ret ++*/ ++TEST_ADR (adrh_u32base_u32index, svuint32_t, svuint32_t, ++ z0 = svadrh_u32base_u32index (z0, z1), ++ z0 = svadrh_index (z0, z1)) ++ ++/* ++** adrh_u64base_s64index: ++** adr z0\.d, \[z0\.d, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_ADR (adrh_u64base_s64index, svuint64_t, svint64_t, ++ z0 = svadrh_u64base_s64index (z0, z1), ++ z0 = svadrh_index (z0, z1)) ++ ++/* ++** adrh_ext_u64base_s64index: ++** adr z0\.d, \[z0\.d, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_ADR (adrh_ext_u64base_s64index, svuint64_t, svint64_t, ++ z0 = svadrh_u64base_s64index (z0, svextw_s64_x (svptrue_b64 (), z1)), ++ z0 = svadrh_index (z0, svextw_x (svptrue_b64 (), z1))) ++ ++/* ++** adrh_u64base_u64index: ++** adr z0\.d, \[z0\.d, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_ADR (adrh_u64base_u64index, svuint64_t, svuint64_t, ++ z0 = svadrh_u64base_u64index (z0, z1), ++ z0 = svadrh_index (z0, z1)) ++ ++/* ++** adrh_ext_u64base_u64index: ++** adr z0\.d, \[z0\.d, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_ADR (adrh_ext_u64base_u64index, svuint64_t, svuint64_t, ++ z0 = svadrh_u64base_u64index (z0, svextw_u64_x (svptrue_b64 (), z1)), ++ z0 = svadrh_index (z0, svextw_x (svptrue_b64 (), z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c +new file mode 100644 +index 000000000..b23f25a11 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c +@@ -0,0 +1,57 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** adrw_u32base_s32index: ++** adr z0\.s, \[z0\.s, z1\.s, lsl 2\] ++** ret ++*/ ++TEST_ADR (adrw_u32base_s32index, svuint32_t, svint32_t, ++ z0 = svadrw_u32base_s32index (z0, z1), ++ z0 = svadrw_index (z0, z1)) ++ ++/* ++** adrw_u32base_u32index: ++** adr z0\.s, \[z0\.s, z1\.s, lsl 2\] ++** ret ++*/ ++TEST_ADR (adrw_u32base_u32index, svuint32_t, svuint32_t, ++ z0 = svadrw_u32base_u32index (z0, z1), ++ z0 = svadrw_index (z0, z1)) ++ ++/* ++** adrw_u64base_s64index: ++** adr z0\.d, \[z0\.d, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_ADR (adrw_u64base_s64index, svuint64_t, svint64_t, ++ z0 = svadrw_u64base_s64index (z0, z1), ++ z0 = svadrw_index (z0, z1)) ++ ++/* ++** adrw_ext_u64base_s64index: ++** adr z0\.d, \[z0\.d, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_ADR (adrw_ext_u64base_s64index, svuint64_t, svint64_t, ++ z0 = svadrw_u64base_s64index (z0, svextw_s64_x (svptrue_b64 (), z1)), ++ z0 = svadrw_index (z0, svextw_x (svptrue_b64 (), z1))) ++ ++/* ++** adrw_u64base_u64index: ++** adr z0\.d, \[z0\.d, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_ADR (adrw_u64base_u64index, svuint64_t, svuint64_t, ++ z0 = svadrw_u64base_u64index (z0, z1), ++ z0 = svadrw_index (z0, z1)) ++ ++/* ++** adrw_ext_u64base_u64index: ++** adr z0\.d, \[z0\.d, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_ADR (adrw_ext_u64base_u64index, svuint64_t, svuint64_t, ++ z0 = svadrw_u64base_u64index (z0, svextw_u64_x (svptrue_b64 (), z1)), ++ z0 = svadrw_index (z0, svextw_x (svptrue_b64 (), z1))) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_b.c +new file mode 100644 +index 000000000..f0c4ff1b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_b.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** and_b_z_tied1: ++** and p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) ++** ret ++*/ ++TEST_UNIFORM_P (and_b_z_tied1, ++ p0 = svand_b_z (p3, p0, p1), ++ p0 = svand_z (p3, p0, p1)) ++ ++/* ++** and_b_z_tied2: ++** and p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) ++** ret ++*/ ++TEST_UNIFORM_P (and_b_z_tied2, ++ p0 = svand_b_z (p3, p1, p0), ++ p0 = svand_z (p3, p1, p0)) ++ ++/* ++** and_b_z_untied: ++** and p0\.b, p3/z, (p1\.b, p2\.b|p2\.b, p1\.b) ++** ret ++*/ ++TEST_UNIFORM_P (and_b_z_untied, ++ p0 = svand_b_z (p3, p1, p2), ++ p0 = svand_z (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s16.c +new file mode 100644 +index 000000000..d54613e91 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s16.c +@@ -0,0 +1,422 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** and_s16_m_tied1: ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_s16_m_tied1, svint16_t, ++ z0 = svand_s16_m (p0, z0, z1), ++ z0 = svand_m (p0, z0, z1)) ++ ++/* ++** and_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** and z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_s16_m_tied2, svint16_t, ++ z0 = svand_s16_m (p0, z1, z0), ++ z0 = svand_m (p0, z1, z0)) ++ ++/* ++** and_s16_m_untied: ++** movprfx z0, z1 ++** and z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_s16_m_untied, svint16_t, ++ z0 = svand_s16_m (p0, z1, z2), ++ z0 = svand_m (p0, z1, z2)) ++ ++/* ++** and_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svand_n_s16_m (p0, z0, x0), ++ z0 = svand_m (p0, z0, x0)) ++ ++/* ++** and_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svand_n_s16_m (p0, z1, x0), ++ z0 = svand_m (p0, z1, x0)) ++ ++/* ++** and_1_s16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s16_m_tied1, svint16_t, ++ z0 = svand_n_s16_m (p0, z0, 1), ++ z0 = svand_m (p0, z0, 1)) ++ ++/* ++** and_1_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s16_m_untied, svint16_t, ++ z0 = svand_n_s16_m (p0, z1, 1), ++ z0 = svand_m (p0, z1, 1)) ++ ++/* ++** and_m2_s16_m: ++** mov (z[0-9]+\.h), #-2 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m2_s16_m, svint16_t, ++ z0 = svand_n_s16_m (p0, z0, -2), ++ z0 = svand_m (p0, z0, -2)) ++ ++/* ++** and_255_s16_m_tied1: ++** uxtb z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s16_m_tied1, svint16_t, ++ z0 = svand_n_s16_m (p0, z0, 255), ++ z0 = svand_m (p0, z0, 255)) ++ ++/* ++** and_255_s16_m_untied: ++** movprfx z0, z1 ++** uxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s16_m_untied, svint16_t, ++ z0 = svand_n_s16_m (p0, z1, 255), ++ z0 = svand_m (p0, z1, 255)) ++ ++/* ++** and_s16_z_tied1: 
++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_s16_z_tied1, svint16_t, ++ z0 = svand_s16_z (p0, z0, z1), ++ z0 = svand_z (p0, z0, z1)) ++ ++/* ++** and_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_s16_z_tied2, svint16_t, ++ z0 = svand_s16_z (p0, z1, z0), ++ z0 = svand_z (p0, z1, z0)) ++ ++/* ++** and_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** and z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s16_z_untied, svint16_t, ++ z0 = svand_s16_z (p0, z1, z2), ++ z0 = svand_z (p0, z1, z2)) ++ ++/* ++** and_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svand_n_s16_z (p0, z0, x0), ++ z0 = svand_z (p0, z0, x0)) ++ ++/* ++** and_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svand_n_s16_z (p0, z1, x0), ++ z0 = svand_z (p0, z1, x0)) ++ ++/* ++** and_1_s16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s16_z_tied1, svint16_t, ++ z0 = svand_n_s16_z (p0, z0, 1), ++ z0 = svand_z (p0, z0, 1)) ++ ++/* ++** and_1_s16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s16_z_untied, svint16_t, ++ z0 = svand_n_s16_z (p0, z1, 1), ++ z0 = svand_z (p0, z1, 1)) ++ ++/* ++** and_255_s16_z_tied1: ++** ( ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** uxtb z0\.h, p0/m, \1\.h ++** | ++** mov (z[0-9]+\.h), #255 ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s16_z_tied1, svint16_t, ++ z0 = svand_n_s16_z (p0, z0, 255), ++ z0 = svand_z (p0, z0, 255)) ++ ++/* ++** and_255_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** uxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s16_z_untied, svint16_t, ++ z0 = svand_n_s16_z (p0, z1, 255), ++ z0 = svand_z (p0, z1, 255)) ++ ++/* ++** and_s16_x_tied1: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s16_x_tied1, svint16_t, ++ z0 = svand_s16_x (p0, z0, z1), ++ z0 = svand_x (p0, z0, z1)) ++ ++/* ++** and_s16_x_tied2: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s16_x_tied2, svint16_t, ++ z0 = svand_s16_x (p0, z1, z0), ++ z0 = svand_x (p0, z1, z0)) ++ ++/* ++** and_s16_x_untied: ++** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s16_x_untied, svint16_t, ++ z0 = svand_s16_x (p0, z1, z2), ++ z0 = svand_x (p0, z1, z2)) ++ ++/* ++** and_w0_s16_x_tied1: ++** mov (z[0-9]+)\.h, w0 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svand_n_s16_x (p0, z0, x0), ++ z0 = svand_x (p0, z0, x0)) ++ ++/* ++** and_w0_s16_x_untied: ++** mov (z[0-9]+)\.h, w0 ++** and z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svand_n_s16_x 
(p0, z1, x0), ++ z0 = svand_x (p0, z1, x0)) ++ ++/* ++** and_1_s16_x_tied1: ++** and z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s16_x_tied1, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, 1), ++ z0 = svand_x (p0, z0, 1)) ++ ++/* ++** and_1_s16_x_untied: ++** movprfx z0, z1 ++** and z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s16_x_untied, svint16_t, ++ z0 = svand_n_s16_x (p0, z1, 1), ++ z0 = svand_x (p0, z1, 1)) ++ ++/* ++** and_127_s16_x: ++** and z0\.h, z0\.h, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (and_127_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, 127), ++ z0 = svand_x (p0, z0, 127)) ++ ++/* ++** and_128_s16_x: ++** and z0\.h, z0\.h, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_128_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, 128), ++ z0 = svand_x (p0, z0, 128)) ++ ++/* ++** and_255_s16_x: ++** and z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, 255), ++ z0 = svand_x (p0, z0, 255)) ++ ++/* ++** and_256_s16_x: ++** and z0\.h, z0\.h, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (and_256_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, 256), ++ z0 = svand_x (p0, z0, 256)) ++ ++/* ++** and_257_s16_x: ++** and z0\.h, z0\.h, #0x101 ++** ret ++*/ ++TEST_UNIFORM_Z (and_257_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, 257), ++ z0 = svand_x (p0, z0, 257)) ++ ++/* ++** and_512_s16_x: ++** and z0\.h, z0\.h, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (and_512_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, 512), ++ z0 = svand_x (p0, z0, 512)) ++ ++/* ++** and_65280_s16_x: ++** and z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_65280_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, 0xff00), ++ z0 = svand_x (p0, z0, 0xff00)) ++ ++/* ++** and_m127_s16_x: ++** and z0\.h, z0\.h, #0xff81 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m127_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, -127), ++ z0 = svand_x (p0, z0, -127)) ++ ++/* ++** and_m128_s16_x: ++** and z0\.h, z0\.h, #0xff80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m128_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, -128), ++ z0 = svand_x (p0, z0, -128)) ++ ++/* ++** and_m255_s16_x: ++** and z0\.h, z0\.h, #0xff01 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m255_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, -255), ++ z0 = svand_x (p0, z0, -255)) ++ ++/* ++** and_m256_s16_x: ++** and z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m256_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, -256), ++ z0 = svand_x (p0, z0, -256)) ++ ++/* ++** and_m257_s16_x: ++** and z0\.h, z0\.h, #0xfeff ++** ret ++*/ ++TEST_UNIFORM_Z (and_m257_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, -257), ++ z0 = svand_x (p0, z0, -257)) ++ ++/* ++** and_m512_s16_x: ++** and z0\.h, z0\.h, #0xfe00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m512_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, -512), ++ z0 = svand_x (p0, z0, -512)) ++ ++/* ++** and_m32768_s16_x: ++** and z0\.h, z0\.h, #0x8000 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m32768_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, -0x8000), ++ z0 = svand_x (p0, z0, -0x8000)) ++ ++/* ++** and_5_s16_x: ++** mov (z[0-9]+)\.h, #5 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_5_s16_x, svint16_t, ++ z0 = svand_n_s16_x (p0, z0, 5), ++ z0 = svand_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s32.c +new file mode 100644 +index 000000000..7f4082b32 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s32.c +@@ -0,0 +1,464 
@@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** and_s32_m_tied1: ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_s32_m_tied1, svint32_t, ++ z0 = svand_s32_m (p0, z0, z1), ++ z0 = svand_m (p0, z0, z1)) ++ ++/* ++** and_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** and z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_s32_m_tied2, svint32_t, ++ z0 = svand_s32_m (p0, z1, z0), ++ z0 = svand_m (p0, z1, z0)) ++ ++/* ++** and_s32_m_untied: ++** movprfx z0, z1 ++** and z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_s32_m_untied, svint32_t, ++ z0 = svand_s32_m (p0, z1, z2), ++ z0 = svand_m (p0, z1, z2)) ++ ++/* ++** and_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svand_n_s32_m (p0, z0, x0), ++ z0 = svand_m (p0, z0, x0)) ++ ++/* ++** and_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svand_n_s32_m (p0, z1, x0), ++ z0 = svand_m (p0, z1, x0)) ++ ++/* ++** and_1_s32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s32_m_tied1, svint32_t, ++ z0 = svand_n_s32_m (p0, z0, 1), ++ z0 = svand_m (p0, z0, 1)) ++ ++/* ++** and_1_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s32_m_untied, svint32_t, ++ z0 = svand_n_s32_m (p0, z1, 1), ++ z0 = svand_m (p0, z1, 1)) ++ ++/* ++** and_m2_s32_m: ++** mov (z[0-9]+\.s), #-2 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m2_s32_m, svint32_t, ++ z0 = svand_n_s32_m (p0, z0, -2), ++ z0 = svand_m (p0, z0, -2)) ++ ++/* ++** and_255_s32_m_tied1: ++** uxtb z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s32_m_tied1, svint32_t, ++ z0 = svand_n_s32_m (p0, z0, 255), ++ z0 = svand_m (p0, z0, 255)) ++ ++/* ++** and_255_s32_m_untied: ++** movprfx z0, z1 ++** uxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s32_m_untied, svint32_t, ++ z0 = svand_n_s32_m (p0, z1, 255), ++ z0 = svand_m (p0, z1, 255)) ++ ++/* ++** and_65535_s32_m_tied1: ++** uxth z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_s32_m_tied1, svint32_t, ++ z0 = svand_n_s32_m (p0, z0, 65535), ++ z0 = svand_m (p0, z0, 65535)) ++ ++/* ++** and_65535_s32_m_untied: ++** movprfx z0, z1 ++** uxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_s32_m_untied, svint32_t, ++ z0 = svand_n_s32_m (p0, z1, 65535), ++ z0 = svand_m (p0, z1, 65535)) ++ ++/* ++** and_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_s32_z_tied1, svint32_t, ++ z0 = svand_s32_z (p0, z0, z1), ++ z0 = svand_z (p0, z0, z1)) ++ ++/* ++** and_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_s32_z_tied2, svint32_t, ++ z0 = svand_s32_z (p0, z1, z0), ++ z0 = svand_z (p0, z1, z0)) ++ ++/* ++** and_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** and z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s32_z_untied, svint32_t, ++ z0 = svand_s32_z (p0, z1, z2), ++ z0 = svand_z (p0, z1, z2)) ++ ++/* ++** and_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 
++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svand_n_s32_z (p0, z0, x0), ++ z0 = svand_z (p0, z0, x0)) ++ ++/* ++** and_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svand_n_s32_z (p0, z1, x0), ++ z0 = svand_z (p0, z1, x0)) ++ ++/* ++** and_1_s32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s32_z_tied1, svint32_t, ++ z0 = svand_n_s32_z (p0, z0, 1), ++ z0 = svand_z (p0, z0, 1)) ++ ++/* ++** and_1_s32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s32_z_untied, svint32_t, ++ z0 = svand_n_s32_z (p0, z1, 1), ++ z0 = svand_z (p0, z1, 1)) ++ ++/* ++** and_255_s32_z_tied1: ++** ( ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** uxtb z0\.s, p0/m, \1\.s ++** | ++** mov (z[0-9]+\.s), #255 ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s32_z_tied1, svint32_t, ++ z0 = svand_n_s32_z (p0, z0, 255), ++ z0 = svand_z (p0, z0, 255)) ++ ++/* ++** and_255_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** uxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s32_z_untied, svint32_t, ++ z0 = svand_n_s32_z (p0, z1, 255), ++ z0 = svand_z (p0, z1, 255)) ++ ++/* ++** and_65535_s32_z_tied1: ++** ( ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** uxth z0\.s, p0/m, \1\.s ++** | ++** mov (z[0-9]+\.s), #65535 ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_s32_z_tied1, svint32_t, ++ z0 = svand_n_s32_z (p0, z0, 65535), ++ z0 = svand_z (p0, z0, 65535)) ++ ++/* ++** and_65535_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** uxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_s32_z_untied, svint32_t, ++ z0 = svand_n_s32_z (p0, z1, 65535), ++ z0 = svand_z (p0, z1, 65535)) ++ ++/* ++** and_s32_x_tied1: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s32_x_tied1, svint32_t, ++ z0 = svand_s32_x (p0, z0, z1), ++ z0 = svand_x (p0, z0, z1)) ++ ++/* ++** and_s32_x_tied2: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s32_x_tied2, svint32_t, ++ z0 = svand_s32_x (p0, z1, z0), ++ z0 = svand_x (p0, z1, z0)) ++ ++/* ++** and_s32_x_untied: ++** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s32_x_untied, svint32_t, ++ z0 = svand_s32_x (p0, z1, z2), ++ z0 = svand_x (p0, z1, z2)) ++ ++/* ++** and_w0_s32_x_tied1: ++** mov (z[0-9]+)\.s, w0 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svand_n_s32_x (p0, z0, x0), ++ z0 = svand_x (p0, z0, x0)) ++ ++/* ++** and_w0_s32_x_untied: ++** mov (z[0-9]+)\.s, w0 ++** and z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svand_n_s32_x (p0, z1, x0), ++ z0 = svand_x (p0, z1, x0)) ++ ++/* ++** and_1_s32_x_tied1: ++** and z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s32_x_tied1, svint32_t, 
++ z0 = svand_n_s32_x (p0, z0, 1), ++ z0 = svand_x (p0, z0, 1)) ++ ++/* ++** and_1_s32_x_untied: ++** movprfx z0, z1 ++** and z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s32_x_untied, svint32_t, ++ z0 = svand_n_s32_x (p0, z1, 1), ++ z0 = svand_x (p0, z1, 1)) ++ ++/* ++** and_127_s32_x: ++** and z0\.s, z0\.s, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (and_127_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, 127), ++ z0 = svand_x (p0, z0, 127)) ++ ++/* ++** and_128_s32_x: ++** and z0\.s, z0\.s, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_128_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, 128), ++ z0 = svand_x (p0, z0, 128)) ++ ++/* ++** and_255_s32_x: ++** and z0\.s, z0\.s, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, 255), ++ z0 = svand_x (p0, z0, 255)) ++ ++/* ++** and_256_s32_x: ++** and z0\.s, z0\.s, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (and_256_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, 256), ++ z0 = svand_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (and_257_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, 257), ++ z0 = svand_x (p0, z0, 257)) ++ ++/* ++** and_512_s32_x: ++** and z0\.s, z0\.s, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (and_512_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, 512), ++ z0 = svand_x (p0, z0, 512)) ++ ++/* ++** and_65280_s32_x: ++** and z0\.s, z0\.s, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_65280_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, 0xff00), ++ z0 = svand_x (p0, z0, 0xff00)) ++ ++/* ++** and_m127_s32_x: ++** and z0\.s, z0\.s, #0xffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m127_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, -127), ++ z0 = svand_x (p0, z0, -127)) ++ ++/* ++** and_m128_s32_x: ++** and z0\.s, z0\.s, #0xffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m128_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, -128), ++ z0 = svand_x (p0, z0, -128)) ++ ++/* ++** and_m255_s32_x: ++** and z0\.s, z0\.s, #0xffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m255_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, -255), ++ z0 = svand_x (p0, z0, -255)) ++ ++/* ++** and_m256_s32_x: ++** and z0\.s, z0\.s, #0xffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m256_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, -256), ++ z0 = svand_x (p0, z0, -256)) ++ ++/* ++** and_m257_s32_x: ++** and z0\.s, z0\.s, #0xfffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (and_m257_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, -257), ++ z0 = svand_x (p0, z0, -257)) ++ ++/* ++** and_m512_s32_x: ++** and z0\.s, z0\.s, #0xfffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m512_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, -512), ++ z0 = svand_x (p0, z0, -512)) ++ ++/* ++** and_m32768_s32_x: ++** and z0\.s, z0\.s, #0xffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m32768_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, -0x8000), ++ z0 = svand_x (p0, z0, -0x8000)) ++ ++/* ++** and_5_s32_x: ++** mov (z[0-9]+)\.s, #5 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_5_s32_x, svint32_t, ++ z0 = svand_n_s32_x (p0, z0, 5), ++ z0 = svand_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s64.c +new file mode 100644 +index 000000000..8868258dc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s64.c +@@ -0,0 +1,510 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** and_s64_m_tied1: ++** and z0\.d, p0/m, z0\.d, 
z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_s64_m_tied1, svint64_t, ++ z0 = svand_s64_m (p0, z0, z1), ++ z0 = svand_m (p0, z0, z1)) ++ ++/* ++** and_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_s64_m_tied2, svint64_t, ++ z0 = svand_s64_m (p0, z1, z0), ++ z0 = svand_m (p0, z1, z0)) ++ ++/* ++** and_s64_m_untied: ++** movprfx z0, z1 ++** and z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_s64_m_untied, svint64_t, ++ z0 = svand_s64_m (p0, z1, z2), ++ z0 = svand_m (p0, z1, z2)) ++ ++/* ++** and_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svand_n_s64_m (p0, z0, x0), ++ z0 = svand_m (p0, z0, x0)) ++ ++/* ++** and_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svand_n_s64_m (p0, z1, x0), ++ z0 = svand_m (p0, z1, x0)) ++ ++/* ++** and_1_s64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s64_m_tied1, svint64_t, ++ z0 = svand_n_s64_m (p0, z0, 1), ++ z0 = svand_m (p0, z0, 1)) ++ ++/* ++** and_1_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s64_m_untied, svint64_t, ++ z0 = svand_n_s64_m (p0, z1, 1), ++ z0 = svand_m (p0, z1, 1)) ++ ++/* ++** and_m2_s64_m: ++** mov (z[0-9]+\.d), #-2 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m2_s64_m, svint64_t, ++ z0 = svand_n_s64_m (p0, z0, -2), ++ z0 = svand_m (p0, z0, -2)) ++ ++/* ++** and_255_s64_m_tied1: ++** uxtb z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s64_m_tied1, svint64_t, ++ z0 = svand_n_s64_m (p0, z0, 255), ++ z0 = svand_m (p0, z0, 255)) ++ ++/* ++** and_255_s64_m_untied: ++** movprfx z0, z1 ++** uxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s64_m_untied, svint64_t, ++ z0 = svand_n_s64_m (p0, z1, 255), ++ z0 = svand_m (p0, z1, 255)) ++ ++/* ++** and_65535_s64_m_tied1: ++** uxth z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_s64_m_tied1, svint64_t, ++ z0 = svand_n_s64_m (p0, z0, 65535), ++ z0 = svand_m (p0, z0, 65535)) ++ ++/* ++** and_65535_s64_m_untied: ++** movprfx z0, z1 ++** uxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_s64_m_untied, svint64_t, ++ z0 = svand_n_s64_m (p0, z1, 65535), ++ z0 = svand_m (p0, z1, 65535)) ++ ++/* ++** and_0xffffffff_s64_m_tied1: ++** uxtw z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_0xffffffff_s64_m_tied1, svint64_t, ++ z0 = svand_n_s64_m (p0, z0, 0xffffffff), ++ z0 = svand_m (p0, z0, 0xffffffff)) ++ ++/* ++** and_0xffffffff_s64_m_untied: ++** movprfx z0, z1 ++** uxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_0xffffffff_s64_m_untied, svint64_t, ++ z0 = svand_n_s64_m (p0, z1, 0xffffffff), ++ z0 = svand_m (p0, z1, 0xffffffff)) ++ ++/* ++** and_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_s64_z_tied1, svint64_t, ++ z0 = svand_s64_z (p0, z0, z1), ++ z0 = svand_z (p0, z0, z1)) ++ ++/* ++** and_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_s64_z_tied2, svint64_t, ++ z0 = svand_s64_z (p0, z1, z0), ++ z0 = svand_z (p0, z1, z0)) ++ ++/* ++** and_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, 
z1\.d ++** and z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s64_z_untied, svint64_t, ++ z0 = svand_s64_z (p0, z1, z2), ++ z0 = svand_z (p0, z1, z2)) ++ ++/* ++** and_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svand_n_s64_z (p0, z0, x0), ++ z0 = svand_z (p0, z0, x0)) ++ ++/* ++** and_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svand_n_s64_z (p0, z1, x0), ++ z0 = svand_z (p0, z1, x0)) ++ ++/* ++** and_1_s64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s64_z_tied1, svint64_t, ++ z0 = svand_n_s64_z (p0, z0, 1), ++ z0 = svand_z (p0, z0, 1)) ++ ++/* ++** and_1_s64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s64_z_untied, svint64_t, ++ z0 = svand_n_s64_z (p0, z1, 1), ++ z0 = svand_z (p0, z1, 1)) ++ ++/* ++** and_255_s64_z_tied1: ++** ( ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** uxtb z0\.d, p0/m, \1 ++** | ++** mov (z[0-9]+\.d), #255 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s64_z_tied1, svint64_t, ++ z0 = svand_n_s64_z (p0, z0, 255), ++ z0 = svand_z (p0, z0, 255)) ++ ++/* ++** and_255_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** uxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s64_z_untied, svint64_t, ++ z0 = svand_n_s64_z (p0, z1, 255), ++ z0 = svand_z (p0, z1, 255)) ++ ++/* ++** and_65535_s64_z_tied1: ++** ( ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** uxth z0\.d, p0/m, \1 ++** | ++** mov (z[0-9]+\.d), #65535 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_s64_z_tied1, svint64_t, ++ z0 = svand_n_s64_z (p0, z0, 65535), ++ z0 = svand_z (p0, z0, 65535)) ++ ++/* ++** and_65535_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** uxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_s64_z_untied, svint64_t, ++ z0 = svand_n_s64_z (p0, z1, 65535), ++ z0 = svand_z (p0, z1, 65535)) ++ ++/* ++** and_0xffffffff_s64_z_tied1: ++** ( ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** uxtw z0\.d, p0/m, \1 ++** | ++** mov (z[0-9]+\.d), #4294967295 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_0xffffffff_s64_z_tied1, svint64_t, ++ z0 = svand_n_s64_z (p0, z0, 0xffffffff), ++ z0 = svand_z (p0, z0, 0xffffffff)) ++ ++/* ++** and_0xffffffff_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** uxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_0xffffffff_s64_z_untied, svint64_t, ++ z0 = svand_n_s64_z (p0, z1, 0xffffffff), ++ z0 = svand_z (p0, z1, 0xffffffff)) ++ ++/* ++** and_s64_x_tied1: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s64_x_tied1, svint64_t, ++ z0 = svand_s64_x (p0, z0, z1), ++ z0 = svand_x (p0, z0, z1)) ++ ++/* ++** and_s64_x_tied2: ++** and z0\.d, (z0\.d, 
z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s64_x_tied2, svint64_t, ++ z0 = svand_s64_x (p0, z1, z0), ++ z0 = svand_x (p0, z1, z0)) ++ ++/* ++** and_s64_x_untied: ++** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s64_x_untied, svint64_t, ++ z0 = svand_s64_x (p0, z1, z2), ++ z0 = svand_x (p0, z1, z2)) ++ ++/* ++** and_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** and z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svand_n_s64_x (p0, z0, x0), ++ z0 = svand_x (p0, z0, x0)) ++ ++/* ++** and_x0_s64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** and z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svand_n_s64_x (p0, z1, x0), ++ z0 = svand_x (p0, z1, x0)) ++ ++/* ++** and_1_s64_x_tied1: ++** and z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s64_x_tied1, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, 1), ++ z0 = svand_x (p0, z0, 1)) ++ ++/* ++** and_1_s64_x_untied: ++** movprfx z0, z1 ++** and z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s64_x_untied, svint64_t, ++ z0 = svand_n_s64_x (p0, z1, 1), ++ z0 = svand_x (p0, z1, 1)) ++ ++/* ++** and_127_s64_x: ++** and z0\.d, z0\.d, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (and_127_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, 127), ++ z0 = svand_x (p0, z0, 127)) ++ ++/* ++** and_128_s64_x: ++** and z0\.d, z0\.d, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_128_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, 128), ++ z0 = svand_x (p0, z0, 128)) ++ ++/* ++** and_255_s64_x: ++** and z0\.d, z0\.d, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, 255), ++ z0 = svand_x (p0, z0, 255)) ++ ++/* ++** and_256_s64_x: ++** and z0\.d, z0\.d, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (and_256_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, 256), ++ z0 = svand_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (and_257_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, 257), ++ z0 = svand_x (p0, z0, 257)) ++ ++/* ++** and_512_s64_x: ++** and z0\.d, z0\.d, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (and_512_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, 512), ++ z0 = svand_x (p0, z0, 512)) ++ ++/* ++** and_65280_s64_x: ++** and z0\.d, z0\.d, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_65280_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, 0xff00), ++ z0 = svand_x (p0, z0, 0xff00)) ++ ++/* ++** and_m127_s64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m127_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, -127), ++ z0 = svand_x (p0, z0, -127)) ++ ++/* ++** and_m128_s64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m128_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, -128), ++ z0 = svand_x (p0, z0, -128)) ++ ++/* ++** and_m255_s64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m255_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, -255), ++ z0 = svand_x (p0, z0, -255)) ++ ++/* ++** and_m256_s64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m256_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, -256), ++ z0 = svand_x (p0, z0, -256)) ++ ++/* ++** and_m257_s64_x: ++** and z0\.d, z0\.d, #0xfffffffffffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (and_m257_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, -257), ++ z0 = svand_x (p0, z0, -257)) ++ ++/* ++** and_m512_s64_x: ++** and z0\.d, z0\.d, #0xfffffffffffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m512_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, -512), ++ z0 = svand_x (p0, z0, -512)) ++ ++/* ++** and_m32768_s64_x: ++** and z0\.d, z0\.d, #0xffffffffffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m32768_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, -0x8000), ++ z0 = svand_x (p0, z0, -0x8000)) ++ ++/* ++** and_5_s64_x: ++** mov (z[0-9]+\.d), #5 ++** and z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_5_s64_x, svint64_t, ++ z0 = svand_n_s64_x (p0, z0, 5), ++ z0 = svand_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s8.c +new file mode 100644 +index 000000000..61d168d3f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s8.c +@@ -0,0 +1,294 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** and_s8_m_tied1: ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (and_s8_m_tied1, svint8_t, ++ z0 = svand_s8_m (p0, z0, z1), ++ z0 = svand_m (p0, z0, z1)) ++ ++/* ++** and_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** and z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (and_s8_m_tied2, svint8_t, ++ z0 = svand_s8_m (p0, z1, z0), ++ z0 = svand_m (p0, z1, z0)) ++ ++/* ++** and_s8_m_untied: ++** movprfx z0, z1 ++** and z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (and_s8_m_untied, svint8_t, ++ z0 = svand_s8_m (p0, z1, z2), ++ z0 = svand_m (p0, z1, z2)) ++ ++/* ++** and_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svand_n_s8_m (p0, z0, x0), ++ z0 = svand_m (p0, z0, x0)) ++ ++/* ++** and_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = 
svand_n_s8_m (p0, z1, x0), ++ z0 = svand_m (p0, z1, x0)) ++ ++/* ++** and_1_s8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s8_m_tied1, svint8_t, ++ z0 = svand_n_s8_m (p0, z0, 1), ++ z0 = svand_m (p0, z0, 1)) ++ ++/* ++** and_1_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s8_m_untied, svint8_t, ++ z0 = svand_n_s8_m (p0, z1, 1), ++ z0 = svand_m (p0, z1, 1)) ++ ++/* ++** and_m2_s8_m: ++** mov (z[0-9]+\.b), #-2 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m2_s8_m, svint8_t, ++ z0 = svand_n_s8_m (p0, z0, -2), ++ z0 = svand_m (p0, z0, -2)) ++ ++/* ++** and_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (and_s8_z_tied1, svint8_t, ++ z0 = svand_s8_z (p0, z0, z1), ++ z0 = svand_z (p0, z0, z1)) ++ ++/* ++** and_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (and_s8_z_tied2, svint8_t, ++ z0 = svand_s8_z (p0, z1, z0), ++ z0 = svand_z (p0, z1, z0)) ++ ++/* ++** and_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** and z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s8_z_untied, svint8_t, ++ z0 = svand_s8_z (p0, z1, z2), ++ z0 = svand_z (p0, z1, z2)) ++ ++/* ++** and_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svand_n_s8_z (p0, z0, x0), ++ z0 = svand_z (p0, z0, x0)) ++ ++/* ++** and_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svand_n_s8_z (p0, z1, x0), ++ z0 = svand_z (p0, z1, x0)) ++ ++/* ++** and_1_s8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s8_z_tied1, svint8_t, ++ z0 = svand_n_s8_z (p0, z0, 1), ++ z0 = svand_z (p0, z0, 1)) ++ ++/* ++** and_1_s8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s8_z_untied, svint8_t, ++ z0 = svand_n_s8_z (p0, z1, 1), ++ z0 = svand_z (p0, z1, 1)) ++ ++/* ++** and_s8_x_tied1: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s8_x_tied1, svint8_t, ++ z0 = svand_s8_x (p0, z0, z1), ++ z0 = svand_x (p0, z0, z1)) ++ ++/* ++** and_s8_x_tied2: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s8_x_tied2, svint8_t, ++ z0 = svand_s8_x (p0, z1, z0), ++ z0 = svand_x (p0, z1, z0)) ++ ++/* ++** and_s8_x_untied: ++** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_s8_x_untied, svint8_t, ++ z0 = svand_s8_x (p0, z1, z2), ++ z0 = svand_x (p0, z1, z2)) ++ ++/* ++** and_w0_s8_x_tied1: ++** mov (z[0-9]+)\.b, w0 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svand_n_s8_x (p0, z0, x0), ++ z0 = svand_x (p0, z0, x0)) ++ ++/* ++** and_w0_s8_x_untied: ++** mov (z[0-9]+)\.b, w0 ++** and z0\.d, (z1\.d, 
\1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svand_n_s8_x (p0, z1, x0), ++ z0 = svand_x (p0, z1, x0)) ++ ++/* ++** and_1_s8_x_tied1: ++** and z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s8_x_tied1, svint8_t, ++ z0 = svand_n_s8_x (p0, z0, 1), ++ z0 = svand_x (p0, z0, 1)) ++ ++/* ++** and_1_s8_x_untied: ++** movprfx z0, z1 ++** and z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_s8_x_untied, svint8_t, ++ z0 = svand_n_s8_x (p0, z1, 1), ++ z0 = svand_x (p0, z1, 1)) ++ ++/* ++** and_127_s8_x: ++** and z0\.b, z0\.b, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (and_127_s8_x, svint8_t, ++ z0 = svand_n_s8_x (p0, z0, 127), ++ z0 = svand_x (p0, z0, 127)) ++ ++/* ++** and_128_s8_x: ++** and z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_128_s8_x, svint8_t, ++ z0 = svand_n_s8_x (p0, z0, 128), ++ z0 = svand_x (p0, z0, 128)) ++ ++/* ++** and_255_s8_x: ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_s8_x, svint8_t, ++ z0 = svand_n_s8_x (p0, z0, 255), ++ z0 = svand_x (p0, z0, 255)) ++ ++/* ++** and_m127_s8_x: ++** and z0\.b, z0\.b, #0x81 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m127_s8_x, svint8_t, ++ z0 = svand_n_s8_x (p0, z0, -127), ++ z0 = svand_x (p0, z0, -127)) ++ ++/* ++** and_m128_s8_x: ++** and z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m128_s8_x, svint8_t, ++ z0 = svand_n_s8_x (p0, z0, -128), ++ z0 = svand_x (p0, z0, -128)) ++ ++/* ++** and_5_s8_x: ++** mov (z[0-9]+)\.b, #5 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_5_s8_x, svint8_t, ++ z0 = svand_n_s8_x (p0, z0, 5), ++ z0 = svand_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u16.c +new file mode 100644 +index 000000000..875a08d71 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u16.c +@@ -0,0 +1,422 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** and_u16_m_tied1: ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_u16_m_tied1, svuint16_t, ++ z0 = svand_u16_m (p0, z0, z1), ++ z0 = svand_m (p0, z0, z1)) ++ ++/* ++** and_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** and z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_u16_m_tied2, svuint16_t, ++ z0 = svand_u16_m (p0, z1, z0), ++ z0 = svand_m (p0, z1, z0)) ++ ++/* ++** and_u16_m_untied: ++** movprfx z0, z1 ++** and z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_u16_m_untied, svuint16_t, ++ z0 = svand_u16_m (p0, z1, z2), ++ z0 = svand_m (p0, z1, z2)) ++ ++/* ++** and_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svand_n_u16_m (p0, z0, x0), ++ z0 = svand_m (p0, z0, x0)) ++ ++/* ++** and_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svand_n_u16_m (p0, z1, x0), ++ z0 = svand_m (p0, z1, x0)) ++ ++/* ++** and_1_u16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u16_m_tied1, svuint16_t, ++ z0 = svand_n_u16_m (p0, z0, 1), ++ z0 = svand_m (p0, z0, 1)) ++ ++/* ++** and_1_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u16_m_untied, svuint16_t, 
++ z0 = svand_n_u16_m (p0, z1, 1), ++ z0 = svand_m (p0, z1, 1)) ++ ++/* ++** and_m2_u16_m: ++** mov (z[0-9]+\.h), #-2 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m2_u16_m, svuint16_t, ++ z0 = svand_n_u16_m (p0, z0, -2), ++ z0 = svand_m (p0, z0, -2)) ++ ++/* ++** and_255_u16_m_tied1: ++** uxtb z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u16_m_tied1, svuint16_t, ++ z0 = svand_n_u16_m (p0, z0, 255), ++ z0 = svand_m (p0, z0, 255)) ++ ++/* ++** and_255_u16_m_untied: ++** movprfx z0, z1 ++** uxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u16_m_untied, svuint16_t, ++ z0 = svand_n_u16_m (p0, z1, 255), ++ z0 = svand_m (p0, z1, 255)) ++ ++/* ++** and_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_u16_z_tied1, svuint16_t, ++ z0 = svand_u16_z (p0, z0, z1), ++ z0 = svand_z (p0, z0, z1)) ++ ++/* ++** and_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_u16_z_tied2, svuint16_t, ++ z0 = svand_u16_z (p0, z1, z0), ++ z0 = svand_z (p0, z1, z0)) ++ ++/* ++** and_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** and z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u16_z_untied, svuint16_t, ++ z0 = svand_u16_z (p0, z1, z2), ++ z0 = svand_z (p0, z1, z2)) ++ ++/* ++** and_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svand_n_u16_z (p0, z0, x0), ++ z0 = svand_z (p0, z0, x0)) ++ ++/* ++** and_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svand_n_u16_z (p0, z1, x0), ++ z0 = svand_z (p0, z1, x0)) ++ ++/* ++** and_1_u16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u16_z_tied1, svuint16_t, ++ z0 = svand_n_u16_z (p0, z0, 1), ++ z0 = svand_z (p0, z0, 1)) ++ ++/* ++** and_1_u16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u16_z_untied, svuint16_t, ++ z0 = svand_n_u16_z (p0, z1, 1), ++ z0 = svand_z (p0, z1, 1)) ++ ++/* ++** and_255_u16_z_tied1: ++** ( ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** uxtb z0\.h, p0/m, \1\.h ++** | ++** mov (z[0-9]+\.h), #255 ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u16_z_tied1, svuint16_t, ++ z0 = svand_n_u16_z (p0, z0, 255), ++ z0 = svand_z (p0, z0, 255)) ++ ++/* ++** and_255_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** uxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u16_z_untied, svuint16_t, ++ z0 = svand_n_u16_z (p0, z1, 255), ++ z0 = svand_z (p0, z1, 255)) ++ ++/* ++** and_u16_x_tied1: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u16_x_tied1, svuint16_t, ++ z0 = svand_u16_x (p0, z0, z1), ++ z0 = svand_x (p0, z0, z1)) ++ ++/* ++** and_u16_x_tied2: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z 
(and_u16_x_tied2, svuint16_t, ++ z0 = svand_u16_x (p0, z1, z0), ++ z0 = svand_x (p0, z1, z0)) ++ ++/* ++** and_u16_x_untied: ++** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u16_x_untied, svuint16_t, ++ z0 = svand_u16_x (p0, z1, z2), ++ z0 = svand_x (p0, z1, z2)) ++ ++/* ++** and_w0_u16_x_tied1: ++** mov (z[0-9]+)\.h, w0 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svand_n_u16_x (p0, z0, x0), ++ z0 = svand_x (p0, z0, x0)) ++ ++/* ++** and_w0_u16_x_untied: ++** mov (z[0-9]+)\.h, w0 ++** and z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svand_n_u16_x (p0, z1, x0), ++ z0 = svand_x (p0, z1, x0)) ++ ++/* ++** and_1_u16_x_tied1: ++** and z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u16_x_tied1, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, 1), ++ z0 = svand_x (p0, z0, 1)) ++ ++/* ++** and_1_u16_x_untied: ++** movprfx z0, z1 ++** and z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u16_x_untied, svuint16_t, ++ z0 = svand_n_u16_x (p0, z1, 1), ++ z0 = svand_x (p0, z1, 1)) ++ ++/* ++** and_127_u16_x: ++** and z0\.h, z0\.h, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (and_127_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, 127), ++ z0 = svand_x (p0, z0, 127)) ++ ++/* ++** and_128_u16_x: ++** and z0\.h, z0\.h, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_128_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, 128), ++ z0 = svand_x (p0, z0, 128)) ++ ++/* ++** and_255_u16_x: ++** and z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, 255), ++ z0 = svand_x (p0, z0, 255)) ++ ++/* ++** and_256_u16_x: ++** and z0\.h, z0\.h, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (and_256_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, 256), ++ z0 = svand_x (p0, z0, 256)) ++ ++/* ++** and_257_u16_x: ++** and z0\.h, z0\.h, #0x101 ++** ret ++*/ ++TEST_UNIFORM_Z (and_257_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, 257), ++ z0 = svand_x (p0, z0, 257)) ++ ++/* ++** and_512_u16_x: ++** and z0\.h, z0\.h, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (and_512_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, 512), ++ z0 = svand_x (p0, z0, 512)) ++ ++/* ++** and_65280_u16_x: ++** and z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_65280_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, 0xff00), ++ z0 = svand_x (p0, z0, 0xff00)) ++ ++/* ++** and_m127_u16_x: ++** and z0\.h, z0\.h, #0xff81 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m127_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, -127), ++ z0 = svand_x (p0, z0, -127)) ++ ++/* ++** and_m128_u16_x: ++** and z0\.h, z0\.h, #0xff80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m128_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, -128), ++ z0 = svand_x (p0, z0, -128)) ++ ++/* ++** and_m255_u16_x: ++** and z0\.h, z0\.h, #0xff01 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m255_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, -255), ++ z0 = svand_x (p0, z0, -255)) ++ ++/* ++** and_m256_u16_x: ++** and z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m256_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, -256), ++ z0 = svand_x (p0, z0, -256)) ++ ++/* ++** and_m257_u16_x: ++** and z0\.h, z0\.h, #0xfeff ++** ret ++*/ ++TEST_UNIFORM_Z (and_m257_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, -257), ++ z0 = svand_x (p0, z0, -257)) ++ ++/* ++** and_m512_u16_x: ++** and z0\.h, z0\.h, #0xfe00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m512_u16_x, svuint16_t, 
++ z0 = svand_n_u16_x (p0, z0, -512), ++ z0 = svand_x (p0, z0, -512)) ++ ++/* ++** and_m32768_u16_x: ++** and z0\.h, z0\.h, #0x8000 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m32768_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, -0x8000), ++ z0 = svand_x (p0, z0, -0x8000)) ++ ++/* ++** and_5_u16_x: ++** mov (z[0-9]+)\.h, #5 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_5_u16_x, svuint16_t, ++ z0 = svand_n_u16_x (p0, z0, 5), ++ z0 = svand_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u32.c +new file mode 100644 +index 000000000..80ff50396 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u32.c +@@ -0,0 +1,464 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** and_u32_m_tied1: ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_u32_m_tied1, svuint32_t, ++ z0 = svand_u32_m (p0, z0, z1), ++ z0 = svand_m (p0, z0, z1)) ++ ++/* ++** and_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** and z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_u32_m_tied2, svuint32_t, ++ z0 = svand_u32_m (p0, z1, z0), ++ z0 = svand_m (p0, z1, z0)) ++ ++/* ++** and_u32_m_untied: ++** movprfx z0, z1 ++** and z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_u32_m_untied, svuint32_t, ++ z0 = svand_u32_m (p0, z1, z2), ++ z0 = svand_m (p0, z1, z2)) ++ ++/* ++** and_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svand_n_u32_m (p0, z0, x0), ++ z0 = svand_m (p0, z0, x0)) ++ ++/* ++** and_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svand_n_u32_m (p0, z1, x0), ++ z0 = svand_m (p0, z1, x0)) ++ ++/* ++** and_1_u32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u32_m_tied1, svuint32_t, ++ z0 = svand_n_u32_m (p0, z0, 1), ++ z0 = svand_m (p0, z0, 1)) ++ ++/* ++** and_1_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u32_m_untied, svuint32_t, ++ z0 = svand_n_u32_m (p0, z1, 1), ++ z0 = svand_m (p0, z1, 1)) ++ ++/* ++** and_m2_u32_m: ++** mov (z[0-9]+\.s), #-2 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m2_u32_m, svuint32_t, ++ z0 = svand_n_u32_m (p0, z0, -2), ++ z0 = svand_m (p0, z0, -2)) ++ ++/* ++** and_255_u32_m_tied1: ++** uxtb z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u32_m_tied1, svuint32_t, ++ z0 = svand_n_u32_m (p0, z0, 255), ++ z0 = svand_m (p0, z0, 255)) ++ ++/* ++** and_255_u32_m_untied: ++** movprfx z0, z1 ++** uxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u32_m_untied, svuint32_t, ++ z0 = svand_n_u32_m (p0, z1, 255), ++ z0 = svand_m (p0, z1, 255)) ++ ++/* ++** and_65535_u32_m_tied1: ++** uxth z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_u32_m_tied1, svuint32_t, ++ z0 = svand_n_u32_m (p0, z0, 65535), ++ z0 = svand_m (p0, z0, 65535)) ++ ++/* ++** and_65535_u32_m_untied: ++** movprfx z0, z1 ++** uxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_u32_m_untied, svuint32_t, ++ z0 = svand_n_u32_m (p0, z1, 65535), ++ z0 = svand_m (p0, z1, 65535)) ++ ++/* ++** and_u32_z_tied1: ++** 
movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_u32_z_tied1, svuint32_t, ++ z0 = svand_u32_z (p0, z0, z1), ++ z0 = svand_z (p0, z0, z1)) ++ ++/* ++** and_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_u32_z_tied2, svuint32_t, ++ z0 = svand_u32_z (p0, z1, z0), ++ z0 = svand_z (p0, z1, z0)) ++ ++/* ++** and_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** and z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u32_z_untied, svuint32_t, ++ z0 = svand_u32_z (p0, z1, z2), ++ z0 = svand_z (p0, z1, z2)) ++ ++/* ++** and_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svand_n_u32_z (p0, z0, x0), ++ z0 = svand_z (p0, z0, x0)) ++ ++/* ++** and_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svand_n_u32_z (p0, z1, x0), ++ z0 = svand_z (p0, z1, x0)) ++ ++/* ++** and_1_u32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u32_z_tied1, svuint32_t, ++ z0 = svand_n_u32_z (p0, z0, 1), ++ z0 = svand_z (p0, z0, 1)) ++ ++/* ++** and_1_u32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u32_z_untied, svuint32_t, ++ z0 = svand_n_u32_z (p0, z1, 1), ++ z0 = svand_z (p0, z1, 1)) ++ ++/* ++** and_255_u32_z_tied1: ++** ( ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** uxtb z0\.s, p0/m, \1\.s ++** | ++** mov (z[0-9]+\.s), #255 ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u32_z_tied1, svuint32_t, ++ z0 = svand_n_u32_z (p0, z0, 255), ++ z0 = svand_z (p0, z0, 255)) ++ ++/* ++** and_255_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** uxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u32_z_untied, svuint32_t, ++ z0 = svand_n_u32_z (p0, z1, 255), ++ z0 = svand_z (p0, z1, 255)) ++ ++/* ++** and_65535_u32_z_tied1: ++** ( ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** uxth z0\.s, p0/m, \1\.s ++** | ++** mov (z[0-9]+\.s), #65535 ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_u32_z_tied1, svuint32_t, ++ z0 = svand_n_u32_z (p0, z0, 65535), ++ z0 = svand_z (p0, z0, 65535)) ++ ++/* ++** and_65535_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** uxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_u32_z_untied, svuint32_t, ++ z0 = svand_n_u32_z (p0, z1, 65535), ++ z0 = svand_z (p0, z1, 65535)) ++ ++/* ++** and_u32_x_tied1: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u32_x_tied1, svuint32_t, ++ z0 = svand_u32_x (p0, z0, z1), ++ z0 = svand_x (p0, z0, z1)) ++ ++/* ++** and_u32_x_tied2: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u32_x_tied2, svuint32_t, ++ z0 = svand_u32_x (p0, z1, z0), ++ z0 = svand_x (p0, z1, z0)) ++ ++/* ++** 
and_u32_x_untied: ++** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u32_x_untied, svuint32_t, ++ z0 = svand_u32_x (p0, z1, z2), ++ z0 = svand_x (p0, z1, z2)) ++ ++/* ++** and_w0_u32_x_tied1: ++** mov (z[0-9]+)\.s, w0 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svand_n_u32_x (p0, z0, x0), ++ z0 = svand_x (p0, z0, x0)) ++ ++/* ++** and_w0_u32_x_untied: ++** mov (z[0-9]+)\.s, w0 ++** and z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svand_n_u32_x (p0, z1, x0), ++ z0 = svand_x (p0, z1, x0)) ++ ++/* ++** and_1_u32_x_tied1: ++** and z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u32_x_tied1, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, 1), ++ z0 = svand_x (p0, z0, 1)) ++ ++/* ++** and_1_u32_x_untied: ++** movprfx z0, z1 ++** and z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u32_x_untied, svuint32_t, ++ z0 = svand_n_u32_x (p0, z1, 1), ++ z0 = svand_x (p0, z1, 1)) ++ ++/* ++** and_127_u32_x: ++** and z0\.s, z0\.s, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (and_127_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, 127), ++ z0 = svand_x (p0, z0, 127)) ++ ++/* ++** and_128_u32_x: ++** and z0\.s, z0\.s, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_128_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, 128), ++ z0 = svand_x (p0, z0, 128)) ++ ++/* ++** and_255_u32_x: ++** and z0\.s, z0\.s, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, 255), ++ z0 = svand_x (p0, z0, 255)) ++ ++/* ++** and_256_u32_x: ++** and z0\.s, z0\.s, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (and_256_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, 256), ++ z0 = svand_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (and_257_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, 257), ++ z0 = svand_x (p0, z0, 257)) ++ ++/* ++** and_512_u32_x: ++** and z0\.s, z0\.s, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (and_512_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, 512), ++ z0 = svand_x (p0, z0, 512)) ++ ++/* ++** and_65280_u32_x: ++** and z0\.s, z0\.s, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_65280_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, 0xff00), ++ z0 = svand_x (p0, z0, 0xff00)) ++ ++/* ++** and_m127_u32_x: ++** and z0\.s, z0\.s, #0xffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m127_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, -127), ++ z0 = svand_x (p0, z0, -127)) ++ ++/* ++** and_m128_u32_x: ++** and z0\.s, z0\.s, #0xffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m128_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, -128), ++ z0 = svand_x (p0, z0, -128)) ++ ++/* ++** and_m255_u32_x: ++** and z0\.s, z0\.s, #0xffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m255_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, -255), ++ z0 = svand_x (p0, z0, -255)) ++ ++/* ++** and_m256_u32_x: ++** and z0\.s, z0\.s, #0xffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m256_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, -256), ++ z0 = svand_x (p0, z0, -256)) ++ ++/* ++** and_m257_u32_x: ++** and z0\.s, z0\.s, #0xfffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (and_m257_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, -257), ++ z0 = svand_x (p0, z0, -257)) ++ ++/* ++** and_m512_u32_x: ++** and z0\.s, z0\.s, #0xfffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m512_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, -512), ++ z0 = svand_x (p0, z0, -512)) ++ ++/* ++** and_m32768_u32_x: ++** and z0\.s, z0\.s, #0xffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m32768_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, -0x8000), ++ z0 = svand_x (p0, z0, -0x8000)) ++ ++/* ++** and_5_u32_x: ++** mov (z[0-9]+)\.s, #5 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_5_u32_x, svuint32_t, ++ z0 = svand_n_u32_x (p0, z0, 5), ++ z0 = svand_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u64.c +new file mode 100644 +index 000000000..906b19c37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u64.c +@@ -0,0 +1,510 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** and_u64_m_tied1: ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_u64_m_tied1, svuint64_t, ++ z0 = svand_u64_m (p0, z0, z1), ++ z0 = svand_m (p0, z0, z1)) ++ ++/* ++** and_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_u64_m_tied2, svuint64_t, ++ z0 = svand_u64_m (p0, z1, z0), ++ z0 = svand_m (p0, z1, z0)) ++ ++/* ++** and_u64_m_untied: ++** movprfx z0, z1 ++** and z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_u64_m_untied, svuint64_t, ++ z0 = svand_u64_m (p0, z1, z2), ++ z0 = svand_m (p0, z1, z2)) ++ ++/* ++** and_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svand_n_u64_m (p0, z0, x0), ++ z0 = svand_m (p0, z0, x0)) ++ ++/* ++** and_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svand_n_u64_m (p0, z1, x0), ++ 
z0 = svand_m (p0, z1, x0)) ++ ++/* ++** and_1_u64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u64_m_tied1, svuint64_t, ++ z0 = svand_n_u64_m (p0, z0, 1), ++ z0 = svand_m (p0, z0, 1)) ++ ++/* ++** and_1_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u64_m_untied, svuint64_t, ++ z0 = svand_n_u64_m (p0, z1, 1), ++ z0 = svand_m (p0, z1, 1)) ++ ++/* ++** and_m2_u64_m: ++** mov (z[0-9]+\.d), #-2 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m2_u64_m, svuint64_t, ++ z0 = svand_n_u64_m (p0, z0, -2), ++ z0 = svand_m (p0, z0, -2)) ++ ++/* ++** and_255_u64_m_tied1: ++** uxtb z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u64_m_tied1, svuint64_t, ++ z0 = svand_n_u64_m (p0, z0, 255), ++ z0 = svand_m (p0, z0, 255)) ++ ++/* ++** and_255_u64_m_untied: ++** movprfx z0, z1 ++** uxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u64_m_untied, svuint64_t, ++ z0 = svand_n_u64_m (p0, z1, 255), ++ z0 = svand_m (p0, z1, 255)) ++ ++/* ++** and_65535_u64_m_tied1: ++** uxth z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_u64_m_tied1, svuint64_t, ++ z0 = svand_n_u64_m (p0, z0, 65535), ++ z0 = svand_m (p0, z0, 65535)) ++ ++/* ++** and_65535_u64_m_untied: ++** movprfx z0, z1 ++** uxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_u64_m_untied, svuint64_t, ++ z0 = svand_n_u64_m (p0, z1, 65535), ++ z0 = svand_m (p0, z1, 65535)) ++ ++/* ++** and_0xffffffff_u64_m_tied1: ++** uxtw z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_0xffffffff_u64_m_tied1, svuint64_t, ++ z0 = svand_n_u64_m (p0, z0, 0xffffffff), ++ z0 = svand_m (p0, z0, 0xffffffff)) ++ ++/* ++** and_0xffffffff_u64_m_untied: ++** movprfx z0, z1 ++** uxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_0xffffffff_u64_m_untied, svuint64_t, ++ z0 = svand_n_u64_m (p0, z1, 0xffffffff), ++ z0 = svand_m (p0, z1, 0xffffffff)) ++ ++/* ++** and_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_u64_z_tied1, svuint64_t, ++ z0 = svand_u64_z (p0, z0, z1), ++ z0 = svand_z (p0, z0, z1)) ++ ++/* ++** and_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_u64_z_tied2, svuint64_t, ++ z0 = svand_u64_z (p0, z1, z0), ++ z0 = svand_z (p0, z1, z0)) ++ ++/* ++** and_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** and z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u64_z_untied, svuint64_t, ++ z0 = svand_u64_z (p0, z1, z2), ++ z0 = svand_z (p0, z1, z2)) ++ ++/* ++** and_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svand_n_u64_z (p0, z0, x0), ++ z0 = svand_z (p0, z0, x0)) ++ ++/* ++** and_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svand_n_u64_z (p0, z1, x0), ++ z0 = svand_z (p0, z1, x0)) ++ ++/* ++** and_1_u64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z 
(and_1_u64_z_tied1, svuint64_t, ++ z0 = svand_n_u64_z (p0, z0, 1), ++ z0 = svand_z (p0, z0, 1)) ++ ++/* ++** and_1_u64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u64_z_untied, svuint64_t, ++ z0 = svand_n_u64_z (p0, z1, 1), ++ z0 = svand_z (p0, z1, 1)) ++ ++/* ++** and_255_u64_z_tied1: ++** ( ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** uxtb z0\.d, p0/m, \1 ++** | ++** mov (z[0-9]+\.d), #255 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u64_z_tied1, svuint64_t, ++ z0 = svand_n_u64_z (p0, z0, 255), ++ z0 = svand_z (p0, z0, 255)) ++ ++/* ++** and_255_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** uxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u64_z_untied, svuint64_t, ++ z0 = svand_n_u64_z (p0, z1, 255), ++ z0 = svand_z (p0, z1, 255)) ++ ++/* ++** and_65535_u64_z_tied1: ++** ( ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** uxth z0\.d, p0/m, \1 ++** | ++** mov (z[0-9]+\.d), #65535 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_u64_z_tied1, svuint64_t, ++ z0 = svand_n_u64_z (p0, z0, 65535), ++ z0 = svand_z (p0, z0, 65535)) ++ ++/* ++** and_65535_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** uxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_65535_u64_z_untied, svuint64_t, ++ z0 = svand_n_u64_z (p0, z1, 65535), ++ z0 = svand_z (p0, z1, 65535)) ++ ++/* ++** and_0xffffffff_u64_z_tied1: ++** ( ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** uxtw z0\.d, p0/m, \1 ++** | ++** mov (z[0-9]+\.d), #4294967295 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_0xffffffff_u64_z_tied1, svuint64_t, ++ z0 = svand_n_u64_z (p0, z0, 0xffffffff), ++ z0 = svand_z (p0, z0, 0xffffffff)) ++ ++/* ++** and_0xffffffff_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** uxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (and_0xffffffff_u64_z_untied, svuint64_t, ++ z0 = svand_n_u64_z (p0, z1, 0xffffffff), ++ z0 = svand_z (p0, z1, 0xffffffff)) ++ ++/* ++** and_u64_x_tied1: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u64_x_tied1, svuint64_t, ++ z0 = svand_u64_x (p0, z0, z1), ++ z0 = svand_x (p0, z0, z1)) ++ ++/* ++** and_u64_x_tied2: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u64_x_tied2, svuint64_t, ++ z0 = svand_u64_x (p0, z1, z0), ++ z0 = svand_x (p0, z1, z0)) ++ ++/* ++** and_u64_x_untied: ++** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u64_x_untied, svuint64_t, ++ z0 = svand_u64_x (p0, z1, z2), ++ z0 = svand_x (p0, z1, z2)) ++ ++/* ++** and_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** and z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svand_n_u64_x (p0, z0, x0), ++ z0 = svand_x (p0, z0, x0)) ++ ++/* ++** and_x0_u64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** and z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svand_n_u64_x (p0, z1, x0), ++ z0 = svand_x (p0, z1, x0)) ++ ++/* ++** and_1_u64_x_tied1: ++** and z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u64_x_tied1, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, 1), ++ z0 = svand_x (p0, z0, 
1)) ++ ++/* ++** and_1_u64_x_untied: ++** movprfx z0, z1 ++** and z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u64_x_untied, svuint64_t, ++ z0 = svand_n_u64_x (p0, z1, 1), ++ z0 = svand_x (p0, z1, 1)) ++ ++/* ++** and_127_u64_x: ++** and z0\.d, z0\.d, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (and_127_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, 127), ++ z0 = svand_x (p0, z0, 127)) ++ ++/* ++** and_128_u64_x: ++** and z0\.d, z0\.d, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_128_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, 128), ++ z0 = svand_x (p0, z0, 128)) ++ ++/* ++** and_255_u64_x: ++** and z0\.d, z0\.d, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, 255), ++ z0 = svand_x (p0, z0, 255)) ++ ++/* ++** and_256_u64_x: ++** and z0\.d, z0\.d, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (and_256_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, 256), ++ z0 = svand_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (and_257_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, 257), ++ z0 = svand_x (p0, z0, 257)) ++ ++/* ++** and_512_u64_x: ++** and z0\.d, z0\.d, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (and_512_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, 512), ++ z0 = svand_x (p0, z0, 512)) ++ ++/* ++** and_65280_u64_x: ++** and z0\.d, z0\.d, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_65280_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, 0xff00), ++ z0 = svand_x (p0, z0, 0xff00)) ++ ++/* ++** and_m127_u64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m127_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, -127), ++ z0 = svand_x (p0, z0, -127)) ++ ++/* ++** and_m128_u64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m128_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, -128), ++ z0 = svand_x (p0, z0, -128)) ++ ++/* ++** and_m255_u64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m255_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, -255), ++ z0 = svand_x (p0, z0, -255)) ++ ++/* ++** and_m256_u64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m256_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, -256), ++ z0 = svand_x (p0, z0, -256)) ++ ++/* ++** and_m257_u64_x: ++** and z0\.d, z0\.d, #0xfffffffffffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (and_m257_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, -257), ++ z0 = svand_x (p0, z0, -257)) ++ ++/* ++** and_m512_u64_x: ++** and z0\.d, z0\.d, #0xfffffffffffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m512_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, -512), ++ z0 = svand_x (p0, z0, -512)) ++ ++/* ++** and_m32768_u64_x: ++** and z0\.d, z0\.d, #0xffffffffffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m32768_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, -0x8000), ++ z0 = svand_x (p0, z0, -0x8000)) ++ ++/* ++** and_5_u64_x: ++** mov (z[0-9]+\.d), #5 ++** and z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_5_u64_x, svuint64_t, ++ z0 = svand_n_u64_x (p0, z0, 5), ++ z0 = svand_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u8.c +new file mode 100644 +index 000000000..b0f1c9529 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u8.c +@@ -0,0 +1,294 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** and_u8_m_tied1: ++** and z0\.b, p0/m, z0\.b, 
z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (and_u8_m_tied1, svuint8_t, ++ z0 = svand_u8_m (p0, z0, z1), ++ z0 = svand_m (p0, z0, z1)) ++ ++/* ++** and_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** and z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (and_u8_m_tied2, svuint8_t, ++ z0 = svand_u8_m (p0, z1, z0), ++ z0 = svand_m (p0, z1, z0)) ++ ++/* ++** and_u8_m_untied: ++** movprfx z0, z1 ++** and z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (and_u8_m_untied, svuint8_t, ++ z0 = svand_u8_m (p0, z1, z2), ++ z0 = svand_m (p0, z1, z2)) ++ ++/* ++** and_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svand_n_u8_m (p0, z0, x0), ++ z0 = svand_m (p0, z0, x0)) ++ ++/* ++** and_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svand_n_u8_m (p0, z1, x0), ++ z0 = svand_m (p0, z1, x0)) ++ ++/* ++** and_1_u8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u8_m_tied1, svuint8_t, ++ z0 = svand_n_u8_m (p0, z0, 1), ++ z0 = svand_m (p0, z0, 1)) ++ ++/* ++** and_1_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u8_m_untied, svuint8_t, ++ z0 = svand_n_u8_m (p0, z1, 1), ++ z0 = svand_m (p0, z1, 1)) ++ ++/* ++** and_m2_u8_m: ++** mov (z[0-9]+\.b), #-2 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m2_u8_m, svuint8_t, ++ z0 = svand_n_u8_m (p0, z0, -2), ++ z0 = svand_m (p0, z0, -2)) ++ ++/* ++** and_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (and_u8_z_tied1, svuint8_t, ++ z0 = svand_u8_z (p0, z0, z1), ++ z0 = svand_z (p0, z0, z1)) ++ ++/* ++** and_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (and_u8_z_tied2, svuint8_t, ++ z0 = svand_u8_z (p0, z1, z0), ++ z0 = svand_z (p0, z1, z0)) ++ ++/* ++** and_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** and z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u8_z_untied, svuint8_t, ++ z0 = svand_u8_z (p0, z1, z2), ++ z0 = svand_z (p0, z1, z2)) ++ ++/* ++** and_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svand_n_u8_z (p0, z0, x0), ++ z0 = svand_z (p0, z0, x0)) ++ ++/* ++** and_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svand_n_u8_z (p0, z1, x0), ++ z0 = svand_z (p0, z1, x0)) ++ ++/* ++** and_1_u8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u8_z_tied1, svuint8_t, ++ z0 = svand_n_u8_z (p0, z0, 1), ++ z0 = svand_z (p0, z0, 1)) ++ ++/* ++** and_1_u8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ 
++TEST_UNIFORM_Z (and_1_u8_z_untied, svuint8_t, ++ z0 = svand_n_u8_z (p0, z1, 1), ++ z0 = svand_z (p0, z1, 1)) ++ ++/* ++** and_u8_x_tied1: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u8_x_tied1, svuint8_t, ++ z0 = svand_u8_x (p0, z0, z1), ++ z0 = svand_x (p0, z0, z1)) ++ ++/* ++** and_u8_x_tied2: ++** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u8_x_tied2, svuint8_t, ++ z0 = svand_u8_x (p0, z1, z0), ++ z0 = svand_x (p0, z1, z0)) ++ ++/* ++** and_u8_x_untied: ++** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_u8_x_untied, svuint8_t, ++ z0 = svand_u8_x (p0, z1, z2), ++ z0 = svand_x (p0, z1, z2)) ++ ++/* ++** and_w0_u8_x_tied1: ++** mov (z[0-9]+)\.b, w0 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svand_n_u8_x (p0, z0, x0), ++ z0 = svand_x (p0, z0, x0)) ++ ++/* ++** and_w0_u8_x_untied: ++** mov (z[0-9]+)\.b, w0 ++** and z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (and_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svand_n_u8_x (p0, z1, x0), ++ z0 = svand_x (p0, z1, x0)) ++ ++/* ++** and_1_u8_x_tied1: ++** and z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u8_x_tied1, svuint8_t, ++ z0 = svand_n_u8_x (p0, z0, 1), ++ z0 = svand_x (p0, z0, 1)) ++ ++/* ++** and_1_u8_x_untied: ++** movprfx z0, z1 ++** and z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (and_1_u8_x_untied, svuint8_t, ++ z0 = svand_n_u8_x (p0, z1, 1), ++ z0 = svand_x (p0, z1, 1)) ++ ++/* ++** and_127_u8_x: ++** and z0\.b, z0\.b, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (and_127_u8_x, svuint8_t, ++ z0 = svand_n_u8_x (p0, z0, 127), ++ z0 = svand_x (p0, z0, 127)) ++ ++/* ++** and_128_u8_x: ++** and z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_128_u8_x, svuint8_t, ++ z0 = svand_n_u8_x (p0, z0, 128), ++ z0 = svand_x (p0, z0, 128)) ++ ++/* ++** and_255_u8_x: ++** ret ++*/ ++TEST_UNIFORM_Z (and_255_u8_x, svuint8_t, ++ z0 = svand_n_u8_x (p0, z0, 255), ++ z0 = svand_x (p0, z0, 255)) ++ ++/* ++** and_m127_u8_x: ++** and z0\.b, z0\.b, #0x81 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m127_u8_x, svuint8_t, ++ z0 = svand_n_u8_x (p0, z0, -127), ++ z0 = svand_x (p0, z0, -127)) ++ ++/* ++** and_m128_u8_x: ++** and z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (and_m128_u8_x, svuint8_t, ++ z0 = svand_n_u8_x (p0, z0, -128), ++ z0 = svand_x (p0, z0, -128)) ++ ++/* ++** and_5_u8_x: ++** mov (z[0-9]+)\.b, #5 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (and_5_u8_x, svuint8_t, ++ z0 = svand_n_u8_x (p0, z0, 5), ++ z0 = svand_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s16.c +new file mode 100644 +index 000000000..16761b823 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** andv_x0_s16: ++** andv h([0-9]+), p0, z0\.h ++** umov w0, v\1\.h\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (andv_x0_s16, int16_t, svint16_t, ++ x0 = svandv_s16 (p0, z0), ++ x0 = svandv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s32.c +new file mode 100644 +index 000000000..bccc91e21 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** andv_x0_s32: ++** andv (s[0-9]+), p0, z0\.s ++** fmov w0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (andv_x0_s32, int32_t, svint32_t, ++ x0 = svandv_s32 (p0, z0), ++ x0 = svandv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s64.c +new file mode 100644 +index 000000000..53488b6e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** andv_x0_s64: ++** andv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (andv_x0_s64, int64_t, svint64_t, ++ x0 = svandv_s64 (p0, z0), ++ x0 = svandv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s8.c +new file mode 100644 +index 000000000..052f74c7f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** andv_x0_s8: ++** andv b([0-9]+), p0, z0\.b ++** umov w0, v\1\.b\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (andv_x0_s8, int8_t, svint8_t, ++ x0 = svandv_s8 (p0, z0), ++ x0 = svandv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u16.c +new file mode 100644 +index 000000000..03328022d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** andv_x0_u16: ++** andv h([0-9]+), p0, z0\.h ++** umov w0, v\1\.h\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (andv_x0_u16, uint16_t, svuint16_t, ++ x0 = svandv_u16 (p0, z0), ++ x0 = svandv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u32.c +new file mode 100644 +index 000000000..a1677e703 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** andv_x0_u32: ++** andv (s[0-9]+), p0, z0\.s ++** fmov w0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (andv_x0_u32, uint32_t, svuint32_t, ++ x0 = svandv_u32 (p0, z0), ++ x0 = svandv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u64.c +new file mode 100644 +index 000000000..d45422693 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** andv_x0_u64: ++** andv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (andv_x0_u64, uint64_t, svuint64_t, ++ x0 = svandv_u64 (p0, z0), ++ x0 = svandv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u8.c +new file mode 100644 +index 000000000..b07f6b6e6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** 
andv_x0_u8: ++** andv b([0-9]+), p0, z0\.b ++** umov w0, v\1\.b\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (andv_x0_u8, uint8_t, svuint8_t, ++ x0 = svandv_u8 (p0, z0), ++ x0 = svandv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s16.c +new file mode 100644 +index 000000000..877bf1068 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s16.c +@@ -0,0 +1,340 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asr_s16_m_tied1: ++** asr z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (asr_s16_m_tied1, svint16_t, svuint16_t, ++ z0 = svasr_s16_m (p0, z0, z4), ++ z0 = svasr_m (p0, z0, z4)) ++ ++/* ++** asr_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** asr z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s16_m_tied2, svint16_t, svuint16_t, ++ z0_res = svasr_s16_m (p0, z4, z0), ++ z0_res = svasr_m (p0, z4, z0)) ++ ++/* ++** asr_s16_m_untied: ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (asr_s16_m_untied, svint16_t, svuint16_t, ++ z0 = svasr_s16_m (p0, z1, z4), ++ z0 = svasr_m (p0, z1, z4)) ++ ++/* ++** asr_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** asr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s16_m_tied1, svint16_t, uint16_t, ++ z0 = svasr_n_s16_m (p0, z0, x0), ++ z0 = svasr_m (p0, z0, x0)) ++ ++/* ++** asr_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s16_m_untied, svint16_t, uint16_t, ++ z0 = svasr_n_s16_m (p0, z1, x0), ++ z0 = svasr_m (p0, z1, x0)) ++ ++/* ++** asr_1_s16_m_tied1: ++** asr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s16_m_tied1, svint16_t, ++ z0 = svasr_n_s16_m (p0, z0, 1), ++ z0 = svasr_m (p0, z0, 1)) ++ ++/* ++** asr_1_s16_m_untied: ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s16_m_untied, svint16_t, ++ z0 = svasr_n_s16_m (p0, z1, 1), ++ z0 = svasr_m (p0, z1, 1)) ++ ++/* ++** asr_15_s16_m_tied1: ++** asr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_15_s16_m_tied1, svint16_t, ++ z0 = svasr_n_s16_m (p0, z0, 15), ++ z0 = svasr_m (p0, z0, 15)) ++ ++/* ++** asr_15_s16_m_untied: ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_15_s16_m_untied, svint16_t, ++ z0 = svasr_n_s16_m (p0, z1, 15), ++ z0 = svasr_m (p0, z1, 15)) ++ ++/* ++** asr_16_s16_m_tied1: ++** asr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_16_s16_m_tied1, svint16_t, ++ z0 = svasr_n_s16_m (p0, z0, 16), ++ z0 = svasr_m (p0, z0, 16)) ++ ++/* ++** asr_16_s16_m_untied: ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_16_s16_m_untied, svint16_t, ++ z0 = svasr_n_s16_m (p0, z1, 16), ++ z0 = svasr_m (p0, z1, 16)) ++ ++/* ++** asr_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asr z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (asr_s16_z_tied1, svint16_t, svuint16_t, ++ z0 = svasr_s16_z (p0, z0, z4), ++ z0 = svasr_z (p0, z0, z4)) ++ ++/* ++** asr_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** asrr z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s16_z_tied2, svint16_t, svuint16_t, ++ z0_res = svasr_s16_z (p0, z4, z0), ++ z0_res = svasr_z (p0, z4, z0)) ++ ++/* ++** asr_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** asr z0\.h, p0/m, z0\.h, z4\.h ++** | 
++** movprfx z0\.h, p0/z, z4\.h ++** asrr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_DUAL_Z (asr_s16_z_untied, svint16_t, svuint16_t, ++ z0 = svasr_s16_z (p0, z1, z4), ++ z0 = svasr_z (p0, z1, z4)) ++ ++/* ++** asr_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** asr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s16_z_tied1, svint16_t, uint16_t, ++ z0 = svasr_n_s16_z (p0, z0, x0), ++ z0 = svasr_z (p0, z0, x0)) ++ ++/* ++** asr_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** asr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** asrr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s16_z_untied, svint16_t, uint16_t, ++ z0 = svasr_n_s16_z (p0, z1, x0), ++ z0 = svasr_z (p0, z1, x0)) ++ ++/* ++** asr_1_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s16_z_tied1, svint16_t, ++ z0 = svasr_n_s16_z (p0, z0, 1), ++ z0 = svasr_z (p0, z0, 1)) ++ ++/* ++** asr_1_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** asr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s16_z_untied, svint16_t, ++ z0 = svasr_n_s16_z (p0, z1, 1), ++ z0 = svasr_z (p0, z1, 1)) ++ ++/* ++** asr_15_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_15_s16_z_tied1, svint16_t, ++ z0 = svasr_n_s16_z (p0, z0, 15), ++ z0 = svasr_z (p0, z0, 15)) ++ ++/* ++** asr_15_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** asr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_15_s16_z_untied, svint16_t, ++ z0 = svasr_n_s16_z (p0, z1, 15), ++ z0 = svasr_z (p0, z1, 15)) ++ ++/* ++** asr_16_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_16_s16_z_tied1, svint16_t, ++ z0 = svasr_n_s16_z (p0, z0, 16), ++ z0 = svasr_z (p0, z0, 16)) ++ ++/* ++** asr_16_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** asr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_16_s16_z_untied, svint16_t, ++ z0 = svasr_n_s16_z (p0, z1, 16), ++ z0 = svasr_z (p0, z1, 16)) ++ ++/* ++** asr_s16_x_tied1: ++** asr z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (asr_s16_x_tied1, svint16_t, svuint16_t, ++ z0 = svasr_s16_x (p0, z0, z4), ++ z0 = svasr_x (p0, z0, z4)) ++ ++/* ++** asr_s16_x_tied2: ++** asrr z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s16_x_tied2, svint16_t, svuint16_t, ++ z0_res = svasr_s16_x (p0, z4, z0), ++ z0_res = svasr_x (p0, z4, z0)) ++ ++/* ++** asr_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, z4\.h ++** | ++** movprfx z0, z4 ++** asrr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_DUAL_Z (asr_s16_x_untied, svint16_t, svuint16_t, ++ z0 = svasr_s16_x (p0, z1, z4), ++ z0 = svasr_x (p0, z1, z4)) ++ ++/* ++** asr_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** asr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s16_x_tied1, svint16_t, uint16_t, ++ z0 = svasr_n_s16_x (p0, z0, x0), ++ z0 = svasr_x (p0, z0, x0)) ++ ++/* ++** asr_w0_s16_x_untied: ++** mov z0\.h, w0 ++** asrr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s16_x_untied, svint16_t, uint16_t, ++ z0 = svasr_n_s16_x (p0, z1, x0), ++ z0 = svasr_x (p0, z1, x0)) ++ ++/* ++** asr_1_s16_x_tied1: ++** asr z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s16_x_tied1, svint16_t, ++ z0 = svasr_n_s16_x (p0, z0, 1), ++ z0 = svasr_x (p0, z0, 1)) ++ ++/* ++** 
asr_1_s16_x_untied: ++** asr z0\.h, z1\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s16_x_untied, svint16_t, ++ z0 = svasr_n_s16_x (p0, z1, 1), ++ z0 = svasr_x (p0, z1, 1)) ++ ++/* ++** asr_15_s16_x_tied1: ++** asr z0\.h, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_15_s16_x_tied1, svint16_t, ++ z0 = svasr_n_s16_x (p0, z0, 15), ++ z0 = svasr_x (p0, z0, 15)) ++ ++/* ++** asr_15_s16_x_untied: ++** asr z0\.h, z1\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_15_s16_x_untied, svint16_t, ++ z0 = svasr_n_s16_x (p0, z1, 15), ++ z0 = svasr_x (p0, z1, 15)) ++ ++/* ++** asr_16_s16_x_tied1: ++** asr z0\.h, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_16_s16_x_tied1, svint16_t, ++ z0 = svasr_n_s16_x (p0, z0, 16), ++ z0 = svasr_x (p0, z0, 16)) ++ ++/* ++** asr_16_s16_x_untied: ++** asr z0\.h, z1\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_16_s16_x_untied, svint16_t, ++ z0 = svasr_n_s16_x (p0, z1, 16), ++ z0 = svasr_x (p0, z1, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s32.c +new file mode 100644 +index 000000000..0f5a37372 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s32.c +@@ -0,0 +1,340 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asr_s32_m_tied1: ++** asr z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (asr_s32_m_tied1, svint32_t, svuint32_t, ++ z0 = svasr_s32_m (p0, z0, z4), ++ z0 = svasr_m (p0, z0, z4)) ++ ++/* ++** asr_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** asr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s32_m_tied2, svint32_t, svuint32_t, ++ z0_res = svasr_s32_m (p0, z4, z0), ++ z0_res = svasr_m (p0, z4, z0)) ++ ++/* ++** asr_s32_m_untied: ++** movprfx z0, z1 ++** asr z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (asr_s32_m_untied, svint32_t, svuint32_t, ++ z0 = svasr_s32_m (p0, z1, z4), ++ z0 = svasr_m (p0, z1, z4)) ++ ++/* ++** asr_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** asr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s32_m_tied1, svint32_t, uint32_t, ++ z0 = svasr_n_s32_m (p0, z0, x0), ++ z0 = svasr_m (p0, z0, x0)) ++ ++/* ++** asr_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** asr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s32_m_untied, svint32_t, uint32_t, ++ z0 = svasr_n_s32_m (p0, z1, x0), ++ z0 = svasr_m (p0, z1, x0)) ++ ++/* ++** asr_1_s32_m_tied1: ++** asr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s32_m_tied1, svint32_t, ++ z0 = svasr_n_s32_m (p0, z0, 1), ++ z0 = svasr_m (p0, z0, 1)) ++ ++/* ++** asr_1_s32_m_untied: ++** movprfx z0, z1 ++** asr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s32_m_untied, svint32_t, ++ z0 = svasr_n_s32_m (p0, z1, 1), ++ z0 = svasr_m (p0, z1, 1)) ++ ++/* ++** asr_31_s32_m_tied1: ++** asr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_31_s32_m_tied1, svint32_t, ++ z0 = svasr_n_s32_m (p0, z0, 31), ++ z0 = svasr_m (p0, z0, 31)) ++ ++/* ++** asr_31_s32_m_untied: ++** movprfx z0, z1 ++** asr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_31_s32_m_untied, svint32_t, ++ z0 = svasr_n_s32_m (p0, z1, 31), ++ z0 = svasr_m (p0, z1, 31)) ++ ++/* ++** asr_32_s32_m_tied1: ++** asr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_32_s32_m_tied1, svint32_t, ++ z0 = svasr_n_s32_m (p0, z0, 32), ++ z0 = svasr_m (p0, z0, 32)) ++ ++/* ++** asr_32_s32_m_untied: ++** movprfx z0, z1 ++** asr z0\.s, p0/m, 
z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_32_s32_m_untied, svint32_t, ++ z0 = svasr_n_s32_m (p0, z1, 32), ++ z0 = svasr_m (p0, z1, 32)) ++ ++/* ++** asr_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** asr z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (asr_s32_z_tied1, svint32_t, svuint32_t, ++ z0 = svasr_s32_z (p0, z0, z4), ++ z0 = svasr_z (p0, z0, z4)) ++ ++/* ++** asr_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** asrr z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s32_z_tied2, svint32_t, svuint32_t, ++ z0_res = svasr_s32_z (p0, z4, z0), ++ z0_res = svasr_z (p0, z4, z0)) ++ ++/* ++** asr_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** asr z0\.s, p0/m, z0\.s, z4\.s ++** | ++** movprfx z0\.s, p0/z, z4\.s ++** asrr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_DUAL_Z (asr_s32_z_untied, svint32_t, svuint32_t, ++ z0 = svasr_s32_z (p0, z1, z4), ++ z0 = svasr_z (p0, z1, z4)) ++ ++/* ++** asr_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** asr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s32_z_tied1, svint32_t, uint32_t, ++ z0 = svasr_n_s32_z (p0, z0, x0), ++ z0 = svasr_z (p0, z0, x0)) ++ ++/* ++** asr_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** asr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** asrr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s32_z_untied, svint32_t, uint32_t, ++ z0 = svasr_n_s32_z (p0, z1, x0), ++ z0 = svasr_z (p0, z1, x0)) ++ ++/* ++** asr_1_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** asr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s32_z_tied1, svint32_t, ++ z0 = svasr_n_s32_z (p0, z0, 1), ++ z0 = svasr_z (p0, z0, 1)) ++ ++/* ++** asr_1_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** asr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s32_z_untied, svint32_t, ++ z0 = svasr_n_s32_z (p0, z1, 1), ++ z0 = svasr_z (p0, z1, 1)) ++ ++/* ++** asr_31_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** asr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_31_s32_z_tied1, svint32_t, ++ z0 = svasr_n_s32_z (p0, z0, 31), ++ z0 = svasr_z (p0, z0, 31)) ++ ++/* ++** asr_31_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** asr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_31_s32_z_untied, svint32_t, ++ z0 = svasr_n_s32_z (p0, z1, 31), ++ z0 = svasr_z (p0, z1, 31)) ++ ++/* ++** asr_32_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** asr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_32_s32_z_tied1, svint32_t, ++ z0 = svasr_n_s32_z (p0, z0, 32), ++ z0 = svasr_z (p0, z0, 32)) ++ ++/* ++** asr_32_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** asr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_32_s32_z_untied, svint32_t, ++ z0 = svasr_n_s32_z (p0, z1, 32), ++ z0 = svasr_z (p0, z1, 32)) ++ ++/* ++** asr_s32_x_tied1: ++** asr z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (asr_s32_x_tied1, svint32_t, svuint32_t, ++ z0 = svasr_s32_x (p0, z0, z4), ++ z0 = svasr_x (p0, z0, z4)) ++ ++/* ++** asr_s32_x_tied2: ++** asrr z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s32_x_tied2, svint32_t, svuint32_t, ++ z0_res = svasr_s32_x (p0, z4, z0), ++ z0_res = svasr_x (p0, z4, z0)) ++ ++/* ++** asr_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** asr z0\.s, p0/m, z0\.s, z4\.s ++** | ++** movprfx z0, z4 ++** asrr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_DUAL_Z (asr_s32_x_untied, svint32_t, svuint32_t, ++ z0 
= svasr_s32_x (p0, z1, z4), ++ z0 = svasr_x (p0, z1, z4)) ++ ++/* ++** asr_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** asr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s32_x_tied1, svint32_t, uint32_t, ++ z0 = svasr_n_s32_x (p0, z0, x0), ++ z0 = svasr_x (p0, z0, x0)) ++ ++/* ++** asr_w0_s32_x_untied: ++** mov z0\.s, w0 ++** asrr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s32_x_untied, svint32_t, uint32_t, ++ z0 = svasr_n_s32_x (p0, z1, x0), ++ z0 = svasr_x (p0, z1, x0)) ++ ++/* ++** asr_1_s32_x_tied1: ++** asr z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s32_x_tied1, svint32_t, ++ z0 = svasr_n_s32_x (p0, z0, 1), ++ z0 = svasr_x (p0, z0, 1)) ++ ++/* ++** asr_1_s32_x_untied: ++** asr z0\.s, z1\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s32_x_untied, svint32_t, ++ z0 = svasr_n_s32_x (p0, z1, 1), ++ z0 = svasr_x (p0, z1, 1)) ++ ++/* ++** asr_31_s32_x_tied1: ++** asr z0\.s, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_31_s32_x_tied1, svint32_t, ++ z0 = svasr_n_s32_x (p0, z0, 31), ++ z0 = svasr_x (p0, z0, 31)) ++ ++/* ++** asr_31_s32_x_untied: ++** asr z0\.s, z1\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_31_s32_x_untied, svint32_t, ++ z0 = svasr_n_s32_x (p0, z1, 31), ++ z0 = svasr_x (p0, z1, 31)) ++ ++/* ++** asr_32_s32_x_tied1: ++** asr z0\.s, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_32_s32_x_tied1, svint32_t, ++ z0 = svasr_n_s32_x (p0, z0, 32), ++ z0 = svasr_x (p0, z0, 32)) ++ ++/* ++** asr_32_s32_x_untied: ++** asr z0\.s, z1\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_32_s32_x_untied, svint32_t, ++ z0 = svasr_n_s32_x (p0, z1, 32), ++ z0 = svasr_x (p0, z1, 32)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s64.c +new file mode 100644 +index 000000000..80cae07c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s64.c +@@ -0,0 +1,340 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asr_s64_m_tied1: ++** asr z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_s64_m_tied1, svint64_t, svuint64_t, ++ z0 = svasr_s64_m (p0, z0, z4), ++ z0 = svasr_m (p0, z0, z4)) ++ ++/* ++** asr_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** asr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s64_m_tied2, svint64_t, svuint64_t, ++ z0_res = svasr_s64_m (p0, z4, z0), ++ z0_res = svasr_m (p0, z4, z0)) ++ ++/* ++** asr_s64_m_untied: ++** movprfx z0, z1 ++** asr z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_s64_m_untied, svint64_t, svuint64_t, ++ z0 = svasr_s64_m (p0, z1, z4), ++ z0 = svasr_m (p0, z1, z4)) ++ ++/* ++** asr_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_x0_s64_m_tied1, svint64_t, uint64_t, ++ z0 = svasr_n_s64_m (p0, z0, x0), ++ z0 = svasr_m (p0, z0, x0)) ++ ++/* ++** asr_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** asr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_x0_s64_m_untied, svint64_t, uint64_t, ++ z0 = svasr_n_s64_m (p0, z1, x0), ++ z0 = svasr_m (p0, z1, x0)) ++ ++/* ++** asr_1_s64_m_tied1: ++** asr z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s64_m_tied1, svint64_t, ++ z0 = svasr_n_s64_m (p0, z0, 1), ++ z0 = svasr_m (p0, z0, 1)) ++ ++/* ++** asr_1_s64_m_untied: ++** movprfx z0, z1 ++** asr z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s64_m_untied, svint64_t, ++ z0 = svasr_n_s64_m (p0, z1, 
1), ++ z0 = svasr_m (p0, z1, 1)) ++ ++/* ++** asr_63_s64_m_tied1: ++** asr z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_63_s64_m_tied1, svint64_t, ++ z0 = svasr_n_s64_m (p0, z0, 63), ++ z0 = svasr_m (p0, z0, 63)) ++ ++/* ++** asr_63_s64_m_untied: ++** movprfx z0, z1 ++** asr z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_63_s64_m_untied, svint64_t, ++ z0 = svasr_n_s64_m (p0, z1, 63), ++ z0 = svasr_m (p0, z1, 63)) ++ ++/* ++** asr_64_s64_m_tied1: ++** asr z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_64_s64_m_tied1, svint64_t, ++ z0 = svasr_n_s64_m (p0, z0, 64), ++ z0 = svasr_m (p0, z0, 64)) ++ ++/* ++** asr_64_s64_m_untied: ++** movprfx z0, z1 ++** asr z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_64_s64_m_untied, svint64_t, ++ z0 = svasr_n_s64_m (p0, z1, 64), ++ z0 = svasr_m (p0, z1, 64)) ++ ++/* ++** asr_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** asr z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_s64_z_tied1, svint64_t, svuint64_t, ++ z0 = svasr_s64_z (p0, z0, z4), ++ z0 = svasr_z (p0, z0, z4)) ++ ++/* ++** asr_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** asrr z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s64_z_tied2, svint64_t, svuint64_t, ++ z0_res = svasr_s64_z (p0, z4, z0), ++ z0_res = svasr_z (p0, z4, z0)) ++ ++/* ++** asr_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** asr z0\.d, p0/m, z0\.d, z4\.d ++** | ++** movprfx z0\.d, p0/z, z4\.d ++** asrr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_DUAL_Z (asr_s64_z_untied, svint64_t, svuint64_t, ++ z0 = svasr_s64_z (p0, z1, z4), ++ z0 = svasr_z (p0, z1, z4)) ++ ++/* ++** asr_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** asr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_x0_s64_z_tied1, svint64_t, uint64_t, ++ z0 = svasr_n_s64_z (p0, z0, x0), ++ z0 = svasr_z (p0, z0, x0)) ++ ++/* ++** asr_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** asr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** asrr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_x0_s64_z_untied, svint64_t, uint64_t, ++ z0 = svasr_n_s64_z (p0, z1, x0), ++ z0 = svasr_z (p0, z1, x0)) ++ ++/* ++** asr_1_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** asr z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s64_z_tied1, svint64_t, ++ z0 = svasr_n_s64_z (p0, z0, 1), ++ z0 = svasr_z (p0, z0, 1)) ++ ++/* ++** asr_1_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** asr z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s64_z_untied, svint64_t, ++ z0 = svasr_n_s64_z (p0, z1, 1), ++ z0 = svasr_z (p0, z1, 1)) ++ ++/* ++** asr_63_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** asr z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_63_s64_z_tied1, svint64_t, ++ z0 = svasr_n_s64_z (p0, z0, 63), ++ z0 = svasr_z (p0, z0, 63)) ++ ++/* ++** asr_63_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** asr z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_63_s64_z_untied, svint64_t, ++ z0 = svasr_n_s64_z (p0, z1, 63), ++ z0 = svasr_z (p0, z1, 63)) ++ ++/* ++** asr_64_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** asr z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_64_s64_z_tied1, svint64_t, ++ z0 = svasr_n_s64_z (p0, z0, 64), ++ z0 = svasr_z (p0, z0, 64)) ++ ++/* ++** asr_64_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** asr z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_64_s64_z_untied, 
svint64_t, ++ z0 = svasr_n_s64_z (p0, z1, 64), ++ z0 = svasr_z (p0, z1, 64)) ++ ++/* ++** asr_s64_x_tied1: ++** asr z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_s64_x_tied1, svint64_t, svuint64_t, ++ z0 = svasr_s64_x (p0, z0, z4), ++ z0 = svasr_x (p0, z0, z4)) ++ ++/* ++** asr_s64_x_tied2: ++** asrr z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s64_x_tied2, svint64_t, svuint64_t, ++ z0_res = svasr_s64_x (p0, z4, z0), ++ z0_res = svasr_x (p0, z4, z0)) ++ ++/* ++** asr_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** asr z0\.d, p0/m, z0\.d, z4\.d ++** | ++** movprfx z0, z4 ++** asrr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_DUAL_Z (asr_s64_x_untied, svint64_t, svuint64_t, ++ z0 = svasr_s64_x (p0, z1, z4), ++ z0 = svasr_x (p0, z1, z4)) ++ ++/* ++** asr_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_x0_s64_x_tied1, svint64_t, uint64_t, ++ z0 = svasr_n_s64_x (p0, z0, x0), ++ z0 = svasr_x (p0, z0, x0)) ++ ++/* ++** asr_x0_s64_x_untied: ++** mov z0\.d, x0 ++** asrr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_x0_s64_x_untied, svint64_t, uint64_t, ++ z0 = svasr_n_s64_x (p0, z1, x0), ++ z0 = svasr_x (p0, z1, x0)) ++ ++/* ++** asr_1_s64_x_tied1: ++** asr z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s64_x_tied1, svint64_t, ++ z0 = svasr_n_s64_x (p0, z0, 1), ++ z0 = svasr_x (p0, z0, 1)) ++ ++/* ++** asr_1_s64_x_untied: ++** asr z0\.d, z1\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s64_x_untied, svint64_t, ++ z0 = svasr_n_s64_x (p0, z1, 1), ++ z0 = svasr_x (p0, z1, 1)) ++ ++/* ++** asr_63_s64_x_tied1: ++** asr z0\.d, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_63_s64_x_tied1, svint64_t, ++ z0 = svasr_n_s64_x (p0, z0, 63), ++ z0 = svasr_x (p0, z0, 63)) ++ ++/* ++** asr_63_s64_x_untied: ++** asr z0\.d, z1\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_63_s64_x_untied, svint64_t, ++ z0 = svasr_n_s64_x (p0, z1, 63), ++ z0 = svasr_x (p0, z1, 63)) ++ ++/* ++** asr_64_s64_x_tied1: ++** asr z0\.d, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_64_s64_x_tied1, svint64_t, ++ z0 = svasr_n_s64_x (p0, z0, 64), ++ z0 = svasr_x (p0, z0, 64)) ++ ++/* ++** asr_64_s64_x_untied: ++** asr z0\.d, z1\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_64_s64_x_untied, svint64_t, ++ z0 = svasr_n_s64_x (p0, z1, 64), ++ z0 = svasr_x (p0, z1, 64)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s8.c +new file mode 100644 +index 000000000..992e93fde +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s8.c +@@ -0,0 +1,340 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asr_s8_m_tied1: ++** asr z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (asr_s8_m_tied1, svint8_t, svuint8_t, ++ z0 = svasr_s8_m (p0, z0, z4), ++ z0 = svasr_m (p0, z0, z4)) ++ ++/* ++** asr_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** asr z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s8_m_tied2, svint8_t, svuint8_t, ++ z0_res = svasr_s8_m (p0, z4, z0), ++ z0_res = svasr_m (p0, z4, z0)) ++ ++/* ++** asr_s8_m_untied: ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (asr_s8_m_untied, svint8_t, svuint8_t, ++ z0 = svasr_s8_m (p0, z1, z4), ++ z0 = svasr_m (p0, z1, z4)) ++ ++/* ++** asr_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** asr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s8_m_tied1, 
svint8_t, uint8_t, ++ z0 = svasr_n_s8_m (p0, z0, x0), ++ z0 = svasr_m (p0, z0, x0)) ++ ++/* ++** asr_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s8_m_untied, svint8_t, uint8_t, ++ z0 = svasr_n_s8_m (p0, z1, x0), ++ z0 = svasr_m (p0, z1, x0)) ++ ++/* ++** asr_1_s8_m_tied1: ++** asr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s8_m_tied1, svint8_t, ++ z0 = svasr_n_s8_m (p0, z0, 1), ++ z0 = svasr_m (p0, z0, 1)) ++ ++/* ++** asr_1_s8_m_untied: ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s8_m_untied, svint8_t, ++ z0 = svasr_n_s8_m (p0, z1, 1), ++ z0 = svasr_m (p0, z1, 1)) ++ ++/* ++** asr_7_s8_m_tied1: ++** asr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_7_s8_m_tied1, svint8_t, ++ z0 = svasr_n_s8_m (p0, z0, 7), ++ z0 = svasr_m (p0, z0, 7)) ++ ++/* ++** asr_7_s8_m_untied: ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_7_s8_m_untied, svint8_t, ++ z0 = svasr_n_s8_m (p0, z1, 7), ++ z0 = svasr_m (p0, z1, 7)) ++ ++/* ++** asr_8_s8_m_tied1: ++** asr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_8_s8_m_tied1, svint8_t, ++ z0 = svasr_n_s8_m (p0, z0, 8), ++ z0 = svasr_m (p0, z0, 8)) ++ ++/* ++** asr_8_s8_m_untied: ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_8_s8_m_untied, svint8_t, ++ z0 = svasr_n_s8_m (p0, z1, 8), ++ z0 = svasr_m (p0, z1, 8)) ++ ++/* ++** asr_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asr z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (asr_s8_z_tied1, svint8_t, svuint8_t, ++ z0 = svasr_s8_z (p0, z0, z4), ++ z0 = svasr_z (p0, z0, z4)) ++ ++/* ++** asr_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** asrr z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s8_z_tied2, svint8_t, svuint8_t, ++ z0_res = svasr_s8_z (p0, z4, z0), ++ z0_res = svasr_z (p0, z4, z0)) ++ ++/* ++** asr_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** asr z0\.b, p0/m, z0\.b, z4\.b ++** | ++** movprfx z0\.b, p0/z, z4\.b ++** asrr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_DUAL_Z (asr_s8_z_untied, svint8_t, svuint8_t, ++ z0 = svasr_s8_z (p0, z1, z4), ++ z0 = svasr_z (p0, z1, z4)) ++ ++/* ++** asr_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** asr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s8_z_tied1, svint8_t, uint8_t, ++ z0 = svasr_n_s8_z (p0, z0, x0), ++ z0 = svasr_z (p0, z0, x0)) ++ ++/* ++** asr_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** asr z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** asrr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s8_z_untied, svint8_t, uint8_t, ++ z0 = svasr_n_s8_z (p0, z1, x0), ++ z0 = svasr_z (p0, z1, x0)) ++ ++/* ++** asr_1_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s8_z_tied1, svint8_t, ++ z0 = svasr_n_s8_z (p0, z0, 1), ++ z0 = svasr_z (p0, z0, 1)) ++ ++/* ++** asr_1_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** asr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s8_z_untied, svint8_t, ++ z0 = svasr_n_s8_z (p0, z1, 1), ++ z0 = svasr_z (p0, z1, 1)) ++ ++/* ++** asr_7_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_7_s8_z_tied1, svint8_t, ++ z0 = svasr_n_s8_z (p0, z0, 7), ++ z0 = 
svasr_z (p0, z0, 7)) ++ ++/* ++** asr_7_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** asr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_7_s8_z_untied, svint8_t, ++ z0 = svasr_n_s8_z (p0, z1, 7), ++ z0 = svasr_z (p0, z1, 7)) ++ ++/* ++** asr_8_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_8_s8_z_tied1, svint8_t, ++ z0 = svasr_n_s8_z (p0, z0, 8), ++ z0 = svasr_z (p0, z0, 8)) ++ ++/* ++** asr_8_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** asr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_8_s8_z_untied, svint8_t, ++ z0 = svasr_n_s8_z (p0, z1, 8), ++ z0 = svasr_z (p0, z1, 8)) ++ ++/* ++** asr_s8_x_tied1: ++** asr z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (asr_s8_x_tied1, svint8_t, svuint8_t, ++ z0 = svasr_s8_x (p0, z0, z4), ++ z0 = svasr_x (p0, z0, z4)) ++ ++/* ++** asr_s8_x_tied2: ++** asrr z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_s8_x_tied2, svint8_t, svuint8_t, ++ z0_res = svasr_s8_x (p0, z4, z0), ++ z0_res = svasr_x (p0, z4, z0)) ++ ++/* ++** asr_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, z4\.b ++** | ++** movprfx z0, z4 ++** asrr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_DUAL_Z (asr_s8_x_untied, svint8_t, svuint8_t, ++ z0 = svasr_s8_x (p0, z1, z4), ++ z0 = svasr_x (p0, z1, z4)) ++ ++/* ++** asr_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** asr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s8_x_tied1, svint8_t, uint8_t, ++ z0 = svasr_n_s8_x (p0, z0, x0), ++ z0 = svasr_x (p0, z0, x0)) ++ ++/* ++** asr_w0_s8_x_untied: ++** mov z0\.b, w0 ++** asrr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_w0_s8_x_untied, svint8_t, uint8_t, ++ z0 = svasr_n_s8_x (p0, z1, x0), ++ z0 = svasr_x (p0, z1, x0)) ++ ++/* ++** asr_1_s8_x_tied1: ++** asr z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s8_x_tied1, svint8_t, ++ z0 = svasr_n_s8_x (p0, z0, 1), ++ z0 = svasr_x (p0, z0, 1)) ++ ++/* ++** asr_1_s8_x_untied: ++** asr z0\.b, z1\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_1_s8_x_untied, svint8_t, ++ z0 = svasr_n_s8_x (p0, z1, 1), ++ z0 = svasr_x (p0, z1, 1)) ++ ++/* ++** asr_7_s8_x_tied1: ++** asr z0\.b, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_7_s8_x_tied1, svint8_t, ++ z0 = svasr_n_s8_x (p0, z0, 7), ++ z0 = svasr_x (p0, z0, 7)) ++ ++/* ++** asr_7_s8_x_untied: ++** asr z0\.b, z1\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_7_s8_x_untied, svint8_t, ++ z0 = svasr_n_s8_x (p0, z1, 7), ++ z0 = svasr_x (p0, z1, 7)) ++ ++/* ++** asr_8_s8_x_tied1: ++** asr z0\.b, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_8_s8_x_tied1, svint8_t, ++ z0 = svasr_n_s8_x (p0, z0, 8), ++ z0 = svasr_x (p0, z0, 8)) ++ ++/* ++** asr_8_s8_x_untied: ++** asr z0\.b, z1\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_8_s8_x_untied, svint8_t, ++ z0 = svasr_n_s8_x (p0, z1, 8), ++ z0 = svasr_x (p0, z1, 8)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s16.c +new file mode 100644 +index 000000000..b74ae33e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s16.c +@@ -0,0 +1,325 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asr_wide_s16_m_tied1: ++** asr z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s16_m_tied1, svint16_t, svuint64_t, ++ z0 = svasr_wide_s16_m (p0, z0, z4), ++ z0 = svasr_wide_m (p0, z0, z4)) ++ ++/* ++** asr_wide_s16_m_tied2: 
++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** asr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_wide_s16_m_tied2, svint16_t, svuint64_t, ++ z0_res = svasr_wide_s16_m (p0, z4, z0), ++ z0_res = svasr_wide_m (p0, z4, z0)) ++ ++/* ++** asr_wide_s16_m_untied: ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s16_m_untied, svint16_t, svuint64_t, ++ z0 = svasr_wide_s16_m (p0, z1, z4), ++ z0 = svasr_wide_m (p0, z1, z4)) ++ ++/* ++** asr_wide_x0_s16_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s16_m_tied1, svint16_t, uint64_t, ++ z0 = svasr_wide_n_s16_m (p0, z0, x0), ++ z0 = svasr_wide_m (p0, z0, x0)) ++ ++/* ++** asr_wide_x0_s16_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s16_m_untied, svint16_t, uint64_t, ++ z0 = svasr_wide_n_s16_m (p0, z1, x0), ++ z0 = svasr_wide_m (p0, z1, x0)) ++ ++/* ++** asr_wide_1_s16_m_tied1: ++** asr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s16_m_tied1, svint16_t, ++ z0 = svasr_wide_n_s16_m (p0, z0, 1), ++ z0 = svasr_wide_m (p0, z0, 1)) ++ ++/* ++** asr_wide_1_s16_m_untied: ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s16_m_untied, svint16_t, ++ z0 = svasr_wide_n_s16_m (p0, z1, 1), ++ z0 = svasr_wide_m (p0, z1, 1)) ++ ++/* ++** asr_wide_15_s16_m_tied1: ++** asr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_15_s16_m_tied1, svint16_t, ++ z0 = svasr_wide_n_s16_m (p0, z0, 15), ++ z0 = svasr_wide_m (p0, z0, 15)) ++ ++/* ++** asr_wide_15_s16_m_untied: ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_15_s16_m_untied, svint16_t, ++ z0 = svasr_wide_n_s16_m (p0, z1, 15), ++ z0 = svasr_wide_m (p0, z1, 15)) ++ ++/* ++** asr_wide_16_s16_m_tied1: ++** asr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_16_s16_m_tied1, svint16_t, ++ z0 = svasr_wide_n_s16_m (p0, z0, 16), ++ z0 = svasr_wide_m (p0, z0, 16)) ++ ++/* ++** asr_wide_16_s16_m_untied: ++** movprfx z0, z1 ++** asr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_16_s16_m_untied, svint16_t, ++ z0 = svasr_wide_n_s16_m (p0, z1, 16), ++ z0 = svasr_wide_m (p0, z1, 16)) ++ ++/* ++** asr_wide_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asr z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s16_z_tied1, svint16_t, svuint64_t, ++ z0 = svasr_wide_s16_z (p0, z0, z4), ++ z0 = svasr_wide_z (p0, z0, z4)) ++ ++/* ++** asr_wide_s16_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.h, p0/z, z4\.h ++** asr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_wide_s16_z_tied2, svint16_t, svuint64_t, ++ z0_res = svasr_wide_s16_z (p0, z4, z0), ++ z0_res = svasr_wide_z (p0, z4, z0)) ++ ++/* ++** asr_wide_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** asr z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s16_z_untied, svint16_t, svuint64_t, ++ z0 = svasr_wide_s16_z (p0, z1, z4), ++ z0 = svasr_wide_z (p0, z1, z4)) ++ ++/* ++** asr_wide_x0_s16_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.h, p0/z, z0\.h ++** asr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s16_z_tied1, svint16_t, uint64_t, ++ z0 = svasr_wide_n_s16_z (p0, z0, x0), ++ z0 = svasr_wide_z (p0, z0, x0)) ++ ++/* ++** asr_wide_x0_s16_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.h, p0/z, z1\.h ++** asr z0\.h, 
p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s16_z_untied, svint16_t, uint64_t, ++ z0 = svasr_wide_n_s16_z (p0, z1, x0), ++ z0 = svasr_wide_z (p0, z1, x0)) ++ ++/* ++** asr_wide_1_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s16_z_tied1, svint16_t, ++ z0 = svasr_wide_n_s16_z (p0, z0, 1), ++ z0 = svasr_wide_z (p0, z0, 1)) ++ ++/* ++** asr_wide_1_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** asr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s16_z_untied, svint16_t, ++ z0 = svasr_wide_n_s16_z (p0, z1, 1), ++ z0 = svasr_wide_z (p0, z1, 1)) ++ ++/* ++** asr_wide_15_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_15_s16_z_tied1, svint16_t, ++ z0 = svasr_wide_n_s16_z (p0, z0, 15), ++ z0 = svasr_wide_z (p0, z0, 15)) ++ ++/* ++** asr_wide_15_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** asr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_15_s16_z_untied, svint16_t, ++ z0 = svasr_wide_n_s16_z (p0, z1, 15), ++ z0 = svasr_wide_z (p0, z1, 15)) ++ ++/* ++** asr_wide_16_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_16_s16_z_tied1, svint16_t, ++ z0 = svasr_wide_n_s16_z (p0, z0, 16), ++ z0 = svasr_wide_z (p0, z0, 16)) ++ ++/* ++** asr_wide_16_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** asr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_16_s16_z_untied, svint16_t, ++ z0 = svasr_wide_n_s16_z (p0, z1, 16), ++ z0 = svasr_wide_z (p0, z1, 16)) ++ ++/* ++** asr_wide_s16_x_tied1: ++** asr z0\.h, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s16_x_tied1, svint16_t, svuint64_t, ++ z0 = svasr_wide_s16_x (p0, z0, z4), ++ z0 = svasr_wide_x (p0, z0, z4)) ++ ++/* ++** asr_wide_s16_x_tied2: ++** asr z0\.h, z4\.h, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_wide_s16_x_tied2, svint16_t, svuint64_t, ++ z0_res = svasr_wide_s16_x (p0, z4, z0), ++ z0_res = svasr_wide_x (p0, z4, z0)) ++ ++/* ++** asr_wide_s16_x_untied: ++** asr z0\.h, z1\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s16_x_untied, svint16_t, svuint64_t, ++ z0 = svasr_wide_s16_x (p0, z1, z4), ++ z0 = svasr_wide_x (p0, z1, z4)) ++ ++/* ++** asr_wide_x0_s16_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s16_x_tied1, svint16_t, uint64_t, ++ z0 = svasr_wide_n_s16_x (p0, z0, x0), ++ z0 = svasr_wide_x (p0, z0, x0)) ++ ++/* ++** asr_wide_x0_s16_x_untied: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s16_x_untied, svint16_t, uint64_t, ++ z0 = svasr_wide_n_s16_x (p0, z1, x0), ++ z0 = svasr_wide_x (p0, z1, x0)) ++ ++/* ++** asr_wide_1_s16_x_tied1: ++** asr z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s16_x_tied1, svint16_t, ++ z0 = svasr_wide_n_s16_x (p0, z0, 1), ++ z0 = svasr_wide_x (p0, z0, 1)) ++ ++/* ++** asr_wide_1_s16_x_untied: ++** asr z0\.h, z1\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s16_x_untied, svint16_t, ++ z0 = svasr_wide_n_s16_x (p0, z1, 1), ++ z0 = svasr_wide_x (p0, z1, 1)) ++ ++/* ++** asr_wide_15_s16_x_tied1: ++** asr z0\.h, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_15_s16_x_tied1, svint16_t, ++ z0 = svasr_wide_n_s16_x (p0, z0, 15), ++ z0 = svasr_wide_x (p0, z0, 15)) ++ ++/* ++** asr_wide_15_s16_x_untied: ++** asr z0\.h, z1\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_15_s16_x_untied, svint16_t, ++ z0 = 
svasr_wide_n_s16_x (p0, z1, 15), ++ z0 = svasr_wide_x (p0, z1, 15)) ++ ++/* ++** asr_wide_16_s16_x_tied1: ++** asr z0\.h, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_16_s16_x_tied1, svint16_t, ++ z0 = svasr_wide_n_s16_x (p0, z0, 16), ++ z0 = svasr_wide_x (p0, z0, 16)) ++ ++/* ++** asr_wide_16_s16_x_untied: ++** asr z0\.h, z1\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_16_s16_x_untied, svint16_t, ++ z0 = svasr_wide_n_s16_x (p0, z1, 16), ++ z0 = svasr_wide_x (p0, z1, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s32.c +new file mode 100644 +index 000000000..8698aef26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s32.c +@@ -0,0 +1,325 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asr_wide_s32_m_tied1: ++** asr z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s32_m_tied1, svint32_t, svuint64_t, ++ z0 = svasr_wide_s32_m (p0, z0, z4), ++ z0 = svasr_wide_m (p0, z0, z4)) ++ ++/* ++** asr_wide_s32_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** asr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_wide_s32_m_tied2, svint32_t, svuint64_t, ++ z0_res = svasr_wide_s32_m (p0, z4, z0), ++ z0_res = svasr_wide_m (p0, z4, z0)) ++ ++/* ++** asr_wide_s32_m_untied: ++** movprfx z0, z1 ++** asr z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s32_m_untied, svint32_t, svuint64_t, ++ z0 = svasr_wide_s32_m (p0, z1, z4), ++ z0 = svasr_wide_m (p0, z1, z4)) ++ ++/* ++** asr_wide_x0_s32_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s32_m_tied1, svint32_t, uint64_t, ++ z0 = svasr_wide_n_s32_m (p0, z0, x0), ++ z0 = svasr_wide_m (p0, z0, x0)) ++ ++/* ++** asr_wide_x0_s32_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** asr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s32_m_untied, svint32_t, uint64_t, ++ z0 = svasr_wide_n_s32_m (p0, z1, x0), ++ z0 = svasr_wide_m (p0, z1, x0)) ++ ++/* ++** asr_wide_1_s32_m_tied1: ++** asr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s32_m_tied1, svint32_t, ++ z0 = svasr_wide_n_s32_m (p0, z0, 1), ++ z0 = svasr_wide_m (p0, z0, 1)) ++ ++/* ++** asr_wide_1_s32_m_untied: ++** movprfx z0, z1 ++** asr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s32_m_untied, svint32_t, ++ z0 = svasr_wide_n_s32_m (p0, z1, 1), ++ z0 = svasr_wide_m (p0, z1, 1)) ++ ++/* ++** asr_wide_31_s32_m_tied1: ++** asr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_31_s32_m_tied1, svint32_t, ++ z0 = svasr_wide_n_s32_m (p0, z0, 31), ++ z0 = svasr_wide_m (p0, z0, 31)) ++ ++/* ++** asr_wide_31_s32_m_untied: ++** movprfx z0, z1 ++** asr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_31_s32_m_untied, svint32_t, ++ z0 = svasr_wide_n_s32_m (p0, z1, 31), ++ z0 = svasr_wide_m (p0, z1, 31)) ++ ++/* ++** asr_wide_32_s32_m_tied1: ++** asr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_32_s32_m_tied1, svint32_t, ++ z0 = svasr_wide_n_s32_m (p0, z0, 32), ++ z0 = svasr_wide_m (p0, z0, 32)) ++ ++/* ++** asr_wide_32_s32_m_untied: ++** movprfx z0, z1 ++** asr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_32_s32_m_untied, svint32_t, ++ z0 = svasr_wide_n_s32_m (p0, z1, 32), ++ z0 = svasr_wide_m (p0, z1, 32)) ++ ++/* ++** asr_wide_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** 
asr z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s32_z_tied1, svint32_t, svuint64_t, ++ z0 = svasr_wide_s32_z (p0, z0, z4), ++ z0 = svasr_wide_z (p0, z0, z4)) ++ ++/* ++** asr_wide_s32_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.s, p0/z, z4\.s ++** asr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_wide_s32_z_tied2, svint32_t, svuint64_t, ++ z0_res = svasr_wide_s32_z (p0, z4, z0), ++ z0_res = svasr_wide_z (p0, z4, z0)) ++ ++/* ++** asr_wide_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** asr z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s32_z_untied, svint32_t, svuint64_t, ++ z0 = svasr_wide_s32_z (p0, z1, z4), ++ z0 = svasr_wide_z (p0, z1, z4)) ++ ++/* ++** asr_wide_x0_s32_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.s, p0/z, z0\.s ++** asr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s32_z_tied1, svint32_t, uint64_t, ++ z0 = svasr_wide_n_s32_z (p0, z0, x0), ++ z0 = svasr_wide_z (p0, z0, x0)) ++ ++/* ++** asr_wide_x0_s32_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.s, p0/z, z1\.s ++** asr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s32_z_untied, svint32_t, uint64_t, ++ z0 = svasr_wide_n_s32_z (p0, z1, x0), ++ z0 = svasr_wide_z (p0, z1, x0)) ++ ++/* ++** asr_wide_1_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** asr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s32_z_tied1, svint32_t, ++ z0 = svasr_wide_n_s32_z (p0, z0, 1), ++ z0 = svasr_wide_z (p0, z0, 1)) ++ ++/* ++** asr_wide_1_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** asr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s32_z_untied, svint32_t, ++ z0 = svasr_wide_n_s32_z (p0, z1, 1), ++ z0 = svasr_wide_z (p0, z1, 1)) ++ ++/* ++** asr_wide_31_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** asr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_31_s32_z_tied1, svint32_t, ++ z0 = svasr_wide_n_s32_z (p0, z0, 31), ++ z0 = svasr_wide_z (p0, z0, 31)) ++ ++/* ++** asr_wide_31_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** asr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_31_s32_z_untied, svint32_t, ++ z0 = svasr_wide_n_s32_z (p0, z1, 31), ++ z0 = svasr_wide_z (p0, z1, 31)) ++ ++/* ++** asr_wide_32_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** asr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_32_s32_z_tied1, svint32_t, ++ z0 = svasr_wide_n_s32_z (p0, z0, 32), ++ z0 = svasr_wide_z (p0, z0, 32)) ++ ++/* ++** asr_wide_32_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** asr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_32_s32_z_untied, svint32_t, ++ z0 = svasr_wide_n_s32_z (p0, z1, 32), ++ z0 = svasr_wide_z (p0, z1, 32)) ++ ++/* ++** asr_wide_s32_x_tied1: ++** asr z0\.s, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s32_x_tied1, svint32_t, svuint64_t, ++ z0 = svasr_wide_s32_x (p0, z0, z4), ++ z0 = svasr_wide_x (p0, z0, z4)) ++ ++/* ++** asr_wide_s32_x_tied2: ++** asr z0\.s, z4\.s, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_wide_s32_x_tied2, svint32_t, svuint64_t, ++ z0_res = svasr_wide_s32_x (p0, z4, z0), ++ z0_res = svasr_wide_x (p0, z4, z0)) ++ ++/* ++** asr_wide_s32_x_untied: ++** asr z0\.s, z1\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s32_x_untied, svint32_t, svuint64_t, ++ z0 = svasr_wide_s32_x (p0, z1, z4), ++ z0 = svasr_wide_x (p0, z1, z4)) ++ ++/* ++** asr_wide_x0_s32_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX 
(asr_wide_x0_s32_x_tied1, svint32_t, uint64_t, ++ z0 = svasr_wide_n_s32_x (p0, z0, x0), ++ z0 = svasr_wide_x (p0, z0, x0)) ++ ++/* ++** asr_wide_x0_s32_x_untied: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s32_x_untied, svint32_t, uint64_t, ++ z0 = svasr_wide_n_s32_x (p0, z1, x0), ++ z0 = svasr_wide_x (p0, z1, x0)) ++ ++/* ++** asr_wide_1_s32_x_tied1: ++** asr z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s32_x_tied1, svint32_t, ++ z0 = svasr_wide_n_s32_x (p0, z0, 1), ++ z0 = svasr_wide_x (p0, z0, 1)) ++ ++/* ++** asr_wide_1_s32_x_untied: ++** asr z0\.s, z1\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s32_x_untied, svint32_t, ++ z0 = svasr_wide_n_s32_x (p0, z1, 1), ++ z0 = svasr_wide_x (p0, z1, 1)) ++ ++/* ++** asr_wide_31_s32_x_tied1: ++** asr z0\.s, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_31_s32_x_tied1, svint32_t, ++ z0 = svasr_wide_n_s32_x (p0, z0, 31), ++ z0 = svasr_wide_x (p0, z0, 31)) ++ ++/* ++** asr_wide_31_s32_x_untied: ++** asr z0\.s, z1\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_31_s32_x_untied, svint32_t, ++ z0 = svasr_wide_n_s32_x (p0, z1, 31), ++ z0 = svasr_wide_x (p0, z1, 31)) ++ ++/* ++** asr_wide_32_s32_x_tied1: ++** asr z0\.s, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_32_s32_x_tied1, svint32_t, ++ z0 = svasr_wide_n_s32_x (p0, z0, 32), ++ z0 = svasr_wide_x (p0, z0, 32)) ++ ++/* ++** asr_wide_32_s32_x_untied: ++** asr z0\.s, z1\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_32_s32_x_untied, svint32_t, ++ z0 = svasr_wide_n_s32_x (p0, z1, 32), ++ z0 = svasr_wide_x (p0, z1, 32)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s8.c +new file mode 100644 +index 000000000..77b166939 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s8.c +@@ -0,0 +1,325 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asr_wide_s8_m_tied1: ++** asr z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s8_m_tied1, svint8_t, svuint64_t, ++ z0 = svasr_wide_s8_m (p0, z0, z4), ++ z0 = svasr_wide_m (p0, z0, z4)) ++ ++/* ++** asr_wide_s8_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** asr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_wide_s8_m_tied2, svint8_t, svuint64_t, ++ z0_res = svasr_wide_s8_m (p0, z4, z0), ++ z0_res = svasr_wide_m (p0, z4, z0)) ++ ++/* ++** asr_wide_s8_m_untied: ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s8_m_untied, svint8_t, svuint64_t, ++ z0 = svasr_wide_s8_m (p0, z1, z4), ++ z0 = svasr_wide_m (p0, z1, z4)) ++ ++/* ++** asr_wide_x0_s8_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s8_m_tied1, svint8_t, uint64_t, ++ z0 = svasr_wide_n_s8_m (p0, z0, x0), ++ z0 = svasr_wide_m (p0, z0, x0)) ++ ++/* ++** asr_wide_x0_s8_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s8_m_untied, svint8_t, uint64_t, ++ z0 = svasr_wide_n_s8_m (p0, z1, x0), ++ z0 = svasr_wide_m (p0, z1, x0)) ++ ++/* ++** asr_wide_1_s8_m_tied1: ++** asr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s8_m_tied1, svint8_t, ++ z0 = svasr_wide_n_s8_m (p0, z0, 1), ++ z0 = svasr_wide_m (p0, z0, 1)) ++ ++/* ++** asr_wide_1_s8_m_untied: ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ 
++TEST_UNIFORM_Z (asr_wide_1_s8_m_untied, svint8_t, ++ z0 = svasr_wide_n_s8_m (p0, z1, 1), ++ z0 = svasr_wide_m (p0, z1, 1)) ++ ++/* ++** asr_wide_7_s8_m_tied1: ++** asr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_7_s8_m_tied1, svint8_t, ++ z0 = svasr_wide_n_s8_m (p0, z0, 7), ++ z0 = svasr_wide_m (p0, z0, 7)) ++ ++/* ++** asr_wide_7_s8_m_untied: ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_7_s8_m_untied, svint8_t, ++ z0 = svasr_wide_n_s8_m (p0, z1, 7), ++ z0 = svasr_wide_m (p0, z1, 7)) ++ ++/* ++** asr_wide_8_s8_m_tied1: ++** asr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_8_s8_m_tied1, svint8_t, ++ z0 = svasr_wide_n_s8_m (p0, z0, 8), ++ z0 = svasr_wide_m (p0, z0, 8)) ++ ++/* ++** asr_wide_8_s8_m_untied: ++** movprfx z0, z1 ++** asr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_8_s8_m_untied, svint8_t, ++ z0 = svasr_wide_n_s8_m (p0, z1, 8), ++ z0 = svasr_wide_m (p0, z1, 8)) ++ ++/* ++** asr_wide_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asr z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s8_z_tied1, svint8_t, svuint64_t, ++ z0 = svasr_wide_s8_z (p0, z0, z4), ++ z0 = svasr_wide_z (p0, z0, z4)) ++ ++/* ++** asr_wide_s8_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.b, p0/z, z4\.b ++** asr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_wide_s8_z_tied2, svint8_t, svuint64_t, ++ z0_res = svasr_wide_s8_z (p0, z4, z0), ++ z0_res = svasr_wide_z (p0, z4, z0)) ++ ++/* ++** asr_wide_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** asr z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s8_z_untied, svint8_t, svuint64_t, ++ z0 = svasr_wide_s8_z (p0, z1, z4), ++ z0 = svasr_wide_z (p0, z1, z4)) ++ ++/* ++** asr_wide_x0_s8_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.b, p0/z, z0\.b ++** asr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s8_z_tied1, svint8_t, uint64_t, ++ z0 = svasr_wide_n_s8_z (p0, z0, x0), ++ z0 = svasr_wide_z (p0, z0, x0)) ++ ++/* ++** asr_wide_x0_s8_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.b, p0/z, z1\.b ++** asr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s8_z_untied, svint8_t, uint64_t, ++ z0 = svasr_wide_n_s8_z (p0, z1, x0), ++ z0 = svasr_wide_z (p0, z1, x0)) ++ ++/* ++** asr_wide_1_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s8_z_tied1, svint8_t, ++ z0 = svasr_wide_n_s8_z (p0, z0, 1), ++ z0 = svasr_wide_z (p0, z0, 1)) ++ ++/* ++** asr_wide_1_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** asr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s8_z_untied, svint8_t, ++ z0 = svasr_wide_n_s8_z (p0, z1, 1), ++ z0 = svasr_wide_z (p0, z1, 1)) ++ ++/* ++** asr_wide_7_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_7_s8_z_tied1, svint8_t, ++ z0 = svasr_wide_n_s8_z (p0, z0, 7), ++ z0 = svasr_wide_z (p0, z0, 7)) ++ ++/* ++** asr_wide_7_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** asr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_7_s8_z_untied, svint8_t, ++ z0 = svasr_wide_n_s8_z (p0, z1, 7), ++ z0 = svasr_wide_z (p0, z1, 7)) ++ ++/* ++** asr_wide_8_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_8_s8_z_tied1, svint8_t, ++ z0 = svasr_wide_n_s8_z (p0, z0, 8), ++ z0 = svasr_wide_z (p0, z0, 8)) ++ ++/* ++** 
asr_wide_8_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** asr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_8_s8_z_untied, svint8_t, ++ z0 = svasr_wide_n_s8_z (p0, z1, 8), ++ z0 = svasr_wide_z (p0, z1, 8)) ++ ++/* ++** asr_wide_s8_x_tied1: ++** asr z0\.b, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s8_x_tied1, svint8_t, svuint64_t, ++ z0 = svasr_wide_s8_x (p0, z0, z4), ++ z0 = svasr_wide_x (p0, z0, z4)) ++ ++/* ++** asr_wide_s8_x_tied2: ++** asr z0\.b, z4\.b, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (asr_wide_s8_x_tied2, svint8_t, svuint64_t, ++ z0_res = svasr_wide_s8_x (p0, z4, z0), ++ z0_res = svasr_wide_x (p0, z4, z0)) ++ ++/* ++** asr_wide_s8_x_untied: ++** asr z0\.b, z1\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (asr_wide_s8_x_untied, svint8_t, svuint64_t, ++ z0 = svasr_wide_s8_x (p0, z1, z4), ++ z0 = svasr_wide_x (p0, z1, z4)) ++ ++/* ++** asr_wide_x0_s8_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s8_x_tied1, svint8_t, uint64_t, ++ z0 = svasr_wide_n_s8_x (p0, z0, x0), ++ z0 = svasr_wide_x (p0, z0, x0)) ++ ++/* ++** asr_wide_x0_s8_x_untied: ++** mov (z[0-9]+\.d), x0 ++** asr z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (asr_wide_x0_s8_x_untied, svint8_t, uint64_t, ++ z0 = svasr_wide_n_s8_x (p0, z1, x0), ++ z0 = svasr_wide_x (p0, z1, x0)) ++ ++/* ++** asr_wide_1_s8_x_tied1: ++** asr z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s8_x_tied1, svint8_t, ++ z0 = svasr_wide_n_s8_x (p0, z0, 1), ++ z0 = svasr_wide_x (p0, z0, 1)) ++ ++/* ++** asr_wide_1_s8_x_untied: ++** asr z0\.b, z1\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_1_s8_x_untied, svint8_t, ++ z0 = svasr_wide_n_s8_x (p0, z1, 1), ++ z0 = svasr_wide_x (p0, z1, 1)) ++ ++/* ++** asr_wide_7_s8_x_tied1: ++** asr z0\.b, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_7_s8_x_tied1, svint8_t, ++ z0 = svasr_wide_n_s8_x (p0, z0, 7), ++ z0 = svasr_wide_x (p0, z0, 7)) ++ ++/* ++** asr_wide_7_s8_x_untied: ++** asr z0\.b, z1\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_7_s8_x_untied, svint8_t, ++ z0 = svasr_wide_n_s8_x (p0, z1, 7), ++ z0 = svasr_wide_x (p0, z1, 7)) ++ ++/* ++** asr_wide_8_s8_x_tied1: ++** asr z0\.b, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_8_s8_x_tied1, svint8_t, ++ z0 = svasr_wide_n_s8_x (p0, z0, 8), ++ z0 = svasr_wide_x (p0, z0, 8)) ++ ++/* ++** asr_wide_8_s8_x_untied: ++** asr z0\.b, z1\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asr_wide_8_s8_x_untied, svint8_t, ++ z0 = svasr_wide_n_s8_x (p0, z1, 8), ++ z0 = svasr_wide_x (p0, z1, 8)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s16.c +new file mode 100644 +index 000000000..40bbce042 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s16.c +@@ -0,0 +1,177 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asrd_1_s16_m_tied1: ++** asrd z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s16_m_tied1, svint16_t, ++ z0 = svasrd_n_s16_m (p0, z0, 1), ++ z0 = svasrd_m (p0, z0, 1)) ++ ++/* ++** asrd_1_s16_m_untied: ++** movprfx z0, z1 ++** asrd z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s16_m_untied, svint16_t, ++ z0 = svasrd_n_s16_m (p0, z1, 1), ++ z0 = svasrd_m (p0, z1, 1)) ++ ++/* ++** asrd_2_s16_m_tied1: ++** asrd z0\.h, p0/m, z0\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s16_m_tied1, svint16_t, ++ z0 = svasrd_n_s16_m (p0, z0, 2), ++ z0 = svasrd_m (p0, z0, 2)) ++ ++/* ++** 
asrd_2_s16_m_untied: ++** movprfx z0, z1 ++** asrd z0\.h, p0/m, z0\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s16_m_untied, svint16_t, ++ z0 = svasrd_n_s16_m (p0, z1, 2), ++ z0 = svasrd_m (p0, z1, 2)) ++ ++/* ++** asrd_16_s16_m_tied1: ++** asrd z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_16_s16_m_tied1, svint16_t, ++ z0 = svasrd_n_s16_m (p0, z0, 16), ++ z0 = svasrd_m (p0, z0, 16)) ++ ++/* ++** asrd_16_s16_m_untied: ++** movprfx z0, z1 ++** asrd z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_16_s16_m_untied, svint16_t, ++ z0 = svasrd_n_s16_m (p0, z1, 16), ++ z0 = svasrd_m (p0, z1, 16)) ++ ++/* ++** asrd_1_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asrd z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s16_z_tied1, svint16_t, ++ z0 = svasrd_n_s16_z (p0, z0, 1), ++ z0 = svasrd_z (p0, z0, 1)) ++ ++/* ++** asrd_1_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** asrd z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s16_z_untied, svint16_t, ++ z0 = svasrd_n_s16_z (p0, z1, 1), ++ z0 = svasrd_z (p0, z1, 1)) ++ ++/* ++** asrd_2_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asrd z0\.h, p0/m, z0\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s16_z_tied1, svint16_t, ++ z0 = svasrd_n_s16_z (p0, z0, 2), ++ z0 = svasrd_z (p0, z0, 2)) ++ ++/* ++** asrd_2_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** asrd z0\.h, p0/m, z0\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s16_z_untied, svint16_t, ++ z0 = svasrd_n_s16_z (p0, z1, 2), ++ z0 = svasrd_z (p0, z1, 2)) ++ ++/* ++** asrd_16_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** asrd z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_16_s16_z_tied1, svint16_t, ++ z0 = svasrd_n_s16_z (p0, z0, 16), ++ z0 = svasrd_z (p0, z0, 16)) ++ ++/* ++** asrd_16_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** asrd z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_16_s16_z_untied, svint16_t, ++ z0 = svasrd_n_s16_z (p0, z1, 16), ++ z0 = svasrd_z (p0, z1, 16)) ++ ++/* ++** asrd_1_s16_x_tied1: ++** asrd z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s16_x_tied1, svint16_t, ++ z0 = svasrd_n_s16_x (p0, z0, 1), ++ z0 = svasrd_x (p0, z0, 1)) ++ ++/* ++** asrd_1_s16_x_untied: ++** movprfx z0, z1 ++** asrd z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s16_x_untied, svint16_t, ++ z0 = svasrd_n_s16_x (p0, z1, 1), ++ z0 = svasrd_x (p0, z1, 1)) ++ ++/* ++** asrd_2_s16_x_tied1: ++** asrd z0\.h, p0/m, z0\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s16_x_tied1, svint16_t, ++ z0 = svasrd_n_s16_x (p0, z0, 2), ++ z0 = svasrd_x (p0, z0, 2)) ++ ++/* ++** asrd_2_s16_x_untied: ++** movprfx z0, z1 ++** asrd z0\.h, p0/m, z0\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s16_x_untied, svint16_t, ++ z0 = svasrd_n_s16_x (p0, z1, 2), ++ z0 = svasrd_x (p0, z1, 2)) ++ ++/* ++** asrd_16_s16_x_tied1: ++** asrd z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_16_s16_x_tied1, svint16_t, ++ z0 = svasrd_n_s16_x (p0, z0, 16), ++ z0 = svasrd_x (p0, z0, 16)) ++ ++/* ++** asrd_16_s16_x_untied: ++** movprfx z0, z1 ++** asrd z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_16_s16_x_untied, svint16_t, ++ z0 = svasrd_n_s16_x (p0, z1, 16), ++ z0 = svasrd_x (p0, z1, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s32.c +new file mode 100644 +index 000000000..0760b03de +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s32.c +@@ -0,0 +1,177 @@ ++/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asrd_1_s32_m_tied1: ++** asrd z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s32_m_tied1, svint32_t, ++ z0 = svasrd_n_s32_m (p0, z0, 1), ++ z0 = svasrd_m (p0, z0, 1)) ++ ++/* ++** asrd_1_s32_m_untied: ++** movprfx z0, z1 ++** asrd z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s32_m_untied, svint32_t, ++ z0 = svasrd_n_s32_m (p0, z1, 1), ++ z0 = svasrd_m (p0, z1, 1)) ++ ++/* ++** asrd_2_s32_m_tied1: ++** asrd z0\.s, p0/m, z0\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s32_m_tied1, svint32_t, ++ z0 = svasrd_n_s32_m (p0, z0, 2), ++ z0 = svasrd_m (p0, z0, 2)) ++ ++/* ++** asrd_2_s32_m_untied: ++** movprfx z0, z1 ++** asrd z0\.s, p0/m, z0\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s32_m_untied, svint32_t, ++ z0 = svasrd_n_s32_m (p0, z1, 2), ++ z0 = svasrd_m (p0, z1, 2)) ++ ++/* ++** asrd_32_s32_m_tied1: ++** asrd z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_32_s32_m_tied1, svint32_t, ++ z0 = svasrd_n_s32_m (p0, z0, 32), ++ z0 = svasrd_m (p0, z0, 32)) ++ ++/* ++** asrd_32_s32_m_untied: ++** movprfx z0, z1 ++** asrd z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_32_s32_m_untied, svint32_t, ++ z0 = svasrd_n_s32_m (p0, z1, 32), ++ z0 = svasrd_m (p0, z1, 32)) ++ ++/* ++** asrd_1_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** asrd z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s32_z_tied1, svint32_t, ++ z0 = svasrd_n_s32_z (p0, z0, 1), ++ z0 = svasrd_z (p0, z0, 1)) ++ ++/* ++** asrd_1_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** asrd z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s32_z_untied, svint32_t, ++ z0 = svasrd_n_s32_z (p0, z1, 1), ++ z0 = svasrd_z (p0, z1, 1)) ++ ++/* ++** asrd_2_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** asrd z0\.s, p0/m, z0\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s32_z_tied1, svint32_t, ++ z0 = svasrd_n_s32_z (p0, z0, 2), ++ z0 = svasrd_z (p0, z0, 2)) ++ ++/* ++** asrd_2_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** asrd z0\.s, p0/m, z0\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s32_z_untied, svint32_t, ++ z0 = svasrd_n_s32_z (p0, z1, 2), ++ z0 = svasrd_z (p0, z1, 2)) ++ ++/* ++** asrd_32_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** asrd z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_32_s32_z_tied1, svint32_t, ++ z0 = svasrd_n_s32_z (p0, z0, 32), ++ z0 = svasrd_z (p0, z0, 32)) ++ ++/* ++** asrd_32_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** asrd z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_32_s32_z_untied, svint32_t, ++ z0 = svasrd_n_s32_z (p0, z1, 32), ++ z0 = svasrd_z (p0, z1, 32)) ++ ++/* ++** asrd_1_s32_x_tied1: ++** asrd z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s32_x_tied1, svint32_t, ++ z0 = svasrd_n_s32_x (p0, z0, 1), ++ z0 = svasrd_x (p0, z0, 1)) ++ ++/* ++** asrd_1_s32_x_untied: ++** movprfx z0, z1 ++** asrd z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s32_x_untied, svint32_t, ++ z0 = svasrd_n_s32_x (p0, z1, 1), ++ z0 = svasrd_x (p0, z1, 1)) ++ ++/* ++** asrd_2_s32_x_tied1: ++** asrd z0\.s, p0/m, z0\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s32_x_tied1, svint32_t, ++ z0 = svasrd_n_s32_x (p0, z0, 2), ++ z0 = svasrd_x (p0, z0, 2)) ++ ++/* ++** asrd_2_s32_x_untied: ++** movprfx z0, z1 ++** asrd z0\.s, p0/m, z0\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s32_x_untied, svint32_t, ++ z0 = svasrd_n_s32_x (p0, z1, 2), ++ z0 = svasrd_x (p0, z1, 2)) ++ ++/* ++** 
asrd_32_s32_x_tied1: ++** asrd z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_32_s32_x_tied1, svint32_t, ++ z0 = svasrd_n_s32_x (p0, z0, 32), ++ z0 = svasrd_x (p0, z0, 32)) ++ ++/* ++** asrd_32_s32_x_untied: ++** movprfx z0, z1 ++** asrd z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_32_s32_x_untied, svint32_t, ++ z0 = svasrd_n_s32_x (p0, z1, 32), ++ z0 = svasrd_x (p0, z1, 32)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s64.c +new file mode 100644 +index 000000000..0ef26c9fe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s64.c +@@ -0,0 +1,177 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asrd_1_s64_m_tied1: ++** asrd z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s64_m_tied1, svint64_t, ++ z0 = svasrd_n_s64_m (p0, z0, 1), ++ z0 = svasrd_m (p0, z0, 1)) ++ ++/* ++** asrd_1_s64_m_untied: ++** movprfx z0, z1 ++** asrd z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s64_m_untied, svint64_t, ++ z0 = svasrd_n_s64_m (p0, z1, 1), ++ z0 = svasrd_m (p0, z1, 1)) ++ ++/* ++** asrd_2_s64_m_tied1: ++** asrd z0\.d, p0/m, z0\.d, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s64_m_tied1, svint64_t, ++ z0 = svasrd_n_s64_m (p0, z0, 2), ++ z0 = svasrd_m (p0, z0, 2)) ++ ++/* ++** asrd_2_s64_m_untied: ++** movprfx z0, z1 ++** asrd z0\.d, p0/m, z0\.d, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s64_m_untied, svint64_t, ++ z0 = svasrd_n_s64_m (p0, z1, 2), ++ z0 = svasrd_m (p0, z1, 2)) ++ ++/* ++** asrd_64_s64_m_tied1: ++** asrd z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_64_s64_m_tied1, svint64_t, ++ z0 = svasrd_n_s64_m (p0, z0, 64), ++ z0 = svasrd_m (p0, z0, 64)) ++ ++/* ++** asrd_64_s64_m_untied: ++** movprfx z0, z1 ++** asrd z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_64_s64_m_untied, svint64_t, ++ z0 = svasrd_n_s64_m (p0, z1, 64), ++ z0 = svasrd_m (p0, z1, 64)) ++ ++/* ++** asrd_1_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** asrd z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s64_z_tied1, svint64_t, ++ z0 = svasrd_n_s64_z (p0, z0, 1), ++ z0 = svasrd_z (p0, z0, 1)) ++ ++/* ++** asrd_1_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** asrd z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s64_z_untied, svint64_t, ++ z0 = svasrd_n_s64_z (p0, z1, 1), ++ z0 = svasrd_z (p0, z1, 1)) ++ ++/* ++** asrd_2_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** asrd z0\.d, p0/m, z0\.d, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s64_z_tied1, svint64_t, ++ z0 = svasrd_n_s64_z (p0, z0, 2), ++ z0 = svasrd_z (p0, z0, 2)) ++ ++/* ++** asrd_2_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** asrd z0\.d, p0/m, z0\.d, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s64_z_untied, svint64_t, ++ z0 = svasrd_n_s64_z (p0, z1, 2), ++ z0 = svasrd_z (p0, z1, 2)) ++ ++/* ++** asrd_64_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** asrd z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_64_s64_z_tied1, svint64_t, ++ z0 = svasrd_n_s64_z (p0, z0, 64), ++ z0 = svasrd_z (p0, z0, 64)) ++ ++/* ++** asrd_64_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** asrd z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_64_s64_z_untied, svint64_t, ++ z0 = svasrd_n_s64_z (p0, z1, 64), ++ z0 = svasrd_z (p0, z1, 64)) ++ ++/* ++** asrd_1_s64_x_tied1: ++** asrd z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s64_x_tied1, svint64_t, ++ z0 = 
svasrd_n_s64_x (p0, z0, 1), ++ z0 = svasrd_x (p0, z0, 1)) ++ ++/* ++** asrd_1_s64_x_untied: ++** movprfx z0, z1 ++** asrd z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s64_x_untied, svint64_t, ++ z0 = svasrd_n_s64_x (p0, z1, 1), ++ z0 = svasrd_x (p0, z1, 1)) ++ ++/* ++** asrd_2_s64_x_tied1: ++** asrd z0\.d, p0/m, z0\.d, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s64_x_tied1, svint64_t, ++ z0 = svasrd_n_s64_x (p0, z0, 2), ++ z0 = svasrd_x (p0, z0, 2)) ++ ++/* ++** asrd_2_s64_x_untied: ++** movprfx z0, z1 ++** asrd z0\.d, p0/m, z0\.d, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s64_x_untied, svint64_t, ++ z0 = svasrd_n_s64_x (p0, z1, 2), ++ z0 = svasrd_x (p0, z1, 2)) ++ ++/* ++** asrd_64_s64_x_tied1: ++** asrd z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_64_s64_x_tied1, svint64_t, ++ z0 = svasrd_n_s64_x (p0, z0, 64), ++ z0 = svasrd_x (p0, z0, 64)) ++ ++/* ++** asrd_64_s64_x_untied: ++** movprfx z0, z1 ++** asrd z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_64_s64_x_untied, svint64_t, ++ z0 = svasrd_n_s64_x (p0, z1, 64), ++ z0 = svasrd_x (p0, z1, 64)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s8.c +new file mode 100644 +index 000000000..9249ffbcb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s8.c +@@ -0,0 +1,177 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** asrd_1_s8_m_tied1: ++** asrd z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s8_m_tied1, svint8_t, ++ z0 = svasrd_n_s8_m (p0, z0, 1), ++ z0 = svasrd_m (p0, z0, 1)) ++ ++/* ++** asrd_1_s8_m_untied: ++** movprfx z0, z1 ++** asrd z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s8_m_untied, svint8_t, ++ z0 = svasrd_n_s8_m (p0, z1, 1), ++ z0 = svasrd_m (p0, z1, 1)) ++ ++/* ++** asrd_2_s8_m_tied1: ++** asrd z0\.b, p0/m, z0\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s8_m_tied1, svint8_t, ++ z0 = svasrd_n_s8_m (p0, z0, 2), ++ z0 = svasrd_m (p0, z0, 2)) ++ ++/* ++** asrd_2_s8_m_untied: ++** movprfx z0, z1 ++** asrd z0\.b, p0/m, z0\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s8_m_untied, svint8_t, ++ z0 = svasrd_n_s8_m (p0, z1, 2), ++ z0 = svasrd_m (p0, z1, 2)) ++ ++/* ++** asrd_8_s8_m_tied1: ++** asrd z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_8_s8_m_tied1, svint8_t, ++ z0 = svasrd_n_s8_m (p0, z0, 8), ++ z0 = svasrd_m (p0, z0, 8)) ++ ++/* ++** asrd_8_s8_m_untied: ++** movprfx z0, z1 ++** asrd z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_8_s8_m_untied, svint8_t, ++ z0 = svasrd_n_s8_m (p0, z1, 8), ++ z0 = svasrd_m (p0, z1, 8)) ++ ++/* ++** asrd_1_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asrd z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s8_z_tied1, svint8_t, ++ z0 = svasrd_n_s8_z (p0, z0, 1), ++ z0 = svasrd_z (p0, z0, 1)) ++ ++/* ++** asrd_1_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** asrd z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s8_z_untied, svint8_t, ++ z0 = svasrd_n_s8_z (p0, z1, 1), ++ z0 = svasrd_z (p0, z1, 1)) ++ ++/* ++** asrd_2_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asrd z0\.b, p0/m, z0\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s8_z_tied1, svint8_t, ++ z0 = svasrd_n_s8_z (p0, z0, 2), ++ z0 = svasrd_z (p0, z0, 2)) ++ ++/* ++** asrd_2_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** asrd z0\.b, p0/m, z0\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s8_z_untied, svint8_t, ++ z0 = svasrd_n_s8_z (p0, 
z1, 2), ++ z0 = svasrd_z (p0, z1, 2)) ++ ++/* ++** asrd_8_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** asrd z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_8_s8_z_tied1, svint8_t, ++ z0 = svasrd_n_s8_z (p0, z0, 8), ++ z0 = svasrd_z (p0, z0, 8)) ++ ++/* ++** asrd_8_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** asrd z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_8_s8_z_untied, svint8_t, ++ z0 = svasrd_n_s8_z (p0, z1, 8), ++ z0 = svasrd_z (p0, z1, 8)) ++ ++/* ++** asrd_1_s8_x_tied1: ++** asrd z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s8_x_tied1, svint8_t, ++ z0 = svasrd_n_s8_x (p0, z0, 1), ++ z0 = svasrd_x (p0, z0, 1)) ++ ++/* ++** asrd_1_s8_x_untied: ++** movprfx z0, z1 ++** asrd z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_1_s8_x_untied, svint8_t, ++ z0 = svasrd_n_s8_x (p0, z1, 1), ++ z0 = svasrd_x (p0, z1, 1)) ++ ++/* ++** asrd_2_s8_x_tied1: ++** asrd z0\.b, p0/m, z0\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s8_x_tied1, svint8_t, ++ z0 = svasrd_n_s8_x (p0, z0, 2), ++ z0 = svasrd_x (p0, z0, 2)) ++ ++/* ++** asrd_2_s8_x_untied: ++** movprfx z0, z1 ++** asrd z0\.b, p0/m, z0\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_2_s8_x_untied, svint8_t, ++ z0 = svasrd_n_s8_x (p0, z1, 2), ++ z0 = svasrd_x (p0, z1, 2)) ++ ++/* ++** asrd_8_s8_x_tied1: ++** asrd z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_8_s8_x_tied1, svint8_t, ++ z0 = svasrd_n_s8_x (p0, z0, 8), ++ z0 = svasrd_x (p0, z0, 8)) ++ ++/* ++** asrd_8_s8_x_untied: ++** movprfx z0, z1 ++** asrd z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (asrd_8_s8_x_untied, svint8_t, ++ z0 = svasrd_n_s8_x (p0, z1, 8), ++ z0 = svasrd_x (p0, z1, 8)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c +new file mode 100644 +index 000000000..376622da0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c +@@ -0,0 +1,67 @@ ++/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ ++/* { dg-require-effective-target aarch64_asm_bf16_ok } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bfdot_f32_tied1: ++** bfdot z0\.s, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (bfdot_f32_tied1, svfloat32_t, svbfloat16_t, ++ z0 = svbfdot_f32 (z0, z4, z5), ++ z0 = svbfdot (z0, z4, z5)) ++ ++/* ++** bfdot_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfdot z0\.s, \1\.h, z1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (bfdot_f32_tied2, svfloat32_t, svbfloat16_t, ++ z0_res = svbfdot_f32 (z4, z0, z1), ++ z0_res = svbfdot (z4, z0, z1)) ++ ++/* ++** bfdot_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfdot z0\.s, z1\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (bfdot_f32_tied3, svfloat32_t, svbfloat16_t, ++ z0_res = svbfdot_f32 (z4, z1, z0), ++ z0_res = svbfdot (z4, z1, z0)) ++ ++/* ++** bfdot_f32_untied: ++** movprfx z0, z1 ++** bfdot z0\.s, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (bfdot_f32_untied, svfloat32_t, svbfloat16_t, ++ z0 = svbfdot_f32 (z1, z4, z5), ++ z0 = svbfdot (z1, z4, z5)) ++ ++/* ++** bfdot_h7_f32_tied1: ++** mov (z[0-9]+\.h), h7 ++** bfdot z0\.s, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_ZD (bfdot_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, ++ z0 = svbfdot_n_f32 (z0, z4, d7), ++ z0 = svbfdot (z0, z4, d7)) ++ ++/* ++** bfdot_h7_f32_untied: ++** mov (z[0-9]+\.h), h7 ++** movprfx z0, z1 ++** bfdot z0\.s, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_ZD (bfdot_h7_f32_untied, svfloat32_t, 
svbfloat16_t, bfloat16_t, ++ z0 = svbfdot_n_f32 (z1, z4, d7), ++ z0 = svbfdot (z1, z4, d7)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c +new file mode 100644 +index 000000000..0f624fe9f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c +@@ -0,0 +1,86 @@ ++/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ ++/* { dg-require-effective-target aarch64_asm_bf16_ok } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bfdot_lane_0_f32_tied1: ++** bfdot z0\.s, z4\.h, z5\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z (bfdot_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, ++ z0 = svbfdot_lane_f32 (z0, z4, z5, 0), ++ z0 = svbfdot_lane (z0, z4, z5, 0)) ++ ++/* ++** bfdot_lane_0_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfdot z0\.s, \1\.h, z1\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, ++ z0_res = svbfdot_lane_f32 (z4, z0, z1, 0), ++ z0_res = svbfdot_lane (z4, z0, z1, 0)) ++ ++/* ++** bfdot_lane_0_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfdot z0\.s, z1\.h, \1\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, ++ z0_res = svbfdot_lane_f32 (z4, z1, z0, 0), ++ z0_res = svbfdot_lane (z4, z1, z0, 0)) ++ ++/* ++** bfdot_lane_0_f32_untied: ++** movprfx z0, z1 ++** bfdot z0\.s, z4\.h, z5\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z (bfdot_lane_0_f32_untied, svfloat32_t, svbfloat16_t, ++ z0 = svbfdot_lane_f32 (z1, z4, z5, 0), ++ z0 = svbfdot_lane (z1, z4, z5, 0)) ++ ++/* ++** bfdot_lane_1_f32: ++** bfdot z0\.s, z4\.h, z5\.h\[1\] ++** ret ++*/ ++TEST_DUAL_Z (bfdot_lane_1_f32, svfloat32_t, svbfloat16_t, ++ z0 = svbfdot_lane_f32 (z0, z4, z5, 1), ++ z0 = svbfdot_lane (z0, z4, z5, 1)) ++ ++/* ++** bfdot_lane_3_f32: ++** bfdot z0\.s, z4\.h, z5\.h\[3\] ++** ret ++*/ ++TEST_DUAL_Z (bfdot_lane_3_f32, svfloat32_t, svbfloat16_t, ++ z0 = svbfdot_lane_f32 (z0, z4, z5, 3), ++ z0 = svbfdot_lane (z0, z4, z5, 3)) ++ ++/* ++** bfdot_lane_z8_f32: ++** str d8, \[sp, -16\]! 
++** mov (z[0-7])\.d, z8\.d ++** bfdot z0\.s, z1\.h, \1\.h\[1\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (bfdot_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, ++ z0 = svbfdot_lane_f32 (z0, z1, z8, 1), ++ z0 = svbfdot_lane (z0, z1, z8, 1)) ++ ++/* ++** bfdot_lane_z16_f32: ++** mov (z[0-7])\.d, z16\.d ++** bfdot z0\.s, z1\.h, \1\.h\[1\] ++** ret ++*/ ++TEST_DUAL_LANE_REG (bfdot_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, ++ z0 = svbfdot_lane_f32 (z0, z1, z16, 1), ++ z0 = svbfdot_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c +new file mode 100644 +index 000000000..0f810116c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c +@@ -0,0 +1,67 @@ ++/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ ++/* { dg-require-effective-target aarch64_asm_bf16_ok } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bfmlalb_f32_tied1: ++** bfmlalb z0\.s, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (bfmlalb_f32_tied1, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalb_f32 (z0, z4, z5), ++ z0 = svbfmlalb (z0, z4, z5)) ++ ++/* ++** bfmlalb_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfmlalb z0\.s, \1\.h, z1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (bfmlalb_f32_tied2, svfloat32_t, svbfloat16_t, ++ z0_res = svbfmlalb_f32 (z4, z0, z1), ++ z0_res = svbfmlalb (z4, z0, z1)) ++ ++/* ++** bfmlalb_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfmlalb z0\.s, z1\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (bfmlalb_f32_tied3, svfloat32_t, svbfloat16_t, ++ z0_res = svbfmlalb_f32 (z4, z1, z0), ++ z0_res = svbfmlalb (z4, z1, z0)) ++ ++/* ++** bfmlalb_f32_untied: ++** movprfx z0, z1 ++** bfmlalb z0\.s, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (bfmlalb_f32_untied, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalb_f32 (z1, z4, z5), ++ z0 = svbfmlalb (z1, z4, z5)) ++ ++/* ++** bfmlalb_h7_f32_tied1: ++** mov (z[0-9]+\.h), h7 ++** bfmlalb z0\.s, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_ZD (bfmlalb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, ++ z0 = svbfmlalb_n_f32 (z0, z4, d7), ++ z0 = svbfmlalb (z0, z4, d7)) ++ ++/* ++** bfmlalb_h7_f32_untied: ++** mov (z[0-9]+\.h), h7 ++** movprfx z0, z1 ++** bfmlalb z0\.s, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_ZD (bfmlalb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t, ++ z0 = svbfmlalb_n_f32 (z1, z4, d7), ++ z0 = svbfmlalb (z1, z4, d7)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c +new file mode 100644 +index 000000000..b0ec0881d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c +@@ -0,0 +1,86 @@ ++/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ ++/* { dg-require-effective-target aarch64_asm_bf16_ok } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bfmlalb_lane_0_f32_tied1: ++** bfmlalb z0\.s, z4\.h, z5\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z (bfmlalb_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalb_lane_f32 (z0, z4, z5, 0), ++ z0 = svbfmlalb_lane (z0, z4, z5, 0)) ++ ++/* ++** bfmlalb_lane_0_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfmlalb z0\.s, \1\.h, z1\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, ++ z0_res = svbfmlalb_lane_f32 (z4, z0, 
z1, 0), ++ z0_res = svbfmlalb_lane (z4, z0, z1, 0)) ++ ++/* ++** bfmlalb_lane_0_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfmlalb z0\.s, z1\.h, \1\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, ++ z0_res = svbfmlalb_lane_f32 (z4, z1, z0, 0), ++ z0_res = svbfmlalb_lane (z4, z1, z0, 0)) ++ ++/* ++** bfmlalb_lane_0_f32_untied: ++** movprfx z0, z1 ++** bfmlalb z0\.s, z4\.h, z5\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z (bfmlalb_lane_0_f32_untied, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalb_lane_f32 (z1, z4, z5, 0), ++ z0 = svbfmlalb_lane (z1, z4, z5, 0)) ++ ++/* ++** bfmlalb_lane_1_f32: ++** bfmlalb z0\.s, z4\.h, z5\.h\[1\] ++** ret ++*/ ++TEST_DUAL_Z (bfmlalb_lane_1_f32, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalb_lane_f32 (z0, z4, z5, 1), ++ z0 = svbfmlalb_lane (z0, z4, z5, 1)) ++ ++/* ++** bfmlalb_lane_7_f32: ++** bfmlalb z0\.s, z4\.h, z5\.h\[7\] ++** ret ++*/ ++TEST_DUAL_Z (bfmlalb_lane_7_f32, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalb_lane_f32 (z0, z4, z5, 7), ++ z0 = svbfmlalb_lane (z0, z4, z5, 7)) ++ ++/* ++** bfmlalb_lane_z8_f32: ++** str d8, \[sp, -16\]! ++** mov (z[0-7])\.d, z8\.d ++** bfmlalb z0\.s, z1\.h, \1\.h\[1\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (bfmlalb_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, ++ z0 = svbfmlalb_lane_f32 (z0, z1, z8, 1), ++ z0 = svbfmlalb_lane (z0, z1, z8, 1)) ++ ++/* ++** bfmlalb_lane_z16_f32: ++** mov (z[0-7])\.d, z16\.d ++** bfmlalb z0\.s, z1\.h, \1\.h\[1\] ++** ret ++*/ ++TEST_DUAL_LANE_REG (bfmlalb_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, ++ z0 = svbfmlalb_lane_f32 (z0, z1, z16, 1), ++ z0 = svbfmlalb_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c +new file mode 100644 +index 000000000..2a583fa4a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c +@@ -0,0 +1,67 @@ ++/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ ++/* { dg-require-effective-target aarch64_asm_bf16_ok } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bfmlalt_f32_tied1: ++** bfmlalt z0\.s, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (bfmlalt_f32_tied1, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalt_f32 (z0, z4, z5), ++ z0 = svbfmlalt (z0, z4, z5)) ++ ++/* ++** bfmlalt_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfmlalt z0\.s, \1\.h, z1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (bfmlalt_f32_tied2, svfloat32_t, svbfloat16_t, ++ z0_res = svbfmlalt_f32 (z4, z0, z1), ++ z0_res = svbfmlalt (z4, z0, z1)) ++ ++/* ++** bfmlalt_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfmlalt z0\.s, z1\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (bfmlalt_f32_tied3, svfloat32_t, svbfloat16_t, ++ z0_res = svbfmlalt_f32 (z4, z1, z0), ++ z0_res = svbfmlalt (z4, z1, z0)) ++ ++/* ++** bfmlalt_f32_untied: ++** movprfx z0, z1 ++** bfmlalt z0\.s, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (bfmlalt_f32_untied, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalt_f32 (z1, z4, z5), ++ z0 = svbfmlalt (z1, z4, z5)) ++ ++/* ++** bfmlalt_h7_f32_tied1: ++** mov (z[0-9]+\.h), h7 ++** bfmlalt z0\.s, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_ZD (bfmlalt_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, ++ z0 = svbfmlalt_n_f32 (z0, z4, d7), ++ z0 = svbfmlalt (z0, z4, d7)) ++ ++/* ++** bfmlalt_h7_f32_untied: ++** mov (z[0-9]+\.h), h7 ++** movprfx z0, z1 ++** bfmlalt z0\.s, z4\.h, \1 ++** ret ++*/ 
++TEST_DUAL_ZD (bfmlalt_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t, ++ z0 = svbfmlalt_n_f32 (z1, z4, d7), ++ z0 = svbfmlalt (z1, z4, d7)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c +new file mode 100644 +index 000000000..3af3997e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c +@@ -0,0 +1,86 @@ ++/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ ++/* { dg-require-effective-target aarch64_asm_bf16_ok } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bfmlalt_lane_0_f32_tied1: ++** bfmlalt z0\.s, z4\.h, z5\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z (bfmlalt_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalt_lane_f32 (z0, z4, z5, 0), ++ z0 = svbfmlalt_lane (z0, z4, z5, 0)) ++ ++/* ++** bfmlalt_lane_0_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfmlalt z0\.s, \1\.h, z1\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, ++ z0_res = svbfmlalt_lane_f32 (z4, z0, z1, 0), ++ z0_res = svbfmlalt_lane (z4, z0, z1, 0)) ++ ++/* ++** bfmlalt_lane_0_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfmlalt z0\.s, z1\.h, \1\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, ++ z0_res = svbfmlalt_lane_f32 (z4, z1, z0, 0), ++ z0_res = svbfmlalt_lane (z4, z1, z0, 0)) ++ ++/* ++** bfmlalt_lane_0_f32_untied: ++** movprfx z0, z1 ++** bfmlalt z0\.s, z4\.h, z5\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z (bfmlalt_lane_0_f32_untied, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalt_lane_f32 (z1, z4, z5, 0), ++ z0 = svbfmlalt_lane (z1, z4, z5, 0)) ++ ++/* ++** bfmlalt_lane_1_f32: ++** bfmlalt z0\.s, z4\.h, z5\.h\[1\] ++** ret ++*/ ++TEST_DUAL_Z (bfmlalt_lane_1_f32, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalt_lane_f32 (z0, z4, z5, 1), ++ z0 = svbfmlalt_lane (z0, z4, z5, 1)) ++ ++/* ++** bfmlalt_lane_7_f32: ++** bfmlalt z0\.s, z4\.h, z5\.h\[7\] ++** ret ++*/ ++TEST_DUAL_Z (bfmlalt_lane_7_f32, svfloat32_t, svbfloat16_t, ++ z0 = svbfmlalt_lane_f32 (z0, z4, z5, 7), ++ z0 = svbfmlalt_lane (z0, z4, z5, 7)) ++ ++/* ++** bfmlalt_lane_z8_f32: ++** str d8, \[sp, -16\]! 
++** mov (z[0-7])\.d, z8\.d ++** bfmlalt z0\.s, z1\.h, \1\.h\[1\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (bfmlalt_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, ++ z0 = svbfmlalt_lane_f32 (z0, z1, z8, 1), ++ z0 = svbfmlalt_lane (z0, z1, z8, 1)) ++ ++/* ++** bfmlalt_lane_z16_f32: ++** mov (z[0-7])\.d, z16\.d ++** bfmlalt z0\.s, z1\.h, \1\.h\[1\] ++** ret ++*/ ++TEST_DUAL_LANE_REG (bfmlalt_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, ++ z0 = svbfmlalt_lane_f32 (z0, z1, z16, 1), ++ z0 = svbfmlalt_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c +new file mode 100644 +index 000000000..b1d98fbf5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c +@@ -0,0 +1,46 @@ ++/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ ++/* { dg-require-effective-target aarch64_asm_bf16_ok } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bfmmla_f32_tied1: ++** bfmmla z0\.s, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (bfmmla_f32_tied1, svfloat32_t, svbfloat16_t, ++ z0 = svbfmmla_f32 (z0, z4, z5), ++ z0 = svbfmmla (z0, z4, z5)) ++ ++/* ++** bfmmla_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfmmla z0\.s, \1\.h, z1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (bfmmla_f32_tied2, svfloat32_t, svbfloat16_t, ++ z0_res = svbfmmla_f32 (z4, z0, z1), ++ z0_res = svbfmmla (z4, z0, z1)) ++ ++/* ++** bfmmla_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfmmla z0\.s, z1\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (bfmmla_f32_tied3, svfloat32_t, svbfloat16_t, ++ z0_res = svbfmmla_f32 (z4, z1, z0), ++ z0_res = svbfmmla (z4, z1, z0)) ++ ++/* ++** bfmmla_f32_untied: ++** movprfx z0, z1 ++** bfmmla z0\.s, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (bfmmla_f32_untied, svfloat32_t, svbfloat16_t, ++ z0 = svbfmmla_f32 (z1, z4, z5), ++ z0 = svbfmmla (z1, z4, z5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_b.c +new file mode 100644 +index 000000000..9d41aeaa2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_b.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bic_b_z_tied1: ++** bic p0\.b, p3/z, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (bic_b_z_tied1, ++ p0 = svbic_b_z (p3, p0, p1), ++ p0 = svbic_z (p3, p0, p1)) ++ ++/* ++** bic_b_z_tied2: ++** bic p0\.b, p3/z, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (bic_b_z_tied2, ++ p0 = svbic_b_z (p3, p1, p0), ++ p0 = svbic_z (p3, p1, p0)) ++ ++/* ++** bic_b_z_untied: ++** bic p0\.b, p3/z, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (bic_b_z_untied, ++ p0 = svbic_b_z (p3, p1, p2), ++ p0 = svbic_z (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s16.c +new file mode 100644 +index 000000000..c80f5697f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s16.c +@@ -0,0 +1,367 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bic_s16_m_tied1: ++** bic z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s16_m_tied1, svint16_t, ++ z0 = svbic_s16_m (p0, z0, z1), ++ z0 = svbic_m (p0, z0, z1)) ++ ++/* ++** bic_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** bic z0\.h, p0/m, 
z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s16_m_tied2, svint16_t, ++ z0 = svbic_s16_m (p0, z1, z0), ++ z0 = svbic_m (p0, z1, z0)) ++ ++/* ++** bic_s16_m_untied: ++** movprfx z0, z1 ++** bic z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s16_m_untied, svint16_t, ++ z0 = svbic_s16_m (p0, z1, z2), ++ z0 = svbic_m (p0, z1, z2)) ++ ++/* ++** bic_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** bic z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svbic_n_s16_m (p0, z0, x0), ++ z0 = svbic_m (p0, z0, x0)) ++ ++/* ++** bic_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** bic z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svbic_n_s16_m (p0, z1, x0), ++ z0 = svbic_m (p0, z1, x0)) ++ ++/* ++** bic_1_s16_m_tied1: ++** mov (z[0-9]+\.h), #-2 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s16_m_tied1, svint16_t, ++ z0 = svbic_n_s16_m (p0, z0, 1), ++ z0 = svbic_m (p0, z0, 1)) ++ ++/* ++** bic_1_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #-2 ++** movprfx z0, z1 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s16_m_untied, svint16_t, ++ z0 = svbic_n_s16_m (p0, z1, 1), ++ z0 = svbic_m (p0, z1, 1)) ++ ++/* ++** bic_m2_s16_m: ++** mov (z[0-9]+\.h), #1 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m2_s16_m, svint16_t, ++ z0 = svbic_n_s16_m (p0, z0, -2), ++ z0 = svbic_m (p0, z0, -2)) ++ ++/* ++** bic_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** bic z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s16_z_tied1, svint16_t, ++ z0 = svbic_s16_z (p0, z0, z1), ++ z0 = svbic_z (p0, z0, z1)) ++ ++/* ++** bic_s16_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** bic z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s16_z_tied2, svint16_t, ++ z0 = svbic_s16_z (p0, z1, z0), ++ z0 = svbic_z (p0, z1, z0)) ++ ++/* ++** bic_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** bic z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s16_z_untied, svint16_t, ++ z0 = svbic_s16_z (p0, z1, z2), ++ z0 = svbic_z (p0, z1, z2)) ++ ++/* ++** bic_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** bic z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svbic_n_s16_z (p0, z0, x0), ++ z0 = svbic_z (p0, z0, x0)) ++ ++/* ++** bic_w0_s16_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z1\.h ++** bic z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svbic_n_s16_z (p0, z1, x0), ++ z0 = svbic_z (p0, z1, x0)) ++ ++/* ++** bic_1_s16_z_tied1: ++** mov (z[0-9]+\.h), #-2 ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s16_z_tied1, svint16_t, ++ z0 = svbic_n_s16_z (p0, z0, 1), ++ z0 = svbic_z (p0, z0, 1)) ++ ++/* ++** bic_1_s16_z_untied: ++** mov (z[0-9]+\.h), #-2 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s16_z_untied, svint16_t, ++ z0 = svbic_n_s16_z (p0, z1, 1), ++ z0 = svbic_z (p0, z1, 1)) ++ ++/* ++** bic_s16_x_tied1: ++** bic z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s16_x_tied1, svint16_t, ++ z0 = svbic_s16_x (p0, z0, z1), ++ z0 = svbic_x (p0, z0, z1)) ++ ++/* ++** 
bic_s16_x_tied2: ++** bic z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s16_x_tied2, svint16_t, ++ z0 = svbic_s16_x (p0, z1, z0), ++ z0 = svbic_x (p0, z1, z0)) ++ ++/* ++** bic_s16_x_untied: ++** bic z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s16_x_untied, svint16_t, ++ z0 = svbic_s16_x (p0, z1, z2), ++ z0 = svbic_x (p0, z1, z2)) ++ ++/* ++** bic_w0_s16_x_tied1: ++** mov (z[0-9]+)\.h, w0 ++** bic z0\.d, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svbic_n_s16_x (p0, z0, x0), ++ z0 = svbic_x (p0, z0, x0)) ++ ++/* ++** bic_w0_s16_x_untied: ++** mov (z[0-9]+)\.h, w0 ++** bic z0\.d, z1\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svbic_n_s16_x (p0, z1, x0), ++ z0 = svbic_x (p0, z1, x0)) ++ ++/* ++** bic_1_s16_x_tied1: ++** and z0\.h, z0\.h, #0xfffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s16_x_tied1, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, 1), ++ z0 = svbic_x (p0, z0, 1)) ++ ++/* ++** bic_1_s16_x_untied: ++** movprfx z0, z1 ++** and z0\.h, z0\.h, #0xfffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s16_x_untied, svint16_t, ++ z0 = svbic_n_s16_x (p0, z1, 1), ++ z0 = svbic_x (p0, z1, 1)) ++ ++/* ++** bic_127_s16_x: ++** and z0\.h, z0\.h, #0xff80 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_127_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, 127), ++ z0 = svbic_x (p0, z0, 127)) ++ ++/* ++** bic_128_s16_x: ++** and z0\.h, z0\.h, #0xff7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_128_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, 128), ++ z0 = svbic_x (p0, z0, 128)) ++ ++/* ++** bic_255_s16_x: ++** and z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_255_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, 255), ++ z0 = svbic_x (p0, z0, 255)) ++ ++/* ++** bic_256_s16_x: ++** and z0\.h, z0\.h, #0xfeff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_256_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, 256), ++ z0 = svbic_x (p0, z0, 256)) ++ ++/* ++** bic_257_s16_x: ++** and z0\.h, z0\.h, #0xfefe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_257_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, 257), ++ z0 = svbic_x (p0, z0, 257)) ++ ++/* ++** bic_512_s16_x: ++** and z0\.h, z0\.h, #0xfdff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_512_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, 512), ++ z0 = svbic_x (p0, z0, 512)) ++ ++/* ++** bic_65280_s16_x: ++** and z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_65280_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, 0xff00), ++ z0 = svbic_x (p0, z0, 0xff00)) ++ ++/* ++** bic_m127_s16_x: ++** and z0\.h, z0\.h, #0x7e ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m127_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, -127), ++ z0 = svbic_x (p0, z0, -127)) ++ ++/* ++** bic_m128_s16_x: ++** and z0\.h, z0\.h, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m128_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, -128), ++ z0 = svbic_x (p0, z0, -128)) ++ ++/* ++** bic_m255_s16_x: ++** and z0\.h, z0\.h, #0xfe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m255_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, -255), ++ z0 = svbic_x (p0, z0, -255)) ++ ++/* ++** bic_m256_s16_x: ++** and z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m256_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, -256), ++ z0 = svbic_x (p0, z0, -256)) ++ ++/* ++** bic_m257_s16_x: ++** and z0\.h, z0\.h, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m257_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, -257), ++ z0 = svbic_x (p0, z0, -257)) ++ ++/* ++** bic_m512_s16_x: ++** and z0\.h, z0\.h, #0x1ff ++** ret ++*/ ++TEST_UNIFORM_Z 
(bic_m512_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, -512), ++ z0 = svbic_x (p0, z0, -512)) ++ ++/* ++** bic_m32768_s16_x: ++** and z0\.h, z0\.h, #0x7fff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m32768_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, -0x8000), ++ z0 = svbic_x (p0, z0, -0x8000)) ++ ++/* ++** bic_5_s16_x: ++** mov (z[0-9]+)\.h, #-6 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_5_s16_x, svint16_t, ++ z0 = svbic_n_s16_x (p0, z0, 5), ++ z0 = svbic_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s32.c +new file mode 100644 +index 000000000..9e388e499 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s32.c +@@ -0,0 +1,363 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bic_s32_m_tied1: ++** bic z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s32_m_tied1, svint32_t, ++ z0 = svbic_s32_m (p0, z0, z1), ++ z0 = svbic_m (p0, z0, z1)) ++ ++/* ++** bic_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** bic z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s32_m_tied2, svint32_t, ++ z0 = svbic_s32_m (p0, z1, z0), ++ z0 = svbic_m (p0, z1, z0)) ++ ++/* ++** bic_s32_m_untied: ++** movprfx z0, z1 ++** bic z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s32_m_untied, svint32_t, ++ z0 = svbic_s32_m (p0, z1, z2), ++ z0 = svbic_m (p0, z1, z2)) ++ ++/* ++** bic_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** bic z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svbic_n_s32_m (p0, z0, x0), ++ z0 = svbic_m (p0, z0, x0)) ++ ++/* ++** bic_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** bic z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svbic_n_s32_m (p0, z1, x0), ++ z0 = svbic_m (p0, z1, x0)) ++ ++/* ++** bic_1_s32_m_tied1: ++** mov (z[0-9]+\.s), #-2 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s32_m_tied1, svint32_t, ++ z0 = svbic_n_s32_m (p0, z0, 1), ++ z0 = svbic_m (p0, z0, 1)) ++ ++/* ++** bic_1_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #-2 ++** movprfx z0, z1 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s32_m_untied, svint32_t, ++ z0 = svbic_n_s32_m (p0, z1, 1), ++ z0 = svbic_m (p0, z1, 1)) ++ ++/* ++** bic_m2_s32_m: ++** mov (z[0-9]+\.s), #1 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m2_s32_m, svint32_t, ++ z0 = svbic_n_s32_m (p0, z0, -2), ++ z0 = svbic_m (p0, z0, -2)) ++ ++/* ++** bic_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** bic z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s32_z_tied1, svint32_t, ++ z0 = svbic_s32_z (p0, z0, z1), ++ z0 = svbic_z (p0, z0, z1)) ++ ++/* ++** bic_s32_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** bic z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s32_z_tied2, svint32_t, ++ z0 = svbic_s32_z (p0, z1, z0), ++ z0 = svbic_z (p0, z1, z0)) ++ ++/* ++** bic_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** bic z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s32_z_untied, svint32_t, ++ z0 = svbic_s32_z (p0, z1, z2), ++ z0 = svbic_z (p0, z1, z2)) ++ ++/* ++** bic_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** bic z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX 
(bic_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svbic_n_s32_z (p0, z0, x0), ++ z0 = svbic_z (p0, z0, x0)) ++ ++/* ++** bic_w0_s32_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z1\.s ++** bic z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svbic_n_s32_z (p0, z1, x0), ++ z0 = svbic_z (p0, z1, x0)) ++ ++/* ++** bic_1_s32_z_tied1: ++** mov (z[0-9]+\.s), #-2 ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s32_z_tied1, svint32_t, ++ z0 = svbic_n_s32_z (p0, z0, 1), ++ z0 = svbic_z (p0, z0, 1)) ++ ++/* ++** bic_1_s32_z_untied: ++** mov (z[0-9]+\.s), #-2 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s32_z_untied, svint32_t, ++ z0 = svbic_n_s32_z (p0, z1, 1), ++ z0 = svbic_z (p0, z1, 1)) ++ ++/* ++** bic_s32_x_tied1: ++** bic z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s32_x_tied1, svint32_t, ++ z0 = svbic_s32_x (p0, z0, z1), ++ z0 = svbic_x (p0, z0, z1)) ++ ++/* ++** bic_s32_x_tied2: ++** bic z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s32_x_tied2, svint32_t, ++ z0 = svbic_s32_x (p0, z1, z0), ++ z0 = svbic_x (p0, z1, z0)) ++ ++/* ++** bic_s32_x_untied: ++** bic z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s32_x_untied, svint32_t, ++ z0 = svbic_s32_x (p0, z1, z2), ++ z0 = svbic_x (p0, z1, z2)) ++ ++/* ++** bic_w0_s32_x_tied1: ++** mov (z[0-9]+)\.s, w0 ++** bic z0\.d, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svbic_n_s32_x (p0, z0, x0), ++ z0 = svbic_x (p0, z0, x0)) ++ ++/* ++** bic_w0_s32_x_untied: ++** mov (z[0-9]+)\.s, w0 ++** bic z0\.d, z1\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svbic_n_s32_x (p0, z1, x0), ++ z0 = svbic_x (p0, z1, x0)) ++ ++/* ++** bic_1_s32_x_tied1: ++** and z0\.s, z0\.s, #0xfffffffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s32_x_tied1, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, 1), ++ z0 = svbic_x (p0, z0, 1)) ++ ++/* ++** bic_1_s32_x_untied: ++** movprfx z0, z1 ++** and z0\.s, z0\.s, #0xfffffffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s32_x_untied, svint32_t, ++ z0 = svbic_n_s32_x (p0, z1, 1), ++ z0 = svbic_x (p0, z1, 1)) ++ ++/* ++** bic_127_s32_x: ++** and z0\.s, z0\.s, #0xffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_127_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, 127), ++ z0 = svbic_x (p0, z0, 127)) ++ ++/* ++** bic_128_s32_x: ++** and z0\.s, z0\.s, #0xffffff7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_128_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, 128), ++ z0 = svbic_x (p0, z0, 128)) ++ ++/* ++** bic_255_s32_x: ++** and z0\.s, z0\.s, #0xffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_255_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, 255), ++ z0 = svbic_x (p0, z0, 255)) ++ ++/* ++** bic_256_s32_x: ++** and z0\.s, z0\.s, #0xfffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_256_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, 256), ++ z0 = svbic_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (bic_257_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, 257), ++ z0 = svbic_x (p0, z0, 257)) ++ ++/* ++** bic_512_s32_x: ++** and z0\.s, z0\.s, #0xfffffdff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_512_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, 512), ++ z0 = svbic_x (p0, z0, 512)) ++ ++/* ++** bic_65280_s32_x: ++** and z0\.s, z0\.s, #0xffff00ff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_65280_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, 0xff00), ++ z0 = svbic_x (p0, z0, 0xff00)) ++ ++/* ++** bic_m127_s32_x: ++** and z0\.s, z0\.s, #0x7e ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m127_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, -127), ++ z0 = svbic_x (p0, z0, -127)) ++ ++/* ++** bic_m128_s32_x: ++** and z0\.s, z0\.s, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m128_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, -128), ++ z0 = svbic_x (p0, z0, -128)) ++ ++/* ++** bic_m255_s32_x: ++** and z0\.s, z0\.s, #0xfe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m255_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, -255), ++ z0 = svbic_x (p0, z0, -255)) ++ ++/* ++** bic_m256_s32_x: ++** and z0\.s, z0\.s, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m256_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, -256), ++ z0 = svbic_x (p0, z0, -256)) ++ ++/* ++** bic_m257_s32_x: ++** and z0\.s, z0\.s, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m257_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, -257), ++ z0 = svbic_x (p0, z0, -257)) ++ ++/* ++** bic_m512_s32_x: ++** and z0\.s, z0\.s, #0x1ff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m512_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, -512), ++ z0 = svbic_x (p0, z0, -512)) ++ ++/* ++** bic_m32768_s32_x: ++** and z0\.s, z0\.s, #0x7fff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m32768_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, -0x8000), ++ z0 = svbic_x (p0, z0, -0x8000)) ++ ++/* ++** bic_5_s32_x: ++** mov (z[0-9]+)\.s, #-6 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_5_s32_x, svint32_t, ++ z0 = svbic_n_s32_x (p0, z0, 5), ++ z0 = svbic_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s64.c +new file mode 100644 +index 000000000..bf9536815 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s64.c +@@ -0,0 +1,363 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bic_s64_m_tied1: ++** bic z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s64_m_tied1, svint64_t, ++ z0 = svbic_s64_m (p0, z0, z1), ++ z0 = svbic_m (p0, z0, z1)) ++ ++/* ++** bic_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s64_m_tied2, svint64_t, ++ z0 = svbic_s64_m (p0, z1, z0), ++ z0 = svbic_m (p0, z1, z0)) ++ ++/* ++** bic_s64_m_untied: ++** movprfx z0, z1 ++** bic z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s64_m_untied, svint64_t, ++ z0 = svbic_s64_m (p0, z1, z2), ++ z0 = svbic_m (p0, z1, z2)) ++ ++/* ++** bic_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svbic_n_s64_m (p0, z0, x0), ++ z0 = svbic_m (p0, z0, x0)) ++ ++/* ++** bic_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svbic_n_s64_m (p0, z1, x0), ++ z0 = svbic_m (p0, z1, x0)) ++ ++/* ++** 
bic_1_s64_m_tied1: ++** mov (z[0-9]+\.d), #-2 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s64_m_tied1, svint64_t, ++ z0 = svbic_n_s64_m (p0, z0, 1), ++ z0 = svbic_m (p0, z0, 1)) ++ ++/* ++** bic_1_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #-2 ++** movprfx z0, z1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s64_m_untied, svint64_t, ++ z0 = svbic_n_s64_m (p0, z1, 1), ++ z0 = svbic_m (p0, z1, 1)) ++ ++/* ++** bic_m2_s64_m: ++** mov (z[0-9]+\.d), #1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m2_s64_m, svint64_t, ++ z0 = svbic_n_s64_m (p0, z0, -2), ++ z0 = svbic_m (p0, z0, -2)) ++ ++/* ++** bic_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** bic z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s64_z_tied1, svint64_t, ++ z0 = svbic_s64_z (p0, z0, z1), ++ z0 = svbic_z (p0, z0, z1)) ++ ++/* ++** bic_s64_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s64_z_tied2, svint64_t, ++ z0 = svbic_s64_z (p0, z1, z0), ++ z0 = svbic_z (p0, z1, z0)) ++ ++/* ++** bic_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** bic z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s64_z_untied, svint64_t, ++ z0 = svbic_s64_z (p0, z1, z2), ++ z0 = svbic_z (p0, z1, z2)) ++ ++/* ++** bic_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svbic_n_s64_z (p0, z0, x0), ++ z0 = svbic_z (p0, z0, x0)) ++ ++/* ++** bic_x0_s64_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z1\.d ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svbic_n_s64_z (p0, z1, x0), ++ z0 = svbic_z (p0, z1, x0)) ++ ++/* ++** bic_1_s64_z_tied1: ++** mov (z[0-9]+\.d), #-2 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s64_z_tied1, svint64_t, ++ z0 = svbic_n_s64_z (p0, z0, 1), ++ z0 = svbic_z (p0, z0, 1)) ++ ++/* ++** bic_1_s64_z_untied: ++** mov (z[0-9]+\.d), #-2 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s64_z_untied, svint64_t, ++ z0 = svbic_n_s64_z (p0, z1, 1), ++ z0 = svbic_z (p0, z1, 1)) ++ ++/* ++** bic_s64_x_tied1: ++** bic z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s64_x_tied1, svint64_t, ++ z0 = svbic_s64_x (p0, z0, z1), ++ z0 = svbic_x (p0, z0, z1)) ++ ++/* ++** bic_s64_x_tied2: ++** bic z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s64_x_tied2, svint64_t, ++ z0 = svbic_s64_x (p0, z1, z0), ++ z0 = svbic_x (p0, z1, z0)) ++ ++/* ++** bic_s64_x_untied: ++** bic z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s64_x_untied, svint64_t, ++ z0 = svbic_s64_x (p0, z1, z2), ++ z0 = svbic_x (p0, z1, z2)) ++ ++/* ++** bic_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** bic z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svbic_n_s64_x (p0, z0, x0), ++ z0 = svbic_x (p0, z0, x0)) ++ ++/* ++** bic_x0_s64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** bic z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svbic_n_s64_x (p0, z1, x0), ++ z0 = svbic_x (p0, z1, x0)) ++ ++/* ++** bic_1_s64_x_tied1: ++** and z0\.d, z0\.d, 
#0xfffffffffffffffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s64_x_tied1, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, 1), ++ z0 = svbic_x (p0, z0, 1)) ++ ++/* ++** bic_1_s64_x_untied: ++** movprfx z0, z1 ++** and z0\.d, z0\.d, #0xfffffffffffffffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s64_x_untied, svint64_t, ++ z0 = svbic_n_s64_x (p0, z1, 1), ++ z0 = svbic_x (p0, z1, 1)) ++ ++/* ++** bic_127_s64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_127_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, 127), ++ z0 = svbic_x (p0, z0, 127)) ++ ++/* ++** bic_128_s64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_128_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, 128), ++ z0 = svbic_x (p0, z0, 128)) ++ ++/* ++** bic_255_s64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_255_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, 255), ++ z0 = svbic_x (p0, z0, 255)) ++ ++/* ++** bic_256_s64_x: ++** and z0\.d, z0\.d, #0xfffffffffffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_256_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, 256), ++ z0 = svbic_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (bic_257_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, 257), ++ z0 = svbic_x (p0, z0, 257)) ++ ++/* ++** bic_512_s64_x: ++** and z0\.d, z0\.d, #0xfffffffffffffdff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_512_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, 512), ++ z0 = svbic_x (p0, z0, 512)) ++ ++/* ++** bic_65280_s64_x: ++** and z0\.d, z0\.d, #0xffffffffffff00ff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_65280_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, 0xff00), ++ z0 = svbic_x (p0, z0, 0xff00)) ++ ++/* ++** bic_m127_s64_x: ++** and z0\.d, z0\.d, #0x7e ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m127_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, -127), ++ z0 = svbic_x (p0, z0, -127)) ++ ++/* ++** bic_m128_s64_x: ++** and z0\.d, z0\.d, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m128_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, -128), ++ z0 = svbic_x (p0, z0, -128)) ++ ++/* ++** bic_m255_s64_x: ++** and z0\.d, z0\.d, #0xfe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m255_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, -255), ++ z0 = svbic_x (p0, z0, -255)) ++ ++/* ++** bic_m256_s64_x: ++** and z0\.d, z0\.d, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m256_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, -256), ++ z0 = svbic_x (p0, z0, -256)) ++ ++/* ++** bic_m257_s64_x: ++** and z0\.d, z0\.d, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m257_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, -257), ++ z0 = svbic_x (p0, z0, -257)) ++ ++/* ++** bic_m512_s64_x: ++** and z0\.d, z0\.d, #0x1ff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m512_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, -512), ++ z0 = svbic_x (p0, z0, -512)) ++ ++/* ++** bic_m32768_s64_x: ++** and z0\.d, z0\.d, #0x7fff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m32768_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, -0x8000), ++ z0 = svbic_x (p0, z0, -0x8000)) ++ ++/* ++** bic_5_s64_x: ++** mov (z[0-9]+\.d), #-6 ++** and z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_5_s64_x, svint64_t, ++ z0 = svbic_n_s64_x (p0, z0, 5), ++ z0 = svbic_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s8.c +new file mode 100644 +index 000000000..0958a3403 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s8.c +@@ -0,0 +1,286 @@ ++/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bic_s8_m_tied1: ++** bic z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s8_m_tied1, svint8_t, ++ z0 = svbic_s8_m (p0, z0, z1), ++ z0 = svbic_m (p0, z0, z1)) ++ ++/* ++** bic_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** bic z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s8_m_tied2, svint8_t, ++ z0 = svbic_s8_m (p0, z1, z0), ++ z0 = svbic_m (p0, z1, z0)) ++ ++/* ++** bic_s8_m_untied: ++** movprfx z0, z1 ++** bic z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s8_m_untied, svint8_t, ++ z0 = svbic_s8_m (p0, z1, z2), ++ z0 = svbic_m (p0, z1, z2)) ++ ++/* ++** bic_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** bic z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svbic_n_s8_m (p0, z0, x0), ++ z0 = svbic_m (p0, z0, x0)) ++ ++/* ++** bic_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** bic z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svbic_n_s8_m (p0, z1, x0), ++ z0 = svbic_m (p0, z1, x0)) ++ ++/* ++** bic_1_s8_m_tied1: ++** mov (z[0-9]+\.b), #-2 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s8_m_tied1, svint8_t, ++ z0 = svbic_n_s8_m (p0, z0, 1), ++ z0 = svbic_m (p0, z0, 1)) ++ ++/* ++** bic_1_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #-2 ++** movprfx z0, z1 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s8_m_untied, svint8_t, ++ z0 = svbic_n_s8_m (p0, z1, 1), ++ z0 = svbic_m (p0, z1, 1)) ++ ++/* ++** bic_m2_s8_m: ++** mov (z[0-9]+\.b), #1 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m2_s8_m, svint8_t, ++ z0 = svbic_n_s8_m (p0, z0, -2), ++ z0 = svbic_m (p0, z0, -2)) ++ ++/* ++** bic_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** bic z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s8_z_tied1, svint8_t, ++ z0 = svbic_s8_z (p0, z0, z1), ++ z0 = svbic_z (p0, z0, z1)) ++ ++/* ++** bic_s8_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, z1\.b ++** bic z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s8_z_tied2, svint8_t, ++ z0 = svbic_s8_z (p0, z1, z0), ++ z0 = svbic_z (p0, z1, z0)) ++ ++/* ++** bic_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** bic z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s8_z_untied, svint8_t, ++ z0 = svbic_s8_z (p0, z1, z2), ++ z0 = svbic_z (p0, z1, z2)) ++ ++/* ++** bic_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** bic z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svbic_n_s8_z (p0, z0, x0), ++ z0 = svbic_z (p0, z0, x0)) ++ ++/* ++** bic_w0_s8_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z1\.b ++** bic z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svbic_n_s8_z (p0, z1, x0), ++ z0 = svbic_z (p0, z1, x0)) ++ ++/* ++** bic_1_s8_z_tied1: ++** mov (z[0-9]+\.b), #-2 ++** movprfx z0\.b, p0/z, z0\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s8_z_tied1, svint8_t, ++ z0 = svbic_n_s8_z (p0, z0, 1), ++ z0 = svbic_z (p0, z0, 1)) ++ ++/* ++** bic_1_s8_z_untied: ++** mov (z[0-9]+\.b), #-2 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** and z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ 
++TEST_UNIFORM_Z (bic_1_s8_z_untied, svint8_t, ++ z0 = svbic_n_s8_z (p0, z1, 1), ++ z0 = svbic_z (p0, z1, 1)) ++ ++/* ++** bic_s8_x_tied1: ++** bic z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s8_x_tied1, svint8_t, ++ z0 = svbic_s8_x (p0, z0, z1), ++ z0 = svbic_x (p0, z0, z1)) ++ ++/* ++** bic_s8_x_tied2: ++** bic z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s8_x_tied2, svint8_t, ++ z0 = svbic_s8_x (p0, z1, z0), ++ z0 = svbic_x (p0, z1, z0)) ++ ++/* ++** bic_s8_x_untied: ++** bic z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_s8_x_untied, svint8_t, ++ z0 = svbic_s8_x (p0, z1, z2), ++ z0 = svbic_x (p0, z1, z2)) ++ ++/* ++** bic_w0_s8_x_tied1: ++** mov (z[0-9]+)\.b, w0 ++** bic z0\.d, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svbic_n_s8_x (p0, z0, x0), ++ z0 = svbic_x (p0, z0, x0)) ++ ++/* ++** bic_w0_s8_x_untied: ++** mov (z[0-9]+)\.b, w0 ++** bic z0\.d, z1\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svbic_n_s8_x (p0, z1, x0), ++ z0 = svbic_x (p0, z1, x0)) ++ ++/* ++** bic_1_s8_x_tied1: ++** and z0\.b, z0\.b, #0xfe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s8_x_tied1, svint8_t, ++ z0 = svbic_n_s8_x (p0, z0, 1), ++ z0 = svbic_x (p0, z0, 1)) ++ ++/* ++** bic_1_s8_x_untied: ++** movprfx z0, z1 ++** and z0\.b, z0\.b, #0xfe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_s8_x_untied, svint8_t, ++ z0 = svbic_n_s8_x (p0, z1, 1), ++ z0 = svbic_x (p0, z1, 1)) ++ ++/* ++** bic_127_s8_x: ++** and z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_127_s8_x, svint8_t, ++ z0 = svbic_n_s8_x (p0, z0, 127), ++ z0 = svbic_x (p0, z0, 127)) ++ ++/* ++** bic_128_s8_x: ++** and z0\.b, z0\.b, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_128_s8_x, svint8_t, ++ z0 = svbic_n_s8_x (p0, z0, 128), ++ z0 = svbic_x (p0, z0, 128)) ++ ++/* ++** bic_255_s8_x: ++** mov z0\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_255_s8_x, svint8_t, ++ z0 = svbic_n_s8_x (p0, z0, 255), ++ z0 = svbic_x (p0, z0, 255)) ++ ++/* ++** bic_m127_s8_x: ++** and z0\.b, z0\.b, #0x7e ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m127_s8_x, svint8_t, ++ z0 = svbic_n_s8_x (p0, z0, -127), ++ z0 = svbic_x (p0, z0, -127)) ++ ++/* ++** bic_m128_s8_x: ++** and z0\.b, z0\.b, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m128_s8_x, svint8_t, ++ z0 = svbic_n_s8_x (p0, z0, -128), ++ z0 = svbic_x (p0, z0, -128)) ++ ++/* ++** bic_5_s8_x: ++** mov (z[0-9]+)\.b, #-6 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_5_s8_x, svint8_t, ++ z0 = svbic_n_s8_x (p0, z0, 5), ++ z0 = svbic_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u16.c +new file mode 100644 +index 000000000..30209ffb4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u16.c +@@ -0,0 +1,367 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bic_u16_m_tied1: ++** bic z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u16_m_tied1, svuint16_t, ++ z0 = svbic_u16_m (p0, z0, z1), ++ z0 = svbic_m (p0, z0, z1)) ++ ++/* ++** bic_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** bic z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u16_m_tied2, svuint16_t, ++ z0 = svbic_u16_m (p0, z1, z0), ++ z0 = svbic_m (p0, z1, z0)) ++ ++/* ++** bic_u16_m_untied: ++** movprfx z0, z1 ++** bic z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u16_m_untied, svuint16_t, ++ z0 = 
svbic_u16_m (p0, z1, z2), ++ z0 = svbic_m (p0, z1, z2)) ++ ++/* ++** bic_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** bic z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svbic_n_u16_m (p0, z0, x0), ++ z0 = svbic_m (p0, z0, x0)) ++ ++/* ++** bic_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** bic z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svbic_n_u16_m (p0, z1, x0), ++ z0 = svbic_m (p0, z1, x0)) ++ ++/* ++** bic_1_u16_m_tied1: ++** mov (z[0-9]+\.h), #-2 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u16_m_tied1, svuint16_t, ++ z0 = svbic_n_u16_m (p0, z0, 1), ++ z0 = svbic_m (p0, z0, 1)) ++ ++/* ++** bic_1_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #-2 ++** movprfx z0, z1 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u16_m_untied, svuint16_t, ++ z0 = svbic_n_u16_m (p0, z1, 1), ++ z0 = svbic_m (p0, z1, 1)) ++ ++/* ++** bic_m2_u16_m: ++** mov (z[0-9]+\.h), #1 ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m2_u16_m, svuint16_t, ++ z0 = svbic_n_u16_m (p0, z0, -2), ++ z0 = svbic_m (p0, z0, -2)) ++ ++/* ++** bic_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** bic z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u16_z_tied1, svuint16_t, ++ z0 = svbic_u16_z (p0, z0, z1), ++ z0 = svbic_z (p0, z0, z1)) ++ ++/* ++** bic_u16_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** bic z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u16_z_tied2, svuint16_t, ++ z0 = svbic_u16_z (p0, z1, z0), ++ z0 = svbic_z (p0, z1, z0)) ++ ++/* ++** bic_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** bic z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u16_z_untied, svuint16_t, ++ z0 = svbic_u16_z (p0, z1, z2), ++ z0 = svbic_z (p0, z1, z2)) ++ ++/* ++** bic_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** bic z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svbic_n_u16_z (p0, z0, x0), ++ z0 = svbic_z (p0, z0, x0)) ++ ++/* ++** bic_w0_u16_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z1\.h ++** bic z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svbic_n_u16_z (p0, z1, x0), ++ z0 = svbic_z (p0, z1, x0)) ++ ++/* ++** bic_1_u16_z_tied1: ++** mov (z[0-9]+\.h), #-2 ++** movprfx z0\.h, p0/z, z0\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u16_z_tied1, svuint16_t, ++ z0 = svbic_n_u16_z (p0, z0, 1), ++ z0 = svbic_z (p0, z0, 1)) ++ ++/* ++** bic_1_u16_z_untied: ++** mov (z[0-9]+\.h), #-2 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** and z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** and z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u16_z_untied, svuint16_t, ++ z0 = svbic_n_u16_z (p0, z1, 1), ++ z0 = svbic_z (p0, z1, 1)) ++ ++/* ++** bic_u16_x_tied1: ++** bic z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u16_x_tied1, svuint16_t, ++ z0 = svbic_u16_x (p0, z0, z1), ++ z0 = svbic_x (p0, z0, z1)) ++ ++/* ++** bic_u16_x_tied2: ++** bic z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u16_x_tied2, svuint16_t, ++ z0 = svbic_u16_x (p0, z1, z0), ++ z0 = svbic_x (p0, z1, z0)) ++ ++/* ++** bic_u16_x_untied: ++** bic z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u16_x_untied, 
svuint16_t, ++ z0 = svbic_u16_x (p0, z1, z2), ++ z0 = svbic_x (p0, z1, z2)) ++ ++/* ++** bic_w0_u16_x_tied1: ++** mov (z[0-9]+)\.h, w0 ++** bic z0\.d, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svbic_n_u16_x (p0, z0, x0), ++ z0 = svbic_x (p0, z0, x0)) ++ ++/* ++** bic_w0_u16_x_untied: ++** mov (z[0-9]+)\.h, w0 ++** bic z0\.d, z1\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svbic_n_u16_x (p0, z1, x0), ++ z0 = svbic_x (p0, z1, x0)) ++ ++/* ++** bic_1_u16_x_tied1: ++** and z0\.h, z0\.h, #0xfffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u16_x_tied1, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, 1), ++ z0 = svbic_x (p0, z0, 1)) ++ ++/* ++** bic_1_u16_x_untied: ++** movprfx z0, z1 ++** and z0\.h, z0\.h, #0xfffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u16_x_untied, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z1, 1), ++ z0 = svbic_x (p0, z1, 1)) ++ ++/* ++** bic_127_u16_x: ++** and z0\.h, z0\.h, #0xff80 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_127_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, 127), ++ z0 = svbic_x (p0, z0, 127)) ++ ++/* ++** bic_128_u16_x: ++** and z0\.h, z0\.h, #0xff7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_128_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, 128), ++ z0 = svbic_x (p0, z0, 128)) ++ ++/* ++** bic_255_u16_x: ++** and z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_255_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, 255), ++ z0 = svbic_x (p0, z0, 255)) ++ ++/* ++** bic_256_u16_x: ++** and z0\.h, z0\.h, #0xfeff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_256_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, 256), ++ z0 = svbic_x (p0, z0, 256)) ++ ++/* ++** bic_257_u16_x: ++** and z0\.h, z0\.h, #0xfefe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_257_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, 257), ++ z0 = svbic_x (p0, z0, 257)) ++ ++/* ++** bic_512_u16_x: ++** and z0\.h, z0\.h, #0xfdff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_512_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, 512), ++ z0 = svbic_x (p0, z0, 512)) ++ ++/* ++** bic_65280_u16_x: ++** and z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_65280_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, 0xff00), ++ z0 = svbic_x (p0, z0, 0xff00)) ++ ++/* ++** bic_m127_u16_x: ++** and z0\.h, z0\.h, #0x7e ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m127_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, -127), ++ z0 = svbic_x (p0, z0, -127)) ++ ++/* ++** bic_m128_u16_x: ++** and z0\.h, z0\.h, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m128_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, -128), ++ z0 = svbic_x (p0, z0, -128)) ++ ++/* ++** bic_m255_u16_x: ++** and z0\.h, z0\.h, #0xfe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m255_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, -255), ++ z0 = svbic_x (p0, z0, -255)) ++ ++/* ++** bic_m256_u16_x: ++** and z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m256_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, -256), ++ z0 = svbic_x (p0, z0, -256)) ++ ++/* ++** bic_m257_u16_x: ++** and z0\.h, z0\.h, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m257_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, -257), ++ z0 = svbic_x (p0, z0, -257)) ++ ++/* ++** bic_m512_u16_x: ++** and z0\.h, z0\.h, #0x1ff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m512_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, -512), ++ z0 = svbic_x (p0, z0, -512)) ++ ++/* ++** bic_m32768_u16_x: ++** and z0\.h, z0\.h, #0x7fff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m32768_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, -0x8000), ++ z0 = 
svbic_x (p0, z0, -0x8000)) ++ ++/* ++** bic_5_u16_x: ++** mov (z[0-9]+)\.h, #-6 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_5_u16_x, svuint16_t, ++ z0 = svbic_n_u16_x (p0, z0, 5), ++ z0 = svbic_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u32.c +new file mode 100644 +index 000000000..b308b599b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u32.c +@@ -0,0 +1,363 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bic_u32_m_tied1: ++** bic z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u32_m_tied1, svuint32_t, ++ z0 = svbic_u32_m (p0, z0, z1), ++ z0 = svbic_m (p0, z0, z1)) ++ ++/* ++** bic_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** bic z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u32_m_tied2, svuint32_t, ++ z0 = svbic_u32_m (p0, z1, z0), ++ z0 = svbic_m (p0, z1, z0)) ++ ++/* ++** bic_u32_m_untied: ++** movprfx z0, z1 ++** bic z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u32_m_untied, svuint32_t, ++ z0 = svbic_u32_m (p0, z1, z2), ++ z0 = svbic_m (p0, z1, z2)) ++ ++/* ++** bic_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** bic z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svbic_n_u32_m (p0, z0, x0), ++ z0 = svbic_m (p0, z0, x0)) ++ ++/* ++** bic_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** bic z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svbic_n_u32_m (p0, z1, x0), ++ z0 = svbic_m (p0, z1, x0)) ++ ++/* ++** bic_1_u32_m_tied1: ++** mov (z[0-9]+\.s), #-2 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u32_m_tied1, svuint32_t, ++ z0 = svbic_n_u32_m (p0, z0, 1), ++ z0 = svbic_m (p0, z0, 1)) ++ ++/* ++** bic_1_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #-2 ++** movprfx z0, z1 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u32_m_untied, svuint32_t, ++ z0 = svbic_n_u32_m (p0, z1, 1), ++ z0 = svbic_m (p0, z1, 1)) ++ ++/* ++** bic_m2_u32_m: ++** mov (z[0-9]+\.s), #1 ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m2_u32_m, svuint32_t, ++ z0 = svbic_n_u32_m (p0, z0, -2), ++ z0 = svbic_m (p0, z0, -2)) ++ ++/* ++** bic_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** bic z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u32_z_tied1, svuint32_t, ++ z0 = svbic_u32_z (p0, z0, z1), ++ z0 = svbic_z (p0, z0, z1)) ++ ++/* ++** bic_u32_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** bic z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u32_z_tied2, svuint32_t, ++ z0 = svbic_u32_z (p0, z1, z0), ++ z0 = svbic_z (p0, z1, z0)) ++ ++/* ++** bic_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** bic z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u32_z_untied, svuint32_t, ++ z0 = svbic_u32_z (p0, z1, z2), ++ z0 = svbic_z (p0, z1, z2)) ++ ++/* ++** bic_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** bic z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svbic_n_u32_z (p0, z0, x0), ++ z0 = svbic_z (p0, z0, x0)) ++ ++/* ++** bic_w0_u32_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z1\.s ++** bic z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ 
++TEST_UNIFORM_ZX (bic_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svbic_n_u32_z (p0, z1, x0), ++ z0 = svbic_z (p0, z1, x0)) ++ ++/* ++** bic_1_u32_z_tied1: ++** mov (z[0-9]+\.s), #-2 ++** movprfx z0\.s, p0/z, z0\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u32_z_tied1, svuint32_t, ++ z0 = svbic_n_u32_z (p0, z0, 1), ++ z0 = svbic_z (p0, z0, 1)) ++ ++/* ++** bic_1_u32_z_untied: ++** mov (z[0-9]+\.s), #-2 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** and z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** and z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u32_z_untied, svuint32_t, ++ z0 = svbic_n_u32_z (p0, z1, 1), ++ z0 = svbic_z (p0, z1, 1)) ++ ++/* ++** bic_u32_x_tied1: ++** bic z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u32_x_tied1, svuint32_t, ++ z0 = svbic_u32_x (p0, z0, z1), ++ z0 = svbic_x (p0, z0, z1)) ++ ++/* ++** bic_u32_x_tied2: ++** bic z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u32_x_tied2, svuint32_t, ++ z0 = svbic_u32_x (p0, z1, z0), ++ z0 = svbic_x (p0, z1, z0)) ++ ++/* ++** bic_u32_x_untied: ++** bic z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u32_x_untied, svuint32_t, ++ z0 = svbic_u32_x (p0, z1, z2), ++ z0 = svbic_x (p0, z1, z2)) ++ ++/* ++** bic_w0_u32_x_tied1: ++** mov (z[0-9]+)\.s, w0 ++** bic z0\.d, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svbic_n_u32_x (p0, z0, x0), ++ z0 = svbic_x (p0, z0, x0)) ++ ++/* ++** bic_w0_u32_x_untied: ++** mov (z[0-9]+)\.s, w0 ++** bic z0\.d, z1\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svbic_n_u32_x (p0, z1, x0), ++ z0 = svbic_x (p0, z1, x0)) ++ ++/* ++** bic_1_u32_x_tied1: ++** and z0\.s, z0\.s, #0xfffffffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u32_x_tied1, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, 1), ++ z0 = svbic_x (p0, z0, 1)) ++ ++/* ++** bic_1_u32_x_untied: ++** movprfx z0, z1 ++** and z0\.s, z0\.s, #0xfffffffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u32_x_untied, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z1, 1), ++ z0 = svbic_x (p0, z1, 1)) ++ ++/* ++** bic_127_u32_x: ++** and z0\.s, z0\.s, #0xffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_127_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, 127), ++ z0 = svbic_x (p0, z0, 127)) ++ ++/* ++** bic_128_u32_x: ++** and z0\.s, z0\.s, #0xffffff7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_128_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, 128), ++ z0 = svbic_x (p0, z0, 128)) ++ ++/* ++** bic_255_u32_x: ++** and z0\.s, z0\.s, #0xffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_255_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, 255), ++ z0 = svbic_x (p0, z0, 255)) ++ ++/* ++** bic_256_u32_x: ++** and z0\.s, z0\.s, #0xfffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_256_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, 256), ++ z0 = svbic_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (bic_257_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, 257), ++ z0 = svbic_x (p0, z0, 257)) ++ ++/* ++** bic_512_u32_x: ++** and z0\.s, z0\.s, #0xfffffdff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_512_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, 512), ++ z0 = svbic_x (p0, z0, 512)) ++ ++/* ++** bic_65280_u32_x: ++** and z0\.s, z0\.s, #0xffff00ff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_65280_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, 0xff00), ++ z0 = svbic_x (p0, z0, 0xff00)) ++ ++/* ++** bic_m127_u32_x: ++** and z0\.s, z0\.s, #0x7e ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m127_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, -127), ++ z0 = svbic_x (p0, z0, -127)) ++ ++/* ++** bic_m128_u32_x: ++** and z0\.s, z0\.s, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m128_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, -128), ++ z0 = svbic_x (p0, z0, -128)) ++ ++/* ++** bic_m255_u32_x: ++** and z0\.s, z0\.s, #0xfe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m255_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, -255), ++ z0 = svbic_x (p0, z0, -255)) ++ ++/* ++** bic_m256_u32_x: ++** and z0\.s, z0\.s, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m256_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, -256), ++ z0 = svbic_x (p0, z0, -256)) ++ ++/* ++** bic_m257_u32_x: ++** and z0\.s, z0\.s, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m257_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, -257), ++ z0 = svbic_x (p0, z0, -257)) ++ ++/* ++** bic_m512_u32_x: ++** and z0\.s, z0\.s, #0x1ff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m512_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, -512), ++ z0 = svbic_x (p0, z0, -512)) ++ ++/* ++** bic_m32768_u32_x: ++** and z0\.s, z0\.s, #0x7fff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m32768_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, -0x8000), ++ z0 = svbic_x (p0, z0, -0x8000)) ++ ++/* ++** bic_5_u32_x: ++** mov (z[0-9]+)\.s, #-6 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_5_u32_x, svuint32_t, ++ z0 = svbic_n_u32_x (p0, z0, 5), ++ z0 = svbic_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u64.c +new file mode 100644 +index 000000000..e82db1e94 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u64.c +@@ -0,0 +1,363 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bic_u64_m_tied1: ++** bic z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u64_m_tied1, svuint64_t, ++ z0 = svbic_u64_m (p0, z0, z1), ++ z0 = svbic_m (p0, z0, z1)) ++ ++/* ++** bic_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u64_m_tied2, svuint64_t, ++ z0 = svbic_u64_m (p0, z1, z0), ++ z0 = svbic_m (p0, z1, z0)) ++ ++/* ++** bic_u64_m_untied: ++** movprfx z0, z1 ++** bic z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u64_m_untied, svuint64_t, ++ z0 = svbic_u64_m (p0, z1, z2), ++ z0 = svbic_m (p0, z1, z2)) ++ ++/* ++** bic_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svbic_n_u64_m (p0, z0, x0), ++ z0 = svbic_m (p0, z0, x0)) ++ ++/* ++** bic_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svbic_n_u64_m (p0, z1, x0), ++ z0 = svbic_m (p0, z1, x0)) 
++ ++/* ++** bic_1_u64_m_tied1: ++** mov (z[0-9]+\.d), #-2 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u64_m_tied1, svuint64_t, ++ z0 = svbic_n_u64_m (p0, z0, 1), ++ z0 = svbic_m (p0, z0, 1)) ++ ++/* ++** bic_1_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #-2 ++** movprfx z0, z1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u64_m_untied, svuint64_t, ++ z0 = svbic_n_u64_m (p0, z1, 1), ++ z0 = svbic_m (p0, z1, 1)) ++ ++/* ++** bic_m2_u64_m: ++** mov (z[0-9]+\.d), #1 ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m2_u64_m, svuint64_t, ++ z0 = svbic_n_u64_m (p0, z0, -2), ++ z0 = svbic_m (p0, z0, -2)) ++ ++/* ++** bic_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** bic z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u64_z_tied1, svuint64_t, ++ z0 = svbic_u64_z (p0, z0, z1), ++ z0 = svbic_z (p0, z0, z1)) ++ ++/* ++** bic_u64_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u64_z_tied2, svuint64_t, ++ z0 = svbic_u64_z (p0, z1, z0), ++ z0 = svbic_z (p0, z1, z0)) ++ ++/* ++** bic_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** bic z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u64_z_untied, svuint64_t, ++ z0 = svbic_u64_z (p0, z1, z2), ++ z0 = svbic_z (p0, z1, z2)) ++ ++/* ++** bic_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svbic_n_u64_z (p0, z0, x0), ++ z0 = svbic_z (p0, z0, x0)) ++ ++/* ++** bic_x0_u64_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z1\.d ++** bic z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svbic_n_u64_z (p0, z1, x0), ++ z0 = svbic_z (p0, z1, x0)) ++ ++/* ++** bic_1_u64_z_tied1: ++** mov (z[0-9]+\.d), #-2 ++** movprfx z0\.d, p0/z, z0\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u64_z_tied1, svuint64_t, ++ z0 = svbic_n_u64_z (p0, z0, 1), ++ z0 = svbic_z (p0, z0, 1)) ++ ++/* ++** bic_1_u64_z_untied: ++** mov (z[0-9]+\.d), #-2 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** and z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** and z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u64_z_untied, svuint64_t, ++ z0 = svbic_n_u64_z (p0, z1, 1), ++ z0 = svbic_z (p0, z1, 1)) ++ ++/* ++** bic_u64_x_tied1: ++** bic z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u64_x_tied1, svuint64_t, ++ z0 = svbic_u64_x (p0, z0, z1), ++ z0 = svbic_x (p0, z0, z1)) ++ ++/* ++** bic_u64_x_tied2: ++** bic z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u64_x_tied2, svuint64_t, ++ z0 = svbic_u64_x (p0, z1, z0), ++ z0 = svbic_x (p0, z1, z0)) ++ ++/* ++** bic_u64_x_untied: ++** bic z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u64_x_untied, svuint64_t, ++ z0 = svbic_u64_x (p0, z1, z2), ++ z0 = svbic_x (p0, z1, z2)) ++ ++/* ++** bic_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** bic z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svbic_n_u64_x (p0, z0, x0), ++ z0 = svbic_x (p0, z0, x0)) ++ ++/* ++** bic_x0_u64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** bic z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svbic_n_u64_x (p0, z1, x0), ++ z0 = svbic_x (p0, z1, x0)) ++ ++/* ++** bic_1_u64_x_tied1: ++** 
and z0\.d, z0\.d, #0xfffffffffffffffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u64_x_tied1, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, 1), ++ z0 = svbic_x (p0, z0, 1)) ++ ++/* ++** bic_1_u64_x_untied: ++** movprfx z0, z1 ++** and z0\.d, z0\.d, #0xfffffffffffffffe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u64_x_untied, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z1, 1), ++ z0 = svbic_x (p0, z1, 1)) ++ ++/* ++** bic_127_u64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_127_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, 127), ++ z0 = svbic_x (p0, z0, 127)) ++ ++/* ++** bic_128_u64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_128_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, 128), ++ z0 = svbic_x (p0, z0, 128)) ++ ++/* ++** bic_255_u64_x: ++** and z0\.d, z0\.d, #0xffffffffffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_255_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, 255), ++ z0 = svbic_x (p0, z0, 255)) ++ ++/* ++** bic_256_u64_x: ++** and z0\.d, z0\.d, #0xfffffffffffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_256_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, 256), ++ z0 = svbic_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (bic_257_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, 257), ++ z0 = svbic_x (p0, z0, 257)) ++ ++/* ++** bic_512_u64_x: ++** and z0\.d, z0\.d, #0xfffffffffffffdff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_512_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, 512), ++ z0 = svbic_x (p0, z0, 512)) ++ ++/* ++** bic_65280_u64_x: ++** and z0\.d, z0\.d, #0xffffffffffff00ff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_65280_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, 0xff00), ++ z0 = svbic_x (p0, z0, 0xff00)) ++ ++/* ++** bic_m127_u64_x: ++** and z0\.d, z0\.d, #0x7e ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m127_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, -127), ++ z0 = svbic_x (p0, z0, -127)) ++ ++/* ++** bic_m128_u64_x: ++** and z0\.d, z0\.d, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m128_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, -128), ++ z0 = svbic_x (p0, z0, -128)) ++ ++/* ++** bic_m255_u64_x: ++** and z0\.d, z0\.d, #0xfe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m255_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, -255), ++ z0 = svbic_x (p0, z0, -255)) ++ ++/* ++** bic_m256_u64_x: ++** and z0\.d, z0\.d, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m256_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, -256), ++ z0 = svbic_x (p0, z0, -256)) ++ ++/* ++** bic_m257_u64_x: ++** and z0\.d, z0\.d, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m257_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, -257), ++ z0 = svbic_x (p0, z0, -257)) ++ ++/* ++** bic_m512_u64_x: ++** and z0\.d, z0\.d, #0x1ff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m512_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, -512), ++ z0 = svbic_x (p0, z0, -512)) ++ ++/* ++** bic_m32768_u64_x: ++** and z0\.d, z0\.d, #0x7fff ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m32768_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, -0x8000), ++ z0 = svbic_x (p0, z0, -0x8000)) ++ ++/* ++** bic_5_u64_x: ++** mov (z[0-9]+\.d), #-6 ++** and z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_5_u64_x, svuint64_t, ++ z0 = svbic_n_u64_x (p0, z0, 5), ++ z0 = svbic_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u8.c +new file mode 100644 +index 000000000..80c489b9c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u8.c +@@ -0,0 +1,286 
@@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** bic_u8_m_tied1: ++** bic z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u8_m_tied1, svuint8_t, ++ z0 = svbic_u8_m (p0, z0, z1), ++ z0 = svbic_m (p0, z0, z1)) ++ ++/* ++** bic_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** bic z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u8_m_tied2, svuint8_t, ++ z0 = svbic_u8_m (p0, z1, z0), ++ z0 = svbic_m (p0, z1, z0)) ++ ++/* ++** bic_u8_m_untied: ++** movprfx z0, z1 ++** bic z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u8_m_untied, svuint8_t, ++ z0 = svbic_u8_m (p0, z1, z2), ++ z0 = svbic_m (p0, z1, z2)) ++ ++/* ++** bic_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** bic z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svbic_n_u8_m (p0, z0, x0), ++ z0 = svbic_m (p0, z0, x0)) ++ ++/* ++** bic_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** bic z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svbic_n_u8_m (p0, z1, x0), ++ z0 = svbic_m (p0, z1, x0)) ++ ++/* ++** bic_1_u8_m_tied1: ++** mov (z[0-9]+\.b), #-2 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u8_m_tied1, svuint8_t, ++ z0 = svbic_n_u8_m (p0, z0, 1), ++ z0 = svbic_m (p0, z0, 1)) ++ ++/* ++** bic_1_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #-2 ++** movprfx z0, z1 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u8_m_untied, svuint8_t, ++ z0 = svbic_n_u8_m (p0, z1, 1), ++ z0 = svbic_m (p0, z1, 1)) ++ ++/* ++** bic_m2_u8_m: ++** mov (z[0-9]+\.b), #1 ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m2_u8_m, svuint8_t, ++ z0 = svbic_n_u8_m (p0, z0, -2), ++ z0 = svbic_m (p0, z0, -2)) ++ ++/* ++** bic_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** bic z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u8_z_tied1, svuint8_t, ++ z0 = svbic_u8_z (p0, z0, z1), ++ z0 = svbic_z (p0, z0, z1)) ++ ++/* ++** bic_u8_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, z1\.b ++** bic z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u8_z_tied2, svuint8_t, ++ z0 = svbic_u8_z (p0, z1, z0), ++ z0 = svbic_z (p0, z1, z0)) ++ ++/* ++** bic_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** bic z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u8_z_untied, svuint8_t, ++ z0 = svbic_u8_z (p0, z1, z2), ++ z0 = svbic_z (p0, z1, z2)) ++ ++/* ++** bic_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** bic z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svbic_n_u8_z (p0, z0, x0), ++ z0 = svbic_z (p0, z0, x0)) ++ ++/* ++** bic_w0_u8_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z1\.b ++** bic z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svbic_n_u8_z (p0, z1, x0), ++ z0 = svbic_z (p0, z1, x0)) ++ ++/* ++** bic_1_u8_z_tied1: ++** mov (z[0-9]+\.b), #-2 ++** movprfx z0\.b, p0/z, z0\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u8_z_tied1, svuint8_t, ++ z0 = svbic_n_u8_z (p0, z0, 1), ++ z0 = svbic_z (p0, z0, 1)) ++ ++/* ++** bic_1_u8_z_untied: ++** mov (z[0-9]+\.b), #-2 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** and z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** and z0\.b, p0/m, 
z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u8_z_untied, svuint8_t, ++ z0 = svbic_n_u8_z (p0, z1, 1), ++ z0 = svbic_z (p0, z1, 1)) ++ ++/* ++** bic_u8_x_tied1: ++** bic z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u8_x_tied1, svuint8_t, ++ z0 = svbic_u8_x (p0, z0, z1), ++ z0 = svbic_x (p0, z0, z1)) ++ ++/* ++** bic_u8_x_tied2: ++** bic z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u8_x_tied2, svuint8_t, ++ z0 = svbic_u8_x (p0, z1, z0), ++ z0 = svbic_x (p0, z1, z0)) ++ ++/* ++** bic_u8_x_untied: ++** bic z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (bic_u8_x_untied, svuint8_t, ++ z0 = svbic_u8_x (p0, z1, z2), ++ z0 = svbic_x (p0, z1, z2)) ++ ++/* ++** bic_w0_u8_x_tied1: ++** mov (z[0-9]+)\.b, w0 ++** bic z0\.d, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svbic_n_u8_x (p0, z0, x0), ++ z0 = svbic_x (p0, z0, x0)) ++ ++/* ++** bic_w0_u8_x_untied: ++** mov (z[0-9]+)\.b, w0 ++** bic z0\.d, z1\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (bic_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svbic_n_u8_x (p0, z1, x0), ++ z0 = svbic_x (p0, z1, x0)) ++ ++/* ++** bic_1_u8_x_tied1: ++** and z0\.b, z0\.b, #0xfe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u8_x_tied1, svuint8_t, ++ z0 = svbic_n_u8_x (p0, z0, 1), ++ z0 = svbic_x (p0, z0, 1)) ++ ++/* ++** bic_1_u8_x_untied: ++** movprfx z0, z1 ++** and z0\.b, z0\.b, #0xfe ++** ret ++*/ ++TEST_UNIFORM_Z (bic_1_u8_x_untied, svuint8_t, ++ z0 = svbic_n_u8_x (p0, z1, 1), ++ z0 = svbic_x (p0, z1, 1)) ++ ++/* ++** bic_127_u8_x: ++** and z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_127_u8_x, svuint8_t, ++ z0 = svbic_n_u8_x (p0, z0, 127), ++ z0 = svbic_x (p0, z0, 127)) ++ ++/* ++** bic_128_u8_x: ++** and z0\.b, z0\.b, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_128_u8_x, svuint8_t, ++ z0 = svbic_n_u8_x (p0, z0, 128), ++ z0 = svbic_x (p0, z0, 128)) ++ ++/* ++** bic_255_u8_x: ++** mov z0\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (bic_255_u8_x, svuint8_t, ++ z0 = svbic_n_u8_x (p0, z0, 255), ++ z0 = svbic_x (p0, z0, 255)) ++ ++/* ++** bic_m127_u8_x: ++** and z0\.b, z0\.b, #0x7e ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m127_u8_x, svuint8_t, ++ z0 = svbic_n_u8_x (p0, z0, -127), ++ z0 = svbic_x (p0, z0, -127)) ++ ++/* ++** bic_m128_u8_x: ++** and z0\.b, z0\.b, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (bic_m128_u8_x, svuint8_t, ++ z0 = svbic_n_u8_x (p0, z0, -128), ++ z0 = svbic_x (p0, z0, -128)) ++ ++/* ++** bic_5_u8_x: ++** mov (z[0-9]+)\.b, #-6 ++** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (bic_5_u8_x, svuint8_t, ++ z0 = svbic_n_u8_x (p0, z0, 5), ++ z0 = svbic_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c +new file mode 100644 +index 000000000..63426cf94 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c +@@ -0,0 +1,54 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** brka_b_m_tied12: ++** brka p0\.b, p3/m, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (brka_b_m_tied12, ++ p0 = svbrka_b_m (p0, p3, p0), ++ p0 = svbrka_m (p0, p3, p0)) ++ ++/* ++** brka_b_m_tied1: ++** brka p0\.b, p3/m, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (brka_b_m_tied1, ++ p0 = svbrka_b_m (p0, p3, p1), ++ p0 = svbrka_m (p0, p3, p1)) ++ ++/* Bad RA choice: no preferred output sequence. 
*/ ++TEST_UNIFORM_P (brka_b_m_tied2, ++ p0 = svbrka_b_m (p1, p3, p0), ++ p0 = svbrka_m (p1, p3, p0)) ++ ++/* ++** brka_b_m_untied: ++** mov p0\.b, p2\.b ++** brka p0\.b, p3/m, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (brka_b_m_untied, ++ p0 = svbrka_b_m (p2, p3, p1), ++ p0 = svbrka_m (p2, p3, p1)) ++ ++/* ++** brka_b_z_tied1: ++** brka p0\.b, p3/z, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (brka_b_z_tied1, ++ p0 = svbrka_b_z (p3, p0), ++ p0 = svbrka_z (p3, p0)) ++ ++/* ++** brka_b_z_untied: ++** brka p0\.b, p3/z, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (brka_b_z_untied, ++ p0 = svbrka_b_z (p3, p1), ++ p0 = svbrka_z (p3, p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c +new file mode 100644 +index 000000000..4f9a2c2d7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c +@@ -0,0 +1,54 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** brkb_b_m_tied12: ++** brkb p0\.b, p3/m, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkb_b_m_tied12, ++ p0 = svbrkb_b_m (p0, p3, p0), ++ p0 = svbrkb_m (p0, p3, p0)) ++ ++/* ++** brkb_b_m_tied1: ++** brkb p0\.b, p3/m, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkb_b_m_tied1, ++ p0 = svbrkb_b_m (p0, p3, p1), ++ p0 = svbrkb_m (p0, p3, p1)) ++ ++/* Bad RA choice: no preferred output sequence. */ ++TEST_UNIFORM_P (brkb_b_m_tied2, ++ p0 = svbrkb_b_m (p1, p3, p0), ++ p0 = svbrkb_m (p1, p3, p0)) ++ ++/* ++** brkb_b_m_untied: ++** mov p0\.b, p2\.b ++** brkb p0\.b, p3/m, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkb_b_m_untied, ++ p0 = svbrkb_b_m (p2, p3, p1), ++ p0 = svbrkb_m (p2, p3, p1)) ++ ++/* ++** brkb_b_z_tied1: ++** brkb p0\.b, p3/z, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkb_b_z_tied1, ++ p0 = svbrkb_b_z (p3, p0), ++ p0 = svbrkb_z (p3, p0)) ++ ++/* ++** brkb_b_z_untied: ++** brkb p0\.b, p3/z, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkb_b_z_untied, ++ p0 = svbrkb_b_z (p3, p1), ++ p0 = svbrkb_z (p3, p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c +new file mode 100644 +index 000000000..229a5fff9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c +@@ -0,0 +1,27 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* Bad RA choice: no preferred output sequence. 
*/ ++TEST_UNIFORM_P (brkn_b_z_tied1, ++ p0 = svbrkn_b_z (p3, p0, p1), ++ p0 = svbrkn_z (p3, p0, p1)) ++ ++/* ++** brkn_b_z_tied2: ++** brkn p0\.b, p3/z, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkn_b_z_tied2, ++ p0 = svbrkn_b_z (p3, p1, p0), ++ p0 = svbrkn_z (p3, p1, p0)) ++ ++/* ++** brkn_b_z_untied: ++** mov p0\.b, p2\.b ++** brkn p0\.b, p3/z, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkn_b_z_untied, ++ p0 = svbrkn_b_z (p3, p1, p2), ++ p0 = svbrkn_z (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpa_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpa_b.c +new file mode 100644 +index 000000000..2c074e389 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpa_b.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** brkpa_b_z_tied1: ++** brkpa p0\.b, p3/z, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkpa_b_z_tied1, ++ p0 = svbrkpa_b_z (p3, p0, p1), ++ p0 = svbrkpa_z (p3, p0, p1)) ++ ++/* ++** brkpa_b_z_tied2: ++** brkpa p0\.b, p3/z, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkpa_b_z_tied2, ++ p0 = svbrkpa_b_z (p3, p1, p0), ++ p0 = svbrkpa_z (p3, p1, p0)) ++ ++/* ++** brkpa_b_z_untied: ++** brkpa p0\.b, p3/z, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkpa_b_z_untied, ++ p0 = svbrkpa_b_z (p3, p1, p2), ++ p0 = svbrkpa_z (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpb_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpb_b.c +new file mode 100644 +index 000000000..b41797ee1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpb_b.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** brkpb_b_z_tied1: ++** brkpb p0\.b, p3/z, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkpb_b_z_tied1, ++ p0 = svbrkpb_b_z (p3, p0, p1), ++ p0 = svbrkpb_z (p3, p0, p1)) ++ ++/* ++** brkpb_b_z_tied2: ++** brkpb p0\.b, p3/z, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkpb_b_z_tied2, ++ p0 = svbrkpb_b_z (p3, p1, p0), ++ p0 = svbrkpb_z (p3, p1, p0)) ++ ++/* ++** brkpb_b_z_untied: ++** brkpb p0\.b, p3/z, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (brkpb_b_z_untied, ++ p0 = svbrkpb_b_z (p3, p1, p2), ++ p0 = svbrkpb_z (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f16.c +new file mode 100644 +index 000000000..e89c78455 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f16.c +@@ -0,0 +1,251 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cadd_90_f16_m_tied1: ++** fcadd z0\.h, p0/m, z0\.h, z1\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f16_m_tied1, svfloat16_t, ++ z0 = svcadd_f16_m (p0, z0, z1, 90), ++ z0 = svcadd_m (p0, z0, z1, 90)) ++ ++/* ++** cadd_90_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcadd z0\.h, p0/m, z0\.h, \1\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f16_m_tied2, svfloat16_t, ++ z0 = svcadd_f16_m (p0, z1, z0, 90), ++ z0 = svcadd_m (p0, z1, z0, 90)) ++ ++/* ++** cadd_90_f16_m_untied: ++** movprfx z0, z1 ++** fcadd z0\.h, p0/m, z0\.h, z2\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f16_m_untied, svfloat16_t, ++ z0 = svcadd_f16_m (p0, z1, z2, 90), ++ z0 = svcadd_m (p0, z1, z2, 90)) ++ ++/* ++** cadd_270_f16_m_tied1: ++** fcadd z0\.h, p0/m, z0\.h, z1\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f16_m_tied1, 
svfloat16_t, ++ z0 = svcadd_f16_m (p0, z0, z1, 270), ++ z0 = svcadd_m (p0, z0, z1, 270)) ++ ++/* ++** cadd_270_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcadd z0\.h, p0/m, z0\.h, \1\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f16_m_tied2, svfloat16_t, ++ z0 = svcadd_f16_m (p0, z1, z0, 270), ++ z0 = svcadd_m (p0, z1, z0, 270)) ++ ++/* ++** cadd_270_f16_m_untied: ++** movprfx z0, z1 ++** fcadd z0\.h, p0/m, z0\.h, z2\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f16_m_untied, svfloat16_t, ++ z0 = svcadd_f16_m (p0, z1, z2, 270), ++ z0 = svcadd_m (p0, z1, z2, 270)) ++ ++/* ++** cadd_90_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fcadd z0\.h, p0/m, z0\.h, z1\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f16_z_tied1, svfloat16_t, ++ z0 = svcadd_f16_z (p0, z0, z1, 90), ++ z0 = svcadd_z (p0, z0, z1, 90)) ++ ++/* ++** cadd_90_f16_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** fcadd z0\.h, p0/m, z0\.h, \1\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f16_z_tied2, svfloat16_t, ++ z0 = svcadd_f16_z (p0, z1, z0, 90), ++ z0 = svcadd_z (p0, z1, z0, 90)) ++ ++/* ++** cadd_90_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fcadd z0\.h, p0/m, z0\.h, z2\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f16_z_untied, svfloat16_t, ++ z0 = svcadd_f16_z (p0, z1, z2, 90), ++ z0 = svcadd_z (p0, z1, z2, 90)) ++ ++/* ++** cadd_270_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fcadd z0\.h, p0/m, z0\.h, z1\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f16_z_tied1, svfloat16_t, ++ z0 = svcadd_f16_z (p0, z0, z1, 270), ++ z0 = svcadd_z (p0, z0, z1, 270)) ++ ++/* ++** cadd_270_f16_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** fcadd z0\.h, p0/m, z0\.h, \1\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f16_z_tied2, svfloat16_t, ++ z0 = svcadd_f16_z (p0, z1, z0, 270), ++ z0 = svcadd_z (p0, z1, z0, 270)) ++ ++/* ++** cadd_270_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fcadd z0\.h, p0/m, z0\.h, z2\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f16_z_untied, svfloat16_t, ++ z0 = svcadd_f16_z (p0, z1, z2, 270), ++ z0 = svcadd_z (p0, z1, z2, 270)) ++ ++/* ++** cadd_90_f16_x_tied1: ++** fcadd z0\.h, p0/m, z0\.h, z1\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f16_x_tied1, svfloat16_t, ++ z0 = svcadd_f16_x (p0, z0, z1, 90), ++ z0 = svcadd_x (p0, z0, z1, 90)) ++ ++/* ++** cadd_90_f16_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcadd z0\.h, p0/m, z0\.h, \1\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f16_x_tied2, svfloat16_t, ++ z0 = svcadd_f16_x (p0, z1, z0, 90), ++ z0 = svcadd_x (p0, z1, z0, 90)) ++ ++/* ++** cadd_90_f16_x_untied: ++** movprfx z0, z1 ++** fcadd z0\.h, p0/m, z0\.h, z2\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f16_x_untied, svfloat16_t, ++ z0 = svcadd_f16_x (p0, z1, z2, 90), ++ z0 = svcadd_x (p0, z1, z2, 90)) ++ ++/* ++** cadd_270_f16_x_tied1: ++** fcadd z0\.h, p0/m, z0\.h, z1\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f16_x_tied1, svfloat16_t, ++ z0 = svcadd_f16_x (p0, z0, z1, 270), ++ z0 = svcadd_x (p0, z0, z1, 270)) ++ ++/* ++** cadd_270_f16_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcadd z0\.h, p0/m, z0\.h, \1\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f16_x_tied2, svfloat16_t, ++ z0 = svcadd_f16_x (p0, z1, z0, 270), ++ z0 = svcadd_x (p0, z1, z0, 270)) ++ ++/* ++** cadd_270_f16_x_untied: ++** movprfx z0, z1 ++** fcadd z0\.h, p0/m, z0\.h, z2\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f16_x_untied, svfloat16_t, ++ z0 = 
svcadd_f16_x (p0, z1, z2, 270), ++ z0 = svcadd_x (p0, z1, z2, 270)) ++ ++/* ++** ptrue_cadd_90_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_90_f16_x_tied1, svfloat16_t, ++ z0 = svcadd_f16_x (svptrue_b16 (), z0, z1, 90), ++ z0 = svcadd_x (svptrue_b16 (), z0, z1, 90)) ++ ++/* ++** ptrue_cadd_90_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_90_f16_x_tied2, svfloat16_t, ++ z0 = svcadd_f16_x (svptrue_b16 (), z1, z0, 90), ++ z0 = svcadd_x (svptrue_b16 (), z1, z0, 90)) ++ ++/* ++** ptrue_cadd_90_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_90_f16_x_untied, svfloat16_t, ++ z0 = svcadd_f16_x (svptrue_b16 (), z1, z2, 90), ++ z0 = svcadd_x (svptrue_b16 (), z1, z2, 90)) ++ ++/* ++** ptrue_cadd_270_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_270_f16_x_tied1, svfloat16_t, ++ z0 = svcadd_f16_x (svptrue_b16 (), z0, z1, 270), ++ z0 = svcadd_x (svptrue_b16 (), z0, z1, 270)) ++ ++/* ++** ptrue_cadd_270_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_270_f16_x_tied2, svfloat16_t, ++ z0 = svcadd_f16_x (svptrue_b16 (), z1, z0, 270), ++ z0 = svcadd_x (svptrue_b16 (), z1, z0, 270)) ++ ++/* ++** ptrue_cadd_270_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_270_f16_x_untied, svfloat16_t, ++ z0 = svcadd_f16_x (svptrue_b16 (), z1, z2, 270), ++ z0 = svcadd_x (svptrue_b16 (), z1, z2, 270)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f32.c +new file mode 100644 +index 000000000..ed5c16ff3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f32.c +@@ -0,0 +1,251 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cadd_90_f32_m_tied1: ++** fcadd z0\.s, p0/m, z0\.s, z1\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f32_m_tied1, svfloat32_t, ++ z0 = svcadd_f32_m (p0, z0, z1, 90), ++ z0 = svcadd_m (p0, z0, z1, 90)) ++ ++/* ++** cadd_90_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcadd z0\.s, p0/m, z0\.s, \1\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f32_m_tied2, svfloat32_t, ++ z0 = svcadd_f32_m (p0, z1, z0, 90), ++ z0 = svcadd_m (p0, z1, z0, 90)) ++ ++/* ++** cadd_90_f32_m_untied: ++** movprfx z0, z1 ++** fcadd z0\.s, p0/m, z0\.s, z2\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f32_m_untied, svfloat32_t, ++ z0 = svcadd_f32_m (p0, z1, z2, 90), ++ z0 = svcadd_m (p0, z1, z2, 90)) ++ ++/* ++** cadd_270_f32_m_tied1: ++** fcadd z0\.s, p0/m, z0\.s, z1\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f32_m_tied1, svfloat32_t, ++ z0 = svcadd_f32_m (p0, z0, z1, 270), ++ z0 = svcadd_m (p0, z0, z1, 270)) ++ ++/* ++** cadd_270_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcadd z0\.s, p0/m, z0\.s, \1\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f32_m_tied2, svfloat32_t, ++ z0 = svcadd_f32_m (p0, z1, z0, 270), ++ z0 = svcadd_m (p0, z1, z0, 270)) ++ ++/* ++** cadd_270_f32_m_untied: ++** movprfx z0, z1 ++** fcadd z0\.s, p0/m, z0\.s, z2\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f32_m_untied, svfloat32_t, ++ z0 = svcadd_f32_m (p0, z1, z2, 270), ++ z0 = svcadd_m (p0, z1, z2, 270)) ++ ++/* ++** cadd_90_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fcadd z0\.s, p0/m, z0\.s, 
z1\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f32_z_tied1, svfloat32_t, ++ z0 = svcadd_f32_z (p0, z0, z1, 90), ++ z0 = svcadd_z (p0, z0, z1, 90)) ++ ++/* ++** cadd_90_f32_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** fcadd z0\.s, p0/m, z0\.s, \1\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f32_z_tied2, svfloat32_t, ++ z0 = svcadd_f32_z (p0, z1, z0, 90), ++ z0 = svcadd_z (p0, z1, z0, 90)) ++ ++/* ++** cadd_90_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fcadd z0\.s, p0/m, z0\.s, z2\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f32_z_untied, svfloat32_t, ++ z0 = svcadd_f32_z (p0, z1, z2, 90), ++ z0 = svcadd_z (p0, z1, z2, 90)) ++ ++/* ++** cadd_270_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fcadd z0\.s, p0/m, z0\.s, z1\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f32_z_tied1, svfloat32_t, ++ z0 = svcadd_f32_z (p0, z0, z1, 270), ++ z0 = svcadd_z (p0, z0, z1, 270)) ++ ++/* ++** cadd_270_f32_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** fcadd z0\.s, p0/m, z0\.s, \1\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f32_z_tied2, svfloat32_t, ++ z0 = svcadd_f32_z (p0, z1, z0, 270), ++ z0 = svcadd_z (p0, z1, z0, 270)) ++ ++/* ++** cadd_270_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fcadd z0\.s, p0/m, z0\.s, z2\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f32_z_untied, svfloat32_t, ++ z0 = svcadd_f32_z (p0, z1, z2, 270), ++ z0 = svcadd_z (p0, z1, z2, 270)) ++ ++/* ++** cadd_90_f32_x_tied1: ++** fcadd z0\.s, p0/m, z0\.s, z1\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f32_x_tied1, svfloat32_t, ++ z0 = svcadd_f32_x (p0, z0, z1, 90), ++ z0 = svcadd_x (p0, z0, z1, 90)) ++ ++/* ++** cadd_90_f32_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcadd z0\.s, p0/m, z0\.s, \1\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f32_x_tied2, svfloat32_t, ++ z0 = svcadd_f32_x (p0, z1, z0, 90), ++ z0 = svcadd_x (p0, z1, z0, 90)) ++ ++/* ++** cadd_90_f32_x_untied: ++** movprfx z0, z1 ++** fcadd z0\.s, p0/m, z0\.s, z2\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f32_x_untied, svfloat32_t, ++ z0 = svcadd_f32_x (p0, z1, z2, 90), ++ z0 = svcadd_x (p0, z1, z2, 90)) ++ ++/* ++** cadd_270_f32_x_tied1: ++** fcadd z0\.s, p0/m, z0\.s, z1\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f32_x_tied1, svfloat32_t, ++ z0 = svcadd_f32_x (p0, z0, z1, 270), ++ z0 = svcadd_x (p0, z0, z1, 270)) ++ ++/* ++** cadd_270_f32_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcadd z0\.s, p0/m, z0\.s, \1\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f32_x_tied2, svfloat32_t, ++ z0 = svcadd_f32_x (p0, z1, z0, 270), ++ z0 = svcadd_x (p0, z1, z0, 270)) ++ ++/* ++** cadd_270_f32_x_untied: ++** movprfx z0, z1 ++** fcadd z0\.s, p0/m, z0\.s, z2\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f32_x_untied, svfloat32_t, ++ z0 = svcadd_f32_x (p0, z1, z2, 270), ++ z0 = svcadd_x (p0, z1, z2, 270)) ++ ++/* ++** ptrue_cadd_90_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_90_f32_x_tied1, svfloat32_t, ++ z0 = svcadd_f32_x (svptrue_b32 (), z0, z1, 90), ++ z0 = svcadd_x (svptrue_b32 (), z0, z1, 90)) ++ ++/* ++** ptrue_cadd_90_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_90_f32_x_tied2, svfloat32_t, ++ z0 = svcadd_f32_x (svptrue_b32 (), z1, z0, 90), ++ z0 = svcadd_x (svptrue_b32 (), z1, z0, 90)) ++ ++/* ++** ptrue_cadd_90_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_90_f32_x_untied, svfloat32_t, ++ z0 = svcadd_f32_x (svptrue_b32 (), z1, z2, 90), ++ z0 = svcadd_x (svptrue_b32 (), z1, z2, 90)) ++ ++/* ++** ptrue_cadd_270_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_270_f32_x_tied1, svfloat32_t, ++ z0 = svcadd_f32_x (svptrue_b32 (), z0, z1, 270), ++ z0 = svcadd_x (svptrue_b32 (), z0, z1, 270)) ++ ++/* ++** ptrue_cadd_270_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_270_f32_x_tied2, svfloat32_t, ++ z0 = svcadd_f32_x (svptrue_b32 (), z1, z0, 270), ++ z0 = svcadd_x (svptrue_b32 (), z1, z0, 270)) ++ ++/* ++** ptrue_cadd_270_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_270_f32_x_untied, svfloat32_t, ++ z0 = svcadd_f32_x (svptrue_b32 (), z1, z2, 270), ++ z0 = svcadd_x (svptrue_b32 (), z1, z2, 270)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f64.c +new file mode 100644 +index 000000000..0ada881c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f64.c +@@ -0,0 +1,251 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cadd_90_f64_m_tied1: ++** fcadd z0\.d, p0/m, z0\.d, z1\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f64_m_tied1, svfloat64_t, ++ z0 = svcadd_f64_m (p0, z0, z1, 90), ++ z0 = svcadd_m (p0, z0, z1, 90)) ++ ++/* ++** cadd_90_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcadd z0\.d, p0/m, z0\.d, \1, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f64_m_tied2, svfloat64_t, ++ z0 = svcadd_f64_m (p0, z1, z0, 90), ++ z0 = svcadd_m (p0, z1, z0, 90)) ++ ++/* ++** cadd_90_f64_m_untied: ++** movprfx z0, z1 ++** fcadd z0\.d, p0/m, z0\.d, z2\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f64_m_untied, svfloat64_t, ++ z0 = svcadd_f64_m (p0, z1, z2, 90), ++ z0 = svcadd_m (p0, z1, z2, 90)) ++ ++/* ++** cadd_270_f64_m_tied1: ++** fcadd z0\.d, p0/m, z0\.d, z1\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f64_m_tied1, svfloat64_t, ++ z0 = svcadd_f64_m (p0, z0, z1, 270), ++ z0 = svcadd_m (p0, z0, z1, 270)) ++ ++/* ++** cadd_270_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcadd z0\.d, p0/m, z0\.d, \1, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f64_m_tied2, svfloat64_t, ++ z0 = svcadd_f64_m (p0, z1, z0, 270), ++ z0 = svcadd_m (p0, z1, z0, 270)) ++ ++/* ++** cadd_270_f64_m_untied: ++** movprfx z0, z1 ++** fcadd z0\.d, p0/m, z0\.d, z2\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f64_m_untied, svfloat64_t, ++ z0 = svcadd_f64_m (p0, z1, z2, 270), ++ z0 = svcadd_m (p0, z1, z2, 270)) ++ ++/* ++** cadd_90_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fcadd z0\.d, p0/m, z0\.d, z1\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f64_z_tied1, svfloat64_t, ++ z0 = svcadd_f64_z (p0, z0, z1, 90), ++ z0 = svcadd_z (p0, z0, z1, 90)) ++ ++/* ++** cadd_90_f64_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** fcadd z0\.d, p0/m, z0\.d, \1, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f64_z_tied2, svfloat64_t, ++ z0 = svcadd_f64_z (p0, z1, z0, 90), ++ z0 = svcadd_z (p0, z1, z0, 90)) ++ ++/* ++** cadd_90_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fcadd z0\.d, p0/m, z0\.d, z2\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f64_z_untied, svfloat64_t, ++ z0 = svcadd_f64_z (p0, z1, z2, 90), ++ z0 = svcadd_z (p0, z1, z2, 90)) ++ 
++/* ++** cadd_270_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fcadd z0\.d, p0/m, z0\.d, z1\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f64_z_tied1, svfloat64_t, ++ z0 = svcadd_f64_z (p0, z0, z1, 270), ++ z0 = svcadd_z (p0, z0, z1, 270)) ++ ++/* ++** cadd_270_f64_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** fcadd z0\.d, p0/m, z0\.d, \1, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f64_z_tied2, svfloat64_t, ++ z0 = svcadd_f64_z (p0, z1, z0, 270), ++ z0 = svcadd_z (p0, z1, z0, 270)) ++ ++/* ++** cadd_270_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fcadd z0\.d, p0/m, z0\.d, z2\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f64_z_untied, svfloat64_t, ++ z0 = svcadd_f64_z (p0, z1, z2, 270), ++ z0 = svcadd_z (p0, z1, z2, 270)) ++ ++/* ++** cadd_90_f64_x_tied1: ++** fcadd z0\.d, p0/m, z0\.d, z1\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f64_x_tied1, svfloat64_t, ++ z0 = svcadd_f64_x (p0, z0, z1, 90), ++ z0 = svcadd_x (p0, z0, z1, 90)) ++ ++/* ++** cadd_90_f64_x_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcadd z0\.d, p0/m, z0\.d, \1, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f64_x_tied2, svfloat64_t, ++ z0 = svcadd_f64_x (p0, z1, z0, 90), ++ z0 = svcadd_x (p0, z1, z0, 90)) ++ ++/* ++** cadd_90_f64_x_untied: ++** movprfx z0, z1 ++** fcadd z0\.d, p0/m, z0\.d, z2\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_90_f64_x_untied, svfloat64_t, ++ z0 = svcadd_f64_x (p0, z1, z2, 90), ++ z0 = svcadd_x (p0, z1, z2, 90)) ++ ++/* ++** cadd_270_f64_x_tied1: ++** fcadd z0\.d, p0/m, z0\.d, z1\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f64_x_tied1, svfloat64_t, ++ z0 = svcadd_f64_x (p0, z0, z1, 270), ++ z0 = svcadd_x (p0, z0, z1, 270)) ++ ++/* ++** cadd_270_f64_x_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcadd z0\.d, p0/m, z0\.d, \1, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f64_x_tied2, svfloat64_t, ++ z0 = svcadd_f64_x (p0, z1, z0, 270), ++ z0 = svcadd_x (p0, z1, z0, 270)) ++ ++/* ++** cadd_270_f64_x_untied: ++** movprfx z0, z1 ++** fcadd z0\.d, p0/m, z0\.d, z2\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cadd_270_f64_x_untied, svfloat64_t, ++ z0 = svcadd_f64_x (p0, z1, z2, 270), ++ z0 = svcadd_x (p0, z1, z2, 270)) ++ ++/* ++** ptrue_cadd_90_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_90_f64_x_tied1, svfloat64_t, ++ z0 = svcadd_f64_x (svptrue_b64 (), z0, z1, 90), ++ z0 = svcadd_x (svptrue_b64 (), z0, z1, 90)) ++ ++/* ++** ptrue_cadd_90_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_90_f64_x_tied2, svfloat64_t, ++ z0 = svcadd_f64_x (svptrue_b64 (), z1, z0, 90), ++ z0 = svcadd_x (svptrue_b64 (), z1, z0, 90)) ++ ++/* ++** ptrue_cadd_90_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_90_f64_x_untied, svfloat64_t, ++ z0 = svcadd_f64_x (svptrue_b64 (), z1, z2, 90), ++ z0 = svcadd_x (svptrue_b64 (), z1, z2, 90)) ++ ++/* ++** ptrue_cadd_270_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_270_f64_x_tied1, svfloat64_t, ++ z0 = svcadd_f64_x (svptrue_b64 (), z0, z1, 270), ++ z0 = svcadd_x (svptrue_b64 (), z0, z1, 270)) ++ ++/* ++** ptrue_cadd_270_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_270_f64_x_tied2, svfloat64_t, ++ z0 = svcadd_f64_x (svptrue_b64 (), z1, z0, 270), ++ z0 = svcadd_x (svptrue_b64 (), z1, z0, 270)) ++ ++/* ++** ptrue_cadd_270_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cadd_270_f64_x_untied, svfloat64_t, ++ z0 = svcadd_f64_x (svptrue_b64 (), z1, z2, 270), ++ z0 = svcadd_x (svptrue_b64 (), z1, z2, 270)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c +new file mode 100644 +index 000000000..a15e34400 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_bf16_tied1: ++** clasta z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_bf16_tied1, svbfloat16_t, ++ z0 = svclasta_bf16 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_bf16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clasta z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_bf16_tied2, svbfloat16_t, ++ z0 = svclasta_bf16 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_bf16_untied: ++** movprfx z0, z1 ++** clasta z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_bf16_untied, svbfloat16_t, ++ z0 = svclasta_bf16 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_d0_bf16: ++** clasta h0, p0, h0, z2\.h ++** ret ++*/ ++TEST_FOLD_LEFT_D (clasta_d0_bf16, bfloat16_t, svbfloat16_t, ++ d0 = svclasta_n_bf16 (p0, d0, z2), ++ d0 = svclasta (p0, d0, z2)) ++ ++/* ++** clasta_d1_bf16: ++** mov v0\.h\[0\], v1\.h\[0\] ++** clasta h0, p0, h0, z2\.h ++** ret ++*/ ++TEST_FOLD_LEFT_D (clasta_d1_bf16, bfloat16_t, svbfloat16_t, ++ d0 = svclasta_n_bf16 (p0, d1, z2), ++ d0 = svclasta (p0, d1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c +new file mode 100644 +index 000000000..d9a980f60 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_f16_tied1: ++** clasta z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_f16_tied1, svfloat16_t, ++ z0 = svclasta_f16 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clasta z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_f16_tied2, svfloat16_t, ++ z0 = svclasta_f16 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_f16_untied: ++** movprfx z0, z1 ++** clasta z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_f16_untied, svfloat16_t, ++ z0 = svclasta_f16 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_d0_f16: ++** clasta h0, p0, h0, z2\.h ++** ret ++*/ ++TEST_FOLD_LEFT_D (clasta_d0_f16, float16_t, svfloat16_t, ++ d0 = svclasta_n_f16 (p0, d0, z2), ++ d0 = svclasta (p0, d0, z2)) ++ ++/* ++** clasta_d1_f16: ++** mov v0\.h\[0\], v1\.h\[0\] ++** clasta h0, p0, h0, z2\.h ++** ret ++*/ ++TEST_FOLD_LEFT_D (clasta_d1_f16, float16_t, svfloat16_t, ++ d0 = svclasta_n_f16 (p0, d1, z2), ++ d0 = svclasta (p0, d1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c +new file mode 
100644 +index 000000000..cac01fa6d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_f32_tied1: ++** clasta z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_f32_tied1, svfloat32_t, ++ z0 = svclasta_f32 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clasta z0\.s, p0, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_f32_tied2, svfloat32_t, ++ z0 = svclasta_f32 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_f32_untied: ++** movprfx z0, z1 ++** clasta z0\.s, p0, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_f32_untied, svfloat32_t, ++ z0 = svclasta_f32 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_d0_f32: ++** clasta s0, p0, s0, z2\.s ++** ret ++*/ ++TEST_FOLD_LEFT_D (clasta_d0_f32, float32_t, svfloat32_t, ++ d0 = svclasta_n_f32 (p0, d0, z2), ++ d0 = svclasta (p0, d0, z2)) ++ ++/* ++** clasta_d1_f32: ++** fmov s0, s1 ++** clasta s0, p0, s0, z2\.s ++** ret ++*/ ++TEST_FOLD_LEFT_D (clasta_d1_f32, float32_t, svfloat32_t, ++ d0 = svclasta_n_f32 (p0, d1, z2), ++ d0 = svclasta (p0, d1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c +new file mode 100644 +index 000000000..43b93553b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_f64_tied1: ++** clasta z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_f64_tied1, svfloat64_t, ++ z0 = svclasta_f64 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_f64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** clasta z0\.d, p0, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_f64_tied2, svfloat64_t, ++ z0 = svclasta_f64 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_f64_untied: ++** movprfx z0, z1 ++** clasta z0\.d, p0, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_f64_untied, svfloat64_t, ++ z0 = svclasta_f64 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_d0_f64: ++** clasta d0, p0, d0, z2\.d ++** ret ++*/ ++TEST_FOLD_LEFT_D (clasta_d0_f64, float64_t, svfloat64_t, ++ d0 = svclasta_n_f64 (p0, d0, z2), ++ d0 = svclasta (p0, d0, z2)) ++ ++/* ++** clasta_d1_f64: ++** fmov d0, d1 ++** clasta d0, p0, d0, z2\.d ++** ret ++*/ ++TEST_FOLD_LEFT_D (clasta_d1_f64, float64_t, svfloat64_t, ++ d0 = svclasta_n_f64 (p0, d1, z2), ++ d0 = svclasta (p0, d1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s16.c +new file mode 100644 +index 000000000..f5e4f85ce +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s16.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_s16_tied1: ++** clasta z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s16_tied1, svint16_t, ++ z0 = svclasta_s16 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_s16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clasta z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s16_tied2, svint16_t, ++ z0 = svclasta_s16 (p0, z1, z0), 
++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_s16_untied: ++** movprfx z0, z1 ++** clasta z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s16_untied, svint16_t, ++ z0 = svclasta_s16 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_x0_s16: ++** clasta w0, p0, w0, z0\.h ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x0_s16, int16_t, svint16_t, ++ x0 = svclasta_n_s16 (p0, x0, z0), ++ x0 = svclasta (p0, x0, z0)) ++ ++/* ++** clasta_x1_s16: ++** mov w0, w1 ++** clasta w0, p0, w0, z0\.h ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x1_s16, int16_t, svint16_t, ++ x0 = svclasta_n_s16 (p0, x1, z0), ++ x0 = svclasta (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s32.c +new file mode 100644 +index 000000000..fbd82e778 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s32.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_s32_tied1: ++** clasta z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s32_tied1, svint32_t, ++ z0 = svclasta_s32 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clasta z0\.s, p0, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s32_tied2, svint32_t, ++ z0 = svclasta_s32 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_s32_untied: ++** movprfx z0, z1 ++** clasta z0\.s, p0, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s32_untied, svint32_t, ++ z0 = svclasta_s32 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_x0_s32: ++** clasta w0, p0, w0, z0\.s ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x0_s32, int32_t, svint32_t, ++ x0 = svclasta_n_s32 (p0, x0, z0), ++ x0 = svclasta (p0, x0, z0)) ++ ++/* ++** clasta_x1_s32: ++** mov w0, w1 ++** clasta w0, p0, w0, z0\.s ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x1_s32, int32_t, svint32_t, ++ x0 = svclasta_n_s32 (p0, x1, z0), ++ x0 = svclasta (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s64.c +new file mode 100644 +index 000000000..08edf157b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s64.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_s64_tied1: ++** clasta z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s64_tied1, svint64_t, ++ z0 = svclasta_s64 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_s64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** clasta z0\.d, p0, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s64_tied2, svint64_t, ++ z0 = svclasta_s64 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_s64_untied: ++** movprfx z0, z1 ++** clasta z0\.d, p0, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s64_untied, svint64_t, ++ z0 = svclasta_s64 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_x0_s64: ++** clasta x0, p0, x0, z0\.d ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x0_s64, int64_t, svint64_t, ++ x0 = svclasta_n_s64 (p0, x0, z0), ++ x0 = svclasta (p0, x0, z0)) ++ ++/* ++** clasta_x1_s64: ++** mov x0, x1 ++** clasta x0, p0, x0, z0\.d ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x1_s64, int64_t, svint64_t, ++ x0 = svclasta_n_s64 (p0, x1, z0), ++ x0 = svclasta (p0, x1, z0)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s8.c +new file mode 100644 +index 000000000..286f16a9d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s8.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_s8_tied1: ++** clasta z0\.b, p0, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s8_tied1, svint8_t, ++ z0 = svclasta_s8 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_s8_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clasta z0\.b, p0, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s8_tied2, svint8_t, ++ z0 = svclasta_s8 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_s8_untied: ++** movprfx z0, z1 ++** clasta z0\.b, p0, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_s8_untied, svint8_t, ++ z0 = svclasta_s8 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_x0_s8: ++** clasta w0, p0, w0, z0\.b ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x0_s8, int8_t, svint8_t, ++ x0 = svclasta_n_s8 (p0, x0, z0), ++ x0 = svclasta (p0, x0, z0)) ++ ++/* ++** clasta_x1_s8: ++** mov w0, w1 ++** clasta w0, p0, w0, z0\.b ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x1_s8, int8_t, svint8_t, ++ x0 = svclasta_n_s8 (p0, x1, z0), ++ x0 = svclasta (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u16.c +new file mode 100644 +index 000000000..40c6dca90 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u16.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_u16_tied1: ++** clasta z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u16_tied1, svuint16_t, ++ z0 = svclasta_u16 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_u16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clasta z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u16_tied2, svuint16_t, ++ z0 = svclasta_u16 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_u16_untied: ++** movprfx z0, z1 ++** clasta z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u16_untied, svuint16_t, ++ z0 = svclasta_u16 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_x0_u16: ++** clasta w0, p0, w0, z0\.h ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x0_u16, uint16_t, svuint16_t, ++ x0 = svclasta_n_u16 (p0, x0, z0), ++ x0 = svclasta (p0, x0, z0)) ++ ++/* ++** clasta_x1_u16: ++** mov w0, w1 ++** clasta w0, p0, w0, z0\.h ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x1_u16, uint16_t, svuint16_t, ++ x0 = svclasta_n_u16 (p0, x1, z0), ++ x0 = svclasta (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u32.c +new file mode 100644 +index 000000000..6c46e13cf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u32.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_u32_tied1: ++** clasta z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u32_tied1, svuint32_t, ++ z0 = svclasta_u32 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_u32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clasta z0\.s, p0, z0\.s, \1\.s 
++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u32_tied2, svuint32_t, ++ z0 = svclasta_u32 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_u32_untied: ++** movprfx z0, z1 ++** clasta z0\.s, p0, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u32_untied, svuint32_t, ++ z0 = svclasta_u32 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_x0_u32: ++** clasta w0, p0, w0, z0\.s ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x0_u32, uint32_t, svuint32_t, ++ x0 = svclasta_n_u32 (p0, x0, z0), ++ x0 = svclasta (p0, x0, z0)) ++ ++/* ++** clasta_x1_u32: ++** mov w0, w1 ++** clasta w0, p0, w0, z0\.s ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x1_u32, uint32_t, svuint32_t, ++ x0 = svclasta_n_u32 (p0, x1, z0), ++ x0 = svclasta (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u64.c +new file mode 100644 +index 000000000..99ad41e50 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u64.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_u64_tied1: ++** clasta z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u64_tied1, svuint64_t, ++ z0 = svclasta_u64 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_u64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** clasta z0\.d, p0, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u64_tied2, svuint64_t, ++ z0 = svclasta_u64 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_u64_untied: ++** movprfx z0, z1 ++** clasta z0\.d, p0, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u64_untied, svuint64_t, ++ z0 = svclasta_u64 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_x0_u64: ++** clasta x0, p0, x0, z0\.d ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x0_u64, uint64_t, svuint64_t, ++ x0 = svclasta_n_u64 (p0, x0, z0), ++ x0 = svclasta (p0, x0, z0)) ++ ++/* ++** clasta_x1_u64: ++** mov x0, x1 ++** clasta x0, p0, x0, z0\.d ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x1_u64, uint64_t, svuint64_t, ++ x0 = svclasta_n_u64 (p0, x1, z0), ++ x0 = svclasta (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u8.c +new file mode 100644 +index 000000000..eb438f4ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u8.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clasta_u8_tied1: ++** clasta z0\.b, p0, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u8_tied1, svuint8_t, ++ z0 = svclasta_u8 (p0, z0, z1), ++ z0 = svclasta (p0, z0, z1)) ++ ++/* ++** clasta_u8_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clasta z0\.b, p0, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u8_tied2, svuint8_t, ++ z0 = svclasta_u8 (p0, z1, z0), ++ z0 = svclasta (p0, z1, z0)) ++ ++/* ++** clasta_u8_untied: ++** movprfx z0, z1 ++** clasta z0\.b, p0, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clasta_u8_untied, svuint8_t, ++ z0 = svclasta_u8 (p0, z1, z2), ++ z0 = svclasta (p0, z1, z2)) ++ ++/* ++** clasta_x0_u8: ++** clasta w0, p0, w0, z0\.b ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x0_u8, uint8_t, svuint8_t, ++ x0 = svclasta_n_u8 (p0, x0, z0), ++ x0 = svclasta (p0, x0, z0)) ++ ++/* ++** clasta_x1_u8: ++** mov w0, w1 ++** clasta w0, p0, w0, z0\.b ++** ret ++*/ ++TEST_FOLD_LEFT_X (clasta_x1_u8, uint8_t, 
svuint8_t, ++ x0 = svclasta_n_u8 (p0, x1, z0), ++ x0 = svclasta (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c +new file mode 100644 +index 000000000..235fd1b4e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_bf16_tied1: ++** clastb z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_bf16_tied1, svbfloat16_t, ++ z0 = svclastb_bf16 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_bf16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clastb z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_bf16_tied2, svbfloat16_t, ++ z0 = svclastb_bf16 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_bf16_untied: ++** movprfx z0, z1 ++** clastb z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_bf16_untied, svbfloat16_t, ++ z0 = svclastb_bf16 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_d0_bf16: ++** clastb h0, p0, h0, z2\.h ++** ret ++*/ ++TEST_FOLD_LEFT_D (clastb_d0_bf16, bfloat16_t, svbfloat16_t, ++ d0 = svclastb_n_bf16 (p0, d0, z2), ++ d0 = svclastb (p0, d0, z2)) ++ ++/* ++** clastb_d1_bf16: ++** mov v0\.h\[0\], v1\.h\[0\] ++** clastb h0, p0, h0, z2\.h ++** ret ++*/ ++TEST_FOLD_LEFT_D (clastb_d1_bf16, bfloat16_t, svbfloat16_t, ++ d0 = svclastb_n_bf16 (p0, d1, z2), ++ d0 = svclastb (p0, d1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c +new file mode 100644 +index 000000000..e56d7688a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_f16_tied1: ++** clastb z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_f16_tied1, svfloat16_t, ++ z0 = svclastb_f16 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clastb z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_f16_tied2, svfloat16_t, ++ z0 = svclastb_f16 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_f16_untied: ++** movprfx z0, z1 ++** clastb z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_f16_untied, svfloat16_t, ++ z0 = svclastb_f16 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_d0_f16: ++** clastb h0, p0, h0, z2\.h ++** ret ++*/ ++TEST_FOLD_LEFT_D (clastb_d0_f16, float16_t, svfloat16_t, ++ d0 = svclastb_n_f16 (p0, d0, z2), ++ d0 = svclastb (p0, d0, z2)) ++ ++/* ++** clastb_d1_f16: ++** mov v0\.h\[0\], v1\.h\[0\] ++** clastb h0, p0, h0, z2\.h ++** ret ++*/ ++TEST_FOLD_LEFT_D (clastb_d1_f16, float16_t, svfloat16_t, ++ d0 = svclastb_n_f16 (p0, d1, z2), ++ d0 = svclastb (p0, d1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c +new file mode 100644 +index 000000000..c580d1306 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_f32_tied1: ++** clastb z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_f32_tied1, 
svfloat32_t, ++ z0 = svclastb_f32 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clastb z0\.s, p0, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_f32_tied2, svfloat32_t, ++ z0 = svclastb_f32 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_f32_untied: ++** movprfx z0, z1 ++** clastb z0\.s, p0, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_f32_untied, svfloat32_t, ++ z0 = svclastb_f32 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_d0_f32: ++** clastb s0, p0, s0, z2\.s ++** ret ++*/ ++TEST_FOLD_LEFT_D (clastb_d0_f32, float32_t, svfloat32_t, ++ d0 = svclastb_n_f32 (p0, d0, z2), ++ d0 = svclastb (p0, d0, z2)) ++ ++/* ++** clastb_d1_f32: ++** fmov s0, s1 ++** clastb s0, p0, s0, z2\.s ++** ret ++*/ ++TEST_FOLD_LEFT_D (clastb_d1_f32, float32_t, svfloat32_t, ++ d0 = svclastb_n_f32 (p0, d1, z2), ++ d0 = svclastb (p0, d1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c +new file mode 100644 +index 000000000..217a76f51 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_f64_tied1: ++** clastb z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_f64_tied1, svfloat64_t, ++ z0 = svclastb_f64 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_f64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** clastb z0\.d, p0, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_f64_tied2, svfloat64_t, ++ z0 = svclastb_f64 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_f64_untied: ++** movprfx z0, z1 ++** clastb z0\.d, p0, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_f64_untied, svfloat64_t, ++ z0 = svclastb_f64 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_d0_f64: ++** clastb d0, p0, d0, z2\.d ++** ret ++*/ ++TEST_FOLD_LEFT_D (clastb_d0_f64, float64_t, svfloat64_t, ++ d0 = svclastb_n_f64 (p0, d0, z2), ++ d0 = svclastb (p0, d0, z2)) ++ ++/* ++** clastb_d1_f64: ++** fmov d0, d1 ++** clastb d0, p0, d0, z2\.d ++** ret ++*/ ++TEST_FOLD_LEFT_D (clastb_d1_f64, float64_t, svfloat64_t, ++ d0 = svclastb_n_f64 (p0, d1, z2), ++ d0 = svclastb (p0, d1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s16.c +new file mode 100644 +index 000000000..37be28040 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s16.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_s16_tied1: ++** clastb z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s16_tied1, svint16_t, ++ z0 = svclastb_s16 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_s16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clastb z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s16_tied2, svint16_t, ++ z0 = svclastb_s16 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_s16_untied: ++** movprfx z0, z1 ++** clastb z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s16_untied, svint16_t, ++ z0 = svclastb_s16 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_x0_s16: ++** clastb w0, p0, w0, z0\.h ++** ret ++*/ ++TEST_FOLD_LEFT_X 
(clastb_x0_s16, int16_t, svint16_t, ++ x0 = svclastb_n_s16 (p0, x0, z0), ++ x0 = svclastb (p0, x0, z0)) ++ ++/* ++** clastb_x1_s16: ++** mov w0, w1 ++** clastb w0, p0, w0, z0\.h ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x1_s16, int16_t, svint16_t, ++ x0 = svclastb_n_s16 (p0, x1, z0), ++ x0 = svclastb (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s32.c +new file mode 100644 +index 000000000..2e56c5a8f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s32.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_s32_tied1: ++** clastb z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s32_tied1, svint32_t, ++ z0 = svclastb_s32 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clastb z0\.s, p0, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s32_tied2, svint32_t, ++ z0 = svclastb_s32 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_s32_untied: ++** movprfx z0, z1 ++** clastb z0\.s, p0, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s32_untied, svint32_t, ++ z0 = svclastb_s32 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_x0_s32: ++** clastb w0, p0, w0, z0\.s ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x0_s32, int32_t, svint32_t, ++ x0 = svclastb_n_s32 (p0, x0, z0), ++ x0 = svclastb (p0, x0, z0)) ++ ++/* ++** clastb_x1_s32: ++** mov w0, w1 ++** clastb w0, p0, w0, z0\.s ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x1_s32, int32_t, svint32_t, ++ x0 = svclastb_n_s32 (p0, x1, z0), ++ x0 = svclastb (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s64.c +new file mode 100644 +index 000000000..9ce210aae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s64.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_s64_tied1: ++** clastb z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s64_tied1, svint64_t, ++ z0 = svclastb_s64 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_s64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** clastb z0\.d, p0, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s64_tied2, svint64_t, ++ z0 = svclastb_s64 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_s64_untied: ++** movprfx z0, z1 ++** clastb z0\.d, p0, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s64_untied, svint64_t, ++ z0 = svclastb_s64 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_x0_s64: ++** clastb x0, p0, x0, z0\.d ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x0_s64, int64_t, svint64_t, ++ x0 = svclastb_n_s64 (p0, x0, z0), ++ x0 = svclastb (p0, x0, z0)) ++ ++/* ++** clastb_x1_s64: ++** mov x0, x1 ++** clastb x0, p0, x0, z0\.d ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x1_s64, int64_t, svint64_t, ++ x0 = svclastb_n_s64 (p0, x1, z0), ++ x0 = svclastb (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s8.c +new file mode 100644 +index 000000000..eb76c22cd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s8.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ 
++#include "test_sve_acle.h" ++ ++/* ++** clastb_s8_tied1: ++** clastb z0\.b, p0, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s8_tied1, svint8_t, ++ z0 = svclastb_s8 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_s8_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clastb z0\.b, p0, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s8_tied2, svint8_t, ++ z0 = svclastb_s8 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_s8_untied: ++** movprfx z0, z1 ++** clastb z0\.b, p0, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_s8_untied, svint8_t, ++ z0 = svclastb_s8 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_x0_s8: ++** clastb w0, p0, w0, z0\.b ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x0_s8, int8_t, svint8_t, ++ x0 = svclastb_n_s8 (p0, x0, z0), ++ x0 = svclastb (p0, x0, z0)) ++ ++/* ++** clastb_x1_s8: ++** mov w0, w1 ++** clastb w0, p0, w0, z0\.b ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x1_s8, int8_t, svint8_t, ++ x0 = svclastb_n_s8 (p0, x1, z0), ++ x0 = svclastb (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u16.c +new file mode 100644 +index 000000000..5aea9c7bd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u16.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_u16_tied1: ++** clastb z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u16_tied1, svuint16_t, ++ z0 = svclastb_u16 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_u16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clastb z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u16_tied2, svuint16_t, ++ z0 = svclastb_u16 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_u16_untied: ++** movprfx z0, z1 ++** clastb z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u16_untied, svuint16_t, ++ z0 = svclastb_u16 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_x0_u16: ++** clastb w0, p0, w0, z0\.h ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x0_u16, uint16_t, svuint16_t, ++ x0 = svclastb_n_u16 (p0, x0, z0), ++ x0 = svclastb (p0, x0, z0)) ++ ++/* ++** clastb_x1_u16: ++** mov w0, w1 ++** clastb w0, p0, w0, z0\.h ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x1_u16, uint16_t, svuint16_t, ++ x0 = svclastb_n_u16 (p0, x1, z0), ++ x0 = svclastb (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u32.c +new file mode 100644 +index 000000000..47fcf4f27 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u32.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_u32_tied1: ++** clastb z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u32_tied1, svuint32_t, ++ z0 = svclastb_u32 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_u32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clastb z0\.s, p0, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u32_tied2, svuint32_t, ++ z0 = svclastb_u32 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_u32_untied: ++** movprfx z0, z1 ++** clastb z0\.s, p0, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u32_untied, svuint32_t, ++ z0 = svclastb_u32 (p0, z1, z2), ++ z0 = svclastb (p0, z1, 
z2)) ++ ++/* ++** clastb_x0_u32: ++** clastb w0, p0, w0, z0\.s ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x0_u32, uint32_t, svuint32_t, ++ x0 = svclastb_n_u32 (p0, x0, z0), ++ x0 = svclastb (p0, x0, z0)) ++ ++/* ++** clastb_x1_u32: ++** mov w0, w1 ++** clastb w0, p0, w0, z0\.s ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x1_u32, uint32_t, svuint32_t, ++ x0 = svclastb_n_u32 (p0, x1, z0), ++ x0 = svclastb (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u64.c +new file mode 100644 +index 000000000..fb57afe85 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u64.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_u64_tied1: ++** clastb z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u64_tied1, svuint64_t, ++ z0 = svclastb_u64 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_u64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** clastb z0\.d, p0, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u64_tied2, svuint64_t, ++ z0 = svclastb_u64 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_u64_untied: ++** movprfx z0, z1 ++** clastb z0\.d, p0, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u64_untied, svuint64_t, ++ z0 = svclastb_u64 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_x0_u64: ++** clastb x0, p0, x0, z0\.d ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x0_u64, uint64_t, svuint64_t, ++ x0 = svclastb_n_u64 (p0, x0, z0), ++ x0 = svclastb (p0, x0, z0)) ++ ++/* ++** clastb_x1_u64: ++** mov x0, x1 ++** clastb x0, p0, x0, z0\.d ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x1_u64, uint64_t, svuint64_t, ++ x0 = svclastb_n_u64 (p0, x1, z0), ++ x0 = svclastb (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u8.c +new file mode 100644 +index 000000000..f3ca84920 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u8.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clastb_u8_tied1: ++** clastb z0\.b, p0, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u8_tied1, svuint8_t, ++ z0 = svclastb_u8 (p0, z0, z1), ++ z0 = svclastb (p0, z0, z1)) ++ ++/* ++** clastb_u8_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clastb z0\.b, p0, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u8_tied2, svuint8_t, ++ z0 = svclastb_u8 (p0, z1, z0), ++ z0 = svclastb (p0, z1, z0)) ++ ++/* ++** clastb_u8_untied: ++** movprfx z0, z1 ++** clastb z0\.b, p0, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clastb_u8_untied, svuint8_t, ++ z0 = svclastb_u8 (p0, z1, z2), ++ z0 = svclastb (p0, z1, z2)) ++ ++/* ++** clastb_x0_u8: ++** clastb w0, p0, w0, z0\.b ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x0_u8, uint8_t, svuint8_t, ++ x0 = svclastb_n_u8 (p0, x0, z0), ++ x0 = svclastb (p0, x0, z0)) ++ ++/* ++** clastb_x1_u8: ++** mov w0, w1 ++** clastb w0, p0, w0, z0\.b ++** ret ++*/ ++TEST_FOLD_LEFT_X (clastb_x1_u8, uint8_t, svuint8_t, ++ x0 = svclastb_n_u8 (p0, x1, z0), ++ x0 = svclastb (p0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s16.c +new file mode 100644 +index 000000000..7af312397 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s16.c +@@ -0,0 
+1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cls_s16_m_tied1: ++** cls z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cls_s16_m_tied1, svuint16_t, svint16_t, ++ z0 = svcls_s16_m (z0, p0, z4), ++ z0 = svcls_m (z0, p0, z4)) ++ ++/* ++** cls_s16_m_untied: ++** movprfx z0, z1 ++** cls z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cls_s16_m_untied, svuint16_t, svint16_t, ++ z0 = svcls_s16_m (z1, p0, z4), ++ z0 = svcls_m (z1, p0, z4)) ++ ++/* ++** cls_s16_z: ++** movprfx z0\.h, p0/z, z4\.h ++** cls z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cls_s16_z, svuint16_t, svint16_t, ++ z0 = svcls_s16_z (p0, z4), ++ z0 = svcls_z (p0, z4)) ++ ++/* ++** cls_s16_x: ++** cls z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cls_s16_x, svuint16_t, svint16_t, ++ z0 = svcls_s16_x (p0, z4), ++ z0 = svcls_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s32.c +new file mode 100644 +index 000000000..813876f68 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s32.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cls_s32_m_tied1: ++** cls z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cls_s32_m_tied1, svuint32_t, svint32_t, ++ z0 = svcls_s32_m (z0, p0, z4), ++ z0 = svcls_m (z0, p0, z4)) ++ ++/* ++** cls_s32_m_untied: ++** movprfx z0, z1 ++** cls z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cls_s32_m_untied, svuint32_t, svint32_t, ++ z0 = svcls_s32_m (z1, p0, z4), ++ z0 = svcls_m (z1, p0, z4)) ++ ++/* ++** cls_s32_z: ++** movprfx z0\.s, p0/z, z4\.s ++** cls z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cls_s32_z, svuint32_t, svint32_t, ++ z0 = svcls_s32_z (p0, z4), ++ z0 = svcls_z (p0, z4)) ++ ++/* ++** cls_s32_x: ++** cls z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cls_s32_x, svuint32_t, svint32_t, ++ z0 = svcls_s32_x (p0, z4), ++ z0 = svcls_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s64.c +new file mode 100644 +index 000000000..660a20556 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s64.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cls_s64_m_tied1: ++** cls z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cls_s64_m_tied1, svuint64_t, svint64_t, ++ z0 = svcls_s64_m (z0, p0, z4), ++ z0 = svcls_m (z0, p0, z4)) ++ ++/* ++** cls_s64_m_untied: ++** movprfx z0, z1 ++** cls z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cls_s64_m_untied, svuint64_t, svint64_t, ++ z0 = svcls_s64_m (z1, p0, z4), ++ z0 = svcls_m (z1, p0, z4)) ++ ++/* ++** cls_s64_z: ++** movprfx z0\.d, p0/z, z4\.d ++** cls z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cls_s64_z, svuint64_t, svint64_t, ++ z0 = svcls_s64_z (p0, z4), ++ z0 = svcls_z (p0, z4)) ++ ++/* ++** cls_s64_x: ++** cls z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cls_s64_x, svuint64_t, svint64_t, ++ z0 = svcls_s64_x (p0, z4), ++ z0 = svcls_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s8.c +new file mode 100644 +index 000000000..56f5c2608 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s8.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include 
"test_sve_acle.h" ++ ++/* ++** cls_s8_m_tied1: ++** cls z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (cls_s8_m_tied1, svuint8_t, svint8_t, ++ z0 = svcls_s8_m (z0, p0, z4), ++ z0 = svcls_m (z0, p0, z4)) ++ ++/* ++** cls_s8_m_untied: ++** movprfx z0, z1 ++** cls z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (cls_s8_m_untied, svuint8_t, svint8_t, ++ z0 = svcls_s8_m (z1, p0, z4), ++ z0 = svcls_m (z1, p0, z4)) ++ ++/* ++** cls_s8_z: ++** movprfx z0\.b, p0/z, z4\.b ++** cls z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (cls_s8_z, svuint8_t, svint8_t, ++ z0 = svcls_s8_z (p0, z4), ++ z0 = svcls_z (p0, z4)) ++ ++/* ++** cls_s8_x: ++** cls z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (cls_s8_x, svuint8_t, svint8_t, ++ z0 = svcls_s8_x (p0, z4), ++ z0 = svcls_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s16.c +new file mode 100644 +index 000000000..58f89005c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s16.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clz_s16_m_tied1: ++** clz z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (clz_s16_m_tied1, svuint16_t, svint16_t, ++ z0 = svclz_s16_m (z0, p0, z4), ++ z0 = svclz_m (z0, p0, z4)) ++ ++/* ++** clz_s16_m_untied: ++** movprfx z0, z1 ++** clz z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (clz_s16_m_untied, svuint16_t, svint16_t, ++ z0 = svclz_s16_m (z1, p0, z4), ++ z0 = svclz_m (z1, p0, z4)) ++ ++/* ++** clz_s16_z: ++** movprfx z0\.h, p0/z, z4\.h ++** clz z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (clz_s16_z, svuint16_t, svint16_t, ++ z0 = svclz_s16_z (p0, z4), ++ z0 = svclz_z (p0, z4)) ++ ++/* ++** clz_s16_x: ++** clz z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (clz_s16_x, svuint16_t, svint16_t, ++ z0 = svclz_s16_x (p0, z4), ++ z0 = svclz_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s32.c +new file mode 100644 +index 000000000..a9198070b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s32.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clz_s32_m_tied1: ++** clz z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (clz_s32_m_tied1, svuint32_t, svint32_t, ++ z0 = svclz_s32_m (z0, p0, z4), ++ z0 = svclz_m (z0, p0, z4)) ++ ++/* ++** clz_s32_m_untied: ++** movprfx z0, z1 ++** clz z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (clz_s32_m_untied, svuint32_t, svint32_t, ++ z0 = svclz_s32_m (z1, p0, z4), ++ z0 = svclz_m (z1, p0, z4)) ++ ++/* ++** clz_s32_z: ++** movprfx z0\.s, p0/z, z4\.s ++** clz z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (clz_s32_z, svuint32_t, svint32_t, ++ z0 = svclz_s32_z (p0, z4), ++ z0 = svclz_z (p0, z4)) ++ ++/* ++** clz_s32_x: ++** clz z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (clz_s32_x, svuint32_t, svint32_t, ++ z0 = svclz_s32_x (p0, z4), ++ z0 = svclz_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s64.c +new file mode 100644 +index 000000000..02c0c993e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s64.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clz_s64_m_tied1: ++** clz z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z 
(clz_s64_m_tied1, svuint64_t, svint64_t, ++ z0 = svclz_s64_m (z0, p0, z4), ++ z0 = svclz_m (z0, p0, z4)) ++ ++/* ++** clz_s64_m_untied: ++** movprfx z0, z1 ++** clz z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (clz_s64_m_untied, svuint64_t, svint64_t, ++ z0 = svclz_s64_m (z1, p0, z4), ++ z0 = svclz_m (z1, p0, z4)) ++ ++/* ++** clz_s64_z: ++** movprfx z0\.d, p0/z, z4\.d ++** clz z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (clz_s64_z, svuint64_t, svint64_t, ++ z0 = svclz_s64_z (p0, z4), ++ z0 = svclz_z (p0, z4)) ++ ++/* ++** clz_s64_x: ++** clz z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (clz_s64_x, svuint64_t, svint64_t, ++ z0 = svclz_s64_x (p0, z4), ++ z0 = svclz_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s8.c +new file mode 100644 +index 000000000..642d298c8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s8.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clz_s8_m_tied1: ++** clz z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (clz_s8_m_tied1, svuint8_t, svint8_t, ++ z0 = svclz_s8_m (z0, p0, z4), ++ z0 = svclz_m (z0, p0, z4)) ++ ++/* ++** clz_s8_m_untied: ++** movprfx z0, z1 ++** clz z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (clz_s8_m_untied, svuint8_t, svint8_t, ++ z0 = svclz_s8_m (z1, p0, z4), ++ z0 = svclz_m (z1, p0, z4)) ++ ++/* ++** clz_s8_z: ++** movprfx z0\.b, p0/z, z4\.b ++** clz z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (clz_s8_z, svuint8_t, svint8_t, ++ z0 = svclz_s8_z (p0, z4), ++ z0 = svclz_z (p0, z4)) ++ ++/* ++** clz_s8_x: ++** clz z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (clz_s8_x, svuint8_t, svint8_t, ++ z0 = svclz_s8_x (p0, z4), ++ z0 = svclz_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u16.c +new file mode 100644 +index 000000000..f08723017 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clz_u16_m_tied12: ++** clz z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u16_m_tied12, svuint16_t, ++ z0 = svclz_u16_m (z0, p0, z0), ++ z0 = svclz_m (z0, p0, z0)) ++ ++/* ++** clz_u16_m_tied1: ++** clz z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u16_m_tied1, svuint16_t, ++ z0 = svclz_u16_m (z0, p0, z1), ++ z0 = svclz_m (z0, p0, z1)) ++ ++/* ++** clz_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clz z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u16_m_tied2, svuint16_t, ++ z0 = svclz_u16_m (z1, p0, z0), ++ z0 = svclz_m (z1, p0, z0)) ++ ++/* ++** clz_u16_m_untied: ++** movprfx z0, z2 ++** clz z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u16_m_untied, svuint16_t, ++ z0 = svclz_u16_m (z2, p0, z1), ++ z0 = svclz_m (z2, p0, z1)) ++ ++/* ++** clz_u16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** clz z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u16_z_tied1, svuint16_t, ++ z0 = svclz_u16_z (p0, z0), ++ z0 = svclz_z (p0, z0)) ++ ++/* ++** clz_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** clz z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u16_z_untied, svuint16_t, ++ z0 = svclz_u16_z (p0, z1), ++ z0 = svclz_z (p0, z1)) ++ ++/* ++** clz_u16_x_tied1: ++** clz z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u16_x_tied1, 
svuint16_t, ++ z0 = svclz_u16_x (p0, z0), ++ z0 = svclz_x (p0, z0)) ++ ++/* ++** clz_u16_x_untied: ++** clz z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u16_x_untied, svuint16_t, ++ z0 = svclz_u16_x (p0, z1), ++ z0 = svclz_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u32.c +new file mode 100644 +index 000000000..e00424131 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clz_u32_m_tied12: ++** clz z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u32_m_tied12, svuint32_t, ++ z0 = svclz_u32_m (z0, p0, z0), ++ z0 = svclz_m (z0, p0, z0)) ++ ++/* ++** clz_u32_m_tied1: ++** clz z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u32_m_tied1, svuint32_t, ++ z0 = svclz_u32_m (z0, p0, z1), ++ z0 = svclz_m (z0, p0, z1)) ++ ++/* ++** clz_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clz z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u32_m_tied2, svuint32_t, ++ z0 = svclz_u32_m (z1, p0, z0), ++ z0 = svclz_m (z1, p0, z0)) ++ ++/* ++** clz_u32_m_untied: ++** movprfx z0, z2 ++** clz z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u32_m_untied, svuint32_t, ++ z0 = svclz_u32_m (z2, p0, z1), ++ z0 = svclz_m (z2, p0, z1)) ++ ++/* ++** clz_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** clz z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u32_z_tied1, svuint32_t, ++ z0 = svclz_u32_z (p0, z0), ++ z0 = svclz_z (p0, z0)) ++ ++/* ++** clz_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** clz z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u32_z_untied, svuint32_t, ++ z0 = svclz_u32_z (p0, z1), ++ z0 = svclz_z (p0, z1)) ++ ++/* ++** clz_u32_x_tied1: ++** clz z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u32_x_tied1, svuint32_t, ++ z0 = svclz_u32_x (p0, z0), ++ z0 = svclz_x (p0, z0)) ++ ++/* ++** clz_u32_x_untied: ++** clz z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u32_x_untied, svuint32_t, ++ z0 = svclz_u32_x (p0, z1), ++ z0 = svclz_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u64.c +new file mode 100644 +index 000000000..e879e1b9a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clz_u64_m_tied12: ++** clz z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u64_m_tied12, svuint64_t, ++ z0 = svclz_u64_m (z0, p0, z0), ++ z0 = svclz_m (z0, p0, z0)) ++ ++/* ++** clz_u64_m_tied1: ++** clz z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u64_m_tied1, svuint64_t, ++ z0 = svclz_u64_m (z0, p0, z1), ++ z0 = svclz_m (z0, p0, z1)) ++ ++/* ++** clz_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** clz z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u64_m_tied2, svuint64_t, ++ z0 = svclz_u64_m (z1, p0, z0), ++ z0 = svclz_m (z1, p0, z0)) ++ ++/* ++** clz_u64_m_untied: ++** movprfx z0, z2 ++** clz z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u64_m_untied, svuint64_t, ++ z0 = svclz_u64_m (z2, p0, z1), ++ z0 = svclz_m (z2, p0, z1)) ++ ++/* ++** clz_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** clz z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z 
(clz_u64_z_tied1, svuint64_t, ++ z0 = svclz_u64_z (p0, z0), ++ z0 = svclz_z (p0, z0)) ++ ++/* ++** clz_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** clz z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u64_z_untied, svuint64_t, ++ z0 = svclz_u64_z (p0, z1), ++ z0 = svclz_z (p0, z1)) ++ ++/* ++** clz_u64_x_tied1: ++** clz z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u64_x_tied1, svuint64_t, ++ z0 = svclz_u64_x (p0, z0), ++ z0 = svclz_x (p0, z0)) ++ ++/* ++** clz_u64_x_untied: ++** clz z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u64_x_untied, svuint64_t, ++ z0 = svclz_u64_x (p0, z1), ++ z0 = svclz_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u8.c +new file mode 100644 +index 000000000..ce6cb8f45 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u8.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** clz_u8_m_tied12: ++** clz z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u8_m_tied12, svuint8_t, ++ z0 = svclz_u8_m (z0, p0, z0), ++ z0 = svclz_m (z0, p0, z0)) ++ ++/* ++** clz_u8_m_tied1: ++** clz z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u8_m_tied1, svuint8_t, ++ z0 = svclz_u8_m (z0, p0, z1), ++ z0 = svclz_m (z0, p0, z1)) ++ ++/* ++** clz_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** clz z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u8_m_tied2, svuint8_t, ++ z0 = svclz_u8_m (z1, p0, z0), ++ z0 = svclz_m (z1, p0, z0)) ++ ++/* ++** clz_u8_m_untied: ++** movprfx z0, z2 ++** clz z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u8_m_untied, svuint8_t, ++ z0 = svclz_u8_m (z2, p0, z1), ++ z0 = svclz_m (z2, p0, z1)) ++ ++/* ++** clz_u8_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, \1\.b ++** clz z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u8_z_tied1, svuint8_t, ++ z0 = svclz_u8_z (p0, z0), ++ z0 = svclz_z (p0, z0)) ++ ++/* ++** clz_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** clz z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u8_z_untied, svuint8_t, ++ z0 = svclz_u8_z (p0, z1), ++ z0 = svclz_z (p0, z1)) ++ ++/* ++** clz_u8_x_tied1: ++** clz z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u8_x_tied1, svuint8_t, ++ z0 = svclz_u8_x (p0, z0), ++ z0 = svclz_x (p0, z0)) ++ ++/* ++** clz_u8_x_untied: ++** clz z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (clz_u8_x_untied, svuint8_t, ++ z0 = svclz_u8_x (p0, z1), ++ z0 = svclz_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f16.c +new file mode 100644 +index 000000000..3bf44a59f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f16.c +@@ -0,0 +1,675 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmla_0_f16_m_tied1: ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_m_tied1, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z0, z1, z2, 0), ++ z0 = svcmla_m (p0, z0, z1, z2, 0)) ++ ++/* ++** cmla_0_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_m_tied2, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z0, z2, 0), ++ z0 = svcmla_m (p0, z1, z0, z2, 0)) ++ ++/* ++** cmla_0_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, 
p0/m, z2\.h, \1\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_m_tied3, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z2, z0, 0), ++ z0 = svcmla_m (p0, z1, z2, z0, 0)) ++ ++/* ++** cmla_0_f16_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_m_untied, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z2, z3, 0), ++ z0 = svcmla_m (p0, z1, z2, z3, 0)) ++ ++/* ++** cmla_90_f16_m_tied1: ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_m_tied1, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z0, z1, z2, 90), ++ z0 = svcmla_m (p0, z0, z1, z2, 90)) ++ ++/* ++** cmla_90_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_m_tied2, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z0, z2, 90), ++ z0 = svcmla_m (p0, z1, z0, z2, 90)) ++ ++/* ++** cmla_90_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_m_tied3, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z2, z0, 90), ++ z0 = svcmla_m (p0, z1, z2, z0, 90)) ++ ++/* ++** cmla_90_f16_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_m_untied, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z2, z3, 90), ++ z0 = svcmla_m (p0, z1, z2, z3, 90)) ++ ++/* ++** cmla_180_f16_m_tied1: ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_m_tied1, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z0, z1, z2, 180), ++ z0 = svcmla_m (p0, z0, z1, z2, 180)) ++ ++/* ++** cmla_180_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_m_tied2, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z0, z2, 180), ++ z0 = svcmla_m (p0, z1, z0, z2, 180)) ++ ++/* ++** cmla_180_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_m_tied3, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z2, z0, 180), ++ z0 = svcmla_m (p0, z1, z2, z0, 180)) ++ ++/* ++** cmla_180_f16_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_m_untied, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z2, z3, 180), ++ z0 = svcmla_m (p0, z1, z2, z3, 180)) ++ ++/* ++** cmla_270_f16_m_tied1: ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_m_tied1, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z0, z1, z2, 270), ++ z0 = svcmla_m (p0, z0, z1, z2, 270)) ++ ++/* ++** cmla_270_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_m_tied2, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z0, z2, 270), ++ z0 = svcmla_m (p0, z1, z0, z2, 270)) ++ ++/* ++** cmla_270_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_m_tied3, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z2, z0, 270), ++ z0 = svcmla_m (p0, z1, z2, z0, 270)) ++ ++/* ++** cmla_270_f16_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_m_untied, svfloat16_t, ++ z0 = svcmla_f16_m (p0, z1, z2, z3, 270), ++ z0 = svcmla_m (p0, z1, z2, z3, 270)) ++ ++/* ++** cmla_0_f16_z_tied1: ++** movprfx z0\.h, 
p0/z, z0\.h ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_z_tied1, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z0, z1, z2, 0), ++ z0 = svcmla_z (p0, z0, z1, z2, 0)) ++ ++/* ++** cmla_0_f16_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_z_tied2, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z0, z2, 0), ++ z0 = svcmla_z (p0, z1, z0, z2, 0)) ++ ++/* ++** cmla_0_f16_z_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_z_tied3, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z2, z0, 0), ++ z0 = svcmla_z (p0, z1, z2, z0, 0)) ++ ++/* ++** cmla_0_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_z_untied, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z2, z3, 0), ++ z0 = svcmla_z (p0, z1, z2, z3, 0)) ++ ++/* ++** cmla_90_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_z_tied1, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z0, z1, z2, 90), ++ z0 = svcmla_z (p0, z0, z1, z2, 90)) ++ ++/* ++** cmla_90_f16_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_z_tied2, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z0, z2, 90), ++ z0 = svcmla_z (p0, z1, z0, z2, 90)) ++ ++/* ++** cmla_90_f16_z_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_z_tied3, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z2, z0, 90), ++ z0 = svcmla_z (p0, z1, z2, z0, 90)) ++ ++/* ++** cmla_90_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_z_untied, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z2, z3, 90), ++ z0 = svcmla_z (p0, z1, z2, z3, 90)) ++ ++/* ++** cmla_180_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_z_tied1, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z0, z1, z2, 180), ++ z0 = svcmla_z (p0, z0, z1, z2, 180)) ++ ++/* ++** cmla_180_f16_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_z_tied2, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z0, z2, 180), ++ z0 = svcmla_z (p0, z1, z0, z2, 180)) ++ ++/* ++** cmla_180_f16_z_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_z_tied3, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z2, z0, 180), ++ z0 = svcmla_z (p0, z1, z2, z0, 180)) ++ ++/* ++** cmla_180_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_z_untied, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z2, z3, 180), ++ z0 = svcmla_z (p0, z1, z2, z3, 180)) ++ ++/* ++** cmla_270_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_z_tied1, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z0, z1, z2, 270), ++ z0 = svcmla_z (p0, z0, z1, z2, 270)) ++ ++/* ++** cmla_270_f16_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, 
z1\.h ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_z_tied2, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z0, z2, 270), ++ z0 = svcmla_z (p0, z1, z0, z2, 270)) ++ ++/* ++** cmla_270_f16_z_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_z_tied3, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z2, z0, 270), ++ z0 = svcmla_z (p0, z1, z2, z0, 270)) ++ ++/* ++** cmla_270_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_z_untied, svfloat16_t, ++ z0 = svcmla_f16_z (p0, z1, z2, z3, 270), ++ z0 = svcmla_z (p0, z1, z2, z3, 270)) ++ ++/* ++** cmla_0_f16_x_tied1: ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_x_tied1, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z0, z1, z2, 0), ++ z0 = svcmla_x (p0, z0, z1, z2, 0)) ++ ++/* ++** cmla_0_f16_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_x_tied2, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z0, z2, 0), ++ z0 = svcmla_x (p0, z1, z0, z2, 0)) ++ ++/* ++** cmla_0_f16_x_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_x_tied3, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z2, z0, 0), ++ z0 = svcmla_x (p0, z1, z2, z0, 0)) ++ ++/* ++** cmla_0_f16_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f16_x_untied, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z2, z3, 0), ++ z0 = svcmla_x (p0, z1, z2, z3, 0)) ++ ++/* ++** cmla_90_f16_x_tied1: ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_x_tied1, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z0, z1, z2, 90), ++ z0 = svcmla_x (p0, z0, z1, z2, 90)) ++ ++/* ++** cmla_90_f16_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_x_tied2, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z0, z2, 90), ++ z0 = svcmla_x (p0, z1, z0, z2, 90)) ++ ++/* ++** cmla_90_f16_x_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_x_tied3, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z2, z0, 90), ++ z0 = svcmla_x (p0, z1, z2, z0, 90)) ++ ++/* ++** cmla_90_f16_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f16_x_untied, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z2, z3, 90), ++ z0 = svcmla_x (p0, z1, z2, z3, 90)) ++ ++/* ++** cmla_180_f16_x_tied1: ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_x_tied1, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z0, z1, z2, 180), ++ z0 = svcmla_x (p0, z0, z1, z2, 180)) ++ ++/* ++** cmla_180_f16_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_x_tied2, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z0, z2, 180), ++ z0 = svcmla_x (p0, z1, z0, z2, 180)) ++ ++/* ++** cmla_180_f16_x_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_x_tied3, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z2, z0, 180), ++ z0 = svcmla_x (p0, z1, z2, z0, 180)) ++ 
++/* ++** cmla_180_f16_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f16_x_untied, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z2, z3, 180), ++ z0 = svcmla_x (p0, z1, z2, z3, 180)) ++ ++/* ++** cmla_270_f16_x_tied1: ++** fcmla z0\.h, p0/m, z1\.h, z2\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_x_tied1, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z0, z1, z2, 270), ++ z0 = svcmla_x (p0, z0, z1, z2, 270)) ++ ++/* ++** cmla_270_f16_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, \1\.h, z2\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_x_tied2, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z0, z2, 270), ++ z0 = svcmla_x (p0, z1, z0, z2, 270)) ++ ++/* ++** cmla_270_f16_x_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, \1\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_x_tied3, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z2, z0, 270), ++ z0 = svcmla_x (p0, z1, z2, z0, 270)) ++ ++/* ++** cmla_270_f16_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, p0/m, z2\.h, z3\.h, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f16_x_untied, svfloat16_t, ++ z0 = svcmla_f16_x (p0, z1, z2, z3, 270), ++ z0 = svcmla_x (p0, z1, z2, z3, 270)) ++ ++/* ++** ptrue_cmla_0_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f16_x_tied1, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z0, z1, z2, 0), ++ z0 = svcmla_x (svptrue_b16 (), z0, z1, z2, 0)) ++ ++/* ++** ptrue_cmla_0_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f16_x_tied2, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z0, z2, 0), ++ z0 = svcmla_x (svptrue_b16 (), z1, z0, z2, 0)) ++ ++/* ++** ptrue_cmla_0_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f16_x_tied3, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z0, 0), ++ z0 = svcmla_x (svptrue_b16 (), z1, z2, z0, 0)) ++ ++/* ++** ptrue_cmla_0_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f16_x_untied, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z3, 0), ++ z0 = svcmla_x (svptrue_b16 (), z1, z2, z3, 0)) ++ ++/* ++** ptrue_cmla_90_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f16_x_tied1, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z0, z1, z2, 90), ++ z0 = svcmla_x (svptrue_b16 (), z0, z1, z2, 90)) ++ ++/* ++** ptrue_cmla_90_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f16_x_tied2, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z0, z2, 90), ++ z0 = svcmla_x (svptrue_b16 (), z1, z0, z2, 90)) ++ ++/* ++** ptrue_cmla_90_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f16_x_tied3, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z0, 90), ++ z0 = svcmla_x (svptrue_b16 (), z1, z2, z0, 90)) ++ ++/* ++** ptrue_cmla_90_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f16_x_untied, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z3, 90), ++ z0 = svcmla_x (svptrue_b16 (), z1, z2, z3, 90)) ++ ++/* ++** ptrue_cmla_180_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f16_x_tied1, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z0, z1, z2, 180), ++ z0 = svcmla_x (svptrue_b16 (), z0, z1, z2, 180)) ++ ++/* ++** ptrue_cmla_180_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f16_x_tied2, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z0, z2, 180), ++ z0 = svcmla_x (svptrue_b16 (), z1, z0, z2, 180)) ++ ++/* ++** ptrue_cmla_180_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f16_x_tied3, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z0, 180), ++ z0 = svcmla_x (svptrue_b16 (), z1, z2, z0, 180)) ++ ++/* ++** ptrue_cmla_180_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f16_x_untied, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z3, 180), ++ z0 = svcmla_x (svptrue_b16 (), z1, z2, z3, 180)) ++ ++/* ++** ptrue_cmla_270_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f16_x_tied1, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z0, z1, z2, 270), ++ z0 = svcmla_x (svptrue_b16 (), z0, z1, z2, 270)) ++ ++/* ++** ptrue_cmla_270_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f16_x_tied2, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z0, z2, 270), ++ z0 = svcmla_x (svptrue_b16 (), z1, z0, z2, 270)) ++ ++/* ++** ptrue_cmla_270_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f16_x_tied3, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z0, 270), ++ z0 = svcmla_x (svptrue_b16 (), z1, z2, z0, 270)) ++ ++/* ++** ptrue_cmla_270_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f16_x_untied, svfloat16_t, ++ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z3, 270), ++ z0 = svcmla_x (svptrue_b16 (), z1, z2, z3, 270)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f32.c +new file mode 100644 +index 000000000..b266738b2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f32.c +@@ -0,0 +1,675 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmla_0_f32_m_tied1: ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_m_tied1, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z0, z1, z2, 0), ++ z0 = svcmla_m (p0, z0, z1, z2, 0)) ++ ++/* ++** cmla_0_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_m_tied2, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z0, z2, 0), ++ z0 = svcmla_m (p0, z1, z0, z2, 0)) ++ ++/* ++** cmla_0_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_m_tied3, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z2, z0, 0), ++ z0 = svcmla_m (p0, z1, z2, z0, 0)) ++ ++/* ++** cmla_0_f32_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_m_untied, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z2, z3, 0), ++ z0 = svcmla_m (p0, z1, z2, z3, 0)) ++ ++/* ++** cmla_90_f32_m_tied1: ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_m_tied1, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z0, z1, z2, 90), ++ z0 = svcmla_m (p0, z0, z1, z2, 90)) ++ ++/* ++** cmla_90_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_m_tied2, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z0, z2, 90), ++ z0 = svcmla_m (p0, z1, z0, z2, 90)) ++ ++/* ++** cmla_90_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_m_tied3, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z2, z0, 90), ++ z0 = svcmla_m (p0, z1, z2, z0, 90)) ++ ++/* ++** cmla_90_f32_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_m_untied, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z2, z3, 90), ++ z0 = svcmla_m (p0, z1, z2, z3, 90)) ++ ++/* ++** cmla_180_f32_m_tied1: ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_m_tied1, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z0, z1, z2, 180), ++ z0 = svcmla_m (p0, z0, z1, z2, 180)) ++ ++/* ++** cmla_180_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_m_tied2, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z0, z2, 180), ++ z0 = svcmla_m (p0, z1, z0, z2, 180)) ++ ++/* ++** cmla_180_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_m_tied3, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z2, z0, 180), ++ z0 = svcmla_m (p0, z1, z2, z0, 180)) ++ ++/* ++** cmla_180_f32_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_m_untied, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, 
z2, z3, 180), ++ z0 = svcmla_m (p0, z1, z2, z3, 180)) ++ ++/* ++** cmla_270_f32_m_tied1: ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_m_tied1, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z0, z1, z2, 270), ++ z0 = svcmla_m (p0, z0, z1, z2, 270)) ++ ++/* ++** cmla_270_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_m_tied2, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z0, z2, 270), ++ z0 = svcmla_m (p0, z1, z0, z2, 270)) ++ ++/* ++** cmla_270_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_m_tied3, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z2, z0, 270), ++ z0 = svcmla_m (p0, z1, z2, z0, 270)) ++ ++/* ++** cmla_270_f32_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_m_untied, svfloat32_t, ++ z0 = svcmla_f32_m (p0, z1, z2, z3, 270), ++ z0 = svcmla_m (p0, z1, z2, z3, 270)) ++ ++/* ++** cmla_0_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_z_tied1, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z0, z1, z2, 0), ++ z0 = svcmla_z (p0, z0, z1, z2, 0)) ++ ++/* ++** cmla_0_f32_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_z_tied2, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z0, z2, 0), ++ z0 = svcmla_z (p0, z1, z0, z2, 0)) ++ ++/* ++** cmla_0_f32_z_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_z_tied3, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z2, z0, 0), ++ z0 = svcmla_z (p0, z1, z2, z0, 0)) ++ ++/* ++** cmla_0_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_z_untied, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z2, z3, 0), ++ z0 = svcmla_z (p0, z1, z2, z3, 0)) ++ ++/* ++** cmla_90_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_z_tied1, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z0, z1, z2, 90), ++ z0 = svcmla_z (p0, z0, z1, z2, 90)) ++ ++/* ++** cmla_90_f32_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_z_tied2, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z0, z2, 90), ++ z0 = svcmla_z (p0, z1, z0, z2, 90)) ++ ++/* ++** cmla_90_f32_z_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_z_tied3, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z2, z0, 90), ++ z0 = svcmla_z (p0, z1, z2, z0, 90)) ++ ++/* ++** cmla_90_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_z_untied, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z2, z3, 90), ++ z0 = svcmla_z (p0, z1, z2, z3, 90)) ++ ++/* ++** cmla_180_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_z_tied1, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z0, z1, z2, 180), ++ z0 = svcmla_z (p0, z0, z1, z2, 180)) ++ ++/* ++** cmla_180_f32_z_tied2: ++** mov 
(z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_z_tied2, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z0, z2, 180), ++ z0 = svcmla_z (p0, z1, z0, z2, 180)) ++ ++/* ++** cmla_180_f32_z_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_z_tied3, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z2, z0, 180), ++ z0 = svcmla_z (p0, z1, z2, z0, 180)) ++ ++/* ++** cmla_180_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_z_untied, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z2, z3, 180), ++ z0 = svcmla_z (p0, z1, z2, z3, 180)) ++ ++/* ++** cmla_270_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_z_tied1, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z0, z1, z2, 270), ++ z0 = svcmla_z (p0, z0, z1, z2, 270)) ++ ++/* ++** cmla_270_f32_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_z_tied2, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z0, z2, 270), ++ z0 = svcmla_z (p0, z1, z0, z2, 270)) ++ ++/* ++** cmla_270_f32_z_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_z_tied3, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z2, z0, 270), ++ z0 = svcmla_z (p0, z1, z2, z0, 270)) ++ ++/* ++** cmla_270_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_z_untied, svfloat32_t, ++ z0 = svcmla_f32_z (p0, z1, z2, z3, 270), ++ z0 = svcmla_z (p0, z1, z2, z3, 270)) ++ ++/* ++** cmla_0_f32_x_tied1: ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_x_tied1, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z0, z1, z2, 0), ++ z0 = svcmla_x (p0, z0, z1, z2, 0)) ++ ++/* ++** cmla_0_f32_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_x_tied2, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z0, z2, 0), ++ z0 = svcmla_x (p0, z1, z0, z2, 0)) ++ ++/* ++** cmla_0_f32_x_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_x_tied3, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z2, z0, 0), ++ z0 = svcmla_x (p0, z1, z2, z0, 0)) ++ ++/* ++** cmla_0_f32_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f32_x_untied, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z2, z3, 0), ++ z0 = svcmla_x (p0, z1, z2, z3, 0)) ++ ++/* ++** cmla_90_f32_x_tied1: ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_x_tied1, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z0, z1, z2, 90), ++ z0 = svcmla_x (p0, z0, z1, z2, 90)) ++ ++/* ++** cmla_90_f32_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_x_tied2, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z0, z2, 90), ++ z0 = svcmla_x (p0, z1, z0, z2, 90)) ++ ++/* ++** cmla_90_f32_x_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z 
(cmla_90_f32_x_tied3, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z2, z0, 90), ++ z0 = svcmla_x (p0, z1, z2, z0, 90)) ++ ++/* ++** cmla_90_f32_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f32_x_untied, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z2, z3, 90), ++ z0 = svcmla_x (p0, z1, z2, z3, 90)) ++ ++/* ++** cmla_180_f32_x_tied1: ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_x_tied1, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z0, z1, z2, 180), ++ z0 = svcmla_x (p0, z0, z1, z2, 180)) ++ ++/* ++** cmla_180_f32_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_x_tied2, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z0, z2, 180), ++ z0 = svcmla_x (p0, z1, z0, z2, 180)) ++ ++/* ++** cmla_180_f32_x_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_x_tied3, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z2, z0, 180), ++ z0 = svcmla_x (p0, z1, z2, z0, 180)) ++ ++/* ++** cmla_180_f32_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f32_x_untied, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z2, z3, 180), ++ z0 = svcmla_x (p0, z1, z2, z3, 180)) ++ ++/* ++** cmla_270_f32_x_tied1: ++** fcmla z0\.s, p0/m, z1\.s, z2\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_x_tied1, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z0, z1, z2, 270), ++ z0 = svcmla_x (p0, z0, z1, z2, 270)) ++ ++/* ++** cmla_270_f32_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, \1\.s, z2\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_x_tied2, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z0, z2, 270), ++ z0 = svcmla_x (p0, z1, z0, z2, 270)) ++ ++/* ++** cmla_270_f32_x_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, \1\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_x_tied3, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z2, z0, 270), ++ z0 = svcmla_x (p0, z1, z2, z0, 270)) ++ ++/* ++** cmla_270_f32_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, p0/m, z2\.s, z3\.s, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f32_x_untied, svfloat32_t, ++ z0 = svcmla_f32_x (p0, z1, z2, z3, 270), ++ z0 = svcmla_x (p0, z1, z2, z3, 270)) ++ ++/* ++** ptrue_cmla_0_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f32_x_tied1, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z0, z1, z2, 0), ++ z0 = svcmla_x (svptrue_b32 (), z0, z1, z2, 0)) ++ ++/* ++** ptrue_cmla_0_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f32_x_tied2, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z0, z2, 0), ++ z0 = svcmla_x (svptrue_b32 (), z1, z0, z2, 0)) ++ ++/* ++** ptrue_cmla_0_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f32_x_tied3, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z0, 0), ++ z0 = svcmla_x (svptrue_b32 (), z1, z2, z0, 0)) ++ ++/* ++** ptrue_cmla_0_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f32_x_untied, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z3, 0), ++ z0 = svcmla_x (svptrue_b32 (), z1, z2, z3, 0)) ++ ++/* ++** ptrue_cmla_90_f32_x_tied1: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f32_x_tied1, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z0, z1, z2, 90), ++ z0 = svcmla_x (svptrue_b32 (), z0, z1, z2, 90)) ++ ++/* ++** ptrue_cmla_90_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f32_x_tied2, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z0, z2, 90), ++ z0 = svcmla_x (svptrue_b32 (), z1, z0, z2, 90)) ++ ++/* ++** ptrue_cmla_90_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f32_x_tied3, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z0, 90), ++ z0 = svcmla_x (svptrue_b32 (), z1, z2, z0, 90)) ++ ++/* ++** ptrue_cmla_90_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f32_x_untied, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z3, 90), ++ z0 = svcmla_x (svptrue_b32 (), z1, z2, z3, 90)) ++ ++/* ++** ptrue_cmla_180_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f32_x_tied1, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z0, z1, z2, 180), ++ z0 = svcmla_x (svptrue_b32 (), z0, z1, z2, 180)) ++ ++/* ++** ptrue_cmla_180_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f32_x_tied2, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z0, z2, 180), ++ z0 = svcmla_x (svptrue_b32 (), z1, z0, z2, 180)) ++ ++/* ++** ptrue_cmla_180_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f32_x_tied3, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z0, 180), ++ z0 = svcmla_x (svptrue_b32 (), z1, z2, z0, 180)) ++ ++/* ++** ptrue_cmla_180_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f32_x_untied, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z3, 180), ++ z0 = svcmla_x (svptrue_b32 (), z1, z2, z3, 180)) ++ ++/* ++** ptrue_cmla_270_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f32_x_tied1, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z0, z1, z2, 270), ++ z0 = svcmla_x (svptrue_b32 (), z0, z1, z2, 270)) ++ ++/* ++** ptrue_cmla_270_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f32_x_tied2, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z0, z2, 270), ++ z0 = svcmla_x (svptrue_b32 (), z1, z0, z2, 270)) ++ ++/* ++** ptrue_cmla_270_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f32_x_tied3, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z0, 270), ++ z0 = svcmla_x (svptrue_b32 (), z1, z2, z0, 270)) ++ ++/* ++** ptrue_cmla_270_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f32_x_untied, svfloat32_t, ++ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z3, 270), ++ z0 = svcmla_x (svptrue_b32 (), z1, z2, z3, 270)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f64.c +new file mode 100644 +index 000000000..024ae5ce3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f64.c +@@ -0,0 +1,675 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmla_0_f64_m_tied1: ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_m_tied1, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z0, z1, z2, 0), ++ z0 = svcmla_m (p0, z0, z1, z2, 0)) ++ ++/* ++** cmla_0_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, \1, z2\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_m_tied2, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z0, z2, 0), ++ z0 = svcmla_m (p0, z1, z0, z2, 0)) ++ ++/* ++** cmla_0_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, \1, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_m_tied3, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z2, z0, 0), ++ z0 = svcmla_m (p0, z1, z2, z0, 0)) ++ ++/* ++** cmla_0_f64_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_m_untied, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z2, z3, 0), ++ z0 = svcmla_m (p0, z1, z2, z3, 0)) ++ ++/* ++** cmla_90_f64_m_tied1: ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_m_tied1, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z0, z1, z2, 90), ++ z0 = svcmla_m (p0, z0, z1, z2, 90)) ++ ++/* ++** cmla_90_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, \1, z2\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_m_tied2, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z0, z2, 90), ++ z0 = svcmla_m (p0, z1, z0, z2, 90)) ++ ++/* ++** cmla_90_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, \1, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_m_tied3, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z2, z0, 90), ++ z0 = svcmla_m (p0, z1, z2, z0, 90)) ++ ++/* ++** cmla_90_f64_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_m_untied, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z2, z3, 90), ++ z0 = svcmla_m (p0, z1, z2, z3, 90)) ++ ++/* ++** cmla_180_f64_m_tied1: ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_m_tied1, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z0, z1, z2, 180), ++ z0 = svcmla_m (p0, z0, z1, z2, 180)) ++ ++/* ++** cmla_180_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, \1, z2\.d, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_m_tied2, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z0, z2, 180), ++ z0 = svcmla_m (p0, z1, z0, z2, 180)) ++ ++/* ++** cmla_180_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, \1, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_m_tied3, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z2, z0, 180), ++ z0 = svcmla_m (p0, z1, z2, z0, 180)) ++ ++/* ++** cmla_180_f64_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_m_untied, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z2, z3, 180), ++ 
z0 = svcmla_m (p0, z1, z2, z3, 180)) ++ ++/* ++** cmla_270_f64_m_tied1: ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_m_tied1, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z0, z1, z2, 270), ++ z0 = svcmla_m (p0, z0, z1, z2, 270)) ++ ++/* ++** cmla_270_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, \1, z2\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_m_tied2, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z0, z2, 270), ++ z0 = svcmla_m (p0, z1, z0, z2, 270)) ++ ++/* ++** cmla_270_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, \1, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_m_tied3, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z2, z0, 270), ++ z0 = svcmla_m (p0, z1, z2, z0, 270)) ++ ++/* ++** cmla_270_f64_m_untied: ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_m_untied, svfloat64_t, ++ z0 = svcmla_f64_m (p0, z1, z2, z3, 270), ++ z0 = svcmla_m (p0, z1, z2, z3, 270)) ++ ++/* ++** cmla_0_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_z_tied1, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z0, z1, z2, 0), ++ z0 = svcmla_z (p0, z0, z1, z2, 0)) ++ ++/* ++** cmla_0_f64_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, \1, z2\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_z_tied2, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z0, z2, 0), ++ z0 = svcmla_z (p0, z1, z0, z2, 0)) ++ ++/* ++** cmla_0_f64_z_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, z2\.d, \1, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_z_tied3, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z2, z0, 0), ++ z0 = svcmla_z (p0, z1, z2, z0, 0)) ++ ++/* ++** cmla_0_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_z_untied, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z2, z3, 0), ++ z0 = svcmla_z (p0, z1, z2, z3, 0)) ++ ++/* ++** cmla_90_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_z_tied1, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z0, z1, z2, 90), ++ z0 = svcmla_z (p0, z0, z1, z2, 90)) ++ ++/* ++** cmla_90_f64_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, \1, z2\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_z_tied2, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z0, z2, 90), ++ z0 = svcmla_z (p0, z1, z0, z2, 90)) ++ ++/* ++** cmla_90_f64_z_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, z2\.d, \1, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_z_tied3, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z2, z0, 90), ++ z0 = svcmla_z (p0, z1, z2, z0, 90)) ++ ++/* ++** cmla_90_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_z_untied, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z2, z3, 90), ++ z0 = svcmla_z (p0, z1, z2, z3, 90)) ++ ++/* ++** cmla_180_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_z_tied1, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z0, z1, z2, 180), ++ z0 = svcmla_z (p0, z0, z1, z2, 180)) ++ ++/* ++** cmla_180_f64_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, 
z1\.d ++** fcmla z0\.d, p0/m, \1, z2\.d, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_z_tied2, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z0, z2, 180), ++ z0 = svcmla_z (p0, z1, z0, z2, 180)) ++ ++/* ++** cmla_180_f64_z_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, z2\.d, \1, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_z_tied3, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z2, z0, 180), ++ z0 = svcmla_z (p0, z1, z2, z0, 180)) ++ ++/* ++** cmla_180_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_z_untied, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z2, z3, 180), ++ z0 = svcmla_z (p0, z1, z2, z3, 180)) ++ ++/* ++** cmla_270_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_z_tied1, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z0, z1, z2, 270), ++ z0 = svcmla_z (p0, z0, z1, z2, 270)) ++ ++/* ++** cmla_270_f64_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, \1, z2\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_z_tied2, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z0, z2, 270), ++ z0 = svcmla_z (p0, z1, z0, z2, 270)) ++ ++/* ++** cmla_270_f64_z_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, z2\.d, \1, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_z_tied3, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z2, z0, 270), ++ z0 = svcmla_z (p0, z1, z2, z0, 270)) ++ ++/* ++** cmla_270_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_z_untied, svfloat64_t, ++ z0 = svcmla_f64_z (p0, z1, z2, z3, 270), ++ z0 = svcmla_z (p0, z1, z2, z3, 270)) ++ ++/* ++** cmla_0_f64_x_tied1: ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_x_tied1, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z0, z1, z2, 0), ++ z0 = svcmla_x (p0, z0, z1, z2, 0)) ++ ++/* ++** cmla_0_f64_x_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, \1, z2\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_x_tied2, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z0, z2, 0), ++ z0 = svcmla_x (p0, z1, z0, z2, 0)) ++ ++/* ++** cmla_0_f64_x_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, \1, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_x_tied3, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z2, z0, 0), ++ z0 = svcmla_x (p0, z1, z2, z0, 0)) ++ ++/* ++** cmla_0_f64_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_0_f64_x_untied, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z2, z3, 0), ++ z0 = svcmla_x (p0, z1, z2, z3, 0)) ++ ++/* ++** cmla_90_f64_x_tied1: ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_x_tied1, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z0, z1, z2, 90), ++ z0 = svcmla_x (p0, z0, z1, z2, 90)) ++ ++/* ++** cmla_90_f64_x_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, \1, z2\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_x_tied2, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z0, z2, 90), ++ z0 = svcmla_x (p0, z1, z0, z2, 90)) ++ ++/* ++** cmla_90_f64_x_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, \1, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_x_tied3, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z2, z0, 
90), ++ z0 = svcmla_x (p0, z1, z2, z0, 90)) ++ ++/* ++** cmla_90_f64_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_90_f64_x_untied, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z2, z3, 90), ++ z0 = svcmla_x (p0, z1, z2, z3, 90)) ++ ++/* ++** cmla_180_f64_x_tied1: ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_x_tied1, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z0, z1, z2, 180), ++ z0 = svcmla_x (p0, z0, z1, z2, 180)) ++ ++/* ++** cmla_180_f64_x_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, \1, z2\.d, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_x_tied2, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z0, z2, 180), ++ z0 = svcmla_x (p0, z1, z0, z2, 180)) ++ ++/* ++** cmla_180_f64_x_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, \1, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_x_tied3, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z2, z0, 180), ++ z0 = svcmla_x (p0, z1, z2, z0, 180)) ++ ++/* ++** cmla_180_f64_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_180_f64_x_untied, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z2, z3, 180), ++ z0 = svcmla_x (p0, z1, z2, z3, 180)) ++ ++/* ++** cmla_270_f64_x_tied1: ++** fcmla z0\.d, p0/m, z1\.d, z2\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_x_tied1, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z0, z1, z2, 270), ++ z0 = svcmla_x (p0, z0, z1, z2, 270)) ++ ++/* ++** cmla_270_f64_x_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, \1, z2\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_x_tied2, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z0, z2, 270), ++ z0 = svcmla_x (p0, z1, z0, z2, 270)) ++ ++/* ++** cmla_270_f64_x_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, \1, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_x_tied3, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z2, z0, 270), ++ z0 = svcmla_x (p0, z1, z2, z0, 270)) ++ ++/* ++** cmla_270_f64_x_untied: ++** movprfx z0, z1 ++** fcmla z0\.d, p0/m, z2\.d, z3\.d, #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_270_f64_x_untied, svfloat64_t, ++ z0 = svcmla_f64_x (p0, z1, z2, z3, 270), ++ z0 = svcmla_x (p0, z1, z2, z3, 270)) ++ ++/* ++** ptrue_cmla_0_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f64_x_tied1, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z0, z1, z2, 0), ++ z0 = svcmla_x (svptrue_b64 (), z0, z1, z2, 0)) ++ ++/* ++** ptrue_cmla_0_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f64_x_tied2, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z0, z2, 0), ++ z0 = svcmla_x (svptrue_b64 (), z1, z0, z2, 0)) ++ ++/* ++** ptrue_cmla_0_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f64_x_tied3, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z0, 0), ++ z0 = svcmla_x (svptrue_b64 (), z1, z2, z0, 0)) ++ ++/* ++** ptrue_cmla_0_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_0_f64_x_untied, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z3, 0), ++ z0 = svcmla_x (svptrue_b64 (), z1, z2, z3, 0)) ++ ++/* ++** ptrue_cmla_90_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f64_x_tied1, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z0, z1, z2, 90), ++ z0 = svcmla_x (svptrue_b64 (), z0, z1, z2, 90)) ++ ++/* ++** ptrue_cmla_90_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f64_x_tied2, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z0, z2, 90), ++ z0 = svcmla_x (svptrue_b64 (), z1, z0, z2, 90)) ++ ++/* ++** ptrue_cmla_90_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f64_x_tied3, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z0, 90), ++ z0 = svcmla_x (svptrue_b64 (), z1, z2, z0, 90)) ++ ++/* ++** ptrue_cmla_90_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_90_f64_x_untied, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z3, 90), ++ z0 = svcmla_x (svptrue_b64 (), z1, z2, z3, 90)) ++ ++/* ++** ptrue_cmla_180_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f64_x_tied1, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z0, z1, z2, 180), ++ z0 = svcmla_x (svptrue_b64 (), z0, z1, z2, 180)) ++ ++/* ++** ptrue_cmla_180_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f64_x_tied2, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z0, z2, 180), ++ z0 = svcmla_x (svptrue_b64 (), z1, z0, z2, 180)) ++ ++/* ++** ptrue_cmla_180_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f64_x_tied3, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z0, 180), ++ z0 = svcmla_x (svptrue_b64 (), z1, z2, z0, 180)) ++ ++/* ++** ptrue_cmla_180_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_180_f64_x_untied, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z3, 180), ++ z0 = svcmla_x (svptrue_b64 (), z1, z2, z3, 180)) ++ ++/* ++** ptrue_cmla_270_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f64_x_tied1, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z0, z1, z2, 270), ++ z0 = svcmla_x (svptrue_b64 (), z0, z1, z2, 270)) ++ ++/* ++** ptrue_cmla_270_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f64_x_tied2, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z0, z2, 270), ++ z0 = svcmla_x (svptrue_b64 (), z1, z0, z2, 270)) ++ ++/* ++** ptrue_cmla_270_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f64_x_tied3, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z0, 270), ++ z0 = svcmla_x (svptrue_b64 (), z1, z2, z0, 270)) ++ ++/* ++** ptrue_cmla_270_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_cmla_270_f64_x_untied, svfloat64_t, ++ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z3, 270), ++ z0 = svcmla_x (svptrue_b64 (), z1, z2, z3, 270)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f16.c +new file mode 100644 +index 000000000..16f1b77ad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f16.c +@@ -0,0 +1,194 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmla_lane_0_0_f16_tied1: ++** fcmla z0\.h, z1\.h, z2\.h\[0\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_0_f16_tied1, svfloat16_t, ++ z0 = svcmla_lane_f16 (z0, z1, z2, 0, 0), ++ z0 = svcmla_lane (z0, z1, z2, 0, 0)) ++ ++/* ++** cmla_lane_0_0_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, \1\.h, z2\.h\[0\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_0_f16_tied2, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z0, z2, 0, 0), ++ z0 = svcmla_lane (z1, z0, z2, 0, 0)) ++ ++/* ++** cmla_lane_0_0_f16_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, z2\.h, \1\.h\[0\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_0_f16_tied3, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z2, z0, 0, 0), ++ z0 = svcmla_lane (z1, z2, z0, 0, 0)) ++ ++/* ++** cmla_lane_0_0_f16_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, z2\.h, z3\.h\[0\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_0_f16_untied, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z2, z3, 0, 0), ++ z0 = svcmla_lane (z1, z2, z3, 0, 0)) ++ ++/* ++** cmla_lane_0_90_f16_tied1: ++** fcmla z0\.h, z1\.h, z2\.h\[0\], #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_90_f16_tied1, svfloat16_t, ++ z0 = svcmla_lane_f16 (z0, z1, z2, 0, 90), ++ z0 = svcmla_lane (z0, z1, z2, 0, 90)) ++ ++/* ++** cmla_lane_0_90_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, \1\.h, z2\.h\[0\], #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_90_f16_tied2, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z0, z2, 0, 90), ++ z0 = svcmla_lane (z1, z0, z2, 0, 90)) ++ ++/* ++** cmla_lane_0_90_f16_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, z2\.h, \1\.h\[0\], #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_90_f16_tied3, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z2, z0, 0, 90), ++ z0 = svcmla_lane (z1, z2, z0, 0, 90)) ++ ++/* ++** cmla_lane_0_90_f16_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, z2\.h, z3\.h\[0\], #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_90_f16_untied, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z2, z3, 0, 90), ++ z0 = svcmla_lane (z1, z2, z3, 0, 90)) ++ ++/* ++** cmla_lane_0_180_f16_tied1: ++** fcmla z0\.h, z1\.h, z2\.h\[0\], #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_180_f16_tied1, svfloat16_t, ++ z0 = svcmla_lane_f16 (z0, z1, z2, 0, 180), ++ z0 = svcmla_lane (z0, z1, z2, 0, 180)) ++ ++/* ++** cmla_lane_0_180_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, \1\.h, z2\.h\[0\], #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_180_f16_tied2, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z0, z2, 0, 180), ++ z0 = svcmla_lane (z1, z0, z2, 0, 180)) ++ ++/* ++** cmla_lane_0_180_f16_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, z2\.h, \1\.h\[0\], #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_180_f16_tied3, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z2, z0, 0, 180), ++ z0 = svcmla_lane (z1, z2, z0, 0, 180)) ++ ++/* ++** 
cmla_lane_0_180_f16_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, z2\.h, z3\.h\[0\], #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_180_f16_untied, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z2, z3, 0, 180), ++ z0 = svcmla_lane (z1, z2, z3, 0, 180)) ++ ++/* ++** cmla_lane_0_270_f16_tied1: ++** fcmla z0\.h, z1\.h, z2\.h\[0\], #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_270_f16_tied1, svfloat16_t, ++ z0 = svcmla_lane_f16 (z0, z1, z2, 0, 270), ++ z0 = svcmla_lane (z0, z1, z2, 0, 270)) ++ ++/* ++** cmla_lane_0_270_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, \1\.h, z2\.h\[0\], #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_270_f16_tied2, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z0, z2, 0, 270), ++ z0 = svcmla_lane (z1, z0, z2, 0, 270)) ++ ++/* ++** cmla_lane_0_270_f16_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.h, z2\.h, \1\.h\[0\], #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_270_f16_tied3, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z2, z0, 0, 270), ++ z0 = svcmla_lane (z1, z2, z0, 0, 270)) ++ ++/* ++** cmla_lane_0_270_f16_untied: ++** movprfx z0, z1 ++** fcmla z0\.h, z2\.h, z3\.h\[0\], #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_270_f16_untied, svfloat16_t, ++ z0 = svcmla_lane_f16 (z1, z2, z3, 0, 270), ++ z0 = svcmla_lane (z1, z2, z3, 0, 270)) ++ ++/* ++** cmla_lane_1_f16: ++** fcmla z0\.h, z1\.h, z2\.h\[1\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_1_f16, svfloat16_t, ++ z0 = svcmla_lane_f16 (z0, z1, z2, 1, 0), ++ z0 = svcmla_lane (z0, z1, z2, 1, 0)) ++ ++/* ++** cmla_lane_2_f16: ++** fcmla z0\.h, z1\.h, z2\.h\[2\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_2_f16, svfloat16_t, ++ z0 = svcmla_lane_f16 (z0, z1, z2, 2, 0), ++ z0 = svcmla_lane (z0, z1, z2, 2, 0)) ++ ++/* ++** cmla_lane_3_f16: ++** fcmla z0\.h, z1\.h, z2\.h\[3\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_3_f16, svfloat16_t, ++ z0 = svcmla_lane_f16 (z0, z1, z2, 3, 0), ++ z0 = svcmla_lane (z0, z1, z2, 3, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f32.c +new file mode 100644 +index 000000000..85bff68fd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f32.c +@@ -0,0 +1,176 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmla_lane_0_0_f32_tied1: ++** fcmla z0\.s, z1\.s, z2\.s\[0\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_0_f32_tied1, svfloat32_t, ++ z0 = svcmla_lane_f32 (z0, z1, z2, 0, 0), ++ z0 = svcmla_lane (z0, z1, z2, 0, 0)) ++ ++/* ++** cmla_lane_0_0_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, \1\.s, z2\.s\[0\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_0_f32_tied2, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z0, z2, 0, 0), ++ z0 = svcmla_lane (z1, z0, z2, 0, 0)) ++ ++/* ++** cmla_lane_0_0_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, z2\.s, \1\.s\[0\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_0_f32_tied3, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z2, z0, 0, 0), ++ z0 = svcmla_lane (z1, z2, z0, 0, 0)) ++ ++/* ++** cmla_lane_0_0_f32_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, z2\.s, z3\.s\[0\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_0_f32_untied, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z2, z3, 0, 0), ++ z0 = svcmla_lane (z1, z2, z3, 0, 0)) ++ ++/* ++** cmla_lane_0_90_f32_tied1: ++** fcmla z0\.s, z1\.s, z2\.s\[0\], #90 ++** ret ++*/ ++TEST_UNIFORM_Z 
(cmla_lane_0_90_f32_tied1, svfloat32_t, ++ z0 = svcmla_lane_f32 (z0, z1, z2, 0, 90), ++ z0 = svcmla_lane (z0, z1, z2, 0, 90)) ++ ++/* ++** cmla_lane_0_90_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, \1\.s, z2\.s\[0\], #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_90_f32_tied2, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z0, z2, 0, 90), ++ z0 = svcmla_lane (z1, z0, z2, 0, 90)) ++ ++/* ++** cmla_lane_0_90_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, z2\.s, \1\.s\[0\], #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_90_f32_tied3, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z2, z0, 0, 90), ++ z0 = svcmla_lane (z1, z2, z0, 0, 90)) ++ ++/* ++** cmla_lane_0_90_f32_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, z2\.s, z3\.s\[0\], #90 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_90_f32_untied, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z2, z3, 0, 90), ++ z0 = svcmla_lane (z1, z2, z3, 0, 90)) ++ ++/* ++** cmla_lane_0_180_f32_tied1: ++** fcmla z0\.s, z1\.s, z2\.s\[0\], #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_180_f32_tied1, svfloat32_t, ++ z0 = svcmla_lane_f32 (z0, z1, z2, 0, 180), ++ z0 = svcmla_lane (z0, z1, z2, 0, 180)) ++ ++/* ++** cmla_lane_0_180_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, \1\.s, z2\.s\[0\], #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_180_f32_tied2, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z0, z2, 0, 180), ++ z0 = svcmla_lane (z1, z0, z2, 0, 180)) ++ ++/* ++** cmla_lane_0_180_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, z2\.s, \1\.s\[0\], #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_180_f32_tied3, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z2, z0, 0, 180), ++ z0 = svcmla_lane (z1, z2, z0, 0, 180)) ++ ++/* ++** cmla_lane_0_180_f32_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, z2\.s, z3\.s\[0\], #180 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_180_f32_untied, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z2, z3, 0, 180), ++ z0 = svcmla_lane (z1, z2, z3, 0, 180)) ++ ++/* ++** cmla_lane_0_270_f32_tied1: ++** fcmla z0\.s, z1\.s, z2\.s\[0\], #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_270_f32_tied1, svfloat32_t, ++ z0 = svcmla_lane_f32 (z0, z1, z2, 0, 270), ++ z0 = svcmla_lane (z0, z1, z2, 0, 270)) ++ ++/* ++** cmla_lane_0_270_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, \1\.s, z2\.s\[0\], #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_270_f32_tied2, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z0, z2, 0, 270), ++ z0 = svcmla_lane (z1, z0, z2, 0, 270)) ++ ++/* ++** cmla_lane_0_270_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fcmla z0\.s, z2\.s, \1\.s\[0\], #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_270_f32_tied3, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z2, z0, 0, 270), ++ z0 = svcmla_lane (z1, z2, z0, 0, 270)) ++ ++/* ++** cmla_lane_0_270_f32_untied: ++** movprfx z0, z1 ++** fcmla z0\.s, z2\.s, z3\.s\[0\], #270 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_0_270_f32_untied, svfloat32_t, ++ z0 = svcmla_lane_f32 (z1, z2, z3, 0, 270), ++ z0 = svcmla_lane (z1, z2, z3, 0, 270)) ++ ++/* ++** cmla_lane_1_f32: ++** fcmla z0\.s, z1\.s, z2\.s\[1\], #0 ++** ret ++*/ ++TEST_UNIFORM_Z (cmla_lane_1_f32, svfloat32_t, ++ z0 = svcmla_lane_f32 (z0, z1, z2, 1, 0), ++ z0 = svcmla_lane (z0, z1, z2, 1, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f16.c +new file mode 100644 +index 000000000..7149ad300 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f16.c +@@ -0,0 +1,50 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_f16_tied: ++** fcmeq p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_f16_tied, svfloat16_t, ++ p0 = svcmpeq_f16 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_f16_untied: ++** fcmeq p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_f16_untied, svfloat16_t, ++ p0 = svcmpeq_f16 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** fcmeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpeq_h4_f16, svfloat16_t, float16_t, ++ p0 = svcmpeq_n_f16 (p1, z0, d4), ++ p0 = svcmpeq (p1, z0, d4)) ++ ++/* ++** cmpeq_0_f16: ++** fcmeq p0\.h, p1/z, z0\.h, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_f16, svfloat16_t, ++ p0 = svcmpeq_n_f16 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fcmeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_f16, svfloat16_t, ++ p0 = svcmpeq_n_f16 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f32.c +new file mode 100644 +index 000000000..05910bc50 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f32.c +@@ -0,0 +1,50 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_f32_tied: ++** fcmeq p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_f32_tied, svfloat32_t, ++ p0 = svcmpeq_f32 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_f32_untied: ++** fcmeq p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_f32_untied, svfloat32_t, ++ p0 = svcmpeq_f32 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** fcmeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpeq_s4_f32, svfloat32_t, float32_t, ++ p0 = svcmpeq_n_f32 (p1, z0, d4), ++ p0 = svcmpeq (p1, z0, d4)) ++ ++/* ++** cmpeq_0_f32: ++** fcmeq p0\.s, p1/z, z0\.s, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_f32, svfloat32_t, ++ p0 = svcmpeq_n_f32 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** fcmeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_f32, svfloat32_t, ++ p0 = svcmpeq_n_f32 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f64.c +new file mode 100644 +index 000000000..f94bdfe27 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f64.c +@@ -0,0 +1,50 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_f64_tied: ++** fcmeq p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_f64_tied, svfloat64_t, ++ p0 = svcmpeq_f64 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_f64_untied: ++** fcmeq p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_f64_untied, svfloat64_t, ++ p0 = svcmpeq_f64 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** fcmeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpeq_d4_f64, svfloat64_t, float64_t, ++ p0 = svcmpeq_n_f64 (p1, z0, d4), ++ p0 = svcmpeq (p1, z0, d4)) ++ ++/* ++** cmpeq_0_f64: ++** fcmeq p0\.d, p1/z, z0\.d, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_f64, svfloat64_t, ++ p0 = svcmpeq_n_f64 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fcmeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_f64, svfloat64_t, ++ p0 = svcmpeq_n_f64 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s16.c +new file mode 100644 +index 000000000..b0befcb77 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_s16_tied: ++** cmpeq p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_s16_tied, svint16_t, ++ p0 = svcmpeq_s16 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_s16_untied: ++** cmpeq p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_s16_untied, svint16_t, ++ p0 = svcmpeq_s16 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_w0_s16: ++** mov (z[0-9]+\.h), w0 ++** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_w0_s16, svint16_t, int16_t, ++ p0 = svcmpeq_n_s16 (p1, z0, x0), ++ p0 = svcmpeq (p1, z0, x0)) ++ ++/* ++** cmpeq_0_s16: ++** cmpeq p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_s16, svint16_t, ++ p0 = svcmpeq_n_s16 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_s16: ++** cmpeq p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_s16, svint16_t, ++ p0 = svcmpeq_n_s16 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) ++ ++/* ++** cmpeq_15_s16: ++** cmpeq p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_15_s16, svint16_t, ++ p0 = svcmpeq_n_s16 (p1, z0, 15), ++ p0 = svcmpeq (p1, z0, 15)) ++ ++/* ++** cmpeq_16_s16: ++** mov (z[0-9]+\.h), #16 ++** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_16_s16, svint16_t, ++ p0 = svcmpeq_n_s16 (p1, z0, 16), ++ p0 = svcmpeq (p1, z0, 16)) ++ ++/* ++** cmpeq_m1_s16: ++** cmpeq p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m1_s16, svint16_t, ++ p0 = svcmpeq_n_s16 
(p1, z0, -1), ++ p0 = svcmpeq (p1, z0, -1)) ++ ++/* ++** cmpeq_m16_s16: ++** cmpeq p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m16_s16, svint16_t, ++ p0 = svcmpeq_n_s16 (p1, z0, -16), ++ p0 = svcmpeq (p1, z0, -16)) ++ ++/* ++** cmpeq_m17_s16: ++** mov (z[0-9]+\.h), #-17 ++** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m17_s16, svint16_t, ++ p0 = svcmpeq_n_s16 (p1, z0, -17), ++ p0 = svcmpeq (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s32.c +new file mode 100644 +index 000000000..de48a2c38 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_s32_tied: ++** cmpeq p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_s32_tied, svint32_t, ++ p0 = svcmpeq_s32 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_s32_untied: ++** cmpeq p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_s32_untied, svint32_t, ++ p0 = svcmpeq_s32 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_w0_s32: ++** mov (z[0-9]+\.s), w0 ++** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_w0_s32, svint32_t, int32_t, ++ p0 = svcmpeq_n_s32 (p1, z0, x0), ++ p0 = svcmpeq (p1, z0, x0)) ++ ++/* ++** cmpeq_0_s32: ++** cmpeq p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_s32, svint32_t, ++ p0 = svcmpeq_n_s32 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_s32: ++** cmpeq p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_s32, svint32_t, ++ p0 = svcmpeq_n_s32 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) ++ ++/* ++** cmpeq_15_s32: ++** cmpeq p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_15_s32, svint32_t, ++ p0 = svcmpeq_n_s32 (p1, z0, 15), ++ p0 = svcmpeq (p1, z0, 15)) ++ ++/* ++** cmpeq_16_s32: ++** mov (z[0-9]+\.s), #16 ++** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_16_s32, svint32_t, ++ p0 = svcmpeq_n_s32 (p1, z0, 16), ++ p0 = svcmpeq (p1, z0, 16)) ++ ++/* ++** cmpeq_m1_s32: ++** cmpeq p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m1_s32, svint32_t, ++ p0 = svcmpeq_n_s32 (p1, z0, -1), ++ p0 = svcmpeq (p1, z0, -1)) ++ ++/* ++** cmpeq_m16_s32: ++** cmpeq p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m16_s32, svint32_t, ++ p0 = svcmpeq_n_s32 (p1, z0, -16), ++ p0 = svcmpeq (p1, z0, -16)) ++ ++/* ++** cmpeq_m17_s32: ++** mov (z[0-9]+\.s), #-17 ++** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m17_s32, svint32_t, ++ p0 = svcmpeq_n_s32 (p1, z0, -17), ++ p0 = svcmpeq (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s64.c +new file mode 100644 +index 000000000..ff976712a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s64.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_s64_tied: ++** cmpeq p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_s64_tied, svint64_t, ++ p0 = svcmpeq_s64 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_s64_untied: ++** cmpeq p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ 
++TEST_COMPARE_Z (cmpeq_s64_untied, svint64_t, ++ p0 = svcmpeq_s64 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_x0_s64: ++** mov (z[0-9]+\.d), x0 ++** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_x0_s64, svint64_t, int64_t, ++ p0 = svcmpeq_n_s64 (p1, z0, x0), ++ p0 = svcmpeq (p1, z0, x0)) ++ ++/* ++** cmpeq_0_s64: ++** cmpeq p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_s64, svint64_t, ++ p0 = svcmpeq_n_s64 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_s64: ++** cmpeq p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_s64, svint64_t, ++ p0 = svcmpeq_n_s64 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) ++ ++/* ++** cmpeq_15_s64: ++** cmpeq p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_15_s64, svint64_t, ++ p0 = svcmpeq_n_s64 (p1, z0, 15), ++ p0 = svcmpeq (p1, z0, 15)) ++ ++/* ++** cmpeq_16_s64: ++** mov (z[0-9]+\.d), #16 ++** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_16_s64, svint64_t, ++ p0 = svcmpeq_n_s64 (p1, z0, 16), ++ p0 = svcmpeq (p1, z0, 16)) ++ ++/* ++** cmpeq_m1_s64: ++** cmpeq p0\.d, p1/z, z0\.d, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m1_s64, svint64_t, ++ p0 = svcmpeq_n_s64 (p1, z0, -1), ++ p0 = svcmpeq (p1, z0, -1)) ++ ++/* ++** cmpeq_m16_s64: ++** cmpeq p0\.d, p1/z, z0\.d, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m16_s64, svint64_t, ++ p0 = svcmpeq_n_s64 (p1, z0, -16), ++ p0 = svcmpeq (p1, z0, -16)) ++ ++/* ++** cmpeq_m17_s64: ++** mov (z[0-9]+\.d), #-17 ++** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m17_s64, svint64_t, ++ p0 = svcmpeq_n_s64 (p1, z0, -17), ++ p0 = svcmpeq (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s8.c +new file mode 100644 +index 000000000..1325755a8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_s8_tied: ++** cmpeq p0\.b, p0/z, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_s8_tied, svint8_t, ++ p0 = svcmpeq_s8 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_s8_untied: ++** cmpeq p0\.b, p1/z, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_s8_untied, svint8_t, ++ p0 = svcmpeq_s8 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_w0_s8: ++** mov (z[0-9]+\.b), w0 ++** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_w0_s8, svint8_t, int8_t, ++ p0 = svcmpeq_n_s8 (p1, z0, x0), ++ p0 = svcmpeq (p1, z0, x0)) ++ ++/* ++** cmpeq_0_s8: ++** cmpeq p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_s8, svint8_t, ++ p0 = svcmpeq_n_s8 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_s8: ++** cmpeq p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_s8, svint8_t, ++ p0 = svcmpeq_n_s8 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) ++ ++/* ++** cmpeq_15_s8: ++** cmpeq p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_15_s8, svint8_t, ++ p0 = svcmpeq_n_s8 (p1, z0, 15), ++ p0 = svcmpeq (p1, z0, 15)) ++ ++/* ++** cmpeq_16_s8: ++** mov (z[0-9]+\.b), #16 ++** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_16_s8, svint8_t, ++ p0 = svcmpeq_n_s8 (p1, z0, 16), ++ p0 = svcmpeq (p1, z0, 16)) ++ ++/* ++** cmpeq_m1_s8: ++** cmpeq p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ 
++TEST_COMPARE_Z (cmpeq_m1_s8, svint8_t, ++ p0 = svcmpeq_n_s8 (p1, z0, -1), ++ p0 = svcmpeq (p1, z0, -1)) ++ ++/* ++** cmpeq_m16_s8: ++** cmpeq p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m16_s8, svint8_t, ++ p0 = svcmpeq_n_s8 (p1, z0, -16), ++ p0 = svcmpeq (p1, z0, -16)) ++ ++/* ++** cmpeq_m17_s8: ++** mov (z[0-9]+\.b), #-17 ++** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m17_s8, svint8_t, ++ p0 = svcmpeq_n_s8 (p1, z0, -17), ++ p0 = svcmpeq (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u16.c +new file mode 100644 +index 000000000..91004692c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_u16_tied: ++** cmpeq p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_u16_tied, svuint16_t, ++ p0 = svcmpeq_u16 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_u16_untied: ++** cmpeq p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_u16_untied, svuint16_t, ++ p0 = svcmpeq_u16 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_w0_u16: ++** mov (z[0-9]+\.h), w0 ++** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_w0_u16, svuint16_t, uint16_t, ++ p0 = svcmpeq_n_u16 (p1, z0, x0), ++ p0 = svcmpeq (p1, z0, x0)) ++ ++/* ++** cmpeq_0_u16: ++** cmpeq p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_u16, svuint16_t, ++ p0 = svcmpeq_n_u16 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_u16: ++** cmpeq p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_u16, svuint16_t, ++ p0 = svcmpeq_n_u16 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) ++ ++/* ++** cmpeq_15_u16: ++** cmpeq p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_15_u16, svuint16_t, ++ p0 = svcmpeq_n_u16 (p1, z0, 15), ++ p0 = svcmpeq (p1, z0, 15)) ++ ++/* ++** cmpeq_16_u16: ++** mov (z[0-9]+\.h), #16 ++** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_16_u16, svuint16_t, ++ p0 = svcmpeq_n_u16 (p1, z0, 16), ++ p0 = svcmpeq (p1, z0, 16)) ++ ++/* ++** cmpeq_m1_u16: ++** cmpeq p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m1_u16, svuint16_t, ++ p0 = svcmpeq_n_u16 (p1, z0, -1), ++ p0 = svcmpeq (p1, z0, -1)) ++ ++/* ++** cmpeq_m16_u16: ++** cmpeq p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m16_u16, svuint16_t, ++ p0 = svcmpeq_n_u16 (p1, z0, -16), ++ p0 = svcmpeq (p1, z0, -16)) ++ ++/* ++** cmpeq_m17_u16: ++** mov (z[0-9]+\.h), #-17 ++** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m17_u16, svuint16_t, ++ p0 = svcmpeq_n_u16 (p1, z0, -17), ++ p0 = svcmpeq (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u32.c +new file mode 100644 +index 000000000..2cff56eb6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_u32_tied: ++** cmpeq p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_u32_tied, svuint32_t, ++ p0 = svcmpeq_u32 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_u32_untied: ++** cmpeq 
p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_u32_untied, svuint32_t, ++ p0 = svcmpeq_u32 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_w0_u32: ++** mov (z[0-9]+\.s), w0 ++** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_w0_u32, svuint32_t, uint32_t, ++ p0 = svcmpeq_n_u32 (p1, z0, x0), ++ p0 = svcmpeq (p1, z0, x0)) ++ ++/* ++** cmpeq_0_u32: ++** cmpeq p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_u32, svuint32_t, ++ p0 = svcmpeq_n_u32 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_u32: ++** cmpeq p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_u32, svuint32_t, ++ p0 = svcmpeq_n_u32 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) ++ ++/* ++** cmpeq_15_u32: ++** cmpeq p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_15_u32, svuint32_t, ++ p0 = svcmpeq_n_u32 (p1, z0, 15), ++ p0 = svcmpeq (p1, z0, 15)) ++ ++/* ++** cmpeq_16_u32: ++** mov (z[0-9]+\.s), #16 ++** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_16_u32, svuint32_t, ++ p0 = svcmpeq_n_u32 (p1, z0, 16), ++ p0 = svcmpeq (p1, z0, 16)) ++ ++/* ++** cmpeq_m1_u32: ++** cmpeq p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m1_u32, svuint32_t, ++ p0 = svcmpeq_n_u32 (p1, z0, -1), ++ p0 = svcmpeq (p1, z0, -1)) ++ ++/* ++** cmpeq_m16_u32: ++** cmpeq p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m16_u32, svuint32_t, ++ p0 = svcmpeq_n_u32 (p1, z0, -16), ++ p0 = svcmpeq (p1, z0, -16)) ++ ++/* ++** cmpeq_m17_u32: ++** mov (z[0-9]+\.s), #-17 ++** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m17_u32, svuint32_t, ++ p0 = svcmpeq_n_u32 (p1, z0, -17), ++ p0 = svcmpeq (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u64.c +new file mode 100644 +index 000000000..0f02c9988 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u64.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_u64_tied: ++** cmpeq p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_u64_tied, svuint64_t, ++ p0 = svcmpeq_u64 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_u64_untied: ++** cmpeq p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_u64_untied, svuint64_t, ++ p0 = svcmpeq_u64 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_x0_u64: ++** mov (z[0-9]+\.d), x0 ++** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_x0_u64, svuint64_t, uint64_t, ++ p0 = svcmpeq_n_u64 (p1, z0, x0), ++ p0 = svcmpeq (p1, z0, x0)) ++ ++/* ++** cmpeq_0_u64: ++** cmpeq p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_u64, svuint64_t, ++ p0 = svcmpeq_n_u64 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_u64: ++** cmpeq p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_u64, svuint64_t, ++ p0 = svcmpeq_n_u64 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) ++ ++/* ++** cmpeq_15_u64: ++** cmpeq p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_15_u64, svuint64_t, ++ p0 = svcmpeq_n_u64 (p1, z0, 15), ++ p0 = svcmpeq (p1, z0, 15)) ++ ++/* ++** cmpeq_16_u64: ++** mov (z[0-9]+\.d), #16 ++** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_16_u64, svuint64_t, ++ p0 = svcmpeq_n_u64 (p1, z0, 16), ++ 
p0 = svcmpeq (p1, z0, 16)) ++ ++/* ++** cmpeq_m1_u64: ++** cmpeq p0\.d, p1/z, z0\.d, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m1_u64, svuint64_t, ++ p0 = svcmpeq_n_u64 (p1, z0, -1), ++ p0 = svcmpeq (p1, z0, -1)) ++ ++/* ++** cmpeq_m16_u64: ++** cmpeq p0\.d, p1/z, z0\.d, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m16_u64, svuint64_t, ++ p0 = svcmpeq_n_u64 (p1, z0, -16), ++ p0 = svcmpeq (p1, z0, -16)) ++ ++/* ++** cmpeq_m17_u64: ++** mov (z[0-9]+\.d), #-17 ++** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m17_u64, svuint64_t, ++ p0 = svcmpeq_n_u64 (p1, z0, -17), ++ p0 = svcmpeq (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u8.c +new file mode 100644 +index 000000000..ccd9a61c6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_u8_tied: ++** cmpeq p0\.b, p0/z, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_u8_tied, svuint8_t, ++ p0 = svcmpeq_u8 (p0, z0, z1), ++ p0 = svcmpeq (p0, z0, z1)) ++ ++/* ++** cmpeq_u8_untied: ++** cmpeq p0\.b, p1/z, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_u8_untied, svuint8_t, ++ p0 = svcmpeq_u8 (p1, z0, z1), ++ p0 = svcmpeq (p1, z0, z1)) ++ ++/* ++** cmpeq_w0_u8: ++** mov (z[0-9]+\.b), w0 ++** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_w0_u8, svuint8_t, uint8_t, ++ p0 = svcmpeq_n_u8 (p1, z0, x0), ++ p0 = svcmpeq (p1, z0, x0)) ++ ++/* ++** cmpeq_0_u8: ++** cmpeq p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_0_u8, svuint8_t, ++ p0 = svcmpeq_n_u8 (p1, z0, 0), ++ p0 = svcmpeq (p1, z0, 0)) ++ ++/* ++** cmpeq_1_u8: ++** cmpeq p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_1_u8, svuint8_t, ++ p0 = svcmpeq_n_u8 (p1, z0, 1), ++ p0 = svcmpeq (p1, z0, 1)) ++ ++/* ++** cmpeq_15_u8: ++** cmpeq p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_15_u8, svuint8_t, ++ p0 = svcmpeq_n_u8 (p1, z0, 15), ++ p0 = svcmpeq (p1, z0, 15)) ++ ++/* ++** cmpeq_16_u8: ++** mov (z[0-9]+\.b), #16 ++** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_16_u8, svuint8_t, ++ p0 = svcmpeq_n_u8 (p1, z0, 16), ++ p0 = svcmpeq (p1, z0, 16)) ++ ++/* ++** cmpeq_m1_u8: ++** cmpeq p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m1_u8, svuint8_t, ++ p0 = svcmpeq_n_u8 (p1, z0, -1), ++ p0 = svcmpeq (p1, z0, -1)) ++ ++/* ++** cmpeq_m16_u8: ++** cmpeq p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m16_u8, svuint8_t, ++ p0 = svcmpeq_n_u8 (p1, z0, -16), ++ p0 = svcmpeq (p1, z0, -16)) ++ ++/* ++** cmpeq_m17_u8: ++** mov (z[0-9]+\.b), #-17 ++** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_m17_u8, svuint8_t, ++ p0 = svcmpeq_n_u8 (p1, z0, -17), ++ p0 = svcmpeq (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s16.c +new file mode 100644 +index 000000000..c9712b3b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_wide_s16_tied: ++** cmpeq p0\.h, p0/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpeq_wide_s16_tied, svint16_t, svint64_t, ++ p0 = 
svcmpeq_wide_s16 (p0, z0, z1), ++ p0 = svcmpeq_wide (p0, z0, z1)) ++ ++/* ++** cmpeq_wide_s16_untied: ++** cmpeq p0\.h, p1/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpeq_wide_s16_untied, svint16_t, svint64_t, ++ p0 = svcmpeq_wide_s16 (p1, z0, z1), ++ p0 = svcmpeq_wide (p1, z0, z1)) ++ ++/* ++** cmpeq_wide_x0_s16: ++** mov (z[0-9]+\.d), x0 ++** cmpeq p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_wide_x0_s16, svint16_t, int64_t, ++ p0 = svcmpeq_wide_n_s16 (p1, z0, x0), ++ p0 = svcmpeq_wide (p1, z0, x0)) ++ ++/* ++** cmpeq_wide_0_s16: ++** cmpeq p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_0_s16, svint16_t, ++ p0 = svcmpeq_wide_n_s16 (p1, z0, 0), ++ p0 = svcmpeq_wide (p1, z0, 0)) ++ ++/* ++** cmpeq_wide_1_s16: ++** cmpeq p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_1_s16, svint16_t, ++ p0 = svcmpeq_wide_n_s16 (p1, z0, 1), ++ p0 = svcmpeq_wide (p1, z0, 1)) ++ ++/* ++** cmpeq_wide_15_s16: ++** cmpeq p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_15_s16, svint16_t, ++ p0 = svcmpeq_wide_n_s16 (p1, z0, 15), ++ p0 = svcmpeq_wide (p1, z0, 15)) ++ ++/* ++** cmpeq_wide_16_s16: ++** mov (z[0-9]+\.d), #16 ++** cmpeq p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_16_s16, svint16_t, ++ p0 = svcmpeq_wide_n_s16 (p1, z0, 16), ++ p0 = svcmpeq_wide (p1, z0, 16)) ++ ++/* ++** cmpeq_wide_m1_s16: ++** cmpeq p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_m1_s16, svint16_t, ++ p0 = svcmpeq_wide_n_s16 (p1, z0, -1), ++ p0 = svcmpeq_wide (p1, z0, -1)) ++ ++/* ++** cmpeq_wide_m16_s16: ++** cmpeq p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_m16_s16, svint16_t, ++ p0 = svcmpeq_wide_n_s16 (p1, z0, -16), ++ p0 = svcmpeq_wide (p1, z0, -16)) ++ ++/* ++** cmpeq_wide_m17_s16: ++** mov (z[0-9]+\.d), #-17 ++** cmpeq p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_m17_s16, svint16_t, ++ p0 = svcmpeq_wide_n_s16 (p1, z0, -17), ++ p0 = svcmpeq_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s32.c +new file mode 100644 +index 000000000..22bd99f57 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_wide_s32_tied: ++** cmpeq p0\.s, p0/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpeq_wide_s32_tied, svint32_t, svint64_t, ++ p0 = svcmpeq_wide_s32 (p0, z0, z1), ++ p0 = svcmpeq_wide (p0, z0, z1)) ++ ++/* ++** cmpeq_wide_s32_untied: ++** cmpeq p0\.s, p1/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpeq_wide_s32_untied, svint32_t, svint64_t, ++ p0 = svcmpeq_wide_s32 (p1, z0, z1), ++ p0 = svcmpeq_wide (p1, z0, z1)) ++ ++/* ++** cmpeq_wide_x0_s32: ++** mov (z[0-9]+\.d), x0 ++** cmpeq p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_wide_x0_s32, svint32_t, int64_t, ++ p0 = svcmpeq_wide_n_s32 (p1, z0, x0), ++ p0 = svcmpeq_wide (p1, z0, x0)) ++ ++/* ++** cmpeq_wide_0_s32: ++** cmpeq p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_0_s32, svint32_t, ++ p0 = svcmpeq_wide_n_s32 (p1, z0, 0), ++ p0 = svcmpeq_wide (p1, z0, 0)) ++ ++/* ++** cmpeq_wide_1_s32: ++** cmpeq p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_1_s32, svint32_t, ++ p0 = svcmpeq_wide_n_s32 (p1, z0, 1), ++ p0 = svcmpeq_wide (p1, z0, 1)) ++ ++/* ++** cmpeq_wide_15_s32: ++** cmpeq 
p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_15_s32, svint32_t, ++ p0 = svcmpeq_wide_n_s32 (p1, z0, 15), ++ p0 = svcmpeq_wide (p1, z0, 15)) ++ ++/* ++** cmpeq_wide_16_s32: ++** mov (z[0-9]+\.d), #16 ++** cmpeq p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_16_s32, svint32_t, ++ p0 = svcmpeq_wide_n_s32 (p1, z0, 16), ++ p0 = svcmpeq_wide (p1, z0, 16)) ++ ++/* ++** cmpeq_wide_m1_s32: ++** cmpeq p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_m1_s32, svint32_t, ++ p0 = svcmpeq_wide_n_s32 (p1, z0, -1), ++ p0 = svcmpeq_wide (p1, z0, -1)) ++ ++/* ++** cmpeq_wide_m16_s32: ++** cmpeq p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_m16_s32, svint32_t, ++ p0 = svcmpeq_wide_n_s32 (p1, z0, -16), ++ p0 = svcmpeq_wide (p1, z0, -16)) ++ ++/* ++** cmpeq_wide_m17_s32: ++** mov (z[0-9]+\.d), #-17 ++** cmpeq p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_m17_s32, svint32_t, ++ p0 = svcmpeq_wide_n_s32 (p1, z0, -17), ++ p0 = svcmpeq_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s8.c +new file mode 100644 +index 000000000..a9e9a0bf5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpeq_wide_s8_tied: ++** cmpeq p0\.b, p0/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpeq_wide_s8_tied, svint8_t, svint64_t, ++ p0 = svcmpeq_wide_s8 (p0, z0, z1), ++ p0 = svcmpeq_wide (p0, z0, z1)) ++ ++/* ++** cmpeq_wide_s8_untied: ++** cmpeq p0\.b, p1/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpeq_wide_s8_untied, svint8_t, svint64_t, ++ p0 = svcmpeq_wide_s8 (p1, z0, z1), ++ p0 = svcmpeq_wide (p1, z0, z1)) ++ ++/* ++** cmpeq_wide_x0_s8: ++** mov (z[0-9]+\.d), x0 ++** cmpeq p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpeq_wide_x0_s8, svint8_t, int64_t, ++ p0 = svcmpeq_wide_n_s8 (p1, z0, x0), ++ p0 = svcmpeq_wide (p1, z0, x0)) ++ ++/* ++** cmpeq_wide_0_s8: ++** cmpeq p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_0_s8, svint8_t, ++ p0 = svcmpeq_wide_n_s8 (p1, z0, 0), ++ p0 = svcmpeq_wide (p1, z0, 0)) ++ ++/* ++** cmpeq_wide_1_s8: ++** cmpeq p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_1_s8, svint8_t, ++ p0 = svcmpeq_wide_n_s8 (p1, z0, 1), ++ p0 = svcmpeq_wide (p1, z0, 1)) ++ ++/* ++** cmpeq_wide_15_s8: ++** cmpeq p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_15_s8, svint8_t, ++ p0 = svcmpeq_wide_n_s8 (p1, z0, 15), ++ p0 = svcmpeq_wide (p1, z0, 15)) ++ ++/* ++** cmpeq_wide_16_s8: ++** mov (z[0-9]+\.d), #16 ++** cmpeq p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_16_s8, svint8_t, ++ p0 = svcmpeq_wide_n_s8 (p1, z0, 16), ++ p0 = svcmpeq_wide (p1, z0, 16)) ++ ++/* ++** cmpeq_wide_m1_s8: ++** cmpeq p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_m1_s8, svint8_t, ++ p0 = svcmpeq_wide_n_s8 (p1, z0, -1), ++ p0 = svcmpeq_wide (p1, z0, -1)) ++ ++/* ++** cmpeq_wide_m16_s8: ++** cmpeq p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_m16_s8, svint8_t, ++ p0 = svcmpeq_wide_n_s8 (p1, z0, -16), ++ p0 = svcmpeq_wide (p1, z0, -16)) ++ ++/* ++** cmpeq_wide_m17_s8: ++** mov (z[0-9]+\.d), #-17 ++** cmpeq p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpeq_wide_m17_s8, svint8_t, ++ p0 = svcmpeq_wide_n_s8 (p1, z0, -17), ++ p0 = 
svcmpeq_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f16.c +new file mode 100644 +index 000000000..a6db8c16a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f16.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_f16_tied: ++** ( ++** fcmge p0\.h, p0/z, z0\.h, z1\.h ++** | ++** fcmle p0\.h, p0/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_f16_tied, svfloat16_t, ++ p0 = svcmpge_f16 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_f16_untied: ++** ( ++** fcmge p0\.h, p1/z, z0\.h, z1\.h ++** | ++** fcmle p0\.h, p1/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_f16_untied, svfloat16_t, ++ p0 = svcmpge_f16 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** ( ++** fcmge p0\.h, p1/z, z0\.h, \1 ++** | ++** fcmle p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpge_h4_f16, svfloat16_t, float16_t, ++ p0 = svcmpge_n_f16 (p1, z0, d4), ++ p0 = svcmpge (p1, z0, d4)) ++ ++/* ++** cmpge_0_f16: ++** fcmge p0\.h, p1/z, z0\.h, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_f16, svfloat16_t, ++ p0 = svcmpge_n_f16 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** ( ++** fcmge p0\.h, p1/z, z0\.h, \1 ++** | ++** fcmle p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_f16, svfloat16_t, ++ p0 = svcmpge_n_f16 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f32.c +new file mode 100644 +index 000000000..ee2976e58 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f32.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_f32_tied: ++** ( ++** fcmge p0\.s, p0/z, z0\.s, z1\.s ++** | ++** fcmle p0\.s, p0/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_f32_tied, svfloat32_t, ++ p0 = svcmpge_f32 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_f32_untied: ++** ( ++** fcmge p0\.s, p1/z, z0\.s, z1\.s ++** | ++** fcmle p0\.s, p1/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_f32_untied, svfloat32_t, ++ p0 = svcmpge_f32 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** ( ++** fcmge p0\.s, p1/z, z0\.s, \1 ++** | ++** fcmle p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpge_s4_f32, svfloat32_t, float32_t, ++ p0 = svcmpge_n_f32 (p1, z0, d4), ++ p0 = svcmpge (p1, z0, d4)) ++ ++/* ++** cmpge_0_f32: ++** fcmge p0\.s, p1/z, z0\.s, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_f32, svfloat32_t, ++ p0 = svcmpge_n_f32 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** ( ++** fcmge p0\.s, p1/z, z0\.s, \1 ++** | ++** fcmle p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_f32, svfloat32_t, ++ p0 = svcmpge_n_f32 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f64.c +new file mode 100644 +index 000000000..ceea0afe3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f64.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_f64_tied: ++** ( ++** fcmge p0\.d, p0/z, z0\.d, z1\.d ++** | ++** fcmle p0\.d, p0/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_f64_tied, svfloat64_t, ++ p0 = svcmpge_f64 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_f64_untied: ++** ( ++** fcmge p0\.d, p1/z, z0\.d, z1\.d ++** | ++** fcmle p0\.d, p1/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_f64_untied, svfloat64_t, ++ p0 = svcmpge_f64 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** ( ++** fcmge p0\.d, p1/z, z0\.d, \1 ++** | ++** fcmle p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpge_d4_f64, svfloat64_t, float64_t, ++ p0 = svcmpge_n_f64 (p1, z0, d4), ++ p0 = svcmpge (p1, z0, d4)) ++ ++/* ++** cmpge_0_f64: ++** fcmge p0\.d, p1/z, z0\.d, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_f64, svfloat64_t, ++ p0 = svcmpge_n_f64 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** ( ++** fcmge p0\.d, p1/z, z0\.d, \1 ++** | ++** fcmle p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_f64, svfloat64_t, ++ p0 = svcmpge_n_f64 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s16.c +new file mode 100644 +index 000000000..de9180b84 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s16.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_s16_tied: ++** ( ++** cmpge p0\.h, p0/z, z0\.h, z1\.h ++** | ++** cmple p0\.h, p0/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_s16_tied, svint16_t, ++ p0 = svcmpge_s16 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_s16_untied: ++** ( ++** cmpge p0\.h, p1/z, z0\.h, z1\.h ++** | ++** cmple p0\.h, p1/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_s16_untied, svint16_t, ++ p0 = svcmpge_s16 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_w0_s16: ++** mov (z[0-9]+\.h), w0 ++** ( ++** cmpge p0\.h, p1/z, z0\.h, \1 ++** | ++** cmple p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_w0_s16, svint16_t, int16_t, ++ p0 = svcmpge_n_s16 (p1, z0, x0), ++ p0 = svcmpge (p1, z0, x0)) ++ ++/* ++** cmpge_0_s16: ++** cmpge p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_s16, svint16_t, ++ p0 = svcmpge_n_s16 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_s16: ++** cmpge p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_s16, svint16_t, ++ p0 = svcmpge_n_s16 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) ++ ++/* ++** cmpge_15_s16: ++** cmpge p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_15_s16, svint16_t, ++ p0 = svcmpge_n_s16 (p1, z0, 15), ++ p0 = svcmpge (p1, z0, 15)) ++ ++/* ++** 
cmpge_16_s16: ++** mov (z[0-9]+\.h), #16 ++** ( ++** cmpge p0\.h, p1/z, z0\.h, \1 ++** | ++** cmple p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_16_s16, svint16_t, ++ p0 = svcmpge_n_s16 (p1, z0, 16), ++ p0 = svcmpge (p1, z0, 16)) ++ ++/* ++** cmpge_m1_s16: ++** cmpge p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m1_s16, svint16_t, ++ p0 = svcmpge_n_s16 (p1, z0, -1), ++ p0 = svcmpge (p1, z0, -1)) ++ ++/* ++** cmpge_m16_s16: ++** cmpge p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m16_s16, svint16_t, ++ p0 = svcmpge_n_s16 (p1, z0, -16), ++ p0 = svcmpge (p1, z0, -16)) ++ ++/* ++** cmpge_m17_s16: ++** mov (z[0-9]+\.h), #-17 ++** ( ++** cmpge p0\.h, p1/z, z0\.h, \1 ++** | ++** cmple p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m17_s16, svint16_t, ++ p0 = svcmpge_n_s16 (p1, z0, -17), ++ p0 = svcmpge (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s32.c +new file mode 100644 +index 000000000..67286b1fe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s32.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_s32_tied: ++** ( ++** cmpge p0\.s, p0/z, z0\.s, z1\.s ++** | ++** cmple p0\.s, p0/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_s32_tied, svint32_t, ++ p0 = svcmpge_s32 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_s32_untied: ++** ( ++** cmpge p0\.s, p1/z, z0\.s, z1\.s ++** | ++** cmple p0\.s, p1/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_s32_untied, svint32_t, ++ p0 = svcmpge_s32 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_w0_s32: ++** mov (z[0-9]+\.s), w0 ++** ( ++** cmpge p0\.s, p1/z, z0\.s, \1 ++** | ++** cmple p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_w0_s32, svint32_t, int32_t, ++ p0 = svcmpge_n_s32 (p1, z0, x0), ++ p0 = svcmpge (p1, z0, x0)) ++ ++/* ++** cmpge_0_s32: ++** cmpge p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_s32, svint32_t, ++ p0 = svcmpge_n_s32 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_s32: ++** cmpge p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_s32, svint32_t, ++ p0 = svcmpge_n_s32 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) ++ ++/* ++** cmpge_15_s32: ++** cmpge p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_15_s32, svint32_t, ++ p0 = svcmpge_n_s32 (p1, z0, 15), ++ p0 = svcmpge (p1, z0, 15)) ++ ++/* ++** cmpge_16_s32: ++** mov (z[0-9]+\.s), #16 ++** ( ++** cmpge p0\.s, p1/z, z0\.s, \1 ++** | ++** cmple p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_16_s32, svint32_t, ++ p0 = svcmpge_n_s32 (p1, z0, 16), ++ p0 = svcmpge (p1, z0, 16)) ++ ++/* ++** cmpge_m1_s32: ++** cmpge p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m1_s32, svint32_t, ++ p0 = svcmpge_n_s32 (p1, z0, -1), ++ p0 = svcmpge (p1, z0, -1)) ++ ++/* ++** cmpge_m16_s32: ++** cmpge p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m16_s32, svint32_t, ++ p0 = svcmpge_n_s32 (p1, z0, -16), ++ p0 = svcmpge (p1, z0, -16)) ++ ++/* ++** cmpge_m17_s32: ++** mov (z[0-9]+\.s), #-17 ++** ( ++** cmpge p0\.s, p1/z, z0\.s, \1 ++** | ++** cmple p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m17_s32, svint32_t, ++ p0 = svcmpge_n_s32 (p1, z0, -17), ++ p0 = svcmpge (p1, z0, -17)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s64.c +new file mode 100644 +index 000000000..02e3ac07a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s64.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_s64_tied: ++** ( ++** cmpge p0\.d, p0/z, z0\.d, z1\.d ++** | ++** cmple p0\.d, p0/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_s64_tied, svint64_t, ++ p0 = svcmpge_s64 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_s64_untied: ++** ( ++** cmpge p0\.d, p1/z, z0\.d, z1\.d ++** | ++** cmple p0\.d, p1/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_s64_untied, svint64_t, ++ p0 = svcmpge_s64 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_x0_s64: ++** mov (z[0-9]+\.d), x0 ++** ( ++** cmpge p0\.d, p1/z, z0\.d, \1 ++** | ++** cmple p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_x0_s64, svint64_t, int64_t, ++ p0 = svcmpge_n_s64 (p1, z0, x0), ++ p0 = svcmpge (p1, z0, x0)) ++ ++/* ++** cmpge_0_s64: ++** cmpge p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_s64, svint64_t, ++ p0 = svcmpge_n_s64 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_s64: ++** cmpge p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_s64, svint64_t, ++ p0 = svcmpge_n_s64 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) ++ ++/* ++** cmpge_15_s64: ++** cmpge p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_15_s64, svint64_t, ++ p0 = svcmpge_n_s64 (p1, z0, 15), ++ p0 = svcmpge (p1, z0, 15)) ++ ++/* ++** cmpge_16_s64: ++** mov (z[0-9]+\.d), #16 ++** ( ++** cmpge p0\.d, p1/z, z0\.d, \1 ++** | ++** cmple p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_16_s64, svint64_t, ++ p0 = svcmpge_n_s64 (p1, z0, 16), ++ p0 = svcmpge (p1, z0, 16)) ++ ++/* ++** cmpge_m1_s64: ++** cmpge p0\.d, p1/z, z0\.d, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m1_s64, svint64_t, ++ p0 = svcmpge_n_s64 (p1, z0, -1), ++ p0 = svcmpge (p1, z0, -1)) ++ ++/* ++** cmpge_m16_s64: ++** cmpge p0\.d, p1/z, z0\.d, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m16_s64, svint64_t, ++ p0 = svcmpge_n_s64 (p1, z0, -16), ++ p0 = svcmpge (p1, z0, -16)) ++ ++/* ++** cmpge_m17_s64: ++** mov (z[0-9]+\.d), #-17 ++** ( ++** cmpge p0\.d, p1/z, z0\.d, \1 ++** | ++** cmple p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m17_s64, svint64_t, ++ p0 = svcmpge_n_s64 (p1, z0, -17), ++ p0 = svcmpge (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s8.c +new file mode 100644 +index 000000000..45c9c5f10 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s8.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_s8_tied: ++** ( ++** cmpge p0\.b, p0/z, z0\.b, z1\.b ++** | ++** cmple p0\.b, p0/z, z1\.b, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_s8_tied, svint8_t, ++ p0 = svcmpge_s8 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_s8_untied: ++** ( ++** cmpge p0\.b, p1/z, z0\.b, z1\.b ++** | ++** cmple p0\.b, p1/z, z1\.b, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_s8_untied, svint8_t, ++ p0 = svcmpge_s8 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_w0_s8: ++** mov (z[0-9]+\.b), w0 ++** ( ++** cmpge p0\.b, p1/z, 
z0\.b, \1 ++** | ++** cmple p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_w0_s8, svint8_t, int8_t, ++ p0 = svcmpge_n_s8 (p1, z0, x0), ++ p0 = svcmpge (p1, z0, x0)) ++ ++/* ++** cmpge_0_s8: ++** cmpge p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_s8, svint8_t, ++ p0 = svcmpge_n_s8 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_s8: ++** cmpge p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_s8, svint8_t, ++ p0 = svcmpge_n_s8 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) ++ ++/* ++** cmpge_15_s8: ++** cmpge p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_15_s8, svint8_t, ++ p0 = svcmpge_n_s8 (p1, z0, 15), ++ p0 = svcmpge (p1, z0, 15)) ++ ++/* ++** cmpge_16_s8: ++** mov (z[0-9]+\.b), #16 ++** ( ++** cmpge p0\.b, p1/z, z0\.b, \1 ++** | ++** cmple p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_16_s8, svint8_t, ++ p0 = svcmpge_n_s8 (p1, z0, 16), ++ p0 = svcmpge (p1, z0, 16)) ++ ++/* ++** cmpge_m1_s8: ++** cmpge p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m1_s8, svint8_t, ++ p0 = svcmpge_n_s8 (p1, z0, -1), ++ p0 = svcmpge (p1, z0, -1)) ++ ++/* ++** cmpge_m16_s8: ++** cmpge p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m16_s8, svint8_t, ++ p0 = svcmpge_n_s8 (p1, z0, -16), ++ p0 = svcmpge (p1, z0, -16)) ++ ++/* ++** cmpge_m17_s8: ++** mov (z[0-9]+\.b), #-17 ++** ( ++** cmpge p0\.b, p1/z, z0\.b, \1 ++** | ++** cmple p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m17_s8, svint8_t, ++ p0 = svcmpge_n_s8 (p1, z0, -17), ++ p0 = svcmpge (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u16.c +new file mode 100644 +index 000000000..7c7d2b307 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u16.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_u16_tied: ++** ( ++** cmphs p0\.h, p0/z, z0\.h, z1\.h ++** | ++** cmpls p0\.h, p0/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_u16_tied, svuint16_t, ++ p0 = svcmpge_u16 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_u16_untied: ++** ( ++** cmphs p0\.h, p1/z, z0\.h, z1\.h ++** | ++** cmpls p0\.h, p1/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_u16_untied, svuint16_t, ++ p0 = svcmpge_u16 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_w0_u16: ++** mov (z[0-9]+\.h), w0 ++** ( ++** cmphs p0\.h, p1/z, z0\.h, \1 ++** | ++** cmpls p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_w0_u16, svuint16_t, uint16_t, ++ p0 = svcmpge_n_u16 (p1, z0, x0), ++ p0 = svcmpge (p1, z0, x0)) ++ ++/* ++** cmpge_0_u16: ++** cmphs p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_u16, svuint16_t, ++ p0 = svcmpge_n_u16 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_u16: ++** cmphs p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_u16, svuint16_t, ++ p0 = svcmpge_n_u16 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) ++ ++/* ++** cmpge_15_u16: ++** cmphs p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_15_u16, svuint16_t, ++ p0 = svcmpge_n_u16 (p1, z0, 15), ++ p0 = svcmpge (p1, z0, 15)) ++ ++/* ++** cmpge_16_u16: ++** cmphs p0\.h, p1/z, z0\.h, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_16_u16, svuint16_t, ++ p0 = svcmpge_n_u16 (p1, z0, 16), ++ p0 = svcmpge (p1, z0, 16)) ++ ++/* ++** cmpge_127_u16: ++** cmphs p0\.h, 
p1/z, z0\.h, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_127_u16, svuint16_t, ++ p0 = svcmpge_n_u16 (p1, z0, 127), ++ p0 = svcmpge (p1, z0, 127)) ++ ++/* ++** cmpge_128_u16: ++** mov (z[0-9]+\.h), #128 ++** ( ++** cmphs p0\.h, p1/z, z0\.h, \1 ++** | ++** cmpls p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_128_u16, svuint16_t, ++ p0 = svcmpge_n_u16 (p1, z0, 128), ++ p0 = svcmpge (p1, z0, 128)) ++ ++/* ++** cmpge_m1_u16: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphs p0\.h, p1/z, z0\.h, \1\.h ++** | ++** cmpls p0\.h, p1/z, \1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m1_u16, svuint16_t, ++ p0 = svcmpge_n_u16 (p1, z0, -1), ++ p0 = svcmpge (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u32.c +new file mode 100644 +index 000000000..a2021ef50 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u32.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_u32_tied: ++** ( ++** cmphs p0\.s, p0/z, z0\.s, z1\.s ++** | ++** cmpls p0\.s, p0/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_u32_tied, svuint32_t, ++ p0 = svcmpge_u32 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_u32_untied: ++** ( ++** cmphs p0\.s, p1/z, z0\.s, z1\.s ++** | ++** cmpls p0\.s, p1/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_u32_untied, svuint32_t, ++ p0 = svcmpge_u32 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_w0_u32: ++** mov (z[0-9]+\.s), w0 ++** ( ++** cmphs p0\.s, p1/z, z0\.s, \1 ++** | ++** cmpls p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_w0_u32, svuint32_t, uint32_t, ++ p0 = svcmpge_n_u32 (p1, z0, x0), ++ p0 = svcmpge (p1, z0, x0)) ++ ++/* ++** cmpge_0_u32: ++** cmphs p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_u32, svuint32_t, ++ p0 = svcmpge_n_u32 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_u32: ++** cmphs p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_u32, svuint32_t, ++ p0 = svcmpge_n_u32 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) ++ ++/* ++** cmpge_15_u32: ++** cmphs p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_15_u32, svuint32_t, ++ p0 = svcmpge_n_u32 (p1, z0, 15), ++ p0 = svcmpge (p1, z0, 15)) ++ ++/* ++** cmpge_16_u32: ++** cmphs p0\.s, p1/z, z0\.s, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_16_u32, svuint32_t, ++ p0 = svcmpge_n_u32 (p1, z0, 16), ++ p0 = svcmpge (p1, z0, 16)) ++ ++/* ++** cmpge_127_u32: ++** cmphs p0\.s, p1/z, z0\.s, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_127_u32, svuint32_t, ++ p0 = svcmpge_n_u32 (p1, z0, 127), ++ p0 = svcmpge (p1, z0, 127)) ++ ++/* ++** cmpge_128_u32: ++** mov (z[0-9]+\.s), #128 ++** ( ++** cmphs p0\.s, p1/z, z0\.s, \1 ++** | ++** cmpls p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_128_u32, svuint32_t, ++ p0 = svcmpge_n_u32 (p1, z0, 128), ++ p0 = svcmpge (p1, z0, 128)) ++ ++/* ++** cmpge_m1_u32: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphs p0\.s, p1/z, z0\.s, \1\.s ++** | ++** cmpls p0\.s, p1/z, \1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m1_u32, svuint32_t, ++ p0 = svcmpge_n_u32 (p1, z0, -1), ++ p0 = svcmpge (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u64.c +new file mode 100644 +index 000000000..0f9159590 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u64.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_u64_tied: ++** ( ++** cmphs p0\.d, p0/z, z0\.d, z1\.d ++** | ++** cmpls p0\.d, p0/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_u64_tied, svuint64_t, ++ p0 = svcmpge_u64 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_u64_untied: ++** ( ++** cmphs p0\.d, p1/z, z0\.d, z1\.d ++** | ++** cmpls p0\.d, p1/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_u64_untied, svuint64_t, ++ p0 = svcmpge_u64 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_x0_u64: ++** mov (z[0-9]+\.d), x0 ++** ( ++** cmphs p0\.d, p1/z, z0\.d, \1 ++** | ++** cmpls p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_x0_u64, svuint64_t, uint64_t, ++ p0 = svcmpge_n_u64 (p1, z0, x0), ++ p0 = svcmpge (p1, z0, x0)) ++ ++/* ++** cmpge_0_u64: ++** cmphs p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_u64, svuint64_t, ++ p0 = svcmpge_n_u64 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_u64: ++** cmphs p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_u64, svuint64_t, ++ p0 = svcmpge_n_u64 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) ++ ++/* ++** cmpge_15_u64: ++** cmphs p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_15_u64, svuint64_t, ++ p0 = svcmpge_n_u64 (p1, z0, 15), ++ p0 = svcmpge (p1, z0, 15)) ++ ++/* ++** cmpge_16_u64: ++** cmphs p0\.d, p1/z, z0\.d, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_16_u64, svuint64_t, ++ p0 = svcmpge_n_u64 (p1, z0, 16), ++ p0 = svcmpge (p1, z0, 16)) ++ ++/* ++** cmpge_127_u64: ++** cmphs p0\.d, p1/z, z0\.d, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_127_u64, svuint64_t, ++ p0 = svcmpge_n_u64 (p1, z0, 127), ++ p0 = svcmpge (p1, z0, 127)) ++ ++/* ++** cmpge_128_u64: ++** mov (z[0-9]+\.d), #128 ++** ( ++** cmphs p0\.d, p1/z, z0\.d, \1 ++** | ++** cmpls p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_128_u64, svuint64_t, ++ p0 = svcmpge_n_u64 (p1, z0, 128), ++ p0 = svcmpge (p1, z0, 128)) ++ ++/* ++** cmpge_m1_u64: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphs p0\.d, p1/z, z0\.d, \1\.d ++** | ++** cmpls p0\.d, p1/z, \1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m1_u64, svuint64_t, ++ p0 = svcmpge_n_u64 (p1, z0, -1), ++ p0 = svcmpge (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u8.c +new file mode 100644 +index 000000000..39f988d01 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u8.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_u8_tied: ++** ( ++** cmphs p0\.b, p0/z, z0\.b, z1\.b ++** | ++** cmpls p0\.b, p0/z, z1\.b, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_u8_tied, svuint8_t, ++ p0 = svcmpge_u8 (p0, z0, z1), ++ p0 = svcmpge (p0, z0, z1)) ++ ++/* ++** cmpge_u8_untied: ++** ( ++** cmphs p0\.b, p1/z, z0\.b, z1\.b ++** | ++** cmpls p0\.b, p1/z, z1\.b, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_u8_untied, svuint8_t, ++ p0 = svcmpge_u8 (p1, z0, z1), ++ p0 = svcmpge (p1, z0, z1)) ++ ++/* ++** cmpge_w0_u8: ++** mov (z[0-9]+\.b), w0 ++** ( ++** cmphs p0\.b, p1/z, z0\.b, \1 ++** | ++** cmpls p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_w0_u8, svuint8_t, uint8_t, ++ p0 = svcmpge_n_u8 (p1, z0, x0), ++ p0 = 
svcmpge (p1, z0, x0)) ++ ++/* ++** cmpge_0_u8: ++** cmphs p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_0_u8, svuint8_t, ++ p0 = svcmpge_n_u8 (p1, z0, 0), ++ p0 = svcmpge (p1, z0, 0)) ++ ++/* ++** cmpge_1_u8: ++** cmphs p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_1_u8, svuint8_t, ++ p0 = svcmpge_n_u8 (p1, z0, 1), ++ p0 = svcmpge (p1, z0, 1)) ++ ++/* ++** cmpge_15_u8: ++** cmphs p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_15_u8, svuint8_t, ++ p0 = svcmpge_n_u8 (p1, z0, 15), ++ p0 = svcmpge (p1, z0, 15)) ++ ++/* ++** cmpge_16_u8: ++** cmphs p0\.b, p1/z, z0\.b, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_16_u8, svuint8_t, ++ p0 = svcmpge_n_u8 (p1, z0, 16), ++ p0 = svcmpge (p1, z0, 16)) ++ ++/* ++** cmpge_127_u8: ++** cmphs p0\.b, p1/z, z0\.b, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_127_u8, svuint8_t, ++ p0 = svcmpge_n_u8 (p1, z0, 127), ++ p0 = svcmpge (p1, z0, 127)) ++ ++/* ++** cmpge_128_u8: ++** mov (z[0-9]+\.b), #-128 ++** ( ++** cmphs p0\.b, p1/z, z0\.b, \1 ++** | ++** cmpls p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_128_u8, svuint8_t, ++ p0 = svcmpge_n_u8 (p1, z0, 128), ++ p0 = svcmpge (p1, z0, 128)) ++ ++/* ++** cmpge_m1_u8: ++** mov (z[0-9]+\.b), #-1 ++** ( ++** cmphs p0\.b, p1/z, z0\.b, \1 ++** | ++** cmpls p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_m1_u8, svuint8_t, ++ p0 = svcmpge_n_u8 (p1, z0, -1), ++ p0 = svcmpge (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s16.c +new file mode 100644 +index 000000000..0400d7871 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_wide_s16_tied: ++** cmpge p0\.h, p0/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_s16_tied, svint16_t, svint64_t, ++ p0 = svcmpge_wide_s16 (p0, z0, z1), ++ p0 = svcmpge_wide (p0, z0, z1)) ++ ++/* ++** cmpge_wide_s16_untied: ++** cmpge p0\.h, p1/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_s16_untied, svint16_t, svint64_t, ++ p0 = svcmpge_wide_s16 (p1, z0, z1), ++ p0 = svcmpge_wide (p1, z0, z1)) ++ ++/* ++** cmpge_wide_x0_s16: ++** mov (z[0-9]+\.d), x0 ++** cmpge p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_wide_x0_s16, svint16_t, int64_t, ++ p0 = svcmpge_wide_n_s16 (p1, z0, x0), ++ p0 = svcmpge_wide (p1, z0, x0)) ++ ++/* ++** cmpge_wide_0_s16: ++** cmpge p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_0_s16, svint16_t, ++ p0 = svcmpge_wide_n_s16 (p1, z0, 0), ++ p0 = svcmpge_wide (p1, z0, 0)) ++ ++/* ++** cmpge_wide_1_s16: ++** cmpge p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_1_s16, svint16_t, ++ p0 = svcmpge_wide_n_s16 (p1, z0, 1), ++ p0 = svcmpge_wide (p1, z0, 1)) ++ ++/* ++** cmpge_wide_15_s16: ++** cmpge p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_15_s16, svint16_t, ++ p0 = svcmpge_wide_n_s16 (p1, z0, 15), ++ p0 = svcmpge_wide (p1, z0, 15)) ++ ++/* ++** cmpge_wide_16_s16: ++** mov (z[0-9]+\.d), #16 ++** cmpge p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_16_s16, svint16_t, ++ p0 = svcmpge_wide_n_s16 (p1, z0, 16), ++ p0 = svcmpge_wide (p1, z0, 16)) ++ ++/* ++** cmpge_wide_m1_s16: ++** cmpge p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m1_s16, svint16_t, ++ p0 = svcmpge_wide_n_s16 (p1, z0, -1), 
++ p0 = svcmpge_wide (p1, z0, -1)) ++ ++/* ++** cmpge_wide_m16_s16: ++** cmpge p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m16_s16, svint16_t, ++ p0 = svcmpge_wide_n_s16 (p1, z0, -16), ++ p0 = svcmpge_wide (p1, z0, -16)) ++ ++/* ++** cmpge_wide_m17_s16: ++** mov (z[0-9]+\.d), #-17 ++** cmpge p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m17_s16, svint16_t, ++ p0 = svcmpge_wide_n_s16 (p1, z0, -17), ++ p0 = svcmpge_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s32.c +new file mode 100644 +index 000000000..ad7b9c55b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_wide_s32_tied: ++** cmpge p0\.s, p0/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_s32_tied, svint32_t, svint64_t, ++ p0 = svcmpge_wide_s32 (p0, z0, z1), ++ p0 = svcmpge_wide (p0, z0, z1)) ++ ++/* ++** cmpge_wide_s32_untied: ++** cmpge p0\.s, p1/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_s32_untied, svint32_t, svint64_t, ++ p0 = svcmpge_wide_s32 (p1, z0, z1), ++ p0 = svcmpge_wide (p1, z0, z1)) ++ ++/* ++** cmpge_wide_x0_s32: ++** mov (z[0-9]+\.d), x0 ++** cmpge p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_wide_x0_s32, svint32_t, int64_t, ++ p0 = svcmpge_wide_n_s32 (p1, z0, x0), ++ p0 = svcmpge_wide (p1, z0, x0)) ++ ++/* ++** cmpge_wide_0_s32: ++** cmpge p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_0_s32, svint32_t, ++ p0 = svcmpge_wide_n_s32 (p1, z0, 0), ++ p0 = svcmpge_wide (p1, z0, 0)) ++ ++/* ++** cmpge_wide_1_s32: ++** cmpge p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_1_s32, svint32_t, ++ p0 = svcmpge_wide_n_s32 (p1, z0, 1), ++ p0 = svcmpge_wide (p1, z0, 1)) ++ ++/* ++** cmpge_wide_15_s32: ++** cmpge p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_15_s32, svint32_t, ++ p0 = svcmpge_wide_n_s32 (p1, z0, 15), ++ p0 = svcmpge_wide (p1, z0, 15)) ++ ++/* ++** cmpge_wide_16_s32: ++** mov (z[0-9]+\.d), #16 ++** cmpge p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_16_s32, svint32_t, ++ p0 = svcmpge_wide_n_s32 (p1, z0, 16), ++ p0 = svcmpge_wide (p1, z0, 16)) ++ ++/* ++** cmpge_wide_m1_s32: ++** cmpge p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m1_s32, svint32_t, ++ p0 = svcmpge_wide_n_s32 (p1, z0, -1), ++ p0 = svcmpge_wide (p1, z0, -1)) ++ ++/* ++** cmpge_wide_m16_s32: ++** cmpge p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m16_s32, svint32_t, ++ p0 = svcmpge_wide_n_s32 (p1, z0, -16), ++ p0 = svcmpge_wide (p1, z0, -16)) ++ ++/* ++** cmpge_wide_m17_s32: ++** mov (z[0-9]+\.d), #-17 ++** cmpge p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m17_s32, svint32_t, ++ p0 = svcmpge_wide_n_s32 (p1, z0, -17), ++ p0 = svcmpge_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s8.c +new file mode 100644 +index 000000000..b03a42488 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_wide_s8_tied: ++** cmpge p0\.b, p0/z, z0\.b, z1\.d ++** ret ++*/ 
++TEST_COMPARE_DUAL_Z (cmpge_wide_s8_tied, svint8_t, svint64_t, ++ p0 = svcmpge_wide_s8 (p0, z0, z1), ++ p0 = svcmpge_wide (p0, z0, z1)) ++ ++/* ++** cmpge_wide_s8_untied: ++** cmpge p0\.b, p1/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_s8_untied, svint8_t, svint64_t, ++ p0 = svcmpge_wide_s8 (p1, z0, z1), ++ p0 = svcmpge_wide (p1, z0, z1)) ++ ++/* ++** cmpge_wide_x0_s8: ++** mov (z[0-9]+\.d), x0 ++** cmpge p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_wide_x0_s8, svint8_t, int64_t, ++ p0 = svcmpge_wide_n_s8 (p1, z0, x0), ++ p0 = svcmpge_wide (p1, z0, x0)) ++ ++/* ++** cmpge_wide_0_s8: ++** cmpge p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_0_s8, svint8_t, ++ p0 = svcmpge_wide_n_s8 (p1, z0, 0), ++ p0 = svcmpge_wide (p1, z0, 0)) ++ ++/* ++** cmpge_wide_1_s8: ++** cmpge p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_1_s8, svint8_t, ++ p0 = svcmpge_wide_n_s8 (p1, z0, 1), ++ p0 = svcmpge_wide (p1, z0, 1)) ++ ++/* ++** cmpge_wide_15_s8: ++** cmpge p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_15_s8, svint8_t, ++ p0 = svcmpge_wide_n_s8 (p1, z0, 15), ++ p0 = svcmpge_wide (p1, z0, 15)) ++ ++/* ++** cmpge_wide_16_s8: ++** mov (z[0-9]+\.d), #16 ++** cmpge p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_16_s8, svint8_t, ++ p0 = svcmpge_wide_n_s8 (p1, z0, 16), ++ p0 = svcmpge_wide (p1, z0, 16)) ++ ++/* ++** cmpge_wide_m1_s8: ++** cmpge p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m1_s8, svint8_t, ++ p0 = svcmpge_wide_n_s8 (p1, z0, -1), ++ p0 = svcmpge_wide (p1, z0, -1)) ++ ++/* ++** cmpge_wide_m16_s8: ++** cmpge p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m16_s8, svint8_t, ++ p0 = svcmpge_wide_n_s8 (p1, z0, -16), ++ p0 = svcmpge_wide (p1, z0, -16)) ++ ++/* ++** cmpge_wide_m17_s8: ++** mov (z[0-9]+\.d), #-17 ++** cmpge p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m17_s8, svint8_t, ++ p0 = svcmpge_wide_n_s8 (p1, z0, -17), ++ p0 = svcmpge_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u16.c +new file mode 100644 +index 000000000..966b1e554 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_wide_u16_tied: ++** cmphs p0\.h, p0/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_u16_tied, svuint16_t, svuint64_t, ++ p0 = svcmpge_wide_u16 (p0, z0, z1), ++ p0 = svcmpge_wide (p0, z0, z1)) ++ ++/* ++** cmpge_wide_u16_untied: ++** cmphs p0\.h, p1/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_u16_untied, svuint16_t, svuint64_t, ++ p0 = svcmpge_wide_u16 (p1, z0, z1), ++ p0 = svcmpge_wide (p1, z0, z1)) ++ ++/* ++** cmpge_wide_x0_u16: ++** mov (z[0-9]+\.d), x0 ++** cmphs p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_wide_x0_u16, svuint16_t, uint64_t, ++ p0 = svcmpge_wide_n_u16 (p1, z0, x0), ++ p0 = svcmpge_wide (p1, z0, x0)) ++ ++/* ++** cmpge_wide_0_u16: ++** cmphs p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_0_u16, svuint16_t, ++ p0 = svcmpge_wide_n_u16 (p1, z0, 0), ++ p0 = svcmpge_wide (p1, z0, 0)) ++ ++/* ++** cmpge_wide_1_u16: ++** cmphs p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_1_u16, svuint16_t, ++ p0 = svcmpge_wide_n_u16 (p1, z0, 1), ++ p0 = svcmpge_wide (p1, z0, 1)) 
++ ++/* ++** cmpge_wide_15_u16: ++** cmphs p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_15_u16, svuint16_t, ++ p0 = svcmpge_wide_n_u16 (p1, z0, 15), ++ p0 = svcmpge_wide (p1, z0, 15)) ++ ++/* ++** cmpge_wide_16_u16: ++** cmphs p0\.h, p1/z, z0\.h, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_16_u16, svuint16_t, ++ p0 = svcmpge_wide_n_u16 (p1, z0, 16), ++ p0 = svcmpge_wide (p1, z0, 16)) ++ ++/* ++** cmpge_wide_127_u16: ++** cmphs p0\.h, p1/z, z0\.h, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_127_u16, svuint16_t, ++ p0 = svcmpge_wide_n_u16 (p1, z0, 127), ++ p0 = svcmpge_wide (p1, z0, 127)) ++ ++/* ++** cmpge_wide_128_u16: ++** mov (z[0-9]+\.d), #128 ++** cmphs p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_128_u16, svuint16_t, ++ p0 = svcmpge_wide_n_u16 (p1, z0, 128), ++ p0 = svcmpge_wide (p1, z0, 128)) ++ ++/* ++** cmpge_wide_m1_u16: ++** mov (z[0-9]+)\.b, #-1 ++** cmphs p0\.h, p1/z, z0\.h, \1\.d ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m1_u16, svuint16_t, ++ p0 = svcmpge_wide_n_u16 (p1, z0, -1), ++ p0 = svcmpge_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u32.c +new file mode 100644 +index 000000000..fdeb53a46 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_wide_u32_tied: ++** cmphs p0\.s, p0/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_u32_tied, svuint32_t, svuint64_t, ++ p0 = svcmpge_wide_u32 (p0, z0, z1), ++ p0 = svcmpge_wide (p0, z0, z1)) ++ ++/* ++** cmpge_wide_u32_untied: ++** cmphs p0\.s, p1/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_u32_untied, svuint32_t, svuint64_t, ++ p0 = svcmpge_wide_u32 (p1, z0, z1), ++ p0 = svcmpge_wide (p1, z0, z1)) ++ ++/* ++** cmpge_wide_x0_u32: ++** mov (z[0-9]+\.d), x0 ++** cmphs p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_wide_x0_u32, svuint32_t, uint64_t, ++ p0 = svcmpge_wide_n_u32 (p1, z0, x0), ++ p0 = svcmpge_wide (p1, z0, x0)) ++ ++/* ++** cmpge_wide_0_u32: ++** cmphs p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_0_u32, svuint32_t, ++ p0 = svcmpge_wide_n_u32 (p1, z0, 0), ++ p0 = svcmpge_wide (p1, z0, 0)) ++ ++/* ++** cmpge_wide_1_u32: ++** cmphs p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_1_u32, svuint32_t, ++ p0 = svcmpge_wide_n_u32 (p1, z0, 1), ++ p0 = svcmpge_wide (p1, z0, 1)) ++ ++/* ++** cmpge_wide_15_u32: ++** cmphs p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_15_u32, svuint32_t, ++ p0 = svcmpge_wide_n_u32 (p1, z0, 15), ++ p0 = svcmpge_wide (p1, z0, 15)) ++ ++/* ++** cmpge_wide_16_u32: ++** cmphs p0\.s, p1/z, z0\.s, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_16_u32, svuint32_t, ++ p0 = svcmpge_wide_n_u32 (p1, z0, 16), ++ p0 = svcmpge_wide (p1, z0, 16)) ++ ++/* ++** cmpge_wide_127_u32: ++** cmphs p0\.s, p1/z, z0\.s, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_127_u32, svuint32_t, ++ p0 = svcmpge_wide_n_u32 (p1, z0, 127), ++ p0 = svcmpge_wide (p1, z0, 127)) ++ ++/* ++** cmpge_wide_128_u32: ++** mov (z[0-9]+\.d), #128 ++** cmphs p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_128_u32, svuint32_t, ++ p0 = svcmpge_wide_n_u32 (p1, z0, 128), ++ p0 = svcmpge_wide (p1, z0, 128)) ++ ++/* ++** cmpge_wide_m1_u32: ++** mov (z[0-9]+)\.b, #-1 ++** cmphs p0\.s, p1/z, z0\.s, \1\.d ++** 
ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m1_u32, svuint32_t, ++ p0 = svcmpge_wide_n_u32 (p1, z0, -1), ++ p0 = svcmpge_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u8.c +new file mode 100644 +index 000000000..565093120 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpge_wide_u8_tied: ++** cmphs p0\.b, p0/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_u8_tied, svuint8_t, svuint64_t, ++ p0 = svcmpge_wide_u8 (p0, z0, z1), ++ p0 = svcmpge_wide (p0, z0, z1)) ++ ++/* ++** cmpge_wide_u8_untied: ++** cmphs p0\.b, p1/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpge_wide_u8_untied, svuint8_t, svuint64_t, ++ p0 = svcmpge_wide_u8 (p1, z0, z1), ++ p0 = svcmpge_wide (p1, z0, z1)) ++ ++/* ++** cmpge_wide_x0_u8: ++** mov (z[0-9]+\.d), x0 ++** cmphs p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpge_wide_x0_u8, svuint8_t, uint64_t, ++ p0 = svcmpge_wide_n_u8 (p1, z0, x0), ++ p0 = svcmpge_wide (p1, z0, x0)) ++ ++/* ++** cmpge_wide_0_u8: ++** cmphs p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_0_u8, svuint8_t, ++ p0 = svcmpge_wide_n_u8 (p1, z0, 0), ++ p0 = svcmpge_wide (p1, z0, 0)) ++ ++/* ++** cmpge_wide_1_u8: ++** cmphs p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_1_u8, svuint8_t, ++ p0 = svcmpge_wide_n_u8 (p1, z0, 1), ++ p0 = svcmpge_wide (p1, z0, 1)) ++ ++/* ++** cmpge_wide_15_u8: ++** cmphs p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_15_u8, svuint8_t, ++ p0 = svcmpge_wide_n_u8 (p1, z0, 15), ++ p0 = svcmpge_wide (p1, z0, 15)) ++ ++/* ++** cmpge_wide_16_u8: ++** cmphs p0\.b, p1/z, z0\.b, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_16_u8, svuint8_t, ++ p0 = svcmpge_wide_n_u8 (p1, z0, 16), ++ p0 = svcmpge_wide (p1, z0, 16)) ++ ++/* ++** cmpge_wide_127_u8: ++** cmphs p0\.b, p1/z, z0\.b, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_127_u8, svuint8_t, ++ p0 = svcmpge_wide_n_u8 (p1, z0, 127), ++ p0 = svcmpge_wide (p1, z0, 127)) ++ ++/* ++** cmpge_wide_128_u8: ++** mov (z[0-9]+\.d), #128 ++** cmphs p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_128_u8, svuint8_t, ++ p0 = svcmpge_wide_n_u8 (p1, z0, 128), ++ p0 = svcmpge_wide (p1, z0, 128)) ++ ++/* ++** cmpge_wide_m1_u8: ++** mov (z[0-9]+)\.b, #-1 ++** cmphs p0\.b, p1/z, z0\.b, \1\.d ++** ret ++*/ ++TEST_COMPARE_Z (cmpge_wide_m1_u8, svuint8_t, ++ p0 = svcmpge_wide_n_u8 (p1, z0, -1), ++ p0 = svcmpge_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f16.c +new file mode 100644 +index 000000000..69b015794 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f16.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_f16_tied: ++** ( ++** fcmgt p0\.h, p0/z, z0\.h, z1\.h ++** | ++** fcmlt p0\.h, p0/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_f16_tied, svfloat16_t, ++ p0 = svcmpgt_f16 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_f16_untied: ++** ( ++** fcmgt p0\.h, p1/z, z0\.h, z1\.h ++** | ++** fcmlt p0\.h, p1/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_f16_untied, svfloat16_t, ++ p0 = svcmpgt_f16 (p1, z0, z1), ++ p0 = svcmpgt 
(p1, z0, z1)) ++ ++/* ++** cmpgt_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** ( ++** fcmgt p0\.h, p1/z, z0\.h, \1 ++** | ++** fcmlt p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpgt_h4_f16, svfloat16_t, float16_t, ++ p0 = svcmpgt_n_f16 (p1, z0, d4), ++ p0 = svcmpgt (p1, z0, d4)) ++ ++/* ++** cmpgt_0_f16: ++** fcmgt p0\.h, p1/z, z0\.h, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_f16, svfloat16_t, ++ p0 = svcmpgt_n_f16 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** ( ++** fcmgt p0\.h, p1/z, z0\.h, \1 ++** | ++** fcmlt p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_f16, svfloat16_t, ++ p0 = svcmpgt_n_f16 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f32.c +new file mode 100644 +index 000000000..7d66b67c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f32.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_f32_tied: ++** ( ++** fcmgt p0\.s, p0/z, z0\.s, z1\.s ++** | ++** fcmlt p0\.s, p0/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_f32_tied, svfloat32_t, ++ p0 = svcmpgt_f32 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_f32_untied: ++** ( ++** fcmgt p0\.s, p1/z, z0\.s, z1\.s ++** | ++** fcmlt p0\.s, p1/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_f32_untied, svfloat32_t, ++ p0 = svcmpgt_f32 (p1, z0, z1), ++ p0 = svcmpgt (p1, z0, z1)) ++ ++/* ++** cmpgt_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** ( ++** fcmgt p0\.s, p1/z, z0\.s, \1 ++** | ++** fcmlt p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpgt_s4_f32, svfloat32_t, float32_t, ++ p0 = svcmpgt_n_f32 (p1, z0, d4), ++ p0 = svcmpgt (p1, z0, d4)) ++ ++/* ++** cmpgt_0_f32: ++** fcmgt p0\.s, p1/z, z0\.s, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_f32, svfloat32_t, ++ p0 = svcmpgt_n_f32 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** ( ++** fcmgt p0\.s, p1/z, z0\.s, \1 ++** | ++** fcmlt p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_f32, svfloat32_t, ++ p0 = svcmpgt_n_f32 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f64.c +new file mode 100644 +index 000000000..f3a155476 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f64.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_f64_tied: ++** ( ++** fcmgt p0\.d, p0/z, z0\.d, z1\.d ++** | ++** fcmlt p0\.d, p0/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_f64_tied, svfloat64_t, ++ p0 = svcmpgt_f64 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_f64_untied: ++** ( ++** fcmgt p0\.d, p1/z, z0\.d, z1\.d ++** | ++** fcmlt p0\.d, p1/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_f64_untied, svfloat64_t, ++ p0 = svcmpgt_f64 (p1, z0, z1), ++ p0 = svcmpgt (p1, z0, z1)) ++ ++/* ++** cmpgt_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** ( ++** fcmgt p0\.d, p1/z, z0\.d, \1 ++** | ++** fcmlt p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpgt_d4_f64, svfloat64_t, float64_t, ++ p0 = svcmpgt_n_f64 (p1, z0, d4), ++ p0 = svcmpgt (p1, z0, d4)) ++ ++/* ++** cmpgt_0_f64: ++** fcmgt p0\.d, p1/z, z0\.d, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_f64, svfloat64_t, ++ p0 = svcmpgt_n_f64 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** ( ++** fcmgt p0\.d, p1/z, z0\.d, \1 ++** | ++** fcmlt p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_f64, svfloat64_t, ++ p0 = svcmpgt_n_f64 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s16.c +new file mode 100644 +index 000000000..cc86c0c00 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s16.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_s16_tied: ++** ( ++** cmpgt p0\.h, p0/z, z0\.h, z1\.h ++** | ++** cmplt p0\.h, p0/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_s16_tied, svint16_t, ++ p0 = svcmpgt_s16 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_s16_untied: ++** ( ++** cmpgt p0\.h, p1/z, z0\.h, z1\.h ++** | ++** cmplt p0\.h, p1/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_s16_untied, svint16_t, ++ p0 = svcmpgt_s16 (p1, z0, z1), ++ p0 = svcmpgt (p1, z0, z1)) ++ ++/* ++** cmpgt_w0_s16: ++** mov (z[0-9]+\.h), w0 ++** ( ++** cmpgt p0\.h, p1/z, z0\.h, \1 ++** | ++** cmplt p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_w0_s16, svint16_t, int16_t, ++ p0 = svcmpgt_n_s16 (p1, z0, x0), ++ p0 = svcmpgt (p1, z0, x0)) ++ ++/* ++** cmpgt_0_s16: ++** cmpgt p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_s16, svint16_t, ++ p0 = svcmpgt_n_s16 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_s16: ++** cmpgt p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_s16, svint16_t, ++ p0 = svcmpgt_n_s16 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) ++ ++/* ++** cmpgt_15_s16: ++** cmpgt p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_15_s16, svint16_t, ++ p0 = svcmpgt_n_s16 (p1, z0, 15), ++ p0 = svcmpgt (p1, z0, 15)) ++ ++/* ++** 
cmpgt_16_s16: ++** mov (z[0-9]+\.h), #16 ++** ( ++** cmpgt p0\.h, p1/z, z0\.h, \1 ++** | ++** cmplt p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_16_s16, svint16_t, ++ p0 = svcmpgt_n_s16 (p1, z0, 16), ++ p0 = svcmpgt (p1, z0, 16)) ++ ++/* ++** cmpgt_m1_s16: ++** cmpgt p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m1_s16, svint16_t, ++ p0 = svcmpgt_n_s16 (p1, z0, -1), ++ p0 = svcmpgt (p1, z0, -1)) ++ ++/* ++** cmpgt_m16_s16: ++** cmpgt p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m16_s16, svint16_t, ++ p0 = svcmpgt_n_s16 (p1, z0, -16), ++ p0 = svcmpgt (p1, z0, -16)) ++ ++/* ++** cmpgt_m17_s16: ++** mov (z[0-9]+\.h), #-17 ++** ( ++** cmpgt p0\.h, p1/z, z0\.h, \1 ++** | ++** cmplt p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m17_s16, svint16_t, ++ p0 = svcmpgt_n_s16 (p1, z0, -17), ++ p0 = svcmpgt (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s32.c +new file mode 100644 +index 000000000..75f0cc737 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s32.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_s32_tied: ++** ( ++** cmpgt p0\.s, p0/z, z0\.s, z1\.s ++** | ++** cmplt p0\.s, p0/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_s32_tied, svint32_t, ++ p0 = svcmpgt_s32 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_s32_untied: ++** ( ++** cmpgt p0\.s, p1/z, z0\.s, z1\.s ++** | ++** cmplt p0\.s, p1/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_s32_untied, svint32_t, ++ p0 = svcmpgt_s32 (p1, z0, z1), ++ p0 = svcmpgt (p1, z0, z1)) ++ ++/* ++** cmpgt_w0_s32: ++** mov (z[0-9]+\.s), w0 ++** ( ++** cmpgt p0\.s, p1/z, z0\.s, \1 ++** | ++** cmplt p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_w0_s32, svint32_t, int32_t, ++ p0 = svcmpgt_n_s32 (p1, z0, x0), ++ p0 = svcmpgt (p1, z0, x0)) ++ ++/* ++** cmpgt_0_s32: ++** cmpgt p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_s32, svint32_t, ++ p0 = svcmpgt_n_s32 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_s32: ++** cmpgt p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_s32, svint32_t, ++ p0 = svcmpgt_n_s32 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) ++ ++/* ++** cmpgt_15_s32: ++** cmpgt p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_15_s32, svint32_t, ++ p0 = svcmpgt_n_s32 (p1, z0, 15), ++ p0 = svcmpgt (p1, z0, 15)) ++ ++/* ++** cmpgt_16_s32: ++** mov (z[0-9]+\.s), #16 ++** ( ++** cmpgt p0\.s, p1/z, z0\.s, \1 ++** | ++** cmplt p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_16_s32, svint32_t, ++ p0 = svcmpgt_n_s32 (p1, z0, 16), ++ p0 = svcmpgt (p1, z0, 16)) ++ ++/* ++** cmpgt_m1_s32: ++** cmpgt p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m1_s32, svint32_t, ++ p0 = svcmpgt_n_s32 (p1, z0, -1), ++ p0 = svcmpgt (p1, z0, -1)) ++ ++/* ++** cmpgt_m16_s32: ++** cmpgt p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m16_s32, svint32_t, ++ p0 = svcmpgt_n_s32 (p1, z0, -16), ++ p0 = svcmpgt (p1, z0, -16)) ++ ++/* ++** cmpgt_m17_s32: ++** mov (z[0-9]+\.s), #-17 ++** ( ++** cmpgt p0\.s, p1/z, z0\.s, \1 ++** | ++** cmplt p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m17_s32, svint32_t, ++ p0 = svcmpgt_n_s32 (p1, z0, -17), ++ p0 = svcmpgt (p1, z0, -17)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s64.c +new file mode 100644 +index 000000000..dbfd55e6f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s64.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_s64_tied: ++** ( ++** cmpgt p0\.d, p0/z, z0\.d, z1\.d ++** | ++** cmplt p0\.d, p0/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_s64_tied, svint64_t, ++ p0 = svcmpgt_s64 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_s64_untied: ++** ( ++** cmpgt p0\.d, p1/z, z0\.d, z1\.d ++** | ++** cmplt p0\.d, p1/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_s64_untied, svint64_t, ++ p0 = svcmpgt_s64 (p1, z0, z1), ++ p0 = svcmpgt (p1, z0, z1)) ++ ++/* ++** cmpgt_x0_s64: ++** mov (z[0-9]+\.d), x0 ++** ( ++** cmpgt p0\.d, p1/z, z0\.d, \1 ++** | ++** cmplt p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_x0_s64, svint64_t, int64_t, ++ p0 = svcmpgt_n_s64 (p1, z0, x0), ++ p0 = svcmpgt (p1, z0, x0)) ++ ++/* ++** cmpgt_0_s64: ++** cmpgt p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_s64, svint64_t, ++ p0 = svcmpgt_n_s64 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_s64: ++** cmpgt p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_s64, svint64_t, ++ p0 = svcmpgt_n_s64 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) ++ ++/* ++** cmpgt_15_s64: ++** cmpgt p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_15_s64, svint64_t, ++ p0 = svcmpgt_n_s64 (p1, z0, 15), ++ p0 = svcmpgt (p1, z0, 15)) ++ ++/* ++** cmpgt_16_s64: ++** mov (z[0-9]+\.d), #16 ++** ( ++** cmpgt p0\.d, p1/z, z0\.d, \1 ++** | ++** cmplt p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_16_s64, svint64_t, ++ p0 = svcmpgt_n_s64 (p1, z0, 16), ++ p0 = svcmpgt (p1, z0, 16)) ++ ++/* ++** cmpgt_m1_s64: ++** cmpgt p0\.d, p1/z, z0\.d, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m1_s64, svint64_t, ++ p0 = svcmpgt_n_s64 (p1, z0, -1), ++ p0 = svcmpgt (p1, z0, -1)) ++ ++/* ++** cmpgt_m16_s64: ++** cmpgt p0\.d, p1/z, z0\.d, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m16_s64, svint64_t, ++ p0 = svcmpgt_n_s64 (p1, z0, -16), ++ p0 = svcmpgt (p1, z0, -16)) ++ ++/* ++** cmpgt_m17_s64: ++** mov (z[0-9]+\.d), #-17 ++** ( ++** cmpgt p0\.d, p1/z, z0\.d, \1 ++** | ++** cmplt p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m17_s64, svint64_t, ++ p0 = svcmpgt_n_s64 (p1, z0, -17), ++ p0 = svcmpgt (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s8.c +new file mode 100644 +index 000000000..710c2e602 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s8.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_s8_tied: ++** ( ++** cmpgt p0\.b, p0/z, z0\.b, z1\.b ++** | ++** cmplt p0\.b, p0/z, z1\.b, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_s8_tied, svint8_t, ++ p0 = svcmpgt_s8 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_s8_untied: ++** ( ++** cmpgt p0\.b, p1/z, z0\.b, z1\.b ++** | ++** cmplt p0\.b, p1/z, z1\.b, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_s8_untied, svint8_t, ++ p0 = svcmpgt_s8 (p1, z0, z1), ++ p0 = svcmpgt (p1, z0, z1)) ++ ++/* ++** cmpgt_w0_s8: ++** mov (z[0-9]+\.b), w0 ++** ( ++** cmpgt p0\.b, p1/z, 
z0\.b, \1 ++** | ++** cmplt p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_w0_s8, svint8_t, int8_t, ++ p0 = svcmpgt_n_s8 (p1, z0, x0), ++ p0 = svcmpgt (p1, z0, x0)) ++ ++/* ++** cmpgt_0_s8: ++** cmpgt p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_s8, svint8_t, ++ p0 = svcmpgt_n_s8 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_s8: ++** cmpgt p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_s8, svint8_t, ++ p0 = svcmpgt_n_s8 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) ++ ++/* ++** cmpgt_15_s8: ++** cmpgt p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_15_s8, svint8_t, ++ p0 = svcmpgt_n_s8 (p1, z0, 15), ++ p0 = svcmpgt (p1, z0, 15)) ++ ++/* ++** cmpgt_16_s8: ++** mov (z[0-9]+\.b), #16 ++** ( ++** cmpgt p0\.b, p1/z, z0\.b, \1 ++** | ++** cmplt p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_16_s8, svint8_t, ++ p0 = svcmpgt_n_s8 (p1, z0, 16), ++ p0 = svcmpgt (p1, z0, 16)) ++ ++/* ++** cmpgt_m1_s8: ++** cmpgt p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m1_s8, svint8_t, ++ p0 = svcmpgt_n_s8 (p1, z0, -1), ++ p0 = svcmpgt (p1, z0, -1)) ++ ++/* ++** cmpgt_m16_s8: ++** cmpgt p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m16_s8, svint8_t, ++ p0 = svcmpgt_n_s8 (p1, z0, -16), ++ p0 = svcmpgt (p1, z0, -16)) ++ ++/* ++** cmpgt_m17_s8: ++** mov (z[0-9]+\.b), #-17 ++** ( ++** cmpgt p0\.b, p1/z, z0\.b, \1 ++** | ++** cmplt p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m17_s8, svint8_t, ++ p0 = svcmpgt_n_s8 (p1, z0, -17), ++ p0 = svcmpgt (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u16.c +new file mode 100644 +index 000000000..48e99c72c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u16.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_u16_tied: ++** ( ++** cmphi p0\.h, p0/z, z0\.h, z1\.h ++** | ++** cmplo p0\.h, p0/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_u16_tied, svuint16_t, ++ p0 = svcmpgt_u16 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_u16_untied: ++** ( ++** cmphi p0\.h, p1/z, z0\.h, z1\.h ++** | ++** cmplo p0\.h, p1/z, z1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_u16_untied, svuint16_t, ++ p0 = svcmpgt_u16 (p1, z0, z1), ++ p0 = svcmpgt (p1, z0, z1)) ++ ++/* ++** cmpgt_w0_u16: ++** mov (z[0-9]+\.h), w0 ++** ( ++** cmphi p0\.h, p1/z, z0\.h, \1 ++** | ++** cmplo p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_w0_u16, svuint16_t, uint16_t, ++ p0 = svcmpgt_n_u16 (p1, z0, x0), ++ p0 = svcmpgt (p1, z0, x0)) ++ ++/* ++** cmpgt_0_u16: ++** cmphi p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_u16, svuint16_t, ++ p0 = svcmpgt_n_u16 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_u16: ++** cmphi p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_u16, svuint16_t, ++ p0 = svcmpgt_n_u16 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) ++ ++/* ++** cmpgt_15_u16: ++** cmphi p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_15_u16, svuint16_t, ++ p0 = svcmpgt_n_u16 (p1, z0, 15), ++ p0 = svcmpgt (p1, z0, 15)) ++ ++/* ++** cmpgt_16_u16: ++** cmphi p0\.h, p1/z, z0\.h, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_16_u16, svuint16_t, ++ p0 = svcmpgt_n_u16 (p1, z0, 16), ++ p0 = svcmpgt (p1, z0, 16)) ++ ++/* ++** cmpgt_127_u16: ++** cmphi p0\.h, 
p1/z, z0\.h, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_127_u16, svuint16_t, ++ p0 = svcmpgt_n_u16 (p1, z0, 127), ++ p0 = svcmpgt (p1, z0, 127)) ++ ++/* ++** cmpgt_128_u16: ++** mov (z[0-9]+\.h), #128 ++** ( ++** cmphi p0\.h, p1/z, z0\.h, \1 ++** | ++** cmplo p0\.h, p1/z, \1, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_128_u16, svuint16_t, ++ p0 = svcmpgt_n_u16 (p1, z0, 128), ++ p0 = svcmpgt (p1, z0, 128)) ++ ++/* ++** cmpgt_m1_u16: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphi p0\.h, p1/z, z0\.h, \1\.h ++** | ++** cmplo p0\.h, p1/z, \1\.h, z0\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m1_u16, svuint16_t, ++ p0 = svcmpgt_n_u16 (p1, z0, -1), ++ p0 = svcmpgt (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u32.c +new file mode 100644 +index 000000000..408037d72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u32.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_u32_tied: ++** ( ++** cmphi p0\.s, p0/z, z0\.s, z1\.s ++** | ++** cmplo p0\.s, p0/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_u32_tied, svuint32_t, ++ p0 = svcmpgt_u32 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_u32_untied: ++** ( ++** cmphi p0\.s, p1/z, z0\.s, z1\.s ++** | ++** cmplo p0\.s, p1/z, z1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_u32_untied, svuint32_t, ++ p0 = svcmpgt_u32 (p1, z0, z1), ++ p0 = svcmpgt (p1, z0, z1)) ++ ++/* ++** cmpgt_w0_u32: ++** mov (z[0-9]+\.s), w0 ++** ( ++** cmphi p0\.s, p1/z, z0\.s, \1 ++** | ++** cmplo p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_w0_u32, svuint32_t, uint32_t, ++ p0 = svcmpgt_n_u32 (p1, z0, x0), ++ p0 = svcmpgt (p1, z0, x0)) ++ ++/* ++** cmpgt_0_u32: ++** cmphi p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_u32, svuint32_t, ++ p0 = svcmpgt_n_u32 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_u32: ++** cmphi p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_u32, svuint32_t, ++ p0 = svcmpgt_n_u32 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) ++ ++/* ++** cmpgt_15_u32: ++** cmphi p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_15_u32, svuint32_t, ++ p0 = svcmpgt_n_u32 (p1, z0, 15), ++ p0 = svcmpgt (p1, z0, 15)) ++ ++/* ++** cmpgt_16_u32: ++** cmphi p0\.s, p1/z, z0\.s, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_16_u32, svuint32_t, ++ p0 = svcmpgt_n_u32 (p1, z0, 16), ++ p0 = svcmpgt (p1, z0, 16)) ++ ++/* ++** cmpgt_127_u32: ++** cmphi p0\.s, p1/z, z0\.s, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_127_u32, svuint32_t, ++ p0 = svcmpgt_n_u32 (p1, z0, 127), ++ p0 = svcmpgt (p1, z0, 127)) ++ ++/* ++** cmpgt_128_u32: ++** mov (z[0-9]+\.s), #128 ++** ( ++** cmphi p0\.s, p1/z, z0\.s, \1 ++** | ++** cmplo p0\.s, p1/z, \1, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_128_u32, svuint32_t, ++ p0 = svcmpgt_n_u32 (p1, z0, 128), ++ p0 = svcmpgt (p1, z0, 128)) ++ ++/* ++** cmpgt_m1_u32: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphi p0\.s, p1/z, z0\.s, \1\.s ++** | ++** cmplo p0\.s, p1/z, \1\.s, z0\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m1_u32, svuint32_t, ++ p0 = svcmpgt_n_u32 (p1, z0, -1), ++ p0 = svcmpgt (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u64.c +new file mode 100644 +index 000000000..f76a23e49 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u64.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_u64_tied: ++** ( ++** cmphi p0\.d, p0/z, z0\.d, z1\.d ++** | ++** cmplo p0\.d, p0/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_u64_tied, svuint64_t, ++ p0 = svcmpgt_u64 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_u64_untied: ++** ( ++** cmphi p0\.d, p1/z, z0\.d, z1\.d ++** | ++** cmplo p0\.d, p1/z, z1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_u64_untied, svuint64_t, ++ p0 = svcmpgt_u64 (p1, z0, z1), ++ p0 = svcmpgt (p1, z0, z1)) ++ ++/* ++** cmpgt_x0_u64: ++** mov (z[0-9]+\.d), x0 ++** ( ++** cmphi p0\.d, p1/z, z0\.d, \1 ++** | ++** cmplo p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_x0_u64, svuint64_t, uint64_t, ++ p0 = svcmpgt_n_u64 (p1, z0, x0), ++ p0 = svcmpgt (p1, z0, x0)) ++ ++/* ++** cmpgt_0_u64: ++** cmphi p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_u64, svuint64_t, ++ p0 = svcmpgt_n_u64 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_u64: ++** cmphi p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_u64, svuint64_t, ++ p0 = svcmpgt_n_u64 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) ++ ++/* ++** cmpgt_15_u64: ++** cmphi p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_15_u64, svuint64_t, ++ p0 = svcmpgt_n_u64 (p1, z0, 15), ++ p0 = svcmpgt (p1, z0, 15)) ++ ++/* ++** cmpgt_16_u64: ++** cmphi p0\.d, p1/z, z0\.d, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_16_u64, svuint64_t, ++ p0 = svcmpgt_n_u64 (p1, z0, 16), ++ p0 = svcmpgt (p1, z0, 16)) ++ ++/* ++** cmpgt_127_u64: ++** cmphi p0\.d, p1/z, z0\.d, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_127_u64, svuint64_t, ++ p0 = svcmpgt_n_u64 (p1, z0, 127), ++ p0 = svcmpgt (p1, z0, 127)) ++ ++/* ++** cmpgt_128_u64: ++** mov (z[0-9]+\.d), #128 ++** ( ++** cmphi p0\.d, p1/z, z0\.d, \1 ++** | ++** cmplo p0\.d, p1/z, \1, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_128_u64, svuint64_t, ++ p0 = svcmpgt_n_u64 (p1, z0, 128), ++ p0 = svcmpgt (p1, z0, 128)) ++ ++/* ++** cmpgt_m1_u64: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphi p0\.d, p1/z, z0\.d, \1\.d ++** | ++** cmplo p0\.d, p1/z, \1\.d, z0\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m1_u64, svuint64_t, ++ p0 = svcmpgt_n_u64 (p1, z0, -1), ++ p0 = svcmpgt (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u8.c +new file mode 100644 +index 000000000..4f28331f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u8.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_u8_tied: ++** ( ++** cmphi p0\.b, p0/z, z0\.b, z1\.b ++** | ++** cmplo p0\.b, p0/z, z1\.b, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_u8_tied, svuint8_t, ++ p0 = svcmpgt_u8 (p0, z0, z1), ++ p0 = svcmpgt (p0, z0, z1)) ++ ++/* ++** cmpgt_u8_untied: ++** ( ++** cmphi p0\.b, p1/z, z0\.b, z1\.b ++** | ++** cmplo p0\.b, p1/z, z1\.b, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_u8_untied, svuint8_t, ++ p0 = svcmpgt_u8 (p1, z0, z1), ++ p0 = svcmpgt (p1, z0, z1)) ++ ++/* ++** cmpgt_w0_u8: ++** mov (z[0-9]+\.b), w0 ++** ( ++** cmphi p0\.b, p1/z, z0\.b, \1 ++** | ++** cmplo p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_w0_u8, svuint8_t, uint8_t, ++ p0 = svcmpgt_n_u8 (p1, z0, x0), ++ p0 = 
svcmpgt (p1, z0, x0)) ++ ++/* ++** cmpgt_0_u8: ++** cmphi p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_0_u8, svuint8_t, ++ p0 = svcmpgt_n_u8 (p1, z0, 0), ++ p0 = svcmpgt (p1, z0, 0)) ++ ++/* ++** cmpgt_1_u8: ++** cmphi p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_1_u8, svuint8_t, ++ p0 = svcmpgt_n_u8 (p1, z0, 1), ++ p0 = svcmpgt (p1, z0, 1)) ++ ++/* ++** cmpgt_15_u8: ++** cmphi p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_15_u8, svuint8_t, ++ p0 = svcmpgt_n_u8 (p1, z0, 15), ++ p0 = svcmpgt (p1, z0, 15)) ++ ++/* ++** cmpgt_16_u8: ++** cmphi p0\.b, p1/z, z0\.b, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_16_u8, svuint8_t, ++ p0 = svcmpgt_n_u8 (p1, z0, 16), ++ p0 = svcmpgt (p1, z0, 16)) ++ ++/* ++** cmpgt_127_u8: ++** cmphi p0\.b, p1/z, z0\.b, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_127_u8, svuint8_t, ++ p0 = svcmpgt_n_u8 (p1, z0, 127), ++ p0 = svcmpgt (p1, z0, 127)) ++ ++/* ++** cmpgt_128_u8: ++** mov (z[0-9]+\.b), #-128 ++** ( ++** cmphi p0\.b, p1/z, z0\.b, \1 ++** | ++** cmplo p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_128_u8, svuint8_t, ++ p0 = svcmpgt_n_u8 (p1, z0, 128), ++ p0 = svcmpgt (p1, z0, 128)) ++ ++/* ++** cmpgt_m1_u8: ++** mov (z[0-9]+\.b), #-1 ++** ( ++** cmphi p0\.b, p1/z, z0\.b, \1 ++** | ++** cmplo p0\.b, p1/z, \1, z0\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_m1_u8, svuint8_t, ++ p0 = svcmpgt_n_u8 (p1, z0, -1), ++ p0 = svcmpgt (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s16.c +new file mode 100644 +index 000000000..07d3bbbd9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_wide_s16_tied: ++** cmpgt p0\.h, p0/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_s16_tied, svint16_t, svint64_t, ++ p0 = svcmpgt_wide_s16 (p0, z0, z1), ++ p0 = svcmpgt_wide (p0, z0, z1)) ++ ++/* ++** cmpgt_wide_s16_untied: ++** cmpgt p0\.h, p1/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_s16_untied, svint16_t, svint64_t, ++ p0 = svcmpgt_wide_s16 (p1, z0, z1), ++ p0 = svcmpgt_wide (p1, z0, z1)) ++ ++/* ++** cmpgt_wide_x0_s16: ++** mov (z[0-9]+\.d), x0 ++** cmpgt p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_wide_x0_s16, svint16_t, int64_t, ++ p0 = svcmpgt_wide_n_s16 (p1, z0, x0), ++ p0 = svcmpgt_wide (p1, z0, x0)) ++ ++/* ++** cmpgt_wide_0_s16: ++** cmpgt p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_0_s16, svint16_t, ++ p0 = svcmpgt_wide_n_s16 (p1, z0, 0), ++ p0 = svcmpgt_wide (p1, z0, 0)) ++ ++/* ++** cmpgt_wide_1_s16: ++** cmpgt p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_1_s16, svint16_t, ++ p0 = svcmpgt_wide_n_s16 (p1, z0, 1), ++ p0 = svcmpgt_wide (p1, z0, 1)) ++ ++/* ++** cmpgt_wide_15_s16: ++** cmpgt p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_15_s16, svint16_t, ++ p0 = svcmpgt_wide_n_s16 (p1, z0, 15), ++ p0 = svcmpgt_wide (p1, z0, 15)) ++ ++/* ++** cmpgt_wide_16_s16: ++** mov (z[0-9]+\.d), #16 ++** cmpgt p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_16_s16, svint16_t, ++ p0 = svcmpgt_wide_n_s16 (p1, z0, 16), ++ p0 = svcmpgt_wide (p1, z0, 16)) ++ ++/* ++** cmpgt_wide_m1_s16: ++** cmpgt p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m1_s16, svint16_t, ++ p0 = svcmpgt_wide_n_s16 (p1, z0, -1), 
++ p0 = svcmpgt_wide (p1, z0, -1)) ++ ++/* ++** cmpgt_wide_m16_s16: ++** cmpgt p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m16_s16, svint16_t, ++ p0 = svcmpgt_wide_n_s16 (p1, z0, -16), ++ p0 = svcmpgt_wide (p1, z0, -16)) ++ ++/* ++** cmpgt_wide_m17_s16: ++** mov (z[0-9]+\.d), #-17 ++** cmpgt p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m17_s16, svint16_t, ++ p0 = svcmpgt_wide_n_s16 (p1, z0, -17), ++ p0 = svcmpgt_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s32.c +new file mode 100644 +index 000000000..f984362e6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_wide_s32_tied: ++** cmpgt p0\.s, p0/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_s32_tied, svint32_t, svint64_t, ++ p0 = svcmpgt_wide_s32 (p0, z0, z1), ++ p0 = svcmpgt_wide (p0, z0, z1)) ++ ++/* ++** cmpgt_wide_s32_untied: ++** cmpgt p0\.s, p1/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_s32_untied, svint32_t, svint64_t, ++ p0 = svcmpgt_wide_s32 (p1, z0, z1), ++ p0 = svcmpgt_wide (p1, z0, z1)) ++ ++/* ++** cmpgt_wide_x0_s32: ++** mov (z[0-9]+\.d), x0 ++** cmpgt p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_wide_x0_s32, svint32_t, int64_t, ++ p0 = svcmpgt_wide_n_s32 (p1, z0, x0), ++ p0 = svcmpgt_wide (p1, z0, x0)) ++ ++/* ++** cmpgt_wide_0_s32: ++** cmpgt p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_0_s32, svint32_t, ++ p0 = svcmpgt_wide_n_s32 (p1, z0, 0), ++ p0 = svcmpgt_wide (p1, z0, 0)) ++ ++/* ++** cmpgt_wide_1_s32: ++** cmpgt p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_1_s32, svint32_t, ++ p0 = svcmpgt_wide_n_s32 (p1, z0, 1), ++ p0 = svcmpgt_wide (p1, z0, 1)) ++ ++/* ++** cmpgt_wide_15_s32: ++** cmpgt p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_15_s32, svint32_t, ++ p0 = svcmpgt_wide_n_s32 (p1, z0, 15), ++ p0 = svcmpgt_wide (p1, z0, 15)) ++ ++/* ++** cmpgt_wide_16_s32: ++** mov (z[0-9]+\.d), #16 ++** cmpgt p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_16_s32, svint32_t, ++ p0 = svcmpgt_wide_n_s32 (p1, z0, 16), ++ p0 = svcmpgt_wide (p1, z0, 16)) ++ ++/* ++** cmpgt_wide_m1_s32: ++** cmpgt p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m1_s32, svint32_t, ++ p0 = svcmpgt_wide_n_s32 (p1, z0, -1), ++ p0 = svcmpgt_wide (p1, z0, -1)) ++ ++/* ++** cmpgt_wide_m16_s32: ++** cmpgt p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m16_s32, svint32_t, ++ p0 = svcmpgt_wide_n_s32 (p1, z0, -16), ++ p0 = svcmpgt_wide (p1, z0, -16)) ++ ++/* ++** cmpgt_wide_m17_s32: ++** mov (z[0-9]+\.d), #-17 ++** cmpgt p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m17_s32, svint32_t, ++ p0 = svcmpgt_wide_n_s32 (p1, z0, -17), ++ p0 = svcmpgt_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s8.c +new file mode 100644 +index 000000000..07047a315 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_wide_s8_tied: ++** cmpgt p0\.b, p0/z, z0\.b, z1\.d ++** ret ++*/ 
++TEST_COMPARE_DUAL_Z (cmpgt_wide_s8_tied, svint8_t, svint64_t, ++ p0 = svcmpgt_wide_s8 (p0, z0, z1), ++ p0 = svcmpgt_wide (p0, z0, z1)) ++ ++/* ++** cmpgt_wide_s8_untied: ++** cmpgt p0\.b, p1/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_s8_untied, svint8_t, svint64_t, ++ p0 = svcmpgt_wide_s8 (p1, z0, z1), ++ p0 = svcmpgt_wide (p1, z0, z1)) ++ ++/* ++** cmpgt_wide_x0_s8: ++** mov (z[0-9]+\.d), x0 ++** cmpgt p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_wide_x0_s8, svint8_t, int64_t, ++ p0 = svcmpgt_wide_n_s8 (p1, z0, x0), ++ p0 = svcmpgt_wide (p1, z0, x0)) ++ ++/* ++** cmpgt_wide_0_s8: ++** cmpgt p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_0_s8, svint8_t, ++ p0 = svcmpgt_wide_n_s8 (p1, z0, 0), ++ p0 = svcmpgt_wide (p1, z0, 0)) ++ ++/* ++** cmpgt_wide_1_s8: ++** cmpgt p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_1_s8, svint8_t, ++ p0 = svcmpgt_wide_n_s8 (p1, z0, 1), ++ p0 = svcmpgt_wide (p1, z0, 1)) ++ ++/* ++** cmpgt_wide_15_s8: ++** cmpgt p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_15_s8, svint8_t, ++ p0 = svcmpgt_wide_n_s8 (p1, z0, 15), ++ p0 = svcmpgt_wide (p1, z0, 15)) ++ ++/* ++** cmpgt_wide_16_s8: ++** mov (z[0-9]+\.d), #16 ++** cmpgt p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_16_s8, svint8_t, ++ p0 = svcmpgt_wide_n_s8 (p1, z0, 16), ++ p0 = svcmpgt_wide (p1, z0, 16)) ++ ++/* ++** cmpgt_wide_m1_s8: ++** cmpgt p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m1_s8, svint8_t, ++ p0 = svcmpgt_wide_n_s8 (p1, z0, -1), ++ p0 = svcmpgt_wide (p1, z0, -1)) ++ ++/* ++** cmpgt_wide_m16_s8: ++** cmpgt p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m16_s8, svint8_t, ++ p0 = svcmpgt_wide_n_s8 (p1, z0, -16), ++ p0 = svcmpgt_wide (p1, z0, -16)) ++ ++/* ++** cmpgt_wide_m17_s8: ++** mov (z[0-9]+\.d), #-17 ++** cmpgt p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m17_s8, svint8_t, ++ p0 = svcmpgt_wide_n_s8 (p1, z0, -17), ++ p0 = svcmpgt_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u16.c +new file mode 100644 +index 000000000..bcffb88c0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_wide_u16_tied: ++** cmphi p0\.h, p0/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_u16_tied, svuint16_t, svuint64_t, ++ p0 = svcmpgt_wide_u16 (p0, z0, z1), ++ p0 = svcmpgt_wide (p0, z0, z1)) ++ ++/* ++** cmpgt_wide_u16_untied: ++** cmphi p0\.h, p1/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_u16_untied, svuint16_t, svuint64_t, ++ p0 = svcmpgt_wide_u16 (p1, z0, z1), ++ p0 = svcmpgt_wide (p1, z0, z1)) ++ ++/* ++** cmpgt_wide_x0_u16: ++** mov (z[0-9]+\.d), x0 ++** cmphi p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_wide_x0_u16, svuint16_t, uint64_t, ++ p0 = svcmpgt_wide_n_u16 (p1, z0, x0), ++ p0 = svcmpgt_wide (p1, z0, x0)) ++ ++/* ++** cmpgt_wide_0_u16: ++** cmphi p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_0_u16, svuint16_t, ++ p0 = svcmpgt_wide_n_u16 (p1, z0, 0), ++ p0 = svcmpgt_wide (p1, z0, 0)) ++ ++/* ++** cmpgt_wide_1_u16: ++** cmphi p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_1_u16, svuint16_t, ++ p0 = svcmpgt_wide_n_u16 (p1, z0, 1), ++ p0 = svcmpgt_wide (p1, z0, 1)) 
++ ++/* ++** cmpgt_wide_15_u16: ++** cmphi p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_15_u16, svuint16_t, ++ p0 = svcmpgt_wide_n_u16 (p1, z0, 15), ++ p0 = svcmpgt_wide (p1, z0, 15)) ++ ++/* ++** cmpgt_wide_16_u16: ++** cmphi p0\.h, p1/z, z0\.h, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_16_u16, svuint16_t, ++ p0 = svcmpgt_wide_n_u16 (p1, z0, 16), ++ p0 = svcmpgt_wide (p1, z0, 16)) ++ ++/* ++** cmpgt_wide_127_u16: ++** cmphi p0\.h, p1/z, z0\.h, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_127_u16, svuint16_t, ++ p0 = svcmpgt_wide_n_u16 (p1, z0, 127), ++ p0 = svcmpgt_wide (p1, z0, 127)) ++ ++/* ++** cmpgt_wide_128_u16: ++** mov (z[0-9]+\.d), #128 ++** cmphi p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_128_u16, svuint16_t, ++ p0 = svcmpgt_wide_n_u16 (p1, z0, 128), ++ p0 = svcmpgt_wide (p1, z0, 128)) ++ ++/* ++** cmpgt_wide_m1_u16: ++** mov (z[0-9]+)\.b, #-1 ++** cmphi p0\.h, p1/z, z0\.h, \1\.d ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m1_u16, svuint16_t, ++ p0 = svcmpgt_wide_n_u16 (p1, z0, -1), ++ p0 = svcmpgt_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u32.c +new file mode 100644 +index 000000000..65c0231e5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_wide_u32_tied: ++** cmphi p0\.s, p0/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_u32_tied, svuint32_t, svuint64_t, ++ p0 = svcmpgt_wide_u32 (p0, z0, z1), ++ p0 = svcmpgt_wide (p0, z0, z1)) ++ ++/* ++** cmpgt_wide_u32_untied: ++** cmphi p0\.s, p1/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_u32_untied, svuint32_t, svuint64_t, ++ p0 = svcmpgt_wide_u32 (p1, z0, z1), ++ p0 = svcmpgt_wide (p1, z0, z1)) ++ ++/* ++** cmpgt_wide_x0_u32: ++** mov (z[0-9]+\.d), x0 ++** cmphi p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_wide_x0_u32, svuint32_t, uint64_t, ++ p0 = svcmpgt_wide_n_u32 (p1, z0, x0), ++ p0 = svcmpgt_wide (p1, z0, x0)) ++ ++/* ++** cmpgt_wide_0_u32: ++** cmphi p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_0_u32, svuint32_t, ++ p0 = svcmpgt_wide_n_u32 (p1, z0, 0), ++ p0 = svcmpgt_wide (p1, z0, 0)) ++ ++/* ++** cmpgt_wide_1_u32: ++** cmphi p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_1_u32, svuint32_t, ++ p0 = svcmpgt_wide_n_u32 (p1, z0, 1), ++ p0 = svcmpgt_wide (p1, z0, 1)) ++ ++/* ++** cmpgt_wide_15_u32: ++** cmphi p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_15_u32, svuint32_t, ++ p0 = svcmpgt_wide_n_u32 (p1, z0, 15), ++ p0 = svcmpgt_wide (p1, z0, 15)) ++ ++/* ++** cmpgt_wide_16_u32: ++** cmphi p0\.s, p1/z, z0\.s, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_16_u32, svuint32_t, ++ p0 = svcmpgt_wide_n_u32 (p1, z0, 16), ++ p0 = svcmpgt_wide (p1, z0, 16)) ++ ++/* ++** cmpgt_wide_127_u32: ++** cmphi p0\.s, p1/z, z0\.s, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_127_u32, svuint32_t, ++ p0 = svcmpgt_wide_n_u32 (p1, z0, 127), ++ p0 = svcmpgt_wide (p1, z0, 127)) ++ ++/* ++** cmpgt_wide_128_u32: ++** mov (z[0-9]+\.d), #128 ++** cmphi p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_128_u32, svuint32_t, ++ p0 = svcmpgt_wide_n_u32 (p1, z0, 128), ++ p0 = svcmpgt_wide (p1, z0, 128)) ++ ++/* ++** cmpgt_wide_m1_u32: ++** mov (z[0-9]+)\.b, #-1 ++** cmphi p0\.s, p1/z, z0\.s, \1\.d ++** 
ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m1_u32, svuint32_t, ++ p0 = svcmpgt_wide_n_u32 (p1, z0, -1), ++ p0 = svcmpgt_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u8.c +new file mode 100644 +index 000000000..0d1142f27 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpgt_wide_u8_tied: ++** cmphi p0\.b, p0/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_u8_tied, svuint8_t, svuint64_t, ++ p0 = svcmpgt_wide_u8 (p0, z0, z1), ++ p0 = svcmpgt_wide (p0, z0, z1)) ++ ++/* ++** cmpgt_wide_u8_untied: ++** cmphi p0\.b, p1/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpgt_wide_u8_untied, svuint8_t, svuint64_t, ++ p0 = svcmpgt_wide_u8 (p1, z0, z1), ++ p0 = svcmpgt_wide (p1, z0, z1)) ++ ++/* ++** cmpgt_wide_x0_u8: ++** mov (z[0-9]+\.d), x0 ++** cmphi p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpgt_wide_x0_u8, svuint8_t, uint64_t, ++ p0 = svcmpgt_wide_n_u8 (p1, z0, x0), ++ p0 = svcmpgt_wide (p1, z0, x0)) ++ ++/* ++** cmpgt_wide_0_u8: ++** cmphi p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_0_u8, svuint8_t, ++ p0 = svcmpgt_wide_n_u8 (p1, z0, 0), ++ p0 = svcmpgt_wide (p1, z0, 0)) ++ ++/* ++** cmpgt_wide_1_u8: ++** cmphi p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_1_u8, svuint8_t, ++ p0 = svcmpgt_wide_n_u8 (p1, z0, 1), ++ p0 = svcmpgt_wide (p1, z0, 1)) ++ ++/* ++** cmpgt_wide_15_u8: ++** cmphi p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_15_u8, svuint8_t, ++ p0 = svcmpgt_wide_n_u8 (p1, z0, 15), ++ p0 = svcmpgt_wide (p1, z0, 15)) ++ ++/* ++** cmpgt_wide_16_u8: ++** cmphi p0\.b, p1/z, z0\.b, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_16_u8, svuint8_t, ++ p0 = svcmpgt_wide_n_u8 (p1, z0, 16), ++ p0 = svcmpgt_wide (p1, z0, 16)) ++ ++/* ++** cmpgt_wide_127_u8: ++** cmphi p0\.b, p1/z, z0\.b, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_127_u8, svuint8_t, ++ p0 = svcmpgt_wide_n_u8 (p1, z0, 127), ++ p0 = svcmpgt_wide (p1, z0, 127)) ++ ++/* ++** cmpgt_wide_128_u8: ++** mov (z[0-9]+\.d), #128 ++** cmphi p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_128_u8, svuint8_t, ++ p0 = svcmpgt_wide_n_u8 (p1, z0, 128), ++ p0 = svcmpgt_wide (p1, z0, 128)) ++ ++/* ++** cmpgt_wide_m1_u8: ++** mov (z[0-9]+)\.b, #-1 ++** cmphi p0\.b, p1/z, z0\.b, \1\.d ++** ret ++*/ ++TEST_COMPARE_Z (cmpgt_wide_m1_u8, svuint8_t, ++ p0 = svcmpgt_wide_n_u8 (p1, z0, -1), ++ p0 = svcmpgt_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f16.c +new file mode 100644 +index 000000000..7d500590f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f16.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_f16_tied: ++** ( ++** fcmge p0\.h, p0/z, z1\.h, z0\.h ++** | ++** fcmle p0\.h, p0/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_f16_tied, svfloat16_t, ++ p0 = svcmple_f16 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_f16_untied: ++** ( ++** fcmge p0\.h, p1/z, z1\.h, z0\.h ++** | ++** fcmle p0\.h, p1/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_f16_untied, svfloat16_t, ++ p0 = svcmple_f16 (p1, z0, z1), ++ p0 = svcmple 
(p1, z0, z1)) ++ ++/* ++** cmple_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** ( ++** fcmge p0\.h, p1/z, \1, z0\.h ++** | ++** fcmle p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmple_h4_f16, svfloat16_t, float16_t, ++ p0 = svcmple_n_f16 (p1, z0, d4), ++ p0 = svcmple (p1, z0, d4)) ++ ++/* ++** cmple_0_f16: ++** fcmle p0\.h, p1/z, z0\.h, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_f16, svfloat16_t, ++ p0 = svcmple_n_f16 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** ( ++** fcmge p0\.h, p1/z, \1, z0\.h ++** | ++** fcmle p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_f16, svfloat16_t, ++ p0 = svcmple_n_f16 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f32.c +new file mode 100644 +index 000000000..3df63fef7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f32.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_f32_tied: ++** ( ++** fcmge p0\.s, p0/z, z1\.s, z0\.s ++** | ++** fcmle p0\.s, p0/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_f32_tied, svfloat32_t, ++ p0 = svcmple_f32 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_f32_untied: ++** ( ++** fcmge p0\.s, p1/z, z1\.s, z0\.s ++** | ++** fcmle p0\.s, p1/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_f32_untied, svfloat32_t, ++ p0 = svcmple_f32 (p1, z0, z1), ++ p0 = svcmple (p1, z0, z1)) ++ ++/* ++** cmple_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** ( ++** fcmge p0\.s, p1/z, \1, z0\.s ++** | ++** fcmle p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmple_s4_f32, svfloat32_t, float32_t, ++ p0 = svcmple_n_f32 (p1, z0, d4), ++ p0 = svcmple (p1, z0, d4)) ++ ++/* ++** cmple_0_f32: ++** fcmle p0\.s, p1/z, z0\.s, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_f32, svfloat32_t, ++ p0 = svcmple_n_f32 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** ( ++** fcmge p0\.s, p1/z, \1, z0\.s ++** | ++** fcmle p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_f32, svfloat32_t, ++ p0 = svcmple_n_f32 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f64.c +new file mode 100644 +index 000000000..5946a1b3a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f64.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_f64_tied: ++** ( ++** fcmge p0\.d, p0/z, z1\.d, z0\.d ++** | ++** fcmle p0\.d, p0/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_f64_tied, svfloat64_t, ++ p0 = svcmple_f64 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_f64_untied: ++** ( ++** fcmge p0\.d, p1/z, z1\.d, z0\.d ++** | ++** fcmle p0\.d, p1/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_f64_untied, svfloat64_t, ++ p0 = svcmple_f64 (p1, z0, z1), ++ p0 = svcmple (p1, z0, z1)) ++ ++/* ++** cmple_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** ( ++** fcmge p0\.d, p1/z, \1, z0\.d ++** | ++** fcmle p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmple_d4_f64, svfloat64_t, float64_t, ++ p0 = svcmple_n_f64 (p1, z0, d4), ++ p0 = svcmple (p1, z0, d4)) ++ ++/* ++** cmple_0_f64: ++** fcmle p0\.d, p1/z, z0\.d, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_f64, svfloat64_t, ++ p0 = svcmple_n_f64 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** ( ++** fcmge p0\.d, p1/z, \1, z0\.d ++** | ++** fcmle p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_f64, svfloat64_t, ++ p0 = svcmple_n_f64 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s16.c +new file mode 100644 +index 000000000..9b221bb4c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s16.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_s16_tied: ++** ( ++** cmpge p0\.h, p0/z, z1\.h, z0\.h ++** | ++** cmple p0\.h, p0/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_s16_tied, svint16_t, ++ p0 = svcmple_s16 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_s16_untied: ++** ( ++** cmpge p0\.h, p1/z, z1\.h, z0\.h ++** | ++** cmple p0\.h, p1/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_s16_untied, svint16_t, ++ p0 = svcmple_s16 (p1, z0, z1), ++ p0 = svcmple (p1, z0, z1)) ++ ++/* ++** cmple_w0_s16: ++** mov (z[0-9]+\.h), w0 ++** ( ++** cmpge p0\.h, p1/z, \1, z0\.h ++** | ++** cmple p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_w0_s16, svint16_t, int16_t, ++ p0 = svcmple_n_s16 (p1, z0, x0), ++ p0 = svcmple (p1, z0, x0)) ++ ++/* ++** cmple_0_s16: ++** cmple p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_s16, svint16_t, ++ p0 = svcmple_n_s16 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_s16: ++** cmple p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_s16, svint16_t, ++ p0 = svcmple_n_s16 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) ++ ++/* ++** cmple_15_s16: ++** cmple p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_15_s16, svint16_t, ++ p0 = svcmple_n_s16 (p1, z0, 15), ++ p0 = svcmple (p1, z0, 15)) ++ ++/* ++** 
cmple_16_s16: ++** mov (z[0-9]+\.h), #16 ++** ( ++** cmpge p0\.h, p1/z, \1, z0\.h ++** | ++** cmple p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_16_s16, svint16_t, ++ p0 = svcmple_n_s16 (p1, z0, 16), ++ p0 = svcmple (p1, z0, 16)) ++ ++/* ++** cmple_m1_s16: ++** cmple p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m1_s16, svint16_t, ++ p0 = svcmple_n_s16 (p1, z0, -1), ++ p0 = svcmple (p1, z0, -1)) ++ ++/* ++** cmple_m16_s16: ++** cmple p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m16_s16, svint16_t, ++ p0 = svcmple_n_s16 (p1, z0, -16), ++ p0 = svcmple (p1, z0, -16)) ++ ++/* ++** cmple_m17_s16: ++** mov (z[0-9]+\.h), #-17 ++** ( ++** cmpge p0\.h, p1/z, \1, z0\.h ++** | ++** cmple p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m17_s16, svint16_t, ++ p0 = svcmple_n_s16 (p1, z0, -17), ++ p0 = svcmple (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s32.c +new file mode 100644 +index 000000000..b0c8367e2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s32.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_s32_tied: ++** ( ++** cmpge p0\.s, p0/z, z1\.s, z0\.s ++** | ++** cmple p0\.s, p0/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_s32_tied, svint32_t, ++ p0 = svcmple_s32 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_s32_untied: ++** ( ++** cmpge p0\.s, p1/z, z1\.s, z0\.s ++** | ++** cmple p0\.s, p1/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_s32_untied, svint32_t, ++ p0 = svcmple_s32 (p1, z0, z1), ++ p0 = svcmple (p1, z0, z1)) ++ ++/* ++** cmple_w0_s32: ++** mov (z[0-9]+\.s), w0 ++** ( ++** cmpge p0\.s, p1/z, \1, z0\.s ++** | ++** cmple p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_w0_s32, svint32_t, int32_t, ++ p0 = svcmple_n_s32 (p1, z0, x0), ++ p0 = svcmple (p1, z0, x0)) ++ ++/* ++** cmple_0_s32: ++** cmple p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_s32, svint32_t, ++ p0 = svcmple_n_s32 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_s32: ++** cmple p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_s32, svint32_t, ++ p0 = svcmple_n_s32 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) ++ ++/* ++** cmple_15_s32: ++** cmple p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_15_s32, svint32_t, ++ p0 = svcmple_n_s32 (p1, z0, 15), ++ p0 = svcmple (p1, z0, 15)) ++ ++/* ++** cmple_16_s32: ++** mov (z[0-9]+\.s), #16 ++** ( ++** cmpge p0\.s, p1/z, \1, z0\.s ++** | ++** cmple p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_16_s32, svint32_t, ++ p0 = svcmple_n_s32 (p1, z0, 16), ++ p0 = svcmple (p1, z0, 16)) ++ ++/* ++** cmple_m1_s32: ++** cmple p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m1_s32, svint32_t, ++ p0 = svcmple_n_s32 (p1, z0, -1), ++ p0 = svcmple (p1, z0, -1)) ++ ++/* ++** cmple_m16_s32: ++** cmple p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m16_s32, svint32_t, ++ p0 = svcmple_n_s32 (p1, z0, -16), ++ p0 = svcmple (p1, z0, -16)) ++ ++/* ++** cmple_m17_s32: ++** mov (z[0-9]+\.s), #-17 ++** ( ++** cmpge p0\.s, p1/z, \1, z0\.s ++** | ++** cmple p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m17_s32, svint32_t, ++ p0 = svcmple_n_s32 (p1, z0, -17), ++ p0 = svcmple (p1, z0, -17)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s64.c +new file mode 100644 +index 000000000..faaa87614 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s64.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_s64_tied: ++** ( ++** cmpge p0\.d, p0/z, z1\.d, z0\.d ++** | ++** cmple p0\.d, p0/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_s64_tied, svint64_t, ++ p0 = svcmple_s64 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_s64_untied: ++** ( ++** cmpge p0\.d, p1/z, z1\.d, z0\.d ++** | ++** cmple p0\.d, p1/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_s64_untied, svint64_t, ++ p0 = svcmple_s64 (p1, z0, z1), ++ p0 = svcmple (p1, z0, z1)) ++ ++/* ++** cmple_x0_s64: ++** mov (z[0-9]+\.d), x0 ++** ( ++** cmpge p0\.d, p1/z, \1, z0\.d ++** | ++** cmple p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_x0_s64, svint64_t, int64_t, ++ p0 = svcmple_n_s64 (p1, z0, x0), ++ p0 = svcmple (p1, z0, x0)) ++ ++/* ++** cmple_0_s64: ++** cmple p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_s64, svint64_t, ++ p0 = svcmple_n_s64 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_s64: ++** cmple p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_s64, svint64_t, ++ p0 = svcmple_n_s64 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) ++ ++/* ++** cmple_15_s64: ++** cmple p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_15_s64, svint64_t, ++ p0 = svcmple_n_s64 (p1, z0, 15), ++ p0 = svcmple (p1, z0, 15)) ++ ++/* ++** cmple_16_s64: ++** mov (z[0-9]+\.d), #16 ++** ( ++** cmpge p0\.d, p1/z, \1, z0\.d ++** | ++** cmple p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_16_s64, svint64_t, ++ p0 = svcmple_n_s64 (p1, z0, 16), ++ p0 = svcmple (p1, z0, 16)) ++ ++/* ++** cmple_m1_s64: ++** cmple p0\.d, p1/z, z0\.d, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m1_s64, svint64_t, ++ p0 = svcmple_n_s64 (p1, z0, -1), ++ p0 = svcmple (p1, z0, -1)) ++ ++/* ++** cmple_m16_s64: ++** cmple p0\.d, p1/z, z0\.d, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m16_s64, svint64_t, ++ p0 = svcmple_n_s64 (p1, z0, -16), ++ p0 = svcmple (p1, z0, -16)) ++ ++/* ++** cmple_m17_s64: ++** mov (z[0-9]+\.d), #-17 ++** ( ++** cmpge p0\.d, p1/z, \1, z0\.d ++** | ++** cmple p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m17_s64, svint64_t, ++ p0 = svcmple_n_s64 (p1, z0, -17), ++ p0 = svcmple (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s8.c +new file mode 100644 +index 000000000..222487d75 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s8.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_s8_tied: ++** ( ++** cmpge p0\.b, p0/z, z1\.b, z0\.b ++** | ++** cmple p0\.b, p0/z, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_s8_tied, svint8_t, ++ p0 = svcmple_s8 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_s8_untied: ++** ( ++** cmpge p0\.b, p1/z, z1\.b, z0\.b ++** | ++** cmple p0\.b, p1/z, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_s8_untied, svint8_t, ++ p0 = svcmple_s8 (p1, z0, z1), ++ p0 = svcmple (p1, z0, z1)) ++ ++/* ++** cmple_w0_s8: ++** mov (z[0-9]+\.b), w0 ++** ( ++** cmpge p0\.b, p1/z, 
\1, z0\.b ++** | ++** cmple p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_w0_s8, svint8_t, int8_t, ++ p0 = svcmple_n_s8 (p1, z0, x0), ++ p0 = svcmple (p1, z0, x0)) ++ ++/* ++** cmple_0_s8: ++** cmple p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_s8, svint8_t, ++ p0 = svcmple_n_s8 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_s8: ++** cmple p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_s8, svint8_t, ++ p0 = svcmple_n_s8 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) ++ ++/* ++** cmple_15_s8: ++** cmple p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_15_s8, svint8_t, ++ p0 = svcmple_n_s8 (p1, z0, 15), ++ p0 = svcmple (p1, z0, 15)) ++ ++/* ++** cmple_16_s8: ++** mov (z[0-9]+\.b), #16 ++** ( ++** cmpge p0\.b, p1/z, \1, z0\.b ++** | ++** cmple p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_16_s8, svint8_t, ++ p0 = svcmple_n_s8 (p1, z0, 16), ++ p0 = svcmple (p1, z0, 16)) ++ ++/* ++** cmple_m1_s8: ++** cmple p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m1_s8, svint8_t, ++ p0 = svcmple_n_s8 (p1, z0, -1), ++ p0 = svcmple (p1, z0, -1)) ++ ++/* ++** cmple_m16_s8: ++** cmple p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m16_s8, svint8_t, ++ p0 = svcmple_n_s8 (p1, z0, -16), ++ p0 = svcmple (p1, z0, -16)) ++ ++/* ++** cmple_m17_s8: ++** mov (z[0-9]+\.b), #-17 ++** ( ++** cmpge p0\.b, p1/z, \1, z0\.b ++** | ++** cmple p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m17_s8, svint8_t, ++ p0 = svcmple_n_s8 (p1, z0, -17), ++ p0 = svcmple (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u16.c +new file mode 100644 +index 000000000..26af06e52 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u16.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_u16_tied: ++** ( ++** cmphs p0\.h, p0/z, z1\.h, z0\.h ++** | ++** cmpls p0\.h, p0/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_u16_tied, svuint16_t, ++ p0 = svcmple_u16 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_u16_untied: ++** ( ++** cmphs p0\.h, p1/z, z1\.h, z0\.h ++** | ++** cmpls p0\.h, p1/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_u16_untied, svuint16_t, ++ p0 = svcmple_u16 (p1, z0, z1), ++ p0 = svcmple (p1, z0, z1)) ++ ++/* ++** cmple_w0_u16: ++** mov (z[0-9]+\.h), w0 ++** ( ++** cmphs p0\.h, p1/z, \1, z0\.h ++** | ++** cmpls p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_w0_u16, svuint16_t, uint16_t, ++ p0 = svcmple_n_u16 (p1, z0, x0), ++ p0 = svcmple (p1, z0, x0)) ++ ++/* ++** cmple_0_u16: ++** cmpls p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_u16, svuint16_t, ++ p0 = svcmple_n_u16 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_u16: ++** cmpls p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_u16, svuint16_t, ++ p0 = svcmple_n_u16 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) ++ ++/* ++** cmple_15_u16: ++** cmpls p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_15_u16, svuint16_t, ++ p0 = svcmple_n_u16 (p1, z0, 15), ++ p0 = svcmple (p1, z0, 15)) ++ ++/* ++** cmple_16_u16: ++** cmpls p0\.h, p1/z, z0\.h, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_16_u16, svuint16_t, ++ p0 = svcmple_n_u16 (p1, z0, 16), ++ p0 = svcmple (p1, z0, 16)) ++ ++/* ++** cmple_127_u16: ++** cmpls p0\.h, 
p1/z, z0\.h, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_127_u16, svuint16_t, ++ p0 = svcmple_n_u16 (p1, z0, 127), ++ p0 = svcmple (p1, z0, 127)) ++ ++/* ++** cmple_128_u16: ++** mov (z[0-9]+\.h), #128 ++** ( ++** cmphs p0\.h, p1/z, \1, z0\.h ++** | ++** cmpls p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_128_u16, svuint16_t, ++ p0 = svcmple_n_u16 (p1, z0, 128), ++ p0 = svcmple (p1, z0, 128)) ++ ++/* ++** cmple_m1_u16: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphs p0\.h, p1/z, \1\.h, z0\.h ++** | ++** cmpls p0\.h, p1/z, z0\.h, \1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m1_u16, svuint16_t, ++ p0 = svcmple_n_u16 (p1, z0, -1), ++ p0 = svcmple (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u32.c +new file mode 100644 +index 000000000..cee2d14c8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u32.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_u32_tied: ++** ( ++** cmphs p0\.s, p0/z, z1\.s, z0\.s ++** | ++** cmpls p0\.s, p0/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_u32_tied, svuint32_t, ++ p0 = svcmple_u32 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_u32_untied: ++** ( ++** cmphs p0\.s, p1/z, z1\.s, z0\.s ++** | ++** cmpls p0\.s, p1/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_u32_untied, svuint32_t, ++ p0 = svcmple_u32 (p1, z0, z1), ++ p0 = svcmple (p1, z0, z1)) ++ ++/* ++** cmple_w0_u32: ++** mov (z[0-9]+\.s), w0 ++** ( ++** cmphs p0\.s, p1/z, \1, z0\.s ++** | ++** cmpls p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_w0_u32, svuint32_t, uint32_t, ++ p0 = svcmple_n_u32 (p1, z0, x0), ++ p0 = svcmple (p1, z0, x0)) ++ ++/* ++** cmple_0_u32: ++** cmpls p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_u32, svuint32_t, ++ p0 = svcmple_n_u32 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_u32: ++** cmpls p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_u32, svuint32_t, ++ p0 = svcmple_n_u32 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) ++ ++/* ++** cmple_15_u32: ++** cmpls p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_15_u32, svuint32_t, ++ p0 = svcmple_n_u32 (p1, z0, 15), ++ p0 = svcmple (p1, z0, 15)) ++ ++/* ++** cmple_16_u32: ++** cmpls p0\.s, p1/z, z0\.s, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_16_u32, svuint32_t, ++ p0 = svcmple_n_u32 (p1, z0, 16), ++ p0 = svcmple (p1, z0, 16)) ++ ++/* ++** cmple_127_u32: ++** cmpls p0\.s, p1/z, z0\.s, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_127_u32, svuint32_t, ++ p0 = svcmple_n_u32 (p1, z0, 127), ++ p0 = svcmple (p1, z0, 127)) ++ ++/* ++** cmple_128_u32: ++** mov (z[0-9]+\.s), #128 ++** ( ++** cmphs p0\.s, p1/z, \1, z0\.s ++** | ++** cmpls p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_128_u32, svuint32_t, ++ p0 = svcmple_n_u32 (p1, z0, 128), ++ p0 = svcmple (p1, z0, 128)) ++ ++/* ++** cmple_m1_u32: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphs p0\.s, p1/z, \1\.s, z0\.s ++** | ++** cmpls p0\.s, p1/z, z0\.s, \1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m1_u32, svuint32_t, ++ p0 = svcmple_n_u32 (p1, z0, -1), ++ p0 = svcmple (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u64.c +new file mode 100644 +index 000000000..b8388bca8 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u64.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_u64_tied: ++** ( ++** cmphs p0\.d, p0/z, z1\.d, z0\.d ++** | ++** cmpls p0\.d, p0/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_u64_tied, svuint64_t, ++ p0 = svcmple_u64 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_u64_untied: ++** ( ++** cmphs p0\.d, p1/z, z1\.d, z0\.d ++** | ++** cmpls p0\.d, p1/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_u64_untied, svuint64_t, ++ p0 = svcmple_u64 (p1, z0, z1), ++ p0 = svcmple (p1, z0, z1)) ++ ++/* ++** cmple_x0_u64: ++** mov (z[0-9]+\.d), x0 ++** ( ++** cmphs p0\.d, p1/z, \1, z0\.d ++** | ++** cmpls p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_x0_u64, svuint64_t, uint64_t, ++ p0 = svcmple_n_u64 (p1, z0, x0), ++ p0 = svcmple (p1, z0, x0)) ++ ++/* ++** cmple_0_u64: ++** cmpls p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_u64, svuint64_t, ++ p0 = svcmple_n_u64 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_u64: ++** cmpls p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_u64, svuint64_t, ++ p0 = svcmple_n_u64 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) ++ ++/* ++** cmple_15_u64: ++** cmpls p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_15_u64, svuint64_t, ++ p0 = svcmple_n_u64 (p1, z0, 15), ++ p0 = svcmple (p1, z0, 15)) ++ ++/* ++** cmple_16_u64: ++** cmpls p0\.d, p1/z, z0\.d, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_16_u64, svuint64_t, ++ p0 = svcmple_n_u64 (p1, z0, 16), ++ p0 = svcmple (p1, z0, 16)) ++ ++/* ++** cmple_127_u64: ++** cmpls p0\.d, p1/z, z0\.d, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_127_u64, svuint64_t, ++ p0 = svcmple_n_u64 (p1, z0, 127), ++ p0 = svcmple (p1, z0, 127)) ++ ++/* ++** cmple_128_u64: ++** mov (z[0-9]+\.d), #128 ++** ( ++** cmphs p0\.d, p1/z, \1, z0\.d ++** | ++** cmpls p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_128_u64, svuint64_t, ++ p0 = svcmple_n_u64 (p1, z0, 128), ++ p0 = svcmple (p1, z0, 128)) ++ ++/* ++** cmple_m1_u64: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphs p0\.d, p1/z, \1\.d, z0\.d ++** | ++** cmpls p0\.d, p1/z, z0\.d, \1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m1_u64, svuint64_t, ++ p0 = svcmple_n_u64 (p1, z0, -1), ++ p0 = svcmple (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u8.c +new file mode 100644 +index 000000000..55a8d4f40 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u8.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_u8_tied: ++** ( ++** cmphs p0\.b, p0/z, z1\.b, z0\.b ++** | ++** cmpls p0\.b, p0/z, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_u8_tied, svuint8_t, ++ p0 = svcmple_u8 (p0, z0, z1), ++ p0 = svcmple (p0, z0, z1)) ++ ++/* ++** cmple_u8_untied: ++** ( ++** cmphs p0\.b, p1/z, z1\.b, z0\.b ++** | ++** cmpls p0\.b, p1/z, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_u8_untied, svuint8_t, ++ p0 = svcmple_u8 (p1, z0, z1), ++ p0 = svcmple (p1, z0, z1)) ++ ++/* ++** cmple_w0_u8: ++** mov (z[0-9]+\.b), w0 ++** ( ++** cmphs p0\.b, p1/z, \1, z0\.b ++** | ++** cmpls p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_w0_u8, svuint8_t, uint8_t, ++ p0 = svcmple_n_u8 (p1, z0, x0), ++ p0 = 
svcmple (p1, z0, x0)) ++ ++/* ++** cmple_0_u8: ++** cmpls p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_0_u8, svuint8_t, ++ p0 = svcmple_n_u8 (p1, z0, 0), ++ p0 = svcmple (p1, z0, 0)) ++ ++/* ++** cmple_1_u8: ++** cmpls p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_1_u8, svuint8_t, ++ p0 = svcmple_n_u8 (p1, z0, 1), ++ p0 = svcmple (p1, z0, 1)) ++ ++/* ++** cmple_15_u8: ++** cmpls p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_15_u8, svuint8_t, ++ p0 = svcmple_n_u8 (p1, z0, 15), ++ p0 = svcmple (p1, z0, 15)) ++ ++/* ++** cmple_16_u8: ++** cmpls p0\.b, p1/z, z0\.b, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_16_u8, svuint8_t, ++ p0 = svcmple_n_u8 (p1, z0, 16), ++ p0 = svcmple (p1, z0, 16)) ++ ++/* ++** cmple_127_u8: ++** cmpls p0\.b, p1/z, z0\.b, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_127_u8, svuint8_t, ++ p0 = svcmple_n_u8 (p1, z0, 127), ++ p0 = svcmple (p1, z0, 127)) ++ ++/* ++** cmple_128_u8: ++** mov (z[0-9]+\.b), #-128 ++** ( ++** cmphs p0\.b, p1/z, \1, z0\.b ++** | ++** cmpls p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_128_u8, svuint8_t, ++ p0 = svcmple_n_u8 (p1, z0, 128), ++ p0 = svcmple (p1, z0, 128)) ++ ++/* ++** cmple_m1_u8: ++** mov (z[0-9]+\.b), #-1 ++** ( ++** cmphs p0\.b, p1/z, \1, z0\.b ++** | ++** cmpls p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmple_m1_u8, svuint8_t, ++ p0 = svcmple_n_u8 (p1, z0, -1), ++ p0 = svcmple (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s16.c +new file mode 100644 +index 000000000..f1f0b2ed6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_wide_s16_tied: ++** cmple p0\.h, p0/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_s16_tied, svint16_t, svint64_t, ++ p0 = svcmple_wide_s16 (p0, z0, z1), ++ p0 = svcmple_wide (p0, z0, z1)) ++ ++/* ++** cmple_wide_s16_untied: ++** cmple p0\.h, p1/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_s16_untied, svint16_t, svint64_t, ++ p0 = svcmple_wide_s16 (p1, z0, z1), ++ p0 = svcmple_wide (p1, z0, z1)) ++ ++/* ++** cmple_wide_x0_s16: ++** mov (z[0-9]+\.d), x0 ++** cmple p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_wide_x0_s16, svint16_t, int64_t, ++ p0 = svcmple_wide_n_s16 (p1, z0, x0), ++ p0 = svcmple_wide (p1, z0, x0)) ++ ++/* ++** cmple_wide_0_s16: ++** cmple p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_0_s16, svint16_t, ++ p0 = svcmple_wide_n_s16 (p1, z0, 0), ++ p0 = svcmple_wide (p1, z0, 0)) ++ ++/* ++** cmple_wide_1_s16: ++** cmple p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_1_s16, svint16_t, ++ p0 = svcmple_wide_n_s16 (p1, z0, 1), ++ p0 = svcmple_wide (p1, z0, 1)) ++ ++/* ++** cmple_wide_15_s16: ++** cmple p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_15_s16, svint16_t, ++ p0 = svcmple_wide_n_s16 (p1, z0, 15), ++ p0 = svcmple_wide (p1, z0, 15)) ++ ++/* ++** cmple_wide_16_s16: ++** mov (z[0-9]+\.d), #16 ++** cmple p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_16_s16, svint16_t, ++ p0 = svcmple_wide_n_s16 (p1, z0, 16), ++ p0 = svcmple_wide (p1, z0, 16)) ++ ++/* ++** cmple_wide_m1_s16: ++** cmple p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m1_s16, svint16_t, ++ p0 = svcmple_wide_n_s16 (p1, z0, -1), 
++ p0 = svcmple_wide (p1, z0, -1)) ++ ++/* ++** cmple_wide_m16_s16: ++** cmple p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m16_s16, svint16_t, ++ p0 = svcmple_wide_n_s16 (p1, z0, -16), ++ p0 = svcmple_wide (p1, z0, -16)) ++ ++/* ++** cmple_wide_m17_s16: ++** mov (z[0-9]+\.d), #-17 ++** cmple p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m17_s16, svint16_t, ++ p0 = svcmple_wide_n_s16 (p1, z0, -17), ++ p0 = svcmple_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s32.c +new file mode 100644 +index 000000000..edc5513b6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_wide_s32_tied: ++** cmple p0\.s, p0/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_s32_tied, svint32_t, svint64_t, ++ p0 = svcmple_wide_s32 (p0, z0, z1), ++ p0 = svcmple_wide (p0, z0, z1)) ++ ++/* ++** cmple_wide_s32_untied: ++** cmple p0\.s, p1/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_s32_untied, svint32_t, svint64_t, ++ p0 = svcmple_wide_s32 (p1, z0, z1), ++ p0 = svcmple_wide (p1, z0, z1)) ++ ++/* ++** cmple_wide_x0_s32: ++** mov (z[0-9]+\.d), x0 ++** cmple p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_wide_x0_s32, svint32_t, int64_t, ++ p0 = svcmple_wide_n_s32 (p1, z0, x0), ++ p0 = svcmple_wide (p1, z0, x0)) ++ ++/* ++** cmple_wide_0_s32: ++** cmple p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_0_s32, svint32_t, ++ p0 = svcmple_wide_n_s32 (p1, z0, 0), ++ p0 = svcmple_wide (p1, z0, 0)) ++ ++/* ++** cmple_wide_1_s32: ++** cmple p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_1_s32, svint32_t, ++ p0 = svcmple_wide_n_s32 (p1, z0, 1), ++ p0 = svcmple_wide (p1, z0, 1)) ++ ++/* ++** cmple_wide_15_s32: ++** cmple p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_15_s32, svint32_t, ++ p0 = svcmple_wide_n_s32 (p1, z0, 15), ++ p0 = svcmple_wide (p1, z0, 15)) ++ ++/* ++** cmple_wide_16_s32: ++** mov (z[0-9]+\.d), #16 ++** cmple p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_16_s32, svint32_t, ++ p0 = svcmple_wide_n_s32 (p1, z0, 16), ++ p0 = svcmple_wide (p1, z0, 16)) ++ ++/* ++** cmple_wide_m1_s32: ++** cmple p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m1_s32, svint32_t, ++ p0 = svcmple_wide_n_s32 (p1, z0, -1), ++ p0 = svcmple_wide (p1, z0, -1)) ++ ++/* ++** cmple_wide_m16_s32: ++** cmple p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m16_s32, svint32_t, ++ p0 = svcmple_wide_n_s32 (p1, z0, -16), ++ p0 = svcmple_wide (p1, z0, -16)) ++ ++/* ++** cmple_wide_m17_s32: ++** mov (z[0-9]+\.d), #-17 ++** cmple p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m17_s32, svint32_t, ++ p0 = svcmple_wide_n_s32 (p1, z0, -17), ++ p0 = svcmple_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s8.c +new file mode 100644 +index 000000000..984044460 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_wide_s8_tied: ++** cmple p0\.b, p0/z, z0\.b, z1\.d ++** ret ++*/ 
++TEST_COMPARE_DUAL_Z (cmple_wide_s8_tied, svint8_t, svint64_t, ++ p0 = svcmple_wide_s8 (p0, z0, z1), ++ p0 = svcmple_wide (p0, z0, z1)) ++ ++/* ++** cmple_wide_s8_untied: ++** cmple p0\.b, p1/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_s8_untied, svint8_t, svint64_t, ++ p0 = svcmple_wide_s8 (p1, z0, z1), ++ p0 = svcmple_wide (p1, z0, z1)) ++ ++/* ++** cmple_wide_x0_s8: ++** mov (z[0-9]+\.d), x0 ++** cmple p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_wide_x0_s8, svint8_t, int64_t, ++ p0 = svcmple_wide_n_s8 (p1, z0, x0), ++ p0 = svcmple_wide (p1, z0, x0)) ++ ++/* ++** cmple_wide_0_s8: ++** cmple p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_0_s8, svint8_t, ++ p0 = svcmple_wide_n_s8 (p1, z0, 0), ++ p0 = svcmple_wide (p1, z0, 0)) ++ ++/* ++** cmple_wide_1_s8: ++** cmple p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_1_s8, svint8_t, ++ p0 = svcmple_wide_n_s8 (p1, z0, 1), ++ p0 = svcmple_wide (p1, z0, 1)) ++ ++/* ++** cmple_wide_15_s8: ++** cmple p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_15_s8, svint8_t, ++ p0 = svcmple_wide_n_s8 (p1, z0, 15), ++ p0 = svcmple_wide (p1, z0, 15)) ++ ++/* ++** cmple_wide_16_s8: ++** mov (z[0-9]+\.d), #16 ++** cmple p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_16_s8, svint8_t, ++ p0 = svcmple_wide_n_s8 (p1, z0, 16), ++ p0 = svcmple_wide (p1, z0, 16)) ++ ++/* ++** cmple_wide_m1_s8: ++** cmple p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m1_s8, svint8_t, ++ p0 = svcmple_wide_n_s8 (p1, z0, -1), ++ p0 = svcmple_wide (p1, z0, -1)) ++ ++/* ++** cmple_wide_m16_s8: ++** cmple p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m16_s8, svint8_t, ++ p0 = svcmple_wide_n_s8 (p1, z0, -16), ++ p0 = svcmple_wide (p1, z0, -16)) ++ ++/* ++** cmple_wide_m17_s8: ++** mov (z[0-9]+\.d), #-17 ++** cmple p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m17_s8, svint8_t, ++ p0 = svcmple_wide_n_s8 (p1, z0, -17), ++ p0 = svcmple_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u16.c +new file mode 100644 +index 000000000..a39a1aad5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_wide_u16_tied: ++** cmpls p0\.h, p0/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_u16_tied, svuint16_t, svuint64_t, ++ p0 = svcmple_wide_u16 (p0, z0, z1), ++ p0 = svcmple_wide (p0, z0, z1)) ++ ++/* ++** cmple_wide_u16_untied: ++** cmpls p0\.h, p1/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_u16_untied, svuint16_t, svuint64_t, ++ p0 = svcmple_wide_u16 (p1, z0, z1), ++ p0 = svcmple_wide (p1, z0, z1)) ++ ++/* ++** cmple_wide_x0_u16: ++** mov (z[0-9]+\.d), x0 ++** cmpls p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_wide_x0_u16, svuint16_t, uint64_t, ++ p0 = svcmple_wide_n_u16 (p1, z0, x0), ++ p0 = svcmple_wide (p1, z0, x0)) ++ ++/* ++** cmple_wide_0_u16: ++** cmpls p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_0_u16, svuint16_t, ++ p0 = svcmple_wide_n_u16 (p1, z0, 0), ++ p0 = svcmple_wide (p1, z0, 0)) ++ ++/* ++** cmple_wide_1_u16: ++** cmpls p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_1_u16, svuint16_t, ++ p0 = svcmple_wide_n_u16 (p1, z0, 1), ++ p0 = svcmple_wide (p1, z0, 1)) 
++ ++/* ++** cmple_wide_15_u16: ++** cmpls p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_15_u16, svuint16_t, ++ p0 = svcmple_wide_n_u16 (p1, z0, 15), ++ p0 = svcmple_wide (p1, z0, 15)) ++ ++/* ++** cmple_wide_16_u16: ++** cmpls p0\.h, p1/z, z0\.h, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_16_u16, svuint16_t, ++ p0 = svcmple_wide_n_u16 (p1, z0, 16), ++ p0 = svcmple_wide (p1, z0, 16)) ++ ++/* ++** cmple_wide_127_u16: ++** cmpls p0\.h, p1/z, z0\.h, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_127_u16, svuint16_t, ++ p0 = svcmple_wide_n_u16 (p1, z0, 127), ++ p0 = svcmple_wide (p1, z0, 127)) ++ ++/* ++** cmple_wide_128_u16: ++** mov (z[0-9]+\.d), #128 ++** cmpls p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_128_u16, svuint16_t, ++ p0 = svcmple_wide_n_u16 (p1, z0, 128), ++ p0 = svcmple_wide (p1, z0, 128)) ++ ++/* ++** cmple_wide_m1_u16: ++** mov (z[0-9]+)\.b, #-1 ++** cmpls p0\.h, p1/z, z0\.h, \1\.d ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m1_u16, svuint16_t, ++ p0 = svcmple_wide_n_u16 (p1, z0, -1), ++ p0 = svcmple_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u32.c +new file mode 100644 +index 000000000..fe682c9e8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_wide_u32_tied: ++** cmpls p0\.s, p0/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_u32_tied, svuint32_t, svuint64_t, ++ p0 = svcmple_wide_u32 (p0, z0, z1), ++ p0 = svcmple_wide (p0, z0, z1)) ++ ++/* ++** cmple_wide_u32_untied: ++** cmpls p0\.s, p1/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_u32_untied, svuint32_t, svuint64_t, ++ p0 = svcmple_wide_u32 (p1, z0, z1), ++ p0 = svcmple_wide (p1, z0, z1)) ++ ++/* ++** cmple_wide_x0_u32: ++** mov (z[0-9]+\.d), x0 ++** cmpls p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_wide_x0_u32, svuint32_t, uint64_t, ++ p0 = svcmple_wide_n_u32 (p1, z0, x0), ++ p0 = svcmple_wide (p1, z0, x0)) ++ ++/* ++** cmple_wide_0_u32: ++** cmpls p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_0_u32, svuint32_t, ++ p0 = svcmple_wide_n_u32 (p1, z0, 0), ++ p0 = svcmple_wide (p1, z0, 0)) ++ ++/* ++** cmple_wide_1_u32: ++** cmpls p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_1_u32, svuint32_t, ++ p0 = svcmple_wide_n_u32 (p1, z0, 1), ++ p0 = svcmple_wide (p1, z0, 1)) ++ ++/* ++** cmple_wide_15_u32: ++** cmpls p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_15_u32, svuint32_t, ++ p0 = svcmple_wide_n_u32 (p1, z0, 15), ++ p0 = svcmple_wide (p1, z0, 15)) ++ ++/* ++** cmple_wide_16_u32: ++** cmpls p0\.s, p1/z, z0\.s, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_16_u32, svuint32_t, ++ p0 = svcmple_wide_n_u32 (p1, z0, 16), ++ p0 = svcmple_wide (p1, z0, 16)) ++ ++/* ++** cmple_wide_127_u32: ++** cmpls p0\.s, p1/z, z0\.s, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_127_u32, svuint32_t, ++ p0 = svcmple_wide_n_u32 (p1, z0, 127), ++ p0 = svcmple_wide (p1, z0, 127)) ++ ++/* ++** cmple_wide_128_u32: ++** mov (z[0-9]+\.d), #128 ++** cmpls p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_128_u32, svuint32_t, ++ p0 = svcmple_wide_n_u32 (p1, z0, 128), ++ p0 = svcmple_wide (p1, z0, 128)) ++ ++/* ++** cmple_wide_m1_u32: ++** mov (z[0-9]+)\.b, #-1 ++** cmpls p0\.s, p1/z, z0\.s, \1\.d ++** 
ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m1_u32, svuint32_t, ++ p0 = svcmple_wide_n_u32 (p1, z0, -1), ++ p0 = svcmple_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u8.c +new file mode 100644 +index 000000000..893dfa627 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmple_wide_u8_tied: ++** cmpls p0\.b, p0/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_u8_tied, svuint8_t, svuint64_t, ++ p0 = svcmple_wide_u8 (p0, z0, z1), ++ p0 = svcmple_wide (p0, z0, z1)) ++ ++/* ++** cmple_wide_u8_untied: ++** cmpls p0\.b, p1/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmple_wide_u8_untied, svuint8_t, svuint64_t, ++ p0 = svcmple_wide_u8 (p1, z0, z1), ++ p0 = svcmple_wide (p1, z0, z1)) ++ ++/* ++** cmple_wide_x0_u8: ++** mov (z[0-9]+\.d), x0 ++** cmpls p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmple_wide_x0_u8, svuint8_t, uint64_t, ++ p0 = svcmple_wide_n_u8 (p1, z0, x0), ++ p0 = svcmple_wide (p1, z0, x0)) ++ ++/* ++** cmple_wide_0_u8: ++** cmpls p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_0_u8, svuint8_t, ++ p0 = svcmple_wide_n_u8 (p1, z0, 0), ++ p0 = svcmple_wide (p1, z0, 0)) ++ ++/* ++** cmple_wide_1_u8: ++** cmpls p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_1_u8, svuint8_t, ++ p0 = svcmple_wide_n_u8 (p1, z0, 1), ++ p0 = svcmple_wide (p1, z0, 1)) ++ ++/* ++** cmple_wide_15_u8: ++** cmpls p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_15_u8, svuint8_t, ++ p0 = svcmple_wide_n_u8 (p1, z0, 15), ++ p0 = svcmple_wide (p1, z0, 15)) ++ ++/* ++** cmple_wide_16_u8: ++** cmpls p0\.b, p1/z, z0\.b, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_16_u8, svuint8_t, ++ p0 = svcmple_wide_n_u8 (p1, z0, 16), ++ p0 = svcmple_wide (p1, z0, 16)) ++ ++/* ++** cmple_wide_127_u8: ++** cmpls p0\.b, p1/z, z0\.b, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_127_u8, svuint8_t, ++ p0 = svcmple_wide_n_u8 (p1, z0, 127), ++ p0 = svcmple_wide (p1, z0, 127)) ++ ++/* ++** cmple_wide_128_u8: ++** mov (z[0-9]+\.d), #128 ++** cmpls p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_128_u8, svuint8_t, ++ p0 = svcmple_wide_n_u8 (p1, z0, 128), ++ p0 = svcmple_wide (p1, z0, 128)) ++ ++/* ++** cmple_wide_m1_u8: ++** mov (z[0-9]+)\.b, #-1 ++** cmpls p0\.b, p1/z, z0\.b, \1\.d ++** ret ++*/ ++TEST_COMPARE_Z (cmple_wide_m1_u8, svuint8_t, ++ p0 = svcmple_wide_n_u8 (p1, z0, -1), ++ p0 = svcmple_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f16.c +new file mode 100644 +index 000000000..598f673a8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f16.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_f16_tied: ++** ( ++** fcmgt p0\.h, p0/z, z1\.h, z0\.h ++** | ++** fcmlt p0\.h, p0/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_f16_tied, svfloat16_t, ++ p0 = svcmplt_f16 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_f16_untied: ++** ( ++** fcmgt p0\.h, p1/z, z1\.h, z0\.h ++** | ++** fcmlt p0\.h, p1/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_f16_untied, svfloat16_t, ++ p0 = svcmplt_f16 (p1, z0, z1), ++ p0 = svcmplt 
(p1, z0, z1)) ++ ++/* ++** cmplt_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** ( ++** fcmgt p0\.h, p1/z, \1, z0\.h ++** | ++** fcmlt p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmplt_h4_f16, svfloat16_t, float16_t, ++ p0 = svcmplt_n_f16 (p1, z0, d4), ++ p0 = svcmplt (p1, z0, d4)) ++ ++/* ++** cmplt_0_f16: ++** fcmlt p0\.h, p1/z, z0\.h, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_f16, svfloat16_t, ++ p0 = svcmplt_n_f16 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** ( ++** fcmgt p0\.h, p1/z, \1, z0\.h ++** | ++** fcmlt p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_f16, svfloat16_t, ++ p0 = svcmplt_n_f16 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f32.c +new file mode 100644 +index 000000000..f9dea3665 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f32.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_f32_tied: ++** ( ++** fcmgt p0\.s, p0/z, z1\.s, z0\.s ++** | ++** fcmlt p0\.s, p0/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_f32_tied, svfloat32_t, ++ p0 = svcmplt_f32 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_f32_untied: ++** ( ++** fcmgt p0\.s, p1/z, z1\.s, z0\.s ++** | ++** fcmlt p0\.s, p1/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_f32_untied, svfloat32_t, ++ p0 = svcmplt_f32 (p1, z0, z1), ++ p0 = svcmplt (p1, z0, z1)) ++ ++/* ++** cmplt_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** ( ++** fcmgt p0\.s, p1/z, \1, z0\.s ++** | ++** fcmlt p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmplt_s4_f32, svfloat32_t, float32_t, ++ p0 = svcmplt_n_f32 (p1, z0, d4), ++ p0 = svcmplt (p1, z0, d4)) ++ ++/* ++** cmplt_0_f32: ++** fcmlt p0\.s, p1/z, z0\.s, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_f32, svfloat32_t, ++ p0 = svcmplt_n_f32 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** ( ++** fcmgt p0\.s, p1/z, \1, z0\.s ++** | ++** fcmlt p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_f32, svfloat32_t, ++ p0 = svcmplt_n_f32 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f64.c +new file mode 100644 +index 000000000..6f251db4f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f64.c +@@ -0,0 +1,66 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_f64_tied: ++** ( ++** fcmgt p0\.d, p0/z, z1\.d, z0\.d ++** | ++** fcmlt p0\.d, p0/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_f64_tied, svfloat64_t, ++ p0 = svcmplt_f64 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_f64_untied: ++** ( ++** fcmgt p0\.d, p1/z, z1\.d, z0\.d ++** | ++** fcmlt p0\.d, p1/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_f64_untied, svfloat64_t, ++ p0 = svcmplt_f64 (p1, z0, z1), ++ p0 = svcmplt (p1, z0, z1)) ++ ++/* ++** cmplt_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** ( ++** fcmgt p0\.d, p1/z, \1, z0\.d ++** | ++** fcmlt p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZD (cmplt_d4_f64, svfloat64_t, float64_t, ++ p0 = svcmplt_n_f64 (p1, z0, d4), ++ p0 = svcmplt (p1, z0, d4)) ++ ++/* ++** cmplt_0_f64: ++** fcmlt p0\.d, p1/z, z0\.d, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_f64, svfloat64_t, ++ p0 = svcmplt_n_f64 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** ( ++** fcmgt p0\.d, p1/z, \1, z0\.d ++** | ++** fcmlt p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_f64, svfloat64_t, ++ p0 = svcmplt_n_f64 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s16.c +new file mode 100644 +index 000000000..1e2bf9dde +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s16.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_s16_tied: ++** ( ++** cmpgt p0\.h, p0/z, z1\.h, z0\.h ++** | ++** cmplt p0\.h, p0/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_s16_tied, svint16_t, ++ p0 = svcmplt_s16 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_s16_untied: ++** ( ++** cmpgt p0\.h, p1/z, z1\.h, z0\.h ++** | ++** cmplt p0\.h, p1/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_s16_untied, svint16_t, ++ p0 = svcmplt_s16 (p1, z0, z1), ++ p0 = svcmplt (p1, z0, z1)) ++ ++/* ++** cmplt_w0_s16: ++** mov (z[0-9]+\.h), w0 ++** ( ++** cmpgt p0\.h, p1/z, \1, z0\.h ++** | ++** cmplt p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_w0_s16, svint16_t, int16_t, ++ p0 = svcmplt_n_s16 (p1, z0, x0), ++ p0 = svcmplt (p1, z0, x0)) ++ ++/* ++** cmplt_0_s16: ++** cmplt p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_s16, svint16_t, ++ p0 = svcmplt_n_s16 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_s16: ++** cmplt p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_s16, svint16_t, ++ p0 = svcmplt_n_s16 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) ++ ++/* ++** cmplt_15_s16: ++** cmplt p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_15_s16, svint16_t, ++ p0 = svcmplt_n_s16 (p1, z0, 15), ++ p0 = svcmplt (p1, z0, 15)) ++ ++/* ++** 
cmplt_16_s16: ++** mov (z[0-9]+\.h), #16 ++** ( ++** cmpgt p0\.h, p1/z, \1, z0\.h ++** | ++** cmplt p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_16_s16, svint16_t, ++ p0 = svcmplt_n_s16 (p1, z0, 16), ++ p0 = svcmplt (p1, z0, 16)) ++ ++/* ++** cmplt_m1_s16: ++** cmplt p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m1_s16, svint16_t, ++ p0 = svcmplt_n_s16 (p1, z0, -1), ++ p0 = svcmplt (p1, z0, -1)) ++ ++/* ++** cmplt_m16_s16: ++** cmplt p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m16_s16, svint16_t, ++ p0 = svcmplt_n_s16 (p1, z0, -16), ++ p0 = svcmplt (p1, z0, -16)) ++ ++/* ++** cmplt_m17_s16: ++** mov (z[0-9]+\.h), #-17 ++** ( ++** cmpgt p0\.h, p1/z, \1, z0\.h ++** | ++** cmplt p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m17_s16, svint16_t, ++ p0 = svcmplt_n_s16 (p1, z0, -17), ++ p0 = svcmplt (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s32.c +new file mode 100644 +index 000000000..8e2c02c4d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s32.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_s32_tied: ++** ( ++** cmpgt p0\.s, p0/z, z1\.s, z0\.s ++** | ++** cmplt p0\.s, p0/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_s32_tied, svint32_t, ++ p0 = svcmplt_s32 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_s32_untied: ++** ( ++** cmpgt p0\.s, p1/z, z1\.s, z0\.s ++** | ++** cmplt p0\.s, p1/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_s32_untied, svint32_t, ++ p0 = svcmplt_s32 (p1, z0, z1), ++ p0 = svcmplt (p1, z0, z1)) ++ ++/* ++** cmplt_w0_s32: ++** mov (z[0-9]+\.s), w0 ++** ( ++** cmpgt p0\.s, p1/z, \1, z0\.s ++** | ++** cmplt p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_w0_s32, svint32_t, int32_t, ++ p0 = svcmplt_n_s32 (p1, z0, x0), ++ p0 = svcmplt (p1, z0, x0)) ++ ++/* ++** cmplt_0_s32: ++** cmplt p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_s32, svint32_t, ++ p0 = svcmplt_n_s32 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_s32: ++** cmplt p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_s32, svint32_t, ++ p0 = svcmplt_n_s32 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) ++ ++/* ++** cmplt_15_s32: ++** cmplt p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_15_s32, svint32_t, ++ p0 = svcmplt_n_s32 (p1, z0, 15), ++ p0 = svcmplt (p1, z0, 15)) ++ ++/* ++** cmplt_16_s32: ++** mov (z[0-9]+\.s), #16 ++** ( ++** cmpgt p0\.s, p1/z, \1, z0\.s ++** | ++** cmplt p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_16_s32, svint32_t, ++ p0 = svcmplt_n_s32 (p1, z0, 16), ++ p0 = svcmplt (p1, z0, 16)) ++ ++/* ++** cmplt_m1_s32: ++** cmplt p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m1_s32, svint32_t, ++ p0 = svcmplt_n_s32 (p1, z0, -1), ++ p0 = svcmplt (p1, z0, -1)) ++ ++/* ++** cmplt_m16_s32: ++** cmplt p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m16_s32, svint32_t, ++ p0 = svcmplt_n_s32 (p1, z0, -16), ++ p0 = svcmplt (p1, z0, -16)) ++ ++/* ++** cmplt_m17_s32: ++** mov (z[0-9]+\.s), #-17 ++** ( ++** cmpgt p0\.s, p1/z, \1, z0\.s ++** | ++** cmplt p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m17_s32, svint32_t, ++ p0 = svcmplt_n_s32 (p1, z0, -17), ++ p0 = svcmplt (p1, z0, -17)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s64.c +new file mode 100644 +index 000000000..818c9fba9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s64.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_s64_tied: ++** ( ++** cmpgt p0\.d, p0/z, z1\.d, z0\.d ++** | ++** cmplt p0\.d, p0/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_s64_tied, svint64_t, ++ p0 = svcmplt_s64 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_s64_untied: ++** ( ++** cmpgt p0\.d, p1/z, z1\.d, z0\.d ++** | ++** cmplt p0\.d, p1/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_s64_untied, svint64_t, ++ p0 = svcmplt_s64 (p1, z0, z1), ++ p0 = svcmplt (p1, z0, z1)) ++ ++/* ++** cmplt_x0_s64: ++** mov (z[0-9]+\.d), x0 ++** ( ++** cmpgt p0\.d, p1/z, \1, z0\.d ++** | ++** cmplt p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_x0_s64, svint64_t, int64_t, ++ p0 = svcmplt_n_s64 (p1, z0, x0), ++ p0 = svcmplt (p1, z0, x0)) ++ ++/* ++** cmplt_0_s64: ++** cmplt p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_s64, svint64_t, ++ p0 = svcmplt_n_s64 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_s64: ++** cmplt p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_s64, svint64_t, ++ p0 = svcmplt_n_s64 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) ++ ++/* ++** cmplt_15_s64: ++** cmplt p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_15_s64, svint64_t, ++ p0 = svcmplt_n_s64 (p1, z0, 15), ++ p0 = svcmplt (p1, z0, 15)) ++ ++/* ++** cmplt_16_s64: ++** mov (z[0-9]+\.d), #16 ++** ( ++** cmpgt p0\.d, p1/z, \1, z0\.d ++** | ++** cmplt p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_16_s64, svint64_t, ++ p0 = svcmplt_n_s64 (p1, z0, 16), ++ p0 = svcmplt (p1, z0, 16)) ++ ++/* ++** cmplt_m1_s64: ++** cmplt p0\.d, p1/z, z0\.d, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m1_s64, svint64_t, ++ p0 = svcmplt_n_s64 (p1, z0, -1), ++ p0 = svcmplt (p1, z0, -1)) ++ ++/* ++** cmplt_m16_s64: ++** cmplt p0\.d, p1/z, z0\.d, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m16_s64, svint64_t, ++ p0 = svcmplt_n_s64 (p1, z0, -16), ++ p0 = svcmplt (p1, z0, -16)) ++ ++/* ++** cmplt_m17_s64: ++** mov (z[0-9]+\.d), #-17 ++** ( ++** cmpgt p0\.d, p1/z, \1, z0\.d ++** | ++** cmplt p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m17_s64, svint64_t, ++ p0 = svcmplt_n_s64 (p1, z0, -17), ++ p0 = svcmplt (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s8.c +new file mode 100644 +index 000000000..54b8dc408 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s8.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_s8_tied: ++** ( ++** cmpgt p0\.b, p0/z, z1\.b, z0\.b ++** | ++** cmplt p0\.b, p0/z, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_s8_tied, svint8_t, ++ p0 = svcmplt_s8 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_s8_untied: ++** ( ++** cmpgt p0\.b, p1/z, z1\.b, z0\.b ++** | ++** cmplt p0\.b, p1/z, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_s8_untied, svint8_t, ++ p0 = svcmplt_s8 (p1, z0, z1), ++ p0 = svcmplt (p1, z0, z1)) ++ ++/* ++** cmplt_w0_s8: ++** mov (z[0-9]+\.b), w0 ++** ( ++** cmpgt p0\.b, p1/z, 
\1, z0\.b ++** | ++** cmplt p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_w0_s8, svint8_t, int8_t, ++ p0 = svcmplt_n_s8 (p1, z0, x0), ++ p0 = svcmplt (p1, z0, x0)) ++ ++/* ++** cmplt_0_s8: ++** cmplt p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_s8, svint8_t, ++ p0 = svcmplt_n_s8 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_s8: ++** cmplt p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_s8, svint8_t, ++ p0 = svcmplt_n_s8 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) ++ ++/* ++** cmplt_15_s8: ++** cmplt p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_15_s8, svint8_t, ++ p0 = svcmplt_n_s8 (p1, z0, 15), ++ p0 = svcmplt (p1, z0, 15)) ++ ++/* ++** cmplt_16_s8: ++** mov (z[0-9]+\.b), #16 ++** ( ++** cmpgt p0\.b, p1/z, \1, z0\.b ++** | ++** cmplt p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_16_s8, svint8_t, ++ p0 = svcmplt_n_s8 (p1, z0, 16), ++ p0 = svcmplt (p1, z0, 16)) ++ ++/* ++** cmplt_m1_s8: ++** cmplt p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m1_s8, svint8_t, ++ p0 = svcmplt_n_s8 (p1, z0, -1), ++ p0 = svcmplt (p1, z0, -1)) ++ ++/* ++** cmplt_m16_s8: ++** cmplt p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m16_s8, svint8_t, ++ p0 = svcmplt_n_s8 (p1, z0, -16), ++ p0 = svcmplt (p1, z0, -16)) ++ ++/* ++** cmplt_m17_s8: ++** mov (z[0-9]+\.b), #-17 ++** ( ++** cmpgt p0\.b, p1/z, \1, z0\.b ++** | ++** cmplt p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m17_s8, svint8_t, ++ p0 = svcmplt_n_s8 (p1, z0, -17), ++ p0 = svcmplt (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u16.c +new file mode 100644 +index 000000000..c0f2a0550 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u16.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_u16_tied: ++** ( ++** cmphi p0\.h, p0/z, z1\.h, z0\.h ++** | ++** cmplo p0\.h, p0/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_u16_tied, svuint16_t, ++ p0 = svcmplt_u16 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_u16_untied: ++** ( ++** cmphi p0\.h, p1/z, z1\.h, z0\.h ++** | ++** cmplo p0\.h, p1/z, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_u16_untied, svuint16_t, ++ p0 = svcmplt_u16 (p1, z0, z1), ++ p0 = svcmplt (p1, z0, z1)) ++ ++/* ++** cmplt_w0_u16: ++** mov (z[0-9]+\.h), w0 ++** ( ++** cmphi p0\.h, p1/z, \1, z0\.h ++** | ++** cmplo p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_w0_u16, svuint16_t, uint16_t, ++ p0 = svcmplt_n_u16 (p1, z0, x0), ++ p0 = svcmplt (p1, z0, x0)) ++ ++/* ++** cmplt_0_u16: ++** cmplo p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_u16, svuint16_t, ++ p0 = svcmplt_n_u16 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_u16: ++** cmplo p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_u16, svuint16_t, ++ p0 = svcmplt_n_u16 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) ++ ++/* ++** cmplt_15_u16: ++** cmplo p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_15_u16, svuint16_t, ++ p0 = svcmplt_n_u16 (p1, z0, 15), ++ p0 = svcmplt (p1, z0, 15)) ++ ++/* ++** cmplt_16_u16: ++** cmplo p0\.h, p1/z, z0\.h, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_16_u16, svuint16_t, ++ p0 = svcmplt_n_u16 (p1, z0, 16), ++ p0 = svcmplt (p1, z0, 16)) ++ ++/* ++** cmplt_127_u16: ++** cmplo p0\.h, 
p1/z, z0\.h, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_127_u16, svuint16_t, ++ p0 = svcmplt_n_u16 (p1, z0, 127), ++ p0 = svcmplt (p1, z0, 127)) ++ ++/* ++** cmplt_128_u16: ++** mov (z[0-9]+\.h), #128 ++** ( ++** cmphi p0\.h, p1/z, \1, z0\.h ++** | ++** cmplo p0\.h, p1/z, z0\.h, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_128_u16, svuint16_t, ++ p0 = svcmplt_n_u16 (p1, z0, 128), ++ p0 = svcmplt (p1, z0, 128)) ++ ++/* ++** cmplt_m1_u16: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphi p0\.h, p1/z, \1\.h, z0\.h ++** | ++** cmplo p0\.h, p1/z, z0\.h, \1\.h ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m1_u16, svuint16_t, ++ p0 = svcmplt_n_u16 (p1, z0, -1), ++ p0 = svcmplt (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u32.c +new file mode 100644 +index 000000000..3bb0b1464 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u32.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_u32_tied: ++** ( ++** cmphi p0\.s, p0/z, z1\.s, z0\.s ++** | ++** cmplo p0\.s, p0/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_u32_tied, svuint32_t, ++ p0 = svcmplt_u32 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_u32_untied: ++** ( ++** cmphi p0\.s, p1/z, z1\.s, z0\.s ++** | ++** cmplo p0\.s, p1/z, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_u32_untied, svuint32_t, ++ p0 = svcmplt_u32 (p1, z0, z1), ++ p0 = svcmplt (p1, z0, z1)) ++ ++/* ++** cmplt_w0_u32: ++** mov (z[0-9]+\.s), w0 ++** ( ++** cmphi p0\.s, p1/z, \1, z0\.s ++** | ++** cmplo p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_w0_u32, svuint32_t, uint32_t, ++ p0 = svcmplt_n_u32 (p1, z0, x0), ++ p0 = svcmplt (p1, z0, x0)) ++ ++/* ++** cmplt_0_u32: ++** cmplo p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_u32, svuint32_t, ++ p0 = svcmplt_n_u32 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_u32: ++** cmplo p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_u32, svuint32_t, ++ p0 = svcmplt_n_u32 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) ++ ++/* ++** cmplt_15_u32: ++** cmplo p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_15_u32, svuint32_t, ++ p0 = svcmplt_n_u32 (p1, z0, 15), ++ p0 = svcmplt (p1, z0, 15)) ++ ++/* ++** cmplt_16_u32: ++** cmplo p0\.s, p1/z, z0\.s, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_16_u32, svuint32_t, ++ p0 = svcmplt_n_u32 (p1, z0, 16), ++ p0 = svcmplt (p1, z0, 16)) ++ ++/* ++** cmplt_127_u32: ++** cmplo p0\.s, p1/z, z0\.s, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_127_u32, svuint32_t, ++ p0 = svcmplt_n_u32 (p1, z0, 127), ++ p0 = svcmplt (p1, z0, 127)) ++ ++/* ++** cmplt_128_u32: ++** mov (z[0-9]+\.s), #128 ++** ( ++** cmphi p0\.s, p1/z, \1, z0\.s ++** | ++** cmplo p0\.s, p1/z, z0\.s, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_128_u32, svuint32_t, ++ p0 = svcmplt_n_u32 (p1, z0, 128), ++ p0 = svcmplt (p1, z0, 128)) ++ ++/* ++** cmplt_m1_u32: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphi p0\.s, p1/z, \1\.s, z0\.s ++** | ++** cmplo p0\.s, p1/z, z0\.s, \1\.s ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m1_u32, svuint32_t, ++ p0 = svcmplt_n_u32 (p1, z0, -1), ++ p0 = svcmplt (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u64.c +new file mode 100644 +index 000000000..d9de5add2 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u64.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_u64_tied: ++** ( ++** cmphi p0\.d, p0/z, z1\.d, z0\.d ++** | ++** cmplo p0\.d, p0/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_u64_tied, svuint64_t, ++ p0 = svcmplt_u64 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_u64_untied: ++** ( ++** cmphi p0\.d, p1/z, z1\.d, z0\.d ++** | ++** cmplo p0\.d, p1/z, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_u64_untied, svuint64_t, ++ p0 = svcmplt_u64 (p1, z0, z1), ++ p0 = svcmplt (p1, z0, z1)) ++ ++/* ++** cmplt_x0_u64: ++** mov (z[0-9]+\.d), x0 ++** ( ++** cmphi p0\.d, p1/z, \1, z0\.d ++** | ++** cmplo p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_x0_u64, svuint64_t, uint64_t, ++ p0 = svcmplt_n_u64 (p1, z0, x0), ++ p0 = svcmplt (p1, z0, x0)) ++ ++/* ++** cmplt_0_u64: ++** cmplo p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_u64, svuint64_t, ++ p0 = svcmplt_n_u64 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_u64: ++** cmplo p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_u64, svuint64_t, ++ p0 = svcmplt_n_u64 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) ++ ++/* ++** cmplt_15_u64: ++** cmplo p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_15_u64, svuint64_t, ++ p0 = svcmplt_n_u64 (p1, z0, 15), ++ p0 = svcmplt (p1, z0, 15)) ++ ++/* ++** cmplt_16_u64: ++** cmplo p0\.d, p1/z, z0\.d, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_16_u64, svuint64_t, ++ p0 = svcmplt_n_u64 (p1, z0, 16), ++ p0 = svcmplt (p1, z0, 16)) ++ ++/* ++** cmplt_127_u64: ++** cmplo p0\.d, p1/z, z0\.d, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_127_u64, svuint64_t, ++ p0 = svcmplt_n_u64 (p1, z0, 127), ++ p0 = svcmplt (p1, z0, 127)) ++ ++/* ++** cmplt_128_u64: ++** mov (z[0-9]+\.d), #128 ++** ( ++** cmphi p0\.d, p1/z, \1, z0\.d ++** | ++** cmplo p0\.d, p1/z, z0\.d, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_128_u64, svuint64_t, ++ p0 = svcmplt_n_u64 (p1, z0, 128), ++ p0 = svcmplt (p1, z0, 128)) ++ ++/* ++** cmplt_m1_u64: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** cmphi p0\.d, p1/z, \1\.d, z0\.d ++** | ++** cmplo p0\.d, p1/z, z0\.d, \1\.d ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m1_u64, svuint64_t, ++ p0 = svcmplt_n_u64 (p1, z0, -1), ++ p0 = svcmplt (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u8.c +new file mode 100644 +index 000000000..42d5ad868 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u8.c +@@ -0,0 +1,116 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_u8_tied: ++** ( ++** cmphi p0\.b, p0/z, z1\.b, z0\.b ++** | ++** cmplo p0\.b, p0/z, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_u8_tied, svuint8_t, ++ p0 = svcmplt_u8 (p0, z0, z1), ++ p0 = svcmplt (p0, z0, z1)) ++ ++/* ++** cmplt_u8_untied: ++** ( ++** cmphi p0\.b, p1/z, z1\.b, z0\.b ++** | ++** cmplo p0\.b, p1/z, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_u8_untied, svuint8_t, ++ p0 = svcmplt_u8 (p1, z0, z1), ++ p0 = svcmplt (p1, z0, z1)) ++ ++/* ++** cmplt_w0_u8: ++** mov (z[0-9]+\.b), w0 ++** ( ++** cmphi p0\.b, p1/z, \1, z0\.b ++** | ++** cmplo p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_w0_u8, svuint8_t, uint8_t, ++ p0 = svcmplt_n_u8 (p1, z0, x0), ++ p0 = 
svcmplt (p1, z0, x0)) ++ ++/* ++** cmplt_0_u8: ++** cmplo p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_0_u8, svuint8_t, ++ p0 = svcmplt_n_u8 (p1, z0, 0), ++ p0 = svcmplt (p1, z0, 0)) ++ ++/* ++** cmplt_1_u8: ++** cmplo p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_1_u8, svuint8_t, ++ p0 = svcmplt_n_u8 (p1, z0, 1), ++ p0 = svcmplt (p1, z0, 1)) ++ ++/* ++** cmplt_15_u8: ++** cmplo p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_15_u8, svuint8_t, ++ p0 = svcmplt_n_u8 (p1, z0, 15), ++ p0 = svcmplt (p1, z0, 15)) ++ ++/* ++** cmplt_16_u8: ++** cmplo p0\.b, p1/z, z0\.b, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_16_u8, svuint8_t, ++ p0 = svcmplt_n_u8 (p1, z0, 16), ++ p0 = svcmplt (p1, z0, 16)) ++ ++/* ++** cmplt_127_u8: ++** cmplo p0\.b, p1/z, z0\.b, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_127_u8, svuint8_t, ++ p0 = svcmplt_n_u8 (p1, z0, 127), ++ p0 = svcmplt (p1, z0, 127)) ++ ++/* ++** cmplt_128_u8: ++** mov (z[0-9]+\.b), #-128 ++** ( ++** cmphi p0\.b, p1/z, \1, z0\.b ++** | ++** cmplo p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_128_u8, svuint8_t, ++ p0 = svcmplt_n_u8 (p1, z0, 128), ++ p0 = svcmplt (p1, z0, 128)) ++ ++/* ++** cmplt_m1_u8: ++** mov (z[0-9]+\.b), #-1 ++** ( ++** cmphi p0\.b, p1/z, \1, z0\.b ++** | ++** cmplo p0\.b, p1/z, z0\.b, \1 ++** ) ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_m1_u8, svuint8_t, ++ p0 = svcmplt_n_u8 (p1, z0, -1), ++ p0 = svcmplt (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s16.c +new file mode 100644 +index 000000000..a3c8942ba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_wide_s16_tied: ++** cmplt p0\.h, p0/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_s16_tied, svint16_t, svint64_t, ++ p0 = svcmplt_wide_s16 (p0, z0, z1), ++ p0 = svcmplt_wide (p0, z0, z1)) ++ ++/* ++** cmplt_wide_s16_untied: ++** cmplt p0\.h, p1/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_s16_untied, svint16_t, svint64_t, ++ p0 = svcmplt_wide_s16 (p1, z0, z1), ++ p0 = svcmplt_wide (p1, z0, z1)) ++ ++/* ++** cmplt_wide_x0_s16: ++** mov (z[0-9]+\.d), x0 ++** cmplt p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_wide_x0_s16, svint16_t, int64_t, ++ p0 = svcmplt_wide_n_s16 (p1, z0, x0), ++ p0 = svcmplt_wide (p1, z0, x0)) ++ ++/* ++** cmplt_wide_0_s16: ++** cmplt p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_0_s16, svint16_t, ++ p0 = svcmplt_wide_n_s16 (p1, z0, 0), ++ p0 = svcmplt_wide (p1, z0, 0)) ++ ++/* ++** cmplt_wide_1_s16: ++** cmplt p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_1_s16, svint16_t, ++ p0 = svcmplt_wide_n_s16 (p1, z0, 1), ++ p0 = svcmplt_wide (p1, z0, 1)) ++ ++/* ++** cmplt_wide_15_s16: ++** cmplt p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_15_s16, svint16_t, ++ p0 = svcmplt_wide_n_s16 (p1, z0, 15), ++ p0 = svcmplt_wide (p1, z0, 15)) ++ ++/* ++** cmplt_wide_16_s16: ++** mov (z[0-9]+\.d), #16 ++** cmplt p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_16_s16, svint16_t, ++ p0 = svcmplt_wide_n_s16 (p1, z0, 16), ++ p0 = svcmplt_wide (p1, z0, 16)) ++ ++/* ++** cmplt_wide_m1_s16: ++** cmplt p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m1_s16, svint16_t, ++ p0 = svcmplt_wide_n_s16 (p1, z0, -1), 
++ p0 = svcmplt_wide (p1, z0, -1)) ++ ++/* ++** cmplt_wide_m16_s16: ++** cmplt p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m16_s16, svint16_t, ++ p0 = svcmplt_wide_n_s16 (p1, z0, -16), ++ p0 = svcmplt_wide (p1, z0, -16)) ++ ++/* ++** cmplt_wide_m17_s16: ++** mov (z[0-9]+\.d), #-17 ++** cmplt p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m17_s16, svint16_t, ++ p0 = svcmplt_wide_n_s16 (p1, z0, -17), ++ p0 = svcmplt_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s32.c +new file mode 100644 +index 000000000..b2cad6773 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_wide_s32_tied: ++** cmplt p0\.s, p0/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_s32_tied, svint32_t, svint64_t, ++ p0 = svcmplt_wide_s32 (p0, z0, z1), ++ p0 = svcmplt_wide (p0, z0, z1)) ++ ++/* ++** cmplt_wide_s32_untied: ++** cmplt p0\.s, p1/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_s32_untied, svint32_t, svint64_t, ++ p0 = svcmplt_wide_s32 (p1, z0, z1), ++ p0 = svcmplt_wide (p1, z0, z1)) ++ ++/* ++** cmplt_wide_x0_s32: ++** mov (z[0-9]+\.d), x0 ++** cmplt p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_wide_x0_s32, svint32_t, int64_t, ++ p0 = svcmplt_wide_n_s32 (p1, z0, x0), ++ p0 = svcmplt_wide (p1, z0, x0)) ++ ++/* ++** cmplt_wide_0_s32: ++** cmplt p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_0_s32, svint32_t, ++ p0 = svcmplt_wide_n_s32 (p1, z0, 0), ++ p0 = svcmplt_wide (p1, z0, 0)) ++ ++/* ++** cmplt_wide_1_s32: ++** cmplt p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_1_s32, svint32_t, ++ p0 = svcmplt_wide_n_s32 (p1, z0, 1), ++ p0 = svcmplt_wide (p1, z0, 1)) ++ ++/* ++** cmplt_wide_15_s32: ++** cmplt p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_15_s32, svint32_t, ++ p0 = svcmplt_wide_n_s32 (p1, z0, 15), ++ p0 = svcmplt_wide (p1, z0, 15)) ++ ++/* ++** cmplt_wide_16_s32: ++** mov (z[0-9]+\.d), #16 ++** cmplt p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_16_s32, svint32_t, ++ p0 = svcmplt_wide_n_s32 (p1, z0, 16), ++ p0 = svcmplt_wide (p1, z0, 16)) ++ ++/* ++** cmplt_wide_m1_s32: ++** cmplt p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m1_s32, svint32_t, ++ p0 = svcmplt_wide_n_s32 (p1, z0, -1), ++ p0 = svcmplt_wide (p1, z0, -1)) ++ ++/* ++** cmplt_wide_m16_s32: ++** cmplt p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m16_s32, svint32_t, ++ p0 = svcmplt_wide_n_s32 (p1, z0, -16), ++ p0 = svcmplt_wide (p1, z0, -16)) ++ ++/* ++** cmplt_wide_m17_s32: ++** mov (z[0-9]+\.d), #-17 ++** cmplt p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m17_s32, svint32_t, ++ p0 = svcmplt_wide_n_s32 (p1, z0, -17), ++ p0 = svcmplt_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s8.c +new file mode 100644 +index 000000000..1015fe309 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_wide_s8_tied: ++** cmplt p0\.b, p0/z, z0\.b, z1\.d ++** ret ++*/ 
++TEST_COMPARE_DUAL_Z (cmplt_wide_s8_tied, svint8_t, svint64_t, ++ p0 = svcmplt_wide_s8 (p0, z0, z1), ++ p0 = svcmplt_wide (p0, z0, z1)) ++ ++/* ++** cmplt_wide_s8_untied: ++** cmplt p0\.b, p1/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_s8_untied, svint8_t, svint64_t, ++ p0 = svcmplt_wide_s8 (p1, z0, z1), ++ p0 = svcmplt_wide (p1, z0, z1)) ++ ++/* ++** cmplt_wide_x0_s8: ++** mov (z[0-9]+\.d), x0 ++** cmplt p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_wide_x0_s8, svint8_t, int64_t, ++ p0 = svcmplt_wide_n_s8 (p1, z0, x0), ++ p0 = svcmplt_wide (p1, z0, x0)) ++ ++/* ++** cmplt_wide_0_s8: ++** cmplt p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_0_s8, svint8_t, ++ p0 = svcmplt_wide_n_s8 (p1, z0, 0), ++ p0 = svcmplt_wide (p1, z0, 0)) ++ ++/* ++** cmplt_wide_1_s8: ++** cmplt p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_1_s8, svint8_t, ++ p0 = svcmplt_wide_n_s8 (p1, z0, 1), ++ p0 = svcmplt_wide (p1, z0, 1)) ++ ++/* ++** cmplt_wide_15_s8: ++** cmplt p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_15_s8, svint8_t, ++ p0 = svcmplt_wide_n_s8 (p1, z0, 15), ++ p0 = svcmplt_wide (p1, z0, 15)) ++ ++/* ++** cmplt_wide_16_s8: ++** mov (z[0-9]+\.d), #16 ++** cmplt p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_16_s8, svint8_t, ++ p0 = svcmplt_wide_n_s8 (p1, z0, 16), ++ p0 = svcmplt_wide (p1, z0, 16)) ++ ++/* ++** cmplt_wide_m1_s8: ++** cmplt p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m1_s8, svint8_t, ++ p0 = svcmplt_wide_n_s8 (p1, z0, -1), ++ p0 = svcmplt_wide (p1, z0, -1)) ++ ++/* ++** cmplt_wide_m16_s8: ++** cmplt p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m16_s8, svint8_t, ++ p0 = svcmplt_wide_n_s8 (p1, z0, -16), ++ p0 = svcmplt_wide (p1, z0, -16)) ++ ++/* ++** cmplt_wide_m17_s8: ++** mov (z[0-9]+\.d), #-17 ++** cmplt p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m17_s8, svint8_t, ++ p0 = svcmplt_wide_n_s8 (p1, z0, -17), ++ p0 = svcmplt_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u16.c +new file mode 100644 +index 000000000..851400d36 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_wide_u16_tied: ++** cmplo p0\.h, p0/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_u16_tied, svuint16_t, svuint64_t, ++ p0 = svcmplt_wide_u16 (p0, z0, z1), ++ p0 = svcmplt_wide (p0, z0, z1)) ++ ++/* ++** cmplt_wide_u16_untied: ++** cmplo p0\.h, p1/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_u16_untied, svuint16_t, svuint64_t, ++ p0 = svcmplt_wide_u16 (p1, z0, z1), ++ p0 = svcmplt_wide (p1, z0, z1)) ++ ++/* ++** cmplt_wide_x0_u16: ++** mov (z[0-9]+\.d), x0 ++** cmplo p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_wide_x0_u16, svuint16_t, uint64_t, ++ p0 = svcmplt_wide_n_u16 (p1, z0, x0), ++ p0 = svcmplt_wide (p1, z0, x0)) ++ ++/* ++** cmplt_wide_0_u16: ++** cmplo p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_0_u16, svuint16_t, ++ p0 = svcmplt_wide_n_u16 (p1, z0, 0), ++ p0 = svcmplt_wide (p1, z0, 0)) ++ ++/* ++** cmplt_wide_1_u16: ++** cmplo p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_1_u16, svuint16_t, ++ p0 = svcmplt_wide_n_u16 (p1, z0, 1), ++ p0 = svcmplt_wide (p1, z0, 1)) 
++ ++/* ++** cmplt_wide_15_u16: ++** cmplo p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_15_u16, svuint16_t, ++ p0 = svcmplt_wide_n_u16 (p1, z0, 15), ++ p0 = svcmplt_wide (p1, z0, 15)) ++ ++/* ++** cmplt_wide_16_u16: ++** cmplo p0\.h, p1/z, z0\.h, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_16_u16, svuint16_t, ++ p0 = svcmplt_wide_n_u16 (p1, z0, 16), ++ p0 = svcmplt_wide (p1, z0, 16)) ++ ++/* ++** cmplt_wide_127_u16: ++** cmplo p0\.h, p1/z, z0\.h, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_127_u16, svuint16_t, ++ p0 = svcmplt_wide_n_u16 (p1, z0, 127), ++ p0 = svcmplt_wide (p1, z0, 127)) ++ ++/* ++** cmplt_wide_128_u16: ++** mov (z[0-9]+\.d), #128 ++** cmplo p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_128_u16, svuint16_t, ++ p0 = svcmplt_wide_n_u16 (p1, z0, 128), ++ p0 = svcmplt_wide (p1, z0, 128)) ++ ++/* ++** cmplt_wide_m1_u16: ++** mov (z[0-9]+)\.b, #-1 ++** cmplo p0\.h, p1/z, z0\.h, \1\.d ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m1_u16, svuint16_t, ++ p0 = svcmplt_wide_n_u16 (p1, z0, -1), ++ p0 = svcmplt_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u32.c +new file mode 100644 +index 000000000..1f9652def +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_wide_u32_tied: ++** cmplo p0\.s, p0/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_u32_tied, svuint32_t, svuint64_t, ++ p0 = svcmplt_wide_u32 (p0, z0, z1), ++ p0 = svcmplt_wide (p0, z0, z1)) ++ ++/* ++** cmplt_wide_u32_untied: ++** cmplo p0\.s, p1/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_u32_untied, svuint32_t, svuint64_t, ++ p0 = svcmplt_wide_u32 (p1, z0, z1), ++ p0 = svcmplt_wide (p1, z0, z1)) ++ ++/* ++** cmplt_wide_x0_u32: ++** mov (z[0-9]+\.d), x0 ++** cmplo p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_wide_x0_u32, svuint32_t, uint64_t, ++ p0 = svcmplt_wide_n_u32 (p1, z0, x0), ++ p0 = svcmplt_wide (p1, z0, x0)) ++ ++/* ++** cmplt_wide_0_u32: ++** cmplo p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_0_u32, svuint32_t, ++ p0 = svcmplt_wide_n_u32 (p1, z0, 0), ++ p0 = svcmplt_wide (p1, z0, 0)) ++ ++/* ++** cmplt_wide_1_u32: ++** cmplo p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_1_u32, svuint32_t, ++ p0 = svcmplt_wide_n_u32 (p1, z0, 1), ++ p0 = svcmplt_wide (p1, z0, 1)) ++ ++/* ++** cmplt_wide_15_u32: ++** cmplo p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_15_u32, svuint32_t, ++ p0 = svcmplt_wide_n_u32 (p1, z0, 15), ++ p0 = svcmplt_wide (p1, z0, 15)) ++ ++/* ++** cmplt_wide_16_u32: ++** cmplo p0\.s, p1/z, z0\.s, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_16_u32, svuint32_t, ++ p0 = svcmplt_wide_n_u32 (p1, z0, 16), ++ p0 = svcmplt_wide (p1, z0, 16)) ++ ++/* ++** cmplt_wide_127_u32: ++** cmplo p0\.s, p1/z, z0\.s, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_127_u32, svuint32_t, ++ p0 = svcmplt_wide_n_u32 (p1, z0, 127), ++ p0 = svcmplt_wide (p1, z0, 127)) ++ ++/* ++** cmplt_wide_128_u32: ++** mov (z[0-9]+\.d), #128 ++** cmplo p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_128_u32, svuint32_t, ++ p0 = svcmplt_wide_n_u32 (p1, z0, 128), ++ p0 = svcmplt_wide (p1, z0, 128)) ++ ++/* ++** cmplt_wide_m1_u32: ++** mov (z[0-9]+)\.b, #-1 ++** cmplo p0\.s, p1/z, z0\.s, \1\.d ++** 
ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m1_u32, svuint32_t, ++ p0 = svcmplt_wide_n_u32 (p1, z0, -1), ++ p0 = svcmplt_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u8.c +new file mode 100644 +index 000000000..95ef3cf16 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmplt_wide_u8_tied: ++** cmplo p0\.b, p0/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_u8_tied, svuint8_t, svuint64_t, ++ p0 = svcmplt_wide_u8 (p0, z0, z1), ++ p0 = svcmplt_wide (p0, z0, z1)) ++ ++/* ++** cmplt_wide_u8_untied: ++** cmplo p0\.b, p1/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmplt_wide_u8_untied, svuint8_t, svuint64_t, ++ p0 = svcmplt_wide_u8 (p1, z0, z1), ++ p0 = svcmplt_wide (p1, z0, z1)) ++ ++/* ++** cmplt_wide_x0_u8: ++** mov (z[0-9]+\.d), x0 ++** cmplo p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmplt_wide_x0_u8, svuint8_t, uint64_t, ++ p0 = svcmplt_wide_n_u8 (p1, z0, x0), ++ p0 = svcmplt_wide (p1, z0, x0)) ++ ++/* ++** cmplt_wide_0_u8: ++** cmplo p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_0_u8, svuint8_t, ++ p0 = svcmplt_wide_n_u8 (p1, z0, 0), ++ p0 = svcmplt_wide (p1, z0, 0)) ++ ++/* ++** cmplt_wide_1_u8: ++** cmplo p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_1_u8, svuint8_t, ++ p0 = svcmplt_wide_n_u8 (p1, z0, 1), ++ p0 = svcmplt_wide (p1, z0, 1)) ++ ++/* ++** cmplt_wide_15_u8: ++** cmplo p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_15_u8, svuint8_t, ++ p0 = svcmplt_wide_n_u8 (p1, z0, 15), ++ p0 = svcmplt_wide (p1, z0, 15)) ++ ++/* ++** cmplt_wide_16_u8: ++** cmplo p0\.b, p1/z, z0\.b, #16 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_16_u8, svuint8_t, ++ p0 = svcmplt_wide_n_u8 (p1, z0, 16), ++ p0 = svcmplt_wide (p1, z0, 16)) ++ ++/* ++** cmplt_wide_127_u8: ++** cmplo p0\.b, p1/z, z0\.b, #127 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_127_u8, svuint8_t, ++ p0 = svcmplt_wide_n_u8 (p1, z0, 127), ++ p0 = svcmplt_wide (p1, z0, 127)) ++ ++/* ++** cmplt_wide_128_u8: ++** mov (z[0-9]+\.d), #128 ++** cmplo p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_128_u8, svuint8_t, ++ p0 = svcmplt_wide_n_u8 (p1, z0, 128), ++ p0 = svcmplt_wide (p1, z0, 128)) ++ ++/* ++** cmplt_wide_m1_u8: ++** mov (z[0-9]+)\.b, #-1 ++** cmplo p0\.b, p1/z, z0\.b, \1\.d ++** ret ++*/ ++TEST_COMPARE_Z (cmplt_wide_m1_u8, svuint8_t, ++ p0 = svcmplt_wide_n_u8 (p1, z0, -1), ++ p0 = svcmplt_wide (p1, z0, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f16.c +new file mode 100644 +index 000000000..63e203b09 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f16.c +@@ -0,0 +1,50 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_f16_tied: ++** fcmne p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_f16_tied, svfloat16_t, ++ p0 = svcmpne_f16 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_f16_untied: ++** fcmne p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_f16_untied, svfloat16_t, ++ p0 = svcmpne_f16 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** fcmne p0\.h, 
p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpne_h4_f16, svfloat16_t, float16_t, ++ p0 = svcmpne_n_f16 (p1, z0, d4), ++ p0 = svcmpne (p1, z0, d4)) ++ ++/* ++** cmpne_0_f16: ++** fcmne p0\.h, p1/z, z0\.h, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_f16, svfloat16_t, ++ p0 = svcmpne_n_f16 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fcmne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_f16, svfloat16_t, ++ p0 = svcmpne_n_f16 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f32.c +new file mode 100644 +index 000000000..f81e2da51 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f32.c +@@ -0,0 +1,50 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_f32_tied: ++** fcmne p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_f32_tied, svfloat32_t, ++ p0 = svcmpne_f32 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_f32_untied: ++** fcmne p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_f32_untied, svfloat32_t, ++ p0 = svcmpne_f32 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** fcmne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpne_s4_f32, svfloat32_t, float32_t, ++ p0 = svcmpne_n_f32 (p1, z0, d4), ++ p0 = svcmpne (p1, z0, d4)) ++ ++/* ++** cmpne_0_f32: ++** fcmne p0\.s, p1/z, z0\.s, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_f32, svfloat32_t, ++ p0 = svcmpne_n_f32 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fcmne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_f32, svfloat32_t, ++ p0 = svcmpne_n_f32 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f64.c +new file mode 100644 +index 000000000..22e4eeef4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f64.c +@@ -0,0 +1,50 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_f64_tied: ++** fcmne p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_f64_tied, svfloat64_t, ++ p0 = svcmpne_f64 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_f64_untied: ++** fcmne p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_f64_untied, svfloat64_t, ++ p0 = svcmpne_f64 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** fcmne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpne_d4_f64, svfloat64_t, float64_t, ++ p0 = svcmpne_n_f64 (p1, z0, d4), ++ p0 = svcmpne (p1, z0, d4)) ++ ++/* ++** cmpne_0_f64: ++** fcmne p0\.d, p1/z, z0\.d, #0\.0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_f64, svfloat64_t, ++ p0 = svcmpne_n_f64 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** fcmne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_f64, svfloat64_t, ++ p0 = svcmpne_n_f64 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s16.c +new file mode 100644 +index 000000000..d8c743f8b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_s16_tied: ++** cmpne p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_s16_tied, svint16_t, ++ p0 = svcmpne_s16 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_s16_untied: ++** cmpne p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_s16_untied, svint16_t, ++ p0 = svcmpne_s16 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_w0_s16: ++** mov (z[0-9]+\.h), w0 ++** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_w0_s16, svint16_t, int16_t, ++ p0 = svcmpne_n_s16 (p1, z0, x0), ++ p0 = svcmpne (p1, z0, x0)) ++ ++/* ++** cmpne_0_s16: ++** cmpne p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_s16, svint16_t, ++ p0 = svcmpne_n_s16 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_s16: ++** cmpne p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_s16, svint16_t, ++ p0 = svcmpne_n_s16 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) ++ ++/* ++** cmpne_15_s16: ++** cmpne p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_15_s16, svint16_t, ++ p0 = svcmpne_n_s16 (p1, z0, 15), ++ p0 = svcmpne (p1, z0, 15)) ++ ++/* ++** cmpne_16_s16: ++** mov (z[0-9]+\.h), #16 ++** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_16_s16, svint16_t, ++ p0 = svcmpne_n_s16 (p1, z0, 16), ++ p0 = svcmpne (p1, z0, 16)) ++ ++/* ++** cmpne_m1_s16: ++** cmpne p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m1_s16, svint16_t, ++ p0 = svcmpne_n_s16 (p1, z0, -1), ++ p0 = svcmpne (p1, z0, -1)) ++ ++/* ++** cmpne_m16_s16: ++** cmpne p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m16_s16, svint16_t, ++ p0 = svcmpne_n_s16 (p1, z0, -16), ++ p0 = svcmpne (p1, z0, -16)) ++ ++/* ++** cmpne_m17_s16: ++** mov (z[0-9]+\.h), #-17 ++** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m17_s16, svint16_t, ++ p0 = svcmpne_n_s16 (p1, z0, -17), ++ p0 = svcmpne (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s32.c +new file mode 100644 +index 000000000..0d3c35111 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_s32_tied: ++** cmpne p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_s32_tied, svint32_t, ++ p0 = svcmpne_s32 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_s32_untied: ++** cmpne p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_s32_untied, svint32_t, ++ p0 = svcmpne_s32 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_w0_s32: ++** mov (z[0-9]+\.s), w0 ++** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_w0_s32, svint32_t, int32_t, ++ p0 = svcmpne_n_s32 (p1, z0, 
x0), ++ p0 = svcmpne (p1, z0, x0)) ++ ++/* ++** cmpne_0_s32: ++** cmpne p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_s32, svint32_t, ++ p0 = svcmpne_n_s32 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_s32: ++** cmpne p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_s32, svint32_t, ++ p0 = svcmpne_n_s32 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) ++ ++/* ++** cmpne_15_s32: ++** cmpne p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_15_s32, svint32_t, ++ p0 = svcmpne_n_s32 (p1, z0, 15), ++ p0 = svcmpne (p1, z0, 15)) ++ ++/* ++** cmpne_16_s32: ++** mov (z[0-9]+\.s), #16 ++** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_16_s32, svint32_t, ++ p0 = svcmpne_n_s32 (p1, z0, 16), ++ p0 = svcmpne (p1, z0, 16)) ++ ++/* ++** cmpne_m1_s32: ++** cmpne p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m1_s32, svint32_t, ++ p0 = svcmpne_n_s32 (p1, z0, -1), ++ p0 = svcmpne (p1, z0, -1)) ++ ++/* ++** cmpne_m16_s32: ++** cmpne p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m16_s32, svint32_t, ++ p0 = svcmpne_n_s32 (p1, z0, -16), ++ p0 = svcmpne (p1, z0, -16)) ++ ++/* ++** cmpne_m17_s32: ++** mov (z[0-9]+\.s), #-17 ++** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m17_s32, svint32_t, ++ p0 = svcmpne_n_s32 (p1, z0, -17), ++ p0 = svcmpne (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s64.c +new file mode 100644 +index 000000000..4cf78f2dd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s64.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_s64_tied: ++** cmpne p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_s64_tied, svint64_t, ++ p0 = svcmpne_s64 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_s64_untied: ++** cmpne p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_s64_untied, svint64_t, ++ p0 = svcmpne_s64 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_x0_s64: ++** mov (z[0-9]+\.d), x0 ++** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_x0_s64, svint64_t, int64_t, ++ p0 = svcmpne_n_s64 (p1, z0, x0), ++ p0 = svcmpne (p1, z0, x0)) ++ ++/* ++** cmpne_0_s64: ++** cmpne p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_s64, svint64_t, ++ p0 = svcmpne_n_s64 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_s64: ++** cmpne p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_s64, svint64_t, ++ p0 = svcmpne_n_s64 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) ++ ++/* ++** cmpne_15_s64: ++** cmpne p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_15_s64, svint64_t, ++ p0 = svcmpne_n_s64 (p1, z0, 15), ++ p0 = svcmpne (p1, z0, 15)) ++ ++/* ++** cmpne_16_s64: ++** mov (z[0-9]+\.d), #16 ++** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_16_s64, svint64_t, ++ p0 = svcmpne_n_s64 (p1, z0, 16), ++ p0 = svcmpne (p1, z0, 16)) ++ ++/* ++** cmpne_m1_s64: ++** cmpne p0\.d, p1/z, z0\.d, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m1_s64, svint64_t, ++ p0 = svcmpne_n_s64 (p1, z0, -1), ++ p0 = svcmpne (p1, z0, -1)) ++ ++/* ++** cmpne_m16_s64: ++** cmpne p0\.d, p1/z, z0\.d, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m16_s64, svint64_t, ++ p0 = svcmpne_n_s64 (p1, z0, -16), ++ p0 = 
svcmpne (p1, z0, -16)) ++ ++/* ++** cmpne_m17_s64: ++** mov (z[0-9]+\.d), #-17 ++** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m17_s64, svint64_t, ++ p0 = svcmpne_n_s64 (p1, z0, -17), ++ p0 = svcmpne (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s8.c +new file mode 100644 +index 000000000..6409ecdd4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_s8_tied: ++** cmpne p0\.b, p0/z, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_s8_tied, svint8_t, ++ p0 = svcmpne_s8 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_s8_untied: ++** cmpne p0\.b, p1/z, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_s8_untied, svint8_t, ++ p0 = svcmpne_s8 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_w0_s8: ++** mov (z[0-9]+\.b), w0 ++** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_w0_s8, svint8_t, int8_t, ++ p0 = svcmpne_n_s8 (p1, z0, x0), ++ p0 = svcmpne (p1, z0, x0)) ++ ++/* ++** cmpne_0_s8: ++** cmpne p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_s8, svint8_t, ++ p0 = svcmpne_n_s8 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_s8: ++** cmpne p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_s8, svint8_t, ++ p0 = svcmpne_n_s8 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) ++ ++/* ++** cmpne_15_s8: ++** cmpne p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_15_s8, svint8_t, ++ p0 = svcmpne_n_s8 (p1, z0, 15), ++ p0 = svcmpne (p1, z0, 15)) ++ ++/* ++** cmpne_16_s8: ++** mov (z[0-9]+\.b), #16 ++** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_16_s8, svint8_t, ++ p0 = svcmpne_n_s8 (p1, z0, 16), ++ p0 = svcmpne (p1, z0, 16)) ++ ++/* ++** cmpne_m1_s8: ++** cmpne p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m1_s8, svint8_t, ++ p0 = svcmpne_n_s8 (p1, z0, -1), ++ p0 = svcmpne (p1, z0, -1)) ++ ++/* ++** cmpne_m16_s8: ++** cmpne p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m16_s8, svint8_t, ++ p0 = svcmpne_n_s8 (p1, z0, -16), ++ p0 = svcmpne (p1, z0, -16)) ++ ++/* ++** cmpne_m17_s8: ++** mov (z[0-9]+\.b), #-17 ++** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m17_s8, svint8_t, ++ p0 = svcmpne_n_s8 (p1, z0, -17), ++ p0 = svcmpne (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u16.c +new file mode 100644 +index 000000000..4d22bc7d3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_u16_tied: ++** cmpne p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_u16_tied, svuint16_t, ++ p0 = svcmpne_u16 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_u16_untied: ++** cmpne p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_u16_untied, svuint16_t, ++ p0 = svcmpne_u16 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_w0_u16: ++** mov (z[0-9]+\.h), w0 ++** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_w0_u16, 
svuint16_t, uint16_t, ++ p0 = svcmpne_n_u16 (p1, z0, x0), ++ p0 = svcmpne (p1, z0, x0)) ++ ++/* ++** cmpne_0_u16: ++** cmpne p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_u16, svuint16_t, ++ p0 = svcmpne_n_u16 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_u16: ++** cmpne p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_u16, svuint16_t, ++ p0 = svcmpne_n_u16 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) ++ ++/* ++** cmpne_15_u16: ++** cmpne p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_15_u16, svuint16_t, ++ p0 = svcmpne_n_u16 (p1, z0, 15), ++ p0 = svcmpne (p1, z0, 15)) ++ ++/* ++** cmpne_16_u16: ++** mov (z[0-9]+\.h), #16 ++** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_16_u16, svuint16_t, ++ p0 = svcmpne_n_u16 (p1, z0, 16), ++ p0 = svcmpne (p1, z0, 16)) ++ ++/* ++** cmpne_m1_u16: ++** cmpne p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m1_u16, svuint16_t, ++ p0 = svcmpne_n_u16 (p1, z0, -1), ++ p0 = svcmpne (p1, z0, -1)) ++ ++/* ++** cmpne_m16_u16: ++** cmpne p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m16_u16, svuint16_t, ++ p0 = svcmpne_n_u16 (p1, z0, -16), ++ p0 = svcmpne (p1, z0, -16)) ++ ++/* ++** cmpne_m17_u16: ++** mov (z[0-9]+\.h), #-17 ++** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m17_u16, svuint16_t, ++ p0 = svcmpne_n_u16 (p1, z0, -17), ++ p0 = svcmpne (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u32.c +new file mode 100644 +index 000000000..b7ca94a69 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_u32_tied: ++** cmpne p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_u32_tied, svuint32_t, ++ p0 = svcmpne_u32 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_u32_untied: ++** cmpne p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_u32_untied, svuint32_t, ++ p0 = svcmpne_u32 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_w0_u32: ++** mov (z[0-9]+\.s), w0 ++** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_w0_u32, svuint32_t, uint32_t, ++ p0 = svcmpne_n_u32 (p1, z0, x0), ++ p0 = svcmpne (p1, z0, x0)) ++ ++/* ++** cmpne_0_u32: ++** cmpne p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_u32, svuint32_t, ++ p0 = svcmpne_n_u32 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_u32: ++** cmpne p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_u32, svuint32_t, ++ p0 = svcmpne_n_u32 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) ++ ++/* ++** cmpne_15_u32: ++** cmpne p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_15_u32, svuint32_t, ++ p0 = svcmpne_n_u32 (p1, z0, 15), ++ p0 = svcmpne (p1, z0, 15)) ++ ++/* ++** cmpne_16_u32: ++** mov (z[0-9]+\.s), #16 ++** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_16_u32, svuint32_t, ++ p0 = svcmpne_n_u32 (p1, z0, 16), ++ p0 = svcmpne (p1, z0, 16)) ++ ++/* ++** cmpne_m1_u32: ++** cmpne p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m1_u32, svuint32_t, ++ p0 = svcmpne_n_u32 (p1, z0, -1), ++ p0 = svcmpne (p1, z0, -1)) ++ ++/* ++** cmpne_m16_u32: ++** cmpne p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z 
(cmpne_m16_u32, svuint32_t, ++ p0 = svcmpne_n_u32 (p1, z0, -16), ++ p0 = svcmpne (p1, z0, -16)) ++ ++/* ++** cmpne_m17_u32: ++** mov (z[0-9]+\.s), #-17 ++** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m17_u32, svuint32_t, ++ p0 = svcmpne_n_u32 (p1, z0, -17), ++ p0 = svcmpne (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u64.c +new file mode 100644 +index 000000000..960ac85b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u64.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_u64_tied: ++** cmpne p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_u64_tied, svuint64_t, ++ p0 = svcmpne_u64 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_u64_untied: ++** cmpne p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_u64_untied, svuint64_t, ++ p0 = svcmpne_u64 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_x0_u64: ++** mov (z[0-9]+\.d), x0 ++** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_x0_u64, svuint64_t, uint64_t, ++ p0 = svcmpne_n_u64 (p1, z0, x0), ++ p0 = svcmpne (p1, z0, x0)) ++ ++/* ++** cmpne_0_u64: ++** cmpne p0\.d, p1/z, z0\.d, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_u64, svuint64_t, ++ p0 = svcmpne_n_u64 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_u64: ++** cmpne p0\.d, p1/z, z0\.d, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_u64, svuint64_t, ++ p0 = svcmpne_n_u64 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) ++ ++/* ++** cmpne_15_u64: ++** cmpne p0\.d, p1/z, z0\.d, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_15_u64, svuint64_t, ++ p0 = svcmpne_n_u64 (p1, z0, 15), ++ p0 = svcmpne (p1, z0, 15)) ++ ++/* ++** cmpne_16_u64: ++** mov (z[0-9]+\.d), #16 ++** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_16_u64, svuint64_t, ++ p0 = svcmpne_n_u64 (p1, z0, 16), ++ p0 = svcmpne (p1, z0, 16)) ++ ++/* ++** cmpne_m1_u64: ++** cmpne p0\.d, p1/z, z0\.d, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m1_u64, svuint64_t, ++ p0 = svcmpne_n_u64 (p1, z0, -1), ++ p0 = svcmpne (p1, z0, -1)) ++ ++/* ++** cmpne_m16_u64: ++** cmpne p0\.d, p1/z, z0\.d, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m16_u64, svuint64_t, ++ p0 = svcmpne_n_u64 (p1, z0, -16), ++ p0 = svcmpne (p1, z0, -16)) ++ ++/* ++** cmpne_m17_u64: ++** mov (z[0-9]+\.d), #-17 ++** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m17_u64, svuint64_t, ++ p0 = svcmpne_n_u64 (p1, z0, -17), ++ p0 = svcmpne (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u8.c +new file mode 100644 +index 000000000..cb8496eab +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_u8_tied: ++** cmpne p0\.b, p0/z, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_u8_tied, svuint8_t, ++ p0 = svcmpne_u8 (p0, z0, z1), ++ p0 = svcmpne (p0, z0, z1)) ++ ++/* ++** cmpne_u8_untied: ++** cmpne p0\.b, p1/z, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_u8_untied, svuint8_t, ++ p0 = svcmpne_u8 (p1, z0, z1), ++ p0 = svcmpne (p1, z0, z1)) ++ ++/* ++** cmpne_w0_u8: ++** mov 
(z[0-9]+\.b), w0 ++** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_w0_u8, svuint8_t, uint8_t, ++ p0 = svcmpne_n_u8 (p1, z0, x0), ++ p0 = svcmpne (p1, z0, x0)) ++ ++/* ++** cmpne_0_u8: ++** cmpne p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_0_u8, svuint8_t, ++ p0 = svcmpne_n_u8 (p1, z0, 0), ++ p0 = svcmpne (p1, z0, 0)) ++ ++/* ++** cmpne_1_u8: ++** cmpne p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_1_u8, svuint8_t, ++ p0 = svcmpne_n_u8 (p1, z0, 1), ++ p0 = svcmpne (p1, z0, 1)) ++ ++/* ++** cmpne_15_u8: ++** cmpne p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_15_u8, svuint8_t, ++ p0 = svcmpne_n_u8 (p1, z0, 15), ++ p0 = svcmpne (p1, z0, 15)) ++ ++/* ++** cmpne_16_u8: ++** mov (z[0-9]+\.b), #16 ++** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_16_u8, svuint8_t, ++ p0 = svcmpne_n_u8 (p1, z0, 16), ++ p0 = svcmpne (p1, z0, 16)) ++ ++/* ++** cmpne_m1_u8: ++** cmpne p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m1_u8, svuint8_t, ++ p0 = svcmpne_n_u8 (p1, z0, -1), ++ p0 = svcmpne (p1, z0, -1)) ++ ++/* ++** cmpne_m16_u8: ++** cmpne p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m16_u8, svuint8_t, ++ p0 = svcmpne_n_u8 (p1, z0, -16), ++ p0 = svcmpne (p1, z0, -16)) ++ ++/* ++** cmpne_m17_u8: ++** mov (z[0-9]+\.b), #-17 ++** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_m17_u8, svuint8_t, ++ p0 = svcmpne_n_u8 (p1, z0, -17), ++ p0 = svcmpne (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s16.c +new file mode 100644 +index 000000000..4cb7586c9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_wide_s16_tied: ++** cmpne p0\.h, p0/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpne_wide_s16_tied, svint16_t, svint64_t, ++ p0 = svcmpne_wide_s16 (p0, z0, z1), ++ p0 = svcmpne_wide (p0, z0, z1)) ++ ++/* ++** cmpne_wide_s16_untied: ++** cmpne p0\.h, p1/z, z0\.h, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpne_wide_s16_untied, svint16_t, svint64_t, ++ p0 = svcmpne_wide_s16 (p1, z0, z1), ++ p0 = svcmpne_wide (p1, z0, z1)) ++ ++/* ++** cmpne_wide_x0_s16: ++** mov (z[0-9]+\.d), x0 ++** cmpne p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_wide_x0_s16, svint16_t, int64_t, ++ p0 = svcmpne_wide_n_s16 (p1, z0, x0), ++ p0 = svcmpne_wide (p1, z0, x0)) ++ ++/* ++** cmpne_wide_0_s16: ++** cmpne p0\.h, p1/z, z0\.h, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_0_s16, svint16_t, ++ p0 = svcmpne_wide_n_s16 (p1, z0, 0), ++ p0 = svcmpne_wide (p1, z0, 0)) ++ ++/* ++** cmpne_wide_1_s16: ++** cmpne p0\.h, p1/z, z0\.h, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_1_s16, svint16_t, ++ p0 = svcmpne_wide_n_s16 (p1, z0, 1), ++ p0 = svcmpne_wide (p1, z0, 1)) ++ ++/* ++** cmpne_wide_15_s16: ++** cmpne p0\.h, p1/z, z0\.h, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_15_s16, svint16_t, ++ p0 = svcmpne_wide_n_s16 (p1, z0, 15), ++ p0 = svcmpne_wide (p1, z0, 15)) ++ ++/* ++** cmpne_wide_16_s16: ++** mov (z[0-9]+\.d), #16 ++** cmpne p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_16_s16, svint16_t, ++ p0 = svcmpne_wide_n_s16 (p1, z0, 16), ++ p0 = svcmpne_wide (p1, z0, 16)) ++ ++/* ++** cmpne_wide_m1_s16: ++** cmpne p0\.h, p1/z, z0\.h, #-1 ++** ret ++*/ 
++TEST_COMPARE_Z (cmpne_wide_m1_s16, svint16_t, ++ p0 = svcmpne_wide_n_s16 (p1, z0, -1), ++ p0 = svcmpne_wide (p1, z0, -1)) ++ ++/* ++** cmpne_wide_m16_s16: ++** cmpne p0\.h, p1/z, z0\.h, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_m16_s16, svint16_t, ++ p0 = svcmpne_wide_n_s16 (p1, z0, -16), ++ p0 = svcmpne_wide (p1, z0, -16)) ++ ++/* ++** cmpne_wide_m17_s16: ++** mov (z[0-9]+\.d), #-17 ++** cmpne p0\.h, p1/z, z0\.h, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_m17_s16, svint16_t, ++ p0 = svcmpne_wide_n_s16 (p1, z0, -17), ++ p0 = svcmpne_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s32.c +new file mode 100644 +index 000000000..633994ed3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpne_wide_s32_tied: ++** cmpne p0\.s, p0/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpne_wide_s32_tied, svint32_t, svint64_t, ++ p0 = svcmpne_wide_s32 (p0, z0, z1), ++ p0 = svcmpne_wide (p0, z0, z1)) ++ ++/* ++** cmpne_wide_s32_untied: ++** cmpne p0\.s, p1/z, z0\.s, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpne_wide_s32_untied, svint32_t, svint64_t, ++ p0 = svcmpne_wide_s32 (p1, z0, z1), ++ p0 = svcmpne_wide (p1, z0, z1)) ++ ++/* ++** cmpne_wide_x0_s32: ++** mov (z[0-9]+\.d), x0 ++** cmpne p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_wide_x0_s32, svint32_t, int64_t, ++ p0 = svcmpne_wide_n_s32 (p1, z0, x0), ++ p0 = svcmpne_wide (p1, z0, x0)) ++ ++/* ++** cmpne_wide_0_s32: ++** cmpne p0\.s, p1/z, z0\.s, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_0_s32, svint32_t, ++ p0 = svcmpne_wide_n_s32 (p1, z0, 0), ++ p0 = svcmpne_wide (p1, z0, 0)) ++ ++/* ++** cmpne_wide_1_s32: ++** cmpne p0\.s, p1/z, z0\.s, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_1_s32, svint32_t, ++ p0 = svcmpne_wide_n_s32 (p1, z0, 1), ++ p0 = svcmpne_wide (p1, z0, 1)) ++ ++/* ++** cmpne_wide_15_s32: ++** cmpne p0\.s, p1/z, z0\.s, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_15_s32, svint32_t, ++ p0 = svcmpne_wide_n_s32 (p1, z0, 15), ++ p0 = svcmpne_wide (p1, z0, 15)) ++ ++/* ++** cmpne_wide_16_s32: ++** mov (z[0-9]+\.d), #16 ++** cmpne p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_16_s32, svint32_t, ++ p0 = svcmpne_wide_n_s32 (p1, z0, 16), ++ p0 = svcmpne_wide (p1, z0, 16)) ++ ++/* ++** cmpne_wide_m1_s32: ++** cmpne p0\.s, p1/z, z0\.s, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_m1_s32, svint32_t, ++ p0 = svcmpne_wide_n_s32 (p1, z0, -1), ++ p0 = svcmpne_wide (p1, z0, -1)) ++ ++/* ++** cmpne_wide_m16_s32: ++** cmpne p0\.s, p1/z, z0\.s, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_m16_s32, svint32_t, ++ p0 = svcmpne_wide_n_s32 (p1, z0, -16), ++ p0 = svcmpne_wide (p1, z0, -16)) ++ ++/* ++** cmpne_wide_m17_s32: ++** mov (z[0-9]+\.d), #-17 ++** cmpne p0\.s, p1/z, z0\.s, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_m17_s32, svint32_t, ++ p0 = svcmpne_wide_n_s32 (p1, z0, -17), ++ p0 = svcmpne_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s8.c +new file mode 100644 +index 000000000..de343f4cc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s8.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** 
cmpne_wide_s8_tied: ++** cmpne p0\.b, p0/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpne_wide_s8_tied, svint8_t, svint64_t, ++ p0 = svcmpne_wide_s8 (p0, z0, z1), ++ p0 = svcmpne_wide (p0, z0, z1)) ++ ++/* ++** cmpne_wide_s8_untied: ++** cmpne p0\.b, p1/z, z0\.b, z1\.d ++** ret ++*/ ++TEST_COMPARE_DUAL_Z (cmpne_wide_s8_untied, svint8_t, svint64_t, ++ p0 = svcmpne_wide_s8 (p1, z0, z1), ++ p0 = svcmpne_wide (p1, z0, z1)) ++ ++/* ++** cmpne_wide_x0_s8: ++** mov (z[0-9]+\.d), x0 ++** cmpne p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_ZX (cmpne_wide_x0_s8, svint8_t, int64_t, ++ p0 = svcmpne_wide_n_s8 (p1, z0, x0), ++ p0 = svcmpne_wide (p1, z0, x0)) ++ ++/* ++** cmpne_wide_0_s8: ++** cmpne p0\.b, p1/z, z0\.b, #0 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_0_s8, svint8_t, ++ p0 = svcmpne_wide_n_s8 (p1, z0, 0), ++ p0 = svcmpne_wide (p1, z0, 0)) ++ ++/* ++** cmpne_wide_1_s8: ++** cmpne p0\.b, p1/z, z0\.b, #1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_1_s8, svint8_t, ++ p0 = svcmpne_wide_n_s8 (p1, z0, 1), ++ p0 = svcmpne_wide (p1, z0, 1)) ++ ++/* ++** cmpne_wide_15_s8: ++** cmpne p0\.b, p1/z, z0\.b, #15 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_15_s8, svint8_t, ++ p0 = svcmpne_wide_n_s8 (p1, z0, 15), ++ p0 = svcmpne_wide (p1, z0, 15)) ++ ++/* ++** cmpne_wide_16_s8: ++** mov (z[0-9]+\.d), #16 ++** cmpne p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_16_s8, svint8_t, ++ p0 = svcmpne_wide_n_s8 (p1, z0, 16), ++ p0 = svcmpne_wide (p1, z0, 16)) ++ ++/* ++** cmpne_wide_m1_s8: ++** cmpne p0\.b, p1/z, z0\.b, #-1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_m1_s8, svint8_t, ++ p0 = svcmpne_wide_n_s8 (p1, z0, -1), ++ p0 = svcmpne_wide (p1, z0, -1)) ++ ++/* ++** cmpne_wide_m16_s8: ++** cmpne p0\.b, p1/z, z0\.b, #-16 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_m16_s8, svint8_t, ++ p0 = svcmpne_wide_n_s8 (p1, z0, -16), ++ p0 = svcmpne_wide (p1, z0, -16)) ++ ++/* ++** cmpne_wide_m17_s8: ++** mov (z[0-9]+\.d), #-17 ++** cmpne p0\.b, p1/z, z0\.b, \1 ++** ret ++*/ ++TEST_COMPARE_Z (cmpne_wide_m17_s8, svint8_t, ++ p0 = svcmpne_wide_n_s8 (p1, z0, -17), ++ p0 = svcmpne_wide (p1, z0, -17)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f16.c +new file mode 100644 +index 000000000..8f702cdde +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f16.c +@@ -0,0 +1,51 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpuo_f16_tied: ++** fcmuo p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_f16_tied, svfloat16_t, ++ p0 = svcmpuo_f16 (p0, z0, z1), ++ p0 = svcmpuo (p0, z0, z1)) ++ ++/* ++** cmpuo_f16_untied: ++** fcmuo p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_f16_untied, svfloat16_t, ++ p0 = svcmpuo_f16 (p1, z0, z1), ++ p0 = svcmpuo (p1, z0, z1)) ++ ++/* ++** cmpuo_h4_f16: ++** mov (z[0-9]+\.h), h4 ++** fcmuo p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpuo_h4_f16, svfloat16_t, float16_t, ++ p0 = svcmpuo_n_f16 (p1, z0, d4), ++ p0 = svcmpuo (p1, z0, d4)) ++ ++/* ++** cmpuo_0_f16: ++** mov (z[0-9]+\.h), #0 ++** fcmuo p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_0_f16, svfloat16_t, ++ p0 = svcmpuo_n_f16 (p1, z0, 0), ++ p0 = svcmpuo (p1, z0, 0)) ++ ++/* ++** cmpuo_1_f16: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** fcmuo p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_1_f16, svfloat16_t, ++ p0 = svcmpuo_n_f16 (p1, z0, 1), ++ p0 = svcmpuo (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f32.c +new file mode 100644 +index 000000000..8827604aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f32.c +@@ -0,0 +1,51 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpuo_f32_tied: ++** fcmuo p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_f32_tied, svfloat32_t, ++ p0 = svcmpuo_f32 (p0, z0, z1), ++ p0 = svcmpuo (p0, z0, z1)) ++ ++/* ++** cmpuo_f32_untied: ++** fcmuo p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_f32_untied, svfloat32_t, ++ p0 = svcmpuo_f32 (p1, z0, z1), ++ p0 = svcmpuo (p1, z0, z1)) ++ ++/* ++** cmpuo_s4_f32: ++** mov (z[0-9]+\.s), s4 ++** fcmuo p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpuo_s4_f32, svfloat32_t, float32_t, ++ p0 = svcmpuo_n_f32 (p1, z0, d4), ++ p0 = svcmpuo (p1, z0, d4)) ++ ++/* ++** cmpuo_0_f32: ++** mov (z[0-9]+\.s), #0 ++** fcmuo p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_0_f32, svfloat32_t, ++ p0 = svcmpuo_n_f32 (p1, z0, 0), ++ p0 = svcmpuo (p1, z0, 0)) ++ ++/* ++** cmpuo_1_f32: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fcmuo p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_1_f32, svfloat32_t, ++ p0 = svcmpuo_n_f32 (p1, z0, 1), ++ p0 = svcmpuo (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f64.c +new file mode 100644 +index 000000000..d7a71eca4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f64.c +@@ -0,0 +1,51 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cmpuo_f64_tied: ++** fcmuo p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_f64_tied, svfloat64_t, ++ p0 = svcmpuo_f64 (p0, z0, z1), ++ p0 = svcmpuo (p0, z0, z1)) ++ ++/* ++** cmpuo_f64_untied: ++** fcmuo p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_f64_untied, svfloat64_t, ++ p0 = svcmpuo_f64 (p1, z0, z1), ++ p0 = svcmpuo (p1, z0, z1)) ++ ++/* ++** cmpuo_d4_f64: ++** mov (z[0-9]+\.d), d4 ++** fcmuo p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_ZD (cmpuo_d4_f64, svfloat64_t, float64_t, ++ p0 = svcmpuo_n_f64 (p1, z0, d4), ++ p0 = svcmpuo (p1, z0, d4)) ++ ++/* ++** cmpuo_0_f64: ++** mov (z[0-9]+\.d), #0 ++** fcmuo p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_0_f64, svfloat64_t, ++ p0 = svcmpuo_n_f64 (p1, z0, 0), ++ p0 = svcmpuo (p1, z0, 0)) ++ ++/* ++** cmpuo_1_f64: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** fcmuo p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_COMPARE_Z (cmpuo_1_f64, svfloat64_t, ++ p0 = svcmpuo_n_f64 (p1, z0, 1), ++ p0 = svcmpuo (p1, z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s16.c +new file mode 100644 +index 000000000..19d46be68 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnot_s16_m_tied12: ++** cnot z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s16_m_tied12, svint16_t, ++ z0 = svcnot_s16_m (z0, p0, z0), ++ z0 = svcnot_m (z0, p0, z0)) ++ ++/* ++** cnot_s16_m_tied1: ++** cnot z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s16_m_tied1, svint16_t, ++ z0 = svcnot_s16_m (z0, p0, z1), ++ z0 = svcnot_m (z0, p0, z1)) ++ ++/* ++** cnot_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** cnot z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s16_m_tied2, svint16_t, ++ z0 = svcnot_s16_m (z1, p0, z0), ++ z0 = svcnot_m (z1, p0, z0)) ++ ++/* ++** cnot_s16_m_untied: ++** movprfx z0, z2 ++** cnot z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s16_m_untied, svint16_t, ++ z0 = svcnot_s16_m (z2, p0, z1), ++ z0 = svcnot_m (z2, p0, z1)) ++ ++/* ++** cnot_s16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** cnot z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s16_z_tied1, svint16_t, ++ z0 = svcnot_s16_z (p0, z0), ++ z0 = svcnot_z (p0, z0)) ++ ++/* ++** cnot_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** cnot z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s16_z_untied, svint16_t, ++ z0 = svcnot_s16_z (p0, z1), ++ z0 = svcnot_z (p0, z1)) ++ ++/* ++** cnot_s16_x_tied1: ++** cnot z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s16_x_tied1, svint16_t, ++ z0 = svcnot_s16_x (p0, z0), ++ z0 = svcnot_x (p0, z0)) ++ ++/* ++** cnot_s16_x_untied: ++** cnot z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s16_x_untied, svint16_t, ++ z0 = svcnot_s16_x (p0, z1), ++ z0 = svcnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s32.c +new file mode 100644 +index 000000000..041b59a04 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnot_s32_m_tied12: ++** cnot z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s32_m_tied12, svint32_t, ++ z0 = svcnot_s32_m (z0, p0, z0), ++ z0 = svcnot_m (z0, p0, z0)) ++ ++/* ++** cnot_s32_m_tied1: ++** cnot z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s32_m_tied1, svint32_t, ++ z0 = svcnot_s32_m (z0, p0, z1), ++ z0 = svcnot_m (z0, p0, z1)) ++ ++/* ++** cnot_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** cnot z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s32_m_tied2, svint32_t, ++ z0 = svcnot_s32_m (z1, p0, z0), ++ z0 = svcnot_m (z1, p0, z0)) ++ ++/* ++** cnot_s32_m_untied: ++** movprfx z0, z2 ++** cnot z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s32_m_untied, svint32_t, ++ z0 = svcnot_s32_m (z2, p0, z1), ++ z0 = svcnot_m (z2, p0, z1)) ++ ++/* ++** cnot_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** cnot z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s32_z_tied1, 
svint32_t, ++ z0 = svcnot_s32_z (p0, z0), ++ z0 = svcnot_z (p0, z0)) ++ ++/* ++** cnot_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** cnot z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s32_z_untied, svint32_t, ++ z0 = svcnot_s32_z (p0, z1), ++ z0 = svcnot_z (p0, z1)) ++ ++/* ++** cnot_s32_x_tied1: ++** cnot z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s32_x_tied1, svint32_t, ++ z0 = svcnot_s32_x (p0, z0), ++ z0 = svcnot_x (p0, z0)) ++ ++/* ++** cnot_s32_x_untied: ++** cnot z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s32_x_untied, svint32_t, ++ z0 = svcnot_s32_x (p0, z1), ++ z0 = svcnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s64.c +new file mode 100644 +index 000000000..c7135cb95 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnot_s64_m_tied12: ++** cnot z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s64_m_tied12, svint64_t, ++ z0 = svcnot_s64_m (z0, p0, z0), ++ z0 = svcnot_m (z0, p0, z0)) ++ ++/* ++** cnot_s64_m_tied1: ++** cnot z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s64_m_tied1, svint64_t, ++ z0 = svcnot_s64_m (z0, p0, z1), ++ z0 = svcnot_m (z0, p0, z1)) ++ ++/* ++** cnot_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** cnot z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s64_m_tied2, svint64_t, ++ z0 = svcnot_s64_m (z1, p0, z0), ++ z0 = svcnot_m (z1, p0, z0)) ++ ++/* ++** cnot_s64_m_untied: ++** movprfx z0, z2 ++** cnot z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s64_m_untied, svint64_t, ++ z0 = svcnot_s64_m (z2, p0, z1), ++ z0 = svcnot_m (z2, p0, z1)) ++ ++/* ++** cnot_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** cnot z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s64_z_tied1, svint64_t, ++ z0 = svcnot_s64_z (p0, z0), ++ z0 = svcnot_z (p0, z0)) ++ ++/* ++** cnot_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** cnot z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s64_z_untied, svint64_t, ++ z0 = svcnot_s64_z (p0, z1), ++ z0 = svcnot_z (p0, z1)) ++ ++/* ++** cnot_s64_x_tied1: ++** cnot z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s64_x_tied1, svint64_t, ++ z0 = svcnot_s64_x (p0, z0), ++ z0 = svcnot_x (p0, z0)) ++ ++/* ++** cnot_s64_x_untied: ++** cnot z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s64_x_untied, svint64_t, ++ z0 = svcnot_s64_x (p0, z1), ++ z0 = svcnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s8.c +new file mode 100644 +index 000000000..0560f9751 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s8.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnot_s8_m_tied12: ++** cnot z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s8_m_tied12, svint8_t, ++ z0 = svcnot_s8_m (z0, p0, z0), ++ z0 = svcnot_m (z0, p0, z0)) ++ ++/* ++** cnot_s8_m_tied1: ++** cnot z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s8_m_tied1, svint8_t, ++ z0 = svcnot_s8_m (z0, p0, z1), ++ z0 = svcnot_m (z0, p0, z1)) ++ ++/* ++** cnot_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** cnot z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s8_m_tied2, svint8_t, 
++ z0 = svcnot_s8_m (z1, p0, z0), ++ z0 = svcnot_m (z1, p0, z0)) ++ ++/* ++** cnot_s8_m_untied: ++** movprfx z0, z2 ++** cnot z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s8_m_untied, svint8_t, ++ z0 = svcnot_s8_m (z2, p0, z1), ++ z0 = svcnot_m (z2, p0, z1)) ++ ++/* ++** cnot_s8_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, \1\.b ++** cnot z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s8_z_tied1, svint8_t, ++ z0 = svcnot_s8_z (p0, z0), ++ z0 = svcnot_z (p0, z0)) ++ ++/* ++** cnot_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** cnot z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s8_z_untied, svint8_t, ++ z0 = svcnot_s8_z (p0, z1), ++ z0 = svcnot_z (p0, z1)) ++ ++/* ++** cnot_s8_x_tied1: ++** cnot z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s8_x_tied1, svint8_t, ++ z0 = svcnot_s8_x (p0, z0), ++ z0 = svcnot_x (p0, z0)) ++ ++/* ++** cnot_s8_x_untied: ++** cnot z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_s8_x_untied, svint8_t, ++ z0 = svcnot_s8_x (p0, z1), ++ z0 = svcnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u16.c +new file mode 100644 +index 000000000..7ea9ff71d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnot_u16_m_tied12: ++** cnot z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u16_m_tied12, svuint16_t, ++ z0 = svcnot_u16_m (z0, p0, z0), ++ z0 = svcnot_m (z0, p0, z0)) ++ ++/* ++** cnot_u16_m_tied1: ++** cnot z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u16_m_tied1, svuint16_t, ++ z0 = svcnot_u16_m (z0, p0, z1), ++ z0 = svcnot_m (z0, p0, z1)) ++ ++/* ++** cnot_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** cnot z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u16_m_tied2, svuint16_t, ++ z0 = svcnot_u16_m (z1, p0, z0), ++ z0 = svcnot_m (z1, p0, z0)) ++ ++/* ++** cnot_u16_m_untied: ++** movprfx z0, z2 ++** cnot z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u16_m_untied, svuint16_t, ++ z0 = svcnot_u16_m (z2, p0, z1), ++ z0 = svcnot_m (z2, p0, z1)) ++ ++/* ++** cnot_u16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** cnot z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u16_z_tied1, svuint16_t, ++ z0 = svcnot_u16_z (p0, z0), ++ z0 = svcnot_z (p0, z0)) ++ ++/* ++** cnot_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** cnot z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u16_z_untied, svuint16_t, ++ z0 = svcnot_u16_z (p0, z1), ++ z0 = svcnot_z (p0, z1)) ++ ++/* ++** cnot_u16_x_tied1: ++** cnot z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u16_x_tied1, svuint16_t, ++ z0 = svcnot_u16_x (p0, z0), ++ z0 = svcnot_x (p0, z0)) ++ ++/* ++** cnot_u16_x_untied: ++** cnot z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u16_x_untied, svuint16_t, ++ z0 = svcnot_u16_x (p0, z1), ++ z0 = svcnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u32.c +new file mode 100644 +index 000000000..972c7751e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnot_u32_m_tied12: ++** cnot z0\.s, p0/m, z0\.s ++** ret ++*/ 
++TEST_UNIFORM_Z (cnot_u32_m_tied12, svuint32_t, ++ z0 = svcnot_u32_m (z0, p0, z0), ++ z0 = svcnot_m (z0, p0, z0)) ++ ++/* ++** cnot_u32_m_tied1: ++** cnot z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u32_m_tied1, svuint32_t, ++ z0 = svcnot_u32_m (z0, p0, z1), ++ z0 = svcnot_m (z0, p0, z1)) ++ ++/* ++** cnot_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** cnot z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u32_m_tied2, svuint32_t, ++ z0 = svcnot_u32_m (z1, p0, z0), ++ z0 = svcnot_m (z1, p0, z0)) ++ ++/* ++** cnot_u32_m_untied: ++** movprfx z0, z2 ++** cnot z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u32_m_untied, svuint32_t, ++ z0 = svcnot_u32_m (z2, p0, z1), ++ z0 = svcnot_m (z2, p0, z1)) ++ ++/* ++** cnot_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** cnot z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u32_z_tied1, svuint32_t, ++ z0 = svcnot_u32_z (p0, z0), ++ z0 = svcnot_z (p0, z0)) ++ ++/* ++** cnot_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** cnot z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u32_z_untied, svuint32_t, ++ z0 = svcnot_u32_z (p0, z1), ++ z0 = svcnot_z (p0, z1)) ++ ++/* ++** cnot_u32_x_tied1: ++** cnot z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u32_x_tied1, svuint32_t, ++ z0 = svcnot_u32_x (p0, z0), ++ z0 = svcnot_x (p0, z0)) ++ ++/* ++** cnot_u32_x_untied: ++** cnot z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u32_x_untied, svuint32_t, ++ z0 = svcnot_u32_x (p0, z1), ++ z0 = svcnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u64.c +new file mode 100644 +index 000000000..f25e001c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnot_u64_m_tied12: ++** cnot z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u64_m_tied12, svuint64_t, ++ z0 = svcnot_u64_m (z0, p0, z0), ++ z0 = svcnot_m (z0, p0, z0)) ++ ++/* ++** cnot_u64_m_tied1: ++** cnot z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u64_m_tied1, svuint64_t, ++ z0 = svcnot_u64_m (z0, p0, z1), ++ z0 = svcnot_m (z0, p0, z1)) ++ ++/* ++** cnot_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** cnot z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u64_m_tied2, svuint64_t, ++ z0 = svcnot_u64_m (z1, p0, z0), ++ z0 = svcnot_m (z1, p0, z0)) ++ ++/* ++** cnot_u64_m_untied: ++** movprfx z0, z2 ++** cnot z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u64_m_untied, svuint64_t, ++ z0 = svcnot_u64_m (z2, p0, z1), ++ z0 = svcnot_m (z2, p0, z1)) ++ ++/* ++** cnot_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** cnot z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u64_z_tied1, svuint64_t, ++ z0 = svcnot_u64_z (p0, z0), ++ z0 = svcnot_z (p0, z0)) ++ ++/* ++** cnot_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** cnot z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u64_z_untied, svuint64_t, ++ z0 = svcnot_u64_z (p0, z1), ++ z0 = svcnot_z (p0, z1)) ++ ++/* ++** cnot_u64_x_tied1: ++** cnot z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u64_x_tied1, svuint64_t, ++ z0 = svcnot_u64_x (p0, z0), ++ z0 = svcnot_x (p0, z0)) ++ ++/* ++** cnot_u64_x_untied: ++** cnot z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u64_x_untied, svuint64_t, ++ z0 = svcnot_u64_x (p0, z1), ++ 
z0 = svcnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u8.c +new file mode 100644 +index 000000000..e135a7295 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u8.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnot_u8_m_tied12: ++** cnot z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u8_m_tied12, svuint8_t, ++ z0 = svcnot_u8_m (z0, p0, z0), ++ z0 = svcnot_m (z0, p0, z0)) ++ ++/* ++** cnot_u8_m_tied1: ++** cnot z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u8_m_tied1, svuint8_t, ++ z0 = svcnot_u8_m (z0, p0, z1), ++ z0 = svcnot_m (z0, p0, z1)) ++ ++/* ++** cnot_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** cnot z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u8_m_tied2, svuint8_t, ++ z0 = svcnot_u8_m (z1, p0, z0), ++ z0 = svcnot_m (z1, p0, z0)) ++ ++/* ++** cnot_u8_m_untied: ++** movprfx z0, z2 ++** cnot z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u8_m_untied, svuint8_t, ++ z0 = svcnot_u8_m (z2, p0, z1), ++ z0 = svcnot_m (z2, p0, z1)) ++ ++/* ++** cnot_u8_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, \1\.b ++** cnot z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u8_z_tied1, svuint8_t, ++ z0 = svcnot_u8_z (p0, z0), ++ z0 = svcnot_z (p0, z0)) ++ ++/* ++** cnot_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** cnot z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u8_z_untied, svuint8_t, ++ z0 = svcnot_u8_z (p0, z1), ++ z0 = svcnot_z (p0, z1)) ++ ++/* ++** cnot_u8_x_tied1: ++** cnot z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u8_x_tied1, svuint8_t, ++ z0 = svcnot_u8_x (p0, z0), ++ z0 = svcnot_x (p0, z0)) ++ ++/* ++** cnot_u8_x_untied: ++** cnot z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnot_u8_x_untied, svuint8_t, ++ z0 = svcnot_u8_x (p0, z1), ++ z0 = svcnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_bf16.c +new file mode 100644 +index 000000000..d92fbc157 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_bf16.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_bf16_m_tied1: ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_bf16_m_tied1, svuint16_t, svbfloat16_t, ++ z0 = svcnt_bf16_m (z0, p0, z4), ++ z0 = svcnt_m (z0, p0, z4)) ++ ++/* ++** cnt_bf16_m_untied: ++** movprfx z0, z1 ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_bf16_m_untied, svuint16_t, svbfloat16_t, ++ z0 = svcnt_bf16_m (z1, p0, z4), ++ z0 = svcnt_m (z1, p0, z4)) ++ ++/* ++** cnt_bf16_z: ++** movprfx z0\.h, p0/z, z4\.h ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_bf16_z, svuint16_t, svbfloat16_t, ++ z0 = svcnt_bf16_z (p0, z4), ++ z0 = svcnt_z (p0, z4)) ++ ++/* ++** cnt_bf16_x: ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_bf16_x, svuint16_t, svbfloat16_t, ++ z0 = svcnt_bf16_x (p0, z4), ++ z0 = svcnt_x (p0, z4)) ++ ++/* ++** ptrue_cnt_bf16_x: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_DUAL_Z (ptrue_cnt_bf16_x, svuint16_t, svbfloat16_t, ++ z0 = svcnt_bf16_x (svptrue_b16 (), z4), ++ z0 = svcnt_x (svptrue_b16 (), z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f16.c +new file mode 100644 +index 000000000..b8061bb80 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f16.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_f16_m_tied1: ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_f16_m_tied1, svuint16_t, svfloat16_t, ++ z0 = svcnt_f16_m (z0, p0, z4), ++ z0 = svcnt_m (z0, p0, z4)) ++ ++/* ++** cnt_f16_m_untied: ++** movprfx z0, z1 ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_f16_m_untied, svuint16_t, svfloat16_t, ++ z0 = svcnt_f16_m (z1, p0, z4), ++ z0 = svcnt_m (z1, p0, z4)) ++ ++/* ++** cnt_f16_z: ++** movprfx z0\.h, p0/z, z4\.h ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_f16_z, svuint16_t, svfloat16_t, ++ z0 = svcnt_f16_z (p0, z4), ++ z0 = svcnt_z (p0, z4)) ++ ++/* ++** cnt_f16_x: ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_f16_x, svuint16_t, svfloat16_t, ++ z0 = svcnt_f16_x (p0, z4), ++ z0 = svcnt_x (p0, z4)) ++ ++/* ++** ptrue_cnt_f16_x: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cnt_f16_x, svuint16_t, svfloat16_t, ++ z0 = svcnt_f16_x (svptrue_b16 (), z4), ++ z0 = svcnt_x (svptrue_b16 (), z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f32.c +new file mode 100644 +index 000000000..b9292c977 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f32.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_f32_m_tied1: ++** cnt z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cnt_f32_m_tied1, svuint32_t, svfloat32_t, ++ z0 = svcnt_f32_m (z0, p0, z4), ++ z0 = svcnt_m (z0, p0, z4)) ++ ++/* ++** cnt_f32_m_untied: ++** movprfx z0, z1 ++** cnt z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cnt_f32_m_untied, svuint32_t, svfloat32_t, ++ z0 = svcnt_f32_m (z1, p0, z4), ++ z0 = svcnt_m (z1, p0, z4)) ++ ++/* ++** cnt_f32_z: ++** movprfx z0\.s, p0/z, z4\.s ++** cnt z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cnt_f32_z, svuint32_t, svfloat32_t, ++ z0 = svcnt_f32_z (p0, z4), ++ z0 = svcnt_z (p0, z4)) ++ ++/* ++** cnt_f32_x: ++** cnt z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cnt_f32_x, svuint32_t, svfloat32_t, ++ z0 = svcnt_f32_x (p0, z4), ++ z0 = svcnt_x (p0, z4)) ++ ++/* ++** ptrue_cnt_f32_x: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_DUAL_Z (ptrue_cnt_f32_x, svuint32_t, svfloat32_t, ++ z0 = svcnt_f32_x (svptrue_b32 (), z4), ++ z0 = svcnt_x (svptrue_b32 (), z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f64.c +new file mode 100644 +index 000000000..4976ee467 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f64.c +@@ -0,0 +1,52 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_f64_m_tied1: ++** cnt z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cnt_f64_m_tied1, svuint64_t, svfloat64_t, ++ z0 = svcnt_f64_m (z0, p0, z4), ++ z0 = svcnt_m (z0, p0, z4)) ++ ++/* ++** cnt_f64_m_untied: ++** movprfx z0, z1 ++** cnt z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cnt_f64_m_untied, svuint64_t, svfloat64_t, ++ z0 = svcnt_f64_m (z1, p0, z4), ++ z0 = svcnt_m (z1, p0, z4)) ++ ++/* ++** cnt_f64_z: ++** movprfx z0\.d, p0/z, z4\.d ++** cnt z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cnt_f64_z, svuint64_t, svfloat64_t, ++ z0 = svcnt_f64_z (p0, z4), ++ z0 = svcnt_z (p0, z4)) ++ ++/* ++** cnt_f64_x: ++** cnt z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cnt_f64_x, svuint64_t, svfloat64_t, ++ z0 = svcnt_f64_x (p0, z4), ++ z0 = svcnt_x (p0, z4)) ++ ++/* ++** ptrue_cnt_f64_x: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cnt_f64_x, svuint64_t, svfloat64_t, ++ z0 = svcnt_f64_x (svptrue_b64 (), z4), ++ z0 = svcnt_x (svptrue_b64 (), z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s16.c +new file mode 100644 +index 000000000..a8ff8f3d2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s16.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_s16_m_tied1: ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_s16_m_tied1, svuint16_t, svint16_t, ++ z0 = svcnt_s16_m (z0, p0, z4), ++ z0 = svcnt_m (z0, p0, z4)) ++ ++/* ++** cnt_s16_m_untied: ++** movprfx z0, z1 ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_s16_m_untied, svuint16_t, svint16_t, ++ z0 = svcnt_s16_m (z1, p0, z4), ++ z0 = svcnt_m (z1, p0, z4)) ++ ++/* ++** cnt_s16_z: ++** movprfx z0\.h, p0/z, z4\.h ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_s16_z, svuint16_t, svint16_t, ++ z0 = svcnt_s16_z (p0, z4), ++ z0 = svcnt_z (p0, z4)) ++ ++/* ++** cnt_s16_x: ++** cnt z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cnt_s16_x, svuint16_t, svint16_t, ++ z0 = svcnt_s16_x (p0, z4), ++ z0 = svcnt_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s32.c +new file mode 100644 +index 000000000..3d16041f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s32.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_s32_m_tied1: ++** cnt z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cnt_s32_m_tied1, svuint32_t, svint32_t, ++ z0 = svcnt_s32_m (z0, p0, z4), ++ z0 = svcnt_m (z0, p0, z4)) ++ ++/* ++** cnt_s32_m_untied: ++** movprfx z0, z1 ++** cnt z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cnt_s32_m_untied, svuint32_t, svint32_t, ++ z0 = svcnt_s32_m (z1, p0, z4), ++ z0 = svcnt_m (z1, p0, z4)) ++ ++/* ++** cnt_s32_z: ++** movprfx z0\.s, p0/z, z4\.s ++** cnt z0\.s, p0/m, z4\.s 
++** ret ++*/ ++TEST_DUAL_Z (cnt_s32_z, svuint32_t, svint32_t, ++ z0 = svcnt_s32_z (p0, z4), ++ z0 = svcnt_z (p0, z4)) ++ ++/* ++** cnt_s32_x: ++** cnt z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cnt_s32_x, svuint32_t, svint32_t, ++ z0 = svcnt_s32_x (p0, z4), ++ z0 = svcnt_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s64.c +new file mode 100644 +index 000000000..8c8871ba5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s64.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_s64_m_tied1: ++** cnt z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cnt_s64_m_tied1, svuint64_t, svint64_t, ++ z0 = svcnt_s64_m (z0, p0, z4), ++ z0 = svcnt_m (z0, p0, z4)) ++ ++/* ++** cnt_s64_m_untied: ++** movprfx z0, z1 ++** cnt z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cnt_s64_m_untied, svuint64_t, svint64_t, ++ z0 = svcnt_s64_m (z1, p0, z4), ++ z0 = svcnt_m (z1, p0, z4)) ++ ++/* ++** cnt_s64_z: ++** movprfx z0\.d, p0/z, z4\.d ++** cnt z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cnt_s64_z, svuint64_t, svint64_t, ++ z0 = svcnt_s64_z (p0, z4), ++ z0 = svcnt_z (p0, z4)) ++ ++/* ++** cnt_s64_x: ++** cnt z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cnt_s64_x, svuint64_t, svint64_t, ++ z0 = svcnt_s64_x (p0, z4), ++ z0 = svcnt_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s8.c +new file mode 100644 +index 000000000..8d85c8e51 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s8.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_s8_m_tied1: ++** cnt z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (cnt_s8_m_tied1, svuint8_t, svint8_t, ++ z0 = svcnt_s8_m (z0, p0, z4), ++ z0 = svcnt_m (z0, p0, z4)) ++ ++/* ++** cnt_s8_m_untied: ++** movprfx z0, z1 ++** cnt z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (cnt_s8_m_untied, svuint8_t, svint8_t, ++ z0 = svcnt_s8_m (z1, p0, z4), ++ z0 = svcnt_m (z1, p0, z4)) ++ ++/* ++** cnt_s8_z: ++** movprfx z0\.b, p0/z, z4\.b ++** cnt z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (cnt_s8_z, svuint8_t, svint8_t, ++ z0 = svcnt_s8_z (p0, z4), ++ z0 = svcnt_z (p0, z4)) ++ ++/* ++** cnt_s8_x: ++** cnt z0\.b, p0/m, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (cnt_s8_x, svuint8_t, svint8_t, ++ z0 = svcnt_s8_x (p0, z4), ++ z0 = svcnt_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u16.c +new file mode 100644 +index 000000000..f173d3108 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_u16_m_tied12: ++** cnt z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u16_m_tied12, svuint16_t, ++ z0 = svcnt_u16_m (z0, p0, z0), ++ z0 = svcnt_m (z0, p0, z0)) ++ ++/* ++** cnt_u16_m_tied1: ++** cnt z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u16_m_tied1, svuint16_t, ++ z0 = svcnt_u16_m (z0, p0, z1), ++ z0 = svcnt_m (z0, p0, z1)) ++ ++/* ++** cnt_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** cnt z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u16_m_tied2, svuint16_t, ++ z0 = svcnt_u16_m (z1, p0, z0), ++ z0 = svcnt_m (z1, p0, z0)) ++ 
++/* ++** cnt_u16_m_untied: ++** movprfx z0, z2 ++** cnt z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u16_m_untied, svuint16_t, ++ z0 = svcnt_u16_m (z2, p0, z1), ++ z0 = svcnt_m (z2, p0, z1)) ++ ++/* ++** cnt_u16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** cnt z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u16_z_tied1, svuint16_t, ++ z0 = svcnt_u16_z (p0, z0), ++ z0 = svcnt_z (p0, z0)) ++ ++/* ++** cnt_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** cnt z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u16_z_untied, svuint16_t, ++ z0 = svcnt_u16_z (p0, z1), ++ z0 = svcnt_z (p0, z1)) ++ ++/* ++** cnt_u16_x_tied1: ++** cnt z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u16_x_tied1, svuint16_t, ++ z0 = svcnt_u16_x (p0, z0), ++ z0 = svcnt_x (p0, z0)) ++ ++/* ++** cnt_u16_x_untied: ++** cnt z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u16_x_untied, svuint16_t, ++ z0 = svcnt_u16_x (p0, z1), ++ z0 = svcnt_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u32.c +new file mode 100644 +index 000000000..11969a6b6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_u32_m_tied12: ++** cnt z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u32_m_tied12, svuint32_t, ++ z0 = svcnt_u32_m (z0, p0, z0), ++ z0 = svcnt_m (z0, p0, z0)) ++ ++/* ++** cnt_u32_m_tied1: ++** cnt z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u32_m_tied1, svuint32_t, ++ z0 = svcnt_u32_m (z0, p0, z1), ++ z0 = svcnt_m (z0, p0, z1)) ++ ++/* ++** cnt_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** cnt z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u32_m_tied2, svuint32_t, ++ z0 = svcnt_u32_m (z1, p0, z0), ++ z0 = svcnt_m (z1, p0, z0)) ++ ++/* ++** cnt_u32_m_untied: ++** movprfx z0, z2 ++** cnt z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u32_m_untied, svuint32_t, ++ z0 = svcnt_u32_m (z2, p0, z1), ++ z0 = svcnt_m (z2, p0, z1)) ++ ++/* ++** cnt_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** cnt z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u32_z_tied1, svuint32_t, ++ z0 = svcnt_u32_z (p0, z0), ++ z0 = svcnt_z (p0, z0)) ++ ++/* ++** cnt_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** cnt z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u32_z_untied, svuint32_t, ++ z0 = svcnt_u32_z (p0, z1), ++ z0 = svcnt_z (p0, z1)) ++ ++/* ++** cnt_u32_x_tied1: ++** cnt z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u32_x_tied1, svuint32_t, ++ z0 = svcnt_u32_x (p0, z0), ++ z0 = svcnt_x (p0, z0)) ++ ++/* ++** cnt_u32_x_untied: ++** cnt z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u32_x_untied, svuint32_t, ++ z0 = svcnt_u32_x (p0, z1), ++ z0 = svcnt_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u64.c +new file mode 100644 +index 000000000..4eb69ea84 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_u64_m_tied12: ++** cnt z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u64_m_tied12, svuint64_t, ++ z0 = svcnt_u64_m (z0, p0, z0), ++ z0 = svcnt_m (z0, p0, z0)) ++ ++/* ++** 
cnt_u64_m_tied1: ++** cnt z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u64_m_tied1, svuint64_t, ++ z0 = svcnt_u64_m (z0, p0, z1), ++ z0 = svcnt_m (z0, p0, z1)) ++ ++/* ++** cnt_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** cnt z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u64_m_tied2, svuint64_t, ++ z0 = svcnt_u64_m (z1, p0, z0), ++ z0 = svcnt_m (z1, p0, z0)) ++ ++/* ++** cnt_u64_m_untied: ++** movprfx z0, z2 ++** cnt z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u64_m_untied, svuint64_t, ++ z0 = svcnt_u64_m (z2, p0, z1), ++ z0 = svcnt_m (z2, p0, z1)) ++ ++/* ++** cnt_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** cnt z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u64_z_tied1, svuint64_t, ++ z0 = svcnt_u64_z (p0, z0), ++ z0 = svcnt_z (p0, z0)) ++ ++/* ++** cnt_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** cnt z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u64_z_untied, svuint64_t, ++ z0 = svcnt_u64_z (p0, z1), ++ z0 = svcnt_z (p0, z1)) ++ ++/* ++** cnt_u64_x_tied1: ++** cnt z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u64_x_tied1, svuint64_t, ++ z0 = svcnt_u64_x (p0, z0), ++ z0 = svcnt_x (p0, z0)) ++ ++/* ++** cnt_u64_x_untied: ++** cnt z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u64_x_untied, svuint64_t, ++ z0 = svcnt_u64_x (p0, z1), ++ z0 = svcnt_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u8.c +new file mode 100644 +index 000000000..30e798302 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u8.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnt_u8_m_tied12: ++** cnt z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u8_m_tied12, svuint8_t, ++ z0 = svcnt_u8_m (z0, p0, z0), ++ z0 = svcnt_m (z0, p0, z0)) ++ ++/* ++** cnt_u8_m_tied1: ++** cnt z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u8_m_tied1, svuint8_t, ++ z0 = svcnt_u8_m (z0, p0, z1), ++ z0 = svcnt_m (z0, p0, z1)) ++ ++/* ++** cnt_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** cnt z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u8_m_tied2, svuint8_t, ++ z0 = svcnt_u8_m (z1, p0, z0), ++ z0 = svcnt_m (z1, p0, z0)) ++ ++/* ++** cnt_u8_m_untied: ++** movprfx z0, z2 ++** cnt z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u8_m_untied, svuint8_t, ++ z0 = svcnt_u8_m (z2, p0, z1), ++ z0 = svcnt_m (z2, p0, z1)) ++ ++/* ++** cnt_u8_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, \1\.b ++** cnt z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u8_z_tied1, svuint8_t, ++ z0 = svcnt_u8_z (p0, z0), ++ z0 = svcnt_z (p0, z0)) ++ ++/* ++** cnt_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** cnt z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u8_z_untied, svuint8_t, ++ z0 = svcnt_u8_z (p0, z1), ++ z0 = svcnt_z (p0, z1)) ++ ++/* ++** cnt_u8_x_tied1: ++** cnt z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u8_x_tied1, svuint8_t, ++ z0 = svcnt_u8_x (p0, z0), ++ z0 = svcnt_x (p0, z0)) ++ ++/* ++** cnt_u8_x_untied: ++** cnt z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (cnt_u8_x_untied, svuint8_t, ++ z0 = svcnt_u8_x (p0, z1), ++ z0 = svcnt_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c +new file mode 100644 +index 000000000..8b8fe8e4f +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c +@@ -0,0 +1,280 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cntb_1: ++** cntb x0 ++** ret ++*/ ++PROTO (cntb_1, uint64_t, ()) { return svcntb (); } ++ ++/* ++** cntb_2: ++** cntb x0, all, mul #2 ++** ret ++*/ ++PROTO (cntb_2, uint64_t, ()) { return svcntb () * 2; } ++ ++/* ++** cntb_3: ++** cntb x0, all, mul #3 ++** ret ++*/ ++PROTO (cntb_3, uint64_t, ()) { return svcntb () * 3; } ++ ++/* ++** cntb_4: ++** cntb x0, all, mul #4 ++** ret ++*/ ++PROTO (cntb_4, uint64_t, ()) { return svcntb () * 4; } ++ ++/* ++** cntb_8: ++** cntb x0, all, mul #8 ++** ret ++*/ ++PROTO (cntb_8, uint64_t, ()) { return svcntb () * 8; } ++ ++/* ++** cntb_15: ++** cntb x0, all, mul #15 ++** ret ++*/ ++PROTO (cntb_15, uint64_t, ()) { return svcntb () * 15; } ++ ++/* ++** cntb_16: ++** cntb x0, all, mul #16 ++** ret ++*/ ++PROTO (cntb_16, uint64_t, ()) { return svcntb () * 16; } ++ ++/* Other sequences would be OK. */ ++/* ++** cntb_17: ++** cntb x0, all, mul #16 ++** incb x0 ++** ret ++*/ ++PROTO (cntb_17, uint64_t, ()) { return svcntb () * 17; } ++ ++/* ++** cntb_32: ++** cntd (x[0-9]+) ++** lsl x0, \1, 8 ++** ret ++*/ ++PROTO (cntb_32, uint64_t, ()) { return svcntb () * 32; } ++ ++/* Other sequences would be OK. */ ++/* ++** cntb_33: ++** cntb (x[0-9]+) ++** lsl x0, \1, 5 ++** incb x0 ++** ret ++*/ ++PROTO (cntb_33, uint64_t, ()) { return svcntb () * 33; } ++ ++/* ++** cntb_64: ++** cntd (x[0-9]+) ++** lsl x0, \1, 9 ++** ret ++*/ ++PROTO (cntb_64, uint64_t, ()) { return svcntb () * 64; } ++ ++/* ++** cntb_128: ++** cntd (x[0-9]+) ++** lsl x0, \1, 10 ++** ret ++*/ ++PROTO (cntb_128, uint64_t, ()) { return svcntb () * 128; } ++ ++/* Other sequences would be OK. */ ++/* ++** cntb_129: ++** cntb (x[0-9]+) ++** lsl x0, \1, 7 ++** incb x0 ++** ret ++*/ ++PROTO (cntb_129, uint64_t, ()) { return svcntb () * 129; } ++ ++/* ++** cntb_m1: ++** cntb (x[0-9]+) ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntb_m1, uint64_t, ()) { return -svcntb (); } ++ ++/* ++** cntb_m13: ++** cntb (x[0-9]+), all, mul #13 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntb_m13, uint64_t, ()) { return -svcntb () * 13; } ++ ++/* ++** cntb_m15: ++** cntb (x[0-9]+), all, mul #15 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntb_m15, uint64_t, ()) { return -svcntb () * 15; } ++ ++/* ++** cntb_m16: ++** cntb (x[0-9]+), all, mul #16 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntb_m16, uint64_t, ()) { return -svcntb () * 16; } ++ ++/* Other sequences would be OK. 
*/ ++/* ++** cntb_m17: ++** cntb x0, all, mul #16 ++** incb x0 ++** neg x0, x0 ++** ret ++*/ ++PROTO (cntb_m17, uint64_t, ()) { return -svcntb () * 17; } ++ ++/* ++** incb_1: ++** incb x0 ++** ret ++*/ ++PROTO (incb_1, uint64_t, (uint64_t x0)) { return x0 + svcntb (); } ++ ++/* ++** incb_2: ++** incb x0, all, mul #2 ++** ret ++*/ ++PROTO (incb_2, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 2; } ++ ++/* ++** incb_3: ++** incb x0, all, mul #3 ++** ret ++*/ ++PROTO (incb_3, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 3; } ++ ++/* ++** incb_4: ++** incb x0, all, mul #4 ++** ret ++*/ ++PROTO (incb_4, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 4; } ++ ++/* ++** incb_8: ++** incb x0, all, mul #8 ++** ret ++*/ ++PROTO (incb_8, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 8; } ++ ++/* ++** incb_15: ++** incb x0, all, mul #15 ++** ret ++*/ ++PROTO (incb_15, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 15; } ++ ++/* ++** incb_16: ++** incb x0, all, mul #16 ++** ret ++*/ ++PROTO (incb_16, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 16; } ++ ++/* ++** incb_17: ++** addvl x0, x0, #17 ++** ret ++*/ ++PROTO (incb_17, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 17; } ++ ++/* ++** incb_31: ++** addvl x0, x0, #31 ++** ret ++*/ ++PROTO (incb_31, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 31; } ++ ++/* ++** decb_1: ++** decb x0 ++** ret ++*/ ++PROTO (decb_1, uint64_t, (uint64_t x0)) { return x0 - svcntb (); } ++ ++/* ++** decb_2: ++** decb x0, all, mul #2 ++** ret ++*/ ++PROTO (decb_2, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 2; } ++ ++/* ++** decb_3: ++** decb x0, all, mul #3 ++** ret ++*/ ++PROTO (decb_3, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 3; } ++ ++/* ++** decb_4: ++** decb x0, all, mul #4 ++** ret ++*/ ++PROTO (decb_4, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 4; } ++ ++/* ++** decb_8: ++** decb x0, all, mul #8 ++** ret ++*/ ++PROTO (decb_8, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 8; } ++ ++/* ++** decb_15: ++** decb x0, all, mul #15 ++** ret ++*/ ++PROTO (decb_15, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 15; } ++ ++/* ++** decb_16: ++** decb x0, all, mul #16 ++** ret ++*/ ++PROTO (decb_16, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 16; } ++ ++/* ++** decb_17: ++** addvl x0, x0, #-17 ++** ret ++*/ ++PROTO (decb_17, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 17; } ++ ++/* ++** decb_31: ++** addvl x0, x0, #-31 ++** ret ++*/ ++PROTO (decb_31, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 31; } ++ ++/* ++** decb_32: ++** addvl x0, x0, #-32 ++** ret ++*/ ++PROTO (decb_32, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 32; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb_pat.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb_pat.c +new file mode 100644 +index 000000000..effc5668d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb_pat.c +@@ -0,0 +1,432 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cntb_pow2: ++** cntb x0, pow2 ++** ret ++*/ ++PROTO (cntb_pow2, uint64_t, ()) { return svcntb_pat (SV_POW2); } ++ ++/* ++** cntb_vl1: ++** mov x0, #?1 ++** ret ++*/ ++PROTO (cntb_vl1, uint64_t, ()) { return svcntb_pat (SV_VL1); } ++ ++/* ++** cntb_vl2: ++** mov x0, #?2 ++** ret ++*/ ++PROTO (cntb_vl2, uint64_t, ()) { return svcntb_pat (SV_VL2); } ++ ++/* ++** cntb_vl3: ++** mov x0, #?3 ++** ret ++*/ ++PROTO (cntb_vl3, uint64_t, ()) { return svcntb_pat (SV_VL3); } ++ ++/* 
++** cntb_vl4: ++** mov x0, #?4 ++** ret ++*/ ++PROTO (cntb_vl4, uint64_t, ()) { return svcntb_pat (SV_VL4); } ++ ++/* ++** cntb_vl5: ++** mov x0, #?5 ++** ret ++*/ ++PROTO (cntb_vl5, uint64_t, ()) { return svcntb_pat (SV_VL5); } ++ ++/* ++** cntb_vl6: ++** mov x0, #?6 ++** ret ++*/ ++PROTO (cntb_vl6, uint64_t, ()) { return svcntb_pat (SV_VL6); } ++ ++/* ++** cntb_vl7: ++** mov x0, #?7 ++** ret ++*/ ++PROTO (cntb_vl7, uint64_t, ()) { return svcntb_pat (SV_VL7); } ++ ++/* ++** cntb_vl8: ++** mov x0, #?8 ++** ret ++*/ ++PROTO (cntb_vl8, uint64_t, ()) { return svcntb_pat (SV_VL8); } ++ ++/* ++** cntb_vl16: ++** mov x0, #?16 ++** ret ++*/ ++PROTO (cntb_vl16, uint64_t, ()) { return svcntb_pat (SV_VL16); } ++ ++/* ++** cntb_vl32: ++** cntb x0, vl32 ++** ret ++*/ ++PROTO (cntb_vl32, uint64_t, ()) { return svcntb_pat (SV_VL32); } ++ ++/* ++** cntb_vl64: ++** cntb x0, vl64 ++** ret ++*/ ++PROTO (cntb_vl64, uint64_t, ()) { return svcntb_pat (SV_VL64); } ++ ++/* ++** cntb_vl128: ++** cntb x0, vl128 ++** ret ++*/ ++PROTO (cntb_vl128, uint64_t, ()) { return svcntb_pat (SV_VL128); } ++ ++/* ++** cntb_vl256: ++** cntb x0, vl256 ++** ret ++*/ ++PROTO (cntb_vl256, uint64_t, ()) { return svcntb_pat (SV_VL256); } ++ ++/* ++** cntb_mul3: ++** cntb x0, mul3 ++** ret ++*/ ++PROTO (cntb_mul3, uint64_t, ()) { return svcntb_pat (SV_MUL3); } ++ ++/* ++** cntb_mul4: ++** cntb x0, mul4 ++** ret ++*/ ++PROTO (cntb_mul4, uint64_t, ()) { return svcntb_pat (SV_MUL4); } ++ ++/* ++** cntb_all: ++** cntb x0 ++** ret ++*/ ++PROTO (cntb_all, uint64_t, ()) { return svcntb_pat (SV_ALL); } ++ ++/* ++** incb_32_pow2: ++** incb x0, pow2 ++** ret ++*/ ++PROTO (incb_32_pow2, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_POW2); } ++ ++/* ++** incb_32_vl1: ++** add w0, w0, #?1 ++** ret ++*/ ++PROTO (incb_32_vl1, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL1); } ++ ++/* ++** incb_32_vl2: ++** add w0, w0, #?2 ++** ret ++*/ ++PROTO (incb_32_vl2, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL2); } ++ ++/* ++** incb_32_vl3: ++** add w0, w0, #?3 ++** ret ++*/ ++PROTO (incb_32_vl3, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL3); } ++ ++/* ++** incb_32_vl4: ++** add w0, w0, #?4 ++** ret ++*/ ++PROTO (incb_32_vl4, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL4); } ++ ++/* ++** incb_32_vl5: ++** add w0, w0, #?5 ++** ret ++*/ ++PROTO (incb_32_vl5, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL5); } ++ ++/* ++** incb_32_vl6: ++** add w0, w0, #?6 ++** ret ++*/ ++PROTO (incb_32_vl6, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL6); } ++ ++/* ++** incb_32_vl7: ++** add w0, w0, #?7 ++** ret ++*/ ++PROTO (incb_32_vl7, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL7); } ++ ++/* ++** incb_32_vl8: ++** add w0, w0, #?8 ++** ret ++*/ ++PROTO (incb_32_vl8, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL8); } ++ ++/* ++** incb_32_vl16: ++** add w0, w0, #?16 ++** ret ++*/ ++PROTO (incb_32_vl16, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL16); } ++ ++/* ++** incb_32_vl32: ++** incb x0, vl32 ++** ret ++*/ ++PROTO (incb_32_vl32, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL32); } ++ ++/* ++** incb_32_vl64: ++** incb x0, vl64 ++** ret ++*/ ++PROTO (incb_32_vl64, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL64); } ++ ++/* ++** incb_32_vl128: ++** incb x0, vl128 ++** ret ++*/ ++PROTO (incb_32_vl128, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL128); } ++ ++/* ++** incb_32_vl256: ++** incb x0, vl256 ++** ret ++*/ ++PROTO 
(incb_32_vl256, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL256); } ++ ++/* ++** incb_32_mul3: ++** incb x0, mul3 ++** ret ++*/ ++PROTO (incb_32_mul3, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_MUL3); } ++ ++/* ++** incb_32_mul4: ++** incb x0, mul4 ++** ret ++*/ ++PROTO (incb_32_mul4, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_MUL4); } ++ ++/* ++** incb_32_all: ++** incb x0 ++** ret ++*/ ++PROTO (incb_32_all, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_ALL); } ++ ++/* ++** incb_64_pow2: ++** incb x0, pow2 ++** ret ++*/ ++PROTO (incb_64_pow2, uint64_t, (uint64_t x0)) { return x0 + svcntb_pat (SV_POW2); } ++ ++/* ++** incb_64_all: ++** incb x0 ++** ret ++*/ ++PROTO (incb_64_all, uint64_t, (uint64_t x0)) { return x0 + svcntb_pat (SV_ALL); } ++ ++/* ++** decb_32_pow2: ++** decb x0, pow2 ++** ret ++*/ ++PROTO (decb_32_pow2, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_POW2); } ++ ++/* ++** decb_32_vl1: ++** sub w0, w0, #?1 ++** ret ++*/ ++PROTO (decb_32_vl1, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL1); } ++ ++/* ++** decb_32_vl2: ++** sub w0, w0, #?2 ++** ret ++*/ ++PROTO (decb_32_vl2, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL2); } ++ ++/* ++** decb_32_vl3: ++** sub w0, w0, #?3 ++** ret ++*/ ++PROTO (decb_32_vl3, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL3); } ++ ++/* ++** decb_32_vl4: ++** sub w0, w0, #?4 ++** ret ++*/ ++PROTO (decb_32_vl4, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL4); } ++ ++/* ++** decb_32_vl5: ++** sub w0, w0, #?5 ++** ret ++*/ ++PROTO (decb_32_vl5, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL5); } ++ ++/* ++** decb_32_vl6: ++** sub w0, w0, #?6 ++** ret ++*/ ++PROTO (decb_32_vl6, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL6); } ++ ++/* ++** decb_32_vl7: ++** sub w0, w0, #?7 ++** ret ++*/ ++PROTO (decb_32_vl7, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL7); } ++ ++/* ++** decb_32_vl8: ++** sub w0, w0, #?8 ++** ret ++*/ ++PROTO (decb_32_vl8, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL8); } ++ ++/* ++** decb_32_vl16: ++** sub w0, w0, #?16 ++** ret ++*/ ++PROTO (decb_32_vl16, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL16); } ++ ++/* ++** decb_32_vl32: ++** decb x0, vl32 ++** ret ++*/ ++PROTO (decb_32_vl32, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL32); } ++ ++/* ++** decb_32_vl64: ++** decb x0, vl64 ++** ret ++*/ ++PROTO (decb_32_vl64, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL64); } ++ ++/* ++** decb_32_vl128: ++** decb x0, vl128 ++** ret ++*/ ++PROTO (decb_32_vl128, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL128); } ++ ++/* ++** decb_32_vl256: ++** decb x0, vl256 ++** ret ++*/ ++PROTO (decb_32_vl256, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL256); } ++ ++/* ++** decb_32_mul3: ++** decb x0, mul3 ++** ret ++*/ ++PROTO (decb_32_mul3, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_MUL3); } ++ ++/* ++** decb_32_mul4: ++** decb x0, mul4 ++** ret ++*/ ++PROTO (decb_32_mul4, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_MUL4); } ++ ++/* ++** decb_32_all: ++** decb x0 ++** ret ++*/ ++PROTO (decb_32_all, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_ALL); } ++ ++/* ++** decb_64_pow2: ++** decb x0, pow2 ++** ret ++*/ ++PROTO (decb_64_pow2, uint64_t, (uint64_t x0)) { return x0 - svcntb_pat (SV_POW2); } ++ ++/* ++** decb_64_all: ++** decb x0 ++** ret ++*/ ++PROTO (decb_64_all, uint64_t, (uint64_t x0)) { return x0 - svcntb_pat (SV_ALL); } ++ 
++/* ++** incb_s8_pow2_z0: ++** cntb x([0-9]+), pow2 ++** mov (z[0-9]+\.b), w\1 ++** add z0\.b, (z0\.b, \2|\2, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (incb_s8_pow2_z0, svint8_t, ++ z0 = svadd_n_s8_x (svptrue_b8 (), z0, svcntb_pat (SV_POW2)), ++ z0 = svadd_x (svptrue_b8 (), z0, svcntb_pat (SV_POW2))); ++ ++/* ++** incb_s8_pow2_z1: ++** cntb x([0-9]+), pow2 ++** mov (z[0-9]+\.b), w\1 ++** add z0\.b, (z1\.b, \2|\2, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (incb_s8_pow2_z1, svint8_t, ++ z0 = svadd_n_s8_x (svptrue_b8 (), z1, svcntb_pat (SV_POW2)), ++ z0 = svadd_x (svptrue_b8 (), z1, svcntb_pat (SV_POW2))); ++ ++/* ++** decb_s8_pow2_z0: ++** cntb x([0-9]+), pow2 ++** mov (z[0-9]+\.b), w\1 ++** sub z0\.b, z0\.b, \2 ++** ret ++*/ ++TEST_UNIFORM_Z (decb_s8_pow2_z0, svint8_t, ++ z0 = svsub_n_s8_x (svptrue_b8 (), z0, svcntb_pat (SV_POW2)), ++ z0 = svsub_x (svptrue_b8 (), z0, svcntb_pat (SV_POW2))); ++ ++/* ++** decb_s8_pow2_z1: ++** cntb x([0-9]+), pow2 ++** mov (z[0-9]+\.b), w\1 ++** sub z0\.b, z1\.b, \2 ++** ret ++*/ ++TEST_UNIFORM_Z (decb_s8_pow2_z1, svint8_t, ++ z0 = svsub_n_s8_x (svptrue_b8 (), z1, svcntb_pat (SV_POW2)), ++ z0 = svsub_x (svptrue_b8 (), z1, svcntb_pat (SV_POW2))); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c +new file mode 100644 +index 000000000..0d0ed4849 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c +@@ -0,0 +1,278 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cntd_1: ++** cntd x0 ++** ret ++*/ ++PROTO (cntd_1, uint64_t, ()) { return svcntd (); } ++ ++/* ++** cntd_2: ++** cntw x0 ++** ret ++*/ ++PROTO (cntd_2, uint64_t, ()) { return svcntd () * 2; } ++ ++/* ++** cntd_3: ++** cntd x0, all, mul #3 ++** ret ++*/ ++PROTO (cntd_3, uint64_t, ()) { return svcntd () * 3; } ++ ++/* ++** cntd_4: ++** cnth x0 ++** ret ++*/ ++PROTO (cntd_4, uint64_t, ()) { return svcntd () * 4; } ++ ++/* ++** cntd_8: ++** cntb x0 ++** ret ++*/ ++PROTO (cntd_8, uint64_t, ()) { return svcntd () * 8; } ++ ++/* ++** cntd_15: ++** cntd x0, all, mul #15 ++** ret ++*/ ++PROTO (cntd_15, uint64_t, ()) { return svcntd () * 15; } ++ ++/* ++** cntd_16: ++** cntb x0, all, mul #2 ++** ret ++*/ ++PROTO (cntd_16, uint64_t, ()) { return svcntd () * 16; } ++ ++/* Other sequences would be OK. */ ++/* ++** cntd_17: ++** cntb x0, all, mul #2 ++** incd x0 ++** ret ++*/ ++PROTO (cntd_17, uint64_t, ()) { return svcntd () * 17; } ++ ++/* ++** cntd_32: ++** cntb x0, all, mul #4 ++** ret ++*/ ++PROTO (cntd_32, uint64_t, ()) { return svcntd () * 32; } ++ ++/* ++** cntd_64: ++** cntb x0, all, mul #8 ++** ret ++*/ ++PROTO (cntd_64, uint64_t, ()) { return svcntd () * 64; } ++ ++/* ++** cntd_128: ++** cntb x0, all, mul #16 ++** ret ++*/ ++PROTO (cntd_128, uint64_t, ()) { return svcntd () * 128; } ++ ++/* ++** cntd_m1: ++** cntd (x[0-9]+) ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntd_m1, uint64_t, ()) { return -svcntd (); } ++ ++/* ++** cntd_m13: ++** cntd (x[0-9]+), all, mul #13 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntd_m13, uint64_t, ()) { return -svcntd () * 13; } ++ ++/* ++** cntd_m15: ++** cntd (x[0-9]+), all, mul #15 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntd_m15, uint64_t, ()) { return -svcntd () * 15; } ++ ++/* ++** cntd_m16: ++** cntb (x[0-9]+), all, mul #2 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; } ++ ++/* Other sequences would be OK. 
*/ ++/* ++** cntd_m17: ++** cntb x0, all, mul #2 ++** incd x0 ++** neg x0, x0 ++** ret ++*/ ++PROTO (cntd_m17, uint64_t, ()) { return -svcntd () * 17; } ++ ++/* ++** incd_1: ++** incd x0 ++** ret ++*/ ++PROTO (incd_1, uint64_t, (uint64_t x0)) { return x0 + svcntd (); } ++ ++/* ++** incd_2: ++** incw x0 ++** ret ++*/ ++PROTO (incd_2, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 2; } ++ ++/* ++** incd_3: ++** incd x0, all, mul #3 ++** ret ++*/ ++PROTO (incd_3, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 3; } ++ ++/* ++** incd_4: ++** inch x0 ++** ret ++*/ ++PROTO (incd_4, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 4; } ++ ++/* ++** incd_7: ++** incd x0, all, mul #7 ++** ret ++*/ ++PROTO (incd_7, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 7; } ++ ++/* ++** incd_8: ++** incb x0 ++** ret ++*/ ++PROTO (incd_8, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 8; } ++ ++/* ++** incd_9: ++** incd x0, all, mul #9 ++** ret ++*/ ++PROTO (incd_9, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 9; } ++ ++/* ++** incd_15: ++** incd x0, all, mul #15 ++** ret ++*/ ++PROTO (incd_15, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 15; } ++ ++/* ++** incd_16: ++** incb x0, all, mul #2 ++** ret ++*/ ++PROTO (incd_16, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 16; } ++ ++/* ++** incd_18: ++** incw x0, all, mul #9 ++** ret ++*/ ++PROTO (incd_18, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 18; } ++ ++/* ++** incd_30: ++** incw x0, all, mul #15 ++** ret ++*/ ++PROTO (incd_30, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 30; } ++ ++/* ++** decd_1: ++** decd x0 ++** ret ++*/ ++PROTO (decd_1, uint64_t, (uint64_t x0)) { return x0 - svcntd (); } ++ ++/* ++** decd_2: ++** decw x0 ++** ret ++*/ ++PROTO (decd_2, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 2; } ++ ++/* ++** decd_3: ++** decd x0, all, mul #3 ++** ret ++*/ ++PROTO (decd_3, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 3; } ++ ++/* ++** decd_4: ++** dech x0 ++** ret ++*/ ++PROTO (decd_4, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 4; } ++ ++/* ++** decd_7: ++** decd x0, all, mul #7 ++** ret ++*/ ++PROTO (decd_7, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 7; } ++ ++/* ++** decd_8: ++** decb x0 ++** ret ++*/ ++PROTO (decd_8, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 8; } ++ ++/* ++** decd_9: ++** decd x0, all, mul #9 ++** ret ++*/ ++PROTO (decd_9, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 9; } ++ ++/* ++** decd_15: ++** decd x0, all, mul #15 ++** ret ++*/ ++PROTO (decd_15, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 15; } ++ ++/* ++** decd_16: ++** decb x0, all, mul #2 ++** ret ++*/ ++PROTO (decd_16, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 16; } ++ ++/* ++** decd_18: ++** decw x0, all, mul #9 ++** ret ++*/ ++PROTO (decd_18, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 18; } ++ ++/* ++** decd_30: ++** decw x0, all, mul #15 ++** ret ++*/ ++PROTO (decd_30, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 30; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd_pat.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd_pat.c +new file mode 100644 +index 000000000..31ecde7ae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd_pat.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cntd_pow2: ++** cntd x0, pow2 ++** ret ++*/ ++PROTO (cntd_pow2, uint64_t, ()) { return svcntd_pat (SV_POW2); } ++ ++/* ++** cntd_vl1: ++** mov 
x0, #?1 ++** ret ++*/ ++PROTO (cntd_vl1, uint64_t, ()) { return svcntd_pat (SV_VL1); } ++ ++/* ++** cntd_vl2: ++** mov x0, #?2 ++** ret ++*/ ++PROTO (cntd_vl2, uint64_t, ()) { return svcntd_pat (SV_VL2); } ++ ++/* ++** cntd_vl3: ++** cntd x0, vl3 ++** ret ++*/ ++PROTO (cntd_vl3, uint64_t, ()) { return svcntd_pat (SV_VL3); } ++ ++/* ++** cntd_vl4: ++** cntd x0, vl4 ++** ret ++*/ ++PROTO (cntd_vl4, uint64_t, ()) { return svcntd_pat (SV_VL4); } ++ ++/* ++** cntd_vl5: ++** cntd x0, vl5 ++** ret ++*/ ++PROTO (cntd_vl5, uint64_t, ()) { return svcntd_pat (SV_VL5); } ++ ++/* ++** cntd_vl6: ++** cntd x0, vl6 ++** ret ++*/ ++PROTO (cntd_vl6, uint64_t, ()) { return svcntd_pat (SV_VL6); } ++ ++/* ++** cntd_vl7: ++** cntd x0, vl7 ++** ret ++*/ ++PROTO (cntd_vl7, uint64_t, ()) { return svcntd_pat (SV_VL7); } ++ ++/* ++** cntd_vl8: ++** cntd x0, vl8 ++** ret ++*/ ++PROTO (cntd_vl8, uint64_t, ()) { return svcntd_pat (SV_VL8); } ++ ++/* ++** cntd_vl16: ++** cntd x0, vl16 ++** ret ++*/ ++PROTO (cntd_vl16, uint64_t, ()) { return svcntd_pat (SV_VL16); } ++ ++/* ++** cntd_vl32: ++** cntd x0, vl32 ++** ret ++*/ ++PROTO (cntd_vl32, uint64_t, ()) { return svcntd_pat (SV_VL32); } ++ ++/* ++** cntd_vl64: ++** cntd x0, vl64 ++** ret ++*/ ++PROTO (cntd_vl64, uint64_t, ()) { return svcntd_pat (SV_VL64); } ++ ++/* ++** cntd_vl128: ++** cntd x0, vl128 ++** ret ++*/ ++PROTO (cntd_vl128, uint64_t, ()) { return svcntd_pat (SV_VL128); } ++ ++/* ++** cntd_vl256: ++** cntd x0, vl256 ++** ret ++*/ ++PROTO (cntd_vl256, uint64_t, ()) { return svcntd_pat (SV_VL256); } ++ ++/* ++** cntd_mul3: ++** cntd x0, mul3 ++** ret ++*/ ++PROTO (cntd_mul3, uint64_t, ()) { return svcntd_pat (SV_MUL3); } ++ ++/* ++** cntd_mul4: ++** cntd x0, mul4 ++** ret ++*/ ++PROTO (cntd_mul4, uint64_t, ()) { return svcntd_pat (SV_MUL4); } ++ ++/* ++** cntd_all: ++** cntd x0 ++** ret ++*/ ++PROTO (cntd_all, uint64_t, ()) { return svcntd_pat (SV_ALL); } ++ ++/* ++** incd_32_pow2: ++** incd x0, pow2 ++** ret ++*/ ++PROTO (incd_32_pow2, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_POW2); } ++ ++/* ++** incd_32_vl1: ++** add w0, w0, #?1 ++** ret ++*/ ++PROTO (incd_32_vl1, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL1); } ++ ++/* ++** incd_32_vl2: ++** add w0, w0, #?2 ++** ret ++*/ ++PROTO (incd_32_vl2, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL2); } ++ ++/* ++** incd_32_vl3: ++** incd x0, vl3 ++** ret ++*/ ++PROTO (incd_32_vl3, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL3); } ++ ++/* ++** incd_32_vl4: ++** incd x0, vl4 ++** ret ++*/ ++PROTO (incd_32_vl4, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL4); } ++ ++/* ++** incd_32_vl5: ++** incd x0, vl5 ++** ret ++*/ ++PROTO (incd_32_vl5, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL5); } ++ ++/* ++** incd_32_vl6: ++** incd x0, vl6 ++** ret ++*/ ++PROTO (incd_32_vl6, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL6); } ++ ++/* ++** incd_32_vl7: ++** incd x0, vl7 ++** ret ++*/ ++PROTO (incd_32_vl7, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL7); } ++ ++/* ++** incd_32_vl8: ++** incd x0, vl8 ++** ret ++*/ ++PROTO (incd_32_vl8, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL8); } ++ ++/* ++** incd_32_vl16: ++** incd x0, vl16 ++** ret ++*/ ++PROTO (incd_32_vl16, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL16); } ++ ++/* ++** incd_32_vl32: ++** incd x0, vl32 ++** ret ++*/ ++PROTO (incd_32_vl32, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL32); } ++ ++/* ++** incd_32_vl64: ++** incd x0, vl64 ++** ret 
++*/ ++PROTO (incd_32_vl64, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL64); } ++ ++/* ++** incd_32_vl128: ++** incd x0, vl128 ++** ret ++*/ ++PROTO (incd_32_vl128, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL128); } ++ ++/* ++** incd_32_vl256: ++** incd x0, vl256 ++** ret ++*/ ++PROTO (incd_32_vl256, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL256); } ++ ++/* ++** incd_32_mul3: ++** incd x0, mul3 ++** ret ++*/ ++PROTO (incd_32_mul3, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_MUL3); } ++ ++/* ++** incd_32_mul4: ++** incd x0, mul4 ++** ret ++*/ ++PROTO (incd_32_mul4, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_MUL4); } ++ ++/* ++** incd_32_all: ++** incd x0 ++** ret ++*/ ++PROTO (incd_32_all, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_ALL); } ++ ++/* ++** incd_64_pow2: ++** incd x0, pow2 ++** ret ++*/ ++PROTO (incd_64_pow2, uint64_t, (uint64_t x0)) { return x0 + svcntd_pat (SV_POW2); } ++ ++/* ++** incd_64_all: ++** incd x0 ++** ret ++*/ ++PROTO (incd_64_all, uint64_t, (uint64_t x0)) { return x0 + svcntd_pat (SV_ALL); } ++ ++/* ++** decd_32_pow2: ++** decd x0, pow2 ++** ret ++*/ ++PROTO (decd_32_pow2, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_POW2); } ++ ++/* ++** decd_32_vl1: ++** sub w0, w0, #?1 ++** ret ++*/ ++PROTO (decd_32_vl1, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL1); } ++ ++/* ++** decd_32_vl2: ++** sub w0, w0, #?2 ++** ret ++*/ ++PROTO (decd_32_vl2, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL2); } ++ ++/* ++** decd_32_vl3: ++** decd x0, vl3 ++** ret ++*/ ++PROTO (decd_32_vl3, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL3); } ++ ++/* ++** decd_32_vl4: ++** decd x0, vl4 ++** ret ++*/ ++PROTO (decd_32_vl4, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL4); } ++ ++/* ++** decd_32_vl5: ++** decd x0, vl5 ++** ret ++*/ ++PROTO (decd_32_vl5, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL5); } ++ ++/* ++** decd_32_vl6: ++** decd x0, vl6 ++** ret ++*/ ++PROTO (decd_32_vl6, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL6); } ++ ++/* ++** decd_32_vl7: ++** decd x0, vl7 ++** ret ++*/ ++PROTO (decd_32_vl7, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL7); } ++ ++/* ++** decd_32_vl8: ++** decd x0, vl8 ++** ret ++*/ ++PROTO (decd_32_vl8, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL8); } ++ ++/* ++** decd_32_vl16: ++** decd x0, vl16 ++** ret ++*/ ++PROTO (decd_32_vl16, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL16); } ++ ++/* ++** decd_32_vl32: ++** decd x0, vl32 ++** ret ++*/ ++PROTO (decd_32_vl32, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL32); } ++ ++/* ++** decd_32_vl64: ++** decd x0, vl64 ++** ret ++*/ ++PROTO (decd_32_vl64, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL64); } ++ ++/* ++** decd_32_vl128: ++** decd x0, vl128 ++** ret ++*/ ++PROTO (decd_32_vl128, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL128); } ++ ++/* ++** decd_32_vl256: ++** decd x0, vl256 ++** ret ++*/ ++PROTO (decd_32_vl256, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL256); } ++ ++/* ++** decd_32_mul3: ++** decd x0, mul3 ++** ret ++*/ ++PROTO (decd_32_mul3, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_MUL3); } ++ ++/* ++** decd_32_mul4: ++** decd x0, mul4 ++** ret ++*/ ++PROTO (decd_32_mul4, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_MUL4); } ++ ++/* ++** decd_32_all: ++** decd x0 ++** ret ++*/ ++PROTO (decd_32_all, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat 
(SV_ALL); } ++ ++/* ++** decd_64_pow2: ++** decd x0, pow2 ++** ret ++*/ ++PROTO (decd_64_pow2, uint64_t, (uint64_t x0)) { return x0 - svcntd_pat (SV_POW2); } ++ ++/* ++** decd_64_all: ++** decd x0 ++** ret ++*/ ++PROTO (decd_64_all, uint64_t, (uint64_t x0)) { return x0 - svcntd_pat (SV_ALL); } ++ ++/* ++** incd_s64_pow2_z0: ++** incd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (incd_s64_pow2_z0, svint64_t, ++ z0 = svadd_n_s64_x (svptrue_b64 (), z0, svcntd_pat (SV_POW2)), ++ z0 = svadd_x (svptrue_b64 (), z0, svcntd_pat (SV_POW2))); ++ ++/* ++** incd_s64_pow2_z1: ++** movprfx z0, z1 ++** incd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (incd_s64_pow2_z1, svint64_t, ++ z0 = svadd_n_s64_x (svptrue_b64 (), z1, svcntd_pat (SV_POW2)), ++ z0 = svadd_x (svptrue_b64 (), z1, svcntd_pat (SV_POW2))); ++ ++/* ++** decd_s64_pow2_z0: ++** decd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (decd_s64_pow2_z0, svint64_t, ++ z0 = svsub_n_s64_x (svptrue_b64 (), z0, svcntd_pat (SV_POW2)), ++ z0 = svsub_x (svptrue_b64 (), z0, svcntd_pat (SV_POW2))); ++ ++/* ++** decd_s64_pow2_z1: ++** movprfx z0, z1 ++** decd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (decd_s64_pow2_z1, svint64_t, ++ z0 = svsub_n_s64_x (svptrue_b64 (), z1, svcntd_pat (SV_POW2)), ++ z0 = svsub_x (svptrue_b64 (), z1, svcntd_pat (SV_POW2))); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c +new file mode 100644 +index 000000000..c29930f15 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c +@@ -0,0 +1,280 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnth_1: ++** cnth x0 ++** ret ++*/ ++PROTO (cnth_1, uint64_t, ()) { return svcnth (); } ++ ++/* ++** cnth_2: ++** cntb x0 ++** ret ++*/ ++PROTO (cnth_2, uint64_t, ()) { return svcnth () * 2; } ++ ++/* ++** cnth_3: ++** cnth x0, all, mul #3 ++** ret ++*/ ++PROTO (cnth_3, uint64_t, ()) { return svcnth () * 3; } ++ ++/* ++** cnth_4: ++** cntb x0, all, mul #2 ++** ret ++*/ ++PROTO (cnth_4, uint64_t, ()) { return svcnth () * 4; } ++ ++/* ++** cnth_8: ++** cntb x0, all, mul #4 ++** ret ++*/ ++PROTO (cnth_8, uint64_t, ()) { return svcnth () * 8; } ++ ++/* ++** cnth_15: ++** cnth x0, all, mul #15 ++** ret ++*/ ++PROTO (cnth_15, uint64_t, ()) { return svcnth () * 15; } ++ ++/* ++** cnth_16: ++** cntb x0, all, mul #8 ++** ret ++*/ ++PROTO (cnth_16, uint64_t, ()) { return svcnth () * 16; } ++ ++/* Other sequences would be OK. 
*/ ++/* ++** cnth_17: ++** cntb x0, all, mul #8 ++** inch x0 ++** ret ++*/ ++PROTO (cnth_17, uint64_t, ()) { return svcnth () * 17; } ++ ++/* ++** cnth_32: ++** cntb x0, all, mul #16 ++** ret ++*/ ++PROTO (cnth_32, uint64_t, ()) { return svcnth () * 32; } ++ ++/* ++** cnth_64: ++** cntd (x[0-9]+) ++** lsl x0, \1, 8 ++** ret ++*/ ++PROTO (cnth_64, uint64_t, ()) { return svcnth () * 64; } ++ ++/* ++** cnth_128: ++** cntd (x[0-9]+) ++** lsl x0, \1, 9 ++** ret ++*/ ++PROTO (cnth_128, uint64_t, ()) { return svcnth () * 128; } ++ ++/* ++** cnth_m1: ++** cnth (x[0-9]+) ++** neg x0, \1 ++** ret ++*/ ++PROTO (cnth_m1, uint64_t, ()) { return -svcnth (); } ++ ++/* ++** cnth_m13: ++** cnth (x[0-9]+), all, mul #13 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cnth_m13, uint64_t, ()) { return -svcnth () * 13; } ++ ++/* ++** cnth_m15: ++** cnth (x[0-9]+), all, mul #15 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cnth_m15, uint64_t, ()) { return -svcnth () * 15; } ++ ++/* ++** cnth_m16: ++** cntb (x[0-9]+), all, mul #8 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; } ++ ++/* Other sequences would be OK. */ ++/* ++** cnth_m17: ++** cntb x0, all, mul #8 ++** inch x0 ++** neg x0, x0 ++** ret ++*/ ++PROTO (cnth_m17, uint64_t, ()) { return -svcnth () * 17; } ++ ++/* ++** inch_1: ++** inch x0 ++** ret ++*/ ++PROTO (inch_1, uint64_t, (uint64_t x0)) { return x0 + svcnth (); } ++ ++/* ++** inch_2: ++** incb x0 ++** ret ++*/ ++PROTO (inch_2, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 2; } ++ ++/* ++** inch_3: ++** inch x0, all, mul #3 ++** ret ++*/ ++PROTO (inch_3, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 3; } ++ ++/* ++** inch_4: ++** incb x0, all, mul #2 ++** ret ++*/ ++PROTO (inch_4, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 4; } ++ ++/* ++** inch_7: ++** inch x0, all, mul #7 ++** ret ++*/ ++PROTO (inch_7, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 7; } ++ ++/* ++** inch_8: ++** incb x0, all, mul #4 ++** ret ++*/ ++PROTO (inch_8, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 8; } ++ ++/* ++** inch_9: ++** inch x0, all, mul #9 ++** ret ++*/ ++PROTO (inch_9, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 9; } ++ ++/* ++** inch_15: ++** inch x0, all, mul #15 ++** ret ++*/ ++PROTO (inch_15, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 15; } ++ ++/* ++** inch_16: ++** incb x0, all, mul #8 ++** ret ++*/ ++PROTO (inch_16, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 16; } ++ ++/* ++** inch_18: ++** incb x0, all, mul #9 ++** ret ++*/ ++PROTO (inch_18, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 18; } ++ ++/* ++** inch_30: ++** incb x0, all, mul #15 ++** ret ++*/ ++PROTO (inch_30, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 30; } ++ ++/* ++** dech_1: ++** dech x0 ++** ret ++*/ ++PROTO (dech_1, uint64_t, (uint64_t x0)) { return x0 - svcnth (); } ++ ++/* ++** dech_2: ++** decb x0 ++** ret ++*/ ++PROTO (dech_2, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 2; } ++ ++/* ++** dech_3: ++** dech x0, all, mul #3 ++** ret ++*/ ++PROTO (dech_3, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 3; } ++ ++/* ++** dech_4: ++** decb x0, all, mul #2 ++** ret ++*/ ++PROTO (dech_4, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 4; } ++ ++/* ++** dech_7: ++** dech x0, all, mul #7 ++** ret ++*/ ++PROTO (dech_7, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 7; } ++ ++/* ++** dech_8: ++** decb x0, all, mul #4 ++** ret ++*/ ++PROTO (dech_8, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 8; } ++ ++/* ++** dech_9: ++** dech x0, 
all, mul #9 ++** ret ++*/ ++PROTO (dech_9, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 9; } ++ ++/* ++** dech_15: ++** dech x0, all, mul #15 ++** ret ++*/ ++PROTO (dech_15, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 15; } ++ ++/* ++** dech_16: ++** decb x0, all, mul #8 ++** ret ++*/ ++PROTO (dech_16, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 16; } ++ ++/* ++** dech_18: ++** decb x0, all, mul #9 ++** ret ++*/ ++PROTO (dech_18, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 18; } ++ ++/* ++** dech_30: ++** decb x0, all, mul #15 ++** ret ++*/ ++PROTO (dech_30, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 30; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth_pat.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth_pat.c +new file mode 100644 +index 000000000..7a42e7ad9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth_pat.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cnth_pow2: ++** cnth x0, pow2 ++** ret ++*/ ++PROTO (cnth_pow2, uint64_t, ()) { return svcnth_pat (SV_POW2); } ++ ++/* ++** cnth_vl1: ++** mov x0, #?1 ++** ret ++*/ ++PROTO (cnth_vl1, uint64_t, ()) { return svcnth_pat (SV_VL1); } ++ ++/* ++** cnth_vl2: ++** mov x0, #?2 ++** ret ++*/ ++PROTO (cnth_vl2, uint64_t, ()) { return svcnth_pat (SV_VL2); } ++ ++/* ++** cnth_vl3: ++** mov x0, #?3 ++** ret ++*/ ++PROTO (cnth_vl3, uint64_t, ()) { return svcnth_pat (SV_VL3); } ++ ++/* ++** cnth_vl4: ++** mov x0, #?4 ++** ret ++*/ ++PROTO (cnth_vl4, uint64_t, ()) { return svcnth_pat (SV_VL4); } ++ ++/* ++** cnth_vl5: ++** mov x0, #?5 ++** ret ++*/ ++PROTO (cnth_vl5, uint64_t, ()) { return svcnth_pat (SV_VL5); } ++ ++/* ++** cnth_vl6: ++** mov x0, #?6 ++** ret ++*/ ++PROTO (cnth_vl6, uint64_t, ()) { return svcnth_pat (SV_VL6); } ++ ++/* ++** cnth_vl7: ++** mov x0, #?7 ++** ret ++*/ ++PROTO (cnth_vl7, uint64_t, ()) { return svcnth_pat (SV_VL7); } ++ ++/* ++** cnth_vl8: ++** mov x0, #?8 ++** ret ++*/ ++PROTO (cnth_vl8, uint64_t, ()) { return svcnth_pat (SV_VL8); } ++ ++/* ++** cnth_vl16: ++** cnth x0, vl16 ++** ret ++*/ ++PROTO (cnth_vl16, uint64_t, ()) { return svcnth_pat (SV_VL16); } ++ ++/* ++** cnth_vl32: ++** cnth x0, vl32 ++** ret ++*/ ++PROTO (cnth_vl32, uint64_t, ()) { return svcnth_pat (SV_VL32); } ++ ++/* ++** cnth_vl64: ++** cnth x0, vl64 ++** ret ++*/ ++PROTO (cnth_vl64, uint64_t, ()) { return svcnth_pat (SV_VL64); } ++ ++/* ++** cnth_vl128: ++** cnth x0, vl128 ++** ret ++*/ ++PROTO (cnth_vl128, uint64_t, ()) { return svcnth_pat (SV_VL128); } ++ ++/* ++** cnth_vl256: ++** cnth x0, vl256 ++** ret ++*/ ++PROTO (cnth_vl256, uint64_t, ()) { return svcnth_pat (SV_VL256); } ++ ++/* ++** cnth_mul3: ++** cnth x0, mul3 ++** ret ++*/ ++PROTO (cnth_mul3, uint64_t, ()) { return svcnth_pat (SV_MUL3); } ++ ++/* ++** cnth_mul4: ++** cnth x0, mul4 ++** ret ++*/ ++PROTO (cnth_mul4, uint64_t, ()) { return svcnth_pat (SV_MUL4); } ++ ++/* ++** cnth_all: ++** cnth x0 ++** ret ++*/ ++PROTO (cnth_all, uint64_t, ()) { return svcnth_pat (SV_ALL); } ++ ++/* ++** inch_32_pow2: ++** inch x0, pow2 ++** ret ++*/ ++PROTO (inch_32_pow2, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_POW2); } ++ ++/* ++** inch_32_vl1: ++** add w0, w0, #?1 ++** ret ++*/ ++PROTO (inch_32_vl1, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL1); } ++ ++/* ++** inch_32_vl2: ++** add w0, w0, #?2 ++** ret ++*/ ++PROTO (inch_32_vl2, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL2); } ++ ++/* ++** 
inch_32_vl3: ++** add w0, w0, #?3 ++** ret ++*/ ++PROTO (inch_32_vl3, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL3); } ++ ++/* ++** inch_32_vl4: ++** add w0, w0, #?4 ++** ret ++*/ ++PROTO (inch_32_vl4, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL4); } ++ ++/* ++** inch_32_vl5: ++** add w0, w0, #?5 ++** ret ++*/ ++PROTO (inch_32_vl5, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL5); } ++ ++/* ++** inch_32_vl6: ++** add w0, w0, #?6 ++** ret ++*/ ++PROTO (inch_32_vl6, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL6); } ++ ++/* ++** inch_32_vl7: ++** add w0, w0, #?7 ++** ret ++*/ ++PROTO (inch_32_vl7, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL7); } ++ ++/* ++** inch_32_vl8: ++** add w0, w0, #?8 ++** ret ++*/ ++PROTO (inch_32_vl8, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL8); } ++ ++/* ++** inch_32_vl16: ++** inch x0, vl16 ++** ret ++*/ ++PROTO (inch_32_vl16, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL16); } ++ ++/* ++** inch_32_vl32: ++** inch x0, vl32 ++** ret ++*/ ++PROTO (inch_32_vl32, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL32); } ++ ++/* ++** inch_32_vl64: ++** inch x0, vl64 ++** ret ++*/ ++PROTO (inch_32_vl64, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL64); } ++ ++/* ++** inch_32_vl128: ++** inch x0, vl128 ++** ret ++*/ ++PROTO (inch_32_vl128, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL128); } ++ ++/* ++** inch_32_vl256: ++** inch x0, vl256 ++** ret ++*/ ++PROTO (inch_32_vl256, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL256); } ++ ++/* ++** inch_32_mul3: ++** inch x0, mul3 ++** ret ++*/ ++PROTO (inch_32_mul3, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_MUL3); } ++ ++/* ++** inch_32_mul4: ++** inch x0, mul4 ++** ret ++*/ ++PROTO (inch_32_mul4, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_MUL4); } ++ ++/* ++** inch_32_all: ++** inch x0 ++** ret ++*/ ++PROTO (inch_32_all, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_ALL); } ++ ++/* ++** inch_64_pow2: ++** inch x0, pow2 ++** ret ++*/ ++PROTO (inch_64_pow2, uint64_t, (uint64_t x0)) { return x0 + svcnth_pat (SV_POW2); } ++ ++/* ++** inch_64_all: ++** inch x0 ++** ret ++*/ ++PROTO (inch_64_all, uint64_t, (uint64_t x0)) { return x0 + svcnth_pat (SV_ALL); } ++ ++/* ++** dech_32_pow2: ++** dech x0, pow2 ++** ret ++*/ ++PROTO (dech_32_pow2, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_POW2); } ++ ++/* ++** dech_32_vl1: ++** sub w0, w0, #?1 ++** ret ++*/ ++PROTO (dech_32_vl1, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL1); } ++ ++/* ++** dech_32_vl2: ++** sub w0, w0, #?2 ++** ret ++*/ ++PROTO (dech_32_vl2, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL2); } ++ ++/* ++** dech_32_vl3: ++** sub w0, w0, #?3 ++** ret ++*/ ++PROTO (dech_32_vl3, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL3); } ++ ++/* ++** dech_32_vl4: ++** sub w0, w0, #?4 ++** ret ++*/ ++PROTO (dech_32_vl4, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL4); } ++ ++/* ++** dech_32_vl5: ++** sub w0, w0, #?5 ++** ret ++*/ ++PROTO (dech_32_vl5, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL5); } ++ ++/* ++** dech_32_vl6: ++** sub w0, w0, #?6 ++** ret ++*/ ++PROTO (dech_32_vl6, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL6); } ++ ++/* ++** dech_32_vl7: ++** sub w0, w0, #?7 ++** ret ++*/ ++PROTO (dech_32_vl7, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL7); } ++ ++/* ++** dech_32_vl8: ++** sub w0, w0, #?8 ++** ret ++*/ ++PROTO (dech_32_vl8, 
uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL8); } ++ ++/* ++** dech_32_vl16: ++** dech x0, vl16 ++** ret ++*/ ++PROTO (dech_32_vl16, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL16); } ++ ++/* ++** dech_32_vl32: ++** dech x0, vl32 ++** ret ++*/ ++PROTO (dech_32_vl32, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL32); } ++ ++/* ++** dech_32_vl64: ++** dech x0, vl64 ++** ret ++*/ ++PROTO (dech_32_vl64, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL64); } ++ ++/* ++** dech_32_vl128: ++** dech x0, vl128 ++** ret ++*/ ++PROTO (dech_32_vl128, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL128); } ++ ++/* ++** dech_32_vl256: ++** dech x0, vl256 ++** ret ++*/ ++PROTO (dech_32_vl256, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL256); } ++ ++/* ++** dech_32_mul3: ++** dech x0, mul3 ++** ret ++*/ ++PROTO (dech_32_mul3, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_MUL3); } ++ ++/* ++** dech_32_mul4: ++** dech x0, mul4 ++** ret ++*/ ++PROTO (dech_32_mul4, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_MUL4); } ++ ++/* ++** dech_32_all: ++** dech x0 ++** ret ++*/ ++PROTO (dech_32_all, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_ALL); } ++ ++/* ++** dech_64_pow2: ++** dech x0, pow2 ++** ret ++*/ ++PROTO (dech_64_pow2, uint64_t, (uint64_t x0)) { return x0 - svcnth_pat (SV_POW2); } ++ ++/* ++** dech_64_all: ++** dech x0 ++** ret ++*/ ++PROTO (dech_64_all, uint64_t, (uint64_t x0)) { return x0 - svcnth_pat (SV_ALL); } ++ ++/* ++** inch_s16_pow2_z0: ++** inch z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (inch_s16_pow2_z0, svint16_t, ++ z0 = svadd_n_s16_x (svptrue_b16 (), z0, svcnth_pat (SV_POW2)), ++ z0 = svadd_x (svptrue_b16 (), z0, svcnth_pat (SV_POW2))); ++ ++/* ++** inch_s16_pow2_z1: ++** movprfx z0, z1 ++** inch z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (inch_s16_pow2_z1, svint16_t, ++ z0 = svadd_n_s16_x (svptrue_b16 (), z1, svcnth_pat (SV_POW2)), ++ z0 = svadd_x (svptrue_b16 (), z1, svcnth_pat (SV_POW2))); ++ ++/* ++** dech_s16_pow2_z0: ++** dech z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (dech_s16_pow2_z0, svint16_t, ++ z0 = svsub_n_s16_x (svptrue_b16 (), z0, svcnth_pat (SV_POW2)), ++ z0 = svsub_x (svptrue_b16 (), z0, svcnth_pat (SV_POW2))); ++ ++/* ++** dech_s16_pow2_z1: ++** movprfx z0, z1 ++** dech z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (dech_s16_pow2_z1, svint16_t, ++ z0 = svsub_n_s16_x (svptrue_b16 (), z1, svcnth_pat (SV_POW2)), ++ z0 = svsub_x (svptrue_b16 (), z1, svcnth_pat (SV_POW2))); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b16.c +new file mode 100644 +index 000000000..d88b9e5f3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b16.c +@@ -0,0 +1,243 @@ ++/* { dg-additional-options "-msve-vector-bits=scalable" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++#include ++ ++/* ++** cnt_b16_32: ++** cntp x0, p0, p1\.h ++** ret ++*/ ++TEST_PTEST (cnt_b16_32, uint32_t, ++ x0 = svcntp_b16 (p0, p1)); ++ ++/* ++** cnt_b16_64: ++** cntp x0, p0, p1\.h ++** ret ++*/ ++TEST_PTEST (cnt_b16_64, uint64_t, ++ x0 = svcntp_b16 (p0, p1)); ++ ++/* ++** inc_b16_32_general_x0: ++** cntp x([0-9]+), p0, p1\.h ++** add w0, (w0, w\1|w\1, w0) ++** ret ++*/ ++TEST_PTEST (inc_b16_32_general_x0, uint32_t, ++ x0 += svcntp_b16 (p0, p1)); ++ ++/* ++** inc_b16_32_general_x1: ++** cntp x([0-9]+), p0, p1\.h ++** add w0, (w1, w\1|w\1, w1) ++** ret ++*/ ++TEST_PTEST 
(inc_b16_32_general_x1, uint32_t, ++ x0 = x1 + svcntp_b16 (p0, p1)); ++ ++/* ++** inc_b16_32_ptrue_x0: ++** incp x0, p1\.h ++** ret ++*/ ++TEST_PTEST (inc_b16_32_ptrue_x0, uint32_t, ++ x0 += svcntp_b16 (svptrue_b16 (), p1)); ++ ++/* ++** inc_b16_32_ptrue_x1: ++** mov w0, w1 ++** incp x0, p1\.h ++** ret ++*/ ++TEST_PTEST (inc_b16_32_ptrue_x1, uint32_t, ++ x0 = x1 + svcntp_b16 (svptrue_b16 (), p1)); ++ ++/* ++** inc_b16_64_general_x0: ++** cntp (x[0-9]+), p0, p1\.h ++** add x0, (x0, \1|\1, x0) ++** ret ++*/ ++TEST_PTEST (inc_b16_64_general_x0, uint64_t, ++ x0 += svcntp_b16 (p0, p1)); ++ ++/* ++** inc_b16_64_general_x1: ++** cntp (x[0-9]+), p0, p1\.h ++** add x0, (x1, \1|\1, x1) ++** ret ++*/ ++TEST_PTEST (inc_b16_64_general_x1, uint64_t, ++ x0 = x1 + svcntp_b16 (p0, p1)); ++ ++/* ++** inc_b16_64_ptrue_x0: ++** incp x0, p1\.h ++** ret ++*/ ++TEST_PTEST (inc_b16_64_ptrue_x0, uint64_t, ++ x0 += svcntp_b16 (svptrue_b16 (), p1)); ++ ++/* ++** inc_b16_64_ptrue_x1: ++** mov x0, x1 ++** incp x0, p1\.h ++** ret ++*/ ++TEST_PTEST (inc_b16_64_ptrue_x1, uint64_t, ++ x0 = x1 + svcntp_b16 (svptrue_b16 (), p1)); ++ ++/* ++** dec_b16_32_general_x0: ++** cntp x([0-9]+), p0, p1\.h ++** sub w0, w0, w\1 ++** ret ++*/ ++TEST_PTEST (dec_b16_32_general_x0, uint32_t, ++ x0 -= svcntp_b16 (p0, p1)); ++ ++/* ++** dec_b16_32_general_x1: ++** cntp x([0-9]+), p0, p1\.h ++** sub w0, w1, w\1 ++** ret ++*/ ++TEST_PTEST (dec_b16_32_general_x1, uint32_t, ++ x0 = x1 - svcntp_b16 (p0, p1)); ++ ++/* ++** dec_b16_32_ptrue_x0: ++** decp x0, p1\.h ++** ret ++*/ ++TEST_PTEST (dec_b16_32_ptrue_x0, uint32_t, ++ x0 -= svcntp_b16 (svptrue_b16 (), p1)); ++ ++/* ++** dec_b16_32_ptrue_x1: ++** mov w0, w1 ++** decp x0, p1\.h ++** ret ++*/ ++TEST_PTEST (dec_b16_32_ptrue_x1, uint32_t, ++ x0 = x1 - svcntp_b16 (svptrue_b16 (), p1)); ++ ++/* ++** dec_b16_64_general_x0: ++** cntp (x[0-9]+), p0, p1\.h ++** sub x0, x0, \1 ++** ret ++*/ ++TEST_PTEST (dec_b16_64_general_x0, uint64_t, ++ x0 -= svcntp_b16 (p0, p1)); ++ ++/* ++** dec_b16_64_general_x1: ++** cntp (x[0-9]+), p0, p1\.h ++** sub x0, x1, \1 ++** ret ++*/ ++TEST_PTEST (dec_b16_64_general_x1, uint64_t, ++ x0 = x1 - svcntp_b16 (p0, p1)); ++ ++/* ++** dec_b16_64_ptrue_x0: ++** decp x0, p1\.h ++** ret ++*/ ++TEST_PTEST (dec_b16_64_ptrue_x0, uint64_t, ++ x0 -= svcntp_b16 (svptrue_b16 (), p1)); ++ ++/* ++** dec_b16_64_ptrue_x1: ++** mov x0, x1 ++** decp x0, p1\.h ++** ret ++*/ ++TEST_PTEST (dec_b16_64_ptrue_x1, uint64_t, ++ x0 = x1 - svcntp_b16 (svptrue_b16 (), p1)); ++ ++/* ++** inc_b16_u16_general_z0: ++** cntp x([0-9]+), p0, p1\.h ++** mov (z[0-9]+\.h), w\1 ++** add z0\.h, (z0\.h, \2|\2, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b16_u16_general_z0, svuint16_t, ++ z0 = svadd_n_u16_x (svptrue_b16 (), z0, svcntp_b16 (p0, p1)), ++ z0 = svadd_x (svptrue_b16 (), z0, svcntp_b16 (p0, p1))); ++ ++/* ++** inc_b16_u16_general_z1: ++** cntp x([0-9]+), p0, p1\.h ++** mov (z[0-9]+\.h), w\1 ++** add z0\.h, (z1\.h, \2|\2, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b16_u16_general_z1, svuint16_t, ++ z0 = svadd_n_u16_x (svptrue_b16 (), z1, svcntp_b16 (p0, p1)), ++ z0 = svadd_x (svptrue_b16 (), z1, svcntp_b16 (p0, p1))); ++ ++/* ++** inc_b16_u16_ptrue_z0: ++** incp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b16_u16_ptrue_z0, svuint16_t, ++ z0 = svadd_n_u16_x (svptrue_b16 (), z0, svcntp_b16 (svptrue_b16 (), p0)), ++ z0 = svadd_x (svptrue_b16 (), z0, svcntp_b16 (svptrue_b16 (), p0))); ++ ++/* ++** inc_b16_u16_ptrue_z1: ++** movprfx z0, z1 ++** incp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b16_u16_ptrue_z1, 
svuint16_t, ++ z0 = svadd_n_u16_x (svptrue_b16 (), z1, svcntp_b16 (svptrue_b16 (), p0)), ++ z0 = svadd_x (svptrue_b16 (), z1, svcntp_b16 (svptrue_b16 (), p0))); ++ ++/* ++** dec_b16_u16_general_z0: ++** cntp x([0-9]+), p0, p1\.h ++** mov (z[0-9]+\.h), w\1 ++** sub z0\.h, z0\.h, \2 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b16_u16_general_z0, svuint16_t, ++ z0 = svsub_n_u16_x (svptrue_b16 (), z0, svcntp_b16 (p0, p1)), ++ z0 = svsub_x (svptrue_b16 (), z0, svcntp_b16 (p0, p1))); ++ ++/* ++** dec_b16_u16_general_z1: ++** cntp x([0-9]+), p0, p1\.h ++** mov (z[0-9]+\.h), w\1 ++** sub z0\.h, z1\.h, \2 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b16_u16_general_z1, svuint16_t, ++ z0 = svsub_n_u16_x (svptrue_b16 (), z1, svcntp_b16 (p0, p1)), ++ z0 = svsub_x (svptrue_b16 (), z1, svcntp_b16 (p0, p1))); ++ ++/* ++** dec_b16_u16_ptrue_z0: ++** decp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b16_u16_ptrue_z0, svuint16_t, ++ z0 = svsub_n_u16_x (svptrue_b16 (), z0, svcntp_b16 (svptrue_b16 (), p0)), ++ z0 = svsub_x (svptrue_b16 (), z0, svcntp_b16 (svptrue_b16 (), p0))); ++ ++/* ++** dec_b16_u16_ptrue_z1: ++** movprfx z0, z1 ++** decp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b16_u16_ptrue_z1, svuint16_t, ++ z0 = svsub_n_u16_x (svptrue_b16 (), z1, svcntp_b16 (svptrue_b16 (), p0)), ++ z0 = svsub_x (svptrue_b16 (), z1, svcntp_b16 (svptrue_b16 (), p0))); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b32.c +new file mode 100644 +index 000000000..0da818895 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b32.c +@@ -0,0 +1,243 @@ ++/* { dg-additional-options "-msve-vector-bits=scalable" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++#include ++ ++/* ++** cnt_b32_32: ++** cntp x0, p0, p1\.s ++** ret ++*/ ++TEST_PTEST (cnt_b32_32, uint32_t, ++ x0 = svcntp_b32 (p0, p1)); ++ ++/* ++** cnt_b32_64: ++** cntp x0, p0, p1\.s ++** ret ++*/ ++TEST_PTEST (cnt_b32_64, uint64_t, ++ x0 = svcntp_b32 (p0, p1)); ++ ++/* ++** inc_b32_32_general_x0: ++** cntp x([0-9]+), p0, p1\.s ++** add w0, (w0, w\1|w\1, w0) ++** ret ++*/ ++TEST_PTEST (inc_b32_32_general_x0, uint32_t, ++ x0 += svcntp_b32 (p0, p1)); ++ ++/* ++** inc_b32_32_general_x1: ++** cntp x([0-9]+), p0, p1\.s ++** add w0, (w1, w\1|w\1, w1) ++** ret ++*/ ++TEST_PTEST (inc_b32_32_general_x1, uint32_t, ++ x0 = x1 + svcntp_b32 (p0, p1)); ++ ++/* ++** inc_b32_32_ptrue_x0: ++** incp x0, p1\.s ++** ret ++*/ ++TEST_PTEST (inc_b32_32_ptrue_x0, uint32_t, ++ x0 += svcntp_b32 (svptrue_b32 (), p1)); ++ ++/* ++** inc_b32_32_ptrue_x1: ++** mov w0, w1 ++** incp x0, p1\.s ++** ret ++*/ ++TEST_PTEST (inc_b32_32_ptrue_x1, uint32_t, ++ x0 = x1 + svcntp_b32 (svptrue_b32 (), p1)); ++ ++/* ++** inc_b32_64_general_x0: ++** cntp (x[0-9]+), p0, p1\.s ++** add x0, (x0, \1|\1, x0) ++** ret ++*/ ++TEST_PTEST (inc_b32_64_general_x0, uint64_t, ++ x0 += svcntp_b32 (p0, p1)); ++ ++/* ++** inc_b32_64_general_x1: ++** cntp (x[0-9]+), p0, p1\.s ++** add x0, (x1, \1|\1, x1) ++** ret ++*/ ++TEST_PTEST (inc_b32_64_general_x1, uint64_t, ++ x0 = x1 + svcntp_b32 (p0, p1)); ++ ++/* ++** inc_b32_64_ptrue_x0: ++** incp x0, p1\.s ++** ret ++*/ ++TEST_PTEST (inc_b32_64_ptrue_x0, uint64_t, ++ x0 += svcntp_b32 (svptrue_b32 (), p1)); ++ ++/* ++** inc_b32_64_ptrue_x1: ++** mov x0, x1 ++** incp x0, p1\.s ++** ret ++*/ ++TEST_PTEST (inc_b32_64_ptrue_x1, uint64_t, ++ x0 = x1 + svcntp_b32 (svptrue_b32 (), p1)); ++ ++/* ++** dec_b32_32_general_x0: ++** cntp x([0-9]+), p0, p1\.s 
++** sub w0, w0, w\1 ++** ret ++*/ ++TEST_PTEST (dec_b32_32_general_x0, uint32_t, ++ x0 -= svcntp_b32 (p0, p1)); ++ ++/* ++** dec_b32_32_general_x1: ++** cntp x([0-9]+), p0, p1\.s ++** sub w0, w1, w\1 ++** ret ++*/ ++TEST_PTEST (dec_b32_32_general_x1, uint32_t, ++ x0 = x1 - svcntp_b32 (p0, p1)); ++ ++/* ++** dec_b32_32_ptrue_x0: ++** decp x0, p1\.s ++** ret ++*/ ++TEST_PTEST (dec_b32_32_ptrue_x0, uint32_t, ++ x0 -= svcntp_b32 (svptrue_b32 (), p1)); ++ ++/* ++** dec_b32_32_ptrue_x1: ++** mov w0, w1 ++** decp x0, p1\.s ++** ret ++*/ ++TEST_PTEST (dec_b32_32_ptrue_x1, uint32_t, ++ x0 = x1 - svcntp_b32 (svptrue_b32 (), p1)); ++ ++/* ++** dec_b32_64_general_x0: ++** cntp (x[0-9]+), p0, p1\.s ++** sub x0, x0, \1 ++** ret ++*/ ++TEST_PTEST (dec_b32_64_general_x0, uint64_t, ++ x0 -= svcntp_b32 (p0, p1)); ++ ++/* ++** dec_b32_64_general_x1: ++** cntp (x[0-9]+), p0, p1\.s ++** sub x0, x1, \1 ++** ret ++*/ ++TEST_PTEST (dec_b32_64_general_x1, uint64_t, ++ x0 = x1 - svcntp_b32 (p0, p1)); ++ ++/* ++** dec_b32_64_ptrue_x0: ++** decp x0, p1\.s ++** ret ++*/ ++TEST_PTEST (dec_b32_64_ptrue_x0, uint64_t, ++ x0 -= svcntp_b32 (svptrue_b32 (), p1)); ++ ++/* ++** dec_b32_64_ptrue_x1: ++** mov x0, x1 ++** decp x0, p1\.s ++** ret ++*/ ++TEST_PTEST (dec_b32_64_ptrue_x1, uint64_t, ++ x0 = x1 - svcntp_b32 (svptrue_b32 (), p1)); ++ ++/* ++** inc_b32_s32_general_z0: ++** cntp x([0-9]+), p0, p1\.s ++** mov (z[0-9]+\.s), w\1 ++** add z0\.s, (z0\.s, \2|\2, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b32_s32_general_z0, svint32_t, ++ z0 = svadd_n_s32_x (svptrue_b32 (), z0, svcntp_b32 (p0, p1)), ++ z0 = svadd_x (svptrue_b32 (), z0, svcntp_b32 (p0, p1))); ++ ++/* ++** inc_b32_s32_general_z1: ++** cntp x([0-9]+), p0, p1\.s ++** mov (z[0-9]+\.s), w\1 ++** add z0\.s, (z1\.s, \2|\2, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b32_s32_general_z1, svint32_t, ++ z0 = svadd_n_s32_x (svptrue_b32 (), z1, svcntp_b32 (p0, p1)), ++ z0 = svadd_x (svptrue_b32 (), z1, svcntp_b32 (p0, p1))); ++ ++/* ++** inc_b32_s32_ptrue_z0: ++** incp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b32_s32_ptrue_z0, svint32_t, ++ z0 = svadd_n_s32_x (svptrue_b32 (), z0, svcntp_b32 (svptrue_b32 (), p0)), ++ z0 = svadd_x (svptrue_b32 (), z0, svcntp_b32 (svptrue_b32 (), p0))); ++ ++/* ++** inc_b32_s32_ptrue_z1: ++** movprfx z0, z1 ++** incp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b32_s32_ptrue_z1, svint32_t, ++ z0 = svadd_n_s32_x (svptrue_b32 (), z1, svcntp_b32 (svptrue_b32 (), p0)), ++ z0 = svadd_x (svptrue_b32 (), z1, svcntp_b32 (svptrue_b32 (), p0))); ++ ++/* ++** dec_b32_s32_general_z0: ++** cntp x([0-9]+), p0, p1\.s ++** mov (z[0-9]+\.s), w\1 ++** sub z0\.s, z0\.s, \2 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b32_s32_general_z0, svint32_t, ++ z0 = svsub_n_s32_x (svptrue_b32 (), z0, svcntp_b32 (p0, p1)), ++ z0 = svsub_x (svptrue_b32 (), z0, svcntp_b32 (p0, p1))); ++ ++/* ++** dec_b32_s32_general_z1: ++** cntp x([0-9]+), p0, p1\.s ++** mov (z[0-9]+\.s), w\1 ++** sub z0\.s, z1\.s, \2 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b32_s32_general_z1, svint32_t, ++ z0 = svsub_n_s32_x (svptrue_b32 (), z1, svcntp_b32 (p0, p1)), ++ z0 = svsub_x (svptrue_b32 (), z1, svcntp_b32 (p0, p1))); ++ ++/* ++** dec_b32_s32_ptrue_z0: ++** decp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b32_s32_ptrue_z0, svint32_t, ++ z0 = svsub_n_s32_x (svptrue_b32 (), z0, svcntp_b32 (svptrue_b32 (), p0)), ++ z0 = svsub_x (svptrue_b32 (), z0, svcntp_b32 (svptrue_b32 (), p0))); ++ ++/* ++** dec_b32_s32_ptrue_z1: ++** movprfx z0, z1 ++** decp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b32_s32_ptrue_z1, 
svint32_t, ++ z0 = svsub_n_s32_x (svptrue_b32 (), z1, svcntp_b32 (svptrue_b32 (), p0)), ++ z0 = svsub_x (svptrue_b32 (), z1, svcntp_b32 (svptrue_b32 (), p0))); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b64.c +new file mode 100644 +index 000000000..6ddbaef5a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b64.c +@@ -0,0 +1,243 @@ ++/* { dg-additional-options "-msve-vector-bits=scalable" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++#include ++ ++/* ++** cnt_b64_32: ++** cntp x0, p0, p1\.d ++** ret ++*/ ++TEST_PTEST (cnt_b64_32, uint32_t, ++ x0 = svcntp_b64 (p0, p1)); ++ ++/* ++** cnt_b64_64: ++** cntp x0, p0, p1\.d ++** ret ++*/ ++TEST_PTEST (cnt_b64_64, uint64_t, ++ x0 = svcntp_b64 (p0, p1)); ++ ++/* ++** inc_b64_32_general_x0: ++** cntp x([0-9]+), p0, p1\.d ++** add w0, (w0, w\1|w\1, w0) ++** ret ++*/ ++TEST_PTEST (inc_b64_32_general_x0, uint32_t, ++ x0 += svcntp_b64 (p0, p1)); ++ ++/* ++** inc_b64_32_general_x1: ++** cntp x([0-9]+), p0, p1\.d ++** add w0, (w1, w\1|w\1, w1) ++** ret ++*/ ++TEST_PTEST (inc_b64_32_general_x1, uint32_t, ++ x0 = x1 + svcntp_b64 (p0, p1)); ++ ++/* ++** inc_b64_32_ptrue_x0: ++** incp x0, p1\.d ++** ret ++*/ ++TEST_PTEST (inc_b64_32_ptrue_x0, uint32_t, ++ x0 += svcntp_b64 (svptrue_b64 (), p1)); ++ ++/* ++** inc_b64_32_ptrue_x1: ++** mov w0, w1 ++** incp x0, p1\.d ++** ret ++*/ ++TEST_PTEST (inc_b64_32_ptrue_x1, uint32_t, ++ x0 = x1 + svcntp_b64 (svptrue_b64 (), p1)); ++ ++/* ++** inc_b64_64_general_x0: ++** cntp (x[0-9]+), p0, p1\.d ++** add x0, (x0, \1|\1, x0) ++** ret ++*/ ++TEST_PTEST (inc_b64_64_general_x0, uint64_t, ++ x0 += svcntp_b64 (p0, p1)); ++ ++/* ++** inc_b64_64_general_x1: ++** cntp (x[0-9]+), p0, p1\.d ++** add x0, (x1, \1|\1, x1) ++** ret ++*/ ++TEST_PTEST (inc_b64_64_general_x1, uint64_t, ++ x0 = x1 + svcntp_b64 (p0, p1)); ++ ++/* ++** inc_b64_64_ptrue_x0: ++** incp x0, p1\.d ++** ret ++*/ ++TEST_PTEST (inc_b64_64_ptrue_x0, uint64_t, ++ x0 += svcntp_b64 (svptrue_b64 (), p1)); ++ ++/* ++** inc_b64_64_ptrue_x1: ++** mov x0, x1 ++** incp x0, p1\.d ++** ret ++*/ ++TEST_PTEST (inc_b64_64_ptrue_x1, uint64_t, ++ x0 = x1 + svcntp_b64 (svptrue_b64 (), p1)); ++ ++/* ++** dec_b64_32_general_x0: ++** cntp x([0-9]+), p0, p1\.d ++** sub w0, w0, w\1 ++** ret ++*/ ++TEST_PTEST (dec_b64_32_general_x0, uint32_t, ++ x0 -= svcntp_b64 (p0, p1)); ++ ++/* ++** dec_b64_32_general_x1: ++** cntp x([0-9]+), p0, p1\.d ++** sub w0, w1, w\1 ++** ret ++*/ ++TEST_PTEST (dec_b64_32_general_x1, uint32_t, ++ x0 = x1 - svcntp_b64 (p0, p1)); ++ ++/* ++** dec_b64_32_ptrue_x0: ++** decp x0, p1\.d ++** ret ++*/ ++TEST_PTEST (dec_b64_32_ptrue_x0, uint32_t, ++ x0 -= svcntp_b64 (svptrue_b64 (), p1)); ++ ++/* ++** dec_b64_32_ptrue_x1: ++** mov w0, w1 ++** decp x0, p1\.d ++** ret ++*/ ++TEST_PTEST (dec_b64_32_ptrue_x1, uint32_t, ++ x0 = x1 - svcntp_b64 (svptrue_b64 (), p1)); ++ ++/* ++** dec_b64_64_general_x0: ++** cntp (x[0-9]+), p0, p1\.d ++** sub x0, x0, \1 ++** ret ++*/ ++TEST_PTEST (dec_b64_64_general_x0, uint64_t, ++ x0 -= svcntp_b64 (p0, p1)); ++ ++/* ++** dec_b64_64_general_x1: ++** cntp (x[0-9]+), p0, p1\.d ++** sub x0, x1, \1 ++** ret ++*/ ++TEST_PTEST (dec_b64_64_general_x1, uint64_t, ++ x0 = x1 - svcntp_b64 (p0, p1)); ++ ++/* ++** dec_b64_64_ptrue_x0: ++** decp x0, p1\.d ++** ret ++*/ ++TEST_PTEST (dec_b64_64_ptrue_x0, uint64_t, ++ x0 -= svcntp_b64 (svptrue_b64 (), p1)); ++ ++/* ++** dec_b64_64_ptrue_x1: ++** 
mov x0, x1 ++** decp x0, p1\.d ++** ret ++*/ ++TEST_PTEST (dec_b64_64_ptrue_x1, uint64_t, ++ x0 = x1 - svcntp_b64 (svptrue_b64 (), p1)); ++ ++/* ++** inc_b64_u64_general_z0: ++** cntp (x[0-9]+), p0, p1\.d ++** mov (z[0-9]+\.d), \1 ++** add z0\.d, (z0\.d, \2|\2, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b64_u64_general_z0, svuint64_t, ++ z0 = svadd_n_u64_x (svptrue_b64 (), z0, svcntp_b64 (p0, p1)), ++ z0 = svadd_x (svptrue_b64 (), z0, svcntp_b64 (p0, p1))); ++ ++/* ++** inc_b64_u64_general_z1: ++** cntp (x[0-9]+), p0, p1\.d ++** mov (z[0-9]+\.d), \1 ++** add z0\.d, (z1\.d, \2|\2, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b64_u64_general_z1, svuint64_t, ++ z0 = svadd_n_u64_x (svptrue_b64 (), z1, svcntp_b64 (p0, p1)), ++ z0 = svadd_x (svptrue_b64 (), z1, svcntp_b64 (p0, p1))); ++ ++/* ++** inc_b64_u64_ptrue_z0: ++** incp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b64_u64_ptrue_z0, svuint64_t, ++ z0 = svadd_n_u64_x (svptrue_b64 (), z0, svcntp_b64 (svptrue_b64 (), p0)), ++ z0 = svadd_x (svptrue_b64 (), z0, svcntp_b64 (svptrue_b64 (), p0))); ++ ++/* ++** inc_b64_u64_ptrue_z1: ++** movprfx z0, z1 ++** incp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b64_u64_ptrue_z1, svuint64_t, ++ z0 = svadd_n_u64_x (svptrue_b64 (), z1, svcntp_b64 (svptrue_b64 (), p0)), ++ z0 = svadd_x (svptrue_b64 (), z1, svcntp_b64 (svptrue_b64 (), p0))); ++ ++/* ++** dec_b64_u64_general_z0: ++** cntp (x[0-9]+), p0, p1\.d ++** mov (z[0-9]+\.d), \1 ++** sub z0\.d, z0\.d, \2 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b64_u64_general_z0, svuint64_t, ++ z0 = svsub_n_u64_x (svptrue_b64 (), z0, svcntp_b64 (p0, p1)), ++ z0 = svsub_x (svptrue_b64 (), z0, svcntp_b64 (p0, p1))); ++ ++/* ++** dec_b64_u64_general_z1: ++** cntp (x[0-9]+), p0, p1\.d ++** mov (z[0-9]+\.d), \1 ++** sub z0\.d, z1\.d, \2 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b64_u64_general_z1, svuint64_t, ++ z0 = svsub_n_u64_x (svptrue_b64 (), z1, svcntp_b64 (p0, p1)), ++ z0 = svsub_x (svptrue_b64 (), z1, svcntp_b64 (p0, p1))); ++ ++/* ++** dec_b64_u64_ptrue_z0: ++** decp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b64_u64_ptrue_z0, svuint64_t, ++ z0 = svsub_n_u64_x (svptrue_b64 (), z0, svcntp_b64 (svptrue_b64 (), p0)), ++ z0 = svsub_x (svptrue_b64 (), z0, svcntp_b64 (svptrue_b64 (), p0))); ++ ++/* ++** dec_b64_u64_ptrue_z1: ++** movprfx z0, z1 ++** decp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b64_u64_ptrue_z1, svuint64_t, ++ z0 = svsub_n_u64_x (svptrue_b64 (), z1, svcntp_b64 (svptrue_b64 (), p0)), ++ z0 = svsub_x (svptrue_b64 (), z1, svcntp_b64 (svptrue_b64 (), p0))); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b8.c +new file mode 100644 +index 000000000..e02c02cd6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b8.c +@@ -0,0 +1,253 @@ ++/* { dg-additional-options "-msve-vector-bits=scalable" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++#include ++ ++/* ++** cnt_b8_32: ++** cntp x0, p0, p1\.b ++** ret ++*/ ++TEST_PTEST (cnt_b8_32, uint32_t, ++ x0 = svcntp_b8 (p0, p1)); ++ ++/* ++** cnt_b8_64: ++** cntp x0, p0, p1\.b ++** ret ++*/ ++TEST_PTEST (cnt_b8_64, uint64_t, ++ x0 = svcntp_b8 (p0, p1)); ++ ++/* ++** inc_b8_32_general_x0: ++** cntp x([0-9]+), p0, p1\.b ++** add w0, (w0, w\1|w\1, w0) ++** ret ++*/ ++TEST_PTEST (inc_b8_32_general_x0, uint32_t, ++ x0 += svcntp_b8 (p0, p1)); ++ ++/* ++** inc_b8_32_general_x1: ++** cntp x([0-9]+), p0, p1\.b ++** add w0, (w1, w\1|w\1, w1) ++** ret ++*/ ++TEST_PTEST 
(inc_b8_32_general_x1, uint32_t, ++ x0 = x1 + svcntp_b8 (p0, p1)); ++ ++/* ++** inc_b8_32_ptrue_x0: ++** incp x0, p1\.b ++** ret ++*/ ++TEST_PTEST (inc_b8_32_ptrue_x0, uint32_t, ++ x0 += svcntp_b8 (svptrue_b8 (), p1)); ++ ++/* ++** inc_b8_32_ptrue_x1: ++** mov w0, w1 ++** incp x0, p1\.b ++** ret ++*/ ++TEST_PTEST (inc_b8_32_ptrue_x1, uint32_t, ++ x0 = x1 + svcntp_b8 (svptrue_b8 (), p1)); ++ ++/* ++** inc_b8_64_general_x0: ++** cntp (x[0-9]+), p0, p1\.b ++** add x0, (x0, \1|\1, x0) ++** ret ++*/ ++TEST_PTEST (inc_b8_64_general_x0, uint64_t, ++ x0 += svcntp_b8 (p0, p1)); ++ ++/* ++** inc_b8_64_general_x1: ++** cntp (x[0-9]+), p0, p1\.b ++** add x0, (x1, \1|\1, x1) ++** ret ++*/ ++TEST_PTEST (inc_b8_64_general_x1, uint64_t, ++ x0 = x1 + svcntp_b8 (p0, p1)); ++ ++/* ++** inc_b8_64_ptrue_x0: ++** incp x0, p1\.b ++** ret ++*/ ++TEST_PTEST (inc_b8_64_ptrue_x0, uint64_t, ++ x0 += svcntp_b8 (svptrue_b8 (), p1)); ++ ++/* ++** inc_b8_64_ptrue_x1: ++** mov x0, x1 ++** incp x0, p1\.b ++** ret ++*/ ++TEST_PTEST (inc_b8_64_ptrue_x1, uint64_t, ++ x0 = x1 + svcntp_b8 (svptrue_b8 (), p1)); ++ ++/* ++** dec_b8_32_general_x0: ++** cntp x([0-9]+), p0, p1\.b ++** sub w0, w0, w\1 ++** ret ++*/ ++TEST_PTEST (dec_b8_32_general_x0, uint32_t, ++ x0 -= svcntp_b8 (p0, p1)); ++ ++/* ++** dec_b8_32_general_x1: ++** cntp x([0-9]+), p0, p1\.b ++** sub w0, w1, w\1 ++** ret ++*/ ++TEST_PTEST (dec_b8_32_general_x1, uint32_t, ++ x0 = x1 - svcntp_b8 (p0, p1)); ++ ++/* ++** dec_b8_32_ptrue_x0: ++** decp x0, p1\.b ++** ret ++*/ ++TEST_PTEST (dec_b8_32_ptrue_x0, uint32_t, ++ x0 -= svcntp_b8 (svptrue_b8 (), p1)); ++ ++/* ++** dec_b8_32_ptrue_x1: ++** mov w0, w1 ++** decp x0, p1\.b ++** ret ++*/ ++TEST_PTEST (dec_b8_32_ptrue_x1, uint32_t, ++ x0 = x1 - svcntp_b8 (svptrue_b8 (), p1)); ++ ++/* ++** dec_b8_64_general_x0: ++** cntp (x[0-9]+), p0, p1\.b ++** sub x0, x0, \1 ++** ret ++*/ ++TEST_PTEST (dec_b8_64_general_x0, uint64_t, ++ x0 -= svcntp_b8 (p0, p1)); ++ ++/* ++** dec_b8_64_general_x1: ++** cntp (x[0-9]+), p0, p1\.b ++** sub x0, x1, \1 ++** ret ++*/ ++TEST_PTEST (dec_b8_64_general_x1, uint64_t, ++ x0 = x1 - svcntp_b8 (p0, p1)); ++ ++/* ++** dec_b8_64_ptrue_x0: ++** decp x0, p1\.b ++** ret ++*/ ++TEST_PTEST (dec_b8_64_ptrue_x0, uint64_t, ++ x0 -= svcntp_b8 (svptrue_b8 (), p1)); ++ ++/* ++** dec_b8_64_ptrue_x1: ++** mov x0, x1 ++** decp x0, p1\.b ++** ret ++*/ ++TEST_PTEST (dec_b8_64_ptrue_x1, uint64_t, ++ x0 = x1 - svcntp_b8 (svptrue_b8 (), p1)); ++ ++/* ++** inc_b8_s8_general_z0: ++** cntp x([0-9]+), p0, p1\.b ++** mov (z[0-9]+\.b), w\1 ++** add z0\.b, (z0\.b, \2|\2, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b8_s8_general_z0, svint8_t, ++ z0 = svadd_n_s8_x (svptrue_b8 (), z0, svcntp_b8 (p0, p1)), ++ z0 = svadd_x (svptrue_b8 (), z0, svcntp_b8 (p0, p1))); ++ ++/* ++** inc_b8_s8_general_z1: ++** cntp x([0-9]+), p0, p1\.b ++** mov (z[0-9]+\.b), w\1 ++** add z0\.b, (z1\.b, \2|\2, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b8_s8_general_z1, svint8_t, ++ z0 = svadd_n_s8_x (svptrue_b8 (), z1, svcntp_b8 (p0, p1)), ++ z0 = svadd_x (svptrue_b8 (), z1, svcntp_b8 (p0, p1))); ++ ++/* ++** inc_b8_s8_ptrue_z0: ++** ptrue (p[0-7])\.b, all ++** cntp x([0-9]+), \1, p0\.b ++** mov (z[0-9]+\.b), w\2 ++** add z0\.b, (z0\.b, \3|\3, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b8_s8_ptrue_z0, svint8_t, ++ z0 = svadd_n_s8_x (svptrue_b8 (), z0, svcntp_b8 (svptrue_b8 (), p0)), ++ z0 = svadd_x (svptrue_b8 (), z0, svcntp_b8 (svptrue_b8 (), p0))); ++ ++/* ++** inc_b8_s8_ptrue_z1: ++** ptrue (p[0-7])\.b, all ++** cntp x([0-9]+), \1, p0\.b ++** mov 
(z[0-9]+\.b), w\2 ++** add z0\.b, (z1\.b, \3|\3, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (inc_b8_s8_ptrue_z1, svint8_t, ++ z0 = svadd_n_s8_x (svptrue_b8 (), z1, svcntp_b8 (svptrue_b8 (), p0)), ++ z0 = svadd_x (svptrue_b8 (), z1, svcntp_b8 (svptrue_b8 (), p0))); ++ ++/* ++** dec_b8_s8_general_z0: ++** cntp x([0-9]+), p0, p1\.b ++** mov (z[0-9]+\.b), w\1 ++** sub z0\.b, z0\.b, \2 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b8_s8_general_z0, svint8_t, ++ z0 = svsub_n_s8_x (svptrue_b8 (), z0, svcntp_b8 (p0, p1)), ++ z0 = svsub_x (svptrue_b8 (), z0, svcntp_b8 (p0, p1))); ++ ++/* ++** dec_b8_s8_general_z1: ++** cntp x([0-9]+), p0, p1\.b ++** mov (z[0-9]+\.b), w\1 ++** sub z0\.b, z1\.b, \2 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b8_s8_general_z1, svint8_t, ++ z0 = svsub_n_s8_x (svptrue_b8 (), z1, svcntp_b8 (p0, p1)), ++ z0 = svsub_x (svptrue_b8 (), z1, svcntp_b8 (p0, p1))); ++ ++/* ++** dec_b8_s8_ptrue_z0: ++** ptrue (p[0-7])\.b, all ++** cntp x([0-9]+), \1, p0\.b ++** mov (z[0-9]+\.b), w\2 ++** sub z0\.b, z0\.b, \3 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b8_s8_ptrue_z0, svint8_t, ++ z0 = svsub_n_s8_x (svptrue_b8 (), z0, svcntp_b8 (svptrue_b8 (), p0)), ++ z0 = svsub_x (svptrue_b8 (), z0, svcntp_b8 (svptrue_b8 (), p0))); ++ ++/* ++** dec_b8_s8_ptrue_z1: ++** ptrue (p[0-7])\.b, all ++** cntp x([0-9]+), \1, p0\.b ++** mov (z[0-9]+\.b), w\2 ++** sub z0\.b, z1\.b, \3 ++** ret ++*/ ++TEST_UNIFORM_Z (dec_b8_s8_ptrue_z1, svint8_t, ++ z0 = svsub_n_s8_x (svptrue_b8 (), z1, svcntp_b8 (svptrue_b8 (), p0)), ++ z0 = svsub_x (svptrue_b8 (), z1, svcntp_b8 (svptrue_b8 (), p0))); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c +new file mode 100644 +index 000000000..e26cc67a4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c +@@ -0,0 +1,279 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cntw_1: ++** cntw x0 ++** ret ++*/ ++PROTO (cntw_1, uint64_t, ()) { return svcntw (); } ++ ++/* ++** cntw_2: ++** cnth x0 ++** ret ++*/ ++PROTO (cntw_2, uint64_t, ()) { return svcntw () * 2; } ++ ++/* ++** cntw_3: ++** cntw x0, all, mul #3 ++** ret ++*/ ++PROTO (cntw_3, uint64_t, ()) { return svcntw () * 3; } ++ ++/* ++** cntw_4: ++** cntb x0 ++** ret ++*/ ++PROTO (cntw_4, uint64_t, ()) { return svcntw () * 4; } ++ ++/* ++** cntw_8: ++** cntb x0, all, mul #2 ++** ret ++*/ ++PROTO (cntw_8, uint64_t, ()) { return svcntw () * 8; } ++ ++/* ++** cntw_15: ++** cntw x0, all, mul #15 ++** ret ++*/ ++PROTO (cntw_15, uint64_t, ()) { return svcntw () * 15; } ++ ++/* ++** cntw_16: ++** cntb x0, all, mul #4 ++** ret ++*/ ++PROTO (cntw_16, uint64_t, ()) { return svcntw () * 16; } ++ ++/* Other sequences would be OK. 
*/ ++/* ++** cntw_17: ++** cntb x0, all, mul #4 ++** incw x0 ++** ret ++*/ ++PROTO (cntw_17, uint64_t, ()) { return svcntw () * 17; } ++ ++/* ++** cntw_32: ++** cntb x0, all, mul #8 ++** ret ++*/ ++PROTO (cntw_32, uint64_t, ()) { return svcntw () * 32; } ++ ++/* ++** cntw_64: ++** cntb x0, all, mul #16 ++** ret ++*/ ++PROTO (cntw_64, uint64_t, ()) { return svcntw () * 64; } ++ ++/* ++** cntw_128: ++** cntd (x[0-9]+) ++** lsl x0, \1, 8 ++** ret ++*/ ++PROTO (cntw_128, uint64_t, ()) { return svcntw () * 128; } ++ ++/* ++** cntw_m1: ++** cntw (x[0-9]+) ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntw_m1, uint64_t, ()) { return -svcntw (); } ++ ++/* ++** cntw_m13: ++** cntw (x[0-9]+), all, mul #13 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntw_m13, uint64_t, ()) { return -svcntw () * 13; } ++ ++/* ++** cntw_m15: ++** cntw (x[0-9]+), all, mul #15 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntw_m15, uint64_t, ()) { return -svcntw () * 15; } ++ ++/* ++** cntw_m16: ++** cntb (x[0-9]+), all, mul #4 ++** neg x0, \1 ++** ret ++*/ ++PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; } ++ ++/* Other sequences would be OK. */ ++/* ++** cntw_m17: ++** cntb x0, all, mul #4 ++** incw x0 ++** neg x0, x0 ++** ret ++*/ ++PROTO (cntw_m17, uint64_t, ()) { return -svcntw () * 17; } ++ ++/* ++** incw_1: ++** incw x0 ++** ret ++*/ ++PROTO (incw_1, uint64_t, (uint64_t x0)) { return x0 + svcntw (); } ++ ++/* ++** incw_2: ++** inch x0 ++** ret ++*/ ++PROTO (incw_2, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 2; } ++ ++/* ++** incw_3: ++** incw x0, all, mul #3 ++** ret ++*/ ++PROTO (incw_3, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 3; } ++ ++/* ++** incw_4: ++** incb x0 ++** ret ++*/ ++PROTO (incw_4, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 4; } ++ ++/* ++** incw_7: ++** incw x0, all, mul #7 ++** ret ++*/ ++PROTO (incw_7, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 7; } ++ ++/* ++** incw_8: ++** incb x0, all, mul #2 ++** ret ++*/ ++PROTO (incw_8, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 8; } ++ ++/* ++** incw_9: ++** incw x0, all, mul #9 ++** ret ++*/ ++PROTO (incw_9, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 9; } ++ ++/* ++** incw_15: ++** incw x0, all, mul #15 ++** ret ++*/ ++PROTO (incw_15, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 15; } ++ ++/* ++** incw_16: ++** incb x0, all, mul #4 ++** ret ++*/ ++PROTO (incw_16, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 16; } ++ ++/* ++** incw_18: ++** inch x0, all, mul #9 ++** ret ++*/ ++PROTO (incw_18, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 18; } ++ ++/* ++** incw_30: ++** inch x0, all, mul #15 ++** ret ++*/ ++PROTO (incw_30, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 30; } ++ ++/* ++** decw_1: ++** decw x0 ++** ret ++*/ ++PROTO (decw_1, uint64_t, (uint64_t x0)) { return x0 - svcntw (); } ++ ++/* ++** decw_2: ++** dech x0 ++** ret ++*/ ++PROTO (decw_2, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 2; } ++ ++/* ++** decw_3: ++** decw x0, all, mul #3 ++** ret ++*/ ++PROTO (decw_3, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 3; } ++ ++/* ++** decw_4: ++** decb x0 ++** ret ++*/ ++PROTO (decw_4, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 4; } ++ ++/* ++** decw_7: ++** decw x0, all, mul #7 ++** ret ++*/ ++PROTO (decw_7, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 7; } ++ ++/* ++** decw_8: ++** decb x0, all, mul #2 ++** ret ++*/ ++PROTO (decw_8, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 8; } ++ ++/* ++** decw_9: ++** decw x0, all, mul #9 ++** ret ++*/ ++PROTO 
(decw_9, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 9; } ++ ++/* ++** decw_15: ++** decw x0, all, mul #15 ++** ret ++*/ ++PROTO (decw_15, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 15; } ++ ++/* ++** decw_16: ++** decb x0, all, mul #4 ++** ret ++*/ ++PROTO (decw_16, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 16; } ++ ++/* ++** decw_18: ++** dech x0, all, mul #9 ++** ret ++*/ ++PROTO (decw_18, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 18; } ++ ++/* ++** decw_30: ++** dech x0, all, mul #15 ++** ret ++*/ ++PROTO (decw_30, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 30; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw_pat.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw_pat.c +new file mode 100644 +index 000000000..ff6b7d882 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw_pat.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cntw_pow2: ++** cntw x0, pow2 ++** ret ++*/ ++PROTO (cntw_pow2, uint64_t, ()) { return svcntw_pat (SV_POW2); } ++ ++/* ++** cntw_vl1: ++** mov x0, #?1 ++** ret ++*/ ++PROTO (cntw_vl1, uint64_t, ()) { return svcntw_pat (SV_VL1); } ++ ++/* ++** cntw_vl2: ++** mov x0, #?2 ++** ret ++*/ ++PROTO (cntw_vl2, uint64_t, ()) { return svcntw_pat (SV_VL2); } ++ ++/* ++** cntw_vl3: ++** mov x0, #?3 ++** ret ++*/ ++PROTO (cntw_vl3, uint64_t, ()) { return svcntw_pat (SV_VL3); } ++ ++/* ++** cntw_vl4: ++** mov x0, #?4 ++** ret ++*/ ++PROTO (cntw_vl4, uint64_t, ()) { return svcntw_pat (SV_VL4); } ++ ++/* ++** cntw_vl5: ++** cntw x0, vl5 ++** ret ++*/ ++PROTO (cntw_vl5, uint64_t, ()) { return svcntw_pat (SV_VL5); } ++ ++/* ++** cntw_vl6: ++** cntw x0, vl6 ++** ret ++*/ ++PROTO (cntw_vl6, uint64_t, ()) { return svcntw_pat (SV_VL6); } ++ ++/* ++** cntw_vl7: ++** cntw x0, vl7 ++** ret ++*/ ++PROTO (cntw_vl7, uint64_t, ()) { return svcntw_pat (SV_VL7); } ++ ++/* ++** cntw_vl8: ++** cntw x0, vl8 ++** ret ++*/ ++PROTO (cntw_vl8, uint64_t, ()) { return svcntw_pat (SV_VL8); } ++ ++/* ++** cntw_vl16: ++** cntw x0, vl16 ++** ret ++*/ ++PROTO (cntw_vl16, uint64_t, ()) { return svcntw_pat (SV_VL16); } ++ ++/* ++** cntw_vl32: ++** cntw x0, vl32 ++** ret ++*/ ++PROTO (cntw_vl32, uint64_t, ()) { return svcntw_pat (SV_VL32); } ++ ++/* ++** cntw_vl64: ++** cntw x0, vl64 ++** ret ++*/ ++PROTO (cntw_vl64, uint64_t, ()) { return svcntw_pat (SV_VL64); } ++ ++/* ++** cntw_vl128: ++** cntw x0, vl128 ++** ret ++*/ ++PROTO (cntw_vl128, uint64_t, ()) { return svcntw_pat (SV_VL128); } ++ ++/* ++** cntw_vl256: ++** cntw x0, vl256 ++** ret ++*/ ++PROTO (cntw_vl256, uint64_t, ()) { return svcntw_pat (SV_VL256); } ++ ++/* ++** cntw_mul3: ++** cntw x0, mul3 ++** ret ++*/ ++PROTO (cntw_mul3, uint64_t, ()) { return svcntw_pat (SV_MUL3); } ++ ++/* ++** cntw_mul4: ++** cntw x0, mul4 ++** ret ++*/ ++PROTO (cntw_mul4, uint64_t, ()) { return svcntw_pat (SV_MUL4); } ++ ++/* ++** cntw_all: ++** cntw x0 ++** ret ++*/ ++PROTO (cntw_all, uint64_t, ()) { return svcntw_pat (SV_ALL); } ++ ++/* ++** incw_32_pow2: ++** incw x0, pow2 ++** ret ++*/ ++PROTO (incw_32_pow2, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_POW2); } ++ ++/* ++** incw_32_vl1: ++** add w0, w0, #?1 ++** ret ++*/ ++PROTO (incw_32_vl1, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL1); } ++ ++/* ++** incw_32_vl2: ++** add w0, w0, #?2 ++** ret ++*/ ++PROTO (incw_32_vl2, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL2); } ++ ++/* ++** incw_32_vl3: ++** add w0, w0, #?3 ++** 
ret ++*/ ++PROTO (incw_32_vl3, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL3); } ++ ++/* ++** incw_32_vl4: ++** add w0, w0, #?4 ++** ret ++*/ ++PROTO (incw_32_vl4, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL4); } ++ ++/* ++** incw_32_vl5: ++** incw x0, vl5 ++** ret ++*/ ++PROTO (incw_32_vl5, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL5); } ++ ++/* ++** incw_32_vl6: ++** incw x0, vl6 ++** ret ++*/ ++PROTO (incw_32_vl6, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL6); } ++ ++/* ++** incw_32_vl7: ++** incw x0, vl7 ++** ret ++*/ ++PROTO (incw_32_vl7, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL7); } ++ ++/* ++** incw_32_vl8: ++** incw x0, vl8 ++** ret ++*/ ++PROTO (incw_32_vl8, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL8); } ++ ++/* ++** incw_32_vl16: ++** incw x0, vl16 ++** ret ++*/ ++PROTO (incw_32_vl16, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL16); } ++ ++/* ++** incw_32_vl32: ++** incw x0, vl32 ++** ret ++*/ ++PROTO (incw_32_vl32, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL32); } ++ ++/* ++** incw_32_vl64: ++** incw x0, vl64 ++** ret ++*/ ++PROTO (incw_32_vl64, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL64); } ++ ++/* ++** incw_32_vl128: ++** incw x0, vl128 ++** ret ++*/ ++PROTO (incw_32_vl128, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL128); } ++ ++/* ++** incw_32_vl256: ++** incw x0, vl256 ++** ret ++*/ ++PROTO (incw_32_vl256, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL256); } ++ ++/* ++** incw_32_mul3: ++** incw x0, mul3 ++** ret ++*/ ++PROTO (incw_32_mul3, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_MUL3); } ++ ++/* ++** incw_32_mul4: ++** incw x0, mul4 ++** ret ++*/ ++PROTO (incw_32_mul4, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_MUL4); } ++ ++/* ++** incw_32_all: ++** incw x0 ++** ret ++*/ ++PROTO (incw_32_all, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_ALL); } ++ ++/* ++** incw_64_pow2: ++** incw x0, pow2 ++** ret ++*/ ++PROTO (incw_64_pow2, uint64_t, (uint64_t x0)) { return x0 + svcntw_pat (SV_POW2); } ++ ++/* ++** incw_64_all: ++** incw x0 ++** ret ++*/ ++PROTO (incw_64_all, uint64_t, (uint64_t x0)) { return x0 + svcntw_pat (SV_ALL); } ++ ++/* ++** decw_32_pow2: ++** decw x0, pow2 ++** ret ++*/ ++PROTO (decw_32_pow2, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_POW2); } ++ ++/* ++** decw_32_vl1: ++** sub w0, w0, #?1 ++** ret ++*/ ++PROTO (decw_32_vl1, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL1); } ++ ++/* ++** decw_32_vl2: ++** sub w0, w0, #?2 ++** ret ++*/ ++PROTO (decw_32_vl2, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL2); } ++ ++/* ++** decw_32_vl3: ++** sub w0, w0, #?3 ++** ret ++*/ ++PROTO (decw_32_vl3, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL3); } ++ ++/* ++** decw_32_vl4: ++** sub w0, w0, #?4 ++** ret ++*/ ++PROTO (decw_32_vl4, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL4); } ++ ++/* ++** decw_32_vl5: ++** decw x0, vl5 ++** ret ++*/ ++PROTO (decw_32_vl5, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL5); } ++ ++/* ++** decw_32_vl6: ++** decw x0, vl6 ++** ret ++*/ ++PROTO (decw_32_vl6, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL6); } ++ ++/* ++** decw_32_vl7: ++** decw x0, vl7 ++** ret ++*/ ++PROTO (decw_32_vl7, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL7); } ++ ++/* ++** decw_32_vl8: ++** decw x0, vl8 ++** ret ++*/ ++PROTO (decw_32_vl8, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL8); } ++ 
++/* ++** decw_32_vl16: ++** decw x0, vl16 ++** ret ++*/ ++PROTO (decw_32_vl16, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL16); } ++ ++/* ++** decw_32_vl32: ++** decw x0, vl32 ++** ret ++*/ ++PROTO (decw_32_vl32, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL32); } ++ ++/* ++** decw_32_vl64: ++** decw x0, vl64 ++** ret ++*/ ++PROTO (decw_32_vl64, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL64); } ++ ++/* ++** decw_32_vl128: ++** decw x0, vl128 ++** ret ++*/ ++PROTO (decw_32_vl128, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL128); } ++ ++/* ++** decw_32_vl256: ++** decw x0, vl256 ++** ret ++*/ ++PROTO (decw_32_vl256, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL256); } ++ ++/* ++** decw_32_mul3: ++** decw x0, mul3 ++** ret ++*/ ++PROTO (decw_32_mul3, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_MUL3); } ++ ++/* ++** decw_32_mul4: ++** decw x0, mul4 ++** ret ++*/ ++PROTO (decw_32_mul4, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_MUL4); } ++ ++/* ++** decw_32_all: ++** decw x0 ++** ret ++*/ ++PROTO (decw_32_all, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_ALL); } ++ ++/* ++** decw_64_pow2: ++** decw x0, pow2 ++** ret ++*/ ++PROTO (decw_64_pow2, uint64_t, (uint64_t x0)) { return x0 - svcntw_pat (SV_POW2); } ++ ++/* ++** decw_64_all: ++** decw x0 ++** ret ++*/ ++PROTO (decw_64_all, uint64_t, (uint64_t x0)) { return x0 - svcntw_pat (SV_ALL); } ++ ++/* ++** incw_s32_pow2_z0: ++** incw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (incw_s32_pow2_z0, svint32_t, ++ z0 = svadd_n_s32_x (svptrue_b32 (), z0, svcntw_pat (SV_POW2)), ++ z0 = svadd_x (svptrue_b32 (), z0, svcntw_pat (SV_POW2))); ++ ++/* ++** incw_s32_pow2_z1: ++** movprfx z0, z1 ++** incw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (incw_s32_pow2_z1, svint32_t, ++ z0 = svadd_n_s32_x (svptrue_b32 (), z1, svcntw_pat (SV_POW2)), ++ z0 = svadd_x (svptrue_b32 (), z1, svcntw_pat (SV_POW2))); ++ ++/* ++** decw_s32_pow2_z0: ++** decw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (decw_s32_pow2_z0, svint32_t, ++ z0 = svsub_n_s32_x (svptrue_b32 (), z0, svcntw_pat (SV_POW2)), ++ z0 = svsub_x (svptrue_b32 (), z0, svcntw_pat (SV_POW2))); ++ ++/* ++** decw_s32_pow2_z1: ++** movprfx z0, z1 ++** decw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (decw_s32_pow2_z1, svint32_t, ++ z0 = svsub_n_s32_x (svptrue_b32 (), z1, svcntw_pat (SV_POW2)), ++ z0 = svsub_x (svptrue_b32 (), z1, svcntw_pat (SV_POW2))); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c +new file mode 100644 +index 000000000..2e80d6830 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** compact_f32_tied1: ++** compact z0\.s, p0, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (compact_f32_tied1, svfloat32_t, ++ z0 = svcompact_f32 (p0, z0), ++ z0 = svcompact (p0, z0)) ++ ++/* ++** compact_f32_untied: ++** compact z0\.s, p0, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (compact_f32_untied, svfloat32_t, ++ z0 = svcompact_f32 (p0, z1), ++ z0 = svcompact (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c +new file mode 100644 +index 000000000..e0bc33efe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** compact_f64_tied1: ++** compact z0\.d, p0, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (compact_f64_tied1, svfloat64_t, ++ z0 = svcompact_f64 (p0, z0), ++ z0 = svcompact (p0, z0)) ++ ++/* ++** compact_f64_untied: ++** compact z0\.d, p0, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (compact_f64_untied, svfloat64_t, ++ z0 = svcompact_f64 (p0, z1), ++ z0 = svcompact (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c +new file mode 100644 +index 000000000..e4634982b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** compact_s32_tied1: ++** compact z0\.s, p0, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (compact_s32_tied1, svint32_t, ++ z0 = svcompact_s32 (p0, z0), ++ z0 = svcompact (p0, z0)) ++ ++/* ++** compact_s32_untied: ++** compact z0\.s, p0, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (compact_s32_untied, svint32_t, ++ z0 = svcompact_s32 (p0, z1), ++ z0 = svcompact (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c +new file mode 100644 +index 000000000..71cb97b8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** compact_s64_tied1: ++** compact z0\.d, p0, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (compact_s64_tied1, svint64_t, ++ z0 = svcompact_s64 (p0, z0), ++ z0 = svcompact (p0, z0)) ++ ++/* ++** compact_s64_untied: ++** compact z0\.d, p0, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (compact_s64_untied, svint64_t, ++ z0 = svcompact_s64 (p0, z1), ++ z0 = svcompact (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c +new file mode 100644 +index 000000000..954329a0b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** compact_u32_tied1: ++** compact z0\.s, p0, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (compact_u32_tied1, svuint32_t, ++ z0 = svcompact_u32 (p0, z0), ++ z0 = svcompact (p0, z0)) ++ ++/* ++** compact_u32_untied: ++** compact z0\.s, p0, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (compact_u32_untied, svuint32_t, ++ z0 = svcompact_u32 (p0, z1), ++ z0 = svcompact (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c +new file mode 100644 +index 000000000..ec664845f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** compact_u64_tied1: ++** compact z0\.d, p0, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (compact_u64_tied1, svuint64_t, ++ z0 = svcompact_u64 (p0, z0), ++ z0 = svcompact (p0, z0)) ++ ++/* ++** compact_u64_untied: ++** compact z0\.d, p0, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (compact_u64_untied, svuint64_t, ++ z0 = svcompact_u64 (p0, z1), ++ z0 = svcompact (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c +new file mode 100644 +index 000000000..e9158ed8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** create2_s8: ++** mov z0\.d, z6\.d ++** mov z1\.d, z4\.d ++** ret ++*/ ++TEST_CREATE (create2_s8, svint8x2_t, svint8_t, ++ z0 = svcreate2_s8 (z6, z4), ++ z0 = svcreate2 (z6, z4)) ++ ++/* ++** create2_u8: ++** mov z0\.d, z4\.d ++** mov z1\.d, z6\.d ++** ret ++*/ ++TEST_CREATE (create2_u8, svuint8x2_t, svuint8_t, ++ z0 = svcreate2_u8 (z4, z6), ++ z0 = svcreate2 (z4, z6)) ++ ++/* ++** create2_s16: ++** mov z0\.d, z6\.d ++** mov z1\.d, z4\.d ++** ret ++*/ ++TEST_CREATE (create2_s16, svint16x2_t, svint16_t, ++ z0 = svcreate2_s16 (z6, z4), ++ z0 = svcreate2 (z6, z4)) ++ ++/* ++** create2_u16: ++** mov z0\.d, z6\.d ++** mov z1\.d, z5\.d ++** ret ++*/ ++TEST_CREATE (create2_u16, svuint16x2_t, svuint16_t, ++ z0 = svcreate2_u16 (z6, z5), ++ z0 = svcreate2 (z6, z5)) ++ ++/* ++** create2_bf16: ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ret ++*/ ++TEST_CREATE (create2_bf16, svbfloat16x2_t, svbfloat16_t, ++ z0 = svcreate2_bf16 (z4, z5), ++ z0 = svcreate2 (z4, z5)) ++ ++/* ++** create2_f16: ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** ret ++*/ ++TEST_CREATE (create2_f16, svfloat16x2_t, svfloat16_t, ++ z0 = svcreate2_f16 (z4, z5), ++ z0 = svcreate2 (z4, z5)) ++ ++/* ++** create2_s32: ++** mov z0\.d, z6\.d ++** mov z1\.d, z7\.d ++** ret ++*/ ++TEST_CREATE (create2_s32, svint32x2_t, svint32_t, ++ z0 = svcreate2_s32 (z6, z7), ++ z0 = svcreate2 (z6, z7)) ++ ++/* ++** create2_u32: ++** mov z0\.d, z7\.d ++** mov z1\.d, z5\.d ++** ret ++*/ ++TEST_CREATE (create2_u32, svuint32x2_t, svuint32_t, ++ z0 = svcreate2_u32 (z7, z5), ++ z0 = svcreate2 (z7, z5)) ++ ++/* ++** create2_f32: ++** mov z0\.d, z7\.d ++** mov z1\.d, z4\.d ++** ret ++*/ ++TEST_CREATE (create2_f32, svfloat32x2_t, svfloat32_t, ++ z0 = svcreate2_f32 (z7, z4), ++ z0 = svcreate2 (z7, z4)) ++ ++/* ++** create2_s64: ++** mov z0\.d, z5\.d ++** mov z1\.d, z7\.d ++** ret ++*/ ++TEST_CREATE (create2_s64, svint64x2_t, svint64_t, ++ z0 = svcreate2_s64 (z5, z7), ++ z0 = svcreate2 (z5, z7)) ++ ++/* ++** create2_u64: ++** mov z0\.d, z7\.d ++** mov z1\.d, z6\.d ++** ret ++*/ ++TEST_CREATE (create2_u64, svuint64x2_t, svuint64_t, ++ z0 = svcreate2_u64 (z7, z6), ++ z0 = svcreate2 (z7, z6)) ++ ++/* ++** create2_f64: ++** mov z0\.d, z5\.d ++** mov z1\.d, z4\.d ++** ret ++*/ ++TEST_CREATE (create2_f64, svfloat64x2_t, svfloat64_t, ++ z0 = svcreate2_f64 (z5, z4), ++ z0 = svcreate2 (z5, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c +new file mode 100644 +index 000000000..6f1afb772 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c +@@ -0,0 +1,135 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** create3_s8: ++** mov z0\.d, z6\.d ++** mov z1\.d, z4\.d ++** mov z2\.d, z7\.d ++** ret ++*/ ++TEST_CREATE (create3_s8, svint8x3_t, svint8_t, ++ z0 = svcreate3_s8 (z6, z4, z7), ++ z0 = svcreate3 (z6, z4, z7)) ++ ++/* ++** create3_u8: ++** mov z0\.d, z4\.d ++** mov z1\.d, z6\.d ++** mov z2\.d, z5\.d ++** ret ++*/ ++TEST_CREATE (create3_u8, svuint8x3_t, svuint8_t, ++ z0 = svcreate3_u8 (z4, z6, z5), ++ z0 = svcreate3 (z4, z6, z5)) ++ ++/* ++** create3_s16: ++** mov z0\.d, z6\.d 
++** mov z1\.d, z4\.d ++** mov z2\.d, z5\.d ++** ret ++*/ ++TEST_CREATE (create3_s16, svint16x3_t, svint16_t, ++ z0 = svcreate3_s16 (z6, z4, z5), ++ z0 = svcreate3 (z6, z4, z5)) ++ ++/* ++** create3_u16: ++** mov z0\.d, z6\.d ++** mov z1\.d, z5\.d ++** mov z2\.d, z4\.d ++** ret ++*/ ++TEST_CREATE (create3_u16, svuint16x3_t, svuint16_t, ++ z0 = svcreate3_u16 (z6, z5, z4), ++ z0 = svcreate3 (z6, z5, z4)) ++ ++/* ++** create3_bf16: ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** mov z2\.d, z6\.d ++** ret ++*/ ++TEST_CREATE (create3_bf16, svbfloat16x3_t, svbfloat16_t, ++ z0 = svcreate3_bf16 (z4, z5, z6), ++ z0 = svcreate3 (z4, z5, z6)) ++ ++/* ++** create3_f16: ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** mov z2\.d, z6\.d ++** ret ++*/ ++TEST_CREATE (create3_f16, svfloat16x3_t, svfloat16_t, ++ z0 = svcreate3_f16 (z4, z5, z6), ++ z0 = svcreate3 (z4, z5, z6)) ++ ++/* ++** create3_s32: ++** mov z0\.d, z6\.d ++** mov z1\.d, z7\.d ++** mov z2\.d, z4\.d ++** ret ++*/ ++TEST_CREATE (create3_s32, svint32x3_t, svint32_t, ++ z0 = svcreate3_s32 (z6, z7, z4), ++ z0 = svcreate3 (z6, z7, z4)) ++ ++/* ++** create3_u32: ++** mov z0\.d, z7\.d ++** mov z1\.d, z5\.d ++** mov z2\.d, z6\.d ++** ret ++*/ ++TEST_CREATE (create3_u32, svuint32x3_t, svuint32_t, ++ z0 = svcreate3_u32 (z7, z5, z6), ++ z0 = svcreate3 (z7, z5, z6)) ++ ++/* ++** create3_f32: ++** mov z0\.d, z7\.d ++** mov z1\.d, z4\.d ++** mov z2\.d, z6\.d ++** ret ++*/ ++TEST_CREATE (create3_f32, svfloat32x3_t, svfloat32_t, ++ z0 = svcreate3_f32 (z7, z4, z6), ++ z0 = svcreate3 (z7, z4, z6)) ++ ++/* ++** create3_s64: ++** mov z0\.d, z5\.d ++** mov z1\.d, z7\.d ++** mov z2\.d, z6\.d ++** ret ++*/ ++TEST_CREATE (create3_s64, svint64x3_t, svint64_t, ++ z0 = svcreate3_s64 (z5, z7, z6), ++ z0 = svcreate3 (z5, z7, z6)) ++ ++/* ++** create3_u64: ++** mov z0\.d, z7\.d ++** mov z1\.d, z6\.d ++** mov z2\.d, z4\.d ++** ret ++*/ ++TEST_CREATE (create3_u64, svuint64x3_t, svuint64_t, ++ z0 = svcreate3_u64 (z7, z6, z4), ++ z0 = svcreate3 (z7, z6, z4)) ++ ++/* ++** create3_f64: ++** mov z0\.d, z5\.d ++** mov z1\.d, z4\.d ++** mov z2\.d, z7\.d ++** ret ++*/ ++TEST_CREATE (create3_f64, svfloat64x3_t, svfloat64_t, ++ z0 = svcreate3_f64 (z5, z4, z7), ++ z0 = svcreate3 (z5, z4, z7)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c +new file mode 100644 +index 000000000..a3866286e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c +@@ -0,0 +1,147 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** create4_s8: ++** mov z0\.d, z6\.d ++** mov z1\.d, z4\.d ++** mov z2\.d, z7\.d ++** mov z3\.d, z5\.d ++** ret ++*/ ++TEST_CREATE (create4_s8, svint8x4_t, svint8_t, ++ z0 = svcreate4_s8 (z6, z4, z7, z5), ++ z0 = svcreate4 (z6, z4, z7, z5)) ++ ++/* ++** create4_u8: ++** mov z0\.d, z4\.d ++** mov z1\.d, z6\.d ++** mov z2\.d, z5\.d ++** mov z3\.d, z7\.d ++** ret ++*/ ++TEST_CREATE (create4_u8, svuint8x4_t, svuint8_t, ++ z0 = svcreate4_u8 (z4, z6, z5, z7), ++ z0 = svcreate4 (z4, z6, z5, z7)) ++ ++/* ++** create4_s16: ++** mov z0\.d, z6\.d ++** mov z1\.d, z4\.d ++** mov z2\.d, z5\.d ++** mov z3\.d, z7\.d ++** ret ++*/ ++TEST_CREATE (create4_s16, svint16x4_t, svint16_t, ++ z0 = svcreate4_s16 (z6, z4, z5, z7), ++ z0 = svcreate4 (z6, z4, z5, z7)) ++ ++/* ++** create4_u16: ++** mov z0\.d, z6\.d ++** mov z1\.d, z5\.d ++** mov z2\.d, z4\.d ++** mov z3\.d, z7\.d ++** ret ++*/ ++TEST_CREATE (create4_u16, svuint16x4_t, 
svuint16_t, ++ z0 = svcreate4_u16 (z6, z5, z4, z7), ++ z0 = svcreate4 (z6, z5, z4, z7)) ++ ++/* ++** create4_bf16: ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** mov z2\.d, z6\.d ++** mov z3\.d, z7\.d ++** ret ++*/ ++TEST_CREATE (create4_bf16, svbfloat16x4_t, svbfloat16_t, ++ z0 = svcreate4_bf16 (z4, z5, z6, z7), ++ z0 = svcreate4 (z4, z5, z6, z7)) ++ ++/* ++** create4_f16: ++** mov z0\.d, z4\.d ++** mov z1\.d, z5\.d ++** mov z2\.d, z6\.d ++** mov z3\.d, z7\.d ++** ret ++*/ ++TEST_CREATE (create4_f16, svfloat16x4_t, svfloat16_t, ++ z0 = svcreate4_f16 (z4, z5, z6, z7), ++ z0 = svcreate4 (z4, z5, z6, z7)) ++ ++/* ++** create4_s32: ++** mov z0\.d, z6\.d ++** mov z1\.d, z7\.d ++** mov z2\.d, z4\.d ++** mov z3\.d, z5\.d ++** ret ++*/ ++TEST_CREATE (create4_s32, svint32x4_t, svint32_t, ++ z0 = svcreate4_s32 (z6, z7, z4, z5), ++ z0 = svcreate4 (z6, z7, z4, z5)) ++ ++/* ++** create4_u32: ++** mov z0\.d, z7\.d ++** mov z1\.d, z5\.d ++** mov z2\.d, z6\.d ++** mov z3\.d, z7\.d ++** ret ++*/ ++TEST_CREATE (create4_u32, svuint32x4_t, svuint32_t, ++ z0 = svcreate4_u32 (z7, z5, z6, z7), ++ z0 = svcreate4 (z7, z5, z6, z7)) ++ ++/* ++** create4_f32: ++** mov z0\.d, z7\.d ++** mov z1\.d, z4\.d ++** mov z2\.d, z6\.d ++** mov z3\.d, z4\.d ++** ret ++*/ ++TEST_CREATE (create4_f32, svfloat32x4_t, svfloat32_t, ++ z0 = svcreate4_f32 (z7, z4, z6, z4), ++ z0 = svcreate4 (z7, z4, z6, z4)) ++ ++/* ++** create4_s64: ++** mov z0\.d, z5\.d ++** mov z1\.d, z7\.d ++** mov z2\.d, z6\.d ++** mov z3\.d, z6\.d ++** ret ++*/ ++TEST_CREATE (create4_s64, svint64x4_t, svint64_t, ++ z0 = svcreate4_s64 (z5, z7, z6, z6), ++ z0 = svcreate4 (z5, z7, z6, z6)) ++ ++/* ++** create4_u64: ++** mov z0\.d, z7\.d ++** mov z1\.d, z6\.d ++** mov z2\.d, z4\.d ++** mov z3\.d, z5\.d ++** ret ++*/ ++TEST_CREATE (create4_u64, svuint64x4_t, svuint64_t, ++ z0 = svcreate4_u64 (z7, z6, z4, z5), ++ z0 = svcreate4 (z7, z6, z4, z5)) ++ ++/* ++** create4_f64: ++** mov z0\.d, z5\.d ++** mov z1\.d, z4\.d ++** mov z2\.d, z7\.d ++** mov z3\.d, z6\.d ++** ret ++*/ ++TEST_CREATE (create4_f64, svfloat64x4_t, svfloat64_t, ++ z0 = svcreate4_f64 (z5, z4, z7, z6), ++ z0 = svcreate4 (z5, z4, z7, z6)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c +new file mode 100644 +index 000000000..52baa1f58 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c +@@ -0,0 +1,96 @@ ++/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ ++/* { dg-require-effective-target aarch64_asm_bf16_ok } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvt_bf16_f32_m_tied1: ++** bfcvt z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t, ++ z0 = svcvt_bf16_f32_m (z0, p0, z4), ++ z0 = svcvt_bf16_m (z0, p0, z4)) ++ ++/* ++** cvt_bf16_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** bfcvt z0\.h, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t, ++ z0_res = svcvt_bf16_f32_m (z4, p0, z0), ++ z0_res = svcvt_bf16_m (z4, p0, z0)) ++ ++/* ++** cvt_bf16_f32_m_untied: ++** movprfx z0, z1 ++** bfcvt z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t, ++ z0 = svcvt_bf16_f32_m (z1, p0, z4), ++ z0 = svcvt_bf16_m (z1, p0, z4)) ++ ++/* ++** cvt_bf16_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** bfcvt z0\.h, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV 
(cvt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t, ++ z0_res = svcvt_bf16_f32_z (p0, z0), ++ z0_res = svcvt_bf16_z (p0, z0)) ++ ++/* ++** cvt_bf16_f32_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** bfcvt z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t, ++ z0 = svcvt_bf16_f32_z (p0, z4), ++ z0 = svcvt_bf16_z (p0, z4)) ++ ++/* ++** cvt_bf16_f32_x_tied1: ++** bfcvt z0\.h, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, ++ z0_res = svcvt_bf16_f32_x (p0, z0), ++ z0_res = svcvt_bf16_x (p0, z0)) ++ ++/* ++** cvt_bf16_f32_x_untied: ++** bfcvt z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, ++ z0 = svcvt_bf16_f32_x (p0, z4), ++ z0 = svcvt_bf16_x (p0, z4)) ++ ++/* ++** ptrue_cvt_bf16_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, ++ z0_res = svcvt_bf16_f32_x (svptrue_b32 (), z0), ++ z0_res = svcvt_bf16_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_cvt_bf16_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, ++ z0 = svcvt_bf16_f32_x (svptrue_b32 (), z4), ++ z0 = svcvt_bf16_x (svptrue_b32 (), z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f16.c +new file mode 100644 +index 000000000..5dcd48046 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f16.c +@@ -0,0 +1,731 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvt_f16_f32_m_tied1: ++** fcvt z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_f32_m_tied1, svfloat16_t, svfloat32_t, ++ z0 = svcvt_f16_f32_m (z0, p0, z4), ++ z0 = svcvt_f16_m (z0, p0, z4)) ++ ++/* ++** cvt_f16_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvt z0\.h, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_f32_m_tied2, svfloat16_t, svfloat32_t, ++ z0_res = svcvt_f16_f32_m (z4, p0, z0), ++ z0_res = svcvt_f16_m (z4, p0, z0)) ++ ++/* ++** cvt_f16_f32_m_untied: ++** movprfx z0, z1 ++** fcvt z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_f32_m_untied, svfloat16_t, svfloat32_t, ++ z0 = svcvt_f16_f32_m (z1, p0, z4), ++ z0 = svcvt_f16_m (z1, p0, z4)) ++ ++/* ++** cvt_f16_f64_m_tied1: ++** fcvt z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_f64_m_tied1, svfloat16_t, svfloat64_t, ++ z0 = svcvt_f16_f64_m (z0, p0, z4), ++ z0 = svcvt_f16_m (z0, p0, z4)) ++ ++/* ++** cvt_f16_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** fcvt z0\.h, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_f64_m_tied2, svfloat16_t, svfloat64_t, ++ z0_res = svcvt_f16_f64_m (z4, p0, z0), ++ z0_res = svcvt_f16_m (z4, p0, z0)) ++ ++/* ++** cvt_f16_f64_m_untied: ++** movprfx z0, z1 ++** fcvt z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_f64_m_untied, svfloat16_t, svfloat64_t, ++ z0 = svcvt_f16_f64_m (z1, p0, z4), ++ z0 = svcvt_f16_m (z1, p0, z4)) ++ ++/* ++** cvt_f16_s16_m_tied1: ++** scvtf z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s16_m_tied1, svfloat16_t, svint16_t, ++ z0 = svcvt_f16_s16_m (z0, p0, z4), ++ z0 = svcvt_f16_m (z0, p0, z4)) ++ ++/* ++** cvt_f16_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** scvtf z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_s16_m_tied2, svfloat16_t, svint16_t, ++ z0_res = 
svcvt_f16_s16_m (z4, p0, z0), ++ z0_res = svcvt_f16_m (z4, p0, z0)) ++ ++/* ++** cvt_f16_s16_m_untied: ++** movprfx z0, z1 ++** scvtf z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s16_m_untied, svfloat16_t, svint16_t, ++ z0 = svcvt_f16_s16_m (z1, p0, z4), ++ z0 = svcvt_f16_m (z1, p0, z4)) ++ ++/* ++** cvt_f16_s32_m_tied1: ++** scvtf z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s32_m_tied1, svfloat16_t, svint32_t, ++ z0 = svcvt_f16_s32_m (z0, p0, z4), ++ z0 = svcvt_f16_m (z0, p0, z4)) ++ ++/* ++** cvt_f16_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** scvtf z0\.h, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_s32_m_tied2, svfloat16_t, svint32_t, ++ z0_res = svcvt_f16_s32_m (z4, p0, z0), ++ z0_res = svcvt_f16_m (z4, p0, z0)) ++ ++/* ++** cvt_f16_s32_m_untied: ++** movprfx z0, z1 ++** scvtf z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s32_m_untied, svfloat16_t, svint32_t, ++ z0 = svcvt_f16_s32_m (z1, p0, z4), ++ z0 = svcvt_f16_m (z1, p0, z4)) ++ ++/* ++** cvt_f16_s64_m_tied1: ++** scvtf z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s64_m_tied1, svfloat16_t, svint64_t, ++ z0 = svcvt_f16_s64_m (z0, p0, z4), ++ z0 = svcvt_f16_m (z0, p0, z4)) ++ ++/* ++** cvt_f16_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** scvtf z0\.h, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_s64_m_tied2, svfloat16_t, svint64_t, ++ z0_res = svcvt_f16_s64_m (z4, p0, z0), ++ z0_res = svcvt_f16_m (z4, p0, z0)) ++ ++/* ++** cvt_f16_s64_m_untied: ++** movprfx z0, z1 ++** scvtf z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s64_m_untied, svfloat16_t, svint64_t, ++ z0 = svcvt_f16_s64_m (z1, p0, z4), ++ z0 = svcvt_f16_m (z1, p0, z4)) ++ ++/* ++** cvt_f16_u16_m_tied1: ++** ucvtf z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u16_m_tied1, svfloat16_t, svuint16_t, ++ z0 = svcvt_f16_u16_m (z0, p0, z4), ++ z0 = svcvt_f16_m (z0, p0, z4)) ++ ++/* ++** cvt_f16_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** ucvtf z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_u16_m_tied2, svfloat16_t, svuint16_t, ++ z0_res = svcvt_f16_u16_m (z4, p0, z0), ++ z0_res = svcvt_f16_m (z4, p0, z0)) ++ ++/* ++** cvt_f16_u16_m_untied: ++** movprfx z0, z1 ++** ucvtf z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u16_m_untied, svfloat16_t, svuint16_t, ++ z0 = svcvt_f16_u16_m (z1, p0, z4), ++ z0 = svcvt_f16_m (z1, p0, z4)) ++ ++/* ++** cvt_f16_u32_m_tied1: ++** ucvtf z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u32_m_tied1, svfloat16_t, svuint32_t, ++ z0 = svcvt_f16_u32_m (z0, p0, z4), ++ z0 = svcvt_f16_m (z0, p0, z4)) ++ ++/* ++** cvt_f16_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** ucvtf z0\.h, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_u32_m_tied2, svfloat16_t, svuint32_t, ++ z0_res = svcvt_f16_u32_m (z4, p0, z0), ++ z0_res = svcvt_f16_m (z4, p0, z0)) ++ ++/* ++** cvt_f16_u32_m_untied: ++** movprfx z0, z1 ++** ucvtf z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u32_m_untied, svfloat16_t, svuint32_t, ++ z0 = svcvt_f16_u32_m (z1, p0, z4), ++ z0 = svcvt_f16_m (z1, p0, z4)) ++ ++/* ++** cvt_f16_u64_m_tied1: ++** ucvtf z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u64_m_tied1, svfloat16_t, svuint64_t, ++ z0 = svcvt_f16_u64_m (z0, p0, z4), ++ z0 = svcvt_f16_m (z0, p0, z4)) ++ ++/* ++** cvt_f16_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** ucvtf z0\.h, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_u64_m_tied2, svfloat16_t, svuint64_t, 
++ z0_res = svcvt_f16_u64_m (z4, p0, z0), ++ z0_res = svcvt_f16_m (z4, p0, z0)) ++ ++/* ++** cvt_f16_u64_m_untied: ++** movprfx z0, z1 ++** ucvtf z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u64_m_untied, svfloat16_t, svuint64_t, ++ z0 = svcvt_f16_u64_m (z1, p0, z4), ++ z0 = svcvt_f16_m (z1, p0, z4)) ++ ++/* ++** cvt_f16_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** fcvt z0\.h, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_f32_z_tied1, svfloat16_t, svfloat32_t, ++ z0_res = svcvt_f16_f32_z (p0, z0), ++ z0_res = svcvt_f16_z (p0, z0)) ++ ++/* ++** cvt_f16_f32_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** fcvt z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_f32_z_untied, svfloat16_t, svfloat32_t, ++ z0 = svcvt_f16_f32_z (p0, z4), ++ z0 = svcvt_f16_z (p0, z4)) ++ ++/* ++** cvt_f16_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** fcvt z0\.h, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_f64_z_tied1, svfloat16_t, svfloat64_t, ++ z0_res = svcvt_f16_f64_z (p0, z0), ++ z0_res = svcvt_f16_z (p0, z0)) ++ ++/* ++** cvt_f16_f64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvt z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_f64_z_untied, svfloat16_t, svfloat64_t, ++ z0 = svcvt_f16_f64_z (p0, z4), ++ z0 = svcvt_f16_z (p0, z4)) ++ ++/* ++** cvt_f16_s16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** scvtf z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_s16_z_tied1, svfloat16_t, svint16_t, ++ z0_res = svcvt_f16_s16_z (p0, z0), ++ z0_res = svcvt_f16_z (p0, z0)) ++ ++/* ++** cvt_f16_s16_z_untied: ++** movprfx z0\.h, p0/z, z4\.h ++** scvtf z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s16_z_untied, svfloat16_t, svint16_t, ++ z0 = svcvt_f16_s16_z (p0, z4), ++ z0 = svcvt_f16_z (p0, z4)) ++ ++/* ++** cvt_f16_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** scvtf z0\.h, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_s32_z_tied1, svfloat16_t, svint32_t, ++ z0_res = svcvt_f16_s32_z (p0, z0), ++ z0_res = svcvt_f16_z (p0, z0)) ++ ++/* ++** cvt_f16_s32_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** scvtf z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s32_z_untied, svfloat16_t, svint32_t, ++ z0 = svcvt_f16_s32_z (p0, z4), ++ z0 = svcvt_f16_z (p0, z4)) ++ ++/* ++** cvt_f16_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** scvtf z0\.h, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_s64_z_tied1, svfloat16_t, svint64_t, ++ z0_res = svcvt_f16_s64_z (p0, z0), ++ z0_res = svcvt_f16_z (p0, z0)) ++ ++/* ++** cvt_f16_s64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** scvtf z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s64_z_untied, svfloat16_t, svint64_t, ++ z0 = svcvt_f16_s64_z (p0, z4), ++ z0 = svcvt_f16_z (p0, z4)) ++ ++/* ++** cvt_f16_u16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** ucvtf z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_u16_z_tied1, svfloat16_t, svuint16_t, ++ z0_res = svcvt_f16_u16_z (p0, z0), ++ z0_res = svcvt_f16_z (p0, z0)) ++ ++/* ++** cvt_f16_u16_z_untied: ++** movprfx z0\.h, p0/z, z4\.h ++** ucvtf z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u16_z_untied, svfloat16_t, svuint16_t, ++ z0 = svcvt_f16_u16_z (p0, z4), ++ z0 = svcvt_f16_z (p0, z4)) ++ ++/* ++** cvt_f16_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** ucvtf z0\.h, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_u32_z_tied1, 
svfloat16_t, svuint32_t, ++ z0_res = svcvt_f16_u32_z (p0, z0), ++ z0_res = svcvt_f16_z (p0, z0)) ++ ++/* ++** cvt_f16_u32_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** ucvtf z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u32_z_untied, svfloat16_t, svuint32_t, ++ z0 = svcvt_f16_u32_z (p0, z4), ++ z0 = svcvt_f16_z (p0, z4)) ++ ++/* ++** cvt_f16_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** ucvtf z0\.h, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_u64_z_tied1, svfloat16_t, svuint64_t, ++ z0_res = svcvt_f16_u64_z (p0, z0), ++ z0_res = svcvt_f16_z (p0, z0)) ++ ++/* ++** cvt_f16_u64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** ucvtf z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u64_z_untied, svfloat16_t, svuint64_t, ++ z0 = svcvt_f16_u64_z (p0, z4), ++ z0 = svcvt_f16_z (p0, z4)) ++ ++/* ++** cvt_f16_f32_x_tied1: ++** fcvt z0\.h, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_f32_x_tied1, svfloat16_t, svfloat32_t, ++ z0_res = svcvt_f16_f32_x (p0, z0), ++ z0_res = svcvt_f16_x (p0, z0)) ++ ++/* ++** cvt_f16_f32_x_untied: ++** fcvt z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_f32_x_untied, svfloat16_t, svfloat32_t, ++ z0 = svcvt_f16_f32_x (p0, z4), ++ z0 = svcvt_f16_x (p0, z4)) ++ ++/* ++** cvt_f16_f64_x_tied1: ++** fcvt z0\.h, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_f64_x_tied1, svfloat16_t, svfloat64_t, ++ z0_res = svcvt_f16_f64_x (p0, z0), ++ z0_res = svcvt_f16_x (p0, z0)) ++ ++/* ++** cvt_f16_f64_x_untied: ++** fcvt z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_f64_x_untied, svfloat16_t, svfloat64_t, ++ z0 = svcvt_f16_f64_x (p0, z4), ++ z0 = svcvt_f16_x (p0, z4)) ++ ++/* ++** cvt_f16_s16_x_tied1: ++** scvtf z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_s16_x_tied1, svfloat16_t, svint16_t, ++ z0_res = svcvt_f16_s16_x (p0, z0), ++ z0_res = svcvt_f16_x (p0, z0)) ++ ++/* ++** cvt_f16_s16_x_untied: ++** scvtf z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s16_x_untied, svfloat16_t, svint16_t, ++ z0 = svcvt_f16_s16_x (p0, z4), ++ z0 = svcvt_f16_x (p0, z4)) ++ ++/* ++** cvt_f16_s32_x_tied1: ++** scvtf z0\.h, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_s32_x_tied1, svfloat16_t, svint32_t, ++ z0_res = svcvt_f16_s32_x (p0, z0), ++ z0_res = svcvt_f16_x (p0, z0)) ++ ++/* ++** cvt_f16_s32_x_untied: ++** scvtf z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s32_x_untied, svfloat16_t, svint32_t, ++ z0 = svcvt_f16_s32_x (p0, z4), ++ z0 = svcvt_f16_x (p0, z4)) ++ ++/* ++** cvt_f16_s64_x_tied1: ++** scvtf z0\.h, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_s64_x_tied1, svfloat16_t, svint64_t, ++ z0_res = svcvt_f16_s64_x (p0, z0), ++ z0_res = svcvt_f16_x (p0, z0)) ++ ++/* ++** cvt_f16_s64_x_untied: ++** scvtf z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_s64_x_untied, svfloat16_t, svint64_t, ++ z0 = svcvt_f16_s64_x (p0, z4), ++ z0 = svcvt_f16_x (p0, z4)) ++ ++/* ++** cvt_f16_u16_x_tied1: ++** ucvtf z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_u16_x_tied1, svfloat16_t, svuint16_t, ++ z0_res = svcvt_f16_u16_x (p0, z0), ++ z0_res = svcvt_f16_x (p0, z0)) ++ ++/* ++** cvt_f16_u16_x_untied: ++** ucvtf z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u16_x_untied, svfloat16_t, svuint16_t, ++ z0 = svcvt_f16_u16_x (p0, z4), ++ z0 = svcvt_f16_x (p0, z4)) ++ ++/* ++** cvt_f16_u32_x_tied1: ++** ucvtf z0\.h, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_u32_x_tied1, svfloat16_t, svuint32_t, ++ z0_res = svcvt_f16_u32_x (p0, z0), ++ z0_res = 
svcvt_f16_x (p0, z0)) ++ ++/* ++** cvt_f16_u32_x_untied: ++** ucvtf z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u32_x_untied, svfloat16_t, svuint32_t, ++ z0 = svcvt_f16_u32_x (p0, z4), ++ z0 = svcvt_f16_x (p0, z4)) ++ ++/* ++** cvt_f16_u64_x_tied1: ++** ucvtf z0\.h, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f16_u64_x_tied1, svfloat16_t, svuint64_t, ++ z0_res = svcvt_f16_u64_x (p0, z0), ++ z0_res = svcvt_f16_x (p0, z0)) ++ ++/* ++** cvt_f16_u64_x_untied: ++** ucvtf z0\.h, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f16_u64_x_untied, svfloat16_t, svuint64_t, ++ z0 = svcvt_f16_u64_x (p0, z4), ++ z0 = svcvt_f16_x (p0, z4)) ++ ++/* ++** ptrue_cvt_f16_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f16_f32_x_tied1, svfloat16_t, svfloat32_t, ++ z0_res = svcvt_f16_f32_x (svptrue_b32 (), z0), ++ z0_res = svcvt_f16_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_cvt_f16_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f16_f32_x_untied, svfloat16_t, svfloat32_t, ++ z0 = svcvt_f16_f32_x (svptrue_b32 (), z4), ++ z0 = svcvt_f16_x (svptrue_b32 (), z4)) ++ ++/* ++** ptrue_cvt_f16_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f16_f64_x_tied1, svfloat16_t, svfloat64_t, ++ z0_res = svcvt_f16_f64_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f16_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f16_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f16_f64_x_untied, svfloat16_t, svfloat64_t, ++ z0 = svcvt_f16_f64_x (svptrue_b64 (), z4), ++ z0 = svcvt_f16_x (svptrue_b64 (), z4)) ++ ++/* ++** ptrue_cvt_f16_s16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f16_s16_x_tied1, svfloat16_t, svint16_t, ++ z0_res = svcvt_f16_s16_x (svptrue_b16 (), z0), ++ z0_res = svcvt_f16_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_cvt_f16_s16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f16_s16_x_untied, svfloat16_t, svint16_t, ++ z0 = svcvt_f16_s16_x (svptrue_b16 (), z4), ++ z0 = svcvt_f16_x (svptrue_b16 (), z4)) ++ ++/* ++** ptrue_cvt_f16_s32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f16_s32_x_tied1, svfloat16_t, svint32_t, ++ z0_res = svcvt_f16_s32_x (svptrue_b32 (), z0), ++ z0_res = svcvt_f16_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_cvt_f16_s32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f16_s32_x_untied, svfloat16_t, svint32_t, ++ z0 = svcvt_f16_s32_x (svptrue_b32 (), z4), ++ z0 = svcvt_f16_x (svptrue_b32 (), z4)) ++ ++/* ++** ptrue_cvt_f16_s64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f16_s64_x_tied1, svfloat16_t, svint64_t, ++ z0_res = svcvt_f16_s64_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f16_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f16_s64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f16_s64_x_untied, svfloat16_t, svint64_t, ++ z0 = svcvt_f16_s64_x (svptrue_b64 (), z4), ++ z0 = svcvt_f16_x (svptrue_b64 (), z4)) ++ ++/* ++** ptrue_cvt_f16_u16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f16_u16_x_tied1, svfloat16_t, svuint16_t, ++ z0_res = svcvt_f16_u16_x (svptrue_b16 (), z0), ++ z0_res = svcvt_f16_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_cvt_f16_u16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f16_u16_x_untied, svfloat16_t, svuint16_t, ++ z0 = svcvt_f16_u16_x (svptrue_b16 (), z4), ++ z0 = svcvt_f16_x (svptrue_b16 (), z4)) ++ ++/* ++** ptrue_cvt_f16_u32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f16_u32_x_tied1, svfloat16_t, svuint32_t, ++ z0_res = svcvt_f16_u32_x (svptrue_b32 (), z0), ++ z0_res = svcvt_f16_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_cvt_f16_u32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f16_u32_x_untied, svfloat16_t, svuint32_t, ++ z0 = svcvt_f16_u32_x (svptrue_b32 (), z4), ++ z0 = svcvt_f16_x (svptrue_b32 (), z4)) ++ ++/* ++** ptrue_cvt_f16_u64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f16_u64_x_tied1, svfloat16_t, svuint64_t, ++ z0_res = svcvt_f16_u64_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f16_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f16_u64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f16_u64_x_untied, svfloat16_t, svuint64_t, ++ z0 = svcvt_f16_u64_x (svptrue_b64 (), z4), ++ z0 = svcvt_f16_x (svptrue_b64 (), z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f32.c +new file mode 100644 +index 000000000..c16469939 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f32.c +@@ -0,0 +1,549 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvt_f32_f16_m_tied1: ++** fcvt z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_f16_m_tied1, svfloat32_t, svfloat16_t, ++ z0 = svcvt_f32_f16_m (z0, p0, z4), ++ z0 = svcvt_f32_m (z0, p0, z4)) ++ ++/* ++** cvt_f32_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvt z0\.s, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_f16_m_tied2, svfloat32_t, svfloat16_t, ++ z0_res = svcvt_f32_f16_m (z4, p0, z0), ++ z0_res = svcvt_f32_m (z4, p0, z0)) ++ ++/* ++** cvt_f32_f16_m_untied: ++** movprfx z0, z1 ++** fcvt z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_f16_m_untied, svfloat32_t, svfloat16_t, ++ z0 = svcvt_f32_f16_m (z1, p0, z4), ++ z0 = svcvt_f32_m (z1, p0, z4)) ++ ++/* ++** cvt_f32_f64_m_tied1: ++** fcvt z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_f64_m_tied1, svfloat32_t, svfloat64_t, ++ z0 = svcvt_f32_f64_m (z0, p0, z4), ++ z0 = svcvt_f32_m (z0, p0, z4)) ++ ++/* ++** cvt_f32_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** fcvt z0\.s, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_f64_m_tied2, svfloat32_t, svfloat64_t, ++ z0_res = svcvt_f32_f64_m (z4, p0, z0), ++ z0_res = svcvt_f32_m (z4, p0, z0)) ++ ++/* ++** cvt_f32_f64_m_untied: ++** movprfx z0, z1 ++** fcvt z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_f64_m_untied, svfloat32_t, svfloat64_t, ++ z0 = svcvt_f32_f64_m (z1, p0, z4), ++ z0 = svcvt_f32_m (z1, p0, z4)) ++ ++/* ++** cvt_f32_s32_m_tied1: ++** scvtf z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_s32_m_tied1, svfloat32_t, svint32_t, ++ z0 = svcvt_f32_s32_m (z0, p0, z4), ++ z0 = svcvt_f32_m (z0, p0, z4)) ++ ++/* ++** cvt_f32_s32_m_tied2: ++** mov 
(z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** scvtf z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_s32_m_tied2, svfloat32_t, svint32_t, ++ z0_res = svcvt_f32_s32_m (z4, p0, z0), ++ z0_res = svcvt_f32_m (z4, p0, z0)) ++ ++/* ++** cvt_f32_s32_m_untied: ++** movprfx z0, z1 ++** scvtf z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_s32_m_untied, svfloat32_t, svint32_t, ++ z0 = svcvt_f32_s32_m (z1, p0, z4), ++ z0 = svcvt_f32_m (z1, p0, z4)) ++ ++/* ++** cvt_f32_s64_m_tied1: ++** scvtf z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_s64_m_tied1, svfloat32_t, svint64_t, ++ z0 = svcvt_f32_s64_m (z0, p0, z4), ++ z0 = svcvt_f32_m (z0, p0, z4)) ++ ++/* ++** cvt_f32_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** scvtf z0\.s, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_s64_m_tied2, svfloat32_t, svint64_t, ++ z0_res = svcvt_f32_s64_m (z4, p0, z0), ++ z0_res = svcvt_f32_m (z4, p0, z0)) ++ ++/* ++** cvt_f32_s64_m_untied: ++** movprfx z0, z1 ++** scvtf z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_s64_m_untied, svfloat32_t, svint64_t, ++ z0 = svcvt_f32_s64_m (z1, p0, z4), ++ z0 = svcvt_f32_m (z1, p0, z4)) ++ ++/* ++** cvt_f32_u32_m_tied1: ++** ucvtf z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_u32_m_tied1, svfloat32_t, svuint32_t, ++ z0 = svcvt_f32_u32_m (z0, p0, z4), ++ z0 = svcvt_f32_m (z0, p0, z4)) ++ ++/* ++** cvt_f32_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** ucvtf z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_u32_m_tied2, svfloat32_t, svuint32_t, ++ z0_res = svcvt_f32_u32_m (z4, p0, z0), ++ z0_res = svcvt_f32_m (z4, p0, z0)) ++ ++/* ++** cvt_f32_u32_m_untied: ++** movprfx z0, z1 ++** ucvtf z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_u32_m_untied, svfloat32_t, svuint32_t, ++ z0 = svcvt_f32_u32_m (z1, p0, z4), ++ z0 = svcvt_f32_m (z1, p0, z4)) ++ ++/* ++** cvt_f32_u64_m_tied1: ++** ucvtf z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_u64_m_tied1, svfloat32_t, svuint64_t, ++ z0 = svcvt_f32_u64_m (z0, p0, z4), ++ z0 = svcvt_f32_m (z0, p0, z4)) ++ ++/* ++** cvt_f32_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** ucvtf z0\.s, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_u64_m_tied2, svfloat32_t, svuint64_t, ++ z0_res = svcvt_f32_u64_m (z4, p0, z0), ++ z0_res = svcvt_f32_m (z4, p0, z0)) ++ ++/* ++** cvt_f32_u64_m_untied: ++** movprfx z0, z1 ++** ucvtf z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_u64_m_untied, svfloat32_t, svuint64_t, ++ z0 = svcvt_f32_u64_m (z1, p0, z4), ++ z0 = svcvt_f32_m (z1, p0, z4)) ++ ++/* ++** cvt_f32_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** fcvt z0\.s, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_f16_z_tied1, svfloat32_t, svfloat16_t, ++ z0_res = svcvt_f32_f16_z (p0, z0), ++ z0_res = svcvt_f32_z (p0, z0)) ++ ++/* ++** cvt_f32_f16_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** fcvt z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_f16_z_untied, svfloat32_t, svfloat16_t, ++ z0 = svcvt_f32_f16_z (p0, z4), ++ z0 = svcvt_f32_z (p0, z4)) ++ ++/* ++** cvt_f32_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** fcvt z0\.s, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_f64_z_tied1, svfloat32_t, svfloat64_t, ++ z0_res = svcvt_f32_f64_z (p0, z0), ++ z0_res = svcvt_f32_z (p0, z0)) ++ ++/* ++** cvt_f32_f64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvt z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_f64_z_untied, svfloat32_t, svfloat64_t, 
++ z0 = svcvt_f32_f64_z (p0, z4), ++ z0 = svcvt_f32_z (p0, z4)) ++ ++/* ++** cvt_f32_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** scvtf z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_s32_z_tied1, svfloat32_t, svint32_t, ++ z0_res = svcvt_f32_s32_z (p0, z0), ++ z0_res = svcvt_f32_z (p0, z0)) ++ ++/* ++** cvt_f32_s32_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** scvtf z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_s32_z_untied, svfloat32_t, svint32_t, ++ z0 = svcvt_f32_s32_z (p0, z4), ++ z0 = svcvt_f32_z (p0, z4)) ++ ++/* ++** cvt_f32_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** scvtf z0\.s, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_s64_z_tied1, svfloat32_t, svint64_t, ++ z0_res = svcvt_f32_s64_z (p0, z0), ++ z0_res = svcvt_f32_z (p0, z0)) ++ ++/* ++** cvt_f32_s64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** scvtf z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_s64_z_untied, svfloat32_t, svint64_t, ++ z0 = svcvt_f32_s64_z (p0, z4), ++ z0 = svcvt_f32_z (p0, z4)) ++ ++/* ++** cvt_f32_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** ucvtf z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_u32_z_tied1, svfloat32_t, svuint32_t, ++ z0_res = svcvt_f32_u32_z (p0, z0), ++ z0_res = svcvt_f32_z (p0, z0)) ++ ++/* ++** cvt_f32_u32_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** ucvtf z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_u32_z_untied, svfloat32_t, svuint32_t, ++ z0 = svcvt_f32_u32_z (p0, z4), ++ z0 = svcvt_f32_z (p0, z4)) ++ ++/* ++** cvt_f32_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** ucvtf z0\.s, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_u64_z_tied1, svfloat32_t, svuint64_t, ++ z0_res = svcvt_f32_u64_z (p0, z0), ++ z0_res = svcvt_f32_z (p0, z0)) ++ ++/* ++** cvt_f32_u64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** ucvtf z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_u64_z_untied, svfloat32_t, svuint64_t, ++ z0 = svcvt_f32_u64_z (p0, z4), ++ z0 = svcvt_f32_z (p0, z4)) ++ ++/* ++** cvt_f32_f16_x_tied1: ++** fcvt z0\.s, p0/m, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_f16_x_tied1, svfloat32_t, svfloat16_t, ++ z0_res = svcvt_f32_f16_x (p0, z0), ++ z0_res = svcvt_f32_x (p0, z0)) ++ ++/* ++** cvt_f32_f16_x_untied: ++** fcvt z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_f16_x_untied, svfloat32_t, svfloat16_t, ++ z0 = svcvt_f32_f16_x (p0, z4), ++ z0 = svcvt_f32_x (p0, z4)) ++ ++/* ++** cvt_f32_f64_x_tied1: ++** fcvt z0\.s, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_f64_x_tied1, svfloat32_t, svfloat64_t, ++ z0_res = svcvt_f32_f64_x (p0, z0), ++ z0_res = svcvt_f32_x (p0, z0)) ++ ++/* ++** cvt_f32_f64_x_untied: ++** fcvt z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_f64_x_untied, svfloat32_t, svfloat64_t, ++ z0 = svcvt_f32_f64_x (p0, z4), ++ z0 = svcvt_f32_x (p0, z4)) ++ ++/* ++** cvt_f32_s32_x_tied1: ++** scvtf z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_s32_x_tied1, svfloat32_t, svint32_t, ++ z0_res = svcvt_f32_s32_x (p0, z0), ++ z0_res = svcvt_f32_x (p0, z0)) ++ ++/* ++** cvt_f32_s32_x_untied: ++** scvtf z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_s32_x_untied, svfloat32_t, svint32_t, ++ z0 = svcvt_f32_s32_x (p0, z4), ++ z0 = svcvt_f32_x (p0, z4)) ++ ++/* ++** cvt_f32_s64_x_tied1: ++** scvtf z0\.s, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_s64_x_tied1, svfloat32_t, svint64_t, ++ z0_res = svcvt_f32_s64_x (p0, z0), ++ z0_res = 
svcvt_f32_x (p0, z0)) ++ ++/* ++** cvt_f32_s64_x_untied: ++** scvtf z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_s64_x_untied, svfloat32_t, svint64_t, ++ z0 = svcvt_f32_s64_x (p0, z4), ++ z0 = svcvt_f32_x (p0, z4)) ++ ++/* ++** cvt_f32_u32_x_tied1: ++** ucvtf z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_u32_x_tied1, svfloat32_t, svuint32_t, ++ z0_res = svcvt_f32_u32_x (p0, z0), ++ z0_res = svcvt_f32_x (p0, z0)) ++ ++/* ++** cvt_f32_u32_x_untied: ++** ucvtf z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_u32_x_untied, svfloat32_t, svuint32_t, ++ z0 = svcvt_f32_u32_x (p0, z4), ++ z0 = svcvt_f32_x (p0, z4)) ++ ++/* ++** cvt_f32_u64_x_tied1: ++** ucvtf z0\.s, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f32_u64_x_tied1, svfloat32_t, svuint64_t, ++ z0_res = svcvt_f32_u64_x (p0, z0), ++ z0_res = svcvt_f32_x (p0, z0)) ++ ++/* ++** cvt_f32_u64_x_untied: ++** ucvtf z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f32_u64_x_untied, svfloat32_t, svuint64_t, ++ z0 = svcvt_f32_u64_x (p0, z4), ++ z0 = svcvt_f32_x (p0, z4)) ++ ++/* ++** ptrue_cvt_f32_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f32_f16_x_tied1, svfloat32_t, svfloat16_t, ++ z0_res = svcvt_f32_f16_x (svptrue_b32 (), z0), ++ z0_res = svcvt_f32_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_cvt_f32_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f32_f16_x_untied, svfloat32_t, svfloat16_t, ++ z0 = svcvt_f32_f16_x (svptrue_b32 (), z4), ++ z0 = svcvt_f32_x (svptrue_b32 (), z4)) ++ ++/* ++** ptrue_cvt_f32_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f32_f64_x_tied1, svfloat32_t, svfloat64_t, ++ z0_res = svcvt_f32_f64_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f32_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f32_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f32_f64_x_untied, svfloat32_t, svfloat64_t, ++ z0 = svcvt_f32_f64_x (svptrue_b64 (), z4), ++ z0 = svcvt_f32_x (svptrue_b64 (), z4)) ++ ++/* ++** ptrue_cvt_f32_s32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f32_s32_x_tied1, svfloat32_t, svint32_t, ++ z0_res = svcvt_f32_s32_x (svptrue_b32 (), z0), ++ z0_res = svcvt_f32_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_cvt_f32_s32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f32_s32_x_untied, svfloat32_t, svint32_t, ++ z0 = svcvt_f32_s32_x (svptrue_b32 (), z4), ++ z0 = svcvt_f32_x (svptrue_b32 (), z4)) ++ ++/* ++** ptrue_cvt_f32_s64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f32_s64_x_tied1, svfloat32_t, svint64_t, ++ z0_res = svcvt_f32_s64_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f32_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f32_s64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f32_s64_x_untied, svfloat32_t, svint64_t, ++ z0 = svcvt_f32_s64_x (svptrue_b64 (), z4), ++ z0 = svcvt_f32_x (svptrue_b64 (), z4)) ++ ++/* ++** ptrue_cvt_f32_u32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f32_u32_x_tied1, svfloat32_t, svuint32_t, ++ z0_res = svcvt_f32_u32_x (svptrue_b32 (), z0), ++ z0_res = svcvt_f32_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_cvt_f32_u32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f32_u32_x_untied, svfloat32_t, svuint32_t, ++ z0 = svcvt_f32_u32_x (svptrue_b32 (), z4), ++ z0 = svcvt_f32_x (svptrue_b32 (), z4)) ++ ++/* ++** ptrue_cvt_f32_u64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f32_u64_x_tied1, svfloat32_t, svuint64_t, ++ z0_res = svcvt_f32_u64_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f32_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f32_u64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f32_u64_x_untied, svfloat32_t, svuint64_t, ++ z0 = svcvt_f32_u64_x (svptrue_b64 (), z4), ++ z0 = svcvt_f32_x (svptrue_b64 (), z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f64.c +new file mode 100644 +index 000000000..1d08e6ec5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f64.c +@@ -0,0 +1,549 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvt_f64_f16_m_tied1: ++** fcvt z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_f16_m_tied1, svfloat64_t, svfloat16_t, ++ z0 = svcvt_f64_f16_m (z0, p0, z4), ++ z0 = svcvt_f64_m (z0, p0, z4)) ++ ++/* ++** cvt_f64_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvt z0\.d, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_f16_m_tied2, svfloat64_t, svfloat16_t, ++ z0_res = svcvt_f64_f16_m (z4, p0, z0), ++ z0_res = svcvt_f64_m (z4, p0, z0)) ++ ++/* ++** cvt_f64_f16_m_untied: ++** movprfx z0, z1 ++** fcvt z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_f16_m_untied, svfloat64_t, svfloat16_t, ++ z0 = svcvt_f64_f16_m (z1, p0, z4), ++ z0 = svcvt_f64_m (z1, p0, z4)) ++ ++/* ++** cvt_f64_f32_m_tied1: ++** fcvt z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_f32_m_tied1, svfloat64_t, svfloat32_t, ++ z0 = svcvt_f64_f32_m (z0, p0, z4), ++ z0 = svcvt_f64_m (z0, p0, z4)) ++ ++/* ++** cvt_f64_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvt z0\.d, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_f32_m_tied2, svfloat64_t, svfloat32_t, ++ z0_res = svcvt_f64_f32_m (z4, p0, z0), ++ z0_res = svcvt_f64_m (z4, p0, z0)) ++ ++/* ++** cvt_f64_f32_m_untied: ++** movprfx z0, z1 ++** fcvt z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_f32_m_untied, svfloat64_t, svfloat32_t, ++ z0 = svcvt_f64_f32_m (z1, p0, z4), ++ z0 = svcvt_f64_m (z1, p0, z4)) ++ ++/* ++** cvt_f64_s32_m_tied1: ++** scvtf z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_s32_m_tied1, svfloat64_t, svint32_t, ++ z0 = svcvt_f64_s32_m (z0, p0, z4), ++ z0 = svcvt_f64_m (z0, p0, z4)) ++ ++/* ++** cvt_f64_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** scvtf z0\.d, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_s32_m_tied2, svfloat64_t, svint32_t, ++ z0_res = svcvt_f64_s32_m (z4, p0, z0), ++ z0_res = svcvt_f64_m (z4, p0, z0)) ++ ++/* ++** cvt_f64_s32_m_untied: ++** movprfx z0, z1 ++** scvtf z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_s32_m_untied, svfloat64_t, svint32_t, ++ z0 = svcvt_f64_s32_m (z1, p0, z4), ++ z0 = svcvt_f64_m (z1, p0, z4)) ++ ++/* ++** cvt_f64_s64_m_tied1: ++** scvtf z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_s64_m_tied1, svfloat64_t, svint64_t, ++ z0 = svcvt_f64_s64_m (z0, p0, z4), ++ z0 = svcvt_f64_m (z0, p0, z4)) ++ ++/* ++** cvt_f64_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** scvtf z0\.d, p0/m, \1 ++** ret ++*/ 
++TEST_DUAL_Z_REV (cvt_f64_s64_m_tied2, svfloat64_t, svint64_t, ++ z0_res = svcvt_f64_s64_m (z4, p0, z0), ++ z0_res = svcvt_f64_m (z4, p0, z0)) ++ ++/* ++** cvt_f64_s64_m_untied: ++** movprfx z0, z1 ++** scvtf z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_s64_m_untied, svfloat64_t, svint64_t, ++ z0 = svcvt_f64_s64_m (z1, p0, z4), ++ z0 = svcvt_f64_m (z1, p0, z4)) ++ ++/* ++** cvt_f64_u32_m_tied1: ++** ucvtf z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_u32_m_tied1, svfloat64_t, svuint32_t, ++ z0 = svcvt_f64_u32_m (z0, p0, z4), ++ z0 = svcvt_f64_m (z0, p0, z4)) ++ ++/* ++** cvt_f64_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** ucvtf z0\.d, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_u32_m_tied2, svfloat64_t, svuint32_t, ++ z0_res = svcvt_f64_u32_m (z4, p0, z0), ++ z0_res = svcvt_f64_m (z4, p0, z0)) ++ ++/* ++** cvt_f64_u32_m_untied: ++** movprfx z0, z1 ++** ucvtf z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_u32_m_untied, svfloat64_t, svuint32_t, ++ z0 = svcvt_f64_u32_m (z1, p0, z4), ++ z0 = svcvt_f64_m (z1, p0, z4)) ++ ++/* ++** cvt_f64_u64_m_tied1: ++** ucvtf z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_u64_m_tied1, svfloat64_t, svuint64_t, ++ z0 = svcvt_f64_u64_m (z0, p0, z4), ++ z0 = svcvt_f64_m (z0, p0, z4)) ++ ++/* ++** cvt_f64_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** ucvtf z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_u64_m_tied2, svfloat64_t, svuint64_t, ++ z0_res = svcvt_f64_u64_m (z4, p0, z0), ++ z0_res = svcvt_f64_m (z4, p0, z0)) ++ ++/* ++** cvt_f64_u64_m_untied: ++** movprfx z0, z1 ++** ucvtf z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_u64_m_untied, svfloat64_t, svuint64_t, ++ z0 = svcvt_f64_u64_m (z1, p0, z4), ++ z0 = svcvt_f64_m (z1, p0, z4)) ++ ++/* ++** cvt_f64_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.d, p0/z, \1\.d ++** fcvt z0\.d, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_f16_z_tied1, svfloat64_t, svfloat16_t, ++ z0_res = svcvt_f64_f16_z (p0, z0), ++ z0_res = svcvt_f64_z (p0, z0)) ++ ++/* ++** cvt_f64_f16_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvt z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_f16_z_untied, svfloat64_t, svfloat16_t, ++ z0 = svcvt_f64_f16_z (p0, z4), ++ z0 = svcvt_f64_z (p0, z4)) ++ ++/* ++** cvt_f64_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.d, p0/z, \1\.d ++** fcvt z0\.d, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_f32_z_tied1, svfloat64_t, svfloat32_t, ++ z0_res = svcvt_f64_f32_z (p0, z0), ++ z0_res = svcvt_f64_z (p0, z0)) ++ ++/* ++** cvt_f64_f32_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvt z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_f32_z_untied, svfloat64_t, svfloat32_t, ++ z0 = svcvt_f64_f32_z (p0, z4), ++ z0 = svcvt_f64_z (p0, z4)) ++ ++/* ++** cvt_f64_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.d, p0/z, \1\.d ++** scvtf z0\.d, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_s32_z_tied1, svfloat64_t, svint32_t, ++ z0_res = svcvt_f64_s32_z (p0, z0), ++ z0_res = svcvt_f64_z (p0, z0)) ++ ++/* ++** cvt_f64_s32_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** scvtf z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_s32_z_untied, svfloat64_t, svint32_t, ++ z0 = svcvt_f64_s32_z (p0, z4), ++ z0 = svcvt_f64_z (p0, z4)) ++ ++/* ++** cvt_f64_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** scvtf z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_s64_z_tied1, svfloat64_t, svint64_t, ++ z0_res = 
svcvt_f64_s64_z (p0, z0), ++ z0_res = svcvt_f64_z (p0, z0)) ++ ++/* ++** cvt_f64_s64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** scvtf z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_s64_z_untied, svfloat64_t, svint64_t, ++ z0 = svcvt_f64_s64_z (p0, z4), ++ z0 = svcvt_f64_z (p0, z4)) ++ ++/* ++** cvt_f64_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.d, p0/z, \1\.d ++** ucvtf z0\.d, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_u32_z_tied1, svfloat64_t, svuint32_t, ++ z0_res = svcvt_f64_u32_z (p0, z0), ++ z0_res = svcvt_f64_z (p0, z0)) ++ ++/* ++** cvt_f64_u32_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** ucvtf z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_u32_z_untied, svfloat64_t, svuint32_t, ++ z0 = svcvt_f64_u32_z (p0, z4), ++ z0 = svcvt_f64_z (p0, z4)) ++ ++/* ++** cvt_f64_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** ucvtf z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_u64_z_tied1, svfloat64_t, svuint64_t, ++ z0_res = svcvt_f64_u64_z (p0, z0), ++ z0_res = svcvt_f64_z (p0, z0)) ++ ++/* ++** cvt_f64_u64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** ucvtf z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_u64_z_untied, svfloat64_t, svuint64_t, ++ z0 = svcvt_f64_u64_z (p0, z4), ++ z0 = svcvt_f64_z (p0, z4)) ++ ++/* ++** cvt_f64_f16_x_tied1: ++** fcvt z0\.d, p0/m, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_f16_x_tied1, svfloat64_t, svfloat16_t, ++ z0_res = svcvt_f64_f16_x (p0, z0), ++ z0_res = svcvt_f64_x (p0, z0)) ++ ++/* ++** cvt_f64_f16_x_untied: ++** fcvt z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_f16_x_untied, svfloat64_t, svfloat16_t, ++ z0 = svcvt_f64_f16_x (p0, z4), ++ z0 = svcvt_f64_x (p0, z4)) ++ ++/* ++** cvt_f64_f32_x_tied1: ++** fcvt z0\.d, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_f32_x_tied1, svfloat64_t, svfloat32_t, ++ z0_res = svcvt_f64_f32_x (p0, z0), ++ z0_res = svcvt_f64_x (p0, z0)) ++ ++/* ++** cvt_f64_f32_x_untied: ++** fcvt z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_f32_x_untied, svfloat64_t, svfloat32_t, ++ z0 = svcvt_f64_f32_x (p0, z4), ++ z0 = svcvt_f64_x (p0, z4)) ++ ++/* ++** cvt_f64_s32_x_tied1: ++** scvtf z0\.d, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_s32_x_tied1, svfloat64_t, svint32_t, ++ z0_res = svcvt_f64_s32_x (p0, z0), ++ z0_res = svcvt_f64_x (p0, z0)) ++ ++/* ++** cvt_f64_s32_x_untied: ++** scvtf z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_s32_x_untied, svfloat64_t, svint32_t, ++ z0 = svcvt_f64_s32_x (p0, z4), ++ z0 = svcvt_f64_x (p0, z4)) ++ ++/* ++** cvt_f64_s64_x_tied1: ++** scvtf z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_s64_x_tied1, svfloat64_t, svint64_t, ++ z0_res = svcvt_f64_s64_x (p0, z0), ++ z0_res = svcvt_f64_x (p0, z0)) ++ ++/* ++** cvt_f64_s64_x_untied: ++** scvtf z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_s64_x_untied, svfloat64_t, svint64_t, ++ z0 = svcvt_f64_s64_x (p0, z4), ++ z0 = svcvt_f64_x (p0, z4)) ++ ++/* ++** cvt_f64_u32_x_tied1: ++** ucvtf z0\.d, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_u32_x_tied1, svfloat64_t, svuint32_t, ++ z0_res = svcvt_f64_u32_x (p0, z0), ++ z0_res = svcvt_f64_x (p0, z0)) ++ ++/* ++** cvt_f64_u32_x_untied: ++** ucvtf z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_u32_x_untied, svfloat64_t, svuint32_t, ++ z0 = svcvt_f64_u32_x (p0, z4), ++ z0 = svcvt_f64_x (p0, z4)) ++ ++/* ++** cvt_f64_u64_x_tied1: ++** ucvtf z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_f64_u64_x_tied1, svfloat64_t, svuint64_t, 
++ z0_res = svcvt_f64_u64_x (p0, z0), ++ z0_res = svcvt_f64_x (p0, z0)) ++ ++/* ++** cvt_f64_u64_x_untied: ++** ucvtf z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_f64_u64_x_untied, svfloat64_t, svuint64_t, ++ z0 = svcvt_f64_u64_x (p0, z4), ++ z0 = svcvt_f64_x (p0, z4)) ++ ++/* ++** ptrue_cvt_f64_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f64_f16_x_tied1, svfloat64_t, svfloat16_t, ++ z0_res = svcvt_f64_f16_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f64_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f64_f16_x_untied, svfloat64_t, svfloat16_t, ++ z0 = svcvt_f64_f16_x (svptrue_b64 (), z4), ++ z0 = svcvt_f64_x (svptrue_b64 (), z4)) ++ ++/* ++** ptrue_cvt_f64_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f64_f32_x_tied1, svfloat64_t, svfloat32_t, ++ z0_res = svcvt_f64_f32_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f64_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f64_f32_x_untied, svfloat64_t, svfloat32_t, ++ z0 = svcvt_f64_f32_x (svptrue_b64 (), z4), ++ z0 = svcvt_f64_x (svptrue_b64 (), z4)) ++ ++/* ++** ptrue_cvt_f64_s32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f64_s32_x_tied1, svfloat64_t, svint32_t, ++ z0_res = svcvt_f64_s32_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f64_s32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f64_s32_x_untied, svfloat64_t, svint32_t, ++ z0 = svcvt_f64_s32_x (svptrue_b64 (), z4), ++ z0 = svcvt_f64_x (svptrue_b64 (), z4)) ++ ++/* ++** ptrue_cvt_f64_s64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f64_s64_x_tied1, svfloat64_t, svint64_t, ++ z0_res = svcvt_f64_s64_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f64_s64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f64_s64_x_untied, svfloat64_t, svint64_t, ++ z0 = svcvt_f64_s64_x (svptrue_b64 (), z4), ++ z0 = svcvt_f64_x (svptrue_b64 (), z4)) ++ ++/* ++** ptrue_cvt_f64_u32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f64_u32_x_tied1, svfloat64_t, svuint32_t, ++ z0_res = svcvt_f64_u32_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f64_u32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f64_u32_x_untied, svfloat64_t, svuint32_t, ++ z0 = svcvt_f64_u32_x (svptrue_b64 (), z4), ++ z0 = svcvt_f64_x (svptrue_b64 (), z4)) ++ ++/* ++** ptrue_cvt_f64_u64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_cvt_f64_u64_x_tied1, svfloat64_t, svuint64_t, ++ z0_res = svcvt_f64_u64_x (svptrue_b64 (), z0), ++ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_cvt_f64_u64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvt_f64_u64_x_untied, svfloat64_t, svuint64_t, ++ z0 = svcvt_f64_u64_x (svptrue_b64 (), z4), ++ z0 = svcvt_f64_x (svptrue_b64 (), z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s16.c +new file mode 100644 +index 000000000..81761ab09 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s16.c +@@ -0,0 +1,72 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvt_s16_f16_m_tied1: ++** fcvtzs z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s16_f16_m_tied1, svint16_t, svfloat16_t, ++ z0 = svcvt_s16_f16_m (z0, p0, z4), ++ z0 = svcvt_s16_m (z0, p0, z4)) ++ ++/* ++** cvt_s16_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvtzs z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s16_f16_m_tied2, svint16_t, svfloat16_t, ++ z0_res = svcvt_s16_f16_m (z4, p0, z0), ++ z0_res = svcvt_s16_m (z4, p0, z0)) ++ ++/* ++** cvt_s16_f16_m_untied: ++** movprfx z0, z1 ++** fcvtzs z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s16_f16_m_untied, svint16_t, svfloat16_t, ++ z0 = svcvt_s16_f16_m (z1, p0, z4), ++ z0 = svcvt_s16_m (z1, p0, z4)) ++ ++/* ++** cvt_s16_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** fcvtzs z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s16_f16_z_tied1, svint16_t, svfloat16_t, ++ z0_res = svcvt_s16_f16_z (p0, z0), ++ z0_res = svcvt_s16_z (p0, z0)) ++ ++/* ++** cvt_s16_f16_z_untied: ++** movprfx z0\.h, p0/z, z4\.h ++** fcvtzs z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s16_f16_z_untied, svint16_t, svfloat16_t, ++ z0 = svcvt_s16_f16_z (p0, z4), ++ z0 = svcvt_s16_z (p0, z4)) ++ ++/* ++** cvt_s16_f16_x_tied1: ++** fcvtzs z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s16_f16_x_tied1, svint16_t, svfloat16_t, ++ z0_res = svcvt_s16_f16_x (p0, z0), ++ z0_res = svcvt_s16_x (p0, z0)) ++ ++/* ++** cvt_s16_f16_x_untied: ++** fcvtzs z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s16_f16_x_untied, svint16_t, svfloat16_t, ++ z0 = svcvt_s16_f16_x (p0, z4), ++ z0 = svcvt_s16_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s32.c +new file mode 100644 +index 000000000..d30da5cc5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s32.c +@@ -0,0 +1,210 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvt_s32_f16_m_tied1: ++** fcvtzs z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f16_m_tied1, svint32_t, svfloat16_t, ++ z0 = svcvt_s32_f16_m (z0, p0, z4), ++ z0 = svcvt_s32_m (z0, p0, z4)) ++ ++/* ++** cvt_s32_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvtzs z0\.s, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s32_f16_m_tied2, svint32_t, svfloat16_t, ++ z0_res = svcvt_s32_f16_m (z4, p0, z0), ++ z0_res = svcvt_s32_m (z4, p0, z0)) ++ ++/* ++** cvt_s32_f16_m_untied: ++** movprfx z0, z1 ++** fcvtzs z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f16_m_untied, svint32_t, svfloat16_t, ++ z0 = svcvt_s32_f16_m (z1, p0, z4), ++ z0 = svcvt_s32_m (z1, p0, z4)) ++ ++/* ++** cvt_s32_f32_m_tied1: ++** fcvtzs z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f32_m_tied1, svint32_t, svfloat32_t, ++ z0 = svcvt_s32_f32_m (z0, p0, z4), ++ z0 = svcvt_s32_m (z0, p0, z4)) ++ ++/* ++** cvt_s32_f32_m_tied2: ++** mov (z[0-9]+)\.d, 
z0\.d ++** movprfx z0, z4 ++** fcvtzs z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s32_f32_m_tied2, svint32_t, svfloat32_t, ++ z0_res = svcvt_s32_f32_m (z4, p0, z0), ++ z0_res = svcvt_s32_m (z4, p0, z0)) ++ ++/* ++** cvt_s32_f32_m_untied: ++** movprfx z0, z1 ++** fcvtzs z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f32_m_untied, svint32_t, svfloat32_t, ++ z0 = svcvt_s32_f32_m (z1, p0, z4), ++ z0 = svcvt_s32_m (z1, p0, z4)) ++ ++/* ++** cvt_s32_f64_m_tied1: ++** fcvtzs z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f64_m_tied1, svint32_t, svfloat64_t, ++ z0 = svcvt_s32_f64_m (z0, p0, z4), ++ z0 = svcvt_s32_m (z0, p0, z4)) ++ ++/* ++** cvt_s32_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** fcvtzs z0\.s, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s32_f64_m_tied2, svint32_t, svfloat64_t, ++ z0_res = svcvt_s32_f64_m (z4, p0, z0), ++ z0_res = svcvt_s32_m (z4, p0, z0)) ++ ++/* ++** cvt_s32_f64_m_untied: ++** movprfx z0, z1 ++** fcvtzs z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f64_m_untied, svint32_t, svfloat64_t, ++ z0 = svcvt_s32_f64_m (z1, p0, z4), ++ z0 = svcvt_s32_m (z1, p0, z4)) ++ ++/* ++** cvt_s32_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** fcvtzs z0\.s, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s32_f16_z_tied1, svint32_t, svfloat16_t, ++ z0_res = svcvt_s32_f16_z (p0, z0), ++ z0_res = svcvt_s32_z (p0, z0)) ++ ++/* ++** cvt_s32_f16_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** fcvtzs z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f16_z_untied, svint32_t, svfloat16_t, ++ z0 = svcvt_s32_f16_z (p0, z4), ++ z0 = svcvt_s32_z (p0, z4)) ++ ++/* ++** cvt_s32_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** fcvtzs z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s32_f32_z_tied1, svint32_t, svfloat32_t, ++ z0_res = svcvt_s32_f32_z (p0, z0), ++ z0_res = svcvt_s32_z (p0, z0)) ++ ++/* ++** cvt_s32_f32_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** fcvtzs z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f32_z_untied, svint32_t, svfloat32_t, ++ z0 = svcvt_s32_f32_z (p0, z4), ++ z0 = svcvt_s32_z (p0, z4)) ++ ++/* ++** cvt_s32_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** fcvtzs z0\.s, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s32_f64_z_tied1, svint32_t, svfloat64_t, ++ z0_res = svcvt_s32_f64_z (p0, z0), ++ z0_res = svcvt_s32_z (p0, z0)) ++ ++/* ++** cvt_s32_f64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvtzs z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f64_z_untied, svint32_t, svfloat64_t, ++ z0 = svcvt_s32_f64_z (p0, z4), ++ z0 = svcvt_s32_z (p0, z4)) ++ ++/* ++** cvt_s32_f16_x_tied1: ++** fcvtzs z0\.s, p0/m, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s32_f16_x_tied1, svint32_t, svfloat16_t, ++ z0_res = svcvt_s32_f16_x (p0, z0), ++ z0_res = svcvt_s32_x (p0, z0)) ++ ++/* ++** cvt_s32_f16_x_untied: ++** fcvtzs z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f16_x_untied, svint32_t, svfloat16_t, ++ z0 = svcvt_s32_f16_x (p0, z4), ++ z0 = svcvt_s32_x (p0, z4)) ++ ++/* ++** cvt_s32_f32_x_tied1: ++** fcvtzs z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s32_f32_x_tied1, svint32_t, svfloat32_t, ++ z0_res = svcvt_s32_f32_x (p0, z0), ++ z0_res = svcvt_s32_x (p0, z0)) ++ ++/* ++** cvt_s32_f32_x_untied: ++** fcvtzs z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f32_x_untied, svint32_t, svfloat32_t, ++ z0 = svcvt_s32_f32_x (p0, z4), ++ z0 = svcvt_s32_x (p0, z4)) ++ ++/* ++** 
cvt_s32_f64_x_tied1: ++** fcvtzs z0\.s, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s32_f64_x_tied1, svint32_t, svfloat64_t, ++ z0_res = svcvt_s32_f64_x (p0, z0), ++ z0_res = svcvt_s32_x (p0, z0)) ++ ++/* ++** cvt_s32_f64_x_untied: ++** fcvtzs z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_s32_f64_x_untied, svint32_t, svfloat64_t, ++ z0 = svcvt_s32_f64_x (p0, z4), ++ z0 = svcvt_s32_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s64.c +new file mode 100644 +index 000000000..68cd80784 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s64.c +@@ -0,0 +1,210 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvt_s64_f16_m_tied1: ++** fcvtzs z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f16_m_tied1, svint64_t, svfloat16_t, ++ z0 = svcvt_s64_f16_m (z0, p0, z4), ++ z0 = svcvt_s64_m (z0, p0, z4)) ++ ++/* ++** cvt_s64_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvtzs z0\.d, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s64_f16_m_tied2, svint64_t, svfloat16_t, ++ z0_res = svcvt_s64_f16_m (z4, p0, z0), ++ z0_res = svcvt_s64_m (z4, p0, z0)) ++ ++/* ++** cvt_s64_f16_m_untied: ++** movprfx z0, z1 ++** fcvtzs z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f16_m_untied, svint64_t, svfloat16_t, ++ z0 = svcvt_s64_f16_m (z1, p0, z4), ++ z0 = svcvt_s64_m (z1, p0, z4)) ++ ++/* ++** cvt_s64_f32_m_tied1: ++** fcvtzs z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f32_m_tied1, svint64_t, svfloat32_t, ++ z0 = svcvt_s64_f32_m (z0, p0, z4), ++ z0 = svcvt_s64_m (z0, p0, z4)) ++ ++/* ++** cvt_s64_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvtzs z0\.d, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s64_f32_m_tied2, svint64_t, svfloat32_t, ++ z0_res = svcvt_s64_f32_m (z4, p0, z0), ++ z0_res = svcvt_s64_m (z4, p0, z0)) ++ ++/* ++** cvt_s64_f32_m_untied: ++** movprfx z0, z1 ++** fcvtzs z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f32_m_untied, svint64_t, svfloat32_t, ++ z0 = svcvt_s64_f32_m (z1, p0, z4), ++ z0 = svcvt_s64_m (z1, p0, z4)) ++ ++/* ++** cvt_s64_f64_m_tied1: ++** fcvtzs z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f64_m_tied1, svint64_t, svfloat64_t, ++ z0 = svcvt_s64_f64_m (z0, p0, z4), ++ z0 = svcvt_s64_m (z0, p0, z4)) ++ ++/* ++** cvt_s64_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** fcvtzs z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s64_f64_m_tied2, svint64_t, svfloat64_t, ++ z0_res = svcvt_s64_f64_m (z4, p0, z0), ++ z0_res = svcvt_s64_m (z4, p0, z0)) ++ ++/* ++** cvt_s64_f64_m_untied: ++** movprfx z0, z1 ++** fcvtzs z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f64_m_untied, svint64_t, svfloat64_t, ++ z0 = svcvt_s64_f64_m (z1, p0, z4), ++ z0 = svcvt_s64_m (z1, p0, z4)) ++ ++/* ++** cvt_s64_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.d, p0/z, \1\.d ++** fcvtzs z0\.d, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s64_f16_z_tied1, svint64_t, svfloat16_t, ++ z0_res = svcvt_s64_f16_z (p0, z0), ++ z0_res = svcvt_s64_z (p0, z0)) ++ ++/* ++** cvt_s64_f16_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvtzs z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f16_z_untied, svint64_t, svfloat16_t, ++ z0 = svcvt_s64_f16_z (p0, z4), ++ z0 = svcvt_s64_z (p0, z4)) ++ ++/* ++** cvt_s64_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.d, p0/z, \1\.d ++** fcvtzs z0\.d, 
p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s64_f32_z_tied1, svint64_t, svfloat32_t, ++ z0_res = svcvt_s64_f32_z (p0, z0), ++ z0_res = svcvt_s64_z (p0, z0)) ++ ++/* ++** cvt_s64_f32_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvtzs z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f32_z_untied, svint64_t, svfloat32_t, ++ z0 = svcvt_s64_f32_z (p0, z4), ++ z0 = svcvt_s64_z (p0, z4)) ++ ++/* ++** cvt_s64_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** fcvtzs z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s64_f64_z_tied1, svint64_t, svfloat64_t, ++ z0_res = svcvt_s64_f64_z (p0, z0), ++ z0_res = svcvt_s64_z (p0, z0)) ++ ++/* ++** cvt_s64_f64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvtzs z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f64_z_untied, svint64_t, svfloat64_t, ++ z0 = svcvt_s64_f64_z (p0, z4), ++ z0 = svcvt_s64_z (p0, z4)) ++ ++/* ++** cvt_s64_f16_x_tied1: ++** fcvtzs z0\.d, p0/m, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s64_f16_x_tied1, svint64_t, svfloat16_t, ++ z0_res = svcvt_s64_f16_x (p0, z0), ++ z0_res = svcvt_s64_x (p0, z0)) ++ ++/* ++** cvt_s64_f16_x_untied: ++** fcvtzs z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f16_x_untied, svint64_t, svfloat16_t, ++ z0 = svcvt_s64_f16_x (p0, z4), ++ z0 = svcvt_s64_x (p0, z4)) ++ ++/* ++** cvt_s64_f32_x_tied1: ++** fcvtzs z0\.d, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s64_f32_x_tied1, svint64_t, svfloat32_t, ++ z0_res = svcvt_s64_f32_x (p0, z0), ++ z0_res = svcvt_s64_x (p0, z0)) ++ ++/* ++** cvt_s64_f32_x_untied: ++** fcvtzs z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f32_x_untied, svint64_t, svfloat32_t, ++ z0 = svcvt_s64_f32_x (p0, z4), ++ z0 = svcvt_s64_x (p0, z4)) ++ ++/* ++** cvt_s64_f64_x_tied1: ++** fcvtzs z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_s64_f64_x_tied1, svint64_t, svfloat64_t, ++ z0_res = svcvt_s64_f64_x (p0, z0), ++ z0_res = svcvt_s64_x (p0, z0)) ++ ++/* ++** cvt_s64_f64_x_untied: ++** fcvtzs z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_s64_f64_x_untied, svint64_t, svfloat64_t, ++ z0 = svcvt_s64_f64_x (p0, z4), ++ z0 = svcvt_s64_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u16.c +new file mode 100644 +index 000000000..4db0dffdd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u16.c +@@ -0,0 +1,72 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvt_u16_f16_m_tied1: ++** fcvtzu z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u16_f16_m_tied1, svuint16_t, svfloat16_t, ++ z0 = svcvt_u16_f16_m (z0, p0, z4), ++ z0 = svcvt_u16_m (z0, p0, z4)) ++ ++/* ++** cvt_u16_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvtzu z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u16_f16_m_tied2, svuint16_t, svfloat16_t, ++ z0_res = svcvt_u16_f16_m (z4, p0, z0), ++ z0_res = svcvt_u16_m (z4, p0, z0)) ++ ++/* ++** cvt_u16_f16_m_untied: ++** movprfx z0, z1 ++** fcvtzu z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u16_f16_m_untied, svuint16_t, svfloat16_t, ++ z0 = svcvt_u16_f16_m (z1, p0, z4), ++ z0 = svcvt_u16_m (z1, p0, z4)) ++ ++/* ++** cvt_u16_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** fcvtzu z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u16_f16_z_tied1, svuint16_t, svfloat16_t, ++ z0_res = svcvt_u16_f16_z (p0, z0), ++ z0_res = svcvt_u16_z (p0, z0)) ++ ++/* ++** 
cvt_u16_f16_z_untied: ++** movprfx z0\.h, p0/z, z4\.h ++** fcvtzu z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u16_f16_z_untied, svuint16_t, svfloat16_t, ++ z0 = svcvt_u16_f16_z (p0, z4), ++ z0 = svcvt_u16_z (p0, z4)) ++ ++/* ++** cvt_u16_f16_x_tied1: ++** fcvtzu z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u16_f16_x_tied1, svuint16_t, svfloat16_t, ++ z0_res = svcvt_u16_f16_x (p0, z0), ++ z0_res = svcvt_u16_x (p0, z0)) ++ ++/* ++** cvt_u16_f16_x_untied: ++** fcvtzu z0\.h, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u16_f16_x_untied, svuint16_t, svfloat16_t, ++ z0 = svcvt_u16_f16_x (p0, z4), ++ z0 = svcvt_u16_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u32.c +new file mode 100644 +index 000000000..52ef49fcf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u32.c +@@ -0,0 +1,210 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvt_u32_f16_m_tied1: ++** fcvtzu z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f16_m_tied1, svuint32_t, svfloat16_t, ++ z0 = svcvt_u32_f16_m (z0, p0, z4), ++ z0 = svcvt_u32_m (z0, p0, z4)) ++ ++/* ++** cvt_u32_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvtzu z0\.s, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u32_f16_m_tied2, svuint32_t, svfloat16_t, ++ z0_res = svcvt_u32_f16_m (z4, p0, z0), ++ z0_res = svcvt_u32_m (z4, p0, z0)) ++ ++/* ++** cvt_u32_f16_m_untied: ++** movprfx z0, z1 ++** fcvtzu z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f16_m_untied, svuint32_t, svfloat16_t, ++ z0 = svcvt_u32_f16_m (z1, p0, z4), ++ z0 = svcvt_u32_m (z1, p0, z4)) ++ ++/* ++** cvt_u32_f32_m_tied1: ++** fcvtzu z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f32_m_tied1, svuint32_t, svfloat32_t, ++ z0 = svcvt_u32_f32_m (z0, p0, z4), ++ z0 = svcvt_u32_m (z0, p0, z4)) ++ ++/* ++** cvt_u32_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvtzu z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u32_f32_m_tied2, svuint32_t, svfloat32_t, ++ z0_res = svcvt_u32_f32_m (z4, p0, z0), ++ z0_res = svcvt_u32_m (z4, p0, z0)) ++ ++/* ++** cvt_u32_f32_m_untied: ++** movprfx z0, z1 ++** fcvtzu z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f32_m_untied, svuint32_t, svfloat32_t, ++ z0 = svcvt_u32_f32_m (z1, p0, z4), ++ z0 = svcvt_u32_m (z1, p0, z4)) ++ ++/* ++** cvt_u32_f64_m_tied1: ++** fcvtzu z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f64_m_tied1, svuint32_t, svfloat64_t, ++ z0 = svcvt_u32_f64_m (z0, p0, z4), ++ z0 = svcvt_u32_m (z0, p0, z4)) ++ ++/* ++** cvt_u32_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** fcvtzu z0\.s, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u32_f64_m_tied2, svuint32_t, svfloat64_t, ++ z0_res = svcvt_u32_f64_m (z4, p0, z0), ++ z0_res = svcvt_u32_m (z4, p0, z0)) ++ ++/* ++** cvt_u32_f64_m_untied: ++** movprfx z0, z1 ++** fcvtzu z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f64_m_untied, svuint32_t, svfloat64_t, ++ z0 = svcvt_u32_f64_m (z1, p0, z4), ++ z0 = svcvt_u32_m (z1, p0, z4)) ++ ++/* ++** cvt_u32_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** fcvtzu z0\.s, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u32_f16_z_tied1, svuint32_t, svfloat16_t, ++ z0_res = svcvt_u32_f16_z (p0, z0), ++ z0_res = svcvt_u32_z (p0, z0)) ++ ++/* ++** cvt_u32_f16_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** fcvtzu z0\.s, p0/m, z4\.h 
++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f16_z_untied, svuint32_t, svfloat16_t, ++ z0 = svcvt_u32_f16_z (p0, z4), ++ z0 = svcvt_u32_z (p0, z4)) ++ ++/* ++** cvt_u32_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** fcvtzu z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u32_f32_z_tied1, svuint32_t, svfloat32_t, ++ z0_res = svcvt_u32_f32_z (p0, z0), ++ z0_res = svcvt_u32_z (p0, z0)) ++ ++/* ++** cvt_u32_f32_z_untied: ++** movprfx z0\.s, p0/z, z4\.s ++** fcvtzu z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f32_z_untied, svuint32_t, svfloat32_t, ++ z0 = svcvt_u32_f32_z (p0, z4), ++ z0 = svcvt_u32_z (p0, z4)) ++ ++/* ++** cvt_u32_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** fcvtzu z0\.s, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u32_f64_z_tied1, svuint32_t, svfloat64_t, ++ z0_res = svcvt_u32_f64_z (p0, z0), ++ z0_res = svcvt_u32_z (p0, z0)) ++ ++/* ++** cvt_u32_f64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvtzu z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f64_z_untied, svuint32_t, svfloat64_t, ++ z0 = svcvt_u32_f64_z (p0, z4), ++ z0 = svcvt_u32_z (p0, z4)) ++ ++/* ++** cvt_u32_f16_x_tied1: ++** fcvtzu z0\.s, p0/m, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u32_f16_x_tied1, svuint32_t, svfloat16_t, ++ z0_res = svcvt_u32_f16_x (p0, z0), ++ z0_res = svcvt_u32_x (p0, z0)) ++ ++/* ++** cvt_u32_f16_x_untied: ++** fcvtzu z0\.s, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f16_x_untied, svuint32_t, svfloat16_t, ++ z0 = svcvt_u32_f16_x (p0, z4), ++ z0 = svcvt_u32_x (p0, z4)) ++ ++/* ++** cvt_u32_f32_x_tied1: ++** fcvtzu z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u32_f32_x_tied1, svuint32_t, svfloat32_t, ++ z0_res = svcvt_u32_f32_x (p0, z0), ++ z0_res = svcvt_u32_x (p0, z0)) ++ ++/* ++** cvt_u32_f32_x_untied: ++** fcvtzu z0\.s, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f32_x_untied, svuint32_t, svfloat32_t, ++ z0 = svcvt_u32_f32_x (p0, z4), ++ z0 = svcvt_u32_x (p0, z4)) ++ ++/* ++** cvt_u32_f64_x_tied1: ++** fcvtzu z0\.s, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u32_f64_x_tied1, svuint32_t, svfloat64_t, ++ z0_res = svcvt_u32_f64_x (p0, z0), ++ z0_res = svcvt_u32_x (p0, z0)) ++ ++/* ++** cvt_u32_f64_x_untied: ++** fcvtzu z0\.s, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_u32_f64_x_untied, svuint32_t, svfloat64_t, ++ z0 = svcvt_u32_f64_x (p0, z4), ++ z0 = svcvt_u32_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u64.c +new file mode 100644 +index 000000000..0c43758ae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u64.c +@@ -0,0 +1,210 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvt_u64_f16_m_tied1: ++** fcvtzu z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f16_m_tied1, svuint64_t, svfloat16_t, ++ z0 = svcvt_u64_f16_m (z0, p0, z4), ++ z0 = svcvt_u64_m (z0, p0, z4)) ++ ++/* ++** cvt_u64_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvtzu z0\.d, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u64_f16_m_tied2, svuint64_t, svfloat16_t, ++ z0_res = svcvt_u64_f16_m (z4, p0, z0), ++ z0_res = svcvt_u64_m (z4, p0, z0)) ++ ++/* ++** cvt_u64_f16_m_untied: ++** movprfx z0, z1 ++** fcvtzu z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f16_m_untied, svuint64_t, svfloat16_t, ++ z0 = svcvt_u64_f16_m (z1, p0, z4), ++ z0 = svcvt_u64_m (z1, p0, z4)) ++ ++/* ++** 
cvt_u64_f32_m_tied1: ++** fcvtzu z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f32_m_tied1, svuint64_t, svfloat32_t, ++ z0 = svcvt_u64_f32_m (z0, p0, z4), ++ z0 = svcvt_u64_m (z0, p0, z4)) ++ ++/* ++** cvt_u64_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fcvtzu z0\.d, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u64_f32_m_tied2, svuint64_t, svfloat32_t, ++ z0_res = svcvt_u64_f32_m (z4, p0, z0), ++ z0_res = svcvt_u64_m (z4, p0, z0)) ++ ++/* ++** cvt_u64_f32_m_untied: ++** movprfx z0, z1 ++** fcvtzu z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f32_m_untied, svuint64_t, svfloat32_t, ++ z0 = svcvt_u64_f32_m (z1, p0, z4), ++ z0 = svcvt_u64_m (z1, p0, z4)) ++ ++/* ++** cvt_u64_f64_m_tied1: ++** fcvtzu z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f64_m_tied1, svuint64_t, svfloat64_t, ++ z0 = svcvt_u64_f64_m (z0, p0, z4), ++ z0 = svcvt_u64_m (z0, p0, z4)) ++ ++/* ++** cvt_u64_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** fcvtzu z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u64_f64_m_tied2, svuint64_t, svfloat64_t, ++ z0_res = svcvt_u64_f64_m (z4, p0, z0), ++ z0_res = svcvt_u64_m (z4, p0, z0)) ++ ++/* ++** cvt_u64_f64_m_untied: ++** movprfx z0, z1 ++** fcvtzu z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f64_m_untied, svuint64_t, svfloat64_t, ++ z0 = svcvt_u64_f64_m (z1, p0, z4), ++ z0 = svcvt_u64_m (z1, p0, z4)) ++ ++/* ++** cvt_u64_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.d, p0/z, \1\.d ++** fcvtzu z0\.d, p0/m, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u64_f16_z_tied1, svuint64_t, svfloat16_t, ++ z0_res = svcvt_u64_f16_z (p0, z0), ++ z0_res = svcvt_u64_z (p0, z0)) ++ ++/* ++** cvt_u64_f16_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvtzu z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f16_z_untied, svuint64_t, svfloat16_t, ++ z0 = svcvt_u64_f16_z (p0, z4), ++ z0 = svcvt_u64_z (p0, z4)) ++ ++/* ++** cvt_u64_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.d, p0/z, \1\.d ++** fcvtzu z0\.d, p0/m, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u64_f32_z_tied1, svuint64_t, svfloat32_t, ++ z0_res = svcvt_u64_f32_z (p0, z0), ++ z0_res = svcvt_u64_z (p0, z0)) ++ ++/* ++** cvt_u64_f32_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvtzu z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f32_z_untied, svuint64_t, svfloat32_t, ++ z0 = svcvt_u64_f32_z (p0, z4), ++ z0 = svcvt_u64_z (p0, z4)) ++ ++/* ++** cvt_u64_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** fcvtzu z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u64_f64_z_tied1, svuint64_t, svfloat64_t, ++ z0_res = svcvt_u64_f64_z (p0, z0), ++ z0_res = svcvt_u64_z (p0, z0)) ++ ++/* ++** cvt_u64_f64_z_untied: ++** movprfx z0\.d, p0/z, z4\.d ++** fcvtzu z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f64_z_untied, svuint64_t, svfloat64_t, ++ z0 = svcvt_u64_f64_z (p0, z4), ++ z0 = svcvt_u64_z (p0, z4)) ++ ++/* ++** cvt_u64_f16_x_tied1: ++** fcvtzu z0\.d, p0/m, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u64_f16_x_tied1, svuint64_t, svfloat16_t, ++ z0_res = svcvt_u64_f16_x (p0, z0), ++ z0_res = svcvt_u64_x (p0, z0)) ++ ++/* ++** cvt_u64_f16_x_untied: ++** fcvtzu z0\.d, p0/m, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f16_x_untied, svuint64_t, svfloat16_t, ++ z0 = svcvt_u64_f16_x (p0, z4), ++ z0 = svcvt_u64_x (p0, z4)) ++ ++/* ++** cvt_u64_f32_x_tied1: ++** fcvtzu z0\.d, p0/m, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u64_f32_x_tied1, svuint64_t, svfloat32_t, ++ z0_res = svcvt_u64_f32_x 
(p0, z0), ++ z0_res = svcvt_u64_x (p0, z0)) ++ ++/* ++** cvt_u64_f32_x_untied: ++** fcvtzu z0\.d, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f32_x_untied, svuint64_t, svfloat32_t, ++ z0 = svcvt_u64_f32_x (p0, z4), ++ z0 = svcvt_u64_x (p0, z4)) ++ ++/* ++** cvt_u64_f64_x_tied1: ++** fcvtzu z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (cvt_u64_f64_x_tied1, svuint64_t, svfloat64_t, ++ z0_res = svcvt_u64_f64_x (p0, z0), ++ z0_res = svcvt_u64_x (p0, z0)) ++ ++/* ++** cvt_u64_f64_x_untied: ++** fcvtzu z0\.d, p0/m, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (cvt_u64_f64_x_untied, svuint64_t, svfloat64_t, ++ z0 = svcvt_u64_f64_x (p0, z4), ++ z0 = svcvt_u64_x (p0, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c +new file mode 100644 +index 000000000..54614c95d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c +@@ -0,0 +1,90 @@ ++/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ ++/* { dg-require-effective-target aarch64_asm_bf16_ok } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** cvtnt_bf16_f32_m_tied1: ++** bfcvtnt z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvtnt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t, ++ z0 = svcvtnt_bf16_f32_m (z0, p0, z4), ++ z0 = svcvtnt_bf16_m (z0, p0, z4)) ++ ++/* Bad RA choice: no preferred output sequence. */ ++TEST_DUAL_Z_REV (cvtnt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t, ++ z0_res = svcvtnt_bf16_f32_m (z4, p0, z0), ++ z0_res = svcvtnt_bf16_m (z4, p0, z0)) ++ ++/* ++** cvtnt_bf16_f32_m_untied: ++** ( ++** mov z0\.d, z1\.d ++** bfcvtnt z0\.h, p0/m, z4\.s ++** | ++** bfcvtnt z1\.h, p0/m, z4\.s ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_DUAL_Z (cvtnt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t, ++ z0 = svcvtnt_bf16_f32_m (z1, p0, z4), ++ z0 = svcvtnt_bf16_m (z1, p0, z4)) ++ ++/* ++** cvtnt_bf16_f32_x_tied1: ++** bfcvtnt z0\.h, p0/m, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, ++ z0 = svcvtnt_bf16_f32_x (z0, p0, z4), ++ z0 = svcvtnt_bf16_x (z0, p0, z4)) ++ ++/* Bad RA choice: no preferred output sequence. */ ++TEST_DUAL_Z_REV (cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t, ++ z0_res = svcvtnt_bf16_f32_x (z4, p0, z0), ++ z0_res = svcvtnt_bf16_x (z4, p0, z0)) ++ ++/* ++** cvtnt_bf16_f32_x_untied: ++** ( ++** mov z0\.d, z1\.d ++** bfcvtnt z0\.h, p0/m, z4\.s ++** | ++** bfcvtnt z1\.h, p0/m, z4\.s ++** mov z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_DUAL_Z (cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, ++ z0 = svcvtnt_bf16_f32_x (z1, p0, z4), ++ z0 = svcvtnt_bf16_x (z1, p0, z4)) ++ ++/* ++** ptrue_cvtnt_bf16_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, ++ z0 = svcvtnt_bf16_f32_x (z0, svptrue_b32 (), z4), ++ z0 = svcvtnt_bf16_x (z0, svptrue_b32 (), z4)) ++ ++/* Bad RA choice: no preferred output sequence. */ ++TEST_DUAL_Z_REV (ptrue_cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t, ++ z0_res = svcvtnt_bf16_f32_x (z4, svptrue_b32 (), z0), ++ z0_res = svcvtnt_bf16_x (z4, svptrue_b32 (), z0)) ++ ++/* ++** ptrue_cvtnt_bf16_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, ++ z0 = svcvtnt_bf16_f32_x (z1, svptrue_b32 (), z4), ++ z0 = svcvtnt_bf16_x (z1, svptrue_b32 (), z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f16.c +new file mode 100644 +index 000000000..35f5c1589 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f16.c +@@ -0,0 +1,303 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** div_f16_m_tied1: ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (div_f16_m_tied1, svfloat16_t, ++ z0 = svdiv_f16_m (p0, z0, z1), ++ z0 = svdiv_m (p0, z0, z1)) ++ ++/* ++** div_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fdiv z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (div_f16_m_tied2, svfloat16_t, ++ z0 = svdiv_f16_m (p0, z1, z0), ++ z0 = svdiv_m (p0, z1, z0)) ++ ++/* ++** div_f16_m_untied: ++** movprfx z0, z1 ++** fdiv z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (div_f16_m_untied, svfloat16_t, ++ z0 = svdiv_f16_m (p0, z1, z2), ++ z0 = svdiv_m (p0, z1, z2)) ++ ++/* ++** div_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svdiv_n_f16_m (p0, z0, d4), ++ z0 = svdiv_m (p0, z0, d4)) ++ ++/* ++** div_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svdiv_n_f16_m (p0, z1, d4), ++ z0 = svdiv_m (p0, z1, d4)) ++ ++/* ++** div_1_f16_m_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f16_m_tied1, svfloat16_t, ++ z0 = svdiv_n_f16_m (p0, z0, 1), ++ z0 = svdiv_m (p0, z0, 1)) ++ ++/* ++** div_1_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f16_m_untied, svfloat16_t, ++ z0 = svdiv_n_f16_m (p0, z1, 1), ++ z0 = svdiv_m (p0, z1, 1)) ++ ++/* ++** div_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (div_f16_z_tied1, svfloat16_t, ++ z0 = svdiv_f16_z (p0, z0, z1), ++ z0 = svdiv_z (p0, z0, z1)) ++ ++/* ++** div_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (div_f16_z_tied2, svfloat16_t, ++ z0 = svdiv_f16_z (p0, z1, z0), ++ z0 = svdiv_z (p0, z1, z0)) ++ ++/* ++** div_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fdiv z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_f16_z_untied, svfloat16_t, ++ z0 = svdiv_f16_z (p0, z1, z2), ++ z0 = svdiv_z (p0, z1, z2)) ++ ++/* ++** div_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svdiv_n_f16_z (p0, z0, d4), ++ z0 = svdiv_z (p0, z0, d4)) ++ ++/* ++** div_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (div_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svdiv_n_f16_z (p0, z1, d4), ++ z0 = svdiv_z (p0, z1, d4)) ++ ++/* ++** div_1_f16_z_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f16_z_tied1, svfloat16_t, ++ z0 = svdiv_n_f16_z (p0, z0, 1), ++ z0 = svdiv_z (p0, z0, 1)) ++ ++/* ++** div_1_f16_z_untied: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f16_z_untied, svfloat16_t, ++ z0 = svdiv_n_f16_z (p0, z1, 1), ++ z0 = svdiv_z (p0, z1, 1)) ++ ++/* ++** div_0p5_f16_z: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** movprfx z0\.h, p0/z, z0\.h ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_0p5_f16_z, svfloat16_t, ++ z0 = svdiv_n_f16_z (p0, z0, 0.5), ++ z0 = svdiv_z (p0, z0, 0.5)) ++ ++/* ++** div_f16_x_tied1: ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (div_f16_x_tied1, svfloat16_t, ++ z0 = svdiv_f16_x (p0, z0, z1), ++ z0 = svdiv_x (p0, z0, z1)) ++ ++/* ++** div_f16_x_tied2: ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (div_f16_x_tied2, svfloat16_t, ++ z0 = svdiv_f16_x (p0, z1, z0), ++ z0 = svdiv_x (p0, z1, z0)) ++ ++/* ++** div_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fdiv z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_f16_x_untied, svfloat16_t, ++ z0 = svdiv_f16_x (p0, z1, z2), ++ z0 = svdiv_x (p0, z1, z2)) ++ ++/* ++** div_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svdiv_n_f16_x (p0, z0, d4), ++ z0 = svdiv_x (p0, z0, d4)) ++ ++/* ++** div_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (div_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svdiv_n_f16_x (p0, z1, d4), ++ z0 = svdiv_x (p0, z1, d4)) ++ ++/* ++** div_1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fdiv z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f16_x_tied1, svfloat16_t, ++ z0 = svdiv_n_f16_x (p0, z0, 1), ++ z0 = svdiv_x (p0, z0, 1)) ++ ++/* ++** div_1_f16_x_untied: ++** fmov z0\.h, #1\.0(?:e\+0)? ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f16_x_untied, svfloat16_t, ++ z0 = svdiv_n_f16_x (p0, z1, 1), ++ z0 = svdiv_x (p0, z1, 1)) ++ ++/* ++** ptrue_div_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_f16_x_tied1, svfloat16_t, ++ z0 = svdiv_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svdiv_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_div_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_f16_x_tied2, svfloat16_t, ++ z0 = svdiv_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svdiv_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_div_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_f16_x_untied, svfloat16_t, ++ z0 = svdiv_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svdiv_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_div_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_1_f16_x_tied1, svfloat16_t, ++ z0 = svdiv_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svdiv_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_div_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_1_f16_x_untied, svfloat16_t, ++ z0 = svdiv_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svdiv_x (svptrue_b16 (), z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f32.c +new file mode 100644 +index 000000000..40cc203da +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f32.c +@@ -0,0 +1,303 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** div_f32_m_tied1: ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_f32_m_tied1, svfloat32_t, ++ z0 = svdiv_f32_m (p0, z0, z1), ++ z0 = svdiv_m (p0, z0, z1)) ++ ++/* ++** div_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fdiv z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_f32_m_tied2, svfloat32_t, ++ z0 = svdiv_f32_m (p0, z1, z0), ++ z0 = svdiv_m (p0, z1, z0)) ++ ++/* ++** div_f32_m_untied: ++** movprfx z0, z1 ++** fdiv z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_f32_m_untied, svfloat32_t, ++ z0 = svdiv_f32_m (p0, z1, z2), ++ z0 = svdiv_m (p0, z1, z2)) ++ ++/* ++** div_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svdiv_n_f32_m (p0, z0, d4), ++ z0 = svdiv_m (p0, z0, d4)) ++ ++/* ++** div_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svdiv_n_f32_m (p0, z1, d4), ++ z0 = svdiv_m (p0, z1, d4)) ++ ++/* ++** div_1_f32_m_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f32_m_tied1, svfloat32_t, ++ z0 = svdiv_n_f32_m (p0, z0, 1), ++ z0 = svdiv_m (p0, z0, 1)) ++ ++/* ++** div_1_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f32_m_untied, svfloat32_t, ++ z0 = svdiv_n_f32_m (p0, z1, 1), ++ z0 = svdiv_m (p0, z1, 1)) ++ ++/* ++** div_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_f32_z_tied1, svfloat32_t, ++ z0 = svdiv_f32_z (p0, z0, z1), ++ z0 = svdiv_z (p0, z0, z1)) ++ ++/* ++** div_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_f32_z_tied2, svfloat32_t, ++ z0 = svdiv_f32_z (p0, z1, z0), ++ z0 = svdiv_z (p0, z1, z0)) ++ ++/* ++** div_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fdiv z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_f32_z_untied, svfloat32_t, ++ z0 = svdiv_f32_z (p0, z1, z2), ++ z0 = svdiv_z (p0, z1, z2)) ++ ++/* ++** div_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svdiv_n_f32_z (p0, z0, d4), ++ z0 = svdiv_z (p0, z0, d4)) ++ ++/* ++** div_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (div_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svdiv_n_f32_z (p0, z1, d4), ++ z0 = svdiv_z (p0, z1, d4)) ++ ++/* ++** div_1_f32_z_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f32_z_tied1, svfloat32_t, ++ z0 = svdiv_n_f32_z (p0, z0, 1), ++ z0 = svdiv_z (p0, z0, 1)) ++ ++/* ++** div_1_f32_z_untied: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f32_z_untied, svfloat32_t, ++ z0 = svdiv_n_f32_z (p0, z1, 1), ++ z0 = svdiv_z (p0, z1, 1)) ++ ++/* ++** div_0p5_f32_z: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** movprfx z0\.s, p0/z, z0\.s ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_0p5_f32_z, svfloat32_t, ++ z0 = svdiv_n_f32_z (p0, z0, 0.5), ++ z0 = svdiv_z (p0, z0, 0.5)) ++ ++/* ++** div_f32_x_tied1: ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_f32_x_tied1, svfloat32_t, ++ z0 = svdiv_f32_x (p0, z0, z1), ++ z0 = svdiv_x (p0, z0, z1)) ++ ++/* ++** div_f32_x_tied2: ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_f32_x_tied2, svfloat32_t, ++ z0 = svdiv_f32_x (p0, z1, z0), ++ z0 = svdiv_x (p0, z1, z0)) ++ ++/* ++** div_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fdiv z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_f32_x_untied, svfloat32_t, ++ z0 = svdiv_f32_x (p0, z1, z2), ++ z0 = svdiv_x (p0, z1, z2)) ++ ++/* ++** div_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svdiv_n_f32_x (p0, z0, d4), ++ z0 = svdiv_x (p0, z0, d4)) ++ ++/* ++** div_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (div_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svdiv_n_f32_x (p0, z1, d4), ++ z0 = svdiv_x (p0, z1, d4)) ++ ++/* ++** div_1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f32_x_tied1, svfloat32_t, ++ z0 = svdiv_n_f32_x (p0, z0, 1), ++ z0 = svdiv_x (p0, z0, 1)) ++ ++/* ++** div_1_f32_x_untied: ++** fmov z0\.s, #1\.0(?:e\+0)? ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f32_x_untied, svfloat32_t, ++ z0 = svdiv_n_f32_x (p0, z1, 1), ++ z0 = svdiv_x (p0, z1, 1)) ++ ++/* ++** ptrue_div_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_f32_x_tied1, svfloat32_t, ++ z0 = svdiv_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svdiv_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_div_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_f32_x_tied2, svfloat32_t, ++ z0 = svdiv_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svdiv_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_div_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_f32_x_untied, svfloat32_t, ++ z0 = svdiv_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svdiv_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_div_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_1_f32_x_tied1, svfloat32_t, ++ z0 = svdiv_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svdiv_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_div_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_1_f32_x_untied, svfloat32_t, ++ z0 = svdiv_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svdiv_x (svptrue_b32 (), z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f64.c +new file mode 100644 +index 000000000..56acbbe95 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f64.c +@@ -0,0 +1,303 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** div_f64_m_tied1: ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_f64_m_tied1, svfloat64_t, ++ z0 = svdiv_f64_m (p0, z0, z1), ++ z0 = svdiv_m (p0, z0, z1)) ++ ++/* ++** div_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_f64_m_tied2, svfloat64_t, ++ z0 = svdiv_f64_m (p0, z1, z0), ++ z0 = svdiv_m (p0, z1, z0)) ++ ++/* ++** div_f64_m_untied: ++** movprfx z0, z1 ++** fdiv z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_f64_m_untied, svfloat64_t, ++ z0 = svdiv_f64_m (p0, z1, z2), ++ z0 = svdiv_m (p0, z1, z2)) ++ ++/* ++** div_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svdiv_n_f64_m (p0, z0, d4), ++ z0 = svdiv_m (p0, z0, d4)) ++ ++/* ++** div_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svdiv_n_f64_m (p0, z1, d4), ++ z0 = svdiv_m (p0, z1, d4)) ++ ++/* ++** div_1_f64_m_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f64_m_tied1, svfloat64_t, ++ z0 = svdiv_n_f64_m (p0, z0, 1), ++ z0 = svdiv_m (p0, z0, 1)) ++ ++/* ++** div_1_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f64_m_untied, svfloat64_t, ++ z0 = svdiv_n_f64_m (p0, z1, 1), ++ z0 = svdiv_m (p0, z1, 1)) ++ ++/* ++** div_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_f64_z_tied1, svfloat64_t, ++ z0 = svdiv_f64_z (p0, z0, z1), ++ z0 = svdiv_z (p0, z0, z1)) ++ ++/* ++** div_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_f64_z_tied2, svfloat64_t, ++ z0 = svdiv_f64_z (p0, z1, z0), ++ z0 = svdiv_z (p0, z1, z0)) ++ ++/* ++** div_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fdiv z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_f64_z_untied, svfloat64_t, ++ z0 = svdiv_f64_z (p0, z1, z2), ++ z0 = svdiv_z (p0, z1, z2)) ++ ++/* ++** div_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svdiv_n_f64_z (p0, z0, d4), ++ z0 = svdiv_z (p0, z0, d4)) ++ ++/* ++** div_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (div_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svdiv_n_f64_z (p0, z1, d4), ++ z0 = svdiv_z (p0, z1, d4)) ++ ++/* ++** div_1_f64_z_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f64_z_tied1, svfloat64_t, ++ z0 = svdiv_n_f64_z (p0, z0, 1), ++ z0 = svdiv_z (p0, z0, 1)) ++ ++/* ++** div_1_f64_z_untied: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f64_z_untied, svfloat64_t, ++ z0 = svdiv_n_f64_z (p0, z1, 1), ++ z0 = svdiv_z (p0, z1, 1)) ++ ++/* ++** div_0p5_f64_z: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** movprfx z0\.d, p0/z, z0\.d ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_0p5_f64_z, svfloat64_t, ++ z0 = svdiv_n_f64_z (p0, z0, 0.5), ++ z0 = svdiv_z (p0, z0, 0.5)) ++ ++/* ++** div_f64_x_tied1: ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_f64_x_tied1, svfloat64_t, ++ z0 = svdiv_f64_x (p0, z0, z1), ++ z0 = svdiv_x (p0, z0, z1)) ++ ++/* ++** div_f64_x_tied2: ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_f64_x_tied2, svfloat64_t, ++ z0 = svdiv_f64_x (p0, z1, z0), ++ z0 = svdiv_x (p0, z1, z0)) ++ ++/* ++** div_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fdiv z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_f64_x_untied, svfloat64_t, ++ z0 = svdiv_f64_x (p0, z1, z2), ++ z0 = svdiv_x (p0, z1, z2)) ++ ++/* ++** div_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (div_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svdiv_n_f64_x (p0, z0, d4), ++ z0 = svdiv_x (p0, z0, d4)) ++ ++/* ++** div_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (div_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svdiv_n_f64_x (p0, z1, d4), ++ z0 = svdiv_x (p0, z1, d4)) ++ ++/* ++** div_1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f64_x_tied1, svfloat64_t, ++ z0 = svdiv_n_f64_x (p0, z0, 1), ++ z0 = svdiv_x (p0, z0, 1)) ++ ++/* ++** div_1_f64_x_untied: ++** fmov z0\.d, #1\.0(?:e\+0)? ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_1_f64_x_untied, svfloat64_t, ++ z0 = svdiv_n_f64_x (p0, z1, 1), ++ z0 = svdiv_x (p0, z1, 1)) ++ ++/* ++** ptrue_div_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_f64_x_tied1, svfloat64_t, ++ z0 = svdiv_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svdiv_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_div_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_f64_x_tied2, svfloat64_t, ++ z0 = svdiv_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svdiv_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_div_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_f64_x_untied, svfloat64_t, ++ z0 = svdiv_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svdiv_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_div_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_1_f64_x_tied1, svfloat64_t, ++ z0 = svdiv_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svdiv_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_div_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_div_1_f64_x_untied, svfloat64_t, ++ z0 = svdiv_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svdiv_x (svptrue_b64 (), z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c +new file mode 100644 +index 000000000..8e70ae797 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** div_s32_m_tied1: ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_s32_m_tied1, svint32_t, ++ z0 = svdiv_s32_m (p0, z0, z1), ++ z0 = svdiv_m (p0, z0, z1)) ++ ++/* ++** div_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sdiv z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_s32_m_tied2, svint32_t, ++ z0 = svdiv_s32_m (p0, z1, z0), ++ z0 = svdiv_m (p0, z1, z0)) ++ ++/* ++** div_s32_m_untied: ++** movprfx z0, z1 ++** sdiv z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_s32_m_untied, svint32_t, ++ z0 = svdiv_s32_m (p0, z1, z2), ++ z0 = svdiv_m (p0, z1, z2)) ++ ++/* ++** div_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** sdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svdiv_n_s32_m (p0, z0, x0), ++ z0 = svdiv_m (p0, z0, x0)) ++ ++/* ++** div_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** sdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svdiv_n_s32_m (p0, z1, x0), ++ z0 = svdiv_m (p0, z1, x0)) ++ ++/* ++** div_2_s32_m_tied1: ++** mov (z[0-9]+\.s), #2 ++** sdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s32_m_tied1, svint32_t, ++ z0 = svdiv_n_s32_m (p0, z0, 2), ++ z0 = svdiv_m (p0, z0, 2)) ++ ++/* ++** div_2_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #2 ++** movprfx z0, z1 ++** sdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s32_m_untied, svint32_t, ++ z0 = svdiv_n_s32_m (p0, z1, 2), ++ z0 = svdiv_m (p0, z1, 2)) ++ ++/* ++** div_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_s32_z_tied1, svint32_t, ++ z0 = svdiv_s32_z (p0, z0, z1), ++ z0 = svdiv_z (p0, z0, z1)) ++ ++/* ++** div_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_s32_z_tied2, svint32_t, ++ z0 = svdiv_s32_z (p0, z1, z0), ++ z0 = svdiv_z (p0, z1, z0)) ++ ++/* ++** div_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sdiv z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_s32_z_untied, svint32_t, ++ z0 = svdiv_s32_z (p0, z1, z2), ++ z0 = svdiv_z (p0, z1, z2)) ++ ++/* ++** div_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** sdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svdiv_n_s32_z (p0, z0, x0), ++ z0 = svdiv_z (p0, z0, x0)) ++ ++/* ++** div_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sdiv z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svdiv_n_s32_z (p0, z1, x0), ++ z0 = svdiv_z (p0, z1, x0)) ++ ++/* ++** div_2_s32_z_tied1: ++** mov 
(z[0-9]+\.s), #2 ++** movprfx z0\.s, p0/z, z0\.s ++** sdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s32_z_tied1, svint32_t, ++ z0 = svdiv_n_s32_z (p0, z0, 2), ++ z0 = svdiv_z (p0, z0, 2)) ++ ++/* ++** div_2_s32_z_untied: ++** mov (z[0-9]+\.s), #2 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sdiv z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s32_z_untied, svint32_t, ++ z0 = svdiv_n_s32_z (p0, z1, 2), ++ z0 = svdiv_z (p0, z1, 2)) ++ ++/* ++** div_s32_x_tied1: ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_s32_x_tied1, svint32_t, ++ z0 = svdiv_s32_x (p0, z0, z1), ++ z0 = svdiv_x (p0, z0, z1)) ++ ++/* ++** div_s32_x_tied2: ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_s32_x_tied2, svint32_t, ++ z0 = svdiv_s32_x (p0, z1, z0), ++ z0 = svdiv_x (p0, z1, z0)) ++ ++/* ++** div_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** sdiv z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_s32_x_untied, svint32_t, ++ z0 = svdiv_s32_x (p0, z1, z2), ++ z0 = svdiv_x (p0, z1, z2)) ++ ++/* ++** div_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** sdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svdiv_n_s32_x (p0, z0, x0), ++ z0 = svdiv_x (p0, z0, x0)) ++ ++/* ++** div_w0_s32_x_untied: ++** mov z0\.s, w0 ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svdiv_n_s32_x (p0, z1, x0), ++ z0 = svdiv_x (p0, z1, x0)) ++ ++/* ++** div_2_s32_x_tied1: ++** mov (z[0-9]+\.s), #2 ++** sdiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s32_x_tied1, svint32_t, ++ z0 = svdiv_n_s32_x (p0, z0, 2), ++ z0 = svdiv_x (p0, z0, 2)) ++ ++/* ++** div_2_s32_x_untied: ++** mov z0\.s, #2 ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s32_x_untied, svint32_t, ++ z0 = svdiv_n_s32_x (p0, z1, 2), ++ z0 = svdiv_x (p0, z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c +new file mode 100644 +index 000000000..439da1f57 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** div_s64_m_tied1: ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_s64_m_tied1, svint64_t, ++ z0 = svdiv_s64_m (p0, z0, z1), ++ z0 = svdiv_m (p0, z0, z1)) ++ ++/* ++** div_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_s64_m_tied2, svint64_t, ++ z0 = svdiv_s64_m (p0, z1, z0), ++ z0 = svdiv_m (p0, z1, z0)) ++ ++/* ++** div_s64_m_untied: ++** movprfx z0, z1 ++** sdiv z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_s64_m_untied, svint64_t, ++ z0 = svdiv_s64_m (p0, z1, z2), ++ z0 = svdiv_m (p0, z1, z2)) ++ ++/* ++** div_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svdiv_n_s64_m (p0, z0, x0), ++ z0 = svdiv_m (p0, z0, x0)) ++ ++/* ++** div_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = 
svdiv_n_s64_m (p0, z1, x0), ++ z0 = svdiv_m (p0, z1, x0)) ++ ++/* ++** div_2_s64_m_tied1: ++** mov (z[0-9]+\.d), #2 ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s64_m_tied1, svint64_t, ++ z0 = svdiv_n_s64_m (p0, z0, 2), ++ z0 = svdiv_m (p0, z0, 2)) ++ ++/* ++** div_2_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #2 ++** movprfx z0, z1 ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s64_m_untied, svint64_t, ++ z0 = svdiv_n_s64_m (p0, z1, 2), ++ z0 = svdiv_m (p0, z1, 2)) ++ ++/* ++** div_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_s64_z_tied1, svint64_t, ++ z0 = svdiv_s64_z (p0, z0, z1), ++ z0 = svdiv_z (p0, z0, z1)) ++ ++/* ++** div_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_s64_z_tied2, svint64_t, ++ z0 = svdiv_s64_z (p0, z1, z0), ++ z0 = svdiv_z (p0, z1, z0)) ++ ++/* ++** div_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sdiv z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_s64_z_untied, svint64_t, ++ z0 = svdiv_s64_z (p0, z1, z2), ++ z0 = svdiv_z (p0, z1, z2)) ++ ++/* ++** div_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svdiv_n_s64_z (p0, z0, x0), ++ z0 = svdiv_z (p0, z0, x0)) ++ ++/* ++** div_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svdiv_n_s64_z (p0, z1, x0), ++ z0 = svdiv_z (p0, z1, x0)) ++ ++/* ++** div_2_s64_z_tied1: ++** mov (z[0-9]+\.d), #2 ++** movprfx z0\.d, p0/z, z0\.d ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s64_z_tied1, svint64_t, ++ z0 = svdiv_n_s64_z (p0, z0, 2), ++ z0 = svdiv_z (p0, z0, 2)) ++ ++/* ++** div_2_s64_z_untied: ++** mov (z[0-9]+\.d), #2 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s64_z_untied, svint64_t, ++ z0 = svdiv_n_s64_z (p0, z1, 2), ++ z0 = svdiv_z (p0, z1, 2)) ++ ++/* ++** div_s64_x_tied1: ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_s64_x_tied1, svint64_t, ++ z0 = svdiv_s64_x (p0, z0, z1), ++ z0 = svdiv_x (p0, z0, z1)) ++ ++/* ++** div_s64_x_tied2: ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_s64_x_tied2, svint64_t, ++ z0 = svdiv_s64_x (p0, z1, z0), ++ z0 = svdiv_x (p0, z1, z0)) ++ ++/* ++** div_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** sdiv z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_s64_x_untied, svint64_t, ++ z0 = svdiv_s64_x (p0, z1, z2), ++ z0 = svdiv_x (p0, z1, z2)) ++ ++/* ++** div_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svdiv_n_s64_x (p0, z0, x0), ++ z0 = svdiv_x (p0, z0, x0)) ++ ++/* ++** div_x0_s64_x_untied: ++** mov z0\.d, x0 ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_s64_x_untied, svint64_t, 
int64_t, ++ z0 = svdiv_n_s64_x (p0, z1, x0), ++ z0 = svdiv_x (p0, z1, x0)) ++ ++/* ++** div_2_s64_x_tied1: ++** mov (z[0-9]+\.d), #2 ++** sdiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s64_x_tied1, svint64_t, ++ z0 = svdiv_n_s64_x (p0, z0, 2), ++ z0 = svdiv_x (p0, z0, 2)) ++ ++/* ++** div_2_s64_x_untied: ++** mov z0\.d, #2 ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_s64_x_untied, svint64_t, ++ z0 = svdiv_n_s64_x (p0, z1, 2), ++ z0 = svdiv_x (p0, z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c +new file mode 100644 +index 000000000..8e8e464b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** div_u32_m_tied1: ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_u32_m_tied1, svuint32_t, ++ z0 = svdiv_u32_m (p0, z0, z1), ++ z0 = svdiv_m (p0, z0, z1)) ++ ++/* ++** div_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** udiv z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_u32_m_tied2, svuint32_t, ++ z0 = svdiv_u32_m (p0, z1, z0), ++ z0 = svdiv_m (p0, z1, z0)) ++ ++/* ++** div_u32_m_untied: ++** movprfx z0, z1 ++** udiv z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_u32_m_untied, svuint32_t, ++ z0 = svdiv_u32_m (p0, z1, z2), ++ z0 = svdiv_m (p0, z1, z2)) ++ ++/* ++** div_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** udiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svdiv_n_u32_m (p0, z0, x0), ++ z0 = svdiv_m (p0, z0, x0)) ++ ++/* ++** div_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** udiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svdiv_n_u32_m (p0, z1, x0), ++ z0 = svdiv_m (p0, z1, x0)) ++ ++/* ++** div_2_u32_m_tied1: ++** mov (z[0-9]+\.s), #2 ++** udiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u32_m_tied1, svuint32_t, ++ z0 = svdiv_n_u32_m (p0, z0, 2), ++ z0 = svdiv_m (p0, z0, 2)) ++ ++/* ++** div_2_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #2 ++** movprfx z0, z1 ++** udiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u32_m_untied, svuint32_t, ++ z0 = svdiv_n_u32_m (p0, z1, 2), ++ z0 = svdiv_m (p0, z1, 2)) ++ ++/* ++** div_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_u32_z_tied1, svuint32_t, ++ z0 = svdiv_u32_z (p0, z0, z1), ++ z0 = svdiv_z (p0, z0, z1)) ++ ++/* ++** div_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_u32_z_tied2, svuint32_t, ++ z0 = svdiv_u32_z (p0, z1, z0), ++ z0 = svdiv_z (p0, z1, z0)) ++ ++/* ++** div_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** udiv z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_u32_z_untied, svuint32_t, ++ z0 = svdiv_u32_z (p0, z1, z2), ++ z0 = svdiv_z (p0, z1, z2)) ++ ++/* ++** div_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** udiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svdiv_n_u32_z (p0, z0, x0), ++ z0 = svdiv_z (p0, z0, x0)) ++ ++/* ++** div_w0_u32_z_untied: 
++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** udiv z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svdiv_n_u32_z (p0, z1, x0), ++ z0 = svdiv_z (p0, z1, x0)) ++ ++/* ++** div_2_u32_z_tied1: ++** mov (z[0-9]+\.s), #2 ++** movprfx z0\.s, p0/z, z0\.s ++** udiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u32_z_tied1, svuint32_t, ++ z0 = svdiv_n_u32_z (p0, z0, 2), ++ z0 = svdiv_z (p0, z0, 2)) ++ ++/* ++** div_2_u32_z_untied: ++** mov (z[0-9]+\.s), #2 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** udiv z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u32_z_untied, svuint32_t, ++ z0 = svdiv_n_u32_z (p0, z1, 2), ++ z0 = svdiv_z (p0, z1, 2)) ++ ++/* ++** div_u32_x_tied1: ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_u32_x_tied1, svuint32_t, ++ z0 = svdiv_u32_x (p0, z0, z1), ++ z0 = svdiv_x (p0, z0, z1)) ++ ++/* ++** div_u32_x_tied2: ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_u32_x_tied2, svuint32_t, ++ z0 = svdiv_u32_x (p0, z1, z0), ++ z0 = svdiv_x (p0, z1, z0)) ++ ++/* ++** div_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** udiv z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_u32_x_untied, svuint32_t, ++ z0 = svdiv_u32_x (p0, z1, z2), ++ z0 = svdiv_x (p0, z1, z2)) ++ ++/* ++** div_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** udiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svdiv_n_u32_x (p0, z0, x0), ++ z0 = svdiv_x (p0, z0, x0)) ++ ++/* ++** div_w0_u32_x_untied: ++** mov z0\.s, w0 ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (div_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svdiv_n_u32_x (p0, z1, x0), ++ z0 = svdiv_x (p0, z1, x0)) ++ ++/* ++** div_2_u32_x_tied1: ++** mov (z[0-9]+\.s), #2 ++** udiv z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u32_x_tied1, svuint32_t, ++ z0 = svdiv_n_u32_x (p0, z0, 2), ++ z0 = svdiv_x (p0, z0, 2)) ++ ++/* ++** div_2_u32_x_untied: ++** mov z0\.s, #2 ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u32_x_untied, svuint32_t, ++ z0 = svdiv_n_u32_x (p0, z1, 2), ++ z0 = svdiv_x (p0, z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c +new file mode 100644 +index 000000000..fc152e8e5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** div_u64_m_tied1: ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_u64_m_tied1, svuint64_t, ++ z0 = svdiv_u64_m (p0, z0, z1), ++ z0 = svdiv_m (p0, z0, z1)) ++ ++/* ++** div_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** udiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_u64_m_tied2, svuint64_t, ++ z0 = svdiv_u64_m (p0, z1, z0), ++ z0 = svdiv_m (p0, z1, z0)) ++ ++/* ++** div_u64_m_untied: ++** movprfx z0, z1 ++** udiv z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_u64_m_untied, svuint64_t, ++ z0 = svdiv_u64_m (p0, z1, z2), ++ z0 = svdiv_m (p0, z1, z2)) ++ ++/* ++** div_x0_u64_m_tied1: ++** mov 
(z[0-9]+\.d), x0 ++** udiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svdiv_n_u64_m (p0, z0, x0), ++ z0 = svdiv_m (p0, z0, x0)) ++ ++/* ++** div_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** udiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svdiv_n_u64_m (p0, z1, x0), ++ z0 = svdiv_m (p0, z1, x0)) ++ ++/* ++** div_2_u64_m_tied1: ++** mov (z[0-9]+\.d), #2 ++** udiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u64_m_tied1, svuint64_t, ++ z0 = svdiv_n_u64_m (p0, z0, 2), ++ z0 = svdiv_m (p0, z0, 2)) ++ ++/* ++** div_2_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #2 ++** movprfx z0, z1 ++** udiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u64_m_untied, svuint64_t, ++ z0 = svdiv_n_u64_m (p0, z1, 2), ++ z0 = svdiv_m (p0, z1, 2)) ++ ++/* ++** div_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_u64_z_tied1, svuint64_t, ++ z0 = svdiv_u64_z (p0, z0, z1), ++ z0 = svdiv_z (p0, z0, z1)) ++ ++/* ++** div_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_u64_z_tied2, svuint64_t, ++ z0 = svdiv_u64_z (p0, z1, z0), ++ z0 = svdiv_z (p0, z1, z0)) ++ ++/* ++** div_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** udiv z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_u64_z_untied, svuint64_t, ++ z0 = svdiv_u64_z (p0, z1, z2), ++ z0 = svdiv_z (p0, z1, z2)) ++ ++/* ++** div_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** udiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svdiv_n_u64_z (p0, z0, x0), ++ z0 = svdiv_z (p0, z0, x0)) ++ ++/* ++** div_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** udiv z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svdiv_n_u64_z (p0, z1, x0), ++ z0 = svdiv_z (p0, z1, x0)) ++ ++/* ++** div_2_u64_z_tied1: ++** mov (z[0-9]+\.d), #2 ++** movprfx z0\.d, p0/z, z0\.d ++** udiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u64_z_tied1, svuint64_t, ++ z0 = svdiv_n_u64_z (p0, z0, 2), ++ z0 = svdiv_z (p0, z0, 2)) ++ ++/* ++** div_2_u64_z_untied: ++** mov (z[0-9]+\.d), #2 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** udiv z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u64_z_untied, svuint64_t, ++ z0 = svdiv_n_u64_z (p0, z1, 2), ++ z0 = svdiv_z (p0, z1, 2)) ++ ++/* ++** div_u64_x_tied1: ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_u64_x_tied1, svuint64_t, ++ z0 = svdiv_u64_x (p0, z0, z1), ++ z0 = svdiv_x (p0, z0, z1)) ++ ++/* ++** div_u64_x_tied2: ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_u64_x_tied2, svuint64_t, ++ z0 = svdiv_u64_x (p0, z1, z0), ++ z0 = svdiv_x (p0, z1, z0)) ++ ++/* ++** div_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** udiv z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (div_u64_x_untied, svuint64_t, ++ z0 = svdiv_u64_x (p0, z1, z2), ++ z0 = svdiv_x (p0, 
z1, z2)) ++ ++/* ++** div_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** udiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svdiv_n_u64_x (p0, z0, x0), ++ z0 = svdiv_x (p0, z0, x0)) ++ ++/* ++** div_x0_u64_x_untied: ++** mov z0\.d, x0 ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (div_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svdiv_n_u64_x (p0, z1, x0), ++ z0 = svdiv_x (p0, z1, x0)) ++ ++/* ++** div_2_u64_x_tied1: ++** mov (z[0-9]+\.d), #2 ++** udiv z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u64_x_tied1, svuint64_t, ++ z0 = svdiv_n_u64_x (p0, z0, 2), ++ z0 = svdiv_x (p0, z0, 2)) ++ ++/* ++** div_2_u64_x_untied: ++** mov z0\.d, #2 ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (div_2_u64_x_untied, svuint64_t, ++ z0 = svdiv_n_u64_x (p0, z1, 2), ++ z0 = svdiv_x (p0, z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f16.c +new file mode 100644 +index 000000000..03cc0343b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f16.c +@@ -0,0 +1,324 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** divr_f16_m_tied1: ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f16_m_tied1, svfloat16_t, ++ z0 = svdivr_f16_m (p0, z0, z1), ++ z0 = svdivr_m (p0, z0, z1)) ++ ++/* ++** divr_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fdivr z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f16_m_tied2, svfloat16_t, ++ z0 = svdivr_f16_m (p0, z1, z0), ++ z0 = svdivr_m (p0, z1, z0)) ++ ++/* ++** divr_f16_m_untied: ++** movprfx z0, z1 ++** fdivr z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f16_m_untied, svfloat16_t, ++ z0 = svdivr_f16_m (p0, z1, z2), ++ z0 = svdivr_m (p0, z1, z2)) ++ ++/* ++** divr_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svdivr_n_f16_m (p0, z0, d4), ++ z0 = svdivr_m (p0, z0, d4)) ++ ++/* ++** divr_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svdivr_n_f16_m (p0, z1, d4), ++ z0 = svdivr_m (p0, z1, d4)) ++ ++/* ++** divr_1_f16_m_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f16_m_tied1, svfloat16_t, ++ z0 = svdivr_n_f16_m (p0, z0, 1), ++ z0 = svdivr_m (p0, z0, 1)) ++ ++/* ++** divr_1_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f16_m_untied, svfloat16_t, ++ z0 = svdivr_n_f16_m (p0, z1, 1), ++ z0 = svdivr_m (p0, z1, 1)) ++ ++/* ++** divr_0p5_f16_m_tied1: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f16_m_tied1, svfloat16_t, ++ z0 = svdivr_n_f16_m (p0, z0, 0.5), ++ z0 = svdivr_m (p0, z0, 0.5)) ++ ++/* ++** divr_0p5_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** movprfx z0, z1 ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f16_m_untied, svfloat16_t, ++ z0 = svdivr_n_f16_m (p0, z1, 0.5), ++ z0 = svdivr_m (p0, z1, 0.5)) ++ ++/* ++** divr_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f16_z_tied1, svfloat16_t, ++ z0 = svdivr_f16_z (p0, z0, z1), ++ z0 = svdivr_z (p0, z0, z1)) ++ ++/* ++** divr_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f16_z_tied2, svfloat16_t, ++ z0 = svdivr_f16_z (p0, z1, z0), ++ z0 = svdivr_z (p0, z1, z0)) ++ ++/* ++** divr_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fdivr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f16_z_untied, svfloat16_t, ++ z0 = svdivr_f16_z (p0, z1, z2), ++ z0 = svdivr_z (p0, z1, z2)) ++ ++/* ++** divr_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svdivr_n_f16_z (p0, z0, d4), ++ z0 = svdivr_z (p0, z0, d4)) ++ ++/* ++** divr_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svdivr_n_f16_z (p0, z1, d4), ++ z0 = svdivr_z (p0, z1, d4)) ++ ++/* ++** divr_1_f16_z: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** movprfx z0\.h, p0/z, z0\.h ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f16_z, svfloat16_t, ++ z0 = svdivr_n_f16_z (p0, z0, 1), ++ z0 = svdivr_z (p0, z0, 1)) ++ ++/* ++** divr_0p5_f16_z_tied1: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** movprfx z0\.h, p0/z, z0\.h ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svdivr_n_f16_z (p0, z0, 0.5), ++ z0 = svdivr_z (p0, z0, 0.5)) ++ ++/* ++** divr_0p5_f16_z_untied: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f16_z_untied, svfloat16_t, ++ z0 = svdivr_n_f16_z (p0, z1, 0.5), ++ z0 = svdivr_z (p0, z1, 0.5)) ++ ++/* ++** divr_f16_x_tied1: ++** fdivr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f16_x_tied1, svfloat16_t, ++ z0 = svdivr_f16_x (p0, z0, z1), ++ z0 = svdivr_x (p0, z0, z1)) ++ ++/* ++** divr_f16_x_tied2: ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f16_x_tied2, svfloat16_t, ++ z0 = svdivr_f16_x (p0, z1, z0), ++ z0 = svdivr_x (p0, z1, z0)) ++ ++/* ++** divr_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fdivr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f16_x_untied, svfloat16_t, ++ z0 = svdivr_f16_x (p0, z1, z2), ++ z0 = svdivr_x (p0, z1, z2)) ++ ++/* ++** divr_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svdivr_n_f16_x (p0, z0, d4), ++ z0 = svdivr_x (p0, z0, d4)) ++ ++/* ++** divr_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svdivr_n_f16_x (p0, z1, d4), ++ z0 = svdivr_x (p0, z1, d4)) ++ ++/* ++** divr_1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fdivr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f16_x_tied1, svfloat16_t, ++ z0 = svdivr_n_f16_x (p0, z0, 1), ++ z0 = svdivr_x (p0, z0, 1)) ++ ++/* ++** divr_1_f16_x_untied: ++** fmov z0\.h, #1\.0(?:e\+0)? ++** fdiv z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f16_x_untied, svfloat16_t, ++ z0 = svdivr_n_f16_x (p0, z1, 1), ++ z0 = svdivr_x (p0, z1, 1)) ++ ++/* ++** ptrue_divr_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_f16_x_tied1, svfloat16_t, ++ z0 = svdivr_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svdivr_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_divr_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_f16_x_tied2, svfloat16_t, ++ z0 = svdivr_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svdivr_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_divr_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_f16_x_untied, svfloat16_t, ++ z0 = svdivr_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svdivr_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_divr_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_1_f16_x_tied1, svfloat16_t, ++ z0 = svdivr_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svdivr_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_divr_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_1_f16_x_untied, svfloat16_t, ++ z0 = svdivr_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svdivr_x (svptrue_b16 (), z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f32.c +new file mode 100644 +index 000000000..c2b65fc33 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f32.c +@@ -0,0 +1,324 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** divr_f32_m_tied1: ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f32_m_tied1, svfloat32_t, ++ z0 = svdivr_f32_m (p0, z0, z1), ++ z0 = svdivr_m (p0, z0, z1)) ++ ++/* ++** divr_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fdivr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f32_m_tied2, svfloat32_t, ++ z0 = svdivr_f32_m (p0, z1, z0), ++ z0 = svdivr_m (p0, z1, z0)) ++ ++/* ++** divr_f32_m_untied: ++** movprfx z0, z1 ++** fdivr z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f32_m_untied, svfloat32_t, ++ z0 = svdivr_f32_m (p0, z1, z2), ++ z0 = svdivr_m (p0, z1, z2)) ++ ++/* ++** divr_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svdivr_n_f32_m (p0, z0, d4), ++ z0 = svdivr_m (p0, z0, d4)) ++ ++/* ++** divr_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svdivr_n_f32_m (p0, z1, d4), ++ z0 = svdivr_m (p0, z1, d4)) ++ ++/* ++** divr_1_f32_m_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f32_m_tied1, svfloat32_t, ++ z0 = svdivr_n_f32_m (p0, z0, 1), ++ z0 = svdivr_m (p0, z0, 1)) ++ ++/* ++** divr_1_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f32_m_untied, svfloat32_t, ++ z0 = svdivr_n_f32_m (p0, z1, 1), ++ z0 = svdivr_m (p0, z1, 1)) ++ ++/* ++** divr_0p5_f32_m_tied1: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f32_m_tied1, svfloat32_t, ++ z0 = svdivr_n_f32_m (p0, z0, 0.5), ++ z0 = svdivr_m (p0, z0, 0.5)) ++ ++/* ++** divr_0p5_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** movprfx z0, z1 ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f32_m_untied, svfloat32_t, ++ z0 = svdivr_n_f32_m (p0, z1, 0.5), ++ z0 = svdivr_m (p0, z1, 0.5)) ++ ++/* ++** divr_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f32_z_tied1, svfloat32_t, ++ z0 = svdivr_f32_z (p0, z0, z1), ++ z0 = svdivr_z (p0, z0, z1)) ++ ++/* ++** divr_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f32_z_tied2, svfloat32_t, ++ z0 = svdivr_f32_z (p0, z1, z0), ++ z0 = svdivr_z (p0, z1, z0)) ++ ++/* ++** divr_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fdivr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f32_z_untied, svfloat32_t, ++ z0 = svdivr_f32_z (p0, z1, z2), ++ z0 = svdivr_z (p0, z1, z2)) ++ ++/* ++** divr_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svdivr_n_f32_z (p0, z0, d4), ++ z0 = svdivr_z (p0, z0, d4)) ++ ++/* ++** divr_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svdivr_n_f32_z (p0, z1, d4), ++ z0 = svdivr_z (p0, z1, d4)) ++ ++/* ++** divr_1_f32_z: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** movprfx z0\.s, p0/z, z0\.s ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f32_z, svfloat32_t, ++ z0 = svdivr_n_f32_z (p0, z0, 1), ++ z0 = svdivr_z (p0, z0, 1)) ++ ++/* ++** divr_0p5_f32_z_tied1: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** movprfx z0\.s, p0/z, z0\.s ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svdivr_n_f32_z (p0, z0, 0.5), ++ z0 = svdivr_z (p0, z0, 0.5)) ++ ++/* ++** divr_0p5_f32_z_untied: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f32_z_untied, svfloat32_t, ++ z0 = svdivr_n_f32_z (p0, z1, 0.5), ++ z0 = svdivr_z (p0, z1, 0.5)) ++ ++/* ++** divr_f32_x_tied1: ++** fdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f32_x_tied1, svfloat32_t, ++ z0 = svdivr_f32_x (p0, z0, z1), ++ z0 = svdivr_x (p0, z0, z1)) ++ ++/* ++** divr_f32_x_tied2: ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f32_x_tied2, svfloat32_t, ++ z0 = svdivr_f32_x (p0, z1, z0), ++ z0 = svdivr_x (p0, z1, z0)) ++ ++/* ++** divr_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fdivr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f32_x_untied, svfloat32_t, ++ z0 = svdivr_f32_x (p0, z1, z2), ++ z0 = svdivr_x (p0, z1, z2)) ++ ++/* ++** divr_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svdivr_n_f32_x (p0, z0, d4), ++ z0 = svdivr_x (p0, z0, d4)) ++ ++/* ++** divr_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svdivr_n_f32_x (p0, z1, d4), ++ z0 = svdivr_x (p0, z1, d4)) ++ ++/* ++** divr_1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f32_x_tied1, svfloat32_t, ++ z0 = svdivr_n_f32_x (p0, z0, 1), ++ z0 = svdivr_x (p0, z0, 1)) ++ ++/* ++** divr_1_f32_x_untied: ++** fmov z0\.s, #1\.0(?:e\+0)? ++** fdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f32_x_untied, svfloat32_t, ++ z0 = svdivr_n_f32_x (p0, z1, 1), ++ z0 = svdivr_x (p0, z1, 1)) ++ ++/* ++** ptrue_divr_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_f32_x_tied1, svfloat32_t, ++ z0 = svdivr_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svdivr_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_divr_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_f32_x_tied2, svfloat32_t, ++ z0 = svdivr_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svdivr_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_divr_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_f32_x_untied, svfloat32_t, ++ z0 = svdivr_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svdivr_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_divr_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_1_f32_x_tied1, svfloat32_t, ++ z0 = svdivr_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svdivr_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_divr_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_1_f32_x_untied, svfloat32_t, ++ z0 = svdivr_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svdivr_x (svptrue_b32 (), z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f64.c +new file mode 100644 +index 000000000..0a72a37b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f64.c +@@ -0,0 +1,324 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** divr_f64_m_tied1: ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f64_m_tied1, svfloat64_t, ++ z0 = svdivr_f64_m (p0, z0, z1), ++ z0 = svdivr_m (p0, z0, z1)) ++ ++/* ++** divr_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f64_m_tied2, svfloat64_t, ++ z0 = svdivr_f64_m (p0, z1, z0), ++ z0 = svdivr_m (p0, z1, z0)) ++ ++/* ++** divr_f64_m_untied: ++** movprfx z0, z1 ++** fdivr z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f64_m_untied, svfloat64_t, ++ z0 = svdivr_f64_m (p0, z1, z2), ++ z0 = svdivr_m (p0, z1, z2)) ++ ++/* ++** divr_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svdivr_n_f64_m (p0, z0, d4), ++ z0 = svdivr_m (p0, z0, d4)) ++ ++/* ++** divr_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svdivr_n_f64_m (p0, z1, d4), ++ z0 = svdivr_m (p0, z1, d4)) ++ ++/* ++** divr_1_f64_m_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f64_m_tied1, svfloat64_t, ++ z0 = svdivr_n_f64_m (p0, z0, 1), ++ z0 = svdivr_m (p0, z0, 1)) ++ ++/* ++** divr_1_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f64_m_untied, svfloat64_t, ++ z0 = svdivr_n_f64_m (p0, z1, 1), ++ z0 = svdivr_m (p0, z1, 1)) ++ ++/* ++** divr_0p5_f64_m_tied1: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f64_m_tied1, svfloat64_t, ++ z0 = svdivr_n_f64_m (p0, z0, 0.5), ++ z0 = svdivr_m (p0, z0, 0.5)) ++ ++/* ++** divr_0p5_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** movprfx z0, z1 ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f64_m_untied, svfloat64_t, ++ z0 = svdivr_n_f64_m (p0, z1, 0.5), ++ z0 = svdivr_m (p0, z1, 0.5)) ++ ++/* ++** divr_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f64_z_tied1, svfloat64_t, ++ z0 = svdivr_f64_z (p0, z0, z1), ++ z0 = svdivr_z (p0, z0, z1)) ++ ++/* ++** divr_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f64_z_tied2, svfloat64_t, ++ z0 = svdivr_f64_z (p0, z1, z0), ++ z0 = svdivr_z (p0, z1, z0)) ++ ++/* ++** divr_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fdivr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f64_z_untied, svfloat64_t, ++ z0 = svdivr_f64_z (p0, z1, z2), ++ z0 = svdivr_z (p0, z1, z2)) ++ ++/* ++** divr_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svdivr_n_f64_z (p0, z0, d4), ++ z0 = svdivr_z (p0, z0, d4)) ++ ++/* ++** divr_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svdivr_n_f64_z (p0, z1, d4), ++ z0 = svdivr_z (p0, z1, d4)) ++ ++/* ++** divr_1_f64_z: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** movprfx z0\.d, p0/z, z0\.d ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f64_z, svfloat64_t, ++ z0 = svdivr_n_f64_z (p0, z0, 1), ++ z0 = svdivr_z (p0, z0, 1)) ++ ++/* ++** divr_0p5_f64_z_tied1: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** movprfx z0\.d, p0/z, z0\.d ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svdivr_n_f64_z (p0, z0, 0.5), ++ z0 = svdivr_z (p0, z0, 0.5)) ++ ++/* ++** divr_0p5_f64_z_untied: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_0p5_f64_z_untied, svfloat64_t, ++ z0 = svdivr_n_f64_z (p0, z1, 0.5), ++ z0 = svdivr_z (p0, z1, 0.5)) ++ ++/* ++** divr_f64_x_tied1: ++** fdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f64_x_tied1, svfloat64_t, ++ z0 = svdivr_f64_x (p0, z0, z1), ++ z0 = svdivr_x (p0, z0, z1)) ++ ++/* ++** divr_f64_x_tied2: ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f64_x_tied2, svfloat64_t, ++ z0 = svdivr_f64_x (p0, z1, z0), ++ z0 = svdivr_x (p0, z1, z0)) ++ ++/* ++** divr_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fdivr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_f64_x_untied, svfloat64_t, ++ z0 = svdivr_f64_x (p0, z1, z2), ++ z0 = svdivr_x (p0, z1, z2)) ++ ++/* ++** divr_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svdivr_n_f64_x (p0, z0, d4), ++ z0 = svdivr_x (p0, z0, d4)) ++ ++/* ++** divr_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (divr_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svdivr_n_f64_x (p0, z1, d4), ++ z0 = svdivr_x (p0, z1, d4)) ++ ++/* ++** divr_1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f64_x_tied1, svfloat64_t, ++ z0 = svdivr_n_f64_x (p0, z0, 1), ++ z0 = svdivr_x (p0, z0, 1)) ++ ++/* ++** divr_1_f64_x_untied: ++** fmov z0\.d, #1\.0(?:e\+0)? ++** fdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_1_f64_x_untied, svfloat64_t, ++ z0 = svdivr_n_f64_x (p0, z1, 1), ++ z0 = svdivr_x (p0, z1, 1)) ++ ++/* ++** ptrue_divr_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_f64_x_tied1, svfloat64_t, ++ z0 = svdivr_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svdivr_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_divr_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_f64_x_tied2, svfloat64_t, ++ z0 = svdivr_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svdivr_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_divr_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_f64_x_untied, svfloat64_t, ++ z0 = svdivr_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svdivr_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_divr_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_1_f64_x_tied1, svfloat64_t, ++ z0 = svdivr_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svdivr_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_divr_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_divr_1_f64_x_untied, svfloat64_t, ++ z0 = svdivr_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svdivr_x (svptrue_b64 (), z1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s32.c +new file mode 100644 +index 000000000..75a6c1d97 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s32.c +@@ -0,0 +1,247 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** divr_s32_m_tied1: ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s32_m_tied1, svint32_t, ++ z0 = svdivr_s32_m (p0, z0, z1), ++ z0 = svdivr_m (p0, z0, z1)) ++ ++/* ++** divr_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sdivr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s32_m_tied2, svint32_t, ++ z0 = svdivr_s32_m (p0, z1, z0), ++ z0 = svdivr_m (p0, z1, z0)) ++ ++/* ++** divr_s32_m_untied: ++** movprfx z0, z1 ++** sdivr z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s32_m_untied, svint32_t, ++ z0 = svdivr_s32_m (p0, z1, z2), ++ z0 = svdivr_m (p0, z1, z2)) ++ ++/* ++** divr_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** sdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svdivr_n_s32_m (p0, z0, x0), ++ z0 = svdivr_m (p0, z0, x0)) ++ ++/* ++** divr_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** sdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svdivr_n_s32_m (p0, z1, x0), ++ z0 = svdivr_m (p0, z1, x0)) ++ ++/* ++** divr_2_s32_m_tied1: ++** mov (z[0-9]+\.s), #2 ++** sdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s32_m_tied1, svint32_t, ++ z0 = svdivr_n_s32_m (p0, z0, 2), ++ z0 = svdivr_m (p0, z0, 2)) ++ ++/* ++** divr_2_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #2 ++** movprfx z0, z1 ++** sdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s32_m_untied, svint32_t, ++ z0 = svdivr_n_s32_m (p0, z1, 2), ++ z0 = svdivr_m (p0, z1, 2)) ++ ++/* ++** divr_m1_s32_m: ++** mov (z[0-9]+)\.b, #-1 ++** sdivr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_m1_s32_m, svint32_t, ++ z0 = svdivr_n_s32_m (p0, z0, -1), ++ z0 = svdivr_m (p0, z0, -1)) ++ ++/* ++** divr_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s32_z_tied1, svint32_t, ++ z0 = svdivr_s32_z (p0, z0, z1), ++ z0 = svdivr_z (p0, z0, z1)) ++ ++/* ++** divr_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s32_z_tied2, svint32_t, ++ z0 = svdivr_s32_z (p0, z1, z0), ++ z0 = svdivr_z (p0, z1, z0)) ++ ++/* ++** divr_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sdivr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s32_z_untied, svint32_t, ++ z0 = svdivr_s32_z (p0, z1, z2), ++ z0 = svdivr_z (p0, z1, z2)) ++ ++/* ++** divr_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** sdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svdivr_n_s32_z (p0, z0, x0), ++ z0 = svdivr_z (p0, z0, x0)) ++ ++/* ++** divr_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sdivr z0\.s, p0/m, 
z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svdivr_n_s32_z (p0, z1, x0), ++ z0 = svdivr_z (p0, z1, x0)) ++ ++/* ++** divr_2_s32_z_tied1: ++** mov (z[0-9]+\.s), #2 ++** movprfx z0\.s, p0/z, z0\.s ++** sdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s32_z_tied1, svint32_t, ++ z0 = svdivr_n_s32_z (p0, z0, 2), ++ z0 = svdivr_z (p0, z0, 2)) ++ ++/* ++** divr_2_s32_z_untied: ++** mov (z[0-9]+\.s), #2 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sdivr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s32_z_untied, svint32_t, ++ z0 = svdivr_n_s32_z (p0, z1, 2), ++ z0 = svdivr_z (p0, z1, 2)) ++ ++/* ++** divr_s32_x_tied1: ++** sdivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s32_x_tied1, svint32_t, ++ z0 = svdivr_s32_x (p0, z0, z1), ++ z0 = svdivr_x (p0, z0, z1)) ++ ++/* ++** divr_s32_x_tied2: ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s32_x_tied2, svint32_t, ++ z0 = svdivr_s32_x (p0, z1, z0), ++ z0 = svdivr_x (p0, z1, z0)) ++ ++/* ++** divr_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** sdivr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s32_x_untied, svint32_t, ++ z0 = svdivr_s32_x (p0, z1, z2), ++ z0 = svdivr_x (p0, z1, z2)) ++ ++/* ++** divr_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** sdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svdivr_n_s32_x (p0, z0, x0), ++ z0 = svdivr_x (p0, z0, x0)) ++ ++/* ++** divr_w0_s32_x_untied: ++** mov z0\.s, w0 ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svdivr_n_s32_x (p0, z1, x0), ++ z0 = svdivr_x (p0, z1, x0)) ++ ++/* ++** divr_2_s32_x_tied1: ++** mov (z[0-9]+\.s), #2 ++** sdivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s32_x_tied1, svint32_t, ++ z0 = svdivr_n_s32_x (p0, z0, 2), ++ z0 = svdivr_x (p0, z0, 2)) ++ ++/* ++** divr_2_s32_x_untied: ++** mov z0\.s, #2 ++** sdiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s32_x_untied, svint32_t, ++ z0 = svdivr_n_s32_x (p0, z1, 2), ++ z0 = svdivr_x (p0, z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s64.c +new file mode 100644 +index 000000000..8f4939a91 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s64.c +@@ -0,0 +1,247 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** divr_s64_m_tied1: ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s64_m_tied1, svint64_t, ++ z0 = svdivr_s64_m (p0, z0, z1), ++ z0 = svdivr_m (p0, z0, z1)) ++ ++/* ++** divr_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s64_m_tied2, svint64_t, ++ z0 = svdivr_s64_m (p0, z1, z0), ++ z0 = svdivr_m (p0, z1, z0)) ++ ++/* ++** divr_s64_m_untied: ++** movprfx z0, z1 ++** sdivr z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s64_m_untied, svint64_t, ++ z0 = svdivr_s64_m (p0, z1, z2), ++ z0 = svdivr_m (p0, z1, z2)) ++ ++/* ++** divr_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** 
ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svdivr_n_s64_m (p0, z0, x0), ++ z0 = svdivr_m (p0, z0, x0)) ++ ++/* ++** divr_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svdivr_n_s64_m (p0, z1, x0), ++ z0 = svdivr_m (p0, z1, x0)) ++ ++/* ++** divr_2_s64_m_tied1: ++** mov (z[0-9]+\.d), #2 ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s64_m_tied1, svint64_t, ++ z0 = svdivr_n_s64_m (p0, z0, 2), ++ z0 = svdivr_m (p0, z0, 2)) ++ ++/* ++** divr_2_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #2 ++** movprfx z0, z1 ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s64_m_untied, svint64_t, ++ z0 = svdivr_n_s64_m (p0, z1, 2), ++ z0 = svdivr_m (p0, z1, 2)) ++ ++/* ++** divr_m1_s64_m: ++** mov (z[0-9]+)\.b, #-1 ++** sdivr z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_m1_s64_m, svint64_t, ++ z0 = svdivr_n_s64_m (p0, z0, -1), ++ z0 = svdivr_m (p0, z0, -1)) ++ ++/* ++** divr_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s64_z_tied1, svint64_t, ++ z0 = svdivr_s64_z (p0, z0, z1), ++ z0 = svdivr_z (p0, z0, z1)) ++ ++/* ++** divr_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s64_z_tied2, svint64_t, ++ z0 = svdivr_s64_z (p0, z1, z0), ++ z0 = svdivr_z (p0, z1, z0)) ++ ++/* ++** divr_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sdivr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s64_z_untied, svint64_t, ++ z0 = svdivr_s64_z (p0, z1, z2), ++ z0 = svdivr_z (p0, z1, z2)) ++ ++/* ++** divr_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svdivr_n_s64_z (p0, z0, x0), ++ z0 = svdivr_z (p0, z0, x0)) ++ ++/* ++** divr_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svdivr_n_s64_z (p0, z1, x0), ++ z0 = svdivr_z (p0, z1, x0)) ++ ++/* ++** divr_2_s64_z_tied1: ++** mov (z[0-9]+\.d), #2 ++** movprfx z0\.d, p0/z, z0\.d ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s64_z_tied1, svint64_t, ++ z0 = svdivr_n_s64_z (p0, z0, 2), ++ z0 = svdivr_z (p0, z0, 2)) ++ ++/* ++** divr_2_s64_z_untied: ++** mov (z[0-9]+\.d), #2 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s64_z_untied, svint64_t, ++ z0 = svdivr_n_s64_z (p0, z1, 2), ++ z0 = svdivr_z (p0, z1, 2)) ++ ++/* ++** divr_s64_x_tied1: ++** sdivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s64_x_tied1, svint64_t, ++ z0 = svdivr_s64_x (p0, z0, z1), ++ z0 = svdivr_x (p0, z0, z1)) ++ ++/* ++** divr_s64_x_tied2: ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s64_x_tied2, svint64_t, ++ z0 = svdivr_s64_x (p0, z1, z0), ++ z0 = svdivr_x (p0, z1, z0)) ++ ++/* ++** divr_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** sdivr z0\.d, 
p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_s64_x_untied, svint64_t, ++ z0 = svdivr_s64_x (p0, z1, z2), ++ z0 = svdivr_x (p0, z1, z2)) ++ ++/* ++** divr_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svdivr_n_s64_x (p0, z0, x0), ++ z0 = svdivr_x (p0, z0, x0)) ++ ++/* ++** divr_x0_s64_x_untied: ++** mov z0\.d, x0 ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svdivr_n_s64_x (p0, z1, x0), ++ z0 = svdivr_x (p0, z1, x0)) ++ ++/* ++** divr_2_s64_x_tied1: ++** mov (z[0-9]+\.d), #2 ++** sdivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s64_x_tied1, svint64_t, ++ z0 = svdivr_n_s64_x (p0, z0, 2), ++ z0 = svdivr_x (p0, z0, 2)) ++ ++/* ++** divr_2_s64_x_untied: ++** mov z0\.d, #2 ++** sdiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_s64_x_untied, svint64_t, ++ z0 = svdivr_n_s64_x (p0, z1, 2), ++ z0 = svdivr_x (p0, z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u32.c +new file mode 100644 +index 000000000..84c243b44 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u32.c +@@ -0,0 +1,247 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** divr_u32_m_tied1: ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u32_m_tied1, svuint32_t, ++ z0 = svdivr_u32_m (p0, z0, z1), ++ z0 = svdivr_m (p0, z0, z1)) ++ ++/* ++** divr_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** udivr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u32_m_tied2, svuint32_t, ++ z0 = svdivr_u32_m (p0, z1, z0), ++ z0 = svdivr_m (p0, z1, z0)) ++ ++/* ++** divr_u32_m_untied: ++** movprfx z0, z1 ++** udivr z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u32_m_untied, svuint32_t, ++ z0 = svdivr_u32_m (p0, z1, z2), ++ z0 = svdivr_m (p0, z1, z2)) ++ ++/* ++** divr_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** udivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svdivr_n_u32_m (p0, z0, x0), ++ z0 = svdivr_m (p0, z0, x0)) ++ ++/* ++** divr_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** udivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svdivr_n_u32_m (p0, z1, x0), ++ z0 = svdivr_m (p0, z1, x0)) ++ ++/* ++** divr_2_u32_m_tied1: ++** mov (z[0-9]+\.s), #2 ++** udivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u32_m_tied1, svuint32_t, ++ z0 = svdivr_n_u32_m (p0, z0, 2), ++ z0 = svdivr_m (p0, z0, 2)) ++ ++/* ++** divr_2_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #2 ++** movprfx z0, z1 ++** udivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u32_m_untied, svuint32_t, ++ z0 = svdivr_n_u32_m (p0, z1, 2), ++ z0 = svdivr_m (p0, z1, 2)) ++ ++/* ++** divr_m1_u32_m: ++** mov (z[0-9]+)\.b, #-1 ++** udivr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_m1_u32_m, svuint32_t, ++ z0 = svdivr_n_u32_m (p0, z0, -1), ++ z0 = svdivr_m (p0, z0, -1)) ++ ++/* ++** divr_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u32_z_tied1, svuint32_t, ++ z0 = svdivr_u32_z (p0, 
z0, z1), ++ z0 = svdivr_z (p0, z0, z1)) ++ ++/* ++** divr_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u32_z_tied2, svuint32_t, ++ z0 = svdivr_u32_z (p0, z1, z0), ++ z0 = svdivr_z (p0, z1, z0)) ++ ++/* ++** divr_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** udivr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u32_z_untied, svuint32_t, ++ z0 = svdivr_u32_z (p0, z1, z2), ++ z0 = svdivr_z (p0, z1, z2)) ++ ++/* ++** divr_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** udivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svdivr_n_u32_z (p0, z0, x0), ++ z0 = svdivr_z (p0, z0, x0)) ++ ++/* ++** divr_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** udivr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svdivr_n_u32_z (p0, z1, x0), ++ z0 = svdivr_z (p0, z1, x0)) ++ ++/* ++** divr_2_u32_z_tied1: ++** mov (z[0-9]+\.s), #2 ++** movprfx z0\.s, p0/z, z0\.s ++** udivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u32_z_tied1, svuint32_t, ++ z0 = svdivr_n_u32_z (p0, z0, 2), ++ z0 = svdivr_z (p0, z0, 2)) ++ ++/* ++** divr_2_u32_z_untied: ++** mov (z[0-9]+\.s), #2 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** udivr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u32_z_untied, svuint32_t, ++ z0 = svdivr_n_u32_z (p0, z1, 2), ++ z0 = svdivr_z (p0, z1, 2)) ++ ++/* ++** divr_u32_x_tied1: ++** udivr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u32_x_tied1, svuint32_t, ++ z0 = svdivr_u32_x (p0, z0, z1), ++ z0 = svdivr_x (p0, z0, z1)) ++ ++/* ++** divr_u32_x_tied2: ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u32_x_tied2, svuint32_t, ++ z0 = svdivr_u32_x (p0, z1, z0), ++ z0 = svdivr_x (p0, z1, z0)) ++ ++/* ++** divr_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** udivr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u32_x_untied, svuint32_t, ++ z0 = svdivr_u32_x (p0, z1, z2), ++ z0 = svdivr_x (p0, z1, z2)) ++ ++/* ++** divr_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** udivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svdivr_n_u32_x (p0, z0, x0), ++ z0 = svdivr_x (p0, z0, x0)) ++ ++/* ++** divr_w0_u32_x_untied: ++** mov z0\.s, w0 ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svdivr_n_u32_x (p0, z1, x0), ++ z0 = svdivr_x (p0, z1, x0)) ++ ++/* ++** divr_2_u32_x_tied1: ++** mov (z[0-9]+\.s), #2 ++** udivr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u32_x_tied1, svuint32_t, ++ z0 = svdivr_n_u32_x (p0, z0, 2), ++ z0 = svdivr_x (p0, z0, 2)) ++ ++/* ++** divr_2_u32_x_untied: ++** mov z0\.s, #2 ++** udiv z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u32_x_untied, svuint32_t, ++ z0 = svdivr_n_u32_x (p0, z1, 2), ++ z0 = svdivr_x (p0, z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u64.c +new file 
mode 100644 +index 000000000..03bb62472 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u64.c +@@ -0,0 +1,247 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** divr_u64_m_tied1: ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u64_m_tied1, svuint64_t, ++ z0 = svdivr_u64_m (p0, z0, z1), ++ z0 = svdivr_m (p0, z0, z1)) ++ ++/* ++** divr_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** udivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u64_m_tied2, svuint64_t, ++ z0 = svdivr_u64_m (p0, z1, z0), ++ z0 = svdivr_m (p0, z1, z0)) ++ ++/* ++** divr_u64_m_untied: ++** movprfx z0, z1 ++** udivr z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u64_m_untied, svuint64_t, ++ z0 = svdivr_u64_m (p0, z1, z2), ++ z0 = svdivr_m (p0, z1, z2)) ++ ++/* ++** divr_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** udivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svdivr_n_u64_m (p0, z0, x0), ++ z0 = svdivr_m (p0, z0, x0)) ++ ++/* ++** divr_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** udivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svdivr_n_u64_m (p0, z1, x0), ++ z0 = svdivr_m (p0, z1, x0)) ++ ++/* ++** divr_2_u64_m_tied1: ++** mov (z[0-9]+\.d), #2 ++** udivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u64_m_tied1, svuint64_t, ++ z0 = svdivr_n_u64_m (p0, z0, 2), ++ z0 = svdivr_m (p0, z0, 2)) ++ ++/* ++** divr_2_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #2 ++** movprfx z0, z1 ++** udivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u64_m_untied, svuint64_t, ++ z0 = svdivr_n_u64_m (p0, z1, 2), ++ z0 = svdivr_m (p0, z1, 2)) ++ ++/* ++** divr_m1_u64_m: ++** mov (z[0-9]+)\.b, #-1 ++** udivr z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_m1_u64_m, svuint64_t, ++ z0 = svdivr_n_u64_m (p0, z0, -1), ++ z0 = svdivr_m (p0, z0, -1)) ++ ++/* ++** divr_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u64_z_tied1, svuint64_t, ++ z0 = svdivr_u64_z (p0, z0, z1), ++ z0 = svdivr_z (p0, z0, z1)) ++ ++/* ++** divr_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u64_z_tied2, svuint64_t, ++ z0 = svdivr_u64_z (p0, z1, z0), ++ z0 = svdivr_z (p0, z1, z0)) ++ ++/* ++** divr_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** udivr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u64_z_untied, svuint64_t, ++ z0 = svdivr_u64_z (p0, z1, z2), ++ z0 = svdivr_z (p0, z1, z2)) ++ ++/* ++** divr_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** udivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svdivr_n_u64_z (p0, z0, x0), ++ z0 = svdivr_z (p0, z0, x0)) ++ ++/* ++** divr_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** udivr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svdivr_n_u64_z (p0, z1, x0), ++ z0 = svdivr_z (p0, z1, x0)) ++ ++/* ++** divr_2_u64_z_tied1: ++** mov (z[0-9]+\.d), 
#2 ++** movprfx z0\.d, p0/z, z0\.d ++** udivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u64_z_tied1, svuint64_t, ++ z0 = svdivr_n_u64_z (p0, z0, 2), ++ z0 = svdivr_z (p0, z0, 2)) ++ ++/* ++** divr_2_u64_z_untied: ++** mov (z[0-9]+\.d), #2 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** udivr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u64_z_untied, svuint64_t, ++ z0 = svdivr_n_u64_z (p0, z1, 2), ++ z0 = svdivr_z (p0, z1, 2)) ++ ++/* ++** divr_u64_x_tied1: ++** udivr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u64_x_tied1, svuint64_t, ++ z0 = svdivr_u64_x (p0, z0, z1), ++ z0 = svdivr_x (p0, z0, z1)) ++ ++/* ++** divr_u64_x_tied2: ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u64_x_tied2, svuint64_t, ++ z0 = svdivr_u64_x (p0, z1, z0), ++ z0 = svdivr_x (p0, z1, z0)) ++ ++/* ++** divr_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** udivr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (divr_u64_x_untied, svuint64_t, ++ z0 = svdivr_u64_x (p0, z1, z2), ++ z0 = svdivr_x (p0, z1, z2)) ++ ++/* ++** divr_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** udivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svdivr_n_u64_x (p0, z0, x0), ++ z0 = svdivr_x (p0, z0, x0)) ++ ++/* ++** divr_x0_u64_x_untied: ++** mov z0\.d, x0 ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (divr_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svdivr_n_u64_x (p0, z1, x0), ++ z0 = svdivr_x (p0, z1, x0)) ++ ++/* ++** divr_2_u64_x_tied1: ++** mov (z[0-9]+\.d), #2 ++** udivr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u64_x_tied1, svuint64_t, ++ z0 = svdivr_n_u64_x (p0, z0, 2), ++ z0 = svdivr_x (p0, z0, 2)) ++ ++/* ++** divr_2_u64_x_untied: ++** mov z0\.d, #2 ++** udiv z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (divr_2_u64_x_untied, svuint64_t, ++ z0 = svdivr_n_u64_x (p0, z1, 2), ++ z0 = svdivr_x (p0, z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s32.c +new file mode 100644 +index 000000000..a4d713e29 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s32.c +@@ -0,0 +1,93 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dot_lane_0_s32_tied1: ++** sdot z0\.s, z4\.b, z5\.b\[0\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_0_s32_tied1, svint32_t, svint8_t, ++ z0 = svdot_lane_s32 (z0, z4, z5, 0), ++ z0 = svdot_lane (z0, z4, z5, 0)) ++ ++/* ++** dot_lane_0_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** sdot z0\.s, \1\.b, z1\.b\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_lane_0_s32_tied2, svint32_t, svint8_t, ++ z0_res = svdot_lane_s32 (z4, z0, z1, 0), ++ z0_res = svdot_lane (z4, z0, z1, 0)) ++ ++/* ++** dot_lane_0_s32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** sdot z0\.s, z1\.b, \1\.b\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_lane_0_s32_tied3, svint32_t, svint8_t, ++ z0_res = svdot_lane_s32 (z4, z1, z0, 0), ++ z0_res = svdot_lane (z4, z1, z0, 0)) ++ ++/* ++** dot_lane_0_s32_untied: ++** movprfx z0, z1 ++** sdot z0\.s, z4\.b, z5\.b\[0\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_0_s32_untied, svint32_t, svint8_t, ++ z0 = svdot_lane_s32 (z1, z4, z5, 0), ++ z0 = svdot_lane (z1, z4, z5, 0)) ++ 
++/* ++** dot_lane_1_s32: ++** sdot z0\.s, z4\.b, z5\.b\[1\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_1_s32, svint32_t, svint8_t, ++ z0 = svdot_lane_s32 (z0, z4, z5, 1), ++ z0 = svdot_lane (z0, z4, z5, 1)) ++ ++/* ++** dot_lane_2_s32: ++** sdot z0\.s, z4\.b, z5\.b\[2\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_2_s32, svint32_t, svint8_t, ++ z0 = svdot_lane_s32 (z0, z4, z5, 2), ++ z0 = svdot_lane (z0, z4, z5, 2)) ++ ++/* ++** dot_lane_3_s32: ++** sdot z0\.s, z4\.b, z5\.b\[3\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_3_s32, svint32_t, svint8_t, ++ z0 = svdot_lane_s32 (z0, z4, z5, 3), ++ z0 = svdot_lane (z0, z4, z5, 3)) ++ ++/* ++** dot_lane_z8_s32: ++** str d8, \[sp, -16\]! ++** mov (z[0-7])\.d, z8\.d ++** sdot z0\.s, z1\.b, \1\.b\[1\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (dot_lane_z8_s32, svint32_t, svint8_t, z8, ++ z0 = svdot_lane_s32 (z0, z1, z8, 1), ++ z0 = svdot_lane (z0, z1, z8, 1)) ++ ++/* ++** dot_lane_z16_s32: ++** mov (z[0-7])\.d, z16\.d ++** sdot z0\.s, z1\.b, \1\.b\[1\] ++** ret ++*/ ++TEST_DUAL_LANE_REG (dot_lane_z16_s32, svint32_t, svint8_t, z16, ++ z0 = svdot_lane_s32 (z0, z1, z16, 1), ++ z0 = svdot_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s64.c +new file mode 100644 +index 000000000..daee74091 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s64.c +@@ -0,0 +1,74 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dot_lane_0_s64_tied1: ++** sdot z0\.d, z4\.h, z5\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_0_s64_tied1, svint64_t, svint16_t, ++ z0 = svdot_lane_s64 (z0, z4, z5, 0), ++ z0 = svdot_lane (z0, z4, z5, 0)) ++ ++/* ++** dot_lane_0_s64_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** sdot z0\.d, \1\.h, z1\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_lane_0_s64_tied2, svint64_t, svint16_t, ++ z0_res = svdot_lane_s64 (z4, z0, z1, 0), ++ z0_res = svdot_lane (z4, z0, z1, 0)) ++ ++/* ++** dot_lane_0_s64_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** sdot z0\.d, z1\.h, \1\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_lane_0_s64_tied3, svint64_t, svint16_t, ++ z0_res = svdot_lane_s64 (z4, z1, z0, 0), ++ z0_res = svdot_lane (z4, z1, z0, 0)) ++ ++/* ++** dot_lane_0_s64_untied: ++** movprfx z0, z1 ++** sdot z0\.d, z4\.h, z5\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_0_s64_untied, svint64_t, svint16_t, ++ z0 = svdot_lane_s64 (z1, z4, z5, 0), ++ z0 = svdot_lane (z1, z4, z5, 0)) ++ ++/* ++** dot_lane_1_s64: ++** sdot z0\.d, z4\.h, z5\.h\[1\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_1_s64, svint64_t, svint16_t, ++ z0 = svdot_lane_s64 (z0, z4, z5, 1), ++ z0 = svdot_lane (z0, z4, z5, 1)) ++ ++/* ++** dot_lane_z15_s64: ++** str d15, \[sp, -16\]! 
++** sdot z0\.d, z1\.h, z15\.h\[1\] ++** ldr d15, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (dot_lane_z15_s64, svint64_t, svint16_t, z15, ++ z0 = svdot_lane_s64 (z0, z1, z15, 1), ++ z0 = svdot_lane (z0, z1, z15, 1)) ++ ++/* ++** dot_lane_z16_s64: ++** mov (z[0-9]|z1[0-5])\.d, z16\.d ++** sdot z0\.d, z1\.h, \1\.h\[1\] ++** ret ++*/ ++TEST_DUAL_LANE_REG (dot_lane_z16_s64, svint64_t, svint16_t, z16, ++ z0 = svdot_lane_s64 (z0, z1, z16, 1), ++ z0 = svdot_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u32.c +new file mode 100644 +index 000000000..6d69df76d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u32.c +@@ -0,0 +1,93 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dot_lane_0_u32_tied1: ++** udot z0\.s, z4\.b, z5\.b\[0\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_0_u32_tied1, svuint32_t, svuint8_t, ++ z0 = svdot_lane_u32 (z0, z4, z5, 0), ++ z0 = svdot_lane (z0, z4, z5, 0)) ++ ++/* ++** dot_lane_0_u32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** udot z0\.s, \1\.b, z1\.b\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_lane_0_u32_tied2, svuint32_t, svuint8_t, ++ z0_res = svdot_lane_u32 (z4, z0, z1, 0), ++ z0_res = svdot_lane (z4, z0, z1, 0)) ++ ++/* ++** dot_lane_0_u32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** udot z0\.s, z1\.b, \1\.b\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_lane_0_u32_tied3, svuint32_t, svuint8_t, ++ z0_res = svdot_lane_u32 (z4, z1, z0, 0), ++ z0_res = svdot_lane (z4, z1, z0, 0)) ++ ++/* ++** dot_lane_0_u32_untied: ++** movprfx z0, z1 ++** udot z0\.s, z4\.b, z5\.b\[0\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_0_u32_untied, svuint32_t, svuint8_t, ++ z0 = svdot_lane_u32 (z1, z4, z5, 0), ++ z0 = svdot_lane (z1, z4, z5, 0)) ++ ++/* ++** dot_lane_1_u32: ++** udot z0\.s, z4\.b, z5\.b\[1\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_1_u32, svuint32_t, svuint8_t, ++ z0 = svdot_lane_u32 (z0, z4, z5, 1), ++ z0 = svdot_lane (z0, z4, z5, 1)) ++ ++/* ++** dot_lane_2_u32: ++** udot z0\.s, z4\.b, z5\.b\[2\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_2_u32, svuint32_t, svuint8_t, ++ z0 = svdot_lane_u32 (z0, z4, z5, 2), ++ z0 = svdot_lane (z0, z4, z5, 2)) ++ ++/* ++** dot_lane_3_u32: ++** udot z0\.s, z4\.b, z5\.b\[3\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_3_u32, svuint32_t, svuint8_t, ++ z0 = svdot_lane_u32 (z0, z4, z5, 3), ++ z0 = svdot_lane (z0, z4, z5, 3)) ++ ++/* ++** dot_lane_z8_u32: ++** str d8, \[sp, -16\]! 
++** mov (z[0-7])\.d, z8\.d ++** udot z0\.s, z1\.b, \1\.b\[1\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (dot_lane_z8_u32, svuint32_t, svuint8_t, z8, ++ z0 = svdot_lane_u32 (z0, z1, z8, 1), ++ z0 = svdot_lane (z0, z1, z8, 1)) ++ ++/* ++** dot_lane_z16_u32: ++** mov (z[0-7])\.d, z16\.d ++** udot z0\.s, z1\.b, \1\.b\[1\] ++** ret ++*/ ++TEST_DUAL_LANE_REG (dot_lane_z16_u32, svuint32_t, svuint8_t, z16, ++ z0 = svdot_lane_u32 (z0, z1, z16, 1), ++ z0 = svdot_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u64.c +new file mode 100644 +index 000000000..242e21c78 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u64.c +@@ -0,0 +1,74 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dot_lane_0_u64_tied1: ++** udot z0\.d, z4\.h, z5\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_0_u64_tied1, svuint64_t, svuint16_t, ++ z0 = svdot_lane_u64 (z0, z4, z5, 0), ++ z0 = svdot_lane (z0, z4, z5, 0)) ++ ++/* ++** dot_lane_0_u64_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** udot z0\.d, \1\.h, z1\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_lane_0_u64_tied2, svuint64_t, svuint16_t, ++ z0_res = svdot_lane_u64 (z4, z0, z1, 0), ++ z0_res = svdot_lane (z4, z0, z1, 0)) ++ ++/* ++** dot_lane_0_u64_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** udot z0\.d, z1\.h, \1\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_lane_0_u64_tied3, svuint64_t, svuint16_t, ++ z0_res = svdot_lane_u64 (z4, z1, z0, 0), ++ z0_res = svdot_lane (z4, z1, z0, 0)) ++ ++/* ++** dot_lane_0_u64_untied: ++** movprfx z0, z1 ++** udot z0\.d, z4\.h, z5\.h\[0\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_0_u64_untied, svuint64_t, svuint16_t, ++ z0 = svdot_lane_u64 (z1, z4, z5, 0), ++ z0 = svdot_lane (z1, z4, z5, 0)) ++ ++/* ++** dot_lane_1_u64: ++** udot z0\.d, z4\.h, z5\.h\[1\] ++** ret ++*/ ++TEST_DUAL_Z (dot_lane_1_u64, svuint64_t, svuint16_t, ++ z0 = svdot_lane_u64 (z0, z4, z5, 1), ++ z0 = svdot_lane (z0, z4, z5, 1)) ++ ++/* ++** dot_lane_z15_u64: ++** str d15, \[sp, -16\]! 
++** udot z0\.d, z1\.h, z15\.h\[1\] ++** ldr d15, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (dot_lane_z15_u64, svuint64_t, svuint16_t, z15, ++ z0 = svdot_lane_u64 (z0, z1, z15, 1), ++ z0 = svdot_lane (z0, z1, z15, 1)) ++ ++/* ++** dot_lane_z16_u64: ++** mov (z[0-9]|z1[0-5])\.d, z16\.d ++** udot z0\.d, z1\.h, \1\.h\[1\] ++** ret ++*/ ++TEST_DUAL_LANE_REG (dot_lane_z16_u64, svuint64_t, svuint16_t, z16, ++ z0 = svdot_lane_u64 (z0, z1, z16, 1), ++ z0 = svdot_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s32.c +new file mode 100644 +index 000000000..605bd1b30 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s32.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dot_s32_tied1: ++** sdot z0\.s, z4\.b, z5\.b ++** ret ++*/ ++TEST_DUAL_Z (dot_s32_tied1, svint32_t, svint8_t, ++ z0 = svdot_s32 (z0, z4, z5), ++ z0 = svdot (z0, z4, z5)) ++ ++/* ++** dot_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** sdot z0\.s, \1\.b, z1\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_s32_tied2, svint32_t, svint8_t, ++ z0_res = svdot_s32 (z4, z0, z1), ++ z0_res = svdot (z4, z0, z1)) ++ ++/* ++** dot_s32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** sdot z0\.s, z1\.b, \1\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_s32_tied3, svint32_t, svint8_t, ++ z0_res = svdot_s32 (z4, z1, z0), ++ z0_res = svdot (z4, z1, z0)) ++ ++/* ++** dot_s32_untied: ++** movprfx z0, z1 ++** sdot z0\.s, z4\.b, z5\.b ++** ret ++*/ ++TEST_DUAL_Z (dot_s32_untied, svint32_t, svint8_t, ++ z0 = svdot_s32 (z1, z4, z5), ++ z0 = svdot (z1, z4, z5)) ++ ++/* ++** dot_w0_s32_tied1: ++** mov (z[0-9]+\.b), w0 ++** sdot z0\.s, z4\.b, \1 ++** ret ++*/ ++TEST_DUAL_ZX (dot_w0_s32_tied1, svint32_t, svint8_t, int8_t, ++ z0 = svdot_n_s32 (z0, z4, x0), ++ z0 = svdot (z0, z4, x0)) ++ ++/* ++** dot_w0_s32_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** sdot z0\.s, z4\.b, \1 ++** ret ++*/ ++TEST_DUAL_ZX (dot_w0_s32_untied, svint32_t, svint8_t, int8_t, ++ z0 = svdot_n_s32 (z1, z4, x0), ++ z0 = svdot (z1, z4, x0)) ++ ++/* ++** dot_9_s32_tied1: ++** mov (z[0-9]+\.b), #9 ++** sdot z0\.s, z4\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z (dot_9_s32_tied1, svint32_t, svint8_t, ++ z0 = svdot_n_s32 (z0, z4, 9), ++ z0 = svdot (z0, z4, 9)) ++ ++/* ++** dot_9_s32_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #9 ++** movprfx z0, z1 ++** sdot z0\.s, z4\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z (dot_9_s32_untied, svint32_t, svint8_t, ++ z0 = svdot_n_s32 (z1, z4, 9), ++ z0 = svdot (z1, z4, 9)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s64.c +new file mode 100644 +index 000000000..b6574740b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dot_s64_tied1: ++** sdot z0\.d, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (dot_s64_tied1, svint64_t, svint16_t, ++ z0 = svdot_s64 (z0, z4, z5), ++ z0 = svdot (z0, z4, z5)) ++ ++/* ++** dot_s64_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** sdot z0\.d, \1\.h, z1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_s64_tied2, svint64_t, svint16_t, ++ z0_res = svdot_s64 (z4, z0, z1), ++ z0_res = svdot (z4, z0, z1)) ++ ++/* ++** dot_s64_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx 
z0, z4 ++** sdot z0\.d, z1\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_s64_tied3, svint64_t, svint16_t, ++ z0_res = svdot_s64 (z4, z1, z0), ++ z0_res = svdot (z4, z1, z0)) ++ ++/* ++** dot_s64_untied: ++** movprfx z0, z1 ++** sdot z0\.d, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (dot_s64_untied, svint64_t, svint16_t, ++ z0 = svdot_s64 (z1, z4, z5), ++ z0 = svdot (z1, z4, z5)) ++ ++/* ++** dot_w0_s64_tied1: ++** mov (z[0-9]+\.h), w0 ++** sdot z0\.d, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_ZX (dot_w0_s64_tied1, svint64_t, svint16_t, int16_t, ++ z0 = svdot_n_s64 (z0, z4, x0), ++ z0 = svdot (z0, z4, x0)) ++ ++/* ++** dot_w0_s64_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** sdot z0\.d, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_ZX (dot_w0_s64_untied, svint64_t, svint16_t, int16_t, ++ z0 = svdot_n_s64 (z1, z4, x0), ++ z0 = svdot (z1, z4, x0)) ++ ++/* ++** dot_9_s64_tied1: ++** mov (z[0-9]+\.h), #9 ++** sdot z0\.d, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z (dot_9_s64_tied1, svint64_t, svint16_t, ++ z0 = svdot_n_s64 (z0, z4, 9), ++ z0 = svdot (z0, z4, 9)) ++ ++/* ++** dot_9_s64_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #9 ++** movprfx z0, z1 ++** sdot z0\.d, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z (dot_9_s64_untied, svint64_t, svint16_t, ++ z0 = svdot_n_s64 (z1, z4, 9), ++ z0 = svdot (z1, z4, 9)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u32.c +new file mode 100644 +index 000000000..541e71cc2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u32.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dot_u32_tied1: ++** udot z0\.s, z4\.b, z5\.b ++** ret ++*/ ++TEST_DUAL_Z (dot_u32_tied1, svuint32_t, svuint8_t, ++ z0 = svdot_u32 (z0, z4, z5), ++ z0 = svdot (z0, z4, z5)) ++ ++/* ++** dot_u32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** udot z0\.s, \1\.b, z1\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_u32_tied2, svuint32_t, svuint8_t, ++ z0_res = svdot_u32 (z4, z0, z1), ++ z0_res = svdot (z4, z0, z1)) ++ ++/* ++** dot_u32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** udot z0\.s, z1\.b, \1\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_u32_tied3, svuint32_t, svuint8_t, ++ z0_res = svdot_u32 (z4, z1, z0), ++ z0_res = svdot (z4, z1, z0)) ++ ++/* ++** dot_u32_untied: ++** movprfx z0, z1 ++** udot z0\.s, z4\.b, z5\.b ++** ret ++*/ ++TEST_DUAL_Z (dot_u32_untied, svuint32_t, svuint8_t, ++ z0 = svdot_u32 (z1, z4, z5), ++ z0 = svdot (z1, z4, z5)) ++ ++/* ++** dot_w0_u32_tied1: ++** mov (z[0-9]+\.b), w0 ++** udot z0\.s, z4\.b, \1 ++** ret ++*/ ++TEST_DUAL_ZX (dot_w0_u32_tied1, svuint32_t, svuint8_t, uint8_t, ++ z0 = svdot_n_u32 (z0, z4, x0), ++ z0 = svdot (z0, z4, x0)) ++ ++/* ++** dot_w0_u32_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** udot z0\.s, z4\.b, \1 ++** ret ++*/ ++TEST_DUAL_ZX (dot_w0_u32_untied, svuint32_t, svuint8_t, uint8_t, ++ z0 = svdot_n_u32 (z1, z4, x0), ++ z0 = svdot (z1, z4, x0)) ++ ++/* ++** dot_9_u32_tied1: ++** mov (z[0-9]+\.b), #9 ++** udot z0\.s, z4\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z (dot_9_u32_tied1, svuint32_t, svuint8_t, ++ z0 = svdot_n_u32 (z0, z4, 9), ++ z0 = svdot (z0, z4, 9)) ++ ++/* ++** dot_9_u32_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #9 ++** movprfx z0, z1 ++** udot z0\.s, z4\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z (dot_9_u32_untied, svuint32_t, svuint8_t, ++ z0 = svdot_n_u32 (z1, z4, 9), ++ z0 = svdot (z1, z4, 9)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u64.c +new file mode 100644 +index 000000000..cc0e85373 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dot_u64_tied1: ++** udot z0\.d, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (dot_u64_tied1, svuint64_t, svuint16_t, ++ z0 = svdot_u64 (z0, z4, z5), ++ z0 = svdot (z0, z4, z5)) ++ ++/* ++** dot_u64_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** udot z0\.d, \1\.h, z1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_u64_tied2, svuint64_t, svuint16_t, ++ z0_res = svdot_u64 (z4, z0, z1), ++ z0_res = svdot (z4, z0, z1)) ++ ++/* ++** dot_u64_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** udot z0\.d, z1\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (dot_u64_tied3, svuint64_t, svuint16_t, ++ z0_res = svdot_u64 (z4, z1, z0), ++ z0_res = svdot (z4, z1, z0)) ++ ++/* ++** dot_u64_untied: ++** movprfx z0, z1 ++** udot z0\.d, z4\.h, z5\.h ++** ret ++*/ ++TEST_DUAL_Z (dot_u64_untied, svuint64_t, svuint16_t, ++ z0 = svdot_u64 (z1, z4, z5), ++ z0 = svdot (z1, z4, z5)) ++ ++/* ++** dot_w0_u64_tied1: ++** mov (z[0-9]+\.h), w0 ++** udot z0\.d, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_ZX (dot_w0_u64_tied1, svuint64_t, svuint16_t, uint16_t, ++ z0 = svdot_n_u64 (z0, z4, x0), ++ z0 = svdot (z0, z4, x0)) ++ ++/* ++** dot_w0_u64_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** udot z0\.d, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_ZX (dot_w0_u64_untied, svuint64_t, svuint16_t, uint16_t, ++ z0 = svdot_n_u64 (z1, z4, x0), ++ z0 = svdot (z1, z4, x0)) ++ ++/* ++** dot_9_u64_tied1: ++** mov (z[0-9]+\.h), #9 ++** udot z0\.d, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z (dot_9_u64_tied1, svuint64_t, svuint16_t, ++ z0 = svdot_n_u64 (z0, z4, 9), ++ z0 = svdot (z0, z4, 9)) ++ ++/* ++** dot_9_u64_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #9 ++** movprfx z0, z1 ++** udot z0\.d, z4\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z (dot_9_u64_untied, svuint64_t, svuint16_t, ++ z0 = svdot_n_u64 (z1, z4, 9), ++ z0 = svdot (z1, z4, 9)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b16.c +new file mode 100644 +index 000000000..785832ab3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b16.c +@@ -0,0 +1,32 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include <stdbool.h> ++#include "test_sve_acle.h" ++ ++/* ++** dup_false_b16: ++** pfalse p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (dup_false_b16, ++ p0 = svdup_n_b16 (false), ++ p0 = svdup_b16 (false)) ++ ++/* ++** dup_true_b16: ++** ptrue p0\.h, all ++** ret ++*/ ++TEST_UNIFORM_P (dup_true_b16, ++ p0 = svdup_n_b16 (true), ++ p0 = svdup_b16 (true)) ++ ++/* ++** dup_w0_b16: ++** lsl (x[0-9]+), x0, 63 ++** whilelo p0\.h, xzr, \1 ++** ret ++*/ ++TEST_UNIFORM_PS (dup_w0_b16, ++ p0 = svdup_n_b16 (x0), ++ p0 = svdup_b16 (x0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b32.c +new file mode 100644 +index 000000000..6e9d91eaf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b32.c +@@ -0,0 +1,32 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include <stdbool.h> ++#include "test_sve_acle.h" ++ ++/* ++** dup_false_b32: ++** pfalse p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (dup_false_b32, ++ p0 =
svdup_n_b32 (false), ++ p0 = svdup_b32 (false)) ++ ++/* ++** dup_true_b32: ++** ptrue p0\.s, all ++** ret ++*/ ++TEST_UNIFORM_P (dup_true_b32, ++ p0 = svdup_n_b32 (true), ++ p0 = svdup_b32 (true)) ++ ++/* ++** dup_w0_b32: ++** lsl (x[0-9]+), x0, 63 ++** whilelo p0\.s, xzr, \1 ++** ret ++*/ ++TEST_UNIFORM_PS (dup_w0_b32, ++ p0 = svdup_n_b32 (x0), ++ p0 = svdup_b32 (x0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b64.c +new file mode 100644 +index 000000000..ed69896c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b64.c +@@ -0,0 +1,32 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include <stdbool.h> ++#include "test_sve_acle.h" ++ ++/* ++** dup_false_b64: ++** pfalse p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (dup_false_b64, ++ p0 = svdup_n_b64 (false), ++ p0 = svdup_b64 (false)) ++ ++/* ++** dup_true_b64: ++** ptrue p0\.d, all ++** ret ++*/ ++TEST_UNIFORM_P (dup_true_b64, ++ p0 = svdup_n_b64 (true), ++ p0 = svdup_b64 (true)) ++ ++/* ++** dup_w0_b64: ++** lsl (x[0-9]+), x0, 63 ++** whilelo p0\.d, xzr, \1 ++** ret ++*/ ++TEST_UNIFORM_PS (dup_w0_b64, ++ p0 = svdup_n_b64 (x0), ++ p0 = svdup_b64 (x0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b8.c +new file mode 100644 +index 000000000..a99ab552a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b8.c +@@ -0,0 +1,32 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include <stdbool.h> ++#include "test_sve_acle.h" ++ ++/* ++** dup_false_b8: ++** pfalse p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (dup_false_b8, ++ p0 = svdup_n_b8 (false), ++ p0 = svdup_b8 (false)) ++ ++/* ++** dup_true_b8: ++** ptrue p0\.b, all ++** ret ++*/ ++TEST_UNIFORM_P (dup_true_b8, ++ p0 = svdup_n_b8 (true), ++ p0 = svdup_b8 (true)) ++ ++/* ++** dup_w0_b8: ++** lsl (x[0-9]+), x0, 63 ++** whilelo p0\.b, xzr, \1 ++** ret ++*/ ++TEST_UNIFORM_PS (dup_w0_b8, ++ p0 = svdup_n_b8 (x0), ++ p0 = svdup_b8 (x0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_bf16.c +new file mode 100644 +index 000000000..db47d849c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_bf16.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_h4_bf16: ++** mov z0\.h, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_h4_bf16, svbfloat16_t, __bf16, ++ z0 = svdup_n_bf16 (d4), ++ z0 = svdup_bf16 (d4)) ++ ++/* ++** dup_h4_bf16_m: ++** movprfx z0, z1 ++** mov z0\.h, p0/m, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_h4_bf16_m, svbfloat16_t, __bf16, ++ z0 = svdup_n_bf16_m (z1, p0, d4), ++ z0 = svdup_bf16_m (z1, p0, d4)) ++ ++/* ++** dup_h4_bf16_z: ++** movprfx z0\.h, p0/z, z0\.h ++** mov z0\.h, p0/m, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_h4_bf16_z, svbfloat16_t, __bf16, ++ z0 = svdup_n_bf16_z (p0, d4), ++ z0 = svdup_bf16_z (p0, d4)) ++ ++/* ++** dup_h4_bf16_x: ++** mov z0\.h, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_h4_bf16_x, svbfloat16_t, __bf16, ++ z0 = svdup_n_bf16_x (p0, d4), ++ z0 = svdup_bf16_x (p0, d4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c +new file mode 100644 +index 000000000..2d48b9a3d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c +@@ -0,0 +1,215 @@ ++/* { dg-final { check-function-bodies "**"
"" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_f16: ++** fmov z0\.h, #1\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f16, svfloat16_t, ++ z0 = svdup_n_f16 (1), ++ z0 = svdup_f16 (1)) ++ ++/* ++** dup_0_f16: ++** mov z0\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f16, svfloat16_t, ++ z0 = svdup_n_f16 (0), ++ z0 = svdup_f16 (0)) ++ ++/* ++** dup_8_f16: ++** fmov z0\.h, #8\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f16, svfloat16_t, ++ z0 = svdup_n_f16 (8), ++ z0 = svdup_f16 (8)) ++ ++/* ++** dup_512_f16: ++** mov z0\.h, #24576 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_f16, svfloat16_t, ++ z0 = svdup_n_f16 (512), ++ z0 = svdup_f16 (512)) ++ ++/* ++** dup_513_f16: ++** mov (w[0-7]+), 24578 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_513_f16, svfloat16_t, ++ z0 = svdup_n_f16 (513), ++ z0 = svdup_f16 (513)) ++ ++/* ++** dup_h4_f16: ++** mov z0\.h, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_h4_f16, svfloat16_t, __fp16, ++ z0 = svdup_n_f16 (d4), ++ z0 = svdup_f16 (d4)) ++ ++/* ++** dup_1_f16_m: ++** mov z0\.h, p0/m, #15360 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f16_m, svfloat16_t, ++ z0 = svdup_n_f16_m (z0, p0, 1), ++ z0 = svdup_f16_m (z0, p0, 1)) ++ ++/* ++** dup_0_f16_m: ++** mov z0\.h, p0/m, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f16_m, svfloat16_t, ++ z0 = svdup_n_f16_m (z0, p0, 0), ++ z0 = svdup_f16_m (z0, p0, 0)) ++ ++/* ++** dup_8_f16_m: ++** mov z0\.h, p0/m, #18432 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f16_m, svfloat16_t, ++ z0 = svdup_n_f16_m (z0, p0, 8), ++ z0 = svdup_f16_m (z0, p0, 8)) ++ ++/* ++** dup_512_f16_m: ++** mov z0\.h, p0/m, #24576 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_f16_m, svfloat16_t, ++ z0 = svdup_n_f16_m (z0, p0, 512), ++ z0 = svdup_f16_m (z0, p0, 512)) ++ ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_513_f16_m, svfloat16_t, ++ z0 = svdup_n_f16_m (z0, p0, 513), ++ z0 = svdup_f16_m (z0, p0, 513)) ++ ++/* ++** dup_h4_f16_m: ++** movprfx z0, z1 ++** mov z0\.h, p0/m, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_h4_f16_m, svfloat16_t, __fp16, ++ z0 = svdup_n_f16_m (z1, p0, d4), ++ z0 = svdup_f16_m (z1, p0, d4)) ++ ++/* ++** dup_1_f16_z: ++** mov z0\.h, p0/z, #15360 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f16_z, svfloat16_t, ++ z0 = svdup_n_f16_z (p0, 1), ++ z0 = svdup_f16_z (p0, 1)) ++ ++/* ++** dup_0_f16_z: ++** mov z0\.h, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f16_z, svfloat16_t, ++ z0 = svdup_n_f16_z (p0, 0), ++ z0 = svdup_f16_z (p0, 0)) ++ ++/* ++** dup_8_f16_z: ++** mov z0\.h, p0/z, #18432 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f16_z, svfloat16_t, ++ z0 = svdup_n_f16_z (p0, 8), ++ z0 = svdup_f16_z (p0, 8)) ++ ++/* ++** dup_512_f16_z: ++** mov z0\.h, p0/z, #24576 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_f16_z, svfloat16_t, ++ z0 = svdup_n_f16_z (p0, 512), ++ z0 = svdup_f16_z (p0, 512)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_513_f16_z, svfloat16_t, ++ z0 = svdup_n_f16_z (p0, 513), ++ z0 = svdup_f16_z (p0, 513)) ++/* ++** dup_h4_f16_z: ++** movprfx z0\.h, p0/z, z0\.h ++** mov z0\.h, p0/m, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_h4_f16_z, svfloat16_t, __fp16, ++ z0 = svdup_n_f16_z (p0, d4), ++ z0 = svdup_f16_z (p0, d4)) ++ ++/* ++** dup_1_f16_x: ++** fmov z0\.h, #1\.0(?:e\+0)? 
++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f16_x, svfloat16_t, ++ z0 = svdup_n_f16_x (p0, 1), ++ z0 = svdup_f16_x (p0, 1)) ++ ++/* ++** dup_0_f16_x: ++** mov z0\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f16_x, svfloat16_t, ++ z0 = svdup_n_f16_x (p0, 0), ++ z0 = svdup_f16_x (p0, 0)) ++ ++/* ++** dup_8_f16_x: ++** fmov z0\.h, #8\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f16_x, svfloat16_t, ++ z0 = svdup_n_f16_x (p0, 8), ++ z0 = svdup_f16_x (p0, 8)) ++ ++/* ++** dup_512_f16_x: ++** mov z0\.h, #24576 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_f16_x, svfloat16_t, ++ z0 = svdup_n_f16_x (p0, 512), ++ z0 = svdup_f16_x (p0, 512)) ++ ++/* ++** dup_513_f16_x: ++** mov (w[0-7]+), 24578 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_513_f16_x, svfloat16_t, ++ z0 = svdup_n_f16_x (p0, 513), ++ z0 = svdup_f16_x (p0, 513)) ++ ++/* ++** dup_h4_f16_x: ++** mov z0\.h, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_h4_f16_x, svfloat16_t, __fp16, ++ z0 = svdup_n_f16_x (p0, d4), ++ z0 = svdup_f16_x (p0, d4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c +new file mode 100644 +index 000000000..f997b7a7d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c +@@ -0,0 +1,212 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_f32: ++** fmov z0\.s, #1\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f32, svfloat32_t, ++ z0 = svdup_n_f32 (1), ++ z0 = svdup_f32 (1)) ++ ++/* ++** dup_0_f32: ++** mov z0\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f32, svfloat32_t, ++ z0 = svdup_n_f32 (0), ++ z0 = svdup_f32 (0)) ++ ++/* ++** dup_8_f32: ++** fmov z0\.s, #8\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f32, svfloat32_t, ++ z0 = svdup_n_f32 (8), ++ z0 = svdup_f32 (8)) ++ ++/* ++** dup_512_f32: ++** movi v([0-9]+).4s, 0x44, lsl 24 ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_f32, svfloat32_t, ++ z0 = svdup_n_f32 (512), ++ z0 = svdup_f32 (512)) ++ ++/* ++** dup_513_f32: ++** ... ++** ld1rw z0\.s, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_513_f32, svfloat32_t, ++ z0 = svdup_n_f32 (513), ++ z0 = svdup_f32 (513)) ++ ++/* ++** dup_s4_f32: ++** mov z0\.s, s4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_s4_f32, svfloat32_t, float, ++ z0 = svdup_n_f32 (d4), ++ z0 = svdup_f32 (d4)) ++ ++/* ++** dup_1_f32_m: ++** fmov z0\.s, p0/m, #1\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f32_m, svfloat32_t, ++ z0 = svdup_n_f32_m (z0, p0, 1), ++ z0 = svdup_f32_m (z0, p0, 1)) ++ ++/* ++** dup_0_f32_m: ++** mov z0\.s, p0/m, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f32_m, svfloat32_t, ++ z0 = svdup_n_f32_m (z0, p0, 0), ++ z0 = svdup_f32_m (z0, p0, 0)) ++ ++/* ++** dup_8_f32_m: ++** fmov z0\.s, p0/m, #8\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f32_m, svfloat32_t, ++ z0 = svdup_n_f32_m (z0, p0, 8), ++ z0 = svdup_f32_m (z0, p0, 8)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_512_f32_m, svfloat32_t, ++ z0 = svdup_n_f32_m (z0, p0, 512), ++ z0 = svdup_f32_m (z0, p0, 512)) ++ ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_513_f32_m, svfloat32_t, ++ z0 = svdup_n_f32_m (z0, p0, 513), ++ z0 = svdup_f32_m (z0, p0, 513)) ++ ++/* ++** dup_s4_f32_m: ++** movprfx z0, z1 ++** mov z0\.s, p0/m, s4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_s4_f32_m, svfloat32_t, float, ++ z0 = svdup_n_f32_m (z1, p0, d4), ++ z0 = svdup_f32_m (z1, p0, d4)) ++ ++/* ++** dup_1_f32_z: ++** movprfx z0\.s, p0/z, z0\.s ++** fmov z0\.s, p0/m, #1\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f32_z, svfloat32_t, ++ z0 = svdup_n_f32_z (p0, 1), ++ z0 = svdup_f32_z (p0, 1)) ++ ++/* ++** dup_0_f32_z: ++** mov z0\.s, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f32_z, svfloat32_t, ++ z0 = svdup_n_f32_z (p0, 0), ++ z0 = svdup_f32_z (p0, 0)) ++ ++/* ++** dup_8_f32_z: ++** movprfx z0\.s, p0/z, z0\.s ++** fmov z0\.s, p0/m, #8\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f32_z, svfloat32_t, ++ z0 = svdup_n_f32_z (p0, 8), ++ z0 = svdup_f32_z (p0, 8)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_512_f32_z, svfloat32_t, ++ z0 = svdup_n_f32_z (p0, 512), ++ z0 = svdup_f32_z (p0, 512)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_513_f32_z, svfloat32_t, ++ z0 = svdup_n_f32_z (p0, 513), ++ z0 = svdup_f32_z (p0, 513)) ++ ++/* ++** dup_s4_f32_z: ++** movprfx z0\.s, p0/z, z0\.s ++** mov z0\.s, p0/m, s4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_s4_f32_z, svfloat32_t, float, ++ z0 = svdup_n_f32_z (p0, d4), ++ z0 = svdup_f32_z (p0, d4)) ++ ++/* ++** dup_1_f32_x: ++** fmov z0\.s, #1\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f32_x, svfloat32_t, ++ z0 = svdup_n_f32_x (p0, 1), ++ z0 = svdup_f32_x (p0, 1)) ++ ++/* ++** dup_0_f32_x: ++** mov z0\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f32_x, svfloat32_t, ++ z0 = svdup_n_f32_x (p0, 0), ++ z0 = svdup_f32_x (p0, 0)) ++ ++/* ++** dup_8_f32_x: ++** fmov z0\.s, #8\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f32_x, svfloat32_t, ++ z0 = svdup_n_f32_x (p0, 8), ++ z0 = svdup_f32_x (p0, 8)) ++ ++/* ++** dup_512_f32_x: ++** movi v([0-9]+).4s, 0x44, lsl 24 ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_f32_x, svfloat32_t, ++ z0 = svdup_n_f32_x (p0, 512), ++ z0 = svdup_f32_x (p0, 512)) ++ ++/* ++** dup_513_f32_x: ++** ... ++** ld1rw z0\.s, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_513_f32_x, svfloat32_t, ++ z0 = svdup_n_f32_x (p0, 513), ++ z0 = svdup_f32_x (p0, 513)) ++ ++/* ++** dup_s4_f32_x: ++** mov z0\.s, s4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_s4_f32_x, svfloat32_t, float, ++ z0 = svdup_n_f32_x (p0, d4), ++ z0 = svdup_f32_x (p0, d4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c +new file mode 100644 +index 000000000..e177d9108 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c +@@ -0,0 +1,212 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_f64: ++** fmov z0\.d, #1\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f64, svfloat64_t, ++ z0 = svdup_n_f64 (1), ++ z0 = svdup_f64 (1)) ++ ++/* ++** dup_0_f64: ++** mov z0\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f64, svfloat64_t, ++ z0 = svdup_n_f64 (0), ++ z0 = svdup_f64 (0)) ++ ++/* ++** dup_8_f64: ++** fmov z0\.d, #8\.0(?:e\+0)? 
++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f64, svfloat64_t, ++ z0 = svdup_n_f64 (8), ++ z0 = svdup_f64 (8)) ++ ++/* ++** dup_512_f64: ++** mov (x[0-9]+), 4647714815446351872 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_f64, svfloat64_t, ++ z0 = svdup_n_f64 (512), ++ z0 = svdup_f64 (512)) ++ ++/* ++** dup_513_f64: ++** ... ++** ld1rd z0\.d, p[0-7]/z, \[x[0-9+]\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_513_f64, svfloat64_t, ++ z0 = svdup_n_f64 (513), ++ z0 = svdup_f64 (513)) ++ ++/* ++** dup_d4_f64: ++** mov z0\.d, d4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_d4_f64, svfloat64_t, double, ++ z0 = svdup_n_f64 (d4), ++ z0 = svdup_f64 (d4)) ++ ++/* ++** dup_1_f64_m: ++** fmov z0\.d, p0/m, #1\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f64_m, svfloat64_t, ++ z0 = svdup_n_f64_m (z0, p0, 1), ++ z0 = svdup_f64_m (z0, p0, 1)) ++ ++/* ++** dup_0_f64_m: ++** mov z0\.d, p0/m, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f64_m, svfloat64_t, ++ z0 = svdup_n_f64_m (z0, p0, 0), ++ z0 = svdup_f64_m (z0, p0, 0)) ++ ++/* ++** dup_8_f64_m: ++** fmov z0\.d, p0/m, #8\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f64_m, svfloat64_t, ++ z0 = svdup_n_f64_m (z0, p0, 8), ++ z0 = svdup_f64_m (z0, p0, 8)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_512_f64_m, svfloat64_t, ++ z0 = svdup_n_f64_m (z0, p0, 512), ++ z0 = svdup_f64_m (z0, p0, 512)) ++ ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_513_f64_m, svfloat64_t, ++ z0 = svdup_n_f64_m (z0, p0, 513), ++ z0 = svdup_f64_m (z0, p0, 513)) ++ ++/* ++** dup_d4_f64_m: ++** movprfx z0, z1 ++** mov z0\.d, p0/m, d4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_d4_f64_m, svfloat64_t, double, ++ z0 = svdup_n_f64_m (z1, p0, d4), ++ z0 = svdup_f64_m (z1, p0, d4)) ++ ++/* ++** dup_1_f64_z: ++** movprfx z0\.d, p0/z, z0\.d ++** fmov z0\.d, p0/m, #1\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f64_z, svfloat64_t, ++ z0 = svdup_n_f64_z (p0, 1), ++ z0 = svdup_f64_z (p0, 1)) ++ ++/* ++** dup_0_f64_z: ++** mov z0\.d, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f64_z, svfloat64_t, ++ z0 = svdup_n_f64_z (p0, 0), ++ z0 = svdup_f64_z (p0, 0)) ++ ++/* ++** dup_8_f64_z: ++** movprfx z0\.d, p0/z, z0\.d ++** fmov z0\.d, p0/m, #8\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f64_z, svfloat64_t, ++ z0 = svdup_n_f64_z (p0, 8), ++ z0 = svdup_f64_z (p0, 8)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_512_f64_z, svfloat64_t, ++ z0 = svdup_n_f64_z (p0, 512), ++ z0 = svdup_f64_z (p0, 512)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_513_f64_z, svfloat64_t, ++ z0 = svdup_n_f64_z (p0, 513), ++ z0 = svdup_f64_z (p0, 513)) ++ ++/* ++** dup_d4_f64_z: ++** movprfx z0\.d, p0/z, z0\.d ++** mov z0\.d, p0/m, d4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_d4_f64_z, svfloat64_t, double, ++ z0 = svdup_n_f64_z (p0, d4), ++ z0 = svdup_f64_z (p0, d4)) ++ ++/* ++** dup_1_f64_x: ++** fmov z0\.d, #1\.0(?:e\+0)? ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_f64_x, svfloat64_t, ++ z0 = svdup_n_f64_x (p0, 1), ++ z0 = svdup_f64_x (p0, 1)) ++ ++/* ++** dup_0_f64_x: ++** mov z0\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_f64_x, svfloat64_t, ++ z0 = svdup_n_f64_x (p0, 0), ++ z0 = svdup_f64_x (p0, 0)) ++ ++/* ++** dup_8_f64_x: ++** fmov z0\.d, #8\.0(?:e\+0)? 
++** ret ++*/ ++TEST_UNIFORM_Z (dup_8_f64_x, svfloat64_t, ++ z0 = svdup_n_f64_x (p0, 8), ++ z0 = svdup_f64_x (p0, 8)) ++ ++/* ++** dup_512_f64_x: ++** mov (x[0-9]+), 4647714815446351872 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_f64_x, svfloat64_t, ++ z0 = svdup_n_f64_x (p0, 512), ++ z0 = svdup_f64_x (p0, 512)) ++ ++/* ++** dup_513_f64_x: ++** ... ++** ld1rd z0\.d, p[0-7]/z, \[x[0-9+]\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_513_f64_x, svfloat64_t, ++ z0 = svdup_n_f64_x (p0, 513), ++ z0 = svdup_f64_x (p0, 513)) ++ ++/* ++** dup_d4_f64_x: ++** mov z0\.d, d4 ++** ret ++*/ ++TEST_UNIFORM_ZD (dup_d4_f64_x, svfloat64_t, double, ++ z0 = svdup_n_f64_x (p0, d4), ++ z0 = svdup_f64_x (p0, d4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_bf16.c +new file mode 100644 +index 000000000..d05ad5adb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_bf16.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_w0_bf16_tied1: ++** mov (z[0-9]+\.h), w0 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_bf16_tied1, svbfloat16_t, uint16_t, ++ z0 = svdup_lane_bf16 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_w0_bf16_untied: ++** mov (z[0-9]+\.h), w0 ++** tbl z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_bf16_untied, svbfloat16_t, uint16_t, ++ z0 = svdup_lane_bf16 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_bf16_tied1: ++** dup z0\.h, z0\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_bf16_tied1, svbfloat16_t, ++ z0 = svdup_lane_bf16 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_bf16_untied: ++** dup z0\.h, z1\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_bf16_untied, svbfloat16_t, ++ z0 = svdup_lane_bf16 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_15_bf16: ++** dup z0\.h, z0\.h\[15\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_bf16, svbfloat16_t, ++ z0 = svdup_lane_bf16 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_bf16: ++** dup z0\.h, z0\.h\[16\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_bf16, svbfloat16_t, ++ z0 = svdup_lane_bf16 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_bf16: ++** dup z0\.h, z0\.h\[31\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_bf16, svbfloat16_t, ++ z0 = svdup_lane_bf16 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_bf16: ++** mov (z[0-9]+\.h), #32 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_bf16, svbfloat16_t, ++ z0 = svdup_lane_bf16 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_bf16: ++** mov (z[0-9]+\.h), #63 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_bf16, svbfloat16_t, ++ z0 = svdup_lane_bf16 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_bf16: ++** mov (z[0-9]+\.h), #64 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_bf16, svbfloat16_t, ++ z0 = svdup_lane_bf16 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_bf16: ++** mov (z[0-9]+\.h), #255 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_bf16, svbfloat16_t, ++ z0 = svdup_lane_bf16 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f16.c +new file mode 100644 +index 000000000..142afbb24 
+--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f16.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_w0_f16_tied1: ++** mov (z[0-9]+\.h), w0 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_f16_tied1, svfloat16_t, uint16_t, ++ z0 = svdup_lane_f16 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_w0_f16_untied: ++** mov (z[0-9]+\.h), w0 ++** tbl z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_f16_untied, svfloat16_t, uint16_t, ++ z0 = svdup_lane_f16 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_f16_tied1: ++** dup z0\.h, z0\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_f16_tied1, svfloat16_t, ++ z0 = svdup_lane_f16 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_f16_untied: ++** dup z0\.h, z1\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_f16_untied, svfloat16_t, ++ z0 = svdup_lane_f16 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_15_f16: ++** dup z0\.h, z0\.h\[15\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_f16, svfloat16_t, ++ z0 = svdup_lane_f16 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_f16: ++** dup z0\.h, z0\.h\[16\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_f16, svfloat16_t, ++ z0 = svdup_lane_f16 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_f16: ++** dup z0\.h, z0\.h\[31\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_f16, svfloat16_t, ++ z0 = svdup_lane_f16 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_f16: ++** mov (z[0-9]+\.h), #32 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_f16, svfloat16_t, ++ z0 = svdup_lane_f16 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_f16: ++** mov (z[0-9]+\.h), #63 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_f16, svfloat16_t, ++ z0 = svdup_lane_f16 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_f16: ++** mov (z[0-9]+\.h), #64 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_f16, svfloat16_t, ++ z0 = svdup_lane_f16 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_f16: ++** mov (z[0-9]+\.h), #255 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_f16, svfloat16_t, ++ z0 = svdup_lane_f16 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f32.c +new file mode 100644 +index 000000000..b32068a37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f32.c +@@ -0,0 +1,110 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_w0_f32_tied1: ++** mov (z[0-9]+\.s), w0 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_f32_tied1, svfloat32_t, uint32_t, ++ z0 = svdup_lane_f32 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_w0_f32_untied: ++** mov (z[0-9]+\.s), w0 ++** tbl z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_f32_untied, svfloat32_t, uint32_t, ++ z0 = svdup_lane_f32 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_f32_tied1: ++** dup z0\.s, z0\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_f32_tied1, svfloat32_t, ++ z0 = svdup_lane_f32 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_f32_untied: ++** dup z0\.s, z1\.s\[0\] ++** ret ++*/ 
++TEST_UNIFORM_Z (dup_lane_0_f32_untied, svfloat32_t, ++ z0 = svdup_lane_f32 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_15_f32: ++** dup z0\.s, z0\.s\[15\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_f32, svfloat32_t, ++ z0 = svdup_lane_f32 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_f32: ++** mov (z[0-9]+\.s), #16 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_f32, svfloat32_t, ++ z0 = svdup_lane_f32 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_f32: ++** mov (z[0-9]+\.s), #31 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_f32, svfloat32_t, ++ z0 = svdup_lane_f32 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_f32: ++** mov (z[0-9]+\.s), #32 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_f32, svfloat32_t, ++ z0 = svdup_lane_f32 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_f32: ++** mov (z[0-9]+\.s), #63 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_f32, svfloat32_t, ++ z0 = svdup_lane_f32 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_f32: ++** mov (z[0-9]+\.s), #64 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_f32, svfloat32_t, ++ z0 = svdup_lane_f32 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_f32: ++** mov (z[0-9]+\.s), #255 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_f32, svfloat32_t, ++ z0 = svdup_lane_f32 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f64.c +new file mode 100644 +index 000000000..64af50d0c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f64.c +@@ -0,0 +1,111 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_x0_f64_tied1: ++** mov (z[0-9]+\.d), x0 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_x0_f64_tied1, svfloat64_t, uint64_t, ++ z0 = svdup_lane_f64 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_x0_f64_untied: ++** mov (z[0-9]+\.d), x0 ++** tbl z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_x0_f64_untied, svfloat64_t, uint64_t, ++ z0 = svdup_lane_f64 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_f64_tied1: ++** dup z0\.d, z0\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_f64_tied1, svfloat64_t, ++ z0 = svdup_lane_f64 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_f64_untied: ++** dup z0\.d, z1\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_f64_untied, svfloat64_t, ++ z0 = svdup_lane_f64 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_15_f64: ++** mov (z[0-9]+\.d), #15 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_f64, svfloat64_t, ++ z0 = svdup_lane_f64 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_f64: ++** mov (z[0-9]+\.d), #16 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_f64, svfloat64_t, ++ z0 = svdup_lane_f64 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_f64: ++** mov (z[0-9]+\.d), #31 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_f64, svfloat64_t, ++ z0 = svdup_lane_f64 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_f64: ++** mov (z[0-9]+\.d), #32 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_f64, svfloat64_t, ++ z0 = 
svdup_lane_f64 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_f64: ++** mov (z[0-9]+\.d), #63 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_f64, svfloat64_t, ++ z0 = svdup_lane_f64 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_f64: ++** mov (z[0-9]+\.d), #64 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_f64, svfloat64_t, ++ z0 = svdup_lane_f64 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_f64: ++** mov (z[0-9]+\.d), #255 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_f64, svfloat64_t, ++ z0 = svdup_lane_f64 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s16.c +new file mode 100644 +index 000000000..3b6f20696 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s16.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_w0_s16_tied1: ++** mov (z[0-9]+\.h), w0 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_s16_tied1, svint16_t, uint16_t, ++ z0 = svdup_lane_s16 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_w0_s16_untied: ++** mov (z[0-9]+\.h), w0 ++** tbl z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_s16_untied, svint16_t, uint16_t, ++ z0 = svdup_lane_s16 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_s16_tied1: ++** dup z0\.h, z0\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_s16_tied1, svint16_t, ++ z0 = svdup_lane_s16 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_s16_untied: ++** dup z0\.h, z1\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_s16_untied, svint16_t, ++ z0 = svdup_lane_s16 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_7_s16: ++** dup z0\.h, z0\.h\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_7_s16, svint16_t, ++ z0 = svdup_lane_s16 (z0, 7), ++ z0 = svdup_lane (z0, 7)) ++ ++/* ++** dup_lane_8_s16: ++** dup z0\.h, z0\.h\[8\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_8_s16, svint16_t, ++ z0 = svdup_lane_s16 (z0, 8), ++ z0 = svdup_lane (z0, 8)) ++ ++/* ++** dup_lane_15_s16: ++** dup z0\.h, z0\.h\[15\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_s16, svint16_t, ++ z0 = svdup_lane_s16 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_s16: ++** dup z0\.h, z0\.h\[16\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_s16, svint16_t, ++ z0 = svdup_lane_s16 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_s16: ++** dup z0\.h, z0\.h\[31\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_s16, svint16_t, ++ z0 = svdup_lane_s16 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_s16: ++** mov (z[0-9]+\.h), #32 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_s16, svint16_t, ++ z0 = svdup_lane_s16 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_s16: ++** mov (z[0-9]+\.h), #63 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_s16, svint16_t, ++ z0 = svdup_lane_s16 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_s16: ++** mov (z[0-9]+\.h), #64 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_s16, svint16_t, ++ z0 = svdup_lane_s16 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_s16: ++** mov (z[0-9]+\.h), #255 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_s16, svint16_t, 
++ z0 = svdup_lane_s16 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s32.c +new file mode 100644 +index 000000000..bf597fdf6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s32.c +@@ -0,0 +1,128 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_w0_s32_tied1: ++** mov (z[0-9]+\.s), w0 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_s32_tied1, svint32_t, uint32_t, ++ z0 = svdup_lane_s32 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_w0_s32_untied: ++** mov (z[0-9]+\.s), w0 ++** tbl z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_s32_untied, svint32_t, uint32_t, ++ z0 = svdup_lane_s32 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_s32_tied1: ++** dup z0\.s, z0\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_s32_tied1, svint32_t, ++ z0 = svdup_lane_s32 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_s32_untied: ++** dup z0\.s, z1\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_s32_untied, svint32_t, ++ z0 = svdup_lane_s32 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_7_s32: ++** dup z0\.s, z0\.s\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_7_s32, svint32_t, ++ z0 = svdup_lane_s32 (z0, 7), ++ z0 = svdup_lane (z0, 7)) ++ ++/* ++** dup_lane_8_s32: ++** dup z0\.s, z0\.s\[8\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_8_s32, svint32_t, ++ z0 = svdup_lane_s32 (z0, 8), ++ z0 = svdup_lane (z0, 8)) ++ ++/* ++** dup_lane_15_s32: ++** dup z0\.s, z0\.s\[15\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_s32, svint32_t, ++ z0 = svdup_lane_s32 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_s32: ++** mov (z[0-9]+\.s), #16 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_s32, svint32_t, ++ z0 = svdup_lane_s32 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_s32: ++** mov (z[0-9]+\.s), #31 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_s32, svint32_t, ++ z0 = svdup_lane_s32 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_s32: ++** mov (z[0-9]+\.s), #32 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_s32, svint32_t, ++ z0 = svdup_lane_s32 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_s32: ++** mov (z[0-9]+\.s), #63 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_s32, svint32_t, ++ z0 = svdup_lane_s32 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_s32: ++** mov (z[0-9]+\.s), #64 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_s32, svint32_t, ++ z0 = svdup_lane_s32 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_s32: ++** mov (z[0-9]+\.s), #255 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_s32, svint32_t, ++ z0 = svdup_lane_s32 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s64.c +new file mode 100644 +index 000000000..f2f3a1770 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s64.c +@@ -0,0 +1,130 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_x0_s64_tied1: ++** mov (z[0-9]+\.d), x0 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ 
++TEST_UNIFORM_ZX (dup_lane_x0_s64_tied1, svint64_t, uint64_t, ++ z0 = svdup_lane_s64 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_x0_s64_untied: ++** mov (z[0-9]+\.d), x0 ++** tbl z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_x0_s64_untied, svint64_t, uint64_t, ++ z0 = svdup_lane_s64 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_s64_tied1: ++** dup z0\.d, z0\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_s64_tied1, svint64_t, ++ z0 = svdup_lane_s64 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_s64_untied: ++** dup z0\.d, z1\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_s64_untied, svint64_t, ++ z0 = svdup_lane_s64 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_7_s64: ++** dup z0\.d, z0\.d\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_7_s64, svint64_t, ++ z0 = svdup_lane_s64 (z0, 7), ++ z0 = svdup_lane (z0, 7)) ++ ++/* ++** dup_lane_8_s64: ++** mov (z[0-9]+\.d), #8 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_8_s64, svint64_t, ++ z0 = svdup_lane_s64 (z0, 8), ++ z0 = svdup_lane (z0, 8)) ++ ++/* ++** dup_lane_15_s64: ++** mov (z[0-9]+\.d), #15 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_s64, svint64_t, ++ z0 = svdup_lane_s64 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_s64: ++** mov (z[0-9]+\.d), #16 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_s64, svint64_t, ++ z0 = svdup_lane_s64 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_s64: ++** mov (z[0-9]+\.d), #31 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_s64, svint64_t, ++ z0 = svdup_lane_s64 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_s64: ++** mov (z[0-9]+\.d), #32 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_s64, svint64_t, ++ z0 = svdup_lane_s64 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_s64: ++** mov (z[0-9]+\.d), #63 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_s64, svint64_t, ++ z0 = svdup_lane_s64 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_s64: ++** mov (z[0-9]+\.d), #64 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_s64, svint64_t, ++ z0 = svdup_lane_s64 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_s64: ++** mov (z[0-9]+\.d), #255 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_s64, svint64_t, ++ z0 = svdup_lane_s64 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s8.c +new file mode 100644 +index 000000000..f5a07e9f3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s8.c +@@ -0,0 +1,124 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_w0_s8_tied1: ++** mov (z[0-9]+\.b), w0 ++** tbl z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_s8_tied1, svint8_t, uint8_t, ++ z0 = svdup_lane_s8 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_w0_s8_untied: ++** mov (z[0-9]+\.b), w0 ++** tbl z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_s8_untied, svint8_t, uint8_t, ++ z0 = svdup_lane_s8 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_s8_tied1: ++** dup z0\.b, z0\.b\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_s8_tied1, svint8_t, ++ z0 = svdup_lane_s8 (z0, 0), ++ z0 = svdup_lane 
(z0, 0)) ++ ++/* ++** dup_lane_0_s8_untied: ++** dup z0\.b, z1\.b\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_s8_untied, svint8_t, ++ z0 = svdup_lane_s8 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_7_s8: ++** dup z0\.b, z0\.b\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_7_s8, svint8_t, ++ z0 = svdup_lane_s8 (z0, 7), ++ z0 = svdup_lane (z0, 7)) ++ ++/* ++** dup_lane_8_s8: ++** dup z0\.b, z0\.b\[8\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_8_s8, svint8_t, ++ z0 = svdup_lane_s8 (z0, 8), ++ z0 = svdup_lane (z0, 8)) ++ ++/* ++** dup_lane_15_s8: ++** dup z0\.b, z0\.b\[15\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_s8, svint8_t, ++ z0 = svdup_lane_s8 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_s8: ++** dup z0\.b, z0\.b\[16\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_s8, svint8_t, ++ z0 = svdup_lane_s8 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_s8: ++** dup z0\.b, z0\.b\[31\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_s8, svint8_t, ++ z0 = svdup_lane_s8 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_s8: ++** dup z0\.b, z0\.b\[32\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_s8, svint8_t, ++ z0 = svdup_lane_s8 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_s8: ++** dup z0\.b, z0\.b\[63\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_s8, svint8_t, ++ z0 = svdup_lane_s8 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_s8: ++** mov (z[0-9]+\.b), #64 ++** tbl z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_s8, svint8_t, ++ z0 = svdup_lane_s8 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_s8: ++** mov (z[0-9]+\.b), #-1 ++** tbl z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_s8, svint8_t, ++ z0 = svdup_lane_s8 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u16.c +new file mode 100644 +index 000000000..e5135caa5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u16.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_w0_u16_tied1: ++** mov (z[0-9]+\.h), w0 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_u16_tied1, svuint16_t, uint16_t, ++ z0 = svdup_lane_u16 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_w0_u16_untied: ++** mov (z[0-9]+\.h), w0 ++** tbl z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_u16_untied, svuint16_t, uint16_t, ++ z0 = svdup_lane_u16 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_u16_tied1: ++** dup z0\.h, z0\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_u16_tied1, svuint16_t, ++ z0 = svdup_lane_u16 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_u16_untied: ++** dup z0\.h, z1\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_u16_untied, svuint16_t, ++ z0 = svdup_lane_u16 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_7_u16: ++** dup z0\.h, z0\.h\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_7_u16, svuint16_t, ++ z0 = svdup_lane_u16 (z0, 7), ++ z0 = svdup_lane (z0, 7)) ++ ++/* ++** dup_lane_8_u16: ++** dup z0\.h, z0\.h\[8\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_8_u16, svuint16_t, ++ z0 = svdup_lane_u16 (z0, 8), ++ z0 = svdup_lane (z0, 8)) ++ ++/* ++** dup_lane_15_u16: ++** dup z0\.h, z0\.h\[15\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_u16, svuint16_t, ++ z0 = svdup_lane_u16 (z0, 15), 
++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_u16: ++** dup z0\.h, z0\.h\[16\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_u16, svuint16_t, ++ z0 = svdup_lane_u16 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_u16: ++** dup z0\.h, z0\.h\[31\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_u16, svuint16_t, ++ z0 = svdup_lane_u16 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_u16: ++** mov (z[0-9]+\.h), #32 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_u16, svuint16_t, ++ z0 = svdup_lane_u16 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_u16: ++** mov (z[0-9]+\.h), #63 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_u16, svuint16_t, ++ z0 = svdup_lane_u16 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_u16: ++** mov (z[0-9]+\.h), #64 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_u16, svuint16_t, ++ z0 = svdup_lane_u16 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_u16: ++** mov (z[0-9]+\.h), #255 ++** tbl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_u16, svuint16_t, ++ z0 = svdup_lane_u16 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u32.c +new file mode 100644 +index 000000000..7e972aca7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u32.c +@@ -0,0 +1,128 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_w0_u32_tied1: ++** mov (z[0-9]+\.s), w0 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_u32_tied1, svuint32_t, uint32_t, ++ z0 = svdup_lane_u32 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_w0_u32_untied: ++** mov (z[0-9]+\.s), w0 ++** tbl z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_u32_untied, svuint32_t, uint32_t, ++ z0 = svdup_lane_u32 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_u32_tied1: ++** dup z0\.s, z0\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_u32_tied1, svuint32_t, ++ z0 = svdup_lane_u32 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_u32_untied: ++** dup z0\.s, z1\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_u32_untied, svuint32_t, ++ z0 = svdup_lane_u32 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_7_u32: ++** dup z0\.s, z0\.s\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_7_u32, svuint32_t, ++ z0 = svdup_lane_u32 (z0, 7), ++ z0 = svdup_lane (z0, 7)) ++ ++/* ++** dup_lane_8_u32: ++** dup z0\.s, z0\.s\[8\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_8_u32, svuint32_t, ++ z0 = svdup_lane_u32 (z0, 8), ++ z0 = svdup_lane (z0, 8)) ++ ++/* ++** dup_lane_15_u32: ++** dup z0\.s, z0\.s\[15\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_u32, svuint32_t, ++ z0 = svdup_lane_u32 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_u32: ++** mov (z[0-9]+\.s), #16 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_u32, svuint32_t, ++ z0 = svdup_lane_u32 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_u32: ++** mov (z[0-9]+\.s), #31 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_u32, svuint32_t, ++ z0 = svdup_lane_u32 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_u32: ++** mov (z[0-9]+\.s), #32 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_u32, svuint32_t, ++ z0 = 
svdup_lane_u32 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_u32: ++** mov (z[0-9]+\.s), #63 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_u32, svuint32_t, ++ z0 = svdup_lane_u32 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_u32: ++** mov (z[0-9]+\.s), #64 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_u32, svuint32_t, ++ z0 = svdup_lane_u32 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_u32: ++** mov (z[0-9]+\.s), #255 ++** tbl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_u32, svuint32_t, ++ z0 = svdup_lane_u32 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u64.c +new file mode 100644 +index 000000000..5097b7e96 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u64.c +@@ -0,0 +1,130 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_x0_u64_tied1: ++** mov (z[0-9]+\.d), x0 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_x0_u64_tied1, svuint64_t, uint64_t, ++ z0 = svdup_lane_u64 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_x0_u64_untied: ++** mov (z[0-9]+\.d), x0 ++** tbl z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_x0_u64_untied, svuint64_t, uint64_t, ++ z0 = svdup_lane_u64 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_u64_tied1: ++** dup z0\.d, z0\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_u64_tied1, svuint64_t, ++ z0 = svdup_lane_u64 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_u64_untied: ++** dup z0\.d, z1\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_u64_untied, svuint64_t, ++ z0 = svdup_lane_u64 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_7_u64: ++** dup z0\.d, z0\.d\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_7_u64, svuint64_t, ++ z0 = svdup_lane_u64 (z0, 7), ++ z0 = svdup_lane (z0, 7)) ++ ++/* ++** dup_lane_8_u64: ++** mov (z[0-9]+\.d), #8 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_8_u64, svuint64_t, ++ z0 = svdup_lane_u64 (z0, 8), ++ z0 = svdup_lane (z0, 8)) ++ ++/* ++** dup_lane_15_u64: ++** mov (z[0-9]+\.d), #15 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_u64, svuint64_t, ++ z0 = svdup_lane_u64 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_u64: ++** mov (z[0-9]+\.d), #16 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_u64, svuint64_t, ++ z0 = svdup_lane_u64 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_u64: ++** mov (z[0-9]+\.d), #31 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_u64, svuint64_t, ++ z0 = svdup_lane_u64 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_u64: ++** mov (z[0-9]+\.d), #32 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_u64, svuint64_t, ++ z0 = svdup_lane_u64 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_u64: ++** mov (z[0-9]+\.d), #63 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_u64, svuint64_t, ++ z0 = svdup_lane_u64 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_u64: ++** mov (z[0-9]+\.d), #64 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_u64, svuint64_t, ++ z0 = svdup_lane_u64 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_u64: ++** 
mov (z[0-9]+\.d), #255 ++** tbl z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_u64, svuint64_t, ++ z0 = svdup_lane_u64 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u8.c +new file mode 100644 +index 000000000..25fdf0acb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u8.c +@@ -0,0 +1,124 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_lane_w0_u8_tied1: ++** mov (z[0-9]+\.b), w0 ++** tbl z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_u8_tied1, svuint8_t, uint8_t, ++ z0 = svdup_lane_u8 (z0, x0), ++ z0 = svdup_lane (z0, x0)) ++ ++/* ++** dup_lane_w0_u8_untied: ++** mov (z[0-9]+\.b), w0 ++** tbl z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_lane_w0_u8_untied, svuint8_t, uint8_t, ++ z0 = svdup_lane_u8 (z1, x0), ++ z0 = svdup_lane (z1, x0)) ++ ++/* ++** dup_lane_0_u8_tied1: ++** dup z0\.b, z0\.b\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_u8_tied1, svuint8_t, ++ z0 = svdup_lane_u8 (z0, 0), ++ z0 = svdup_lane (z0, 0)) ++ ++/* ++** dup_lane_0_u8_untied: ++** dup z0\.b, z1\.b\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_0_u8_untied, svuint8_t, ++ z0 = svdup_lane_u8 (z1, 0), ++ z0 = svdup_lane (z1, 0)) ++ ++/* ++** dup_lane_7_u8: ++** dup z0\.b, z0\.b\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_7_u8, svuint8_t, ++ z0 = svdup_lane_u8 (z0, 7), ++ z0 = svdup_lane (z0, 7)) ++ ++/* ++** dup_lane_8_u8: ++** dup z0\.b, z0\.b\[8\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_8_u8, svuint8_t, ++ z0 = svdup_lane_u8 (z0, 8), ++ z0 = svdup_lane (z0, 8)) ++ ++/* ++** dup_lane_15_u8: ++** dup z0\.b, z0\.b\[15\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_15_u8, svuint8_t, ++ z0 = svdup_lane_u8 (z0, 15), ++ z0 = svdup_lane (z0, 15)) ++ ++/* ++** dup_lane_16_u8: ++** dup z0\.b, z0\.b\[16\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_16_u8, svuint8_t, ++ z0 = svdup_lane_u8 (z0, 16), ++ z0 = svdup_lane (z0, 16)) ++ ++/* ++** dup_lane_31_u8: ++** dup z0\.b, z0\.b\[31\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_31_u8, svuint8_t, ++ z0 = svdup_lane_u8 (z0, 31), ++ z0 = svdup_lane (z0, 31)) ++ ++/* ++** dup_lane_32_u8: ++** dup z0\.b, z0\.b\[32\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_32_u8, svuint8_t, ++ z0 = svdup_lane_u8 (z0, 32), ++ z0 = svdup_lane (z0, 32)) ++ ++/* ++** dup_lane_63_u8: ++** dup z0\.b, z0\.b\[63\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_63_u8, svuint8_t, ++ z0 = svdup_lane_u8 (z0, 63), ++ z0 = svdup_lane (z0, 63)) ++ ++/* ++** dup_lane_64_u8: ++** mov (z[0-9]+\.b), #64 ++** tbl z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_64_u8, svuint8_t, ++ z0 = svdup_lane_u8 (z0, 64), ++ z0 = svdup_lane (z0, 64)) ++ ++/* ++** dup_lane_255_u8: ++** mov (z[0-9]+\.b), #-1 ++** tbl z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_lane_255_u8, svuint8_t, ++ z0 = svdup_lane_u8 (z0, 255), ++ z0 = svdup_lane (z0, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c +new file mode 100644 +index 000000000..876f36db7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c +@@ -0,0 +1,1193 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_s16: ++** mov z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s16, svint16_t, ++ z0 = svdup_n_s16 (1), ++ z0 = svdup_s16 (1)) ++ ++/* ++** 
dup_127_s16: ++** mov z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s16, svint16_t, ++ z0 = svdup_n_s16 (127), ++ z0 = svdup_s16 (127)) ++ ++/* ++** dup_128_s16: ++** mov z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s16, svint16_t, ++ z0 = svdup_n_s16 (128), ++ z0 = svdup_s16 (128)) ++ ++/* ++** dup_129_s16: ++** movi v([0-9]+)\.8h, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_s16, svint16_t, ++ z0 = svdup_n_s16 (129), ++ z0 = svdup_s16 (129)) ++ ++/* ++** dup_253_s16: ++** movi v([0-9]+)\.8h, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_s16, svint16_t, ++ z0 = svdup_n_s16 (253), ++ z0 = svdup_s16 (253)) ++ ++/* ++** dup_254_s16: ++** mov z0\.h, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s16, svint16_t, ++ z0 = svdup_n_s16 (254), ++ z0 = svdup_s16 (254)) ++ ++/* ++** dup_255_s16: ++** mov z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s16, svint16_t, ++ z0 = svdup_n_s16 (255), ++ z0 = svdup_s16 (255)) ++ ++/* ++** dup_256_s16: ++** mov z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s16, svint16_t, ++ z0 = svdup_n_s16 (256), ++ z0 = svdup_s16 (256)) ++ ++/* ++** dup_257_s16: ++** mov z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_s16, svint16_t, ++ z0 = svdup_n_s16 (257), ++ z0 = svdup_s16 (257)) ++ ++/* ++** dup_512_s16: ++** mov z0\.h, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s16, svint16_t, ++ z0 = svdup_n_s16 (512), ++ z0 = svdup_s16 (512)) ++ ++/* ++** dup_7f00_s16: ++** mov z0\.h, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s16, svint16_t, ++ z0 = svdup_n_s16 (0x7f00), ++ z0 = svdup_s16 (0x7f00)) ++ ++/* ++** dup_7f01_s16: ++** mov (w[0-9]+), 32513 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_s16, svint16_t, ++ z0 = svdup_n_s16 (0x7f01), ++ z0 = svdup_s16 (0x7f01)) ++ ++/* ++** dup_7ffd_s16: ++** mov (w[0-9]+), 32765 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_s16, svint16_t, ++ z0 = svdup_n_s16 (0x7ffd), ++ z0 = svdup_s16 (0x7ffd)) ++ ++/* ++** dup_7ffe_s16: ++** mov z0\.h, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s16, svint16_t, ++ z0 = svdup_n_s16 (0x7ffe), ++ z0 = svdup_s16 (0x7ffe)) ++ ++/* ++** dup_7fff_s16: ++** mov z0\.h, #32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_s16, svint16_t, ++ z0 = svdup_n_s16 (0x7fff), ++ z0 = svdup_s16 (0x7fff)) ++ ++/* ++** dup_m1_s16: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s16, svint16_t, ++ z0 = svdup_n_s16 (-1), ++ z0 = svdup_s16 (-1)) ++ ++/* ++** dup_m128_s16: ++** mov z0\.h, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s16, svint16_t, ++ z0 = svdup_n_s16 (-128), ++ z0 = svdup_s16 (-128)) ++ ++/* ++** dup_m129_s16: ++** mov z0\.h, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s16, svint16_t, ++ z0 = svdup_n_s16 (-129), ++ z0 = svdup_s16 (-129)) ++ ++/* ++** dup_m130_s16: ++** mvni v([0-9]+)\.8h, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_s16, svint16_t, ++ z0 = svdup_n_s16 (-130), ++ z0 = svdup_s16 (-130)) ++ ++/* ++** dup_m254_s16: ++** mvni v([0-9]+)\.8h, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_s16, svint16_t, ++ z0 = svdup_n_s16 (-254), ++ z0 = svdup_s16 (-254)) ++ ++/* ++** dup_m255_s16: ++** mov z0\.h, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s16, svint16_t, ++ z0 = svdup_n_s16 (-255), ++ z0 = svdup_s16 (-255)) ++ ++/* ++** dup_m256_s16: ++** mov z0\.h, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s16, svint16_t, ++ z0 = svdup_n_s16 (-256), ++ z0 = svdup_s16 (-256)) ++ ++/* ++** dup_m257_s16: ++** mov z0\.h, #-257 
++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s16, svint16_t, ++ z0 = svdup_n_s16 (-257), ++ z0 = svdup_s16 (-257)) ++ ++/* ++** dup_m258_s16: ++** mov z0\.b, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_s16, svint16_t, ++ z0 = svdup_n_s16 (-258), ++ z0 = svdup_s16 (-258)) ++ ++/* ++** dup_m259_s16: ++** mov (w[0-9]+), -259 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_s16, svint16_t, ++ z0 = svdup_n_s16 (-259), ++ z0 = svdup_s16 (-259)) ++ ++/* ++** dup_m512_s16: ++** mov z0\.h, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s16, svint16_t, ++ z0 = svdup_n_s16 (-512), ++ z0 = svdup_s16 (-512)) ++ ++/* ++** dup_m7f00_s16: ++** mov z0\.h, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s16, svint16_t, ++ z0 = svdup_n_s16 (-0x7f00), ++ z0 = svdup_s16 (-0x7f00)) ++ ++/* ++** dup_m7f01_s16: ++** mov z0\.h, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s16, svint16_t, ++ z0 = svdup_n_s16 (-0x7f01), ++ z0 = svdup_s16 (-0x7f01)) ++ ++/* ++** dup_m7f02_s16: ++** mov (w[0-9]+), -32514 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_s16, svint16_t, ++ z0 = svdup_n_s16 (-0x7f02), ++ z0 = svdup_s16 (-0x7f02)) ++ ++/* ++** dup_m7ffe_s16: ++** mov (w[0-9]+), -32766 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_s16, svint16_t, ++ z0 = svdup_n_s16 (-0x7ffe), ++ z0 = svdup_s16 (-0x7ffe)) ++ ++/* ++** dup_m7fff_s16: ++** mov z0\.h, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s16, svint16_t, ++ z0 = svdup_n_s16 (-0x7fff), ++ z0 = svdup_s16 (-0x7fff)) ++ ++/* ++** dup_m8000_s16: ++** mov z0\.h, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s16, svint16_t, ++ z0 = svdup_n_s16 (-0x8000), ++ z0 = svdup_s16 (-0x8000)) ++ ++/* ++** dup_w0_s16: ++** mov z0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s16, svint16_t, int16_t, ++ z0 = svdup_n_s16 (x0), ++ z0 = svdup_s16 (x0)) ++ ++/* ++** dup_1_s16_m: ++** mov z0\.h, p0/m, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 1), ++ z0 = svdup_s16_m (z0, p0, 1)) ++ ++/* ++** dup_127_s16_m: ++** mov z0\.h, p0/m, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 127), ++ z0 = svdup_s16_m (z0, p0, 127)) ++ ++/* ++** dup_128_s16_m: ++** mov (z[0-9]+\.h), #128 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 128), ++ z0 = svdup_s16_m (z0, p0, 128)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_129_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 129), ++ z0 = svdup_s16_m (z0, p0, 129)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_253_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 253), ++ z0 = svdup_s16_m (z0, p0, 253)) ++ ++/* ++** dup_254_s16_m: ++** mov (z[0-9]+\.h), #254 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 254), ++ z0 = svdup_s16_m (z0, p0, 254)) ++ ++/* ++** dup_255_s16_m: ++** mov (z[0-9]+\.h), #255 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 255), ++ z0 = svdup_s16_m (z0, p0, 255)) ++ ++/* ++** dup_256_s16_m: ++** mov z0\.h, p0/m, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 256), ++ z0 = svdup_s16_m (z0, p0, 256)) ++ ++/* ++** dup_257_s16_m: ++** mov (z[0-9]+)\.b, #1 ++** sel z0\.h, p0, \1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 257), ++ z0 = svdup_s16_m (z0, p0, 257)) ++ ++/* ++** dup_512_s16_m: ++** mov z0\.h, p0/m, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 512), ++ z0 = svdup_s16_m (z0, p0, 512)) ++ ++/* ++** dup_7f00_s16_m: ++** mov z0\.h, p0/m, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 0x7f00), ++ z0 = svdup_s16_m (z0, p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 0x7f01), ++ z0 = svdup_s16_m (z0, p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 0x7ffd), ++ z0 = svdup_s16_m (z0, p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_s16_m: ++** mov (z[0-9]+\.h), #32766 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 0x7ffe), ++ z0 = svdup_s16_m (z0, p0, 0x7ffe)) ++ ++/* ++** dup_7fff_s16_m: ++** mov (z[0-9]+\.h), #32767 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 0x7fff), ++ z0 = svdup_s16_m (z0, p0, 0x7fff)) ++ ++/* ++** dup_m1_s16_m: ++** mov z0\.h, p0/m, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -1), ++ z0 = svdup_s16_m (z0, p0, -1)) ++ ++/* ++** dup_m128_s16_m: ++** mov z0\.h, p0/m, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -128), ++ z0 = svdup_s16_m (z0, p0, -128)) ++ ++/* ++** dup_m129_s16_m: ++** mov (z[0-9]+\.h), #-129 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -129), ++ z0 = svdup_s16_m (z0, p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -130), ++ z0 = svdup_s16_m (z0, p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -254), ++ z0 = svdup_s16_m (z0, p0, -254)) ++ ++/* ++** dup_m255_s16_m: ++** mov (z[0-9]+\.h), #-255 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -255), ++ z0 = svdup_s16_m (z0, p0, -255)) ++ ++/* ++** dup_m256_s16_m: ++** mov z0\.h, p0/m, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -256), ++ z0 = svdup_s16_m (z0, p0, -256)) ++ ++/* ++** dup_m257_s16_m: ++** mov (z[0-9]+\.h), #-257 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -257), ++ z0 = svdup_s16_m (z0, p0, -257)) ++ ++/* ++** dup_m258_s16_m: ++** mov (z[0-9]+)\.b, #-2 ++** sel z0\.h, p0, \1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -258), ++ z0 = svdup_s16_m (z0, p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -259), ++ z0 = svdup_s16_m (z0, p0, -259)) ++ ++/* ++** dup_m512_s16_m: ++** mov z0\.h, p0/m, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -512), ++ z0 = svdup_s16_m (z0, p0, -512)) ++ ++/* ++** dup_m7f00_s16_m: ++** mov z0\.h, p0/m, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -0x7f00), ++ z0 = svdup_s16_m (z0, p0, -0x7f00)) ++ ++/* ++** dup_m7f01_s16_m: ++** mov (z[0-9]+\.h), #-32513 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -0x7f01), ++ z0 = svdup_s16_m (z0, p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -0x7f02), ++ z0 = svdup_s16_m (z0, p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7ffe_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -0x7ffe), ++ z0 = svdup_s16_m (z0, p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_s16_m: ++** mov (z[0-9]+\.h), #-32767 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -0x7fff), ++ z0 = svdup_s16_m (z0, p0, -0x7fff)) ++ ++/* ++** dup_m8000_s16_m: ++** mov z0\.h, p0/m, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, -0x8000), ++ z0 = svdup_s16_m (z0, p0, -0x8000)) ++ ++/* ++** dup_0_s16_m: ++** mov z0\.h, p0/m, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_s16_m, svint16_t, ++ z0 = svdup_n_s16_m (z0, p0, 0), ++ z0 = svdup_s16_m (z0, p0, 0)) ++ ++/* ++** dup_w0_s16_m: ++** movprfx z0, z1 ++** mov z0\.h, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s16_m, svint16_t, int16_t, ++ z0 = svdup_n_s16_m (z1, p0, x0), ++ z0 = svdup_s16_m (z1, p0, x0)) ++ ++/* ++** dup_1_s16_z: ++** mov z0\.h, p0/z, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 1), ++ z0 = svdup_s16_z (p0, 1)) ++ ++/* ++** dup_127_s16_z: ++** mov z0\.h, p0/z, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 127), ++ z0 = svdup_s16_z (p0, 127)) ++ ++/* ++** dup_128_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #128 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 128), ++ z0 = svdup_s16_z (p0, 128)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_129_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 129), ++ z0 = svdup_s16_z (p0, 129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_253_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 253), ++ z0 = svdup_s16_z (p0, 253)) ++ ++/* ++** dup_254_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #254 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 254), ++ z0 = svdup_s16_z (p0, 254)) ++ ++/* ++** dup_255_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #255 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 255), ++ z0 = svdup_s16_z (p0, 255)) ++ ++/* ++** dup_256_s16_z: ++** mov z0\.h, p0/z, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 256), ++ z0 = svdup_s16_z (p0, 256)) ++ ++/* ++** dup_257_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+)\.b, #1 ++** sel z0\.h, p0, \2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 257), ++ z0 = svdup_s16_z (p0, 257)) ++ ++/* ++** dup_512_s16_z: ++** mov z0\.h, p0/z, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 512), ++ z0 = svdup_s16_z (p0, 512)) ++ ++/* ++** dup_7f00_s16_z: ++** mov z0\.h, p0/z, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 0x7f00), ++ z0 = svdup_s16_z (p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 0x7f01), ++ z0 = svdup_s16_z (p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 0x7ffd), ++ z0 = svdup_s16_z (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #32766 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 0x7ffe), ++ z0 = svdup_s16_z (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #32767 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 0x7fff), ++ z0 = svdup_s16_z (p0, 0x7fff)) ++ ++/* ++** dup_m1_s16_z: ++** mov z0\.h, p0/z, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -1), ++ z0 = svdup_s16_z (p0, -1)) ++ ++/* ++** dup_m128_s16_z: ++** mov z0\.h, p0/z, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -128), ++ z0 = svdup_s16_z (p0, -128)) ++ ++/* ++** dup_m129_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #-129 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -129), ++ z0 = svdup_s16_z (p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -130), ++ z0 = svdup_s16_z (p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -254), ++ z0 = svdup_s16_z (p0, -254)) ++ ++/* ++** dup_m255_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #-255 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -255), ++ z0 = svdup_s16_z (p0, -255)) ++ ++/* ++** dup_m256_s16_z: ++** mov z0\.h, p0/z, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -256), ++ z0 = svdup_s16_z (p0, -256)) ++ ++/* ++** dup_m257_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #-257 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -257), ++ z0 = svdup_s16_z (p0, -257)) ++ ++/* ++** dup_m258_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+)\.b, #-2 ++** sel z0\.h, p0, \2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -258), ++ z0 = svdup_s16_z (p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -259), ++ z0 = svdup_s16_z (p0, -259)) ++ ++/* ++** dup_m512_s16_z: ++** mov z0\.h, p0/z, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -512), ++ z0 = svdup_s16_z (p0, -512)) ++ ++/* ++** dup_m7f00_s16_z: ++** mov z0\.h, p0/z, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -0x7f00), ++ z0 = svdup_s16_z (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #-32513 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -0x7f01), ++ z0 = svdup_s16_z (p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -0x7f02), ++ z0 = svdup_s16_z (p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m7ffe_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -0x7ffe), ++ z0 = svdup_s16_z (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_s16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #-32767 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -0x7fff), ++ z0 = svdup_s16_z (p0, -0x7fff)) ++ ++/* ++** dup_m8000_s16_z: ++** mov z0\.h, p0/z, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, -0x8000), ++ z0 = svdup_s16_z (p0, -0x8000)) ++ ++/* ++** dup_0_s16_z: ++** mov z0\.h, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_s16_z, svint16_t, ++ z0 = svdup_n_s16_z (p0, 0), ++ z0 = svdup_s16_z (p0, 0)) ++ ++/* ++** dup_w0_s16_z: ++** movprfx z0\.h, p0/z, z0\.h ++** mov z0\.h, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s16_z, svint16_t, int16_t, ++ z0 = svdup_n_s16_z (p0, x0), ++ z0 = svdup_s16_z (p0, x0)) ++ ++/* ++** dup_1_s16_x: ++** mov z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 1), ++ z0 = svdup_s16_x (p0, 1)) ++ ++/* ++** dup_127_s16_x: ++** mov z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 127), ++ z0 = svdup_s16_x (p0, 127)) ++ ++/* ++** dup_128_s16_x: ++** mov z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 128), ++ z0 = svdup_s16_x (p0, 128)) ++ ++/* ++** dup_129_s16_x: ++** movi v([0-9]+)\.8h, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 129), ++ z0 = svdup_s16_x (p0, 129)) ++ ++/* ++** dup_253_s16_x: ++** movi v([0-9]+)\.8h, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 253), ++ z0 = svdup_s16_x (p0, 253)) ++ ++/* ++** dup_254_s16_x: ++** mov z0\.h, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 254), ++ z0 = svdup_s16_x (p0, 254)) ++ ++/* ++** dup_255_s16_x: ++** mov z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 255), ++ z0 = svdup_s16_x (p0, 255)) ++ ++/* ++** dup_256_s16_x: ++** mov z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 256), ++ z0 = svdup_s16_x (p0, 256)) ++ ++/* ++** dup_257_s16_x: ++** mov z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 257), ++ z0 = svdup_s16_x (p0, 257)) ++ ++/* ++** dup_512_s16_x: ++** mov z0\.h, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 512), ++ z0 = svdup_s16_x (p0, 512)) ++ ++/* ++** dup_7f00_s16_x: ++** mov z0\.h, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 0x7f00), ++ z0 = svdup_s16_x (p0, 0x7f00)) ++ ++/* ++** dup_7f01_s16_x: ++** mov (w[0-9]+), 32513 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 0x7f01), ++ z0 = svdup_s16_x (p0, 0x7f01)) ++ ++/* ++** dup_7ffd_s16_x: ++** mov (w[0-9]+), 32765 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 0x7ffd), ++ z0 = svdup_s16_x (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_s16_x: ++** mov z0\.h, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 0x7ffe), ++ z0 = svdup_s16_x (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_s16_x: ++** mov z0\.h, #32767 ++** ret ++*/ 
++TEST_UNIFORM_Z (dup_7fff_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, 0x7fff), ++ z0 = svdup_s16_x (p0, 0x7fff)) ++ ++/* ++** dup_m1_s16_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -1), ++ z0 = svdup_s16_x (p0, -1)) ++ ++/* ++** dup_m128_s16_x: ++** mov z0\.h, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -128), ++ z0 = svdup_s16_x (p0, -128)) ++ ++/* ++** dup_m129_s16_x: ++** mov z0\.h, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -129), ++ z0 = svdup_s16_x (p0, -129)) ++ ++/* ++** dup_m130_s16_x: ++** mvni v([0-9]+)\.8h, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -130), ++ z0 = svdup_s16_x (p0, -130)) ++ ++/* ++** dup_m254_s16_x: ++** mvni v([0-9]+)\.8h, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -254), ++ z0 = svdup_s16_x (p0, -254)) ++ ++/* ++** dup_m255_s16_x: ++** mov z0\.h, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -255), ++ z0 = svdup_s16_x (p0, -255)) ++ ++/* ++** dup_m256_s16_x: ++** mov z0\.h, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -256), ++ z0 = svdup_s16_x (p0, -256)) ++ ++/* ++** dup_m257_s16_x: ++** mov z0\.h, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -257), ++ z0 = svdup_s16_x (p0, -257)) ++ ++/* ++** dup_m258_s16_x: ++** mov z0\.b, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -258), ++ z0 = svdup_s16_x (p0, -258)) ++ ++/* ++** dup_m259_s16_x: ++** mov (w[0-9]+), -259 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -259), ++ z0 = svdup_s16_x (p0, -259)) ++ ++/* ++** dup_m512_s16_x: ++** mov z0\.h, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -512), ++ z0 = svdup_s16_x (p0, -512)) ++ ++/* ++** dup_m7f00_s16_x: ++** mov z0\.h, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -0x7f00), ++ z0 = svdup_s16_x (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_s16_x: ++** mov z0\.h, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -0x7f01), ++ z0 = svdup_s16_x (p0, -0x7f01)) ++ ++/* ++** dup_m7f02_s16_x: ++** mov (w[0-9]+), -32514 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -0x7f02), ++ z0 = svdup_s16_x (p0, -0x7f02)) ++ ++/* ++** dup_m7ffe_s16_x: ++** mov (w[0-9]+), -32766 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -0x7ffe), ++ z0 = svdup_s16_x (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_s16_x: ++** mov z0\.h, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -0x7fff), ++ z0 = svdup_s16_x (p0, -0x7fff)) ++ ++/* ++** dup_m8000_s16_x: ++** mov z0\.h, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s16_x, svint16_t, ++ z0 = svdup_n_s16_x (p0, -0x8000), ++ z0 = svdup_s16_x (p0, -0x8000)) ++ ++/* ++** dup_w0_s16_x: ++** mov z0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s16_x, svint16_t, int16_t, ++ z0 = svdup_n_s16_x (p0, x0), ++ z0 = svdup_s16_x (p0, x0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c +new file mode 100644 +index 000000000..0b396dbeb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c +@@ -0,0 +1,1175 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_s32: ++** mov z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s32, svint32_t, ++ z0 = svdup_n_s32 (1), ++ z0 = svdup_s32 (1)) ++ ++/* ++** dup_127_s32: ++** mov z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s32, svint32_t, ++ z0 = svdup_n_s32 (127), ++ z0 = svdup_s32 (127)) ++ ++/* ++** dup_128_s32: ++** mov z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s32, svint32_t, ++ z0 = svdup_n_s32 (128), ++ z0 = svdup_s32 (128)) ++ ++/* ++** dup_129_s32: ++** movi v([0-9]+)\.4s, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_s32, svint32_t, ++ z0 = svdup_n_s32 (129), ++ z0 = svdup_s32 (129)) ++ ++/* ++** dup_253_s32: ++** movi v([0-9]+)\.4s, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_s32, svint32_t, ++ z0 = svdup_n_s32 (253), ++ z0 = svdup_s32 (253)) ++ ++/* ++** dup_254_s32: ++** mov z0\.s, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s32, svint32_t, ++ z0 = svdup_n_s32 (254), ++ z0 = svdup_s32 (254)) ++ ++/* ++** dup_255_s32: ++** mov z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s32, svint32_t, ++ z0 = svdup_n_s32 (255), ++ z0 = svdup_s32 (255)) ++ ++/* ++** dup_256_s32: ++** mov z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s32, svint32_t, ++ z0 = svdup_n_s32 (256), ++ z0 = svdup_s32 (256)) ++ ++/* ++** dup_257_s32: ++** mov (w[0-9]+), 257 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_s32, svint32_t, ++ z0 = svdup_n_s32 (257), ++ z0 = svdup_s32 (257)) ++ ++/* ++** dup_512_s32: ++** mov z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s32, svint32_t, ++ z0 = svdup_n_s32 (512), ++ z0 = svdup_s32 (512)) ++ ++/* ++** dup_7f00_s32: ++** mov z0\.s, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s32, svint32_t, ++ z0 = svdup_n_s32 (0x7f00), ++ z0 = svdup_s32 (0x7f00)) ++ ++/* ++** dup_7f01_s32: ++** mov (w[0-9]+), 32513 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_s32, svint32_t, ++ z0 = svdup_n_s32 (0x7f01), ++ z0 = svdup_s32 (0x7f01)) ++ ++/* ++** dup_7ffd_s32: ++** mov (w[0-9]+), 32765 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_s32, svint32_t, ++ z0 = svdup_n_s32 (0x7ffd), ++ z0 = svdup_s32 (0x7ffd)) ++ ++/* ++** dup_7ffe_s32: ++** mov z0\.s, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s32, svint32_t, ++ z0 = svdup_n_s32 (0x7ffe), ++ z0 = svdup_s32 (0x7ffe)) ++ ++/* ++** dup_7fff_s32: ++** mov z0\.s, #32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_s32, svint32_t, ++ z0 = svdup_n_s32 (0x7fff), ++ z0 = svdup_s32 (0x7fff)) ++ ++/* ++** dup_m1_s32: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s32, svint32_t, ++ z0 = svdup_n_s32 (-1), ++ z0 = svdup_s32 (-1)) ++ ++/* ++** dup_m128_s32: ++** mov z0\.s, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s32, svint32_t, ++ z0 = svdup_n_s32 (-128), ++ z0 = svdup_s32 (-128)) ++ ++/* ++** dup_m129_s32: ++** mov z0\.s, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s32, svint32_t, ++ z0 = svdup_n_s32 (-129), ++ z0 = svdup_s32 (-129)) ++ ++/* ++** dup_m130_s32: ++** mvni v([0-9]+)\.4s, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_s32, svint32_t, ++ z0 = svdup_n_s32 (-130), ++ z0 = svdup_s32 (-130)) ++ ++/* ++** dup_m254_s32: ++** mvni v([0-9]+)\.4s, 0xfd ++** dup z0\.q, 
z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_s32, svint32_t, ++ z0 = svdup_n_s32 (-254), ++ z0 = svdup_s32 (-254)) ++ ++/* ++** dup_m255_s32: ++** mov z0\.s, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s32, svint32_t, ++ z0 = svdup_n_s32 (-255), ++ z0 = svdup_s32 (-255)) ++ ++/* ++** dup_m256_s32: ++** mov z0\.s, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s32, svint32_t, ++ z0 = svdup_n_s32 (-256), ++ z0 = svdup_s32 (-256)) ++ ++/* ++** dup_m257_s32: ++** mov z0\.s, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s32, svint32_t, ++ z0 = svdup_n_s32 (-257), ++ z0 = svdup_s32 (-257)) ++ ++/* ++** dup_m258_s32: ++** mov (w[0-9]+), -258 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_s32, svint32_t, ++ z0 = svdup_n_s32 (-258), ++ z0 = svdup_s32 (-258)) ++ ++/* ++** dup_m259_s32: ++** mov (w[0-9]+), -259 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_s32, svint32_t, ++ z0 = svdup_n_s32 (-259), ++ z0 = svdup_s32 (-259)) ++ ++/* ++** dup_m512_s32: ++** mov z0\.s, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s32, svint32_t, ++ z0 = svdup_n_s32 (-512), ++ z0 = svdup_s32 (-512)) ++ ++/* ++** dup_m7f00_s32: ++** mov z0\.s, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s32, svint32_t, ++ z0 = svdup_n_s32 (-0x7f00), ++ z0 = svdup_s32 (-0x7f00)) ++ ++/* ++** dup_m7f01_s32: ++** mov z0\.s, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s32, svint32_t, ++ z0 = svdup_n_s32 (-0x7f01), ++ z0 = svdup_s32 (-0x7f01)) ++ ++/* ++** dup_m7f02_s32: ++** mov (w[0-9]+), -32514 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_s32, svint32_t, ++ z0 = svdup_n_s32 (-0x7f02), ++ z0 = svdup_s32 (-0x7f02)) ++ ++/* ++** dup_m7ffe_s32: ++** mov (w[0-9]+), -32766 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_s32, svint32_t, ++ z0 = svdup_n_s32 (-0x7ffe), ++ z0 = svdup_s32 (-0x7ffe)) ++ ++/* ++** dup_m7fff_s32: ++** mov z0\.s, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s32, svint32_t, ++ z0 = svdup_n_s32 (-0x7fff), ++ z0 = svdup_s32 (-0x7fff)) ++ ++/* ++** dup_m8000_s32: ++** mov z0\.s, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s32, svint32_t, ++ z0 = svdup_n_s32 (-0x8000), ++ z0 = svdup_s32 (-0x8000)) ++ ++/* ++** dup_w0_s32: ++** mov z0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s32, svint32_t, int32_t, ++ z0 = svdup_n_s32 (x0), ++ z0 = svdup_s32 (x0)) ++ ++/* ++** dup_1_s32_m: ++** mov z0\.s, p0/m, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 1), ++ z0 = svdup_s32_m (z0, p0, 1)) ++ ++/* ++** dup_127_s32_m: ++** mov z0\.s, p0/m, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 127), ++ z0 = svdup_s32_m (z0, p0, 127)) ++ ++/* ++** dup_128_s32_m: ++** mov (z[0-9]+\.s), #128 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 128), ++ z0 = svdup_s32_m (z0, p0, 128)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_129_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 129), ++ z0 = svdup_s32_m (z0, p0, 129)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_253_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 253), ++ z0 = svdup_s32_m (z0, p0, 253)) ++ ++/* ++** dup_254_s32_m: ++** mov (z[0-9]+\.s), #254 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 254), ++ z0 = svdup_s32_m (z0, p0, 254)) ++ ++/* ++** dup_255_s32_m: ++** mov (z[0-9]+\.s), #255 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 255), ++ z0 = svdup_s32_m (z0, p0, 255)) ++ ++/* ++** dup_256_s32_m: ++** mov z0\.s, p0/m, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 256), ++ z0 = svdup_s32_m (z0, p0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_257_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 257), ++ z0 = svdup_s32_m (z0, p0, 257)) ++ ++/* ++** dup_512_s32_m: ++** mov z0\.s, p0/m, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 512), ++ z0 = svdup_s32_m (z0, p0, 512)) ++ ++/* ++** dup_7f00_s32_m: ++** mov z0\.s, p0/m, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 0x7f00), ++ z0 = svdup_s32_m (z0, p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 0x7f01), ++ z0 = svdup_s32_m (z0, p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 0x7ffd), ++ z0 = svdup_s32_m (z0, p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_s32_m: ++** mov (z[0-9]+\.s), #32766 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 0x7ffe), ++ z0 = svdup_s32_m (z0, p0, 0x7ffe)) ++ ++/* ++** dup_7fff_s32_m: ++** mov (z[0-9]+\.s), #32767 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 0x7fff), ++ z0 = svdup_s32_m (z0, p0, 0x7fff)) ++ ++/* ++** dup_m1_s32_m: ++** mov z0\.s, p0/m, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -1), ++ z0 = svdup_s32_m (z0, p0, -1)) ++ ++/* ++** dup_m128_s32_m: ++** mov z0\.s, p0/m, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -128), ++ z0 = svdup_s32_m (z0, p0, -128)) ++ ++/* ++** dup_m129_s32_m: ++** mov (z[0-9]+\.s), #-129 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -129), ++ z0 = svdup_s32_m (z0, p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -130), ++ z0 = svdup_s32_m (z0, p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -254), ++ z0 = svdup_s32_m (z0, p0, -254)) ++ ++/* ++** dup_m255_s32_m: ++** mov (z[0-9]+\.s), #-255 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -255), ++ z0 = svdup_s32_m (z0, p0, -255)) ++ ++/* ++** dup_m256_s32_m: ++** mov z0\.s, p0/m, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -256), ++ z0 = svdup_s32_m (z0, p0, -256)) ++ ++/* ++** dup_m257_s32_m: ++** mov (z[0-9]+\.s), #-257 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -257), ++ z0 = svdup_s32_m (z0, p0, -257)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m258_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -258), ++ z0 = svdup_s32_m (z0, p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -259), ++ z0 = svdup_s32_m (z0, p0, -259)) ++ ++/* ++** dup_m512_s32_m: ++** mov z0\.s, p0/m, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -512), ++ z0 = svdup_s32_m (z0, p0, -512)) ++ ++/* ++** dup_m7f00_s32_m: ++** mov z0\.s, p0/m, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -0x7f00), ++ z0 = svdup_s32_m (z0, p0, -0x7f00)) ++ ++/* ++** dup_m7f01_s32_m: ++** mov (z[0-9]+\.s), #-32513 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -0x7f01), ++ z0 = svdup_s32_m (z0, p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -0x7f02), ++ z0 = svdup_s32_m (z0, p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7ffe_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -0x7ffe), ++ z0 = svdup_s32_m (z0, p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_s32_m: ++** mov (z[0-9]+\.s), #-32767 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -0x7fff), ++ z0 = svdup_s32_m (z0, p0, -0x7fff)) ++ ++/* ++** dup_m8000_s32_m: ++** mov z0\.s, p0/m, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, -0x8000), ++ z0 = svdup_s32_m (z0, p0, -0x8000)) ++ ++/* ++** dup_0_s32_m: ++** mov z0\.s, p0/m, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_s32_m, svint32_t, ++ z0 = svdup_n_s32_m (z0, p0, 0), ++ z0 = svdup_s32_m (z0, p0, 0)) ++ ++/* ++** dup_w0_s32_m: ++** movprfx z0, z1 ++** mov z0\.s, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s32_m, svint32_t, int32_t, ++ z0 = svdup_n_s32_m (z1, p0, x0), ++ z0 = svdup_s32_m (z1, p0, x0)) ++ ++/* ++** dup_1_s32_z: ++** mov z0\.s, p0/z, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 1), ++ z0 = svdup_s32_z (p0, 1)) ++ ++/* ++** dup_127_s32_z: ++** mov z0\.s, p0/z, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 127), ++ z0 = svdup_s32_z (p0, 127)) ++ ++/* ++** dup_128_s32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #128 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 128), ++ z0 = svdup_s32_z (p0, 128)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_129_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 129), ++ z0 = svdup_s32_z (p0, 129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_253_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 253), ++ z0 = svdup_s32_z (p0, 253)) ++ ++/* ++** dup_254_s32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #254 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 254), ++ z0 = svdup_s32_z (p0, 254)) ++ ++/* ++** dup_255_s32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #255 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 255), ++ z0 = svdup_s32_z (p0, 255)) ++ ++/* ++** dup_256_s32_z: ++** mov z0\.s, p0/z, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 256), ++ z0 = svdup_s32_z (p0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_257_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 257), ++ z0 = svdup_s32_z (p0, 257)) ++ ++/* ++** dup_512_s32_z: ++** mov z0\.s, p0/z, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 512), ++ z0 = svdup_s32_z (p0, 512)) ++ ++/* ++** dup_7f00_s32_z: ++** mov z0\.s, p0/z, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 0x7f00), ++ z0 = svdup_s32_z (p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 0x7f01), ++ z0 = svdup_s32_z (p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 0x7ffd), ++ z0 = svdup_s32_z (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_s32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #32766 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 0x7ffe), ++ z0 = svdup_s32_z (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_s32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #32767 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 0x7fff), ++ z0 = svdup_s32_z (p0, 0x7fff)) ++ ++/* ++** dup_m1_s32_z: ++** mov z0\.s, p0/z, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -1), ++ z0 = svdup_s32_z (p0, -1)) ++ ++/* ++** dup_m128_s32_z: ++** mov z0\.s, p0/z, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -128), ++ z0 = svdup_s32_z (p0, -128)) ++ ++/* ++** dup_m129_s32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #-129 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -129), ++ z0 = svdup_s32_z (p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -130), ++ z0 = svdup_s32_z (p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -254), ++ z0 = svdup_s32_z (p0, -254)) ++ ++/* ++** dup_m255_s32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #-255 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -255), ++ z0 = svdup_s32_z (p0, -255)) ++ ++/* ++** dup_m256_s32_z: ++** mov z0\.s, p0/z, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -256), ++ z0 = svdup_s32_z (p0, -256)) ++ ++/* ++** dup_m257_s32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #-257 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -257), ++ z0 = svdup_s32_z (p0, -257)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m258_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -258), ++ z0 = svdup_s32_z (p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -259), ++ z0 = svdup_s32_z (p0, -259)) ++ ++/* ++** dup_m512_s32_z: ++** mov z0\.s, p0/z, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -512), ++ z0 = svdup_s32_z (p0, -512)) ++ ++/* ++** dup_m7f00_s32_z: ++** mov z0\.s, p0/z, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -0x7f00), ++ z0 = svdup_s32_z (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_s32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #-32513 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -0x7f01), ++ z0 = svdup_s32_z (p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -0x7f02), ++ z0 = svdup_s32_z (p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m7ffe_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -0x7ffe), ++ z0 = svdup_s32_z (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_s32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #-32767 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -0x7fff), ++ z0 = svdup_s32_z (p0, -0x7fff)) ++ ++/* ++** dup_m8000_s32_z: ++** mov z0\.s, p0/z, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, -0x8000), ++ z0 = svdup_s32_z (p0, -0x8000)) ++ ++/* ++** dup_0_s32_z: ++** mov z0\.s, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_s32_z, svint32_t, ++ z0 = svdup_n_s32_z (p0, 0), ++ z0 = svdup_s32_z (p0, 0)) ++ ++/* ++** dup_w0_s32_z: ++** movprfx z0\.s, p0/z, z0\.s ++** mov z0\.s, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s32_z, svint32_t, int32_t, ++ z0 = svdup_n_s32_z (p0, x0), ++ z0 = svdup_s32_z (p0, x0)) ++ ++/* ++** dup_1_s32_x: ++** mov z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 1), ++ z0 = svdup_s32_x (p0, 1)) ++ ++/* ++** dup_127_s32_x: ++** mov z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 127), ++ z0 = svdup_s32_x (p0, 127)) ++ ++/* ++** dup_128_s32_x: ++** mov z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 128), ++ z0 = svdup_s32_x (p0, 128)) ++ ++/* ++** dup_129_s32_x: ++** movi v([0-9]+)\.4s, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 129), ++ z0 = svdup_s32_x (p0, 129)) ++ ++/* ++** dup_253_s32_x: ++** movi v([0-9]+)\.4s, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 253), ++ z0 = svdup_s32_x (p0, 253)) ++ ++/* ++** dup_254_s32_x: ++** mov z0\.s, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 254), ++ z0 = svdup_s32_x (p0, 254)) ++ ++/* ++** dup_255_s32_x: ++** mov z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 255), ++ z0 = svdup_s32_x (p0, 255)) ++ ++/* ++** dup_256_s32_x: ++** mov z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 256), ++ z0 = svdup_s32_x (p0, 256)) ++ ++/* ++** dup_257_s32_x: ++** mov (w[0-9]+), 257 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 257), ++ z0 = svdup_s32_x (p0, 257)) ++ ++/* ++** dup_512_s32_x: ++** mov z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 512), ++ z0 = svdup_s32_x (p0, 512)) ++ ++/* ++** dup_7f00_s32_x: ++** mov z0\.s, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 0x7f00), ++ z0 = svdup_s32_x (p0, 0x7f00)) ++ ++/* ++** dup_7f01_s32_x: ++** mov (w[0-9]+), 32513 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 0x7f01), ++ z0 = svdup_s32_x (p0, 0x7f01)) ++ ++/* ++** dup_7ffd_s32_x: ++** mov (w[0-9]+), 32765 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 0x7ffd), ++ z0 = svdup_s32_x (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_s32_x: ++** mov z0\.s, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 0x7ffe), ++ z0 = svdup_s32_x (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_s32_x: ++** mov z0\.s, 
#32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, 0x7fff), ++ z0 = svdup_s32_x (p0, 0x7fff)) ++ ++/* ++** dup_m1_s32_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -1), ++ z0 = svdup_s32_x (p0, -1)) ++ ++/* ++** dup_m128_s32_x: ++** mov z0\.s, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -128), ++ z0 = svdup_s32_x (p0, -128)) ++ ++/* ++** dup_m129_s32_x: ++** mov z0\.s, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -129), ++ z0 = svdup_s32_x (p0, -129)) ++ ++/* ++** dup_m130_s32_x: ++** mvni v([0-9]+)\.4s, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -130), ++ z0 = svdup_s32_x (p0, -130)) ++ ++/* ++** dup_m254_s32_x: ++** mvni v([0-9]+)\.4s, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -254), ++ z0 = svdup_s32_x (p0, -254)) ++ ++/* ++** dup_m255_s32_x: ++** mov z0\.s, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -255), ++ z0 = svdup_s32_x (p0, -255)) ++ ++/* ++** dup_m256_s32_x: ++** mov z0\.s, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -256), ++ z0 = svdup_s32_x (p0, -256)) ++ ++/* ++** dup_m257_s32_x: ++** mov z0\.s, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -257), ++ z0 = svdup_s32_x (p0, -257)) ++ ++/* ++** dup_m258_s32_x: ++** mov (w[0-9]+), -258 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -258), ++ z0 = svdup_s32_x (p0, -258)) ++ ++/* ++** dup_m259_s32_x: ++** mov (w[0-9]+), -259 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -259), ++ z0 = svdup_s32_x (p0, -259)) ++ ++/* ++** dup_m512_s32_x: ++** mov z0\.s, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -512), ++ z0 = svdup_s32_x (p0, -512)) ++ ++/* ++** dup_m7f00_s32_x: ++** mov z0\.s, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -0x7f00), ++ z0 = svdup_s32_x (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_s32_x: ++** mov z0\.s, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -0x7f01), ++ z0 = svdup_s32_x (p0, -0x7f01)) ++ ++/* ++** dup_m7f02_s32_x: ++** mov (w[0-9]+), -32514 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -0x7f02), ++ z0 = svdup_s32_x (p0, -0x7f02)) ++ ++/* ++** dup_m7ffe_s32_x: ++** mov (w[0-9]+), -32766 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -0x7ffe), ++ z0 = svdup_s32_x (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_s32_x: ++** mov z0\.s, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -0x7fff), ++ z0 = svdup_s32_x (p0, -0x7fff)) ++ ++/* ++** dup_m8000_s32_x: ++** mov z0\.s, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s32_x, svint32_t, ++ z0 = svdup_n_s32_x (p0, -0x8000), ++ z0 = svdup_s32_x (p0, -0x8000)) ++ ++/* ++** dup_w0_s32_x: ++** mov z0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s32_x, svint32_t, int32_t, ++ z0 = svdup_n_s32_x (p0, x0), ++ z0 = svdup_s32_x (p0, x0)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c +new file mode 100644 +index 000000000..6259b7fb5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c +@@ -0,0 +1,1175 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_s64: ++** mov z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s64, svint64_t, ++ z0 = svdup_n_s64 (1), ++ z0 = svdup_s64 (1)) ++ ++/* ++** dup_127_s64: ++** mov z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s64, svint64_t, ++ z0 = svdup_n_s64 (127), ++ z0 = svdup_s64 (127)) ++ ++/* ++** dup_128_s64: ++** mov z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s64, svint64_t, ++ z0 = svdup_n_s64 (128), ++ z0 = svdup_s64 (128)) ++ ++/* ++** dup_129_s64: ++** mov (x[0-9]+), 129 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_s64, svint64_t, ++ z0 = svdup_n_s64 (129), ++ z0 = svdup_s64 (129)) ++ ++/* ++** dup_253_s64: ++** mov (x[0-9]+), 253 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_s64, svint64_t, ++ z0 = svdup_n_s64 (253), ++ z0 = svdup_s64 (253)) ++ ++/* ++** dup_254_s64: ++** mov z0\.d, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s64, svint64_t, ++ z0 = svdup_n_s64 (254), ++ z0 = svdup_s64 (254)) ++ ++/* ++** dup_255_s64: ++** mov z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s64, svint64_t, ++ z0 = svdup_n_s64 (255), ++ z0 = svdup_s64 (255)) ++ ++/* ++** dup_256_s64: ++** mov z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s64, svint64_t, ++ z0 = svdup_n_s64 (256), ++ z0 = svdup_s64 (256)) ++ ++/* ++** dup_257_s64: ++** mov (x[0-9]+), 257 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_s64, svint64_t, ++ z0 = svdup_n_s64 (257), ++ z0 = svdup_s64 (257)) ++ ++/* ++** dup_512_s64: ++** mov z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s64, svint64_t, ++ z0 = svdup_n_s64 (512), ++ z0 = svdup_s64 (512)) ++ ++/* ++** dup_7f00_s64: ++** mov z0\.d, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s64, svint64_t, ++ z0 = svdup_n_s64 (0x7f00), ++ z0 = svdup_s64 (0x7f00)) ++ ++/* ++** dup_7f01_s64: ++** mov (x[0-9]+), 32513 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_s64, svint64_t, ++ z0 = svdup_n_s64 (0x7f01), ++ z0 = svdup_s64 (0x7f01)) ++ ++/* ++** dup_7ffd_s64: ++** mov (x[0-9]+), 32765 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_s64, svint64_t, ++ z0 = svdup_n_s64 (0x7ffd), ++ z0 = svdup_s64 (0x7ffd)) ++ ++/* ++** dup_7ffe_s64: ++** mov z0\.d, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s64, svint64_t, ++ z0 = svdup_n_s64 (0x7ffe), ++ z0 = svdup_s64 (0x7ffe)) ++ ++/* ++** dup_7fff_s64: ++** mov z0\.d, #32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_s64, svint64_t, ++ z0 = svdup_n_s64 (0x7fff), ++ z0 = svdup_s64 (0x7fff)) ++ ++/* ++** dup_m1_s64: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s64, svint64_t, ++ z0 = svdup_n_s64 (-1), ++ z0 = svdup_s64 (-1)) ++ ++/* ++** dup_m128_s64: ++** mov z0\.d, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s64, svint64_t, ++ z0 = svdup_n_s64 (-128), ++ z0 = svdup_s64 (-128)) ++ ++/* ++** dup_m129_s64: ++** mov z0\.d, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s64, svint64_t, ++ z0 = svdup_n_s64 (-129), ++ z0 = svdup_s64 (-129)) ++ ++/* ++** dup_m130_s64: ++** mov (x[0-9]+), -130 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_s64, svint64_t, ++ z0 = svdup_n_s64 (-130), ++ z0 = svdup_s64 (-130)) ++ ++/* ++** dup_m254_s64: ++** mov (x[0-9]+), -254 ++** mov 
z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_s64, svint64_t, ++ z0 = svdup_n_s64 (-254), ++ z0 = svdup_s64 (-254)) ++ ++/* ++** dup_m255_s64: ++** mov z0\.d, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s64, svint64_t, ++ z0 = svdup_n_s64 (-255), ++ z0 = svdup_s64 (-255)) ++ ++/* ++** dup_m256_s64: ++** mov z0\.d, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s64, svint64_t, ++ z0 = svdup_n_s64 (-256), ++ z0 = svdup_s64 (-256)) ++ ++/* ++** dup_m257_s64: ++** mov z0\.d, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s64, svint64_t, ++ z0 = svdup_n_s64 (-257), ++ z0 = svdup_s64 (-257)) ++ ++/* ++** dup_m258_s64: ++** mov (x[0-9]+), -258 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_s64, svint64_t, ++ z0 = svdup_n_s64 (-258), ++ z0 = svdup_s64 (-258)) ++ ++/* ++** dup_m259_s64: ++** mov (x[0-9]+), -259 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_s64, svint64_t, ++ z0 = svdup_n_s64 (-259), ++ z0 = svdup_s64 (-259)) ++ ++/* ++** dup_m512_s64: ++** mov z0\.d, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s64, svint64_t, ++ z0 = svdup_n_s64 (-512), ++ z0 = svdup_s64 (-512)) ++ ++/* ++** dup_m7f00_s64: ++** mov z0\.d, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s64, svint64_t, ++ z0 = svdup_n_s64 (-0x7f00), ++ z0 = svdup_s64 (-0x7f00)) ++ ++/* ++** dup_m7f01_s64: ++** mov z0\.d, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s64, svint64_t, ++ z0 = svdup_n_s64 (-0x7f01), ++ z0 = svdup_s64 (-0x7f01)) ++ ++/* ++** dup_m7f02_s64: ++** mov (x[0-9]+), -32514 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_s64, svint64_t, ++ z0 = svdup_n_s64 (-0x7f02), ++ z0 = svdup_s64 (-0x7f02)) ++ ++/* ++** dup_m7ffe_s64: ++** mov (x[0-9]+), -32766 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_s64, svint64_t, ++ z0 = svdup_n_s64 (-0x7ffe), ++ z0 = svdup_s64 (-0x7ffe)) ++ ++/* ++** dup_m7fff_s64: ++** mov z0\.d, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s64, svint64_t, ++ z0 = svdup_n_s64 (-0x7fff), ++ z0 = svdup_s64 (-0x7fff)) ++ ++/* ++** dup_m8000_s64: ++** mov z0\.d, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s64, svint64_t, ++ z0 = svdup_n_s64 (-0x8000), ++ z0 = svdup_s64 (-0x8000)) ++ ++/* ++** dup_x0_s64: ++** mov z0\.d, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_x0_s64, svint64_t, int64_t, ++ z0 = svdup_n_s64 (x0), ++ z0 = svdup_s64 (x0)) ++ ++/* ++** dup_1_s64_m: ++** mov z0\.d, p0/m, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 1), ++ z0 = svdup_s64_m (z0, p0, 1)) ++ ++/* ++** dup_127_s64_m: ++** mov z0\.d, p0/m, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 127), ++ z0 = svdup_s64_m (z0, p0, 127)) ++ ++/* ++** dup_128_s64_m: ++** mov (z[0-9]+\.d), #128 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 128), ++ z0 = svdup_s64_m (z0, p0, 128)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_129_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 129), ++ z0 = svdup_s64_m (z0, p0, 129)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_253_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 253), ++ z0 = svdup_s64_m (z0, p0, 253)) ++ ++/* ++** dup_254_s64_m: ++** mov (z[0-9]+\.d), #254 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 254), ++ z0 = svdup_s64_m (z0, p0, 254)) ++ ++/* ++** dup_255_s64_m: ++** mov (z[0-9]+\.d), #255 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 255), ++ z0 = svdup_s64_m (z0, p0, 255)) ++ ++/* ++** dup_256_s64_m: ++** mov z0\.d, p0/m, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 256), ++ z0 = svdup_s64_m (z0, p0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_257_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 257), ++ z0 = svdup_s64_m (z0, p0, 257)) ++ ++/* ++** dup_512_s64_m: ++** mov z0\.d, p0/m, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 512), ++ z0 = svdup_s64_m (z0, p0, 512)) ++ ++/* ++** dup_7f00_s64_m: ++** mov z0\.d, p0/m, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 0x7f00), ++ z0 = svdup_s64_m (z0, p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 0x7f01), ++ z0 = svdup_s64_m (z0, p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 0x7ffd), ++ z0 = svdup_s64_m (z0, p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_s64_m: ++** mov (z[0-9]+\.d), #32766 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 0x7ffe), ++ z0 = svdup_s64_m (z0, p0, 0x7ffe)) ++ ++/* ++** dup_7fff_s64_m: ++** mov (z[0-9]+\.d), #32767 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 0x7fff), ++ z0 = svdup_s64_m (z0, p0, 0x7fff)) ++ ++/* ++** dup_m1_s64_m: ++** mov z0\.d, p0/m, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -1), ++ z0 = svdup_s64_m (z0, p0, -1)) ++ ++/* ++** dup_m128_s64_m: ++** mov z0\.d, p0/m, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -128), ++ z0 = svdup_s64_m (z0, p0, -128)) ++ ++/* ++** dup_m129_s64_m: ++** mov (z[0-9]+\.d), #-129 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -129), ++ z0 = svdup_s64_m (z0, p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -130), ++ z0 = svdup_s64_m (z0, p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -254), ++ z0 = svdup_s64_m (z0, p0, -254)) ++ ++/* ++** dup_m255_s64_m: ++** mov (z[0-9]+\.d), #-255 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -255), ++ z0 = svdup_s64_m (z0, p0, -255)) ++ ++/* ++** dup_m256_s64_m: ++** mov z0\.d, p0/m, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -256), ++ z0 = svdup_s64_m (z0, p0, -256)) ++ ++/* ++** dup_m257_s64_m: ++** mov (z[0-9]+\.d), #-257 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -257), ++ z0 = svdup_s64_m (z0, p0, -257)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m258_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -258), ++ z0 = svdup_s64_m (z0, p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -259), ++ z0 = svdup_s64_m (z0, p0, -259)) ++ ++/* ++** dup_m512_s64_m: ++** mov z0\.d, p0/m, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -512), ++ z0 = svdup_s64_m (z0, p0, -512)) ++ ++/* ++** dup_m7f00_s64_m: ++** mov z0\.d, p0/m, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -0x7f00), ++ z0 = svdup_s64_m (z0, p0, -0x7f00)) ++ ++/* ++** dup_m7f01_s64_m: ++** mov (z[0-9]+\.d), #-32513 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -0x7f01), ++ z0 = svdup_s64_m (z0, p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -0x7f02), ++ z0 = svdup_s64_m (z0, p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7ffe_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -0x7ffe), ++ z0 = svdup_s64_m (z0, p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_s64_m: ++** mov (z[0-9]+\.d), #-32767 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -0x7fff), ++ z0 = svdup_s64_m (z0, p0, -0x7fff)) ++ ++/* ++** dup_m8000_s64_m: ++** mov z0\.d, p0/m, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, -0x8000), ++ z0 = svdup_s64_m (z0, p0, -0x8000)) ++ ++/* ++** dup_0_s64_m: ++** mov z0\.d, p0/m, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_s64_m, svint64_t, ++ z0 = svdup_n_s64_m (z0, p0, 0), ++ z0 = svdup_s64_m (z0, p0, 0)) ++ ++/* ++** dup_x0_s64_m: ++** movprfx z0, z1 ++** mov z0\.d, p0/m, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_x0_s64_m, svint64_t, int64_t, ++ z0 = svdup_n_s64_m (z1, p0, x0), ++ z0 = svdup_s64_m (z1, p0, x0)) ++ ++/* ++** dup_1_s64_z: ++** mov z0\.d, p0/z, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 1), ++ z0 = svdup_s64_z (p0, 1)) ++ ++/* ++** dup_127_s64_z: ++** mov z0\.d, p0/z, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 127), ++ z0 = svdup_s64_z (p0, 127)) ++ ++/* ++** dup_128_s64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #128 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 128), ++ z0 = svdup_s64_z (p0, 128)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_129_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 129), ++ z0 = svdup_s64_z (p0, 129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_253_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 253), ++ z0 = svdup_s64_z (p0, 253)) ++ ++/* ++** dup_254_s64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #254 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 254), ++ z0 = svdup_s64_z (p0, 254)) ++ ++/* ++** dup_255_s64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #255 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 255), ++ z0 = svdup_s64_z (p0, 255)) ++ ++/* ++** dup_256_s64_z: ++** mov z0\.d, p0/z, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 256), ++ z0 = svdup_s64_z (p0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_257_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 257), ++ z0 = svdup_s64_z (p0, 257)) ++ ++/* ++** dup_512_s64_z: ++** mov z0\.d, p0/z, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 512), ++ z0 = svdup_s64_z (p0, 512)) ++ ++/* ++** dup_7f00_s64_z: ++** mov z0\.d, p0/z, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 0x7f00), ++ z0 = svdup_s64_z (p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 0x7f01), ++ z0 = svdup_s64_z (p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 0x7ffd), ++ z0 = svdup_s64_z (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_s64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #32766 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 0x7ffe), ++ z0 = svdup_s64_z (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_s64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #32767 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 0x7fff), ++ z0 = svdup_s64_z (p0, 0x7fff)) ++ ++/* ++** dup_m1_s64_z: ++** mov z0\.d, p0/z, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -1), ++ z0 = svdup_s64_z (p0, -1)) ++ ++/* ++** dup_m128_s64_z: ++** mov z0\.d, p0/z, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -128), ++ z0 = svdup_s64_z (p0, -128)) ++ ++/* ++** dup_m129_s64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #-129 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -129), ++ z0 = svdup_s64_z (p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -130), ++ z0 = svdup_s64_z (p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -254), ++ z0 = svdup_s64_z (p0, -254)) ++ ++/* ++** dup_m255_s64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #-255 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -255), ++ z0 = svdup_s64_z (p0, -255)) ++ ++/* ++** dup_m256_s64_z: ++** mov z0\.d, p0/z, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -256), ++ z0 = svdup_s64_z (p0, -256)) ++ ++/* ++** dup_m257_s64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #-257 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -257), ++ z0 = svdup_s64_z (p0, -257)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m258_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -258), ++ z0 = svdup_s64_z (p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -259), ++ z0 = svdup_s64_z (p0, -259)) ++ ++/* ++** dup_m512_s64_z: ++** mov z0\.d, p0/z, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -512), ++ z0 = svdup_s64_z (p0, -512)) ++ ++/* ++** dup_m7f00_s64_z: ++** mov z0\.d, p0/z, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -0x7f00), ++ z0 = svdup_s64_z (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_s64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #-32513 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -0x7f01), ++ z0 = svdup_s64_z (p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -0x7f02), ++ z0 = svdup_s64_z (p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m7ffe_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -0x7ffe), ++ z0 = svdup_s64_z (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_s64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #-32767 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -0x7fff), ++ z0 = svdup_s64_z (p0, -0x7fff)) ++ ++/* ++** dup_m8000_s64_z: ++** mov z0\.d, p0/z, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, -0x8000), ++ z0 = svdup_s64_z (p0, -0x8000)) ++ ++/* ++** dup_0_s64_z: ++** mov z0\.d, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_s64_z, svint64_t, ++ z0 = svdup_n_s64_z (p0, 0), ++ z0 = svdup_s64_z (p0, 0)) ++ ++/* ++** dup_x0_s64_z: ++** movprfx z0\.d, p0/z, z0\.d ++** mov z0\.d, p0/m, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_x0_s64_z, svint64_t, int64_t, ++ z0 = svdup_n_s64_z (p0, x0), ++ z0 = svdup_s64_z (p0, x0)) ++ ++/* ++** dup_1_s64_x: ++** mov z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 1), ++ z0 = svdup_s64_x (p0, 1)) ++ ++/* ++** dup_127_s64_x: ++** mov z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 127), ++ z0 = svdup_s64_x (p0, 127)) ++ ++/* ++** dup_128_s64_x: ++** mov z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 128), ++ z0 = svdup_s64_x (p0, 128)) ++ ++/* ++** dup_129_s64_x: ++** mov (x[0-9]+), 129 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 129), ++ z0 = svdup_s64_x (p0, 129)) ++ ++/* ++** dup_253_s64_x: ++** mov (x[0-9]+), 253 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 253), ++ z0 = svdup_s64_x (p0, 253)) ++ ++/* ++** dup_254_s64_x: ++** mov z0\.d, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 254), ++ z0 = svdup_s64_x (p0, 254)) ++ ++/* ++** dup_255_s64_x: ++** mov z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 255), ++ z0 = svdup_s64_x (p0, 255)) ++ ++/* ++** dup_256_s64_x: ++** mov z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 256), ++ z0 = svdup_s64_x (p0, 256)) ++ ++/* ++** dup_257_s64_x: ++** mov (x[0-9]+), 257 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 257), ++ z0 = svdup_s64_x (p0, 257)) ++ ++/* ++** dup_512_s64_x: ++** mov z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 512), ++ z0 = svdup_s64_x (p0, 512)) ++ ++/* ++** dup_7f00_s64_x: ++** mov z0\.d, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 0x7f00), ++ z0 = svdup_s64_x (p0, 0x7f00)) ++ ++/* ++** dup_7f01_s64_x: ++** mov (x[0-9]+), 32513 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 0x7f01), ++ z0 = svdup_s64_x (p0, 0x7f01)) ++ ++/* ++** dup_7ffd_s64_x: ++** mov (x[0-9]+), 32765 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 0x7ffd), ++ z0 = svdup_s64_x (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_s64_x: ++** mov z0\.d, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 0x7ffe), ++ z0 = svdup_s64_x (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_s64_x: ++** mov z0\.d, #32767 ++** ret ++*/ 
++TEST_UNIFORM_Z (dup_7fff_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, 0x7fff), ++ z0 = svdup_s64_x (p0, 0x7fff)) ++ ++/* ++** dup_m1_s64_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -1), ++ z0 = svdup_s64_x (p0, -1)) ++ ++/* ++** dup_m128_s64_x: ++** mov z0\.d, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -128), ++ z0 = svdup_s64_x (p0, -128)) ++ ++/* ++** dup_m129_s64_x: ++** mov z0\.d, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -129), ++ z0 = svdup_s64_x (p0, -129)) ++ ++/* ++** dup_m130_s64_x: ++** mov (x[0-9]+), -130 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -130), ++ z0 = svdup_s64_x (p0, -130)) ++ ++/* ++** dup_m254_s64_x: ++** mov (x[0-9]+), -254 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -254), ++ z0 = svdup_s64_x (p0, -254)) ++ ++/* ++** dup_m255_s64_x: ++** mov z0\.d, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -255), ++ z0 = svdup_s64_x (p0, -255)) ++ ++/* ++** dup_m256_s64_x: ++** mov z0\.d, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -256), ++ z0 = svdup_s64_x (p0, -256)) ++ ++/* ++** dup_m257_s64_x: ++** mov z0\.d, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -257), ++ z0 = svdup_s64_x (p0, -257)) ++ ++/* ++** dup_m258_s64_x: ++** mov (x[0-9]+), -258 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -258), ++ z0 = svdup_s64_x (p0, -258)) ++ ++/* ++** dup_m259_s64_x: ++** mov (x[0-9]+), -259 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -259), ++ z0 = svdup_s64_x (p0, -259)) ++ ++/* ++** dup_m512_s64_x: ++** mov z0\.d, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -512), ++ z0 = svdup_s64_x (p0, -512)) ++ ++/* ++** dup_m7f00_s64_x: ++** mov z0\.d, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -0x7f00), ++ z0 = svdup_s64_x (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_s64_x: ++** mov z0\.d, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -0x7f01), ++ z0 = svdup_s64_x (p0, -0x7f01)) ++ ++/* ++** dup_m7f02_s64_x: ++** mov (x[0-9]+), -32514 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -0x7f02), ++ z0 = svdup_s64_x (p0, -0x7f02)) ++ ++/* ++** dup_m7ffe_s64_x: ++** mov (x[0-9]+), -32766 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -0x7ffe), ++ z0 = svdup_s64_x (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_s64_x: ++** mov z0\.d, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -0x7fff), ++ z0 = svdup_s64_x (p0, -0x7fff)) ++ ++/* ++** dup_m8000_s64_x: ++** mov z0\.d, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_s64_x, svint64_t, ++ z0 = svdup_n_s64_x (p0, -0x8000), ++ z0 = svdup_s64_x (p0, -0x8000)) ++ ++/* ++** dup_x0_s64_x: ++** mov z0\.d, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_x0_s64_x, svint64_t, int64_t, ++ z0 = svdup_n_s64_x (p0, x0), ++ z0 = svdup_s64_x (p0, x0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c +new file mode 100644 +index 000000000..96fc5fa64 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c +@@ -0,0 +1,383 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_s8: ++** mov z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s8, svint8_t, ++ z0 = svdup_n_s8 (1), ++ z0 = svdup_s8 (1)) ++ ++/* ++** dup_127_s8: ++** mov z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s8, svint8_t, ++ z0 = svdup_n_s8 (127), ++ z0 = svdup_s8 (127)) ++ ++/* ++** dup_128_s8: ++** mov z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s8, svint8_t, ++ z0 = svdup_n_s8 (128), ++ z0 = svdup_s8 (128)) ++ ++/* ++** dup_129_s8: ++** mov z0\.b, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_s8, svint8_t, ++ z0 = svdup_n_s8 (129), ++ z0 = svdup_s8 (129)) ++ ++/* ++** dup_253_s8: ++** mov z0\.b, #-3 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_s8, svint8_t, ++ z0 = svdup_n_s8 (253), ++ z0 = svdup_s8 (253)) ++ ++/* ++** dup_254_s8: ++** mov z0\.b, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s8, svint8_t, ++ z0 = svdup_n_s8 (254), ++ z0 = svdup_s8 (254)) ++ ++/* ++** dup_255_s8: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s8, svint8_t, ++ z0 = svdup_n_s8 (255), ++ z0 = svdup_s8 (255)) ++ ++/* ++** dup_m1_s8: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s8, svint8_t, ++ z0 = svdup_n_s8 (-1), ++ z0 = svdup_s8 (-1)) ++ ++/* ++** dup_m128_s8: ++** mov z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s8, svint8_t, ++ z0 = svdup_n_s8 (-128), ++ z0 = svdup_s8 (-128)) ++ ++/* ++** dup_w0_s8: ++** mov z0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s8, svint8_t, int8_t, ++ z0 = svdup_n_s8 (x0), ++ z0 = svdup_s8 (x0)) ++ ++/* ++** dup_1_s8_m: ++** mov z0\.b, p0/m, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s8_m, svint8_t, ++ z0 = svdup_n_s8_m (z0, p0, 1), ++ z0 = svdup_s8_m (z0, p0, 1)) ++ ++/* ++** dup_127_s8_m: ++** mov z0\.b, p0/m, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s8_m, svint8_t, ++ z0 = svdup_n_s8_m (z0, p0, 127), ++ z0 = svdup_s8_m (z0, p0, 127)) ++ ++/* ++** dup_128_s8_m: ++** mov z0\.b, p0/m, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s8_m, svint8_t, ++ z0 = svdup_n_s8_m (z0, p0, 128), ++ z0 = svdup_s8_m (z0, p0, 128)) ++ ++/* ++** dup_129_s8_m: ++** mov z0\.b, p0/m, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_s8_m, svint8_t, ++ z0 = svdup_n_s8_m (z0, p0, 129), ++ z0 = svdup_s8_m (z0, p0, 129)) ++ ++/* ++** dup_253_s8_m: ++** mov z0\.b, p0/m, #-3 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_s8_m, svint8_t, ++ z0 = svdup_n_s8_m (z0, p0, 253), ++ z0 = svdup_s8_m (z0, p0, 253)) ++ ++/* ++** dup_254_s8_m: ++** mov z0\.b, p0/m, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s8_m, svint8_t, ++ z0 = svdup_n_s8_m (z0, p0, 254), ++ z0 = svdup_s8_m (z0, p0, 254)) ++ ++/* ++** dup_255_s8_m: ++** mov z0\.b, p0/m, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s8_m, svint8_t, ++ z0 = svdup_n_s8_m (z0, p0, 255), ++ z0 = svdup_s8_m (z0, p0, 255)) ++ ++/* ++** dup_m1_s8_m: ++** mov z0\.b, p0/m, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s8_m, svint8_t, ++ z0 = svdup_n_s8_m (z0, p0, -1), ++ z0 = svdup_s8_m (z0, p0, -1)) ++ ++/* ++** dup_m128_s8_m: ++** mov z0\.b, p0/m, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s8_m, svint8_t, ++ z0 = svdup_n_s8_m (z0, p0, -128), ++ z0 = svdup_s8_m (z0, p0, -128)) ++ ++/* ++** dup_0_s8_m: ++** mov z0\.b, p0/m, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_s8_m, svint8_t, ++ z0 = svdup_n_s8_m (z0, p0, 0), ++ z0 = 
svdup_s8_m (z0, p0, 0)) ++ ++/* ++** dup_w0_s8_m: ++** movprfx z0, z1 ++** mov z0\.b, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s8_m, svint8_t, int8_t, ++ z0 = svdup_n_s8_m (z1, p0, x0), ++ z0 = svdup_s8_m (z1, p0, x0)) ++ ++/* ++** dup_1_s8_z: ++** mov z0\.b, p0/z, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s8_z, svint8_t, ++ z0 = svdup_n_s8_z (p0, 1), ++ z0 = svdup_s8_z (p0, 1)) ++ ++/* ++** dup_127_s8_z: ++** mov z0\.b, p0/z, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s8_z, svint8_t, ++ z0 = svdup_n_s8_z (p0, 127), ++ z0 = svdup_s8_z (p0, 127)) ++ ++/* ++** dup_128_s8_z: ++** mov z0\.b, p0/z, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s8_z, svint8_t, ++ z0 = svdup_n_s8_z (p0, 128), ++ z0 = svdup_s8_z (p0, 128)) ++ ++/* ++** dup_129_s8_z: ++** mov z0\.b, p0/z, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_s8_z, svint8_t, ++ z0 = svdup_n_s8_z (p0, 129), ++ z0 = svdup_s8_z (p0, 129)) ++ ++/* ++** dup_253_s8_z: ++** mov z0\.b, p0/z, #-3 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_s8_z, svint8_t, ++ z0 = svdup_n_s8_z (p0, 253), ++ z0 = svdup_s8_z (p0, 253)) ++ ++/* ++** dup_254_s8_z: ++** mov z0\.b, p0/z, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s8_z, svint8_t, ++ z0 = svdup_n_s8_z (p0, 254), ++ z0 = svdup_s8_z (p0, 254)) ++ ++/* ++** dup_255_s8_z: ++** mov z0\.b, p0/z, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s8_z, svint8_t, ++ z0 = svdup_n_s8_z (p0, 255), ++ z0 = svdup_s8_z (p0, 255)) ++ ++/* ++** dup_m1_s8_z: ++** mov z0\.b, p0/z, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s8_z, svint8_t, ++ z0 = svdup_n_s8_z (p0, -1), ++ z0 = svdup_s8_z (p0, -1)) ++ ++/* ++** dup_m128_s8_z: ++** mov z0\.b, p0/z, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s8_z, svint8_t, ++ z0 = svdup_n_s8_z (p0, -128), ++ z0 = svdup_s8_z (p0, -128)) ++ ++/* ++** dup_0_s8_z: ++** mov z0\.b, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_s8_z, svint8_t, ++ z0 = svdup_n_s8_z (p0, 0), ++ z0 = svdup_s8_z (p0, 0)) ++ ++/* ++** dup_w0_s8_z: ++** movprfx z0\.b, p0/z, z0\.b ++** mov z0\.b, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s8_z, svint8_t, int8_t, ++ z0 = svdup_n_s8_z (p0, x0), ++ z0 = svdup_s8_z (p0, x0)) ++ ++/* ++** dup_1_s8_x: ++** mov z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_s8_x, svint8_t, ++ z0 = svdup_n_s8_x (p0, 1), ++ z0 = svdup_s8_x (p0, 1)) ++ ++/* ++** dup_127_s8_x: ++** mov z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_s8_x, svint8_t, ++ z0 = svdup_n_s8_x (p0, 127), ++ z0 = svdup_s8_x (p0, 127)) ++ ++/* ++** dup_128_s8_x: ++** mov z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_s8_x, svint8_t, ++ z0 = svdup_n_s8_x (p0, 128), ++ z0 = svdup_s8_x (p0, 128)) ++ ++/* ++** dup_129_s8_x: ++** mov z0\.b, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_s8_x, svint8_t, ++ z0 = svdup_n_s8_x (p0, 129), ++ z0 = svdup_s8_x (p0, 129)) ++ ++/* ++** dup_253_s8_x: ++** mov z0\.b, #-3 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_s8_x, svint8_t, ++ z0 = svdup_n_s8_x (p0, 253), ++ z0 = svdup_s8_x (p0, 253)) ++ ++/* ++** dup_254_s8_x: ++** mov z0\.b, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_s8_x, svint8_t, ++ z0 = svdup_n_s8_x (p0, 254), ++ z0 = svdup_s8_x (p0, 254)) ++ ++/* ++** dup_255_s8_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_s8_x, svint8_t, ++ z0 = svdup_n_s8_x (p0, 255), ++ z0 = svdup_s8_x (p0, 255)) ++ ++/* ++** dup_m1_s8_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_s8_x, svint8_t, ++ z0 = svdup_n_s8_x (p0, -1), ++ z0 = svdup_s8_x (p0, -1)) ++ ++/* ++** dup_m128_s8_x: ++** mov z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_s8_x, svint8_t, ++ 
z0 = svdup_n_s8_x (p0, -128), ++ z0 = svdup_s8_x (p0, -128)) ++ ++/* ++** dup_w0_s8_x: ++** mov z0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_s8_x, svint8_t, int8_t, ++ z0 = svdup_n_s8_x (p0, x0), ++ z0 = svdup_s8_x (p0, x0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c +new file mode 100644 +index 000000000..263eafef0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c +@@ -0,0 +1,1193 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_u16: ++** mov z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u16, svuint16_t, ++ z0 = svdup_n_u16 (1), ++ z0 = svdup_u16 (1)) ++ ++/* ++** dup_127_u16: ++** mov z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u16, svuint16_t, ++ z0 = svdup_n_u16 (127), ++ z0 = svdup_u16 (127)) ++ ++/* ++** dup_128_u16: ++** mov z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u16, svuint16_t, ++ z0 = svdup_n_u16 (128), ++ z0 = svdup_u16 (128)) ++ ++/* ++** dup_129_u16: ++** movi v([0-9]+)\.8h, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_u16, svuint16_t, ++ z0 = svdup_n_u16 (129), ++ z0 = svdup_u16 (129)) ++ ++/* ++** dup_253_u16: ++** movi v([0-9]+)\.8h, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_u16, svuint16_t, ++ z0 = svdup_n_u16 (253), ++ z0 = svdup_u16 (253)) ++ ++/* ++** dup_254_u16: ++** mov z0\.h, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u16, svuint16_t, ++ z0 = svdup_n_u16 (254), ++ z0 = svdup_u16 (254)) ++ ++/* ++** dup_255_u16: ++** mov z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u16, svuint16_t, ++ z0 = svdup_n_u16 (255), ++ z0 = svdup_u16 (255)) ++ ++/* ++** dup_256_u16: ++** mov z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u16, svuint16_t, ++ z0 = svdup_n_u16 (256), ++ z0 = svdup_u16 (256)) ++ ++/* ++** dup_257_u16: ++** mov z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_u16, svuint16_t, ++ z0 = svdup_n_u16 (257), ++ z0 = svdup_u16 (257)) ++ ++/* ++** dup_512_u16: ++** mov z0\.h, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u16, svuint16_t, ++ z0 = svdup_n_u16 (512), ++ z0 = svdup_u16 (512)) ++ ++/* ++** dup_7f00_u16: ++** mov z0\.h, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u16, svuint16_t, ++ z0 = svdup_n_u16 (0x7f00), ++ z0 = svdup_u16 (0x7f00)) ++ ++/* ++** dup_7f01_u16: ++** mov (w[0-9]+), 32513 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_u16, svuint16_t, ++ z0 = svdup_n_u16 (0x7f01), ++ z0 = svdup_u16 (0x7f01)) ++ ++/* ++** dup_7ffd_u16: ++** mov (w[0-9]+), 32765 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_u16, svuint16_t, ++ z0 = svdup_n_u16 (0x7ffd), ++ z0 = svdup_u16 (0x7ffd)) ++ ++/* ++** dup_7ffe_u16: ++** mov z0\.h, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u16, svuint16_t, ++ z0 = svdup_n_u16 (0x7ffe), ++ z0 = svdup_u16 (0x7ffe)) ++ ++/* ++** dup_7fff_u16: ++** mov z0\.h, #32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u16, svuint16_t, ++ z0 = svdup_n_u16 (0x7fff), ++ z0 = svdup_u16 (0x7fff)) ++ ++/* ++** dup_m1_u16: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u16, svuint16_t, ++ z0 = svdup_n_u16 (-1), ++ z0 = svdup_u16 (-1)) ++ ++/* ++** dup_m128_u16: ++** mov z0\.h, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u16, svuint16_t, ++ z0 = svdup_n_u16 (-128), ++ z0 = svdup_u16 (-128)) ++ ++/* ++** dup_m129_u16: ++** mov z0\.h, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u16, svuint16_t, ++ z0 = svdup_n_u16 (-129), ++ 
z0 = svdup_u16 (-129)) ++ ++/* ++** dup_m130_u16: ++** mvni v([0-9]+)\.8h, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_u16, svuint16_t, ++ z0 = svdup_n_u16 (-130), ++ z0 = svdup_u16 (-130)) ++ ++/* ++** dup_m254_u16: ++** mvni v([0-9]+)\.8h, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_u16, svuint16_t, ++ z0 = svdup_n_u16 (-254), ++ z0 = svdup_u16 (-254)) ++ ++/* ++** dup_m255_u16: ++** mov z0\.h, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u16, svuint16_t, ++ z0 = svdup_n_u16 (-255), ++ z0 = svdup_u16 (-255)) ++ ++/* ++** dup_m256_u16: ++** mov z0\.h, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u16, svuint16_t, ++ z0 = svdup_n_u16 (-256), ++ z0 = svdup_u16 (-256)) ++ ++/* ++** dup_m257_u16: ++** mov z0\.h, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u16, svuint16_t, ++ z0 = svdup_n_u16 (-257), ++ z0 = svdup_u16 (-257)) ++ ++/* ++** dup_m258_u16: ++** mov z0\.b, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_u16, svuint16_t, ++ z0 = svdup_n_u16 (-258), ++ z0 = svdup_u16 (-258)) ++ ++/* ++** dup_m259_u16: ++** mov (w[0-9]+), -259 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_u16, svuint16_t, ++ z0 = svdup_n_u16 (-259), ++ z0 = svdup_u16 (-259)) ++ ++/* ++** dup_m512_u16: ++** mov z0\.h, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u16, svuint16_t, ++ z0 = svdup_n_u16 (-512), ++ z0 = svdup_u16 (-512)) ++ ++/* ++** dup_m7f00_u16: ++** mov z0\.h, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u16, svuint16_t, ++ z0 = svdup_n_u16 (-0x7f00), ++ z0 = svdup_u16 (-0x7f00)) ++ ++/* ++** dup_m7f01_u16: ++** mov z0\.h, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u16, svuint16_t, ++ z0 = svdup_n_u16 (-0x7f01), ++ z0 = svdup_u16 (-0x7f01)) ++ ++/* ++** dup_m7f02_u16: ++** mov (w[0-9]+), -32514 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_u16, svuint16_t, ++ z0 = svdup_n_u16 (-0x7f02), ++ z0 = svdup_u16 (-0x7f02)) ++ ++/* ++** dup_m7ffe_u16: ++** mov (w[0-9]+), -32766 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_u16, svuint16_t, ++ z0 = svdup_n_u16 (-0x7ffe), ++ z0 = svdup_u16 (-0x7ffe)) ++ ++/* ++** dup_m7fff_u16: ++** mov z0\.h, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u16, svuint16_t, ++ z0 = svdup_n_u16 (-0x7fff), ++ z0 = svdup_u16 (-0x7fff)) ++ ++/* ++** dup_m8000_u16: ++** mov z0\.h, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u16, svuint16_t, ++ z0 = svdup_n_u16 (-0x8000), ++ z0 = svdup_u16 (-0x8000)) ++ ++/* ++** dup_w0_u16: ++** mov z0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u16, svuint16_t, uint16_t, ++ z0 = svdup_n_u16 (x0), ++ z0 = svdup_u16 (x0)) ++ ++/* ++** dup_1_u16_m: ++** mov z0\.h, p0/m, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 1), ++ z0 = svdup_u16_m (z0, p0, 1)) ++ ++/* ++** dup_127_u16_m: ++** mov z0\.h, p0/m, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 127), ++ z0 = svdup_u16_m (z0, p0, 127)) ++ ++/* ++** dup_128_u16_m: ++** mov (z[0-9]+\.h), #128 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 128), ++ z0 = svdup_u16_m (z0, p0, 128)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_129_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 129), ++ z0 = svdup_u16_m (z0, p0, 129)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_253_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 253), ++ z0 = svdup_u16_m (z0, p0, 253)) ++ ++/* ++** dup_254_u16_m: ++** mov (z[0-9]+\.h), #254 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 254), ++ z0 = svdup_u16_m (z0, p0, 254)) ++ ++/* ++** dup_255_u16_m: ++** mov (z[0-9]+\.h), #255 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 255), ++ z0 = svdup_u16_m (z0, p0, 255)) ++ ++/* ++** dup_256_u16_m: ++** mov z0\.h, p0/m, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 256), ++ z0 = svdup_u16_m (z0, p0, 256)) ++ ++/* ++** dup_257_u16_m: ++** mov (z[0-9]+)\.b, #1 ++** sel z0\.h, p0, \1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 257), ++ z0 = svdup_u16_m (z0, p0, 257)) ++ ++/* ++** dup_512_u16_m: ++** mov z0\.h, p0/m, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 512), ++ z0 = svdup_u16_m (z0, p0, 512)) ++ ++/* ++** dup_7f00_u16_m: ++** mov z0\.h, p0/m, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 0x7f00), ++ z0 = svdup_u16_m (z0, p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 0x7f01), ++ z0 = svdup_u16_m (z0, p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 0x7ffd), ++ z0 = svdup_u16_m (z0, p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_u16_m: ++** mov (z[0-9]+\.h), #32766 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 0x7ffe), ++ z0 = svdup_u16_m (z0, p0, 0x7ffe)) ++ ++/* ++** dup_7fff_u16_m: ++** mov (z[0-9]+\.h), #32767 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 0x7fff), ++ z0 = svdup_u16_m (z0, p0, 0x7fff)) ++ ++/* ++** dup_m1_u16_m: ++** mov z0\.h, p0/m, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -1), ++ z0 = svdup_u16_m (z0, p0, -1)) ++ ++/* ++** dup_m128_u16_m: ++** mov z0\.h, p0/m, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -128), ++ z0 = svdup_u16_m (z0, p0, -128)) ++ ++/* ++** dup_m129_u16_m: ++** mov (z[0-9]+\.h), #-129 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -129), ++ z0 = svdup_u16_m (z0, p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -130), ++ z0 = svdup_u16_m (z0, p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -254), ++ z0 = svdup_u16_m (z0, p0, -254)) ++ ++/* ++** dup_m255_u16_m: ++** mov (z[0-9]+\.h), #-255 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -255), ++ z0 = svdup_u16_m (z0, p0, -255)) ++ ++/* ++** dup_m256_u16_m: ++** mov z0\.h, p0/m, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -256), ++ z0 = svdup_u16_m (z0, p0, -256)) ++ ++/* ++** dup_m257_u16_m: ++** mov (z[0-9]+\.h), #-257 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -257), ++ z0 = svdup_u16_m (z0, p0, -257)) ++ ++/* ++** dup_m258_u16_m: ++** mov (z[0-9]+)\.b, #-2 ++** sel z0\.h, p0, \1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -258), ++ z0 = svdup_u16_m (z0, p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -259), ++ z0 = svdup_u16_m (z0, p0, -259)) ++ ++/* ++** dup_m512_u16_m: ++** mov z0\.h, p0/m, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -512), ++ z0 = svdup_u16_m (z0, p0, -512)) ++ ++/* ++** dup_m7f00_u16_m: ++** mov z0\.h, p0/m, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -0x7f00), ++ z0 = svdup_u16_m (z0, p0, -0x7f00)) ++ ++/* ++** dup_m7f01_u16_m: ++** mov (z[0-9]+\.h), #-32513 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -0x7f01), ++ z0 = svdup_u16_m (z0, p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -0x7f02), ++ z0 = svdup_u16_m (z0, p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7ffe_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -0x7ffe), ++ z0 = svdup_u16_m (z0, p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_u16_m: ++** mov (z[0-9]+\.h), #-32767 ++** sel z0\.h, p0, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -0x7fff), ++ z0 = svdup_u16_m (z0, p0, -0x7fff)) ++ ++/* ++** dup_m8000_u16_m: ++** mov z0\.h, p0/m, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, -0x8000), ++ z0 = svdup_u16_m (z0, p0, -0x8000)) ++ ++/* ++** dup_0_u16_m: ++** mov z0\.h, p0/m, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_u16_m, svuint16_t, ++ z0 = svdup_n_u16_m (z0, p0, 0), ++ z0 = svdup_u16_m (z0, p0, 0)) ++ ++/* ++** dup_w0_u16_m: ++** movprfx z0, z1 ++** mov z0\.h, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u16_m, svuint16_t, uint16_t, ++ z0 = svdup_n_u16_m (z1, p0, x0), ++ z0 = svdup_u16_m (z1, p0, x0)) ++ ++/* ++** dup_1_u16_z: ++** mov z0\.h, p0/z, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 1), ++ z0 = svdup_u16_z (p0, 1)) ++ ++/* ++** dup_127_u16_z: ++** mov z0\.h, p0/z, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 127), ++ z0 = svdup_u16_z (p0, 127)) ++ ++/* ++** dup_128_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #128 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 128), ++ z0 = svdup_u16_z (p0, 128)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_129_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 129), ++ z0 = svdup_u16_z (p0, 129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_253_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 253), ++ z0 = svdup_u16_z (p0, 253)) ++ ++/* ++** dup_254_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #254 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 254), ++ z0 = svdup_u16_z (p0, 254)) ++ ++/* ++** dup_255_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #255 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 255), ++ z0 = svdup_u16_z (p0, 255)) ++ ++/* ++** dup_256_u16_z: ++** mov z0\.h, p0/z, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 256), ++ z0 = svdup_u16_z (p0, 256)) ++ ++/* ++** dup_257_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+)\.b, #1 ++** sel z0\.h, p0, \2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 257), ++ z0 = svdup_u16_z (p0, 257)) ++ ++/* ++** dup_512_u16_z: ++** mov z0\.h, p0/z, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 512), ++ z0 = svdup_u16_z (p0, 512)) ++ ++/* ++** dup_7f00_u16_z: ++** mov z0\.h, p0/z, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 0x7f00), ++ z0 = svdup_u16_z (p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 0x7f01), ++ z0 = svdup_u16_z (p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 0x7ffd), ++ z0 = svdup_u16_z (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #32766 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 0x7ffe), ++ z0 = svdup_u16_z (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #32767 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 0x7fff), ++ z0 = svdup_u16_z (p0, 0x7fff)) ++ ++/* ++** dup_m1_u16_z: ++** mov z0\.h, p0/z, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -1), ++ z0 = svdup_u16_z (p0, -1)) ++ ++/* ++** dup_m128_u16_z: ++** mov z0\.h, p0/z, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -128), ++ z0 = svdup_u16_z (p0, -128)) ++ ++/* ++** dup_m129_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #-129 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -129), ++ z0 = svdup_u16_z (p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -130), ++ z0 = svdup_u16_z (p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -254), ++ z0 = svdup_u16_z (p0, -254)) ++ ++/* ++** dup_m255_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #-255 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -255), ++ z0 = svdup_u16_z (p0, -255)) ++ ++/* ++** dup_m256_u16_z: ++** mov z0\.h, p0/z, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -256), ++ z0 = svdup_u16_z (p0, -256)) ++ ++/* ++** dup_m257_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #-257 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -257), ++ z0 = svdup_u16_z (p0, -257)) ++ ++/* ++** dup_m258_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+)\.b, #-2 ++** sel z0\.h, p0, \2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -258), ++ z0 = svdup_u16_z (p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -259), ++ z0 = svdup_u16_z (p0, -259)) ++ ++/* ++** dup_m512_u16_z: ++** mov z0\.h, p0/z, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -512), ++ z0 = svdup_u16_z (p0, -512)) ++ ++/* ++** dup_m7f00_u16_z: ++** mov z0\.h, p0/z, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -0x7f00), ++ z0 = svdup_u16_z (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #-32513 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -0x7f01), ++ z0 = svdup_u16_z (p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -0x7f02), ++ z0 = svdup_u16_z (p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m7ffe_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -0x7ffe), ++ z0 = svdup_u16_z (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_u16_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.h), #-32767 ++** sel z0\.h, p0, \2, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -0x7fff), ++ z0 = svdup_u16_z (p0, -0x7fff)) ++ ++/* ++** dup_m8000_u16_z: ++** mov z0\.h, p0/z, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, -0x8000), ++ z0 = svdup_u16_z (p0, -0x8000)) ++ ++/* ++** dup_0_u16_z: ++** mov z0\.h, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_u16_z, svuint16_t, ++ z0 = svdup_n_u16_z (p0, 0), ++ z0 = svdup_u16_z (p0, 0)) ++ ++/* ++** dup_w0_u16_z: ++** movprfx z0\.h, p0/z, z0\.h ++** mov z0\.h, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u16_z, svuint16_t, uint16_t, ++ z0 = svdup_n_u16_z (p0, x0), ++ z0 = svdup_u16_z (p0, x0)) ++ ++/* ++** dup_1_u16_x: ++** mov z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 1), ++ z0 = svdup_u16_x (p0, 1)) ++ ++/* ++** dup_127_u16_x: ++** mov z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 127), ++ z0 = svdup_u16_x (p0, 127)) ++ ++/* ++** dup_128_u16_x: ++** mov z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 128), ++ z0 = svdup_u16_x (p0, 128)) ++ ++/* ++** dup_129_u16_x: ++** movi v([0-9]+)\.8h, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 129), ++ z0 = svdup_u16_x (p0, 129)) ++ ++/* ++** dup_253_u16_x: ++** movi v([0-9]+)\.8h, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 253), ++ z0 = svdup_u16_x (p0, 253)) ++ ++/* ++** dup_254_u16_x: ++** mov z0\.h, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 254), ++ z0 = svdup_u16_x (p0, 254)) ++ ++/* ++** dup_255_u16_x: ++** mov z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 255), ++ z0 = svdup_u16_x (p0, 255)) ++ ++/* ++** dup_256_u16_x: ++** mov z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 256), ++ z0 = svdup_u16_x (p0, 256)) ++ ++/* ++** dup_257_u16_x: ++** mov z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 257), ++ z0 = svdup_u16_x (p0, 257)) ++ ++/* ++** dup_512_u16_x: ++** mov z0\.h, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 512), ++ z0 = svdup_u16_x (p0, 512)) ++ ++/* ++** dup_7f00_u16_x: ++** mov z0\.h, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 0x7f00), ++ z0 = svdup_u16_x (p0, 0x7f00)) ++ ++/* ++** dup_7f01_u16_x: ++** mov (w[0-9]+), 32513 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 0x7f01), ++ z0 = svdup_u16_x (p0, 0x7f01)) ++ ++/* ++** dup_7ffd_u16_x: ++** mov (w[0-9]+), 32765 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 0x7ffd), ++ z0 = svdup_u16_x (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_u16_x: ++** mov z0\.h, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 0x7ffe), ++ z0 = svdup_u16_x (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_u16_x: ++** mov z0\.h, #32767 
++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, 0x7fff), ++ z0 = svdup_u16_x (p0, 0x7fff)) ++ ++/* ++** dup_m1_u16_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -1), ++ z0 = svdup_u16_x (p0, -1)) ++ ++/* ++** dup_m128_u16_x: ++** mov z0\.h, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -128), ++ z0 = svdup_u16_x (p0, -128)) ++ ++/* ++** dup_m129_u16_x: ++** mov z0\.h, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -129), ++ z0 = svdup_u16_x (p0, -129)) ++ ++/* ++** dup_m130_u16_x: ++** mvni v([0-9]+)\.8h, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -130), ++ z0 = svdup_u16_x (p0, -130)) ++ ++/* ++** dup_m254_u16_x: ++** mvni v([0-9]+)\.8h, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -254), ++ z0 = svdup_u16_x (p0, -254)) ++ ++/* ++** dup_m255_u16_x: ++** mov z0\.h, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -255), ++ z0 = svdup_u16_x (p0, -255)) ++ ++/* ++** dup_m256_u16_x: ++** mov z0\.h, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -256), ++ z0 = svdup_u16_x (p0, -256)) ++ ++/* ++** dup_m257_u16_x: ++** mov z0\.h, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -257), ++ z0 = svdup_u16_x (p0, -257)) ++ ++/* ++** dup_m258_u16_x: ++** mov z0\.b, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -258), ++ z0 = svdup_u16_x (p0, -258)) ++ ++/* ++** dup_m259_u16_x: ++** mov (w[0-9]+), -259 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -259), ++ z0 = svdup_u16_x (p0, -259)) ++ ++/* ++** dup_m512_u16_x: ++** mov z0\.h, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -512), ++ z0 = svdup_u16_x (p0, -512)) ++ ++/* ++** dup_m7f00_u16_x: ++** mov z0\.h, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -0x7f00), ++ z0 = svdup_u16_x (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_u16_x: ++** mov z0\.h, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -0x7f01), ++ z0 = svdup_u16_x (p0, -0x7f01)) ++ ++/* ++** dup_m7f02_u16_x: ++** mov (w[0-9]+), -32514 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -0x7f02), ++ z0 = svdup_u16_x (p0, -0x7f02)) ++ ++/* ++** dup_m7ffe_u16_x: ++** mov (w[0-9]+), -32766 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -0x7ffe), ++ z0 = svdup_u16_x (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_u16_x: ++** mov z0\.h, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -0x7fff), ++ z0 = svdup_u16_x (p0, -0x7fff)) ++ ++/* ++** dup_m8000_u16_x: ++** mov z0\.h, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u16_x, svuint16_t, ++ z0 = svdup_n_u16_x (p0, -0x8000), ++ z0 = svdup_u16_x (p0, -0x8000)) ++ ++/* ++** dup_w0_u16_x: ++** mov z0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u16_x, svuint16_t, uint16_t, ++ z0 = svdup_n_u16_x (p0, x0), ++ z0 = svdup_u16_x (p0, x0)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c +new file mode 100644 +index 000000000..667feea64 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c +@@ -0,0 +1,1175 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_u32: ++** mov z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u32, svuint32_t, ++ z0 = svdup_n_u32 (1), ++ z0 = svdup_u32 (1)) ++ ++/* ++** dup_127_u32: ++** mov z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u32, svuint32_t, ++ z0 = svdup_n_u32 (127), ++ z0 = svdup_u32 (127)) ++ ++/* ++** dup_128_u32: ++** mov z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u32, svuint32_t, ++ z0 = svdup_n_u32 (128), ++ z0 = svdup_u32 (128)) ++ ++/* ++** dup_129_u32: ++** movi v([0-9]+)\.4s, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_u32, svuint32_t, ++ z0 = svdup_n_u32 (129), ++ z0 = svdup_u32 (129)) ++ ++/* ++** dup_253_u32: ++** movi v([0-9]+)\.4s, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_u32, svuint32_t, ++ z0 = svdup_n_u32 (253), ++ z0 = svdup_u32 (253)) ++ ++/* ++** dup_254_u32: ++** mov z0\.s, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u32, svuint32_t, ++ z0 = svdup_n_u32 (254), ++ z0 = svdup_u32 (254)) ++ ++/* ++** dup_255_u32: ++** mov z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u32, svuint32_t, ++ z0 = svdup_n_u32 (255), ++ z0 = svdup_u32 (255)) ++ ++/* ++** dup_256_u32: ++** mov z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u32, svuint32_t, ++ z0 = svdup_n_u32 (256), ++ z0 = svdup_u32 (256)) ++ ++/* ++** dup_257_u32: ++** mov (w[0-9]+), 257 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_u32, svuint32_t, ++ z0 = svdup_n_u32 (257), ++ z0 = svdup_u32 (257)) ++ ++/* ++** dup_512_u32: ++** mov z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u32, svuint32_t, ++ z0 = svdup_n_u32 (512), ++ z0 = svdup_u32 (512)) ++ ++/* ++** dup_7f00_u32: ++** mov z0\.s, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u32, svuint32_t, ++ z0 = svdup_n_u32 (0x7f00), ++ z0 = svdup_u32 (0x7f00)) ++ ++/* ++** dup_7f01_u32: ++** mov (w[0-9]+), 32513 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_u32, svuint32_t, ++ z0 = svdup_n_u32 (0x7f01), ++ z0 = svdup_u32 (0x7f01)) ++ ++/* ++** dup_7ffd_u32: ++** mov (w[0-9]+), 32765 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_u32, svuint32_t, ++ z0 = svdup_n_u32 (0x7ffd), ++ z0 = svdup_u32 (0x7ffd)) ++ ++/* ++** dup_7ffe_u32: ++** mov z0\.s, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u32, svuint32_t, ++ z0 = svdup_n_u32 (0x7ffe), ++ z0 = svdup_u32 (0x7ffe)) ++ ++/* ++** dup_7fff_u32: ++** mov z0\.s, #32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u32, svuint32_t, ++ z0 = svdup_n_u32 (0x7fff), ++ z0 = svdup_u32 (0x7fff)) ++ ++/* ++** dup_m1_u32: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u32, svuint32_t, ++ z0 = svdup_n_u32 (-1), ++ z0 = svdup_u32 (-1)) ++ ++/* ++** dup_m128_u32: ++** mov z0\.s, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u32, svuint32_t, ++ z0 = svdup_n_u32 (-128), ++ z0 = svdup_u32 (-128)) ++ ++/* ++** dup_m129_u32: ++** mov z0\.s, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u32, svuint32_t, ++ z0 = svdup_n_u32 (-129), ++ z0 = svdup_u32 (-129)) ++ ++/* ++** dup_m130_u32: ++** mvni v([0-9]+)\.4s, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_u32, svuint32_t, ++ z0 = svdup_n_u32 (-130), ++ z0 = svdup_u32 (-130)) 
++ ++/* ++** dup_m254_u32: ++** mvni v([0-9]+)\.4s, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_u32, svuint32_t, ++ z0 = svdup_n_u32 (-254), ++ z0 = svdup_u32 (-254)) ++ ++/* ++** dup_m255_u32: ++** mov z0\.s, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u32, svuint32_t, ++ z0 = svdup_n_u32 (-255), ++ z0 = svdup_u32 (-255)) ++ ++/* ++** dup_m256_u32: ++** mov z0\.s, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u32, svuint32_t, ++ z0 = svdup_n_u32 (-256), ++ z0 = svdup_u32 (-256)) ++ ++/* ++** dup_m257_u32: ++** mov z0\.s, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u32, svuint32_t, ++ z0 = svdup_n_u32 (-257), ++ z0 = svdup_u32 (-257)) ++ ++/* ++** dup_m258_u32: ++** mov (w[0-9]+), -258 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_u32, svuint32_t, ++ z0 = svdup_n_u32 (-258), ++ z0 = svdup_u32 (-258)) ++ ++/* ++** dup_m259_u32: ++** mov (w[0-9]+), -259 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_u32, svuint32_t, ++ z0 = svdup_n_u32 (-259), ++ z0 = svdup_u32 (-259)) ++ ++/* ++** dup_m512_u32: ++** mov z0\.s, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u32, svuint32_t, ++ z0 = svdup_n_u32 (-512), ++ z0 = svdup_u32 (-512)) ++ ++/* ++** dup_m7f00_u32: ++** mov z0\.s, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u32, svuint32_t, ++ z0 = svdup_n_u32 (-0x7f00), ++ z0 = svdup_u32 (-0x7f00)) ++ ++/* ++** dup_m7f01_u32: ++** mov z0\.s, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u32, svuint32_t, ++ z0 = svdup_n_u32 (-0x7f01), ++ z0 = svdup_u32 (-0x7f01)) ++ ++/* ++** dup_m7f02_u32: ++** mov (w[0-9]+), -32514 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_u32, svuint32_t, ++ z0 = svdup_n_u32 (-0x7f02), ++ z0 = svdup_u32 (-0x7f02)) ++ ++/* ++** dup_m7ffe_u32: ++** mov (w[0-9]+), -32766 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_u32, svuint32_t, ++ z0 = svdup_n_u32 (-0x7ffe), ++ z0 = svdup_u32 (-0x7ffe)) ++ ++/* ++** dup_m7fff_u32: ++** mov z0\.s, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u32, svuint32_t, ++ z0 = svdup_n_u32 (-0x7fff), ++ z0 = svdup_u32 (-0x7fff)) ++ ++/* ++** dup_m8000_u32: ++** mov z0\.s, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u32, svuint32_t, ++ z0 = svdup_n_u32 (-0x8000), ++ z0 = svdup_u32 (-0x8000)) ++ ++/* ++** dup_w0_u32: ++** mov z0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u32, svuint32_t, uint32_t, ++ z0 = svdup_n_u32 (x0), ++ z0 = svdup_u32 (x0)) ++ ++/* ++** dup_1_u32_m: ++** mov z0\.s, p0/m, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 1), ++ z0 = svdup_u32_m (z0, p0, 1)) ++ ++/* ++** dup_127_u32_m: ++** mov z0\.s, p0/m, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 127), ++ z0 = svdup_u32_m (z0, p0, 127)) ++ ++/* ++** dup_128_u32_m: ++** mov (z[0-9]+\.s), #128 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 128), ++ z0 = svdup_u32_m (z0, p0, 128)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_129_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 129), ++ z0 = svdup_u32_m (z0, p0, 129)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_253_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 253), ++ z0 = svdup_u32_m (z0, p0, 253)) ++ ++/* ++** dup_254_u32_m: ++** mov (z[0-9]+\.s), #254 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 254), ++ z0 = svdup_u32_m (z0, p0, 254)) ++ ++/* ++** dup_255_u32_m: ++** mov (z[0-9]+\.s), #255 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 255), ++ z0 = svdup_u32_m (z0, p0, 255)) ++ ++/* ++** dup_256_u32_m: ++** mov z0\.s, p0/m, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 256), ++ z0 = svdup_u32_m (z0, p0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_257_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 257), ++ z0 = svdup_u32_m (z0, p0, 257)) ++ ++/* ++** dup_512_u32_m: ++** mov z0\.s, p0/m, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 512), ++ z0 = svdup_u32_m (z0, p0, 512)) ++ ++/* ++** dup_7f00_u32_m: ++** mov z0\.s, p0/m, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 0x7f00), ++ z0 = svdup_u32_m (z0, p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 0x7f01), ++ z0 = svdup_u32_m (z0, p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 0x7ffd), ++ z0 = svdup_u32_m (z0, p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_u32_m: ++** mov (z[0-9]+\.s), #32766 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 0x7ffe), ++ z0 = svdup_u32_m (z0, p0, 0x7ffe)) ++ ++/* ++** dup_7fff_u32_m: ++** mov (z[0-9]+\.s), #32767 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 0x7fff), ++ z0 = svdup_u32_m (z0, p0, 0x7fff)) ++ ++/* ++** dup_m1_u32_m: ++** mov z0\.s, p0/m, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -1), ++ z0 = svdup_u32_m (z0, p0, -1)) ++ ++/* ++** dup_m128_u32_m: ++** mov z0\.s, p0/m, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -128), ++ z0 = svdup_u32_m (z0, p0, -128)) ++ ++/* ++** dup_m129_u32_m: ++** mov (z[0-9]+\.s), #-129 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -129), ++ z0 = svdup_u32_m (z0, p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -130), ++ z0 = svdup_u32_m (z0, p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -254), ++ z0 = svdup_u32_m (z0, p0, -254)) ++ ++/* ++** dup_m255_u32_m: ++** mov (z[0-9]+\.s), #-255 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -255), ++ z0 = svdup_u32_m (z0, p0, -255)) ++ ++/* ++** dup_m256_u32_m: ++** mov z0\.s, p0/m, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -256), ++ z0 = svdup_u32_m (z0, p0, -256)) ++ ++/* ++** dup_m257_u32_m: ++** mov (z[0-9]+\.s), #-257 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -257), ++ z0 = svdup_u32_m (z0, p0, -257)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m258_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -258), ++ z0 = svdup_u32_m (z0, p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -259), ++ z0 = svdup_u32_m (z0, p0, -259)) ++ ++/* ++** dup_m512_u32_m: ++** mov z0\.s, p0/m, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -512), ++ z0 = svdup_u32_m (z0, p0, -512)) ++ ++/* ++** dup_m7f00_u32_m: ++** mov z0\.s, p0/m, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -0x7f00), ++ z0 = svdup_u32_m (z0, p0, -0x7f00)) ++ ++/* ++** dup_m7f01_u32_m: ++** mov (z[0-9]+\.s), #-32513 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -0x7f01), ++ z0 = svdup_u32_m (z0, p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -0x7f02), ++ z0 = svdup_u32_m (z0, p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7ffe_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -0x7ffe), ++ z0 = svdup_u32_m (z0, p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_u32_m: ++** mov (z[0-9]+\.s), #-32767 ++** sel z0\.s, p0, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -0x7fff), ++ z0 = svdup_u32_m (z0, p0, -0x7fff)) ++ ++/* ++** dup_m8000_u32_m: ++** mov z0\.s, p0/m, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, -0x8000), ++ z0 = svdup_u32_m (z0, p0, -0x8000)) ++ ++/* ++** dup_0_u32_m: ++** mov z0\.s, p0/m, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_u32_m, svuint32_t, ++ z0 = svdup_n_u32_m (z0, p0, 0), ++ z0 = svdup_u32_m (z0, p0, 0)) ++ ++/* ++** dup_w0_u32_m: ++** movprfx z0, z1 ++** mov z0\.s, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u32_m, svuint32_t, uint32_t, ++ z0 = svdup_n_u32_m (z1, p0, x0), ++ z0 = svdup_u32_m (z1, p0, x0)) ++ ++/* ++** dup_1_u32_z: ++** mov z0\.s, p0/z, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 1), ++ z0 = svdup_u32_z (p0, 1)) ++ ++/* ++** dup_127_u32_z: ++** mov z0\.s, p0/z, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 127), ++ z0 = svdup_u32_z (p0, 127)) ++ ++/* ++** dup_128_u32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #128 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 128), ++ z0 = svdup_u32_z (p0, 128)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_129_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 129), ++ z0 = svdup_u32_z (p0, 129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_253_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 253), ++ z0 = svdup_u32_z (p0, 253)) ++ ++/* ++** dup_254_u32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #254 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 254), ++ z0 = svdup_u32_z (p0, 254)) ++ ++/* ++** dup_255_u32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #255 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 255), ++ z0 = svdup_u32_z (p0, 255)) ++ ++/* ++** dup_256_u32_z: ++** mov z0\.s, p0/z, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 256), ++ z0 = svdup_u32_z (p0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_257_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 257), ++ z0 = svdup_u32_z (p0, 257)) ++ ++/* ++** dup_512_u32_z: ++** mov z0\.s, p0/z, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 512), ++ z0 = svdup_u32_z (p0, 512)) ++ ++/* ++** dup_7f00_u32_z: ++** mov z0\.s, p0/z, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 0x7f00), ++ z0 = svdup_u32_z (p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 0x7f01), ++ z0 = svdup_u32_z (p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 0x7ffd), ++ z0 = svdup_u32_z (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_u32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #32766 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 0x7ffe), ++ z0 = svdup_u32_z (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_u32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #32767 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 0x7fff), ++ z0 = svdup_u32_z (p0, 0x7fff)) ++ ++/* ++** dup_m1_u32_z: ++** mov z0\.s, p0/z, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -1), ++ z0 = svdup_u32_z (p0, -1)) ++ ++/* ++** dup_m128_u32_z: ++** mov z0\.s, p0/z, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -128), ++ z0 = svdup_u32_z (p0, -128)) ++ ++/* ++** dup_m129_u32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #-129 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -129), ++ z0 = svdup_u32_z (p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -130), ++ z0 = svdup_u32_z (p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -254), ++ z0 = svdup_u32_z (p0, -254)) ++ ++/* ++** dup_m255_u32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #-255 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -255), ++ z0 = svdup_u32_z (p0, -255)) ++ ++/* ++** dup_m256_u32_z: ++** mov z0\.s, p0/z, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -256), ++ z0 = svdup_u32_z (p0, -256)) ++ ++/* ++** dup_m257_u32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #-257 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -257), ++ z0 = svdup_u32_z (p0, -257)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m258_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -258), ++ z0 = svdup_u32_z (p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -259), ++ z0 = svdup_u32_z (p0, -259)) ++ ++/* ++** dup_m512_u32_z: ++** mov z0\.s, p0/z, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -512), ++ z0 = svdup_u32_z (p0, -512)) ++ ++/* ++** dup_m7f00_u32_z: ++** mov z0\.s, p0/z, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -0x7f00), ++ z0 = svdup_u32_z (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_u32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #-32513 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -0x7f01), ++ z0 = svdup_u32_z (p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -0x7f02), ++ z0 = svdup_u32_z (p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m7ffe_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -0x7ffe), ++ z0 = svdup_u32_z (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_u32_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.s), #-32767 ++** sel z0\.s, p0, \2, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -0x7fff), ++ z0 = svdup_u32_z (p0, -0x7fff)) ++ ++/* ++** dup_m8000_u32_z: ++** mov z0\.s, p0/z, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, -0x8000), ++ z0 = svdup_u32_z (p0, -0x8000)) ++ ++/* ++** dup_0_u32_z: ++** mov z0\.s, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_u32_z, svuint32_t, ++ z0 = svdup_n_u32_z (p0, 0), ++ z0 = svdup_u32_z (p0, 0)) ++ ++/* ++** dup_w0_u32_z: ++** movprfx z0\.s, p0/z, z0\.s ++** mov z0\.s, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u32_z, svuint32_t, uint32_t, ++ z0 = svdup_n_u32_z (p0, x0), ++ z0 = svdup_u32_z (p0, x0)) ++ ++/* ++** dup_1_u32_x: ++** mov z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 1), ++ z0 = svdup_u32_x (p0, 1)) ++ ++/* ++** dup_127_u32_x: ++** mov z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 127), ++ z0 = svdup_u32_x (p0, 127)) ++ ++/* ++** dup_128_u32_x: ++** mov z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 128), ++ z0 = svdup_u32_x (p0, 128)) ++ ++/* ++** dup_129_u32_x: ++** movi v([0-9]+)\.4s, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 129), ++ z0 = svdup_u32_x (p0, 129)) ++ ++/* ++** dup_253_u32_x: ++** movi v([0-9]+)\.4s, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 253), ++ z0 = svdup_u32_x (p0, 253)) ++ ++/* ++** dup_254_u32_x: ++** mov z0\.s, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 254), ++ z0 = svdup_u32_x (p0, 254)) ++ ++/* ++** dup_255_u32_x: ++** mov z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 255), ++ z0 = svdup_u32_x (p0, 255)) ++ ++/* ++** dup_256_u32_x: ++** mov z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 256), ++ z0 = svdup_u32_x (p0, 256)) ++ ++/* ++** dup_257_u32_x: ++** mov (w[0-9]+), 257 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 257), ++ z0 = svdup_u32_x (p0, 257)) ++ ++/* ++** dup_512_u32_x: ++** mov z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 512), ++ z0 = svdup_u32_x (p0, 512)) ++ ++/* ++** dup_7f00_u32_x: ++** mov z0\.s, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 0x7f00), ++ z0 = svdup_u32_x (p0, 0x7f00)) ++ ++/* ++** dup_7f01_u32_x: ++** mov (w[0-9]+), 32513 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 0x7f01), ++ z0 = svdup_u32_x (p0, 0x7f01)) ++ ++/* ++** dup_7ffd_u32_x: ++** mov (w[0-9]+), 32765 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 0x7ffd), ++ z0 = svdup_u32_x (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_u32_x: ++** mov z0\.s, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 0x7ffe), ++ z0 = svdup_u32_x (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_u32_x: 
++** mov z0\.s, #32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, 0x7fff), ++ z0 = svdup_u32_x (p0, 0x7fff)) ++ ++/* ++** dup_m1_u32_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -1), ++ z0 = svdup_u32_x (p0, -1)) ++ ++/* ++** dup_m128_u32_x: ++** mov z0\.s, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -128), ++ z0 = svdup_u32_x (p0, -128)) ++ ++/* ++** dup_m129_u32_x: ++** mov z0\.s, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -129), ++ z0 = svdup_u32_x (p0, -129)) ++ ++/* ++** dup_m130_u32_x: ++** mvni v([0-9]+)\.4s, 0x81 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -130), ++ z0 = svdup_u32_x (p0, -130)) ++ ++/* ++** dup_m254_u32_x: ++** mvni v([0-9]+)\.4s, 0xfd ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -254), ++ z0 = svdup_u32_x (p0, -254)) ++ ++/* ++** dup_m255_u32_x: ++** mov z0\.s, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -255), ++ z0 = svdup_u32_x (p0, -255)) ++ ++/* ++** dup_m256_u32_x: ++** mov z0\.s, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -256), ++ z0 = svdup_u32_x (p0, -256)) ++ ++/* ++** dup_m257_u32_x: ++** mov z0\.s, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -257), ++ z0 = svdup_u32_x (p0, -257)) ++ ++/* ++** dup_m258_u32_x: ++** mov (w[0-9]+), -258 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -258), ++ z0 = svdup_u32_x (p0, -258)) ++ ++/* ++** dup_m259_u32_x: ++** mov (w[0-9]+), -259 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -259), ++ z0 = svdup_u32_x (p0, -259)) ++ ++/* ++** dup_m512_u32_x: ++** mov z0\.s, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -512), ++ z0 = svdup_u32_x (p0, -512)) ++ ++/* ++** dup_m7f00_u32_x: ++** mov z0\.s, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -0x7f00), ++ z0 = svdup_u32_x (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_u32_x: ++** mov z0\.s, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -0x7f01), ++ z0 = svdup_u32_x (p0, -0x7f01)) ++ ++/* ++** dup_m7f02_u32_x: ++** mov (w[0-9]+), -32514 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -0x7f02), ++ z0 = svdup_u32_x (p0, -0x7f02)) ++ ++/* ++** dup_m7ffe_u32_x: ++** mov (w[0-9]+), -32766 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -0x7ffe), ++ z0 = svdup_u32_x (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_u32_x: ++** mov z0\.s, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -0x7fff), ++ z0 = svdup_u32_x (p0, -0x7fff)) ++ ++/* ++** dup_m8000_u32_x: ++** mov z0\.s, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u32_x, svuint32_t, ++ z0 = svdup_n_u32_x (p0, -0x8000), ++ z0 = svdup_u32_x (p0, -0x8000)) ++ ++/* ++** dup_w0_u32_x: ++** mov z0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u32_x, svuint32_t, uint32_t, ++ z0 = svdup_n_u32_x (p0, x0), ++ z0 = svdup_u32_x (p0, x0)) +diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c +new file mode 100644 +index 000000000..a7cca7af0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c +@@ -0,0 +1,1175 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_u64: ++** mov z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u64, svuint64_t, ++ z0 = svdup_n_u64 (1), ++ z0 = svdup_u64 (1)) ++ ++/* ++** dup_127_u64: ++** mov z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u64, svuint64_t, ++ z0 = svdup_n_u64 (127), ++ z0 = svdup_u64 (127)) ++ ++/* ++** dup_128_u64: ++** mov z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u64, svuint64_t, ++ z0 = svdup_n_u64 (128), ++ z0 = svdup_u64 (128)) ++ ++/* ++** dup_129_u64: ++** mov (x[0-9]+), 129 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_u64, svuint64_t, ++ z0 = svdup_n_u64 (129), ++ z0 = svdup_u64 (129)) ++ ++/* ++** dup_253_u64: ++** mov (x[0-9]+), 253 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_u64, svuint64_t, ++ z0 = svdup_n_u64 (253), ++ z0 = svdup_u64 (253)) ++ ++/* ++** dup_254_u64: ++** mov z0\.d, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u64, svuint64_t, ++ z0 = svdup_n_u64 (254), ++ z0 = svdup_u64 (254)) ++ ++/* ++** dup_255_u64: ++** mov z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u64, svuint64_t, ++ z0 = svdup_n_u64 (255), ++ z0 = svdup_u64 (255)) ++ ++/* ++** dup_256_u64: ++** mov z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u64, svuint64_t, ++ z0 = svdup_n_u64 (256), ++ z0 = svdup_u64 (256)) ++ ++/* ++** dup_257_u64: ++** mov (x[0-9]+), 257 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_u64, svuint64_t, ++ z0 = svdup_n_u64 (257), ++ z0 = svdup_u64 (257)) ++ ++/* ++** dup_512_u64: ++** mov z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u64, svuint64_t, ++ z0 = svdup_n_u64 (512), ++ z0 = svdup_u64 (512)) ++ ++/* ++** dup_7f00_u64: ++** mov z0\.d, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u64, svuint64_t, ++ z0 = svdup_n_u64 (0x7f00), ++ z0 = svdup_u64 (0x7f00)) ++ ++/* ++** dup_7f01_u64: ++** mov (x[0-9]+), 32513 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_u64, svuint64_t, ++ z0 = svdup_n_u64 (0x7f01), ++ z0 = svdup_u64 (0x7f01)) ++ ++/* ++** dup_7ffd_u64: ++** mov (x[0-9]+), 32765 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_u64, svuint64_t, ++ z0 = svdup_n_u64 (0x7ffd), ++ z0 = svdup_u64 (0x7ffd)) ++ ++/* ++** dup_7ffe_u64: ++** mov z0\.d, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u64, svuint64_t, ++ z0 = svdup_n_u64 (0x7ffe), ++ z0 = svdup_u64 (0x7ffe)) ++ ++/* ++** dup_7fff_u64: ++** mov z0\.d, #32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u64, svuint64_t, ++ z0 = svdup_n_u64 (0x7fff), ++ z0 = svdup_u64 (0x7fff)) ++ ++/* ++** dup_m1_u64: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u64, svuint64_t, ++ z0 = svdup_n_u64 (-1), ++ z0 = svdup_u64 (-1)) ++ ++/* ++** dup_m128_u64: ++** mov z0\.d, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u64, svuint64_t, ++ z0 = svdup_n_u64 (-128), ++ z0 = svdup_u64 (-128)) ++ ++/* ++** dup_m129_u64: ++** mov z0\.d, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u64, svuint64_t, ++ z0 = svdup_n_u64 (-129), ++ z0 = svdup_u64 (-129)) ++ ++/* ++** dup_m130_u64: ++** mov (x[0-9]+), -130 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_u64, svuint64_t, ++ z0 = svdup_n_u64 (-130), ++ z0 = svdup_u64 (-130)) ++ ++/* ++** dup_m254_u64: ++** mov 
(x[0-9]+), -254 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_u64, svuint64_t, ++ z0 = svdup_n_u64 (-254), ++ z0 = svdup_u64 (-254)) ++ ++/* ++** dup_m255_u64: ++** mov z0\.d, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u64, svuint64_t, ++ z0 = svdup_n_u64 (-255), ++ z0 = svdup_u64 (-255)) ++ ++/* ++** dup_m256_u64: ++** mov z0\.d, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u64, svuint64_t, ++ z0 = svdup_n_u64 (-256), ++ z0 = svdup_u64 (-256)) ++ ++/* ++** dup_m257_u64: ++** mov z0\.d, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u64, svuint64_t, ++ z0 = svdup_n_u64 (-257), ++ z0 = svdup_u64 (-257)) ++ ++/* ++** dup_m258_u64: ++** mov (x[0-9]+), -258 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_u64, svuint64_t, ++ z0 = svdup_n_u64 (-258), ++ z0 = svdup_u64 (-258)) ++ ++/* ++** dup_m259_u64: ++** mov (x[0-9]+), -259 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_u64, svuint64_t, ++ z0 = svdup_n_u64 (-259), ++ z0 = svdup_u64 (-259)) ++ ++/* ++** dup_m512_u64: ++** mov z0\.d, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u64, svuint64_t, ++ z0 = svdup_n_u64 (-512), ++ z0 = svdup_u64 (-512)) ++ ++/* ++** dup_m7f00_u64: ++** mov z0\.d, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u64, svuint64_t, ++ z0 = svdup_n_u64 (-0x7f00), ++ z0 = svdup_u64 (-0x7f00)) ++ ++/* ++** dup_m7f01_u64: ++** mov z0\.d, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u64, svuint64_t, ++ z0 = svdup_n_u64 (-0x7f01), ++ z0 = svdup_u64 (-0x7f01)) ++ ++/* ++** dup_m7f02_u64: ++** mov (x[0-9]+), -32514 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_u64, svuint64_t, ++ z0 = svdup_n_u64 (-0x7f02), ++ z0 = svdup_u64 (-0x7f02)) ++ ++/* ++** dup_m7ffe_u64: ++** mov (x[0-9]+), -32766 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_u64, svuint64_t, ++ z0 = svdup_n_u64 (-0x7ffe), ++ z0 = svdup_u64 (-0x7ffe)) ++ ++/* ++** dup_m7fff_u64: ++** mov z0\.d, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u64, svuint64_t, ++ z0 = svdup_n_u64 (-0x7fff), ++ z0 = svdup_u64 (-0x7fff)) ++ ++/* ++** dup_m8000_u64: ++** mov z0\.d, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u64, svuint64_t, ++ z0 = svdup_n_u64 (-0x8000), ++ z0 = svdup_u64 (-0x8000)) ++ ++/* ++** dup_x0_u64: ++** mov z0\.d, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_x0_u64, svuint64_t, uint64_t, ++ z0 = svdup_n_u64 (x0), ++ z0 = svdup_u64 (x0)) ++ ++/* ++** dup_1_u64_m: ++** mov z0\.d, p0/m, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 1), ++ z0 = svdup_u64_m (z0, p0, 1)) ++ ++/* ++** dup_127_u64_m: ++** mov z0\.d, p0/m, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 127), ++ z0 = svdup_u64_m (z0, p0, 127)) ++ ++/* ++** dup_128_u64_m: ++** mov (z[0-9]+\.d), #128 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 128), ++ z0 = svdup_u64_m (z0, p0, 128)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_129_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 129), ++ z0 = svdup_u64_m (z0, p0, 129)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_253_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 253), ++ z0 = svdup_u64_m (z0, p0, 253)) ++ ++/* ++** dup_254_u64_m: ++** mov (z[0-9]+\.d), #254 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 254), ++ z0 = svdup_u64_m (z0, p0, 254)) ++ ++/* ++** dup_255_u64_m: ++** mov (z[0-9]+\.d), #255 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 255), ++ z0 = svdup_u64_m (z0, p0, 255)) ++ ++/* ++** dup_256_u64_m: ++** mov z0\.d, p0/m, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 256), ++ z0 = svdup_u64_m (z0, p0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_257_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 257), ++ z0 = svdup_u64_m (z0, p0, 257)) ++ ++/* ++** dup_512_u64_m: ++** mov z0\.d, p0/m, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 512), ++ z0 = svdup_u64_m (z0, p0, 512)) ++ ++/* ++** dup_7f00_u64_m: ++** mov z0\.d, p0/m, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 0x7f00), ++ z0 = svdup_u64_m (z0, p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 0x7f01), ++ z0 = svdup_u64_m (z0, p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 0x7ffd), ++ z0 = svdup_u64_m (z0, p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_u64_m: ++** mov (z[0-9]+\.d), #32766 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 0x7ffe), ++ z0 = svdup_u64_m (z0, p0, 0x7ffe)) ++ ++/* ++** dup_7fff_u64_m: ++** mov (z[0-9]+\.d), #32767 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 0x7fff), ++ z0 = svdup_u64_m (z0, p0, 0x7fff)) ++ ++/* ++** dup_m1_u64_m: ++** mov z0\.d, p0/m, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -1), ++ z0 = svdup_u64_m (z0, p0, -1)) ++ ++/* ++** dup_m128_u64_m: ++** mov z0\.d, p0/m, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -128), ++ z0 = svdup_u64_m (z0, p0, -128)) ++ ++/* ++** dup_m129_u64_m: ++** mov (z[0-9]+\.d), #-129 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -129), ++ z0 = svdup_u64_m (z0, p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -130), ++ z0 = svdup_u64_m (z0, p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -254), ++ z0 = svdup_u64_m (z0, p0, -254)) ++ ++/* ++** dup_m255_u64_m: ++** mov (z[0-9]+\.d), #-255 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -255), ++ z0 = svdup_u64_m (z0, p0, -255)) ++ ++/* ++** dup_m256_u64_m: ++** mov z0\.d, p0/m, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -256), ++ z0 = svdup_u64_m (z0, p0, -256)) ++ ++/* ++** dup_m257_u64_m: ++** mov (z[0-9]+\.d), #-257 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -257), ++ z0 = svdup_u64_m (z0, p0, -257)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m258_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -258), ++ z0 = svdup_u64_m (z0, p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -259), ++ z0 = svdup_u64_m (z0, p0, -259)) ++ ++/* ++** dup_m512_u64_m: ++** mov z0\.d, p0/m, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -512), ++ z0 = svdup_u64_m (z0, p0, -512)) ++ ++/* ++** dup_m7f00_u64_m: ++** mov z0\.d, p0/m, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -0x7f00), ++ z0 = svdup_u64_m (z0, p0, -0x7f00)) ++ ++/* ++** dup_m7f01_u64_m: ++** mov (z[0-9]+\.d), #-32513 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -0x7f01), ++ z0 = svdup_u64_m (z0, p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -0x7f02), ++ z0 = svdup_u64_m (z0, p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7ffe_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -0x7ffe), ++ z0 = svdup_u64_m (z0, p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_u64_m: ++** mov (z[0-9]+\.d), #-32767 ++** sel z0\.d, p0, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -0x7fff), ++ z0 = svdup_u64_m (z0, p0, -0x7fff)) ++ ++/* ++** dup_m8000_u64_m: ++** mov z0\.d, p0/m, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, -0x8000), ++ z0 = svdup_u64_m (z0, p0, -0x8000)) ++ ++/* ++** dup_0_u64_m: ++** mov z0\.d, p0/m, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_u64_m, svuint64_t, ++ z0 = svdup_n_u64_m (z0, p0, 0), ++ z0 = svdup_u64_m (z0, p0, 0)) ++ ++/* ++** dup_x0_u64_m: ++** movprfx z0, z1 ++** mov z0\.d, p0/m, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_x0_u64_m, svuint64_t, uint64_t, ++ z0 = svdup_n_u64_m (z1, p0, x0), ++ z0 = svdup_u64_m (z1, p0, x0)) ++ ++/* ++** dup_1_u64_z: ++** mov z0\.d, p0/z, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 1), ++ z0 = svdup_u64_z (p0, 1)) ++ ++/* ++** dup_127_u64_z: ++** mov z0\.d, p0/z, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 127), ++ z0 = svdup_u64_z (p0, 127)) ++ ++/* ++** dup_128_u64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #128 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 128), ++ z0 = svdup_u64_z (p0, 128)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_129_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 129), ++ z0 = svdup_u64_z (p0, 129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_253_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 253), ++ z0 = svdup_u64_z (p0, 253)) ++ ++/* ++** dup_254_u64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #254 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 254), ++ z0 = svdup_u64_z (p0, 254)) ++ ++/* ++** dup_255_u64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #255 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 255), ++ z0 = svdup_u64_z (p0, 255)) ++ ++/* ++** dup_256_u64_z: ++** mov z0\.d, p0/z, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 256), ++ z0 = svdup_u64_z (p0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_257_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 257), ++ z0 = svdup_u64_z (p0, 257)) ++ ++/* ++** dup_512_u64_z: ++** mov z0\.d, p0/z, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 512), ++ z0 = svdup_u64_z (p0, 512)) ++ ++/* ++** dup_7f00_u64_z: ++** mov z0\.d, p0/z, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 0x7f00), ++ z0 = svdup_u64_z (p0, 0x7f00)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7f01_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 0x7f01), ++ z0 = svdup_u64_z (p0, 0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_7ffd_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 0x7ffd), ++ z0 = svdup_u64_z (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_u64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #32766 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 0x7ffe), ++ z0 = svdup_u64_z (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_u64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #32767 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 0x7fff), ++ z0 = svdup_u64_z (p0, 0x7fff)) ++ ++/* ++** dup_m1_u64_z: ++** mov z0\.d, p0/z, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -1), ++ z0 = svdup_u64_z (p0, -1)) ++ ++/* ++** dup_m128_u64_z: ++** mov z0\.d, p0/z, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -128), ++ z0 = svdup_u64_z (p0, -128)) ++ ++/* ++** dup_m129_u64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #-129 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -129), ++ z0 = svdup_u64_z (p0, -129)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m130_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -130), ++ z0 = svdup_u64_z (p0, -130)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m254_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -254), ++ z0 = svdup_u64_z (p0, -254)) ++ ++/* ++** dup_m255_u64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #-255 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -255), ++ z0 = svdup_u64_z (p0, -255)) ++ ++/* ++** dup_m256_u64_z: ++** mov z0\.d, p0/z, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -256), ++ z0 = svdup_u64_z (p0, -256)) ++ ++/* ++** dup_m257_u64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #-257 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -257), ++ z0 = svdup_u64_z (p0, -257)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m258_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -258), ++ z0 = svdup_u64_z (p0, -258)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m259_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -259), ++ z0 = svdup_u64_z (p0, -259)) ++ ++/* ++** dup_m512_u64_z: ++** mov z0\.d, p0/z, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -512), ++ z0 = svdup_u64_z (p0, -512)) ++ ++/* ++** dup_m7f00_u64_z: ++** mov z0\.d, p0/z, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -0x7f00), ++ z0 = svdup_u64_z (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_u64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #-32513 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -0x7f01), ++ z0 = svdup_u64_z (p0, -0x7f01)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (dup_m7f02_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -0x7f02), ++ z0 = svdup_u64_z (p0, -0x7f02)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (dup_m7ffe_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -0x7ffe), ++ z0 = svdup_u64_z (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_u64_z: ++** mov (z[0-9]+)\.b, #0 ++** mov (z[0-9]+\.d), #-32767 ++** sel z0\.d, p0, \2, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -0x7fff), ++ z0 = svdup_u64_z (p0, -0x7fff)) ++ ++/* ++** dup_m8000_u64_z: ++** mov z0\.d, p0/z, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, -0x8000), ++ z0 = svdup_u64_z (p0, -0x8000)) ++ ++/* ++** dup_0_u64_z: ++** mov z0\.d, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_u64_z, svuint64_t, ++ z0 = svdup_n_u64_z (p0, 0), ++ z0 = svdup_u64_z (p0, 0)) ++ ++/* ++** dup_x0_u64_z: ++** movprfx z0\.d, p0/z, z0\.d ++** mov z0\.d, p0/m, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_x0_u64_z, svuint64_t, uint64_t, ++ z0 = svdup_n_u64_z (p0, x0), ++ z0 = svdup_u64_z (p0, x0)) ++ ++/* ++** dup_1_u64_x: ++** mov z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 1), ++ z0 = svdup_u64_x (p0, 1)) ++ ++/* ++** dup_127_u64_x: ++** mov z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 127), ++ z0 = svdup_u64_x (p0, 127)) ++ ++/* ++** dup_128_u64_x: ++** mov z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 128), ++ z0 = svdup_u64_x (p0, 128)) ++ ++/* ++** dup_129_u64_x: ++** mov (x[0-9]+), 129 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 129), ++ z0 = svdup_u64_x (p0, 129)) ++ ++/* ++** dup_253_u64_x: ++** mov (x[0-9]+), 253 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 253), ++ z0 = svdup_u64_x (p0, 253)) ++ ++/* ++** dup_254_u64_x: ++** mov z0\.d, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 254), ++ z0 = svdup_u64_x (p0, 254)) ++ ++/* ++** dup_255_u64_x: ++** mov z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 255), ++ z0 = svdup_u64_x (p0, 255)) ++ ++/* ++** dup_256_u64_x: ++** mov z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_256_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 256), ++ z0 = svdup_u64_x (p0, 256)) ++ ++/* ++** dup_257_u64_x: ++** mov (x[0-9]+), 257 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_257_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 257), ++ z0 = svdup_u64_x (p0, 257)) ++ ++/* ++** dup_512_u64_x: ++** mov z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_512_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 512), ++ z0 = svdup_u64_x (p0, 512)) ++ ++/* ++** dup_7f00_u64_x: ++** mov z0\.d, #32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f00_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 0x7f00), ++ z0 = svdup_u64_x (p0, 0x7f00)) ++ ++/* ++** dup_7f01_u64_x: ++** mov (x[0-9]+), 32513 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7f01_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 0x7f01), ++ z0 = svdup_u64_x (p0, 0x7f01)) ++ ++/* ++** dup_7ffd_u64_x: ++** mov (x[0-9]+), 32765 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffd_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 0x7ffd), ++ z0 = svdup_u64_x (p0, 0x7ffd)) ++ ++/* ++** dup_7ffe_u64_x: ++** mov z0\.d, #32766 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_7ffe_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 0x7ffe), ++ z0 = svdup_u64_x (p0, 0x7ffe)) ++ ++/* ++** dup_7fff_u64_x: ++** mov z0\.d, #32767 ++** 
ret ++*/ ++TEST_UNIFORM_Z (dup_7fff_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, 0x7fff), ++ z0 = svdup_u64_x (p0, 0x7fff)) ++ ++/* ++** dup_m1_u64_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -1), ++ z0 = svdup_u64_x (p0, -1)) ++ ++/* ++** dup_m128_u64_x: ++** mov z0\.d, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -128), ++ z0 = svdup_u64_x (p0, -128)) ++ ++/* ++** dup_m129_u64_x: ++** mov z0\.d, #-129 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m129_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -129), ++ z0 = svdup_u64_x (p0, -129)) ++ ++/* ++** dup_m130_u64_x: ++** mov (x[0-9]+), -130 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m130_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -130), ++ z0 = svdup_u64_x (p0, -130)) ++ ++/* ++** dup_m254_u64_x: ++** mov (x[0-9]+), -254 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m254_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -254), ++ z0 = svdup_u64_x (p0, -254)) ++ ++/* ++** dup_m255_u64_x: ++** mov z0\.d, #-255 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m255_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -255), ++ z0 = svdup_u64_x (p0, -255)) ++ ++/* ++** dup_m256_u64_x: ++** mov z0\.d, #-256 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m256_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -256), ++ z0 = svdup_u64_x (p0, -256)) ++ ++/* ++** dup_m257_u64_x: ++** mov z0\.d, #-257 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m257_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -257), ++ z0 = svdup_u64_x (p0, -257)) ++ ++/* ++** dup_m258_u64_x: ++** mov (x[0-9]+), -258 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m258_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -258), ++ z0 = svdup_u64_x (p0, -258)) ++ ++/* ++** dup_m259_u64_x: ++** mov (x[0-9]+), -259 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m259_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -259), ++ z0 = svdup_u64_x (p0, -259)) ++ ++/* ++** dup_m512_u64_x: ++** mov z0\.d, #-512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m512_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -512), ++ z0 = svdup_u64_x (p0, -512)) ++ ++/* ++** dup_m7f00_u64_x: ++** mov z0\.d, #-32512 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f00_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -0x7f00), ++ z0 = svdup_u64_x (p0, -0x7f00)) ++ ++/* ++** dup_m7f01_u64_x: ++** mov z0\.d, #-32513 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f01_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -0x7f01), ++ z0 = svdup_u64_x (p0, -0x7f01)) ++ ++/* ++** dup_m7f02_u64_x: ++** mov (x[0-9]+), -32514 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7f02_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -0x7f02), ++ z0 = svdup_u64_x (p0, -0x7f02)) ++ ++/* ++** dup_m7ffe_u64_x: ++** mov (x[0-9]+), -32766 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7ffe_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -0x7ffe), ++ z0 = svdup_u64_x (p0, -0x7ffe)) ++ ++/* ++** dup_m7fff_u64_x: ++** mov z0\.d, #-32767 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m7fff_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -0x7fff), ++ z0 = svdup_u64_x (p0, -0x7fff)) ++ ++/* ++** dup_m8000_u64_x: ++** mov z0\.d, #-32768 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m8000_u64_x, svuint64_t, ++ z0 = svdup_n_u64_x (p0, -0x8000), ++ z0 = svdup_u64_x (p0, -0x8000)) ++ ++/* ++** dup_x0_u64_x: ++** mov z0\.d, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_x0_u64_x, svuint64_t, uint64_t, ++ z0 = svdup_n_u64_x (p0, x0), ++ z0 = svdup_u64_x (p0, x0)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c +new file mode 100644 +index 000000000..d27f4bba9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c +@@ -0,0 +1,383 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dup_1_u8: ++** mov z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u8, svuint8_t, ++ z0 = svdup_n_u8 (1), ++ z0 = svdup_u8 (1)) ++ ++/* ++** dup_127_u8: ++** mov z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u8, svuint8_t, ++ z0 = svdup_n_u8 (127), ++ z0 = svdup_u8 (127)) ++ ++/* ++** dup_128_u8: ++** mov z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u8, svuint8_t, ++ z0 = svdup_n_u8 (128), ++ z0 = svdup_u8 (128)) ++ ++/* ++** dup_129_u8: ++** mov z0\.b, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_u8, svuint8_t, ++ z0 = svdup_n_u8 (129), ++ z0 = svdup_u8 (129)) ++ ++/* ++** dup_253_u8: ++** mov z0\.b, #-3 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_u8, svuint8_t, ++ z0 = svdup_n_u8 (253), ++ z0 = svdup_u8 (253)) ++ ++/* ++** dup_254_u8: ++** mov z0\.b, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u8, svuint8_t, ++ z0 = svdup_n_u8 (254), ++ z0 = svdup_u8 (254)) ++ ++/* ++** dup_255_u8: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u8, svuint8_t, ++ z0 = svdup_n_u8 (255), ++ z0 = svdup_u8 (255)) ++ ++/* ++** dup_m1_u8: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u8, svuint8_t, ++ z0 = svdup_n_u8 (-1), ++ z0 = svdup_u8 (-1)) ++ ++/* ++** dup_m128_u8: ++** mov z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u8, svuint8_t, ++ z0 = svdup_n_u8 (-128), ++ z0 = svdup_u8 (-128)) ++ ++/* ++** dup_w0_u8: ++** mov z0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u8, svuint8_t, uint8_t, ++ z0 = svdup_n_u8 (x0), ++ z0 = svdup_u8 (x0)) ++ ++/* ++** dup_1_u8_m: ++** mov z0\.b, p0/m, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u8_m, svuint8_t, ++ z0 = svdup_n_u8_m (z0, p0, 1), ++ z0 = svdup_u8_m (z0, p0, 1)) ++ ++/* ++** dup_127_u8_m: ++** mov z0\.b, p0/m, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u8_m, svuint8_t, ++ z0 = svdup_n_u8_m (z0, p0, 127), ++ z0 = svdup_u8_m (z0, p0, 127)) ++ ++/* ++** dup_128_u8_m: ++** mov z0\.b, p0/m, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u8_m, svuint8_t, ++ z0 = svdup_n_u8_m (z0, p0, 128), ++ z0 = svdup_u8_m (z0, p0, 128)) ++ ++/* ++** dup_129_u8_m: ++** mov z0\.b, p0/m, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_u8_m, svuint8_t, ++ z0 = svdup_n_u8_m (z0, p0, 129), ++ z0 = svdup_u8_m (z0, p0, 129)) ++ ++/* ++** dup_253_u8_m: ++** mov z0\.b, p0/m, #-3 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_u8_m, svuint8_t, ++ z0 = svdup_n_u8_m (z0, p0, 253), ++ z0 = svdup_u8_m (z0, p0, 253)) ++ ++/* ++** dup_254_u8_m: ++** mov z0\.b, p0/m, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u8_m, svuint8_t, ++ z0 = svdup_n_u8_m (z0, p0, 254), ++ z0 = svdup_u8_m (z0, p0, 254)) ++ ++/* ++** dup_255_u8_m: ++** mov z0\.b, p0/m, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u8_m, svuint8_t, ++ z0 = svdup_n_u8_m (z0, p0, 255), ++ z0 = svdup_u8_m (z0, p0, 255)) ++ ++/* ++** dup_m1_u8_m: ++** mov z0\.b, p0/m, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u8_m, svuint8_t, ++ z0 = svdup_n_u8_m (z0, p0, -1), ++ z0 = svdup_u8_m (z0, p0, -1)) ++ ++/* ++** dup_m128_u8_m: ++** mov z0\.b, p0/m, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u8_m, svuint8_t, ++ z0 = svdup_n_u8_m (z0, p0, -128), ++ z0 = svdup_u8_m (z0, p0, -128)) ++ ++/* ++** dup_0_u8_m: ++** mov z0\.b, p0/m, #0 ++** ret ++*/ 
++TEST_UNIFORM_Z (dup_0_u8_m, svuint8_t, ++ z0 = svdup_n_u8_m (z0, p0, 0), ++ z0 = svdup_u8_m (z0, p0, 0)) ++ ++/* ++** dup_w0_u8_m: ++** movprfx z0, z1 ++** mov z0\.b, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u8_m, svuint8_t, uint8_t, ++ z0 = svdup_n_u8_m (z1, p0, x0), ++ z0 = svdup_u8_m (z1, p0, x0)) ++ ++/* ++** dup_1_u8_z: ++** mov z0\.b, p0/z, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u8_z, svuint8_t, ++ z0 = svdup_n_u8_z (p0, 1), ++ z0 = svdup_u8_z (p0, 1)) ++ ++/* ++** dup_127_u8_z: ++** mov z0\.b, p0/z, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u8_z, svuint8_t, ++ z0 = svdup_n_u8_z (p0, 127), ++ z0 = svdup_u8_z (p0, 127)) ++ ++/* ++** dup_128_u8_z: ++** mov z0\.b, p0/z, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u8_z, svuint8_t, ++ z0 = svdup_n_u8_z (p0, 128), ++ z0 = svdup_u8_z (p0, 128)) ++ ++/* ++** dup_129_u8_z: ++** mov z0\.b, p0/z, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_u8_z, svuint8_t, ++ z0 = svdup_n_u8_z (p0, 129), ++ z0 = svdup_u8_z (p0, 129)) ++ ++/* ++** dup_253_u8_z: ++** mov z0\.b, p0/z, #-3 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_u8_z, svuint8_t, ++ z0 = svdup_n_u8_z (p0, 253), ++ z0 = svdup_u8_z (p0, 253)) ++ ++/* ++** dup_254_u8_z: ++** mov z0\.b, p0/z, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u8_z, svuint8_t, ++ z0 = svdup_n_u8_z (p0, 254), ++ z0 = svdup_u8_z (p0, 254)) ++ ++/* ++** dup_255_u8_z: ++** mov z0\.b, p0/z, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u8_z, svuint8_t, ++ z0 = svdup_n_u8_z (p0, 255), ++ z0 = svdup_u8_z (p0, 255)) ++ ++/* ++** dup_m1_u8_z: ++** mov z0\.b, p0/z, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u8_z, svuint8_t, ++ z0 = svdup_n_u8_z (p0, -1), ++ z0 = svdup_u8_z (p0, -1)) ++ ++/* ++** dup_m128_u8_z: ++** mov z0\.b, p0/z, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u8_z, svuint8_t, ++ z0 = svdup_n_u8_z (p0, -128), ++ z0 = svdup_u8_z (p0, -128)) ++ ++/* ++** dup_0_u8_z: ++** mov z0\.b, p0/z, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_0_u8_z, svuint8_t, ++ z0 = svdup_n_u8_z (p0, 0), ++ z0 = svdup_u8_z (p0, 0)) ++ ++/* ++** dup_w0_u8_z: ++** movprfx z0\.b, p0/z, z0\.b ++** mov z0\.b, p0/m, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u8_z, svuint8_t, uint8_t, ++ z0 = svdup_n_u8_z (p0, x0), ++ z0 = svdup_u8_z (p0, x0)) ++ ++/* ++** dup_1_u8_x: ++** mov z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_1_u8_x, svuint8_t, ++ z0 = svdup_n_u8_x (p0, 1), ++ z0 = svdup_u8_x (p0, 1)) ++ ++/* ++** dup_127_u8_x: ++** mov z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_127_u8_x, svuint8_t, ++ z0 = svdup_n_u8_x (p0, 127), ++ z0 = svdup_u8_x (p0, 127)) ++ ++/* ++** dup_128_u8_x: ++** mov z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_128_u8_x, svuint8_t, ++ z0 = svdup_n_u8_x (p0, 128), ++ z0 = svdup_u8_x (p0, 128)) ++ ++/* ++** dup_129_u8_x: ++** mov z0\.b, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_129_u8_x, svuint8_t, ++ z0 = svdup_n_u8_x (p0, 129), ++ z0 = svdup_u8_x (p0, 129)) ++ ++/* ++** dup_253_u8_x: ++** mov z0\.b, #-3 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_253_u8_x, svuint8_t, ++ z0 = svdup_n_u8_x (p0, 253), ++ z0 = svdup_u8_x (p0, 253)) ++ ++/* ++** dup_254_u8_x: ++** mov z0\.b, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_254_u8_x, svuint8_t, ++ z0 = svdup_n_u8_x (p0, 254), ++ z0 = svdup_u8_x (p0, 254)) ++ ++/* ++** dup_255_u8_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_255_u8_x, svuint8_t, ++ z0 = svdup_n_u8_x (p0, 255), ++ z0 = svdup_u8_x (p0, 255)) ++ ++/* ++** dup_m1_u8_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m1_u8_x, svuint8_t, ++ z0 = svdup_n_u8_x (p0, -1), ++ z0 = svdup_u8_x (p0, -1)) ++ 
++/* ++** dup_m128_u8_x: ++** mov z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (dup_m128_u8_x, svuint8_t, ++ z0 = svdup_n_u8_x (p0, -128), ++ z0 = svdup_u8_x (p0, -128)) ++ ++/* ++** dup_w0_u8_x: ++** mov z0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (dup_w0_u8_x, svuint8_t, uint8_t, ++ z0 = svdup_n_u8_x (p0, x0), ++ z0 = svdup_u8_x (p0, x0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b16.c +new file mode 100644 +index 000000000..ecbacd7e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b16.c +@@ -0,0 +1,276 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_00_b16: ++** pfalse p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (dupq_00_b16, ++ p0 = svdupq_n_b16 (0, 0, 0, 0, 0, 0, 0, 0), ++ p0 = svdupq_b16 (0, 0, 0, 0, 0, 0, 0, 0)) ++ ++/* ++** dupq_11_b16: ++** ptrue p0\.d, all ++** ret ++*/ ++TEST_UNIFORM_P (dupq_11_b16, ++ p0 = svdupq_n_b16 (1, 0, 0, 0, 1, 0, 0, 0), ++ p0 = svdupq_b16 (1, 0, 0, 0, 1, 0, 0, 0)) ++ ++/* ++** dupq_22_b16: ++** ( ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.h, \1\.h, \2\.h ++** | ++** ptrue (p[0-7])\.d, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.h, \4\.h, \3\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_22_b16, ++ p0 = svdupq_n_b16 (0, 1, 0, 0, 0, 1, 0, 0), ++ p0 = svdupq_b16 (0, 1, 0, 0, 0, 1, 0, 0)) ++ ++/* ++** dupq_33_b16: ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.h, \1\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_P (dupq_33_b16, ++ p0 = svdupq_n_b16 (1, 1, 0, 0, 1, 1, 0, 0), ++ p0 = svdupq_b16 (1, 1, 0, 0, 1, 1, 0, 0)) ++ ++/* ++** dupq_44_b16: ++** ( ++** ptrue (p[0-7])\.d, all ++** ptrue (p[0-7])\.s, all ++** not p0\.b, \2/z, \1\.b ++** | ++** ptrue (p[0-7])\.s, all ++** ptrue (p[0-7])\.d, all ++** not p0\.b, \3/z, \4\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_44_b16, ++ p0 = svdupq_n_b16 (0, 0, 1, 0, 0, 0, 1, 0), ++ p0 = svdupq_b16 (0, 0, 1, 0, 0, 0, 1, 0)) ++ ++/* ++** dupq_55_b16: ++** ptrue p0\.s, all ++** ret ++*/ ++TEST_UNIFORM_P (dupq_55_b16, ++ p0 = svdupq_n_b16 (1, 0, 1, 0, 1, 0, 1, 0), ++ p0 = svdupq_b16 (1, 0, 1, 0, 1, 0, 1, 0)) ++ ++/* ++** dupq_66_b16: ++** ... ++** cmpne p0\.b, p[0-7]/z, z[0-9]+\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_P (dupq_66_b16, ++ p0 = svdupq_n_b16 (0, 1, 1, 0, 0, 1, 1, 0), ++ p0 = svdupq_b16 (0, 1, 1, 0, 0, 1, 1, 0)) ++ ++/* ++** dupq_77_b16: ++** ( ++** ptrue (p[0-7])\.d, all ++** ptrue (p[0-7])\.[hs], all ++** trn1 p0\.h, \2\.h, \1\.h ++** | ++** ptrue (p[0-7])\.[hs], all ++** ptrue (p[0-7])\.s, all ++** trn1 p0\.h, \3\.h, \4\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_77_b16, ++ p0 = svdupq_n_b16 (1, 1, 1, 0, 1, 1, 1, 0), ++ p0 = svdupq_b16 (1, 1, 1, 0, 1, 1, 1, 0)) ++ ++/* ++** dupq_88_b16: ++** ( ++** mov (z[0-9]+)\.d, #71776119061217280 ++** ptrue (p[0-7])\.b, all ++** cmpne p0\.b, \2/z, \1\.b, #0 ++** | ++** ptrue (p[0-7])\.b, all ++** mov (z[0-9]+)\.d, #71776119061217280 ++** cmpne p0\.b, \3/z, \4\.b, #0 ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_88_b16, ++ p0 = svdupq_n_b16 (0, 0, 0, 1, 0, 0, 0, 1), ++ p0 = svdupq_b16 (0, 0, 0, 1, 0, 0, 0, 1)) ++ ++/* ++** dupq_99_b16: ++** ... 
++** cmpne p0\.b, p[0-7]/z, z[0-9]+\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_P (dupq_99_b16, ++ p0 = svdupq_n_b16 (1, 0, 0, 1, 1, 0, 0, 1), ++ p0 = svdupq_b16 (1, 0, 0, 1, 1, 0, 0, 1)) ++ ++/* ++** dupq_aa_b16: ++** ( ++** ptrue (p[0-7])\.s, all ++** ptrue (p[0-7])\.h, all ++** not p0\.b, \2/z, \1\.b ++** | ++** ptrue (p[0-7])\.h, all ++** ptrue (p[0-7])\.s, all ++** not p0\.b, \3/z, \4\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_aa_b16, ++ p0 = svdupq_n_b16 (0, 1, 0, 1, 0, 1, 0, 1), ++ p0 = svdupq_b16 (0, 1, 0, 1, 0, 1, 0, 1)) ++ ++/* ++** dupq_bb_b16: ++** ( ++** ptrue (p[0-7])\.d, all ++** ptrue (p[0-7])\.[hs], all ++** trn1 p0\.h, \1\.h, \2\.h ++** | ++** ptrue (p[0-7])\.[hs], all ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.h, \4\.h, \3\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_bb_b16, ++ p0 = svdupq_n_b16 (1, 1, 0, 1, 1, 1, 0, 1), ++ p0 = svdupq_b16 (1, 1, 0, 1, 1, 1, 0, 1)) ++ ++/* ++** dupq_cc_b16: ++** ( ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.h, all ++** trn1 p0\.s, \1\.s, \2\.s ++** | ++** ptrue (p[0-7])\.h, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.s, \4\.s, \3\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_cc_b16, ++ p0 = svdupq_n_b16 (0, 0, 1, 1, 0, 0, 1, 1), ++ p0 = svdupq_b16 (0, 0, 1, 1, 0, 0, 1, 1)) ++ ++/* ++** dupq_dd_b16: ++** ( ++** ptrue (p[0-7])\.[sd], all ++** ptrue (p[0-7])\.h, all ++** trn1 p0\.s, \1\.s, \2\.s ++** | ++** ptrue (p[0-7])\.h, all ++** ptrue (p[0-7])\.[sd], all ++** trn1 p0\.s, \4\.s, \3\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_dd_b16, ++ p0 = svdupq_n_b16 (1, 0, 1, 1, 1, 0, 1, 1), ++ p0 = svdupq_b16 (1, 0, 1, 1, 1, 0, 1, 1)) ++ ++/* ++** dupq_ee_b16: ++** ( ++** ptrue (p[0-7])\.d, all ++** ptrue (p[0-7])\.h, all ++** not p0\.b, \2/z, \1\.b ++** | ++** ptrue (p[0-7])\.h, all ++** ptrue (p[0-7])\.d, all ++** not p0\.b, \3/z, \4\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_ee_b16, ++ p0 = svdupq_n_b16 (0, 1, 1, 1, 0, 1, 1, 1), ++ p0 = svdupq_b16 (0, 1, 1, 1, 0, 1, 1, 1)) ++ ++/* ++** dupq_ff_b16: ++** ptrue p0\.h, all ++** ret ++*/ ++TEST_UNIFORM_P (dupq_ff_b16, ++ p0 = svdupq_n_b16 (1, 1, 1, 1, 1, 1, 1, 1), ++ p0 = svdupq_b16 (1, 1, 1, 1, 1, 1, 1, 1)) ++ ++/* ++** dupq_01_b16: ++** ( ++** ptrue (p[0-7])\.d, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.d, \1\.d, \2\.d ++** | ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.d, \4\.d, \3\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_01_b16, ++ p0 = svdupq_n_b16 (1, 0, 0, 0, 0, 0, 0, 0), ++ p0 = svdupq_b16 (1, 0, 0, 0, 0, 0, 0, 0)) ++ ++/* ++** dupq_03_b16: ++** ... ++** cmpne p0\.b, p[0-7]/z, z[0-9]+\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_P (dupq_03_b16, ++ p0 = svdupq_n_b16 (1, 1, 0, 0, 0, 0, 0, 0), ++ p0 = svdupq_b16 (1, 1, 0, 0, 0, 0, 0, 0)) ++ ++/* ++** dupq_0f_b16: ++** ( ++** ptrue (p[0-7])\.h, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.d, \1\.d, \2\.d ++** | ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.h, all ++** trn1 p0\.d, \4\.d, \3\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_0f_b16, ++ p0 = svdupq_n_b16 (1, 1, 1, 1, 0, 0, 0, 0), ++ p0 = svdupq_b16 (1, 1, 1, 1, 0, 0, 0, 0)) ++ ++/* ++** dupq_3f_b16: ++** ... 
++** cmpne p0\.b, p[0-7]/z, z[0-9]+\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_P (dupq_3f_b16, ++ p0 = svdupq_n_b16 (1, 1, 1, 1, 1, 1, 0, 0), ++ p0 = svdupq_b16 (1, 1, 1, 1, 1, 1, 0, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b32.c +new file mode 100644 +index 000000000..39719a76d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b32.c +@@ -0,0 +1,132 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_0_b32: ++** pfalse p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (dupq_0_b32, ++ p0 = svdupq_n_b32 (0, 0, 0, 0), ++ p0 = svdupq_b32 (0, 0, 0, 0)) ++ ++/* ++** dupq_1_b32: ++** ( ++** ptrue (p[0-7])\.d, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.d, \1\.d, \2\.d ++** | ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.d, \4\.d, \3\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_1_b32, ++ p0 = svdupq_n_b32 (1, 0, 0, 0), ++ p0 = svdupq_b32 (1, 0, 0, 0)) ++ ++/* ++** dupq_3_b32: ++** ( ++** ptrue (p[0-7])\.s, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.d, \1\.d, \2\.d ++** | ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.s, all ++** trn1 p0\.d, \4\.d, \3\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_3_b32, ++ p0 = svdupq_n_b32 (1, 1, 0, 0), ++ p0 = svdupq_b32 (1, 1, 0, 0)) ++ ++/* ++** dupq_4_b32: ++** ( ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.d, \1\.d, \2\.d ++** | ++** ptrue (p[0-7])\.d, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.d, \4\.d, \3\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_4_b32, ++ p0 = svdupq_n_b32 (0, 0, 1, 0), ++ p0 = svdupq_b32 (0, 0, 1, 0)) ++ ++/* ++** dupq_5_b32: ++** ptrue p0\.d, all ++** ret ++*/ ++TEST_UNIFORM_P (dupq_5_b32, ++ p0 = svdupq_n_b32 (1, 0, 1, 0), ++ p0 = svdupq_b32 (1, 0, 1, 0)) ++ ++/* ++** dupq_7_b32: ++** ( ++** ptrue (p[0-7])\.s, all ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.d, \1\.d, \2\.d ++** | ++** ptrue (p[0-7])\.d, all ++** ptrue (p[0-7])\.s, all ++** trn1 p0\.d, \4\.d, \3\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_7_b32, ++ p0 = svdupq_n_b32 (1, 1, 1, 0), ++ p0 = svdupq_b32 (1, 1, 1, 0)) ++ ++/* ++** dupq_a_b32: ++** ( ++** ptrue (p[0-7])\.d, all ++** ptrue (p[0-7])\.s, all ++** not p0\.b, \2/z, \1\.b ++** | ++** ptrue (p[0-7])\.s, all ++** ptrue (p[0-7])\.d, all ++** not p0\.b, \3/z, \4\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_a_b32, ++ p0 = svdupq_n_b32 (0, 1, 0, 1), ++ p0 = svdupq_b32 (0, 1, 0, 1)) ++ ++/* ++** dupq_e_b32: ++** ( ++** ptrue (p[0-7])\.d, all ++** ptrue (p[0-7])\.s, all ++** trn1 p0\.d, \1\.d, \2\.d ++** | ++** ptrue (p[0-7])\.s, all ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.d, \4\.d, \3\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_e_b32, ++ p0 = svdupq_n_b32 (1, 0, 1, 1), ++ p0 = svdupq_b32 (1, 0, 1, 1)) ++ ++/* ++** dupq_f_b32: ++** ptrue p0\.s, all ++** ret ++*/ ++TEST_UNIFORM_P (dupq_f_b32, ++ p0 = svdupq_n_b32 (1, 1, 1, 1), ++ p0 = svdupq_b32 (1, 1, 1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b64.c +new file mode 100644 +index 000000000..820ace431 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b64.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_0_b64: ++** pfalse p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (dupq_0_b64, ++ p0 = svdupq_n_b64 (0, 0), ++ p0 = svdupq_b64 (0, 0)) ++ ++/* ++** dupq_1_b64: ++** ( ++** ptrue 
(p[0-7])\.d, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.d, \1\.d, \2\.d ++** | ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.d, \4\.d, \3\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_1_b64, ++ p0 = svdupq_n_b64 (1, 0), ++ p0 = svdupq_b64 (1, 0)) ++ ++/* ++** dupq_2_b64: ++** ( ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.d, \1\.d, \2\.d ++** | ++** ptrue (p[0-7])\.d, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.d, \4\.d, \3\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_2_b64, ++ p0 = svdupq_n_b64 (0, 1), ++ p0 = svdupq_b64 (0, 1)) ++ ++/* ++** dupq_3_b64: ++** ptrue p0\.d, all ++** ret ++*/ ++TEST_UNIFORM_P (dupq_3_b64, ++ p0 = svdupq_n_b64 (1, 1), ++ p0 = svdupq_b64 (1, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b8.c +new file mode 100644 +index 000000000..4762f950b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b8.c +@@ -0,0 +1,413 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_0000_b8: ++** pfalse p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (dupq_0000_b8, ++ p0 = svdupq_n_b8 (0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0), ++ p0 = svdupq_b8 (0, 0, 0, 0, 0, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0)) ++ ++/* ++** dupq_1111_b8: ++** ptrue p0\.s, all ++** ret ++*/ ++TEST_UNIFORM_P (dupq_1111_b8, ++ p0 = svdupq_n_b8 (1, 0, 0, 0, 1, 0, 0, 0, ++ 1, 0, 0, 0, 1, 0, 0, 0), ++ p0 = svdupq_b8 (1, 0, 0, 0, 1, 0, 0, 0, ++ 1, 0, 0, 0, 1, 0, 0, 0)) ++ ++/* ++** dupq_2222_b8: ++** ( ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.s, all ++** trn1 p0\.b, \1\.b, \2\.b ++** | ++** ptrue (p[0-7])\.s, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.b, \4\.b, \3\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_2222_b8, ++ p0 = svdupq_n_b8 (0, 1, 0, 0, 0, 1, 0, 0, ++ 0, 1, 0, 0, 0, 1, 0, 0), ++ p0 = svdupq_b8 (0, 1, 0, 0, 0, 1, 0, 0, ++ 0, 1, 0, 0, 0, 1, 0, 0)) ++ ++/* ++** dupq_3333_b8: ++** ptrue (p[0-7])\.s, all ++** trn1 p0\.b, \1\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_P (dupq_3333_b8, ++ p0 = svdupq_n_b8 (1, 1, 0, 0, 1, 1, 0, 0, ++ 1, 1, 0, 0, 1, 1, 0, 0), ++ p0 = svdupq_b8 (1, 1, 0, 0, 1, 1, 0, 0, ++ 1, 1, 0, 0, 1, 1, 0, 0)) ++ ++/* ++** dupq_4444_b8: ++** ( ++** ptrue (p[0-7])\.s, all ++** ptrue (p[0-7])\.h, all ++** not p0\.b, \2/z, \1\.b ++** | ++** ptrue (p[0-7])\.h, all ++** ptrue (p[0-7])\.s, all ++** not p0\.b, \3/z, \4\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_4444_b8, ++ p0 = svdupq_n_b8 (0, 0, 1, 0, 0, 0, 1, 0, ++ 0, 0, 1, 0, 0, 0, 1, 0), ++ p0 = svdupq_b8 (0, 0, 1, 0, 0, 0, 1, 0, ++ 0, 0, 1, 0, 0, 0, 1, 0)) ++ ++/* ++** dupq_5555_b8: ++** ptrue p0\.h, all ++** ret ++*/ ++TEST_UNIFORM_P (dupq_5555_b8, ++ p0 = svdupq_n_b8 (1, 0, 1, 0, 1, 0, 1, 0, ++ 1, 0, 1, 0, 1, 0, 1, 0), ++ p0 = svdupq_b8 (1, 0, 1, 0, 1, 0, 1, 0, ++ 1, 0, 1, 0, 1, 0, 1, 0)) ++ ++/* ++** dupq_6666_b8: ++** ( ++** mov (z[0-9]+)\.s, #16776960 ++** ptrue (p[0-7])\.b, all ++** cmpne p0\.b, \2/z, \1\.b, #0 ++** | ++** ptrue (p[0-7])\.b, all ++** mov (z[0-9]+)\.s, #16776960 ++** cmpne p0\.b, \3/z, \4\.b, #0 ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_6666_b8, ++ p0 = svdupq_n_b8 (0, 1, 1, 0, 0, 1, 1, 0, ++ 0, 1, 1, 0, 0, 1, 1, 0), ++ p0 = svdupq_b8 (0, 1, 1, 0, 0, 1, 1, 0, ++ 0, 1, 1, 0, 0, 1, 1, 0)) ++ ++/* ++** dupq_7777_b8: ++** ( ++** ptrue (p[0-7])\.s, all ++** ptrue (p[0-7])\.[bh], all ++** trn1 p0\.b, \2\.b, \1\.b ++** | ++** ptrue (p[0-7])\.[bh], all ++** ptrue (p[0-7])\.s, all ++** trn1 p0\.b, \3\.b, \4\.b ++** ) ++** 
ret ++*/ ++TEST_UNIFORM_P (dupq_7777_b8, ++ p0 = svdupq_n_b8 (1, 1, 1, 0, 1, 1, 1, 0, ++ 1, 1, 1, 0, 1, 1, 1, 0), ++ p0 = svdupq_b8 (1, 1, 1, 0, 1, 1, 1, 0, ++ 1, 1, 1, 0, 1, 1, 1, 0)) ++ ++/* ++** dupq_8888_b8: ++** ( ++** mov (z[0-9]+)\.s, #-16777216 ++** ptrue (p[0-7])\.b, all ++** cmpne p0\.b, \2/z, \1\.b, #0 ++** | ++** ptrue (p[0-7])\.b, all ++** mov (z[0-9]+)\.s, #-16777216 ++** cmpne p0\.b, \3/z, \4\.b, #0 ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_8888_b8, ++ p0 = svdupq_n_b8 (0, 0, 0, 1, 0, 0, 0, 1, ++ 0, 0, 0, 1, 0, 0, 0, 1), ++ p0 = svdupq_b8 (0, 0, 0, 1, 0, 0, 0, 1, ++ 0, 0, 0, 1, 0, 0, 0, 1)) ++ ++/* ++** dupq_9999_b8: ++** ( ++** mov (z[0-9]+)\.s, #-16776961 ++** ptrue (p[0-7])\.b, all ++** cmpne p0\.b, \2/z, \1\.b, #0 ++** | ++** ptrue (p[0-7])\.b, all ++** mov (z[0-9]+)\.s, #-16776961 ++** cmpne p0\.b, \3/z, \4\.b, #0 ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_9999_b8, ++ p0 = svdupq_n_b8 (1, 0, 0, 1, 1, 0, 0, 1, ++ 1, 0, 0, 1, 1, 0, 0, 1), ++ p0 = svdupq_b8 (1, 0, 0, 1, 1, 0, 0, 1, ++ 1, 0, 0, 1, 1, 0, 0, 1)) ++ ++/* ++** dupq_aaaa_b8: ++** ( ++** ptrue (p[0-7])\.h, all ++** ptrue (p[0-7])\.b, all ++** not p0\.b, \2/z, \1\.b ++** | ++** ptrue (p[0-7])\.b, all ++** ptrue (p[0-7])\.h, all ++** not p0\.b, \3/z, \4\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_aaaa_b8, ++ p0 = svdupq_n_b8 (0, 1, 0, 1, 0, 1, 0, 1, ++ 0, 1, 0, 1, 0, 1, 0, 1), ++ p0 = svdupq_b8 (0, 1, 0, 1, 0, 1, 0, 1, ++ 0, 1, 0, 1, 0, 1, 0, 1)) ++ ++/* ++** dupq_bbbb_b8: ++** ( ++** ptrue (p[0-7])\.s, all ++** ptrue (p[0-7])\.[bh], all ++** trn1 p0\.b, \1\.b, \2\.b ++** | ++** ptrue (p[0-7])\.[bh], all ++** ptrue (p[0-7])\.s, all ++** trn1 p0\.b, \4\.b, \3\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_bbbb_b8, ++ p0 = svdupq_n_b8 (1, 1, 0, 1, 1, 1, 0, 1, ++ 1, 1, 0, 1, 1, 1, 0, 1), ++ p0 = svdupq_b8 (1, 1, 0, 1, 1, 1, 0, 1, ++ 1, 1, 0, 1, 1, 1, 0, 1)) ++ ++/* ++** dupq_cccc_b8: ++** ( ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.b, all ++** trn1 p0\.h, \1\.h, \2\.h ++** | ++** ptrue (p[0-7])\.b, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.h, \4\.h, \3\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_cccc_b8, ++ p0 = svdupq_n_b8 (0, 0, 1, 1, 0, 0, 1, 1, ++ 0, 0, 1, 1, 0, 0, 1, 1), ++ p0 = svdupq_b8 (0, 0, 1, 1, 0, 0, 1, 1, ++ 0, 0, 1, 1, 0, 0, 1, 1)) ++ ++/* ++** dupq_dddd_b8: ++** ( ++** ptrue (p[0-7])\.[hs], all ++** ptrue (p[0-7])\.b, all ++** trn1 p0\.h, \1\.h, \2\.h ++** | ++** ptrue (p[0-7])\.b, all ++** ptrue (p[0-7])\.[hs], all ++** trn1 p0\.h, \4\.h, \3\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_dddd_b8, ++ p0 = svdupq_n_b8 (1, 0, 1, 1, 1, 0, 1, 1, ++ 1, 0, 1, 1, 1, 0, 1, 1), ++ p0 = svdupq_b8 (1, 0, 1, 1, 1, 0, 1, 1, ++ 1, 0, 1, 1, 1, 0, 1, 1)) ++ ++/* ++** dupq_eeee_b8: ++** ( ++** ptrue (p[0-7])\.s, all ++** ptrue (p[0-7])\.b, all ++** not p0\.b, \2/z, \1\.b ++** | ++** ptrue (p[0-7])\.b, all ++** ptrue (p[0-7])\.s, all ++** not p0\.b, \3/z, \4\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_eeee_b8, ++ p0 = svdupq_n_b8 (0, 1, 1, 1, 0, 1, 1, 1, ++ 0, 1, 1, 1, 0, 1, 1, 1), ++ p0 = svdupq_b8 (0, 1, 1, 1, 0, 1, 1, 1, ++ 0, 1, 1, 1, 0, 1, 1, 1)) ++ ++/* ++** dupq_ffff_b8: ++** ptrue p0\.b, all ++** ret ++*/ ++TEST_UNIFORM_P (dupq_ffff_b8, ++ p0 = svdupq_n_b8 (1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1), ++ p0 = svdupq_b8 (1, 1, 1, 1, 1, 1, 1, 1, ++ 1, 1, 1, 1, 1, 1, 1, 1)) ++ ++/* ++** dupq_5f5f_b8: ++** ( ++** ptrue (p[0-7])\.h, all ++** ptrue (p[0-7])\.b, all ++** trn1 p0\.s, \2\.s, \1\.s ++** | ++** ptrue (p[0-7])\.b, all ++** ptrue (p[0-7])\.h, all ++** trn1 p0\.s, \3\.s, \4\.s ++** ) ++** 
ret ++*/ ++TEST_UNIFORM_P (dupq_5f5f_b8, ++ p0 = svdupq_n_b8 (1, 1, 1, 1, 1, 0, 1, 0, ++ 1, 1, 1, 1, 1, 0, 1, 0), ++ p0 = svdupq_b8 (1, 1, 1, 1, 1, 0, 1, 0, ++ 1, 1, 1, 1, 1, 0, 1, 0)) ++ ++/* ++** dupq_1f1f_b8: ++** ( ++** ptrue (p[0-7])\.[sd], all ++** ptrue (p[0-7])\.b, all ++** trn1 p0\.s, \2\.s, \1\.s ++** | ++** ptrue (p[0-7])\.b, all ++** ptrue (p[0-7])\.[sd], all ++** trn1 p0\.s, \3\.s, \4\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_1f1f_b8, ++ p0 = svdupq_n_b8 (1, 1, 1, 1, 1, 0, 0, 0, ++ 1, 1, 1, 1, 1, 0, 0, 0), ++ p0 = svdupq_b8 (1, 1, 1, 1, 1, 0, 0, 0, ++ 1, 1, 1, 1, 1, 0, 0, 0)) ++ ++/* ++** dupq_1515_b8: ++** ( ++** ptrue (p[0-7])\.d, all ++** ptrue (p[0-7])\.[hs], all ++** trn1 p0\.h, \2\.h, \1\.h ++** | ++** ptrue (p[0-7])\.[hs], all ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.h, \3\.h, \4\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_1515_b8, ++ p0 = svdupq_n_b8 (1, 0, 1, 0, 1, 0, 0, 0, ++ 1, 0, 1, 0, 1, 0, 0, 0), ++ p0 = svdupq_b8 (1, 0, 1, 0, 1, 0, 0, 0, ++ 1, 0, 1, 0, 1, 0, 0, 0)) ++ ++/* ++** dupq_0505_b8: ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.h, \1\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_P (dupq_0505_b8, ++ p0 = svdupq_n_b8 (1, 0, 1, 0, 0, 0, 0, 0, ++ 1, 0, 1, 0, 0, 0, 0, 0), ++ p0 = svdupq_b8 (1, 0, 1, 0, 0, 0, 0, 0, ++ 1, 0, 1, 0, 0, 0, 0, 0)) ++ ++/* ++** dupq_00ff_b8: ++** ( ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.b, all ++** trn1 p0\.d, \2\.d, \1\.d ++** | ++** ptrue (p[0-7])\.b, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.d, \3\.d, \4\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_00ff_b8, ++ p0 = svdupq_n_b8 (1, 1, 1, 1, 1, 1, 1, 1, ++ 0, 0, 0, 0, 0, 0, 0, 0), ++ p0 = svdupq_b8 (1, 1, 1, 1, 1, 1, 1, 1, ++ 0, 0, 0, 0, 0, 0, 0, 0)) ++ ++/* ++** dupq_0055_b8: ++** ( ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.h, all ++** trn1 p0\.d, \2\.d, \1\.d ++** | ++** ptrue (p[0-7])\.h, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.d, \3\.d, \4\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_0055_b8, ++ p0 = svdupq_n_b8 (1, 0, 1, 0, 1, 0, 1, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0), ++ p0 = svdupq_b8 (1, 0, 1, 0, 1, 0, 1, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0)) ++ ++/* ++** dupq_0011_b8: ++** ( ++** pfalse (p[0-7])\.b ++** ptrue (p[0-7])\.s, all ++** trn1 p0\.d, \2\.d, \1\.d ++** | ++** ptrue (p[0-7])\.s, all ++** pfalse (p[0-7])\.b ++** trn1 p0\.d, \3\.d, \4\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_0011_b8, ++ p0 = svdupq_n_b8 (1, 0, 0, 0, 1, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0), ++ p0 = svdupq_b8 (1, 0, 0, 0, 1, 0, 0, 0, ++ 0, 0, 0, 0, 0, 0, 0, 0)) ++ ++/* ++** dupq_0111_b8: ++** ( ++** ptrue (p[0-7])\.d, all ++** ptrue (p[0-7])\.s, all ++** trn1 p0\.d, \2\.d, \1\.d ++** | ++** ptrue (p[0-7])\.s, all ++** ptrue (p[0-7])\.d, all ++** trn1 p0\.d, \3\.d, \4\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_P (dupq_0111_b8, ++ p0 = svdupq_n_b8 (1, 0, 0, 0, 1, 0, 0, 0, ++ 1, 0, 0, 0, 0, 0, 0, 0), ++ p0 = svdupq_b8 (1, 0, 0, 0, 1, 0, 0, 0, ++ 1, 0, 0, 0, 0, 0, 0, 0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f16.c +new file mode 100644 +index 000000000..91de8344c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f16.c +@@ -0,0 +1,53 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_1c_f16: ++** mov z0\.s, #15360 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_1c_f16, svfloat16_t, ++ z0 = svdupq_n_f16 (1.0, 0, 1.0, 0, 1.0, 0, 1.0, 0), ++ z0 = svdupq_f16 (1.0, 0, 1.0, 0, 1.0, 0, 1.0, 0)); ++ ++/* ++** dupq_5ic_f16: ++** movi v([0-9]+)\.4s, 
0x45, lsl 24 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_5ic_f16, svfloat16_t, ++ z0 = svdupq_n_f16 (0, 5.0, 0, 5.0, 0, 5.0, 0, 5.0), ++ z0 = svdupq_f16 (0, 5.0, 0, 5.0, 0, 5.0, 0, 5.0)); ++ ++ ++/* ++** dupq_m1c_f16: ++** movi v([0-9]+)\.4s, 0xbc, lsl 8 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_m1c_f16, svfloat16_t, ++ z0 = svdupq_n_f16 (-1.0, 0, -1.0, 0, -1.0, 0, -1.0, 0), ++ z0 = svdupq_f16 (-1.0, 0, -1.0, 0, -1.0, 0, -1.0, 0)); ++ ++/* ++** dupq_40p5c_f16: ++** mov (w[0-9]+), 20752 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_40p5c_f16, svfloat16_t, ++ z0 = svdupq_n_f16 (40.5, 0, 40.5, 0, 40.5, 0, 40.5, 0), ++ z0 = svdupq_f16 (40.5, 0, 40.5, 0, 40.5, 0, 40.5, 0)); ++ ++/* ++** dupq_pool_f16: ++** ... ++** ld1rqh z0\.h, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_f16, svfloat16_t, ++ z0 = svdupq_n_f16 (4.75, 1.0, 9, 77, 5.25, 22, 19, 50), ++ z0 = svdupq_f16 (4.75, 1.0, 9, 77, 5.25, 22, 19, 50)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f32.c +new file mode 100644 +index 000000000..4f9c04f1a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f32.c +@@ -0,0 +1,53 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_1c_f32: ++** mov z0\.d, #1065353216 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_1c_f32, svfloat32_t, ++ z0 = svdupq_n_f32 (1.0, 0, 1.0, 0), ++ z0 = svdupq_f32 (1.0, 0, 1.0, 0)); ++ ++/* ++** dupq_5ic_f32: ++** mov (x[0-9]+), 4656722014701092864 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_5ic_f32, svfloat32_t, ++ z0 = svdupq_n_f32 (0, 5.0, 0, 5.0), ++ z0 = svdupq_f32 (0, 5.0, 0, 5.0)); ++ ++ ++/* ++** dupq_m1c_f32: ++** mov (x[0-9]+), 3212836864 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_m1c_f32, svfloat32_t, ++ z0 = svdupq_n_f32 (-1.0, 0, -1.0, 0), ++ z0 = svdupq_f32 (-1.0, 0, -1.0, 0)); ++ ++/* ++** dupq_40p5c_f32: ++** mov (x[0-9]+), 1109524480 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_40p5c_f32, svfloat32_t, ++ z0 = svdupq_n_f32 (40.5, 0, 40.5, 0), ++ z0 = svdupq_f32 (40.5, 0, 40.5, 0)); ++ ++/* ++** dupq_pool_f32: ++** ... ++** ld1rqw z0\.s, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_f32, svfloat32_t, ++ z0 = svdupq_n_f32 (4.5, 10.1, 7.3, 11.8), ++ z0 = svdupq_f32 (4.5, 10.1, 7.3, 11.8)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f64.c +new file mode 100644 +index 000000000..27d14480e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_pool_f64: ++** ... 
++** ld1rqd z0\.d, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_f64, svfloat64_t, ++ z0 = svdupq_n_f64 (4.5, 10.1), ++ z0 = svdupq_f64 (4.5, 10.1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_bf16.c +new file mode 100644 +index 000000000..89ae4a4c2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_bf16.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_bf16_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_bf16_tied, svbfloat16_t, ++ z0 = svdupq_lane_bf16 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_bf16_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_bf16_untied, svbfloat16_t, ++ z0 = svdupq_lane_bf16 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_bf16: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_bf16, svbfloat16_t, ++ z0 = svdupq_lane_bf16 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_bf16: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_bf16, svbfloat16_t, ++ z0 = svdupq_lane_bf16 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_bf16: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_bf16, svbfloat16_t, ++ z0 = svdupq_lane_bf16 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f16.c +new file mode 100644 +index 000000000..6fa97ca3a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f16.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_f16_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_f16_tied, svfloat16_t, ++ z0 = svdupq_lane_f16 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_f16_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_f16_untied, svfloat16_t, ++ z0 = svdupq_lane_f16 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_f16: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_f16, svfloat16_t, ++ z0 = svdupq_lane_f16 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_f16: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_f16, svfloat16_t, ++ z0 = svdupq_lane_f16 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_f16: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_f16, svfloat16_t, ++ z0 = svdupq_lane_f16 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f32.c +new file mode 100644 +index 000000000..69ce5452e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f32.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_f32_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_f32_tied, svfloat32_t, ++ z0 = svdupq_lane_f32 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_f32_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_f32_untied, svfloat32_t, ++ 
z0 = svdupq_lane_f32 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_f32: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_f32, svfloat32_t, ++ z0 = svdupq_lane_f32 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_f32: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_f32, svfloat32_t, ++ z0 = svdupq_lane_f32 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_f32: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_f32, svfloat32_t, ++ z0 = svdupq_lane_f32 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f64.c +new file mode 100644 +index 000000000..51a8d9f2d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f64.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_f64_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_f64_tied, svfloat64_t, ++ z0 = svdupq_lane_f64 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_f64_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_f64_untied, svfloat64_t, ++ z0 = svdupq_lane_f64 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_f64: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_f64, svfloat64_t, ++ z0 = svdupq_lane_f64 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_f64: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_f64, svfloat64_t, ++ z0 = svdupq_lane_f64 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_f64: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_f64, svfloat64_t, ++ z0 = svdupq_lane_f64 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s16.c +new file mode 100644 +index 000000000..08a0510be +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s16.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_s16_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_s16_tied, svint16_t, ++ z0 = svdupq_lane_s16 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_s16_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_s16_untied, svint16_t, ++ z0 = svdupq_lane_s16 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_s16: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_s16, svint16_t, ++ z0 = svdupq_lane_s16 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_s16: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_s16, svint16_t, ++ z0 = svdupq_lane_s16 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_s16: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_s16, svint16_t, ++ z0 = svdupq_lane_s16 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s32.c +new file mode 100644 +index 000000000..e9a9c9a60 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s32.c +@@ -0,0 +1,48 @@ ++/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_s32_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_s32_tied, svint32_t, ++ z0 = svdupq_lane_s32 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_s32_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_s32_untied, svint32_t, ++ z0 = svdupq_lane_s32 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_s32: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_s32, svint32_t, ++ z0 = svdupq_lane_s32 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_s32: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_s32, svint32_t, ++ z0 = svdupq_lane_s32 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_s32: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_s32, svint32_t, ++ z0 = svdupq_lane_s32 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s64.c +new file mode 100644 +index 000000000..2c6342149 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s64.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_s64_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_s64_tied, svint64_t, ++ z0 = svdupq_lane_s64 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_s64_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_s64_untied, svint64_t, ++ z0 = svdupq_lane_s64 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_s64: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_s64, svint64_t, ++ z0 = svdupq_lane_s64 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_s64: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_s64, svint64_t, ++ z0 = svdupq_lane_s64 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_s64: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_s64, svint64_t, ++ z0 = svdupq_lane_s64 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s8.c +new file mode 100644 +index 000000000..2c2e6ee72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s8.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_s8_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_s8_tied, svint8_t, ++ z0 = svdupq_lane_s8 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_s8_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_s8_untied, svint8_t, ++ z0 = svdupq_lane_s8 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_s8: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_s8, svint8_t, ++ z0 = svdupq_lane_s8 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_s8: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_s8, svint8_t, ++ z0 = svdupq_lane_s8 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_s8: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_s8, svint8_t, ++ z0 = 
svdupq_lane_s8 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u16.c +new file mode 100644 +index 000000000..e5fba592f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u16.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_u16_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_u16_tied, svuint16_t, ++ z0 = svdupq_lane_u16 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_u16_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_u16_untied, svuint16_t, ++ z0 = svdupq_lane_u16 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_u16: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_u16, svuint16_t, ++ z0 = svdupq_lane_u16 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_u16: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_u16, svuint16_t, ++ z0 = svdupq_lane_u16 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_u16: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_u16, svuint16_t, ++ z0 = svdupq_lane_u16 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u32.c +new file mode 100644 +index 000000000..fb3346e45 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u32.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_u32_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_u32_tied, svuint32_t, ++ z0 = svdupq_lane_u32 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_u32_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_u32_untied, svuint32_t, ++ z0 = svdupq_lane_u32 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_u32: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_u32, svuint32_t, ++ z0 = svdupq_lane_u32 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_u32: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_u32, svuint32_t, ++ z0 = svdupq_lane_u32 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_u32: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_u32, svuint32_t, ++ z0 = svdupq_lane_u32 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u64.c +new file mode 100644 +index 000000000..22f1d5d55 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u64.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_u64_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_u64_tied, svuint64_t, ++ z0 = svdupq_lane_u64 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_u64_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_u64_untied, svuint64_t, ++ z0 = svdupq_lane_u64 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_u64: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z 
(dupq_lane_1_u64, svuint64_t, ++ z0 = svdupq_lane_u64 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_u64: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_u64, svuint64_t, ++ z0 = svdupq_lane_u64 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_u64: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_u64, svuint64_t, ++ z0 = svdupq_lane_u64 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u8.c +new file mode 100644 +index 000000000..ba16f836a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u8.c +@@ -0,0 +1,48 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_lane_0_u8_tied: ++** dup z0\.q, z0\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_u8_tied, svuint8_t, ++ z0 = svdupq_lane_u8 (z0, 0), ++ z0 = svdupq_lane (z0, 0)) ++ ++/* ++** dupq_lane_0_u8_untied: ++** dup z0\.q, z1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_0_u8_untied, svuint8_t, ++ z0 = svdupq_lane_u8 (z1, 0), ++ z0 = svdupq_lane (z1, 0)) ++ ++/* ++** dupq_lane_1_u8: ++** dup z0\.q, z0\.q\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_1_u8, svuint8_t, ++ z0 = svdupq_lane_u8 (z0, 1), ++ z0 = svdupq_lane (z0, 1)) ++ ++/* ++** dupq_lane_2_u8: ++** dup z0\.q, z0\.q\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_2_u8, svuint8_t, ++ z0 = svdupq_lane_u8 (z0, 2), ++ z0 = svdupq_lane (z0, 2)) ++ ++/* ++** dupq_lane_3_u8: ++** dup z0\.q, z0\.q\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_lane_3_u8, svuint8_t, ++ z0 = svdupq_lane_u8 (z0, 3), ++ z0 = svdupq_lane (z0, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s16.c +new file mode 100644 +index 000000000..5a9a53b2d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s16.c +@@ -0,0 +1,70 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_25600s_s16: ++** mov z0\.s, #25600 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_25600s_s16, svint16_t, ++ z0 = svdupq_n_s16 (25600, 0, 25600, 0, 25600, 0, 25600, 0), ++ z0 = svdupq_s16 (25600, 0, 25600, 0, 25600, 0, 25600, 0)) ++ ++/* ++** dupq_7ff00s_s16: ++** mov z0\.s, #524032 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_7ff00s_s16, svint16_t, ++ z0 = svdupq_n_s16 (0xff00, 7, 0xff00, 7, 0xff00, 7, 0xff00, 7), ++ z0 = svdupq_s16 (0xff00, 7, 0xff00, 7, 0xff00, 7, 0xff00, 7)) ++ ++/* ++** dupq_65536d_s16: ++** mov z0\.d, #65536 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_65536d_s16, svint16_t, ++ z0 = svdupq_n_s16 (0, 1, 0, 0, 0, 1, 0, 0), ++ z0 = svdupq_s16 (0, 1, 0, 0, 0, 1, 0, 0)) ++ ++/* ++** dupq_m2d_s16: ++** mov z0\.d, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_m2d_s16, svint16_t, ++ z0 = svdupq_n_s16 (-2, -1, -1, -1, -2, -1, -1, -1), ++ z0 = svdupq_s16 (-2, -1, -1, -1, -2, -1, -1, -1)) ++ ++/* ++** dupq_4ddb_s16: ++** movi v([0-9]+)\.2d, 0xff0000ffff00ff ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_4ddb_s16, svint16_t, ++ z0 = svdupq_n_s16 (0xff, -1, 0, 0xff, 0xff, -1, 0, 0xff), ++ z0 = svdupq_s16 (0xff, -1, 0, 0xff, 0xff, -1, 0, 0xff)) ++ ++ ++/* ++** dupq_a093s_s16: ++** mov (w[0-9]+), 41107 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_a093s_s16, svint16_t, ++ z0 = svdupq_n_s16 (0xa093, 0, 0xa093, 0, 0xa093, 0, 0xa093, 0), ++ z0 = svdupq_s16 (0xa093, 0, 0xa093, 0, 0xa093, 
0, 0xa093, 0)); ++ ++/* ++** dupq_pool_s16: ++** ... ++** ld1rqh z0\.h, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_s16, svint16_t, ++ z0 = svdupq_n_s16 (4, 10, 9, 77, 52, 22, 19, 50), ++ z0 = svdupq_s16 (4, 10, 9, 77, 52, 22, 19, 50)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s32.c +new file mode 100644 +index 000000000..13b24c0db +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s32.c +@@ -0,0 +1,61 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_12800d_s32: ++** mov z0\.d, #12800 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_12800d_s32, svint32_t, ++ z0 = svdupq_n_s32 (12800, 0, 12800, 0), ++ z0 = svdupq_s32 (12800, 0, 12800, 0)) ++ ++/* ++** dupq_fffffffed_s32: ++** mov z0\.d, #4294967294 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_fffffffed_s32, svint32_t, ++ z0 = svdupq_n_s32 (-2, 0, -2, 0), ++ z0 = svdupq_s32 (-2, 0, -2, 0)) ++ ++/* ++** dupq_ff00ffffff00d_s32: ++** movi v([0-9]+)\.2d, 0xff00ffffff00 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_ff00ffffff00d_s32, svint32_t, ++ z0 = svdupq_n_s32 (-256, 0xff00, -256, 0xff00), ++ z0 = svdupq_s32 (-256, 0xff00, -256, 0xff00)) ++ ++/* ++** dupq_fedcd_s32: ++** mov (x[0-9]+), 65244 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_fedcd_s32, svint32_t, ++ z0 = svdupq_n_s32 (0xfedc, 0, 0xfedc, 0), ++ z0 = svdupq_s32 (0xfedc, 0, 0xfedc, 0)) ++ ++/* ++** dupq_1357ud_s32: ++** mov (x[0-9]+), 21264383082496 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_1357ud_s32, svint32_t, ++ z0 = svdupq_n_s32 (0, 0x1357, 0, 0x1357), ++ z0 = svdupq_s32 (0, 0x1357, 0, 0x1357)) ++ ++/* ++** dupq_pool_s32: ++** ... ++** ld1rqw z0\.s, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_s32, svint32_t, ++ z0 = svdupq_n_s32 (4, 10, 9, 77), ++ z0 = svdupq_s32 (4, 10, 9, 77)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s64.c +new file mode 100644 +index 000000000..d2689fa5c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_pool_s64: ++** ... 
++** ld1rqd z0\.d, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_s64, svint64_t, ++ z0 = svdupq_n_s64 (4, 10), ++ z0 = svdupq_s64 (4, 10)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s8.c +new file mode 100644 +index 000000000..30b36c162 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s8.c +@@ -0,0 +1,99 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_54h_s8: ++** mov z0\.h, #54 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_54h_s8, svint8_t, ++ z0 = svdupq_n_s8 (54, 0, 54, 0, 54, 0, 54, 0, ++ 54, 0, 54, 0, 54, 0, 54, 0), ++ z0 = svdupq_s8 (54, 0, 54, 0, 54, 0, 54, 0, ++ 54, 0, 54, 0, 54, 0, 54, 0)) ++ ++/* ++** dupq_2560h_s8: ++** mov z0\.h, #2560 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_2560h_s8, svint8_t, ++ z0 = svdupq_n_s8 (0, 10, 0, 10, 0, 10, 0, 10, ++ 0, 10, 0, 10, 0, 10, 0, 10), ++ z0 = svdupq_s8 (0, 10, 0, 10, 0, 10, 0, 10, ++ 0, 10, 0, 10, 0, 10, 0, 10)) ++ ++/* ++** dupq_5120s_s8: ++** mov z0\.s, #5120 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_5120s_s8, svint8_t, ++ z0 = svdupq_n_s8 (0, 20, 0, 0, 0, 20, 0, 0, ++ 0, 20, 0, 0, 0, 20, 0, 0), ++ z0 = svdupq_s8 (0, 20, 0, 0, 0, 20, 0, 0, ++ 0, 20, 0, 0, 0, 20, 0, 0)) ++ ++/* ++** dupq_1ff00s_s8: ++** mov z0\.s, #130816 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_1ff00s_s8, svint8_t, ++ z0 = svdupq_n_s8 (0, -1, 1, 0, 0, -1, 1, 0, ++ 0, -1, 1, 0, 0, -1, 1, 0), ++ z0 = svdupq_s8 (0, -1, 1, 0, 0, -1, 1, 0, ++ 0, -1, 1, 0, 0, -1, 1, 0)) ++ ++/* ++** dupq_96db_s8: ++** movi v([0-9]+)\.2d, 0xff0000ff00ffff00 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_96db_s8, svint8_t, ++ z0 = svdupq_n_s8 (0, -1, -1, 0, -1, 0, 0, -1, ++ 0, -1, -1, 0, -1, 0, 0, -1), ++ z0 = svdupq_s8 (0, -1, -1, 0, -1, 0, 0, -1, ++ 0, -1, -1, 0, -1, 0, 0, -1)) ++ ++/* ++** dupq_7755h_s8: ++** mov (w[0-9]+), 21879 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_7755h_s8, svint8_t, ++ z0 = svdupq_n_s8 (0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55), ++ z0 = svdupq_s8 (0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55)) ++ ++/* ++** dupq_729a0000s_s8: ++** mov (w[0-9]+), 1922695168 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_729a0000s_s8, svint8_t, ++ z0 = svdupq_n_s8 (0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72, ++ 0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72), ++ z0 = svdupq_s8 (0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72, ++ 0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72)) ++ ++/* ++** dupq_pool_s8: ++** ... 
++** ld1rqb z0\.b, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_s8, svint8_t, ++ z0 = svdupq_n_s8 (4, 10, 9, 77, 52, 22, 19, 50, ++ -1, 32, 44, 17, 23, 99, 53, 39), ++ z0 = svdupq_s8 (4, 10, 9, 77, 52, 22, 19, 50, ++ -1, 32, 44, 17, 23, 99, 53, 39)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u16.c +new file mode 100644 +index 000000000..6ca13222d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u16.c +@@ -0,0 +1,70 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_25600s_u16: ++** mov z0\.s, #25600 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_25600s_u16, svuint16_t, ++ z0 = svdupq_n_u16 (25600, 0, 25600, 0, 25600, 0, 25600, 0), ++ z0 = svdupq_u16 (25600, 0, 25600, 0, 25600, 0, 25600, 0)) ++ ++/* ++** dupq_7ff00s_u16: ++** mov z0\.s, #524032 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_7ff00s_u16, svuint16_t, ++ z0 = svdupq_n_u16 (0xff00, 7, 0xff00, 7, 0xff00, 7, 0xff00, 7), ++ z0 = svdupq_u16 (0xff00, 7, 0xff00, 7, 0xff00, 7, 0xff00, 7)) ++ ++/* ++** dupq_65536d_u16: ++** mov z0\.d, #65536 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_65536d_u16, svuint16_t, ++ z0 = svdupq_n_u16 (0, 1, 0, 0, 0, 1, 0, 0), ++ z0 = svdupq_u16 (0, 1, 0, 0, 0, 1, 0, 0)) ++ ++/* ++** dupq_m2d_u16: ++** mov z0\.d, #-2 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_m2d_u16, svuint16_t, ++ z0 = svdupq_n_u16 (-2, -1, -1, -1, -2, -1, -1, -1), ++ z0 = svdupq_u16 (-2, -1, -1, -1, -2, -1, -1, -1)) ++ ++/* ++** dupq_4ddb_u16: ++** movi v([0-9]+)\.2d, 0xff0000ffff00ff ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_4ddb_u16, svuint16_t, ++ z0 = svdupq_n_u16 (0xff, -1, 0, 0xff, 0xff, -1, 0, 0xff), ++ z0 = svdupq_u16 (0xff, -1, 0, 0xff, 0xff, -1, 0, 0xff)) ++ ++ ++/* ++** dupq_a093s_u16: ++** mov (w[0-9]+), 41107 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_a093s_u16, svuint16_t, ++ z0 = svdupq_n_u16 (0xa093, 0, 0xa093, 0, 0xa093, 0, 0xa093, 0), ++ z0 = svdupq_u16 (0xa093, 0, 0xa093, 0, 0xa093, 0, 0xa093, 0)); ++ ++/* ++** dupq_pool_u16: ++** ... 
++** ld1rqh z0\.h, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_u16, svuint16_t, ++ z0 = svdupq_n_u16 (4, 10, 9, 77, 52, 22, 19, 50), ++ z0 = svdupq_u16 (4, 10, 9, 77, 52, 22, 19, 50)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u32.c +new file mode 100644 +index 000000000..3669bf8a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u32.c +@@ -0,0 +1,61 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_12800d_u32: ++** mov z0\.d, #12800 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_12800d_u32, svuint32_t, ++ z0 = svdupq_n_u32 (12800, 0, 12800, 0), ++ z0 = svdupq_u32 (12800, 0, 12800, 0)) ++ ++/* ++** dupq_fffffffed_u32: ++** mov z0\.d, #4294967294 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_fffffffed_u32, svuint32_t, ++ z0 = svdupq_n_u32 (-2, 0, -2, 0), ++ z0 = svdupq_u32 (-2, 0, -2, 0)) ++ ++/* ++** dupq_ff00ffffff00d_u32: ++** movi v([0-9]+)\.2d, 0xff00ffffff00 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_ff00ffffff00d_u32, svuint32_t, ++ z0 = svdupq_n_u32 (-256, 0xff00, -256, 0xff00), ++ z0 = svdupq_u32 (-256, 0xff00, -256, 0xff00)) ++ ++/* ++** dupq_fedcd_u32: ++** mov (x[0-9]+), 65244 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_fedcd_u32, svuint32_t, ++ z0 = svdupq_n_u32 (0xfedc, 0, 0xfedc, 0), ++ z0 = svdupq_u32 (0xfedc, 0, 0xfedc, 0)) ++ ++/* ++** dupq_1357ud_u32: ++** mov (x[0-9]+), 21264383082496 ++** mov z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_1357ud_u32, svuint32_t, ++ z0 = svdupq_n_u32 (0, 0x1357, 0, 0x1357), ++ z0 = svdupq_u32 (0, 0x1357, 0, 0x1357)) ++ ++/* ++** dupq_pool_u32: ++** ... ++** ld1rqw z0\.s, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_u32, svuint32_t, ++ z0 = svdupq_n_u32 (4, 10, 9, 77), ++ z0 = svdupq_u32 (4, 10, 9, 77)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u64.c +new file mode 100644 +index 000000000..cb655a15a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_pool_u64: ++** ... 
++** ld1rqd z0\.d, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_u64, svuint64_t, ++ z0 = svdupq_n_u64 (4, 10), ++ z0 = svdupq_u64 (4, 10)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u8.c +new file mode 100644 +index 000000000..8b40c2b41 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u8.c +@@ -0,0 +1,99 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** dupq_54h_u8: ++** mov z0\.h, #54 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_54h_u8, svuint8_t, ++ z0 = svdupq_n_u8 (54, 0, 54, 0, 54, 0, 54, 0, ++ 54, 0, 54, 0, 54, 0, 54, 0), ++ z0 = svdupq_u8 (54, 0, 54, 0, 54, 0, 54, 0, ++ 54, 0, 54, 0, 54, 0, 54, 0)) ++ ++/* ++** dupq_2560h_u8: ++** mov z0\.h, #2560 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_2560h_u8, svuint8_t, ++ z0 = svdupq_n_u8 (0, 10, 0, 10, 0, 10, 0, 10, ++ 0, 10, 0, 10, 0, 10, 0, 10), ++ z0 = svdupq_u8 (0, 10, 0, 10, 0, 10, 0, 10, ++ 0, 10, 0, 10, 0, 10, 0, 10)) ++ ++/* ++** dupq_5120s_u8: ++** mov z0\.s, #5120 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_5120s_u8, svuint8_t, ++ z0 = svdupq_n_u8 (0, 20, 0, 0, 0, 20, 0, 0, ++ 0, 20, 0, 0, 0, 20, 0, 0), ++ z0 = svdupq_u8 (0, 20, 0, 0, 0, 20, 0, 0, ++ 0, 20, 0, 0, 0, 20, 0, 0)) ++ ++/* ++** dupq_1ff00s_u8: ++** mov z0\.s, #130816 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_1ff00s_u8, svuint8_t, ++ z0 = svdupq_n_u8 (0, -1, 1, 0, 0, -1, 1, 0, ++ 0, -1, 1, 0, 0, -1, 1, 0), ++ z0 = svdupq_u8 (0, -1, 1, 0, 0, -1, 1, 0, ++ 0, -1, 1, 0, 0, -1, 1, 0)) ++ ++/* ++** dupq_96db_u8: ++** movi v([0-9]+)\.2d, 0xff0000ff00ffff00 ++** dup z0\.q, z\1\.q\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_96db_u8, svuint8_t, ++ z0 = svdupq_n_u8 (0, -1, -1, 0, -1, 0, 0, -1, ++ 0, -1, -1, 0, -1, 0, 0, -1), ++ z0 = svdupq_u8 (0, -1, -1, 0, -1, 0, 0, -1, ++ 0, -1, -1, 0, -1, 0, 0, -1)) ++ ++/* ++** dupq_7755h_u8: ++** mov (w[0-9]+), 21879 ++** mov z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_7755h_u8, svuint8_t, ++ z0 = svdupq_n_u8 (0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55), ++ z0 = svdupq_u8 (0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55, ++ 0x77, 0x55, 0x77, 0x55)) ++ ++/* ++** dupq_729a0000s_u8: ++** mov (w[0-9]+), 1922695168 ++** mov z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_729a0000s_u8, svuint8_t, ++ z0 = svdupq_n_u8 (0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72, ++ 0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72), ++ z0 = svdupq_u8 (0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72, ++ 0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72)) ++ ++/* ++** dupq_pool_u8: ++** ... 
++** ld1rqb z0\.b, p[0-7]/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_UNIFORM_Z (dupq_pool_u8, svuint8_t, ++ z0 = svdupq_n_u8 (4, 10, 9, 77, 52, 22, 19, 50, ++ -1, 32, 44, 17, 23, 99, 53, 39), ++ z0 = svdupq_u8 (4, 10, 9, 77, 52, 22, 19, 50, ++ -1, 32, 44, 17, 23, 99, 53, 39)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_b.c +new file mode 100644 +index 000000000..961ae84c0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_b.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eor_b_z_tied1: ++** eor p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) ++** ret ++*/ ++TEST_UNIFORM_P (eor_b_z_tied1, ++ p0 = sveor_b_z (p3, p0, p1), ++ p0 = sveor_z (p3, p0, p1)) ++ ++/* ++** eor_b_z_tied2: ++** eor p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) ++** ret ++*/ ++TEST_UNIFORM_P (eor_b_z_tied2, ++ p0 = sveor_b_z (p3, p1, p0), ++ p0 = sveor_z (p3, p1, p0)) ++ ++/* ++** eor_b_z_untied: ++** eor p0\.b, p3/z, (p1\.b, p2\.b|p2\.b, p1\.b) ++** ret ++*/ ++TEST_UNIFORM_P (eor_b_z_untied, ++ p0 = sveor_b_z (p3, p1, p2), ++ p0 = sveor_z (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s16.c +new file mode 100644 +index 000000000..7cf73609a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s16.c +@@ -0,0 +1,376 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eor_s16_m_tied1: ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s16_m_tied1, svint16_t, ++ z0 = sveor_s16_m (p0, z0, z1), ++ z0 = sveor_m (p0, z0, z1)) ++ ++/* ++** eor_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** eor z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s16_m_tied2, svint16_t, ++ z0 = sveor_s16_m (p0, z1, z0), ++ z0 = sveor_m (p0, z1, z0)) ++ ++/* ++** eor_s16_m_untied: ++** movprfx z0, z1 ++** eor z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s16_m_untied, svint16_t, ++ z0 = sveor_s16_m (p0, z1, z2), ++ z0 = sveor_m (p0, z1, z2)) ++ ++/* ++** eor_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = sveor_n_s16_m (p0, z0, x0), ++ z0 = sveor_m (p0, z0, x0)) ++ ++/* ++** eor_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = sveor_n_s16_m (p0, z1, x0), ++ z0 = sveor_m (p0, z1, x0)) ++ ++/* ++** eor_1_s16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s16_m_tied1, svint16_t, ++ z0 = sveor_n_s16_m (p0, z0, 1), ++ z0 = sveor_m (p0, z0, 1)) ++ ++/* ++** eor_1_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s16_m_untied, svint16_t, ++ z0 = sveor_n_s16_m (p0, z1, 1), ++ z0 = sveor_m (p0, z1, 1)) ++ ++/* ++** eor_m2_s16_m: ++** mov (z[0-9]+\.h), #-2 ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m2_s16_m, svint16_t, ++ z0 = sveor_n_s16_m (p0, z0, -2), ++ z0 = sveor_m (p0, z0, -2)) ++ ++/* ++** eor_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s16_z_tied1, svint16_t, ++ 
z0 = sveor_s16_z (p0, z0, z1), ++ z0 = sveor_z (p0, z0, z1)) ++ ++/* ++** eor_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s16_z_tied2, svint16_t, ++ z0 = sveor_s16_z (p0, z1, z0), ++ z0 = sveor_z (p0, z1, z0)) ++ ++/* ++** eor_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** eor z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s16_z_untied, svint16_t, ++ z0 = sveor_s16_z (p0, z1, z2), ++ z0 = sveor_z (p0, z1, z2)) ++ ++/* ++** eor_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = sveor_n_s16_z (p0, z0, x0), ++ z0 = sveor_z (p0, z0, x0)) ++ ++/* ++** eor_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** eor z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = sveor_n_s16_z (p0, z1, x0), ++ z0 = sveor_z (p0, z1, x0)) ++ ++/* ++** eor_1_s16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s16_z_tied1, svint16_t, ++ z0 = sveor_n_s16_z (p0, z0, 1), ++ z0 = sveor_z (p0, z0, 1)) ++ ++/* ++** eor_1_s16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** eor z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s16_z_untied, svint16_t, ++ z0 = sveor_n_s16_z (p0, z1, 1), ++ z0 = sveor_z (p0, z1, 1)) ++ ++/* ++** eor_s16_x_tied1: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s16_x_tied1, svint16_t, ++ z0 = sveor_s16_x (p0, z0, z1), ++ z0 = sveor_x (p0, z0, z1)) ++ ++/* ++** eor_s16_x_tied2: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s16_x_tied2, svint16_t, ++ z0 = sveor_s16_x (p0, z1, z0), ++ z0 = sveor_x (p0, z1, z0)) ++ ++/* ++** eor_s16_x_untied: ++** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s16_x_untied, svint16_t, ++ z0 = sveor_s16_x (p0, z1, z2), ++ z0 = sveor_x (p0, z1, z2)) ++ ++/* ++** eor_w0_s16_x_tied1: ++** mov (z[0-9]+)\.h, w0 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = sveor_n_s16_x (p0, z0, x0), ++ z0 = sveor_x (p0, z0, x0)) ++ ++/* ++** eor_w0_s16_x_untied: ++** mov (z[0-9]+)\.h, w0 ++** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = sveor_n_s16_x (p0, z1, x0), ++ z0 = sveor_x (p0, z1, x0)) ++ ++/* ++** eor_1_s16_x_tied1: ++** eor z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s16_x_tied1, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, 1), ++ z0 = sveor_x (p0, z0, 1)) ++ ++/* ++** eor_1_s16_x_untied: ++** movprfx z0, z1 ++** eor z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s16_x_untied, svint16_t, ++ z0 = sveor_n_s16_x (p0, z1, 1), ++ z0 = sveor_x (p0, z1, 1)) ++ ++/* ++** eor_127_s16_x: ++** eor z0\.h, z0\.h, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (eor_127_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, 127), ++ z0 = sveor_x (p0, z0, 127)) ++ ++/* ++** eor_128_s16_x: ++** eor z0\.h, z0\.h, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_128_s16_x, 
svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, 128), ++ z0 = sveor_x (p0, z0, 128)) ++ ++/* ++** eor_255_s16_x: ++** eor z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_255_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, 255), ++ z0 = sveor_x (p0, z0, 255)) ++ ++/* ++** eor_256_s16_x: ++** eor z0\.h, z0\.h, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_256_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, 256), ++ z0 = sveor_x (p0, z0, 256)) ++ ++/* ++** eor_257_s16_x: ++** eor z0\.h, z0\.h, #0x101 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_257_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, 257), ++ z0 = sveor_x (p0, z0, 257)) ++ ++/* ++** eor_512_s16_x: ++** eor z0\.h, z0\.h, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_512_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, 512), ++ z0 = sveor_x (p0, z0, 512)) ++ ++/* ++** eor_65280_s16_x: ++** eor z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_65280_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, 0xff00), ++ z0 = sveor_x (p0, z0, 0xff00)) ++ ++/* ++** eor_m127_s16_x: ++** eor z0\.h, z0\.h, #0xff81 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m127_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, -127), ++ z0 = sveor_x (p0, z0, -127)) ++ ++/* ++** eor_m128_s16_x: ++** eor z0\.h, z0\.h, #0xff80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m128_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, -128), ++ z0 = sveor_x (p0, z0, -128)) ++ ++/* ++** eor_m255_s16_x: ++** eor z0\.h, z0\.h, #0xff01 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m255_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, -255), ++ z0 = sveor_x (p0, z0, -255)) ++ ++/* ++** eor_m256_s16_x: ++** eor z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m256_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, -256), ++ z0 = sveor_x (p0, z0, -256)) ++ ++/* ++** eor_m257_s16_x: ++** eor z0\.h, z0\.h, #0xfeff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m257_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, -257), ++ z0 = sveor_x (p0, z0, -257)) ++ ++/* ++** eor_m512_s16_x: ++** eor z0\.h, z0\.h, #0xfe00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m512_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, -512), ++ z0 = sveor_x (p0, z0, -512)) ++ ++/* ++** eor_m32768_s16_x: ++** eor z0\.h, z0\.h, #0x8000 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m32768_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, -0x8000), ++ z0 = sveor_x (p0, z0, -0x8000)) ++ ++/* ++** eor_5_s16_x: ++** mov (z[0-9]+)\.h, #5 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_5_s16_x, svint16_t, ++ z0 = sveor_n_s16_x (p0, z0, 5), ++ z0 = sveor_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s32.c +new file mode 100644 +index 000000000..d5aecb201 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s32.c +@@ -0,0 +1,372 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eor_s32_m_tied1: ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s32_m_tied1, svint32_t, ++ z0 = sveor_s32_m (p0, z0, z1), ++ z0 = sveor_m (p0, z0, z1)) ++ ++/* ++** eor_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** eor z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s32_m_tied2, svint32_t, ++ z0 = sveor_s32_m (p0, z1, z0), ++ z0 = sveor_m (p0, z1, z0)) ++ ++/* ++** eor_s32_m_untied: ++** movprfx z0, z1 ++** eor z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s32_m_untied, svint32_t, ++ z0 = sveor_s32_m (p0, z1, z2), ++ z0 = 
sveor_m (p0, z1, z2)) ++ ++/* ++** eor_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = sveor_n_s32_m (p0, z0, x0), ++ z0 = sveor_m (p0, z0, x0)) ++ ++/* ++** eor_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = sveor_n_s32_m (p0, z1, x0), ++ z0 = sveor_m (p0, z1, x0)) ++ ++/* ++** eor_1_s32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s32_m_tied1, svint32_t, ++ z0 = sveor_n_s32_m (p0, z0, 1), ++ z0 = sveor_m (p0, z0, 1)) ++ ++/* ++** eor_1_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s32_m_untied, svint32_t, ++ z0 = sveor_n_s32_m (p0, z1, 1), ++ z0 = sveor_m (p0, z1, 1)) ++ ++/* ++** eor_m2_s32_m: ++** mov (z[0-9]+\.s), #-2 ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m2_s32_m, svint32_t, ++ z0 = sveor_n_s32_m (p0, z0, -2), ++ z0 = sveor_m (p0, z0, -2)) ++ ++/* ++** eor_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s32_z_tied1, svint32_t, ++ z0 = sveor_s32_z (p0, z0, z1), ++ z0 = sveor_z (p0, z0, z1)) ++ ++/* ++** eor_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s32_z_tied2, svint32_t, ++ z0 = sveor_s32_z (p0, z1, z0), ++ z0 = sveor_z (p0, z1, z0)) ++ ++/* ++** eor_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** eor z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s32_z_untied, svint32_t, ++ z0 = sveor_s32_z (p0, z1, z2), ++ z0 = sveor_z (p0, z1, z2)) ++ ++/* ++** eor_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = sveor_n_s32_z (p0, z0, x0), ++ z0 = sveor_z (p0, z0, x0)) ++ ++/* ++** eor_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** eor z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = sveor_n_s32_z (p0, z1, x0), ++ z0 = sveor_z (p0, z1, x0)) ++ ++/* ++** eor_1_s32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s32_z_tied1, svint32_t, ++ z0 = sveor_n_s32_z (p0, z0, 1), ++ z0 = sveor_z (p0, z0, 1)) ++ ++/* ++** eor_1_s32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** eor z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s32_z_untied, svint32_t, ++ z0 = sveor_n_s32_z (p0, z1, 1), ++ z0 = sveor_z (p0, z1, 1)) ++ ++/* ++** eor_s32_x_tied1: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s32_x_tied1, svint32_t, ++ z0 = sveor_s32_x (p0, z0, z1), ++ z0 = sveor_x (p0, z0, z1)) ++ ++/* ++** eor_s32_x_tied2: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s32_x_tied2, svint32_t, ++ z0 = sveor_s32_x (p0, z1, z0), ++ z0 = sveor_x (p0, z1, z0)) ++ ++/* ++** 
eor_s32_x_untied: ++** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s32_x_untied, svint32_t, ++ z0 = sveor_s32_x (p0, z1, z2), ++ z0 = sveor_x (p0, z1, z2)) ++ ++/* ++** eor_w0_s32_x_tied1: ++** mov (z[0-9]+)\.s, w0 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = sveor_n_s32_x (p0, z0, x0), ++ z0 = sveor_x (p0, z0, x0)) ++ ++/* ++** eor_w0_s32_x_untied: ++** mov (z[0-9]+)\.s, w0 ++** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = sveor_n_s32_x (p0, z1, x0), ++ z0 = sveor_x (p0, z1, x0)) ++ ++/* ++** eor_1_s32_x_tied1: ++** eor z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s32_x_tied1, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, 1), ++ z0 = sveor_x (p0, z0, 1)) ++ ++/* ++** eor_1_s32_x_untied: ++** movprfx z0, z1 ++** eor z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s32_x_untied, svint32_t, ++ z0 = sveor_n_s32_x (p0, z1, 1), ++ z0 = sveor_x (p0, z1, 1)) ++ ++/* ++** eor_127_s32_x: ++** eor z0\.s, z0\.s, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (eor_127_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, 127), ++ z0 = sveor_x (p0, z0, 127)) ++ ++/* ++** eor_128_s32_x: ++** eor z0\.s, z0\.s, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_128_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, 128), ++ z0 = sveor_x (p0, z0, 128)) ++ ++/* ++** eor_255_s32_x: ++** eor z0\.s, z0\.s, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_255_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, 255), ++ z0 = sveor_x (p0, z0, 255)) ++ ++/* ++** eor_256_s32_x: ++** eor z0\.s, z0\.s, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_256_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, 256), ++ z0 = sveor_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (eor_257_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, 257), ++ z0 = sveor_x (p0, z0, 257)) ++ ++/* ++** eor_512_s32_x: ++** eor z0\.s, z0\.s, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_512_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, 512), ++ z0 = sveor_x (p0, z0, 512)) ++ ++/* ++** eor_65280_s32_x: ++** eor z0\.s, z0\.s, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_65280_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, 0xff00), ++ z0 = sveor_x (p0, z0, 0xff00)) ++ ++/* ++** eor_m127_s32_x: ++** eor z0\.s, z0\.s, #0xffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m127_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, -127), ++ z0 = sveor_x (p0, z0, -127)) ++ ++/* ++** eor_m128_s32_x: ++** eor z0\.s, z0\.s, #0xffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m128_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, -128), ++ z0 = sveor_x (p0, z0, -128)) ++ ++/* ++** eor_m255_s32_x: ++** eor z0\.s, z0\.s, #0xffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m255_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, -255), ++ z0 = sveor_x (p0, z0, -255)) ++ ++/* ++** eor_m256_s32_x: ++** eor z0\.s, z0\.s, #0xffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m256_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, -256), ++ z0 = sveor_x (p0, z0, -256)) ++ ++/* ++** eor_m257_s32_x: ++** eor z0\.s, z0\.s, #0xfffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m257_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, -257), ++ z0 = sveor_x (p0, z0, -257)) ++ ++/* ++** eor_m512_s32_x: ++** eor z0\.s, z0\.s, #0xfffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m512_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, -512), ++ z0 = sveor_x (p0, z0, -512)) ++ ++/* ++** eor_m32768_s32_x: ++** eor z0\.s, z0\.s, #0xffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m32768_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, -0x8000), ++ z0 = sveor_x (p0, z0, -0x8000)) ++ ++/* ++** eor_5_s32_x: ++** mov (z[0-9]+)\.s, #5 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_5_s32_x, svint32_t, ++ z0 = sveor_n_s32_x (p0, z0, 5), ++ z0 = sveor_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s64.c +new file mode 100644 +index 000000000..157128974 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s64.c +@@ -0,0 +1,372 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eor_s64_m_tied1: ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s64_m_tied1, svint64_t, ++ z0 = sveor_s64_m (p0, z0, z1), ++ z0 = sveor_m (p0, z0, z1)) ++ ++/* ++** eor_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s64_m_tied2, svint64_t, ++ z0 = sveor_s64_m (p0, z1, z0), ++ z0 = sveor_m (p0, z1, z0)) ++ ++/* ++** eor_s64_m_untied: ++** movprfx z0, z1 ++** eor z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s64_m_untied, svint64_t, ++ z0 = sveor_s64_m (p0, z1, z2), ++ z0 = sveor_m (p0, z1, z2)) ++ ++/* ++** eor_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = sveor_n_s64_m (p0, z0, x0), ++ z0 = sveor_m (p0, z0, x0)) ++ ++/* ++** eor_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = sveor_n_s64_m (p0, z1, x0), ++ z0 = sveor_m (p0, 
z1, x0)) ++ ++/* ++** eor_1_s64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s64_m_tied1, svint64_t, ++ z0 = sveor_n_s64_m (p0, z0, 1), ++ z0 = sveor_m (p0, z0, 1)) ++ ++/* ++** eor_1_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s64_m_untied, svint64_t, ++ z0 = sveor_n_s64_m (p0, z1, 1), ++ z0 = sveor_m (p0, z1, 1)) ++ ++/* ++** eor_m2_s64_m: ++** mov (z[0-9]+\.d), #-2 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m2_s64_m, svint64_t, ++ z0 = sveor_n_s64_m (p0, z0, -2), ++ z0 = sveor_m (p0, z0, -2)) ++ ++/* ++** eor_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s64_z_tied1, svint64_t, ++ z0 = sveor_s64_z (p0, z0, z1), ++ z0 = sveor_z (p0, z0, z1)) ++ ++/* ++** eor_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s64_z_tied2, svint64_t, ++ z0 = sveor_s64_z (p0, z1, z0), ++ z0 = sveor_z (p0, z1, z0)) ++ ++/* ++** eor_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** eor z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s64_z_untied, svint64_t, ++ z0 = sveor_s64_z (p0, z1, z2), ++ z0 = sveor_z (p0, z1, z2)) ++ ++/* ++** eor_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = sveor_n_s64_z (p0, z0, x0), ++ z0 = sveor_z (p0, z0, x0)) ++ ++/* ++** eor_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** eor z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = sveor_n_s64_z (p0, z1, x0), ++ z0 = sveor_z (p0, z1, x0)) ++ ++/* ++** eor_1_s64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s64_z_tied1, svint64_t, ++ z0 = sveor_n_s64_z (p0, z0, 1), ++ z0 = sveor_z (p0, z0, 1)) ++ ++/* ++** eor_1_s64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** eor z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s64_z_untied, svint64_t, ++ z0 = sveor_n_s64_z (p0, z1, 1), ++ z0 = sveor_z (p0, z1, 1)) ++ ++/* ++** eor_s64_x_tied1: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s64_x_tied1, svint64_t, ++ z0 = sveor_s64_x (p0, z0, z1), ++ z0 = sveor_x (p0, z0, z1)) ++ ++/* ++** eor_s64_x_tied2: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s64_x_tied2, svint64_t, ++ z0 = sveor_s64_x (p0, z1, z0), ++ z0 = sveor_x (p0, z1, z0)) ++ ++/* ++** eor_s64_x_untied: ++** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s64_x_untied, svint64_t, ++ z0 = sveor_s64_x (p0, z1, z2), ++ z0 = sveor_x (p0, z1, z2)) ++ ++/* ++** eor_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** eor z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = sveor_n_s64_x (p0, z0, x0), ++ z0 = sveor_x (p0, z0, x0)) ++ ++/* ++** eor_x0_s64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** eor z0\.d, (z1\.d, 
\1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = sveor_n_s64_x (p0, z1, x0), ++ z0 = sveor_x (p0, z1, x0)) ++ ++/* ++** eor_1_s64_x_tied1: ++** eor z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s64_x_tied1, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, 1), ++ z0 = sveor_x (p0, z0, 1)) ++ ++/* ++** eor_1_s64_x_untied: ++** movprfx z0, z1 ++** eor z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s64_x_untied, svint64_t, ++ z0 = sveor_n_s64_x (p0, z1, 1), ++ z0 = sveor_x (p0, z1, 1)) ++ ++/* ++** eor_127_s64_x: ++** eor z0\.d, z0\.d, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (eor_127_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, 127), ++ z0 = sveor_x (p0, z0, 127)) ++ ++/* ++** eor_128_s64_x: ++** eor z0\.d, z0\.d, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_128_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, 128), ++ z0 = sveor_x (p0, z0, 128)) ++ ++/* ++** eor_255_s64_x: ++** eor z0\.d, z0\.d, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_255_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, 255), ++ z0 = sveor_x (p0, z0, 255)) ++ ++/* ++** eor_256_s64_x: ++** eor z0\.d, z0\.d, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_256_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, 256), ++ z0 = sveor_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (eor_257_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, 257), ++ z0 = sveor_x (p0, z0, 257)) ++ ++/* ++** eor_512_s64_x: ++** eor z0\.d, z0\.d, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_512_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, 512), ++ z0 = sveor_x (p0, z0, 512)) ++ ++/* ++** eor_65280_s64_x: ++** eor z0\.d, z0\.d, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_65280_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, 0xff00), ++ z0 = sveor_x (p0, z0, 0xff00)) ++ ++/* ++** eor_m127_s64_x: ++** eor z0\.d, z0\.d, #0xffffffffffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m127_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, -127), ++ z0 = sveor_x (p0, z0, -127)) ++ ++/* ++** eor_m128_s64_x: ++** eor z0\.d, z0\.d, #0xffffffffffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m128_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, -128), ++ z0 = sveor_x (p0, z0, -128)) ++ ++/* ++** eor_m255_s64_x: ++** eor z0\.d, z0\.d, #0xffffffffffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m255_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, -255), ++ z0 = sveor_x (p0, z0, -255)) ++ ++/* ++** eor_m256_s64_x: ++** eor z0\.d, z0\.d, #0xffffffffffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m256_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, -256), ++ z0 = sveor_x (p0, z0, -256)) ++ ++/* ++** eor_m257_s64_x: ++** eor z0\.d, z0\.d, #0xfffffffffffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m257_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, -257), ++ z0 = sveor_x (p0, z0, -257)) ++ ++/* ++** eor_m512_s64_x: ++** eor z0\.d, z0\.d, #0xfffffffffffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m512_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, -512), ++ z0 = sveor_x (p0, z0, -512)) ++ ++/* ++** eor_m32768_s64_x: ++** eor z0\.d, z0\.d, #0xffffffffffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m32768_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, -0x8000), ++ z0 = sveor_x (p0, z0, -0x8000)) ++ ++/* ++** eor_5_s64_x: ++** mov (z[0-9]+\.d), #5 ++** eor z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_5_s64_x, svint64_t, ++ z0 = sveor_n_s64_x (p0, z0, 5), ++ z0 = sveor_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s8.c +new file mode 100644 +index 000000000..083ac2dde +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s8.c +@@ -0,0 +1,296 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eor_s8_m_tied1: ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s8_m_tied1, svint8_t, ++ z0 = sveor_s8_m (p0, z0, z1), ++ z0 = sveor_m (p0, z0, z1)) ++ ++/* ++** eor_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** eor z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s8_m_tied2, svint8_t, ++ z0 = sveor_s8_m (p0, z1, z0), ++ z0 = sveor_m (p0, z1, z0)) ++ ++/* ++** eor_s8_m_untied: ++** movprfx z0, z1 ++** eor z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s8_m_untied, svint8_t, ++ z0 = sveor_s8_m (p0, z1, z2), ++ z0 = sveor_m (p0, z1, z2)) ++ ++/* ++** eor_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = sveor_n_s8_m (p0, z0, x0), ++ z0 = sveor_m (p0, z0, x0)) ++ ++/* ++** eor_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = sveor_n_s8_m (p0, z1, x0), ++ z0 = sveor_m (p0, z1, x0)) ++ ++/* ++** eor_1_s8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s8_m_tied1, svint8_t, ++ z0 = sveor_n_s8_m (p0, z0, 1), ++ z0 = sveor_m (p0, z0, 1)) ++ ++/* ++** eor_1_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s8_m_untied, svint8_t, ++ z0 = sveor_n_s8_m (p0, z1, 1), ++ z0 = sveor_m (p0, z1, 1)) ++ ++/* ++** eor_m2_s8_m: ++** mov (z[0-9]+\.b), #-2 ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m2_s8_m, svint8_t, ++ z0 = sveor_n_s8_m (p0, z0, -2), ++ z0 = sveor_m (p0, z0, -2)) ++ ++/* ++** eor_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s8_z_tied1, svint8_t, ++ z0 = sveor_s8_z (p0, z0, z1), ++ z0 = sveor_z (p0, z0, z1)) ++ ++/* ++** eor_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s8_z_tied2, svint8_t, ++ z0 = sveor_s8_z (p0, z1, z0), ++ z0 = sveor_z (p0, z1, z0)) ++ ++/* ++** eor_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** eor z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s8_z_untied, svint8_t, ++ z0 = sveor_s8_z (p0, z1, z2), ++ z0 = sveor_z (p0, z1, z2)) ++ ++/* ++** eor_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = sveor_n_s8_z (p0, z0, x0), ++ z0 = sveor_z (p0, z0, x0)) ++ ++/* ++** eor_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** eor z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = sveor_n_s8_z (p0, z1, x0), ++ z0 = sveor_z (p0, z1, x0)) ++ ++/* ++** eor_1_s8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** eor z0\.b, p0/m, z0\.b, \1 ++** 
ret ++*/ ++TEST_UNIFORM_Z (eor_1_s8_z_tied1, svint8_t, ++ z0 = sveor_n_s8_z (p0, z0, 1), ++ z0 = sveor_z (p0, z0, 1)) ++ ++/* ++** eor_1_s8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** eor z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s8_z_untied, svint8_t, ++ z0 = sveor_n_s8_z (p0, z1, 1), ++ z0 = sveor_z (p0, z1, 1)) ++ ++/* ++** eor_s8_x_tied1: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s8_x_tied1, svint8_t, ++ z0 = sveor_s8_x (p0, z0, z1), ++ z0 = sveor_x (p0, z0, z1)) ++ ++/* ++** eor_s8_x_tied2: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s8_x_tied2, svint8_t, ++ z0 = sveor_s8_x (p0, z1, z0), ++ z0 = sveor_x (p0, z1, z0)) ++ ++/* ++** eor_s8_x_untied: ++** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_s8_x_untied, svint8_t, ++ z0 = sveor_s8_x (p0, z1, z2), ++ z0 = sveor_x (p0, z1, z2)) ++ ++/* ++** eor_w0_s8_x_tied1: ++** mov (z[0-9]+)\.b, w0 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = sveor_n_s8_x (p0, z0, x0), ++ z0 = sveor_x (p0, z0, x0)) ++ ++/* ++** eor_w0_s8_x_untied: ++** mov (z[0-9]+)\.b, w0 ++** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = sveor_n_s8_x (p0, z1, x0), ++ z0 = sveor_x (p0, z1, x0)) ++ ++/* ++** eor_1_s8_x_tied1: ++** eor z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s8_x_tied1, svint8_t, ++ z0 = sveor_n_s8_x (p0, z0, 1), ++ z0 = sveor_x (p0, z0, 1)) ++ ++/* ++** eor_1_s8_x_untied: ++** movprfx z0, z1 ++** eor z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_s8_x_untied, svint8_t, ++ z0 = sveor_n_s8_x (p0, z1, 1), ++ z0 = sveor_x (p0, z1, 1)) ++ ++/* ++** eor_127_s8_x: ++** eor z0\.b, z0\.b, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (eor_127_s8_x, svint8_t, ++ z0 = sveor_n_s8_x (p0, z0, 127), ++ z0 = sveor_x (p0, z0, 127)) ++ ++/* ++** eor_128_s8_x: ++** eor z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_128_s8_x, svint8_t, ++ z0 = sveor_n_s8_x (p0, z0, 128), ++ z0 = sveor_x (p0, z0, 128)) ++ ++/* ++** eor_255_s8_x: ++** mov (z[0-9]+)\.b, #-1 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_255_s8_x, svint8_t, ++ z0 = sveor_n_s8_x (p0, z0, 255), ++ z0 = sveor_x (p0, z0, 255)) ++ ++/* ++** eor_m127_s8_x: ++** eor z0\.b, z0\.b, #0x81 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m127_s8_x, svint8_t, ++ z0 = sveor_n_s8_x (p0, z0, -127), ++ z0 = sveor_x (p0, z0, -127)) ++ ++/* ++** eor_m128_s8_x: ++** eor z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m128_s8_x, svint8_t, ++ z0 = sveor_n_s8_x (p0, z0, -128), ++ z0 = sveor_x (p0, z0, -128)) ++ ++/* ++** eor_5_s8_x: ++** mov (z[0-9]+)\.b, #5 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_5_s8_x, svint8_t, ++ z0 = sveor_n_s8_x (p0, z0, 5), ++ z0 = sveor_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u16.c +new file mode 100644 +index 000000000..40b43a5f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u16.c +@@ -0,0 +1,376 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eor_u16_m_tied1: ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u16_m_tied1, svuint16_t, 
++ z0 = sveor_u16_m (p0, z0, z1), ++ z0 = sveor_m (p0, z0, z1)) ++ ++/* ++** eor_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** eor z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u16_m_tied2, svuint16_t, ++ z0 = sveor_u16_m (p0, z1, z0), ++ z0 = sveor_m (p0, z1, z0)) ++ ++/* ++** eor_u16_m_untied: ++** movprfx z0, z1 ++** eor z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u16_m_untied, svuint16_t, ++ z0 = sveor_u16_m (p0, z1, z2), ++ z0 = sveor_m (p0, z1, z2)) ++ ++/* ++** eor_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = sveor_n_u16_m (p0, z0, x0), ++ z0 = sveor_m (p0, z0, x0)) ++ ++/* ++** eor_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = sveor_n_u16_m (p0, z1, x0), ++ z0 = sveor_m (p0, z1, x0)) ++ ++/* ++** eor_1_u16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u16_m_tied1, svuint16_t, ++ z0 = sveor_n_u16_m (p0, z0, 1), ++ z0 = sveor_m (p0, z0, 1)) ++ ++/* ++** eor_1_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u16_m_untied, svuint16_t, ++ z0 = sveor_n_u16_m (p0, z1, 1), ++ z0 = sveor_m (p0, z1, 1)) ++ ++/* ++** eor_m2_u16_m: ++** mov (z[0-9]+\.h), #-2 ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m2_u16_m, svuint16_t, ++ z0 = sveor_n_u16_m (p0, z0, -2), ++ z0 = sveor_m (p0, z0, -2)) ++ ++/* ++** eor_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u16_z_tied1, svuint16_t, ++ z0 = sveor_u16_z (p0, z0, z1), ++ z0 = sveor_z (p0, z0, z1)) ++ ++/* ++** eor_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u16_z_tied2, svuint16_t, ++ z0 = sveor_u16_z (p0, z1, z0), ++ z0 = sveor_z (p0, z1, z0)) ++ ++/* ++** eor_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** eor z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u16_z_untied, svuint16_t, ++ z0 = sveor_u16_z (p0, z1, z2), ++ z0 = sveor_z (p0, z1, z2)) ++ ++/* ++** eor_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = sveor_n_u16_z (p0, z0, x0), ++ z0 = sveor_z (p0, z0, x0)) ++ ++/* ++** eor_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** eor z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = sveor_n_u16_z (p0, z1, x0), ++ z0 = sveor_z (p0, z1, x0)) ++ ++/* ++** eor_1_u16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** eor z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u16_z_tied1, svuint16_t, ++ z0 = sveor_n_u16_z (p0, z0, 1), ++ z0 = sveor_z (p0, z0, 1)) ++ ++/* ++** eor_1_u16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** eor z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** eor z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ 
++TEST_UNIFORM_Z (eor_1_u16_z_untied, svuint16_t, ++ z0 = sveor_n_u16_z (p0, z1, 1), ++ z0 = sveor_z (p0, z1, 1)) ++ ++/* ++** eor_u16_x_tied1: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u16_x_tied1, svuint16_t, ++ z0 = sveor_u16_x (p0, z0, z1), ++ z0 = sveor_x (p0, z0, z1)) ++ ++/* ++** eor_u16_x_tied2: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u16_x_tied2, svuint16_t, ++ z0 = sveor_u16_x (p0, z1, z0), ++ z0 = sveor_x (p0, z1, z0)) ++ ++/* ++** eor_u16_x_untied: ++** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u16_x_untied, svuint16_t, ++ z0 = sveor_u16_x (p0, z1, z2), ++ z0 = sveor_x (p0, z1, z2)) ++ ++/* ++** eor_w0_u16_x_tied1: ++** mov (z[0-9]+)\.h, w0 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = sveor_n_u16_x (p0, z0, x0), ++ z0 = sveor_x (p0, z0, x0)) ++ ++/* ++** eor_w0_u16_x_untied: ++** mov (z[0-9]+)\.h, w0 ++** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = sveor_n_u16_x (p0, z1, x0), ++ z0 = sveor_x (p0, z1, x0)) ++ ++/* ++** eor_1_u16_x_tied1: ++** eor z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u16_x_tied1, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, 1), ++ z0 = sveor_x (p0, z0, 1)) ++ ++/* ++** eor_1_u16_x_untied: ++** movprfx z0, z1 ++** eor z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u16_x_untied, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z1, 1), ++ z0 = sveor_x (p0, z1, 1)) ++ ++/* ++** eor_127_u16_x: ++** eor z0\.h, z0\.h, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (eor_127_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, 127), ++ z0 = sveor_x (p0, z0, 127)) ++ ++/* ++** eor_128_u16_x: ++** eor z0\.h, z0\.h, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_128_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, 128), ++ z0 = sveor_x (p0, z0, 128)) ++ ++/* ++** eor_255_u16_x: ++** eor z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_255_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, 255), ++ z0 = sveor_x (p0, z0, 255)) ++ ++/* ++** eor_256_u16_x: ++** eor z0\.h, z0\.h, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_256_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, 256), ++ z0 = sveor_x (p0, z0, 256)) ++ ++/* ++** eor_257_u16_x: ++** eor z0\.h, z0\.h, #0x101 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_257_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, 257), ++ z0 = sveor_x (p0, z0, 257)) ++ ++/* ++** eor_512_u16_x: ++** eor z0\.h, z0\.h, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_512_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, 512), ++ z0 = sveor_x (p0, z0, 512)) ++ ++/* ++** eor_65280_u16_x: ++** eor z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_65280_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, 0xff00), ++ z0 = sveor_x (p0, z0, 0xff00)) ++ ++/* ++** eor_m127_u16_x: ++** eor z0\.h, z0\.h, #0xff81 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m127_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, -127), ++ z0 = sveor_x (p0, z0, -127)) ++ ++/* ++** eor_m128_u16_x: ++** eor z0\.h, z0\.h, #0xff80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m128_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, -128), ++ z0 = sveor_x (p0, z0, -128)) ++ ++/* ++** eor_m255_u16_x: ++** eor z0\.h, z0\.h, #0xff01 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m255_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, -255), ++ z0 = sveor_x (p0, z0, -255)) ++ ++/* ++** eor_m256_u16_x: ++** eor z0\.h, z0\.h, #0xff00 ++** ret ++*/ 
++TEST_UNIFORM_Z (eor_m256_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, -256), ++ z0 = sveor_x (p0, z0, -256)) ++ ++/* ++** eor_m257_u16_x: ++** eor z0\.h, z0\.h, #0xfeff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m257_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, -257), ++ z0 = sveor_x (p0, z0, -257)) ++ ++/* ++** eor_m512_u16_x: ++** eor z0\.h, z0\.h, #0xfe00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m512_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, -512), ++ z0 = sveor_x (p0, z0, -512)) ++ ++/* ++** eor_m32768_u16_x: ++** eor z0\.h, z0\.h, #0x8000 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m32768_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, -0x8000), ++ z0 = sveor_x (p0, z0, -0x8000)) ++ ++/* ++** eor_5_u16_x: ++** mov (z[0-9]+)\.h, #5 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_5_u16_x, svuint16_t, ++ z0 = sveor_n_u16_x (p0, z0, 5), ++ z0 = sveor_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u32.c +new file mode 100644 +index 000000000..8e46d08ca +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u32.c +@@ -0,0 +1,372 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eor_u32_m_tied1: ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u32_m_tied1, svuint32_t, ++ z0 = sveor_u32_m (p0, z0, z1), ++ z0 = sveor_m (p0, z0, z1)) ++ ++/* ++** eor_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** eor z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u32_m_tied2, svuint32_t, ++ z0 = sveor_u32_m (p0, z1, z0), ++ z0 = sveor_m (p0, z1, z0)) ++ ++/* ++** eor_u32_m_untied: ++** movprfx z0, z1 ++** eor z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u32_m_untied, svuint32_t, ++ z0 = sveor_u32_m (p0, z1, z2), ++ z0 = sveor_m (p0, z1, z2)) ++ ++/* ++** eor_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = sveor_n_u32_m (p0, z0, x0), ++ z0 = sveor_m (p0, z0, x0)) ++ ++/* ++** eor_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = sveor_n_u32_m (p0, z1, x0), ++ z0 = sveor_m (p0, z1, x0)) ++ ++/* ++** eor_1_u32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u32_m_tied1, svuint32_t, ++ z0 = sveor_n_u32_m (p0, z0, 1), ++ z0 = sveor_m (p0, z0, 1)) ++ ++/* ++** eor_1_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u32_m_untied, svuint32_t, ++ z0 = sveor_n_u32_m (p0, z1, 1), ++ z0 = sveor_m (p0, z1, 1)) ++ ++/* ++** eor_m2_u32_m: ++** mov (z[0-9]+\.s), #-2 ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m2_u32_m, svuint32_t, ++ z0 = sveor_n_u32_m (p0, z0, -2), ++ z0 = sveor_m (p0, z0, -2)) ++ ++/* ++** eor_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u32_z_tied1, svuint32_t, ++ z0 = sveor_u32_z (p0, z0, z1), ++ z0 = sveor_z (p0, z0, z1)) ++ ++/* ++** eor_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u32_z_tied2, svuint32_t, ++ z0 = sveor_u32_z (p0, z1, z0), ++ z0 = sveor_z (p0, z1, z0)) ++ ++/* ++** 
eor_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** eor z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u32_z_untied, svuint32_t, ++ z0 = sveor_u32_z (p0, z1, z2), ++ z0 = sveor_z (p0, z1, z2)) ++ ++/* ++** eor_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = sveor_n_u32_z (p0, z0, x0), ++ z0 = sveor_z (p0, z0, x0)) ++ ++/* ++** eor_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** eor z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = sveor_n_u32_z (p0, z1, x0), ++ z0 = sveor_z (p0, z1, x0)) ++ ++/* ++** eor_1_u32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** eor z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u32_z_tied1, svuint32_t, ++ z0 = sveor_n_u32_z (p0, z0, 1), ++ z0 = sveor_z (p0, z0, 1)) ++ ++/* ++** eor_1_u32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** eor z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** eor z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u32_z_untied, svuint32_t, ++ z0 = sveor_n_u32_z (p0, z1, 1), ++ z0 = sveor_z (p0, z1, 1)) ++ ++/* ++** eor_u32_x_tied1: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u32_x_tied1, svuint32_t, ++ z0 = sveor_u32_x (p0, z0, z1), ++ z0 = sveor_x (p0, z0, z1)) ++ ++/* ++** eor_u32_x_tied2: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u32_x_tied2, svuint32_t, ++ z0 = sveor_u32_x (p0, z1, z0), ++ z0 = sveor_x (p0, z1, z0)) ++ ++/* ++** eor_u32_x_untied: ++** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u32_x_untied, svuint32_t, ++ z0 = sveor_u32_x (p0, z1, z2), ++ z0 = sveor_x (p0, z1, z2)) ++ ++/* ++** eor_w0_u32_x_tied1: ++** mov (z[0-9]+)\.s, w0 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = sveor_n_u32_x (p0, z0, x0), ++ z0 = sveor_x (p0, z0, x0)) ++ ++/* ++** eor_w0_u32_x_untied: ++** mov (z[0-9]+)\.s, w0 ++** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = sveor_n_u32_x (p0, z1, x0), ++ z0 = sveor_x (p0, z1, x0)) ++ ++/* ++** eor_1_u32_x_tied1: ++** eor z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u32_x_tied1, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, 1), ++ z0 = sveor_x (p0, z0, 1)) ++ ++/* ++** eor_1_u32_x_untied: ++** movprfx z0, z1 ++** eor z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u32_x_untied, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z1, 1), ++ z0 = sveor_x (p0, z1, 1)) ++ ++/* ++** eor_127_u32_x: ++** eor z0\.s, z0\.s, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (eor_127_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, 127), ++ z0 = sveor_x (p0, z0, 127)) ++ ++/* ++** eor_128_u32_x: ++** eor z0\.s, z0\.s, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_128_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, 128), ++ z0 = sveor_x (p0, z0, 128)) ++ ++/* ++** eor_255_u32_x: ++** eor z0\.s, z0\.s, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_255_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, 255), ++ z0 = sveor_x (p0, z0, 255)) ++ ++/* ++** 
eor_256_u32_x: ++** eor z0\.s, z0\.s, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_256_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, 256), ++ z0 = sveor_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (eor_257_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, 257), ++ z0 = sveor_x (p0, z0, 257)) ++ ++/* ++** eor_512_u32_x: ++** eor z0\.s, z0\.s, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_512_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, 512), ++ z0 = sveor_x (p0, z0, 512)) ++ ++/* ++** eor_65280_u32_x: ++** eor z0\.s, z0\.s, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_65280_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, 0xff00), ++ z0 = sveor_x (p0, z0, 0xff00)) ++ ++/* ++** eor_m127_u32_x: ++** eor z0\.s, z0\.s, #0xffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m127_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, -127), ++ z0 = sveor_x (p0, z0, -127)) ++ ++/* ++** eor_m128_u32_x: ++** eor z0\.s, z0\.s, #0xffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m128_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, -128), ++ z0 = sveor_x (p0, z0, -128)) ++ ++/* ++** eor_m255_u32_x: ++** eor z0\.s, z0\.s, #0xffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m255_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, -255), ++ z0 = sveor_x (p0, z0, -255)) ++ ++/* ++** eor_m256_u32_x: ++** eor z0\.s, z0\.s, #0xffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m256_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, -256), ++ z0 = sveor_x (p0, z0, -256)) ++ ++/* ++** eor_m257_u32_x: ++** eor z0\.s, z0\.s, #0xfffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m257_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, -257), ++ z0 = sveor_x (p0, z0, -257)) ++ ++/* ++** eor_m512_u32_x: ++** eor z0\.s, z0\.s, #0xfffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m512_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, -512), ++ z0 = sveor_x (p0, z0, -512)) ++ ++/* ++** eor_m32768_u32_x: ++** eor z0\.s, z0\.s, #0xffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m32768_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, -0x8000), ++ z0 = sveor_x (p0, z0, -0x8000)) ++ ++/* ++** eor_5_u32_x: ++** mov (z[0-9]+)\.s, #5 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_5_u32_x, svuint32_t, ++ z0 = sveor_n_u32_x (p0, z0, 5), ++ z0 = sveor_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u64.c +new file mode 100644 +index 000000000..a82398f91 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u64.c +@@ -0,0 +1,372 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eor_u64_m_tied1: ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u64_m_tied1, svuint64_t, ++ z0 = sveor_u64_m (p0, z0, z1), ++ z0 = sveor_m (p0, z0, z1)) ++ ++/* ++** eor_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u64_m_tied2, svuint64_t, ++ z0 = sveor_u64_m (p0, z1, z0), ++ z0 = sveor_m (p0, z1, z0)) ++ ++/* ++** eor_u64_m_untied: ++** movprfx z0, z1 ++** eor z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u64_m_untied, svuint64_t, ++ z0 = sveor_u64_m (p0, z1, z2), ++ z0 = sveor_m (p0, z1, z2)) ++ ++/* ++** eor_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = sveor_n_u64_m (p0, z0, x0), ++ z0 = sveor_m (p0, z0, x0)) ++ ++/* ++** 
eor_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = sveor_n_u64_m (p0, z1, x0), ++ z0 = sveor_m (p0, z1, x0)) ++ ++/* ++** eor_1_u64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u64_m_tied1, svuint64_t, ++ z0 = sveor_n_u64_m (p0, z0, 1), ++ z0 = sveor_m (p0, z0, 1)) ++ ++/* ++** eor_1_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u64_m_untied, svuint64_t, ++ z0 = sveor_n_u64_m (p0, z1, 1), ++ z0 = sveor_m (p0, z1, 1)) ++ ++/* ++** eor_m2_u64_m: ++** mov (z[0-9]+\.d), #-2 ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m2_u64_m, svuint64_t, ++ z0 = sveor_n_u64_m (p0, z0, -2), ++ z0 = sveor_m (p0, z0, -2)) ++ ++/* ++** eor_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u64_z_tied1, svuint64_t, ++ z0 = sveor_u64_z (p0, z0, z1), ++ z0 = sveor_z (p0, z0, z1)) ++ ++/* ++** eor_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u64_z_tied2, svuint64_t, ++ z0 = sveor_u64_z (p0, z1, z0), ++ z0 = sveor_z (p0, z1, z0)) ++ ++/* ++** eor_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** eor z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u64_z_untied, svuint64_t, ++ z0 = sveor_u64_z (p0, z1, z2), ++ z0 = sveor_z (p0, z1, z2)) ++ ++/* ++** eor_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = sveor_n_u64_z (p0, z0, x0), ++ z0 = sveor_z (p0, z0, x0)) ++ ++/* ++** eor_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** eor z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = sveor_n_u64_z (p0, z1, x0), ++ z0 = sveor_z (p0, z1, x0)) ++ ++/* ++** eor_1_u64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** eor z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u64_z_tied1, svuint64_t, ++ z0 = sveor_n_u64_z (p0, z0, 1), ++ z0 = sveor_z (p0, z0, 1)) ++ ++/* ++** eor_1_u64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** eor z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** eor z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u64_z_untied, svuint64_t, ++ z0 = sveor_n_u64_z (p0, z1, 1), ++ z0 = sveor_z (p0, z1, 1)) ++ ++/* ++** eor_u64_x_tied1: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u64_x_tied1, svuint64_t, ++ z0 = sveor_u64_x (p0, z0, z1), ++ z0 = sveor_x (p0, z0, z1)) ++ ++/* ++** eor_u64_x_tied2: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u64_x_tied2, svuint64_t, ++ z0 = sveor_u64_x (p0, z1, z0), ++ z0 = sveor_x (p0, z1, z0)) ++ ++/* ++** eor_u64_x_untied: ++** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u64_x_untied, svuint64_t, ++ z0 = sveor_u64_x (p0, z1, z2), ++ z0 = sveor_x (p0, z1, z2)) ++ ++/* ++** eor_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** eor z0\.d, 
(z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = sveor_n_u64_x (p0, z0, x0), ++ z0 = sveor_x (p0, z0, x0)) ++ ++/* ++** eor_x0_u64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** eor z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = sveor_n_u64_x (p0, z1, x0), ++ z0 = sveor_x (p0, z1, x0)) ++ ++/* ++** eor_1_u64_x_tied1: ++** eor z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u64_x_tied1, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, 1), ++ z0 = sveor_x (p0, z0, 1)) ++ ++/* ++** eor_1_u64_x_untied: ++** movprfx z0, z1 ++** eor z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u64_x_untied, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z1, 1), ++ z0 = sveor_x (p0, z1, 1)) ++ ++/* ++** eor_127_u64_x: ++** eor z0\.d, z0\.d, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (eor_127_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, 127), ++ z0 = sveor_x (p0, z0, 127)) ++ ++/* ++** eor_128_u64_x: ++** eor z0\.d, z0\.d, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_128_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, 128), ++ z0 = sveor_x (p0, z0, 128)) ++ ++/* ++** eor_255_u64_x: ++** eor z0\.d, z0\.d, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_255_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, 255), ++ z0 = sveor_x (p0, z0, 255)) ++ ++/* ++** eor_256_u64_x: ++** eor z0\.d, z0\.d, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_256_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, 256), ++ z0 = sveor_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (eor_257_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, 257), ++ z0 = sveor_x (p0, z0, 257)) ++ ++/* ++** eor_512_u64_x: ++** eor z0\.d, z0\.d, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_512_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, 512), ++ z0 = sveor_x (p0, z0, 512)) ++ ++/* ++** eor_65280_u64_x: ++** eor z0\.d, z0\.d, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_65280_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, 0xff00), ++ z0 = sveor_x (p0, z0, 0xff00)) ++ ++/* ++** eor_m127_u64_x: ++** eor z0\.d, z0\.d, #0xffffffffffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m127_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, -127), ++ z0 = sveor_x (p0, z0, -127)) ++ ++/* ++** eor_m128_u64_x: ++** eor z0\.d, z0\.d, #0xffffffffffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m128_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, -128), ++ z0 = sveor_x (p0, z0, -128)) ++ ++/* ++** eor_m255_u64_x: ++** eor z0\.d, z0\.d, #0xffffffffffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m255_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, -255), ++ z0 = sveor_x (p0, z0, -255)) ++ ++/* ++** eor_m256_u64_x: ++** eor z0\.d, z0\.d, #0xffffffffffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m256_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, -256), ++ z0 = sveor_x (p0, z0, -256)) ++ ++/* ++** eor_m257_u64_x: ++** eor z0\.d, z0\.d, #0xfffffffffffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m257_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, -257), ++ z0 = sveor_x (p0, z0, -257)) ++ ++/* ++** eor_m512_u64_x: ++** eor z0\.d, z0\.d, #0xfffffffffffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m512_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, -512), ++ z0 = sveor_x (p0, z0, -512)) ++ ++/* ++** eor_m32768_u64_x: ++** eor z0\.d, z0\.d, #0xffffffffffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m32768_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, -0x8000), ++ z0 = sveor_x (p0, z0, -0x8000)) ++ ++/* ++** eor_5_u64_x: ++** mov (z[0-9]+\.d), #5 ++** 
eor z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_5_u64_x, svuint64_t, ++ z0 = sveor_n_u64_x (p0, z0, 5), ++ z0 = sveor_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u8.c +new file mode 100644 +index 000000000..006637699 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u8.c +@@ -0,0 +1,296 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eor_u8_m_tied1: ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u8_m_tied1, svuint8_t, ++ z0 = sveor_u8_m (p0, z0, z1), ++ z0 = sveor_m (p0, z0, z1)) ++ ++/* ++** eor_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** eor z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u8_m_tied2, svuint8_t, ++ z0 = sveor_u8_m (p0, z1, z0), ++ z0 = sveor_m (p0, z1, z0)) ++ ++/* ++** eor_u8_m_untied: ++** movprfx z0, z1 ++** eor z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u8_m_untied, svuint8_t, ++ z0 = sveor_u8_m (p0, z1, z2), ++ z0 = sveor_m (p0, z1, z2)) ++ ++/* ++** eor_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = sveor_n_u8_m (p0, z0, x0), ++ z0 = sveor_m (p0, z0, x0)) ++ ++/* ++** eor_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = sveor_n_u8_m (p0, z1, x0), ++ z0 = sveor_m (p0, z1, x0)) ++ ++/* ++** eor_1_u8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u8_m_tied1, svuint8_t, ++ z0 = sveor_n_u8_m (p0, z0, 1), ++ z0 = sveor_m (p0, z0, 1)) ++ ++/* ++** eor_1_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u8_m_untied, svuint8_t, ++ z0 = sveor_n_u8_m (p0, z1, 1), ++ z0 = sveor_m (p0, z1, 1)) ++ ++/* ++** eor_m2_u8_m: ++** mov (z[0-9]+\.b), #-2 ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m2_u8_m, svuint8_t, ++ z0 = sveor_n_u8_m (p0, z0, -2), ++ z0 = sveor_m (p0, z0, -2)) ++ ++/* ++** eor_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u8_z_tied1, svuint8_t, ++ z0 = sveor_u8_z (p0, z0, z1), ++ z0 = sveor_z (p0, z0, z1)) ++ ++/* ++** eor_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u8_z_tied2, svuint8_t, ++ z0 = sveor_u8_z (p0, z1, z0), ++ z0 = sveor_z (p0, z1, z0)) ++ ++/* ++** eor_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** eor z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u8_z_untied, svuint8_t, ++ z0 = sveor_u8_z (p0, z1, z2), ++ z0 = sveor_z (p0, z1, z2)) ++ ++/* ++** eor_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = sveor_n_u8_z (p0, z0, x0), ++ z0 = sveor_z (p0, z0, x0)) ++ ++/* ++** eor_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** eor z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ 
++TEST_UNIFORM_ZX (eor_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = sveor_n_u8_z (p0, z1, x0), ++ z0 = sveor_z (p0, z1, x0)) ++ ++/* ++** eor_1_u8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** eor z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u8_z_tied1, svuint8_t, ++ z0 = sveor_n_u8_z (p0, z0, 1), ++ z0 = sveor_z (p0, z0, 1)) ++ ++/* ++** eor_1_u8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** eor z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** eor z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u8_z_untied, svuint8_t, ++ z0 = sveor_n_u8_z (p0, z1, 1), ++ z0 = sveor_z (p0, z1, 1)) ++ ++/* ++** eor_u8_x_tied1: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u8_x_tied1, svuint8_t, ++ z0 = sveor_u8_x (p0, z0, z1), ++ z0 = sveor_x (p0, z0, z1)) ++ ++/* ++** eor_u8_x_tied2: ++** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u8_x_tied2, svuint8_t, ++ z0 = sveor_u8_x (p0, z1, z0), ++ z0 = sveor_x (p0, z1, z0)) ++ ++/* ++** eor_u8_x_untied: ++** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_u8_x_untied, svuint8_t, ++ z0 = sveor_u8_x (p0, z1, z2), ++ z0 = sveor_x (p0, z1, z2)) ++ ++/* ++** eor_w0_u8_x_tied1: ++** mov (z[0-9]+)\.b, w0 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = sveor_n_u8_x (p0, z0, x0), ++ z0 = sveor_x (p0, z0, x0)) ++ ++/* ++** eor_w0_u8_x_untied: ++** mov (z[0-9]+)\.b, w0 ++** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (eor_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = sveor_n_u8_x (p0, z1, x0), ++ z0 = sveor_x (p0, z1, x0)) ++ ++/* ++** eor_1_u8_x_tied1: ++** eor z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u8_x_tied1, svuint8_t, ++ z0 = sveor_n_u8_x (p0, z0, 1), ++ z0 = sveor_x (p0, z0, 1)) ++ ++/* ++** eor_1_u8_x_untied: ++** movprfx z0, z1 ++** eor z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_1_u8_x_untied, svuint8_t, ++ z0 = sveor_n_u8_x (p0, z1, 1), ++ z0 = sveor_x (p0, z1, 1)) ++ ++/* ++** eor_127_u8_x: ++** eor z0\.b, z0\.b, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (eor_127_u8_x, svuint8_t, ++ z0 = sveor_n_u8_x (p0, z0, 127), ++ z0 = sveor_x (p0, z0, 127)) ++ ++/* ++** eor_128_u8_x: ++** eor z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_128_u8_x, svuint8_t, ++ z0 = sveor_n_u8_x (p0, z0, 128), ++ z0 = sveor_x (p0, z0, 128)) ++ ++/* ++** eor_255_u8_x: ++** mov (z[0-9]+)\.b, #-1 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_255_u8_x, svuint8_t, ++ z0 = sveor_n_u8_x (p0, z0, 255), ++ z0 = sveor_x (p0, z0, 255)) ++ ++/* ++** eor_m127_u8_x: ++** eor z0\.b, z0\.b, #0x81 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m127_u8_x, svuint8_t, ++ z0 = sveor_n_u8_x (p0, z0, -127), ++ z0 = sveor_x (p0, z0, -127)) ++ ++/* ++** eor_m128_u8_x: ++** eor z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (eor_m128_u8_x, svuint8_t, ++ z0 = sveor_n_u8_x (p0, z0, -128), ++ z0 = sveor_x (p0, z0, -128)) ++ ++/* ++** eor_5_u8_x: ++** mov (z[0-9]+)\.b, #5 ++** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (eor_5_u8_x, svuint8_t, ++ z0 = sveor_n_u8_x (p0, z0, 5), ++ z0 = sveor_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s16.c +new file mode 100644 +index 000000000..0675d7ed9 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eorv_x0_s16: ++** eorv h([0-9]+), p0, z0\.h ++** umov w0, v\1\.h\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (eorv_x0_s16, int16_t, svint16_t, ++ x0 = sveorv_s16 (p0, z0), ++ x0 = sveorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s32.c +new file mode 100644 +index 000000000..9c0c1089f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eorv_x0_s32: ++** eorv (s[0-9]+), p0, z0\.s ++** fmov w0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (eorv_x0_s32, int32_t, svint32_t, ++ x0 = sveorv_s32 (p0, z0), ++ x0 = sveorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s64.c +new file mode 100644 +index 000000000..7a474556c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eorv_x0_s64: ++** eorv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (eorv_x0_s64, int64_t, svint64_t, ++ x0 = sveorv_s64 (p0, z0), ++ x0 = sveorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s8.c +new file mode 100644 +index 000000000..43f056d3a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eorv_x0_s8: ++** eorv b([0-9]+), p0, z0\.b ++** umov w0, v\1\.b\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (eorv_x0_s8, int8_t, svint8_t, ++ x0 = sveorv_s8 (p0, z0), ++ x0 = sveorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u16.c +new file mode 100644 +index 000000000..5f7836db4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eorv_x0_u16: ++** eorv h([0-9]+), p0, z0\.h ++** umov w0, v\1\.h\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (eorv_x0_u16, uint16_t, svuint16_t, ++ x0 = sveorv_u16 (p0, z0), ++ x0 = sveorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u32.c +new file mode 100644 +index 000000000..f112a0dc2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eorv_x0_u32: ++** eorv (s[0-9]+), p0, z0\.s ++** fmov w0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (eorv_x0_u32, uint32_t, svuint32_t, ++ x0 = sveorv_u32 (p0, z0), ++ x0 = sveorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u64.c +new file mode 100644 +index 000000000..5f8b8f86b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u64.c +@@ -0,0 +1,13 @@ ++/* { dg-final 
{ check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eorv_x0_u64: ++** eorv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (eorv_x0_u64, uint64_t, svuint64_t, ++ x0 = sveorv_u64 (p0, z0), ++ x0 = sveorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u8.c +new file mode 100644 +index 000000000..eed4d4915 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** eorv_x0_u8: ++** eorv b([0-9]+), p0, z0\.b ++** umov w0, v\1\.b\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (eorv_x0_u8, uint8_t, svuint8_t, ++ x0 = sveorv_u8 (p0, z0), ++ x0 = sveorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c +new file mode 100644 +index 000000000..5a5411e46 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** expa_f16_tied1: ++** fexpa z0\.h, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (expa_f16_tied1, svfloat16_t, svuint16_t, ++ z0_res = svexpa_f16 (z0), ++ z0_res = svexpa (z0)) ++ ++/* ++** expa_f16_untied: ++** fexpa z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (expa_f16_untied, svfloat16_t, svuint16_t, ++ z0 = svexpa_f16 (z4), ++ z0 = svexpa (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c +new file mode 100644 +index 000000000..4ded1c575 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** expa_f32_tied1: ++** fexpa z0\.s, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (expa_f32_tied1, svfloat32_t, svuint32_t, ++ z0_res = svexpa_f32 (z0), ++ z0_res = svexpa (z0)) ++ ++/* ++** expa_f32_untied: ++** fexpa z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (expa_f32_untied, svfloat32_t, svuint32_t, ++ z0 = svexpa_f32 (z4), ++ z0 = svexpa (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c +new file mode 100644 +index 000000000..c31f9ccb5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** expa_f64_tied1: ++** fexpa z0\.d, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (expa_f64_tied1, svfloat64_t, svuint64_t, ++ z0_res = svexpa_f64 (z0), ++ z0_res = svexpa (z0)) ++ ++/* ++** expa_f64_untied: ++** fexpa z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (expa_f64_untied, svfloat64_t, svuint64_t, ++ z0 = svexpa_f64 (z4), ++ z0 = svexpa (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_bf16.c +new file mode 100644 +index 000000000..f982873c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_bf16.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_bf16_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_bf16_tied1, svbfloat16_t, 
++ z0 = svext_bf16 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_bf16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_bf16_tied2, svbfloat16_t, ++ z0 = svext_bf16 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_bf16_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_bf16_untied, svbfloat16_t, ++ z0 = svext_bf16 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_bf16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_bf16, svbfloat16_t, ++ z0 = svext_bf16 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_bf16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #4 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_bf16, svbfloat16_t, ++ z0 = svext_bf16 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_bf16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #6 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_bf16, svbfloat16_t, ++ z0 = svext_bf16 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_127_bf16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_127_bf16, svbfloat16_t, ++ z0 = svext_bf16 (z1, z2, 127), ++ z0 = svext (z1, z2, 127)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f16.c +new file mode 100644 +index 000000000..d8edccb9f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f16.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_f16_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_f16_tied1, svfloat16_t, ++ z0 = svext_f16 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_f16_tied2, svfloat16_t, ++ z0 = svext_f16 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_f16_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_f16_untied, svfloat16_t, ++ z0 = svext_f16 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_f16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_f16, svfloat16_t, ++ z0 = svext_f16 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_f16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #4 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_f16, svfloat16_t, ++ z0 = svext_f16 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_f16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #6 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_f16, svfloat16_t, ++ z0 = svext_f16 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_127_f16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_127_f16, svfloat16_t, ++ z0 = svext_f16 (z1, z2, 127), ++ z0 = svext (z1, z2, 127)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f32.c +new file mode 100644 +index 000000000..c00ea06fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f32.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_f32_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_f32_tied1, 
svfloat32_t, ++ z0 = svext_f32 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_f32_tied2, svfloat32_t, ++ z0 = svext_f32 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_f32_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_f32_untied, svfloat32_t, ++ z0 = svext_f32 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_f32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #4 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_f32, svfloat32_t, ++ z0 = svext_f32 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_f32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_f32, svfloat32_t, ++ z0 = svext_f32 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_f32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #12 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_f32, svfloat32_t, ++ z0 = svext_f32 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_63_f32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #252 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_63_f32, svfloat32_t, ++ z0 = svext_f32 (z1, z2, 63), ++ z0 = svext (z1, z2, 63)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f64.c +new file mode 100644 +index 000000000..af72870ca +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f64.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_f64_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_f64_tied1, svfloat64_t, ++ z0 = svext_f64 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_f64_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_f64_tied2, svfloat64_t, ++ z0 = svext_f64 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_f64_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_f64_untied, svfloat64_t, ++ z0 = svext_f64 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_f64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_f64, svfloat64_t, ++ z0 = svext_f64 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_f64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_f64, svfloat64_t, ++ z0 = svext_f64 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_f64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #24 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_f64, svfloat64_t, ++ z0 = svext_f64 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_31_f64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #248 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_31_f64, svfloat64_t, ++ z0 = svext_f64 (z1, z2, 31), ++ z0 = svext (z1, z2, 31)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s16.c +new file mode 100644 +index 000000000..a7c4484ac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s16.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_s16_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s16_tied1, svint16_t, ++ z0 = 
svext_s16 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_s16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s16_tied2, svint16_t, ++ z0 = svext_s16 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_s16_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s16_untied, svint16_t, ++ z0 = svext_s16 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_s16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_s16, svint16_t, ++ z0 = svext_s16 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_s16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #4 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_s16, svint16_t, ++ z0 = svext_s16 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_s16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #6 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_s16, svint16_t, ++ z0 = svext_s16 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_127_s16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_127_s16, svint16_t, ++ z0 = svext_s16 (z1, z2, 127), ++ z0 = svext (z1, z2, 127)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s32.c +new file mode 100644 +index 000000000..68242a9ec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s32.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_s32_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s32_tied1, svint32_t, ++ z0 = svext_s32 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s32_tied2, svint32_t, ++ z0 = svext_s32 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_s32_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s32_untied, svint32_t, ++ z0 = svext_s32 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_s32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #4 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_s32, svint32_t, ++ z0 = svext_s32 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_s32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_s32, svint32_t, ++ z0 = svext_s32 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_s32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #12 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_s32, svint32_t, ++ z0 = svext_s32 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_63_s32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #252 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_63_s32, svint32_t, ++ z0 = svext_s32 (z1, z2, 63), ++ z0 = svext (z1, z2, 63)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s64.c +new file mode 100644 +index 000000000..8bdbd0561 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s64.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_s64_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s64_tied1, svint64_t, ++ z0 = svext_s64 (z0, z1, 0), ++ z0 = svext (z0, z1, 
0)) ++ ++/* ++** ext_0_s64_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s64_tied2, svint64_t, ++ z0 = svext_s64 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_s64_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s64_untied, svint64_t, ++ z0 = svext_s64 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_s64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_s64, svint64_t, ++ z0 = svext_s64 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_s64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_s64, svint64_t, ++ z0 = svext_s64 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_s64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #24 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_s64, svint64_t, ++ z0 = svext_s64 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_31_s64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #248 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_31_s64, svint64_t, ++ z0 = svext_s64 (z1, z2, 31), ++ z0 = svext (z1, z2, 31)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s8.c +new file mode 100644 +index 000000000..52490f00e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s8.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_s8_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s8_tied1, svint8_t, ++ z0 = svext_s8 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_s8_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s8_tied2, svint8_t, ++ z0 = svext_s8 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_s8_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_s8_untied, svint8_t, ++ z0 = svext_s8 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_s8: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_s8, svint8_t, ++ z0 = svext_s8 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_s8: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_s8, svint8_t, ++ z0 = svext_s8 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_s8: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #3 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_s8, svint8_t, ++ z0 = svext_s8 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_255_s8: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_255_s8, svint8_t, ++ z0 = svext_s8 (z1, z2, 255), ++ z0 = svext (z1, z2, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u16.c +new file mode 100644 +index 000000000..dc7574ffa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u16.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_u16_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u16_tied1, svuint16_t, ++ z0 = svext_u16 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_u16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx 
z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u16_tied2, svuint16_t, ++ z0 = svext_u16 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_u16_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u16_untied, svuint16_t, ++ z0 = svext_u16 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_u16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_u16, svuint16_t, ++ z0 = svext_u16 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_u16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #4 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_u16, svuint16_t, ++ z0 = svext_u16 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_u16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #6 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_u16, svuint16_t, ++ z0 = svext_u16 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_127_u16: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_127_u16, svuint16_t, ++ z0 = svext_u16 (z1, z2, 127), ++ z0 = svext (z1, z2, 127)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u32.c +new file mode 100644 +index 000000000..0d417fc43 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u32.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_u32_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u32_tied1, svuint32_t, ++ z0 = svext_u32 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_u32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u32_tied2, svuint32_t, ++ z0 = svext_u32 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_u32_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u32_untied, svuint32_t, ++ z0 = svext_u32 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_u32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #4 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_u32, svuint32_t, ++ z0 = svext_u32 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_u32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_u32, svuint32_t, ++ z0 = svext_u32 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_u32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #12 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_u32, svuint32_t, ++ z0 = svext_u32 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_63_u32: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #252 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_63_u32, svuint32_t, ++ z0 = svext_u32 (z1, z2, 63), ++ z0 = svext (z1, z2, 63)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u64.c +new file mode 100644 +index 000000000..ed81f811e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u64.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_u64_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u64_tied1, svuint64_t, ++ z0 = svext_u64 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_u64_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, 
\1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u64_tied2, svuint64_t, ++ z0 = svext_u64 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_u64_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u64_untied, svuint64_t, ++ z0 = svext_u64 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_u64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_u64, svuint64_t, ++ z0 = svext_u64 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_u64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_u64, svuint64_t, ++ z0 = svext_u64 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_u64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #24 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_u64, svuint64_t, ++ z0 = svext_u64 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_31_u64: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #248 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_31_u64, svuint64_t, ++ z0 = svext_u64 (z1, z2, 31), ++ z0 = svext (z1, z2, 31)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u8.c +new file mode 100644 +index 000000000..6c061406b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u8.c +@@ -0,0 +1,73 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ext_0_u8_tied1: ++** ext z0\.b, z0\.b, z1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u8_tied1, svuint8_t, ++ z0 = svext_u8 (z0, z1, 0), ++ z0 = svext (z0, z1, 0)) ++ ++/* ++** ext_0_u8_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, \1\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u8_tied2, svuint8_t, ++ z0 = svext_u8 (z1, z0, 0), ++ z0 = svext (z1, z0, 0)) ++ ++/* ++** ext_0_u8_untied: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_0_u8_untied, svuint8_t, ++ z0 = svext_u8 (z1, z2, 0), ++ z0 = svext (z1, z2, 0)) ++ ++/* ++** ext_1_u8: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_1_u8, svuint8_t, ++ z0 = svext_u8 (z1, z2, 1), ++ z0 = svext (z1, z2, 1)) ++ ++/* ++** ext_2_u8: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_2_u8, svuint8_t, ++ z0 = svext_u8 (z1, z2, 2), ++ z0 = svext (z1, z2, 2)) ++ ++/* ++** ext_3_u8: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #3 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_3_u8, svuint8_t, ++ z0 = svext_u8 (z1, z2, 3), ++ z0 = svext (z1, z2, 3)) ++ ++/* ++** ext_255_u8: ++** movprfx z0, z1 ++** ext z0\.b, z0\.b, z2\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (ext_255_u8, svuint8_t, ++ z0 = svext_u8 (z1, z2, 255), ++ z0 = svext (z1, z2, 255)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s16.c +new file mode 100644 +index 000000000..32e836f01 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** extb_s16_m_tied12: ++** sxtb z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s16_m_tied12, svint16_t, ++ z0 = svextb_s16_m (z0, p0, z0), ++ z0 = svextb_m (z0, p0, z0)) ++ ++/* ++** extb_s16_m_tied1: ++** sxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s16_m_tied1, svint16_t, ++ z0 = svextb_s16_m (z0, p0, z1), 
++ z0 = svextb_m (z0, p0, z1)) ++ ++/* ++** extb_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sxtb z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s16_m_tied2, svint16_t, ++ z0 = svextb_s16_m (z1, p0, z0), ++ z0 = svextb_m (z1, p0, z0)) ++ ++/* ++** extb_s16_m_untied: ++** movprfx z0, z2 ++** sxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s16_m_untied, svint16_t, ++ z0 = svextb_s16_m (z2, p0, z1), ++ z0 = svextb_m (z2, p0, z1)) ++ ++/* ++** extb_s16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** sxtb z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s16_z_tied1, svint16_t, ++ z0 = svextb_s16_z (p0, z0), ++ z0 = svextb_z (p0, z0)) ++ ++/* ++** extb_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** sxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s16_z_untied, svint16_t, ++ z0 = svextb_s16_z (p0, z1), ++ z0 = svextb_z (p0, z1)) ++ ++/* ++** extb_s16_x_tied1: ++** sxtb z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s16_x_tied1, svint16_t, ++ z0 = svextb_s16_x (p0, z0), ++ z0 = svextb_x (p0, z0)) ++ ++/* ++** extb_s16_x_untied: ++** sxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s16_x_untied, svint16_t, ++ z0 = svextb_s16_x (p0, z1), ++ z0 = svextb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s32.c +new file mode 100644 +index 000000000..e2f13f41c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** extb_s32_m_tied12: ++** sxtb z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s32_m_tied12, svint32_t, ++ z0 = svextb_s32_m (z0, p0, z0), ++ z0 = svextb_m (z0, p0, z0)) ++ ++/* ++** extb_s32_m_tied1: ++** sxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s32_m_tied1, svint32_t, ++ z0 = svextb_s32_m (z0, p0, z1), ++ z0 = svextb_m (z0, p0, z1)) ++ ++/* ++** extb_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sxtb z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s32_m_tied2, svint32_t, ++ z0 = svextb_s32_m (z1, p0, z0), ++ z0 = svextb_m (z1, p0, z0)) ++ ++/* ++** extb_s32_m_untied: ++** movprfx z0, z2 ++** sxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s32_m_untied, svint32_t, ++ z0 = svextb_s32_m (z2, p0, z1), ++ z0 = svextb_m (z2, p0, z1)) ++ ++/* ++** extb_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** sxtb z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s32_z_tied1, svint32_t, ++ z0 = svextb_s32_z (p0, z0), ++ z0 = svextb_z (p0, z0)) ++ ++/* ++** extb_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** sxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s32_z_untied, svint32_t, ++ z0 = svextb_s32_z (p0, z1), ++ z0 = svextb_z (p0, z1)) ++ ++/* ++** extb_s32_x_tied1: ++** sxtb z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s32_x_tied1, svint32_t, ++ z0 = svextb_s32_x (p0, z0), ++ z0 = svextb_x (p0, z0)) ++ ++/* ++** extb_s32_x_untied: ++** sxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s32_x_untied, svint32_t, ++ z0 = svextb_s32_x (p0, z1), ++ z0 = svextb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s64.c +new file mode 100644 +index 000000000..83363efdb +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** extb_s64_m_tied12: ++** sxtb z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s64_m_tied12, svint64_t, ++ z0 = svextb_s64_m (z0, p0, z0), ++ z0 = svextb_m (z0, p0, z0)) ++ ++/* ++** extb_s64_m_tied1: ++** sxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s64_m_tied1, svint64_t, ++ z0 = svextb_s64_m (z0, p0, z1), ++ z0 = svextb_m (z0, p0, z1)) ++ ++/* ++** extb_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** sxtb z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s64_m_tied2, svint64_t, ++ z0 = svextb_s64_m (z1, p0, z0), ++ z0 = svextb_m (z1, p0, z0)) ++ ++/* ++** extb_s64_m_untied: ++** movprfx z0, z2 ++** sxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s64_m_untied, svint64_t, ++ z0 = svextb_s64_m (z2, p0, z1), ++ z0 = svextb_m (z2, p0, z1)) ++ ++/* ++** extb_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** sxtb z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s64_z_tied1, svint64_t, ++ z0 = svextb_s64_z (p0, z0), ++ z0 = svextb_z (p0, z0)) ++ ++/* ++** extb_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** sxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s64_z_untied, svint64_t, ++ z0 = svextb_s64_z (p0, z1), ++ z0 = svextb_z (p0, z1)) ++ ++/* ++** extb_s64_x_tied1: ++** sxtb z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s64_x_tied1, svint64_t, ++ z0 = svextb_s64_x (p0, z0), ++ z0 = svextb_x (p0, z0)) ++ ++/* ++** extb_s64_x_untied: ++** sxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extb_s64_x_untied, svint64_t, ++ z0 = svextb_s64_x (p0, z1), ++ z0 = svextb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u16.c +new file mode 100644 +index 000000000..d806edfaa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u16.c +@@ -0,0 +1,82 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** extb_u16_m_tied12: ++** uxtb z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u16_m_tied12, svuint16_t, ++ z0 = svextb_u16_m (z0, p0, z0), ++ z0 = svextb_m (z0, p0, z0)) ++ ++/* ++** extb_u16_m_tied1: ++** uxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u16_m_tied1, svuint16_t, ++ z0 = svextb_u16_m (z0, p0, z1), ++ z0 = svextb_m (z0, p0, z1)) ++ ++/* ++** extb_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** uxtb z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u16_m_tied2, svuint16_t, ++ z0 = svextb_u16_m (z1, p0, z0), ++ z0 = svextb_m (z1, p0, z0)) ++ ++/* ++** extb_u16_m_untied: ++** movprfx z0, z2 ++** uxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u16_m_untied, svuint16_t, ++ z0 = svextb_u16_m (z2, p0, z1), ++ z0 = svextb_m (z2, p0, z1)) ++ ++/* ++** extb_u16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** uxtb z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u16_z_tied1, svuint16_t, ++ z0 = svextb_u16_z (p0, z0), ++ z0 = svextb_z (p0, z0)) ++ ++/* ++** extb_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** uxtb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u16_z_untied, svuint16_t, ++ z0 = svextb_u16_z (p0, z1), ++ z0 = svextb_z (p0, z1)) ++ ++/* ++** extb_u16_x_tied1: ++** and z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z 
(extb_u16_x_tied1, svuint16_t, ++ z0 = svextb_u16_x (p0, z0), ++ z0 = svextb_x (p0, z0)) ++ ++/* ++** extb_u16_x_untied: ++** movprfx z0, z1 ++** and z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u16_x_untied, svuint16_t, ++ z0 = svextb_u16_x (p0, z1), ++ z0 = svextb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u32.c +new file mode 100644 +index 000000000..274656dbd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u32.c +@@ -0,0 +1,82 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** extb_u32_m_tied12: ++** uxtb z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u32_m_tied12, svuint32_t, ++ z0 = svextb_u32_m (z0, p0, z0), ++ z0 = svextb_m (z0, p0, z0)) ++ ++/* ++** extb_u32_m_tied1: ++** uxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u32_m_tied1, svuint32_t, ++ z0 = svextb_u32_m (z0, p0, z1), ++ z0 = svextb_m (z0, p0, z1)) ++ ++/* ++** extb_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** uxtb z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u32_m_tied2, svuint32_t, ++ z0 = svextb_u32_m (z1, p0, z0), ++ z0 = svextb_m (z1, p0, z0)) ++ ++/* ++** extb_u32_m_untied: ++** movprfx z0, z2 ++** uxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u32_m_untied, svuint32_t, ++ z0 = svextb_u32_m (z2, p0, z1), ++ z0 = svextb_m (z2, p0, z1)) ++ ++/* ++** extb_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** uxtb z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u32_z_tied1, svuint32_t, ++ z0 = svextb_u32_z (p0, z0), ++ z0 = svextb_z (p0, z0)) ++ ++/* ++** extb_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** uxtb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u32_z_untied, svuint32_t, ++ z0 = svextb_u32_z (p0, z1), ++ z0 = svextb_z (p0, z1)) ++ ++/* ++** extb_u32_x_tied1: ++** and z0\.s, z0\.s, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u32_x_tied1, svuint32_t, ++ z0 = svextb_u32_x (p0, z0), ++ z0 = svextb_x (p0, z0)) ++ ++/* ++** extb_u32_x_untied: ++** movprfx z0, z1 ++** and z0\.s, z0\.s, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u32_x_untied, svuint32_t, ++ z0 = svextb_u32_x (p0, z1), ++ z0 = svextb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u64.c +new file mode 100644 +index 000000000..de24cc605 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u64.c +@@ -0,0 +1,82 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** extb_u64_m_tied12: ++** uxtb z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u64_m_tied12, svuint64_t, ++ z0 = svextb_u64_m (z0, p0, z0), ++ z0 = svextb_m (z0, p0, z0)) ++ ++/* ++** extb_u64_m_tied1: ++** uxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u64_m_tied1, svuint64_t, ++ z0 = svextb_u64_m (z0, p0, z1), ++ z0 = svextb_m (z0, p0, z1)) ++ ++/* ++** extb_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** uxtb z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u64_m_tied2, svuint64_t, ++ z0 = svextb_u64_m (z1, p0, z0), ++ z0 = svextb_m (z1, p0, z0)) ++ ++/* ++** extb_u64_m_untied: ++** movprfx z0, z2 ++** uxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u64_m_untied, svuint64_t, ++ z0 = svextb_u64_m (z2, p0, z1), ++ z0 = svextb_m (z2, p0, z1)) ++ ++/* ++** 
extb_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** uxtb z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u64_z_tied1, svuint64_t, ++ z0 = svextb_u64_z (p0, z0), ++ z0 = svextb_z (p0, z0)) ++ ++/* ++** extb_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** uxtb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u64_z_untied, svuint64_t, ++ z0 = svextb_u64_z (p0, z1), ++ z0 = svextb_z (p0, z1)) ++ ++/* ++** extb_u64_x_tied1: ++** and z0\.d, z0\.d, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u64_x_tied1, svuint64_t, ++ z0 = svextb_u64_x (p0, z0), ++ z0 = svextb_x (p0, z0)) ++ ++/* ++** extb_u64_x_untied: ++** movprfx z0, z1 ++** and z0\.d, z0\.d, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (extb_u64_x_untied, svuint64_t, ++ z0 = svextb_u64_x (p0, z1), ++ z0 = svextb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s32.c +new file mode 100644 +index 000000000..3bb0bf31f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** exth_s32_m_tied12: ++** sxth z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s32_m_tied12, svint32_t, ++ z0 = svexth_s32_m (z0, p0, z0), ++ z0 = svexth_m (z0, p0, z0)) ++ ++/* ++** exth_s32_m_tied1: ++** sxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s32_m_tied1, svint32_t, ++ z0 = svexth_s32_m (z0, p0, z1), ++ z0 = svexth_m (z0, p0, z1)) ++ ++/* ++** exth_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sxth z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s32_m_tied2, svint32_t, ++ z0 = svexth_s32_m (z1, p0, z0), ++ z0 = svexth_m (z1, p0, z0)) ++ ++/* ++** exth_s32_m_untied: ++** movprfx z0, z2 ++** sxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s32_m_untied, svint32_t, ++ z0 = svexth_s32_m (z2, p0, z1), ++ z0 = svexth_m (z2, p0, z1)) ++ ++/* ++** exth_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** sxth z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s32_z_tied1, svint32_t, ++ z0 = svexth_s32_z (p0, z0), ++ z0 = svexth_z (p0, z0)) ++ ++/* ++** exth_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** sxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s32_z_untied, svint32_t, ++ z0 = svexth_s32_z (p0, z1), ++ z0 = svexth_z (p0, z1)) ++ ++/* ++** exth_s32_x_tied1: ++** sxth z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s32_x_tied1, svint32_t, ++ z0 = svexth_s32_x (p0, z0), ++ z0 = svexth_x (p0, z0)) ++ ++/* ++** exth_s32_x_untied: ++** sxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s32_x_untied, svint32_t, ++ z0 = svexth_s32_x (p0, z1), ++ z0 = svexth_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s64.c +new file mode 100644 +index 000000000..0718b67ad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** exth_s64_m_tied12: ++** sxth z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s64_m_tied12, svint64_t, ++ z0 = svexth_s64_m (z0, p0, z0), ++ z0 = svexth_m (z0, p0, z0)) ++ ++/* ++** exth_s64_m_tied1: ++** sxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s64_m_tied1, svint64_t, ++ z0 = svexth_s64_m (z0, p0, z1), 
++ z0 = svexth_m (z0, p0, z1)) ++ ++/* ++** exth_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** sxth z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s64_m_tied2, svint64_t, ++ z0 = svexth_s64_m (z1, p0, z0), ++ z0 = svexth_m (z1, p0, z0)) ++ ++/* ++** exth_s64_m_untied: ++** movprfx z0, z2 ++** sxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s64_m_untied, svint64_t, ++ z0 = svexth_s64_m (z2, p0, z1), ++ z0 = svexth_m (z2, p0, z1)) ++ ++/* ++** exth_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** sxth z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s64_z_tied1, svint64_t, ++ z0 = svexth_s64_z (p0, z0), ++ z0 = svexth_z (p0, z0)) ++ ++/* ++** exth_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** sxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s64_z_untied, svint64_t, ++ z0 = svexth_s64_z (p0, z1), ++ z0 = svexth_z (p0, z1)) ++ ++/* ++** exth_s64_x_tied1: ++** sxth z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s64_x_tied1, svint64_t, ++ z0 = svexth_s64_x (p0, z0), ++ z0 = svexth_x (p0, z0)) ++ ++/* ++** exth_s64_x_untied: ++** sxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (exth_s64_x_untied, svint64_t, ++ z0 = svexth_s64_x (p0, z1), ++ z0 = svexth_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u32.c +new file mode 100644 +index 000000000..1ba7fc8c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u32.c +@@ -0,0 +1,82 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** exth_u32_m_tied12: ++** uxth z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u32_m_tied12, svuint32_t, ++ z0 = svexth_u32_m (z0, p0, z0), ++ z0 = svexth_m (z0, p0, z0)) ++ ++/* ++** exth_u32_m_tied1: ++** uxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u32_m_tied1, svuint32_t, ++ z0 = svexth_u32_m (z0, p0, z1), ++ z0 = svexth_m (z0, p0, z1)) ++ ++/* ++** exth_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** uxth z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u32_m_tied2, svuint32_t, ++ z0 = svexth_u32_m (z1, p0, z0), ++ z0 = svexth_m (z1, p0, z0)) ++ ++/* ++** exth_u32_m_untied: ++** movprfx z0, z2 ++** uxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u32_m_untied, svuint32_t, ++ z0 = svexth_u32_m (z2, p0, z1), ++ z0 = svexth_m (z2, p0, z1)) ++ ++/* ++** exth_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** uxth z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u32_z_tied1, svuint32_t, ++ z0 = svexth_u32_z (p0, z0), ++ z0 = svexth_z (p0, z0)) ++ ++/* ++** exth_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** uxth z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u32_z_untied, svuint32_t, ++ z0 = svexth_u32_z (p0, z1), ++ z0 = svexth_z (p0, z1)) ++ ++/* ++** exth_u32_x_tied1: ++** and z0\.s, z0\.s, #0xffff ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u32_x_tied1, svuint32_t, ++ z0 = svexth_u32_x (p0, z0), ++ z0 = svexth_x (p0, z0)) ++ ++/* ++** exth_u32_x_untied: ++** movprfx z0, z1 ++** and z0\.s, z0\.s, #0xffff ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u32_x_untied, svuint32_t, ++ z0 = svexth_u32_x (p0, z1), ++ z0 = svexth_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u64.c +new file mode 100644 +index 000000000..1555cf0b7 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u64.c +@@ -0,0 +1,82 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** exth_u64_m_tied12: ++** uxth z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u64_m_tied12, svuint64_t, ++ z0 = svexth_u64_m (z0, p0, z0), ++ z0 = svexth_m (z0, p0, z0)) ++ ++/* ++** exth_u64_m_tied1: ++** uxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u64_m_tied1, svuint64_t, ++ z0 = svexth_u64_m (z0, p0, z1), ++ z0 = svexth_m (z0, p0, z1)) ++ ++/* ++** exth_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** uxth z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u64_m_tied2, svuint64_t, ++ z0 = svexth_u64_m (z1, p0, z0), ++ z0 = svexth_m (z1, p0, z0)) ++ ++/* ++** exth_u64_m_untied: ++** movprfx z0, z2 ++** uxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u64_m_untied, svuint64_t, ++ z0 = svexth_u64_m (z2, p0, z1), ++ z0 = svexth_m (z2, p0, z1)) ++ ++/* ++** exth_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** uxth z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u64_z_tied1, svuint64_t, ++ z0 = svexth_u64_z (p0, z0), ++ z0 = svexth_z (p0, z0)) ++ ++/* ++** exth_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** uxth z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u64_z_untied, svuint64_t, ++ z0 = svexth_u64_z (p0, z1), ++ z0 = svexth_z (p0, z1)) ++ ++/* ++** exth_u64_x_tied1: ++** and z0\.d, z0\.d, #0xffff ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u64_x_tied1, svuint64_t, ++ z0 = svexth_u64_x (p0, z0), ++ z0 = svexth_x (p0, z0)) ++ ++/* ++** exth_u64_x_untied: ++** movprfx z0, z1 ++** and z0\.d, z0\.d, #0xffff ++** ret ++*/ ++TEST_UNIFORM_Z (exth_u64_x_untied, svuint64_t, ++ z0 = svexth_u64_x (p0, z1), ++ z0 = svexth_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_s64.c +new file mode 100644 +index 000000000..a6edadfa7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** extw_s64_m_tied12: ++** sxtw z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extw_s64_m_tied12, svint64_t, ++ z0 = svextw_s64_m (z0, p0, z0), ++ z0 = svextw_m (z0, p0, z0)) ++ ++/* ++** extw_s64_m_tied1: ++** sxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extw_s64_m_tied1, svint64_t, ++ z0 = svextw_s64_m (z0, p0, z1), ++ z0 = svextw_m (z0, p0, z1)) ++ ++/* ++** extw_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** sxtw z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (extw_s64_m_tied2, svint64_t, ++ z0 = svextw_s64_m (z1, p0, z0), ++ z0 = svextw_m (z1, p0, z0)) ++ ++/* ++** extw_s64_m_untied: ++** movprfx z0, z2 ++** sxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extw_s64_m_untied, svint64_t, ++ z0 = svextw_s64_m (z2, p0, z1), ++ z0 = svextw_m (z2, p0, z1)) ++ ++/* ++** extw_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** sxtw z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (extw_s64_z_tied1, svint64_t, ++ z0 = svextw_s64_z (p0, z0), ++ z0 = svextw_z (p0, z0)) ++ ++/* ++** extw_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** sxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extw_s64_z_untied, svint64_t, ++ z0 = svextw_s64_z (p0, z1), ++ z0 = svextw_z (p0, z1)) ++ ++/* ++** extw_s64_x_tied1: ++** sxtw z0\.d, p0/m, z0\.d ++** ret ++*/ 
++TEST_UNIFORM_Z (extw_s64_x_tied1, svint64_t, ++ z0 = svextw_s64_x (p0, z0), ++ z0 = svextw_x (p0, z0)) ++ ++/* ++** extw_s64_x_untied: ++** sxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extw_s64_x_untied, svint64_t, ++ z0 = svextw_s64_x (p0, z1), ++ z0 = svextw_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_u64.c +new file mode 100644 +index 000000000..880a287f3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_u64.c +@@ -0,0 +1,82 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** extw_u64_m_tied12: ++** uxtw z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extw_u64_m_tied12, svuint64_t, ++ z0 = svextw_u64_m (z0, p0, z0), ++ z0 = svextw_m (z0, p0, z0)) ++ ++/* ++** extw_u64_m_tied1: ++** uxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extw_u64_m_tied1, svuint64_t, ++ z0 = svextw_u64_m (z0, p0, z1), ++ z0 = svextw_m (z0, p0, z1)) ++ ++/* ++** extw_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** uxtw z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (extw_u64_m_tied2, svuint64_t, ++ z0 = svextw_u64_m (z1, p0, z0), ++ z0 = svextw_m (z1, p0, z0)) ++ ++/* ++** extw_u64_m_untied: ++** movprfx z0, z2 ++** uxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extw_u64_m_untied, svuint64_t, ++ z0 = svextw_u64_m (z2, p0, z1), ++ z0 = svextw_m (z2, p0, z1)) ++ ++/* ++** extw_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** uxtw z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (extw_u64_z_tied1, svuint64_t, ++ z0 = svextw_u64_z (p0, z0), ++ z0 = svextw_z (p0, z0)) ++ ++/* ++** extw_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** uxtw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (extw_u64_z_untied, svuint64_t, ++ z0 = svextw_u64_z (p0, z1), ++ z0 = svextw_z (p0, z1)) ++ ++/* ++** extw_u64_x_tied1: ++** and z0\.d, z0\.d, #0xffffffff ++** ret ++*/ ++TEST_UNIFORM_Z (extw_u64_x_tied1, svuint64_t, ++ z0 = svextw_u64_x (p0, z0), ++ z0 = svextw_x (p0, z0)) ++ ++/* ++** extw_u64_x_untied: ++** movprfx z0, z1 ++** and z0\.d, z0\.d, #0xffffffff ++** ret ++*/ ++TEST_UNIFORM_Z (extw_u64_x_untied, svuint64_t, ++ z0 = svextw_u64_x (p0, z1), ++ z0 = svextw_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_bf16.c +new file mode 100644 +index 000000000..6e5c773b5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_bf16.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_bf16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_bf16_z0_0, svbfloat16x2_t, svbfloat16_t, ++ z0 = svget2_bf16 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_bf16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_bf16_z0_1, svbfloat16x2_t, svbfloat16_t, ++ z0 = svget2_bf16 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_bf16_z4_0: ++** ret ++*/ ++TEST_GET (get2_bf16_z4_0, svbfloat16x2_t, svbfloat16_t, ++ z4_res = svget2_bf16 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_bf16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_bf16_z4_1, svbfloat16x2_t, svbfloat16_t, ++ z4_res = svget2_bf16 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_bf16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_bf16_z5_0, svbfloat16x2_t, svbfloat16_t, ++ z5_res = svget2_bf16 
(z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_bf16_z5_1: ++** ret ++*/ ++TEST_GET (get2_bf16_z5_1, svbfloat16x2_t, svbfloat16_t, ++ z5_res = svget2_bf16 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f16.c +new file mode 100644 +index 000000000..9b6379e0b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f16.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_f16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_f16_z0_0, svfloat16x2_t, svfloat16_t, ++ z0 = svget2_f16 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_f16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_f16_z0_1, svfloat16x2_t, svfloat16_t, ++ z0 = svget2_f16 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_f16_z4_0: ++** ret ++*/ ++TEST_GET (get2_f16_z4_0, svfloat16x2_t, svfloat16_t, ++ z4_res = svget2_f16 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_f16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_f16_z4_1, svfloat16x2_t, svfloat16_t, ++ z4_res = svget2_f16 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_f16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_f16_z5_0, svfloat16x2_t, svfloat16_t, ++ z5_res = svget2_f16 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_f16_z5_1: ++** ret ++*/ ++TEST_GET (get2_f16_z5_1, svfloat16x2_t, svfloat16_t, ++ z5_res = svget2_f16 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f32.c +new file mode 100644 +index 000000000..76080dc66 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f32.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_f32_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_f32_z0_0, svfloat32x2_t, svfloat32_t, ++ z0 = svget2_f32 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_f32_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_f32_z0_1, svfloat32x2_t, svfloat32_t, ++ z0 = svget2_f32 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_f32_z4_0: ++** ret ++*/ ++TEST_GET (get2_f32_z4_0, svfloat32x2_t, svfloat32_t, ++ z4_res = svget2_f32 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_f32_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_f32_z4_1, svfloat32x2_t, svfloat32_t, ++ z4_res = svget2_f32 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_f32_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_f32_z5_0, svfloat32x2_t, svfloat32_t, ++ z5_res = svget2_f32 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_f32_z5_1: ++** ret ++*/ ++TEST_GET (get2_f32_z5_1, svfloat32x2_t, svfloat32_t, ++ z5_res = svget2_f32 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f64.c +new file mode 100644 +index 000000000..cabe6e7de +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f64.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_f64_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_f64_z0_0, svfloat64x2_t, svfloat64_t, ++ z0 = svget2_f64 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** 
get2_f64_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_f64_z0_1, svfloat64x2_t, svfloat64_t, ++ z0 = svget2_f64 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_f64_z4_0: ++** ret ++*/ ++TEST_GET (get2_f64_z4_0, svfloat64x2_t, svfloat64_t, ++ z4_res = svget2_f64 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_f64_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_f64_z4_1, svfloat64x2_t, svfloat64_t, ++ z4_res = svget2_f64 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_f64_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_f64_z5_0, svfloat64x2_t, svfloat64_t, ++ z5_res = svget2_f64 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_f64_z5_1: ++** ret ++*/ ++TEST_GET (get2_f64_z5_1, svfloat64x2_t, svfloat64_t, ++ z5_res = svget2_f64 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s16.c +new file mode 100644 +index 000000000..387e6daad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s16.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_s16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_s16_z0_0, svint16x2_t, svint16_t, ++ z0 = svget2_s16 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_s16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_s16_z0_1, svint16x2_t, svint16_t, ++ z0 = svget2_s16 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_s16_z4_0: ++** ret ++*/ ++TEST_GET (get2_s16_z4_0, svint16x2_t, svint16_t, ++ z4_res = svget2_s16 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_s16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_s16_z4_1, svint16x2_t, svint16_t, ++ z4_res = svget2_s16 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_s16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_s16_z5_0, svint16x2_t, svint16_t, ++ z5_res = svget2_s16 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_s16_z5_1: ++** ret ++*/ ++TEST_GET (get2_s16_z5_1, svint16x2_t, svint16_t, ++ z5_res = svget2_s16 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s32.c +new file mode 100644 +index 000000000..5c47286e0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s32.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_s32_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_s32_z0_0, svint32x2_t, svint32_t, ++ z0 = svget2_s32 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_s32_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_s32_z0_1, svint32x2_t, svint32_t, ++ z0 = svget2_s32 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_s32_z4_0: ++** ret ++*/ ++TEST_GET (get2_s32_z4_0, svint32x2_t, svint32_t, ++ z4_res = svget2_s32 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_s32_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_s32_z4_1, svint32x2_t, svint32_t, ++ z4_res = svget2_s32 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_s32_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_s32_z5_0, svint32x2_t, svint32_t, ++ z5_res = svget2_s32 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_s32_z5_1: ++** ret ++*/ ++TEST_GET (get2_s32_z5_1, svint32x2_t, svint32_t, ++ z5_res = svget2_s32 (z4, 1), ++ z5_res = 
svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s64.c +new file mode 100644 +index 000000000..18f930d4c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s64.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_s64_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_s64_z0_0, svint64x2_t, svint64_t, ++ z0 = svget2_s64 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_s64_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_s64_z0_1, svint64x2_t, svint64_t, ++ z0 = svget2_s64 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_s64_z4_0: ++** ret ++*/ ++TEST_GET (get2_s64_z4_0, svint64x2_t, svint64_t, ++ z4_res = svget2_s64 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_s64_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_s64_z4_1, svint64x2_t, svint64_t, ++ z4_res = svget2_s64 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_s64_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_s64_z5_0, svint64x2_t, svint64_t, ++ z5_res = svget2_s64 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_s64_z5_1: ++** ret ++*/ ++TEST_GET (get2_s64_z5_1, svint64x2_t, svint64_t, ++ z5_res = svget2_s64 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s8.c +new file mode 100644 +index 000000000..27e2cfafb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s8.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_s8_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_s8_z0_0, svint8x2_t, svint8_t, ++ z0 = svget2_s8 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_s8_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_s8_z0_1, svint8x2_t, svint8_t, ++ z0 = svget2_s8 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_s8_z4_0: ++** ret ++*/ ++TEST_GET (get2_s8_z4_0, svint8x2_t, svint8_t, ++ z4_res = svget2_s8 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_s8_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_s8_z4_1, svint8x2_t, svint8_t, ++ z4_res = svget2_s8 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_s8_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_s8_z5_0, svint8x2_t, svint8_t, ++ z5_res = svget2_s8 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_s8_z5_1: ++** ret ++*/ ++TEST_GET (get2_s8_z5_1, svint8x2_t, svint8_t, ++ z5_res = svget2_s8 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u16.c +new file mode 100644 +index 000000000..1804900cc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u16.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_u16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_u16_z0_0, svuint16x2_t, svuint16_t, ++ z0 = svget2_u16 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_u16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_u16_z0_1, svuint16x2_t, svuint16_t, ++ z0 = svget2_u16 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_u16_z4_0: ++** ret ++*/ ++TEST_GET (get2_u16_z4_0, svuint16x2_t, svuint16_t, ++ z4_res = svget2_u16 
(z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_u16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_u16_z4_1, svuint16x2_t, svuint16_t, ++ z4_res = svget2_u16 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_u16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_u16_z5_0, svuint16x2_t, svuint16_t, ++ z5_res = svget2_u16 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_u16_z5_1: ++** ret ++*/ ++TEST_GET (get2_u16_z5_1, svuint16x2_t, svuint16_t, ++ z5_res = svget2_u16 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u32.c +new file mode 100644 +index 000000000..5c14de6aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u32.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_u32_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_u32_z0_0, svuint32x2_t, svuint32_t, ++ z0 = svget2_u32 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_u32_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_u32_z0_1, svuint32x2_t, svuint32_t, ++ z0 = svget2_u32 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_u32_z4_0: ++** ret ++*/ ++TEST_GET (get2_u32_z4_0, svuint32x2_t, svuint32_t, ++ z4_res = svget2_u32 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_u32_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_u32_z4_1, svuint32x2_t, svuint32_t, ++ z4_res = svget2_u32 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_u32_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_u32_z5_0, svuint32x2_t, svuint32_t, ++ z5_res = svget2_u32 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_u32_z5_1: ++** ret ++*/ ++TEST_GET (get2_u32_z5_1, svuint32x2_t, svuint32_t, ++ z5_res = svget2_u32 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u64.c +new file mode 100644 +index 000000000..fd389a01e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u64.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_u64_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_u64_z0_0, svuint64x2_t, svuint64_t, ++ z0 = svget2_u64 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_u64_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_u64_z0_1, svuint64x2_t, svuint64_t, ++ z0 = svget2_u64 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_u64_z4_0: ++** ret ++*/ ++TEST_GET (get2_u64_z4_0, svuint64x2_t, svuint64_t, ++ z4_res = svget2_u64 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_u64_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_u64_z4_1, svuint64x2_t, svuint64_t, ++ z4_res = svget2_u64 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_u64_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_u64_z5_0, svuint64x2_t, svuint64_t, ++ z5_res = svget2_u64 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_u64_z5_1: ++** ret ++*/ ++TEST_GET (get2_u64_z5_1, svuint64x2_t, svuint64_t, ++ z5_res = svget2_u64 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u8.c +new file mode 100644 +index 000000000..42ffb0344 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u8.c +@@ -0,0 +1,55 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get2_u8_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_u8_z0_0, svuint8x2_t, svuint8_t, ++ z0 = svget2_u8 (z4, 0), ++ z0 = svget2 (z4, 0)) ++ ++/* ++** get2_u8_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_u8_z0_1, svuint8x2_t, svuint8_t, ++ z0 = svget2_u8 (z4, 1), ++ z0 = svget2 (z4, 1)) ++ ++/* ++** get2_u8_z4_0: ++** ret ++*/ ++TEST_GET (get2_u8_z4_0, svuint8x2_t, svuint8_t, ++ z4_res = svget2_u8 (z4, 0), ++ z4_res = svget2 (z4, 0)) ++ ++/* ++** get2_u8_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get2_u8_z4_1, svuint8x2_t, svuint8_t, ++ z4_res = svget2_u8 (z4, 1), ++ z4_res = svget2 (z4, 1)) ++ ++/* ++** get2_u8_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get2_u8_z5_0, svuint8x2_t, svuint8_t, ++ z5_res = svget2_u8 (z4, 0), ++ z5_res = svget2 (z4, 0)) ++ ++/* ++** get2_u8_z5_1: ++** ret ++*/ ++TEST_GET (get2_u8_z5_1, svuint8x2_t, svuint8_t, ++ z5_res = svget2_u8 (z4, 1), ++ z5_res = svget2 (z4, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_bf16.c +new file mode 100644 +index 000000000..292f02a12 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_bf16.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_bf16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_bf16_z0_0, svbfloat16x3_t, svbfloat16_t, ++ z0 = svget3_bf16 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_bf16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_bf16_z0_1, svbfloat16x3_t, svbfloat16_t, ++ z0 = svget3_bf16 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_bf16_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_bf16_z0_2, svbfloat16x3_t, svbfloat16_t, ++ z0 = svget3_bf16 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_bf16_z4_0: ++** ret ++*/ ++TEST_GET (get3_bf16_z4_0, svbfloat16x3_t, svbfloat16_t, ++ z4_res = svget3_bf16 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_bf16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_bf16_z4_1, svbfloat16x3_t, svbfloat16_t, ++ z4_res = svget3_bf16 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_bf16_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_bf16_z4_2, svbfloat16x3_t, svbfloat16_t, ++ z4_res = svget3_bf16 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_bf16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_bf16_z5_0, svbfloat16x3_t, svbfloat16_t, ++ z5_res = svget3_bf16 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_bf16_z5_1: ++** ret ++*/ ++TEST_GET (get3_bf16_z5_1, svbfloat16x3_t, svbfloat16_t, ++ z5_res = svget3_bf16 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_bf16_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_bf16_z5_2, svbfloat16x3_t, svbfloat16_t, ++ z5_res = svget3_bf16 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_bf16_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_bf16_z6_0, svbfloat16x3_t, svbfloat16_t, ++ z6_res = svget3_bf16 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_bf16_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_bf16_z6_1, svbfloat16x3_t, svbfloat16_t, ++ z6_res = svget3_bf16 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_bf16_z6_2: ++** ret ++*/ ++TEST_GET 
(get3_bf16_z6_2, svbfloat16x3_t, svbfloat16_t, ++ z6_res = svget3_bf16 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f16.c +new file mode 100644 +index 000000000..8bea03bc5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f16.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_f16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_f16_z0_0, svfloat16x3_t, svfloat16_t, ++ z0 = svget3_f16 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_f16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_f16_z0_1, svfloat16x3_t, svfloat16_t, ++ z0 = svget3_f16 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_f16_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_f16_z0_2, svfloat16x3_t, svfloat16_t, ++ z0 = svget3_f16 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_f16_z4_0: ++** ret ++*/ ++TEST_GET (get3_f16_z4_0, svfloat16x3_t, svfloat16_t, ++ z4_res = svget3_f16 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_f16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_f16_z4_1, svfloat16x3_t, svfloat16_t, ++ z4_res = svget3_f16 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_f16_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_f16_z4_2, svfloat16x3_t, svfloat16_t, ++ z4_res = svget3_f16 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_f16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_f16_z5_0, svfloat16x3_t, svfloat16_t, ++ z5_res = svget3_f16 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_f16_z5_1: ++** ret ++*/ ++TEST_GET (get3_f16_z5_1, svfloat16x3_t, svfloat16_t, ++ z5_res = svget3_f16 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_f16_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_f16_z5_2, svfloat16x3_t, svfloat16_t, ++ z5_res = svget3_f16 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_f16_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_f16_z6_0, svfloat16x3_t, svfloat16_t, ++ z6_res = svget3_f16 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_f16_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_f16_z6_1, svfloat16x3_t, svfloat16_t, ++ z6_res = svget3_f16 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_f16_z6_2: ++** ret ++*/ ++TEST_GET (get3_f16_z6_2, svfloat16x3_t, svfloat16_t, ++ z6_res = svget3_f16 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f32.c +new file mode 100644 +index 000000000..246679584 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f32.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_f32_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_f32_z0_0, svfloat32x3_t, svfloat32_t, ++ z0 = svget3_f32 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_f32_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_f32_z0_1, svfloat32x3_t, svfloat32_t, ++ z0 = svget3_f32 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_f32_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_f32_z0_2, svfloat32x3_t, svfloat32_t, ++ z0 = svget3_f32 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_f32_z4_0: ++** ret ++*/ ++TEST_GET (get3_f32_z4_0, svfloat32x3_t, svfloat32_t, ++ z4_res = 
svget3_f32 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_f32_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_f32_z4_1, svfloat32x3_t, svfloat32_t, ++ z4_res = svget3_f32 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_f32_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_f32_z4_2, svfloat32x3_t, svfloat32_t, ++ z4_res = svget3_f32 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_f32_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_f32_z5_0, svfloat32x3_t, svfloat32_t, ++ z5_res = svget3_f32 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_f32_z5_1: ++** ret ++*/ ++TEST_GET (get3_f32_z5_1, svfloat32x3_t, svfloat32_t, ++ z5_res = svget3_f32 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_f32_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_f32_z5_2, svfloat32x3_t, svfloat32_t, ++ z5_res = svget3_f32 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_f32_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_f32_z6_0, svfloat32x3_t, svfloat32_t, ++ z6_res = svget3_f32 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_f32_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_f32_z6_1, svfloat32x3_t, svfloat32_t, ++ z6_res = svget3_f32 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_f32_z6_2: ++** ret ++*/ ++TEST_GET (get3_f32_z6_2, svfloat32x3_t, svfloat32_t, ++ z6_res = svget3_f32 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f64.c +new file mode 100644 +index 000000000..e44eb15fd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f64.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_f64_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_f64_z0_0, svfloat64x3_t, svfloat64_t, ++ z0 = svget3_f64 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_f64_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_f64_z0_1, svfloat64x3_t, svfloat64_t, ++ z0 = svget3_f64 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_f64_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_f64_z0_2, svfloat64x3_t, svfloat64_t, ++ z0 = svget3_f64 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_f64_z4_0: ++** ret ++*/ ++TEST_GET (get3_f64_z4_0, svfloat64x3_t, svfloat64_t, ++ z4_res = svget3_f64 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_f64_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_f64_z4_1, svfloat64x3_t, svfloat64_t, ++ z4_res = svget3_f64 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_f64_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_f64_z4_2, svfloat64x3_t, svfloat64_t, ++ z4_res = svget3_f64 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_f64_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_f64_z5_0, svfloat64x3_t, svfloat64_t, ++ z5_res = svget3_f64 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_f64_z5_1: ++** ret ++*/ ++TEST_GET (get3_f64_z5_1, svfloat64x3_t, svfloat64_t, ++ z5_res = svget3_f64 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_f64_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_f64_z5_2, svfloat64x3_t, svfloat64_t, ++ z5_res = svget3_f64 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_f64_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_f64_z6_0, svfloat64x3_t, svfloat64_t, ++ z6_res = svget3_f64 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** 
get3_f64_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_f64_z6_1, svfloat64x3_t, svfloat64_t, ++ z6_res = svget3_f64 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_f64_z6_2: ++** ret ++*/ ++TEST_GET (get3_f64_z6_2, svfloat64x3_t, svfloat64_t, ++ z6_res = svget3_f64 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s16.c +new file mode 100644 +index 000000000..88f7e4986 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s16.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_s16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s16_z0_0, svint16x3_t, svint16_t, ++ z0 = svget3_s16 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_s16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s16_z0_1, svint16x3_t, svint16_t, ++ z0 = svget3_s16 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_s16_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s16_z0_2, svint16x3_t, svint16_t, ++ z0 = svget3_s16 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_s16_z4_0: ++** ret ++*/ ++TEST_GET (get3_s16_z4_0, svint16x3_t, svint16_t, ++ z4_res = svget3_s16 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_s16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s16_z4_1, svint16x3_t, svint16_t, ++ z4_res = svget3_s16 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_s16_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s16_z4_2, svint16x3_t, svint16_t, ++ z4_res = svget3_s16 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_s16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s16_z5_0, svint16x3_t, svint16_t, ++ z5_res = svget3_s16 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_s16_z5_1: ++** ret ++*/ ++TEST_GET (get3_s16_z5_1, svint16x3_t, svint16_t, ++ z5_res = svget3_s16 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_s16_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s16_z5_2, svint16x3_t, svint16_t, ++ z5_res = svget3_s16 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_s16_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s16_z6_0, svint16x3_t, svint16_t, ++ z6_res = svget3_s16 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_s16_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s16_z6_1, svint16x3_t, svint16_t, ++ z6_res = svget3_s16 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_s16_z6_2: ++** ret ++*/ ++TEST_GET (get3_s16_z6_2, svint16x3_t, svint16_t, ++ z6_res = svget3_s16 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s32.c +new file mode 100644 +index 000000000..f0f7785c8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s32.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_s32_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s32_z0_0, svint32x3_t, svint32_t, ++ z0 = svget3_s32 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_s32_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s32_z0_1, svint32x3_t, svint32_t, ++ z0 = svget3_s32 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_s32_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s32_z0_2, svint32x3_t, svint32_t, ++ z0 = 
svget3_s32 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_s32_z4_0: ++** ret ++*/ ++TEST_GET (get3_s32_z4_0, svint32x3_t, svint32_t, ++ z4_res = svget3_s32 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_s32_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s32_z4_1, svint32x3_t, svint32_t, ++ z4_res = svget3_s32 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_s32_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s32_z4_2, svint32x3_t, svint32_t, ++ z4_res = svget3_s32 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_s32_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s32_z5_0, svint32x3_t, svint32_t, ++ z5_res = svget3_s32 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_s32_z5_1: ++** ret ++*/ ++TEST_GET (get3_s32_z5_1, svint32x3_t, svint32_t, ++ z5_res = svget3_s32 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_s32_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s32_z5_2, svint32x3_t, svint32_t, ++ z5_res = svget3_s32 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_s32_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s32_z6_0, svint32x3_t, svint32_t, ++ z6_res = svget3_s32 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_s32_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s32_z6_1, svint32x3_t, svint32_t, ++ z6_res = svget3_s32 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_s32_z6_2: ++** ret ++*/ ++TEST_GET (get3_s32_z6_2, svint32x3_t, svint32_t, ++ z6_res = svget3_s32 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s64.c +new file mode 100644 +index 000000000..92500bfdf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s64.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_s64_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s64_z0_0, svint64x3_t, svint64_t, ++ z0 = svget3_s64 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_s64_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s64_z0_1, svint64x3_t, svint64_t, ++ z0 = svget3_s64 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_s64_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s64_z0_2, svint64x3_t, svint64_t, ++ z0 = svget3_s64 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_s64_z4_0: ++** ret ++*/ ++TEST_GET (get3_s64_z4_0, svint64x3_t, svint64_t, ++ z4_res = svget3_s64 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_s64_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s64_z4_1, svint64x3_t, svint64_t, ++ z4_res = svget3_s64 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_s64_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s64_z4_2, svint64x3_t, svint64_t, ++ z4_res = svget3_s64 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_s64_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s64_z5_0, svint64x3_t, svint64_t, ++ z5_res = svget3_s64 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_s64_z5_1: ++** ret ++*/ ++TEST_GET (get3_s64_z5_1, svint64x3_t, svint64_t, ++ z5_res = svget3_s64 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_s64_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s64_z5_2, svint64x3_t, svint64_t, ++ z5_res = svget3_s64 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_s64_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s64_z6_0, svint64x3_t, svint64_t, 
++ z6_res = svget3_s64 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_s64_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s64_z6_1, svint64x3_t, svint64_t, ++ z6_res = svget3_s64 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_s64_z6_2: ++** ret ++*/ ++TEST_GET (get3_s64_z6_2, svint64x3_t, svint64_t, ++ z6_res = svget3_s64 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s8.c +new file mode 100644 +index 000000000..edf225ba5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s8.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_s8_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s8_z0_0, svint8x3_t, svint8_t, ++ z0 = svget3_s8 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_s8_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s8_z0_1, svint8x3_t, svint8_t, ++ z0 = svget3_s8 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_s8_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s8_z0_2, svint8x3_t, svint8_t, ++ z0 = svget3_s8 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_s8_z4_0: ++** ret ++*/ ++TEST_GET (get3_s8_z4_0, svint8x3_t, svint8_t, ++ z4_res = svget3_s8 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_s8_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s8_z4_1, svint8x3_t, svint8_t, ++ z4_res = svget3_s8 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_s8_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s8_z4_2, svint8x3_t, svint8_t, ++ z4_res = svget3_s8 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_s8_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s8_z5_0, svint8x3_t, svint8_t, ++ z5_res = svget3_s8 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_s8_z5_1: ++** ret ++*/ ++TEST_GET (get3_s8_z5_1, svint8x3_t, svint8_t, ++ z5_res = svget3_s8 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_s8_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_s8_z5_2, svint8x3_t, svint8_t, ++ z5_res = svget3_s8 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_s8_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_s8_z6_0, svint8x3_t, svint8_t, ++ z6_res = svget3_s8 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_s8_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_s8_z6_1, svint8x3_t, svint8_t, ++ z6_res = svget3_s8 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_s8_z6_2: ++** ret ++*/ ++TEST_GET (get3_s8_z6_2, svint8x3_t, svint8_t, ++ z6_res = svget3_s8 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u16.c +new file mode 100644 +index 000000000..1fa7c63c0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u16.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_u16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u16_z0_0, svuint16x3_t, svuint16_t, ++ z0 = svget3_u16 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_u16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u16_z0_1, svuint16x3_t, svuint16_t, ++ z0 = svget3_u16 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_u16_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u16_z0_2, svuint16x3_t, svuint16_t, ++ z0 
= svget3_u16 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_u16_z4_0: ++** ret ++*/ ++TEST_GET (get3_u16_z4_0, svuint16x3_t, svuint16_t, ++ z4_res = svget3_u16 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_u16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u16_z4_1, svuint16x3_t, svuint16_t, ++ z4_res = svget3_u16 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_u16_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u16_z4_2, svuint16x3_t, svuint16_t, ++ z4_res = svget3_u16 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_u16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u16_z5_0, svuint16x3_t, svuint16_t, ++ z5_res = svget3_u16 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_u16_z5_1: ++** ret ++*/ ++TEST_GET (get3_u16_z5_1, svuint16x3_t, svuint16_t, ++ z5_res = svget3_u16 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_u16_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u16_z5_2, svuint16x3_t, svuint16_t, ++ z5_res = svget3_u16 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_u16_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u16_z6_0, svuint16x3_t, svuint16_t, ++ z6_res = svget3_u16 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_u16_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u16_z6_1, svuint16x3_t, svuint16_t, ++ z6_res = svget3_u16 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_u16_z6_2: ++** ret ++*/ ++TEST_GET (get3_u16_z6_2, svuint16x3_t, svuint16_t, ++ z6_res = svget3_u16 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u32.c +new file mode 100644 +index 000000000..03b5f2616 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u32.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_u32_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u32_z0_0, svuint32x3_t, svuint32_t, ++ z0 = svget3_u32 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_u32_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u32_z0_1, svuint32x3_t, svuint32_t, ++ z0 = svget3_u32 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_u32_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u32_z0_2, svuint32x3_t, svuint32_t, ++ z0 = svget3_u32 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_u32_z4_0: ++** ret ++*/ ++TEST_GET (get3_u32_z4_0, svuint32x3_t, svuint32_t, ++ z4_res = svget3_u32 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_u32_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u32_z4_1, svuint32x3_t, svuint32_t, ++ z4_res = svget3_u32 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_u32_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u32_z4_2, svuint32x3_t, svuint32_t, ++ z4_res = svget3_u32 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_u32_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u32_z5_0, svuint32x3_t, svuint32_t, ++ z5_res = svget3_u32 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_u32_z5_1: ++** ret ++*/ ++TEST_GET (get3_u32_z5_1, svuint32x3_t, svuint32_t, ++ z5_res = svget3_u32 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_u32_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u32_z5_2, svuint32x3_t, svuint32_t, ++ z5_res = svget3_u32 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_u32_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET 
(get3_u32_z6_0, svuint32x3_t, svuint32_t, ++ z6_res = svget3_u32 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_u32_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u32_z6_1, svuint32x3_t, svuint32_t, ++ z6_res = svget3_u32 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_u32_z6_2: ++** ret ++*/ ++TEST_GET (get3_u32_z6_2, svuint32x3_t, svuint32_t, ++ z6_res = svget3_u32 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u64.c +new file mode 100644 +index 000000000..ae4ef0024 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u64.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_u64_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u64_z0_0, svuint64x3_t, svuint64_t, ++ z0 = svget3_u64 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_u64_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u64_z0_1, svuint64x3_t, svuint64_t, ++ z0 = svget3_u64 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ ++/* ++** get3_u64_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u64_z0_2, svuint64x3_t, svuint64_t, ++ z0 = svget3_u64 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_u64_z4_0: ++** ret ++*/ ++TEST_GET (get3_u64_z4_0, svuint64x3_t, svuint64_t, ++ z4_res = svget3_u64 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_u64_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u64_z4_1, svuint64x3_t, svuint64_t, ++ z4_res = svget3_u64 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_u64_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u64_z4_2, svuint64x3_t, svuint64_t, ++ z4_res = svget3_u64 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_u64_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u64_z5_0, svuint64x3_t, svuint64_t, ++ z5_res = svget3_u64 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_u64_z5_1: ++** ret ++*/ ++TEST_GET (get3_u64_z5_1, svuint64x3_t, svuint64_t, ++ z5_res = svget3_u64 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_u64_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u64_z5_2, svuint64x3_t, svuint64_t, ++ z5_res = svget3_u64 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_u64_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u64_z6_0, svuint64x3_t, svuint64_t, ++ z6_res = svget3_u64 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_u64_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u64_z6_1, svuint64x3_t, svuint64_t, ++ z6_res = svget3_u64 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_u64_z6_2: ++** ret ++*/ ++TEST_GET (get3_u64_z6_2, svuint64x3_t, svuint64_t, ++ z6_res = svget3_u64 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u8.c +new file mode 100644 +index 000000000..497dcbbae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u8.c +@@ -0,0 +1,108 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get3_u8_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u8_z0_0, svuint8x3_t, svuint8_t, ++ z0 = svget3_u8 (z4, 0), ++ z0 = svget3 (z4, 0)) ++ ++/* ++** get3_u8_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u8_z0_1, svuint8x3_t, svuint8_t, ++ z0 = svget3_u8 (z4, 1), ++ z0 = svget3 (z4, 1)) ++ 
++/* ++** get3_u8_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u8_z0_2, svuint8x3_t, svuint8_t, ++ z0 = svget3_u8 (z4, 2), ++ z0 = svget3 (z4, 2)) ++ ++/* ++** get3_u8_z4_0: ++** ret ++*/ ++TEST_GET (get3_u8_z4_0, svuint8x3_t, svuint8_t, ++ z4_res = svget3_u8 (z4, 0), ++ z4_res = svget3 (z4, 0)) ++ ++/* ++** get3_u8_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u8_z4_1, svuint8x3_t, svuint8_t, ++ z4_res = svget3_u8 (z4, 1), ++ z4_res = svget3 (z4, 1)) ++ ++/* ++** get3_u8_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u8_z4_2, svuint8x3_t, svuint8_t, ++ z4_res = svget3_u8 (z4, 2), ++ z4_res = svget3 (z4, 2)) ++ ++/* ++** get3_u8_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u8_z5_0, svuint8x3_t, svuint8_t, ++ z5_res = svget3_u8 (z4, 0), ++ z5_res = svget3 (z4, 0)) ++ ++/* ++** get3_u8_z5_1: ++** ret ++*/ ++TEST_GET (get3_u8_z5_1, svuint8x3_t, svuint8_t, ++ z5_res = svget3_u8 (z4, 1), ++ z5_res = svget3 (z4, 1)) ++ ++/* ++** get3_u8_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get3_u8_z5_2, svuint8x3_t, svuint8_t, ++ z5_res = svget3_u8 (z4, 2), ++ z5_res = svget3 (z4, 2)) ++ ++/* ++** get3_u8_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get3_u8_z6_0, svuint8x3_t, svuint8_t, ++ z6_res = svget3_u8 (z4, 0), ++ z6_res = svget3 (z4, 0)) ++ ++/* ++** get3_u8_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get3_u8_z6_1, svuint8x3_t, svuint8_t, ++ z6_res = svget3_u8 (z4, 1), ++ z6_res = svget3 (z4, 1)) ++ ++/* ++** get3_u8_z6_2: ++** ret ++*/ ++TEST_GET (get3_u8_z6_2, svuint8x3_t, svuint8_t, ++ z6_res = svget3_u8 (z4, 2), ++ z6_res = svget3 (z4, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_bf16.c +new file mode 100644 +index 000000000..f751fc147 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_bf16.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_bf16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z0_0, svbfloat16x4_t, svbfloat16_t, ++ z0 = svget4_bf16 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_bf16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z0_1, svbfloat16x4_t, svbfloat16_t, ++ z0 = svget4_bf16 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_bf16_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z0_2, svbfloat16x4_t, svbfloat16_t, ++ z0 = svget4_bf16 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_bf16_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z0_3, svbfloat16x4_t, svbfloat16_t, ++ z0 = svget4_bf16 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_bf16_z4_0: ++** ret ++*/ ++TEST_GET (get4_bf16_z4_0, svbfloat16x4_t, svbfloat16_t, ++ z4_res = svget4_bf16 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_bf16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z4_1, svbfloat16x4_t, svbfloat16_t, ++ z4_res = svget4_bf16 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_bf16_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z4_2, svbfloat16x4_t, svbfloat16_t, ++ z4_res = svget4_bf16 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_bf16_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z4_3, svbfloat16x4_t, svbfloat16_t, ++ z4_res = svget4_bf16 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_bf16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z5_0, svbfloat16x4_t, 
svbfloat16_t, ++ z5_res = svget4_bf16 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_bf16_z5_1: ++** ret ++*/ ++TEST_GET (get4_bf16_z5_1, svbfloat16x4_t, svbfloat16_t, ++ z5_res = svget4_bf16 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_bf16_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z5_2, svbfloat16x4_t, svbfloat16_t, ++ z5_res = svget4_bf16 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_bf16_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z5_3, svbfloat16x4_t, svbfloat16_t, ++ z5_res = svget4_bf16 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_bf16_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z6_0, svbfloat16x4_t, svbfloat16_t, ++ z6_res = svget4_bf16 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_bf16_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z6_1, svbfloat16x4_t, svbfloat16_t, ++ z6_res = svget4_bf16 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_bf16_z6_2: ++** ret ++*/ ++TEST_GET (get4_bf16_z6_2, svbfloat16x4_t, svbfloat16_t, ++ z6_res = svget4_bf16 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_bf16_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z6_3, svbfloat16x4_t, svbfloat16_t, ++ z6_res = svget4_bf16 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_bf16_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z7_0, svbfloat16x4_t, svbfloat16_t, ++ z7_res = svget4_bf16 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_bf16_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z7_1, svbfloat16x4_t, svbfloat16_t, ++ z7_res = svget4_bf16 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_bf16_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_bf16_z7_2, svbfloat16x4_t, svbfloat16_t, ++ z7_res = svget4_bf16 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_bf16_z7_3: ++** ret ++*/ ++TEST_GET (get4_bf16_z7_3, svbfloat16x4_t, svbfloat16_t, ++ z7_res = svget4_bf16 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f16.c +new file mode 100644 +index 000000000..7871f6f4e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f16.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_f16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f16_z0_0, svfloat16x4_t, svfloat16_t, ++ z0 = svget4_f16 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_f16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f16_z0_1, svfloat16x4_t, svfloat16_t, ++ z0 = svget4_f16 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_f16_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f16_z0_2, svfloat16x4_t, svfloat16_t, ++ z0 = svget4_f16 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_f16_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f16_z0_3, svfloat16x4_t, svfloat16_t, ++ z0 = svget4_f16 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_f16_z4_0: ++** ret ++*/ ++TEST_GET (get4_f16_z4_0, svfloat16x4_t, svfloat16_t, ++ z4_res = svget4_f16 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_f16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f16_z4_1, svfloat16x4_t, svfloat16_t, ++ z4_res = svget4_f16 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_f16_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f16_z4_2, svfloat16x4_t, svfloat16_t, ++ 
z4_res = svget4_f16 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_f16_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f16_z4_3, svfloat16x4_t, svfloat16_t, ++ z4_res = svget4_f16 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_f16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f16_z5_0, svfloat16x4_t, svfloat16_t, ++ z5_res = svget4_f16 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_f16_z5_1: ++** ret ++*/ ++TEST_GET (get4_f16_z5_1, svfloat16x4_t, svfloat16_t, ++ z5_res = svget4_f16 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_f16_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f16_z5_2, svfloat16x4_t, svfloat16_t, ++ z5_res = svget4_f16 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_f16_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f16_z5_3, svfloat16x4_t, svfloat16_t, ++ z5_res = svget4_f16 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_f16_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f16_z6_0, svfloat16x4_t, svfloat16_t, ++ z6_res = svget4_f16 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_f16_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f16_z6_1, svfloat16x4_t, svfloat16_t, ++ z6_res = svget4_f16 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_f16_z6_2: ++** ret ++*/ ++TEST_GET (get4_f16_z6_2, svfloat16x4_t, svfloat16_t, ++ z6_res = svget4_f16 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_f16_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f16_z6_3, svfloat16x4_t, svfloat16_t, ++ z6_res = svget4_f16 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_f16_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f16_z7_0, svfloat16x4_t, svfloat16_t, ++ z7_res = svget4_f16 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_f16_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f16_z7_1, svfloat16x4_t, svfloat16_t, ++ z7_res = svget4_f16 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_f16_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f16_z7_2, svfloat16x4_t, svfloat16_t, ++ z7_res = svget4_f16 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_f16_z7_3: ++** ret ++*/ ++TEST_GET (get4_f16_z7_3, svfloat16x4_t, svfloat16_t, ++ z7_res = svget4_f16 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f32.c +new file mode 100644 +index 000000000..a290e026d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f32.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_f32_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f32_z0_0, svfloat32x4_t, svfloat32_t, ++ z0 = svget4_f32 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_f32_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f32_z0_1, svfloat32x4_t, svfloat32_t, ++ z0 = svget4_f32 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_f32_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f32_z0_2, svfloat32x4_t, svfloat32_t, ++ z0 = svget4_f32 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_f32_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f32_z0_3, svfloat32x4_t, svfloat32_t, ++ z0 = svget4_f32 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_f32_z4_0: ++** ret ++*/ ++TEST_GET (get4_f32_z4_0, svfloat32x4_t, svfloat32_t, ++ z4_res = svget4_f32 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** 
get4_f32_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f32_z4_1, svfloat32x4_t, svfloat32_t, ++ z4_res = svget4_f32 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_f32_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f32_z4_2, svfloat32x4_t, svfloat32_t, ++ z4_res = svget4_f32 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_f32_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f32_z4_3, svfloat32x4_t, svfloat32_t, ++ z4_res = svget4_f32 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_f32_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f32_z5_0, svfloat32x4_t, svfloat32_t, ++ z5_res = svget4_f32 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_f32_z5_1: ++** ret ++*/ ++TEST_GET (get4_f32_z5_1, svfloat32x4_t, svfloat32_t, ++ z5_res = svget4_f32 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_f32_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f32_z5_2, svfloat32x4_t, svfloat32_t, ++ z5_res = svget4_f32 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_f32_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f32_z5_3, svfloat32x4_t, svfloat32_t, ++ z5_res = svget4_f32 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_f32_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f32_z6_0, svfloat32x4_t, svfloat32_t, ++ z6_res = svget4_f32 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_f32_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f32_z6_1, svfloat32x4_t, svfloat32_t, ++ z6_res = svget4_f32 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_f32_z6_2: ++** ret ++*/ ++TEST_GET (get4_f32_z6_2, svfloat32x4_t, svfloat32_t, ++ z6_res = svget4_f32 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_f32_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f32_z6_3, svfloat32x4_t, svfloat32_t, ++ z6_res = svget4_f32 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_f32_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f32_z7_0, svfloat32x4_t, svfloat32_t, ++ z7_res = svget4_f32 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_f32_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f32_z7_1, svfloat32x4_t, svfloat32_t, ++ z7_res = svget4_f32 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_f32_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f32_z7_2, svfloat32x4_t, svfloat32_t, ++ z7_res = svget4_f32 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_f32_z7_3: ++** ret ++*/ ++TEST_GET (get4_f32_z7_3, svfloat32x4_t, svfloat32_t, ++ z7_res = svget4_f32 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f64.c +new file mode 100644 +index 000000000..2c34dfef1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f64.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_f64_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f64_z0_0, svfloat64x4_t, svfloat64_t, ++ z0 = svget4_f64 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_f64_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f64_z0_1, svfloat64x4_t, svfloat64_t, ++ z0 = svget4_f64 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_f64_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f64_z0_2, svfloat64x4_t, svfloat64_t, ++ z0 = svget4_f64 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_f64_z0_3: ++** mov z0\.d, z7\.d ++** 
ret ++*/ ++TEST_GET (get4_f64_z0_3, svfloat64x4_t, svfloat64_t, ++ z0 = svget4_f64 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_f64_z4_0: ++** ret ++*/ ++TEST_GET (get4_f64_z4_0, svfloat64x4_t, svfloat64_t, ++ z4_res = svget4_f64 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_f64_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f64_z4_1, svfloat64x4_t, svfloat64_t, ++ z4_res = svget4_f64 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_f64_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f64_z4_2, svfloat64x4_t, svfloat64_t, ++ z4_res = svget4_f64 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_f64_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f64_z4_3, svfloat64x4_t, svfloat64_t, ++ z4_res = svget4_f64 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_f64_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f64_z5_0, svfloat64x4_t, svfloat64_t, ++ z5_res = svget4_f64 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_f64_z5_1: ++** ret ++*/ ++TEST_GET (get4_f64_z5_1, svfloat64x4_t, svfloat64_t, ++ z5_res = svget4_f64 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_f64_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f64_z5_2, svfloat64x4_t, svfloat64_t, ++ z5_res = svget4_f64 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_f64_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f64_z5_3, svfloat64x4_t, svfloat64_t, ++ z5_res = svget4_f64 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_f64_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f64_z6_0, svfloat64x4_t, svfloat64_t, ++ z6_res = svget4_f64 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_f64_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f64_z6_1, svfloat64x4_t, svfloat64_t, ++ z6_res = svget4_f64 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_f64_z6_2: ++** ret ++*/ ++TEST_GET (get4_f64_z6_2, svfloat64x4_t, svfloat64_t, ++ z6_res = svget4_f64 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_f64_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_f64_z6_3, svfloat64x4_t, svfloat64_t, ++ z6_res = svget4_f64 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_f64_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_f64_z7_0, svfloat64x4_t, svfloat64_t, ++ z7_res = svget4_f64 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_f64_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_f64_z7_1, svfloat64x4_t, svfloat64_t, ++ z7_res = svget4_f64 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_f64_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_f64_z7_2, svfloat64x4_t, svfloat64_t, ++ z7_res = svget4_f64 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_f64_z7_3: ++** ret ++*/ ++TEST_GET (get4_f64_z7_3, svfloat64x4_t, svfloat64_t, ++ z7_res = svget4_f64 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s16.c +new file mode 100644 +index 000000000..6a2280fea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s16.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_s16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s16_z0_0, svint16x4_t, svint16_t, ++ z0 = svget4_s16 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_s16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s16_z0_1, svint16x4_t, svint16_t, 
++ z0 = svget4_s16 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_s16_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s16_z0_2, svint16x4_t, svint16_t, ++ z0 = svget4_s16 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_s16_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s16_z0_3, svint16x4_t, svint16_t, ++ z0 = svget4_s16 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_s16_z4_0: ++** ret ++*/ ++TEST_GET (get4_s16_z4_0, svint16x4_t, svint16_t, ++ z4_res = svget4_s16 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_s16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s16_z4_1, svint16x4_t, svint16_t, ++ z4_res = svget4_s16 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_s16_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s16_z4_2, svint16x4_t, svint16_t, ++ z4_res = svget4_s16 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_s16_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s16_z4_3, svint16x4_t, svint16_t, ++ z4_res = svget4_s16 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_s16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s16_z5_0, svint16x4_t, svint16_t, ++ z5_res = svget4_s16 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_s16_z5_1: ++** ret ++*/ ++TEST_GET (get4_s16_z5_1, svint16x4_t, svint16_t, ++ z5_res = svget4_s16 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_s16_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s16_z5_2, svint16x4_t, svint16_t, ++ z5_res = svget4_s16 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_s16_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s16_z5_3, svint16x4_t, svint16_t, ++ z5_res = svget4_s16 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_s16_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s16_z6_0, svint16x4_t, svint16_t, ++ z6_res = svget4_s16 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_s16_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s16_z6_1, svint16x4_t, svint16_t, ++ z6_res = svget4_s16 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_s16_z6_2: ++** ret ++*/ ++TEST_GET (get4_s16_z6_2, svint16x4_t, svint16_t, ++ z6_res = svget4_s16 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_s16_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s16_z6_3, svint16x4_t, svint16_t, ++ z6_res = svget4_s16 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_s16_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s16_z7_0, svint16x4_t, svint16_t, ++ z7_res = svget4_s16 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_s16_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s16_z7_1, svint16x4_t, svint16_t, ++ z7_res = svget4_s16 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_s16_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s16_z7_2, svint16x4_t, svint16_t, ++ z7_res = svget4_s16 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_s16_z7_3: ++** ret ++*/ ++TEST_GET (get4_s16_z7_3, svint16x4_t, svint16_t, ++ z7_res = svget4_s16 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s32.c +new file mode 100644 +index 000000000..41aca09d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s32.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_s32_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET 
(get4_s32_z0_0, svint32x4_t, svint32_t, ++ z0 = svget4_s32 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_s32_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s32_z0_1, svint32x4_t, svint32_t, ++ z0 = svget4_s32 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_s32_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s32_z0_2, svint32x4_t, svint32_t, ++ z0 = svget4_s32 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_s32_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s32_z0_3, svint32x4_t, svint32_t, ++ z0 = svget4_s32 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_s32_z4_0: ++** ret ++*/ ++TEST_GET (get4_s32_z4_0, svint32x4_t, svint32_t, ++ z4_res = svget4_s32 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_s32_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s32_z4_1, svint32x4_t, svint32_t, ++ z4_res = svget4_s32 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_s32_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s32_z4_2, svint32x4_t, svint32_t, ++ z4_res = svget4_s32 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_s32_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s32_z4_3, svint32x4_t, svint32_t, ++ z4_res = svget4_s32 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_s32_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s32_z5_0, svint32x4_t, svint32_t, ++ z5_res = svget4_s32 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_s32_z5_1: ++** ret ++*/ ++TEST_GET (get4_s32_z5_1, svint32x4_t, svint32_t, ++ z5_res = svget4_s32 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_s32_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s32_z5_2, svint32x4_t, svint32_t, ++ z5_res = svget4_s32 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_s32_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s32_z5_3, svint32x4_t, svint32_t, ++ z5_res = svget4_s32 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_s32_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s32_z6_0, svint32x4_t, svint32_t, ++ z6_res = svget4_s32 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_s32_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s32_z6_1, svint32x4_t, svint32_t, ++ z6_res = svget4_s32 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_s32_z6_2: ++** ret ++*/ ++TEST_GET (get4_s32_z6_2, svint32x4_t, svint32_t, ++ z6_res = svget4_s32 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_s32_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s32_z6_3, svint32x4_t, svint32_t, ++ z6_res = svget4_s32 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_s32_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s32_z7_0, svint32x4_t, svint32_t, ++ z7_res = svget4_s32 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_s32_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s32_z7_1, svint32x4_t, svint32_t, ++ z7_res = svget4_s32 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_s32_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s32_z7_2, svint32x4_t, svint32_t, ++ z7_res = svget4_s32 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_s32_z7_3: ++** ret ++*/ ++TEST_GET (get4_s32_z7_3, svint32x4_t, svint32_t, ++ z7_res = svget4_s32 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s64.c +new file mode 100644 +index 000000000..a17e2779c +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s64.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_s64_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s64_z0_0, svint64x4_t, svint64_t, ++ z0 = svget4_s64 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_s64_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s64_z0_1, svint64x4_t, svint64_t, ++ z0 = svget4_s64 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_s64_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s64_z0_2, svint64x4_t, svint64_t, ++ z0 = svget4_s64 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_s64_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s64_z0_3, svint64x4_t, svint64_t, ++ z0 = svget4_s64 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_s64_z4_0: ++** ret ++*/ ++TEST_GET (get4_s64_z4_0, svint64x4_t, svint64_t, ++ z4_res = svget4_s64 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_s64_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s64_z4_1, svint64x4_t, svint64_t, ++ z4_res = svget4_s64 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_s64_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s64_z4_2, svint64x4_t, svint64_t, ++ z4_res = svget4_s64 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_s64_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s64_z4_3, svint64x4_t, svint64_t, ++ z4_res = svget4_s64 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_s64_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s64_z5_0, svint64x4_t, svint64_t, ++ z5_res = svget4_s64 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_s64_z5_1: ++** ret ++*/ ++TEST_GET (get4_s64_z5_1, svint64x4_t, svint64_t, ++ z5_res = svget4_s64 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_s64_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s64_z5_2, svint64x4_t, svint64_t, ++ z5_res = svget4_s64 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_s64_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s64_z5_3, svint64x4_t, svint64_t, ++ z5_res = svget4_s64 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_s64_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s64_z6_0, svint64x4_t, svint64_t, ++ z6_res = svget4_s64 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_s64_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s64_z6_1, svint64x4_t, svint64_t, ++ z6_res = svget4_s64 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_s64_z6_2: ++** ret ++*/ ++TEST_GET (get4_s64_z6_2, svint64x4_t, svint64_t, ++ z6_res = svget4_s64 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_s64_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s64_z6_3, svint64x4_t, svint64_t, ++ z6_res = svget4_s64 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_s64_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s64_z7_0, svint64x4_t, svint64_t, ++ z7_res = svget4_s64 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_s64_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s64_z7_1, svint64x4_t, svint64_t, ++ z7_res = svget4_s64 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_s64_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s64_z7_2, svint64x4_t, svint64_t, ++ z7_res = svget4_s64 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_s64_z7_3: ++** ret ++*/ ++TEST_GET (get4_s64_z7_3, svint64x4_t, svint64_t, ++ z7_res = svget4_s64 (z4, 3), ++ z7_res = svget4 (z4, 
3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s8.c +new file mode 100644 +index 000000000..9fa159597 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s8.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_s8_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s8_z0_0, svint8x4_t, svint8_t, ++ z0 = svget4_s8 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_s8_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s8_z0_1, svint8x4_t, svint8_t, ++ z0 = svget4_s8 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_s8_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s8_z0_2, svint8x4_t, svint8_t, ++ z0 = svget4_s8 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_s8_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s8_z0_3, svint8x4_t, svint8_t, ++ z0 = svget4_s8 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_s8_z4_0: ++** ret ++*/ ++TEST_GET (get4_s8_z4_0, svint8x4_t, svint8_t, ++ z4_res = svget4_s8 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_s8_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s8_z4_1, svint8x4_t, svint8_t, ++ z4_res = svget4_s8 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_s8_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s8_z4_2, svint8x4_t, svint8_t, ++ z4_res = svget4_s8 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_s8_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s8_z4_3, svint8x4_t, svint8_t, ++ z4_res = svget4_s8 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_s8_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s8_z5_0, svint8x4_t, svint8_t, ++ z5_res = svget4_s8 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_s8_z5_1: ++** ret ++*/ ++TEST_GET (get4_s8_z5_1, svint8x4_t, svint8_t, ++ z5_res = svget4_s8 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_s8_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s8_z5_2, svint8x4_t, svint8_t, ++ z5_res = svget4_s8 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_s8_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s8_z5_3, svint8x4_t, svint8_t, ++ z5_res = svget4_s8 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_s8_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s8_z6_0, svint8x4_t, svint8_t, ++ z6_res = svget4_s8 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_s8_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s8_z6_1, svint8x4_t, svint8_t, ++ z6_res = svget4_s8 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_s8_z6_2: ++** ret ++*/ ++TEST_GET (get4_s8_z6_2, svint8x4_t, svint8_t, ++ z6_res = svget4_s8 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_s8_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_s8_z6_3, svint8x4_t, svint8_t, ++ z6_res = svget4_s8 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_s8_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_s8_z7_0, svint8x4_t, svint8_t, ++ z7_res = svget4_s8 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_s8_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_s8_z7_1, svint8x4_t, svint8_t, ++ z7_res = svget4_s8 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_s8_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_s8_z7_2, svint8x4_t, svint8_t, ++ z7_res = svget4_s8 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_s8_z7_3: ++** ret ++*/ 
++TEST_GET (get4_s8_z7_3, svint8x4_t, svint8_t, ++ z7_res = svget4_s8 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u16.c +new file mode 100644 +index 000000000..8f17ad213 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u16.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_u16_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u16_z0_0, svuint16x4_t, svuint16_t, ++ z0 = svget4_u16 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_u16_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u16_z0_1, svuint16x4_t, svuint16_t, ++ z0 = svget4_u16 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_u16_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u16_z0_2, svuint16x4_t, svuint16_t, ++ z0 = svget4_u16 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_u16_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u16_z0_3, svuint16x4_t, svuint16_t, ++ z0 = svget4_u16 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_u16_z4_0: ++** ret ++*/ ++TEST_GET (get4_u16_z4_0, svuint16x4_t, svuint16_t, ++ z4_res = svget4_u16 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_u16_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u16_z4_1, svuint16x4_t, svuint16_t, ++ z4_res = svget4_u16 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_u16_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u16_z4_2, svuint16x4_t, svuint16_t, ++ z4_res = svget4_u16 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_u16_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u16_z4_3, svuint16x4_t, svuint16_t, ++ z4_res = svget4_u16 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_u16_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u16_z5_0, svuint16x4_t, svuint16_t, ++ z5_res = svget4_u16 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_u16_z5_1: ++** ret ++*/ ++TEST_GET (get4_u16_z5_1, svuint16x4_t, svuint16_t, ++ z5_res = svget4_u16 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_u16_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u16_z5_2, svuint16x4_t, svuint16_t, ++ z5_res = svget4_u16 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_u16_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u16_z5_3, svuint16x4_t, svuint16_t, ++ z5_res = svget4_u16 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_u16_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u16_z6_0, svuint16x4_t, svuint16_t, ++ z6_res = svget4_u16 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_u16_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u16_z6_1, svuint16x4_t, svuint16_t, ++ z6_res = svget4_u16 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_u16_z6_2: ++** ret ++*/ ++TEST_GET (get4_u16_z6_2, svuint16x4_t, svuint16_t, ++ z6_res = svget4_u16 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_u16_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u16_z6_3, svuint16x4_t, svuint16_t, ++ z6_res = svget4_u16 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_u16_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u16_z7_0, svuint16x4_t, svuint16_t, ++ z7_res = svget4_u16 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_u16_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u16_z7_1, svuint16x4_t, svuint16_t, ++ z7_res = svget4_u16 (z4, 1), ++ 
z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_u16_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u16_z7_2, svuint16x4_t, svuint16_t, ++ z7_res = svget4_u16 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_u16_z7_3: ++** ret ++*/ ++TEST_GET (get4_u16_z7_3, svuint16x4_t, svuint16_t, ++ z7_res = svget4_u16 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u32.c +new file mode 100644 +index 000000000..e6c94b39d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u32.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_u32_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u32_z0_0, svuint32x4_t, svuint32_t, ++ z0 = svget4_u32 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_u32_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u32_z0_1, svuint32x4_t, svuint32_t, ++ z0 = svget4_u32 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_u32_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u32_z0_2, svuint32x4_t, svuint32_t, ++ z0 = svget4_u32 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_u32_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u32_z0_3, svuint32x4_t, svuint32_t, ++ z0 = svget4_u32 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_u32_z4_0: ++** ret ++*/ ++TEST_GET (get4_u32_z4_0, svuint32x4_t, svuint32_t, ++ z4_res = svget4_u32 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_u32_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u32_z4_1, svuint32x4_t, svuint32_t, ++ z4_res = svget4_u32 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_u32_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u32_z4_2, svuint32x4_t, svuint32_t, ++ z4_res = svget4_u32 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_u32_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u32_z4_3, svuint32x4_t, svuint32_t, ++ z4_res = svget4_u32 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_u32_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u32_z5_0, svuint32x4_t, svuint32_t, ++ z5_res = svget4_u32 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_u32_z5_1: ++** ret ++*/ ++TEST_GET (get4_u32_z5_1, svuint32x4_t, svuint32_t, ++ z5_res = svget4_u32 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_u32_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u32_z5_2, svuint32x4_t, svuint32_t, ++ z5_res = svget4_u32 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_u32_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u32_z5_3, svuint32x4_t, svuint32_t, ++ z5_res = svget4_u32 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_u32_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u32_z6_0, svuint32x4_t, svuint32_t, ++ z6_res = svget4_u32 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_u32_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u32_z6_1, svuint32x4_t, svuint32_t, ++ z6_res = svget4_u32 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_u32_z6_2: ++** ret ++*/ ++TEST_GET (get4_u32_z6_2, svuint32x4_t, svuint32_t, ++ z6_res = svget4_u32 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_u32_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u32_z6_3, svuint32x4_t, svuint32_t, ++ z6_res = svget4_u32 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_u32_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET 
(get4_u32_z7_0, svuint32x4_t, svuint32_t, ++ z7_res = svget4_u32 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_u32_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u32_z7_1, svuint32x4_t, svuint32_t, ++ z7_res = svget4_u32 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_u32_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u32_z7_2, svuint32x4_t, svuint32_t, ++ z7_res = svget4_u32 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_u32_z7_3: ++** ret ++*/ ++TEST_GET (get4_u32_z7_3, svuint32x4_t, svuint32_t, ++ z7_res = svget4_u32 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u64.c +new file mode 100644 +index 000000000..79c293a2c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u64.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_u64_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u64_z0_0, svuint64x4_t, svuint64_t, ++ z0 = svget4_u64 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_u64_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u64_z0_1, svuint64x4_t, svuint64_t, ++ z0 = svget4_u64 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_u64_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u64_z0_2, svuint64x4_t, svuint64_t, ++ z0 = svget4_u64 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_u64_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u64_z0_3, svuint64x4_t, svuint64_t, ++ z0 = svget4_u64 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_u64_z4_0: ++** ret ++*/ ++TEST_GET (get4_u64_z4_0, svuint64x4_t, svuint64_t, ++ z4_res = svget4_u64 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_u64_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u64_z4_1, svuint64x4_t, svuint64_t, ++ z4_res = svget4_u64 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_u64_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u64_z4_2, svuint64x4_t, svuint64_t, ++ z4_res = svget4_u64 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_u64_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u64_z4_3, svuint64x4_t, svuint64_t, ++ z4_res = svget4_u64 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_u64_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u64_z5_0, svuint64x4_t, svuint64_t, ++ z5_res = svget4_u64 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_u64_z5_1: ++** ret ++*/ ++TEST_GET (get4_u64_z5_1, svuint64x4_t, svuint64_t, ++ z5_res = svget4_u64 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_u64_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u64_z5_2, svuint64x4_t, svuint64_t, ++ z5_res = svget4_u64 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_u64_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u64_z5_3, svuint64x4_t, svuint64_t, ++ z5_res = svget4_u64 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_u64_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u64_z6_0, svuint64x4_t, svuint64_t, ++ z6_res = svget4_u64 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_u64_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u64_z6_1, svuint64x4_t, svuint64_t, ++ z6_res = svget4_u64 (z4, 1), ++ z6_res = svget4 (z4, 1)) ++ ++/* ++** get4_u64_z6_2: ++** ret ++*/ ++TEST_GET (get4_u64_z6_2, svuint64x4_t, svuint64_t, ++ z6_res = svget4_u64 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* 
++** get4_u64_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u64_z6_3, svuint64x4_t, svuint64_t, ++ z6_res = svget4_u64 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_u64_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u64_z7_0, svuint64x4_t, svuint64_t, ++ z7_res = svget4_u64 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_u64_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u64_z7_1, svuint64x4_t, svuint64_t, ++ z7_res = svget4_u64 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_u64_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u64_z7_2, svuint64x4_t, svuint64_t, ++ z7_res = svget4_u64 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_u64_z7_3: ++** ret ++*/ ++TEST_GET (get4_u64_z7_3, svuint64x4_t, svuint64_t, ++ z7_res = svget4_u64 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u8.c +new file mode 100644 +index 000000000..f3ad9a85b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u8.c +@@ -0,0 +1,179 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** get4_u8_z0_0: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u8_z0_0, svuint8x4_t, svuint8_t, ++ z0 = svget4_u8 (z4, 0), ++ z0 = svget4 (z4, 0)) ++ ++/* ++** get4_u8_z0_1: ++** mov z0\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u8_z0_1, svuint8x4_t, svuint8_t, ++ z0 = svget4_u8 (z4, 1), ++ z0 = svget4 (z4, 1)) ++ ++/* ++** get4_u8_z0_2: ++** mov z0\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u8_z0_2, svuint8x4_t, svuint8_t, ++ z0 = svget4_u8 (z4, 2), ++ z0 = svget4 (z4, 2)) ++ ++/* ++** get4_u8_z0_3: ++** mov z0\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u8_z0_3, svuint8x4_t, svuint8_t, ++ z0 = svget4_u8 (z4, 3), ++ z0 = svget4 (z4, 3)) ++ ++/* ++** get4_u8_z4_0: ++** ret ++*/ ++TEST_GET (get4_u8_z4_0, svuint8x4_t, svuint8_t, ++ z4_res = svget4_u8 (z4, 0), ++ z4_res = svget4 (z4, 0)) ++ ++/* ++** get4_u8_z4_1: ++** mov z4\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u8_z4_1, svuint8x4_t, svuint8_t, ++ z4_res = svget4_u8 (z4, 1), ++ z4_res = svget4 (z4, 1)) ++ ++/* ++** get4_u8_z4_2: ++** mov z4\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u8_z4_2, svuint8x4_t, svuint8_t, ++ z4_res = svget4_u8 (z4, 2), ++ z4_res = svget4 (z4, 2)) ++ ++/* ++** get4_u8_z4_3: ++** mov z4\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u8_z4_3, svuint8x4_t, svuint8_t, ++ z4_res = svget4_u8 (z4, 3), ++ z4_res = svget4 (z4, 3)) ++ ++/* ++** get4_u8_z5_0: ++** mov z5\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u8_z5_0, svuint8x4_t, svuint8_t, ++ z5_res = svget4_u8 (z4, 0), ++ z5_res = svget4 (z4, 0)) ++ ++/* ++** get4_u8_z5_1: ++** ret ++*/ ++TEST_GET (get4_u8_z5_1, svuint8x4_t, svuint8_t, ++ z5_res = svget4_u8 (z4, 1), ++ z5_res = svget4 (z4, 1)) ++ ++/* ++** get4_u8_z5_2: ++** mov z5\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u8_z5_2, svuint8x4_t, svuint8_t, ++ z5_res = svget4_u8 (z4, 2), ++ z5_res = svget4 (z4, 2)) ++ ++/* ++** get4_u8_z5_3: ++** mov z5\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u8_z5_3, svuint8x4_t, svuint8_t, ++ z5_res = svget4_u8 (z4, 3), ++ z5_res = svget4 (z4, 3)) ++ ++/* ++** get4_u8_z6_0: ++** mov z6\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u8_z6_0, svuint8x4_t, svuint8_t, ++ z6_res = svget4_u8 (z4, 0), ++ z6_res = svget4 (z4, 0)) ++ ++/* ++** get4_u8_z6_1: ++** mov z6\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u8_z6_1, svuint8x4_t, svuint8_t, ++ z6_res = svget4_u8 (z4, 1), ++ z6_res = svget4 
(z4, 1)) ++ ++/* ++** get4_u8_z6_2: ++** ret ++*/ ++TEST_GET (get4_u8_z6_2, svuint8x4_t, svuint8_t, ++ z6_res = svget4_u8 (z4, 2), ++ z6_res = svget4 (z4, 2)) ++ ++/* ++** get4_u8_z6_3: ++** mov z6\.d, z7\.d ++** ret ++*/ ++TEST_GET (get4_u8_z6_3, svuint8x4_t, svuint8_t, ++ z6_res = svget4_u8 (z4, 3), ++ z6_res = svget4 (z4, 3)) ++ ++/* ++** get4_u8_z7_0: ++** mov z7\.d, z4\.d ++** ret ++*/ ++TEST_GET (get4_u8_z7_0, svuint8x4_t, svuint8_t, ++ z7_res = svget4_u8 (z4, 0), ++ z7_res = svget4 (z4, 0)) ++ ++/* ++** get4_u8_z7_1: ++** mov z7\.d, z5\.d ++** ret ++*/ ++TEST_GET (get4_u8_z7_1, svuint8x4_t, svuint8_t, ++ z7_res = svget4_u8 (z4, 1), ++ z7_res = svget4 (z4, 1)) ++ ++/* ++** get4_u8_z7_2: ++** mov z7\.d, z6\.d ++** ret ++*/ ++TEST_GET (get4_u8_z7_2, svuint8x4_t, svuint8_t, ++ z7_res = svget4_u8 (z4, 2), ++ z7_res = svget4 (z4, 2)) ++ ++/* ++** get4_u8_z7_3: ++** ret ++*/ ++TEST_GET (get4_u8_z7_3, svuint8x4_t, svuint8_t, ++ z7_res = svget4_u8 (z4, 3), ++ z7_res = svget4 (z4, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s16.c +new file mode 100644 +index 000000000..90a1434f1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s16.c +@@ -0,0 +1,220 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** index_s16_w0_w1: ++** index z0\.h, w0, w1 ++** ret ++*/ ++TEST_S (index_s16_w0_w1, svint16_t, int16_t, ++ z0 = svindex_s16 (x0, x1)) ++ ++/* ++** index_s16_w0_2: ++** index z0\.h, w0, #2 ++** ret ++*/ ++TEST_S (index_s16_w0_2, svint16_t, int16_t, ++ z0 = svindex_s16 (x0, 2)) ++ ++/* ++** index_s16_50_2: ++** mov (w[0-9]+), 50 ++** index z0\.h, \1, #2 ++** ret ++*/ ++TEST_S (index_s16_50_2, svint16_t, int16_t, ++ z0 = svindex_s16 (50, 2)) ++ ++/* ++** index_s16_0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.h, #0, \1 ++** ret ++*/ ++TEST_S (index_s16_0_m17, svint16_t, int16_t, ++ z0 = svindex_s16 (0, -17)) ++ ++/* ++** index_s16_0_m16: ++** index z0\.h, #0, #-16 ++** ret ++*/ ++TEST_S (index_s16_0_m16, svint16_t, int16_t, ++ z0 = svindex_s16 (0, -16)) ++ ++/* ++** index_s16_0_1: ++** index z0\.h, #0, #1 ++** ret ++*/ ++TEST_S (index_s16_0_1, svint16_t, int16_t, ++ z0 = svindex_s16 (0, 1)) ++ ++/* ++** index_s16_0_15: ++** index z0\.h, #0, #15 ++** ret ++*/ ++TEST_S (index_s16_0_15, svint16_t, int16_t, ++ z0 = svindex_s16 (0, 15)) ++ ++/* ++** index_s16_0_16: ++** mov (w[0-9]+), 16 ++** index z0\.h, #0, \1 ++** ret ++*/ ++TEST_S (index_s16_0_16, svint16_t, int16_t, ++ z0 = svindex_s16 (0, 16)) ++ ++/* ++** index_s16_m17_1: ++** mov (w[0-9]+), -17 ++** index z0\.h, \1, #1 ++** ret ++*/ ++TEST_S (index_s16_m17_1, svint16_t, int16_t, ++ z0 = svindex_s16 (-17, 1)) ++ ++/* ++** index_s16_m16_1: ++** index z0\.h, #-16, #1 ++** ret ++*/ ++TEST_S (index_s16_m16_1, svint16_t, int16_t, ++ z0 = svindex_s16 (-16, 1)) ++ ++/* ++** index_s16_m1_1: ++** index z0\.h, #-1, #1 ++** ret ++*/ ++TEST_S (index_s16_m1_1, svint16_t, int16_t, ++ z0 = svindex_s16 (-1, 1)) ++ ++/* ++** index_s16_1_1: ++** index z0\.h, #1, #1 ++** ret ++*/ ++TEST_S (index_s16_1_1, svint16_t, int16_t, ++ z0 = svindex_s16 (1, 1)) ++ ++/* ++** index_s16_15_1: ++** index z0\.h, #15, #1 ++** ret ++*/ ++TEST_S (index_s16_15_1, svint16_t, int16_t, ++ z0 = svindex_s16 (15, 1)) ++ ++/* ++** index_s16_16_1: ++** mov (w[0-9]+), 16 ++** index z0\.h, \1, #1 ++** ret ++*/ ++TEST_S (index_s16_16_1, svint16_t, int16_t, ++ z0 = svindex_s16 (16, 1)) ++ ++/* ++** index_s16_m17_x0: ++** mov 
(w[0-9]+), -17 ++** index z0\.h, \1, w0 ++** ret ++*/ ++TEST_S (index_s16_m17_x0, svint16_t, int16_t, ++ z0 = svindex_s16 (-17, x0)) ++ ++/* ++** index_s16_m16_x0: ++** index z0\.h, #-16, w0 ++** ret ++*/ ++TEST_S (index_s16_m16_x0, svint16_t, int16_t, ++ z0 = svindex_s16 (-16, x0)) ++ ++/* ++** index_s16_m1_x0: ++** index z0\.h, #-1, w0 ++** ret ++*/ ++TEST_S (index_s16_m1_x0, svint16_t, int16_t, ++ z0 = svindex_s16 (-1, x0)) ++ ++/* ++** index_s16_0_x0: ++** index z0\.h, #0, w0 ++** ret ++*/ ++TEST_S (index_s16_0_x0, svint16_t, int16_t, ++ z0 = svindex_s16 (0, x0)) ++ ++/* ++** index_s16_1_x0: ++** index z0\.h, #1, w0 ++** ret ++*/ ++TEST_S (index_s16_1_x0, svint16_t, int16_t, ++ z0 = svindex_s16 (1, x0)) ++ ++/* ++** index_s16_15_x0: ++** index z0\.h, #15, w0 ++** ret ++*/ ++TEST_S (index_s16_15_x0, svint16_t, int16_t, ++ z0 = svindex_s16 (15, x0)) ++ ++/* ++** index_s16_16_x0: ++** mov (w[0-9]+), 16 ++** index z0\.h, \1, w0 ++** ret ++*/ ++TEST_S (index_s16_16_x0, svint16_t, int16_t, ++ z0 = svindex_s16 (16, x0)) ++ ++/* ++** index_s16_x0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.h, w0, \1 ++** ret ++*/ ++TEST_S (index_s16_x0_m17, svint16_t, int16_t, ++ z0 = svindex_s16 (x0, -17)) ++ ++/* ++** index_s16_x0_m16: ++** index z0\.h, w0, #-16 ++** ret ++*/ ++TEST_S (index_s16_x0_m16, svint16_t, int16_t, ++ z0 = svindex_s16 (x0, -16)) ++ ++/* ++** index_s16_x0_1: ++** index z0\.h, w0, #1 ++** ret ++*/ ++TEST_S (index_s16_x0_1, svint16_t, int16_t, ++ z0 = svindex_s16 (x0, 1)) ++ ++/* ++** index_s16_x0_15: ++** index z0\.h, w0, #15 ++** ret ++*/ ++TEST_S (index_s16_x0_15, svint16_t, int16_t, ++ z0 = svindex_s16 (x0, 15)) ++ ++/* ++** index_s16_x0_16: ++** mov (w[0-9]+), 16 ++** index z0\.h, w0, \1 ++** ret ++*/ ++TEST_S (index_s16_x0_16, svint16_t, int16_t, ++ z0 = svindex_s16 (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s32.c +new file mode 100644 +index 000000000..18afedac0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s32.c +@@ -0,0 +1,220 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** index_s32_w0_w1: ++** index z0\.s, w0, w1 ++** ret ++*/ ++TEST_S (index_s32_w0_w1, svint32_t, int32_t, ++ z0 = svindex_s32 (x0, x1)) ++ ++/* ++** index_s32_w0_2: ++** index z0\.s, w0, #2 ++** ret ++*/ ++TEST_S (index_s32_w0_2, svint32_t, int32_t, ++ z0 = svindex_s32 (x0, 2)) ++ ++/* ++** index_s32_50_2: ++** mov (w[0-9]+), 50 ++** index z0\.s, \1, #2 ++** ret ++*/ ++TEST_S (index_s32_50_2, svint32_t, int32_t, ++ z0 = svindex_s32 (50, 2)) ++ ++/* ++** index_s32_0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.s, #0, \1 ++** ret ++*/ ++TEST_S (index_s32_0_m17, svint32_t, int32_t, ++ z0 = svindex_s32 (0, -17)) ++ ++/* ++** index_s32_0_m16: ++** index z0\.s, #0, #-16 ++** ret ++*/ ++TEST_S (index_s32_0_m16, svint32_t, int32_t, ++ z0 = svindex_s32 (0, -16)) ++ ++/* ++** index_s32_0_1: ++** index z0\.s, #0, #1 ++** ret ++*/ ++TEST_S (index_s32_0_1, svint32_t, int32_t, ++ z0 = svindex_s32 (0, 1)) ++ ++/* ++** index_s32_0_15: ++** index z0\.s, #0, #15 ++** ret ++*/ ++TEST_S (index_s32_0_15, svint32_t, int32_t, ++ z0 = svindex_s32 (0, 15)) ++ ++/* ++** index_s32_0_16: ++** mov (w[0-9]+), 16 ++** index z0\.s, #0, \1 ++** ret ++*/ ++TEST_S (index_s32_0_16, svint32_t, int32_t, ++ z0 = svindex_s32 (0, 16)) ++ ++/* ++** index_s32_m17_1: ++** mov (w[0-9]+), -17 ++** index z0\.s, \1, #1 ++** ret ++*/ ++TEST_S (index_s32_m17_1, svint32_t, 
int32_t, ++ z0 = svindex_s32 (-17, 1)) ++ ++/* ++** index_s32_m16_1: ++** index z0\.s, #-16, #1 ++** ret ++*/ ++TEST_S (index_s32_m16_1, svint32_t, int32_t, ++ z0 = svindex_s32 (-16, 1)) ++ ++/* ++** index_s32_m1_1: ++** index z0\.s, #-1, #1 ++** ret ++*/ ++TEST_S (index_s32_m1_1, svint32_t, int32_t, ++ z0 = svindex_s32 (-1, 1)) ++ ++/* ++** index_s32_1_1: ++** index z0\.s, #1, #1 ++** ret ++*/ ++TEST_S (index_s32_1_1, svint32_t, int32_t, ++ z0 = svindex_s32 (1, 1)) ++ ++/* ++** index_s32_15_1: ++** index z0\.s, #15, #1 ++** ret ++*/ ++TEST_S (index_s32_15_1, svint32_t, int32_t, ++ z0 = svindex_s32 (15, 1)) ++ ++/* ++** index_s32_16_1: ++** mov (w[0-9]+), 16 ++** index z0\.s, \1, #1 ++** ret ++*/ ++TEST_S (index_s32_16_1, svint32_t, int32_t, ++ z0 = svindex_s32 (16, 1)) ++ ++/* ++** index_s32_m17_x0: ++** mov (w[0-9]+), -17 ++** index z0\.s, \1, w0 ++** ret ++*/ ++TEST_S (index_s32_m17_x0, svint32_t, int32_t, ++ z0 = svindex_s32 (-17, x0)) ++ ++/* ++** index_s32_m16_x0: ++** index z0\.s, #-16, w0 ++** ret ++*/ ++TEST_S (index_s32_m16_x0, svint32_t, int32_t, ++ z0 = svindex_s32 (-16, x0)) ++ ++/* ++** index_s32_m1_x0: ++** index z0\.s, #-1, w0 ++** ret ++*/ ++TEST_S (index_s32_m1_x0, svint32_t, int32_t, ++ z0 = svindex_s32 (-1, x0)) ++ ++/* ++** index_s32_0_x0: ++** index z0\.s, #0, w0 ++** ret ++*/ ++TEST_S (index_s32_0_x0, svint32_t, int32_t, ++ z0 = svindex_s32 (0, x0)) ++ ++/* ++** index_s32_1_x0: ++** index z0\.s, #1, w0 ++** ret ++*/ ++TEST_S (index_s32_1_x0, svint32_t, int32_t, ++ z0 = svindex_s32 (1, x0)) ++ ++/* ++** index_s32_15_x0: ++** index z0\.s, #15, w0 ++** ret ++*/ ++TEST_S (index_s32_15_x0, svint32_t, int32_t, ++ z0 = svindex_s32 (15, x0)) ++ ++/* ++** index_s32_16_x0: ++** mov (w[0-9]+), 16 ++** index z0\.s, \1, w0 ++** ret ++*/ ++TEST_S (index_s32_16_x0, svint32_t, int32_t, ++ z0 = svindex_s32 (16, x0)) ++ ++/* ++** index_s32_x0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.s, w0, \1 ++** ret ++*/ ++TEST_S (index_s32_x0_m17, svint32_t, int32_t, ++ z0 = svindex_s32 (x0, -17)) ++ ++/* ++** index_s32_x0_m16: ++** index z0\.s, w0, #-16 ++** ret ++*/ ++TEST_S (index_s32_x0_m16, svint32_t, int32_t, ++ z0 = svindex_s32 (x0, -16)) ++ ++/* ++** index_s32_x0_1: ++** index z0\.s, w0, #1 ++** ret ++*/ ++TEST_S (index_s32_x0_1, svint32_t, int32_t, ++ z0 = svindex_s32 (x0, 1)) ++ ++/* ++** index_s32_x0_15: ++** index z0\.s, w0, #15 ++** ret ++*/ ++TEST_S (index_s32_x0_15, svint32_t, int32_t, ++ z0 = svindex_s32 (x0, 15)) ++ ++/* ++** index_s32_x0_16: ++** mov (w[0-9]+), 16 ++** index z0\.s, w0, \1 ++** ret ++*/ ++TEST_S (index_s32_x0_16, svint32_t, int32_t, ++ z0 = svindex_s32 (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s64.c +new file mode 100644 +index 000000000..298eec9ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s64.c +@@ -0,0 +1,220 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** index_s64_x0_x1: ++** index z0\.d, x0, x1 ++** ret ++*/ ++TEST_S (index_s64_x0_x1, svint64_t, int64_t, ++ z0 = svindex_s64 (x0, x1)) ++ ++/* ++** index_s64_x0_2: ++** index z0\.d, x0, #2 ++** ret ++*/ ++TEST_S (index_s64_x0_2, svint64_t, int64_t, ++ z0 = svindex_s64 (x0, 2)) ++ ++/* ++** index_s64_50_2: ++** mov (x[0-9]+), 50 ++** index z0\.d, \1, #2 ++** ret ++*/ ++TEST_S (index_s64_50_2, svint64_t, int64_t, ++ z0 = svindex_s64 (50, 2)) ++ ++/* ++** index_s64_0_m17: ++** mov (x[0-9]+), -17 ++** index z0\.d, #0, \1 
++** ret ++*/ ++TEST_S (index_s64_0_m17, svint64_t, int64_t, ++ z0 = svindex_s64 (0, -17)) ++ ++/* ++** index_s64_0_m16: ++** index z0\.d, #0, #-16 ++** ret ++*/ ++TEST_S (index_s64_0_m16, svint64_t, int64_t, ++ z0 = svindex_s64 (0, -16)) ++ ++/* ++** index_s64_0_1: ++** index z0\.d, #0, #1 ++** ret ++*/ ++TEST_S (index_s64_0_1, svint64_t, int64_t, ++ z0 = svindex_s64 (0, 1)) ++ ++/* ++** index_s64_0_15: ++** index z0\.d, #0, #15 ++** ret ++*/ ++TEST_S (index_s64_0_15, svint64_t, int64_t, ++ z0 = svindex_s64 (0, 15)) ++ ++/* ++** index_s64_0_16: ++** mov (x[0-9]+), 16 ++** index z0\.d, #0, \1 ++** ret ++*/ ++TEST_S (index_s64_0_16, svint64_t, int64_t, ++ z0 = svindex_s64 (0, 16)) ++ ++/* ++** index_s64_m17_1: ++** mov (x[0-9]+), -17 ++** index z0\.d, \1, #1 ++** ret ++*/ ++TEST_S (index_s64_m17_1, svint64_t, int64_t, ++ z0 = svindex_s64 (-17, 1)) ++ ++/* ++** index_s64_m16_1: ++** index z0\.d, #-16, #1 ++** ret ++*/ ++TEST_S (index_s64_m16_1, svint64_t, int64_t, ++ z0 = svindex_s64 (-16, 1)) ++ ++/* ++** index_s64_m1_1: ++** index z0\.d, #-1, #1 ++** ret ++*/ ++TEST_S (index_s64_m1_1, svint64_t, int64_t, ++ z0 = svindex_s64 (-1, 1)) ++ ++/* ++** index_s64_1_1: ++** index z0\.d, #1, #1 ++** ret ++*/ ++TEST_S (index_s64_1_1, svint64_t, int64_t, ++ z0 = svindex_s64 (1, 1)) ++ ++/* ++** index_s64_15_1: ++** index z0\.d, #15, #1 ++** ret ++*/ ++TEST_S (index_s64_15_1, svint64_t, int64_t, ++ z0 = svindex_s64 (15, 1)) ++ ++/* ++** index_s64_16_1: ++** mov (x[0-9]+), 16 ++** index z0\.d, \1, #1 ++** ret ++*/ ++TEST_S (index_s64_16_1, svint64_t, int64_t, ++ z0 = svindex_s64 (16, 1)) ++ ++/* ++** index_s64_m17_x0: ++** mov (x[0-9]+), -17 ++** index z0\.d, \1, x0 ++** ret ++*/ ++TEST_S (index_s64_m17_x0, svint64_t, int64_t, ++ z0 = svindex_s64 (-17, x0)) ++ ++/* ++** index_s64_m16_x0: ++** index z0\.d, #-16, x0 ++** ret ++*/ ++TEST_S (index_s64_m16_x0, svint64_t, int64_t, ++ z0 = svindex_s64 (-16, x0)) ++ ++/* ++** index_s64_m1_x0: ++** index z0\.d, #-1, x0 ++** ret ++*/ ++TEST_S (index_s64_m1_x0, svint64_t, int64_t, ++ z0 = svindex_s64 (-1, x0)) ++ ++/* ++** index_s64_0_x0: ++** index z0\.d, #0, x0 ++** ret ++*/ ++TEST_S (index_s64_0_x0, svint64_t, int64_t, ++ z0 = svindex_s64 (0, x0)) ++ ++/* ++** index_s64_1_x0: ++** index z0\.d, #1, x0 ++** ret ++*/ ++TEST_S (index_s64_1_x0, svint64_t, int64_t, ++ z0 = svindex_s64 (1, x0)) ++ ++/* ++** index_s64_15_x0: ++** index z0\.d, #15, x0 ++** ret ++*/ ++TEST_S (index_s64_15_x0, svint64_t, int64_t, ++ z0 = svindex_s64 (15, x0)) ++ ++/* ++** index_s64_16_x0: ++** mov (x[0-9]+), 16 ++** index z0\.d, \1, x0 ++** ret ++*/ ++TEST_S (index_s64_16_x0, svint64_t, int64_t, ++ z0 = svindex_s64 (16, x0)) ++ ++/* ++** index_s64_x0_m17: ++** mov (x[0-9]+), -17 ++** index z0\.d, x0, \1 ++** ret ++*/ ++TEST_S (index_s64_x0_m17, svint64_t, int64_t, ++ z0 = svindex_s64 (x0, -17)) ++ ++/* ++** index_s64_x0_m16: ++** index z0\.d, x0, #-16 ++** ret ++*/ ++TEST_S (index_s64_x0_m16, svint64_t, int64_t, ++ z0 = svindex_s64 (x0, -16)) ++ ++/* ++** index_s64_x0_1: ++** index z0\.d, x0, #1 ++** ret ++*/ ++TEST_S (index_s64_x0_1, svint64_t, int64_t, ++ z0 = svindex_s64 (x0, 1)) ++ ++/* ++** index_s64_x0_15: ++** index z0\.d, x0, #15 ++** ret ++*/ ++TEST_S (index_s64_x0_15, svint64_t, int64_t, ++ z0 = svindex_s64 (x0, 15)) ++ ++/* ++** index_s64_x0_16: ++** mov (x[0-9]+), 16 ++** index z0\.d, x0, \1 ++** ret ++*/ ++TEST_S (index_s64_x0_16, svint64_t, int64_t, ++ z0 = svindex_s64 (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s8.c +new file mode 100644 +index 000000000..8a1f14f50 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s8.c +@@ -0,0 +1,220 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** index_s8_w0_w1: ++** index z0\.b, w0, w1 ++** ret ++*/ ++TEST_S (index_s8_w0_w1, svint8_t, int8_t, ++ z0 = svindex_s8 (x0, x1)) ++ ++/* ++** index_s8_w0_2: ++** index z0\.b, w0, #2 ++** ret ++*/ ++TEST_S (index_s8_w0_2, svint8_t, int8_t, ++ z0 = svindex_s8 (x0, 2)) ++ ++/* ++** index_s8_50_2: ++** mov (w[0-9]+), 50 ++** index z0\.b, \1, #2 ++** ret ++*/ ++TEST_S (index_s8_50_2, svint8_t, int8_t, ++ z0 = svindex_s8 (50, 2)) ++ ++/* ++** index_s8_0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.b, #0, \1 ++** ret ++*/ ++TEST_S (index_s8_0_m17, svint8_t, int8_t, ++ z0 = svindex_s8 (0, -17)) ++ ++/* ++** index_s8_0_m16: ++** index z0\.b, #0, #-16 ++** ret ++*/ ++TEST_S (index_s8_0_m16, svint8_t, int8_t, ++ z0 = svindex_s8 (0, -16)) ++ ++/* ++** index_s8_0_1: ++** index z0\.b, #0, #1 ++** ret ++*/ ++TEST_S (index_s8_0_1, svint8_t, int8_t, ++ z0 = svindex_s8 (0, 1)) ++ ++/* ++** index_s8_0_15: ++** index z0\.b, #0, #15 ++** ret ++*/ ++TEST_S (index_s8_0_15, svint8_t, int8_t, ++ z0 = svindex_s8 (0, 15)) ++ ++/* ++** index_s8_0_16: ++** mov (w[0-9]+), 16 ++** index z0\.b, #0, \1 ++** ret ++*/ ++TEST_S (index_s8_0_16, svint8_t, int8_t, ++ z0 = svindex_s8 (0, 16)) ++ ++/* ++** index_s8_m17_1: ++** mov (w[0-9]+), -17 ++** index z0\.b, \1, #1 ++** ret ++*/ ++TEST_S (index_s8_m17_1, svint8_t, int8_t, ++ z0 = svindex_s8 (-17, 1)) ++ ++/* ++** index_s8_m16_1: ++** index z0\.b, #-16, #1 ++** ret ++*/ ++TEST_S (index_s8_m16_1, svint8_t, int8_t, ++ z0 = svindex_s8 (-16, 1)) ++ ++/* ++** index_s8_m1_1: ++** index z0\.b, #-1, #1 ++** ret ++*/ ++TEST_S (index_s8_m1_1, svint8_t, int8_t, ++ z0 = svindex_s8 (-1, 1)) ++ ++/* ++** index_s8_1_1: ++** index z0\.b, #1, #1 ++** ret ++*/ ++TEST_S (index_s8_1_1, svint8_t, int8_t, ++ z0 = svindex_s8 (1, 1)) ++ ++/* ++** index_s8_15_1: ++** index z0\.b, #15, #1 ++** ret ++*/ ++TEST_S (index_s8_15_1, svint8_t, int8_t, ++ z0 = svindex_s8 (15, 1)) ++ ++/* ++** index_s8_16_1: ++** mov (w[0-9]+), 16 ++** index z0\.b, \1, #1 ++** ret ++*/ ++TEST_S (index_s8_16_1, svint8_t, int8_t, ++ z0 = svindex_s8 (16, 1)) ++ ++/* ++** index_s8_m17_x0: ++** mov (w[0-9]+), -17 ++** index z0\.b, \1, w0 ++** ret ++*/ ++TEST_S (index_s8_m17_x0, svint8_t, int8_t, ++ z0 = svindex_s8 (-17, x0)) ++ ++/* ++** index_s8_m16_x0: ++** index z0\.b, #-16, w0 ++** ret ++*/ ++TEST_S (index_s8_m16_x0, svint8_t, int8_t, ++ z0 = svindex_s8 (-16, x0)) ++ ++/* ++** index_s8_m1_x0: ++** index z0\.b, #-1, w0 ++** ret ++*/ ++TEST_S (index_s8_m1_x0, svint8_t, int8_t, ++ z0 = svindex_s8 (-1, x0)) ++ ++/* ++** index_s8_0_x0: ++** index z0\.b, #0, w0 ++** ret ++*/ ++TEST_S (index_s8_0_x0, svint8_t, int8_t, ++ z0 = svindex_s8 (0, x0)) ++ ++/* ++** index_s8_1_x0: ++** index z0\.b, #1, w0 ++** ret ++*/ ++TEST_S (index_s8_1_x0, svint8_t, int8_t, ++ z0 = svindex_s8 (1, x0)) ++ ++/* ++** index_s8_15_x0: ++** index z0\.b, #15, w0 ++** ret ++*/ ++TEST_S (index_s8_15_x0, svint8_t, int8_t, ++ z0 = svindex_s8 (15, x0)) ++ ++/* ++** index_s8_16_x0: ++** mov (w[0-9]+), 16 ++** index z0\.b, \1, w0 ++** ret ++*/ ++TEST_S (index_s8_16_x0, svint8_t, int8_t, ++ z0 = svindex_s8 (16, x0)) ++ ++/* ++** index_s8_x0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.b, w0, \1 ++** ret ++*/ ++TEST_S (index_s8_x0_m17, svint8_t, int8_t, ++ z0 
= svindex_s8 (x0, -17)) ++ ++/* ++** index_s8_x0_m16: ++** index z0\.b, w0, #-16 ++** ret ++*/ ++TEST_S (index_s8_x0_m16, svint8_t, int8_t, ++ z0 = svindex_s8 (x0, -16)) ++ ++/* ++** index_s8_x0_1: ++** index z0\.b, w0, #1 ++** ret ++*/ ++TEST_S (index_s8_x0_1, svint8_t, int8_t, ++ z0 = svindex_s8 (x0, 1)) ++ ++/* ++** index_s8_x0_15: ++** index z0\.b, w0, #15 ++** ret ++*/ ++TEST_S (index_s8_x0_15, svint8_t, int8_t, ++ z0 = svindex_s8 (x0, 15)) ++ ++/* ++** index_s8_x0_16: ++** mov (w[0-9]+), 16 ++** index z0\.b, w0, \1 ++** ret ++*/ ++TEST_S (index_s8_x0_16, svint8_t, int8_t, ++ z0 = svindex_s8 (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u16.c +new file mode 100644 +index 000000000..1c6631088 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u16.c +@@ -0,0 +1,220 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** index_u16_w0_w1: ++** index z0\.h, w0, w1 ++** ret ++*/ ++TEST_S (index_u16_w0_w1, svuint16_t, uint16_t, ++ z0 = svindex_u16 (x0, x1)) ++ ++/* ++** index_u16_w0_2: ++** index z0\.h, w0, #2 ++** ret ++*/ ++TEST_S (index_u16_w0_2, svuint16_t, uint16_t, ++ z0 = svindex_u16 (x0, 2)) ++ ++/* ++** index_u16_50_2: ++** mov (w[0-9]+), 50 ++** index z0\.h, \1, #2 ++** ret ++*/ ++TEST_S (index_u16_50_2, svuint16_t, uint16_t, ++ z0 = svindex_u16 (50, 2)) ++ ++/* ++** index_u16_0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.h, #0, \1 ++** ret ++*/ ++TEST_S (index_u16_0_m17, svuint16_t, uint16_t, ++ z0 = svindex_u16 (0, -17)) ++ ++/* ++** index_u16_0_m16: ++** index z0\.h, #0, #-16 ++** ret ++*/ ++TEST_S (index_u16_0_m16, svuint16_t, uint16_t, ++ z0 = svindex_u16 (0, -16)) ++ ++/* ++** index_u16_0_1: ++** index z0\.h, #0, #1 ++** ret ++*/ ++TEST_S (index_u16_0_1, svuint16_t, uint16_t, ++ z0 = svindex_u16 (0, 1)) ++ ++/* ++** index_u16_0_15: ++** index z0\.h, #0, #15 ++** ret ++*/ ++TEST_S (index_u16_0_15, svuint16_t, uint16_t, ++ z0 = svindex_u16 (0, 15)) ++ ++/* ++** index_u16_0_16: ++** mov (w[0-9]+), 16 ++** index z0\.h, #0, \1 ++** ret ++*/ ++TEST_S (index_u16_0_16, svuint16_t, uint16_t, ++ z0 = svindex_u16 (0, 16)) ++ ++/* ++** index_u16_m17_1: ++** mov (w[0-9]+), -17 ++** index z0\.h, \1, #1 ++** ret ++*/ ++TEST_S (index_u16_m17_1, svuint16_t, uint16_t, ++ z0 = svindex_u16 (-17, 1)) ++ ++/* ++** index_u16_m16_1: ++** index z0\.h, #-16, #1 ++** ret ++*/ ++TEST_S (index_u16_m16_1, svuint16_t, uint16_t, ++ z0 = svindex_u16 (-16, 1)) ++ ++/* ++** index_u16_m1_1: ++** index z0\.h, #-1, #1 ++** ret ++*/ ++TEST_S (index_u16_m1_1, svuint16_t, uint16_t, ++ z0 = svindex_u16 (-1, 1)) ++ ++/* ++** index_u16_1_1: ++** index z0\.h, #1, #1 ++** ret ++*/ ++TEST_S (index_u16_1_1, svuint16_t, uint16_t, ++ z0 = svindex_u16 (1, 1)) ++ ++/* ++** index_u16_15_1: ++** index z0\.h, #15, #1 ++** ret ++*/ ++TEST_S (index_u16_15_1, svuint16_t, uint16_t, ++ z0 = svindex_u16 (15, 1)) ++ ++/* ++** index_u16_16_1: ++** mov (w[0-9]+), 16 ++** index z0\.h, \1, #1 ++** ret ++*/ ++TEST_S (index_u16_16_1, svuint16_t, uint16_t, ++ z0 = svindex_u16 (16, 1)) ++ ++/* ++** index_u16_m17_x0: ++** mov (w[0-9]+), -17 ++** index z0\.h, \1, w0 ++** ret ++*/ ++TEST_S (index_u16_m17_x0, svuint16_t, uint16_t, ++ z0 = svindex_u16 (-17, x0)) ++ ++/* ++** index_u16_m16_x0: ++** index z0\.h, #-16, w0 ++** ret ++*/ ++TEST_S (index_u16_m16_x0, svuint16_t, uint16_t, ++ z0 = svindex_u16 (-16, x0)) ++ ++/* ++** index_u16_m1_x0: ++** index z0\.h, #-1, w0 ++** ret 
++*/ ++TEST_S (index_u16_m1_x0, svuint16_t, uint16_t, ++ z0 = svindex_u16 (-1, x0)) ++ ++/* ++** index_u16_0_x0: ++** index z0\.h, #0, w0 ++** ret ++*/ ++TEST_S (index_u16_0_x0, svuint16_t, uint16_t, ++ z0 = svindex_u16 (0, x0)) ++ ++/* ++** index_u16_1_x0: ++** index z0\.h, #1, w0 ++** ret ++*/ ++TEST_S (index_u16_1_x0, svuint16_t, uint16_t, ++ z0 = svindex_u16 (1, x0)) ++ ++/* ++** index_u16_15_x0: ++** index z0\.h, #15, w0 ++** ret ++*/ ++TEST_S (index_u16_15_x0, svuint16_t, uint16_t, ++ z0 = svindex_u16 (15, x0)) ++ ++/* ++** index_u16_16_x0: ++** mov (w[0-9]+), 16 ++** index z0\.h, \1, w0 ++** ret ++*/ ++TEST_S (index_u16_16_x0, svuint16_t, uint16_t, ++ z0 = svindex_u16 (16, x0)) ++ ++/* ++** index_u16_x0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.h, w0, \1 ++** ret ++*/ ++TEST_S (index_u16_x0_m17, svuint16_t, uint16_t, ++ z0 = svindex_u16 (x0, -17)) ++ ++/* ++** index_u16_x0_m16: ++** index z0\.h, w0, #-16 ++** ret ++*/ ++TEST_S (index_u16_x0_m16, svuint16_t, uint16_t, ++ z0 = svindex_u16 (x0, -16)) ++ ++/* ++** index_u16_x0_1: ++** index z0\.h, w0, #1 ++** ret ++*/ ++TEST_S (index_u16_x0_1, svuint16_t, uint16_t, ++ z0 = svindex_u16 (x0, 1)) ++ ++/* ++** index_u16_x0_15: ++** index z0\.h, w0, #15 ++** ret ++*/ ++TEST_S (index_u16_x0_15, svuint16_t, uint16_t, ++ z0 = svindex_u16 (x0, 15)) ++ ++/* ++** index_u16_x0_16: ++** mov (w[0-9]+), 16 ++** index z0\.h, w0, \1 ++** ret ++*/ ++TEST_S (index_u16_x0_16, svuint16_t, uint16_t, ++ z0 = svindex_u16 (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u32.c +new file mode 100644 +index 000000000..c2badb05e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u32.c +@@ -0,0 +1,220 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** index_u32_w0_w1: ++** index z0\.s, w0, w1 ++** ret ++*/ ++TEST_S (index_u32_w0_w1, svuint32_t, uint32_t, ++ z0 = svindex_u32 (x0, x1)) ++ ++/* ++** index_u32_w0_2: ++** index z0\.s, w0, #2 ++** ret ++*/ ++TEST_S (index_u32_w0_2, svuint32_t, uint32_t, ++ z0 = svindex_u32 (x0, 2)) ++ ++/* ++** index_u32_50_2: ++** mov (w[0-9]+), 50 ++** index z0\.s, \1, #2 ++** ret ++*/ ++TEST_S (index_u32_50_2, svuint32_t, uint32_t, ++ z0 = svindex_u32 (50, 2)) ++ ++/* ++** index_u32_0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.s, #0, \1 ++** ret ++*/ ++TEST_S (index_u32_0_m17, svuint32_t, uint32_t, ++ z0 = svindex_u32 (0, -17)) ++ ++/* ++** index_u32_0_m16: ++** index z0\.s, #0, #-16 ++** ret ++*/ ++TEST_S (index_u32_0_m16, svuint32_t, uint32_t, ++ z0 = svindex_u32 (0, -16)) ++ ++/* ++** index_u32_0_1: ++** index z0\.s, #0, #1 ++** ret ++*/ ++TEST_S (index_u32_0_1, svuint32_t, uint32_t, ++ z0 = svindex_u32 (0, 1)) ++ ++/* ++** index_u32_0_15: ++** index z0\.s, #0, #15 ++** ret ++*/ ++TEST_S (index_u32_0_15, svuint32_t, uint32_t, ++ z0 = svindex_u32 (0, 15)) ++ ++/* ++** index_u32_0_16: ++** mov (w[0-9]+), 16 ++** index z0\.s, #0, \1 ++** ret ++*/ ++TEST_S (index_u32_0_16, svuint32_t, uint32_t, ++ z0 = svindex_u32 (0, 16)) ++ ++/* ++** index_u32_m17_1: ++** mov (w[0-9]+), -17 ++** index z0\.s, \1, #1 ++** ret ++*/ ++TEST_S (index_u32_m17_1, svuint32_t, uint32_t, ++ z0 = svindex_u32 (-17, 1)) ++ ++/* ++** index_u32_m16_1: ++** index z0\.s, #-16, #1 ++** ret ++*/ ++TEST_S (index_u32_m16_1, svuint32_t, uint32_t, ++ z0 = svindex_u32 (-16, 1)) ++ ++/* ++** index_u32_m1_1: ++** index z0\.s, #-1, #1 ++** ret ++*/ ++TEST_S (index_u32_m1_1, svuint32_t, uint32_t, ++ z0 = 
svindex_u32 (-1, 1)) ++ ++/* ++** index_u32_1_1: ++** index z0\.s, #1, #1 ++** ret ++*/ ++TEST_S (index_u32_1_1, svuint32_t, uint32_t, ++ z0 = svindex_u32 (1, 1)) ++ ++/* ++** index_u32_15_1: ++** index z0\.s, #15, #1 ++** ret ++*/ ++TEST_S (index_u32_15_1, svuint32_t, uint32_t, ++ z0 = svindex_u32 (15, 1)) ++ ++/* ++** index_u32_16_1: ++** mov (w[0-9]+), 16 ++** index z0\.s, \1, #1 ++** ret ++*/ ++TEST_S (index_u32_16_1, svuint32_t, uint32_t, ++ z0 = svindex_u32 (16, 1)) ++ ++/* ++** index_u32_m17_x0: ++** mov (w[0-9]+), -17 ++** index z0\.s, \1, w0 ++** ret ++*/ ++TEST_S (index_u32_m17_x0, svuint32_t, uint32_t, ++ z0 = svindex_u32 (-17, x0)) ++ ++/* ++** index_u32_m16_x0: ++** index z0\.s, #-16, w0 ++** ret ++*/ ++TEST_S (index_u32_m16_x0, svuint32_t, uint32_t, ++ z0 = svindex_u32 (-16, x0)) ++ ++/* ++** index_u32_m1_x0: ++** index z0\.s, #-1, w0 ++** ret ++*/ ++TEST_S (index_u32_m1_x0, svuint32_t, uint32_t, ++ z0 = svindex_u32 (-1, x0)) ++ ++/* ++** index_u32_0_x0: ++** index z0\.s, #0, w0 ++** ret ++*/ ++TEST_S (index_u32_0_x0, svuint32_t, uint32_t, ++ z0 = svindex_u32 (0, x0)) ++ ++/* ++** index_u32_1_x0: ++** index z0\.s, #1, w0 ++** ret ++*/ ++TEST_S (index_u32_1_x0, svuint32_t, uint32_t, ++ z0 = svindex_u32 (1, x0)) ++ ++/* ++** index_u32_15_x0: ++** index z0\.s, #15, w0 ++** ret ++*/ ++TEST_S (index_u32_15_x0, svuint32_t, uint32_t, ++ z0 = svindex_u32 (15, x0)) ++ ++/* ++** index_u32_16_x0: ++** mov (w[0-9]+), 16 ++** index z0\.s, \1, w0 ++** ret ++*/ ++TEST_S (index_u32_16_x0, svuint32_t, uint32_t, ++ z0 = svindex_u32 (16, x0)) ++ ++/* ++** index_u32_x0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.s, w0, \1 ++** ret ++*/ ++TEST_S (index_u32_x0_m17, svuint32_t, uint32_t, ++ z0 = svindex_u32 (x0, -17)) ++ ++/* ++** index_u32_x0_m16: ++** index z0\.s, w0, #-16 ++** ret ++*/ ++TEST_S (index_u32_x0_m16, svuint32_t, uint32_t, ++ z0 = svindex_u32 (x0, -16)) ++ ++/* ++** index_u32_x0_1: ++** index z0\.s, w0, #1 ++** ret ++*/ ++TEST_S (index_u32_x0_1, svuint32_t, uint32_t, ++ z0 = svindex_u32 (x0, 1)) ++ ++/* ++** index_u32_x0_15: ++** index z0\.s, w0, #15 ++** ret ++*/ ++TEST_S (index_u32_x0_15, svuint32_t, uint32_t, ++ z0 = svindex_u32 (x0, 15)) ++ ++/* ++** index_u32_x0_16: ++** mov (w[0-9]+), 16 ++** index z0\.s, w0, \1 ++** ret ++*/ ++TEST_S (index_u32_x0_16, svuint32_t, uint32_t, ++ z0 = svindex_u32 (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u64.c +new file mode 100644 +index 000000000..526c5e80a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u64.c +@@ -0,0 +1,220 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** index_u64_x0_x1: ++** index z0\.d, x0, x1 ++** ret ++*/ ++TEST_S (index_u64_x0_x1, svuint64_t, uint64_t, ++ z0 = svindex_u64 (x0, x1)) ++ ++/* ++** index_u64_x0_2: ++** index z0\.d, x0, #2 ++** ret ++*/ ++TEST_S (index_u64_x0_2, svuint64_t, uint64_t, ++ z0 = svindex_u64 (x0, 2)) ++ ++/* ++** index_u64_50_2: ++** mov (x[0-9]+), 50 ++** index z0\.d, \1, #2 ++** ret ++*/ ++TEST_S (index_u64_50_2, svuint64_t, uint64_t, ++ z0 = svindex_u64 (50, 2)) ++ ++/* ++** index_u64_0_m17: ++** mov (x[0-9]+), -17 ++** index z0\.d, #0, \1 ++** ret ++*/ ++TEST_S (index_u64_0_m17, svuint64_t, uint64_t, ++ z0 = svindex_u64 (0, -17)) ++ ++/* ++** index_u64_0_m16: ++** index z0\.d, #0, #-16 ++** ret ++*/ ++TEST_S (index_u64_0_m16, svuint64_t, uint64_t, ++ z0 = svindex_u64 (0, -16)) ++ ++/* ++** index_u64_0_1: ++** 
index z0\.d, #0, #1 ++** ret ++*/ ++TEST_S (index_u64_0_1, svuint64_t, uint64_t, ++ z0 = svindex_u64 (0, 1)) ++ ++/* ++** index_u64_0_15: ++** index z0\.d, #0, #15 ++** ret ++*/ ++TEST_S (index_u64_0_15, svuint64_t, uint64_t, ++ z0 = svindex_u64 (0, 15)) ++ ++/* ++** index_u64_0_16: ++** mov (x[0-9]+), 16 ++** index z0\.d, #0, \1 ++** ret ++*/ ++TEST_S (index_u64_0_16, svuint64_t, uint64_t, ++ z0 = svindex_u64 (0, 16)) ++ ++/* ++** index_u64_m17_1: ++** mov (x[0-9]+), -17 ++** index z0\.d, \1, #1 ++** ret ++*/ ++TEST_S (index_u64_m17_1, svuint64_t, uint64_t, ++ z0 = svindex_u64 (-17, 1)) ++ ++/* ++** index_u64_m16_1: ++** index z0\.d, #-16, #1 ++** ret ++*/ ++TEST_S (index_u64_m16_1, svuint64_t, uint64_t, ++ z0 = svindex_u64 (-16, 1)) ++ ++/* ++** index_u64_m1_1: ++** index z0\.d, #-1, #1 ++** ret ++*/ ++TEST_S (index_u64_m1_1, svuint64_t, uint64_t, ++ z0 = svindex_u64 (-1, 1)) ++ ++/* ++** index_u64_1_1: ++** index z0\.d, #1, #1 ++** ret ++*/ ++TEST_S (index_u64_1_1, svuint64_t, uint64_t, ++ z0 = svindex_u64 (1, 1)) ++ ++/* ++** index_u64_15_1: ++** index z0\.d, #15, #1 ++** ret ++*/ ++TEST_S (index_u64_15_1, svuint64_t, uint64_t, ++ z0 = svindex_u64 (15, 1)) ++ ++/* ++** index_u64_16_1: ++** mov (x[0-9]+), 16 ++** index z0\.d, \1, #1 ++** ret ++*/ ++TEST_S (index_u64_16_1, svuint64_t, uint64_t, ++ z0 = svindex_u64 (16, 1)) ++ ++/* ++** index_u64_m17_x0: ++** mov (x[0-9]+), -17 ++** index z0\.d, \1, x0 ++** ret ++*/ ++TEST_S (index_u64_m17_x0, svuint64_t, uint64_t, ++ z0 = svindex_u64 (-17, x0)) ++ ++/* ++** index_u64_m16_x0: ++** index z0\.d, #-16, x0 ++** ret ++*/ ++TEST_S (index_u64_m16_x0, svuint64_t, uint64_t, ++ z0 = svindex_u64 (-16, x0)) ++ ++/* ++** index_u64_m1_x0: ++** index z0\.d, #-1, x0 ++** ret ++*/ ++TEST_S (index_u64_m1_x0, svuint64_t, uint64_t, ++ z0 = svindex_u64 (-1, x0)) ++ ++/* ++** index_u64_0_x0: ++** index z0\.d, #0, x0 ++** ret ++*/ ++TEST_S (index_u64_0_x0, svuint64_t, uint64_t, ++ z0 = svindex_u64 (0, x0)) ++ ++/* ++** index_u64_1_x0: ++** index z0\.d, #1, x0 ++** ret ++*/ ++TEST_S (index_u64_1_x0, svuint64_t, uint64_t, ++ z0 = svindex_u64 (1, x0)) ++ ++/* ++** index_u64_15_x0: ++** index z0\.d, #15, x0 ++** ret ++*/ ++TEST_S (index_u64_15_x0, svuint64_t, uint64_t, ++ z0 = svindex_u64 (15, x0)) ++ ++/* ++** index_u64_16_x0: ++** mov (x[0-9]+), 16 ++** index z0\.d, \1, x0 ++** ret ++*/ ++TEST_S (index_u64_16_x0, svuint64_t, uint64_t, ++ z0 = svindex_u64 (16, x0)) ++ ++/* ++** index_u64_x0_m17: ++** mov (x[0-9]+), -17 ++** index z0\.d, x0, \1 ++** ret ++*/ ++TEST_S (index_u64_x0_m17, svuint64_t, uint64_t, ++ z0 = svindex_u64 (x0, -17)) ++ ++/* ++** index_u64_x0_m16: ++** index z0\.d, x0, #-16 ++** ret ++*/ ++TEST_S (index_u64_x0_m16, svuint64_t, uint64_t, ++ z0 = svindex_u64 (x0, -16)) ++ ++/* ++** index_u64_x0_1: ++** index z0\.d, x0, #1 ++** ret ++*/ ++TEST_S (index_u64_x0_1, svuint64_t, uint64_t, ++ z0 = svindex_u64 (x0, 1)) ++ ++/* ++** index_u64_x0_15: ++** index z0\.d, x0, #15 ++** ret ++*/ ++TEST_S (index_u64_x0_15, svuint64_t, uint64_t, ++ z0 = svindex_u64 (x0, 15)) ++ ++/* ++** index_u64_x0_16: ++** mov (x[0-9]+), 16 ++** index z0\.d, x0, \1 ++** ret ++*/ ++TEST_S (index_u64_x0_16, svuint64_t, uint64_t, ++ z0 = svindex_u64 (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u8.c +new file mode 100644 +index 000000000..c6ce12ec8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u8.c +@@ -0,0 +1,220 @@ ++/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** index_u8_w0_w1: ++** index z0\.b, w0, w1 ++** ret ++*/ ++TEST_S (index_u8_w0_w1, svuint8_t, uint8_t, ++ z0 = svindex_u8 (x0, x1)) ++ ++/* ++** index_u8_w0_2: ++** index z0\.b, w0, #2 ++** ret ++*/ ++TEST_S (index_u8_w0_2, svuint8_t, uint8_t, ++ z0 = svindex_u8 (x0, 2)) ++ ++/* ++** index_u8_50_2: ++** mov (w[0-9]+), 50 ++** index z0\.b, \1, #2 ++** ret ++*/ ++TEST_S (index_u8_50_2, svuint8_t, uint8_t, ++ z0 = svindex_u8 (50, 2)) ++ ++/* ++** index_u8_0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.b, #0, \1 ++** ret ++*/ ++TEST_S (index_u8_0_m17, svuint8_t, uint8_t, ++ z0 = svindex_u8 (0, -17)) ++ ++/* ++** index_u8_0_m16: ++** index z0\.b, #0, #-16 ++** ret ++*/ ++TEST_S (index_u8_0_m16, svuint8_t, uint8_t, ++ z0 = svindex_u8 (0, -16)) ++ ++/* ++** index_u8_0_1: ++** index z0\.b, #0, #1 ++** ret ++*/ ++TEST_S (index_u8_0_1, svuint8_t, uint8_t, ++ z0 = svindex_u8 (0, 1)) ++ ++/* ++** index_u8_0_15: ++** index z0\.b, #0, #15 ++** ret ++*/ ++TEST_S (index_u8_0_15, svuint8_t, uint8_t, ++ z0 = svindex_u8 (0, 15)) ++ ++/* ++** index_u8_0_16: ++** mov (w[0-9]+), 16 ++** index z0\.b, #0, \1 ++** ret ++*/ ++TEST_S (index_u8_0_16, svuint8_t, uint8_t, ++ z0 = svindex_u8 (0, 16)) ++ ++/* ++** index_u8_m17_1: ++** mov (w[0-9]+), -17 ++** index z0\.b, \1, #1 ++** ret ++*/ ++TEST_S (index_u8_m17_1, svuint8_t, uint8_t, ++ z0 = svindex_u8 (-17, 1)) ++ ++/* ++** index_u8_m16_1: ++** index z0\.b, #-16, #1 ++** ret ++*/ ++TEST_S (index_u8_m16_1, svuint8_t, uint8_t, ++ z0 = svindex_u8 (-16, 1)) ++ ++/* ++** index_u8_m1_1: ++** index z0\.b, #-1, #1 ++** ret ++*/ ++TEST_S (index_u8_m1_1, svuint8_t, uint8_t, ++ z0 = svindex_u8 (-1, 1)) ++ ++/* ++** index_u8_1_1: ++** index z0\.b, #1, #1 ++** ret ++*/ ++TEST_S (index_u8_1_1, svuint8_t, uint8_t, ++ z0 = svindex_u8 (1, 1)) ++ ++/* ++** index_u8_15_1: ++** index z0\.b, #15, #1 ++** ret ++*/ ++TEST_S (index_u8_15_1, svuint8_t, uint8_t, ++ z0 = svindex_u8 (15, 1)) ++ ++/* ++** index_u8_16_1: ++** mov (w[0-9]+), 16 ++** index z0\.b, \1, #1 ++** ret ++*/ ++TEST_S (index_u8_16_1, svuint8_t, uint8_t, ++ z0 = svindex_u8 (16, 1)) ++ ++/* ++** index_u8_m17_x0: ++** mov (w[0-9]+), -17 ++** index z0\.b, \1, w0 ++** ret ++*/ ++TEST_S (index_u8_m17_x0, svuint8_t, uint8_t, ++ z0 = svindex_u8 (-17, x0)) ++ ++/* ++** index_u8_m16_x0: ++** index z0\.b, #-16, w0 ++** ret ++*/ ++TEST_S (index_u8_m16_x0, svuint8_t, uint8_t, ++ z0 = svindex_u8 (-16, x0)) ++ ++/* ++** index_u8_m1_x0: ++** index z0\.b, #-1, w0 ++** ret ++*/ ++TEST_S (index_u8_m1_x0, svuint8_t, uint8_t, ++ z0 = svindex_u8 (-1, x0)) ++ ++/* ++** index_u8_0_x0: ++** index z0\.b, #0, w0 ++** ret ++*/ ++TEST_S (index_u8_0_x0, svuint8_t, uint8_t, ++ z0 = svindex_u8 (0, x0)) ++ ++/* ++** index_u8_1_x0: ++** index z0\.b, #1, w0 ++** ret ++*/ ++TEST_S (index_u8_1_x0, svuint8_t, uint8_t, ++ z0 = svindex_u8 (1, x0)) ++ ++/* ++** index_u8_15_x0: ++** index z0\.b, #15, w0 ++** ret ++*/ ++TEST_S (index_u8_15_x0, svuint8_t, uint8_t, ++ z0 = svindex_u8 (15, x0)) ++ ++/* ++** index_u8_16_x0: ++** mov (w[0-9]+), 16 ++** index z0\.b, \1, w0 ++** ret ++*/ ++TEST_S (index_u8_16_x0, svuint8_t, uint8_t, ++ z0 = svindex_u8 (16, x0)) ++ ++/* ++** index_u8_x0_m17: ++** mov (w[0-9]+), -17 ++** index z0\.b, w0, \1 ++** ret ++*/ ++TEST_S (index_u8_x0_m17, svuint8_t, uint8_t, ++ z0 = svindex_u8 (x0, -17)) ++ ++/* ++** index_u8_x0_m16: ++** index z0\.b, w0, #-16 ++** ret ++*/ ++TEST_S (index_u8_x0_m16, svuint8_t, uint8_t, ++ z0 = svindex_u8 (x0, -16)) ++ ++/* ++** index_u8_x0_1: ++** index 
z0\.b, w0, #1 ++** ret ++*/ ++TEST_S (index_u8_x0_1, svuint8_t, uint8_t, ++ z0 = svindex_u8 (x0, 1)) ++ ++/* ++** index_u8_x0_15: ++** index z0\.b, w0, #15 ++** ret ++*/ ++TEST_S (index_u8_x0_15, svuint8_t, uint8_t, ++ z0 = svindex_u8 (x0, 15)) ++ ++/* ++** index_u8_x0_16: ++** mov (w[0-9]+), 16 ++** index z0\.b, w0, \1 ++** ret ++*/ ++TEST_S (index_u8_x0_16, svuint8_t, uint8_t, ++ z0 = svindex_u8 (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_bf16.c +new file mode 100644 +index 000000000..55afdba62 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_bf16.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_h4_bf16_tied1: ++** insr z0\.h, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (insr_h4_bf16_tied1, svbfloat16_t, bfloat16_t, ++ z0 = svinsr_n_bf16 (z0, d4), ++ z0 = svinsr (z0, d4)) ++ ++/* ++** insr_h4_bf16_untied: ++** movprfx z0, z1 ++** insr z0\.h, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (insr_h4_bf16_untied, svbfloat16_t, bfloat16_t, ++ z0 = svinsr_n_bf16 (z1, d4), ++ z0 = svinsr (z1, d4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f16.c +new file mode 100644 +index 000000000..f01a36189 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f16.c +@@ -0,0 +1,51 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_h4_f16_tied1: ++** insr z0\.h, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (insr_h4_f16_tied1, svfloat16_t, __fp16, ++ z0 = svinsr_n_f16 (z0, d4), ++ z0 = svinsr (z0, d4)) ++ ++/* ++** insr_h4_f16_untied: ++** movprfx z0, z1 ++** insr z0\.h, h4 ++** ret ++*/ ++TEST_UNIFORM_ZD (insr_h4_f16_untied, svfloat16_t, __fp16, ++ z0 = svinsr_n_f16 (z1, d4), ++ z0 = svinsr (z1, d4)) ++ ++/* ++** insr_0_f16_tied1: ++** insr z0\.h, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_f16_tied1, svfloat16_t, ++ z0 = svinsr_n_f16 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_f16_untied: ++** movprfx z0, z1 ++** insr z0\.h, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_f16_untied, svfloat16_t, ++ z0 = svinsr_n_f16 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_f16: ++** fmov (h[0-9]+), #?1\.0(?:e\+0)? 
++** insr z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_f16, svfloat16_t, ++ z0 = svinsr_n_f16 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f32.c +new file mode 100644 +index 000000000..e339727b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f32.c +@@ -0,0 +1,51 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_s4_f32_tied1: ++** insr z0\.s, s4 ++** ret ++*/ ++TEST_UNIFORM_ZD (insr_s4_f32_tied1, svfloat32_t, float, ++ z0 = svinsr_n_f32 (z0, d4), ++ z0 = svinsr (z0, d4)) ++ ++/* ++** insr_s4_f32_untied: ++** movprfx z0, z1 ++** insr z0\.s, s4 ++** ret ++*/ ++TEST_UNIFORM_ZD (insr_s4_f32_untied, svfloat32_t, float, ++ z0 = svinsr_n_f32 (z1, d4), ++ z0 = svinsr (z1, d4)) ++ ++/* ++** insr_0_f32_tied1: ++** insr z0\.s, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_f32_tied1, svfloat32_t, ++ z0 = svinsr_n_f32 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_f32_untied: ++** movprfx z0, z1 ++** insr z0\.s, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_f32_untied, svfloat32_t, ++ z0 = svinsr_n_f32 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_f32: ++** fmov (s[0-9]+), #?1\.0(?:e\+0)? ++** insr z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_f32, svfloat32_t, ++ z0 = svinsr_n_f32 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f64.c +new file mode 100644 +index 000000000..9400225a5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f64.c +@@ -0,0 +1,51 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_d4_f64_tied1: ++** insr z0\.d, d4 ++** ret ++*/ ++TEST_UNIFORM_ZD (insr_d4_f64_tied1, svfloat64_t, double, ++ z0 = svinsr_n_f64 (z0, d4), ++ z0 = svinsr (z0, d4)) ++ ++/* ++** insr_d4_f64_untied: ++** movprfx z0, z1 ++** insr z0\.d, d4 ++** ret ++*/ ++TEST_UNIFORM_ZD (insr_d4_f64_untied, svfloat64_t, double, ++ z0 = svinsr_n_f64 (z1, d4), ++ z0 = svinsr (z1, d4)) ++ ++/* ++** insr_0_f64_tied1: ++** insr z0\.d, xzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_f64_tied1, svfloat64_t, ++ z0 = svinsr_n_f64 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_f64_untied: ++** movprfx z0, z1 ++** insr z0\.d, xzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_f64_untied, svfloat64_t, ++ z0 = svinsr_n_f64 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_f64: ++** fmov (d[0-9]+), #?1\.0(?:e\+0)? 
++** insr z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_f64, svfloat64_t, ++ z0 = svinsr_n_f64 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s16.c +new file mode 100644 +index 000000000..651977a9d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s16.c +@@ -0,0 +1,56 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_w0_s16_tied1: ++** insr z0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_s16_tied1, svint16_t, int16_t, ++ z0 = svinsr_n_s16 (z0, x0), ++ z0 = svinsr (z0, x0)) ++ ++/* ++** insr_w0_s16_untied: ++** movprfx z0, z1 ++** insr z0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_s16_untied, svint16_t, int16_t, ++ z0 = svinsr_n_s16 (z1, x0), ++ z0 = svinsr (z1, x0)) ++ ++/* ++** insr_0_s16_tied1: ++** insr z0\.h, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_s16_tied1, svint16_t, ++ z0 = svinsr_n_s16 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_s16_untied: ++** movprfx z0, z1 ++** insr z0\.h, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_s16_untied, svint16_t, ++ z0 = svinsr_n_s16 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_s16: ++** ( ++** mov (w[0-9]+), #?1 ++** insr z0\.h, \1 ++** | ++** movi v([0-9]+)\.4h, 0x1 ++** insr z0\.h, h\2 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_s16, svint16_t, ++ z0 = svinsr_n_s16 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s32.c +new file mode 100644 +index 000000000..a1dcfc090 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s32.c +@@ -0,0 +1,56 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_w0_s32_tied1: ++** insr z0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_s32_tied1, svint32_t, int32_t, ++ z0 = svinsr_n_s32 (z0, x0), ++ z0 = svinsr (z0, x0)) ++ ++/* ++** insr_w0_s32_untied: ++** movprfx z0, z1 ++** insr z0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_s32_untied, svint32_t, int32_t, ++ z0 = svinsr_n_s32 (z1, x0), ++ z0 = svinsr (z1, x0)) ++ ++/* ++** insr_0_s32_tied1: ++** insr z0\.s, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_s32_tied1, svint32_t, ++ z0 = svinsr_n_s32 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_s32_untied: ++** movprfx z0, z1 ++** insr z0\.s, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_s32_untied, svint32_t, ++ z0 = svinsr_n_s32 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_s32: ++** ( ++** mov (w[0-9]+), #?1 ++** insr z0\.s, \1 ++** | ++** movi v([0-9]+)\.2s, 0x1 ++** insr z0\.s, s\2 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_s32, svint32_t, ++ z0 = svinsr_n_s32 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s64.c +new file mode 100644 +index 000000000..32cdc8263 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s64.c +@@ -0,0 +1,56 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_x0_s64_tied1: ++** insr z0\.d, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_x0_s64_tied1, svint64_t, int64_t, ++ z0 = svinsr_n_s64 (z0, x0), ++ z0 = svinsr (z0, x0)) ++ ++/* ++** insr_x0_s64_untied: ++** movprfx z0, z1 ++** insr z0\.d, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_x0_s64_untied, 
svint64_t, int64_t, ++ z0 = svinsr_n_s64 (z1, x0), ++ z0 = svinsr (z1, x0)) ++ ++/* ++** insr_0_s64_tied1: ++** insr z0\.d, xzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_s64_tied1, svint64_t, ++ z0 = svinsr_n_s64 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_s64_untied: ++** movprfx z0, z1 ++** insr z0\.d, xzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_s64_untied, svint64_t, ++ z0 = svinsr_n_s64 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_s64: ++** ( ++** mov (x[0-9]+), #?1 ++** insr z0\.d, \1 ++** | ++** movi v([0-9]+)\.2d, 0x1 ++** insr z0\.d, d\2 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_s64, svint64_t, ++ z0 = svinsr_n_s64 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s8.c +new file mode 100644 +index 000000000..cb69b09fa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s8.c +@@ -0,0 +1,56 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_w0_s8_tied1: ++** insr z0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_s8_tied1, svint8_t, int8_t, ++ z0 = svinsr_n_s8 (z0, x0), ++ z0 = svinsr (z0, x0)) ++ ++/* ++** insr_w0_s8_untied: ++** movprfx z0, z1 ++** insr z0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_s8_untied, svint8_t, int8_t, ++ z0 = svinsr_n_s8 (z1, x0), ++ z0 = svinsr (z1, x0)) ++ ++/* ++** insr_0_s8_tied1: ++** insr z0\.b, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_s8_tied1, svint8_t, ++ z0 = svinsr_n_s8 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_s8_untied: ++** movprfx z0, z1 ++** insr z0\.b, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_s8_untied, svint8_t, ++ z0 = svinsr_n_s8 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_s8: ++** ( ++** mov (w[0-9]+), #?1 ++** insr z0\.b, \1 ++** | ++** movi v([0-9]+)\.8b, 0x1 ++** insr z0\.b, b\2 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_s8, svint8_t, ++ z0 = svinsr_n_s8 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u16.c +new file mode 100644 +index 000000000..35af77402 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u16.c +@@ -0,0 +1,56 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_w0_u16_tied1: ++** insr z0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_u16_tied1, svuint16_t, uint16_t, ++ z0 = svinsr_n_u16 (z0, x0), ++ z0 = svinsr (z0, x0)) ++ ++/* ++** insr_w0_u16_untied: ++** movprfx z0, z1 ++** insr z0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_u16_untied, svuint16_t, uint16_t, ++ z0 = svinsr_n_u16 (z1, x0), ++ z0 = svinsr (z1, x0)) ++ ++/* ++** insr_0_u16_tied1: ++** insr z0\.h, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_u16_tied1, svuint16_t, ++ z0 = svinsr_n_u16 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_u16_untied: ++** movprfx z0, z1 ++** insr z0\.h, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_u16_untied, svuint16_t, ++ z0 = svinsr_n_u16 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_u16: ++** ( ++** mov (w[0-9]+), #?1 ++** insr z0\.h, \1 ++** | ++** movi v([0-9]+)\.4h, 0x1 ++** insr z0\.h, h\2 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_u16, svuint16_t, ++ z0 = svinsr_n_u16 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u32.c +new file mode 100644 +index 
000000000..8a72e7f2a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u32.c +@@ -0,0 +1,56 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_w0_u32_tied1: ++** insr z0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_u32_tied1, svuint32_t, uint32_t, ++ z0 = svinsr_n_u32 (z0, x0), ++ z0 = svinsr (z0, x0)) ++ ++/* ++** insr_w0_u32_untied: ++** movprfx z0, z1 ++** insr z0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_u32_untied, svuint32_t, uint32_t, ++ z0 = svinsr_n_u32 (z1, x0), ++ z0 = svinsr (z1, x0)) ++ ++/* ++** insr_0_u32_tied1: ++** insr z0\.s, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_u32_tied1, svuint32_t, ++ z0 = svinsr_n_u32 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_u32_untied: ++** movprfx z0, z1 ++** insr z0\.s, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_u32_untied, svuint32_t, ++ z0 = svinsr_n_u32 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_u32: ++** ( ++** mov (w[0-9]+), #?1 ++** insr z0\.s, \1 ++** | ++** movi v([0-9]+)\.2s, 0x1 ++** insr z0\.s, s\2 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_u32, svuint32_t, ++ z0 = svinsr_n_u32 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u64.c +new file mode 100644 +index 000000000..ab23f677d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u64.c +@@ -0,0 +1,56 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_x0_u64_tied1: ++** insr z0\.d, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_x0_u64_tied1, svuint64_t, uint64_t, ++ z0 = svinsr_n_u64 (z0, x0), ++ z0 = svinsr (z0, x0)) ++ ++/* ++** insr_x0_u64_untied: ++** movprfx z0, z1 ++** insr z0\.d, x0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_x0_u64_untied, svuint64_t, uint64_t, ++ z0 = svinsr_n_u64 (z1, x0), ++ z0 = svinsr (z1, x0)) ++ ++/* ++** insr_0_u64_tied1: ++** insr z0\.d, xzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_u64_tied1, svuint64_t, ++ z0 = svinsr_n_u64 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_u64_untied: ++** movprfx z0, z1 ++** insr z0\.d, xzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_u64_untied, svuint64_t, ++ z0 = svinsr_n_u64 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_u64: ++** ( ++** mov (x[0-9]+), #?1 ++** insr z0\.d, \1 ++** | ++** movi v([0-9]+)\.2d, 0x1 ++** insr z0\.d, d\2 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_u64, svuint64_t, ++ z0 = svinsr_n_u64 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u8.c +new file mode 100644 +index 000000000..549d71882 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u8.c +@@ -0,0 +1,56 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** insr_w0_u8_tied1: ++** insr z0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_u8_tied1, svuint8_t, uint8_t, ++ z0 = svinsr_n_u8 (z0, x0), ++ z0 = svinsr (z0, x0)) ++ ++/* ++** insr_w0_u8_untied: ++** movprfx z0, z1 ++** insr z0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_ZX (insr_w0_u8_untied, svuint8_t, uint8_t, ++ z0 = svinsr_n_u8 (z1, x0), ++ z0 = svinsr (z1, x0)) ++ ++/* ++** insr_0_u8_tied1: ++** insr z0\.b, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_u8_tied1, svuint8_t, ++ z0 = svinsr_n_u8 (z0, 0), ++ z0 = svinsr (z0, 0)) ++ ++/* ++** insr_0_u8_untied: ++** movprfx z0, z1 
++** insr z0\.b, wzr ++** ret ++*/ ++TEST_UNIFORM_Z (insr_0_u8_untied, svuint8_t, ++ z0 = svinsr_n_u8 (z1, 0), ++ z0 = svinsr (z1, 0)) ++ ++/* ++** insr_1_u8: ++** ( ++** mov (w[0-9]+), #?1 ++** insr z0\.b, \1 ++** | ++** movi v([0-9]+)\.8b, 0x1 ++** insr z0\.b, b\2 ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (insr_1_u8, svuint8_t, ++ z0 = svinsr_n_u8 (z0, 1), ++ z0 = svinsr (z0, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_bf16.c +new file mode 100644 +index 000000000..da30e05e5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_bf16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_d0_bf16_tied: ++** lasta h0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_D (lasta_d0_bf16_tied, bfloat16_t, svbfloat16_t, ++ d0 = svlasta_bf16 (p0, z0), ++ d0 = svlasta (p0, z0)) ++ ++/* ++** lasta_d0_bf16_untied: ++** lasta h0, p0, z1\.h ++** ret ++*/ ++TEST_REDUCTION_D (lasta_d0_bf16_untied, bfloat16_t, svbfloat16_t, ++ d0 = svlasta_bf16 (p0, z1), ++ d0 = svlasta (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f16.c +new file mode 100644 +index 000000000..972b55ab6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_d0_f16_tied: ++** lasta h0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_D (lasta_d0_f16_tied, float16_t, svfloat16_t, ++ d0 = svlasta_f16 (p0, z0), ++ d0 = svlasta (p0, z0)) ++ ++/* ++** lasta_d0_f16_untied: ++** lasta h0, p0, z1\.h ++** ret ++*/ ++TEST_REDUCTION_D (lasta_d0_f16_untied, float16_t, svfloat16_t, ++ d0 = svlasta_f16 (p0, z1), ++ d0 = svlasta (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f32.c +new file mode 100644 +index 000000000..cfb537f2f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_d0_f32_tied: ++** lasta s0, p0, z0\.s ++** ret ++*/ ++TEST_REDUCTION_D (lasta_d0_f32_tied, float32_t, svfloat32_t, ++ d0 = svlasta_f32 (p0, z0), ++ d0 = svlasta (p0, z0)) ++ ++/* ++** lasta_d0_f32_untied: ++** lasta s0, p0, z1\.s ++** ret ++*/ ++TEST_REDUCTION_D (lasta_d0_f32_untied, float32_t, svfloat32_t, ++ d0 = svlasta_f32 (p0, z1), ++ d0 = svlasta (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f64.c +new file mode 100644 +index 000000000..a4a8a74c9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_d0_f64_tied: ++** lasta d0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_D (lasta_d0_f64_tied, float64_t, svfloat64_t, ++ d0 = svlasta_f64 (p0, z0), ++ d0 = svlasta (p0, z0)) ++ ++/* ++** lasta_d0_f64_untied: ++** lasta d0, p0, z1\.d ++** ret ++*/ ++TEST_REDUCTION_D (lasta_d0_f64_untied, float64_t, svfloat64_t, ++ d0 = svlasta_f64 (p0, z1), ++ d0 = svlasta (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s16.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s16.c +new file mode 100644 +index 000000000..54bd0248f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s16.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_x0_s16: ++** lasta w0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_X (lasta_x0_s16, int16_t, svint16_t, ++ x0 = svlasta_s16 (p0, z0), ++ x0 = svlasta (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s32.c +new file mode 100644 +index 000000000..18f852f94 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s32.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_x0_s32: ++** lasta w0, p0, z0\.s ++** ret ++*/ ++TEST_REDUCTION_X (lasta_x0_s32, int32_t, svint32_t, ++ x0 = svlasta_s32 (p0, z0), ++ x0 = svlasta (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s64.c +new file mode 100644 +index 000000000..6e45af3d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s64.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_x0_s64: ++** lasta x0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_X (lasta_x0_s64, int64_t, svint64_t, ++ x0 = svlasta_s64 (p0, z0), ++ x0 = svlasta (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s8.c +new file mode 100644 +index 000000000..58e574f30 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s8.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_x0_s8: ++** lasta w0, p0, z0\.b ++** ret ++*/ ++TEST_REDUCTION_X (lasta_x0_s8, int8_t, svint8_t, ++ x0 = svlasta_s8 (p0, z0), ++ x0 = svlasta (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u16.c +new file mode 100644 +index 000000000..a0e14eca4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u16.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_x0_u16: ++** lasta w0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_X (lasta_x0_u16, uint16_t, svuint16_t, ++ x0 = svlasta_u16 (p0, z0), ++ x0 = svlasta (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u32.c +new file mode 100644 +index 000000000..dab37c36a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u32.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_x0_u32: ++** lasta w0, p0, z0\.s ++** ret ++*/ ++TEST_REDUCTION_X (lasta_x0_u32, uint32_t, svuint32_t, ++ x0 = svlasta_u32 (p0, z0), ++ x0 = svlasta (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u64.c +new file mode 100644 +index 000000000..c766f36ec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u64.c +@@ -0,0 +1,12 @@ ++/* { 
dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_x0_u64: ++** lasta x0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_X (lasta_x0_u64, uint64_t, svuint64_t, ++ x0 = svlasta_u64 (p0, z0), ++ x0 = svlasta (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u8.c +new file mode 100644 +index 000000000..a83f25fe4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u8.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lasta_x0_u8: ++** lasta w0, p0, z0\.b ++** ret ++*/ ++TEST_REDUCTION_X (lasta_x0_u8, uint8_t, svuint8_t, ++ x0 = svlasta_u8 (p0, z0), ++ x0 = svlasta (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_bf16.c +new file mode 100644 +index 000000000..01ba39a02 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_bf16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_d0_bf16_tied: ++** lastb h0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_D (lastb_d0_bf16_tied, bfloat16_t, svbfloat16_t, ++ d0 = svlastb_bf16 (p0, z0), ++ d0 = svlastb (p0, z0)) ++ ++/* ++** lastb_d0_bf16_untied: ++** lastb h0, p0, z1\.h ++** ret ++*/ ++TEST_REDUCTION_D (lastb_d0_bf16_untied, bfloat16_t, svbfloat16_t, ++ d0 = svlastb_bf16 (p0, z1), ++ d0 = svlastb (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f16.c +new file mode 100644 +index 000000000..0bc7e9ef4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_d0_f16_tied: ++** lastb h0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_D (lastb_d0_f16_tied, float16_t, svfloat16_t, ++ d0 = svlastb_f16 (p0, z0), ++ d0 = svlastb (p0, z0)) ++ ++/* ++** lastb_d0_f16_untied: ++** lastb h0, p0, z1\.h ++** ret ++*/ ++TEST_REDUCTION_D (lastb_d0_f16_untied, float16_t, svfloat16_t, ++ d0 = svlastb_f16 (p0, z1), ++ d0 = svlastb (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f32.c +new file mode 100644 +index 000000000..b33d61eee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_d0_f32_tied: ++** lastb s0, p0, z0\.s ++** ret ++*/ ++TEST_REDUCTION_D (lastb_d0_f32_tied, float32_t, svfloat32_t, ++ d0 = svlastb_f32 (p0, z0), ++ d0 = svlastb (p0, z0)) ++ ++/* ++** lastb_d0_f32_untied: ++** lastb s0, p0, z1\.s ++** ret ++*/ ++TEST_REDUCTION_D (lastb_d0_f32_untied, float32_t, svfloat32_t, ++ d0 = svlastb_f32 (p0, z1), ++ d0 = svlastb (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f64.c +new file mode 100644 +index 000000000..9fa7de706 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_d0_f64_tied: ++** lastb 
d0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_D (lastb_d0_f64_tied, float64_t, svfloat64_t, ++ d0 = svlastb_f64 (p0, z0), ++ d0 = svlastb (p0, z0)) ++ ++/* ++** lastb_d0_f64_untied: ++** lastb d0, p0, z1\.d ++** ret ++*/ ++TEST_REDUCTION_D (lastb_d0_f64_untied, float64_t, svfloat64_t, ++ d0 = svlastb_f64 (p0, z1), ++ d0 = svlastb (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s16.c +new file mode 100644 +index 000000000..6575f21cd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s16.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_x0_s16: ++** lastb w0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_X (lastb_x0_s16, int16_t, svint16_t, ++ x0 = svlastb_s16 (p0, z0), ++ x0 = svlastb (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s32.c +new file mode 100644 +index 000000000..856e5bdc8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s32.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_x0_s32: ++** lastb w0, p0, z0\.s ++** ret ++*/ ++TEST_REDUCTION_X (lastb_x0_s32, int32_t, svint32_t, ++ x0 = svlastb_s32 (p0, z0), ++ x0 = svlastb (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s64.c +new file mode 100644 +index 000000000..bd7de2ab2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s64.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_x0_s64: ++** lastb x0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_X (lastb_x0_s64, int64_t, svint64_t, ++ x0 = svlastb_s64 (p0, z0), ++ x0 = svlastb (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s8.c +new file mode 100644 +index 000000000..4c343a705 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s8.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_x0_s8: ++** lastb w0, p0, z0\.b ++** ret ++*/ ++TEST_REDUCTION_X (lastb_x0_s8, int8_t, svint8_t, ++ x0 = svlastb_s8 (p0, z0), ++ x0 = svlastb (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u16.c +new file mode 100644 +index 000000000..7f3db1bb1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u16.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_x0_u16: ++** lastb w0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_X (lastb_x0_u16, uint16_t, svuint16_t, ++ x0 = svlastb_u16 (p0, z0), ++ x0 = svlastb (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u32.c +new file mode 100644 +index 000000000..c2eeacba0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u32.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_x0_u32: ++** lastb w0, p0, z0\.s 
++** ret ++*/ ++TEST_REDUCTION_X (lastb_x0_u32, uint32_t, svuint32_t, ++ x0 = svlastb_u32 (p0, z0), ++ x0 = svlastb (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u64.c +new file mode 100644 +index 000000000..1496ffa0e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u64.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_x0_u64: ++** lastb x0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_X (lastb_x0_u64, uint64_t, svuint64_t, ++ x0 = svlastb_u64 (p0, z0), ++ x0 = svlastb (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u8.c +new file mode 100644 +index 000000000..25f036063 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u8.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lastb_x0_u8: ++** lastb w0, p0, z0\.b ++** ret ++*/ ++TEST_REDUCTION_X (lastb_x0_u8, uint8_t, svuint8_t, ++ x0 = svlastb_u8 (p0, z0), ++ x0 = svlastb (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_bf16.c +new file mode 100644 +index 000000000..07891de04 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_bf16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_bf16_base: ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_bf16_base, svbfloat16_t, bfloat16_t, ++ z0 = svld1_bf16 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_bf16_index: ++** ld1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1_bf16_index, svbfloat16_t, bfloat16_t, ++ z0 = svld1_bf16 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_bf16_1: ++** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_bf16_1, svbfloat16_t, bfloat16_t, ++ z0 = svld1_bf16 (p0, x0 + svcnth ()), ++ z0 = svld1 (p0, x0 + svcnth ())) ++ ++/* ++** ld1_bf16_7: ++** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_bf16_7, svbfloat16_t, bfloat16_t, ++ z0 = svld1_bf16 (p0, x0 + svcnth () * 7), ++ z0 = svld1 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_bf16_8: ++** incb x0, all, mul #8 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_bf16_8, svbfloat16_t, bfloat16_t, ++ z0 = svld1_bf16 (p0, x0 + svcnth () * 8), ++ z0 = svld1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ld1_bf16_m1: ++** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_bf16_m1, svbfloat16_t, bfloat16_t, ++ z0 = svld1_bf16 (p0, x0 - svcnth ()), ++ z0 = svld1 (p0, x0 - svcnth ())) ++ ++/* ++** ld1_bf16_m8: ++** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_bf16_m8, svbfloat16_t, bfloat16_t, ++ z0 = svld1_bf16 (p0, x0 - svcnth () * 8), ++ z0 = svld1 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1_bf16_m9: ++** decb x0, all, mul #9 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_bf16_m9, svbfloat16_t, bfloat16_t, ++ z0 = svld1_bf16 (p0, x0 - svcnth () * 9), ++ z0 = svld1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ld1_vnum_bf16_0: ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_bf16_0, svbfloat16_t, bfloat16_t, ++ z0 = svld1_vnum_bf16 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_bf16_1: ++** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_bf16_1, svbfloat16_t, bfloat16_t, ++ z0 = svld1_vnum_bf16 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_bf16_7: ++** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_bf16_7, svbfloat16_t, bfloat16_t, ++ z0 = svld1_vnum_bf16 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_bf16_8: ++** incb x0, all, mul #8 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_bf16_8, svbfloat16_t, bfloat16_t, ++ z0 = svld1_vnum_bf16 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_bf16_m1: ++** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, ++ z0 = svld1_vnum_bf16 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_bf16_m8: ++** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_bf16_m8, svbfloat16_t, bfloat16_t, ++ z0 = svld1_vnum_bf16 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_bf16_m9: ++** decb x0, all, mul #9 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_bf16_m9, svbfloat16_t, bfloat16_t, ++ z0 = svld1_vnum_bf16 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, ++ z0 = svld1_vnum_bf16 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f16.c +new file mode 100644 +index 000000000..c3552bfbd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_f16_base: ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_f16_base, svfloat16_t, float16_t, ++ z0 = svld1_f16 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_f16_index: ++** ld1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1_f16_index, svfloat16_t, float16_t, ++ z0 = svld1_f16 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_f16_1: ++** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f16_1, svfloat16_t, float16_t, ++ z0 = svld1_f16 (p0, x0 + svcnth ()), ++ z0 = svld1 (p0, x0 + svcnth ())) ++ ++/* ++** ld1_f16_7: ++** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f16_7, svfloat16_t, float16_t, ++ z0 = svld1_f16 (p0, x0 + svcnth () * 7), ++ z0 = svld1 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1_f16_8: ++** incb x0, all, mul #8 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_f16_8, svfloat16_t, float16_t, ++ z0 = svld1_f16 (p0, x0 + svcnth () * 8), ++ z0 = svld1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ld1_f16_m1: ++** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f16_m1, svfloat16_t, float16_t, ++ z0 = svld1_f16 (p0, x0 - svcnth ()), ++ z0 = svld1 (p0, x0 - svcnth ())) ++ ++/* ++** ld1_f16_m8: ++** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f16_m8, svfloat16_t, float16_t, ++ z0 = svld1_f16 (p0, x0 - svcnth () * 8), ++ z0 = svld1 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_f16_m9: ++** decb x0, all, mul #9 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_f16_m9, svfloat16_t, float16_t, ++ z0 = svld1_f16 (p0, x0 - svcnth () * 9), ++ z0 = svld1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ld1_vnum_f16_0: ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f16_0, svfloat16_t, float16_t, ++ z0 = svld1_vnum_f16 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_f16_1: ++** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f16_1, svfloat16_t, float16_t, ++ z0 = svld1_vnum_f16 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_f16_7: ++** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f16_7, svfloat16_t, float16_t, ++ z0 = svld1_vnum_f16 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_f16_8: ++** incb x0, all, mul #8 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f16_8, svfloat16_t, float16_t, ++ z0 = svld1_vnum_f16 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_f16_m1: ++** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f16_m1, svfloat16_t, float16_t, ++ z0 = svld1_vnum_f16 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_f16_m8: ++** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f16_m8, svfloat16_t, float16_t, ++ z0 = svld1_vnum_f16 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_f16_m9: ++** decb x0, all, mul #9 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f16_m9, svfloat16_t, float16_t, ++ z0 = svld1_vnum_f16 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f16_x1, svfloat16_t, float16_t, ++ z0 = svld1_vnum_f16 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f32.c +new file mode 100644 +index 000000000..8990f48d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_f32_base: ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_f32_base, svfloat32_t, float32_t, ++ z0 = svld1_f32 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_f32_index: ++** ld1w z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1_f32_index, svfloat32_t, float32_t, ++ z0 = svld1_f32 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_f32_1: ++** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f32_1, svfloat32_t, float32_t, ++ z0 = svld1_f32 (p0, x0 + svcntw ()), ++ z0 = svld1 (p0, x0 + svcntw ())) ++ ++/* ++** ld1_f32_7: ++** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f32_7, svfloat32_t, float32_t, ++ z0 = svld1_f32 (p0, x0 + svcntw () * 7), ++ z0 = svld1 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_f32_8: ++** incb x0, all, mul #8 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_f32_8, svfloat32_t, float32_t, ++ z0 = svld1_f32 (p0, x0 + svcntw () * 8), ++ z0 = svld1 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1_f32_m1: ++** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f32_m1, svfloat32_t, float32_t, ++ z0 = svld1_f32 (p0, x0 - svcntw ()), ++ z0 = svld1 (p0, x0 - svcntw ())) ++ ++/* ++** ld1_f32_m8: ++** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f32_m8, svfloat32_t, float32_t, ++ z0 = svld1_f32 (p0, x0 - svcntw () * 8), ++ z0 = svld1 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_f32_m9: ++** decb x0, all, mul #9 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_f32_m9, svfloat32_t, float32_t, ++ z0 = svld1_f32 (p0, x0 - svcntw () * 9), ++ z0 = svld1 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1_vnum_f32_0: ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f32_0, svfloat32_t, float32_t, ++ z0 = svld1_vnum_f32 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_f32_1: ++** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f32_1, svfloat32_t, float32_t, ++ z0 = svld1_vnum_f32 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_f32_7: ++** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f32_7, svfloat32_t, float32_t, ++ z0 = svld1_vnum_f32 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_f32_8: ++** incb x0, all, mul #8 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f32_8, svfloat32_t, float32_t, ++ z0 = svld1_vnum_f32 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_f32_m1: ++** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f32_m1, svfloat32_t, float32_t, ++ z0 = svld1_vnum_f32 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_f32_m8: ++** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f32_m8, svfloat32_t, float32_t, ++ z0 = svld1_vnum_f32 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_f32_m9: ++** decb x0, all, mul #9 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f32_m9, svfloat32_t, float32_t, ++ z0 = svld1_vnum_f32 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** ld1_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f32_x1, svfloat32_t, float32_t, ++ z0 = svld1_vnum_f32 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f64.c +new file mode 100644 +index 000000000..eb28687fe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_f64_base: ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_f64_base, svfloat64_t, float64_t, ++ z0 = svld1_f64 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_f64_index: ++** ld1d z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld1_f64_index, svfloat64_t, float64_t, ++ z0 = svld1_f64 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_f64_1: ++** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f64_1, svfloat64_t, float64_t, ++ z0 = svld1_f64 (p0, x0 + svcntd ()), ++ z0 = svld1 (p0, x0 + svcntd ())) ++ ++/* ++** ld1_f64_7: ++** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f64_7, svfloat64_t, float64_t, ++ z0 = svld1_f64 (p0, x0 + svcntd () * 7), ++ z0 = svld1 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_f64_8: ++** incb x0, all, mul #8 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_f64_8, svfloat64_t, float64_t, ++ z0 = svld1_f64 (p0, x0 + svcntd () * 8), ++ z0 = svld1 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1_f64_m1: ++** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f64_m1, svfloat64_t, float64_t, ++ z0 = svld1_f64 (p0, x0 - svcntd ()), ++ z0 = svld1 (p0, x0 - svcntd ())) ++ ++/* ++** ld1_f64_m8: ++** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_f64_m8, svfloat64_t, float64_t, ++ z0 = svld1_f64 (p0, x0 - svcntd () * 8), ++ z0 = svld1 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_f64_m9: ++** decb x0, all, mul #9 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_f64_m9, svfloat64_t, float64_t, ++ z0 = svld1_f64 (p0, x0 - svcntd () * 9), ++ z0 = svld1 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1_vnum_f64_0: ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f64_0, svfloat64_t, float64_t, ++ z0 = svld1_vnum_f64 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_f64_1: ++** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f64_1, svfloat64_t, float64_t, ++ z0 = svld1_vnum_f64 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_f64_7: ++** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f64_7, svfloat64_t, float64_t, ++ z0 = svld1_vnum_f64 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1_vnum_f64_8: ++** incb x0, all, mul #8 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f64_8, svfloat64_t, float64_t, ++ z0 = svld1_vnum_f64 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_f64_m1: ++** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f64_m1, svfloat64_t, float64_t, ++ z0 = svld1_vnum_f64 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_f64_m8: ++** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f64_m8, svfloat64_t, float64_t, ++ z0 = svld1_vnum_f64 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_f64_m9: ++** decb x0, all, mul #9 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f64_m9, svfloat64_t, float64_t, ++ z0 = svld1_vnum_f64 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_f64_x1, svfloat64_t, float64_t, ++ z0 = svld1_vnum_f64 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c +new file mode 100644 +index 000000000..00b68ff29 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c +@@ -0,0 +1,272 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_gather_f32_tied1: ++** ld1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_f32_tied1, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_f32 (p0, z0), ++ z0_res = svld1_gather_f32 (p0, z0)) ++ ++/* ++** ld1_gather_f32_untied: ++** ld1w z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_f32_untied, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_f32 (p0, z1), ++ z0_res = svld1_gather_f32 (p0, z1)) ++ ++/* ++** ld1_gather_x0_f32_offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, x0), ++ z0_res = svld1_gather_offset_f32 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m4_f32_offset: ++** mov (x[0-9]+), #?-4 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m4_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, -4), ++ z0_res = svld1_gather_offset_f32 (p0, z0, -4)) ++ ++/* ++** ld1_gather_0_f32_offset: ++** ld1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 0), ++ z0_res = svld1_gather_offset_f32 (p0, z0, 0)) ++ ++/* ++** ld1_gather_5_f32_offset: ++** mov (x[0-9]+), #?5 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_5_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 5), ++ z0_res = svld1_gather_offset_f32 (p0, z0, 5)) ++ ++/* ++** ld1_gather_6_f32_offset: ++** mov (x[0-9]+), #?6 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_6_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 6), ++ z0_res = 
svld1_gather_offset_f32 (p0, z0, 6)) ++ ++/* ++** ld1_gather_7_f32_offset: ++** mov (x[0-9]+), #?7 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_7_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 7), ++ z0_res = svld1_gather_offset_f32 (p0, z0, 7)) ++ ++/* ++** ld1_gather_8_f32_offset: ++** ld1w z0\.s, p0/z, \[z0\.s, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_8_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 8), ++ z0_res = svld1_gather_offset_f32 (p0, z0, 8)) ++ ++/* ++** ld1_gather_124_f32_offset: ++** ld1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_124_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 124), ++ z0_res = svld1_gather_offset_f32 (p0, z0, 124)) ++ ++/* ++** ld1_gather_128_f32_offset: ++** mov (x[0-9]+), #?128 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_128_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 128), ++ z0_res = svld1_gather_offset_f32 (p0, z0, 128)) ++ ++/* ++** ld1_gather_x0_f32_index: ++** lsl (x[0-9]+), x0, #?2 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_f32 (p0, z0, x0), ++ z0_res = svld1_gather_index_f32 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m1_f32_index: ++** mov (x[0-9]+), #?-4 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m1_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_f32 (p0, z0, -1), ++ z0_res = svld1_gather_index_f32 (p0, z0, -1)) ++ ++/* ++** ld1_gather_0_f32_index: ++** ld1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_f32 (p0, z0, 0), ++ z0_res = svld1_gather_index_f32 (p0, z0, 0)) ++ ++/* ++** ld1_gather_5_f32_index: ++** ld1w z0\.s, p0/z, \[z0\.s, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_5_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_f32 (p0, z0, 5), ++ z0_res = svld1_gather_index_f32 (p0, z0, 5)) ++ ++/* ++** ld1_gather_31_f32_index: ++** ld1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_31_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_f32 (p0, z0, 31), ++ z0_res = svld1_gather_index_f32 (p0, z0, 31)) ++ ++/* ++** ld1_gather_32_f32_index: ++** mov (x[0-9]+), #?128 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_32_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_f32 (p0, z0, 32), ++ z0_res = svld1_gather_index_f32 (p0, z0, 32)) ++ ++/* ++** ld1_gather_x0_f32_s32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_f32_s32offset, svfloat32_t, float32_t, svint32_t, ++ z0_res = svld1_gather_s32offset_f32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_f32_s32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f32_s32offset, svfloat32_t, float32_t, svint32_t, ++ z0_res = svld1_gather_s32offset_f32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_f32_s32offset: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** 
ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_f32_s32offset, svfloat32_t, float32_t, svint32_t, ++ z0_res = svld1_gather_s32offset_f32 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_x0_f32_u32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_f32_u32offset, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svld1_gather_u32offset_f32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_f32_u32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f32_u32offset, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svld1_gather_u32offset_f32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_f32_u32offset: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_f32_u32offset, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svld1_gather_u32offset_f32 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_x0_f32_s32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_f32_s32index, svfloat32_t, float32_t, svint32_t, ++ z0_res = svld1_gather_s32index_f32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_f32_s32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f32_s32index, svfloat32_t, float32_t, svint32_t, ++ z0_res = svld1_gather_s32index_f32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_f32_s32index: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_f32_s32index, svfloat32_t, float32_t, svint32_t, ++ z0_res = svld1_gather_s32index_f32 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) ++ ++/* ++** ld1_gather_x0_f32_u32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_f32_u32index, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svld1_gather_u32index_f32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_f32_u32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f32_u32index, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svld1_gather_u32index_f32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_f32_u32index: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_f32_u32index, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svld1_gather_u32index_f32 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c +new file mode 100644 +index 000000000..47127960c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c +@@ -0,0 +1,348 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_gather_f64_tied1: ++** ld1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_f64_tied1, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_f64 (p0, z0), ++ z0_res = svld1_gather_f64 (p0, z0)) ++ ++/* ++** ld1_gather_f64_untied: ++** ld1d z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_f64_untied, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_f64 (p0, z1), ++ z0_res = svld1_gather_f64 (p0, z1)) ++ ++/* ++** ld1_gather_x0_f64_offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, x0), ++ z0_res = svld1_gather_offset_f64 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m8_f64_offset: ++** mov (x[0-9]+), #?-8 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m8_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, -8), ++ z0_res = svld1_gather_offset_f64 (p0, z0, -8)) ++ ++/* ++** ld1_gather_0_f64_offset: ++** ld1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 0), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 0)) ++ ++/* ++** ld1_gather_9_f64_offset: ++** mov (x[0-9]+), #?9 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_9_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 9), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 9)) ++ ++/* ++** ld1_gather_10_f64_offset: ++** mov (x[0-9]+), #?10 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_10_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 10), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 10)) ++ ++/* ++** ld1_gather_11_f64_offset: ++** mov (x[0-9]+), #?11 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_11_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 11), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 11)) ++ ++/* ++** ld1_gather_12_f64_offset: ++** mov (x[0-9]+), #?12 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_12_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 12), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 12)) ++ ++/* ++** ld1_gather_13_f64_offset: ++** mov (x[0-9]+), #?13 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_13_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 13), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 13)) ++ ++/* ++** ld1_gather_14_f64_offset: ++** mov (x[0-9]+), #?14 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_14_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 14), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 14)) ++ ++/* ++** ld1_gather_15_f64_offset: ++** mov (x[0-9]+), #?15 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_15_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 15), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 15)) ++ ++/* ++** ld1_gather_16_f64_offset: ++** ld1d z0\.d, p0/z, \[z0\.d, #16\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS 
(ld1_gather_16_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 16), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 16)) ++ ++/* ++** ld1_gather_248_f64_offset: ++** ld1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_248_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 248), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 248)) ++ ++/* ++** ld1_gather_256_f64_offset: ++** mov (x[0-9]+), #?256 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_256_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 256), ++ z0_res = svld1_gather_offset_f64 (p0, z0, 256)) ++ ++/* ++** ld1_gather_x0_f64_index: ++** lsl (x[0-9]+), x0, #?3 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_f64 (p0, z0, x0), ++ z0_res = svld1_gather_index_f64 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m1_f64_index: ++** mov (x[0-9]+), #?-8 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m1_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_f64 (p0, z0, -1), ++ z0_res = svld1_gather_index_f64 (p0, z0, -1)) ++ ++/* ++** ld1_gather_0_f64_index: ++** ld1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_f64 (p0, z0, 0), ++ z0_res = svld1_gather_index_f64 (p0, z0, 0)) ++ ++/* ++** ld1_gather_5_f64_index: ++** ld1d z0\.d, p0/z, \[z0\.d, #40\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_5_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_f64 (p0, z0, 5), ++ z0_res = svld1_gather_index_f64 (p0, z0, 5)) ++ ++/* ++** ld1_gather_31_f64_index: ++** ld1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_31_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_f64 (p0, z0, 31), ++ z0_res = svld1_gather_index_f64 (p0, z0, 31)) ++ ++/* ++** ld1_gather_32_f64_index: ++** mov (x[0-9]+), #?256 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_32_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_f64 (p0, z0, 32), ++ z0_res = svld1_gather_index_f64 (p0, z0, 32)) ++ ++/* ++** ld1_gather_x0_f64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_f64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_f64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_f64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_f64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_f64 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_f64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_f64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = 
svld1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1_gather_x0_f64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_f64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_f64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_f64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_f64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_f64 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_f64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_f64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1_gather_x0_f64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ z0_res = svld1_gather_s64index_f64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_f64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ z0_res = svld1_gather_s64index_f64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_f64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ z0_res = svld1_gather_s64index_f64 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_f64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ z0_res = svld1_gather_s64index_f64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1_gather_index (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1_gather_x0_f64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_f64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_f64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_f64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_f64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_f64 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_f64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_f64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = 
svld1_gather_index (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c +new file mode 100644 +index 000000000..9b6335547 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c +@@ -0,0 +1,272 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_gather_s32_tied1: ++** ld1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_s32_tied1, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_s32 (p0, z0), ++ z0_res = svld1_gather_s32 (p0, z0)) ++ ++/* ++** ld1_gather_s32_untied: ++** ld1w z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_s32_untied, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_s32 (p0, z1), ++ z0_res = svld1_gather_s32 (p0, z1)) ++ ++/* ++** ld1_gather_x0_s32_offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, x0), ++ z0_res = svld1_gather_offset_s32 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m4_s32_offset: ++** mov (x[0-9]+), #?-4 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m4_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, -4), ++ z0_res = svld1_gather_offset_s32 (p0, z0, -4)) ++ ++/* ++** ld1_gather_0_s32_offset: ++** ld1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 0), ++ z0_res = svld1_gather_offset_s32 (p0, z0, 0)) ++ ++/* ++** ld1_gather_5_s32_offset: ++** mov (x[0-9]+), #?5 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_5_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 5), ++ z0_res = svld1_gather_offset_s32 (p0, z0, 5)) ++ ++/* ++** ld1_gather_6_s32_offset: ++** mov (x[0-9]+), #?6 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_6_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 6), ++ z0_res = svld1_gather_offset_s32 (p0, z0, 6)) ++ ++/* ++** ld1_gather_7_s32_offset: ++** mov (x[0-9]+), #?7 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_7_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 7), ++ z0_res = svld1_gather_offset_s32 (p0, z0, 7)) ++ ++/* ++** ld1_gather_8_s32_offset: ++** ld1w z0\.s, p0/z, \[z0\.s, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_8_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 8), ++ z0_res = svld1_gather_offset_s32 (p0, z0, 8)) ++ ++/* ++** ld1_gather_124_s32_offset: ++** ld1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_124_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 124), ++ z0_res = svld1_gather_offset_s32 (p0, z0, 124)) ++ ++/* ++** ld1_gather_128_s32_offset: ++** mov (x[0-9]+), #?128 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_128_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 128), ++ z0_res = svld1_gather_offset_s32 (p0, z0, 128)) ++ ++/* ++** ld1_gather_x0_s32_index: 
++** lsl (x[0-9]+), x0, #?2 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_s32 (p0, z0, x0), ++ z0_res = svld1_gather_index_s32 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m1_s32_index: ++** mov (x[0-9]+), #?-4 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m1_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_s32 (p0, z0, -1), ++ z0_res = svld1_gather_index_s32 (p0, z0, -1)) ++ ++/* ++** ld1_gather_0_s32_index: ++** ld1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_s32 (p0, z0, 0), ++ z0_res = svld1_gather_index_s32 (p0, z0, 0)) ++ ++/* ++** ld1_gather_5_s32_index: ++** ld1w z0\.s, p0/z, \[z0\.s, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_5_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_s32 (p0, z0, 5), ++ z0_res = svld1_gather_index_s32 (p0, z0, 5)) ++ ++/* ++** ld1_gather_31_s32_index: ++** ld1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_31_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_s32 (p0, z0, 31), ++ z0_res = svld1_gather_index_s32 (p0, z0, 31)) ++ ++/* ++** ld1_gather_32_s32_index: ++** mov (x[0-9]+), #?128 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_32_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_s32 (p0, z0, 32), ++ z0_res = svld1_gather_index_s32 (p0, z0, 32)) ++ ++/* ++** ld1_gather_x0_s32_s32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_s32_s32offset, svint32_t, int32_t, svint32_t, ++ z0_res = svld1_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_s32_s32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s32_s32offset, svint32_t, int32_t, svint32_t, ++ z0_res = svld1_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_s32_s32offset: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_s32_s32offset, svint32_t, int32_t, svint32_t, ++ z0_res = svld1_gather_s32offset_s32 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_x0_s32_u32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_s32_u32offset, svint32_t, int32_t, svuint32_t, ++ z0_res = svld1_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_s32_u32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s32_u32offset, svint32_t, int32_t, svuint32_t, ++ z0_res = svld1_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_s32_u32offset: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_s32_u32offset, svint32_t, int32_t, svuint32_t, ++ z0_res = svld1_gather_u32offset_s32 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_x0_s32_s32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_s32_s32index, svint32_t, int32_t, svint32_t, ++ z0_res 
= svld1_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_s32_s32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s32_s32index, svint32_t, int32_t, svint32_t, ++ z0_res = svld1_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_s32_s32index: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_s32_s32index, svint32_t, int32_t, svint32_t, ++ z0_res = svld1_gather_s32index_s32 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) ++ ++/* ++** ld1_gather_x0_s32_u32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_s32_u32index, svint32_t, int32_t, svuint32_t, ++ z0_res = svld1_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_s32_u32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s32_u32index, svint32_t, int32_t, svuint32_t, ++ z0_res = svld1_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_s32_u32index: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_s32_u32index, svint32_t, int32_t, svuint32_t, ++ z0_res = svld1_gather_u32index_s32 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c +new file mode 100644 +index 000000000..c9cea3ad8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c +@@ -0,0 +1,348 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_gather_s64_tied1: ++** ld1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_s64 (p0, z0), ++ z0_res = svld1_gather_s64 (p0, z0)) ++ ++/* ++** ld1_gather_s64_untied: ++** ld1d z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_s64 (p0, z1), ++ z0_res = svld1_gather_s64 (p0, z1)) ++ ++/* ++** ld1_gather_x0_s64_offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svld1_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m8_s64_offset: ++** mov (x[0-9]+), #?-8 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m8_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, -8), ++ z0_res = svld1_gather_offset_s64 (p0, z0, -8)) ++ ++/* ++** ld1_gather_0_s64_offset: ++** ld1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ld1_gather_9_s64_offset: ++** mov (x[0-9]+), #?9 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_9_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 9), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 9)) ++ ++/* ++** ld1_gather_10_s64_offset: ++** mov (x[0-9]+), #?10 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_10_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 10), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 10)) ++ ++/* ++** ld1_gather_11_s64_offset: ++** mov (x[0-9]+), #?11 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_11_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 11), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 11)) ++ ++/* ++** ld1_gather_12_s64_offset: ++** mov (x[0-9]+), #?12 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_12_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 12), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 12)) ++ ++/* ++** ld1_gather_13_s64_offset: ++** mov (x[0-9]+), #?13 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_13_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 13), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 13)) ++ ++/* ++** ld1_gather_14_s64_offset: ++** mov (x[0-9]+), #?14 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_14_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 14), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 14)) ++ ++/* ++** ld1_gather_15_s64_offset: ++** mov (x[0-9]+), #?15 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_15_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 15), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 15)) ++ ++/* ++** ld1_gather_16_s64_offset: ++** ld1d z0\.d, p0/z, \[z0\.d, #16\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_16_s64_offset, 
svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 16), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 16)) ++ ++/* ++** ld1_gather_248_s64_offset: ++** ld1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_248_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 248), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 248)) ++ ++/* ++** ld1_gather_256_s64_offset: ++** mov (x[0-9]+), #?256 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_256_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 256), ++ z0_res = svld1_gather_offset_s64 (p0, z0, 256)) ++ ++/* ++** ld1_gather_x0_s64_index: ++** lsl (x[0-9]+), x0, #?3 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_s64 (p0, z0, x0), ++ z0_res = svld1_gather_index_s64 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m1_s64_index: ++** mov (x[0-9]+), #?-8 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m1_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_s64 (p0, z0, -1), ++ z0_res = svld1_gather_index_s64 (p0, z0, -1)) ++ ++/* ++** ld1_gather_0_s64_index: ++** ld1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_s64 (p0, z0, 0), ++ z0_res = svld1_gather_index_s64 (p0, z0, 0)) ++ ++/* ++** ld1_gather_5_s64_index: ++** ld1d z0\.d, p0/z, \[z0\.d, #40\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_5_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_s64 (p0, z0, 5), ++ z0_res = svld1_gather_index_s64 (p0, z0, 5)) ++ ++/* ++** ld1_gather_31_s64_index: ++** ld1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_31_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_s64 (p0, z0, 31), ++ z0_res = svld1_gather_index_s64 (p0, z0, 31)) ++ ++/* ++** ld1_gather_32_s64_index: ++** mov (x[0-9]+), #?256 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_32_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_s64 (p0, z0, 32), ++ z0_res = svld1_gather_index_s64 (p0, z0, 32)) ++ ++/* ++** ld1_gather_x0_s64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_s64_s64offset, svint64_t, int64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_s64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s64_s64offset, svint64_t, int64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_s64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_s64_s64offset, svint64_t, int64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_s64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_s64_s64offset, svint64_t, int64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** 
ld1_gather_x0_s64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_s64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_s64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_s64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1_gather_x0_s64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_s64_s64index, svint64_t, int64_t, svint64_t, ++ z0_res = svld1_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_s64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s64_s64index, svint64_t, int64_t, svint64_t, ++ z0_res = svld1_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_s64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_s64_s64index, svint64_t, int64_t, svint64_t, ++ z0_res = svld1_gather_s64index_s64 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_s64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_s64_s64index, svint64_t, int64_t, svint64_t, ++ z0_res = svld1_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1_gather_index (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1_gather_x0_s64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_s64_u64index, svint64_t, int64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_s64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s64_u64index, svint64_t, int64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_s64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_s64_u64index, svint64_t, int64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_s64 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_s64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_s64_u64index, svint64_t, int64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1_gather_index (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c +new file mode 100644 +index 000000000..2cccc8d49 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c +@@ -0,0 +1,272 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_gather_u32_tied1: ++** ld1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_u32 (p0, z0), ++ z0_res = svld1_gather_u32 (p0, z0)) ++ ++/* ++** ld1_gather_u32_untied: ++** ld1w z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_u32_untied, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_u32 (p0, z1), ++ z0_res = svld1_gather_u32 (p0, z1)) ++ ++/* ++** ld1_gather_x0_u32_offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, x0), ++ z0_res = svld1_gather_offset_u32 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m4_u32_offset: ++** mov (x[0-9]+), #?-4 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m4_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, -4), ++ z0_res = svld1_gather_offset_u32 (p0, z0, -4)) ++ ++/* ++** ld1_gather_0_u32_offset: ++** ld1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 0), ++ z0_res = svld1_gather_offset_u32 (p0, z0, 0)) ++ ++/* ++** ld1_gather_5_u32_offset: ++** mov (x[0-9]+), #?5 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_5_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 5), ++ z0_res = svld1_gather_offset_u32 (p0, z0, 5)) ++ ++/* ++** ld1_gather_6_u32_offset: ++** mov (x[0-9]+), #?6 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_6_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 6), ++ z0_res = svld1_gather_offset_u32 (p0, z0, 6)) ++ ++/* ++** ld1_gather_7_u32_offset: ++** mov (x[0-9]+), #?7 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_7_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 7), ++ z0_res = svld1_gather_offset_u32 (p0, z0, 7)) ++ ++/* ++** ld1_gather_8_u32_offset: ++** ld1w z0\.s, p0/z, \[z0\.s, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_8_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 8), ++ z0_res = svld1_gather_offset_u32 (p0, z0, 8)) ++ ++/* ++** ld1_gather_124_u32_offset: ++** ld1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_124_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 124), ++ z0_res = svld1_gather_offset_u32 (p0, z0, 124)) ++ ++/* ++** ld1_gather_128_u32_offset: ++** mov (x[0-9]+), #?128 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_128_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 128), ++ z0_res = svld1_gather_offset_u32 (p0, z0, 128)) ++ ++/* ++** ld1_gather_x0_u32_index: ++** lsl (x[0-9]+), x0, #?2 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS 
(ld1_gather_x0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_u32 (p0, z0, x0), ++ z0_res = svld1_gather_index_u32 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m1_u32_index: ++** mov (x[0-9]+), #?-4 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m1_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_u32 (p0, z0, -1), ++ z0_res = svld1_gather_index_u32 (p0, z0, -1)) ++ ++/* ++** ld1_gather_0_u32_index: ++** ld1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_u32 (p0, z0, 0), ++ z0_res = svld1_gather_index_u32 (p0, z0, 0)) ++ ++/* ++** ld1_gather_5_u32_index: ++** ld1w z0\.s, p0/z, \[z0\.s, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_5_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_u32 (p0, z0, 5), ++ z0_res = svld1_gather_index_u32 (p0, z0, 5)) ++ ++/* ++** ld1_gather_31_u32_index: ++** ld1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_31_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_u32 (p0, z0, 31), ++ z0_res = svld1_gather_index_u32 (p0, z0, 31)) ++ ++/* ++** ld1_gather_32_u32_index: ++** mov (x[0-9]+), #?128 ++** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_32_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1_gather_u32base_index_u32 (p0, z0, 32), ++ z0_res = svld1_gather_index_u32 (p0, z0, 32)) ++ ++/* ++** ld1_gather_x0_u32_s32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_u32_s32offset, svuint32_t, uint32_t, svint32_t, ++ z0_res = svld1_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_u32_s32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u32_s32offset, svuint32_t, uint32_t, svint32_t, ++ z0_res = svld1_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_u32_s32offset: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_u32_s32offset, svuint32_t, uint32_t, svint32_t, ++ z0_res = svld1_gather_s32offset_u32 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_x0_u32_u32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_u32_u32offset, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svld1_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_u32_u32offset: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u32_u32offset, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svld1_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_u32_u32offset: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_u32_u32offset, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svld1_gather_u32offset_u32 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_x0_u32_s32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_u32_s32index, svuint32_t, uint32_t, svint32_t, ++ z0_res = svld1_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) 
++ ++/* ++** ld1_gather_tied1_u32_s32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u32_s32index, svuint32_t, uint32_t, svint32_t, ++ z0_res = svld1_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_u32_s32index: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_u32_s32index, svuint32_t, uint32_t, svint32_t, ++ z0_res = svld1_gather_s32index_u32 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) ++ ++/* ++** ld1_gather_x0_u32_u32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_u32_u32index, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svld1_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_u32_u32index: ++** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u32_u32index, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svld1_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_u32_u32index: ++** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_u32_u32index, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svld1_gather_u32index_u32 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c +new file mode 100644 +index 000000000..6ee1d48ab +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c +@@ -0,0 +1,348 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_gather_u64_tied1: ++** ld1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_u64 (p0, z0), ++ z0_res = svld1_gather_u64 (p0, z0)) ++ ++/* ++** ld1_gather_u64_untied: ++** ld1d z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_u64 (p0, z1), ++ z0_res = svld1_gather_u64 (p0, z1)) ++ ++/* ++** ld1_gather_x0_u64_offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svld1_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m8_u64_offset: ++** mov (x[0-9]+), #?-8 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m8_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, -8), ++ z0_res = svld1_gather_offset_u64 (p0, z0, -8)) ++ ++/* ++** ld1_gather_0_u64_offset: ++** ld1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ld1_gather_9_u64_offset: ++** mov (x[0-9]+), #?9 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_9_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 9), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 9)) ++ ++/* ++** ld1_gather_10_u64_offset: ++** mov (x[0-9]+), #?10 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_10_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 10), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 10)) ++ ++/* ++** ld1_gather_11_u64_offset: ++** mov (x[0-9]+), #?11 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_11_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 11), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 11)) ++ ++/* ++** ld1_gather_12_u64_offset: ++** mov (x[0-9]+), #?12 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_12_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 12), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 12)) ++ ++/* ++** ld1_gather_13_u64_offset: ++** mov (x[0-9]+), #?13 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_13_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 13), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 13)) ++ ++/* ++** ld1_gather_14_u64_offset: ++** mov (x[0-9]+), #?14 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_14_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 14), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 14)) ++ ++/* ++** ld1_gather_15_u64_offset: ++** mov (x[0-9]+), #?15 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_15_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 15), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 15)) ++ ++/* ++** ld1_gather_16_u64_offset: ++** ld1d z0\.d, p0/z, \[z0\.d, #16\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS 
(ld1_gather_16_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 16), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 16)) ++ ++/* ++** ld1_gather_248_u64_offset: ++** ld1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_248_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 248), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 248)) ++ ++/* ++** ld1_gather_256_u64_offset: ++** mov (x[0-9]+), #?256 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_256_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 256), ++ z0_res = svld1_gather_offset_u64 (p0, z0, 256)) ++ ++/* ++** ld1_gather_x0_u64_index: ++** lsl (x[0-9]+), x0, #?3 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_x0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_u64 (p0, z0, x0), ++ z0_res = svld1_gather_index_u64 (p0, z0, x0)) ++ ++/* ++** ld1_gather_m1_u64_index: ++** mov (x[0-9]+), #?-8 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_m1_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_u64 (p0, z0, -1), ++ z0_res = svld1_gather_index_u64 (p0, z0, -1)) ++ ++/* ++** ld1_gather_0_u64_index: ++** ld1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_u64 (p0, z0, 0), ++ z0_res = svld1_gather_index_u64 (p0, z0, 0)) ++ ++/* ++** ld1_gather_5_u64_index: ++** ld1d z0\.d, p0/z, \[z0\.d, #40\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_5_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_u64 (p0, z0, 5), ++ z0_res = svld1_gather_index_u64 (p0, z0, 5)) ++ ++/* ++** ld1_gather_31_u64_index: ++** ld1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_31_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_u64 (p0, z0, 31), ++ z0_res = svld1_gather_index_u64 (p0, z0, 31)) ++ ++/* ++** ld1_gather_32_u64_index: ++** mov (x[0-9]+), #?256 ++** ld1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1_gather_32_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1_gather_u64base_index_u64 (p0, z0, 32), ++ z0_res = svld1_gather_index_u64 (p0, z0, 32)) ++ ++/* ++** ld1_gather_x0_u64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_u64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_u64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_u64_s64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ z0_res = svld1_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1_gather_offset (p0, x0, 
svextw_x (p0, z1))) ++ ++/* ++** ld1_gather_x0_u64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_u64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1_gather_offset (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_u64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svld1_gather_offset (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_u64_u64offset: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svld1_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1_gather_x0_u64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ z0_res = svld1_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_u64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ z0_res = svld1_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_u64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ z0_res = svld1_gather_s64index_u64 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_u64_s64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ z0_res = svld1_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1_gather_index (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1_gather_x0_u64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_x0_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_tied1_u64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svld1_gather_index (p0, x0, z0)) ++ ++/* ++** ld1_gather_untied_u64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_untied_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_u64 (p0, x0, z1), ++ z0_res = svld1_gather_index (p0, x0, z1)) ++ ++/* ++** ld1_gather_ext_u64_u64index: ++** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1_gather_ext_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svld1_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1_gather_index (p0, x0, svextw_x (p0, z1))) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s16.c +new file mode 100644 +index 000000000..d86b49a73 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_s16_base: ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s16_base, svint16_t, int16_t, ++ z0 = svld1_s16 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_s16_index: ++** ld1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1_s16_index, svint16_t, int16_t, ++ z0 = svld1_s16 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_s16_1: ++** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s16_1, svint16_t, int16_t, ++ z0 = svld1_s16 (p0, x0 + svcnth ()), ++ z0 = svld1 (p0, x0 + svcnth ())) ++ ++/* ++** ld1_s16_7: ++** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s16_7, svint16_t, int16_t, ++ z0 = svld1_s16 (p0, x0 + svcnth () * 7), ++ z0 = svld1 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_s16_8: ++** incb x0, all, mul #8 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s16_8, svint16_t, int16_t, ++ z0 = svld1_s16 (p0, x0 + svcnth () * 8), ++ z0 = svld1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ld1_s16_m1: ++** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s16_m1, svint16_t, int16_t, ++ z0 = svld1_s16 (p0, x0 - svcnth ()), ++ z0 = svld1 (p0, x0 - svcnth ())) ++ ++/* ++** ld1_s16_m8: ++** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s16_m8, svint16_t, int16_t, ++ z0 = svld1_s16 (p0, x0 - svcnth () * 8), ++ z0 = svld1 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_s16_m9: ++** decb x0, all, mul #9 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s16_m9, svint16_t, int16_t, ++ z0 = svld1_s16 (p0, x0 - svcnth () * 9), ++ z0 = svld1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ld1_vnum_s16_0: ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s16_0, svint16_t, int16_t, ++ z0 = svld1_vnum_s16 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_s16_1: ++** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s16_1, svint16_t, int16_t, ++ z0 = svld1_vnum_s16 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_s16_7: ++** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s16_7, svint16_t, int16_t, ++ z0 = svld1_vnum_s16 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_s16_8: ++** incb x0, all, mul #8 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s16_8, svint16_t, int16_t, ++ z0 = svld1_vnum_s16 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_s16_m1: ++** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s16_m1, svint16_t, int16_t, ++ z0 = svld1_vnum_s16 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_s16_m8: ++** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s16_m8, svint16_t, int16_t, ++ z0 = svld1_vnum_s16 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1_vnum_s16_m9: ++** decb x0, all, mul #9 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s16_m9, svint16_t, int16_t, ++ z0 = svld1_vnum_s16 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s16_x1, svint16_t, int16_t, ++ z0 = svld1_vnum_s16 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s32.c +new file mode 100644 +index 000000000..5b692e510 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_s32_base: ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s32_base, svint32_t, int32_t, ++ z0 = svld1_s32 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_s32_index: ++** ld1w z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1_s32_index, svint32_t, int32_t, ++ z0 = svld1_s32 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_s32_1: ++** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s32_1, svint32_t, int32_t, ++ z0 = svld1_s32 (p0, x0 + svcntw ()), ++ z0 = svld1 (p0, x0 + svcntw ())) ++ ++/* ++** ld1_s32_7: ++** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s32_7, svint32_t, int32_t, ++ z0 = svld1_s32 (p0, x0 + svcntw () * 7), ++ z0 = svld1 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_s32_8: ++** incb x0, all, mul #8 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s32_8, svint32_t, int32_t, ++ z0 = svld1_s32 (p0, x0 + svcntw () * 8), ++ z0 = svld1 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1_s32_m1: ++** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s32_m1, svint32_t, int32_t, ++ z0 = svld1_s32 (p0, x0 - svcntw ()), ++ z0 = svld1 (p0, x0 - svcntw ())) ++ ++/* ++** ld1_s32_m8: ++** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s32_m8, svint32_t, int32_t, ++ z0 = svld1_s32 (p0, x0 - svcntw () * 8), ++ z0 = svld1 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_s32_m9: ++** decb x0, all, mul #9 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s32_m9, svint32_t, int32_t, ++ z0 = svld1_s32 (p0, x0 - svcntw () * 9), ++ z0 = svld1 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1_vnum_s32_0: ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s32_0, svint32_t, int32_t, ++ z0 = svld1_vnum_s32 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_s32_1: ++** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s32_1, svint32_t, int32_t, ++ z0 = svld1_vnum_s32 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_s32_7: ++** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s32_7, svint32_t, int32_t, ++ z0 = svld1_vnum_s32 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1_vnum_s32_8: ++** incb x0, all, mul #8 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s32_8, svint32_t, int32_t, ++ z0 = svld1_vnum_s32 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_s32_m1: ++** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s32_m1, svint32_t, int32_t, ++ z0 = svld1_vnum_s32 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_s32_m8: ++** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s32_m8, svint32_t, int32_t, ++ z0 = svld1_vnum_s32 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_s32_m9: ++** decb x0, all, mul #9 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s32_m9, svint32_t, int32_t, ++ z0 = svld1_vnum_s32 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s32_x1, svint32_t, int32_t, ++ z0 = svld1_vnum_s32 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s64.c +new file mode 100644 +index 000000000..15ee29bba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_s64_base: ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s64_base, svint64_t, int64_t, ++ z0 = svld1_s64 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_s64_index: ++** ld1d z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld1_s64_index, svint64_t, int64_t, ++ z0 = svld1_s64 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_s64_1: ++** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s64_1, svint64_t, int64_t, ++ z0 = svld1_s64 (p0, x0 + svcntd ()), ++ z0 = svld1 (p0, x0 + svcntd ())) ++ ++/* ++** ld1_s64_7: ++** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s64_7, svint64_t, int64_t, ++ z0 = svld1_s64 (p0, x0 + svcntd () * 7), ++ z0 = svld1 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_s64_8: ++** incb x0, all, mul #8 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s64_8, svint64_t, int64_t, ++ z0 = svld1_s64 (p0, x0 + svcntd () * 8), ++ z0 = svld1 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1_s64_m1: ++** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s64_m1, svint64_t, int64_t, ++ z0 = svld1_s64 (p0, x0 - svcntd ()), ++ z0 = svld1 (p0, x0 - svcntd ())) ++ ++/* ++** ld1_s64_m8: ++** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s64_m8, svint64_t, int64_t, ++ z0 = svld1_s64 (p0, x0 - svcntd () * 8), ++ z0 = svld1 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1_s64_m9: ++** decb x0, all, mul #9 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s64_m9, svint64_t, int64_t, ++ z0 = svld1_s64 (p0, x0 - svcntd () * 9), ++ z0 = svld1 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1_vnum_s64_0: ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s64_0, svint64_t, int64_t, ++ z0 = svld1_vnum_s64 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_s64_1: ++** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s64_1, svint64_t, int64_t, ++ z0 = svld1_vnum_s64 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_s64_7: ++** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s64_7, svint64_t, int64_t, ++ z0 = svld1_vnum_s64 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_s64_8: ++** incb x0, all, mul #8 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s64_8, svint64_t, int64_t, ++ z0 = svld1_vnum_s64 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_s64_m1: ++** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s64_m1, svint64_t, int64_t, ++ z0 = svld1_vnum_s64 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_s64_m8: ++** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s64_m8, svint64_t, int64_t, ++ z0 = svld1_vnum_s64 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_s64_m9: ++** decb x0, all, mul #9 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s64_m9, svint64_t, int64_t, ++ z0 = svld1_vnum_s64 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s64_x1, svint64_t, int64_t, ++ z0 = svld1_vnum_s64 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s8.c +new file mode 100644 +index 000000000..036fb3d41 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s8.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_s8_base: ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s8_base, svint8_t, int8_t, ++ z0 = svld1_s8 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_s8_index: ++** ld1b z0\.b, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1_s8_index, svint8_t, int8_t, ++ z0 = svld1_s8 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_s8_1: ++** ld1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s8_1, svint8_t, int8_t, ++ z0 = svld1_s8 (p0, x0 + svcntb ()), ++ z0 = svld1 (p0, x0 + svcntb ())) ++ ++/* ++** ld1_s8_7: ++** ld1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s8_7, svint8_t, int8_t, ++ z0 = svld1_s8 (p0, x0 + svcntb () * 7), ++ z0 = svld1 (p0, x0 + svcntb () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1_s8_8: ++** incb x0, all, mul #8 ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s8_8, svint8_t, int8_t, ++ z0 = svld1_s8 (p0, x0 + svcntb () * 8), ++ z0 = svld1 (p0, x0 + svcntb () * 8)) ++ ++/* ++** ld1_s8_m1: ++** ld1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s8_m1, svint8_t, int8_t, ++ z0 = svld1_s8 (p0, x0 - svcntb ()), ++ z0 = svld1 (p0, x0 - svcntb ())) ++ ++/* ++** ld1_s8_m8: ++** ld1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_s8_m8, svint8_t, int8_t, ++ z0 = svld1_s8 (p0, x0 - svcntb () * 8), ++ z0 = svld1 (p0, x0 - svcntb () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_s8_m9: ++** decb x0, all, mul #9 ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_s8_m9, svint8_t, int8_t, ++ z0 = svld1_s8 (p0, x0 - svcntb () * 9), ++ z0 = svld1 (p0, x0 - svcntb () * 9)) ++ ++/* ++** ld1_vnum_s8_0: ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s8_0, svint8_t, int8_t, ++ z0 = svld1_vnum_s8 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_s8_1: ++** ld1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s8_1, svint8_t, int8_t, ++ z0 = svld1_vnum_s8 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_s8_7: ++** ld1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s8_7, svint8_t, int8_t, ++ z0 = svld1_vnum_s8 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_s8_8: ++** incb x0, all, mul #8 ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s8_8, svint8_t, int8_t, ++ z0 = svld1_vnum_s8 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_s8_m1: ++** ld1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s8_m1, svint8_t, int8_t, ++ z0 = svld1_vnum_s8 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_s8_m8: ++** ld1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s8_m8, svint8_t, int8_t, ++ z0 = svld1_vnum_s8 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_s8_m9: ++** decb x0, all, mul #9 ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s8_m9, svint8_t, int8_t, ++ z0 = svld1_vnum_s8 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* ++** ld1_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1b z0\.b, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1b z0\.b, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1_vnum_s8_x1, svint8_t, int8_t, ++ z0 = svld1_vnum_s8 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u16.c +new file mode 100644 +index 000000000..ee25b9e37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_u16_base: ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u16_base, svuint16_t, uint16_t, ++ z0 = svld1_u16 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_u16_index: ++** ld1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1_u16_index, svuint16_t, uint16_t, ++ z0 = svld1_u16 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_u16_1: ++** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u16_1, svuint16_t, uint16_t, ++ z0 = svld1_u16 (p0, x0 + svcnth ()), ++ z0 = svld1 (p0, x0 + svcnth ())) ++ ++/* ++** ld1_u16_7: ++** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u16_7, svuint16_t, uint16_t, ++ z0 = svld1_u16 (p0, x0 + svcnth () * 7), ++ z0 = svld1 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_u16_8: ++** incb x0, all, mul #8 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u16_8, svuint16_t, uint16_t, ++ z0 = svld1_u16 (p0, x0 + svcnth () * 8), ++ z0 = svld1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ld1_u16_m1: ++** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u16_m1, svuint16_t, uint16_t, ++ z0 = svld1_u16 (p0, x0 - svcnth ()), ++ z0 = svld1 (p0, x0 - svcnth ())) ++ ++/* ++** ld1_u16_m8: ++** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u16_m8, svuint16_t, uint16_t, ++ z0 = svld1_u16 (p0, x0 - svcnth () * 8), ++ z0 = svld1 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_u16_m9: ++** decb x0, all, mul #9 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u16_m9, svuint16_t, uint16_t, ++ z0 = svld1_u16 (p0, x0 - svcnth () * 9), ++ z0 = svld1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ld1_vnum_u16_0: ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u16_0, svuint16_t, uint16_t, ++ z0 = svld1_vnum_u16 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_u16_1: ++** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u16_1, svuint16_t, uint16_t, ++ z0 = svld1_vnum_u16 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_u16_7: ++** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u16_7, svuint16_t, uint16_t, ++ z0 = svld1_vnum_u16 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_u16_8: ++** incb x0, all, mul #8 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u16_8, svuint16_t, uint16_t, ++ z0 = svld1_vnum_u16 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_u16_m1: ++** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u16_m1, svuint16_t, uint16_t, ++ z0 = svld1_vnum_u16 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_u16_m8: ++** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u16_m8, svuint16_t, uint16_t, ++ z0 = svld1_vnum_u16 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_u16_m9: ++** decb x0, all, mul #9 ++** ld1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u16_m9, svuint16_t, uint16_t, ++ z0 = svld1_vnum_u16 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** ld1_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u16_x1, svuint16_t, uint16_t, ++ z0 = svld1_vnum_u16 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u32.c +new file mode 100644 +index 000000000..bcd304126 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_u32_base: ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u32_base, svuint32_t, uint32_t, ++ z0 = svld1_u32 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_u32_index: ++** ld1w z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1_u32_index, svuint32_t, uint32_t, ++ z0 = svld1_u32 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_u32_1: ++** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u32_1, svuint32_t, uint32_t, ++ z0 = svld1_u32 (p0, x0 + svcntw ()), ++ z0 = svld1 (p0, x0 + svcntw ())) ++ ++/* ++** ld1_u32_7: ++** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u32_7, svuint32_t, uint32_t, ++ z0 = svld1_u32 (p0, x0 + svcntw () * 7), ++ z0 = svld1 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_u32_8: ++** incb x0, all, mul #8 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u32_8, svuint32_t, uint32_t, ++ z0 = svld1_u32 (p0, x0 + svcntw () * 8), ++ z0 = svld1 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1_u32_m1: ++** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u32_m1, svuint32_t, uint32_t, ++ z0 = svld1_u32 (p0, x0 - svcntw ()), ++ z0 = svld1 (p0, x0 - svcntw ())) ++ ++/* ++** ld1_u32_m8: ++** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u32_m8, svuint32_t, uint32_t, ++ z0 = svld1_u32 (p0, x0 - svcntw () * 8), ++ z0 = svld1 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_u32_m9: ++** decb x0, all, mul #9 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u32_m9, svuint32_t, uint32_t, ++ z0 = svld1_u32 (p0, x0 - svcntw () * 9), ++ z0 = svld1 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1_vnum_u32_0: ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u32_0, svuint32_t, uint32_t, ++ z0 = svld1_vnum_u32 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_u32_1: ++** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u32_1, svuint32_t, uint32_t, ++ z0 = svld1_vnum_u32 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_u32_7: ++** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u32_7, svuint32_t, uint32_t, ++ z0 = svld1_vnum_u32 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1_vnum_u32_8: ++** incb x0, all, mul #8 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u32_8, svuint32_t, uint32_t, ++ z0 = svld1_vnum_u32 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_u32_m1: ++** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u32_m1, svuint32_t, uint32_t, ++ z0 = svld1_vnum_u32 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_u32_m8: ++** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u32_m8, svuint32_t, uint32_t, ++ z0 = svld1_vnum_u32 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_u32_m9: ++** decb x0, all, mul #9 ++** ld1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u32_m9, svuint32_t, uint32_t, ++ z0 = svld1_vnum_u32 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u32_x1, svuint32_t, uint32_t, ++ z0 = svld1_vnum_u32 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u64.c +new file mode 100644 +index 000000000..ebb874720 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_u64_base: ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u64_base, svuint64_t, uint64_t, ++ z0 = svld1_u64 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_u64_index: ++** ld1d z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld1_u64_index, svuint64_t, uint64_t, ++ z0 = svld1_u64 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_u64_1: ++** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u64_1, svuint64_t, uint64_t, ++ z0 = svld1_u64 (p0, x0 + svcntd ()), ++ z0 = svld1 (p0, x0 + svcntd ())) ++ ++/* ++** ld1_u64_7: ++** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u64_7, svuint64_t, uint64_t, ++ z0 = svld1_u64 (p0, x0 + svcntd () * 7), ++ z0 = svld1 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_u64_8: ++** incb x0, all, mul #8 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u64_8, svuint64_t, uint64_t, ++ z0 = svld1_u64 (p0, x0 + svcntd () * 8), ++ z0 = svld1 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1_u64_m1: ++** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u64_m1, svuint64_t, uint64_t, ++ z0 = svld1_u64 (p0, x0 - svcntd ()), ++ z0 = svld1 (p0, x0 - svcntd ())) ++ ++/* ++** ld1_u64_m8: ++** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u64_m8, svuint64_t, uint64_t, ++ z0 = svld1_u64 (p0, x0 - svcntd () * 8), ++ z0 = svld1 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1_u64_m9: ++** decb x0, all, mul #9 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u64_m9, svuint64_t, uint64_t, ++ z0 = svld1_u64 (p0, x0 - svcntd () * 9), ++ z0 = svld1 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1_vnum_u64_0: ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u64_0, svuint64_t, uint64_t, ++ z0 = svld1_vnum_u64 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_u64_1: ++** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u64_1, svuint64_t, uint64_t, ++ z0 = svld1_vnum_u64 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_u64_7: ++** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u64_7, svuint64_t, uint64_t, ++ z0 = svld1_vnum_u64 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_u64_8: ++** incb x0, all, mul #8 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u64_8, svuint64_t, uint64_t, ++ z0 = svld1_vnum_u64 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_u64_m1: ++** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u64_m1, svuint64_t, uint64_t, ++ z0 = svld1_vnum_u64 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_u64_m8: ++** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u64_m8, svuint64_t, uint64_t, ++ z0 = svld1_vnum_u64 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_u64_m9: ++** decb x0, all, mul #9 ++** ld1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u64_m9, svuint64_t, uint64_t, ++ z0 = svld1_vnum_u64 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u64_x1, svuint64_t, uint64_t, ++ z0 = svld1_vnum_u64 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u8.c +new file mode 100644 +index 000000000..12f42bd92 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u8.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1_u8_base: ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u8_base, svuint8_t, uint8_t, ++ z0 = svld1_u8 (p0, x0), ++ z0 = svld1 (p0, x0)) ++ ++/* ++** ld1_u8_index: ++** ld1b z0\.b, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1_u8_index, svuint8_t, uint8_t, ++ z0 = svld1_u8 (p0, x0 + x1), ++ z0 = svld1 (p0, x0 + x1)) ++ ++/* ++** ld1_u8_1: ++** ld1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u8_1, svuint8_t, uint8_t, ++ z0 = svld1_u8 (p0, x0 + svcntb ()), ++ z0 = svld1 (p0, x0 + svcntb ())) ++ ++/* ++** ld1_u8_7: ++** ld1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u8_7, svuint8_t, uint8_t, ++ z0 = svld1_u8 (p0, x0 + svcntb () * 7), ++ z0 = svld1 (p0, x0 + svcntb () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1_u8_8: ++** incb x0, all, mul #8 ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u8_8, svuint8_t, uint8_t, ++ z0 = svld1_u8 (p0, x0 + svcntb () * 8), ++ z0 = svld1 (p0, x0 + svcntb () * 8)) ++ ++/* ++** ld1_u8_m1: ++** ld1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u8_m1, svuint8_t, uint8_t, ++ z0 = svld1_u8 (p0, x0 - svcntb ()), ++ z0 = svld1 (p0, x0 - svcntb ())) ++ ++/* ++** ld1_u8_m8: ++** ld1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_u8_m8, svuint8_t, uint8_t, ++ z0 = svld1_u8 (p0, x0 - svcntb () * 8), ++ z0 = svld1 (p0, x0 - svcntb () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_u8_m9: ++** decb x0, all, mul #9 ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_u8_m9, svuint8_t, uint8_t, ++ z0 = svld1_u8 (p0, x0 - svcntb () * 9), ++ z0 = svld1 (p0, x0 - svcntb () * 9)) ++ ++/* ++** ld1_vnum_u8_0: ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u8_0, svuint8_t, uint8_t, ++ z0 = svld1_vnum_u8 (p0, x0, 0), ++ z0 = svld1_vnum (p0, x0, 0)) ++ ++/* ++** ld1_vnum_u8_1: ++** ld1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u8_1, svuint8_t, uint8_t, ++ z0 = svld1_vnum_u8 (p0, x0, 1), ++ z0 = svld1_vnum (p0, x0, 1)) ++ ++/* ++** ld1_vnum_u8_7: ++** ld1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u8_7, svuint8_t, uint8_t, ++ z0 = svld1_vnum_u8 (p0, x0, 7), ++ z0 = svld1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_u8_8: ++** incb x0, all, mul #8 ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u8_8, svuint8_t, uint8_t, ++ z0 = svld1_vnum_u8 (p0, x0, 8), ++ z0 = svld1_vnum (p0, x0, 8)) ++ ++/* ++** ld1_vnum_u8_m1: ++** ld1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u8_m1, svuint8_t, uint8_t, ++ z0 = svld1_vnum_u8 (p0, x0, -1), ++ z0 = svld1_vnum (p0, x0, -1)) ++ ++/* ++** ld1_vnum_u8_m8: ++** ld1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u8_m8, svuint8_t, uint8_t, ++ z0 = svld1_vnum_u8 (p0, x0, -8), ++ z0 = svld1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1_vnum_u8_m9: ++** decb x0, all, mul #9 ++** ld1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u8_m9, svuint8_t, uint8_t, ++ z0 = svld1_vnum_u8 (p0, x0, -9), ++ z0 = svld1_vnum (p0, x0, -9)) ++ ++/* ++** ld1_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1b z0\.b, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1b z0\.b, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1_vnum_u8_x1, svuint8_t, uint8_t, ++ z0 = svld1_vnum_u8 (p0, x0, x1), ++ z0 = svld1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c +new file mode 100644 +index 000000000..cb1801778 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_bf16_base: ++** ld1roh z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_base, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_bf16_index: ++** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_index, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_bf16_1: ++** add (x[0-9]+), x0, #?2 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_1, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_bf16_8: ++** add (x[0-9]+), x0, #?16 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_8, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 + 8), ++ z0 = svld1ro (p0, x0 + 8)) ++ ++/* ++** ld1ro_bf16_128: ++** add (x[0-9]+), x0, #?256 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_128, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 + 128), ++ z0 = svld1ro (p0, x0 + 128)) ++ ++/* ++** ld1ro_bf16_m1: ++** sub (x[0-9]+), x0, #?2 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_m1, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_bf16_m8: ++** sub (x[0-9]+), x0, #?16 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_m8, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 - 8), ++ z0 = svld1ro (p0, x0 - 8)) ++ ++/* ++** ld1ro_bf16_m144: ++** sub (x[0-9]+), x0, #?288 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_m144, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 - 144), ++ z0 = svld1ro (p0, x0 - 144)) ++ ++/* ++** ld1ro_bf16_16: ++** ld1roh z0\.h, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_16, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 + 16), ++ z0 = svld1ro (p0, x0 + 16)) ++ ++/* ++** ld1ro_bf16_112: ++** ld1roh z0\.h, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_112, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 + 112), ++ z0 = svld1ro (p0, x0 + 112)) ++ ++/* ++** ld1ro_bf16_m16: ++** ld1roh z0\.h, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_m16, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 - 16), ++ z0 = svld1ro (p0, x0 - 16)) ++ ++/* ++** ld1ro_bf16_m128: ++** ld1roh z0\.h, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_bf16_m128, svbfloat16_t, bfloat16_t, ++ z0 = svld1ro_bf16 (p0, x0 - 128), ++ z0 = svld1ro (p0, x0 - 128)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c +new file mode 100644 +index 000000000..86081edbd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_f16_base: ++** ld1roh z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_base, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_f16_index: ++** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_index, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_f16_1: ++** add (x[0-9]+), x0, #?2 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_1, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_f16_8: ++** add (x[0-9]+), x0, #?16 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_8, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 + 8), ++ z0 = svld1ro (p0, x0 + 8)) ++ ++/* ++** ld1ro_f16_128: ++** add (x[0-9]+), x0, #?256 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_128, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 + 128), ++ z0 = svld1ro (p0, x0 + 128)) ++ ++/* ++** ld1ro_f16_m1: ++** sub (x[0-9]+), x0, #?2 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_m1, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_f16_m8: ++** sub (x[0-9]+), x0, #?16 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_m8, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 - 8), ++ z0 = svld1ro (p0, x0 - 8)) ++ ++/* ++** ld1ro_f16_m144: ++** sub (x[0-9]+), x0, #?288 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_m144, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 - 144), ++ z0 = svld1ro (p0, x0 - 144)) ++ ++/* ++** ld1ro_f16_16: ++** ld1roh z0\.h, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_16, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 + 16), ++ z0 = svld1ro (p0, x0 + 16)) ++ ++/* ++** ld1ro_f16_112: ++** ld1roh z0\.h, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_112, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 + 112), ++ z0 = svld1ro (p0, x0 + 112)) ++ ++/* ++** ld1ro_f16_m16: ++** ld1roh z0\.h, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_m16, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 - 16), ++ z0 = svld1ro (p0, x0 - 16)) ++ ++/* ++** ld1ro_f16_m128: ++** ld1roh z0\.h, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f16_m128, svfloat16_t, float16_t, ++ z0 = svld1ro_f16 (p0, x0 - 128), ++ z0 = svld1ro (p0, x0 - 128)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c +new file mode 100644 +index 000000000..c8df00f8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_f32_base: ++** ld1row z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_base, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_f32_index: ++** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_index, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_f32_1: ++** add (x[0-9]+), x0, #?4 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_1, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_f32_4: ++** add (x[0-9]+), x0, #?16 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_4, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 + 4), ++ z0 = svld1ro (p0, x0 + 4)) ++ ++/* ++** ld1ro_f32_64: ++** add (x[0-9]+), x0, #?256 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_64, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 + 64), ++ z0 = svld1ro (p0, x0 + 64)) ++ ++/* ++** ld1ro_f32_m1: ++** sub (x[0-9]+), x0, #?4 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_m1, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_f32_m4: ++** sub (x[0-9]+), x0, #?16 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_m4, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 - 4), ++ z0 = svld1ro (p0, x0 - 4)) ++ ++/* ++** ld1ro_f32_m72: ++** sub (x[0-9]+), x0, #?288 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_m72, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 - 72), ++ z0 = svld1ro (p0, x0 - 72)) ++ ++/* ++** ld1ro_f32_8: ++** ld1row z0\.s, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_8, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 + 8), ++ z0 = svld1ro (p0, x0 + 8)) ++ ++/* ++** ld1ro_f32_56: ++** ld1row z0\.s, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_56, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 + 56), ++ z0 = svld1ro (p0, x0 + 56)) ++ ++/* ++** ld1ro_f32_m8: ++** ld1row z0\.s, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_m8, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 - 8), ++ z0 = svld1ro (p0, x0 - 8)) ++ ++/* ++** ld1ro_f32_m64: ++** ld1row z0\.s, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f32_m64, svfloat32_t, float32_t, ++ z0 = svld1ro_f32 (p0, x0 - 64), ++ z0 = svld1ro (p0, x0 - 64)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c +new file mode 100644 +index 000000000..2fb9d5b74 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_f64_base: ++** ld1rod z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_base, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_f64_index: ++** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_index, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_f64_1: ++** add (x[0-9]+), x0, #?8 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_1, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_f64_2: ++** add (x[0-9]+), x0, #?16 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_2, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 + 2), ++ z0 = svld1ro (p0, x0 + 2)) ++ ++/* ++** ld1ro_f64_32: ++** add (x[0-9]+), x0, #?256 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_32, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 + 32), ++ z0 = svld1ro (p0, x0 + 32)) ++ ++/* ++** ld1ro_f64_m1: ++** sub (x[0-9]+), x0, #?8 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_m1, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_f64_m2: ++** sub (x[0-9]+), x0, #?16 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_m2, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 - 2), ++ z0 = svld1ro (p0, x0 - 2)) ++ ++/* ++** ld1ro_f64_m36: ++** sub (x[0-9]+), x0, #?288 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_m36, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 - 36), ++ z0 = svld1ro (p0, x0 - 36)) ++ ++/* ++** ld1ro_f64_4: ++** ld1rod z0\.d, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_4, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 + 4), ++ z0 = svld1ro (p0, x0 + 4)) ++ ++/* ++** ld1ro_f64_28: ++** ld1rod z0\.d, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_28, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 + 28), ++ z0 = svld1ro (p0, x0 + 28)) ++ ++/* ++** ld1ro_f64_m4: ++** ld1rod z0\.d, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_m4, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 - 4), ++ z0 = svld1ro (p0, x0 - 4)) ++ ++/* ++** ld1ro_f64_m32: ++** ld1rod z0\.d, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_f64_m32, svfloat64_t, float64_t, ++ z0 = svld1ro_f64 (p0, x0 - 32), ++ z0 = svld1ro (p0, x0 - 32)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c +new file mode 100644 +index 000000000..3cd211b16 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_s16_base: ++** ld1roh z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_base, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_s16_index: ++** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_index, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_s16_1: ++** add (x[0-9]+), x0, #?2 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_1, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_s16_8: ++** add (x[0-9]+), x0, #?16 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_8, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 + 8), ++ z0 = svld1ro (p0, x0 + 8)) ++ ++/* ++** ld1ro_s16_128: ++** add (x[0-9]+), x0, #?256 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_128, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 + 128), ++ z0 = svld1ro (p0, x0 + 128)) ++ ++/* ++** ld1ro_s16_m1: ++** sub (x[0-9]+), x0, #?2 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_m1, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_s16_m8: ++** sub (x[0-9]+), x0, #?16 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_m8, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 - 8), ++ z0 = svld1ro (p0, x0 - 8)) ++ ++/* ++** ld1ro_s16_m144: ++** sub (x[0-9]+), x0, #?288 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_m144, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 - 144), ++ z0 = svld1ro (p0, x0 - 144)) ++ ++/* ++** ld1ro_s16_16: ++** ld1roh z0\.h, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_16, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 + 16), ++ z0 = svld1ro (p0, x0 + 16)) ++ ++/* ++** ld1ro_s16_112: ++** ld1roh z0\.h, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_112, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 + 112), ++ z0 = svld1ro (p0, x0 + 112)) ++ ++/* ++** ld1ro_s16_m16: ++** ld1roh z0\.h, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_m16, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 - 16), ++ z0 = svld1ro (p0, x0 - 16)) ++ ++/* ++** ld1ro_s16_m128: ++** ld1roh z0\.h, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s16_m128, svint16_t, int16_t, ++ z0 = svld1ro_s16 (p0, x0 - 128), ++ z0 = svld1ro (p0, x0 - 128)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c +new file mode 100644 +index 000000000..44b16ed5f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_s32_base: ++** ld1row z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_base, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_s32_index: ++** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_index, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_s32_1: ++** add (x[0-9]+), x0, #?4 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_1, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_s32_4: ++** add (x[0-9]+), x0, #?16 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_4, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 + 4), ++ z0 = svld1ro (p0, x0 + 4)) ++ ++/* ++** ld1ro_s32_64: ++** add (x[0-9]+), x0, #?256 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_64, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 + 64), ++ z0 = svld1ro (p0, x0 + 64)) ++ ++/* ++** ld1ro_s32_m1: ++** sub (x[0-9]+), x0, #?4 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_m1, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_s32_m4: ++** sub (x[0-9]+), x0, #?16 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_m4, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 - 4), ++ z0 = svld1ro (p0, x0 - 4)) ++ ++/* ++** ld1ro_s32_m72: ++** sub (x[0-9]+), x0, #?288 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_m72, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 - 72), ++ z0 = svld1ro (p0, x0 - 72)) ++ ++/* ++** ld1ro_s32_8: ++** ld1row z0\.s, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_8, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 + 8), ++ z0 = svld1ro (p0, x0 + 8)) ++ ++/* ++** ld1ro_s32_56: ++** ld1row z0\.s, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_56, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 + 56), ++ z0 = svld1ro (p0, x0 + 56)) ++ ++/* ++** ld1ro_s32_m8: ++** ld1row z0\.s, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_m8, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 - 8), ++ z0 = svld1ro (p0, x0 - 8)) ++ ++/* ++** ld1ro_s32_m64: ++** ld1row z0\.s, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s32_m64, svint32_t, int32_t, ++ z0 = svld1ro_s32 (p0, x0 - 64), ++ z0 = svld1ro (p0, x0 - 64)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c +new file mode 100644 +index 000000000..3aa9a15ee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_s64_base: ++** ld1rod z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_base, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_s64_index: ++** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_index, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_s64_1: ++** add (x[0-9]+), x0, #?8 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_1, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_s64_2: ++** add (x[0-9]+), x0, #?16 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_2, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 + 2), ++ z0 = svld1ro (p0, x0 + 2)) ++ ++/* ++** ld1ro_s64_32: ++** add (x[0-9]+), x0, #?256 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_32, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 + 32), ++ z0 = svld1ro (p0, x0 + 32)) ++ ++/* ++** ld1ro_s64_m1: ++** sub (x[0-9]+), x0, #?8 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_m1, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_s64_m2: ++** sub (x[0-9]+), x0, #?16 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_m2, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 - 2), ++ z0 = svld1ro (p0, x0 - 2)) ++ ++/* ++** ld1ro_s64_m36: ++** sub (x[0-9]+), x0, #?288 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_m36, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 - 36), ++ z0 = svld1ro (p0, x0 - 36)) ++ ++/* ++** ld1ro_s64_4: ++** ld1rod z0\.d, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_4, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 + 4), ++ z0 = svld1ro (p0, x0 + 4)) ++ ++/* ++** ld1ro_s64_28: ++** ld1rod z0\.d, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_28, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 + 28), ++ z0 = svld1ro (p0, x0 + 28)) ++ ++/* ++** ld1ro_s64_m4: ++** ld1rod z0\.d, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_m4, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 - 4), ++ z0 = svld1ro (p0, x0 - 4)) ++ ++/* ++** ld1ro_s64_m32: ++** ld1rod z0\.d, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s64_m32, svint64_t, int64_t, ++ z0 = svld1ro_s64 (p0, x0 - 32), ++ z0 = svld1ro (p0, x0 - 32)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c +new file mode 100644 +index 000000000..49aff5146 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_s8_base: ++** ld1rob z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_base, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_s8_index: ++** ld1rob z0\.b, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_index, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_s8_1: ++** add (x[0-9]+), x0, #?1 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_1, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_s8_16: ++** add (x[0-9]+), x0, #?16 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_16, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 + 16), ++ z0 = svld1ro (p0, x0 + 16)) ++ ++/* ++** ld1ro_s8_256: ++** add (x[0-9]+), x0, #?256 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_256, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 + 256), ++ z0 = svld1ro (p0, x0 + 256)) ++ ++/* ++** ld1ro_s8_m1: ++** sub (x[0-9]+), x0, #?1 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_m1, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_s8_m16: ++** sub (x[0-9]+), x0, #?16 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_m16, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 - 16), ++ z0 = svld1ro (p0, x0 - 16)) ++ ++/* ++** ld1ro_s8_m288: ++** sub (x[0-9]+), x0, #?288 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_m288, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 - 288), ++ z0 = svld1ro (p0, x0 - 288)) ++ ++/* ++** ld1ro_s8_32: ++** ld1rob z0\.b, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_32, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 + 32), ++ z0 = svld1ro (p0, x0 + 32)) ++ ++/* ++** ld1ro_s8_224: ++** ld1rob z0\.b, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_224, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 + 224), ++ z0 = svld1ro (p0, x0 + 224)) ++ ++/* ++** ld1ro_s8_m32: ++** ld1rob z0\.b, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_m32, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 - 32), ++ z0 = svld1ro (p0, x0 - 32)) ++ ++/* ++** ld1ro_s8_m256: ++** ld1rob z0\.b, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_s8_m256, svint8_t, int8_t, ++ z0 = svld1ro_s8 (p0, x0 - 256), ++ z0 = svld1ro (p0, x0 - 256)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c +new file mode 100644 +index 000000000..00bf9e129 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_u16_base: ++** ld1roh z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_base, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_u16_index: ++** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_index, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_u16_1: ++** add (x[0-9]+), x0, #?2 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_1, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_u16_8: ++** add (x[0-9]+), x0, #?16 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_8, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 + 8), ++ z0 = svld1ro (p0, x0 + 8)) ++ ++/* ++** ld1ro_u16_128: ++** add (x[0-9]+), x0, #?256 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_128, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 + 128), ++ z0 = svld1ro (p0, x0 + 128)) ++ ++/* ++** ld1ro_u16_m1: ++** sub (x[0-9]+), x0, #?2 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_m1, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_u16_m8: ++** sub (x[0-9]+), x0, #?16 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_m8, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 - 8), ++ z0 = svld1ro (p0, x0 - 8)) ++ ++/* ++** ld1ro_u16_m144: ++** sub (x[0-9]+), x0, #?288 ++** ld1roh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_m144, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 - 144), ++ z0 = svld1ro (p0, x0 - 144)) ++ ++/* ++** ld1ro_u16_16: ++** ld1roh z0\.h, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_16, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 + 16), ++ z0 = svld1ro (p0, x0 + 16)) ++ ++/* ++** ld1ro_u16_112: ++** ld1roh z0\.h, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_112, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 + 112), ++ z0 = svld1ro (p0, x0 + 112)) ++ ++/* ++** ld1ro_u16_m16: ++** ld1roh z0\.h, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_m16, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 - 16), ++ z0 = svld1ro (p0, x0 - 16)) ++ ++/* ++** ld1ro_u16_m128: ++** ld1roh z0\.h, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u16_m128, svuint16_t, uint16_t, ++ z0 = svld1ro_u16 (p0, x0 - 128), ++ z0 = svld1ro (p0, x0 - 128)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c +new file mode 100644 +index 000000000..9e9b3290a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_u32_base: ++** ld1row z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_base, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_u32_index: ++** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_index, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_u32_1: ++** add (x[0-9]+), x0, #?4 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_1, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_u32_4: ++** add (x[0-9]+), x0, #?16 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_4, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 + 4), ++ z0 = svld1ro (p0, x0 + 4)) ++ ++/* ++** ld1ro_u32_64: ++** add (x[0-9]+), x0, #?256 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_64, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 + 64), ++ z0 = svld1ro (p0, x0 + 64)) ++ ++/* ++** ld1ro_u32_m1: ++** sub (x[0-9]+), x0, #?4 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_m1, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_u32_m4: ++** sub (x[0-9]+), x0, #?16 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_m4, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 - 4), ++ z0 = svld1ro (p0, x0 - 4)) ++ ++/* ++** ld1ro_u32_m72: ++** sub (x[0-9]+), x0, #?288 ++** ld1row z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_m72, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 - 72), ++ z0 = svld1ro (p0, x0 - 72)) ++ ++/* ++** ld1ro_u32_8: ++** ld1row z0\.s, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_8, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 + 8), ++ z0 = svld1ro (p0, x0 + 8)) ++ ++/* ++** ld1ro_u32_56: ++** ld1row z0\.s, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_56, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 + 56), ++ z0 = svld1ro (p0, x0 + 56)) ++ ++/* ++** ld1ro_u32_m8: ++** ld1row z0\.s, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_m8, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 - 8), ++ z0 = svld1ro (p0, x0 - 8)) ++ ++/* ++** ld1ro_u32_m64: ++** ld1row z0\.s, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u32_m64, svuint32_t, uint32_t, ++ z0 = svld1ro_u32 (p0, x0 - 64), ++ z0 = svld1ro (p0, x0 - 64)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c +new file mode 100644 +index 000000000..64ec62871 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_u64_base: ++** ld1rod z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_base, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_u64_index: ++** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_index, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_u64_1: ++** add (x[0-9]+), x0, #?8 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_1, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_u64_2: ++** add (x[0-9]+), x0, #?16 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_2, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 + 2), ++ z0 = svld1ro (p0, x0 + 2)) ++ ++/* ++** ld1ro_u64_32: ++** add (x[0-9]+), x0, #?256 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_32, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 + 32), ++ z0 = svld1ro (p0, x0 + 32)) ++ ++/* ++** ld1ro_u64_m1: ++** sub (x[0-9]+), x0, #?8 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_m1, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_u64_m2: ++** sub (x[0-9]+), x0, #?16 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_m2, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 - 2), ++ z0 = svld1ro (p0, x0 - 2)) ++ ++/* ++** ld1ro_u64_m36: ++** sub (x[0-9]+), x0, #?288 ++** ld1rod z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_m36, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 - 36), ++ z0 = svld1ro (p0, x0 - 36)) ++ ++/* ++** ld1ro_u64_4: ++** ld1rod z0\.d, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_4, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 + 4), ++ z0 = svld1ro (p0, x0 + 4)) ++ ++/* ++** ld1ro_u64_28: ++** ld1rod z0\.d, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_28, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 + 28), ++ z0 = svld1ro (p0, x0 + 28)) ++ ++/* ++** ld1ro_u64_m4: ++** ld1rod z0\.d, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_m4, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 - 4), ++ z0 = svld1ro (p0, x0 - 4)) ++ ++/* ++** ld1ro_u64_m32: ++** ld1rod z0\.d, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u64_m32, svuint64_t, uint64_t, ++ z0 = svld1ro_u64 (p0, x0 - 32), ++ z0 = svld1ro (p0, x0 - 32)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c +new file mode 100644 +index 000000000..22701320b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c +@@ -0,0 +1,120 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ro_u8_base: ++** ld1rob z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_base, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0), ++ z0 = svld1ro (p0, x0)) ++ ++/* ++** ld1ro_u8_index: ++** ld1rob z0\.b, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_index, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 + x1), ++ z0 = svld1ro (p0, x0 + x1)) ++ ++/* ++** ld1ro_u8_1: ++** add (x[0-9]+), x0, #?1 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_1, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 + 1), ++ z0 = svld1ro (p0, x0 + 1)) ++ ++/* ++** ld1ro_u8_16: ++** add (x[0-9]+), x0, #?16 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_16, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 + 16), ++ z0 = svld1ro (p0, x0 + 16)) ++ ++/* ++** ld1ro_u8_256: ++** add (x[0-9]+), x0, #?256 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_256, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 + 256), ++ z0 = svld1ro (p0, x0 + 256)) ++ ++/* ++** ld1ro_u8_m1: ++** sub (x[0-9]+), x0, #?1 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_m1, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 - 1), ++ z0 = svld1ro (p0, x0 - 1)) ++ ++/* ++** ld1ro_u8_m16: ++** sub (x[0-9]+), x0, #?16 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_m16, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 - 16), ++ z0 = svld1ro (p0, x0 - 16)) ++ ++/* ++** ld1ro_u8_m288: ++** sub (x[0-9]+), x0, #?288 ++** ld1rob z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_m288, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 - 288), ++ z0 = svld1ro (p0, x0 - 288)) ++ ++/* ++** ld1ro_u8_32: ++** ld1rob z0\.b, p0/z, \[x0, #?32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_32, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 + 32), ++ z0 = svld1ro (p0, x0 + 32)) ++ ++/* ++** ld1ro_u8_224: ++** ld1rob z0\.b, p0/z, \[x0, #?224\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_224, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 + 224), ++ z0 = svld1ro (p0, x0 + 224)) ++ ++/* ++** ld1ro_u8_m32: ++** ld1rob z0\.b, p0/z, \[x0, #?-32\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_m32, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 - 32), ++ z0 = svld1ro (p0, x0 - 32)) ++ ++/* ++** ld1ro_u8_m256: ++** ld1rob z0\.b, p0/z, \[x0, #?-256\] ++** ret ++*/ ++TEST_LOAD (ld1ro_u8_m256, svuint8_t, uint8_t, ++ z0 = svld1ro_u8 (p0, x0 - 256), ++ z0 = svld1ro (p0, x0 - 256)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_bf16.c +new file mode 100644 +index 000000000..54c69a1db +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_bf16.c +@@ -0,0 +1,137 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_bf16_base: ++** ld1rqh z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_base, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_bf16_index: ++** ld1rqh z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_index, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_bf16_1: ++** add (x[0-9]+), x0, #?2 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_1, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_bf16_4: ++** add (x[0-9]+), x0, #?8 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_4, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 + 4), ++ z0 = svld1rq (p0, x0 + 4)) ++ ++/* ++** ld1rq_bf16_7: ++** add (x[0-9]+), x0, #?14 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_7, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 + 7), ++ z0 = svld1rq (p0, x0 + 7)) ++ ++/* ++** ld1rq_bf16_8: ++** ld1rqh z0\.h, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_8, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 + 8), ++ z0 = svld1rq (p0, x0 + 8)) ++ ++/* ++** ld1rq_bf16_56: ++** ld1rqh z0\.h, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_56, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 + 56), ++ z0 = svld1rq (p0, x0 + 56)) ++ ++/* ++** ld1rq_bf16_64: ++** add (x[0-9]+), x0, #?128 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_64, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 + 64), ++ z0 = svld1rq (p0, x0 + 64)) ++ ++/* ++** ld1rq_bf16_m1: ++** sub (x[0-9]+), x0, #?2 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_m1, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_bf16_m4: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_m4, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 - 4), ++ z0 = svld1rq (p0, x0 - 4)) ++ ++/* ++** ld1rq_bf16_m7: ++** sub (x[0-9]+), x0, #?14 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_m7, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 - 7), ++ z0 = svld1rq (p0, x0 - 7)) ++ ++/* ++** ld1rq_bf16_m8: ++** ld1rqh z0\.h, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_m8, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 - 8), ++ z0 = svld1rq (p0, x0 - 8)) ++ ++/* ++** ld1rq_bf16_m64: ++** ld1rqh z0\.h, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_m64, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 - 64), ++ z0 = svld1rq (p0, x0 - 64)) ++ ++/* ++** ld1rq_bf16_m72: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_bf16_m72, svbfloat16_t, bfloat16_t, ++ z0 = svld1rq_bf16 (p0, x0 - 72), ++ z0 = svld1rq (p0, x0 - 72)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f16.c +new file mode 100644 +index 000000000..7536236f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f16.c +@@ -0,0 +1,137 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_f16_base: ++** ld1rqh z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_base, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_f16_index: ++** ld1rqh z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_index, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_f16_1: ++** add (x[0-9]+), x0, #?2 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_1, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_f16_4: ++** add (x[0-9]+), x0, #?8 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_4, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 + 4), ++ z0 = svld1rq (p0, x0 + 4)) ++ ++/* ++** ld1rq_f16_7: ++** add (x[0-9]+), x0, #?14 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_7, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 + 7), ++ z0 = svld1rq (p0, x0 + 7)) ++ ++/* ++** ld1rq_f16_8: ++** ld1rqh z0\.h, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_8, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 + 8), ++ z0 = svld1rq (p0, x0 + 8)) ++ ++/* ++** ld1rq_f16_56: ++** ld1rqh z0\.h, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_56, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 + 56), ++ z0 = svld1rq (p0, x0 + 56)) ++ ++/* ++** ld1rq_f16_64: ++** add (x[0-9]+), x0, #?128 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_64, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 + 64), ++ z0 = svld1rq (p0, x0 + 64)) ++ ++/* ++** ld1rq_f16_m1: ++** sub (x[0-9]+), x0, #?2 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_m1, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_f16_m4: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_m4, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 - 4), ++ z0 = svld1rq (p0, x0 - 4)) ++ ++/* ++** ld1rq_f16_m7: ++** sub (x[0-9]+), x0, #?14 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_m7, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 - 7), ++ z0 = svld1rq (p0, x0 - 7)) ++ ++/* ++** ld1rq_f16_m8: ++** ld1rqh z0\.h, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_m8, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 - 8), ++ z0 = svld1rq (p0, x0 - 8)) ++ ++/* ++** ld1rq_f16_m64: ++** ld1rqh z0\.h, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_m64, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 - 64), ++ z0 = svld1rq (p0, x0 - 64)) ++ ++/* ++** ld1rq_f16_m72: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f16_m72, svfloat16_t, float16_t, ++ z0 = svld1rq_f16 (p0, x0 - 72), ++ z0 = svld1rq (p0, x0 - 72)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f32.c +new file mode 100644 +index 000000000..9be2b7412 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f32.c +@@ -0,0 +1,137 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_f32_base: ++** ld1rqw z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_base, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_f32_index: ++** ld1rqw z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_index, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_f32_1: ++** add (x[0-9]+), x0, #?4 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_1, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_f32_2: ++** add (x[0-9]+), x0, #?8 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_2, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 + 2), ++ z0 = svld1rq (p0, x0 + 2)) ++ ++/* ++** ld1rq_f32_3: ++** add (x[0-9]+), x0, #?12 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_3, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 + 3), ++ z0 = svld1rq (p0, x0 + 3)) ++ ++/* ++** ld1rq_f32_4: ++** ld1rqw z0\.s, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_4, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 + 4), ++ z0 = svld1rq (p0, x0 + 4)) ++ ++/* ++** ld1rq_f32_28: ++** ld1rqw z0\.s, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_28, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 + 28), ++ z0 = svld1rq (p0, x0 + 28)) ++ ++/* ++** ld1rq_f32_32: ++** add (x[0-9]+), x0, #?128 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_32, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 + 32), ++ z0 = svld1rq (p0, x0 + 32)) ++ ++/* ++** ld1rq_f32_m1: ++** sub (x[0-9]+), x0, #?4 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_m1, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_f32_m2: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_m2, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 - 2), ++ z0 = svld1rq (p0, x0 - 2)) ++ ++/* ++** ld1rq_f32_m3: ++** sub (x[0-9]+), x0, #?12 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_m3, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 - 3), ++ z0 = svld1rq (p0, x0 - 3)) ++ ++/* ++** ld1rq_f32_m4: ++** ld1rqw z0\.s, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_m4, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 - 4), ++ z0 = svld1rq (p0, x0 - 4)) ++ ++/* ++** ld1rq_f32_m32: ++** ld1rqw z0\.s, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_m32, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 - 32), ++ z0 = svld1rq (p0, x0 - 32)) ++ ++/* ++** ld1rq_f32_m36: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f32_m36, svfloat32_t, float32_t, ++ z0 = svld1rq_f32 (p0, x0 - 36), ++ z0 = svld1rq (p0, x0 - 36)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f64.c +new file mode 100644 +index 000000000..32105af17 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f64.c +@@ -0,0 +1,97 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_f64_base: ++** ld1rqd z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f64_base, svfloat64_t, float64_t, ++ z0 = svld1rq_f64 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_f64_index: ++** ld1rqd z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f64_index, svfloat64_t, float64_t, ++ z0 = svld1rq_f64 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_f64_1: ++** add (x[0-9]+), x0, #?8 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f64_1, svfloat64_t, float64_t, ++ z0 = svld1rq_f64 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_f64_2: ++** ld1rqd z0\.d, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f64_2, svfloat64_t, float64_t, ++ z0 = svld1rq_f64 (p0, x0 + 2), ++ z0 = svld1rq (p0, x0 + 2)) ++ ++/* ++** ld1rq_f64_14: ++** ld1rqd z0\.d, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f64_14, svfloat64_t, float64_t, ++ z0 = svld1rq_f64 (p0, x0 + 14), ++ z0 = svld1rq (p0, x0 + 14)) ++ ++/* ++** ld1rq_f64_16: ++** add (x[0-9]+), x0, #?128 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f64_16, svfloat64_t, float64_t, ++ z0 = svld1rq_f64 (p0, x0 + 16), ++ z0 = svld1rq (p0, x0 + 16)) ++ ++/* ++** ld1rq_f64_m1: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f64_m1, svfloat64_t, float64_t, ++ z0 = svld1rq_f64 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_f64_m2: ++** ld1rqd z0\.d, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f64_m2, svfloat64_t, float64_t, ++ z0 = svld1rq_f64 (p0, x0 - 2), ++ z0 = svld1rq (p0, x0 - 2)) ++ ++/* ++** ld1rq_f64_m16: ++** ld1rqd z0\.d, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f64_m16, svfloat64_t, float64_t, ++ z0 = svld1rq_f64 (p0, x0 - 16), ++ z0 = svld1rq (p0, x0 - 16)) ++ ++/* ++** ld1rq_f64_m18: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_f64_m18, svfloat64_t, float64_t, ++ z0 = svld1rq_f64 (p0, x0 - 18), ++ z0 = svld1rq (p0, x0 - 18)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s16.c +new file mode 100644 +index 000000000..8903b96a3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s16.c +@@ -0,0 +1,137 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_s16_base: ++** ld1rqh z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_base, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_s16_index: ++** ld1rqh z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_index, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_s16_1: ++** add (x[0-9]+), x0, #?2 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_1, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_s16_4: ++** add (x[0-9]+), x0, #?8 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_4, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 + 4), ++ z0 = svld1rq (p0, x0 + 4)) ++ ++/* ++** ld1rq_s16_7: ++** add (x[0-9]+), x0, #?14 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_7, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 + 7), ++ z0 = svld1rq (p0, x0 + 7)) ++ ++/* ++** ld1rq_s16_8: ++** ld1rqh z0\.h, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_8, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 + 8), ++ z0 = svld1rq (p0, x0 + 8)) ++ ++/* ++** ld1rq_s16_56: ++** ld1rqh z0\.h, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_56, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 + 56), ++ z0 = svld1rq (p0, x0 + 56)) ++ ++/* ++** ld1rq_s16_64: ++** add (x[0-9]+), x0, #?128 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_64, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 + 64), ++ z0 = svld1rq (p0, x0 + 64)) ++ ++/* ++** ld1rq_s16_m1: ++** sub (x[0-9]+), x0, #?2 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_m1, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_s16_m4: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_m4, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 - 4), ++ z0 = svld1rq (p0, x0 - 4)) ++ ++/* ++** ld1rq_s16_m7: ++** sub (x[0-9]+), x0, #?14 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_m7, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 - 7), ++ z0 = svld1rq (p0, x0 - 7)) ++ ++/* ++** ld1rq_s16_m8: ++** ld1rqh z0\.h, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_m8, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 - 8), ++ z0 = svld1rq (p0, x0 - 8)) ++ ++/* ++** ld1rq_s16_m64: ++** ld1rqh z0\.h, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_m64, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 - 64), ++ z0 = svld1rq (p0, x0 - 64)) ++ ++/* ++** ld1rq_s16_m72: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s16_m72, svint16_t, int16_t, ++ z0 = svld1rq_s16 (p0, x0 - 72), ++ z0 = svld1rq (p0, x0 - 72)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s32.c +new file mode 100644 +index 000000000..a428b4350 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s32.c +@@ -0,0 +1,137 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_s32_base: ++** ld1rqw z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_base, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_s32_index: ++** ld1rqw z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_index, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_s32_1: ++** add (x[0-9]+), x0, #?4 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_1, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_s32_2: ++** add (x[0-9]+), x0, #?8 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_2, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 + 2), ++ z0 = svld1rq (p0, x0 + 2)) ++ ++/* ++** ld1rq_s32_3: ++** add (x[0-9]+), x0, #?12 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_3, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 + 3), ++ z0 = svld1rq (p0, x0 + 3)) ++ ++/* ++** ld1rq_s32_4: ++** ld1rqw z0\.s, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_4, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 + 4), ++ z0 = svld1rq (p0, x0 + 4)) ++ ++/* ++** ld1rq_s32_28: ++** ld1rqw z0\.s, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_28, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 + 28), ++ z0 = svld1rq (p0, x0 + 28)) ++ ++/* ++** ld1rq_s32_32: ++** add (x[0-9]+), x0, #?128 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_32, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 + 32), ++ z0 = svld1rq (p0, x0 + 32)) ++ ++/* ++** ld1rq_s32_m1: ++** sub (x[0-9]+), x0, #?4 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_m1, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_s32_m2: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_m2, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 - 2), ++ z0 = svld1rq (p0, x0 - 2)) ++ ++/* ++** ld1rq_s32_m3: ++** sub (x[0-9]+), x0, #?12 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_m3, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 - 3), ++ z0 = svld1rq (p0, x0 - 3)) ++ ++/* ++** ld1rq_s32_m4: ++** ld1rqw z0\.s, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_m4, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 - 4), ++ z0 = svld1rq (p0, x0 - 4)) ++ ++/* ++** ld1rq_s32_m32: ++** ld1rqw z0\.s, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_m32, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 - 32), ++ z0 = svld1rq (p0, x0 - 32)) ++ ++/* ++** ld1rq_s32_m36: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s32_m36, svint32_t, int32_t, ++ z0 = svld1rq_s32 (p0, x0 - 36), ++ z0 = svld1rq (p0, x0 - 36)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s64.c +new file mode 100644 +index 000000000..efc0e740f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s64.c +@@ -0,0 +1,97 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_s64_base: ++** ld1rqd z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s64_base, svint64_t, int64_t, ++ z0 = svld1rq_s64 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_s64_index: ++** ld1rqd z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s64_index, svint64_t, int64_t, ++ z0 = svld1rq_s64 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_s64_1: ++** add (x[0-9]+), x0, #?8 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s64_1, svint64_t, int64_t, ++ z0 = svld1rq_s64 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_s64_2: ++** ld1rqd z0\.d, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s64_2, svint64_t, int64_t, ++ z0 = svld1rq_s64 (p0, x0 + 2), ++ z0 = svld1rq (p0, x0 + 2)) ++ ++/* ++** ld1rq_s64_14: ++** ld1rqd z0\.d, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s64_14, svint64_t, int64_t, ++ z0 = svld1rq_s64 (p0, x0 + 14), ++ z0 = svld1rq (p0, x0 + 14)) ++ ++/* ++** ld1rq_s64_16: ++** add (x[0-9]+), x0, #?128 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s64_16, svint64_t, int64_t, ++ z0 = svld1rq_s64 (p0, x0 + 16), ++ z0 = svld1rq (p0, x0 + 16)) ++ ++/* ++** ld1rq_s64_m1: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s64_m1, svint64_t, int64_t, ++ z0 = svld1rq_s64 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_s64_m2: ++** ld1rqd z0\.d, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s64_m2, svint64_t, int64_t, ++ z0 = svld1rq_s64 (p0, x0 - 2), ++ z0 = svld1rq (p0, x0 - 2)) ++ ++/* ++** ld1rq_s64_m16: ++** ld1rqd z0\.d, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s64_m16, svint64_t, int64_t, ++ z0 = svld1rq_s64 (p0, x0 - 16), ++ z0 = svld1rq (p0, x0 - 16)) ++ ++/* ++** ld1rq_s64_m18: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s64_m18, svint64_t, int64_t, ++ z0 = svld1rq_s64 (p0, x0 - 18), ++ z0 = svld1rq (p0, x0 - 18)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s8.c +new file mode 100644 +index 000000000..e183e472f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s8.c +@@ -0,0 +1,137 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_s8_base: ++** ld1rqb z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_base, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_s8_index: ++** ld1rqb z0\.b, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_index, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_s8_1: ++** add (x[0-9]+), x0, #?1 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_1, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_s8_8: ++** add (x[0-9]+), x0, #?8 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_8, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 + 8), ++ z0 = svld1rq (p0, x0 + 8)) ++ ++/* ++** ld1rq_s8_15: ++** add (x[0-9]+), x0, #?15 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_15, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 + 15), ++ z0 = svld1rq (p0, x0 + 15)) ++ ++/* ++** ld1rq_s8_16: ++** ld1rqb z0\.b, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_16, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 + 16), ++ z0 = svld1rq (p0, x0 + 16)) ++ ++/* ++** ld1rq_s8_112: ++** ld1rqb z0\.b, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_112, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 + 112), ++ z0 = svld1rq (p0, x0 + 112)) ++ ++/* ++** ld1rq_s8_128: ++** add (x[0-9]+), x0, #?128 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_128, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 + 128), ++ z0 = svld1rq (p0, x0 + 128)) ++ ++/* ++** ld1rq_s8_m1: ++** sub (x[0-9]+), x0, #?1 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_m1, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_s8_m8: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_m8, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 - 8), ++ z0 = svld1rq (p0, x0 - 8)) ++ ++/* ++** ld1rq_s8_m15: ++** sub (x[0-9]+), x0, #?15 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_m15, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 - 15), ++ z0 = svld1rq (p0, x0 - 15)) ++ ++/* ++** ld1rq_s8_m16: ++** ld1rqb z0\.b, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_m16, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 - 16), ++ z0 = svld1rq (p0, x0 - 16)) ++ ++/* ++** ld1rq_s8_m128: ++** ld1rqb z0\.b, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_m128, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 - 128), ++ z0 = svld1rq (p0, x0 - 128)) ++ ++/* ++** ld1rq_s8_m144: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_s8_m144, svint8_t, int8_t, ++ z0 = svld1rq_s8 (p0, x0 - 144), ++ z0 = svld1rq (p0, x0 - 144)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u16.c +new file mode 100644 +index 000000000..c24ab680a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u16.c +@@ -0,0 +1,137 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_u16_base: ++** ld1rqh z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_base, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_u16_index: ++** ld1rqh z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_index, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_u16_1: ++** add (x[0-9]+), x0, #?2 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_1, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_u16_4: ++** add (x[0-9]+), x0, #?8 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_4, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 + 4), ++ z0 = svld1rq (p0, x0 + 4)) ++ ++/* ++** ld1rq_u16_7: ++** add (x[0-9]+), x0, #?14 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_7, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 + 7), ++ z0 = svld1rq (p0, x0 + 7)) ++ ++/* ++** ld1rq_u16_8: ++** ld1rqh z0\.h, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_8, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 + 8), ++ z0 = svld1rq (p0, x0 + 8)) ++ ++/* ++** ld1rq_u16_56: ++** ld1rqh z0\.h, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_56, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 + 56), ++ z0 = svld1rq (p0, x0 + 56)) ++ ++/* ++** ld1rq_u16_64: ++** add (x[0-9]+), x0, #?128 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_64, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 + 64), ++ z0 = svld1rq (p0, x0 + 64)) ++ ++/* ++** ld1rq_u16_m1: ++** sub (x[0-9]+), x0, #?2 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_m1, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_u16_m4: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_m4, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 - 4), ++ z0 = svld1rq (p0, x0 - 4)) ++ ++/* ++** ld1rq_u16_m7: ++** sub (x[0-9]+), x0, #?14 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_m7, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 - 7), ++ z0 = svld1rq (p0, x0 - 7)) ++ ++/* ++** ld1rq_u16_m8: ++** ld1rqh z0\.h, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_m8, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 - 8), ++ z0 = svld1rq (p0, x0 - 8)) ++ ++/* ++** ld1rq_u16_m64: ++** ld1rqh z0\.h, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_m64, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 - 64), ++ z0 = svld1rq (p0, x0 - 64)) ++ ++/* ++** ld1rq_u16_m72: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqh z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u16_m72, svuint16_t, uint16_t, ++ z0 = svld1rq_u16 (p0, x0 - 72), ++ z0 = svld1rq (p0, x0 - 72)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u32.c +new file mode 100644 +index 000000000..722e34db3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u32.c +@@ -0,0 +1,137 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_u32_base: ++** ld1rqw z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_base, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_u32_index: ++** ld1rqw z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_index, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_u32_1: ++** add (x[0-9]+), x0, #?4 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_1, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_u32_2: ++** add (x[0-9]+), x0, #?8 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_2, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 + 2), ++ z0 = svld1rq (p0, x0 + 2)) ++ ++/* ++** ld1rq_u32_3: ++** add (x[0-9]+), x0, #?12 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_3, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 + 3), ++ z0 = svld1rq (p0, x0 + 3)) ++ ++/* ++** ld1rq_u32_4: ++** ld1rqw z0\.s, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_4, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 + 4), ++ z0 = svld1rq (p0, x0 + 4)) ++ ++/* ++** ld1rq_u32_28: ++** ld1rqw z0\.s, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_28, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 + 28), ++ z0 = svld1rq (p0, x0 + 28)) ++ ++/* ++** ld1rq_u32_32: ++** add (x[0-9]+), x0, #?128 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_32, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 + 32), ++ z0 = svld1rq (p0, x0 + 32)) ++ ++/* ++** ld1rq_u32_m1: ++** sub (x[0-9]+), x0, #?4 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_m1, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_u32_m2: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_m2, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 - 2), ++ z0 = svld1rq (p0, x0 - 2)) ++ ++/* ++** ld1rq_u32_m3: ++** sub (x[0-9]+), x0, #?12 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_m3, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 - 3), ++ z0 = svld1rq (p0, x0 - 3)) ++ ++/* ++** ld1rq_u32_m4: ++** ld1rqw z0\.s, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_m4, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 - 4), ++ z0 = svld1rq (p0, x0 - 4)) ++ ++/* ++** ld1rq_u32_m32: ++** ld1rqw z0\.s, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_m32, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 - 32), ++ z0 = svld1rq (p0, x0 - 32)) ++ ++/* ++** ld1rq_u32_m36: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqw z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u32_m36, svuint32_t, uint32_t, ++ z0 = svld1rq_u32 (p0, x0 - 36), ++ z0 = svld1rq (p0, x0 - 36)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u64.c +new file mode 100644 +index 000000000..a116b7fd9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u64.c +@@ -0,0 +1,97 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_u64_base: ++** ld1rqd z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u64_base, svuint64_t, uint64_t, ++ z0 = svld1rq_u64 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_u64_index: ++** ld1rqd z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u64_index, svuint64_t, uint64_t, ++ z0 = svld1rq_u64 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_u64_1: ++** add (x[0-9]+), x0, #?8 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u64_1, svuint64_t, uint64_t, ++ z0 = svld1rq_u64 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_u64_2: ++** ld1rqd z0\.d, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u64_2, svuint64_t, uint64_t, ++ z0 = svld1rq_u64 (p0, x0 + 2), ++ z0 = svld1rq (p0, x0 + 2)) ++ ++/* ++** ld1rq_u64_14: ++** ld1rqd z0\.d, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u64_14, svuint64_t, uint64_t, ++ z0 = svld1rq_u64 (p0, x0 + 14), ++ z0 = svld1rq (p0, x0 + 14)) ++ ++/* ++** ld1rq_u64_16: ++** add (x[0-9]+), x0, #?128 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u64_16, svuint64_t, uint64_t, ++ z0 = svld1rq_u64 (p0, x0 + 16), ++ z0 = svld1rq (p0, x0 + 16)) ++ ++/* ++** ld1rq_u64_m1: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u64_m1, svuint64_t, uint64_t, ++ z0 = svld1rq_u64 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_u64_m2: ++** ld1rqd z0\.d, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u64_m2, svuint64_t, uint64_t, ++ z0 = svld1rq_u64 (p0, x0 - 2), ++ z0 = svld1rq (p0, x0 - 2)) ++ ++/* ++** ld1rq_u64_m16: ++** ld1rqd z0\.d, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u64_m16, svuint64_t, uint64_t, ++ z0 = svld1rq_u64 (p0, x0 - 16), ++ z0 = svld1rq (p0, x0 - 16)) ++ ++/* ++** ld1rq_u64_m18: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqd z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u64_m18, svuint64_t, uint64_t, ++ z0 = svld1rq_u64 (p0, x0 - 18), ++ z0 = svld1rq (p0, x0 - 18)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u8.c +new file mode 100644 +index 000000000..74b72530e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u8.c +@@ -0,0 +1,137 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1rq_u8_base: ++** ld1rqb z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_base, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0), ++ z0 = svld1rq (p0, x0)) ++ ++/* ++** ld1rq_u8_index: ++** ld1rqb z0\.b, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_index, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 + x1), ++ z0 = svld1rq (p0, x0 + x1)) ++ ++/* ++** ld1rq_u8_1: ++** add (x[0-9]+), x0, #?1 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_1, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 + 1), ++ z0 = svld1rq (p0, x0 + 1)) ++ ++/* ++** ld1rq_u8_8: ++** add (x[0-9]+), x0, #?8 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_8, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 + 8), ++ z0 = svld1rq (p0, x0 + 8)) ++ ++/* ++** ld1rq_u8_15: ++** add (x[0-9]+), x0, #?15 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_15, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 + 15), ++ z0 = svld1rq (p0, x0 + 15)) ++ ++/* ++** ld1rq_u8_16: ++** ld1rqb z0\.b, p0/z, \[x0, #?16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_16, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 + 16), ++ z0 = svld1rq (p0, x0 + 16)) ++ ++/* ++** ld1rq_u8_112: ++** ld1rqb z0\.b, p0/z, \[x0, #?112\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_112, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 + 112), ++ z0 = svld1rq (p0, x0 + 112)) ++ ++/* ++** ld1rq_u8_128: ++** add (x[0-9]+), x0, #?128 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_128, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 + 128), ++ z0 = svld1rq (p0, x0 + 128)) ++ ++/* ++** ld1rq_u8_m1: ++** sub (x[0-9]+), x0, #?1 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_m1, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 - 1), ++ z0 = svld1rq (p0, x0 - 1)) ++ ++/* ++** ld1rq_u8_m8: ++** sub (x[0-9]+), x0, #?8 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_m8, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 - 8), ++ z0 = svld1rq (p0, x0 - 8)) ++ ++/* ++** ld1rq_u8_m15: ++** sub (x[0-9]+), x0, #?15 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_m15, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 - 15), ++ z0 = svld1rq (p0, x0 - 15)) ++ ++/* ++** ld1rq_u8_m16: ++** ld1rqb z0\.b, p0/z, \[x0, #?-16\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_m16, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 - 16), ++ z0 = svld1rq (p0, x0 - 16)) ++ ++/* ++** ld1rq_u8_m128: ++** ld1rqb z0\.b, p0/z, \[x0, #?-128\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_m128, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 - 128), ++ z0 = svld1rq (p0, x0 - 128)) ++ ++/* ++** ld1rq_u8_m144: ++** sub (x[0-9]+), x0, #?144 ++** ld1rqb z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld1rq_u8_m144, svuint8_t, uint8_t, ++ z0 = svld1rq_u8 (p0, x0 - 144), ++ z0 = svld1rq (p0, x0 - 144)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c +new file mode 100644 +index 000000000..16a5316a9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c +@@ -0,0 +1,131 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sb_gather_s32_tied1: ++** ld1sb z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_s32_tied1, svint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_s32 (p0, z0), ++ z0_res = svld1sb_gather_s32 (p0, z0)) ++ ++/* ++** ld1sb_gather_s32_untied: ++** ld1sb z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_s32_untied, svint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_s32 (p0, z1), ++ z0_res = svld1sb_gather_s32 (p0, z1)) ++ ++/* ++** ld1sb_gather_x0_s32_offset: ++** ld1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_x0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, x0), ++ z0_res = svld1sb_gather_offset_s32 (p0, z0, x0)) ++ ++/* ++** ld1sb_gather_m1_s32_offset: ++** mov (x[0-9]+), #?-1 ++** ld1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_m1_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, -1), ++ z0_res = svld1sb_gather_offset_s32 (p0, z0, -1)) ++ ++/* ++** ld1sb_gather_0_s32_offset: ++** ld1sb z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, 0), ++ z0_res = svld1sb_gather_offset_s32 (p0, z0, 0)) ++ ++/* ++** ld1sb_gather_5_s32_offset: ++** ld1sb z0\.s, p0/z, \[z0\.s, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_5_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, 5), ++ z0_res = svld1sb_gather_offset_s32 (p0, z0, 5)) ++ ++/* ++** ld1sb_gather_31_s32_offset: ++** ld1sb z0\.s, p0/z, \[z0\.s, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_31_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, 31), ++ z0_res = svld1sb_gather_offset_s32 (p0, z0, 31)) ++ ++/* ++** ld1sb_gather_32_s32_offset: ++** mov (x[0-9]+), #?32 ++** ld1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_32_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, 32), ++ z0_res = svld1sb_gather_offset_s32 (p0, z0, 32)) ++ ++/* ++** ld1sb_gather_x0_s32_s32offset: ++** ld1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_s32_s32offset, svint32_t, int8_t, svint32_t, ++ z0_res = svld1sb_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_tied1_s32_s32offset: ++** ld1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_s32_s32offset, svint32_t, int8_t, svint32_t, ++ z0_res = svld1sb_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_untied_s32_s32offset: ++** ld1sb z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_s32_s32offset, svint32_t, int8_t, svint32_t, ++ z0_res = svld1sb_gather_s32offset_s32 (p0, x0, z1), ++ z0_res = svld1sb_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ld1sb_gather_x0_s32_u32offset: ++** ld1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_s32_u32offset, svint32_t, int8_t, svuint32_t, ++ z0_res = svld1sb_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_tied1_s32_u32offset: ++** ld1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** 
ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_s32_u32offset, svint32_t, int8_t, svuint32_t, ++ z0_res = svld1sb_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_untied_s32_u32offset: ++** ld1sb z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_s32_u32offset, svint32_t, int8_t, svuint32_t, ++ z0_res = svld1sb_gather_u32offset_s32 (p0, x0, z1), ++ z0_res = svld1sb_gather_offset_s32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c +new file mode 100644 +index 000000000..3f953247e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c +@@ -0,0 +1,149 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sb_gather_s64_tied1: ++** ld1sb z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_s64 (p0, z0), ++ z0_res = svld1sb_gather_s64 (p0, z0)) ++ ++/* ++** ld1sb_gather_s64_untied: ++** ld1sb z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_s64 (p0, z1), ++ z0_res = svld1sb_gather_s64 (p0, z1)) ++ ++/* ++** ld1sb_gather_x0_s64_offset: ++** ld1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svld1sb_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ld1sb_gather_m1_s64_offset: ++** mov (x[0-9]+), #?-1 ++** ld1sb z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_m1_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, -1), ++ z0_res = svld1sb_gather_offset_s64 (p0, z0, -1)) ++ ++/* ++** ld1sb_gather_0_s64_offset: ++** ld1sb z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svld1sb_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ld1sb_gather_5_s64_offset: ++** ld1sb z0\.d, p0/z, \[z0\.d, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svld1sb_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ld1sb_gather_31_s64_offset: ++** ld1sb z0\.d, p0/z, \[z0\.d, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_31_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, 31), ++ z0_res = svld1sb_gather_offset_s64 (p0, z0, 31)) ++ ++/* ++** ld1sb_gather_32_s64_offset: ++** mov (x[0-9]+), #?32 ++** ld1sb z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_32_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, 32), ++ z0_res = svld1sb_gather_offset_s64 (p0, z0, 32)) ++ ++/* ++** ld1sb_gather_x0_s64_s64offset: ++** ld1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_s64_s64offset, svint64_t, int8_t, svint64_t, ++ z0_res = svld1sb_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_tied1_s64_s64offset: ++** ld1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ 
(ld1sb_gather_tied1_s64_s64offset, svint64_t, int8_t, svint64_t, ++ z0_res = svld1sb_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_untied_s64_s64offset: ++** ld1sb z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_s64_s64offset, svint64_t, int8_t, svint64_t, ++ z0_res = svld1sb_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svld1sb_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1sb_gather_ext_s64_s64offset: ++** ld1sb z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_ext_s64_s64offset, svint64_t, int8_t, svint64_t, ++ z0_res = svld1sb_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1sb_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sb_gather_x0_s64_u64offset: ++** ld1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ z0_res = svld1sb_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_tied1_s64_u64offset: ++** ld1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ z0_res = svld1sb_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_untied_s64_u64offset: ++** ld1sb z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ z0_res = svld1sb_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svld1sb_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1sb_gather_ext_s64_u64offset: ++** ld1sb z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_ext_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ z0_res = svld1sb_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1sb_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c +new file mode 100644 +index 000000000..424de65a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c +@@ -0,0 +1,131 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sb_gather_u32_tied1: ++** ld1sb z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_u32 (p0, z0), ++ z0_res = svld1sb_gather_u32 (p0, z0)) ++ ++/* ++** ld1sb_gather_u32_untied: ++** ld1sb z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_u32_untied, svuint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_u32 (p0, z1), ++ z0_res = svld1sb_gather_u32 (p0, z1)) ++ ++/* ++** ld1sb_gather_x0_u32_offset: ++** ld1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_x0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, x0), ++ z0_res = svld1sb_gather_offset_u32 (p0, z0, x0)) ++ ++/* ++** ld1sb_gather_m1_u32_offset: ++** mov (x[0-9]+), #?-1 ++** ld1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_m1_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, -1), ++ z0_res = svld1sb_gather_offset_u32 (p0, z0, -1)) ++ ++/* ++** ld1sb_gather_0_u32_offset: ++** ld1sb z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, 0), ++ z0_res = svld1sb_gather_offset_u32 (p0, z0, 0)) ++ ++/* ++** ld1sb_gather_5_u32_offset: ++** ld1sb z0\.s, p0/z, \[z0\.s, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_5_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, 5), ++ z0_res = svld1sb_gather_offset_u32 (p0, z0, 5)) ++ ++/* ++** ld1sb_gather_31_u32_offset: ++** ld1sb z0\.s, p0/z, \[z0\.s, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_31_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, 31), ++ z0_res = svld1sb_gather_offset_u32 (p0, z0, 31)) ++ ++/* ++** ld1sb_gather_32_u32_offset: ++** mov (x[0-9]+), #?32 ++** ld1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_32_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, 32), ++ z0_res = svld1sb_gather_offset_u32 (p0, z0, 32)) ++ ++/* ++** ld1sb_gather_x0_u32_s32offset: ++** ld1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_u32_s32offset, svuint32_t, int8_t, svint32_t, ++ z0_res = svld1sb_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_tied1_u32_s32offset: ++** ld1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_u32_s32offset, svuint32_t, int8_t, svint32_t, ++ z0_res = svld1sb_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_untied_u32_s32offset: ++** ld1sb z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_u32_s32offset, svuint32_t, int8_t, svint32_t, ++ z0_res = svld1sb_gather_s32offset_u32 (p0, x0, z1), ++ z0_res = svld1sb_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ld1sb_gather_x0_u32_u32offset: ++** ld1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_u32_u32offset, svuint32_t, int8_t, svuint32_t, ++ z0_res = svld1sb_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_tied1_u32_u32offset: ++** ld1sb z0\.s, p0/z, \[x0, z0\.s, 
uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_u32_u32offset, svuint32_t, int8_t, svuint32_t, ++ z0_res = svld1sb_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_untied_u32_u32offset: ++** ld1sb z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_u32_u32offset, svuint32_t, int8_t, svuint32_t, ++ z0_res = svld1sb_gather_u32offset_u32 (p0, x0, z1), ++ z0_res = svld1sb_gather_offset_u32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c +new file mode 100644 +index 000000000..aa375bea2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c +@@ -0,0 +1,149 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sb_gather_u64_tied1: ++** ld1sb z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_u64 (p0, z0), ++ z0_res = svld1sb_gather_u64 (p0, z0)) ++ ++/* ++** ld1sb_gather_u64_untied: ++** ld1sb z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_u64 (p0, z1), ++ z0_res = svld1sb_gather_u64 (p0, z1)) ++ ++/* ++** ld1sb_gather_x0_u64_offset: ++** ld1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svld1sb_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ld1sb_gather_m1_u64_offset: ++** mov (x[0-9]+), #?-1 ++** ld1sb z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_m1_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, -1), ++ z0_res = svld1sb_gather_offset_u64 (p0, z0, -1)) ++ ++/* ++** ld1sb_gather_0_u64_offset: ++** ld1sb z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svld1sb_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ld1sb_gather_5_u64_offset: ++** ld1sb z0\.d, p0/z, \[z0\.d, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svld1sb_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ld1sb_gather_31_u64_offset: ++** ld1sb z0\.d, p0/z, \[z0\.d, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_31_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, 31), ++ z0_res = svld1sb_gather_offset_u64 (p0, z0, 31)) ++ ++/* ++** ld1sb_gather_32_u64_offset: ++** mov (x[0-9]+), #?32 ++** ld1sb z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sb_gather_32_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, 32), ++ z0_res = svld1sb_gather_offset_u64 (p0, z0, 32)) ++ ++/* ++** ld1sb_gather_x0_u64_s64offset: ++** ld1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_u64_s64offset, svuint64_t, int8_t, svint64_t, ++ z0_res = svld1sb_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_tied1_u64_s64offset: ++** ld1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ 
++TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_u64_s64offset, svuint64_t, int8_t, svint64_t, ++ z0_res = svld1sb_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_untied_u64_s64offset: ++** ld1sb z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_u64_s64offset, svuint64_t, int8_t, svint64_t, ++ z0_res = svld1sb_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svld1sb_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1sb_gather_ext_u64_s64offset: ++** ld1sb z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_ext_u64_s64offset, svuint64_t, int8_t, svint64_t, ++ z0_res = svld1sb_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1sb_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sb_gather_x0_u64_u64offset: ++** ld1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_u64_u64offset, svuint64_t, int8_t, svuint64_t, ++ z0_res = svld1sb_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_tied1_u64_u64offset: ++** ld1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_u64_u64offset, svuint64_t, int8_t, svuint64_t, ++ z0_res = svld1sb_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1sb_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sb_gather_untied_u64_u64offset: ++** ld1sb z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_u64_u64offset, svuint64_t, int8_t, svuint64_t, ++ z0_res = svld1sb_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svld1sb_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1sb_gather_ext_u64_u64offset: ++** ld1sb z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sb_gather_ext_u64_u64offset, svuint64_t, int8_t, svuint64_t, ++ z0_res = svld1sb_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1sb_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s16.c +new file mode 100644 +index 000000000..70a793c14 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s16.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sb_s16_base: ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s16_base, svint16_t, int8_t, ++ z0 = svld1sb_s16 (p0, x0), ++ z0 = svld1sb_s16 (p0, x0)) ++ ++/* ++** ld1sb_s16_index: ++** ld1sb z0\.h, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s16_index, svint16_t, int8_t, ++ z0 = svld1sb_s16 (p0, x0 + x1), ++ z0 = svld1sb_s16 (p0, x0 + x1)) ++ ++/* ++** ld1sb_s16_1: ++** ld1sb z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s16_1, svint16_t, int8_t, ++ z0 = svld1sb_s16 (p0, x0 + svcnth ()), ++ z0 = svld1sb_s16 (p0, x0 + svcnth ())) ++ ++/* ++** ld1sb_s16_7: ++** ld1sb z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s16_7, svint16_t, int8_t, ++ z0 = svld1sb_s16 (p0, x0 + svcnth () * 7), ++ z0 = svld1sb_s16 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sb_s16_8: ++** incb x0, all, mul #4 ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s16_8, svint16_t, int8_t, ++ z0 = svld1sb_s16 (p0, x0 + svcnth () * 8), ++ z0 = svld1sb_s16 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ld1sb_s16_m1: ++** ld1sb z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s16_m1, svint16_t, int8_t, ++ z0 = svld1sb_s16 (p0, x0 - svcnth ()), ++ z0 = svld1sb_s16 (p0, x0 - svcnth ())) ++ ++/* ++** ld1sb_s16_m8: ++** ld1sb z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s16_m8, svint16_t, int8_t, ++ z0 = svld1sb_s16 (p0, x0 - svcnth () * 8), ++ z0 = svld1sb_s16 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_s16_m9: ++** dech x0, all, mul #9 ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s16_m9, svint16_t, int8_t, ++ z0 = svld1sb_s16 (p0, x0 - svcnth () * 9), ++ z0 = svld1sb_s16 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ld1sb_vnum_s16_0: ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s16_0, svint16_t, int8_t, ++ z0 = svld1sb_vnum_s16 (p0, x0, 0), ++ z0 = svld1sb_vnum_s16 (p0, x0, 0)) ++ ++/* ++** ld1sb_vnum_s16_1: ++** ld1sb z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s16_1, svint16_t, int8_t, ++ z0 = svld1sb_vnum_s16 (p0, x0, 1), ++ z0 = svld1sb_vnum_s16 (p0, x0, 1)) ++ ++/* ++** ld1sb_vnum_s16_7: ++** ld1sb z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s16_7, svint16_t, int8_t, ++ z0 = svld1sb_vnum_s16 (p0, x0, 7), ++ z0 = svld1sb_vnum_s16 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_vnum_s16_8: ++** incb x0, all, mul #4 ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s16_8, svint16_t, int8_t, ++ z0 = svld1sb_vnum_s16 (p0, x0, 8), ++ z0 = svld1sb_vnum_s16 (p0, x0, 8)) ++ ++/* ++** ld1sb_vnum_s16_m1: ++** ld1sb z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s16_m1, svint16_t, int8_t, ++ z0 = svld1sb_vnum_s16 (p0, x0, -1), ++ z0 = svld1sb_vnum_s16 (p0, x0, -1)) ++ ++/* ++** ld1sb_vnum_s16_m8: ++** ld1sb z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s16_m8, svint16_t, int8_t, ++ z0 = svld1sb_vnum_s16 (p0, x0, -8), ++ z0 = svld1sb_vnum_s16 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_vnum_s16_m9: ++** dech x0, all, mul #9 ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s16_m9, svint16_t, int8_t, ++ z0 = svld1sb_vnum_s16 (p0, x0, -9), ++ z0 = svld1sb_vnum_s16 (p0, x0, -9)) ++ ++/* ++** ld1sb_vnum_s16_x1: ++** cnth (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1sb z0\.h, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1sb z0\.h, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s16_x1, svint16_t, int8_t, ++ z0 = svld1sb_vnum_s16 (p0, x0, x1), ++ z0 = svld1sb_vnum_s16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s32.c +new file mode 100644 +index 000000000..74b3a321b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s32.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sb_s32_base: ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s32_base, svint32_t, int8_t, ++ z0 = svld1sb_s32 (p0, x0), ++ z0 = svld1sb_s32 (p0, x0)) ++ ++/* ++** ld1sb_s32_index: ++** ld1sb z0\.s, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s32_index, svint32_t, int8_t, ++ z0 = svld1sb_s32 (p0, x0 + x1), ++ z0 = svld1sb_s32 (p0, x0 + x1)) ++ ++/* ++** ld1sb_s32_1: ++** ld1sb z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s32_1, svint32_t, int8_t, ++ z0 = svld1sb_s32 (p0, x0 + svcntw ()), ++ z0 = svld1sb_s32 (p0, x0 + svcntw ())) ++ ++/* ++** ld1sb_s32_7: ++** ld1sb z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s32_7, svint32_t, int8_t, ++ z0 = svld1sb_s32 (p0, x0 + svcntw () * 7), ++ z0 = svld1sb_s32 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_s32_8: ++** incb x0, all, mul #2 ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s32_8, svint32_t, int8_t, ++ z0 = svld1sb_s32 (p0, x0 + svcntw () * 8), ++ z0 = svld1sb_s32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1sb_s32_m1: ++** ld1sb z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s32_m1, svint32_t, int8_t, ++ z0 = svld1sb_s32 (p0, x0 - svcntw ()), ++ z0 = svld1sb_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ld1sb_s32_m8: ++** ld1sb z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s32_m8, svint32_t, int8_t, ++ z0 = svld1sb_s32 (p0, x0 - svcntw () * 8), ++ z0 = svld1sb_s32 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_s32_m9: ++** decw x0, all, mul #9 ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s32_m9, svint32_t, int8_t, ++ z0 = svld1sb_s32 (p0, x0 - svcntw () * 9), ++ z0 = svld1sb_s32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1sb_vnum_s32_0: ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s32_0, svint32_t, int8_t, ++ z0 = svld1sb_vnum_s32 (p0, x0, 0), ++ z0 = svld1sb_vnum_s32 (p0, x0, 0)) ++ ++/* ++** ld1sb_vnum_s32_1: ++** ld1sb z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s32_1, svint32_t, int8_t, ++ z0 = svld1sb_vnum_s32 (p0, x0, 1), ++ z0 = svld1sb_vnum_s32 (p0, x0, 1)) ++ ++/* ++** ld1sb_vnum_s32_7: ++** ld1sb z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s32_7, svint32_t, int8_t, ++ z0 = svld1sb_vnum_s32 (p0, x0, 7), ++ z0 = svld1sb_vnum_s32 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_vnum_s32_8: ++** incb x0, all, mul #2 ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s32_8, svint32_t, int8_t, ++ z0 = svld1sb_vnum_s32 (p0, x0, 8), ++ z0 = svld1sb_vnum_s32 (p0, x0, 8)) ++ ++/* ++** ld1sb_vnum_s32_m1: ++** ld1sb z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s32_m1, svint32_t, int8_t, ++ z0 = svld1sb_vnum_s32 (p0, x0, -1), ++ z0 = svld1sb_vnum_s32 (p0, x0, -1)) ++ ++/* ++** ld1sb_vnum_s32_m8: ++** ld1sb z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s32_m8, svint32_t, int8_t, ++ z0 = svld1sb_vnum_s32 (p0, x0, -8), ++ z0 = svld1sb_vnum_s32 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sb_vnum_s32_m9: ++** decw x0, all, mul #9 ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s32_m9, svint32_t, int8_t, ++ z0 = svld1sb_vnum_s32 (p0, x0, -9), ++ z0 = svld1sb_vnum_s32 (p0, x0, -9)) ++ ++/* ++** ld1sb_vnum_s32_x1: ++** cntw (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1sb z0\.s, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1sb z0\.s, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s32_x1, svint32_t, int8_t, ++ z0 = svld1sb_vnum_s32 (p0, x0, x1), ++ z0 = svld1sb_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s64.c +new file mode 100644 +index 000000000..1984e1956 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s64.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sb_s64_base: ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s64_base, svint64_t, int8_t, ++ z0 = svld1sb_s64 (p0, x0), ++ z0 = svld1sb_s64 (p0, x0)) ++ ++/* ++** ld1sb_s64_index: ++** ld1sb z0\.d, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s64_index, svint64_t, int8_t, ++ z0 = svld1sb_s64 (p0, x0 + x1), ++ z0 = svld1sb_s64 (p0, x0 + x1)) ++ ++/* ++** ld1sb_s64_1: ++** ld1sb z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s64_1, svint64_t, int8_t, ++ z0 = svld1sb_s64 (p0, x0 + svcntd ()), ++ z0 = svld1sb_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1sb_s64_7: ++** ld1sb z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s64_7, svint64_t, int8_t, ++ z0 = svld1sb_s64 (p0, x0 + svcntd () * 7), ++ z0 = svld1sb_s64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_s64_8: ++** incb x0 ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s64_8, svint64_t, int8_t, ++ z0 = svld1sb_s64 (p0, x0 + svcntd () * 8), ++ z0 = svld1sb_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1sb_s64_m1: ++** ld1sb z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s64_m1, svint64_t, int8_t, ++ z0 = svld1sb_s64 (p0, x0 - svcntd ()), ++ z0 = svld1sb_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1sb_s64_m8: ++** ld1sb z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s64_m8, svint64_t, int8_t, ++ z0 = svld1sb_s64 (p0, x0 - svcntd () * 8), ++ z0 = svld1sb_s64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_s64_m9: ++** decd x0, all, mul #9 ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_s64_m9, svint64_t, int8_t, ++ z0 = svld1sb_s64 (p0, x0 - svcntd () * 9), ++ z0 = svld1sb_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1sb_vnum_s64_0: ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s64_0, svint64_t, int8_t, ++ z0 = svld1sb_vnum_s64 (p0, x0, 0), ++ z0 = svld1sb_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ld1sb_vnum_s64_1: ++** ld1sb z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s64_1, svint64_t, int8_t, ++ z0 = svld1sb_vnum_s64 (p0, x0, 1), ++ z0 = svld1sb_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ld1sb_vnum_s64_7: ++** ld1sb z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s64_7, svint64_t, int8_t, ++ z0 = svld1sb_vnum_s64 (p0, x0, 7), ++ z0 = svld1sb_vnum_s64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sb_vnum_s64_8: ++** incb x0 ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s64_8, svint64_t, int8_t, ++ z0 = svld1sb_vnum_s64 (p0, x0, 8), ++ z0 = svld1sb_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ld1sb_vnum_s64_m1: ++** ld1sb z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s64_m1, svint64_t, int8_t, ++ z0 = svld1sb_vnum_s64 (p0, x0, -1), ++ z0 = svld1sb_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ld1sb_vnum_s64_m8: ++** ld1sb z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s64_m8, svint64_t, int8_t, ++ z0 = svld1sb_vnum_s64 (p0, x0, -8), ++ z0 = svld1sb_vnum_s64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_vnum_s64_m9: ++** decd x0, all, mul #9 ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s64_m9, svint64_t, int8_t, ++ z0 = svld1sb_vnum_s64 (p0, x0, -9), ++ z0 = svld1sb_vnum_s64 (p0, x0, -9)) ++ ++/* ++** ld1sb_vnum_s64_x1: ++** cntd (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1sb z0\.d, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1sb z0\.d, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_s64_x1, svint64_t, int8_t, ++ z0 = svld1sb_vnum_s64 (p0, x0, x1), ++ z0 = svld1sb_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u16.c +new file mode 100644 +index 000000000..cfa616251 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u16.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sb_u16_base: ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u16_base, svuint16_t, int8_t, ++ z0 = svld1sb_u16 (p0, x0), ++ z0 = svld1sb_u16 (p0, x0)) ++ ++/* ++** ld1sb_u16_index: ++** ld1sb z0\.h, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u16_index, svuint16_t, int8_t, ++ z0 = svld1sb_u16 (p0, x0 + x1), ++ z0 = svld1sb_u16 (p0, x0 + x1)) ++ ++/* ++** ld1sb_u16_1: ++** ld1sb z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u16_1, svuint16_t, int8_t, ++ z0 = svld1sb_u16 (p0, x0 + svcnth ()), ++ z0 = svld1sb_u16 (p0, x0 + svcnth ())) ++ ++/* ++** ld1sb_u16_7: ++** ld1sb z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u16_7, svuint16_t, int8_t, ++ z0 = svld1sb_u16 (p0, x0 + svcnth () * 7), ++ z0 = svld1sb_u16 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_u16_8: ++** incb x0, all, mul #4 ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u16_8, svuint16_t, int8_t, ++ z0 = svld1sb_u16 (p0, x0 + svcnth () * 8), ++ z0 = svld1sb_u16 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ld1sb_u16_m1: ++** ld1sb z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u16_m1, svuint16_t, int8_t, ++ z0 = svld1sb_u16 (p0, x0 - svcnth ()), ++ z0 = svld1sb_u16 (p0, x0 - svcnth ())) ++ ++/* ++** ld1sb_u16_m8: ++** ld1sb z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u16_m8, svuint16_t, int8_t, ++ z0 = svld1sb_u16 (p0, x0 - svcnth () * 8), ++ z0 = svld1sb_u16 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sb_u16_m9: ++** dech x0, all, mul #9 ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u16_m9, svuint16_t, int8_t, ++ z0 = svld1sb_u16 (p0, x0 - svcnth () * 9), ++ z0 = svld1sb_u16 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ld1sb_vnum_u16_0: ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u16_0, svuint16_t, int8_t, ++ z0 = svld1sb_vnum_u16 (p0, x0, 0), ++ z0 = svld1sb_vnum_u16 (p0, x0, 0)) ++ ++/* ++** ld1sb_vnum_u16_1: ++** ld1sb z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u16_1, svuint16_t, int8_t, ++ z0 = svld1sb_vnum_u16 (p0, x0, 1), ++ z0 = svld1sb_vnum_u16 (p0, x0, 1)) ++ ++/* ++** ld1sb_vnum_u16_7: ++** ld1sb z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u16_7, svuint16_t, int8_t, ++ z0 = svld1sb_vnum_u16 (p0, x0, 7), ++ z0 = svld1sb_vnum_u16 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_vnum_u16_8: ++** incb x0, all, mul #4 ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u16_8, svuint16_t, int8_t, ++ z0 = svld1sb_vnum_u16 (p0, x0, 8), ++ z0 = svld1sb_vnum_u16 (p0, x0, 8)) ++ ++/* ++** ld1sb_vnum_u16_m1: ++** ld1sb z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u16_m1, svuint16_t, int8_t, ++ z0 = svld1sb_vnum_u16 (p0, x0, -1), ++ z0 = svld1sb_vnum_u16 (p0, x0, -1)) ++ ++/* ++** ld1sb_vnum_u16_m8: ++** ld1sb z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u16_m8, svuint16_t, int8_t, ++ z0 = svld1sb_vnum_u16 (p0, x0, -8), ++ z0 = svld1sb_vnum_u16 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_vnum_u16_m9: ++** dech x0, all, mul #9 ++** ld1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u16_m9, svuint16_t, int8_t, ++ z0 = svld1sb_vnum_u16 (p0, x0, -9), ++ z0 = svld1sb_vnum_u16 (p0, x0, -9)) ++ ++/* ++** ld1sb_vnum_u16_x1: ++** cnth (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1sb z0\.h, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1sb z0\.h, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u16_x1, svuint16_t, int8_t, ++ z0 = svld1sb_vnum_u16 (p0, x0, x1), ++ z0 = svld1sb_vnum_u16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u32.c +new file mode 100644 +index 000000000..990ae5e1b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u32.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sb_u32_base: ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u32_base, svuint32_t, int8_t, ++ z0 = svld1sb_u32 (p0, x0), ++ z0 = svld1sb_u32 (p0, x0)) ++ ++/* ++** ld1sb_u32_index: ++** ld1sb z0\.s, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u32_index, svuint32_t, int8_t, ++ z0 = svld1sb_u32 (p0, x0 + x1), ++ z0 = svld1sb_u32 (p0, x0 + x1)) ++ ++/* ++** ld1sb_u32_1: ++** ld1sb z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u32_1, svuint32_t, int8_t, ++ z0 = svld1sb_u32 (p0, x0 + svcntw ()), ++ z0 = svld1sb_u32 (p0, x0 + svcntw ())) ++ ++/* ++** ld1sb_u32_7: ++** ld1sb z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u32_7, svuint32_t, int8_t, ++ z0 = svld1sb_u32 (p0, x0 + svcntw () * 7), ++ z0 = svld1sb_u32 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sb_u32_8: ++** incb x0, all, mul #2 ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u32_8, svuint32_t, int8_t, ++ z0 = svld1sb_u32 (p0, x0 + svcntw () * 8), ++ z0 = svld1sb_u32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1sb_u32_m1: ++** ld1sb z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u32_m1, svuint32_t, int8_t, ++ z0 = svld1sb_u32 (p0, x0 - svcntw ()), ++ z0 = svld1sb_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ld1sb_u32_m8: ++** ld1sb z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u32_m8, svuint32_t, int8_t, ++ z0 = svld1sb_u32 (p0, x0 - svcntw () * 8), ++ z0 = svld1sb_u32 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_u32_m9: ++** decw x0, all, mul #9 ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u32_m9, svuint32_t, int8_t, ++ z0 = svld1sb_u32 (p0, x0 - svcntw () * 9), ++ z0 = svld1sb_u32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1sb_vnum_u32_0: ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u32_0, svuint32_t, int8_t, ++ z0 = svld1sb_vnum_u32 (p0, x0, 0), ++ z0 = svld1sb_vnum_u32 (p0, x0, 0)) ++ ++/* ++** ld1sb_vnum_u32_1: ++** ld1sb z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u32_1, svuint32_t, int8_t, ++ z0 = svld1sb_vnum_u32 (p0, x0, 1), ++ z0 = svld1sb_vnum_u32 (p0, x0, 1)) ++ ++/* ++** ld1sb_vnum_u32_7: ++** ld1sb z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u32_7, svuint32_t, int8_t, ++ z0 = svld1sb_vnum_u32 (p0, x0, 7), ++ z0 = svld1sb_vnum_u32 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_vnum_u32_8: ++** incb x0, all, mul #2 ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u32_8, svuint32_t, int8_t, ++ z0 = svld1sb_vnum_u32 (p0, x0, 8), ++ z0 = svld1sb_vnum_u32 (p0, x0, 8)) ++ ++/* ++** ld1sb_vnum_u32_m1: ++** ld1sb z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u32_m1, svuint32_t, int8_t, ++ z0 = svld1sb_vnum_u32 (p0, x0, -1), ++ z0 = svld1sb_vnum_u32 (p0, x0, -1)) ++ ++/* ++** ld1sb_vnum_u32_m8: ++** ld1sb z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u32_m8, svuint32_t, int8_t, ++ z0 = svld1sb_vnum_u32 (p0, x0, -8), ++ z0 = svld1sb_vnum_u32 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_vnum_u32_m9: ++** decw x0, all, mul #9 ++** ld1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u32_m9, svuint32_t, int8_t, ++ z0 = svld1sb_vnum_u32 (p0, x0, -9), ++ z0 = svld1sb_vnum_u32 (p0, x0, -9)) ++ ++/* ++** ld1sb_vnum_u32_x1: ++** cntw (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1sb z0\.s, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1sb z0\.s, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u32_x1, svuint32_t, int8_t, ++ z0 = svld1sb_vnum_u32 (p0, x0, x1), ++ z0 = svld1sb_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u64.c +new file mode 100644 +index 000000000..8051bf140 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u64.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sb_u64_base: ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u64_base, svuint64_t, int8_t, ++ z0 = svld1sb_u64 (p0, x0), ++ z0 = svld1sb_u64 (p0, x0)) ++ ++/* ++** ld1sb_u64_index: ++** ld1sb z0\.d, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u64_index, svuint64_t, int8_t, ++ z0 = svld1sb_u64 (p0, x0 + x1), ++ z0 = svld1sb_u64 (p0, x0 + x1)) ++ ++/* ++** ld1sb_u64_1: ++** ld1sb z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u64_1, svuint64_t, int8_t, ++ z0 = svld1sb_u64 (p0, x0 + svcntd ()), ++ z0 = svld1sb_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1sb_u64_7: ++** ld1sb z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u64_7, svuint64_t, int8_t, ++ z0 = svld1sb_u64 (p0, x0 + svcntd () * 7), ++ z0 = svld1sb_u64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_u64_8: ++** incb x0 ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u64_8, svuint64_t, int8_t, ++ z0 = svld1sb_u64 (p0, x0 + svcntd () * 8), ++ z0 = svld1sb_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1sb_u64_m1: ++** ld1sb z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u64_m1, svuint64_t, int8_t, ++ z0 = svld1sb_u64 (p0, x0 - svcntd ()), ++ z0 = svld1sb_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1sb_u64_m8: ++** ld1sb z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u64_m8, svuint64_t, int8_t, ++ z0 = svld1sb_u64 (p0, x0 - svcntd () * 8), ++ z0 = svld1sb_u64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_u64_m9: ++** decd x0, all, mul #9 ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_u64_m9, svuint64_t, int8_t, ++ z0 = svld1sb_u64 (p0, x0 - svcntd () * 9), ++ z0 = svld1sb_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1sb_vnum_u64_0: ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u64_0, svuint64_t, int8_t, ++ z0 = svld1sb_vnum_u64 (p0, x0, 0), ++ z0 = svld1sb_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ld1sb_vnum_u64_1: ++** ld1sb z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u64_1, svuint64_t, int8_t, ++ z0 = svld1sb_vnum_u64 (p0, x0, 1), ++ z0 = svld1sb_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ld1sb_vnum_u64_7: ++** ld1sb z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u64_7, svuint64_t, int8_t, ++ z0 = svld1sb_vnum_u64 (p0, x0, 7), ++ z0 = svld1sb_vnum_u64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sb_vnum_u64_8: ++** incb x0 ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u64_8, svuint64_t, int8_t, ++ z0 = svld1sb_vnum_u64 (p0, x0, 8), ++ z0 = svld1sb_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ld1sb_vnum_u64_m1: ++** ld1sb z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u64_m1, svuint64_t, int8_t, ++ z0 = svld1sb_vnum_u64 (p0, x0, -1), ++ z0 = svld1sb_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ld1sb_vnum_u64_m8: ++** ld1sb z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u64_m8, svuint64_t, int8_t, ++ z0 = svld1sb_vnum_u64 (p0, x0, -8), ++ z0 = svld1sb_vnum_u64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sb_vnum_u64_m9: ++** decd x0, all, mul #9 ++** ld1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u64_m9, svuint64_t, int8_t, ++ z0 = svld1sb_vnum_u64 (p0, x0, -9), ++ z0 = svld1sb_vnum_u64 (p0, x0, -9)) ++ ++/* ++** ld1sb_vnum_u64_x1: ++** cntd (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1sb z0\.d, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1sb z0\.d, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1sb_vnum_u64_x1, svuint64_t, int8_t, ++ z0 = svld1sb_vnum_u64 (p0, x0, x1), ++ z0 = svld1sb_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c +new file mode 100644 +index 000000000..ed07b4dfc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c +@@ -0,0 +1,252 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sh_gather_s32_tied1: ++** ld1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_s32_tied1, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_s32 (p0, z0), ++ z0_res = svld1sh_gather_s32 (p0, z0)) ++ ++/* ++** ld1sh_gather_s32_untied: ++** ld1sh z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_s32_untied, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_s32 (p0, z1), ++ z0_res = svld1sh_gather_s32 (p0, z1)) ++ ++/* ++** ld1sh_gather_x0_s32_offset: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, x0), ++ z0_res = svld1sh_gather_offset_s32 (p0, z0, x0)) ++ ++/* ++** ld1sh_gather_m2_s32_offset: ++** mov (x[0-9]+), #?-2 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_m2_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, -2), ++ z0_res = svld1sh_gather_offset_s32 (p0, z0, -2)) ++ ++/* ++** ld1sh_gather_0_s32_offset: ++** ld1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, 0), ++ z0_res = svld1sh_gather_offset_s32 (p0, z0, 0)) ++ ++/* ++** ld1sh_gather_5_s32_offset: ++** mov (x[0-9]+), #?5 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_5_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, 5), ++ z0_res = svld1sh_gather_offset_s32 (p0, z0, 5)) ++ ++/* ++** ld1sh_gather_6_s32_offset: ++** ld1sh z0\.s, p0/z, \[z0\.s, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_6_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, 6), ++ z0_res = svld1sh_gather_offset_s32 (p0, z0, 6)) ++ ++/* ++** ld1sh_gather_62_s32_offset: ++** ld1sh z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_62_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, 62), ++ z0_res = svld1sh_gather_offset_s32 (p0, z0, 62)) ++ ++/* ++** ld1sh_gather_64_s32_offset: ++** mov (x[0-9]+), #?64 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_64_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, 64), ++ z0_res = svld1sh_gather_offset_s32 (p0, z0, 64)) ++ 
++/* ++** ld1sh_gather_x0_s32_index: ++** lsl (x[0-9]+), x0, #?1 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, x0), ++ z0_res = svld1sh_gather_index_s32 (p0, z0, x0)) ++ ++/* ++** ld1sh_gather_m1_s32_index: ++** mov (x[0-9]+), #?-2 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_m1_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, -1), ++ z0_res = svld1sh_gather_index_s32 (p0, z0, -1)) ++ ++/* ++** ld1sh_gather_0_s32_index: ++** ld1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_0_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, 0), ++ z0_res = svld1sh_gather_index_s32 (p0, z0, 0)) ++ ++/* ++** ld1sh_gather_5_s32_index: ++** ld1sh z0\.s, p0/z, \[z0\.s, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_5_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, 5), ++ z0_res = svld1sh_gather_index_s32 (p0, z0, 5)) ++ ++/* ++** ld1sh_gather_31_s32_index: ++** ld1sh z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_31_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, 31), ++ z0_res = svld1sh_gather_index_s32 (p0, z0, 31)) ++ ++/* ++** ld1sh_gather_32_s32_index: ++** mov (x[0-9]+), #?64 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_32_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, 32), ++ z0_res = svld1sh_gather_index_s32 (p0, z0, 32)) ++ ++/* ++** ld1sh_gather_x0_s32_s32offset: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s32_s32offset, svint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_s32_s32offset: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s32_s32offset, svint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_s32_s32offset: ++** ld1sh z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s32_s32offset, svint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32offset_s32 (p0, x0, z1), ++ z0_res = svld1sh_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_x0_s32_u32offset: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s32_u32offset, svint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_s32_u32offset: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s32_u32offset, svint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_s32_u32offset: ++** ld1sh z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s32_u32offset, svint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32offset_s32 (p0, x0, z1), ++ z0_res = svld1sh_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** 
ld1sh_gather_x0_s32_s32index: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s32_s32index, svint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_s32_s32index: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s32_s32index, svint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_s32_s32index: ++** ld1sh z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s32_s32index, svint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32index_s32 (p0, x0, z1), ++ z0_res = svld1sh_gather_index_s32 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_x0_s32_u32index: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s32_u32index, svint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_s32_u32index: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s32_u32index, svint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_s32_u32index: ++** ld1sh z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s32_u32index, svint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32index_s32 (p0, x0, z1), ++ z0_res = svld1sh_gather_index_s32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c +new file mode 100644 +index 000000000..20ca42720 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c +@@ -0,0 +1,288 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sh_gather_s64_tied1: ++** ld1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_s64 (p0, z0), ++ z0_res = svld1sh_gather_s64 (p0, z0)) ++ ++/* ++** ld1sh_gather_s64_untied: ++** ld1sh z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_s64 (p0, z1), ++ z0_res = svld1sh_gather_s64 (p0, z1)) ++ ++/* ++** ld1sh_gather_x0_s64_offset: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svld1sh_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ld1sh_gather_m2_s64_offset: ++** mov (x[0-9]+), #?-2 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_m2_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, -2), ++ z0_res = svld1sh_gather_offset_s64 (p0, z0, -2)) ++ ++/* ++** ld1sh_gather_0_s64_offset: ++** ld1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svld1sh_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ld1sh_gather_5_s64_offset: ++** mov (x[0-9]+), #?5 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svld1sh_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ld1sh_gather_6_s64_offset: ++** ld1sh z0\.d, p0/z, \[z0\.d, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_6_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, 6), ++ z0_res = svld1sh_gather_offset_s64 (p0, z0, 6)) ++ ++/* ++** ld1sh_gather_62_s64_offset: ++** ld1sh z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_62_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, 62), ++ z0_res = svld1sh_gather_offset_s64 (p0, z0, 62)) ++ ++/* ++** ld1sh_gather_64_s64_offset: ++** mov (x[0-9]+), #?64 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_64_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, 64), ++ z0_res = svld1sh_gather_offset_s64 (p0, z0, 64)) ++ ++/* ++** ld1sh_gather_x0_s64_index: ++** lsl (x[0-9]+), x0, #?1 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, x0), ++ z0_res = svld1sh_gather_index_s64 (p0, z0, x0)) ++ ++/* ++** ld1sh_gather_m1_s64_index: ++** mov (x[0-9]+), #?-2 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_m1_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, -1), ++ z0_res = svld1sh_gather_index_s64 (p0, z0, -1)) ++ ++/* ++** ld1sh_gather_0_s64_index: ++** ld1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_0_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, 0), ++ z0_res = svld1sh_gather_index_s64 (p0, z0, 0)) ++ ++/* ++** ld1sh_gather_5_s64_index: ++** ld1sh z0\.d, p0/z, \[z0\.d, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS 
(ld1sh_gather_5_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, 5), ++ z0_res = svld1sh_gather_index_s64 (p0, z0, 5)) ++ ++/* ++** ld1sh_gather_31_s64_index: ++** ld1sh z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_31_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, 31), ++ z0_res = svld1sh_gather_index_s64 (p0, z0, 31)) ++ ++/* ++** ld1sh_gather_32_s64_index: ++** mov (x[0-9]+), #?64 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_32_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, 32), ++ z0_res = svld1sh_gather_index_s64 (p0, z0, 32)) ++ ++/* ++** ld1sh_gather_x0_s64_s64offset: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s64_s64offset, svint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_s64_s64offset: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s64_s64offset, svint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_s64_s64offset: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s64_s64offset, svint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svld1sh_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_ext_s64_s64offset: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_s64_s64offset, svint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1sh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sh_gather_x0_s64_u64offset: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_s64_u64offset: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_s64_u64offset: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svld1sh_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_ext_s64_u64offset: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1sh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sh_gather_x0_s64_s64index: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s64_s64index, svint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_s64_s64index: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ 
++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s64_s64index, svint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_s64_s64index: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s64_s64index, svint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64index_s64 (p0, x0, z1), ++ z0_res = svld1sh_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_ext_s64_s64index: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_s64_s64index, svint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1sh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sh_gather_x0_s64_u64index: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s64_u64index, svint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_s64_u64index: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s64_u64index, svint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_s64_u64index: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s64_u64index, svint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64index_s64 (p0, x0, z1), ++ z0_res = svld1sh_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_ext_s64_u64index: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_s64_u64index, svint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1sh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c +new file mode 100644 +index 000000000..e3a85a23f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c +@@ -0,0 +1,252 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sh_gather_u32_tied1: ++** ld1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_u32 (p0, z0), ++ z0_res = svld1sh_gather_u32 (p0, z0)) ++ ++/* ++** ld1sh_gather_u32_untied: ++** ld1sh z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_u32_untied, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_u32 (p0, z1), ++ z0_res = svld1sh_gather_u32 (p0, z1)) ++ ++/* ++** ld1sh_gather_x0_u32_offset: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, x0), ++ z0_res = svld1sh_gather_offset_u32 (p0, z0, x0)) ++ ++/* ++** ld1sh_gather_m2_u32_offset: ++** mov (x[0-9]+), #?-2 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_m2_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, -2), ++ z0_res = svld1sh_gather_offset_u32 (p0, z0, -2)) ++ ++/* ++** ld1sh_gather_0_u32_offset: ++** ld1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, 0), ++ z0_res = svld1sh_gather_offset_u32 (p0, z0, 0)) ++ ++/* ++** ld1sh_gather_5_u32_offset: ++** mov (x[0-9]+), #?5 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_5_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, 5), ++ z0_res = svld1sh_gather_offset_u32 (p0, z0, 5)) ++ ++/* ++** ld1sh_gather_6_u32_offset: ++** ld1sh z0\.s, p0/z, \[z0\.s, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_6_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, 6), ++ z0_res = svld1sh_gather_offset_u32 (p0, z0, 6)) ++ ++/* ++** ld1sh_gather_62_u32_offset: ++** ld1sh z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_62_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, 62), ++ z0_res = svld1sh_gather_offset_u32 (p0, z0, 62)) ++ ++/* ++** ld1sh_gather_64_u32_offset: ++** mov (x[0-9]+), #?64 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_64_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, 64), ++ z0_res = svld1sh_gather_offset_u32 (p0, z0, 64)) ++ ++/* ++** ld1sh_gather_x0_u32_index: ++** lsl (x[0-9]+), x0, #?1 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, x0), ++ z0_res = svld1sh_gather_index_u32 (p0, z0, x0)) ++ ++/* ++** ld1sh_gather_m1_u32_index: ++** mov (x[0-9]+), #?-2 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_m1_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, -1), ++ z0_res = svld1sh_gather_index_u32 (p0, z0, -1)) ++ ++/* ++** ld1sh_gather_0_u32_index: ++** ld1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, 0), ++ z0_res = svld1sh_gather_index_u32 (p0, z0, 0)) ++ ++/* ++** ld1sh_gather_5_u32_index: ++** ld1sh z0\.s, p0/z, \[z0\.s, #10\] ++** 
ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_5_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, 5), ++ z0_res = svld1sh_gather_index_u32 (p0, z0, 5)) ++ ++/* ++** ld1sh_gather_31_u32_index: ++** ld1sh z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_31_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, 31), ++ z0_res = svld1sh_gather_index_u32 (p0, z0, 31)) ++ ++/* ++** ld1sh_gather_32_u32_index: ++** mov (x[0-9]+), #?64 ++** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_32_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, 32), ++ z0_res = svld1sh_gather_index_u32 (p0, z0, 32)) ++ ++/* ++** ld1sh_gather_x0_u32_s32offset: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u32_s32offset, svuint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_u32_s32offset: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u32_s32offset, svuint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_u32_s32offset: ++** ld1sh z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u32_s32offset, svuint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32offset_u32 (p0, x0, z1), ++ z0_res = svld1sh_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_x0_u32_u32offset: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u32_u32offset, svuint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_u32_u32offset: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u32_u32offset, svuint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_u32_u32offset: ++** ld1sh z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u32_u32offset, svuint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32offset_u32 (p0, x0, z1), ++ z0_res = svld1sh_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_x0_u32_s32index: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u32_s32index, svuint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_u32_s32index: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u32_s32index, svuint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_u32_s32index: ++** ld1sh z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u32_s32index, svuint32_t, int16_t, svint32_t, ++ z0_res = svld1sh_gather_s32index_u32 (p0, x0, z1), ++ z0_res = svld1sh_gather_index_u32 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_x0_u32_u32index: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 
1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u32_u32index, svuint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_u32_u32index: ++** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u32_u32index, svuint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_u32_u32index: ++** ld1sh z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u32_u32index, svuint32_t, int16_t, svuint32_t, ++ z0_res = svld1sh_gather_u32index_u32 (p0, x0, z1), ++ z0_res = svld1sh_gather_index_u32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c +new file mode 100644 +index 000000000..3a0094fba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c +@@ -0,0 +1,288 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sh_gather_u64_tied1: ++** ld1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_u64 (p0, z0), ++ z0_res = svld1sh_gather_u64 (p0, z0)) ++ ++/* ++** ld1sh_gather_u64_untied: ++** ld1sh z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_u64 (p0, z1), ++ z0_res = svld1sh_gather_u64 (p0, z1)) ++ ++/* ++** ld1sh_gather_x0_u64_offset: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svld1sh_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ld1sh_gather_m2_u64_offset: ++** mov (x[0-9]+), #?-2 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_m2_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, -2), ++ z0_res = svld1sh_gather_offset_u64 (p0, z0, -2)) ++ ++/* ++** ld1sh_gather_0_u64_offset: ++** ld1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svld1sh_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ld1sh_gather_5_u64_offset: ++** mov (x[0-9]+), #?5 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svld1sh_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ld1sh_gather_6_u64_offset: ++** ld1sh z0\.d, p0/z, \[z0\.d, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_6_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, 6), ++ z0_res = svld1sh_gather_offset_u64 (p0, z0, 6)) ++ ++/* ++** ld1sh_gather_62_u64_offset: ++** ld1sh z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_62_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, 62), ++ z0_res = svld1sh_gather_offset_u64 (p0, z0, 62)) ++ ++/* ++** ld1sh_gather_64_u64_offset: ++** mov (x[0-9]+), #?64 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** 
ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_64_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, 64), ++ z0_res = svld1sh_gather_offset_u64 (p0, z0, 64)) ++ ++/* ++** ld1sh_gather_x0_u64_index: ++** lsl (x[0-9]+), x0, #?1 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, x0), ++ z0_res = svld1sh_gather_index_u64 (p0, z0, x0)) ++ ++/* ++** ld1sh_gather_m1_u64_index: ++** mov (x[0-9]+), #?-2 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_m1_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, -1), ++ z0_res = svld1sh_gather_index_u64 (p0, z0, -1)) ++ ++/* ++** ld1sh_gather_0_u64_index: ++** ld1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, 0), ++ z0_res = svld1sh_gather_index_u64 (p0, z0, 0)) ++ ++/* ++** ld1sh_gather_5_u64_index: ++** ld1sh z0\.d, p0/z, \[z0\.d, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_5_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, 5), ++ z0_res = svld1sh_gather_index_u64 (p0, z0, 5)) ++ ++/* ++** ld1sh_gather_31_u64_index: ++** ld1sh z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_31_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, 31), ++ z0_res = svld1sh_gather_index_u64 (p0, z0, 31)) ++ ++/* ++** ld1sh_gather_32_u64_index: ++** mov (x[0-9]+), #?64 ++** ld1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sh_gather_32_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, 32), ++ z0_res = svld1sh_gather_index_u64 (p0, z0, 32)) ++ ++/* ++** ld1sh_gather_x0_u64_s64offset: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u64_s64offset, svuint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_u64_s64offset: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u64_s64offset, svuint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_u64_s64offset: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u64_s64offset, svuint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svld1sh_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_ext_u64_s64offset: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_u64_s64offset, svuint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1sh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sh_gather_x0_u64_u64offset: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u64_u64offset, svuint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_u64_u64offset: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ 
(ld1sh_gather_tied1_u64_u64offset, svuint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1sh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_u64_u64offset: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u64_u64offset, svuint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svld1sh_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_ext_u64_u64offset: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_u64_u64offset, svuint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1sh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sh_gather_x0_u64_s64index: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u64_s64index, svuint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_u64_s64index: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u64_s64index, svuint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_u64_s64index: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u64_s64index, svuint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64index_u64 (p0, x0, z1), ++ z0_res = svld1sh_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_ext_u64_s64index: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_u64_s64index, svuint64_t, int16_t, svint64_t, ++ z0_res = svld1sh_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1sh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sh_gather_x0_u64_u64index: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u64_u64index, svuint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_tied1_u64_u64index: ++** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u64_u64index, svuint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svld1sh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1sh_gather_untied_u64_u64index: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u64_u64index, svuint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64index_u64 (p0, x0, z1), ++ z0_res = svld1sh_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ld1sh_gather_ext_u64_u64index: ++** ld1sh z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_u64_u64index, svuint64_t, int16_t, svuint64_t, ++ z0_res = svld1sh_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1sh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s32.c +new file mode 100644 +index 000000000..8614f52c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sh_s32_base: ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s32_base, svint32_t, int16_t, ++ z0 = svld1sh_s32 (p0, x0), ++ z0 = svld1sh_s32 (p0, x0)) ++ ++/* ++** ld1sh_s32_index: ++** ld1sh z0\.s, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s32_index, svint32_t, int16_t, ++ z0 = svld1sh_s32 (p0, x0 + x1), ++ z0 = svld1sh_s32 (p0, x0 + x1)) ++ ++/* ++** ld1sh_s32_1: ++** ld1sh z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s32_1, svint32_t, int16_t, ++ z0 = svld1sh_s32 (p0, x0 + svcntw ()), ++ z0 = svld1sh_s32 (p0, x0 + svcntw ())) ++ ++/* ++** ld1sh_s32_7: ++** ld1sh z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s32_7, svint32_t, int16_t, ++ z0 = svld1sh_s32 (p0, x0 + svcntw () * 7), ++ z0 = svld1sh_s32 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_s32_8: ++** incb x0, all, mul #4 ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s32_8, svint32_t, int16_t, ++ z0 = svld1sh_s32 (p0, x0 + svcntw () * 8), ++ z0 = svld1sh_s32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1sh_s32_m1: ++** ld1sh z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s32_m1, svint32_t, int16_t, ++ z0 = svld1sh_s32 (p0, x0 - svcntw ()), ++ z0 = svld1sh_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ld1sh_s32_m8: ++** ld1sh z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s32_m8, svint32_t, int16_t, ++ z0 = svld1sh_s32 (p0, x0 - svcntw () * 8), ++ z0 = svld1sh_s32 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_s32_m9: ++** dech x0, all, mul #9 ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s32_m9, svint32_t, int16_t, ++ z0 = svld1sh_s32 (p0, x0 - svcntw () * 9), ++ z0 = svld1sh_s32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1sh_vnum_s32_0: ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s32_0, svint32_t, int16_t, ++ z0 = svld1sh_vnum_s32 (p0, x0, 0), ++ z0 = svld1sh_vnum_s32 (p0, x0, 0)) ++ ++/* ++** ld1sh_vnum_s32_1: ++** ld1sh z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s32_1, svint32_t, int16_t, ++ z0 = svld1sh_vnum_s32 (p0, x0, 1), ++ z0 = svld1sh_vnum_s32 (p0, x0, 1)) ++ ++/* ++** ld1sh_vnum_s32_7: ++** ld1sh z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s32_7, svint32_t, int16_t, ++ z0 = svld1sh_vnum_s32 (p0, x0, 7), ++ z0 = svld1sh_vnum_s32 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_vnum_s32_8: ++** incb x0, all, mul #4 ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s32_8, svint32_t, int16_t, ++ z0 = svld1sh_vnum_s32 (p0, x0, 8), ++ z0 = svld1sh_vnum_s32 (p0, x0, 8)) ++ ++/* ++** ld1sh_vnum_s32_m1: ++** ld1sh z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s32_m1, svint32_t, int16_t, ++ z0 = svld1sh_vnum_s32 (p0, x0, -1), ++ z0 = svld1sh_vnum_s32 (p0, x0, -1)) ++ ++/* ++** ld1sh_vnum_s32_m8: ++** ld1sh z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s32_m8, svint32_t, int16_t, ++ z0 = svld1sh_vnum_s32 (p0, x0, -8), ++ z0 = svld1sh_vnum_s32 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sh_vnum_s32_m9: ++** dech x0, all, mul #9 ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s32_m9, svint32_t, int16_t, ++ z0 = svld1sh_vnum_s32 (p0, x0, -9), ++ z0 = svld1sh_vnum_s32 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1sh_vnum_s32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1sh z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s32_x1, svint32_t, int16_t, ++ z0 = svld1sh_vnum_s32 (p0, x0, x1), ++ z0 = svld1sh_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s64.c +new file mode 100644 +index 000000000..c02b40a76 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sh_s64_base: ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s64_base, svint64_t, int16_t, ++ z0 = svld1sh_s64 (p0, x0), ++ z0 = svld1sh_s64 (p0, x0)) ++ ++/* ++** ld1sh_s64_index: ++** ld1sh z0\.d, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s64_index, svint64_t, int16_t, ++ z0 = svld1sh_s64 (p0, x0 + x1), ++ z0 = svld1sh_s64 (p0, x0 + x1)) ++ ++/* ++** ld1sh_s64_1: ++** ld1sh z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s64_1, svint64_t, int16_t, ++ z0 = svld1sh_s64 (p0, x0 + svcntd ()), ++ z0 = svld1sh_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1sh_s64_7: ++** ld1sh z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s64_7, svint64_t, int16_t, ++ z0 = svld1sh_s64 (p0, x0 + svcntd () * 7), ++ z0 = svld1sh_s64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_s64_8: ++** incb x0, all, mul #2 ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s64_8, svint64_t, int16_t, ++ z0 = svld1sh_s64 (p0, x0 + svcntd () * 8), ++ z0 = svld1sh_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1sh_s64_m1: ++** ld1sh z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s64_m1, svint64_t, int16_t, ++ z0 = svld1sh_s64 (p0, x0 - svcntd ()), ++ z0 = svld1sh_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1sh_s64_m8: ++** ld1sh z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s64_m8, svint64_t, int16_t, ++ z0 = svld1sh_s64 (p0, x0 - svcntd () * 8), ++ z0 = svld1sh_s64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_s64_m9: ++** decw x0, all, mul #9 ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_s64_m9, svint64_t, int16_t, ++ z0 = svld1sh_s64 (p0, x0 - svcntd () * 9), ++ z0 = svld1sh_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1sh_vnum_s64_0: ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s64_0, svint64_t, int16_t, ++ z0 = svld1sh_vnum_s64 (p0, x0, 0), ++ z0 = svld1sh_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ld1sh_vnum_s64_1: ++** ld1sh z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s64_1, svint64_t, int16_t, ++ z0 = svld1sh_vnum_s64 (p0, x0, 1), ++ z0 = svld1sh_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ld1sh_vnum_s64_7: ++** ld1sh z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s64_7, svint64_t, int16_t, ++ z0 = svld1sh_vnum_s64 (p0, x0, 7), ++ z0 = svld1sh_vnum_s64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sh_vnum_s64_8: ++** incb x0, all, mul #2 ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s64_8, svint64_t, int16_t, ++ z0 = svld1sh_vnum_s64 (p0, x0, 8), ++ z0 = svld1sh_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ld1sh_vnum_s64_m1: ++** ld1sh z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s64_m1, svint64_t, int16_t, ++ z0 = svld1sh_vnum_s64 (p0, x0, -1), ++ z0 = svld1sh_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ld1sh_vnum_s64_m8: ++** ld1sh z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s64_m8, svint64_t, int16_t, ++ z0 = svld1sh_vnum_s64 (p0, x0, -8), ++ z0 = svld1sh_vnum_s64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_vnum_s64_m9: ++** decw x0, all, mul #9 ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s64_m9, svint64_t, int16_t, ++ z0 = svld1sh_vnum_s64 (p0, x0, -9), ++ z0 = svld1sh_vnum_s64 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1sh_vnum_s64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1sh z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_s64_x1, svint64_t, int16_t, ++ z0 = svld1sh_vnum_s64 (p0, x0, x1), ++ z0 = svld1sh_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u32.c +new file mode 100644 +index 000000000..ead96174a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sh_u32_base: ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u32_base, svuint32_t, int16_t, ++ z0 = svld1sh_u32 (p0, x0), ++ z0 = svld1sh_u32 (p0, x0)) ++ ++/* ++** ld1sh_u32_index: ++** ld1sh z0\.s, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u32_index, svuint32_t, int16_t, ++ z0 = svld1sh_u32 (p0, x0 + x1), ++ z0 = svld1sh_u32 (p0, x0 + x1)) ++ ++/* ++** ld1sh_u32_1: ++** ld1sh z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u32_1, svuint32_t, int16_t, ++ z0 = svld1sh_u32 (p0, x0 + svcntw ()), ++ z0 = svld1sh_u32 (p0, x0 + svcntw ())) ++ ++/* ++** ld1sh_u32_7: ++** ld1sh z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u32_7, svuint32_t, int16_t, ++ z0 = svld1sh_u32 (p0, x0 + svcntw () * 7), ++ z0 = svld1sh_u32 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_u32_8: ++** incb x0, all, mul #4 ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u32_8, svuint32_t, int16_t, ++ z0 = svld1sh_u32 (p0, x0 + svcntw () * 8), ++ z0 = svld1sh_u32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1sh_u32_m1: ++** ld1sh z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u32_m1, svuint32_t, int16_t, ++ z0 = svld1sh_u32 (p0, x0 - svcntw ()), ++ z0 = svld1sh_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ld1sh_u32_m8: ++** ld1sh z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u32_m8, svuint32_t, int16_t, ++ z0 = svld1sh_u32 (p0, x0 - svcntw () * 8), ++ z0 = svld1sh_u32 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sh_u32_m9: ++** dech x0, all, mul #9 ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u32_m9, svuint32_t, int16_t, ++ z0 = svld1sh_u32 (p0, x0 - svcntw () * 9), ++ z0 = svld1sh_u32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1sh_vnum_u32_0: ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u32_0, svuint32_t, int16_t, ++ z0 = svld1sh_vnum_u32 (p0, x0, 0), ++ z0 = svld1sh_vnum_u32 (p0, x0, 0)) ++ ++/* ++** ld1sh_vnum_u32_1: ++** ld1sh z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u32_1, svuint32_t, int16_t, ++ z0 = svld1sh_vnum_u32 (p0, x0, 1), ++ z0 = svld1sh_vnum_u32 (p0, x0, 1)) ++ ++/* ++** ld1sh_vnum_u32_7: ++** ld1sh z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u32_7, svuint32_t, int16_t, ++ z0 = svld1sh_vnum_u32 (p0, x0, 7), ++ z0 = svld1sh_vnum_u32 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_vnum_u32_8: ++** incb x0, all, mul #4 ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u32_8, svuint32_t, int16_t, ++ z0 = svld1sh_vnum_u32 (p0, x0, 8), ++ z0 = svld1sh_vnum_u32 (p0, x0, 8)) ++ ++/* ++** ld1sh_vnum_u32_m1: ++** ld1sh z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u32_m1, svuint32_t, int16_t, ++ z0 = svld1sh_vnum_u32 (p0, x0, -1), ++ z0 = svld1sh_vnum_u32 (p0, x0, -1)) ++ ++/* ++** ld1sh_vnum_u32_m8: ++** ld1sh z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u32_m8, svuint32_t, int16_t, ++ z0 = svld1sh_vnum_u32 (p0, x0, -8), ++ z0 = svld1sh_vnum_u32 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_vnum_u32_m9: ++** dech x0, all, mul #9 ++** ld1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u32_m9, svuint32_t, int16_t, ++ z0 = svld1sh_vnum_u32 (p0, x0, -9), ++ z0 = svld1sh_vnum_u32 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1sh_vnum_u32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1sh z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u32_x1, svuint32_t, int16_t, ++ z0 = svld1sh_vnum_u32 (p0, x0, x1), ++ z0 = svld1sh_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u64.c +new file mode 100644 +index 000000000..e407a08a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sh_u64_base: ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u64_base, svuint64_t, int16_t, ++ z0 = svld1sh_u64 (p0, x0), ++ z0 = svld1sh_u64 (p0, x0)) ++ ++/* ++** ld1sh_u64_index: ++** ld1sh z0\.d, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u64_index, svuint64_t, int16_t, ++ z0 = svld1sh_u64 (p0, x0 + x1), ++ z0 = svld1sh_u64 (p0, x0 + x1)) ++ ++/* ++** ld1sh_u64_1: ++** ld1sh z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u64_1, svuint64_t, int16_t, ++ z0 = svld1sh_u64 (p0, x0 + svcntd ()), ++ z0 = svld1sh_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1sh_u64_7: ++** ld1sh z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u64_7, svuint64_t, int16_t, ++ z0 = svld1sh_u64 (p0, x0 + svcntd () * 7), ++ z0 = svld1sh_u64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sh_u64_8: ++** incb x0, all, mul #2 ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u64_8, svuint64_t, int16_t, ++ z0 = svld1sh_u64 (p0, x0 + svcntd () * 8), ++ z0 = svld1sh_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1sh_u64_m1: ++** ld1sh z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u64_m1, svuint64_t, int16_t, ++ z0 = svld1sh_u64 (p0, x0 - svcntd ()), ++ z0 = svld1sh_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1sh_u64_m8: ++** ld1sh z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u64_m8, svuint64_t, int16_t, ++ z0 = svld1sh_u64 (p0, x0 - svcntd () * 8), ++ z0 = svld1sh_u64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_u64_m9: ++** decw x0, all, mul #9 ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_u64_m9, svuint64_t, int16_t, ++ z0 = svld1sh_u64 (p0, x0 - svcntd () * 9), ++ z0 = svld1sh_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1sh_vnum_u64_0: ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u64_0, svuint64_t, int16_t, ++ z0 = svld1sh_vnum_u64 (p0, x0, 0), ++ z0 = svld1sh_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ld1sh_vnum_u64_1: ++** ld1sh z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u64_1, svuint64_t, int16_t, ++ z0 = svld1sh_vnum_u64 (p0, x0, 1), ++ z0 = svld1sh_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ld1sh_vnum_u64_7: ++** ld1sh z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u64_7, svuint64_t, int16_t, ++ z0 = svld1sh_vnum_u64 (p0, x0, 7), ++ z0 = svld1sh_vnum_u64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_vnum_u64_8: ++** incb x0, all, mul #2 ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u64_8, svuint64_t, int16_t, ++ z0 = svld1sh_vnum_u64 (p0, x0, 8), ++ z0 = svld1sh_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ld1sh_vnum_u64_m1: ++** ld1sh z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u64_m1, svuint64_t, int16_t, ++ z0 = svld1sh_vnum_u64 (p0, x0, -1), ++ z0 = svld1sh_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ld1sh_vnum_u64_m8: ++** ld1sh z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u64_m8, svuint64_t, int16_t, ++ z0 = svld1sh_vnum_u64 (p0, x0, -8), ++ z0 = svld1sh_vnum_u64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sh_vnum_u64_m9: ++** decw x0, all, mul #9 ++** ld1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u64_m9, svuint64_t, int16_t, ++ z0 = svld1sh_vnum_u64 (p0, x0, -9), ++ z0 = svld1sh_vnum_u64 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1sh_vnum_u64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1sh z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1sh_vnum_u64_x1, svuint64_t, int16_t, ++ z0 = svld1sh_vnum_u64 (p0, x0, x1), ++ z0 = svld1sh_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c +new file mode 100644 +index 000000000..4d076b486 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c +@@ -0,0 +1,308 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sw_gather_s64_tied1: ++** ld1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_s64 (p0, z0), ++ z0_res = svld1sw_gather_s64 (p0, z0)) ++ ++/* ++** ld1sw_gather_s64_untied: ++** ld1sw z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_s64 (p0, z1), ++ z0_res = svld1sw_gather_s64 (p0, z1)) ++ ++/* ++** ld1sw_gather_x0_s64_offset: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svld1sw_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ld1sw_gather_m4_s64_offset: ++** mov (x[0-9]+), #?-4 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_m4_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, -4), ++ z0_res = svld1sw_gather_offset_s64 (p0, z0, -4)) ++ ++/* ++** ld1sw_gather_0_s64_offset: ++** ld1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svld1sw_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ld1sw_gather_5_s64_offset: ++** mov (x[0-9]+), #?5 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svld1sw_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ld1sw_gather_6_s64_offset: ++** mov (x[0-9]+), #?6 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_6_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 6), ++ z0_res = svld1sw_gather_offset_s64 (p0, z0, 6)) ++ ++/* ++** ld1sw_gather_7_s64_offset: ++** mov (x[0-9]+), #?7 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_7_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 7), ++ z0_res = svld1sw_gather_offset_s64 (p0, z0, 7)) ++ ++/* ++** ld1sw_gather_8_s64_offset: ++** ld1sw z0\.d, p0/z, \[z0\.d, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_8_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 8), ++ z0_res = svld1sw_gather_offset_s64 (p0, z0, 8)) ++ ++/* ++** ld1sw_gather_124_s64_offset: ++** ld1sw z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_124_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 124), ++ z0_res = svld1sw_gather_offset_s64 (p0, z0, 124)) ++ ++/* ++** ld1sw_gather_128_s64_offset: ++** mov (x[0-9]+), #?128 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_128_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 128), ++ z0_res = svld1sw_gather_offset_s64 (p0, z0, 128)) ++ ++/* ++** ld1sw_gather_x0_s64_index: ++** lsl (x[0-9]+), x0, #?2 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_x0_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, x0), ++ z0_res = svld1sw_gather_index_s64 (p0, z0, x0)) ++ ++/* ++** ld1sw_gather_m1_s64_index: ++** mov (x[0-9]+), #?-4 ++** ld1sw z0\.d, p0/z, 
\[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_m1_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, -1), ++ z0_res = svld1sw_gather_index_s64 (p0, z0, -1)) ++ ++/* ++** ld1sw_gather_0_s64_index: ++** ld1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_0_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, 0), ++ z0_res = svld1sw_gather_index_s64 (p0, z0, 0)) ++ ++/* ++** ld1sw_gather_5_s64_index: ++** ld1sw z0\.d, p0/z, \[z0\.d, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_5_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, 5), ++ z0_res = svld1sw_gather_index_s64 (p0, z0, 5)) ++ ++/* ++** ld1sw_gather_31_s64_index: ++** ld1sw z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_31_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, 31), ++ z0_res = svld1sw_gather_index_s64 (p0, z0, 31)) ++ ++/* ++** ld1sw_gather_32_s64_index: ++** mov (x[0-9]+), #?128 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_32_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, 32), ++ z0_res = svld1sw_gather_index_s64 (p0, z0, 32)) ++ ++/* ++** ld1sw_gather_x0_s64_s64offset: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_s64_s64offset, svint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1sw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_tied1_s64_s64offset: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_s64_s64offset, svint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1sw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_untied_s64_s64offset: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_s64_s64offset, svint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svld1sw_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1sw_gather_ext_s64_s64offset: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_s64_s64offset, svint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1sw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sw_gather_x0_s64_u64offset: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_s64_u64offset, svint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1sw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_tied1_s64_u64offset: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_s64_u64offset, svint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1sw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_untied_s64_u64offset: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_s64_u64offset, svint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svld1sw_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1sw_gather_ext_s64_u64offset: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ 
(ld1sw_gather_ext_s64_u64offset, svint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1sw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sw_gather_x0_s64_s64index: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_s64_s64index, svint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svld1sw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_tied1_s64_s64index: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_s64_s64index, svint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svld1sw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_untied_s64_s64index: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_s64_s64index, svint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64index_s64 (p0, x0, z1), ++ z0_res = svld1sw_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ld1sw_gather_ext_s64_s64index: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_s64_s64index, svint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1sw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sw_gather_x0_s64_u64index: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_s64_u64index, svint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svld1sw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_tied1_s64_u64index: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_s64_u64index, svint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svld1sw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_untied_s64_u64index: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_s64_u64index, svint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64index_s64 (p0, x0, z1), ++ z0_res = svld1sw_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ld1sw_gather_ext_s64_u64index: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_s64_u64index, svint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1sw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c +new file mode 100644 +index 000000000..ffa85eb3e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c +@@ -0,0 +1,308 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sw_gather_u64_tied1: ++** ld1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_u64 (p0, z0), ++ z0_res = svld1sw_gather_u64 (p0, z0)) ++ ++/* ++** ld1sw_gather_u64_untied: ++** ld1sw z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_u64 (p0, z1), ++ z0_res = svld1sw_gather_u64 (p0, z1)) ++ ++/* ++** ld1sw_gather_x0_u64_offset: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svld1sw_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ld1sw_gather_m4_u64_offset: ++** mov (x[0-9]+), #?-4 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_m4_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, -4), ++ z0_res = svld1sw_gather_offset_u64 (p0, z0, -4)) ++ ++/* ++** ld1sw_gather_0_u64_offset: ++** ld1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svld1sw_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ld1sw_gather_5_u64_offset: ++** mov (x[0-9]+), #?5 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svld1sw_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ld1sw_gather_6_u64_offset: ++** mov (x[0-9]+), #?6 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_6_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 6), ++ z0_res = svld1sw_gather_offset_u64 (p0, z0, 6)) ++ ++/* ++** ld1sw_gather_7_u64_offset: ++** mov (x[0-9]+), #?7 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_7_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 7), ++ z0_res = svld1sw_gather_offset_u64 (p0, z0, 7)) ++ ++/* ++** ld1sw_gather_8_u64_offset: ++** ld1sw z0\.d, p0/z, \[z0\.d, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_8_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 8), ++ z0_res = svld1sw_gather_offset_u64 (p0, z0, 8)) ++ ++/* ++** ld1sw_gather_124_u64_offset: ++** ld1sw z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_124_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 124), ++ z0_res = svld1sw_gather_offset_u64 (p0, z0, 124)) ++ ++/* ++** ld1sw_gather_128_u64_offset: ++** mov (x[0-9]+), #?128 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_128_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 128), ++ z0_res = svld1sw_gather_offset_u64 (p0, z0, 128)) ++ ++/* ++** ld1sw_gather_x0_u64_index: ++** lsl (x[0-9]+), x0, #?2 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_x0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, x0), ++ z0_res = svld1sw_gather_index_u64 (p0, z0, x0)) ++ ++/* ++** ld1sw_gather_m1_u64_index: ++** mov (x[0-9]+), #?-4 ++** ld1sw 
z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_m1_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, -1), ++ z0_res = svld1sw_gather_index_u64 (p0, z0, -1)) ++ ++/* ++** ld1sw_gather_0_u64_index: ++** ld1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, 0), ++ z0_res = svld1sw_gather_index_u64 (p0, z0, 0)) ++ ++/* ++** ld1sw_gather_5_u64_index: ++** ld1sw z0\.d, p0/z, \[z0\.d, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_5_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, 5), ++ z0_res = svld1sw_gather_index_u64 (p0, z0, 5)) ++ ++/* ++** ld1sw_gather_31_u64_index: ++** ld1sw z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_31_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, 31), ++ z0_res = svld1sw_gather_index_u64 (p0, z0, 31)) ++ ++/* ++** ld1sw_gather_32_u64_index: ++** mov (x[0-9]+), #?128 ++** ld1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1sw_gather_32_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, 32), ++ z0_res = svld1sw_gather_index_u64 (p0, z0, 32)) ++ ++/* ++** ld1sw_gather_x0_u64_s64offset: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_u64_s64offset, svuint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1sw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_tied1_u64_s64offset: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_u64_s64offset, svuint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1sw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_untied_u64_s64offset: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_u64_s64offset, svuint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svld1sw_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1sw_gather_ext_u64_s64offset: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_u64_s64offset, svuint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1sw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sw_gather_x0_u64_u64offset: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_u64_u64offset, svuint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1sw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_tied1_u64_u64offset: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_u64_u64offset, svuint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1sw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_untied_u64_u64offset: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_u64_u64offset, svuint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svld1sw_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1sw_gather_ext_u64_u64offset: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ 
++TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_u64_u64offset, svuint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1sw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sw_gather_x0_u64_s64index: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_u64_s64index, svuint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svld1sw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_tied1_u64_s64index: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_u64_s64index, svuint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svld1sw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_untied_u64_s64index: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_u64_s64index, svuint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64index_u64 (p0, x0, z1), ++ z0_res = svld1sw_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ld1sw_gather_ext_u64_s64index: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_u64_s64index, svuint64_t, int32_t, svint64_t, ++ z0_res = svld1sw_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1sw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1sw_gather_x0_u64_u64index: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_u64_u64index, svuint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svld1sw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_tied1_u64_u64index: ++** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_u64_u64index, svuint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svld1sw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1sw_gather_untied_u64_u64index: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_u64_u64index, svuint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64index_u64 (p0, x0, z1), ++ z0_res = svld1sw_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ld1sw_gather_ext_u64_u64index: ++** ld1sw z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_u64_u64index, svuint64_t, int32_t, svuint64_t, ++ z0_res = svld1sw_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1sw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_s64.c +new file mode 100644 +index 000000000..019a12b20 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_s64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sw_s64_base: ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_s64_base, svint64_t, int32_t, ++ z0 = svld1sw_s64 (p0, x0), ++ z0 = svld1sw_s64 (p0, x0)) ++ ++/* ++** ld1sw_s64_index: ++** ld1sw z0\.d, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1sw_s64_index, svint64_t, int32_t, ++ z0 = svld1sw_s64 (p0, x0 + x1), ++ z0 = svld1sw_s64 (p0, x0 + x1)) ++ ++/* ++** ld1sw_s64_1: ++** ld1sw z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_s64_1, svint64_t, int32_t, ++ z0 = svld1sw_s64 (p0, x0 + svcntd ()), ++ z0 = svld1sw_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1sw_s64_7: ++** ld1sw z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_s64_7, svint64_t, int32_t, ++ z0 = svld1sw_s64 (p0, x0 + svcntd () * 7), ++ z0 = svld1sw_s64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sw_s64_8: ++** incb x0, all, mul #4 ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_s64_8, svint64_t, int32_t, ++ z0 = svld1sw_s64 (p0, x0 + svcntd () * 8), ++ z0 = svld1sw_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1sw_s64_m1: ++** ld1sw z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_s64_m1, svint64_t, int32_t, ++ z0 = svld1sw_s64 (p0, x0 - svcntd ()), ++ z0 = svld1sw_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1sw_s64_m8: ++** ld1sw z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_s64_m8, svint64_t, int32_t, ++ z0 = svld1sw_s64 (p0, x0 - svcntd () * 8), ++ z0 = svld1sw_s64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sw_s64_m9: ++** dech x0, all, mul #9 ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_s64_m9, svint64_t, int32_t, ++ z0 = svld1sw_s64 (p0, x0 - svcntd () * 9), ++ z0 = svld1sw_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1sw_vnum_s64_0: ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_s64_0, svint64_t, int32_t, ++ z0 = svld1sw_vnum_s64 (p0, x0, 0), ++ z0 = svld1sw_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ld1sw_vnum_s64_1: ++** ld1sw z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_s64_1, svint64_t, int32_t, ++ z0 = svld1sw_vnum_s64 (p0, x0, 1), ++ z0 = svld1sw_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ld1sw_vnum_s64_7: ++** ld1sw z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_s64_7, svint64_t, int32_t, ++ z0 = svld1sw_vnum_s64 (p0, x0, 7), ++ z0 = svld1sw_vnum_s64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sw_vnum_s64_8: ++** incb x0, all, mul #4 ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_s64_8, svint64_t, int32_t, ++ z0 = svld1sw_vnum_s64 (p0, x0, 8), ++ z0 = svld1sw_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ld1sw_vnum_s64_m1: ++** ld1sw z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_s64_m1, svint64_t, int32_t, ++ z0 = svld1sw_vnum_s64 (p0, x0, -1), ++ z0 = svld1sw_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ld1sw_vnum_s64_m8: ++** ld1sw z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_s64_m8, svint64_t, int32_t, ++ z0 = svld1sw_vnum_s64 (p0, x0, -8), ++ z0 = svld1sw_vnum_s64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sw_vnum_s64_m9: ++** dech x0, all, mul #9 ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_s64_m9, svint64_t, int32_t, ++ z0 = svld1sw_vnum_s64 (p0, x0, -9), ++ z0 = svld1sw_vnum_s64 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1sw_vnum_s64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1sw z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_s64_x1, svint64_t, int32_t, ++ z0 = svld1sw_vnum_s64 (p0, x0, x1), ++ z0 = svld1sw_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_u64.c +new file mode 100644 +index 000000000..4c291c243 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_u64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1sw_u64_base: ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_u64_base, svuint64_t, int32_t, ++ z0 = svld1sw_u64 (p0, x0), ++ z0 = svld1sw_u64 (p0, x0)) ++ ++/* ++** ld1sw_u64_index: ++** ld1sw z0\.d, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1sw_u64_index, svuint64_t, int32_t, ++ z0 = svld1sw_u64 (p0, x0 + x1), ++ z0 = svld1sw_u64 (p0, x0 + x1)) ++ ++/* ++** ld1sw_u64_1: ++** ld1sw z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_u64_1, svuint64_t, int32_t, ++ z0 = svld1sw_u64 (p0, x0 + svcntd ()), ++ z0 = svld1sw_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1sw_u64_7: ++** ld1sw z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_u64_7, svuint64_t, int32_t, ++ z0 = svld1sw_u64 (p0, x0 + svcntd () * 7), ++ z0 = svld1sw_u64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sw_u64_8: ++** incb x0, all, mul #4 ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_u64_8, svuint64_t, int32_t, ++ z0 = svld1sw_u64 (p0, x0 + svcntd () * 8), ++ z0 = svld1sw_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1sw_u64_m1: ++** ld1sw z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_u64_m1, svuint64_t, int32_t, ++ z0 = svld1sw_u64 (p0, x0 - svcntd ()), ++ z0 = svld1sw_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1sw_u64_m8: ++** ld1sw z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_u64_m8, svuint64_t, int32_t, ++ z0 = svld1sw_u64 (p0, x0 - svcntd () * 8), ++ z0 = svld1sw_u64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sw_u64_m9: ++** dech x0, all, mul #9 ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_u64_m9, svuint64_t, int32_t, ++ z0 = svld1sw_u64 (p0, x0 - svcntd () * 9), ++ z0 = svld1sw_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1sw_vnum_u64_0: ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_u64_0, svuint64_t, int32_t, ++ z0 = svld1sw_vnum_u64 (p0, x0, 0), ++ z0 = svld1sw_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ld1sw_vnum_u64_1: ++** ld1sw z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_u64_1, svuint64_t, int32_t, ++ z0 = svld1sw_vnum_u64 (p0, x0, 1), ++ z0 = svld1sw_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ld1sw_vnum_u64_7: ++** ld1sw z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_u64_7, svuint64_t, int32_t, ++ z0 = svld1sw_vnum_u64 (p0, x0, 7), ++ z0 = svld1sw_vnum_u64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1sw_vnum_u64_8: ++** incb x0, all, mul #4 ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_u64_8, svuint64_t, int32_t, ++ z0 = svld1sw_vnum_u64 (p0, x0, 8), ++ z0 = svld1sw_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ld1sw_vnum_u64_m1: ++** ld1sw z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_u64_m1, svuint64_t, int32_t, ++ z0 = svld1sw_vnum_u64 (p0, x0, -1), ++ z0 = svld1sw_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ld1sw_vnum_u64_m8: ++** ld1sw z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_u64_m8, svuint64_t, int32_t, ++ z0 = svld1sw_vnum_u64 (p0, x0, -8), ++ z0 = svld1sw_vnum_u64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1sw_vnum_u64_m9: ++** dech x0, all, mul #9 ++** ld1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_u64_m9, svuint64_t, int32_t, ++ z0 = svld1sw_vnum_u64 (p0, x0, -9), ++ z0 = svld1sw_vnum_u64 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1sw_vnum_u64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1sw z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1sw_vnum_u64_x1, svuint64_t, int32_t, ++ z0 = svld1sw_vnum_u64 (p0, x0, x1), ++ z0 = svld1sw_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c +new file mode 100644 +index 000000000..a9c418265 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c +@@ -0,0 +1,131 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ub_gather_s32_tied1: ++** ld1b z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_s32_tied1, svint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_s32 (p0, z0), ++ z0_res = svld1ub_gather_s32 (p0, z0)) ++ ++/* ++** ld1ub_gather_s32_untied: ++** ld1b z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_s32_untied, svint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_s32 (p0, z1), ++ z0_res = svld1ub_gather_s32 (p0, z1)) ++ ++/* ++** ld1ub_gather_x0_s32_offset: ++** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_x0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_s32 (p0, z0, x0), ++ z0_res = svld1ub_gather_offset_s32 (p0, z0, x0)) ++ ++/* ++** ld1ub_gather_m1_s32_offset: ++** mov (x[0-9]+), #?-1 ++** ld1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_m1_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_s32 (p0, z0, -1), ++ z0_res = svld1ub_gather_offset_s32 (p0, z0, -1)) ++ ++/* ++** ld1ub_gather_0_s32_offset: ++** ld1b z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_s32 (p0, z0, 0), ++ z0_res = svld1ub_gather_offset_s32 (p0, z0, 0)) ++ ++/* ++** ld1ub_gather_5_s32_offset: ++** ld1b z0\.s, p0/z, \[z0\.s, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_5_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_s32 (p0, z0, 5), ++ z0_res = svld1ub_gather_offset_s32 (p0, z0, 5)) ++ ++/* ++** ld1ub_gather_31_s32_offset: ++** ld1b z0\.s, p0/z, \[z0\.s, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_31_s32_offset, svint32_t, svuint32_t, ++ z0_res = 
svld1ub_gather_u32base_offset_s32 (p0, z0, 31), ++ z0_res = svld1ub_gather_offset_s32 (p0, z0, 31)) ++ ++/* ++** ld1ub_gather_32_s32_offset: ++** mov (x[0-9]+), #?32 ++** ld1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_32_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_s32 (p0, z0, 32), ++ z0_res = svld1ub_gather_offset_s32 (p0, z0, 32)) ++ ++/* ++** ld1ub_gather_x0_s32_s32offset: ++** ld1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_s32_s32offset, svint32_t, uint8_t, svint32_t, ++ z0_res = svld1ub_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_tied1_s32_s32offset: ++** ld1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_s32_s32offset, svint32_t, uint8_t, svint32_t, ++ z0_res = svld1ub_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_untied_s32_s32offset: ++** ld1b z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_s32_s32offset, svint32_t, uint8_t, svint32_t, ++ z0_res = svld1ub_gather_s32offset_s32 (p0, x0, z1), ++ z0_res = svld1ub_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ld1ub_gather_x0_s32_u32offset: ++** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_s32_u32offset, svint32_t, uint8_t, svuint32_t, ++ z0_res = svld1ub_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_tied1_s32_u32offset: ++** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_s32_u32offset, svint32_t, uint8_t, svuint32_t, ++ z0_res = svld1ub_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_untied_s32_u32offset: ++** ld1b z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_s32_u32offset, svint32_t, uint8_t, svuint32_t, ++ z0_res = svld1ub_gather_u32offset_s32 (p0, x0, z1), ++ z0_res = svld1ub_gather_offset_s32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c +new file mode 100644 +index 000000000..99af86ddf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c +@@ -0,0 +1,149 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ub_gather_s64_tied1: ++** ld1b z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_s64 (p0, z0), ++ z0_res = svld1ub_gather_s64 (p0, z0)) ++ ++/* ++** ld1ub_gather_s64_untied: ++** ld1b z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_s64 (p0, z1), ++ z0_res = svld1ub_gather_s64 (p0, z1)) ++ ++/* ++** ld1ub_gather_x0_s64_offset: ++** ld1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svld1ub_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ld1ub_gather_m1_s64_offset: ++** mov (x[0-9]+), #?-1 ++** ld1b z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_m1_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, -1), ++ z0_res = svld1ub_gather_offset_s64 (p0, z0, -1)) ++ ++/* ++** ld1ub_gather_0_s64_offset: ++** ld1b z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svld1ub_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ld1ub_gather_5_s64_offset: ++** ld1b z0\.d, p0/z, \[z0\.d, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svld1ub_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ld1ub_gather_31_s64_offset: ++** ld1b z0\.d, p0/z, \[z0\.d, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_31_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, 31), ++ z0_res = svld1ub_gather_offset_s64 (p0, z0, 31)) ++ ++/* ++** ld1ub_gather_32_s64_offset: ++** mov (x[0-9]+), #?32 ++** ld1b z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_32_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, 32), ++ z0_res = svld1ub_gather_offset_s64 (p0, z0, 32)) ++ ++/* ++** ld1ub_gather_x0_s64_s64offset: ++** ld1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_s64_s64offset, svint64_t, uint8_t, svint64_t, ++ z0_res = svld1ub_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_tied1_s64_s64offset: ++** ld1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_s64_s64offset, svint64_t, uint8_t, svint64_t, ++ z0_res = svld1ub_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_untied_s64_s64offset: ++** ld1b z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_s64_s64offset, svint64_t, uint8_t, svint64_t, ++ z0_res = svld1ub_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svld1ub_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1ub_gather_ext_s64_s64offset: ++** ld1b z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_ext_s64_s64offset, svint64_t, uint8_t, svint64_t, ++ z0_res = svld1ub_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1ub_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1ub_gather_x0_s64_u64offset: ++** ld1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ 
++TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_s64_u64offset, svint64_t, uint8_t, svuint64_t, ++ z0_res = svld1ub_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_tied1_s64_u64offset: ++** ld1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_s64_u64offset, svint64_t, uint8_t, svuint64_t, ++ z0_res = svld1ub_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_untied_s64_u64offset: ++** ld1b z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_s64_u64offset, svint64_t, uint8_t, svuint64_t, ++ z0_res = svld1ub_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svld1ub_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1ub_gather_ext_s64_u64offset: ++** ld1b z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_ext_s64_u64offset, svint64_t, uint8_t, svuint64_t, ++ z0_res = svld1ub_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1ub_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c +new file mode 100644 +index 000000000..77c7e0a2d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c +@@ -0,0 +1,131 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ub_gather_u32_tied1: ++** ld1b z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_u32 (p0, z0), ++ z0_res = svld1ub_gather_u32 (p0, z0)) ++ ++/* ++** ld1ub_gather_u32_untied: ++** ld1b z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_u32_untied, svuint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_u32 (p0, z1), ++ z0_res = svld1ub_gather_u32 (p0, z1)) ++ ++/* ++** ld1ub_gather_x0_u32_offset: ++** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_x0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, x0), ++ z0_res = svld1ub_gather_offset_u32 (p0, z0, x0)) ++ ++/* ++** ld1ub_gather_m1_u32_offset: ++** mov (x[0-9]+), #?-1 ++** ld1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_m1_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, -1), ++ z0_res = svld1ub_gather_offset_u32 (p0, z0, -1)) ++ ++/* ++** ld1ub_gather_0_u32_offset: ++** ld1b z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, 0), ++ z0_res = svld1ub_gather_offset_u32 (p0, z0, 0)) ++ ++/* ++** ld1ub_gather_5_u32_offset: ++** ld1b z0\.s, p0/z, \[z0\.s, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_5_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, 5), ++ z0_res = svld1ub_gather_offset_u32 (p0, z0, 5)) ++ ++/* ++** ld1ub_gather_31_u32_offset: ++** ld1b z0\.s, p0/z, \[z0\.s, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_31_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, 31), ++ z0_res = svld1ub_gather_offset_u32 (p0, z0, 31)) ++ ++/* ++** ld1ub_gather_32_u32_offset: ++** mov (x[0-9]+), #?32 ++** ld1b z0\.s, p0/z, \[\1, z0\.s, 
uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_32_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, 32), ++ z0_res = svld1ub_gather_offset_u32 (p0, z0, 32)) ++ ++/* ++** ld1ub_gather_x0_u32_s32offset: ++** ld1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_u32_s32offset, svuint32_t, uint8_t, svint32_t, ++ z0_res = svld1ub_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_tied1_u32_s32offset: ++** ld1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_u32_s32offset, svuint32_t, uint8_t, svint32_t, ++ z0_res = svld1ub_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_untied_u32_s32offset: ++** ld1b z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_u32_s32offset, svuint32_t, uint8_t, svint32_t, ++ z0_res = svld1ub_gather_s32offset_u32 (p0, x0, z1), ++ z0_res = svld1ub_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ld1ub_gather_x0_u32_u32offset: ++** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_u32_u32offset, svuint32_t, uint8_t, svuint32_t, ++ z0_res = svld1ub_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_tied1_u32_u32offset: ++** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_u32_u32offset, svuint32_t, uint8_t, svuint32_t, ++ z0_res = svld1ub_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_untied_u32_u32offset: ++** ld1b z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_u32_u32offset, svuint32_t, uint8_t, svuint32_t, ++ z0_res = svld1ub_gather_u32offset_u32 (p0, x0, z1), ++ z0_res = svld1ub_gather_offset_u32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c +new file mode 100644 +index 000000000..b605f8b67 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c +@@ -0,0 +1,149 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ub_gather_u64_tied1: ++** ld1b z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_u64 (p0, z0), ++ z0_res = svld1ub_gather_u64 (p0, z0)) ++ ++/* ++** ld1ub_gather_u64_untied: ++** ld1b z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_u64 (p0, z1), ++ z0_res = svld1ub_gather_u64 (p0, z1)) ++ ++/* ++** ld1ub_gather_x0_u64_offset: ++** ld1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svld1ub_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ld1ub_gather_m1_u64_offset: ++** mov (x[0-9]+), #?-1 ++** ld1b z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_m1_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, -1), ++ z0_res = svld1ub_gather_offset_u64 (p0, z0, -1)) ++ ++/* ++** ld1ub_gather_0_u64_offset: ++** ld1b z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svld1ub_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ld1ub_gather_5_u64_offset: ++** ld1b z0\.d, p0/z, \[z0\.d, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svld1ub_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ld1ub_gather_31_u64_offset: ++** ld1b z0\.d, p0/z, \[z0\.d, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_31_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, 31), ++ z0_res = svld1ub_gather_offset_u64 (p0, z0, 31)) ++ ++/* ++** ld1ub_gather_32_u64_offset: ++** mov (x[0-9]+), #?32 ++** ld1b z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1ub_gather_32_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, 32), ++ z0_res = svld1ub_gather_offset_u64 (p0, z0, 32)) ++ ++/* ++** ld1ub_gather_x0_u64_s64offset: ++** ld1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ z0_res = svld1ub_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_tied1_u64_s64offset: ++** ld1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ z0_res = svld1ub_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_untied_u64_s64offset: ++** ld1b z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ z0_res = svld1ub_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svld1ub_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1ub_gather_ext_u64_s64offset: ++** ld1b z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_ext_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ z0_res = svld1ub_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1ub_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1ub_gather_x0_u64_u64offset: ++** ld1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret 
++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ z0_res = svld1ub_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_tied1_u64_u64offset: ++** ld1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ z0_res = svld1ub_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1ub_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1ub_gather_untied_u64_u64offset: ++** ld1b z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ z0_res = svld1ub_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svld1ub_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1ub_gather_ext_u64_u64offset: ++** ld1b z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1ub_gather_ext_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ z0_res = svld1ub_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1ub_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s16.c +new file mode 100644 +index 000000000..c492086b5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s16.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ub_s16_base: ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s16_base, svint16_t, uint8_t, ++ z0 = svld1ub_s16 (p0, x0), ++ z0 = svld1ub_s16 (p0, x0)) ++ ++/* ++** ld1ub_s16_index: ++** ld1b z0\.h, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s16_index, svint16_t, uint8_t, ++ z0 = svld1ub_s16 (p0, x0 + x1), ++ z0 = svld1ub_s16 (p0, x0 + x1)) ++ ++/* ++** ld1ub_s16_1: ++** ld1b z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s16_1, svint16_t, uint8_t, ++ z0 = svld1ub_s16 (p0, x0 + svcnth ()), ++ z0 = svld1ub_s16 (p0, x0 + svcnth ())) ++ ++/* ++** ld1ub_s16_7: ++** ld1b z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s16_7, svint16_t, uint8_t, ++ z0 = svld1ub_s16 (p0, x0 + svcnth () * 7), ++ z0 = svld1ub_s16 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_s16_8: ++** incb x0, all, mul #4 ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s16_8, svint16_t, uint8_t, ++ z0 = svld1ub_s16 (p0, x0 + svcnth () * 8), ++ z0 = svld1ub_s16 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ld1ub_s16_m1: ++** ld1b z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s16_m1, svint16_t, uint8_t, ++ z0 = svld1ub_s16 (p0, x0 - svcnth ()), ++ z0 = svld1ub_s16 (p0, x0 - svcnth ())) ++ ++/* ++** ld1ub_s16_m8: ++** ld1b z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s16_m8, svint16_t, uint8_t, ++ z0 = svld1ub_s16 (p0, x0 - svcnth () * 8), ++ z0 = svld1ub_s16 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1ub_s16_m9: ++** dech x0, all, mul #9 ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s16_m9, svint16_t, uint8_t, ++ z0 = svld1ub_s16 (p0, x0 - svcnth () * 9), ++ z0 = svld1ub_s16 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ld1ub_vnum_s16_0: ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s16_0, svint16_t, uint8_t, ++ z0 = svld1ub_vnum_s16 (p0, x0, 0), ++ z0 = svld1ub_vnum_s16 (p0, x0, 0)) ++ ++/* ++** ld1ub_vnum_s16_1: ++** ld1b z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s16_1, svint16_t, uint8_t, ++ z0 = svld1ub_vnum_s16 (p0, x0, 1), ++ z0 = svld1ub_vnum_s16 (p0, x0, 1)) ++ ++/* ++** ld1ub_vnum_s16_7: ++** ld1b z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s16_7, svint16_t, uint8_t, ++ z0 = svld1ub_vnum_s16 (p0, x0, 7), ++ z0 = svld1ub_vnum_s16 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_vnum_s16_8: ++** incb x0, all, mul #4 ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s16_8, svint16_t, uint8_t, ++ z0 = svld1ub_vnum_s16 (p0, x0, 8), ++ z0 = svld1ub_vnum_s16 (p0, x0, 8)) ++ ++/* ++** ld1ub_vnum_s16_m1: ++** ld1b z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s16_m1, svint16_t, uint8_t, ++ z0 = svld1ub_vnum_s16 (p0, x0, -1), ++ z0 = svld1ub_vnum_s16 (p0, x0, -1)) ++ ++/* ++** ld1ub_vnum_s16_m8: ++** ld1b z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s16_m8, svint16_t, uint8_t, ++ z0 = svld1ub_vnum_s16 (p0, x0, -8), ++ z0 = svld1ub_vnum_s16 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_vnum_s16_m9: ++** dech x0, all, mul #9 ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s16_m9, svint16_t, uint8_t, ++ z0 = svld1ub_vnum_s16 (p0, x0, -9), ++ z0 = svld1ub_vnum_s16 (p0, x0, -9)) ++ ++/* ++** ld1ub_vnum_s16_x1: ++** cnth (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1b z0\.h, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1b z0\.h, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s16_x1, svint16_t, uint8_t, ++ z0 = svld1ub_vnum_s16 (p0, x0, x1), ++ z0 = svld1ub_vnum_s16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s32.c +new file mode 100644 +index 000000000..b2f8c4b04 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s32.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ub_s32_base: ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s32_base, svint32_t, uint8_t, ++ z0 = svld1ub_s32 (p0, x0), ++ z0 = svld1ub_s32 (p0, x0)) ++ ++/* ++** ld1ub_s32_index: ++** ld1b z0\.s, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s32_index, svint32_t, uint8_t, ++ z0 = svld1ub_s32 (p0, x0 + x1), ++ z0 = svld1ub_s32 (p0, x0 + x1)) ++ ++/* ++** ld1ub_s32_1: ++** ld1b z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s32_1, svint32_t, uint8_t, ++ z0 = svld1ub_s32 (p0, x0 + svcntw ()), ++ z0 = svld1ub_s32 (p0, x0 + svcntw ())) ++ ++/* ++** ld1ub_s32_7: ++** ld1b z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s32_7, svint32_t, uint8_t, ++ z0 = svld1ub_s32 (p0, x0 + svcntw () * 7), ++ z0 = svld1ub_s32 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1ub_s32_8: ++** incb x0, all, mul #2 ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s32_8, svint32_t, uint8_t, ++ z0 = svld1ub_s32 (p0, x0 + svcntw () * 8), ++ z0 = svld1ub_s32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1ub_s32_m1: ++** ld1b z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s32_m1, svint32_t, uint8_t, ++ z0 = svld1ub_s32 (p0, x0 - svcntw ()), ++ z0 = svld1ub_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ld1ub_s32_m8: ++** ld1b z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s32_m8, svint32_t, uint8_t, ++ z0 = svld1ub_s32 (p0, x0 - svcntw () * 8), ++ z0 = svld1ub_s32 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_s32_m9: ++** decw x0, all, mul #9 ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s32_m9, svint32_t, uint8_t, ++ z0 = svld1ub_s32 (p0, x0 - svcntw () * 9), ++ z0 = svld1ub_s32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1ub_vnum_s32_0: ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s32_0, svint32_t, uint8_t, ++ z0 = svld1ub_vnum_s32 (p0, x0, 0), ++ z0 = svld1ub_vnum_s32 (p0, x0, 0)) ++ ++/* ++** ld1ub_vnum_s32_1: ++** ld1b z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s32_1, svint32_t, uint8_t, ++ z0 = svld1ub_vnum_s32 (p0, x0, 1), ++ z0 = svld1ub_vnum_s32 (p0, x0, 1)) ++ ++/* ++** ld1ub_vnum_s32_7: ++** ld1b z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s32_7, svint32_t, uint8_t, ++ z0 = svld1ub_vnum_s32 (p0, x0, 7), ++ z0 = svld1ub_vnum_s32 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_vnum_s32_8: ++** incb x0, all, mul #2 ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s32_8, svint32_t, uint8_t, ++ z0 = svld1ub_vnum_s32 (p0, x0, 8), ++ z0 = svld1ub_vnum_s32 (p0, x0, 8)) ++ ++/* ++** ld1ub_vnum_s32_m1: ++** ld1b z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s32_m1, svint32_t, uint8_t, ++ z0 = svld1ub_vnum_s32 (p0, x0, -1), ++ z0 = svld1ub_vnum_s32 (p0, x0, -1)) ++ ++/* ++** ld1ub_vnum_s32_m8: ++** ld1b z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s32_m8, svint32_t, uint8_t, ++ z0 = svld1ub_vnum_s32 (p0, x0, -8), ++ z0 = svld1ub_vnum_s32 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_vnum_s32_m9: ++** decw x0, all, mul #9 ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s32_m9, svint32_t, uint8_t, ++ z0 = svld1ub_vnum_s32 (p0, x0, -9), ++ z0 = svld1ub_vnum_s32 (p0, x0, -9)) ++ ++/* ++** ld1ub_vnum_s32_x1: ++** cntw (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1b z0\.s, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1b z0\.s, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s32_x1, svint32_t, uint8_t, ++ z0 = svld1ub_vnum_s32 (p0, x0, x1), ++ z0 = svld1ub_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s64.c +new file mode 100644 +index 000000000..d8694bf28 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s64.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ub_s64_base: ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s64_base, svint64_t, uint8_t, ++ z0 = svld1ub_s64 (p0, x0), ++ z0 = svld1ub_s64 (p0, x0)) ++ ++/* ++** ld1ub_s64_index: ++** ld1b z0\.d, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s64_index, svint64_t, uint8_t, ++ z0 = svld1ub_s64 (p0, x0 + x1), ++ z0 = svld1ub_s64 (p0, x0 + x1)) ++ ++/* ++** ld1ub_s64_1: ++** ld1b z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s64_1, svint64_t, uint8_t, ++ z0 = svld1ub_s64 (p0, x0 + svcntd ()), ++ z0 = svld1ub_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1ub_s64_7: ++** ld1b z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s64_7, svint64_t, uint8_t, ++ z0 = svld1ub_s64 (p0, x0 + svcntd () * 7), ++ z0 = svld1ub_s64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_s64_8: ++** incb x0 ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s64_8, svint64_t, uint8_t, ++ z0 = svld1ub_s64 (p0, x0 + svcntd () * 8), ++ z0 = svld1ub_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1ub_s64_m1: ++** ld1b z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s64_m1, svint64_t, uint8_t, ++ z0 = svld1ub_s64 (p0, x0 - svcntd ()), ++ z0 = svld1ub_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1ub_s64_m8: ++** ld1b z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s64_m8, svint64_t, uint8_t, ++ z0 = svld1ub_s64 (p0, x0 - svcntd () * 8), ++ z0 = svld1ub_s64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_s64_m9: ++** decd x0, all, mul #9 ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_s64_m9, svint64_t, uint8_t, ++ z0 = svld1ub_s64 (p0, x0 - svcntd () * 9), ++ z0 = svld1ub_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1ub_vnum_s64_0: ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s64_0, svint64_t, uint8_t, ++ z0 = svld1ub_vnum_s64 (p0, x0, 0), ++ z0 = svld1ub_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ld1ub_vnum_s64_1: ++** ld1b z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s64_1, svint64_t, uint8_t, ++ z0 = svld1ub_vnum_s64 (p0, x0, 1), ++ z0 = svld1ub_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ld1ub_vnum_s64_7: ++** ld1b z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s64_7, svint64_t, uint8_t, ++ z0 = svld1ub_vnum_s64 (p0, x0, 7), ++ z0 = svld1ub_vnum_s64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_vnum_s64_8: ++** incb x0 ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s64_8, svint64_t, uint8_t, ++ z0 = svld1ub_vnum_s64 (p0, x0, 8), ++ z0 = svld1ub_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ld1ub_vnum_s64_m1: ++** ld1b z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s64_m1, svint64_t, uint8_t, ++ z0 = svld1ub_vnum_s64 (p0, x0, -1), ++ z0 = svld1ub_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ld1ub_vnum_s64_m8: ++** ld1b z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s64_m8, svint64_t, uint8_t, ++ z0 = svld1ub_vnum_s64 (p0, x0, -8), ++ z0 = svld1ub_vnum_s64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1ub_vnum_s64_m9: ++** decd x0, all, mul #9 ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s64_m9, svint64_t, uint8_t, ++ z0 = svld1ub_vnum_s64 (p0, x0, -9), ++ z0 = svld1ub_vnum_s64 (p0, x0, -9)) ++ ++/* ++** ld1ub_vnum_s64_x1: ++** cntd (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1b z0\.d, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1b z0\.d, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_s64_x1, svint64_t, uint8_t, ++ z0 = svld1ub_vnum_s64 (p0, x0, x1), ++ z0 = svld1ub_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u16.c +new file mode 100644 +index 000000000..049234ee4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u16.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ub_u16_base: ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u16_base, svuint16_t, uint8_t, ++ z0 = svld1ub_u16 (p0, x0), ++ z0 = svld1ub_u16 (p0, x0)) ++ ++/* ++** ld1ub_u16_index: ++** ld1b z0\.h, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u16_index, svuint16_t, uint8_t, ++ z0 = svld1ub_u16 (p0, x0 + x1), ++ z0 = svld1ub_u16 (p0, x0 + x1)) ++ ++/* ++** ld1ub_u16_1: ++** ld1b z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u16_1, svuint16_t, uint8_t, ++ z0 = svld1ub_u16 (p0, x0 + svcnth ()), ++ z0 = svld1ub_u16 (p0, x0 + svcnth ())) ++ ++/* ++** ld1ub_u16_7: ++** ld1b z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u16_7, svuint16_t, uint8_t, ++ z0 = svld1ub_u16 (p0, x0 + svcnth () * 7), ++ z0 = svld1ub_u16 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_u16_8: ++** incb x0, all, mul #4 ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u16_8, svuint16_t, uint8_t, ++ z0 = svld1ub_u16 (p0, x0 + svcnth () * 8), ++ z0 = svld1ub_u16 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ld1ub_u16_m1: ++** ld1b z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u16_m1, svuint16_t, uint8_t, ++ z0 = svld1ub_u16 (p0, x0 - svcnth ()), ++ z0 = svld1ub_u16 (p0, x0 - svcnth ())) ++ ++/* ++** ld1ub_u16_m8: ++** ld1b z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u16_m8, svuint16_t, uint8_t, ++ z0 = svld1ub_u16 (p0, x0 - svcnth () * 8), ++ z0 = svld1ub_u16 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_u16_m9: ++** dech x0, all, mul #9 ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u16_m9, svuint16_t, uint8_t, ++ z0 = svld1ub_u16 (p0, x0 - svcnth () * 9), ++ z0 = svld1ub_u16 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ld1ub_vnum_u16_0: ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u16_0, svuint16_t, uint8_t, ++ z0 = svld1ub_vnum_u16 (p0, x0, 0), ++ z0 = svld1ub_vnum_u16 (p0, x0, 0)) ++ ++/* ++** ld1ub_vnum_u16_1: ++** ld1b z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u16_1, svuint16_t, uint8_t, ++ z0 = svld1ub_vnum_u16 (p0, x0, 1), ++ z0 = svld1ub_vnum_u16 (p0, x0, 1)) ++ ++/* ++** ld1ub_vnum_u16_7: ++** ld1b z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u16_7, svuint16_t, uint8_t, ++ z0 = svld1ub_vnum_u16 (p0, x0, 7), ++ z0 = svld1ub_vnum_u16 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1ub_vnum_u16_8: ++** incb x0, all, mul #4 ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u16_8, svuint16_t, uint8_t, ++ z0 = svld1ub_vnum_u16 (p0, x0, 8), ++ z0 = svld1ub_vnum_u16 (p0, x0, 8)) ++ ++/* ++** ld1ub_vnum_u16_m1: ++** ld1b z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u16_m1, svuint16_t, uint8_t, ++ z0 = svld1ub_vnum_u16 (p0, x0, -1), ++ z0 = svld1ub_vnum_u16 (p0, x0, -1)) ++ ++/* ++** ld1ub_vnum_u16_m8: ++** ld1b z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u16_m8, svuint16_t, uint8_t, ++ z0 = svld1ub_vnum_u16 (p0, x0, -8), ++ z0 = svld1ub_vnum_u16 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_vnum_u16_m9: ++** dech x0, all, mul #9 ++** ld1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u16_m9, svuint16_t, uint8_t, ++ z0 = svld1ub_vnum_u16 (p0, x0, -9), ++ z0 = svld1ub_vnum_u16 (p0, x0, -9)) ++ ++/* ++** ld1ub_vnum_u16_x1: ++** cnth (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1b z0\.h, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1b z0\.h, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u16_x1, svuint16_t, uint8_t, ++ z0 = svld1ub_vnum_u16 (p0, x0, x1), ++ z0 = svld1ub_vnum_u16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u32.c +new file mode 100644 +index 000000000..58d2ef527 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u32.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ub_u32_base: ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u32_base, svuint32_t, uint8_t, ++ z0 = svld1ub_u32 (p0, x0), ++ z0 = svld1ub_u32 (p0, x0)) ++ ++/* ++** ld1ub_u32_index: ++** ld1b z0\.s, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u32_index, svuint32_t, uint8_t, ++ z0 = svld1ub_u32 (p0, x0 + x1), ++ z0 = svld1ub_u32 (p0, x0 + x1)) ++ ++/* ++** ld1ub_u32_1: ++** ld1b z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u32_1, svuint32_t, uint8_t, ++ z0 = svld1ub_u32 (p0, x0 + svcntw ()), ++ z0 = svld1ub_u32 (p0, x0 + svcntw ())) ++ ++/* ++** ld1ub_u32_7: ++** ld1b z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u32_7, svuint32_t, uint8_t, ++ z0 = svld1ub_u32 (p0, x0 + svcntw () * 7), ++ z0 = svld1ub_u32 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_u32_8: ++** incb x0, all, mul #2 ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u32_8, svuint32_t, uint8_t, ++ z0 = svld1ub_u32 (p0, x0 + svcntw () * 8), ++ z0 = svld1ub_u32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1ub_u32_m1: ++** ld1b z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u32_m1, svuint32_t, uint8_t, ++ z0 = svld1ub_u32 (p0, x0 - svcntw ()), ++ z0 = svld1ub_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ld1ub_u32_m8: ++** ld1b z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u32_m8, svuint32_t, uint8_t, ++ z0 = svld1ub_u32 (p0, x0 - svcntw () * 8), ++ z0 = svld1ub_u32 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1ub_u32_m9: ++** decw x0, all, mul #9 ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u32_m9, svuint32_t, uint8_t, ++ z0 = svld1ub_u32 (p0, x0 - svcntw () * 9), ++ z0 = svld1ub_u32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1ub_vnum_u32_0: ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u32_0, svuint32_t, uint8_t, ++ z0 = svld1ub_vnum_u32 (p0, x0, 0), ++ z0 = svld1ub_vnum_u32 (p0, x0, 0)) ++ ++/* ++** ld1ub_vnum_u32_1: ++** ld1b z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u32_1, svuint32_t, uint8_t, ++ z0 = svld1ub_vnum_u32 (p0, x0, 1), ++ z0 = svld1ub_vnum_u32 (p0, x0, 1)) ++ ++/* ++** ld1ub_vnum_u32_7: ++** ld1b z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u32_7, svuint32_t, uint8_t, ++ z0 = svld1ub_vnum_u32 (p0, x0, 7), ++ z0 = svld1ub_vnum_u32 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_vnum_u32_8: ++** incb x0, all, mul #2 ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u32_8, svuint32_t, uint8_t, ++ z0 = svld1ub_vnum_u32 (p0, x0, 8), ++ z0 = svld1ub_vnum_u32 (p0, x0, 8)) ++ ++/* ++** ld1ub_vnum_u32_m1: ++** ld1b z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u32_m1, svuint32_t, uint8_t, ++ z0 = svld1ub_vnum_u32 (p0, x0, -1), ++ z0 = svld1ub_vnum_u32 (p0, x0, -1)) ++ ++/* ++** ld1ub_vnum_u32_m8: ++** ld1b z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u32_m8, svuint32_t, uint8_t, ++ z0 = svld1ub_vnum_u32 (p0, x0, -8), ++ z0 = svld1ub_vnum_u32 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_vnum_u32_m9: ++** decw x0, all, mul #9 ++** ld1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u32_m9, svuint32_t, uint8_t, ++ z0 = svld1ub_vnum_u32 (p0, x0, -9), ++ z0 = svld1ub_vnum_u32 (p0, x0, -9)) ++ ++/* ++** ld1ub_vnum_u32_x1: ++** cntw (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1b z0\.s, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1b z0\.s, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u32_x1, svuint32_t, uint8_t, ++ z0 = svld1ub_vnum_u32 (p0, x0, x1), ++ z0 = svld1ub_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u64.c +new file mode 100644 +index 000000000..46d7250f0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u64.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1ub_u64_base: ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u64_base, svuint64_t, uint8_t, ++ z0 = svld1ub_u64 (p0, x0), ++ z0 = svld1ub_u64 (p0, x0)) ++ ++/* ++** ld1ub_u64_index: ++** ld1b z0\.d, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u64_index, svuint64_t, uint8_t, ++ z0 = svld1ub_u64 (p0, x0 + x1), ++ z0 = svld1ub_u64 (p0, x0 + x1)) ++ ++/* ++** ld1ub_u64_1: ++** ld1b z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u64_1, svuint64_t, uint8_t, ++ z0 = svld1ub_u64 (p0, x0 + svcntd ()), ++ z0 = svld1ub_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1ub_u64_7: ++** ld1b z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u64_7, svuint64_t, uint8_t, ++ z0 = svld1ub_u64 (p0, x0 + svcntd () * 7), ++ z0 = svld1ub_u64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1ub_u64_8: ++** incb x0 ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u64_8, svuint64_t, uint8_t, ++ z0 = svld1ub_u64 (p0, x0 + svcntd () * 8), ++ z0 = svld1ub_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1ub_u64_m1: ++** ld1b z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u64_m1, svuint64_t, uint8_t, ++ z0 = svld1ub_u64 (p0, x0 - svcntd ()), ++ z0 = svld1ub_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1ub_u64_m8: ++** ld1b z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u64_m8, svuint64_t, uint8_t, ++ z0 = svld1ub_u64 (p0, x0 - svcntd () * 8), ++ z0 = svld1ub_u64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_u64_m9: ++** decd x0, all, mul #9 ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_u64_m9, svuint64_t, uint8_t, ++ z0 = svld1ub_u64 (p0, x0 - svcntd () * 9), ++ z0 = svld1ub_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1ub_vnum_u64_0: ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u64_0, svuint64_t, uint8_t, ++ z0 = svld1ub_vnum_u64 (p0, x0, 0), ++ z0 = svld1ub_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ld1ub_vnum_u64_1: ++** ld1b z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u64_1, svuint64_t, uint8_t, ++ z0 = svld1ub_vnum_u64 (p0, x0, 1), ++ z0 = svld1ub_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ld1ub_vnum_u64_7: ++** ld1b z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u64_7, svuint64_t, uint8_t, ++ z0 = svld1ub_vnum_u64 (p0, x0, 7), ++ z0 = svld1ub_vnum_u64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_vnum_u64_8: ++** incb x0 ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u64_8, svuint64_t, uint8_t, ++ z0 = svld1ub_vnum_u64 (p0, x0, 8), ++ z0 = svld1ub_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ld1ub_vnum_u64_m1: ++** ld1b z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u64_m1, svuint64_t, uint8_t, ++ z0 = svld1ub_vnum_u64 (p0, x0, -1), ++ z0 = svld1ub_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ld1ub_vnum_u64_m8: ++** ld1b z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u64_m8, svuint64_t, uint8_t, ++ z0 = svld1ub_vnum_u64 (p0, x0, -8), ++ z0 = svld1ub_vnum_u64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1ub_vnum_u64_m9: ++** decd x0, all, mul #9 ++** ld1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u64_m9, svuint64_t, uint8_t, ++ z0 = svld1ub_vnum_u64 (p0, x0, -9), ++ z0 = svld1ub_vnum_u64 (p0, x0, -9)) ++ ++/* ++** ld1ub_vnum_u64_x1: ++** cntd (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld1b z0\.d, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld1b z0\.d, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld1ub_vnum_u64_x1, svuint64_t, uint8_t, ++ z0 = svld1ub_vnum_u64 (p0, x0, x1), ++ z0 = svld1ub_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c +new file mode 100644 +index 000000000..84fb5c335 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c +@@ -0,0 +1,252 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uh_gather_s32_tied1: ++** ld1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_s32_tied1, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_s32 (p0, z0), ++ z0_res = svld1uh_gather_s32 (p0, z0)) ++ ++/* ++** ld1uh_gather_s32_untied: ++** ld1h z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_s32_untied, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_s32 (p0, z1), ++ z0_res = svld1uh_gather_s32 (p0, z1)) ++ ++/* ++** ld1uh_gather_x0_s32_offset: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, x0), ++ z0_res = svld1uh_gather_offset_s32 (p0, z0, x0)) ++ ++/* ++** ld1uh_gather_m2_s32_offset: ++** mov (x[0-9]+), #?-2 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_m2_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, -2), ++ z0_res = svld1uh_gather_offset_s32 (p0, z0, -2)) ++ ++/* ++** ld1uh_gather_0_s32_offset: ++** ld1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, 0), ++ z0_res = svld1uh_gather_offset_s32 (p0, z0, 0)) ++ ++/* ++** ld1uh_gather_5_s32_offset: ++** mov (x[0-9]+), #?5 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_5_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, 5), ++ z0_res = svld1uh_gather_offset_s32 (p0, z0, 5)) ++ ++/* ++** ld1uh_gather_6_s32_offset: ++** ld1h z0\.s, p0/z, \[z0\.s, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_6_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, 6), ++ z0_res = svld1uh_gather_offset_s32 (p0, z0, 6)) ++ ++/* ++** ld1uh_gather_62_s32_offset: ++** ld1h z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_62_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, 62), ++ z0_res = svld1uh_gather_offset_s32 (p0, z0, 62)) ++ ++/* ++** ld1uh_gather_64_s32_offset: ++** mov (x[0-9]+), #?64 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_64_s32_offset, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, 64), ++ z0_res = svld1uh_gather_offset_s32 (p0, z0, 64)) ++ ++/* ++** ld1uh_gather_x0_s32_index: ++** lsl (x[0-9]+), x0, #?1 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, x0), ++ z0_res = svld1uh_gather_index_s32 (p0, z0, x0)) ++ ++/* ++** ld1uh_gather_m1_s32_index: ++** mov (x[0-9]+), #?-2 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_m1_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, -1), ++ z0_res = svld1uh_gather_index_s32 (p0, z0, -1)) ++ ++/* ++** ld1uh_gather_0_s32_index: ++** ld1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_0_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, 0), ++ z0_res = svld1uh_gather_index_s32 (p0, z0, 0)) ++ ++/* ++** ld1uh_gather_5_s32_index: ++** ld1h z0\.s, p0/z, \[z0\.s, #10\] ++** ret ++*/ 
++TEST_LOAD_GATHER_ZS (ld1uh_gather_5_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, 5), ++ z0_res = svld1uh_gather_index_s32 (p0, z0, 5)) ++ ++/* ++** ld1uh_gather_31_s32_index: ++** ld1h z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_31_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, 31), ++ z0_res = svld1uh_gather_index_s32 (p0, z0, 31)) ++ ++/* ++** ld1uh_gather_32_s32_index: ++** mov (x[0-9]+), #?64 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_32_s32_index, svint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, 32), ++ z0_res = svld1uh_gather_index_s32 (p0, z0, 32)) ++ ++/* ++** ld1uh_gather_x0_s32_s32offset: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s32_s32offset, svint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_s32_s32offset: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s32_s32offset, svint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_s32_s32offset: ++** ld1h z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s32_s32offset, svint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32offset_s32 (p0, x0, z1), ++ z0_res = svld1uh_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_x0_s32_u32offset: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s32_u32offset, svint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_s32_u32offset: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s32_u32offset, svint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_s32_u32offset: ++** ld1h z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s32_u32offset, svint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32offset_s32 (p0, x0, z1), ++ z0_res = svld1uh_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_x0_s32_s32index: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s32_s32index, svint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_s32_s32index: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s32_s32index, svint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_s32_s32index: ++** ld1h z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s32_s32index, svint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32index_s32 (p0, x0, z1), ++ z0_res = svld1uh_gather_index_s32 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_x0_s32_u32index: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ 
++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s32_u32index, svint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_s32_u32index: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s32_u32index, svint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_s32_u32index: ++** ld1h z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s32_u32index, svint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32index_s32 (p0, x0, z1), ++ z0_res = svld1uh_gather_index_s32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c +new file mode 100644 +index 000000000..447001793 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c +@@ -0,0 +1,288 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uh_gather_s64_tied1: ++** ld1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_s64 (p0, z0), ++ z0_res = svld1uh_gather_s64 (p0, z0)) ++ ++/* ++** ld1uh_gather_s64_untied: ++** ld1h z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_s64 (p0, z1), ++ z0_res = svld1uh_gather_s64 (p0, z1)) ++ ++/* ++** ld1uh_gather_x0_s64_offset: ++** ld1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svld1uh_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ld1uh_gather_m2_s64_offset: ++** mov (x[0-9]+), #?-2 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_m2_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, -2), ++ z0_res = svld1uh_gather_offset_s64 (p0, z0, -2)) ++ ++/* ++** ld1uh_gather_0_s64_offset: ++** ld1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svld1uh_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ld1uh_gather_5_s64_offset: ++** mov (x[0-9]+), #?5 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svld1uh_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ld1uh_gather_6_s64_offset: ++** ld1h z0\.d, p0/z, \[z0\.d, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_6_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, 6), ++ z0_res = svld1uh_gather_offset_s64 (p0, z0, 6)) ++ ++/* ++** ld1uh_gather_62_s64_offset: ++** ld1h z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_62_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, 62), ++ z0_res = svld1uh_gather_offset_s64 (p0, z0, 62)) ++ ++/* ++** ld1uh_gather_64_s64_offset: ++** mov (x[0-9]+), #?64 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS 
(ld1uh_gather_64_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, 64), ++ z0_res = svld1uh_gather_offset_s64 (p0, z0, 64)) ++ ++/* ++** ld1uh_gather_x0_s64_index: ++** lsl (x[0-9]+), x0, #?1 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, x0), ++ z0_res = svld1uh_gather_index_s64 (p0, z0, x0)) ++ ++/* ++** ld1uh_gather_m1_s64_index: ++** mov (x[0-9]+), #?-2 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_m1_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, -1), ++ z0_res = svld1uh_gather_index_s64 (p0, z0, -1)) ++ ++/* ++** ld1uh_gather_0_s64_index: ++** ld1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_0_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, 0), ++ z0_res = svld1uh_gather_index_s64 (p0, z0, 0)) ++ ++/* ++** ld1uh_gather_5_s64_index: ++** ld1h z0\.d, p0/z, \[z0\.d, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_5_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, 5), ++ z0_res = svld1uh_gather_index_s64 (p0, z0, 5)) ++ ++/* ++** ld1uh_gather_31_s64_index: ++** ld1h z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_31_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, 31), ++ z0_res = svld1uh_gather_index_s64 (p0, z0, 31)) ++ ++/* ++** ld1uh_gather_32_s64_index: ++** mov (x[0-9]+), #?64 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_32_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, 32), ++ z0_res = svld1uh_gather_index_s64 (p0, z0, 32)) ++ ++/* ++** ld1uh_gather_x0_s64_s64offset: ++** ld1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s64_s64offset, svint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_s64_s64offset: ++** ld1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s64_s64offset, svint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_s64_s64offset: ++** ld1h z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s64_s64offset, svint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svld1uh_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_ext_s64_s64offset: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_s64_s64offset, svint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1uh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uh_gather_x0_s64_u64offset: ++** ld1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s64_u64offset, svint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_s64_u64offset: ++** ld1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s64_u64offset, svint64_t, uint16_t, 
svuint64_t, ++ z0_res = svld1uh_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_s64_u64offset: ++** ld1h z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s64_u64offset, svint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svld1uh_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_ext_s64_u64offset: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_s64_u64offset, svint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1uh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uh_gather_x0_s64_s64index: ++** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s64_s64index, svint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_s64_s64index: ++** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s64_s64index, svint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_s64_s64index: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s64_s64index, svint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64index_s64 (p0, x0, z1), ++ z0_res = svld1uh_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_ext_s64_s64index: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_s64_s64index, svint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1uh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uh_gather_x0_s64_u64index: ++** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s64_u64index, svint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_s64_u64index: ++** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s64_u64index, svint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_s64_u64index: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s64_u64index, svint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64index_s64 (p0, x0, z1), ++ z0_res = svld1uh_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_ext_s64_u64index: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_s64_u64index, svint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1uh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c +new file mode 100644 +index 000000000..09d3cc8c2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c +@@ -0,0 +1,252 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target 
{ ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uh_gather_u32_tied1: ++** ld1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_u32 (p0, z0), ++ z0_res = svld1uh_gather_u32 (p0, z0)) ++ ++/* ++** ld1uh_gather_u32_untied: ++** ld1h z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_u32_untied, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_u32 (p0, z1), ++ z0_res = svld1uh_gather_u32 (p0, z1)) ++ ++/* ++** ld1uh_gather_x0_u32_offset: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, x0), ++ z0_res = svld1uh_gather_offset_u32 (p0, z0, x0)) ++ ++/* ++** ld1uh_gather_m2_u32_offset: ++** mov (x[0-9]+), #?-2 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_m2_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, -2), ++ z0_res = svld1uh_gather_offset_u32 (p0, z0, -2)) ++ ++/* ++** ld1uh_gather_0_u32_offset: ++** ld1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, 0), ++ z0_res = svld1uh_gather_offset_u32 (p0, z0, 0)) ++ ++/* ++** ld1uh_gather_5_u32_offset: ++** mov (x[0-9]+), #?5 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_5_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, 5), ++ z0_res = svld1uh_gather_offset_u32 (p0, z0, 5)) ++ ++/* ++** ld1uh_gather_6_u32_offset: ++** ld1h z0\.s, p0/z, \[z0\.s, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_6_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, 6), ++ z0_res = svld1uh_gather_offset_u32 (p0, z0, 6)) ++ ++/* ++** ld1uh_gather_62_u32_offset: ++** ld1h z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_62_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, 62), ++ z0_res = svld1uh_gather_offset_u32 (p0, z0, 62)) ++ ++/* ++** ld1uh_gather_64_u32_offset: ++** mov (x[0-9]+), #?64 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_64_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, 64), ++ z0_res = svld1uh_gather_offset_u32 (p0, z0, 64)) ++ ++/* ++** ld1uh_gather_x0_u32_index: ++** lsl (x[0-9]+), x0, #?1 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, x0), ++ z0_res = svld1uh_gather_index_u32 (p0, z0, x0)) ++ ++/* ++** ld1uh_gather_m1_u32_index: ++** mov (x[0-9]+), #?-2 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_m1_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, -1), ++ z0_res = svld1uh_gather_index_u32 (p0, z0, -1)) ++ ++/* ++** ld1uh_gather_0_u32_index: ++** ld1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, 0), ++ z0_res = svld1uh_gather_index_u32 (p0, z0, 0)) ++ ++/* ++** ld1uh_gather_5_u32_index: ++** ld1h z0\.s, p0/z, \[z0\.s, #10\] ++** ret ++*/ 
++TEST_LOAD_GATHER_ZS (ld1uh_gather_5_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, 5), ++ z0_res = svld1uh_gather_index_u32 (p0, z0, 5)) ++ ++/* ++** ld1uh_gather_31_u32_index: ++** ld1h z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_31_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, 31), ++ z0_res = svld1uh_gather_index_u32 (p0, z0, 31)) ++ ++/* ++** ld1uh_gather_32_u32_index: ++** mov (x[0-9]+), #?64 ++** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_32_u32_index, svuint32_t, svuint32_t, ++ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, 32), ++ z0_res = svld1uh_gather_index_u32 (p0, z0, 32)) ++ ++/* ++** ld1uh_gather_x0_u32_s32offset: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u32_s32offset, svuint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_u32_s32offset: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u32_s32offset, svuint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_u32_s32offset: ++** ld1h z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u32_s32offset, svuint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32offset_u32 (p0, x0, z1), ++ z0_res = svld1uh_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_x0_u32_u32offset: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u32_u32offset, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_u32_u32offset: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u32_u32offset, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_u32_u32offset: ++** ld1h z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u32_u32offset, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32offset_u32 (p0, x0, z1), ++ z0_res = svld1uh_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_x0_u32_s32index: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u32_s32index, svuint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_u32_s32index: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u32_s32index, svuint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_u32_s32index: ++** ld1h z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u32_s32index, svuint32_t, uint16_t, svint32_t, ++ z0_res = svld1uh_gather_s32index_u32 (p0, x0, z1), ++ z0_res = svld1uh_gather_index_u32 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_x0_u32_u32index: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret 
++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u32_u32index, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_u32_u32index: ++** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u32_u32index, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_u32_u32index: ++** ld1h z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u32_u32index, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svld1uh_gather_u32index_u32 (p0, x0, z1), ++ z0_res = svld1uh_gather_index_u32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c +new file mode 100644 +index 000000000..f3dcf03cd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c +@@ -0,0 +1,288 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uh_gather_u64_tied1: ++** ld1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_u64 (p0, z0), ++ z0_res = svld1uh_gather_u64 (p0, z0)) ++ ++/* ++** ld1uh_gather_u64_untied: ++** ld1h z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_u64 (p0, z1), ++ z0_res = svld1uh_gather_u64 (p0, z1)) ++ ++/* ++** ld1uh_gather_x0_u64_offset: ++** ld1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svld1uh_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ld1uh_gather_m2_u64_offset: ++** mov (x[0-9]+), #?-2 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_m2_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, -2), ++ z0_res = svld1uh_gather_offset_u64 (p0, z0, -2)) ++ ++/* ++** ld1uh_gather_0_u64_offset: ++** ld1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svld1uh_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ld1uh_gather_5_u64_offset: ++** mov (x[0-9]+), #?5 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svld1uh_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ld1uh_gather_6_u64_offset: ++** ld1h z0\.d, p0/z, \[z0\.d, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_6_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, 6), ++ z0_res = svld1uh_gather_offset_u64 (p0, z0, 6)) ++ ++/* ++** ld1uh_gather_62_u64_offset: ++** ld1h z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_62_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, 62), ++ z0_res = svld1uh_gather_offset_u64 (p0, z0, 62)) ++ ++/* ++** ld1uh_gather_64_u64_offset: ++** mov (x[0-9]+), #?64 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ 
++TEST_LOAD_GATHER_ZS (ld1uh_gather_64_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, 64), ++ z0_res = svld1uh_gather_offset_u64 (p0, z0, 64)) ++ ++/* ++** ld1uh_gather_x0_u64_index: ++** lsl (x[0-9]+), x0, #?1 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, x0), ++ z0_res = svld1uh_gather_index_u64 (p0, z0, x0)) ++ ++/* ++** ld1uh_gather_m1_u64_index: ++** mov (x[0-9]+), #?-2 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_m1_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, -1), ++ z0_res = svld1uh_gather_index_u64 (p0, z0, -1)) ++ ++/* ++** ld1uh_gather_0_u64_index: ++** ld1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, 0), ++ z0_res = svld1uh_gather_index_u64 (p0, z0, 0)) ++ ++/* ++** ld1uh_gather_5_u64_index: ++** ld1h z0\.d, p0/z, \[z0\.d, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_5_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, 5), ++ z0_res = svld1uh_gather_index_u64 (p0, z0, 5)) ++ ++/* ++** ld1uh_gather_31_u64_index: ++** ld1h z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_31_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, 31), ++ z0_res = svld1uh_gather_index_u64 (p0, z0, 31)) ++ ++/* ++** ld1uh_gather_32_u64_index: ++** mov (x[0-9]+), #?64 ++** ld1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uh_gather_32_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, 32), ++ z0_res = svld1uh_gather_index_u64 (p0, z0, 32)) ++ ++/* ++** ld1uh_gather_x0_u64_s64offset: ++** ld1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_u64_s64offset: ++** ld1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_u64_s64offset: ++** ld1h z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svld1uh_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_ext_u64_s64offset: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1uh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uh_gather_x0_u64_u64offset: ++** ld1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_u64_u64offset: ++** ld1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ 
(ld1uh_gather_tied1_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1uh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_u64_u64offset: ++** ld1h z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svld1uh_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_ext_u64_u64offset: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1uh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uh_gather_x0_u64_s64index: ++** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u64_s64index, svuint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_u64_s64index: ++** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u64_s64index, svuint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_u64_s64index: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u64_s64index, svuint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64index_u64 (p0, x0, z1), ++ z0_res = svld1uh_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_ext_u64_s64index: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_u64_s64index, svuint64_t, uint16_t, svint64_t, ++ z0_res = svld1uh_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1uh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uh_gather_x0_u64_u64index: ++** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_tied1_u64_u64index: ++** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svld1uh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1uh_gather_untied_u64_u64index: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64index_u64 (p0, x0, z1), ++ z0_res = svld1uh_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ld1uh_gather_ext_u64_u64index: ++** ld1h z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svld1uh_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1uh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s32.c +new file mode 100644 +index 000000000..df1ce974b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uh_s32_base: ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s32_base, svint32_t, uint16_t, ++ z0 = svld1uh_s32 (p0, x0), ++ z0 = svld1uh_s32 (p0, x0)) ++ ++/* ++** ld1uh_s32_index: ++** ld1h z0\.s, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s32_index, svint32_t, uint16_t, ++ z0 = svld1uh_s32 (p0, x0 + x1), ++ z0 = svld1uh_s32 (p0, x0 + x1)) ++ ++/* ++** ld1uh_s32_1: ++** ld1h z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s32_1, svint32_t, uint16_t, ++ z0 = svld1uh_s32 (p0, x0 + svcntw ()), ++ z0 = svld1uh_s32 (p0, x0 + svcntw ())) ++ ++/* ++** ld1uh_s32_7: ++** ld1h z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s32_7, svint32_t, uint16_t, ++ z0 = svld1uh_s32 (p0, x0 + svcntw () * 7), ++ z0 = svld1uh_s32 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_s32_8: ++** incb x0, all, mul #4 ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s32_8, svint32_t, uint16_t, ++ z0 = svld1uh_s32 (p0, x0 + svcntw () * 8), ++ z0 = svld1uh_s32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1uh_s32_m1: ++** ld1h z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s32_m1, svint32_t, uint16_t, ++ z0 = svld1uh_s32 (p0, x0 - svcntw ()), ++ z0 = svld1uh_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ld1uh_s32_m8: ++** ld1h z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s32_m8, svint32_t, uint16_t, ++ z0 = svld1uh_s32 (p0, x0 - svcntw () * 8), ++ z0 = svld1uh_s32 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_s32_m9: ++** dech x0, all, mul #9 ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s32_m9, svint32_t, uint16_t, ++ z0 = svld1uh_s32 (p0, x0 - svcntw () * 9), ++ z0 = svld1uh_s32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1uh_vnum_s32_0: ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s32_0, svint32_t, uint16_t, ++ z0 = svld1uh_vnum_s32 (p0, x0, 0), ++ z0 = svld1uh_vnum_s32 (p0, x0, 0)) ++ ++/* ++** ld1uh_vnum_s32_1: ++** ld1h z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s32_1, svint32_t, uint16_t, ++ z0 = svld1uh_vnum_s32 (p0, x0, 1), ++ z0 = svld1uh_vnum_s32 (p0, x0, 1)) ++ ++/* ++** ld1uh_vnum_s32_7: ++** ld1h z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s32_7, svint32_t, uint16_t, ++ z0 = svld1uh_vnum_s32 (p0, x0, 7), ++ z0 = svld1uh_vnum_s32 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_vnum_s32_8: ++** incb x0, all, mul #4 ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s32_8, svint32_t, uint16_t, ++ z0 = svld1uh_vnum_s32 (p0, x0, 8), ++ z0 = svld1uh_vnum_s32 (p0, x0, 8)) ++ ++/* ++** ld1uh_vnum_s32_m1: ++** ld1h z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s32_m1, svint32_t, uint16_t, ++ z0 = svld1uh_vnum_s32 (p0, x0, -1), ++ z0 = svld1uh_vnum_s32 (p0, x0, -1)) ++ ++/* ++** ld1uh_vnum_s32_m8: ++** ld1h z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s32_m8, svint32_t, uint16_t, ++ z0 = svld1uh_vnum_s32 (p0, x0, -8), ++ z0 = svld1uh_vnum_s32 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1uh_vnum_s32_m9: ++** dech x0, all, mul #9 ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s32_m9, svint32_t, uint16_t, ++ z0 = svld1uh_vnum_s32 (p0, x0, -9), ++ z0 = svld1uh_vnum_s32 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1uh_vnum_s32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1h z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s32_x1, svint32_t, uint16_t, ++ z0 = svld1uh_vnum_s32 (p0, x0, x1), ++ z0 = svld1uh_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s64.c +new file mode 100644 +index 000000000..7c3ab0aee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uh_s64_base: ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s64_base, svint64_t, uint16_t, ++ z0 = svld1uh_s64 (p0, x0), ++ z0 = svld1uh_s64 (p0, x0)) ++ ++/* ++** ld1uh_s64_index: ++** ld1h z0\.d, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s64_index, svint64_t, uint16_t, ++ z0 = svld1uh_s64 (p0, x0 + x1), ++ z0 = svld1uh_s64 (p0, x0 + x1)) ++ ++/* ++** ld1uh_s64_1: ++** ld1h z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s64_1, svint64_t, uint16_t, ++ z0 = svld1uh_s64 (p0, x0 + svcntd ()), ++ z0 = svld1uh_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1uh_s64_7: ++** ld1h z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s64_7, svint64_t, uint16_t, ++ z0 = svld1uh_s64 (p0, x0 + svcntd () * 7), ++ z0 = svld1uh_s64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_s64_8: ++** incb x0, all, mul #2 ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s64_8, svint64_t, uint16_t, ++ z0 = svld1uh_s64 (p0, x0 + svcntd () * 8), ++ z0 = svld1uh_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1uh_s64_m1: ++** ld1h z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s64_m1, svint64_t, uint16_t, ++ z0 = svld1uh_s64 (p0, x0 - svcntd ()), ++ z0 = svld1uh_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1uh_s64_m8: ++** ld1h z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s64_m8, svint64_t, uint16_t, ++ z0 = svld1uh_s64 (p0, x0 - svcntd () * 8), ++ z0 = svld1uh_s64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_s64_m9: ++** decw x0, all, mul #9 ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_s64_m9, svint64_t, uint16_t, ++ z0 = svld1uh_s64 (p0, x0 - svcntd () * 9), ++ z0 = svld1uh_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1uh_vnum_s64_0: ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s64_0, svint64_t, uint16_t, ++ z0 = svld1uh_vnum_s64 (p0, x0, 0), ++ z0 = svld1uh_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ld1uh_vnum_s64_1: ++** ld1h z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s64_1, svint64_t, uint16_t, ++ z0 = svld1uh_vnum_s64 (p0, x0, 1), ++ z0 = svld1uh_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ld1uh_vnum_s64_7: ++** ld1h z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s64_7, svint64_t, uint16_t, ++ z0 = svld1uh_vnum_s64 (p0, x0, 7), ++ z0 = svld1uh_vnum_s64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1uh_vnum_s64_8: ++** incb x0, all, mul #2 ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s64_8, svint64_t, uint16_t, ++ z0 = svld1uh_vnum_s64 (p0, x0, 8), ++ z0 = svld1uh_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ld1uh_vnum_s64_m1: ++** ld1h z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s64_m1, svint64_t, uint16_t, ++ z0 = svld1uh_vnum_s64 (p0, x0, -1), ++ z0 = svld1uh_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ld1uh_vnum_s64_m8: ++** ld1h z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s64_m8, svint64_t, uint16_t, ++ z0 = svld1uh_vnum_s64 (p0, x0, -8), ++ z0 = svld1uh_vnum_s64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_vnum_s64_m9: ++** decw x0, all, mul #9 ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s64_m9, svint64_t, uint16_t, ++ z0 = svld1uh_vnum_s64 (p0, x0, -9), ++ z0 = svld1uh_vnum_s64 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1uh_vnum_s64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1h z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_s64_x1, svint64_t, uint16_t, ++ z0 = svld1uh_vnum_s64 (p0, x0, x1), ++ z0 = svld1uh_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u32.c +new file mode 100644 +index 000000000..a07b19259 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uh_u32_base: ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u32_base, svuint32_t, uint16_t, ++ z0 = svld1uh_u32 (p0, x0), ++ z0 = svld1uh_u32 (p0, x0)) ++ ++/* ++** ld1uh_u32_index: ++** ld1h z0\.s, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u32_index, svuint32_t, uint16_t, ++ z0 = svld1uh_u32 (p0, x0 + x1), ++ z0 = svld1uh_u32 (p0, x0 + x1)) ++ ++/* ++** ld1uh_u32_1: ++** ld1h z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u32_1, svuint32_t, uint16_t, ++ z0 = svld1uh_u32 (p0, x0 + svcntw ()), ++ z0 = svld1uh_u32 (p0, x0 + svcntw ())) ++ ++/* ++** ld1uh_u32_7: ++** ld1h z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u32_7, svuint32_t, uint16_t, ++ z0 = svld1uh_u32 (p0, x0 + svcntw () * 7), ++ z0 = svld1uh_u32 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_u32_8: ++** incb x0, all, mul #4 ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u32_8, svuint32_t, uint16_t, ++ z0 = svld1uh_u32 (p0, x0 + svcntw () * 8), ++ z0 = svld1uh_u32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ld1uh_u32_m1: ++** ld1h z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u32_m1, svuint32_t, uint16_t, ++ z0 = svld1uh_u32 (p0, x0 - svcntw ()), ++ z0 = svld1uh_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ld1uh_u32_m8: ++** ld1h z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u32_m8, svuint32_t, uint16_t, ++ z0 = svld1uh_u32 (p0, x0 - svcntw () * 8), ++ z0 = svld1uh_u32 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1uh_u32_m9: ++** dech x0, all, mul #9 ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u32_m9, svuint32_t, uint16_t, ++ z0 = svld1uh_u32 (p0, x0 - svcntw () * 9), ++ z0 = svld1uh_u32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ld1uh_vnum_u32_0: ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u32_0, svuint32_t, uint16_t, ++ z0 = svld1uh_vnum_u32 (p0, x0, 0), ++ z0 = svld1uh_vnum_u32 (p0, x0, 0)) ++ ++/* ++** ld1uh_vnum_u32_1: ++** ld1h z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u32_1, svuint32_t, uint16_t, ++ z0 = svld1uh_vnum_u32 (p0, x0, 1), ++ z0 = svld1uh_vnum_u32 (p0, x0, 1)) ++ ++/* ++** ld1uh_vnum_u32_7: ++** ld1h z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u32_7, svuint32_t, uint16_t, ++ z0 = svld1uh_vnum_u32 (p0, x0, 7), ++ z0 = svld1uh_vnum_u32 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_vnum_u32_8: ++** incb x0, all, mul #4 ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u32_8, svuint32_t, uint16_t, ++ z0 = svld1uh_vnum_u32 (p0, x0, 8), ++ z0 = svld1uh_vnum_u32 (p0, x0, 8)) ++ ++/* ++** ld1uh_vnum_u32_m1: ++** ld1h z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u32_m1, svuint32_t, uint16_t, ++ z0 = svld1uh_vnum_u32 (p0, x0, -1), ++ z0 = svld1uh_vnum_u32 (p0, x0, -1)) ++ ++/* ++** ld1uh_vnum_u32_m8: ++** ld1h z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u32_m8, svuint32_t, uint16_t, ++ z0 = svld1uh_vnum_u32 (p0, x0, -8), ++ z0 = svld1uh_vnum_u32 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_vnum_u32_m9: ++** dech x0, all, mul #9 ++** ld1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u32_m9, svuint32_t, uint16_t, ++ z0 = svld1uh_vnum_u32 (p0, x0, -9), ++ z0 = svld1uh_vnum_u32 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1uh_vnum_u32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1h z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u32_x1, svuint32_t, uint16_t, ++ z0 = svld1uh_vnum_u32 (p0, x0, x1), ++ z0 = svld1uh_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u64.c +new file mode 100644 +index 000000000..79be01fbd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uh_u64_base: ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u64_base, svuint64_t, uint16_t, ++ z0 = svld1uh_u64 (p0, x0), ++ z0 = svld1uh_u64 (p0, x0)) ++ ++/* ++** ld1uh_u64_index: ++** ld1h z0\.d, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u64_index, svuint64_t, uint16_t, ++ z0 = svld1uh_u64 (p0, x0 + x1), ++ z0 = svld1uh_u64 (p0, x0 + x1)) ++ ++/* ++** ld1uh_u64_1: ++** ld1h z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u64_1, svuint64_t, uint16_t, ++ z0 = svld1uh_u64 (p0, x0 + svcntd ()), ++ z0 = svld1uh_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1uh_u64_7: ++** ld1h z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u64_7, svuint64_t, uint16_t, ++ z0 = svld1uh_u64 (p0, x0 + svcntd () * 7), ++ z0 = svld1uh_u64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1uh_u64_8: ++** incb x0, all, mul #2 ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u64_8, svuint64_t, uint16_t, ++ z0 = svld1uh_u64 (p0, x0 + svcntd () * 8), ++ z0 = svld1uh_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1uh_u64_m1: ++** ld1h z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u64_m1, svuint64_t, uint16_t, ++ z0 = svld1uh_u64 (p0, x0 - svcntd ()), ++ z0 = svld1uh_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1uh_u64_m8: ++** ld1h z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u64_m8, svuint64_t, uint16_t, ++ z0 = svld1uh_u64 (p0, x0 - svcntd () * 8), ++ z0 = svld1uh_u64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_u64_m9: ++** decw x0, all, mul #9 ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_u64_m9, svuint64_t, uint16_t, ++ z0 = svld1uh_u64 (p0, x0 - svcntd () * 9), ++ z0 = svld1uh_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1uh_vnum_u64_0: ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u64_0, svuint64_t, uint16_t, ++ z0 = svld1uh_vnum_u64 (p0, x0, 0), ++ z0 = svld1uh_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ld1uh_vnum_u64_1: ++** ld1h z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u64_1, svuint64_t, uint16_t, ++ z0 = svld1uh_vnum_u64 (p0, x0, 1), ++ z0 = svld1uh_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ld1uh_vnum_u64_7: ++** ld1h z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u64_7, svuint64_t, uint16_t, ++ z0 = svld1uh_vnum_u64 (p0, x0, 7), ++ z0 = svld1uh_vnum_u64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_vnum_u64_8: ++** incb x0, all, mul #2 ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u64_8, svuint64_t, uint16_t, ++ z0 = svld1uh_vnum_u64 (p0, x0, 8), ++ z0 = svld1uh_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ld1uh_vnum_u64_m1: ++** ld1h z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u64_m1, svuint64_t, uint16_t, ++ z0 = svld1uh_vnum_u64 (p0, x0, -1), ++ z0 = svld1uh_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ld1uh_vnum_u64_m8: ++** ld1h z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u64_m8, svuint64_t, uint16_t, ++ z0 = svld1uh_vnum_u64 (p0, x0, -8), ++ z0 = svld1uh_vnum_u64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uh_vnum_u64_m9: ++** decw x0, all, mul #9 ++** ld1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u64_m9, svuint64_t, uint16_t, ++ z0 = svld1uh_vnum_u64 (p0, x0, -9), ++ z0 = svld1uh_vnum_u64 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1uh_vnum_u64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1h z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1uh_vnum_u64_x1, svuint64_t, uint16_t, ++ z0 = svld1uh_vnum_u64 (p0, x0, x1), ++ z0 = svld1uh_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c +new file mode 100644 +index 000000000..f4e9d5db9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c +@@ -0,0 +1,308 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uw_gather_s64_tied1: ++** ld1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_s64 (p0, z0), ++ z0_res = svld1uw_gather_s64 (p0, z0)) ++ ++/* ++** ld1uw_gather_s64_untied: ++** ld1w z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_s64 (p0, z1), ++ z0_res = svld1uw_gather_s64 (p0, z1)) ++ ++/* ++** ld1uw_gather_x0_s64_offset: ++** ld1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svld1uw_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ld1uw_gather_m4_s64_offset: ++** mov (x[0-9]+), #?-4 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_m4_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, -4), ++ z0_res = svld1uw_gather_offset_s64 (p0, z0, -4)) ++ ++/* ++** ld1uw_gather_0_s64_offset: ++** ld1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svld1uw_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ld1uw_gather_5_s64_offset: ++** mov (x[0-9]+), #?5 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svld1uw_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ld1uw_gather_6_s64_offset: ++** mov (x[0-9]+), #?6 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_6_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 6), ++ z0_res = svld1uw_gather_offset_s64 (p0, z0, 6)) ++ ++/* ++** ld1uw_gather_7_s64_offset: ++** mov (x[0-9]+), #?7 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_7_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 7), ++ z0_res = svld1uw_gather_offset_s64 (p0, z0, 7)) ++ ++/* ++** ld1uw_gather_8_s64_offset: ++** ld1w z0\.d, p0/z, \[z0\.d, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_8_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 8), ++ z0_res = svld1uw_gather_offset_s64 (p0, z0, 8)) ++ ++/* ++** ld1uw_gather_124_s64_offset: ++** ld1w z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_124_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 124), ++ z0_res = svld1uw_gather_offset_s64 (p0, z0, 124)) ++ ++/* ++** ld1uw_gather_128_s64_offset: ++** mov (x[0-9]+), #?128 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_128_s64_offset, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 128), ++ z0_res = svld1uw_gather_offset_s64 (p0, z0, 128)) ++ ++/* ++** ld1uw_gather_x0_s64_index: ++** lsl (x[0-9]+), x0, #?2 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_x0_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, x0), ++ z0_res = svld1uw_gather_index_s64 (p0, z0, x0)) ++ ++/* ++** ld1uw_gather_m1_s64_index: ++** mov (x[0-9]+), #?-4 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] 
++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_m1_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, -1), ++ z0_res = svld1uw_gather_index_s64 (p0, z0, -1)) ++ ++/* ++** ld1uw_gather_0_s64_index: ++** ld1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_0_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, 0), ++ z0_res = svld1uw_gather_index_s64 (p0, z0, 0)) ++ ++/* ++** ld1uw_gather_5_s64_index: ++** ld1w z0\.d, p0/z, \[z0\.d, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_5_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, 5), ++ z0_res = svld1uw_gather_index_s64 (p0, z0, 5)) ++ ++/* ++** ld1uw_gather_31_s64_index: ++** ld1w z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_31_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, 31), ++ z0_res = svld1uw_gather_index_s64 (p0, z0, 31)) ++ ++/* ++** ld1uw_gather_32_s64_index: ++** mov (x[0-9]+), #?128 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_32_s64_index, svint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, 32), ++ z0_res = svld1uw_gather_index_s64 (p0, z0, 32)) ++ ++/* ++** ld1uw_gather_x0_s64_s64offset: ++** ld1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_s64_s64offset, svint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1uw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_tied1_s64_s64offset: ++** ld1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_s64_s64offset, svint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svld1uw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_untied_s64_s64offset: ++** ld1w z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_s64_s64offset, svint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svld1uw_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1uw_gather_ext_s64_s64offset: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_s64_s64offset, svint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1uw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uw_gather_x0_s64_u64offset: ++** ld1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_s64_u64offset, svint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1uw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_tied1_s64_u64offset: ++** ld1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_s64_u64offset, svint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svld1uw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_untied_s64_u64offset: ++** ld1w z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_s64_u64offset, svint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svld1uw_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ld1uw_gather_ext_s64_u64offset: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ 
(ld1uw_gather_ext_s64_u64offset, svint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1uw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uw_gather_x0_s64_s64index: ++** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_s64_s64index, svint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svld1uw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_tied1_s64_s64index: ++** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_s64_s64index, svint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svld1uw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_untied_s64_s64index: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_s64_s64index, svint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64index_s64 (p0, x0, z1), ++ z0_res = svld1uw_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ld1uw_gather_ext_s64_s64index: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_s64_s64index, svint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1uw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uw_gather_x0_s64_u64index: ++** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_s64_u64index, svint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svld1uw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_tied1_s64_u64index: ++** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_s64_u64index, svint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svld1uw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_untied_s64_u64index: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_s64_u64index, svint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64index_s64 (p0, x0, z1), ++ z0_res = svld1uw_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ld1uw_gather_ext_s64_u64index: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_s64_u64index, svint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1uw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c +new file mode 100644 +index 000000000..854d19233 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c +@@ -0,0 +1,308 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uw_gather_u64_tied1: ++** ld1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_u64 (p0, z0), ++ z0_res = svld1uw_gather_u64 (p0, z0)) ++ ++/* ++** ld1uw_gather_u64_untied: ++** ld1w z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_u64 (p0, z1), ++ z0_res = svld1uw_gather_u64 (p0, z1)) ++ ++/* ++** ld1uw_gather_x0_u64_offset: ++** ld1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svld1uw_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ld1uw_gather_m4_u64_offset: ++** mov (x[0-9]+), #?-4 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_m4_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, -4), ++ z0_res = svld1uw_gather_offset_u64 (p0, z0, -4)) ++ ++/* ++** ld1uw_gather_0_u64_offset: ++** ld1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svld1uw_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ld1uw_gather_5_u64_offset: ++** mov (x[0-9]+), #?5 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svld1uw_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ld1uw_gather_6_u64_offset: ++** mov (x[0-9]+), #?6 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_6_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 6), ++ z0_res = svld1uw_gather_offset_u64 (p0, z0, 6)) ++ ++/* ++** ld1uw_gather_7_u64_offset: ++** mov (x[0-9]+), #?7 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_7_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 7), ++ z0_res = svld1uw_gather_offset_u64 (p0, z0, 7)) ++ ++/* ++** ld1uw_gather_8_u64_offset: ++** ld1w z0\.d, p0/z, \[z0\.d, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_8_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 8), ++ z0_res = svld1uw_gather_offset_u64 (p0, z0, 8)) ++ ++/* ++** ld1uw_gather_124_u64_offset: ++** ld1w z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_124_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 124), ++ z0_res = svld1uw_gather_offset_u64 (p0, z0, 124)) ++ ++/* ++** ld1uw_gather_128_u64_offset: ++** mov (x[0-9]+), #?128 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_128_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 128), ++ z0_res = svld1uw_gather_offset_u64 (p0, z0, 128)) ++ ++/* ++** ld1uw_gather_x0_u64_index: ++** lsl (x[0-9]+), x0, #?2 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_x0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, x0), ++ z0_res = svld1uw_gather_index_u64 (p0, z0, x0)) ++ ++/* ++** ld1uw_gather_m1_u64_index: ++** mov (x[0-9]+), #?-4 ++** ld1w z0\.d, p0/z, 
\[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_m1_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, -1), ++ z0_res = svld1uw_gather_index_u64 (p0, z0, -1)) ++ ++/* ++** ld1uw_gather_0_u64_index: ++** ld1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, 0), ++ z0_res = svld1uw_gather_index_u64 (p0, z0, 0)) ++ ++/* ++** ld1uw_gather_5_u64_index: ++** ld1w z0\.d, p0/z, \[z0\.d, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_5_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, 5), ++ z0_res = svld1uw_gather_index_u64 (p0, z0, 5)) ++ ++/* ++** ld1uw_gather_31_u64_index: ++** ld1w z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_31_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, 31), ++ z0_res = svld1uw_gather_index_u64 (p0, z0, 31)) ++ ++/* ++** ld1uw_gather_32_u64_index: ++** mov (x[0-9]+), #?128 ++** ld1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ld1uw_gather_32_u64_index, svuint64_t, svuint64_t, ++ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, 32), ++ z0_res = svld1uw_gather_index_u64 (p0, z0, 32)) ++ ++/* ++** ld1uw_gather_x0_u64_s64offset: ++** ld1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1uw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_tied1_u64_s64offset: ++** ld1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svld1uw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_untied_u64_s64offset: ++** ld1w z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svld1uw_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1uw_gather_ext_u64_s64offset: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1uw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uw_gather_x0_u64_u64offset: ++** ld1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_u64_u64offset, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1uw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_tied1_u64_u64offset: ++** ld1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_u64_u64offset, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svld1uw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_untied_u64_u64offset: ++** ld1w z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_u64_u64offset, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svld1uw_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ld1uw_gather_ext_u64_u64offset: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ 
(ld1uw_gather_ext_u64_u64offset, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1uw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uw_gather_x0_u64_s64index: ++** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svld1uw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_tied1_u64_s64index: ++** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svld1uw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_untied_u64_s64index: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64index_u64 (p0, x0, z1), ++ z0_res = svld1uw_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ld1uw_gather_ext_u64_s64index: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ z0_res = svld1uw_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svld1uw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ld1uw_gather_x0_u64_u64index: ++** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svld1uw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_tied1_u64_u64index: ++** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svld1uw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ld1uw_gather_untied_u64_u64index: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64index_u64 (p0, x0, z1), ++ z0_res = svld1uw_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ld1uw_gather_ext_u64_u64index: ++** ld1w z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svld1uw_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svld1uw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_s64.c +new file mode 100644 +index 000000000..55f5cbad3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_s64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uw_s64_base: ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_s64_base, svint64_t, uint32_t, ++ z0 = svld1uw_s64 (p0, x0), ++ z0 = svld1uw_s64 (p0, x0)) ++ ++/* ++** ld1uw_s64_index: ++** ld1w z0\.d, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1uw_s64_index, svint64_t, uint32_t, ++ z0 = svld1uw_s64 (p0, x0 + x1), ++ z0 = svld1uw_s64 (p0, x0 + x1)) ++ ++/* ++** ld1uw_s64_1: ++** ld1w z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_s64_1, svint64_t, uint32_t, ++ z0 = svld1uw_s64 (p0, x0 + svcntd ()), ++ z0 = svld1uw_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1uw_s64_7: ++** ld1w z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_s64_7, svint64_t, uint32_t, ++ z0 = svld1uw_s64 (p0, x0 + svcntd () * 7), ++ z0 = svld1uw_s64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uw_s64_8: ++** incb x0, all, mul #4 ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_s64_8, svint64_t, uint32_t, ++ z0 = svld1uw_s64 (p0, x0 + svcntd () * 8), ++ z0 = svld1uw_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1uw_s64_m1: ++** ld1w z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_s64_m1, svint64_t, uint32_t, ++ z0 = svld1uw_s64 (p0, x0 - svcntd ()), ++ z0 = svld1uw_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1uw_s64_m8: ++** ld1w z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_s64_m8, svint64_t, uint32_t, ++ z0 = svld1uw_s64 (p0, x0 - svcntd () * 8), ++ z0 = svld1uw_s64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uw_s64_m9: ++** dech x0, all, mul #9 ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_s64_m9, svint64_t, uint32_t, ++ z0 = svld1uw_s64 (p0, x0 - svcntd () * 9), ++ z0 = svld1uw_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1uw_vnum_s64_0: ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_s64_0, svint64_t, uint32_t, ++ z0 = svld1uw_vnum_s64 (p0, x0, 0), ++ z0 = svld1uw_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ld1uw_vnum_s64_1: ++** ld1w z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_s64_1, svint64_t, uint32_t, ++ z0 = svld1uw_vnum_s64 (p0, x0, 1), ++ z0 = svld1uw_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ld1uw_vnum_s64_7: ++** ld1w z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_s64_7, svint64_t, uint32_t, ++ z0 = svld1uw_vnum_s64 (p0, x0, 7), ++ z0 = svld1uw_vnum_s64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uw_vnum_s64_8: ++** incb x0, all, mul #4 ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_s64_8, svint64_t, uint32_t, ++ z0 = svld1uw_vnum_s64 (p0, x0, 8), ++ z0 = svld1uw_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ld1uw_vnum_s64_m1: ++** ld1w z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_s64_m1, svint64_t, uint32_t, ++ z0 = svld1uw_vnum_s64 (p0, x0, -1), ++ z0 = svld1uw_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ld1uw_vnum_s64_m8: ++** ld1w z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_s64_m8, svint64_t, uint32_t, ++ z0 = svld1uw_vnum_s64 (p0, x0, -8), ++ z0 = svld1uw_vnum_s64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1uw_vnum_s64_m9: ++** dech x0, all, mul #9 ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_s64_m9, svint64_t, uint32_t, ++ z0 = svld1uw_vnum_s64 (p0, x0, -9), ++ z0 = svld1uw_vnum_s64 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1uw_vnum_s64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1w z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_s64_x1, svint64_t, uint32_t, ++ z0 = svld1uw_vnum_s64 (p0, x0, x1), ++ z0 = svld1uw_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_u64.c +new file mode 100644 +index 000000000..175b593f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_u64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld1uw_u64_base: ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_u64_base, svuint64_t, uint32_t, ++ z0 = svld1uw_u64 (p0, x0), ++ z0 = svld1uw_u64 (p0, x0)) ++ ++/* ++** ld1uw_u64_index: ++** ld1w z0\.d, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld1uw_u64_index, svuint64_t, uint32_t, ++ z0 = svld1uw_u64 (p0, x0 + x1), ++ z0 = svld1uw_u64 (p0, x0 + x1)) ++ ++/* ++** ld1uw_u64_1: ++** ld1w z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_u64_1, svuint64_t, uint32_t, ++ z0 = svld1uw_u64 (p0, x0 + svcntd ()), ++ z0 = svld1uw_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ld1uw_u64_7: ++** ld1w z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_u64_7, svuint64_t, uint32_t, ++ z0 = svld1uw_u64 (p0, x0 + svcntd () * 7), ++ z0 = svld1uw_u64 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uw_u64_8: ++** incb x0, all, mul #4 ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_u64_8, svuint64_t, uint32_t, ++ z0 = svld1uw_u64 (p0, x0 + svcntd () * 8), ++ z0 = svld1uw_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ld1uw_u64_m1: ++** ld1w z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_u64_m1, svuint64_t, uint32_t, ++ z0 = svld1uw_u64 (p0, x0 - svcntd ()), ++ z0 = svld1uw_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ld1uw_u64_m8: ++** ld1w z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_u64_m8, svuint64_t, uint32_t, ++ z0 = svld1uw_u64 (p0, x0 - svcntd () * 8), ++ z0 = svld1uw_u64 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uw_u64_m9: ++** dech x0, all, mul #9 ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_u64_m9, svuint64_t, uint32_t, ++ z0 = svld1uw_u64 (p0, x0 - svcntd () * 9), ++ z0 = svld1uw_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ld1uw_vnum_u64_0: ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_u64_0, svuint64_t, uint32_t, ++ z0 = svld1uw_vnum_u64 (p0, x0, 0), ++ z0 = svld1uw_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ld1uw_vnum_u64_1: ++** ld1w z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_u64_1, svuint64_t, uint32_t, ++ z0 = svld1uw_vnum_u64 (p0, x0, 1), ++ z0 = svld1uw_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ld1uw_vnum_u64_7: ++** ld1w z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_u64_7, svuint64_t, uint32_t, ++ z0 = svld1uw_vnum_u64 (p0, x0, 7), ++ z0 = svld1uw_vnum_u64 (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld1uw_vnum_u64_8: ++** incb x0, all, mul #4 ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_u64_8, svuint64_t, uint32_t, ++ z0 = svld1uw_vnum_u64 (p0, x0, 8), ++ z0 = svld1uw_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ld1uw_vnum_u64_m1: ++** ld1w z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_u64_m1, svuint64_t, uint32_t, ++ z0 = svld1uw_vnum_u64 (p0, x0, -1), ++ z0 = svld1uw_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ld1uw_vnum_u64_m8: ++** ld1w z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_u64_m8, svuint64_t, uint32_t, ++ z0 = svld1uw_vnum_u64 (p0, x0, -8), ++ z0 = svld1uw_vnum_u64 (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld1uw_vnum_u64_m9: ++** dech x0, all, mul #9 ++** ld1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_u64_m9, svuint64_t, uint32_t, ++ z0 = svld1uw_vnum_u64 (p0, x0, -9), ++ z0 = svld1uw_vnum_u64 (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld1uw_vnum_u64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld1w z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld1uw_vnum_u64_x1, svuint64_t, uint32_t, ++ z0 = svld1uw_vnum_u64 (p0, x0, x1), ++ z0 = svld1uw_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_bf16.c +new file mode 100644 +index 000000000..5d08c1e6e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_bf16.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_bf16_base: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_bf16_base, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_bf16 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_bf16_index: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld2_bf16_index, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_bf16 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_bf16_1: ++** incb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_bf16_1, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_bf16 (p0, x0 + svcnth ()), ++ z0 = svld2 (p0, x0 + svcnth ())) ++ ++/* ++** ld2_bf16_2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_bf16_2, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_bf16 (p0, x0 + svcnth () * 2), ++ z0 = svld2 (p0, x0 + svcnth () * 2)) ++ ++/* ++** ld2_bf16_14: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_bf16_14, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_bf16 (p0, x0 + svcnth () * 14), ++ z0 = svld2 (p0, x0 + svcnth () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_bf16_16: ++** incb x0, all, mul #16 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_bf16_16, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_bf16 (p0, x0 + svcnth () * 16), ++ z0 = svld2 (p0, x0 + svcnth () * 16)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_bf16_m1: ++** decb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_bf16_m1, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_bf16 (p0, x0 - svcnth ()), ++ z0 = svld2 (p0, x0 - svcnth ())) ++ ++/* ++** ld2_bf16_m2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_bf16_m2, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_bf16 (p0, x0 - svcnth () * 2), ++ z0 = svld2 (p0, x0 - svcnth () * 2)) ++ ++/* ++** ld2_bf16_m16: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_bf16_m16, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_bf16 (p0, x0 - svcnth () * 16), ++ z0 = svld2 (p0, x0 - svcnth () * 16)) ++ ++/* ++** ld2_bf16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_bf16_m18, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_bf16 (p0, x0 - svcnth () * 18), ++ z0 = svld2 (p0, x0 - svcnth () * 18)) ++ ++/* ++** ld2_vnum_bf16_0: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_bf16_0, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_vnum_bf16 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_bf16_1: ++** incb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_bf16_1, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_vnum_bf16 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_bf16_2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_bf16_2, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_vnum_bf16 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_bf16_14: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_bf16_14, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_vnum_bf16 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_bf16_16: ++** incb x0, all, mul #16 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_bf16_16, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_vnum_bf16 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_bf16_m1: ++** decb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_vnum_bf16 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_bf16_m2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_vnum_bf16 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_bf16_m16: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_vnum_bf16 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_bf16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_vnum_bf16 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** ld2_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t, ++ z0 = svld2_vnum_bf16 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f16.c +new file mode 100644 +index 000000000..43392b2b2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f16.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_f16_base: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f16_base, svfloat16x2_t, float16_t, ++ z0 = svld2_f16 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_f16_index: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld2_f16_index, svfloat16x2_t, float16_t, ++ z0 = svld2_f16 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_f16_1: ++** incb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f16_1, svfloat16x2_t, float16_t, ++ z0 = svld2_f16 (p0, x0 + svcnth ()), ++ z0 = svld2 (p0, x0 + svcnth ())) ++ ++/* ++** ld2_f16_2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f16_2, svfloat16x2_t, float16_t, ++ z0 = svld2_f16 (p0, x0 + svcnth () * 2), ++ z0 = svld2 (p0, x0 + svcnth () * 2)) ++ ++/* ++** ld2_f16_14: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f16_14, svfloat16x2_t, float16_t, ++ z0 = svld2_f16 (p0, x0 + svcnth () * 14), ++ z0 = svld2 (p0, x0 + svcnth () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_f16_16: ++** incb x0, all, mul #16 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f16_16, svfloat16x2_t, float16_t, ++ z0 = svld2_f16 (p0, x0 + svcnth () * 16), ++ z0 = svld2 (p0, x0 + svcnth () * 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_f16_m1: ++** decb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f16_m1, svfloat16x2_t, float16_t, ++ z0 = svld2_f16 (p0, x0 - svcnth ()), ++ z0 = svld2 (p0, x0 - svcnth ())) ++ ++/* ++** ld2_f16_m2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f16_m2, svfloat16x2_t, float16_t, ++ z0 = svld2_f16 (p0, x0 - svcnth () * 2), ++ z0 = svld2 (p0, x0 - svcnth () * 2)) ++ ++/* ++** ld2_f16_m16: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f16_m16, svfloat16x2_t, float16_t, ++ z0 = svld2_f16 (p0, x0 - svcnth () * 16), ++ z0 = svld2 (p0, x0 - svcnth () * 16)) ++ ++/* ++** ld2_f16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_f16_m18, svfloat16x2_t, float16_t, ++ z0 = svld2_f16 (p0, x0 - svcnth () * 18), ++ z0 = svld2 (p0, x0 - svcnth () * 18)) ++ ++/* ++** ld2_vnum_f16_0: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f16_0, svfloat16x2_t, float16_t, ++ z0 = svld2_vnum_f16 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_vnum_f16_1: ++** incb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f16_1, svfloat16x2_t, float16_t, ++ z0 = svld2_vnum_f16 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_f16_2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f16_2, svfloat16x2_t, float16_t, ++ z0 = svld2_vnum_f16 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_f16_14: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f16_14, svfloat16x2_t, float16_t, ++ z0 = svld2_vnum_f16 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_f16_16: ++** incb x0, all, mul #16 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f16_16, svfloat16x2_t, float16_t, ++ z0 = svld2_vnum_f16 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_f16_m1: ++** decb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f16_m1, svfloat16x2_t, float16_t, ++ z0 = svld2_vnum_f16 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_f16_m2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f16_m2, svfloat16x2_t, float16_t, ++ z0 = svld2_vnum_f16 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_f16_m16: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f16_m16, svfloat16x2_t, float16_t, ++ z0 = svld2_vnum_f16 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_f16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f16_m18, svfloat16x2_t, float16_t, ++ z0 = svld2_vnum_f16 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld2_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f16_x1, svfloat16x2_t, float16_t, ++ z0 = svld2_vnum_f16 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f32.c +new file mode 100644 +index 000000000..379145e0c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f32.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_f32_base: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f32_base, svfloat32x2_t, float32_t, ++ z0 = svld2_f32 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_f32_index: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld2_f32_index, svfloat32x2_t, float32_t, ++ z0 = svld2_f32 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_f32_1: ++** incb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f32_1, svfloat32x2_t, float32_t, ++ z0 = svld2_f32 (p0, x0 + svcntw ()), ++ z0 = svld2 (p0, x0 + svcntw ())) ++ ++/* ++** ld2_f32_2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f32_2, svfloat32x2_t, float32_t, ++ z0 = svld2_f32 (p0, x0 + svcntw () * 2), ++ z0 = svld2 (p0, x0 + svcntw () * 2)) ++ ++/* ++** ld2_f32_14: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f32_14, svfloat32x2_t, float32_t, ++ z0 = svld2_f32 (p0, x0 + svcntw () * 14), ++ z0 = svld2 (p0, x0 + svcntw () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_f32_16: ++** incb x0, all, mul #16 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f32_16, svfloat32x2_t, float32_t, ++ z0 = svld2_f32 (p0, x0 + svcntw () * 16), ++ z0 = svld2 (p0, x0 + svcntw () * 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_f32_m1: ++** decb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f32_m1, svfloat32x2_t, float32_t, ++ z0 = svld2_f32 (p0, x0 - svcntw ()), ++ z0 = svld2 (p0, x0 - svcntw ())) ++ ++/* ++** ld2_f32_m2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f32_m2, svfloat32x2_t, float32_t, ++ z0 = svld2_f32 (p0, x0 - svcntw () * 2), ++ z0 = svld2 (p0, x0 - svcntw () * 2)) ++ ++/* ++** ld2_f32_m16: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f32_m16, svfloat32x2_t, float32_t, ++ z0 = svld2_f32 (p0, x0 - svcntw () * 16), ++ z0 = svld2 (p0, x0 - svcntw () * 16)) ++ ++/* ++** ld2_f32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_f32_m18, svfloat32x2_t, float32_t, ++ z0 = svld2_f32 (p0, x0 - svcntw () * 18), ++ z0 = svld2 (p0, x0 - svcntw () * 18)) ++ ++/* ++** ld2_vnum_f32_0: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f32_0, svfloat32x2_t, float32_t, ++ z0 = svld2_vnum_f32 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_f32_1: ++** incb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f32_1, svfloat32x2_t, float32_t, ++ z0 = svld2_vnum_f32 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_f32_2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f32_2, svfloat32x2_t, float32_t, ++ z0 = svld2_vnum_f32 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_f32_14: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f32_14, svfloat32x2_t, float32_t, ++ z0 = svld2_vnum_f32 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_f32_16: ++** incb x0, all, mul #16 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f32_16, svfloat32x2_t, float32_t, ++ z0 = svld2_vnum_f32 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_vnum_f32_m1: ++** decb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f32_m1, svfloat32x2_t, float32_t, ++ z0 = svld2_vnum_f32 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_f32_m2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f32_m2, svfloat32x2_t, float32_t, ++ z0 = svld2_vnum_f32 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_f32_m16: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f32_m16, svfloat32x2_t, float32_t, ++ z0 = svld2_vnum_f32 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_f32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f32_m18, svfloat32x2_t, float32_t, ++ z0 = svld2_vnum_f32 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld2_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f32_x1, svfloat32x2_t, float32_t, ++ z0 = svld2_vnum_f32 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f64.c +new file mode 100644 +index 000000000..1911612c6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f64.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_f64_base: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f64_base, svfloat64x2_t, float64_t, ++ z0 = svld2_f64 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_f64_index: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld2_f64_index, svfloat64x2_t, float64_t, ++ z0 = svld2_f64 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_f64_1: ++** incb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f64_1, svfloat64x2_t, float64_t, ++ z0 = svld2_f64 (p0, x0 + svcntd ()), ++ z0 = svld2 (p0, x0 + svcntd ())) ++ ++/* ++** ld2_f64_2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f64_2, svfloat64x2_t, float64_t, ++ z0 = svld2_f64 (p0, x0 + svcntd () * 2), ++ z0 = svld2 (p0, x0 + svcntd () * 2)) ++ ++/* ++** ld2_f64_14: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f64_14, svfloat64x2_t, float64_t, ++ z0 = svld2_f64 (p0, x0 + svcntd () * 14), ++ z0 = svld2 (p0, x0 + svcntd () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_f64_16: ++** incb x0, all, mul #16 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f64_16, svfloat64x2_t, float64_t, ++ z0 = svld2_f64 (p0, x0 + svcntd () * 16), ++ z0 = svld2 (p0, x0 + svcntd () * 16)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_f64_m1: ++** decb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_f64_m1, svfloat64x2_t, float64_t, ++ z0 = svld2_f64 (p0, x0 - svcntd ()), ++ z0 = svld2 (p0, x0 - svcntd ())) ++ ++/* ++** ld2_f64_m2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f64_m2, svfloat64x2_t, float64_t, ++ z0 = svld2_f64 (p0, x0 - svcntd () * 2), ++ z0 = svld2 (p0, x0 - svcntd () * 2)) ++ ++/* ++** ld2_f64_m16: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_f64_m16, svfloat64x2_t, float64_t, ++ z0 = svld2_f64 (p0, x0 - svcntd () * 16), ++ z0 = svld2 (p0, x0 - svcntd () * 16)) ++ ++/* ++** ld2_f64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_f64_m18, svfloat64x2_t, float64_t, ++ z0 = svld2_f64 (p0, x0 - svcntd () * 18), ++ z0 = svld2 (p0, x0 - svcntd () * 18)) ++ ++/* ++** ld2_vnum_f64_0: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f64_0, svfloat64x2_t, float64_t, ++ z0 = svld2_vnum_f64 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_f64_1: ++** incb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f64_1, svfloat64x2_t, float64_t, ++ z0 = svld2_vnum_f64 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_f64_2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f64_2, svfloat64x2_t, float64_t, ++ z0 = svld2_vnum_f64 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_f64_14: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f64_14, svfloat64x2_t, float64_t, ++ z0 = svld2_vnum_f64 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_f64_16: ++** incb x0, all, mul #16 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f64_16, svfloat64x2_t, float64_t, ++ z0 = svld2_vnum_f64 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_f64_m1: ++** decb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f64_m1, svfloat64x2_t, float64_t, ++ z0 = svld2_vnum_f64 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_f64_m2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f64_m2, svfloat64x2_t, float64_t, ++ z0 = svld2_vnum_f64 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_f64_m16: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f64_m16, svfloat64x2_t, float64_t, ++ z0 = svld2_vnum_f64 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_f64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f64_m18, svfloat64x2_t, float64_t, ++ z0 = svld2_vnum_f64 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** ld2_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_f64_x1, svfloat64x2_t, float64_t, ++ z0 = svld2_vnum_f64 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s16.c +new file mode 100644 +index 000000000..90677d837 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s16.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_s16_base: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s16_base, svint16x2_t, int16_t, ++ z0 = svld2_s16 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_s16_index: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld2_s16_index, svint16x2_t, int16_t, ++ z0 = svld2_s16 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_s16_1: ++** incb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s16_1, svint16x2_t, int16_t, ++ z0 = svld2_s16 (p0, x0 + svcnth ()), ++ z0 = svld2 (p0, x0 + svcnth ())) ++ ++/* ++** ld2_s16_2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s16_2, svint16x2_t, int16_t, ++ z0 = svld2_s16 (p0, x0 + svcnth () * 2), ++ z0 = svld2 (p0, x0 + svcnth () * 2)) ++ ++/* ++** ld2_s16_14: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s16_14, svint16x2_t, int16_t, ++ z0 = svld2_s16 (p0, x0 + svcnth () * 14), ++ z0 = svld2 (p0, x0 + svcnth () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_s16_16: ++** incb x0, all, mul #16 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s16_16, svint16x2_t, int16_t, ++ z0 = svld2_s16 (p0, x0 + svcnth () * 16), ++ z0 = svld2 (p0, x0 + svcnth () * 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_s16_m1: ++** decb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s16_m1, svint16x2_t, int16_t, ++ z0 = svld2_s16 (p0, x0 - svcnth ()), ++ z0 = svld2 (p0, x0 - svcnth ())) ++ ++/* ++** ld2_s16_m2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s16_m2, svint16x2_t, int16_t, ++ z0 = svld2_s16 (p0, x0 - svcnth () * 2), ++ z0 = svld2 (p0, x0 - svcnth () * 2)) ++ ++/* ++** ld2_s16_m16: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s16_m16, svint16x2_t, int16_t, ++ z0 = svld2_s16 (p0, x0 - svcnth () * 16), ++ z0 = svld2 (p0, x0 - svcnth () * 16)) ++ ++/* ++** ld2_s16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_s16_m18, svint16x2_t, int16_t, ++ z0 = svld2_s16 (p0, x0 - svcnth () * 18), ++ z0 = svld2 (p0, x0 - svcnth () * 18)) ++ ++/* ++** ld2_vnum_s16_0: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s16_0, svint16x2_t, int16_t, ++ z0 = svld2_vnum_s16 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_vnum_s16_1: ++** incb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s16_1, svint16x2_t, int16_t, ++ z0 = svld2_vnum_s16 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_s16_2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s16_2, svint16x2_t, int16_t, ++ z0 = svld2_vnum_s16 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_s16_14: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s16_14, svint16x2_t, int16_t, ++ z0 = svld2_vnum_s16 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_s16_16: ++** incb x0, all, mul #16 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s16_16, svint16x2_t, int16_t, ++ z0 = svld2_vnum_s16 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_s16_m1: ++** decb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s16_m1, svint16x2_t, int16_t, ++ z0 = svld2_vnum_s16 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_s16_m2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s16_m2, svint16x2_t, int16_t, ++ z0 = svld2_vnum_s16 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_s16_m16: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s16_m16, svint16x2_t, int16_t, ++ z0 = svld2_vnum_s16 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_s16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s16_m18, svint16x2_t, int16_t, ++ z0 = svld2_vnum_s16 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld2_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s16_x1, svint16x2_t, int16_t, ++ z0 = svld2_vnum_s16 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s32.c +new file mode 100644 +index 000000000..10913c2d0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s32.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_s32_base: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s32_base, svint32x2_t, int32_t, ++ z0 = svld2_s32 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_s32_index: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld2_s32_index, svint32x2_t, int32_t, ++ z0 = svld2_s32 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_s32_1: ++** incb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s32_1, svint32x2_t, int32_t, ++ z0 = svld2_s32 (p0, x0 + svcntw ()), ++ z0 = svld2 (p0, x0 + svcntw ())) ++ ++/* ++** ld2_s32_2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s32_2, svint32x2_t, int32_t, ++ z0 = svld2_s32 (p0, x0 + svcntw () * 2), ++ z0 = svld2 (p0, x0 + svcntw () * 2)) ++ ++/* ++** ld2_s32_14: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s32_14, svint32x2_t, int32_t, ++ z0 = svld2_s32 (p0, x0 + svcntw () * 14), ++ z0 = svld2 (p0, x0 + svcntw () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_s32_16: ++** incb x0, all, mul #16 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s32_16, svint32x2_t, int32_t, ++ z0 = svld2_s32 (p0, x0 + svcntw () * 16), ++ z0 = svld2 (p0, x0 + svcntw () * 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_s32_m1: ++** decb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s32_m1, svint32x2_t, int32_t, ++ z0 = svld2_s32 (p0, x0 - svcntw ()), ++ z0 = svld2 (p0, x0 - svcntw ())) ++ ++/* ++** ld2_s32_m2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s32_m2, svint32x2_t, int32_t, ++ z0 = svld2_s32 (p0, x0 - svcntw () * 2), ++ z0 = svld2 (p0, x0 - svcntw () * 2)) ++ ++/* ++** ld2_s32_m16: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s32_m16, svint32x2_t, int32_t, ++ z0 = svld2_s32 (p0, x0 - svcntw () * 16), ++ z0 = svld2 (p0, x0 - svcntw () * 16)) ++ ++/* ++** ld2_s32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_s32_m18, svint32x2_t, int32_t, ++ z0 = svld2_s32 (p0, x0 - svcntw () * 18), ++ z0 = svld2 (p0, x0 - svcntw () * 18)) ++ ++/* ++** ld2_vnum_s32_0: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s32_0, svint32x2_t, int32_t, ++ z0 = svld2_vnum_s32 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_s32_1: ++** incb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s32_1, svint32x2_t, int32_t, ++ z0 = svld2_vnum_s32 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_s32_2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s32_2, svint32x2_t, int32_t, ++ z0 = svld2_vnum_s32 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_s32_14: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s32_14, svint32x2_t, int32_t, ++ z0 = svld2_vnum_s32 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_s32_16: ++** incb x0, all, mul #16 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s32_16, svint32x2_t, int32_t, ++ z0 = svld2_vnum_s32 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_vnum_s32_m1: ++** decb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s32_m1, svint32x2_t, int32_t, ++ z0 = svld2_vnum_s32 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_s32_m2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s32_m2, svint32x2_t, int32_t, ++ z0 = svld2_vnum_s32 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_s32_m16: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s32_m16, svint32x2_t, int32_t, ++ z0 = svld2_vnum_s32 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_s32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s32_m18, svint32x2_t, int32_t, ++ z0 = svld2_vnum_s32 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld2_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s32_x1, svint32x2_t, int32_t, ++ z0 = svld2_vnum_s32 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s64.c +new file mode 100644 +index 000000000..9a43e86d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s64.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_s64_base: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s64_base, svint64x2_t, int64_t, ++ z0 = svld2_s64 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_s64_index: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld2_s64_index, svint64x2_t, int64_t, ++ z0 = svld2_s64 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_s64_1: ++** incb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s64_1, svint64x2_t, int64_t, ++ z0 = svld2_s64 (p0, x0 + svcntd ()), ++ z0 = svld2 (p0, x0 + svcntd ())) ++ ++/* ++** ld2_s64_2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s64_2, svint64x2_t, int64_t, ++ z0 = svld2_s64 (p0, x0 + svcntd () * 2), ++ z0 = svld2 (p0, x0 + svcntd () * 2)) ++ ++/* ++** ld2_s64_14: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s64_14, svint64x2_t, int64_t, ++ z0 = svld2_s64 (p0, x0 + svcntd () * 14), ++ z0 = svld2 (p0, x0 + svcntd () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_s64_16: ++** incb x0, all, mul #16 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s64_16, svint64x2_t, int64_t, ++ z0 = svld2_s64 (p0, x0 + svcntd () * 16), ++ z0 = svld2 (p0, x0 + svcntd () * 16)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_s64_m1: ++** decb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s64_m1, svint64x2_t, int64_t, ++ z0 = svld2_s64 (p0, x0 - svcntd ()), ++ z0 = svld2 (p0, x0 - svcntd ())) ++ ++/* ++** ld2_s64_m2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s64_m2, svint64x2_t, int64_t, ++ z0 = svld2_s64 (p0, x0 - svcntd () * 2), ++ z0 = svld2 (p0, x0 - svcntd () * 2)) ++ ++/* ++** ld2_s64_m16: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s64_m16, svint64x2_t, int64_t, ++ z0 = svld2_s64 (p0, x0 - svcntd () * 16), ++ z0 = svld2 (p0, x0 - svcntd () * 16)) ++ ++/* ++** ld2_s64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_s64_m18, svint64x2_t, int64_t, ++ z0 = svld2_s64 (p0, x0 - svcntd () * 18), ++ z0 = svld2 (p0, x0 - svcntd () * 18)) ++ ++/* ++** ld2_vnum_s64_0: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s64_0, svint64x2_t, int64_t, ++ z0 = svld2_vnum_s64 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_s64_1: ++** incb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s64_1, svint64x2_t, int64_t, ++ z0 = svld2_vnum_s64 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_s64_2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s64_2, svint64x2_t, int64_t, ++ z0 = svld2_vnum_s64 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_s64_14: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s64_14, svint64x2_t, int64_t, ++ z0 = svld2_vnum_s64 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_s64_16: ++** incb x0, all, mul #16 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s64_16, svint64x2_t, int64_t, ++ z0 = svld2_vnum_s64 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_s64_m1: ++** decb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s64_m1, svint64x2_t, int64_t, ++ z0 = svld2_vnum_s64 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_s64_m2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s64_m2, svint64x2_t, int64_t, ++ z0 = svld2_vnum_s64 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_s64_m16: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s64_m16, svint64x2_t, int64_t, ++ z0 = svld2_vnum_s64 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_s64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s64_m18, svint64x2_t, int64_t, ++ z0 = svld2_vnum_s64 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** ld2_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s64_x1, svint64x2_t, int64_t, ++ z0 = svld2_vnum_s64 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s8.c +new file mode 100644 +index 000000000..af5c04c66 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s8.c +@@ -0,0 +1,204 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_s8_base: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s8_base, svint8x2_t, int8_t, ++ z0 = svld2_s8 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_s8_index: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld2_s8_index, svint8x2_t, int8_t, ++ z0 = svld2_s8 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_s8_1: ++** incb x0 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s8_1, svint8x2_t, int8_t, ++ z0 = svld2_s8 (p0, x0 + svcntb ()), ++ z0 = svld2 (p0, x0 + svcntb ())) ++ ++/* ++** ld2_s8_2: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s8_2, svint8x2_t, int8_t, ++ z0 = svld2_s8 (p0, x0 + svcntb () * 2), ++ z0 = svld2 (p0, x0 + svcntb () * 2)) ++ ++/* ++** ld2_s8_14: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s8_14, svint8x2_t, int8_t, ++ z0 = svld2_s8 (p0, x0 + svcntb () * 14), ++ z0 = svld2 (p0, x0 + svcntb () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_s8_16: ++** incb x0, all, mul #16 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s8_16, svint8x2_t, int8_t, ++ z0 = svld2_s8 (p0, x0 + svcntb () * 16), ++ z0 = svld2 (p0, x0 + svcntb () * 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_s8_m1: ++** decb x0 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_s8_m1, svint8x2_t, int8_t, ++ z0 = svld2_s8 (p0, x0 - svcntb ()), ++ z0 = svld2 (p0, x0 - svcntb ())) ++ ++/* ++** ld2_s8_m2: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s8_m2, svint8x2_t, int8_t, ++ z0 = svld2_s8 (p0, x0 - svcntb () * 2), ++ z0 = svld2 (p0, x0 - svcntb () * 2)) ++ ++/* ++** ld2_s8_m16: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_s8_m16, svint8x2_t, int8_t, ++ z0 = svld2_s8 (p0, x0 - svcntb () * 16), ++ z0 = svld2 (p0, x0 - svcntb () * 16)) ++ ++/* ++** ld2_s8_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_s8_m18, svint8x2_t, int8_t, ++ z0 = svld2_s8 (p0, x0 - svcntb () * 18), ++ z0 = svld2 (p0, x0 - svcntb () * 18)) ++ ++/* ++** ld2_vnum_s8_0: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s8_0, svint8x2_t, int8_t, ++ z0 = svld2_vnum_s8 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_vnum_s8_1: ++** incb x0 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s8_1, svint8x2_t, int8_t, ++ z0 = svld2_vnum_s8 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_s8_2: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s8_2, svint8x2_t, int8_t, ++ z0 = svld2_vnum_s8 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_s8_14: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s8_14, svint8x2_t, int8_t, ++ z0 = svld2_vnum_s8 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_s8_16: ++** incb x0, all, mul #16 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s8_16, svint8x2_t, int8_t, ++ z0 = svld2_vnum_s8 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_s8_m1: ++** decb x0 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s8_m1, svint8x2_t, int8_t, ++ z0 = svld2_vnum_s8 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_s8_m2: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s8_m2, svint8x2_t, int8_t, ++ z0 = svld2_vnum_s8 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_s8_m16: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s8_m16, svint8x2_t, int8_t, ++ z0 = svld2_vnum_s8 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_s8_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s8_m18, svint8x2_t, int8_t, ++ z0 = svld2_vnum_s8 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* ++** ld2_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld2_vnum_s8_x1, svint8x2_t, int8_t, ++ z0 = svld2_vnum_s8 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u16.c +new file mode 100644 +index 000000000..6c33322c1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u16.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_u16_base: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u16_base, svuint16x2_t, uint16_t, ++ z0 = svld2_u16 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_u16_index: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld2_u16_index, svuint16x2_t, uint16_t, ++ z0 = svld2_u16 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_u16_1: ++** incb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u16_1, svuint16x2_t, uint16_t, ++ z0 = svld2_u16 (p0, x0 + svcnth ()), ++ z0 = svld2 (p0, x0 + svcnth ())) ++ ++/* ++** ld2_u16_2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u16_2, svuint16x2_t, uint16_t, ++ z0 = svld2_u16 (p0, x0 + svcnth () * 2), ++ z0 = svld2 (p0, x0 + svcnth () * 2)) ++ ++/* ++** ld2_u16_14: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u16_14, svuint16x2_t, uint16_t, ++ z0 = svld2_u16 (p0, x0 + svcnth () * 14), ++ z0 = svld2 (p0, x0 + svcnth () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_u16_16: ++** incb x0, all, mul #16 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u16_16, svuint16x2_t, uint16_t, ++ z0 = svld2_u16 (p0, x0 + svcnth () * 16), ++ z0 = svld2 (p0, x0 + svcnth () * 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_u16_m1: ++** decb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u16_m1, svuint16x2_t, uint16_t, ++ z0 = svld2_u16 (p0, x0 - svcnth ()), ++ z0 = svld2 (p0, x0 - svcnth ())) ++ ++/* ++** ld2_u16_m2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u16_m2, svuint16x2_t, uint16_t, ++ z0 = svld2_u16 (p0, x0 - svcnth () * 2), ++ z0 = svld2 (p0, x0 - svcnth () * 2)) ++ ++/* ++** ld2_u16_m16: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u16_m16, svuint16x2_t, uint16_t, ++ z0 = svld2_u16 (p0, x0 - svcnth () * 16), ++ z0 = svld2 (p0, x0 - svcnth () * 16)) ++ ++/* ++** ld2_u16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_u16_m18, svuint16x2_t, uint16_t, ++ z0 = svld2_u16 (p0, x0 - svcnth () * 18), ++ z0 = svld2 (p0, x0 - svcnth () * 18)) ++ ++/* ++** ld2_vnum_u16_0: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u16_0, svuint16x2_t, uint16_t, ++ z0 = svld2_vnum_u16 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_u16_1: ++** incb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u16_1, svuint16x2_t, uint16_t, ++ z0 = svld2_vnum_u16 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_u16_2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u16_2, svuint16x2_t, uint16_t, ++ z0 = svld2_vnum_u16 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_u16_14: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u16_14, svuint16x2_t, uint16_t, ++ z0 = svld2_vnum_u16 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_u16_16: ++** incb x0, all, mul #16 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u16_16, svuint16x2_t, uint16_t, ++ z0 = svld2_vnum_u16 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_vnum_u16_m1: ++** decb x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u16_m1, svuint16x2_t, uint16_t, ++ z0 = svld2_vnum_u16 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_u16_m2: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u16_m2, svuint16x2_t, uint16_t, ++ z0 = svld2_vnum_u16 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_u16_m16: ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u16_m16, svuint16x2_t, uint16_t, ++ z0 = svld2_vnum_u16 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_u16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u16_m18, svuint16x2_t, uint16_t, ++ z0 = svld2_vnum_u16 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld2_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u16_x1, svuint16x2_t, uint16_t, ++ z0 = svld2_vnum_u16 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u32.c +new file mode 100644 +index 000000000..84a23cf47 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u32.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_u32_base: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u32_base, svuint32x2_t, uint32_t, ++ z0 = svld2_u32 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_u32_index: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld2_u32_index, svuint32x2_t, uint32_t, ++ z0 = svld2_u32 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_u32_1: ++** incb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u32_1, svuint32x2_t, uint32_t, ++ z0 = svld2_u32 (p0, x0 + svcntw ()), ++ z0 = svld2 (p0, x0 + svcntw ())) ++ ++/* ++** ld2_u32_2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u32_2, svuint32x2_t, uint32_t, ++ z0 = svld2_u32 (p0, x0 + svcntw () * 2), ++ z0 = svld2 (p0, x0 + svcntw () * 2)) ++ ++/* ++** ld2_u32_14: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u32_14, svuint32x2_t, uint32_t, ++ z0 = svld2_u32 (p0, x0 + svcntw () * 14), ++ z0 = svld2 (p0, x0 + svcntw () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_u32_16: ++** incb x0, all, mul #16 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u32_16, svuint32x2_t, uint32_t, ++ z0 = svld2_u32 (p0, x0 + svcntw () * 16), ++ z0 = svld2 (p0, x0 + svcntw () * 16)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_u32_m1: ++** decb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u32_m1, svuint32x2_t, uint32_t, ++ z0 = svld2_u32 (p0, x0 - svcntw ()), ++ z0 = svld2 (p0, x0 - svcntw ())) ++ ++/* ++** ld2_u32_m2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u32_m2, svuint32x2_t, uint32_t, ++ z0 = svld2_u32 (p0, x0 - svcntw () * 2), ++ z0 = svld2 (p0, x0 - svcntw () * 2)) ++ ++/* ++** ld2_u32_m16: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u32_m16, svuint32x2_t, uint32_t, ++ z0 = svld2_u32 (p0, x0 - svcntw () * 16), ++ z0 = svld2 (p0, x0 - svcntw () * 16)) ++ ++/* ++** ld2_u32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_u32_m18, svuint32x2_t, uint32_t, ++ z0 = svld2_u32 (p0, x0 - svcntw () * 18), ++ z0 = svld2 (p0, x0 - svcntw () * 18)) ++ ++/* ++** ld2_vnum_u32_0: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u32_0, svuint32x2_t, uint32_t, ++ z0 = svld2_vnum_u32 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_u32_1: ++** incb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u32_1, svuint32x2_t, uint32_t, ++ z0 = svld2_vnum_u32 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_u32_2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u32_2, svuint32x2_t, uint32_t, ++ z0 = svld2_vnum_u32 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_u32_14: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u32_14, svuint32x2_t, uint32_t, ++ z0 = svld2_vnum_u32 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_u32_16: ++** incb x0, all, mul #16 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u32_16, svuint32x2_t, uint32_t, ++ z0 = svld2_vnum_u32 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_u32_m1: ++** decb x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u32_m1, svuint32x2_t, uint32_t, ++ z0 = svld2_vnum_u32 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_u32_m2: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u32_m2, svuint32x2_t, uint32_t, ++ z0 = svld2_vnum_u32 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_u32_m16: ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u32_m16, svuint32x2_t, uint32_t, ++ z0 = svld2_vnum_u32 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_u32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u32_m18, svuint32x2_t, uint32_t, ++ z0 = svld2_vnum_u32 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** ld2_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u32_x1, svuint32x2_t, uint32_t, ++ z0 = svld2_vnum_u32 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u64.c +new file mode 100644 +index 000000000..350b05792 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u64.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_u64_base: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u64_base, svuint64x2_t, uint64_t, ++ z0 = svld2_u64 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_u64_index: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld2_u64_index, svuint64x2_t, uint64_t, ++ z0 = svld2_u64 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_u64_1: ++** incb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u64_1, svuint64x2_t, uint64_t, ++ z0 = svld2_u64 (p0, x0 + svcntd ()), ++ z0 = svld2 (p0, x0 + svcntd ())) ++ ++/* ++** ld2_u64_2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u64_2, svuint64x2_t, uint64_t, ++ z0 = svld2_u64 (p0, x0 + svcntd () * 2), ++ z0 = svld2 (p0, x0 + svcntd () * 2)) ++ ++/* ++** ld2_u64_14: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u64_14, svuint64x2_t, uint64_t, ++ z0 = svld2_u64 (p0, x0 + svcntd () * 14), ++ z0 = svld2 (p0, x0 + svcntd () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_u64_16: ++** incb x0, all, mul #16 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u64_16, svuint64x2_t, uint64_t, ++ z0 = svld2_u64 (p0, x0 + svcntd () * 16), ++ z0 = svld2 (p0, x0 + svcntd () * 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_u64_m1: ++** decb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u64_m1, svuint64x2_t, uint64_t, ++ z0 = svld2_u64 (p0, x0 - svcntd ()), ++ z0 = svld2 (p0, x0 - svcntd ())) ++ ++/* ++** ld2_u64_m2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u64_m2, svuint64x2_t, uint64_t, ++ z0 = svld2_u64 (p0, x0 - svcntd () * 2), ++ z0 = svld2 (p0, x0 - svcntd () * 2)) ++ ++/* ++** ld2_u64_m16: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u64_m16, svuint64x2_t, uint64_t, ++ z0 = svld2_u64 (p0, x0 - svcntd () * 16), ++ z0 = svld2 (p0, x0 - svcntd () * 16)) ++ ++/* ++** ld2_u64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_u64_m18, svuint64x2_t, uint64_t, ++ z0 = svld2_u64 (p0, x0 - svcntd () * 18), ++ z0 = svld2 (p0, x0 - svcntd () * 18)) ++ ++/* ++** ld2_vnum_u64_0: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u64_0, svuint64x2_t, uint64_t, ++ z0 = svld2_vnum_u64 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_vnum_u64_1: ++** incb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u64_1, svuint64x2_t, uint64_t, ++ z0 = svld2_vnum_u64 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_u64_2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u64_2, svuint64x2_t, uint64_t, ++ z0 = svld2_vnum_u64 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_u64_14: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u64_14, svuint64x2_t, uint64_t, ++ z0 = svld2_vnum_u64 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_u64_16: ++** incb x0, all, mul #16 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u64_16, svuint64x2_t, uint64_t, ++ z0 = svld2_vnum_u64 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_u64_m1: ++** decb x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u64_m1, svuint64x2_t, uint64_t, ++ z0 = svld2_vnum_u64 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_u64_m2: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u64_m2, svuint64x2_t, uint64_t, ++ z0 = svld2_vnum_u64 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_u64_m16: ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u64_m16, svuint64x2_t, uint64_t, ++ z0 = svld2_vnum_u64 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_u64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u64_m18, svuint64x2_t, uint64_t, ++ z0 = svld2_vnum_u64 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld2_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u64_x1, svuint64x2_t, uint64_t, ++ z0 = svld2_vnum_u64 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u8.c +new file mode 100644 +index 000000000..e67634c4c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u8.c +@@ -0,0 +1,204 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld2_u8_base: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u8_base, svuint8x2_t, uint8_t, ++ z0 = svld2_u8 (p0, x0), ++ z0 = svld2 (p0, x0)) ++ ++/* ++** ld2_u8_index: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld2_u8_index, svuint8x2_t, uint8_t, ++ z0 = svld2_u8 (p0, x0 + x1), ++ z0 = svld2 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_u8_1: ++** incb x0 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u8_1, svuint8x2_t, uint8_t, ++ z0 = svld2_u8 (p0, x0 + svcntb ()), ++ z0 = svld2 (p0, x0 + svcntb ())) ++ ++/* ++** ld2_u8_2: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u8_2, svuint8x2_t, uint8_t, ++ z0 = svld2_u8 (p0, x0 + svcntb () * 2), ++ z0 = svld2 (p0, x0 + svcntb () * 2)) ++ ++/* ++** ld2_u8_14: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u8_14, svuint8x2_t, uint8_t, ++ z0 = svld2_u8 (p0, x0 + svcntb () * 14), ++ z0 = svld2 (p0, x0 + svcntb () * 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_u8_16: ++** incb x0, all, mul #16 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u8_16, svuint8x2_t, uint8_t, ++ z0 = svld2_u8 (p0, x0 + svcntb () * 16), ++ z0 = svld2 (p0, x0 + svcntb () * 16)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_u8_m1: ++** decb x0 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_u8_m1, svuint8x2_t, uint8_t, ++ z0 = svld2_u8 (p0, x0 - svcntb ()), ++ z0 = svld2 (p0, x0 - svcntb ())) ++ ++/* ++** ld2_u8_m2: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u8_m2, svuint8x2_t, uint8_t, ++ z0 = svld2_u8 (p0, x0 - svcntb () * 2), ++ z0 = svld2 (p0, x0 - svcntb () * 2)) ++ ++/* ++** ld2_u8_m16: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_u8_m16, svuint8x2_t, uint8_t, ++ z0 = svld2_u8 (p0, x0 - svcntb () * 16), ++ z0 = svld2 (p0, x0 - svcntb () * 16)) ++ ++/* ++** ld2_u8_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_u8_m18, svuint8x2_t, uint8_t, ++ z0 = svld2_u8 (p0, x0 - svcntb () * 18), ++ z0 = svld2 (p0, x0 - svcntb () * 18)) ++ ++/* ++** ld2_vnum_u8_0: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u8_0, svuint8x2_t, uint8_t, ++ z0 = svld2_vnum_u8 (p0, x0, 0), ++ z0 = svld2_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_u8_1: ++** incb x0 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u8_1, svuint8x2_t, uint8_t, ++ z0 = svld2_vnum_u8 (p0, x0, 1), ++ z0 = svld2_vnum (p0, x0, 1)) ++ ++/* ++** ld2_vnum_u8_2: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u8_2, svuint8x2_t, uint8_t, ++ z0 = svld2_vnum_u8 (p0, x0, 2), ++ z0 = svld2_vnum (p0, x0, 2)) ++ ++/* ++** ld2_vnum_u8_14: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u8_14, svuint8x2_t, uint8_t, ++ z0 = svld2_vnum_u8 (p0, x0, 14), ++ z0 = svld2_vnum (p0, x0, 14)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld2_vnum_u8_16: ++** incb x0, all, mul #16 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u8_16, svuint8x2_t, uint8_t, ++ z0 = svld2_vnum_u8 (p0, x0, 16), ++ z0 = svld2_vnum (p0, x0, 16)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld2_vnum_u8_m1: ++** decb x0 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u8_m1, svuint8x2_t, uint8_t, ++ z0 = svld2_vnum_u8 (p0, x0, -1), ++ z0 = svld2_vnum (p0, x0, -1)) ++ ++/* ++** ld2_vnum_u8_m2: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u8_m2, svuint8x2_t, uint8_t, ++ z0 = svld2_vnum_u8 (p0, x0, -2), ++ z0 = svld2_vnum (p0, x0, -2)) ++ ++/* ++** ld2_vnum_u8_m16: ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u8_m16, svuint8x2_t, uint8_t, ++ z0 = svld2_vnum_u8 (p0, x0, -16), ++ z0 = svld2_vnum (p0, x0, -16)) ++ ++/* ++** ld2_vnum_u8_m18: ++** addvl (x[0-9]+), x0, #-18 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u8_m18, svuint8x2_t, uint8_t, ++ z0 = svld2_vnum_u8 (p0, x0, -18), ++ z0 = svld2_vnum (p0, x0, -18)) ++ ++/* ++** ld2_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld2_vnum_u8_x1, svuint8x2_t, uint8_t, ++ z0 = svld2_vnum_u8 (p0, x0, x1), ++ z0 = svld2_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_bf16.c +new file mode 100644 +index 000000000..e0b4fb1af +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_bf16.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_bf16_base: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_base, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_bf16_index: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_index, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_bf16_1: ++** incb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_1, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 + svcnth ()), ++ z0 = svld3 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_bf16_2: ++** incb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_2, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 + svcnth () * 2), ++ z0 = svld3 (p0, x0 + svcnth () * 2)) ++ ++/* ++** ld3_bf16_3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_3, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 + svcnth () * 3), ++ z0 = svld3 (p0, x0 + svcnth () * 3)) ++ ++/* ++** ld3_bf16_21: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_21, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 + svcnth () * 21), ++ z0 = svld3 (p0, x0 + svcnth () * 21)) ++ ++/* ++** ld3_bf16_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_24, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 + svcnth () * 24), ++ z0 = svld3 (p0, x0 + svcnth () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_bf16_m1: ++** decb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_m1, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 - svcnth ()), ++ z0 = svld3 (p0, x0 - svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_bf16_m2: ++** decb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_m2, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 - svcnth () * 2), ++ z0 = svld3 (p0, x0 - svcnth () * 2)) ++ ++/* ++** ld3_bf16_m3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_m3, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 - svcnth () * 3), ++ z0 = svld3 (p0, x0 - svcnth () * 3)) ++ ++/* ++** ld3_bf16_m24: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_m24, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 - svcnth () * 24), ++ z0 = svld3 (p0, x0 - svcnth () * 24)) ++ ++/* ++** ld3_bf16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_bf16_m27, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_bf16 (p0, x0 - svcnth () * 27), ++ z0 = svld3 (p0, x0 - svcnth () * 27)) ++ ++/* ++** ld3_vnum_bf16_0: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_0, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_bf16_1: ++** incb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_1, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_bf16_2: ++** incb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_2, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_bf16_3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_3, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_bf16_21: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_21, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_bf16_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_24, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_bf16_m1: ++** decb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_m1, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_bf16_m2: ++** decb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_m2, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_bf16_m3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_m3, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_bf16_m24: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_m24, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_bf16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_m27, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld3_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_bf16_x1, svbfloat16x3_t, bfloat16_t, ++ z0 = svld3_vnum_bf16 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f16.c +new file mode 100644 +index 000000000..3d7777e52 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f16.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_f16_base: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_base, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_f16_index: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_index, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_f16_1: ++** incb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_1, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 + svcnth ()), ++ z0 = svld3 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_f16_2: ++** incb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_2, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 + svcnth () * 2), ++ z0 = svld3 (p0, x0 + svcnth () * 2)) ++ ++/* ++** ld3_f16_3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_3, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 + svcnth () * 3), ++ z0 = svld3 (p0, x0 + svcnth () * 3)) ++ ++/* ++** ld3_f16_21: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_21, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 + svcnth () * 21), ++ z0 = svld3 (p0, x0 + svcnth () * 21)) ++ ++/* ++** ld3_f16_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_24, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 + svcnth () * 24), ++ z0 = svld3 (p0, x0 + svcnth () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_f16_m1: ++** decb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_m1, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 - svcnth ()), ++ z0 = svld3 (p0, x0 - svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_f16_m2: ++** decb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_m2, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 - svcnth () * 2), ++ z0 = svld3 (p0, x0 - svcnth () * 2)) ++ ++/* ++** ld3_f16_m3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_m3, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 - svcnth () * 3), ++ z0 = svld3 (p0, x0 - svcnth () * 3)) ++ ++/* ++** ld3_f16_m24: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_m24, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 - svcnth () * 24), ++ z0 = svld3 (p0, x0 - svcnth () * 24)) ++ ++/* ++** ld3_f16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_f16_m27, svfloat16x3_t, float16_t, ++ z0 = svld3_f16 (p0, x0 - svcnth () * 27), ++ z0 = svld3 (p0, x0 - svcnth () * 27)) ++ ++/* ++** ld3_vnum_f16_0: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_0, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_f16_1: ++** incb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_1, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_f16_2: ++** incb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_2, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_f16_3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_3, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_f16_21: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_21, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_f16_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_24, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_f16_m1: ++** decb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_m1, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_f16_m2: ++** decb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_m2, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_f16_m3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_m3, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_f16_m24: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_m24, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_f16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_m27, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld3_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f16_x1, svfloat16x3_t, float16_t, ++ z0 = svld3_vnum_f16 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f32.c +new file mode 100644 +index 000000000..4e4ad7521 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f32.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_f32_base: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_base, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_f32_index: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_index, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_f32_1: ++** incb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_1, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 + svcntw ()), ++ z0 = svld3 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_f32_2: ++** incb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_2, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 + svcntw () * 2), ++ z0 = svld3 (p0, x0 + svcntw () * 2)) ++ ++/* ++** ld3_f32_3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_3, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 + svcntw () * 3), ++ z0 = svld3 (p0, x0 + svcntw () * 3)) ++ ++/* ++** ld3_f32_21: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_21, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 + svcntw () * 21), ++ z0 = svld3 (p0, x0 + svcntw () * 21)) ++ ++/* ++** ld3_f32_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_24, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 + svcntw () * 24), ++ z0 = svld3 (p0, x0 + svcntw () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_f32_m1: ++** decb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_m1, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 - svcntw ()), ++ z0 = svld3 (p0, x0 - svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_f32_m2: ++** decb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_m2, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 - svcntw () * 2), ++ z0 = svld3 (p0, x0 - svcntw () * 2)) ++ ++/* ++** ld3_f32_m3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_m3, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 - svcntw () * 3), ++ z0 = svld3 (p0, x0 - svcntw () * 3)) ++ ++/* ++** ld3_f32_m24: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_m24, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 - svcntw () * 24), ++ z0 = svld3 (p0, x0 - svcntw () * 24)) ++ ++/* ++** ld3_f32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_f32_m27, svfloat32x3_t, float32_t, ++ z0 = svld3_f32 (p0, x0 - svcntw () * 27), ++ z0 = svld3 (p0, x0 - svcntw () * 27)) ++ ++/* ++** ld3_vnum_f32_0: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_0, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_f32_1: ++** incb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_1, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_f32_2: ++** incb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_2, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_f32_3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_3, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_f32_21: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_21, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_f32_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_24, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_f32_m1: ++** decb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_m1, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_f32_m2: ++** decb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_m2, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_f32_m3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_m3, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_f32_m24: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_m24, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_f32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_m27, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld3_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f32_x1, svfloat32x3_t, float32_t, ++ z0 = svld3_vnum_f32 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f64.c +new file mode 100644 +index 000000000..7e6e1e749 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f64.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_f64_base: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_base, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_f64_index: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_index, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_f64_1: ++** incb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_1, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 + svcntd ()), ++ z0 = svld3 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_f64_2: ++** incb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_2, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 + svcntd () * 2), ++ z0 = svld3 (p0, x0 + svcntd () * 2)) ++ ++/* ++** ld3_f64_3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_3, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 + svcntd () * 3), ++ z0 = svld3 (p0, x0 + svcntd () * 3)) ++ ++/* ++** ld3_f64_21: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_21, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 + svcntd () * 21), ++ z0 = svld3 (p0, x0 + svcntd () * 21)) ++ ++/* ++** ld3_f64_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_24, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 + svcntd () * 24), ++ z0 = svld3 (p0, x0 + svcntd () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_f64_m1: ++** decb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_m1, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 - svcntd ()), ++ z0 = svld3 (p0, x0 - svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_f64_m2: ++** decb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_m2, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 - svcntd () * 2), ++ z0 = svld3 (p0, x0 - svcntd () * 2)) ++ ++/* ++** ld3_f64_m3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_m3, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 - svcntd () * 3), ++ z0 = svld3 (p0, x0 - svcntd () * 3)) ++ ++/* ++** ld3_f64_m24: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_m24, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 - svcntd () * 24), ++ z0 = svld3 (p0, x0 - svcntd () * 24)) ++ ++/* ++** ld3_f64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_f64_m27, svfloat64x3_t, float64_t, ++ z0 = svld3_f64 (p0, x0 - svcntd () * 27), ++ z0 = svld3 (p0, x0 - svcntd () * 27)) ++ ++/* ++** ld3_vnum_f64_0: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_0, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_f64_1: ++** incb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_1, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_f64_2: ++** incb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_2, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_f64_3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_3, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_f64_21: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_21, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_f64_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_24, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_f64_m1: ++** decb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_m1, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_f64_m2: ++** decb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_m2, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_f64_m3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_m3, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_f64_m24: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_m24, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_f64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_m27, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld3_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_f64_x1, svfloat64x3_t, float64_t, ++ z0 = svld3_vnum_f64 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s16.c +new file mode 100644 +index 000000000..d4a046c64 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s16.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_s16_base: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_base, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_s16_index: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_index, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s16_1: ++** incb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_1, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 + svcnth ()), ++ z0 = svld3 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s16_2: ++** incb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_2, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 + svcnth () * 2), ++ z0 = svld3 (p0, x0 + svcnth () * 2)) ++ ++/* ++** ld3_s16_3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_3, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 + svcnth () * 3), ++ z0 = svld3 (p0, x0 + svcnth () * 3)) ++ ++/* ++** ld3_s16_21: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_21, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 + svcnth () * 21), ++ z0 = svld3 (p0, x0 + svcnth () * 21)) ++ ++/* ++** ld3_s16_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_24, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 + svcnth () * 24), ++ z0 = svld3 (p0, x0 + svcnth () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_s16_m1: ++** decb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_m1, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 - svcnth ()), ++ z0 = svld3 (p0, x0 - svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s16_m2: ++** decb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_m2, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 - svcnth () * 2), ++ z0 = svld3 (p0, x0 - svcnth () * 2)) ++ ++/* ++** ld3_s16_m3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_m3, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 - svcnth () * 3), ++ z0 = svld3 (p0, x0 - svcnth () * 3)) ++ ++/* ++** ld3_s16_m24: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_m24, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 - svcnth () * 24), ++ z0 = svld3 (p0, x0 - svcnth () * 24)) ++ ++/* ++** ld3_s16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_s16_m27, svint16x3_t, int16_t, ++ z0 = svld3_s16 (p0, x0 - svcnth () * 27), ++ z0 = svld3 (p0, x0 - svcnth () * 27)) ++ ++/* ++** ld3_vnum_s16_0: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_0, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s16_1: ++** incb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_1, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s16_2: ++** incb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_2, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_s16_3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_3, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_s16_21: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_21, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_s16_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_24, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s16_m1: ++** decb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_m1, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_s16_m2: ++** decb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_m2, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_s16_m3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_m3, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_s16_m24: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_m24, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_s16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_m27, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld3_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s16_x1, svint16x3_t, int16_t, ++ z0 = svld3_vnum_s16 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s32.c +new file mode 100644 +index 000000000..3b0ba6e2a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s32.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_s32_base: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_base, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_s32_index: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_index, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s32_1: ++** incb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_1, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 + svcntw ()), ++ z0 = svld3 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s32_2: ++** incb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_2, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 + svcntw () * 2), ++ z0 = svld3 (p0, x0 + svcntw () * 2)) ++ ++/* ++** ld3_s32_3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_3, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 + svcntw () * 3), ++ z0 = svld3 (p0, x0 + svcntw () * 3)) ++ ++/* ++** ld3_s32_21: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_21, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 + svcntw () * 21), ++ z0 = svld3 (p0, x0 + svcntw () * 21)) ++ ++/* ++** ld3_s32_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_24, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 + svcntw () * 24), ++ z0 = svld3 (p0, x0 + svcntw () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_s32_m1: ++** decb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_m1, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 - svcntw ()), ++ z0 = svld3 (p0, x0 - svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s32_m2: ++** decb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_m2, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 - svcntw () * 2), ++ z0 = svld3 (p0, x0 - svcntw () * 2)) ++ ++/* ++** ld3_s32_m3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_m3, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 - svcntw () * 3), ++ z0 = svld3 (p0, x0 - svcntw () * 3)) ++ ++/* ++** ld3_s32_m24: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_m24, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 - svcntw () * 24), ++ z0 = svld3 (p0, x0 - svcntw () * 24)) ++ ++/* ++** ld3_s32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_s32_m27, svint32x3_t, int32_t, ++ z0 = svld3_s32 (p0, x0 - svcntw () * 27), ++ z0 = svld3 (p0, x0 - svcntw () * 27)) ++ ++/* ++** ld3_vnum_s32_0: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_0, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s32_1: ++** incb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_1, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s32_2: ++** incb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_2, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_s32_3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_3, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_s32_21: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_21, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_s32_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_24, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s32_m1: ++** decb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_m1, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_s32_m2: ++** decb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_m2, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_s32_m3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_m3, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_s32_m24: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_m24, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_s32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_m27, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld3_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s32_x1, svint32x3_t, int32_t, ++ z0 = svld3_vnum_s32 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s64.c +new file mode 100644 +index 000000000..080a10b8f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s64.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_s64_base: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_base, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_s64_index: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_index, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s64_1: ++** incb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_1, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 + svcntd ()), ++ z0 = svld3 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s64_2: ++** incb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_2, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 + svcntd () * 2), ++ z0 = svld3 (p0, x0 + svcntd () * 2)) ++ ++/* ++** ld3_s64_3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_3, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 + svcntd () * 3), ++ z0 = svld3 (p0, x0 + svcntd () * 3)) ++ ++/* ++** ld3_s64_21: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_21, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 + svcntd () * 21), ++ z0 = svld3 (p0, x0 + svcntd () * 21)) ++ ++/* ++** ld3_s64_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_24, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 + svcntd () * 24), ++ z0 = svld3 (p0, x0 + svcntd () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_s64_m1: ++** decb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_m1, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 - svcntd ()), ++ z0 = svld3 (p0, x0 - svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s64_m2: ++** decb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_m2, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 - svcntd () * 2), ++ z0 = svld3 (p0, x0 - svcntd () * 2)) ++ ++/* ++** ld3_s64_m3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_m3, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 - svcntd () * 3), ++ z0 = svld3 (p0, x0 - svcntd () * 3)) ++ ++/* ++** ld3_s64_m24: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_m24, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 - svcntd () * 24), ++ z0 = svld3 (p0, x0 - svcntd () * 24)) ++ ++/* ++** ld3_s64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_s64_m27, svint64x3_t, int64_t, ++ z0 = svld3_s64 (p0, x0 - svcntd () * 27), ++ z0 = svld3 (p0, x0 - svcntd () * 27)) ++ ++/* ++** ld3_vnum_s64_0: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_0, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s64_1: ++** incb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_1, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s64_2: ++** incb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_2, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_s64_3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_3, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_s64_21: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_21, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_s64_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_24, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s64_m1: ++** decb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_m1, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_s64_m2: ++** decb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_m2, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_s64_m3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_m3, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_s64_m24: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_m24, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_s64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_m27, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld3_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s64_x1, svint64x3_t, int64_t, ++ z0 = svld3_vnum_s64 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s8.c +new file mode 100644 +index 000000000..e0c551472 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s8.c +@@ -0,0 +1,246 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_s8_base: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_base, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_s8_index: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_index, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s8_1: ++** incb x0 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_1, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 + svcntb ()), ++ z0 = svld3 (p0, x0 + svcntb ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s8_2: ++** incb x0, all, mul #2 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_2, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 + svcntb () * 2), ++ z0 = svld3 (p0, x0 + svcntb () * 2)) ++ ++/* ++** ld3_s8_3: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_3, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 + svcntb () * 3), ++ z0 = svld3 (p0, x0 + svcntb () * 3)) ++ ++/* ++** ld3_s8_21: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_21, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 + svcntb () * 21), ++ z0 = svld3 (p0, x0 + svcntb () * 21)) ++ ++/* ++** ld3_s8_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_24, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 + svcntb () * 24), ++ z0 = svld3 (p0, x0 + svcntb () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_s8_m1: ++** decb x0 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_m1, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 - svcntb ()), ++ z0 = svld3 (p0, x0 - svcntb ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_s8_m2: ++** decb x0, all, mul #2 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_m2, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 - svcntb () * 2), ++ z0 = svld3 (p0, x0 - svcntb () * 2)) ++ ++/* ++** ld3_s8_m3: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_m3, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 - svcntb () * 3), ++ z0 = svld3 (p0, x0 - svcntb () * 3)) ++ ++/* ++** ld3_s8_m24: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_m24, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 - svcntb () * 24), ++ z0 = svld3 (p0, x0 - svcntb () * 24)) ++ ++/* ++** ld3_s8_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_s8_m27, svint8x3_t, int8_t, ++ z0 = svld3_s8 (p0, x0 - svcntb () * 27), ++ z0 = svld3 (p0, x0 - svcntb () * 27)) ++ ++/* ++** ld3_vnum_s8_0: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_0, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s8_1: ++** incb x0 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_1, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s8_2: ++** incb x0, all, mul #2 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_2, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_s8_3: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_3, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_s8_21: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_21, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_s8_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_24, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_s8_m1: ++** decb x0 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_m1, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_s8_m2: ++** decb x0, all, mul #2 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_m2, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_s8_m3: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_m3, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_s8_m24: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_m24, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_s8_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_m27, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* ++** ld3_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld3b {z0\.b - z2\.b}, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld3_vnum_s8_x1, svint8x3_t, int8_t, ++ z0 = svld3_vnum_s8 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u16.c +new file mode 100644 +index 000000000..12f6dd092 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u16.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_u16_base: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_base, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_u16_index: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_index, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u16_1: ++** incb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_1, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 + svcnth ()), ++ z0 = svld3 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u16_2: ++** incb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_2, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 + svcnth () * 2), ++ z0 = svld3 (p0, x0 + svcnth () * 2)) ++ ++/* ++** ld3_u16_3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_3, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 + svcnth () * 3), ++ z0 = svld3 (p0, x0 + svcnth () * 3)) ++ ++/* ++** ld3_u16_21: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_21, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 + svcnth () * 21), ++ z0 = svld3 (p0, x0 + svcnth () * 21)) ++ ++/* ++** ld3_u16_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_24, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 + svcnth () * 24), ++ z0 = svld3 (p0, x0 + svcnth () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_u16_m1: ++** decb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_m1, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 - svcnth ()), ++ z0 = svld3 (p0, x0 - svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u16_m2: ++** decb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_m2, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 - svcnth () * 2), ++ z0 = svld3 (p0, x0 - svcnth () * 2)) ++ ++/* ++** ld3_u16_m3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_m3, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 - svcnth () * 3), ++ z0 = svld3 (p0, x0 - svcnth () * 3)) ++ ++/* ++** ld3_u16_m24: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_m24, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 - svcnth () * 24), ++ z0 = svld3 (p0, x0 - svcnth () * 24)) ++ ++/* ++** ld3_u16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_u16_m27, svuint16x3_t, uint16_t, ++ z0 = svld3_u16 (p0, x0 - svcnth () * 27), ++ z0 = svld3 (p0, x0 - svcnth () * 27)) ++ ++/* ++** ld3_vnum_u16_0: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_0, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u16_1: ++** incb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_1, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u16_2: ++** incb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_2, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_u16_3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_3, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_u16_21: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_21, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_u16_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_24, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u16_m1: ++** decb x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_m1, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_u16_m2: ++** decb x0, all, mul #2 ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_m2, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_u16_m3: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_m3, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_u16_m24: ++** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_m24, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_u16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_m27, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld3_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld3h {z0\.h - z2\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u16_x1, svuint16x3_t, uint16_t, ++ z0 = svld3_vnum_u16 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u32.c +new file mode 100644 +index 000000000..ffc6edfdc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u32.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_u32_base: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_base, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_u32_index: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_index, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u32_1: ++** incb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_1, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 + svcntw ()), ++ z0 = svld3 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u32_2: ++** incb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_2, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 + svcntw () * 2), ++ z0 = svld3 (p0, x0 + svcntw () * 2)) ++ ++/* ++** ld3_u32_3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_3, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 + svcntw () * 3), ++ z0 = svld3 (p0, x0 + svcntw () * 3)) ++ ++/* ++** ld3_u32_21: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_21, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 + svcntw () * 21), ++ z0 = svld3 (p0, x0 + svcntw () * 21)) ++ ++/* ++** ld3_u32_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_24, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 + svcntw () * 24), ++ z0 = svld3 (p0, x0 + svcntw () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_u32_m1: ++** decb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_m1, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 - svcntw ()), ++ z0 = svld3 (p0, x0 - svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u32_m2: ++** decb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_m2, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 - svcntw () * 2), ++ z0 = svld3 (p0, x0 - svcntw () * 2)) ++ ++/* ++** ld3_u32_m3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_m3, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 - svcntw () * 3), ++ z0 = svld3 (p0, x0 - svcntw () * 3)) ++ ++/* ++** ld3_u32_m24: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_m24, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 - svcntw () * 24), ++ z0 = svld3 (p0, x0 - svcntw () * 24)) ++ ++/* ++** ld3_u32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_u32_m27, svuint32x3_t, uint32_t, ++ z0 = svld3_u32 (p0, x0 - svcntw () * 27), ++ z0 = svld3 (p0, x0 - svcntw () * 27)) ++ ++/* ++** ld3_vnum_u32_0: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_0, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u32_1: ++** incb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_1, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u32_2: ++** incb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_2, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_u32_3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_3, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_u32_21: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_21, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_u32_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_24, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u32_m1: ++** decb x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_m1, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_u32_m2: ++** decb x0, all, mul #2 ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_m2, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_u32_m3: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_m3, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_u32_m24: ++** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_m24, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_u32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_m27, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld3_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld3w {z0\.s - z2\.s}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u32_x1, svuint32x3_t, uint32_t, ++ z0 = svld3_vnum_u32 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u64.c +new file mode 100644 +index 000000000..2c0dc2f1a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u64.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_u64_base: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_base, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_u64_index: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_index, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u64_1: ++** incb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_1, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 + svcntd ()), ++ z0 = svld3 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u64_2: ++** incb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_2, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 + svcntd () * 2), ++ z0 = svld3 (p0, x0 + svcntd () * 2)) ++ ++/* ++** ld3_u64_3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_3, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 + svcntd () * 3), ++ z0 = svld3 (p0, x0 + svcntd () * 3)) ++ ++/* ++** ld3_u64_21: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_21, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 + svcntd () * 21), ++ z0 = svld3 (p0, x0 + svcntd () * 21)) ++ ++/* ++** ld3_u64_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_24, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 + svcntd () * 24), ++ z0 = svld3 (p0, x0 + svcntd () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_u64_m1: ++** decb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_m1, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 - svcntd ()), ++ z0 = svld3 (p0, x0 - svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u64_m2: ++** decb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_m2, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 - svcntd () * 2), ++ z0 = svld3 (p0, x0 - svcntd () * 2)) ++ ++/* ++** ld3_u64_m3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_m3, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 - svcntd () * 3), ++ z0 = svld3 (p0, x0 - svcntd () * 3)) ++ ++/* ++** ld3_u64_m24: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_m24, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 - svcntd () * 24), ++ z0 = svld3 (p0, x0 - svcntd () * 24)) ++ ++/* ++** ld3_u64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_u64_m27, svuint64x3_t, uint64_t, ++ z0 = svld3_u64 (p0, x0 - svcntd () * 27), ++ z0 = svld3 (p0, x0 - svcntd () * 27)) ++ ++/* ++** ld3_vnum_u64_0: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_0, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u64_1: ++** incb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_1, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u64_2: ++** incb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_2, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_u64_3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_3, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_u64_21: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_21, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_u64_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_24, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u64_m1: ++** decb x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_m1, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_u64_m2: ++** decb x0, all, mul #2 ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_m2, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_u64_m3: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_m3, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_u64_m24: ++** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_m24, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_u64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_m27, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld3_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld3d {z0\.d - z2\.d}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u64_x1, svuint64x3_t, uint64_t, ++ z0 = svld3_vnum_u64 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u8.c +new file mode 100644 +index 000000000..e9d1ab495 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u8.c +@@ -0,0 +1,246 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld3_u8_base: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_base, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0), ++ z0 = svld3 (p0, x0)) ++ ++/* ++** ld3_u8_index: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_index, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 + x1), ++ z0 = svld3 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u8_1: ++** incb x0 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_1, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 + svcntb ()), ++ z0 = svld3 (p0, x0 + svcntb ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u8_2: ++** incb x0, all, mul #2 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_2, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 + svcntb () * 2), ++ z0 = svld3 (p0, x0 + svcntb () * 2)) ++ ++/* ++** ld3_u8_3: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_3, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 + svcntb () * 3), ++ z0 = svld3 (p0, x0 + svcntb () * 3)) ++ ++/* ++** ld3_u8_21: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_21, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 + svcntb () * 21), ++ z0 = svld3 (p0, x0 + svcntb () * 21)) ++ ++/* ++** ld3_u8_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_24, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 + svcntb () * 24), ++ z0 = svld3 (p0, x0 + svcntb () * 24)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_u8_m1: ++** decb x0 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_m1, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 - svcntb ()), ++ z0 = svld3 (p0, x0 - svcntb ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_u8_m2: ++** decb x0, all, mul #2 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_m2, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 - svcntb () * 2), ++ z0 = svld3 (p0, x0 - svcntb () * 2)) ++ ++/* ++** ld3_u8_m3: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_m3, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 - svcntb () * 3), ++ z0 = svld3 (p0, x0 - svcntb () * 3)) ++ ++/* ++** ld3_u8_m24: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_m24, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 - svcntb () * 24), ++ z0 = svld3 (p0, x0 - svcntb () * 24)) ++ ++/* ++** ld3_u8_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_u8_m27, svuint8x3_t, uint8_t, ++ z0 = svld3_u8 (p0, x0 - svcntb () * 27), ++ z0 = svld3 (p0, x0 - svcntb () * 27)) ++ ++/* ++** ld3_vnum_u8_0: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_0, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, 0), ++ z0 = svld3_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u8_1: ++** incb x0 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_1, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, 1), ++ z0 = svld3_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u8_2: ++** incb x0, all, mul #2 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_2, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, 2), ++ z0 = svld3_vnum (p0, x0, 2)) ++ ++/* ++** ld3_vnum_u8_3: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_3, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, 3), ++ z0 = svld3_vnum (p0, x0, 3)) ++ ++/* ++** ld3_vnum_u8_21: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_21, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, 21), ++ z0 = svld3_vnum (p0, x0, 21)) ++ ++/* ++** ld3_vnum_u8_24: ++** addvl (x[0-9]+), x0, #24 ++** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_24, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, 24), ++ z0 = svld3_vnum (p0, x0, 24)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld3_vnum_u8_m1: ++** decb x0 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_m1, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, -1), ++ z0 = svld3_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld3_vnum_u8_m2: ++** decb x0, all, mul #2 ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_m2, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, -2), ++ z0 = svld3_vnum (p0, x0, -2)) ++ ++/* ++** ld3_vnum_u8_m3: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_m3, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, -3), ++ z0 = svld3_vnum (p0, x0, -3)) ++ ++/* ++** ld3_vnum_u8_m24: ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_m24, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, -24), ++ z0 = svld3_vnum (p0, x0, -24)) ++ ++/* ++** ld3_vnum_u8_m27: ++** addvl (x[0-9]+), x0, #-27 ++** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_m27, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, -27), ++ z0 = svld3_vnum (p0, x0, -27)) ++ ++/* ++** ld3_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld3b {z0\.b - z2\.b}, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld3b {z0\.b - z2\.b}, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld3_vnum_u8_x1, svuint8x3_t, uint8_t, ++ z0 = svld3_vnum_u8 (p0, x0, x1), ++ z0 = svld3_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_bf16.c +new file mode 100644 +index 000000000..123ff6355 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_bf16.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_bf16_base: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_base, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_bf16_index: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_index, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_bf16_1: ++** incb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_1, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 + svcnth ()), ++ z0 = svld4 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_bf16_2: ++** incb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_2, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 + svcnth () * 2), ++ z0 = svld4 (p0, x0 + svcnth () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_bf16_3: ++** incb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_3, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 + svcnth () * 3), ++ z0 = svld4 (p0, x0 + svcnth () * 3)) ++ ++/* ++** ld4_bf16_4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_4, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 + svcnth () * 4), ++ z0 = svld4 (p0, x0 + svcnth () * 4)) ++ ++/* ++** ld4_bf16_28: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_28, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 + svcnth () * 28), ++ z0 = svld4 (p0, x0 + svcnth () * 28)) ++ ++/* ++** ld4_bf16_32: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_32, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 + svcnth () * 32), ++ z0 = svld4 (p0, x0 + svcnth () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_bf16_m1: ++** decb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_m1, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 - svcnth ()), ++ z0 = svld4 (p0, x0 - svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_bf16_m2: ++** decb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_m2, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 - svcnth () * 2), ++ z0 = svld4 (p0, x0 - svcnth () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_bf16_m3: ++** decb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_m3, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 - svcnth () * 3), ++ z0 = svld4 (p0, x0 - svcnth () * 3)) ++ ++/* ++** ld4_bf16_m4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_m4, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 - svcnth () * 4), ++ z0 = svld4 (p0, x0 - svcnth () * 4)) ++ ++/* ++** ld4_bf16_m32: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_m32, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 - svcnth () * 32), ++ z0 = svld4 (p0, x0 - svcnth () * 32)) ++ ++/* ++** ld4_bf16_m36: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_bf16_m36, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_bf16 (p0, x0 - svcnth () * 36), ++ z0 = svld4 (p0, x0 - svcnth () * 36)) ++ ++/* ++** ld4_vnum_bf16_0: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_0, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_bf16_1: ++** incb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_1, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_bf16_2: ++** incb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_2, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_bf16_3: ++** incb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_3, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_bf16_4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_4, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_bf16_28: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_28, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_bf16_32: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_32, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_bf16_m1: ++** decb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_bf16_m2: ++** decb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_bf16_m3: ++** decb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_bf16_m4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_bf16_m32: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_bf16_m36: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld4_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t, ++ z0 = svld4_vnum_bf16 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f16.c +new file mode 100644 +index 000000000..0d0ecf0af +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f16.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_f16_base: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_base, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_f16_index: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_index, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f16_1: ++** incb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_1, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 + svcnth ()), ++ z0 = svld4 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f16_2: ++** incb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_2, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 + svcnth () * 2), ++ z0 = svld4 (p0, x0 + svcnth () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f16_3: ++** incb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_3, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 + svcnth () * 3), ++ z0 = svld4 (p0, x0 + svcnth () * 3)) ++ ++/* ++** ld4_f16_4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_4, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 + svcnth () * 4), ++ z0 = svld4 (p0, x0 + svcnth () * 4)) ++ ++/* ++** ld4_f16_28: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_28, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 + svcnth () * 28), ++ z0 = svld4 (p0, x0 + svcnth () * 28)) ++ ++/* ++** ld4_f16_32: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_32, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 + svcnth () * 32), ++ z0 = svld4 (p0, x0 + svcnth () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f16_m1: ++** decb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_m1, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 - svcnth ()), ++ z0 = svld4 (p0, x0 - svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f16_m2: ++** decb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_m2, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 - svcnth () * 2), ++ z0 = svld4 (p0, x0 - svcnth () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_f16_m3: ++** decb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_m3, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 - svcnth () * 3), ++ z0 = svld4 (p0, x0 - svcnth () * 3)) ++ ++/* ++** ld4_f16_m4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_m4, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 - svcnth () * 4), ++ z0 = svld4 (p0, x0 - svcnth () * 4)) ++ ++/* ++** ld4_f16_m32: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_m32, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 - svcnth () * 32), ++ z0 = svld4 (p0, x0 - svcnth () * 32)) ++ ++/* ++** ld4_f16_m36: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_f16_m36, svfloat16x4_t, float16_t, ++ z0 = svld4_f16 (p0, x0 - svcnth () * 36), ++ z0 = svld4 (p0, x0 - svcnth () * 36)) ++ ++/* ++** ld4_vnum_f16_0: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_0, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f16_1: ++** incb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_1, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f16_2: ++** incb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_2, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f16_3: ++** incb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_3, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_f16_4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_4, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_f16_28: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_28, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_f16_32: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_32, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f16_m1: ++** decb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_m1, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f16_m2: ++** decb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_m2, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_f16_m3: ++** decb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_m3, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_f16_m4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_m4, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_f16_m32: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_m32, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_f16_m36: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_m36, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld4_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f16_x1, svfloat16x4_t, float16_t, ++ z0 = svld4_vnum_f16 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f32.c +new file mode 100644 +index 000000000..a433d1ffe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f32.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_f32_base: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_base, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_f32_index: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_index, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f32_1: ++** incb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_1, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 + svcntw ()), ++ z0 = svld4 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f32_2: ++** incb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_2, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 + svcntw () * 2), ++ z0 = svld4 (p0, x0 + svcntw () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_f32_3: ++** incb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_3, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 + svcntw () * 3), ++ z0 = svld4 (p0, x0 + svcntw () * 3)) ++ ++/* ++** ld4_f32_4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_4, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 + svcntw () * 4), ++ z0 = svld4 (p0, x0 + svcntw () * 4)) ++ ++/* ++** ld4_f32_28: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_28, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 + svcntw () * 28), ++ z0 = svld4 (p0, x0 + svcntw () * 28)) ++ ++/* ++** ld4_f32_32: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_32, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 + svcntw () * 32), ++ z0 = svld4 (p0, x0 + svcntw () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f32_m1: ++** decb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_m1, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 - svcntw ()), ++ z0 = svld4 (p0, x0 - svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f32_m2: ++** decb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_m2, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 - svcntw () * 2), ++ z0 = svld4 (p0, x0 - svcntw () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f32_m3: ++** decb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_m3, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 - svcntw () * 3), ++ z0 = svld4 (p0, x0 - svcntw () * 3)) ++ ++/* ++** ld4_f32_m4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_m4, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 - svcntw () * 4), ++ z0 = svld4 (p0, x0 - svcntw () * 4)) ++ ++/* ++** ld4_f32_m32: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_m32, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 - svcntw () * 32), ++ z0 = svld4 (p0, x0 - svcntw () * 32)) ++ ++/* ++** ld4_f32_m36: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_f32_m36, svfloat32x4_t, float32_t, ++ z0 = svld4_f32 (p0, x0 - svcntw () * 36), ++ z0 = svld4 (p0, x0 - svcntw () * 36)) ++ ++/* ++** ld4_vnum_f32_0: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_0, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f32_1: ++** incb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_1, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f32_2: ++** incb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_2, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_f32_3: ++** incb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_3, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_f32_4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_4, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_f32_28: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_28, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_f32_32: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_32, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f32_m1: ++** decb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_m1, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f32_m2: ++** decb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_m2, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f32_m3: ++** decb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_m3, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_f32_m4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_m4, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_f32_m32: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_m32, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_f32_m36: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_m36, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld4_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f32_x1, svfloat32x4_t, float32_t, ++ z0 = svld4_vnum_f32 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f64.c +new file mode 100644 +index 000000000..bb18decec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f64.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_f64_base: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_base, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_f64_index: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_index, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f64_1: ++** incb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_1, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 + svcntd ()), ++ z0 = svld4 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f64_2: ++** incb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_2, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 + svcntd () * 2), ++ z0 = svld4 (p0, x0 + svcntd () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f64_3: ++** incb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_3, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 + svcntd () * 3), ++ z0 = svld4 (p0, x0 + svcntd () * 3)) ++ ++/* ++** ld4_f64_4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_4, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 + svcntd () * 4), ++ z0 = svld4 (p0, x0 + svcntd () * 4)) ++ ++/* ++** ld4_f64_28: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_28, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 + svcntd () * 28), ++ z0 = svld4 (p0, x0 + svcntd () * 28)) ++ ++/* ++** ld4_f64_32: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_32, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 + svcntd () * 32), ++ z0 = svld4 (p0, x0 + svcntd () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f64_m1: ++** decb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_m1, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 - svcntd ()), ++ z0 = svld4 (p0, x0 - svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_f64_m2: ++** decb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_m2, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 - svcntd () * 2), ++ z0 = svld4 (p0, x0 - svcntd () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_f64_m3: ++** decb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_m3, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 - svcntd () * 3), ++ z0 = svld4 (p0, x0 - svcntd () * 3)) ++ ++/* ++** ld4_f64_m4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_m4, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 - svcntd () * 4), ++ z0 = svld4 (p0, x0 - svcntd () * 4)) ++ ++/* ++** ld4_f64_m32: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_m32, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 - svcntd () * 32), ++ z0 = svld4 (p0, x0 - svcntd () * 32)) ++ ++/* ++** ld4_f64_m36: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_f64_m36, svfloat64x4_t, float64_t, ++ z0 = svld4_f64 (p0, x0 - svcntd () * 36), ++ z0 = svld4 (p0, x0 - svcntd () * 36)) ++ ++/* ++** ld4_vnum_f64_0: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_0, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f64_1: ++** incb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_1, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f64_2: ++** incb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_2, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f64_3: ++** incb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_3, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_f64_4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_4, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_f64_28: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_28, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_f64_32: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_32, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f64_m1: ++** decb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_m1, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_f64_m2: ++** decb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_m2, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_f64_m3: ++** decb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_m3, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_f64_m4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_m4, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_f64_m32: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_m32, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_f64_m36: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_m36, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld4_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_f64_x1, svfloat64x4_t, float64_t, ++ z0 = svld4_vnum_f64 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s16.c +new file mode 100644 +index 000000000..15fb1b595 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s16.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_s16_base: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_base, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_s16_index: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_index, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s16_1: ++** incb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_1, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 + svcnth ()), ++ z0 = svld4 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s16_2: ++** incb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_2, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 + svcnth () * 2), ++ z0 = svld4 (p0, x0 + svcnth () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_s16_3: ++** incb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_3, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 + svcnth () * 3), ++ z0 = svld4 (p0, x0 + svcnth () * 3)) ++ ++/* ++** ld4_s16_4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_4, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 + svcnth () * 4), ++ z0 = svld4 (p0, x0 + svcnth () * 4)) ++ ++/* ++** ld4_s16_28: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_28, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 + svcnth () * 28), ++ z0 = svld4 (p0, x0 + svcnth () * 28)) ++ ++/* ++** ld4_s16_32: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_32, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 + svcnth () * 32), ++ z0 = svld4 (p0, x0 + svcnth () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s16_m1: ++** decb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_m1, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 - svcnth ()), ++ z0 = svld4 (p0, x0 - svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s16_m2: ++** decb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_m2, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 - svcnth () * 2), ++ z0 = svld4 (p0, x0 - svcnth () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s16_m3: ++** decb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_m3, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 - svcnth () * 3), ++ z0 = svld4 (p0, x0 - svcnth () * 3)) ++ ++/* ++** ld4_s16_m4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_m4, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 - svcnth () * 4), ++ z0 = svld4 (p0, x0 - svcnth () * 4)) ++ ++/* ++** ld4_s16_m32: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_m32, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 - svcnth () * 32), ++ z0 = svld4 (p0, x0 - svcnth () * 32)) ++ ++/* ++** ld4_s16_m36: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_s16_m36, svint16x4_t, int16_t, ++ z0 = svld4_s16 (p0, x0 - svcnth () * 36), ++ z0 = svld4 (p0, x0 - svcnth () * 36)) ++ ++/* ++** ld4_vnum_s16_0: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_0, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s16_1: ++** incb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_1, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s16_2: ++** incb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_2, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_s16_3: ++** incb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_3, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_s16_4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_4, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_s16_28: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_28, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_s16_32: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_32, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s16_m1: ++** decb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_m1, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s16_m2: ++** decb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_m2, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s16_m3: ++** decb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_m3, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_s16_m4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_m4, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_s16_m32: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_m32, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_s16_m36: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_m36, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld4_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s16_x1, svint16x4_t, int16_t, ++ z0 = svld4_vnum_s16 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s32.c +new file mode 100644 +index 000000000..81c67710f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s32.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_s32_base: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_base, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_s32_index: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_index, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s32_1: ++** incb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_1, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 + svcntw ()), ++ z0 = svld4 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s32_2: ++** incb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_2, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 + svcntw () * 2), ++ z0 = svld4 (p0, x0 + svcntw () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s32_3: ++** incb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_3, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 + svcntw () * 3), ++ z0 = svld4 (p0, x0 + svcntw () * 3)) ++ ++/* ++** ld4_s32_4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_4, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 + svcntw () * 4), ++ z0 = svld4 (p0, x0 + svcntw () * 4)) ++ ++/* ++** ld4_s32_28: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_28, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 + svcntw () * 28), ++ z0 = svld4 (p0, x0 + svcntw () * 28)) ++ ++/* ++** ld4_s32_32: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_32, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 + svcntw () * 32), ++ z0 = svld4 (p0, x0 + svcntw () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s32_m1: ++** decb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_m1, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 - svcntw ()), ++ z0 = svld4 (p0, x0 - svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s32_m2: ++** decb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_m2, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 - svcntw () * 2), ++ z0 = svld4 (p0, x0 - svcntw () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_s32_m3: ++** decb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_m3, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 - svcntw () * 3), ++ z0 = svld4 (p0, x0 - svcntw () * 3)) ++ ++/* ++** ld4_s32_m4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_m4, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 - svcntw () * 4), ++ z0 = svld4 (p0, x0 - svcntw () * 4)) ++ ++/* ++** ld4_s32_m32: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_m32, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 - svcntw () * 32), ++ z0 = svld4 (p0, x0 - svcntw () * 32)) ++ ++/* ++** ld4_s32_m36: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_s32_m36, svint32x4_t, int32_t, ++ z0 = svld4_s32 (p0, x0 - svcntw () * 36), ++ z0 = svld4 (p0, x0 - svcntw () * 36)) ++ ++/* ++** ld4_vnum_s32_0: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_0, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s32_1: ++** incb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_1, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s32_2: ++** incb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_2, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s32_3: ++** incb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_3, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_s32_4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_4, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_s32_28: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_28, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_s32_32: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_32, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s32_m1: ++** decb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_m1, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s32_m2: ++** decb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_m2, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_s32_m3: ++** decb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_m3, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_s32_m4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_m4, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_s32_m32: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_m32, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_s32_m36: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_m36, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld4_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s32_x1, svint32x4_t, int32_t, ++ z0 = svld4_vnum_s32 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s64.c +new file mode 100644 +index 000000000..d24c30dcf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s64.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_s64_base: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_base, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_s64_index: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_index, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s64_1: ++** incb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_1, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 + svcntd ()), ++ z0 = svld4 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s64_2: ++** incb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_2, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 + svcntd () * 2), ++ z0 = svld4 (p0, x0 + svcntd () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_s64_3: ++** incb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_3, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 + svcntd () * 3), ++ z0 = svld4 (p0, x0 + svcntd () * 3)) ++ ++/* ++** ld4_s64_4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_4, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 + svcntd () * 4), ++ z0 = svld4 (p0, x0 + svcntd () * 4)) ++ ++/* ++** ld4_s64_28: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_28, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 + svcntd () * 28), ++ z0 = svld4 (p0, x0 + svcntd () * 28)) ++ ++/* ++** ld4_s64_32: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_32, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 + svcntd () * 32), ++ z0 = svld4 (p0, x0 + svcntd () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s64_m1: ++** decb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_m1, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 - svcntd ()), ++ z0 = svld4 (p0, x0 - svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s64_m2: ++** decb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_m2, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 - svcntd () * 2), ++ z0 = svld4 (p0, x0 - svcntd () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s64_m3: ++** decb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_m3, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 - svcntd () * 3), ++ z0 = svld4 (p0, x0 - svcntd () * 3)) ++ ++/* ++** ld4_s64_m4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_m4, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 - svcntd () * 4), ++ z0 = svld4 (p0, x0 - svcntd () * 4)) ++ ++/* ++** ld4_s64_m32: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_m32, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 - svcntd () * 32), ++ z0 = svld4 (p0, x0 - svcntd () * 32)) ++ ++/* ++** ld4_s64_m36: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_s64_m36, svint64x4_t, int64_t, ++ z0 = svld4_s64 (p0, x0 - svcntd () * 36), ++ z0 = svld4 (p0, x0 - svcntd () * 36)) ++ ++/* ++** ld4_vnum_s64_0: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_0, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s64_1: ++** incb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_1, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s64_2: ++** incb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_2, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_s64_3: ++** incb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_3, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_s64_4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_4, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_s64_28: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_28, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_s64_32: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_32, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s64_m1: ++** decb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_m1, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s64_m2: ++** decb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_m2, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s64_m3: ++** decb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_m3, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_s64_m4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_m4, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_s64_m32: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_m32, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_s64_m36: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_m36, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld4_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s64_x1, svint64x4_t, int64_t, ++ z0 = svld4_vnum_s64 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s8.c +new file mode 100644 +index 000000000..d7a17e266 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s8.c +@@ -0,0 +1,290 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_s8_base: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_base, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_s8_index: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_index, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s8_1: ++** incb x0 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_1, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 + svcntb ()), ++ z0 = svld4 (p0, x0 + svcntb ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s8_2: ++** incb x0, all, mul #2 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_2, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 + svcntb () * 2), ++ z0 = svld4 (p0, x0 + svcntb () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s8_3: ++** incb x0, all, mul #3 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_3, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 + svcntb () * 3), ++ z0 = svld4 (p0, x0 + svcntb () * 3)) ++ ++/* ++** ld4_s8_4: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_4, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 + svcntb () * 4), ++ z0 = svld4 (p0, x0 + svcntb () * 4)) ++ ++/* ++** ld4_s8_28: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_28, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 + svcntb () * 28), ++ z0 = svld4 (p0, x0 + svcntb () * 28)) ++ ++/* ++** ld4_s8_32: ++** [^{]* ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_32, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 + svcntb () * 32), ++ z0 = svld4 (p0, x0 + svcntb () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s8_m1: ++** decb x0 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_m1, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 - svcntb ()), ++ z0 = svld4 (p0, x0 - svcntb ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_s8_m2: ++** decb x0, all, mul #2 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_m2, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 - svcntb () * 2), ++ z0 = svld4 (p0, x0 - svcntb () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_s8_m3: ++** decb x0, all, mul #3 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_m3, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 - svcntb () * 3), ++ z0 = svld4 (p0, x0 - svcntb () * 3)) ++ ++/* ++** ld4_s8_m4: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_m4, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 - svcntb () * 4), ++ z0 = svld4 (p0, x0 - svcntb () * 4)) ++ ++/* ++** ld4_s8_m32: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_m32, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 - svcntb () * 32), ++ z0 = svld4 (p0, x0 - svcntb () * 32)) ++ ++/* ++** ld4_s8_m36: ++** [^{]* ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_s8_m36, svint8x4_t, int8_t, ++ z0 = svld4_s8 (p0, x0 - svcntb () * 36), ++ z0 = svld4 (p0, x0 - svcntb () * 36)) ++ ++/* ++** ld4_vnum_s8_0: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_0, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s8_1: ++** incb x0 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_1, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s8_2: ++** incb x0, all, mul #2 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_2, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s8_3: ++** incb x0, all, mul #3 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_3, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_s8_4: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_4, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_s8_28: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_28, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_s8_32: ++** [^{]* ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_32, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s8_m1: ++** decb x0 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_m1, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_s8_m2: ++** decb x0, all, mul #2 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_m2, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_s8_m3: ++** decb x0, all, mul #3 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_m3, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_s8_m4: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_m4, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_s8_m32: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_m32, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_s8_m36: ++** [^{]* ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_m36, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* ++** ld4_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld4b {z0\.b - z3\.b}, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld4_vnum_s8_x1, svint8x4_t, int8_t, ++ z0 = svld4_vnum_s8 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u16.c +new file mode 100644 +index 000000000..234593d10 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u16.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_u16_base: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_base, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_u16_index: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_index, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u16_1: ++** incb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_1, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 + svcnth ()), ++ z0 = svld4 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u16_2: ++** incb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_2, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 + svcnth () * 2), ++ z0 = svld4 (p0, x0 + svcnth () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_u16_3: ++** incb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_3, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 + svcnth () * 3), ++ z0 = svld4 (p0, x0 + svcnth () * 3)) ++ ++/* ++** ld4_u16_4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_4, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 + svcnth () * 4), ++ z0 = svld4 (p0, x0 + svcnth () * 4)) ++ ++/* ++** ld4_u16_28: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_28, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 + svcnth () * 28), ++ z0 = svld4 (p0, x0 + svcnth () * 28)) ++ ++/* ++** ld4_u16_32: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_32, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 + svcnth () * 32), ++ z0 = svld4 (p0, x0 + svcnth () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u16_m1: ++** decb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_m1, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 - svcnth ()), ++ z0 = svld4 (p0, x0 - svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u16_m2: ++** decb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_m2, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 - svcnth () * 2), ++ z0 = svld4 (p0, x0 - svcnth () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u16_m3: ++** decb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_m3, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 - svcnth () * 3), ++ z0 = svld4 (p0, x0 - svcnth () * 3)) ++ ++/* ++** ld4_u16_m4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_m4, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 - svcnth () * 4), ++ z0 = svld4 (p0, x0 - svcnth () * 4)) ++ ++/* ++** ld4_u16_m32: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_m32, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 - svcnth () * 32), ++ z0 = svld4 (p0, x0 - svcnth () * 32)) ++ ++/* ++** ld4_u16_m36: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_u16_m36, svuint16x4_t, uint16_t, ++ z0 = svld4_u16 (p0, x0 - svcnth () * 36), ++ z0 = svld4 (p0, x0 - svcnth () * 36)) ++ ++/* ++** ld4_vnum_u16_0: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_0, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u16_1: ++** incb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_1, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u16_2: ++** incb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_2, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_u16_3: ++** incb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_3, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_u16_4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_4, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_u16_28: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_28, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_u16_32: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_32, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u16_m1: ++** decb x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_m1, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u16_m2: ++** decb x0, all, mul #2 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_m2, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u16_m3: ++** decb x0, all, mul #3 ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_m3, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_u16_m4: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_m4, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_u16_m32: ++** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_m32, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_u16_m36: ++** [^{]* ++** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_m36, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld4_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld4h {z0\.h - z3\.h}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u16_x1, svuint16x4_t, uint16_t, ++ z0 = svld4_vnum_u16 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u32.c +new file mode 100644 +index 000000000..ad2627800 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u32.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_u32_base: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_base, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_u32_index: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_index, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u32_1: ++** incb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_1, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 + svcntw ()), ++ z0 = svld4 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u32_2: ++** incb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_2, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 + svcntw () * 2), ++ z0 = svld4 (p0, x0 + svcntw () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u32_3: ++** incb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_3, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 + svcntw () * 3), ++ z0 = svld4 (p0, x0 + svcntw () * 3)) ++ ++/* ++** ld4_u32_4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_4, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 + svcntw () * 4), ++ z0 = svld4 (p0, x0 + svcntw () * 4)) ++ ++/* ++** ld4_u32_28: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_28, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 + svcntw () * 28), ++ z0 = svld4 (p0, x0 + svcntw () * 28)) ++ ++/* ++** ld4_u32_32: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_32, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 + svcntw () * 32), ++ z0 = svld4 (p0, x0 + svcntw () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u32_m1: ++** decb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_m1, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 - svcntw ()), ++ z0 = svld4 (p0, x0 - svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u32_m2: ++** decb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_m2, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 - svcntw () * 2), ++ z0 = svld4 (p0, x0 - svcntw () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_u32_m3: ++** decb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_m3, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 - svcntw () * 3), ++ z0 = svld4 (p0, x0 - svcntw () * 3)) ++ ++/* ++** ld4_u32_m4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_m4, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 - svcntw () * 4), ++ z0 = svld4 (p0, x0 - svcntw () * 4)) ++ ++/* ++** ld4_u32_m32: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_m32, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 - svcntw () * 32), ++ z0 = svld4 (p0, x0 - svcntw () * 32)) ++ ++/* ++** ld4_u32_m36: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_u32_m36, svuint32x4_t, uint32_t, ++ z0 = svld4_u32 (p0, x0 - svcntw () * 36), ++ z0 = svld4 (p0, x0 - svcntw () * 36)) ++ ++/* ++** ld4_vnum_u32_0: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_0, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u32_1: ++** incb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_1, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u32_2: ++** incb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_2, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u32_3: ++** incb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_3, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_u32_4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_4, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_u32_28: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_28, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_u32_32: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_32, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u32_m1: ++** decb x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_m1, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u32_m2: ++** decb x0, all, mul #2 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_m2, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_u32_m3: ++** decb x0, all, mul #3 ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_m3, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_u32_m4: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_m4, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_u32_m32: ++** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_m32, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_u32_m36: ++** [^{]* ++** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_m36, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld4_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld4w {z0\.s - z3\.s}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u32_x1, svuint32x4_t, uint32_t, ++ z0 = svld4_vnum_u32 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u64.c +new file mode 100644 +index 000000000..8772ba42d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u64.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_u64_base: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_base, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_u64_index: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_index, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u64_1: ++** incb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_1, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 + svcntd ()), ++ z0 = svld4 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u64_2: ++** incb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_2, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 + svcntd () * 2), ++ z0 = svld4 (p0, x0 + svcntd () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_u64_3: ++** incb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_3, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 + svcntd () * 3), ++ z0 = svld4 (p0, x0 + svcntd () * 3)) ++ ++/* ++** ld4_u64_4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_4, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 + svcntd () * 4), ++ z0 = svld4 (p0, x0 + svcntd () * 4)) ++ ++/* ++** ld4_u64_28: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_28, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 + svcntd () * 28), ++ z0 = svld4 (p0, x0 + svcntd () * 28)) ++ ++/* ++** ld4_u64_32: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_32, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 + svcntd () * 32), ++ z0 = svld4 (p0, x0 + svcntd () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u64_m1: ++** decb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_m1, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 - svcntd ()), ++ z0 = svld4 (p0, x0 - svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u64_m2: ++** decb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_m2, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 - svcntd () * 2), ++ z0 = svld4 (p0, x0 - svcntd () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u64_m3: ++** decb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_m3, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 - svcntd () * 3), ++ z0 = svld4 (p0, x0 - svcntd () * 3)) ++ ++/* ++** ld4_u64_m4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_m4, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 - svcntd () * 4), ++ z0 = svld4 (p0, x0 - svcntd () * 4)) ++ ++/* ++** ld4_u64_m32: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_m32, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 - svcntd () * 32), ++ z0 = svld4 (p0, x0 - svcntd () * 32)) ++ ++/* ++** ld4_u64_m36: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_u64_m36, svuint64x4_t, uint64_t, ++ z0 = svld4_u64 (p0, x0 - svcntd () * 36), ++ z0 = svld4 (p0, x0 - svcntd () * 36)) ++ ++/* ++** ld4_vnum_u64_0: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_0, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u64_1: ++** incb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_1, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u64_2: ++** incb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_2, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_u64_3: ++** incb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_3, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_u64_4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_4, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_u64_28: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_28, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_u64_32: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_32, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u64_m1: ++** decb x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_m1, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u64_m2: ++** decb x0, all, mul #2 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_m2, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u64_m3: ++** decb x0, all, mul #3 ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_m3, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_u64_m4: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_m4, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_u64_m32: ++** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_m32, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_u64_m36: ++** [^{]* ++** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_m36, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ld4_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ld4d {z0\.d - z3\.d}, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u64_x1, svuint64x4_t, uint64_t, ++ z0 = svld4_vnum_u64 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u8.c +new file mode 100644 +index 000000000..85b2987ce +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u8.c +@@ -0,0 +1,290 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ld4_u8_base: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_base, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0), ++ z0 = svld4 (p0, x0)) ++ ++/* ++** ld4_u8_index: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_index, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 + x1), ++ z0 = svld4 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u8_1: ++** incb x0 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_1, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 + svcntb ()), ++ z0 = svld4 (p0, x0 + svcntb ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u8_2: ++** incb x0, all, mul #2 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_2, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 + svcntb () * 2), ++ z0 = svld4 (p0, x0 + svcntb () * 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u8_3: ++** incb x0, all, mul #3 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_3, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 + svcntb () * 3), ++ z0 = svld4 (p0, x0 + svcntb () * 3)) ++ ++/* ++** ld4_u8_4: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_4, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 + svcntb () * 4), ++ z0 = svld4 (p0, x0 + svcntb () * 4)) ++ ++/* ++** ld4_u8_28: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_28, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 + svcntb () * 28), ++ z0 = svld4 (p0, x0 + svcntb () * 28)) ++ ++/* ++** ld4_u8_32: ++** [^{]* ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_32, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 + svcntb () * 32), ++ z0 = svld4 (p0, x0 + svcntb () * 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u8_m1: ++** decb x0 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_m1, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 - svcntb ()), ++ z0 = svld4 (p0, x0 - svcntb ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_u8_m2: ++** decb x0, all, mul #2 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_m2, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 - svcntb () * 2), ++ z0 = svld4 (p0, x0 - svcntb () * 2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_u8_m3: ++** decb x0, all, mul #3 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_m3, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 - svcntb () * 3), ++ z0 = svld4 (p0, x0 - svcntb () * 3)) ++ ++/* ++** ld4_u8_m4: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_m4, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 - svcntb () * 4), ++ z0 = svld4 (p0, x0 - svcntb () * 4)) ++ ++/* ++** ld4_u8_m32: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_m32, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 - svcntb () * 32), ++ z0 = svld4 (p0, x0 - svcntb () * 32)) ++ ++/* ++** ld4_u8_m36: ++** [^{]* ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_u8_m36, svuint8x4_t, uint8_t, ++ z0 = svld4_u8 (p0, x0 - svcntb () * 36), ++ z0 = svld4 (p0, x0 - svcntb () * 36)) ++ ++/* ++** ld4_vnum_u8_0: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_0, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, 0), ++ z0 = svld4_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u8_1: ++** incb x0 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_1, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, 1), ++ z0 = svld4_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u8_2: ++** incb x0, all, mul #2 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_2, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, 2), ++ z0 = svld4_vnum (p0, x0, 2)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u8_3: ++** incb x0, all, mul #3 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_3, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, 3), ++ z0 = svld4_vnum (p0, x0, 3)) ++ ++/* ++** ld4_vnum_u8_4: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_4, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, 4), ++ z0 = svld4_vnum (p0, x0, 4)) ++ ++/* ++** ld4_vnum_u8_28: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_28, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, 28), ++ z0 = svld4_vnum (p0, x0, 28)) ++ ++/* ++** ld4_vnum_u8_32: ++** [^{]* ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_32, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, 32), ++ z0 = svld4_vnum (p0, x0, 32)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u8_m1: ++** decb x0 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_m1, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, -1), ++ z0 = svld4_vnum (p0, x0, -1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ld4_vnum_u8_m2: ++** decb x0, all, mul #2 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_m2, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, -2), ++ z0 = svld4_vnum (p0, x0, -2)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ld4_vnum_u8_m3: ++** decb x0, all, mul #3 ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_m3, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, -3), ++ z0 = svld4_vnum (p0, x0, -3)) ++ ++/* ++** ld4_vnum_u8_m4: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_m4, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, -4), ++ z0 = svld4_vnum (p0, x0, -4)) ++ ++/* ++** ld4_vnum_u8_m32: ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_m32, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, -32), ++ z0 = svld4_vnum (p0, x0, -32)) ++ ++/* ++** ld4_vnum_u8_m36: ++** [^{]* ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_m36, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, -36), ++ z0 = svld4_vnum (p0, x0, -36)) ++ ++/* ++** ld4_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ld4b {z0\.b - z3\.b}, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ld4b {z0\.b - z3\.b}, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ld4_vnum_u8_x1, svuint8x4_t, uint8_t, ++ z0 = svld4_vnum_u8 (p0, x0, x1), ++ z0 = svld4_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c +new file mode 100644 +index 000000000..80f646870 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_bf16_base: ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_bf16_base, svbfloat16_t, bfloat16_t, ++ z0 = svldff1_bf16 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_bf16_index: ++** ldff1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1_bf16_index, svbfloat16_t, bfloat16_t, ++ z0 = svldff1_bf16 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_bf16_1: ++** incb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_bf16_1, svbfloat16_t, bfloat16_t, ++ z0 = svldff1_bf16 (p0, x0 + svcnth ()), ++ z0 = svldff1 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_bf16_m1: ++** decb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_bf16_m1, svbfloat16_t, bfloat16_t, ++ z0 = svldff1_bf16 (p0, x0 - svcnth ()), ++ z0 = svldff1 (p0, x0 - svcnth ())) ++ ++/* ++** ldff1_vnum_bf16_0: ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_bf16_0, svbfloat16_t, bfloat16_t, ++ z0 = svldff1_vnum_bf16 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_bf16_1: ++** incb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_bf16_1, svbfloat16_t, bfloat16_t, ++ z0 = svldff1_vnum_bf16 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_bf16_m1: ++** decb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, ++ z0 = svldff1_vnum_bf16 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** ldff1_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, ++ z0 = svldff1_vnum_bf16 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c +new file mode 100644 +index 000000000..13ce863c9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_f16_base: ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_f16_base, svfloat16_t, float16_t, ++ z0 = svldff1_f16 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_f16_index: ++** ldff1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1_f16_index, svfloat16_t, float16_t, ++ z0 = svldff1_f16 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_f16_1: ++** incb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_f16_1, svfloat16_t, float16_t, ++ z0 = svldff1_f16 (p0, x0 + svcnth ()), ++ z0 = svldff1 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_f16_m1: ++** decb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_f16_m1, svfloat16_t, float16_t, ++ z0 = svldff1_f16 (p0, x0 - svcnth ()), ++ z0 = svldff1 (p0, x0 - svcnth ())) ++ ++/* ++** ldff1_vnum_f16_0: ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f16_0, svfloat16_t, float16_t, ++ z0 = svldff1_vnum_f16 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_f16_1: ++** incb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f16_1, svfloat16_t, float16_t, ++ z0 = svldff1_vnum_f16 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_f16_m1: ++** decb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f16_m1, svfloat16_t, float16_t, ++ z0 = svldff1_vnum_f16 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f16_x1, svfloat16_t, float16_t, ++ z0 = svldff1_vnum_f16 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c +new file mode 100644 +index 000000000..2fcc63390 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_f32_base: ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_f32_base, svfloat32_t, float32_t, ++ z0 = svldff1_f32 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_f32_index: ++** ldff1w z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ldff1_f32_index, svfloat32_t, float32_t, ++ z0 = svldff1_f32 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_f32_1: ++** incb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_f32_1, svfloat32_t, float32_t, ++ z0 = svldff1_f32 (p0, x0 + svcntw ()), ++ z0 = svldff1 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_f32_m1: ++** decb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_f32_m1, svfloat32_t, float32_t, ++ z0 = svldff1_f32 (p0, x0 - svcntw ()), ++ z0 = svldff1 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1_vnum_f32_0: ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f32_0, svfloat32_t, float32_t, ++ z0 = svldff1_vnum_f32 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_f32_1: ++** incb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f32_1, svfloat32_t, float32_t, ++ z0 = svldff1_vnum_f32 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_f32_m1: ++** decb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f32_m1, svfloat32_t, float32_t, ++ z0 = svldff1_vnum_f32 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f32_x1, svfloat32_t, float32_t, ++ z0 = svldff1_vnum_f32 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c +new file mode 100644 +index 000000000..cc15b927a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_f64_base: ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_f64_base, svfloat64_t, float64_t, ++ z0 = svldff1_f64 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_f64_index: ++** ldff1d z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ldff1_f64_index, svfloat64_t, float64_t, ++ z0 = svldff1_f64 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_f64_1: ++** incb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_f64_1, svfloat64_t, float64_t, ++ z0 = svldff1_f64 (p0, x0 + svcntd ()), ++ z0 = svldff1 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1_f64_m1: ++** decb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_f64_m1, svfloat64_t, float64_t, ++ z0 = svldff1_f64 (p0, x0 - svcntd ()), ++ z0 = svldff1 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1_vnum_f64_0: ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f64_0, svfloat64_t, float64_t, ++ z0 = svldff1_vnum_f64 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_f64_1: ++** incb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f64_1, svfloat64_t, float64_t, ++ z0 = svldff1_vnum_f64 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_f64_m1: ++** decb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f64_m1, svfloat64_t, float64_t, ++ z0 = svldff1_vnum_f64 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_f64_x1, svfloat64_t, float64_t, ++ z0 = svldff1_vnum_f64 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c +new file mode 100644 +index 000000000..7e330c042 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c +@@ -0,0 +1,272 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_gather_f32_tied1: ++** ldff1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_f32_tied1, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_f32 (p0, z0), ++ z0_res = svldff1_gather_f32 (p0, z0)) ++ ++/* ++** ldff1_gather_f32_untied: ++** ldff1w z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_f32_untied, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_f32 (p0, z1), ++ z0_res = svldff1_gather_f32 (p0, z1)) ++ ++/* ++** ldff1_gather_x0_f32_offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, x0), ++ z0_res = svldff1_gather_offset_f32 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m4_f32_offset: ++** mov (x[0-9]+), #?-4 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m4_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, -4), ++ z0_res = svldff1_gather_offset_f32 (p0, z0, -4)) ++ ++/* ++** ldff1_gather_0_f32_offset: ++** ldff1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 0), ++ z0_res = svldff1_gather_offset_f32 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_5_f32_offset: ++** mov (x[0-9]+), #?5 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_5_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 5), ++ z0_res = svldff1_gather_offset_f32 (p0, z0, 5)) ++ ++/* ++** ldff1_gather_6_f32_offset: ++** mov (x[0-9]+), #?6 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ 
++TEST_LOAD_GATHER_ZS (ldff1_gather_6_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 6), ++ z0_res = svldff1_gather_offset_f32 (p0, z0, 6)) ++ ++/* ++** ldff1_gather_7_f32_offset: ++** mov (x[0-9]+), #?7 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_7_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 7), ++ z0_res = svldff1_gather_offset_f32 (p0, z0, 7)) ++ ++/* ++** ldff1_gather_8_f32_offset: ++** ldff1w z0\.s, p0/z, \[z0\.s, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_8_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 8), ++ z0_res = svldff1_gather_offset_f32 (p0, z0, 8)) ++ ++/* ++** ldff1_gather_124_f32_offset: ++** ldff1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_124_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 124), ++ z0_res = svldff1_gather_offset_f32 (p0, z0, 124)) ++ ++/* ++** ldff1_gather_128_f32_offset: ++** mov (x[0-9]+), #?128 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_128_f32_offset, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 128), ++ z0_res = svldff1_gather_offset_f32 (p0, z0, 128)) ++ ++/* ++** ldff1_gather_x0_f32_index: ++** lsl (x[0-9]+), x0, #?2 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, x0), ++ z0_res = svldff1_gather_index_f32 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m1_f32_index: ++** mov (x[0-9]+), #?-4 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m1_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, -1), ++ z0_res = svldff1_gather_index_f32 (p0, z0, -1)) ++ ++/* ++** ldff1_gather_0_f32_index: ++** ldff1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, 0), ++ z0_res = svldff1_gather_index_f32 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_5_f32_index: ++** ldff1w z0\.s, p0/z, \[z0\.s, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_5_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, 5), ++ z0_res = svldff1_gather_index_f32 (p0, z0, 5)) ++ ++/* ++** ldff1_gather_31_f32_index: ++** ldff1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_31_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, 31), ++ z0_res = svldff1_gather_index_f32 (p0, z0, 31)) ++ ++/* ++** ldff1_gather_32_f32_index: ++** mov (x[0-9]+), #?128 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_32_f32_index, svfloat32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, 32), ++ z0_res = svldff1_gather_index_f32 (p0, z0, 32)) ++ ++/* ++** ldff1_gather_x0_f32_s32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f32_s32offset, svfloat32_t, float32_t, svint32_t, ++ z0_res = svldff1_gather_s32offset_f32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_f32_s32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ 
++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f32_s32offset, svfloat32_t, float32_t, svint32_t, ++ z0_res = svldff1_gather_s32offset_f32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_f32_s32offset: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f32_s32offset, svfloat32_t, float32_t, svint32_t, ++ z0_res = svldff1_gather_s32offset_f32 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_x0_f32_u32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f32_u32offset, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svldff1_gather_u32offset_f32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_f32_u32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f32_u32offset, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svldff1_gather_u32offset_f32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_f32_u32offset: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f32_u32offset, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svldff1_gather_u32offset_f32 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_x0_f32_s32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f32_s32index, svfloat32_t, float32_t, svint32_t, ++ z0_res = svldff1_gather_s32index_f32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_f32_s32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f32_s32index, svfloat32_t, float32_t, svint32_t, ++ z0_res = svldff1_gather_s32index_f32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_f32_s32index: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f32_s32index, svfloat32_t, float32_t, svint32_t, ++ z0_res = svldff1_gather_s32index_f32 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) ++ ++/* ++** ldff1_gather_x0_f32_u32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f32_u32index, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svldff1_gather_u32index_f32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_f32_u32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f32_u32index, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svldff1_gather_u32index_f32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_f32_u32index: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f32_u32index, svfloat32_t, float32_t, svuint32_t, ++ z0_res = svldff1_gather_u32index_f32 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c +new file mode 100644 +index 000000000..d0e47f0bf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c +@@ -0,0 +1,348 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { 
target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_gather_f64_tied1: ++** ldff1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_f64_tied1, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_f64 (p0, z0), ++ z0_res = svldff1_gather_f64 (p0, z0)) ++ ++/* ++** ldff1_gather_f64_untied: ++** ldff1d z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_f64_untied, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_f64 (p0, z1), ++ z0_res = svldff1_gather_f64 (p0, z1)) ++ ++/* ++** ldff1_gather_x0_f64_offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, x0), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m8_f64_offset: ++** mov (x[0-9]+), #?-8 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m8_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, -8), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, -8)) ++ ++/* ++** ldff1_gather_0_f64_offset: ++** ldff1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 0), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_9_f64_offset: ++** mov (x[0-9]+), #?9 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_9_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 9), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, 9)) ++ ++/* ++** ldff1_gather_10_f64_offset: ++** mov (x[0-9]+), #?10 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_10_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 10), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, 10)) ++ ++/* ++** ldff1_gather_11_f64_offset: ++** mov (x[0-9]+), #?11 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_11_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 11), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, 11)) ++ ++/* ++** ldff1_gather_12_f64_offset: ++** mov (x[0-9]+), #?12 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_12_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 12), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, 12)) ++ ++/* ++** ldff1_gather_13_f64_offset: ++** mov (x[0-9]+), #?13 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_13_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 13), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, 13)) ++ ++/* ++** ldff1_gather_14_f64_offset: ++** mov (x[0-9]+), #?14 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_14_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 14), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, 14)) ++ ++/* ++** ldff1_gather_15_f64_offset: ++** mov (x[0-9]+), #?15 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_15_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 15), ++ z0_res = svldff1_gather_offset_f64 
(p0, z0, 15)) ++ ++/* ++** ldff1_gather_16_f64_offset: ++** ldff1d z0\.d, p0/z, \[z0\.d, #16\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_16_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 16), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, 16)) ++ ++/* ++** ldff1_gather_248_f64_offset: ++** ldff1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_248_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 248), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, 248)) ++ ++/* ++** ldff1_gather_256_f64_offset: ++** mov (x[0-9]+), #?256 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_256_f64_offset, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 256), ++ z0_res = svldff1_gather_offset_f64 (p0, z0, 256)) ++ ++/* ++** ldff1_gather_x0_f64_index: ++** lsl (x[0-9]+), x0, #?3 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, x0), ++ z0_res = svldff1_gather_index_f64 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m1_f64_index: ++** mov (x[0-9]+), #?-8 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m1_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, -1), ++ z0_res = svldff1_gather_index_f64 (p0, z0, -1)) ++ ++/* ++** ldff1_gather_0_f64_index: ++** ldff1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, 0), ++ z0_res = svldff1_gather_index_f64 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_5_f64_index: ++** ldff1d z0\.d, p0/z, \[z0\.d, #40\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_5_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, 5), ++ z0_res = svldff1_gather_index_f64 (p0, z0, 5)) ++ ++/* ++** ldff1_gather_31_f64_index: ++** ldff1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_31_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, 31), ++ z0_res = svldff1_gather_index_f64 (p0, z0, 31)) ++ ++/* ++** ldff1_gather_32_f64_index: ++** mov (x[0-9]+), #?256 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_32_f64_index, svfloat64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, 32), ++ z0_res = svldff1_gather_index_f64 (p0, z0, 32)) ++ ++/* ++** ldff1_gather_x0_f64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_f64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_f64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_f64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_f64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_f64 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** 
ldff1_gather_ext_f64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_f64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1_gather_x0_f64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_f64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_f64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_f64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_f64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_f64 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_ext_f64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_f64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1_gather_x0_f64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_f64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_f64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_f64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_f64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_f64 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) ++ ++/* ++** ldff1_gather_ext_f64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_f64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1_gather_x0_f64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_f64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_f64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_f64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_f64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ z0_res 
= svldff1_gather_u64index_f64 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) ++ ++/* ++** ldff1_gather_ext_f64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_f64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c +new file mode 100644 +index 000000000..66bf0f746 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c +@@ -0,0 +1,272 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_gather_s32_tied1: ++** ldff1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_s32_tied1, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_s32 (p0, z0), ++ z0_res = svldff1_gather_s32 (p0, z0)) ++ ++/* ++** ldff1_gather_s32_untied: ++** ldff1w z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_s32_untied, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_s32 (p0, z1), ++ z0_res = svldff1_gather_s32 (p0, z1)) ++ ++/* ++** ldff1_gather_x0_s32_offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, x0), ++ z0_res = svldff1_gather_offset_s32 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m4_s32_offset: ++** mov (x[0-9]+), #?-4 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m4_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, -4), ++ z0_res = svldff1_gather_offset_s32 (p0, z0, -4)) ++ ++/* ++** ldff1_gather_0_s32_offset: ++** ldff1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 0), ++ z0_res = svldff1_gather_offset_s32 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_5_s32_offset: ++** mov (x[0-9]+), #?5 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_5_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 5), ++ z0_res = svldff1_gather_offset_s32 (p0, z0, 5)) ++ ++/* ++** ldff1_gather_6_s32_offset: ++** mov (x[0-9]+), #?6 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_6_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 6), ++ z0_res = svldff1_gather_offset_s32 (p0, z0, 6)) ++ ++/* ++** ldff1_gather_7_s32_offset: ++** mov (x[0-9]+), #?7 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_7_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 7), ++ z0_res = svldff1_gather_offset_s32 (p0, z0, 7)) ++ ++/* ++** ldff1_gather_8_s32_offset: ++** ldff1w z0\.s, p0/z, \[z0\.s, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_8_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 8), ++ z0_res = svldff1_gather_offset_s32 (p0, z0, 8)) ++ ++/* ++** ldff1_gather_124_s32_offset: ++** ldff1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_124_s32_offset, 
svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 124), ++ z0_res = svldff1_gather_offset_s32 (p0, z0, 124)) ++ ++/* ++** ldff1_gather_128_s32_offset: ++** mov (x[0-9]+), #?128 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_128_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 128), ++ z0_res = svldff1_gather_offset_s32 (p0, z0, 128)) ++ ++/* ++** ldff1_gather_x0_s32_index: ++** lsl (x[0-9]+), x0, #?2 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, x0), ++ z0_res = svldff1_gather_index_s32 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m1_s32_index: ++** mov (x[0-9]+), #?-4 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m1_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, -1), ++ z0_res = svldff1_gather_index_s32 (p0, z0, -1)) ++ ++/* ++** ldff1_gather_0_s32_index: ++** ldff1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, 0), ++ z0_res = svldff1_gather_index_s32 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_5_s32_index: ++** ldff1w z0\.s, p0/z, \[z0\.s, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_5_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, 5), ++ z0_res = svldff1_gather_index_s32 (p0, z0, 5)) ++ ++/* ++** ldff1_gather_31_s32_index: ++** ldff1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_31_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, 31), ++ z0_res = svldff1_gather_index_s32 (p0, z0, 31)) ++ ++/* ++** ldff1_gather_32_s32_index: ++** mov (x[0-9]+), #?128 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_32_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, 32), ++ z0_res = svldff1_gather_index_s32 (p0, z0, 32)) ++ ++/* ++** ldff1_gather_x0_s32_s32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s32_s32offset, svint32_t, int32_t, svint32_t, ++ z0_res = svldff1_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_s32_s32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s32_s32offset, svint32_t, int32_t, svint32_t, ++ z0_res = svldff1_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_s32_s32offset: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s32_s32offset, svint32_t, int32_t, svint32_t, ++ z0_res = svldff1_gather_s32offset_s32 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_x0_s32_u32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s32_u32offset, svint32_t, int32_t, svuint32_t, ++ z0_res = svldff1_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_s32_u32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s32_u32offset, svint32_t, 
int32_t, svuint32_t, ++ z0_res = svldff1_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_s32_u32offset: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s32_u32offset, svint32_t, int32_t, svuint32_t, ++ z0_res = svldff1_gather_u32offset_s32 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_x0_s32_s32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s32_s32index, svint32_t, int32_t, svint32_t, ++ z0_res = svldff1_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_s32_s32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s32_s32index, svint32_t, int32_t, svint32_t, ++ z0_res = svldff1_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_s32_s32index: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s32_s32index, svint32_t, int32_t, svint32_t, ++ z0_res = svldff1_gather_s32index_s32 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) ++ ++/* ++** ldff1_gather_x0_s32_u32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s32_u32index, svint32_t, int32_t, svuint32_t, ++ z0_res = svldff1_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_s32_u32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s32_u32index, svint32_t, int32_t, svuint32_t, ++ z0_res = svldff1_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_s32_u32index: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s32_u32index, svint32_t, int32_t, svuint32_t, ++ z0_res = svldff1_gather_u32index_s32 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c +new file mode 100644 +index 000000000..faf71bf9d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c +@@ -0,0 +1,348 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_gather_s64_tied1: ++** ldff1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_s64 (p0, z0), ++ z0_res = svldff1_gather_s64 (p0, z0)) ++ ++/* ++** ldff1_gather_s64_untied: ++** ldff1d z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_s64 (p0, z1), ++ z0_res = svldff1_gather_s64 (p0, z1)) ++ ++/* ++** ldff1_gather_x0_s64_offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m8_s64_offset: ++** mov (x[0-9]+), #?-8 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m8_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, -8), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, -8)) ++ ++/* ++** ldff1_gather_0_s64_offset: ++** ldff1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_9_s64_offset: ++** mov (x[0-9]+), #?9 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_9_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 9), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 9)) ++ ++/* ++** ldff1_gather_10_s64_offset: ++** mov (x[0-9]+), #?10 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_10_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 10), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 10)) ++ ++/* ++** ldff1_gather_11_s64_offset: ++** mov (x[0-9]+), #?11 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_11_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 11), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 11)) ++ ++/* ++** ldff1_gather_12_s64_offset: ++** mov (x[0-9]+), #?12 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_12_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 12), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 12)) ++ ++/* ++** ldff1_gather_13_s64_offset: ++** mov (x[0-9]+), #?13 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_13_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 13), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 13)) ++ ++/* ++** ldff1_gather_14_s64_offset: ++** mov (x[0-9]+), #?14 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_14_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 14), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 14)) ++ ++/* ++** ldff1_gather_15_s64_offset: ++** mov (x[0-9]+), #?15 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_15_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 15), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 15)) ++ ++/* ++** 
ldff1_gather_16_s64_offset: ++** ldff1d z0\.d, p0/z, \[z0\.d, #16\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_16_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 16), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 16)) ++ ++/* ++** ldff1_gather_248_s64_offset: ++** ldff1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_248_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 248), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 248)) ++ ++/* ++** ldff1_gather_256_s64_offset: ++** mov (x[0-9]+), #?256 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_256_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 256), ++ z0_res = svldff1_gather_offset_s64 (p0, z0, 256)) ++ ++/* ++** ldff1_gather_x0_s64_index: ++** lsl (x[0-9]+), x0, #?3 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, x0), ++ z0_res = svldff1_gather_index_s64 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m1_s64_index: ++** mov (x[0-9]+), #?-8 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m1_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, -1), ++ z0_res = svldff1_gather_index_s64 (p0, z0, -1)) ++ ++/* ++** ldff1_gather_0_s64_index: ++** ldff1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, 0), ++ z0_res = svldff1_gather_index_s64 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_5_s64_index: ++** ldff1d z0\.d, p0/z, \[z0\.d, #40\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_5_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, 5), ++ z0_res = svldff1_gather_index_s64 (p0, z0, 5)) ++ ++/* ++** ldff1_gather_31_s64_index: ++** ldff1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_31_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, 31), ++ z0_res = svldff1_gather_index_s64 (p0, z0, 31)) ++ ++/* ++** ldff1_gather_32_s64_index: ++** mov (x[0-9]+), #?256 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_32_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, 32), ++ z0_res = svldff1_gather_index_s64 (p0, z0, 32)) ++ ++/* ++** ldff1_gather_x0_s64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s64_s64offset, svint64_t, int64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_s64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s64_s64offset, svint64_t, int64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_s64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s64_s64offset, svint64_t, int64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_ext_s64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** 
ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_s64_s64offset, svint64_t, int64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1_gather_x0_s64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_s64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_s64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_ext_s64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1_gather_x0_s64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s64_s64index, svint64_t, int64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_s64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s64_s64index, svint64_t, int64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_s64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s64_s64index, svint64_t, int64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_s64 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) ++ ++/* ++** ldff1_gather_ext_s64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_s64_s64index, svint64_t, int64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1_gather_x0_s64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s64_u64index, svint64_t, int64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_s64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s64_u64index, svint64_t, int64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_s64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s64_u64index, svint64_t, int64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_s64 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) ++ ++/* ++** 
ldff1_gather_ext_s64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_s64_u64index, svint64_t, int64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c +new file mode 100644 +index 000000000..41c7dc9cf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c +@@ -0,0 +1,272 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_gather_u32_tied1: ++** ldff1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_u32 (p0, z0), ++ z0_res = svldff1_gather_u32 (p0, z0)) ++ ++/* ++** ldff1_gather_u32_untied: ++** ldff1w z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_u32_untied, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_u32 (p0, z1), ++ z0_res = svldff1_gather_u32 (p0, z1)) ++ ++/* ++** ldff1_gather_x0_u32_offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, x0), ++ z0_res = svldff1_gather_offset_u32 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m4_u32_offset: ++** mov (x[0-9]+), #?-4 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m4_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, -4), ++ z0_res = svldff1_gather_offset_u32 (p0, z0, -4)) ++ ++/* ++** ldff1_gather_0_u32_offset: ++** ldff1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 0), ++ z0_res = svldff1_gather_offset_u32 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_5_u32_offset: ++** mov (x[0-9]+), #?5 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_5_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 5), ++ z0_res = svldff1_gather_offset_u32 (p0, z0, 5)) ++ ++/* ++** ldff1_gather_6_u32_offset: ++** mov (x[0-9]+), #?6 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_6_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 6), ++ z0_res = svldff1_gather_offset_u32 (p0, z0, 6)) ++ ++/* ++** ldff1_gather_7_u32_offset: ++** mov (x[0-9]+), #?7 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_7_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 7), ++ z0_res = svldff1_gather_offset_u32 (p0, z0, 7)) ++ ++/* ++** ldff1_gather_8_u32_offset: ++** ldff1w z0\.s, p0/z, \[z0\.s, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_8_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 8), ++ z0_res = svldff1_gather_offset_u32 (p0, z0, 8)) ++ ++/* ++** ldff1_gather_124_u32_offset: ++** ldff1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_124_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 124), ++ z0_res = 
svldff1_gather_offset_u32 (p0, z0, 124)) ++ ++/* ++** ldff1_gather_128_u32_offset: ++** mov (x[0-9]+), #?128 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_128_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 128), ++ z0_res = svldff1_gather_offset_u32 (p0, z0, 128)) ++ ++/* ++** ldff1_gather_x0_u32_index: ++** lsl (x[0-9]+), x0, #?2 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, x0), ++ z0_res = svldff1_gather_index_u32 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m1_u32_index: ++** mov (x[0-9]+), #?-4 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m1_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, -1), ++ z0_res = svldff1_gather_index_u32 (p0, z0, -1)) ++ ++/* ++** ldff1_gather_0_u32_index: ++** ldff1w z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, 0), ++ z0_res = svldff1_gather_index_u32 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_5_u32_index: ++** ldff1w z0\.s, p0/z, \[z0\.s, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_5_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, 5), ++ z0_res = svldff1_gather_index_u32 (p0, z0, 5)) ++ ++/* ++** ldff1_gather_31_u32_index: ++** ldff1w z0\.s, p0/z, \[z0\.s, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_31_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, 31), ++ z0_res = svldff1_gather_index_u32 (p0, z0, 31)) ++ ++/* ++** ldff1_gather_32_u32_index: ++** mov (x[0-9]+), #?128 ++** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_32_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, 32), ++ z0_res = svldff1_gather_index_u32 (p0, z0, 32)) ++ ++/* ++** ldff1_gather_x0_u32_s32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u32_s32offset, svuint32_t, uint32_t, svint32_t, ++ z0_res = svldff1_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_u32_s32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u32_s32offset, svuint32_t, uint32_t, svint32_t, ++ z0_res = svldff1_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_u32_s32offset: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u32_s32offset, svuint32_t, uint32_t, svint32_t, ++ z0_res = svldff1_gather_s32offset_u32 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_x0_u32_u32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u32_u32offset, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_u32_u32offset: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u32_u32offset, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32offset_u32 (p0, x0, z0), ++ 
z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_u32_u32offset: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u32_u32offset, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32offset_u32 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_x0_u32_s32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u32_s32index, svuint32_t, uint32_t, svint32_t, ++ z0_res = svldff1_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_u32_s32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u32_s32index, svuint32_t, uint32_t, svint32_t, ++ z0_res = svldff1_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_u32_s32index: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u32_s32index, svuint32_t, uint32_t, svint32_t, ++ z0_res = svldff1_gather_s32index_u32 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) ++ ++/* ++** ldff1_gather_x0_u32_u32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u32_u32index, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_u32_u32index: ++** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u32_u32index, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_u32_u32index: ++** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u32_u32index, svuint32_t, uint32_t, svuint32_t, ++ z0_res = svldff1_gather_u32index_u32 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c +new file mode 100644 +index 000000000..8b53ce94f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c +@@ -0,0 +1,348 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_gather_u64_tied1: ++** ldff1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_u64 (p0, z0), ++ z0_res = svldff1_gather_u64 (p0, z0)) ++ ++/* ++** ldff1_gather_u64_untied: ++** ldff1d z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_u64 (p0, z1), ++ z0_res = svldff1_gather_u64 (p0, z1)) ++ ++/* ++** ldff1_gather_x0_u64_offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m8_u64_offset: ++** mov (x[0-9]+), #?-8 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m8_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, -8), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, -8)) ++ ++/* ++** ldff1_gather_0_u64_offset: ++** ldff1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_9_u64_offset: ++** mov (x[0-9]+), #?9 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_9_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 9), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 9)) ++ ++/* ++** ldff1_gather_10_u64_offset: ++** mov (x[0-9]+), #?10 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_10_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 10), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 10)) ++ ++/* ++** ldff1_gather_11_u64_offset: ++** mov (x[0-9]+), #?11 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_11_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 11), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 11)) ++ ++/* ++** ldff1_gather_12_u64_offset: ++** mov (x[0-9]+), #?12 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_12_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 12), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 12)) ++ ++/* ++** ldff1_gather_13_u64_offset: ++** mov (x[0-9]+), #?13 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_13_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 13), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 13)) ++ ++/* ++** ldff1_gather_14_u64_offset: ++** mov (x[0-9]+), #?14 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_14_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 14), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 14)) ++ ++/* ++** ldff1_gather_15_u64_offset: ++** mov (x[0-9]+), #?15 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_15_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 15), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 15)) ++ ++/* ++** 
ldff1_gather_16_u64_offset: ++** ldff1d z0\.d, p0/z, \[z0\.d, #16\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_16_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 16), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 16)) ++ ++/* ++** ldff1_gather_248_u64_offset: ++** ldff1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_248_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 248), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 248)) ++ ++/* ++** ldff1_gather_256_u64_offset: ++** mov (x[0-9]+), #?256 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_256_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 256), ++ z0_res = svldff1_gather_offset_u64 (p0, z0, 256)) ++ ++/* ++** ldff1_gather_x0_u64_index: ++** lsl (x[0-9]+), x0, #?3 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_x0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, x0), ++ z0_res = svldff1_gather_index_u64 (p0, z0, x0)) ++ ++/* ++** ldff1_gather_m1_u64_index: ++** mov (x[0-9]+), #?-8 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_m1_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, -1), ++ z0_res = svldff1_gather_index_u64 (p0, z0, -1)) ++ ++/* ++** ldff1_gather_0_u64_index: ++** ldff1d z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, 0), ++ z0_res = svldff1_gather_index_u64 (p0, z0, 0)) ++ ++/* ++** ldff1_gather_5_u64_index: ++** ldff1d z0\.d, p0/z, \[z0\.d, #40\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_5_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, 5), ++ z0_res = svldff1_gather_index_u64 (p0, z0, 5)) ++ ++/* ++** ldff1_gather_31_u64_index: ++** ldff1d z0\.d, p0/z, \[z0\.d, #248\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_31_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, 31), ++ z0_res = svldff1_gather_index_u64 (p0, z0, 31)) ++ ++/* ++** ldff1_gather_32_u64_index: ++** mov (x[0-9]+), #?256 ++** ldff1d z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1_gather_32_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, 32), ++ z0_res = svldff1_gather_index_u64 (p0, z0, 32)) ++ ++/* ++** ldff1_gather_x0_u64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_u64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_u64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_ext_u64_s64offset: ++** ldff1d z0\.d, p0/z, \[x0, 
z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ z0_res = svldff1_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1_gather_x0_u64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_u64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1_gather_offset (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_u64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svldff1_gather_offset (p0, x0, z1)) ++ ++/* ++** ldff1_gather_ext_u64_u64offset: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1_gather_x0_u64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_u64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_u64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_u64 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, z1)) ++ ++/* ++** ldff1_gather_ext_u64_s64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ z0_res = svldff1_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1_gather_x0_u64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_tied1_u64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svldff1_gather_index (p0, x0, z0)) ++ ++/* ++** ldff1_gather_untied_u64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_u64 (p0, x0, z1), ++ z0_res = svldff1_gather_index (p0, x0, 
z1)) ++ ++/* ++** ldff1_gather_ext_u64_u64index: ++** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1_gather_ext_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ z0_res = svldff1_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c +new file mode 100644 +index 000000000..1d5fde0e6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_s16_base: ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s16_base, svint16_t, int16_t, ++ z0 = svldff1_s16 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_s16_index: ++** ldff1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1_s16_index, svint16_t, int16_t, ++ z0 = svldff1_s16 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_s16_1: ++** incb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s16_1, svint16_t, int16_t, ++ z0 = svldff1_s16 (p0, x0 + svcnth ()), ++ z0 = svldff1 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_s16_m1: ++** decb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s16_m1, svint16_t, int16_t, ++ z0 = svldff1_s16 (p0, x0 - svcnth ()), ++ z0 = svldff1 (p0, x0 - svcnth ())) ++ ++/* ++** ldff1_vnum_s16_0: ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s16_0, svint16_t, int16_t, ++ z0 = svldff1_vnum_s16 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_s16_1: ++** incb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s16_1, svint16_t, int16_t, ++ z0 = svldff1_vnum_s16 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_s16_m1: ++** decb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s16_m1, svint16_t, int16_t, ++ z0 = svldff1_vnum_s16 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s16_x1, svint16_t, int16_t, ++ z0 = svldff1_vnum_s16 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c +new file mode 100644 +index 000000000..97a36e884 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_s32_base: ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s32_base, svint32_t, int32_t, ++ z0 = svldff1_s32 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_s32_index: ++** ldff1w z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ldff1_s32_index, svint32_t, int32_t, ++ z0 = svldff1_s32 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_s32_1: ++** incb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s32_1, svint32_t, int32_t, ++ z0 = svldff1_s32 (p0, x0 + svcntw ()), ++ z0 = svldff1 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_s32_m1: ++** decb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s32_m1, svint32_t, int32_t, ++ z0 = svldff1_s32 (p0, x0 - svcntw ()), ++ z0 = svldff1 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1_vnum_s32_0: ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s32_0, svint32_t, int32_t, ++ z0 = svldff1_vnum_s32 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_s32_1: ++** incb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s32_1, svint32_t, int32_t, ++ z0 = svldff1_vnum_s32 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_s32_m1: ++** decb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s32_m1, svint32_t, int32_t, ++ z0 = svldff1_vnum_s32 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s32_x1, svint32_t, int32_t, ++ z0 = svldff1_vnum_s32 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c +new file mode 100644 +index 000000000..c018a4c1c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_s64_base: ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s64_base, svint64_t, int64_t, ++ z0 = svldff1_s64 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_s64_index: ++** ldff1d z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ldff1_s64_index, svint64_t, int64_t, ++ z0 = svldff1_s64 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_s64_1: ++** incb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s64_1, svint64_t, int64_t, ++ z0 = svldff1_s64 (p0, x0 + svcntd ()), ++ z0 = svldff1 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1_s64_m1: ++** decb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s64_m1, svint64_t, int64_t, ++ z0 = svldff1_s64 (p0, x0 - svcntd ()), ++ z0 = svldff1 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1_vnum_s64_0: ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s64_0, svint64_t, int64_t, ++ z0 = svldff1_vnum_s64 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_s64_1: ++** incb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s64_1, svint64_t, int64_t, ++ z0 = svldff1_vnum_s64 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_s64_m1: ++** decb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s64_m1, svint64_t, int64_t, ++ z0 = svldff1_vnum_s64 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s64_x1, svint64_t, int64_t, ++ z0 = svldff1_vnum_s64 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c +new file mode 100644 +index 000000000..cf620d1f4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_s8_base: ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s8_base, svint8_t, int8_t, ++ z0 = svldff1_s8 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_s8_index: ++** ldff1b z0\.b, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1_s8_index, svint8_t, int8_t, ++ z0 = svldff1_s8 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_s8_1: ++** incb x0 ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s8_1, svint8_t, int8_t, ++ z0 = svldff1_s8 (p0, x0 + svcntb ()), ++ z0 = svldff1 (p0, x0 + svcntb ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_s8_m1: ++** decb x0 ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_s8_m1, svint8_t, int8_t, ++ z0 = svldff1_s8 (p0, x0 - svcntb ()), ++ z0 = svldff1 (p0, x0 - svcntb ())) ++ ++/* ++** ldff1_vnum_s8_0: ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s8_0, svint8_t, int8_t, ++ z0 = svldff1_vnum_s8 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_s8_1: ++** incb x0 ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s8_1, svint8_t, int8_t, ++ z0 = svldff1_vnum_s8 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1_vnum_s8_m1: ++** decb x0 ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s8_m1, svint8_t, int8_t, ++ z0 = svldff1_vnum_s8 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* ++** ldff1_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1b z0\.b, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1b z0\.b, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_s8_x1, svint8_t, int8_t, ++ z0 = svldff1_vnum_s8 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c +new file mode 100644 +index 000000000..1fa819296 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_u16_base: ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u16_base, svuint16_t, uint16_t, ++ z0 = svldff1_u16 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_u16_index: ++** ldff1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1_u16_index, svuint16_t, uint16_t, ++ z0 = svldff1_u16 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_u16_1: ++** incb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u16_1, svuint16_t, uint16_t, ++ z0 = svldff1_u16 (p0, x0 + svcnth ()), ++ z0 = svldff1 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_u16_m1: ++** decb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u16_m1, svuint16_t, uint16_t, ++ z0 = svldff1_u16 (p0, x0 - svcnth ()), ++ z0 = svldff1 (p0, x0 - svcnth ())) ++ ++/* ++** ldff1_vnum_u16_0: ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u16_0, svuint16_t, uint16_t, ++ z0 = svldff1_vnum_u16 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_u16_1: ++** incb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u16_1, svuint16_t, uint16_t, ++ z0 = svldff1_vnum_u16 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_u16_m1: ++** decb x0 ++** ldff1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u16_m1, svuint16_t, uint16_t, ++ z0 = svldff1_vnum_u16 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u16_x1, svuint16_t, uint16_t, ++ z0 = svldff1_vnum_u16 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c +new file mode 100644 +index 000000000..5224ec40a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_u32_base: ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u32_base, svuint32_t, uint32_t, ++ z0 = svldff1_u32 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_u32_index: ++** ldff1w z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ldff1_u32_index, svuint32_t, uint32_t, ++ z0 = svldff1_u32 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_u32_1: ++** incb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u32_1, svuint32_t, uint32_t, ++ z0 = svldff1_u32 (p0, x0 + svcntw ()), ++ z0 = svldff1 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_u32_m1: ++** decb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u32_m1, svuint32_t, uint32_t, ++ z0 = svldff1_u32 (p0, x0 - svcntw ()), ++ z0 = svldff1 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1_vnum_u32_0: ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u32_0, svuint32_t, uint32_t, ++ z0 = svldff1_vnum_u32 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_u32_1: ++** incb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u32_1, svuint32_t, uint32_t, ++ z0 = svldff1_vnum_u32 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_u32_m1: ++** decb x0 ++** ldff1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u32_m1, svuint32_t, uint32_t, ++ z0 = svldff1_vnum_u32 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u32_x1, svuint32_t, uint32_t, ++ z0 = svldff1_vnum_u32 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c +new file mode 100644 +index 000000000..18e87f2b8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_u64_base: ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u64_base, svuint64_t, uint64_t, ++ z0 = svldff1_u64 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_u64_index: ++** ldff1d z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ldff1_u64_index, svuint64_t, uint64_t, ++ z0 = svldff1_u64 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_u64_1: ++** incb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u64_1, svuint64_t, uint64_t, ++ z0 = svldff1_u64 (p0, x0 + svcntd ()), ++ z0 = svldff1 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1_u64_m1: ++** decb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u64_m1, svuint64_t, uint64_t, ++ z0 = svldff1_u64 (p0, x0 - svcntd ()), ++ z0 = svldff1 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1_vnum_u64_0: ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u64_0, svuint64_t, uint64_t, ++ z0 = svldff1_vnum_u64 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_u64_1: ++** incb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u64_1, svuint64_t, uint64_t, ++ z0 = svldff1_vnum_u64 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_u64_m1: ++** decb x0 ++** ldff1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u64_m1, svuint64_t, uint64_t, ++ z0 = svldff1_vnum_u64 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u64_x1, svuint64_t, uint64_t, ++ z0 = svldff1_vnum_u64 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c +new file mode 100644 +index 000000000..83883fca4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1_u8_base: ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u8_base, svuint8_t, uint8_t, ++ z0 = svldff1_u8 (p0, x0), ++ z0 = svldff1 (p0, x0)) ++ ++/* ++** ldff1_u8_index: ++** ldff1b z0\.b, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1_u8_index, svuint8_t, uint8_t, ++ z0 = svldff1_u8 (p0, x0 + x1), ++ z0 = svldff1 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_u8_1: ++** incb x0 ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u8_1, svuint8_t, uint8_t, ++ z0 = svldff1_u8 (p0, x0 + svcntb ()), ++ z0 = svldff1 (p0, x0 + svcntb ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_u8_m1: ++** decb x0 ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_u8_m1, svuint8_t, uint8_t, ++ z0 = svldff1_u8 (p0, x0 - svcntb ()), ++ z0 = svldff1 (p0, x0 - svcntb ())) ++ ++/* ++** ldff1_vnum_u8_0: ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u8_0, svuint8_t, uint8_t, ++ z0 = svldff1_vnum_u8 (p0, x0, 0), ++ z0 = svldff1_vnum (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1_vnum_u8_1: ++** incb x0 ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u8_1, svuint8_t, uint8_t, ++ z0 = svldff1_vnum_u8 (p0, x0, 1), ++ z0 = svldff1_vnum (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1_vnum_u8_m1: ++** decb x0 ++** ldff1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u8_m1, svuint8_t, uint8_t, ++ z0 = svldff1_vnum_u8 (p0, x0, -1), ++ z0 = svldff1_vnum (p0, x0, -1)) ++ ++/* ++** ldff1_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1b z0\.b, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1b z0\.b, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1_vnum_u8_x1, svuint8_t, uint8_t, ++ z0 = svldff1_vnum_u8 (p0, x0, x1), ++ z0 = svldff1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c +new file mode 100644 +index 000000000..c2a676807 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c +@@ -0,0 +1,131 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sb_gather_s32_tied1: ++** ldff1sb z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_s32_tied1, svint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_s32 (p0, z0), ++ z0_res = svldff1sb_gather_s32 (p0, z0)) ++ ++/* ++** ldff1sb_gather_s32_untied: ++** ldff1sb z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_s32_untied, svint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_s32 (p0, z1), ++ z0_res = svldff1sb_gather_s32 (p0, z1)) ++ ++/* ++** ldff1sb_gather_x0_s32_offset: ++** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_x0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, x0), ++ z0_res = svldff1sb_gather_offset_s32 (p0, z0, x0)) ++ ++/* ++** ldff1sb_gather_m1_s32_offset: ++** mov (x[0-9]+), #?-1 ++** ldff1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_m1_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, -1), ++ z0_res = svldff1sb_gather_offset_s32 (p0, z0, -1)) ++ ++/* ++** ldff1sb_gather_0_s32_offset: ++** ldff1sb z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, 0), ++ z0_res = svldff1sb_gather_offset_s32 (p0, z0, 0)) ++ ++/* ++** ldff1sb_gather_5_s32_offset: ++** ldff1sb z0\.s, p0/z, \[z0\.s, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_5_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, 5), ++ z0_res = svldff1sb_gather_offset_s32 (p0, z0, 5)) ++ ++/* ++** ldff1sb_gather_31_s32_offset: ++** ldff1sb z0\.s, p0/z, \[z0\.s, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_31_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, 31), ++ z0_res = svldff1sb_gather_offset_s32 (p0, z0, 31)) ++ ++/* ++** ldff1sb_gather_32_s32_offset: ++** mov (x[0-9]+), #?32 ++** ldff1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_32_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, 32), ++ z0_res = svldff1sb_gather_offset_s32 (p0, z0, 32)) ++ ++/* ++** ldff1sb_gather_x0_s32_s32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_s32_s32offset, svint32_t, int8_t, svint32_t, ++ z0_res = svldff1sb_gather_s32offset_s32 (p0, x0, 
z0), ++ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_tied1_s32_s32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_s32_s32offset, svint32_t, int8_t, svint32_t, ++ z0_res = svldff1sb_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_untied_s32_s32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_s32_s32offset, svint32_t, int8_t, svint32_t, ++ z0_res = svldff1sb_gather_s32offset_s32 (p0, x0, z1), ++ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ldff1sb_gather_x0_s32_u32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_s32_u32offset, svint32_t, int8_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_tied1_s32_u32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_s32_u32offset, svint32_t, int8_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_untied_s32_u32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_s32_u32offset, svint32_t, int8_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32offset_s32 (p0, x0, z1), ++ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c +new file mode 100644 +index 000000000..2f2a04d24 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c +@@ -0,0 +1,149 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sb_gather_s64_tied1: ++** ldff1sb z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_s64 (p0, z0), ++ z0_res = svldff1sb_gather_s64 (p0, z0)) ++ ++/* ++** ldff1sb_gather_s64_untied: ++** ldff1sb z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_s64 (p0, z1), ++ z0_res = svldff1sb_gather_s64 (p0, z1)) ++ ++/* ++** ldff1sb_gather_x0_s64_offset: ++** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svldff1sb_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ldff1sb_gather_m1_s64_offset: ++** mov (x[0-9]+), #?-1 ++** ldff1sb z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_m1_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, -1), ++ z0_res = svldff1sb_gather_offset_s64 (p0, z0, -1)) ++ ++/* ++** ldff1sb_gather_0_s64_offset: ++** ldff1sb z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svldff1sb_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ldff1sb_gather_5_s64_offset: ++** ldff1sb z0\.d, p0/z, \[z0\.d, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svldff1sb_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ldff1sb_gather_31_s64_offset: ++** ldff1sb z0\.d, p0/z, \[z0\.d, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_31_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, 31), ++ z0_res = svldff1sb_gather_offset_s64 (p0, z0, 31)) ++ ++/* ++** ldff1sb_gather_32_s64_offset: ++** mov (x[0-9]+), #?32 ++** ldff1sb z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_32_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, 32), ++ z0_res = svldff1sb_gather_offset_s64 (p0, z0, 32)) ++ ++/* ++** ldff1sb_gather_x0_s64_s64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_s64_s64offset, svint64_t, int8_t, svint64_t, ++ z0_res = svldff1sb_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_tied1_s64_s64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_s64_s64offset, svint64_t, int8_t, svint64_t, ++ z0_res = svldff1sb_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_untied_s64_s64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_s64_s64offset, svint64_t, int8_t, svint64_t, ++ z0_res = svldff1sb_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1sb_gather_ext_s64_s64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_ext_s64_s64offset, svint64_t, int8_t, svint64_t, ++ z0_res = svldff1sb_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1sb_gather_offset_s64 (p0, 
x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sb_gather_x0_s64_u64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_tied1_s64_u64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_untied_s64_u64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1sb_gather_ext_s64_u64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_ext_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1sb_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c +new file mode 100644 +index 000000000..e3e83a205 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c +@@ -0,0 +1,131 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sb_gather_u32_tied1: ++** ldff1sb z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_u32 (p0, z0), ++ z0_res = svldff1sb_gather_u32 (p0, z0)) ++ ++/* ++** ldff1sb_gather_u32_untied: ++** ldff1sb z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_u32_untied, svuint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_u32 (p0, z1), ++ z0_res = svldff1sb_gather_u32 (p0, z1)) ++ ++/* ++** ldff1sb_gather_x0_u32_offset: ++** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_x0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, x0), ++ z0_res = svldff1sb_gather_offset_u32 (p0, z0, x0)) ++ ++/* ++** ldff1sb_gather_m1_u32_offset: ++** mov (x[0-9]+), #?-1 ++** ldff1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_m1_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, -1), ++ z0_res = svldff1sb_gather_offset_u32 (p0, z0, -1)) ++ ++/* ++** ldff1sb_gather_0_u32_offset: ++** ldff1sb z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, 0), ++ z0_res = svldff1sb_gather_offset_u32 (p0, z0, 0)) ++ ++/* ++** ldff1sb_gather_5_u32_offset: ++** ldff1sb z0\.s, p0/z, \[z0\.s, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_5_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, 5), ++ z0_res = svldff1sb_gather_offset_u32 (p0, z0, 5)) ++ ++/* ++** ldff1sb_gather_31_u32_offset: ++** ldff1sb z0\.s, p0/z, \[z0\.s, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_31_u32_offset, 
svuint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, 31), ++ z0_res = svldff1sb_gather_offset_u32 (p0, z0, 31)) ++ ++/* ++** ldff1sb_gather_32_u32_offset: ++** mov (x[0-9]+), #?32 ++** ldff1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_32_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, 32), ++ z0_res = svldff1sb_gather_offset_u32 (p0, z0, 32)) ++ ++/* ++** ldff1sb_gather_x0_u32_s32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_u32_s32offset, svuint32_t, int8_t, svint32_t, ++ z0_res = svldff1sb_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_tied1_u32_s32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_u32_s32offset, svuint32_t, int8_t, svint32_t, ++ z0_res = svldff1sb_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_untied_u32_s32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_u32_s32offset, svuint32_t, int8_t, svint32_t, ++ z0_res = svldff1sb_gather_s32offset_u32 (p0, x0, z1), ++ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ldff1sb_gather_x0_u32_u32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_u32_u32offset, svuint32_t, int8_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_tied1_u32_u32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_u32_u32offset, svuint32_t, int8_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_untied_u32_u32offset: ++** ldff1sb z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_u32_u32offset, svuint32_t, int8_t, svuint32_t, ++ z0_res = svldff1sb_gather_u32offset_u32 (p0, x0, z1), ++ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c +new file mode 100644 +index 000000000..769f2c266 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c +@@ -0,0 +1,149 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sb_gather_u64_tied1: ++** ldff1sb z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_u64 (p0, z0), ++ z0_res = svldff1sb_gather_u64 (p0, z0)) ++ ++/* ++** ldff1sb_gather_u64_untied: ++** ldff1sb z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_u64 (p0, z1), ++ z0_res = svldff1sb_gather_u64 (p0, z1)) ++ ++/* ++** ldff1sb_gather_x0_u64_offset: ++** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svldff1sb_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ldff1sb_gather_m1_u64_offset: ++** mov (x[0-9]+), #?-1 ++** ldff1sb z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_m1_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, -1), ++ z0_res = svldff1sb_gather_offset_u64 (p0, z0, -1)) ++ ++/* ++** ldff1sb_gather_0_u64_offset: ++** ldff1sb z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svldff1sb_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ldff1sb_gather_5_u64_offset: ++** ldff1sb z0\.d, p0/z, \[z0\.d, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svldff1sb_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ldff1sb_gather_31_u64_offset: ++** ldff1sb z0\.d, p0/z, \[z0\.d, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_31_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, 31), ++ z0_res = svldff1sb_gather_offset_u64 (p0, z0, 31)) ++ ++/* ++** ldff1sb_gather_32_u64_offset: ++** mov (x[0-9]+), #?32 ++** ldff1sb z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sb_gather_32_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, 32), ++ z0_res = svldff1sb_gather_offset_u64 (p0, z0, 32)) ++ ++/* ++** ldff1sb_gather_x0_u64_s64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_u64_s64offset, svuint64_t, int8_t, svint64_t, ++ z0_res = svldff1sb_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_tied1_u64_s64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_u64_s64offset, svuint64_t, int8_t, svint64_t, ++ z0_res = svldff1sb_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_untied_u64_s64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_u64_s64offset, svuint64_t, int8_t, svint64_t, ++ z0_res = svldff1sb_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1sb_gather_ext_u64_s64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_ext_u64_s64offset, svuint64_t, int8_t, svint64_t, ++ z0_res = svldff1sb_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = 
svldff1sb_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sb_gather_x0_u64_u64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_u64_u64offset, svuint64_t, int8_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_tied1_u64_u64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_u64_u64offset, svuint64_t, int8_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sb_gather_untied_u64_u64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_u64_u64offset, svuint64_t, int8_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1sb_gather_ext_u64_u64offset: ++** ldff1sb z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sb_gather_ext_u64_u64offset, svuint64_t, int8_t, svuint64_t, ++ z0_res = svldff1sb_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1sb_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c +new file mode 100644 +index 000000000..e0a748c6a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sb_s16_base: ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s16_base, svint16_t, int8_t, ++ z0 = svldff1sb_s16 (p0, x0), ++ z0 = svldff1sb_s16 (p0, x0)) ++ ++/* ++** ldff1sb_s16_index: ++** ldff1sb z0\.h, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s16_index, svint16_t, int8_t, ++ z0 = svldff1sb_s16 (p0, x0 + x1), ++ z0 = svldff1sb_s16 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_s16_1: ++** inch x0 ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s16_1, svint16_t, int8_t, ++ z0 = svldff1sb_s16 (p0, x0 + svcnth ()), ++ z0 = svldff1sb_s16 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_s16_m1: ++** dech x0 ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s16_m1, svint16_t, int8_t, ++ z0 = svldff1sb_s16 (p0, x0 - svcnth ()), ++ z0 = svldff1sb_s16 (p0, x0 - svcnth ())) ++ ++/* ++** ldff1sb_vnum_s16_0: ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s16_0, svint16_t, int8_t, ++ z0 = svldff1sb_vnum_s16 (p0, x0, 0), ++ z0 = svldff1sb_vnum_s16 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_vnum_s16_1: ++** inch x0 ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s16_1, svint16_t, int8_t, ++ z0 = svldff1sb_vnum_s16 (p0, x0, 1), ++ z0 = svldff1sb_vnum_s16 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1sb_vnum_s16_m1: ++** dech x0 ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s16_m1, svint16_t, int8_t, ++ z0 = svldff1sb_vnum_s16 (p0, x0, -1), ++ z0 = svldff1sb_vnum_s16 (p0, x0, -1)) ++ ++/* ++** ldff1sb_vnum_s16_x1: ++** cnth (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1sb z0\.h, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1sb z0\.h, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s16_x1, svint16_t, int8_t, ++ z0 = svldff1sb_vnum_s16 (p0, x0, x1), ++ z0 = svldff1sb_vnum_s16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c +new file mode 100644 +index 000000000..86716da9b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sb_s32_base: ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s32_base, svint32_t, int8_t, ++ z0 = svldff1sb_s32 (p0, x0), ++ z0 = svldff1sb_s32 (p0, x0)) ++ ++/* ++** ldff1sb_s32_index: ++** ldff1sb z0\.s, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s32_index, svint32_t, int8_t, ++ z0 = svldff1sb_s32 (p0, x0 + x1), ++ z0 = svldff1sb_s32 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_s32_1: ++** incw x0 ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s32_1, svint32_t, int8_t, ++ z0 = svldff1sb_s32 (p0, x0 + svcntw ()), ++ z0 = svldff1sb_s32 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_s32_m1: ++** decw x0 ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s32_m1, svint32_t, int8_t, ++ z0 = svldff1sb_s32 (p0, x0 - svcntw ()), ++ z0 = svldff1sb_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1sb_vnum_s32_0: ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s32_0, svint32_t, int8_t, ++ z0 = svldff1sb_vnum_s32 (p0, x0, 0), ++ z0 = svldff1sb_vnum_s32 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_vnum_s32_1: ++** incw x0 ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s32_1, svint32_t, int8_t, ++ z0 = svldff1sb_vnum_s32 (p0, x0, 1), ++ z0 = svldff1sb_vnum_s32 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_vnum_s32_m1: ++** decw x0 ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s32_m1, svint32_t, int8_t, ++ z0 = svldff1sb_vnum_s32 (p0, x0, -1), ++ z0 = svldff1sb_vnum_s32 (p0, x0, -1)) ++ ++/* ++** ldff1sb_vnum_s32_x1: ++** cntw (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1sb z0\.s, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1sb z0\.s, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s32_x1, svint32_t, int8_t, ++ z0 = svldff1sb_vnum_s32 (p0, x0, x1), ++ z0 = svldff1sb_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c +new file mode 100644 +index 000000000..e7a4aa6e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sb_s64_base: ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s64_base, svint64_t, int8_t, ++ z0 = svldff1sb_s64 (p0, x0), ++ z0 = svldff1sb_s64 (p0, x0)) ++ ++/* ++** ldff1sb_s64_index: ++** ldff1sb z0\.d, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s64_index, svint64_t, int8_t, ++ z0 = svldff1sb_s64 (p0, x0 + x1), ++ z0 = svldff1sb_s64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_s64_1: ++** incd x0 ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s64_1, svint64_t, int8_t, ++ z0 = svldff1sb_s64 (p0, x0 + svcntd ()), ++ z0 = svldff1sb_s64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_s64_m1: ++** decd x0 ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_s64_m1, svint64_t, int8_t, ++ z0 = svldff1sb_s64 (p0, x0 - svcntd ()), ++ z0 = svldff1sb_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1sb_vnum_s64_0: ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s64_0, svint64_t, int8_t, ++ z0 = svldff1sb_vnum_s64 (p0, x0, 0), ++ z0 = svldff1sb_vnum_s64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_vnum_s64_1: ++** incd x0 ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s64_1, svint64_t, int8_t, ++ z0 = svldff1sb_vnum_s64 (p0, x0, 1), ++ z0 = svldff1sb_vnum_s64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_vnum_s64_m1: ++** decd x0 ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s64_m1, svint64_t, int8_t, ++ z0 = svldff1sb_vnum_s64 (p0, x0, -1), ++ z0 = svldff1sb_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ldff1sb_vnum_s64_x1: ++** cntd (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1sb z0\.d, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1sb z0\.d, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_s64_x1, svint64_t, int8_t, ++ z0 = svldff1sb_vnum_s64 (p0, x0, x1), ++ z0 = svldff1sb_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c +new file mode 100644 +index 000000000..69ba96d52 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sb_u16_base: ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u16_base, svuint16_t, int8_t, ++ z0 = svldff1sb_u16 (p0, x0), ++ z0 = svldff1sb_u16 (p0, x0)) ++ ++/* ++** ldff1sb_u16_index: ++** ldff1sb z0\.h, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u16_index, svuint16_t, int8_t, ++ z0 = svldff1sb_u16 (p0, x0 + x1), ++ z0 = svldff1sb_u16 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_u16_1: ++** inch x0 ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u16_1, svuint16_t, int8_t, ++ z0 = svldff1sb_u16 (p0, x0 + svcnth ()), ++ z0 = svldff1sb_u16 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1sb_u16_m1: ++** dech x0 ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u16_m1, svuint16_t, int8_t, ++ z0 = svldff1sb_u16 (p0, x0 - svcnth ()), ++ z0 = svldff1sb_u16 (p0, x0 - svcnth ())) ++ ++/* ++** ldff1sb_vnum_u16_0: ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u16_0, svuint16_t, int8_t, ++ z0 = svldff1sb_vnum_u16 (p0, x0, 0), ++ z0 = svldff1sb_vnum_u16 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_vnum_u16_1: ++** inch x0 ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u16_1, svuint16_t, int8_t, ++ z0 = svldff1sb_vnum_u16 (p0, x0, 1), ++ z0 = svldff1sb_vnum_u16 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_vnum_u16_m1: ++** dech x0 ++** ldff1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u16_m1, svuint16_t, int8_t, ++ z0 = svldff1sb_vnum_u16 (p0, x0, -1), ++ z0 = svldff1sb_vnum_u16 (p0, x0, -1)) ++ ++/* ++** ldff1sb_vnum_u16_x1: ++** cnth (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1sb z0\.h, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1sb z0\.h, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u16_x1, svuint16_t, int8_t, ++ z0 = svldff1sb_vnum_u16 (p0, x0, x1), ++ z0 = svldff1sb_vnum_u16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c +new file mode 100644 +index 000000000..e1a1873f0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sb_u32_base: ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u32_base, svuint32_t, int8_t, ++ z0 = svldff1sb_u32 (p0, x0), ++ z0 = svldff1sb_u32 (p0, x0)) ++ ++/* ++** ldff1sb_u32_index: ++** ldff1sb z0\.s, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u32_index, svuint32_t, int8_t, ++ z0 = svldff1sb_u32 (p0, x0 + x1), ++ z0 = svldff1sb_u32 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_u32_1: ++** incw x0 ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u32_1, svuint32_t, int8_t, ++ z0 = svldff1sb_u32 (p0, x0 + svcntw ()), ++ z0 = svldff1sb_u32 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_u32_m1: ++** decw x0 ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u32_m1, svuint32_t, int8_t, ++ z0 = svldff1sb_u32 (p0, x0 - svcntw ()), ++ z0 = svldff1sb_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1sb_vnum_u32_0: ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u32_0, svuint32_t, int8_t, ++ z0 = svldff1sb_vnum_u32 (p0, x0, 0), ++ z0 = svldff1sb_vnum_u32 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_vnum_u32_1: ++** incw x0 ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u32_1, svuint32_t, int8_t, ++ z0 = svldff1sb_vnum_u32 (p0, x0, 1), ++ z0 = svldff1sb_vnum_u32 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1sb_vnum_u32_m1: ++** decw x0 ++** ldff1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u32_m1, svuint32_t, int8_t, ++ z0 = svldff1sb_vnum_u32 (p0, x0, -1), ++ z0 = svldff1sb_vnum_u32 (p0, x0, -1)) ++ ++/* ++** ldff1sb_vnum_u32_x1: ++** cntw (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1sb z0\.s, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1sb z0\.s, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u32_x1, svuint32_t, int8_t, ++ z0 = svldff1sb_vnum_u32 (p0, x0, x1), ++ z0 = svldff1sb_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c +new file mode 100644 +index 000000000..0a49cbcc0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sb_u64_base: ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u64_base, svuint64_t, int8_t, ++ z0 = svldff1sb_u64 (p0, x0), ++ z0 = svldff1sb_u64 (p0, x0)) ++ ++/* ++** ldff1sb_u64_index: ++** ldff1sb z0\.d, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u64_index, svuint64_t, int8_t, ++ z0 = svldff1sb_u64 (p0, x0 + x1), ++ z0 = svldff1sb_u64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_u64_1: ++** incd x0 ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u64_1, svuint64_t, int8_t, ++ z0 = svldff1sb_u64 (p0, x0 + svcntd ()), ++ z0 = svldff1sb_u64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_u64_m1: ++** decd x0 ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_u64_m1, svuint64_t, int8_t, ++ z0 = svldff1sb_u64 (p0, x0 - svcntd ()), ++ z0 = svldff1sb_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1sb_vnum_u64_0: ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u64_0, svuint64_t, int8_t, ++ z0 = svldff1sb_vnum_u64 (p0, x0, 0), ++ z0 = svldff1sb_vnum_u64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_vnum_u64_1: ++** incd x0 ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u64_1, svuint64_t, int8_t, ++ z0 = svldff1sb_vnum_u64 (p0, x0, 1), ++ z0 = svldff1sb_vnum_u64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sb_vnum_u64_m1: ++** decd x0 ++** ldff1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u64_m1, svuint64_t, int8_t, ++ z0 = svldff1sb_vnum_u64 (p0, x0, -1), ++ z0 = svldff1sb_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ldff1sb_vnum_u64_x1: ++** cntd (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1sb z0\.d, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1sb z0\.d, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1sb_vnum_u64_x1, svuint64_t, int8_t, ++ z0 = svldff1sb_vnum_u64 (p0, x0, x1), ++ z0 = svldff1sb_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c +new file mode 100644 +index 000000000..b633335dc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c +@@ -0,0 +1,252 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sh_gather_s32_tied1: ++** ldff1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_s32_tied1, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_s32 (p0, z0), ++ z0_res = svldff1sh_gather_s32 (p0, z0)) ++ ++/* ++** ldff1sh_gather_s32_untied: ++** ldff1sh z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_s32_untied, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_s32 (p0, z1), ++ z0_res = svldff1sh_gather_s32 (p0, z1)) ++ ++/* ++** ldff1sh_gather_x0_s32_offset: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, x0), ++ z0_res = svldff1sh_gather_offset_s32 (p0, z0, x0)) ++ ++/* ++** ldff1sh_gather_m2_s32_offset: ++** mov (x[0-9]+), #?-2 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_m2_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, -2), ++ z0_res = svldff1sh_gather_offset_s32 (p0, z0, -2)) ++ ++/* ++** ldff1sh_gather_0_s32_offset: ++** ldff1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, 0), ++ z0_res = svldff1sh_gather_offset_s32 (p0, z0, 0)) ++ ++/* ++** ldff1sh_gather_5_s32_offset: ++** mov (x[0-9]+), #?5 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, 5), ++ z0_res = svldff1sh_gather_offset_s32 (p0, z0, 5)) ++ ++/* ++** ldff1sh_gather_6_s32_offset: ++** ldff1sh z0\.s, p0/z, \[z0\.s, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_6_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, 6), ++ z0_res = svldff1sh_gather_offset_s32 (p0, z0, 6)) ++ ++/* ++** ldff1sh_gather_62_s32_offset: ++** ldff1sh z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_62_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, 62), ++ z0_res = svldff1sh_gather_offset_s32 (p0, z0, 62)) ++ ++/* ++** ldff1sh_gather_64_s32_offset: ++** mov (x[0-9]+), #?64 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_64_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, 64), ++ z0_res = svldff1sh_gather_offset_s32 (p0, z0, 64)) ++ ++/* ++** ldff1sh_gather_x0_s32_index: ++** lsl (x[0-9]+), x0, #?1 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, x0), ++ z0_res = svldff1sh_gather_index_s32 (p0, z0, x0)) ++ ++/* ++** ldff1sh_gather_m1_s32_index: ++** mov (x[0-9]+), #?-2 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_m1_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, -1), ++ z0_res = svldff1sh_gather_index_s32 (p0, z0, -1)) ++ ++/* ++** ldff1sh_gather_0_s32_index: ++** ldff1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, 0), ++ z0_res = 
svldff1sh_gather_index_s32 (p0, z0, 0)) ++ ++/* ++** ldff1sh_gather_5_s32_index: ++** ldff1sh z0\.s, p0/z, \[z0\.s, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, 5), ++ z0_res = svldff1sh_gather_index_s32 (p0, z0, 5)) ++ ++/* ++** ldff1sh_gather_31_s32_index: ++** ldff1sh z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_31_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, 31), ++ z0_res = svldff1sh_gather_index_s32 (p0, z0, 31)) ++ ++/* ++** ldff1sh_gather_32_s32_index: ++** mov (x[0-9]+), #?64 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_32_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, 32), ++ z0_res = svldff1sh_gather_index_s32 (p0, z0, 32)) ++ ++/* ++** ldff1sh_gather_x0_s32_s32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s32_s32offset, svint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_s32_s32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s32_s32offset, svint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_s32_s32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s32_s32offset, svint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32offset_s32 (p0, x0, z1), ++ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_x0_s32_u32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s32_u32offset, svint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_s32_u32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s32_u32offset, svint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_s32_u32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s32_u32offset, svint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32offset_s32 (p0, x0, z1), ++ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_x0_s32_s32index: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s32_s32index, svint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_s32_s32index: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s32_s32index, svint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_s32_s32index: ++** ldff1sh z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ 
(ldff1sh_gather_untied_s32_s32index, svint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32index_s32 (p0, x0, z1), ++ z0_res = svldff1sh_gather_index_s32 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_x0_s32_u32index: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s32_u32index, svint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_s32_u32index: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s32_u32index, svint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_s32_u32index: ++** ldff1sh z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s32_u32index, svint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32index_s32 (p0, x0, z1), ++ z0_res = svldff1sh_gather_index_s32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c +new file mode 100644 +index 000000000..32a4309b6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c +@@ -0,0 +1,288 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sh_gather_s64_tied1: ++** ldff1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_s64 (p0, z0), ++ z0_res = svldff1sh_gather_s64 (p0, z0)) ++ ++/* ++** ldff1sh_gather_s64_untied: ++** ldff1sh z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_s64 (p0, z1), ++ z0_res = svldff1sh_gather_s64 (p0, z1)) ++ ++/* ++** ldff1sh_gather_x0_s64_offset: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svldff1sh_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ldff1sh_gather_m2_s64_offset: ++** mov (x[0-9]+), #?-2 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_m2_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, -2), ++ z0_res = svldff1sh_gather_offset_s64 (p0, z0, -2)) ++ ++/* ++** ldff1sh_gather_0_s64_offset: ++** ldff1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svldff1sh_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ldff1sh_gather_5_s64_offset: ++** mov (x[0-9]+), #?5 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svldff1sh_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ldff1sh_gather_6_s64_offset: ++** ldff1sh z0\.d, p0/z, \[z0\.d, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_6_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, 6), ++ z0_res = svldff1sh_gather_offset_s64 (p0, z0, 6)) ++ ++/* ++** 
ldff1sh_gather_62_s64_offset: ++** ldff1sh z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_62_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, 62), ++ z0_res = svldff1sh_gather_offset_s64 (p0, z0, 62)) ++ ++/* ++** ldff1sh_gather_64_s64_offset: ++** mov (x[0-9]+), #?64 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_64_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, 64), ++ z0_res = svldff1sh_gather_offset_s64 (p0, z0, 64)) ++ ++/* ++** ldff1sh_gather_x0_s64_index: ++** lsl (x[0-9]+), x0, #?1 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, x0), ++ z0_res = svldff1sh_gather_index_s64 (p0, z0, x0)) ++ ++/* ++** ldff1sh_gather_m1_s64_index: ++** mov (x[0-9]+), #?-2 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_m1_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, -1), ++ z0_res = svldff1sh_gather_index_s64 (p0, z0, -1)) ++ ++/* ++** ldff1sh_gather_0_s64_index: ++** ldff1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, 0), ++ z0_res = svldff1sh_gather_index_s64 (p0, z0, 0)) ++ ++/* ++** ldff1sh_gather_5_s64_index: ++** ldff1sh z0\.d, p0/z, \[z0\.d, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, 5), ++ z0_res = svldff1sh_gather_index_s64 (p0, z0, 5)) ++ ++/* ++** ldff1sh_gather_31_s64_index: ++** ldff1sh z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_31_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, 31), ++ z0_res = svldff1sh_gather_index_s64 (p0, z0, 31)) ++ ++/* ++** ldff1sh_gather_32_s64_index: ++** mov (x[0-9]+), #?64 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_32_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, 32), ++ z0_res = svldff1sh_gather_index_s64 (p0, z0, 32)) ++ ++/* ++** ldff1sh_gather_x0_s64_s64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s64_s64offset, svint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_s64_s64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s64_s64offset, svint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_s64_s64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s64_s64offset, svint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_ext_s64_s64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_s64_s64offset, svint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ 
z0_res = svldff1sh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sh_gather_x0_s64_u64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_s64_u64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_s64_u64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_ext_s64_u64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1sh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sh_gather_x0_s64_s64index: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s64_s64index, svint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_s64_s64index: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s64_s64index, svint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_s64_s64index: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s64_s64index, svint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64index_s64 (p0, x0, z1), ++ z0_res = svldff1sh_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_ext_s64_s64index: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_s64_s64index, svint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1sh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sh_gather_x0_s64_u64index: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s64_u64index, svint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_s64_u64index: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s64_u64index, svint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_s64_u64index: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s64_u64index, svint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64index_s64 (p0, x0, z1), ++ z0_res = svldff1sh_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_ext_s64_u64index: ++** ldff1sh 
z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_s64_u64index, svint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1sh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c +new file mode 100644 +index 000000000..73a9be892 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c +@@ -0,0 +1,252 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sh_gather_u32_tied1: ++** ldff1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_u32 (p0, z0), ++ z0_res = svldff1sh_gather_u32 (p0, z0)) ++ ++/* ++** ldff1sh_gather_u32_untied: ++** ldff1sh z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_u32_untied, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_u32 (p0, z1), ++ z0_res = svldff1sh_gather_u32 (p0, z1)) ++ ++/* ++** ldff1sh_gather_x0_u32_offset: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, x0), ++ z0_res = svldff1sh_gather_offset_u32 (p0, z0, x0)) ++ ++/* ++** ldff1sh_gather_m2_u32_offset: ++** mov (x[0-9]+), #?-2 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_m2_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, -2), ++ z0_res = svldff1sh_gather_offset_u32 (p0, z0, -2)) ++ ++/* ++** ldff1sh_gather_0_u32_offset: ++** ldff1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, 0), ++ z0_res = svldff1sh_gather_offset_u32 (p0, z0, 0)) ++ ++/* ++** ldff1sh_gather_5_u32_offset: ++** mov (x[0-9]+), #?5 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, 5), ++ z0_res = svldff1sh_gather_offset_u32 (p0, z0, 5)) ++ ++/* ++** ldff1sh_gather_6_u32_offset: ++** ldff1sh z0\.s, p0/z, \[z0\.s, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_6_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, 6), ++ z0_res = svldff1sh_gather_offset_u32 (p0, z0, 6)) ++ ++/* ++** ldff1sh_gather_62_u32_offset: ++** ldff1sh z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_62_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, 62), ++ z0_res = svldff1sh_gather_offset_u32 (p0, z0, 62)) ++ ++/* ++** ldff1sh_gather_64_u32_offset: ++** mov (x[0-9]+), #?64 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_64_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, 64), ++ z0_res = svldff1sh_gather_offset_u32 (p0, z0, 64)) ++ ++/* ++** ldff1sh_gather_x0_u32_index: ++** lsl (x[0-9]+), x0, #?1 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_u32_index, svuint32_t, svuint32_t, ++ z0_res 
= svldff1sh_gather_u32base_index_u32 (p0, z0, x0), ++ z0_res = svldff1sh_gather_index_u32 (p0, z0, x0)) ++ ++/* ++** ldff1sh_gather_m1_u32_index: ++** mov (x[0-9]+), #?-2 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_m1_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_u32 (p0, z0, -1), ++ z0_res = svldff1sh_gather_index_u32 (p0, z0, -1)) ++ ++/* ++** ldff1sh_gather_0_u32_index: ++** ldff1sh z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_u32 (p0, z0, 0), ++ z0_res = svldff1sh_gather_index_u32 (p0, z0, 0)) ++ ++/* ++** ldff1sh_gather_5_u32_index: ++** ldff1sh z0\.s, p0/z, \[z0\.s, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_u32 (p0, z0, 5), ++ z0_res = svldff1sh_gather_index_u32 (p0, z0, 5)) ++ ++/* ++** ldff1sh_gather_31_u32_index: ++** ldff1sh z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_31_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_u32 (p0, z0, 31), ++ z0_res = svldff1sh_gather_index_u32 (p0, z0, 31)) ++ ++/* ++** ldff1sh_gather_32_u32_index: ++** mov (x[0-9]+), #?64 ++** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_32_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32base_index_u32 (p0, z0, 32), ++ z0_res = svldff1sh_gather_index_u32 (p0, z0, 32)) ++ ++/* ++** ldff1sh_gather_x0_u32_s32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u32_s32offset, svuint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_u32_s32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u32_s32offset, svuint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_u32_s32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u32_s32offset, svuint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32offset_u32 (p0, x0, z1), ++ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_x0_u32_u32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u32_u32offset, svuint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_u32_u32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u32_u32offset, svuint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_u32_u32offset: ++** ldff1sh z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u32_u32offset, svuint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32offset_u32 (p0, x0, z1), ++ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_x0_u32_s32index: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ 
++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u32_s32index, svuint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_u32_s32index: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u32_s32index, svuint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_u32_s32index: ++** ldff1sh z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u32_s32index, svuint32_t, int16_t, svint32_t, ++ z0_res = svldff1sh_gather_s32index_u32 (p0, x0, z1), ++ z0_res = svldff1sh_gather_index_u32 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_x0_u32_u32index: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u32_u32index, svuint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_u32_u32index: ++** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u32_u32index, svuint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_u32_u32index: ++** ldff1sh z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u32_u32index, svuint32_t, int16_t, svuint32_t, ++ z0_res = svldff1sh_gather_u32index_u32 (p0, x0, z1), ++ z0_res = svldff1sh_gather_index_u32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c +new file mode 100644 +index 000000000..94ea73b63 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c +@@ -0,0 +1,288 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sh_gather_u64_tied1: ++** ldff1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_u64 (p0, z0), ++ z0_res = svldff1sh_gather_u64 (p0, z0)) ++ ++/* ++** ldff1sh_gather_u64_untied: ++** ldff1sh z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_u64 (p0, z1), ++ z0_res = svldff1sh_gather_u64 (p0, z1)) ++ ++/* ++** ldff1sh_gather_x0_u64_offset: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svldff1sh_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ldff1sh_gather_m2_u64_offset: ++** mov (x[0-9]+), #?-2 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_m2_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, -2), ++ z0_res = svldff1sh_gather_offset_u64 (p0, z0, -2)) ++ ++/* ++** ldff1sh_gather_0_u64_offset: ++** ldff1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svldff1sh_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ldff1sh_gather_5_u64_offset: ++** mov (x[0-9]+), #?5 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svldff1sh_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ldff1sh_gather_6_u64_offset: ++** ldff1sh z0\.d, p0/z, \[z0\.d, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_6_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, 6), ++ z0_res = svldff1sh_gather_offset_u64 (p0, z0, 6)) ++ ++/* ++** ldff1sh_gather_62_u64_offset: ++** ldff1sh z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_62_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, 62), ++ z0_res = svldff1sh_gather_offset_u64 (p0, z0, 62)) ++ ++/* ++** ldff1sh_gather_64_u64_offset: ++** mov (x[0-9]+), #?64 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_64_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, 64), ++ z0_res = svldff1sh_gather_offset_u64 (p0, z0, 64)) ++ ++/* ++** ldff1sh_gather_x0_u64_index: ++** lsl (x[0-9]+), x0, #?1 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, x0), ++ z0_res = svldff1sh_gather_index_u64 (p0, z0, x0)) ++ ++/* ++** ldff1sh_gather_m1_u64_index: ++** mov (x[0-9]+), #?-2 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_m1_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, -1), ++ z0_res = svldff1sh_gather_index_u64 (p0, z0, -1)) ++ ++/* ++** ldff1sh_gather_0_u64_index: ++** ldff1sh z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, 0), ++ z0_res = svldff1sh_gather_index_u64 (p0, z0, 0)) 
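/* Editorial illustration, not taken from the patch above: the tests in this
   file exercise the overloaded ACLE first-faulting gather form
   svldff1sh_gather_index_u64 shown below.  The helper name and the use of
   <arm_sve.h>, svsetffr, svptrue_b64 and svrdffr here are assumptions for
   this sketch only, not part of the testsuite being added.  */
#include <arm_sve.h>

/* Gather int16_t elements from base + 2*idx[i], sign-extending each loaded
   halfword to 64 bits.  A first-faulting load only guarantees the first
   active element; the first-fault register (FFR) records which later lanes
   actually loaded.  */
svuint64_t
gather_sh_index_u64 (const int16_t *base, svuint64_t idx)
{
  svsetffr ();                    /* reset FFR so all lanes start active */
  svbool_t pg = svptrue_b64 ();   /* predicate: every 64-bit lane active */
  svuint64_t res = svldff1sh_gather_index_u64 (pg, base, idx);
  /* svrdffr () could now be read to see which lanes loaded successfully
     before using the remaining elements of res.  */
  return res;
}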
++ ++/* ++** ldff1sh_gather_5_u64_index: ++** ldff1sh z0\.d, p0/z, \[z0\.d, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, 5), ++ z0_res = svldff1sh_gather_index_u64 (p0, z0, 5)) ++ ++/* ++** ldff1sh_gather_31_u64_index: ++** ldff1sh z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_31_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, 31), ++ z0_res = svldff1sh_gather_index_u64 (p0, z0, 31)) ++ ++/* ++** ldff1sh_gather_32_u64_index: ++** mov (x[0-9]+), #?64 ++** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sh_gather_32_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, 32), ++ z0_res = svldff1sh_gather_index_u64 (p0, z0, 32)) ++ ++/* ++** ldff1sh_gather_x0_u64_s64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u64_s64offset, svuint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_u64_s64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u64_s64offset, svuint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_u64_s64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u64_s64offset, svuint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_ext_u64_s64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_u64_s64offset, svuint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1sh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sh_gather_x0_u64_u64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u64_u64offset, svuint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_u64_u64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u64_u64offset, svuint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_u64_u64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u64_u64offset, svuint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_ext_u64_u64offset: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_u64_u64offset, svuint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1sh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sh_gather_x0_u64_s64index: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u64_s64index, 
svuint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_u64_s64index: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u64_s64index, svuint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_u64_s64index: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u64_s64index, svuint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64index_u64 (p0, x0, z1), ++ z0_res = svldff1sh_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_ext_u64_s64index: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_u64_s64index, svuint64_t, int16_t, svint64_t, ++ z0_res = svldff1sh_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1sh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sh_gather_x0_u64_u64index: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u64_u64index, svuint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_tied1_u64_u64index: ++** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u64_u64index, svuint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svldff1sh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sh_gather_untied_u64_u64index: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u64_u64index, svuint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64index_u64 (p0, x0, z1), ++ z0_res = svldff1sh_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ldff1sh_gather_ext_u64_u64index: ++** ldff1sh z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_u64_u64index, svuint64_t, int16_t, svuint64_t, ++ z0_res = svldff1sh_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1sh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c +new file mode 100644 +index 000000000..81b64e836 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sh_s32_base: ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_s32_base, svint32_t, int16_t, ++ z0 = svldff1sh_s32 (p0, x0), ++ z0 = svldff1sh_s32 (p0, x0)) ++ ++/* ++** ldff1sh_s32_index: ++** ldff1sh z0\.s, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_s32_index, svint32_t, int16_t, ++ z0 = svldff1sh_s32 (p0, x0 + x1), ++ z0 = svldff1sh_s32 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_s32_1: ++** inch x0 ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_s32_1, svint32_t, int16_t, ++ z0 = svldff1sh_s32 (p0, x0 + svcntw ()), ++ z0 = svldff1sh_s32 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1sh_s32_m1: ++** dech x0 ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_s32_m1, svint32_t, int16_t, ++ z0 = svldff1sh_s32 (p0, x0 - svcntw ()), ++ z0 = svldff1sh_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1sh_vnum_s32_0: ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_s32_0, svint32_t, int16_t, ++ z0 = svldff1sh_vnum_s32 (p0, x0, 0), ++ z0 = svldff1sh_vnum_s32 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_vnum_s32_1: ++** inch x0 ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_s32_1, svint32_t, int16_t, ++ z0 = svldff1sh_vnum_s32 (p0, x0, 1), ++ z0 = svldff1sh_vnum_s32 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_vnum_s32_m1: ++** dech x0 ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_s32_m1, svint32_t, int16_t, ++ z0 = svldff1sh_vnum_s32 (p0, x0, -1), ++ z0 = svldff1sh_vnum_s32 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1sh_vnum_s32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1sh z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_s32_x1, svint32_t, int16_t, ++ z0 = svldff1sh_vnum_s32 (p0, x0, x1), ++ z0 = svldff1sh_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c +new file mode 100644 +index 000000000..453b3ff24 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sh_s64_base: ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_s64_base, svint64_t, int16_t, ++ z0 = svldff1sh_s64 (p0, x0), ++ z0 = svldff1sh_s64 (p0, x0)) ++ ++/* ++** ldff1sh_s64_index: ++** ldff1sh z0\.d, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_s64_index, svint64_t, int16_t, ++ z0 = svldff1sh_s64 (p0, x0 + x1), ++ z0 = svldff1sh_s64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_s64_1: ++** incw x0 ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_s64_1, svint64_t, int16_t, ++ z0 = svldff1sh_s64 (p0, x0 + svcntd ()), ++ z0 = svldff1sh_s64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_s64_m1: ++** decw x0 ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_s64_m1, svint64_t, int16_t, ++ z0 = svldff1sh_s64 (p0, x0 - svcntd ()), ++ z0 = svldff1sh_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1sh_vnum_s64_0: ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_s64_0, svint64_t, int16_t, ++ z0 = svldff1sh_vnum_s64 (p0, x0, 0), ++ z0 = svldff1sh_vnum_s64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_vnum_s64_1: ++** incw x0 ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_s64_1, svint64_t, int16_t, ++ z0 = svldff1sh_vnum_s64 (p0, x0, 1), ++ z0 = svldff1sh_vnum_s64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1sh_vnum_s64_m1: ++** decw x0 ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_s64_m1, svint64_t, int16_t, ++ z0 = svldff1sh_vnum_s64 (p0, x0, -1), ++ z0 = svldff1sh_vnum_s64 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1sh_vnum_s64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1sh z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_s64_x1, svint64_t, int16_t, ++ z0 = svldff1sh_vnum_s64 (p0, x0, x1), ++ z0 = svldff1sh_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c +new file mode 100644 +index 000000000..bbbed79dc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sh_u32_base: ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_u32_base, svuint32_t, int16_t, ++ z0 = svldff1sh_u32 (p0, x0), ++ z0 = svldff1sh_u32 (p0, x0)) ++ ++/* ++** ldff1sh_u32_index: ++** ldff1sh z0\.s, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_u32_index, svuint32_t, int16_t, ++ z0 = svldff1sh_u32 (p0, x0 + x1), ++ z0 = svldff1sh_u32 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_u32_1: ++** inch x0 ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_u32_1, svuint32_t, int16_t, ++ z0 = svldff1sh_u32 (p0, x0 + svcntw ()), ++ z0 = svldff1sh_u32 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_u32_m1: ++** dech x0 ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_u32_m1, svuint32_t, int16_t, ++ z0 = svldff1sh_u32 (p0, x0 - svcntw ()), ++ z0 = svldff1sh_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1sh_vnum_u32_0: ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_u32_0, svuint32_t, int16_t, ++ z0 = svldff1sh_vnum_u32 (p0, x0, 0), ++ z0 = svldff1sh_vnum_u32 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_vnum_u32_1: ++** inch x0 ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_u32_1, svuint32_t, int16_t, ++ z0 = svldff1sh_vnum_u32 (p0, x0, 1), ++ z0 = svldff1sh_vnum_u32 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_vnum_u32_m1: ++** dech x0 ++** ldff1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_u32_m1, svuint32_t, int16_t, ++ z0 = svldff1sh_vnum_u32 (p0, x0, -1), ++ z0 = svldff1sh_vnum_u32 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1sh_vnum_u32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1sh z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_u32_x1, svuint32_t, int16_t, ++ z0 = svldff1sh_vnum_u32 (p0, x0, x1), ++ z0 = svldff1sh_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c +new file mode 100644 +index 000000000..5430e256b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sh_u64_base: ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_u64_base, svuint64_t, int16_t, ++ z0 = svldff1sh_u64 (p0, x0), ++ z0 = svldff1sh_u64 (p0, x0)) ++ ++/* ++** ldff1sh_u64_index: ++** ldff1sh z0\.d, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_u64_index, svuint64_t, int16_t, ++ z0 = svldff1sh_u64 (p0, x0 + x1), ++ z0 = svldff1sh_u64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_u64_1: ++** incw x0 ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_u64_1, svuint64_t, int16_t, ++ z0 = svldff1sh_u64 (p0, x0 + svcntd ()), ++ z0 = svldff1sh_u64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_u64_m1: ++** decw x0 ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_u64_m1, svuint64_t, int16_t, ++ z0 = svldff1sh_u64 (p0, x0 - svcntd ()), ++ z0 = svldff1sh_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1sh_vnum_u64_0: ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_u64_0, svuint64_t, int16_t, ++ z0 = svldff1sh_vnum_u64 (p0, x0, 0), ++ z0 = svldff1sh_vnum_u64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_vnum_u64_1: ++** incw x0 ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_u64_1, svuint64_t, int16_t, ++ z0 = svldff1sh_vnum_u64 (p0, x0, 1), ++ z0 = svldff1sh_vnum_u64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sh_vnum_u64_m1: ++** decw x0 ++** ldff1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_u64_m1, svuint64_t, int16_t, ++ z0 = svldff1sh_vnum_u64 (p0, x0, -1), ++ z0 = svldff1sh_vnum_u64 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1sh_vnum_u64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1sh z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1sh_vnum_u64_x1, svuint64_t, int16_t, ++ z0 = svldff1sh_vnum_u64 (p0, x0, x1), ++ z0 = svldff1sh_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c +new file mode 100644 +index 000000000..e5da8a83d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c +@@ -0,0 +1,308 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sw_gather_s64_tied1: ++** ldff1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_s64 (p0, z0), ++ z0_res = svldff1sw_gather_s64 (p0, z0)) ++ ++/* ++** ldff1sw_gather_s64_untied: ++** ldff1sw z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_s64 (p0, z1), ++ z0_res = svldff1sw_gather_s64 (p0, z1)) ++ ++/* ++** ldff1sw_gather_x0_s64_offset: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svldff1sw_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ldff1sw_gather_m4_s64_offset: ++** mov (x[0-9]+), #?-4 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_m4_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, -4), ++ z0_res = svldff1sw_gather_offset_s64 (p0, z0, -4)) ++ ++/* ++** ldff1sw_gather_0_s64_offset: ++** ldff1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ldff1sw_gather_5_s64_offset: ++** mov (x[0-9]+), #?5 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ldff1sw_gather_6_s64_offset: ++** mov (x[0-9]+), #?6 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_6_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 6), ++ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 6)) ++ ++/* ++** ldff1sw_gather_7_s64_offset: ++** mov (x[0-9]+), #?7 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_7_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 7), ++ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 7)) ++ ++/* ++** ldff1sw_gather_8_s64_offset: ++** ldff1sw z0\.d, p0/z, \[z0\.d, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_8_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 8), ++ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 8)) ++ ++/* ++** ldff1sw_gather_124_s64_offset: ++** ldff1sw z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_124_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 124), ++ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 124)) ++ ++/* ++** ldff1sw_gather_128_s64_offset: ++** mov (x[0-9]+), #?128 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_128_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 128), ++ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 128)) ++ ++/* ++** ldff1sw_gather_x0_s64_index: ++** lsl (x[0-9]+), x0, #?2 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_x0_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, x0), ++ z0_res = 
svldff1sw_gather_index_s64 (p0, z0, x0)) ++ ++/* ++** ldff1sw_gather_m1_s64_index: ++** mov (x[0-9]+), #?-4 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_m1_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, -1), ++ z0_res = svldff1sw_gather_index_s64 (p0, z0, -1)) ++ ++/* ++** ldff1sw_gather_0_s64_index: ++** ldff1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_0_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, 0), ++ z0_res = svldff1sw_gather_index_s64 (p0, z0, 0)) ++ ++/* ++** ldff1sw_gather_5_s64_index: ++** ldff1sw z0\.d, p0/z, \[z0\.d, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_5_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, 5), ++ z0_res = svldff1sw_gather_index_s64 (p0, z0, 5)) ++ ++/* ++** ldff1sw_gather_31_s64_index: ++** ldff1sw z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_31_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, 31), ++ z0_res = svldff1sw_gather_index_s64 (p0, z0, 31)) ++ ++/* ++** ldff1sw_gather_32_s64_index: ++** mov (x[0-9]+), #?128 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_32_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, 32), ++ z0_res = svldff1sw_gather_index_s64 (p0, z0, 32)) ++ ++/* ++** ldff1sw_gather_x0_s64_s64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_s64_s64offset, svint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_tied1_s64_s64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_s64_s64offset, svint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_untied_s64_s64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_s64_s64offset, svint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1sw_gather_ext_s64_s64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_s64_s64offset, svint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1sw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sw_gather_x0_s64_u64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_s64_u64offset, svint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_tied1_s64_u64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_s64_u64offset, svint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_untied_s64_u64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_s64_u64offset, svint64_t, int32_t, 
svuint64_t, ++ z0_res = svldff1sw_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1sw_gather_ext_s64_u64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_s64_u64offset, svint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1sw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sw_gather_x0_s64_s64index: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_s64_s64index, svint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_tied1_s64_s64index: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_s64_s64index, svint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_untied_s64_s64index: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_s64_s64index, svint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64index_s64 (p0, x0, z1), ++ z0_res = svldff1sw_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ldff1sw_gather_ext_s64_s64index: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_s64_s64index, svint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1sw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sw_gather_x0_s64_u64index: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_s64_u64index, svint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_tied1_s64_u64index: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_s64_u64index, svint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_untied_s64_u64index: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_s64_u64index, svint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64index_s64 (p0, x0, z1), ++ z0_res = svldff1sw_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ldff1sw_gather_ext_s64_u64index: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_s64_u64index, svint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1sw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c +new file mode 100644 +index 000000000..411428756 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c +@@ -0,0 +1,308 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sw_gather_u64_tied1: ++** ldff1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_u64 (p0, z0), ++ z0_res = svldff1sw_gather_u64 (p0, z0)) ++ ++/* ++** ldff1sw_gather_u64_untied: ++** ldff1sw z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_u64 (p0, z1), ++ z0_res = svldff1sw_gather_u64 (p0, z1)) ++ ++/* ++** ldff1sw_gather_x0_u64_offset: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svldff1sw_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ldff1sw_gather_m4_u64_offset: ++** mov (x[0-9]+), #?-4 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_m4_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, -4), ++ z0_res = svldff1sw_gather_offset_u64 (p0, z0, -4)) ++ ++/* ++** ldff1sw_gather_0_u64_offset: ++** ldff1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ldff1sw_gather_5_u64_offset: ++** mov (x[0-9]+), #?5 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ldff1sw_gather_6_u64_offset: ++** mov (x[0-9]+), #?6 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_6_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 6), ++ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 6)) ++ ++/* ++** ldff1sw_gather_7_u64_offset: ++** mov (x[0-9]+), #?7 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_7_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 7), ++ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 7)) ++ ++/* ++** ldff1sw_gather_8_u64_offset: ++** ldff1sw z0\.d, p0/z, \[z0\.d, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_8_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 8), ++ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 8)) ++ ++/* ++** ldff1sw_gather_124_u64_offset: ++** ldff1sw z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_124_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 124), ++ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 124)) ++ ++/* ++** ldff1sw_gather_128_u64_offset: ++** mov (x[0-9]+), #?128 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_128_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 128), ++ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 128)) ++ ++/* ++** ldff1sw_gather_x0_u64_index: ++** lsl (x[0-9]+), x0, #?2 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_x0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, x0), ++ z0_res = 
svldff1sw_gather_index_u64 (p0, z0, x0)) ++ ++/* ++** ldff1sw_gather_m1_u64_index: ++** mov (x[0-9]+), #?-4 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_m1_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, -1), ++ z0_res = svldff1sw_gather_index_u64 (p0, z0, -1)) ++ ++/* ++** ldff1sw_gather_0_u64_index: ++** ldff1sw z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, 0), ++ z0_res = svldff1sw_gather_index_u64 (p0, z0, 0)) ++ ++/* ++** ldff1sw_gather_5_u64_index: ++** ldff1sw z0\.d, p0/z, \[z0\.d, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_5_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, 5), ++ z0_res = svldff1sw_gather_index_u64 (p0, z0, 5)) ++ ++/* ++** ldff1sw_gather_31_u64_index: ++** ldff1sw z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_31_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, 31), ++ z0_res = svldff1sw_gather_index_u64 (p0, z0, 31)) ++ ++/* ++** ldff1sw_gather_32_u64_index: ++** mov (x[0-9]+), #?128 ++** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1sw_gather_32_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, 32), ++ z0_res = svldff1sw_gather_index_u64 (p0, z0, 32)) ++ ++/* ++** ldff1sw_gather_x0_u64_s64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_u64_s64offset, svuint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_tied1_u64_s64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_u64_s64offset, svuint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_untied_u64_s64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_u64_s64offset, svuint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1sw_gather_ext_u64_s64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_u64_s64offset, svuint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1sw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sw_gather_x0_u64_u64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_u64_u64offset, svuint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_tied1_u64_u64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_u64_u64offset, svuint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_untied_u64_u64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_u64_u64offset, svuint64_t, 
int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1sw_gather_ext_u64_u64offset: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_u64_u64offset, svuint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1sw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sw_gather_x0_u64_s64index: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_u64_s64index, svuint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_tied1_u64_s64index: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_u64_s64index, svuint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_untied_u64_s64index: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_u64_s64index, svuint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64index_u64 (p0, x0, z1), ++ z0_res = svldff1sw_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ldff1sw_gather_ext_u64_s64index: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_u64_s64index, svuint64_t, int32_t, svint64_t, ++ z0_res = svldff1sw_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1sw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1sw_gather_x0_u64_u64index: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_u64_u64index, svuint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_tied1_u64_u64index: ++** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_u64_u64index, svuint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svldff1sw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1sw_gather_untied_u64_u64index: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_u64_u64index, svuint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64index_u64 (p0, x0, z1), ++ z0_res = svldff1sw_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ldff1sw_gather_ext_u64_u64index: ++** ldff1sw z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_u64_u64index, svuint64_t, int32_t, svuint64_t, ++ z0_res = svldff1sw_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1sw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c +new file mode 100644 +index 000000000..d795ace63 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sw_s64_base: ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_s64_base, svint64_t, int32_t, ++ z0 = svldff1sw_s64 (p0, x0), ++ z0 = svldff1sw_s64 (p0, x0)) ++ ++/* ++** ldff1sw_s64_index: ++** ldff1sw z0\.d, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_s64_index, svint64_t, int32_t, ++ z0 = svldff1sw_s64 (p0, x0 + x1), ++ z0 = svldff1sw_s64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sw_s64_1: ++** inch x0 ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_s64_1, svint64_t, int32_t, ++ z0 = svldff1sw_s64 (p0, x0 + svcntd ()), ++ z0 = svldff1sw_s64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sw_s64_m1: ++** dech x0 ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_s64_m1, svint64_t, int32_t, ++ z0 = svldff1sw_s64 (p0, x0 - svcntd ()), ++ z0 = svldff1sw_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1sw_vnum_s64_0: ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_vnum_s64_0, svint64_t, int32_t, ++ z0 = svldff1sw_vnum_s64 (p0, x0, 0), ++ z0 = svldff1sw_vnum_s64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sw_vnum_s64_1: ++** inch x0 ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_vnum_s64_1, svint64_t, int32_t, ++ z0 = svldff1sw_vnum_s64 (p0, x0, 1), ++ z0 = svldff1sw_vnum_s64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sw_vnum_s64_m1: ++** dech x0 ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_vnum_s64_m1, svint64_t, int32_t, ++ z0 = svldff1sw_vnum_s64 (p0, x0, -1), ++ z0 = svldff1sw_vnum_s64 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1sw_vnum_s64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1sw z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_vnum_s64_x1, svint64_t, int32_t, ++ z0 = svldff1sw_vnum_s64 (p0, x0, x1), ++ z0 = svldff1sw_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c +new file mode 100644 +index 000000000..6caf2f504 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1sw_u64_base: ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_u64_base, svuint64_t, int32_t, ++ z0 = svldff1sw_u64 (p0, x0), ++ z0 = svldff1sw_u64 (p0, x0)) ++ ++/* ++** ldff1sw_u64_index: ++** ldff1sw z0\.d, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_u64_index, svuint64_t, int32_t, ++ z0 = svldff1sw_u64 (p0, x0 + x1), ++ z0 = svldff1sw_u64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sw_u64_1: ++** inch x0 ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_u64_1, svuint64_t, int32_t, ++ z0 = svldff1sw_u64 (p0, x0 + svcntd ()), ++ z0 = svldff1sw_u64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1sw_u64_m1: ++** dech x0 ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_u64_m1, svuint64_t, int32_t, ++ z0 = svldff1sw_u64 (p0, x0 - svcntd ()), ++ z0 = svldff1sw_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1sw_vnum_u64_0: ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_vnum_u64_0, svuint64_t, int32_t, ++ z0 = svldff1sw_vnum_u64 (p0, x0, 0), ++ z0 = svldff1sw_vnum_u64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sw_vnum_u64_1: ++** inch x0 ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_vnum_u64_1, svuint64_t, int32_t, ++ z0 = svldff1sw_vnum_u64 (p0, x0, 1), ++ z0 = svldff1sw_vnum_u64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1sw_vnum_u64_m1: ++** dech x0 ++** ldff1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_vnum_u64_m1, svuint64_t, int32_t, ++ z0 = svldff1sw_vnum_u64 (p0, x0, -1), ++ z0 = svldff1sw_vnum_u64 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1sw_vnum_u64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1sw z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1sw_vnum_u64_x1, svuint64_t, int32_t, ++ z0 = svldff1sw_vnum_u64 (p0, x0, x1), ++ z0 = svldff1sw_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c +new file mode 100644 +index 000000000..af0be08d2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c +@@ -0,0 +1,131 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1ub_gather_s32_tied1: ++** ldff1b z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_s32_tied1, svint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_s32 (p0, z0), ++ z0_res = svldff1ub_gather_s32 (p0, z0)) ++ ++/* ++** ldff1ub_gather_s32_untied: ++** ldff1b z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_s32_untied, svint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_s32 (p0, z1), ++ z0_res = svldff1ub_gather_s32 (p0, z1)) ++ ++/* ++** ldff1ub_gather_x0_s32_offset: ++** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_x0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, x0), ++ z0_res = svldff1ub_gather_offset_s32 (p0, z0, x0)) ++ ++/* ++** ldff1ub_gather_m1_s32_offset: ++** mov (x[0-9]+), #?-1 ++** ldff1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_m1_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, -1), ++ z0_res = svldff1ub_gather_offset_s32 (p0, z0, -1)) ++ ++/* ++** ldff1ub_gather_0_s32_offset: ++** ldff1b z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, 0), ++ z0_res = svldff1ub_gather_offset_s32 (p0, z0, 0)) ++ ++/* ++** ldff1ub_gather_5_s32_offset: ++** ldff1b z0\.s, p0/z, \[z0\.s, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_5_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, 5), ++ z0_res = svldff1ub_gather_offset_s32 (p0, z0, 5)) ++ ++/* ++** ldff1ub_gather_31_s32_offset: ++** ldff1b z0\.s, p0/z, 
\[z0\.s, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_31_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, 31), ++ z0_res = svldff1ub_gather_offset_s32 (p0, z0, 31)) ++ ++/* ++** ldff1ub_gather_32_s32_offset: ++** mov (x[0-9]+), #?32 ++** ldff1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_32_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, 32), ++ z0_res = svldff1ub_gather_offset_s32 (p0, z0, 32)) ++ ++/* ++** ldff1ub_gather_x0_s32_s32offset: ++** ldff1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_s32_s32offset, svint32_t, uint8_t, svint32_t, ++ z0_res = svldff1ub_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_tied1_s32_s32offset: ++** ldff1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_s32_s32offset, svint32_t, uint8_t, svint32_t, ++ z0_res = svldff1ub_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_untied_s32_s32offset: ++** ldff1b z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_s32_s32offset, svint32_t, uint8_t, svint32_t, ++ z0_res = svldff1ub_gather_s32offset_s32 (p0, x0, z1), ++ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ldff1ub_gather_x0_s32_u32offset: ++** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_s32_u32offset, svint32_t, uint8_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_tied1_s32_u32offset: ++** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_s32_u32offset, svint32_t, uint8_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_untied_s32_u32offset: ++** ldff1b z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_s32_u32offset, svint32_t, uint8_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32offset_s32 (p0, x0, z1), ++ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c +new file mode 100644 +index 000000000..43124dd89 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c +@@ -0,0 +1,149 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1ub_gather_s64_tied1: ++** ldff1b z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_s64 (p0, z0), ++ z0_res = svldff1ub_gather_s64 (p0, z0)) ++ ++/* ++** ldff1ub_gather_s64_untied: ++** ldff1b z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_s64 (p0, z1), ++ z0_res = svldff1ub_gather_s64 (p0, z1)) ++ ++/* ++** ldff1ub_gather_x0_s64_offset: ++** ldff1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svldff1ub_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ldff1ub_gather_m1_s64_offset: ++** mov (x[0-9]+), #?-1 ++** ldff1b z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_m1_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, -1), ++ z0_res = svldff1ub_gather_offset_s64 (p0, z0, -1)) ++ ++/* ++** ldff1ub_gather_0_s64_offset: ++** ldff1b z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svldff1ub_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ldff1ub_gather_5_s64_offset: ++** ldff1b z0\.d, p0/z, \[z0\.d, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svldff1ub_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ldff1ub_gather_31_s64_offset: ++** ldff1b z0\.d, p0/z, \[z0\.d, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_31_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, 31), ++ z0_res = svldff1ub_gather_offset_s64 (p0, z0, 31)) ++ ++/* ++** ldff1ub_gather_32_s64_offset: ++** mov (x[0-9]+), #?32 ++** ldff1b z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_32_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, 32), ++ z0_res = svldff1ub_gather_offset_s64 (p0, z0, 32)) ++ ++/* ++** ldff1ub_gather_x0_s64_s64offset: ++** ldff1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_s64_s64offset, svint64_t, uint8_t, svint64_t, ++ z0_res = svldff1ub_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_tied1_s64_s64offset: ++** ldff1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_s64_s64offset, svint64_t, uint8_t, svint64_t, ++ z0_res = svldff1ub_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_untied_s64_s64offset: ++** ldff1b z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_s64_s64offset, svint64_t, uint8_t, svint64_t, ++ z0_res = svldff1ub_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1ub_gather_ext_s64_s64offset: ++** ldff1b z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_ext_s64_s64offset, svint64_t, uint8_t, svint64_t, ++ z0_res = svldff1ub_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1ub_gather_offset_s64 (p0, x0, 
svextw_x (p0, z1))) ++ ++/* ++** ldff1ub_gather_x0_s64_u64offset: ++** ldff1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_s64_u64offset, svint64_t, uint8_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_tied1_s64_u64offset: ++** ldff1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_s64_u64offset, svint64_t, uint8_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_untied_s64_u64offset: ++** ldff1b z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_s64_u64offset, svint64_t, uint8_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1ub_gather_ext_s64_u64offset: ++** ldff1b z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_ext_s64_u64offset, svint64_t, uint8_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1ub_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c +new file mode 100644 +index 000000000..90c4e58a2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c +@@ -0,0 +1,131 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1ub_gather_u32_tied1: ++** ldff1b z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_u32 (p0, z0), ++ z0_res = svldff1ub_gather_u32 (p0, z0)) ++ ++/* ++** ldff1ub_gather_u32_untied: ++** ldff1b z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_u32_untied, svuint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_u32 (p0, z1), ++ z0_res = svldff1ub_gather_u32 (p0, z1)) ++ ++/* ++** ldff1ub_gather_x0_u32_offset: ++** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_x0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, x0), ++ z0_res = svldff1ub_gather_offset_u32 (p0, z0, x0)) ++ ++/* ++** ldff1ub_gather_m1_u32_offset: ++** mov (x[0-9]+), #?-1 ++** ldff1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_m1_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, -1), ++ z0_res = svldff1ub_gather_offset_u32 (p0, z0, -1)) ++ ++/* ++** ldff1ub_gather_0_u32_offset: ++** ldff1b z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, 0), ++ z0_res = svldff1ub_gather_offset_u32 (p0, z0, 0)) ++ ++/* ++** ldff1ub_gather_5_u32_offset: ++** ldff1b z0\.s, p0/z, \[z0\.s, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_5_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, 5), ++ z0_res = svldff1ub_gather_offset_u32 (p0, z0, 5)) ++ ++/* ++** ldff1ub_gather_31_u32_offset: ++** ldff1b z0\.s, p0/z, \[z0\.s, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_31_u32_offset, svuint32_t, 
svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, 31), ++ z0_res = svldff1ub_gather_offset_u32 (p0, z0, 31)) ++ ++/* ++** ldff1ub_gather_32_u32_offset: ++** mov (x[0-9]+), #?32 ++** ldff1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_32_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, 32), ++ z0_res = svldff1ub_gather_offset_u32 (p0, z0, 32)) ++ ++/* ++** ldff1ub_gather_x0_u32_s32offset: ++** ldff1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_u32_s32offset, svuint32_t, uint8_t, svint32_t, ++ z0_res = svldff1ub_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_tied1_u32_s32offset: ++** ldff1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_u32_s32offset, svuint32_t, uint8_t, svint32_t, ++ z0_res = svldff1ub_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_untied_u32_s32offset: ++** ldff1b z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_u32_s32offset, svuint32_t, uint8_t, svint32_t, ++ z0_res = svldff1ub_gather_s32offset_u32 (p0, x0, z1), ++ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ldff1ub_gather_x0_u32_u32offset: ++** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_u32_u32offset, svuint32_t, uint8_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_tied1_u32_u32offset: ++** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_u32_u32offset, svuint32_t, uint8_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_untied_u32_u32offset: ++** ldff1b z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_u32_u32offset, svuint32_t, uint8_t, svuint32_t, ++ z0_res = svldff1ub_gather_u32offset_u32 (p0, x0, z1), ++ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c +new file mode 100644 +index 000000000..302623a40 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c +@@ -0,0 +1,149 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1ub_gather_u64_tied1: ++** ldff1b z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_u64 (p0, z0), ++ z0_res = svldff1ub_gather_u64 (p0, z0)) ++ ++/* ++** ldff1ub_gather_u64_untied: ++** ldff1b z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_u64 (p0, z1), ++ z0_res = svldff1ub_gather_u64 (p0, z1)) ++ ++/* ++** ldff1ub_gather_x0_u64_offset: ++** ldff1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svldff1ub_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ldff1ub_gather_m1_u64_offset: ++** mov (x[0-9]+), #?-1 ++** ldff1b z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_m1_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, -1), ++ z0_res = svldff1ub_gather_offset_u64 (p0, z0, -1)) ++ ++/* ++** ldff1ub_gather_0_u64_offset: ++** ldff1b z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svldff1ub_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ldff1ub_gather_5_u64_offset: ++** ldff1b z0\.d, p0/z, \[z0\.d, #5\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svldff1ub_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ldff1ub_gather_31_u64_offset: ++** ldff1b z0\.d, p0/z, \[z0\.d, #31\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_31_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, 31), ++ z0_res = svldff1ub_gather_offset_u64 (p0, z0, 31)) ++ ++/* ++** ldff1ub_gather_32_u64_offset: ++** mov (x[0-9]+), #?32 ++** ldff1b z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1ub_gather_32_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, 32), ++ z0_res = svldff1ub_gather_offset_u64 (p0, z0, 32)) ++ ++/* ++** ldff1ub_gather_x0_u64_s64offset: ++** ldff1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ z0_res = svldff1ub_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_tied1_u64_s64offset: ++** ldff1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ z0_res = svldff1ub_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_untied_u64_s64offset: ++** ldff1b z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ z0_res = svldff1ub_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1ub_gather_ext_u64_s64offset: ++** ldff1b z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_ext_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ z0_res = svldff1ub_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1ub_gather_offset_u64 
(p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1ub_gather_x0_u64_u64offset: ++** ldff1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_tied1_u64_u64offset: ++** ldff1b z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1ub_gather_untied_u64_u64offset: ++** ldff1b z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1ub_gather_ext_u64_u64offset: ++** ldff1b z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1ub_gather_ext_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ z0_res = svldff1ub_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1ub_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c +new file mode 100644 +index 000000000..88ad2d1dc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1ub_s16_base: ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s16_base, svint16_t, uint8_t, ++ z0 = svldff1ub_s16 (p0, x0), ++ z0 = svldff1ub_s16 (p0, x0)) ++ ++/* ++** ldff1ub_s16_index: ++** ldff1b z0\.h, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s16_index, svint16_t, uint8_t, ++ z0 = svldff1ub_s16 (p0, x0 + x1), ++ z0 = svldff1ub_s16 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_s16_1: ++** inch x0 ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s16_1, svint16_t, uint8_t, ++ z0 = svldff1ub_s16 (p0, x0 + svcnth ()), ++ z0 = svldff1ub_s16 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_s16_m1: ++** dech x0 ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s16_m1, svint16_t, uint8_t, ++ z0 = svldff1ub_s16 (p0, x0 - svcnth ()), ++ z0 = svldff1ub_s16 (p0, x0 - svcnth ())) ++ ++/* ++** ldff1ub_vnum_s16_0: ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s16_0, svint16_t, uint8_t, ++ z0 = svldff1ub_vnum_s16 (p0, x0, 0), ++ z0 = svldff1ub_vnum_s16 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_vnum_s16_1: ++** inch x0 ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s16_1, svint16_t, uint8_t, ++ z0 = svldff1ub_vnum_s16 (p0, x0, 1), ++ z0 = svldff1ub_vnum_s16 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1ub_vnum_s16_m1: ++** dech x0 ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s16_m1, svint16_t, uint8_t, ++ z0 = svldff1ub_vnum_s16 (p0, x0, -1), ++ z0 = svldff1ub_vnum_s16 (p0, x0, -1)) ++ ++/* ++** ldff1ub_vnum_s16_x1: ++** cnth (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1b z0\.h, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1b z0\.h, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s16_x1, svint16_t, uint8_t, ++ z0 = svldff1ub_vnum_s16 (p0, x0, x1), ++ z0 = svldff1ub_vnum_s16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c +new file mode 100644 +index 000000000..e8e06411f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1ub_s32_base: ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s32_base, svint32_t, uint8_t, ++ z0 = svldff1ub_s32 (p0, x0), ++ z0 = svldff1ub_s32 (p0, x0)) ++ ++/* ++** ldff1ub_s32_index: ++** ldff1b z0\.s, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s32_index, svint32_t, uint8_t, ++ z0 = svldff1ub_s32 (p0, x0 + x1), ++ z0 = svldff1ub_s32 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_s32_1: ++** incw x0 ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s32_1, svint32_t, uint8_t, ++ z0 = svldff1ub_s32 (p0, x0 + svcntw ()), ++ z0 = svldff1ub_s32 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_s32_m1: ++** decw x0 ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s32_m1, svint32_t, uint8_t, ++ z0 = svldff1ub_s32 (p0, x0 - svcntw ()), ++ z0 = svldff1ub_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1ub_vnum_s32_0: ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s32_0, svint32_t, uint8_t, ++ z0 = svldff1ub_vnum_s32 (p0, x0, 0), ++ z0 = svldff1ub_vnum_s32 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_vnum_s32_1: ++** incw x0 ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s32_1, svint32_t, uint8_t, ++ z0 = svldff1ub_vnum_s32 (p0, x0, 1), ++ z0 = svldff1ub_vnum_s32 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_vnum_s32_m1: ++** decw x0 ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s32_m1, svint32_t, uint8_t, ++ z0 = svldff1ub_vnum_s32 (p0, x0, -1), ++ z0 = svldff1ub_vnum_s32 (p0, x0, -1)) ++ ++/* ++** ldff1ub_vnum_s32_x1: ++** cntw (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1b z0\.s, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1b z0\.s, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s32_x1, svint32_t, uint8_t, ++ z0 = svldff1ub_vnum_s32 (p0, x0, x1), ++ z0 = svldff1ub_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c +new file mode 100644 +index 000000000..21d02ddb7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1ub_s64_base: ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s64_base, svint64_t, uint8_t, ++ z0 = svldff1ub_s64 (p0, x0), ++ z0 = svldff1ub_s64 (p0, x0)) ++ ++/* ++** ldff1ub_s64_index: ++** ldff1b z0\.d, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s64_index, svint64_t, uint8_t, ++ z0 = svldff1ub_s64 (p0, x0 + x1), ++ z0 = svldff1ub_s64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_s64_1: ++** incd x0 ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s64_1, svint64_t, uint8_t, ++ z0 = svldff1ub_s64 (p0, x0 + svcntd ()), ++ z0 = svldff1ub_s64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_s64_m1: ++** decd x0 ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_s64_m1, svint64_t, uint8_t, ++ z0 = svldff1ub_s64 (p0, x0 - svcntd ()), ++ z0 = svldff1ub_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1ub_vnum_s64_0: ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s64_0, svint64_t, uint8_t, ++ z0 = svldff1ub_vnum_s64 (p0, x0, 0), ++ z0 = svldff1ub_vnum_s64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_vnum_s64_1: ++** incd x0 ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s64_1, svint64_t, uint8_t, ++ z0 = svldff1ub_vnum_s64 (p0, x0, 1), ++ z0 = svldff1ub_vnum_s64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_vnum_s64_m1: ++** decd x0 ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s64_m1, svint64_t, uint8_t, ++ z0 = svldff1ub_vnum_s64 (p0, x0, -1), ++ z0 = svldff1ub_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ldff1ub_vnum_s64_x1: ++** cntd (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1b z0\.d, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1b z0\.d, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_s64_x1, svint64_t, uint8_t, ++ z0 = svldff1ub_vnum_s64 (p0, x0, x1), ++ z0 = svldff1ub_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c +new file mode 100644 +index 000000000..904cb027e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1ub_u16_base: ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u16_base, svuint16_t, uint8_t, ++ z0 = svldff1ub_u16 (p0, x0), ++ z0 = svldff1ub_u16 (p0, x0)) ++ ++/* ++** ldff1ub_u16_index: ++** ldff1b z0\.h, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u16_index, svuint16_t, uint8_t, ++ z0 = svldff1ub_u16 (p0, x0 + x1), ++ z0 = svldff1ub_u16 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_u16_1: ++** inch x0 ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u16_1, svuint16_t, uint8_t, ++ z0 = svldff1ub_u16 (p0, x0 + svcnth ()), ++ z0 = svldff1ub_u16 (p0, x0 + svcnth ())) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1ub_u16_m1: ++** dech x0 ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u16_m1, svuint16_t, uint8_t, ++ z0 = svldff1ub_u16 (p0, x0 - svcnth ()), ++ z0 = svldff1ub_u16 (p0, x0 - svcnth ())) ++ ++/* ++** ldff1ub_vnum_u16_0: ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u16_0, svuint16_t, uint8_t, ++ z0 = svldff1ub_vnum_u16 (p0, x0, 0), ++ z0 = svldff1ub_vnum_u16 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_vnum_u16_1: ++** inch x0 ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u16_1, svuint16_t, uint8_t, ++ z0 = svldff1ub_vnum_u16 (p0, x0, 1), ++ z0 = svldff1ub_vnum_u16 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_vnum_u16_m1: ++** dech x0 ++** ldff1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u16_m1, svuint16_t, uint8_t, ++ z0 = svldff1ub_vnum_u16 (p0, x0, -1), ++ z0 = svldff1ub_vnum_u16 (p0, x0, -1)) ++ ++/* ++** ldff1ub_vnum_u16_x1: ++** cnth (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1b z0\.h, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1b z0\.h, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u16_x1, svuint16_t, uint8_t, ++ z0 = svldff1ub_vnum_u16 (p0, x0, x1), ++ z0 = svldff1ub_vnum_u16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c +new file mode 100644 +index 000000000..a40012318 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1ub_u32_base: ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u32_base, svuint32_t, uint8_t, ++ z0 = svldff1ub_u32 (p0, x0), ++ z0 = svldff1ub_u32 (p0, x0)) ++ ++/* ++** ldff1ub_u32_index: ++** ldff1b z0\.s, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u32_index, svuint32_t, uint8_t, ++ z0 = svldff1ub_u32 (p0, x0 + x1), ++ z0 = svldff1ub_u32 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_u32_1: ++** incw x0 ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u32_1, svuint32_t, uint8_t, ++ z0 = svldff1ub_u32 (p0, x0 + svcntw ()), ++ z0 = svldff1ub_u32 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_u32_m1: ++** decw x0 ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u32_m1, svuint32_t, uint8_t, ++ z0 = svldff1ub_u32 (p0, x0 - svcntw ()), ++ z0 = svldff1ub_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1ub_vnum_u32_0: ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u32_0, svuint32_t, uint8_t, ++ z0 = svldff1ub_vnum_u32 (p0, x0, 0), ++ z0 = svldff1ub_vnum_u32 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_vnum_u32_1: ++** incw x0 ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u32_1, svuint32_t, uint8_t, ++ z0 = svldff1ub_vnum_u32 (p0, x0, 1), ++ z0 = svldff1ub_vnum_u32 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1ub_vnum_u32_m1: ++** decw x0 ++** ldff1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u32_m1, svuint32_t, uint8_t, ++ z0 = svldff1ub_vnum_u32 (p0, x0, -1), ++ z0 = svldff1ub_vnum_u32 (p0, x0, -1)) ++ ++/* ++** ldff1ub_vnum_u32_x1: ++** cntw (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1b z0\.s, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1b z0\.s, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u32_x1, svuint32_t, uint8_t, ++ z0 = svldff1ub_vnum_u32 (p0, x0, x1), ++ z0 = svldff1ub_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c +new file mode 100644 +index 000000000..a9a98a683 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c +@@ -0,0 +1,90 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1ub_u64_base: ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u64_base, svuint64_t, uint8_t, ++ z0 = svldff1ub_u64 (p0, x0), ++ z0 = svldff1ub_u64 (p0, x0)) ++ ++/* ++** ldff1ub_u64_index: ++** ldff1b z0\.d, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u64_index, svuint64_t, uint8_t, ++ z0 = svldff1ub_u64 (p0, x0 + x1), ++ z0 = svldff1ub_u64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_u64_1: ++** incd x0 ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u64_1, svuint64_t, uint8_t, ++ z0 = svldff1ub_u64 (p0, x0 + svcntd ()), ++ z0 = svldff1ub_u64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_u64_m1: ++** decd x0 ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_u64_m1, svuint64_t, uint8_t, ++ z0 = svldff1ub_u64 (p0, x0 - svcntd ()), ++ z0 = svldff1ub_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1ub_vnum_u64_0: ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u64_0, svuint64_t, uint8_t, ++ z0 = svldff1ub_vnum_u64 (p0, x0, 0), ++ z0 = svldff1ub_vnum_u64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_vnum_u64_1: ++** incd x0 ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u64_1, svuint64_t, uint8_t, ++ z0 = svldff1ub_vnum_u64 (p0, x0, 1), ++ z0 = svldff1ub_vnum_u64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1ub_vnum_u64_m1: ++** decd x0 ++** ldff1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u64_m1, svuint64_t, uint8_t, ++ z0 = svldff1ub_vnum_u64 (p0, x0, -1), ++ z0 = svldff1ub_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ldff1ub_vnum_u64_x1: ++** cntd (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldff1b z0\.d, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldff1b z0\.d, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldff1ub_vnum_u64_x1, svuint64_t, uint8_t, ++ z0 = svldff1ub_vnum_u64 (p0, x0, x1), ++ z0 = svldff1ub_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c +new file mode 100644 +index 000000000..d02e44342 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c +@@ -0,0 +1,252 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uh_gather_s32_tied1: ++** ldff1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_s32_tied1, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_s32 (p0, z0), ++ z0_res = svldff1uh_gather_s32 (p0, z0)) ++ ++/* ++** ldff1uh_gather_s32_untied: ++** ldff1h z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_s32_untied, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_s32 (p0, z1), ++ z0_res = svldff1uh_gather_s32 (p0, z1)) ++ ++/* ++** ldff1uh_gather_x0_s32_offset: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, x0), ++ z0_res = svldff1uh_gather_offset_s32 (p0, z0, x0)) ++ ++/* ++** ldff1uh_gather_m2_s32_offset: ++** mov (x[0-9]+), #?-2 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_m2_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, -2), ++ z0_res = svldff1uh_gather_offset_s32 (p0, z0, -2)) ++ ++/* ++** ldff1uh_gather_0_s32_offset: ++** ldff1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, 0), ++ z0_res = svldff1uh_gather_offset_s32 (p0, z0, 0)) ++ ++/* ++** ldff1uh_gather_5_s32_offset: ++** mov (x[0-9]+), #?5 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, 5), ++ z0_res = svldff1uh_gather_offset_s32 (p0, z0, 5)) ++ ++/* ++** ldff1uh_gather_6_s32_offset: ++** ldff1h z0\.s, p0/z, \[z0\.s, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_6_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, 6), ++ z0_res = svldff1uh_gather_offset_s32 (p0, z0, 6)) ++ ++/* ++** ldff1uh_gather_62_s32_offset: ++** ldff1h z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_62_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, 62), ++ z0_res = svldff1uh_gather_offset_s32 (p0, z0, 62)) ++ ++/* ++** ldff1uh_gather_64_s32_offset: ++** mov (x[0-9]+), #?64 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_64_s32_offset, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, 64), ++ z0_res = svldff1uh_gather_offset_s32 (p0, z0, 64)) ++ ++/* ++** ldff1uh_gather_x0_s32_index: ++** lsl (x[0-9]+), x0, #?1 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, x0), ++ z0_res = svldff1uh_gather_index_s32 (p0, z0, x0)) ++ ++/* ++** ldff1uh_gather_m1_s32_index: ++** mov (x[0-9]+), #?-2 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_m1_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, -1), ++ z0_res = svldff1uh_gather_index_s32 (p0, z0, -1)) ++ ++/* ++** ldff1uh_gather_0_s32_index: ++** ldff1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, 0), ++ z0_res = svldff1uh_gather_index_s32 
(p0, z0, 0)) ++ ++/* ++** ldff1uh_gather_5_s32_index: ++** ldff1h z0\.s, p0/z, \[z0\.s, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, 5), ++ z0_res = svldff1uh_gather_index_s32 (p0, z0, 5)) ++ ++/* ++** ldff1uh_gather_31_s32_index: ++** ldff1h z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_31_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, 31), ++ z0_res = svldff1uh_gather_index_s32 (p0, z0, 31)) ++ ++/* ++** ldff1uh_gather_32_s32_index: ++** mov (x[0-9]+), #?64 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_32_s32_index, svint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, 32), ++ z0_res = svldff1uh_gather_index_s32 (p0, z0, 32)) ++ ++/* ++** ldff1uh_gather_x0_s32_s32offset: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s32_s32offset, svint32_t, uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_s32_s32offset: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s32_s32offset, svint32_t, uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32offset_s32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_s32_s32offset: ++** ldff1h z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s32_s32offset, svint32_t, uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32offset_s32 (p0, x0, z1), ++ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_x0_s32_u32offset: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s32_u32offset, svint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_s32_u32offset: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s32_u32offset, svint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32offset_s32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_s32_u32offset: ++** ldff1h z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s32_u32offset, svint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32offset_s32 (p0, x0, z1), ++ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_x0_s32_s32index: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s32_s32index, svint32_t, uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_s32_s32index: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s32_s32index, svint32_t, uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32index_s32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_s32_s32index: ++** ldff1h z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s32_s32index, svint32_t, uint16_t, 
svint32_t, ++ z0_res = svldff1uh_gather_s32index_s32 (p0, x0, z1), ++ z0_res = svldff1uh_gather_index_s32 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_x0_s32_u32index: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s32_u32index, svint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_s32_u32index: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s32_u32index, svint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32index_s32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_s32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_s32_u32index: ++** ldff1h z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s32_u32index, svint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32index_s32 (p0, x0, z1), ++ z0_res = svldff1uh_gather_index_s32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c +new file mode 100644 +index 000000000..663a73d27 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c +@@ -0,0 +1,288 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uh_gather_s64_tied1: ++** ldff1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_s64 (p0, z0), ++ z0_res = svldff1uh_gather_s64 (p0, z0)) ++ ++/* ++** ldff1uh_gather_s64_untied: ++** ldff1h z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_s64 (p0, z1), ++ z0_res = svldff1uh_gather_s64 (p0, z1)) ++ ++/* ++** ldff1uh_gather_x0_s64_offset: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svldff1uh_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ldff1uh_gather_m2_s64_offset: ++** mov (x[0-9]+), #?-2 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_m2_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, -2), ++ z0_res = svldff1uh_gather_offset_s64 (p0, z0, -2)) ++ ++/* ++** ldff1uh_gather_0_s64_offset: ++** ldff1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svldff1uh_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ldff1uh_gather_5_s64_offset: ++** mov (x[0-9]+), #?5 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svldff1uh_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ldff1uh_gather_6_s64_offset: ++** ldff1h z0\.d, p0/z, \[z0\.d, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_6_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, 6), ++ z0_res = svldff1uh_gather_offset_s64 (p0, z0, 6)) ++ ++/* ++** ldff1uh_gather_62_s64_offset: ++** ldff1h z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ 
++TEST_LOAD_GATHER_ZS (ldff1uh_gather_62_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, 62), ++ z0_res = svldff1uh_gather_offset_s64 (p0, z0, 62)) ++ ++/* ++** ldff1uh_gather_64_s64_offset: ++** mov (x[0-9]+), #?64 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_64_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, 64), ++ z0_res = svldff1uh_gather_offset_s64 (p0, z0, 64)) ++ ++/* ++** ldff1uh_gather_x0_s64_index: ++** lsl (x[0-9]+), x0, #?1 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, x0), ++ z0_res = svldff1uh_gather_index_s64 (p0, z0, x0)) ++ ++/* ++** ldff1uh_gather_m1_s64_index: ++** mov (x[0-9]+), #?-2 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_m1_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, -1), ++ z0_res = svldff1uh_gather_index_s64 (p0, z0, -1)) ++ ++/* ++** ldff1uh_gather_0_s64_index: ++** ldff1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, 0), ++ z0_res = svldff1uh_gather_index_s64 (p0, z0, 0)) ++ ++/* ++** ldff1uh_gather_5_s64_index: ++** ldff1h z0\.d, p0/z, \[z0\.d, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, 5), ++ z0_res = svldff1uh_gather_index_s64 (p0, z0, 5)) ++ ++/* ++** ldff1uh_gather_31_s64_index: ++** ldff1h z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_31_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, 31), ++ z0_res = svldff1uh_gather_index_s64 (p0, z0, 31)) ++ ++/* ++** ldff1uh_gather_32_s64_index: ++** mov (x[0-9]+), #?64 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_32_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, 32), ++ z0_res = svldff1uh_gather_index_s64 (p0, z0, 32)) ++ ++/* ++** ldff1uh_gather_x0_s64_s64offset: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s64_s64offset, svint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_s64_s64offset: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s64_s64offset, svint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_s64_s64offset: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s64_s64offset, svint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_ext_s64_s64offset: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_s64_s64offset, svint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1uh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** 
ldff1uh_gather_x0_s64_u64offset: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s64_u64offset, svint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_s64_u64offset: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s64_u64offset, svint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_s64_u64offset: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s64_u64offset, svint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_ext_s64_u64offset: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_s64_u64offset, svint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1uh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uh_gather_x0_s64_s64index: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s64_s64index, svint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_s64_s64index: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s64_s64index, svint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_s64_s64index: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s64_s64index, svint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64index_s64 (p0, x0, z1), ++ z0_res = svldff1uh_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_ext_s64_s64index: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_s64_s64index, svint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1uh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uh_gather_x0_s64_u64index: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s64_u64index, svint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_s64_u64index: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s64_u64index, svint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_s64_u64index: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s64_u64index, svint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64index_s64 (p0, x0, z1), ++ z0_res = svldff1uh_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_ext_s64_u64index: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ 
(ldff1uh_gather_ext_s64_u64index, svint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1uh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c +new file mode 100644 +index 000000000..5e0ef067f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c +@@ -0,0 +1,252 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uh_gather_u32_tied1: ++** ldff1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_u32 (p0, z0), ++ z0_res = svldff1uh_gather_u32 (p0, z0)) ++ ++/* ++** ldff1uh_gather_u32_untied: ++** ldff1h z0\.s, p0/z, \[z1\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_u32_untied, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_u32 (p0, z1), ++ z0_res = svldff1uh_gather_u32 (p0, z1)) ++ ++/* ++** ldff1uh_gather_x0_u32_offset: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, x0), ++ z0_res = svldff1uh_gather_offset_u32 (p0, z0, x0)) ++ ++/* ++** ldff1uh_gather_m2_u32_offset: ++** mov (x[0-9]+), #?-2 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_m2_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, -2), ++ z0_res = svldff1uh_gather_offset_u32 (p0, z0, -2)) ++ ++/* ++** ldff1uh_gather_0_u32_offset: ++** ldff1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, 0), ++ z0_res = svldff1uh_gather_offset_u32 (p0, z0, 0)) ++ ++/* ++** ldff1uh_gather_5_u32_offset: ++** mov (x[0-9]+), #?5 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, 5), ++ z0_res = svldff1uh_gather_offset_u32 (p0, z0, 5)) ++ ++/* ++** ldff1uh_gather_6_u32_offset: ++** ldff1h z0\.s, p0/z, \[z0\.s, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_6_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, 6), ++ z0_res = svldff1uh_gather_offset_u32 (p0, z0, 6)) ++ ++/* ++** ldff1uh_gather_62_u32_offset: ++** ldff1h z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_62_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, 62), ++ z0_res = svldff1uh_gather_offset_u32 (p0, z0, 62)) ++ ++/* ++** ldff1uh_gather_64_u32_offset: ++** mov (x[0-9]+), #?64 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_64_u32_offset, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, 64), ++ z0_res = svldff1uh_gather_offset_u32 (p0, z0, 64)) ++ ++/* ++** ldff1uh_gather_x0_u32_index: ++** lsl (x[0-9]+), x0, #?1 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, x0), ++ z0_res = 
svldff1uh_gather_index_u32 (p0, z0, x0)) ++ ++/* ++** ldff1uh_gather_m1_u32_index: ++** mov (x[0-9]+), #?-2 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_m1_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, -1), ++ z0_res = svldff1uh_gather_index_u32 (p0, z0, -1)) ++ ++/* ++** ldff1uh_gather_0_u32_index: ++** ldff1h z0\.s, p0/z, \[z0\.s\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, 0), ++ z0_res = svldff1uh_gather_index_u32 (p0, z0, 0)) ++ ++/* ++** ldff1uh_gather_5_u32_index: ++** ldff1h z0\.s, p0/z, \[z0\.s, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, 5), ++ z0_res = svldff1uh_gather_index_u32 (p0, z0, 5)) ++ ++/* ++** ldff1uh_gather_31_u32_index: ++** ldff1h z0\.s, p0/z, \[z0\.s, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_31_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, 31), ++ z0_res = svldff1uh_gather_index_u32 (p0, z0, 31)) ++ ++/* ++** ldff1uh_gather_32_u32_index: ++** mov (x[0-9]+), #?64 ++** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_32_u32_index, svuint32_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, 32), ++ z0_res = svldff1uh_gather_index_u32 (p0, z0, 32)) ++ ++/* ++** ldff1uh_gather_x0_u32_s32offset: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u32_s32offset, svuint32_t, uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_u32_s32offset: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u32_s32offset, svuint32_t, uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32offset_u32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_u32_s32offset: ++** ldff1h z0\.s, p0/z, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u32_s32offset, svuint32_t, uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32offset_u32 (p0, x0, z1), ++ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_x0_u32_u32offset: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u32_u32offset, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_u32_u32offset: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u32_u32offset, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32offset_u32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_u32_u32offset: ++** ldff1h z0\.s, p0/z, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u32_u32offset, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32offset_u32 (p0, x0, z1), ++ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_x0_u32_s32index: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u32_s32index, svuint32_t, 
uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_u32_s32index: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u32_s32index, svuint32_t, uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32index_u32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_u32_s32index: ++** ldff1h z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u32_s32index, svuint32_t, uint16_t, svint32_t, ++ z0_res = svldff1uh_gather_s32index_u32 (p0, x0, z1), ++ z0_res = svldff1uh_gather_index_u32 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_x0_u32_u32index: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u32_u32index, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_u32_u32index: ++** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u32_u32index, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32index_u32 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_u32 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_u32_u32index: ++** ldff1h z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u32_u32index, svuint32_t, uint16_t, svuint32_t, ++ z0_res = svldff1uh_gather_u32index_u32 (p0, x0, z1), ++ z0_res = svldff1uh_gather_index_u32 (p0, x0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c +new file mode 100644 +index 000000000..1cfae1b95 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c +@@ -0,0 +1,288 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uh_gather_u64_tied1: ++** ldff1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_u64 (p0, z0), ++ z0_res = svldff1uh_gather_u64 (p0, z0)) ++ ++/* ++** ldff1uh_gather_u64_untied: ++** ldff1h z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_u64 (p0, z1), ++ z0_res = svldff1uh_gather_u64 (p0, z1)) ++ ++/* ++** ldff1uh_gather_x0_u64_offset: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svldff1uh_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ldff1uh_gather_m2_u64_offset: ++** mov (x[0-9]+), #?-2 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_m2_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, -2), ++ z0_res = svldff1uh_gather_offset_u64 (p0, z0, -2)) ++ ++/* ++** ldff1uh_gather_0_u64_offset: ++** ldff1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svldff1uh_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ldff1uh_gather_5_u64_offset: ++** mov (x[0-9]+), #?5 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svldff1uh_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ldff1uh_gather_6_u64_offset: ++** ldff1h z0\.d, p0/z, \[z0\.d, #6\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_6_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, 6), ++ z0_res = svldff1uh_gather_offset_u64 (p0, z0, 6)) ++ ++/* ++** ldff1uh_gather_62_u64_offset: ++** ldff1h z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_62_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, 62), ++ z0_res = svldff1uh_gather_offset_u64 (p0, z0, 62)) ++ ++/* ++** ldff1uh_gather_64_u64_offset: ++** mov (x[0-9]+), #?64 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_64_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, 64), ++ z0_res = svldff1uh_gather_offset_u64 (p0, z0, 64)) ++ ++/* ++** ldff1uh_gather_x0_u64_index: ++** lsl (x[0-9]+), x0, #?1 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, x0), ++ z0_res = svldff1uh_gather_index_u64 (p0, z0, x0)) ++ ++/* ++** ldff1uh_gather_m1_u64_index: ++** mov (x[0-9]+), #?-2 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_m1_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, -1), ++ z0_res = svldff1uh_gather_index_u64 (p0, z0, -1)) ++ ++/* ++** ldff1uh_gather_0_u64_index: ++** ldff1h z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, 0), ++ z0_res = svldff1uh_gather_index_u64 (p0, z0, 0)) ++ ++/* 
++** ldff1uh_gather_5_u64_index: ++** ldff1h z0\.d, p0/z, \[z0\.d, #10\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, 5), ++ z0_res = svldff1uh_gather_index_u64 (p0, z0, 5)) ++ ++/* ++** ldff1uh_gather_31_u64_index: ++** ldff1h z0\.d, p0/z, \[z0\.d, #62\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_31_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, 31), ++ z0_res = svldff1uh_gather_index_u64 (p0, z0, 31)) ++ ++/* ++** ldff1uh_gather_32_u64_index: ++** mov (x[0-9]+), #?64 ++** ldff1h z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uh_gather_32_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, 32), ++ z0_res = svldff1uh_gather_index_u64 (p0, z0, 32)) ++ ++/* ++** ldff1uh_gather_x0_u64_s64offset: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_u64_s64offset: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_u64_s64offset: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_ext_u64_s64offset: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1uh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uh_gather_x0_u64_u64offset: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_u64_u64offset: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_u64_u64offset: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_ext_u64_u64offset: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1uh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uh_gather_x0_u64_s64index: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u64_s64index, svuint64_t, 
uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_u64_s64index: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u64_s64index, svuint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_u64_s64index: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u64_s64index, svuint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64index_u64 (p0, x0, z1), ++ z0_res = svldff1uh_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_ext_u64_s64index: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_u64_s64index, svuint64_t, uint16_t, svint64_t, ++ z0_res = svldff1uh_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1uh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uh_gather_x0_u64_u64index: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_tied1_u64_u64index: ++** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svldff1uh_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uh_gather_untied_u64_u64index: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64index_u64 (p0, x0, z1), ++ z0_res = svldff1uh_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ldff1uh_gather_ext_u64_u64index: ++** ldff1h z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ z0_res = svldff1uh_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1uh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c +new file mode 100644 +index 000000000..abb3d769a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uh_s32_base: ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_s32_base, svint32_t, uint16_t, ++ z0 = svldff1uh_s32 (p0, x0), ++ z0 = svldff1uh_s32 (p0, x0)) ++ ++/* ++** ldff1uh_s32_index: ++** ldff1h z0\.s, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_s32_index, svint32_t, uint16_t, ++ z0 = svldff1uh_s32 (p0, x0 + x1), ++ z0 = svldff1uh_s32 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_s32_1: ++** inch x0 ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_s32_1, svint32_t, uint16_t, ++ z0 = svldff1uh_s32 (p0, x0 + svcntw ()), ++ z0 = svldff1uh_s32 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1uh_s32_m1: ++** dech x0 ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_s32_m1, svint32_t, uint16_t, ++ z0 = svldff1uh_s32 (p0, x0 - svcntw ()), ++ z0 = svldff1uh_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1uh_vnum_s32_0: ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_s32_0, svint32_t, uint16_t, ++ z0 = svldff1uh_vnum_s32 (p0, x0, 0), ++ z0 = svldff1uh_vnum_s32 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_vnum_s32_1: ++** inch x0 ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_s32_1, svint32_t, uint16_t, ++ z0 = svldff1uh_vnum_s32 (p0, x0, 1), ++ z0 = svldff1uh_vnum_s32 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_vnum_s32_m1: ++** dech x0 ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_s32_m1, svint32_t, uint16_t, ++ z0 = svldff1uh_vnum_s32 (p0, x0, -1), ++ z0 = svldff1uh_vnum_s32 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1uh_vnum_s32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1h z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_s32_x1, svint32_t, uint16_t, ++ z0 = svldff1uh_vnum_s32 (p0, x0, x1), ++ z0 = svldff1uh_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c +new file mode 100644 +index 000000000..6e330e8e8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uh_s64_base: ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_s64_base, svint64_t, uint16_t, ++ z0 = svldff1uh_s64 (p0, x0), ++ z0 = svldff1uh_s64 (p0, x0)) ++ ++/* ++** ldff1uh_s64_index: ++** ldff1h z0\.d, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_s64_index, svint64_t, uint16_t, ++ z0 = svldff1uh_s64 (p0, x0 + x1), ++ z0 = svldff1uh_s64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_s64_1: ++** incw x0 ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_s64_1, svint64_t, uint16_t, ++ z0 = svldff1uh_s64 (p0, x0 + svcntd ()), ++ z0 = svldff1uh_s64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_s64_m1: ++** decw x0 ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_s64_m1, svint64_t, uint16_t, ++ z0 = svldff1uh_s64 (p0, x0 - svcntd ()), ++ z0 = svldff1uh_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1uh_vnum_s64_0: ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_s64_0, svint64_t, uint16_t, ++ z0 = svldff1uh_vnum_s64 (p0, x0, 0), ++ z0 = svldff1uh_vnum_s64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_vnum_s64_1: ++** incw x0 ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_s64_1, svint64_t, uint16_t, ++ z0 = svldff1uh_vnum_s64 (p0, x0, 1), ++ z0 = svldff1uh_vnum_s64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1uh_vnum_s64_m1: ++** decw x0 ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_s64_m1, svint64_t, uint16_t, ++ z0 = svldff1uh_vnum_s64 (p0, x0, -1), ++ z0 = svldff1uh_vnum_s64 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1uh_vnum_s64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1h z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_s64_x1, svint64_t, uint16_t, ++ z0 = svldff1uh_vnum_s64 (p0, x0, x1), ++ z0 = svldff1uh_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c +new file mode 100644 +index 000000000..4eb5323e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uh_u32_base: ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_u32_base, svuint32_t, uint16_t, ++ z0 = svldff1uh_u32 (p0, x0), ++ z0 = svldff1uh_u32 (p0, x0)) ++ ++/* ++** ldff1uh_u32_index: ++** ldff1h z0\.s, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_u32_index, svuint32_t, uint16_t, ++ z0 = svldff1uh_u32 (p0, x0 + x1), ++ z0 = svldff1uh_u32 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_u32_1: ++** inch x0 ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_u32_1, svuint32_t, uint16_t, ++ z0 = svldff1uh_u32 (p0, x0 + svcntw ()), ++ z0 = svldff1uh_u32 (p0, x0 + svcntw ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_u32_m1: ++** dech x0 ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_u32_m1, svuint32_t, uint16_t, ++ z0 = svldff1uh_u32 (p0, x0 - svcntw ()), ++ z0 = svldff1uh_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ldff1uh_vnum_u32_0: ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_u32_0, svuint32_t, uint16_t, ++ z0 = svldff1uh_vnum_u32 (p0, x0, 0), ++ z0 = svldff1uh_vnum_u32 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_vnum_u32_1: ++** inch x0 ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_u32_1, svuint32_t, uint16_t, ++ z0 = svldff1uh_vnum_u32 (p0, x0, 1), ++ z0 = svldff1uh_vnum_u32 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_vnum_u32_m1: ++** dech x0 ++** ldff1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_u32_m1, svuint32_t, uint16_t, ++ z0 = svldff1uh_vnum_u32 (p0, x0, -1), ++ z0 = svldff1uh_vnum_u32 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1uh_vnum_u32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1h z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_u32_x1, svuint32_t, uint16_t, ++ z0 = svldff1uh_vnum_u32 (p0, x0, x1), ++ z0 = svldff1uh_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c +new file mode 100644 +index 000000000..ebac26e7d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uh_u64_base: ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_u64_base, svuint64_t, uint16_t, ++ z0 = svldff1uh_u64 (p0, x0), ++ z0 = svldff1uh_u64 (p0, x0)) ++ ++/* ++** ldff1uh_u64_index: ++** ldff1h z0\.d, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_u64_index, svuint64_t, uint16_t, ++ z0 = svldff1uh_u64 (p0, x0 + x1), ++ z0 = svldff1uh_u64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_u64_1: ++** incw x0 ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_u64_1, svuint64_t, uint16_t, ++ z0 = svldff1uh_u64 (p0, x0 + svcntd ()), ++ z0 = svldff1uh_u64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_u64_m1: ++** decw x0 ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_u64_m1, svuint64_t, uint16_t, ++ z0 = svldff1uh_u64 (p0, x0 - svcntd ()), ++ z0 = svldff1uh_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1uh_vnum_u64_0: ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_u64_0, svuint64_t, uint16_t, ++ z0 = svldff1uh_vnum_u64 (p0, x0, 0), ++ z0 = svldff1uh_vnum_u64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_vnum_u64_1: ++** incw x0 ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_u64_1, svuint64_t, uint16_t, ++ z0 = svldff1uh_vnum_u64 (p0, x0, 1), ++ z0 = svldff1uh_vnum_u64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uh_vnum_u64_m1: ++** decw x0 ++** ldff1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_u64_m1, svuint64_t, uint16_t, ++ z0 = svldff1uh_vnum_u64 (p0, x0, -1), ++ z0 = svldff1uh_vnum_u64 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1uh_vnum_u64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1h z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1uh_vnum_u64_x1, svuint64_t, uint16_t, ++ z0 = svldff1uh_vnum_u64 (p0, x0, x1), ++ z0 = svldff1uh_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c +new file mode 100644 +index 000000000..6c0daea52 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c +@@ -0,0 +1,308 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uw_gather_s64_tied1: ++** ldff1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_s64_tied1, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_s64 (p0, z0), ++ z0_res = svldff1uw_gather_s64 (p0, z0)) ++ ++/* ++** ldff1uw_gather_s64_untied: ++** ldff1w z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_s64_untied, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_s64 (p0, z1), ++ z0_res = svldff1uw_gather_s64 (p0, z1)) ++ ++/* ++** ldff1uw_gather_x0_s64_offset: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_x0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, x0), ++ z0_res = svldff1uw_gather_offset_s64 (p0, z0, x0)) ++ ++/* ++** ldff1uw_gather_m4_s64_offset: ++** mov (x[0-9]+), #?-4 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_m4_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, -4), ++ z0_res = svldff1uw_gather_offset_s64 (p0, z0, -4)) ++ ++/* ++** ldff1uw_gather_0_s64_offset: ++** ldff1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_0_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 0), ++ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 0)) ++ ++/* ++** ldff1uw_gather_5_s64_offset: ++** mov (x[0-9]+), #?5 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_5_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 5), ++ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 5)) ++ ++/* ++** ldff1uw_gather_6_s64_offset: ++** mov (x[0-9]+), #?6 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_6_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 6), ++ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 6)) ++ ++/* ++** ldff1uw_gather_7_s64_offset: ++** mov (x[0-9]+), #?7 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_7_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 7), ++ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 7)) ++ ++/* ++** ldff1uw_gather_8_s64_offset: ++** ldff1w z0\.d, p0/z, \[z0\.d, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_8_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 8), ++ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 8)) ++ ++/* ++** ldff1uw_gather_124_s64_offset: ++** ldff1w z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_124_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 124), ++ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 124)) ++ ++/* ++** ldff1uw_gather_128_s64_offset: ++** mov (x[0-9]+), #?128 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_128_s64_offset, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 128), ++ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 128)) ++ ++/* ++** ldff1uw_gather_x0_s64_index: ++** lsl (x[0-9]+), x0, #?2 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_x0_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, x0), ++ z0_res = 
svldff1uw_gather_index_s64 (p0, z0, x0)) ++ ++/* ++** ldff1uw_gather_m1_s64_index: ++** mov (x[0-9]+), #?-4 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_m1_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, -1), ++ z0_res = svldff1uw_gather_index_s64 (p0, z0, -1)) ++ ++/* ++** ldff1uw_gather_0_s64_index: ++** ldff1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_0_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, 0), ++ z0_res = svldff1uw_gather_index_s64 (p0, z0, 0)) ++ ++/* ++** ldff1uw_gather_5_s64_index: ++** ldff1w z0\.d, p0/z, \[z0\.d, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_5_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, 5), ++ z0_res = svldff1uw_gather_index_s64 (p0, z0, 5)) ++ ++/* ++** ldff1uw_gather_31_s64_index: ++** ldff1w z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_31_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, 31), ++ z0_res = svldff1uw_gather_index_s64 (p0, z0, 31)) ++ ++/* ++** ldff1uw_gather_32_s64_index: ++** mov (x[0-9]+), #?128 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_32_s64_index, svint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, 32), ++ z0_res = svldff1uw_gather_index_s64 (p0, z0, 32)) ++ ++/* ++** ldff1uw_gather_x0_s64_s64offset: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_s64_s64offset, svint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_tied1_s64_s64offset: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_s64_s64offset, svint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64offset_s64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_untied_s64_s64offset: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_s64_s64offset, svint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64offset_s64 (p0, x0, z1), ++ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1uw_gather_ext_s64_s64offset: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_s64_s64offset, svint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1uw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uw_gather_x0_s64_u64offset: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_s64_u64offset, svint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_tied1_s64_u64offset: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_s64_u64offset, svint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64offset_s64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_untied_s64_u64offset: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_s64_u64offset, svint64_t, uint32_t, svuint64_t, ++ 
z0_res = svldff1uw_gather_u64offset_s64 (p0, x0, z1), ++ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z1)) ++ ++/* ++** ldff1uw_gather_ext_s64_u64offset: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_s64_u64offset, svint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1uw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uw_gather_x0_s64_s64index: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_s64_s64index, svint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_tied1_s64_s64index: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_s64_s64index, svint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64index_s64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_untied_s64_s64index: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_s64_s64index, svint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64index_s64 (p0, x0, z1), ++ z0_res = svldff1uw_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ldff1uw_gather_ext_s64_s64index: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_s64_s64index, svint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1uw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uw_gather_x0_s64_u64index: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_s64_u64index, svint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_tied1_s64_u64index: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_s64_u64index, svint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64index_s64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_index_s64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_untied_s64_u64index: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_s64_u64index, svint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64index_s64 (p0, x0, z1), ++ z0_res = svldff1uw_gather_index_s64 (p0, x0, z1)) ++ ++/* ++** ldff1uw_gather_ext_s64_u64index: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_s64_u64index, svint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1uw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c +new file mode 100644 +index 000000000..0e400c679 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c +@@ -0,0 +1,308 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uw_gather_u64_tied1: ++** ldff1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_u64 (p0, z0), ++ z0_res = svldff1uw_gather_u64 (p0, z0)) ++ ++/* ++** ldff1uw_gather_u64_untied: ++** ldff1w z0\.d, p0/z, \[z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_u64_untied, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_u64 (p0, z1), ++ z0_res = svldff1uw_gather_u64 (p0, z1)) ++ ++/* ++** ldff1uw_gather_x0_u64_offset: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_x0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, x0), ++ z0_res = svldff1uw_gather_offset_u64 (p0, z0, x0)) ++ ++/* ++** ldff1uw_gather_m4_u64_offset: ++** mov (x[0-9]+), #?-4 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_m4_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, -4), ++ z0_res = svldff1uw_gather_offset_u64 (p0, z0, -4)) ++ ++/* ++** ldff1uw_gather_0_u64_offset: ++** ldff1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_0_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 0), ++ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 0)) ++ ++/* ++** ldff1uw_gather_5_u64_offset: ++** mov (x[0-9]+), #?5 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_5_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 5), ++ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 5)) ++ ++/* ++** ldff1uw_gather_6_u64_offset: ++** mov (x[0-9]+), #?6 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_6_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 6), ++ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 6)) ++ ++/* ++** ldff1uw_gather_7_u64_offset: ++** mov (x[0-9]+), #?7 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_7_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 7), ++ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 7)) ++ ++/* ++** ldff1uw_gather_8_u64_offset: ++** ldff1w z0\.d, p0/z, \[z0\.d, #8\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_8_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 8), ++ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 8)) ++ ++/* ++** ldff1uw_gather_124_u64_offset: ++** ldff1w z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_124_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 124), ++ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 124)) ++ ++/* ++** ldff1uw_gather_128_u64_offset: ++** mov (x[0-9]+), #?128 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_128_u64_offset, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 128), ++ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 128)) ++ ++/* ++** ldff1uw_gather_x0_u64_index: ++** lsl (x[0-9]+), x0, #?2 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_x0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, x0), ++ z0_res = 
svldff1uw_gather_index_u64 (p0, z0, x0)) ++ ++/* ++** ldff1uw_gather_m1_u64_index: ++** mov (x[0-9]+), #?-4 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_m1_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, -1), ++ z0_res = svldff1uw_gather_index_u64 (p0, z0, -1)) ++ ++/* ++** ldff1uw_gather_0_u64_index: ++** ldff1w z0\.d, p0/z, \[z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_0_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, 0), ++ z0_res = svldff1uw_gather_index_u64 (p0, z0, 0)) ++ ++/* ++** ldff1uw_gather_5_u64_index: ++** ldff1w z0\.d, p0/z, \[z0\.d, #20\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_5_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, 5), ++ z0_res = svldff1uw_gather_index_u64 (p0, z0, 5)) ++ ++/* ++** ldff1uw_gather_31_u64_index: ++** ldff1w z0\.d, p0/z, \[z0\.d, #124\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_31_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, 31), ++ z0_res = svldff1uw_gather_index_u64 (p0, z0, 31)) ++ ++/* ++** ldff1uw_gather_32_u64_index: ++** mov (x[0-9]+), #?128 ++** ldff1w z0\.d, p0/z, \[\1, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_ZS (ldff1uw_gather_32_u64_index, svuint64_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, 32), ++ z0_res = svldff1uw_gather_index_u64 (p0, z0, 32)) ++ ++/* ++** ldff1uw_gather_x0_u64_s64offset: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_tied1_u64_s64offset: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64offset_u64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_untied_u64_s64offset: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64offset_u64 (p0, x0, z1), ++ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1uw_gather_ext_u64_s64offset: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1uw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uw_gather_x0_u64_u64offset: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_u64_u64offset, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_tied1_u64_u64offset: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_u64_u64offset, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64offset_u64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_untied_u64_u64offset: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_u64_u64offset, svuint64_t, uint32_t, 
svuint64_t, ++ z0_res = svldff1uw_gather_u64offset_u64 (p0, x0, z1), ++ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z1)) ++ ++/* ++** ldff1uw_gather_ext_u64_u64offset: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_u64_u64offset, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1uw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uw_gather_x0_u64_s64index: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_tied1_u64_s64index: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64index_u64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_untied_u64_s64index: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64index_u64 (p0, x0, z1), ++ z0_res = svldff1uw_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ldff1uw_gather_ext_u64_s64index: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ z0_res = svldff1uw_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), ++ z0_res = svldff1uw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) ++ ++/* ++** ldff1uw_gather_x0_u64_u64index: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_tied1_u64_u64index: ++** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64index_u64 (p0, x0, z0), ++ z0_res = svldff1uw_gather_index_u64 (p0, x0, z0)) ++ ++/* ++** ldff1uw_gather_untied_u64_u64index: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64index_u64 (p0, x0, z1), ++ z0_res = svldff1uw_gather_index_u64 (p0, x0, z1)) ++ ++/* ++** ldff1uw_gather_ext_u64_u64index: ++** ldff1w z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ z0_res = svldff1uw_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), ++ z0_res = svldff1uw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c +new file mode 100644 +index 000000000..ac9779899 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uw_s64_base: ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_s64_base, svint64_t, uint32_t, ++ z0 = svldff1uw_s64 (p0, x0), ++ z0 = svldff1uw_s64 (p0, x0)) ++ ++/* ++** ldff1uw_s64_index: ++** ldff1w z0\.d, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_s64_index, svint64_t, uint32_t, ++ z0 = svldff1uw_s64 (p0, x0 + x1), ++ z0 = svldff1uw_s64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uw_s64_1: ++** inch x0 ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_s64_1, svint64_t, uint32_t, ++ z0 = svldff1uw_s64 (p0, x0 + svcntd ()), ++ z0 = svldff1uw_s64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uw_s64_m1: ++** dech x0 ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_s64_m1, svint64_t, uint32_t, ++ z0 = svldff1uw_s64 (p0, x0 - svcntd ()), ++ z0 = svldff1uw_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1uw_vnum_s64_0: ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_vnum_s64_0, svint64_t, uint32_t, ++ z0 = svldff1uw_vnum_s64 (p0, x0, 0), ++ z0 = svldff1uw_vnum_s64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uw_vnum_s64_1: ++** inch x0 ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_vnum_s64_1, svint64_t, uint32_t, ++ z0 = svldff1uw_vnum_s64 (p0, x0, 1), ++ z0 = svldff1uw_vnum_s64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uw_vnum_s64_m1: ++** dech x0 ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_vnum_s64_m1, svint64_t, uint32_t, ++ z0 = svldff1uw_vnum_s64 (p0, x0, -1), ++ z0 = svldff1uw_vnum_s64 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1uw_vnum_s64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1w z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_vnum_s64_x1, svint64_t, uint32_t, ++ z0 = svldff1uw_vnum_s64 (p0, x0, x1), ++ z0 = svldff1uw_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c +new file mode 100644 +index 000000000..c7ab06171 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c +@@ -0,0 +1,86 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldff1uw_u64_base: ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_u64_base, svuint64_t, uint32_t, ++ z0 = svldff1uw_u64 (p0, x0), ++ z0 = svldff1uw_u64 (p0, x0)) ++ ++/* ++** ldff1uw_u64_index: ++** ldff1w z0\.d, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_u64_index, svuint64_t, uint32_t, ++ z0 = svldff1uw_u64 (p0, x0 + x1), ++ z0 = svldff1uw_u64 (p0, x0 + x1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uw_u64_1: ++** inch x0 ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_u64_1, svuint64_t, uint32_t, ++ z0 = svldff1uw_u64 (p0, x0 + svcntd ()), ++ z0 = svldff1uw_u64 (p0, x0 + svcntd ())) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldff1uw_u64_m1: ++** dech x0 ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_u64_m1, svuint64_t, uint32_t, ++ z0 = svldff1uw_u64 (p0, x0 - svcntd ()), ++ z0 = svldff1uw_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldff1uw_vnum_u64_0: ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_vnum_u64_0, svuint64_t, uint32_t, ++ z0 = svldff1uw_vnum_u64 (p0, x0, 0), ++ z0 = svldff1uw_vnum_u64 (p0, x0, 0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uw_vnum_u64_1: ++** inch x0 ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_vnum_u64_1, svuint64_t, uint32_t, ++ z0 = svldff1uw_vnum_u64 (p0, x0, 1), ++ z0 = svldff1uw_vnum_u64 (p0, x0, 1)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldff1uw_vnum_u64_m1: ++** dech x0 ++** ldff1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_vnum_u64_m1, svuint64_t, uint32_t, ++ z0 = svldff1uw_vnum_u64 (p0, x0, -1), ++ z0 = svldff1uw_vnum_u64 (p0, x0, -1)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldff1uw_vnum_u64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldff1w z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldff1uw_vnum_u64_x1, svuint64_t, uint32_t, ++ z0 = svldff1uw_vnum_u64 (p0, x0, x1), ++ z0 = svldff1uw_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c +new file mode 100644 +index 000000000..947a896e7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_bf16_base: ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_bf16_base, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_bf16 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_bf16_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1h z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_bf16_index, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_bf16 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_bf16_1: ++** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_bf16_1, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_bf16 (p0, x0 + svcnth ()), ++ z0 = svldnf1 (p0, x0 + svcnth ())) ++ ++/* ++** ldnf1_bf16_7: ++** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_bf16_7, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_bf16 (p0, x0 + svcnth () * 7), ++ z0 = svldnf1 (p0, x0 + svcnth () * 7)) ++ ++/* ++** ldnf1_bf16_8: ++** incb x0, all, mul #8 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_bf16_8, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_bf16 (p0, x0 + svcnth () * 8), ++ z0 = svldnf1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnf1_bf16_m1: ++** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_bf16_m1, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_bf16 (p0, x0 - svcnth ()), ++ z0 = svldnf1 (p0, x0 - svcnth ())) ++ ++/* ++** ldnf1_bf16_m8: ++** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_bf16_m8, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_bf16 (p0, x0 - svcnth () * 8), ++ z0 = svldnf1 (p0, x0 - svcnth () * 8)) ++ ++/* ++** ldnf1_bf16_m9: ++** decb x0, all, mul #9 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_bf16_m9, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_bf16 (p0, x0 - svcnth () * 9), ++ z0 = svldnf1 (p0, 
x0 - svcnth () * 9)) ++ ++/* ++** ldnf1_vnum_bf16_0: ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_bf16_0, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_vnum_bf16 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_bf16_1: ++** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_bf16_1, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_vnum_bf16 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_bf16_7: ++** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_bf16_7, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_vnum_bf16 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_bf16_8: ++** incb x0, all, mul #8 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_bf16_8, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_vnum_bf16 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_bf16_m1: ++** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_vnum_bf16 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_bf16_m8: ++** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_bf16_m8, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_vnum_bf16 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_bf16_m9: ++** decb x0, all, mul #9 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_bf16_m9, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_vnum_bf16 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, ++ z0 = svldnf1_vnum_bf16 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c +new file mode 100644 +index 000000000..cf0178688 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_f16_base: ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f16_base, svfloat16_t, float16_t, ++ z0 = svldnf1_f16 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_f16_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1h z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f16_index, svfloat16_t, float16_t, ++ z0 = svldnf1_f16 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_f16_1: ++** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f16_1, svfloat16_t, float16_t, ++ z0 = svldnf1_f16 (p0, x0 + svcnth ()), ++ z0 = svldnf1 (p0, x0 + svcnth ())) ++ ++/* ++** ldnf1_f16_7: ++** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f16_7, svfloat16_t, float16_t, ++ z0 = svldnf1_f16 (p0, x0 + svcnth () * 7), ++ z0 = svldnf1 (p0, x0 + svcnth () * 7)) ++ ++/* ++** ldnf1_f16_8: ++** incb x0, all, mul #8 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f16_8, svfloat16_t, float16_t, ++ z0 = svldnf1_f16 (p0, x0 + svcnth () * 8), ++ z0 = svldnf1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnf1_f16_m1: ++** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f16_m1, svfloat16_t, float16_t, ++ z0 = svldnf1_f16 (p0, x0 - svcnth ()), ++ z0 = svldnf1 (p0, x0 - svcnth ())) ++ ++/* ++** ldnf1_f16_m8: ++** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f16_m8, svfloat16_t, float16_t, ++ z0 = svldnf1_f16 (p0, x0 - svcnth () * 8), ++ z0 = svldnf1 (p0, x0 - svcnth () * 8)) ++ ++/* ++** ldnf1_f16_m9: ++** decb x0, all, mul #9 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f16_m9, svfloat16_t, float16_t, ++ z0 = svldnf1_f16 (p0, x0 - svcnth () * 9), ++ z0 = svldnf1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnf1_vnum_f16_0: ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f16_0, svfloat16_t, float16_t, ++ z0 = svldnf1_vnum_f16 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_f16_1: ++** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f16_1, svfloat16_t, float16_t, ++ z0 = svldnf1_vnum_f16 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_f16_7: ++** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f16_7, svfloat16_t, float16_t, ++ z0 = svldnf1_vnum_f16 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_f16_8: ++** incb x0, all, mul #8 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f16_8, svfloat16_t, float16_t, ++ z0 = svldnf1_vnum_f16 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_f16_m1: ++** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f16_m1, svfloat16_t, float16_t, ++ z0 = svldnf1_vnum_f16 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_f16_m8: ++** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f16_m8, svfloat16_t, float16_t, ++ z0 = svldnf1_vnum_f16 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_f16_m9: ++** decb x0, all, mul #9 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f16_m9, svfloat16_t, float16_t, ++ z0 = svldnf1_vnum_f16 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f16_x1, svfloat16_t, float16_t, 
++ z0 = svldnf1_vnum_f16 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c +new file mode 100644 +index 000000000..83b73ec8e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_f32_base: ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f32_base, svfloat32_t, float32_t, ++ z0 = svldnf1_f32 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_f32_index: ++** add (x[0-9]+), x0, x1, lsl 2 ++** ldnf1w z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f32_index, svfloat32_t, float32_t, ++ z0 = svldnf1_f32 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_f32_1: ++** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f32_1, svfloat32_t, float32_t, ++ z0 = svldnf1_f32 (p0, x0 + svcntw ()), ++ z0 = svldnf1 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1_f32_7: ++** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f32_7, svfloat32_t, float32_t, ++ z0 = svldnf1_f32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1_f32_8: ++** incb x0, all, mul #8 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f32_8, svfloat32_t, float32_t, ++ z0 = svldnf1_f32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1_f32_m1: ++** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f32_m1, svfloat32_t, float32_t, ++ z0 = svldnf1_f32 (p0, x0 - svcntw ()), ++ z0 = svldnf1 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1_f32_m8: ++** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f32_m8, svfloat32_t, float32_t, ++ z0 = svldnf1_f32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1_f32_m9: ++** decb x0, all, mul #9 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f32_m9, svfloat32_t, float32_t, ++ z0 = svldnf1_f32 (p0, x0 - svcntw () * 9), ++ z0 = svldnf1 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1_vnum_f32_0: ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f32_0, svfloat32_t, float32_t, ++ z0 = svldnf1_vnum_f32 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_f32_1: ++** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f32_1, svfloat32_t, float32_t, ++ z0 = svldnf1_vnum_f32 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_f32_7: ++** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f32_7, svfloat32_t, float32_t, ++ z0 = svldnf1_vnum_f32 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_f32_8: ++** incb x0, all, mul #8 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f32_8, svfloat32_t, float32_t, ++ z0 = svldnf1_vnum_f32 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_f32_m1: ++** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f32_m1, svfloat32_t, float32_t, ++ z0 = svldnf1_vnum_f32 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_f32_m8: ++** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f32_m8, svfloat32_t, float32_t, ++ z0 = svldnf1_vnum_f32 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) 
++ ++/* ++** ldnf1_vnum_f32_m9: ++** decb x0, all, mul #9 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f32_m9, svfloat32_t, float32_t, ++ z0 = svldnf1_vnum_f32 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f32_x1, svfloat32_t, float32_t, ++ z0 = svldnf1_vnum_f32 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c +new file mode 100644 +index 000000000..778096e82 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_f64_base: ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f64_base, svfloat64_t, float64_t, ++ z0 = svldnf1_f64 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_f64_index: ++** add (x[0-9]+), x0, x1, lsl 3 ++** ldnf1d z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f64_index, svfloat64_t, float64_t, ++ z0 = svldnf1_f64 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_f64_1: ++** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f64_1, svfloat64_t, float64_t, ++ z0 = svldnf1_f64 (p0, x0 + svcntd ()), ++ z0 = svldnf1 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1_f64_7: ++** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f64_7, svfloat64_t, float64_t, ++ z0 = svldnf1_f64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1_f64_8: ++** incb x0, all, mul #8 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f64_8, svfloat64_t, float64_t, ++ z0 = svldnf1_f64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1_f64_m1: ++** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f64_m1, svfloat64_t, float64_t, ++ z0 = svldnf1_f64 (p0, x0 - svcntd ()), ++ z0 = svldnf1 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1_f64_m8: ++** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f64_m8, svfloat64_t, float64_t, ++ z0 = svldnf1_f64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1_f64_m9: ++** decb x0, all, mul #9 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_f64_m9, svfloat64_t, float64_t, ++ z0 = svldnf1_f64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1_vnum_f64_0: ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f64_0, svfloat64_t, float64_t, ++ z0 = svldnf1_vnum_f64 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_f64_1: ++** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f64_1, svfloat64_t, float64_t, ++ z0 = svldnf1_vnum_f64 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_f64_7: ++** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f64_7, svfloat64_t, float64_t, ++ z0 = svldnf1_vnum_f64 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_f64_8: ++** incb x0, all, mul #8 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f64_8, svfloat64_t, float64_t, ++ z0 = svldnf1_vnum_f64 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** 
ldnf1_vnum_f64_m1: ++** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f64_m1, svfloat64_t, float64_t, ++ z0 = svldnf1_vnum_f64 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_f64_m8: ++** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f64_m8, svfloat64_t, float64_t, ++ z0 = svldnf1_vnum_f64 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_f64_m9: ++** decb x0, all, mul #9 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f64_m9, svfloat64_t, float64_t, ++ z0 = svldnf1_vnum_f64 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_f64_x1, svfloat64_t, float64_t, ++ z0 = svldnf1_vnum_f64 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c +new file mode 100644 +index 000000000..592c8237d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_s16_base: ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s16_base, svint16_t, int16_t, ++ z0 = svldnf1_s16 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_s16_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1h z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s16_index, svint16_t, int16_t, ++ z0 = svldnf1_s16 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_s16_1: ++** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s16_1, svint16_t, int16_t, ++ z0 = svldnf1_s16 (p0, x0 + svcnth ()), ++ z0 = svldnf1 (p0, x0 + svcnth ())) ++ ++/* ++** ldnf1_s16_7: ++** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s16_7, svint16_t, int16_t, ++ z0 = svldnf1_s16 (p0, x0 + svcnth () * 7), ++ z0 = svldnf1 (p0, x0 + svcnth () * 7)) ++ ++/* ++** ldnf1_s16_8: ++** incb x0, all, mul #8 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s16_8, svint16_t, int16_t, ++ z0 = svldnf1_s16 (p0, x0 + svcnth () * 8), ++ z0 = svldnf1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnf1_s16_m1: ++** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s16_m1, svint16_t, int16_t, ++ z0 = svldnf1_s16 (p0, x0 - svcnth ()), ++ z0 = svldnf1 (p0, x0 - svcnth ())) ++ ++/* ++** ldnf1_s16_m8: ++** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s16_m8, svint16_t, int16_t, ++ z0 = svldnf1_s16 (p0, x0 - svcnth () * 8), ++ z0 = svldnf1 (p0, x0 - svcnth () * 8)) ++ ++/* ++** ldnf1_s16_m9: ++** decb x0, all, mul #9 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s16_m9, svint16_t, int16_t, ++ z0 = svldnf1_s16 (p0, x0 - svcnth () * 9), ++ z0 = svldnf1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnf1_vnum_s16_0: ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s16_0, svint16_t, int16_t, ++ z0 = svldnf1_vnum_s16 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_s16_1: ++** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s16_1, svint16_t, int16_t, ++ z0 = svldnf1_vnum_s16 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_s16_7: ++** ldnf1h z0\.h, p0/z, \[x0, #7, mul 
vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s16_7, svint16_t, int16_t, ++ z0 = svldnf1_vnum_s16 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_s16_8: ++** incb x0, all, mul #8 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s16_8, svint16_t, int16_t, ++ z0 = svldnf1_vnum_s16 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_s16_m1: ++** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s16_m1, svint16_t, int16_t, ++ z0 = svldnf1_vnum_s16 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_s16_m8: ++** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s16_m8, svint16_t, int16_t, ++ z0 = svldnf1_vnum_s16 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_s16_m9: ++** decb x0, all, mul #9 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s16_m9, svint16_t, int16_t, ++ z0 = svldnf1_vnum_s16 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s16_x1, svint16_t, int16_t, ++ z0 = svldnf1_vnum_s16 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c +new file mode 100644 +index 000000000..634092af8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_s32_base: ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s32_base, svint32_t, int32_t, ++ z0 = svldnf1_s32 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_s32_index: ++** add (x[0-9]+), x0, x1, lsl 2 ++** ldnf1w z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s32_index, svint32_t, int32_t, ++ z0 = svldnf1_s32 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_s32_1: ++** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s32_1, svint32_t, int32_t, ++ z0 = svldnf1_s32 (p0, x0 + svcntw ()), ++ z0 = svldnf1 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1_s32_7: ++** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s32_7, svint32_t, int32_t, ++ z0 = svldnf1_s32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1_s32_8: ++** incb x0, all, mul #8 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s32_8, svint32_t, int32_t, ++ z0 = svldnf1_s32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1_s32_m1: ++** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s32_m1, svint32_t, int32_t, ++ z0 = svldnf1_s32 (p0, x0 - svcntw ()), ++ z0 = svldnf1 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1_s32_m8: ++** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s32_m8, svint32_t, int32_t, ++ z0 = svldnf1_s32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1_s32_m9: ++** decb x0, all, mul #9 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s32_m9, svint32_t, int32_t, ++ z0 = svldnf1_s32 (p0, x0 - svcntw () * 9), ++ z0 = svldnf1 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1_vnum_s32_0: ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s32_0, 
svint32_t, int32_t, ++ z0 = svldnf1_vnum_s32 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_s32_1: ++** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s32_1, svint32_t, int32_t, ++ z0 = svldnf1_vnum_s32 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_s32_7: ++** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s32_7, svint32_t, int32_t, ++ z0 = svldnf1_vnum_s32 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_s32_8: ++** incb x0, all, mul #8 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s32_8, svint32_t, int32_t, ++ z0 = svldnf1_vnum_s32 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_s32_m1: ++** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s32_m1, svint32_t, int32_t, ++ z0 = svldnf1_vnum_s32 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_s32_m8: ++** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s32_m8, svint32_t, int32_t, ++ z0 = svldnf1_vnum_s32 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_s32_m9: ++** decb x0, all, mul #9 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s32_m9, svint32_t, int32_t, ++ z0 = svldnf1_vnum_s32 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s32_x1, svint32_t, int32_t, ++ z0 = svldnf1_vnum_s32 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c +new file mode 100644 +index 000000000..4a03f6676 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_s64_base: ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s64_base, svint64_t, int64_t, ++ z0 = svldnf1_s64 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_s64_index: ++** add (x[0-9]+), x0, x1, lsl 3 ++** ldnf1d z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s64_index, svint64_t, int64_t, ++ z0 = svldnf1_s64 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_s64_1: ++** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s64_1, svint64_t, int64_t, ++ z0 = svldnf1_s64 (p0, x0 + svcntd ()), ++ z0 = svldnf1 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1_s64_7: ++** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s64_7, svint64_t, int64_t, ++ z0 = svldnf1_s64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1_s64_8: ++** incb x0, all, mul #8 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s64_8, svint64_t, int64_t, ++ z0 = svldnf1_s64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1_s64_m1: ++** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s64_m1, svint64_t, int64_t, ++ z0 = svldnf1_s64 (p0, x0 - svcntd ()), ++ z0 = svldnf1 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1_s64_m8: ++** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s64_m8, svint64_t, int64_t, ++ z0 = svldnf1_s64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1_s64_m9: ++** decb x0, all, mul #9 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s64_m9, svint64_t, int64_t, ++ z0 = svldnf1_s64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1_vnum_s64_0: ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s64_0, svint64_t, int64_t, ++ z0 = svldnf1_vnum_s64 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_s64_1: ++** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s64_1, svint64_t, int64_t, ++ z0 = svldnf1_vnum_s64 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_s64_7: ++** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s64_7, svint64_t, int64_t, ++ z0 = svldnf1_vnum_s64 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_s64_8: ++** incb x0, all, mul #8 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s64_8, svint64_t, int64_t, ++ z0 = svldnf1_vnum_s64 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_s64_m1: ++** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s64_m1, svint64_t, int64_t, ++ z0 = svldnf1_vnum_s64 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_s64_m8: ++** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s64_m8, svint64_t, int64_t, ++ z0 = svldnf1_vnum_s64 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_s64_m9: ++** decb x0, all, mul #9 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s64_m9, svint64_t, int64_t, ++ z0 = svldnf1_vnum_s64 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s64_x1, svint64_t, int64_t, ++ z0 = svldnf1_vnum_s64 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, 
x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c +new file mode 100644 +index 000000000..162ee176a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_s8_base: ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s8_base, svint8_t, int8_t, ++ z0 = svldnf1_s8 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_s8_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1b z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s8_index, svint8_t, int8_t, ++ z0 = svldnf1_s8 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_s8_1: ++** ldnf1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s8_1, svint8_t, int8_t, ++ z0 = svldnf1_s8 (p0, x0 + svcntb ()), ++ z0 = svldnf1 (p0, x0 + svcntb ())) ++ ++/* ++** ldnf1_s8_7: ++** ldnf1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s8_7, svint8_t, int8_t, ++ z0 = svldnf1_s8 (p0, x0 + svcntb () * 7), ++ z0 = svldnf1 (p0, x0 + svcntb () * 7)) ++ ++/* ++** ldnf1_s8_8: ++** incb x0, all, mul #8 ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s8_8, svint8_t, int8_t, ++ z0 = svldnf1_s8 (p0, x0 + svcntb () * 8), ++ z0 = svldnf1 (p0, x0 + svcntb () * 8)) ++ ++/* ++** ldnf1_s8_m1: ++** ldnf1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s8_m1, svint8_t, int8_t, ++ z0 = svldnf1_s8 (p0, x0 - svcntb ()), ++ z0 = svldnf1 (p0, x0 - svcntb ())) ++ ++/* ++** ldnf1_s8_m8: ++** ldnf1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s8_m8, svint8_t, int8_t, ++ z0 = svldnf1_s8 (p0, x0 - svcntb () * 8), ++ z0 = svldnf1 (p0, x0 - svcntb () * 8)) ++ ++/* ++** ldnf1_s8_m9: ++** decb x0, all, mul #9 ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_s8_m9, svint8_t, int8_t, ++ z0 = svldnf1_s8 (p0, x0 - svcntb () * 9), ++ z0 = svldnf1 (p0, x0 - svcntb () * 9)) ++ ++/* ++** ldnf1_vnum_s8_0: ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s8_0, svint8_t, int8_t, ++ z0 = svldnf1_vnum_s8 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_s8_1: ++** ldnf1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s8_1, svint8_t, int8_t, ++ z0 = svldnf1_vnum_s8 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_s8_7: ++** ldnf1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s8_7, svint8_t, int8_t, ++ z0 = svldnf1_vnum_s8 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_s8_8: ++** incb x0, all, mul #8 ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s8_8, svint8_t, int8_t, ++ z0 = svldnf1_vnum_s8 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_s8_m1: ++** ldnf1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s8_m1, svint8_t, int8_t, ++ z0 = svldnf1_vnum_s8 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_s8_m8: ++** ldnf1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s8_m8, svint8_t, int8_t, ++ z0 = svldnf1_vnum_s8 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_s8_m9: ++** decb x0, all, mul #9 ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s8_m9, svint8_t, int8_t, ++ z0 = svldnf1_vnum_s8 (p0, x0, -9), ++ z0 = 
svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_s8_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1b z0\.b, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_s8_x1, svint8_t, int8_t, ++ z0 = svldnf1_vnum_s8 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c +new file mode 100644 +index 000000000..e920ac43b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_u16_base: ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u16_base, svuint16_t, uint16_t, ++ z0 = svldnf1_u16 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_u16_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1h z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u16_index, svuint16_t, uint16_t, ++ z0 = svldnf1_u16 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_u16_1: ++** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u16_1, svuint16_t, uint16_t, ++ z0 = svldnf1_u16 (p0, x0 + svcnth ()), ++ z0 = svldnf1 (p0, x0 + svcnth ())) ++ ++/* ++** ldnf1_u16_7: ++** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u16_7, svuint16_t, uint16_t, ++ z0 = svldnf1_u16 (p0, x0 + svcnth () * 7), ++ z0 = svldnf1 (p0, x0 + svcnth () * 7)) ++ ++/* ++** ldnf1_u16_8: ++** incb x0, all, mul #8 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u16_8, svuint16_t, uint16_t, ++ z0 = svldnf1_u16 (p0, x0 + svcnth () * 8), ++ z0 = svldnf1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnf1_u16_m1: ++** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u16_m1, svuint16_t, uint16_t, ++ z0 = svldnf1_u16 (p0, x0 - svcnth ()), ++ z0 = svldnf1 (p0, x0 - svcnth ())) ++ ++/* ++** ldnf1_u16_m8: ++** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u16_m8, svuint16_t, uint16_t, ++ z0 = svldnf1_u16 (p0, x0 - svcnth () * 8), ++ z0 = svldnf1 (p0, x0 - svcnth () * 8)) ++ ++/* ++** ldnf1_u16_m9: ++** decb x0, all, mul #9 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u16_m9, svuint16_t, uint16_t, ++ z0 = svldnf1_u16 (p0, x0 - svcnth () * 9), ++ z0 = svldnf1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnf1_vnum_u16_0: ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u16_0, svuint16_t, uint16_t, ++ z0 = svldnf1_vnum_u16 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_u16_1: ++** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u16_1, svuint16_t, uint16_t, ++ z0 = svldnf1_vnum_u16 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_u16_7: ++** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u16_7, svuint16_t, uint16_t, ++ z0 = svldnf1_vnum_u16 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_u16_8: ++** incb x0, all, mul #8 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u16_8, svuint16_t, uint16_t, ++ z0 = svldnf1_vnum_u16 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_u16_m1: ++** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u16_m1, svuint16_t, uint16_t, ++ z0 = svldnf1_vnum_u16 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_u16_m8: 
++** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u16_m8, svuint16_t, uint16_t, ++ z0 = svldnf1_vnum_u16 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_u16_m9: ++** decb x0, all, mul #9 ++** ldnf1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u16_m9, svuint16_t, uint16_t, ++ z0 = svldnf1_vnum_u16 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u16_x1, svuint16_t, uint16_t, ++ z0 = svldnf1_vnum_u16 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c +new file mode 100644 +index 000000000..65e28c5c2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_u32_base: ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u32_base, svuint32_t, uint32_t, ++ z0 = svldnf1_u32 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_u32_index: ++** add (x[0-9]+), x0, x1, lsl 2 ++** ldnf1w z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u32_index, svuint32_t, uint32_t, ++ z0 = svldnf1_u32 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_u32_1: ++** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u32_1, svuint32_t, uint32_t, ++ z0 = svldnf1_u32 (p0, x0 + svcntw ()), ++ z0 = svldnf1 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1_u32_7: ++** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u32_7, svuint32_t, uint32_t, ++ z0 = svldnf1_u32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1_u32_8: ++** incb x0, all, mul #8 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u32_8, svuint32_t, uint32_t, ++ z0 = svldnf1_u32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1_u32_m1: ++** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u32_m1, svuint32_t, uint32_t, ++ z0 = svldnf1_u32 (p0, x0 - svcntw ()), ++ z0 = svldnf1 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1_u32_m8: ++** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u32_m8, svuint32_t, uint32_t, ++ z0 = svldnf1_u32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1_u32_m9: ++** decb x0, all, mul #9 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u32_m9, svuint32_t, uint32_t, ++ z0 = svldnf1_u32 (p0, x0 - svcntw () * 9), ++ z0 = svldnf1 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1_vnum_u32_0: ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u32_0, svuint32_t, uint32_t, ++ z0 = svldnf1_vnum_u32 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_u32_1: ++** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u32_1, svuint32_t, uint32_t, ++ z0 = svldnf1_vnum_u32 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_u32_7: ++** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u32_7, svuint32_t, uint32_t, ++ z0 = svldnf1_vnum_u32 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_u32_8: ++** incb x0, all, mul #8 ++** ldnf1w z0\.s, p0/z, 
\[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u32_8, svuint32_t, uint32_t, ++ z0 = svldnf1_vnum_u32 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_u32_m1: ++** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u32_m1, svuint32_t, uint32_t, ++ z0 = svldnf1_vnum_u32 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_u32_m8: ++** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u32_m8, svuint32_t, uint32_t, ++ z0 = svldnf1_vnum_u32 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_u32_m9: ++** decb x0, all, mul #9 ++** ldnf1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u32_m9, svuint32_t, uint32_t, ++ z0 = svldnf1_vnum_u32 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u32_x1, svuint32_t, uint32_t, ++ z0 = svldnf1_vnum_u32 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c +new file mode 100644 +index 000000000..70d3f27d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_u64_base: ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u64_base, svuint64_t, uint64_t, ++ z0 = svldnf1_u64 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_u64_index: ++** add (x[0-9]+), x0, x1, lsl 3 ++** ldnf1d z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u64_index, svuint64_t, uint64_t, ++ z0 = svldnf1_u64 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_u64_1: ++** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u64_1, svuint64_t, uint64_t, ++ z0 = svldnf1_u64 (p0, x0 + svcntd ()), ++ z0 = svldnf1 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1_u64_7: ++** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u64_7, svuint64_t, uint64_t, ++ z0 = svldnf1_u64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1_u64_8: ++** incb x0, all, mul #8 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u64_8, svuint64_t, uint64_t, ++ z0 = svldnf1_u64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1_u64_m1: ++** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u64_m1, svuint64_t, uint64_t, ++ z0 = svldnf1_u64 (p0, x0 - svcntd ()), ++ z0 = svldnf1 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1_u64_m8: ++** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u64_m8, svuint64_t, uint64_t, ++ z0 = svldnf1_u64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1_u64_m9: ++** decb x0, all, mul #9 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u64_m9, svuint64_t, uint64_t, ++ z0 = svldnf1_u64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1_vnum_u64_0: ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u64_0, svuint64_t, uint64_t, ++ z0 = svldnf1_vnum_u64 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_u64_1: ++** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD 
(ldnf1_vnum_u64_1, svuint64_t, uint64_t, ++ z0 = svldnf1_vnum_u64 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_u64_7: ++** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u64_7, svuint64_t, uint64_t, ++ z0 = svldnf1_vnum_u64 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_u64_8: ++** incb x0, all, mul #8 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u64_8, svuint64_t, uint64_t, ++ z0 = svldnf1_vnum_u64 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_u64_m1: ++** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u64_m1, svuint64_t, uint64_t, ++ z0 = svldnf1_vnum_u64 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_u64_m8: ++** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u64_m8, svuint64_t, uint64_t, ++ z0 = svldnf1_vnum_u64 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_u64_m9: ++** decb x0, all, mul #9 ++** ldnf1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u64_m9, svuint64_t, uint64_t, ++ z0 = svldnf1_vnum_u64 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u64_x1, svuint64_t, uint64_t, ++ z0 = svldnf1_vnum_u64 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c +new file mode 100644 +index 000000000..5c29f1d19 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1_u8_base: ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u8_base, svuint8_t, uint8_t, ++ z0 = svldnf1_u8 (p0, x0), ++ z0 = svldnf1 (p0, x0)) ++ ++/* ++** ldnf1_u8_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1b z0\.b, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u8_index, svuint8_t, uint8_t, ++ z0 = svldnf1_u8 (p0, x0 + x1), ++ z0 = svldnf1 (p0, x0 + x1)) ++ ++/* ++** ldnf1_u8_1: ++** ldnf1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u8_1, svuint8_t, uint8_t, ++ z0 = svldnf1_u8 (p0, x0 + svcntb ()), ++ z0 = svldnf1 (p0, x0 + svcntb ())) ++ ++/* ++** ldnf1_u8_7: ++** ldnf1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u8_7, svuint8_t, uint8_t, ++ z0 = svldnf1_u8 (p0, x0 + svcntb () * 7), ++ z0 = svldnf1 (p0, x0 + svcntb () * 7)) ++ ++/* ++** ldnf1_u8_8: ++** incb x0, all, mul #8 ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u8_8, svuint8_t, uint8_t, ++ z0 = svldnf1_u8 (p0, x0 + svcntb () * 8), ++ z0 = svldnf1 (p0, x0 + svcntb () * 8)) ++ ++/* ++** ldnf1_u8_m1: ++** ldnf1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u8_m1, svuint8_t, uint8_t, ++ z0 = svldnf1_u8 (p0, x0 - svcntb ()), ++ z0 = svldnf1 (p0, x0 - svcntb ())) ++ ++/* ++** ldnf1_u8_m8: ++** ldnf1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u8_m8, svuint8_t, uint8_t, ++ z0 = svldnf1_u8 (p0, x0 - svcntb () * 8), ++ z0 = svldnf1 (p0, x0 - svcntb () * 8)) ++ ++/* ++** ldnf1_u8_m9: ++** decb x0, all, mul #9 ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_u8_m9, svuint8_t, uint8_t, ++ z0 = svldnf1_u8 (p0, x0 - svcntb () * 9), ++ z0 = svldnf1 (p0, x0 - svcntb () * 9)) ++ ++/* ++** ldnf1_vnum_u8_0: ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u8_0, svuint8_t, uint8_t, ++ z0 = svldnf1_vnum_u8 (p0, x0, 0), ++ z0 = svldnf1_vnum (p0, x0, 0)) ++ ++/* ++** ldnf1_vnum_u8_1: ++** ldnf1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u8_1, svuint8_t, uint8_t, ++ z0 = svldnf1_vnum_u8 (p0, x0, 1), ++ z0 = svldnf1_vnum (p0, x0, 1)) ++ ++/* ++** ldnf1_vnum_u8_7: ++** ldnf1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u8_7, svuint8_t, uint8_t, ++ z0 = svldnf1_vnum_u8 (p0, x0, 7), ++ z0 = svldnf1_vnum (p0, x0, 7)) ++ ++/* ++** ldnf1_vnum_u8_8: ++** incb x0, all, mul #8 ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u8_8, svuint8_t, uint8_t, ++ z0 = svldnf1_vnum_u8 (p0, x0, 8), ++ z0 = svldnf1_vnum (p0, x0, 8)) ++ ++/* ++** ldnf1_vnum_u8_m1: ++** ldnf1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u8_m1, svuint8_t, uint8_t, ++ z0 = svldnf1_vnum_u8 (p0, x0, -1), ++ z0 = svldnf1_vnum (p0, x0, -1)) ++ ++/* ++** ldnf1_vnum_u8_m8: ++** ldnf1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u8_m8, svuint8_t, uint8_t, ++ z0 = svldnf1_vnum_u8 (p0, x0, -8), ++ z0 = svldnf1_vnum (p0, x0, -8)) ++ ++/* ++** ldnf1_vnum_u8_m9: ++** decb x0, all, mul #9 ++** ldnf1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u8_m9, svuint8_t, uint8_t, ++ z0 = svldnf1_vnum_u8 (p0, x0, -9), ++ z0 = svldnf1_vnum (p0, x0, -9)) ++ ++/* ++** ldnf1_vnum_u8_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1b z0\.b, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1_vnum_u8_x1, svuint8_t, uint8_t, ++ z0 = svldnf1_vnum_u8 (p0, x0, x1), ++ z0 = svldnf1_vnum (p0, x0, x1)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c +new file mode 100644 +index 000000000..e04b9a788 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sb_s16_base: ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s16_base, svint16_t, int8_t, ++ z0 = svldnf1sb_s16 (p0, x0), ++ z0 = svldnf1sb_s16 (p0, x0)) ++ ++/* ++** ldnf1sb_s16_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1sb z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s16_index, svint16_t, int8_t, ++ z0 = svldnf1sb_s16 (p0, x0 + x1), ++ z0 = svldnf1sb_s16 (p0, x0 + x1)) ++ ++/* ++** ldnf1sb_s16_1: ++** ldnf1sb z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s16_1, svint16_t, int8_t, ++ z0 = svldnf1sb_s16 (p0, x0 + svcnth ()), ++ z0 = svldnf1sb_s16 (p0, x0 + svcnth ())) ++ ++/* ++** ldnf1sb_s16_7: ++** ldnf1sb z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s16_7, svint16_t, int8_t, ++ z0 = svldnf1sb_s16 (p0, x0 + svcnth () * 7), ++ z0 = svldnf1sb_s16 (p0, x0 + svcnth () * 7)) ++ ++/* ++** ldnf1sb_s16_8: ++** incb x0, all, mul #4 ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s16_8, svint16_t, int8_t, ++ z0 = svldnf1sb_s16 (p0, x0 + svcnth () * 8), ++ z0 = svldnf1sb_s16 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnf1sb_s16_m1: ++** ldnf1sb z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s16_m1, svint16_t, int8_t, ++ z0 = svldnf1sb_s16 (p0, x0 - svcnth ()), ++ z0 = svldnf1sb_s16 (p0, x0 - svcnth ())) ++ ++/* ++** ldnf1sb_s16_m8: ++** ldnf1sb z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s16_m8, svint16_t, int8_t, ++ z0 = svldnf1sb_s16 (p0, x0 - svcnth () * 8), ++ z0 = svldnf1sb_s16 (p0, x0 - svcnth () * 8)) ++ ++/* ++** ldnf1sb_s16_m9: ++** dech x0, all, mul #9 ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s16_m9, svint16_t, int8_t, ++ z0 = svldnf1sb_s16 (p0, x0 - svcnth () * 9), ++ z0 = svldnf1sb_s16 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnf1sb_vnum_s16_0: ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s16_0, svint16_t, int8_t, ++ z0 = svldnf1sb_vnum_s16 (p0, x0, 0), ++ z0 = svldnf1sb_vnum_s16 (p0, x0, 0)) ++ ++/* ++** ldnf1sb_vnum_s16_1: ++** ldnf1sb z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s16_1, svint16_t, int8_t, ++ z0 = svldnf1sb_vnum_s16 (p0, x0, 1), ++ z0 = svldnf1sb_vnum_s16 (p0, x0, 1)) ++ ++/* ++** ldnf1sb_vnum_s16_7: ++** ldnf1sb z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s16_7, svint16_t, int8_t, ++ z0 = svldnf1sb_vnum_s16 (p0, x0, 7), ++ z0 = svldnf1sb_vnum_s16 (p0, x0, 7)) ++ ++/* ++** ldnf1sb_vnum_s16_8: ++** incb x0, all, mul #4 ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s16_8, svint16_t, int8_t, ++ z0 = svldnf1sb_vnum_s16 (p0, x0, 8), ++ z0 = svldnf1sb_vnum_s16 (p0, x0, 8)) ++ ++/* ++** ldnf1sb_vnum_s16_m1: ++** ldnf1sb z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s16_m1, svint16_t, int8_t, ++ z0 = svldnf1sb_vnum_s16 (p0, x0, -1), ++ z0 = svldnf1sb_vnum_s16 (p0, x0, -1)) ++ ++/* ++** ldnf1sb_vnum_s16_m8: ++** ldnf1sb z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s16_m8, svint16_t, int8_t, ++ z0 = svldnf1sb_vnum_s16 (p0, x0, -8), ++ z0 = 
svldnf1sb_vnum_s16 (p0, x0, -8)) ++ ++/* ++** ldnf1sb_vnum_s16_m9: ++** dech x0, all, mul #9 ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s16_m9, svint16_t, int8_t, ++ z0 = svldnf1sb_vnum_s16 (p0, x0, -9), ++ z0 = svldnf1sb_vnum_s16 (p0, x0, -9)) ++ ++/* ++** ldnf1sb_vnum_s16_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sb z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s16_x1, svint16_t, int8_t, ++ z0 = svldnf1sb_vnum_s16 (p0, x0, x1), ++ z0 = svldnf1sb_vnum_s16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c +new file mode 100644 +index 000000000..0553fc98d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sb_s32_base: ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s32_base, svint32_t, int8_t, ++ z0 = svldnf1sb_s32 (p0, x0), ++ z0 = svldnf1sb_s32 (p0, x0)) ++ ++/* ++** ldnf1sb_s32_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1sb z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s32_index, svint32_t, int8_t, ++ z0 = svldnf1sb_s32 (p0, x0 + x1), ++ z0 = svldnf1sb_s32 (p0, x0 + x1)) ++ ++/* ++** ldnf1sb_s32_1: ++** ldnf1sb z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s32_1, svint32_t, int8_t, ++ z0 = svldnf1sb_s32 (p0, x0 + svcntw ()), ++ z0 = svldnf1sb_s32 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1sb_s32_7: ++** ldnf1sb z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s32_7, svint32_t, int8_t, ++ z0 = svldnf1sb_s32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1sb_s32 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1sb_s32_8: ++** incb x0, all, mul #2 ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s32_8, svint32_t, int8_t, ++ z0 = svldnf1sb_s32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1sb_s32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1sb_s32_m1: ++** ldnf1sb z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s32_m1, svint32_t, int8_t, ++ z0 = svldnf1sb_s32 (p0, x0 - svcntw ()), ++ z0 = svldnf1sb_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1sb_s32_m8: ++** ldnf1sb z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s32_m8, svint32_t, int8_t, ++ z0 = svldnf1sb_s32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1sb_s32 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1sb_s32_m9: ++** decw x0, all, mul #9 ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s32_m9, svint32_t, int8_t, ++ z0 = svldnf1sb_s32 (p0, x0 - svcntw () * 9), ++ z0 = svldnf1sb_s32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1sb_vnum_s32_0: ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s32_0, svint32_t, int8_t, ++ z0 = svldnf1sb_vnum_s32 (p0, x0, 0), ++ z0 = svldnf1sb_vnum_s32 (p0, x0, 0)) ++ ++/* ++** ldnf1sb_vnum_s32_1: ++** ldnf1sb z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s32_1, svint32_t, int8_t, ++ z0 = svldnf1sb_vnum_s32 (p0, x0, 1), ++ z0 = svldnf1sb_vnum_s32 (p0, x0, 1)) ++ ++/* ++** ldnf1sb_vnum_s32_7: ++** ldnf1sb z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s32_7, svint32_t, int8_t, ++ z0 = svldnf1sb_vnum_s32 (p0, x0, 7), ++ z0 = svldnf1sb_vnum_s32 (p0, x0, 7)) ++ ++/* ++** ldnf1sb_vnum_s32_8: ++** incb x0, all, mul #2 ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ 
++TEST_LOAD (ldnf1sb_vnum_s32_8, svint32_t, int8_t, ++ z0 = svldnf1sb_vnum_s32 (p0, x0, 8), ++ z0 = svldnf1sb_vnum_s32 (p0, x0, 8)) ++ ++/* ++** ldnf1sb_vnum_s32_m1: ++** ldnf1sb z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s32_m1, svint32_t, int8_t, ++ z0 = svldnf1sb_vnum_s32 (p0, x0, -1), ++ z0 = svldnf1sb_vnum_s32 (p0, x0, -1)) ++ ++/* ++** ldnf1sb_vnum_s32_m8: ++** ldnf1sb z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s32_m8, svint32_t, int8_t, ++ z0 = svldnf1sb_vnum_s32 (p0, x0, -8), ++ z0 = svldnf1sb_vnum_s32 (p0, x0, -8)) ++ ++/* ++** ldnf1sb_vnum_s32_m9: ++** decw x0, all, mul #9 ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s32_m9, svint32_t, int8_t, ++ z0 = svldnf1sb_vnum_s32 (p0, x0, -9), ++ z0 = svldnf1sb_vnum_s32 (p0, x0, -9)) ++ ++/* ++** ldnf1sb_vnum_s32_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sb z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s32_x1, svint32_t, int8_t, ++ z0 = svldnf1sb_vnum_s32 (p0, x0, x1), ++ z0 = svldnf1sb_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c +new file mode 100644 +index 000000000..61a474fdf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sb_s64_base: ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s64_base, svint64_t, int8_t, ++ z0 = svldnf1sb_s64 (p0, x0), ++ z0 = svldnf1sb_s64 (p0, x0)) ++ ++/* ++** ldnf1sb_s64_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1sb z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s64_index, svint64_t, int8_t, ++ z0 = svldnf1sb_s64 (p0, x0 + x1), ++ z0 = svldnf1sb_s64 (p0, x0 + x1)) ++ ++/* ++** ldnf1sb_s64_1: ++** ldnf1sb z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s64_1, svint64_t, int8_t, ++ z0 = svldnf1sb_s64 (p0, x0 + svcntd ()), ++ z0 = svldnf1sb_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1sb_s64_7: ++** ldnf1sb z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s64_7, svint64_t, int8_t, ++ z0 = svldnf1sb_s64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1sb_s64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1sb_s64_8: ++** incb x0 ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s64_8, svint64_t, int8_t, ++ z0 = svldnf1sb_s64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1sb_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1sb_s64_m1: ++** ldnf1sb z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s64_m1, svint64_t, int8_t, ++ z0 = svldnf1sb_s64 (p0, x0 - svcntd ()), ++ z0 = svldnf1sb_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1sb_s64_m8: ++** ldnf1sb z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s64_m8, svint64_t, int8_t, ++ z0 = svldnf1sb_s64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1sb_s64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1sb_s64_m9: ++** decd x0, all, mul #9 ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_s64_m9, svint64_t, int8_t, ++ z0 = svldnf1sb_s64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1sb_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1sb_vnum_s64_0: ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s64_0, svint64_t, int8_t, ++ z0 = svldnf1sb_vnum_s64 (p0, x0, 0), ++ z0 = svldnf1sb_vnum_s64 (p0, x0, 0)) ++ ++/* 
++** ldnf1sb_vnum_s64_1: ++** ldnf1sb z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s64_1, svint64_t, int8_t, ++ z0 = svldnf1sb_vnum_s64 (p0, x0, 1), ++ z0 = svldnf1sb_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ldnf1sb_vnum_s64_7: ++** ldnf1sb z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s64_7, svint64_t, int8_t, ++ z0 = svldnf1sb_vnum_s64 (p0, x0, 7), ++ z0 = svldnf1sb_vnum_s64 (p0, x0, 7)) ++ ++/* ++** ldnf1sb_vnum_s64_8: ++** incb x0 ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s64_8, svint64_t, int8_t, ++ z0 = svldnf1sb_vnum_s64 (p0, x0, 8), ++ z0 = svldnf1sb_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ldnf1sb_vnum_s64_m1: ++** ldnf1sb z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s64_m1, svint64_t, int8_t, ++ z0 = svldnf1sb_vnum_s64 (p0, x0, -1), ++ z0 = svldnf1sb_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ldnf1sb_vnum_s64_m8: ++** ldnf1sb z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s64_m8, svint64_t, int8_t, ++ z0 = svldnf1sb_vnum_s64 (p0, x0, -8), ++ z0 = svldnf1sb_vnum_s64 (p0, x0, -8)) ++ ++/* ++** ldnf1sb_vnum_s64_m9: ++** decd x0, all, mul #9 ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s64_m9, svint64_t, int8_t, ++ z0 = svldnf1sb_vnum_s64 (p0, x0, -9), ++ z0 = svldnf1sb_vnum_s64 (p0, x0, -9)) ++ ++/* ++** ldnf1sb_vnum_s64_x1: ++** cntd (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sb z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_s64_x1, svint64_t, int8_t, ++ z0 = svldnf1sb_vnum_s64 (p0, x0, x1), ++ z0 = svldnf1sb_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c +new file mode 100644 +index 000000000..be63d8bf9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sb_u16_base: ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u16_base, svuint16_t, int8_t, ++ z0 = svldnf1sb_u16 (p0, x0), ++ z0 = svldnf1sb_u16 (p0, x0)) ++ ++/* ++** ldnf1sb_u16_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1sb z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u16_index, svuint16_t, int8_t, ++ z0 = svldnf1sb_u16 (p0, x0 + x1), ++ z0 = svldnf1sb_u16 (p0, x0 + x1)) ++ ++/* ++** ldnf1sb_u16_1: ++** ldnf1sb z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u16_1, svuint16_t, int8_t, ++ z0 = svldnf1sb_u16 (p0, x0 + svcnth ()), ++ z0 = svldnf1sb_u16 (p0, x0 + svcnth ())) ++ ++/* ++** ldnf1sb_u16_7: ++** ldnf1sb z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u16_7, svuint16_t, int8_t, ++ z0 = svldnf1sb_u16 (p0, x0 + svcnth () * 7), ++ z0 = svldnf1sb_u16 (p0, x0 + svcnth () * 7)) ++ ++/* ++** ldnf1sb_u16_8: ++** incb x0, all, mul #4 ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u16_8, svuint16_t, int8_t, ++ z0 = svldnf1sb_u16 (p0, x0 + svcnth () * 8), ++ z0 = svldnf1sb_u16 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnf1sb_u16_m1: ++** ldnf1sb z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u16_m1, svuint16_t, int8_t, ++ z0 = svldnf1sb_u16 (p0, x0 - svcnth ()), ++ z0 = svldnf1sb_u16 (p0, x0 - svcnth ())) ++ ++/* ++** ldnf1sb_u16_m8: ++** ldnf1sb z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u16_m8, svuint16_t, int8_t, ++ z0 = svldnf1sb_u16 (p0, x0 - svcnth () * 8), ++ z0 = svldnf1sb_u16 (p0, x0 - svcnth () * 8)) ++ ++/* ++** ldnf1sb_u16_m9: ++** dech x0, all, mul #9 ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u16_m9, svuint16_t, int8_t, ++ z0 = svldnf1sb_u16 (p0, x0 - svcnth () * 9), ++ z0 = svldnf1sb_u16 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnf1sb_vnum_u16_0: ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u16_0, svuint16_t, int8_t, ++ z0 = svldnf1sb_vnum_u16 (p0, x0, 0), ++ z0 = svldnf1sb_vnum_u16 (p0, x0, 0)) ++ ++/* ++** ldnf1sb_vnum_u16_1: ++** ldnf1sb z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u16_1, svuint16_t, int8_t, ++ z0 = svldnf1sb_vnum_u16 (p0, x0, 1), ++ z0 = svldnf1sb_vnum_u16 (p0, x0, 1)) ++ ++/* ++** ldnf1sb_vnum_u16_7: ++** ldnf1sb z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u16_7, svuint16_t, int8_t, ++ z0 = svldnf1sb_vnum_u16 (p0, x0, 7), ++ z0 = svldnf1sb_vnum_u16 (p0, x0, 7)) ++ ++/* ++** ldnf1sb_vnum_u16_8: ++** incb x0, all, mul #4 ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u16_8, svuint16_t, int8_t, ++ z0 = svldnf1sb_vnum_u16 (p0, x0, 8), ++ z0 = svldnf1sb_vnum_u16 (p0, x0, 8)) ++ ++/* ++** ldnf1sb_vnum_u16_m1: ++** ldnf1sb z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u16_m1, svuint16_t, int8_t, ++ z0 = svldnf1sb_vnum_u16 (p0, x0, -1), ++ z0 = svldnf1sb_vnum_u16 (p0, x0, -1)) ++ ++/* ++** ldnf1sb_vnum_u16_m8: ++** ldnf1sb z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u16_m8, svuint16_t, int8_t, ++ z0 = svldnf1sb_vnum_u16 (p0, x0, -8), ++ z0 = svldnf1sb_vnum_u16 (p0, x0, -8)) ++ ++/* ++** ldnf1sb_vnum_u16_m9: ++** dech x0, all, mul #9 ++** ldnf1sb z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u16_m9, svuint16_t, int8_t, ++ z0 = svldnf1sb_vnum_u16 (p0, x0, -9), ++ z0 = svldnf1sb_vnum_u16 (p0, x0, -9)) ++ ++/* ++** ldnf1sb_vnum_u16_x1: ++** cnth (x[0-9]+) ++** madd 
(x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sb z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u16_x1, svuint16_t, int8_t, ++ z0 = svldnf1sb_vnum_u16 (p0, x0, x1), ++ z0 = svldnf1sb_vnum_u16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c +new file mode 100644 +index 000000000..4f52490b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sb_u32_base: ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u32_base, svuint32_t, int8_t, ++ z0 = svldnf1sb_u32 (p0, x0), ++ z0 = svldnf1sb_u32 (p0, x0)) ++ ++/* ++** ldnf1sb_u32_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1sb z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u32_index, svuint32_t, int8_t, ++ z0 = svldnf1sb_u32 (p0, x0 + x1), ++ z0 = svldnf1sb_u32 (p0, x0 + x1)) ++ ++/* ++** ldnf1sb_u32_1: ++** ldnf1sb z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u32_1, svuint32_t, int8_t, ++ z0 = svldnf1sb_u32 (p0, x0 + svcntw ()), ++ z0 = svldnf1sb_u32 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1sb_u32_7: ++** ldnf1sb z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u32_7, svuint32_t, int8_t, ++ z0 = svldnf1sb_u32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1sb_u32 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1sb_u32_8: ++** incb x0, all, mul #2 ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u32_8, svuint32_t, int8_t, ++ z0 = svldnf1sb_u32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1sb_u32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1sb_u32_m1: ++** ldnf1sb z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u32_m1, svuint32_t, int8_t, ++ z0 = svldnf1sb_u32 (p0, x0 - svcntw ()), ++ z0 = svldnf1sb_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1sb_u32_m8: ++** ldnf1sb z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u32_m8, svuint32_t, int8_t, ++ z0 = svldnf1sb_u32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1sb_u32 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1sb_u32_m9: ++** decw x0, all, mul #9 ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u32_m9, svuint32_t, int8_t, ++ z0 = svldnf1sb_u32 (p0, x0 - svcntw () * 9), ++ z0 = svldnf1sb_u32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1sb_vnum_u32_0: ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u32_0, svuint32_t, int8_t, ++ z0 = svldnf1sb_vnum_u32 (p0, x0, 0), ++ z0 = svldnf1sb_vnum_u32 (p0, x0, 0)) ++ ++/* ++** ldnf1sb_vnum_u32_1: ++** ldnf1sb z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u32_1, svuint32_t, int8_t, ++ z0 = svldnf1sb_vnum_u32 (p0, x0, 1), ++ z0 = svldnf1sb_vnum_u32 (p0, x0, 1)) ++ ++/* ++** ldnf1sb_vnum_u32_7: ++** ldnf1sb z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u32_7, svuint32_t, int8_t, ++ z0 = svldnf1sb_vnum_u32 (p0, x0, 7), ++ z0 = svldnf1sb_vnum_u32 (p0, x0, 7)) ++ ++/* ++** ldnf1sb_vnum_u32_8: ++** incb x0, all, mul #2 ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u32_8, svuint32_t, int8_t, ++ z0 = svldnf1sb_vnum_u32 (p0, x0, 8), ++ z0 = svldnf1sb_vnum_u32 (p0, x0, 8)) ++ ++/* ++** ldnf1sb_vnum_u32_m1: ++** ldnf1sb z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u32_m1, svuint32_t, int8_t, ++ z0 = svldnf1sb_vnum_u32 (p0, x0, -1), ++ z0 
= svldnf1sb_vnum_u32 (p0, x0, -1)) ++ ++/* ++** ldnf1sb_vnum_u32_m8: ++** ldnf1sb z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u32_m8, svuint32_t, int8_t, ++ z0 = svldnf1sb_vnum_u32 (p0, x0, -8), ++ z0 = svldnf1sb_vnum_u32 (p0, x0, -8)) ++ ++/* ++** ldnf1sb_vnum_u32_m9: ++** decw x0, all, mul #9 ++** ldnf1sb z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u32_m9, svuint32_t, int8_t, ++ z0 = svldnf1sb_vnum_u32 (p0, x0, -9), ++ z0 = svldnf1sb_vnum_u32 (p0, x0, -9)) ++ ++/* ++** ldnf1sb_vnum_u32_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sb z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u32_x1, svuint32_t, int8_t, ++ z0 = svldnf1sb_vnum_u32 (p0, x0, x1), ++ z0 = svldnf1sb_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c +new file mode 100644 +index 000000000..73f50d182 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sb_u64_base: ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u64_base, svuint64_t, int8_t, ++ z0 = svldnf1sb_u64 (p0, x0), ++ z0 = svldnf1sb_u64 (p0, x0)) ++ ++/* ++** ldnf1sb_u64_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1sb z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u64_index, svuint64_t, int8_t, ++ z0 = svldnf1sb_u64 (p0, x0 + x1), ++ z0 = svldnf1sb_u64 (p0, x0 + x1)) ++ ++/* ++** ldnf1sb_u64_1: ++** ldnf1sb z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u64_1, svuint64_t, int8_t, ++ z0 = svldnf1sb_u64 (p0, x0 + svcntd ()), ++ z0 = svldnf1sb_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1sb_u64_7: ++** ldnf1sb z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u64_7, svuint64_t, int8_t, ++ z0 = svldnf1sb_u64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1sb_u64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1sb_u64_8: ++** incb x0 ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u64_8, svuint64_t, int8_t, ++ z0 = svldnf1sb_u64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1sb_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1sb_u64_m1: ++** ldnf1sb z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u64_m1, svuint64_t, int8_t, ++ z0 = svldnf1sb_u64 (p0, x0 - svcntd ()), ++ z0 = svldnf1sb_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1sb_u64_m8: ++** ldnf1sb z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u64_m8, svuint64_t, int8_t, ++ z0 = svldnf1sb_u64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1sb_u64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1sb_u64_m9: ++** decd x0, all, mul #9 ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_u64_m9, svuint64_t, int8_t, ++ z0 = svldnf1sb_u64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1sb_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1sb_vnum_u64_0: ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u64_0, svuint64_t, int8_t, ++ z0 = svldnf1sb_vnum_u64 (p0, x0, 0), ++ z0 = svldnf1sb_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ldnf1sb_vnum_u64_1: ++** ldnf1sb z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u64_1, svuint64_t, int8_t, ++ z0 = svldnf1sb_vnum_u64 (p0, x0, 1), ++ z0 = svldnf1sb_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ldnf1sb_vnum_u64_7: ++** ldnf1sb z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ 
++TEST_LOAD (ldnf1sb_vnum_u64_7, svuint64_t, int8_t, ++ z0 = svldnf1sb_vnum_u64 (p0, x0, 7), ++ z0 = svldnf1sb_vnum_u64 (p0, x0, 7)) ++ ++/* ++** ldnf1sb_vnum_u64_8: ++** incb x0 ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u64_8, svuint64_t, int8_t, ++ z0 = svldnf1sb_vnum_u64 (p0, x0, 8), ++ z0 = svldnf1sb_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ldnf1sb_vnum_u64_m1: ++** ldnf1sb z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u64_m1, svuint64_t, int8_t, ++ z0 = svldnf1sb_vnum_u64 (p0, x0, -1), ++ z0 = svldnf1sb_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ldnf1sb_vnum_u64_m8: ++** ldnf1sb z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u64_m8, svuint64_t, int8_t, ++ z0 = svldnf1sb_vnum_u64 (p0, x0, -8), ++ z0 = svldnf1sb_vnum_u64 (p0, x0, -8)) ++ ++/* ++** ldnf1sb_vnum_u64_m9: ++** decd x0, all, mul #9 ++** ldnf1sb z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u64_m9, svuint64_t, int8_t, ++ z0 = svldnf1sb_vnum_u64 (p0, x0, -9), ++ z0 = svldnf1sb_vnum_u64 (p0, x0, -9)) ++ ++/* ++** ldnf1sb_vnum_u64_x1: ++** cntd (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sb z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sb_vnum_u64_x1, svuint64_t, int8_t, ++ z0 = svldnf1sb_vnum_u64 (p0, x0, x1), ++ z0 = svldnf1sb_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c +new file mode 100644 +index 000000000..08c7dc6dd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sh_s32_base: ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s32_base, svint32_t, int16_t, ++ z0 = svldnf1sh_s32 (p0, x0), ++ z0 = svldnf1sh_s32 (p0, x0)) ++ ++/* ++** ldnf1sh_s32_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1sh z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s32_index, svint32_t, int16_t, ++ z0 = svldnf1sh_s32 (p0, x0 + x1), ++ z0 = svldnf1sh_s32 (p0, x0 + x1)) ++ ++/* ++** ldnf1sh_s32_1: ++** ldnf1sh z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s32_1, svint32_t, int16_t, ++ z0 = svldnf1sh_s32 (p0, x0 + svcntw ()), ++ z0 = svldnf1sh_s32 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1sh_s32_7: ++** ldnf1sh z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s32_7, svint32_t, int16_t, ++ z0 = svldnf1sh_s32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1sh_s32 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1sh_s32_8: ++** incb x0, all, mul #4 ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s32_8, svint32_t, int16_t, ++ z0 = svldnf1sh_s32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1sh_s32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1sh_s32_m1: ++** ldnf1sh z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s32_m1, svint32_t, int16_t, ++ z0 = svldnf1sh_s32 (p0, x0 - svcntw ()), ++ z0 = svldnf1sh_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1sh_s32_m8: ++** ldnf1sh z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s32_m8, svint32_t, int16_t, ++ z0 = svldnf1sh_s32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1sh_s32 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1sh_s32_m9: ++** dech x0, all, mul #9 ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s32_m9, svint32_t, int16_t, ++ z0 = svldnf1sh_s32 (p0, x0 - svcntw () * 9), ++ z0 = 
svldnf1sh_s32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1sh_vnum_s32_0: ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s32_0, svint32_t, int16_t, ++ z0 = svldnf1sh_vnum_s32 (p0, x0, 0), ++ z0 = svldnf1sh_vnum_s32 (p0, x0, 0)) ++ ++/* ++** ldnf1sh_vnum_s32_1: ++** ldnf1sh z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s32_1, svint32_t, int16_t, ++ z0 = svldnf1sh_vnum_s32 (p0, x0, 1), ++ z0 = svldnf1sh_vnum_s32 (p0, x0, 1)) ++ ++/* ++** ldnf1sh_vnum_s32_7: ++** ldnf1sh z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s32_7, svint32_t, int16_t, ++ z0 = svldnf1sh_vnum_s32 (p0, x0, 7), ++ z0 = svldnf1sh_vnum_s32 (p0, x0, 7)) ++ ++/* ++** ldnf1sh_vnum_s32_8: ++** incb x0, all, mul #4 ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s32_8, svint32_t, int16_t, ++ z0 = svldnf1sh_vnum_s32 (p0, x0, 8), ++ z0 = svldnf1sh_vnum_s32 (p0, x0, 8)) ++ ++/* ++** ldnf1sh_vnum_s32_m1: ++** ldnf1sh z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s32_m1, svint32_t, int16_t, ++ z0 = svldnf1sh_vnum_s32 (p0, x0, -1), ++ z0 = svldnf1sh_vnum_s32 (p0, x0, -1)) ++ ++/* ++** ldnf1sh_vnum_s32_m8: ++** ldnf1sh z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s32_m8, svint32_t, int16_t, ++ z0 = svldnf1sh_vnum_s32 (p0, x0, -8), ++ z0 = svldnf1sh_vnum_s32 (p0, x0, -8)) ++ ++/* ++** ldnf1sh_vnum_s32_m9: ++** dech x0, all, mul #9 ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s32_m9, svint32_t, int16_t, ++ z0 = svldnf1sh_vnum_s32 (p0, x0, -9), ++ z0 = svldnf1sh_vnum_s32 (p0, x0, -9)) ++ ++/* ++** ldnf1sh_vnum_s32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sh z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s32_x1, svint32_t, int16_t, ++ z0 = svldnf1sh_vnum_s32 (p0, x0, x1), ++ z0 = svldnf1sh_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c +new file mode 100644 +index 000000000..6a41bc26b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sh_s64_base: ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s64_base, svint64_t, int16_t, ++ z0 = svldnf1sh_s64 (p0, x0), ++ z0 = svldnf1sh_s64 (p0, x0)) ++ ++/* ++** ldnf1sh_s64_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1sh z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s64_index, svint64_t, int16_t, ++ z0 = svldnf1sh_s64 (p0, x0 + x1), ++ z0 = svldnf1sh_s64 (p0, x0 + x1)) ++ ++/* ++** ldnf1sh_s64_1: ++** ldnf1sh z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s64_1, svint64_t, int16_t, ++ z0 = svldnf1sh_s64 (p0, x0 + svcntd ()), ++ z0 = svldnf1sh_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1sh_s64_7: ++** ldnf1sh z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s64_7, svint64_t, int16_t, ++ z0 = svldnf1sh_s64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1sh_s64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1sh_s64_8: ++** incb x0, all, mul #2 ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s64_8, svint64_t, int16_t, ++ z0 = svldnf1sh_s64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1sh_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1sh_s64_m1: ++** ldnf1sh z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s64_m1, svint64_t, int16_t, ++ z0 = svldnf1sh_s64 (p0, x0 - svcntd ()), ++ z0 = svldnf1sh_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1sh_s64_m8: ++** ldnf1sh z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s64_m8, svint64_t, int16_t, ++ z0 = svldnf1sh_s64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1sh_s64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1sh_s64_m9: ++** decw x0, all, mul #9 ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_s64_m9, svint64_t, int16_t, ++ z0 = svldnf1sh_s64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1sh_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1sh_vnum_s64_0: ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s64_0, svint64_t, int16_t, ++ z0 = svldnf1sh_vnum_s64 (p0, x0, 0), ++ z0 = svldnf1sh_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ldnf1sh_vnum_s64_1: ++** ldnf1sh z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s64_1, svint64_t, int16_t, ++ z0 = svldnf1sh_vnum_s64 (p0, x0, 1), ++ z0 = svldnf1sh_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ldnf1sh_vnum_s64_7: ++** ldnf1sh z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s64_7, svint64_t, int16_t, ++ z0 = svldnf1sh_vnum_s64 (p0, x0, 7), ++ z0 = svldnf1sh_vnum_s64 (p0, x0, 7)) ++ ++/* ++** ldnf1sh_vnum_s64_8: ++** incb x0, all, mul #2 ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s64_8, svint64_t, int16_t, ++ z0 = svldnf1sh_vnum_s64 (p0, x0, 8), ++ z0 = svldnf1sh_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ldnf1sh_vnum_s64_m1: ++** ldnf1sh z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s64_m1, svint64_t, int16_t, ++ z0 = svldnf1sh_vnum_s64 (p0, x0, -1), ++ z0 = svldnf1sh_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ldnf1sh_vnum_s64_m8: ++** ldnf1sh z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s64_m8, svint64_t, int16_t, ++ z0 = svldnf1sh_vnum_s64 (p0, x0, -8), ++ z0 = svldnf1sh_vnum_s64 (p0, x0, -8)) ++ ++/* ++** ldnf1sh_vnum_s64_m9: ++** decw x0, all, mul #9 ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s64_m9, svint64_t, int16_t, ++ z0 = svldnf1sh_vnum_s64 (p0, x0, -9), ++ z0 = svldnf1sh_vnum_s64 (p0, x0, -9)) ++ ++/* ++** ldnf1sh_vnum_s64_x1: ++** cntw (x[0-9]+) ++** 
madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sh z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_s64_x1, svint64_t, int16_t, ++ z0 = svldnf1sh_vnum_s64 (p0, x0, x1), ++ z0 = svldnf1sh_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c +new file mode 100644 +index 000000000..2f7718730 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sh_u32_base: ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u32_base, svuint32_t, int16_t, ++ z0 = svldnf1sh_u32 (p0, x0), ++ z0 = svldnf1sh_u32 (p0, x0)) ++ ++/* ++** ldnf1sh_u32_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1sh z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u32_index, svuint32_t, int16_t, ++ z0 = svldnf1sh_u32 (p0, x0 + x1), ++ z0 = svldnf1sh_u32 (p0, x0 + x1)) ++ ++/* ++** ldnf1sh_u32_1: ++** ldnf1sh z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u32_1, svuint32_t, int16_t, ++ z0 = svldnf1sh_u32 (p0, x0 + svcntw ()), ++ z0 = svldnf1sh_u32 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1sh_u32_7: ++** ldnf1sh z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u32_7, svuint32_t, int16_t, ++ z0 = svldnf1sh_u32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1sh_u32 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1sh_u32_8: ++** incb x0, all, mul #4 ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u32_8, svuint32_t, int16_t, ++ z0 = svldnf1sh_u32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1sh_u32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1sh_u32_m1: ++** ldnf1sh z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u32_m1, svuint32_t, int16_t, ++ z0 = svldnf1sh_u32 (p0, x0 - svcntw ()), ++ z0 = svldnf1sh_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1sh_u32_m8: ++** ldnf1sh z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u32_m8, svuint32_t, int16_t, ++ z0 = svldnf1sh_u32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1sh_u32 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1sh_u32_m9: ++** dech x0, all, mul #9 ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u32_m9, svuint32_t, int16_t, ++ z0 = svldnf1sh_u32 (p0, x0 - svcntw () * 9), ++ z0 = svldnf1sh_u32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1sh_vnum_u32_0: ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u32_0, svuint32_t, int16_t, ++ z0 = svldnf1sh_vnum_u32 (p0, x0, 0), ++ z0 = svldnf1sh_vnum_u32 (p0, x0, 0)) ++ ++/* ++** ldnf1sh_vnum_u32_1: ++** ldnf1sh z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u32_1, svuint32_t, int16_t, ++ z0 = svldnf1sh_vnum_u32 (p0, x0, 1), ++ z0 = svldnf1sh_vnum_u32 (p0, x0, 1)) ++ ++/* ++** ldnf1sh_vnum_u32_7: ++** ldnf1sh z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u32_7, svuint32_t, int16_t, ++ z0 = svldnf1sh_vnum_u32 (p0, x0, 7), ++ z0 = svldnf1sh_vnum_u32 (p0, x0, 7)) ++ ++/* ++** ldnf1sh_vnum_u32_8: ++** incb x0, all, mul #4 ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u32_8, svuint32_t, int16_t, ++ z0 = svldnf1sh_vnum_u32 (p0, x0, 8), ++ z0 = svldnf1sh_vnum_u32 (p0, x0, 8)) ++ ++/* ++** ldnf1sh_vnum_u32_m1: ++** ldnf1sh z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u32_m1, svuint32_t, int16_t, ++ z0 = 
svldnf1sh_vnum_u32 (p0, x0, -1), ++ z0 = svldnf1sh_vnum_u32 (p0, x0, -1)) ++ ++/* ++** ldnf1sh_vnum_u32_m8: ++** ldnf1sh z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u32_m8, svuint32_t, int16_t, ++ z0 = svldnf1sh_vnum_u32 (p0, x0, -8), ++ z0 = svldnf1sh_vnum_u32 (p0, x0, -8)) ++ ++/* ++** ldnf1sh_vnum_u32_m9: ++** dech x0, all, mul #9 ++** ldnf1sh z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u32_m9, svuint32_t, int16_t, ++ z0 = svldnf1sh_vnum_u32 (p0, x0, -9), ++ z0 = svldnf1sh_vnum_u32 (p0, x0, -9)) ++ ++/* ++** ldnf1sh_vnum_u32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sh z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u32_x1, svuint32_t, int16_t, ++ z0 = svldnf1sh_vnum_u32 (p0, x0, x1), ++ z0 = svldnf1sh_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c +new file mode 100644 +index 000000000..d7f1a68a4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sh_u64_base: ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u64_base, svuint64_t, int16_t, ++ z0 = svldnf1sh_u64 (p0, x0), ++ z0 = svldnf1sh_u64 (p0, x0)) ++ ++/* ++** ldnf1sh_u64_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1sh z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u64_index, svuint64_t, int16_t, ++ z0 = svldnf1sh_u64 (p0, x0 + x1), ++ z0 = svldnf1sh_u64 (p0, x0 + x1)) ++ ++/* ++** ldnf1sh_u64_1: ++** ldnf1sh z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u64_1, svuint64_t, int16_t, ++ z0 = svldnf1sh_u64 (p0, x0 + svcntd ()), ++ z0 = svldnf1sh_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1sh_u64_7: ++** ldnf1sh z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u64_7, svuint64_t, int16_t, ++ z0 = svldnf1sh_u64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1sh_u64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1sh_u64_8: ++** incb x0, all, mul #2 ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u64_8, svuint64_t, int16_t, ++ z0 = svldnf1sh_u64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1sh_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1sh_u64_m1: ++** ldnf1sh z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u64_m1, svuint64_t, int16_t, ++ z0 = svldnf1sh_u64 (p0, x0 - svcntd ()), ++ z0 = svldnf1sh_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1sh_u64_m8: ++** ldnf1sh z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u64_m8, svuint64_t, int16_t, ++ z0 = svldnf1sh_u64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1sh_u64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1sh_u64_m9: ++** decw x0, all, mul #9 ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_u64_m9, svuint64_t, int16_t, ++ z0 = svldnf1sh_u64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1sh_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1sh_vnum_u64_0: ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u64_0, svuint64_t, int16_t, ++ z0 = svldnf1sh_vnum_u64 (p0, x0, 0), ++ z0 = svldnf1sh_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ldnf1sh_vnum_u64_1: ++** ldnf1sh z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u64_1, svuint64_t, int16_t, ++ z0 = svldnf1sh_vnum_u64 (p0, x0, 1), ++ z0 = svldnf1sh_vnum_u64 (p0, x0, 1)) ++ ++/* ++** 
ldnf1sh_vnum_u64_7: ++** ldnf1sh z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u64_7, svuint64_t, int16_t, ++ z0 = svldnf1sh_vnum_u64 (p0, x0, 7), ++ z0 = svldnf1sh_vnum_u64 (p0, x0, 7)) ++ ++/* ++** ldnf1sh_vnum_u64_8: ++** incb x0, all, mul #2 ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u64_8, svuint64_t, int16_t, ++ z0 = svldnf1sh_vnum_u64 (p0, x0, 8), ++ z0 = svldnf1sh_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ldnf1sh_vnum_u64_m1: ++** ldnf1sh z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u64_m1, svuint64_t, int16_t, ++ z0 = svldnf1sh_vnum_u64 (p0, x0, -1), ++ z0 = svldnf1sh_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ldnf1sh_vnum_u64_m8: ++** ldnf1sh z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u64_m8, svuint64_t, int16_t, ++ z0 = svldnf1sh_vnum_u64 (p0, x0, -8), ++ z0 = svldnf1sh_vnum_u64 (p0, x0, -8)) ++ ++/* ++** ldnf1sh_vnum_u64_m9: ++** decw x0, all, mul #9 ++** ldnf1sh z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u64_m9, svuint64_t, int16_t, ++ z0 = svldnf1sh_vnum_u64 (p0, x0, -9), ++ z0 = svldnf1sh_vnum_u64 (p0, x0, -9)) ++ ++/* ++** ldnf1sh_vnum_u64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sh z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sh_vnum_u64_x1, svuint64_t, int16_t, ++ z0 = svldnf1sh_vnum_u64 (p0, x0, x1), ++ z0 = svldnf1sh_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c +new file mode 100644 +index 000000000..5b483e4aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sw_s64_base: ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_s64_base, svint64_t, int32_t, ++ z0 = svldnf1sw_s64 (p0, x0), ++ z0 = svldnf1sw_s64 (p0, x0)) ++ ++/* ++** ldnf1sw_s64_index: ++** add (x[0-9]+), x0, x1, lsl 2 ++** ldnf1sw z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_s64_index, svint64_t, int32_t, ++ z0 = svldnf1sw_s64 (p0, x0 + x1), ++ z0 = svldnf1sw_s64 (p0, x0 + x1)) ++ ++/* ++** ldnf1sw_s64_1: ++** ldnf1sw z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_s64_1, svint64_t, int32_t, ++ z0 = svldnf1sw_s64 (p0, x0 + svcntd ()), ++ z0 = svldnf1sw_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1sw_s64_7: ++** ldnf1sw z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_s64_7, svint64_t, int32_t, ++ z0 = svldnf1sw_s64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1sw_s64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1sw_s64_8: ++** incb x0, all, mul #4 ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_s64_8, svint64_t, int32_t, ++ z0 = svldnf1sw_s64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1sw_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1sw_s64_m1: ++** ldnf1sw z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_s64_m1, svint64_t, int32_t, ++ z0 = svldnf1sw_s64 (p0, x0 - svcntd ()), ++ z0 = svldnf1sw_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1sw_s64_m8: ++** ldnf1sw z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_s64_m8, svint64_t, int32_t, ++ z0 = svldnf1sw_s64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1sw_s64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1sw_s64_m9: ++** dech x0, all, mul #9 ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_s64_m9, svint64_t, int32_t, ++ z0 = svldnf1sw_s64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1sw_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1sw_vnum_s64_0: ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_s64_0, svint64_t, int32_t, ++ z0 = svldnf1sw_vnum_s64 (p0, x0, 0), ++ z0 = svldnf1sw_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ldnf1sw_vnum_s64_1: ++** ldnf1sw z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_s64_1, svint64_t, int32_t, ++ z0 = svldnf1sw_vnum_s64 (p0, x0, 1), ++ z0 = svldnf1sw_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ldnf1sw_vnum_s64_7: ++** ldnf1sw z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_s64_7, svint64_t, int32_t, ++ z0 = svldnf1sw_vnum_s64 (p0, x0, 7), ++ z0 = svldnf1sw_vnum_s64 (p0, x0, 7)) ++ ++/* ++** ldnf1sw_vnum_s64_8: ++** incb x0, all, mul #4 ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_s64_8, svint64_t, int32_t, ++ z0 = svldnf1sw_vnum_s64 (p0, x0, 8), ++ z0 = svldnf1sw_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ldnf1sw_vnum_s64_m1: ++** ldnf1sw z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_s64_m1, svint64_t, int32_t, ++ z0 = svldnf1sw_vnum_s64 (p0, x0, -1), ++ z0 = svldnf1sw_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ldnf1sw_vnum_s64_m8: ++** ldnf1sw z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_s64_m8, svint64_t, int32_t, ++ z0 = svldnf1sw_vnum_s64 (p0, x0, -8), ++ z0 = svldnf1sw_vnum_s64 (p0, x0, -8)) ++ ++/* ++** ldnf1sw_vnum_s64_m9: ++** dech x0, all, mul #9 ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_s64_m9, svint64_t, int32_t, ++ z0 = svldnf1sw_vnum_s64 (p0, x0, -9), ++ z0 = svldnf1sw_vnum_s64 (p0, x0, -9)) ++ ++/* ++** ldnf1sw_vnum_s64_x1: ++** cnth (x[0-9]+) ++** 
madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sw z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_s64_x1, svint64_t, int32_t, ++ z0 = svldnf1sw_vnum_s64 (p0, x0, x1), ++ z0 = svldnf1sw_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c +new file mode 100644 +index 000000000..62121ce0a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1sw_u64_base: ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_u64_base, svuint64_t, int32_t, ++ z0 = svldnf1sw_u64 (p0, x0), ++ z0 = svldnf1sw_u64 (p0, x0)) ++ ++/* ++** ldnf1sw_u64_index: ++** add (x[0-9]+), x0, x1, lsl 2 ++** ldnf1sw z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_u64_index, svuint64_t, int32_t, ++ z0 = svldnf1sw_u64 (p0, x0 + x1), ++ z0 = svldnf1sw_u64 (p0, x0 + x1)) ++ ++/* ++** ldnf1sw_u64_1: ++** ldnf1sw z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_u64_1, svuint64_t, int32_t, ++ z0 = svldnf1sw_u64 (p0, x0 + svcntd ()), ++ z0 = svldnf1sw_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1sw_u64_7: ++** ldnf1sw z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_u64_7, svuint64_t, int32_t, ++ z0 = svldnf1sw_u64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1sw_u64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1sw_u64_8: ++** incb x0, all, mul #4 ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_u64_8, svuint64_t, int32_t, ++ z0 = svldnf1sw_u64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1sw_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1sw_u64_m1: ++** ldnf1sw z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_u64_m1, svuint64_t, int32_t, ++ z0 = svldnf1sw_u64 (p0, x0 - svcntd ()), ++ z0 = svldnf1sw_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1sw_u64_m8: ++** ldnf1sw z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_u64_m8, svuint64_t, int32_t, ++ z0 = svldnf1sw_u64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1sw_u64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1sw_u64_m9: ++** dech x0, all, mul #9 ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_u64_m9, svuint64_t, int32_t, ++ z0 = svldnf1sw_u64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1sw_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1sw_vnum_u64_0: ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_u64_0, svuint64_t, int32_t, ++ z0 = svldnf1sw_vnum_u64 (p0, x0, 0), ++ z0 = svldnf1sw_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ldnf1sw_vnum_u64_1: ++** ldnf1sw z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_u64_1, svuint64_t, int32_t, ++ z0 = svldnf1sw_vnum_u64 (p0, x0, 1), ++ z0 = svldnf1sw_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ldnf1sw_vnum_u64_7: ++** ldnf1sw z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_u64_7, svuint64_t, int32_t, ++ z0 = svldnf1sw_vnum_u64 (p0, x0, 7), ++ z0 = svldnf1sw_vnum_u64 (p0, x0, 7)) ++ ++/* ++** ldnf1sw_vnum_u64_8: ++** incb x0, all, mul #4 ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_u64_8, svuint64_t, int32_t, ++ z0 = svldnf1sw_vnum_u64 (p0, x0, 8), ++ z0 = svldnf1sw_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ldnf1sw_vnum_u64_m1: ++** ldnf1sw z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_u64_m1, svuint64_t, int32_t, ++ z0 = 
svldnf1sw_vnum_u64 (p0, x0, -1), ++ z0 = svldnf1sw_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ldnf1sw_vnum_u64_m8: ++** ldnf1sw z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_u64_m8, svuint64_t, int32_t, ++ z0 = svldnf1sw_vnum_u64 (p0, x0, -8), ++ z0 = svldnf1sw_vnum_u64 (p0, x0, -8)) ++ ++/* ++** ldnf1sw_vnum_u64_m9: ++** dech x0, all, mul #9 ++** ldnf1sw z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_u64_m9, svuint64_t, int32_t, ++ z0 = svldnf1sw_vnum_u64 (p0, x0, -9), ++ z0 = svldnf1sw_vnum_u64 (p0, x0, -9)) ++ ++/* ++** ldnf1sw_vnum_u64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1sw z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1sw_vnum_u64_x1, svuint64_t, int32_t, ++ z0 = svldnf1sw_vnum_u64 (p0, x0, x1), ++ z0 = svldnf1sw_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c +new file mode 100644 +index 000000000..8fe13411f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1ub_s16_base: ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s16_base, svint16_t, uint8_t, ++ z0 = svldnf1ub_s16 (p0, x0), ++ z0 = svldnf1ub_s16 (p0, x0)) ++ ++/* ++** ldnf1ub_s16_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1b z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s16_index, svint16_t, uint8_t, ++ z0 = svldnf1ub_s16 (p0, x0 + x1), ++ z0 = svldnf1ub_s16 (p0, x0 + x1)) ++ ++/* ++** ldnf1ub_s16_1: ++** ldnf1b z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s16_1, svint16_t, uint8_t, ++ z0 = svldnf1ub_s16 (p0, x0 + svcnth ()), ++ z0 = svldnf1ub_s16 (p0, x0 + svcnth ())) ++ ++/* ++** ldnf1ub_s16_7: ++** ldnf1b z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s16_7, svint16_t, uint8_t, ++ z0 = svldnf1ub_s16 (p0, x0 + svcnth () * 7), ++ z0 = svldnf1ub_s16 (p0, x0 + svcnth () * 7)) ++ ++/* ++** ldnf1ub_s16_8: ++** incb x0, all, mul #4 ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s16_8, svint16_t, uint8_t, ++ z0 = svldnf1ub_s16 (p0, x0 + svcnth () * 8), ++ z0 = svldnf1ub_s16 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnf1ub_s16_m1: ++** ldnf1b z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s16_m1, svint16_t, uint8_t, ++ z0 = svldnf1ub_s16 (p0, x0 - svcnth ()), ++ z0 = svldnf1ub_s16 (p0, x0 - svcnth ())) ++ ++/* ++** ldnf1ub_s16_m8: ++** ldnf1b z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s16_m8, svint16_t, uint8_t, ++ z0 = svldnf1ub_s16 (p0, x0 - svcnth () * 8), ++ z0 = svldnf1ub_s16 (p0, x0 - svcnth () * 8)) ++ ++/* ++** ldnf1ub_s16_m9: ++** dech x0, all, mul #9 ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s16_m9, svint16_t, uint8_t, ++ z0 = svldnf1ub_s16 (p0, x0 - svcnth () * 9), ++ z0 = svldnf1ub_s16 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnf1ub_vnum_s16_0: ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s16_0, svint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_s16 (p0, x0, 0), ++ z0 = svldnf1ub_vnum_s16 (p0, x0, 0)) ++ ++/* ++** ldnf1ub_vnum_s16_1: ++** ldnf1b z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s16_1, svint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_s16 (p0, x0, 1), ++ z0 = svldnf1ub_vnum_s16 (p0, x0, 1)) ++ ++/* ++** ldnf1ub_vnum_s16_7: ++** ldnf1b z0\.h, 
p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s16_7, svint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_s16 (p0, x0, 7), ++ z0 = svldnf1ub_vnum_s16 (p0, x0, 7)) ++ ++/* ++** ldnf1ub_vnum_s16_8: ++** incb x0, all, mul #4 ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s16_8, svint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_s16 (p0, x0, 8), ++ z0 = svldnf1ub_vnum_s16 (p0, x0, 8)) ++ ++/* ++** ldnf1ub_vnum_s16_m1: ++** ldnf1b z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s16_m1, svint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_s16 (p0, x0, -1), ++ z0 = svldnf1ub_vnum_s16 (p0, x0, -1)) ++ ++/* ++** ldnf1ub_vnum_s16_m8: ++** ldnf1b z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s16_m8, svint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_s16 (p0, x0, -8), ++ z0 = svldnf1ub_vnum_s16 (p0, x0, -8)) ++ ++/* ++** ldnf1ub_vnum_s16_m9: ++** dech x0, all, mul #9 ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s16_m9, svint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_s16 (p0, x0, -9), ++ z0 = svldnf1ub_vnum_s16 (p0, x0, -9)) ++ ++/* ++** ldnf1ub_vnum_s16_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1b z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s16_x1, svint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_s16 (p0, x0, x1), ++ z0 = svldnf1ub_vnum_s16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c +new file mode 100644 +index 000000000..50122e3b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1ub_s32_base: ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s32_base, svint32_t, uint8_t, ++ z0 = svldnf1ub_s32 (p0, x0), ++ z0 = svldnf1ub_s32 (p0, x0)) ++ ++/* ++** ldnf1ub_s32_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1b z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s32_index, svint32_t, uint8_t, ++ z0 = svldnf1ub_s32 (p0, x0 + x1), ++ z0 = svldnf1ub_s32 (p0, x0 + x1)) ++ ++/* ++** ldnf1ub_s32_1: ++** ldnf1b z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s32_1, svint32_t, uint8_t, ++ z0 = svldnf1ub_s32 (p0, x0 + svcntw ()), ++ z0 = svldnf1ub_s32 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1ub_s32_7: ++** ldnf1b z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s32_7, svint32_t, uint8_t, ++ z0 = svldnf1ub_s32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1ub_s32 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1ub_s32_8: ++** incb x0, all, mul #2 ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s32_8, svint32_t, uint8_t, ++ z0 = svldnf1ub_s32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1ub_s32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1ub_s32_m1: ++** ldnf1b z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s32_m1, svint32_t, uint8_t, ++ z0 = svldnf1ub_s32 (p0, x0 - svcntw ()), ++ z0 = svldnf1ub_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1ub_s32_m8: ++** ldnf1b z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s32_m8, svint32_t, uint8_t, ++ z0 = svldnf1ub_s32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1ub_s32 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1ub_s32_m9: ++** decw x0, all, mul #9 ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s32_m9, svint32_t, uint8_t, ++ z0 = svldnf1ub_s32 (p0, 
x0 - svcntw () * 9), ++ z0 = svldnf1ub_s32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1ub_vnum_s32_0: ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s32_0, svint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_s32 (p0, x0, 0), ++ z0 = svldnf1ub_vnum_s32 (p0, x0, 0)) ++ ++/* ++** ldnf1ub_vnum_s32_1: ++** ldnf1b z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s32_1, svint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_s32 (p0, x0, 1), ++ z0 = svldnf1ub_vnum_s32 (p0, x0, 1)) ++ ++/* ++** ldnf1ub_vnum_s32_7: ++** ldnf1b z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s32_7, svint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_s32 (p0, x0, 7), ++ z0 = svldnf1ub_vnum_s32 (p0, x0, 7)) ++ ++/* ++** ldnf1ub_vnum_s32_8: ++** incb x0, all, mul #2 ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s32_8, svint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_s32 (p0, x0, 8), ++ z0 = svldnf1ub_vnum_s32 (p0, x0, 8)) ++ ++/* ++** ldnf1ub_vnum_s32_m1: ++** ldnf1b z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s32_m1, svint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_s32 (p0, x0, -1), ++ z0 = svldnf1ub_vnum_s32 (p0, x0, -1)) ++ ++/* ++** ldnf1ub_vnum_s32_m8: ++** ldnf1b z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s32_m8, svint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_s32 (p0, x0, -8), ++ z0 = svldnf1ub_vnum_s32 (p0, x0, -8)) ++ ++/* ++** ldnf1ub_vnum_s32_m9: ++** decw x0, all, mul #9 ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s32_m9, svint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_s32 (p0, x0, -9), ++ z0 = svldnf1ub_vnum_s32 (p0, x0, -9)) ++ ++/* ++** ldnf1ub_vnum_s32_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1b z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s32_x1, svint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_s32 (p0, x0, x1), ++ z0 = svldnf1ub_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c +new file mode 100644 +index 000000000..d7cce11b6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1ub_s64_base: ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s64_base, svint64_t, uint8_t, ++ z0 = svldnf1ub_s64 (p0, x0), ++ z0 = svldnf1ub_s64 (p0, x0)) ++ ++/* ++** ldnf1ub_s64_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1b z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s64_index, svint64_t, uint8_t, ++ z0 = svldnf1ub_s64 (p0, x0 + x1), ++ z0 = svldnf1ub_s64 (p0, x0 + x1)) ++ ++/* ++** ldnf1ub_s64_1: ++** ldnf1b z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s64_1, svint64_t, uint8_t, ++ z0 = svldnf1ub_s64 (p0, x0 + svcntd ()), ++ z0 = svldnf1ub_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1ub_s64_7: ++** ldnf1b z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s64_7, svint64_t, uint8_t, ++ z0 = svldnf1ub_s64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1ub_s64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1ub_s64_8: ++** incb x0 ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s64_8, svint64_t, uint8_t, ++ z0 = svldnf1ub_s64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1ub_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1ub_s64_m1: ++** ldnf1b z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s64_m1, svint64_t, uint8_t, ++ z0 = svldnf1ub_s64 (p0, x0 - svcntd ()), ++ z0 = svldnf1ub_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1ub_s64_m8: ++** ldnf1b z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s64_m8, svint64_t, uint8_t, ++ z0 = svldnf1ub_s64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1ub_s64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1ub_s64_m9: ++** decd x0, all, mul #9 ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_s64_m9, svint64_t, uint8_t, ++ z0 = svldnf1ub_s64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1ub_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1ub_vnum_s64_0: ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s64_0, svint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_s64 (p0, x0, 0), ++ z0 = svldnf1ub_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ldnf1ub_vnum_s64_1: ++** ldnf1b z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s64_1, svint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_s64 (p0, x0, 1), ++ z0 = svldnf1ub_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ldnf1ub_vnum_s64_7: ++** ldnf1b z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s64_7, svint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_s64 (p0, x0, 7), ++ z0 = svldnf1ub_vnum_s64 (p0, x0, 7)) ++ ++/* ++** ldnf1ub_vnum_s64_8: ++** incb x0 ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s64_8, svint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_s64 (p0, x0, 8), ++ z0 = svldnf1ub_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ldnf1ub_vnum_s64_m1: ++** ldnf1b z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s64_m1, svint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_s64 (p0, x0, -1), ++ z0 = svldnf1ub_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ldnf1ub_vnum_s64_m8: ++** ldnf1b z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s64_m8, svint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_s64 (p0, x0, -8), ++ z0 = svldnf1ub_vnum_s64 (p0, x0, -8)) ++ ++/* ++** ldnf1ub_vnum_s64_m9: ++** decd x0, all, mul #9 ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s64_m9, svint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_s64 (p0, x0, -9), ++ z0 = svldnf1ub_vnum_s64 (p0, x0, -9)) ++ ++/* ++** ldnf1ub_vnum_s64_x1: ++** cntd (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1b 
z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_s64_x1, svint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_s64 (p0, x0, x1), ++ z0 = svldnf1ub_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c +new file mode 100644 +index 000000000..7bf82c3b6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1ub_u16_base: ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u16_base, svuint16_t, uint8_t, ++ z0 = svldnf1ub_u16 (p0, x0), ++ z0 = svldnf1ub_u16 (p0, x0)) ++ ++/* ++** ldnf1ub_u16_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1b z0\.h, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u16_index, svuint16_t, uint8_t, ++ z0 = svldnf1ub_u16 (p0, x0 + x1), ++ z0 = svldnf1ub_u16 (p0, x0 + x1)) ++ ++/* ++** ldnf1ub_u16_1: ++** ldnf1b z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u16_1, svuint16_t, uint8_t, ++ z0 = svldnf1ub_u16 (p0, x0 + svcnth ()), ++ z0 = svldnf1ub_u16 (p0, x0 + svcnth ())) ++ ++/* ++** ldnf1ub_u16_7: ++** ldnf1b z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u16_7, svuint16_t, uint8_t, ++ z0 = svldnf1ub_u16 (p0, x0 + svcnth () * 7), ++ z0 = svldnf1ub_u16 (p0, x0 + svcnth () * 7)) ++ ++/* ++** ldnf1ub_u16_8: ++** incb x0, all, mul #4 ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u16_8, svuint16_t, uint8_t, ++ z0 = svldnf1ub_u16 (p0, x0 + svcnth () * 8), ++ z0 = svldnf1ub_u16 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnf1ub_u16_m1: ++** ldnf1b z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u16_m1, svuint16_t, uint8_t, ++ z0 = svldnf1ub_u16 (p0, x0 - svcnth ()), ++ z0 = svldnf1ub_u16 (p0, x0 - svcnth ())) ++ ++/* ++** ldnf1ub_u16_m8: ++** ldnf1b z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u16_m8, svuint16_t, uint8_t, ++ z0 = svldnf1ub_u16 (p0, x0 - svcnth () * 8), ++ z0 = svldnf1ub_u16 (p0, x0 - svcnth () * 8)) ++ ++/* ++** ldnf1ub_u16_m9: ++** dech x0, all, mul #9 ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u16_m9, svuint16_t, uint8_t, ++ z0 = svldnf1ub_u16 (p0, x0 - svcnth () * 9), ++ z0 = svldnf1ub_u16 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnf1ub_vnum_u16_0: ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u16_0, svuint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_u16 (p0, x0, 0), ++ z0 = svldnf1ub_vnum_u16 (p0, x0, 0)) ++ ++/* ++** ldnf1ub_vnum_u16_1: ++** ldnf1b z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u16_1, svuint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_u16 (p0, x0, 1), ++ z0 = svldnf1ub_vnum_u16 (p0, x0, 1)) ++ ++/* ++** ldnf1ub_vnum_u16_7: ++** ldnf1b z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u16_7, svuint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_u16 (p0, x0, 7), ++ z0 = svldnf1ub_vnum_u16 (p0, x0, 7)) ++ ++/* ++** ldnf1ub_vnum_u16_8: ++** incb x0, all, mul #4 ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u16_8, svuint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_u16 (p0, x0, 8), ++ z0 = svldnf1ub_vnum_u16 (p0, x0, 8)) ++ ++/* ++** ldnf1ub_vnum_u16_m1: ++** ldnf1b z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u16_m1, svuint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_u16 (p0, x0, -1), ++ z0 = svldnf1ub_vnum_u16 (p0, x0, -1)) ++ ++/* 
++** ldnf1ub_vnum_u16_m8: ++** ldnf1b z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u16_m8, svuint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_u16 (p0, x0, -8), ++ z0 = svldnf1ub_vnum_u16 (p0, x0, -8)) ++ ++/* ++** ldnf1ub_vnum_u16_m9: ++** dech x0, all, mul #9 ++** ldnf1b z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u16_m9, svuint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_u16 (p0, x0, -9), ++ z0 = svldnf1ub_vnum_u16 (p0, x0, -9)) ++ ++/* ++** ldnf1ub_vnum_u16_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1b z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u16_x1, svuint16_t, uint8_t, ++ z0 = svldnf1ub_vnum_u16 (p0, x0, x1), ++ z0 = svldnf1ub_vnum_u16 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c +new file mode 100644 +index 000000000..e2fef064b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1ub_u32_base: ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u32_base, svuint32_t, uint8_t, ++ z0 = svldnf1ub_u32 (p0, x0), ++ z0 = svldnf1ub_u32 (p0, x0)) ++ ++/* ++** ldnf1ub_u32_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1b z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u32_index, svuint32_t, uint8_t, ++ z0 = svldnf1ub_u32 (p0, x0 + x1), ++ z0 = svldnf1ub_u32 (p0, x0 + x1)) ++ ++/* ++** ldnf1ub_u32_1: ++** ldnf1b z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u32_1, svuint32_t, uint8_t, ++ z0 = svldnf1ub_u32 (p0, x0 + svcntw ()), ++ z0 = svldnf1ub_u32 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1ub_u32_7: ++** ldnf1b z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u32_7, svuint32_t, uint8_t, ++ z0 = svldnf1ub_u32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1ub_u32 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1ub_u32_8: ++** incb x0, all, mul #2 ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u32_8, svuint32_t, uint8_t, ++ z0 = svldnf1ub_u32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1ub_u32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1ub_u32_m1: ++** ldnf1b z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u32_m1, svuint32_t, uint8_t, ++ z0 = svldnf1ub_u32 (p0, x0 - svcntw ()), ++ z0 = svldnf1ub_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1ub_u32_m8: ++** ldnf1b z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u32_m8, svuint32_t, uint8_t, ++ z0 = svldnf1ub_u32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1ub_u32 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1ub_u32_m9: ++** decw x0, all, mul #9 ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u32_m9, svuint32_t, uint8_t, ++ z0 = svldnf1ub_u32 (p0, x0 - svcntw () * 9), ++ z0 = svldnf1ub_u32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1ub_vnum_u32_0: ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u32_0, svuint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_u32 (p0, x0, 0), ++ z0 = svldnf1ub_vnum_u32 (p0, x0, 0)) ++ ++/* ++** ldnf1ub_vnum_u32_1: ++** ldnf1b z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u32_1, svuint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_u32 (p0, x0, 1), ++ z0 = svldnf1ub_vnum_u32 (p0, x0, 1)) ++ ++/* ++** ldnf1ub_vnum_u32_7: ++** ldnf1b z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u32_7, 
svuint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_u32 (p0, x0, 7), ++ z0 = svldnf1ub_vnum_u32 (p0, x0, 7)) ++ ++/* ++** ldnf1ub_vnum_u32_8: ++** incb x0, all, mul #2 ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u32_8, svuint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_u32 (p0, x0, 8), ++ z0 = svldnf1ub_vnum_u32 (p0, x0, 8)) ++ ++/* ++** ldnf1ub_vnum_u32_m1: ++** ldnf1b z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u32_m1, svuint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_u32 (p0, x0, -1), ++ z0 = svldnf1ub_vnum_u32 (p0, x0, -1)) ++ ++/* ++** ldnf1ub_vnum_u32_m8: ++** ldnf1b z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u32_m8, svuint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_u32 (p0, x0, -8), ++ z0 = svldnf1ub_vnum_u32 (p0, x0, -8)) ++ ++/* ++** ldnf1ub_vnum_u32_m9: ++** decw x0, all, mul #9 ++** ldnf1b z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u32_m9, svuint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_u32 (p0, x0, -9), ++ z0 = svldnf1ub_vnum_u32 (p0, x0, -9)) ++ ++/* ++** ldnf1ub_vnum_u32_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1b z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u32_x1, svuint32_t, uint8_t, ++ z0 = svldnf1ub_vnum_u32 (p0, x0, x1), ++ z0 = svldnf1ub_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c +new file mode 100644 +index 000000000..57c61e122 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1ub_u64_base: ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u64_base, svuint64_t, uint8_t, ++ z0 = svldnf1ub_u64 (p0, x0), ++ z0 = svldnf1ub_u64 (p0, x0)) ++ ++/* ++** ldnf1ub_u64_index: ++** add (x[0-9]+), x0, x1 ++** ldnf1b z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u64_index, svuint64_t, uint8_t, ++ z0 = svldnf1ub_u64 (p0, x0 + x1), ++ z0 = svldnf1ub_u64 (p0, x0 + x1)) ++ ++/* ++** ldnf1ub_u64_1: ++** ldnf1b z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u64_1, svuint64_t, uint8_t, ++ z0 = svldnf1ub_u64 (p0, x0 + svcntd ()), ++ z0 = svldnf1ub_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1ub_u64_7: ++** ldnf1b z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u64_7, svuint64_t, uint8_t, ++ z0 = svldnf1ub_u64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1ub_u64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1ub_u64_8: ++** incb x0 ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u64_8, svuint64_t, uint8_t, ++ z0 = svldnf1ub_u64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1ub_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1ub_u64_m1: ++** ldnf1b z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u64_m1, svuint64_t, uint8_t, ++ z0 = svldnf1ub_u64 (p0, x0 - svcntd ()), ++ z0 = svldnf1ub_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1ub_u64_m8: ++** ldnf1b z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u64_m8, svuint64_t, uint8_t, ++ z0 = svldnf1ub_u64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1ub_u64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1ub_u64_m9: ++** decd x0, all, mul #9 ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_u64_m9, svuint64_t, uint8_t, ++ z0 = svldnf1ub_u64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1ub_u64 (p0, x0 - svcntd () * 9)) ++ 
++/* ++** ldnf1ub_vnum_u64_0: ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u64_0, svuint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_u64 (p0, x0, 0), ++ z0 = svldnf1ub_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ldnf1ub_vnum_u64_1: ++** ldnf1b z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u64_1, svuint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_u64 (p0, x0, 1), ++ z0 = svldnf1ub_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ldnf1ub_vnum_u64_7: ++** ldnf1b z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u64_7, svuint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_u64 (p0, x0, 7), ++ z0 = svldnf1ub_vnum_u64 (p0, x0, 7)) ++ ++/* ++** ldnf1ub_vnum_u64_8: ++** incb x0 ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u64_8, svuint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_u64 (p0, x0, 8), ++ z0 = svldnf1ub_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ldnf1ub_vnum_u64_m1: ++** ldnf1b z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u64_m1, svuint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_u64 (p0, x0, -1), ++ z0 = svldnf1ub_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ldnf1ub_vnum_u64_m8: ++** ldnf1b z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u64_m8, svuint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_u64 (p0, x0, -8), ++ z0 = svldnf1ub_vnum_u64 (p0, x0, -8)) ++ ++/* ++** ldnf1ub_vnum_u64_m9: ++** decd x0, all, mul #9 ++** ldnf1b z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u64_m9, svuint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_u64 (p0, x0, -9), ++ z0 = svldnf1ub_vnum_u64 (p0, x0, -9)) ++ ++/* ++** ldnf1ub_vnum_u64_x1: ++** cntd (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1b z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1ub_vnum_u64_x1, svuint64_t, uint8_t, ++ z0 = svldnf1ub_vnum_u64 (p0, x0, x1), ++ z0 = svldnf1ub_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c +new file mode 100644 +index 000000000..ed9686c4e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1uh_s32_base: ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s32_base, svint32_t, uint16_t, ++ z0 = svldnf1uh_s32 (p0, x0), ++ z0 = svldnf1uh_s32 (p0, x0)) ++ ++/* ++** ldnf1uh_s32_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1h z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s32_index, svint32_t, uint16_t, ++ z0 = svldnf1uh_s32 (p0, x0 + x1), ++ z0 = svldnf1uh_s32 (p0, x0 + x1)) ++ ++/* ++** ldnf1uh_s32_1: ++** ldnf1h z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s32_1, svint32_t, uint16_t, ++ z0 = svldnf1uh_s32 (p0, x0 + svcntw ()), ++ z0 = svldnf1uh_s32 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1uh_s32_7: ++** ldnf1h z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s32_7, svint32_t, uint16_t, ++ z0 = svldnf1uh_s32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1uh_s32 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1uh_s32_8: ++** incb x0, all, mul #4 ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s32_8, svint32_t, uint16_t, ++ z0 = svldnf1uh_s32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1uh_s32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1uh_s32_m1: ++** ldnf1h z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s32_m1, svint32_t, uint16_t, ++ z0 = svldnf1uh_s32 (p0, x0 - svcntw ()), ++ z0 = svldnf1uh_s32 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1uh_s32_m8: ++** ldnf1h z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s32_m8, svint32_t, uint16_t, ++ z0 = svldnf1uh_s32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1uh_s32 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1uh_s32_m9: ++** dech x0, all, mul #9 ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s32_m9, svint32_t, uint16_t, ++ z0 = svldnf1uh_s32 (p0, x0 - svcntw () * 9), ++ z0 = svldnf1uh_s32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1uh_vnum_s32_0: ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s32_0, svint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_s32 (p0, x0, 0), ++ z0 = svldnf1uh_vnum_s32 (p0, x0, 0)) ++ ++/* ++** ldnf1uh_vnum_s32_1: ++** ldnf1h z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s32_1, svint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_s32 (p0, x0, 1), ++ z0 = svldnf1uh_vnum_s32 (p0, x0, 1)) ++ ++/* ++** ldnf1uh_vnum_s32_7: ++** ldnf1h z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s32_7, svint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_s32 (p0, x0, 7), ++ z0 = svldnf1uh_vnum_s32 (p0, x0, 7)) ++ ++/* ++** ldnf1uh_vnum_s32_8: ++** incb x0, all, mul #4 ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s32_8, svint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_s32 (p0, x0, 8), ++ z0 = svldnf1uh_vnum_s32 (p0, x0, 8)) ++ ++/* ++** ldnf1uh_vnum_s32_m1: ++** ldnf1h z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s32_m1, svint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_s32 (p0, x0, -1), ++ z0 = svldnf1uh_vnum_s32 (p0, x0, -1)) ++ ++/* ++** ldnf1uh_vnum_s32_m8: ++** ldnf1h z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s32_m8, svint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_s32 (p0, x0, -8), ++ z0 = svldnf1uh_vnum_s32 (p0, x0, -8)) ++ ++/* ++** ldnf1uh_vnum_s32_m9: ++** dech x0, all, mul #9 ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s32_m9, svint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_s32 (p0, x0, -9), ++ z0 = svldnf1uh_vnum_s32 (p0, x0, -9)) ++ ++/* ++** ldnf1uh_vnum_s32_x1: ++** cnth (x[0-9]+) ++** 
madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1h z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s32_x1, svint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_s32 (p0, x0, x1), ++ z0 = svldnf1uh_vnum_s32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c +new file mode 100644 +index 000000000..a3107f562 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1uh_s64_base: ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s64_base, svint64_t, uint16_t, ++ z0 = svldnf1uh_s64 (p0, x0), ++ z0 = svldnf1uh_s64 (p0, x0)) ++ ++/* ++** ldnf1uh_s64_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1h z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s64_index, svint64_t, uint16_t, ++ z0 = svldnf1uh_s64 (p0, x0 + x1), ++ z0 = svldnf1uh_s64 (p0, x0 + x1)) ++ ++/* ++** ldnf1uh_s64_1: ++** ldnf1h z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s64_1, svint64_t, uint16_t, ++ z0 = svldnf1uh_s64 (p0, x0 + svcntd ()), ++ z0 = svldnf1uh_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1uh_s64_7: ++** ldnf1h z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s64_7, svint64_t, uint16_t, ++ z0 = svldnf1uh_s64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1uh_s64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1uh_s64_8: ++** incb x0, all, mul #2 ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s64_8, svint64_t, uint16_t, ++ z0 = svldnf1uh_s64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1uh_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1uh_s64_m1: ++** ldnf1h z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s64_m1, svint64_t, uint16_t, ++ z0 = svldnf1uh_s64 (p0, x0 - svcntd ()), ++ z0 = svldnf1uh_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1uh_s64_m8: ++** ldnf1h z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s64_m8, svint64_t, uint16_t, ++ z0 = svldnf1uh_s64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1uh_s64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1uh_s64_m9: ++** decw x0, all, mul #9 ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_s64_m9, svint64_t, uint16_t, ++ z0 = svldnf1uh_s64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1uh_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1uh_vnum_s64_0: ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s64_0, svint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_s64 (p0, x0, 0), ++ z0 = svldnf1uh_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ldnf1uh_vnum_s64_1: ++** ldnf1h z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s64_1, svint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_s64 (p0, x0, 1), ++ z0 = svldnf1uh_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ldnf1uh_vnum_s64_7: ++** ldnf1h z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s64_7, svint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_s64 (p0, x0, 7), ++ z0 = svldnf1uh_vnum_s64 (p0, x0, 7)) ++ ++/* ++** ldnf1uh_vnum_s64_8: ++** incb x0, all, mul #2 ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s64_8, svint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_s64 (p0, x0, 8), ++ z0 = svldnf1uh_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ldnf1uh_vnum_s64_m1: ++** ldnf1h z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s64_m1, svint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_s64 (p0, 
x0, -1), ++ z0 = svldnf1uh_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ldnf1uh_vnum_s64_m8: ++** ldnf1h z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s64_m8, svint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_s64 (p0, x0, -8), ++ z0 = svldnf1uh_vnum_s64 (p0, x0, -8)) ++ ++/* ++** ldnf1uh_vnum_s64_m9: ++** decw x0, all, mul #9 ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s64_m9, svint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_s64 (p0, x0, -9), ++ z0 = svldnf1uh_vnum_s64 (p0, x0, -9)) ++ ++/* ++** ldnf1uh_vnum_s64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1h z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_s64_x1, svint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_s64 (p0, x0, x1), ++ z0 = svldnf1uh_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c +new file mode 100644 +index 000000000..93d5abaf7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1uh_u32_base: ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u32_base, svuint32_t, uint16_t, ++ z0 = svldnf1uh_u32 (p0, x0), ++ z0 = svldnf1uh_u32 (p0, x0)) ++ ++/* ++** ldnf1uh_u32_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1h z0\.s, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u32_index, svuint32_t, uint16_t, ++ z0 = svldnf1uh_u32 (p0, x0 + x1), ++ z0 = svldnf1uh_u32 (p0, x0 + x1)) ++ ++/* ++** ldnf1uh_u32_1: ++** ldnf1h z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u32_1, svuint32_t, uint16_t, ++ z0 = svldnf1uh_u32 (p0, x0 + svcntw ()), ++ z0 = svldnf1uh_u32 (p0, x0 + svcntw ())) ++ ++/* ++** ldnf1uh_u32_7: ++** ldnf1h z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u32_7, svuint32_t, uint16_t, ++ z0 = svldnf1uh_u32 (p0, x0 + svcntw () * 7), ++ z0 = svldnf1uh_u32 (p0, x0 + svcntw () * 7)) ++ ++/* ++** ldnf1uh_u32_8: ++** incb x0, all, mul #4 ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u32_8, svuint32_t, uint16_t, ++ z0 = svldnf1uh_u32 (p0, x0 + svcntw () * 8), ++ z0 = svldnf1uh_u32 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnf1uh_u32_m1: ++** ldnf1h z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u32_m1, svuint32_t, uint16_t, ++ z0 = svldnf1uh_u32 (p0, x0 - svcntw ()), ++ z0 = svldnf1uh_u32 (p0, x0 - svcntw ())) ++ ++/* ++** ldnf1uh_u32_m8: ++** ldnf1h z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u32_m8, svuint32_t, uint16_t, ++ z0 = svldnf1uh_u32 (p0, x0 - svcntw () * 8), ++ z0 = svldnf1uh_u32 (p0, x0 - svcntw () * 8)) ++ ++/* ++** ldnf1uh_u32_m9: ++** dech x0, all, mul #9 ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u32_m9, svuint32_t, uint16_t, ++ z0 = svldnf1uh_u32 (p0, x0 - svcntw () * 9), ++ z0 = svldnf1uh_u32 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnf1uh_vnum_u32_0: ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u32_0, svuint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_u32 (p0, x0, 0), ++ z0 = svldnf1uh_vnum_u32 (p0, x0, 0)) ++ ++/* ++** ldnf1uh_vnum_u32_1: ++** ldnf1h z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u32_1, svuint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_u32 (p0, x0, 1), ++ z0 = svldnf1uh_vnum_u32 (p0, x0, 1)) ++ ++/* ++** ldnf1uh_vnum_u32_7: ++** ldnf1h z0\.s, 
p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u32_7, svuint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_u32 (p0, x0, 7), ++ z0 = svldnf1uh_vnum_u32 (p0, x0, 7)) ++ ++/* ++** ldnf1uh_vnum_u32_8: ++** incb x0, all, mul #4 ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u32_8, svuint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_u32 (p0, x0, 8), ++ z0 = svldnf1uh_vnum_u32 (p0, x0, 8)) ++ ++/* ++** ldnf1uh_vnum_u32_m1: ++** ldnf1h z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u32_m1, svuint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_u32 (p0, x0, -1), ++ z0 = svldnf1uh_vnum_u32 (p0, x0, -1)) ++ ++/* ++** ldnf1uh_vnum_u32_m8: ++** ldnf1h z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u32_m8, svuint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_u32 (p0, x0, -8), ++ z0 = svldnf1uh_vnum_u32 (p0, x0, -8)) ++ ++/* ++** ldnf1uh_vnum_u32_m9: ++** dech x0, all, mul #9 ++** ldnf1h z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u32_m9, svuint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_u32 (p0, x0, -9), ++ z0 = svldnf1uh_vnum_u32 (p0, x0, -9)) ++ ++/* ++** ldnf1uh_vnum_u32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1h z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u32_x1, svuint32_t, uint16_t, ++ z0 = svldnf1uh_vnum_u32 (p0, x0, x1), ++ z0 = svldnf1uh_vnum_u32 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c +new file mode 100644 +index 000000000..32d36a84c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1uh_u64_base: ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u64_base, svuint64_t, uint16_t, ++ z0 = svldnf1uh_u64 (p0, x0), ++ z0 = svldnf1uh_u64 (p0, x0)) ++ ++/* ++** ldnf1uh_u64_index: ++** add (x[0-9]+), x0, x1, lsl 1 ++** ldnf1h z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u64_index, svuint64_t, uint16_t, ++ z0 = svldnf1uh_u64 (p0, x0 + x1), ++ z0 = svldnf1uh_u64 (p0, x0 + x1)) ++ ++/* ++** ldnf1uh_u64_1: ++** ldnf1h z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u64_1, svuint64_t, uint16_t, ++ z0 = svldnf1uh_u64 (p0, x0 + svcntd ()), ++ z0 = svldnf1uh_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1uh_u64_7: ++** ldnf1h z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u64_7, svuint64_t, uint16_t, ++ z0 = svldnf1uh_u64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1uh_u64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1uh_u64_8: ++** incb x0, all, mul #2 ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u64_8, svuint64_t, uint16_t, ++ z0 = svldnf1uh_u64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1uh_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1uh_u64_m1: ++** ldnf1h z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u64_m1, svuint64_t, uint16_t, ++ z0 = svldnf1uh_u64 (p0, x0 - svcntd ()), ++ z0 = svldnf1uh_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1uh_u64_m8: ++** ldnf1h z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u64_m8, svuint64_t, uint16_t, ++ z0 = svldnf1uh_u64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1uh_u64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1uh_u64_m9: ++** decw x0, all, mul #9 ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_u64_m9, svuint64_t, 
uint16_t, ++ z0 = svldnf1uh_u64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1uh_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1uh_vnum_u64_0: ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u64_0, svuint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_u64 (p0, x0, 0), ++ z0 = svldnf1uh_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ldnf1uh_vnum_u64_1: ++** ldnf1h z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u64_1, svuint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_u64 (p0, x0, 1), ++ z0 = svldnf1uh_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ldnf1uh_vnum_u64_7: ++** ldnf1h z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u64_7, svuint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_u64 (p0, x0, 7), ++ z0 = svldnf1uh_vnum_u64 (p0, x0, 7)) ++ ++/* ++** ldnf1uh_vnum_u64_8: ++** incb x0, all, mul #2 ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u64_8, svuint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_u64 (p0, x0, 8), ++ z0 = svldnf1uh_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ldnf1uh_vnum_u64_m1: ++** ldnf1h z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u64_m1, svuint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_u64 (p0, x0, -1), ++ z0 = svldnf1uh_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ldnf1uh_vnum_u64_m8: ++** ldnf1h z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u64_m8, svuint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_u64 (p0, x0, -8), ++ z0 = svldnf1uh_vnum_u64 (p0, x0, -8)) ++ ++/* ++** ldnf1uh_vnum_u64_m9: ++** decw x0, all, mul #9 ++** ldnf1h z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u64_m9, svuint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_u64 (p0, x0, -9), ++ z0 = svldnf1uh_vnum_u64 (p0, x0, -9)) ++ ++/* ++** ldnf1uh_vnum_u64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1h z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1uh_vnum_u64_x1, svuint64_t, uint16_t, ++ z0 = svldnf1uh_vnum_u64 (p0, x0, x1), ++ z0 = svldnf1uh_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c +new file mode 100644 +index 000000000..373922791 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1uw_s64_base: ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_s64_base, svint64_t, uint32_t, ++ z0 = svldnf1uw_s64 (p0, x0), ++ z0 = svldnf1uw_s64 (p0, x0)) ++ ++/* ++** ldnf1uw_s64_index: ++** add (x[0-9]+), x0, x1, lsl 2 ++** ldnf1w z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_s64_index, svint64_t, uint32_t, ++ z0 = svldnf1uw_s64 (p0, x0 + x1), ++ z0 = svldnf1uw_s64 (p0, x0 + x1)) ++ ++/* ++** ldnf1uw_s64_1: ++** ldnf1w z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_s64_1, svint64_t, uint32_t, ++ z0 = svldnf1uw_s64 (p0, x0 + svcntd ()), ++ z0 = svldnf1uw_s64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1uw_s64_7: ++** ldnf1w z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_s64_7, svint64_t, uint32_t, ++ z0 = svldnf1uw_s64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1uw_s64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1uw_s64_8: ++** incb x0, all, mul #4 ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_s64_8, svint64_t, uint32_t, ++ z0 = svldnf1uw_s64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1uw_s64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1uw_s64_m1: ++** ldnf1w z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_s64_m1, svint64_t, uint32_t, ++ z0 = svldnf1uw_s64 (p0, x0 - svcntd ()), ++ z0 = svldnf1uw_s64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1uw_s64_m8: ++** ldnf1w z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_s64_m8, svint64_t, uint32_t, ++ z0 = svldnf1uw_s64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1uw_s64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1uw_s64_m9: ++** dech x0, all, mul #9 ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_s64_m9, svint64_t, uint32_t, ++ z0 = svldnf1uw_s64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1uw_s64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1uw_vnum_s64_0: ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_s64_0, svint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_s64 (p0, x0, 0), ++ z0 = svldnf1uw_vnum_s64 (p0, x0, 0)) ++ ++/* ++** ldnf1uw_vnum_s64_1: ++** ldnf1w z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_s64_1, svint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_s64 (p0, x0, 1), ++ z0 = svldnf1uw_vnum_s64 (p0, x0, 1)) ++ ++/* ++** ldnf1uw_vnum_s64_7: ++** ldnf1w z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_s64_7, svint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_s64 (p0, x0, 7), ++ z0 = svldnf1uw_vnum_s64 (p0, x0, 7)) ++ ++/* ++** ldnf1uw_vnum_s64_8: ++** incb x0, all, mul #4 ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_s64_8, svint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_s64 (p0, x0, 8), ++ z0 = svldnf1uw_vnum_s64 (p0, x0, 8)) ++ ++/* ++** ldnf1uw_vnum_s64_m1: ++** ldnf1w z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_s64_m1, svint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_s64 (p0, x0, -1), ++ z0 = svldnf1uw_vnum_s64 (p0, x0, -1)) ++ ++/* ++** ldnf1uw_vnum_s64_m8: ++** ldnf1w z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_s64_m8, svint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_s64 (p0, x0, -8), ++ z0 = svldnf1uw_vnum_s64 (p0, x0, -8)) ++ ++/* ++** ldnf1uw_vnum_s64_m9: ++** dech x0, all, mul #9 ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_s64_m9, svint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_s64 (p0, x0, -9), ++ z0 = svldnf1uw_vnum_s64 (p0, x0, -9)) ++ ++/* ++** ldnf1uw_vnum_s64_x1: ++** cnth (x[0-9]+) ++** 
madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1w z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_s64_x1, svint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_s64 (p0, x0, x1), ++ z0 = svldnf1uw_vnum_s64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c +new file mode 100644 +index 000000000..b3c3be1d0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c +@@ -0,0 +1,154 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnf1uw_u64_base: ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_u64_base, svuint64_t, uint32_t, ++ z0 = svldnf1uw_u64 (p0, x0), ++ z0 = svldnf1uw_u64 (p0, x0)) ++ ++/* ++** ldnf1uw_u64_index: ++** add (x[0-9]+), x0, x1, lsl 2 ++** ldnf1w z0\.d, p0/z, \[\1\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_u64_index, svuint64_t, uint32_t, ++ z0 = svldnf1uw_u64 (p0, x0 + x1), ++ z0 = svldnf1uw_u64 (p0, x0 + x1)) ++ ++/* ++** ldnf1uw_u64_1: ++** ldnf1w z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_u64_1, svuint64_t, uint32_t, ++ z0 = svldnf1uw_u64 (p0, x0 + svcntd ()), ++ z0 = svldnf1uw_u64 (p0, x0 + svcntd ())) ++ ++/* ++** ldnf1uw_u64_7: ++** ldnf1w z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_u64_7, svuint64_t, uint32_t, ++ z0 = svldnf1uw_u64 (p0, x0 + svcntd () * 7), ++ z0 = svldnf1uw_u64 (p0, x0 + svcntd () * 7)) ++ ++/* ++** ldnf1uw_u64_8: ++** incb x0, all, mul #4 ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_u64_8, svuint64_t, uint32_t, ++ z0 = svldnf1uw_u64 (p0, x0 + svcntd () * 8), ++ z0 = svldnf1uw_u64 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnf1uw_u64_m1: ++** ldnf1w z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_u64_m1, svuint64_t, uint32_t, ++ z0 = svldnf1uw_u64 (p0, x0 - svcntd ()), ++ z0 = svldnf1uw_u64 (p0, x0 - svcntd ())) ++ ++/* ++** ldnf1uw_u64_m8: ++** ldnf1w z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_u64_m8, svuint64_t, uint32_t, ++ z0 = svldnf1uw_u64 (p0, x0 - svcntd () * 8), ++ z0 = svldnf1uw_u64 (p0, x0 - svcntd () * 8)) ++ ++/* ++** ldnf1uw_u64_m9: ++** dech x0, all, mul #9 ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_u64_m9, svuint64_t, uint32_t, ++ z0 = svldnf1uw_u64 (p0, x0 - svcntd () * 9), ++ z0 = svldnf1uw_u64 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnf1uw_vnum_u64_0: ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_u64_0, svuint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_u64 (p0, x0, 0), ++ z0 = svldnf1uw_vnum_u64 (p0, x0, 0)) ++ ++/* ++** ldnf1uw_vnum_u64_1: ++** ldnf1w z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_u64_1, svuint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_u64 (p0, x0, 1), ++ z0 = svldnf1uw_vnum_u64 (p0, x0, 1)) ++ ++/* ++** ldnf1uw_vnum_u64_7: ++** ldnf1w z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_u64_7, svuint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_u64 (p0, x0, 7), ++ z0 = svldnf1uw_vnum_u64 (p0, x0, 7)) ++ ++/* ++** ldnf1uw_vnum_u64_8: ++** incb x0, all, mul #4 ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_u64_8, svuint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_u64 (p0, x0, 8), ++ z0 = svldnf1uw_vnum_u64 (p0, x0, 8)) ++ ++/* ++** ldnf1uw_vnum_u64_m1: ++** ldnf1w z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_u64_m1, svuint64_t, uint32_t, ++ z0 = 
svldnf1uw_vnum_u64 (p0, x0, -1), ++ z0 = svldnf1uw_vnum_u64 (p0, x0, -1)) ++ ++/* ++** ldnf1uw_vnum_u64_m8: ++** ldnf1w z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_u64_m8, svuint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_u64 (p0, x0, -8), ++ z0 = svldnf1uw_vnum_u64 (p0, x0, -8)) ++ ++/* ++** ldnf1uw_vnum_u64_m9: ++** dech x0, all, mul #9 ++** ldnf1w z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_u64_m9, svuint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_u64 (p0, x0, -9), ++ z0 = svldnf1uw_vnum_u64 (p0, x0, -9)) ++ ++/* ++** ldnf1uw_vnum_u64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnf1w z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnf1uw_vnum_u64_x1, svuint64_t, uint32_t, ++ z0 = svldnf1uw_vnum_u64 (p0, x0, x1), ++ z0 = svldnf1uw_vnum_u64 (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_bf16.c +new file mode 100644 +index 000000000..b083901fa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_bf16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_bf16_base: ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_bf16_base, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_bf16 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_bf16_index: ++** ldnt1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldnt1_bf16_index, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_bf16 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_bf16_1: ++** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_bf16_1, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_bf16 (p0, x0 + svcnth ()), ++ z0 = svldnt1 (p0, x0 + svcnth ())) ++ ++/* ++** ldnt1_bf16_7: ++** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_bf16_7, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_bf16 (p0, x0 + svcnth () * 7), ++ z0 = svldnt1 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_bf16_8: ++** incb x0, all, mul #8 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_bf16_8, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_bf16 (p0, x0 + svcnth () * 8), ++ z0 = svldnt1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnt1_bf16_m1: ++** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_bf16_m1, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_bf16 (p0, x0 - svcnth ()), ++ z0 = svldnt1 (p0, x0 - svcnth ())) ++ ++/* ++** ldnt1_bf16_m8: ++** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_bf16_m8, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_bf16 (p0, x0 - svcnth () * 8), ++ z0 = svldnt1 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_bf16_m9: ++** decb x0, all, mul #9 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_bf16_m9, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_bf16 (p0, x0 - svcnth () * 9), ++ z0 = svldnt1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnt1_vnum_bf16_0: ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_bf16_0, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_vnum_bf16 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_bf16_1: ++** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_bf16_1, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_vnum_bf16 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_bf16_7: ++** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_bf16_7, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_vnum_bf16 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_bf16_8: ++** incb x0, all, mul #8 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_bf16_8, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_vnum_bf16 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_bf16_m1: ++** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_vnum_bf16 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_bf16_m8: ++** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_bf16_m8, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_vnum_bf16 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_bf16_m9: ++** decb x0, all, mul #9 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_bf16_m9, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_vnum_bf16 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldnt1_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldnt1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, ++ z0 = svldnt1_vnum_bf16 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f16.c +new file mode 100644 +index 000000000..c98ab2da4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_f16_base: ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f16_base, svfloat16_t, float16_t, ++ z0 = svldnt1_f16 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_f16_index: ++** ldnt1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f16_index, svfloat16_t, float16_t, ++ z0 = svldnt1_f16 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_f16_1: ++** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f16_1, svfloat16_t, float16_t, ++ z0 = svldnt1_f16 (p0, x0 + svcnth ()), ++ z0 = svldnt1 (p0, x0 + svcnth ())) ++ ++/* ++** ldnt1_f16_7: ++** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f16_7, svfloat16_t, float16_t, ++ z0 = svldnt1_f16 (p0, x0 + svcnth () * 7), ++ z0 = svldnt1 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_f16_8: ++** incb x0, all, mul #8 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f16_8, svfloat16_t, float16_t, ++ z0 = svldnt1_f16 (p0, x0 + svcnth () * 8), ++ z0 = svldnt1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnt1_f16_m1: ++** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f16_m1, svfloat16_t, float16_t, ++ z0 = svldnt1_f16 (p0, x0 - svcnth ()), ++ z0 = svldnt1 (p0, x0 - svcnth ())) ++ ++/* ++** ldnt1_f16_m8: ++** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f16_m8, svfloat16_t, float16_t, ++ z0 = svldnt1_f16 (p0, x0 - svcnth () * 8), ++ z0 = svldnt1 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_f16_m9: ++** decb x0, all, mul #9 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f16_m9, svfloat16_t, float16_t, ++ z0 = svldnt1_f16 (p0, x0 - svcnth () * 9), ++ z0 = svldnt1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnt1_vnum_f16_0: ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f16_0, svfloat16_t, float16_t, ++ z0 = svldnt1_vnum_f16 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_f16_1: ++** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f16_1, svfloat16_t, float16_t, ++ z0 = svldnt1_vnum_f16 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_f16_7: ++** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f16_7, svfloat16_t, float16_t, ++ z0 = svldnt1_vnum_f16 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_f16_8: ++** incb x0, all, mul #8 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f16_8, svfloat16_t, float16_t, ++ z0 = svldnt1_vnum_f16 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_f16_m1: ++** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f16_m1, svfloat16_t, float16_t, ++ z0 = svldnt1_vnum_f16 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_f16_m8: ++** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f16_m8, svfloat16_t, float16_t, ++ z0 = svldnt1_vnum_f16 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_vnum_f16_m9: ++** decb x0, all, mul #9 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f16_m9, svfloat16_t, float16_t, ++ z0 = svldnt1_vnum_f16 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldnt1_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldnt1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f16_x1, svfloat16_t, float16_t, ++ z0 = svldnt1_vnum_f16 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f32.c +new file mode 100644 +index 000000000..fb09a8a6d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_f32_base: ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f32_base, svfloat32_t, float32_t, ++ z0 = svldnt1_f32 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_f32_index: ++** ldnt1w z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f32_index, svfloat32_t, float32_t, ++ z0 = svldnt1_f32 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_f32_1: ++** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f32_1, svfloat32_t, float32_t, ++ z0 = svldnt1_f32 (p0, x0 + svcntw ()), ++ z0 = svldnt1 (p0, x0 + svcntw ())) ++ ++/* ++** ldnt1_f32_7: ++** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f32_7, svfloat32_t, float32_t, ++ z0 = svldnt1_f32 (p0, x0 + svcntw () * 7), ++ z0 = svldnt1 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_f32_8: ++** incb x0, all, mul #8 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f32_8, svfloat32_t, float32_t, ++ z0 = svldnt1_f32 (p0, x0 + svcntw () * 8), ++ z0 = svldnt1 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnt1_f32_m1: ++** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f32_m1, svfloat32_t, float32_t, ++ z0 = svldnt1_f32 (p0, x0 - svcntw ()), ++ z0 = svldnt1 (p0, x0 - svcntw ())) ++ ++/* ++** ldnt1_f32_m8: ++** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f32_m8, svfloat32_t, float32_t, ++ z0 = svldnt1_f32 (p0, x0 - svcntw () * 8), ++ z0 = svldnt1 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_f32_m9: ++** decb x0, all, mul #9 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f32_m9, svfloat32_t, float32_t, ++ z0 = svldnt1_f32 (p0, x0 - svcntw () * 9), ++ z0 = svldnt1 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnt1_vnum_f32_0: ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f32_0, svfloat32_t, float32_t, ++ z0 = svldnt1_vnum_f32 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_f32_1: ++** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f32_1, svfloat32_t, float32_t, ++ z0 = svldnt1_vnum_f32 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_f32_7: ++** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f32_7, svfloat32_t, float32_t, ++ z0 = svldnt1_vnum_f32 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_vnum_f32_8: ++** incb x0, all, mul #8 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f32_8, svfloat32_t, float32_t, ++ z0 = svldnt1_vnum_f32 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_f32_m1: ++** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f32_m1, svfloat32_t, float32_t, ++ z0 = svldnt1_vnum_f32 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_f32_m8: ++** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f32_m8, svfloat32_t, float32_t, ++ z0 = svldnt1_vnum_f32 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_f32_m9: ++** decb x0, all, mul #9 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f32_m9, svfloat32_t, float32_t, ++ z0 = svldnt1_vnum_f32 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldnt1_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldnt1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f32_x1, svfloat32_t, float32_t, ++ z0 = svldnt1_vnum_f32 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f64.c +new file mode 100644 +index 000000000..2a7863282 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_f64_base: ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f64_base, svfloat64_t, float64_t, ++ z0 = svldnt1_f64 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_f64_index: ++** ldnt1d z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f64_index, svfloat64_t, float64_t, ++ z0 = svldnt1_f64 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_f64_1: ++** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f64_1, svfloat64_t, float64_t, ++ z0 = svldnt1_f64 (p0, x0 + svcntd ()), ++ z0 = svldnt1 (p0, x0 + svcntd ())) ++ ++/* ++** ldnt1_f64_7: ++** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f64_7, svfloat64_t, float64_t, ++ z0 = svldnt1_f64 (p0, x0 + svcntd () * 7), ++ z0 = svldnt1 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_f64_8: ++** incb x0, all, mul #8 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f64_8, svfloat64_t, float64_t, ++ z0 = svldnt1_f64 (p0, x0 + svcntd () * 8), ++ z0 = svldnt1 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnt1_f64_m1: ++** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f64_m1, svfloat64_t, float64_t, ++ z0 = svldnt1_f64 (p0, x0 - svcntd ()), ++ z0 = svldnt1 (p0, x0 - svcntd ())) ++ ++/* ++** ldnt1_f64_m8: ++** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f64_m8, svfloat64_t, float64_t, ++ z0 = svldnt1_f64 (p0, x0 - svcntd () * 8), ++ z0 = svldnt1 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_f64_m9: ++** decb x0, all, mul #9 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_f64_m9, svfloat64_t, float64_t, ++ z0 = svldnt1_f64 (p0, x0 - svcntd () * 9), ++ z0 = svldnt1 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnt1_vnum_f64_0: ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f64_0, svfloat64_t, float64_t, ++ z0 = svldnt1_vnum_f64 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_f64_1: ++** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f64_1, svfloat64_t, float64_t, ++ z0 = svldnt1_vnum_f64 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_f64_7: ++** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f64_7, svfloat64_t, float64_t, ++ z0 = svldnt1_vnum_f64 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_f64_8: ++** incb x0, all, mul #8 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f64_8, svfloat64_t, float64_t, ++ z0 = svldnt1_vnum_f64 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_f64_m1: ++** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f64_m1, svfloat64_t, float64_t, ++ z0 = svldnt1_vnum_f64 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_f64_m8: ++** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f64_m8, svfloat64_t, float64_t, ++ z0 = svldnt1_vnum_f64 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_f64_m9: ++** decb x0, all, mul #9 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f64_m9, svfloat64_t, float64_t, ++ z0 = svldnt1_vnum_f64 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldnt1_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldnt1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_f64_x1, svfloat64_t, float64_t, ++ z0 = svldnt1_vnum_f64 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s16.c +new file mode 100644 +index 000000000..c307ed51f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_s16_base: ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s16_base, svint16_t, int16_t, ++ z0 = svldnt1_s16 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_s16_index: ++** ldnt1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s16_index, svint16_t, int16_t, ++ z0 = svldnt1_s16 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_s16_1: ++** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s16_1, svint16_t, int16_t, ++ z0 = svldnt1_s16 (p0, x0 + svcnth ()), ++ z0 = svldnt1 (p0, x0 + svcnth ())) ++ ++/* ++** ldnt1_s16_7: ++** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s16_7, svint16_t, int16_t, ++ z0 = svldnt1_s16 (p0, x0 + svcnth () * 7), ++ z0 = svldnt1 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_s16_8: ++** incb x0, all, mul #8 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s16_8, svint16_t, int16_t, ++ z0 = svldnt1_s16 (p0, x0 + svcnth () * 8), ++ z0 = svldnt1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnt1_s16_m1: ++** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s16_m1, svint16_t, int16_t, ++ z0 = svldnt1_s16 (p0, x0 - svcnth ()), ++ z0 = svldnt1 (p0, x0 - svcnth ())) ++ ++/* ++** ldnt1_s16_m8: ++** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s16_m8, svint16_t, int16_t, ++ z0 = svldnt1_s16 (p0, x0 - svcnth () * 8), ++ z0 = svldnt1 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_s16_m9: ++** decb x0, all, mul #9 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s16_m9, svint16_t, int16_t, ++ z0 = svldnt1_s16 (p0, x0 - svcnth () * 9), ++ z0 = svldnt1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnt1_vnum_s16_0: ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s16_0, svint16_t, int16_t, ++ z0 = svldnt1_vnum_s16 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_s16_1: ++** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s16_1, svint16_t, int16_t, ++ z0 = svldnt1_vnum_s16 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_s16_7: ++** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s16_7, svint16_t, int16_t, ++ z0 = svldnt1_vnum_s16 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_s16_8: ++** incb x0, all, mul #8 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s16_8, svint16_t, int16_t, ++ z0 = svldnt1_vnum_s16 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_s16_m1: ++** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s16_m1, svint16_t, int16_t, ++ z0 = svldnt1_vnum_s16 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_s16_m8: ++** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s16_m8, svint16_t, int16_t, ++ z0 = svldnt1_vnum_s16 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_s16_m9: ++** decb x0, all, mul #9 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s16_m9, svint16_t, int16_t, ++ z0 = svldnt1_vnum_s16 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldnt1_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldnt1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s16_x1, svint16_t, int16_t, ++ z0 = svldnt1_vnum_s16 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s32.c +new file mode 100644 +index 000000000..2b9df1781 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_s32_base: ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s32_base, svint32_t, int32_t, ++ z0 = svldnt1_s32 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_s32_index: ++** ldnt1w z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s32_index, svint32_t, int32_t, ++ z0 = svldnt1_s32 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_s32_1: ++** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s32_1, svint32_t, int32_t, ++ z0 = svldnt1_s32 (p0, x0 + svcntw ()), ++ z0 = svldnt1 (p0, x0 + svcntw ())) ++ ++/* ++** ldnt1_s32_7: ++** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s32_7, svint32_t, int32_t, ++ z0 = svldnt1_s32 (p0, x0 + svcntw () * 7), ++ z0 = svldnt1 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_s32_8: ++** incb x0, all, mul #8 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s32_8, svint32_t, int32_t, ++ z0 = svldnt1_s32 (p0, x0 + svcntw () * 8), ++ z0 = svldnt1 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnt1_s32_m1: ++** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s32_m1, svint32_t, int32_t, ++ z0 = svldnt1_s32 (p0, x0 - svcntw ()), ++ z0 = svldnt1 (p0, x0 - svcntw ())) ++ ++/* ++** ldnt1_s32_m8: ++** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s32_m8, svint32_t, int32_t, ++ z0 = svldnt1_s32 (p0, x0 - svcntw () * 8), ++ z0 = svldnt1 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_s32_m9: ++** decb x0, all, mul #9 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s32_m9, svint32_t, int32_t, ++ z0 = svldnt1_s32 (p0, x0 - svcntw () * 9), ++ z0 = svldnt1 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnt1_vnum_s32_0: ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s32_0, svint32_t, int32_t, ++ z0 = svldnt1_vnum_s32 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_s32_1: ++** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s32_1, svint32_t, int32_t, ++ z0 = svldnt1_vnum_s32 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_s32_7: ++** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s32_7, svint32_t, int32_t, ++ z0 = svldnt1_vnum_s32 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_s32_8: ++** incb x0, all, mul #8 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s32_8, svint32_t, int32_t, ++ z0 = svldnt1_vnum_s32 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_s32_m1: ++** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s32_m1, svint32_t, int32_t, ++ z0 = svldnt1_vnum_s32 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_s32_m8: ++** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s32_m8, svint32_t, int32_t, ++ z0 = svldnt1_vnum_s32 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_vnum_s32_m9: ++** decb x0, all, mul #9 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s32_m9, svint32_t, int32_t, ++ z0 = svldnt1_vnum_s32 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldnt1_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldnt1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s32_x1, svint32_t, int32_t, ++ z0 = svldnt1_vnum_s32 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s64.c +new file mode 100644 +index 000000000..5bc7ac6ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_s64_base: ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s64_base, svint64_t, int64_t, ++ z0 = svldnt1_s64 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_s64_index: ++** ldnt1d z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s64_index, svint64_t, int64_t, ++ z0 = svldnt1_s64 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_s64_1: ++** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s64_1, svint64_t, int64_t, ++ z0 = svldnt1_s64 (p0, x0 + svcntd ()), ++ z0 = svldnt1 (p0, x0 + svcntd ())) ++ ++/* ++** ldnt1_s64_7: ++** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s64_7, svint64_t, int64_t, ++ z0 = svldnt1_s64 (p0, x0 + svcntd () * 7), ++ z0 = svldnt1 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_s64_8: ++** incb x0, all, mul #8 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s64_8, svint64_t, int64_t, ++ z0 = svldnt1_s64 (p0, x0 + svcntd () * 8), ++ z0 = svldnt1 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnt1_s64_m1: ++** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s64_m1, svint64_t, int64_t, ++ z0 = svldnt1_s64 (p0, x0 - svcntd ()), ++ z0 = svldnt1 (p0, x0 - svcntd ())) ++ ++/* ++** ldnt1_s64_m8: ++** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s64_m8, svint64_t, int64_t, ++ z0 = svldnt1_s64 (p0, x0 - svcntd () * 8), ++ z0 = svldnt1 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_s64_m9: ++** decb x0, all, mul #9 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s64_m9, svint64_t, int64_t, ++ z0 = svldnt1_s64 (p0, x0 - svcntd () * 9), ++ z0 = svldnt1 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnt1_vnum_s64_0: ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s64_0, svint64_t, int64_t, ++ z0 = svldnt1_vnum_s64 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_s64_1: ++** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s64_1, svint64_t, int64_t, ++ z0 = svldnt1_vnum_s64 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_s64_7: ++** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s64_7, svint64_t, int64_t, ++ z0 = svldnt1_vnum_s64 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_vnum_s64_8: ++** incb x0, all, mul #8 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s64_8, svint64_t, int64_t, ++ z0 = svldnt1_vnum_s64 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_s64_m1: ++** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s64_m1, svint64_t, int64_t, ++ z0 = svldnt1_vnum_s64 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_s64_m8: ++** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s64_m8, svint64_t, int64_t, ++ z0 = svldnt1_vnum_s64 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_s64_m9: ++** decb x0, all, mul #9 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s64_m9, svint64_t, int64_t, ++ z0 = svldnt1_vnum_s64 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldnt1_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldnt1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s64_x1, svint64_t, int64_t, ++ z0 = svldnt1_vnum_s64 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s8.c +new file mode 100644 +index 000000000..eb8e2e548 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s8.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_s8_base: ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s8_base, svint8_t, int8_t, ++ z0 = svldnt1_s8 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_s8_index: ++** ldnt1b z0\.b, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s8_index, svint8_t, int8_t, ++ z0 = svldnt1_s8 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_s8_1: ++** ldnt1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s8_1, svint8_t, int8_t, ++ z0 = svldnt1_s8 (p0, x0 + svcntb ()), ++ z0 = svldnt1 (p0, x0 + svcntb ())) ++ ++/* ++** ldnt1_s8_7: ++** ldnt1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s8_7, svint8_t, int8_t, ++ z0 = svldnt1_s8 (p0, x0 + svcntb () * 7), ++ z0 = svldnt1 (p0, x0 + svcntb () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_s8_8: ++** incb x0, all, mul #8 ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s8_8, svint8_t, int8_t, ++ z0 = svldnt1_s8 (p0, x0 + svcntb () * 8), ++ z0 = svldnt1 (p0, x0 + svcntb () * 8)) ++ ++/* ++** ldnt1_s8_m1: ++** ldnt1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s8_m1, svint8_t, int8_t, ++ z0 = svldnt1_s8 (p0, x0 - svcntb ()), ++ z0 = svldnt1 (p0, x0 - svcntb ())) ++ ++/* ++** ldnt1_s8_m8: ++** ldnt1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s8_m8, svint8_t, int8_t, ++ z0 = svldnt1_s8 (p0, x0 - svcntb () * 8), ++ z0 = svldnt1 (p0, x0 - svcntb () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_s8_m9: ++** decb x0, all, mul #9 ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_s8_m9, svint8_t, int8_t, ++ z0 = svldnt1_s8 (p0, x0 - svcntb () * 9), ++ z0 = svldnt1 (p0, x0 - svcntb () * 9)) ++ ++/* ++** ldnt1_vnum_s8_0: ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s8_0, svint8_t, int8_t, ++ z0 = svldnt1_vnum_s8 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_s8_1: ++** ldnt1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s8_1, svint8_t, int8_t, ++ z0 = svldnt1_vnum_s8 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_s8_7: ++** ldnt1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s8_7, svint8_t, int8_t, ++ z0 = svldnt1_vnum_s8 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_s8_8: ++** incb x0, all, mul #8 ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s8_8, svint8_t, int8_t, ++ z0 = svldnt1_vnum_s8 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_s8_m1: ++** ldnt1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s8_m1, svint8_t, int8_t, ++ z0 = svldnt1_vnum_s8 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_s8_m8: ++** ldnt1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s8_m8, svint8_t, int8_t, ++ z0 = svldnt1_vnum_s8 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_s8_m9: ++** decb x0, all, mul #9 ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s8_m9, svint8_t, int8_t, ++ z0 = svldnt1_vnum_s8 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* ++** ldnt1_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnt1b z0\.b, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldnt1b z0\.b, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_s8_x1, svint8_t, int8_t, ++ z0 = svldnt1_vnum_s8 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u16.c +new file mode 100644 +index 000000000..c032c3d93 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_u16_base: ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u16_base, svuint16_t, uint16_t, ++ z0 = svldnt1_u16 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_u16_index: ++** ldnt1h z0\.h, p0/z, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u16_index, svuint16_t, uint16_t, ++ z0 = svldnt1_u16 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_u16_1: ++** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u16_1, svuint16_t, uint16_t, ++ z0 = svldnt1_u16 (p0, x0 + svcnth ()), ++ z0 = svldnt1 (p0, x0 + svcnth ())) ++ ++/* ++** ldnt1_u16_7: ++** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u16_7, svuint16_t, uint16_t, ++ z0 = svldnt1_u16 (p0, x0 + svcnth () * 7), ++ z0 = svldnt1 (p0, x0 + svcnth () * 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_u16_8: ++** incb x0, all, mul #8 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u16_8, svuint16_t, uint16_t, ++ z0 = svldnt1_u16 (p0, x0 + svcnth () * 8), ++ z0 = svldnt1 (p0, x0 + svcnth () * 8)) ++ ++/* ++** ldnt1_u16_m1: ++** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u16_m1, svuint16_t, uint16_t, ++ z0 = svldnt1_u16 (p0, x0 - svcnth ()), ++ z0 = svldnt1 (p0, x0 - svcnth ())) ++ ++/* ++** ldnt1_u16_m8: ++** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u16_m8, svuint16_t, uint16_t, ++ z0 = svldnt1_u16 (p0, x0 - svcnth () * 8), ++ z0 = svldnt1 (p0, x0 - svcnth () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_u16_m9: ++** decb x0, all, mul #9 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u16_m9, svuint16_t, uint16_t, ++ z0 = svldnt1_u16 (p0, x0 - svcnth () * 9), ++ z0 = svldnt1 (p0, x0 - svcnth () * 9)) ++ ++/* ++** ldnt1_vnum_u16_0: ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u16_0, svuint16_t, uint16_t, ++ z0 = svldnt1_vnum_u16 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_u16_1: ++** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u16_1, svuint16_t, uint16_t, ++ z0 = svldnt1_vnum_u16 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_u16_7: ++** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u16_7, svuint16_t, uint16_t, ++ z0 = svldnt1_vnum_u16 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_u16_8: ++** incb x0, all, mul #8 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u16_8, svuint16_t, uint16_t, ++ z0 = svldnt1_vnum_u16 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_u16_m1: ++** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u16_m1, svuint16_t, uint16_t, ++ z0 = svldnt1_vnum_u16 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_u16_m8: ++** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u16_m8, svuint16_t, uint16_t, ++ z0 = svldnt1_vnum_u16 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_u16_m9: ++** decb x0, all, mul #9 ++** ldnt1h z0\.h, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u16_m9, svuint16_t, uint16_t, ++ z0 = svldnt1_vnum_u16 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldnt1_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldnt1h z0\.h, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u16_x1, svuint16_t, uint16_t, ++ z0 = svldnt1_vnum_u16 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u32.c +new file mode 100644 +index 000000000..278794459 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_u32_base: ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u32_base, svuint32_t, uint32_t, ++ z0 = svldnt1_u32 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_u32_index: ++** ldnt1w z0\.s, p0/z, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u32_index, svuint32_t, uint32_t, ++ z0 = svldnt1_u32 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_u32_1: ++** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u32_1, svuint32_t, uint32_t, ++ z0 = svldnt1_u32 (p0, x0 + svcntw ()), ++ z0 = svldnt1 (p0, x0 + svcntw ())) ++ ++/* ++** ldnt1_u32_7: ++** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u32_7, svuint32_t, uint32_t, ++ z0 = svldnt1_u32 (p0, x0 + svcntw () * 7), ++ z0 = svldnt1 (p0, x0 + svcntw () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_u32_8: ++** incb x0, all, mul #8 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u32_8, svuint32_t, uint32_t, ++ z0 = svldnt1_u32 (p0, x0 + svcntw () * 8), ++ z0 = svldnt1 (p0, x0 + svcntw () * 8)) ++ ++/* ++** ldnt1_u32_m1: ++** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u32_m1, svuint32_t, uint32_t, ++ z0 = svldnt1_u32 (p0, x0 - svcntw ()), ++ z0 = svldnt1 (p0, x0 - svcntw ())) ++ ++/* ++** ldnt1_u32_m8: ++** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u32_m8, svuint32_t, uint32_t, ++ z0 = svldnt1_u32 (p0, x0 - svcntw () * 8), ++ z0 = svldnt1 (p0, x0 - svcntw () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_u32_m9: ++** decb x0, all, mul #9 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u32_m9, svuint32_t, uint32_t, ++ z0 = svldnt1_u32 (p0, x0 - svcntw () * 9), ++ z0 = svldnt1 (p0, x0 - svcntw () * 9)) ++ ++/* ++** ldnt1_vnum_u32_0: ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u32_0, svuint32_t, uint32_t, ++ z0 = svldnt1_vnum_u32 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_u32_1: ++** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u32_1, svuint32_t, uint32_t, ++ z0 = svldnt1_vnum_u32 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_u32_7: ++** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u32_7, svuint32_t, uint32_t, ++ z0 = svldnt1_vnum_u32 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_u32_8: ++** incb x0, all, mul #8 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u32_8, svuint32_t, uint32_t, ++ z0 = svldnt1_vnum_u32 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_u32_m1: ++** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u32_m1, svuint32_t, uint32_t, ++ z0 = svldnt1_vnum_u32 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_u32_m8: ++** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u32_m8, svuint32_t, uint32_t, ++ z0 = svldnt1_vnum_u32 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_vnum_u32_m9: ++** decb x0, all, mul #9 ++** ldnt1w z0\.s, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u32_m9, svuint32_t, uint32_t, ++ z0 = svldnt1_vnum_u32 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldnt1_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldnt1w z0\.s, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u32_x1, svuint32_t, uint32_t, ++ z0 = svldnt1_vnum_u32 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u64.c +new file mode 100644 +index 000000000..abafee6f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_u64_base: ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u64_base, svuint64_t, uint64_t, ++ z0 = svldnt1_u64 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_u64_index: ++** ldnt1d z0\.d, p0/z, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u64_index, svuint64_t, uint64_t, ++ z0 = svldnt1_u64 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_u64_1: ++** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u64_1, svuint64_t, uint64_t, ++ z0 = svldnt1_u64 (p0, x0 + svcntd ()), ++ z0 = svldnt1 (p0, x0 + svcntd ())) ++ ++/* ++** ldnt1_u64_7: ++** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u64_7, svuint64_t, uint64_t, ++ z0 = svldnt1_u64 (p0, x0 + svcntd () * 7), ++ z0 = svldnt1 (p0, x0 + svcntd () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_u64_8: ++** incb x0, all, mul #8 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u64_8, svuint64_t, uint64_t, ++ z0 = svldnt1_u64 (p0, x0 + svcntd () * 8), ++ z0 = svldnt1 (p0, x0 + svcntd () * 8)) ++ ++/* ++** ldnt1_u64_m1: ++** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u64_m1, svuint64_t, uint64_t, ++ z0 = svldnt1_u64 (p0, x0 - svcntd ()), ++ z0 = svldnt1 (p0, x0 - svcntd ())) ++ ++/* ++** ldnt1_u64_m8: ++** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u64_m8, svuint64_t, uint64_t, ++ z0 = svldnt1_u64 (p0, x0 - svcntd () * 8), ++ z0 = svldnt1 (p0, x0 - svcntd () * 8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_u64_m9: ++** decb x0, all, mul #9 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u64_m9, svuint64_t, uint64_t, ++ z0 = svldnt1_u64 (p0, x0 - svcntd () * 9), ++ z0 = svldnt1 (p0, x0 - svcntd () * 9)) ++ ++/* ++** ldnt1_vnum_u64_0: ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u64_0, svuint64_t, uint64_t, ++ z0 = svldnt1_vnum_u64 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_u64_1: ++** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u64_1, svuint64_t, uint64_t, ++ z0 = svldnt1_vnum_u64 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_u64_7: ++** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u64_7, svuint64_t, uint64_t, ++ z0 = svldnt1_vnum_u64 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_vnum_u64_8: ++** incb x0, all, mul #8 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u64_8, svuint64_t, uint64_t, ++ z0 = svldnt1_vnum_u64 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_u64_m1: ++** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u64_m1, svuint64_t, uint64_t, ++ z0 = svldnt1_vnum_u64 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_u64_m8: ++** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u64_m8, svuint64_t, uint64_t, ++ z0 = svldnt1_vnum_u64 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_u64_m9: ++** decb x0, all, mul #9 ++** ldnt1d z0\.d, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u64_m9, svuint64_t, uint64_t, ++ z0 = svldnt1_vnum_u64 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** ldnt1_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** ldnt1d z0\.d, p0/z, \[\2\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u64_x1, svuint64_t, uint64_t, ++ z0 = svldnt1_vnum_u64 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u8.c +new file mode 100644 +index 000000000..7bf9acc26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u8.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ldnt1_u8_base: ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u8_base, svuint8_t, uint8_t, ++ z0 = svldnt1_u8 (p0, x0), ++ z0 = svldnt1 (p0, x0)) ++ ++/* ++** ldnt1_u8_index: ++** ldnt1b z0\.b, p0/z, \[x0, x1\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u8_index, svuint8_t, uint8_t, ++ z0 = svldnt1_u8 (p0, x0 + x1), ++ z0 = svldnt1 (p0, x0 + x1)) ++ ++/* ++** ldnt1_u8_1: ++** ldnt1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u8_1, svuint8_t, uint8_t, ++ z0 = svldnt1_u8 (p0, x0 + svcntb ()), ++ z0 = svldnt1 (p0, x0 + svcntb ())) ++ ++/* ++** ldnt1_u8_7: ++** ldnt1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u8_7, svuint8_t, uint8_t, ++ z0 = svldnt1_u8 (p0, x0 + svcntb () * 7), ++ z0 = svldnt1 (p0, x0 + svcntb () * 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_u8_8: ++** incb x0, all, mul #8 ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u8_8, svuint8_t, uint8_t, ++ z0 = svldnt1_u8 (p0, x0 + svcntb () * 8), ++ z0 = svldnt1 (p0, x0 + svcntb () * 8)) ++ ++/* ++** ldnt1_u8_m1: ++** ldnt1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u8_m1, svuint8_t, uint8_t, ++ z0 = svldnt1_u8 (p0, x0 - svcntb ()), ++ z0 = svldnt1 (p0, x0 - svcntb ())) ++ ++/* ++** ldnt1_u8_m8: ++** ldnt1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u8_m8, svuint8_t, uint8_t, ++ z0 = svldnt1_u8 (p0, x0 - svcntb () * 8), ++ z0 = svldnt1 (p0, x0 - svcntb () * 8)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** ldnt1_u8_m9: ++** decb x0, all, mul #9 ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_u8_m9, svuint8_t, uint8_t, ++ z0 = svldnt1_u8 (p0, x0 - svcntb () * 9), ++ z0 = svldnt1 (p0, x0 - svcntb () * 9)) ++ ++/* ++** ldnt1_vnum_u8_0: ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u8_0, svuint8_t, uint8_t, ++ z0 = svldnt1_vnum_u8 (p0, x0, 0), ++ z0 = svldnt1_vnum (p0, x0, 0)) ++ ++/* ++** ldnt1_vnum_u8_1: ++** ldnt1b z0\.b, p0/z, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u8_1, svuint8_t, uint8_t, ++ z0 = svldnt1_vnum_u8 (p0, x0, 1), ++ z0 = svldnt1_vnum (p0, x0, 1)) ++ ++/* ++** ldnt1_vnum_u8_7: ++** ldnt1b z0\.b, p0/z, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u8_7, svuint8_t, uint8_t, ++ z0 = svldnt1_vnum_u8 (p0, x0, 7), ++ z0 = svldnt1_vnum (p0, x0, 7)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_u8_8: ++** incb x0, all, mul #8 ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u8_8, svuint8_t, uint8_t, ++ z0 = svldnt1_vnum_u8 (p0, x0, 8), ++ z0 = svldnt1_vnum (p0, x0, 8)) ++ ++/* ++** ldnt1_vnum_u8_m1: ++** ldnt1b z0\.b, p0/z, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u8_m1, svuint8_t, uint8_t, ++ z0 = svldnt1_vnum_u8 (p0, x0, -1), ++ z0 = svldnt1_vnum (p0, x0, -1)) ++ ++/* ++** ldnt1_vnum_u8_m8: ++** ldnt1b z0\.b, p0/z, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u8_m8, svuint8_t, uint8_t, ++ z0 = svldnt1_vnum_u8 (p0, x0, -8), ++ z0 = svldnt1_vnum (p0, x0, -8)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** ldnt1_vnum_u8_m9: ++** decb x0, all, mul #9 ++** ldnt1b z0\.b, p0/z, \[x0\] ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u8_m9, svuint8_t, uint8_t, ++ z0 = svldnt1_vnum_u8 (p0, x0, -9), ++ z0 = svldnt1_vnum (p0, x0, -9)) ++ ++/* ++** ldnt1_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** ldnt1b z0\.b, p0/z, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** ldnt1b z0\.b, p0/z, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_LOAD (ldnt1_vnum_u8_x1, svuint8_t, uint8_t, ++ z0 = svldnt1_vnum_u8 (p0, x0, x1), ++ z0 = svldnt1_vnum (p0, x0, x1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_bf16.c +new file mode 100644 +index 000000000..cd91ff48d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_bf16.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_bf16: ++** cnth x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_bf16, uint64_t, svbfloat16_t, ++ x0 = svlen_bf16 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f16.c +new file mode 100644 +index 000000000..aa6d94bbc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f16.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_f16: ++** cnth x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_f16, uint64_t, svfloat16_t, ++ x0 = svlen_f16 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f32.c +new file mode 100644 +index 000000000..1dd50cee0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f32.c +@@ -0,0 +1,12 @@ ++/* { 
dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_f32: ++** cntw x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_f32, uint64_t, svfloat32_t, ++ x0 = svlen_f32 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f64.c +new file mode 100644 +index 000000000..1f210653e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f64.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_f64: ++** cntd x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_f64, uint64_t, svfloat64_t, ++ x0 = svlen_f64 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s16.c +new file mode 100644 +index 000000000..f56796182 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s16.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_s16: ++** cnth x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_s16, uint64_t, svint16_t, ++ x0 = svlen_s16 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s32.c +new file mode 100644 +index 000000000..662fac177 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s32.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_s32: ++** cntw x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_s32, uint64_t, svint32_t, ++ x0 = svlen_s32 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s64.c +new file mode 100644 +index 000000000..f95770302 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s64.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_s64: ++** cntd x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_s64, uint64_t, svint64_t, ++ x0 = svlen_s64 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s8.c +new file mode 100644 +index 000000000..6ed8a7177 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s8.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_s8: ++** cntb x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_s8, uint64_t, svint8_t, ++ x0 = svlen_s8 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u16.c +new file mode 100644 +index 000000000..13692c927 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u16.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_u16: ++** cnth x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_u16, uint64_t, svuint16_t, ++ x0 = svlen_u16 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u32.c +new file mode 100644 +index 
000000000..b03146089 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u32.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_u32: ++** cntw x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_u32, uint64_t, svuint32_t, ++ x0 = svlen_u32 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u64.c +new file mode 100644 +index 000000000..11f2e4b81 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u64.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_u64: ++** cntd x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_u64, uint64_t, svuint64_t, ++ x0 = svlen_u64 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u8.c +new file mode 100644 +index 000000000..fbd39a432 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u8.c +@@ -0,0 +1,12 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** len_x0_u8: ++** cntb x0 ++** ret ++*/ ++TEST_REDUCTION_X (len_x0_u8, uint64_t, svuint8_t, ++ x0 = svlen_u8 (z0), ++ x0 = svlen (z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s16.c +new file mode 100644 +index 000000000..edaaca5f1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s16.c +@@ -0,0 +1,351 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_s16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (lsl_s16_m_tied1, svint16_t, svuint16_t, ++ z0 = svlsl_s16_m (p0, z0, z4), ++ z0 = svlsl_m (p0, z0, z4)) ++ ++/* ++** lsl_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** lsl z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s16_m_tied2, svint16_t, svuint16_t, ++ z0_res = svlsl_s16_m (p0, z4, z0), ++ z0_res = svlsl_m (p0, z4, z0)) ++ ++/* ++** lsl_s16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (lsl_s16_m_untied, svint16_t, svuint16_t, ++ z0 = svlsl_s16_m (p0, z1, z4), ++ z0 = svlsl_m (p0, z1, z4)) ++ ++/* ++** lsl_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s16_m_tied1, svint16_t, uint16_t, ++ z0 = svlsl_n_s16_m (p0, z0, x0), ++ z0 = svlsl_m (p0, z0, x0)) ++ ++/* ++** lsl_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s16_m_untied, svint16_t, uint16_t, ++ z0 = svlsl_n_s16_m (p0, z1, x0), ++ z0 = svlsl_m (p0, z1, x0)) ++ ++/* ++** lsl_1_s16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s16_m_tied1, svint16_t, ++ z0 = svlsl_n_s16_m (p0, z0, 1), ++ z0 = svlsl_m (p0, z0, 1)) ++ ++/* ++** lsl_1_s16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s16_m_untied, svint16_t, ++ z0 = svlsl_n_s16_m (p0, z1, 1), ++ z0 = svlsl_m (p0, z1, 1)) ++ ++/* ++** lsl_15_s16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_s16_m_tied1, svint16_t, ++ z0 = svlsl_n_s16_m (p0, z0, 15), ++ z0 = svlsl_m (p0, 
z0, 15)) ++ ++/* ++** lsl_15_s16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_s16_m_untied, svint16_t, ++ z0 = svlsl_n_s16_m (p0, z1, 15), ++ z0 = svlsl_m (p0, z1, 15)) ++ ++/* ++** lsl_16_s16_m_tied1: ++** mov (z[0-9]+\.h), #16 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_s16_m_tied1, svint16_t, ++ z0 = svlsl_n_s16_m (p0, z0, 16), ++ z0 = svlsl_m (p0, z0, 16)) ++ ++/* ++** lsl_16_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #16 ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_s16_m_untied, svint16_t, ++ z0 = svlsl_n_s16_m (p0, z1, 16), ++ z0 = svlsl_m (p0, z1, 16)) ++ ++/* ++** lsl_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (lsl_s16_z_tied1, svint16_t, svuint16_t, ++ z0 = svlsl_s16_z (p0, z0, z4), ++ z0 = svlsl_z (p0, z0, z4)) ++ ++/* ++** lsl_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** lslr z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s16_z_tied2, svint16_t, svuint16_t, ++ z0_res = svlsl_s16_z (p0, z4, z0), ++ z0_res = svlsl_z (p0, z4, z0)) ++ ++/* ++** lsl_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, z4\.h ++** | ++** movprfx z0\.h, p0/z, z4\.h ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_DUAL_Z (lsl_s16_z_untied, svint16_t, svuint16_t, ++ z0 = svlsl_s16_z (p0, z1, z4), ++ z0 = svlsl_z (p0, z1, z4)) ++ ++/* ++** lsl_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s16_z_tied1, svint16_t, uint16_t, ++ z0 = svlsl_n_s16_z (p0, z0, x0), ++ z0 = svlsl_z (p0, z0, x0)) ++ ++/* ++** lsl_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s16_z_untied, svint16_t, uint16_t, ++ z0 = svlsl_n_s16_z (p0, z1, x0), ++ z0 = svlsl_z (p0, z1, x0)) ++ ++/* ++** lsl_1_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s16_z_tied1, svint16_t, ++ z0 = svlsl_n_s16_z (p0, z0, 1), ++ z0 = svlsl_z (p0, z0, 1)) ++ ++/* ++** lsl_1_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s16_z_untied, svint16_t, ++ z0 = svlsl_n_s16_z (p0, z1, 1), ++ z0 = svlsl_z (p0, z1, 1)) ++ ++/* ++** lsl_15_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_s16_z_tied1, svint16_t, ++ z0 = svlsl_n_s16_z (p0, z0, 15), ++ z0 = svlsl_z (p0, z0, 15)) ++ ++/* ++** lsl_15_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_s16_z_untied, svint16_t, ++ z0 = svlsl_n_s16_z (p0, z1, 15), ++ z0 = svlsl_z (p0, z1, 15)) ++ ++/* ++** lsl_16_s16_z_tied1: ++** mov (z[0-9]+\.h), #16 ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_s16_z_tied1, svint16_t, ++ z0 = svlsl_n_s16_z (p0, z0, 16), ++ z0 = svlsl_z (p0, z0, 16)) ++ ++/* ++** lsl_16_s16_z_untied: ++** mov (z[0-9]+\.h), #16 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_s16_z_untied, svint16_t, ++ z0 = 
svlsl_n_s16_z (p0, z1, 16), ++ z0 = svlsl_z (p0, z1, 16)) ++ ++/* ++** lsl_s16_x_tied1: ++** lsl z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (lsl_s16_x_tied1, svint16_t, svuint16_t, ++ z0 = svlsl_s16_x (p0, z0, z4), ++ z0 = svlsl_x (p0, z0, z4)) ++ ++/* ++** lsl_s16_x_tied2: ++** lslr z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s16_x_tied2, svint16_t, svuint16_t, ++ z0_res = svlsl_s16_x (p0, z4, z0), ++ z0_res = svlsl_x (p0, z4, z0)) ++ ++/* ++** lsl_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, z4\.h ++** | ++** movprfx z0, z4 ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_DUAL_Z (lsl_s16_x_untied, svint16_t, svuint16_t, ++ z0 = svlsl_s16_x (p0, z1, z4), ++ z0 = svlsl_x (p0, z1, z4)) ++ ++/* ++** lsl_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s16_x_tied1, svint16_t, uint16_t, ++ z0 = svlsl_n_s16_x (p0, z0, x0), ++ z0 = svlsl_x (p0, z0, x0)) ++ ++/* ++** lsl_w0_s16_x_untied: ++** mov z0\.h, w0 ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s16_x_untied, svint16_t, uint16_t, ++ z0 = svlsl_n_s16_x (p0, z1, x0), ++ z0 = svlsl_x (p0, z1, x0)) ++ ++/* ++** lsl_1_s16_x_tied1: ++** lsl z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s16_x_tied1, svint16_t, ++ z0 = svlsl_n_s16_x (p0, z0, 1), ++ z0 = svlsl_x (p0, z0, 1)) ++ ++/* ++** lsl_1_s16_x_untied: ++** lsl z0\.h, z1\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s16_x_untied, svint16_t, ++ z0 = svlsl_n_s16_x (p0, z1, 1), ++ z0 = svlsl_x (p0, z1, 1)) ++ ++/* ++** lsl_15_s16_x_tied1: ++** lsl z0\.h, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_s16_x_tied1, svint16_t, ++ z0 = svlsl_n_s16_x (p0, z0, 15), ++ z0 = svlsl_x (p0, z0, 15)) ++ ++/* ++** lsl_15_s16_x_untied: ++** lsl z0\.h, z1\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_s16_x_untied, svint16_t, ++ z0 = svlsl_n_s16_x (p0, z1, 15), ++ z0 = svlsl_x (p0, z1, 15)) ++ ++/* ++** lsl_16_s16_x_tied1: ++** mov (z[0-9]+\.h), #16 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_s16_x_tied1, svint16_t, ++ z0 = svlsl_n_s16_x (p0, z0, 16), ++ z0 = svlsl_x (p0, z0, 16)) ++ ++/* ++** lsl_16_s16_x_untied: ++** mov z0\.h, #16 ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_s16_x_untied, svint16_t, ++ z0 = svlsl_n_s16_x (p0, z1, 16), ++ z0 = svlsl_x (p0, z1, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s32.c +new file mode 100644 +index 000000000..f98f1f94b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s32.c +@@ -0,0 +1,351 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_s32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (lsl_s32_m_tied1, svint32_t, svuint32_t, ++ z0 = svlsl_s32_m (p0, z0, z4), ++ z0 = svlsl_m (p0, z0, z4)) ++ ++/* ++** lsl_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** lsl z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s32_m_tied2, svint32_t, svuint32_t, ++ z0_res = svlsl_s32_m (p0, z4, z0), ++ z0_res = svlsl_m (p0, z4, z0)) ++ ++/* ++** lsl_s32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (lsl_s32_m_untied, svint32_t, svuint32_t, ++ z0 = svlsl_s32_m (p0, z1, z4), ++ z0 = svlsl_m (p0, z1, z4)) ++ ++/* ++** lsl_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** lsl z0\.s, p0/m, 
z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s32_m_tied1, svint32_t, uint32_t, ++ z0 = svlsl_n_s32_m (p0, z0, x0), ++ z0 = svlsl_m (p0, z0, x0)) ++ ++/* ++** lsl_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s32_m_untied, svint32_t, uint32_t, ++ z0 = svlsl_n_s32_m (p0, z1, x0), ++ z0 = svlsl_m (p0, z1, x0)) ++ ++/* ++** lsl_1_s32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s32_m_tied1, svint32_t, ++ z0 = svlsl_n_s32_m (p0, z0, 1), ++ z0 = svlsl_m (p0, z0, 1)) ++ ++/* ++** lsl_1_s32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s32_m_untied, svint32_t, ++ z0 = svlsl_n_s32_m (p0, z1, 1), ++ z0 = svlsl_m (p0, z1, 1)) ++ ++/* ++** lsl_31_s32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_s32_m_tied1, svint32_t, ++ z0 = svlsl_n_s32_m (p0, z0, 31), ++ z0 = svlsl_m (p0, z0, 31)) ++ ++/* ++** lsl_31_s32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_s32_m_untied, svint32_t, ++ z0 = svlsl_n_s32_m (p0, z1, 31), ++ z0 = svlsl_m (p0, z1, 31)) ++ ++/* ++** lsl_32_s32_m_tied1: ++** mov (z[0-9]+\.s), #32 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_s32_m_tied1, svint32_t, ++ z0 = svlsl_n_s32_m (p0, z0, 32), ++ z0 = svlsl_m (p0, z0, 32)) ++ ++/* ++** lsl_32_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #32 ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_s32_m_untied, svint32_t, ++ z0 = svlsl_n_s32_m (p0, z1, 32), ++ z0 = svlsl_m (p0, z1, 32)) ++ ++/* ++** lsl_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (lsl_s32_z_tied1, svint32_t, svuint32_t, ++ z0 = svlsl_s32_z (p0, z0, z4), ++ z0 = svlsl_z (p0, z0, z4)) ++ ++/* ++** lsl_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** lslr z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s32_z_tied2, svint32_t, svuint32_t, ++ z0_res = svlsl_s32_z (p0, z4, z0), ++ z0_res = svlsl_z (p0, z4, z0)) ++ ++/* ++** lsl_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, z4\.s ++** | ++** movprfx z0\.s, p0/z, z4\.s ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_DUAL_Z (lsl_s32_z_untied, svint32_t, svuint32_t, ++ z0 = svlsl_s32_z (p0, z1, z4), ++ z0 = svlsl_z (p0, z1, z4)) ++ ++/* ++** lsl_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s32_z_tied1, svint32_t, uint32_t, ++ z0 = svlsl_n_s32_z (p0, z0, x0), ++ z0 = svlsl_z (p0, z0, x0)) ++ ++/* ++** lsl_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s32_z_untied, svint32_t, uint32_t, ++ z0 = svlsl_n_s32_z (p0, z1, x0), ++ z0 = svlsl_z (p0, z1, x0)) ++ ++/* ++** lsl_1_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s32_z_tied1, svint32_t, ++ z0 = svlsl_n_s32_z (p0, z0, 1), ++ z0 = svlsl_z (p0, z0, 1)) ++ ++/* ++** lsl_1_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s32_z_untied, svint32_t, ++ z0 = svlsl_n_s32_z (p0, z1, 1), ++ z0 = svlsl_z (p0, z1, 1)) ++ 
++/* ++** lsl_31_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_s32_z_tied1, svint32_t, ++ z0 = svlsl_n_s32_z (p0, z0, 31), ++ z0 = svlsl_z (p0, z0, 31)) ++ ++/* ++** lsl_31_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_s32_z_untied, svint32_t, ++ z0 = svlsl_n_s32_z (p0, z1, 31), ++ z0 = svlsl_z (p0, z1, 31)) ++ ++/* ++** lsl_32_s32_z_tied1: ++** mov (z[0-9]+\.s), #32 ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_s32_z_tied1, svint32_t, ++ z0 = svlsl_n_s32_z (p0, z0, 32), ++ z0 = svlsl_z (p0, z0, 32)) ++ ++/* ++** lsl_32_s32_z_untied: ++** mov (z[0-9]+\.s), #32 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_s32_z_untied, svint32_t, ++ z0 = svlsl_n_s32_z (p0, z1, 32), ++ z0 = svlsl_z (p0, z1, 32)) ++ ++/* ++** lsl_s32_x_tied1: ++** lsl z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (lsl_s32_x_tied1, svint32_t, svuint32_t, ++ z0 = svlsl_s32_x (p0, z0, z4), ++ z0 = svlsl_x (p0, z0, z4)) ++ ++/* ++** lsl_s32_x_tied2: ++** lslr z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s32_x_tied2, svint32_t, svuint32_t, ++ z0_res = svlsl_s32_x (p0, z4, z0), ++ z0_res = svlsl_x (p0, z4, z0)) ++ ++/* ++** lsl_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, z4\.s ++** | ++** movprfx z0, z4 ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_DUAL_Z (lsl_s32_x_untied, svint32_t, svuint32_t, ++ z0 = svlsl_s32_x (p0, z1, z4), ++ z0 = svlsl_x (p0, z1, z4)) ++ ++/* ++** lsl_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s32_x_tied1, svint32_t, uint32_t, ++ z0 = svlsl_n_s32_x (p0, z0, x0), ++ z0 = svlsl_x (p0, z0, x0)) ++ ++/* ++** lsl_w0_s32_x_untied: ++** mov z0\.s, w0 ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s32_x_untied, svint32_t, uint32_t, ++ z0 = svlsl_n_s32_x (p0, z1, x0), ++ z0 = svlsl_x (p0, z1, x0)) ++ ++/* ++** lsl_1_s32_x_tied1: ++** lsl z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s32_x_tied1, svint32_t, ++ z0 = svlsl_n_s32_x (p0, z0, 1), ++ z0 = svlsl_x (p0, z0, 1)) ++ ++/* ++** lsl_1_s32_x_untied: ++** lsl z0\.s, z1\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s32_x_untied, svint32_t, ++ z0 = svlsl_n_s32_x (p0, z1, 1), ++ z0 = svlsl_x (p0, z1, 1)) ++ ++/* ++** lsl_31_s32_x_tied1: ++** lsl z0\.s, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_s32_x_tied1, svint32_t, ++ z0 = svlsl_n_s32_x (p0, z0, 31), ++ z0 = svlsl_x (p0, z0, 31)) ++ ++/* ++** lsl_31_s32_x_untied: ++** lsl z0\.s, z1\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_s32_x_untied, svint32_t, ++ z0 = svlsl_n_s32_x (p0, z1, 31), ++ z0 = svlsl_x (p0, z1, 31)) ++ ++/* ++** lsl_32_s32_x_tied1: ++** mov (z[0-9]+\.s), #32 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_s32_x_tied1, svint32_t, ++ z0 = svlsl_n_s32_x (p0, z0, 32), ++ z0 = svlsl_x (p0, z0, 32)) ++ ++/* ++** lsl_32_s32_x_untied: ++** mov z0\.s, #32 ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_s32_x_untied, svint32_t, ++ z0 = svlsl_n_s32_x (p0, z1, 32), ++ z0 = svlsl_x (p0, z1, 32)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s64.c +new file mode 100644 
+index 000000000..39753986b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s64.c +@@ -0,0 +1,351 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_s64_m_tied1: ++** lsl z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_s64_m_tied1, svint64_t, svuint64_t, ++ z0 = svlsl_s64_m (p0, z0, z4), ++ z0 = svlsl_m (p0, z0, z4)) ++ ++/* ++** lsl_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s64_m_tied2, svint64_t, svuint64_t, ++ z0_res = svlsl_s64_m (p0, z4, z0), ++ z0_res = svlsl_m (p0, z4, z0)) ++ ++/* ++** lsl_s64_m_untied: ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_s64_m_untied, svint64_t, svuint64_t, ++ z0 = svlsl_s64_m (p0, z1, z4), ++ z0 = svlsl_m (p0, z1, z4)) ++ ++/* ++** lsl_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_s64_m_tied1, svint64_t, uint64_t, ++ z0 = svlsl_n_s64_m (p0, z0, x0), ++ z0 = svlsl_m (p0, z0, x0)) ++ ++/* ++** lsl_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_s64_m_untied, svint64_t, uint64_t, ++ z0 = svlsl_n_s64_m (p0, z1, x0), ++ z0 = svlsl_m (p0, z1, x0)) ++ ++/* ++** lsl_1_s64_m_tied1: ++** lsl z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s64_m_tied1, svint64_t, ++ z0 = svlsl_n_s64_m (p0, z0, 1), ++ z0 = svlsl_m (p0, z0, 1)) ++ ++/* ++** lsl_1_s64_m_untied: ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s64_m_untied, svint64_t, ++ z0 = svlsl_n_s64_m (p0, z1, 1), ++ z0 = svlsl_m (p0, z1, 1)) ++ ++/* ++** lsl_63_s64_m_tied1: ++** lsl z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_s64_m_tied1, svint64_t, ++ z0 = svlsl_n_s64_m (p0, z0, 63), ++ z0 = svlsl_m (p0, z0, 63)) ++ ++/* ++** lsl_63_s64_m_untied: ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_s64_m_untied, svint64_t, ++ z0 = svlsl_n_s64_m (p0, z1, 63), ++ z0 = svlsl_m (p0, z1, 63)) ++ ++/* ++** lsl_64_s64_m_tied1: ++** mov (z[0-9]+\.d), #64 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_s64_m_tied1, svint64_t, ++ z0 = svlsl_n_s64_m (p0, z0, 64), ++ z0 = svlsl_m (p0, z0, 64)) ++ ++/* ++** lsl_64_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #64 ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_s64_m_untied, svint64_t, ++ z0 = svlsl_n_s64_m (p0, z1, 64), ++ z0 = svlsl_m (p0, z1, 64)) ++ ++/* ++** lsl_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** lsl z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_s64_z_tied1, svint64_t, svuint64_t, ++ z0 = svlsl_s64_z (p0, z0, z4), ++ z0 = svlsl_z (p0, z0, z4)) ++ ++/* ++** lsl_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** lslr z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s64_z_tied2, svint64_t, svuint64_t, ++ z0_res = svlsl_s64_z (p0, z4, z0), ++ z0_res = svlsl_z (p0, z4, z0)) ++ ++/* ++** lsl_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** lsl z0\.d, p0/m, z0\.d, z4\.d ++** | ++** movprfx z0\.d, p0/z, z4\.d ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_DUAL_Z (lsl_s64_z_untied, svint64_t, svuint64_t, ++ z0 = svlsl_s64_z (p0, z1, z4), ++ z0 = svlsl_z (p0, z1, z4)) ++ ++/* ++** lsl_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, 
p0/z, z0\.d ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_s64_z_tied1, svint64_t, uint64_t, ++ z0 = svlsl_n_s64_z (p0, z0, x0), ++ z0 = svlsl_z (p0, z0, x0)) ++ ++/* ++** lsl_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** lsl z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_s64_z_untied, svint64_t, uint64_t, ++ z0 = svlsl_n_s64_z (p0, z1, x0), ++ z0 = svlsl_z (p0, z1, x0)) ++ ++/* ++** lsl_1_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** lsl z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s64_z_tied1, svint64_t, ++ z0 = svlsl_n_s64_z (p0, z0, 1), ++ z0 = svlsl_z (p0, z0, 1)) ++ ++/* ++** lsl_1_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** lsl z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s64_z_untied, svint64_t, ++ z0 = svlsl_n_s64_z (p0, z1, 1), ++ z0 = svlsl_z (p0, z1, 1)) ++ ++/* ++** lsl_63_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** lsl z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_s64_z_tied1, svint64_t, ++ z0 = svlsl_n_s64_z (p0, z0, 63), ++ z0 = svlsl_z (p0, z0, 63)) ++ ++/* ++** lsl_63_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** lsl z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_s64_z_untied, svint64_t, ++ z0 = svlsl_n_s64_z (p0, z1, 63), ++ z0 = svlsl_z (p0, z1, 63)) ++ ++/* ++** lsl_64_s64_z_tied1: ++** mov (z[0-9]+\.d), #64 ++** movprfx z0\.d, p0/z, z0\.d ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_s64_z_tied1, svint64_t, ++ z0 = svlsl_n_s64_z (p0, z0, 64), ++ z0 = svlsl_z (p0, z0, 64)) ++ ++/* ++** lsl_64_s64_z_untied: ++** mov (z[0-9]+\.d), #64 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** lsl z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_s64_z_untied, svint64_t, ++ z0 = svlsl_n_s64_z (p0, z1, 64), ++ z0 = svlsl_z (p0, z1, 64)) ++ ++/* ++** lsl_s64_x_tied1: ++** lsl z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_s64_x_tied1, svint64_t, svuint64_t, ++ z0 = svlsl_s64_x (p0, z0, z4), ++ z0 = svlsl_x (p0, z0, z4)) ++ ++/* ++** lsl_s64_x_tied2: ++** lslr z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s64_x_tied2, svint64_t, svuint64_t, ++ z0_res = svlsl_s64_x (p0, z4, z0), ++ z0_res = svlsl_x (p0, z4, z0)) ++ ++/* ++** lsl_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, z4\.d ++** | ++** movprfx z0, z4 ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_DUAL_Z (lsl_s64_x_untied, svint64_t, svuint64_t, ++ z0 = svlsl_s64_x (p0, z1, z4), ++ z0 = svlsl_x (p0, z1, z4)) ++ ++/* ++** lsl_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_s64_x_tied1, svint64_t, uint64_t, ++ z0 = svlsl_n_s64_x (p0, z0, x0), ++ z0 = svlsl_x (p0, z0, x0)) ++ ++/* ++** lsl_x0_s64_x_untied: ++** mov z0\.d, x0 ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_s64_x_untied, svint64_t, uint64_t, ++ z0 = svlsl_n_s64_x (p0, z1, x0), ++ z0 = svlsl_x (p0, z1, x0)) ++ ++/* ++** lsl_1_s64_x_tied1: ++** lsl z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s64_x_tied1, svint64_t, ++ z0 = svlsl_n_s64_x (p0, z0, 1), ++ z0 = svlsl_x (p0, z0, 1)) ++ ++/* ++** lsl_1_s64_x_untied: ++** lsl z0\.d, z1\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s64_x_untied, svint64_t, ++ z0 = svlsl_n_s64_x (p0, z1, 1), ++ z0 = svlsl_x 
(p0, z1, 1)) ++ ++/* ++** lsl_63_s64_x_tied1: ++** lsl z0\.d, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_s64_x_tied1, svint64_t, ++ z0 = svlsl_n_s64_x (p0, z0, 63), ++ z0 = svlsl_x (p0, z0, 63)) ++ ++/* ++** lsl_63_s64_x_untied: ++** lsl z0\.d, z1\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_s64_x_untied, svint64_t, ++ z0 = svlsl_n_s64_x (p0, z1, 63), ++ z0 = svlsl_x (p0, z1, 63)) ++ ++/* ++** lsl_64_s64_x_tied1: ++** mov (z[0-9]+\.d), #64 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_s64_x_tied1, svint64_t, ++ z0 = svlsl_n_s64_x (p0, z0, 64), ++ z0 = svlsl_x (p0, z0, 64)) ++ ++/* ++** lsl_64_s64_x_untied: ++** mov z0\.d, #64 ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_s64_x_untied, svint64_t, ++ z0 = svlsl_n_s64_x (p0, z1, 64), ++ z0 = svlsl_x (p0, z1, 64)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s8.c +new file mode 100644 +index 000000000..9a9cc959c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s8.c +@@ -0,0 +1,351 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_s8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (lsl_s8_m_tied1, svint8_t, svuint8_t, ++ z0 = svlsl_s8_m (p0, z0, z4), ++ z0 = svlsl_m (p0, z0, z4)) ++ ++/* ++** lsl_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** lsl z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s8_m_tied2, svint8_t, svuint8_t, ++ z0_res = svlsl_s8_m (p0, z4, z0), ++ z0_res = svlsl_m (p0, z4, z0)) ++ ++/* ++** lsl_s8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (lsl_s8_m_untied, svint8_t, svuint8_t, ++ z0 = svlsl_s8_m (p0, z1, z4), ++ z0 = svlsl_m (p0, z1, z4)) ++ ++/* ++** lsl_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s8_m_tied1, svint8_t, uint8_t, ++ z0 = svlsl_n_s8_m (p0, z0, x0), ++ z0 = svlsl_m (p0, z0, x0)) ++ ++/* ++** lsl_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s8_m_untied, svint8_t, uint8_t, ++ z0 = svlsl_n_s8_m (p0, z1, x0), ++ z0 = svlsl_m (p0, z1, x0)) ++ ++/* ++** lsl_1_s8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s8_m_tied1, svint8_t, ++ z0 = svlsl_n_s8_m (p0, z0, 1), ++ z0 = svlsl_m (p0, z0, 1)) ++ ++/* ++** lsl_1_s8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s8_m_untied, svint8_t, ++ z0 = svlsl_n_s8_m (p0, z1, 1), ++ z0 = svlsl_m (p0, z1, 1)) ++ ++/* ++** lsl_7_s8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_s8_m_tied1, svint8_t, ++ z0 = svlsl_n_s8_m (p0, z0, 7), ++ z0 = svlsl_m (p0, z0, 7)) ++ ++/* ++** lsl_7_s8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_s8_m_untied, svint8_t, ++ z0 = svlsl_n_s8_m (p0, z1, 7), ++ z0 = svlsl_m (p0, z1, 7)) ++ ++/* ++** lsl_8_s8_m_tied1: ++** mov (z[0-9]+\.b), #8 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_s8_m_tied1, svint8_t, ++ z0 = svlsl_n_s8_m (p0, z0, 8), ++ z0 = svlsl_m (p0, z0, 8)) ++ ++/* ++** lsl_8_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #8 ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_s8_m_untied, svint8_t, ++ z0 = 
svlsl_n_s8_m (p0, z1, 8), ++ z0 = svlsl_m (p0, z1, 8)) ++ ++/* ++** lsl_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (lsl_s8_z_tied1, svint8_t, svuint8_t, ++ z0 = svlsl_s8_z (p0, z0, z4), ++ z0 = svlsl_z (p0, z0, z4)) ++ ++/* ++** lsl_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** lslr z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s8_z_tied2, svint8_t, svuint8_t, ++ z0_res = svlsl_s8_z (p0, z4, z0), ++ z0_res = svlsl_z (p0, z4, z0)) ++ ++/* ++** lsl_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, z4\.b ++** | ++** movprfx z0\.b, p0/z, z4\.b ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_DUAL_Z (lsl_s8_z_untied, svint8_t, svuint8_t, ++ z0 = svlsl_s8_z (p0, z1, z4), ++ z0 = svlsl_z (p0, z1, z4)) ++ ++/* ++** lsl_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s8_z_tied1, svint8_t, uint8_t, ++ z0 = svlsl_n_s8_z (p0, z0, x0), ++ z0 = svlsl_z (p0, z0, x0)) ++ ++/* ++** lsl_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s8_z_untied, svint8_t, uint8_t, ++ z0 = svlsl_n_s8_z (p0, z1, x0), ++ z0 = svlsl_z (p0, z1, x0)) ++ ++/* ++** lsl_1_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s8_z_tied1, svint8_t, ++ z0 = svlsl_n_s8_z (p0, z0, 1), ++ z0 = svlsl_z (p0, z0, 1)) ++ ++/* ++** lsl_1_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s8_z_untied, svint8_t, ++ z0 = svlsl_n_s8_z (p0, z1, 1), ++ z0 = svlsl_z (p0, z1, 1)) ++ ++/* ++** lsl_7_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_s8_z_tied1, svint8_t, ++ z0 = svlsl_n_s8_z (p0, z0, 7), ++ z0 = svlsl_z (p0, z0, 7)) ++ ++/* ++** lsl_7_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_s8_z_untied, svint8_t, ++ z0 = svlsl_n_s8_z (p0, z1, 7), ++ z0 = svlsl_z (p0, z1, 7)) ++ ++/* ++** lsl_8_s8_z_tied1: ++** mov (z[0-9]+\.b), #8 ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_s8_z_tied1, svint8_t, ++ z0 = svlsl_n_s8_z (p0, z0, 8), ++ z0 = svlsl_z (p0, z0, 8)) ++ ++/* ++** lsl_8_s8_z_untied: ++** mov (z[0-9]+\.b), #8 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_s8_z_untied, svint8_t, ++ z0 = svlsl_n_s8_z (p0, z1, 8), ++ z0 = svlsl_z (p0, z1, 8)) ++ ++/* ++** lsl_s8_x_tied1: ++** lsl z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (lsl_s8_x_tied1, svint8_t, svuint8_t, ++ z0 = svlsl_s8_x (p0, z0, z4), ++ z0 = svlsl_x (p0, z0, z4)) ++ ++/* ++** lsl_s8_x_tied2: ++** lslr z0\.b, p0/m, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_s8_x_tied2, svint8_t, svuint8_t, ++ z0_res = svlsl_s8_x (p0, z4, z0), ++ z0_res = svlsl_x (p0, z4, z0)) ++ ++/* ++** lsl_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, z4\.b ++** | ++** movprfx z0, z4 ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_DUAL_Z (lsl_s8_x_untied, svint8_t, svuint8_t, ++ z0 = svlsl_s8_x (p0, z1, z4), ++ 
z0 = svlsl_x (p0, z1, z4)) ++ ++/* ++** lsl_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s8_x_tied1, svint8_t, uint8_t, ++ z0 = svlsl_n_s8_x (p0, z0, x0), ++ z0 = svlsl_x (p0, z0, x0)) ++ ++/* ++** lsl_w0_s8_x_untied: ++** mov z0\.b, w0 ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_s8_x_untied, svint8_t, uint8_t, ++ z0 = svlsl_n_s8_x (p0, z1, x0), ++ z0 = svlsl_x (p0, z1, x0)) ++ ++/* ++** lsl_1_s8_x_tied1: ++** lsl z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s8_x_tied1, svint8_t, ++ z0 = svlsl_n_s8_x (p0, z0, 1), ++ z0 = svlsl_x (p0, z0, 1)) ++ ++/* ++** lsl_1_s8_x_untied: ++** lsl z0\.b, z1\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_s8_x_untied, svint8_t, ++ z0 = svlsl_n_s8_x (p0, z1, 1), ++ z0 = svlsl_x (p0, z1, 1)) ++ ++/* ++** lsl_7_s8_x_tied1: ++** lsl z0\.b, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_s8_x_tied1, svint8_t, ++ z0 = svlsl_n_s8_x (p0, z0, 7), ++ z0 = svlsl_x (p0, z0, 7)) ++ ++/* ++** lsl_7_s8_x_untied: ++** lsl z0\.b, z1\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_s8_x_untied, svint8_t, ++ z0 = svlsl_n_s8_x (p0, z1, 7), ++ z0 = svlsl_x (p0, z1, 7)) ++ ++/* ++** lsl_8_s8_x_tied1: ++** mov (z[0-9]+\.b), #8 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_s8_x_tied1, svint8_t, ++ z0 = svlsl_n_s8_x (p0, z0, 8), ++ z0 = svlsl_x (p0, z0, 8)) ++ ++/* ++** lsl_8_s8_x_untied: ++** mov z0\.b, #8 ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_s8_x_untied, svint8_t, ++ z0 = svlsl_n_s8_x (p0, z1, 8), ++ z0 = svlsl_x (p0, z1, 8)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u16.c +new file mode 100644 +index 000000000..57db0fda6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u16.c +@@ -0,0 +1,351 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_u16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u16_m_tied1, svuint16_t, ++ z0 = svlsl_u16_m (p0, z0, z1), ++ z0 = svlsl_m (p0, z0, z1)) ++ ++/* ++** lsl_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u16_m_tied2, svuint16_t, ++ z0 = svlsl_u16_m (p0, z1, z0), ++ z0 = svlsl_m (p0, z1, z0)) ++ ++/* ++** lsl_u16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u16_m_untied, svuint16_t, ++ z0 = svlsl_u16_m (p0, z1, z2), ++ z0 = svlsl_m (p0, z1, z2)) ++ ++/* ++** lsl_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svlsl_n_u16_m (p0, z0, x0), ++ z0 = svlsl_m (p0, z0, x0)) ++ ++/* ++** lsl_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svlsl_n_u16_m (p0, z1, x0), ++ z0 = svlsl_m (p0, z1, x0)) ++ ++/* ++** lsl_1_u16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u16_m_tied1, svuint16_t, ++ z0 = svlsl_n_u16_m (p0, z0, 1), ++ z0 = svlsl_m (p0, z0, 1)) ++ ++/* ++** lsl_1_u16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u16_m_untied, svuint16_t, ++ z0 = svlsl_n_u16_m (p0, z1, 1), ++ z0 = svlsl_m (p0, z1, 1)) ++ 
++/* ++** lsl_15_u16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_u16_m_tied1, svuint16_t, ++ z0 = svlsl_n_u16_m (p0, z0, 15), ++ z0 = svlsl_m (p0, z0, 15)) ++ ++/* ++** lsl_15_u16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_u16_m_untied, svuint16_t, ++ z0 = svlsl_n_u16_m (p0, z1, 15), ++ z0 = svlsl_m (p0, z1, 15)) ++ ++/* ++** lsl_16_u16_m_tied1: ++** mov (z[0-9]+\.h), #16 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_u16_m_tied1, svuint16_t, ++ z0 = svlsl_n_u16_m (p0, z0, 16), ++ z0 = svlsl_m (p0, z0, 16)) ++ ++/* ++** lsl_16_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #16 ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_u16_m_untied, svuint16_t, ++ z0 = svlsl_n_u16_m (p0, z1, 16), ++ z0 = svlsl_m (p0, z1, 16)) ++ ++/* ++** lsl_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u16_z_tied1, svuint16_t, ++ z0 = svlsl_u16_z (p0, z0, z1), ++ z0 = svlsl_z (p0, z0, z1)) ++ ++/* ++** lsl_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u16_z_tied2, svuint16_t, ++ z0 = svlsl_u16_z (p0, z1, z0), ++ z0 = svlsl_z (p0, z1, z0)) ++ ++/* ++** lsl_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u16_z_untied, svuint16_t, ++ z0 = svlsl_u16_z (p0, z1, z2), ++ z0 = svlsl_z (p0, z1, z2)) ++ ++/* ++** lsl_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svlsl_n_u16_z (p0, z0, x0), ++ z0 = svlsl_z (p0, z0, x0)) ++ ++/* ++** lsl_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svlsl_n_u16_z (p0, z1, x0), ++ z0 = svlsl_z (p0, z1, x0)) ++ ++/* ++** lsl_1_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u16_z_tied1, svuint16_t, ++ z0 = svlsl_n_u16_z (p0, z0, 1), ++ z0 = svlsl_z (p0, z0, 1)) ++ ++/* ++** lsl_1_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u16_z_untied, svuint16_t, ++ z0 = svlsl_n_u16_z (p0, z1, 1), ++ z0 = svlsl_z (p0, z1, 1)) ++ ++/* ++** lsl_15_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_u16_z_tied1, svuint16_t, ++ z0 = svlsl_n_u16_z (p0, z0, 15), ++ z0 = svlsl_z (p0, z0, 15)) ++ ++/* ++** lsl_15_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_u16_z_untied, svuint16_t, ++ z0 = svlsl_n_u16_z (p0, z1, 15), ++ z0 = svlsl_z (p0, z1, 15)) ++ ++/* ++** lsl_16_u16_z_tied1: ++** mov (z[0-9]+\.h), #16 ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_u16_z_tied1, svuint16_t, ++ z0 = svlsl_n_u16_z (p0, z0, 16), ++ z0 = svlsl_z (p0, z0, 16)) ++ ++/* ++** lsl_16_u16_z_untied: ++** mov (z[0-9]+\.h), #16 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, 
z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_u16_z_untied, svuint16_t, ++ z0 = svlsl_n_u16_z (p0, z1, 16), ++ z0 = svlsl_z (p0, z1, 16)) ++ ++/* ++** lsl_u16_x_tied1: ++** lsl z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u16_x_tied1, svuint16_t, ++ z0 = svlsl_u16_x (p0, z0, z1), ++ z0 = svlsl_x (p0, z0, z1)) ++ ++/* ++** lsl_u16_x_tied2: ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u16_x_tied2, svuint16_t, ++ z0 = svlsl_u16_x (p0, z1, z0), ++ z0 = svlsl_x (p0, z1, z0)) ++ ++/* ++** lsl_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u16_x_untied, svuint16_t, ++ z0 = svlsl_u16_x (p0, z1, z2), ++ z0 = svlsl_x (p0, z1, z2)) ++ ++/* ++** lsl_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svlsl_n_u16_x (p0, z0, x0), ++ z0 = svlsl_x (p0, z0, x0)) ++ ++/* ++** lsl_w0_u16_x_untied: ++** mov z0\.h, w0 ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svlsl_n_u16_x (p0, z1, x0), ++ z0 = svlsl_x (p0, z1, x0)) ++ ++/* ++** lsl_1_u16_x_tied1: ++** lsl z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u16_x_tied1, svuint16_t, ++ z0 = svlsl_n_u16_x (p0, z0, 1), ++ z0 = svlsl_x (p0, z0, 1)) ++ ++/* ++** lsl_1_u16_x_untied: ++** lsl z0\.h, z1\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u16_x_untied, svuint16_t, ++ z0 = svlsl_n_u16_x (p0, z1, 1), ++ z0 = svlsl_x (p0, z1, 1)) ++ ++/* ++** lsl_15_u16_x_tied1: ++** lsl z0\.h, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_u16_x_tied1, svuint16_t, ++ z0 = svlsl_n_u16_x (p0, z0, 15), ++ z0 = svlsl_x (p0, z0, 15)) ++ ++/* ++** lsl_15_u16_x_untied: ++** lsl z0\.h, z1\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_15_u16_x_untied, svuint16_t, ++ z0 = svlsl_n_u16_x (p0, z1, 15), ++ z0 = svlsl_x (p0, z1, 15)) ++ ++/* ++** lsl_16_u16_x_tied1: ++** mov (z[0-9]+\.h), #16 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_u16_x_tied1, svuint16_t, ++ z0 = svlsl_n_u16_x (p0, z0, 16), ++ z0 = svlsl_x (p0, z0, 16)) ++ ++/* ++** lsl_16_u16_x_untied: ++** mov z0\.h, #16 ++** lslr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_16_u16_x_untied, svuint16_t, ++ z0 = svlsl_n_u16_x (p0, z1, 16), ++ z0 = svlsl_x (p0, z1, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u32.c +new file mode 100644 +index 000000000..8773f15db +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u32.c +@@ -0,0 +1,351 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_u32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u32_m_tied1, svuint32_t, ++ z0 = svlsl_u32_m (p0, z0, z1), ++ z0 = svlsl_m (p0, z0, z1)) ++ ++/* ++** lsl_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u32_m_tied2, svuint32_t, ++ z0 = svlsl_u32_m (p0, z1, z0), ++ z0 = svlsl_m (p0, z1, z0)) ++ ++/* ++** lsl_u32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u32_m_untied, svuint32_t, ++ z0 = svlsl_u32_m (p0, z1, z2), ++ z0 = svlsl_m 
(p0, z1, z2)) ++ ++/* ++** lsl_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svlsl_n_u32_m (p0, z0, x0), ++ z0 = svlsl_m (p0, z0, x0)) ++ ++/* ++** lsl_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svlsl_n_u32_m (p0, z1, x0), ++ z0 = svlsl_m (p0, z1, x0)) ++ ++/* ++** lsl_1_u32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u32_m_tied1, svuint32_t, ++ z0 = svlsl_n_u32_m (p0, z0, 1), ++ z0 = svlsl_m (p0, z0, 1)) ++ ++/* ++** lsl_1_u32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u32_m_untied, svuint32_t, ++ z0 = svlsl_n_u32_m (p0, z1, 1), ++ z0 = svlsl_m (p0, z1, 1)) ++ ++/* ++** lsl_31_u32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_u32_m_tied1, svuint32_t, ++ z0 = svlsl_n_u32_m (p0, z0, 31), ++ z0 = svlsl_m (p0, z0, 31)) ++ ++/* ++** lsl_31_u32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_u32_m_untied, svuint32_t, ++ z0 = svlsl_n_u32_m (p0, z1, 31), ++ z0 = svlsl_m (p0, z1, 31)) ++ ++/* ++** lsl_32_u32_m_tied1: ++** mov (z[0-9]+\.s), #32 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_u32_m_tied1, svuint32_t, ++ z0 = svlsl_n_u32_m (p0, z0, 32), ++ z0 = svlsl_m (p0, z0, 32)) ++ ++/* ++** lsl_32_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #32 ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_u32_m_untied, svuint32_t, ++ z0 = svlsl_n_u32_m (p0, z1, 32), ++ z0 = svlsl_m (p0, z1, 32)) ++ ++/* ++** lsl_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u32_z_tied1, svuint32_t, ++ z0 = svlsl_u32_z (p0, z0, z1), ++ z0 = svlsl_z (p0, z0, z1)) ++ ++/* ++** lsl_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u32_z_tied2, svuint32_t, ++ z0 = svlsl_u32_z (p0, z1, z0), ++ z0 = svlsl_z (p0, z1, z0)) ++ ++/* ++** lsl_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u32_z_untied, svuint32_t, ++ z0 = svlsl_u32_z (p0, z1, z2), ++ z0 = svlsl_z (p0, z1, z2)) ++ ++/* ++** lsl_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svlsl_n_u32_z (p0, z0, x0), ++ z0 = svlsl_z (p0, z0, x0)) ++ ++/* ++** lsl_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svlsl_n_u32_z (p0, z1, x0), ++ z0 = svlsl_z (p0, z1, x0)) ++ ++/* ++** lsl_1_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u32_z_tied1, svuint32_t, ++ z0 = svlsl_n_u32_z (p0, z0, 1), ++ z0 = svlsl_z (p0, z0, 1)) ++ ++/* ++** lsl_1_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u32_z_untied, 
svuint32_t, ++ z0 = svlsl_n_u32_z (p0, z1, 1), ++ z0 = svlsl_z (p0, z1, 1)) ++ ++/* ++** lsl_31_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_u32_z_tied1, svuint32_t, ++ z0 = svlsl_n_u32_z (p0, z0, 31), ++ z0 = svlsl_z (p0, z0, 31)) ++ ++/* ++** lsl_31_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_u32_z_untied, svuint32_t, ++ z0 = svlsl_n_u32_z (p0, z1, 31), ++ z0 = svlsl_z (p0, z1, 31)) ++ ++/* ++** lsl_32_u32_z_tied1: ++** mov (z[0-9]+\.s), #32 ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_u32_z_tied1, svuint32_t, ++ z0 = svlsl_n_u32_z (p0, z0, 32), ++ z0 = svlsl_z (p0, z0, 32)) ++ ++/* ++** lsl_32_u32_z_untied: ++** mov (z[0-9]+\.s), #32 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_u32_z_untied, svuint32_t, ++ z0 = svlsl_n_u32_z (p0, z1, 32), ++ z0 = svlsl_z (p0, z1, 32)) ++ ++/* ++** lsl_u32_x_tied1: ++** lsl z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u32_x_tied1, svuint32_t, ++ z0 = svlsl_u32_x (p0, z0, z1), ++ z0 = svlsl_x (p0, z0, z1)) ++ ++/* ++** lsl_u32_x_tied2: ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u32_x_tied2, svuint32_t, ++ z0 = svlsl_u32_x (p0, z1, z0), ++ z0 = svlsl_x (p0, z1, z0)) ++ ++/* ++** lsl_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u32_x_untied, svuint32_t, ++ z0 = svlsl_u32_x (p0, z1, z2), ++ z0 = svlsl_x (p0, z1, z2)) ++ ++/* ++** lsl_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svlsl_n_u32_x (p0, z0, x0), ++ z0 = svlsl_x (p0, z0, x0)) ++ ++/* ++** lsl_w0_u32_x_untied: ++** mov z0\.s, w0 ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svlsl_n_u32_x (p0, z1, x0), ++ z0 = svlsl_x (p0, z1, x0)) ++ ++/* ++** lsl_1_u32_x_tied1: ++** lsl z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u32_x_tied1, svuint32_t, ++ z0 = svlsl_n_u32_x (p0, z0, 1), ++ z0 = svlsl_x (p0, z0, 1)) ++ ++/* ++** lsl_1_u32_x_untied: ++** lsl z0\.s, z1\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u32_x_untied, svuint32_t, ++ z0 = svlsl_n_u32_x (p0, z1, 1), ++ z0 = svlsl_x (p0, z1, 1)) ++ ++/* ++** lsl_31_u32_x_tied1: ++** lsl z0\.s, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_u32_x_tied1, svuint32_t, ++ z0 = svlsl_n_u32_x (p0, z0, 31), ++ z0 = svlsl_x (p0, z0, 31)) ++ ++/* ++** lsl_31_u32_x_untied: ++** lsl z0\.s, z1\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_31_u32_x_untied, svuint32_t, ++ z0 = svlsl_n_u32_x (p0, z1, 31), ++ z0 = svlsl_x (p0, z1, 31)) ++ ++/* ++** lsl_32_u32_x_tied1: ++** mov (z[0-9]+\.s), #32 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_u32_x_tied1, svuint32_t, ++ z0 = svlsl_n_u32_x (p0, z0, 32), ++ z0 = svlsl_x (p0, z0, 32)) ++ ++/* ++** lsl_32_u32_x_untied: ++** mov z0\.s, #32 ++** lslr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_32_u32_x_untied, svuint32_t, ++ z0 = svlsl_n_u32_x (p0, z1, 32), ++ z0 = svlsl_x (p0, z1, 32)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u64.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u64.c +new file mode 100644 +index 000000000..7b12bd43e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u64.c +@@ -0,0 +1,351 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_u64_m_tied1: ++** lsl z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u64_m_tied1, svuint64_t, ++ z0 = svlsl_u64_m (p0, z0, z1), ++ z0 = svlsl_m (p0, z0, z1)) ++ ++/* ++** lsl_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u64_m_tied2, svuint64_t, ++ z0 = svlsl_u64_m (p0, z1, z0), ++ z0 = svlsl_m (p0, z1, z0)) ++ ++/* ++** lsl_u64_m_untied: ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u64_m_untied, svuint64_t, ++ z0 = svlsl_u64_m (p0, z1, z2), ++ z0 = svlsl_m (p0, z1, z2)) ++ ++/* ++** lsl_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svlsl_n_u64_m (p0, z0, x0), ++ z0 = svlsl_m (p0, z0, x0)) ++ ++/* ++** lsl_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svlsl_n_u64_m (p0, z1, x0), ++ z0 = svlsl_m (p0, z1, x0)) ++ ++/* ++** lsl_1_u64_m_tied1: ++** lsl z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u64_m_tied1, svuint64_t, ++ z0 = svlsl_n_u64_m (p0, z0, 1), ++ z0 = svlsl_m (p0, z0, 1)) ++ ++/* ++** lsl_1_u64_m_untied: ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u64_m_untied, svuint64_t, ++ z0 = svlsl_n_u64_m (p0, z1, 1), ++ z0 = svlsl_m (p0, z1, 1)) ++ ++/* ++** lsl_63_u64_m_tied1: ++** lsl z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_u64_m_tied1, svuint64_t, ++ z0 = svlsl_n_u64_m (p0, z0, 63), ++ z0 = svlsl_m (p0, z0, 63)) ++ ++/* ++** lsl_63_u64_m_untied: ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_u64_m_untied, svuint64_t, ++ z0 = svlsl_n_u64_m (p0, z1, 63), ++ z0 = svlsl_m (p0, z1, 63)) ++ ++/* ++** lsl_64_u64_m_tied1: ++** mov (z[0-9]+\.d), #64 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_u64_m_tied1, svuint64_t, ++ z0 = svlsl_n_u64_m (p0, z0, 64), ++ z0 = svlsl_m (p0, z0, 64)) ++ ++/* ++** lsl_64_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #64 ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_u64_m_untied, svuint64_t, ++ z0 = svlsl_n_u64_m (p0, z1, 64), ++ z0 = svlsl_m (p0, z1, 64)) ++ ++/* ++** lsl_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** lsl z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u64_z_tied1, svuint64_t, ++ z0 = svlsl_u64_z (p0, z0, z1), ++ z0 = svlsl_z (p0, z0, z1)) ++ ++/* ++** lsl_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u64_z_tied2, svuint64_t, ++ z0 = svlsl_u64_z (p0, z1, z0), ++ z0 = svlsl_z (p0, z1, z0)) ++ ++/* ++** lsl_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** lsl z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u64_z_untied, svuint64_t, ++ z0 = svlsl_u64_z (p0, z1, z2), ++ z0 = svlsl_z (p0, z1, z2)) ++ ++/* ++** lsl_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** 
movprfx z0\.d, p0/z, z0\.d ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svlsl_n_u64_z (p0, z0, x0), ++ z0 = svlsl_z (p0, z0, x0)) ++ ++/* ++** lsl_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** lsl z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svlsl_n_u64_z (p0, z1, x0), ++ z0 = svlsl_z (p0, z1, x0)) ++ ++/* ++** lsl_1_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** lsl z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u64_z_tied1, svuint64_t, ++ z0 = svlsl_n_u64_z (p0, z0, 1), ++ z0 = svlsl_z (p0, z0, 1)) ++ ++/* ++** lsl_1_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** lsl z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u64_z_untied, svuint64_t, ++ z0 = svlsl_n_u64_z (p0, z1, 1), ++ z0 = svlsl_z (p0, z1, 1)) ++ ++/* ++** lsl_63_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** lsl z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_u64_z_tied1, svuint64_t, ++ z0 = svlsl_n_u64_z (p0, z0, 63), ++ z0 = svlsl_z (p0, z0, 63)) ++ ++/* ++** lsl_63_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** lsl z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_u64_z_untied, svuint64_t, ++ z0 = svlsl_n_u64_z (p0, z1, 63), ++ z0 = svlsl_z (p0, z1, 63)) ++ ++/* ++** lsl_64_u64_z_tied1: ++** mov (z[0-9]+\.d), #64 ++** movprfx z0\.d, p0/z, z0\.d ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_u64_z_tied1, svuint64_t, ++ z0 = svlsl_n_u64_z (p0, z0, 64), ++ z0 = svlsl_z (p0, z0, 64)) ++ ++/* ++** lsl_64_u64_z_untied: ++** mov (z[0-9]+\.d), #64 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** lsl z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_u64_z_untied, svuint64_t, ++ z0 = svlsl_n_u64_z (p0, z1, 64), ++ z0 = svlsl_z (p0, z1, 64)) ++ ++/* ++** lsl_u64_x_tied1: ++** lsl z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u64_x_tied1, svuint64_t, ++ z0 = svlsl_u64_x (p0, z0, z1), ++ z0 = svlsl_x (p0, z0, z1)) ++ ++/* ++** lsl_u64_x_tied2: ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u64_x_tied2, svuint64_t, ++ z0 = svlsl_u64_x (p0, z1, z0), ++ z0 = svlsl_x (p0, z1, z0)) ++ ++/* ++** lsl_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** lsl z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u64_x_untied, svuint64_t, ++ z0 = svlsl_u64_x (p0, z1, z2), ++ z0 = svlsl_x (p0, z1, z2)) ++ ++/* ++** lsl_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svlsl_n_u64_x (p0, z0, x0), ++ z0 = svlsl_x (p0, z0, x0)) ++ ++/* ++** lsl_x0_u64_x_untied: ++** mov z0\.d, x0 ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svlsl_n_u64_x (p0, z1, x0), ++ z0 = svlsl_x (p0, z1, x0)) ++ ++/* ++** lsl_1_u64_x_tied1: ++** lsl z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u64_x_tied1, svuint64_t, ++ z0 = svlsl_n_u64_x (p0, z0, 1), ++ z0 = svlsl_x (p0, z0, 1)) ++ ++/* ++** lsl_1_u64_x_untied: ++** lsl z0\.d, z1\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u64_x_untied, svuint64_t, ++ z0 = svlsl_n_u64_x (p0, z1, 1), ++ z0 = svlsl_x (p0, z1, 
1)) ++ ++/* ++** lsl_63_u64_x_tied1: ++** lsl z0\.d, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_u64_x_tied1, svuint64_t, ++ z0 = svlsl_n_u64_x (p0, z0, 63), ++ z0 = svlsl_x (p0, z0, 63)) ++ ++/* ++** lsl_63_u64_x_untied: ++** lsl z0\.d, z1\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_63_u64_x_untied, svuint64_t, ++ z0 = svlsl_n_u64_x (p0, z1, 63), ++ z0 = svlsl_x (p0, z1, 63)) ++ ++/* ++** lsl_64_u64_x_tied1: ++** mov (z[0-9]+\.d), #64 ++** lsl z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_u64_x_tied1, svuint64_t, ++ z0 = svlsl_n_u64_x (p0, z0, 64), ++ z0 = svlsl_x (p0, z0, 64)) ++ ++/* ++** lsl_64_u64_x_untied: ++** mov z0\.d, #64 ++** lslr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_64_u64_x_untied, svuint64_t, ++ z0 = svlsl_n_u64_x (p0, z1, 64), ++ z0 = svlsl_x (p0, z1, 64)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u8.c +new file mode 100644 +index 000000000..894b55138 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u8.c +@@ -0,0 +1,351 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_u8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u8_m_tied1, svuint8_t, ++ z0 = svlsl_u8_m (p0, z0, z1), ++ z0 = svlsl_m (p0, z0, z1)) ++ ++/* ++** lsl_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u8_m_tied2, svuint8_t, ++ z0 = svlsl_u8_m (p0, z1, z0), ++ z0 = svlsl_m (p0, z1, z0)) ++ ++/* ++** lsl_u8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u8_m_untied, svuint8_t, ++ z0 = svlsl_u8_m (p0, z1, z2), ++ z0 = svlsl_m (p0, z1, z2)) ++ ++/* ++** lsl_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svlsl_n_u8_m (p0, z0, x0), ++ z0 = svlsl_m (p0, z0, x0)) ++ ++/* ++** lsl_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svlsl_n_u8_m (p0, z1, x0), ++ z0 = svlsl_m (p0, z1, x0)) ++ ++/* ++** lsl_1_u8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u8_m_tied1, svuint8_t, ++ z0 = svlsl_n_u8_m (p0, z0, 1), ++ z0 = svlsl_m (p0, z0, 1)) ++ ++/* ++** lsl_1_u8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u8_m_untied, svuint8_t, ++ z0 = svlsl_n_u8_m (p0, z1, 1), ++ z0 = svlsl_m (p0, z1, 1)) ++ ++/* ++** lsl_7_u8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_u8_m_tied1, svuint8_t, ++ z0 = svlsl_n_u8_m (p0, z0, 7), ++ z0 = svlsl_m (p0, z0, 7)) ++ ++/* ++** lsl_7_u8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_u8_m_untied, svuint8_t, ++ z0 = svlsl_n_u8_m (p0, z1, 7), ++ z0 = svlsl_m (p0, z1, 7)) ++ ++/* ++** lsl_8_u8_m_tied1: ++** mov (z[0-9]+\.b), #8 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_u8_m_tied1, svuint8_t, ++ z0 = svlsl_n_u8_m (p0, z0, 8), ++ z0 = svlsl_m (p0, z0, 8)) ++ ++/* ++** lsl_8_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #8 ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_u8_m_untied, svuint8_t, ++ z0 = svlsl_n_u8_m (p0, z1, 8), ++ 
z0 = svlsl_m (p0, z1, 8)) ++ ++/* ++** lsl_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u8_z_tied1, svuint8_t, ++ z0 = svlsl_u8_z (p0, z0, z1), ++ z0 = svlsl_z (p0, z0, z1)) ++ ++/* ++** lsl_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u8_z_tied2, svuint8_t, ++ z0 = svlsl_u8_z (p0, z1, z0), ++ z0 = svlsl_z (p0, z1, z0)) ++ ++/* ++** lsl_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u8_z_untied, svuint8_t, ++ z0 = svlsl_u8_z (p0, z1, z2), ++ z0 = svlsl_z (p0, z1, z2)) ++ ++/* ++** lsl_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svlsl_n_u8_z (p0, z0, x0), ++ z0 = svlsl_z (p0, z0, x0)) ++ ++/* ++** lsl_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svlsl_n_u8_z (p0, z1, x0), ++ z0 = svlsl_z (p0, z1, x0)) ++ ++/* ++** lsl_1_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u8_z_tied1, svuint8_t, ++ z0 = svlsl_n_u8_z (p0, z0, 1), ++ z0 = svlsl_z (p0, z0, 1)) ++ ++/* ++** lsl_1_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u8_z_untied, svuint8_t, ++ z0 = svlsl_n_u8_z (p0, z1, 1), ++ z0 = svlsl_z (p0, z1, 1)) ++ ++/* ++** lsl_7_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_u8_z_tied1, svuint8_t, ++ z0 = svlsl_n_u8_z (p0, z0, 7), ++ z0 = svlsl_z (p0, z0, 7)) ++ ++/* ++** lsl_7_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_u8_z_untied, svuint8_t, ++ z0 = svlsl_n_u8_z (p0, z1, 7), ++ z0 = svlsl_z (p0, z1, 7)) ++ ++/* ++** lsl_8_u8_z_tied1: ++** mov (z[0-9]+\.b), #8 ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_u8_z_tied1, svuint8_t, ++ z0 = svlsl_n_u8_z (p0, z0, 8), ++ z0 = svlsl_z (p0, z0, 8)) ++ ++/* ++** lsl_8_u8_z_untied: ++** mov (z[0-9]+\.b), #8 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_u8_z_untied, svuint8_t, ++ z0 = svlsl_n_u8_z (p0, z1, 8), ++ z0 = svlsl_z (p0, z1, 8)) ++ ++/* ++** lsl_u8_x_tied1: ++** lsl z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u8_x_tied1, svuint8_t, ++ z0 = svlsl_u8_x (p0, z0, z1), ++ z0 = svlsl_x (p0, z0, z1)) ++ ++/* ++** lsl_u8_x_tied2: ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u8_x_tied2, svuint8_t, ++ z0 = svlsl_u8_x (p0, z1, z0), ++ z0 = svlsl_x (p0, z1, z0)) ++ ++/* ++** lsl_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_u8_x_untied, svuint8_t, ++ z0 = svlsl_u8_x (p0, z1, z2), ++ z0 = svlsl_x (p0, z1, z2)) ++ ++/* ++** lsl_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 
++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svlsl_n_u8_x (p0, z0, x0), ++ z0 = svlsl_x (p0, z0, x0)) ++ ++/* ++** lsl_w0_u8_x_untied: ++** mov z0\.b, w0 ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svlsl_n_u8_x (p0, z1, x0), ++ z0 = svlsl_x (p0, z1, x0)) ++ ++/* ++** lsl_1_u8_x_tied1: ++** lsl z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u8_x_tied1, svuint8_t, ++ z0 = svlsl_n_u8_x (p0, z0, 1), ++ z0 = svlsl_x (p0, z0, 1)) ++ ++/* ++** lsl_1_u8_x_untied: ++** lsl z0\.b, z1\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_1_u8_x_untied, svuint8_t, ++ z0 = svlsl_n_u8_x (p0, z1, 1), ++ z0 = svlsl_x (p0, z1, 1)) ++ ++/* ++** lsl_7_u8_x_tied1: ++** lsl z0\.b, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_u8_x_tied1, svuint8_t, ++ z0 = svlsl_n_u8_x (p0, z0, 7), ++ z0 = svlsl_x (p0, z0, 7)) ++ ++/* ++** lsl_7_u8_x_untied: ++** lsl z0\.b, z1\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_7_u8_x_untied, svuint8_t, ++ z0 = svlsl_n_u8_x (p0, z1, 7), ++ z0 = svlsl_x (p0, z1, 7)) ++ ++/* ++** lsl_8_u8_x_tied1: ++** mov (z[0-9]+\.b), #8 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_u8_x_tied1, svuint8_t, ++ z0 = svlsl_n_u8_x (p0, z0, 8), ++ z0 = svlsl_x (p0, z0, 8)) ++ ++/* ++** lsl_8_u8_x_untied: ++** mov z0\.b, #8 ++** lslr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_8_u8_x_untied, svuint8_t, ++ z0 = svlsl_n_u8_x (p0, z1, 8), ++ z0 = svlsl_x (p0, z1, 8)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s16.c +new file mode 100644 +index 000000000..8d63d3909 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s16.c +@@ -0,0 +1,331 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_wide_s16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s16_m_tied1, svint16_t, svuint64_t, ++ z0 = svlsl_wide_s16_m (p0, z0, z4), ++ z0 = svlsl_wide_m (p0, z0, z4)) ++ ++/* ++** lsl_wide_s16_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_s16_m_tied2, svint16_t, svuint64_t, ++ z0_res = svlsl_wide_s16_m (p0, z4, z0), ++ z0_res = svlsl_wide_m (p0, z4, z0)) ++ ++/* ++** lsl_wide_s16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s16_m_untied, svint16_t, svuint64_t, ++ z0 = svlsl_wide_s16_m (p0, z1, z4), ++ z0 = svlsl_wide_m (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_s16_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s16_m_tied1, svint16_t, uint64_t, ++ z0 = svlsl_wide_n_s16_m (p0, z0, x0), ++ z0 = svlsl_wide_m (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_s16_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s16_m_untied, svint16_t, uint64_t, ++ z0 = svlsl_wide_n_s16_m (p0, z1, x0), ++ z0 = svlsl_wide_m (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_s16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s16_m_tied1, svint16_t, ++ z0 = svlsl_wide_n_s16_m (p0, z0, 1), ++ z0 = svlsl_wide_m (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_s16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s16_m_untied, 
svint16_t, ++ z0 = svlsl_wide_n_s16_m (p0, z1, 1), ++ z0 = svlsl_wide_m (p0, z1, 1)) ++ ++/* ++** lsl_wide_15_s16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_s16_m_tied1, svint16_t, ++ z0 = svlsl_wide_n_s16_m (p0, z0, 15), ++ z0 = svlsl_wide_m (p0, z0, 15)) ++ ++/* ++** lsl_wide_15_s16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_s16_m_untied, svint16_t, ++ z0 = svlsl_wide_n_s16_m (p0, z1, 15), ++ z0 = svlsl_wide_m (p0, z1, 15)) ++ ++/* ++** lsl_wide_16_s16_m_tied1: ++** mov (z[0-9]+\.d), #16 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_s16_m_tied1, svint16_t, ++ z0 = svlsl_wide_n_s16_m (p0, z0, 16), ++ z0 = svlsl_wide_m (p0, z0, 16)) ++ ++/* ++** lsl_wide_16_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #16 ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_s16_m_untied, svint16_t, ++ z0 = svlsl_wide_n_s16_m (p0, z1, 16), ++ z0 = svlsl_wide_m (p0, z1, 16)) ++ ++/* ++** lsl_wide_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s16_z_tied1, svint16_t, svuint64_t, ++ z0 = svlsl_wide_s16_z (p0, z0, z4), ++ z0 = svlsl_wide_z (p0, z0, z4)) ++ ++/* ++** lsl_wide_s16_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.h, p0/z, z4\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_s16_z_tied2, svint16_t, svuint64_t, ++ z0_res = svlsl_wide_s16_z (p0, z4, z0), ++ z0_res = svlsl_wide_z (p0, z4, z0)) ++ ++/* ++** lsl_wide_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s16_z_untied, svint16_t, svuint64_t, ++ z0 = svlsl_wide_s16_z (p0, z1, z4), ++ z0 = svlsl_wide_z (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_s16_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s16_z_tied1, svint16_t, uint64_t, ++ z0 = svlsl_wide_n_s16_z (p0, z0, x0), ++ z0 = svlsl_wide_z (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_s16_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s16_z_untied, svint16_t, uint64_t, ++ z0 = svlsl_wide_n_s16_z (p0, z1, x0), ++ z0 = svlsl_wide_z (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s16_z_tied1, svint16_t, ++ z0 = svlsl_wide_n_s16_z (p0, z0, 1), ++ z0 = svlsl_wide_z (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s16_z_untied, svint16_t, ++ z0 = svlsl_wide_n_s16_z (p0, z1, 1), ++ z0 = svlsl_wide_z (p0, z1, 1)) ++ ++/* ++** lsl_wide_15_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_s16_z_tied1, svint16_t, ++ z0 = svlsl_wide_n_s16_z (p0, z0, 15), ++ z0 = svlsl_wide_z (p0, z0, 15)) ++ ++/* ++** lsl_wide_15_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_s16_z_untied, svint16_t, ++ z0 = svlsl_wide_n_s16_z (p0, z1, 15), ++ z0 = svlsl_wide_z (p0, z1, 15)) ++ ++/* ++** lsl_wide_16_s16_z_tied1: ++** mov (z[0-9]+\.d), #16 ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret 
++*/ ++TEST_UNIFORM_Z (lsl_wide_16_s16_z_tied1, svint16_t, ++ z0 = svlsl_wide_n_s16_z (p0, z0, 16), ++ z0 = svlsl_wide_z (p0, z0, 16)) ++ ++/* ++** lsl_wide_16_s16_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #16 ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_s16_z_untied, svint16_t, ++ z0 = svlsl_wide_n_s16_z (p0, z1, 16), ++ z0 = svlsl_wide_z (p0, z1, 16)) ++ ++/* ++** lsl_wide_s16_x_tied1: ++** lsl z0\.h, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s16_x_tied1, svint16_t, svuint64_t, ++ z0 = svlsl_wide_s16_x (p0, z0, z4), ++ z0 = svlsl_wide_x (p0, z0, z4)) ++ ++/* ++** lsl_wide_s16_x_tied2: ++** lsl z0\.h, z4\.h, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_s16_x_tied2, svint16_t, svuint64_t, ++ z0_res = svlsl_wide_s16_x (p0, z4, z0), ++ z0_res = svlsl_wide_x (p0, z4, z0)) ++ ++/* ++** lsl_wide_s16_x_untied: ++** lsl z0\.h, z1\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s16_x_untied, svint16_t, svuint64_t, ++ z0 = svlsl_wide_s16_x (p0, z1, z4), ++ z0 = svlsl_wide_x (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_s16_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s16_x_tied1, svint16_t, uint64_t, ++ z0 = svlsl_wide_n_s16_x (p0, z0, x0), ++ z0 = svlsl_wide_x (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_s16_x_untied: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s16_x_untied, svint16_t, uint64_t, ++ z0 = svlsl_wide_n_s16_x (p0, z1, x0), ++ z0 = svlsl_wide_x (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_s16_x_tied1: ++** lsl z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s16_x_tied1, svint16_t, ++ z0 = svlsl_wide_n_s16_x (p0, z0, 1), ++ z0 = svlsl_wide_x (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_s16_x_untied: ++** lsl z0\.h, z1\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s16_x_untied, svint16_t, ++ z0 = svlsl_wide_n_s16_x (p0, z1, 1), ++ z0 = svlsl_wide_x (p0, z1, 1)) ++ ++/* ++** lsl_wide_15_s16_x_tied1: ++** lsl z0\.h, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_s16_x_tied1, svint16_t, ++ z0 = svlsl_wide_n_s16_x (p0, z0, 15), ++ z0 = svlsl_wide_x (p0, z0, 15)) ++ ++/* ++** lsl_wide_15_s16_x_untied: ++** lsl z0\.h, z1\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_s16_x_untied, svint16_t, ++ z0 = svlsl_wide_n_s16_x (p0, z1, 15), ++ z0 = svlsl_wide_x (p0, z1, 15)) ++ ++/* ++** lsl_wide_16_s16_x_tied1: ++** mov (z[0-9]+\.d), #16 ++** lsl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_s16_x_tied1, svint16_t, ++ z0 = svlsl_wide_n_s16_x (p0, z0, 16), ++ z0 = svlsl_wide_x (p0, z0, 16)) ++ ++/* ++** lsl_wide_16_s16_x_untied: ++** mov (z[0-9]+\.d), #16 ++** lsl z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_s16_x_untied, svint16_t, ++ z0 = svlsl_wide_n_s16_x (p0, z1, 16), ++ z0 = svlsl_wide_x (p0, z1, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s32.c +new file mode 100644 +index 000000000..acd813df3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s32.c +@@ -0,0 +1,331 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_wide_s32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s32_m_tied1, svint32_t, svuint64_t, ++ z0 = svlsl_wide_s32_m (p0, z0, z4), ++ z0 = svlsl_wide_m (p0, z0, z4)) ++ ++/* ++** lsl_wide_s32_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** 
movprfx z0, z4 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_s32_m_tied2, svint32_t, svuint64_t, ++ z0_res = svlsl_wide_s32_m (p0, z4, z0), ++ z0_res = svlsl_wide_m (p0, z4, z0)) ++ ++/* ++** lsl_wide_s32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s32_m_untied, svint32_t, svuint64_t, ++ z0 = svlsl_wide_s32_m (p0, z1, z4), ++ z0 = svlsl_wide_m (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_s32_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s32_m_tied1, svint32_t, uint64_t, ++ z0 = svlsl_wide_n_s32_m (p0, z0, x0), ++ z0 = svlsl_wide_m (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_s32_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s32_m_untied, svint32_t, uint64_t, ++ z0 = svlsl_wide_n_s32_m (p0, z1, x0), ++ z0 = svlsl_wide_m (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_s32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s32_m_tied1, svint32_t, ++ z0 = svlsl_wide_n_s32_m (p0, z0, 1), ++ z0 = svlsl_wide_m (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_s32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s32_m_untied, svint32_t, ++ z0 = svlsl_wide_n_s32_m (p0, z1, 1), ++ z0 = svlsl_wide_m (p0, z1, 1)) ++ ++/* ++** lsl_wide_31_s32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_s32_m_tied1, svint32_t, ++ z0 = svlsl_wide_n_s32_m (p0, z0, 31), ++ z0 = svlsl_wide_m (p0, z0, 31)) ++ ++/* ++** lsl_wide_31_s32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_s32_m_untied, svint32_t, ++ z0 = svlsl_wide_n_s32_m (p0, z1, 31), ++ z0 = svlsl_wide_m (p0, z1, 31)) ++ ++/* ++** lsl_wide_32_s32_m_tied1: ++** mov (z[0-9]+\.d), #32 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_s32_m_tied1, svint32_t, ++ z0 = svlsl_wide_n_s32_m (p0, z0, 32), ++ z0 = svlsl_wide_m (p0, z0, 32)) ++ ++/* ++** lsl_wide_32_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #32 ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_s32_m_untied, svint32_t, ++ z0 = svlsl_wide_n_s32_m (p0, z1, 32), ++ z0 = svlsl_wide_m (p0, z1, 32)) ++ ++/* ++** lsl_wide_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s32_z_tied1, svint32_t, svuint64_t, ++ z0 = svlsl_wide_s32_z (p0, z0, z4), ++ z0 = svlsl_wide_z (p0, z0, z4)) ++ ++/* ++** lsl_wide_s32_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.s, p0/z, z4\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_s32_z_tied2, svint32_t, svuint64_t, ++ z0_res = svlsl_wide_s32_z (p0, z4, z0), ++ z0_res = svlsl_wide_z (p0, z4, z0)) ++ ++/* ++** lsl_wide_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s32_z_untied, svint32_t, svuint64_t, ++ z0 = svlsl_wide_s32_z (p0, z1, z4), ++ z0 = svlsl_wide_z (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_s32_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s32_z_tied1, svint32_t, uint64_t, ++ z0 = svlsl_wide_n_s32_z (p0, z0, x0), ++ z0 = svlsl_wide_z (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_s32_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx 
z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s32_z_untied, svint32_t, uint64_t, ++ z0 = svlsl_wide_n_s32_z (p0, z1, x0), ++ z0 = svlsl_wide_z (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s32_z_tied1, svint32_t, ++ z0 = svlsl_wide_n_s32_z (p0, z0, 1), ++ z0 = svlsl_wide_z (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s32_z_untied, svint32_t, ++ z0 = svlsl_wide_n_s32_z (p0, z1, 1), ++ z0 = svlsl_wide_z (p0, z1, 1)) ++ ++/* ++** lsl_wide_31_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_s32_z_tied1, svint32_t, ++ z0 = svlsl_wide_n_s32_z (p0, z0, 31), ++ z0 = svlsl_wide_z (p0, z0, 31)) ++ ++/* ++** lsl_wide_31_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_s32_z_untied, svint32_t, ++ z0 = svlsl_wide_n_s32_z (p0, z1, 31), ++ z0 = svlsl_wide_z (p0, z1, 31)) ++ ++/* ++** lsl_wide_32_s32_z_tied1: ++** mov (z[0-9]+\.d), #32 ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_s32_z_tied1, svint32_t, ++ z0 = svlsl_wide_n_s32_z (p0, z0, 32), ++ z0 = svlsl_wide_z (p0, z0, 32)) ++ ++/* ++** lsl_wide_32_s32_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #32 ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_s32_z_untied, svint32_t, ++ z0 = svlsl_wide_n_s32_z (p0, z1, 32), ++ z0 = svlsl_wide_z (p0, z1, 32)) ++ ++/* ++** lsl_wide_s32_x_tied1: ++** lsl z0\.s, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s32_x_tied1, svint32_t, svuint64_t, ++ z0 = svlsl_wide_s32_x (p0, z0, z4), ++ z0 = svlsl_wide_x (p0, z0, z4)) ++ ++/* ++** lsl_wide_s32_x_tied2: ++** lsl z0\.s, z4\.s, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_s32_x_tied2, svint32_t, svuint64_t, ++ z0_res = svlsl_wide_s32_x (p0, z4, z0), ++ z0_res = svlsl_wide_x (p0, z4, z0)) ++ ++/* ++** lsl_wide_s32_x_untied: ++** lsl z0\.s, z1\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s32_x_untied, svint32_t, svuint64_t, ++ z0 = svlsl_wide_s32_x (p0, z1, z4), ++ z0 = svlsl_wide_x (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_s32_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s32_x_tied1, svint32_t, uint64_t, ++ z0 = svlsl_wide_n_s32_x (p0, z0, x0), ++ z0 = svlsl_wide_x (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_s32_x_untied: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s32_x_untied, svint32_t, uint64_t, ++ z0 = svlsl_wide_n_s32_x (p0, z1, x0), ++ z0 = svlsl_wide_x (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_s32_x_tied1: ++** lsl z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s32_x_tied1, svint32_t, ++ z0 = svlsl_wide_n_s32_x (p0, z0, 1), ++ z0 = svlsl_wide_x (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_s32_x_untied: ++** lsl z0\.s, z1\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s32_x_untied, svint32_t, ++ z0 = svlsl_wide_n_s32_x (p0, z1, 1), ++ z0 = svlsl_wide_x (p0, z1, 1)) ++ ++/* ++** lsl_wide_31_s32_x_tied1: ++** lsl z0\.s, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_s32_x_tied1, svint32_t, ++ z0 = svlsl_wide_n_s32_x (p0, z0, 31), ++ z0 = svlsl_wide_x (p0, z0, 31)) ++ ++/* ++** lsl_wide_31_s32_x_untied: 
++** lsl z0\.s, z1\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_s32_x_untied, svint32_t, ++ z0 = svlsl_wide_n_s32_x (p0, z1, 31), ++ z0 = svlsl_wide_x (p0, z1, 31)) ++ ++/* ++** lsl_wide_32_s32_x_tied1: ++** mov (z[0-9]+\.d), #32 ++** lsl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_s32_x_tied1, svint32_t, ++ z0 = svlsl_wide_n_s32_x (p0, z0, 32), ++ z0 = svlsl_wide_x (p0, z0, 32)) ++ ++/* ++** lsl_wide_32_s32_x_untied: ++** mov (z[0-9]+\.d), #32 ++** lsl z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_s32_x_untied, svint32_t, ++ z0 = svlsl_wide_n_s32_x (p0, z1, 32), ++ z0 = svlsl_wide_x (p0, z1, 32)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s8.c +new file mode 100644 +index 000000000..17e8e8685 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s8.c +@@ -0,0 +1,331 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_wide_s8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s8_m_tied1, svint8_t, svuint64_t, ++ z0 = svlsl_wide_s8_m (p0, z0, z4), ++ z0 = svlsl_wide_m (p0, z0, z4)) ++ ++/* ++** lsl_wide_s8_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_s8_m_tied2, svint8_t, svuint64_t, ++ z0_res = svlsl_wide_s8_m (p0, z4, z0), ++ z0_res = svlsl_wide_m (p0, z4, z0)) ++ ++/* ++** lsl_wide_s8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s8_m_untied, svint8_t, svuint64_t, ++ z0 = svlsl_wide_s8_m (p0, z1, z4), ++ z0 = svlsl_wide_m (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_s8_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s8_m_tied1, svint8_t, uint64_t, ++ z0 = svlsl_wide_n_s8_m (p0, z0, x0), ++ z0 = svlsl_wide_m (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_s8_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s8_m_untied, svint8_t, uint64_t, ++ z0 = svlsl_wide_n_s8_m (p0, z1, x0), ++ z0 = svlsl_wide_m (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_s8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s8_m_tied1, svint8_t, ++ z0 = svlsl_wide_n_s8_m (p0, z0, 1), ++ z0 = svlsl_wide_m (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_s8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s8_m_untied, svint8_t, ++ z0 = svlsl_wide_n_s8_m (p0, z1, 1), ++ z0 = svlsl_wide_m (p0, z1, 1)) ++ ++/* ++** lsl_wide_7_s8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_s8_m_tied1, svint8_t, ++ z0 = svlsl_wide_n_s8_m (p0, z0, 7), ++ z0 = svlsl_wide_m (p0, z0, 7)) ++ ++/* ++** lsl_wide_7_s8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_s8_m_untied, svint8_t, ++ z0 = svlsl_wide_n_s8_m (p0, z1, 7), ++ z0 = svlsl_wide_m (p0, z1, 7)) ++ ++/* ++** lsl_wide_8_s8_m_tied1: ++** mov (z[0-9]+\.d), #8 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_s8_m_tied1, svint8_t, ++ z0 = svlsl_wide_n_s8_m (p0, z0, 8), ++ z0 = svlsl_wide_m (p0, z0, 8)) ++ ++/* ++** lsl_wide_8_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #8 ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z 
(lsl_wide_8_s8_m_untied, svint8_t, ++ z0 = svlsl_wide_n_s8_m (p0, z1, 8), ++ z0 = svlsl_wide_m (p0, z1, 8)) ++ ++/* ++** lsl_wide_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s8_z_tied1, svint8_t, svuint64_t, ++ z0 = svlsl_wide_s8_z (p0, z0, z4), ++ z0 = svlsl_wide_z (p0, z0, z4)) ++ ++/* ++** lsl_wide_s8_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.b, p0/z, z4\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_s8_z_tied2, svint8_t, svuint64_t, ++ z0_res = svlsl_wide_s8_z (p0, z4, z0), ++ z0_res = svlsl_wide_z (p0, z4, z0)) ++ ++/* ++** lsl_wide_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s8_z_untied, svint8_t, svuint64_t, ++ z0 = svlsl_wide_s8_z (p0, z1, z4), ++ z0 = svlsl_wide_z (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_s8_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s8_z_tied1, svint8_t, uint64_t, ++ z0 = svlsl_wide_n_s8_z (p0, z0, x0), ++ z0 = svlsl_wide_z (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_s8_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s8_z_untied, svint8_t, uint64_t, ++ z0 = svlsl_wide_n_s8_z (p0, z1, x0), ++ z0 = svlsl_wide_z (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s8_z_tied1, svint8_t, ++ z0 = svlsl_wide_n_s8_z (p0, z0, 1), ++ z0 = svlsl_wide_z (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s8_z_untied, svint8_t, ++ z0 = svlsl_wide_n_s8_z (p0, z1, 1), ++ z0 = svlsl_wide_z (p0, z1, 1)) ++ ++/* ++** lsl_wide_7_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_s8_z_tied1, svint8_t, ++ z0 = svlsl_wide_n_s8_z (p0, z0, 7), ++ z0 = svlsl_wide_z (p0, z0, 7)) ++ ++/* ++** lsl_wide_7_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_s8_z_untied, svint8_t, ++ z0 = svlsl_wide_n_s8_z (p0, z1, 7), ++ z0 = svlsl_wide_z (p0, z1, 7)) ++ ++/* ++** lsl_wide_8_s8_z_tied1: ++** mov (z[0-9]+\.d), #8 ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_s8_z_tied1, svint8_t, ++ z0 = svlsl_wide_n_s8_z (p0, z0, 8), ++ z0 = svlsl_wide_z (p0, z0, 8)) ++ ++/* ++** lsl_wide_8_s8_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #8 ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_s8_z_untied, svint8_t, ++ z0 = svlsl_wide_n_s8_z (p0, z1, 8), ++ z0 = svlsl_wide_z (p0, z1, 8)) ++ ++/* ++** lsl_wide_s8_x_tied1: ++** lsl z0\.b, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s8_x_tied1, svint8_t, svuint64_t, ++ z0 = svlsl_wide_s8_x (p0, z0, z4), ++ z0 = svlsl_wide_x (p0, z0, z4)) ++ ++/* ++** lsl_wide_s8_x_tied2: ++** lsl z0\.b, z4\.b, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_s8_x_tied2, svint8_t, svuint64_t, ++ z0_res = svlsl_wide_s8_x (p0, z4, z0), ++ z0_res = svlsl_wide_x (p0, z4, z0)) ++ ++/* ++** lsl_wide_s8_x_untied: ++** lsl z0\.b, z1\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_s8_x_untied, svint8_t, svuint64_t, ++ z0 = svlsl_wide_s8_x 
(p0, z1, z4), ++ z0 = svlsl_wide_x (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_s8_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s8_x_tied1, svint8_t, uint64_t, ++ z0 = svlsl_wide_n_s8_x (p0, z0, x0), ++ z0 = svlsl_wide_x (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_s8_x_untied: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_s8_x_untied, svint8_t, uint64_t, ++ z0 = svlsl_wide_n_s8_x (p0, z1, x0), ++ z0 = svlsl_wide_x (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_s8_x_tied1: ++** lsl z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s8_x_tied1, svint8_t, ++ z0 = svlsl_wide_n_s8_x (p0, z0, 1), ++ z0 = svlsl_wide_x (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_s8_x_untied: ++** lsl z0\.b, z1\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_s8_x_untied, svint8_t, ++ z0 = svlsl_wide_n_s8_x (p0, z1, 1), ++ z0 = svlsl_wide_x (p0, z1, 1)) ++ ++/* ++** lsl_wide_7_s8_x_tied1: ++** lsl z0\.b, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_s8_x_tied1, svint8_t, ++ z0 = svlsl_wide_n_s8_x (p0, z0, 7), ++ z0 = svlsl_wide_x (p0, z0, 7)) ++ ++/* ++** lsl_wide_7_s8_x_untied: ++** lsl z0\.b, z1\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_s8_x_untied, svint8_t, ++ z0 = svlsl_wide_n_s8_x (p0, z1, 7), ++ z0 = svlsl_wide_x (p0, z1, 7)) ++ ++/* ++** lsl_wide_8_s8_x_tied1: ++** mov (z[0-9]+\.d), #8 ++** lsl z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_s8_x_tied1, svint8_t, ++ z0 = svlsl_wide_n_s8_x (p0, z0, 8), ++ z0 = svlsl_wide_x (p0, z0, 8)) ++ ++/* ++** lsl_wide_8_s8_x_untied: ++** mov (z[0-9]+\.d), #8 ++** lsl z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_s8_x_untied, svint8_t, ++ z0 = svlsl_wide_n_s8_x (p0, z1, 8), ++ z0 = svlsl_wide_x (p0, z1, 8)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u16.c +new file mode 100644 +index 000000000..cff24a850 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u16.c +@@ -0,0 +1,331 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_wide_u16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u16_m_tied1, svuint16_t, svuint64_t, ++ z0 = svlsl_wide_u16_m (p0, z0, z4), ++ z0 = svlsl_wide_m (p0, z0, z4)) ++ ++/* ++** lsl_wide_u16_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_u16_m_tied2, svuint16_t, svuint64_t, ++ z0_res = svlsl_wide_u16_m (p0, z4, z0), ++ z0_res = svlsl_wide_m (p0, z4, z0)) ++ ++/* ++** lsl_wide_u16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u16_m_untied, svuint16_t, svuint64_t, ++ z0 = svlsl_wide_u16_m (p0, z1, z4), ++ z0 = svlsl_wide_m (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_u16_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u16_m_tied1, svuint16_t, uint64_t, ++ z0 = svlsl_wide_n_u16_m (p0, z0, x0), ++ z0 = svlsl_wide_m (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_u16_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u16_m_untied, svuint16_t, uint64_t, ++ z0 = svlsl_wide_n_u16_m (p0, z1, x0), ++ z0 = svlsl_wide_m (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_u16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z 
(lsl_wide_1_u16_m_tied1, svuint16_t, ++ z0 = svlsl_wide_n_u16_m (p0, z0, 1), ++ z0 = svlsl_wide_m (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_u16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u16_m_untied, svuint16_t, ++ z0 = svlsl_wide_n_u16_m (p0, z1, 1), ++ z0 = svlsl_wide_m (p0, z1, 1)) ++ ++/* ++** lsl_wide_15_u16_m_tied1: ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_u16_m_tied1, svuint16_t, ++ z0 = svlsl_wide_n_u16_m (p0, z0, 15), ++ z0 = svlsl_wide_m (p0, z0, 15)) ++ ++/* ++** lsl_wide_15_u16_m_untied: ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_u16_m_untied, svuint16_t, ++ z0 = svlsl_wide_n_u16_m (p0, z1, 15), ++ z0 = svlsl_wide_m (p0, z1, 15)) ++ ++/* ++** lsl_wide_16_u16_m_tied1: ++** mov (z[0-9]+\.d), #16 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_u16_m_tied1, svuint16_t, ++ z0 = svlsl_wide_n_u16_m (p0, z0, 16), ++ z0 = svlsl_wide_m (p0, z0, 16)) ++ ++/* ++** lsl_wide_16_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #16 ++** movprfx z0, z1 ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_u16_m_untied, svuint16_t, ++ z0 = svlsl_wide_n_u16_m (p0, z1, 16), ++ z0 = svlsl_wide_m (p0, z1, 16)) ++ ++/* ++** lsl_wide_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u16_z_tied1, svuint16_t, svuint64_t, ++ z0 = svlsl_wide_u16_z (p0, z0, z4), ++ z0 = svlsl_wide_z (p0, z0, z4)) ++ ++/* ++** lsl_wide_u16_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.h, p0/z, z4\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_u16_z_tied2, svuint16_t, svuint64_t, ++ z0_res = svlsl_wide_u16_z (p0, z4, z0), ++ z0_res = svlsl_wide_z (p0, z4, z0)) ++ ++/* ++** lsl_wide_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u16_z_untied, svuint16_t, svuint64_t, ++ z0 = svlsl_wide_u16_z (p0, z1, z4), ++ z0 = svlsl_wide_z (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_u16_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u16_z_tied1, svuint16_t, uint64_t, ++ z0 = svlsl_wide_n_u16_z (p0, z0, x0), ++ z0 = svlsl_wide_z (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_u16_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u16_z_untied, svuint16_t, uint64_t, ++ z0 = svlsl_wide_n_u16_z (p0, z1, x0), ++ z0 = svlsl_wide_z (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u16_z_tied1, svuint16_t, ++ z0 = svlsl_wide_n_u16_z (p0, z0, 1), ++ z0 = svlsl_wide_z (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u16_z_untied, svuint16_t, ++ z0 = svlsl_wide_n_u16_z (p0, z1, 1), ++ z0 = svlsl_wide_z (p0, z1, 1)) ++ ++/* ++** lsl_wide_15_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_u16_z_tied1, svuint16_t, ++ z0 = svlsl_wide_n_u16_z (p0, z0, 15), ++ z0 = svlsl_wide_z (p0, z0, 15)) ++ ++/* ++** lsl_wide_15_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ 
++TEST_UNIFORM_Z (lsl_wide_15_u16_z_untied, svuint16_t, ++ z0 = svlsl_wide_n_u16_z (p0, z1, 15), ++ z0 = svlsl_wide_z (p0, z1, 15)) ++ ++/* ++** lsl_wide_16_u16_z_tied1: ++** mov (z[0-9]+\.d), #16 ++** movprfx z0\.h, p0/z, z0\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_u16_z_tied1, svuint16_t, ++ z0 = svlsl_wide_n_u16_z (p0, z0, 16), ++ z0 = svlsl_wide_z (p0, z0, 16)) ++ ++/* ++** lsl_wide_16_u16_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #16 ++** movprfx z0\.h, p0/z, z1\.h ++** lsl z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_u16_z_untied, svuint16_t, ++ z0 = svlsl_wide_n_u16_z (p0, z1, 16), ++ z0 = svlsl_wide_z (p0, z1, 16)) ++ ++/* ++** lsl_wide_u16_x_tied1: ++** lsl z0\.h, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u16_x_tied1, svuint16_t, svuint64_t, ++ z0 = svlsl_wide_u16_x (p0, z0, z4), ++ z0 = svlsl_wide_x (p0, z0, z4)) ++ ++/* ++** lsl_wide_u16_x_tied2: ++** lsl z0\.h, z4\.h, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_u16_x_tied2, svuint16_t, svuint64_t, ++ z0_res = svlsl_wide_u16_x (p0, z4, z0), ++ z0_res = svlsl_wide_x (p0, z4, z0)) ++ ++/* ++** lsl_wide_u16_x_untied: ++** lsl z0\.h, z1\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u16_x_untied, svuint16_t, svuint64_t, ++ z0 = svlsl_wide_u16_x (p0, z1, z4), ++ z0 = svlsl_wide_x (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_u16_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u16_x_tied1, svuint16_t, uint64_t, ++ z0 = svlsl_wide_n_u16_x (p0, z0, x0), ++ z0 = svlsl_wide_x (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_u16_x_untied: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u16_x_untied, svuint16_t, uint64_t, ++ z0 = svlsl_wide_n_u16_x (p0, z1, x0), ++ z0 = svlsl_wide_x (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_u16_x_tied1: ++** lsl z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u16_x_tied1, svuint16_t, ++ z0 = svlsl_wide_n_u16_x (p0, z0, 1), ++ z0 = svlsl_wide_x (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_u16_x_untied: ++** lsl z0\.h, z1\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u16_x_untied, svuint16_t, ++ z0 = svlsl_wide_n_u16_x (p0, z1, 1), ++ z0 = svlsl_wide_x (p0, z1, 1)) ++ ++/* ++** lsl_wide_15_u16_x_tied1: ++** lsl z0\.h, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_u16_x_tied1, svuint16_t, ++ z0 = svlsl_wide_n_u16_x (p0, z0, 15), ++ z0 = svlsl_wide_x (p0, z0, 15)) ++ ++/* ++** lsl_wide_15_u16_x_untied: ++** lsl z0\.h, z1\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_15_u16_x_untied, svuint16_t, ++ z0 = svlsl_wide_n_u16_x (p0, z1, 15), ++ z0 = svlsl_wide_x (p0, z1, 15)) ++ ++/* ++** lsl_wide_16_u16_x_tied1: ++** mov (z[0-9]+\.d), #16 ++** lsl z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_u16_x_tied1, svuint16_t, ++ z0 = svlsl_wide_n_u16_x (p0, z0, 16), ++ z0 = svlsl_wide_x (p0, z0, 16)) ++ ++/* ++** lsl_wide_16_u16_x_untied: ++** mov (z[0-9]+\.d), #16 ++** lsl z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_16_u16_x_untied, svuint16_t, ++ z0 = svlsl_wide_n_u16_x (p0, z1, 16), ++ z0 = svlsl_wide_x (p0, z1, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u32.c +new file mode 100644 +index 000000000..7b1afab49 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u32.c +@@ -0,0 +1,331 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* 
++** lsl_wide_u32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u32_m_tied1, svuint32_t, svuint64_t, ++ z0 = svlsl_wide_u32_m (p0, z0, z4), ++ z0 = svlsl_wide_m (p0, z0, z4)) ++ ++/* ++** lsl_wide_u32_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_u32_m_tied2, svuint32_t, svuint64_t, ++ z0_res = svlsl_wide_u32_m (p0, z4, z0), ++ z0_res = svlsl_wide_m (p0, z4, z0)) ++ ++/* ++** lsl_wide_u32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u32_m_untied, svuint32_t, svuint64_t, ++ z0 = svlsl_wide_u32_m (p0, z1, z4), ++ z0 = svlsl_wide_m (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_u32_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u32_m_tied1, svuint32_t, uint64_t, ++ z0 = svlsl_wide_n_u32_m (p0, z0, x0), ++ z0 = svlsl_wide_m (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_u32_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u32_m_untied, svuint32_t, uint64_t, ++ z0 = svlsl_wide_n_u32_m (p0, z1, x0), ++ z0 = svlsl_wide_m (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_u32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u32_m_tied1, svuint32_t, ++ z0 = svlsl_wide_n_u32_m (p0, z0, 1), ++ z0 = svlsl_wide_m (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_u32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u32_m_untied, svuint32_t, ++ z0 = svlsl_wide_n_u32_m (p0, z1, 1), ++ z0 = svlsl_wide_m (p0, z1, 1)) ++ ++/* ++** lsl_wide_31_u32_m_tied1: ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_u32_m_tied1, svuint32_t, ++ z0 = svlsl_wide_n_u32_m (p0, z0, 31), ++ z0 = svlsl_wide_m (p0, z0, 31)) ++ ++/* ++** lsl_wide_31_u32_m_untied: ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_u32_m_untied, svuint32_t, ++ z0 = svlsl_wide_n_u32_m (p0, z1, 31), ++ z0 = svlsl_wide_m (p0, z1, 31)) ++ ++/* ++** lsl_wide_32_u32_m_tied1: ++** mov (z[0-9]+\.d), #32 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_u32_m_tied1, svuint32_t, ++ z0 = svlsl_wide_n_u32_m (p0, z0, 32), ++ z0 = svlsl_wide_m (p0, z0, 32)) ++ ++/* ++** lsl_wide_32_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #32 ++** movprfx z0, z1 ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_u32_m_untied, svuint32_t, ++ z0 = svlsl_wide_n_u32_m (p0, z1, 32), ++ z0 = svlsl_wide_m (p0, z1, 32)) ++ ++/* ++** lsl_wide_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u32_z_tied1, svuint32_t, svuint64_t, ++ z0 = svlsl_wide_u32_z (p0, z0, z4), ++ z0 = svlsl_wide_z (p0, z0, z4)) ++ ++/* ++** lsl_wide_u32_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.s, p0/z, z4\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_u32_z_tied2, svuint32_t, svuint64_t, ++ z0_res = svlsl_wide_u32_z (p0, z4, z0), ++ z0_res = svlsl_wide_z (p0, z4, z0)) ++ ++/* ++** lsl_wide_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u32_z_untied, svuint32_t, svuint64_t, ++ z0 = svlsl_wide_u32_z (p0, z1, z4), ++ z0 = svlsl_wide_z (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_u32_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.s, 
p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u32_z_tied1, svuint32_t, uint64_t, ++ z0 = svlsl_wide_n_u32_z (p0, z0, x0), ++ z0 = svlsl_wide_z (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_u32_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u32_z_untied, svuint32_t, uint64_t, ++ z0 = svlsl_wide_n_u32_z (p0, z1, x0), ++ z0 = svlsl_wide_z (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u32_z_tied1, svuint32_t, ++ z0 = svlsl_wide_n_u32_z (p0, z0, 1), ++ z0 = svlsl_wide_z (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u32_z_untied, svuint32_t, ++ z0 = svlsl_wide_n_u32_z (p0, z1, 1), ++ z0 = svlsl_wide_z (p0, z1, 1)) ++ ++/* ++** lsl_wide_31_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_u32_z_tied1, svuint32_t, ++ z0 = svlsl_wide_n_u32_z (p0, z0, 31), ++ z0 = svlsl_wide_z (p0, z0, 31)) ++ ++/* ++** lsl_wide_31_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_u32_z_untied, svuint32_t, ++ z0 = svlsl_wide_n_u32_z (p0, z1, 31), ++ z0 = svlsl_wide_z (p0, z1, 31)) ++ ++/* ++** lsl_wide_32_u32_z_tied1: ++** mov (z[0-9]+\.d), #32 ++** movprfx z0\.s, p0/z, z0\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_u32_z_tied1, svuint32_t, ++ z0 = svlsl_wide_n_u32_z (p0, z0, 32), ++ z0 = svlsl_wide_z (p0, z0, 32)) ++ ++/* ++** lsl_wide_32_u32_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #32 ++** movprfx z0\.s, p0/z, z1\.s ++** lsl z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_u32_z_untied, svuint32_t, ++ z0 = svlsl_wide_n_u32_z (p0, z1, 32), ++ z0 = svlsl_wide_z (p0, z1, 32)) ++ ++/* ++** lsl_wide_u32_x_tied1: ++** lsl z0\.s, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u32_x_tied1, svuint32_t, svuint64_t, ++ z0 = svlsl_wide_u32_x (p0, z0, z4), ++ z0 = svlsl_wide_x (p0, z0, z4)) ++ ++/* ++** lsl_wide_u32_x_tied2: ++** lsl z0\.s, z4\.s, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_u32_x_tied2, svuint32_t, svuint64_t, ++ z0_res = svlsl_wide_u32_x (p0, z4, z0), ++ z0_res = svlsl_wide_x (p0, z4, z0)) ++ ++/* ++** lsl_wide_u32_x_untied: ++** lsl z0\.s, z1\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u32_x_untied, svuint32_t, svuint64_t, ++ z0 = svlsl_wide_u32_x (p0, z1, z4), ++ z0 = svlsl_wide_x (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_u32_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u32_x_tied1, svuint32_t, uint64_t, ++ z0 = svlsl_wide_n_u32_x (p0, z0, x0), ++ z0 = svlsl_wide_x (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_u32_x_untied: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u32_x_untied, svuint32_t, uint64_t, ++ z0 = svlsl_wide_n_u32_x (p0, z1, x0), ++ z0 = svlsl_wide_x (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_u32_x_tied1: ++** lsl z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u32_x_tied1, svuint32_t, ++ z0 = svlsl_wide_n_u32_x (p0, z0, 1), ++ z0 = svlsl_wide_x (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_u32_x_untied: ++** lsl z0\.s, z1\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u32_x_untied, svuint32_t, ++ z0 = 
svlsl_wide_n_u32_x (p0, z1, 1), ++ z0 = svlsl_wide_x (p0, z1, 1)) ++ ++/* ++** lsl_wide_31_u32_x_tied1: ++** lsl z0\.s, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_u32_x_tied1, svuint32_t, ++ z0 = svlsl_wide_n_u32_x (p0, z0, 31), ++ z0 = svlsl_wide_x (p0, z0, 31)) ++ ++/* ++** lsl_wide_31_u32_x_untied: ++** lsl z0\.s, z1\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_31_u32_x_untied, svuint32_t, ++ z0 = svlsl_wide_n_u32_x (p0, z1, 31), ++ z0 = svlsl_wide_x (p0, z1, 31)) ++ ++/* ++** lsl_wide_32_u32_x_tied1: ++** mov (z[0-9]+\.d), #32 ++** lsl z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_u32_x_tied1, svuint32_t, ++ z0 = svlsl_wide_n_u32_x (p0, z0, 32), ++ z0 = svlsl_wide_x (p0, z0, 32)) ++ ++/* ++** lsl_wide_32_u32_x_untied: ++** mov (z[0-9]+\.d), #32 ++** lsl z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_32_u32_x_untied, svuint32_t, ++ z0 = svlsl_wide_n_u32_x (p0, z1, 32), ++ z0 = svlsl_wide_x (p0, z1, 32)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u8.c +new file mode 100644 +index 000000000..df8b1ec86 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u8.c +@@ -0,0 +1,331 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsl_wide_u8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u8_m_tied1, svuint8_t, svuint64_t, ++ z0 = svlsl_wide_u8_m (p0, z0, z4), ++ z0 = svlsl_wide_m (p0, z0, z4)) ++ ++/* ++** lsl_wide_u8_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_u8_m_tied2, svuint8_t, svuint64_t, ++ z0_res = svlsl_wide_u8_m (p0, z4, z0), ++ z0_res = svlsl_wide_m (p0, z4, z0)) ++ ++/* ++** lsl_wide_u8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u8_m_untied, svuint8_t, svuint64_t, ++ z0 = svlsl_wide_u8_m (p0, z1, z4), ++ z0 = svlsl_wide_m (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_u8_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u8_m_tied1, svuint8_t, uint64_t, ++ z0 = svlsl_wide_n_u8_m (p0, z0, x0), ++ z0 = svlsl_wide_m (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_u8_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u8_m_untied, svuint8_t, uint64_t, ++ z0 = svlsl_wide_n_u8_m (p0, z1, x0), ++ z0 = svlsl_wide_m (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_u8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u8_m_tied1, svuint8_t, ++ z0 = svlsl_wide_n_u8_m (p0, z0, 1), ++ z0 = svlsl_wide_m (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_u8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u8_m_untied, svuint8_t, ++ z0 = svlsl_wide_n_u8_m (p0, z1, 1), ++ z0 = svlsl_wide_m (p0, z1, 1)) ++ ++/* ++** lsl_wide_7_u8_m_tied1: ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_u8_m_tied1, svuint8_t, ++ z0 = svlsl_wide_n_u8_m (p0, z0, 7), ++ z0 = svlsl_wide_m (p0, z0, 7)) ++ ++/* ++** lsl_wide_7_u8_m_untied: ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_u8_m_untied, svuint8_t, ++ z0 = svlsl_wide_n_u8_m (p0, z1, 7), ++ z0 = svlsl_wide_m (p0, z1, 7)) ++ ++/* ++** lsl_wide_8_u8_m_tied1: ++** mov (z[0-9]+\.d), #8 ++** lsl z0\.b, p0/m, 
z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_u8_m_tied1, svuint8_t, ++ z0 = svlsl_wide_n_u8_m (p0, z0, 8), ++ z0 = svlsl_wide_m (p0, z0, 8)) ++ ++/* ++** lsl_wide_8_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #8 ++** movprfx z0, z1 ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_u8_m_untied, svuint8_t, ++ z0 = svlsl_wide_n_u8_m (p0, z1, 8), ++ z0 = svlsl_wide_m (p0, z1, 8)) ++ ++/* ++** lsl_wide_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u8_z_tied1, svuint8_t, svuint64_t, ++ z0 = svlsl_wide_u8_z (p0, z0, z4), ++ z0 = svlsl_wide_z (p0, z0, z4)) ++ ++/* ++** lsl_wide_u8_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.b, p0/z, z4\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_u8_z_tied2, svuint8_t, svuint64_t, ++ z0_res = svlsl_wide_u8_z (p0, z4, z0), ++ z0_res = svlsl_wide_z (p0, z4, z0)) ++ ++/* ++** lsl_wide_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u8_z_untied, svuint8_t, svuint64_t, ++ z0 = svlsl_wide_u8_z (p0, z1, z4), ++ z0 = svlsl_wide_z (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_u8_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u8_z_tied1, svuint8_t, uint64_t, ++ z0 = svlsl_wide_n_u8_z (p0, z0, x0), ++ z0 = svlsl_wide_z (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_u8_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u8_z_untied, svuint8_t, uint64_t, ++ z0 = svlsl_wide_n_u8_z (p0, z1, x0), ++ z0 = svlsl_wide_z (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u8_z_tied1, svuint8_t, ++ z0 = svlsl_wide_n_u8_z (p0, z0, 1), ++ z0 = svlsl_wide_z (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u8_z_untied, svuint8_t, ++ z0 = svlsl_wide_n_u8_z (p0, z1, 1), ++ z0 = svlsl_wide_z (p0, z1, 1)) ++ ++/* ++** lsl_wide_7_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_u8_z_tied1, svuint8_t, ++ z0 = svlsl_wide_n_u8_z (p0, z0, 7), ++ z0 = svlsl_wide_z (p0, z0, 7)) ++ ++/* ++** lsl_wide_7_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_u8_z_untied, svuint8_t, ++ z0 = svlsl_wide_n_u8_z (p0, z1, 7), ++ z0 = svlsl_wide_z (p0, z1, 7)) ++ ++/* ++** lsl_wide_8_u8_z_tied1: ++** mov (z[0-9]+\.d), #8 ++** movprfx z0\.b, p0/z, z0\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_u8_z_tied1, svuint8_t, ++ z0 = svlsl_wide_n_u8_z (p0, z0, 8), ++ z0 = svlsl_wide_z (p0, z0, 8)) ++ ++/* ++** lsl_wide_8_u8_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #8 ++** movprfx z0\.b, p0/z, z1\.b ++** lsl z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_u8_z_untied, svuint8_t, ++ z0 = svlsl_wide_n_u8_z (p0, z1, 8), ++ z0 = svlsl_wide_z (p0, z1, 8)) ++ ++/* ++** lsl_wide_u8_x_tied1: ++** lsl z0\.b, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u8_x_tied1, svuint8_t, svuint64_t, ++ z0 = svlsl_wide_u8_x (p0, z0, z4), ++ z0 = svlsl_wide_x (p0, z0, z4)) ++ ++/* ++** lsl_wide_u8_x_tied2: ++** lsl z0\.b, z4\.b, 
z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsl_wide_u8_x_tied2, svuint8_t, svuint64_t, ++ z0_res = svlsl_wide_u8_x (p0, z4, z0), ++ z0_res = svlsl_wide_x (p0, z4, z0)) ++ ++/* ++** lsl_wide_u8_x_untied: ++** lsl z0\.b, z1\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsl_wide_u8_x_untied, svuint8_t, svuint64_t, ++ z0 = svlsl_wide_u8_x (p0, z1, z4), ++ z0 = svlsl_wide_x (p0, z1, z4)) ++ ++/* ++** lsl_wide_x0_u8_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u8_x_tied1, svuint8_t, uint64_t, ++ z0 = svlsl_wide_n_u8_x (p0, z0, x0), ++ z0 = svlsl_wide_x (p0, z0, x0)) ++ ++/* ++** lsl_wide_x0_u8_x_untied: ++** mov (z[0-9]+\.d), x0 ++** lsl z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsl_wide_x0_u8_x_untied, svuint8_t, uint64_t, ++ z0 = svlsl_wide_n_u8_x (p0, z1, x0), ++ z0 = svlsl_wide_x (p0, z1, x0)) ++ ++/* ++** lsl_wide_1_u8_x_tied1: ++** lsl z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u8_x_tied1, svuint8_t, ++ z0 = svlsl_wide_n_u8_x (p0, z0, 1), ++ z0 = svlsl_wide_x (p0, z0, 1)) ++ ++/* ++** lsl_wide_1_u8_x_untied: ++** lsl z0\.b, z1\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_1_u8_x_untied, svuint8_t, ++ z0 = svlsl_wide_n_u8_x (p0, z1, 1), ++ z0 = svlsl_wide_x (p0, z1, 1)) ++ ++/* ++** lsl_wide_7_u8_x_tied1: ++** lsl z0\.b, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_u8_x_tied1, svuint8_t, ++ z0 = svlsl_wide_n_u8_x (p0, z0, 7), ++ z0 = svlsl_wide_x (p0, z0, 7)) ++ ++/* ++** lsl_wide_7_u8_x_untied: ++** lsl z0\.b, z1\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_7_u8_x_untied, svuint8_t, ++ z0 = svlsl_wide_n_u8_x (p0, z1, 7), ++ z0 = svlsl_wide_x (p0, z1, 7)) ++ ++/* ++** lsl_wide_8_u8_x_tied1: ++** mov (z[0-9]+\.d), #8 ++** lsl z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_u8_x_tied1, svuint8_t, ++ z0 = svlsl_wide_n_u8_x (p0, z0, 8), ++ z0 = svlsl_wide_x (p0, z0, 8)) ++ ++/* ++** lsl_wide_8_u8_x_untied: ++** mov (z[0-9]+\.d), #8 ++** lsl z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsl_wide_8_u8_x_untied, svuint8_t, ++ z0 = svlsl_wide_n_u8_x (p0, z1, 8), ++ z0 = svlsl_wide_x (p0, z1, 8)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u16.c +new file mode 100644 +index 000000000..61575645f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u16.c +@@ -0,0 +1,340 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsr_u16_m_tied1: ++** lsr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u16_m_tied1, svuint16_t, ++ z0 = svlsr_u16_m (p0, z0, z1), ++ z0 = svlsr_m (p0, z0, z1)) ++ ++/* ++** lsr_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u16_m_tied2, svuint16_t, ++ z0 = svlsr_u16_m (p0, z1, z0), ++ z0 = svlsr_m (p0, z1, z0)) ++ ++/* ++** lsr_u16_m_untied: ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u16_m_untied, svuint16_t, ++ z0 = svlsr_u16_m (p0, z1, z2), ++ z0 = svlsr_m (p0, z1, z2)) ++ ++/* ++** lsr_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** lsr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svlsr_n_u16_m (p0, z0, x0), ++ z0 = svlsr_m (p0, z0, x0)) ++ ++/* ++** lsr_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX 
(lsr_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svlsr_n_u16_m (p0, z1, x0), ++ z0 = svlsr_m (p0, z1, x0)) ++ ++/* ++** lsr_1_u16_m_tied1: ++** lsr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u16_m_tied1, svuint16_t, ++ z0 = svlsr_n_u16_m (p0, z0, 1), ++ z0 = svlsr_m (p0, z0, 1)) ++ ++/* ++** lsr_1_u16_m_untied: ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u16_m_untied, svuint16_t, ++ z0 = svlsr_n_u16_m (p0, z1, 1), ++ z0 = svlsr_m (p0, z1, 1)) ++ ++/* ++** lsr_15_u16_m_tied1: ++** lsr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_15_u16_m_tied1, svuint16_t, ++ z0 = svlsr_n_u16_m (p0, z0, 15), ++ z0 = svlsr_m (p0, z0, 15)) ++ ++/* ++** lsr_15_u16_m_untied: ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_15_u16_m_untied, svuint16_t, ++ z0 = svlsr_n_u16_m (p0, z1, 15), ++ z0 = svlsr_m (p0, z1, 15)) ++ ++/* ++** lsr_16_u16_m_tied1: ++** lsr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_16_u16_m_tied1, svuint16_t, ++ z0 = svlsr_n_u16_m (p0, z0, 16), ++ z0 = svlsr_m (p0, z0, 16)) ++ ++/* ++** lsr_16_u16_m_untied: ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_16_u16_m_untied, svuint16_t, ++ z0 = svlsr_n_u16_m (p0, z1, 16), ++ z0 = svlsr_m (p0, z1, 16)) ++ ++/* ++** lsr_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u16_z_tied1, svuint16_t, ++ z0 = svlsr_u16_z (p0, z0, z1), ++ z0 = svlsr_z (p0, z0, z1)) ++ ++/* ++** lsr_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** lsrr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u16_z_tied2, svuint16_t, ++ z0 = svlsr_u16_z (p0, z1, z0), ++ z0 = svlsr_z (p0, z1, z0)) ++ ++/* ++** lsr_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** lsr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** lsrr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u16_z_untied, svuint16_t, ++ z0 = svlsr_u16_z (p0, z1, z2), ++ z0 = svlsr_z (p0, z1, z2)) ++ ++/* ++** lsr_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** lsr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svlsr_n_u16_z (p0, z0, x0), ++ z0 = svlsr_z (p0, z0, x0)) ++ ++/* ++** lsr_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** lsr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** lsrr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svlsr_n_u16_z (p0, z1, x0), ++ z0 = svlsr_z (p0, z1, x0)) ++ ++/* ++** lsr_1_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u16_z_tied1, svuint16_t, ++ z0 = svlsr_n_u16_z (p0, z0, 1), ++ z0 = svlsr_z (p0, z0, 1)) ++ ++/* ++** lsr_1_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u16_z_untied, svuint16_t, ++ z0 = svlsr_n_u16_z (p0, z1, 1), ++ z0 = svlsr_z (p0, z1, 1)) ++ ++/* ++** lsr_15_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_15_u16_z_tied1, svuint16_t, ++ z0 = svlsr_n_u16_z (p0, z0, 15), ++ z0 = svlsr_z (p0, z0, 15)) ++ ++/* ++** lsr_15_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_15_u16_z_untied, 
svuint16_t, ++ z0 = svlsr_n_u16_z (p0, z1, 15), ++ z0 = svlsr_z (p0, z1, 15)) ++ ++/* ++** lsr_16_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_16_u16_z_tied1, svuint16_t, ++ z0 = svlsr_n_u16_z (p0, z0, 16), ++ z0 = svlsr_z (p0, z0, 16)) ++ ++/* ++** lsr_16_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_16_u16_z_untied, svuint16_t, ++ z0 = svlsr_n_u16_z (p0, z1, 16), ++ z0 = svlsr_z (p0, z1, 16)) ++ ++/* ++** lsr_u16_x_tied1: ++** lsr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u16_x_tied1, svuint16_t, ++ z0 = svlsr_u16_x (p0, z0, z1), ++ z0 = svlsr_x (p0, z0, z1)) ++ ++/* ++** lsr_u16_x_tied2: ++** lsrr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u16_x_tied2, svuint16_t, ++ z0 = svlsr_u16_x (p0, z1, z0), ++ z0 = svlsr_x (p0, z1, z0)) ++ ++/* ++** lsr_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** lsrr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u16_x_untied, svuint16_t, ++ z0 = svlsr_u16_x (p0, z1, z2), ++ z0 = svlsr_x (p0, z1, z2)) ++ ++/* ++** lsr_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** lsr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svlsr_n_u16_x (p0, z0, x0), ++ z0 = svlsr_x (p0, z0, x0)) ++ ++/* ++** lsr_w0_u16_x_untied: ++** mov z0\.h, w0 ++** lsrr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svlsr_n_u16_x (p0, z1, x0), ++ z0 = svlsr_x (p0, z1, x0)) ++ ++/* ++** lsr_1_u16_x_tied1: ++** lsr z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u16_x_tied1, svuint16_t, ++ z0 = svlsr_n_u16_x (p0, z0, 1), ++ z0 = svlsr_x (p0, z0, 1)) ++ ++/* ++** lsr_1_u16_x_untied: ++** lsr z0\.h, z1\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u16_x_untied, svuint16_t, ++ z0 = svlsr_n_u16_x (p0, z1, 1), ++ z0 = svlsr_x (p0, z1, 1)) ++ ++/* ++** lsr_15_u16_x_tied1: ++** lsr z0\.h, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_15_u16_x_tied1, svuint16_t, ++ z0 = svlsr_n_u16_x (p0, z0, 15), ++ z0 = svlsr_x (p0, z0, 15)) ++ ++/* ++** lsr_15_u16_x_untied: ++** lsr z0\.h, z1\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_15_u16_x_untied, svuint16_t, ++ z0 = svlsr_n_u16_x (p0, z1, 15), ++ z0 = svlsr_x (p0, z1, 15)) ++ ++/* ++** lsr_16_u16_x_tied1: ++** lsr z0\.h, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_16_u16_x_tied1, svuint16_t, ++ z0 = svlsr_n_u16_x (p0, z0, 16), ++ z0 = svlsr_x (p0, z0, 16)) ++ ++/* ++** lsr_16_u16_x_untied: ++** lsr z0\.h, z1\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_16_u16_x_untied, svuint16_t, ++ z0 = svlsr_n_u16_x (p0, z1, 16), ++ z0 = svlsr_x (p0, z1, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u32.c +new file mode 100644 +index 000000000..796867ef8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u32.c +@@ -0,0 +1,340 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsr_u32_m_tied1: ++** lsr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u32_m_tied1, svuint32_t, ++ z0 = svlsr_u32_m (p0, z0, z1), ++ z0 = svlsr_m (p0, z0, z1)) ++ ++/* ++** lsr_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u32_m_tied2, svuint32_t, ++ z0 = 
svlsr_u32_m (p0, z1, z0), ++ z0 = svlsr_m (p0, z1, z0)) ++ ++/* ++** lsr_u32_m_untied: ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u32_m_untied, svuint32_t, ++ z0 = svlsr_u32_m (p0, z1, z2), ++ z0 = svlsr_m (p0, z1, z2)) ++ ++/* ++** lsr_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** lsr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svlsr_n_u32_m (p0, z0, x0), ++ z0 = svlsr_m (p0, z0, x0)) ++ ++/* ++** lsr_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svlsr_n_u32_m (p0, z1, x0), ++ z0 = svlsr_m (p0, z1, x0)) ++ ++/* ++** lsr_1_u32_m_tied1: ++** lsr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u32_m_tied1, svuint32_t, ++ z0 = svlsr_n_u32_m (p0, z0, 1), ++ z0 = svlsr_m (p0, z0, 1)) ++ ++/* ++** lsr_1_u32_m_untied: ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u32_m_untied, svuint32_t, ++ z0 = svlsr_n_u32_m (p0, z1, 1), ++ z0 = svlsr_m (p0, z1, 1)) ++ ++/* ++** lsr_31_u32_m_tied1: ++** lsr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_31_u32_m_tied1, svuint32_t, ++ z0 = svlsr_n_u32_m (p0, z0, 31), ++ z0 = svlsr_m (p0, z0, 31)) ++ ++/* ++** lsr_31_u32_m_untied: ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_31_u32_m_untied, svuint32_t, ++ z0 = svlsr_n_u32_m (p0, z1, 31), ++ z0 = svlsr_m (p0, z1, 31)) ++ ++/* ++** lsr_32_u32_m_tied1: ++** lsr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_32_u32_m_tied1, svuint32_t, ++ z0 = svlsr_n_u32_m (p0, z0, 32), ++ z0 = svlsr_m (p0, z0, 32)) ++ ++/* ++** lsr_32_u32_m_untied: ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_32_u32_m_untied, svuint32_t, ++ z0 = svlsr_n_u32_m (p0, z1, 32), ++ z0 = svlsr_m (p0, z1, 32)) ++ ++/* ++** lsr_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u32_z_tied1, svuint32_t, ++ z0 = svlsr_u32_z (p0, z0, z1), ++ z0 = svlsr_z (p0, z0, z1)) ++ ++/* ++** lsr_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** lsrr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u32_z_tied2, svuint32_t, ++ z0 = svlsr_u32_z (p0, z1, z0), ++ z0 = svlsr_z (p0, z1, z0)) ++ ++/* ++** lsr_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** lsr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** lsrr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u32_z_untied, svuint32_t, ++ z0 = svlsr_u32_z (p0, z1, z2), ++ z0 = svlsr_z (p0, z1, z2)) ++ ++/* ++** lsr_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** lsr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svlsr_n_u32_z (p0, z0, x0), ++ z0 = svlsr_z (p0, z0, x0)) ++ ++/* ++** lsr_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** lsr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** lsrr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svlsr_n_u32_z (p0, z1, x0), ++ z0 = svlsr_z (p0, z1, x0)) ++ ++/* ++** lsr_1_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u32_z_tied1, svuint32_t, ++ z0 = svlsr_n_u32_z (p0, z0, 1), 
++ z0 = svlsr_z (p0, z0, 1)) ++ ++/* ++** lsr_1_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u32_z_untied, svuint32_t, ++ z0 = svlsr_n_u32_z (p0, z1, 1), ++ z0 = svlsr_z (p0, z1, 1)) ++ ++/* ++** lsr_31_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_31_u32_z_tied1, svuint32_t, ++ z0 = svlsr_n_u32_z (p0, z0, 31), ++ z0 = svlsr_z (p0, z0, 31)) ++ ++/* ++** lsr_31_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_31_u32_z_untied, svuint32_t, ++ z0 = svlsr_n_u32_z (p0, z1, 31), ++ z0 = svlsr_z (p0, z1, 31)) ++ ++/* ++** lsr_32_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_32_u32_z_tied1, svuint32_t, ++ z0 = svlsr_n_u32_z (p0, z0, 32), ++ z0 = svlsr_z (p0, z0, 32)) ++ ++/* ++** lsr_32_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_32_u32_z_untied, svuint32_t, ++ z0 = svlsr_n_u32_z (p0, z1, 32), ++ z0 = svlsr_z (p0, z1, 32)) ++ ++/* ++** lsr_u32_x_tied1: ++** lsr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u32_x_tied1, svuint32_t, ++ z0 = svlsr_u32_x (p0, z0, z1), ++ z0 = svlsr_x (p0, z0, z1)) ++ ++/* ++** lsr_u32_x_tied2: ++** lsrr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u32_x_tied2, svuint32_t, ++ z0 = svlsr_u32_x (p0, z1, z0), ++ z0 = svlsr_x (p0, z1, z0)) ++ ++/* ++** lsr_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** lsrr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u32_x_untied, svuint32_t, ++ z0 = svlsr_u32_x (p0, z1, z2), ++ z0 = svlsr_x (p0, z1, z2)) ++ ++/* ++** lsr_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** lsr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svlsr_n_u32_x (p0, z0, x0), ++ z0 = svlsr_x (p0, z0, x0)) ++ ++/* ++** lsr_w0_u32_x_untied: ++** mov z0\.s, w0 ++** lsrr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svlsr_n_u32_x (p0, z1, x0), ++ z0 = svlsr_x (p0, z1, x0)) ++ ++/* ++** lsr_1_u32_x_tied1: ++** lsr z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u32_x_tied1, svuint32_t, ++ z0 = svlsr_n_u32_x (p0, z0, 1), ++ z0 = svlsr_x (p0, z0, 1)) ++ ++/* ++** lsr_1_u32_x_untied: ++** lsr z0\.s, z1\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u32_x_untied, svuint32_t, ++ z0 = svlsr_n_u32_x (p0, z1, 1), ++ z0 = svlsr_x (p0, z1, 1)) ++ ++/* ++** lsr_31_u32_x_tied1: ++** lsr z0\.s, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_31_u32_x_tied1, svuint32_t, ++ z0 = svlsr_n_u32_x (p0, z0, 31), ++ z0 = svlsr_x (p0, z0, 31)) ++ ++/* ++** lsr_31_u32_x_untied: ++** lsr z0\.s, z1\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_31_u32_x_untied, svuint32_t, ++ z0 = svlsr_n_u32_x (p0, z1, 31), ++ z0 = svlsr_x (p0, z1, 31)) ++ ++/* ++** lsr_32_u32_x_tied1: ++** lsr z0\.s, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_32_u32_x_tied1, svuint32_t, ++ z0 = svlsr_n_u32_x (p0, z0, 32), ++ z0 = svlsr_x (p0, z0, 32)) ++ ++/* ++** lsr_32_u32_x_untied: ++** lsr z0\.s, z1\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_32_u32_x_untied, svuint32_t, ++ z0 = svlsr_n_u32_x (p0, z1, 32), ++ z0 = svlsr_x (p0, z1, 32)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u64.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u64.c +new file mode 100644 +index 000000000..b50777f50 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u64.c +@@ -0,0 +1,340 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsr_u64_m_tied1: ++** lsr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u64_m_tied1, svuint64_t, ++ z0 = svlsr_u64_m (p0, z0, z1), ++ z0 = svlsr_m (p0, z0, z1)) ++ ++/* ++** lsr_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** lsr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u64_m_tied2, svuint64_t, ++ z0 = svlsr_u64_m (p0, z1, z0), ++ z0 = svlsr_m (p0, z1, z0)) ++ ++/* ++** lsr_u64_m_untied: ++** movprfx z0, z1 ++** lsr z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u64_m_untied, svuint64_t, ++ z0 = svlsr_u64_m (p0, z1, z2), ++ z0 = svlsr_m (p0, z1, z2)) ++ ++/* ++** lsr_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svlsr_n_u64_m (p0, z0, x0), ++ z0 = svlsr_m (p0, z0, x0)) ++ ++/* ++** lsr_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svlsr_n_u64_m (p0, z1, x0), ++ z0 = svlsr_m (p0, z1, x0)) ++ ++/* ++** lsr_1_u64_m_tied1: ++** lsr z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u64_m_tied1, svuint64_t, ++ z0 = svlsr_n_u64_m (p0, z0, 1), ++ z0 = svlsr_m (p0, z0, 1)) ++ ++/* ++** lsr_1_u64_m_untied: ++** movprfx z0, z1 ++** lsr z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u64_m_untied, svuint64_t, ++ z0 = svlsr_n_u64_m (p0, z1, 1), ++ z0 = svlsr_m (p0, z1, 1)) ++ ++/* ++** lsr_63_u64_m_tied1: ++** lsr z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_63_u64_m_tied1, svuint64_t, ++ z0 = svlsr_n_u64_m (p0, z0, 63), ++ z0 = svlsr_m (p0, z0, 63)) ++ ++/* ++** lsr_63_u64_m_untied: ++** movprfx z0, z1 ++** lsr z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_63_u64_m_untied, svuint64_t, ++ z0 = svlsr_n_u64_m (p0, z1, 63), ++ z0 = svlsr_m (p0, z1, 63)) ++ ++/* ++** lsr_64_u64_m_tied1: ++** lsr z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_64_u64_m_tied1, svuint64_t, ++ z0 = svlsr_n_u64_m (p0, z0, 64), ++ z0 = svlsr_m (p0, z0, 64)) ++ ++/* ++** lsr_64_u64_m_untied: ++** movprfx z0, z1 ++** lsr z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_64_u64_m_untied, svuint64_t, ++ z0 = svlsr_n_u64_m (p0, z1, 64), ++ z0 = svlsr_m (p0, z1, 64)) ++ ++/* ++** lsr_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** lsr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u64_z_tied1, svuint64_t, ++ z0 = svlsr_u64_z (p0, z0, z1), ++ z0 = svlsr_z (p0, z0, z1)) ++ ++/* ++** lsr_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** lsrr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u64_z_tied2, svuint64_t, ++ z0 = svlsr_u64_z (p0, z1, z0), ++ z0 = svlsr_z (p0, z1, z0)) ++ ++/* ++** lsr_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** lsr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** lsrr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u64_z_untied, svuint64_t, ++ z0 = svlsr_u64_z (p0, z1, z2), ++ z0 = svlsr_z (p0, z1, z2)) ++ ++/* ++** lsr_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** lsr z0\.d, p0/m, z0\.d, \1 ++** ret 
++*/ ++TEST_UNIFORM_ZX (lsr_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svlsr_n_u64_z (p0, z0, x0), ++ z0 = svlsr_z (p0, z0, x0)) ++ ++/* ++** lsr_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** lsr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** lsrr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svlsr_n_u64_z (p0, z1, x0), ++ z0 = svlsr_z (p0, z1, x0)) ++ ++/* ++** lsr_1_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** lsr z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u64_z_tied1, svuint64_t, ++ z0 = svlsr_n_u64_z (p0, z0, 1), ++ z0 = svlsr_z (p0, z0, 1)) ++ ++/* ++** lsr_1_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** lsr z0\.d, p0/m, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u64_z_untied, svuint64_t, ++ z0 = svlsr_n_u64_z (p0, z1, 1), ++ z0 = svlsr_z (p0, z1, 1)) ++ ++/* ++** lsr_63_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** lsr z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_63_u64_z_tied1, svuint64_t, ++ z0 = svlsr_n_u64_z (p0, z0, 63), ++ z0 = svlsr_z (p0, z0, 63)) ++ ++/* ++** lsr_63_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** lsr z0\.d, p0/m, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_63_u64_z_untied, svuint64_t, ++ z0 = svlsr_n_u64_z (p0, z1, 63), ++ z0 = svlsr_z (p0, z1, 63)) ++ ++/* ++** lsr_64_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** lsr z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_64_u64_z_tied1, svuint64_t, ++ z0 = svlsr_n_u64_z (p0, z0, 64), ++ z0 = svlsr_z (p0, z0, 64)) ++ ++/* ++** lsr_64_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** lsr z0\.d, p0/m, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_64_u64_z_untied, svuint64_t, ++ z0 = svlsr_n_u64_z (p0, z1, 64), ++ z0 = svlsr_z (p0, z1, 64)) ++ ++/* ++** lsr_u64_x_tied1: ++** lsr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u64_x_tied1, svuint64_t, ++ z0 = svlsr_u64_x (p0, z0, z1), ++ z0 = svlsr_x (p0, z0, z1)) ++ ++/* ++** lsr_u64_x_tied2: ++** lsrr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u64_x_tied2, svuint64_t, ++ z0 = svlsr_u64_x (p0, z1, z0), ++ z0 = svlsr_x (p0, z1, z0)) ++ ++/* ++** lsr_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** lsr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** lsrr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u64_x_untied, svuint64_t, ++ z0 = svlsr_u64_x (p0, z1, z2), ++ z0 = svlsr_x (p0, z1, z2)) ++ ++/* ++** lsr_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svlsr_n_u64_x (p0, z0, x0), ++ z0 = svlsr_x (p0, z0, x0)) ++ ++/* ++** lsr_x0_u64_x_untied: ++** mov z0\.d, x0 ++** lsrr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svlsr_n_u64_x (p0, z1, x0), ++ z0 = svlsr_x (p0, z1, x0)) ++ ++/* ++** lsr_1_u64_x_tied1: ++** lsr z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u64_x_tied1, svuint64_t, ++ z0 = svlsr_n_u64_x (p0, z0, 1), ++ z0 = svlsr_x (p0, z0, 1)) ++ ++/* ++** lsr_1_u64_x_untied: ++** lsr z0\.d, z1\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u64_x_untied, svuint64_t, ++ z0 = svlsr_n_u64_x (p0, z1, 1), ++ z0 = svlsr_x (p0, z1, 1)) ++ ++/* ++** lsr_63_u64_x_tied1: ++** lsr z0\.d, z0\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_63_u64_x_tied1, svuint64_t, ++ z0 = svlsr_n_u64_x (p0, z0, 63), ++ z0 = svlsr_x (p0, z0, 63)) ++ ++/* 
++** lsr_63_u64_x_untied: ++** lsr z0\.d, z1\.d, #63 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_63_u64_x_untied, svuint64_t, ++ z0 = svlsr_n_u64_x (p0, z1, 63), ++ z0 = svlsr_x (p0, z1, 63)) ++ ++/* ++** lsr_64_u64_x_tied1: ++** lsr z0\.d, z0\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_64_u64_x_tied1, svuint64_t, ++ z0 = svlsr_n_u64_x (p0, z0, 64), ++ z0 = svlsr_x (p0, z0, 64)) ++ ++/* ++** lsr_64_u64_x_untied: ++** lsr z0\.d, z1\.d, #64 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_64_u64_x_untied, svuint64_t, ++ z0 = svlsr_n_u64_x (p0, z1, 64), ++ z0 = svlsr_x (p0, z1, 64)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u8.c +new file mode 100644 +index 000000000..a049ca905 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u8.c +@@ -0,0 +1,340 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsr_u8_m_tied1: ++** lsr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u8_m_tied1, svuint8_t, ++ z0 = svlsr_u8_m (p0, z0, z1), ++ z0 = svlsr_m (p0, z0, z1)) ++ ++/* ++** lsr_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u8_m_tied2, svuint8_t, ++ z0 = svlsr_u8_m (p0, z1, z0), ++ z0 = svlsr_m (p0, z1, z0)) ++ ++/* ++** lsr_u8_m_untied: ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u8_m_untied, svuint8_t, ++ z0 = svlsr_u8_m (p0, z1, z2), ++ z0 = svlsr_m (p0, z1, z2)) ++ ++/* ++** lsr_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** lsr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svlsr_n_u8_m (p0, z0, x0), ++ z0 = svlsr_m (p0, z0, x0)) ++ ++/* ++** lsr_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svlsr_n_u8_m (p0, z1, x0), ++ z0 = svlsr_m (p0, z1, x0)) ++ ++/* ++** lsr_1_u8_m_tied1: ++** lsr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u8_m_tied1, svuint8_t, ++ z0 = svlsr_n_u8_m (p0, z0, 1), ++ z0 = svlsr_m (p0, z0, 1)) ++ ++/* ++** lsr_1_u8_m_untied: ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u8_m_untied, svuint8_t, ++ z0 = svlsr_n_u8_m (p0, z1, 1), ++ z0 = svlsr_m (p0, z1, 1)) ++ ++/* ++** lsr_7_u8_m_tied1: ++** lsr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_7_u8_m_tied1, svuint8_t, ++ z0 = svlsr_n_u8_m (p0, z0, 7), ++ z0 = svlsr_m (p0, z0, 7)) ++ ++/* ++** lsr_7_u8_m_untied: ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_7_u8_m_untied, svuint8_t, ++ z0 = svlsr_n_u8_m (p0, z1, 7), ++ z0 = svlsr_m (p0, z1, 7)) ++ ++/* ++** lsr_8_u8_m_tied1: ++** lsr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_8_u8_m_tied1, svuint8_t, ++ z0 = svlsr_n_u8_m (p0, z0, 8), ++ z0 = svlsr_m (p0, z0, 8)) ++ ++/* ++** lsr_8_u8_m_untied: ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_8_u8_m_untied, svuint8_t, ++ z0 = svlsr_n_u8_m (p0, z1, 8), ++ z0 = svlsr_m (p0, z1, 8)) ++ ++/* ++** lsr_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u8_z_tied1, svuint8_t, ++ z0 = svlsr_u8_z (p0, z0, z1), ++ z0 = svlsr_z (p0, z0, z1)) ++ ++/* ++** lsr_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** lsrr z0\.b, p0/m, 
z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u8_z_tied2, svuint8_t, ++ z0 = svlsr_u8_z (p0, z1, z0), ++ z0 = svlsr_z (p0, z1, z0)) ++ ++/* ++** lsr_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** lsr z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** lsrr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u8_z_untied, svuint8_t, ++ z0 = svlsr_u8_z (p0, z1, z2), ++ z0 = svlsr_z (p0, z1, z2)) ++ ++/* ++** lsr_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** lsr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svlsr_n_u8_z (p0, z0, x0), ++ z0 = svlsr_z (p0, z0, x0)) ++ ++/* ++** lsr_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** lsr z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** lsrr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svlsr_n_u8_z (p0, z1, x0), ++ z0 = svlsr_z (p0, z1, x0)) ++ ++/* ++** lsr_1_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u8_z_tied1, svuint8_t, ++ z0 = svlsr_n_u8_z (p0, z0, 1), ++ z0 = svlsr_z (p0, z0, 1)) ++ ++/* ++** lsr_1_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u8_z_untied, svuint8_t, ++ z0 = svlsr_n_u8_z (p0, z1, 1), ++ z0 = svlsr_z (p0, z1, 1)) ++ ++/* ++** lsr_7_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_7_u8_z_tied1, svuint8_t, ++ z0 = svlsr_n_u8_z (p0, z0, 7), ++ z0 = svlsr_z (p0, z0, 7)) ++ ++/* ++** lsr_7_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_7_u8_z_untied, svuint8_t, ++ z0 = svlsr_n_u8_z (p0, z1, 7), ++ z0 = svlsr_z (p0, z1, 7)) ++ ++/* ++** lsr_8_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_8_u8_z_tied1, svuint8_t, ++ z0 = svlsr_n_u8_z (p0, z0, 8), ++ z0 = svlsr_z (p0, z0, 8)) ++ ++/* ++** lsr_8_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_8_u8_z_untied, svuint8_t, ++ z0 = svlsr_n_u8_z (p0, z1, 8), ++ z0 = svlsr_z (p0, z1, 8)) ++ ++/* ++** lsr_u8_x_tied1: ++** lsr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u8_x_tied1, svuint8_t, ++ z0 = svlsr_u8_x (p0, z0, z1), ++ z0 = svlsr_x (p0, z0, z1)) ++ ++/* ++** lsr_u8_x_tied2: ++** lsrr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u8_x_tied2, svuint8_t, ++ z0 = svlsr_u8_x (p0, z1, z0), ++ z0 = svlsr_x (p0, z1, z0)) ++ ++/* ++** lsr_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** lsrr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_u8_x_untied, svuint8_t, ++ z0 = svlsr_u8_x (p0, z1, z2), ++ z0 = svlsr_x (p0, z1, z2)) ++ ++/* ++** lsr_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** lsr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svlsr_n_u8_x (p0, z0, x0), ++ z0 = svlsr_x (p0, z0, x0)) ++ ++/* ++** lsr_w0_u8_x_untied: ++** mov z0\.b, w0 ++** lsrr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svlsr_n_u8_x (p0, z1, x0), ++ z0 = svlsr_x (p0, z1, x0)) ++ ++/* ++** lsr_1_u8_x_tied1: ++** lsr z0\.b, z0\.b, #1 ++** ret ++*/ 
++TEST_UNIFORM_Z (lsr_1_u8_x_tied1, svuint8_t, ++ z0 = svlsr_n_u8_x (p0, z0, 1), ++ z0 = svlsr_x (p0, z0, 1)) ++ ++/* ++** lsr_1_u8_x_untied: ++** lsr z0\.b, z1\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_1_u8_x_untied, svuint8_t, ++ z0 = svlsr_n_u8_x (p0, z1, 1), ++ z0 = svlsr_x (p0, z1, 1)) ++ ++/* ++** lsr_7_u8_x_tied1: ++** lsr z0\.b, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_7_u8_x_tied1, svuint8_t, ++ z0 = svlsr_n_u8_x (p0, z0, 7), ++ z0 = svlsr_x (p0, z0, 7)) ++ ++/* ++** lsr_7_u8_x_untied: ++** lsr z0\.b, z1\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_7_u8_x_untied, svuint8_t, ++ z0 = svlsr_n_u8_x (p0, z1, 7), ++ z0 = svlsr_x (p0, z1, 7)) ++ ++/* ++** lsr_8_u8_x_tied1: ++** lsr z0\.b, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_8_u8_x_tied1, svuint8_t, ++ z0 = svlsr_n_u8_x (p0, z0, 8), ++ z0 = svlsr_x (p0, z0, 8)) ++ ++/* ++** lsr_8_u8_x_untied: ++** lsr z0\.b, z1\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_8_u8_x_untied, svuint8_t, ++ z0 = svlsr_n_u8_x (p0, z1, 8), ++ z0 = svlsr_x (p0, z1, 8)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u16.c +new file mode 100644 +index 000000000..863b51a2f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u16.c +@@ -0,0 +1,325 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsr_wide_u16_m_tied1: ++** lsr z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u16_m_tied1, svuint16_t, svuint64_t, ++ z0 = svlsr_wide_u16_m (p0, z0, z4), ++ z0 = svlsr_wide_m (p0, z0, z4)) ++ ++/* ++** lsr_wide_u16_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** lsr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsr_wide_u16_m_tied2, svuint16_t, svuint64_t, ++ z0_res = svlsr_wide_u16_m (p0, z4, z0), ++ z0_res = svlsr_wide_m (p0, z4, z0)) ++ ++/* ++** lsr_wide_u16_m_untied: ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u16_m_untied, svuint16_t, svuint64_t, ++ z0 = svlsr_wide_u16_m (p0, z1, z4), ++ z0 = svlsr_wide_m (p0, z1, z4)) ++ ++/* ++** lsr_wide_x0_u16_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u16_m_tied1, svuint16_t, uint64_t, ++ z0 = svlsr_wide_n_u16_m (p0, z0, x0), ++ z0 = svlsr_wide_m (p0, z0, x0)) ++ ++/* ++** lsr_wide_x0_u16_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u16_m_untied, svuint16_t, uint64_t, ++ z0 = svlsr_wide_n_u16_m (p0, z1, x0), ++ z0 = svlsr_wide_m (p0, z1, x0)) ++ ++/* ++** lsr_wide_1_u16_m_tied1: ++** lsr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u16_m_tied1, svuint16_t, ++ z0 = svlsr_wide_n_u16_m (p0, z0, 1), ++ z0 = svlsr_wide_m (p0, z0, 1)) ++ ++/* ++** lsr_wide_1_u16_m_untied: ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u16_m_untied, svuint16_t, ++ z0 = svlsr_wide_n_u16_m (p0, z1, 1), ++ z0 = svlsr_wide_m (p0, z1, 1)) ++ ++/* ++** lsr_wide_15_u16_m_tied1: ++** lsr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_15_u16_m_tied1, svuint16_t, ++ z0 = svlsr_wide_n_u16_m (p0, z0, 15), ++ z0 = svlsr_wide_m (p0, z0, 15)) ++ ++/* ++** lsr_wide_15_u16_m_untied: ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_15_u16_m_untied, svuint16_t, ++ z0 = svlsr_wide_n_u16_m (p0, z1, 15), ++ z0 = 
svlsr_wide_m (p0, z1, 15)) ++ ++/* ++** lsr_wide_16_u16_m_tied1: ++** lsr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_16_u16_m_tied1, svuint16_t, ++ z0 = svlsr_wide_n_u16_m (p0, z0, 16), ++ z0 = svlsr_wide_m (p0, z0, 16)) ++ ++/* ++** lsr_wide_16_u16_m_untied: ++** movprfx z0, z1 ++** lsr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_16_u16_m_untied, svuint16_t, ++ z0 = svlsr_wide_n_u16_m (p0, z1, 16), ++ z0 = svlsr_wide_m (p0, z1, 16)) ++ ++/* ++** lsr_wide_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsr z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u16_z_tied1, svuint16_t, svuint64_t, ++ z0 = svlsr_wide_u16_z (p0, z0, z4), ++ z0 = svlsr_wide_z (p0, z0, z4)) ++ ++/* ++** lsr_wide_u16_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.h, p0/z, z4\.h ++** lsr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsr_wide_u16_z_tied2, svuint16_t, svuint64_t, ++ z0_res = svlsr_wide_u16_z (p0, z4, z0), ++ z0_res = svlsr_wide_z (p0, z4, z0)) ++ ++/* ++** lsr_wide_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsr z0\.h, p0/m, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u16_z_untied, svuint16_t, svuint64_t, ++ z0 = svlsr_wide_u16_z (p0, z1, z4), ++ z0 = svlsr_wide_z (p0, z1, z4)) ++ ++/* ++** lsr_wide_x0_u16_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.h, p0/z, z0\.h ++** lsr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u16_z_tied1, svuint16_t, uint64_t, ++ z0 = svlsr_wide_n_u16_z (p0, z0, x0), ++ z0 = svlsr_wide_z (p0, z0, x0)) ++ ++/* ++** lsr_wide_x0_u16_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.h, p0/z, z1\.h ++** lsr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u16_z_untied, svuint16_t, uint64_t, ++ z0 = svlsr_wide_n_u16_z (p0, z1, x0), ++ z0 = svlsr_wide_z (p0, z1, x0)) ++ ++/* ++** lsr_wide_1_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u16_z_tied1, svuint16_t, ++ z0 = svlsr_wide_n_u16_z (p0, z0, 1), ++ z0 = svlsr_wide_z (p0, z0, 1)) ++ ++/* ++** lsr_wide_1_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsr z0\.h, p0/m, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u16_z_untied, svuint16_t, ++ z0 = svlsr_wide_n_u16_z (p0, z1, 1), ++ z0 = svlsr_wide_z (p0, z1, 1)) ++ ++/* ++** lsr_wide_15_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_15_u16_z_tied1, svuint16_t, ++ z0 = svlsr_wide_n_u16_z (p0, z0, 15), ++ z0 = svlsr_wide_z (p0, z0, 15)) ++ ++/* ++** lsr_wide_15_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsr z0\.h, p0/m, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_15_u16_z_untied, svuint16_t, ++ z0 = svlsr_wide_n_u16_z (p0, z1, 15), ++ z0 = svlsr_wide_z (p0, z1, 15)) ++ ++/* ++** lsr_wide_16_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** lsr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_16_u16_z_tied1, svuint16_t, ++ z0 = svlsr_wide_n_u16_z (p0, z0, 16), ++ z0 = svlsr_wide_z (p0, z0, 16)) ++ ++/* ++** lsr_wide_16_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** lsr z0\.h, p0/m, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_16_u16_z_untied, svuint16_t, ++ z0 = svlsr_wide_n_u16_z (p0, z1, 16), ++ z0 = svlsr_wide_z (p0, z1, 16)) ++ ++/* ++** lsr_wide_u16_x_tied1: ++** lsr z0\.h, z0\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u16_x_tied1, svuint16_t, svuint64_t, ++ z0 = svlsr_wide_u16_x (p0, z0, z4), ++ z0 = svlsr_wide_x (p0, z0, 
z4)) ++ ++/* ++** lsr_wide_u16_x_tied2: ++** lsr z0\.h, z4\.h, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsr_wide_u16_x_tied2, svuint16_t, svuint64_t, ++ z0_res = svlsr_wide_u16_x (p0, z4, z0), ++ z0_res = svlsr_wide_x (p0, z4, z0)) ++ ++/* ++** lsr_wide_u16_x_untied: ++** lsr z0\.h, z1\.h, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u16_x_untied, svuint16_t, svuint64_t, ++ z0 = svlsr_wide_u16_x (p0, z1, z4), ++ z0 = svlsr_wide_x (p0, z1, z4)) ++ ++/* ++** lsr_wide_x0_u16_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u16_x_tied1, svuint16_t, uint64_t, ++ z0 = svlsr_wide_n_u16_x (p0, z0, x0), ++ z0 = svlsr_wide_x (p0, z0, x0)) ++ ++/* ++** lsr_wide_x0_u16_x_untied: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u16_x_untied, svuint16_t, uint64_t, ++ z0 = svlsr_wide_n_u16_x (p0, z1, x0), ++ z0 = svlsr_wide_x (p0, z1, x0)) ++ ++/* ++** lsr_wide_1_u16_x_tied1: ++** lsr z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u16_x_tied1, svuint16_t, ++ z0 = svlsr_wide_n_u16_x (p0, z0, 1), ++ z0 = svlsr_wide_x (p0, z0, 1)) ++ ++/* ++** lsr_wide_1_u16_x_untied: ++** lsr z0\.h, z1\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u16_x_untied, svuint16_t, ++ z0 = svlsr_wide_n_u16_x (p0, z1, 1), ++ z0 = svlsr_wide_x (p0, z1, 1)) ++ ++/* ++** lsr_wide_15_u16_x_tied1: ++** lsr z0\.h, z0\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_15_u16_x_tied1, svuint16_t, ++ z0 = svlsr_wide_n_u16_x (p0, z0, 15), ++ z0 = svlsr_wide_x (p0, z0, 15)) ++ ++/* ++** lsr_wide_15_u16_x_untied: ++** lsr z0\.h, z1\.h, #15 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_15_u16_x_untied, svuint16_t, ++ z0 = svlsr_wide_n_u16_x (p0, z1, 15), ++ z0 = svlsr_wide_x (p0, z1, 15)) ++ ++/* ++** lsr_wide_16_u16_x_tied1: ++** lsr z0\.h, z0\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_16_u16_x_tied1, svuint16_t, ++ z0 = svlsr_wide_n_u16_x (p0, z0, 16), ++ z0 = svlsr_wide_x (p0, z0, 16)) ++ ++/* ++** lsr_wide_16_u16_x_untied: ++** lsr z0\.h, z1\.h, #16 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_16_u16_x_untied, svuint16_t, ++ z0 = svlsr_wide_n_u16_x (p0, z1, 16), ++ z0 = svlsr_wide_x (p0, z1, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u32.c +new file mode 100644 +index 000000000..73c2cf86e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u32.c +@@ -0,0 +1,325 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsr_wide_u32_m_tied1: ++** lsr z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u32_m_tied1, svuint32_t, svuint64_t, ++ z0 = svlsr_wide_u32_m (p0, z0, z4), ++ z0 = svlsr_wide_m (p0, z0, z4)) ++ ++/* ++** lsr_wide_u32_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** lsr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsr_wide_u32_m_tied2, svuint32_t, svuint64_t, ++ z0_res = svlsr_wide_u32_m (p0, z4, z0), ++ z0_res = svlsr_wide_m (p0, z4, z0)) ++ ++/* ++** lsr_wide_u32_m_untied: ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u32_m_untied, svuint32_t, svuint64_t, ++ z0 = svlsr_wide_u32_m (p0, z1, z4), ++ z0 = svlsr_wide_m (p0, z1, z4)) ++ ++/* ++** lsr_wide_x0_u32_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u32_m_tied1, svuint32_t, uint64_t, ++ z0 = svlsr_wide_n_u32_m (p0, z0, x0), ++ z0 = 
svlsr_wide_m (p0, z0, x0)) ++ ++/* ++** lsr_wide_x0_u32_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u32_m_untied, svuint32_t, uint64_t, ++ z0 = svlsr_wide_n_u32_m (p0, z1, x0), ++ z0 = svlsr_wide_m (p0, z1, x0)) ++ ++/* ++** lsr_wide_1_u32_m_tied1: ++** lsr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u32_m_tied1, svuint32_t, ++ z0 = svlsr_wide_n_u32_m (p0, z0, 1), ++ z0 = svlsr_wide_m (p0, z0, 1)) ++ ++/* ++** lsr_wide_1_u32_m_untied: ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u32_m_untied, svuint32_t, ++ z0 = svlsr_wide_n_u32_m (p0, z1, 1), ++ z0 = svlsr_wide_m (p0, z1, 1)) ++ ++/* ++** lsr_wide_31_u32_m_tied1: ++** lsr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_31_u32_m_tied1, svuint32_t, ++ z0 = svlsr_wide_n_u32_m (p0, z0, 31), ++ z0 = svlsr_wide_m (p0, z0, 31)) ++ ++/* ++** lsr_wide_31_u32_m_untied: ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_31_u32_m_untied, svuint32_t, ++ z0 = svlsr_wide_n_u32_m (p0, z1, 31), ++ z0 = svlsr_wide_m (p0, z1, 31)) ++ ++/* ++** lsr_wide_32_u32_m_tied1: ++** lsr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_32_u32_m_tied1, svuint32_t, ++ z0 = svlsr_wide_n_u32_m (p0, z0, 32), ++ z0 = svlsr_wide_m (p0, z0, 32)) ++ ++/* ++** lsr_wide_32_u32_m_untied: ++** movprfx z0, z1 ++** lsr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_32_u32_m_untied, svuint32_t, ++ z0 = svlsr_wide_n_u32_m (p0, z1, 32), ++ z0 = svlsr_wide_m (p0, z1, 32)) ++ ++/* ++** lsr_wide_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsr z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u32_z_tied1, svuint32_t, svuint64_t, ++ z0 = svlsr_wide_u32_z (p0, z0, z4), ++ z0 = svlsr_wide_z (p0, z0, z4)) ++ ++/* ++** lsr_wide_u32_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.s, p0/z, z4\.s ++** lsr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsr_wide_u32_z_tied2, svuint32_t, svuint64_t, ++ z0_res = svlsr_wide_u32_z (p0, z4, z0), ++ z0_res = svlsr_wide_z (p0, z4, z0)) ++ ++/* ++** lsr_wide_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsr z0\.s, p0/m, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u32_z_untied, svuint32_t, svuint64_t, ++ z0 = svlsr_wide_u32_z (p0, z1, z4), ++ z0 = svlsr_wide_z (p0, z1, z4)) ++ ++/* ++** lsr_wide_x0_u32_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.s, p0/z, z0\.s ++** lsr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u32_z_tied1, svuint32_t, uint64_t, ++ z0 = svlsr_wide_n_u32_z (p0, z0, x0), ++ z0 = svlsr_wide_z (p0, z0, x0)) ++ ++/* ++** lsr_wide_x0_u32_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.s, p0/z, z1\.s ++** lsr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u32_z_untied, svuint32_t, uint64_t, ++ z0 = svlsr_wide_n_u32_z (p0, z1, x0), ++ z0 = svlsr_wide_z (p0, z1, x0)) ++ ++/* ++** lsr_wide_1_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u32_z_tied1, svuint32_t, ++ z0 = svlsr_wide_n_u32_z (p0, z0, 1), ++ z0 = svlsr_wide_z (p0, z0, 1)) ++ ++/* ++** lsr_wide_1_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsr z0\.s, p0/m, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u32_z_untied, svuint32_t, ++ z0 = svlsr_wide_n_u32_z (p0, z1, 1), ++ z0 = svlsr_wide_z (p0, z1, 1)) ++ ++/* ++** 
lsr_wide_31_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_31_u32_z_tied1, svuint32_t, ++ z0 = svlsr_wide_n_u32_z (p0, z0, 31), ++ z0 = svlsr_wide_z (p0, z0, 31)) ++ ++/* ++** lsr_wide_31_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsr z0\.s, p0/m, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_31_u32_z_untied, svuint32_t, ++ z0 = svlsr_wide_n_u32_z (p0, z1, 31), ++ z0 = svlsr_wide_z (p0, z1, 31)) ++ ++/* ++** lsr_wide_32_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** lsr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_32_u32_z_tied1, svuint32_t, ++ z0 = svlsr_wide_n_u32_z (p0, z0, 32), ++ z0 = svlsr_wide_z (p0, z0, 32)) ++ ++/* ++** lsr_wide_32_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** lsr z0\.s, p0/m, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_32_u32_z_untied, svuint32_t, ++ z0 = svlsr_wide_n_u32_z (p0, z1, 32), ++ z0 = svlsr_wide_z (p0, z1, 32)) ++ ++/* ++** lsr_wide_u32_x_tied1: ++** lsr z0\.s, z0\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u32_x_tied1, svuint32_t, svuint64_t, ++ z0 = svlsr_wide_u32_x (p0, z0, z4), ++ z0 = svlsr_wide_x (p0, z0, z4)) ++ ++/* ++** lsr_wide_u32_x_tied2: ++** lsr z0\.s, z4\.s, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsr_wide_u32_x_tied2, svuint32_t, svuint64_t, ++ z0_res = svlsr_wide_u32_x (p0, z4, z0), ++ z0_res = svlsr_wide_x (p0, z4, z0)) ++ ++/* ++** lsr_wide_u32_x_untied: ++** lsr z0\.s, z1\.s, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u32_x_untied, svuint32_t, svuint64_t, ++ z0 = svlsr_wide_u32_x (p0, z1, z4), ++ z0 = svlsr_wide_x (p0, z1, z4)) ++ ++/* ++** lsr_wide_x0_u32_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u32_x_tied1, svuint32_t, uint64_t, ++ z0 = svlsr_wide_n_u32_x (p0, z0, x0), ++ z0 = svlsr_wide_x (p0, z0, x0)) ++ ++/* ++** lsr_wide_x0_u32_x_untied: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u32_x_untied, svuint32_t, uint64_t, ++ z0 = svlsr_wide_n_u32_x (p0, z1, x0), ++ z0 = svlsr_wide_x (p0, z1, x0)) ++ ++/* ++** lsr_wide_1_u32_x_tied1: ++** lsr z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u32_x_tied1, svuint32_t, ++ z0 = svlsr_wide_n_u32_x (p0, z0, 1), ++ z0 = svlsr_wide_x (p0, z0, 1)) ++ ++/* ++** lsr_wide_1_u32_x_untied: ++** lsr z0\.s, z1\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u32_x_untied, svuint32_t, ++ z0 = svlsr_wide_n_u32_x (p0, z1, 1), ++ z0 = svlsr_wide_x (p0, z1, 1)) ++ ++/* ++** lsr_wide_31_u32_x_tied1: ++** lsr z0\.s, z0\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_31_u32_x_tied1, svuint32_t, ++ z0 = svlsr_wide_n_u32_x (p0, z0, 31), ++ z0 = svlsr_wide_x (p0, z0, 31)) ++ ++/* ++** lsr_wide_31_u32_x_untied: ++** lsr z0\.s, z1\.s, #31 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_31_u32_x_untied, svuint32_t, ++ z0 = svlsr_wide_n_u32_x (p0, z1, 31), ++ z0 = svlsr_wide_x (p0, z1, 31)) ++ ++/* ++** lsr_wide_32_u32_x_tied1: ++** lsr z0\.s, z0\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_32_u32_x_tied1, svuint32_t, ++ z0 = svlsr_wide_n_u32_x (p0, z0, 32), ++ z0 = svlsr_wide_x (p0, z0, 32)) ++ ++/* ++** lsr_wide_32_u32_x_untied: ++** lsr z0\.s, z1\.s, #32 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_32_u32_x_untied, svuint32_t, ++ z0 = svlsr_wide_n_u32_x (p0, z1, 32), ++ z0 = svlsr_wide_x (p0, z1, 32)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u8.c +new file mode 100644 +index 
000000000..fe44eabda +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u8.c +@@ -0,0 +1,325 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** lsr_wide_u8_m_tied1: ++** lsr z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u8_m_tied1, svuint8_t, svuint64_t, ++ z0 = svlsr_wide_u8_m (p0, z0, z4), ++ z0 = svlsr_wide_m (p0, z0, z4)) ++ ++/* ++** lsr_wide_u8_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** lsr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsr_wide_u8_m_tied2, svuint8_t, svuint64_t, ++ z0_res = svlsr_wide_u8_m (p0, z4, z0), ++ z0_res = svlsr_wide_m (p0, z4, z0)) ++ ++/* ++** lsr_wide_u8_m_untied: ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u8_m_untied, svuint8_t, svuint64_t, ++ z0 = svlsr_wide_u8_m (p0, z1, z4), ++ z0 = svlsr_wide_m (p0, z1, z4)) ++ ++/* ++** lsr_wide_x0_u8_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u8_m_tied1, svuint8_t, uint64_t, ++ z0 = svlsr_wide_n_u8_m (p0, z0, x0), ++ z0 = svlsr_wide_m (p0, z0, x0)) ++ ++/* ++** lsr_wide_x0_u8_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u8_m_untied, svuint8_t, uint64_t, ++ z0 = svlsr_wide_n_u8_m (p0, z1, x0), ++ z0 = svlsr_wide_m (p0, z1, x0)) ++ ++/* ++** lsr_wide_1_u8_m_tied1: ++** lsr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u8_m_tied1, svuint8_t, ++ z0 = svlsr_wide_n_u8_m (p0, z0, 1), ++ z0 = svlsr_wide_m (p0, z0, 1)) ++ ++/* ++** lsr_wide_1_u8_m_untied: ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u8_m_untied, svuint8_t, ++ z0 = svlsr_wide_n_u8_m (p0, z1, 1), ++ z0 = svlsr_wide_m (p0, z1, 1)) ++ ++/* ++** lsr_wide_7_u8_m_tied1: ++** lsr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_7_u8_m_tied1, svuint8_t, ++ z0 = svlsr_wide_n_u8_m (p0, z0, 7), ++ z0 = svlsr_wide_m (p0, z0, 7)) ++ ++/* ++** lsr_wide_7_u8_m_untied: ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_7_u8_m_untied, svuint8_t, ++ z0 = svlsr_wide_n_u8_m (p0, z1, 7), ++ z0 = svlsr_wide_m (p0, z1, 7)) ++ ++/* ++** lsr_wide_8_u8_m_tied1: ++** lsr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_8_u8_m_tied1, svuint8_t, ++ z0 = svlsr_wide_n_u8_m (p0, z0, 8), ++ z0 = svlsr_wide_m (p0, z0, 8)) ++ ++/* ++** lsr_wide_8_u8_m_untied: ++** movprfx z0, z1 ++** lsr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_8_u8_m_untied, svuint8_t, ++ z0 = svlsr_wide_n_u8_m (p0, z1, 8), ++ z0 = svlsr_wide_m (p0, z1, 8)) ++ ++/* ++** lsr_wide_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsr z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u8_z_tied1, svuint8_t, svuint64_t, ++ z0 = svlsr_wide_u8_z (p0, z0, z4), ++ z0 = svlsr_wide_z (p0, z0, z4)) ++ ++/* ++** lsr_wide_u8_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.b, p0/z, z4\.b ++** lsr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (lsr_wide_u8_z_tied2, svuint8_t, svuint64_t, ++ z0_res = svlsr_wide_u8_z (p0, z4, z0), ++ z0_res = svlsr_wide_z (p0, z4, z0)) ++ ++/* ++** lsr_wide_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsr z0\.b, p0/m, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u8_z_untied, svuint8_t, svuint64_t, ++ z0 = svlsr_wide_u8_z (p0, z1, z4), ++ z0 = svlsr_wide_z (p0, z1, z4)) 
++ ++/* ++** lsr_wide_x0_u8_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.b, p0/z, z0\.b ++** lsr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u8_z_tied1, svuint8_t, uint64_t, ++ z0 = svlsr_wide_n_u8_z (p0, z0, x0), ++ z0 = svlsr_wide_z (p0, z0, x0)) ++ ++/* ++** lsr_wide_x0_u8_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.b, p0/z, z1\.b ++** lsr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u8_z_untied, svuint8_t, uint64_t, ++ z0 = svlsr_wide_n_u8_z (p0, z1, x0), ++ z0 = svlsr_wide_z (p0, z1, x0)) ++ ++/* ++** lsr_wide_1_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u8_z_tied1, svuint8_t, ++ z0 = svlsr_wide_n_u8_z (p0, z0, 1), ++ z0 = svlsr_wide_z (p0, z0, 1)) ++ ++/* ++** lsr_wide_1_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsr z0\.b, p0/m, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u8_z_untied, svuint8_t, ++ z0 = svlsr_wide_n_u8_z (p0, z1, 1), ++ z0 = svlsr_wide_z (p0, z1, 1)) ++ ++/* ++** lsr_wide_7_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_7_u8_z_tied1, svuint8_t, ++ z0 = svlsr_wide_n_u8_z (p0, z0, 7), ++ z0 = svlsr_wide_z (p0, z0, 7)) ++ ++/* ++** lsr_wide_7_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsr z0\.b, p0/m, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_7_u8_z_untied, svuint8_t, ++ z0 = svlsr_wide_n_u8_z (p0, z1, 7), ++ z0 = svlsr_wide_z (p0, z1, 7)) ++ ++/* ++** lsr_wide_8_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** lsr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_8_u8_z_tied1, svuint8_t, ++ z0 = svlsr_wide_n_u8_z (p0, z0, 8), ++ z0 = svlsr_wide_z (p0, z0, 8)) ++ ++/* ++** lsr_wide_8_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** lsr z0\.b, p0/m, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_8_u8_z_untied, svuint8_t, ++ z0 = svlsr_wide_n_u8_z (p0, z1, 8), ++ z0 = svlsr_wide_z (p0, z1, 8)) ++ ++/* ++** lsr_wide_u8_x_tied1: ++** lsr z0\.b, z0\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u8_x_tied1, svuint8_t, svuint64_t, ++ z0 = svlsr_wide_u8_x (p0, z0, z4), ++ z0 = svlsr_wide_x (p0, z0, z4)) ++ ++/* ++** lsr_wide_u8_x_tied2: ++** lsr z0\.b, z4\.b, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (lsr_wide_u8_x_tied2, svuint8_t, svuint64_t, ++ z0_res = svlsr_wide_u8_x (p0, z4, z0), ++ z0_res = svlsr_wide_x (p0, z4, z0)) ++ ++/* ++** lsr_wide_u8_x_untied: ++** lsr z0\.b, z1\.b, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (lsr_wide_u8_x_untied, svuint8_t, svuint64_t, ++ z0 = svlsr_wide_u8_x (p0, z1, z4), ++ z0 = svlsr_wide_x (p0, z1, z4)) ++ ++/* ++** lsr_wide_x0_u8_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u8_x_tied1, svuint8_t, uint64_t, ++ z0 = svlsr_wide_n_u8_x (p0, z0, x0), ++ z0 = svlsr_wide_x (p0, z0, x0)) ++ ++/* ++** lsr_wide_x0_u8_x_untied: ++** mov (z[0-9]+\.d), x0 ++** lsr z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (lsr_wide_x0_u8_x_untied, svuint8_t, uint64_t, ++ z0 = svlsr_wide_n_u8_x (p0, z1, x0), ++ z0 = svlsr_wide_x (p0, z1, x0)) ++ ++/* ++** lsr_wide_1_u8_x_tied1: ++** lsr z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u8_x_tied1, svuint8_t, ++ z0 = svlsr_wide_n_u8_x (p0, z0, 1), ++ z0 = svlsr_wide_x (p0, z0, 1)) ++ ++/* ++** lsr_wide_1_u8_x_untied: ++** lsr z0\.b, z1\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_1_u8_x_untied, svuint8_t, ++ z0 = svlsr_wide_n_u8_x (p0, z1, 1), ++ z0 = svlsr_wide_x (p0, z1, 1)) ++ 
++/* ++** lsr_wide_7_u8_x_tied1: ++** lsr z0\.b, z0\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_7_u8_x_tied1, svuint8_t, ++ z0 = svlsr_wide_n_u8_x (p0, z0, 7), ++ z0 = svlsr_wide_x (p0, z0, 7)) ++ ++/* ++** lsr_wide_7_u8_x_untied: ++** lsr z0\.b, z1\.b, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_7_u8_x_untied, svuint8_t, ++ z0 = svlsr_wide_n_u8_x (p0, z1, 7), ++ z0 = svlsr_wide_x (p0, z1, 7)) ++ ++/* ++** lsr_wide_8_u8_x_tied1: ++** lsr z0\.b, z0\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_8_u8_x_tied1, svuint8_t, ++ z0 = svlsr_wide_n_u8_x (p0, z0, 8), ++ z0 = svlsr_wide_x (p0, z0, 8)) ++ ++/* ++** lsr_wide_8_u8_x_untied: ++** lsr z0\.b, z1\.b, #8 ++** ret ++*/ ++TEST_UNIFORM_Z (lsr_wide_8_u8_x_untied, svuint8_t, ++ z0 = svlsr_wide_n_u8_x (p0, z1, 8), ++ z0 = svlsr_wide_x (p0, z1, 8)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f16.c +new file mode 100644 +index 000000000..7656f9e54 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f16.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_f16_m_tied1: ++** fmad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_m_tied1, svfloat16_t, ++ z0 = svmad_f16_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmad z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_m_tied2, svfloat16_t, ++ z0 = svmad_f16_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmad z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_m_tied3, svfloat16_t, ++ z0 = svmad_f16_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_f16_m_untied: ++** movprfx z0, z1 ++** fmad z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_m_untied, svfloat16_t, ++ z0 = svmad_f16_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svmad_n_f16_m (p0, z0, z1, d4), ++ z0 = svmad_m (p0, z0, z1, d4)) ++ ++/* ++** mad_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fmad z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svmad_n_f16_m (p0, z1, z2, d4), ++ z0 = svmad_m (p0, z1, z2, d4)) ++ ++/* ++** mad_2_f16_m_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f16_m_tied1, svfloat16_t, ++ z0 = svmad_n_f16_m (p0, z0, z1, 2), ++ z0 = svmad_m (p0, z0, z1, 2)) ++ ++/* ++** mad_2_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmad z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f16_m_untied, svfloat16_t, ++ z0 = svmad_n_f16_m (p0, z1, z2, 2), ++ z0 = svmad_m (p0, z1, z2, 2)) ++ ++/* ++** mad_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_z_tied1, svfloat16_t, ++ z0 = svmad_f16_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fmad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_z_tied2, svfloat16_t, ++ z0 = svmad_f16_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_f16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_z_tied3, svfloat16_t, ++ z0 = svmad_f16_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmad z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmad z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_z_untied, svfloat16_t, ++ z0 = svmad_f16_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svmad_n_f16_z (p0, z0, z1, d4), ++ z0 = svmad_z (p0, z0, z1, d4)) ++ ++/* ++** mad_h4_f16_z_tied2: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_h4_f16_z_tied2, svfloat16_t, __fp16, ++ z0 = svmad_n_f16_z (p0, z1, z0, d4), ++ z0 = svmad_z (p0, z1, z0, d4)) ++ ++/* ++** mad_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmad z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmad z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svmad_n_f16_z (p0, z1, z2, d4), ++ z0 = svmad_z (p0, z1, z2, d4)) ++ ++/* ++** mad_2_f16_z_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f16_z_tied1, svfloat16_t, ++ z0 = svmad_n_f16_z (p0, z0, z1, 2), ++ z0 = svmad_z (p0, z0, z1, 2)) ++ ++/* ++** mad_2_f16_z_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f16_z_tied2, svfloat16_t, ++ z0 = svmad_n_f16_z (p0, z1, z0, 2), ++ z0 = svmad_z (p0, z1, z0, 2)) ++ ++/* ++** mad_2_f16_z_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmad z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmad z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f16_z_untied, svfloat16_t, ++ z0 = svmad_n_f16_z (p0, z1, z2, 2), ++ z0 = svmad_z (p0, z1, z2, 2)) ++ ++/* ++** mad_f16_x_tied1: ++** fmad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_x_tied1, svfloat16_t, ++ z0 = svmad_f16_x (p0, z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_f16_x_tied2: ++** fmad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_x_tied2, svfloat16_t, ++ z0 = svmad_f16_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_f16_x_tied3: ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_x_tied3, svfloat16_t, ++ z0 = svmad_f16_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fmad z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** fmad z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0, z3 ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f16_x_untied, svfloat16_t, ++ z0 = svmad_f16_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svmad_n_f16_x (p0, z0, z1, d4), ++ z0 = svmad_x (p0, z0, z1, d4)) ++ ++/* ++** mad_h4_f16_x_tied2: ++** mov (z[0-9]+\.h), h4 ++** fmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_h4_f16_x_tied2, svfloat16_t, __fp16, ++ z0 = svmad_n_f16_x (p0, z1, z0, d4), ++ z0 = svmad_x (p0, z1, z0, d4)) ++ ++/* ++** mad_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svmad_n_f16_x (p0, z1, z2, d4), ++ z0 = svmad_x (p0, z1, z2, d4)) ++ ++/* ++** mad_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f16_x_tied1, svfloat16_t, ++ z0 = svmad_n_f16_x (p0, z0, z1, 2), ++ z0 = svmad_x (p0, z0, z1, 2)) ++ ++/* ++** mad_2_f16_x_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f16_x_tied2, svfloat16_t, ++ z0 = svmad_n_f16_x (p0, z1, z0, 2), ++ z0 = svmad_x (p0, z1, z0, 2)) ++ ++/* ++** mad_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f16_x_untied, svfloat16_t, ++ z0 = svmad_n_f16_x (p0, z1, z2, 2), ++ z0 = svmad_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_mad_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f16_x_tied1, svfloat16_t, ++ z0 = svmad_f16_x (svptrue_b16 (), z0, z1, z2), ++ z0 = svmad_x (svptrue_b16 (), z0, z1, z2)) ++ ++/* ++** ptrue_mad_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f16_x_tied2, svfloat16_t, ++ z0 = svmad_f16_x (svptrue_b16 (), z1, z0, z2), ++ z0 = svmad_x (svptrue_b16 (), z1, z0, z2)) ++ ++/* ++** ptrue_mad_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f16_x_tied3, svfloat16_t, ++ z0 = svmad_f16_x (svptrue_b16 (), z1, z2, z0), ++ z0 = svmad_x (svptrue_b16 (), z1, z2, z0)) ++ ++/* ++** ptrue_mad_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f16_x_untied, svfloat16_t, ++ z0 = svmad_f16_x (svptrue_b16 (), z1, z2, z3), ++ z0 = svmad_x (svptrue_b16 (), z1, z2, z3)) ++ ++/* ++** ptrue_mad_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_2_f16_x_tied1, svfloat16_t, ++ z0 = svmad_n_f16_x (svptrue_b16 (), z0, z1, 2), ++ z0 = svmad_x (svptrue_b16 (), z0, z1, 2)) ++ ++/* ++** ptrue_mad_2_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_2_f16_x_tied2, svfloat16_t, ++ z0 = svmad_n_f16_x (svptrue_b16 (), z1, z0, 2), ++ z0 = svmad_x (svptrue_b16 (), z1, z0, 2)) ++ ++/* ++** ptrue_mad_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_2_f16_x_untied, svfloat16_t, ++ z0 = svmad_n_f16_x (svptrue_b16 (), z1, z2, 2), ++ z0 = svmad_x (svptrue_b16 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f32.c +new file mode 100644 +index 000000000..dbdd2b9d1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f32.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_f32_m_tied1: ++** fmad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_m_tied1, svfloat32_t, ++ z0 = svmad_f32_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmad z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_m_tied2, svfloat32_t, ++ z0 = svmad_f32_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmad z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_m_tied3, svfloat32_t, ++ z0 = svmad_f32_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_f32_m_untied: ++** movprfx z0, z1 ++** fmad z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_m_untied, svfloat32_t, ++ z0 = svmad_f32_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svmad_n_f32_m (p0, z0, z1, d4), ++ z0 = svmad_m (p0, z0, z1, d4)) ++ ++/* ++** mad_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fmad z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svmad_n_f32_m (p0, z1, z2, d4), ++ z0 = svmad_m (p0, z1, z2, d4)) ++ ++/* ++** mad_2_f32_m_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f32_m_tied1, svfloat32_t, ++ z0 = svmad_n_f32_m (p0, z0, z1, 2), ++ z0 = svmad_m (p0, z0, z1, 2)) ++ ++/* ++** mad_2_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmad z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f32_m_untied, svfloat32_t, ++ z0 = svmad_n_f32_m (p0, z1, z2, 2), ++ z0 = svmad_m (p0, z1, z2, 2)) ++ ++/* ++** mad_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_z_tied1, svfloat32_t, ++ z0 = svmad_f32_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fmad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_z_tied2, svfloat32_t, ++ z0 = svmad_f32_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_f32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_z_tied3, svfloat32_t, ++ z0 = svmad_f32_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmad z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmad z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_z_untied, svfloat32_t, ++ z0 = svmad_f32_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svmad_n_f32_z (p0, z0, z1, d4), ++ z0 = svmad_z (p0, z0, z1, d4)) ++ ++/* ++** mad_s4_f32_z_tied2: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_s4_f32_z_tied2, svfloat32_t, float, ++ z0 = svmad_n_f32_z (p0, z1, z0, d4), ++ z0 = svmad_z (p0, z1, z0, d4)) ++ ++/* ++** mad_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmad z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmad z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svmad_n_f32_z (p0, z1, z2, d4), ++ z0 = svmad_z (p0, z1, z2, d4)) ++ ++/* ++** mad_2_f32_z_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f32_z_tied1, svfloat32_t, ++ z0 = svmad_n_f32_z (p0, z0, z1, 2), ++ z0 = svmad_z (p0, z0, z1, 2)) ++ ++/* ++** mad_2_f32_z_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f32_z_tied2, svfloat32_t, ++ z0 = svmad_n_f32_z (p0, z1, z0, 2), ++ z0 = svmad_z (p0, z1, z0, 2)) ++ ++/* ++** mad_2_f32_z_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmad z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmad z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f32_z_untied, svfloat32_t, ++ z0 = svmad_n_f32_z (p0, z1, z2, 2), ++ z0 = svmad_z (p0, z1, z2, 2)) ++ ++/* ++** mad_f32_x_tied1: ++** fmad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_x_tied1, svfloat32_t, ++ z0 = svmad_f32_x (p0, z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_f32_x_tied2: ++** fmad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_x_tied2, svfloat32_t, ++ z0 = svmad_f32_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_f32_x_tied3: ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_x_tied3, svfloat32_t, ++ z0 = svmad_f32_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fmad z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** fmad z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0, z3 ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f32_x_untied, svfloat32_t, ++ z0 = svmad_f32_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svmad_n_f32_x (p0, z0, z1, d4), ++ z0 = svmad_x (p0, z0, z1, d4)) ++ ++/* ++** mad_s4_f32_x_tied2: ++** mov (z[0-9]+\.s), s4 ++** fmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_s4_f32_x_tied2, svfloat32_t, float, ++ z0 = svmad_n_f32_x (p0, z1, z0, d4), ++ z0 = svmad_x (p0, z1, z0, d4)) ++ ++/* ++** mad_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svmad_n_f32_x (p0, z1, z2, d4), ++ z0 = svmad_x (p0, z1, z2, d4)) ++ ++/* ++** mad_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f32_x_tied1, svfloat32_t, ++ z0 = svmad_n_f32_x (p0, z0, z1, 2), ++ z0 = svmad_x (p0, z0, z1, 2)) ++ ++/* ++** mad_2_f32_x_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f32_x_tied2, svfloat32_t, ++ z0 = svmad_n_f32_x (p0, z1, z0, 2), ++ z0 = svmad_x (p0, z1, z0, 2)) ++ ++/* ++** mad_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f32_x_untied, svfloat32_t, ++ z0 = svmad_n_f32_x (p0, z1, z2, 2), ++ z0 = svmad_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_mad_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f32_x_tied1, svfloat32_t, ++ z0 = svmad_f32_x (svptrue_b32 (), z0, z1, z2), ++ z0 = svmad_x (svptrue_b32 (), z0, z1, z2)) ++ ++/* ++** ptrue_mad_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f32_x_tied2, svfloat32_t, ++ z0 = svmad_f32_x (svptrue_b32 (), z1, z0, z2), ++ z0 = svmad_x (svptrue_b32 (), z1, z0, z2)) ++ ++/* ++** ptrue_mad_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f32_x_tied3, svfloat32_t, ++ z0 = svmad_f32_x (svptrue_b32 (), z1, z2, z0), ++ z0 = svmad_x (svptrue_b32 (), z1, z2, z0)) ++ ++/* ++** ptrue_mad_f32_x_untied: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f32_x_untied, svfloat32_t, ++ z0 = svmad_f32_x (svptrue_b32 (), z1, z2, z3), ++ z0 = svmad_x (svptrue_b32 (), z1, z2, z3)) ++ ++/* ++** ptrue_mad_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_2_f32_x_tied1, svfloat32_t, ++ z0 = svmad_n_f32_x (svptrue_b32 (), z0, z1, 2), ++ z0 = svmad_x (svptrue_b32 (), z0, z1, 2)) ++ ++/* ++** ptrue_mad_2_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_2_f32_x_tied2, svfloat32_t, ++ z0 = svmad_n_f32_x (svptrue_b32 (), z1, z0, 2), ++ z0 = svmad_x (svptrue_b32 (), z1, z0, 2)) ++ ++/* ++** ptrue_mad_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_2_f32_x_untied, svfloat32_t, ++ z0 = svmad_n_f32_x (svptrue_b32 (), z1, z2, 2), ++ z0 = svmad_x (svptrue_b32 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f64.c +new file mode 100644 +index 000000000..978281295 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f64.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_f64_m_tied1: ++** fmad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_m_tied1, svfloat64_t, ++ z0 = svmad_f64_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmad z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_m_tied2, svfloat64_t, ++ z0 = svmad_f64_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_m_tied3, svfloat64_t, ++ z0 = svmad_f64_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_f64_m_untied: ++** movprfx z0, z1 ++** fmad z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_m_untied, svfloat64_t, ++ z0 = svmad_f64_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svmad_n_f64_m (p0, z0, z1, d4), ++ z0 = svmad_m (p0, z0, z1, d4)) ++ ++/* ++** mad_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fmad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svmad_n_f64_m (p0, z1, z2, d4), ++ z0 = svmad_m (p0, z1, z2, d4)) ++ ++/* ++** mad_2_f64_m_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f64_m_tied1, svfloat64_t, ++ z0 = svmad_n_f64_m (p0, z0, z1, 2), ++ z0 = svmad_m (p0, z0, z1, 2)) ++ ++/* ++** mad_2_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f64_m_untied, svfloat64_t, ++ z0 = svmad_n_f64_m (p0, z1, z2, 2), ++ z0 = svmad_m (p0, z1, z2, 2)) ++ ++/* ++** mad_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_z_tied1, svfloat64_t, ++ z0 = svmad_f64_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fmad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_z_tied2, svfloat64_t, ++ z0 = svmad_f64_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_f64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_z_tied3, svfloat64_t, ++ z0 = svmad_f64_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmad z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmad z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_z_untied, svfloat64_t, ++ z0 = svmad_f64_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svmad_n_f64_z (p0, z0, z1, d4), ++ z0 = svmad_z (p0, z0, z1, d4)) ++ ++/* ++** mad_d4_f64_z_tied2: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_d4_f64_z_tied2, svfloat64_t, double, ++ z0 = svmad_n_f64_z (p0, z1, z0, d4), ++ z0 = svmad_z (p0, z1, z0, d4)) ++ ++/* ++** mad_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmad z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmad z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svmad_n_f64_z (p0, z1, z2, d4), ++ z0 = svmad_z (p0, z1, z2, d4)) ++ ++/* ++** mad_2_f64_z_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f64_z_tied1, svfloat64_t, ++ z0 = svmad_n_f64_z (p0, z0, z1, 2), ++ z0 = svmad_z (p0, z0, z1, 2)) ++ ++/* ++** mad_2_f64_z_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f64_z_tied2, svfloat64_t, ++ z0 = svmad_n_f64_z (p0, z1, z0, 2), ++ z0 = svmad_z (p0, z1, z0, 2)) ++ ++/* ++** mad_2_f64_z_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmad z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmad z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f64_z_untied, svfloat64_t, ++ z0 = svmad_n_f64_z (p0, z1, z2, 2), ++ z0 = svmad_z (p0, z1, z2, 2)) ++ ++/* ++** mad_f64_x_tied1: ++** fmad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_x_tied1, svfloat64_t, ++ z0 = svmad_f64_x (p0, z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_f64_x_tied2: ++** fmad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_x_tied2, svfloat64_t, ++ z0 = svmad_f64_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_f64_x_tied3: ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_x_tied3, svfloat64_t, ++ z0 = svmad_f64_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fmad z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** fmad z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0, z3 ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_f64_x_untied, svfloat64_t, ++ z0 = svmad_f64_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svmad_n_f64_x (p0, z0, z1, d4), ++ z0 = svmad_x (p0, z0, z1, d4)) ++ ++/* ++** mad_d4_f64_x_tied2: ++** mov (z[0-9]+\.d), d4 ++** fmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_d4_f64_x_tied2, svfloat64_t, double, ++ z0 = svmad_n_f64_x (p0, z1, z0, d4), ++ z0 = svmad_x (p0, z1, z0, d4)) ++ ++/* ++** mad_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (mad_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svmad_n_f64_x (p0, z1, z2, d4), ++ z0 = svmad_x (p0, z1, z2, d4)) ++ ++/* ++** mad_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f64_x_tied1, svfloat64_t, ++ z0 = svmad_n_f64_x (p0, z0, z1, 2), ++ z0 = svmad_x (p0, z0, z1, 2)) ++ ++/* ++** mad_2_f64_x_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f64_x_tied2, svfloat64_t, ++ z0 = svmad_n_f64_x (p0, z1, z0, 2), ++ z0 = svmad_x (p0, z1, z0, 2)) ++ ++/* ++** mad_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_2_f64_x_untied, svfloat64_t, ++ z0 = svmad_n_f64_x (p0, z1, z2, 2), ++ z0 = svmad_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_mad_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f64_x_tied1, svfloat64_t, ++ z0 = svmad_f64_x (svptrue_b64 (), z0, z1, z2), ++ z0 = svmad_x (svptrue_b64 (), z0, z1, z2)) ++ ++/* ++** ptrue_mad_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f64_x_tied2, svfloat64_t, ++ z0 = svmad_f64_x (svptrue_b64 (), z1, z0, z2), ++ z0 = svmad_x (svptrue_b64 (), z1, z0, z2)) ++ ++/* ++** ptrue_mad_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f64_x_tied3, svfloat64_t, ++ z0 = svmad_f64_x (svptrue_b64 (), z1, z2, z0), ++ z0 = svmad_x (svptrue_b64 (), z1, z2, z0)) ++ ++/* ++** ptrue_mad_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_f64_x_untied, svfloat64_t, ++ z0 = svmad_f64_x (svptrue_b64 (), z1, z2, z3), ++ z0 = svmad_x (svptrue_b64 (), z1, z2, z3)) ++ ++/* ++** ptrue_mad_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_2_f64_x_tied1, svfloat64_t, ++ z0 = svmad_n_f64_x (svptrue_b64 (), z0, z1, 2), ++ z0 = svmad_x (svptrue_b64 (), z0, z1, 2)) ++ ++/* ++** ptrue_mad_2_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_2_f64_x_tied2, svfloat64_t, ++ z0 = svmad_n_f64_x (svptrue_b64 (), z1, z0, 2), ++ z0 = svmad_x (svptrue_b64 (), z1, z0, 2)) ++ ++/* ++** ptrue_mad_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mad_2_f64_x_untied, svfloat64_t, ++ z0 = svmad_n_f64_x (svptrue_b64 (), z1, z2, 2), ++ z0 = svmad_x (svptrue_b64 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s16.c +new file mode 100644 +index 000000000..02a6d4588 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s16.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_s16_m_tied1: ++** mad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_m_tied1, svint16_t, ++ z0 = svmad_s16_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_m_tied2, svint16_t, ++ z0 = svmad_s16_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_s16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_m_tied3, svint16_t, ++ z0 = svmad_s16_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_s16_m_untied: ++** movprfx z0, z1 ++** mad z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_m_untied, svint16_t, ++ z0 = svmad_s16_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svmad_n_s16_m (p0, z0, z1, x0), ++ z0 = svmad_m (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** mad z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svmad_n_s16_m (p0, z1, z2, x0), ++ z0 = svmad_m (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s16_m_tied1: ++** mov (z[0-9]+\.h), #11 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s16_m_tied1, svint16_t, ++ z0 = svmad_n_s16_m (p0, z0, z1, 11), ++ z0 = svmad_m (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #11 ++** movprfx z0, z1 ++** mad z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s16_m_untied, svint16_t, ++ z0 = svmad_n_s16_m (p0, z1, z2, 11), ++ z0 = svmad_m (p0, z1, z2, 11)) ++ ++/* ++** mad_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, z2\.h 
++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_z_tied1, svint16_t, ++ z0 = svmad_s16_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_z_tied2, svint16_t, ++ z0 = svmad_s16_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_s16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_z_tied3, svint16_t, ++ z0 = svmad_s16_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mad z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_z_untied, svint16_t, ++ z0 = svmad_s16_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svmad_n_s16_z (p0, z0, z1, x0), ++ z0 = svmad_z (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_s16_z_tied2: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s16_z_tied2, svint16_t, int16_t, ++ z0 = svmad_n_s16_z (p0, z1, z0, x0), ++ z0 = svmad_z (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mad z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svmad_n_s16_z (p0, z1, z2, x0), ++ z0 = svmad_z (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s16_z_tied1: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s16_z_tied1, svint16_t, ++ z0 = svmad_n_s16_z (p0, z0, z1, 11), ++ z0 = svmad_z (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s16_z_tied2: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s16_z_tied2, svint16_t, ++ z0 = svmad_n_s16_z (p0, z1, z0, 11), ++ z0 = svmad_z (p0, z1, z0, 11)) ++ ++/* ++** mad_11_s16_z_untied: ++** mov (z[0-9]+\.h), #11 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mad z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s16_z_untied, svint16_t, ++ z0 = svmad_n_s16_z (p0, z1, z2, 11), ++ z0 = svmad_z (p0, z1, z2, 11)) ++ ++/* ++** mad_s16_x_tied1: ++** mad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_x_tied1, svint16_t, ++ z0 = svmad_s16_x (p0, z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_s16_x_tied2: ++** mad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_x_tied2, svint16_t, ++ z0 = svmad_s16_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_s16_x_tied3: ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_x_tied3, svint16_t, ++ z0 = svmad_s16_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** mad 
z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** mad z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0, z3 ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s16_x_untied, svint16_t, ++ z0 = svmad_s16_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svmad_n_s16_x (p0, z0, z1, x0), ++ z0 = svmad_x (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_s16_x_tied2: ++** mov (z[0-9]+\.h), w0 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s16_x_tied2, svint16_t, int16_t, ++ z0 = svmad_n_s16_x (p0, z1, z0, x0), ++ z0 = svmad_x (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_s16_x_untied: ++** mov z0\.h, w0 ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svmad_n_s16_x (p0, z1, z2, x0), ++ z0 = svmad_x (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s16_x_tied1: ++** mov (z[0-9]+\.h), #11 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s16_x_tied1, svint16_t, ++ z0 = svmad_n_s16_x (p0, z0, z1, 11), ++ z0 = svmad_x (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s16_x_tied2: ++** mov (z[0-9]+\.h), #11 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s16_x_tied2, svint16_t, ++ z0 = svmad_n_s16_x (p0, z1, z0, 11), ++ z0 = svmad_x (p0, z1, z0, 11)) ++ ++/* ++** mad_11_s16_x_untied: ++** mov z0\.h, #11 ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s16_x_untied, svint16_t, ++ z0 = svmad_n_s16_x (p0, z1, z2, 11), ++ z0 = svmad_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s32.c +new file mode 100644 +index 000000000..d676a0c11 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s32.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_s32_m_tied1: ++** mad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_m_tied1, svint32_t, ++ z0 = svmad_s32_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_m_tied2, svint32_t, ++ z0 = svmad_s32_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_s32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_m_tied3, svint32_t, ++ z0 = svmad_s32_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_s32_m_untied: ++** movprfx z0, z1 ++** mad z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_m_untied, svint32_t, ++ z0 = svmad_s32_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svmad_n_s32_m (p0, z0, z1, x0), ++ z0 = svmad_m (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** mad z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svmad_n_s32_m (p0, z1, z2, x0), ++ z0 = svmad_m (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s32_m_tied1: ++** mov (z[0-9]+\.s), #11 ++** mad z0\.s, p0/m, 
z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s32_m_tied1, svint32_t, ++ z0 = svmad_n_s32_m (p0, z0, z1, 11), ++ z0 = svmad_m (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #11 ++** movprfx z0, z1 ++** mad z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s32_m_untied, svint32_t, ++ z0 = svmad_n_s32_m (p0, z1, z2, 11), ++ z0 = svmad_m (p0, z1, z2, 11)) ++ ++/* ++** mad_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_z_tied1, svint32_t, ++ z0 = svmad_s32_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_z_tied2, svint32_t, ++ z0 = svmad_s32_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_s32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_z_tied3, svint32_t, ++ z0 = svmad_s32_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mad z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_z_untied, svint32_t, ++ z0 = svmad_s32_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svmad_n_s32_z (p0, z0, z1, x0), ++ z0 = svmad_z (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_s32_z_tied2: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s32_z_tied2, svint32_t, int32_t, ++ z0 = svmad_n_s32_z (p0, z1, z0, x0), ++ z0 = svmad_z (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mad z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svmad_n_s32_z (p0, z1, z2, x0), ++ z0 = svmad_z (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s32_z_tied1: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s32_z_tied1, svint32_t, ++ z0 = svmad_n_s32_z (p0, z0, z1, 11), ++ z0 = svmad_z (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s32_z_tied2: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s32_z_tied2, svint32_t, ++ z0 = svmad_n_s32_z (p0, z1, z0, 11), ++ z0 = svmad_z (p0, z1, z0, 11)) ++ ++/* ++** mad_11_s32_z_untied: ++** mov (z[0-9]+\.s), #11 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mad z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s32_z_untied, svint32_t, ++ z0 = svmad_n_s32_z (p0, z1, z2, 11), ++ z0 = svmad_z (p0, z1, z2, 11)) ++ ++/* ++** mad_s32_x_tied1: ++** mad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_x_tied1, svint32_t, ++ z0 = svmad_s32_x (p0, 
z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_s32_x_tied2: ++** mad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_x_tied2, svint32_t, ++ z0 = svmad_s32_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_s32_x_tied3: ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_x_tied3, svint32_t, ++ z0 = svmad_s32_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** mad z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** mad z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0, z3 ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s32_x_untied, svint32_t, ++ z0 = svmad_s32_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svmad_n_s32_x (p0, z0, z1, x0), ++ z0 = svmad_x (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_s32_x_tied2: ++** mov (z[0-9]+\.s), w0 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s32_x_tied2, svint32_t, int32_t, ++ z0 = svmad_n_s32_x (p0, z1, z0, x0), ++ z0 = svmad_x (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_s32_x_untied: ++** mov z0\.s, w0 ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svmad_n_s32_x (p0, z1, z2, x0), ++ z0 = svmad_x (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s32_x_tied1: ++** mov (z[0-9]+\.s), #11 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s32_x_tied1, svint32_t, ++ z0 = svmad_n_s32_x (p0, z0, z1, 11), ++ z0 = svmad_x (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s32_x_tied2: ++** mov (z[0-9]+\.s), #11 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s32_x_tied2, svint32_t, ++ z0 = svmad_n_s32_x (p0, z1, z0, 11), ++ z0 = svmad_x (p0, z1, z0, 11)) ++ ++/* ++** mad_11_s32_x_untied: ++** mov z0\.s, #11 ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s32_x_untied, svint32_t, ++ z0 = svmad_n_s32_x (p0, z1, z2, 11), ++ z0 = svmad_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s64.c +new file mode 100644 +index 000000000..7aa017536 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s64.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_s64_m_tied1: ++** mad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_m_tied1, svint64_t, ++ z0 = svmad_s64_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mad z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_m_tied2, svint64_t, ++ z0 = svmad_s64_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_s64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_m_tied3, svint64_t, ++ z0 = svmad_s64_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_s64_m_untied: ++** movprfx z0, z1 ++** mad z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_m_untied, svint64_t, ++ z0 = svmad_s64_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** mad z0\.d, p0/m, 
z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svmad_n_s64_m (p0, z0, z1, x0), ++ z0 = svmad_m (p0, z0, z1, x0)) ++ ++/* ++** mad_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** mad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svmad_n_s64_m (p0, z1, z2, x0), ++ z0 = svmad_m (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s64_m_tied1: ++** mov (z[0-9]+\.d), #11 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s64_m_tied1, svint64_t, ++ z0 = svmad_n_s64_m (p0, z0, z1, 11), ++ z0 = svmad_m (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #11 ++** movprfx z0, z1 ++** mad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s64_m_untied, svint64_t, ++ z0 = svmad_n_s64_m (p0, z1, z2, 11), ++ z0 = svmad_m (p0, z1, z2, 11)) ++ ++/* ++** mad_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_z_tied1, svint64_t, ++ z0 = svmad_s64_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_z_tied2, svint64_t, ++ z0 = svmad_s64_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_s64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_z_tied3, svint64_t, ++ z0 = svmad_s64_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mad z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_z_untied, svint64_t, ++ z0 = svmad_s64_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svmad_n_s64_z (p0, z0, z1, x0), ++ z0 = svmad_z (p0, z0, z1, x0)) ++ ++/* ++** mad_x0_s64_z_tied2: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_s64_z_tied2, svint64_t, int64_t, ++ z0 = svmad_n_s64_z (p0, z1, z0, x0), ++ z0 = svmad_z (p0, z1, z0, x0)) ++ ++/* ++** mad_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mad z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svmad_n_s64_z (p0, z1, z2, x0), ++ z0 = svmad_z (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s64_z_tied1: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s64_z_tied1, svint64_t, ++ z0 = svmad_n_s64_z (p0, z0, z1, 11), ++ z0 = svmad_z (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s64_z_tied2: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s64_z_tied2, svint64_t, ++ z0 = svmad_n_s64_z (p0, z1, z0, 11), ++ z0 = svmad_z (p0, z1, z0, 11)) ++ ++/* ++** mad_11_s64_z_untied: ++** mov (z[0-9]+\.d), #11 ++** ( 
++** movprfx z0\.d, p0/z, z1\.d ++** mad z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s64_z_untied, svint64_t, ++ z0 = svmad_n_s64_z (p0, z1, z2, 11), ++ z0 = svmad_z (p0, z1, z2, 11)) ++ ++/* ++** mad_s64_x_tied1: ++** mad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_x_tied1, svint64_t, ++ z0 = svmad_s64_x (p0, z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_s64_x_tied2: ++** mad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_x_tied2, svint64_t, ++ z0 = svmad_s64_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_s64_x_tied3: ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_x_tied3, svint64_t, ++ z0 = svmad_s64_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** mad z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** mad z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0, z3 ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s64_x_untied, svint64_t, ++ z0 = svmad_s64_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svmad_n_s64_x (p0, z0, z1, x0), ++ z0 = svmad_x (p0, z0, z1, x0)) ++ ++/* ++** mad_x0_s64_x_tied2: ++** mov (z[0-9]+\.d), x0 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_s64_x_tied2, svint64_t, int64_t, ++ z0 = svmad_n_s64_x (p0, z1, z0, x0), ++ z0 = svmad_x (p0, z1, z0, x0)) ++ ++/* ++** mad_x0_s64_x_untied: ++** mov z0\.d, x0 ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svmad_n_s64_x (p0, z1, z2, x0), ++ z0 = svmad_x (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s64_x_tied1: ++** mov (z[0-9]+\.d), #11 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s64_x_tied1, svint64_t, ++ z0 = svmad_n_s64_x (p0, z0, z1, 11), ++ z0 = svmad_x (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s64_x_tied2: ++** mov (z[0-9]+\.d), #11 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s64_x_tied2, svint64_t, ++ z0 = svmad_n_s64_x (p0, z1, z0, 11), ++ z0 = svmad_x (p0, z1, z0, 11)) ++ ++/* ++** mad_11_s64_x_untied: ++** mov z0\.d, #11 ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s64_x_untied, svint64_t, ++ z0 = svmad_n_s64_x (p0, z1, z2, 11), ++ z0 = svmad_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s8.c +new file mode 100644 +index 000000000..90d712686 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s8.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_s8_m_tied1: ++** mad z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_m_tied1, svint8_t, ++ z0 = svmad_s8_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.b, p0/m, \1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_m_tied2, svint8_t, ++ z0 = svmad_s8_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_s8_m_tied3: ++** mov 
(z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.b, p0/m, z2\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_m_tied3, svint8_t, ++ z0 = svmad_s8_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_s8_m_untied: ++** movprfx z0, z1 ++** mad z0\.b, p0/m, z2\.b, z3\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_m_untied, svint8_t, ++ z0 = svmad_s8_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svmad_n_s8_m (p0, z0, z1, x0), ++ z0 = svmad_m (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** mad z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svmad_n_s8_m (p0, z1, z2, x0), ++ z0 = svmad_m (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s8_m_tied1: ++** mov (z[0-9]+\.b), #11 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s8_m_tied1, svint8_t, ++ z0 = svmad_n_s8_m (p0, z0, z1, 11), ++ z0 = svmad_m (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #11 ++** movprfx z0, z1 ++** mad z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s8_m_untied, svint8_t, ++ z0 = svmad_n_s8_m (p0, z1, z2, 11), ++ z0 = svmad_m (p0, z1, z2, 11)) ++ ++/* ++** mad_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_z_tied1, svint8_t, ++ z0 = svmad_s8_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_z_tied2, svint8_t, ++ z0 = svmad_s8_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_s8_z_tied3: ++** movprfx z0\.b, p0/z, z0\.b ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_z_tied3, svint8_t, ++ z0 = svmad_s8_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mad z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, z1\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z3\.b ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_z_untied, svint8_t, ++ z0 = svmad_s8_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svmad_n_s8_z (p0, z0, z1, x0), ++ z0 = svmad_z (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_s8_z_tied2: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s8_z_tied2, svint8_t, int8_t, ++ z0 = svmad_n_s8_z (p0, z1, z0, x0), ++ z0 = svmad_z (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mad z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svmad_n_s8_z (p0, z1, z2, x0), ++ z0 = svmad_z (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s8_z_tied1: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, \1 
++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s8_z_tied1, svint8_t, ++ z0 = svmad_n_s8_z (p0, z0, z1, 11), ++ z0 = svmad_z (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s8_z_tied2: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s8_z_tied2, svint8_t, ++ z0 = svmad_n_s8_z (p0, z1, z0, 11), ++ z0 = svmad_z (p0, z1, z0, 11)) ++ ++/* ++** mad_11_s8_z_untied: ++** mov (z[0-9]+\.b), #11 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mad z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s8_z_untied, svint8_t, ++ z0 = svmad_n_s8_z (p0, z1, z2, 11), ++ z0 = svmad_z (p0, z1, z2, 11)) ++ ++/* ++** mad_s8_x_tied1: ++** mad z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_x_tied1, svint8_t, ++ z0 = svmad_s8_x (p0, z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_s8_x_tied2: ++** mad z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_x_tied2, svint8_t, ++ z0 = svmad_s8_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_s8_x_tied3: ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_x_tied3, svint8_t, ++ z0 = svmad_s8_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** mad z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0, z2 ++** mad z0\.b, p0/m, z1\.b, z3\.b ++** | ++** movprfx z0, z3 ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_s8_x_untied, svint8_t, ++ z0 = svmad_s8_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svmad_n_s8_x (p0, z0, z1, x0), ++ z0 = svmad_x (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_s8_x_tied2: ++** mov (z[0-9]+\.b), w0 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s8_x_tied2, svint8_t, int8_t, ++ z0 = svmad_n_s8_x (p0, z1, z0, x0), ++ z0 = svmad_x (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_s8_x_untied: ++** mov z0\.b, w0 ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svmad_n_s8_x (p0, z1, z2, x0), ++ z0 = svmad_x (p0, z1, z2, x0)) ++ ++/* ++** mad_11_s8_x_tied1: ++** mov (z[0-9]+\.b), #11 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s8_x_tied1, svint8_t, ++ z0 = svmad_n_s8_x (p0, z0, z1, 11), ++ z0 = svmad_x (p0, z0, z1, 11)) ++ ++/* ++** mad_11_s8_x_tied2: ++** mov (z[0-9]+\.b), #11 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s8_x_tied2, svint8_t, ++ z0 = svmad_n_s8_x (p0, z1, z0, 11), ++ z0 = svmad_x (p0, z1, z0, 11)) ++ ++/* ++** mad_11_s8_x_untied: ++** mov z0\.b, #11 ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_s8_x_untied, svint8_t, ++ z0 = svmad_n_s8_x (p0, z1, z2, 11), ++ z0 = svmad_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u16.c +new file mode 100644 +index 000000000..1d2ad9c5f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u16.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_u16_m_tied1: ++** mad z0\.h, p0/m, z1\.h, z2\.h ++** 
ret ++*/ ++TEST_UNIFORM_Z (mad_u16_m_tied1, svuint16_t, ++ z0 = svmad_u16_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_m_tied2, svuint16_t, ++ z0 = svmad_u16_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_u16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_m_tied3, svuint16_t, ++ z0 = svmad_u16_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_u16_m_untied: ++** movprfx z0, z1 ++** mad z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_m_untied, svuint16_t, ++ z0 = svmad_u16_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svmad_n_u16_m (p0, z0, z1, x0), ++ z0 = svmad_m (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** mad z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svmad_n_u16_m (p0, z1, z2, x0), ++ z0 = svmad_m (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u16_m_tied1: ++** mov (z[0-9]+\.h), #11 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u16_m_tied1, svuint16_t, ++ z0 = svmad_n_u16_m (p0, z0, z1, 11), ++ z0 = svmad_m (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #11 ++** movprfx z0, z1 ++** mad z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u16_m_untied, svuint16_t, ++ z0 = svmad_n_u16_m (p0, z1, z2, 11), ++ z0 = svmad_m (p0, z1, z2, 11)) ++ ++/* ++** mad_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_z_tied1, svuint16_t, ++ z0 = svmad_u16_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_z_tied2, svuint16_t, ++ z0 = svmad_u16_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_u16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_z_tied3, svuint16_t, ++ z0 = svmad_u16_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mad z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_z_untied, svuint16_t, ++ z0 = svmad_u16_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svmad_n_u16_z (p0, z0, z1, x0), ++ z0 = svmad_z (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_u16_z_tied2: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u16_z_tied2, svuint16_t, uint16_t, ++ z0 = svmad_n_u16_z (p0, z1, z0, x0), ++ z0 = svmad_z (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( 
++** movprfx z0\.h, p0/z, z1\.h ++** mad z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svmad_n_u16_z (p0, z1, z2, x0), ++ z0 = svmad_z (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u16_z_tied1: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u16_z_tied1, svuint16_t, ++ z0 = svmad_n_u16_z (p0, z0, z1, 11), ++ z0 = svmad_z (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u16_z_tied2: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u16_z_tied2, svuint16_t, ++ z0 = svmad_n_u16_z (p0, z1, z0, 11), ++ z0 = svmad_z (p0, z1, z0, 11)) ++ ++/* ++** mad_11_u16_z_untied: ++** mov (z[0-9]+\.h), #11 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mad z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u16_z_untied, svuint16_t, ++ z0 = svmad_n_u16_z (p0, z1, z2, 11), ++ z0 = svmad_z (p0, z1, z2, 11)) ++ ++/* ++** mad_u16_x_tied1: ++** mad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_x_tied1, svuint16_t, ++ z0 = svmad_u16_x (p0, z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_u16_x_tied2: ++** mad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_x_tied2, svuint16_t, ++ z0 = svmad_u16_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_u16_x_tied3: ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_x_tied3, svuint16_t, ++ z0 = svmad_u16_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** mad z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** mad z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0, z3 ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u16_x_untied, svuint16_t, ++ z0 = svmad_u16_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svmad_n_u16_x (p0, z0, z1, x0), ++ z0 = svmad_x (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_u16_x_tied2: ++** mov (z[0-9]+\.h), w0 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u16_x_tied2, svuint16_t, uint16_t, ++ z0 = svmad_n_u16_x (p0, z1, z0, x0), ++ z0 = svmad_x (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_u16_x_untied: ++** mov z0\.h, w0 ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svmad_n_u16_x (p0, z1, z2, x0), ++ z0 = svmad_x (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u16_x_tied1: ++** mov (z[0-9]+\.h), #11 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u16_x_tied1, svuint16_t, ++ z0 = svmad_n_u16_x (p0, z0, z1, 11), ++ z0 = svmad_x (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u16_x_tied2: ++** mov (z[0-9]+\.h), #11 ++** mad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u16_x_tied2, svuint16_t, ++ z0 = svmad_n_u16_x (p0, z1, z0, 11), ++ z0 = svmad_x (p0, z1, z0, 11)) ++ ++/* ++** mad_11_u16_x_untied: ++** mov z0\.h, #11 ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ 
++TEST_UNIFORM_Z (mad_11_u16_x_untied, svuint16_t, ++ z0 = svmad_n_u16_x (p0, z1, z2, 11), ++ z0 = svmad_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u32.c +new file mode 100644 +index 000000000..4b51958b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u32.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_u32_m_tied1: ++** mad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_m_tied1, svuint32_t, ++ z0 = svmad_u32_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_m_tied2, svuint32_t, ++ z0 = svmad_u32_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_u32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_m_tied3, svuint32_t, ++ z0 = svmad_u32_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_u32_m_untied: ++** movprfx z0, z1 ++** mad z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_m_untied, svuint32_t, ++ z0 = svmad_u32_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svmad_n_u32_m (p0, z0, z1, x0), ++ z0 = svmad_m (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** mad z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svmad_n_u32_m (p0, z1, z2, x0), ++ z0 = svmad_m (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u32_m_tied1: ++** mov (z[0-9]+\.s), #11 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u32_m_tied1, svuint32_t, ++ z0 = svmad_n_u32_m (p0, z0, z1, 11), ++ z0 = svmad_m (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #11 ++** movprfx z0, z1 ++** mad z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u32_m_untied, svuint32_t, ++ z0 = svmad_n_u32_m (p0, z1, z2, 11), ++ z0 = svmad_m (p0, z1, z2, 11)) ++ ++/* ++** mad_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_z_tied1, svuint32_t, ++ z0 = svmad_u32_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_z_tied2, svuint32_t, ++ z0 = svmad_u32_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_u32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_z_tied3, svuint32_t, ++ z0 = svmad_u32_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mad z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_z_untied, svuint32_t, ++ z0 = svmad_u32_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), 
w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svmad_n_u32_z (p0, z0, z1, x0), ++ z0 = svmad_z (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_u32_z_tied2: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u32_z_tied2, svuint32_t, uint32_t, ++ z0 = svmad_n_u32_z (p0, z1, z0, x0), ++ z0 = svmad_z (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mad z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svmad_n_u32_z (p0, z1, z2, x0), ++ z0 = svmad_z (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u32_z_tied1: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u32_z_tied1, svuint32_t, ++ z0 = svmad_n_u32_z (p0, z0, z1, 11), ++ z0 = svmad_z (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u32_z_tied2: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u32_z_tied2, svuint32_t, ++ z0 = svmad_n_u32_z (p0, z1, z0, 11), ++ z0 = svmad_z (p0, z1, z0, 11)) ++ ++/* ++** mad_11_u32_z_untied: ++** mov (z[0-9]+\.s), #11 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mad z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u32_z_untied, svuint32_t, ++ z0 = svmad_n_u32_z (p0, z1, z2, 11), ++ z0 = svmad_z (p0, z1, z2, 11)) ++ ++/* ++** mad_u32_x_tied1: ++** mad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_x_tied1, svuint32_t, ++ z0 = svmad_u32_x (p0, z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_u32_x_tied2: ++** mad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_x_tied2, svuint32_t, ++ z0 = svmad_u32_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_u32_x_tied3: ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_x_tied3, svuint32_t, ++ z0 = svmad_u32_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** mad z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** mad z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0, z3 ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u32_x_untied, svuint32_t, ++ z0 = svmad_u32_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svmad_n_u32_x (p0, z0, z1, x0), ++ z0 = svmad_x (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_u32_x_tied2: ++** mov (z[0-9]+\.s), w0 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u32_x_tied2, svuint32_t, uint32_t, ++ z0 = svmad_n_u32_x (p0, z1, z0, x0), ++ z0 = svmad_x (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_u32_x_untied: ++** mov z0\.s, w0 ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svmad_n_u32_x (p0, z1, z2, x0), ++ z0 = svmad_x (p0, z1, z2, x0)) ++ ++/* ++** 
mad_11_u32_x_tied1: ++** mov (z[0-9]+\.s), #11 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u32_x_tied1, svuint32_t, ++ z0 = svmad_n_u32_x (p0, z0, z1, 11), ++ z0 = svmad_x (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u32_x_tied2: ++** mov (z[0-9]+\.s), #11 ++** mad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u32_x_tied2, svuint32_t, ++ z0 = svmad_n_u32_x (p0, z1, z0, 11), ++ z0 = svmad_x (p0, z1, z0, 11)) ++ ++/* ++** mad_11_u32_x_untied: ++** mov z0\.s, #11 ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u32_x_untied, svuint32_t, ++ z0 = svmad_n_u32_x (p0, z1, z2, 11), ++ z0 = svmad_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u64.c +new file mode 100644 +index 000000000..c4939093e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u64.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_u64_m_tied1: ++** mad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_m_tied1, svuint64_t, ++ z0 = svmad_u64_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mad z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_m_tied2, svuint64_t, ++ z0 = svmad_u64_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_u64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_m_tied3, svuint64_t, ++ z0 = svmad_u64_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_u64_m_untied: ++** movprfx z0, z1 ++** mad z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_m_untied, svuint64_t, ++ z0 = svmad_u64_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svmad_n_u64_m (p0, z0, z1, x0), ++ z0 = svmad_m (p0, z0, z1, x0)) ++ ++/* ++** mad_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** mad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svmad_n_u64_m (p0, z1, z2, x0), ++ z0 = svmad_m (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u64_m_tied1: ++** mov (z[0-9]+\.d), #11 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u64_m_tied1, svuint64_t, ++ z0 = svmad_n_u64_m (p0, z0, z1, 11), ++ z0 = svmad_m (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #11 ++** movprfx z0, z1 ++** mad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u64_m_untied, svuint64_t, ++ z0 = svmad_n_u64_m (p0, z1, z2, 11), ++ z0 = svmad_m (p0, z1, z2, 11)) ++ ++/* ++** mad_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_z_tied1, svuint64_t, ++ z0 = svmad_u64_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_z_tied2, svuint64_t, ++ z0 = svmad_u64_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_u64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z 
(mad_u64_z_tied3, svuint64_t, ++ z0 = svmad_u64_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mad z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_z_untied, svuint64_t, ++ z0 = svmad_u64_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svmad_n_u64_z (p0, z0, z1, x0), ++ z0 = svmad_z (p0, z0, z1, x0)) ++ ++/* ++** mad_x0_u64_z_tied2: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_u64_z_tied2, svuint64_t, uint64_t, ++ z0 = svmad_n_u64_z (p0, z1, z0, x0), ++ z0 = svmad_z (p0, z1, z0, x0)) ++ ++/* ++** mad_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mad z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svmad_n_u64_z (p0, z1, z2, x0), ++ z0 = svmad_z (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u64_z_tied1: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u64_z_tied1, svuint64_t, ++ z0 = svmad_n_u64_z (p0, z0, z1, 11), ++ z0 = svmad_z (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u64_z_tied2: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u64_z_tied2, svuint64_t, ++ z0 = svmad_n_u64_z (p0, z1, z0, 11), ++ z0 = svmad_z (p0, z1, z0, 11)) ++ ++/* ++** mad_11_u64_z_untied: ++** mov (z[0-9]+\.d), #11 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mad z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u64_z_untied, svuint64_t, ++ z0 = svmad_n_u64_z (p0, z1, z2, 11), ++ z0 = svmad_z (p0, z1, z2, 11)) ++ ++/* ++** mad_u64_x_tied1: ++** mad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_x_tied1, svuint64_t, ++ z0 = svmad_u64_x (p0, z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_u64_x_tied2: ++** mad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_x_tied2, svuint64_t, ++ z0 = svmad_u64_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_u64_x_tied3: ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_x_tied3, svuint64_t, ++ z0 = svmad_u64_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** mad z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** mad z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0, z3 ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u64_x_untied, svuint64_t, ++ z0 = svmad_u64_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svmad_n_u64_x 
(p0, z0, z1, x0), ++ z0 = svmad_x (p0, z0, z1, x0)) ++ ++/* ++** mad_x0_u64_x_tied2: ++** mov (z[0-9]+\.d), x0 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_u64_x_tied2, svuint64_t, uint64_t, ++ z0 = svmad_n_u64_x (p0, z1, z0, x0), ++ z0 = svmad_x (p0, z1, z0, x0)) ++ ++/* ++** mad_x0_u64_x_untied: ++** mov z0\.d, x0 ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svmad_n_u64_x (p0, z1, z2, x0), ++ z0 = svmad_x (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u64_x_tied1: ++** mov (z[0-9]+\.d), #11 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u64_x_tied1, svuint64_t, ++ z0 = svmad_n_u64_x (p0, z0, z1, 11), ++ z0 = svmad_x (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u64_x_tied2: ++** mov (z[0-9]+\.d), #11 ++** mad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u64_x_tied2, svuint64_t, ++ z0 = svmad_n_u64_x (p0, z1, z0, 11), ++ z0 = svmad_x (p0, z1, z0, 11)) ++ ++/* ++** mad_11_u64_x_untied: ++** mov z0\.d, #11 ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u64_x_untied, svuint64_t, ++ z0 = svmad_n_u64_x (p0, z1, z2, 11), ++ z0 = svmad_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u8.c +new file mode 100644 +index 000000000..0b4b1b8cf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u8.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mad_u8_m_tied1: ++** mad z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_m_tied1, svuint8_t, ++ z0 = svmad_u8_m (p0, z0, z1, z2), ++ z0 = svmad_m (p0, z0, z1, z2)) ++ ++/* ++** mad_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.b, p0/m, \1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_m_tied2, svuint8_t, ++ z0 = svmad_u8_m (p0, z1, z0, z2), ++ z0 = svmad_m (p0, z1, z0, z2)) ++ ++/* ++** mad_u8_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mad z0\.b, p0/m, z2\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_m_tied3, svuint8_t, ++ z0 = svmad_u8_m (p0, z1, z2, z0), ++ z0 = svmad_m (p0, z1, z2, z0)) ++ ++/* ++** mad_u8_m_untied: ++** movprfx z0, z1 ++** mad z0\.b, p0/m, z2\.b, z3\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_m_untied, svuint8_t, ++ z0 = svmad_u8_m (p0, z1, z2, z3), ++ z0 = svmad_m (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svmad_n_u8_m (p0, z0, z1, x0), ++ z0 = svmad_m (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** mad z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svmad_n_u8_m (p0, z1, z2, x0), ++ z0 = svmad_m (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u8_m_tied1: ++** mov (z[0-9]+\.b), #11 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u8_m_tied1, svuint8_t, ++ z0 = svmad_n_u8_m (p0, z0, z1, 11), ++ z0 = svmad_m (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #11 ++** movprfx z0, z1 ++** mad z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u8_m_untied, svuint8_t, ++ z0 = svmad_n_u8_m (p0, z1, z2, 11), ++ z0 = svmad_m (p0, z1, z2, 11)) ++ ++/* ++** mad_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** mad 
z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_z_tied1, svuint8_t, ++ z0 = svmad_u8_z (p0, z0, z1, z2), ++ z0 = svmad_z (p0, z0, z1, z2)) ++ ++/* ++** mad_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_z_tied2, svuint8_t, ++ z0 = svmad_u8_z (p0, z1, z0, z2), ++ z0 = svmad_z (p0, z1, z0, z2)) ++ ++/* ++** mad_u8_z_tied3: ++** movprfx z0\.b, p0/z, z0\.b ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_z_tied3, svuint8_t, ++ z0 = svmad_u8_z (p0, z1, z2, z0), ++ z0 = svmad_z (p0, z1, z2, z0)) ++ ++/* ++** mad_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mad z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, z1\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z3\.b ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_z_untied, svuint8_t, ++ z0 = svmad_u8_z (p0, z1, z2, z3), ++ z0 = svmad_z (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svmad_n_u8_z (p0, z0, z1, x0), ++ z0 = svmad_z (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_u8_z_tied2: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u8_z_tied2, svuint8_t, uint8_t, ++ z0 = svmad_n_u8_z (p0, z1, z0, x0), ++ z0 = svmad_z (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mad z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svmad_n_u8_z (p0, z1, z2, x0), ++ z0 = svmad_z (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u8_z_tied1: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u8_z_tied1, svuint8_t, ++ z0 = svmad_n_u8_z (p0, z0, z1, 11), ++ z0 = svmad_z (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u8_z_tied2: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u8_z_tied2, svuint8_t, ++ z0 = svmad_n_u8_z (p0, z1, z0, 11), ++ z0 = svmad_z (p0, z1, z0, 11)) ++ ++/* ++** mad_11_u8_z_untied: ++** mov (z[0-9]+\.b), #11 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mad z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, z1\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u8_z_untied, svuint8_t, ++ z0 = svmad_n_u8_z (p0, z1, z2, 11), ++ z0 = svmad_z (p0, z1, z2, 11)) ++ ++/* ++** mad_u8_x_tied1: ++** mad z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_x_tied1, svuint8_t, ++ z0 = svmad_u8_x (p0, z0, z1, z2), ++ z0 = svmad_x (p0, z0, z1, z2)) ++ ++/* ++** mad_u8_x_tied2: ++** mad z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_x_tied2, svuint8_t, ++ z0 = svmad_u8_x (p0, z1, z0, z2), ++ z0 = svmad_x (p0, z1, z0, z2)) ++ ++/* ++** mad_u8_x_tied3: ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_x_tied3, svuint8_t, ++ z0 = svmad_u8_x (p0, z1, z2, z0), ++ z0 = svmad_x (p0, z1, z2, z0)) ++ ++/* ++** mad_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** mad z0\.b, p0/m, 
z2\.b, z3\.b ++** | ++** movprfx z0, z2 ++** mad z0\.b, p0/m, z1\.b, z3\.b ++** | ++** movprfx z0, z3 ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mad_u8_x_untied, svuint8_t, ++ z0 = svmad_u8_x (p0, z1, z2, z3), ++ z0 = svmad_x (p0, z1, z2, z3)) ++ ++/* ++** mad_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svmad_n_u8_x (p0, z0, z1, x0), ++ z0 = svmad_x (p0, z0, z1, x0)) ++ ++/* ++** mad_w0_u8_x_tied2: ++** mov (z[0-9]+\.b), w0 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u8_x_tied2, svuint8_t, uint8_t, ++ z0 = svmad_n_u8_x (p0, z1, z0, x0), ++ z0 = svmad_x (p0, z1, z0, x0)) ++ ++/* ++** mad_w0_u8_x_untied: ++** mov z0\.b, w0 ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mad_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svmad_n_u8_x (p0, z1, z2, x0), ++ z0 = svmad_x (p0, z1, z2, x0)) ++ ++/* ++** mad_11_u8_x_tied1: ++** mov (z[0-9]+\.b), #11 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u8_x_tied1, svuint8_t, ++ z0 = svmad_n_u8_x (p0, z0, z1, 11), ++ z0 = svmad_x (p0, z0, z1, 11)) ++ ++/* ++** mad_11_u8_x_tied2: ++** mov (z[0-9]+\.b), #11 ++** mad z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u8_x_tied2, svuint8_t, ++ z0 = svmad_n_u8_x (p0, z1, z0, 11), ++ z0 = svmad_x (p0, z1, z0, 11)) ++ ++/* ++** mad_11_u8_x_untied: ++** mov z0\.b, #11 ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mad_11_u8_x_untied, svuint8_t, ++ z0 = svmad_n_u8_x (p0, z1, z2, 11), ++ z0 = svmad_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f16.c +new file mode 100644 +index 000000000..f21099a24 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f16.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_f16_m_tied1: ++** fmax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_f16_m_tied1, svfloat16_t, ++ z0 = svmax_f16_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmax z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_f16_m_tied2, svfloat16_t, ++ z0 = svmax_f16_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_f16_m_untied: ++** movprfx z0, z1 ++** fmax z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_f16_m_untied, svfloat16_t, ++ z0 = svmax_f16_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svmax_n_f16_m (p0, z0, d4), ++ z0 = svmax_m (p0, z0, d4)) ++ ++/* ++** max_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fmax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svmax_n_f16_m (p0, z1, d4), ++ z0 = svmax_m (p0, z1, d4)) ++ ++/* ++** max_0_f16_m_tied1: ++** fmax z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f16_m_tied1, svfloat16_t, ++ z0 = svmax_n_f16_m (p0, z0, 0), ++ z0 = svmax_m (p0, z0, 0)) ++ ++/* ++** max_0_f16_m_untied: ++** movprfx z0, z1 ++** fmax z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f16_m_untied, svfloat16_t, ++ z0 = svmax_n_f16_m (p0, z1, 0), ++ z0 
= svmax_m (p0, z1, 0)) ++ ++/* ++** max_1_f16_m_tied1: ++** fmax z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f16_m_tied1, svfloat16_t, ++ z0 = svmax_n_f16_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_f16_m_untied: ++** movprfx z0, z1 ++** fmax z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f16_m_untied, svfloat16_t, ++ z0 = svmax_n_f16_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_2_f16_m: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f16_m, svfloat16_t, ++ z0 = svmax_n_f16_m (p0, z0, 2), ++ z0 = svmax_m (p0, z0, 2)) ++ ++/* ++** max_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_f16_z_tied1, svfloat16_t, ++ z0 = svmax_f16_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fmax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_f16_z_tied2, svfloat16_t, ++ z0 = svmax_f16_z (p0, z1, z0), ++ z0 = svmax_z (p0, z1, z0)) ++ ++/* ++** max_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmax z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_f16_z_untied, svfloat16_t, ++ z0 = svmax_f16_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svmax_n_f16_z (p0, z0, d4), ++ z0 = svmax_z (p0, z0, d4)) ++ ++/* ++** max_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmax z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (max_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svmax_n_f16_z (p0, z1, d4), ++ z0 = svmax_z (p0, z1, d4)) ++ ++/* ++** max_0_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmax z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f16_z_tied1, svfloat16_t, ++ z0 = svmax_n_f16_z (p0, z0, 0), ++ z0 = svmax_z (p0, z0, 0)) ++ ++/* ++** max_0_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fmax z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f16_z_untied, svfloat16_t, ++ z0 = svmax_n_f16_z (p0, z1, 0), ++ z0 = svmax_z (p0, z1, 0)) ++ ++/* ++** max_1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmax z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f16_z_tied1, svfloat16_t, ++ z0 = svmax_n_f16_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fmax z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f16_z_untied, svfloat16_t, ++ z0 = svmax_n_f16_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_2_f16_z: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0\.h, p0/z, z0\.h ++** fmax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f16_z, svfloat16_t, ++ z0 = svmax_n_f16_z (p0, z0, 2), ++ z0 = svmax_z (p0, z0, 2)) ++ ++/* ++** max_f16_x_tied1: ++** fmax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_f16_x_tied1, svfloat16_t, ++ z0 = svmax_f16_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_f16_x_tied2: ++** fmax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_f16_x_tied2, svfloat16_t, ++ z0 = svmax_f16_x (p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fmax z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fmax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_f16_x_untied, svfloat16_t, ++ z0 = svmax_f16_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svmax_n_f16_x (p0, z0, d4), ++ z0 = svmax_x (p0, z0, d4)) ++ ++/* ++** max_h4_f16_x_untied: ++** mov z0\.h, h4 ++** fmax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (max_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svmax_n_f16_x (p0, z1, d4), ++ z0 = svmax_x (p0, z1, d4)) ++ ++/* ++** max_0_f16_x_tied1: ++** fmax z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f16_x_tied1, svfloat16_t, ++ z0 = svmax_n_f16_x (p0, z0, 0), ++ z0 = svmax_x (p0, z0, 0)) ++ ++/* ++** max_0_f16_x_untied: ++** movprfx z0, z1 ++** fmax z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f16_x_untied, svfloat16_t, ++ z0 = svmax_n_f16_x (p0, z1, 0), ++ z0 = svmax_x (p0, z1, 0)) ++ ++/* ++** max_1_f16_x_tied1: ++** fmax z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f16_x_tied1, svfloat16_t, ++ z0 = svmax_n_f16_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_f16_x_untied: ++** movprfx z0, z1 ++** fmax z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f16_x_untied, svfloat16_t, ++ z0 = svmax_n_f16_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f16_x_tied1, svfloat16_t, ++ z0 = svmax_n_f16_x (p0, z0, 2), ++ z0 = svmax_x (p0, z0, 2)) ++ ++/* ++** max_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fmax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f16_x_untied, svfloat16_t, ++ z0 = svmax_n_f16_x (p0, z1, 2), ++ z0 = svmax_x (p0, z1, 2)) ++ ++/* ++** ptrue_max_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_f16_x_tied1, svfloat16_t, ++ z0 = svmax_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svmax_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_max_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_f16_x_tied2, svfloat16_t, ++ z0 = svmax_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svmax_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_max_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_f16_x_untied, svfloat16_t, ++ z0 = svmax_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svmax_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_max_0_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_0_f16_x_tied1, svfloat16_t, ++ z0 = svmax_n_f16_x (svptrue_b16 (), z0, 0), ++ z0 = svmax_x (svptrue_b16 (), z0, 0)) ++ ++/* ++** ptrue_max_0_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_0_f16_x_untied, svfloat16_t, ++ z0 = svmax_n_f16_x (svptrue_b16 (), z1, 0), ++ z0 = svmax_x (svptrue_b16 (), z1, 0)) ++ ++/* ++** ptrue_max_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_1_f16_x_tied1, svfloat16_t, ++ z0 = svmax_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svmax_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_max_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_1_f16_x_untied, svfloat16_t, ++ z0 = svmax_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svmax_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_max_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_2_f16_x_tied1, svfloat16_t, ++ z0 = svmax_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svmax_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_max_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_2_f16_x_untied, svfloat16_t, ++ z0 = svmax_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svmax_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f32.c +new file mode 100644 +index 000000000..6f5c92c9f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f32.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_f32_m_tied1: ++** fmax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_f32_m_tied1, svfloat32_t, ++ z0 = svmax_f32_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmax z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_f32_m_tied2, svfloat32_t, ++ z0 = svmax_f32_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_f32_m_untied: ++** movprfx z0, z1 ++** fmax z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_f32_m_untied, svfloat32_t, ++ z0 = svmax_f32_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svmax_n_f32_m (p0, z0, d4), ++ z0 = svmax_m (p0, z0, d4)) ++ ++/* ++** max_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fmax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svmax_n_f32_m (p0, z1, d4), ++ z0 = svmax_m (p0, z1, d4)) ++ ++/* ++** max_0_f32_m_tied1: ++** fmax z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f32_m_tied1, svfloat32_t, ++ z0 = svmax_n_f32_m (p0, z0, 0), ++ z0 = svmax_m (p0, z0, 0)) ++ ++/* ++** max_0_f32_m_untied: ++** movprfx z0, z1 ++** fmax z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f32_m_untied, svfloat32_t, ++ z0 = svmax_n_f32_m (p0, z1, 0), ++ z0 = svmax_m (p0, z1, 0)) ++ ++/* ++** max_1_f32_m_tied1: ++** fmax z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f32_m_tied1, svfloat32_t, ++ z0 = svmax_n_f32_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_f32_m_untied: ++** movprfx z0, z1 ++** fmax 
z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f32_m_untied, svfloat32_t, ++ z0 = svmax_n_f32_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_2_f32_m: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f32_m, svfloat32_t, ++ z0 = svmax_n_f32_m (p0, z0, 2), ++ z0 = svmax_m (p0, z0, 2)) ++ ++/* ++** max_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_f32_z_tied1, svfloat32_t, ++ z0 = svmax_f32_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fmax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_f32_z_tied2, svfloat32_t, ++ z0 = svmax_f32_z (p0, z1, z0), ++ z0 = svmax_z (p0, z1, z0)) ++ ++/* ++** max_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmax z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_f32_z_untied, svfloat32_t, ++ z0 = svmax_f32_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svmax_n_f32_z (p0, z0, d4), ++ z0 = svmax_z (p0, z0, d4)) ++ ++/* ++** max_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmax z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (max_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svmax_n_f32_z (p0, z1, d4), ++ z0 = svmax_z (p0, z1, d4)) ++ ++/* ++** max_0_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmax z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f32_z_tied1, svfloat32_t, ++ z0 = svmax_n_f32_z (p0, z0, 0), ++ z0 = svmax_z (p0, z0, 0)) ++ ++/* ++** max_0_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fmax z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f32_z_untied, svfloat32_t, ++ z0 = svmax_n_f32_z (p0, z1, 0), ++ z0 = svmax_z (p0, z1, 0)) ++ ++/* ++** max_1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmax z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f32_z_tied1, svfloat32_t, ++ z0 = svmax_n_f32_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fmax z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f32_z_untied, svfloat32_t, ++ z0 = svmax_n_f32_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_2_f32_z: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0\.s, p0/z, z0\.s ++** fmax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f32_z, svfloat32_t, ++ z0 = svmax_n_f32_z (p0, z0, 2), ++ z0 = svmax_z (p0, z0, 2)) ++ ++/* ++** max_f32_x_tied1: ++** fmax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_f32_x_tied1, svfloat32_t, ++ z0 = svmax_f32_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_f32_x_tied2: ++** fmax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_f32_x_tied2, svfloat32_t, ++ z0 = svmax_f32_x (p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fmax z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fmax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_f32_x_untied, svfloat32_t, ++ z0 = svmax_f32_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svmax_n_f32_x (p0, z0, d4), ++ z0 = svmax_x (p0, z0, d4)) ++ ++/* ++** max_s4_f32_x_untied: ++** mov z0\.s, s4 ++** fmax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (max_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svmax_n_f32_x (p0, z1, d4), ++ z0 = svmax_x (p0, z1, d4)) ++ ++/* ++** max_0_f32_x_tied1: ++** fmax z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f32_x_tied1, svfloat32_t, ++ z0 = svmax_n_f32_x (p0, z0, 0), ++ z0 = svmax_x (p0, z0, 0)) ++ ++/* ++** max_0_f32_x_untied: ++** movprfx z0, z1 ++** fmax z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f32_x_untied, svfloat32_t, ++ z0 = svmax_n_f32_x (p0, z1, 0), ++ z0 = svmax_x (p0, z1, 0)) ++ ++/* ++** max_1_f32_x_tied1: ++** fmax z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f32_x_tied1, svfloat32_t, ++ z0 = svmax_n_f32_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_f32_x_untied: ++** movprfx z0, z1 ++** fmax z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f32_x_untied, svfloat32_t, ++ z0 = svmax_n_f32_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f32_x_tied1, svfloat32_t, ++ z0 = svmax_n_f32_x (p0, z0, 2), ++ z0 = svmax_x (p0, z0, 2)) ++ ++/* ++** max_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fmax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f32_x_untied, svfloat32_t, ++ z0 = svmax_n_f32_x (p0, z1, 2), ++ z0 = svmax_x (p0, z1, 2)) ++ ++/* ++** ptrue_max_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_f32_x_tied1, svfloat32_t, ++ z0 = svmax_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svmax_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_max_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_f32_x_tied2, svfloat32_t, ++ z0 = svmax_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svmax_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_max_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_f32_x_untied, svfloat32_t, ++ z0 = svmax_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svmax_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_max_0_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_0_f32_x_tied1, svfloat32_t, ++ z0 = svmax_n_f32_x (svptrue_b32 (), z0, 0), ++ z0 = svmax_x (svptrue_b32 (), z0, 0)) ++ ++/* ++** ptrue_max_0_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_0_f32_x_untied, svfloat32_t, ++ z0 = svmax_n_f32_x (svptrue_b32 (), z1, 0), ++ z0 = svmax_x (svptrue_b32 (), z1, 0)) ++ ++/* ++** ptrue_max_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_1_f32_x_tied1, svfloat32_t, ++ z0 = svmax_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svmax_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_max_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_1_f32_x_untied, svfloat32_t, ++ z0 = svmax_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svmax_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_max_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_2_f32_x_tied1, svfloat32_t, ++ z0 = svmax_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svmax_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_max_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_2_f32_x_untied, svfloat32_t, ++ z0 = svmax_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svmax_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f64.c +new file mode 100644 +index 000000000..8ac6cca75 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f64.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_f64_m_tied1: ++** fmax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_f64_m_tied1, svfloat64_t, ++ z0 = svmax_f64_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_f64_m_tied2, svfloat64_t, ++ z0 = svmax_f64_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_f64_m_untied: ++** movprfx z0, z1 ++** fmax z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_f64_m_untied, svfloat64_t, ++ z0 = svmax_f64_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svmax_n_f64_m (p0, z0, d4), ++ z0 = svmax_m (p0, z0, d4)) ++ ++/* ++** max_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fmax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svmax_n_f64_m (p0, z1, d4), ++ z0 = svmax_m (p0, z1, d4)) ++ ++/* ++** max_0_f64_m_tied1: ++** fmax z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f64_m_tied1, svfloat64_t, ++ z0 = svmax_n_f64_m (p0, z0, 0), ++ z0 = svmax_m (p0, z0, 0)) ++ ++/* ++** max_0_f64_m_untied: ++** movprfx z0, z1 ++** fmax z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f64_m_untied, svfloat64_t, ++ z0 = svmax_n_f64_m (p0, z1, 0), ++ z0 = svmax_m (p0, z1, 0)) ++ ++/* ++** max_1_f64_m_tied1: ++** fmax z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f64_m_tied1, svfloat64_t, ++ z0 = svmax_n_f64_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_f64_m_untied: ++** movprfx z0, z1 ++** fmax 
z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f64_m_untied, svfloat64_t, ++ z0 = svmax_n_f64_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_2_f64_m: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f64_m, svfloat64_t, ++ z0 = svmax_n_f64_m (p0, z0, 2), ++ z0 = svmax_m (p0, z0, 2)) ++ ++/* ++** max_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_f64_z_tied1, svfloat64_t, ++ z0 = svmax_f64_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fmax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_f64_z_tied2, svfloat64_t, ++ z0 = svmax_f64_z (p0, z1, z0), ++ z0 = svmax_z (p0, z1, z0)) ++ ++/* ++** max_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmax z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_f64_z_untied, svfloat64_t, ++ z0 = svmax_f64_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svmax_n_f64_z (p0, z0, d4), ++ z0 = svmax_z (p0, z0, d4)) ++ ++/* ++** max_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmax z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (max_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svmax_n_f64_z (p0, z1, d4), ++ z0 = svmax_z (p0, z1, d4)) ++ ++/* ++** max_0_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmax z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f64_z_tied1, svfloat64_t, ++ z0 = svmax_n_f64_z (p0, z0, 0), ++ z0 = svmax_z (p0, z0, 0)) ++ ++/* ++** max_0_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fmax z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f64_z_untied, svfloat64_t, ++ z0 = svmax_n_f64_z (p0, z1, 0), ++ z0 = svmax_z (p0, z1, 0)) ++ ++/* ++** max_1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmax z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f64_z_tied1, svfloat64_t, ++ z0 = svmax_n_f64_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fmax z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f64_z_untied, svfloat64_t, ++ z0 = svmax_n_f64_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_2_f64_z: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0\.d, p0/z, z0\.d ++** fmax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f64_z, svfloat64_t, ++ z0 = svmax_n_f64_z (p0, z0, 2), ++ z0 = svmax_z (p0, z0, 2)) ++ ++/* ++** max_f64_x_tied1: ++** fmax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_f64_x_tied1, svfloat64_t, ++ z0 = svmax_f64_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_f64_x_tied2: ++** fmax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_f64_x_tied2, svfloat64_t, ++ z0 = svmax_f64_x (p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fmax z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fmax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_f64_x_untied, svfloat64_t, ++ z0 = svmax_f64_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (max_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svmax_n_f64_x (p0, z0, d4), ++ z0 = svmax_x (p0, z0, d4)) ++ ++/* ++** max_d4_f64_x_untied: ++** mov z0\.d, d4 ++** fmax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (max_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svmax_n_f64_x (p0, z1, d4), ++ z0 = svmax_x (p0, z1, d4)) ++ ++/* ++** max_0_f64_x_tied1: ++** fmax z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f64_x_tied1, svfloat64_t, ++ z0 = svmax_n_f64_x (p0, z0, 0), ++ z0 = svmax_x (p0, z0, 0)) ++ ++/* ++** max_0_f64_x_untied: ++** movprfx z0, z1 ++** fmax z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_0_f64_x_untied, svfloat64_t, ++ z0 = svmax_n_f64_x (p0, z1, 0), ++ z0 = svmax_x (p0, z1, 0)) ++ ++/* ++** max_1_f64_x_tied1: ++** fmax z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f64_x_tied1, svfloat64_t, ++ z0 = svmax_n_f64_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_f64_x_untied: ++** movprfx z0, z1 ++** fmax z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_f64_x_untied, svfloat64_t, ++ z0 = svmax_n_f64_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f64_x_tied1, svfloat64_t, ++ z0 = svmax_n_f64_x (p0, z0, 2), ++ z0 = svmax_x (p0, z0, 2)) ++ ++/* ++** max_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fmax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_2_f64_x_untied, svfloat64_t, ++ z0 = svmax_n_f64_x (p0, z1, 2), ++ z0 = svmax_x (p0, z1, 2)) ++ ++/* ++** ptrue_max_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_f64_x_tied1, svfloat64_t, ++ z0 = svmax_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svmax_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_max_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_f64_x_tied2, svfloat64_t, ++ z0 = svmax_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svmax_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_max_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_f64_x_untied, svfloat64_t, ++ z0 = svmax_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svmax_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_max_0_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_0_f64_x_tied1, svfloat64_t, ++ z0 = svmax_n_f64_x (svptrue_b64 (), z0, 0), ++ z0 = svmax_x (svptrue_b64 (), z0, 0)) ++ ++/* ++** ptrue_max_0_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_0_f64_x_untied, svfloat64_t, ++ z0 = svmax_n_f64_x (svptrue_b64 (), z1, 0), ++ z0 = svmax_x (svptrue_b64 (), z1, 0)) ++ ++/* ++** ptrue_max_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_1_f64_x_tied1, svfloat64_t, ++ z0 = svmax_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svmax_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_max_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_1_f64_x_untied, svfloat64_t, ++ z0 = svmax_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svmax_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_max_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_2_f64_x_tied1, svfloat64_t, ++ z0 = svmax_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svmax_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_max_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_max_2_f64_x_untied, svfloat64_t, ++ z0 = svmax_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svmax_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s16.c +new file mode 100644 +index 000000000..6a2167522 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s16.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_s16_m_tied1: ++** smax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_s16_m_tied1, svint16_t, ++ z0 = svmax_s16_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** smax z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_s16_m_tied2, svint16_t, ++ z0 = svmax_s16_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_s16_m_untied: ++** movprfx z0, z1 ++** smax z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_s16_m_untied, svint16_t, ++ z0 = svmax_s16_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** smax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svmax_n_s16_m (p0, z0, x0), ++ z0 = svmax_m (p0, z0, x0)) ++ ++/* ++** max_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** smax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svmax_n_s16_m (p0, z1, x0), ++ z0 = svmax_m (p0, z1, x0)) ++ ++/* ++** max_1_s16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** smax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s16_m_tied1, svint16_t, ++ z0 = svmax_n_s16_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** smax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s16_m_untied, svint16_t, ++ z0 = svmax_n_s16_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_m1_s16_m: ++** mov (z[0-9]+)\.b, #-1 ++** smax z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_s16_m, svint16_t, ++ z0 = svmax_n_s16_m (p0, z0, -1), ++ z0 = 
svmax_m (p0, z0, -1)) ++ ++/* ++** max_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** smax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_s16_z_tied1, svint16_t, ++ z0 = svmax_s16_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** smax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_s16_z_tied2, svint16_t, ++ z0 = svmax_s16_z (p0, z1, z0), ++ z0 = svmax_z (p0, z1, z0)) ++ ++/* ++** max_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** smax z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** smax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_s16_z_untied, svint16_t, ++ z0 = svmax_s16_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** smax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svmax_n_s16_z (p0, z0, x0), ++ z0 = svmax_z (p0, z0, x0)) ++ ++/* ++** max_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** smax z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** smax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svmax_n_s16_z (p0, z1, x0), ++ z0 = svmax_z (p0, z1, x0)) ++ ++/* ++** max_1_s16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** smax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s16_z_tied1, svint16_t, ++ z0 = svmax_n_s16_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_s16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** smax z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** smax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s16_z_untied, svint16_t, ++ z0 = svmax_n_s16_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_s16_x_tied1: ++** smax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_s16_x_tied1, svint16_t, ++ z0 = svmax_s16_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_s16_x_tied2: ++** smax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_s16_x_tied2, svint16_t, ++ z0 = svmax_s16_x (p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** smax z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** smax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_s16_x_untied, svint16_t, ++ z0 = svmax_s16_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** smax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svmax_n_s16_x (p0, z0, x0), ++ z0 = svmax_x (p0, z0, x0)) ++ ++/* ++** max_w0_s16_x_untied: ++** mov z0\.h, w0 ++** smax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svmax_n_s16_x (p0, z1, x0), ++ z0 = svmax_x (p0, z1, x0)) ++ ++/* ++** max_1_s16_x_tied1: ++** smax z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s16_x_tied1, svint16_t, ++ z0 = svmax_n_s16_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_s16_x_untied: ++** movprfx z0, z1 ++** smax z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s16_x_untied, svint16_t, ++ z0 = svmax_n_s16_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_127_s16_x: ++** smax z0\.h, z0\.h, 
#127 ++** ret ++*/ ++TEST_UNIFORM_Z (max_127_s16_x, svint16_t, ++ z0 = svmax_n_s16_x (p0, z0, 127), ++ z0 = svmax_x (p0, z0, 127)) ++ ++/* ++** max_128_s16_x: ++** mov (z[0-9]+\.h), #128 ++** smax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_128_s16_x, svint16_t, ++ z0 = svmax_n_s16_x (p0, z0, 128), ++ z0 = svmax_x (p0, z0, 128)) ++ ++/* ++** max_m1_s16_x: ++** smax z0\.h, z0\.h, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_s16_x, svint16_t, ++ z0 = svmax_n_s16_x (p0, z0, -1), ++ z0 = svmax_x (p0, z0, -1)) ++ ++/* ++** max_m128_s16_x: ++** smax z0\.h, z0\.h, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m128_s16_x, svint16_t, ++ z0 = svmax_n_s16_x (p0, z0, -128), ++ z0 = svmax_x (p0, z0, -128)) ++ ++/* ++** max_m129_s16_x: ++** mov (z[0-9]+\.h), #-129 ++** smax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m129_s16_x, svint16_t, ++ z0 = svmax_n_s16_x (p0, z0, -129), ++ z0 = svmax_x (p0, z0, -129)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s32.c +new file mode 100644 +index 000000000..07402c7a9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s32.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_s32_m_tied1: ++** smax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_s32_m_tied1, svint32_t, ++ z0 = svmax_s32_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** smax z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_s32_m_tied2, svint32_t, ++ z0 = svmax_s32_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_s32_m_untied: ++** movprfx z0, z1 ++** smax z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_s32_m_untied, svint32_t, ++ z0 = svmax_s32_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** smax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svmax_n_s32_m (p0, z0, x0), ++ z0 = svmax_m (p0, z0, x0)) ++ ++/* ++** max_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** smax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svmax_n_s32_m (p0, z1, x0), ++ z0 = svmax_m (p0, z1, x0)) ++ ++/* ++** max_1_s32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** smax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s32_m_tied1, svint32_t, ++ z0 = svmax_n_s32_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** smax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s32_m_untied, svint32_t, ++ z0 = svmax_n_s32_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_m1_s32_m: ++** mov (z[0-9]+)\.b, #-1 ++** smax z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_s32_m, svint32_t, ++ z0 = svmax_n_s32_m (p0, z0, -1), ++ z0 = svmax_m (p0, z0, -1)) ++ ++/* ++** max_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** smax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_s32_z_tied1, svint32_t, ++ z0 = svmax_s32_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** smax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_s32_z_tied2, svint32_t, ++ z0 = svmax_s32_z (p0, z1, z0), ++ z0 = svmax_z (p0, 
z1, z0)) ++ ++/* ++** max_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** smax z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** smax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_s32_z_untied, svint32_t, ++ z0 = svmax_s32_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** smax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svmax_n_s32_z (p0, z0, x0), ++ z0 = svmax_z (p0, z0, x0)) ++ ++/* ++** max_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** smax z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** smax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svmax_n_s32_z (p0, z1, x0), ++ z0 = svmax_z (p0, z1, x0)) ++ ++/* ++** max_1_s32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** smax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s32_z_tied1, svint32_t, ++ z0 = svmax_n_s32_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_s32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** smax z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** smax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s32_z_untied, svint32_t, ++ z0 = svmax_n_s32_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_s32_x_tied1: ++** smax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_s32_x_tied1, svint32_t, ++ z0 = svmax_s32_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_s32_x_tied2: ++** smax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_s32_x_tied2, svint32_t, ++ z0 = svmax_s32_x (p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** smax z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** smax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_s32_x_untied, svint32_t, ++ z0 = svmax_s32_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** smax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svmax_n_s32_x (p0, z0, x0), ++ z0 = svmax_x (p0, z0, x0)) ++ ++/* ++** max_w0_s32_x_untied: ++** mov z0\.s, w0 ++** smax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svmax_n_s32_x (p0, z1, x0), ++ z0 = svmax_x (p0, z1, x0)) ++ ++/* ++** max_1_s32_x_tied1: ++** smax z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s32_x_tied1, svint32_t, ++ z0 = svmax_n_s32_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_s32_x_untied: ++** movprfx z0, z1 ++** smax z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s32_x_untied, svint32_t, ++ z0 = svmax_n_s32_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_127_s32_x: ++** smax z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (max_127_s32_x, svint32_t, ++ z0 = svmax_n_s32_x (p0, z0, 127), ++ z0 = svmax_x (p0, z0, 127)) ++ ++/* ++** max_128_s32_x: ++** mov (z[0-9]+\.s), #128 ++** smax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_128_s32_x, svint32_t, ++ z0 = svmax_n_s32_x (p0, z0, 128), ++ z0 = svmax_x (p0, z0, 128)) ++ ++/* ++** max_m1_s32_x: ++** smax z0\.s, z0\.s, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_s32_x, svint32_t, 
++ z0 = svmax_n_s32_x (p0, z0, -1), ++ z0 = svmax_x (p0, z0, -1)) ++ ++/* ++** max_m128_s32_x: ++** smax z0\.s, z0\.s, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m128_s32_x, svint32_t, ++ z0 = svmax_n_s32_x (p0, z0, -128), ++ z0 = svmax_x (p0, z0, -128)) ++ ++/* ++** max_m129_s32_x: ++** mov (z[0-9]+\.s), #-129 ++** smax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m129_s32_x, svint32_t, ++ z0 = svmax_n_s32_x (p0, z0, -129), ++ z0 = svmax_x (p0, z0, -129)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s64.c +new file mode 100644 +index 000000000..66f00fdf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s64.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_s64_m_tied1: ++** smax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_s64_m_tied1, svint64_t, ++ z0 = svmax_s64_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** smax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_s64_m_tied2, svint64_t, ++ z0 = svmax_s64_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_s64_m_untied: ++** movprfx z0, z1 ++** smax z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_s64_m_untied, svint64_t, ++ z0 = svmax_s64_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** smax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svmax_n_s64_m (p0, z0, x0), ++ z0 = svmax_m (p0, z0, x0)) ++ ++/* ++** max_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** smax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svmax_n_s64_m (p0, z1, x0), ++ z0 = svmax_m (p0, z1, x0)) ++ ++/* ++** max_1_s64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** smax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s64_m_tied1, svint64_t, ++ z0 = svmax_n_s64_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** smax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s64_m_untied, svint64_t, ++ z0 = svmax_n_s64_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_m1_s64_m: ++** mov (z[0-9]+)\.b, #-1 ++** smax z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_s64_m, svint64_t, ++ z0 = svmax_n_s64_m (p0, z0, -1), ++ z0 = svmax_m (p0, z0, -1)) ++ ++/* ++** max_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** smax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_s64_z_tied1, svint64_t, ++ z0 = svmax_s64_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** smax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_s64_z_tied2, svint64_t, ++ z0 = svmax_s64_z (p0, z1, z0), ++ z0 = svmax_z (p0, z1, z0)) ++ ++/* ++** max_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** smax z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** smax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_s64_z_untied, svint64_t, ++ z0 = svmax_s64_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** smax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ 
++TEST_UNIFORM_ZX (max_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svmax_n_s64_z (p0, z0, x0), ++ z0 = svmax_z (p0, z0, x0)) ++ ++/* ++** max_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** smax z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** smax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svmax_n_s64_z (p0, z1, x0), ++ z0 = svmax_z (p0, z1, x0)) ++ ++/* ++** max_1_s64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** smax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s64_z_tied1, svint64_t, ++ z0 = svmax_n_s64_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_s64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** smax z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** smax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s64_z_untied, svint64_t, ++ z0 = svmax_n_s64_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_s64_x_tied1: ++** smax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_s64_x_tied1, svint64_t, ++ z0 = svmax_s64_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_s64_x_tied2: ++** smax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_s64_x_tied2, svint64_t, ++ z0 = svmax_s64_x (p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** smax z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** smax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_s64_x_untied, svint64_t, ++ z0 = svmax_s64_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** smax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svmax_n_s64_x (p0, z0, x0), ++ z0 = svmax_x (p0, z0, x0)) ++ ++/* ++** max_x0_s64_x_untied: ++** mov z0\.d, x0 ++** smax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svmax_n_s64_x (p0, z1, x0), ++ z0 = svmax_x (p0, z1, x0)) ++ ++/* ++** max_1_s64_x_tied1: ++** smax z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s64_x_tied1, svint64_t, ++ z0 = svmax_n_s64_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_s64_x_untied: ++** movprfx z0, z1 ++** smax z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s64_x_untied, svint64_t, ++ z0 = svmax_n_s64_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_127_s64_x: ++** smax z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (max_127_s64_x, svint64_t, ++ z0 = svmax_n_s64_x (p0, z0, 127), ++ z0 = svmax_x (p0, z0, 127)) ++ ++/* ++** max_128_s64_x: ++** mov (z[0-9]+\.d), #128 ++** smax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_128_s64_x, svint64_t, ++ z0 = svmax_n_s64_x (p0, z0, 128), ++ z0 = svmax_x (p0, z0, 128)) ++ ++/* ++** max_m1_s64_x: ++** smax z0\.d, z0\.d, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_s64_x, svint64_t, ++ z0 = svmax_n_s64_x (p0, z0, -1), ++ z0 = svmax_x (p0, z0, -1)) ++ ++/* ++** max_m128_s64_x: ++** smax z0\.d, z0\.d, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m128_s64_x, svint64_t, ++ z0 = svmax_n_s64_x (p0, z0, -128), ++ z0 = svmax_x (p0, z0, -128)) ++ ++/* ++** max_m129_s64_x: ++** mov (z[0-9]+\.d), #-129 ++** smax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m129_s64_x, svint64_t, ++ z0 = svmax_n_s64_x (p0, z0, -129), ++ z0 = svmax_x 
(p0, z0, -129)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s8.c +new file mode 100644 +index 000000000..c651a26f0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s8.c +@@ -0,0 +1,273 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_s8_m_tied1: ++** smax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_s8_m_tied1, svint8_t, ++ z0 = svmax_s8_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** smax z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_s8_m_tied2, svint8_t, ++ z0 = svmax_s8_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_s8_m_untied: ++** movprfx z0, z1 ++** smax z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_s8_m_untied, svint8_t, ++ z0 = svmax_s8_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** smax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svmax_n_s8_m (p0, z0, x0), ++ z0 = svmax_m (p0, z0, x0)) ++ ++/* ++** max_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** smax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svmax_n_s8_m (p0, z1, x0), ++ z0 = svmax_m (p0, z1, x0)) ++ ++/* ++** max_1_s8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** smax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s8_m_tied1, svint8_t, ++ z0 = svmax_n_s8_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** smax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s8_m_untied, svint8_t, ++ z0 = svmax_n_s8_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_m1_s8_m: ++** mov (z[0-9]+\.b), #-1 ++** smax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_s8_m, svint8_t, ++ z0 = svmax_n_s8_m (p0, z0, -1), ++ z0 = svmax_m (p0, z0, -1)) ++ ++/* ++** max_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** smax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_s8_z_tied1, svint8_t, ++ z0 = svmax_s8_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** smax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_s8_z_tied2, svint8_t, ++ z0 = svmax_s8_z (p0, z1, z0), ++ z0 = svmax_z (p0, z1, z0)) ++ ++/* ++** max_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** smax z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** smax z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_s8_z_untied, svint8_t, ++ z0 = svmax_s8_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** smax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svmax_n_s8_z (p0, z0, x0), ++ z0 = svmax_z (p0, z0, x0)) ++ ++/* ++** max_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** smax z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** smax z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svmax_n_s8_z (p0, z1, x0), ++ z0 = svmax_z (p0, z1, x0)) ++ ++/* ++** 
max_1_s8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** smax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s8_z_tied1, svint8_t, ++ z0 = svmax_n_s8_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_s8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** smax z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** smax z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s8_z_untied, svint8_t, ++ z0 = svmax_n_s8_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_s8_x_tied1: ++** smax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_s8_x_tied1, svint8_t, ++ z0 = svmax_s8_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_s8_x_tied2: ++** smax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_s8_x_tied2, svint8_t, ++ z0 = svmax_s8_x (p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** smax z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** smax z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_s8_x_untied, svint8_t, ++ z0 = svmax_s8_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** smax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svmax_n_s8_x (p0, z0, x0), ++ z0 = svmax_x (p0, z0, x0)) ++ ++/* ++** max_w0_s8_x_untied: ++** mov z0\.b, w0 ++** smax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svmax_n_s8_x (p0, z1, x0), ++ z0 = svmax_x (p0, z1, x0)) ++ ++/* ++** max_1_s8_x_tied1: ++** smax z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s8_x_tied1, svint8_t, ++ z0 = svmax_n_s8_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_s8_x_untied: ++** movprfx z0, z1 ++** smax z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_s8_x_untied, svint8_t, ++ z0 = svmax_n_s8_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_127_s8_x: ++** smax z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (max_127_s8_x, svint8_t, ++ z0 = svmax_n_s8_x (p0, z0, 127), ++ z0 = svmax_x (p0, z0, 127)) ++ ++/* ++** max_m1_s8_x: ++** smax z0\.b, z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_s8_x, svint8_t, ++ z0 = svmax_n_s8_x (p0, z0, -1), ++ z0 = svmax_x (p0, z0, -1)) ++ ++/* ++** max_m127_s8_x: ++** smax z0\.b, z0\.b, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m127_s8_x, svint8_t, ++ z0 = svmax_n_s8_x (p0, z0, -127), ++ z0 = svmax_x (p0, z0, -127)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u16.c +new file mode 100644 +index 000000000..9a0b95431 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u16.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_u16_m_tied1: ++** umax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_u16_m_tied1, svuint16_t, ++ z0 = svmax_u16_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** umax z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_u16_m_tied2, svuint16_t, ++ z0 = svmax_u16_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_u16_m_untied: ++** movprfx z0, z1 ++** umax z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_u16_m_untied, svuint16_t, ++ z0 
= svmax_u16_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** umax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svmax_n_u16_m (p0, z0, x0), ++ z0 = svmax_m (p0, z0, x0)) ++ ++/* ++** max_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** umax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svmax_n_u16_m (p0, z1, x0), ++ z0 = svmax_m (p0, z1, x0)) ++ ++/* ++** max_1_u16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** umax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u16_m_tied1, svuint16_t, ++ z0 = svmax_n_u16_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** umax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u16_m_untied, svuint16_t, ++ z0 = svmax_n_u16_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_m1_u16_m: ++** mov (z[0-9]+)\.b, #-1 ++** umax z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_u16_m, svuint16_t, ++ z0 = svmax_n_u16_m (p0, z0, -1), ++ z0 = svmax_m (p0, z0, -1)) ++ ++/* ++** max_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** umax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_u16_z_tied1, svuint16_t, ++ z0 = svmax_u16_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** umax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_u16_z_tied2, svuint16_t, ++ z0 = svmax_u16_z (p0, z1, z0), ++ z0 = svmax_z (p0, z1, z0)) ++ ++/* ++** max_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** umax z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** umax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_u16_z_untied, svuint16_t, ++ z0 = svmax_u16_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** umax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svmax_n_u16_z (p0, z0, x0), ++ z0 = svmax_z (p0, z0, x0)) ++ ++/* ++** max_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** umax z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** umax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svmax_n_u16_z (p0, z1, x0), ++ z0 = svmax_z (p0, z1, x0)) ++ ++/* ++** max_1_u16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** umax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u16_z_tied1, svuint16_t, ++ z0 = svmax_n_u16_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_u16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** umax z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** umax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u16_z_untied, svuint16_t, ++ z0 = svmax_n_u16_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_u16_x_tied1: ++** umax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_u16_x_tied1, svuint16_t, ++ z0 = svmax_u16_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_u16_x_tied2: ++** umax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (max_u16_x_tied2, svuint16_t, ++ z0 = svmax_u16_x 
(p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** umax z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** umax z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_u16_x_untied, svuint16_t, ++ z0 = svmax_u16_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** umax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svmax_n_u16_x (p0, z0, x0), ++ z0 = svmax_x (p0, z0, x0)) ++ ++/* ++** max_w0_u16_x_untied: ++** mov z0\.h, w0 ++** umax z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svmax_n_u16_x (p0, z1, x0), ++ z0 = svmax_x (p0, z1, x0)) ++ ++/* ++** max_1_u16_x_tied1: ++** umax z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u16_x_tied1, svuint16_t, ++ z0 = svmax_n_u16_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_u16_x_untied: ++** movprfx z0, z1 ++** umax z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u16_x_untied, svuint16_t, ++ z0 = svmax_n_u16_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_127_u16_x: ++** umax z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (max_127_u16_x, svuint16_t, ++ z0 = svmax_n_u16_x (p0, z0, 127), ++ z0 = svmax_x (p0, z0, 127)) ++ ++/* ++** max_128_u16_x: ++** umax z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (max_128_u16_x, svuint16_t, ++ z0 = svmax_n_u16_x (p0, z0, 128), ++ z0 = svmax_x (p0, z0, 128)) ++ ++/* ++** max_255_u16_x: ++** umax z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (max_255_u16_x, svuint16_t, ++ z0 = svmax_n_u16_x (p0, z0, 255), ++ z0 = svmax_x (p0, z0, 255)) ++ ++/* ++** max_256_u16_x: ++** mov (z[0-9]+\.h), #256 ++** umax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_256_u16_x, svuint16_t, ++ z0 = svmax_n_u16_x (p0, z0, 256), ++ z0 = svmax_x (p0, z0, 256)) ++ ++/* ++** max_m2_u16_x: ++** mov (z[0-9]+\.h), #-2 ++** umax z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m2_u16_x, svuint16_t, ++ z0 = svmax_n_u16_x (p0, z0, -2), ++ z0 = svmax_x (p0, z0, -2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u32.c +new file mode 100644 +index 000000000..91eba25c1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u32.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_u32_m_tied1: ++** umax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_u32_m_tied1, svuint32_t, ++ z0 = svmax_u32_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** umax z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_u32_m_tied2, svuint32_t, ++ z0 = svmax_u32_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_u32_m_untied: ++** movprfx z0, z1 ++** umax z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_u32_m_untied, svuint32_t, ++ z0 = svmax_u32_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** umax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svmax_n_u32_m (p0, z0, x0), ++ z0 = svmax_m (p0, z0, x0)) ++ ++/* ++** max_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** umax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ 
++TEST_UNIFORM_ZX (max_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svmax_n_u32_m (p0, z1, x0), ++ z0 = svmax_m (p0, z1, x0)) ++ ++/* ++** max_1_u32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** umax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u32_m_tied1, svuint32_t, ++ z0 = svmax_n_u32_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** umax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u32_m_untied, svuint32_t, ++ z0 = svmax_n_u32_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_m1_u32_m: ++** mov (z[0-9]+)\.b, #-1 ++** umax z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_u32_m, svuint32_t, ++ z0 = svmax_n_u32_m (p0, z0, -1), ++ z0 = svmax_m (p0, z0, -1)) ++ ++/* ++** max_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** umax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_u32_z_tied1, svuint32_t, ++ z0 = svmax_u32_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** umax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_u32_z_tied2, svuint32_t, ++ z0 = svmax_u32_z (p0, z1, z0), ++ z0 = svmax_z (p0, z1, z0)) ++ ++/* ++** max_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** umax z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** umax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_u32_z_untied, svuint32_t, ++ z0 = svmax_u32_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** umax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svmax_n_u32_z (p0, z0, x0), ++ z0 = svmax_z (p0, z0, x0)) ++ ++/* ++** max_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** umax z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** umax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svmax_n_u32_z (p0, z1, x0), ++ z0 = svmax_z (p0, z1, x0)) ++ ++/* ++** max_1_u32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** umax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u32_z_tied1, svuint32_t, ++ z0 = svmax_n_u32_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_u32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** umax z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** umax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u32_z_untied, svuint32_t, ++ z0 = svmax_n_u32_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_u32_x_tied1: ++** umax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_u32_x_tied1, svuint32_t, ++ z0 = svmax_u32_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_u32_x_tied2: ++** umax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (max_u32_x_tied2, svuint32_t, ++ z0 = svmax_u32_x (p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** umax z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** umax z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_u32_x_untied, svuint32_t, ++ z0 = svmax_u32_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** umax z0\.s, p0/m, z0\.s, \1 ++** ret 
++*/ ++TEST_UNIFORM_ZX (max_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svmax_n_u32_x (p0, z0, x0), ++ z0 = svmax_x (p0, z0, x0)) ++ ++/* ++** max_w0_u32_x_untied: ++** mov z0\.s, w0 ++** umax z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svmax_n_u32_x (p0, z1, x0), ++ z0 = svmax_x (p0, z1, x0)) ++ ++/* ++** max_1_u32_x_tied1: ++** umax z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u32_x_tied1, svuint32_t, ++ z0 = svmax_n_u32_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_u32_x_untied: ++** movprfx z0, z1 ++** umax z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u32_x_untied, svuint32_t, ++ z0 = svmax_n_u32_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_127_u32_x: ++** umax z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (max_127_u32_x, svuint32_t, ++ z0 = svmax_n_u32_x (p0, z0, 127), ++ z0 = svmax_x (p0, z0, 127)) ++ ++/* ++** max_128_u32_x: ++** umax z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (max_128_u32_x, svuint32_t, ++ z0 = svmax_n_u32_x (p0, z0, 128), ++ z0 = svmax_x (p0, z0, 128)) ++ ++/* ++** max_255_u32_x: ++** umax z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (max_255_u32_x, svuint32_t, ++ z0 = svmax_n_u32_x (p0, z0, 255), ++ z0 = svmax_x (p0, z0, 255)) ++ ++/* ++** max_256_u32_x: ++** mov (z[0-9]+\.s), #256 ++** umax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_256_u32_x, svuint32_t, ++ z0 = svmax_n_u32_x (p0, z0, 256), ++ z0 = svmax_x (p0, z0, 256)) ++ ++/* ++** max_m2_u32_x: ++** mov (z[0-9]+\.s), #-2 ++** umax z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m2_u32_x, svuint32_t, ++ z0 = svmax_n_u32_x (p0, z0, -2), ++ z0 = svmax_x (p0, z0, -2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u64.c +new file mode 100644 +index 000000000..5be4c9fb7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u64.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_u64_m_tied1: ++** umax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_u64_m_tied1, svuint64_t, ++ z0 = svmax_u64_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** umax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_u64_m_tied2, svuint64_t, ++ z0 = svmax_u64_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_u64_m_untied: ++** movprfx z0, z1 ++** umax z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_u64_m_untied, svuint64_t, ++ z0 = svmax_u64_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** umax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svmax_n_u64_m (p0, z0, x0), ++ z0 = svmax_m (p0, z0, x0)) ++ ++/* ++** max_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** umax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svmax_n_u64_m (p0, z1, x0), ++ z0 = svmax_m (p0, z1, x0)) ++ ++/* ++** max_1_u64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** umax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u64_m_tied1, svuint64_t, ++ z0 = svmax_n_u64_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 
++** umax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u64_m_untied, svuint64_t, ++ z0 = svmax_n_u64_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_m1_u64_m: ++** mov (z[0-9]+)\.b, #-1 ++** umax z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_u64_m, svuint64_t, ++ z0 = svmax_n_u64_m (p0, z0, -1), ++ z0 = svmax_m (p0, z0, -1)) ++ ++/* ++** max_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** umax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_u64_z_tied1, svuint64_t, ++ z0 = svmax_u64_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** umax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_u64_z_tied2, svuint64_t, ++ z0 = svmax_u64_z (p0, z1, z0), ++ z0 = svmax_z (p0, z1, z0)) ++ ++/* ++** max_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** umax z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** umax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_u64_z_untied, svuint64_t, ++ z0 = svmax_u64_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** umax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svmax_n_u64_z (p0, z0, x0), ++ z0 = svmax_z (p0, z0, x0)) ++ ++/* ++** max_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** umax z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** umax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svmax_n_u64_z (p0, z1, x0), ++ z0 = svmax_z (p0, z1, x0)) ++ ++/* ++** max_1_u64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** umax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u64_z_tied1, svuint64_t, ++ z0 = svmax_n_u64_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_u64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** umax z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** umax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u64_z_untied, svuint64_t, ++ z0 = svmax_n_u64_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_u64_x_tied1: ++** umax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_u64_x_tied1, svuint64_t, ++ z0 = svmax_u64_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_u64_x_tied2: ++** umax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (max_u64_x_tied2, svuint64_t, ++ z0 = svmax_u64_x (p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** umax z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** umax z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_u64_x_untied, svuint64_t, ++ z0 = svmax_u64_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** umax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svmax_n_u64_x (p0, z0, x0), ++ z0 = svmax_x (p0, z0, x0)) ++ ++/* ++** max_x0_u64_x_untied: ++** mov z0\.d, x0 ++** umax z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (max_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svmax_n_u64_x (p0, z1, x0), ++ z0 = svmax_x (p0, z1, x0)) ++ ++/* ++** max_1_u64_x_tied1: ++** umax z0\.d, z0\.d, #1 ++** ret ++*/ 
++TEST_UNIFORM_Z (max_1_u64_x_tied1, svuint64_t, ++ z0 = svmax_n_u64_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_u64_x_untied: ++** movprfx z0, z1 ++** umax z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u64_x_untied, svuint64_t, ++ z0 = svmax_n_u64_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_127_u64_x: ++** umax z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (max_127_u64_x, svuint64_t, ++ z0 = svmax_n_u64_x (p0, z0, 127), ++ z0 = svmax_x (p0, z0, 127)) ++ ++/* ++** max_128_u64_x: ++** umax z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (max_128_u64_x, svuint64_t, ++ z0 = svmax_n_u64_x (p0, z0, 128), ++ z0 = svmax_x (p0, z0, 128)) ++ ++/* ++** max_255_u64_x: ++** umax z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (max_255_u64_x, svuint64_t, ++ z0 = svmax_n_u64_x (p0, z0, 255), ++ z0 = svmax_x (p0, z0, 255)) ++ ++/* ++** max_256_u64_x: ++** mov (z[0-9]+\.d), #256 ++** umax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_256_u64_x, svuint64_t, ++ z0 = svmax_n_u64_x (p0, z0, 256), ++ z0 = svmax_x (p0, z0, 256)) ++ ++/* ++** max_m2_u64_x: ++** mov (z[0-9]+\.d), #-2 ++** umax z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m2_u64_x, svuint64_t, ++ z0 = svmax_n_u64_x (p0, z0, -2), ++ z0 = svmax_x (p0, z0, -2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u8.c +new file mode 100644 +index 000000000..04c9ddb36 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u8.c +@@ -0,0 +1,273 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** max_u8_m_tied1: ++** umax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_u8_m_tied1, svuint8_t, ++ z0 = svmax_u8_m (p0, z0, z1), ++ z0 = svmax_m (p0, z0, z1)) ++ ++/* ++** max_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** umax z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_u8_m_tied2, svuint8_t, ++ z0 = svmax_u8_m (p0, z1, z0), ++ z0 = svmax_m (p0, z1, z0)) ++ ++/* ++** max_u8_m_untied: ++** movprfx z0, z1 ++** umax z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_u8_m_untied, svuint8_t, ++ z0 = svmax_u8_m (p0, z1, z2), ++ z0 = svmax_m (p0, z1, z2)) ++ ++/* ++** max_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** umax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svmax_n_u8_m (p0, z0, x0), ++ z0 = svmax_m (p0, z0, x0)) ++ ++/* ++** max_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** umax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svmax_n_u8_m (p0, z1, x0), ++ z0 = svmax_m (p0, z1, x0)) ++ ++/* ++** max_1_u8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** umax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u8_m_tied1, svuint8_t, ++ z0 = svmax_n_u8_m (p0, z0, 1), ++ z0 = svmax_m (p0, z0, 1)) ++ ++/* ++** max_1_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** umax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u8_m_untied, svuint8_t, ++ z0 = svmax_n_u8_m (p0, z1, 1), ++ z0 = svmax_m (p0, z1, 1)) ++ ++/* ++** max_m1_u8_m: ++** mov (z[0-9]+\.b), #-1 ++** umax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_m1_u8_m, svuint8_t, ++ z0 = svmax_n_u8_m (p0, z0, -1), ++ z0 = svmax_m (p0, z0, -1)) ++ ++/* ++** max_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** umax z0\.b, 
p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_u8_z_tied1, svuint8_t, ++ z0 = svmax_u8_z (p0, z0, z1), ++ z0 = svmax_z (p0, z0, z1)) ++ ++/* ++** max_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** umax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_u8_z_tied2, svuint8_t, ++ z0 = svmax_u8_z (p0, z1, z0), ++ z0 = svmax_z (p0, z1, z0)) ++ ++/* ++** max_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** umax z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** umax z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_u8_z_untied, svuint8_t, ++ z0 = svmax_u8_z (p0, z1, z2), ++ z0 = svmax_z (p0, z1, z2)) ++ ++/* ++** max_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** umax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svmax_n_u8_z (p0, z0, x0), ++ z0 = svmax_z (p0, z0, x0)) ++ ++/* ++** max_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** umax z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** umax z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svmax_n_u8_z (p0, z1, x0), ++ z0 = svmax_z (p0, z1, x0)) ++ ++/* ++** max_1_u8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** umax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u8_z_tied1, svuint8_t, ++ z0 = svmax_n_u8_z (p0, z0, 1), ++ z0 = svmax_z (p0, z0, 1)) ++ ++/* ++** max_1_u8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** umax z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** umax z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u8_z_untied, svuint8_t, ++ z0 = svmax_n_u8_z (p0, z1, 1), ++ z0 = svmax_z (p0, z1, 1)) ++ ++/* ++** max_u8_x_tied1: ++** umax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_u8_x_tied1, svuint8_t, ++ z0 = svmax_u8_x (p0, z0, z1), ++ z0 = svmax_x (p0, z0, z1)) ++ ++/* ++** max_u8_x_tied2: ++** umax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (max_u8_x_tied2, svuint8_t, ++ z0 = svmax_u8_x (p0, z1, z0), ++ z0 = svmax_x (p0, z1, z0)) ++ ++/* ++** max_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** umax z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** umax z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (max_u8_x_untied, svuint8_t, ++ z0 = svmax_u8_x (p0, z1, z2), ++ z0 = svmax_x (p0, z1, z2)) ++ ++/* ++** max_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** umax z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svmax_n_u8_x (p0, z0, x0), ++ z0 = svmax_x (p0, z0, x0)) ++ ++/* ++** max_w0_u8_x_untied: ++** mov z0\.b, w0 ++** umax z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (max_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svmax_n_u8_x (p0, z1, x0), ++ z0 = svmax_x (p0, z1, x0)) ++ ++/* ++** max_1_u8_x_tied1: ++** umax z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u8_x_tied1, svuint8_t, ++ z0 = svmax_n_u8_x (p0, z0, 1), ++ z0 = svmax_x (p0, z0, 1)) ++ ++/* ++** max_1_u8_x_untied: ++** movprfx z0, z1 ++** umax z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (max_1_u8_x_untied, svuint8_t, ++ z0 = svmax_n_u8_x (p0, z1, 1), ++ z0 = svmax_x (p0, z1, 1)) ++ ++/* ++** max_127_u8_x: ++** umax z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (max_127_u8_x, svuint8_t, ++ z0 = svmax_n_u8_x (p0, z0, 127), ++ z0 = svmax_x (p0, z0, 127)) ++ ++/* ++** 
max_128_u8_x: ++** umax z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (max_128_u8_x, svuint8_t, ++ z0 = svmax_n_u8_x (p0, z0, 128), ++ z0 = svmax_x (p0, z0, 128)) ++ ++/* ++** max_254_u8_x: ++** umax z0\.b, z0\.b, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (max_254_u8_x, svuint8_t, ++ z0 = svmax_n_u8_x (p0, z0, 254), ++ z0 = svmax_x (p0, z0, 254)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f16.c +new file mode 100644 +index 000000000..a9da710d0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f16.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxnm_f16_m_tied1: ++** fmaxnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f16_m_tied1, svfloat16_t, ++ z0 = svmaxnm_f16_m (p0, z0, z1), ++ z0 = svmaxnm_m (p0, z0, z1)) ++ ++/* ++** maxnm_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmaxnm z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f16_m_tied2, svfloat16_t, ++ z0 = svmaxnm_f16_m (p0, z1, z0), ++ z0 = svmaxnm_m (p0, z1, z0)) ++ ++/* ++** maxnm_f16_m_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f16_m_untied, svfloat16_t, ++ z0 = svmaxnm_f16_m (p0, z1, z2), ++ z0 = svmaxnm_m (p0, z1, z2)) ++ ++/* ++** maxnm_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmaxnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svmaxnm_n_f16_m (p0, z0, d4), ++ z0 = svmaxnm_m (p0, z0, d4)) ++ ++/* ++** maxnm_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fmaxnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svmaxnm_n_f16_m (p0, z1, d4), ++ z0 = svmaxnm_m (p0, z1, d4)) ++ ++/* ++** maxnm_0_f16_m_tied1: ++** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f16_m_tied1, svfloat16_t, ++ z0 = svmaxnm_n_f16_m (p0, z0, 0), ++ z0 = svmaxnm_m (p0, z0, 0)) ++ ++/* ++** maxnm_0_f16_m_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f16_m_untied, svfloat16_t, ++ z0 = svmaxnm_n_f16_m (p0, z1, 0), ++ z0 = svmaxnm_m (p0, z1, 0)) ++ ++/* ++** maxnm_1_f16_m_tied1: ++** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f16_m_tied1, svfloat16_t, ++ z0 = svmaxnm_n_f16_m (p0, z0, 1), ++ z0 = svmaxnm_m (p0, z0, 1)) ++ ++/* ++** maxnm_1_f16_m_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f16_m_untied, svfloat16_t, ++ z0 = svmaxnm_n_f16_m (p0, z1, 1), ++ z0 = svmaxnm_m (p0, z1, 1)) ++ ++/* ++** maxnm_2_f16_m: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** fmaxnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f16_m, svfloat16_t, ++ z0 = svmaxnm_n_f16_m (p0, z0, 2), ++ z0 = svmaxnm_m (p0, z0, 2)) ++ ++/* ++** maxnm_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmaxnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f16_z_tied1, svfloat16_t, ++ z0 = svmaxnm_f16_z (p0, z0, z1), ++ z0 = svmaxnm_z (p0, z0, z1)) ++ ++/* ++** maxnm_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fmaxnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f16_z_tied2, svfloat16_t, ++ z0 = svmaxnm_f16_z (p0, z1, z0), ++ z0 = svmaxnm_z (p0, z1, z0)) ++ ++/* ++** maxnm_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmaxnm z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmaxnm z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f16_z_untied, svfloat16_t, ++ z0 = svmaxnm_f16_z (p0, z1, z2), ++ z0 = svmaxnm_z (p0, z1, z2)) ++ ++/* ++** maxnm_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmaxnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svmaxnm_n_f16_z (p0, z0, d4), ++ z0 = svmaxnm_z (p0, z0, d4)) ++ ++/* ++** maxnm_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmaxnm z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmaxnm z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svmaxnm_n_f16_z (p0, z1, d4), ++ z0 = svmaxnm_z (p0, z1, d4)) ++ ++/* ++** maxnm_0_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f16_z_tied1, svfloat16_t, ++ z0 = svmaxnm_n_f16_z (p0, z0, 0), ++ z0 = svmaxnm_z (p0, z0, 0)) ++ ++/* ++** maxnm_0_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f16_z_untied, svfloat16_t, ++ z0 = svmaxnm_n_f16_z (p0, z1, 0), ++ z0 = svmaxnm_z (p0, z1, 0)) ++ ++/* ++** maxnm_1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f16_z_tied1, svfloat16_t, ++ z0 = svmaxnm_n_f16_z (p0, z0, 1), ++ z0 = svmaxnm_z (p0, z0, 1)) ++ ++/* ++** maxnm_1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f16_z_untied, svfloat16_t, ++ z0 = svmaxnm_n_f16_z (p0, z1, 1), ++ z0 = svmaxnm_z (p0, z1, 1)) ++ ++/* ++** maxnm_2_f16_z: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0\.h, p0/z, z0\.h ++** fmaxnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f16_z, svfloat16_t, ++ z0 = svmaxnm_n_f16_z (p0, z0, 2), ++ z0 = svmaxnm_z (p0, z0, 2)) ++ ++/* ++** maxnm_f16_x_tied1: ++** fmaxnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f16_x_tied1, svfloat16_t, ++ z0 = svmaxnm_f16_x (p0, z0, z1), ++ z0 = svmaxnm_x (p0, z0, z1)) ++ ++/* ++** maxnm_f16_x_tied2: ++** fmaxnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f16_x_tied2, svfloat16_t, ++ z0 = svmaxnm_f16_x (p0, z1, z0), ++ z0 = svmaxnm_x (p0, z1, z0)) ++ ++/* ++** maxnm_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fmaxnm z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fmaxnm z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f16_x_untied, svfloat16_t, ++ z0 = svmaxnm_f16_x (p0, z1, z2), ++ z0 = svmaxnm_x (p0, z1, z2)) ++ ++/* ++** maxnm_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmaxnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svmaxnm_n_f16_x (p0, z0, d4), ++ z0 = svmaxnm_x (p0, z0, d4)) ++ ++/* ++** maxnm_h4_f16_x_untied: ++** mov z0\.h, h4 ++** fmaxnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svmaxnm_n_f16_x (p0, z1, d4), ++ z0 = svmaxnm_x (p0, z1, d4)) ++ ++/* ++** maxnm_0_f16_x_tied1: ++** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f16_x_tied1, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (p0, z0, 0), ++ z0 = svmaxnm_x (p0, z0, 0)) ++ ++/* ++** maxnm_0_f16_x_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f16_x_untied, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (p0, z1, 0), ++ z0 = svmaxnm_x (p0, z1, 0)) ++ ++/* ++** maxnm_1_f16_x_tied1: ++** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f16_x_tied1, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (p0, z0, 1), ++ z0 = svmaxnm_x (p0, z0, 1)) ++ ++/* ++** maxnm_1_f16_x_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f16_x_untied, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (p0, z1, 1), ++ z0 = svmaxnm_x (p0, z1, 1)) ++ ++/* ++** maxnm_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmaxnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f16_x_tied1, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (p0, z0, 2), ++ z0 = svmaxnm_x (p0, z0, 2)) ++ ++/* ++** maxnm_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fmaxnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f16_x_untied, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (p0, z1, 2), ++ z0 = svmaxnm_x (p0, z1, 2)) ++ ++/* ++** ptrue_maxnm_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_f16_x_tied1, svfloat16_t, ++ z0 = svmaxnm_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svmaxnm_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_maxnm_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_f16_x_tied2, svfloat16_t, ++ z0 = svmaxnm_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svmaxnm_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_maxnm_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_f16_x_untied, svfloat16_t, ++ z0 = svmaxnm_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svmaxnm_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_maxnm_0_f16_x_tied1: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_0_f16_x_tied1, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z0, 0), ++ z0 = svmaxnm_x (svptrue_b16 (), z0, 0)) ++ ++/* ++** ptrue_maxnm_0_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_0_f16_x_untied, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z1, 0), ++ z0 = svmaxnm_x (svptrue_b16 (), z1, 0)) ++ ++/* ++** ptrue_maxnm_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_1_f16_x_tied1, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svmaxnm_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_maxnm_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_1_f16_x_untied, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svmaxnm_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_maxnm_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_2_f16_x_tied1, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svmaxnm_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_maxnm_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_2_f16_x_untied, svfloat16_t, ++ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svmaxnm_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f32.c +new file mode 100644 +index 000000000..4657d57c0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f32.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxnm_f32_m_tied1: ++** fmaxnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f32_m_tied1, svfloat32_t, ++ z0 = svmaxnm_f32_m (p0, z0, z1), ++ z0 = svmaxnm_m (p0, z0, z1)) ++ ++/* ++** maxnm_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmaxnm z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f32_m_tied2, svfloat32_t, ++ z0 = svmaxnm_f32_m (p0, z1, z0), ++ z0 = svmaxnm_m (p0, z1, z0)) ++ ++/* ++** maxnm_f32_m_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f32_m_untied, svfloat32_t, ++ z0 = svmaxnm_f32_m (p0, z1, z2), ++ z0 = svmaxnm_m (p0, z1, z2)) ++ ++/* ++** maxnm_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmaxnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svmaxnm_n_f32_m (p0, z0, d4), ++ z0 = svmaxnm_m (p0, z0, d4)) ++ ++/* ++** maxnm_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fmaxnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svmaxnm_n_f32_m (p0, z1, d4), ++ z0 = svmaxnm_m (p0, z1, d4)) ++ ++/* ++** maxnm_0_f32_m_tied1: ++** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f32_m_tied1, svfloat32_t, ++ z0 = svmaxnm_n_f32_m (p0, z0, 0), ++ z0 = svmaxnm_m (p0, z0, 0)) ++ ++/* ++** maxnm_0_f32_m_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f32_m_untied, svfloat32_t, ++ z0 = svmaxnm_n_f32_m (p0, z1, 0), ++ z0 = svmaxnm_m (p0, z1, 0)) ++ ++/* ++** maxnm_1_f32_m_tied1: ++** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ 
++TEST_UNIFORM_Z (maxnm_1_f32_m_tied1, svfloat32_t, ++ z0 = svmaxnm_n_f32_m (p0, z0, 1), ++ z0 = svmaxnm_m (p0, z0, 1)) ++ ++/* ++** maxnm_1_f32_m_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f32_m_untied, svfloat32_t, ++ z0 = svmaxnm_n_f32_m (p0, z1, 1), ++ z0 = svmaxnm_m (p0, z1, 1)) ++ ++/* ++** maxnm_2_f32_m: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmaxnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f32_m, svfloat32_t, ++ z0 = svmaxnm_n_f32_m (p0, z0, 2), ++ z0 = svmaxnm_m (p0, z0, 2)) ++ ++/* ++** maxnm_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmaxnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f32_z_tied1, svfloat32_t, ++ z0 = svmaxnm_f32_z (p0, z0, z1), ++ z0 = svmaxnm_z (p0, z0, z1)) ++ ++/* ++** maxnm_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fmaxnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f32_z_tied2, svfloat32_t, ++ z0 = svmaxnm_f32_z (p0, z1, z0), ++ z0 = svmaxnm_z (p0, z1, z0)) ++ ++/* ++** maxnm_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmaxnm z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmaxnm z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f32_z_untied, svfloat32_t, ++ z0 = svmaxnm_f32_z (p0, z1, z2), ++ z0 = svmaxnm_z (p0, z1, z2)) ++ ++/* ++** maxnm_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmaxnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svmaxnm_n_f32_z (p0, z0, d4), ++ z0 = svmaxnm_z (p0, z0, d4)) ++ ++/* ++** maxnm_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmaxnm z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmaxnm z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svmaxnm_n_f32_z (p0, z1, d4), ++ z0 = svmaxnm_z (p0, z1, d4)) ++ ++/* ++** maxnm_0_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f32_z_tied1, svfloat32_t, ++ z0 = svmaxnm_n_f32_z (p0, z0, 0), ++ z0 = svmaxnm_z (p0, z0, 0)) ++ ++/* ++** maxnm_0_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f32_z_untied, svfloat32_t, ++ z0 = svmaxnm_n_f32_z (p0, z1, 0), ++ z0 = svmaxnm_z (p0, z1, 0)) ++ ++/* ++** maxnm_1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f32_z_tied1, svfloat32_t, ++ z0 = svmaxnm_n_f32_z (p0, z0, 1), ++ z0 = svmaxnm_z (p0, z0, 1)) ++ ++/* ++** maxnm_1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f32_z_untied, svfloat32_t, ++ z0 = svmaxnm_n_f32_z (p0, z1, 1), ++ z0 = svmaxnm_z (p0, z1, 1)) ++ ++/* ++** maxnm_2_f32_z: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0\.s, p0/z, z0\.s ++** fmaxnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f32_z, svfloat32_t, ++ z0 = svmaxnm_n_f32_z (p0, z0, 2), ++ z0 = svmaxnm_z (p0, z0, 2)) ++ ++/* ++** maxnm_f32_x_tied1: ++** fmaxnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f32_x_tied1, svfloat32_t, ++ z0 = svmaxnm_f32_x (p0, z0, z1), ++ z0 = svmaxnm_x (p0, z0, z1)) ++ ++/* ++** maxnm_f32_x_tied2: ++** fmaxnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f32_x_tied2, svfloat32_t, ++ z0 = svmaxnm_f32_x (p0, z1, z0), ++ z0 = svmaxnm_x (p0, z1, z0)) ++ ++/* ++** maxnm_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fmaxnm z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fmaxnm z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f32_x_untied, svfloat32_t, ++ z0 = svmaxnm_f32_x (p0, z1, z2), ++ z0 = svmaxnm_x (p0, z1, z2)) ++ ++/* ++** maxnm_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmaxnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svmaxnm_n_f32_x (p0, z0, d4), ++ z0 = svmaxnm_x (p0, z0, d4)) ++ ++/* ++** maxnm_s4_f32_x_untied: ++** mov z0\.s, s4 ++** fmaxnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svmaxnm_n_f32_x (p0, z1, d4), ++ z0 = svmaxnm_x (p0, z1, d4)) ++ ++/* ++** maxnm_0_f32_x_tied1: ++** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f32_x_tied1, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (p0, z0, 0), ++ z0 = svmaxnm_x (p0, z0, 0)) ++ ++/* ++** maxnm_0_f32_x_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f32_x_untied, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (p0, z1, 0), ++ z0 = svmaxnm_x (p0, z1, 0)) ++ ++/* ++** maxnm_1_f32_x_tied1: ++** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f32_x_tied1, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (p0, z0, 1), ++ z0 = svmaxnm_x (p0, z0, 1)) ++ ++/* ++** maxnm_1_f32_x_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f32_x_untied, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (p0, z1, 1), ++ z0 = svmaxnm_x (p0, z1, 1)) ++ ++/* ++** maxnm_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmaxnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f32_x_tied1, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (p0, z0, 2), ++ z0 = svmaxnm_x (p0, z0, 2)) ++ ++/* ++** maxnm_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fmaxnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f32_x_untied, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (p0, z1, 2), ++ z0 = svmaxnm_x (p0, z1, 2)) ++ ++/* ++** ptrue_maxnm_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_f32_x_tied1, svfloat32_t, ++ z0 = svmaxnm_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svmaxnm_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_maxnm_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_f32_x_tied2, svfloat32_t, ++ z0 = svmaxnm_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svmaxnm_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_maxnm_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_f32_x_untied, svfloat32_t, ++ z0 = svmaxnm_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svmaxnm_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_maxnm_0_f32_x_tied1: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_0_f32_x_tied1, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z0, 0), ++ z0 = svmaxnm_x (svptrue_b32 (), z0, 0)) ++ ++/* ++** ptrue_maxnm_0_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_0_f32_x_untied, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z1, 0), ++ z0 = svmaxnm_x (svptrue_b32 (), z1, 0)) ++ ++/* ++** ptrue_maxnm_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_1_f32_x_tied1, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svmaxnm_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_maxnm_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_1_f32_x_untied, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svmaxnm_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_maxnm_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_2_f32_x_tied1, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svmaxnm_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_maxnm_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_2_f32_x_untied, svfloat32_t, ++ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svmaxnm_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f64.c +new file mode 100644 +index 000000000..07d88e6c1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f64.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxnm_f64_m_tied1: ++** fmaxnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f64_m_tied1, svfloat64_t, ++ z0 = svmaxnm_f64_m (p0, z0, z1), ++ z0 = svmaxnm_m (p0, z0, z1)) ++ ++/* ++** maxnm_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmaxnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f64_m_tied2, svfloat64_t, ++ z0 = svmaxnm_f64_m (p0, z1, z0), ++ z0 = svmaxnm_m (p0, z1, z0)) ++ ++/* ++** maxnm_f64_m_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f64_m_untied, svfloat64_t, ++ z0 = svmaxnm_f64_m (p0, z1, z2), ++ z0 = svmaxnm_m (p0, z1, z2)) ++ ++/* ++** maxnm_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmaxnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svmaxnm_n_f64_m (p0, z0, d4), ++ z0 = svmaxnm_m (p0, z0, d4)) ++ ++/* ++** maxnm_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fmaxnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svmaxnm_n_f64_m (p0, z1, d4), ++ z0 = svmaxnm_m (p0, z1, d4)) ++ ++/* ++** maxnm_0_f64_m_tied1: ++** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f64_m_tied1, svfloat64_t, ++ z0 = svmaxnm_n_f64_m (p0, z0, 0), ++ z0 = svmaxnm_m (p0, z0, 0)) ++ ++/* ++** maxnm_0_f64_m_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f64_m_untied, svfloat64_t, ++ z0 = svmaxnm_n_f64_m (p0, z1, 0), ++ z0 = svmaxnm_m (p0, z1, 0)) ++ ++/* ++** maxnm_1_f64_m_tied1: ++** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ 
++TEST_UNIFORM_Z (maxnm_1_f64_m_tied1, svfloat64_t, ++ z0 = svmaxnm_n_f64_m (p0, z0, 1), ++ z0 = svmaxnm_m (p0, z0, 1)) ++ ++/* ++** maxnm_1_f64_m_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f64_m_untied, svfloat64_t, ++ z0 = svmaxnm_n_f64_m (p0, z1, 1), ++ z0 = svmaxnm_m (p0, z1, 1)) ++ ++/* ++** maxnm_2_f64_m: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmaxnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f64_m, svfloat64_t, ++ z0 = svmaxnm_n_f64_m (p0, z0, 2), ++ z0 = svmaxnm_m (p0, z0, 2)) ++ ++/* ++** maxnm_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmaxnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f64_z_tied1, svfloat64_t, ++ z0 = svmaxnm_f64_z (p0, z0, z1), ++ z0 = svmaxnm_z (p0, z0, z1)) ++ ++/* ++** maxnm_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fmaxnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f64_z_tied2, svfloat64_t, ++ z0 = svmaxnm_f64_z (p0, z1, z0), ++ z0 = svmaxnm_z (p0, z1, z0)) ++ ++/* ++** maxnm_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmaxnm z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmaxnm z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f64_z_untied, svfloat64_t, ++ z0 = svmaxnm_f64_z (p0, z1, z2), ++ z0 = svmaxnm_z (p0, z1, z2)) ++ ++/* ++** maxnm_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmaxnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svmaxnm_n_f64_z (p0, z0, d4), ++ z0 = svmaxnm_z (p0, z0, d4)) ++ ++/* ++** maxnm_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmaxnm z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmaxnm z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svmaxnm_n_f64_z (p0, z1, d4), ++ z0 = svmaxnm_z (p0, z1, d4)) ++ ++/* ++** maxnm_0_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f64_z_tied1, svfloat64_t, ++ z0 = svmaxnm_n_f64_z (p0, z0, 0), ++ z0 = svmaxnm_z (p0, z0, 0)) ++ ++/* ++** maxnm_0_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f64_z_untied, svfloat64_t, ++ z0 = svmaxnm_n_f64_z (p0, z1, 0), ++ z0 = svmaxnm_z (p0, z1, 0)) ++ ++/* ++** maxnm_1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f64_z_tied1, svfloat64_t, ++ z0 = svmaxnm_n_f64_z (p0, z0, 1), ++ z0 = svmaxnm_z (p0, z0, 1)) ++ ++/* ++** maxnm_1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f64_z_untied, svfloat64_t, ++ z0 = svmaxnm_n_f64_z (p0, z1, 1), ++ z0 = svmaxnm_z (p0, z1, 1)) ++ ++/* ++** maxnm_2_f64_z: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0\.d, p0/z, z0\.d ++** fmaxnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f64_z, svfloat64_t, ++ z0 = svmaxnm_n_f64_z (p0, z0, 2), ++ z0 = svmaxnm_z (p0, z0, 2)) ++ ++/* ++** maxnm_f64_x_tied1: ++** fmaxnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f64_x_tied1, svfloat64_t, ++ z0 = svmaxnm_f64_x (p0, z0, z1), ++ z0 = svmaxnm_x (p0, z0, z1)) ++ ++/* ++** maxnm_f64_x_tied2: ++** fmaxnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f64_x_tied2, svfloat64_t, ++ z0 = svmaxnm_f64_x (p0, z1, z0), ++ z0 = svmaxnm_x (p0, z1, z0)) ++ ++/* ++** maxnm_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fmaxnm z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fmaxnm z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_f64_x_untied, svfloat64_t, ++ z0 = svmaxnm_f64_x (p0, z1, z2), ++ z0 = svmaxnm_x (p0, z1, z2)) ++ ++/* ++** maxnm_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmaxnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svmaxnm_n_f64_x (p0, z0, d4), ++ z0 = svmaxnm_x (p0, z0, d4)) ++ ++/* ++** maxnm_d4_f64_x_untied: ++** mov z0\.d, d4 ++** fmaxnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (maxnm_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svmaxnm_n_f64_x (p0, z1, d4), ++ z0 = svmaxnm_x (p0, z1, d4)) ++ ++/* ++** maxnm_0_f64_x_tied1: ++** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f64_x_tied1, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (p0, z0, 0), ++ z0 = svmaxnm_x (p0, z0, 0)) ++ ++/* ++** maxnm_0_f64_x_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_0_f64_x_untied, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (p0, z1, 0), ++ z0 = svmaxnm_x (p0, z1, 0)) ++ ++/* ++** maxnm_1_f64_x_tied1: ++** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f64_x_tied1, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (p0, z0, 1), ++ z0 = svmaxnm_x (p0, z0, 1)) ++ ++/* ++** maxnm_1_f64_x_untied: ++** movprfx z0, z1 ++** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_1_f64_x_untied, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (p0, z1, 1), ++ z0 = svmaxnm_x (p0, z1, 1)) ++ ++/* ++** maxnm_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmaxnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f64_x_tied1, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (p0, z0, 2), ++ z0 = svmaxnm_x (p0, z0, 2)) ++ ++/* ++** maxnm_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fmaxnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (maxnm_2_f64_x_untied, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (p0, z1, 2), ++ z0 = svmaxnm_x (p0, z1, 2)) ++ ++/* ++** ptrue_maxnm_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_f64_x_tied1, svfloat64_t, ++ z0 = svmaxnm_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svmaxnm_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_maxnm_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_f64_x_tied2, svfloat64_t, ++ z0 = svmaxnm_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svmaxnm_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_maxnm_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_f64_x_untied, svfloat64_t, ++ z0 = svmaxnm_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svmaxnm_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_maxnm_0_f64_x_tied1: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_0_f64_x_tied1, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z0, 0), ++ z0 = svmaxnm_x (svptrue_b64 (), z0, 0)) ++ ++/* ++** ptrue_maxnm_0_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_0_f64_x_untied, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z1, 0), ++ z0 = svmaxnm_x (svptrue_b64 (), z1, 0)) ++ ++/* ++** ptrue_maxnm_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_1_f64_x_tied1, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svmaxnm_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_maxnm_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_1_f64_x_untied, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svmaxnm_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_maxnm_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_2_f64_x_tied1, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svmaxnm_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_maxnm_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_maxnm_2_f64_x_untied, svfloat64_t, ++ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svmaxnm_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f16.c +new file mode 100644 +index 000000000..086bcf974 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxnmv_d0_f16_tied: ++** fmaxnmv h0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_D (maxnmv_d0_f16_tied, float16_t, svfloat16_t, ++ d0 = svmaxnmv_f16 (p0, z0), ++ d0 = svmaxnmv (p0, z0)) ++ ++/* ++** maxnmv_d0_f16_untied: ++** fmaxnmv h0, p0, z1\.h ++** ret ++*/ ++TEST_REDUCTION_D (maxnmv_d0_f16_untied, float16_t, svfloat16_t, ++ d0 = svmaxnmv_f16 (p0, z1), ++ d0 = svmaxnmv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f32.c +new file mode 100644 +index 000000000..7fca8bc9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxnmv_d0_f32_tied: ++** fmaxnmv s0, p0, z0\.s ++** ret ++*/ ++TEST_REDUCTION_D (maxnmv_d0_f32_tied, float32_t, svfloat32_t, ++ d0 = svmaxnmv_f32 (p0, z0), ++ d0 = svmaxnmv (p0, z0)) ++ ++/* ++** maxnmv_d0_f32_untied: ++** fmaxnmv s0, p0, z1\.s ++** ret ++*/ ++TEST_REDUCTION_D (maxnmv_d0_f32_untied, float32_t, svfloat32_t, ++ d0 = svmaxnmv_f32 (p0, z1), ++ d0 = svmaxnmv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f64.c +new file mode 100644 +index 000000000..8b0884479 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxnmv_d0_f64_tied: ++** fmaxnmv d0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_D (maxnmv_d0_f64_tied, float64_t, svfloat64_t, ++ d0 = 
svmaxnmv_f64 (p0, z0), ++ d0 = svmaxnmv (p0, z0)) ++ ++/* ++** maxnmv_d0_f64_untied: ++** fmaxnmv d0, p0, z1\.d ++** ret ++*/ ++TEST_REDUCTION_D (maxnmv_d0_f64_untied, float64_t, svfloat64_t, ++ d0 = svmaxnmv_f64 (p0, z1), ++ d0 = svmaxnmv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f16.c +new file mode 100644 +index 000000000..a16823987 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_d0_f16_tied: ++** fmaxv h0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_D (maxv_d0_f16_tied, float16_t, svfloat16_t, ++ d0 = svmaxv_f16 (p0, z0), ++ d0 = svmaxv (p0, z0)) ++ ++/* ++** maxv_d0_f16_untied: ++** fmaxv h0, p0, z1\.h ++** ret ++*/ ++TEST_REDUCTION_D (maxv_d0_f16_untied, float16_t, svfloat16_t, ++ d0 = svmaxv_f16 (p0, z1), ++ d0 = svmaxv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f32.c +new file mode 100644 +index 000000000..64e5edfef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_d0_f32_tied: ++** fmaxv s0, p0, z0\.s ++** ret ++*/ ++TEST_REDUCTION_D (maxv_d0_f32_tied, float32_t, svfloat32_t, ++ d0 = svmaxv_f32 (p0, z0), ++ d0 = svmaxv (p0, z0)) ++ ++/* ++** maxv_d0_f32_untied: ++** fmaxv s0, p0, z1\.s ++** ret ++*/ ++TEST_REDUCTION_D (maxv_d0_f32_untied, float32_t, svfloat32_t, ++ d0 = svmaxv_f32 (p0, z1), ++ d0 = svmaxv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f64.c +new file mode 100644 +index 000000000..837d6dfdc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_d0_f64_tied: ++** fmaxv d0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_D (maxv_d0_f64_tied, float64_t, svfloat64_t, ++ d0 = svmaxv_f64 (p0, z0), ++ d0 = svmaxv (p0, z0)) ++ ++/* ++** maxv_d0_f64_untied: ++** fmaxv d0, p0, z1\.d ++** ret ++*/ ++TEST_REDUCTION_D (maxv_d0_f64_untied, float64_t, svfloat64_t, ++ d0 = svmaxv_f64 (p0, z1), ++ d0 = svmaxv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s16.c +new file mode 100644 +index 000000000..bbf36a110 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_x0_s16: ++** smaxv h([0-9]+), p0, z0\.h ++** umov w0, v\1\.h\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (maxv_x0_s16, int16_t, svint16_t, ++ x0 = svmaxv_s16 (p0, z0), ++ x0 = svmaxv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s32.c +new file mode 100644 +index 000000000..645169ee8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_x0_s32: ++** smaxv (s[0-9]+), p0, z0\.s ++** fmov w0, \1 ++** 
ret ++*/ ++TEST_REDUCTION_X (maxv_x0_s32, int32_t, svint32_t, ++ x0 = svmaxv_s32 (p0, z0), ++ x0 = svmaxv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s64.c +new file mode 100644 +index 000000000..009c1e9e2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_x0_s64: ++** smaxv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (maxv_x0_s64, int64_t, svint64_t, ++ x0 = svmaxv_s64 (p0, z0), ++ x0 = svmaxv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s8.c +new file mode 100644 +index 000000000..2c1f1b9b3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_x0_s8: ++** smaxv b([0-9]+), p0, z0\.b ++** umov w0, v\1\.b\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (maxv_x0_s8, int8_t, svint8_t, ++ x0 = svmaxv_s8 (p0, z0), ++ x0 = svmaxv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u16.c +new file mode 100644 +index 000000000..978b8251a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_x0_u16: ++** umaxv h([0-9]+), p0, z0\.h ++** umov w0, v\1\.h\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (maxv_x0_u16, uint16_t, svuint16_t, ++ x0 = svmaxv_u16 (p0, z0), ++ x0 = svmaxv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u32.c +new file mode 100644 +index 000000000..85853b4b0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_x0_u32: ++** umaxv (s[0-9]+), p0, z0\.s ++** fmov w0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (maxv_x0_u32, uint32_t, svuint32_t, ++ x0 = svmaxv_u32 (p0, z0), ++ x0 = svmaxv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u64.c +new file mode 100644 +index 000000000..95980ed34 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_x0_u64: ++** umaxv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (maxv_x0_u64, uint64_t, svuint64_t, ++ x0 = svmaxv_u64 (p0, z0), ++ x0 = svmaxv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u8.c +new file mode 100644 +index 000000000..a0b23d242 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** maxv_x0_u8: ++** umaxv b([0-9]+), p0, z0\.b ++** umov w0, v\1\.b\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (maxv_x0_u8, uint8_t, svuint8_t, ++ x0 = svmaxv_u8 (p0, z0), 
++ x0 = svmaxv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f16.c +new file mode 100644 +index 000000000..721ee7389 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f16.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_f16_m_tied1: ++** fmin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_f16_m_tied1, svfloat16_t, ++ z0 = svmin_f16_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmin z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_f16_m_tied2, svfloat16_t, ++ z0 = svmin_f16_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_f16_m_untied: ++** movprfx z0, z1 ++** fmin z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_f16_m_untied, svfloat16_t, ++ z0 = svmin_f16_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svmin_n_f16_m (p0, z0, d4), ++ z0 = svmin_m (p0, z0, d4)) ++ ++/* ++** min_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fmin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svmin_n_f16_m (p0, z1, d4), ++ z0 = svmin_m (p0, z1, d4)) ++ ++/* ++** min_0_f16_m_tied1: ++** fmin z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f16_m_tied1, svfloat16_t, ++ z0 = svmin_n_f16_m (p0, z0, 0), ++ z0 = svmin_m (p0, z0, 0)) ++ ++/* ++** min_0_f16_m_untied: ++** movprfx z0, z1 ++** fmin z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f16_m_untied, svfloat16_t, ++ z0 = svmin_n_f16_m (p0, z1, 0), ++ z0 = svmin_m (p0, z1, 0)) ++ ++/* ++** min_1_f16_m_tied1: ++** fmin z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f16_m_tied1, svfloat16_t, ++ z0 = svmin_n_f16_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_f16_m_untied: ++** movprfx z0, z1 ++** fmin z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f16_m_untied, svfloat16_t, ++ z0 = svmin_n_f16_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_2_f16_m: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** fmin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f16_m, svfloat16_t, ++ z0 = svmin_n_f16_m (p0, z0, 2), ++ z0 = svmin_m (p0, z0, 2)) ++ ++/* ++** min_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_f16_z_tied1, svfloat16_t, ++ z0 = svmin_f16_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fmin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_f16_z_tied2, svfloat16_t, ++ z0 = svmin_f16_z (p0, z1, z0), ++ z0 = svmin_z (p0, z1, z0)) ++ ++/* ++** min_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmin z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_f16_z_untied, svfloat16_t, ++ z0 = svmin_f16_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svmin_n_f16_z (p0, z0, d4), ++ z0 = svmin_z (p0, z0, d4)) ++ ++/* ++** min_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmin z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (min_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svmin_n_f16_z (p0, z1, d4), ++ z0 = svmin_z (p0, z1, d4)) ++ ++/* ++** min_0_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmin z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f16_z_tied1, svfloat16_t, ++ z0 = svmin_n_f16_z (p0, z0, 0), ++ z0 = svmin_z (p0, z0, 0)) ++ ++/* ++** min_0_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fmin z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f16_z_untied, svfloat16_t, ++ z0 = svmin_n_f16_z (p0, z1, 0), ++ z0 = svmin_z (p0, z1, 0)) ++ ++/* ++** min_1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmin z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f16_z_tied1, svfloat16_t, ++ z0 = svmin_n_f16_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fmin z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f16_z_untied, svfloat16_t, ++ z0 = svmin_n_f16_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_2_f16_z: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0\.h, p0/z, z0\.h ++** fmin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f16_z, svfloat16_t, ++ z0 = svmin_n_f16_z (p0, z0, 2), ++ z0 = svmin_z (p0, z0, 2)) ++ ++/* ++** min_f16_x_tied1: ++** fmin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_f16_x_tied1, svfloat16_t, ++ z0 = svmin_f16_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_f16_x_tied2: ++** fmin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_f16_x_tied2, svfloat16_t, ++ z0 = svmin_f16_x (p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fmin z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fmin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_f16_x_untied, svfloat16_t, ++ z0 = svmin_f16_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svmin_n_f16_x (p0, z0, d4), ++ z0 = svmin_x (p0, z0, d4)) ++ ++/* ++** min_h4_f16_x_untied: ++** mov z0\.h, h4 ++** fmin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (min_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svmin_n_f16_x (p0, z1, d4), ++ z0 = svmin_x (p0, z1, d4)) ++ ++/* ++** min_0_f16_x_tied1: ++** fmin z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f16_x_tied1, svfloat16_t, ++ z0 = svmin_n_f16_x (p0, z0, 0), ++ z0 = svmin_x (p0, z0, 0)) ++ ++/* ++** min_0_f16_x_untied: ++** movprfx z0, z1 ++** fmin z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f16_x_untied, svfloat16_t, ++ z0 = svmin_n_f16_x (p0, z1, 0), ++ z0 = svmin_x (p0, z1, 0)) ++ ++/* ++** min_1_f16_x_tied1: ++** fmin z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f16_x_tied1, svfloat16_t, ++ z0 = svmin_n_f16_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_f16_x_untied: ++** movprfx z0, z1 ++** fmin z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f16_x_untied, svfloat16_t, ++ z0 = svmin_n_f16_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f16_x_tied1, svfloat16_t, ++ z0 = svmin_n_f16_x (p0, z0, 2), ++ z0 = svmin_x (p0, z0, 2)) ++ ++/* ++** min_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fmin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f16_x_untied, svfloat16_t, ++ z0 = svmin_n_f16_x (p0, z1, 2), ++ z0 = svmin_x (p0, z1, 2)) ++ ++/* ++** ptrue_min_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_f16_x_tied1, svfloat16_t, ++ z0 = svmin_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svmin_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_min_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_f16_x_tied2, svfloat16_t, ++ z0 = svmin_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svmin_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_min_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_f16_x_untied, svfloat16_t, ++ z0 = svmin_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svmin_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_min_0_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_0_f16_x_tied1, svfloat16_t, ++ z0 = svmin_n_f16_x (svptrue_b16 (), z0, 0), ++ z0 = svmin_x (svptrue_b16 (), z0, 0)) ++ ++/* ++** ptrue_min_0_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_0_f16_x_untied, svfloat16_t, ++ z0 = svmin_n_f16_x (svptrue_b16 (), z1, 0), ++ z0 = svmin_x (svptrue_b16 (), z1, 0)) ++ ++/* ++** ptrue_min_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_1_f16_x_tied1, svfloat16_t, ++ z0 = svmin_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svmin_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_min_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_1_f16_x_untied, svfloat16_t, ++ z0 = svmin_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svmin_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_min_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_2_f16_x_tied1, svfloat16_t, ++ z0 = svmin_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svmin_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_min_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_2_f16_x_untied, svfloat16_t, ++ z0 = svmin_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svmin_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f32.c +new file mode 100644 +index 000000000..a3b1cf5c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f32.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_f32_m_tied1: ++** fmin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_f32_m_tied1, svfloat32_t, ++ z0 = svmin_f32_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmin z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_f32_m_tied2, svfloat32_t, ++ z0 = svmin_f32_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_f32_m_untied: ++** movprfx z0, z1 ++** fmin z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_f32_m_untied, svfloat32_t, ++ z0 = svmin_f32_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svmin_n_f32_m (p0, z0, d4), ++ z0 = svmin_m (p0, z0, d4)) ++ ++/* ++** min_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fmin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svmin_n_f32_m (p0, z1, d4), ++ z0 = svmin_m (p0, z1, d4)) ++ ++/* ++** min_0_f32_m_tied1: ++** fmin z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f32_m_tied1, svfloat32_t, ++ z0 = svmin_n_f32_m (p0, z0, 0), ++ z0 = svmin_m (p0, z0, 0)) ++ ++/* ++** min_0_f32_m_untied: ++** movprfx z0, z1 ++** fmin z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f32_m_untied, svfloat32_t, ++ z0 = svmin_n_f32_m (p0, z1, 0), ++ z0 = svmin_m (p0, z1, 0)) ++ ++/* ++** min_1_f32_m_tied1: ++** fmin z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f32_m_tied1, svfloat32_t, ++ z0 = svmin_n_f32_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_f32_m_untied: ++** movprfx z0, z1 ++** fmin 
z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f32_m_untied, svfloat32_t, ++ z0 = svmin_n_f32_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_2_f32_m: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f32_m, svfloat32_t, ++ z0 = svmin_n_f32_m (p0, z0, 2), ++ z0 = svmin_m (p0, z0, 2)) ++ ++/* ++** min_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_f32_z_tied1, svfloat32_t, ++ z0 = svmin_f32_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fmin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_f32_z_tied2, svfloat32_t, ++ z0 = svmin_f32_z (p0, z1, z0), ++ z0 = svmin_z (p0, z1, z0)) ++ ++/* ++** min_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmin z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_f32_z_untied, svfloat32_t, ++ z0 = svmin_f32_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svmin_n_f32_z (p0, z0, d4), ++ z0 = svmin_z (p0, z0, d4)) ++ ++/* ++** min_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmin z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (min_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svmin_n_f32_z (p0, z1, d4), ++ z0 = svmin_z (p0, z1, d4)) ++ ++/* ++** min_0_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmin z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f32_z_tied1, svfloat32_t, ++ z0 = svmin_n_f32_z (p0, z0, 0), ++ z0 = svmin_z (p0, z0, 0)) ++ ++/* ++** min_0_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fmin z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f32_z_untied, svfloat32_t, ++ z0 = svmin_n_f32_z (p0, z1, 0), ++ z0 = svmin_z (p0, z1, 0)) ++ ++/* ++** min_1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmin z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f32_z_tied1, svfloat32_t, ++ z0 = svmin_n_f32_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fmin z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f32_z_untied, svfloat32_t, ++ z0 = svmin_n_f32_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_2_f32_z: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0\.s, p0/z, z0\.s ++** fmin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f32_z, svfloat32_t, ++ z0 = svmin_n_f32_z (p0, z0, 2), ++ z0 = svmin_z (p0, z0, 2)) ++ ++/* ++** min_f32_x_tied1: ++** fmin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_f32_x_tied1, svfloat32_t, ++ z0 = svmin_f32_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_f32_x_tied2: ++** fmin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_f32_x_tied2, svfloat32_t, ++ z0 = svmin_f32_x (p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fmin z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fmin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_f32_x_untied, svfloat32_t, ++ z0 = svmin_f32_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svmin_n_f32_x (p0, z0, d4), ++ z0 = svmin_x (p0, z0, d4)) ++ ++/* ++** min_s4_f32_x_untied: ++** mov z0\.s, s4 ++** fmin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (min_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svmin_n_f32_x (p0, z1, d4), ++ z0 = svmin_x (p0, z1, d4)) ++ ++/* ++** min_0_f32_x_tied1: ++** fmin z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f32_x_tied1, svfloat32_t, ++ z0 = svmin_n_f32_x (p0, z0, 0), ++ z0 = svmin_x (p0, z0, 0)) ++ ++/* ++** min_0_f32_x_untied: ++** movprfx z0, z1 ++** fmin z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f32_x_untied, svfloat32_t, ++ z0 = svmin_n_f32_x (p0, z1, 0), ++ z0 = svmin_x (p0, z1, 0)) ++ ++/* ++** min_1_f32_x_tied1: ++** fmin z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f32_x_tied1, svfloat32_t, ++ z0 = svmin_n_f32_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_f32_x_untied: ++** movprfx z0, z1 ++** fmin z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f32_x_untied, svfloat32_t, ++ z0 = svmin_n_f32_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f32_x_tied1, svfloat32_t, ++ z0 = svmin_n_f32_x (p0, z0, 2), ++ z0 = svmin_x (p0, z0, 2)) ++ ++/* ++** min_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fmin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f32_x_untied, svfloat32_t, ++ z0 = svmin_n_f32_x (p0, z1, 2), ++ z0 = svmin_x (p0, z1, 2)) ++ ++/* ++** ptrue_min_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_f32_x_tied1, svfloat32_t, ++ z0 = svmin_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svmin_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_min_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_f32_x_tied2, svfloat32_t, ++ z0 = svmin_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svmin_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_min_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_f32_x_untied, svfloat32_t, ++ z0 = svmin_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svmin_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_min_0_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_0_f32_x_tied1, svfloat32_t, ++ z0 = svmin_n_f32_x (svptrue_b32 (), z0, 0), ++ z0 = svmin_x (svptrue_b32 (), z0, 0)) ++ ++/* ++** ptrue_min_0_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_0_f32_x_untied, svfloat32_t, ++ z0 = svmin_n_f32_x (svptrue_b32 (), z1, 0), ++ z0 = svmin_x (svptrue_b32 (), z1, 0)) ++ ++/* ++** ptrue_min_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_1_f32_x_tied1, svfloat32_t, ++ z0 = svmin_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svmin_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_min_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_1_f32_x_untied, svfloat32_t, ++ z0 = svmin_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svmin_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_min_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_2_f32_x_tied1, svfloat32_t, ++ z0 = svmin_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svmin_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_min_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_2_f32_x_untied, svfloat32_t, ++ z0 = svmin_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svmin_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f64.c +new file mode 100644 +index 000000000..bb31102e2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f64.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_f64_m_tied1: ++** fmin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_f64_m_tied1, svfloat64_t, ++ z0 = svmin_f64_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_f64_m_tied2, svfloat64_t, ++ z0 = svmin_f64_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_f64_m_untied: ++** movprfx z0, z1 ++** fmin z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_f64_m_untied, svfloat64_t, ++ z0 = svmin_f64_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svmin_n_f64_m (p0, z0, d4), ++ z0 = svmin_m (p0, z0, d4)) ++ ++/* ++** min_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fmin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svmin_n_f64_m (p0, z1, d4), ++ z0 = svmin_m (p0, z1, d4)) ++ ++/* ++** min_0_f64_m_tied1: ++** fmin z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f64_m_tied1, svfloat64_t, ++ z0 = svmin_n_f64_m (p0, z0, 0), ++ z0 = svmin_m (p0, z0, 0)) ++ ++/* ++** min_0_f64_m_untied: ++** movprfx z0, z1 ++** fmin z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f64_m_untied, svfloat64_t, ++ z0 = svmin_n_f64_m (p0, z1, 0), ++ z0 = svmin_m (p0, z1, 0)) ++ ++/* ++** min_1_f64_m_tied1: ++** fmin z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f64_m_tied1, svfloat64_t, ++ z0 = svmin_n_f64_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_f64_m_untied: ++** movprfx z0, z1 ++** fmin 
z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f64_m_untied, svfloat64_t, ++ z0 = svmin_n_f64_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_2_f64_m: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f64_m, svfloat64_t, ++ z0 = svmin_n_f64_m (p0, z0, 2), ++ z0 = svmin_m (p0, z0, 2)) ++ ++/* ++** min_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_f64_z_tied1, svfloat64_t, ++ z0 = svmin_f64_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fmin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_f64_z_tied2, svfloat64_t, ++ z0 = svmin_f64_z (p0, z1, z0), ++ z0 = svmin_z (p0, z1, z0)) ++ ++/* ++** min_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmin z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_f64_z_untied, svfloat64_t, ++ z0 = svmin_f64_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svmin_n_f64_z (p0, z0, d4), ++ z0 = svmin_z (p0, z0, d4)) ++ ++/* ++** min_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmin z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (min_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svmin_n_f64_z (p0, z1, d4), ++ z0 = svmin_z (p0, z1, d4)) ++ ++/* ++** min_0_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmin z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f64_z_tied1, svfloat64_t, ++ z0 = svmin_n_f64_z (p0, z0, 0), ++ z0 = svmin_z (p0, z0, 0)) ++ ++/* ++** min_0_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fmin z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f64_z_untied, svfloat64_t, ++ z0 = svmin_n_f64_z (p0, z1, 0), ++ z0 = svmin_z (p0, z1, 0)) ++ ++/* ++** min_1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmin z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f64_z_tied1, svfloat64_t, ++ z0 = svmin_n_f64_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fmin z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f64_z_untied, svfloat64_t, ++ z0 = svmin_n_f64_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_2_f64_z: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0\.d, p0/z, z0\.d ++** fmin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f64_z, svfloat64_t, ++ z0 = svmin_n_f64_z (p0, z0, 2), ++ z0 = svmin_z (p0, z0, 2)) ++ ++/* ++** min_f64_x_tied1: ++** fmin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_f64_x_tied1, svfloat64_t, ++ z0 = svmin_f64_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_f64_x_tied2: ++** fmin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_f64_x_tied2, svfloat64_t, ++ z0 = svmin_f64_x (p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fmin z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fmin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_f64_x_untied, svfloat64_t, ++ z0 = svmin_f64_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (min_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svmin_n_f64_x (p0, z0, d4), ++ z0 = svmin_x (p0, z0, d4)) ++ ++/* ++** min_d4_f64_x_untied: ++** mov z0\.d, d4 ++** fmin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (min_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svmin_n_f64_x (p0, z1, d4), ++ z0 = svmin_x (p0, z1, d4)) ++ ++/* ++** min_0_f64_x_tied1: ++** fmin z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f64_x_tied1, svfloat64_t, ++ z0 = svmin_n_f64_x (p0, z0, 0), ++ z0 = svmin_x (p0, z0, 0)) ++ ++/* ++** min_0_f64_x_untied: ++** movprfx z0, z1 ++** fmin z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_0_f64_x_untied, svfloat64_t, ++ z0 = svmin_n_f64_x (p0, z1, 0), ++ z0 = svmin_x (p0, z1, 0)) ++ ++/* ++** min_1_f64_x_tied1: ++** fmin z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f64_x_tied1, svfloat64_t, ++ z0 = svmin_n_f64_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_f64_x_untied: ++** movprfx z0, z1 ++** fmin z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_f64_x_untied, svfloat64_t, ++ z0 = svmin_n_f64_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f64_x_tied1, svfloat64_t, ++ z0 = svmin_n_f64_x (p0, z0, 2), ++ z0 = svmin_x (p0, z0, 2)) ++ ++/* ++** min_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fmin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_2_f64_x_untied, svfloat64_t, ++ z0 = svmin_n_f64_x (p0, z1, 2), ++ z0 = svmin_x (p0, z1, 2)) ++ ++/* ++** ptrue_min_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_f64_x_tied1, svfloat64_t, ++ z0 = svmin_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svmin_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_min_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_f64_x_tied2, svfloat64_t, ++ z0 = svmin_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svmin_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_min_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_f64_x_untied, svfloat64_t, ++ z0 = svmin_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svmin_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_min_0_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_0_f64_x_tied1, svfloat64_t, ++ z0 = svmin_n_f64_x (svptrue_b64 (), z0, 0), ++ z0 = svmin_x (svptrue_b64 (), z0, 0)) ++ ++/* ++** ptrue_min_0_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_0_f64_x_untied, svfloat64_t, ++ z0 = svmin_n_f64_x (svptrue_b64 (), z1, 0), ++ z0 = svmin_x (svptrue_b64 (), z1, 0)) ++ ++/* ++** ptrue_min_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_1_f64_x_tied1, svfloat64_t, ++ z0 = svmin_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svmin_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_min_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_1_f64_x_untied, svfloat64_t, ++ z0 = svmin_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svmin_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_min_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_2_f64_x_tied1, svfloat64_t, ++ z0 = svmin_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svmin_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_min_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_min_2_f64_x_untied, svfloat64_t, ++ z0 = svmin_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svmin_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s16.c +new file mode 100644 +index 000000000..14dfcc4c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s16.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_s16_m_tied1: ++** smin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_s16_m_tied1, svint16_t, ++ z0 = svmin_s16_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** smin z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_s16_m_tied2, svint16_t, ++ z0 = svmin_s16_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_s16_m_untied: ++** movprfx z0, z1 ++** smin z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_s16_m_untied, svint16_t, ++ z0 = svmin_s16_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** smin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svmin_n_s16_m (p0, z0, x0), ++ z0 = svmin_m (p0, z0, x0)) ++ ++/* ++** min_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** smin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svmin_n_s16_m (p0, z1, x0), ++ z0 = svmin_m (p0, z1, x0)) ++ ++/* ++** min_1_s16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** smin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s16_m_tied1, svint16_t, ++ z0 = svmin_n_s16_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** smin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s16_m_untied, svint16_t, ++ z0 = svmin_n_s16_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_m1_s16_m: ++** mov (z[0-9]+)\.b, #-1 ++** smin z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_s16_m, svint16_t, ++ z0 = svmin_n_s16_m (p0, z0, -1), ++ z0 = 
svmin_m (p0, z0, -1)) ++ ++/* ++** min_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** smin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_s16_z_tied1, svint16_t, ++ z0 = svmin_s16_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** smin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_s16_z_tied2, svint16_t, ++ z0 = svmin_s16_z (p0, z1, z0), ++ z0 = svmin_z (p0, z1, z0)) ++ ++/* ++** min_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** smin z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** smin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_s16_z_untied, svint16_t, ++ z0 = svmin_s16_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** smin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svmin_n_s16_z (p0, z0, x0), ++ z0 = svmin_z (p0, z0, x0)) ++ ++/* ++** min_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** smin z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** smin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svmin_n_s16_z (p0, z1, x0), ++ z0 = svmin_z (p0, z1, x0)) ++ ++/* ++** min_1_s16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** smin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s16_z_tied1, svint16_t, ++ z0 = svmin_n_s16_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_s16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** smin z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** smin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s16_z_untied, svint16_t, ++ z0 = svmin_n_s16_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_s16_x_tied1: ++** smin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_s16_x_tied1, svint16_t, ++ z0 = svmin_s16_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_s16_x_tied2: ++** smin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_s16_x_tied2, svint16_t, ++ z0 = svmin_s16_x (p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** smin z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** smin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_s16_x_untied, svint16_t, ++ z0 = svmin_s16_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** smin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svmin_n_s16_x (p0, z0, x0), ++ z0 = svmin_x (p0, z0, x0)) ++ ++/* ++** min_w0_s16_x_untied: ++** mov z0\.h, w0 ++** smin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svmin_n_s16_x (p0, z1, x0), ++ z0 = svmin_x (p0, z1, x0)) ++ ++/* ++** min_1_s16_x_tied1: ++** smin z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s16_x_tied1, svint16_t, ++ z0 = svmin_n_s16_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_s16_x_untied: ++** movprfx z0, z1 ++** smin z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s16_x_untied, svint16_t, ++ z0 = svmin_n_s16_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_127_s16_x: ++** smin z0\.h, z0\.h, 
#127 ++** ret ++*/ ++TEST_UNIFORM_Z (min_127_s16_x, svint16_t, ++ z0 = svmin_n_s16_x (p0, z0, 127), ++ z0 = svmin_x (p0, z0, 127)) ++ ++/* ++** min_128_s16_x: ++** mov (z[0-9]+\.h), #128 ++** smin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_128_s16_x, svint16_t, ++ z0 = svmin_n_s16_x (p0, z0, 128), ++ z0 = svmin_x (p0, z0, 128)) ++ ++/* ++** min_m1_s16_x: ++** smin z0\.h, z0\.h, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_s16_x, svint16_t, ++ z0 = svmin_n_s16_x (p0, z0, -1), ++ z0 = svmin_x (p0, z0, -1)) ++ ++/* ++** min_m128_s16_x: ++** smin z0\.h, z0\.h, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m128_s16_x, svint16_t, ++ z0 = svmin_n_s16_x (p0, z0, -128), ++ z0 = svmin_x (p0, z0, -128)) ++ ++/* ++** min_m129_s16_x: ++** mov (z[0-9]+\.h), #-129 ++** smin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m129_s16_x, svint16_t, ++ z0 = svmin_n_s16_x (p0, z0, -129), ++ z0 = svmin_x (p0, z0, -129)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s32.c +new file mode 100644 +index 000000000..cee2b649d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s32.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_s32_m_tied1: ++** smin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_s32_m_tied1, svint32_t, ++ z0 = svmin_s32_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** smin z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_s32_m_tied2, svint32_t, ++ z0 = svmin_s32_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_s32_m_untied: ++** movprfx z0, z1 ++** smin z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_s32_m_untied, svint32_t, ++ z0 = svmin_s32_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** smin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svmin_n_s32_m (p0, z0, x0), ++ z0 = svmin_m (p0, z0, x0)) ++ ++/* ++** min_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** smin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svmin_n_s32_m (p0, z1, x0), ++ z0 = svmin_m (p0, z1, x0)) ++ ++/* ++** min_1_s32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** smin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s32_m_tied1, svint32_t, ++ z0 = svmin_n_s32_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** smin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s32_m_untied, svint32_t, ++ z0 = svmin_n_s32_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_m1_s32_m: ++** mov (z[0-9]+)\.b, #-1 ++** smin z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_s32_m, svint32_t, ++ z0 = svmin_n_s32_m (p0, z0, -1), ++ z0 = svmin_m (p0, z0, -1)) ++ ++/* ++** min_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** smin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_s32_z_tied1, svint32_t, ++ z0 = svmin_s32_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** smin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_s32_z_tied2, svint32_t, ++ z0 = svmin_s32_z (p0, z1, z0), ++ z0 = svmin_z (p0, 
z1, z0)) ++ ++/* ++** min_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** smin z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** smin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_s32_z_untied, svint32_t, ++ z0 = svmin_s32_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** smin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svmin_n_s32_z (p0, z0, x0), ++ z0 = svmin_z (p0, z0, x0)) ++ ++/* ++** min_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** smin z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** smin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svmin_n_s32_z (p0, z1, x0), ++ z0 = svmin_z (p0, z1, x0)) ++ ++/* ++** min_1_s32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** smin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s32_z_tied1, svint32_t, ++ z0 = svmin_n_s32_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_s32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** smin z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** smin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s32_z_untied, svint32_t, ++ z0 = svmin_n_s32_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_s32_x_tied1: ++** smin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_s32_x_tied1, svint32_t, ++ z0 = svmin_s32_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_s32_x_tied2: ++** smin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_s32_x_tied2, svint32_t, ++ z0 = svmin_s32_x (p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** smin z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** smin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_s32_x_untied, svint32_t, ++ z0 = svmin_s32_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** smin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svmin_n_s32_x (p0, z0, x0), ++ z0 = svmin_x (p0, z0, x0)) ++ ++/* ++** min_w0_s32_x_untied: ++** mov z0\.s, w0 ++** smin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svmin_n_s32_x (p0, z1, x0), ++ z0 = svmin_x (p0, z1, x0)) ++ ++/* ++** min_1_s32_x_tied1: ++** smin z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s32_x_tied1, svint32_t, ++ z0 = svmin_n_s32_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_s32_x_untied: ++** movprfx z0, z1 ++** smin z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s32_x_untied, svint32_t, ++ z0 = svmin_n_s32_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_127_s32_x: ++** smin z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (min_127_s32_x, svint32_t, ++ z0 = svmin_n_s32_x (p0, z0, 127), ++ z0 = svmin_x (p0, z0, 127)) ++ ++/* ++** min_128_s32_x: ++** mov (z[0-9]+\.s), #128 ++** smin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_128_s32_x, svint32_t, ++ z0 = svmin_n_s32_x (p0, z0, 128), ++ z0 = svmin_x (p0, z0, 128)) ++ ++/* ++** min_m1_s32_x: ++** smin z0\.s, z0\.s, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_s32_x, svint32_t, 
++ z0 = svmin_n_s32_x (p0, z0, -1), ++ z0 = svmin_x (p0, z0, -1)) ++ ++/* ++** min_m128_s32_x: ++** smin z0\.s, z0\.s, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m128_s32_x, svint32_t, ++ z0 = svmin_n_s32_x (p0, z0, -128), ++ z0 = svmin_x (p0, z0, -128)) ++ ++/* ++** min_m129_s32_x: ++** mov (z[0-9]+\.s), #-129 ++** smin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m129_s32_x, svint32_t, ++ z0 = svmin_n_s32_x (p0, z0, -129), ++ z0 = svmin_x (p0, z0, -129)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s64.c +new file mode 100644 +index 000000000..0d20bd0b2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s64.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_s64_m_tied1: ++** smin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_s64_m_tied1, svint64_t, ++ z0 = svmin_s64_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** smin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_s64_m_tied2, svint64_t, ++ z0 = svmin_s64_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_s64_m_untied: ++** movprfx z0, z1 ++** smin z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_s64_m_untied, svint64_t, ++ z0 = svmin_s64_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** smin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svmin_n_s64_m (p0, z0, x0), ++ z0 = svmin_m (p0, z0, x0)) ++ ++/* ++** min_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** smin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svmin_n_s64_m (p0, z1, x0), ++ z0 = svmin_m (p0, z1, x0)) ++ ++/* ++** min_1_s64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** smin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s64_m_tied1, svint64_t, ++ z0 = svmin_n_s64_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** smin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s64_m_untied, svint64_t, ++ z0 = svmin_n_s64_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_m1_s64_m: ++** mov (z[0-9]+)\.b, #-1 ++** smin z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_s64_m, svint64_t, ++ z0 = svmin_n_s64_m (p0, z0, -1), ++ z0 = svmin_m (p0, z0, -1)) ++ ++/* ++** min_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** smin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_s64_z_tied1, svint64_t, ++ z0 = svmin_s64_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** smin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_s64_z_tied2, svint64_t, ++ z0 = svmin_s64_z (p0, z1, z0), ++ z0 = svmin_z (p0, z1, z0)) ++ ++/* ++** min_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** smin z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** smin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_s64_z_untied, svint64_t, ++ z0 = svmin_s64_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** smin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ 
++TEST_UNIFORM_ZX (min_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svmin_n_s64_z (p0, z0, x0), ++ z0 = svmin_z (p0, z0, x0)) ++ ++/* ++** min_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** smin z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** smin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svmin_n_s64_z (p0, z1, x0), ++ z0 = svmin_z (p0, z1, x0)) ++ ++/* ++** min_1_s64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** smin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s64_z_tied1, svint64_t, ++ z0 = svmin_n_s64_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_s64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** smin z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** smin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s64_z_untied, svint64_t, ++ z0 = svmin_n_s64_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_s64_x_tied1: ++** smin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_s64_x_tied1, svint64_t, ++ z0 = svmin_s64_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_s64_x_tied2: ++** smin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_s64_x_tied2, svint64_t, ++ z0 = svmin_s64_x (p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** smin z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** smin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_s64_x_untied, svint64_t, ++ z0 = svmin_s64_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** smin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svmin_n_s64_x (p0, z0, x0), ++ z0 = svmin_x (p0, z0, x0)) ++ ++/* ++** min_x0_s64_x_untied: ++** mov z0\.d, x0 ++** smin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svmin_n_s64_x (p0, z1, x0), ++ z0 = svmin_x (p0, z1, x0)) ++ ++/* ++** min_1_s64_x_tied1: ++** smin z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s64_x_tied1, svint64_t, ++ z0 = svmin_n_s64_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_s64_x_untied: ++** movprfx z0, z1 ++** smin z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s64_x_untied, svint64_t, ++ z0 = svmin_n_s64_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_127_s64_x: ++** smin z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (min_127_s64_x, svint64_t, ++ z0 = svmin_n_s64_x (p0, z0, 127), ++ z0 = svmin_x (p0, z0, 127)) ++ ++/* ++** min_128_s64_x: ++** mov (z[0-9]+\.d), #128 ++** smin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_128_s64_x, svint64_t, ++ z0 = svmin_n_s64_x (p0, z0, 128), ++ z0 = svmin_x (p0, z0, 128)) ++ ++/* ++** min_m1_s64_x: ++** smin z0\.d, z0\.d, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_s64_x, svint64_t, ++ z0 = svmin_n_s64_x (p0, z0, -1), ++ z0 = svmin_x (p0, z0, -1)) ++ ++/* ++** min_m128_s64_x: ++** smin z0\.d, z0\.d, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m128_s64_x, svint64_t, ++ z0 = svmin_n_s64_x (p0, z0, -128), ++ z0 = svmin_x (p0, z0, -128)) ++ ++/* ++** min_m129_s64_x: ++** mov (z[0-9]+\.d), #-129 ++** smin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m129_s64_x, svint64_t, ++ z0 = svmin_n_s64_x (p0, z0, -129), ++ z0 = svmin_x 
(p0, z0, -129)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s8.c +new file mode 100644 +index 000000000..714b1576d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s8.c +@@ -0,0 +1,273 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_s8_m_tied1: ++** smin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_s8_m_tied1, svint8_t, ++ z0 = svmin_s8_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** smin z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_s8_m_tied2, svint8_t, ++ z0 = svmin_s8_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_s8_m_untied: ++** movprfx z0, z1 ++** smin z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_s8_m_untied, svint8_t, ++ z0 = svmin_s8_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** smin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svmin_n_s8_m (p0, z0, x0), ++ z0 = svmin_m (p0, z0, x0)) ++ ++/* ++** min_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** smin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svmin_n_s8_m (p0, z1, x0), ++ z0 = svmin_m (p0, z1, x0)) ++ ++/* ++** min_1_s8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** smin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s8_m_tied1, svint8_t, ++ z0 = svmin_n_s8_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** smin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s8_m_untied, svint8_t, ++ z0 = svmin_n_s8_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_m1_s8_m: ++** mov (z[0-9]+\.b), #-1 ++** smin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_s8_m, svint8_t, ++ z0 = svmin_n_s8_m (p0, z0, -1), ++ z0 = svmin_m (p0, z0, -1)) ++ ++/* ++** min_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** smin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_s8_z_tied1, svint8_t, ++ z0 = svmin_s8_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** smin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_s8_z_tied2, svint8_t, ++ z0 = svmin_s8_z (p0, z1, z0), ++ z0 = svmin_z (p0, z1, z0)) ++ ++/* ++** min_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** smin z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** smin z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_s8_z_untied, svint8_t, ++ z0 = svmin_s8_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** smin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svmin_n_s8_z (p0, z0, x0), ++ z0 = svmin_z (p0, z0, x0)) ++ ++/* ++** min_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** smin z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** smin z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svmin_n_s8_z (p0, z1, x0), ++ z0 = svmin_z (p0, z1, x0)) ++ ++/* ++** 
min_1_s8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** smin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s8_z_tied1, svint8_t, ++ z0 = svmin_n_s8_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_s8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** smin z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** smin z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s8_z_untied, svint8_t, ++ z0 = svmin_n_s8_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_s8_x_tied1: ++** smin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_s8_x_tied1, svint8_t, ++ z0 = svmin_s8_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_s8_x_tied2: ++** smin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_s8_x_tied2, svint8_t, ++ z0 = svmin_s8_x (p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** smin z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** smin z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_s8_x_untied, svint8_t, ++ z0 = svmin_s8_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** smin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svmin_n_s8_x (p0, z0, x0), ++ z0 = svmin_x (p0, z0, x0)) ++ ++/* ++** min_w0_s8_x_untied: ++** mov z0\.b, w0 ++** smin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svmin_n_s8_x (p0, z1, x0), ++ z0 = svmin_x (p0, z1, x0)) ++ ++/* ++** min_1_s8_x_tied1: ++** smin z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s8_x_tied1, svint8_t, ++ z0 = svmin_n_s8_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_s8_x_untied: ++** movprfx z0, z1 ++** smin z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_s8_x_untied, svint8_t, ++ z0 = svmin_n_s8_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_127_s8_x: ++** smin z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (min_127_s8_x, svint8_t, ++ z0 = svmin_n_s8_x (p0, z0, 127), ++ z0 = svmin_x (p0, z0, 127)) ++ ++/* ++** min_m1_s8_x: ++** smin z0\.b, z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_s8_x, svint8_t, ++ z0 = svmin_n_s8_x (p0, z0, -1), ++ z0 = svmin_x (p0, z0, -1)) ++ ++/* ++** min_m127_s8_x: ++** smin z0\.b, z0\.b, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m127_s8_x, svint8_t, ++ z0 = svmin_n_s8_x (p0, z0, -127), ++ z0 = svmin_x (p0, z0, -127)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u16.c +new file mode 100644 +index 000000000..df35cf113 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u16.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_u16_m_tied1: ++** umin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_u16_m_tied1, svuint16_t, ++ z0 = svmin_u16_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** umin z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_u16_m_tied2, svuint16_t, ++ z0 = svmin_u16_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_u16_m_untied: ++** movprfx z0, z1 ++** umin z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_u16_m_untied, svuint16_t, ++ z0 
= svmin_u16_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** umin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svmin_n_u16_m (p0, z0, x0), ++ z0 = svmin_m (p0, z0, x0)) ++ ++/* ++** min_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** umin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svmin_n_u16_m (p0, z1, x0), ++ z0 = svmin_m (p0, z1, x0)) ++ ++/* ++** min_1_u16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** umin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u16_m_tied1, svuint16_t, ++ z0 = svmin_n_u16_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** umin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u16_m_untied, svuint16_t, ++ z0 = svmin_n_u16_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_m1_u16_m: ++** mov (z[0-9]+)\.b, #-1 ++** umin z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_u16_m, svuint16_t, ++ z0 = svmin_n_u16_m (p0, z0, -1), ++ z0 = svmin_m (p0, z0, -1)) ++ ++/* ++** min_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** umin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_u16_z_tied1, svuint16_t, ++ z0 = svmin_u16_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** umin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_u16_z_tied2, svuint16_t, ++ z0 = svmin_u16_z (p0, z1, z0), ++ z0 = svmin_z (p0, z1, z0)) ++ ++/* ++** min_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** umin z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** umin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_u16_z_untied, svuint16_t, ++ z0 = svmin_u16_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** umin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svmin_n_u16_z (p0, z0, x0), ++ z0 = svmin_z (p0, z0, x0)) ++ ++/* ++** min_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** umin z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** umin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svmin_n_u16_z (p0, z1, x0), ++ z0 = svmin_z (p0, z1, x0)) ++ ++/* ++** min_1_u16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** umin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u16_z_tied1, svuint16_t, ++ z0 = svmin_n_u16_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_u16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** umin z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** umin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u16_z_untied, svuint16_t, ++ z0 = svmin_n_u16_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_u16_x_tied1: ++** umin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_u16_x_tied1, svuint16_t, ++ z0 = svmin_u16_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_u16_x_tied2: ++** umin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (min_u16_x_tied2, svuint16_t, ++ z0 = svmin_u16_x 
(p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** umin z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** umin z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_u16_x_untied, svuint16_t, ++ z0 = svmin_u16_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** umin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svmin_n_u16_x (p0, z0, x0), ++ z0 = svmin_x (p0, z0, x0)) ++ ++/* ++** min_w0_u16_x_untied: ++** mov z0\.h, w0 ++** umin z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svmin_n_u16_x (p0, z1, x0), ++ z0 = svmin_x (p0, z1, x0)) ++ ++/* ++** min_1_u16_x_tied1: ++** umin z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u16_x_tied1, svuint16_t, ++ z0 = svmin_n_u16_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_u16_x_untied: ++** movprfx z0, z1 ++** umin z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u16_x_untied, svuint16_t, ++ z0 = svmin_n_u16_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_127_u16_x: ++** umin z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (min_127_u16_x, svuint16_t, ++ z0 = svmin_n_u16_x (p0, z0, 127), ++ z0 = svmin_x (p0, z0, 127)) ++ ++/* ++** min_128_u16_x: ++** umin z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (min_128_u16_x, svuint16_t, ++ z0 = svmin_n_u16_x (p0, z0, 128), ++ z0 = svmin_x (p0, z0, 128)) ++ ++/* ++** min_255_u16_x: ++** umin z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (min_255_u16_x, svuint16_t, ++ z0 = svmin_n_u16_x (p0, z0, 255), ++ z0 = svmin_x (p0, z0, 255)) ++ ++/* ++** min_256_u16_x: ++** mov (z[0-9]+\.h), #256 ++** umin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_256_u16_x, svuint16_t, ++ z0 = svmin_n_u16_x (p0, z0, 256), ++ z0 = svmin_x (p0, z0, 256)) ++ ++/* ++** min_m2_u16_x: ++** mov (z[0-9]+\.h), #-2 ++** umin z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m2_u16_x, svuint16_t, ++ z0 = svmin_n_u16_x (p0, z0, -2), ++ z0 = svmin_x (p0, z0, -2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u32.c +new file mode 100644 +index 000000000..7f84d099d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u32.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_u32_m_tied1: ++** umin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_u32_m_tied1, svuint32_t, ++ z0 = svmin_u32_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** umin z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_u32_m_tied2, svuint32_t, ++ z0 = svmin_u32_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_u32_m_untied: ++** movprfx z0, z1 ++** umin z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_u32_m_untied, svuint32_t, ++ z0 = svmin_u32_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** umin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svmin_n_u32_m (p0, z0, x0), ++ z0 = svmin_m (p0, z0, x0)) ++ ++/* ++** min_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** umin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ 
++TEST_UNIFORM_ZX (min_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svmin_n_u32_m (p0, z1, x0), ++ z0 = svmin_m (p0, z1, x0)) ++ ++/* ++** min_1_u32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** umin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u32_m_tied1, svuint32_t, ++ z0 = svmin_n_u32_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** umin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u32_m_untied, svuint32_t, ++ z0 = svmin_n_u32_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_m1_u32_m: ++** mov (z[0-9]+)\.b, #-1 ++** umin z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_u32_m, svuint32_t, ++ z0 = svmin_n_u32_m (p0, z0, -1), ++ z0 = svmin_m (p0, z0, -1)) ++ ++/* ++** min_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** umin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_u32_z_tied1, svuint32_t, ++ z0 = svmin_u32_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** umin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_u32_z_tied2, svuint32_t, ++ z0 = svmin_u32_z (p0, z1, z0), ++ z0 = svmin_z (p0, z1, z0)) ++ ++/* ++** min_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** umin z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** umin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_u32_z_untied, svuint32_t, ++ z0 = svmin_u32_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** umin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svmin_n_u32_z (p0, z0, x0), ++ z0 = svmin_z (p0, z0, x0)) ++ ++/* ++** min_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** umin z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** umin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svmin_n_u32_z (p0, z1, x0), ++ z0 = svmin_z (p0, z1, x0)) ++ ++/* ++** min_1_u32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** umin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u32_z_tied1, svuint32_t, ++ z0 = svmin_n_u32_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_u32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** umin z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** umin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u32_z_untied, svuint32_t, ++ z0 = svmin_n_u32_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_u32_x_tied1: ++** umin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_u32_x_tied1, svuint32_t, ++ z0 = svmin_u32_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_u32_x_tied2: ++** umin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (min_u32_x_tied2, svuint32_t, ++ z0 = svmin_u32_x (p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** umin z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** umin z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_u32_x_untied, svuint32_t, ++ z0 = svmin_u32_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** umin z0\.s, p0/m, z0\.s, \1 ++** ret 
++*/ ++TEST_UNIFORM_ZX (min_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svmin_n_u32_x (p0, z0, x0), ++ z0 = svmin_x (p0, z0, x0)) ++ ++/* ++** min_w0_u32_x_untied: ++** mov z0\.s, w0 ++** umin z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svmin_n_u32_x (p0, z1, x0), ++ z0 = svmin_x (p0, z1, x0)) ++ ++/* ++** min_1_u32_x_tied1: ++** umin z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u32_x_tied1, svuint32_t, ++ z0 = svmin_n_u32_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_u32_x_untied: ++** movprfx z0, z1 ++** umin z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u32_x_untied, svuint32_t, ++ z0 = svmin_n_u32_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_127_u32_x: ++** umin z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (min_127_u32_x, svuint32_t, ++ z0 = svmin_n_u32_x (p0, z0, 127), ++ z0 = svmin_x (p0, z0, 127)) ++ ++/* ++** min_128_u32_x: ++** umin z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (min_128_u32_x, svuint32_t, ++ z0 = svmin_n_u32_x (p0, z0, 128), ++ z0 = svmin_x (p0, z0, 128)) ++ ++/* ++** min_255_u32_x: ++** umin z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (min_255_u32_x, svuint32_t, ++ z0 = svmin_n_u32_x (p0, z0, 255), ++ z0 = svmin_x (p0, z0, 255)) ++ ++/* ++** min_256_u32_x: ++** mov (z[0-9]+\.s), #256 ++** umin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_256_u32_x, svuint32_t, ++ z0 = svmin_n_u32_x (p0, z0, 256), ++ z0 = svmin_x (p0, z0, 256)) ++ ++/* ++** min_m2_u32_x: ++** mov (z[0-9]+\.s), #-2 ++** umin z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m2_u32_x, svuint32_t, ++ z0 = svmin_n_u32_x (p0, z0, -2), ++ z0 = svmin_x (p0, z0, -2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u64.c +new file mode 100644 +index 000000000..06e6e5099 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u64.c +@@ -0,0 +1,293 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_u64_m_tied1: ++** umin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_u64_m_tied1, svuint64_t, ++ z0 = svmin_u64_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** umin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_u64_m_tied2, svuint64_t, ++ z0 = svmin_u64_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_u64_m_untied: ++** movprfx z0, z1 ++** umin z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_u64_m_untied, svuint64_t, ++ z0 = svmin_u64_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** umin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svmin_n_u64_m (p0, z0, x0), ++ z0 = svmin_m (p0, z0, x0)) ++ ++/* ++** min_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** umin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svmin_n_u64_m (p0, z1, x0), ++ z0 = svmin_m (p0, z1, x0)) ++ ++/* ++** min_1_u64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** umin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u64_m_tied1, svuint64_t, ++ z0 = svmin_n_u64_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 
++** umin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u64_m_untied, svuint64_t, ++ z0 = svmin_n_u64_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_m1_u64_m: ++** mov (z[0-9]+)\.b, #-1 ++** umin z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_u64_m, svuint64_t, ++ z0 = svmin_n_u64_m (p0, z0, -1), ++ z0 = svmin_m (p0, z0, -1)) ++ ++/* ++** min_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** umin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_u64_z_tied1, svuint64_t, ++ z0 = svmin_u64_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** umin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_u64_z_tied2, svuint64_t, ++ z0 = svmin_u64_z (p0, z1, z0), ++ z0 = svmin_z (p0, z1, z0)) ++ ++/* ++** min_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** umin z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** umin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_u64_z_untied, svuint64_t, ++ z0 = svmin_u64_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** umin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svmin_n_u64_z (p0, z0, x0), ++ z0 = svmin_z (p0, z0, x0)) ++ ++/* ++** min_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** umin z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** umin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svmin_n_u64_z (p0, z1, x0), ++ z0 = svmin_z (p0, z1, x0)) ++ ++/* ++** min_1_u64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** umin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u64_z_tied1, svuint64_t, ++ z0 = svmin_n_u64_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_u64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** umin z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** umin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u64_z_untied, svuint64_t, ++ z0 = svmin_n_u64_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_u64_x_tied1: ++** umin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_u64_x_tied1, svuint64_t, ++ z0 = svmin_u64_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_u64_x_tied2: ++** umin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (min_u64_x_tied2, svuint64_t, ++ z0 = svmin_u64_x (p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** umin z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** umin z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_u64_x_untied, svuint64_t, ++ z0 = svmin_u64_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** umin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svmin_n_u64_x (p0, z0, x0), ++ z0 = svmin_x (p0, z0, x0)) ++ ++/* ++** min_x0_u64_x_untied: ++** mov z0\.d, x0 ++** umin z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (min_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svmin_n_u64_x (p0, z1, x0), ++ z0 = svmin_x (p0, z1, x0)) ++ ++/* ++** min_1_u64_x_tied1: ++** umin z0\.d, z0\.d, #1 ++** ret ++*/ 
++TEST_UNIFORM_Z (min_1_u64_x_tied1, svuint64_t, ++ z0 = svmin_n_u64_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_u64_x_untied: ++** movprfx z0, z1 ++** umin z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u64_x_untied, svuint64_t, ++ z0 = svmin_n_u64_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_127_u64_x: ++** umin z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (min_127_u64_x, svuint64_t, ++ z0 = svmin_n_u64_x (p0, z0, 127), ++ z0 = svmin_x (p0, z0, 127)) ++ ++/* ++** min_128_u64_x: ++** umin z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (min_128_u64_x, svuint64_t, ++ z0 = svmin_n_u64_x (p0, z0, 128), ++ z0 = svmin_x (p0, z0, 128)) ++ ++/* ++** min_255_u64_x: ++** umin z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (min_255_u64_x, svuint64_t, ++ z0 = svmin_n_u64_x (p0, z0, 255), ++ z0 = svmin_x (p0, z0, 255)) ++ ++/* ++** min_256_u64_x: ++** mov (z[0-9]+\.d), #256 ++** umin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_256_u64_x, svuint64_t, ++ z0 = svmin_n_u64_x (p0, z0, 256), ++ z0 = svmin_x (p0, z0, 256)) ++ ++/* ++** min_m2_u64_x: ++** mov (z[0-9]+\.d), #-2 ++** umin z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m2_u64_x, svuint64_t, ++ z0 = svmin_n_u64_x (p0, z0, -2), ++ z0 = svmin_x (p0, z0, -2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u8.c +new file mode 100644 +index 000000000..2ca274278 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u8.c +@@ -0,0 +1,273 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** min_u8_m_tied1: ++** umin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_u8_m_tied1, svuint8_t, ++ z0 = svmin_u8_m (p0, z0, z1), ++ z0 = svmin_m (p0, z0, z1)) ++ ++/* ++** min_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** umin z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_u8_m_tied2, svuint8_t, ++ z0 = svmin_u8_m (p0, z1, z0), ++ z0 = svmin_m (p0, z1, z0)) ++ ++/* ++** min_u8_m_untied: ++** movprfx z0, z1 ++** umin z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_u8_m_untied, svuint8_t, ++ z0 = svmin_u8_m (p0, z1, z2), ++ z0 = svmin_m (p0, z1, z2)) ++ ++/* ++** min_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** umin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svmin_n_u8_m (p0, z0, x0), ++ z0 = svmin_m (p0, z0, x0)) ++ ++/* ++** min_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** umin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svmin_n_u8_m (p0, z1, x0), ++ z0 = svmin_m (p0, z1, x0)) ++ ++/* ++** min_1_u8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** umin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u8_m_tied1, svuint8_t, ++ z0 = svmin_n_u8_m (p0, z0, 1), ++ z0 = svmin_m (p0, z0, 1)) ++ ++/* ++** min_1_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** umin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u8_m_untied, svuint8_t, ++ z0 = svmin_n_u8_m (p0, z1, 1), ++ z0 = svmin_m (p0, z1, 1)) ++ ++/* ++** min_m1_u8_m: ++** mov (z[0-9]+\.b), #-1 ++** umin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_m1_u8_m, svuint8_t, ++ z0 = svmin_n_u8_m (p0, z0, -1), ++ z0 = svmin_m (p0, z0, -1)) ++ ++/* ++** min_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** umin z0\.b, 
p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_u8_z_tied1, svuint8_t, ++ z0 = svmin_u8_z (p0, z0, z1), ++ z0 = svmin_z (p0, z0, z1)) ++ ++/* ++** min_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** umin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_u8_z_tied2, svuint8_t, ++ z0 = svmin_u8_z (p0, z1, z0), ++ z0 = svmin_z (p0, z1, z0)) ++ ++/* ++** min_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** umin z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** umin z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_u8_z_untied, svuint8_t, ++ z0 = svmin_u8_z (p0, z1, z2), ++ z0 = svmin_z (p0, z1, z2)) ++ ++/* ++** min_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** umin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svmin_n_u8_z (p0, z0, x0), ++ z0 = svmin_z (p0, z0, x0)) ++ ++/* ++** min_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** umin z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** umin z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svmin_n_u8_z (p0, z1, x0), ++ z0 = svmin_z (p0, z1, x0)) ++ ++/* ++** min_1_u8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** umin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u8_z_tied1, svuint8_t, ++ z0 = svmin_n_u8_z (p0, z0, 1), ++ z0 = svmin_z (p0, z0, 1)) ++ ++/* ++** min_1_u8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** umin z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** umin z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u8_z_untied, svuint8_t, ++ z0 = svmin_n_u8_z (p0, z1, 1), ++ z0 = svmin_z (p0, z1, 1)) ++ ++/* ++** min_u8_x_tied1: ++** umin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_u8_x_tied1, svuint8_t, ++ z0 = svmin_u8_x (p0, z0, z1), ++ z0 = svmin_x (p0, z0, z1)) ++ ++/* ++** min_u8_x_tied2: ++** umin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (min_u8_x_tied2, svuint8_t, ++ z0 = svmin_u8_x (p0, z1, z0), ++ z0 = svmin_x (p0, z1, z0)) ++ ++/* ++** min_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** umin z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** umin z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (min_u8_x_untied, svuint8_t, ++ z0 = svmin_u8_x (p0, z1, z2), ++ z0 = svmin_x (p0, z1, z2)) ++ ++/* ++** min_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** umin z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svmin_n_u8_x (p0, z0, x0), ++ z0 = svmin_x (p0, z0, x0)) ++ ++/* ++** min_w0_u8_x_untied: ++** mov z0\.b, w0 ++** umin z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (min_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svmin_n_u8_x (p0, z1, x0), ++ z0 = svmin_x (p0, z1, x0)) ++ ++/* ++** min_1_u8_x_tied1: ++** umin z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u8_x_tied1, svuint8_t, ++ z0 = svmin_n_u8_x (p0, z0, 1), ++ z0 = svmin_x (p0, z0, 1)) ++ ++/* ++** min_1_u8_x_untied: ++** movprfx z0, z1 ++** umin z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (min_1_u8_x_untied, svuint8_t, ++ z0 = svmin_n_u8_x (p0, z1, 1), ++ z0 = svmin_x (p0, z1, 1)) ++ ++/* ++** min_127_u8_x: ++** umin z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (min_127_u8_x, svuint8_t, ++ z0 = svmin_n_u8_x (p0, z0, 127), ++ z0 = svmin_x (p0, z0, 127)) ++ ++/* ++** 
min_128_u8_x: ++** umin z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (min_128_u8_x, svuint8_t, ++ z0 = svmin_n_u8_x (p0, z0, 128), ++ z0 = svmin_x (p0, z0, 128)) ++ ++/* ++** min_254_u8_x: ++** umin z0\.b, z0\.b, #254 ++** ret ++*/ ++TEST_UNIFORM_Z (min_254_u8_x, svuint8_t, ++ z0 = svmin_n_u8_x (p0, z0, 254), ++ z0 = svmin_x (p0, z0, 254)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f16.c +new file mode 100644 +index 000000000..43caaa14e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f16.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minnm_f16_m_tied1: ++** fminnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f16_m_tied1, svfloat16_t, ++ z0 = svminnm_f16_m (p0, z0, z1), ++ z0 = svminnm_m (p0, z0, z1)) ++ ++/* ++** minnm_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fminnm z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f16_m_tied2, svfloat16_t, ++ z0 = svminnm_f16_m (p0, z1, z0), ++ z0 = svminnm_m (p0, z1, z0)) ++ ++/* ++** minnm_f16_m_untied: ++** movprfx z0, z1 ++** fminnm z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f16_m_untied, svfloat16_t, ++ z0 = svminnm_f16_m (p0, z1, z2), ++ z0 = svminnm_m (p0, z1, z2)) ++ ++/* ++** minnm_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fminnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svminnm_n_f16_m (p0, z0, d4), ++ z0 = svminnm_m (p0, z0, d4)) ++ ++/* ++** minnm_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fminnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svminnm_n_f16_m (p0, z1, d4), ++ z0 = svminnm_m (p0, z1, d4)) ++ ++/* ++** minnm_0_f16_m_tied1: ++** fminnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f16_m_tied1, svfloat16_t, ++ z0 = svminnm_n_f16_m (p0, z0, 0), ++ z0 = svminnm_m (p0, z0, 0)) ++ ++/* ++** minnm_0_f16_m_untied: ++** movprfx z0, z1 ++** fminnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f16_m_untied, svfloat16_t, ++ z0 = svminnm_n_f16_m (p0, z1, 0), ++ z0 = svminnm_m (p0, z1, 0)) ++ ++/* ++** minnm_1_f16_m_tied1: ++** fminnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f16_m_tied1, svfloat16_t, ++ z0 = svminnm_n_f16_m (p0, z0, 1), ++ z0 = svminnm_m (p0, z0, 1)) ++ ++/* ++** minnm_1_f16_m_untied: ++** movprfx z0, z1 ++** fminnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f16_m_untied, svfloat16_t, ++ z0 = svminnm_n_f16_m (p0, z1, 1), ++ z0 = svminnm_m (p0, z1, 1)) ++ ++/* ++** minnm_2_f16_m: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** fminnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f16_m, svfloat16_t, ++ z0 = svminnm_n_f16_m (p0, z0, 2), ++ z0 = svminnm_m (p0, z0, 2)) ++ ++/* ++** minnm_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fminnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f16_z_tied1, svfloat16_t, ++ z0 = svminnm_f16_z (p0, z0, z1), ++ z0 = svminnm_z (p0, z0, z1)) ++ ++/* ++** minnm_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fminnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f16_z_tied2, svfloat16_t, ++ z0 = svminnm_f16_z (p0, z1, z0), ++ z0 = svminnm_z (p0, z1, z0)) ++ ++/* ++** minnm_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fminnm z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fminnm z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f16_z_untied, svfloat16_t, ++ z0 = svminnm_f16_z (p0, z1, z2), ++ z0 = svminnm_z (p0, z1, z2)) ++ ++/* ++** minnm_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fminnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svminnm_n_f16_z (p0, z0, d4), ++ z0 = svminnm_z (p0, z0, d4)) ++ ++/* ++** minnm_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fminnm z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fminnm z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svminnm_n_f16_z (p0, z1, d4), ++ z0 = svminnm_z (p0, z1, d4)) ++ ++/* ++** minnm_0_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fminnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f16_z_tied1, svfloat16_t, ++ z0 = svminnm_n_f16_z (p0, z0, 0), ++ z0 = svminnm_z (p0, z0, 0)) ++ ++/* ++** minnm_0_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fminnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f16_z_untied, svfloat16_t, ++ z0 = svminnm_n_f16_z (p0, z1, 0), ++ z0 = svminnm_z (p0, z1, 0)) ++ ++/* ++** minnm_1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fminnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f16_z_tied1, svfloat16_t, ++ z0 = svminnm_n_f16_z (p0, z0, 1), ++ z0 = svminnm_z (p0, z0, 1)) ++ ++/* ++** minnm_1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fminnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f16_z_untied, svfloat16_t, ++ z0 = svminnm_n_f16_z (p0, z1, 1), ++ z0 = svminnm_z (p0, z1, 1)) ++ ++/* ++** minnm_2_f16_z: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0\.h, p0/z, z0\.h ++** fminnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f16_z, svfloat16_t, ++ z0 = svminnm_n_f16_z (p0, z0, 2), ++ z0 = svminnm_z (p0, z0, 2)) ++ ++/* ++** minnm_f16_x_tied1: ++** fminnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f16_x_tied1, svfloat16_t, ++ z0 = svminnm_f16_x (p0, z0, z1), ++ z0 = svminnm_x (p0, z0, z1)) ++ ++/* ++** minnm_f16_x_tied2: ++** fminnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f16_x_tied2, svfloat16_t, ++ z0 = svminnm_f16_x (p0, z1, z0), ++ z0 = svminnm_x (p0, z1, z0)) ++ ++/* ++** minnm_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fminnm z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fminnm z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f16_x_untied, svfloat16_t, ++ z0 = svminnm_f16_x (p0, z1, z2), ++ z0 = svminnm_x (p0, z1, z2)) ++ ++/* ++** minnm_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fminnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svminnm_n_f16_x (p0, z0, d4), ++ z0 = svminnm_x (p0, z0, d4)) ++ ++/* ++** minnm_h4_f16_x_untied: ++** mov z0\.h, h4 ++** fminnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svminnm_n_f16_x (p0, z1, d4), ++ z0 = svminnm_x (p0, z1, d4)) ++ ++/* ++** minnm_0_f16_x_tied1: ++** fminnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f16_x_tied1, svfloat16_t, ++ z0 = svminnm_n_f16_x (p0, z0, 0), ++ z0 = svminnm_x (p0, z0, 0)) ++ ++/* ++** minnm_0_f16_x_untied: ++** movprfx z0, z1 ++** fminnm z0\.h, p0/m, z0\.h, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f16_x_untied, svfloat16_t, ++ z0 = svminnm_n_f16_x (p0, z1, 0), ++ z0 = svminnm_x (p0, z1, 0)) ++ ++/* ++** minnm_1_f16_x_tied1: ++** fminnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f16_x_tied1, svfloat16_t, ++ z0 = svminnm_n_f16_x (p0, z0, 1), ++ z0 = svminnm_x (p0, z0, 1)) ++ ++/* ++** minnm_1_f16_x_untied: ++** movprfx z0, z1 ++** fminnm z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f16_x_untied, svfloat16_t, ++ z0 = svminnm_n_f16_x (p0, z1, 1), ++ z0 = svminnm_x (p0, z1, 1)) ++ ++/* ++** minnm_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fminnm z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f16_x_tied1, svfloat16_t, ++ z0 = svminnm_n_f16_x (p0, z0, 2), ++ z0 = svminnm_x (p0, z0, 2)) ++ ++/* ++** minnm_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fminnm z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f16_x_untied, svfloat16_t, ++ z0 = svminnm_n_f16_x (p0, z1, 2), ++ z0 = svminnm_x (p0, z1, 2)) ++ ++/* ++** ptrue_minnm_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_f16_x_tied1, svfloat16_t, ++ z0 = svminnm_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svminnm_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_minnm_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_f16_x_tied2, svfloat16_t, ++ z0 = svminnm_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svminnm_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_minnm_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_f16_x_untied, svfloat16_t, ++ z0 = svminnm_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svminnm_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_minnm_0_f16_x_tied1: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_0_f16_x_tied1, svfloat16_t, ++ z0 = svminnm_n_f16_x (svptrue_b16 (), z0, 0), ++ z0 = svminnm_x (svptrue_b16 (), z0, 0)) ++ ++/* ++** ptrue_minnm_0_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_0_f16_x_untied, svfloat16_t, ++ z0 = svminnm_n_f16_x (svptrue_b16 (), z1, 0), ++ z0 = svminnm_x (svptrue_b16 (), z1, 0)) ++ ++/* ++** ptrue_minnm_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_1_f16_x_tied1, svfloat16_t, ++ z0 = svminnm_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svminnm_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_minnm_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_1_f16_x_untied, svfloat16_t, ++ z0 = svminnm_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svminnm_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_minnm_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_2_f16_x_tied1, svfloat16_t, ++ z0 = svminnm_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svminnm_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_minnm_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_2_f16_x_untied, svfloat16_t, ++ z0 = svminnm_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svminnm_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f32.c +new file mode 100644 +index 000000000..4fac8e8ad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f32.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minnm_f32_m_tied1: ++** fminnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f32_m_tied1, svfloat32_t, ++ z0 = svminnm_f32_m (p0, z0, z1), ++ z0 = svminnm_m (p0, z0, z1)) ++ ++/* ++** minnm_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fminnm z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f32_m_tied2, svfloat32_t, ++ z0 = svminnm_f32_m (p0, z1, z0), ++ z0 = svminnm_m (p0, z1, z0)) ++ ++/* ++** minnm_f32_m_untied: ++** movprfx z0, z1 ++** fminnm z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f32_m_untied, svfloat32_t, ++ z0 = svminnm_f32_m (p0, z1, z2), ++ z0 = svminnm_m (p0, z1, z2)) ++ ++/* ++** minnm_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fminnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svminnm_n_f32_m (p0, z0, d4), ++ z0 = svminnm_m (p0, z0, d4)) ++ ++/* ++** minnm_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fminnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svminnm_n_f32_m (p0, z1, d4), ++ z0 = svminnm_m (p0, z1, d4)) ++ ++/* ++** minnm_0_f32_m_tied1: ++** fminnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f32_m_tied1, svfloat32_t, ++ z0 = svminnm_n_f32_m (p0, z0, 0), ++ z0 = svminnm_m (p0, z0, 0)) ++ ++/* ++** minnm_0_f32_m_untied: ++** movprfx z0, z1 ++** fminnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f32_m_untied, svfloat32_t, ++ z0 = svminnm_n_f32_m (p0, z1, 0), ++ z0 = svminnm_m (p0, z1, 0)) ++ ++/* ++** minnm_1_f32_m_tied1: ++** fminnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ 
++TEST_UNIFORM_Z (minnm_1_f32_m_tied1, svfloat32_t, ++ z0 = svminnm_n_f32_m (p0, z0, 1), ++ z0 = svminnm_m (p0, z0, 1)) ++ ++/* ++** minnm_1_f32_m_untied: ++** movprfx z0, z1 ++** fminnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f32_m_untied, svfloat32_t, ++ z0 = svminnm_n_f32_m (p0, z1, 1), ++ z0 = svminnm_m (p0, z1, 1)) ++ ++/* ++** minnm_2_f32_m: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fminnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f32_m, svfloat32_t, ++ z0 = svminnm_n_f32_m (p0, z0, 2), ++ z0 = svminnm_m (p0, z0, 2)) ++ ++/* ++** minnm_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fminnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f32_z_tied1, svfloat32_t, ++ z0 = svminnm_f32_z (p0, z0, z1), ++ z0 = svminnm_z (p0, z0, z1)) ++ ++/* ++** minnm_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fminnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f32_z_tied2, svfloat32_t, ++ z0 = svminnm_f32_z (p0, z1, z0), ++ z0 = svminnm_z (p0, z1, z0)) ++ ++/* ++** minnm_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fminnm z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fminnm z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f32_z_untied, svfloat32_t, ++ z0 = svminnm_f32_z (p0, z1, z2), ++ z0 = svminnm_z (p0, z1, z2)) ++ ++/* ++** minnm_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fminnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svminnm_n_f32_z (p0, z0, d4), ++ z0 = svminnm_z (p0, z0, d4)) ++ ++/* ++** minnm_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fminnm z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fminnm z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svminnm_n_f32_z (p0, z1, d4), ++ z0 = svminnm_z (p0, z1, d4)) ++ ++/* ++** minnm_0_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fminnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f32_z_tied1, svfloat32_t, ++ z0 = svminnm_n_f32_z (p0, z0, 0), ++ z0 = svminnm_z (p0, z0, 0)) ++ ++/* ++** minnm_0_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fminnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f32_z_untied, svfloat32_t, ++ z0 = svminnm_n_f32_z (p0, z1, 0), ++ z0 = svminnm_z (p0, z1, 0)) ++ ++/* ++** minnm_1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fminnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f32_z_tied1, svfloat32_t, ++ z0 = svminnm_n_f32_z (p0, z0, 1), ++ z0 = svminnm_z (p0, z0, 1)) ++ ++/* ++** minnm_1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fminnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f32_z_untied, svfloat32_t, ++ z0 = svminnm_n_f32_z (p0, z1, 1), ++ z0 = svminnm_z (p0, z1, 1)) ++ ++/* ++** minnm_2_f32_z: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0\.s, p0/z, z0\.s ++** fminnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f32_z, svfloat32_t, ++ z0 = svminnm_n_f32_z (p0, z0, 2), ++ z0 = svminnm_z (p0, z0, 2)) ++ ++/* ++** minnm_f32_x_tied1: ++** fminnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f32_x_tied1, svfloat32_t, ++ z0 = svminnm_f32_x (p0, z0, z1), ++ z0 = svminnm_x (p0, z0, z1)) ++ ++/* ++** minnm_f32_x_tied2: ++** fminnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f32_x_tied2, svfloat32_t, ++ z0 = svminnm_f32_x (p0, z1, z0), ++ z0 = svminnm_x (p0, z1, z0)) ++ ++/* ++** minnm_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fminnm z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fminnm z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f32_x_untied, svfloat32_t, ++ z0 = svminnm_f32_x (p0, z1, z2), ++ z0 = svminnm_x (p0, z1, z2)) ++ ++/* ++** minnm_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fminnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svminnm_n_f32_x (p0, z0, d4), ++ z0 = svminnm_x (p0, z0, d4)) ++ ++/* ++** minnm_s4_f32_x_untied: ++** mov z0\.s, s4 ++** fminnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svminnm_n_f32_x (p0, z1, d4), ++ z0 = svminnm_x (p0, z1, d4)) ++ ++/* ++** minnm_0_f32_x_tied1: ++** fminnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f32_x_tied1, svfloat32_t, ++ z0 = svminnm_n_f32_x (p0, z0, 0), ++ z0 = svminnm_x (p0, z0, 0)) ++ ++/* ++** minnm_0_f32_x_untied: ++** movprfx z0, z1 ++** fminnm z0\.s, p0/m, z0\.s, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f32_x_untied, svfloat32_t, ++ z0 = svminnm_n_f32_x (p0, z1, 0), ++ z0 = svminnm_x (p0, z1, 0)) ++ ++/* ++** minnm_1_f32_x_tied1: ++** fminnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f32_x_tied1, svfloat32_t, ++ z0 = svminnm_n_f32_x (p0, z0, 1), ++ z0 = svminnm_x (p0, z0, 1)) ++ ++/* ++** minnm_1_f32_x_untied: ++** movprfx z0, z1 ++** fminnm z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f32_x_untied, svfloat32_t, ++ z0 = svminnm_n_f32_x (p0, z1, 1), ++ z0 = svminnm_x (p0, z1, 1)) ++ ++/* ++** minnm_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fminnm z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f32_x_tied1, svfloat32_t, ++ z0 = svminnm_n_f32_x (p0, z0, 2), ++ z0 = svminnm_x (p0, z0, 2)) ++ ++/* ++** minnm_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fminnm z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f32_x_untied, svfloat32_t, ++ z0 = svminnm_n_f32_x (p0, z1, 2), ++ z0 = svminnm_x (p0, z1, 2)) ++ ++/* ++** ptrue_minnm_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_f32_x_tied1, svfloat32_t, ++ z0 = svminnm_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svminnm_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_minnm_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_f32_x_tied2, svfloat32_t, ++ z0 = svminnm_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svminnm_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_minnm_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_f32_x_untied, svfloat32_t, ++ z0 = svminnm_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svminnm_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_minnm_0_f32_x_tied1: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_0_f32_x_tied1, svfloat32_t, ++ z0 = svminnm_n_f32_x (svptrue_b32 (), z0, 0), ++ z0 = svminnm_x (svptrue_b32 (), z0, 0)) ++ ++/* ++** ptrue_minnm_0_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_0_f32_x_untied, svfloat32_t, ++ z0 = svminnm_n_f32_x (svptrue_b32 (), z1, 0), ++ z0 = svminnm_x (svptrue_b32 (), z1, 0)) ++ ++/* ++** ptrue_minnm_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_1_f32_x_tied1, svfloat32_t, ++ z0 = svminnm_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svminnm_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_minnm_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_1_f32_x_untied, svfloat32_t, ++ z0 = svminnm_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svminnm_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_minnm_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_2_f32_x_tied1, svfloat32_t, ++ z0 = svminnm_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svminnm_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_minnm_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_2_f32_x_untied, svfloat32_t, ++ z0 = svminnm_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svminnm_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f64.c +new file mode 100644 +index 000000000..67993928f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f64.c +@@ -0,0 +1,425 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minnm_f64_m_tied1: ++** fminnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f64_m_tied1, svfloat64_t, ++ z0 = svminnm_f64_m (p0, z0, z1), ++ z0 = svminnm_m (p0, z0, z1)) ++ ++/* ++** minnm_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fminnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f64_m_tied2, svfloat64_t, ++ z0 = svminnm_f64_m (p0, z1, z0), ++ z0 = svminnm_m (p0, z1, z0)) ++ ++/* ++** minnm_f64_m_untied: ++** movprfx z0, z1 ++** fminnm z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f64_m_untied, svfloat64_t, ++ z0 = svminnm_f64_m (p0, z1, z2), ++ z0 = svminnm_m (p0, z1, z2)) ++ ++/* ++** minnm_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fminnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svminnm_n_f64_m (p0, z0, d4), ++ z0 = svminnm_m (p0, z0, d4)) ++ ++/* ++** minnm_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fminnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svminnm_n_f64_m (p0, z1, d4), ++ z0 = svminnm_m (p0, z1, d4)) ++ ++/* ++** minnm_0_f64_m_tied1: ++** fminnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f64_m_tied1, svfloat64_t, ++ z0 = svminnm_n_f64_m (p0, z0, 0), ++ z0 = svminnm_m (p0, z0, 0)) ++ ++/* ++** minnm_0_f64_m_untied: ++** movprfx z0, z1 ++** fminnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f64_m_untied, svfloat64_t, ++ z0 = svminnm_n_f64_m (p0, z1, 0), ++ z0 = svminnm_m (p0, z1, 0)) ++ ++/* ++** minnm_1_f64_m_tied1: ++** fminnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ 
++TEST_UNIFORM_Z (minnm_1_f64_m_tied1, svfloat64_t, ++ z0 = svminnm_n_f64_m (p0, z0, 1), ++ z0 = svminnm_m (p0, z0, 1)) ++ ++/* ++** minnm_1_f64_m_untied: ++** movprfx z0, z1 ++** fminnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f64_m_untied, svfloat64_t, ++ z0 = svminnm_n_f64_m (p0, z1, 1), ++ z0 = svminnm_m (p0, z1, 1)) ++ ++/* ++** minnm_2_f64_m: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fminnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f64_m, svfloat64_t, ++ z0 = svminnm_n_f64_m (p0, z0, 2), ++ z0 = svminnm_m (p0, z0, 2)) ++ ++/* ++** minnm_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fminnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f64_z_tied1, svfloat64_t, ++ z0 = svminnm_f64_z (p0, z0, z1), ++ z0 = svminnm_z (p0, z0, z1)) ++ ++/* ++** minnm_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fminnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f64_z_tied2, svfloat64_t, ++ z0 = svminnm_f64_z (p0, z1, z0), ++ z0 = svminnm_z (p0, z1, z0)) ++ ++/* ++** minnm_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fminnm z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fminnm z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f64_z_untied, svfloat64_t, ++ z0 = svminnm_f64_z (p0, z1, z2), ++ z0 = svminnm_z (p0, z1, z2)) ++ ++/* ++** minnm_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fminnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svminnm_n_f64_z (p0, z0, d4), ++ z0 = svminnm_z (p0, z0, d4)) ++ ++/* ++** minnm_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fminnm z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fminnm z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svminnm_n_f64_z (p0, z1, d4), ++ z0 = svminnm_z (p0, z1, d4)) ++ ++/* ++** minnm_0_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fminnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f64_z_tied1, svfloat64_t, ++ z0 = svminnm_n_f64_z (p0, z0, 0), ++ z0 = svminnm_z (p0, z0, 0)) ++ ++/* ++** minnm_0_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fminnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f64_z_untied, svfloat64_t, ++ z0 = svminnm_n_f64_z (p0, z1, 0), ++ z0 = svminnm_z (p0, z1, 0)) ++ ++/* ++** minnm_1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fminnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f64_z_tied1, svfloat64_t, ++ z0 = svminnm_n_f64_z (p0, z0, 1), ++ z0 = svminnm_z (p0, z0, 1)) ++ ++/* ++** minnm_1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fminnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f64_z_untied, svfloat64_t, ++ z0 = svminnm_n_f64_z (p0, z1, 1), ++ z0 = svminnm_z (p0, z1, 1)) ++ ++/* ++** minnm_2_f64_z: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0\.d, p0/z, z0\.d ++** fminnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f64_z, svfloat64_t, ++ z0 = svminnm_n_f64_z (p0, z0, 2), ++ z0 = svminnm_z (p0, z0, 2)) ++ ++/* ++** minnm_f64_x_tied1: ++** fminnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f64_x_tied1, svfloat64_t, ++ z0 = svminnm_f64_x (p0, z0, z1), ++ z0 = svminnm_x (p0, z0, z1)) ++ ++/* ++** minnm_f64_x_tied2: ++** fminnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f64_x_tied2, svfloat64_t, ++ z0 = svminnm_f64_x (p0, z1, z0), ++ z0 = svminnm_x (p0, z1, z0)) ++ ++/* ++** minnm_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fminnm z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fminnm z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_f64_x_untied, svfloat64_t, ++ z0 = svminnm_f64_x (p0, z1, z2), ++ z0 = svminnm_x (p0, z1, z2)) ++ ++/* ++** minnm_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fminnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svminnm_n_f64_x (p0, z0, d4), ++ z0 = svminnm_x (p0, z0, d4)) ++ ++/* ++** minnm_d4_f64_x_untied: ++** mov z0\.d, d4 ++** fminnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (minnm_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svminnm_n_f64_x (p0, z1, d4), ++ z0 = svminnm_x (p0, z1, d4)) ++ ++/* ++** minnm_0_f64_x_tied1: ++** fminnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f64_x_tied1, svfloat64_t, ++ z0 = svminnm_n_f64_x (p0, z0, 0), ++ z0 = svminnm_x (p0, z0, 0)) ++ ++/* ++** minnm_0_f64_x_untied: ++** movprfx z0, z1 ++** fminnm z0\.d, p0/m, z0\.d, #0\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_0_f64_x_untied, svfloat64_t, ++ z0 = svminnm_n_f64_x (p0, z1, 0), ++ z0 = svminnm_x (p0, z1, 0)) ++ ++/* ++** minnm_1_f64_x_tied1: ++** fminnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f64_x_tied1, svfloat64_t, ++ z0 = svminnm_n_f64_x (p0, z0, 1), ++ z0 = svminnm_x (p0, z0, 1)) ++ ++/* ++** minnm_1_f64_x_untied: ++** movprfx z0, z1 ++** fminnm z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_1_f64_x_untied, svfloat64_t, ++ z0 = svminnm_n_f64_x (p0, z1, 1), ++ z0 = svminnm_x (p0, z1, 1)) ++ ++/* ++** minnm_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fminnm z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f64_x_tied1, svfloat64_t, ++ z0 = svminnm_n_f64_x (p0, z0, 2), ++ z0 = svminnm_x (p0, z0, 2)) ++ ++/* ++** minnm_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fminnm z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (minnm_2_f64_x_untied, svfloat64_t, ++ z0 = svminnm_n_f64_x (p0, z1, 2), ++ z0 = svminnm_x (p0, z1, 2)) ++ ++/* ++** ptrue_minnm_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_f64_x_tied1, svfloat64_t, ++ z0 = svminnm_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svminnm_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_minnm_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_f64_x_tied2, svfloat64_t, ++ z0 = svminnm_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svminnm_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_minnm_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_f64_x_untied, svfloat64_t, ++ z0 = svminnm_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svminnm_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_minnm_0_f64_x_tied1: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_0_f64_x_tied1, svfloat64_t, ++ z0 = svminnm_n_f64_x (svptrue_b64 (), z0, 0), ++ z0 = svminnm_x (svptrue_b64 (), z0, 0)) ++ ++/* ++** ptrue_minnm_0_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_0_f64_x_untied, svfloat64_t, ++ z0 = svminnm_n_f64_x (svptrue_b64 (), z1, 0), ++ z0 = svminnm_x (svptrue_b64 (), z1, 0)) ++ ++/* ++** ptrue_minnm_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_1_f64_x_tied1, svfloat64_t, ++ z0 = svminnm_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svminnm_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_minnm_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_1_f64_x_untied, svfloat64_t, ++ z0 = svminnm_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svminnm_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_minnm_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_2_f64_x_tied1, svfloat64_t, ++ z0 = svminnm_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svminnm_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_minnm_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_minnm_2_f64_x_untied, svfloat64_t, ++ z0 = svminnm_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svminnm_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f16.c +new file mode 100644 +index 000000000..827f41bfe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minnmv_d0_f16_tied: ++** fminnmv h0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_D (minnmv_d0_f16_tied, float16_t, svfloat16_t, ++ d0 = svminnmv_f16 (p0, z0), ++ d0 = svminnmv (p0, z0)) ++ ++/* ++** minnmv_d0_f16_untied: ++** fminnmv h0, p0, z1\.h ++** ret ++*/ ++TEST_REDUCTION_D (minnmv_d0_f16_untied, float16_t, svfloat16_t, ++ d0 = svminnmv_f16 (p0, z1), ++ d0 = svminnmv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f32.c +new file mode 100644 +index 000000000..2352ec2a3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minnmv_d0_f32_tied: ++** fminnmv s0, p0, z0\.s ++** ret ++*/ ++TEST_REDUCTION_D (minnmv_d0_f32_tied, float32_t, svfloat32_t, ++ d0 = svminnmv_f32 (p0, z0), ++ d0 = svminnmv (p0, z0)) ++ ++/* ++** minnmv_d0_f32_untied: ++** fminnmv s0, p0, z1\.s ++** ret ++*/ ++TEST_REDUCTION_D (minnmv_d0_f32_untied, float32_t, svfloat32_t, ++ d0 = svminnmv_f32 (p0, z1), ++ d0 = svminnmv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f64.c +new file mode 100644 +index 000000000..3d769a3d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minnmv_d0_f64_tied: ++** fminnmv d0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_D (minnmv_d0_f64_tied, float64_t, svfloat64_t, ++ d0 = 
svminnmv_f64 (p0, z0), ++ d0 = svminnmv (p0, z0)) ++ ++/* ++** minnmv_d0_f64_untied: ++** fminnmv d0, p0, z1\.d ++** ret ++*/ ++TEST_REDUCTION_D (minnmv_d0_f64_untied, float64_t, svfloat64_t, ++ d0 = svminnmv_f64 (p0, z1), ++ d0 = svminnmv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f16.c +new file mode 100644 +index 000000000..190aa16e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_d0_f16_tied: ++** fminv h0, p0, z0\.h ++** ret ++*/ ++TEST_REDUCTION_D (minv_d0_f16_tied, float16_t, svfloat16_t, ++ d0 = svminv_f16 (p0, z0), ++ d0 = svminv (p0, z0)) ++ ++/* ++** minv_d0_f16_untied: ++** fminv h0, p0, z1\.h ++** ret ++*/ ++TEST_REDUCTION_D (minv_d0_f16_untied, float16_t, svfloat16_t, ++ d0 = svminv_f16 (p0, z1), ++ d0 = svminv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f32.c +new file mode 100644 +index 000000000..07871b893 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_d0_f32_tied: ++** fminv s0, p0, z0\.s ++** ret ++*/ ++TEST_REDUCTION_D (minv_d0_f32_tied, float32_t, svfloat32_t, ++ d0 = svminv_f32 (p0, z0), ++ d0 = svminv (p0, z0)) ++ ++/* ++** minv_d0_f32_untied: ++** fminv s0, p0, z1\.s ++** ret ++*/ ++TEST_REDUCTION_D (minv_d0_f32_untied, float32_t, svfloat32_t, ++ d0 = svminv_f32 (p0, z1), ++ d0 = svminv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f64.c +new file mode 100644 +index 000000000..7435f306f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_d0_f64_tied: ++** fminv d0, p0, z0\.d ++** ret ++*/ ++TEST_REDUCTION_D (minv_d0_f64_tied, float64_t, svfloat64_t, ++ d0 = svminv_f64 (p0, z0), ++ d0 = svminv (p0, z0)) ++ ++/* ++** minv_d0_f64_untied: ++** fminv d0, p0, z1\.d ++** ret ++*/ ++TEST_REDUCTION_D (minv_d0_f64_untied, float64_t, svfloat64_t, ++ d0 = svminv_f64 (p0, z1), ++ d0 = svminv (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s16.c +new file mode 100644 +index 000000000..dfb66a9f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_x0_s16: ++** sminv h([0-9]+), p0, z0\.h ++** umov w0, v\1\.h\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (minv_x0_s16, int16_t, svint16_t, ++ x0 = svminv_s16 (p0, z0), ++ x0 = svminv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s32.c +new file mode 100644 +index 000000000..c02df5dd3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_x0_s32: ++** sminv (s[0-9]+), p0, z0\.s ++** fmov w0, \1 ++** 
ret ++*/ ++TEST_REDUCTION_X (minv_x0_s32, int32_t, svint32_t, ++ x0 = svminv_s32 (p0, z0), ++ x0 = svminv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s64.c +new file mode 100644 +index 000000000..784973231 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_x0_s64: ++** sminv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (minv_x0_s64, int64_t, svint64_t, ++ x0 = svminv_s64 (p0, z0), ++ x0 = svminv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s8.c +new file mode 100644 +index 000000000..0b1bce5de +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_x0_s8: ++** sminv b([0-9]+), p0, z0\.b ++** umov w0, v\1\.b\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (minv_x0_s8, int8_t, svint8_t, ++ x0 = svminv_s8 (p0, z0), ++ x0 = svminv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u16.c +new file mode 100644 +index 000000000..b499de33e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_x0_u16: ++** uminv h([0-9]+), p0, z0\.h ++** umov w0, v\1\.h\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (minv_x0_u16, uint16_t, svuint16_t, ++ x0 = svminv_u16 (p0, z0), ++ x0 = svminv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u32.c +new file mode 100644 +index 000000000..18c9d8c6d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_x0_u32: ++** uminv (s[0-9]+), p0, z0\.s ++** fmov w0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (minv_x0_u32, uint32_t, svuint32_t, ++ x0 = svminv_u32 (p0, z0), ++ x0 = svminv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u64.c +new file mode 100644 +index 000000000..374d5e426 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_x0_u64: ++** uminv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (minv_x0_u64, uint64_t, svuint64_t, ++ x0 = svminv_u64 (p0, z0), ++ x0 = svminv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u8.c +new file mode 100644 +index 000000000..d9f6f5835 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** minv_x0_u8: ++** uminv b([0-9]+), p0, z0\.b ++** umov w0, v\1\.b\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (minv_x0_u8, uint8_t, svuint8_t, ++ x0 = svminv_u8 (p0, z0), 
++ x0 = svminv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f16.c +new file mode 100644 +index 000000000..f22a582ef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f16.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_f16_m_tied1: ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_m_tied1, svfloat16_t, ++ z0 = svmla_f16_m (p0, z0, z1, z2), ++ z0 = svmla_m (p0, z0, z1, z2)) ++ ++/* ++** mla_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmla z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_m_tied2, svfloat16_t, ++ z0 = svmla_f16_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmla z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_m_tied3, svfloat16_t, ++ z0 = svmla_f16_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_f16_m_untied: ++** movprfx z0, z1 ++** fmla z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_m_untied, svfloat16_t, ++ z0 = svmla_f16_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svmla_n_f16_m (p0, z0, z1, d4), ++ z0 = svmla_m (p0, z0, z1, d4)) ++ ++/* ++** mla_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fmla z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svmla_n_f16_m (p0, z1, z2, d4), ++ z0 = svmla_m (p0, z1, z2, d4)) ++ ++/* ++** mla_2_f16_m_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f16_m_tied1, svfloat16_t, ++ z0 = svmla_n_f16_m (p0, z0, z1, 2), ++ z0 = svmla_m (p0, z0, z1, 2)) ++ ++/* ++** mla_2_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmla z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f16_m_untied, svfloat16_t, ++ z0 = svmla_n_f16_m (p0, z1, z2, 2), ++ z0 = svmla_m (p0, z1, z2, 2)) ++ ++/* ++** mla_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_z_tied1, svfloat16_t, ++ z0 = svmla_f16_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_z_tied2, svfloat16_t, ++ z0 = svmla_f16_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_f16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** fmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_z_tied3, svfloat16_t, ++ z0 = svmla_f16_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmla z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmad z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** fmad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_z_untied, svfloat16_t, ++ z0 = svmla_f16_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svmla_n_f16_z (p0, z0, z1, d4), ++ z0 = svmla_z (p0, z0, z1, d4)) ++ ++/* ++** mla_h4_f16_z_tied2: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_h4_f16_z_tied2, svfloat16_t, __fp16, ++ z0 = svmla_n_f16_z (p0, z1, z0, d4), ++ z0 = svmla_z (p0, z1, z0, d4)) ++ ++/* ++** mla_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmla z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmad z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svmla_n_f16_z (p0, z1, z2, d4), ++ z0 = svmla_z (p0, z1, z2, d4)) ++ ++/* ++** mla_2_f16_z_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f16_z_tied1, svfloat16_t, ++ z0 = svmla_n_f16_z (p0, z0, z1, 2), ++ z0 = svmla_z (p0, z0, z1, 2)) ++ ++/* ++** mla_2_f16_z_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f16_z_tied2, svfloat16_t, ++ z0 = svmla_n_f16_z (p0, z1, z0, 2), ++ z0 = svmla_z (p0, z1, z0, 2)) ++ ++/* ++** mla_2_f16_z_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmla z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmad z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f16_z_untied, svfloat16_t, ++ z0 = svmla_n_f16_z (p0, z1, z2, 2), ++ z0 = svmla_z (p0, z1, z2, 2)) ++ ++/* ++** mla_f16_x_tied1: ++** fmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_x_tied1, svfloat16_t, ++ z0 = svmla_f16_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_f16_x_tied2: ++** fmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_x_tied2, svfloat16_t, ++ z0 = svmla_f16_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_f16_x_tied3: ++** fmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_x_tied3, svfloat16_t, ++ z0 = svmla_f16_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fmla z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** fmad z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0, z3 ++** fmad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f16_x_untied, svfloat16_t, ++ z0 = svmla_f16_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svmla_n_f16_x (p0, z0, z1, d4), ++ z0 = svmla_x (p0, z0, z1, d4)) ++ ++/* ++** mla_h4_f16_x_tied2: ++** mov (z[0-9]+\.h), h4 ++** fmad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_h4_f16_x_tied2, svfloat16_t, __fp16, ++ z0 = svmla_n_f16_x (p0, z1, z0, d4), ++ z0 = svmla_x (p0, z1, z0, d4)) ++ ++/* ++** mla_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svmla_n_f16_x (p0, z1, z2, d4), ++ z0 = svmla_x (p0, z1, z2, d4)) ++ ++/* ++** mla_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f16_x_tied1, svfloat16_t, ++ z0 = svmla_n_f16_x (p0, z0, z1, 2), ++ z0 = svmla_x (p0, z0, z1, 2)) ++ ++/* ++** mla_2_f16_x_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f16_x_tied2, svfloat16_t, ++ z0 = svmla_n_f16_x (p0, z1, z0, 2), ++ z0 = svmla_x (p0, z1, z0, 2)) ++ ++/* ++** mla_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f16_x_untied, svfloat16_t, ++ z0 = svmla_n_f16_x (p0, z1, z2, 2), ++ z0 = svmla_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_mla_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f16_x_tied1, svfloat16_t, ++ z0 = svmla_f16_x (svptrue_b16 (), z0, z1, z2), ++ z0 = svmla_x (svptrue_b16 (), z0, z1, z2)) ++ ++/* ++** ptrue_mla_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f16_x_tied2, svfloat16_t, ++ z0 = svmla_f16_x (svptrue_b16 (), z1, z0, z2), ++ z0 = svmla_x (svptrue_b16 (), z1, z0, z2)) ++ ++/* ++** ptrue_mla_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f16_x_tied3, svfloat16_t, ++ z0 = svmla_f16_x (svptrue_b16 (), z1, z2, z0), ++ z0 = svmla_x (svptrue_b16 (), z1, z2, z0)) ++ ++/* ++** ptrue_mla_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f16_x_untied, svfloat16_t, ++ z0 = svmla_f16_x (svptrue_b16 (), z1, z2, z3), ++ z0 = svmla_x (svptrue_b16 (), z1, z2, z3)) ++ ++/* ++** ptrue_mla_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_2_f16_x_tied1, svfloat16_t, ++ z0 = svmla_n_f16_x (svptrue_b16 (), z0, z1, 2), ++ z0 = svmla_x (svptrue_b16 (), z0, z1, 2)) ++ ++/* ++** ptrue_mla_2_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_2_f16_x_tied2, svfloat16_t, ++ z0 = svmla_n_f16_x (svptrue_b16 (), z1, z0, 2), ++ z0 = svmla_x (svptrue_b16 (), z1, z0, 2)) ++ ++/* ++** ptrue_mla_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_2_f16_x_untied, svfloat16_t, ++ z0 = svmla_n_f16_x (svptrue_b16 (), z1, z2, 2), ++ z0 = svmla_x (svptrue_b16 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f32.c +new file mode 100644 +index 000000000..1d95eb0a7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f32.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_f32_m_tied1: ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_m_tied1, svfloat32_t, ++ z0 = svmla_f32_m (p0, z0, z1, z2), ++ z0 = svmla_m (p0, z0, z1, z2)) ++ ++/* ++** mla_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmla z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_m_tied2, svfloat32_t, ++ z0 = svmla_f32_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmla z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_m_tied3, svfloat32_t, ++ z0 = svmla_f32_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_f32_m_untied: ++** movprfx z0, z1 ++** fmla z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_m_untied, svfloat32_t, ++ z0 = svmla_f32_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svmla_n_f32_m (p0, z0, z1, d4), ++ z0 = svmla_m (p0, z0, z1, d4)) ++ ++/* ++** mla_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fmla z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svmla_n_f32_m (p0, z1, z2, d4), ++ z0 = svmla_m (p0, z1, z2, d4)) ++ ++/* ++** mla_2_f32_m_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f32_m_tied1, svfloat32_t, ++ z0 = svmla_n_f32_m (p0, z0, z1, 2), ++ z0 = svmla_m (p0, z0, z1, 2)) ++ ++/* ++** mla_2_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmla z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f32_m_untied, svfloat32_t, ++ z0 = svmla_n_f32_m (p0, z1, z2, 2), ++ z0 = svmla_m (p0, z1, z2, 2)) ++ ++/* ++** mla_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_z_tied1, svfloat32_t, ++ z0 = svmla_f32_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_z_tied2, svfloat32_t, ++ z0 = svmla_f32_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_f32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** fmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_z_tied3, svfloat32_t, ++ z0 = svmla_f32_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmla z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmad z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** fmad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_z_untied, svfloat32_t, ++ z0 = svmla_f32_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svmla_n_f32_z (p0, z0, z1, d4), ++ z0 = svmla_z (p0, z0, z1, d4)) ++ ++/* ++** mla_s4_f32_z_tied2: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_s4_f32_z_tied2, svfloat32_t, float, ++ z0 = svmla_n_f32_z (p0, z1, z0, d4), ++ z0 = svmla_z (p0, z1, z0, d4)) ++ ++/* ++** mla_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmla z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmad z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svmla_n_f32_z (p0, z1, z2, d4), ++ z0 = svmla_z (p0, z1, z2, d4)) ++ ++/* ++** mla_2_f32_z_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f32_z_tied1, svfloat32_t, ++ z0 = svmla_n_f32_z (p0, z0, z1, 2), ++ z0 = svmla_z (p0, z0, z1, 2)) ++ ++/* ++** mla_2_f32_z_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f32_z_tied2, svfloat32_t, ++ z0 = svmla_n_f32_z (p0, z1, z0, 2), ++ z0 = svmla_z (p0, z1, z0, 2)) ++ ++/* ++** mla_2_f32_z_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmla z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmad z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f32_z_untied, svfloat32_t, ++ z0 = svmla_n_f32_z (p0, z1, z2, 2), ++ z0 = svmla_z (p0, z1, z2, 2)) ++ ++/* ++** mla_f32_x_tied1: ++** fmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_x_tied1, svfloat32_t, ++ z0 = svmla_f32_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_f32_x_tied2: ++** fmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_x_tied2, svfloat32_t, ++ z0 = svmla_f32_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_f32_x_tied3: ++** fmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_x_tied3, svfloat32_t, ++ z0 = svmla_f32_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fmla z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** fmad z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0, z3 ++** fmad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f32_x_untied, svfloat32_t, ++ z0 = svmla_f32_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svmla_n_f32_x (p0, z0, z1, d4), ++ z0 = svmla_x (p0, z0, z1, d4)) ++ ++/* ++** mla_s4_f32_x_tied2: ++** mov (z[0-9]+\.s), s4 ++** fmad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_s4_f32_x_tied2, svfloat32_t, float, ++ z0 = svmla_n_f32_x (p0, z1, z0, d4), ++ z0 = svmla_x (p0, z1, z0, d4)) ++ ++/* ++** mla_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svmla_n_f32_x (p0, z1, z2, d4), ++ z0 = svmla_x (p0, z1, z2, d4)) ++ ++/* ++** mla_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f32_x_tied1, svfloat32_t, ++ z0 = svmla_n_f32_x (p0, z0, z1, 2), ++ z0 = svmla_x (p0, z0, z1, 2)) ++ ++/* ++** mla_2_f32_x_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f32_x_tied2, svfloat32_t, ++ z0 = svmla_n_f32_x (p0, z1, z0, 2), ++ z0 = svmla_x (p0, z1, z0, 2)) ++ ++/* ++** mla_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f32_x_untied, svfloat32_t, ++ z0 = svmla_n_f32_x (p0, z1, z2, 2), ++ z0 = svmla_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_mla_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f32_x_tied1, svfloat32_t, ++ z0 = svmla_f32_x (svptrue_b32 (), z0, z1, z2), ++ z0 = svmla_x (svptrue_b32 (), z0, z1, z2)) ++ ++/* ++** ptrue_mla_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f32_x_tied2, svfloat32_t, ++ z0 = svmla_f32_x (svptrue_b32 (), z1, z0, z2), ++ z0 = svmla_x (svptrue_b32 (), z1, z0, z2)) ++ ++/* ++** ptrue_mla_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f32_x_tied3, svfloat32_t, ++ z0 = svmla_f32_x (svptrue_b32 (), z1, z2, z0), ++ z0 = svmla_x (svptrue_b32 (), z1, z2, z0)) ++ ++/* ++** ptrue_mla_f32_x_untied: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f32_x_untied, svfloat32_t, ++ z0 = svmla_f32_x (svptrue_b32 (), z1, z2, z3), ++ z0 = svmla_x (svptrue_b32 (), z1, z2, z3)) ++ ++/* ++** ptrue_mla_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_2_f32_x_tied1, svfloat32_t, ++ z0 = svmla_n_f32_x (svptrue_b32 (), z0, z1, 2), ++ z0 = svmla_x (svptrue_b32 (), z0, z1, 2)) ++ ++/* ++** ptrue_mla_2_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_2_f32_x_tied2, svfloat32_t, ++ z0 = svmla_n_f32_x (svptrue_b32 (), z1, z0, 2), ++ z0 = svmla_x (svptrue_b32 (), z1, z0, 2)) ++ ++/* ++** ptrue_mla_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_2_f32_x_untied, svfloat32_t, ++ z0 = svmla_n_f32_x (svptrue_b32 (), z1, z2, 2), ++ z0 = svmla_x (svptrue_b32 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f64.c +new file mode 100644 +index 000000000..74fd29267 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f64.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_f64_m_tied1: ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_m_tied1, svfloat64_t, ++ z0 = svmla_f64_m (p0, z0, z1, z2), ++ z0 = svmla_m (p0, z0, z1, z2)) ++ ++/* ++** mla_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmla z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_m_tied2, svfloat64_t, ++ z0 = svmla_f64_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_m_tied3, svfloat64_t, ++ z0 = svmla_f64_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_f64_m_untied: ++** movprfx z0, z1 ++** fmla z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_m_untied, svfloat64_t, ++ z0 = svmla_f64_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svmla_n_f64_m (p0, z0, z1, d4), ++ z0 = svmla_m (p0, z0, z1, d4)) ++ ++/* ++** mla_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fmla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svmla_n_f64_m (p0, z1, z2, d4), ++ z0 = svmla_m (p0, z1, z2, d4)) ++ ++/* ++** mla_2_f64_m_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f64_m_tied1, svfloat64_t, ++ z0 = svmla_n_f64_m (p0, z0, z1, 2), ++ z0 = svmla_m (p0, z0, z1, 2)) ++ ++/* ++** mla_2_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f64_m_untied, svfloat64_t, ++ z0 = svmla_n_f64_m (p0, z1, z2, 2), ++ z0 = svmla_m (p0, z1, z2, 2)) ++ ++/* ++** mla_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_z_tied1, svfloat64_t, ++ z0 = svmla_f64_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_z_tied2, svfloat64_t, ++ z0 = svmla_f64_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_f64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** fmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_z_tied3, svfloat64_t, ++ z0 = svmla_f64_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmla z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmad z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** fmad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_z_untied, svfloat64_t, ++ z0 = svmla_f64_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svmla_n_f64_z (p0, z0, z1, d4), ++ z0 = svmla_z (p0, z0, z1, d4)) ++ ++/* ++** mla_d4_f64_z_tied2: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_d4_f64_z_tied2, svfloat64_t, double, ++ z0 = svmla_n_f64_z (p0, z1, z0, d4), ++ z0 = svmla_z (p0, z1, z0, d4)) ++ ++/* ++** mla_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmla z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmad z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svmla_n_f64_z (p0, z1, z2, d4), ++ z0 = svmla_z (p0, z1, z2, d4)) ++ ++/* ++** mla_2_f64_z_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f64_z_tied1, svfloat64_t, ++ z0 = svmla_n_f64_z (p0, z0, z1, 2), ++ z0 = svmla_z (p0, z0, z1, 2)) ++ ++/* ++** mla_2_f64_z_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f64_z_tied2, svfloat64_t, ++ z0 = svmla_n_f64_z (p0, z1, z0, 2), ++ z0 = svmla_z (p0, z1, z0, 2)) ++ ++/* ++** mla_2_f64_z_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmla z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmad z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f64_z_untied, svfloat64_t, ++ z0 = svmla_n_f64_z (p0, z1, z2, 2), ++ z0 = svmla_z (p0, z1, z2, 2)) ++ ++/* ++** mla_f64_x_tied1: ++** fmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_x_tied1, svfloat64_t, ++ z0 = svmla_f64_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_f64_x_tied2: ++** fmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_x_tied2, svfloat64_t, ++ z0 = svmla_f64_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_f64_x_tied3: ++** fmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_x_tied3, svfloat64_t, ++ z0 = svmla_f64_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fmla z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** fmad z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0, z3 ++** fmad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_f64_x_untied, svfloat64_t, ++ z0 = svmla_f64_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svmla_n_f64_x (p0, z0, z1, d4), ++ z0 = svmla_x (p0, z0, z1, d4)) ++ ++/* ++** mla_d4_f64_x_tied2: ++** mov (z[0-9]+\.d), d4 ++** fmad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_d4_f64_x_tied2, svfloat64_t, double, ++ z0 = svmla_n_f64_x (p0, z1, z0, d4), ++ z0 = svmla_x (p0, z1, z0, d4)) ++ ++/* ++** mla_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (mla_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svmla_n_f64_x (p0, z1, z2, d4), ++ z0 = svmla_x (p0, z1, z2, d4)) ++ ++/* ++** mla_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f64_x_tied1, svfloat64_t, ++ z0 = svmla_n_f64_x (p0, z0, z1, 2), ++ z0 = svmla_x (p0, z0, z1, 2)) ++ ++/* ++** mla_2_f64_x_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f64_x_tied2, svfloat64_t, ++ z0 = svmla_n_f64_x (p0, z1, z0, 2), ++ z0 = svmla_x (p0, z1, z0, 2)) ++ ++/* ++** mla_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_2_f64_x_untied, svfloat64_t, ++ z0 = svmla_n_f64_x (p0, z1, z2, 2), ++ z0 = svmla_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_mla_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f64_x_tied1, svfloat64_t, ++ z0 = svmla_f64_x (svptrue_b64 (), z0, z1, z2), ++ z0 = svmla_x (svptrue_b64 (), z0, z1, z2)) ++ ++/* ++** ptrue_mla_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f64_x_tied2, svfloat64_t, ++ z0 = svmla_f64_x (svptrue_b64 (), z1, z0, z2), ++ z0 = svmla_x (svptrue_b64 (), z1, z0, z2)) ++ ++/* ++** ptrue_mla_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f64_x_tied3, svfloat64_t, ++ z0 = svmla_f64_x (svptrue_b64 (), z1, z2, z0), ++ z0 = svmla_x (svptrue_b64 (), z1, z2, z0)) ++ ++/* ++** ptrue_mla_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_f64_x_untied, svfloat64_t, ++ z0 = svmla_f64_x (svptrue_b64 (), z1, z2, z3), ++ z0 = svmla_x (svptrue_b64 (), z1, z2, z3)) ++ ++/* ++** ptrue_mla_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_2_f64_x_tied1, svfloat64_t, ++ z0 = svmla_n_f64_x (svptrue_b64 (), z0, z1, 2), ++ z0 = svmla_x (svptrue_b64 (), z0, z1, 2)) ++ ++/* ++** ptrue_mla_2_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_2_f64_x_tied2, svfloat64_t, ++ z0 = svmla_n_f64_x (svptrue_b64 (), z1, z0, 2), ++ z0 = svmla_x (svptrue_b64 (), z1, z0, 2)) ++ ++/* ++** ptrue_mla_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mla_2_f64_x_untied, svfloat64_t, ++ z0 = svmla_n_f64_x (svptrue_b64 (), z1, z2, 2), ++ z0 = svmla_x (svptrue_b64 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f16.c +new file mode 100644 +index 000000000..949e3bb47 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f16.c +@@ -0,0 +1,128 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_lane_0_f16_tied1: ++** fmla z0\.h, z1\.h, z2\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f16_tied1, svfloat16_t, ++ z0 = svmla_lane_f16 (z0, z1, z2, 0), ++ z0 = svmla_lane (z0, z1, z2, 0)) ++ ++/* ++** mla_lane_0_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmla z0\.h, \1\.h, z2\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f16_tied2, svfloat16_t, ++ z0 = svmla_lane_f16 (z1, z0, z2, 0), ++ z0 = svmla_lane (z1, z0, z2, 0)) ++ ++/* ++** mla_lane_0_f16_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmla z0\.h, z2\.h, \1\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f16_tied3, svfloat16_t, ++ z0 = svmla_lane_f16 (z1, z2, z0, 0), ++ z0 = svmla_lane (z1, z2, z0, 0)) ++ ++/* ++** mla_lane_0_f16_untied: ++** movprfx z0, z1 ++** fmla z0\.h, z2\.h, z3\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f16_untied, svfloat16_t, ++ z0 = svmla_lane_f16 (z1, z2, z3, 0), ++ z0 = svmla_lane (z1, z2, z3, 0)) ++ ++/* ++** mla_lane_1_f16: ++** fmla z0\.h, z1\.h, z2\.h\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_1_f16, svfloat16_t, ++ z0 = svmla_lane_f16 (z0, z1, z2, 1), ++ z0 = svmla_lane (z0, z1, z2, 1)) ++ ++/* ++** mla_lane_2_f16: ++** fmla z0\.h, z1\.h, z2\.h\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_2_f16, svfloat16_t, ++ z0 = svmla_lane_f16 (z0, z1, z2, 2), ++ z0 = svmla_lane (z0, z1, z2, 2)) ++ ++/* ++** mla_lane_3_f16: ++** fmla z0\.h, z1\.h, z2\.h\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_3_f16, svfloat16_t, ++ z0 = svmla_lane_f16 (z0, z1, z2, 3), ++ z0 = svmla_lane (z0, z1, z2, 3)) ++ ++/* ++** mla_lane_4_f16: ++** fmla z0\.h, z1\.h, z2\.h\[4\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_4_f16, svfloat16_t, ++ z0 = svmla_lane_f16 (z0, z1, z2, 4), ++ z0 = svmla_lane (z0, z1, z2, 4)) ++ ++/* ++** mla_lane_5_f16: ++** fmla z0\.h, z1\.h, z2\.h\[5\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_5_f16, svfloat16_t, ++ z0 = svmla_lane_f16 (z0, z1, z2, 5), ++ z0 = svmla_lane (z0, z1, z2, 5)) ++ ++/* ++** 
mla_lane_6_f16: ++** fmla z0\.h, z1\.h, z2\.h\[6\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_6_f16, svfloat16_t, ++ z0 = svmla_lane_f16 (z0, z1, z2, 6), ++ z0 = svmla_lane (z0, z1, z2, 6)) ++ ++/* ++** mla_lane_7_f16: ++** fmla z0\.h, z1\.h, z2\.h\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_7_f16, svfloat16_t, ++ z0 = svmla_lane_f16 (z0, z1, z2, 7), ++ z0 = svmla_lane (z0, z1, z2, 7)) ++ ++/* ++** mla_lane_z7_f16: ++** fmla z0\.h, z1\.h, z7\.h\[7\] ++** ret ++*/ ++TEST_DUAL_Z (mla_lane_z7_f16, svfloat16_t, svfloat16_t, ++ z0 = svmla_lane_f16 (z0, z1, z7, 7), ++ z0 = svmla_lane (z0, z1, z7, 7)) ++ ++/* ++** mla_lane_z8_f16: ++** str d8, \[sp, -16\]! ++** mov (z[0-7])\.d, z8\.d ++** fmla z0\.h, z1\.h, \1\.h\[7\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (mla_lane_z8_f16, svfloat16_t, svfloat16_t, z8, ++ z0 = svmla_lane_f16 (z0, z1, z8, 7), ++ z0 = svmla_lane (z0, z1, z8, 7)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f32.c +new file mode 100644 +index 000000000..d376532d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f32.c +@@ -0,0 +1,92 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_lane_0_f32_tied1: ++** fmla z0\.s, z1\.s, z2\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f32_tied1, svfloat32_t, ++ z0 = svmla_lane_f32 (z0, z1, z2, 0), ++ z0 = svmla_lane (z0, z1, z2, 0)) ++ ++/* ++** mla_lane_0_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmla z0\.s, \1\.s, z2\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f32_tied2, svfloat32_t, ++ z0 = svmla_lane_f32 (z1, z0, z2, 0), ++ z0 = svmla_lane (z1, z0, z2, 0)) ++ ++/* ++** mla_lane_0_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmla z0\.s, z2\.s, \1\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f32_tied3, svfloat32_t, ++ z0 = svmla_lane_f32 (z1, z2, z0, 0), ++ z0 = svmla_lane (z1, z2, z0, 0)) ++ ++/* ++** mla_lane_0_f32_untied: ++** movprfx z0, z1 ++** fmla z0\.s, z2\.s, z3\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f32_untied, svfloat32_t, ++ z0 = svmla_lane_f32 (z1, z2, z3, 0), ++ z0 = svmla_lane (z1, z2, z3, 0)) ++ ++/* ++** mla_lane_1_f32: ++** fmla z0\.s, z1\.s, z2\.s\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_1_f32, svfloat32_t, ++ z0 = svmla_lane_f32 (z0, z1, z2, 1), ++ z0 = svmla_lane (z0, z1, z2, 1)) ++ ++/* ++** mla_lane_2_f32: ++** fmla z0\.s, z1\.s, z2\.s\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_2_f32, svfloat32_t, ++ z0 = svmla_lane_f32 (z0, z1, z2, 2), ++ z0 = svmla_lane (z0, z1, z2, 2)) ++ ++/* ++** mla_lane_3_f32: ++** fmla z0\.s, z1\.s, z2\.s\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_3_f32, svfloat32_t, ++ z0 = svmla_lane_f32 (z0, z1, z2, 3), ++ z0 = svmla_lane (z0, z1, z2, 3)) ++ ++/* ++** mla_lane_z7_f32: ++** fmla z0\.s, z1\.s, z7\.s\[3\] ++** ret ++*/ ++TEST_DUAL_Z (mla_lane_z7_f32, svfloat32_t, svfloat32_t, ++ z0 = svmla_lane_f32 (z0, z1, z7, 3), ++ z0 = svmla_lane (z0, z1, z7, 3)) ++ ++/* ++** mla_lane_z8_f32: ++** str d8, \[sp, -16\]! 
++** mov (z[0-7])\.d, z8\.d ++** fmla z0\.s, z1\.s, \1\.s\[3\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (mla_lane_z8_f32, svfloat32_t, svfloat32_t, z8, ++ z0 = svmla_lane_f32 (z0, z1, z8, 3), ++ z0 = svmla_lane (z0, z1, z8, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f64.c +new file mode 100644 +index 000000000..7c58a8a57 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f64.c +@@ -0,0 +1,83 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_lane_0_f64_tied1: ++** fmla z0\.d, z1\.d, z2\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f64_tied1, svfloat64_t, ++ z0 = svmla_lane_f64 (z0, z1, z2, 0), ++ z0 = svmla_lane (z0, z1, z2, 0)) ++ ++/* ++** mla_lane_0_f64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmla z0\.d, \1, z2\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f64_tied2, svfloat64_t, ++ z0 = svmla_lane_f64 (z1, z0, z2, 0), ++ z0 = svmla_lane (z1, z0, z2, 0)) ++ ++/* ++** mla_lane_0_f64_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmla z0\.d, z2\.d, \1\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f64_tied3, svfloat64_t, ++ z0 = svmla_lane_f64 (z1, z2, z0, 0), ++ z0 = svmla_lane (z1, z2, z0, 0)) ++ ++/* ++** mla_lane_0_f64_untied: ++** movprfx z0, z1 ++** fmla z0\.d, z2\.d, z3\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_0_f64_untied, svfloat64_t, ++ z0 = svmla_lane_f64 (z1, z2, z3, 0), ++ z0 = svmla_lane (z1, z2, z3, 0)) ++ ++/* ++** mla_lane_1_f64: ++** fmla z0\.d, z1\.d, z2\.d\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (mla_lane_1_f64, svfloat64_t, ++ z0 = svmla_lane_f64 (z0, z1, z2, 1), ++ z0 = svmla_lane (z0, z1, z2, 1)) ++ ++/* ++** mla_lane_z7_f64: ++** fmla z0\.d, z1\.d, z7\.d\[1\] ++** ret ++*/ ++TEST_DUAL_Z (mla_lane_z7_f64, svfloat64_t, svfloat64_t, ++ z0 = svmla_lane_f64 (z0, z1, z7, 1), ++ z0 = svmla_lane (z0, z1, z7, 1)) ++ ++/* ++** mla_lane_z15_f64: ++** str d15, \[sp, -16\]! 
++** fmla z0\.d, z1\.d, z15\.d\[1\] ++** ldr d15, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (mla_lane_z15_f64, svfloat64_t, svfloat64_t, z15, ++ z0 = svmla_lane_f64 (z0, z1, z15, 1), ++ z0 = svmla_lane (z0, z1, z15, 1)) ++ ++/* ++** mla_lane_z16_f64: ++** mov (z[0-9]|z1[0-5])\.d, z16\.d ++** fmla z0\.d, z1\.d, \1\.d\[1\] ++** ret ++*/ ++TEST_DUAL_LANE_REG (mla_lane_z16_f64, svfloat64_t, svfloat64_t, z16, ++ z0 = svmla_lane_f64 (z0, z1, z16, 1), ++ z0 = svmla_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s16.c +new file mode 100644 +index 000000000..f3ed191db +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s16.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_s16_m_tied1: ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_m_tied1, svint16_t, ++ z0 = svmla_s16_m (p0, z0, z1, z2), ++ z0 = svmla_m (p0, z0, z1, z2)) ++ ++/* ++** mla_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_m_tied2, svint16_t, ++ z0 = svmla_s16_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_s16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_m_tied3, svint16_t, ++ z0 = svmla_s16_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_s16_m_untied: ++** movprfx z0, z1 ++** mla z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_m_untied, svint16_t, ++ z0 = svmla_s16_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svmla_n_s16_m (p0, z0, z1, x0), ++ z0 = svmla_m (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** mla z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svmla_n_s16_m (p0, z1, z2, x0), ++ z0 = svmla_m (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s16_m_tied1: ++** mov (z[0-9]+\.h), #11 ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s16_m_tied1, svint16_t, ++ z0 = svmla_n_s16_m (p0, z0, z1, 11), ++ z0 = svmla_m (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #11 ++** movprfx z0, z1 ++** mla z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s16_m_untied, svint16_t, ++ z0 = svmla_n_s16_m (p0, z1, z2, 11), ++ z0 = svmla_m (p0, z1, z2, 11)) ++ ++/* ++** mla_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_z_tied1, svint16_t, ++ z0 = svmla_s16_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_z_tied2, svint16_t, ++ z0 = svmla_s16_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_s16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_z_tied3, svint16_t, ++ z0 = svmla_s16_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mla 
z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_z_untied, svint16_t, ++ z0 = svmla_s16_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svmla_n_s16_z (p0, z0, z1, x0), ++ z0 = svmla_z (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_s16_z_tied2: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s16_z_tied2, svint16_t, int16_t, ++ z0 = svmla_n_s16_z (p0, z1, z0, x0), ++ z0 = svmla_z (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mla z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svmla_n_s16_z (p0, z1, z2, x0), ++ z0 = svmla_z (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s16_z_tied1: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s16_z_tied1, svint16_t, ++ z0 = svmla_n_s16_z (p0, z0, z1, 11), ++ z0 = svmla_z (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s16_z_tied2: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s16_z_tied2, svint16_t, ++ z0 = svmla_n_s16_z (p0, z1, z0, 11), ++ z0 = svmla_z (p0, z1, z0, 11)) ++ ++/* ++** mla_11_s16_z_untied: ++** mov (z[0-9]+\.h), #11 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mla z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s16_z_untied, svint16_t, ++ z0 = svmla_n_s16_z (p0, z1, z2, 11), ++ z0 = svmla_z (p0, z1, z2, 11)) ++ ++/* ++** mla_s16_x_tied1: ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_x_tied1, svint16_t, ++ z0 = svmla_s16_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_s16_x_tied2: ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_x_tied2, svint16_t, ++ z0 = svmla_s16_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_s16_x_tied3: ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_x_tied3, svint16_t, ++ z0 = svmla_s16_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** mla z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** mad z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0, z3 ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s16_x_untied, svint16_t, ++ z0 = svmla_s16_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svmla_n_s16_x (p0, z0, z1, x0), ++ z0 = svmla_x (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_s16_x_tied2: ++** mov (z[0-9]+\.h), w0 ++** mad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s16_x_tied2, 
svint16_t, int16_t, ++ z0 = svmla_n_s16_x (p0, z1, z0, x0), ++ z0 = svmla_x (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_s16_x_untied: ++** mov z0\.h, w0 ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svmla_n_s16_x (p0, z1, z2, x0), ++ z0 = svmla_x (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s16_x_tied1: ++** mov (z[0-9]+\.h), #11 ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s16_x_tied1, svint16_t, ++ z0 = svmla_n_s16_x (p0, z0, z1, 11), ++ z0 = svmla_x (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s16_x_tied2: ++** mov (z[0-9]+\.h), #11 ++** mad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s16_x_tied2, svint16_t, ++ z0 = svmla_n_s16_x (p0, z1, z0, 11), ++ z0 = svmla_x (p0, z1, z0, 11)) ++ ++/* ++** mla_11_s16_x_untied: ++** mov z0\.h, #11 ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s16_x_untied, svint16_t, ++ z0 = svmla_n_s16_x (p0, z1, z2, 11), ++ z0 = svmla_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s32.c +new file mode 100644 +index 000000000..5e8001a71 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s32.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_s32_m_tied1: ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_m_tied1, svint32_t, ++ z0 = svmla_s32_m (p0, z0, z1, z2), ++ z0 = svmla_m (p0, z0, z1, z2)) ++ ++/* ++** mla_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_m_tied2, svint32_t, ++ z0 = svmla_s32_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_s32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_m_tied3, svint32_t, ++ z0 = svmla_s32_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_s32_m_untied: ++** movprfx z0, z1 ++** mla z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_m_untied, svint32_t, ++ z0 = svmla_s32_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svmla_n_s32_m (p0, z0, z1, x0), ++ z0 = svmla_m (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** mla z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svmla_n_s32_m (p0, z1, z2, x0), ++ z0 = svmla_m (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s32_m_tied1: ++** mov (z[0-9]+\.s), #11 ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s32_m_tied1, svint32_t, ++ z0 = svmla_n_s32_m (p0, z0, z1, 11), ++ z0 = svmla_m (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #11 ++** movprfx z0, z1 ++** mla z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s32_m_untied, svint32_t, ++ z0 = svmla_n_s32_m (p0, z1, z2, 11), ++ z0 = svmla_m (p0, z1, z2, 11)) ++ ++/* ++** mla_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_z_tied1, svint32_t, ++ z0 = svmla_s32_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_s32_z_tied2: 
++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_z_tied2, svint32_t, ++ z0 = svmla_s32_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_s32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_z_tied3, svint32_t, ++ z0 = svmla_s32_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mla z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_z_untied, svint32_t, ++ z0 = svmla_s32_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svmla_n_s32_z (p0, z0, z1, x0), ++ z0 = svmla_z (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_s32_z_tied2: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s32_z_tied2, svint32_t, int32_t, ++ z0 = svmla_n_s32_z (p0, z1, z0, x0), ++ z0 = svmla_z (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mla z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svmla_n_s32_z (p0, z1, z2, x0), ++ z0 = svmla_z (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s32_z_tied1: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s32_z_tied1, svint32_t, ++ z0 = svmla_n_s32_z (p0, z0, z1, 11), ++ z0 = svmla_z (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s32_z_tied2: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s32_z_tied2, svint32_t, ++ z0 = svmla_n_s32_z (p0, z1, z0, 11), ++ z0 = svmla_z (p0, z1, z0, 11)) ++ ++/* ++** mla_11_s32_z_untied: ++** mov (z[0-9]+\.s), #11 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mla z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s32_z_untied, svint32_t, ++ z0 = svmla_n_s32_z (p0, z1, z2, 11), ++ z0 = svmla_z (p0, z1, z2, 11)) ++ ++/* ++** mla_s32_x_tied1: ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_x_tied1, svint32_t, ++ z0 = svmla_s32_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_s32_x_tied2: ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_x_tied2, svint32_t, ++ z0 = svmla_s32_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_s32_x_tied3: ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s32_x_tied3, svint32_t, ++ z0 = svmla_s32_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** mla z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** mad z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0, z3 ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret 
++*/ ++TEST_UNIFORM_Z (mla_s32_x_untied, svint32_t, ++ z0 = svmla_s32_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svmla_n_s32_x (p0, z0, z1, x0), ++ z0 = svmla_x (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_s32_x_tied2: ++** mov (z[0-9]+\.s), w0 ++** mad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s32_x_tied2, svint32_t, int32_t, ++ z0 = svmla_n_s32_x (p0, z1, z0, x0), ++ z0 = svmla_x (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_s32_x_untied: ++** mov z0\.s, w0 ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svmla_n_s32_x (p0, z1, z2, x0), ++ z0 = svmla_x (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s32_x_tied1: ++** mov (z[0-9]+\.s), #11 ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s32_x_tied1, svint32_t, ++ z0 = svmla_n_s32_x (p0, z0, z1, 11), ++ z0 = svmla_x (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s32_x_tied2: ++** mov (z[0-9]+\.s), #11 ++** mad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s32_x_tied2, svint32_t, ++ z0 = svmla_n_s32_x (p0, z1, z0, 11), ++ z0 = svmla_x (p0, z1, z0, 11)) ++ ++/* ++** mla_11_s32_x_untied: ++** mov z0\.s, #11 ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s32_x_untied, svint32_t, ++ z0 = svmla_n_s32_x (p0, z1, z2, 11), ++ z0 = svmla_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s64.c +new file mode 100644 +index 000000000..7b619e521 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s64.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_s64_m_tied1: ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_m_tied1, svint64_t, ++ z0 = svmla_s64_m (p0, z0, z1, z2), ++ z0 = svmla_m (p0, z0, z1, z2)) ++ ++/* ++** mla_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mla z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_m_tied2, svint64_t, ++ z0 = svmla_s64_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_s64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_m_tied3, svint64_t, ++ z0 = svmla_s64_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_s64_m_untied: ++** movprfx z0, z1 ++** mla z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_m_untied, svint64_t, ++ z0 = svmla_s64_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svmla_n_s64_m (p0, z0, z1, x0), ++ z0 = svmla_m (p0, z0, z1, x0)) ++ ++/* ++** mla_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** mla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svmla_n_s64_m (p0, z1, z2, x0), ++ z0 = svmla_m (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s64_m_tied1: ++** mov (z[0-9]+\.d), #11 ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s64_m_tied1, svint64_t, ++ z0 = svmla_n_s64_m (p0, z0, z1, 11), ++ z0 = svmla_m (p0, z0, z1, 11)) ++ ++/* ++** 
mla_11_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #11 ++** movprfx z0, z1 ++** mla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s64_m_untied, svint64_t, ++ z0 = svmla_n_s64_m (p0, z1, z2, 11), ++ z0 = svmla_m (p0, z1, z2, 11)) ++ ++/* ++** mla_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_z_tied1, svint64_t, ++ z0 = svmla_s64_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_z_tied2, svint64_t, ++ z0 = svmla_s64_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_s64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_z_tied3, svint64_t, ++ z0 = svmla_s64_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mla z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_z_untied, svint64_t, ++ z0 = svmla_s64_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svmla_n_s64_z (p0, z0, z1, x0), ++ z0 = svmla_z (p0, z0, z1, x0)) ++ ++/* ++** mla_x0_s64_z_tied2: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_s64_z_tied2, svint64_t, int64_t, ++ z0 = svmla_n_s64_z (p0, z1, z0, x0), ++ z0 = svmla_z (p0, z1, z0, x0)) ++ ++/* ++** mla_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mla z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svmla_n_s64_z (p0, z1, z2, x0), ++ z0 = svmla_z (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s64_z_tied1: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s64_z_tied1, svint64_t, ++ z0 = svmla_n_s64_z (p0, z0, z1, 11), ++ z0 = svmla_z (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s64_z_tied2: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s64_z_tied2, svint64_t, ++ z0 = svmla_n_s64_z (p0, z1, z0, 11), ++ z0 = svmla_z (p0, z1, z0, 11)) ++ ++/* ++** mla_11_s64_z_untied: ++** mov (z[0-9]+\.d), #11 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mla z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s64_z_untied, svint64_t, ++ z0 = svmla_n_s64_z (p0, z1, z2, 11), ++ z0 = svmla_z (p0, z1, z2, 11)) ++ ++/* ++** mla_s64_x_tied1: ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_x_tied1, svint64_t, ++ z0 = svmla_s64_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_s64_x_tied2: ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_x_tied2, 
svint64_t, ++ z0 = svmla_s64_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_s64_x_tied3: ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_x_tied3, svint64_t, ++ z0 = svmla_s64_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** mla z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** mad z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0, z3 ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s64_x_untied, svint64_t, ++ z0 = svmla_s64_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svmla_n_s64_x (p0, z0, z1, x0), ++ z0 = svmla_x (p0, z0, z1, x0)) ++ ++/* ++** mla_x0_s64_x_tied2: ++** mov (z[0-9]+\.d), x0 ++** mad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_s64_x_tied2, svint64_t, int64_t, ++ z0 = svmla_n_s64_x (p0, z1, z0, x0), ++ z0 = svmla_x (p0, z1, z0, x0)) ++ ++/* ++** mla_x0_s64_x_untied: ++** mov z0\.d, x0 ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svmla_n_s64_x (p0, z1, z2, x0), ++ z0 = svmla_x (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s64_x_tied1: ++** mov (z[0-9]+\.d), #11 ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s64_x_tied1, svint64_t, ++ z0 = svmla_n_s64_x (p0, z0, z1, 11), ++ z0 = svmla_x (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s64_x_tied2: ++** mov (z[0-9]+\.d), #11 ++** mad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s64_x_tied2, svint64_t, ++ z0 = svmla_n_s64_x (p0, z1, z0, 11), ++ z0 = svmla_x (p0, z1, z0, 11)) ++ ++/* ++** mla_11_s64_x_untied: ++** mov z0\.d, #11 ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s64_x_untied, svint64_t, ++ z0 = svmla_n_s64_x (p0, z1, z2, 11), ++ z0 = svmla_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s8.c +new file mode 100644 +index 000000000..47468947d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s8.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_s8_m_tied1: ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_m_tied1, svint8_t, ++ z0 = svmla_s8_m (p0, z0, z1, z2), ++ z0 = svmla_m (p0, z0, z1, z2)) ++ ++/* ++** mla_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.b, p0/m, \1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_m_tied2, svint8_t, ++ z0 = svmla_s8_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_s8_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.b, p0/m, z2\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_m_tied3, svint8_t, ++ z0 = svmla_s8_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_s8_m_untied: ++** movprfx z0, z1 ++** mla z0\.b, p0/m, z2\.b, z3\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_m_untied, svint8_t, ++ z0 = svmla_s8_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svmla_n_s8_m (p0, z0, z1, x0), ++ z0 = svmla_m (p0, z0, z1, x0)) ++ ++/* ++** 
mla_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** mla z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svmla_n_s8_m (p0, z1, z2, x0), ++ z0 = svmla_m (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s8_m_tied1: ++** mov (z[0-9]+\.b), #11 ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s8_m_tied1, svint8_t, ++ z0 = svmla_n_s8_m (p0, z0, z1, 11), ++ z0 = svmla_m (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #11 ++** movprfx z0, z1 ++** mla z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s8_m_untied, svint8_t, ++ z0 = svmla_n_s8_m (p0, z1, z2, 11), ++ z0 = svmla_m (p0, z1, z2, 11)) ++ ++/* ++** mla_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_z_tied1, svint8_t, ++ z0 = svmla_s8_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_z_tied2, svint8_t, ++ z0 = svmla_s8_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_s8_z_tied3: ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_z_tied3, svint8_t, ++ z0 = svmla_s8_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mla z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, z3\.b, z1\.b ++** | ++** movprfx z0\.b, p0/z, z3\.b ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_z_untied, svint8_t, ++ z0 = svmla_s8_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svmla_n_s8_z (p0, z0, z1, x0), ++ z0 = svmla_z (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_s8_z_tied2: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s8_z_tied2, svint8_t, int8_t, ++ z0 = svmla_n_s8_z (p0, z1, z0, x0), ++ z0 = svmla_z (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mla z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, \1, z1\.b ++** | ++** movprfx z0\.b, p0/z, \1 ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svmla_n_s8_z (p0, z1, z2, x0), ++ z0 = svmla_z (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s8_z_tied1: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s8_z_tied1, svint8_t, ++ z0 = svmla_n_s8_z (p0, z0, z1, 11), ++ z0 = svmla_z (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s8_z_tied2: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s8_z_tied2, svint8_t, ++ z0 = svmla_n_s8_z (p0, z1, z0, 11), ++ z0 = svmla_z (p0, z1, z0, 11)) ++ ++/* ++** mla_11_s8_z_untied: ++** mov (z[0-9]+\.b), #11 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mla z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, \1, z1\.b ++** | ++** movprfx z0\.b, p0/z, \1 ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** 
) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s8_z_untied, svint8_t, ++ z0 = svmla_n_s8_z (p0, z1, z2, 11), ++ z0 = svmla_z (p0, z1, z2, 11)) ++ ++/* ++** mla_s8_x_tied1: ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_x_tied1, svint8_t, ++ z0 = svmla_s8_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_s8_x_tied2: ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_x_tied2, svint8_t, ++ z0 = svmla_s8_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_s8_x_tied3: ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_x_tied3, svint8_t, ++ z0 = svmla_s8_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** mla z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0, z2 ++** mad z0\.b, p0/m, z3\.b, z1\.b ++** | ++** movprfx z0, z3 ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_s8_x_untied, svint8_t, ++ z0 = svmla_s8_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svmla_n_s8_x (p0, z0, z1, x0), ++ z0 = svmla_x (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_s8_x_tied2: ++** mov (z[0-9]+\.b), w0 ++** mad z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s8_x_tied2, svint8_t, int8_t, ++ z0 = svmla_n_s8_x (p0, z1, z0, x0), ++ z0 = svmla_x (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_s8_x_untied: ++** mov z0\.b, w0 ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svmla_n_s8_x (p0, z1, z2, x0), ++ z0 = svmla_x (p0, z1, z2, x0)) ++ ++/* ++** mla_11_s8_x_tied1: ++** mov (z[0-9]+\.b), #11 ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s8_x_tied1, svint8_t, ++ z0 = svmla_n_s8_x (p0, z0, z1, 11), ++ z0 = svmla_x (p0, z0, z1, 11)) ++ ++/* ++** mla_11_s8_x_tied2: ++** mov (z[0-9]+\.b), #11 ++** mad z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s8_x_tied2, svint8_t, ++ z0 = svmla_n_s8_x (p0, z1, z0, 11), ++ z0 = svmla_x (p0, z1, z0, 11)) ++ ++/* ++** mla_11_s8_x_untied: ++** mov z0\.b, #11 ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_s8_x_untied, svint8_t, ++ z0 = svmla_n_s8_x (p0, z1, z2, 11), ++ z0 = svmla_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u16.c +new file mode 100644 +index 000000000..7238e428f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u16.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_u16_m_tied1: ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_m_tied1, svuint16_t, ++ z0 = svmla_u16_m (p0, z0, z1, z2), ++ z0 = svmla_m (p0, z0, z1, z2)) ++ ++/* ++** mla_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_m_tied2, svuint16_t, ++ z0 = svmla_u16_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_u16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_m_tied3, svuint16_t, ++ z0 = svmla_u16_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_u16_m_untied: ++** movprfx z0, 
z1 ++** mla z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_m_untied, svuint16_t, ++ z0 = svmla_u16_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svmla_n_u16_m (p0, z0, z1, x0), ++ z0 = svmla_m (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** mla z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svmla_n_u16_m (p0, z1, z2, x0), ++ z0 = svmla_m (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u16_m_tied1: ++** mov (z[0-9]+\.h), #11 ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u16_m_tied1, svuint16_t, ++ z0 = svmla_n_u16_m (p0, z0, z1, 11), ++ z0 = svmla_m (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #11 ++** movprfx z0, z1 ++** mla z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u16_m_untied, svuint16_t, ++ z0 = svmla_n_u16_m (p0, z1, z2, 11), ++ z0 = svmla_m (p0, z1, z2, 11)) ++ ++/* ++** mla_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_z_tied1, svuint16_t, ++ z0 = svmla_u16_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_z_tied2, svuint16_t, ++ z0 = svmla_u16_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_u16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_z_tied3, svuint16_t, ++ z0 = svmla_u16_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mla z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_z_untied, svuint16_t, ++ z0 = svmla_u16_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svmla_n_u16_z (p0, z0, z1, x0), ++ z0 = svmla_z (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_u16_z_tied2: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u16_z_tied2, svuint16_t, uint16_t, ++ z0 = svmla_n_u16_z (p0, z1, z0, x0), ++ z0 = svmla_z (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mla z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svmla_n_u16_z (p0, z1, z2, x0), ++ z0 = svmla_z (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u16_z_tied1: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u16_z_tied1, svuint16_t, ++ z0 = svmla_n_u16_z (p0, z0, z1, 11), ++ z0 = svmla_z (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u16_z_tied2: ++** mov 
(z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** mad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u16_z_tied2, svuint16_t, ++ z0 = svmla_n_u16_z (p0, z1, z0, 11), ++ z0 = svmla_z (p0, z1, z0, 11)) ++ ++/* ++** mla_11_u16_z_untied: ++** mov (z[0-9]+\.h), #11 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mla z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mad z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u16_z_untied, svuint16_t, ++ z0 = svmla_n_u16_z (p0, z1, z2, 11), ++ z0 = svmla_z (p0, z1, z2, 11)) ++ ++/* ++** mla_u16_x_tied1: ++** mla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_x_tied1, svuint16_t, ++ z0 = svmla_u16_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_u16_x_tied2: ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_x_tied2, svuint16_t, ++ z0 = svmla_u16_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_u16_x_tied3: ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_x_tied3, svuint16_t, ++ z0 = svmla_u16_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** mla z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** mad z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0, z3 ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u16_x_untied, svuint16_t, ++ z0 = svmla_u16_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svmla_n_u16_x (p0, z0, z1, x0), ++ z0 = svmla_x (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_u16_x_tied2: ++** mov (z[0-9]+\.h), w0 ++** mad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u16_x_tied2, svuint16_t, uint16_t, ++ z0 = svmla_n_u16_x (p0, z1, z0, x0), ++ z0 = svmla_x (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_u16_x_untied: ++** mov z0\.h, w0 ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svmla_n_u16_x (p0, z1, z2, x0), ++ z0 = svmla_x (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u16_x_tied1: ++** mov (z[0-9]+\.h), #11 ++** mla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u16_x_tied1, svuint16_t, ++ z0 = svmla_n_u16_x (p0, z0, z1, 11), ++ z0 = svmla_x (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u16_x_tied2: ++** mov (z[0-9]+\.h), #11 ++** mad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u16_x_tied2, svuint16_t, ++ z0 = svmla_n_u16_x (p0, z1, z0, 11), ++ z0 = svmla_x (p0, z1, z0, 11)) ++ ++/* ++** mla_11_u16_x_untied: ++** mov z0\.h, #11 ++** mad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u16_x_untied, svuint16_t, ++ z0 = svmla_n_u16_x (p0, z1, z2, 11), ++ z0 = svmla_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u32.c +new file mode 100644 +index 000000000..7a68bce3d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u32.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_u32_m_tied1: ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_m_tied1, svuint32_t, ++ z0 = svmla_u32_m (p0, z0, z1, z2), ++ z0 = svmla_m 
(p0, z0, z1, z2)) ++ ++/* ++** mla_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_m_tied2, svuint32_t, ++ z0 = svmla_u32_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_u32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_m_tied3, svuint32_t, ++ z0 = svmla_u32_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_u32_m_untied: ++** movprfx z0, z1 ++** mla z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_m_untied, svuint32_t, ++ z0 = svmla_u32_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svmla_n_u32_m (p0, z0, z1, x0), ++ z0 = svmla_m (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** mla z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svmla_n_u32_m (p0, z1, z2, x0), ++ z0 = svmla_m (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u32_m_tied1: ++** mov (z[0-9]+\.s), #11 ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u32_m_tied1, svuint32_t, ++ z0 = svmla_n_u32_m (p0, z0, z1, 11), ++ z0 = svmla_m (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #11 ++** movprfx z0, z1 ++** mla z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u32_m_untied, svuint32_t, ++ z0 = svmla_n_u32_m (p0, z1, z2, 11), ++ z0 = svmla_m (p0, z1, z2, 11)) ++ ++/* ++** mla_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_z_tied1, svuint32_t, ++ z0 = svmla_u32_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_z_tied2, svuint32_t, ++ z0 = svmla_u32_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_u32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_z_tied3, svuint32_t, ++ z0 = svmla_u32_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mla z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_z_untied, svuint32_t, ++ z0 = svmla_u32_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svmla_n_u32_z (p0, z0, z1, x0), ++ z0 = svmla_z (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_u32_z_tied2: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u32_z_tied2, svuint32_t, uint32_t, ++ z0 = svmla_n_u32_z (p0, z1, z0, x0), ++ z0 = svmla_z (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mla z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, 
\1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svmla_n_u32_z (p0, z1, z2, x0), ++ z0 = svmla_z (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u32_z_tied1: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u32_z_tied1, svuint32_t, ++ z0 = svmla_n_u32_z (p0, z0, z1, 11), ++ z0 = svmla_z (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u32_z_tied2: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** mad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u32_z_tied2, svuint32_t, ++ z0 = svmla_n_u32_z (p0, z1, z0, 11), ++ z0 = svmla_z (p0, z1, z0, 11)) ++ ++/* ++** mla_11_u32_z_untied: ++** mov (z[0-9]+\.s), #11 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mla z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mad z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u32_z_untied, svuint32_t, ++ z0 = svmla_n_u32_z (p0, z1, z2, 11), ++ z0 = svmla_z (p0, z1, z2, 11)) ++ ++/* ++** mla_u32_x_tied1: ++** mla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_x_tied1, svuint32_t, ++ z0 = svmla_u32_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_u32_x_tied2: ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_x_tied2, svuint32_t, ++ z0 = svmla_u32_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_u32_x_tied3: ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_x_tied3, svuint32_t, ++ z0 = svmla_u32_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** mla z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** mad z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0, z3 ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u32_x_untied, svuint32_t, ++ z0 = svmla_u32_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svmla_n_u32_x (p0, z0, z1, x0), ++ z0 = svmla_x (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_u32_x_tied2: ++** mov (z[0-9]+\.s), w0 ++** mad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u32_x_tied2, svuint32_t, uint32_t, ++ z0 = svmla_n_u32_x (p0, z1, z0, x0), ++ z0 = svmla_x (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_u32_x_untied: ++** mov z0\.s, w0 ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svmla_n_u32_x (p0, z1, z2, x0), ++ z0 = svmla_x (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u32_x_tied1: ++** mov (z[0-9]+\.s), #11 ++** mla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u32_x_tied1, svuint32_t, ++ z0 = svmla_n_u32_x (p0, z0, z1, 11), ++ z0 = svmla_x (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u32_x_tied2: ++** mov (z[0-9]+\.s), #11 ++** mad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u32_x_tied2, svuint32_t, ++ z0 = svmla_n_u32_x (p0, z1, z0, 11), ++ z0 = svmla_x (p0, z1, z0, 11)) ++ ++/* ++** mla_11_u32_x_untied: ++** mov z0\.s, #11 ++** mad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u32_x_untied, svuint32_t, ++ z0 = svmla_n_u32_x (p0, z1, z2, 11), ++ z0 = svmla_x (p0, z1, z2, 11)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u64.c +new file mode 100644 +index 000000000..6233265c8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u64.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_u64_m_tied1: ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_m_tied1, svuint64_t, ++ z0 = svmla_u64_m (p0, z0, z1, z2), ++ z0 = svmla_m (p0, z0, z1, z2)) ++ ++/* ++** mla_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mla z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_m_tied2, svuint64_t, ++ z0 = svmla_u64_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_u64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_m_tied3, svuint64_t, ++ z0 = svmla_u64_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_u64_m_untied: ++** movprfx z0, z1 ++** mla z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_m_untied, svuint64_t, ++ z0 = svmla_u64_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svmla_n_u64_m (p0, z0, z1, x0), ++ z0 = svmla_m (p0, z0, z1, x0)) ++ ++/* ++** mla_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** mla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svmla_n_u64_m (p0, z1, z2, x0), ++ z0 = svmla_m (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u64_m_tied1: ++** mov (z[0-9]+\.d), #11 ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u64_m_tied1, svuint64_t, ++ z0 = svmla_n_u64_m (p0, z0, z1, 11), ++ z0 = svmla_m (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #11 ++** movprfx z0, z1 ++** mla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u64_m_untied, svuint64_t, ++ z0 = svmla_n_u64_m (p0, z1, z2, 11), ++ z0 = svmla_m (p0, z1, z2, 11)) ++ ++/* ++** mla_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_z_tied1, svuint64_t, ++ z0 = svmla_u64_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_z_tied2, svuint64_t, ++ z0 = svmla_u64_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_u64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_z_tied3, svuint64_t, ++ z0 = svmla_u64_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mla z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_z_untied, svuint64_t, ++ z0 = svmla_u64_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_u64_z_tied1, svuint64_t, uint64_t, ++ 
z0 = svmla_n_u64_z (p0, z0, z1, x0), ++ z0 = svmla_z (p0, z0, z1, x0)) ++ ++/* ++** mla_x0_u64_z_tied2: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_u64_z_tied2, svuint64_t, uint64_t, ++ z0 = svmla_n_u64_z (p0, z1, z0, x0), ++ z0 = svmla_z (p0, z1, z0, x0)) ++ ++/* ++** mla_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mla z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svmla_n_u64_z (p0, z1, z2, x0), ++ z0 = svmla_z (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u64_z_tied1: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u64_z_tied1, svuint64_t, ++ z0 = svmla_n_u64_z (p0, z0, z1, 11), ++ z0 = svmla_z (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u64_z_tied2: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** mad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u64_z_tied2, svuint64_t, ++ z0 = svmla_n_u64_z (p0, z1, z0, 11), ++ z0 = svmla_z (p0, z1, z0, 11)) ++ ++/* ++** mla_11_u64_z_untied: ++** mov (z[0-9]+\.d), #11 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mla z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mad z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u64_z_untied, svuint64_t, ++ z0 = svmla_n_u64_z (p0, z1, z2, 11), ++ z0 = svmla_z (p0, z1, z2, 11)) ++ ++/* ++** mla_u64_x_tied1: ++** mla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_x_tied1, svuint64_t, ++ z0 = svmla_u64_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_u64_x_tied2: ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_x_tied2, svuint64_t, ++ z0 = svmla_u64_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_u64_x_tied3: ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_x_tied3, svuint64_t, ++ z0 = svmla_u64_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** mla z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** mad z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0, z3 ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u64_x_untied, svuint64_t, ++ z0 = svmla_u64_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svmla_n_u64_x (p0, z0, z1, x0), ++ z0 = svmla_x (p0, z0, z1, x0)) ++ ++/* ++** mla_x0_u64_x_tied2: ++** mov (z[0-9]+\.d), x0 ++** mad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_u64_x_tied2, svuint64_t, uint64_t, ++ z0 = svmla_n_u64_x (p0, z1, z0, x0), ++ z0 = svmla_x (p0, z1, z0, x0)) ++ ++/* ++** mla_x0_u64_x_untied: ++** mov z0\.d, x0 ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svmla_n_u64_x (p0, z1, z2, x0), ++ z0 = svmla_x (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u64_x_tied1: ++** mov (z[0-9]+\.d), #11 ++** mla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u64_x_tied1, svuint64_t, ++ z0 = 
svmla_n_u64_x (p0, z0, z1, 11), ++ z0 = svmla_x (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u64_x_tied2: ++** mov (z[0-9]+\.d), #11 ++** mad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u64_x_tied2, svuint64_t, ++ z0 = svmla_n_u64_x (p0, z1, z0, 11), ++ z0 = svmla_x (p0, z1, z0, 11)) ++ ++/* ++** mla_11_u64_x_untied: ++** mov z0\.d, #11 ++** mad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u64_x_untied, svuint64_t, ++ z0 = svmla_n_u64_x (p0, z1, z2, 11), ++ z0 = svmla_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u8.c +new file mode 100644 +index 000000000..832ed4141 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u8.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mla_u8_m_tied1: ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_m_tied1, svuint8_t, ++ z0 = svmla_u8_m (p0, z0, z1, z2), ++ z0 = svmla_m (p0, z0, z1, z2)) ++ ++/* ++** mla_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.b, p0/m, \1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_m_tied2, svuint8_t, ++ z0 = svmla_u8_m (p0, z1, z0, z2), ++ z0 = svmla_m (p0, z1, z0, z2)) ++ ++/* ++** mla_u8_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mla z0\.b, p0/m, z2\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_m_tied3, svuint8_t, ++ z0 = svmla_u8_m (p0, z1, z2, z0), ++ z0 = svmla_m (p0, z1, z2, z0)) ++ ++/* ++** mla_u8_m_untied: ++** movprfx z0, z1 ++** mla z0\.b, p0/m, z2\.b, z3\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_m_untied, svuint8_t, ++ z0 = svmla_u8_m (p0, z1, z2, z3), ++ z0 = svmla_m (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svmla_n_u8_m (p0, z0, z1, x0), ++ z0 = svmla_m (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** mla z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svmla_n_u8_m (p0, z1, z2, x0), ++ z0 = svmla_m (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u8_m_tied1: ++** mov (z[0-9]+\.b), #11 ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u8_m_tied1, svuint8_t, ++ z0 = svmla_n_u8_m (p0, z0, z1, 11), ++ z0 = svmla_m (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #11 ++** movprfx z0, z1 ++** mla z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u8_m_untied, svuint8_t, ++ z0 = svmla_n_u8_m (p0, z1, z2, 11), ++ z0 = svmla_m (p0, z1, z2, 11)) ++ ++/* ++** mla_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_z_tied1, svuint8_t, ++ z0 = svmla_u8_z (p0, z0, z1, z2), ++ z0 = svmla_z (p0, z0, z1, z2)) ++ ++/* ++** mla_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_z_tied2, svuint8_t, ++ z0 = svmla_u8_z (p0, z1, z0, z2), ++ z0 = svmla_z (p0, z1, z0, z2)) ++ ++/* ++** mla_u8_z_tied3: ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_z_tied3, svuint8_t, ++ z0 = svmla_u8_z (p0, z1, z2, z0), ++ z0 = svmla_z (p0, z1, z2, z0)) ++ ++/* ++** mla_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mla z0\.b, 
p0/m, z2\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, z3\.b, z1\.b ++** | ++** movprfx z0\.b, p0/z, z3\.b ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_z_untied, svuint8_t, ++ z0 = svmla_u8_z (p0, z1, z2, z3), ++ z0 = svmla_z (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svmla_n_u8_z (p0, z0, z1, x0), ++ z0 = svmla_z (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_u8_z_tied2: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u8_z_tied2, svuint8_t, uint8_t, ++ z0 = svmla_n_u8_z (p0, z1, z0, x0), ++ z0 = svmla_z (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mla z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, \1, z1\.b ++** | ++** movprfx z0\.b, p0/z, \1 ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svmla_n_u8_z (p0, z1, z2, x0), ++ z0 = svmla_z (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u8_z_tied1: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u8_z_tied1, svuint8_t, ++ z0 = svmla_n_u8_z (p0, z0, z1, 11), ++ z0 = svmla_z (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u8_z_tied2: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** mad z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u8_z_tied2, svuint8_t, ++ z0 = svmla_n_u8_z (p0, z1, z0, 11), ++ z0 = svmla_z (p0, z1, z0, 11)) ++ ++/* ++** mla_11_u8_z_untied: ++** mov (z[0-9]+\.b), #11 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mla z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mad z0\.b, p0/m, \1, z1\.b ++** | ++** movprfx z0\.b, p0/z, \1 ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u8_z_untied, svuint8_t, ++ z0 = svmla_n_u8_z (p0, z1, z2, 11), ++ z0 = svmla_z (p0, z1, z2, 11)) ++ ++/* ++** mla_u8_x_tied1: ++** mla z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_x_tied1, svuint8_t, ++ z0 = svmla_u8_x (p0, z0, z1, z2), ++ z0 = svmla_x (p0, z0, z1, z2)) ++ ++/* ++** mla_u8_x_tied2: ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_x_tied2, svuint8_t, ++ z0 = svmla_u8_x (p0, z1, z0, z2), ++ z0 = svmla_x (p0, z1, z0, z2)) ++ ++/* ++** mla_u8_x_tied3: ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_x_tied3, svuint8_t, ++ z0 = svmla_u8_x (p0, z1, z2, z0), ++ z0 = svmla_x (p0, z1, z2, z0)) ++ ++/* ++** mla_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** mla z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0, z2 ++** mad z0\.b, p0/m, z3\.b, z1\.b ++** | ++** movprfx z0, z3 ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mla_u8_x_untied, svuint8_t, ++ z0 = svmla_u8_x (p0, z1, z2, z3), ++ z0 = svmla_x (p0, z1, z2, z3)) ++ ++/* ++** mla_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svmla_n_u8_x (p0, z0, z1, x0), ++ z0 = svmla_x (p0, z0, z1, x0)) ++ ++/* ++** mla_w0_u8_x_tied2: ++** mov (z[0-9]+\.b), w0 ++** mad z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u8_x_tied2, svuint8_t, uint8_t, ++ z0 = svmla_n_u8_x (p0, 
z1, z0, x0), ++ z0 = svmla_x (p0, z1, z0, x0)) ++ ++/* ++** mla_w0_u8_x_untied: ++** mov z0\.b, w0 ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mla_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svmla_n_u8_x (p0, z1, z2, x0), ++ z0 = svmla_x (p0, z1, z2, x0)) ++ ++/* ++** mla_11_u8_x_tied1: ++** mov (z[0-9]+\.b), #11 ++** mla z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u8_x_tied1, svuint8_t, ++ z0 = svmla_n_u8_x (p0, z0, z1, 11), ++ z0 = svmla_x (p0, z0, z1, 11)) ++ ++/* ++** mla_11_u8_x_tied2: ++** mov (z[0-9]+\.b), #11 ++** mad z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u8_x_tied2, svuint8_t, ++ z0 = svmla_n_u8_x (p0, z1, z0, 11), ++ z0 = svmla_x (p0, z1, z0, 11)) ++ ++/* ++** mla_11_u8_x_untied: ++** mov z0\.b, #11 ++** mad z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mla_11_u8_x_untied, svuint8_t, ++ z0 = svmla_n_u8_x (p0, z1, z2, 11), ++ z0 = svmla_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f16.c +new file mode 100644 +index 000000000..87fba3da7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f16.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_f16_m_tied1: ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_m_tied1, svfloat16_t, ++ z0 = svmls_f16_m (p0, z0, z1, z2), ++ z0 = svmls_m (p0, z0, z1, z2)) ++ ++/* ++** mls_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmls z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_m_tied2, svfloat16_t, ++ z0 = svmls_f16_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmls z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_m_tied3, svfloat16_t, ++ z0 = svmls_f16_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_f16_m_untied: ++** movprfx z0, z1 ++** fmls z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_m_untied, svfloat16_t, ++ z0 = svmls_f16_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svmls_n_f16_m (p0, z0, z1, d4), ++ z0 = svmls_m (p0, z0, z1, d4)) ++ ++/* ++** mls_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fmls z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svmls_n_f16_m (p0, z1, z2, d4), ++ z0 = svmls_m (p0, z1, z2, d4)) ++ ++/* ++** mls_2_f16_m_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f16_m_tied1, svfloat16_t, ++ z0 = svmls_n_f16_m (p0, z0, z1, 2), ++ z0 = svmls_m (p0, z0, z1, 2)) ++ ++/* ++** mls_2_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmls z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f16_m_untied, svfloat16_t, ++ z0 = svmls_n_f16_m (p0, z1, z2, 2), ++ z0 = svmls_m (p0, z1, z2, 2)) ++ ++/* ++** mls_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_z_tied1, svfloat16_t, ++ z0 = svmls_f16_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_z_tied2, svfloat16_t, ++ z0 = svmls_f16_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_f16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** fmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_z_tied3, svfloat16_t, ++ z0 = svmls_f16_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmls z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmsb z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** fmsb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_z_untied, svfloat16_t, ++ z0 = svmls_f16_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svmls_n_f16_z (p0, z0, z1, d4), ++ z0 = svmls_z (p0, z0, z1, d4)) ++ ++/* ++** mls_h4_f16_z_tied2: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmsb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_h4_f16_z_tied2, svfloat16_t, __fp16, ++ z0 = svmls_n_f16_z (p0, z1, z0, d4), ++ z0 = svmls_z (p0, z1, z0, d4)) ++ ++/* ++** mls_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmls z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmsb z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmsb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svmls_n_f16_z (p0, z1, z2, d4), ++ z0 = svmls_z (p0, z1, z2, d4)) ++ ++/* ++** mls_2_f16_z_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f16_z_tied1, svfloat16_t, ++ z0 = svmls_n_f16_z (p0, z0, z1, 2), ++ z0 = svmls_z (p0, z0, z1, 2)) ++ ++/* ++** mls_2_f16_z_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmsb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f16_z_tied2, svfloat16_t, ++ z0 = svmls_n_f16_z (p0, z1, z0, 2), ++ z0 = svmls_z (p0, z1, z0, 2)) ++ ++/* ++** mls_2_f16_z_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmls z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmsb z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmsb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f16_z_untied, svfloat16_t, ++ z0 = svmls_n_f16_z (p0, z1, z2, 2), ++ z0 = svmls_z (p0, z1, z2, 2)) ++ ++/* ++** mls_f16_x_tied1: ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_x_tied1, svfloat16_t, ++ z0 = svmls_f16_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_f16_x_tied2: ++** fmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_x_tied2, svfloat16_t, ++ z0 = svmls_f16_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_f16_x_tied3: ++** fmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_x_tied3, svfloat16_t, ++ z0 = svmls_f16_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fmls z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** fmsb z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0, z3 ++** fmsb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f16_x_untied, svfloat16_t, ++ z0 = svmls_f16_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svmls_n_f16_x (p0, z0, z1, d4), ++ z0 = svmls_x (p0, z0, z1, d4)) ++ ++/* ++** mls_h4_f16_x_tied2: ++** mov (z[0-9]+\.h), h4 ++** fmsb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_h4_f16_x_tied2, svfloat16_t, __fp16, ++ z0 = svmls_n_f16_x (p0, z1, z0, d4), ++ z0 = svmls_x (p0, z1, z0, d4)) ++ ++/* ++** mls_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svmls_n_f16_x (p0, z1, z2, d4), ++ z0 = svmls_x (p0, z1, z2, d4)) ++ ++/* ++** mls_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f16_x_tied1, svfloat16_t, ++ z0 = svmls_n_f16_x (p0, z0, z1, 2), ++ z0 = svmls_x (p0, z0, z1, 2)) ++ ++/* ++** mls_2_f16_x_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmsb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f16_x_tied2, svfloat16_t, ++ z0 = svmls_n_f16_x (p0, z1, z0, 2), ++ z0 = svmls_x (p0, z1, z0, 2)) ++ ++/* ++** mls_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f16_x_untied, svfloat16_t, ++ z0 = svmls_n_f16_x (p0, z1, z2, 2), ++ z0 = svmls_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_mls_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f16_x_tied1, svfloat16_t, ++ z0 = svmls_f16_x (svptrue_b16 (), z0, z1, z2), ++ z0 = svmls_x (svptrue_b16 (), z0, z1, z2)) ++ ++/* ++** ptrue_mls_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f16_x_tied2, svfloat16_t, ++ z0 = svmls_f16_x (svptrue_b16 (), z1, z0, z2), ++ z0 = svmls_x (svptrue_b16 (), z1, z0, z2)) ++ ++/* ++** ptrue_mls_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f16_x_tied3, svfloat16_t, ++ z0 = svmls_f16_x (svptrue_b16 (), z1, z2, z0), ++ z0 = svmls_x (svptrue_b16 (), z1, z2, z0)) ++ ++/* ++** ptrue_mls_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f16_x_untied, svfloat16_t, ++ z0 = svmls_f16_x (svptrue_b16 (), z1, z2, z3), ++ z0 = svmls_x (svptrue_b16 (), z1, z2, z3)) ++ ++/* ++** ptrue_mls_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_2_f16_x_tied1, svfloat16_t, ++ z0 = svmls_n_f16_x (svptrue_b16 (), z0, z1, 2), ++ z0 = svmls_x (svptrue_b16 (), z0, z1, 2)) ++ ++/* ++** ptrue_mls_2_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_2_f16_x_tied2, svfloat16_t, ++ z0 = svmls_n_f16_x (svptrue_b16 (), z1, z0, 2), ++ z0 = svmls_x (svptrue_b16 (), z1, z0, 2)) ++ ++/* ++** ptrue_mls_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_2_f16_x_untied, svfloat16_t, ++ z0 = svmls_n_f16_x (svptrue_b16 (), z1, z2, 2), ++ z0 = svmls_x (svptrue_b16 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f32.c +new file mode 100644 +index 000000000..04ce1ec46 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f32.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_f32_m_tied1: ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_m_tied1, svfloat32_t, ++ z0 = svmls_f32_m (p0, z0, z1, z2), ++ z0 = svmls_m (p0, z0, z1, z2)) ++ ++/* ++** mls_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmls z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_m_tied2, svfloat32_t, ++ z0 = svmls_f32_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmls z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_m_tied3, svfloat32_t, ++ z0 = svmls_f32_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_f32_m_untied: ++** movprfx z0, z1 ++** fmls z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_m_untied, svfloat32_t, ++ z0 = svmls_f32_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svmls_n_f32_m (p0, z0, z1, d4), ++ z0 = svmls_m (p0, z0, z1, d4)) ++ ++/* ++** mls_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fmls z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svmls_n_f32_m (p0, z1, z2, d4), ++ z0 = svmls_m (p0, z1, z2, d4)) ++ ++/* ++** mls_2_f32_m_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f32_m_tied1, svfloat32_t, ++ z0 = svmls_n_f32_m (p0, z0, z1, 2), ++ z0 = svmls_m (p0, z0, z1, 2)) ++ ++/* ++** mls_2_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmls z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f32_m_untied, svfloat32_t, ++ z0 = svmls_n_f32_m (p0, z1, z2, 2), ++ z0 = svmls_m (p0, z1, z2, 2)) ++ ++/* ++** mls_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_z_tied1, svfloat32_t, ++ z0 = svmls_f32_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_z_tied2, svfloat32_t, ++ z0 = svmls_f32_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_f32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** fmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_z_tied3, svfloat32_t, ++ z0 = svmls_f32_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmls z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmsb z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** fmsb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_z_untied, svfloat32_t, ++ z0 = svmls_f32_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svmls_n_f32_z (p0, z0, z1, d4), ++ z0 = svmls_z (p0, z0, z1, d4)) ++ ++/* ++** mls_s4_f32_z_tied2: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmsb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_s4_f32_z_tied2, svfloat32_t, float, ++ z0 = svmls_n_f32_z (p0, z1, z0, d4), ++ z0 = svmls_z (p0, z1, z0, d4)) ++ ++/* ++** mls_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmls z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmsb z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmsb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svmls_n_f32_z (p0, z1, z2, d4), ++ z0 = svmls_z (p0, z1, z2, d4)) ++ ++/* ++** mls_2_f32_z_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f32_z_tied1, svfloat32_t, ++ z0 = svmls_n_f32_z (p0, z0, z1, 2), ++ z0 = svmls_z (p0, z0, z1, 2)) ++ ++/* ++** mls_2_f32_z_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmsb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f32_z_tied2, svfloat32_t, ++ z0 = svmls_n_f32_z (p0, z1, z0, 2), ++ z0 = svmls_z (p0, z1, z0, 2)) ++ ++/* ++** mls_2_f32_z_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmls z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmsb z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmsb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f32_z_untied, svfloat32_t, ++ z0 = svmls_n_f32_z (p0, z1, z2, 2), ++ z0 = svmls_z (p0, z1, z2, 2)) ++ ++/* ++** mls_f32_x_tied1: ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_x_tied1, svfloat32_t, ++ z0 = svmls_f32_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_f32_x_tied2: ++** fmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_x_tied2, svfloat32_t, ++ z0 = svmls_f32_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_f32_x_tied3: ++** fmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_x_tied3, svfloat32_t, ++ z0 = svmls_f32_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fmls z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** fmsb z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0, z3 ++** fmsb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f32_x_untied, svfloat32_t, ++ z0 = svmls_f32_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svmls_n_f32_x (p0, z0, z1, d4), ++ z0 = svmls_x (p0, z0, z1, d4)) ++ ++/* ++** mls_s4_f32_x_tied2: ++** mov (z[0-9]+\.s), s4 ++** fmsb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_s4_f32_x_tied2, svfloat32_t, float, ++ z0 = svmls_n_f32_x (p0, z1, z0, d4), ++ z0 = svmls_x (p0, z1, z0, d4)) ++ ++/* ++** mls_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svmls_n_f32_x (p0, z1, z2, d4), ++ z0 = svmls_x (p0, z1, z2, d4)) ++ ++/* ++** mls_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f32_x_tied1, svfloat32_t, ++ z0 = svmls_n_f32_x (p0, z0, z1, 2), ++ z0 = svmls_x (p0, z0, z1, 2)) ++ ++/* ++** mls_2_f32_x_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmsb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f32_x_tied2, svfloat32_t, ++ z0 = svmls_n_f32_x (p0, z1, z0, 2), ++ z0 = svmls_x (p0, z1, z0, 2)) ++ ++/* ++** mls_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f32_x_untied, svfloat32_t, ++ z0 = svmls_n_f32_x (p0, z1, z2, 2), ++ z0 = svmls_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_mls_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f32_x_tied1, svfloat32_t, ++ z0 = svmls_f32_x (svptrue_b32 (), z0, z1, z2), ++ z0 = svmls_x (svptrue_b32 (), z0, z1, z2)) ++ ++/* ++** ptrue_mls_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f32_x_tied2, svfloat32_t, ++ z0 = svmls_f32_x (svptrue_b32 (), z1, z0, z2), ++ z0 = svmls_x (svptrue_b32 (), z1, z0, z2)) ++ ++/* ++** ptrue_mls_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f32_x_tied3, svfloat32_t, ++ z0 = svmls_f32_x (svptrue_b32 (), z1, z2, z0), ++ z0 = svmls_x (svptrue_b32 (), z1, z2, z0)) ++ ++/* ++** ptrue_mls_f32_x_untied: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f32_x_untied, svfloat32_t, ++ z0 = svmls_f32_x (svptrue_b32 (), z1, z2, z3), ++ z0 = svmls_x (svptrue_b32 (), z1, z2, z3)) ++ ++/* ++** ptrue_mls_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_2_f32_x_tied1, svfloat32_t, ++ z0 = svmls_n_f32_x (svptrue_b32 (), z0, z1, 2), ++ z0 = svmls_x (svptrue_b32 (), z0, z1, 2)) ++ ++/* ++** ptrue_mls_2_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_2_f32_x_tied2, svfloat32_t, ++ z0 = svmls_n_f32_x (svptrue_b32 (), z1, z0, 2), ++ z0 = svmls_x (svptrue_b32 (), z1, z0, 2)) ++ ++/* ++** ptrue_mls_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_2_f32_x_untied, svfloat32_t, ++ z0 = svmls_n_f32_x (svptrue_b32 (), z1, z2, 2), ++ z0 = svmls_x (svptrue_b32 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f64.c +new file mode 100644 +index 000000000..1e2108af6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f64.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_f64_m_tied1: ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_m_tied1, svfloat64_t, ++ z0 = svmls_f64_m (p0, z0, z1, z2), ++ z0 = svmls_m (p0, z0, z1, z2)) ++ ++/* ++** mls_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmls z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_m_tied2, svfloat64_t, ++ z0 = svmls_f64_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_m_tied3, svfloat64_t, ++ z0 = svmls_f64_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_f64_m_untied: ++** movprfx z0, z1 ++** fmls z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_m_untied, svfloat64_t, ++ z0 = svmls_f64_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svmls_n_f64_m (p0, z0, z1, d4), ++ z0 = svmls_m (p0, z0, z1, d4)) ++ ++/* ++** mls_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fmls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svmls_n_f64_m (p0, z1, z2, d4), ++ z0 = svmls_m (p0, z1, z2, d4)) ++ ++/* ++** mls_2_f64_m_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f64_m_tied1, svfloat64_t, ++ z0 = svmls_n_f64_m (p0, z0, z1, 2), ++ z0 = svmls_m (p0, z0, z1, 2)) ++ ++/* ++** mls_2_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f64_m_untied, svfloat64_t, ++ z0 = svmls_n_f64_m (p0, z1, z2, 2), ++ z0 = svmls_m (p0, z1, z2, 2)) ++ ++/* ++** mls_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_z_tied1, svfloat64_t, ++ z0 = svmls_f64_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_z_tied2, svfloat64_t, ++ z0 = svmls_f64_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_f64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** fmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_z_tied3, svfloat64_t, ++ z0 = svmls_f64_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmls z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmsb z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** fmsb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_z_untied, svfloat64_t, ++ z0 = svmls_f64_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svmls_n_f64_z (p0, z0, z1, d4), ++ z0 = svmls_z (p0, z0, z1, d4)) ++ ++/* ++** mls_d4_f64_z_tied2: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmsb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_d4_f64_z_tied2, svfloat64_t, double, ++ z0 = svmls_n_f64_z (p0, z1, z0, d4), ++ z0 = svmls_z (p0, z1, z0, d4)) ++ ++/* ++** mls_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmls z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmsb z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmsb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svmls_n_f64_z (p0, z1, z2, d4), ++ z0 = svmls_z (p0, z1, z2, d4)) ++ ++/* ++** mls_2_f64_z_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f64_z_tied1, svfloat64_t, ++ z0 = svmls_n_f64_z (p0, z0, z1, 2), ++ z0 = svmls_z (p0, z0, z1, 2)) ++ ++/* ++** mls_2_f64_z_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmsb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f64_z_tied2, svfloat64_t, ++ z0 = svmls_n_f64_z (p0, z1, z0, 2), ++ z0 = svmls_z (p0, z1, z0, 2)) ++ ++/* ++** mls_2_f64_z_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmls z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmsb z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmsb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f64_z_untied, svfloat64_t, ++ z0 = svmls_n_f64_z (p0, z1, z2, 2), ++ z0 = svmls_z (p0, z1, z2, 2)) ++ ++/* ++** mls_f64_x_tied1: ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_x_tied1, svfloat64_t, ++ z0 = svmls_f64_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_f64_x_tied2: ++** fmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_x_tied2, svfloat64_t, ++ z0 = svmls_f64_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_f64_x_tied3: ++** fmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_x_tied3, svfloat64_t, ++ z0 = svmls_f64_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fmls z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** fmsb z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0, z3 ++** fmsb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_f64_x_untied, svfloat64_t, ++ z0 = svmls_f64_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svmls_n_f64_x (p0, z0, z1, d4), ++ z0 = svmls_x (p0, z0, z1, d4)) ++ ++/* ++** mls_d4_f64_x_tied2: ++** mov (z[0-9]+\.d), d4 ++** fmsb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_d4_f64_x_tied2, svfloat64_t, double, ++ z0 = svmls_n_f64_x (p0, z1, z0, d4), ++ z0 = svmls_x (p0, z1, z0, d4)) ++ ++/* ++** mls_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (mls_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svmls_n_f64_x (p0, z1, z2, d4), ++ z0 = svmls_x (p0, z1, z2, d4)) ++ ++/* ++** mls_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f64_x_tied1, svfloat64_t, ++ z0 = svmls_n_f64_x (p0, z0, z1, 2), ++ z0 = svmls_x (p0, z0, z1, 2)) ++ ++/* ++** mls_2_f64_x_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmsb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f64_x_tied2, svfloat64_t, ++ z0 = svmls_n_f64_x (p0, z1, z0, 2), ++ z0 = svmls_x (p0, z1, z0, 2)) ++ ++/* ++** mls_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_2_f64_x_untied, svfloat64_t, ++ z0 = svmls_n_f64_x (p0, z1, z2, 2), ++ z0 = svmls_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_mls_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f64_x_tied1, svfloat64_t, ++ z0 = svmls_f64_x (svptrue_b64 (), z0, z1, z2), ++ z0 = svmls_x (svptrue_b64 (), z0, z1, z2)) ++ ++/* ++** ptrue_mls_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f64_x_tied2, svfloat64_t, ++ z0 = svmls_f64_x (svptrue_b64 (), z1, z0, z2), ++ z0 = svmls_x (svptrue_b64 (), z1, z0, z2)) ++ ++/* ++** ptrue_mls_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f64_x_tied3, svfloat64_t, ++ z0 = svmls_f64_x (svptrue_b64 (), z1, z2, z0), ++ z0 = svmls_x (svptrue_b64 (), z1, z2, z0)) ++ ++/* ++** ptrue_mls_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_f64_x_untied, svfloat64_t, ++ z0 = svmls_f64_x (svptrue_b64 (), z1, z2, z3), ++ z0 = svmls_x (svptrue_b64 (), z1, z2, z3)) ++ ++/* ++** ptrue_mls_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_2_f64_x_tied1, svfloat64_t, ++ z0 = svmls_n_f64_x (svptrue_b64 (), z0, z1, 2), ++ z0 = svmls_x (svptrue_b64 (), z0, z1, 2)) ++ ++/* ++** ptrue_mls_2_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_2_f64_x_tied2, svfloat64_t, ++ z0 = svmls_n_f64_x (svptrue_b64 (), z1, z0, 2), ++ z0 = svmls_x (svptrue_b64 (), z1, z0, 2)) ++ ++/* ++** ptrue_mls_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mls_2_f64_x_untied, svfloat64_t, ++ z0 = svmls_n_f64_x (svptrue_b64 (), z1, z2, 2), ++ z0 = svmls_x (svptrue_b64 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f16.c +new file mode 100644 +index 000000000..832376d0b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f16.c +@@ -0,0 +1,128 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_lane_0_f16_tied1: ++** fmls z0\.h, z1\.h, z2\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f16_tied1, svfloat16_t, ++ z0 = svmls_lane_f16 (z0, z1, z2, 0), ++ z0 = svmls_lane (z0, z1, z2, 0)) ++ ++/* ++** mls_lane_0_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmls z0\.h, \1\.h, z2\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f16_tied2, svfloat16_t, ++ z0 = svmls_lane_f16 (z1, z0, z2, 0), ++ z0 = svmls_lane (z1, z0, z2, 0)) ++ ++/* ++** mls_lane_0_f16_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmls z0\.h, z2\.h, \1\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f16_tied3, svfloat16_t, ++ z0 = svmls_lane_f16 (z1, z2, z0, 0), ++ z0 = svmls_lane (z1, z2, z0, 0)) ++ ++/* ++** mls_lane_0_f16_untied: ++** movprfx z0, z1 ++** fmls z0\.h, z2\.h, z3\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f16_untied, svfloat16_t, ++ z0 = svmls_lane_f16 (z1, z2, z3, 0), ++ z0 = svmls_lane (z1, z2, z3, 0)) ++ ++/* ++** mls_lane_1_f16: ++** fmls z0\.h, z1\.h, z2\.h\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_1_f16, svfloat16_t, ++ z0 = svmls_lane_f16 (z0, z1, z2, 1), ++ z0 = svmls_lane (z0, z1, z2, 1)) ++ ++/* ++** mls_lane_2_f16: ++** fmls z0\.h, z1\.h, z2\.h\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_2_f16, svfloat16_t, ++ z0 = svmls_lane_f16 (z0, z1, z2, 2), ++ z0 = svmls_lane (z0, z1, z2, 2)) ++ ++/* ++** mls_lane_3_f16: ++** fmls z0\.h, z1\.h, z2\.h\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_3_f16, svfloat16_t, ++ z0 = svmls_lane_f16 (z0, z1, z2, 3), ++ z0 = svmls_lane (z0, z1, z2, 3)) ++ ++/* ++** mls_lane_4_f16: ++** fmls z0\.h, z1\.h, z2\.h\[4\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_4_f16, svfloat16_t, ++ z0 = svmls_lane_f16 (z0, z1, z2, 4), ++ z0 = svmls_lane (z0, z1, z2, 4)) ++ ++/* ++** mls_lane_5_f16: ++** fmls z0\.h, z1\.h, z2\.h\[5\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_5_f16, svfloat16_t, ++ z0 = svmls_lane_f16 (z0, z1, z2, 5), ++ z0 = svmls_lane (z0, z1, z2, 5)) ++ ++/* ++** 
mls_lane_6_f16: ++** fmls z0\.h, z1\.h, z2\.h\[6\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_6_f16, svfloat16_t, ++ z0 = svmls_lane_f16 (z0, z1, z2, 6), ++ z0 = svmls_lane (z0, z1, z2, 6)) ++ ++/* ++** mls_lane_7_f16: ++** fmls z0\.h, z1\.h, z2\.h\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_7_f16, svfloat16_t, ++ z0 = svmls_lane_f16 (z0, z1, z2, 7), ++ z0 = svmls_lane (z0, z1, z2, 7)) ++ ++/* ++** mls_lane_z7_f16: ++** fmls z0\.h, z1\.h, z7\.h\[7\] ++** ret ++*/ ++TEST_DUAL_Z (mls_lane_z7_f16, svfloat16_t, svfloat16_t, ++ z0 = svmls_lane_f16 (z0, z1, z7, 7), ++ z0 = svmls_lane (z0, z1, z7, 7)) ++ ++/* ++** mls_lane_z8_f16: ++** str d8, \[sp, -16\]! ++** mov (z[0-7])\.d, z8\.d ++** fmls z0\.h, z1\.h, \1\.h\[7\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (mls_lane_z8_f16, svfloat16_t, svfloat16_t, z8, ++ z0 = svmls_lane_f16 (z0, z1, z8, 7), ++ z0 = svmls_lane (z0, z1, z8, 7)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f32.c +new file mode 100644 +index 000000000..3244b972f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f32.c +@@ -0,0 +1,92 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_lane_0_f32_tied1: ++** fmls z0\.s, z1\.s, z2\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f32_tied1, svfloat32_t, ++ z0 = svmls_lane_f32 (z0, z1, z2, 0), ++ z0 = svmls_lane (z0, z1, z2, 0)) ++ ++/* ++** mls_lane_0_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmls z0\.s, \1\.s, z2\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f32_tied2, svfloat32_t, ++ z0 = svmls_lane_f32 (z1, z0, z2, 0), ++ z0 = svmls_lane (z1, z0, z2, 0)) ++ ++/* ++** mls_lane_0_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmls z0\.s, z2\.s, \1\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f32_tied3, svfloat32_t, ++ z0 = svmls_lane_f32 (z1, z2, z0, 0), ++ z0 = svmls_lane (z1, z2, z0, 0)) ++ ++/* ++** mls_lane_0_f32_untied: ++** movprfx z0, z1 ++** fmls z0\.s, z2\.s, z3\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f32_untied, svfloat32_t, ++ z0 = svmls_lane_f32 (z1, z2, z3, 0), ++ z0 = svmls_lane (z1, z2, z3, 0)) ++ ++/* ++** mls_lane_1_f32: ++** fmls z0\.s, z1\.s, z2\.s\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_1_f32, svfloat32_t, ++ z0 = svmls_lane_f32 (z0, z1, z2, 1), ++ z0 = svmls_lane (z0, z1, z2, 1)) ++ ++/* ++** mls_lane_2_f32: ++** fmls z0\.s, z1\.s, z2\.s\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_2_f32, svfloat32_t, ++ z0 = svmls_lane_f32 (z0, z1, z2, 2), ++ z0 = svmls_lane (z0, z1, z2, 2)) ++ ++/* ++** mls_lane_3_f32: ++** fmls z0\.s, z1\.s, z2\.s\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_3_f32, svfloat32_t, ++ z0 = svmls_lane_f32 (z0, z1, z2, 3), ++ z0 = svmls_lane (z0, z1, z2, 3)) ++ ++/* ++** mls_lane_z7_f32: ++** fmls z0\.s, z1\.s, z7\.s\[3\] ++** ret ++*/ ++TEST_DUAL_Z (mls_lane_z7_f32, svfloat32_t, svfloat32_t, ++ z0 = svmls_lane_f32 (z0, z1, z7, 3), ++ z0 = svmls_lane (z0, z1, z7, 3)) ++ ++/* ++** mls_lane_z8_f32: ++** str d8, \[sp, -16\]! 
++** mov (z[0-7])\.d, z8\.d ++** fmls z0\.s, z1\.s, \1\.s\[3\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (mls_lane_z8_f32, svfloat32_t, svfloat32_t, z8, ++ z0 = svmls_lane_f32 (z0, z1, z8, 3), ++ z0 = svmls_lane (z0, z1, z8, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f64.c +new file mode 100644 +index 000000000..16f20ca53 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f64.c +@@ -0,0 +1,83 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_lane_0_f64_tied1: ++** fmls z0\.d, z1\.d, z2\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f64_tied1, svfloat64_t, ++ z0 = svmls_lane_f64 (z0, z1, z2, 0), ++ z0 = svmls_lane (z0, z1, z2, 0)) ++ ++/* ++** mls_lane_0_f64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmls z0\.d, \1, z2\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f64_tied2, svfloat64_t, ++ z0 = svmls_lane_f64 (z1, z0, z2, 0), ++ z0 = svmls_lane (z1, z0, z2, 0)) ++ ++/* ++** mls_lane_0_f64_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmls z0\.d, z2\.d, \1\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f64_tied3, svfloat64_t, ++ z0 = svmls_lane_f64 (z1, z2, z0, 0), ++ z0 = svmls_lane (z1, z2, z0, 0)) ++ ++/* ++** mls_lane_0_f64_untied: ++** movprfx z0, z1 ++** fmls z0\.d, z2\.d, z3\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_0_f64_untied, svfloat64_t, ++ z0 = svmls_lane_f64 (z1, z2, z3, 0), ++ z0 = svmls_lane (z1, z2, z3, 0)) ++ ++/* ++** mls_lane_1_f64: ++** fmls z0\.d, z1\.d, z2\.d\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (mls_lane_1_f64, svfloat64_t, ++ z0 = svmls_lane_f64 (z0, z1, z2, 1), ++ z0 = svmls_lane (z0, z1, z2, 1)) ++ ++/* ++** mls_lane_z7_f64: ++** fmls z0\.d, z1\.d, z7\.d\[1\] ++** ret ++*/ ++TEST_DUAL_Z (mls_lane_z7_f64, svfloat64_t, svfloat64_t, ++ z0 = svmls_lane_f64 (z0, z1, z7, 1), ++ z0 = svmls_lane (z0, z1, z7, 1)) ++ ++/* ++** mls_lane_z15_f64: ++** str d15, \[sp, -16\]! 
++** fmls z0\.d, z1\.d, z15\.d\[1\] ++** ldr d15, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (mls_lane_z15_f64, svfloat64_t, svfloat64_t, z15, ++ z0 = svmls_lane_f64 (z0, z1, z15, 1), ++ z0 = svmls_lane (z0, z1, z15, 1)) ++ ++/* ++** mls_lane_z16_f64: ++** mov (z[0-9]|z1[0-5])\.d, z16\.d ++** fmls z0\.d, z1\.d, \1\.d\[1\] ++** ret ++*/ ++TEST_DUAL_LANE_REG (mls_lane_z16_f64, svfloat64_t, svfloat64_t, z16, ++ z0 = svmls_lane_f64 (z0, z1, z16, 1), ++ z0 = svmls_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s16.c +new file mode 100644 +index 000000000..e199829c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s16.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_s16_m_tied1: ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_m_tied1, svint16_t, ++ z0 = svmls_s16_m (p0, z0, z1, z2), ++ z0 = svmls_m (p0, z0, z1, z2)) ++ ++/* ++** mls_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_m_tied2, svint16_t, ++ z0 = svmls_s16_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_s16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_m_tied3, svint16_t, ++ z0 = svmls_s16_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_s16_m_untied: ++** movprfx z0, z1 ++** mls z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_m_untied, svint16_t, ++ z0 = svmls_s16_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svmls_n_s16_m (p0, z0, z1, x0), ++ z0 = svmls_m (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** mls z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svmls_n_s16_m (p0, z1, z2, x0), ++ z0 = svmls_m (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s16_m_tied1: ++** mov (z[0-9]+\.h), #11 ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s16_m_tied1, svint16_t, ++ z0 = svmls_n_s16_m (p0, z0, z1, 11), ++ z0 = svmls_m (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #11 ++** movprfx z0, z1 ++** mls z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s16_m_untied, svint16_t, ++ z0 = svmls_n_s16_m (p0, z1, z2, 11), ++ z0 = svmls_m (p0, z1, z2, 11)) ++ ++/* ++** mls_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_z_tied1, svint16_t, ++ z0 = svmls_s16_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_z_tied2, svint16_t, ++ z0 = svmls_s16_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_s16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_z_tied3, svint16_t, ++ z0 = svmls_s16_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mls 
z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_z_untied, svint16_t, ++ z0 = svmls_s16_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svmls_n_s16_z (p0, z0, z1, x0), ++ z0 = svmls_z (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_s16_z_tied2: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s16_z_tied2, svint16_t, int16_t, ++ z0 = svmls_n_s16_z (p0, z1, z0, x0), ++ z0 = svmls_z (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mls z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svmls_n_s16_z (p0, z1, z2, x0), ++ z0 = svmls_z (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s16_z_tied1: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s16_z_tied1, svint16_t, ++ z0 = svmls_n_s16_z (p0, z0, z1, 11), ++ z0 = svmls_z (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s16_z_tied2: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s16_z_tied2, svint16_t, ++ z0 = svmls_n_s16_z (p0, z1, z0, 11), ++ z0 = svmls_z (p0, z1, z0, 11)) ++ ++/* ++** mls_11_s16_z_untied: ++** mov (z[0-9]+\.h), #11 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mls z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s16_z_untied, svint16_t, ++ z0 = svmls_n_s16_z (p0, z1, z2, 11), ++ z0 = svmls_z (p0, z1, z2, 11)) ++ ++/* ++** mls_s16_x_tied1: ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_x_tied1, svint16_t, ++ z0 = svmls_s16_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_s16_x_tied2: ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_x_tied2, svint16_t, ++ z0 = svmls_s16_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_s16_x_tied3: ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_x_tied3, svint16_t, ++ z0 = svmls_s16_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** mls z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** msb z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0, z3 ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s16_x_untied, svint16_t, ++ z0 = svmls_s16_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svmls_n_s16_x (p0, z0, z1, x0), ++ z0 = svmls_x (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_s16_x_tied2: ++** mov (z[0-9]+\.h), w0 ++** msb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s16_x_tied2, 
svint16_t, int16_t, ++ z0 = svmls_n_s16_x (p0, z1, z0, x0), ++ z0 = svmls_x (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_s16_x_untied: ++** mov z0\.h, w0 ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svmls_n_s16_x (p0, z1, z2, x0), ++ z0 = svmls_x (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s16_x_tied1: ++** mov (z[0-9]+\.h), #11 ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s16_x_tied1, svint16_t, ++ z0 = svmls_n_s16_x (p0, z0, z1, 11), ++ z0 = svmls_x (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s16_x_tied2: ++** mov (z[0-9]+\.h), #11 ++** msb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s16_x_tied2, svint16_t, ++ z0 = svmls_n_s16_x (p0, z1, z0, 11), ++ z0 = svmls_x (p0, z1, z0, 11)) ++ ++/* ++** mls_11_s16_x_untied: ++** mov z0\.h, #11 ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s16_x_untied, svint16_t, ++ z0 = svmls_n_s16_x (p0, z1, z2, 11), ++ z0 = svmls_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s32.c +new file mode 100644 +index 000000000..fe386d01c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s32.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_s32_m_tied1: ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_m_tied1, svint32_t, ++ z0 = svmls_s32_m (p0, z0, z1, z2), ++ z0 = svmls_m (p0, z0, z1, z2)) ++ ++/* ++** mls_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_m_tied2, svint32_t, ++ z0 = svmls_s32_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_s32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_m_tied3, svint32_t, ++ z0 = svmls_s32_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_s32_m_untied: ++** movprfx z0, z1 ++** mls z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_m_untied, svint32_t, ++ z0 = svmls_s32_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svmls_n_s32_m (p0, z0, z1, x0), ++ z0 = svmls_m (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** mls z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svmls_n_s32_m (p0, z1, z2, x0), ++ z0 = svmls_m (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s32_m_tied1: ++** mov (z[0-9]+\.s), #11 ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s32_m_tied1, svint32_t, ++ z0 = svmls_n_s32_m (p0, z0, z1, 11), ++ z0 = svmls_m (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #11 ++** movprfx z0, z1 ++** mls z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s32_m_untied, svint32_t, ++ z0 = svmls_n_s32_m (p0, z1, z2, 11), ++ z0 = svmls_m (p0, z1, z2, 11)) ++ ++/* ++** mls_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_z_tied1, svint32_t, ++ z0 = svmls_s32_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_s32_z_tied2: 
++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_z_tied2, svint32_t, ++ z0 = svmls_s32_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_s32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_z_tied3, svint32_t, ++ z0 = svmls_s32_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mls z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_z_untied, svint32_t, ++ z0 = svmls_s32_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svmls_n_s32_z (p0, z0, z1, x0), ++ z0 = svmls_z (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_s32_z_tied2: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s32_z_tied2, svint32_t, int32_t, ++ z0 = svmls_n_s32_z (p0, z1, z0, x0), ++ z0 = svmls_z (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mls z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svmls_n_s32_z (p0, z1, z2, x0), ++ z0 = svmls_z (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s32_z_tied1: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s32_z_tied1, svint32_t, ++ z0 = svmls_n_s32_z (p0, z0, z1, 11), ++ z0 = svmls_z (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s32_z_tied2: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s32_z_tied2, svint32_t, ++ z0 = svmls_n_s32_z (p0, z1, z0, 11), ++ z0 = svmls_z (p0, z1, z0, 11)) ++ ++/* ++** mls_11_s32_z_untied: ++** mov (z[0-9]+\.s), #11 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mls z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s32_z_untied, svint32_t, ++ z0 = svmls_n_s32_z (p0, z1, z2, 11), ++ z0 = svmls_z (p0, z1, z2, 11)) ++ ++/* ++** mls_s32_x_tied1: ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_x_tied1, svint32_t, ++ z0 = svmls_s32_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_s32_x_tied2: ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_x_tied2, svint32_t, ++ z0 = svmls_s32_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_s32_x_tied3: ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s32_x_tied3, svint32_t, ++ z0 = svmls_s32_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** mls z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** msb z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0, z3 ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret 
++*/ ++TEST_UNIFORM_Z (mls_s32_x_untied, svint32_t, ++ z0 = svmls_s32_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svmls_n_s32_x (p0, z0, z1, x0), ++ z0 = svmls_x (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_s32_x_tied2: ++** mov (z[0-9]+\.s), w0 ++** msb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s32_x_tied2, svint32_t, int32_t, ++ z0 = svmls_n_s32_x (p0, z1, z0, x0), ++ z0 = svmls_x (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_s32_x_untied: ++** mov z0\.s, w0 ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svmls_n_s32_x (p0, z1, z2, x0), ++ z0 = svmls_x (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s32_x_tied1: ++** mov (z[0-9]+\.s), #11 ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s32_x_tied1, svint32_t, ++ z0 = svmls_n_s32_x (p0, z0, z1, 11), ++ z0 = svmls_x (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s32_x_tied2: ++** mov (z[0-9]+\.s), #11 ++** msb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s32_x_tied2, svint32_t, ++ z0 = svmls_n_s32_x (p0, z1, z0, 11), ++ z0 = svmls_x (p0, z1, z0, 11)) ++ ++/* ++** mls_11_s32_x_untied: ++** mov z0\.s, #11 ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s32_x_untied, svint32_t, ++ z0 = svmls_n_s32_x (p0, z1, z2, 11), ++ z0 = svmls_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s64.c +new file mode 100644 +index 000000000..2998d733f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s64.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_s64_m_tied1: ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_m_tied1, svint64_t, ++ z0 = svmls_s64_m (p0, z0, z1, z2), ++ z0 = svmls_m (p0, z0, z1, z2)) ++ ++/* ++** mls_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mls z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_m_tied2, svint64_t, ++ z0 = svmls_s64_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_s64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_m_tied3, svint64_t, ++ z0 = svmls_s64_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_s64_m_untied: ++** movprfx z0, z1 ++** mls z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_m_untied, svint64_t, ++ z0 = svmls_s64_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svmls_n_s64_m (p0, z0, z1, x0), ++ z0 = svmls_m (p0, z0, z1, x0)) ++ ++/* ++** mls_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** mls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svmls_n_s64_m (p0, z1, z2, x0), ++ z0 = svmls_m (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s64_m_tied1: ++** mov (z[0-9]+\.d), #11 ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s64_m_tied1, svint64_t, ++ z0 = svmls_n_s64_m (p0, z0, z1, 11), ++ z0 = svmls_m (p0, z0, z1, 11)) ++ ++/* ++** 
mls_11_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #11 ++** movprfx z0, z1 ++** mls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s64_m_untied, svint64_t, ++ z0 = svmls_n_s64_m (p0, z1, z2, 11), ++ z0 = svmls_m (p0, z1, z2, 11)) ++ ++/* ++** mls_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_z_tied1, svint64_t, ++ z0 = svmls_s64_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_z_tied2, svint64_t, ++ z0 = svmls_s64_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_s64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_z_tied3, svint64_t, ++ z0 = svmls_s64_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mls z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_z_untied, svint64_t, ++ z0 = svmls_s64_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svmls_n_s64_z (p0, z0, z1, x0), ++ z0 = svmls_z (p0, z0, z1, x0)) ++ ++/* ++** mls_x0_s64_z_tied2: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_s64_z_tied2, svint64_t, int64_t, ++ z0 = svmls_n_s64_z (p0, z1, z0, x0), ++ z0 = svmls_z (p0, z1, z0, x0)) ++ ++/* ++** mls_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mls z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svmls_n_s64_z (p0, z1, z2, x0), ++ z0 = svmls_z (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s64_z_tied1: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s64_z_tied1, svint64_t, ++ z0 = svmls_n_s64_z (p0, z0, z1, 11), ++ z0 = svmls_z (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s64_z_tied2: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s64_z_tied2, svint64_t, ++ z0 = svmls_n_s64_z (p0, z1, z0, 11), ++ z0 = svmls_z (p0, z1, z0, 11)) ++ ++/* ++** mls_11_s64_z_untied: ++** mov (z[0-9]+\.d), #11 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mls z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s64_z_untied, svint64_t, ++ z0 = svmls_n_s64_z (p0, z1, z2, 11), ++ z0 = svmls_z (p0, z1, z2, 11)) ++ ++/* ++** mls_s64_x_tied1: ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_x_tied1, svint64_t, ++ z0 = svmls_s64_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_s64_x_tied2: ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_x_tied2, 
svint64_t, ++ z0 = svmls_s64_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_s64_x_tied3: ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_x_tied3, svint64_t, ++ z0 = svmls_s64_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** mls z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** msb z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0, z3 ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s64_x_untied, svint64_t, ++ z0 = svmls_s64_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svmls_n_s64_x (p0, z0, z1, x0), ++ z0 = svmls_x (p0, z0, z1, x0)) ++ ++/* ++** mls_x0_s64_x_tied2: ++** mov (z[0-9]+\.d), x0 ++** msb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_s64_x_tied2, svint64_t, int64_t, ++ z0 = svmls_n_s64_x (p0, z1, z0, x0), ++ z0 = svmls_x (p0, z1, z0, x0)) ++ ++/* ++** mls_x0_s64_x_untied: ++** mov z0\.d, x0 ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svmls_n_s64_x (p0, z1, z2, x0), ++ z0 = svmls_x (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s64_x_tied1: ++** mov (z[0-9]+\.d), #11 ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s64_x_tied1, svint64_t, ++ z0 = svmls_n_s64_x (p0, z0, z1, 11), ++ z0 = svmls_x (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s64_x_tied2: ++** mov (z[0-9]+\.d), #11 ++** msb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s64_x_tied2, svint64_t, ++ z0 = svmls_n_s64_x (p0, z1, z0, 11), ++ z0 = svmls_x (p0, z1, z0, 11)) ++ ++/* ++** mls_11_s64_x_untied: ++** mov z0\.d, #11 ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s64_x_untied, svint64_t, ++ z0 = svmls_n_s64_x (p0, z1, z2, 11), ++ z0 = svmls_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s8.c +new file mode 100644 +index 000000000..c60c43145 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s8.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_s8_m_tied1: ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_m_tied1, svint8_t, ++ z0 = svmls_s8_m (p0, z0, z1, z2), ++ z0 = svmls_m (p0, z0, z1, z2)) ++ ++/* ++** mls_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.b, p0/m, \1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_m_tied2, svint8_t, ++ z0 = svmls_s8_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_s8_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.b, p0/m, z2\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_m_tied3, svint8_t, ++ z0 = svmls_s8_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_s8_m_untied: ++** movprfx z0, z1 ++** mls z0\.b, p0/m, z2\.b, z3\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_m_untied, svint8_t, ++ z0 = svmls_s8_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svmls_n_s8_m (p0, z0, z1, x0), ++ z0 = svmls_m (p0, z0, z1, x0)) ++ ++/* ++** 
mls_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** mls z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svmls_n_s8_m (p0, z1, z2, x0), ++ z0 = svmls_m (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s8_m_tied1: ++** mov (z[0-9]+\.b), #11 ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s8_m_tied1, svint8_t, ++ z0 = svmls_n_s8_m (p0, z0, z1, 11), ++ z0 = svmls_m (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #11 ++** movprfx z0, z1 ++** mls z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s8_m_untied, svint8_t, ++ z0 = svmls_n_s8_m (p0, z1, z2, 11), ++ z0 = svmls_m (p0, z1, z2, 11)) ++ ++/* ++** mls_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_z_tied1, svint8_t, ++ z0 = svmls_s8_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_z_tied2, svint8_t, ++ z0 = svmls_s8_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_s8_z_tied3: ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_z_tied3, svint8_t, ++ z0 = svmls_s8_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mls z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, z3\.b, z1\.b ++** | ++** movprfx z0\.b, p0/z, z3\.b ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_z_untied, svint8_t, ++ z0 = svmls_s8_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svmls_n_s8_z (p0, z0, z1, x0), ++ z0 = svmls_z (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_s8_z_tied2: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s8_z_tied2, svint8_t, int8_t, ++ z0 = svmls_n_s8_z (p0, z1, z0, x0), ++ z0 = svmls_z (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mls z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, \1, z1\.b ++** | ++** movprfx z0\.b, p0/z, \1 ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svmls_n_s8_z (p0, z1, z2, x0), ++ z0 = svmls_z (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s8_z_tied1: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s8_z_tied1, svint8_t, ++ z0 = svmls_n_s8_z (p0, z0, z1, 11), ++ z0 = svmls_z (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s8_z_tied2: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s8_z_tied2, svint8_t, ++ z0 = svmls_n_s8_z (p0, z1, z0, 11), ++ z0 = svmls_z (p0, z1, z0, 11)) ++ ++/* ++** mls_11_s8_z_untied: ++** mov (z[0-9]+\.b), #11 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mls z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, \1, z1\.b ++** | ++** movprfx z0\.b, p0/z, \1 ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** 
) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s8_z_untied, svint8_t, ++ z0 = svmls_n_s8_z (p0, z1, z2, 11), ++ z0 = svmls_z (p0, z1, z2, 11)) ++ ++/* ++** mls_s8_x_tied1: ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_x_tied1, svint8_t, ++ z0 = svmls_s8_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_s8_x_tied2: ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_x_tied2, svint8_t, ++ z0 = svmls_s8_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_s8_x_tied3: ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_x_tied3, svint8_t, ++ z0 = svmls_s8_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** mls z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0, z2 ++** msb z0\.b, p0/m, z3\.b, z1\.b ++** | ++** movprfx z0, z3 ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_s8_x_untied, svint8_t, ++ z0 = svmls_s8_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svmls_n_s8_x (p0, z0, z1, x0), ++ z0 = svmls_x (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_s8_x_tied2: ++** mov (z[0-9]+\.b), w0 ++** msb z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s8_x_tied2, svint8_t, int8_t, ++ z0 = svmls_n_s8_x (p0, z1, z0, x0), ++ z0 = svmls_x (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_s8_x_untied: ++** mov z0\.b, w0 ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svmls_n_s8_x (p0, z1, z2, x0), ++ z0 = svmls_x (p0, z1, z2, x0)) ++ ++/* ++** mls_11_s8_x_tied1: ++** mov (z[0-9]+\.b), #11 ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s8_x_tied1, svint8_t, ++ z0 = svmls_n_s8_x (p0, z0, z1, 11), ++ z0 = svmls_x (p0, z0, z1, 11)) ++ ++/* ++** mls_11_s8_x_tied2: ++** mov (z[0-9]+\.b), #11 ++** msb z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s8_x_tied2, svint8_t, ++ z0 = svmls_n_s8_x (p0, z1, z0, 11), ++ z0 = svmls_x (p0, z1, z0, 11)) ++ ++/* ++** mls_11_s8_x_untied: ++** mov z0\.b, #11 ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_s8_x_untied, svint8_t, ++ z0 = svmls_n_s8_x (p0, z1, z2, 11), ++ z0 = svmls_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u16.c +new file mode 100644 +index 000000000..e8a9f5cd9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u16.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_u16_m_tied1: ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_m_tied1, svuint16_t, ++ z0 = svmls_u16_m (p0, z0, z1, z2), ++ z0 = svmls_m (p0, z0, z1, z2)) ++ ++/* ++** mls_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_m_tied2, svuint16_t, ++ z0 = svmls_u16_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_u16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_m_tied3, svuint16_t, ++ z0 = svmls_u16_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_u16_m_untied: ++** movprfx z0, 
z1 ++** mls z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_m_untied, svuint16_t, ++ z0 = svmls_u16_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svmls_n_u16_m (p0, z0, z1, x0), ++ z0 = svmls_m (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** mls z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svmls_n_u16_m (p0, z1, z2, x0), ++ z0 = svmls_m (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u16_m_tied1: ++** mov (z[0-9]+\.h), #11 ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u16_m_tied1, svuint16_t, ++ z0 = svmls_n_u16_m (p0, z0, z1, 11), ++ z0 = svmls_m (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #11 ++** movprfx z0, z1 ++** mls z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u16_m_untied, svuint16_t, ++ z0 = svmls_n_u16_m (p0, z1, z2, 11), ++ z0 = svmls_m (p0, z1, z2, 11)) ++ ++/* ++** mls_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_z_tied1, svuint16_t, ++ z0 = svmls_u16_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_z_tied2, svuint16_t, ++ z0 = svmls_u16_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_u16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_z_tied3, svuint16_t, ++ z0 = svmls_u16_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mls z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_z_untied, svuint16_t, ++ z0 = svmls_u16_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svmls_n_u16_z (p0, z0, z1, x0), ++ z0 = svmls_z (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_u16_z_tied2: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u16_z_tied2, svuint16_t, uint16_t, ++ z0 = svmls_n_u16_z (p0, z1, z0, x0), ++ z0 = svmls_z (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mls z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svmls_n_u16_z (p0, z1, z2, x0), ++ z0 = svmls_z (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u16_z_tied1: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u16_z_tied1, svuint16_t, ++ z0 = svmls_n_u16_z (p0, z0, z1, 11), ++ z0 = svmls_z (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u16_z_tied2: ++** mov 
(z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u16_z_tied2, svuint16_t, ++ z0 = svmls_n_u16_z (p0, z1, z0, 11), ++ z0 = svmls_z (p0, z1, z0, 11)) ++ ++/* ++** mls_11_u16_z_untied: ++** mov (z[0-9]+\.h), #11 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mls z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u16_z_untied, svuint16_t, ++ z0 = svmls_n_u16_z (p0, z1, z2, 11), ++ z0 = svmls_z (p0, z1, z2, 11)) ++ ++/* ++** mls_u16_x_tied1: ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_x_tied1, svuint16_t, ++ z0 = svmls_u16_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_u16_x_tied2: ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_x_tied2, svuint16_t, ++ z0 = svmls_u16_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_u16_x_tied3: ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_x_tied3, svuint16_t, ++ z0 = svmls_u16_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** mls z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** msb z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0, z3 ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u16_x_untied, svuint16_t, ++ z0 = svmls_u16_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svmls_n_u16_x (p0, z0, z1, x0), ++ z0 = svmls_x (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_u16_x_tied2: ++** mov (z[0-9]+\.h), w0 ++** msb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u16_x_tied2, svuint16_t, uint16_t, ++ z0 = svmls_n_u16_x (p0, z1, z0, x0), ++ z0 = svmls_x (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_u16_x_untied: ++** mov z0\.h, w0 ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svmls_n_u16_x (p0, z1, z2, x0), ++ z0 = svmls_x (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u16_x_tied1: ++** mov (z[0-9]+\.h), #11 ++** mls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u16_x_tied1, svuint16_t, ++ z0 = svmls_n_u16_x (p0, z0, z1, 11), ++ z0 = svmls_x (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u16_x_tied2: ++** mov (z[0-9]+\.h), #11 ++** msb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u16_x_tied2, svuint16_t, ++ z0 = svmls_n_u16_x (p0, z1, z0, 11), ++ z0 = svmls_x (p0, z1, z0, 11)) ++ ++/* ++** mls_11_u16_x_untied: ++** mov z0\.h, #11 ++** msb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u16_x_untied, svuint16_t, ++ z0 = svmls_n_u16_x (p0, z1, z2, 11), ++ z0 = svmls_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u32.c +new file mode 100644 +index 000000000..47e885012 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u32.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_u32_m_tied1: ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_m_tied1, svuint32_t, ++ z0 = svmls_u32_m (p0, z0, z1, z2), ++ z0 = svmls_m 
(p0, z0, z1, z2)) ++ ++/* ++** mls_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_m_tied2, svuint32_t, ++ z0 = svmls_u32_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_u32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_m_tied3, svuint32_t, ++ z0 = svmls_u32_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_u32_m_untied: ++** movprfx z0, z1 ++** mls z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_m_untied, svuint32_t, ++ z0 = svmls_u32_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svmls_n_u32_m (p0, z0, z1, x0), ++ z0 = svmls_m (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** mls z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svmls_n_u32_m (p0, z1, z2, x0), ++ z0 = svmls_m (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u32_m_tied1: ++** mov (z[0-9]+\.s), #11 ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u32_m_tied1, svuint32_t, ++ z0 = svmls_n_u32_m (p0, z0, z1, 11), ++ z0 = svmls_m (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #11 ++** movprfx z0, z1 ++** mls z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u32_m_untied, svuint32_t, ++ z0 = svmls_n_u32_m (p0, z1, z2, 11), ++ z0 = svmls_m (p0, z1, z2, 11)) ++ ++/* ++** mls_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_z_tied1, svuint32_t, ++ z0 = svmls_u32_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_z_tied2, svuint32_t, ++ z0 = svmls_u32_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_u32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_z_tied3, svuint32_t, ++ z0 = svmls_u32_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mls z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_z_untied, svuint32_t, ++ z0 = svmls_u32_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svmls_n_u32_z (p0, z0, z1, x0), ++ z0 = svmls_z (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_u32_z_tied2: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u32_z_tied2, svuint32_t, uint32_t, ++ z0 = svmls_n_u32_z (p0, z1, z0, x0), ++ z0 = svmls_z (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mls z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, 
\1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svmls_n_u32_z (p0, z1, z2, x0), ++ z0 = svmls_z (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u32_z_tied1: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u32_z_tied1, svuint32_t, ++ z0 = svmls_n_u32_z (p0, z0, z1, 11), ++ z0 = svmls_z (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u32_z_tied2: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u32_z_tied2, svuint32_t, ++ z0 = svmls_n_u32_z (p0, z1, z0, 11), ++ z0 = svmls_z (p0, z1, z0, 11)) ++ ++/* ++** mls_11_u32_z_untied: ++** mov (z[0-9]+\.s), #11 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mls z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u32_z_untied, svuint32_t, ++ z0 = svmls_n_u32_z (p0, z1, z2, 11), ++ z0 = svmls_z (p0, z1, z2, 11)) ++ ++/* ++** mls_u32_x_tied1: ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_x_tied1, svuint32_t, ++ z0 = svmls_u32_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_u32_x_tied2: ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_x_tied2, svuint32_t, ++ z0 = svmls_u32_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_u32_x_tied3: ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_x_tied3, svuint32_t, ++ z0 = svmls_u32_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** mls z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** msb z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0, z3 ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u32_x_untied, svuint32_t, ++ z0 = svmls_u32_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svmls_n_u32_x (p0, z0, z1, x0), ++ z0 = svmls_x (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_u32_x_tied2: ++** mov (z[0-9]+\.s), w0 ++** msb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u32_x_tied2, svuint32_t, uint32_t, ++ z0 = svmls_n_u32_x (p0, z1, z0, x0), ++ z0 = svmls_x (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_u32_x_untied: ++** mov z0\.s, w0 ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svmls_n_u32_x (p0, z1, z2, x0), ++ z0 = svmls_x (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u32_x_tied1: ++** mov (z[0-9]+\.s), #11 ++** mls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u32_x_tied1, svuint32_t, ++ z0 = svmls_n_u32_x (p0, z0, z1, 11), ++ z0 = svmls_x (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u32_x_tied2: ++** mov (z[0-9]+\.s), #11 ++** msb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u32_x_tied2, svuint32_t, ++ z0 = svmls_n_u32_x (p0, z1, z0, 11), ++ z0 = svmls_x (p0, z1, z0, 11)) ++ ++/* ++** mls_11_u32_x_untied: ++** mov z0\.s, #11 ++** msb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u32_x_untied, svuint32_t, ++ z0 = svmls_n_u32_x (p0, z1, z2, 11), ++ z0 = svmls_x (p0, z1, z2, 11)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u64.c +new file mode 100644 +index 000000000..4d441b759 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u64.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_u64_m_tied1: ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_m_tied1, svuint64_t, ++ z0 = svmls_u64_m (p0, z0, z1, z2), ++ z0 = svmls_m (p0, z0, z1, z2)) ++ ++/* ++** mls_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mls z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_m_tied2, svuint64_t, ++ z0 = svmls_u64_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_u64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_m_tied3, svuint64_t, ++ z0 = svmls_u64_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_u64_m_untied: ++** movprfx z0, z1 ++** mls z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_m_untied, svuint64_t, ++ z0 = svmls_u64_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svmls_n_u64_m (p0, z0, z1, x0), ++ z0 = svmls_m (p0, z0, z1, x0)) ++ ++/* ++** mls_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** mls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svmls_n_u64_m (p0, z1, z2, x0), ++ z0 = svmls_m (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u64_m_tied1: ++** mov (z[0-9]+\.d), #11 ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u64_m_tied1, svuint64_t, ++ z0 = svmls_n_u64_m (p0, z0, z1, 11), ++ z0 = svmls_m (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #11 ++** movprfx z0, z1 ++** mls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u64_m_untied, svuint64_t, ++ z0 = svmls_n_u64_m (p0, z1, z2, 11), ++ z0 = svmls_m (p0, z1, z2, 11)) ++ ++/* ++** mls_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_z_tied1, svuint64_t, ++ z0 = svmls_u64_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_z_tied2, svuint64_t, ++ z0 = svmls_u64_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_u64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_z_tied3, svuint64_t, ++ z0 = svmls_u64_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mls z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_z_untied, svuint64_t, ++ z0 = svmls_u64_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_u64_z_tied1, svuint64_t, uint64_t, ++ 
z0 = svmls_n_u64_z (p0, z0, z1, x0), ++ z0 = svmls_z (p0, z0, z1, x0)) ++ ++/* ++** mls_x0_u64_z_tied2: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_u64_z_tied2, svuint64_t, uint64_t, ++ z0 = svmls_n_u64_z (p0, z1, z0, x0), ++ z0 = svmls_z (p0, z1, z0, x0)) ++ ++/* ++** mls_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mls z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svmls_n_u64_z (p0, z1, z2, x0), ++ z0 = svmls_z (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u64_z_tied1: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u64_z_tied1, svuint64_t, ++ z0 = svmls_n_u64_z (p0, z0, z1, 11), ++ z0 = svmls_z (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u64_z_tied2: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u64_z_tied2, svuint64_t, ++ z0 = svmls_n_u64_z (p0, z1, z0, 11), ++ z0 = svmls_z (p0, z1, z0, 11)) ++ ++/* ++** mls_11_u64_z_untied: ++** mov (z[0-9]+\.d), #11 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mls z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u64_z_untied, svuint64_t, ++ z0 = svmls_n_u64_z (p0, z1, z2, 11), ++ z0 = svmls_z (p0, z1, z2, 11)) ++ ++/* ++** mls_u64_x_tied1: ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_x_tied1, svuint64_t, ++ z0 = svmls_u64_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_u64_x_tied2: ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_x_tied2, svuint64_t, ++ z0 = svmls_u64_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_u64_x_tied3: ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_x_tied3, svuint64_t, ++ z0 = svmls_u64_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** mls z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** msb z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0, z3 ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u64_x_untied, svuint64_t, ++ z0 = svmls_u64_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svmls_n_u64_x (p0, z0, z1, x0), ++ z0 = svmls_x (p0, z0, z1, x0)) ++ ++/* ++** mls_x0_u64_x_tied2: ++** mov (z[0-9]+\.d), x0 ++** msb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_u64_x_tied2, svuint64_t, uint64_t, ++ z0 = svmls_n_u64_x (p0, z1, z0, x0), ++ z0 = svmls_x (p0, z1, z0, x0)) ++ ++/* ++** mls_x0_u64_x_untied: ++** mov z0\.d, x0 ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svmls_n_u64_x (p0, z1, z2, x0), ++ z0 = svmls_x (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u64_x_tied1: ++** mov (z[0-9]+\.d), #11 ++** mls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u64_x_tied1, svuint64_t, ++ z0 = 
svmls_n_u64_x (p0, z0, z1, 11), ++ z0 = svmls_x (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u64_x_tied2: ++** mov (z[0-9]+\.d), #11 ++** msb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u64_x_tied2, svuint64_t, ++ z0 = svmls_n_u64_x (p0, z1, z0, 11), ++ z0 = svmls_x (p0, z1, z0, 11)) ++ ++/* ++** mls_11_u64_x_untied: ++** mov z0\.d, #11 ++** msb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u64_x_untied, svuint64_t, ++ z0 = svmls_n_u64_x (p0, z1, z2, 11), ++ z0 = svmls_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u8.c +new file mode 100644 +index 000000000..0489aaa7c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u8.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mls_u8_m_tied1: ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_m_tied1, svuint8_t, ++ z0 = svmls_u8_m (p0, z0, z1, z2), ++ z0 = svmls_m (p0, z0, z1, z2)) ++ ++/* ++** mls_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.b, p0/m, \1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_m_tied2, svuint8_t, ++ z0 = svmls_u8_m (p0, z1, z0, z2), ++ z0 = svmls_m (p0, z1, z0, z2)) ++ ++/* ++** mls_u8_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mls z0\.b, p0/m, z2\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_m_tied3, svuint8_t, ++ z0 = svmls_u8_m (p0, z1, z2, z0), ++ z0 = svmls_m (p0, z1, z2, z0)) ++ ++/* ++** mls_u8_m_untied: ++** movprfx z0, z1 ++** mls z0\.b, p0/m, z2\.b, z3\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_m_untied, svuint8_t, ++ z0 = svmls_u8_m (p0, z1, z2, z3), ++ z0 = svmls_m (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svmls_n_u8_m (p0, z0, z1, x0), ++ z0 = svmls_m (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** mls z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svmls_n_u8_m (p0, z1, z2, x0), ++ z0 = svmls_m (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u8_m_tied1: ++** mov (z[0-9]+\.b), #11 ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u8_m_tied1, svuint8_t, ++ z0 = svmls_n_u8_m (p0, z0, z1, 11), ++ z0 = svmls_m (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #11 ++** movprfx z0, z1 ++** mls z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u8_m_untied, svuint8_t, ++ z0 = svmls_n_u8_m (p0, z1, z2, 11), ++ z0 = svmls_m (p0, z1, z2, 11)) ++ ++/* ++** mls_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_z_tied1, svuint8_t, ++ z0 = svmls_u8_z (p0, z0, z1, z2), ++ z0 = svmls_z (p0, z0, z1, z2)) ++ ++/* ++** mls_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_z_tied2, svuint8_t, ++ z0 = svmls_u8_z (p0, z1, z0, z2), ++ z0 = svmls_z (p0, z1, z0, z2)) ++ ++/* ++** mls_u8_z_tied3: ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_z_tied3, svuint8_t, ++ z0 = svmls_u8_z (p0, z1, z2, z0), ++ z0 = svmls_z (p0, z1, z2, z0)) ++ ++/* ++** mls_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mls z0\.b, 
p0/m, z2\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, z3\.b, z1\.b ++** | ++** movprfx z0\.b, p0/z, z3\.b ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_z_untied, svuint8_t, ++ z0 = svmls_u8_z (p0, z1, z2, z3), ++ z0 = svmls_z (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svmls_n_u8_z (p0, z0, z1, x0), ++ z0 = svmls_z (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_u8_z_tied2: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u8_z_tied2, svuint8_t, uint8_t, ++ z0 = svmls_n_u8_z (p0, z1, z0, x0), ++ z0 = svmls_z (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mls z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, \1, z1\.b ++** | ++** movprfx z0\.b, p0/z, \1 ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svmls_n_u8_z (p0, z1, z2, x0), ++ z0 = svmls_z (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u8_z_tied1: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u8_z_tied1, svuint8_t, ++ z0 = svmls_n_u8_z (p0, z0, z1, 11), ++ z0 = svmls_z (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u8_z_tied2: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u8_z_tied2, svuint8_t, ++ z0 = svmls_n_u8_z (p0, z1, z0, 11), ++ z0 = svmls_z (p0, z1, z0, 11)) ++ ++/* ++** mls_11_u8_z_untied: ++** mov (z[0-9]+\.b), #11 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mls z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, \1, z1\.b ++** | ++** movprfx z0\.b, p0/z, \1 ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u8_z_untied, svuint8_t, ++ z0 = svmls_n_u8_z (p0, z1, z2, 11), ++ z0 = svmls_z (p0, z1, z2, 11)) ++ ++/* ++** mls_u8_x_tied1: ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_x_tied1, svuint8_t, ++ z0 = svmls_u8_x (p0, z0, z1, z2), ++ z0 = svmls_x (p0, z0, z1, z2)) ++ ++/* ++** mls_u8_x_tied2: ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_x_tied2, svuint8_t, ++ z0 = svmls_u8_x (p0, z1, z0, z2), ++ z0 = svmls_x (p0, z1, z0, z2)) ++ ++/* ++** mls_u8_x_tied3: ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_x_tied3, svuint8_t, ++ z0 = svmls_u8_x (p0, z1, z2, z0), ++ z0 = svmls_x (p0, z1, z2, z0)) ++ ++/* ++** mls_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** mls z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0, z2 ++** msb z0\.b, p0/m, z3\.b, z1\.b ++** | ++** movprfx z0, z3 ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mls_u8_x_untied, svuint8_t, ++ z0 = svmls_u8_x (p0, z1, z2, z3), ++ z0 = svmls_x (p0, z1, z2, z3)) ++ ++/* ++** mls_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svmls_n_u8_x (p0, z0, z1, x0), ++ z0 = svmls_x (p0, z0, z1, x0)) ++ ++/* ++** mls_w0_u8_x_tied2: ++** mov (z[0-9]+\.b), w0 ++** msb z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u8_x_tied2, svuint8_t, uint8_t, ++ z0 = svmls_n_u8_x (p0, 
z1, z0, x0), ++ z0 = svmls_x (p0, z1, z0, x0)) ++ ++/* ++** mls_w0_u8_x_untied: ++** mov z0\.b, w0 ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mls_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svmls_n_u8_x (p0, z1, z2, x0), ++ z0 = svmls_x (p0, z1, z2, x0)) ++ ++/* ++** mls_11_u8_x_tied1: ++** mov (z[0-9]+\.b), #11 ++** mls z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u8_x_tied1, svuint8_t, ++ z0 = svmls_n_u8_x (p0, z0, z1, 11), ++ z0 = svmls_x (p0, z0, z1, 11)) ++ ++/* ++** mls_11_u8_x_tied2: ++** mov (z[0-9]+\.b), #11 ++** msb z0\.b, p0/m, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u8_x_tied2, svuint8_t, ++ z0 = svmls_n_u8_x (p0, z1, z0, 11), ++ z0 = svmls_x (p0, z1, z0, 11)) ++ ++/* ++** mls_11_u8_x_untied: ++** mov z0\.b, #11 ++** msb z0\.b, p0/m, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mls_11_u8_x_untied, svuint8_t, ++ z0 = svmls_n_u8_x (p0, z1, z2, 11), ++ z0 = svmls_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c +new file mode 100644 +index 000000000..f66dbf397 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c +@@ -0,0 +1,46 @@ ++/* { dg-require-effective-target aarch64_asm_f32mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f32mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mmla_f32_tied1: ++** fmmla z0\.s, z4\.s, z5\.s ++** ret ++*/ ++TEST_DUAL_Z (mmla_f32_tied1, svfloat32_t, svfloat32_t, ++ z0 = svmmla_f32 (z0, z4, z5), ++ z0 = svmmla (z0, z4, z5)) ++ ++/* ++** mmla_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fmmla z0\.s, \1\.s, z1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (mmla_f32_tied2, svfloat32_t, svfloat32_t, ++ z0_res = svmmla_f32 (z4, z0, z1), ++ z0_res = svmmla (z4, z0, z1)) ++ ++/* ++** mmla_f32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fmmla z0\.s, z1\.s, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (mmla_f32_tied3, svfloat32_t, svfloat32_t, ++ z0_res = svmmla_f32 (z4, z1, z0), ++ z0_res = svmmla (z4, z1, z0)) ++ ++/* ++** mmla_f32_untied: ++** movprfx z0, z1 ++** fmmla z0\.s, z4\.s, z5\.s ++** ret ++*/ ++TEST_DUAL_Z (mmla_f32_untied, svfloat32_t, svfloat32_t, ++ z0 = svmmla_f32 (z1, z4, z5), ++ z0 = svmmla (z1, z4, z5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c +new file mode 100644 +index 000000000..49dc0607c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c +@@ -0,0 +1,46 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mmla_f64_tied1: ++** fmmla z0\.d, z4\.d, z5\.d ++** ret ++*/ ++TEST_DUAL_Z (mmla_f64_tied1, svfloat64_t, svfloat64_t, ++ z0 = svmmla_f64 (z0, z4, z5), ++ z0 = svmmla (z0, z4, z5)) ++ ++/* ++** mmla_f64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** fmmla z0\.d, \1, z1\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (mmla_f64_tied2, svfloat64_t, svfloat64_t, ++ z0_res = svmmla_f64 (z4, z0, z1), ++ z0_res = svmmla (z4, z0, z1)) ++ ++/* ++** mmla_f64_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** fmmla z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (mmla_f64_tied3, svfloat64_t, svfloat64_t, ++ z0_res = svmmla_f64 (z4, z1, z0), ++ z0_res = svmmla (z4, 
z1, z0)) ++ ++/* ++** mmla_f64_untied: ++** movprfx z0, z1 ++** fmmla z0\.d, z4\.d, z5\.d ++** ret ++*/ ++TEST_DUAL_Z (mmla_f64_untied, svfloat64_t, svfloat64_t, ++ z0 = svmmla_f64 (z1, z4, z5), ++ z0 = svmmla (z1, z4, z5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c +new file mode 100644 +index 000000000..e7ce009ac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c +@@ -0,0 +1,46 @@ ++/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mmla_s32_tied1: ++** smmla z0\.s, z4\.b, z5\.b ++** ret ++*/ ++TEST_DUAL_Z (mmla_s32_tied1, svint32_t, svint8_t, ++ z0 = svmmla_s32 (z0, z4, z5), ++ z0 = svmmla (z0, z4, z5)) ++ ++/* ++** mmla_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** smmla z0\.s, \1\.b, z1\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (mmla_s32_tied2, svint32_t, svint8_t, ++ z0_res = svmmla_s32 (z4, z0, z1), ++ z0_res = svmmla (z4, z0, z1)) ++ ++/* ++** mmla_s32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** smmla z0\.s, z1\.b, \1\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (mmla_s32_tied3, svint32_t, svint8_t, ++ z0_res = svmmla_s32 (z4, z1, z0), ++ z0_res = svmmla (z4, z1, z0)) ++ ++/* ++** mmla_s32_untied: ++** movprfx z0, z1 ++** smmla z0\.s, z4\.b, z5\.b ++** ret ++*/ ++TEST_DUAL_Z (mmla_s32_untied, svint32_t, svint8_t, ++ z0 = svmmla_s32 (z1, z4, z5), ++ z0 = svmmla (z1, z4, z5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c +new file mode 100644 +index 000000000..81f5166fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c +@@ -0,0 +1,46 @@ ++/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mmla_u32_tied1: ++** ummla z0\.s, z4\.b, z5\.b ++** ret ++*/ ++TEST_DUAL_Z (mmla_u32_tied1, svuint32_t, svuint8_t, ++ z0 = svmmla_u32 (z0, z4, z5), ++ z0 = svmmla (z0, z4, z5)) ++ ++/* ++** mmla_u32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** ummla z0\.s, \1\.b, z1\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (mmla_u32_tied2, svuint32_t, svuint8_t, ++ z0_res = svmmla_u32 (z4, z0, z1), ++ z0_res = svmmla (z4, z0, z1)) ++ ++/* ++** mmla_u32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** ummla z0\.s, z1\.b, \1\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (mmla_u32_tied3, svuint32_t, svuint8_t, ++ z0_res = svmmla_u32 (z4, z1, z0), ++ z0_res = svmmla (z4, z1, z0)) ++ ++/* ++** mmla_u32_untied: ++** movprfx z0, z1 ++** ummla z0\.s, z4\.b, z5\.b ++** ret ++*/ ++TEST_DUAL_Z (mmla_u32_untied, svuint32_t, svuint8_t, ++ z0 = svmmla_u32 (z1, z4, z5), ++ z0 = svmmla (z1, z4, z5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mov_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mov_b.c +new file mode 100644 +index 000000000..6b78f348f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mov_b.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mov_b_z_tied1: ++** and p0\.b, (?:p3/z, p0\.b, p0\.b|p0/z, p3\.b, p3\.b) ++** ret ++*/ ++TEST_UNIFORM_P (mov_b_z_tied1, ++ p0 = svmov_b_z (p3, p0), 
++ p0 = svmov_z (p3, p0)) ++ ++/* ++** mov_b_z_untied: ++** and p0\.b, (?:p3/z, p1\.b, p1\.b|p1/z, p3\.b, p3\.b) ++** ret ++*/ ++TEST_UNIFORM_P (mov_b_z_untied, ++ p0 = svmov_b_z (p3, p1), ++ p0 = svmov_z (p3, p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f16.c +new file mode 100644 +index 000000000..fe11457c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f16.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_f16_m_tied1: ++** fmsb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_m_tied1, svfloat16_t, ++ z0 = svmsb_f16_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmsb z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_m_tied2, svfloat16_t, ++ z0 = svmsb_f16_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmsb z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_m_tied3, svfloat16_t, ++ z0 = svmsb_f16_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_f16_m_untied: ++** movprfx z0, z1 ++** fmsb z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_m_untied, svfloat16_t, ++ z0 = svmsb_f16_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svmsb_n_f16_m (p0, z0, z1, d4), ++ z0 = svmsb_m (p0, z0, z1, d4)) ++ ++/* ++** msb_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fmsb z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svmsb_n_f16_m (p0, z1, z2, d4), ++ z0 = svmsb_m (p0, z1, z2, d4)) ++ ++/* ++** msb_2_f16_m_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f16_m_tied1, svfloat16_t, ++ z0 = svmsb_n_f16_m (p0, z0, z1, 2), ++ z0 = svmsb_m (p0, z0, z1, 2)) ++ ++/* ++** msb_2_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmsb z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f16_m_untied, svfloat16_t, ++ z0 = svmsb_n_f16_m (p0, z1, z2, 2), ++ z0 = svmsb_m (p0, z1, z2, 2)) ++ ++/* ++** msb_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmsb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_z_tied1, svfloat16_t, ++ z0 = svmsb_f16_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fmsb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_z_tied2, svfloat16_t, ++ z0 = svmsb_f16_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_f16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_z_tied3, svfloat16_t, ++ z0 = svmsb_f16_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmsb z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmsb z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_z_untied, svfloat16_t, ++ z0 = svmsb_f16_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svmsb_n_f16_z (p0, z0, z1, d4), ++ z0 = svmsb_z (p0, z0, z1, d4)) ++ ++/* ++** msb_h4_f16_z_tied2: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_h4_f16_z_tied2, svfloat16_t, __fp16, ++ z0 = svmsb_n_f16_z (p0, z1, z0, d4), ++ z0 = svmsb_z (p0, z1, z0, d4)) ++ ++/* ++** msb_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmsb z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svmsb_n_f16_z (p0, z1, z2, d4), ++ z0 = svmsb_z (p0, z1, z2, d4)) ++ ++/* ++** msb_2_f16_z_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f16_z_tied1, svfloat16_t, ++ z0 = svmsb_n_f16_z (p0, z0, z1, 2), ++ z0 = svmsb_z (p0, z0, z1, 2)) ++ ++/* ++** msb_2_f16_z_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f16_z_tied2, svfloat16_t, ++ z0 = svmsb_n_f16_z (p0, z1, z0, 2), ++ z0 = svmsb_z (p0, z1, z0, 2)) ++ ++/* ++** msb_2_f16_z_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmsb z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f16_z_untied, svfloat16_t, ++ z0 = svmsb_n_f16_z (p0, z1, z2, 2), ++ z0 = svmsb_z (p0, z1, z2, 2)) ++ ++/* ++** msb_f16_x_tied1: ++** fmsb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_x_tied1, svfloat16_t, ++ z0 = svmsb_f16_x (p0, z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_f16_x_tied2: ++** fmsb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_x_tied2, svfloat16_t, ++ z0 = svmsb_f16_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_f16_x_tied3: ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_x_tied3, svfloat16_t, ++ z0 = svmsb_f16_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fmsb z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** fmsb z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0, z3 ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f16_x_untied, svfloat16_t, ++ z0 = svmsb_f16_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svmsb_n_f16_x (p0, z0, z1, d4), ++ z0 = svmsb_x (p0, z0, z1, d4)) ++ ++/* ++** msb_h4_f16_x_tied2: ++** mov (z[0-9]+\.h), h4 ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_h4_f16_x_tied2, svfloat16_t, __fp16, ++ z0 = svmsb_n_f16_x (p0, z1, z0, d4), ++ z0 = svmsb_x (p0, z1, z0, d4)) ++ ++/* ++** msb_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svmsb_n_f16_x (p0, z1, z2, d4), ++ z0 = svmsb_x (p0, z1, z2, d4)) ++ ++/* ++** msb_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f16_x_tied1, svfloat16_t, ++ z0 = svmsb_n_f16_x (p0, z0, z1, 2), ++ z0 = svmsb_x (p0, z0, z1, 2)) ++ ++/* ++** msb_2_f16_x_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f16_x_tied2, svfloat16_t, ++ z0 = svmsb_n_f16_x (p0, z1, z0, 2), ++ z0 = svmsb_x (p0, z1, z0, 2)) ++ ++/* ++** msb_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f16_x_untied, svfloat16_t, ++ z0 = svmsb_n_f16_x (p0, z1, z2, 2), ++ z0 = svmsb_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_msb_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f16_x_tied1, svfloat16_t, ++ z0 = svmsb_f16_x (svptrue_b16 (), z0, z1, z2), ++ z0 = svmsb_x (svptrue_b16 (), z0, z1, z2)) ++ ++/* ++** ptrue_msb_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f16_x_tied2, svfloat16_t, ++ z0 = svmsb_f16_x (svptrue_b16 (), z1, z0, z2), ++ z0 = svmsb_x (svptrue_b16 (), z1, z0, z2)) ++ ++/* ++** ptrue_msb_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f16_x_tied3, svfloat16_t, ++ z0 = svmsb_f16_x (svptrue_b16 (), z1, z2, z0), ++ z0 = svmsb_x (svptrue_b16 (), z1, z2, z0)) ++ ++/* ++** ptrue_msb_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f16_x_untied, svfloat16_t, ++ z0 = svmsb_f16_x (svptrue_b16 (), z1, z2, z3), ++ z0 = svmsb_x (svptrue_b16 (), z1, z2, z3)) ++ ++/* ++** ptrue_msb_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_2_f16_x_tied1, svfloat16_t, ++ z0 = svmsb_n_f16_x (svptrue_b16 (), z0, z1, 2), ++ z0 = svmsb_x (svptrue_b16 (), z0, z1, 2)) ++ ++/* ++** ptrue_msb_2_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_2_f16_x_tied2, svfloat16_t, ++ z0 = svmsb_n_f16_x (svptrue_b16 (), z1, z0, 2), ++ z0 = svmsb_x (svptrue_b16 (), z1, z0, 2)) ++ ++/* ++** ptrue_msb_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_2_f16_x_untied, svfloat16_t, ++ z0 = svmsb_n_f16_x (svptrue_b16 (), z1, z2, 2), ++ z0 = svmsb_x (svptrue_b16 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f32.c +new file mode 100644 +index 000000000..f7a9f2767 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f32.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_f32_m_tied1: ++** fmsb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_m_tied1, svfloat32_t, ++ z0 = svmsb_f32_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmsb z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_m_tied2, svfloat32_t, ++ z0 = svmsb_f32_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmsb z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_m_tied3, svfloat32_t, ++ z0 = svmsb_f32_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_f32_m_untied: ++** movprfx z0, z1 ++** fmsb z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_m_untied, svfloat32_t, ++ z0 = svmsb_f32_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svmsb_n_f32_m (p0, z0, z1, d4), ++ z0 = svmsb_m (p0, z0, z1, d4)) ++ ++/* ++** msb_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fmsb z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svmsb_n_f32_m (p0, z1, z2, d4), ++ z0 = svmsb_m (p0, z1, z2, d4)) ++ ++/* ++** msb_2_f32_m_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f32_m_tied1, svfloat32_t, ++ z0 = svmsb_n_f32_m (p0, z0, z1, 2), ++ z0 = svmsb_m (p0, z0, z1, 2)) ++ ++/* ++** msb_2_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmsb z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f32_m_untied, svfloat32_t, ++ z0 = svmsb_n_f32_m (p0, z1, z2, 2), ++ z0 = svmsb_m (p0, z1, z2, 2)) ++ ++/* ++** msb_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmsb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_z_tied1, svfloat32_t, ++ z0 = svmsb_f32_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fmsb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_z_tied2, svfloat32_t, ++ z0 = svmsb_f32_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_f32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_z_tied3, svfloat32_t, ++ z0 = svmsb_f32_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmsb z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmsb z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_z_untied, svfloat32_t, ++ z0 = svmsb_f32_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svmsb_n_f32_z (p0, z0, z1, d4), ++ z0 = svmsb_z (p0, z0, z1, d4)) ++ ++/* ++** msb_s4_f32_z_tied2: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_s4_f32_z_tied2, svfloat32_t, float, ++ z0 = svmsb_n_f32_z (p0, z1, z0, d4), ++ z0 = svmsb_z (p0, z1, z0, d4)) ++ ++/* ++** msb_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmsb z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svmsb_n_f32_z (p0, z1, z2, d4), ++ z0 = svmsb_z (p0, z1, z2, d4)) ++ ++/* ++** msb_2_f32_z_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f32_z_tied1, svfloat32_t, ++ z0 = svmsb_n_f32_z (p0, z0, z1, 2), ++ z0 = svmsb_z (p0, z0, z1, 2)) ++ ++/* ++** msb_2_f32_z_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f32_z_tied2, svfloat32_t, ++ z0 = svmsb_n_f32_z (p0, z1, z0, 2), ++ z0 = svmsb_z (p0, z1, z0, 2)) ++ ++/* ++** msb_2_f32_z_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmsb z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f32_z_untied, svfloat32_t, ++ z0 = svmsb_n_f32_z (p0, z1, z2, 2), ++ z0 = svmsb_z (p0, z1, z2, 2)) ++ ++/* ++** msb_f32_x_tied1: ++** fmsb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_x_tied1, svfloat32_t, ++ z0 = svmsb_f32_x (p0, z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_f32_x_tied2: ++** fmsb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_x_tied2, svfloat32_t, ++ z0 = svmsb_f32_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_f32_x_tied3: ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_x_tied3, svfloat32_t, ++ z0 = svmsb_f32_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fmsb z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** fmsb z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0, z3 ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f32_x_untied, svfloat32_t, ++ z0 = svmsb_f32_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svmsb_n_f32_x (p0, z0, z1, d4), ++ z0 = svmsb_x (p0, z0, z1, d4)) ++ ++/* ++** msb_s4_f32_x_tied2: ++** mov (z[0-9]+\.s), s4 ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_s4_f32_x_tied2, svfloat32_t, float, ++ z0 = svmsb_n_f32_x (p0, z1, z0, d4), ++ z0 = svmsb_x (p0, z1, z0, d4)) ++ ++/* ++** msb_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svmsb_n_f32_x (p0, z1, z2, d4), ++ z0 = svmsb_x (p0, z1, z2, d4)) ++ ++/* ++** msb_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f32_x_tied1, svfloat32_t, ++ z0 = svmsb_n_f32_x (p0, z0, z1, 2), ++ z0 = svmsb_x (p0, z0, z1, 2)) ++ ++/* ++** msb_2_f32_x_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f32_x_tied2, svfloat32_t, ++ z0 = svmsb_n_f32_x (p0, z1, z0, 2), ++ z0 = svmsb_x (p0, z1, z0, 2)) ++ ++/* ++** msb_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f32_x_untied, svfloat32_t, ++ z0 = svmsb_n_f32_x (p0, z1, z2, 2), ++ z0 = svmsb_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_msb_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f32_x_tied1, svfloat32_t, ++ z0 = svmsb_f32_x (svptrue_b32 (), z0, z1, z2), ++ z0 = svmsb_x (svptrue_b32 (), z0, z1, z2)) ++ ++/* ++** ptrue_msb_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f32_x_tied2, svfloat32_t, ++ z0 = svmsb_f32_x (svptrue_b32 (), z1, z0, z2), ++ z0 = svmsb_x (svptrue_b32 (), z1, z0, z2)) ++ ++/* ++** ptrue_msb_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f32_x_tied3, svfloat32_t, ++ z0 = svmsb_f32_x (svptrue_b32 (), z1, z2, z0), ++ z0 = svmsb_x (svptrue_b32 (), z1, z2, z0)) ++ ++/* ++** ptrue_msb_f32_x_untied: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f32_x_untied, svfloat32_t, ++ z0 = svmsb_f32_x (svptrue_b32 (), z1, z2, z3), ++ z0 = svmsb_x (svptrue_b32 (), z1, z2, z3)) ++ ++/* ++** ptrue_msb_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_2_f32_x_tied1, svfloat32_t, ++ z0 = svmsb_n_f32_x (svptrue_b32 (), z0, z1, 2), ++ z0 = svmsb_x (svptrue_b32 (), z0, z1, 2)) ++ ++/* ++** ptrue_msb_2_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_2_f32_x_tied2, svfloat32_t, ++ z0 = svmsb_n_f32_x (svptrue_b32 (), z1, z0, 2), ++ z0 = svmsb_x (svptrue_b32 (), z1, z0, 2)) ++ ++/* ++** ptrue_msb_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_2_f32_x_untied, svfloat32_t, ++ z0 = svmsb_n_f32_x (svptrue_b32 (), z1, z2, 2), ++ z0 = svmsb_x (svptrue_b32 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f64.c +new file mode 100644 +index 000000000..e3ff414d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f64.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_f64_m_tied1: ++** fmsb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_m_tied1, svfloat64_t, ++ z0 = svmsb_f64_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmsb z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_m_tied2, svfloat64_t, ++ z0 = svmsb_f64_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmsb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_m_tied3, svfloat64_t, ++ z0 = svmsb_f64_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_f64_m_untied: ++** movprfx z0, z1 ++** fmsb z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_m_untied, svfloat64_t, ++ z0 = svmsb_f64_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svmsb_n_f64_m (p0, z0, z1, d4), ++ z0 = svmsb_m (p0, z0, z1, d4)) ++ ++/* ++** msb_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fmsb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svmsb_n_f64_m (p0, z1, z2, d4), ++ z0 = svmsb_m (p0, z1, z2, d4)) ++ ++/* ++** msb_2_f64_m_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f64_m_tied1, svfloat64_t, ++ z0 = svmsb_n_f64_m (p0, z0, z1, 2), ++ z0 = svmsb_m (p0, z0, z1, 2)) ++ ++/* ++** msb_2_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmsb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f64_m_untied, svfloat64_t, ++ z0 = svmsb_n_f64_m (p0, z1, z2, 2), ++ z0 = svmsb_m (p0, z1, z2, 2)) ++ ++/* ++** msb_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmsb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_z_tied1, svfloat64_t, ++ z0 = svmsb_f64_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fmsb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_z_tied2, svfloat64_t, ++ z0 = svmsb_f64_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_f64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_z_tied3, svfloat64_t, ++ z0 = svmsb_f64_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmsb z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmsb z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_z_untied, svfloat64_t, ++ z0 = svmsb_f64_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svmsb_n_f64_z (p0, z0, z1, d4), ++ z0 = svmsb_z (p0, z0, z1, d4)) ++ ++/* ++** msb_d4_f64_z_tied2: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_d4_f64_z_tied2, svfloat64_t, double, ++ z0 = svmsb_n_f64_z (p0, z1, z0, d4), ++ z0 = svmsb_z (p0, z1, z0, d4)) ++ ++/* ++** msb_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmsb z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svmsb_n_f64_z (p0, z1, z2, d4), ++ z0 = svmsb_z (p0, z1, z2, d4)) ++ ++/* ++** msb_2_f64_z_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f64_z_tied1, svfloat64_t, ++ z0 = svmsb_n_f64_z (p0, z0, z1, 2), ++ z0 = svmsb_z (p0, z0, z1, 2)) ++ ++/* ++** msb_2_f64_z_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f64_z_tied2, svfloat64_t, ++ z0 = svmsb_n_f64_z (p0, z1, z0, 2), ++ z0 = svmsb_z (p0, z1, z0, 2)) ++ ++/* ++** msb_2_f64_z_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmsb z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f64_z_untied, svfloat64_t, ++ z0 = svmsb_n_f64_z (p0, z1, z2, 2), ++ z0 = svmsb_z (p0, z1, z2, 2)) ++ ++/* ++** msb_f64_x_tied1: ++** fmsb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_x_tied1, svfloat64_t, ++ z0 = svmsb_f64_x (p0, z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_f64_x_tied2: ++** fmsb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_x_tied2, svfloat64_t, ++ z0 = svmsb_f64_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_f64_x_tied3: ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_x_tied3, svfloat64_t, ++ z0 = svmsb_f64_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fmsb z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** fmsb z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0, z3 ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_f64_x_untied, svfloat64_t, ++ z0 = svmsb_f64_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svmsb_n_f64_x (p0, z0, z1, d4), ++ z0 = svmsb_x (p0, z0, z1, d4)) ++ ++/* ++** msb_d4_f64_x_tied2: ++** mov (z[0-9]+\.d), d4 ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_d4_f64_x_tied2, svfloat64_t, double, ++ z0 = svmsb_n_f64_x (p0, z1, z0, d4), ++ z0 = svmsb_x (p0, z1, z0, d4)) ++ ++/* ++** msb_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (msb_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svmsb_n_f64_x (p0, z1, z2, d4), ++ z0 = svmsb_x (p0, z1, z2, d4)) ++ ++/* ++** msb_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f64_x_tied1, svfloat64_t, ++ z0 = svmsb_n_f64_x (p0, z0, z1, 2), ++ z0 = svmsb_x (p0, z0, z1, 2)) ++ ++/* ++** msb_2_f64_x_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f64_x_tied2, svfloat64_t, ++ z0 = svmsb_n_f64_x (p0, z1, z0, 2), ++ z0 = svmsb_x (p0, z1, z0, 2)) ++ ++/* ++** msb_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_2_f64_x_untied, svfloat64_t, ++ z0 = svmsb_n_f64_x (p0, z1, z2, 2), ++ z0 = svmsb_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_msb_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f64_x_tied1, svfloat64_t, ++ z0 = svmsb_f64_x (svptrue_b64 (), z0, z1, z2), ++ z0 = svmsb_x (svptrue_b64 (), z0, z1, z2)) ++ ++/* ++** ptrue_msb_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f64_x_tied2, svfloat64_t, ++ z0 = svmsb_f64_x (svptrue_b64 (), z1, z0, z2), ++ z0 = svmsb_x (svptrue_b64 (), z1, z0, z2)) ++ ++/* ++** ptrue_msb_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f64_x_tied3, svfloat64_t, ++ z0 = svmsb_f64_x (svptrue_b64 (), z1, z2, z0), ++ z0 = svmsb_x (svptrue_b64 (), z1, z2, z0)) ++ ++/* ++** ptrue_msb_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_f64_x_untied, svfloat64_t, ++ z0 = svmsb_f64_x (svptrue_b64 (), z1, z2, z3), ++ z0 = svmsb_x (svptrue_b64 (), z1, z2, z3)) ++ ++/* ++** ptrue_msb_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_2_f64_x_tied1, svfloat64_t, ++ z0 = svmsb_n_f64_x (svptrue_b64 (), z0, z1, 2), ++ z0 = svmsb_x (svptrue_b64 (), z0, z1, 2)) ++ ++/* ++** ptrue_msb_2_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_2_f64_x_tied2, svfloat64_t, ++ z0 = svmsb_n_f64_x (svptrue_b64 (), z1, z0, 2), ++ z0 = svmsb_x (svptrue_b64 (), z1, z0, 2)) ++ ++/* ++** ptrue_msb_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_msb_2_f64_x_untied, svfloat64_t, ++ z0 = svmsb_n_f64_x (svptrue_b64 (), z1, z2, 2), ++ z0 = svmsb_x (svptrue_b64 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s16.c +new file mode 100644 +index 000000000..56347cfb9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s16.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_s16_m_tied1: ++** msb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_m_tied1, svint16_t, ++ z0 = svmsb_s16_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_m_tied2, svint16_t, ++ z0 = svmsb_s16_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_s16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_m_tied3, svint16_t, ++ z0 = svmsb_s16_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_s16_m_untied: ++** movprfx z0, z1 ++** msb z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_m_untied, svint16_t, ++ z0 = svmsb_s16_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svmsb_n_s16_m (p0, z0, z1, x0), ++ z0 = svmsb_m (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** msb z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svmsb_n_s16_m (p0, z1, z2, x0), ++ z0 = svmsb_m (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s16_m_tied1: ++** mov (z[0-9]+\.h), #11 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s16_m_tied1, svint16_t, ++ z0 = svmsb_n_s16_m (p0, z0, z1, 11), ++ z0 = svmsb_m (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #11 ++** movprfx z0, z1 ++** msb z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s16_m_untied, svint16_t, ++ z0 = svmsb_n_s16_m (p0, z1, z2, 11), ++ z0 = svmsb_m (p0, z1, z2, 11)) ++ ++/* ++** msb_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, z2\.h 
++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_z_tied1, svint16_t, ++ z0 = svmsb_s16_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_z_tied2, svint16_t, ++ z0 = svmsb_s16_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_s16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_z_tied3, svint16_t, ++ z0 = svmsb_s16_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** msb z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_z_untied, svint16_t, ++ z0 = svmsb_s16_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svmsb_n_s16_z (p0, z0, z1, x0), ++ z0 = svmsb_z (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_s16_z_tied2: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s16_z_tied2, svint16_t, int16_t, ++ z0 = svmsb_n_s16_z (p0, z1, z0, x0), ++ z0 = svmsb_z (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** msb z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svmsb_n_s16_z (p0, z1, z2, x0), ++ z0 = svmsb_z (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s16_z_tied1: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s16_z_tied1, svint16_t, ++ z0 = svmsb_n_s16_z (p0, z0, z1, 11), ++ z0 = svmsb_z (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s16_z_tied2: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s16_z_tied2, svint16_t, ++ z0 = svmsb_n_s16_z (p0, z1, z0, 11), ++ z0 = svmsb_z (p0, z1, z0, 11)) ++ ++/* ++** msb_11_s16_z_untied: ++** mov (z[0-9]+\.h), #11 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** msb z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s16_z_untied, svint16_t, ++ z0 = svmsb_n_s16_z (p0, z1, z2, 11), ++ z0 = svmsb_z (p0, z1, z2, 11)) ++ ++/* ++** msb_s16_x_tied1: ++** msb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_x_tied1, svint16_t, ++ z0 = svmsb_s16_x (p0, z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_s16_x_tied2: ++** msb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_x_tied2, svint16_t, ++ z0 = svmsb_s16_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_s16_x_tied3: ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_x_tied3, svint16_t, ++ z0 = svmsb_s16_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** msb 
z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** msb z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0, z3 ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s16_x_untied, svint16_t, ++ z0 = svmsb_s16_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svmsb_n_s16_x (p0, z0, z1, x0), ++ z0 = svmsb_x (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_s16_x_tied2: ++** mov (z[0-9]+\.h), w0 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s16_x_tied2, svint16_t, int16_t, ++ z0 = svmsb_n_s16_x (p0, z1, z0, x0), ++ z0 = svmsb_x (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_s16_x_untied: ++** mov z0\.h, w0 ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svmsb_n_s16_x (p0, z1, z2, x0), ++ z0 = svmsb_x (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s16_x_tied1: ++** mov (z[0-9]+\.h), #11 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s16_x_tied1, svint16_t, ++ z0 = svmsb_n_s16_x (p0, z0, z1, 11), ++ z0 = svmsb_x (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s16_x_tied2: ++** mov (z[0-9]+\.h), #11 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s16_x_tied2, svint16_t, ++ z0 = svmsb_n_s16_x (p0, z1, z0, 11), ++ z0 = svmsb_x (p0, z1, z0, 11)) ++ ++/* ++** msb_11_s16_x_untied: ++** mov z0\.h, #11 ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s16_x_untied, svint16_t, ++ z0 = svmsb_n_s16_x (p0, z1, z2, 11), ++ z0 = svmsb_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s32.c +new file mode 100644 +index 000000000..fb7a7815b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s32.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_s32_m_tied1: ++** msb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_m_tied1, svint32_t, ++ z0 = svmsb_s32_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_m_tied2, svint32_t, ++ z0 = svmsb_s32_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_s32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_m_tied3, svint32_t, ++ z0 = svmsb_s32_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_s32_m_untied: ++** movprfx z0, z1 ++** msb z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_m_untied, svint32_t, ++ z0 = svmsb_s32_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svmsb_n_s32_m (p0, z0, z1, x0), ++ z0 = svmsb_m (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** msb z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svmsb_n_s32_m (p0, z1, z2, x0), ++ z0 = svmsb_m (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s32_m_tied1: ++** mov (z[0-9]+\.s), #11 ++** msb z0\.s, p0/m, 
z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s32_m_tied1, svint32_t, ++ z0 = svmsb_n_s32_m (p0, z0, z1, 11), ++ z0 = svmsb_m (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #11 ++** movprfx z0, z1 ++** msb z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s32_m_untied, svint32_t, ++ z0 = svmsb_n_s32_m (p0, z1, z2, 11), ++ z0 = svmsb_m (p0, z1, z2, 11)) ++ ++/* ++** msb_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_z_tied1, svint32_t, ++ z0 = svmsb_s32_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_z_tied2, svint32_t, ++ z0 = svmsb_s32_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_s32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_z_tied3, svint32_t, ++ z0 = svmsb_s32_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** msb z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_z_untied, svint32_t, ++ z0 = svmsb_s32_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svmsb_n_s32_z (p0, z0, z1, x0), ++ z0 = svmsb_z (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_s32_z_tied2: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s32_z_tied2, svint32_t, int32_t, ++ z0 = svmsb_n_s32_z (p0, z1, z0, x0), ++ z0 = svmsb_z (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** msb z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svmsb_n_s32_z (p0, z1, z2, x0), ++ z0 = svmsb_z (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s32_z_tied1: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s32_z_tied1, svint32_t, ++ z0 = svmsb_n_s32_z (p0, z0, z1, 11), ++ z0 = svmsb_z (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s32_z_tied2: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s32_z_tied2, svint32_t, ++ z0 = svmsb_n_s32_z (p0, z1, z0, 11), ++ z0 = svmsb_z (p0, z1, z0, 11)) ++ ++/* ++** msb_11_s32_z_untied: ++** mov (z[0-9]+\.s), #11 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** msb z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s32_z_untied, svint32_t, ++ z0 = svmsb_n_s32_z (p0, z1, z2, 11), ++ z0 = svmsb_z (p0, z1, z2, 11)) ++ ++/* ++** msb_s32_x_tied1: ++** msb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_x_tied1, svint32_t, ++ z0 = svmsb_s32_x (p0, 
z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_s32_x_tied2: ++** msb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_x_tied2, svint32_t, ++ z0 = svmsb_s32_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_s32_x_tied3: ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_x_tied3, svint32_t, ++ z0 = svmsb_s32_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** msb z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** msb z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0, z3 ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s32_x_untied, svint32_t, ++ z0 = svmsb_s32_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svmsb_n_s32_x (p0, z0, z1, x0), ++ z0 = svmsb_x (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_s32_x_tied2: ++** mov (z[0-9]+\.s), w0 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s32_x_tied2, svint32_t, int32_t, ++ z0 = svmsb_n_s32_x (p0, z1, z0, x0), ++ z0 = svmsb_x (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_s32_x_untied: ++** mov z0\.s, w0 ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svmsb_n_s32_x (p0, z1, z2, x0), ++ z0 = svmsb_x (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s32_x_tied1: ++** mov (z[0-9]+\.s), #11 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s32_x_tied1, svint32_t, ++ z0 = svmsb_n_s32_x (p0, z0, z1, 11), ++ z0 = svmsb_x (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s32_x_tied2: ++** mov (z[0-9]+\.s), #11 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s32_x_tied2, svint32_t, ++ z0 = svmsb_n_s32_x (p0, z1, z0, 11), ++ z0 = svmsb_x (p0, z1, z0, 11)) ++ ++/* ++** msb_11_s32_x_untied: ++** mov z0\.s, #11 ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s32_x_untied, svint32_t, ++ z0 = svmsb_n_s32_x (p0, z1, z2, 11), ++ z0 = svmsb_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s64.c +new file mode 100644 +index 000000000..6829fab36 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s64.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_s64_m_tied1: ++** msb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_m_tied1, svint64_t, ++ z0 = svmsb_s64_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** msb z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_m_tied2, svint64_t, ++ z0 = svmsb_s64_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_s64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** msb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_m_tied3, svint64_t, ++ z0 = svmsb_s64_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_s64_m_untied: ++** movprfx z0, z1 ++** msb z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_m_untied, svint64_t, ++ z0 = svmsb_s64_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** msb z0\.d, p0/m, 
z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svmsb_n_s64_m (p0, z0, z1, x0), ++ z0 = svmsb_m (p0, z0, z1, x0)) ++ ++/* ++** msb_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** msb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svmsb_n_s64_m (p0, z1, z2, x0), ++ z0 = svmsb_m (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s64_m_tied1: ++** mov (z[0-9]+\.d), #11 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s64_m_tied1, svint64_t, ++ z0 = svmsb_n_s64_m (p0, z0, z1, 11), ++ z0 = svmsb_m (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #11 ++** movprfx z0, z1 ++** msb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s64_m_untied, svint64_t, ++ z0 = svmsb_n_s64_m (p0, z1, z2, 11), ++ z0 = svmsb_m (p0, z1, z2, 11)) ++ ++/* ++** msb_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_z_tied1, svint64_t, ++ z0 = svmsb_s64_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_z_tied2, svint64_t, ++ z0 = svmsb_s64_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_s64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_z_tied3, svint64_t, ++ z0 = svmsb_s64_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** msb z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_z_untied, svint64_t, ++ z0 = svmsb_s64_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svmsb_n_s64_z (p0, z0, z1, x0), ++ z0 = svmsb_z (p0, z0, z1, x0)) ++ ++/* ++** msb_x0_s64_z_tied2: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_s64_z_tied2, svint64_t, int64_t, ++ z0 = svmsb_n_s64_z (p0, z1, z0, x0), ++ z0 = svmsb_z (p0, z1, z0, x0)) ++ ++/* ++** msb_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** msb z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svmsb_n_s64_z (p0, z1, z2, x0), ++ z0 = svmsb_z (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s64_z_tied1: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s64_z_tied1, svint64_t, ++ z0 = svmsb_n_s64_z (p0, z0, z1, 11), ++ z0 = svmsb_z (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s64_z_tied2: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s64_z_tied2, svint64_t, ++ z0 = svmsb_n_s64_z (p0, z1, z0, 11), ++ z0 = svmsb_z (p0, z1, z0, 11)) ++ ++/* ++** msb_11_s64_z_untied: ++** mov (z[0-9]+\.d), #11 ++** ( 
++** movprfx z0\.d, p0/z, z1\.d ++** msb z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s64_z_untied, svint64_t, ++ z0 = svmsb_n_s64_z (p0, z1, z2, 11), ++ z0 = svmsb_z (p0, z1, z2, 11)) ++ ++/* ++** msb_s64_x_tied1: ++** msb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_x_tied1, svint64_t, ++ z0 = svmsb_s64_x (p0, z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_s64_x_tied2: ++** msb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_x_tied2, svint64_t, ++ z0 = svmsb_s64_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_s64_x_tied3: ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_x_tied3, svint64_t, ++ z0 = svmsb_s64_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** msb z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** msb z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0, z3 ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s64_x_untied, svint64_t, ++ z0 = svmsb_s64_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svmsb_n_s64_x (p0, z0, z1, x0), ++ z0 = svmsb_x (p0, z0, z1, x0)) ++ ++/* ++** msb_x0_s64_x_tied2: ++** mov (z[0-9]+\.d), x0 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_s64_x_tied2, svint64_t, int64_t, ++ z0 = svmsb_n_s64_x (p0, z1, z0, x0), ++ z0 = svmsb_x (p0, z1, z0, x0)) ++ ++/* ++** msb_x0_s64_x_untied: ++** mov z0\.d, x0 ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svmsb_n_s64_x (p0, z1, z2, x0), ++ z0 = svmsb_x (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s64_x_tied1: ++** mov (z[0-9]+\.d), #11 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s64_x_tied1, svint64_t, ++ z0 = svmsb_n_s64_x (p0, z0, z1, 11), ++ z0 = svmsb_x (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s64_x_tied2: ++** mov (z[0-9]+\.d), #11 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s64_x_tied2, svint64_t, ++ z0 = svmsb_n_s64_x (p0, z1, z0, 11), ++ z0 = svmsb_x (p0, z1, z0, 11)) ++ ++/* ++** msb_11_s64_x_untied: ++** mov z0\.d, #11 ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s64_x_untied, svint64_t, ++ z0 = svmsb_n_s64_x (p0, z1, z2, 11), ++ z0 = svmsb_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s8.c +new file mode 100644 +index 000000000..d7fcafdd0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s8.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_s8_m_tied1: ++** msb z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_m_tied1, svint8_t, ++ z0 = svmsb_s8_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.b, p0/m, \1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_m_tied2, svint8_t, ++ z0 = svmsb_s8_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_s8_m_tied3: ++** mov 
(z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.b, p0/m, z2\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_m_tied3, svint8_t, ++ z0 = svmsb_s8_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_s8_m_untied: ++** movprfx z0, z1 ++** msb z0\.b, p0/m, z2\.b, z3\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_m_untied, svint8_t, ++ z0 = svmsb_s8_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svmsb_n_s8_m (p0, z0, z1, x0), ++ z0 = svmsb_m (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** msb z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svmsb_n_s8_m (p0, z1, z2, x0), ++ z0 = svmsb_m (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s8_m_tied1: ++** mov (z[0-9]+\.b), #11 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s8_m_tied1, svint8_t, ++ z0 = svmsb_n_s8_m (p0, z0, z1, 11), ++ z0 = svmsb_m (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #11 ++** movprfx z0, z1 ++** msb z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s8_m_untied, svint8_t, ++ z0 = svmsb_n_s8_m (p0, z1, z2, 11), ++ z0 = svmsb_m (p0, z1, z2, 11)) ++ ++/* ++** msb_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_z_tied1, svint8_t, ++ z0 = svmsb_s8_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_z_tied2, svint8_t, ++ z0 = svmsb_s8_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_s8_z_tied3: ++** movprfx z0\.b, p0/z, z0\.b ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_z_tied3, svint8_t, ++ z0 = svmsb_s8_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** msb z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, z1\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z3\.b ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_z_untied, svint8_t, ++ z0 = svmsb_s8_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svmsb_n_s8_z (p0, z0, z1, x0), ++ z0 = svmsb_z (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_s8_z_tied2: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s8_z_tied2, svint8_t, int8_t, ++ z0 = svmsb_n_s8_z (p0, z1, z0, x0), ++ z0 = svmsb_z (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** msb z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svmsb_n_s8_z (p0, z1, z2, x0), ++ z0 = svmsb_z (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s8_z_tied1: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, \1 
++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s8_z_tied1, svint8_t, ++ z0 = svmsb_n_s8_z (p0, z0, z1, 11), ++ z0 = svmsb_z (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s8_z_tied2: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s8_z_tied2, svint8_t, ++ z0 = svmsb_n_s8_z (p0, z1, z0, 11), ++ z0 = svmsb_z (p0, z1, z0, 11)) ++ ++/* ++** msb_11_s8_z_untied: ++** mov (z[0-9]+\.b), #11 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** msb z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s8_z_untied, svint8_t, ++ z0 = svmsb_n_s8_z (p0, z1, z2, 11), ++ z0 = svmsb_z (p0, z1, z2, 11)) ++ ++/* ++** msb_s8_x_tied1: ++** msb z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_x_tied1, svint8_t, ++ z0 = svmsb_s8_x (p0, z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_s8_x_tied2: ++** msb z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_x_tied2, svint8_t, ++ z0 = svmsb_s8_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_s8_x_tied3: ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_x_tied3, svint8_t, ++ z0 = svmsb_s8_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** msb z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0, z2 ++** msb z0\.b, p0/m, z1\.b, z3\.b ++** | ++** movprfx z0, z3 ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_s8_x_untied, svint8_t, ++ z0 = svmsb_s8_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svmsb_n_s8_x (p0, z0, z1, x0), ++ z0 = svmsb_x (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_s8_x_tied2: ++** mov (z[0-9]+\.b), w0 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s8_x_tied2, svint8_t, int8_t, ++ z0 = svmsb_n_s8_x (p0, z1, z0, x0), ++ z0 = svmsb_x (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_s8_x_untied: ++** mov z0\.b, w0 ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svmsb_n_s8_x (p0, z1, z2, x0), ++ z0 = svmsb_x (p0, z1, z2, x0)) ++ ++/* ++** msb_11_s8_x_tied1: ++** mov (z[0-9]+\.b), #11 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s8_x_tied1, svint8_t, ++ z0 = svmsb_n_s8_x (p0, z0, z1, 11), ++ z0 = svmsb_x (p0, z0, z1, 11)) ++ ++/* ++** msb_11_s8_x_tied2: ++** mov (z[0-9]+\.b), #11 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s8_x_tied2, svint8_t, ++ z0 = svmsb_n_s8_x (p0, z1, z0, 11), ++ z0 = svmsb_x (p0, z1, z0, 11)) ++ ++/* ++** msb_11_s8_x_untied: ++** mov z0\.b, #11 ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_s8_x_untied, svint8_t, ++ z0 = svmsb_n_s8_x (p0, z1, z2, 11), ++ z0 = svmsb_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u16.c +new file mode 100644 +index 000000000..437a96040 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u16.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_u16_m_tied1: ++** msb z0\.h, p0/m, z1\.h, z2\.h ++** 
ret ++*/ ++TEST_UNIFORM_Z (msb_u16_m_tied1, svuint16_t, ++ z0 = svmsb_u16_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_m_tied2, svuint16_t, ++ z0 = svmsb_u16_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_u16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_m_tied3, svuint16_t, ++ z0 = svmsb_u16_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_u16_m_untied: ++** movprfx z0, z1 ++** msb z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_m_untied, svuint16_t, ++ z0 = svmsb_u16_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svmsb_n_u16_m (p0, z0, z1, x0), ++ z0 = svmsb_m (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** msb z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svmsb_n_u16_m (p0, z1, z2, x0), ++ z0 = svmsb_m (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u16_m_tied1: ++** mov (z[0-9]+\.h), #11 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u16_m_tied1, svuint16_t, ++ z0 = svmsb_n_u16_m (p0, z0, z1, 11), ++ z0 = svmsb_m (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #11 ++** movprfx z0, z1 ++** msb z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u16_m_untied, svuint16_t, ++ z0 = svmsb_n_u16_m (p0, z1, z2, 11), ++ z0 = svmsb_m (p0, z1, z2, 11)) ++ ++/* ++** msb_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_z_tied1, svuint16_t, ++ z0 = svmsb_u16_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_z_tied2, svuint16_t, ++ z0 = svmsb_u16_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_u16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_z_tied3, svuint16_t, ++ z0 = svmsb_u16_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** msb z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_z_untied, svuint16_t, ++ z0 = svmsb_u16_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svmsb_n_u16_z (p0, z0, z1, x0), ++ z0 = svmsb_z (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_u16_z_tied2: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u16_z_tied2, svuint16_t, uint16_t, ++ z0 = svmsb_n_u16_z (p0, z1, z0, x0), ++ z0 = svmsb_z (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( 
++** movprfx z0\.h, p0/z, z1\.h ++** msb z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svmsb_n_u16_z (p0, z1, z2, x0), ++ z0 = svmsb_z (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u16_z_tied1: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u16_z_tied1, svuint16_t, ++ z0 = svmsb_n_u16_z (p0, z0, z1, 11), ++ z0 = svmsb_z (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u16_z_tied2: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u16_z_tied2, svuint16_t, ++ z0 = svmsb_n_u16_z (p0, z1, z0, 11), ++ z0 = svmsb_z (p0, z1, z0, 11)) ++ ++/* ++** msb_11_u16_z_untied: ++** mov (z[0-9]+\.h), #11 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** msb z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** msb z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u16_z_untied, svuint16_t, ++ z0 = svmsb_n_u16_z (p0, z1, z2, 11), ++ z0 = svmsb_z (p0, z1, z2, 11)) ++ ++/* ++** msb_u16_x_tied1: ++** msb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_x_tied1, svuint16_t, ++ z0 = svmsb_u16_x (p0, z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_u16_x_tied2: ++** msb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_x_tied2, svuint16_t, ++ z0 = svmsb_u16_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_u16_x_tied3: ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_x_tied3, svuint16_t, ++ z0 = svmsb_u16_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** msb z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** msb z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0, z3 ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u16_x_untied, svuint16_t, ++ z0 = svmsb_u16_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svmsb_n_u16_x (p0, z0, z1, x0), ++ z0 = svmsb_x (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_u16_x_tied2: ++** mov (z[0-9]+\.h), w0 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u16_x_tied2, svuint16_t, uint16_t, ++ z0 = svmsb_n_u16_x (p0, z1, z0, x0), ++ z0 = svmsb_x (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_u16_x_untied: ++** mov z0\.h, w0 ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svmsb_n_u16_x (p0, z1, z2, x0), ++ z0 = svmsb_x (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u16_x_tied1: ++** mov (z[0-9]+\.h), #11 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u16_x_tied1, svuint16_t, ++ z0 = svmsb_n_u16_x (p0, z0, z1, 11), ++ z0 = svmsb_x (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u16_x_tied2: ++** mov (z[0-9]+\.h), #11 ++** msb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u16_x_tied2, svuint16_t, ++ z0 = svmsb_n_u16_x (p0, z1, z0, 11), ++ z0 = svmsb_x (p0, z1, z0, 11)) ++ ++/* ++** msb_11_u16_x_untied: ++** mov z0\.h, #11 ++** mls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ 
++TEST_UNIFORM_Z (msb_11_u16_x_untied, svuint16_t, ++ z0 = svmsb_n_u16_x (p0, z1, z2, 11), ++ z0 = svmsb_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u32.c +new file mode 100644 +index 000000000..aaaf0344a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u32.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_u32_m_tied1: ++** msb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_m_tied1, svuint32_t, ++ z0 = svmsb_u32_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_m_tied2, svuint32_t, ++ z0 = svmsb_u32_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_u32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_m_tied3, svuint32_t, ++ z0 = svmsb_u32_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_u32_m_untied: ++** movprfx z0, z1 ++** msb z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_m_untied, svuint32_t, ++ z0 = svmsb_u32_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svmsb_n_u32_m (p0, z0, z1, x0), ++ z0 = svmsb_m (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** msb z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svmsb_n_u32_m (p0, z1, z2, x0), ++ z0 = svmsb_m (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u32_m_tied1: ++** mov (z[0-9]+\.s), #11 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u32_m_tied1, svuint32_t, ++ z0 = svmsb_n_u32_m (p0, z0, z1, 11), ++ z0 = svmsb_m (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #11 ++** movprfx z0, z1 ++** msb z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u32_m_untied, svuint32_t, ++ z0 = svmsb_n_u32_m (p0, z1, z2, 11), ++ z0 = svmsb_m (p0, z1, z2, 11)) ++ ++/* ++** msb_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_z_tied1, svuint32_t, ++ z0 = svmsb_u32_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_z_tied2, svuint32_t, ++ z0 = svmsb_u32_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_u32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_z_tied3, svuint32_t, ++ z0 = svmsb_u32_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** msb z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_z_untied, svuint32_t, ++ z0 = svmsb_u32_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), 
w0 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svmsb_n_u32_z (p0, z0, z1, x0), ++ z0 = svmsb_z (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_u32_z_tied2: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u32_z_tied2, svuint32_t, uint32_t, ++ z0 = svmsb_n_u32_z (p0, z1, z0, x0), ++ z0 = svmsb_z (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** msb z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svmsb_n_u32_z (p0, z1, z2, x0), ++ z0 = svmsb_z (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u32_z_tied1: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u32_z_tied1, svuint32_t, ++ z0 = svmsb_n_u32_z (p0, z0, z1, 11), ++ z0 = svmsb_z (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u32_z_tied2: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u32_z_tied2, svuint32_t, ++ z0 = svmsb_n_u32_z (p0, z1, z0, 11), ++ z0 = svmsb_z (p0, z1, z0, 11)) ++ ++/* ++** msb_11_u32_z_untied: ++** mov (z[0-9]+\.s), #11 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** msb z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** msb z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u32_z_untied, svuint32_t, ++ z0 = svmsb_n_u32_z (p0, z1, z2, 11), ++ z0 = svmsb_z (p0, z1, z2, 11)) ++ ++/* ++** msb_u32_x_tied1: ++** msb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_x_tied1, svuint32_t, ++ z0 = svmsb_u32_x (p0, z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_u32_x_tied2: ++** msb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_x_tied2, svuint32_t, ++ z0 = svmsb_u32_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_u32_x_tied3: ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_x_tied3, svuint32_t, ++ z0 = svmsb_u32_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** msb z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** msb z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0, z3 ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u32_x_untied, svuint32_t, ++ z0 = svmsb_u32_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svmsb_n_u32_x (p0, z0, z1, x0), ++ z0 = svmsb_x (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_u32_x_tied2: ++** mov (z[0-9]+\.s), w0 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u32_x_tied2, svuint32_t, uint32_t, ++ z0 = svmsb_n_u32_x (p0, z1, z0, x0), ++ z0 = svmsb_x (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_u32_x_untied: ++** mov z0\.s, w0 ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svmsb_n_u32_x (p0, z1, z2, x0), ++ z0 = svmsb_x (p0, z1, z2, x0)) ++ ++/* ++** 
msb_11_u32_x_tied1: ++** mov (z[0-9]+\.s), #11 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u32_x_tied1, svuint32_t, ++ z0 = svmsb_n_u32_x (p0, z0, z1, 11), ++ z0 = svmsb_x (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u32_x_tied2: ++** mov (z[0-9]+\.s), #11 ++** msb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u32_x_tied2, svuint32_t, ++ z0 = svmsb_n_u32_x (p0, z1, z0, 11), ++ z0 = svmsb_x (p0, z1, z0, 11)) ++ ++/* ++** msb_11_u32_x_untied: ++** mov z0\.s, #11 ++** mls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u32_x_untied, svuint32_t, ++ z0 = svmsb_n_u32_x (p0, z1, z2, 11), ++ z0 = svmsb_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u64.c +new file mode 100644 +index 000000000..5c5d33073 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u64.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_u64_m_tied1: ++** msb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_m_tied1, svuint64_t, ++ z0 = svmsb_u64_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** msb z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_m_tied2, svuint64_t, ++ z0 = svmsb_u64_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_u64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** msb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_m_tied3, svuint64_t, ++ z0 = svmsb_u64_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_u64_m_untied: ++** movprfx z0, z1 ++** msb z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_m_untied, svuint64_t, ++ z0 = svmsb_u64_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svmsb_n_u64_m (p0, z0, z1, x0), ++ z0 = svmsb_m (p0, z0, z1, x0)) ++ ++/* ++** msb_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** msb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svmsb_n_u64_m (p0, z1, z2, x0), ++ z0 = svmsb_m (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u64_m_tied1: ++** mov (z[0-9]+\.d), #11 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u64_m_tied1, svuint64_t, ++ z0 = svmsb_n_u64_m (p0, z0, z1, 11), ++ z0 = svmsb_m (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #11 ++** movprfx z0, z1 ++** msb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u64_m_untied, svuint64_t, ++ z0 = svmsb_n_u64_m (p0, z1, z2, 11), ++ z0 = svmsb_m (p0, z1, z2, 11)) ++ ++/* ++** msb_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_z_tied1, svuint64_t, ++ z0 = svmsb_u64_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_z_tied2, svuint64_t, ++ z0 = svmsb_u64_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_u64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z 
(msb_u64_z_tied3, svuint64_t, ++ z0 = svmsb_u64_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** msb z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_z_untied, svuint64_t, ++ z0 = svmsb_u64_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svmsb_n_u64_z (p0, z0, z1, x0), ++ z0 = svmsb_z (p0, z0, z1, x0)) ++ ++/* ++** msb_x0_u64_z_tied2: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_u64_z_tied2, svuint64_t, uint64_t, ++ z0 = svmsb_n_u64_z (p0, z1, z0, x0), ++ z0 = svmsb_z (p0, z1, z0, x0)) ++ ++/* ++** msb_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** msb z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svmsb_n_u64_z (p0, z1, z2, x0), ++ z0 = svmsb_z (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u64_z_tied1: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u64_z_tied1, svuint64_t, ++ z0 = svmsb_n_u64_z (p0, z0, z1, 11), ++ z0 = svmsb_z (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u64_z_tied2: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u64_z_tied2, svuint64_t, ++ z0 = svmsb_n_u64_z (p0, z1, z0, 11), ++ z0 = svmsb_z (p0, z1, z0, 11)) ++ ++/* ++** msb_11_u64_z_untied: ++** mov (z[0-9]+\.d), #11 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** msb z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** msb z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u64_z_untied, svuint64_t, ++ z0 = svmsb_n_u64_z (p0, z1, z2, 11), ++ z0 = svmsb_z (p0, z1, z2, 11)) ++ ++/* ++** msb_u64_x_tied1: ++** msb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_x_tied1, svuint64_t, ++ z0 = svmsb_u64_x (p0, z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_u64_x_tied2: ++** msb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_x_tied2, svuint64_t, ++ z0 = svmsb_u64_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_u64_x_tied3: ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_x_tied3, svuint64_t, ++ z0 = svmsb_u64_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** msb z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** msb z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0, z3 ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u64_x_untied, svuint64_t, ++ z0 = svmsb_u64_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svmsb_n_u64_x 
(p0, z0, z1, x0), ++ z0 = svmsb_x (p0, z0, z1, x0)) ++ ++/* ++** msb_x0_u64_x_tied2: ++** mov (z[0-9]+\.d), x0 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_u64_x_tied2, svuint64_t, uint64_t, ++ z0 = svmsb_n_u64_x (p0, z1, z0, x0), ++ z0 = svmsb_x (p0, z1, z0, x0)) ++ ++/* ++** msb_x0_u64_x_untied: ++** mov z0\.d, x0 ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svmsb_n_u64_x (p0, z1, z2, x0), ++ z0 = svmsb_x (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u64_x_tied1: ++** mov (z[0-9]+\.d), #11 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u64_x_tied1, svuint64_t, ++ z0 = svmsb_n_u64_x (p0, z0, z1, 11), ++ z0 = svmsb_x (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u64_x_tied2: ++** mov (z[0-9]+\.d), #11 ++** msb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u64_x_tied2, svuint64_t, ++ z0 = svmsb_n_u64_x (p0, z1, z0, 11), ++ z0 = svmsb_x (p0, z1, z0, 11)) ++ ++/* ++** msb_11_u64_x_untied: ++** mov z0\.d, #11 ++** mls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u64_x_untied, svuint64_t, ++ z0 = svmsb_n_u64_x (p0, z1, z2, 11), ++ z0 = svmsb_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u8.c +new file mode 100644 +index 000000000..5665ec9e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u8.c +@@ -0,0 +1,321 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** msb_u8_m_tied1: ++** msb z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_m_tied1, svuint8_t, ++ z0 = svmsb_u8_m (p0, z0, z1, z2), ++ z0 = svmsb_m (p0, z0, z1, z2)) ++ ++/* ++** msb_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.b, p0/m, \1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_m_tied2, svuint8_t, ++ z0 = svmsb_u8_m (p0, z1, z0, z2), ++ z0 = svmsb_m (p0, z1, z0, z2)) ++ ++/* ++** msb_u8_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** msb z0\.b, p0/m, z2\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_m_tied3, svuint8_t, ++ z0 = svmsb_u8_m (p0, z1, z2, z0), ++ z0 = svmsb_m (p0, z1, z2, z0)) ++ ++/* ++** msb_u8_m_untied: ++** movprfx z0, z1 ++** msb z0\.b, p0/m, z2\.b, z3\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_m_untied, svuint8_t, ++ z0 = svmsb_u8_m (p0, z1, z2, z3), ++ z0 = svmsb_m (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svmsb_n_u8_m (p0, z0, z1, x0), ++ z0 = svmsb_m (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** msb z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svmsb_n_u8_m (p0, z1, z2, x0), ++ z0 = svmsb_m (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u8_m_tied1: ++** mov (z[0-9]+\.b), #11 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u8_m_tied1, svuint8_t, ++ z0 = svmsb_n_u8_m (p0, z0, z1, 11), ++ z0 = svmsb_m (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #11 ++** movprfx z0, z1 ++** msb z0\.b, p0/m, z2\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u8_m_untied, svuint8_t, ++ z0 = svmsb_n_u8_m (p0, z1, z2, 11), ++ z0 = svmsb_m (p0, z1, z2, 11)) ++ ++/* ++** msb_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** msb 
z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_z_tied1, svuint8_t, ++ z0 = svmsb_u8_z (p0, z0, z1, z2), ++ z0 = svmsb_z (p0, z0, z1, z2)) ++ ++/* ++** msb_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_z_tied2, svuint8_t, ++ z0 = svmsb_u8_z (p0, z1, z0, z2), ++ z0 = svmsb_z (p0, z1, z0, z2)) ++ ++/* ++** msb_u8_z_tied3: ++** movprfx z0\.b, p0/z, z0\.b ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_z_tied3, svuint8_t, ++ z0 = svmsb_u8_z (p0, z1, z2, z0), ++ z0 = svmsb_z (p0, z1, z2, z0)) ++ ++/* ++** msb_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** msb z0\.b, p0/m, z2\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, z1\.b, z3\.b ++** | ++** movprfx z0\.b, p0/z, z3\.b ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_z_untied, svuint8_t, ++ z0 = svmsb_u8_z (p0, z1, z2, z3), ++ z0 = svmsb_z (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svmsb_n_u8_z (p0, z0, z1, x0), ++ z0 = svmsb_z (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_u8_z_tied2: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u8_z_tied2, svuint8_t, uint8_t, ++ z0 = svmsb_n_u8_z (p0, z1, z0, x0), ++ z0 = svmsb_z (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** msb z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svmsb_n_u8_z (p0, z1, z2, x0), ++ z0 = svmsb_z (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u8_z_tied1: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u8_z_tied1, svuint8_t, ++ z0 = svmsb_n_u8_z (p0, z0, z1, 11), ++ z0 = svmsb_z (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u8_z_tied2: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u8_z_tied2, svuint8_t, ++ z0 = svmsb_n_u8_z (p0, z1, z0, 11), ++ z0 = svmsb_z (p0, z1, z0, 11)) ++ ++/* ++** msb_11_u8_z_untied: ++** mov (z[0-9]+\.b), #11 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** msb z0\.b, p0/m, z2\.b, \1 ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** msb z0\.b, p0/m, z1\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u8_z_untied, svuint8_t, ++ z0 = svmsb_n_u8_z (p0, z1, z2, 11), ++ z0 = svmsb_z (p0, z1, z2, 11)) ++ ++/* ++** msb_u8_x_tied1: ++** msb z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_x_tied1, svuint8_t, ++ z0 = svmsb_u8_x (p0, z0, z1, z2), ++ z0 = svmsb_x (p0, z0, z1, z2)) ++ ++/* ++** msb_u8_x_tied2: ++** msb z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_x_tied2, svuint8_t, ++ z0 = svmsb_u8_x (p0, z1, z0, z2), ++ z0 = svmsb_x (p0, z1, z0, z2)) ++ ++/* ++** msb_u8_x_tied3: ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_x_tied3, svuint8_t, ++ z0 = svmsb_u8_x (p0, z1, z2, z0), ++ z0 = svmsb_x (p0, z1, z2, z0)) ++ ++/* ++** msb_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** msb z0\.b, p0/m, 
z2\.b, z3\.b ++** | ++** movprfx z0, z2 ++** msb z0\.b, p0/m, z1\.b, z3\.b ++** | ++** movprfx z0, z3 ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (msb_u8_x_untied, svuint8_t, ++ z0 = svmsb_u8_x (p0, z1, z2, z3), ++ z0 = svmsb_x (p0, z1, z2, z3)) ++ ++/* ++** msb_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svmsb_n_u8_x (p0, z0, z1, x0), ++ z0 = svmsb_x (p0, z0, z1, x0)) ++ ++/* ++** msb_w0_u8_x_tied2: ++** mov (z[0-9]+\.b), w0 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u8_x_tied2, svuint8_t, uint8_t, ++ z0 = svmsb_n_u8_x (p0, z1, z0, x0), ++ z0 = svmsb_x (p0, z1, z0, x0)) ++ ++/* ++** msb_w0_u8_x_untied: ++** mov z0\.b, w0 ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (msb_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svmsb_n_u8_x (p0, z1, z2, x0), ++ z0 = svmsb_x (p0, z1, z2, x0)) ++ ++/* ++** msb_11_u8_x_tied1: ++** mov (z[0-9]+\.b), #11 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u8_x_tied1, svuint8_t, ++ z0 = svmsb_n_u8_x (p0, z0, z1, 11), ++ z0 = svmsb_x (p0, z0, z1, 11)) ++ ++/* ++** msb_11_u8_x_tied2: ++** mov (z[0-9]+\.b), #11 ++** msb z0\.b, p0/m, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u8_x_tied2, svuint8_t, ++ z0 = svmsb_n_u8_x (p0, z1, z0, 11), ++ z0 = svmsb_x (p0, z1, z0, 11)) ++ ++/* ++** msb_11_u8_x_untied: ++** mov z0\.b, #11 ++** mls z0\.b, p0/m, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (msb_11_u8_x_untied, svuint8_t, ++ z0 = svmsb_n_u8_x (p0, z1, z2, 11), ++ z0 = svmsb_x (p0, z1, z2, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16.c +new file mode 100644 +index 000000000..ef3de0c59 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16.c +@@ -0,0 +1,444 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_f16_m_tied1: ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_m_tied1, svfloat16_t, ++ z0 = svmul_f16_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_m_tied2, svfloat16_t, ++ z0 = svmul_f16_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_f16_m_untied: ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_m_untied, svfloat16_t, ++ z0 = svmul_f16_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_m (p0, z0, d4), ++ z0 = svmul_m (p0, z0, d4)) ++ ++/* ++** mul_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_m (p0, z1, d4), ++ z0 = svmul_m (p0, z1, d4)) ++ ++/* ++** mul_1_f16_m_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_m_tied1, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z0, 1), ++ z0 = svmul_m (p0, z0, 1)) ++ ++/* ++** mul_1_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_m_untied, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z1, 1), ++ z0 = svmul_m (p0, z1, 1)) ++ ++/* ++** mul_0p5_f16_m_tied1: ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_m_tied1, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z0, 0.5), ++ z0 = svmul_m (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f16_m_untied: ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_m_untied, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z1, 0.5), ++ z0 = svmul_m (p0, z1, 0.5)) ++ ++/* ++** mul_2_f16_m_tied1: ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_m_tied1, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_f16_m_untied: ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_m_untied, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_z_tied1, svfloat16_t, ++ z0 = svmul_f16_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_z_tied2, svfloat16_t, ++ z0 = svmul_f16_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmul z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_z_untied, svfloat16_t, ++ z0 = svmul_f16_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_z (p0, z0, d4), ++ z0 = svmul_z (p0, z0, d4)) ++ ++/* ++** mul_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmul z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_z (p0, z1, d4), ++ z0 = svmul_z (p0, z1, d4)) ++ ++/* ++** mul_1_f16_z_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_z_tied1, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z0, 1), ++ z0 = svmul_z (p0, z0, 1)) ++ ++/* ++** mul_1_f16_z_untied: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmul z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_z_untied, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z1, 1), ++ z0 = svmul_z (p0, z1, 1)) ++ ++/* ++** mul_0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z0, 0.5), ++ z0 = svmul_z (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_z_untied, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z1, 0.5), ++ z0 = svmul_z (p0, z1, 0.5)) ++ ++/* ++** mul_2_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_z_tied1, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_z_untied, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_f16_x_tied1: ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_x_tied1, svfloat16_t, ++ z0 = svmul_f16_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_f16_x_tied2: ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_x_tied2, svfloat16_t, ++ z0 = svmul_f16_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_x_untied, svfloat16_t, ++ z0 = svmul_f16_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_x (p0, z0, d4), ++ z0 = svmul_x (p0, z0, d4)) ++ ++/* ++** mul_h4_f16_x_untied: ++** mov z0\.h, h4 ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_x (p0, z1, d4), ++ z0 = svmul_x (p0, z1, d4)) ++ ++/* ++** mul_1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z0, 1), ++ z0 = svmul_x (p0, z0, 1)) ++ ++/* ++** mul_1_f16_x_untied: ++** fmov z0\.h, #1\.0(?:e\+0)? 
++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z1, 1), ++ z0 = svmul_x (p0, z1, 1)) ++ ++/* ++** mul_0p5_f16_x_tied1: ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z0, 0.5), ++ z0 = svmul_x (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f16_x_untied: ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z1, 0.5), ++ z0 = svmul_x (p0, z1, 0.5)) ++ ++/* ++** mul_2_f16_x_tied1: ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_f16_x_untied: ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** ptrue_mul_f16_x_tied1: ++** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f16_x_tied1, svfloat16_t, ++ z0 = svmul_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svmul_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_mul_f16_x_tied2: ++** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f16_x_tied2, svfloat16_t, ++ z0 = svmul_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svmul_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_mul_f16_x_untied: ++** fmul z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f16_x_untied, svfloat16_t, ++ z0 = svmul_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svmul_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_mul_1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fmul z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svmul_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_mul_1_f16_x_untied: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fmul z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svmul_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_mul_0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 0.5), ++ z0 = svmul_x (svptrue_b16 (), z0, 0.5)) ++ ++/* ++** ptrue_mul_0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 0.5), ++ z0 = svmul_x (svptrue_b16 (), z1, 0.5)) ++ ++/* ++** ptrue_mul_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svmul_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_mul_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svmul_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16_notrap.c +new file mode 100644 +index 000000000..481fe999c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16_notrap.c +@@ -0,0 +1,439 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_f16_m_tied1: ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_m_tied1, svfloat16_t, ++ z0 = svmul_f16_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_m_tied2, svfloat16_t, ++ z0 = svmul_f16_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_f16_m_untied: ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_m_untied, svfloat16_t, ++ z0 = svmul_f16_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_m (p0, z0, d4), ++ z0 = svmul_m (p0, z0, d4)) ++ ++/* ++** mul_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_m (p0, z1, d4), ++ z0 = svmul_m (p0, z1, d4)) ++ ++/* ++** mul_1_f16_m_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_m_tied1, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z0, 1), ++ z0 = svmul_m (p0, z0, 1)) ++ ++/* ++** mul_1_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_m_untied, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z1, 1), ++ z0 = svmul_m (p0, z1, 1)) ++ ++/* ++** mul_0p5_f16_m_tied1: ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_m_tied1, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z0, 0.5), ++ z0 = svmul_m (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f16_m_untied: ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_m_untied, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z1, 0.5), ++ z0 = svmul_m (p0, z1, 0.5)) ++ ++/* ++** mul_2_f16_m_tied1: ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_m_tied1, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_f16_m_untied: ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_m_untied, svfloat16_t, ++ z0 = svmul_n_f16_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_z_tied1, svfloat16_t, ++ z0 = svmul_f16_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_z_tied2, svfloat16_t, ++ z0 = svmul_f16_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmul z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_z_untied, svfloat16_t, ++ z0 = svmul_f16_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_z (p0, z0, d4), ++ z0 = svmul_z (p0, z0, d4)) ++ ++/* ++** mul_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmul z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_z (p0, z1, d4), ++ z0 = svmul_z (p0, z1, d4)) ++ ++/* ++** mul_1_f16_z_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_z_tied1, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z0, 1), ++ z0 = svmul_z (p0, z0, 1)) ++ ++/* ++** mul_1_f16_z_untied: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmul z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_z_untied, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z1, 1), ++ z0 = svmul_z (p0, z1, 1)) ++ ++/* ++** mul_0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z0, 0.5), ++ z0 = svmul_z (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_z_untied, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z1, 0.5), ++ z0 = svmul_z (p0, z1, 0.5)) ++ ++/* ++** mul_2_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_z_tied1, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_z_untied, svfloat16_t, ++ z0 = svmul_n_f16_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_f16_x_tied1: ++** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_x_tied1, svfloat16_t, ++ z0 = svmul_f16_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_f16_x_tied2: ++** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_x_tied2, svfloat16_t, ++ z0 = svmul_f16_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_f16_x_untied: ++** fmul z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f16_x_untied, svfloat16_t, ++ z0 = svmul_f16_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmul z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_x (p0, z0, d4), ++ z0 = svmul_x (p0, z0, d4)) ++ ++/* ++** mul_h4_f16_x_untied: ++** mov (z[0-9]+\.h), h4 ++** fmul z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svmul_n_f16_x (p0, z1, d4), ++ z0 = svmul_x (p0, z1, d4)) ++ ++/* ++** mul_1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fmul z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z0, 1), ++ z0 = svmul_x (p0, z0, 1)) ++ ++/* ++** mul_1_f16_x_untied: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** fmul z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z1, 1), ++ z0 = svmul_x (p0, z1, 1)) ++ ++/* ++** mul_0p5_f16_x_tied1: ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z0, 0.5), ++ z0 = svmul_x (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f16_x_untied: ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z1, 0.5), ++ z0 = svmul_x (p0, z1, 0.5)) ++ ++/* ++** mul_2_f16_x_tied1: ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_f16_x_untied: ++** movprfx z0, z1 ++** fmul z0\.h, p0/m, z0\.h, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** ptrue_mul_f16_x_tied1: ++** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f16_x_tied1, svfloat16_t, ++ z0 = svmul_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svmul_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_mul_f16_x_tied2: ++** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f16_x_tied2, svfloat16_t, ++ z0 = svmul_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svmul_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_mul_f16_x_untied: ++** fmul z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f16_x_untied, svfloat16_t, ++ z0 = svmul_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svmul_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_mul_1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fmul z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svmul_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_mul_1_f16_x_untied: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fmul z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svmul_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_mul_0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 0.5), ++ z0 = svmul_x (svptrue_b16 (), z0, 0.5)) ++ ++/* ++** ptrue_mul_0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 0.5), ++ z0 = svmul_x (svptrue_b16 (), z1, 0.5)) ++ ++/* ++** ptrue_mul_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f16_x_tied1, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svmul_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_mul_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f16_x_untied, svfloat16_t, ++ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svmul_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32.c +new file mode 100644 +index 000000000..5b3df6fde +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32.c +@@ -0,0 +1,444 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_f32_m_tied1: ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_m_tied1, svfloat32_t, ++ z0 = svmul_f32_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_m_tied2, svfloat32_t, ++ z0 = svmul_f32_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_f32_m_untied: ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_m_untied, svfloat32_t, ++ z0 = svmul_f32_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svmul_n_f32_m (p0, z0, d4), ++ z0 = svmul_m (p0, z0, d4)) ++ ++/* ++** mul_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svmul_n_f32_m (p0, z1, d4), ++ z0 = svmul_m (p0, z1, d4)) ++ ++/* ++** mul_1_f32_m_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_m_tied1, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z0, 1), ++ z0 = svmul_m (p0, z0, 1)) ++ ++/* ++** mul_1_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_m_untied, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z1, 1), ++ z0 = svmul_m (p0, z1, 1)) ++ ++/* ++** mul_0p5_f32_m_tied1: ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_m_tied1, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z0, 0.5), ++ z0 = svmul_m (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f32_m_untied: ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_m_untied, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z1, 0.5), ++ z0 = svmul_m (p0, z1, 0.5)) ++ ++/* ++** mul_2_f32_m_tied1: ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_m_tied1, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_f32_m_untied: ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_m_untied, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_z_tied1, svfloat32_t, ++ z0 = svmul_f32_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_z_tied2, svfloat32_t, ++ z0 = svmul_f32_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmul z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_z_untied, svfloat32_t, ++ z0 = svmul_f32_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svmul_n_f32_z (p0, z0, d4), ++ z0 = svmul_z (p0, z0, d4)) ++ ++/* ++** mul_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmul z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svmul_n_f32_z (p0, z1, d4), ++ z0 = svmul_z (p0, z1, d4)) ++ ++/* ++** mul_1_f32_z_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_z_tied1, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z0, 1), ++ z0 = svmul_z (p0, z0, 1)) ++ ++/* ++** mul_1_f32_z_untied: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmul z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_z_untied, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z1, 1), ++ z0 = svmul_z (p0, z1, 1)) ++ ++/* ++** mul_0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z0, 0.5), ++ z0 = svmul_z (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_z_untied, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z1, 0.5), ++ z0 = svmul_z (p0, z1, 0.5)) ++ ++/* ++** mul_2_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_z_tied1, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_z_untied, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_f32_x_tied1: ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_x_tied1, svfloat32_t, ++ z0 = svmul_f32_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_f32_x_tied2: ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_x_tied2, svfloat32_t, ++ z0 = svmul_f32_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_x_untied, svfloat32_t, ++ z0 = svmul_f32_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svmul_n_f32_x (p0, z0, d4), ++ z0 = svmul_x (p0, z0, d4)) ++ ++/* ++** mul_s4_f32_x_untied: ++** mov z0\.s, s4 ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svmul_n_f32_x (p0, z1, d4), ++ z0 = svmul_x (p0, z1, d4)) ++ ++/* ++** mul_1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z0, 1), ++ z0 = svmul_x (p0, z0, 1)) ++ ++/* ++** mul_1_f32_x_untied: ++** fmov z0\.s, #1\.0(?:e\+0)? 
++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z1, 1), ++ z0 = svmul_x (p0, z1, 1)) ++ ++/* ++** mul_0p5_f32_x_tied1: ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z0, 0.5), ++ z0 = svmul_x (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f32_x_untied: ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z1, 0.5), ++ z0 = svmul_x (p0, z1, 0.5)) ++ ++/* ++** mul_2_f32_x_tied1: ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_f32_x_untied: ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** ptrue_mul_f32_x_tied1: ++** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f32_x_tied1, svfloat32_t, ++ z0 = svmul_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svmul_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_mul_f32_x_tied2: ++** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f32_x_tied2, svfloat32_t, ++ z0 = svmul_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svmul_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_mul_f32_x_untied: ++** fmul z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f32_x_untied, svfloat32_t, ++ z0 = svmul_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svmul_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_mul_1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fmul z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svmul_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_mul_1_f32_x_untied: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fmul z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svmul_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_mul_0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 0.5), ++ z0 = svmul_x (svptrue_b32 (), z0, 0.5)) ++ ++/* ++** ptrue_mul_0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 0.5), ++ z0 = svmul_x (svptrue_b32 (), z1, 0.5)) ++ ++/* ++** ptrue_mul_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svmul_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_mul_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svmul_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32_notrap.c +new file mode 100644 +index 000000000..eb2d240ef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32_notrap.c +@@ -0,0 +1,439 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_f32_m_tied1: ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_m_tied1, svfloat32_t, ++ z0 = svmul_f32_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_m_tied2, svfloat32_t, ++ z0 = svmul_f32_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_f32_m_untied: ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_m_untied, svfloat32_t, ++ z0 = svmul_f32_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svmul_n_f32_m (p0, z0, d4), ++ z0 = svmul_m (p0, z0, d4)) ++ ++/* ++** mul_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svmul_n_f32_m (p0, z1, d4), ++ z0 = svmul_m (p0, z1, d4)) ++ ++/* ++** mul_1_f32_m_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_m_tied1, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z0, 1), ++ z0 = svmul_m (p0, z0, 1)) ++ ++/* ++** mul_1_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_m_untied, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z1, 1), ++ z0 = svmul_m (p0, z1, 1)) ++ ++/* ++** mul_0p5_f32_m_tied1: ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_m_tied1, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z0, 0.5), ++ z0 = svmul_m (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f32_m_untied: ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_m_untied, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z1, 0.5), ++ z0 = svmul_m (p0, z1, 0.5)) ++ ++/* ++** mul_2_f32_m_tied1: ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_m_tied1, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_f32_m_untied: ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_m_untied, svfloat32_t, ++ z0 = svmul_n_f32_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_z_tied1, svfloat32_t, ++ z0 = svmul_f32_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_z_tied2, svfloat32_t, ++ z0 = svmul_f32_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmul z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_z_untied, svfloat32_t, ++ z0 = svmul_f32_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svmul_n_f32_z (p0, z0, d4), ++ z0 = svmul_z (p0, z0, d4)) ++ ++/* ++** mul_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmul z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svmul_n_f32_z (p0, z1, d4), ++ z0 = svmul_z (p0, z1, d4)) ++ ++/* ++** mul_1_f32_z_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_z_tied1, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z0, 1), ++ z0 = svmul_z (p0, z0, 1)) ++ ++/* ++** mul_1_f32_z_untied: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmul z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_z_untied, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z1, 1), ++ z0 = svmul_z (p0, z1, 1)) ++ ++/* ++** mul_0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z0, 0.5), ++ z0 = svmul_z (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_z_untied, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z1, 0.5), ++ z0 = svmul_z (p0, z1, 0.5)) ++ ++/* ++** mul_2_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_z_tied1, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_z_untied, svfloat32_t, ++ z0 = svmul_n_f32_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_f32_x_tied1: ++** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_x_tied1, svfloat32_t, ++ z0 = svmul_f32_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_f32_x_tied2: ++** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_x_tied2, svfloat32_t, ++ z0 = svmul_f32_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_f32_x_untied: ++** fmul z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f32_x_untied, svfloat32_t, ++ z0 = svmul_f32_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmul z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svmul_n_f32_x (p0, z0, d4), ++ z0 = svmul_x (p0, z0, d4)) ++ ++/* ++** mul_s4_f32_x_untied: ++** mov (z[0-9]+\.s), s4 ++** fmul z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svmul_n_f32_x (p0, z1, d4), ++ z0 = svmul_x (p0, z1, d4)) ++ ++/* ++** mul_1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fmul z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z0, 1), ++ z0 = svmul_x (p0, z0, 1)) ++ ++/* ++** mul_1_f32_x_untied: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
++** fmul z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z1, 1), ++ z0 = svmul_x (p0, z1, 1)) ++ ++/* ++** mul_0p5_f32_x_tied1: ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z0, 0.5), ++ z0 = svmul_x (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f32_x_untied: ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z1, 0.5), ++ z0 = svmul_x (p0, z1, 0.5)) ++ ++/* ++** mul_2_f32_x_tied1: ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_f32_x_untied: ++** movprfx z0, z1 ++** fmul z0\.s, p0/m, z0\.s, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** ptrue_mul_f32_x_tied1: ++** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f32_x_tied1, svfloat32_t, ++ z0 = svmul_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svmul_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_mul_f32_x_tied2: ++** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f32_x_tied2, svfloat32_t, ++ z0 = svmul_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svmul_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_mul_f32_x_untied: ++** fmul z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f32_x_untied, svfloat32_t, ++ z0 = svmul_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svmul_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_mul_1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fmul z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svmul_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_mul_1_f32_x_untied: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fmul z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svmul_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_mul_0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 0.5), ++ z0 = svmul_x (svptrue_b32 (), z0, 0.5)) ++ ++/* ++** ptrue_mul_0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 0.5), ++ z0 = svmul_x (svptrue_b32 (), z1, 0.5)) ++ ++/* ++** ptrue_mul_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f32_x_tied1, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svmul_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_mul_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f32_x_untied, svfloat32_t, ++ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svmul_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64.c +new file mode 100644 +index 000000000..f5654a9f1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64.c +@@ -0,0 +1,444 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_f64_m_tied1: ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_m_tied1, svfloat64_t, ++ z0 = svmul_f64_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_m_tied2, svfloat64_t, ++ z0 = svmul_f64_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_f64_m_untied: ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_m_untied, svfloat64_t, ++ z0 = svmul_f64_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svmul_n_f64_m (p0, z0, d4), ++ z0 = svmul_m (p0, z0, d4)) ++ ++/* ++** mul_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svmul_n_f64_m (p0, z1, d4), ++ z0 = svmul_m (p0, z1, d4)) ++ ++/* ++** mul_1_f64_m_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_m_tied1, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z0, 1), ++ z0 = svmul_m (p0, z0, 1)) ++ ++/* ++** mul_1_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_m_untied, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z1, 1), ++ z0 = svmul_m (p0, z1, 1)) ++ ++/* ++** mul_0p5_f64_m_tied1: ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_m_tied1, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z0, 0.5), ++ z0 = svmul_m (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f64_m_untied: ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_m_untied, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z1, 0.5), ++ z0 = svmul_m (p0, z1, 0.5)) ++ ++/* ++** mul_2_f64_m_tied1: ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_m_tied1, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_f64_m_untied: ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_m_untied, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_z_tied1, svfloat64_t, ++ z0 = svmul_f64_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_z_tied2, svfloat64_t, ++ z0 = svmul_f64_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmul z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_z_untied, svfloat64_t, ++ z0 = svmul_f64_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svmul_n_f64_z (p0, z0, d4), ++ z0 = svmul_z (p0, z0, d4)) ++ ++/* ++** mul_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmul z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svmul_n_f64_z (p0, z1, d4), ++ z0 = svmul_z (p0, z1, d4)) ++ ++/* ++** mul_1_f64_z_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_z_tied1, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z0, 1), ++ z0 = svmul_z (p0, z0, 1)) ++ ++/* ++** mul_1_f64_z_untied: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmul z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_z_untied, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z1, 1), ++ z0 = svmul_z (p0, z1, 1)) ++ ++/* ++** mul_0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z0, 0.5), ++ z0 = svmul_z (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_z_untied, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z1, 0.5), ++ z0 = svmul_z (p0, z1, 0.5)) ++ ++/* ++** mul_2_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_z_tied1, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_z_untied, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_f64_x_tied1: ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_x_tied1, svfloat64_t, ++ z0 = svmul_f64_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_f64_x_tied2: ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_x_tied2, svfloat64_t, ++ z0 = svmul_f64_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_x_untied, svfloat64_t, ++ z0 = svmul_f64_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svmul_n_f64_x (p0, z0, d4), ++ z0 = svmul_x (p0, z0, d4)) ++ ++/* ++** mul_d4_f64_x_untied: ++** mov z0\.d, d4 ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svmul_n_f64_x (p0, z1, d4), ++ z0 = svmul_x (p0, z1, d4)) ++ ++/* ++** mul_1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z0, 1), ++ z0 = svmul_x (p0, z0, 1)) ++ ++/* ++** mul_1_f64_x_untied: ++** fmov z0\.d, #1\.0(?:e\+0)? 
++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z1, 1), ++ z0 = svmul_x (p0, z1, 1)) ++ ++/* ++** mul_0p5_f64_x_tied1: ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z0, 0.5), ++ z0 = svmul_x (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f64_x_untied: ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z1, 0.5), ++ z0 = svmul_x (p0, z1, 0.5)) ++ ++/* ++** mul_2_f64_x_tied1: ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_f64_x_untied: ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** ptrue_mul_f64_x_tied1: ++** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f64_x_tied1, svfloat64_t, ++ z0 = svmul_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svmul_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_mul_f64_x_tied2: ++** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f64_x_tied2, svfloat64_t, ++ z0 = svmul_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svmul_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_mul_f64_x_untied: ++** fmul z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f64_x_untied, svfloat64_t, ++ z0 = svmul_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svmul_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_mul_1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fmul z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svmul_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_mul_1_f64_x_untied: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fmul z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svmul_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_mul_0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 0.5), ++ z0 = svmul_x (svptrue_b64 (), z0, 0.5)) ++ ++/* ++** ptrue_mul_0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 0.5), ++ z0 = svmul_x (svptrue_b64 (), z1, 0.5)) ++ ++/* ++** ptrue_mul_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svmul_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_mul_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svmul_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64_notrap.c +new file mode 100644 +index 000000000..d865618d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64_notrap.c +@@ -0,0 +1,439 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_f64_m_tied1: ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_m_tied1, svfloat64_t, ++ z0 = svmul_f64_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_m_tied2, svfloat64_t, ++ z0 = svmul_f64_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_f64_m_untied: ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_m_untied, svfloat64_t, ++ z0 = svmul_f64_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svmul_n_f64_m (p0, z0, d4), ++ z0 = svmul_m (p0, z0, d4)) ++ ++/* ++** mul_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svmul_n_f64_m (p0, z1, d4), ++ z0 = svmul_m (p0, z1, d4)) ++ ++/* ++** mul_1_f64_m_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_m_tied1, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z0, 1), ++ z0 = svmul_m (p0, z0, 1)) ++ ++/* ++** mul_1_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_m_untied, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z1, 1), ++ z0 = svmul_m (p0, z1, 1)) ++ ++/* ++** mul_0p5_f64_m_tied1: ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_m_tied1, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z0, 0.5), ++ z0 = svmul_m (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f64_m_untied: ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_m_untied, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z1, 0.5), ++ z0 = svmul_m (p0, z1, 0.5)) ++ ++/* ++** mul_2_f64_m_tied1: ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_m_tied1, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_f64_m_untied: ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_m_untied, svfloat64_t, ++ z0 = svmul_n_f64_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_z_tied1, svfloat64_t, ++ z0 = svmul_f64_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_z_tied2, svfloat64_t, ++ z0 = svmul_f64_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmul z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_z_untied, svfloat64_t, ++ z0 = svmul_f64_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svmul_n_f64_z (p0, z0, d4), ++ z0 = svmul_z (p0, z0, d4)) ++ ++/* ++** mul_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmul z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svmul_n_f64_z (p0, z1, d4), ++ z0 = svmul_z (p0, z1, d4)) ++ ++/* ++** mul_1_f64_z_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_z_tied1, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z0, 1), ++ z0 = svmul_z (p0, z0, 1)) ++ ++/* ++** mul_1_f64_z_untied: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmul z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_z_untied, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z1, 1), ++ z0 = svmul_z (p0, z1, 1)) ++ ++/* ++** mul_0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z0, 0.5), ++ z0 = svmul_z (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_z_untied, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z1, 0.5), ++ z0 = svmul_z (p0, z1, 0.5)) ++ ++/* ++** mul_2_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_z_tied1, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_z_untied, svfloat64_t, ++ z0 = svmul_n_f64_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_f64_x_tied1: ++** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_x_tied1, svfloat64_t, ++ z0 = svmul_f64_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_f64_x_tied2: ++** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_x_tied2, svfloat64_t, ++ z0 = svmul_f64_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_f64_x_untied: ++** fmul z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_f64_x_untied, svfloat64_t, ++ z0 = svmul_f64_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmul z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svmul_n_f64_x (p0, z0, d4), ++ z0 = svmul_x (p0, z0, d4)) ++ ++/* ++** mul_d4_f64_x_untied: ++** mov (z[0-9]+\.d), d4 ++** fmul z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZD (mul_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svmul_n_f64_x (p0, z1, d4), ++ z0 = svmul_x (p0, z1, d4)) ++ ++/* ++** mul_1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fmul z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z0, 1), ++ z0 = svmul_x (p0, z0, 1)) ++ ++/* ++** mul_1_f64_x_untied: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** fmul z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_1_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z1, 1), ++ z0 = svmul_x (p0, z1, 1)) ++ ++/* ++** mul_0p5_f64_x_tied1: ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z0, 0.5), ++ z0 = svmul_x (p0, z0, 0.5)) ++ ++/* ++** mul_0p5_f64_x_untied: ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_0p5_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z1, 0.5), ++ z0 = svmul_x (p0, z1, 0.5)) ++ ++/* ++** mul_2_f64_x_tied1: ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_f64_x_untied: ++** movprfx z0, z1 ++** fmul z0\.d, p0/m, z0\.d, #2\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** ptrue_mul_f64_x_tied1: ++** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f64_x_tied1, svfloat64_t, ++ z0 = svmul_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svmul_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_mul_f64_x_tied2: ++** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f64_x_tied2, svfloat64_t, ++ z0 = svmul_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svmul_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_mul_f64_x_untied: ++** fmul z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_f64_x_untied, svfloat64_t, ++ z0 = svmul_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svmul_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_mul_1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fmul z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svmul_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_mul_1_f64_x_untied: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fmul z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_1_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svmul_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_mul_0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 0.5), ++ z0 = svmul_x (svptrue_b64 (), z0, 0.5)) ++ ++/* ++** ptrue_mul_0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_0p5_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 0.5), ++ z0 = svmul_x (svptrue_b64 (), z1, 0.5)) ++ ++/* ++** ptrue_mul_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f64_x_tied1, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svmul_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_mul_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mul_2_f64_x_untied, svfloat64_t, ++ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svmul_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f16.c +new file mode 100644 +index 000000000..1c7503bfd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f16.c +@@ -0,0 +1,114 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_lane_0_f16_tied1: ++** fmul z0\.h, z0\.h, z1\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_0_f16_tied1, svfloat16_t, ++ z0 = svmul_lane_f16 (z0, z1, 0), ++ z0 = svmul_lane (z0, z1, 0)) ++ ++/* ++** mul_lane_0_f16_tied2: ++** fmul z0\.h, z1\.h, z0\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_0_f16_tied2, svfloat16_t, ++ z0 = svmul_lane_f16 (z1, z0, 0), ++ z0 = svmul_lane (z1, z0, 0)) ++ ++/* ++** mul_lane_0_f16_untied: ++** fmul z0\.h, z1\.h, z2\.h\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_0_f16_untied, svfloat16_t, ++ z0 = svmul_lane_f16 (z1, z2, 0), ++ z0 = svmul_lane (z1, z2, 0)) ++ ++/* ++** mul_lane_1_f16: ++** fmul z0\.h, z1\.h, z2\.h\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_1_f16, svfloat16_t, ++ z0 = svmul_lane_f16 (z1, z2, 1), ++ z0 = svmul_lane (z1, z2, 1)) ++ ++/* ++** mul_lane_2_f16: ++** fmul z0\.h, z1\.h, z2\.h\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_2_f16, svfloat16_t, ++ z0 = svmul_lane_f16 (z1, z2, 2), ++ z0 = svmul_lane (z1, z2, 2)) ++ ++/* ++** mul_lane_3_f16: ++** fmul z0\.h, z1\.h, z2\.h\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_3_f16, svfloat16_t, ++ z0 = svmul_lane_f16 (z1, z2, 3), ++ z0 = svmul_lane (z1, z2, 3)) ++ ++/* ++** mul_lane_4_f16: ++** fmul z0\.h, z1\.h, z2\.h\[4\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_4_f16, svfloat16_t, ++ z0 = svmul_lane_f16 (z1, z2, 4), ++ z0 = svmul_lane (z1, z2, 4)) ++ ++/* ++** mul_lane_5_f16: ++** fmul z0\.h, z1\.h, z2\.h\[5\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_5_f16, svfloat16_t, ++ z0 = svmul_lane_f16 (z1, z2, 5), ++ z0 = svmul_lane (z1, z2, 5)) ++ ++/* ++** mul_lane_6_f16: ++** fmul z0\.h, z1\.h, z2\.h\[6\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_6_f16, svfloat16_t, ++ z0 = svmul_lane_f16 (z1, z2, 6), ++ z0 = svmul_lane (z1, z2, 6)) ++ ++/* ++** mul_lane_7_f16: ++** fmul z0\.h, z1\.h, z2\.h\[7\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_7_f16, svfloat16_t, ++ z0 = svmul_lane_f16 (z1, z2, 7), ++ z0 = svmul_lane (z1, z2, 7)) ++ ++/* ++** mul_lane_z7_f16: ++** fmul z0\.h, z1\.h, z7\.h\[7\] ++** ret ++*/ ++TEST_DUAL_Z (mul_lane_z7_f16, svfloat16_t, svfloat16_t, ++ z0 = svmul_lane_f16 (z1, z7, 7), ++ z0 = svmul_lane (z1, z7, 7)) ++ ++/* ++** mul_lane_z8_f16: ++** str d8, \[sp, -16\]! 
++** mov (z[0-7])\.d, z8\.d ++** fmul z0\.h, z1\.h, \1\.h\[7\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (mul_lane_z8_f16, svfloat16_t, svfloat16_t, z8, ++ z0 = svmul_lane_f16 (z1, z8, 7), ++ z0 = svmul_lane (z1, z8, 7)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f32.c +new file mode 100644 +index 000000000..5355e7e0b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f32.c +@@ -0,0 +1,78 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_lane_0_f32_tied1: ++** fmul z0\.s, z0\.s, z1\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_0_f32_tied1, svfloat32_t, ++ z0 = svmul_lane_f32 (z0, z1, 0), ++ z0 = svmul_lane (z0, z1, 0)) ++ ++/* ++** mul_lane_0_f32_tied2: ++** fmul z0\.s, z1\.s, z0\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_0_f32_tied2, svfloat32_t, ++ z0 = svmul_lane_f32 (z1, z0, 0), ++ z0 = svmul_lane (z1, z0, 0)) ++ ++/* ++** mul_lane_0_f32_untied: ++** fmul z0\.s, z1\.s, z2\.s\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_0_f32_untied, svfloat32_t, ++ z0 = svmul_lane_f32 (z1, z2, 0), ++ z0 = svmul_lane (z1, z2, 0)) ++ ++/* ++** mul_lane_1_f32: ++** fmul z0\.s, z1\.s, z2\.s\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_1_f32, svfloat32_t, ++ z0 = svmul_lane_f32 (z1, z2, 1), ++ z0 = svmul_lane (z1, z2, 1)) ++ ++/* ++** mul_lane_2_f32: ++** fmul z0\.s, z1\.s, z2\.s\[2\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_2_f32, svfloat32_t, ++ z0 = svmul_lane_f32 (z1, z2, 2), ++ z0 = svmul_lane (z1, z2, 2)) ++ ++/* ++** mul_lane_3_f32: ++** fmul z0\.s, z1\.s, z2\.s\[3\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_3_f32, svfloat32_t, ++ z0 = svmul_lane_f32 (z1, z2, 3), ++ z0 = svmul_lane (z1, z2, 3)) ++ ++/* ++** mul_lane_z7_f32: ++** fmul z0\.s, z1\.s, z7\.s\[3\] ++** ret ++*/ ++TEST_DUAL_Z (mul_lane_z7_f32, svfloat32_t, svfloat32_t, ++ z0 = svmul_lane_f32 (z1, z7, 3), ++ z0 = svmul_lane (z1, z7, 3)) ++ ++/* ++** mul_lane_z8_f32: ++** str d8, \[sp, -16\]! 
++** mov (z[0-7])\.d, z8\.d ++** fmul z0\.s, z1\.s, \1\.s\[3\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (mul_lane_z8_f32, svfloat32_t, svfloat32_t, z8, ++ z0 = svmul_lane_f32 (z1, z8, 3), ++ z0 = svmul_lane (z1, z8, 3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f64.c +new file mode 100644 +index 000000000..a53a013c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f64.c +@@ -0,0 +1,69 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_lane_0_f64_tied1: ++** fmul z0\.d, z0\.d, z1\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_0_f64_tied1, svfloat64_t, ++ z0 = svmul_lane_f64 (z0, z1, 0), ++ z0 = svmul_lane (z0, z1, 0)) ++ ++/* ++** mul_lane_0_f64_tied2: ++** fmul z0\.d, z1\.d, z0\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_0_f64_tied2, svfloat64_t, ++ z0 = svmul_lane_f64 (z1, z0, 0), ++ z0 = svmul_lane (z1, z0, 0)) ++ ++/* ++** mul_lane_0_f64_untied: ++** fmul z0\.d, z1\.d, z2\.d\[0\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_0_f64_untied, svfloat64_t, ++ z0 = svmul_lane_f64 (z1, z2, 0), ++ z0 = svmul_lane (z1, z2, 0)) ++ ++/* ++** mul_lane_1_f64: ++** fmul z0\.d, z1\.d, z2\.d\[1\] ++** ret ++*/ ++TEST_UNIFORM_Z (mul_lane_1_f64, svfloat64_t, ++ z0 = svmul_lane_f64 (z1, z2, 1), ++ z0 = svmul_lane (z1, z2, 1)) ++ ++/* ++** mul_lane_z7_f64: ++** fmul z0\.d, z1\.d, z7\.d\[1\] ++** ret ++*/ ++TEST_DUAL_Z (mul_lane_z7_f64, svfloat64_t, svfloat64_t, ++ z0 = svmul_lane_f64 (z1, z7, 1), ++ z0 = svmul_lane (z1, z7, 1)) ++ ++/* ++** mul_lane_z15_f64: ++** str d15, \[sp, -16\]! ++** fmul z0\.d, z1\.d, z15\.d\[1\] ++** ldr d15, \[sp\], 16 ++** ret ++*/ ++TEST_DUAL_LANE_REG (mul_lane_z15_f64, svfloat64_t, svfloat64_t, z15, ++ z0 = svmul_lane_f64 (z1, z15, 1), ++ z0 = svmul_lane (z1, z15, 1)) ++ ++/* ++** mul_lane_z16_f64: ++** mov (z[0-9]|z1[0-5])\.d, z16\.d ++** fmul z0\.d, z1\.d, \1\.d\[1\] ++** ret ++*/ ++TEST_DUAL_LANE_REG (mul_lane_z16_f64, svfloat64_t, svfloat64_t, z16, ++ z0 = svmul_lane_f64 (z1, z16, 1), ++ z0 = svmul_lane (z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c +new file mode 100644 +index 000000000..aa08bc274 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c +@@ -0,0 +1,302 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_s16_m_tied1: ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s16_m_tied1, svint16_t, ++ z0 = svmul_s16_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mul z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s16_m_tied2, svint16_t, ++ z0 = svmul_s16_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_s16_m_untied: ++** movprfx z0, z1 ++** mul z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s16_m_untied, svint16_t, ++ z0 = svmul_s16_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svmul_n_s16_m (p0, z0, x0), ++ z0 = svmul_m (p0, z0, x0)) ++ ++/* ++** mul_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** mul z0\.h, p0/m, z0\.h, \1 ++** 
ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svmul_n_s16_m (p0, z1, x0), ++ z0 = svmul_m (p0, z1, x0)) ++ ++/* ++** mul_2_s16_m_tied1: ++** mov (z[0-9]+\.h), #2 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s16_m_tied1, svint16_t, ++ z0 = svmul_n_s16_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #2 ++** movprfx z0, z1 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s16_m_untied, svint16_t, ++ z0 = svmul_n_s16_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_m1_s16_m: ++** mov (z[0-9]+)\.b, #-1 ++** mul z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_s16_m, svint16_t, ++ z0 = svmul_n_s16_m (p0, z0, -1), ++ z0 = svmul_m (p0, z0, -1)) ++ ++/* ++** mul_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s16_z_tied1, svint16_t, ++ z0 = svmul_s16_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s16_z_tied2, svint16_t, ++ z0 = svmul_s16_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mul z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s16_z_untied, svint16_t, ++ z0 = svmul_s16_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svmul_n_s16_z (p0, z0, x0), ++ z0 = svmul_z (p0, z0, x0)) ++ ++/* ++** mul_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mul z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svmul_n_s16_z (p0, z1, x0), ++ z0 = svmul_z (p0, z1, x0)) ++ ++/* ++** mul_2_s16_z_tied1: ++** mov (z[0-9]+\.h), #2 ++** movprfx z0\.h, p0/z, z0\.h ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s16_z_tied1, svint16_t, ++ z0 = svmul_n_s16_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_s16_z_untied: ++** mov (z[0-9]+\.h), #2 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mul z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s16_z_untied, svint16_t, ++ z0 = svmul_n_s16_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_s16_x_tied1: ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s16_x_tied1, svint16_t, ++ z0 = svmul_s16_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_s16_x_tied2: ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s16_x_tied2, svint16_t, ++ z0 = svmul_s16_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** mul z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s16_x_untied, svint16_t, ++ z0 = svmul_s16_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX 
(mul_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svmul_n_s16_x (p0, z0, x0), ++ z0 = svmul_x (p0, z0, x0)) ++ ++/* ++** mul_w0_s16_x_untied: ++** mov z0\.h, w0 ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svmul_n_s16_x (p0, z1, x0), ++ z0 = svmul_x (p0, z1, x0)) ++ ++/* ++** mul_2_s16_x_tied1: ++** mul z0\.h, z0\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s16_x_tied1, svint16_t, ++ z0 = svmul_n_s16_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_s16_x_untied: ++** movprfx z0, z1 ++** mul z0\.h, z0\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s16_x_untied, svint16_t, ++ z0 = svmul_n_s16_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** mul_127_s16_x: ++** mul z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_127_s16_x, svint16_t, ++ z0 = svmul_n_s16_x (p0, z0, 127), ++ z0 = svmul_x (p0, z0, 127)) ++ ++/* ++** mul_128_s16_x: ++** mov (z[0-9]+\.h), #128 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_128_s16_x, svint16_t, ++ z0 = svmul_n_s16_x (p0, z0, 128), ++ z0 = svmul_x (p0, z0, 128)) ++ ++/* ++** mul_255_s16_x: ++** mov (z[0-9]+\.h), #255 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_255_s16_x, svint16_t, ++ z0 = svmul_n_s16_x (p0, z0, 255), ++ z0 = svmul_x (p0, z0, 255)) ++ ++/* ++** mul_m1_s16_x: ++** mul z0\.h, z0\.h, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_s16_x, svint16_t, ++ z0 = svmul_n_s16_x (p0, z0, -1), ++ z0 = svmul_x (p0, z0, -1)) ++ ++/* ++** mul_m127_s16_x: ++** mul z0\.h, z0\.h, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m127_s16_x, svint16_t, ++ z0 = svmul_n_s16_x (p0, z0, -127), ++ z0 = svmul_x (p0, z0, -127)) ++ ++/* ++** mul_m128_s16_x: ++** mul z0\.h, z0\.h, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m128_s16_x, svint16_t, ++ z0 = svmul_n_s16_x (p0, z0, -128), ++ z0 = svmul_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c +new file mode 100644 +index 000000000..7acf77fdb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c +@@ -0,0 +1,302 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_s32_m_tied1: ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s32_m_tied1, svint32_t, ++ z0 = svmul_s32_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mul z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s32_m_tied2, svint32_t, ++ z0 = svmul_s32_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_s32_m_untied: ++** movprfx z0, z1 ++** mul z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s32_m_untied, svint32_t, ++ z0 = svmul_s32_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svmul_n_s32_m (p0, z0, x0), ++ z0 = svmul_m (p0, z0, x0)) ++ ++/* ++** mul_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svmul_n_s32_m (p0, z1, x0), ++ z0 = svmul_m (p0, z1, x0)) ++ ++/* ++** mul_2_s32_m_tied1: ++** mov (z[0-9]+\.s), #2 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s32_m_tied1, svint32_t, ++ z0 = 
svmul_n_s32_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #2 ++** movprfx z0, z1 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s32_m_untied, svint32_t, ++ z0 = svmul_n_s32_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_m1_s32_m: ++** mov (z[0-9]+)\.b, #-1 ++** mul z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_s32_m, svint32_t, ++ z0 = svmul_n_s32_m (p0, z0, -1), ++ z0 = svmul_m (p0, z0, -1)) ++ ++/* ++** mul_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s32_z_tied1, svint32_t, ++ z0 = svmul_s32_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s32_z_tied2, svint32_t, ++ z0 = svmul_s32_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mul z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s32_z_untied, svint32_t, ++ z0 = svmul_s32_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svmul_n_s32_z (p0, z0, x0), ++ z0 = svmul_z (p0, z0, x0)) ++ ++/* ++** mul_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mul z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svmul_n_s32_z (p0, z1, x0), ++ z0 = svmul_z (p0, z1, x0)) ++ ++/* ++** mul_2_s32_z_tied1: ++** mov (z[0-9]+\.s), #2 ++** movprfx z0\.s, p0/z, z0\.s ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s32_z_tied1, svint32_t, ++ z0 = svmul_n_s32_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_s32_z_untied: ++** mov (z[0-9]+\.s), #2 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mul z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s32_z_untied, svint32_t, ++ z0 = svmul_n_s32_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_s32_x_tied1: ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s32_x_tied1, svint32_t, ++ z0 = svmul_s32_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_s32_x_tied2: ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s32_x_tied2, svint32_t, ++ z0 = svmul_s32_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** mul z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s32_x_untied, svint32_t, ++ z0 = svmul_s32_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svmul_n_s32_x (p0, z0, x0), ++ z0 = svmul_x (p0, z0, x0)) ++ ++/* ++** mul_w0_s32_x_untied: ++** mov z0\.s, w0 ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svmul_n_s32_x (p0, 
z1, x0), ++ z0 = svmul_x (p0, z1, x0)) ++ ++/* ++** mul_2_s32_x_tied1: ++** mul z0\.s, z0\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s32_x_tied1, svint32_t, ++ z0 = svmul_n_s32_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_s32_x_untied: ++** movprfx z0, z1 ++** mul z0\.s, z0\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s32_x_untied, svint32_t, ++ z0 = svmul_n_s32_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** mul_127_s32_x: ++** mul z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_127_s32_x, svint32_t, ++ z0 = svmul_n_s32_x (p0, z0, 127), ++ z0 = svmul_x (p0, z0, 127)) ++ ++/* ++** mul_128_s32_x: ++** mov (z[0-9]+\.s), #128 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_128_s32_x, svint32_t, ++ z0 = svmul_n_s32_x (p0, z0, 128), ++ z0 = svmul_x (p0, z0, 128)) ++ ++/* ++** mul_255_s32_x: ++** mov (z[0-9]+\.s), #255 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_255_s32_x, svint32_t, ++ z0 = svmul_n_s32_x (p0, z0, 255), ++ z0 = svmul_x (p0, z0, 255)) ++ ++/* ++** mul_m1_s32_x: ++** mul z0\.s, z0\.s, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_s32_x, svint32_t, ++ z0 = svmul_n_s32_x (p0, z0, -1), ++ z0 = svmul_x (p0, z0, -1)) ++ ++/* ++** mul_m127_s32_x: ++** mul z0\.s, z0\.s, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m127_s32_x, svint32_t, ++ z0 = svmul_n_s32_x (p0, z0, -127), ++ z0 = svmul_x (p0, z0, -127)) ++ ++/* ++** mul_m128_s32_x: ++** mul z0\.s, z0\.s, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m128_s32_x, svint32_t, ++ z0 = svmul_n_s32_x (p0, z0, -128), ++ z0 = svmul_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c +new file mode 100644 +index 000000000..549105f1e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c +@@ -0,0 +1,302 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_s64_m_tied1: ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s64_m_tied1, svint64_t, ++ z0 = svmul_s64_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s64_m_tied2, svint64_t, ++ z0 = svmul_s64_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_s64_m_untied: ++** movprfx z0, z1 ++** mul z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s64_m_untied, svint64_t, ++ z0 = svmul_s64_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svmul_n_s64_m (p0, z0, x0), ++ z0 = svmul_m (p0, z0, x0)) ++ ++/* ++** mul_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svmul_n_s64_m (p0, z1, x0), ++ z0 = svmul_m (p0, z1, x0)) ++ ++/* ++** mul_2_s64_m_tied1: ++** mov (z[0-9]+\.d), #2 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s64_m_tied1, svint64_t, ++ z0 = svmul_n_s64_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #2 ++** movprfx z0, z1 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s64_m_untied, svint64_t, ++ z0 = svmul_n_s64_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) 
++ ++/* ++** mul_m1_s64_m: ++** mov (z[0-9]+)\.b, #-1 ++** mul z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_s64_m, svint64_t, ++ z0 = svmul_n_s64_m (p0, z0, -1), ++ z0 = svmul_m (p0, z0, -1)) ++ ++/* ++** mul_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s64_z_tied1, svint64_t, ++ z0 = svmul_s64_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s64_z_tied2, svint64_t, ++ z0 = svmul_s64_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mul z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s64_z_untied, svint64_t, ++ z0 = svmul_s64_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svmul_n_s64_z (p0, z0, x0), ++ z0 = svmul_z (p0, z0, x0)) ++ ++/* ++** mul_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mul z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svmul_n_s64_z (p0, z1, x0), ++ z0 = svmul_z (p0, z1, x0)) ++ ++/* ++** mul_2_s64_z_tied1: ++** mov (z[0-9]+\.d), #2 ++** movprfx z0\.d, p0/z, z0\.d ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s64_z_tied1, svint64_t, ++ z0 = svmul_n_s64_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_s64_z_untied: ++** mov (z[0-9]+\.d), #2 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mul z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s64_z_untied, svint64_t, ++ z0 = svmul_n_s64_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_s64_x_tied1: ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s64_x_tied1, svint64_t, ++ z0 = svmul_s64_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_s64_x_tied2: ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s64_x_tied2, svint64_t, ++ z0 = svmul_s64_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** mul z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s64_x_untied, svint64_t, ++ z0 = svmul_s64_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svmul_n_s64_x (p0, z0, x0), ++ z0 = svmul_x (p0, z0, x0)) ++ ++/* ++** mul_x0_s64_x_untied: ++** mov z0\.d, x0 ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svmul_n_s64_x (p0, z1, x0), ++ z0 = svmul_x (p0, z1, x0)) ++ ++/* ++** mul_2_s64_x_tied1: ++** mul z0\.d, z0\.d, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s64_x_tied1, svint64_t, ++ z0 = svmul_n_s64_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_s64_x_untied: ++** movprfx z0, z1 ++** mul z0\.d, z0\.d, #2 ++** ret 
++*/ ++TEST_UNIFORM_Z (mul_2_s64_x_untied, svint64_t, ++ z0 = svmul_n_s64_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** mul_127_s64_x: ++** mul z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_127_s64_x, svint64_t, ++ z0 = svmul_n_s64_x (p0, z0, 127), ++ z0 = svmul_x (p0, z0, 127)) ++ ++/* ++** mul_128_s64_x: ++** mov (z[0-9]+\.d), #128 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_128_s64_x, svint64_t, ++ z0 = svmul_n_s64_x (p0, z0, 128), ++ z0 = svmul_x (p0, z0, 128)) ++ ++/* ++** mul_255_s64_x: ++** mov (z[0-9]+\.d), #255 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_255_s64_x, svint64_t, ++ z0 = svmul_n_s64_x (p0, z0, 255), ++ z0 = svmul_x (p0, z0, 255)) ++ ++/* ++** mul_m1_s64_x: ++** mul z0\.d, z0\.d, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_s64_x, svint64_t, ++ z0 = svmul_n_s64_x (p0, z0, -1), ++ z0 = svmul_x (p0, z0, -1)) ++ ++/* ++** mul_m127_s64_x: ++** mul z0\.d, z0\.d, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m127_s64_x, svint64_t, ++ z0 = svmul_n_s64_x (p0, z0, -127), ++ z0 = svmul_x (p0, z0, -127)) ++ ++/* ++** mul_m128_s64_x: ++** mul z0\.d, z0\.d, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m128_s64_x, svint64_t, ++ z0 = svmul_n_s64_x (p0, z0, -128), ++ z0 = svmul_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c +new file mode 100644 +index 000000000..012e6f250 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c +@@ -0,0 +1,300 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_s8_m_tied1: ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s8_m_tied1, svint8_t, ++ z0 = svmul_s8_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mul z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s8_m_tied2, svint8_t, ++ z0 = svmul_s8_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_s8_m_untied: ++** movprfx z0, z1 ++** mul z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s8_m_untied, svint8_t, ++ z0 = svmul_s8_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svmul_n_s8_m (p0, z0, x0), ++ z0 = svmul_m (p0, z0, x0)) ++ ++/* ++** mul_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svmul_n_s8_m (p0, z1, x0), ++ z0 = svmul_m (p0, z1, x0)) ++ ++/* ++** mul_2_s8_m_tied1: ++** mov (z[0-9]+\.b), #2 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s8_m_tied1, svint8_t, ++ z0 = svmul_n_s8_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #2 ++** movprfx z0, z1 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s8_m_untied, svint8_t, ++ z0 = svmul_n_s8_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_m1_s8_m: ++** mov (z[0-9]+\.b), #-1 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_s8_m, svint8_t, ++ z0 = svmul_n_s8_m (p0, z0, -1), ++ z0 = svmul_m (p0, z0, -1)) ++ ++/* ++** mul_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ 
++TEST_UNIFORM_Z (mul_s8_z_tied1, svint8_t, ++ z0 = svmul_s8_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s8_z_tied2, svint8_t, ++ z0 = svmul_s8_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mul z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s8_z_untied, svint8_t, ++ z0 = svmul_s8_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svmul_n_s8_z (p0, z0, x0), ++ z0 = svmul_z (p0, z0, x0)) ++ ++/* ++** mul_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mul z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svmul_n_s8_z (p0, z1, x0), ++ z0 = svmul_z (p0, z1, x0)) ++ ++/* ++** mul_2_s8_z_tied1: ++** mov (z[0-9]+\.b), #2 ++** movprfx z0\.b, p0/z, z0\.b ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s8_z_tied1, svint8_t, ++ z0 = svmul_n_s8_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_s8_z_untied: ++** mov (z[0-9]+\.b), #2 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mul z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s8_z_untied, svint8_t, ++ z0 = svmul_n_s8_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_s8_x_tied1: ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s8_x_tied1, svint8_t, ++ z0 = svmul_s8_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_s8_x_tied2: ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s8_x_tied2, svint8_t, ++ z0 = svmul_s8_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** mul z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_s8_x_untied, svint8_t, ++ z0 = svmul_s8_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svmul_n_s8_x (p0, z0, x0), ++ z0 = svmul_x (p0, z0, x0)) ++ ++/* ++** mul_w0_s8_x_untied: ++** mov z0\.b, w0 ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svmul_n_s8_x (p0, z1, x0), ++ z0 = svmul_x (p0, z1, x0)) ++ ++/* ++** mul_2_s8_x_tied1: ++** mul z0\.b, z0\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s8_x_tied1, svint8_t, ++ z0 = svmul_n_s8_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_s8_x_untied: ++** movprfx z0, z1 ++** mul z0\.b, z0\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_s8_x_untied, svint8_t, ++ z0 = svmul_n_s8_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** mul_127_s8_x: ++** mul z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_127_s8_x, svint8_t, ++ z0 = svmul_n_s8_x (p0, z0, 127), ++ z0 = svmul_x (p0, z0, 127)) ++ ++/* ++** mul_128_s8_x: ++** mul z0\.b, z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z 
(mul_128_s8_x, svint8_t, ++ z0 = svmul_n_s8_x (p0, z0, 128), ++ z0 = svmul_x (p0, z0, 128)) ++ ++/* ++** mul_255_s8_x: ++** mul z0\.b, z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_255_s8_x, svint8_t, ++ z0 = svmul_n_s8_x (p0, z0, 255), ++ z0 = svmul_x (p0, z0, 255)) ++ ++/* ++** mul_m1_s8_x: ++** mul z0\.b, z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_s8_x, svint8_t, ++ z0 = svmul_n_s8_x (p0, z0, -1), ++ z0 = svmul_x (p0, z0, -1)) ++ ++/* ++** mul_m127_s8_x: ++** mul z0\.b, z0\.b, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m127_s8_x, svint8_t, ++ z0 = svmul_n_s8_x (p0, z0, -127), ++ z0 = svmul_x (p0, z0, -127)) ++ ++/* ++** mul_m128_s8_x: ++** mul z0\.b, z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m128_s8_x, svint8_t, ++ z0 = svmul_n_s8_x (p0, z0, -128), ++ z0 = svmul_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c +new file mode 100644 +index 000000000..300987eb6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c +@@ -0,0 +1,302 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_u16_m_tied1: ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u16_m_tied1, svuint16_t, ++ z0 = svmul_u16_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mul z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u16_m_tied2, svuint16_t, ++ z0 = svmul_u16_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_u16_m_untied: ++** movprfx z0, z1 ++** mul z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u16_m_untied, svuint16_t, ++ z0 = svmul_u16_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svmul_n_u16_m (p0, z0, x0), ++ z0 = svmul_m (p0, z0, x0)) ++ ++/* ++** mul_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svmul_n_u16_m (p0, z1, x0), ++ z0 = svmul_m (p0, z1, x0)) ++ ++/* ++** mul_2_u16_m_tied1: ++** mov (z[0-9]+\.h), #2 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u16_m_tied1, svuint16_t, ++ z0 = svmul_n_u16_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #2 ++** movprfx z0, z1 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u16_m_untied, svuint16_t, ++ z0 = svmul_n_u16_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_m1_u16_m: ++** mov (z[0-9]+)\.b, #-1 ++** mul z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_u16_m, svuint16_t, ++ z0 = svmul_n_u16_m (p0, z0, -1), ++ z0 = svmul_m (p0, z0, -1)) ++ ++/* ++** mul_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u16_z_tied1, svuint16_t, ++ z0 = svmul_u16_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u16_z_tied2, svuint16_t, ++ z0 = svmul_u16_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mul z0\.h, p0/m, z0\.h, 
z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u16_z_untied, svuint16_t, ++ z0 = svmul_u16_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svmul_n_u16_z (p0, z0, x0), ++ z0 = svmul_z (p0, z0, x0)) ++ ++/* ++** mul_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mul z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svmul_n_u16_z (p0, z1, x0), ++ z0 = svmul_z (p0, z1, x0)) ++ ++/* ++** mul_2_u16_z_tied1: ++** mov (z[0-9]+\.h), #2 ++** movprfx z0\.h, p0/z, z0\.h ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u16_z_tied1, svuint16_t, ++ z0 = svmul_n_u16_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_u16_z_untied: ++** mov (z[0-9]+\.h), #2 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** mul z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u16_z_untied, svuint16_t, ++ z0 = svmul_n_u16_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_u16_x_tied1: ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u16_x_tied1, svuint16_t, ++ z0 = svmul_u16_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_u16_x_tied2: ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u16_x_tied2, svuint16_t, ++ z0 = svmul_u16_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** mul z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u16_x_untied, svuint16_t, ++ z0 = svmul_u16_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svmul_n_u16_x (p0, z0, x0), ++ z0 = svmul_x (p0, z0, x0)) ++ ++/* ++** mul_w0_u16_x_untied: ++** mov z0\.h, w0 ++** mul z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svmul_n_u16_x (p0, z1, x0), ++ z0 = svmul_x (p0, z1, x0)) ++ ++/* ++** mul_2_u16_x_tied1: ++** mul z0\.h, z0\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u16_x_tied1, svuint16_t, ++ z0 = svmul_n_u16_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_u16_x_untied: ++** movprfx z0, z1 ++** mul z0\.h, z0\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u16_x_untied, svuint16_t, ++ z0 = svmul_n_u16_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** mul_127_u16_x: ++** mul z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_127_u16_x, svuint16_t, ++ z0 = svmul_n_u16_x (p0, z0, 127), ++ z0 = svmul_x (p0, z0, 127)) ++ ++/* ++** mul_128_u16_x: ++** mov (z[0-9]+\.h), #128 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_128_u16_x, svuint16_t, ++ z0 = svmul_n_u16_x (p0, z0, 128), ++ z0 = svmul_x (p0, z0, 128)) ++ ++/* ++** mul_255_u16_x: ++** mov (z[0-9]+\.h), #255 ++** mul z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_255_u16_x, svuint16_t, ++ z0 = svmul_n_u16_x (p0, z0, 255), ++ z0 = svmul_x (p0, z0, 255)) ++ 
++/* ++** mul_m1_u16_x: ++** mul z0\.h, z0\.h, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_u16_x, svuint16_t, ++ z0 = svmul_n_u16_x (p0, z0, -1), ++ z0 = svmul_x (p0, z0, -1)) ++ ++/* ++** mul_m127_u16_x: ++** mul z0\.h, z0\.h, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m127_u16_x, svuint16_t, ++ z0 = svmul_n_u16_x (p0, z0, -127), ++ z0 = svmul_x (p0, z0, -127)) ++ ++/* ++** mul_m128_u16_x: ++** mul z0\.h, z0\.h, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m128_u16_x, svuint16_t, ++ z0 = svmul_n_u16_x (p0, z0, -128), ++ z0 = svmul_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c +new file mode 100644 +index 000000000..288d17b16 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c +@@ -0,0 +1,302 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_u32_m_tied1: ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u32_m_tied1, svuint32_t, ++ z0 = svmul_u32_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mul z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u32_m_tied2, svuint32_t, ++ z0 = svmul_u32_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_u32_m_untied: ++** movprfx z0, z1 ++** mul z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u32_m_untied, svuint32_t, ++ z0 = svmul_u32_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svmul_n_u32_m (p0, z0, x0), ++ z0 = svmul_m (p0, z0, x0)) ++ ++/* ++** mul_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svmul_n_u32_m (p0, z1, x0), ++ z0 = svmul_m (p0, z1, x0)) ++ ++/* ++** mul_2_u32_m_tied1: ++** mov (z[0-9]+\.s), #2 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u32_m_tied1, svuint32_t, ++ z0 = svmul_n_u32_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #2 ++** movprfx z0, z1 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u32_m_untied, svuint32_t, ++ z0 = svmul_n_u32_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_m1_u32_m: ++** mov (z[0-9]+)\.b, #-1 ++** mul z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_u32_m, svuint32_t, ++ z0 = svmul_n_u32_m (p0, z0, -1), ++ z0 = svmul_m (p0, z0, -1)) ++ ++/* ++** mul_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u32_z_tied1, svuint32_t, ++ z0 = svmul_u32_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u32_z_tied2, svuint32_t, ++ z0 = svmul_u32_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mul z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u32_z_untied, svuint32_t, ++ z0 = svmul_u32_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 
++** movprfx z0\.s, p0/z, z0\.s ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svmul_n_u32_z (p0, z0, x0), ++ z0 = svmul_z (p0, z0, x0)) ++ ++/* ++** mul_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mul z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svmul_n_u32_z (p0, z1, x0), ++ z0 = svmul_z (p0, z1, x0)) ++ ++/* ++** mul_2_u32_z_tied1: ++** mov (z[0-9]+\.s), #2 ++** movprfx z0\.s, p0/z, z0\.s ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u32_z_tied1, svuint32_t, ++ z0 = svmul_n_u32_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_u32_z_untied: ++** mov (z[0-9]+\.s), #2 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** mul z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u32_z_untied, svuint32_t, ++ z0 = svmul_n_u32_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_u32_x_tied1: ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u32_x_tied1, svuint32_t, ++ z0 = svmul_u32_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_u32_x_tied2: ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u32_x_tied2, svuint32_t, ++ z0 = svmul_u32_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** mul z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u32_x_untied, svuint32_t, ++ z0 = svmul_u32_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svmul_n_u32_x (p0, z0, x0), ++ z0 = svmul_x (p0, z0, x0)) ++ ++/* ++** mul_w0_u32_x_untied: ++** mov z0\.s, w0 ++** mul z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svmul_n_u32_x (p0, z1, x0), ++ z0 = svmul_x (p0, z1, x0)) ++ ++/* ++** mul_2_u32_x_tied1: ++** mul z0\.s, z0\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u32_x_tied1, svuint32_t, ++ z0 = svmul_n_u32_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_u32_x_untied: ++** movprfx z0, z1 ++** mul z0\.s, z0\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u32_x_untied, svuint32_t, ++ z0 = svmul_n_u32_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** mul_127_u32_x: ++** mul z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_127_u32_x, svuint32_t, ++ z0 = svmul_n_u32_x (p0, z0, 127), ++ z0 = svmul_x (p0, z0, 127)) ++ ++/* ++** mul_128_u32_x: ++** mov (z[0-9]+\.s), #128 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_128_u32_x, svuint32_t, ++ z0 = svmul_n_u32_x (p0, z0, 128), ++ z0 = svmul_x (p0, z0, 128)) ++ ++/* ++** mul_255_u32_x: ++** mov (z[0-9]+\.s), #255 ++** mul z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_255_u32_x, svuint32_t, ++ z0 = svmul_n_u32_x (p0, z0, 255), ++ z0 = svmul_x (p0, z0, 255)) ++ ++/* ++** mul_m1_u32_x: ++** mul z0\.s, z0\.s, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_u32_x, svuint32_t, ++ z0 = svmul_n_u32_x (p0, z0, -1), ++ z0 = svmul_x (p0, z0, -1)) ++ ++/* ++** mul_m127_u32_x: ++** mul z0\.s, z0\.s, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z 
(mul_m127_u32_x, svuint32_t, ++ z0 = svmul_n_u32_x (p0, z0, -127), ++ z0 = svmul_x (p0, z0, -127)) ++ ++/* ++** mul_m128_u32_x: ++** mul z0\.s, z0\.s, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m128_u32_x, svuint32_t, ++ z0 = svmul_n_u32_x (p0, z0, -128), ++ z0 = svmul_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c +new file mode 100644 +index 000000000..f6959dbc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c +@@ -0,0 +1,302 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_u64_m_tied1: ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u64_m_tied1, svuint64_t, ++ z0 = svmul_u64_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u64_m_tied2, svuint64_t, ++ z0 = svmul_u64_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_u64_m_untied: ++** movprfx z0, z1 ++** mul z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u64_m_untied, svuint64_t, ++ z0 = svmul_u64_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svmul_n_u64_m (p0, z0, x0), ++ z0 = svmul_m (p0, z0, x0)) ++ ++/* ++** mul_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svmul_n_u64_m (p0, z1, x0), ++ z0 = svmul_m (p0, z1, x0)) ++ ++/* ++** mul_2_u64_m_tied1: ++** mov (z[0-9]+\.d), #2 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u64_m_tied1, svuint64_t, ++ z0 = svmul_n_u64_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #2 ++** movprfx z0, z1 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u64_m_untied, svuint64_t, ++ z0 = svmul_n_u64_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_m1_u64_m: ++** mov (z[0-9]+)\.b, #-1 ++** mul z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_u64_m, svuint64_t, ++ z0 = svmul_n_u64_m (p0, z0, -1), ++ z0 = svmul_m (p0, z0, -1)) ++ ++/* ++** mul_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u64_z_tied1, svuint64_t, ++ z0 = svmul_u64_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u64_z_tied2, svuint64_t, ++ z0 = svmul_u64_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mul z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u64_z_untied, svuint64_t, ++ z0 = svmul_u64_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svmul_n_u64_z (p0, z0, x0), ++ z0 = svmul_z (p0, z0, x0)) ++ ++/* ++** mul_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mul z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svmul_n_u64_z (p0, z1, x0), ++ z0 = svmul_z (p0, z1, x0)) ++ ++/* ++** mul_2_u64_z_tied1: ++** mov (z[0-9]+\.d), #2 ++** movprfx z0\.d, p0/z, z0\.d ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u64_z_tied1, svuint64_t, ++ z0 = svmul_n_u64_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_u64_z_untied: ++** mov (z[0-9]+\.d), #2 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** mul z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u64_z_untied, svuint64_t, ++ z0 = svmul_n_u64_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_u64_x_tied1: ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u64_x_tied1, svuint64_t, ++ z0 = svmul_u64_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_u64_x_tied2: ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u64_x_tied2, svuint64_t, ++ z0 = svmul_u64_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** mul z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u64_x_untied, svuint64_t, ++ z0 = svmul_u64_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svmul_n_u64_x (p0, z0, x0), ++ z0 = svmul_x (p0, z0, x0)) ++ ++/* ++** mul_x0_u64_x_untied: ++** mov z0\.d, x0 ++** mul z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svmul_n_u64_x (p0, z1, x0), ++ z0 = svmul_x (p0, z1, x0)) ++ ++/* ++** mul_2_u64_x_tied1: ++** mul z0\.d, z0\.d, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u64_x_tied1, svuint64_t, ++ z0 = svmul_n_u64_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_u64_x_untied: ++** movprfx z0, z1 ++** mul z0\.d, z0\.d, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u64_x_untied, svuint64_t, ++ z0 = svmul_n_u64_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** mul_127_u64_x: ++** mul z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_127_u64_x, svuint64_t, ++ z0 = svmul_n_u64_x (p0, z0, 127), ++ z0 = svmul_x (p0, z0, 127)) ++ ++/* ++** mul_128_u64_x: ++** mov (z[0-9]+\.d), #128 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_128_u64_x, svuint64_t, ++ z0 = svmul_n_u64_x (p0, z0, 128), ++ z0 = svmul_x (p0, z0, 128)) ++ ++/* ++** mul_255_u64_x: ++** mov (z[0-9]+\.d), #255 ++** mul z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_255_u64_x, svuint64_t, ++ z0 = svmul_n_u64_x (p0, z0, 255), ++ z0 = svmul_x (p0, z0, 255)) ++ ++/* ++** mul_m1_u64_x: ++** mul z0\.d, z0\.d, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_u64_x, svuint64_t, ++ z0 = svmul_n_u64_x (p0, z0, -1), ++ z0 = svmul_x (p0, z0, -1)) ++ ++/* ++** mul_m127_u64_x: ++** mul z0\.d, z0\.d, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m127_u64_x, svuint64_t, ++ z0 = svmul_n_u64_x (p0, z0, -127), ++ z0 = svmul_x (p0, z0, -127)) ++ ++/* ++** mul_m128_u64_x: ++** mul z0\.d, z0\.d, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m128_u64_x, svuint64_t, ++ z0 = svmul_n_u64_x (p0, z0, -128), ++ z0 = svmul_x (p0, 
z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c +new file mode 100644 +index 000000000..b2745a48f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c +@@ -0,0 +1,300 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mul_u8_m_tied1: ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u8_m_tied1, svuint8_t, ++ z0 = svmul_u8_m (p0, z0, z1), ++ z0 = svmul_m (p0, z0, z1)) ++ ++/* ++** mul_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** mul z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u8_m_tied2, svuint8_t, ++ z0 = svmul_u8_m (p0, z1, z0), ++ z0 = svmul_m (p0, z1, z0)) ++ ++/* ++** mul_u8_m_untied: ++** movprfx z0, z1 ++** mul z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u8_m_untied, svuint8_t, ++ z0 = svmul_u8_m (p0, z1, z2), ++ z0 = svmul_m (p0, z1, z2)) ++ ++/* ++** mul_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svmul_n_u8_m (p0, z0, x0), ++ z0 = svmul_m (p0, z0, x0)) ++ ++/* ++** mul_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svmul_n_u8_m (p0, z1, x0), ++ z0 = svmul_m (p0, z1, x0)) ++ ++/* ++** mul_2_u8_m_tied1: ++** mov (z[0-9]+\.b), #2 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u8_m_tied1, svuint8_t, ++ z0 = svmul_n_u8_m (p0, z0, 2), ++ z0 = svmul_m (p0, z0, 2)) ++ ++/* ++** mul_2_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #2 ++** movprfx z0, z1 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u8_m_untied, svuint8_t, ++ z0 = svmul_n_u8_m (p0, z1, 2), ++ z0 = svmul_m (p0, z1, 2)) ++ ++/* ++** mul_m1_u8_m: ++** mov (z[0-9]+\.b), #-1 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_u8_m, svuint8_t, ++ z0 = svmul_n_u8_m (p0, z0, -1), ++ z0 = svmul_m (p0, z0, -1)) ++ ++/* ++** mul_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u8_z_tied1, svuint8_t, ++ z0 = svmul_u8_z (p0, z0, z1), ++ z0 = svmul_z (p0, z0, z1)) ++ ++/* ++** mul_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u8_z_tied2, svuint8_t, ++ z0 = svmul_u8_z (p0, z1, z0), ++ z0 = svmul_z (p0, z1, z0)) ++ ++/* ++** mul_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mul z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u8_z_untied, svuint8_t, ++ z0 = svmul_u8_z (p0, z1, z2), ++ z0 = svmul_z (p0, z1, z2)) ++ ++/* ++** mul_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svmul_n_u8_z (p0, z0, x0), ++ z0 = svmul_z (p0, z0, x0)) ++ ++/* ++** mul_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mul z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svmul_n_u8_z (p0, z1, x0), ++ z0 = svmul_z (p0, z1, x0)) ++ ++/* ++** mul_2_u8_z_tied1: 
++** mov (z[0-9]+\.b), #2 ++** movprfx z0\.b, p0/z, z0\.b ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u8_z_tied1, svuint8_t, ++ z0 = svmul_n_u8_z (p0, z0, 2), ++ z0 = svmul_z (p0, z0, 2)) ++ ++/* ++** mul_2_u8_z_untied: ++** mov (z[0-9]+\.b), #2 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** mul z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u8_z_untied, svuint8_t, ++ z0 = svmul_n_u8_z (p0, z1, 2), ++ z0 = svmul_z (p0, z1, 2)) ++ ++/* ++** mul_u8_x_tied1: ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u8_x_tied1, svuint8_t, ++ z0 = svmul_u8_x (p0, z0, z1), ++ z0 = svmul_x (p0, z0, z1)) ++ ++/* ++** mul_u8_x_tied2: ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u8_x_tied2, svuint8_t, ++ z0 = svmul_u8_x (p0, z1, z0), ++ z0 = svmul_x (p0, z1, z0)) ++ ++/* ++** mul_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** mul z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mul_u8_x_untied, svuint8_t, ++ z0 = svmul_u8_x (p0, z1, z2), ++ z0 = svmul_x (p0, z1, z2)) ++ ++/* ++** mul_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** mul z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svmul_n_u8_x (p0, z0, x0), ++ z0 = svmul_x (p0, z0, x0)) ++ ++/* ++** mul_w0_u8_x_untied: ++** mov z0\.b, w0 ++** mul z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mul_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svmul_n_u8_x (p0, z1, x0), ++ z0 = svmul_x (p0, z1, x0)) ++ ++/* ++** mul_2_u8_x_tied1: ++** mul z0\.b, z0\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u8_x_tied1, svuint8_t, ++ z0 = svmul_n_u8_x (p0, z0, 2), ++ z0 = svmul_x (p0, z0, 2)) ++ ++/* ++** mul_2_u8_x_untied: ++** movprfx z0, z1 ++** mul z0\.b, z0\.b, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_2_u8_x_untied, svuint8_t, ++ z0 = svmul_n_u8_x (p0, z1, 2), ++ z0 = svmul_x (p0, z1, 2)) ++ ++/* ++** mul_127_u8_x: ++** mul z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_127_u8_x, svuint8_t, ++ z0 = svmul_n_u8_x (p0, z0, 127), ++ z0 = svmul_x (p0, z0, 127)) ++ ++/* ++** mul_128_u8_x: ++** mul z0\.b, z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_128_u8_x, svuint8_t, ++ z0 = svmul_n_u8_x (p0, z0, 128), ++ z0 = svmul_x (p0, z0, 128)) ++ ++/* ++** mul_255_u8_x: ++** mul z0\.b, z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_255_u8_x, svuint8_t, ++ z0 = svmul_n_u8_x (p0, z0, 255), ++ z0 = svmul_x (p0, z0, 255)) ++ ++/* ++** mul_m1_u8_x: ++** mul z0\.b, z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m1_u8_x, svuint8_t, ++ z0 = svmul_n_u8_x (p0, z0, -1), ++ z0 = svmul_x (p0, z0, -1)) ++ ++/* ++** mul_m127_u8_x: ++** mul z0\.b, z0\.b, #-127 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m127_u8_x, svuint8_t, ++ z0 = svmul_n_u8_x (p0, z0, -127), ++ z0 = svmul_x (p0, z0, -127)) ++ ++/* ++** mul_m128_u8_x: ++** mul z0\.b, z0\.b, #-128 ++** ret ++*/ ++TEST_UNIFORM_Z (mul_m128_u8_x, svuint8_t, ++ z0 = svmul_n_u8_x (p0, z0, -128), ++ z0 = svmul_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s16.c +new file mode 100644 +index 000000000..a81532f5d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s16.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulh_s16_m_tied1: ++** smulh z0\.h, p0/m, 
z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s16_m_tied1, svint16_t, ++ z0 = svmulh_s16_m (p0, z0, z1), ++ z0 = svmulh_m (p0, z0, z1)) ++ ++/* ++** mulh_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** smulh z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s16_m_tied2, svint16_t, ++ z0 = svmulh_s16_m (p0, z1, z0), ++ z0 = svmulh_m (p0, z1, z0)) ++ ++/* ++** mulh_s16_m_untied: ++** movprfx z0, z1 ++** smulh z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s16_m_untied, svint16_t, ++ z0 = svmulh_s16_m (p0, z1, z2), ++ z0 = svmulh_m (p0, z1, z2)) ++ ++/* ++** mulh_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** smulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svmulh_n_s16_m (p0, z0, x0), ++ z0 = svmulh_m (p0, z0, x0)) ++ ++/* ++** mulh_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** smulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svmulh_n_s16_m (p0, z1, x0), ++ z0 = svmulh_m (p0, z1, x0)) ++ ++/* ++** mulh_11_s16_m_tied1: ++** mov (z[0-9]+\.h), #11 ++** smulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s16_m_tied1, svint16_t, ++ z0 = svmulh_n_s16_m (p0, z0, 11), ++ z0 = svmulh_m (p0, z0, 11)) ++ ++/* ++** mulh_11_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #11 ++** movprfx z0, z1 ++** smulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s16_m_untied, svint16_t, ++ z0 = svmulh_n_s16_m (p0, z1, 11), ++ z0 = svmulh_m (p0, z1, 11)) ++ ++/* ++** mulh_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** smulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s16_z_tied1, svint16_t, ++ z0 = svmulh_s16_z (p0, z0, z1), ++ z0 = svmulh_z (p0, z0, z1)) ++ ++/* ++** mulh_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** smulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s16_z_tied2, svint16_t, ++ z0 = svmulh_s16_z (p0, z1, z0), ++ z0 = svmulh_z (p0, z1, z0)) ++ ++/* ++** mulh_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** smulh z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** smulh z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s16_z_untied, svint16_t, ++ z0 = svmulh_s16_z (p0, z1, z2), ++ z0 = svmulh_z (p0, z1, z2)) ++ ++/* ++** mulh_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** smulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svmulh_n_s16_z (p0, z0, x0), ++ z0 = svmulh_z (p0, z0, x0)) ++ ++/* ++** mulh_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** smulh z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** smulh z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svmulh_n_s16_z (p0, z1, x0), ++ z0 = svmulh_z (p0, z1, x0)) ++ ++/* ++** mulh_11_s16_z_tied1: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** smulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s16_z_tied1, svint16_t, ++ z0 = svmulh_n_s16_z (p0, z0, 11), ++ z0 = svmulh_z (p0, z0, 11)) ++ ++/* ++** mulh_11_s16_z_untied: ++** mov (z[0-9]+\.h), #11 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** smulh z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** smulh z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s16_z_untied, svint16_t, ++ z0 = 
svmulh_n_s16_z (p0, z1, 11), ++ z0 = svmulh_z (p0, z1, 11)) ++ ++/* ++** mulh_s16_x_tied1: ++** smulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s16_x_tied1, svint16_t, ++ z0 = svmulh_s16_x (p0, z0, z1), ++ z0 = svmulh_x (p0, z0, z1)) ++ ++/* ++** mulh_s16_x_tied2: ++** smulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s16_x_tied2, svint16_t, ++ z0 = svmulh_s16_x (p0, z1, z0), ++ z0 = svmulh_x (p0, z1, z0)) ++ ++/* ++** mulh_s16_x_untied: ++** ( ++** movprfx z0, z1 ++** smulh z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** smulh z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s16_x_untied, svint16_t, ++ z0 = svmulh_s16_x (p0, z1, z2), ++ z0 = svmulh_x (p0, z1, z2)) ++ ++/* ++** mulh_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** smulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svmulh_n_s16_x (p0, z0, x0), ++ z0 = svmulh_x (p0, z0, x0)) ++ ++/* ++** mulh_w0_s16_x_untied: ++** mov z0\.h, w0 ++** smulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svmulh_n_s16_x (p0, z1, x0), ++ z0 = svmulh_x (p0, z1, x0)) ++ ++/* ++** mulh_11_s16_x_tied1: ++** mov (z[0-9]+\.h), #11 ++** smulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s16_x_tied1, svint16_t, ++ z0 = svmulh_n_s16_x (p0, z0, 11), ++ z0 = svmulh_x (p0, z0, 11)) ++ ++/* ++** mulh_11_s16_x_untied: ++** mov z0\.h, #11 ++** smulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s16_x_untied, svint16_t, ++ z0 = svmulh_n_s16_x (p0, z1, 11), ++ z0 = svmulh_x (p0, z1, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s32.c +new file mode 100644 +index 000000000..078feeb6a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s32.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulh_s32_m_tied1: ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s32_m_tied1, svint32_t, ++ z0 = svmulh_s32_m (p0, z0, z1), ++ z0 = svmulh_m (p0, z0, z1)) ++ ++/* ++** mulh_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** smulh z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s32_m_tied2, svint32_t, ++ z0 = svmulh_s32_m (p0, z1, z0), ++ z0 = svmulh_m (p0, z1, z0)) ++ ++/* ++** mulh_s32_m_untied: ++** movprfx z0, z1 ++** smulh z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s32_m_untied, svint32_t, ++ z0 = svmulh_s32_m (p0, z1, z2), ++ z0 = svmulh_m (p0, z1, z2)) ++ ++/* ++** mulh_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** smulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svmulh_n_s32_m (p0, z0, x0), ++ z0 = svmulh_m (p0, z0, x0)) ++ ++/* ++** mulh_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** smulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svmulh_n_s32_m (p0, z1, x0), ++ z0 = svmulh_m (p0, z1, x0)) ++ ++/* ++** mulh_11_s32_m_tied1: ++** mov (z[0-9]+\.s), #11 ++** smulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s32_m_tied1, svint32_t, ++ z0 = svmulh_n_s32_m (p0, z0, 11), ++ z0 = svmulh_m (p0, z0, 11)) ++ ++/* ++** mulh_11_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #11 ++** movprfx z0, z1 ++** smulh z0\.s, p0/m, z0\.s, \1 
++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s32_m_untied, svint32_t, ++ z0 = svmulh_n_s32_m (p0, z1, 11), ++ z0 = svmulh_m (p0, z1, 11)) ++ ++/* ++** mulh_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s32_z_tied1, svint32_t, ++ z0 = svmulh_s32_z (p0, z0, z1), ++ z0 = svmulh_z (p0, z0, z1)) ++ ++/* ++** mulh_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s32_z_tied2, svint32_t, ++ z0 = svmulh_s32_z (p0, z1, z0), ++ z0 = svmulh_z (p0, z1, z0)) ++ ++/* ++** mulh_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** smulh z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s32_z_untied, svint32_t, ++ z0 = svmulh_s32_z (p0, z1, z2), ++ z0 = svmulh_z (p0, z1, z2)) ++ ++/* ++** mulh_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** smulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svmulh_n_s32_z (p0, z0, x0), ++ z0 = svmulh_z (p0, z0, x0)) ++ ++/* ++** mulh_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** smulh z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svmulh_n_s32_z (p0, z1, x0), ++ z0 = svmulh_z (p0, z1, x0)) ++ ++/* ++** mulh_11_s32_z_tied1: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** smulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s32_z_tied1, svint32_t, ++ z0 = svmulh_n_s32_z (p0, z0, 11), ++ z0 = svmulh_z (p0, z0, 11)) ++ ++/* ++** mulh_11_s32_z_untied: ++** mov (z[0-9]+\.s), #11 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** smulh z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s32_z_untied, svint32_t, ++ z0 = svmulh_n_s32_z (p0, z1, 11), ++ z0 = svmulh_z (p0, z1, 11)) ++ ++/* ++** mulh_s32_x_tied1: ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s32_x_tied1, svint32_t, ++ z0 = svmulh_s32_x (p0, z0, z1), ++ z0 = svmulh_x (p0, z0, z1)) ++ ++/* ++** mulh_s32_x_tied2: ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s32_x_tied2, svint32_t, ++ z0 = svmulh_s32_x (p0, z1, z0), ++ z0 = svmulh_x (p0, z1, z0)) ++ ++/* ++** mulh_s32_x_untied: ++** ( ++** movprfx z0, z1 ++** smulh z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s32_x_untied, svint32_t, ++ z0 = svmulh_s32_x (p0, z1, z2), ++ z0 = svmulh_x (p0, z1, z2)) ++ ++/* ++** mulh_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** smulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svmulh_n_s32_x (p0, z0, x0), ++ z0 = svmulh_x (p0, z0, x0)) ++ ++/* ++** mulh_w0_s32_x_untied: ++** mov z0\.s, w0 ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svmulh_n_s32_x (p0, z1, x0), ++ z0 = svmulh_x (p0, z1, x0)) ++ ++/* ++** mulh_11_s32_x_tied1: ++** mov (z[0-9]+\.s), #11 ++** smulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s32_x_tied1, svint32_t, ++ z0 = svmulh_n_s32_x (p0, z0, 11), ++ z0 = svmulh_x (p0, z0, 11)) ++ ++/* ++** mulh_11_s32_x_untied: 
++** mov z0\.s, #11 ++** smulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s32_x_untied, svint32_t, ++ z0 = svmulh_n_s32_x (p0, z1, 11), ++ z0 = svmulh_x (p0, z1, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s64.c +new file mode 100644 +index 000000000..a87d4d5ce +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s64.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulh_s64_m_tied1: ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s64_m_tied1, svint64_t, ++ z0 = svmulh_s64_m (p0, z0, z1), ++ z0 = svmulh_m (p0, z0, z1)) ++ ++/* ++** mulh_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** smulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s64_m_tied2, svint64_t, ++ z0 = svmulh_s64_m (p0, z1, z0), ++ z0 = svmulh_m (p0, z1, z0)) ++ ++/* ++** mulh_s64_m_untied: ++** movprfx z0, z1 ++** smulh z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s64_m_untied, svint64_t, ++ z0 = svmulh_s64_m (p0, z1, z2), ++ z0 = svmulh_m (p0, z1, z2)) ++ ++/* ++** mulh_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** smulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svmulh_n_s64_m (p0, z0, x0), ++ z0 = svmulh_m (p0, z0, x0)) ++ ++/* ++** mulh_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** smulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svmulh_n_s64_m (p0, z1, x0), ++ z0 = svmulh_m (p0, z1, x0)) ++ ++/* ++** mulh_11_s64_m_tied1: ++** mov (z[0-9]+\.d), #11 ++** smulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s64_m_tied1, svint64_t, ++ z0 = svmulh_n_s64_m (p0, z0, 11), ++ z0 = svmulh_m (p0, z0, 11)) ++ ++/* ++** mulh_11_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #11 ++** movprfx z0, z1 ++** smulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s64_m_untied, svint64_t, ++ z0 = svmulh_n_s64_m (p0, z1, 11), ++ z0 = svmulh_m (p0, z1, 11)) ++ ++/* ++** mulh_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s64_z_tied1, svint64_t, ++ z0 = svmulh_s64_z (p0, z0, z1), ++ z0 = svmulh_z (p0, z0, z1)) ++ ++/* ++** mulh_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s64_z_tied2, svint64_t, ++ z0 = svmulh_s64_z (p0, z1, z0), ++ z0 = svmulh_z (p0, z1, z0)) ++ ++/* ++** mulh_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** smulh z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s64_z_untied, svint64_t, ++ z0 = svmulh_s64_z (p0, z1, z2), ++ z0 = svmulh_z (p0, z1, z2)) ++ ++/* ++** mulh_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** smulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svmulh_n_s64_z (p0, z0, x0), ++ z0 = svmulh_z (p0, z0, x0)) ++ ++/* ++** mulh_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** smulh z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = 
svmulh_n_s64_z (p0, z1, x0), ++ z0 = svmulh_z (p0, z1, x0)) ++ ++/* ++** mulh_11_s64_z_tied1: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** smulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s64_z_tied1, svint64_t, ++ z0 = svmulh_n_s64_z (p0, z0, 11), ++ z0 = svmulh_z (p0, z0, 11)) ++ ++/* ++** mulh_11_s64_z_untied: ++** mov (z[0-9]+\.d), #11 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** smulh z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s64_z_untied, svint64_t, ++ z0 = svmulh_n_s64_z (p0, z1, 11), ++ z0 = svmulh_z (p0, z1, 11)) ++ ++/* ++** mulh_s64_x_tied1: ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s64_x_tied1, svint64_t, ++ z0 = svmulh_s64_x (p0, z0, z1), ++ z0 = svmulh_x (p0, z0, z1)) ++ ++/* ++** mulh_s64_x_tied2: ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s64_x_tied2, svint64_t, ++ z0 = svmulh_s64_x (p0, z1, z0), ++ z0 = svmulh_x (p0, z1, z0)) ++ ++/* ++** mulh_s64_x_untied: ++** ( ++** movprfx z0, z1 ++** smulh z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s64_x_untied, svint64_t, ++ z0 = svmulh_s64_x (p0, z1, z2), ++ z0 = svmulh_x (p0, z1, z2)) ++ ++/* ++** mulh_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** smulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svmulh_n_s64_x (p0, z0, x0), ++ z0 = svmulh_x (p0, z0, x0)) ++ ++/* ++** mulh_x0_s64_x_untied: ++** mov z0\.d, x0 ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svmulh_n_s64_x (p0, z1, x0), ++ z0 = svmulh_x (p0, z1, x0)) ++ ++/* ++** mulh_11_s64_x_tied1: ++** mov (z[0-9]+\.d), #11 ++** smulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s64_x_tied1, svint64_t, ++ z0 = svmulh_n_s64_x (p0, z0, 11), ++ z0 = svmulh_x (p0, z0, 11)) ++ ++/* ++** mulh_11_s64_x_untied: ++** mov z0\.d, #11 ++** smulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s64_x_untied, svint64_t, ++ z0 = svmulh_n_s64_x (p0, z1, 11), ++ z0 = svmulh_x (p0, z1, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s8.c +new file mode 100644 +index 000000000..f9cd01afd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s8.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulh_s8_m_tied1: ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s8_m_tied1, svint8_t, ++ z0 = svmulh_s8_m (p0, z0, z1), ++ z0 = svmulh_m (p0, z0, z1)) ++ ++/* ++** mulh_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** smulh z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s8_m_tied2, svint8_t, ++ z0 = svmulh_s8_m (p0, z1, z0), ++ z0 = svmulh_m (p0, z1, z0)) ++ ++/* ++** mulh_s8_m_untied: ++** movprfx z0, z1 ++** smulh z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s8_m_untied, svint8_t, ++ z0 = svmulh_s8_m (p0, z1, z2), ++ z0 = svmulh_m (p0, z1, z2)) ++ ++/* ++** mulh_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** smulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svmulh_n_s8_m (p0, z0, x0), ++ z0 = svmulh_m (p0, z0, x0)) ++ ++/* ++** 
mulh_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** smulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svmulh_n_s8_m (p0, z1, x0), ++ z0 = svmulh_m (p0, z1, x0)) ++ ++/* ++** mulh_11_s8_m_tied1: ++** mov (z[0-9]+\.b), #11 ++** smulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s8_m_tied1, svint8_t, ++ z0 = svmulh_n_s8_m (p0, z0, 11), ++ z0 = svmulh_m (p0, z0, 11)) ++ ++/* ++** mulh_11_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #11 ++** movprfx z0, z1 ++** smulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s8_m_untied, svint8_t, ++ z0 = svmulh_n_s8_m (p0, z1, 11), ++ z0 = svmulh_m (p0, z1, 11)) ++ ++/* ++** mulh_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s8_z_tied1, svint8_t, ++ z0 = svmulh_s8_z (p0, z0, z1), ++ z0 = svmulh_z (p0, z0, z1)) ++ ++/* ++** mulh_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s8_z_tied2, svint8_t, ++ z0 = svmulh_s8_z (p0, z1, z0), ++ z0 = svmulh_z (p0, z1, z0)) ++ ++/* ++** mulh_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** smulh z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s8_z_untied, svint8_t, ++ z0 = svmulh_s8_z (p0, z1, z2), ++ z0 = svmulh_z (p0, z1, z2)) ++ ++/* ++** mulh_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** smulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svmulh_n_s8_z (p0, z0, x0), ++ z0 = svmulh_z (p0, z0, x0)) ++ ++/* ++** mulh_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** smulh z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svmulh_n_s8_z (p0, z1, x0), ++ z0 = svmulh_z (p0, z1, x0)) ++ ++/* ++** mulh_11_s8_z_tied1: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** smulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s8_z_tied1, svint8_t, ++ z0 = svmulh_n_s8_z (p0, z0, 11), ++ z0 = svmulh_z (p0, z0, 11)) ++ ++/* ++** mulh_11_s8_z_untied: ++** mov (z[0-9]+\.b), #11 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** smulh z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s8_z_untied, svint8_t, ++ z0 = svmulh_n_s8_z (p0, z1, 11), ++ z0 = svmulh_z (p0, z1, 11)) ++ ++/* ++** mulh_s8_x_tied1: ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s8_x_tied1, svint8_t, ++ z0 = svmulh_s8_x (p0, z0, z1), ++ z0 = svmulh_x (p0, z0, z1)) ++ ++/* ++** mulh_s8_x_tied2: ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s8_x_tied2, svint8_t, ++ z0 = svmulh_s8_x (p0, z1, z0), ++ z0 = svmulh_x (p0, z1, z0)) ++ ++/* ++** mulh_s8_x_untied: ++** ( ++** movprfx z0, z1 ++** smulh z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_s8_x_untied, svint8_t, ++ z0 = svmulh_s8_x (p0, z1, z2), ++ z0 = svmulh_x (p0, z1, z2)) ++ ++/* ++** mulh_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** smulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s8_x_tied1, svint8_t, int8_t, ++ 
z0 = svmulh_n_s8_x (p0, z0, x0), ++ z0 = svmulh_x (p0, z0, x0)) ++ ++/* ++** mulh_w0_s8_x_untied: ++** mov z0\.b, w0 ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svmulh_n_s8_x (p0, z1, x0), ++ z0 = svmulh_x (p0, z1, x0)) ++ ++/* ++** mulh_11_s8_x_tied1: ++** mov (z[0-9]+\.b), #11 ++** smulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s8_x_tied1, svint8_t, ++ z0 = svmulh_n_s8_x (p0, z0, 11), ++ z0 = svmulh_x (p0, z0, 11)) ++ ++/* ++** mulh_11_s8_x_untied: ++** mov z0\.b, #11 ++** smulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_s8_x_untied, svint8_t, ++ z0 = svmulh_n_s8_x (p0, z1, 11), ++ z0 = svmulh_x (p0, z1, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u16.c +new file mode 100644 +index 000000000..e9173eb24 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u16.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulh_u16_m_tied1: ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u16_m_tied1, svuint16_t, ++ z0 = svmulh_u16_m (p0, z0, z1), ++ z0 = svmulh_m (p0, z0, z1)) ++ ++/* ++** mulh_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** umulh z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u16_m_tied2, svuint16_t, ++ z0 = svmulh_u16_m (p0, z1, z0), ++ z0 = svmulh_m (p0, z1, z0)) ++ ++/* ++** mulh_u16_m_untied: ++** movprfx z0, z1 ++** umulh z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u16_m_untied, svuint16_t, ++ z0 = svmulh_u16_m (p0, z1, z2), ++ z0 = svmulh_m (p0, z1, z2)) ++ ++/* ++** mulh_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** umulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svmulh_n_u16_m (p0, z0, x0), ++ z0 = svmulh_m (p0, z0, x0)) ++ ++/* ++** mulh_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** umulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svmulh_n_u16_m (p0, z1, x0), ++ z0 = svmulh_m (p0, z1, x0)) ++ ++/* ++** mulh_11_u16_m_tied1: ++** mov (z[0-9]+\.h), #11 ++** umulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u16_m_tied1, svuint16_t, ++ z0 = svmulh_n_u16_m (p0, z0, 11), ++ z0 = svmulh_m (p0, z0, 11)) ++ ++/* ++** mulh_11_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #11 ++** movprfx z0, z1 ++** umulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u16_m_untied, svuint16_t, ++ z0 = svmulh_n_u16_m (p0, z1, 11), ++ z0 = svmulh_m (p0, z1, 11)) ++ ++/* ++** mulh_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u16_z_tied1, svuint16_t, ++ z0 = svmulh_u16_z (p0, z0, z1), ++ z0 = svmulh_z (p0, z0, z1)) ++ ++/* ++** mulh_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u16_z_tied2, svuint16_t, ++ z0 = svmulh_u16_z (p0, z1, z0), ++ z0 = svmulh_z (p0, z1, z0)) ++ ++/* ++** mulh_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** umulh z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u16_z_untied, svuint16_t, ++ z0 = svmulh_u16_z (p0, z1, z2), ++ z0 = svmulh_z (p0, z1, 
z2)) ++ ++/* ++** mulh_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** umulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svmulh_n_u16_z (p0, z0, x0), ++ z0 = svmulh_z (p0, z0, x0)) ++ ++/* ++** mulh_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** umulh z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svmulh_n_u16_z (p0, z1, x0), ++ z0 = svmulh_z (p0, z1, x0)) ++ ++/* ++** mulh_11_u16_z_tied1: ++** mov (z[0-9]+\.h), #11 ++** movprfx z0\.h, p0/z, z0\.h ++** umulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u16_z_tied1, svuint16_t, ++ z0 = svmulh_n_u16_z (p0, z0, 11), ++ z0 = svmulh_z (p0, z0, 11)) ++ ++/* ++** mulh_11_u16_z_untied: ++** mov (z[0-9]+\.h), #11 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** umulh z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u16_z_untied, svuint16_t, ++ z0 = svmulh_n_u16_z (p0, z1, 11), ++ z0 = svmulh_z (p0, z1, 11)) ++ ++/* ++** mulh_u16_x_tied1: ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u16_x_tied1, svuint16_t, ++ z0 = svmulh_u16_x (p0, z0, z1), ++ z0 = svmulh_x (p0, z0, z1)) ++ ++/* ++** mulh_u16_x_tied2: ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u16_x_tied2, svuint16_t, ++ z0 = svmulh_u16_x (p0, z1, z0), ++ z0 = svmulh_x (p0, z1, z0)) ++ ++/* ++** mulh_u16_x_untied: ++** ( ++** movprfx z0, z1 ++** umulh z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u16_x_untied, svuint16_t, ++ z0 = svmulh_u16_x (p0, z1, z2), ++ z0 = svmulh_x (p0, z1, z2)) ++ ++/* ++** mulh_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** umulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svmulh_n_u16_x (p0, z0, x0), ++ z0 = svmulh_x (p0, z0, x0)) ++ ++/* ++** mulh_w0_u16_x_untied: ++** mov z0\.h, w0 ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svmulh_n_u16_x (p0, z1, x0), ++ z0 = svmulh_x (p0, z1, x0)) ++ ++/* ++** mulh_11_u16_x_tied1: ++** mov (z[0-9]+\.h), #11 ++** umulh z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u16_x_tied1, svuint16_t, ++ z0 = svmulh_n_u16_x (p0, z0, 11), ++ z0 = svmulh_x (p0, z0, 11)) ++ ++/* ++** mulh_11_u16_x_untied: ++** mov z0\.h, #11 ++** umulh z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u16_x_untied, svuint16_t, ++ z0 = svmulh_n_u16_x (p0, z1, 11), ++ z0 = svmulh_x (p0, z1, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u32.c +new file mode 100644 +index 000000000..de1f24f09 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u32.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulh_u32_m_tied1: ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u32_m_tied1, svuint32_t, ++ z0 = svmulh_u32_m (p0, z0, z1), ++ z0 = svmulh_m (p0, z0, z1)) ++ ++/* ++** mulh_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** umulh z0\.s, p0/m, z0\.s, \1\.s 
++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u32_m_tied2, svuint32_t, ++ z0 = svmulh_u32_m (p0, z1, z0), ++ z0 = svmulh_m (p0, z1, z0)) ++ ++/* ++** mulh_u32_m_untied: ++** movprfx z0, z1 ++** umulh z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u32_m_untied, svuint32_t, ++ z0 = svmulh_u32_m (p0, z1, z2), ++ z0 = svmulh_m (p0, z1, z2)) ++ ++/* ++** mulh_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** umulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svmulh_n_u32_m (p0, z0, x0), ++ z0 = svmulh_m (p0, z0, x0)) ++ ++/* ++** mulh_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** umulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svmulh_n_u32_m (p0, z1, x0), ++ z0 = svmulh_m (p0, z1, x0)) ++ ++/* ++** mulh_11_u32_m_tied1: ++** mov (z[0-9]+\.s), #11 ++** umulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u32_m_tied1, svuint32_t, ++ z0 = svmulh_n_u32_m (p0, z0, 11), ++ z0 = svmulh_m (p0, z0, 11)) ++ ++/* ++** mulh_11_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #11 ++** movprfx z0, z1 ++** umulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u32_m_untied, svuint32_t, ++ z0 = svmulh_n_u32_m (p0, z1, 11), ++ z0 = svmulh_m (p0, z1, 11)) ++ ++/* ++** mulh_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u32_z_tied1, svuint32_t, ++ z0 = svmulh_u32_z (p0, z0, z1), ++ z0 = svmulh_z (p0, z0, z1)) ++ ++/* ++** mulh_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u32_z_tied2, svuint32_t, ++ z0 = svmulh_u32_z (p0, z1, z0), ++ z0 = svmulh_z (p0, z1, z0)) ++ ++/* ++** mulh_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** umulh z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u32_z_untied, svuint32_t, ++ z0 = svmulh_u32_z (p0, z1, z2), ++ z0 = svmulh_z (p0, z1, z2)) ++ ++/* ++** mulh_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** umulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svmulh_n_u32_z (p0, z0, x0), ++ z0 = svmulh_z (p0, z0, x0)) ++ ++/* ++** mulh_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** umulh z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svmulh_n_u32_z (p0, z1, x0), ++ z0 = svmulh_z (p0, z1, x0)) ++ ++/* ++** mulh_11_u32_z_tied1: ++** mov (z[0-9]+\.s), #11 ++** movprfx z0\.s, p0/z, z0\.s ++** umulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u32_z_tied1, svuint32_t, ++ z0 = svmulh_n_u32_z (p0, z0, 11), ++ z0 = svmulh_z (p0, z0, 11)) ++ ++/* ++** mulh_11_u32_z_untied: ++** mov (z[0-9]+\.s), #11 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** umulh z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u32_z_untied, svuint32_t, ++ z0 = svmulh_n_u32_z (p0, z1, 11), ++ z0 = svmulh_z (p0, z1, 11)) ++ ++/* ++** mulh_u32_x_tied1: ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u32_x_tied1, svuint32_t, ++ z0 = svmulh_u32_x (p0, z0, z1), ++ z0 = svmulh_x (p0, z0, z1)) ++ 
++/* ++** mulh_u32_x_tied2: ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u32_x_tied2, svuint32_t, ++ z0 = svmulh_u32_x (p0, z1, z0), ++ z0 = svmulh_x (p0, z1, z0)) ++ ++/* ++** mulh_u32_x_untied: ++** ( ++** movprfx z0, z1 ++** umulh z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u32_x_untied, svuint32_t, ++ z0 = svmulh_u32_x (p0, z1, z2), ++ z0 = svmulh_x (p0, z1, z2)) ++ ++/* ++** mulh_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** umulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svmulh_n_u32_x (p0, z0, x0), ++ z0 = svmulh_x (p0, z0, x0)) ++ ++/* ++** mulh_w0_u32_x_untied: ++** mov z0\.s, w0 ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svmulh_n_u32_x (p0, z1, x0), ++ z0 = svmulh_x (p0, z1, x0)) ++ ++/* ++** mulh_11_u32_x_tied1: ++** mov (z[0-9]+\.s), #11 ++** umulh z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u32_x_tied1, svuint32_t, ++ z0 = svmulh_n_u32_x (p0, z0, 11), ++ z0 = svmulh_x (p0, z0, 11)) ++ ++/* ++** mulh_11_u32_x_untied: ++** mov z0\.s, #11 ++** umulh z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u32_x_untied, svuint32_t, ++ z0 = svmulh_n_u32_x (p0, z1, 11), ++ z0 = svmulh_x (p0, z1, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u64.c +new file mode 100644 +index 000000000..0d7e12a7c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u64.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulh_u64_m_tied1: ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u64_m_tied1, svuint64_t, ++ z0 = svmulh_u64_m (p0, z0, z1), ++ z0 = svmulh_m (p0, z0, z1)) ++ ++/* ++** mulh_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** umulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u64_m_tied2, svuint64_t, ++ z0 = svmulh_u64_m (p0, z1, z0), ++ z0 = svmulh_m (p0, z1, z0)) ++ ++/* ++** mulh_u64_m_untied: ++** movprfx z0, z1 ++** umulh z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u64_m_untied, svuint64_t, ++ z0 = svmulh_u64_m (p0, z1, z2), ++ z0 = svmulh_m (p0, z1, z2)) ++ ++/* ++** mulh_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** umulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svmulh_n_u64_m (p0, z0, x0), ++ z0 = svmulh_m (p0, z0, x0)) ++ ++/* ++** mulh_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** umulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svmulh_n_u64_m (p0, z1, x0), ++ z0 = svmulh_m (p0, z1, x0)) ++ ++/* ++** mulh_11_u64_m_tied1: ++** mov (z[0-9]+\.d), #11 ++** umulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u64_m_tied1, svuint64_t, ++ z0 = svmulh_n_u64_m (p0, z0, 11), ++ z0 = svmulh_m (p0, z0, 11)) ++ ++/* ++** mulh_11_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #11 ++** movprfx z0, z1 ++** umulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u64_m_untied, svuint64_t, ++ z0 = svmulh_n_u64_m (p0, z1, 11), ++ z0 = svmulh_m (p0, z1, 11)) ++ ++/* ++** mulh_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ret 
++*/ ++TEST_UNIFORM_Z (mulh_u64_z_tied1, svuint64_t, ++ z0 = svmulh_u64_z (p0, z0, z1), ++ z0 = svmulh_z (p0, z0, z1)) ++ ++/* ++** mulh_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u64_z_tied2, svuint64_t, ++ z0 = svmulh_u64_z (p0, z1, z0), ++ z0 = svmulh_z (p0, z1, z0)) ++ ++/* ++** mulh_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** umulh z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u64_z_untied, svuint64_t, ++ z0 = svmulh_u64_z (p0, z1, z2), ++ z0 = svmulh_z (p0, z1, z2)) ++ ++/* ++** mulh_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** umulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svmulh_n_u64_z (p0, z0, x0), ++ z0 = svmulh_z (p0, z0, x0)) ++ ++/* ++** mulh_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** umulh z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svmulh_n_u64_z (p0, z1, x0), ++ z0 = svmulh_z (p0, z1, x0)) ++ ++/* ++** mulh_11_u64_z_tied1: ++** mov (z[0-9]+\.d), #11 ++** movprfx z0\.d, p0/z, z0\.d ++** umulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u64_z_tied1, svuint64_t, ++ z0 = svmulh_n_u64_z (p0, z0, 11), ++ z0 = svmulh_z (p0, z0, 11)) ++ ++/* ++** mulh_11_u64_z_untied: ++** mov (z[0-9]+\.d), #11 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** umulh z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u64_z_untied, svuint64_t, ++ z0 = svmulh_n_u64_z (p0, z1, 11), ++ z0 = svmulh_z (p0, z1, 11)) ++ ++/* ++** mulh_u64_x_tied1: ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u64_x_tied1, svuint64_t, ++ z0 = svmulh_u64_x (p0, z0, z1), ++ z0 = svmulh_x (p0, z0, z1)) ++ ++/* ++** mulh_u64_x_tied2: ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u64_x_tied2, svuint64_t, ++ z0 = svmulh_u64_x (p0, z1, z0), ++ z0 = svmulh_x (p0, z1, z0)) ++ ++/* ++** mulh_u64_x_untied: ++** ( ++** movprfx z0, z1 ++** umulh z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u64_x_untied, svuint64_t, ++ z0 = svmulh_u64_x (p0, z1, z2), ++ z0 = svmulh_x (p0, z1, z2)) ++ ++/* ++** mulh_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** umulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svmulh_n_u64_x (p0, z0, x0), ++ z0 = svmulh_x (p0, z0, x0)) ++ ++/* ++** mulh_x0_u64_x_untied: ++** mov z0\.d, x0 ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svmulh_n_u64_x (p0, z1, x0), ++ z0 = svmulh_x (p0, z1, x0)) ++ ++/* ++** mulh_11_u64_x_tied1: ++** mov (z[0-9]+\.d), #11 ++** umulh z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u64_x_tied1, svuint64_t, ++ z0 = svmulh_n_u64_x (p0, z0, 11), ++ z0 = svmulh_x (p0, z0, 11)) ++ ++/* ++** mulh_11_u64_x_untied: ++** mov z0\.d, #11 ++** umulh z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u64_x_untied, svuint64_t, ++ z0 = svmulh_n_u64_x (p0, z1, 11), ++ z0 = svmulh_x (p0, z1, 11)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u8.c +new file mode 100644 +index 000000000..db7b1be1b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u8.c +@@ -0,0 +1,237 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulh_u8_m_tied1: ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u8_m_tied1, svuint8_t, ++ z0 = svmulh_u8_m (p0, z0, z1), ++ z0 = svmulh_m (p0, z0, z1)) ++ ++/* ++** mulh_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** umulh z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u8_m_tied2, svuint8_t, ++ z0 = svmulh_u8_m (p0, z1, z0), ++ z0 = svmulh_m (p0, z1, z0)) ++ ++/* ++** mulh_u8_m_untied: ++** movprfx z0, z1 ++** umulh z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u8_m_untied, svuint8_t, ++ z0 = svmulh_u8_m (p0, z1, z2), ++ z0 = svmulh_m (p0, z1, z2)) ++ ++/* ++** mulh_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** umulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svmulh_n_u8_m (p0, z0, x0), ++ z0 = svmulh_m (p0, z0, x0)) ++ ++/* ++** mulh_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** umulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svmulh_n_u8_m (p0, z1, x0), ++ z0 = svmulh_m (p0, z1, x0)) ++ ++/* ++** mulh_11_u8_m_tied1: ++** mov (z[0-9]+\.b), #11 ++** umulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u8_m_tied1, svuint8_t, ++ z0 = svmulh_n_u8_m (p0, z0, 11), ++ z0 = svmulh_m (p0, z0, 11)) ++ ++/* ++** mulh_11_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #11 ++** movprfx z0, z1 ++** umulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u8_m_untied, svuint8_t, ++ z0 = svmulh_n_u8_m (p0, z1, 11), ++ z0 = svmulh_m (p0, z1, 11)) ++ ++/* ++** mulh_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u8_z_tied1, svuint8_t, ++ z0 = svmulh_u8_z (p0, z0, z1), ++ z0 = svmulh_z (p0, z0, z1)) ++ ++/* ++** mulh_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u8_z_tied2, svuint8_t, ++ z0 = svmulh_u8_z (p0, z1, z0), ++ z0 = svmulh_z (p0, z1, z0)) ++ ++/* ++** mulh_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** umulh z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u8_z_untied, svuint8_t, ++ z0 = svmulh_u8_z (p0, z1, z2), ++ z0 = svmulh_z (p0, z1, z2)) ++ ++/* ++** mulh_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** umulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svmulh_n_u8_z (p0, z0, x0), ++ z0 = svmulh_z (p0, z0, x0)) ++ ++/* ++** mulh_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** umulh z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svmulh_n_u8_z (p0, z1, x0), ++ z0 = svmulh_z (p0, z1, x0)) ++ ++/* ++** mulh_11_u8_z_tied1: ++** mov (z[0-9]+\.b), #11 ++** movprfx z0\.b, p0/z, z0\.b ++** umulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z 
(mulh_11_u8_z_tied1, svuint8_t, ++ z0 = svmulh_n_u8_z (p0, z0, 11), ++ z0 = svmulh_z (p0, z0, 11)) ++ ++/* ++** mulh_11_u8_z_untied: ++** mov (z[0-9]+\.b), #11 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** umulh z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u8_z_untied, svuint8_t, ++ z0 = svmulh_n_u8_z (p0, z1, 11), ++ z0 = svmulh_z (p0, z1, 11)) ++ ++/* ++** mulh_u8_x_tied1: ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u8_x_tied1, svuint8_t, ++ z0 = svmulh_u8_x (p0, z0, z1), ++ z0 = svmulh_x (p0, z0, z1)) ++ ++/* ++** mulh_u8_x_tied2: ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u8_x_tied2, svuint8_t, ++ z0 = svmulh_u8_x (p0, z1, z0), ++ z0 = svmulh_x (p0, z1, z0)) ++ ++/* ++** mulh_u8_x_untied: ++** ( ++** movprfx z0, z1 ++** umulh z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0, z2 ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_u8_x_untied, svuint8_t, ++ z0 = svmulh_u8_x (p0, z1, z2), ++ z0 = svmulh_x (p0, z1, z2)) ++ ++/* ++** mulh_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** umulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svmulh_n_u8_x (p0, z0, x0), ++ z0 = svmulh_x (p0, z0, x0)) ++ ++/* ++** mulh_w0_u8_x_untied: ++** mov z0\.b, w0 ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (mulh_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svmulh_n_u8_x (p0, z1, x0), ++ z0 = svmulh_x (p0, z1, x0)) ++ ++/* ++** mulh_11_u8_x_tied1: ++** mov (z[0-9]+\.b), #11 ++** umulh z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u8_x_tied1, svuint8_t, ++ z0 = svmulh_n_u8_x (p0, z0, 11), ++ z0 = svmulh_x (p0, z0, 11)) ++ ++/* ++** mulh_11_u8_x_untied: ++** mov z0\.b, #11 ++** umulh z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (mulh_11_u8_x_untied, svuint8_t, ++ z0 = svmulh_n_u8_x (p0, z1, 11), ++ z0 = svmulh_x (p0, z1, 11)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f16.c +new file mode 100644 +index 000000000..ce02c3caa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f16.c +@@ -0,0 +1,472 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulx_f16_m_tied1: ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f16_m_tied1, svfloat16_t, ++ z0 = svmulx_f16_m (p0, z0, z1), ++ z0 = svmulx_m (p0, z0, z1)) ++ ++/* ++** mulx_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmulx z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f16_m_tied2, svfloat16_t, ++ z0 = svmulx_f16_m (p0, z1, z0), ++ z0 = svmulx_m (p0, z1, z0)) ++ ++/* ++** mulx_f16_m_untied: ++** movprfx z0, z1 ++** fmulx z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f16_m_untied, svfloat16_t, ++ z0 = svmulx_f16_m (p0, z1, z2), ++ z0 = svmulx_m (p0, z1, z2)) ++ ++/* ++** mulx_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svmulx_n_f16_m (p0, z0, d4), ++ z0 = svmulx_m (p0, z0, d4)) ++ ++/* ++** mulx_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svmulx_n_f16_m (p0, z1, d4), ++ z0 = svmulx_m 
(p0, z1, d4)) ++ ++/* ++** mulx_1_f16_m_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f16_m_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_m (p0, z0, 1), ++ z0 = svmulx_m (p0, z0, 1)) ++ ++/* ++** mulx_1_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** movprfx z0, z1 ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f16_m_untied, svfloat16_t, ++ z0 = svmulx_n_f16_m (p0, z1, 1), ++ z0 = svmulx_m (p0, z1, 1)) ++ ++/* ++** mulx_0p5_f16_m_tied1: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f16_m_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_m (p0, z0, 0.5), ++ z0 = svmulx_m (p0, z0, 0.5)) ++ ++/* ++** mulx_0p5_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** movprfx z0, z1 ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f16_m_untied, svfloat16_t, ++ z0 = svmulx_n_f16_m (p0, z1, 0.5), ++ z0 = svmulx_m (p0, z1, 0.5)) ++ ++/* ++** mulx_2_f16_m_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f16_m_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_m (p0, z0, 2), ++ z0 = svmulx_m (p0, z0, 2)) ++ ++/* ++** mulx_2_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0, z1 ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f16_m_untied, svfloat16_t, ++ z0 = svmulx_n_f16_m (p0, z1, 2), ++ z0 = svmulx_m (p0, z1, 2)) ++ ++/* ++** mulx_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f16_z_tied1, svfloat16_t, ++ z0 = svmulx_f16_z (p0, z0, z1), ++ z0 = svmulx_z (p0, z0, z1)) ++ ++/* ++** mulx_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f16_z_tied2, svfloat16_t, ++ z0 = svmulx_f16_z (p0, z1, z0), ++ z0 = svmulx_z (p0, z1, z0)) ++ ++/* ++** mulx_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmulx z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f16_z_untied, svfloat16_t, ++ z0 = svmulx_f16_z (p0, z1, z2), ++ z0 = svmulx_z (p0, z1, z2)) ++ ++/* ++** mulx_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svmulx_n_f16_z (p0, z0, d4), ++ z0 = svmulx_z (p0, z0, d4)) ++ ++/* ++** mulx_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svmulx_n_f16_z (p0, z1, d4), ++ z0 = svmulx_z (p0, z1, d4)) ++ ++/* ++** mulx_1_f16_z_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f16_z_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_z (p0, z0, 1), ++ z0 = svmulx_z (p0, z0, 1)) ++ ++/* ++** mulx_1_f16_z_untied: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f16_z_untied, svfloat16_t, ++ z0 = svmulx_n_f16_z (p0, z1, 1), ++ z0 = svmulx_z (p0, z1, 1)) ++ ++/* ++** mulx_0p5_f16_z_tied1: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** movprfx z0\.h, p0/z, z0\.h ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_z (p0, z0, 0.5), ++ z0 = svmulx_z (p0, z0, 0.5)) ++ ++/* ++** mulx_0p5_f16_z_untied: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f16_z_untied, svfloat16_t, ++ z0 = svmulx_n_f16_z (p0, z1, 0.5), ++ z0 = svmulx_z (p0, z1, 0.5)) ++ ++/* ++** mulx_2_f16_z_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f16_z_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_z (p0, z0, 2), ++ z0 = svmulx_z (p0, z0, 2)) ++ ++/* ++** mulx_2_f16_z_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f16_z_untied, svfloat16_t, ++ z0 = svmulx_n_f16_z (p0, z1, 2), ++ z0 = svmulx_z (p0, z1, 2)) ++ ++/* ++** mulx_f16_x_tied1: ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f16_x_tied1, svfloat16_t, ++ z0 = svmulx_f16_x (p0, z0, z1), ++ z0 = svmulx_x (p0, z0, z1)) ++ ++/* ++** mulx_f16_x_tied2: ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f16_x_tied2, svfloat16_t, ++ z0 = svmulx_f16_x (p0, z1, z0), ++ z0 = svmulx_x (p0, z1, z0)) ++ ++/* ++** mulx_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fmulx z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f16_x_untied, svfloat16_t, ++ z0 = svmulx_f16_x (p0, z1, z2), ++ z0 = svmulx_x (p0, z1, z2)) ++ ++/* ++** mulx_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svmulx_n_f16_x (p0, z0, d4), ++ z0 = svmulx_x (p0, z0, d4)) ++ ++/* ++** mulx_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svmulx_n_f16_x (p0, z1, d4), ++ z0 = svmulx_x (p0, z1, d4)) ++ ++/* ++** mulx_1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f16_x_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_x (p0, z0, 1), ++ z0 = svmulx_x (p0, z0, 1)) ++ ++/* ++** mulx_1_f16_x_untied: ++** fmov z0\.h, #1\.0(?:e\+0)? 
++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f16_x_untied, svfloat16_t, ++ z0 = svmulx_n_f16_x (p0, z1, 1), ++ z0 = svmulx_x (p0, z1, 1)) ++ ++/* ++** mulx_0p5_f16_x_tied1: ++** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_x (p0, z0, 0.5), ++ z0 = svmulx_x (p0, z0, 0.5)) ++ ++/* ++** mulx_0p5_f16_x_untied: ++** fmov z0\.h, #(?:0\.5|5\.0e-1) ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f16_x_untied, svfloat16_t, ++ z0 = svmulx_n_f16_x (p0, z1, 0.5), ++ z0 = svmulx_x (p0, z1, 0.5)) ++ ++/* ++** mulx_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fmulx z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f16_x_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_x (p0, z0, 2), ++ z0 = svmulx_x (p0, z0, 2)) ++ ++/* ++** mulx_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fmulx z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f16_x_untied, svfloat16_t, ++ z0 = svmulx_n_f16_x (p0, z1, 2), ++ z0 = svmulx_x (p0, z1, 2)) ++ ++/* ++** ptrue_mulx_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_f16_x_tied1, svfloat16_t, ++ z0 = svmulx_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svmulx_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_mulx_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_f16_x_tied2, svfloat16_t, ++ z0 = svmulx_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svmulx_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_mulx_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_f16_x_untied, svfloat16_t, ++ z0 = svmulx_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svmulx_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_mulx_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_1_f16_x_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svmulx_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_mulx_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_1_f16_x_untied, svfloat16_t, ++ z0 = svmulx_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svmulx_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_mulx_0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_x (svptrue_b16 (), z0, 0.5), ++ z0 = svmulx_x (svptrue_b16 (), z0, 0.5)) ++ ++/* ++** ptrue_mulx_0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_0p5_f16_x_untied, svfloat16_t, ++ z0 = svmulx_n_f16_x (svptrue_b16 (), z1, 0.5), ++ z0 = svmulx_x (svptrue_b16 (), z1, 0.5)) ++ ++/* ++** ptrue_mulx_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_2_f16_x_tied1, svfloat16_t, ++ z0 = svmulx_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svmulx_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_mulx_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_2_f16_x_untied, svfloat16_t, ++ z0 = svmulx_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svmulx_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f32.c +new file mode 100644 +index 000000000..e0d369593 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f32.c +@@ -0,0 +1,472 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulx_f32_m_tied1: ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f32_m_tied1, svfloat32_t, ++ z0 = svmulx_f32_m (p0, z0, z1), ++ z0 = svmulx_m (p0, z0, z1)) ++ ++/* ++** mulx_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fmulx z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f32_m_tied2, svfloat32_t, ++ z0 = svmulx_f32_m (p0, z1, z0), ++ z0 = svmulx_m (p0, z1, z0)) ++ ++/* ++** mulx_f32_m_untied: ++** movprfx z0, z1 ++** fmulx z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f32_m_untied, svfloat32_t, ++ z0 = svmulx_f32_m (p0, z1, z2), ++ z0 = svmulx_m (p0, z1, z2)) ++ ++/* ++** mulx_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svmulx_n_f32_m (p0, z0, d4), ++ z0 = svmulx_m (p0, z0, d4)) ++ ++/* ++** mulx_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svmulx_n_f32_m (p0, z1, d4), ++ z0 = svmulx_m (p0, z1, d4)) ++ ++/* ++** mulx_1_f32_m_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f32_m_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_m (p0, z0, 1), ++ z0 = svmulx_m (p0, z0, 1)) ++ ++/* ++** mulx_1_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** movprfx z0, z1 ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f32_m_untied, svfloat32_t, ++ z0 = svmulx_n_f32_m (p0, z1, 1), ++ z0 = svmulx_m (p0, z1, 1)) ++ ++/* ++** mulx_0p5_f32_m_tied1: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f32_m_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_m (p0, z0, 0.5), ++ z0 = svmulx_m (p0, z0, 0.5)) ++ ++/* ++** mulx_0p5_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** movprfx z0, z1 ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f32_m_untied, svfloat32_t, ++ z0 = svmulx_n_f32_m (p0, z1, 0.5), ++ z0 = svmulx_m (p0, z1, 0.5)) ++ ++/* ++** mulx_2_f32_m_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f32_m_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_m (p0, z0, 2), ++ z0 = svmulx_m (p0, z0, 2)) ++ ++/* ++** mulx_2_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f32_m_untied, svfloat32_t, ++ z0 = svmulx_n_f32_m (p0, z1, 2), ++ z0 = svmulx_m (p0, z1, 2)) ++ ++/* ++** mulx_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f32_z_tied1, svfloat32_t, ++ z0 = svmulx_f32_z (p0, z0, z1), ++ z0 = svmulx_z (p0, z0, z1)) ++ ++/* ++** mulx_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f32_z_tied2, svfloat32_t, ++ z0 = svmulx_f32_z (p0, z1, z0), ++ z0 = svmulx_z (p0, z1, z0)) ++ ++/* ++** mulx_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmulx z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f32_z_untied, svfloat32_t, ++ z0 = svmulx_f32_z (p0, z1, z2), ++ z0 = svmulx_z (p0, z1, z2)) ++ ++/* ++** mulx_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svmulx_n_f32_z (p0, z0, d4), ++ z0 = svmulx_z (p0, z0, d4)) ++ ++/* ++** mulx_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svmulx_n_f32_z (p0, z1, d4), ++ z0 = svmulx_z (p0, z1, d4)) ++ ++/* ++** mulx_1_f32_z_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f32_z_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_z (p0, z0, 1), ++ z0 = svmulx_z (p0, z0, 1)) ++ ++/* ++** mulx_1_f32_z_untied: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f32_z_untied, svfloat32_t, ++ z0 = svmulx_n_f32_z (p0, z1, 1), ++ z0 = svmulx_z (p0, z1, 1)) ++ ++/* ++** mulx_0p5_f32_z_tied1: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** movprfx z0\.s, p0/z, z0\.s ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_z (p0, z0, 0.5), ++ z0 = svmulx_z (p0, z0, 0.5)) ++ ++/* ++** mulx_0p5_f32_z_untied: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f32_z_untied, svfloat32_t, ++ z0 = svmulx_n_f32_z (p0, z1, 0.5), ++ z0 = svmulx_z (p0, z1, 0.5)) ++ ++/* ++** mulx_2_f32_z_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f32_z_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_z (p0, z0, 2), ++ z0 = svmulx_z (p0, z0, 2)) ++ ++/* ++** mulx_2_f32_z_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f32_z_untied, svfloat32_t, ++ z0 = svmulx_n_f32_z (p0, z1, 2), ++ z0 = svmulx_z (p0, z1, 2)) ++ ++/* ++** mulx_f32_x_tied1: ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f32_x_tied1, svfloat32_t, ++ z0 = svmulx_f32_x (p0, z0, z1), ++ z0 = svmulx_x (p0, z0, z1)) ++ ++/* ++** mulx_f32_x_tied2: ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f32_x_tied2, svfloat32_t, ++ z0 = svmulx_f32_x (p0, z1, z0), ++ z0 = svmulx_x (p0, z1, z0)) ++ ++/* ++** mulx_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fmulx z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f32_x_untied, svfloat32_t, ++ z0 = svmulx_f32_x (p0, z1, z2), ++ z0 = svmulx_x (p0, z1, z2)) ++ ++/* ++** mulx_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svmulx_n_f32_x (p0, z0, d4), ++ z0 = svmulx_x (p0, z0, d4)) ++ ++/* ++** mulx_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svmulx_n_f32_x (p0, z1, d4), ++ z0 = svmulx_x (p0, z1, d4)) ++ ++/* ++** mulx_1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f32_x_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_x (p0, z0, 1), ++ z0 = svmulx_x (p0, z0, 1)) ++ ++/* ++** mulx_1_f32_x_untied: ++** fmov z0\.s, #1\.0(?:e\+0)? ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f32_x_untied, svfloat32_t, ++ z0 = svmulx_n_f32_x (p0, z1, 1), ++ z0 = svmulx_x (p0, z1, 1)) ++ ++/* ++** mulx_0p5_f32_x_tied1: ++** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_x (p0, z0, 0.5), ++ z0 = svmulx_x (p0, z0, 0.5)) ++ ++/* ++** mulx_0p5_f32_x_untied: ++** fmov z0\.s, #(?:0\.5|5\.0e-1) ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f32_x_untied, svfloat32_t, ++ z0 = svmulx_n_f32_x (p0, z1, 0.5), ++ z0 = svmulx_x (p0, z1, 0.5)) ++ ++/* ++** mulx_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fmulx z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f32_x_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_x (p0, z0, 2), ++ z0 = svmulx_x (p0, z0, 2)) ++ ++/* ++** mulx_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fmulx z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f32_x_untied, svfloat32_t, ++ z0 = svmulx_n_f32_x (p0, z1, 2), ++ z0 = svmulx_x (p0, z1, 2)) ++ ++/* ++** ptrue_mulx_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_f32_x_tied1, svfloat32_t, ++ z0 = svmulx_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svmulx_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_mulx_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_f32_x_tied2, svfloat32_t, ++ z0 = svmulx_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svmulx_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_mulx_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_f32_x_untied, svfloat32_t, ++ z0 = svmulx_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svmulx_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_mulx_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_1_f32_x_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svmulx_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_mulx_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_1_f32_x_untied, svfloat32_t, ++ z0 = svmulx_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svmulx_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_mulx_0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_x (svptrue_b32 (), z0, 0.5), ++ z0 = svmulx_x (svptrue_b32 (), z0, 0.5)) ++ ++/* ++** ptrue_mulx_0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_0p5_f32_x_untied, svfloat32_t, ++ z0 = svmulx_n_f32_x (svptrue_b32 (), z1, 0.5), ++ z0 = svmulx_x (svptrue_b32 (), z1, 0.5)) ++ ++/* ++** ptrue_mulx_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_2_f32_x_tied1, svfloat32_t, ++ z0 = svmulx_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svmulx_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_mulx_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_2_f32_x_untied, svfloat32_t, ++ z0 = svmulx_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svmulx_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f64.c +new file mode 100644 +index 000000000..6af5703ff +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f64.c +@@ -0,0 +1,472 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** mulx_f64_m_tied1: ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f64_m_tied1, svfloat64_t, ++ z0 = svmulx_f64_m (p0, z0, z1), ++ z0 = svmulx_m (p0, z0, z1)) ++ ++/* ++** mulx_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f64_m_tied2, svfloat64_t, ++ z0 = svmulx_f64_m (p0, z1, z0), ++ z0 = svmulx_m (p0, z1, z0)) ++ ++/* ++** mulx_f64_m_untied: ++** movprfx z0, z1 ++** fmulx z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f64_m_untied, svfloat64_t, ++ z0 = svmulx_f64_m (p0, z1, z2), ++ z0 = svmulx_m (p0, z1, z2)) ++ ++/* ++** mulx_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svmulx_n_f64_m (p0, z0, d4), ++ z0 = svmulx_m (p0, z0, d4)) ++ ++/* ++** mulx_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svmulx_n_f64_m (p0, z1, d4), ++ z0 = svmulx_m (p0, z1, d4)) ++ ++/* ++** mulx_1_f64_m_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f64_m_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_m (p0, z0, 1), ++ z0 = svmulx_m (p0, z0, 1)) ++ ++/* ++** mulx_1_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f64_m_untied, svfloat64_t, ++ z0 = svmulx_n_f64_m (p0, z1, 1), ++ z0 = svmulx_m (p0, z1, 1)) ++ ++/* ++** mulx_0p5_f64_m_tied1: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f64_m_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_m (p0, z0, 0.5), ++ z0 = svmulx_m (p0, z0, 0.5)) ++ ++/* ++** mulx_0p5_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** movprfx z0, z1 ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f64_m_untied, svfloat64_t, ++ z0 = svmulx_n_f64_m (p0, z1, 0.5), ++ z0 = svmulx_m (p0, z1, 0.5)) ++ ++/* ++** mulx_2_f64_m_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f64_m_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_m (p0, z0, 2), ++ z0 = svmulx_m (p0, z0, 2)) ++ ++/* ++** mulx_2_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0, z1 ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f64_m_untied, svfloat64_t, ++ z0 = svmulx_n_f64_m (p0, z1, 2), ++ z0 = svmulx_m (p0, z1, 2)) ++ ++/* ++** mulx_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f64_z_tied1, svfloat64_t, ++ z0 = svmulx_f64_z (p0, z0, z1), ++ z0 = svmulx_z (p0, z0, z1)) ++ ++/* ++** mulx_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f64_z_tied2, svfloat64_t, ++ z0 = svmulx_f64_z (p0, z1, z0), ++ z0 = svmulx_z (p0, z1, z0)) ++ ++/* ++** mulx_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmulx z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f64_z_untied, svfloat64_t, ++ z0 = svmulx_f64_z (p0, z1, z2), ++ z0 = svmulx_z (p0, z1, z2)) ++ ++/* ++** mulx_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svmulx_n_f64_z (p0, z0, d4), ++ z0 = svmulx_z (p0, z0, d4)) ++ ++/* ++** mulx_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svmulx_n_f64_z (p0, z1, d4), ++ z0 = svmulx_z (p0, z1, d4)) ++ ++/* ++** mulx_1_f64_z_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f64_z_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_z (p0, z0, 1), ++ z0 = svmulx_z (p0, z0, 1)) ++ ++/* ++** mulx_1_f64_z_untied: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f64_z_untied, svfloat64_t, ++ z0 = svmulx_n_f64_z (p0, z1, 1), ++ z0 = svmulx_z (p0, z1, 1)) ++ ++/* ++** mulx_0p5_f64_z_tied1: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** movprfx z0\.d, p0/z, z0\.d ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_z (p0, z0, 0.5), ++ z0 = svmulx_z (p0, z0, 0.5)) ++ ++/* ++** mulx_0p5_f64_z_untied: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f64_z_untied, svfloat64_t, ++ z0 = svmulx_n_f64_z (p0, z1, 0.5), ++ z0 = svmulx_z (p0, z1, 0.5)) ++ ++/* ++** mulx_2_f64_z_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f64_z_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_z (p0, z0, 2), ++ z0 = svmulx_z (p0, z0, 2)) ++ ++/* ++** mulx_2_f64_z_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f64_z_untied, svfloat64_t, ++ z0 = svmulx_n_f64_z (p0, z1, 2), ++ z0 = svmulx_z (p0, z1, 2)) ++ ++/* ++** mulx_f64_x_tied1: ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f64_x_tied1, svfloat64_t, ++ z0 = svmulx_f64_x (p0, z0, z1), ++ z0 = svmulx_x (p0, z0, z1)) ++ ++/* ++** mulx_f64_x_tied2: ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f64_x_tied2, svfloat64_t, ++ z0 = svmulx_f64_x (p0, z1, z0), ++ z0 = svmulx_x (p0, z1, z0)) ++ ++/* ++** mulx_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fmulx z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_f64_x_untied, svfloat64_t, ++ z0 = svmulx_f64_x (p0, z1, z2), ++ z0 = svmulx_x (p0, z1, z2)) ++ ++/* ++** mulx_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svmulx_n_f64_x (p0, z0, d4), ++ z0 = svmulx_x (p0, z0, d4)) ++ ++/* ++** mulx_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (mulx_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svmulx_n_f64_x (p0, z1, d4), ++ z0 = svmulx_x (p0, z1, d4)) ++ ++/* ++** mulx_1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f64_x_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_x (p0, z0, 1), ++ z0 = svmulx_x (p0, z0, 1)) ++ ++/* ++** mulx_1_f64_x_untied: ++** fmov z0\.d, #1\.0(?:e\+0)? 
++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_1_f64_x_untied, svfloat64_t, ++ z0 = svmulx_n_f64_x (p0, z1, 1), ++ z0 = svmulx_x (p0, z1, 1)) ++ ++/* ++** mulx_0p5_f64_x_tied1: ++** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_x (p0, z0, 0.5), ++ z0 = svmulx_x (p0, z0, 0.5)) ++ ++/* ++** mulx_0p5_f64_x_untied: ++** fmov z0\.d, #(?:0\.5|5\.0e-1) ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_0p5_f64_x_untied, svfloat64_t, ++ z0 = svmulx_n_f64_x (p0, z1, 0.5), ++ z0 = svmulx_x (p0, z1, 0.5)) ++ ++/* ++** mulx_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fmulx z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f64_x_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_x (p0, z0, 2), ++ z0 = svmulx_x (p0, z0, 2)) ++ ++/* ++** mulx_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fmulx z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (mulx_2_f64_x_untied, svfloat64_t, ++ z0 = svmulx_n_f64_x (p0, z1, 2), ++ z0 = svmulx_x (p0, z1, 2)) ++ ++/* ++** ptrue_mulx_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_f64_x_tied1, svfloat64_t, ++ z0 = svmulx_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svmulx_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_mulx_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_f64_x_tied2, svfloat64_t, ++ z0 = svmulx_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svmulx_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_mulx_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_f64_x_untied, svfloat64_t, ++ z0 = svmulx_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svmulx_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_mulx_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_1_f64_x_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svmulx_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_mulx_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_1_f64_x_untied, svfloat64_t, ++ z0 = svmulx_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svmulx_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_mulx_0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_x (svptrue_b64 (), z0, 0.5), ++ z0 = svmulx_x (svptrue_b64 (), z0, 0.5)) ++ ++/* ++** ptrue_mulx_0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_0p5_f64_x_untied, svfloat64_t, ++ z0 = svmulx_n_f64_x (svptrue_b64 (), z1, 0.5), ++ z0 = svmulx_x (svptrue_b64 (), z1, 0.5)) ++ ++/* ++** ptrue_mulx_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_2_f64_x_tied1, svfloat64_t, ++ z0 = svmulx_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svmulx_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_mulx_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_mulx_2_f64_x_untied, svfloat64_t, ++ z0 = svmulx_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svmulx_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nand_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nand_b.c +new file mode 100644 +index 000000000..c306b80c7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nand_b.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nand_b_z_tied1: ++** nand p0\.b, p3/z, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (nand_b_z_tied1, ++ p0 = svnand_b_z (p3, p0, p1), ++ p0 = svnand_z (p3, p0, p1)) ++ ++/* ++** nand_b_z_tied2: ++** nand p0\.b, p3/z, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (nand_b_z_tied2, ++ p0 = svnand_b_z (p3, p1, p0), ++ p0 = svnand_z (p3, p1, p0)) ++ ++/* ++** nand_b_z_untied: ++** nand p0\.b, p3/z, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (nand_b_z_untied, ++ p0 = svnand_b_z (p3, p1, p2), ++ p0 = svnand_z (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f16.c +new file mode 100644 +index 000000000..c31eba922 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** neg_f16_m_tied12: ++** fneg z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f16_m_tied12, svfloat16_t, ++ z0 = svneg_f16_m (z0, p0, z0), ++ z0 = svneg_m (z0, p0, z0)) ++ ++/* ++** neg_f16_m_tied1: ++** fneg z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f16_m_tied1, svfloat16_t, ++ z0 = svneg_f16_m (z0, p0, z1), ++ z0 = svneg_m (z0, p0, z1)) ++ ++/* ++** neg_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fneg z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f16_m_tied2, svfloat16_t, ++ z0 = svneg_f16_m (z1, p0, z0), ++ z0 = svneg_m (z1, p0, z0)) ++ ++/* ++** neg_f16_m_untied: ++** movprfx z0, z2 ++** fneg z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f16_m_untied, svfloat16_t, ++ z0 = svneg_f16_m (z2, p0, z1), ++ z0 = svneg_m (z2, p0, z1)) ++ ++/* ++** neg_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** fneg z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f16_z_tied1, svfloat16_t, ++ z0 = svneg_f16_z (p0, z0), ++ z0 = svneg_z (p0, z0)) ++ ++/* ++** neg_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fneg z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f16_z_untied, svfloat16_t, ++ z0 = svneg_f16_z (p0, z1), ++ z0 = svneg_z (p0, z1)) ++ ++/* ++** neg_f16_x_tied1: ++** fneg z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f16_x_tied1, svfloat16_t, ++ z0 = svneg_f16_x (p0, z0), ++ z0 = svneg_x (p0, z0)) ++ ++/* ++** neg_f16_x_untied: ++** fneg z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f16_x_untied, svfloat16_t, ++ z0 = svneg_f16_x (p0, z1), ++ z0 = svneg_x (p0, z1)) ++ ++/* ++** ptrue_neg_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_neg_f16_x_tied1, svfloat16_t, ++ z0 = svneg_f16_x (svptrue_b16 (), z0), ++ z0 = svneg_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_neg_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_neg_f16_x_untied, svfloat16_t, ++ z0 = svneg_f16_x (svptrue_b16 (), z1), ++ z0 = svneg_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f32.c +new file mode 100644 +index 000000000..a57d264ad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** neg_f32_m_tied12: ++** fneg z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f32_m_tied12, svfloat32_t, ++ z0 = svneg_f32_m (z0, p0, z0), ++ z0 = svneg_m (z0, p0, z0)) ++ ++/* ++** neg_f32_m_tied1: ++** fneg z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f32_m_tied1, svfloat32_t, ++ z0 = svneg_f32_m (z0, p0, z1), ++ z0 = svneg_m (z0, p0, z1)) ++ ++/* ++** neg_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fneg z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f32_m_tied2, svfloat32_t, ++ z0 = svneg_f32_m (z1, p0, z0), ++ z0 = svneg_m (z1, p0, z0)) ++ ++/* ++** neg_f32_m_untied: ++** movprfx z0, z2 ++** fneg z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f32_m_untied, svfloat32_t, ++ z0 = svneg_f32_m (z2, p0, z1), ++ z0 = svneg_m (z2, p0, z1)) ++ ++/* ++** neg_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** fneg z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f32_z_tied1, svfloat32_t, ++ z0 = svneg_f32_z (p0, z0), ++ z0 = svneg_z (p0, z0)) ++ ++/* ++** neg_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fneg z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f32_z_untied, svfloat32_t, ++ z0 = svneg_f32_z (p0, z1), ++ z0 = svneg_z (p0, z1)) ++ ++/* ++** neg_f32_x_tied1: ++** fneg z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f32_x_tied1, svfloat32_t, ++ z0 = svneg_f32_x (p0, z0), ++ z0 = svneg_x (p0, z0)) ++ ++/* ++** neg_f32_x_untied: ++** fneg z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f32_x_untied, svfloat32_t, ++ z0 = svneg_f32_x (p0, z1), ++ z0 = svneg_x (p0, z1)) ++ ++/* ++** ptrue_neg_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_neg_f32_x_tied1, svfloat32_t, ++ z0 = svneg_f32_x (svptrue_b32 (), z0), ++ z0 = svneg_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_neg_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_neg_f32_x_untied, svfloat32_t, ++ z0 = svneg_f32_x (svptrue_b32 (), z1), ++ z0 = svneg_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f64.c +new file mode 100644 +index 000000000..90cadd4f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** neg_f64_m_tied12: ++** fneg z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f64_m_tied12, svfloat64_t, ++ z0 = svneg_f64_m (z0, p0, z0), ++ z0 = svneg_m (z0, p0, z0)) ++ ++/* ++** neg_f64_m_tied1: ++** fneg z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f64_m_tied1, svfloat64_t, ++ z0 = svneg_f64_m (z0, p0, z1), ++ z0 = svneg_m (z0, p0, z1)) ++ ++/* ++** neg_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fneg z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f64_m_tied2, svfloat64_t, ++ z0 = svneg_f64_m (z1, p0, z0), ++ z0 = svneg_m (z1, p0, z0)) ++ ++/* ++** neg_f64_m_untied: ++** movprfx z0, z2 ++** fneg z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f64_m_untied, svfloat64_t, ++ z0 = svneg_f64_m (z2, p0, z1), ++ z0 = svneg_m (z2, p0, z1)) ++ ++/* ++** neg_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** fneg z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f64_z_tied1, svfloat64_t, ++ z0 = svneg_f64_z (p0, z0), ++ z0 = svneg_z (p0, z0)) ++ ++/* ++** neg_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fneg z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f64_z_untied, svfloat64_t, ++ z0 = svneg_f64_z (p0, z1), ++ z0 = svneg_z (p0, z1)) ++ ++/* ++** neg_f64_x_tied1: ++** fneg z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f64_x_tied1, svfloat64_t, ++ z0 = svneg_f64_x (p0, z0), ++ z0 = svneg_x (p0, z0)) ++ ++/* ++** neg_f64_x_untied: ++** fneg z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_f64_x_untied, svfloat64_t, ++ z0 = svneg_f64_x (p0, z1), ++ z0 = svneg_x (p0, z1)) ++ ++/* ++** ptrue_neg_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_neg_f64_x_tied1, svfloat64_t, ++ z0 = svneg_f64_x (svptrue_b64 (), z0), ++ z0 = svneg_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_neg_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_neg_f64_x_untied, svfloat64_t, ++ z0 = svneg_f64_x (svptrue_b64 (), z1), ++ z0 = svneg_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s16.c +new file mode 100644 +index 000000000..80b2ee0f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** neg_s16_m_tied12: ++** neg z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s16_m_tied12, svint16_t, ++ z0 = svneg_s16_m (z0, p0, z0), ++ z0 = svneg_m (z0, p0, z0)) ++ ++/* ++** neg_s16_m_tied1: ++** neg z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s16_m_tied1, svint16_t, ++ z0 = svneg_s16_m (z0, p0, z1), ++ z0 = svneg_m (z0, p0, z1)) ++ ++/* ++** neg_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** neg z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s16_m_tied2, svint16_t, ++ z0 = svneg_s16_m (z1, p0, z0), ++ z0 = svneg_m (z1, p0, z0)) ++ ++/* ++** neg_s16_m_untied: ++** movprfx z0, z2 ++** neg z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s16_m_untied, svint16_t, ++ z0 = svneg_s16_m (z2, p0, z1), ++ z0 = svneg_m (z2, p0, z1)) ++ ++/* ++** neg_s16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** neg z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s16_z_tied1, svint16_t, ++ z0 = svneg_s16_z (p0, z0), ++ z0 = svneg_z (p0, z0)) ++ ++/* ++** neg_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** neg z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s16_z_untied, svint16_t, ++ z0 = svneg_s16_z (p0, z1), ++ z0 = svneg_z (p0, z1)) ++ ++/* ++** neg_s16_x_tied1: ++** neg z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s16_x_tied1, svint16_t, ++ z0 = svneg_s16_x (p0, z0), ++ z0 = svneg_x (p0, z0)) ++ ++/* ++** neg_s16_x_untied: ++** neg z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s16_x_untied, svint16_t, ++ z0 = svneg_s16_x (p0, z1), ++ z0 = svneg_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s32.c +new file mode 100644 +index 000000000..b8805034e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** neg_s32_m_tied12: ++** neg z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s32_m_tied12, svint32_t, ++ z0 = svneg_s32_m (z0, p0, z0), ++ z0 = svneg_m (z0, p0, z0)) ++ ++/* ++** neg_s32_m_tied1: ++** neg z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s32_m_tied1, svint32_t, ++ z0 = svneg_s32_m (z0, p0, z1), ++ z0 = svneg_m (z0, p0, z1)) ++ ++/* ++** neg_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** neg z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s32_m_tied2, svint32_t, ++ z0 = svneg_s32_m (z1, p0, z0), ++ z0 = svneg_m (z1, p0, z0)) ++ ++/* ++** neg_s32_m_untied: ++** movprfx z0, z2 ++** neg z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s32_m_untied, svint32_t, ++ z0 = svneg_s32_m (z2, p0, z1), ++ z0 = svneg_m (z2, p0, z1)) ++ ++/* ++** neg_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** neg z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s32_z_tied1, svint32_t, ++ z0 = svneg_s32_z (p0, z0), ++ z0 = svneg_z (p0, z0)) ++ ++/* ++** 
neg_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** neg z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s32_z_untied, svint32_t, ++ z0 = svneg_s32_z (p0, z1), ++ z0 = svneg_z (p0, z1)) ++ ++/* ++** neg_s32_x_tied1: ++** neg z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s32_x_tied1, svint32_t, ++ z0 = svneg_s32_x (p0, z0), ++ z0 = svneg_x (p0, z0)) ++ ++/* ++** neg_s32_x_untied: ++** neg z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s32_x_untied, svint32_t, ++ z0 = svneg_s32_x (p0, z1), ++ z0 = svneg_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s64.c +new file mode 100644 +index 000000000..82abe6723 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** neg_s64_m_tied12: ++** neg z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s64_m_tied12, svint64_t, ++ z0 = svneg_s64_m (z0, p0, z0), ++ z0 = svneg_m (z0, p0, z0)) ++ ++/* ++** neg_s64_m_tied1: ++** neg z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s64_m_tied1, svint64_t, ++ z0 = svneg_s64_m (z0, p0, z1), ++ z0 = svneg_m (z0, p0, z1)) ++ ++/* ++** neg_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** neg z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s64_m_tied2, svint64_t, ++ z0 = svneg_s64_m (z1, p0, z0), ++ z0 = svneg_m (z1, p0, z0)) ++ ++/* ++** neg_s64_m_untied: ++** movprfx z0, z2 ++** neg z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s64_m_untied, svint64_t, ++ z0 = svneg_s64_m (z2, p0, z1), ++ z0 = svneg_m (z2, p0, z1)) ++ ++/* ++** neg_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** neg z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s64_z_tied1, svint64_t, ++ z0 = svneg_s64_z (p0, z0), ++ z0 = svneg_z (p0, z0)) ++ ++/* ++** neg_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** neg z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s64_z_untied, svint64_t, ++ z0 = svneg_s64_z (p0, z1), ++ z0 = svneg_z (p0, z1)) ++ ++/* ++** neg_s64_x_tied1: ++** neg z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s64_x_tied1, svint64_t, ++ z0 = svneg_s64_x (p0, z0), ++ z0 = svneg_x (p0, z0)) ++ ++/* ++** neg_s64_x_untied: ++** neg z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s64_x_untied, svint64_t, ++ z0 = svneg_s64_x (p0, z1), ++ z0 = svneg_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s8.c +new file mode 100644 +index 000000000..b7c9949ad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s8.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** neg_s8_m_tied12: ++** neg z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s8_m_tied12, svint8_t, ++ z0 = svneg_s8_m (z0, p0, z0), ++ z0 = svneg_m (z0, p0, z0)) ++ ++/* ++** neg_s8_m_tied1: ++** neg z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s8_m_tied1, svint8_t, ++ z0 = svneg_s8_m (z0, p0, z1), ++ z0 = svneg_m (z0, p0, z1)) ++ ++/* ++** neg_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** neg z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s8_m_tied2, svint8_t, ++ z0 = svneg_s8_m (z1, p0, z0), ++ z0 = svneg_m (z1, p0, z0)) ++ ++/* ++** neg_s8_m_untied: ++** movprfx z0, z2 ++** neg z0\.b, p0/m, z1\.b ++** ret ++*/ 
++TEST_UNIFORM_Z (neg_s8_m_untied, svint8_t, ++ z0 = svneg_s8_m (z2, p0, z1), ++ z0 = svneg_m (z2, p0, z1)) ++ ++/* ++** neg_s8_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, \1\.b ++** neg z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s8_z_tied1, svint8_t, ++ z0 = svneg_s8_z (p0, z0), ++ z0 = svneg_z (p0, z0)) ++ ++/* ++** neg_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** neg z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s8_z_untied, svint8_t, ++ z0 = svneg_s8_z (p0, z1), ++ z0 = svneg_z (p0, z1)) ++ ++/* ++** neg_s8_x_tied1: ++** neg z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s8_x_tied1, svint8_t, ++ z0 = svneg_s8_x (p0, z0), ++ z0 = svneg_x (p0, z0)) ++ ++/* ++** neg_s8_x_untied: ++** neg z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (neg_s8_x_untied, svint8_t, ++ z0 = svneg_s8_x (p0, z1), ++ z0 = svneg_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f16.c +new file mode 100644 +index 000000000..abfe0a0c0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f16.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmad_f16_m_tied1: ++** fnmad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_m_tied1, svfloat16_t, ++ z0 = svnmad_f16_m (p0, z0, z1, z2), ++ z0 = svnmad_m (p0, z0, z1, z2)) ++ ++/* ++** nmad_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmad z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_m_tied2, svfloat16_t, ++ z0 = svnmad_f16_m (p0, z1, z0, z2), ++ z0 = svnmad_m (p0, z1, z0, z2)) ++ ++/* ++** nmad_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmad z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_m_tied3, svfloat16_t, ++ z0 = svnmad_f16_m (p0, z1, z2, z0), ++ z0 = svnmad_m (p0, z1, z2, z0)) ++ ++/* ++** nmad_f16_m_untied: ++** movprfx z0, z1 ++** fnmad z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_m_untied, svfloat16_t, ++ z0 = svnmad_f16_m (p0, z1, z2, z3), ++ z0 = svnmad_m (p0, z1, z2, z3)) ++ ++/* ++** nmad_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svnmad_n_f16_m (p0, z0, z1, d4), ++ z0 = svnmad_m (p0, z0, z1, d4)) ++ ++/* ++** nmad_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fnmad z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svnmad_n_f16_m (p0, z1, z2, d4), ++ z0 = svnmad_m (p0, z1, z2, d4)) ++ ++/* ++** nmad_2_f16_m_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f16_m_tied1, svfloat16_t, ++ z0 = svnmad_n_f16_m (p0, z0, z1, 2), ++ z0 = svnmad_m (p0, z0, z1, 2)) ++ ++/* ++** nmad_2_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmad z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f16_m_untied, svfloat16_t, ++ z0 = svnmad_n_f16_m (p0, z1, z2, 2), ++ z0 = svnmad_m (p0, z1, z2, 2)) ++ ++/* ++** nmad_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_z_tied1, svfloat16_t, ++ z0 = svnmad_f16_z (p0, z0, z1, z2), ++ z0 = svnmad_z (p0, z0, z1, z2)) ++ ++/* ++** nmad_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_z_tied2, svfloat16_t, ++ z0 = svnmad_f16_z (p0, z1, z0, z2), ++ z0 = svnmad_z (p0, z1, z0, z2)) ++ ++/* ++** nmad_f16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_z_tied3, svfloat16_t, ++ z0 = svnmad_f16_z (p0, z1, z2, z0), ++ z0 = svnmad_z (p0, z1, z2, z0)) ++ ++/* ++** nmad_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmad z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmad z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_z_untied, svfloat16_t, ++ z0 = svnmad_f16_z (p0, z1, z2, z3), ++ z0 = svnmad_z (p0, z1, z2, z3)) ++ ++/* ++** nmad_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svnmad_n_f16_z (p0, z0, z1, d4), ++ z0 = svnmad_z (p0, z0, z1, d4)) ++ ++/* ++** nmad_h4_f16_z_tied2: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_h4_f16_z_tied2, svfloat16_t, __fp16, ++ z0 = svnmad_n_f16_z (p0, z1, z0, d4), ++ z0 = svnmad_z (p0, z1, z0, d4)) ++ ++/* ++** nmad_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmad z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svnmad_n_f16_z (p0, z1, z2, d4), ++ z0 = svnmad_z (p0, z1, z2, d4)) ++ ++/* ++** nmad_2_f16_z_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f16_z_tied1, svfloat16_t, ++ z0 = svnmad_n_f16_z (p0, z0, z1, 2), ++ z0 = svnmad_z (p0, z0, z1, 2)) ++ ++/* ++** nmad_2_f16_z_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f16_z_tied2, svfloat16_t, ++ z0 = svnmad_n_f16_z (p0, z1, z0, 2), ++ z0 = svnmad_z (p0, z1, z0, 2)) ++ ++/* ++** nmad_2_f16_z_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmad z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f16_z_untied, svfloat16_t, ++ z0 = svnmad_n_f16_z (p0, z1, z2, 2), ++ z0 = svnmad_z (p0, z1, z2, 2)) ++ ++/* ++** nmad_f16_x_tied1: ++** fnmad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_x_tied1, svfloat16_t, ++ z0 = svnmad_f16_x (p0, z0, z1, z2), ++ z0 = svnmad_x (p0, z0, z1, z2)) ++ ++/* ++** nmad_f16_x_tied2: ++** fnmad z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_x_tied2, svfloat16_t, ++ z0 = svnmad_f16_x (p0, z1, z0, z2), ++ z0 = svnmad_x (p0, z1, z0, z2)) ++ ++/* ++** nmad_f16_x_tied3: ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_x_tied3, svfloat16_t, ++ z0 = svnmad_f16_x (p0, z1, z2, z0), ++ z0 = svnmad_x (p0, z1, z2, z0)) ++ ++/* ++** nmad_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmad z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** fnmad z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0, z3 ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f16_x_untied, svfloat16_t, ++ z0 = svnmad_f16_x (p0, z1, z2, z3), ++ z0 = svnmad_x (p0, z1, z2, z3)) ++ ++/* ++** nmad_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svnmad_n_f16_x (p0, z0, z1, d4), ++ z0 = svnmad_x (p0, z0, z1, d4)) ++ ++/* ++** nmad_h4_f16_x_tied2: ++** mov (z[0-9]+\.h), h4 ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_h4_f16_x_tied2, svfloat16_t, __fp16, ++ z0 = svnmad_n_f16_x (p0, z1, z0, d4), ++ z0 = svnmad_x (p0, z1, z0, d4)) ++ ++/* ++** nmad_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svnmad_n_f16_x (p0, z1, z2, d4), ++ z0 = svnmad_x (p0, z1, z2, d4)) ++ ++/* ++** nmad_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f16_x_tied1, svfloat16_t, ++ z0 = svnmad_n_f16_x (p0, z0, z1, 2), ++ z0 = svnmad_x (p0, z0, z1, 2)) ++ ++/* ++** nmad_2_f16_x_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmad z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f16_x_tied2, svfloat16_t, ++ z0 = svnmad_n_f16_x (p0, z1, z0, 2), ++ z0 = svnmad_x (p0, z1, z0, 2)) ++ ++/* ++** nmad_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f16_x_untied, svfloat16_t, ++ z0 = svnmad_n_f16_x (p0, z1, z2, 2), ++ z0 = svnmad_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmad_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f16_x_tied1, svfloat16_t, ++ z0 = svnmad_f16_x (svptrue_b16 (), z0, z1, z2), ++ z0 = svnmad_x (svptrue_b16 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmad_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f16_x_tied2, svfloat16_t, ++ z0 = svnmad_f16_x (svptrue_b16 (), z1, z0, z2), ++ z0 = svnmad_x (svptrue_b16 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmad_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f16_x_tied3, svfloat16_t, ++ z0 = svnmad_f16_x (svptrue_b16 (), z1, z2, z0), ++ z0 = svnmad_x (svptrue_b16 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmad_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f16_x_untied, svfloat16_t, ++ z0 = svnmad_f16_x (svptrue_b16 (), z1, z2, z3), ++ z0 = svnmad_x (svptrue_b16 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmad_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_2_f16_x_tied1, svfloat16_t, ++ z0 = svnmad_n_f16_x (svptrue_b16 (), z0, z1, 2), ++ z0 = svnmad_x (svptrue_b16 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmad_2_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_2_f16_x_tied2, svfloat16_t, ++ z0 = svnmad_n_f16_x (svptrue_b16 (), z1, z0, 2), ++ z0 = svnmad_x (svptrue_b16 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmad_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_2_f16_x_untied, svfloat16_t, ++ z0 = svnmad_n_f16_x (svptrue_b16 (), z1, z2, 2), ++ z0 = svnmad_x (svptrue_b16 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f32.c +new file mode 100644 +index 000000000..ab86385c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f32.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmad_f32_m_tied1: ++** fnmad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_m_tied1, svfloat32_t, ++ z0 = svnmad_f32_m (p0, z0, z1, z2), ++ z0 = svnmad_m (p0, z0, z1, z2)) ++ ++/* ++** nmad_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmad z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_m_tied2, svfloat32_t, ++ z0 = svnmad_f32_m (p0, z1, z0, z2), ++ z0 = svnmad_m (p0, z1, z0, z2)) ++ ++/* ++** nmad_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmad z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_m_tied3, svfloat32_t, ++ z0 = svnmad_f32_m (p0, z1, z2, z0), ++ z0 = svnmad_m (p0, z1, z2, z0)) ++ ++/* ++** nmad_f32_m_untied: ++** movprfx z0, z1 ++** fnmad z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_m_untied, svfloat32_t, ++ z0 = svnmad_f32_m (p0, z1, z2, z3), ++ z0 = svnmad_m (p0, z1, z2, z3)) ++ ++/* ++** nmad_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svnmad_n_f32_m (p0, z0, z1, d4), ++ z0 = svnmad_m (p0, z0, z1, d4)) ++ ++/* ++** nmad_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fnmad z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svnmad_n_f32_m (p0, z1, z2, d4), ++ z0 = svnmad_m (p0, z1, z2, d4)) ++ ++/* ++** nmad_2_f32_m_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f32_m_tied1, svfloat32_t, ++ z0 = svnmad_n_f32_m (p0, z0, z1, 2), ++ z0 = svnmad_m (p0, z0, z1, 2)) ++ ++/* ++** nmad_2_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmad z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f32_m_untied, svfloat32_t, ++ z0 = svnmad_n_f32_m (p0, z1, z2, 2), ++ z0 = svnmad_m (p0, z1, z2, 2)) ++ ++/* ++** nmad_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_z_tied1, svfloat32_t, ++ z0 = svnmad_f32_z (p0, z0, z1, z2), ++ z0 = svnmad_z (p0, z0, z1, z2)) ++ ++/* ++** nmad_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_z_tied2, svfloat32_t, ++ z0 = svnmad_f32_z (p0, z1, z0, z2), ++ z0 = svnmad_z (p0, z1, z0, z2)) ++ ++/* ++** nmad_f32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_z_tied3, svfloat32_t, ++ z0 = svnmad_f32_z (p0, z1, z2, z0), ++ z0 = svnmad_z (p0, z1, z2, z0)) ++ ++/* ++** nmad_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmad z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmad z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_z_untied, svfloat32_t, ++ z0 = svnmad_f32_z (p0, z1, z2, z3), ++ z0 = svnmad_z (p0, z1, z2, z3)) ++ ++/* ++** nmad_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svnmad_n_f32_z (p0, z0, z1, d4), ++ z0 = svnmad_z (p0, z0, z1, d4)) ++ ++/* ++** nmad_s4_f32_z_tied2: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_s4_f32_z_tied2, svfloat32_t, float, ++ z0 = svnmad_n_f32_z (p0, z1, z0, d4), ++ z0 = svnmad_z (p0, z1, z0, d4)) ++ ++/* ++** nmad_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmad z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svnmad_n_f32_z (p0, z1, z2, d4), ++ z0 = svnmad_z (p0, z1, z2, d4)) ++ ++/* ++** nmad_2_f32_z_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f32_z_tied1, svfloat32_t, ++ z0 = svnmad_n_f32_z (p0, z0, z1, 2), ++ z0 = svnmad_z (p0, z0, z1, 2)) ++ ++/* ++** nmad_2_f32_z_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f32_z_tied2, svfloat32_t, ++ z0 = svnmad_n_f32_z (p0, z1, z0, 2), ++ z0 = svnmad_z (p0, z1, z0, 2)) ++ ++/* ++** nmad_2_f32_z_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmad z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f32_z_untied, svfloat32_t, ++ z0 = svnmad_n_f32_z (p0, z1, z2, 2), ++ z0 = svnmad_z (p0, z1, z2, 2)) ++ ++/* ++** nmad_f32_x_tied1: ++** fnmad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_x_tied1, svfloat32_t, ++ z0 = svnmad_f32_x (p0, z0, z1, z2), ++ z0 = svnmad_x (p0, z0, z1, z2)) ++ ++/* ++** nmad_f32_x_tied2: ++** fnmad z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_x_tied2, svfloat32_t, ++ z0 = svnmad_f32_x (p0, z1, z0, z2), ++ z0 = svnmad_x (p0, z1, z0, z2)) ++ ++/* ++** nmad_f32_x_tied3: ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_x_tied3, svfloat32_t, ++ z0 = svnmad_f32_x (p0, z1, z2, z0), ++ z0 = svnmad_x (p0, z1, z2, z0)) ++ ++/* ++** nmad_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmad z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** fnmad z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0, z3 ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f32_x_untied, svfloat32_t, ++ z0 = svnmad_f32_x (p0, z1, z2, z3), ++ z0 = svnmad_x (p0, z1, z2, z3)) ++ ++/* ++** nmad_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svnmad_n_f32_x (p0, z0, z1, d4), ++ z0 = svnmad_x (p0, z0, z1, d4)) ++ ++/* ++** nmad_s4_f32_x_tied2: ++** mov (z[0-9]+\.s), s4 ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_s4_f32_x_tied2, svfloat32_t, float, ++ z0 = svnmad_n_f32_x (p0, z1, z0, d4), ++ z0 = svnmad_x (p0, z1, z0, d4)) ++ ++/* ++** nmad_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svnmad_n_f32_x (p0, z1, z2, d4), ++ z0 = svnmad_x (p0, z1, z2, d4)) ++ ++/* ++** nmad_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f32_x_tied1, svfloat32_t, ++ z0 = svnmad_n_f32_x (p0, z0, z1, 2), ++ z0 = svnmad_x (p0, z0, z1, 2)) ++ ++/* ++** nmad_2_f32_x_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmad z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f32_x_tied2, svfloat32_t, ++ z0 = svnmad_n_f32_x (p0, z1, z0, 2), ++ z0 = svnmad_x (p0, z1, z0, 2)) ++ ++/* ++** nmad_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f32_x_untied, svfloat32_t, ++ z0 = svnmad_n_f32_x (p0, z1, z2, 2), ++ z0 = svnmad_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmad_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f32_x_tied1, svfloat32_t, ++ z0 = svnmad_f32_x (svptrue_b32 (), z0, z1, z2), ++ z0 = svnmad_x (svptrue_b32 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmad_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f32_x_tied2, svfloat32_t, ++ z0 = svnmad_f32_x (svptrue_b32 (), z1, z0, z2), ++ z0 = svnmad_x (svptrue_b32 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmad_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f32_x_tied3, svfloat32_t, ++ z0 = svnmad_f32_x (svptrue_b32 (), z1, z2, z0), ++ z0 = svnmad_x (svptrue_b32 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmad_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f32_x_untied, svfloat32_t, ++ z0 = svnmad_f32_x (svptrue_b32 (), z1, z2, z3), ++ z0 = svnmad_x (svptrue_b32 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmad_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_2_f32_x_tied1, svfloat32_t, ++ z0 = svnmad_n_f32_x (svptrue_b32 (), z0, z1, 2), ++ z0 = svnmad_x (svptrue_b32 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmad_2_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_2_f32_x_tied2, svfloat32_t, ++ z0 = svnmad_n_f32_x (svptrue_b32 (), z1, z0, 2), ++ z0 = svnmad_x (svptrue_b32 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmad_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_2_f32_x_untied, svfloat32_t, ++ z0 = svnmad_n_f32_x (svptrue_b32 (), z1, z2, 2), ++ z0 = svnmad_x (svptrue_b32 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f64.c +new file mode 100644 +index 000000000..c236ff5a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f64.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmad_f64_m_tied1: ++** fnmad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_m_tied1, svfloat64_t, ++ z0 = svnmad_f64_m (p0, z0, z1, z2), ++ z0 = svnmad_m (p0, z0, z1, z2)) ++ ++/* ++** nmad_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fnmad z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_m_tied2, svfloat64_t, ++ z0 = svnmad_f64_m (p0, z1, z0, z2), ++ z0 = svnmad_m (p0, z1, z0, z2)) ++ ++/* ++** nmad_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fnmad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_m_tied3, svfloat64_t, ++ z0 = svnmad_f64_m (p0, z1, z2, z0), ++ z0 = svnmad_m (p0, z1, z2, z0)) ++ ++/* ++** nmad_f64_m_untied: ++** movprfx z0, z1 ++** fnmad z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_m_untied, svfloat64_t, ++ z0 = svnmad_f64_m (p0, z1, z2, z3), ++ z0 = svnmad_m (p0, z1, z2, z3)) ++ ++/* ++** nmad_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svnmad_n_f64_m (p0, z0, z1, d4), ++ z0 = svnmad_m (p0, z0, z1, d4)) ++ ++/* ++** nmad_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fnmad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svnmad_n_f64_m (p0, z1, z2, d4), ++ z0 = svnmad_m (p0, z1, z2, d4)) ++ ++/* ++** nmad_2_f64_m_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f64_m_tied1, svfloat64_t, ++ z0 = svnmad_n_f64_m (p0, z0, z1, 2), ++ z0 = svnmad_m (p0, z0, z1, 2)) ++ ++/* ++** nmad_2_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmad z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f64_m_untied, svfloat64_t, ++ z0 = svnmad_n_f64_m (p0, z1, z2, 2), ++ z0 = svnmad_m (p0, z1, z2, 2)) ++ ++/* ++** nmad_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_z_tied1, svfloat64_t, ++ z0 = svnmad_f64_z (p0, z0, z1, z2), ++ z0 = svnmad_z (p0, z0, z1, z2)) ++ ++/* ++** nmad_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_z_tied2, svfloat64_t, ++ z0 = svnmad_f64_z (p0, z1, z0, z2), ++ z0 = svnmad_z (p0, z1, z0, z2)) ++ ++/* ++** nmad_f64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_z_tied3, svfloat64_t, ++ z0 = svnmad_f64_z (p0, z1, z2, z0), ++ z0 = svnmad_z (p0, z1, z2, z0)) ++ ++/* ++** nmad_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmad z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmad z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_z_untied, svfloat64_t, ++ z0 = svnmad_f64_z (p0, z1, z2, z3), ++ z0 = svnmad_z (p0, z1, z2, z3)) ++ ++/* ++** nmad_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svnmad_n_f64_z (p0, z0, z1, d4), ++ z0 = svnmad_z (p0, z0, z1, d4)) ++ ++/* ++** nmad_d4_f64_z_tied2: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_d4_f64_z_tied2, svfloat64_t, double, ++ z0 = svnmad_n_f64_z (p0, z1, z0, d4), ++ z0 = svnmad_z (p0, z1, z0, d4)) ++ ++/* ++** nmad_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmad z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svnmad_n_f64_z (p0, z1, z2, d4), ++ z0 = svnmad_z (p0, z1, z2, d4)) ++ ++/* ++** nmad_2_f64_z_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f64_z_tied1, svfloat64_t, ++ z0 = svnmad_n_f64_z (p0, z0, z1, 2), ++ z0 = svnmad_z (p0, z0, z1, 2)) ++ ++/* ++** nmad_2_f64_z_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f64_z_tied2, svfloat64_t, ++ z0 = svnmad_n_f64_z (p0, z1, z0, 2), ++ z0 = svnmad_z (p0, z1, z0, 2)) ++ ++/* ++** nmad_2_f64_z_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmad z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f64_z_untied, svfloat64_t, ++ z0 = svnmad_n_f64_z (p0, z1, z2, 2), ++ z0 = svnmad_z (p0, z1, z2, 2)) ++ ++/* ++** nmad_f64_x_tied1: ++** fnmad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_x_tied1, svfloat64_t, ++ z0 = svnmad_f64_x (p0, z0, z1, z2), ++ z0 = svnmad_x (p0, z0, z1, z2)) ++ ++/* ++** nmad_f64_x_tied2: ++** fnmad z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_x_tied2, svfloat64_t, ++ z0 = svnmad_f64_x (p0, z1, z0, z2), ++ z0 = svnmad_x (p0, z1, z0, z2)) ++ ++/* ++** nmad_f64_x_tied3: ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_x_tied3, svfloat64_t, ++ z0 = svnmad_f64_x (p0, z1, z2, z0), ++ z0 = svnmad_x (p0, z1, z2, z0)) ++ ++/* ++** nmad_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmad z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** fnmad z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0, z3 ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_f64_x_untied, svfloat64_t, ++ z0 = svnmad_f64_x (p0, z1, z2, z3), ++ z0 = svnmad_x (p0, z1, z2, z3)) ++ ++/* ++** nmad_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svnmad_n_f64_x (p0, z0, z1, d4), ++ z0 = svnmad_x (p0, z0, z1, d4)) ++ ++/* ++** nmad_d4_f64_x_tied2: ++** mov (z[0-9]+\.d), d4 ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_d4_f64_x_tied2, svfloat64_t, double, ++ z0 = svnmad_n_f64_x (p0, z1, z0, d4), ++ z0 = svnmad_x (p0, z1, z0, d4)) ++ ++/* ++** nmad_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (nmad_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svnmad_n_f64_x (p0, z1, z2, d4), ++ z0 = svnmad_x (p0, z1, z2, d4)) ++ ++/* ++** nmad_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f64_x_tied1, svfloat64_t, ++ z0 = svnmad_n_f64_x (p0, z0, z1, 2), ++ z0 = svnmad_x (p0, z0, z1, 2)) ++ ++/* ++** nmad_2_f64_x_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmad z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f64_x_tied2, svfloat64_t, ++ z0 = svnmad_n_f64_x (p0, z1, z0, 2), ++ z0 = svnmad_x (p0, z1, z0, 2)) ++ ++/* ++** nmad_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmad_2_f64_x_untied, svfloat64_t, ++ z0 = svnmad_n_f64_x (p0, z1, z2, 2), ++ z0 = svnmad_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmad_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f64_x_tied1, svfloat64_t, ++ z0 = svnmad_f64_x (svptrue_b64 (), z0, z1, z2), ++ z0 = svnmad_x (svptrue_b64 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmad_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f64_x_tied2, svfloat64_t, ++ z0 = svnmad_f64_x (svptrue_b64 (), z1, z0, z2), ++ z0 = svnmad_x (svptrue_b64 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmad_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f64_x_tied3, svfloat64_t, ++ z0 = svnmad_f64_x (svptrue_b64 (), z1, z2, z0), ++ z0 = svnmad_x (svptrue_b64 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmad_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_f64_x_untied, svfloat64_t, ++ z0 = svnmad_f64_x (svptrue_b64 (), z1, z2, z3), ++ z0 = svnmad_x (svptrue_b64 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmad_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_2_f64_x_tied1, svfloat64_t, ++ z0 = svnmad_n_f64_x (svptrue_b64 (), z0, z1, 2), ++ z0 = svnmad_x (svptrue_b64 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmad_2_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_2_f64_x_tied2, svfloat64_t, ++ z0 = svnmad_n_f64_x (svptrue_b64 (), z1, z0, 2), ++ z0 = svnmad_x (svptrue_b64 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmad_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmad_2_f64_x_untied, svfloat64_t, ++ z0 = svnmad_n_f64_x (svptrue_b64 (), z1, z2, 2), ++ z0 = svnmad_x (svptrue_b64 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f16.c +new file mode 100644 +index 000000000..f7ac377fd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f16.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmla_f16_m_tied1: ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_m_tied1, svfloat16_t, ++ z0 = svnmla_f16_m (p0, z0, z1, z2), ++ z0 = svnmla_m (p0, z0, z1, z2)) ++ ++/* ++** nmla_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmla z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_m_tied2, svfloat16_t, ++ z0 = svnmla_f16_m (p0, z1, z0, z2), ++ z0 = svnmla_m (p0, z1, z0, z2)) ++ ++/* ++** nmla_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmla z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_m_tied3, svfloat16_t, ++ z0 = svnmla_f16_m (p0, z1, z2, z0), ++ z0 = svnmla_m (p0, z1, z2, z0)) ++ ++/* ++** nmla_f16_m_untied: ++** movprfx z0, z1 ++** fnmla z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_m_untied, svfloat16_t, ++ z0 = svnmla_f16_m (p0, z1, z2, z3), ++ z0 = svnmla_m (p0, z1, z2, z3)) ++ ++/* ++** nmla_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fnmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svnmla_n_f16_m (p0, z0, z1, d4), ++ z0 = svnmla_m (p0, z0, z1, d4)) ++ ++/* ++** nmla_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fnmla z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svnmla_n_f16_m (p0, z1, z2, d4), ++ z0 = svnmla_m (p0, z1, z2, d4)) ++ ++/* ++** nmla_2_f16_m_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f16_m_tied1, svfloat16_t, ++ z0 = svnmla_n_f16_m (p0, z0, z1, 2), ++ z0 = svnmla_m (p0, z0, z1, 2)) ++ ++/* ++** nmla_2_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmla z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f16_m_untied, svfloat16_t, ++ z0 = svnmla_n_f16_m (p0, z1, z2, 2), ++ z0 = svnmla_m (p0, z1, z2, 2)) ++ ++/* ++** nmla_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_z_tied1, svfloat16_t, ++ z0 = svnmla_f16_z (p0, z0, z1, z2), ++ z0 = svnmla_z (p0, z0, z1, z2)) ++ ++/* ++** nmla_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_z_tied2, svfloat16_t, ++ z0 = svnmla_f16_z (p0, z1, z0, z2), ++ z0 = svnmla_z (p0, z1, z0, z2)) ++ ++/* ++** nmla_f16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_z_tied3, svfloat16_t, ++ z0 = svnmla_f16_z (p0, z1, z2, z0), ++ z0 = svnmla_z (p0, z1, z2, z0)) ++ ++/* ++** nmla_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmla z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmad z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** fnmad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_z_untied, svfloat16_t, ++ z0 = svnmla_f16_z (p0, z1, z2, z3), ++ z0 = svnmla_z (p0, z1, z2, z3)) ++ ++/* ++** nmla_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fnmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svnmla_n_f16_z (p0, z0, z1, d4), ++ z0 = svnmla_z (p0, z0, z1, d4)) ++ ++/* ++** nmla_h4_f16_z_tied2: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fnmad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_h4_f16_z_tied2, svfloat16_t, __fp16, ++ z0 = svnmla_n_f16_z (p0, z1, z0, d4), ++ z0 = svnmla_z (p0, z1, z0, d4)) ++ ++/* ++** nmla_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmla z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmad z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** fnmad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svnmla_n_f16_z (p0, z1, z2, d4), ++ z0 = svnmla_z (p0, z1, z2, d4)) ++ ++/* ++** nmla_2_f16_z_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fnmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f16_z_tied1, svfloat16_t, ++ z0 = svnmla_n_f16_z (p0, z0, z1, 2), ++ z0 = svnmla_z (p0, z0, z1, 2)) ++ ++/* ++** nmla_2_f16_z_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fnmad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f16_z_tied2, svfloat16_t, ++ z0 = svnmla_n_f16_z (p0, z1, z0, 2), ++ z0 = svnmla_z (p0, z1, z0, 2)) ++ ++/* ++** nmla_2_f16_z_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmla z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmad z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** fnmad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f16_z_untied, svfloat16_t, ++ z0 = svnmla_n_f16_z (p0, z1, z2, 2), ++ z0 = svnmla_z (p0, z1, z2, 2)) ++ ++/* ++** nmla_f16_x_tied1: ++** fnmla z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_x_tied1, svfloat16_t, ++ z0 = svnmla_f16_x (p0, z0, z1, z2), ++ z0 = svnmla_x (p0, z0, z1, z2)) ++ ++/* ++** nmla_f16_x_tied2: ++** fnmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_x_tied2, svfloat16_t, ++ z0 = svnmla_f16_x (p0, z1, z0, z2), ++ z0 = svnmla_x (p0, z1, z0, z2)) ++ ++/* ++** nmla_f16_x_tied3: ++** fnmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_x_tied3, svfloat16_t, ++ z0 = svnmla_f16_x (p0, z1, z2, z0), ++ z0 = svnmla_x (p0, z1, z2, z0)) ++ ++/* ++** nmla_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmla z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** fnmad z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0, z3 ++** fnmad z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f16_x_untied, svfloat16_t, ++ z0 = svnmla_f16_x (p0, z1, z2, z3), ++ z0 = svnmla_x (p0, z1, z2, z3)) ++ ++/* ++** nmla_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fnmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svnmla_n_f16_x (p0, z0, z1, d4), ++ z0 = svnmla_x (p0, z0, z1, d4)) ++ ++/* ++** nmla_h4_f16_x_tied2: ++** mov (z[0-9]+\.h), h4 ++** fnmad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_h4_f16_x_tied2, svfloat16_t, __fp16, ++ z0 = svnmla_n_f16_x (p0, z1, z0, d4), ++ z0 = svnmla_x (p0, z1, z0, d4)) ++ ++/* ++** nmla_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fnmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svnmla_n_f16_x (p0, z1, z2, d4), ++ z0 = svnmla_x (p0, z1, z2, d4)) ++ ++/* ++** nmla_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmla z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f16_x_tied1, svfloat16_t, ++ z0 = svnmla_n_f16_x (p0, z0, z1, 2), ++ z0 = svnmla_x (p0, z0, z1, 2)) ++ ++/* ++** nmla_2_f16_x_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmad z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f16_x_tied2, svfloat16_t, ++ z0 = svnmla_n_f16_x (p0, z1, z0, 2), ++ z0 = svnmla_x (p0, z1, z0, 2)) ++ ++/* ++** nmla_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fnmad z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f16_x_untied, svfloat16_t, ++ z0 = svnmla_n_f16_x (p0, z1, z2, 2), ++ z0 = svnmla_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmla_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f16_x_tied1, svfloat16_t, ++ z0 = svnmla_f16_x (svptrue_b16 (), z0, z1, z2), ++ z0 = svnmla_x (svptrue_b16 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmla_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f16_x_tied2, svfloat16_t, ++ z0 = svnmla_f16_x (svptrue_b16 (), z1, z0, z2), ++ z0 = svnmla_x (svptrue_b16 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmla_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f16_x_tied3, svfloat16_t, ++ z0 = svnmla_f16_x (svptrue_b16 (), z1, z2, z0), ++ z0 = svnmla_x (svptrue_b16 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmla_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f16_x_untied, svfloat16_t, ++ z0 = svnmla_f16_x (svptrue_b16 (), z1, z2, z3), ++ z0 = svnmla_x (svptrue_b16 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmla_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_2_f16_x_tied1, svfloat16_t, ++ z0 = svnmla_n_f16_x (svptrue_b16 (), z0, z1, 2), ++ z0 = svnmla_x (svptrue_b16 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmla_2_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_2_f16_x_tied2, svfloat16_t, ++ z0 = svnmla_n_f16_x (svptrue_b16 (), z1, z0, 2), ++ z0 = svnmla_x (svptrue_b16 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmla_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_2_f16_x_untied, svfloat16_t, ++ z0 = svnmla_n_f16_x (svptrue_b16 (), z1, z2, 2), ++ z0 = svnmla_x (svptrue_b16 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f32.c +new file mode 100644 +index 000000000..ef9542d74 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f32.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmla_f32_m_tied1: ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_m_tied1, svfloat32_t, ++ z0 = svnmla_f32_m (p0, z0, z1, z2), ++ z0 = svnmla_m (p0, z0, z1, z2)) ++ ++/* ++** nmla_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmla z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_m_tied2, svfloat32_t, ++ z0 = svnmla_f32_m (p0, z1, z0, z2), ++ z0 = svnmla_m (p0, z1, z0, z2)) ++ ++/* ++** nmla_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmla z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_m_tied3, svfloat32_t, ++ z0 = svnmla_f32_m (p0, z1, z2, z0), ++ z0 = svnmla_m (p0, z1, z2, z0)) ++ ++/* ++** nmla_f32_m_untied: ++** movprfx z0, z1 ++** fnmla z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_m_untied, svfloat32_t, ++ z0 = svnmla_f32_m (p0, z1, z2, z3), ++ z0 = svnmla_m (p0, z1, z2, z3)) ++ ++/* ++** nmla_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fnmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svnmla_n_f32_m (p0, z0, z1, d4), ++ z0 = svnmla_m (p0, z0, z1, d4)) ++ ++/* ++** nmla_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fnmla z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svnmla_n_f32_m (p0, z1, z2, d4), ++ z0 = svnmla_m (p0, z1, z2, d4)) ++ ++/* ++** nmla_2_f32_m_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f32_m_tied1, svfloat32_t, ++ z0 = svnmla_n_f32_m (p0, z0, z1, 2), ++ z0 = svnmla_m (p0, z0, z1, 2)) ++ ++/* ++** nmla_2_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmla z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f32_m_untied, svfloat32_t, ++ z0 = svnmla_n_f32_m (p0, z1, z2, 2), ++ z0 = svnmla_m (p0, z1, z2, 2)) ++ ++/* ++** nmla_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_z_tied1, svfloat32_t, ++ z0 = svnmla_f32_z (p0, z0, z1, z2), ++ z0 = svnmla_z (p0, z0, z1, z2)) ++ ++/* ++** nmla_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_z_tied2, svfloat32_t, ++ z0 = svnmla_f32_z (p0, z1, z0, z2), ++ z0 = svnmla_z (p0, z1, z0, z2)) ++ ++/* ++** nmla_f32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_z_tied3, svfloat32_t, ++ z0 = svnmla_f32_z (p0, z1, z2, z0), ++ z0 = svnmla_z (p0, z1, z2, z0)) ++ ++/* ++** nmla_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmla z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmad z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** fnmad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_z_untied, svfloat32_t, ++ z0 = svnmla_f32_z (p0, z1, z2, z3), ++ z0 = svnmla_z (p0, z1, z2, z3)) ++ ++/* ++** nmla_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fnmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svnmla_n_f32_z (p0, z0, z1, d4), ++ z0 = svnmla_z (p0, z0, z1, d4)) ++ ++/* ++** nmla_s4_f32_z_tied2: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fnmad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_s4_f32_z_tied2, svfloat32_t, float, ++ z0 = svnmla_n_f32_z (p0, z1, z0, d4), ++ z0 = svnmla_z (p0, z1, z0, d4)) ++ ++/* ++** nmla_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmla z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmad z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** fnmad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svnmla_n_f32_z (p0, z1, z2, d4), ++ z0 = svnmla_z (p0, z1, z2, d4)) ++ ++/* ++** nmla_2_f32_z_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fnmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f32_z_tied1, svfloat32_t, ++ z0 = svnmla_n_f32_z (p0, z0, z1, 2), ++ z0 = svnmla_z (p0, z0, z1, 2)) ++ ++/* ++** nmla_2_f32_z_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fnmad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f32_z_tied2, svfloat32_t, ++ z0 = svnmla_n_f32_z (p0, z1, z0, 2), ++ z0 = svnmla_z (p0, z1, z0, 2)) ++ ++/* ++** nmla_2_f32_z_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmla z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmad z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** fnmad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f32_z_untied, svfloat32_t, ++ z0 = svnmla_n_f32_z (p0, z1, z2, 2), ++ z0 = svnmla_z (p0, z1, z2, 2)) ++ ++/* ++** nmla_f32_x_tied1: ++** fnmla z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_x_tied1, svfloat32_t, ++ z0 = svnmla_f32_x (p0, z0, z1, z2), ++ z0 = svnmla_x (p0, z0, z1, z2)) ++ ++/* ++** nmla_f32_x_tied2: ++** fnmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_x_tied2, svfloat32_t, ++ z0 = svnmla_f32_x (p0, z1, z0, z2), ++ z0 = svnmla_x (p0, z1, z0, z2)) ++ ++/* ++** nmla_f32_x_tied3: ++** fnmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_x_tied3, svfloat32_t, ++ z0 = svnmla_f32_x (p0, z1, z2, z0), ++ z0 = svnmla_x (p0, z1, z2, z0)) ++ ++/* ++** nmla_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmla z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** fnmad z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0, z3 ++** fnmad z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f32_x_untied, svfloat32_t, ++ z0 = svnmla_f32_x (p0, z1, z2, z3), ++ z0 = svnmla_x (p0, z1, z2, z3)) ++ ++/* ++** nmla_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fnmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svnmla_n_f32_x (p0, z0, z1, d4), ++ z0 = svnmla_x (p0, z0, z1, d4)) ++ ++/* ++** nmla_s4_f32_x_tied2: ++** mov (z[0-9]+\.s), s4 ++** fnmad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_s4_f32_x_tied2, svfloat32_t, float, ++ z0 = svnmla_n_f32_x (p0, z1, z0, d4), ++ z0 = svnmla_x (p0, z1, z0, d4)) ++ ++/* ++** nmla_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fnmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svnmla_n_f32_x (p0, z1, z2, d4), ++ z0 = svnmla_x (p0, z1, z2, d4)) ++ ++/* ++** nmla_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmla z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f32_x_tied1, svfloat32_t, ++ z0 = svnmla_n_f32_x (p0, z0, z1, 2), ++ z0 = svnmla_x (p0, z0, z1, 2)) ++ ++/* ++** nmla_2_f32_x_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmad z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f32_x_tied2, svfloat32_t, ++ z0 = svnmla_n_f32_x (p0, z1, z0, 2), ++ z0 = svnmla_x (p0, z1, z0, 2)) ++ ++/* ++** nmla_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fnmad z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f32_x_untied, svfloat32_t, ++ z0 = svnmla_n_f32_x (p0, z1, z2, 2), ++ z0 = svnmla_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmla_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f32_x_tied1, svfloat32_t, ++ z0 = svnmla_f32_x (svptrue_b32 (), z0, z1, z2), ++ z0 = svnmla_x (svptrue_b32 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmla_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f32_x_tied2, svfloat32_t, ++ z0 = svnmla_f32_x (svptrue_b32 (), z1, z0, z2), ++ z0 = svnmla_x (svptrue_b32 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmla_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f32_x_tied3, svfloat32_t, ++ z0 = svnmla_f32_x (svptrue_b32 (), z1, z2, z0), ++ z0 = svnmla_x (svptrue_b32 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmla_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f32_x_untied, svfloat32_t, ++ z0 = svnmla_f32_x (svptrue_b32 (), z1, z2, z3), ++ z0 = svnmla_x (svptrue_b32 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmla_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_2_f32_x_tied1, svfloat32_t, ++ z0 = svnmla_n_f32_x (svptrue_b32 (), z0, z1, 2), ++ z0 = svnmla_x (svptrue_b32 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmla_2_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_2_f32_x_tied2, svfloat32_t, ++ z0 = svnmla_n_f32_x (svptrue_b32 (), z1, z0, 2), ++ z0 = svnmla_x (svptrue_b32 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmla_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_2_f32_x_untied, svfloat32_t, ++ z0 = svnmla_n_f32_x (svptrue_b32 (), z1, z2, 2), ++ z0 = svnmla_x (svptrue_b32 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f64.c +new file mode 100644 +index 000000000..441821f60 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f64.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmla_f64_m_tied1: ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_m_tied1, svfloat64_t, ++ z0 = svnmla_f64_m (p0, z0, z1, z2), ++ z0 = svnmla_m (p0, z0, z1, z2)) ++ ++/* ++** nmla_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fnmla z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_m_tied2, svfloat64_t, ++ z0 = svnmla_f64_m (p0, z1, z0, z2), ++ z0 = svnmla_m (p0, z1, z0, z2)) ++ ++/* ++** nmla_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fnmla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_m_tied3, svfloat64_t, ++ z0 = svnmla_f64_m (p0, z1, z2, z0), ++ z0 = svnmla_m (p0, z1, z2, z0)) ++ ++/* ++** nmla_f64_m_untied: ++** movprfx z0, z1 ++** fnmla z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_m_untied, svfloat64_t, ++ z0 = svnmla_f64_m (p0, z1, z2, z3), ++ z0 = svnmla_m (p0, z1, z2, z3)) ++ ++/* ++** nmla_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fnmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svnmla_n_f64_m (p0, z0, z1, d4), ++ z0 = svnmla_m (p0, z0, z1, d4)) ++ ++/* ++** nmla_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fnmla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svnmla_n_f64_m (p0, z1, z2, d4), ++ z0 = svnmla_m (p0, z1, z2, d4)) ++ ++/* ++** nmla_2_f64_m_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f64_m_tied1, svfloat64_t, ++ z0 = svnmla_n_f64_m (p0, z0, z1, 2), ++ z0 = svnmla_m (p0, z0, z1, 2)) ++ ++/* ++** nmla_2_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmla z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f64_m_untied, svfloat64_t, ++ z0 = svnmla_n_f64_m (p0, z1, z2, 2), ++ z0 = svnmla_m (p0, z1, z2, 2)) ++ ++/* ++** nmla_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_z_tied1, svfloat64_t, ++ z0 = svnmla_f64_z (p0, z0, z1, z2), ++ z0 = svnmla_z (p0, z0, z1, z2)) ++ ++/* ++** nmla_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_z_tied2, svfloat64_t, ++ z0 = svnmla_f64_z (p0, z1, z0, z2), ++ z0 = svnmla_z (p0, z1, z0, z2)) ++ ++/* ++** nmla_f64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_z_tied3, svfloat64_t, ++ z0 = svnmla_f64_z (p0, z1, z2, z0), ++ z0 = svnmla_z (p0, z1, z2, z0)) ++ ++/* ++** nmla_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmla z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmad z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** fnmad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_z_untied, svfloat64_t, ++ z0 = svnmla_f64_z (p0, z1, z2, z3), ++ z0 = svnmla_z (p0, z1, z2, z3)) ++ ++/* ++** nmla_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fnmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svnmla_n_f64_z (p0, z0, z1, d4), ++ z0 = svnmla_z (p0, z0, z1, d4)) ++ ++/* ++** nmla_d4_f64_z_tied2: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fnmad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_d4_f64_z_tied2, svfloat64_t, double, ++ z0 = svnmla_n_f64_z (p0, z1, z0, d4), ++ z0 = svnmla_z (p0, z1, z0, d4)) ++ ++/* ++** nmla_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmla z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmad z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** fnmad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svnmla_n_f64_z (p0, z1, z2, d4), ++ z0 = svnmla_z (p0, z1, z2, d4)) ++ ++/* ++** nmla_2_f64_z_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fnmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f64_z_tied1, svfloat64_t, ++ z0 = svnmla_n_f64_z (p0, z0, z1, 2), ++ z0 = svnmla_z (p0, z0, z1, 2)) ++ ++/* ++** nmla_2_f64_z_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fnmad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f64_z_tied2, svfloat64_t, ++ z0 = svnmla_n_f64_z (p0, z1, z0, 2), ++ z0 = svnmla_z (p0, z1, z0, 2)) ++ ++/* ++** nmla_2_f64_z_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmla z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmad z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** fnmad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f64_z_untied, svfloat64_t, ++ z0 = svnmla_n_f64_z (p0, z1, z2, 2), ++ z0 = svnmla_z (p0, z1, z2, 2)) ++ ++/* ++** nmla_f64_x_tied1: ++** fnmla z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_x_tied1, svfloat64_t, ++ z0 = svnmla_f64_x (p0, z0, z1, z2), ++ z0 = svnmla_x (p0, z0, z1, z2)) ++ ++/* ++** nmla_f64_x_tied2: ++** fnmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_x_tied2, svfloat64_t, ++ z0 = svnmla_f64_x (p0, z1, z0, z2), ++ z0 = svnmla_x (p0, z1, z0, z2)) ++ ++/* ++** nmla_f64_x_tied3: ++** fnmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_x_tied3, svfloat64_t, ++ z0 = svnmla_f64_x (p0, z1, z2, z0), ++ z0 = svnmla_x (p0, z1, z2, z0)) ++ ++/* ++** nmla_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmla z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** fnmad z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0, z3 ++** fnmad z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_f64_x_untied, svfloat64_t, ++ z0 = svnmla_f64_x (p0, z1, z2, z3), ++ z0 = svnmla_x (p0, z1, z2, z3)) ++ ++/* ++** nmla_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fnmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svnmla_n_f64_x (p0, z0, z1, d4), ++ z0 = svnmla_x (p0, z0, z1, d4)) ++ ++/* ++** nmla_d4_f64_x_tied2: ++** mov (z[0-9]+\.d), d4 ++** fnmad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_d4_f64_x_tied2, svfloat64_t, double, ++ z0 = svnmla_n_f64_x (p0, z1, z0, d4), ++ z0 = svnmla_x (p0, z1, z0, d4)) ++ ++/* ++** nmla_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fnmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (nmla_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svnmla_n_f64_x (p0, z1, z2, d4), ++ z0 = svnmla_x (p0, z1, z2, d4)) ++ ++/* ++** nmla_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmla z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f64_x_tied1, svfloat64_t, ++ z0 = svnmla_n_f64_x (p0, z0, z1, 2), ++ z0 = svnmla_x (p0, z0, z1, 2)) ++ ++/* ++** nmla_2_f64_x_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmad z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f64_x_tied2, svfloat64_t, ++ z0 = svnmla_n_f64_x (p0, z1, z0, 2), ++ z0 = svnmla_x (p0, z1, z0, 2)) ++ ++/* ++** nmla_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fnmad z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmla_2_f64_x_untied, svfloat64_t, ++ z0 = svnmla_n_f64_x (p0, z1, z2, 2), ++ z0 = svnmla_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmla_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f64_x_tied1, svfloat64_t, ++ z0 = svnmla_f64_x (svptrue_b64 (), z0, z1, z2), ++ z0 = svnmla_x (svptrue_b64 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmla_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f64_x_tied2, svfloat64_t, ++ z0 = svnmla_f64_x (svptrue_b64 (), z1, z0, z2), ++ z0 = svnmla_x (svptrue_b64 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmla_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f64_x_tied3, svfloat64_t, ++ z0 = svnmla_f64_x (svptrue_b64 (), z1, z2, z0), ++ z0 = svnmla_x (svptrue_b64 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmla_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_f64_x_untied, svfloat64_t, ++ z0 = svnmla_f64_x (svptrue_b64 (), z1, z2, z3), ++ z0 = svnmla_x (svptrue_b64 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmla_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_2_f64_x_tied1, svfloat64_t, ++ z0 = svnmla_n_f64_x (svptrue_b64 (), z0, z1, 2), ++ z0 = svnmla_x (svptrue_b64 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmla_2_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_2_f64_x_tied2, svfloat64_t, ++ z0 = svnmla_n_f64_x (svptrue_b64 (), z1, z0, 2), ++ z0 = svnmla_x (svptrue_b64 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmla_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmla_2_f64_x_untied, svfloat64_t, ++ z0 = svnmla_n_f64_x (svptrue_b64 (), z1, z2, 2), ++ z0 = svnmla_x (svptrue_b64 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f16.c +new file mode 100644 +index 000000000..8aa6c7509 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f16.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmls_f16_m_tied1: ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_m_tied1, svfloat16_t, ++ z0 = svnmls_f16_m (p0, z0, z1, z2), ++ z0 = svnmls_m (p0, z0, z1, z2)) ++ ++/* ++** nmls_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmls z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_m_tied2, svfloat16_t, ++ z0 = svnmls_f16_m (p0, z1, z0, z2), ++ z0 = svnmls_m (p0, z1, z0, z2)) ++ ++/* ++** nmls_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmls z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_m_tied3, svfloat16_t, ++ z0 = svnmls_f16_m (p0, z1, z2, z0), ++ z0 = svnmls_m (p0, z1, z2, z0)) ++ ++/* ++** nmls_f16_m_untied: ++** movprfx z0, z1 ++** fnmls z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_m_untied, svfloat16_t, ++ z0 = svnmls_f16_m (p0, z1, z2, z3), ++ z0 = svnmls_m (p0, z1, z2, z3)) ++ ++/* ++** nmls_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fnmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svnmls_n_f16_m (p0, z0, z1, d4), ++ z0 = svnmls_m (p0, z0, z1, d4)) ++ ++/* ++** nmls_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fnmls z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svnmls_n_f16_m (p0, z1, z2, d4), ++ z0 = svnmls_m (p0, z1, z2, d4)) ++ ++/* ++** nmls_2_f16_m_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f16_m_tied1, svfloat16_t, ++ z0 = svnmls_n_f16_m (p0, z0, z1, 2), ++ z0 = svnmls_m (p0, z0, z1, 2)) ++ ++/* ++** nmls_2_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmls z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f16_m_untied, svfloat16_t, ++ z0 = svnmls_n_f16_m (p0, z1, z2, 2), ++ z0 = svnmls_m (p0, z1, z2, 2)) ++ ++/* ++** nmls_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_z_tied1, svfloat16_t, ++ z0 = svnmls_f16_z (p0, z0, z1, z2), ++ z0 = svnmls_z (p0, z0, z1, z2)) ++ ++/* ++** nmls_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_z_tied2, svfloat16_t, ++ z0 = svnmls_f16_z (p0, z1, z0, z2), ++ z0 = svnmls_z (p0, z1, z0, z2)) ++ ++/* ++** nmls_f16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_z_tied3, svfloat16_t, ++ z0 = svnmls_f16_z (p0, z1, z2, z0), ++ z0 = svnmls_z (p0, z1, z2, z0)) ++ ++/* ++** nmls_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmls z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmsb z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** fnmsb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_z_untied, svfloat16_t, ++ z0 = svnmls_f16_z (p0, z1, z2, z3), ++ z0 = svnmls_z (p0, z1, z2, z3)) ++ ++/* ++** nmls_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fnmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svnmls_n_f16_z (p0, z0, z1, d4), ++ z0 = svnmls_z (p0, z0, z1, d4)) ++ ++/* ++** nmls_h4_f16_z_tied2: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fnmsb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_h4_f16_z_tied2, svfloat16_t, __fp16, ++ z0 = svnmls_n_f16_z (p0, z1, z0, d4), ++ z0 = svnmls_z (p0, z1, z0, d4)) ++ ++/* ++** nmls_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmls z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmsb z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** fnmsb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svnmls_n_f16_z (p0, z1, z2, d4), ++ z0 = svnmls_z (p0, z1, z2, d4)) ++ ++/* ++** nmls_2_f16_z_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fnmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f16_z_tied1, svfloat16_t, ++ z0 = svnmls_n_f16_z (p0, z0, z1, 2), ++ z0 = svnmls_z (p0, z0, z1, 2)) ++ ++/* ++** nmls_2_f16_z_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fnmsb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f16_z_tied2, svfloat16_t, ++ z0 = svnmls_n_f16_z (p0, z1, z0, 2), ++ z0 = svnmls_z (p0, z1, z0, 2)) ++ ++/* ++** nmls_2_f16_z_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmls z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmsb z0\.h, p0/m, \1, z1\.h ++** | ++** movprfx z0\.h, p0/z, \1 ++** fnmsb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f16_z_untied, svfloat16_t, ++ z0 = svnmls_n_f16_z (p0, z1, z2, 2), ++ z0 = svnmls_z (p0, z1, z2, 2)) ++ ++/* ++** nmls_f16_x_tied1: ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_x_tied1, svfloat16_t, ++ z0 = svnmls_f16_x (p0, z0, z1, z2), ++ z0 = svnmls_x (p0, z0, z1, z2)) ++ ++/* ++** nmls_f16_x_tied2: ++** fnmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_x_tied2, svfloat16_t, ++ z0 = svnmls_f16_x (p0, z1, z0, z2), ++ z0 = svnmls_x (p0, z1, z0, z2)) ++ ++/* ++** nmls_f16_x_tied3: ++** fnmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_x_tied3, svfloat16_t, ++ z0 = svnmls_f16_x (p0, z1, z2, z0), ++ z0 = svnmls_x (p0, z1, z2, z0)) ++ ++/* ++** nmls_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmls z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** fnmsb z0\.h, p0/m, z3\.h, z1\.h ++** | ++** movprfx z0, z3 ++** fnmsb z0\.h, p0/m, z2\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f16_x_untied, svfloat16_t, ++ z0 = svnmls_f16_x (p0, z1, z2, z3), ++ z0 = svnmls_x (p0, z1, z2, z3)) ++ ++/* ++** nmls_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fnmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svnmls_n_f16_x (p0, z0, z1, d4), ++ z0 = svnmls_x (p0, z0, z1, d4)) ++ ++/* ++** nmls_h4_f16_x_tied2: ++** mov (z[0-9]+\.h), h4 ++** fnmsb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_h4_f16_x_tied2, svfloat16_t, __fp16, ++ z0 = svnmls_n_f16_x (p0, z1, z0, d4), ++ z0 = svnmls_x (p0, z1, z0, d4)) ++ ++/* ++** nmls_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fnmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svnmls_n_f16_x (p0, z1, z2, d4), ++ z0 = svnmls_x (p0, z1, z2, d4)) ++ ++/* ++** nmls_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmls z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f16_x_tied1, svfloat16_t, ++ z0 = svnmls_n_f16_x (p0, z0, z1, 2), ++ z0 = svnmls_x (p0, z0, z1, 2)) ++ ++/* ++** nmls_2_f16_x_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmsb z0\.h, p0/m, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f16_x_tied2, svfloat16_t, ++ z0 = svnmls_n_f16_x (p0, z1, z0, 2), ++ z0 = svnmls_x (p0, z1, z0, 2)) ++ ++/* ++** nmls_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fnmsb z0\.h, p0/m, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f16_x_untied, svfloat16_t, ++ z0 = svnmls_n_f16_x (p0, z1, z2, 2), ++ z0 = svnmls_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmls_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f16_x_tied1, svfloat16_t, ++ z0 = svnmls_f16_x (svptrue_b16 (), z0, z1, z2), ++ z0 = svnmls_x (svptrue_b16 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmls_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f16_x_tied2, svfloat16_t, ++ z0 = svnmls_f16_x (svptrue_b16 (), z1, z0, z2), ++ z0 = svnmls_x (svptrue_b16 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmls_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f16_x_tied3, svfloat16_t, ++ z0 = svnmls_f16_x (svptrue_b16 (), z1, z2, z0), ++ z0 = svnmls_x (svptrue_b16 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmls_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f16_x_untied, svfloat16_t, ++ z0 = svnmls_f16_x (svptrue_b16 (), z1, z2, z3), ++ z0 = svnmls_x (svptrue_b16 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmls_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_2_f16_x_tied1, svfloat16_t, ++ z0 = svnmls_n_f16_x (svptrue_b16 (), z0, z1, 2), ++ z0 = svnmls_x (svptrue_b16 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmls_2_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_2_f16_x_tied2, svfloat16_t, ++ z0 = svnmls_n_f16_x (svptrue_b16 (), z1, z0, 2), ++ z0 = svnmls_x (svptrue_b16 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmls_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_2_f16_x_untied, svfloat16_t, ++ z0 = svnmls_n_f16_x (svptrue_b16 (), z1, z2, 2), ++ z0 = svnmls_x (svptrue_b16 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f32.c +new file mode 100644 +index 000000000..42ea13fac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f32.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmls_f32_m_tied1: ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_m_tied1, svfloat32_t, ++ z0 = svnmls_f32_m (p0, z0, z1, z2), ++ z0 = svnmls_m (p0, z0, z1, z2)) ++ ++/* ++** nmls_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmls z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_m_tied2, svfloat32_t, ++ z0 = svnmls_f32_m (p0, z1, z0, z2), ++ z0 = svnmls_m (p0, z1, z0, z2)) ++ ++/* ++** nmls_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmls z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_m_tied3, svfloat32_t, ++ z0 = svnmls_f32_m (p0, z1, z2, z0), ++ z0 = svnmls_m (p0, z1, z2, z0)) ++ ++/* ++** nmls_f32_m_untied: ++** movprfx z0, z1 ++** fnmls z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_m_untied, svfloat32_t, ++ z0 = svnmls_f32_m (p0, z1, z2, z3), ++ z0 = svnmls_m (p0, z1, z2, z3)) ++ ++/* ++** nmls_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fnmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svnmls_n_f32_m (p0, z0, z1, d4), ++ z0 = svnmls_m (p0, z0, z1, d4)) ++ ++/* ++** nmls_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fnmls z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svnmls_n_f32_m (p0, z1, z2, d4), ++ z0 = svnmls_m (p0, z1, z2, d4)) ++ ++/* ++** nmls_2_f32_m_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f32_m_tied1, svfloat32_t, ++ z0 = svnmls_n_f32_m (p0, z0, z1, 2), ++ z0 = svnmls_m (p0, z0, z1, 2)) ++ ++/* ++** nmls_2_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmls z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f32_m_untied, svfloat32_t, ++ z0 = svnmls_n_f32_m (p0, z1, z2, 2), ++ z0 = svnmls_m (p0, z1, z2, 2)) ++ ++/* ++** nmls_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_z_tied1, svfloat32_t, ++ z0 = svnmls_f32_z (p0, z0, z1, z2), ++ z0 = svnmls_z (p0, z0, z1, z2)) ++ ++/* ++** nmls_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_z_tied2, svfloat32_t, ++ z0 = svnmls_f32_z (p0, z1, z0, z2), ++ z0 = svnmls_z (p0, z1, z0, z2)) ++ ++/* ++** nmls_f32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_z_tied3, svfloat32_t, ++ z0 = svnmls_f32_z (p0, z1, z2, z0), ++ z0 = svnmls_z (p0, z1, z2, z0)) ++ ++/* ++** nmls_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmls z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmsb z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** fnmsb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_z_untied, svfloat32_t, ++ z0 = svnmls_f32_z (p0, z1, z2, z3), ++ z0 = svnmls_z (p0, z1, z2, z3)) ++ ++/* ++** nmls_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fnmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svnmls_n_f32_z (p0, z0, z1, d4), ++ z0 = svnmls_z (p0, z0, z1, d4)) ++ ++/* ++** nmls_s4_f32_z_tied2: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fnmsb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_s4_f32_z_tied2, svfloat32_t, float, ++ z0 = svnmls_n_f32_z (p0, z1, z0, d4), ++ z0 = svnmls_z (p0, z1, z0, d4)) ++ ++/* ++** nmls_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmls z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmsb z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** fnmsb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svnmls_n_f32_z (p0, z1, z2, d4), ++ z0 = svnmls_z (p0, z1, z2, d4)) ++ ++/* ++** nmls_2_f32_z_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fnmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f32_z_tied1, svfloat32_t, ++ z0 = svnmls_n_f32_z (p0, z0, z1, 2), ++ z0 = svnmls_z (p0, z0, z1, 2)) ++ ++/* ++** nmls_2_f32_z_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fnmsb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f32_z_tied2, svfloat32_t, ++ z0 = svnmls_n_f32_z (p0, z1, z0, 2), ++ z0 = svnmls_z (p0, z1, z0, 2)) ++ ++/* ++** nmls_2_f32_z_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmls z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmsb z0\.s, p0/m, \1, z1\.s ++** | ++** movprfx z0\.s, p0/z, \1 ++** fnmsb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f32_z_untied, svfloat32_t, ++ z0 = svnmls_n_f32_z (p0, z1, z2, 2), ++ z0 = svnmls_z (p0, z1, z2, 2)) ++ ++/* ++** nmls_f32_x_tied1: ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_x_tied1, svfloat32_t, ++ z0 = svnmls_f32_x (p0, z0, z1, z2), ++ z0 = svnmls_x (p0, z0, z1, z2)) ++ ++/* ++** nmls_f32_x_tied2: ++** fnmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_x_tied2, svfloat32_t, ++ z0 = svnmls_f32_x (p0, z1, z0, z2), ++ z0 = svnmls_x (p0, z1, z0, z2)) ++ ++/* ++** nmls_f32_x_tied3: ++** fnmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_x_tied3, svfloat32_t, ++ z0 = svnmls_f32_x (p0, z1, z2, z0), ++ z0 = svnmls_x (p0, z1, z2, z0)) ++ ++/* ++** nmls_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmls z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** fnmsb z0\.s, p0/m, z3\.s, z1\.s ++** | ++** movprfx z0, z3 ++** fnmsb z0\.s, p0/m, z2\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f32_x_untied, svfloat32_t, ++ z0 = svnmls_f32_x (p0, z1, z2, z3), ++ z0 = svnmls_x (p0, z1, z2, z3)) ++ ++/* ++** nmls_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fnmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svnmls_n_f32_x (p0, z0, z1, d4), ++ z0 = svnmls_x (p0, z0, z1, d4)) ++ ++/* ++** nmls_s4_f32_x_tied2: ++** mov (z[0-9]+\.s), s4 ++** fnmsb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_s4_f32_x_tied2, svfloat32_t, float, ++ z0 = svnmls_n_f32_x (p0, z1, z0, d4), ++ z0 = svnmls_x (p0, z1, z0, d4)) ++ ++/* ++** nmls_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fnmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svnmls_n_f32_x (p0, z1, z2, d4), ++ z0 = svnmls_x (p0, z1, z2, d4)) ++ ++/* ++** nmls_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmls z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f32_x_tied1, svfloat32_t, ++ z0 = svnmls_n_f32_x (p0, z0, z1, 2), ++ z0 = svnmls_x (p0, z0, z1, 2)) ++ ++/* ++** nmls_2_f32_x_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmsb z0\.s, p0/m, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f32_x_tied2, svfloat32_t, ++ z0 = svnmls_n_f32_x (p0, z1, z0, 2), ++ z0 = svnmls_x (p0, z1, z0, 2)) ++ ++/* ++** nmls_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fnmsb z0\.s, p0/m, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f32_x_untied, svfloat32_t, ++ z0 = svnmls_n_f32_x (p0, z1, z2, 2), ++ z0 = svnmls_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmls_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f32_x_tied1, svfloat32_t, ++ z0 = svnmls_f32_x (svptrue_b32 (), z0, z1, z2), ++ z0 = svnmls_x (svptrue_b32 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmls_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f32_x_tied2, svfloat32_t, ++ z0 = svnmls_f32_x (svptrue_b32 (), z1, z0, z2), ++ z0 = svnmls_x (svptrue_b32 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmls_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f32_x_tied3, svfloat32_t, ++ z0 = svnmls_f32_x (svptrue_b32 (), z1, z2, z0), ++ z0 = svnmls_x (svptrue_b32 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmls_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f32_x_untied, svfloat32_t, ++ z0 = svnmls_f32_x (svptrue_b32 (), z1, z2, z3), ++ z0 = svnmls_x (svptrue_b32 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmls_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_2_f32_x_tied1, svfloat32_t, ++ z0 = svnmls_n_f32_x (svptrue_b32 (), z0, z1, 2), ++ z0 = svnmls_x (svptrue_b32 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmls_2_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_2_f32_x_tied2, svfloat32_t, ++ z0 = svnmls_n_f32_x (svptrue_b32 (), z1, z0, 2), ++ z0 = svnmls_x (svptrue_b32 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmls_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_2_f32_x_untied, svfloat32_t, ++ z0 = svnmls_n_f32_x (svptrue_b32 (), z1, z2, 2), ++ z0 = svnmls_x (svptrue_b32 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f64.c +new file mode 100644 +index 000000000..994c2a74e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f64.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmls_f64_m_tied1: ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_m_tied1, svfloat64_t, ++ z0 = svnmls_f64_m (p0, z0, z1, z2), ++ z0 = svnmls_m (p0, z0, z1, z2)) ++ ++/* ++** nmls_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fnmls z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_m_tied2, svfloat64_t, ++ z0 = svnmls_f64_m (p0, z1, z0, z2), ++ z0 = svnmls_m (p0, z1, z0, z2)) ++ ++/* ++** nmls_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fnmls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_m_tied3, svfloat64_t, ++ z0 = svnmls_f64_m (p0, z1, z2, z0), ++ z0 = svnmls_m (p0, z1, z2, z0)) ++ ++/* ++** nmls_f64_m_untied: ++** movprfx z0, z1 ++** fnmls z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_m_untied, svfloat64_t, ++ z0 = svnmls_f64_m (p0, z1, z2, z3), ++ z0 = svnmls_m (p0, z1, z2, z3)) ++ ++/* ++** nmls_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fnmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svnmls_n_f64_m (p0, z0, z1, d4), ++ z0 = svnmls_m (p0, z0, z1, d4)) ++ ++/* ++** nmls_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fnmls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svnmls_n_f64_m (p0, z1, z2, d4), ++ z0 = svnmls_m (p0, z1, z2, d4)) ++ ++/* ++** nmls_2_f64_m_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f64_m_tied1, svfloat64_t, ++ z0 = svnmls_n_f64_m (p0, z0, z1, 2), ++ z0 = svnmls_m (p0, z0, z1, 2)) ++ ++/* ++** nmls_2_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmls z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f64_m_untied, svfloat64_t, ++ z0 = svnmls_n_f64_m (p0, z1, z2, 2), ++ z0 = svnmls_m (p0, z1, z2, 2)) ++ ++/* ++** nmls_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_z_tied1, svfloat64_t, ++ z0 = svnmls_f64_z (p0, z0, z1, z2), ++ z0 = svnmls_z (p0, z0, z1, z2)) ++ ++/* ++** nmls_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_z_tied2, svfloat64_t, ++ z0 = svnmls_f64_z (p0, z1, z0, z2), ++ z0 = svnmls_z (p0, z1, z0, z2)) ++ ++/* ++** nmls_f64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_z_tied3, svfloat64_t, ++ z0 = svnmls_f64_z (p0, z1, z2, z0), ++ z0 = svnmls_z (p0, z1, z2, z0)) ++ ++/* ++** nmls_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmls z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmsb z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** fnmsb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_z_untied, svfloat64_t, ++ z0 = svnmls_f64_z (p0, z1, z2, z3), ++ z0 = svnmls_z (p0, z1, z2, z3)) ++ ++/* ++** nmls_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fnmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svnmls_n_f64_z (p0, z0, z1, d4), ++ z0 = svnmls_z (p0, z0, z1, d4)) ++ ++/* ++** nmls_d4_f64_z_tied2: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fnmsb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_d4_f64_z_tied2, svfloat64_t, double, ++ z0 = svnmls_n_f64_z (p0, z1, z0, d4), ++ z0 = svnmls_z (p0, z1, z0, d4)) ++ ++/* ++** nmls_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmls z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmsb z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** fnmsb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svnmls_n_f64_z (p0, z1, z2, d4), ++ z0 = svnmls_z (p0, z1, z2, d4)) ++ ++/* ++** nmls_2_f64_z_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fnmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f64_z_tied1, svfloat64_t, ++ z0 = svnmls_n_f64_z (p0, z0, z1, 2), ++ z0 = svnmls_z (p0, z0, z1, 2)) ++ ++/* ++** nmls_2_f64_z_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fnmsb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f64_z_tied2, svfloat64_t, ++ z0 = svnmls_n_f64_z (p0, z1, z0, 2), ++ z0 = svnmls_z (p0, z1, z0, 2)) ++ ++/* ++** nmls_2_f64_z_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmls z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmsb z0\.d, p0/m, \1, z1\.d ++** | ++** movprfx z0\.d, p0/z, \1 ++** fnmsb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f64_z_untied, svfloat64_t, ++ z0 = svnmls_n_f64_z (p0, z1, z2, 2), ++ z0 = svnmls_z (p0, z1, z2, 2)) ++ ++/* ++** nmls_f64_x_tied1: ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_x_tied1, svfloat64_t, ++ z0 = svnmls_f64_x (p0, z0, z1, z2), ++ z0 = svnmls_x (p0, z0, z1, z2)) ++ ++/* ++** nmls_f64_x_tied2: ++** fnmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_x_tied2, svfloat64_t, ++ z0 = svnmls_f64_x (p0, z1, z0, z2), ++ z0 = svnmls_x (p0, z1, z0, z2)) ++ ++/* ++** nmls_f64_x_tied3: ++** fnmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_x_tied3, svfloat64_t, ++ z0 = svnmls_f64_x (p0, z1, z2, z0), ++ z0 = svnmls_x (p0, z1, z2, z0)) ++ ++/* ++** nmls_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmls z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** fnmsb z0\.d, p0/m, z3\.d, z1\.d ++** | ++** movprfx z0, z3 ++** fnmsb z0\.d, p0/m, z2\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_f64_x_untied, svfloat64_t, ++ z0 = svnmls_f64_x (p0, z1, z2, z3), ++ z0 = svnmls_x (p0, z1, z2, z3)) ++ ++/* ++** nmls_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fnmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svnmls_n_f64_x (p0, z0, z1, d4), ++ z0 = svnmls_x (p0, z0, z1, d4)) ++ ++/* ++** nmls_d4_f64_x_tied2: ++** mov (z[0-9]+\.d), d4 ++** fnmsb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_d4_f64_x_tied2, svfloat64_t, double, ++ z0 = svnmls_n_f64_x (p0, z1, z0, d4), ++ z0 = svnmls_x (p0, z1, z0, d4)) ++ ++/* ++** nmls_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fnmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (nmls_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svnmls_n_f64_x (p0, z1, z2, d4), ++ z0 = svnmls_x (p0, z1, z2, d4)) ++ ++/* ++** nmls_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmls z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f64_x_tied1, svfloat64_t, ++ z0 = svnmls_n_f64_x (p0, z0, z1, 2), ++ z0 = svnmls_x (p0, z0, z1, 2)) ++ ++/* ++** nmls_2_f64_x_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmsb z0\.d, p0/m, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f64_x_tied2, svfloat64_t, ++ z0 = svnmls_n_f64_x (p0, z1, z0, 2), ++ z0 = svnmls_x (p0, z1, z0, 2)) ++ ++/* ++** nmls_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fnmsb z0\.d, p0/m, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmls_2_f64_x_untied, svfloat64_t, ++ z0 = svnmls_n_f64_x (p0, z1, z2, 2), ++ z0 = svnmls_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmls_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f64_x_tied1, svfloat64_t, ++ z0 = svnmls_f64_x (svptrue_b64 (), z0, z1, z2), ++ z0 = svnmls_x (svptrue_b64 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmls_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f64_x_tied2, svfloat64_t, ++ z0 = svnmls_f64_x (svptrue_b64 (), z1, z0, z2), ++ z0 = svnmls_x (svptrue_b64 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmls_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f64_x_tied3, svfloat64_t, ++ z0 = svnmls_f64_x (svptrue_b64 (), z1, z2, z0), ++ z0 = svnmls_x (svptrue_b64 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmls_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_f64_x_untied, svfloat64_t, ++ z0 = svnmls_f64_x (svptrue_b64 (), z1, z2, z3), ++ z0 = svnmls_x (svptrue_b64 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmls_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_2_f64_x_tied1, svfloat64_t, ++ z0 = svnmls_n_f64_x (svptrue_b64 (), z0, z1, 2), ++ z0 = svnmls_x (svptrue_b64 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmls_2_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_2_f64_x_tied2, svfloat64_t, ++ z0 = svnmls_n_f64_x (svptrue_b64 (), z1, z0, 2), ++ z0 = svnmls_x (svptrue_b64 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmls_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmls_2_f64_x_untied, svfloat64_t, ++ z0 = svnmls_n_f64_x (svptrue_b64 (), z1, z2, 2), ++ z0 = svnmls_x (svptrue_b64 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f16.c +new file mode 100644 +index 000000000..c11401485 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f16.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmsb_f16_m_tied1: ++** fnmsb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_m_tied1, svfloat16_t, ++ z0 = svnmsb_f16_m (p0, z0, z1, z2), ++ z0 = svnmsb_m (p0, z0, z1, z2)) ++ ++/* ++** nmsb_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmsb z0\.h, p0/m, \1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_m_tied2, svfloat16_t, ++ z0 = svnmsb_f16_m (p0, z1, z0, z2), ++ z0 = svnmsb_m (p0, z1, z0, z2)) ++ ++/* ++** nmsb_f16_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmsb z0\.h, p0/m, z2\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_m_tied3, svfloat16_t, ++ z0 = svnmsb_f16_m (p0, z1, z2, z0), ++ z0 = svnmsb_m (p0, z1, z2, z0)) ++ ++/* ++** nmsb_f16_m_untied: ++** movprfx z0, z1 ++** fnmsb z0\.h, p0/m, z2\.h, z3\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_m_untied, svfloat16_t, ++ z0 = svnmsb_f16_m (p0, z1, z2, z3), ++ z0 = svnmsb_m (p0, z1, z2, z3)) ++ ++/* ++** nmsb_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svnmsb_n_f16_m (p0, z0, z1, d4), ++ z0 = svnmsb_m (p0, z0, z1, d4)) ++ ++/* ++** nmsb_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fnmsb z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svnmsb_n_f16_m (p0, z1, z2, d4), ++ z0 = svnmsb_m (p0, z1, z2, d4)) ++ ++/* ++** nmsb_2_f16_m_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f16_m_tied1, svfloat16_t, ++ z0 = svnmsb_n_f16_m (p0, z0, z1, 2), ++ z0 = svnmsb_m (p0, z0, z1, 2)) ++ ++/* ++** nmsb_2_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmsb z0\.h, p0/m, z2\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f16_m_untied, svfloat16_t, ++ z0 = svnmsb_n_f16_m (p0, z1, z2, 2), ++ z0 = svnmsb_m (p0, z1, z2, 2)) ++ ++/* ++** nmsb_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmsb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_z_tied1, svfloat16_t, ++ z0 = svnmsb_f16_z (p0, z0, z1, z2), ++ z0 = svnmsb_z (p0, z0, z1, z2)) ++ ++/* ++** nmsb_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmsb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_z_tied2, svfloat16_t, ++ z0 = svnmsb_f16_z (p0, z1, z0, z2), ++ z0 = svnmsb_z (p0, z1, z0, z2)) ++ ++/* ++** nmsb_f16_z_tied3: ++** movprfx z0\.h, p0/z, z0\.h ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_z_tied3, svfloat16_t, ++ z0 = svnmsb_f16_z (p0, z1, z2, z0), ++ z0 = svnmsb_z (p0, z1, z2, z0)) ++ ++/* ++** nmsb_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmsb z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmsb z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0\.h, p0/z, z3\.h ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_z_untied, svfloat16_t, ++ z0 = svnmsb_f16_z (p0, z1, z2, z3), ++ z0 = svnmsb_z (p0, z1, z2, z3)) ++ ++/* ++** nmsb_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svnmsb_n_f16_z (p0, z0, z1, d4), ++ z0 = svnmsb_z (p0, z0, z1, d4)) ++ ++/* ++** nmsb_h4_f16_z_tied2: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_h4_f16_z_tied2, svfloat16_t, __fp16, ++ z0 = svnmsb_n_f16_z (p0, z1, z0, d4), ++ z0 = svnmsb_z (p0, z1, z0, d4)) ++ ++/* ++** nmsb_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmsb z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svnmsb_n_f16_z (p0, z1, z2, d4), ++ z0 = svnmsb_z (p0, z1, z2, d4)) ++ ++/* ++** nmsb_2_f16_z_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f16_z_tied1, svfloat16_t, ++ z0 = svnmsb_n_f16_z (p0, z0, z1, 2), ++ z0 = svnmsb_z (p0, z0, z1, 2)) ++ ++/* ++** nmsb_2_f16_z_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f16_z_tied2, svfloat16_t, ++ z0 = svnmsb_n_f16_z (p0, z1, z0, 2), ++ z0 = svnmsb_z (p0, z1, z0, 2)) ++ ++/* ++** nmsb_2_f16_z_untied: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fnmsb z0\.h, p0/m, z2\.h, \1 ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f16_z_untied, svfloat16_t, ++ z0 = svnmsb_n_f16_z (p0, z1, z2, 2), ++ z0 = svnmsb_z (p0, z1, z2, 2)) ++ ++/* ++** nmsb_f16_x_tied1: ++** fnmsb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_x_tied1, svfloat16_t, ++ z0 = svnmsb_f16_x (p0, z0, z1, z2), ++ z0 = svnmsb_x (p0, z0, z1, z2)) ++ ++/* ++** nmsb_f16_x_tied2: ++** fnmsb z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_x_tied2, svfloat16_t, ++ z0 = svnmsb_f16_x (p0, z1, z0, z2), ++ z0 = svnmsb_x (p0, z1, z0, z2)) ++ ++/* ++** nmsb_f16_x_tied3: ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_x_tied3, svfloat16_t, ++ z0 = svnmsb_f16_x (p0, z1, z2, z0), ++ z0 = svnmsb_x (p0, z1, z2, z0)) ++ ++/* ++** nmsb_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmsb z0\.h, p0/m, z2\.h, z3\.h ++** | ++** movprfx z0, z2 ++** fnmsb z0\.h, p0/m, z1\.h, z3\.h ++** | ++** movprfx z0, z3 ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f16_x_untied, svfloat16_t, ++ z0 = svnmsb_f16_x (p0, z1, z2, z3), ++ z0 = svnmsb_x (p0, z1, z2, z3)) ++ ++/* ++** nmsb_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svnmsb_n_f16_x (p0, z0, z1, d4), ++ z0 = svnmsb_x (p0, z0, z1, d4)) ++ ++/* ++** nmsb_h4_f16_x_tied2: ++** mov (z[0-9]+\.h), h4 ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_h4_f16_x_tied2, svfloat16_t, __fp16, ++ z0 = svnmsb_n_f16_x (p0, z1, z0, d4), ++ z0 = svnmsb_x (p0, z1, z0, d4)) ++ ++/* ++** nmsb_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svnmsb_n_f16_x (p0, z1, z2, d4), ++ z0 = svnmsb_x (p0, z1, z2, d4)) ++ ++/* ++** nmsb_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f16_x_tied1, svfloat16_t, ++ z0 = svnmsb_n_f16_x (p0, z0, z1, 2), ++ z0 = svnmsb_x (p0, z0, z1, 2)) ++ ++/* ++** nmsb_2_f16_x_tied2: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fnmsb z0\.h, p0/m, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f16_x_tied2, svfloat16_t, ++ z0 = svnmsb_n_f16_x (p0, z1, z0, 2), ++ z0 = svnmsb_x (p0, z1, z0, 2)) ++ ++/* ++** nmsb_2_f16_x_untied: ++** fmov z0\.h, #2\.0(?:e\+0)? ++** fnmls z0\.h, p0/m, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f16_x_untied, svfloat16_t, ++ z0 = svnmsb_n_f16_x (p0, z1, z2, 2), ++ z0 = svnmsb_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmsb_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f16_x_tied1, svfloat16_t, ++ z0 = svnmsb_f16_x (svptrue_b16 (), z0, z1, z2), ++ z0 = svnmsb_x (svptrue_b16 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmsb_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f16_x_tied2, svfloat16_t, ++ z0 = svnmsb_f16_x (svptrue_b16 (), z1, z0, z2), ++ z0 = svnmsb_x (svptrue_b16 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmsb_f16_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f16_x_tied3, svfloat16_t, ++ z0 = svnmsb_f16_x (svptrue_b16 (), z1, z2, z0), ++ z0 = svnmsb_x (svptrue_b16 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmsb_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f16_x_untied, svfloat16_t, ++ z0 = svnmsb_f16_x (svptrue_b16 (), z1, z2, z3), ++ z0 = svnmsb_x (svptrue_b16 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmsb_2_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_2_f16_x_tied1, svfloat16_t, ++ z0 = svnmsb_n_f16_x (svptrue_b16 (), z0, z1, 2), ++ z0 = svnmsb_x (svptrue_b16 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmsb_2_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_2_f16_x_tied2, svfloat16_t, ++ z0 = svnmsb_n_f16_x (svptrue_b16 (), z1, z0, 2), ++ z0 = svnmsb_x (svptrue_b16 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmsb_2_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_2_f16_x_untied, svfloat16_t, ++ z0 = svnmsb_n_f16_x (svptrue_b16 (), z1, z2, 2), ++ z0 = svnmsb_x (svptrue_b16 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f32.c +new file mode 100644 +index 000000000..c2204e040 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f32.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmsb_f32_m_tied1: ++** fnmsb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_m_tied1, svfloat32_t, ++ z0 = svnmsb_f32_m (p0, z0, z1, z2), ++ z0 = svnmsb_m (p0, z0, z1, z2)) ++ ++/* ++** nmsb_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmsb z0\.s, p0/m, \1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_m_tied2, svfloat32_t, ++ z0 = svnmsb_f32_m (p0, z1, z0, z2), ++ z0 = svnmsb_m (p0, z1, z0, z2)) ++ ++/* ++** nmsb_f32_m_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fnmsb z0\.s, p0/m, z2\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_m_tied3, svfloat32_t, ++ z0 = svnmsb_f32_m (p0, z1, z2, z0), ++ z0 = svnmsb_m (p0, z1, z2, z0)) ++ ++/* ++** nmsb_f32_m_untied: ++** movprfx z0, z1 ++** fnmsb z0\.s, p0/m, z2\.s, z3\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_m_untied, svfloat32_t, ++ z0 = svnmsb_f32_m (p0, z1, z2, z3), ++ z0 = svnmsb_m (p0, z1, z2, z3)) ++ ++/* ++** nmsb_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svnmsb_n_f32_m (p0, z0, z1, d4), ++ z0 = svnmsb_m (p0, z0, z1, d4)) ++ ++/* ++** nmsb_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fnmsb z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svnmsb_n_f32_m (p0, z1, z2, d4), ++ z0 = svnmsb_m (p0, z1, z2, d4)) ++ ++/* ++** nmsb_2_f32_m_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f32_m_tied1, svfloat32_t, ++ z0 = svnmsb_n_f32_m (p0, z0, z1, 2), ++ z0 = svnmsb_m (p0, z0, z1, 2)) ++ ++/* ++** nmsb_2_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmsb z0\.s, p0/m, z2\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f32_m_untied, svfloat32_t, ++ z0 = svnmsb_n_f32_m (p0, z1, z2, 2), ++ z0 = svnmsb_m (p0, z1, z2, 2)) ++ ++/* ++** nmsb_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmsb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_z_tied1, svfloat32_t, ++ z0 = svnmsb_f32_z (p0, z0, z1, z2), ++ z0 = svnmsb_z (p0, z0, z1, z2)) ++ ++/* ++** nmsb_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmsb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_z_tied2, svfloat32_t, ++ z0 = svnmsb_f32_z (p0, z1, z0, z2), ++ z0 = svnmsb_z (p0, z1, z0, z2)) ++ ++/* ++** nmsb_f32_z_tied3: ++** movprfx z0\.s, p0/z, z0\.s ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_z_tied3, svfloat32_t, ++ z0 = svnmsb_f32_z (p0, z1, z2, z0), ++ z0 = svnmsb_z (p0, z1, z2, z0)) ++ ++/* ++** nmsb_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmsb z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmsb z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0\.s, p0/z, z3\.s ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_z_untied, svfloat32_t, ++ z0 = svnmsb_f32_z (p0, z1, z2, z3), ++ z0 = svnmsb_z (p0, z1, z2, z3)) ++ ++/* ++** nmsb_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svnmsb_n_f32_z (p0, z0, z1, d4), ++ z0 = svnmsb_z (p0, z0, z1, d4)) ++ ++/* ++** nmsb_s4_f32_z_tied2: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_s4_f32_z_tied2, svfloat32_t, float, ++ z0 = svnmsb_n_f32_z (p0, z1, z0, d4), ++ z0 = svnmsb_z (p0, z1, z0, d4)) ++ ++/* ++** nmsb_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmsb z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svnmsb_n_f32_z (p0, z1, z2, d4), ++ z0 = svnmsb_z (p0, z1, z2, d4)) ++ ++/* ++** nmsb_2_f32_z_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f32_z_tied1, svfloat32_t, ++ z0 = svnmsb_n_f32_z (p0, z0, z1, 2), ++ z0 = svnmsb_z (p0, z0, z1, 2)) ++ ++/* ++** nmsb_2_f32_z_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f32_z_tied2, svfloat32_t, ++ z0 = svnmsb_n_f32_z (p0, z1, z0, 2), ++ z0 = svnmsb_z (p0, z1, z0, 2)) ++ ++/* ++** nmsb_2_f32_z_untied: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fnmsb z0\.s, p0/m, z2\.s, \1 ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f32_z_untied, svfloat32_t, ++ z0 = svnmsb_n_f32_z (p0, z1, z2, 2), ++ z0 = svnmsb_z (p0, z1, z2, 2)) ++ ++/* ++** nmsb_f32_x_tied1: ++** fnmsb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_x_tied1, svfloat32_t, ++ z0 = svnmsb_f32_x (p0, z0, z1, z2), ++ z0 = svnmsb_x (p0, z0, z1, z2)) ++ ++/* ++** nmsb_f32_x_tied2: ++** fnmsb z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_x_tied2, svfloat32_t, ++ z0 = svnmsb_f32_x (p0, z1, z0, z2), ++ z0 = svnmsb_x (p0, z1, z0, z2)) ++ ++/* ++** nmsb_f32_x_tied3: ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_x_tied3, svfloat32_t, ++ z0 = svnmsb_f32_x (p0, z1, z2, z0), ++ z0 = svnmsb_x (p0, z1, z2, z0)) ++ ++/* ++** nmsb_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmsb z0\.s, p0/m, z2\.s, z3\.s ++** | ++** movprfx z0, z2 ++** fnmsb z0\.s, p0/m, z1\.s, z3\.s ++** | ++** movprfx z0, z3 ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f32_x_untied, svfloat32_t, ++ z0 = svnmsb_f32_x (p0, z1, z2, z3), ++ z0 = svnmsb_x (p0, z1, z2, z3)) ++ ++/* ++** nmsb_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svnmsb_n_f32_x (p0, z0, z1, d4), ++ z0 = svnmsb_x (p0, z0, z1, d4)) ++ ++/* ++** nmsb_s4_f32_x_tied2: ++** mov (z[0-9]+\.s), s4 ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_s4_f32_x_tied2, svfloat32_t, float, ++ z0 = svnmsb_n_f32_x (p0, z1, z0, d4), ++ z0 = svnmsb_x (p0, z1, z0, d4)) ++ ++/* ++** nmsb_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svnmsb_n_f32_x (p0, z1, z2, d4), ++ z0 = svnmsb_x (p0, z1, z2, d4)) ++ ++/* ++** nmsb_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f32_x_tied1, svfloat32_t, ++ z0 = svnmsb_n_f32_x (p0, z0, z1, 2), ++ z0 = svnmsb_x (p0, z0, z1, 2)) ++ ++/* ++** nmsb_2_f32_x_tied2: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? ++** fnmsb z0\.s, p0/m, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f32_x_tied2, svfloat32_t, ++ z0 = svnmsb_n_f32_x (p0, z1, z0, 2), ++ z0 = svnmsb_x (p0, z1, z0, 2)) ++ ++/* ++** nmsb_2_f32_x_untied: ++** fmov z0\.s, #2\.0(?:e\+0)? ++** fnmls z0\.s, p0/m, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f32_x_untied, svfloat32_t, ++ z0 = svnmsb_n_f32_x (p0, z1, z2, 2), ++ z0 = svnmsb_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmsb_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f32_x_tied1, svfloat32_t, ++ z0 = svnmsb_f32_x (svptrue_b32 (), z0, z1, z2), ++ z0 = svnmsb_x (svptrue_b32 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmsb_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f32_x_tied2, svfloat32_t, ++ z0 = svnmsb_f32_x (svptrue_b32 (), z1, z0, z2), ++ z0 = svnmsb_x (svptrue_b32 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmsb_f32_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f32_x_tied3, svfloat32_t, ++ z0 = svnmsb_f32_x (svptrue_b32 (), z1, z2, z0), ++ z0 = svnmsb_x (svptrue_b32 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmsb_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f32_x_untied, svfloat32_t, ++ z0 = svnmsb_f32_x (svptrue_b32 (), z1, z2, z3), ++ z0 = svnmsb_x (svptrue_b32 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmsb_2_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_2_f32_x_tied1, svfloat32_t, ++ z0 = svnmsb_n_f32_x (svptrue_b32 (), z0, z1, 2), ++ z0 = svnmsb_x (svptrue_b32 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmsb_2_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_2_f32_x_tied2, svfloat32_t, ++ z0 = svnmsb_n_f32_x (svptrue_b32 (), z1, z0, 2), ++ z0 = svnmsb_x (svptrue_b32 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmsb_2_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_2_f32_x_untied, svfloat32_t, ++ z0 = svnmsb_n_f32_x (svptrue_b32 (), z1, z2, 2), ++ z0 = svnmsb_x (svptrue_b32 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f64.c +new file mode 100644 +index 000000000..56592d3ae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f64.c +@@ -0,0 +1,398 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nmsb_f64_m_tied1: ++** fnmsb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_m_tied1, svfloat64_t, ++ z0 = svnmsb_f64_m (p0, z0, z1, z2), ++ z0 = svnmsb_m (p0, z0, z1, z2)) ++ ++/* ++** nmsb_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fnmsb z0\.d, p0/m, \1, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_m_tied2, svfloat64_t, ++ z0 = svnmsb_f64_m (p0, z1, z0, z2), ++ z0 = svnmsb_m (p0, z1, z0, z2)) ++ ++/* ++** nmsb_f64_m_tied3: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fnmsb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_m_tied3, svfloat64_t, ++ z0 = svnmsb_f64_m (p0, z1, z2, z0), ++ z0 = svnmsb_m (p0, z1, z2, z0)) ++ ++/* ++** nmsb_f64_m_untied: ++** movprfx z0, z1 ++** fnmsb z0\.d, p0/m, z2\.d, z3\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_m_untied, svfloat64_t, ++ z0 = svnmsb_f64_m (p0, z1, z2, z3), ++ z0 = svnmsb_m (p0, z1, z2, z3)) ++ ++/* ++** nmsb_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svnmsb_n_f64_m (p0, z0, z1, d4), ++ z0 = svnmsb_m (p0, z0, z1, d4)) ++ ++/* ++** nmsb_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fnmsb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svnmsb_n_f64_m (p0, z1, z2, d4), ++ z0 = svnmsb_m (p0, z1, z2, d4)) ++ ++/* ++** nmsb_2_f64_m_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f64_m_tied1, svfloat64_t, ++ z0 = svnmsb_n_f64_m (p0, z0, z1, 2), ++ z0 = svnmsb_m (p0, z0, z1, 2)) ++ ++/* ++** nmsb_2_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fnmsb z0\.d, p0/m, z2\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f64_m_untied, svfloat64_t, ++ z0 = svnmsb_n_f64_m (p0, z1, z2, 2), ++ z0 = svnmsb_m (p0, z1, z2, 2)) ++ ++/* ++** nmsb_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmsb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_z_tied1, svfloat64_t, ++ z0 = svnmsb_f64_z (p0, z0, z1, z2), ++ z0 = svnmsb_z (p0, z0, z1, z2)) ++ ++/* ++** nmsb_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmsb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_z_tied2, svfloat64_t, ++ z0 = svnmsb_f64_z (p0, z1, z0, z2), ++ z0 = svnmsb_z (p0, z1, z0, z2)) ++ ++/* ++** nmsb_f64_z_tied3: ++** movprfx z0\.d, p0/z, z0\.d ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_z_tied3, svfloat64_t, ++ z0 = svnmsb_f64_z (p0, z1, z2, z0), ++ z0 = svnmsb_z (p0, z1, z2, z0)) ++ ++/* ++** nmsb_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmsb z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmsb z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0\.d, p0/z, z3\.d ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_z_untied, svfloat64_t, ++ z0 = svnmsb_f64_z (p0, z1, z2, z3), ++ z0 = svnmsb_z (p0, z1, z2, z3)) ++ ++/* ++** nmsb_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svnmsb_n_f64_z (p0, z0, z1, d4), ++ z0 = svnmsb_z (p0, z0, z1, d4)) ++ ++/* ++** nmsb_d4_f64_z_tied2: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_d4_f64_z_tied2, svfloat64_t, double, ++ z0 = svnmsb_n_f64_z (p0, z1, z0, d4), ++ z0 = svnmsb_z (p0, z1, z0, d4)) ++ ++/* ++** nmsb_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmsb z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svnmsb_n_f64_z (p0, z1, z2, d4), ++ z0 = svnmsb_z (p0, z1, z2, d4)) ++ ++/* ++** nmsb_2_f64_z_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f64_z_tied1, svfloat64_t, ++ z0 = svnmsb_n_f64_z (p0, z0, z1, 2), ++ z0 = svnmsb_z (p0, z0, z1, 2)) ++ ++/* ++** nmsb_2_f64_z_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f64_z_tied2, svfloat64_t, ++ z0 = svnmsb_n_f64_z (p0, z1, z0, 2), ++ z0 = svnmsb_z (p0, z1, z0, 2)) ++ ++/* ++** nmsb_2_f64_z_untied: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fnmsb z0\.d, p0/m, z2\.d, \1 ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f64_z_untied, svfloat64_t, ++ z0 = svnmsb_n_f64_z (p0, z1, z2, 2), ++ z0 = svnmsb_z (p0, z1, z2, 2)) ++ ++/* ++** nmsb_f64_x_tied1: ++** fnmsb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_x_tied1, svfloat64_t, ++ z0 = svnmsb_f64_x (p0, z0, z1, z2), ++ z0 = svnmsb_x (p0, z0, z1, z2)) ++ ++/* ++** nmsb_f64_x_tied2: ++** fnmsb z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_x_tied2, svfloat64_t, ++ z0 = svnmsb_f64_x (p0, z1, z0, z2), ++ z0 = svnmsb_x (p0, z1, z0, z2)) ++ ++/* ++** nmsb_f64_x_tied3: ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_x_tied3, svfloat64_t, ++ z0 = svnmsb_f64_x (p0, z1, z2, z0), ++ z0 = svnmsb_x (p0, z1, z2, z0)) ++ ++/* ++** nmsb_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fnmsb z0\.d, p0/m, z2\.d, z3\.d ++** | ++** movprfx z0, z2 ++** fnmsb z0\.d, p0/m, z1\.d, z3\.d ++** | ++** movprfx z0, z3 ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_f64_x_untied, svfloat64_t, ++ z0 = svnmsb_f64_x (p0, z1, z2, z3), ++ z0 = svnmsb_x (p0, z1, z2, z3)) ++ ++/* ++** nmsb_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svnmsb_n_f64_x (p0, z0, z1, d4), ++ z0 = svnmsb_x (p0, z0, z1, d4)) ++ ++/* ++** nmsb_d4_f64_x_tied2: ++** mov (z[0-9]+\.d), d4 ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_d4_f64_x_tied2, svfloat64_t, double, ++ z0 = svnmsb_n_f64_x (p0, z1, z0, d4), ++ z0 = svnmsb_x (p0, z1, z0, d4)) ++ ++/* ++** nmsb_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (nmsb_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svnmsb_n_f64_x (p0, z1, z2, d4), ++ z0 = svnmsb_x (p0, z1, z2, d4)) ++ ++/* ++** nmsb_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f64_x_tied1, svfloat64_t, ++ z0 = svnmsb_n_f64_x (p0, z0, z1, 2), ++ z0 = svnmsb_x (p0, z0, z1, 2)) ++ ++/* ++** nmsb_2_f64_x_tied2: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? ++** fnmsb z0\.d, p0/m, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f64_x_tied2, svfloat64_t, ++ z0 = svnmsb_n_f64_x (p0, z1, z0, 2), ++ z0 = svnmsb_x (p0, z1, z0, 2)) ++ ++/* ++** nmsb_2_f64_x_untied: ++** fmov z0\.d, #2\.0(?:e\+0)? ++** fnmls z0\.d, p0/m, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (nmsb_2_f64_x_untied, svfloat64_t, ++ z0 = svnmsb_n_f64_x (p0, z1, z2, 2), ++ z0 = svnmsb_x (p0, z1, z2, 2)) ++ ++/* ++** ptrue_nmsb_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f64_x_tied1, svfloat64_t, ++ z0 = svnmsb_f64_x (svptrue_b64 (), z0, z1, z2), ++ z0 = svnmsb_x (svptrue_b64 (), z0, z1, z2)) ++ ++/* ++** ptrue_nmsb_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f64_x_tied2, svfloat64_t, ++ z0 = svnmsb_f64_x (svptrue_b64 (), z1, z0, z2), ++ z0 = svnmsb_x (svptrue_b64 (), z1, z0, z2)) ++ ++/* ++** ptrue_nmsb_f64_x_tied3: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f64_x_tied3, svfloat64_t, ++ z0 = svnmsb_f64_x (svptrue_b64 (), z1, z2, z0), ++ z0 = svnmsb_x (svptrue_b64 (), z1, z2, z0)) ++ ++/* ++** ptrue_nmsb_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_f64_x_untied, svfloat64_t, ++ z0 = svnmsb_f64_x (svptrue_b64 (), z1, z2, z3), ++ z0 = svnmsb_x (svptrue_b64 (), z1, z2, z3)) ++ ++/* ++** ptrue_nmsb_2_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_2_f64_x_tied1, svfloat64_t, ++ z0 = svnmsb_n_f64_x (svptrue_b64 (), z0, z1, 2), ++ z0 = svnmsb_x (svptrue_b64 (), z0, z1, 2)) ++ ++/* ++** ptrue_nmsb_2_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_2_f64_x_tied2, svfloat64_t, ++ z0 = svnmsb_n_f64_x (svptrue_b64 (), z1, z0, 2), ++ z0 = svnmsb_x (svptrue_b64 (), z1, z0, 2)) ++ ++/* ++** ptrue_nmsb_2_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_nmsb_2_f64_x_untied, svfloat64_t, ++ z0 = svnmsb_n_f64_x (svptrue_b64 (), z1, z2, 2), ++ z0 = svnmsb_x (svptrue_b64 (), z1, z2, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nor_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nor_b.c +new file mode 100644 +index 000000000..997e34537 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nor_b.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** nor_b_z_tied1: ++** nor p0\.b, p3/z, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (nor_b_z_tied1, ++ p0 = svnor_b_z (p3, p0, p1), ++ p0 = svnor_z (p3, p0, p1)) ++ ++/* ++** nor_b_z_tied2: ++** nor p0\.b, p3/z, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (nor_b_z_tied2, ++ p0 = svnor_b_z (p3, p1, p0), ++ p0 = svnor_z (p3, p1, p0)) ++ ++/* ++** nor_b_z_untied: ++** nor p0\.b, p3/z, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (nor_b_z_untied, ++ p0 = svnor_b_z (p3, p1, p2), ++ p0 = svnor_z (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_b.c +new file mode 100644 +index 000000000..23a3a6aae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_b.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** not_b_z_tied1: ++** not p0\.b, p3/z, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (not_b_z_tied1, ++ p0 = svnot_b_z (p3, p0), ++ p0 = svnot_z (p3, p0)) ++ ++/* ++** not_b_z_untied: ++** not p0\.b, p3/z, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (not_b_z_untied, ++ p0 = svnot_b_z (p3, p1), ++ p0 = svnot_z (p3, p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s16.c +new file mode 100644 +index 000000000..bacd6b12c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** not_s16_m_tied12: ++** not z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_s16_m_tied12, svint16_t, ++ z0 = svnot_s16_m (z0, p0, z0), ++ z0 = svnot_m (z0, p0, z0)) ++ ++/* ++** not_s16_m_tied1: ++** not z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_s16_m_tied1, svint16_t, ++ z0 = svnot_s16_m (z0, p0, z1), ++ z0 = svnot_m (z0, p0, z1)) ++ ++/* ++** not_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d 
++** movprfx z0, z1 ++** not z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_s16_m_tied2, svint16_t, ++ z0 = svnot_s16_m (z1, p0, z0), ++ z0 = svnot_m (z1, p0, z0)) ++ ++/* ++** not_s16_m_untied: ++** movprfx z0, z2 ++** not z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_s16_m_untied, svint16_t, ++ z0 = svnot_s16_m (z2, p0, z1), ++ z0 = svnot_m (z2, p0, z1)) ++ ++/* ++** not_s16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** not z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_s16_z_tied1, svint16_t, ++ z0 = svnot_s16_z (p0, z0), ++ z0 = svnot_z (p0, z0)) ++ ++/* ++** not_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** not z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_s16_z_untied, svint16_t, ++ z0 = svnot_s16_z (p0, z1), ++ z0 = svnot_z (p0, z1)) ++ ++/* ++** not_s16_x_tied1: ++** not z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_s16_x_tied1, svint16_t, ++ z0 = svnot_s16_x (p0, z0), ++ z0 = svnot_x (p0, z0)) ++ ++/* ++** not_s16_x_untied: ++** not z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_s16_x_untied, svint16_t, ++ z0 = svnot_s16_x (p0, z1), ++ z0 = svnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s32.c +new file mode 100644 +index 000000000..8b15d6e91 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** not_s32_m_tied12: ++** not z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_s32_m_tied12, svint32_t, ++ z0 = svnot_s32_m (z0, p0, z0), ++ z0 = svnot_m (z0, p0, z0)) ++ ++/* ++** not_s32_m_tied1: ++** not z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_s32_m_tied1, svint32_t, ++ z0 = svnot_s32_m (z0, p0, z1), ++ z0 = svnot_m (z0, p0, z1)) ++ ++/* ++** not_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** not z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_s32_m_tied2, svint32_t, ++ z0 = svnot_s32_m (z1, p0, z0), ++ z0 = svnot_m (z1, p0, z0)) ++ ++/* ++** not_s32_m_untied: ++** movprfx z0, z2 ++** not z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_s32_m_untied, svint32_t, ++ z0 = svnot_s32_m (z2, p0, z1), ++ z0 = svnot_m (z2, p0, z1)) ++ ++/* ++** not_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** not z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_s32_z_tied1, svint32_t, ++ z0 = svnot_s32_z (p0, z0), ++ z0 = svnot_z (p0, z0)) ++ ++/* ++** not_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** not z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_s32_z_untied, svint32_t, ++ z0 = svnot_s32_z (p0, z1), ++ z0 = svnot_z (p0, z1)) ++ ++/* ++** not_s32_x_tied1: ++** not z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_s32_x_tied1, svint32_t, ++ z0 = svnot_s32_x (p0, z0), ++ z0 = svnot_x (p0, z0)) ++ ++/* ++** not_s32_x_untied: ++** not z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_s32_x_untied, svint32_t, ++ z0 = svnot_s32_x (p0, z1), ++ z0 = svnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s64.c +new file mode 100644 +index 000000000..8e7f7b9e8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** not_s64_m_tied12: ++** not 
z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_s64_m_tied12, svint64_t, ++ z0 = svnot_s64_m (z0, p0, z0), ++ z0 = svnot_m (z0, p0, z0)) ++ ++/* ++** not_s64_m_tied1: ++** not z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_s64_m_tied1, svint64_t, ++ z0 = svnot_s64_m (z0, p0, z1), ++ z0 = svnot_m (z0, p0, z1)) ++ ++/* ++** not_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** not z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (not_s64_m_tied2, svint64_t, ++ z0 = svnot_s64_m (z1, p0, z0), ++ z0 = svnot_m (z1, p0, z0)) ++ ++/* ++** not_s64_m_untied: ++** movprfx z0, z2 ++** not z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_s64_m_untied, svint64_t, ++ z0 = svnot_s64_m (z2, p0, z1), ++ z0 = svnot_m (z2, p0, z1)) ++ ++/* ++** not_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** not z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (not_s64_z_tied1, svint64_t, ++ z0 = svnot_s64_z (p0, z0), ++ z0 = svnot_z (p0, z0)) ++ ++/* ++** not_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** not z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_s64_z_untied, svint64_t, ++ z0 = svnot_s64_z (p0, z1), ++ z0 = svnot_z (p0, z1)) ++ ++/* ++** not_s64_x_tied1: ++** not z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_s64_x_tied1, svint64_t, ++ z0 = svnot_s64_x (p0, z0), ++ z0 = svnot_x (p0, z0)) ++ ++/* ++** not_s64_x_untied: ++** not z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_s64_x_untied, svint64_t, ++ z0 = svnot_s64_x (p0, z1), ++ z0 = svnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s8.c +new file mode 100644 +index 000000000..e807f08f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s8.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** not_s8_m_tied12: ++** not z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_s8_m_tied12, svint8_t, ++ z0 = svnot_s8_m (z0, p0, z0), ++ z0 = svnot_m (z0, p0, z0)) ++ ++/* ++** not_s8_m_tied1: ++** not z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_s8_m_tied1, svint8_t, ++ z0 = svnot_s8_m (z0, p0, z1), ++ z0 = svnot_m (z0, p0, z1)) ++ ++/* ++** not_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** not z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_s8_m_tied2, svint8_t, ++ z0 = svnot_s8_m (z1, p0, z0), ++ z0 = svnot_m (z1, p0, z0)) ++ ++/* ++** not_s8_m_untied: ++** movprfx z0, z2 ++** not z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_s8_m_untied, svint8_t, ++ z0 = svnot_s8_m (z2, p0, z1), ++ z0 = svnot_m (z2, p0, z1)) ++ ++/* ++** not_s8_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, \1\.b ++** not z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_s8_z_tied1, svint8_t, ++ z0 = svnot_s8_z (p0, z0), ++ z0 = svnot_z (p0, z0)) ++ ++/* ++** not_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** not z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_s8_z_untied, svint8_t, ++ z0 = svnot_s8_z (p0, z1), ++ z0 = svnot_z (p0, z1)) ++ ++/* ++** not_s8_x_tied1: ++** not z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_s8_x_tied1, svint8_t, ++ z0 = svnot_s8_x (p0, z0), ++ z0 = svnot_x (p0, z0)) ++ ++/* ++** not_s8_x_untied: ++** not z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_s8_x_untied, svint8_t, ++ z0 = svnot_s8_x (p0, z1), ++ z0 = svnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u16.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u16.c +new file mode 100644 +index 000000000..c812005f1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** not_u16_m_tied12: ++** not z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_u16_m_tied12, svuint16_t, ++ z0 = svnot_u16_m (z0, p0, z0), ++ z0 = svnot_m (z0, p0, z0)) ++ ++/* ++** not_u16_m_tied1: ++** not z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_u16_m_tied1, svuint16_t, ++ z0 = svnot_u16_m (z0, p0, z1), ++ z0 = svnot_m (z0, p0, z1)) ++ ++/* ++** not_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** not z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_u16_m_tied2, svuint16_t, ++ z0 = svnot_u16_m (z1, p0, z0), ++ z0 = svnot_m (z1, p0, z0)) ++ ++/* ++** not_u16_m_untied: ++** movprfx z0, z2 ++** not z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_u16_m_untied, svuint16_t, ++ z0 = svnot_u16_m (z2, p0, z1), ++ z0 = svnot_m (z2, p0, z1)) ++ ++/* ++** not_u16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** not z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_u16_z_tied1, svuint16_t, ++ z0 = svnot_u16_z (p0, z0), ++ z0 = svnot_z (p0, z0)) ++ ++/* ++** not_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** not z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_u16_z_untied, svuint16_t, ++ z0 = svnot_u16_z (p0, z1), ++ z0 = svnot_z (p0, z1)) ++ ++/* ++** not_u16_x_tied1: ++** not z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_u16_x_tied1, svuint16_t, ++ z0 = svnot_u16_x (p0, z0), ++ z0 = svnot_x (p0, z0)) ++ ++/* ++** not_u16_x_untied: ++** not z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (not_u16_x_untied, svuint16_t, ++ z0 = svnot_u16_x (p0, z1), ++ z0 = svnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u32.c +new file mode 100644 +index 000000000..7b7e9ca21 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** not_u32_m_tied12: ++** not z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_u32_m_tied12, svuint32_t, ++ z0 = svnot_u32_m (z0, p0, z0), ++ z0 = svnot_m (z0, p0, z0)) ++ ++/* ++** not_u32_m_tied1: ++** not z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_u32_m_tied1, svuint32_t, ++ z0 = svnot_u32_m (z0, p0, z1), ++ z0 = svnot_m (z0, p0, z1)) ++ ++/* ++** not_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** not z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_u32_m_tied2, svuint32_t, ++ z0 = svnot_u32_m (z1, p0, z0), ++ z0 = svnot_m (z1, p0, z0)) ++ ++/* ++** not_u32_m_untied: ++** movprfx z0, z2 ++** not z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_u32_m_untied, svuint32_t, ++ z0 = svnot_u32_m (z2, p0, z1), ++ z0 = svnot_m (z2, p0, z1)) ++ ++/* ++** not_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** not z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_u32_z_tied1, svuint32_t, ++ z0 = svnot_u32_z (p0, z0), ++ z0 = svnot_z (p0, z0)) ++ ++/* ++** not_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** not z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_u32_z_untied, svuint32_t, ++ z0 = svnot_u32_z (p0, z1), ++ z0 = svnot_z (p0, z1)) ++ ++/* ++** 
not_u32_x_tied1: ++** not z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_u32_x_tied1, svuint32_t, ++ z0 = svnot_u32_x (p0, z0), ++ z0 = svnot_x (p0, z0)) ++ ++/* ++** not_u32_x_untied: ++** not z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (not_u32_x_untied, svuint32_t, ++ z0 = svnot_u32_x (p0, z1), ++ z0 = svnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u64.c +new file mode 100644 +index 000000000..27b92ad84 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** not_u64_m_tied12: ++** not z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_u64_m_tied12, svuint64_t, ++ z0 = svnot_u64_m (z0, p0, z0), ++ z0 = svnot_m (z0, p0, z0)) ++ ++/* ++** not_u64_m_tied1: ++** not z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_u64_m_tied1, svuint64_t, ++ z0 = svnot_u64_m (z0, p0, z1), ++ z0 = svnot_m (z0, p0, z1)) ++ ++/* ++** not_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** not z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (not_u64_m_tied2, svuint64_t, ++ z0 = svnot_u64_m (z1, p0, z0), ++ z0 = svnot_m (z1, p0, z0)) ++ ++/* ++** not_u64_m_untied: ++** movprfx z0, z2 ++** not z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_u64_m_untied, svuint64_t, ++ z0 = svnot_u64_m (z2, p0, z1), ++ z0 = svnot_m (z2, p0, z1)) ++ ++/* ++** not_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** not z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (not_u64_z_tied1, svuint64_t, ++ z0 = svnot_u64_z (p0, z0), ++ z0 = svnot_z (p0, z0)) ++ ++/* ++** not_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** not z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_u64_z_untied, svuint64_t, ++ z0 = svnot_u64_z (p0, z1), ++ z0 = svnot_z (p0, z1)) ++ ++/* ++** not_u64_x_tied1: ++** not z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_u64_x_tied1, svuint64_t, ++ z0 = svnot_u64_x (p0, z0), ++ z0 = svnot_x (p0, z0)) ++ ++/* ++** not_u64_x_untied: ++** not z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (not_u64_x_untied, svuint64_t, ++ z0 = svnot_u64_x (p0, z1), ++ z0 = svnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u8.c +new file mode 100644 +index 000000000..bd2f36cad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u8.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** not_u8_m_tied12: ++** not z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_u8_m_tied12, svuint8_t, ++ z0 = svnot_u8_m (z0, p0, z0), ++ z0 = svnot_m (z0, p0, z0)) ++ ++/* ++** not_u8_m_tied1: ++** not z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_u8_m_tied1, svuint8_t, ++ z0 = svnot_u8_m (z0, p0, z1), ++ z0 = svnot_m (z0, p0, z1)) ++ ++/* ++** not_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** not z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_u8_m_tied2, svuint8_t, ++ z0 = svnot_u8_m (z1, p0, z0), ++ z0 = svnot_m (z1, p0, z0)) ++ ++/* ++** not_u8_m_untied: ++** movprfx z0, z2 ++** not z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_u8_m_untied, svuint8_t, ++ z0 = svnot_u8_m (z2, p0, z1), ++ z0 = svnot_m (z2, p0, z1)) ++ ++/* ++** not_u8_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, 
\1\.b ++** not z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_u8_z_tied1, svuint8_t, ++ z0 = svnot_u8_z (p0, z0), ++ z0 = svnot_z (p0, z0)) ++ ++/* ++** not_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** not z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_u8_z_untied, svuint8_t, ++ z0 = svnot_u8_z (p0, z1), ++ z0 = svnot_z (p0, z1)) ++ ++/* ++** not_u8_x_tied1: ++** not z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_u8_x_tied1, svuint8_t, ++ z0 = svnot_u8_x (p0, z0), ++ z0 = svnot_x (p0, z0)) ++ ++/* ++** not_u8_x_untied: ++** not z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (not_u8_x_untied, svuint8_t, ++ z0 = svnot_u8_x (p0, z1), ++ z0 = svnot_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orn_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orn_b.c +new file mode 100644 +index 000000000..423a18bc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orn_b.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orn_b_z_tied1: ++** orn p0\.b, p3/z, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (orn_b_z_tied1, ++ p0 = svorn_b_z (p3, p0, p1), ++ p0 = svorn_z (p3, p0, p1)) ++ ++/* ++** orn_b_z_tied2: ++** orn p0\.b, p3/z, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (orn_b_z_tied2, ++ p0 = svorn_b_z (p3, p1, p0), ++ p0 = svorn_z (p3, p1, p0)) ++ ++/* ++** orn_b_z_untied: ++** orn p0\.b, p3/z, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (orn_b_z_untied, ++ p0 = svorn_b_z (p3, p1, p2), ++ p0 = svorn_z (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_b.c +new file mode 100644 +index 000000000..fba9ba7df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_b.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orr_b_z_tied1: ++** orr p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) ++** ret ++*/ ++TEST_UNIFORM_P (orr_b_z_tied1, ++ p0 = svorr_b_z (p3, p0, p1), ++ p0 = svorr_z (p3, p0, p1)) ++ ++/* ++** orr_b_z_tied2: ++** orr p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) ++** ret ++*/ ++TEST_UNIFORM_P (orr_b_z_tied2, ++ p0 = svorr_b_z (p3, p1, p0), ++ p0 = svorr_z (p3, p1, p0)) ++ ++/* ++** orr_b_z_untied: ++** orr p0\.b, p3/z, (p1\.b, p2\.b|p2\.b, p1\.b) ++** ret ++*/ ++TEST_UNIFORM_P (orr_b_z_untied, ++ p0 = svorr_b_z (p3, p1, p2), ++ p0 = svorr_z (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s16.c +new file mode 100644 +index 000000000..62b707a9c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s16.c +@@ -0,0 +1,376 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orr_s16_m_tied1: ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s16_m_tied1, svint16_t, ++ z0 = svorr_s16_m (p0, z0, z1), ++ z0 = svorr_m (p0, z0, z1)) ++ ++/* ++** orr_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** orr z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s16_m_tied2, svint16_t, ++ z0 = svorr_s16_m (p0, z1, z0), ++ z0 = svorr_m (p0, z1, z0)) ++ ++/* ++** orr_s16_m_untied: ++** movprfx z0, z1 ++** orr z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s16_m_untied, svint16_t, ++ z0 = svorr_s16_m (p0, z1, z2), ++ z0 = svorr_m (p0, z1, z2)) ++ ++/* ++** 
orr_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svorr_n_s16_m (p0, z0, x0), ++ z0 = svorr_m (p0, z0, x0)) ++ ++/* ++** orr_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svorr_n_s16_m (p0, z1, x0), ++ z0 = svorr_m (p0, z1, x0)) ++ ++/* ++** orr_1_s16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s16_m_tied1, svint16_t, ++ z0 = svorr_n_s16_m (p0, z0, 1), ++ z0 = svorr_m (p0, z0, 1)) ++ ++/* ++** orr_1_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s16_m_untied, svint16_t, ++ z0 = svorr_n_s16_m (p0, z1, 1), ++ z0 = svorr_m (p0, z1, 1)) ++ ++/* ++** orr_m2_s16_m: ++** mov (z[0-9]+\.h), #-2 ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m2_s16_m, svint16_t, ++ z0 = svorr_n_s16_m (p0, z0, -2), ++ z0 = svorr_m (p0, z0, -2)) ++ ++/* ++** orr_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s16_z_tied1, svint16_t, ++ z0 = svorr_s16_z (p0, z0, z1), ++ z0 = svorr_z (p0, z0, z1)) ++ ++/* ++** orr_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s16_z_tied2, svint16_t, ++ z0 = svorr_s16_z (p0, z1, z0), ++ z0 = svorr_z (p0, z1, z0)) ++ ++/* ++** orr_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** orr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s16_z_untied, svint16_t, ++ z0 = svorr_s16_z (p0, z1, z2), ++ z0 = svorr_z (p0, z1, z2)) ++ ++/* ++** orr_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svorr_n_s16_z (p0, z0, x0), ++ z0 = svorr_z (p0, z0, x0)) ++ ++/* ++** orr_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** orr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svorr_n_s16_z (p0, z1, x0), ++ z0 = svorr_z (p0, z1, x0)) ++ ++/* ++** orr_1_s16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s16_z_tied1, svint16_t, ++ z0 = svorr_n_s16_z (p0, z0, 1), ++ z0 = svorr_z (p0, z0, 1)) ++ ++/* ++** orr_1_s16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** orr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s16_z_untied, svint16_t, ++ z0 = svorr_n_s16_z (p0, z1, 1), ++ z0 = svorr_z (p0, z1, 1)) ++ ++/* ++** orr_s16_x_tied1: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s16_x_tied1, svint16_t, ++ z0 = svorr_s16_x (p0, z0, z1), ++ z0 = svorr_x (p0, z0, z1)) ++ ++/* ++** orr_s16_x_tied2: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s16_x_tied2, svint16_t, ++ z0 = svorr_s16_x (p0, z1, z0), ++ z0 = svorr_x (p0, z1, z0)) ++ ++/* ++** orr_s16_x_untied: ++** orr z0\.d, 
(z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s16_x_untied, svint16_t, ++ z0 = svorr_s16_x (p0, z1, z2), ++ z0 = svorr_x (p0, z1, z2)) ++ ++/* ++** orr_w0_s16_x_tied1: ++** mov (z[0-9]+)\.h, w0 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svorr_n_s16_x (p0, z0, x0), ++ z0 = svorr_x (p0, z0, x0)) ++ ++/* ++** orr_w0_s16_x_untied: ++** mov (z[0-9]+)\.h, w0 ++** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svorr_n_s16_x (p0, z1, x0), ++ z0 = svorr_x (p0, z1, x0)) ++ ++/* ++** orr_1_s16_x_tied1: ++** orr z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s16_x_tied1, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, 1), ++ z0 = svorr_x (p0, z0, 1)) ++ ++/* ++** orr_1_s16_x_untied: ++** movprfx z0, z1 ++** orr z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s16_x_untied, svint16_t, ++ z0 = svorr_n_s16_x (p0, z1, 1), ++ z0 = svorr_x (p0, z1, 1)) ++ ++/* ++** orr_127_s16_x: ++** orr z0\.h, z0\.h, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (orr_127_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, 127), ++ z0 = svorr_x (p0, z0, 127)) ++ ++/* ++** orr_128_s16_x: ++** orr z0\.h, z0\.h, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_128_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, 128), ++ z0 = svorr_x (p0, z0, 128)) ++ ++/* ++** orr_255_s16_x: ++** orr z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_255_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, 255), ++ z0 = svorr_x (p0, z0, 255)) ++ ++/* ++** orr_256_s16_x: ++** orr z0\.h, z0\.h, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_256_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, 256), ++ z0 = svorr_x (p0, z0, 256)) ++ ++/* ++** orr_257_s16_x: ++** orr z0\.h, z0\.h, #0x101 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_257_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, 257), ++ z0 = svorr_x (p0, z0, 257)) ++ ++/* ++** orr_512_s16_x: ++** orr z0\.h, z0\.h, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_512_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, 512), ++ z0 = svorr_x (p0, z0, 512)) ++ ++/* ++** orr_65280_s16_x: ++** orr z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_65280_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, 0xff00), ++ z0 = svorr_x (p0, z0, 0xff00)) ++ ++/* ++** orr_m127_s16_x: ++** orr z0\.h, z0\.h, #0xff81 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m127_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, -127), ++ z0 = svorr_x (p0, z0, -127)) ++ ++/* ++** orr_m128_s16_x: ++** orr z0\.h, z0\.h, #0xff80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m128_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, -128), ++ z0 = svorr_x (p0, z0, -128)) ++ ++/* ++** orr_m255_s16_x: ++** orr z0\.h, z0\.h, #0xff01 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m255_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, -255), ++ z0 = svorr_x (p0, z0, -255)) ++ ++/* ++** orr_m256_s16_x: ++** orr z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m256_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, -256), ++ z0 = svorr_x (p0, z0, -256)) ++ ++/* ++** orr_m257_s16_x: ++** orr z0\.h, z0\.h, #0xfeff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m257_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, -257), ++ z0 = svorr_x (p0, z0, -257)) ++ ++/* ++** orr_m512_s16_x: ++** orr z0\.h, z0\.h, #0xfe00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m512_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, -512), ++ z0 = svorr_x (p0, z0, -512)) ++ ++/* ++** orr_m32768_s16_x: ++** orr z0\.h, z0\.h, #0x8000 ++** ret ++*/ 
++TEST_UNIFORM_Z (orr_m32768_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, -0x8000), ++ z0 = svorr_x (p0, z0, -0x8000)) ++ ++/* ++** orr_5_s16_x: ++** mov (z[0-9]+)\.h, #5 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_5_s16_x, svint16_t, ++ z0 = svorr_n_s16_x (p0, z0, 5), ++ z0 = svorr_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s32.c +new file mode 100644 +index 000000000..2e0e1e888 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s32.c +@@ -0,0 +1,372 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orr_s32_m_tied1: ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s32_m_tied1, svint32_t, ++ z0 = svorr_s32_m (p0, z0, z1), ++ z0 = svorr_m (p0, z0, z1)) ++ ++/* ++** orr_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** orr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s32_m_tied2, svint32_t, ++ z0 = svorr_s32_m (p0, z1, z0), ++ z0 = svorr_m (p0, z1, z0)) ++ ++/* ++** orr_s32_m_untied: ++** movprfx z0, z1 ++** orr z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s32_m_untied, svint32_t, ++ z0 = svorr_s32_m (p0, z1, z2), ++ z0 = svorr_m (p0, z1, z2)) ++ ++/* ++** orr_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svorr_n_s32_m (p0, z0, x0), ++ z0 = svorr_m (p0, z0, x0)) ++ ++/* ++** orr_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svorr_n_s32_m (p0, z1, x0), ++ z0 = svorr_m (p0, z1, x0)) ++ ++/* ++** orr_1_s32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s32_m_tied1, svint32_t, ++ z0 = svorr_n_s32_m (p0, z0, 1), ++ z0 = svorr_m (p0, z0, 1)) ++ ++/* ++** orr_1_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s32_m_untied, svint32_t, ++ z0 = svorr_n_s32_m (p0, z1, 1), ++ z0 = svorr_m (p0, z1, 1)) ++ ++/* ++** orr_m2_s32_m: ++** mov (z[0-9]+\.s), #-2 ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m2_s32_m, svint32_t, ++ z0 = svorr_n_s32_m (p0, z0, -2), ++ z0 = svorr_m (p0, z0, -2)) ++ ++/* ++** orr_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s32_z_tied1, svint32_t, ++ z0 = svorr_s32_z (p0, z0, z1), ++ z0 = svorr_z (p0, z0, z1)) ++ ++/* ++** orr_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s32_z_tied2, svint32_t, ++ z0 = svorr_s32_z (p0, z1, z0), ++ z0 = svorr_z (p0, z1, z0)) ++ ++/* ++** orr_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** orr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s32_z_untied, svint32_t, ++ z0 = svorr_s32_z (p0, z1, z2), ++ z0 = svorr_z (p0, z1, z2)) ++ ++/* ++** orr_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svorr_n_s32_z (p0, z0, x0), ++ z0 = svorr_z (p0, z0, x0)) ++ ++/* ++** 
orr_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** orr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svorr_n_s32_z (p0, z1, x0), ++ z0 = svorr_z (p0, z1, x0)) ++ ++/* ++** orr_1_s32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s32_z_tied1, svint32_t, ++ z0 = svorr_n_s32_z (p0, z0, 1), ++ z0 = svorr_z (p0, z0, 1)) ++ ++/* ++** orr_1_s32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** orr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s32_z_untied, svint32_t, ++ z0 = svorr_n_s32_z (p0, z1, 1), ++ z0 = svorr_z (p0, z1, 1)) ++ ++/* ++** orr_s32_x_tied1: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s32_x_tied1, svint32_t, ++ z0 = svorr_s32_x (p0, z0, z1), ++ z0 = svorr_x (p0, z0, z1)) ++ ++/* ++** orr_s32_x_tied2: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s32_x_tied2, svint32_t, ++ z0 = svorr_s32_x (p0, z1, z0), ++ z0 = svorr_x (p0, z1, z0)) ++ ++/* ++** orr_s32_x_untied: ++** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s32_x_untied, svint32_t, ++ z0 = svorr_s32_x (p0, z1, z2), ++ z0 = svorr_x (p0, z1, z2)) ++ ++/* ++** orr_w0_s32_x_tied1: ++** mov (z[0-9]+)\.s, w0 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svorr_n_s32_x (p0, z0, x0), ++ z0 = svorr_x (p0, z0, x0)) ++ ++/* ++** orr_w0_s32_x_untied: ++** mov (z[0-9]+)\.s, w0 ++** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svorr_n_s32_x (p0, z1, x0), ++ z0 = svorr_x (p0, z1, x0)) ++ ++/* ++** orr_1_s32_x_tied1: ++** orr z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s32_x_tied1, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, 1), ++ z0 = svorr_x (p0, z0, 1)) ++ ++/* ++** orr_1_s32_x_untied: ++** movprfx z0, z1 ++** orr z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s32_x_untied, svint32_t, ++ z0 = svorr_n_s32_x (p0, z1, 1), ++ z0 = svorr_x (p0, z1, 1)) ++ ++/* ++** orr_127_s32_x: ++** orr z0\.s, z0\.s, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (orr_127_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, 127), ++ z0 = svorr_x (p0, z0, 127)) ++ ++/* ++** orr_128_s32_x: ++** orr z0\.s, z0\.s, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_128_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, 128), ++ z0 = svorr_x (p0, z0, 128)) ++ ++/* ++** orr_255_s32_x: ++** orr z0\.s, z0\.s, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_255_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, 255), ++ z0 = svorr_x (p0, z0, 255)) ++ ++/* ++** orr_256_s32_x: ++** orr z0\.s, z0\.s, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_256_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, 256), ++ z0 = svorr_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. 
*/ ++TEST_UNIFORM_Z (orr_257_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, 257), ++ z0 = svorr_x (p0, z0, 257)) ++ ++/* ++** orr_512_s32_x: ++** orr z0\.s, z0\.s, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_512_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, 512), ++ z0 = svorr_x (p0, z0, 512)) ++ ++/* ++** orr_65280_s32_x: ++** orr z0\.s, z0\.s, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_65280_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, 0xff00), ++ z0 = svorr_x (p0, z0, 0xff00)) ++ ++/* ++** orr_m127_s32_x: ++** orr z0\.s, z0\.s, #0xffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m127_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, -127), ++ z0 = svorr_x (p0, z0, -127)) ++ ++/* ++** orr_m128_s32_x: ++** orr z0\.s, z0\.s, #0xffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m128_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, -128), ++ z0 = svorr_x (p0, z0, -128)) ++ ++/* ++** orr_m255_s32_x: ++** orr z0\.s, z0\.s, #0xffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m255_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, -255), ++ z0 = svorr_x (p0, z0, -255)) ++ ++/* ++** orr_m256_s32_x: ++** orr z0\.s, z0\.s, #0xffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m256_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, -256), ++ z0 = svorr_x (p0, z0, -256)) ++ ++/* ++** orr_m257_s32_x: ++** orr z0\.s, z0\.s, #0xfffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m257_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, -257), ++ z0 = svorr_x (p0, z0, -257)) ++ ++/* ++** orr_m512_s32_x: ++** orr z0\.s, z0\.s, #0xfffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m512_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, -512), ++ z0 = svorr_x (p0, z0, -512)) ++ ++/* ++** orr_m32768_s32_x: ++** orr z0\.s, z0\.s, #0xffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m32768_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, -0x8000), ++ z0 = svorr_x (p0, z0, -0x8000)) ++ ++/* ++** orr_5_s32_x: ++** mov (z[0-9]+)\.s, #5 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_5_s32_x, svint32_t, ++ z0 = svorr_n_s32_x (p0, z0, 5), ++ z0 = svorr_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s64.c +new file mode 100644 +index 000000000..1538fdd14 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s64.c +@@ -0,0 +1,372 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orr_s64_m_tied1: ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s64_m_tied1, svint64_t, ++ z0 = svorr_s64_m (p0, z0, z1), ++ z0 = svorr_m (p0, z0, z1)) ++ ++/* ++** orr_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s64_m_tied2, svint64_t, ++ z0 = svorr_s64_m (p0, z1, z0), ++ z0 = svorr_m (p0, z1, z0)) ++ ++/* ++** orr_s64_m_untied: ++** movprfx z0, z1 ++** orr z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s64_m_untied, svint64_t, ++ z0 = svorr_s64_m (p0, z1, z2), ++ z0 = svorr_m (p0, z1, z2)) ++ ++/* ++** orr_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svorr_n_s64_m (p0, z0, x0), ++ z0 = svorr_m (p0, z0, x0)) ++ ++/* ++** orr_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svorr_n_s64_m (p0, z1, x0), ++ z0 = svorr_m (p0, 
z1, x0)) ++ ++/* ++** orr_1_s64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s64_m_tied1, svint64_t, ++ z0 = svorr_n_s64_m (p0, z0, 1), ++ z0 = svorr_m (p0, z0, 1)) ++ ++/* ++** orr_1_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s64_m_untied, svint64_t, ++ z0 = svorr_n_s64_m (p0, z1, 1), ++ z0 = svorr_m (p0, z1, 1)) ++ ++/* ++** orr_m2_s64_m: ++** mov (z[0-9]+\.d), #-2 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m2_s64_m, svint64_t, ++ z0 = svorr_n_s64_m (p0, z0, -2), ++ z0 = svorr_m (p0, z0, -2)) ++ ++/* ++** orr_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s64_z_tied1, svint64_t, ++ z0 = svorr_s64_z (p0, z0, z1), ++ z0 = svorr_z (p0, z0, z1)) ++ ++/* ++** orr_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s64_z_tied2, svint64_t, ++ z0 = svorr_s64_z (p0, z1, z0), ++ z0 = svorr_z (p0, z1, z0)) ++ ++/* ++** orr_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** orr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s64_z_untied, svint64_t, ++ z0 = svorr_s64_z (p0, z1, z2), ++ z0 = svorr_z (p0, z1, z2)) ++ ++/* ++** orr_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svorr_n_s64_z (p0, z0, x0), ++ z0 = svorr_z (p0, z0, x0)) ++ ++/* ++** orr_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** orr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svorr_n_s64_z (p0, z1, x0), ++ z0 = svorr_z (p0, z1, x0)) ++ ++/* ++** orr_1_s64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s64_z_tied1, svint64_t, ++ z0 = svorr_n_s64_z (p0, z0, 1), ++ z0 = svorr_z (p0, z0, 1)) ++ ++/* ++** orr_1_s64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** orr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s64_z_untied, svint64_t, ++ z0 = svorr_n_s64_z (p0, z1, 1), ++ z0 = svorr_z (p0, z1, 1)) ++ ++/* ++** orr_s64_x_tied1: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s64_x_tied1, svint64_t, ++ z0 = svorr_s64_x (p0, z0, z1), ++ z0 = svorr_x (p0, z0, z1)) ++ ++/* ++** orr_s64_x_tied2: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s64_x_tied2, svint64_t, ++ z0 = svorr_s64_x (p0, z1, z0), ++ z0 = svorr_x (p0, z1, z0)) ++ ++/* ++** orr_s64_x_untied: ++** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s64_x_untied, svint64_t, ++ z0 = svorr_s64_x (p0, z1, z2), ++ z0 = svorr_x (p0, z1, z2)) ++ ++/* ++** orr_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** orr z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svorr_n_s64_x (p0, z0, x0), ++ z0 = svorr_x (p0, z0, x0)) ++ ++/* ++** orr_x0_s64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** orr z0\.d, (z1\.d, 
\1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svorr_n_s64_x (p0, z1, x0), ++ z0 = svorr_x (p0, z1, x0)) ++ ++/* ++** orr_1_s64_x_tied1: ++** orr z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s64_x_tied1, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, 1), ++ z0 = svorr_x (p0, z0, 1)) ++ ++/* ++** orr_1_s64_x_untied: ++** movprfx z0, z1 ++** orr z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s64_x_untied, svint64_t, ++ z0 = svorr_n_s64_x (p0, z1, 1), ++ z0 = svorr_x (p0, z1, 1)) ++ ++/* ++** orr_127_s64_x: ++** orr z0\.d, z0\.d, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (orr_127_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, 127), ++ z0 = svorr_x (p0, z0, 127)) ++ ++/* ++** orr_128_s64_x: ++** orr z0\.d, z0\.d, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_128_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, 128), ++ z0 = svorr_x (p0, z0, 128)) ++ ++/* ++** orr_255_s64_x: ++** orr z0\.d, z0\.d, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_255_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, 255), ++ z0 = svorr_x (p0, z0, 255)) ++ ++/* ++** orr_256_s64_x: ++** orr z0\.d, z0\.d, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_256_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, 256), ++ z0 = svorr_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (orr_257_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, 257), ++ z0 = svorr_x (p0, z0, 257)) ++ ++/* ++** orr_512_s64_x: ++** orr z0\.d, z0\.d, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_512_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, 512), ++ z0 = svorr_x (p0, z0, 512)) ++ ++/* ++** orr_65280_s64_x: ++** orr z0\.d, z0\.d, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_65280_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, 0xff00), ++ z0 = svorr_x (p0, z0, 0xff00)) ++ ++/* ++** orr_m127_s64_x: ++** orr z0\.d, z0\.d, #0xffffffffffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m127_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, -127), ++ z0 = svorr_x (p0, z0, -127)) ++ ++/* ++** orr_m128_s64_x: ++** orr z0\.d, z0\.d, #0xffffffffffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m128_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, -128), ++ z0 = svorr_x (p0, z0, -128)) ++ ++/* ++** orr_m255_s64_x: ++** orr z0\.d, z0\.d, #0xffffffffffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m255_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, -255), ++ z0 = svorr_x (p0, z0, -255)) ++ ++/* ++** orr_m256_s64_x: ++** orr z0\.d, z0\.d, #0xffffffffffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m256_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, -256), ++ z0 = svorr_x (p0, z0, -256)) ++ ++/* ++** orr_m257_s64_x: ++** orr z0\.d, z0\.d, #0xfffffffffffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m257_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, -257), ++ z0 = svorr_x (p0, z0, -257)) ++ ++/* ++** orr_m512_s64_x: ++** orr z0\.d, z0\.d, #0xfffffffffffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m512_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, -512), ++ z0 = svorr_x (p0, z0, -512)) ++ ++/* ++** orr_m32768_s64_x: ++** orr z0\.d, z0\.d, #0xffffffffffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m32768_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, -0x8000), ++ z0 = svorr_x (p0, z0, -0x8000)) ++ ++/* ++** orr_5_s64_x: ++** mov (z[0-9]+\.d), #5 ++** orr z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_5_s64_x, svint64_t, ++ z0 = svorr_n_s64_x (p0, z0, 5), ++ z0 = svorr_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s8.c +new file mode 100644 +index 000000000..b6483b6e7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s8.c +@@ -0,0 +1,295 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orr_s8_m_tied1: ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s8_m_tied1, svint8_t, ++ z0 = svorr_s8_m (p0, z0, z1), ++ z0 = svorr_m (p0, z0, z1)) ++ ++/* ++** orr_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** orr z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s8_m_tied2, svint8_t, ++ z0 = svorr_s8_m (p0, z1, z0), ++ z0 = svorr_m (p0, z1, z0)) ++ ++/* ++** orr_s8_m_untied: ++** movprfx z0, z1 ++** orr z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s8_m_untied, svint8_t, ++ z0 = svorr_s8_m (p0, z1, z2), ++ z0 = svorr_m (p0, z1, z2)) ++ ++/* ++** orr_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svorr_n_s8_m (p0, z0, x0), ++ z0 = svorr_m (p0, z0, x0)) ++ ++/* ++** orr_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svorr_n_s8_m (p0, z1, x0), ++ z0 = svorr_m (p0, z1, x0)) ++ ++/* ++** orr_1_s8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s8_m_tied1, svint8_t, ++ z0 = svorr_n_s8_m (p0, z0, 1), ++ z0 = svorr_m (p0, z0, 1)) ++ ++/* ++** orr_1_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s8_m_untied, svint8_t, ++ z0 = svorr_n_s8_m (p0, z1, 1), ++ z0 = svorr_m (p0, z1, 1)) ++ ++/* ++** orr_m2_s8_m: ++** mov (z[0-9]+\.b), #-2 ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m2_s8_m, svint8_t, ++ z0 = svorr_n_s8_m (p0, z0, -2), ++ z0 = svorr_m (p0, z0, -2)) ++ ++/* ++** orr_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s8_z_tied1, svint8_t, ++ z0 = svorr_s8_z (p0, z0, z1), ++ z0 = svorr_z (p0, z0, z1)) ++ ++/* ++** orr_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s8_z_tied2, svint8_t, ++ z0 = svorr_s8_z (p0, z1, z0), ++ z0 = svorr_z (p0, z1, z0)) ++ ++/* ++** orr_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** orr z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s8_z_untied, svint8_t, ++ z0 = svorr_s8_z (p0, z1, z2), ++ z0 = svorr_z (p0, z1, z2)) ++ ++/* ++** orr_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svorr_n_s8_z (p0, z0, x0), ++ z0 = svorr_z (p0, z0, x0)) ++ ++/* ++** orr_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** orr z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svorr_n_s8_z (p0, z1, x0), ++ z0 = svorr_z (p0, z1, x0)) ++ ++/* ++** orr_1_s8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** orr z0\.b, p0/m, z0\.b, \1 ++** 
ret ++*/ ++TEST_UNIFORM_Z (orr_1_s8_z_tied1, svint8_t, ++ z0 = svorr_n_s8_z (p0, z0, 1), ++ z0 = svorr_z (p0, z0, 1)) ++ ++/* ++** orr_1_s8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** orr z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s8_z_untied, svint8_t, ++ z0 = svorr_n_s8_z (p0, z1, 1), ++ z0 = svorr_z (p0, z1, 1)) ++ ++/* ++** orr_s8_x_tied1: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s8_x_tied1, svint8_t, ++ z0 = svorr_s8_x (p0, z0, z1), ++ z0 = svorr_x (p0, z0, z1)) ++ ++/* ++** orr_s8_x_tied2: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s8_x_tied2, svint8_t, ++ z0 = svorr_s8_x (p0, z1, z0), ++ z0 = svorr_x (p0, z1, z0)) ++ ++/* ++** orr_s8_x_untied: ++** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_s8_x_untied, svint8_t, ++ z0 = svorr_s8_x (p0, z1, z2), ++ z0 = svorr_x (p0, z1, z2)) ++ ++/* ++** orr_w0_s8_x_tied1: ++** mov (z[0-9]+)\.b, w0 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svorr_n_s8_x (p0, z0, x0), ++ z0 = svorr_x (p0, z0, x0)) ++ ++/* ++** orr_w0_s8_x_untied: ++** mov (z[0-9]+)\.b, w0 ++** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svorr_n_s8_x (p0, z1, x0), ++ z0 = svorr_x (p0, z1, x0)) ++ ++/* ++** orr_1_s8_x_tied1: ++** orr z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s8_x_tied1, svint8_t, ++ z0 = svorr_n_s8_x (p0, z0, 1), ++ z0 = svorr_x (p0, z0, 1)) ++ ++/* ++** orr_1_s8_x_untied: ++** movprfx z0, z1 ++** orr z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_s8_x_untied, svint8_t, ++ z0 = svorr_n_s8_x (p0, z1, 1), ++ z0 = svorr_x (p0, z1, 1)) ++ ++/* ++** orr_127_s8_x: ++** orr z0\.b, z0\.b, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (orr_127_s8_x, svint8_t, ++ z0 = svorr_n_s8_x (p0, z0, 127), ++ z0 = svorr_x (p0, z0, 127)) ++ ++/* ++** orr_128_s8_x: ++** orr z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_128_s8_x, svint8_t, ++ z0 = svorr_n_s8_x (p0, z0, 128), ++ z0 = svorr_x (p0, z0, 128)) ++ ++/* ++** orr_255_s8_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_255_s8_x, svint8_t, ++ z0 = svorr_n_s8_x (p0, z0, 255), ++ z0 = svorr_x (p0, z0, 255)) ++ ++/* ++** orr_m127_s8_x: ++** orr z0\.b, z0\.b, #0x81 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m127_s8_x, svint8_t, ++ z0 = svorr_n_s8_x (p0, z0, -127), ++ z0 = svorr_x (p0, z0, -127)) ++ ++/* ++** orr_m128_s8_x: ++** orr z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m128_s8_x, svint8_t, ++ z0 = svorr_n_s8_x (p0, z0, -128), ++ z0 = svorr_x (p0, z0, -128)) ++ ++/* ++** orr_5_s8_x: ++** mov (z[0-9]+)\.b, #5 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_5_s8_x, svint8_t, ++ z0 = svorr_n_s8_x (p0, z0, 5), ++ z0 = svorr_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u16.c +new file mode 100644 +index 000000000..000a0444c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u16.c +@@ -0,0 +1,376 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orr_u16_m_tied1: ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u16_m_tied1, svuint16_t, ++ z0 = svorr_u16_m (p0, z0, z1), ++ z0 = svorr_m 
(p0, z0, z1)) ++ ++/* ++** orr_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** orr z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u16_m_tied2, svuint16_t, ++ z0 = svorr_u16_m (p0, z1, z0), ++ z0 = svorr_m (p0, z1, z0)) ++ ++/* ++** orr_u16_m_untied: ++** movprfx z0, z1 ++** orr z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u16_m_untied, svuint16_t, ++ z0 = svorr_u16_m (p0, z1, z2), ++ z0 = svorr_m (p0, z1, z2)) ++ ++/* ++** orr_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svorr_n_u16_m (p0, z0, x0), ++ z0 = svorr_m (p0, z0, x0)) ++ ++/* ++** orr_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svorr_n_u16_m (p0, z1, x0), ++ z0 = svorr_m (p0, z1, x0)) ++ ++/* ++** orr_1_u16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u16_m_tied1, svuint16_t, ++ z0 = svorr_n_u16_m (p0, z0, 1), ++ z0 = svorr_m (p0, z0, 1)) ++ ++/* ++** orr_1_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u16_m_untied, svuint16_t, ++ z0 = svorr_n_u16_m (p0, z1, 1), ++ z0 = svorr_m (p0, z1, 1)) ++ ++/* ++** orr_m2_u16_m: ++** mov (z[0-9]+\.h), #-2 ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m2_u16_m, svuint16_t, ++ z0 = svorr_n_u16_m (p0, z0, -2), ++ z0 = svorr_m (p0, z0, -2)) ++ ++/* ++** orr_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u16_z_tied1, svuint16_t, ++ z0 = svorr_u16_z (p0, z0, z1), ++ z0 = svorr_z (p0, z0, z1)) ++ ++/* ++** orr_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u16_z_tied2, svuint16_t, ++ z0 = svorr_u16_z (p0, z1, z0), ++ z0 = svorr_z (p0, z1, z0)) ++ ++/* ++** orr_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** orr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u16_z_untied, svuint16_t, ++ z0 = svorr_u16_z (p0, z1, z2), ++ z0 = svorr_z (p0, z1, z2)) ++ ++/* ++** orr_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svorr_n_u16_z (p0, z0, x0), ++ z0 = svorr_z (p0, z0, x0)) ++ ++/* ++** orr_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** orr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svorr_n_u16_z (p0, z1, x0), ++ z0 = svorr_z (p0, z1, x0)) ++ ++/* ++** orr_1_u16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** orr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u16_z_tied1, svuint16_t, ++ z0 = svorr_n_u16_z (p0, z0, 1), ++ z0 = svorr_z (p0, z0, 1)) ++ ++/* ++** orr_1_u16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** orr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** orr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u16_z_untied, svuint16_t, ++ z0 = 
svorr_n_u16_z (p0, z1, 1), ++ z0 = svorr_z (p0, z1, 1)) ++ ++/* ++** orr_u16_x_tied1: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u16_x_tied1, svuint16_t, ++ z0 = svorr_u16_x (p0, z0, z1), ++ z0 = svorr_x (p0, z0, z1)) ++ ++/* ++** orr_u16_x_tied2: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u16_x_tied2, svuint16_t, ++ z0 = svorr_u16_x (p0, z1, z0), ++ z0 = svorr_x (p0, z1, z0)) ++ ++/* ++** orr_u16_x_untied: ++** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u16_x_untied, svuint16_t, ++ z0 = svorr_u16_x (p0, z1, z2), ++ z0 = svorr_x (p0, z1, z2)) ++ ++/* ++** orr_w0_u16_x_tied1: ++** mov (z[0-9]+)\.h, w0 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svorr_n_u16_x (p0, z0, x0), ++ z0 = svorr_x (p0, z0, x0)) ++ ++/* ++** orr_w0_u16_x_untied: ++** mov (z[0-9]+)\.h, w0 ++** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svorr_n_u16_x (p0, z1, x0), ++ z0 = svorr_x (p0, z1, x0)) ++ ++/* ++** orr_1_u16_x_tied1: ++** orr z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u16_x_tied1, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, 1), ++ z0 = svorr_x (p0, z0, 1)) ++ ++/* ++** orr_1_u16_x_untied: ++** movprfx z0, z1 ++** orr z0\.h, z0\.h, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u16_x_untied, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z1, 1), ++ z0 = svorr_x (p0, z1, 1)) ++ ++/* ++** orr_127_u16_x: ++** orr z0\.h, z0\.h, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (orr_127_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, 127), ++ z0 = svorr_x (p0, z0, 127)) ++ ++/* ++** orr_128_u16_x: ++** orr z0\.h, z0\.h, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_128_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, 128), ++ z0 = svorr_x (p0, z0, 128)) ++ ++/* ++** orr_255_u16_x: ++** orr z0\.h, z0\.h, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_255_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, 255), ++ z0 = svorr_x (p0, z0, 255)) ++ ++/* ++** orr_256_u16_x: ++** orr z0\.h, z0\.h, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_256_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, 256), ++ z0 = svorr_x (p0, z0, 256)) ++ ++/* ++** orr_257_u16_x: ++** orr z0\.h, z0\.h, #0x101 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_257_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, 257), ++ z0 = svorr_x (p0, z0, 257)) ++ ++/* ++** orr_512_u16_x: ++** orr z0\.h, z0\.h, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_512_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, 512), ++ z0 = svorr_x (p0, z0, 512)) ++ ++/* ++** orr_65280_u16_x: ++** orr z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_65280_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, 0xff00), ++ z0 = svorr_x (p0, z0, 0xff00)) ++ ++/* ++** orr_m127_u16_x: ++** orr z0\.h, z0\.h, #0xff81 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m127_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, -127), ++ z0 = svorr_x (p0, z0, -127)) ++ ++/* ++** orr_m128_u16_x: ++** orr z0\.h, z0\.h, #0xff80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m128_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, -128), ++ z0 = svorr_x (p0, z0, -128)) ++ ++/* ++** orr_m255_u16_x: ++** orr z0\.h, z0\.h, #0xff01 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m255_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, -255), ++ z0 = svorr_x (p0, z0, -255)) ++ ++/* ++** orr_m256_u16_x: ++** orr z0\.h, z0\.h, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m256_u16_x, svuint16_t, ++ z0 = 
svorr_n_u16_x (p0, z0, -256), ++ z0 = svorr_x (p0, z0, -256)) ++ ++/* ++** orr_m257_u16_x: ++** orr z0\.h, z0\.h, #0xfeff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m257_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, -257), ++ z0 = svorr_x (p0, z0, -257)) ++ ++/* ++** orr_m512_u16_x: ++** orr z0\.h, z0\.h, #0xfe00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m512_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, -512), ++ z0 = svorr_x (p0, z0, -512)) ++ ++/* ++** orr_m32768_u16_x: ++** orr z0\.h, z0\.h, #0x8000 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m32768_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, -0x8000), ++ z0 = svorr_x (p0, z0, -0x8000)) ++ ++/* ++** orr_5_u16_x: ++** mov (z[0-9]+)\.h, #5 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_5_u16_x, svuint16_t, ++ z0 = svorr_n_u16_x (p0, z0, 5), ++ z0 = svorr_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u32.c +new file mode 100644 +index 000000000..8e2351d16 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u32.c +@@ -0,0 +1,372 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orr_u32_m_tied1: ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u32_m_tied1, svuint32_t, ++ z0 = svorr_u32_m (p0, z0, z1), ++ z0 = svorr_m (p0, z0, z1)) ++ ++/* ++** orr_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** orr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u32_m_tied2, svuint32_t, ++ z0 = svorr_u32_m (p0, z1, z0), ++ z0 = svorr_m (p0, z1, z0)) ++ ++/* ++** orr_u32_m_untied: ++** movprfx z0, z1 ++** orr z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u32_m_untied, svuint32_t, ++ z0 = svorr_u32_m (p0, z1, z2), ++ z0 = svorr_m (p0, z1, z2)) ++ ++/* ++** orr_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svorr_n_u32_m (p0, z0, x0), ++ z0 = svorr_m (p0, z0, x0)) ++ ++/* ++** orr_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svorr_n_u32_m (p0, z1, x0), ++ z0 = svorr_m (p0, z1, x0)) ++ ++/* ++** orr_1_u32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u32_m_tied1, svuint32_t, ++ z0 = svorr_n_u32_m (p0, z0, 1), ++ z0 = svorr_m (p0, z0, 1)) ++ ++/* ++** orr_1_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u32_m_untied, svuint32_t, ++ z0 = svorr_n_u32_m (p0, z1, 1), ++ z0 = svorr_m (p0, z1, 1)) ++ ++/* ++** orr_m2_u32_m: ++** mov (z[0-9]+\.s), #-2 ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m2_u32_m, svuint32_t, ++ z0 = svorr_n_u32_m (p0, z0, -2), ++ z0 = svorr_m (p0, z0, -2)) ++ ++/* ++** orr_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u32_z_tied1, svuint32_t, ++ z0 = svorr_u32_z (p0, z0, z1), ++ z0 = svorr_z (p0, z0, z1)) ++ ++/* ++** orr_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u32_z_tied2, svuint32_t, ++ z0 = svorr_u32_z (p0, z1, z0), ++ z0 = svorr_z (p0, z1, z0)) ++ ++/* ++** orr_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, 
z1\.s ++** orr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u32_z_untied, svuint32_t, ++ z0 = svorr_u32_z (p0, z1, z2), ++ z0 = svorr_z (p0, z1, z2)) ++ ++/* ++** orr_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svorr_n_u32_z (p0, z0, x0), ++ z0 = svorr_z (p0, z0, x0)) ++ ++/* ++** orr_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** orr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svorr_n_u32_z (p0, z1, x0), ++ z0 = svorr_z (p0, z1, x0)) ++ ++/* ++** orr_1_u32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** orr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u32_z_tied1, svuint32_t, ++ z0 = svorr_n_u32_z (p0, z0, 1), ++ z0 = svorr_z (p0, z0, 1)) ++ ++/* ++** orr_1_u32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** orr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** orr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u32_z_untied, svuint32_t, ++ z0 = svorr_n_u32_z (p0, z1, 1), ++ z0 = svorr_z (p0, z1, 1)) ++ ++/* ++** orr_u32_x_tied1: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u32_x_tied1, svuint32_t, ++ z0 = svorr_u32_x (p0, z0, z1), ++ z0 = svorr_x (p0, z0, z1)) ++ ++/* ++** orr_u32_x_tied2: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u32_x_tied2, svuint32_t, ++ z0 = svorr_u32_x (p0, z1, z0), ++ z0 = svorr_x (p0, z1, z0)) ++ ++/* ++** orr_u32_x_untied: ++** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u32_x_untied, svuint32_t, ++ z0 = svorr_u32_x (p0, z1, z2), ++ z0 = svorr_x (p0, z1, z2)) ++ ++/* ++** orr_w0_u32_x_tied1: ++** mov (z[0-9]+)\.s, w0 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svorr_n_u32_x (p0, z0, x0), ++ z0 = svorr_x (p0, z0, x0)) ++ ++/* ++** orr_w0_u32_x_untied: ++** mov (z[0-9]+)\.s, w0 ++** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svorr_n_u32_x (p0, z1, x0), ++ z0 = svorr_x (p0, z1, x0)) ++ ++/* ++** orr_1_u32_x_tied1: ++** orr z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u32_x_tied1, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, 1), ++ z0 = svorr_x (p0, z0, 1)) ++ ++/* ++** orr_1_u32_x_untied: ++** movprfx z0, z1 ++** orr z0\.s, z0\.s, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u32_x_untied, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z1, 1), ++ z0 = svorr_x (p0, z1, 1)) ++ ++/* ++** orr_127_u32_x: ++** orr z0\.s, z0\.s, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (orr_127_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, 127), ++ z0 = svorr_x (p0, z0, 127)) ++ ++/* ++** orr_128_u32_x: ++** orr z0\.s, z0\.s, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_128_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, 128), ++ z0 = svorr_x (p0, z0, 128)) ++ ++/* ++** orr_255_u32_x: ++** orr z0\.s, z0\.s, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_255_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, 255), ++ z0 = svorr_x (p0, z0, 255)) ++ ++/* ++** orr_256_u32_x: ++** orr z0\.s, z0\.s, #0x100 ++** ret 
++*/ ++TEST_UNIFORM_Z (orr_256_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, 256), ++ z0 = svorr_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (orr_257_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, 257), ++ z0 = svorr_x (p0, z0, 257)) ++ ++/* ++** orr_512_u32_x: ++** orr z0\.s, z0\.s, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_512_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, 512), ++ z0 = svorr_x (p0, z0, 512)) ++ ++/* ++** orr_65280_u32_x: ++** orr z0\.s, z0\.s, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_65280_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, 0xff00), ++ z0 = svorr_x (p0, z0, 0xff00)) ++ ++/* ++** orr_m127_u32_x: ++** orr z0\.s, z0\.s, #0xffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m127_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, -127), ++ z0 = svorr_x (p0, z0, -127)) ++ ++/* ++** orr_m128_u32_x: ++** orr z0\.s, z0\.s, #0xffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m128_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, -128), ++ z0 = svorr_x (p0, z0, -128)) ++ ++/* ++** orr_m255_u32_x: ++** orr z0\.s, z0\.s, #0xffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m255_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, -255), ++ z0 = svorr_x (p0, z0, -255)) ++ ++/* ++** orr_m256_u32_x: ++** orr z0\.s, z0\.s, #0xffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m256_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, -256), ++ z0 = svorr_x (p0, z0, -256)) ++ ++/* ++** orr_m257_u32_x: ++** orr z0\.s, z0\.s, #0xfffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m257_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, -257), ++ z0 = svorr_x (p0, z0, -257)) ++ ++/* ++** orr_m512_u32_x: ++** orr z0\.s, z0\.s, #0xfffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m512_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, -512), ++ z0 = svorr_x (p0, z0, -512)) ++ ++/* ++** orr_m32768_u32_x: ++** orr z0\.s, z0\.s, #0xffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m32768_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, -0x8000), ++ z0 = svorr_x (p0, z0, -0x8000)) ++ ++/* ++** orr_5_u32_x: ++** mov (z[0-9]+)\.s, #5 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_5_u32_x, svuint32_t, ++ z0 = svorr_n_u32_x (p0, z0, 5), ++ z0 = svorr_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u64.c +new file mode 100644 +index 000000000..323e2101e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u64.c +@@ -0,0 +1,372 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orr_u64_m_tied1: ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u64_m_tied1, svuint64_t, ++ z0 = svorr_u64_m (p0, z0, z1), ++ z0 = svorr_m (p0, z0, z1)) ++ ++/* ++** orr_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u64_m_tied2, svuint64_t, ++ z0 = svorr_u64_m (p0, z1, z0), ++ z0 = svorr_m (p0, z1, z0)) ++ ++/* ++** orr_u64_m_untied: ++** movprfx z0, z1 ++** orr z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u64_m_untied, svuint64_t, ++ z0 = svorr_u64_m (p0, z1, z2), ++ z0 = svorr_m (p0, z1, z2)) ++ ++/* ++** orr_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svorr_n_u64_m (p0, z0, x0), ++ z0 = svorr_m (p0, z0, x0)) ++ ++/* ++** orr_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** 
movprfx z0, z1 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svorr_n_u64_m (p0, z1, x0), ++ z0 = svorr_m (p0, z1, x0)) ++ ++/* ++** orr_1_u64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u64_m_tied1, svuint64_t, ++ z0 = svorr_n_u64_m (p0, z0, 1), ++ z0 = svorr_m (p0, z0, 1)) ++ ++/* ++** orr_1_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u64_m_untied, svuint64_t, ++ z0 = svorr_n_u64_m (p0, z1, 1), ++ z0 = svorr_m (p0, z1, 1)) ++ ++/* ++** orr_m2_u64_m: ++** mov (z[0-9]+\.d), #-2 ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m2_u64_m, svuint64_t, ++ z0 = svorr_n_u64_m (p0, z0, -2), ++ z0 = svorr_m (p0, z0, -2)) ++ ++/* ++** orr_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u64_z_tied1, svuint64_t, ++ z0 = svorr_u64_z (p0, z0, z1), ++ z0 = svorr_z (p0, z0, z1)) ++ ++/* ++** orr_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u64_z_tied2, svuint64_t, ++ z0 = svorr_u64_z (p0, z1, z0), ++ z0 = svorr_z (p0, z1, z0)) ++ ++/* ++** orr_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** orr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u64_z_untied, svuint64_t, ++ z0 = svorr_u64_z (p0, z1, z2), ++ z0 = svorr_z (p0, z1, z2)) ++ ++/* ++** orr_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svorr_n_u64_z (p0, z0, x0), ++ z0 = svorr_z (p0, z0, x0)) ++ ++/* ++** orr_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** orr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svorr_n_u64_z (p0, z1, x0), ++ z0 = svorr_z (p0, z1, x0)) ++ ++/* ++** orr_1_u64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** orr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u64_z_tied1, svuint64_t, ++ z0 = svorr_n_u64_z (p0, z0, 1), ++ z0 = svorr_z (p0, z0, 1)) ++ ++/* ++** orr_1_u64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** orr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** orr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u64_z_untied, svuint64_t, ++ z0 = svorr_n_u64_z (p0, z1, 1), ++ z0 = svorr_z (p0, z1, 1)) ++ ++/* ++** orr_u64_x_tied1: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u64_x_tied1, svuint64_t, ++ z0 = svorr_u64_x (p0, z0, z1), ++ z0 = svorr_x (p0, z0, z1)) ++ ++/* ++** orr_u64_x_tied2: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u64_x_tied2, svuint64_t, ++ z0 = svorr_u64_x (p0, z1, z0), ++ z0 = svorr_x (p0, z1, z0)) ++ ++/* ++** orr_u64_x_untied: ++** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u64_x_untied, svuint64_t, ++ z0 = svorr_u64_x (p0, z1, z2), ++ z0 = svorr_x (p0, z1, z2)) ++ ++/* ++** orr_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** orr z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX 
(orr_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svorr_n_u64_x (p0, z0, x0), ++ z0 = svorr_x (p0, z0, x0)) ++ ++/* ++** orr_x0_u64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** orr z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svorr_n_u64_x (p0, z1, x0), ++ z0 = svorr_x (p0, z1, x0)) ++ ++/* ++** orr_1_u64_x_tied1: ++** orr z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u64_x_tied1, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, 1), ++ z0 = svorr_x (p0, z0, 1)) ++ ++/* ++** orr_1_u64_x_untied: ++** movprfx z0, z1 ++** orr z0\.d, z0\.d, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u64_x_untied, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z1, 1), ++ z0 = svorr_x (p0, z1, 1)) ++ ++/* ++** orr_127_u64_x: ++** orr z0\.d, z0\.d, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (orr_127_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, 127), ++ z0 = svorr_x (p0, z0, 127)) ++ ++/* ++** orr_128_u64_x: ++** orr z0\.d, z0\.d, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_128_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, 128), ++ z0 = svorr_x (p0, z0, 128)) ++ ++/* ++** orr_255_u64_x: ++** orr z0\.d, z0\.d, #0xff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_255_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, 255), ++ z0 = svorr_x (p0, z0, 255)) ++ ++/* ++** orr_256_u64_x: ++** orr z0\.d, z0\.d, #0x100 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_256_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, 256), ++ z0 = svorr_x (p0, z0, 256)) ++ ++/* TODO: Bad code and needs fixing. */ ++TEST_UNIFORM_Z (orr_257_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, 257), ++ z0 = svorr_x (p0, z0, 257)) ++ ++/* ++** orr_512_u64_x: ++** orr z0\.d, z0\.d, #0x200 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_512_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, 512), ++ z0 = svorr_x (p0, z0, 512)) ++ ++/* ++** orr_65280_u64_x: ++** orr z0\.d, z0\.d, #0xff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_65280_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, 0xff00), ++ z0 = svorr_x (p0, z0, 0xff00)) ++ ++/* ++** orr_m127_u64_x: ++** orr z0\.d, z0\.d, #0xffffffffffffff81 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m127_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, -127), ++ z0 = svorr_x (p0, z0, -127)) ++ ++/* ++** orr_m128_u64_x: ++** orr z0\.d, z0\.d, #0xffffffffffffff80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m128_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, -128), ++ z0 = svorr_x (p0, z0, -128)) ++ ++/* ++** orr_m255_u64_x: ++** orr z0\.d, z0\.d, #0xffffffffffffff01 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m255_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, -255), ++ z0 = svorr_x (p0, z0, -255)) ++ ++/* ++** orr_m256_u64_x: ++** orr z0\.d, z0\.d, #0xffffffffffffff00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m256_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, -256), ++ z0 = svorr_x (p0, z0, -256)) ++ ++/* ++** orr_m257_u64_x: ++** orr z0\.d, z0\.d, #0xfffffffffffffeff ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m257_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, -257), ++ z0 = svorr_x (p0, z0, -257)) ++ ++/* ++** orr_m512_u64_x: ++** orr z0\.d, z0\.d, #0xfffffffffffffe00 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m512_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, -512), ++ z0 = svorr_x (p0, z0, -512)) ++ ++/* ++** orr_m32768_u64_x: ++** orr z0\.d, z0\.d, #0xffffffffffff8000 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m32768_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, -0x8000), ++ z0 = svorr_x (p0, z0, -0x8000)) ++ ++/* ++** orr_5_u64_x: ++** mov (z[0-9]+\.d), #5 ++** orr z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ 
++TEST_UNIFORM_Z (orr_5_u64_x, svuint64_t, ++ z0 = svorr_n_u64_x (p0, z0, 5), ++ z0 = svorr_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u8.c +new file mode 100644 +index 000000000..efe5591b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u8.c +@@ -0,0 +1,295 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orr_u8_m_tied1: ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u8_m_tied1, svuint8_t, ++ z0 = svorr_u8_m (p0, z0, z1), ++ z0 = svorr_m (p0, z0, z1)) ++ ++/* ++** orr_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** orr z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u8_m_tied2, svuint8_t, ++ z0 = svorr_u8_m (p0, z1, z0), ++ z0 = svorr_m (p0, z1, z0)) ++ ++/* ++** orr_u8_m_untied: ++** movprfx z0, z1 ++** orr z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u8_m_untied, svuint8_t, ++ z0 = svorr_u8_m (p0, z1, z2), ++ z0 = svorr_m (p0, z1, z2)) ++ ++/* ++** orr_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svorr_n_u8_m (p0, z0, x0), ++ z0 = svorr_m (p0, z0, x0)) ++ ++/* ++** orr_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svorr_n_u8_m (p0, z1, x0), ++ z0 = svorr_m (p0, z1, x0)) ++ ++/* ++** orr_1_u8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u8_m_tied1, svuint8_t, ++ z0 = svorr_n_u8_m (p0, z0, 1), ++ z0 = svorr_m (p0, z0, 1)) ++ ++/* ++** orr_1_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u8_m_untied, svuint8_t, ++ z0 = svorr_n_u8_m (p0, z1, 1), ++ z0 = svorr_m (p0, z1, 1)) ++ ++/* ++** orr_m2_u8_m: ++** mov (z[0-9]+\.b), #-2 ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m2_u8_m, svuint8_t, ++ z0 = svorr_n_u8_m (p0, z0, -2), ++ z0 = svorr_m (p0, z0, -2)) ++ ++/* ++** orr_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u8_z_tied1, svuint8_t, ++ z0 = svorr_u8_z (p0, z0, z1), ++ z0 = svorr_z (p0, z0, z1)) ++ ++/* ++** orr_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u8_z_tied2, svuint8_t, ++ z0 = svorr_u8_z (p0, z1, z0), ++ z0 = svorr_z (p0, z1, z0)) ++ ++/* ++** orr_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** orr z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u8_z_untied, svuint8_t, ++ z0 = svorr_u8_z (p0, z1, z2), ++ z0 = svorr_z (p0, z1, z2)) ++ ++/* ++** orr_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svorr_n_u8_z (p0, z0, x0), ++ z0 = svorr_z (p0, z0, x0)) ++ ++/* ++** orr_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** orr z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u8_z_untied, svuint8_t, uint8_t, 
++ z0 = svorr_n_u8_z (p0, z1, x0), ++ z0 = svorr_z (p0, z1, x0)) ++ ++/* ++** orr_1_u8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** orr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u8_z_tied1, svuint8_t, ++ z0 = svorr_n_u8_z (p0, z0, 1), ++ z0 = svorr_z (p0, z0, 1)) ++ ++/* ++** orr_1_u8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** orr z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** orr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u8_z_untied, svuint8_t, ++ z0 = svorr_n_u8_z (p0, z1, 1), ++ z0 = svorr_z (p0, z1, 1)) ++ ++/* ++** orr_u8_x_tied1: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u8_x_tied1, svuint8_t, ++ z0 = svorr_u8_x (p0, z0, z1), ++ z0 = svorr_x (p0, z0, z1)) ++ ++/* ++** orr_u8_x_tied2: ++** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u8_x_tied2, svuint8_t, ++ z0 = svorr_u8_x (p0, z1, z0), ++ z0 = svorr_x (p0, z1, z0)) ++ ++/* ++** orr_u8_x_untied: ++** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_u8_x_untied, svuint8_t, ++ z0 = svorr_u8_x (p0, z1, z2), ++ z0 = svorr_x (p0, z1, z2)) ++ ++/* ++** orr_w0_u8_x_tied1: ++** mov (z[0-9]+)\.b, w0 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svorr_n_u8_x (p0, z0, x0), ++ z0 = svorr_x (p0, z0, x0)) ++ ++/* ++** orr_w0_u8_x_untied: ++** mov (z[0-9]+)\.b, w0 ++** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (orr_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svorr_n_u8_x (p0, z1, x0), ++ z0 = svorr_x (p0, z1, x0)) ++ ++/* ++** orr_1_u8_x_tied1: ++** orr z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u8_x_tied1, svuint8_t, ++ z0 = svorr_n_u8_x (p0, z0, 1), ++ z0 = svorr_x (p0, z0, 1)) ++ ++/* ++** orr_1_u8_x_untied: ++** movprfx z0, z1 ++** orr z0\.b, z0\.b, #0x1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_1_u8_x_untied, svuint8_t, ++ z0 = svorr_n_u8_x (p0, z1, 1), ++ z0 = svorr_x (p0, z1, 1)) ++ ++/* ++** orr_127_u8_x: ++** orr z0\.b, z0\.b, #0x7f ++** ret ++*/ ++TEST_UNIFORM_Z (orr_127_u8_x, svuint8_t, ++ z0 = svorr_n_u8_x (p0, z0, 127), ++ z0 = svorr_x (p0, z0, 127)) ++ ++/* ++** orr_128_u8_x: ++** orr z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_128_u8_x, svuint8_t, ++ z0 = svorr_n_u8_x (p0, z0, 128), ++ z0 = svorr_x (p0, z0, 128)) ++ ++/* ++** orr_255_u8_x: ++** mov z0\.b, #-1 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_255_u8_x, svuint8_t, ++ z0 = svorr_n_u8_x (p0, z0, 255), ++ z0 = svorr_x (p0, z0, 255)) ++ ++/* ++** orr_m127_u8_x: ++** orr z0\.b, z0\.b, #0x81 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m127_u8_x, svuint8_t, ++ z0 = svorr_n_u8_x (p0, z0, -127), ++ z0 = svorr_x (p0, z0, -127)) ++ ++/* ++** orr_m128_u8_x: ++** orr z0\.b, z0\.b, #0x80 ++** ret ++*/ ++TEST_UNIFORM_Z (orr_m128_u8_x, svuint8_t, ++ z0 = svorr_n_u8_x (p0, z0, -128), ++ z0 = svorr_x (p0, z0, -128)) ++ ++/* ++** orr_5_u8_x: ++** mov (z[0-9]+)\.b, #5 ++** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (orr_5_u8_x, svuint8_t, ++ z0 = svorr_n_u8_x (p0, z0, 5), ++ z0 = svorr_x (p0, z0, 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s16.c +new file mode 100644 +index 000000000..c9b268d3d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ 
++#include "test_sve_acle.h" ++ ++/* ++** orv_x0_s16: ++** orv h([0-9]+), p0, z0\.h ++** umov w0, v\1\.h\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (orv_x0_s16, int16_t, svint16_t, ++ x0 = svorv_s16 (p0, z0), ++ x0 = svorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s32.c +new file mode 100644 +index 000000000..df4025f54 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orv_x0_s32: ++** orv (s[0-9]+), p0, z0\.s ++** fmov w0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (orv_x0_s32, int32_t, svint32_t, ++ x0 = svorv_s32 (p0, z0), ++ x0 = svorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s64.c +new file mode 100644 +index 000000000..76a835ce3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orv_x0_s64: ++** orv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (orv_x0_s64, int64_t, svint64_t, ++ x0 = svorv_s64 (p0, z0), ++ x0 = svorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s8.c +new file mode 100644 +index 000000000..3f2031d9c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orv_x0_s8: ++** orv b([0-9]+), p0, z0\.b ++** umov w0, v\1\.b\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (orv_x0_s8, int8_t, svint8_t, ++ x0 = svorv_s8 (p0, z0), ++ x0 = svorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u16.c +new file mode 100644 +index 000000000..28bfbecb0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u16.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orv_x0_u16: ++** orv h([0-9]+), p0, z0\.h ++** umov w0, v\1\.h\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (orv_x0_u16, uint16_t, svuint16_t, ++ x0 = svorv_u16 (p0, z0), ++ x0 = svorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u32.c +new file mode 100644 +index 000000000..1988d5623 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u32.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orv_x0_u32: ++** orv (s[0-9]+), p0, z0\.s ++** fmov w0, \1 ++** ret ++*/ ++TEST_REDUCTION_X (orv_x0_u32, uint32_t, svuint32_t, ++ x0 = svorv_u32 (p0, z0), ++ x0 = svorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u64.c +new file mode 100644 +index 000000000..c8a8429a7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u64.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orv_x0_u64: ++** orv (d[0-9]+), p0, z0\.d ++** fmov x0, \1 ++** ret ++*/ ++TEST_REDUCTION_X 
(orv_x0_u64, uint64_t, svuint64_t, ++ x0 = svorv_u64 (p0, z0), ++ x0 = svorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u8.c +new file mode 100644 +index 000000000..bcab32d8b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u8.c +@@ -0,0 +1,13 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** orv_x0_u8: ++** orv b([0-9]+), p0, z0\.b ++** umov w0, v\1\.b\[0\] ++** ret ++*/ ++TEST_REDUCTION_X (orv_x0_u8, uint8_t, svuint8_t, ++ x0 = svorv_u8 (p0, z0), ++ x0 = svorv (p0, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfalse.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfalse.c +new file mode 100644 +index 000000000..a74a59283 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfalse.c +@@ -0,0 +1,13 @@ ++/* { dg-additional-options "-msve-vector-bits=scalable" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** pfalse_b: ++** pfalse p0\.b ++** ret ++*/ ++TEST_P (pfalse_b, ++ p0 = svpfalse_b (), ++ p0 = svpfalse ()); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c +new file mode 100644 +index 000000000..a32099656 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** pfirst_b_tied1: ++** pfirst p0\.b, p3, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (pfirst_b_tied1, ++ p0 = svpfirst_b (p3, p0), ++ p0 = svpfirst (p3, p0)) ++ ++/* ++** pfirst_b_untied: ++** mov p0\.b, p1\.b ++** pfirst p0\.b, p3, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (pfirst_b_untied, ++ p0 = svpfirst_b (p3, p1), ++ p0 = svpfirst (p3, p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c +new file mode 100644 +index 000000000..ad0efe5e7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** pnext_b16_tied1: ++** pnext p0\.h, p3, p0\.h ++** ret ++*/ ++TEST_UNIFORM_P (pnext_b16_tied1, ++ p0 = svpnext_b16 (p3, p0), ++ p0 = svpnext_b16 (p3, p0)) ++ ++/* ++** pnext_b16_untied: ++** mov p0\.b, p1\.b ++** pnext p0\.h, p3, p0\.h ++** ret ++*/ ++TEST_UNIFORM_P (pnext_b16_untied, ++ p0 = svpnext_b16 (p3, p1), ++ p0 = svpnext_b16 (p3, p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c +new file mode 100644 +index 000000000..a0030fae1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** pnext_b32_tied1: ++** pnext p0\.s, p3, p0\.s ++** ret ++*/ ++TEST_UNIFORM_P (pnext_b32_tied1, ++ p0 = svpnext_b32 (p3, p0), ++ p0 = svpnext_b32 (p3, p0)) ++ ++/* ++** pnext_b32_untied: ++** mov p0\.b, p1\.b ++** pnext p0\.s, p3, p0\.s ++** ret ++*/ ++TEST_UNIFORM_P (pnext_b32_untied, ++ p0 = svpnext_b32 (p3, p1), ++ p0 = svpnext_b32 (p3, p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c +new 
file mode 100644 +index 000000000..59db2f04f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** pnext_b64_tied1: ++** pnext p0\.d, p3, p0\.d ++** ret ++*/ ++TEST_UNIFORM_P (pnext_b64_tied1, ++ p0 = svpnext_b64 (p3, p0), ++ p0 = svpnext_b64 (p3, p0)) ++ ++/* ++** pnext_b64_untied: ++** mov p0\.b, p1\.b ++** pnext p0\.d, p3, p0\.d ++** ret ++*/ ++TEST_UNIFORM_P (pnext_b64_untied, ++ p0 = svpnext_b64 (p3, p1), ++ p0 = svpnext_b64 (p3, p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c +new file mode 100644 +index 000000000..cfc2e907c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** pnext_b8_tied1: ++** pnext p0\.b, p3, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (pnext_b8_tied1, ++ p0 = svpnext_b8 (p3, p0), ++ p0 = svpnext_b8 (p3, p0)) ++ ++/* ++** pnext_b8_untied: ++** mov p0\.b, p1\.b ++** pnext p0\.b, p3, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (pnext_b8_untied, ++ p0 = svpnext_b8 (p3, p1), ++ p0 = svpnext_b8 (p3, p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c +new file mode 100644 +index 000000000..d2b2777e6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c +@@ -0,0 +1,245 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** prfb_base: ++** prfb pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_base, uint8_t, ++ svprfb (p0, x0, SV_PLDL1KEEP), ++ svprfb (p0, x0, SV_PLDL1KEEP)) ++ ++/* ++** prfb_u8_index: ++** prfb pldl1keep, p0, \[x0, x1\] ++** ret ++*/ ++TEST_PREFETCH (prfb_u8_index, uint8_t, ++ svprfb (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfb (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfb_u8_1: ++** add (x[0-9+]), x0, #?1 ++** prfb pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfb_u8_1, uint8_t, ++ svprfb (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfb (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfb_u16_index: ++** add (x[0-9+]), x0, x1, lsl #?1 ++** prfb pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfb_u16_index, uint16_t, ++ svprfb (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfb (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfb_u16_1: ++** add (x[0-9+]), x0, #?2 ++** prfb pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfb_u16_1, uint16_t, ++ svprfb (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfb (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfb_u32_index: ++** add (x[0-9+]), x0, x1, lsl #?2 ++** prfb pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfb_u32_index, uint32_t, ++ svprfb (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfb (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfb_u32_1: ++** add (x[0-9+]), x0, #?4 ++** prfb pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfb_u32_1, uint32_t, ++ svprfb (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfb (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfb_u64_index: ++** add (x[0-9+]), x0, x1, lsl #?3 ++** prfb pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfb_u64_index, uint64_t, ++ svprfb (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfb (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfb_u64_1: ++** add (x[0-9+]), x0, #?8 ++** prfb pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfb_u64_1, 
uint64_t, ++ svprfb (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfb (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfb_pldl1strm: ++** prfb pldl1strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pldl1strm, uint8_t, ++ svprfb (p0, x0, SV_PLDL1STRM), ++ svprfb (p0, x0, SV_PLDL1STRM)) ++ ++/* ++** prfb_pldl2keep: ++** prfb pldl2keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pldl2keep, uint8_t, ++ svprfb (p0, x0, SV_PLDL2KEEP), ++ svprfb (p0, x0, SV_PLDL2KEEP)) ++ ++/* ++** prfb_pldl2strm: ++** prfb pldl2strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pldl2strm, uint8_t, ++ svprfb (p0, x0, SV_PLDL2STRM), ++ svprfb (p0, x0, SV_PLDL2STRM)) ++ ++/* ++** prfb_pldl3keep: ++** prfb pldl3keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pldl3keep, uint8_t, ++ svprfb (p0, x0, SV_PLDL3KEEP), ++ svprfb (p0, x0, SV_PLDL3KEEP)) ++ ++/* ++** prfb_pldl3strm: ++** prfb pldl3strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pldl3strm, uint8_t, ++ svprfb (p0, x0, SV_PLDL3STRM), ++ svprfb (p0, x0, SV_PLDL3STRM)) ++ ++/* ++** prfb_pstl1keep: ++** prfb pstl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pstl1keep, uint8_t, ++ svprfb (p0, x0, SV_PSTL1KEEP), ++ svprfb (p0, x0, SV_PSTL1KEEP)) ++ ++/* ++** prfb_pstl1strm: ++** prfb pstl1strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pstl1strm, uint8_t, ++ svprfb (p0, x0, SV_PSTL1STRM), ++ svprfb (p0, x0, SV_PSTL1STRM)) ++ ++/* ++** prfb_pstl2keep: ++** prfb pstl2keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pstl2keep, uint8_t, ++ svprfb (p0, x0, SV_PSTL2KEEP), ++ svprfb (p0, x0, SV_PSTL2KEEP)) ++ ++/* ++** prfb_pstl2strm: ++** prfb pstl2strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pstl2strm, uint8_t, ++ svprfb (p0, x0, SV_PSTL2STRM), ++ svprfb (p0, x0, SV_PSTL2STRM)) ++ ++/* ++** prfb_pstl3keep: ++** prfb pstl3keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pstl3keep, uint8_t, ++ svprfb (p0, x0, SV_PSTL3KEEP), ++ svprfb (p0, x0, SV_PSTL3KEEP)) ++ ++/* ++** prfb_pstl3strm: ++** prfb pstl3strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_pstl3strm, uint8_t, ++ svprfb (p0, x0, SV_PSTL3STRM), ++ svprfb (p0, x0, SV_PSTL3STRM)) ++ ++/* ++** prfb_vnum_0: ++** prfb pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_vnum_0, uint8_t, ++ svprfb_vnum (p0, x0, 0, SV_PLDL1KEEP), ++ svprfb_vnum (p0, x0, 0, SV_PLDL1KEEP)) ++ ++/* ++** prfb_vnum_1: ++** incb x0 ++** prfb pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_vnum_1, uint16_t, ++ svprfb_vnum (p0, x0, 1, SV_PLDL1KEEP), ++ svprfb_vnum (p0, x0, 1, SV_PLDL1KEEP)) ++ ++/* ++** prfb_vnum_2: ++** incb x0, all, mul #2 ++** prfb pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_vnum_2, uint32_t, ++ svprfb_vnum (p0, x0, 2, SV_PLDL1KEEP), ++ svprfb_vnum (p0, x0, 2, SV_PLDL1KEEP)) ++ ++/* ++** prfb_vnum_3: ++** incb x0, all, mul #3 ++** prfb pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfb_vnum_3, uint64_t, ++ svprfb_vnum (p0, x0, 3, SV_PLDL1KEEP), ++ svprfb_vnum (p0, x0, 3, SV_PLDL1KEEP)) ++ ++/* ++** prfb_vnum_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** prfb pldl1keep, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** prfb zldl1keep, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_PREFETCH (prfb_vnum_x1, uint64_t, ++ svprfb_vnum (p0, x0, x1, SV_PLDL1KEEP), ++ svprfb_vnum (p0, x0, x1, SV_PLDL1KEEP)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c +new file mode 100644 +index 000000000..c4bfbbbf7 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c +@@ -0,0 +1,223 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** prfb_gather_u32base: ++** prfb pldl1keep, p0, \[z0\.s\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_u32base, svuint32_t, ++ svprfb_gather_u32base (p0, z0, SV_PLDL1KEEP), ++ svprfb_gather (p0, z0, SV_PLDL1KEEP)) ++ ++/* ++** prfb_gather_u64base: ++** prfb pldl1strm, p0, \[z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_u64base, svuint64_t, ++ svprfb_gather_u64base (p0, z0, SV_PLDL1STRM), ++ svprfb_gather (p0, z0, SV_PLDL1STRM)) ++ ++/* ++** prfb_gather_x0_u32base_offset: ++** prfb pldl2keep, p0, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_x0_u32base_offset, svuint32_t, ++ svprfb_gather_u32base_offset (p0, z0, x0, SV_PLDL2KEEP), ++ svprfb_gather_offset (p0, z0, x0, SV_PLDL2KEEP)) ++ ++/* ++** prfb_gather_m1_u32base_offset: ++** mov (x[0-9]+), #?-1 ++** prfb pldl2strm, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_m1_u32base_offset, svuint32_t, ++ svprfb_gather_u32base_offset (p0, z0, -1, SV_PLDL2STRM), ++ svprfb_gather_offset (p0, z0, -1, SV_PLDL2STRM)) ++ ++/* ++** prfb_gather_0_u32base_offset: ++** prfb pldl3keep, p0, \[z0\.s\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_0_u32base_offset, svuint32_t, ++ svprfb_gather_u32base_offset (p0, z0, 0, SV_PLDL3KEEP), ++ svprfb_gather_offset (p0, z0, 0, SV_PLDL3KEEP)) ++ ++/* ++** prfb_gather_5_u32base_offset: ++** prfb pldl3strm, p0, \[z0\.s, #5\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_5_u32base_offset, svuint32_t, ++ svprfb_gather_u32base_offset (p0, z0, 5, SV_PLDL3STRM), ++ svprfb_gather_offset (p0, z0, 5, SV_PLDL3STRM)) ++ ++/* ++** prfb_gather_31_u32base_offset: ++** prfb pstl1keep, p0, \[z0\.s, #31\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_31_u32base_offset, svuint32_t, ++ svprfb_gather_u32base_offset (p0, z0, 31, SV_PSTL1KEEP), ++ svprfb_gather_offset (p0, z0, 31, SV_PSTL1KEEP)) ++ ++/* ++** prfb_gather_32_u32base_offset: ++** mov (x[0-9]+), #?32 ++** prfb pstl1strm, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_32_u32base_offset, svuint32_t, ++ svprfb_gather_u32base_offset (p0, z0, 32, SV_PSTL1STRM), ++ svprfb_gather_offset (p0, z0, 32, SV_PSTL1STRM)) ++ ++/* ++** prfb_gather_x0_u64base_offset: ++** prfb pstl2keep, p0, \[x0, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_x0_u64base_offset, svuint64_t, ++ svprfb_gather_u64base_offset (p0, z0, x0, SV_PSTL2KEEP), ++ svprfb_gather_offset (p0, z0, x0, SV_PSTL2KEEP)) ++ ++/* ++** prfb_gather_m1_u64base_offset: ++** mov (x[0-9]+), #?-1 ++** prfb pstl2strm, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_m1_u64base_offset, svuint64_t, ++ svprfb_gather_u64base_offset (p0, z0, -1, SV_PSTL2STRM), ++ svprfb_gather_offset (p0, z0, -1, SV_PSTL2STRM)) ++ ++/* ++** prfb_gather_0_u64base_offset: ++** prfb pstl3keep, p0, \[z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_0_u64base_offset, svuint64_t, ++ svprfb_gather_u64base_offset (p0, z0, 0, SV_PSTL3KEEP), ++ svprfb_gather_offset (p0, z0, 0, SV_PSTL3KEEP)) ++ ++/* ++** prfb_gather_5_u64base_offset: ++** prfb pstl3strm, p0, \[z0\.d, #5\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_5_u64base_offset, svuint64_t, ++ svprfb_gather_u64base_offset (p0, z0, 5, SV_PSTL3STRM), ++ svprfb_gather_offset (p0, z0, 5, SV_PSTL3STRM)) ++ ++/* ++** 
prfb_gather_31_u64base_offset: ++** prfb pldl1keep, p0, \[z0\.d, #31\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_31_u64base_offset, svuint64_t, ++ svprfb_gather_u64base_offset (p0, z0, 31, SV_PLDL1KEEP), ++ svprfb_gather_offset (p0, z0, 31, SV_PLDL1KEEP)) ++ ++/* ++** prfb_gather_32_u64base_offset: ++** mov (x[0-9]+), #?32 ++** prfb pldl1strm, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfb_gather_32_u64base_offset, svuint64_t, ++ svprfb_gather_u64base_offset (p0, z0, 32, SV_PLDL1STRM), ++ svprfb_gather_offset (p0, z0, 32, SV_PLDL1STRM)) ++ ++/* ++** prfb_gather_x0_s32offset: ++** prfb pldl2keep, p0, \[x0, z0\.s, sxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfb_gather_x0_s32offset, svint32_t, ++ svprfb_gather_s32offset (p0, x0, z0, SV_PLDL2KEEP), ++ svprfb_gather_offset (p0, x0, z0, SV_PLDL2KEEP)) ++ ++/* ++** prfb_gather_s32offset: ++** prfb pldl2strm, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfb_gather_s32offset, svint32_t, ++ svprfb_gather_s32offset (p0, x0, z1, SV_PLDL2STRM), ++ svprfb_gather_offset (p0, x0, z1, SV_PLDL2STRM)) ++ ++/* ++** prfb_gather_x0_u32offset: ++** prfb pldl3keep, p0, \[x0, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfb_gather_x0_u32offset, svuint32_t, ++ svprfb_gather_u32offset (p0, x0, z0, SV_PLDL3KEEP), ++ svprfb_gather_offset (p0, x0, z0, SV_PLDL3KEEP)) ++ ++/* ++** prfb_gather_u32offset: ++** prfb pldl3strm, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfb_gather_u32offset, svuint32_t, ++ svprfb_gather_u32offset (p0, x0, z1, SV_PLDL3STRM), ++ svprfb_gather_offset (p0, x0, z1, SV_PLDL3STRM)) ++ ++/* ++** prfb_gather_x0_s64offset: ++** prfb pstl1keep, p0, \[x0, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfb_gather_x0_s64offset, svint64_t, ++ svprfb_gather_s64offset (p0, x0, z0, SV_PSTL1KEEP), ++ svprfb_gather_offset (p0, x0, z0, SV_PSTL1KEEP)) ++ ++/* ++** prfb_gather_s64offset: ++** prfb pstl1strm, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfb_gather_s64offset, svint64_t, ++ svprfb_gather_s64offset (p0, x0, z1, SV_PSTL1STRM), ++ svprfb_gather_offset (p0, x0, z1, SV_PSTL1STRM)) ++ ++/* ++** prfb_gather_ext_s64offset: ++** prfb pstl1strm, p0, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfb_gather_ext_s64offset, svint64_t, ++ svprfb_gather_s64offset (p0, x0, svextw_s64_x (p0, z1), SV_PSTL1STRM), ++ svprfb_gather_offset (p0, x0, svextw_x (p0, z1), SV_PSTL1STRM)) ++ ++/* ++** prfb_gather_x0_u64offset: ++** prfb pstl2keep, p0, \[x0, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfb_gather_x0_u64offset, svuint64_t, ++ svprfb_gather_u64offset (p0, x0, z0, SV_PSTL2KEEP), ++ svprfb_gather_offset (p0, x0, z0, SV_PSTL2KEEP)) ++ ++/* ++** prfb_gather_u64offset: ++** prfb pstl2strm, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfb_gather_u64offset, svuint64_t, ++ svprfb_gather_u64offset (p0, x0, z1, SV_PSTL2STRM), ++ svprfb_gather_offset (p0, x0, z1, SV_PSTL2STRM)) ++ ++/* ++** prfb_gather_ext_u64offset: ++** prfb pstl2strm, p0, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfb_gather_ext_u64offset, svuint64_t, ++ svprfb_gather_u64offset (p0, x0, svextw_u64_x (p0, z1), SV_PSTL2STRM), ++ svprfb_gather_offset (p0, x0, svextw_x (p0, z1), SV_PSTL2STRM)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c +new file mode 100644 +index 000000000..72b2e6415 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c +@@ -0,0 +1,245 
@@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** prfd_base: ++** prfd pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_base, uint8_t, ++ svprfd (p0, x0, SV_PLDL1KEEP), ++ svprfd (p0, x0, SV_PLDL1KEEP)) ++ ++/* ++** prfd_u8_index: ++** add (x[0-9+]), (x0, x1|x1, x0) ++** prfd pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_u8_index, uint8_t, ++ svprfd (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfd (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfd_u8_1: ++** add (x[0-9+]), x0, #?1 ++** prfd pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfd_u8_1, uint8_t, ++ svprfd (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfd (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfd_u16_index: ++** add (x[0-9+]), x0, x1, lsl #?1 ++** prfd pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfd_u16_index, uint16_t, ++ svprfd (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfd (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfd_u16_1: ++** add (x[0-9+]), x0, #?2 ++** prfd pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfd_u16_1, uint16_t, ++ svprfd (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfd (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfd_u32_index: ++** add (x[0-9+]), x0, x1, lsl #?2 ++** prfd pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfd_u32_index, uint32_t, ++ svprfd (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfd (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfd_u32_1: ++** add (x[0-9+]), x0, #?4 ++** prfd pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfd_u32_1, uint32_t, ++ svprfd (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfd (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfd_u64_index: ++** prfd pldl1keep, p0, \[x0, x1, lsl #?3\] ++** ret ++*/ ++TEST_PREFETCH (prfd_u64_index, uint64_t, ++ svprfd (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfd (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfd_u64_1: ++** add (x[0-9+]), x0, #?8 ++** prfd pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfd_u64_1, uint64_t, ++ svprfd (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfd (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfd_pldl1strm: ++** prfd pldl1strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pldl1strm, uint8_t, ++ svprfd (p0, x0, SV_PLDL1STRM), ++ svprfd (p0, x0, SV_PLDL1STRM)) ++ ++/* ++** prfd_pldl2keep: ++** prfd pldl2keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pldl2keep, uint8_t, ++ svprfd (p0, x0, SV_PLDL2KEEP), ++ svprfd (p0, x0, SV_PLDL2KEEP)) ++ ++/* ++** prfd_pldl2strm: ++** prfd pldl2strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pldl2strm, uint8_t, ++ svprfd (p0, x0, SV_PLDL2STRM), ++ svprfd (p0, x0, SV_PLDL2STRM)) ++ ++/* ++** prfd_pldl3keep: ++** prfd pldl3keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pldl3keep, uint8_t, ++ svprfd (p0, x0, SV_PLDL3KEEP), ++ svprfd (p0, x0, SV_PLDL3KEEP)) ++ ++/* ++** prfd_pldl3strm: ++** prfd pldl3strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pldl3strm, uint8_t, ++ svprfd (p0, x0, SV_PLDL3STRM), ++ svprfd (p0, x0, SV_PLDL3STRM)) ++ ++/* ++** prfd_pstl1keep: ++** prfd pstl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pstl1keep, uint8_t, ++ svprfd (p0, x0, SV_PSTL1KEEP), ++ svprfd (p0, x0, SV_PSTL1KEEP)) ++ ++/* ++** prfd_pstl1strm: ++** prfd pstl1strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pstl1strm, uint8_t, ++ svprfd (p0, x0, SV_PSTL1STRM), ++ svprfd (p0, x0, SV_PSTL1STRM)) ++ ++/* ++** prfd_pstl2keep: ++** prfd pstl2keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pstl2keep, uint8_t, ++ svprfd (p0, x0, SV_PSTL2KEEP), ++ svprfd (p0, x0, SV_PSTL2KEEP)) ++ ++/* ++** 
prfd_pstl2strm: ++** prfd pstl2strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pstl2strm, uint8_t, ++ svprfd (p0, x0, SV_PSTL2STRM), ++ svprfd (p0, x0, SV_PSTL2STRM)) ++ ++/* ++** prfd_pstl3keep: ++** prfd pstl3keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pstl3keep, uint8_t, ++ svprfd (p0, x0, SV_PSTL3KEEP), ++ svprfd (p0, x0, SV_PSTL3KEEP)) ++ ++/* ++** prfd_pstl3strm: ++** prfd pstl3strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_pstl3strm, uint8_t, ++ svprfd (p0, x0, SV_PSTL3STRM), ++ svprfd (p0, x0, SV_PSTL3STRM)) ++ ++/* ++** prfd_vnum_0: ++** prfd pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_vnum_0, uint8_t, ++ svprfd_vnum (p0, x0, 0, SV_PLDL1KEEP), ++ svprfd_vnum (p0, x0, 0, SV_PLDL1KEEP)) ++ ++/* ++** prfd_vnum_1: ++** incb x0 ++** prfd pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_vnum_1, uint16_t, ++ svprfd_vnum (p0, x0, 1, SV_PLDL1KEEP), ++ svprfd_vnum (p0, x0, 1, SV_PLDL1KEEP)) ++ ++/* ++** prfd_vnum_2: ++** incb x0, all, mul #2 ++** prfd pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_vnum_2, uint32_t, ++ svprfd_vnum (p0, x0, 2, SV_PLDL1KEEP), ++ svprfd_vnum (p0, x0, 2, SV_PLDL1KEEP)) ++ ++/* ++** prfd_vnum_3: ++** incb x0, all, mul #3 ++** prfd pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfd_vnum_3, uint64_t, ++ svprfd_vnum (p0, x0, 3, SV_PLDL1KEEP), ++ svprfd_vnum (p0, x0, 3, SV_PLDL1KEEP)) ++ ++/* ++** prfd_vnum_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** prfd pldl1keep, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** prfd zldl1keep, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_PREFETCH (prfd_vnum_x1, uint64_t, ++ svprfd_vnum (p0, x0, x1, SV_PLDL1KEEP), ++ svprfd_vnum (p0, x0, x1, SV_PLDL1KEEP)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c +new file mode 100644 +index 000000000..a84acb1a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c +@@ -0,0 +1,225 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** prfd_gather_u32base: ++** prfd pldl1keep, p0, \[z0\.s\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_u32base, svuint32_t, ++ svprfd_gather_u32base (p0, z0, SV_PLDL1KEEP), ++ svprfd_gather (p0, z0, SV_PLDL1KEEP)) ++ ++/* ++** prfd_gather_u64base: ++** prfd pldl1strm, p0, \[z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_u64base, svuint64_t, ++ svprfd_gather_u64base (p0, z0, SV_PLDL1STRM), ++ svprfd_gather (p0, z0, SV_PLDL1STRM)) ++ ++/* ++** prfd_gather_x0_u32base_index: ++** lsl (x[0-9]+), x0, #?3 ++** prfb pldl2keep, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_x0_u32base_index, svuint32_t, ++ svprfd_gather_u32base_index (p0, z0, x0, SV_PLDL2KEEP), ++ svprfd_gather_index (p0, z0, x0, SV_PLDL2KEEP)) ++ ++/* ++** prfd_gather_m1_u32base_index: ++** mov (x[0-9]+), #?-8 ++** prfb pldl2strm, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_m1_u32base_index, svuint32_t, ++ svprfd_gather_u32base_index (p0, z0, -1, SV_PLDL2STRM), ++ svprfd_gather_index (p0, z0, -1, SV_PLDL2STRM)) ++ ++/* ++** prfd_gather_0_u32base_index: ++** prfd pldl3keep, p0, \[z0\.s\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_0_u32base_index, svuint32_t, ++ svprfd_gather_u32base_index (p0, z0, 0, SV_PLDL3KEEP), ++ svprfd_gather_index (p0, z0, 0, SV_PLDL3KEEP)) ++ ++/* ++** prfd_gather_5_u32base_index: ++** prfd pldl3strm, p0, \[z0\.s, #40\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_5_u32base_index, svuint32_t, ++ svprfd_gather_u32base_index (p0, z0, 5, SV_PLDL3STRM), ++ svprfd_gather_index (p0, z0, 5, SV_PLDL3STRM)) ++ ++/* ++** prfd_gather_31_u32base_index: ++** prfd pstl1keep, p0, \[z0\.s, #248\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_31_u32base_index, svuint32_t, ++ svprfd_gather_u32base_index (p0, z0, 31, SV_PSTL1KEEP), ++ svprfd_gather_index (p0, z0, 31, SV_PSTL1KEEP)) ++ ++/* ++** prfd_gather_32_u32base_index: ++** mov (x[0-9]+), #?256 ++** prfb pstl1strm, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_32_u32base_index, svuint32_t, ++ svprfd_gather_u32base_index (p0, z0, 32, SV_PSTL1STRM), ++ svprfd_gather_index (p0, z0, 32, SV_PSTL1STRM)) ++ ++/* ++** prfd_gather_x0_u64base_index: ++** lsl (x[0-9]+), x0, #?3 ++** prfb pstl2keep, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_x0_u64base_index, svuint64_t, ++ svprfd_gather_u64base_index (p0, z0, x0, SV_PSTL2KEEP), ++ svprfd_gather_index (p0, z0, x0, SV_PSTL2KEEP)) ++ ++/* ++** prfd_gather_m1_u64base_index: ++** mov (x[0-9]+), #?-8 ++** prfb pstl2strm, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_m1_u64base_index, svuint64_t, ++ svprfd_gather_u64base_index (p0, z0, -1, SV_PSTL2STRM), ++ svprfd_gather_index (p0, z0, -1, SV_PSTL2STRM)) ++ ++/* ++** prfd_gather_0_u64base_index: ++** prfd pstl3keep, p0, \[z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_0_u64base_index, svuint64_t, ++ svprfd_gather_u64base_index (p0, z0, 0, SV_PSTL3KEEP), ++ svprfd_gather_index (p0, z0, 0, SV_PSTL3KEEP)) ++ ++/* ++** prfd_gather_5_u64base_index: ++** prfd pstl3strm, p0, \[z0\.d, #40\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_5_u64base_index, svuint64_t, ++ svprfd_gather_u64base_index (p0, z0, 5, SV_PSTL3STRM), ++ svprfd_gather_index (p0, z0, 5, SV_PSTL3STRM)) ++ ++/* ++** prfd_gather_31_u64base_index: ++** prfd pldl1keep, p0, \[z0\.d, #248\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_31_u64base_index, 
svuint64_t, ++ svprfd_gather_u64base_index (p0, z0, 31, SV_PLDL1KEEP), ++ svprfd_gather_index (p0, z0, 31, SV_PLDL1KEEP)) ++ ++/* ++** prfd_gather_32_u64base_index: ++** mov (x[0-9]+), #?256 ++** prfb pldl1strm, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfd_gather_32_u64base_index, svuint64_t, ++ svprfd_gather_u64base_index (p0, z0, 32, SV_PLDL1STRM), ++ svprfd_gather_index (p0, z0, 32, SV_PLDL1STRM)) ++ ++/* ++** prfd_gather_x0_s32index: ++** prfd pldl2keep, p0, \[x0, z0\.s, sxtw 3\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfd_gather_x0_s32index, svint32_t, ++ svprfd_gather_s32index (p0, x0, z0, SV_PLDL2KEEP), ++ svprfd_gather_index (p0, x0, z0, SV_PLDL2KEEP)) ++ ++/* ++** prfd_gather_s32index: ++** prfd pldl2strm, p0, \[x0, z1\.s, sxtw 3\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfd_gather_s32index, svint32_t, ++ svprfd_gather_s32index (p0, x0, z1, SV_PLDL2STRM), ++ svprfd_gather_index (p0, x0, z1, SV_PLDL2STRM)) ++ ++/* ++** prfd_gather_x0_u32index: ++** prfd pldl3keep, p0, \[x0, z0\.s, uxtw 3\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfd_gather_x0_u32index, svuint32_t, ++ svprfd_gather_u32index (p0, x0, z0, SV_PLDL3KEEP), ++ svprfd_gather_index (p0, x0, z0, SV_PLDL3KEEP)) ++ ++/* ++** prfd_gather_u32index: ++** prfd pldl3strm, p0, \[x0, z1\.s, uxtw 3\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfd_gather_u32index, svuint32_t, ++ svprfd_gather_u32index (p0, x0, z1, SV_PLDL3STRM), ++ svprfd_gather_index (p0, x0, z1, SV_PLDL3STRM)) ++ ++/* ++** prfd_gather_x0_s64index: ++** prfd pstl1keep, p0, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfd_gather_x0_s64index, svint64_t, ++ svprfd_gather_s64index (p0, x0, z0, SV_PSTL1KEEP), ++ svprfd_gather_index (p0, x0, z0, SV_PSTL1KEEP)) ++ ++/* ++** prfd_gather_s64index: ++** prfd pstl1strm, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfd_gather_s64index, svint64_t, ++ svprfd_gather_s64index (p0, x0, z1, SV_PSTL1STRM), ++ svprfd_gather_index (p0, x0, z1, SV_PSTL1STRM)) ++ ++/* ++** prfd_gather_ext_s64index: ++** prfd pstl1strm, p0, \[x0, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfd_gather_ext_s64index, svint64_t, ++ svprfd_gather_s64index (p0, x0, svextw_s64_x (p0, z1), SV_PSTL1STRM), ++ svprfd_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL1STRM)) ++ ++/* ++** prfd_gather_x0_u64index: ++** prfd pstl2keep, p0, \[x0, z0\.d, lsl 3\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfd_gather_x0_u64index, svuint64_t, ++ svprfd_gather_u64index (p0, x0, z0, SV_PSTL2KEEP), ++ svprfd_gather_index (p0, x0, z0, SV_PSTL2KEEP)) ++ ++/* ++** prfd_gather_u64index: ++** prfd pstl2strm, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfd_gather_u64index, svuint64_t, ++ svprfd_gather_u64index (p0, x0, z1, SV_PSTL2STRM), ++ svprfd_gather_index (p0, x0, z1, SV_PSTL2STRM)) ++ ++/* ++** prfd_gather_ext_u64index: ++** prfd pstl2strm, p0, \[x0, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfd_gather_ext_u64index, svuint64_t, ++ svprfd_gather_u64index (p0, x0, svextw_u64_x (p0, z1), SV_PSTL2STRM), ++ svprfd_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL2STRM)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c +new file mode 100644 +index 000000000..89069f9b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c +@@ -0,0 +1,245 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** prfh_base: ++** prfh pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_base, uint8_t, ++ svprfh (p0, x0, SV_PLDL1KEEP), ++ svprfh (p0, x0, SV_PLDL1KEEP)) ++ ++/* ++** prfh_u8_index: ++** add (x[0-9+]), (x0, x1|x1, x0) ++** prfh pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_u8_index, uint8_t, ++ svprfh (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfh (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfh_u8_1: ++** add (x[0-9+]), x0, #?1 ++** prfh pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfh_u8_1, uint8_t, ++ svprfh (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfh (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfh_u16_index: ++** prfh pldl1keep, p0, \[x0, x1, lsl #?1\] ++** ret ++*/ ++TEST_PREFETCH (prfh_u16_index, uint16_t, ++ svprfh (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfh (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfh_u16_1: ++** add (x[0-9+]), x0, #?2 ++** prfh pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfh_u16_1, uint16_t, ++ svprfh (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfh (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfh_u32_index: ++** add (x[0-9+]), x0, x1, lsl #?2 ++** prfh pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfh_u32_index, uint32_t, ++ svprfh (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfh (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfh_u32_1: ++** add (x[0-9+]), x0, #?4 ++** prfh pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfh_u32_1, uint32_t, ++ svprfh (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfh (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfh_u64_index: ++** add (x[0-9+]), x0, x1, lsl #?3 ++** prfh pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfh_u64_index, uint64_t, ++ svprfh (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfh (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfh_u64_1: ++** add (x[0-9+]), x0, #?8 ++** prfh pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfh_u64_1, uint64_t, ++ svprfh (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfh (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfh_pldl1strm: ++** prfh pldl1strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_pldl1strm, uint8_t, ++ svprfh (p0, x0, SV_PLDL1STRM), ++ svprfh (p0, x0, SV_PLDL1STRM)) ++ ++/* ++** prfh_pldl2keep: ++** prfh pldl2keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_pldl2keep, uint8_t, ++ svprfh (p0, x0, SV_PLDL2KEEP), ++ svprfh (p0, x0, SV_PLDL2KEEP)) ++ ++/* ++** prfh_pldl2strm: ++** prfh pldl2strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_pldl2strm, uint8_t, ++ svprfh (p0, x0, SV_PLDL2STRM), ++ svprfh (p0, x0, SV_PLDL2STRM)) ++ ++/* ++** prfh_pldl3keep: ++** prfh pldl3keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_pldl3keep, uint8_t, ++ svprfh (p0, x0, SV_PLDL3KEEP), ++ svprfh (p0, x0, SV_PLDL3KEEP)) ++ ++/* ++** prfh_pldl3strm: ++** prfh pldl3strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_pldl3strm, uint8_t, ++ svprfh (p0, x0, SV_PLDL3STRM), ++ svprfh (p0, x0, SV_PLDL3STRM)) ++ ++/* ++** prfh_pstl1keep: ++** prfh pstl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_pstl1keep, uint8_t, ++ svprfh (p0, x0, SV_PSTL1KEEP), ++ svprfh (p0, x0, SV_PSTL1KEEP)) ++ ++/* ++** prfh_pstl1strm: ++** prfh pstl1strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_pstl1strm, uint8_t, ++ svprfh (p0, x0, SV_PSTL1STRM), ++ svprfh (p0, x0, SV_PSTL1STRM)) ++ ++/* ++** prfh_pstl2keep: ++** prfh pstl2keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_pstl2keep, uint8_t, ++ svprfh (p0, x0, SV_PSTL2KEEP), ++ svprfh (p0, x0, SV_PSTL2KEEP)) ++ ++/* ++** prfh_pstl2strm: ++** prfh pstl2strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH 
(prfh_pstl2strm, uint8_t, ++ svprfh (p0, x0, SV_PSTL2STRM), ++ svprfh (p0, x0, SV_PSTL2STRM)) ++ ++/* ++** prfh_pstl3keep: ++** prfh pstl3keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_pstl3keep, uint8_t, ++ svprfh (p0, x0, SV_PSTL3KEEP), ++ svprfh (p0, x0, SV_PSTL3KEEP)) ++ ++/* ++** prfh_pstl3strm: ++** prfh pstl3strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_pstl3strm, uint8_t, ++ svprfh (p0, x0, SV_PSTL3STRM), ++ svprfh (p0, x0, SV_PSTL3STRM)) ++ ++/* ++** prfh_vnum_0: ++** prfh pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_vnum_0, uint8_t, ++ svprfh_vnum (p0, x0, 0, SV_PLDL1KEEP), ++ svprfh_vnum (p0, x0, 0, SV_PLDL1KEEP)) ++ ++/* ++** prfh_vnum_1: ++** incb x0 ++** prfh pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_vnum_1, uint16_t, ++ svprfh_vnum (p0, x0, 1, SV_PLDL1KEEP), ++ svprfh_vnum (p0, x0, 1, SV_PLDL1KEEP)) ++ ++/* ++** prfh_vnum_2: ++** incb x0, all, mul #2 ++** prfh pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_vnum_2, uint32_t, ++ svprfh_vnum (p0, x0, 2, SV_PLDL1KEEP), ++ svprfh_vnum (p0, x0, 2, SV_PLDL1KEEP)) ++ ++/* ++** prfh_vnum_3: ++** incb x0, all, mul #3 ++** prfh pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfh_vnum_3, uint64_t, ++ svprfh_vnum (p0, x0, 3, SV_PLDL1KEEP), ++ svprfh_vnum (p0, x0, 3, SV_PLDL1KEEP)) ++ ++/* ++** prfh_vnum_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** prfh pldl1keep, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** prfh zldl1keep, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_PREFETCH (prfh_vnum_x1, uint64_t, ++ svprfh_vnum (p0, x0, x1, SV_PLDL1KEEP), ++ svprfh_vnum (p0, x0, x1, SV_PLDL1KEEP)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c +new file mode 100644 +index 000000000..04b7a1575 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c +@@ -0,0 +1,225 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** prfh_gather_u32base: ++** prfh pldl1keep, p0, \[z0\.s\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_u32base, svuint32_t, ++ svprfh_gather_u32base (p0, z0, SV_PLDL1KEEP), ++ svprfh_gather (p0, z0, SV_PLDL1KEEP)) ++ ++/* ++** prfh_gather_u64base: ++** prfh pldl1strm, p0, \[z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_u64base, svuint64_t, ++ svprfh_gather_u64base (p0, z0, SV_PLDL1STRM), ++ svprfh_gather (p0, z0, SV_PLDL1STRM)) ++ ++/* ++** prfh_gather_x0_u32base_index: ++** lsl (x[0-9]+), x0, #?1 ++** prfb pldl2keep, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_x0_u32base_index, svuint32_t, ++ svprfh_gather_u32base_index (p0, z0, x0, SV_PLDL2KEEP), ++ svprfh_gather_index (p0, z0, x0, SV_PLDL2KEEP)) ++ ++/* ++** prfh_gather_m1_u32base_index: ++** mov (x[0-9]+), #?-2 ++** prfb pldl2strm, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_m1_u32base_index, svuint32_t, ++ svprfh_gather_u32base_index (p0, z0, -1, SV_PLDL2STRM), ++ svprfh_gather_index (p0, z0, -1, SV_PLDL2STRM)) ++ ++/* ++** prfh_gather_0_u32base_index: ++** prfh pldl3keep, p0, \[z0\.s\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_0_u32base_index, svuint32_t, ++ svprfh_gather_u32base_index (p0, z0, 0, SV_PLDL3KEEP), ++ svprfh_gather_index (p0, z0, 0, SV_PLDL3KEEP)) ++ ++/* ++** prfh_gather_5_u32base_index: ++** prfh pldl3strm, p0, \[z0\.s, #10\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_5_u32base_index, svuint32_t, ++ svprfh_gather_u32base_index (p0, z0, 5, SV_PLDL3STRM), ++ svprfh_gather_index (p0, z0, 5, SV_PLDL3STRM)) ++ ++/* ++** prfh_gather_31_u32base_index: ++** prfh pstl1keep, p0, \[z0\.s, #62\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_31_u32base_index, svuint32_t, ++ svprfh_gather_u32base_index (p0, z0, 31, SV_PSTL1KEEP), ++ svprfh_gather_index (p0, z0, 31, SV_PSTL1KEEP)) ++ ++/* ++** prfh_gather_32_u32base_index: ++** mov (x[0-9]+), #?64 ++** prfb pstl1strm, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_32_u32base_index, svuint32_t, ++ svprfh_gather_u32base_index (p0, z0, 32, SV_PSTL1STRM), ++ svprfh_gather_index (p0, z0, 32, SV_PSTL1STRM)) ++ ++/* ++** prfh_gather_x0_u64base_index: ++** lsl (x[0-9]+), x0, #?1 ++** prfb pstl2keep, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_x0_u64base_index, svuint64_t, ++ svprfh_gather_u64base_index (p0, z0, x0, SV_PSTL2KEEP), ++ svprfh_gather_index (p0, z0, x0, SV_PSTL2KEEP)) ++ ++/* ++** prfh_gather_m1_u64base_index: ++** mov (x[0-9]+), #?-2 ++** prfb pstl2strm, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_m1_u64base_index, svuint64_t, ++ svprfh_gather_u64base_index (p0, z0, -1, SV_PSTL2STRM), ++ svprfh_gather_index (p0, z0, -1, SV_PSTL2STRM)) ++ ++/* ++** prfh_gather_0_u64base_index: ++** prfh pstl3keep, p0, \[z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_0_u64base_index, svuint64_t, ++ svprfh_gather_u64base_index (p0, z0, 0, SV_PSTL3KEEP), ++ svprfh_gather_index (p0, z0, 0, SV_PSTL3KEEP)) ++ ++/* ++** prfh_gather_5_u64base_index: ++** prfh pstl3strm, p0, \[z0\.d, #10\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_5_u64base_index, svuint64_t, ++ svprfh_gather_u64base_index (p0, z0, 5, SV_PSTL3STRM), ++ svprfh_gather_index (p0, z0, 5, SV_PSTL3STRM)) ++ ++/* ++** prfh_gather_31_u64base_index: ++** prfh pldl1keep, p0, \[z0\.d, #62\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_31_u64base_index, 
svuint64_t, ++ svprfh_gather_u64base_index (p0, z0, 31, SV_PLDL1KEEP), ++ svprfh_gather_index (p0, z0, 31, SV_PLDL1KEEP)) ++ ++/* ++** prfh_gather_32_u64base_index: ++** mov (x[0-9]+), #?64 ++** prfb pldl1strm, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfh_gather_32_u64base_index, svuint64_t, ++ svprfh_gather_u64base_index (p0, z0, 32, SV_PLDL1STRM), ++ svprfh_gather_index (p0, z0, 32, SV_PLDL1STRM)) ++ ++/* ++** prfh_gather_x0_s32index: ++** prfh pldl2keep, p0, \[x0, z0\.s, sxtw 1\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfh_gather_x0_s32index, svint32_t, ++ svprfh_gather_s32index (p0, x0, z0, SV_PLDL2KEEP), ++ svprfh_gather_index (p0, x0, z0, SV_PLDL2KEEP)) ++ ++/* ++** prfh_gather_s32index: ++** prfh pldl2strm, p0, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfh_gather_s32index, svint32_t, ++ svprfh_gather_s32index (p0, x0, z1, SV_PLDL2STRM), ++ svprfh_gather_index (p0, x0, z1, SV_PLDL2STRM)) ++ ++/* ++** prfh_gather_x0_u32index: ++** prfh pldl3keep, p0, \[x0, z0\.s, uxtw 1\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfh_gather_x0_u32index, svuint32_t, ++ svprfh_gather_u32index (p0, x0, z0, SV_PLDL3KEEP), ++ svprfh_gather_index (p0, x0, z0, SV_PLDL3KEEP)) ++ ++/* ++** prfh_gather_u32index: ++** prfh pldl3strm, p0, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfh_gather_u32index, svuint32_t, ++ svprfh_gather_u32index (p0, x0, z1, SV_PLDL3STRM), ++ svprfh_gather_index (p0, x0, z1, SV_PLDL3STRM)) ++ ++/* ++** prfh_gather_x0_s64index: ++** prfh pstl1keep, p0, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfh_gather_x0_s64index, svint64_t, ++ svprfh_gather_s64index (p0, x0, z0, SV_PSTL1KEEP), ++ svprfh_gather_index (p0, x0, z0, SV_PSTL1KEEP)) ++ ++/* ++** prfh_gather_s64index: ++** prfh pstl1strm, p0, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfh_gather_s64index, svint64_t, ++ svprfh_gather_s64index (p0, x0, z1, SV_PSTL1STRM), ++ svprfh_gather_index (p0, x0, z1, SV_PSTL1STRM)) ++ ++/* ++** prfh_gather_ext_s64index: ++** prfh pstl1strm, p0, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfh_gather_ext_s64index, svint64_t, ++ svprfh_gather_s64index (p0, x0, svextw_s64_x (p0, z1), SV_PSTL1STRM), ++ svprfh_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL1STRM)) ++ ++/* ++** prfh_gather_x0_u64index: ++** prfh pstl2keep, p0, \[x0, z0\.d, lsl 1\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfh_gather_x0_u64index, svuint64_t, ++ svprfh_gather_u64index (p0, x0, z0, SV_PSTL2KEEP), ++ svprfh_gather_index (p0, x0, z0, SV_PSTL2KEEP)) ++ ++/* ++** prfh_gather_u64index: ++** prfh pstl2strm, p0, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfh_gather_u64index, svuint64_t, ++ svprfh_gather_u64index (p0, x0, z1, SV_PSTL2STRM), ++ svprfh_gather_index (p0, x0, z1, SV_PSTL2STRM)) ++ ++/* ++** prfh_gather_ext_u64index: ++** prfh pstl2strm, p0, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfh_gather_ext_u64index, svuint64_t, ++ svprfh_gather_u64index (p0, x0, svextw_u64_x (p0, z1), SV_PSTL2STRM), ++ svprfh_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL2STRM)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c +new file mode 100644 +index 000000000..bbf6a45c9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c +@@ -0,0 +1,245 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** prfw_base: ++** prfw pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_base, uint8_t, ++ svprfw (p0, x0, SV_PLDL1KEEP), ++ svprfw (p0, x0, SV_PLDL1KEEP)) ++ ++/* ++** prfw_u8_index: ++** add (x[0-9+]), (x0, x1|x1, x0) ++** prfw pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_u8_index, uint8_t, ++ svprfw (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfw (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfw_u8_1: ++** add (x[0-9+]), x0, #?1 ++** prfw pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfw_u8_1, uint8_t, ++ svprfw (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfw (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfw_u16_index: ++** add (x[0-9+]), x0, x1, lsl #?1 ++** prfw pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfw_u16_index, uint16_t, ++ svprfw (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfw (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfw_u16_1: ++** add (x[0-9+]), x0, #?2 ++** prfw pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfw_u16_1, uint16_t, ++ svprfw (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfw (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfw_u32_index: ++** prfw pldl1keep, p0, \[x0, x1, lsl #?2\] ++** ret ++*/ ++TEST_PREFETCH (prfw_u32_index, uint32_t, ++ svprfw (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfw (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfw_u32_1: ++** add (x[0-9+]), x0, #?4 ++** prfw pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfw_u32_1, uint32_t, ++ svprfw (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfw (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfw_u64_index: ++** add (x[0-9+]), x0, x1, lsl #?3 ++** prfw pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfw_u64_index, uint64_t, ++ svprfw (p0, x0 + x1, SV_PLDL1KEEP), ++ svprfw (p0, x0 + x1, SV_PLDL1KEEP)) ++ ++/* ++** prfw_u64_1: ++** add (x[0-9+]), x0, #?8 ++** prfw pldl1keep, p0, \[\1\] ++** ret ++*/ ++TEST_PREFETCH (prfw_u64_1, uint64_t, ++ svprfw (p0, x0 + 1, SV_PLDL1KEEP), ++ svprfw (p0, x0 + 1, SV_PLDL1KEEP)) ++ ++/* ++** prfw_pldl1strm: ++** prfw pldl1strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_pldl1strm, uint8_t, ++ svprfw (p0, x0, SV_PLDL1STRM), ++ svprfw (p0, x0, SV_PLDL1STRM)) ++ ++/* ++** prfw_pldl2keep: ++** prfw pldl2keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_pldl2keep, uint8_t, ++ svprfw (p0, x0, SV_PLDL2KEEP), ++ svprfw (p0, x0, SV_PLDL2KEEP)) ++ ++/* ++** prfw_pldl2strm: ++** prfw pldl2strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_pldl2strm, uint8_t, ++ svprfw (p0, x0, SV_PLDL2STRM), ++ svprfw (p0, x0, SV_PLDL2STRM)) ++ ++/* ++** prfw_pldl3keep: ++** prfw pldl3keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_pldl3keep, uint8_t, ++ svprfw (p0, x0, SV_PLDL3KEEP), ++ svprfw (p0, x0, SV_PLDL3KEEP)) ++ ++/* ++** prfw_pldl3strm: ++** prfw pldl3strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_pldl3strm, uint8_t, ++ svprfw (p0, x0, SV_PLDL3STRM), ++ svprfw (p0, x0, SV_PLDL3STRM)) ++ ++/* ++** prfw_pstl1keep: ++** prfw pstl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_pstl1keep, uint8_t, ++ svprfw (p0, x0, SV_PSTL1KEEP), ++ svprfw (p0, x0, SV_PSTL1KEEP)) ++ ++/* ++** prfw_pstl1strm: ++** prfw pstl1strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_pstl1strm, uint8_t, ++ svprfw (p0, x0, SV_PSTL1STRM), ++ svprfw (p0, x0, SV_PSTL1STRM)) ++ ++/* ++** prfw_pstl2keep: ++** prfw pstl2keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_pstl2keep, uint8_t, ++ svprfw (p0, x0, SV_PSTL2KEEP), ++ svprfw (p0, x0, SV_PSTL2KEEP)) ++ ++/* ++** prfw_pstl2strm: ++** prfw pstl2strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH 
(prfw_pstl2strm, uint8_t, ++ svprfw (p0, x0, SV_PSTL2STRM), ++ svprfw (p0, x0, SV_PSTL2STRM)) ++ ++/* ++** prfw_pstl3keep: ++** prfw pstl3keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_pstl3keep, uint8_t, ++ svprfw (p0, x0, SV_PSTL3KEEP), ++ svprfw (p0, x0, SV_PSTL3KEEP)) ++ ++/* ++** prfw_pstl3strm: ++** prfw pstl3strm, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_pstl3strm, uint8_t, ++ svprfw (p0, x0, SV_PSTL3STRM), ++ svprfw (p0, x0, SV_PSTL3STRM)) ++ ++/* ++** prfw_vnum_0: ++** prfw pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_vnum_0, uint8_t, ++ svprfw_vnum (p0, x0, 0, SV_PLDL1KEEP), ++ svprfw_vnum (p0, x0, 0, SV_PLDL1KEEP)) ++ ++/* ++** prfw_vnum_1: ++** incb x0 ++** prfw pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_vnum_1, uint16_t, ++ svprfw_vnum (p0, x0, 1, SV_PLDL1KEEP), ++ svprfw_vnum (p0, x0, 1, SV_PLDL1KEEP)) ++ ++/* ++** prfw_vnum_2: ++** incb x0, all, mul #2 ++** prfw pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_vnum_2, uint32_t, ++ svprfw_vnum (p0, x0, 2, SV_PLDL1KEEP), ++ svprfw_vnum (p0, x0, 2, SV_PLDL1KEEP)) ++ ++/* ++** prfw_vnum_3: ++** incb x0, all, mul #3 ++** prfw pldl1keep, p0, \[x0\] ++** ret ++*/ ++TEST_PREFETCH (prfw_vnum_3, uint64_t, ++ svprfw_vnum (p0, x0, 3, SV_PLDL1KEEP), ++ svprfw_vnum (p0, x0, 3, SV_PLDL1KEEP)) ++ ++/* ++** prfw_vnum_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** prfw pldl1keep, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** prfw zldl1keep, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_PREFETCH (prfw_vnum_x1, uint64_t, ++ svprfw_vnum (p0, x0, x1, SV_PLDL1KEEP), ++ svprfw_vnum (p0, x0, x1, SV_PLDL1KEEP)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c +new file mode 100644 +index 000000000..2bbae1b9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c +@@ -0,0 +1,225 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** prfw_gather_u32base: ++** prfw pldl1keep, p0, \[z0\.s\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_u32base, svuint32_t, ++ svprfw_gather_u32base (p0, z0, SV_PLDL1KEEP), ++ svprfw_gather (p0, z0, SV_PLDL1KEEP)) ++ ++/* ++** prfw_gather_u64base: ++** prfw pldl1strm, p0, \[z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_u64base, svuint64_t, ++ svprfw_gather_u64base (p0, z0, SV_PLDL1STRM), ++ svprfw_gather (p0, z0, SV_PLDL1STRM)) ++ ++/* ++** prfw_gather_x0_u32base_index: ++** lsl (x[0-9]+), x0, #?2 ++** prfb pldl2keep, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_x0_u32base_index, svuint32_t, ++ svprfw_gather_u32base_index (p0, z0, x0, SV_PLDL2KEEP), ++ svprfw_gather_index (p0, z0, x0, SV_PLDL2KEEP)) ++ ++/* ++** prfw_gather_m1_u32base_index: ++** mov (x[0-9]+), #?-4 ++** prfb pldl2strm, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_m1_u32base_index, svuint32_t, ++ svprfw_gather_u32base_index (p0, z0, -1, SV_PLDL2STRM), ++ svprfw_gather_index (p0, z0, -1, SV_PLDL2STRM)) ++ ++/* ++** prfw_gather_0_u32base_index: ++** prfw pldl3keep, p0, \[z0\.s\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_0_u32base_index, svuint32_t, ++ svprfw_gather_u32base_index (p0, z0, 0, SV_PLDL3KEEP), ++ svprfw_gather_index (p0, z0, 0, SV_PLDL3KEEP)) ++ ++/* ++** prfw_gather_5_u32base_index: ++** prfw pldl3strm, p0, \[z0\.s, #20\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_5_u32base_index, svuint32_t, ++ svprfw_gather_u32base_index (p0, z0, 5, SV_PLDL3STRM), ++ svprfw_gather_index (p0, z0, 5, SV_PLDL3STRM)) ++ ++/* ++** prfw_gather_31_u32base_index: ++** prfw pstl1keep, p0, \[z0\.s, #124\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_31_u32base_index, svuint32_t, ++ svprfw_gather_u32base_index (p0, z0, 31, SV_PSTL1KEEP), ++ svprfw_gather_index (p0, z0, 31, SV_PSTL1KEEP)) ++ ++/* ++** prfw_gather_32_u32base_index: ++** mov (x[0-9]+), #?128 ++** prfb pstl1strm, p0, \[\1, z0\.s, uxtw\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_32_u32base_index, svuint32_t, ++ svprfw_gather_u32base_index (p0, z0, 32, SV_PSTL1STRM), ++ svprfw_gather_index (p0, z0, 32, SV_PSTL1STRM)) ++ ++/* ++** prfw_gather_x0_u64base_index: ++** lsl (x[0-9]+), x0, #?2 ++** prfb pstl2keep, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_x0_u64base_index, svuint64_t, ++ svprfw_gather_u64base_index (p0, z0, x0, SV_PSTL2KEEP), ++ svprfw_gather_index (p0, z0, x0, SV_PSTL2KEEP)) ++ ++/* ++** prfw_gather_m1_u64base_index: ++** mov (x[0-9]+), #?-4 ++** prfb pstl2strm, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_m1_u64base_index, svuint64_t, ++ svprfw_gather_u64base_index (p0, z0, -1, SV_PSTL2STRM), ++ svprfw_gather_index (p0, z0, -1, SV_PSTL2STRM)) ++ ++/* ++** prfw_gather_0_u64base_index: ++** prfw pstl3keep, p0, \[z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_0_u64base_index, svuint64_t, ++ svprfw_gather_u64base_index (p0, z0, 0, SV_PSTL3KEEP), ++ svprfw_gather_index (p0, z0, 0, SV_PSTL3KEEP)) ++ ++/* ++** prfw_gather_5_u64base_index: ++** prfw pstl3strm, p0, \[z0\.d, #20\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_5_u64base_index, svuint64_t, ++ svprfw_gather_u64base_index (p0, z0, 5, SV_PSTL3STRM), ++ svprfw_gather_index (p0, z0, 5, SV_PSTL3STRM)) ++ ++/* ++** prfw_gather_31_u64base_index: ++** prfw pldl1keep, p0, \[z0\.d, #124\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_31_u64base_index, 
svuint64_t, ++ svprfw_gather_u64base_index (p0, z0, 31, SV_PLDL1KEEP), ++ svprfw_gather_index (p0, z0, 31, SV_PLDL1KEEP)) ++ ++/* ++** prfw_gather_32_u64base_index: ++** mov (x[0-9]+), #?128 ++** prfb pldl1strm, p0, \[\1, z0\.d\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_ZS (prfw_gather_32_u64base_index, svuint64_t, ++ svprfw_gather_u64base_index (p0, z0, 32, SV_PLDL1STRM), ++ svprfw_gather_index (p0, z0, 32, SV_PLDL1STRM)) ++ ++/* ++** prfw_gather_x0_s32index: ++** prfw pldl2keep, p0, \[x0, z0\.s, sxtw 2\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfw_gather_x0_s32index, svint32_t, ++ svprfw_gather_s32index (p0, x0, z0, SV_PLDL2KEEP), ++ svprfw_gather_index (p0, x0, z0, SV_PLDL2KEEP)) ++ ++/* ++** prfw_gather_s32index: ++** prfw pldl2strm, p0, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfw_gather_s32index, svint32_t, ++ svprfw_gather_s32index (p0, x0, z1, SV_PLDL2STRM), ++ svprfw_gather_index (p0, x0, z1, SV_PLDL2STRM)) ++ ++/* ++** prfw_gather_x0_u32index: ++** prfw pldl3keep, p0, \[x0, z0\.s, uxtw 2\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfw_gather_x0_u32index, svuint32_t, ++ svprfw_gather_u32index (p0, x0, z0, SV_PLDL3KEEP), ++ svprfw_gather_index (p0, x0, z0, SV_PLDL3KEEP)) ++ ++/* ++** prfw_gather_u32index: ++** prfw pldl3strm, p0, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfw_gather_u32index, svuint32_t, ++ svprfw_gather_u32index (p0, x0, z1, SV_PLDL3STRM), ++ svprfw_gather_index (p0, x0, z1, SV_PLDL3STRM)) ++ ++/* ++** prfw_gather_x0_s64index: ++** prfw pstl1keep, p0, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfw_gather_x0_s64index, svint64_t, ++ svprfw_gather_s64index (p0, x0, z0, SV_PSTL1KEEP), ++ svprfw_gather_index (p0, x0, z0, SV_PSTL1KEEP)) ++ ++/* ++** prfw_gather_s64index: ++** prfw pstl1strm, p0, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfw_gather_s64index, svint64_t, ++ svprfw_gather_s64index (p0, x0, z1, SV_PSTL1STRM), ++ svprfw_gather_index (p0, x0, z1, SV_PSTL1STRM)) ++ ++/* ++** prfw_gather_ext_s64index: ++** prfw pstl1strm, p0, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfw_gather_ext_s64index, svint64_t, ++ svprfw_gather_s64index (p0, x0, svextw_s64_x (p0, z1), SV_PSTL1STRM), ++ svprfw_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL1STRM)) ++ ++/* ++** prfw_gather_x0_u64index: ++** prfw pstl2keep, p0, \[x0, z0\.d, lsl 2\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfw_gather_x0_u64index, svuint64_t, ++ svprfw_gather_u64index (p0, x0, z0, SV_PSTL2KEEP), ++ svprfw_gather_index (p0, x0, z0, SV_PSTL2KEEP)) ++ ++/* ++** prfw_gather_u64index: ++** prfw pstl2strm, p0, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfw_gather_u64index, svuint64_t, ++ svprfw_gather_u64index (p0, x0, z1, SV_PSTL2STRM), ++ svprfw_gather_index (p0, x0, z1, SV_PSTL2STRM)) ++ ++/* ++** prfw_gather_ext_u64index: ++** prfw pstl2strm, p0, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_PREFETCH_GATHER_SZ (prfw_gather_ext_u64index, svuint64_t, ++ svprfw_gather_u64index (p0, x0, svextw_u64_x (p0, z1), SV_PSTL2STRM), ++ svprfw_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL2STRM)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_any.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_any.c +new file mode 100644 +index 000000000..33280d388 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_any.c +@@ -0,0 +1,77 @@ ++/* { dg-additional-options "-msve-vector-bits=scalable" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ 
++ ++#include "test_sve_acle.h" ++#include ++ ++/* ++** test_bool_any: ++** ptest p0, p1\.b ++** cset [wx]0, any ++** ret ++*/ ++TEST_PTEST (test_bool_any, bool, ++ x0 = svptest_any (p0, p1)); ++ ++/* ++** test_bool_none: ++** ptest p0, p1\.b ++** cset [wx]0, none ++** ret ++*/ ++TEST_PTEST (test_bool_none, bool, ++ x0 = !svptest_any (p0, p1)); ++ ++/* ++** test_int_any: ++** ptest p0, p1\.b ++** cset [wx]0, any ++** ret ++*/ ++TEST_PTEST (test_int_any, int, ++ x0 = svptest_any (p0, p1)); ++ ++/* ++** test_int_none: ++** ptest p0, p1\.b ++** cset [wx]0, none ++** ret ++*/ ++TEST_PTEST (test_int_none, int, ++ x0 = !svptest_any (p0, p1)); ++ ++/* ++** test_int64_t_any: ++** ptest p0, p1\.b ++** cset [wx]0, any ++** ret ++*/ ++TEST_PTEST (test_int64_t_any, int64_t, ++ x0 = svptest_any (p0, p1)); ++ ++/* ++** test_int64_t_none: ++** ptest p0, p1\.b ++** cset [wx]0, none ++** ret ++*/ ++TEST_PTEST (test_int64_t_none, int64_t, ++ x0 = !svptest_any (p0, p1)); ++ ++/* ++** sel_any: ++** ptest p0, p1\.b ++** csel x0, (x0, x1, any|x1, x0, none) ++** ret ++*/ ++TEST_PTEST (sel_any, int64_t, ++ x0 = svptest_any (p0, p1) ? x0 : x1); ++ ++/* ++** sel_none: ++** ptest p0, p1\.b ++** csel x0, (x0, x1, none|x1, x0, any) ++** ret ++*/ ++TEST_PTEST (sel_none, int64_t, ++ x0 = !svptest_any (p0, p1) ? x0 : x1); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_first.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_first.c +new file mode 100644 +index 000000000..991dabd3d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_first.c +@@ -0,0 +1,77 @@ ++/* { dg-additional-options "-msve-vector-bits=scalable" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++#include ++ ++/* ++** test_bool_first: ++** ptest p0, p1\.b ++** cset [wx]0, first ++** ret ++*/ ++TEST_PTEST (test_bool_first, bool, ++ x0 = svptest_first (p0, p1)); ++ ++/* ++** test_bool_nfrst: ++** ptest p0, p1\.b ++** cset [wx]0, nfrst ++** ret ++*/ ++TEST_PTEST (test_bool_nfrst, bool, ++ x0 = !svptest_first (p0, p1)); ++ ++/* ++** test_int_first: ++** ptest p0, p1\.b ++** cset [wx]0, first ++** ret ++*/ ++TEST_PTEST (test_int_first, int, ++ x0 = svptest_first (p0, p1)); ++ ++/* ++** test_int_nfrst: ++** ptest p0, p1\.b ++** cset [wx]0, nfrst ++** ret ++*/ ++TEST_PTEST (test_int_nfrst, int, ++ x0 = !svptest_first (p0, p1)); ++ ++/* ++** test_int64_t_first: ++** ptest p0, p1\.b ++** cset [wx]0, first ++** ret ++*/ ++TEST_PTEST (test_int64_t_first, int64_t, ++ x0 = svptest_first (p0, p1)); ++ ++/* ++** test_int64_t_nfrst: ++** ptest p0, p1\.b ++** cset [wx]0, nfrst ++** ret ++*/ ++TEST_PTEST (test_int64_t_nfrst, int64_t, ++ x0 = !svptest_first (p0, p1)); ++ ++/* ++** sel_first: ++** ptest p0, p1\.b ++** csel x0, (x0, x1, first|x1, x0, nfrst) ++** ret ++*/ ++TEST_PTEST (sel_first, int64_t, ++ x0 = svptest_first (p0, p1) ? x0 : x1); ++ ++/* ++** sel_nfrst: ++** ptest p0, p1\.b ++** csel x0, (x0, x1, nfrst|x1, x0, first) ++** ret ++*/ ++TEST_PTEST (sel_nfrst, int64_t, ++ x0 = !svptest_first (p0, p1) ? 
x0 : x1); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_last.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_last.c +new file mode 100644 +index 000000000..b952a4149 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_last.c +@@ -0,0 +1,77 @@ ++/* { dg-additional-options "-msve-vector-bits=scalable" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++#include ++ ++/* ++** test_bool_last: ++** ptest p0, p1\.b ++** cset [wx]0, last ++** ret ++*/ ++TEST_PTEST (test_bool_last, bool, ++ x0 = svptest_last (p0, p1)); ++ ++/* ++** test_bool_nlast: ++** ptest p0, p1\.b ++** cset [wx]0, nlast ++** ret ++*/ ++TEST_PTEST (test_bool_nlast, bool, ++ x0 = !svptest_last (p0, p1)); ++ ++/* ++** test_int_last: ++** ptest p0, p1\.b ++** cset [wx]0, last ++** ret ++*/ ++TEST_PTEST (test_int_last, int, ++ x0 = svptest_last (p0, p1)); ++ ++/* ++** test_int_nlast: ++** ptest p0, p1\.b ++** cset [wx]0, nlast ++** ret ++*/ ++TEST_PTEST (test_int_nlast, int, ++ x0 = !svptest_last (p0, p1)); ++ ++/* ++** test_int64_t_last: ++** ptest p0, p1\.b ++** cset [wx]0, last ++** ret ++*/ ++TEST_PTEST (test_int64_t_last, int64_t, ++ x0 = svptest_last (p0, p1)); ++ ++/* ++** test_int64_t_nlast: ++** ptest p0, p1\.b ++** cset [wx]0, nlast ++** ret ++*/ ++TEST_PTEST (test_int64_t_nlast, int64_t, ++ x0 = !svptest_last (p0, p1)); ++ ++/* ++** sel_last: ++** ptest p0, p1\.b ++** csel x0, (x0, x1, last|x1, x0, nlast) ++** ret ++*/ ++TEST_PTEST (sel_last, int64_t, ++ x0 = svptest_last (p0, p1) ? x0 : x1); ++ ++/* ++** sel_nlast: ++** ptest p0, p1\.b ++** csel x0, (x0, x1, nlast|x1, x0, last) ++** ret ++*/ ++TEST_PTEST (sel_nlast, int64_t, ++ x0 = !svptest_last (p0, p1) ? x0 : x1); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue.c +new file mode 100644 +index 000000000..9c86170cb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue.c +@@ -0,0 +1,40 @@ ++/* { dg-additional-options "-msve-vector-bits=scalable" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ptrue_b8: ++** ptrue p0\.b, all ++** ret ++*/ ++TEST_P (ptrue_b8, ++ p0 = svptrue_b8 (), ++ p0 = svptrue_b8 ()); ++ ++/* ++** ptrue_b16: ++** ptrue p0\.h, all ++** ret ++*/ ++TEST_P (ptrue_b16, ++ p0 = svptrue_b16 (), ++ p0 = svptrue_b16 ()); ++ ++/* ++** ptrue_b32: ++** ptrue p0\.s, all ++** ret ++*/ ++TEST_P (ptrue_b32, ++ p0 = svptrue_b32 (), ++ p0 = svptrue_b32 ()); ++ ++/* ++** ptrue_b64: ++** ptrue p0\.d, all ++** ret ++*/ ++TEST_P (ptrue_b64, ++ p0 = svptrue_b64 (), ++ p0 = svptrue_b64 ()); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b16.c +new file mode 100644 +index 000000000..d7f83f5c6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b16.c +@@ -0,0 +1,156 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ptrue_pat_pow2_b16: ++** ptrue p0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_pow2_b16, ++ p0 = svptrue_pat_b16 (SV_POW2), ++ p0 = svptrue_pat_b16 (SV_POW2)) ++ ++/* ++** ptrue_pat_vl1_b16: ++** ptrue p0\.[bhsd], vl1 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl1_b16, ++ p0 = svptrue_pat_b16 (SV_VL1), ++ p0 = svptrue_pat_b16 (SV_VL1)) ++ ++/* ++** ptrue_pat_vl2_b16: ++** ptrue p0\.h, vl2 ++** ret ++*/ ++TEST_UNIFORM_P 
(ptrue_pat_vl2_b16, ++ p0 = svptrue_pat_b16 (SV_VL2), ++ p0 = svptrue_pat_b16 (SV_VL2)) ++ ++/* ++** ptrue_pat_vl3_b16: ++** ptrue p0\.h, vl3 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl3_b16, ++ p0 = svptrue_pat_b16 (SV_VL3), ++ p0 = svptrue_pat_b16 (SV_VL3)) ++ ++/* ++** ptrue_pat_vl4_b16: ++** ptrue p0\.h, vl4 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl4_b16, ++ p0 = svptrue_pat_b16 (SV_VL4), ++ p0 = svptrue_pat_b16 (SV_VL4)) ++ ++/* ++** ptrue_pat_vl5_b16: ++** ptrue p0\.h, vl5 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl5_b16, ++ p0 = svptrue_pat_b16 (SV_VL5), ++ p0 = svptrue_pat_b16 (SV_VL5)) ++ ++/* ++** ptrue_pat_vl6_b16: ++** ptrue p0\.h, vl6 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl6_b16, ++ p0 = svptrue_pat_b16 (SV_VL6), ++ p0 = svptrue_pat_b16 (SV_VL6)) ++ ++/* ++** ptrue_pat_vl7_b16: ++** ptrue p0\.h, vl7 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl7_b16, ++ p0 = svptrue_pat_b16 (SV_VL7), ++ p0 = svptrue_pat_b16 (SV_VL7)) ++ ++/* ++** ptrue_pat_vl8_b16: ++** ptrue p0\.h, vl8 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl8_b16, ++ p0 = svptrue_pat_b16 (SV_VL8), ++ p0 = svptrue_pat_b16 (SV_VL8)) ++ ++/* ++** ptrue_pat_vl16_b16: ++** ptrue p0\.[bhsd], vl16 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl16_b16, ++ p0 = svptrue_pat_b16 (SV_VL16), ++ p0 = svptrue_pat_b16 (SV_VL16)) ++ ++/* ++** ptrue_pat_vl32_b16: ++** ptrue p0\.h, vl32 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl32_b16, ++ p0 = svptrue_pat_b16 (SV_VL32), ++ p0 = svptrue_pat_b16 (SV_VL32)) ++ ++/* ++** ptrue_pat_vl64_b16: ++** ptrue p0\.h, vl64 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl64_b16, ++ p0 = svptrue_pat_b16 (SV_VL64), ++ p0 = svptrue_pat_b16 (SV_VL64)) ++ ++/* ++** ptrue_pat_vl128_b16: ++** ptrue p0\.[bhsd], vl128 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl128_b16, ++ p0 = svptrue_pat_b16 (SV_VL128), ++ p0 = svptrue_pat_b16 (SV_VL128)) ++ ++/* ++** ptrue_pat_vl256_b16: ++** ptrue p0\.h, vl256 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl256_b16, ++ p0 = svptrue_pat_b16 (SV_VL256), ++ p0 = svptrue_pat_b16 (SV_VL256)) ++ ++/* ++** ptrue_pat_mul4_b16: ++** ptrue p0\.h, mul4 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_mul4_b16, ++ p0 = svptrue_pat_b16 (SV_MUL4), ++ p0 = svptrue_pat_b16 (SV_MUL4)) ++ ++/* ++** ptrue_pat_mul3_b16: ++** ptrue p0\.h, mul3 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_mul3_b16, ++ p0 = svptrue_pat_b16 (SV_MUL3), ++ p0 = svptrue_pat_b16 (SV_MUL3)) ++ ++/* ++** ptrue_pat_all_b16: ++** ptrue p0\.h[^\n]* ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_all_b16, ++ p0 = svptrue_pat_b16 (SV_ALL), ++ p0 = svptrue_pat_b16 (SV_ALL)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b32.c +new file mode 100644 +index 000000000..11cf5aebb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b32.c +@@ -0,0 +1,156 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ptrue_pat_pow2_b32: ++** ptrue p0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_pow2_b32, ++ p0 = svptrue_pat_b32 (SV_POW2), ++ p0 = svptrue_pat_b32 (SV_POW2)) ++ ++/* ++** ptrue_pat_vl1_b32: ++** ptrue p0\.[bhsd], vl1 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl1_b32, ++ p0 = svptrue_pat_b32 (SV_VL1), ++ p0 = svptrue_pat_b32 (SV_VL1)) ++ ++/* ++** ptrue_pat_vl2_b32: ++** ptrue p0\.s, vl2 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl2_b32, ++ p0 = svptrue_pat_b32 (SV_VL2), ++ p0 = svptrue_pat_b32 (SV_VL2)) ++ ++/* ++** ptrue_pat_vl3_b32: ++** ptrue p0\.s, vl3 ++** ret 
++*/ ++TEST_UNIFORM_P (ptrue_pat_vl3_b32, ++ p0 = svptrue_pat_b32 (SV_VL3), ++ p0 = svptrue_pat_b32 (SV_VL3)) ++ ++/* ++** ptrue_pat_vl4_b32: ++** ptrue p0\.s, vl4 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl4_b32, ++ p0 = svptrue_pat_b32 (SV_VL4), ++ p0 = svptrue_pat_b32 (SV_VL4)) ++ ++/* ++** ptrue_pat_vl5_b32: ++** ptrue p0\.s, vl5 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl5_b32, ++ p0 = svptrue_pat_b32 (SV_VL5), ++ p0 = svptrue_pat_b32 (SV_VL5)) ++ ++/* ++** ptrue_pat_vl6_b32: ++** ptrue p0\.s, vl6 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl6_b32, ++ p0 = svptrue_pat_b32 (SV_VL6), ++ p0 = svptrue_pat_b32 (SV_VL6)) ++ ++/* ++** ptrue_pat_vl7_b32: ++** ptrue p0\.s, vl7 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl7_b32, ++ p0 = svptrue_pat_b32 (SV_VL7), ++ p0 = svptrue_pat_b32 (SV_VL7)) ++ ++/* ++** ptrue_pat_vl8_b32: ++** ptrue p0\.s, vl8 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl8_b32, ++ p0 = svptrue_pat_b32 (SV_VL8), ++ p0 = svptrue_pat_b32 (SV_VL8)) ++ ++/* ++** ptrue_pat_vl16_b32: ++** ptrue p0\.[bhsd], vl16 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl16_b32, ++ p0 = svptrue_pat_b32 (SV_VL16), ++ p0 = svptrue_pat_b32 (SV_VL16)) ++ ++/* ++** ptrue_pat_vl32_b32: ++** ptrue p0\.s, vl32 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl32_b32, ++ p0 = svptrue_pat_b32 (SV_VL32), ++ p0 = svptrue_pat_b32 (SV_VL32)) ++ ++/* ++** ptrue_pat_vl64_b32: ++** ptrue p0\.s, vl64 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl64_b32, ++ p0 = svptrue_pat_b32 (SV_VL64), ++ p0 = svptrue_pat_b32 (SV_VL64)) ++ ++/* ++** ptrue_pat_vl128_b32: ++** ptrue p0\.[bhsd], vl128 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl128_b32, ++ p0 = svptrue_pat_b32 (SV_VL128), ++ p0 = svptrue_pat_b32 (SV_VL128)) ++ ++/* ++** ptrue_pat_vl256_b32: ++** ptrue p0\.s, vl256 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl256_b32, ++ p0 = svptrue_pat_b32 (SV_VL256), ++ p0 = svptrue_pat_b32 (SV_VL256)) ++ ++/* ++** ptrue_pat_mul4_b32: ++** ptrue p0\.s, mul4 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_mul4_b32, ++ p0 = svptrue_pat_b32 (SV_MUL4), ++ p0 = svptrue_pat_b32 (SV_MUL4)) ++ ++/* ++** ptrue_pat_mul3_b32: ++** ptrue p0\.s, mul3 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_mul3_b32, ++ p0 = svptrue_pat_b32 (SV_MUL3), ++ p0 = svptrue_pat_b32 (SV_MUL3)) ++ ++/* ++** ptrue_pat_all_b32: ++** ptrue p0\.s[^\n]* ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_all_b32, ++ p0 = svptrue_pat_b32 (SV_ALL), ++ p0 = svptrue_pat_b32 (SV_ALL)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b64.c +new file mode 100644 +index 000000000..4c4202bb3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b64.c +@@ -0,0 +1,156 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ptrue_pat_pow2_b64: ++** ptrue p0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_pow2_b64, ++ p0 = svptrue_pat_b64 (SV_POW2), ++ p0 = svptrue_pat_b64 (SV_POW2)) ++ ++/* ++** ptrue_pat_vl1_b64: ++** ptrue p0\.[bhsd], vl1 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl1_b64, ++ p0 = svptrue_pat_b64 (SV_VL1), ++ p0 = svptrue_pat_b64 (SV_VL1)) ++ ++/* ++** ptrue_pat_vl2_b64: ++** ptrue p0\.d, vl2 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl2_b64, ++ p0 = svptrue_pat_b64 (SV_VL2), ++ p0 = svptrue_pat_b64 (SV_VL2)) ++ ++/* ++** ptrue_pat_vl3_b64: ++** ptrue p0\.d, vl3 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl3_b64, ++ p0 = svptrue_pat_b64 (SV_VL3), ++ p0 = svptrue_pat_b64 (SV_VL3)) ++ ++/* ++** ptrue_pat_vl4_b64: ++** ptrue 
p0\.d, vl4 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl4_b64, ++ p0 = svptrue_pat_b64 (SV_VL4), ++ p0 = svptrue_pat_b64 (SV_VL4)) ++ ++/* ++** ptrue_pat_vl5_b64: ++** ptrue p0\.d, vl5 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl5_b64, ++ p0 = svptrue_pat_b64 (SV_VL5), ++ p0 = svptrue_pat_b64 (SV_VL5)) ++ ++/* ++** ptrue_pat_vl6_b64: ++** ptrue p0\.d, vl6 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl6_b64, ++ p0 = svptrue_pat_b64 (SV_VL6), ++ p0 = svptrue_pat_b64 (SV_VL6)) ++ ++/* ++** ptrue_pat_vl7_b64: ++** ptrue p0\.d, vl7 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl7_b64, ++ p0 = svptrue_pat_b64 (SV_VL7), ++ p0 = svptrue_pat_b64 (SV_VL7)) ++ ++/* ++** ptrue_pat_vl8_b64: ++** ptrue p0\.d, vl8 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl8_b64, ++ p0 = svptrue_pat_b64 (SV_VL8), ++ p0 = svptrue_pat_b64 (SV_VL8)) ++ ++/* ++** ptrue_pat_vl16_b64: ++** ptrue p0\.[bhsd], vl16 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl16_b64, ++ p0 = svptrue_pat_b64 (SV_VL16), ++ p0 = svptrue_pat_b64 (SV_VL16)) ++ ++/* ++** ptrue_pat_vl32_b64: ++** ptrue p0\.d, vl32 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl32_b64, ++ p0 = svptrue_pat_b64 (SV_VL32), ++ p0 = svptrue_pat_b64 (SV_VL32)) ++ ++/* ++** ptrue_pat_vl64_b64: ++** ptrue p0\.d, vl64 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl64_b64, ++ p0 = svptrue_pat_b64 (SV_VL64), ++ p0 = svptrue_pat_b64 (SV_VL64)) ++ ++/* ++** ptrue_pat_vl128_b64: ++** ptrue p0\.[bhsd], vl128 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl128_b64, ++ p0 = svptrue_pat_b64 (SV_VL128), ++ p0 = svptrue_pat_b64 (SV_VL128)) ++ ++/* ++** ptrue_pat_vl256_b64: ++** ptrue p0\.d, vl256 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl256_b64, ++ p0 = svptrue_pat_b64 (SV_VL256), ++ p0 = svptrue_pat_b64 (SV_VL256)) ++ ++/* ++** ptrue_pat_mul4_b64: ++** ptrue p0\.d, mul4 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_mul4_b64, ++ p0 = svptrue_pat_b64 (SV_MUL4), ++ p0 = svptrue_pat_b64 (SV_MUL4)) ++ ++/* ++** ptrue_pat_mul3_b64: ++** ptrue p0\.d, mul3 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_mul3_b64, ++ p0 = svptrue_pat_b64 (SV_MUL3), ++ p0 = svptrue_pat_b64 (SV_MUL3)) ++ ++/* ++** ptrue_pat_all_b64: ++** ptrue p0\.d[^\n]* ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_all_b64, ++ p0 = svptrue_pat_b64 (SV_ALL), ++ p0 = svptrue_pat_b64 (SV_ALL)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b8.c +new file mode 100644 +index 000000000..49fb8c555 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b8.c +@@ -0,0 +1,156 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** ptrue_pat_pow2_b8: ++** ptrue p0\.b, pow2 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_pow2_b8, ++ p0 = svptrue_pat_b8 (SV_POW2), ++ p0 = svptrue_pat_b8 (SV_POW2)) ++ ++/* ++** ptrue_pat_vl1_b8: ++** ptrue p0\.[bhsd], vl1 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl1_b8, ++ p0 = svptrue_pat_b8 (SV_VL1), ++ p0 = svptrue_pat_b8 (SV_VL1)) ++ ++/* ++** ptrue_pat_vl2_b8: ++** ptrue p0\.b, vl2 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl2_b8, ++ p0 = svptrue_pat_b8 (SV_VL2), ++ p0 = svptrue_pat_b8 (SV_VL2)) ++ ++/* ++** ptrue_pat_vl3_b8: ++** ptrue p0\.b, vl3 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl3_b8, ++ p0 = svptrue_pat_b8 (SV_VL3), ++ p0 = svptrue_pat_b8 (SV_VL3)) ++ ++/* ++** ptrue_pat_vl4_b8: ++** ptrue p0\.b, vl4 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl4_b8, ++ p0 = svptrue_pat_b8 (SV_VL4), ++ p0 = svptrue_pat_b8 (SV_VL4)) ++ ++/* ++** ptrue_pat_vl5_b8: ++** ptrue 
p0\.b, vl5 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl5_b8, ++ p0 = svptrue_pat_b8 (SV_VL5), ++ p0 = svptrue_pat_b8 (SV_VL5)) ++ ++/* ++** ptrue_pat_vl6_b8: ++** ptrue p0\.b, vl6 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl6_b8, ++ p0 = svptrue_pat_b8 (SV_VL6), ++ p0 = svptrue_pat_b8 (SV_VL6)) ++ ++/* ++** ptrue_pat_vl7_b8: ++** ptrue p0\.b, vl7 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl7_b8, ++ p0 = svptrue_pat_b8 (SV_VL7), ++ p0 = svptrue_pat_b8 (SV_VL7)) ++ ++/* ++** ptrue_pat_vl8_b8: ++** ptrue p0\.b, vl8 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl8_b8, ++ p0 = svptrue_pat_b8 (SV_VL8), ++ p0 = svptrue_pat_b8 (SV_VL8)) ++ ++/* ++** ptrue_pat_vl16_b8: ++** ptrue p0\.[bhsd], vl16 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl16_b8, ++ p0 = svptrue_pat_b8 (SV_VL16), ++ p0 = svptrue_pat_b8 (SV_VL16)) ++ ++/* ++** ptrue_pat_vl32_b8: ++** ptrue p0\.b, vl32 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl32_b8, ++ p0 = svptrue_pat_b8 (SV_VL32), ++ p0 = svptrue_pat_b8 (SV_VL32)) ++ ++/* ++** ptrue_pat_vl64_b8: ++** ptrue p0\.b, vl64 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl64_b8, ++ p0 = svptrue_pat_b8 (SV_VL64), ++ p0 = svptrue_pat_b8 (SV_VL64)) ++ ++/* ++** ptrue_pat_vl128_b8: ++** ptrue p0\.[bhsd], vl128 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl128_b8, ++ p0 = svptrue_pat_b8 (SV_VL128), ++ p0 = svptrue_pat_b8 (SV_VL128)) ++ ++/* ++** ptrue_pat_vl256_b8: ++** ptrue p0\.b, vl256 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_vl256_b8, ++ p0 = svptrue_pat_b8 (SV_VL256), ++ p0 = svptrue_pat_b8 (SV_VL256)) ++ ++/* ++** ptrue_pat_mul4_b8: ++** ptrue p0\.b, mul4 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_mul4_b8, ++ p0 = svptrue_pat_b8 (SV_MUL4), ++ p0 = svptrue_pat_b8 (SV_MUL4)) ++ ++/* ++** ptrue_pat_mul3_b8: ++** ptrue p0\.b, mul3 ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_mul3_b8, ++ p0 = svptrue_pat_b8 (SV_MUL3), ++ p0 = svptrue_pat_b8 (SV_MUL3)) ++ ++/* ++** ptrue_pat_all_b8: ++** ptrue p0\.b[^\n]* ++** ret ++*/ ++TEST_UNIFORM_P (ptrue_pat_all_b8, ++ p0 = svptrue_pat_b8 (SV_ALL), ++ p0 = svptrue_pat_b8 (SV_ALL)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s16.c +new file mode 100644 +index 000000000..03255c41c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s16.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qadd_s16_tied1: ++** sqadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s16_tied1, svint16_t, ++ z0 = svqadd_s16 (z0, z1), ++ z0 = svqadd (z0, z1)) ++ ++/* ++** qadd_s16_tied2: ++** sqadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s16_tied2, svint16_t, ++ z0 = svqadd_s16 (z1, z0), ++ z0 = svqadd (z1, z0)) ++ ++/* ++** qadd_s16_untied: ++** sqadd z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s16_untied, svint16_t, ++ z0 = svqadd_s16 (z1, z2), ++ z0 = svqadd (z1, z2)) ++ ++/* ++** qadd_w0_s16_tied1: ++** mov (z[0-9]+\.h), w0 ++** sqadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_s16_tied1, svint16_t, int16_t, ++ z0 = svqadd_n_s16 (z0, x0), ++ z0 = svqadd (z0, x0)) ++ ++/* ++** qadd_w0_s16_untied: ++** mov (z[0-9]+\.h), w0 ++** sqadd z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_s16_untied, svint16_t, int16_t, ++ z0 = svqadd_n_s16 (z1, x0), ++ z0 = svqadd (z1, x0)) ++ ++/* ++** qadd_1_s16_tied1: ++** sqadd z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_s16_tied1, 
svint16_t, ++ z0 = svqadd_n_s16 (z0, 1), ++ z0 = svqadd (z0, 1)) ++ ++/* ++** qadd_1_s16_untied: ++** movprfx z0, z1 ++** sqadd z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_s16_untied, svint16_t, ++ z0 = svqadd_n_s16 (z1, 1), ++ z0 = svqadd (z1, 1)) ++ ++/* ++** qadd_127_s16: ++** sqadd z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_127_s16, svint16_t, ++ z0 = svqadd_n_s16 (z0, 127), ++ z0 = svqadd (z0, 127)) ++ ++/* ++** qadd_128_s16: ++** sqadd z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_128_s16, svint16_t, ++ z0 = svqadd_n_s16 (z0, 128), ++ z0 = svqadd (z0, 128)) ++ ++/* ++** qadd_255_s16: ++** sqadd z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_255_s16, svint16_t, ++ z0 = svqadd_n_s16 (z0, 255), ++ z0 = svqadd (z0, 255)) ++ ++/* ++** qadd_m1_s16: ++** sqsub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m1_s16, svint16_t, ++ z0 = svqadd_n_s16 (z0, -1), ++ z0 = svqadd (z0, -1)) ++ ++/* ++** qadd_m127_s16: ++** sqsub z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m127_s16, svint16_t, ++ z0 = svqadd_n_s16 (z0, -127), ++ z0 = svqadd (z0, -127)) ++ ++/* ++** qadd_m128_s16: ++** sqsub z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m128_s16, svint16_t, ++ z0 = svqadd_n_s16 (z0, -128), ++ z0 = svqadd (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s32.c +new file mode 100644 +index 000000000..197cc3840 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s32.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qadd_s32_tied1: ++** sqadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s32_tied1, svint32_t, ++ z0 = svqadd_s32 (z0, z1), ++ z0 = svqadd (z0, z1)) ++ ++/* ++** qadd_s32_tied2: ++** sqadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s32_tied2, svint32_t, ++ z0 = svqadd_s32 (z1, z0), ++ z0 = svqadd (z1, z0)) ++ ++/* ++** qadd_s32_untied: ++** sqadd z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s32_untied, svint32_t, ++ z0 = svqadd_s32 (z1, z2), ++ z0 = svqadd (z1, z2)) ++ ++/* ++** qadd_w0_s32_tied1: ++** mov (z[0-9]+\.s), w0 ++** sqadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_s32_tied1, svint32_t, int32_t, ++ z0 = svqadd_n_s32 (z0, x0), ++ z0 = svqadd (z0, x0)) ++ ++/* ++** qadd_w0_s32_untied: ++** mov (z[0-9]+\.s), w0 ++** sqadd z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_s32_untied, svint32_t, int32_t, ++ z0 = svqadd_n_s32 (z1, x0), ++ z0 = svqadd (z1, x0)) ++ ++/* ++** qadd_1_s32_tied1: ++** sqadd z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_s32_tied1, svint32_t, ++ z0 = svqadd_n_s32 (z0, 1), ++ z0 = svqadd (z0, 1)) ++ ++/* ++** qadd_1_s32_untied: ++** movprfx z0, z1 ++** sqadd z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_s32_untied, svint32_t, ++ z0 = svqadd_n_s32 (z1, 1), ++ z0 = svqadd (z1, 1)) ++ ++/* ++** qadd_127_s32: ++** sqadd z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_127_s32, svint32_t, ++ z0 = svqadd_n_s32 (z0, 127), ++ z0 = svqadd (z0, 127)) ++ ++/* ++** qadd_128_s32: ++** sqadd z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_128_s32, svint32_t, ++ z0 = svqadd_n_s32 (z0, 128), ++ z0 = svqadd (z0, 128)) ++ ++/* ++** qadd_255_s32: ++** sqadd z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_255_s32, svint32_t, ++ z0 = svqadd_n_s32 (z0, 255), 
++ z0 = svqadd (z0, 255)) ++ ++/* ++** qadd_m1_s32: ++** sqsub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m1_s32, svint32_t, ++ z0 = svqadd_n_s32 (z0, -1), ++ z0 = svqadd (z0, -1)) ++ ++/* ++** qadd_m127_s32: ++** sqsub z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m127_s32, svint32_t, ++ z0 = svqadd_n_s32 (z0, -127), ++ z0 = svqadd (z0, -127)) ++ ++/* ++** qadd_m128_s32: ++** sqsub z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m128_s32, svint32_t, ++ z0 = svqadd_n_s32 (z0, -128), ++ z0 = svqadd (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s64.c +new file mode 100644 +index 000000000..0218866ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s64.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qadd_s64_tied1: ++** sqadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s64_tied1, svint64_t, ++ z0 = svqadd_s64 (z0, z1), ++ z0 = svqadd (z0, z1)) ++ ++/* ++** qadd_s64_tied2: ++** sqadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s64_tied2, svint64_t, ++ z0 = svqadd_s64 (z1, z0), ++ z0 = svqadd (z1, z0)) ++ ++/* ++** qadd_s64_untied: ++** sqadd z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s64_untied, svint64_t, ++ z0 = svqadd_s64 (z1, z2), ++ z0 = svqadd (z1, z2)) ++ ++/* ++** qadd_x0_s64_tied1: ++** mov (z[0-9]+\.d), x0 ++** sqadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_x0_s64_tied1, svint64_t, int64_t, ++ z0 = svqadd_n_s64 (z0, x0), ++ z0 = svqadd (z0, x0)) ++ ++/* ++** qadd_x0_s64_untied: ++** mov (z[0-9]+\.d), x0 ++** sqadd z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_x0_s64_untied, svint64_t, int64_t, ++ z0 = svqadd_n_s64 (z1, x0), ++ z0 = svqadd (z1, x0)) ++ ++/* ++** qadd_1_s64_tied1: ++** sqadd z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_s64_tied1, svint64_t, ++ z0 = svqadd_n_s64 (z0, 1), ++ z0 = svqadd (z0, 1)) ++ ++/* ++** qadd_1_s64_untied: ++** movprfx z0, z1 ++** sqadd z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_s64_untied, svint64_t, ++ z0 = svqadd_n_s64 (z1, 1), ++ z0 = svqadd (z1, 1)) ++ ++/* ++** qadd_127_s64: ++** sqadd z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_127_s64, svint64_t, ++ z0 = svqadd_n_s64 (z0, 127), ++ z0 = svqadd (z0, 127)) ++ ++/* ++** qadd_128_s64: ++** sqadd z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_128_s64, svint64_t, ++ z0 = svqadd_n_s64 (z0, 128), ++ z0 = svqadd (z0, 128)) ++ ++/* ++** qadd_255_s64: ++** sqadd z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_255_s64, svint64_t, ++ z0 = svqadd_n_s64 (z0, 255), ++ z0 = svqadd (z0, 255)) ++ ++/* ++** qadd_m1_s64: ++** sqsub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m1_s64, svint64_t, ++ z0 = svqadd_n_s64 (z0, -1), ++ z0 = svqadd (z0, -1)) ++ ++/* ++** qadd_m127_s64: ++** sqsub z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m127_s64, svint64_t, ++ z0 = svqadd_n_s64 (z0, -127), ++ z0 = svqadd (z0, -127)) ++ ++/* ++** qadd_m128_s64: ++** sqsub z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m128_s64, svint64_t, ++ z0 = svqadd_n_s64 (z0, -128), ++ z0 = svqadd (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s8.c +new file mode 100644 +index 000000000..c8b88fa82 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s8.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qadd_s8_tied1: ++** sqadd z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s8_tied1, svint8_t, ++ z0 = svqadd_s8 (z0, z1), ++ z0 = svqadd (z0, z1)) ++ ++/* ++** qadd_s8_tied2: ++** sqadd z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s8_tied2, svint8_t, ++ z0 = svqadd_s8 (z1, z0), ++ z0 = svqadd (z1, z0)) ++ ++/* ++** qadd_s8_untied: ++** sqadd z0\.b, (z1\.b, z2\.b|z2\.b, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_s8_untied, svint8_t, ++ z0 = svqadd_s8 (z1, z2), ++ z0 = svqadd (z1, z2)) ++ ++/* ++** qadd_w0_s8_tied1: ++** mov (z[0-9]+\.b), w0 ++** sqadd z0\.b, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_s8_tied1, svint8_t, int8_t, ++ z0 = svqadd_n_s8 (z0, x0), ++ z0 = svqadd (z0, x0)) ++ ++/* ++** qadd_w0_s8_untied: ++** mov (z[0-9]+\.b), w0 ++** sqadd z0\.b, (z1\.b, \1|\1, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_s8_untied, svint8_t, int8_t, ++ z0 = svqadd_n_s8 (z1, x0), ++ z0 = svqadd (z1, x0)) ++ ++/* ++** qadd_1_s8_tied1: ++** sqadd z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_s8_tied1, svint8_t, ++ z0 = svqadd_n_s8 (z0, 1), ++ z0 = svqadd (z0, 1)) ++ ++/* ++** qadd_1_s8_untied: ++** movprfx z0, z1 ++** sqadd z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_s8_untied, svint8_t, ++ z0 = svqadd_n_s8 (z1, 1), ++ z0 = svqadd (z1, 1)) ++ ++/* ++** qadd_127_s8: ++** sqadd z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_127_s8, svint8_t, ++ z0 = svqadd_n_s8 (z0, 127), ++ z0 = svqadd (z0, 127)) ++ ++/* ++** qadd_128_s8: ++** sqsub z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_128_s8, svint8_t, ++ z0 = svqadd_n_s8 (z0, 128), ++ z0 = svqadd (z0, 128)) ++ ++/* ++** qadd_255_s8: ++** sqsub z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_255_s8, svint8_t, ++ z0 = svqadd_n_s8 (z0, 255), ++ z0 = svqadd (z0, 255)) ++ ++/* ++** qadd_m1_s8: ++** sqsub z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m1_s8, svint8_t, ++ z0 = svqadd_n_s8 (z0, -1), ++ z0 = svqadd (z0, -1)) ++ ++/* ++** qadd_m127_s8: ++** sqsub z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m127_s8, svint8_t, ++ z0 = svqadd_n_s8 (z0, -127), ++ z0 = svqadd (z0, -127)) ++ ++/* ++** qadd_m128_s8: ++** sqsub z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m128_s8, svint8_t, ++ z0 = svqadd_n_s8 (z0, -128), ++ z0 = svqadd (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u16.c +new file mode 100644 +index 000000000..dd7bc5b6a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u16.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qadd_u16_tied1: ++** uqadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u16_tied1, svuint16_t, ++ z0 = svqadd_u16 (z0, z1), ++ z0 = svqadd (z0, z1)) ++ ++/* ++** qadd_u16_tied2: ++** uqadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u16_tied2, svuint16_t, ++ z0 = svqadd_u16 (z1, z0), ++ z0 = svqadd (z1, z0)) ++ ++/* ++** qadd_u16_untied: ++** uqadd z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u16_untied, svuint16_t, ++ z0 = svqadd_u16 (z1, z2), ++ z0 = svqadd (z1, z2)) ++ ++/* ++** qadd_w0_u16_tied1: ++** mov (z[0-9]+\.h), w0 
++** uqadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_u16_tied1, svuint16_t, uint16_t, ++ z0 = svqadd_n_u16 (z0, x0), ++ z0 = svqadd (z0, x0)) ++ ++/* ++** qadd_w0_u16_untied: ++** mov (z[0-9]+\.h), w0 ++** uqadd z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_u16_untied, svuint16_t, uint16_t, ++ z0 = svqadd_n_u16 (z1, x0), ++ z0 = svqadd (z1, x0)) ++ ++/* ++** qadd_1_u16_tied1: ++** uqadd z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_u16_tied1, svuint16_t, ++ z0 = svqadd_n_u16 (z0, 1), ++ z0 = svqadd (z0, 1)) ++ ++/* ++** qadd_1_u16_untied: ++** movprfx z0, z1 ++** uqadd z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_u16_untied, svuint16_t, ++ z0 = svqadd_n_u16 (z1, 1), ++ z0 = svqadd (z1, 1)) ++ ++/* ++** qadd_127_u16: ++** uqadd z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_127_u16, svuint16_t, ++ z0 = svqadd_n_u16 (z0, 127), ++ z0 = svqadd (z0, 127)) ++ ++/* ++** qadd_128_u16: ++** uqadd z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_128_u16, svuint16_t, ++ z0 = svqadd_n_u16 (z0, 128), ++ z0 = svqadd (z0, 128)) ++ ++/* ++** qadd_255_u16: ++** uqadd z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_255_u16, svuint16_t, ++ z0 = svqadd_n_u16 (z0, 255), ++ z0 = svqadd (z0, 255)) ++ ++/* ++** qadd_m1_u16: ++** mov (z[0-9]+)\.b, #-1 ++** uqadd z0\.h, (z0\.h, \1\.h|\1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m1_u16, svuint16_t, ++ z0 = svqadd_n_u16 (z0, -1), ++ z0 = svqadd (z0, -1)) ++ ++/* ++** qadd_m127_u16: ++** mov (z[0-9]+\.h), #-127 ++** uqadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m127_u16, svuint16_t, ++ z0 = svqadd_n_u16 (z0, -127), ++ z0 = svqadd (z0, -127)) ++ ++/* ++** qadd_m128_u16: ++** mov (z[0-9]+\.h), #-128 ++** uqadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m128_u16, svuint16_t, ++ z0 = svqadd_n_u16 (z0, -128), ++ z0 = svqadd (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u32.c +new file mode 100644 +index 000000000..0f846e44e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u32.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qadd_u32_tied1: ++** uqadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u32_tied1, svuint32_t, ++ z0 = svqadd_u32 (z0, z1), ++ z0 = svqadd (z0, z1)) ++ ++/* ++** qadd_u32_tied2: ++** uqadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u32_tied2, svuint32_t, ++ z0 = svqadd_u32 (z1, z0), ++ z0 = svqadd (z1, z0)) ++ ++/* ++** qadd_u32_untied: ++** uqadd z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u32_untied, svuint32_t, ++ z0 = svqadd_u32 (z1, z2), ++ z0 = svqadd (z1, z2)) ++ ++/* ++** qadd_w0_u32_tied1: ++** mov (z[0-9]+\.s), w0 ++** uqadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_u32_tied1, svuint32_t, uint32_t, ++ z0 = svqadd_n_u32 (z0, x0), ++ z0 = svqadd (z0, x0)) ++ ++/* ++** qadd_w0_u32_untied: ++** mov (z[0-9]+\.s), w0 ++** uqadd z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_u32_untied, svuint32_t, uint32_t, ++ z0 = svqadd_n_u32 (z1, x0), ++ z0 = svqadd (z1, x0)) ++ ++/* ++** qadd_1_u32_tied1: ++** uqadd z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_u32_tied1, svuint32_t, ++ z0 = svqadd_n_u32 (z0, 1), ++ z0 = svqadd (z0, 1)) ++ ++/* ++** qadd_1_u32_untied: ++** 
movprfx z0, z1 ++** uqadd z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_u32_untied, svuint32_t, ++ z0 = svqadd_n_u32 (z1, 1), ++ z0 = svqadd (z1, 1)) ++ ++/* ++** qadd_127_u32: ++** uqadd z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_127_u32, svuint32_t, ++ z0 = svqadd_n_u32 (z0, 127), ++ z0 = svqadd (z0, 127)) ++ ++/* ++** qadd_128_u32: ++** uqadd z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_128_u32, svuint32_t, ++ z0 = svqadd_n_u32 (z0, 128), ++ z0 = svqadd (z0, 128)) ++ ++/* ++** qadd_255_u32: ++** uqadd z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_255_u32, svuint32_t, ++ z0 = svqadd_n_u32 (z0, 255), ++ z0 = svqadd (z0, 255)) ++ ++/* ++** qadd_m1_u32: ++** mov (z[0-9]+)\.b, #-1 ++** uqadd z0\.s, (z0\.s, \1\.s|\1\.s, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m1_u32, svuint32_t, ++ z0 = svqadd_n_u32 (z0, -1), ++ z0 = svqadd (z0, -1)) ++ ++/* ++** qadd_m127_u32: ++** mov (z[0-9]+\.s), #-127 ++** uqadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m127_u32, svuint32_t, ++ z0 = svqadd_n_u32 (z0, -127), ++ z0 = svqadd (z0, -127)) ++ ++/* ++** qadd_m128_u32: ++** mov (z[0-9]+\.s), #-128 ++** uqadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m128_u32, svuint32_t, ++ z0 = svqadd_n_u32 (z0, -128), ++ z0 = svqadd (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u64.c +new file mode 100644 +index 000000000..454fb1d63 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u64.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qadd_u64_tied1: ++** uqadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u64_tied1, svuint64_t, ++ z0 = svqadd_u64 (z0, z1), ++ z0 = svqadd (z0, z1)) ++ ++/* ++** qadd_u64_tied2: ++** uqadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u64_tied2, svuint64_t, ++ z0 = svqadd_u64 (z1, z0), ++ z0 = svqadd (z1, z0)) ++ ++/* ++** qadd_u64_untied: ++** uqadd z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u64_untied, svuint64_t, ++ z0 = svqadd_u64 (z1, z2), ++ z0 = svqadd (z1, z2)) ++ ++/* ++** qadd_x0_u64_tied1: ++** mov (z[0-9]+\.d), x0 ++** uqadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_x0_u64_tied1, svuint64_t, uint64_t, ++ z0 = svqadd_n_u64 (z0, x0), ++ z0 = svqadd (z0, x0)) ++ ++/* ++** qadd_x0_u64_untied: ++** mov (z[0-9]+\.d), x0 ++** uqadd z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_x0_u64_untied, svuint64_t, uint64_t, ++ z0 = svqadd_n_u64 (z1, x0), ++ z0 = svqadd (z1, x0)) ++ ++/* ++** qadd_1_u64_tied1: ++** uqadd z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_u64_tied1, svuint64_t, ++ z0 = svqadd_n_u64 (z0, 1), ++ z0 = svqadd (z0, 1)) ++ ++/* ++** qadd_1_u64_untied: ++** movprfx z0, z1 ++** uqadd z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_u64_untied, svuint64_t, ++ z0 = svqadd_n_u64 (z1, 1), ++ z0 = svqadd (z1, 1)) ++ ++/* ++** qadd_127_u64: ++** uqadd z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_127_u64, svuint64_t, ++ z0 = svqadd_n_u64 (z0, 127), ++ z0 = svqadd (z0, 127)) ++ ++/* ++** qadd_128_u64: ++** uqadd z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_128_u64, svuint64_t, ++ z0 = svqadd_n_u64 (z0, 128), ++ z0 = svqadd (z0, 128)) ++ ++/* ++** qadd_255_u64: ++** uqadd z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_255_u64, 
svuint64_t, ++ z0 = svqadd_n_u64 (z0, 255), ++ z0 = svqadd (z0, 255)) ++ ++/* ++** qadd_m1_u64: ++** mov (z[0-9]+)\.b, #-1 ++** uqadd z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m1_u64, svuint64_t, ++ z0 = svqadd_n_u64 (z0, -1), ++ z0 = svqadd (z0, -1)) ++ ++/* ++** qadd_m127_u64: ++** mov (z[0-9]+\.d), #-127 ++** uqadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m127_u64, svuint64_t, ++ z0 = svqadd_n_u64 (z0, -127), ++ z0 = svqadd (z0, -127)) ++ ++/* ++** qadd_m128_u64: ++** mov (z[0-9]+\.d), #-128 ++** uqadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m128_u64, svuint64_t, ++ z0 = svqadd_n_u64 (z0, -128), ++ z0 = svqadd (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u8.c +new file mode 100644 +index 000000000..e86b8988c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u8.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qadd_u8_tied1: ++** uqadd z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u8_tied1, svuint8_t, ++ z0 = svqadd_u8 (z0, z1), ++ z0 = svqadd (z0, z1)) ++ ++/* ++** qadd_u8_tied2: ++** uqadd z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u8_tied2, svuint8_t, ++ z0 = svqadd_u8 (z1, z0), ++ z0 = svqadd (z1, z0)) ++ ++/* ++** qadd_u8_untied: ++** uqadd z0\.b, (z1\.b, z2\.b|z2\.b, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_u8_untied, svuint8_t, ++ z0 = svqadd_u8 (z1, z2), ++ z0 = svqadd (z1, z2)) ++ ++/* ++** qadd_w0_u8_tied1: ++** mov (z[0-9]+\.b), w0 ++** uqadd z0\.b, (z0\.b, \1|\1, z0\.b) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_u8_tied1, svuint8_t, uint8_t, ++ z0 = svqadd_n_u8 (z0, x0), ++ z0 = svqadd (z0, x0)) ++ ++/* ++** qadd_w0_u8_untied: ++** mov (z[0-9]+\.b), w0 ++** uqadd z0\.b, (z1\.b, \1|\1, z1\.b) ++** ret ++*/ ++TEST_UNIFORM_ZX (qadd_w0_u8_untied, svuint8_t, uint8_t, ++ z0 = svqadd_n_u8 (z1, x0), ++ z0 = svqadd (z1, x0)) ++ ++/* ++** qadd_1_u8_tied1: ++** uqadd z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_u8_tied1, svuint8_t, ++ z0 = svqadd_n_u8 (z0, 1), ++ z0 = svqadd (z0, 1)) ++ ++/* ++** qadd_1_u8_untied: ++** movprfx z0, z1 ++** uqadd z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_1_u8_untied, svuint8_t, ++ z0 = svqadd_n_u8 (z1, 1), ++ z0 = svqadd (z1, 1)) ++ ++/* ++** qadd_127_u8: ++** uqadd z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_127_u8, svuint8_t, ++ z0 = svqadd_n_u8 (z0, 127), ++ z0 = svqadd (z0, 127)) ++ ++/* ++** qadd_128_u8: ++** uqadd z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_128_u8, svuint8_t, ++ z0 = svqadd_n_u8 (z0, 128), ++ z0 = svqadd (z0, 128)) ++ ++/* ++** qadd_255_u8: ++** uqadd z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_255_u8, svuint8_t, ++ z0 = svqadd_n_u8 (z0, 255), ++ z0 = svqadd (z0, 255)) ++ ++/* ++** qadd_m1_u8: ++** uqadd z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m1_u8, svuint8_t, ++ z0 = svqadd_n_u8 (z0, -1), ++ z0 = svqadd (z0, -1)) ++ ++/* ++** qadd_m127_u8: ++** uqadd z0\.b, z0\.b, #129 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m127_u8, svuint8_t, ++ z0 = svqadd_n_u8 (z0, -127), ++ z0 = svqadd (z0, -127)) ++ ++/* ++** qadd_m128_u8: ++** uqadd z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qadd_m128_u8, svuint8_t, ++ z0 = svqadd_n_u8 (z0, -128), ++ z0 = svqadd (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s32.c +new file mode 100644 +index 000000000..22b3afef7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecb_pat_n_1_s32_tied: ++** sqdecb x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_1_s32_tied, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_POW2, 1), ++ x0 = svqdecb_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecb_pat_n_1_s32_untied: ++** mov w0, w1 ++** sqdecb x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_1_s32_untied, int32_t, ++ x0 = svqdecb_pat_n_s32 (x1, SV_POW2, 1), ++ x0 = svqdecb_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecb_pat_n_2_s32: ++** sqdecb x0, w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_2_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_POW2, 2), ++ x0 = svqdecb_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecb_pat_n_7_s32: ++** sqdecb x0, w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_7_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_POW2, 7), ++ x0 = svqdecb_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecb_pat_n_15_s32: ++** sqdecb x0, w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_15_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_POW2, 15), ++ x0 = svqdecb_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecb_pat_n_16_s32: ++** sqdecb x0, w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_16_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_POW2, 16), ++ x0 = svqdecb_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecb_pat_n_vl1_s32: ++** sqdecb x0, w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl1_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL1, 16), ++ x0 = svqdecb_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecb_pat_n_vl2_s32: ++** sqdecb x0, w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl2_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL2, 16), ++ x0 = svqdecb_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecb_pat_n_vl3_s32: ++** sqdecb x0, w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl3_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL3, 16), ++ x0 = svqdecb_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecb_pat_n_vl4_s32: ++** sqdecb x0, w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl4_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL4, 16), ++ x0 = svqdecb_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecb_pat_n_vl5_s32: ++** sqdecb x0, w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl5_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL5, 16), ++ x0 = svqdecb_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecb_pat_n_vl6_s32: ++** sqdecb x0, w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl6_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL6, 16), ++ x0 = svqdecb_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecb_pat_n_vl7_s32: ++** sqdecb x0, w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl7_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL7, 16), ++ x0 = svqdecb_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecb_pat_n_vl8_s32: ++** sqdecb x0, w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl8_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL8, 16), ++ x0 = svqdecb_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecb_pat_n_vl16_s32: ++** sqdecb x0, w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl16_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL16, 16), ++ x0 = svqdecb_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecb_pat_n_vl32_s32: ++** sqdecb x0, w0, 
vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl32_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL32, 16), ++ x0 = svqdecb_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecb_pat_n_vl64_s32: ++** sqdecb x0, w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl64_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL64, 16), ++ x0 = svqdecb_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecb_pat_n_vl128_s32: ++** sqdecb x0, w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl128_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL128, 16), ++ x0 = svqdecb_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecb_pat_n_vl256_s32: ++** sqdecb x0, w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl256_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_VL256, 16), ++ x0 = svqdecb_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecb_pat_n_mul4_s32: ++** sqdecb x0, w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_mul4_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_MUL4, 16), ++ x0 = svqdecb_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecb_pat_n_mul3_s32: ++** sqdecb x0, w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_mul3_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_MUL3, 16), ++ x0 = svqdecb_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecb_pat_n_all_s32: ++** sqdecb x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_all_s32, int32_t, ++ x0 = svqdecb_pat_n_s32 (x0, SV_ALL, 16), ++ x0 = svqdecb_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s64.c +new file mode 100644 +index 000000000..1380e6c8e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecb_pat_n_1_s64_tied: ++** sqdecb x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_1_s64_tied, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_POW2, 1), ++ x0 = svqdecb_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecb_pat_n_1_s64_untied: ++** mov x0, x1 ++** sqdecb x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_1_s64_untied, int64_t, ++ x0 = svqdecb_pat_n_s64 (x1, SV_POW2, 1), ++ x0 = svqdecb_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecb_pat_n_2_s64: ++** sqdecb x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_2_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_POW2, 2), ++ x0 = svqdecb_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecb_pat_n_7_s64: ++** sqdecb x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_7_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_POW2, 7), ++ x0 = svqdecb_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecb_pat_n_15_s64: ++** sqdecb x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_15_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_POW2, 15), ++ x0 = svqdecb_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecb_pat_n_16_s64: ++** sqdecb x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_16_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_POW2, 16), ++ x0 = svqdecb_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecb_pat_n_vl1_s64: ++** sqdecb x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl1_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL1, 16), ++ x0 = svqdecb_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecb_pat_n_vl2_s64: ++** sqdecb x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl2_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL2, 16), ++ x0 = svqdecb_pat (x0, SV_VL2, 16)) ++ ++/* ++** 
qdecb_pat_n_vl3_s64: ++** sqdecb x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl3_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL3, 16), ++ x0 = svqdecb_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecb_pat_n_vl4_s64: ++** sqdecb x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl4_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL4, 16), ++ x0 = svqdecb_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecb_pat_n_vl5_s64: ++** sqdecb x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl5_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL5, 16), ++ x0 = svqdecb_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecb_pat_n_vl6_s64: ++** sqdecb x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl6_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL6, 16), ++ x0 = svqdecb_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecb_pat_n_vl7_s64: ++** sqdecb x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl7_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL7, 16), ++ x0 = svqdecb_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecb_pat_n_vl8_s64: ++** sqdecb x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl8_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL8, 16), ++ x0 = svqdecb_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecb_pat_n_vl16_s64: ++** sqdecb x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl16_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL16, 16), ++ x0 = svqdecb_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecb_pat_n_vl32_s64: ++** sqdecb x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl32_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL32, 16), ++ x0 = svqdecb_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecb_pat_n_vl64_s64: ++** sqdecb x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl64_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL64, 16), ++ x0 = svqdecb_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecb_pat_n_vl128_s64: ++** sqdecb x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl128_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL128, 16), ++ x0 = svqdecb_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecb_pat_n_vl256_s64: ++** sqdecb x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl256_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_VL256, 16), ++ x0 = svqdecb_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecb_pat_n_mul4_s64: ++** sqdecb x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_mul4_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_MUL4, 16), ++ x0 = svqdecb_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecb_pat_n_mul3_s64: ++** sqdecb x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_mul3_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_MUL3, 16), ++ x0 = svqdecb_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecb_pat_n_all_s64: ++** sqdecb x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_all_s64, int64_t, ++ x0 = svqdecb_pat_n_s64 (x0, SV_ALL, 16), ++ x0 = svqdecb_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u32.c +new file mode 100644 +index 000000000..3db3da866 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecb_pat_n_1_u32_tied: ++** uqdecb w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_1_u32_tied, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_POW2, 1), ++ x0 = svqdecb_pat (x0, SV_POW2, 1)) ++ ++/* ++** 
qdecb_pat_n_1_u32_untied: ++** mov w0, w1 ++** uqdecb w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_1_u32_untied, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x1, SV_POW2, 1), ++ x0 = svqdecb_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecb_pat_n_2_u32: ++** uqdecb w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_2_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_POW2, 2), ++ x0 = svqdecb_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecb_pat_n_7_u32: ++** uqdecb w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_7_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_POW2, 7), ++ x0 = svqdecb_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecb_pat_n_15_u32: ++** uqdecb w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_15_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_POW2, 15), ++ x0 = svqdecb_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecb_pat_n_16_u32: ++** uqdecb w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_16_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_POW2, 16), ++ x0 = svqdecb_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecb_pat_n_vl1_u32: ++** uqdecb w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl1_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL1, 16), ++ x0 = svqdecb_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecb_pat_n_vl2_u32: ++** uqdecb w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl2_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL2, 16), ++ x0 = svqdecb_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecb_pat_n_vl3_u32: ++** uqdecb w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl3_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL3, 16), ++ x0 = svqdecb_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecb_pat_n_vl4_u32: ++** uqdecb w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl4_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL4, 16), ++ x0 = svqdecb_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecb_pat_n_vl5_u32: ++** uqdecb w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl5_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL5, 16), ++ x0 = svqdecb_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecb_pat_n_vl6_u32: ++** uqdecb w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl6_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL6, 16), ++ x0 = svqdecb_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecb_pat_n_vl7_u32: ++** uqdecb w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl7_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL7, 16), ++ x0 = svqdecb_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecb_pat_n_vl8_u32: ++** uqdecb w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl8_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL8, 16), ++ x0 = svqdecb_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecb_pat_n_vl16_u32: ++** uqdecb w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl16_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL16, 16), ++ x0 = svqdecb_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecb_pat_n_vl32_u32: ++** uqdecb w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl32_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL32, 16), ++ x0 = svqdecb_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecb_pat_n_vl64_u32: ++** uqdecb w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl64_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL64, 16), ++ x0 = svqdecb_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecb_pat_n_vl128_u32: ++** uqdecb w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl128_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL128, 16), ++ x0 = svqdecb_pat (x0, SV_VL128, 
16)) ++ ++/* ++** qdecb_pat_n_vl256_u32: ++** uqdecb w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl256_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_VL256, 16), ++ x0 = svqdecb_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecb_pat_n_mul4_u32: ++** uqdecb w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_mul4_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_MUL4, 16), ++ x0 = svqdecb_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecb_pat_n_mul3_u32: ++** uqdecb w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_mul3_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_MUL3, 16), ++ x0 = svqdecb_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecb_pat_n_all_u32: ++** uqdecb w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_all_u32, uint32_t, ++ x0 = svqdecb_pat_n_u32 (x0, SV_ALL, 16), ++ x0 = svqdecb_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u64.c +new file mode 100644 +index 000000000..2f4c3c7aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecb_pat_n_1_u64_tied: ++** uqdecb x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_1_u64_tied, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_POW2, 1), ++ x0 = svqdecb_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecb_pat_n_1_u64_untied: ++** mov x0, x1 ++** uqdecb x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_1_u64_untied, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x1, SV_POW2, 1), ++ x0 = svqdecb_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecb_pat_n_2_u64: ++** uqdecb x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_2_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_POW2, 2), ++ x0 = svqdecb_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecb_pat_n_7_u64: ++** uqdecb x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_7_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_POW2, 7), ++ x0 = svqdecb_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecb_pat_n_15_u64: ++** uqdecb x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_15_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_POW2, 15), ++ x0 = svqdecb_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecb_pat_n_16_u64: ++** uqdecb x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_16_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_POW2, 16), ++ x0 = svqdecb_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecb_pat_n_vl1_u64: ++** uqdecb x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl1_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL1, 16), ++ x0 = svqdecb_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecb_pat_n_vl2_u64: ++** uqdecb x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl2_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL2, 16), ++ x0 = svqdecb_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecb_pat_n_vl3_u64: ++** uqdecb x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl3_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL3, 16), ++ x0 = svqdecb_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecb_pat_n_vl4_u64: ++** uqdecb x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl4_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL4, 16), ++ x0 = svqdecb_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecb_pat_n_vl5_u64: ++** uqdecb x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl5_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL5, 16), ++ x0 = svqdecb_pat (x0, SV_VL5, 16)) ++ 
++/* ++** qdecb_pat_n_vl6_u64: ++** uqdecb x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl6_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL6, 16), ++ x0 = svqdecb_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecb_pat_n_vl7_u64: ++** uqdecb x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl7_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL7, 16), ++ x0 = svqdecb_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecb_pat_n_vl8_u64: ++** uqdecb x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl8_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL8, 16), ++ x0 = svqdecb_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecb_pat_n_vl16_u64: ++** uqdecb x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl16_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL16, 16), ++ x0 = svqdecb_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecb_pat_n_vl32_u64: ++** uqdecb x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl32_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL32, 16), ++ x0 = svqdecb_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecb_pat_n_vl64_u64: ++** uqdecb x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl64_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL64, 16), ++ x0 = svqdecb_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecb_pat_n_vl128_u64: ++** uqdecb x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl128_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL128, 16), ++ x0 = svqdecb_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecb_pat_n_vl256_u64: ++** uqdecb x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_vl256_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_VL256, 16), ++ x0 = svqdecb_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecb_pat_n_mul4_u64: ++** uqdecb x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_mul4_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_MUL4, 16), ++ x0 = svqdecb_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecb_pat_n_mul3_u64: ++** uqdecb x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_mul3_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_MUL3, 16), ++ x0 = svqdecb_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecb_pat_n_all_u64: ++** uqdecb x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_pat_n_all_u64, uint64_t, ++ x0 = svqdecb_pat_n_u64 (x0, SV_ALL, 16), ++ x0 = svqdecb_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s32.c +new file mode 100644 +index 000000000..11180654e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecb_n_1_s32_tied: ++** sqdecb x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_1_s32_tied, int32_t, ++ x0 = svqdecb_n_s32 (x0, 1), ++ x0 = svqdecb (x0, 1)) ++ ++/* ++** qdecb_n_1_s32_untied: ++** mov w0, w1 ++** sqdecb x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_1_s32_untied, int32_t, ++ x0 = svqdecb_n_s32 (x1, 1), ++ x0 = svqdecb (x1, 1)) ++ ++/* ++** qdecb_n_2_s32: ++** sqdecb x0, w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_2_s32, int32_t, ++ x0 = svqdecb_n_s32 (x0, 2), ++ x0 = svqdecb (x0, 2)) ++ ++/* ++** qdecb_n_7_s32: ++** sqdecb x0, w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_7_s32, int32_t, ++ x0 = svqdecb_n_s32 (x0, 7), ++ x0 = svqdecb (x0, 7)) ++ ++/* ++** qdecb_n_15_s32: ++** sqdecb x0, w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_15_s32, int32_t, ++ x0 = svqdecb_n_s32 
(x0, 15), ++ x0 = svqdecb (x0, 15)) ++ ++/* ++** qdecb_n_16_s32: ++** sqdecb x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_16_s32, int32_t, ++ x0 = svqdecb_n_s32 (x0, 16), ++ x0 = svqdecb (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s64.c +new file mode 100644 +index 000000000..17b765655 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecb_n_1_s64_tied: ++** sqdecb x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_1_s64_tied, int64_t, ++ x0 = svqdecb_n_s64 (x0, 1), ++ x0 = svqdecb (x0, 1)) ++ ++/* ++** qdecb_n_1_s64_untied: ++** mov x0, x1 ++** sqdecb x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_1_s64_untied, int64_t, ++ x0 = svqdecb_n_s64 (x1, 1), ++ x0 = svqdecb (x1, 1)) ++ ++/* ++** qdecb_n_2_s64: ++** sqdecb x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_2_s64, int64_t, ++ x0 = svqdecb_n_s64 (x0, 2), ++ x0 = svqdecb (x0, 2)) ++ ++/* ++** qdecb_n_7_s64: ++** sqdecb x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_7_s64, int64_t, ++ x0 = svqdecb_n_s64 (x0, 7), ++ x0 = svqdecb (x0, 7)) ++ ++/* ++** qdecb_n_15_s64: ++** sqdecb x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_15_s64, int64_t, ++ x0 = svqdecb_n_s64 (x0, 15), ++ x0 = svqdecb (x0, 15)) ++ ++/* ++** qdecb_n_16_s64: ++** sqdecb x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_16_s64, int64_t, ++ x0 = svqdecb_n_s64 (x0, 16), ++ x0 = svqdecb (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u32.c +new file mode 100644 +index 000000000..b31e04de5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecb_n_1_u32_tied: ++** uqdecb w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_1_u32_tied, uint32_t, ++ x0 = svqdecb_n_u32 (x0, 1), ++ x0 = svqdecb (x0, 1)) ++ ++/* ++** qdecb_n_1_u32_untied: ++** mov w0, w1 ++** uqdecb w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_1_u32_untied, uint32_t, ++ x0 = svqdecb_n_u32 (x1, 1), ++ x0 = svqdecb (x1, 1)) ++ ++/* ++** qdecb_n_2_u32: ++** uqdecb w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_2_u32, uint32_t, ++ x0 = svqdecb_n_u32 (x0, 2), ++ x0 = svqdecb (x0, 2)) ++ ++/* ++** qdecb_n_7_u32: ++** uqdecb w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_7_u32, uint32_t, ++ x0 = svqdecb_n_u32 (x0, 7), ++ x0 = svqdecb (x0, 7)) ++ ++/* ++** qdecb_n_15_u32: ++** uqdecb w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_15_u32, uint32_t, ++ x0 = svqdecb_n_u32 (x0, 15), ++ x0 = svqdecb (x0, 15)) ++ ++/* ++** qdecb_n_16_u32: ++** uqdecb w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_16_u32, uint32_t, ++ x0 = svqdecb_n_u32 (x0, 16), ++ x0 = svqdecb (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u64.c +new file mode 100644 +index 000000000..aab6faba9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecb_n_1_u64_tied: ++** uqdecb x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_1_u64_tied, uint64_t, ++ x0 = 
svqdecb_n_u64 (x0, 1), ++ x0 = svqdecb (x0, 1)) ++ ++/* ++** qdecb_n_1_u64_untied: ++** mov x0, x1 ++** uqdecb x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_1_u64_untied, uint64_t, ++ x0 = svqdecb_n_u64 (x1, 1), ++ x0 = svqdecb (x1, 1)) ++ ++/* ++** qdecb_n_2_u64: ++** uqdecb x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_2_u64, uint64_t, ++ x0 = svqdecb_n_u64 (x0, 2), ++ x0 = svqdecb (x0, 2)) ++ ++/* ++** qdecb_n_7_u64: ++** uqdecb x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_7_u64, uint64_t, ++ x0 = svqdecb_n_u64 (x0, 7), ++ x0 = svqdecb (x0, 7)) ++ ++/* ++** qdecb_n_15_u64: ++** uqdecb x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_15_u64, uint64_t, ++ x0 = svqdecb_n_u64 (x0, 15), ++ x0 = svqdecb (x0, 15)) ++ ++/* ++** qdecb_n_16_u64: ++** uqdecb x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecb_n_16_u64, uint64_t, ++ x0 = svqdecb_n_u64 (x0, 16), ++ x0 = svqdecb (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s32.c +new file mode 100644 +index 000000000..bc491d397 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecd_pat_n_1_s32_tied: ++** sqdecd x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_1_s32_tied, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_POW2, 1), ++ x0 = svqdecd_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_n_1_s32_untied: ++** mov w0, w1 ++** sqdecd x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_1_s32_untied, int32_t, ++ x0 = svqdecd_pat_n_s32 (x1, SV_POW2, 1), ++ x0 = svqdecd_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_n_2_s32: ++** sqdecd x0, w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_2_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_POW2, 2), ++ x0 = svqdecd_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecd_pat_n_7_s32: ++** sqdecd x0, w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_7_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_POW2, 7), ++ x0 = svqdecd_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecd_pat_n_15_s32: ++** sqdecd x0, w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_15_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_POW2, 15), ++ x0 = svqdecd_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecd_pat_n_16_s32: ++** sqdecd x0, w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_16_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_POW2, 16), ++ x0 = svqdecd_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecd_pat_n_vl1_s32: ++** sqdecd x0, w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl1_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL1, 16), ++ x0 = svqdecd_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecd_pat_n_vl2_s32: ++** sqdecd x0, w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl2_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL2, 16), ++ x0 = svqdecd_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecd_pat_n_vl3_s32: ++** sqdecd x0, w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl3_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL3, 16), ++ x0 = svqdecd_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecd_pat_n_vl4_s32: ++** sqdecd x0, w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl4_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL4, 16), ++ x0 = svqdecd_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecd_pat_n_vl5_s32: ++** sqdecd x0, w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl5_s32, 
int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL5, 16), ++ x0 = svqdecd_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecd_pat_n_vl6_s32: ++** sqdecd x0, w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl6_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL6, 16), ++ x0 = svqdecd_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecd_pat_n_vl7_s32: ++** sqdecd x0, w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl7_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL7, 16), ++ x0 = svqdecd_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecd_pat_n_vl8_s32: ++** sqdecd x0, w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl8_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL8, 16), ++ x0 = svqdecd_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecd_pat_n_vl16_s32: ++** sqdecd x0, w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl16_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL16, 16), ++ x0 = svqdecd_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecd_pat_n_vl32_s32: ++** sqdecd x0, w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl32_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL32, 16), ++ x0 = svqdecd_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecd_pat_n_vl64_s32: ++** sqdecd x0, w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl64_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL64, 16), ++ x0 = svqdecd_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecd_pat_n_vl128_s32: ++** sqdecd x0, w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl128_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL128, 16), ++ x0 = svqdecd_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecd_pat_n_vl256_s32: ++** sqdecd x0, w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl256_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_VL256, 16), ++ x0 = svqdecd_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecd_pat_n_mul4_s32: ++** sqdecd x0, w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_mul4_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_MUL4, 16), ++ x0 = svqdecd_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecd_pat_n_mul3_s32: ++** sqdecd x0, w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_mul3_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_MUL3, 16), ++ x0 = svqdecd_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecd_pat_n_all_s32: ++** sqdecd x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_all_s32, int32_t, ++ x0 = svqdecd_pat_n_s32 (x0, SV_ALL, 16), ++ x0 = svqdecd_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s64.c +new file mode 100644 +index 000000000..3970ff058 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s64.c +@@ -0,0 +1,401 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecd_pat_1_s64_tied: ++** sqdecd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_1_s64_tied, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_POW2, 1), ++ z0 = svqdecd_pat (z0, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_1_s64_untied: ++** movprfx z0, z1 ++** sqdecd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_1_s64_untied, svint64_t, ++ z0 = svqdecd_pat_s64 (z1, SV_POW2, 1), ++ z0 = svqdecd_pat (z1, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_2_s64: ++** sqdecd z0\.d, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_2_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_POW2, 2), ++ z0 = svqdecd_pat (z0, SV_POW2, 2)) ++ ++/* ++** qdecd_pat_7_s64: ++** sqdecd z0\.d, pow2, mul #7 ++** 
ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_7_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_POW2, 7), ++ z0 = svqdecd_pat (z0, SV_POW2, 7)) ++ ++/* ++** qdecd_pat_15_s64: ++** sqdecd z0\.d, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_15_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_POW2, 15), ++ z0 = svqdecd_pat (z0, SV_POW2, 15)) ++ ++/* ++** qdecd_pat_16_s64: ++** sqdecd z0\.d, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_16_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_POW2, 16), ++ z0 = svqdecd_pat (z0, SV_POW2, 16)) ++ ++/* ++** qdecd_pat_vl1_s64: ++** sqdecd z0\.d, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl1_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL1, 16), ++ z0 = svqdecd_pat (z0, SV_VL1, 16)) ++ ++/* ++** qdecd_pat_vl2_s64: ++** sqdecd z0\.d, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl2_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL2, 16), ++ z0 = svqdecd_pat (z0, SV_VL2, 16)) ++ ++/* ++** qdecd_pat_vl3_s64: ++** sqdecd z0\.d, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl3_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL3, 16), ++ z0 = svqdecd_pat (z0, SV_VL3, 16)) ++ ++/* ++** qdecd_pat_vl4_s64: ++** sqdecd z0\.d, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl4_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL4, 16), ++ z0 = svqdecd_pat (z0, SV_VL4, 16)) ++ ++/* ++** qdecd_pat_vl5_s64: ++** sqdecd z0\.d, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl5_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL5, 16), ++ z0 = svqdecd_pat (z0, SV_VL5, 16)) ++ ++/* ++** qdecd_pat_vl6_s64: ++** sqdecd z0\.d, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl6_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL6, 16), ++ z0 = svqdecd_pat (z0, SV_VL6, 16)) ++ ++/* ++** qdecd_pat_vl7_s64: ++** sqdecd z0\.d, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl7_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL7, 16), ++ z0 = svqdecd_pat (z0, SV_VL7, 16)) ++ ++/* ++** qdecd_pat_vl8_s64: ++** sqdecd z0\.d, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl8_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL8, 16), ++ z0 = svqdecd_pat (z0, SV_VL8, 16)) ++ ++/* ++** qdecd_pat_vl16_s64: ++** sqdecd z0\.d, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl16_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL16, 16), ++ z0 = svqdecd_pat (z0, SV_VL16, 16)) ++ ++/* ++** qdecd_pat_vl32_s64: ++** sqdecd z0\.d, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl32_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL32, 16), ++ z0 = svqdecd_pat (z0, SV_VL32, 16)) ++ ++/* ++** qdecd_pat_vl64_s64: ++** sqdecd z0\.d, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl64_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL64, 16), ++ z0 = svqdecd_pat (z0, SV_VL64, 16)) ++ ++/* ++** qdecd_pat_vl128_s64: ++** sqdecd z0\.d, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl128_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL128, 16), ++ z0 = svqdecd_pat (z0, SV_VL128, 16)) ++ ++/* ++** qdecd_pat_vl256_s64: ++** sqdecd z0\.d, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl256_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_VL256, 16), ++ z0 = svqdecd_pat (z0, SV_VL256, 16)) ++ ++/* ++** qdecd_pat_mul4_s64: ++** sqdecd z0\.d, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_mul4_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_MUL4, 16), ++ z0 = svqdecd_pat (z0, SV_MUL4, 16)) ++ ++/* ++** qdecd_pat_mul3_s64: ++** sqdecd z0\.d, mul3, mul #16 ++** ret ++*/ 
++TEST_UNIFORM_Z (qdecd_pat_mul3_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_MUL3, 16), ++ z0 = svqdecd_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qdecd_pat_all_s64: ++** sqdecd z0\.d, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_all_s64, svint64_t, ++ z0 = svqdecd_pat_s64 (z0, SV_ALL, 16), ++ z0 = svqdecd_pat (z0, SV_ALL, 16)) ++ ++/* ++** qdecd_pat_n_1_s64_tied: ++** sqdecd x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_1_s64_tied, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_POW2, 1), ++ x0 = svqdecd_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_n_1_s64_untied: ++** mov x0, x1 ++** sqdecd x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_1_s64_untied, int64_t, ++ x0 = svqdecd_pat_n_s64 (x1, SV_POW2, 1), ++ x0 = svqdecd_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_n_2_s64: ++** sqdecd x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_2_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_POW2, 2), ++ x0 = svqdecd_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecd_pat_n_7_s64: ++** sqdecd x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_7_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_POW2, 7), ++ x0 = svqdecd_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecd_pat_n_15_s64: ++** sqdecd x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_15_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_POW2, 15), ++ x0 = svqdecd_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecd_pat_n_16_s64: ++** sqdecd x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_16_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_POW2, 16), ++ x0 = svqdecd_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecd_pat_n_vl1_s64: ++** sqdecd x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl1_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL1, 16), ++ x0 = svqdecd_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecd_pat_n_vl2_s64: ++** sqdecd x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl2_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL2, 16), ++ x0 = svqdecd_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecd_pat_n_vl3_s64: ++** sqdecd x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl3_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL3, 16), ++ x0 = svqdecd_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecd_pat_n_vl4_s64: ++** sqdecd x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl4_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL4, 16), ++ x0 = svqdecd_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecd_pat_n_vl5_s64: ++** sqdecd x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl5_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL5, 16), ++ x0 = svqdecd_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecd_pat_n_vl6_s64: ++** sqdecd x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl6_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL6, 16), ++ x0 = svqdecd_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecd_pat_n_vl7_s64: ++** sqdecd x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl7_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL7, 16), ++ x0 = svqdecd_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecd_pat_n_vl8_s64: ++** sqdecd x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl8_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL8, 16), ++ x0 = svqdecd_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecd_pat_n_vl16_s64: ++** sqdecd x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl16_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL16, 16), ++ x0 = svqdecd_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecd_pat_n_vl32_s64: ++** sqdecd x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S 
(qdecd_pat_n_vl32_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL32, 16), ++ x0 = svqdecd_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecd_pat_n_vl64_s64: ++** sqdecd x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl64_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL64, 16), ++ x0 = svqdecd_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecd_pat_n_vl128_s64: ++** sqdecd x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl128_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL128, 16), ++ x0 = svqdecd_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecd_pat_n_vl256_s64: ++** sqdecd x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl256_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_VL256, 16), ++ x0 = svqdecd_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecd_pat_n_mul4_s64: ++** sqdecd x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_mul4_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_MUL4, 16), ++ x0 = svqdecd_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecd_pat_n_mul3_s64: ++** sqdecd x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_mul3_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_MUL3, 16), ++ x0 = svqdecd_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecd_pat_n_all_s64: ++** sqdecd x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_all_s64, int64_t, ++ x0 = svqdecd_pat_n_s64 (x0, SV_ALL, 16), ++ x0 = svqdecd_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u32.c +new file mode 100644 +index 000000000..b33e402f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecd_pat_n_1_u32_tied: ++** uqdecd w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_1_u32_tied, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_POW2, 1), ++ x0 = svqdecd_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_n_1_u32_untied: ++** mov w0, w1 ++** uqdecd w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_1_u32_untied, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x1, SV_POW2, 1), ++ x0 = svqdecd_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_n_2_u32: ++** uqdecd w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_2_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_POW2, 2), ++ x0 = svqdecd_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecd_pat_n_7_u32: ++** uqdecd w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_7_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_POW2, 7), ++ x0 = svqdecd_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecd_pat_n_15_u32: ++** uqdecd w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_15_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_POW2, 15), ++ x0 = svqdecd_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecd_pat_n_16_u32: ++** uqdecd w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_16_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_POW2, 16), ++ x0 = svqdecd_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecd_pat_n_vl1_u32: ++** uqdecd w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl1_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL1, 16), ++ x0 = svqdecd_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecd_pat_n_vl2_u32: ++** uqdecd w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl2_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL2, 16), ++ x0 = svqdecd_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecd_pat_n_vl3_u32: ++** uqdecd w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S 
(qdecd_pat_n_vl3_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL3, 16), ++ x0 = svqdecd_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecd_pat_n_vl4_u32: ++** uqdecd w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl4_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL4, 16), ++ x0 = svqdecd_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecd_pat_n_vl5_u32: ++** uqdecd w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl5_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL5, 16), ++ x0 = svqdecd_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecd_pat_n_vl6_u32: ++** uqdecd w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl6_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL6, 16), ++ x0 = svqdecd_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecd_pat_n_vl7_u32: ++** uqdecd w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl7_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL7, 16), ++ x0 = svqdecd_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecd_pat_n_vl8_u32: ++** uqdecd w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl8_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL8, 16), ++ x0 = svqdecd_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecd_pat_n_vl16_u32: ++** uqdecd w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl16_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL16, 16), ++ x0 = svqdecd_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecd_pat_n_vl32_u32: ++** uqdecd w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl32_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL32, 16), ++ x0 = svqdecd_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecd_pat_n_vl64_u32: ++** uqdecd w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl64_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL64, 16), ++ x0 = svqdecd_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecd_pat_n_vl128_u32: ++** uqdecd w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl128_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL128, 16), ++ x0 = svqdecd_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecd_pat_n_vl256_u32: ++** uqdecd w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl256_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_VL256, 16), ++ x0 = svqdecd_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecd_pat_n_mul4_u32: ++** uqdecd w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_mul4_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_MUL4, 16), ++ x0 = svqdecd_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecd_pat_n_mul3_u32: ++** uqdecd w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_mul3_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_MUL3, 16), ++ x0 = svqdecd_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecd_pat_n_all_u32: ++** uqdecd w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_all_u32, uint32_t, ++ x0 = svqdecd_pat_n_u32 (x0, SV_ALL, 16), ++ x0 = svqdecd_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u64.c +new file mode 100644 +index 000000000..f0d1bd357 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u64.c +@@ -0,0 +1,401 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecd_pat_1_u64_tied: ++** uqdecd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_1_u64_tied, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_POW2, 1), ++ z0 = svqdecd_pat (z0, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_1_u64_untied: ++** movprfx z0, z1 ++** uqdecd z0\.d, pow2 ++** ret 
++*/ ++TEST_UNIFORM_Z (qdecd_pat_1_u64_untied, svuint64_t, ++ z0 = svqdecd_pat_u64 (z1, SV_POW2, 1), ++ z0 = svqdecd_pat (z1, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_2_u64: ++** uqdecd z0\.d, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_2_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_POW2, 2), ++ z0 = svqdecd_pat (z0, SV_POW2, 2)) ++ ++/* ++** qdecd_pat_7_u64: ++** uqdecd z0\.d, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_7_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_POW2, 7), ++ z0 = svqdecd_pat (z0, SV_POW2, 7)) ++ ++/* ++** qdecd_pat_15_u64: ++** uqdecd z0\.d, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_15_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_POW2, 15), ++ z0 = svqdecd_pat (z0, SV_POW2, 15)) ++ ++/* ++** qdecd_pat_16_u64: ++** uqdecd z0\.d, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_16_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_POW2, 16), ++ z0 = svqdecd_pat (z0, SV_POW2, 16)) ++ ++/* ++** qdecd_pat_vl1_u64: ++** uqdecd z0\.d, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl1_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL1, 16), ++ z0 = svqdecd_pat (z0, SV_VL1, 16)) ++ ++/* ++** qdecd_pat_vl2_u64: ++** uqdecd z0\.d, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl2_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL2, 16), ++ z0 = svqdecd_pat (z0, SV_VL2, 16)) ++ ++/* ++** qdecd_pat_vl3_u64: ++** uqdecd z0\.d, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl3_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL3, 16), ++ z0 = svqdecd_pat (z0, SV_VL3, 16)) ++ ++/* ++** qdecd_pat_vl4_u64: ++** uqdecd z0\.d, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl4_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL4, 16), ++ z0 = svqdecd_pat (z0, SV_VL4, 16)) ++ ++/* ++** qdecd_pat_vl5_u64: ++** uqdecd z0\.d, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl5_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL5, 16), ++ z0 = svqdecd_pat (z0, SV_VL5, 16)) ++ ++/* ++** qdecd_pat_vl6_u64: ++** uqdecd z0\.d, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl6_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL6, 16), ++ z0 = svqdecd_pat (z0, SV_VL6, 16)) ++ ++/* ++** qdecd_pat_vl7_u64: ++** uqdecd z0\.d, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl7_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL7, 16), ++ z0 = svqdecd_pat (z0, SV_VL7, 16)) ++ ++/* ++** qdecd_pat_vl8_u64: ++** uqdecd z0\.d, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl8_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL8, 16), ++ z0 = svqdecd_pat (z0, SV_VL8, 16)) ++ ++/* ++** qdecd_pat_vl16_u64: ++** uqdecd z0\.d, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl16_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL16, 16), ++ z0 = svqdecd_pat (z0, SV_VL16, 16)) ++ ++/* ++** qdecd_pat_vl32_u64: ++** uqdecd z0\.d, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl32_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL32, 16), ++ z0 = svqdecd_pat (z0, SV_VL32, 16)) ++ ++/* ++** qdecd_pat_vl64_u64: ++** uqdecd z0\.d, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl64_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL64, 16), ++ z0 = svqdecd_pat (z0, SV_VL64, 16)) ++ ++/* ++** qdecd_pat_vl128_u64: ++** uqdecd z0\.d, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_vl128_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL128, 16), ++ z0 = svqdecd_pat (z0, SV_VL128, 16)) ++ ++/* ++** qdecd_pat_vl256_u64: ++** uqdecd z0\.d, vl256, mul #16 ++** ret ++*/ 
++TEST_UNIFORM_Z (qdecd_pat_vl256_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_VL256, 16), ++ z0 = svqdecd_pat (z0, SV_VL256, 16)) ++ ++/* ++** qdecd_pat_mul4_u64: ++** uqdecd z0\.d, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_mul4_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_MUL4, 16), ++ z0 = svqdecd_pat (z0, SV_MUL4, 16)) ++ ++/* ++** qdecd_pat_mul3_u64: ++** uqdecd z0\.d, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_mul3_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_MUL3, 16), ++ z0 = svqdecd_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qdecd_pat_all_u64: ++** uqdecd z0\.d, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_pat_all_u64, svuint64_t, ++ z0 = svqdecd_pat_u64 (z0, SV_ALL, 16), ++ z0 = svqdecd_pat (z0, SV_ALL, 16)) ++ ++/* ++** qdecd_pat_n_1_u64_tied: ++** uqdecd x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_1_u64_tied, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_POW2, 1), ++ x0 = svqdecd_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_n_1_u64_untied: ++** mov x0, x1 ++** uqdecd x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_1_u64_untied, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x1, SV_POW2, 1), ++ x0 = svqdecd_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecd_pat_n_2_u64: ++** uqdecd x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_2_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_POW2, 2), ++ x0 = svqdecd_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecd_pat_n_7_u64: ++** uqdecd x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_7_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_POW2, 7), ++ x0 = svqdecd_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecd_pat_n_15_u64: ++** uqdecd x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_15_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_POW2, 15), ++ x0 = svqdecd_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecd_pat_n_16_u64: ++** uqdecd x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_16_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_POW2, 16), ++ x0 = svqdecd_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecd_pat_n_vl1_u64: ++** uqdecd x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl1_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL1, 16), ++ x0 = svqdecd_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecd_pat_n_vl2_u64: ++** uqdecd x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl2_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL2, 16), ++ x0 = svqdecd_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecd_pat_n_vl3_u64: ++** uqdecd x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl3_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL3, 16), ++ x0 = svqdecd_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecd_pat_n_vl4_u64: ++** uqdecd x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl4_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL4, 16), ++ x0 = svqdecd_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecd_pat_n_vl5_u64: ++** uqdecd x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl5_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL5, 16), ++ x0 = svqdecd_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecd_pat_n_vl6_u64: ++** uqdecd x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl6_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL6, 16), ++ x0 = svqdecd_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecd_pat_n_vl7_u64: ++** uqdecd x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl7_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL7, 16), ++ x0 = svqdecd_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecd_pat_n_vl8_u64: ++** uqdecd x0, vl8, mul #16 ++** ret 
++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl8_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL8, 16), ++ x0 = svqdecd_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecd_pat_n_vl16_u64: ++** uqdecd x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl16_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL16, 16), ++ x0 = svqdecd_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecd_pat_n_vl32_u64: ++** uqdecd x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl32_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL32, 16), ++ x0 = svqdecd_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecd_pat_n_vl64_u64: ++** uqdecd x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl64_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL64, 16), ++ x0 = svqdecd_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecd_pat_n_vl128_u64: ++** uqdecd x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl128_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL128, 16), ++ x0 = svqdecd_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecd_pat_n_vl256_u64: ++** uqdecd x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_vl256_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_VL256, 16), ++ x0 = svqdecd_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecd_pat_n_mul4_u64: ++** uqdecd x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_mul4_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_MUL4, 16), ++ x0 = svqdecd_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecd_pat_n_mul3_u64: ++** uqdecd x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_mul3_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_MUL3, 16), ++ x0 = svqdecd_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecd_pat_n_all_u64: ++** uqdecd x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_pat_n_all_u64, uint64_t, ++ x0 = svqdecd_pat_n_u64 (x0, SV_ALL, 16), ++ x0 = svqdecd_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s32.c +new file mode 100644 +index 000000000..1912ed53f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecd_n_1_s32_tied: ++** sqdecd x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_1_s32_tied, int32_t, ++ x0 = svqdecd_n_s32 (x0, 1), ++ x0 = svqdecd (x0, 1)) ++ ++/* ++** qdecd_n_1_s32_untied: ++** mov w0, w1 ++** sqdecd x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_1_s32_untied, int32_t, ++ x0 = svqdecd_n_s32 (x1, 1), ++ x0 = svqdecd (x1, 1)) ++ ++/* ++** qdecd_n_2_s32: ++** sqdecd x0, w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_2_s32, int32_t, ++ x0 = svqdecd_n_s32 (x0, 2), ++ x0 = svqdecd (x0, 2)) ++ ++/* ++** qdecd_n_7_s32: ++** sqdecd x0, w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_7_s32, int32_t, ++ x0 = svqdecd_n_s32 (x0, 7), ++ x0 = svqdecd (x0, 7)) ++ ++/* ++** qdecd_n_15_s32: ++** sqdecd x0, w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_15_s32, int32_t, ++ x0 = svqdecd_n_s32 (x0, 15), ++ x0 = svqdecd (x0, 15)) ++ ++/* ++** qdecd_n_16_s32: ++** sqdecd x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_16_s32, int32_t, ++ x0 = svqdecd_n_s32 (x0, 16), ++ x0 = svqdecd (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s64.c +new file mode 100644 +index 000000000..bd113fc66 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s64.c +@@ -0,0 
+1,113 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecd_1_s64_tied: ++** sqdecd z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_1_s64_tied, svint64_t, ++ z0 = svqdecd_s64 (z0, 1), ++ z0 = svqdecd (z0, 1)) ++ ++/* ++** qdecd_1_s64_untied: ++** movprfx z0, z1 ++** sqdecd z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_1_s64_untied, svint64_t, ++ z0 = svqdecd_s64 (z1, 1), ++ z0 = svqdecd (z1, 1)) ++ ++/* ++** qdecd_2_s64: ++** sqdecd z0\.d, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_2_s64, svint64_t, ++ z0 = svqdecd_s64 (z0, 2), ++ z0 = svqdecd (z0, 2)) ++ ++/* ++** qdecd_7_s64: ++** sqdecd z0\.d, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_7_s64, svint64_t, ++ z0 = svqdecd_s64 (z0, 7), ++ z0 = svqdecd (z0, 7)) ++ ++/* ++** qdecd_15_s64: ++** sqdecd z0\.d, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_15_s64, svint64_t, ++ z0 = svqdecd_s64 (z0, 15), ++ z0 = svqdecd (z0, 15)) ++ ++/* ++** qdecd_16_s64: ++** sqdecd z0\.d, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_16_s64, svint64_t, ++ z0 = svqdecd_s64 (z0, 16), ++ z0 = svqdecd (z0, 16)) ++ ++/* ++** qdecd_n_1_s64_tied: ++** sqdecd x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_1_s64_tied, int64_t, ++ x0 = svqdecd_n_s64 (x0, 1), ++ x0 = svqdecd (x0, 1)) ++ ++/* ++** qdecd_n_1_s64_untied: ++** mov x0, x1 ++** sqdecd x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_1_s64_untied, int64_t, ++ x0 = svqdecd_n_s64 (x1, 1), ++ x0 = svqdecd (x1, 1)) ++ ++/* ++** qdecd_n_2_s64: ++** sqdecd x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_2_s64, int64_t, ++ x0 = svqdecd_n_s64 (x0, 2), ++ x0 = svqdecd (x0, 2)) ++ ++/* ++** qdecd_n_7_s64: ++** sqdecd x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_7_s64, int64_t, ++ x0 = svqdecd_n_s64 (x0, 7), ++ x0 = svqdecd (x0, 7)) ++ ++/* ++** qdecd_n_15_s64: ++** sqdecd x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_15_s64, int64_t, ++ x0 = svqdecd_n_s64 (x0, 15), ++ x0 = svqdecd (x0, 15)) ++ ++/* ++** qdecd_n_16_s64: ++** sqdecd x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_16_s64, int64_t, ++ x0 = svqdecd_n_s64 (x0, 16), ++ x0 = svqdecd (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u32.c +new file mode 100644 +index 000000000..a672dc215 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecd_n_1_u32_tied: ++** uqdecd w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_1_u32_tied, uint32_t, ++ x0 = svqdecd_n_u32 (x0, 1), ++ x0 = svqdecd (x0, 1)) ++ ++/* ++** qdecd_n_1_u32_untied: ++** mov w0, w1 ++** uqdecd w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_1_u32_untied, uint32_t, ++ x0 = svqdecd_n_u32 (x1, 1), ++ x0 = svqdecd (x1, 1)) ++ ++/* ++** qdecd_n_2_u32: ++** uqdecd w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_2_u32, uint32_t, ++ x0 = svqdecd_n_u32 (x0, 2), ++ x0 = svqdecd (x0, 2)) ++ ++/* ++** qdecd_n_7_u32: ++** uqdecd w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_7_u32, uint32_t, ++ x0 = svqdecd_n_u32 (x0, 7), ++ x0 = svqdecd (x0, 7)) ++ ++/* ++** qdecd_n_15_u32: ++** uqdecd w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_15_u32, uint32_t, ++ x0 = svqdecd_n_u32 (x0, 15), ++ x0 = svqdecd (x0, 15)) ++ ++/* ++** qdecd_n_16_u32: ++** uqdecd w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_16_u32, uint32_t, ++ x0 = 
svqdecd_n_u32 (x0, 16), ++ x0 = svqdecd (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u64.c +new file mode 100644 +index 000000000..fca8868f3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u64.c +@@ -0,0 +1,113 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecd_1_u64_tied: ++** uqdecd z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_1_u64_tied, svuint64_t, ++ z0 = svqdecd_u64 (z0, 1), ++ z0 = svqdecd (z0, 1)) ++ ++/* ++** qdecd_1_u64_untied: ++** movprfx z0, z1 ++** uqdecd z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_1_u64_untied, svuint64_t, ++ z0 = svqdecd_u64 (z1, 1), ++ z0 = svqdecd (z1, 1)) ++ ++/* ++** qdecd_2_u64: ++** uqdecd z0\.d, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_2_u64, svuint64_t, ++ z0 = svqdecd_u64 (z0, 2), ++ z0 = svqdecd (z0, 2)) ++ ++/* ++** qdecd_7_u64: ++** uqdecd z0\.d, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_7_u64, svuint64_t, ++ z0 = svqdecd_u64 (z0, 7), ++ z0 = svqdecd (z0, 7)) ++ ++/* ++** qdecd_15_u64: ++** uqdecd z0\.d, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_15_u64, svuint64_t, ++ z0 = svqdecd_u64 (z0, 15), ++ z0 = svqdecd (z0, 15)) ++ ++/* ++** qdecd_16_u64: ++** uqdecd z0\.d, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecd_16_u64, svuint64_t, ++ z0 = svqdecd_u64 (z0, 16), ++ z0 = svqdecd (z0, 16)) ++ ++/* ++** qdecd_n_1_u64_tied: ++** uqdecd x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_1_u64_tied, uint64_t, ++ x0 = svqdecd_n_u64 (x0, 1), ++ x0 = svqdecd (x0, 1)) ++ ++/* ++** qdecd_n_1_u64_untied: ++** mov x0, x1 ++** uqdecd x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_1_u64_untied, uint64_t, ++ x0 = svqdecd_n_u64 (x1, 1), ++ x0 = svqdecd (x1, 1)) ++ ++/* ++** qdecd_n_2_u64: ++** uqdecd x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_2_u64, uint64_t, ++ x0 = svqdecd_n_u64 (x0, 2), ++ x0 = svqdecd (x0, 2)) ++ ++/* ++** qdecd_n_7_u64: ++** uqdecd x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_7_u64, uint64_t, ++ x0 = svqdecd_n_u64 (x0, 7), ++ x0 = svqdecd (x0, 7)) ++ ++/* ++** qdecd_n_15_u64: ++** uqdecd x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_15_u64, uint64_t, ++ x0 = svqdecd_n_u64 (x0, 15), ++ x0 = svqdecd (x0, 15)) ++ ++/* ++** qdecd_n_16_u64: ++** uqdecd x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecd_n_16_u64, uint64_t, ++ x0 = svqdecd_n_u64 (x0, 16), ++ x0 = svqdecd (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s16.c +new file mode 100644 +index 000000000..c084043f1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s16.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_pat_1_s16_tied: ++** sqdech z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_1_s16_tied, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_POW2, 1), ++ z0 = svqdech_pat (z0, SV_POW2, 1)) ++ ++/* ++** qdech_pat_1_s16_untied: ++** movprfx z0, z1 ++** sqdech z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_1_s16_untied, svint16_t, ++ z0 = svqdech_pat_s16 (z1, SV_POW2, 1), ++ z0 = svqdech_pat (z1, SV_POW2, 1)) ++ ++/* ++** qdech_pat_2_s16: ++** sqdech z0\.h, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_2_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_POW2, 2), ++ z0 = svqdech_pat (z0, SV_POW2, 2)) ++ ++/* ++** 
qdech_pat_7_s16: ++** sqdech z0\.h, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_7_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_POW2, 7), ++ z0 = svqdech_pat (z0, SV_POW2, 7)) ++ ++/* ++** qdech_pat_15_s16: ++** sqdech z0\.h, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_15_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_POW2, 15), ++ z0 = svqdech_pat (z0, SV_POW2, 15)) ++ ++/* ++** qdech_pat_16_s16: ++** sqdech z0\.h, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_16_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_POW2, 16), ++ z0 = svqdech_pat (z0, SV_POW2, 16)) ++ ++/* ++** qdech_pat_vl1_s16: ++** sqdech z0\.h, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl1_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL1, 16), ++ z0 = svqdech_pat (z0, SV_VL1, 16)) ++ ++/* ++** qdech_pat_vl2_s16: ++** sqdech z0\.h, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl2_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL2, 16), ++ z0 = svqdech_pat (z0, SV_VL2, 16)) ++ ++/* ++** qdech_pat_vl3_s16: ++** sqdech z0\.h, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl3_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL3, 16), ++ z0 = svqdech_pat (z0, SV_VL3, 16)) ++ ++/* ++** qdech_pat_vl4_s16: ++** sqdech z0\.h, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl4_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL4, 16), ++ z0 = svqdech_pat (z0, SV_VL4, 16)) ++ ++/* ++** qdech_pat_vl5_s16: ++** sqdech z0\.h, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl5_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL5, 16), ++ z0 = svqdech_pat (z0, SV_VL5, 16)) ++ ++/* ++** qdech_pat_vl6_s16: ++** sqdech z0\.h, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl6_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL6, 16), ++ z0 = svqdech_pat (z0, SV_VL6, 16)) ++ ++/* ++** qdech_pat_vl7_s16: ++** sqdech z0\.h, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl7_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL7, 16), ++ z0 = svqdech_pat (z0, SV_VL7, 16)) ++ ++/* ++** qdech_pat_vl8_s16: ++** sqdech z0\.h, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl8_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL8, 16), ++ z0 = svqdech_pat (z0, SV_VL8, 16)) ++ ++/* ++** qdech_pat_vl16_s16: ++** sqdech z0\.h, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl16_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL16, 16), ++ z0 = svqdech_pat (z0, SV_VL16, 16)) ++ ++/* ++** qdech_pat_vl32_s16: ++** sqdech z0\.h, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl32_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL32, 16), ++ z0 = svqdech_pat (z0, SV_VL32, 16)) ++ ++/* ++** qdech_pat_vl64_s16: ++** sqdech z0\.h, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl64_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL64, 16), ++ z0 = svqdech_pat (z0, SV_VL64, 16)) ++ ++/* ++** qdech_pat_vl128_s16: ++** sqdech z0\.h, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl128_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL128, 16), ++ z0 = svqdech_pat (z0, SV_VL128, 16)) ++ ++/* ++** qdech_pat_vl256_s16: ++** sqdech z0\.h, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl256_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_VL256, 16), ++ z0 = svqdech_pat (z0, SV_VL256, 16)) ++ ++/* ++** qdech_pat_mul4_s16: ++** sqdech z0\.h, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_mul4_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_MUL4, 16), ++ z0 = svqdech_pat (z0, SV_MUL4, 16)) ++ ++/* ++** 
qdech_pat_mul3_s16: ++** sqdech z0\.h, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_mul3_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_MUL3, 16), ++ z0 = svqdech_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qdech_pat_all_s16: ++** sqdech z0\.h, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_all_s16, svint16_t, ++ z0 = svqdech_pat_s16 (z0, SV_ALL, 16), ++ z0 = svqdech_pat (z0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s32.c +new file mode 100644 +index 000000000..b56306db7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_pat_n_1_s32_tied: ++** sqdech x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_1_s32_tied, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_POW2, 1), ++ x0 = svqdech_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdech_pat_n_1_s32_untied: ++** mov w0, w1 ++** sqdech x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_1_s32_untied, int32_t, ++ x0 = svqdech_pat_n_s32 (x1, SV_POW2, 1), ++ x0 = svqdech_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdech_pat_n_2_s32: ++** sqdech x0, w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_2_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_POW2, 2), ++ x0 = svqdech_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdech_pat_n_7_s32: ++** sqdech x0, w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_7_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_POW2, 7), ++ x0 = svqdech_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdech_pat_n_15_s32: ++** sqdech x0, w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_15_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_POW2, 15), ++ x0 = svqdech_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdech_pat_n_16_s32: ++** sqdech x0, w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_16_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_POW2, 16), ++ x0 = svqdech_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdech_pat_n_vl1_s32: ++** sqdech x0, w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl1_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL1, 16), ++ x0 = svqdech_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdech_pat_n_vl2_s32: ++** sqdech x0, w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl2_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL2, 16), ++ x0 = svqdech_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdech_pat_n_vl3_s32: ++** sqdech x0, w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl3_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL3, 16), ++ x0 = svqdech_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdech_pat_n_vl4_s32: ++** sqdech x0, w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl4_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL4, 16), ++ x0 = svqdech_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdech_pat_n_vl5_s32: ++** sqdech x0, w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl5_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL5, 16), ++ x0 = svqdech_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdech_pat_n_vl6_s32: ++** sqdech x0, w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl6_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL6, 16), ++ x0 = svqdech_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdech_pat_n_vl7_s32: ++** sqdech x0, w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl7_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL7, 16), ++ x0 = svqdech_pat (x0, SV_VL7, 16)) ++ 
++/* ++** qdech_pat_n_vl8_s32: ++** sqdech x0, w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl8_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL8, 16), ++ x0 = svqdech_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdech_pat_n_vl16_s32: ++** sqdech x0, w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl16_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL16, 16), ++ x0 = svqdech_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdech_pat_n_vl32_s32: ++** sqdech x0, w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl32_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL32, 16), ++ x0 = svqdech_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdech_pat_n_vl64_s32: ++** sqdech x0, w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl64_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL64, 16), ++ x0 = svqdech_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdech_pat_n_vl128_s32: ++** sqdech x0, w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl128_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL128, 16), ++ x0 = svqdech_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdech_pat_n_vl256_s32: ++** sqdech x0, w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl256_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_VL256, 16), ++ x0 = svqdech_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdech_pat_n_mul4_s32: ++** sqdech x0, w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_mul4_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_MUL4, 16), ++ x0 = svqdech_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdech_pat_n_mul3_s32: ++** sqdech x0, w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_mul3_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_MUL3, 16), ++ x0 = svqdech_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdech_pat_n_all_s32: ++** sqdech x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_all_s32, int32_t, ++ x0 = svqdech_pat_n_s32 (x0, SV_ALL, 16), ++ x0 = svqdech_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s64.c +new file mode 100644 +index 000000000..591658f54 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_pat_n_1_s64_tied: ++** sqdech x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_1_s64_tied, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_POW2, 1), ++ x0 = svqdech_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdech_pat_n_1_s64_untied: ++** mov x0, x1 ++** sqdech x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_1_s64_untied, int64_t, ++ x0 = svqdech_pat_n_s64 (x1, SV_POW2, 1), ++ x0 = svqdech_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdech_pat_n_2_s64: ++** sqdech x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_2_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_POW2, 2), ++ x0 = svqdech_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdech_pat_n_7_s64: ++** sqdech x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_7_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_POW2, 7), ++ x0 = svqdech_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdech_pat_n_15_s64: ++** sqdech x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_15_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_POW2, 15), ++ x0 = svqdech_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdech_pat_n_16_s64: ++** sqdech x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_16_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_POW2, 16), ++ x0 
= svqdech_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdech_pat_n_vl1_s64: ++** sqdech x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl1_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL1, 16), ++ x0 = svqdech_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdech_pat_n_vl2_s64: ++** sqdech x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl2_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL2, 16), ++ x0 = svqdech_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdech_pat_n_vl3_s64: ++** sqdech x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl3_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL3, 16), ++ x0 = svqdech_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdech_pat_n_vl4_s64: ++** sqdech x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl4_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL4, 16), ++ x0 = svqdech_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdech_pat_n_vl5_s64: ++** sqdech x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl5_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL5, 16), ++ x0 = svqdech_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdech_pat_n_vl6_s64: ++** sqdech x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl6_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL6, 16), ++ x0 = svqdech_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdech_pat_n_vl7_s64: ++** sqdech x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl7_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL7, 16), ++ x0 = svqdech_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdech_pat_n_vl8_s64: ++** sqdech x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl8_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL8, 16), ++ x0 = svqdech_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdech_pat_n_vl16_s64: ++** sqdech x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl16_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL16, 16), ++ x0 = svqdech_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdech_pat_n_vl32_s64: ++** sqdech x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl32_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL32, 16), ++ x0 = svqdech_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdech_pat_n_vl64_s64: ++** sqdech x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl64_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL64, 16), ++ x0 = svqdech_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdech_pat_n_vl128_s64: ++** sqdech x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl128_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL128, 16), ++ x0 = svqdech_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdech_pat_n_vl256_s64: ++** sqdech x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl256_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_VL256, 16), ++ x0 = svqdech_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdech_pat_n_mul4_s64: ++** sqdech x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_mul4_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_MUL4, 16), ++ x0 = svqdech_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdech_pat_n_mul3_s64: ++** sqdech x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_mul3_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_MUL3, 16), ++ x0 = svqdech_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdech_pat_n_all_s64: ++** sqdech x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_all_s64, int64_t, ++ x0 = svqdech_pat_n_s64 (x0, SV_ALL, 16), ++ x0 = svqdech_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u16.c +new file mode 100644 
+index 000000000..ce0b5f3e8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u16.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_pat_1_u16_tied: ++** uqdech z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_1_u16_tied, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_POW2, 1), ++ z0 = svqdech_pat (z0, SV_POW2, 1)) ++ ++/* ++** qdech_pat_1_u16_untied: ++** movprfx z0, z1 ++** uqdech z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_1_u16_untied, svuint16_t, ++ z0 = svqdech_pat_u16 (z1, SV_POW2, 1), ++ z0 = svqdech_pat (z1, SV_POW2, 1)) ++ ++/* ++** qdech_pat_2_u16: ++** uqdech z0\.h, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_2_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_POW2, 2), ++ z0 = svqdech_pat (z0, SV_POW2, 2)) ++ ++/* ++** qdech_pat_7_u16: ++** uqdech z0\.h, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_7_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_POW2, 7), ++ z0 = svqdech_pat (z0, SV_POW2, 7)) ++ ++/* ++** qdech_pat_15_u16: ++** uqdech z0\.h, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_15_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_POW2, 15), ++ z0 = svqdech_pat (z0, SV_POW2, 15)) ++ ++/* ++** qdech_pat_16_u16: ++** uqdech z0\.h, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_16_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_POW2, 16), ++ z0 = svqdech_pat (z0, SV_POW2, 16)) ++ ++/* ++** qdech_pat_vl1_u16: ++** uqdech z0\.h, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl1_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL1, 16), ++ z0 = svqdech_pat (z0, SV_VL1, 16)) ++ ++/* ++** qdech_pat_vl2_u16: ++** uqdech z0\.h, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl2_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL2, 16), ++ z0 = svqdech_pat (z0, SV_VL2, 16)) ++ ++/* ++** qdech_pat_vl3_u16: ++** uqdech z0\.h, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl3_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL3, 16), ++ z0 = svqdech_pat (z0, SV_VL3, 16)) ++ ++/* ++** qdech_pat_vl4_u16: ++** uqdech z0\.h, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl4_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL4, 16), ++ z0 = svqdech_pat (z0, SV_VL4, 16)) ++ ++/* ++** qdech_pat_vl5_u16: ++** uqdech z0\.h, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl5_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL5, 16), ++ z0 = svqdech_pat (z0, SV_VL5, 16)) ++ ++/* ++** qdech_pat_vl6_u16: ++** uqdech z0\.h, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl6_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL6, 16), ++ z0 = svqdech_pat (z0, SV_VL6, 16)) ++ ++/* ++** qdech_pat_vl7_u16: ++** uqdech z0\.h, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl7_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL7, 16), ++ z0 = svqdech_pat (z0, SV_VL7, 16)) ++ ++/* ++** qdech_pat_vl8_u16: ++** uqdech z0\.h, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl8_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL8, 16), ++ z0 = svqdech_pat (z0, SV_VL8, 16)) ++ ++/* ++** qdech_pat_vl16_u16: ++** uqdech z0\.h, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl16_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL16, 16), ++ z0 = svqdech_pat (z0, SV_VL16, 16)) ++ ++/* ++** qdech_pat_vl32_u16: ++** uqdech z0\.h, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl32_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL32, 16), ++ z0 = svqdech_pat (z0, 
SV_VL32, 16)) ++ ++/* ++** qdech_pat_vl64_u16: ++** uqdech z0\.h, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl64_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL64, 16), ++ z0 = svqdech_pat (z0, SV_VL64, 16)) ++ ++/* ++** qdech_pat_vl128_u16: ++** uqdech z0\.h, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl128_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL128, 16), ++ z0 = svqdech_pat (z0, SV_VL128, 16)) ++ ++/* ++** qdech_pat_vl256_u16: ++** uqdech z0\.h, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_vl256_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_VL256, 16), ++ z0 = svqdech_pat (z0, SV_VL256, 16)) ++ ++/* ++** qdech_pat_mul4_u16: ++** uqdech z0\.h, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_mul4_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_MUL4, 16), ++ z0 = svqdech_pat (z0, SV_MUL4, 16)) ++ ++/* ++** qdech_pat_mul3_u16: ++** uqdech z0\.h, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_mul3_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_MUL3, 16), ++ z0 = svqdech_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qdech_pat_all_u16: ++** uqdech z0\.h, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_pat_all_u16, svuint16_t, ++ z0 = svqdech_pat_u16 (z0, SV_ALL, 16), ++ z0 = svqdech_pat (z0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u32.c +new file mode 100644 +index 000000000..177f32ec7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_pat_n_1_u32_tied: ++** uqdech w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_1_u32_tied, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_POW2, 1), ++ x0 = svqdech_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdech_pat_n_1_u32_untied: ++** mov w0, w1 ++** uqdech w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_1_u32_untied, uint32_t, ++ x0 = svqdech_pat_n_u32 (x1, SV_POW2, 1), ++ x0 = svqdech_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdech_pat_n_2_u32: ++** uqdech w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_2_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_POW2, 2), ++ x0 = svqdech_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdech_pat_n_7_u32: ++** uqdech w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_7_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_POW2, 7), ++ x0 = svqdech_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdech_pat_n_15_u32: ++** uqdech w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_15_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_POW2, 15), ++ x0 = svqdech_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdech_pat_n_16_u32: ++** uqdech w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_16_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_POW2, 16), ++ x0 = svqdech_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdech_pat_n_vl1_u32: ++** uqdech w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl1_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL1, 16), ++ x0 = svqdech_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdech_pat_n_vl2_u32: ++** uqdech w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl2_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL2, 16), ++ x0 = svqdech_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdech_pat_n_vl3_u32: ++** uqdech w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl3_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL3, 16), ++ x0 = svqdech_pat (x0, 
SV_VL3, 16)) ++ ++/* ++** qdech_pat_n_vl4_u32: ++** uqdech w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl4_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL4, 16), ++ x0 = svqdech_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdech_pat_n_vl5_u32: ++** uqdech w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl5_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL5, 16), ++ x0 = svqdech_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdech_pat_n_vl6_u32: ++** uqdech w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl6_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL6, 16), ++ x0 = svqdech_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdech_pat_n_vl7_u32: ++** uqdech w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl7_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL7, 16), ++ x0 = svqdech_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdech_pat_n_vl8_u32: ++** uqdech w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl8_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL8, 16), ++ x0 = svqdech_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdech_pat_n_vl16_u32: ++** uqdech w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl16_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL16, 16), ++ x0 = svqdech_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdech_pat_n_vl32_u32: ++** uqdech w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl32_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL32, 16), ++ x0 = svqdech_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdech_pat_n_vl64_u32: ++** uqdech w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl64_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL64, 16), ++ x0 = svqdech_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdech_pat_n_vl128_u32: ++** uqdech w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl128_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL128, 16), ++ x0 = svqdech_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdech_pat_n_vl256_u32: ++** uqdech w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl256_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_VL256, 16), ++ x0 = svqdech_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdech_pat_n_mul4_u32: ++** uqdech w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_mul4_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_MUL4, 16), ++ x0 = svqdech_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdech_pat_n_mul3_u32: ++** uqdech w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_mul3_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_MUL3, 16), ++ x0 = svqdech_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdech_pat_n_all_u32: ++** uqdech w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_all_u32, uint32_t, ++ x0 = svqdech_pat_n_u32 (x0, SV_ALL, 16), ++ x0 = svqdech_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u64.c +new file mode 100644 +index 000000000..7092127f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_pat_n_1_u64_tied: ++** uqdech x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_1_u64_tied, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_POW2, 1), ++ x0 = svqdech_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdech_pat_n_1_u64_untied: ++** mov x0, x1 ++** uqdech x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_1_u64_untied, uint64_t, ++ x0 = svqdech_pat_n_u64 (x1, SV_POW2, 1), ++ 
x0 = svqdech_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdech_pat_n_2_u64: ++** uqdech x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_2_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_POW2, 2), ++ x0 = svqdech_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdech_pat_n_7_u64: ++** uqdech x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_7_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_POW2, 7), ++ x0 = svqdech_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdech_pat_n_15_u64: ++** uqdech x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_15_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_POW2, 15), ++ x0 = svqdech_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdech_pat_n_16_u64: ++** uqdech x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_16_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_POW2, 16), ++ x0 = svqdech_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdech_pat_n_vl1_u64: ++** uqdech x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl1_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL1, 16), ++ x0 = svqdech_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdech_pat_n_vl2_u64: ++** uqdech x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl2_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL2, 16), ++ x0 = svqdech_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdech_pat_n_vl3_u64: ++** uqdech x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl3_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL3, 16), ++ x0 = svqdech_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdech_pat_n_vl4_u64: ++** uqdech x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl4_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL4, 16), ++ x0 = svqdech_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdech_pat_n_vl5_u64: ++** uqdech x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl5_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL5, 16), ++ x0 = svqdech_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdech_pat_n_vl6_u64: ++** uqdech x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl6_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL6, 16), ++ x0 = svqdech_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdech_pat_n_vl7_u64: ++** uqdech x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl7_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL7, 16), ++ x0 = svqdech_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdech_pat_n_vl8_u64: ++** uqdech x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl8_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL8, 16), ++ x0 = svqdech_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdech_pat_n_vl16_u64: ++** uqdech x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl16_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL16, 16), ++ x0 = svqdech_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdech_pat_n_vl32_u64: ++** uqdech x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl32_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL32, 16), ++ x0 = svqdech_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdech_pat_n_vl64_u64: ++** uqdech x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl64_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL64, 16), ++ x0 = svqdech_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdech_pat_n_vl128_u64: ++** uqdech x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl128_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL128, 16), ++ x0 = svqdech_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdech_pat_n_vl256_u64: ++** uqdech x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_vl256_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_VL256, 
16), ++ x0 = svqdech_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdech_pat_n_mul4_u64: ++** uqdech x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_mul4_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_MUL4, 16), ++ x0 = svqdech_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdech_pat_n_mul3_u64: ++** uqdech x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_mul3_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_MUL3, 16), ++ x0 = svqdech_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdech_pat_n_all_u64: ++** uqdech x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_pat_n_all_u64, uint64_t, ++ x0 = svqdech_pat_n_u64 (x0, SV_ALL, 16), ++ x0 = svqdech_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s16.c +new file mode 100644 +index 000000000..2a7a8f7a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s16.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_1_s16_tied: ++** sqdech z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_1_s16_tied, svint16_t, ++ z0 = svqdech_s16 (z0, 1), ++ z0 = svqdech (z0, 1)) ++ ++/* ++** qdech_1_s16_untied: ++** movprfx z0, z1 ++** sqdech z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_1_s16_untied, svint16_t, ++ z0 = svqdech_s16 (z1, 1), ++ z0 = svqdech (z1, 1)) ++ ++/* ++** qdech_2_s16: ++** sqdech z0\.h, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_2_s16, svint16_t, ++ z0 = svqdech_s16 (z0, 2), ++ z0 = svqdech (z0, 2)) ++ ++/* ++** qdech_7_s16: ++** sqdech z0\.h, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_7_s16, svint16_t, ++ z0 = svqdech_s16 (z0, 7), ++ z0 = svqdech (z0, 7)) ++ ++/* ++** qdech_15_s16: ++** sqdech z0\.h, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_15_s16, svint16_t, ++ z0 = svqdech_s16 (z0, 15), ++ z0 = svqdech (z0, 15)) ++ ++/* ++** qdech_16_s16: ++** sqdech z0\.h, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_16_s16, svint16_t, ++ z0 = svqdech_s16 (z0, 16), ++ z0 = svqdech (z0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s32.c +new file mode 100644 +index 000000000..7fd57d85a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_n_1_s32_tied: ++** sqdech x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_1_s32_tied, int32_t, ++ x0 = svqdech_n_s32 (x0, 1), ++ x0 = svqdech (x0, 1)) ++ ++/* ++** qdech_n_1_s32_untied: ++** mov w0, w1 ++** sqdech x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_1_s32_untied, int32_t, ++ x0 = svqdech_n_s32 (x1, 1), ++ x0 = svqdech (x1, 1)) ++ ++/* ++** qdech_n_2_s32: ++** sqdech x0, w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_2_s32, int32_t, ++ x0 = svqdech_n_s32 (x0, 2), ++ x0 = svqdech (x0, 2)) ++ ++/* ++** qdech_n_7_s32: ++** sqdech x0, w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_7_s32, int32_t, ++ x0 = svqdech_n_s32 (x0, 7), ++ x0 = svqdech (x0, 7)) ++ ++/* ++** qdech_n_15_s32: ++** sqdech x0, w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_15_s32, int32_t, ++ x0 = svqdech_n_s32 (x0, 15), ++ x0 = svqdech (x0, 15)) ++ ++/* ++** qdech_n_16_s32: ++** sqdech x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_16_s32, int32_t, ++ x0 = svqdech_n_s32 (x0, 16), ++ x0 = svqdech (x0, 16)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s64.c +new file mode 100644 +index 000000000..61989f8d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_n_1_s64_tied: ++** sqdech x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_1_s64_tied, int64_t, ++ x0 = svqdech_n_s64 (x0, 1), ++ x0 = svqdech (x0, 1)) ++ ++/* ++** qdech_n_1_s64_untied: ++** mov x0, x1 ++** sqdech x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_1_s64_untied, int64_t, ++ x0 = svqdech_n_s64 (x1, 1), ++ x0 = svqdech (x1, 1)) ++ ++/* ++** qdech_n_2_s64: ++** sqdech x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_2_s64, int64_t, ++ x0 = svqdech_n_s64 (x0, 2), ++ x0 = svqdech (x0, 2)) ++ ++/* ++** qdech_n_7_s64: ++** sqdech x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_7_s64, int64_t, ++ x0 = svqdech_n_s64 (x0, 7), ++ x0 = svqdech (x0, 7)) ++ ++/* ++** qdech_n_15_s64: ++** sqdech x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_15_s64, int64_t, ++ x0 = svqdech_n_s64 (x0, 15), ++ x0 = svqdech (x0, 15)) ++ ++/* ++** qdech_n_16_s64: ++** sqdech x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_16_s64, int64_t, ++ x0 = svqdech_n_s64 (x0, 16), ++ x0 = svqdech (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u16.c +new file mode 100644 +index 000000000..0d6587851 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u16.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_1_u16_tied: ++** uqdech z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_1_u16_tied, svuint16_t, ++ z0 = svqdech_u16 (z0, 1), ++ z0 = svqdech (z0, 1)) ++ ++/* ++** qdech_1_u16_untied: ++** movprfx z0, z1 ++** uqdech z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_1_u16_untied, svuint16_t, ++ z0 = svqdech_u16 (z1, 1), ++ z0 = svqdech (z1, 1)) ++ ++/* ++** qdech_2_u16: ++** uqdech z0\.h, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_2_u16, svuint16_t, ++ z0 = svqdech_u16 (z0, 2), ++ z0 = svqdech (z0, 2)) ++ ++/* ++** qdech_7_u16: ++** uqdech z0\.h, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_7_u16, svuint16_t, ++ z0 = svqdech_u16 (z0, 7), ++ z0 = svqdech (z0, 7)) ++ ++/* ++** qdech_15_u16: ++** uqdech z0\.h, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_15_u16, svuint16_t, ++ z0 = svqdech_u16 (z0, 15), ++ z0 = svqdech (z0, 15)) ++ ++/* ++** qdech_16_u16: ++** uqdech z0\.h, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdech_16_u16, svuint16_t, ++ z0 = svqdech_u16 (z0, 16), ++ z0 = svqdech (z0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u32.c +new file mode 100644 +index 000000000..179d67953 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_n_1_u32_tied: ++** uqdech w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_1_u32_tied, uint32_t, ++ x0 = svqdech_n_u32 (x0, 1), ++ x0 = svqdech (x0, 1)) ++ ++/* ++** qdech_n_1_u32_untied: ++** mov w0, w1 ++** uqdech w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_1_u32_untied, uint32_t, ++ x0 = svqdech_n_u32 (x1, 1), ++ x0 = svqdech (x1, 1)) ++ 
++/* ++** qdech_n_2_u32: ++** uqdech w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_2_u32, uint32_t, ++ x0 = svqdech_n_u32 (x0, 2), ++ x0 = svqdech (x0, 2)) ++ ++/* ++** qdech_n_7_u32: ++** uqdech w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_7_u32, uint32_t, ++ x0 = svqdech_n_u32 (x0, 7), ++ x0 = svqdech (x0, 7)) ++ ++/* ++** qdech_n_15_u32: ++** uqdech w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_15_u32, uint32_t, ++ x0 = svqdech_n_u32 (x0, 15), ++ x0 = svqdech (x0, 15)) ++ ++/* ++** qdech_n_16_u32: ++** uqdech w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_16_u32, uint32_t, ++ x0 = svqdech_n_u32 (x0, 16), ++ x0 = svqdech (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u64.c +new file mode 100644 +index 000000000..da2f051af +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdech_n_1_u64_tied: ++** uqdech x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_1_u64_tied, uint64_t, ++ x0 = svqdech_n_u64 (x0, 1), ++ x0 = svqdech (x0, 1)) ++ ++/* ++** qdech_n_1_u64_untied: ++** mov x0, x1 ++** uqdech x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_1_u64_untied, uint64_t, ++ x0 = svqdech_n_u64 (x1, 1), ++ x0 = svqdech (x1, 1)) ++ ++/* ++** qdech_n_2_u64: ++** uqdech x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_2_u64, uint64_t, ++ x0 = svqdech_n_u64 (x0, 2), ++ x0 = svqdech (x0, 2)) ++ ++/* ++** qdech_n_7_u64: ++** uqdech x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_7_u64, uint64_t, ++ x0 = svqdech_n_u64 (x0, 7), ++ x0 = svqdech (x0, 7)) ++ ++/* ++** qdech_n_15_u64: ++** uqdech x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_15_u64, uint64_t, ++ x0 = svqdech_n_u64 (x0, 15), ++ x0 = svqdech (x0, 15)) ++ ++/* ++** qdech_n_16_u64: ++** uqdech x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdech_n_16_u64, uint64_t, ++ x0 = svqdech_n_u64 (x0, 16), ++ x0 = svqdech (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s16.c +new file mode 100644 +index 000000000..71b40c152 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s16.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecp_s16_tied: ++** sqdecp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_s16_tied, svint16_t, ++ z0 = svqdecp_s16 (z0, p0), ++ z0 = svqdecp (z0, p0)) ++ ++/* ++** qdecp_s16_untied: ++** movprfx z0, z1 ++** sqdecp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_s16_untied, svint16_t, ++ z0 = svqdecp_s16 (z1, p0), ++ z0 = svqdecp (z1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s32.c +new file mode 100644 +index 000000000..55e4067d1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s32.c +@@ -0,0 +1,98 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecp_s32_tied: ++** sqdecp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_s32_tied, svint32_t, ++ z0 = svqdecp_s32 (z0, p0), ++ z0 = svqdecp (z0, p0)) ++ ++/* ++** qdecp_s32_untied: ++** movprfx z0, z1 ++** sqdecp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_s32_untied, svint32_t, ++ z0 = svqdecp_s32 (z1, p0), ++ 
z0 = svqdecp (z1, p0)) ++ ++/* ++** qdecp_n_s32_b8_tied: ++** sqdecp x0, p0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s32_b8_tied, int32_t, ++ x0 = svqdecp_n_s32_b8 (x0, p0), ++ x0 = svqdecp_b8 (x0, p0)) ++ ++/* ++** qdecp_n_s32_b8_untied: ++** mov w0, w1 ++** sqdecp x0, p0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s32_b8_untied, int32_t, ++ x0 = svqdecp_n_s32_b8 (x1, p0), ++ x0 = svqdecp_b8 (x1, p0)) ++ ++/* ++** qdecp_n_s32_b16_tied: ++** sqdecp x0, p0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s32_b16_tied, int32_t, ++ x0 = svqdecp_n_s32_b16 (x0, p0), ++ x0 = svqdecp_b16 (x0, p0)) ++ ++/* ++** qdecp_n_s32_b16_untied: ++** mov w0, w1 ++** sqdecp x0, p0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s32_b16_untied, int32_t, ++ x0 = svqdecp_n_s32_b16 (x1, p0), ++ x0 = svqdecp_b16 (x1, p0)) ++ ++/* ++** qdecp_n_s32_b32_tied: ++** sqdecp x0, p0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s32_b32_tied, int32_t, ++ x0 = svqdecp_n_s32_b32 (x0, p0), ++ x0 = svqdecp_b32 (x0, p0)) ++ ++/* ++** qdecp_n_s32_b32_untied: ++** mov w0, w1 ++** sqdecp x0, p0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s32_b32_untied, int32_t, ++ x0 = svqdecp_n_s32_b32 (x1, p0), ++ x0 = svqdecp_b32 (x1, p0)) ++ ++/* ++** qdecp_n_s32_b64_tied: ++** sqdecp x0, p0\.d, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s32_b64_tied, int32_t, ++ x0 = svqdecp_n_s32_b64 (x0, p0), ++ x0 = svqdecp_b64 (x0, p0)) ++ ++/* ++** qdecp_n_s32_b64_untied: ++** mov w0, w1 ++** sqdecp x0, p0\.d, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s32_b64_untied, int32_t, ++ x0 = svqdecp_n_s32_b64 (x1, p0), ++ x0 = svqdecp_b64 (x1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s64.c +new file mode 100644 +index 000000000..9527999c8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s64.c +@@ -0,0 +1,98 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecp_s64_tied: ++** sqdecp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_s64_tied, svint64_t, ++ z0 = svqdecp_s64 (z0, p0), ++ z0 = svqdecp (z0, p0)) ++ ++/* ++** qdecp_s64_untied: ++** movprfx z0, z1 ++** sqdecp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_s64_untied, svint64_t, ++ z0 = svqdecp_s64 (z1, p0), ++ z0 = svqdecp (z1, p0)) ++ ++/* ++** qdecp_n_s64_b8_tied: ++** sqdecp x0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s64_b8_tied, int64_t, ++ x0 = svqdecp_n_s64_b8 (x0, p0), ++ x0 = svqdecp_b8 (x0, p0)) ++ ++/* ++** qdecp_n_s64_b8_untied: ++** mov x0, x1 ++** sqdecp x0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s64_b8_untied, int64_t, ++ x0 = svqdecp_n_s64_b8 (x1, p0), ++ x0 = svqdecp_b8 (x1, p0)) ++ ++/* ++** qdecp_n_s64_b16_tied: ++** sqdecp x0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s64_b16_tied, int64_t, ++ x0 = svqdecp_n_s64_b16 (x0, p0), ++ x0 = svqdecp_b16 (x0, p0)) ++ ++/* ++** qdecp_n_s64_b16_untied: ++** mov x0, x1 ++** sqdecp x0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s64_b16_untied, int64_t, ++ x0 = svqdecp_n_s64_b16 (x1, p0), ++ x0 = svqdecp_b16 (x1, p0)) ++ ++/* ++** qdecp_n_s64_b32_tied: ++** sqdecp x0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s64_b32_tied, int64_t, ++ x0 = svqdecp_n_s64_b32 (x0, p0), ++ x0 = svqdecp_b32 (x0, p0)) ++ ++/* ++** qdecp_n_s64_b32_untied: ++** mov x0, x1 ++** sqdecp x0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s64_b32_untied, int64_t, ++ x0 = svqdecp_n_s64_b32 (x1, p0), ++ x0 = svqdecp_b32 (x1, p0)) ++ ++/* ++** qdecp_n_s64_b64_tied: 
++** sqdecp x0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s64_b64_tied, int64_t, ++ x0 = svqdecp_n_s64_b64 (x0, p0), ++ x0 = svqdecp_b64 (x0, p0)) ++ ++/* ++** qdecp_n_s64_b64_untied: ++** mov x0, x1 ++** sqdecp x0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_s64_b64_untied, int64_t, ++ x0 = svqdecp_n_s64_b64 (x1, p0), ++ x0 = svqdecp_b64 (x1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u16.c +new file mode 100644 +index 000000000..33357ada4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u16.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecp_u16_tied: ++** uqdecp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_u16_tied, svuint16_t, ++ z0 = svqdecp_u16 (z0, p0), ++ z0 = svqdecp (z0, p0)) ++ ++/* ++** qdecp_u16_untied: ++** movprfx z0, z1 ++** uqdecp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_u16_untied, svuint16_t, ++ z0 = svqdecp_u16 (z1, p0), ++ z0 = svqdecp (z1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u32.c +new file mode 100644 +index 000000000..58e9a642e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u32.c +@@ -0,0 +1,98 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecp_u32_tied: ++** uqdecp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_u32_tied, svuint32_t, ++ z0 = svqdecp_u32 (z0, p0), ++ z0 = svqdecp (z0, p0)) ++ ++/* ++** qdecp_u32_untied: ++** movprfx z0, z1 ++** uqdecp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_u32_untied, svuint32_t, ++ z0 = svqdecp_u32 (z1, p0), ++ z0 = svqdecp (z1, p0)) ++ ++/* ++** qdecp_n_u32_b8_tied: ++** uqdecp w0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u32_b8_tied, uint32_t, ++ x0 = svqdecp_n_u32_b8 (x0, p0), ++ x0 = svqdecp_b8 (x0, p0)) ++ ++/* ++** qdecp_n_u32_b8_untied: ++** mov w0, w1 ++** uqdecp w0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u32_b8_untied, uint32_t, ++ x0 = svqdecp_n_u32_b8 (x1, p0), ++ x0 = svqdecp_b8 (x1, p0)) ++ ++/* ++** qdecp_n_u32_b16_tied: ++** uqdecp w0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u32_b16_tied, uint32_t, ++ x0 = svqdecp_n_u32_b16 (x0, p0), ++ x0 = svqdecp_b16 (x0, p0)) ++ ++/* ++** qdecp_n_u32_b16_untied: ++** mov w0, w1 ++** uqdecp w0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u32_b16_untied, uint32_t, ++ x0 = svqdecp_n_u32_b16 (x1, p0), ++ x0 = svqdecp_b16 (x1, p0)) ++ ++/* ++** qdecp_n_u32_b32_tied: ++** uqdecp w0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u32_b32_tied, uint32_t, ++ x0 = svqdecp_n_u32_b32 (x0, p0), ++ x0 = svqdecp_b32 (x0, p0)) ++ ++/* ++** qdecp_n_u32_b32_untied: ++** mov w0, w1 ++** uqdecp w0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u32_b32_untied, uint32_t, ++ x0 = svqdecp_n_u32_b32 (x1, p0), ++ x0 = svqdecp_b32 (x1, p0)) ++ ++/* ++** qdecp_n_u32_b64_tied: ++** uqdecp w0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u32_b64_tied, uint32_t, ++ x0 = svqdecp_n_u32_b64 (x0, p0), ++ x0 = svqdecp_b64 (x0, p0)) ++ ++/* ++** qdecp_n_u32_b64_untied: ++** mov w0, w1 ++** uqdecp w0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u32_b64_untied, uint32_t, ++ x0 = svqdecp_n_u32_b64 (x1, p0), ++ x0 = svqdecp_b64 (x1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u64.c +new file mode 
100644 +index 000000000..e2091d8ae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u64.c +@@ -0,0 +1,98 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecp_u64_tied: ++** uqdecp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_u64_tied, svuint64_t, ++ z0 = svqdecp_u64 (z0, p0), ++ z0 = svqdecp (z0, p0)) ++ ++/* ++** qdecp_u64_untied: ++** movprfx z0, z1 ++** uqdecp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecp_u64_untied, svuint64_t, ++ z0 = svqdecp_u64 (z1, p0), ++ z0 = svqdecp (z1, p0)) ++ ++/* ++** qdecp_n_u64_b8_tied: ++** uqdecp x0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u64_b8_tied, uint64_t, ++ x0 = svqdecp_n_u64_b8 (x0, p0), ++ x0 = svqdecp_b8 (x0, p0)) ++ ++/* ++** qdecp_n_u64_b8_untied: ++** mov x0, x1 ++** uqdecp x0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u64_b8_untied, uint64_t, ++ x0 = svqdecp_n_u64_b8 (x1, p0), ++ x0 = svqdecp_b8 (x1, p0)) ++ ++/* ++** qdecp_n_u64_b16_tied: ++** uqdecp x0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u64_b16_tied, uint64_t, ++ x0 = svqdecp_n_u64_b16 (x0, p0), ++ x0 = svqdecp_b16 (x0, p0)) ++ ++/* ++** qdecp_n_u64_b16_untied: ++** mov x0, x1 ++** uqdecp x0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u64_b16_untied, uint64_t, ++ x0 = svqdecp_n_u64_b16 (x1, p0), ++ x0 = svqdecp_b16 (x1, p0)) ++ ++/* ++** qdecp_n_u64_b32_tied: ++** uqdecp x0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u64_b32_tied, uint64_t, ++ x0 = svqdecp_n_u64_b32 (x0, p0), ++ x0 = svqdecp_b32 (x0, p0)) ++ ++/* ++** qdecp_n_u64_b32_untied: ++** mov x0, x1 ++** uqdecp x0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u64_b32_untied, uint64_t, ++ x0 = svqdecp_n_u64_b32 (x1, p0), ++ x0 = svqdecp_b32 (x1, p0)) ++ ++/* ++** qdecp_n_u64_b64_tied: ++** uqdecp x0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u64_b64_tied, uint64_t, ++ x0 = svqdecp_n_u64_b64 (x0, p0), ++ x0 = svqdecp_b64 (x0, p0)) ++ ++/* ++** qdecp_n_u64_b64_untied: ++** mov x0, x1 ++** uqdecp x0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qdecp_n_u64_b64_untied, uint64_t, ++ x0 = svqdecp_n_u64_b64 (x1, p0), ++ x0 = svqdecp_b64 (x1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s32.c +new file mode 100644 +index 000000000..d80f7be4d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s32.c +@@ -0,0 +1,401 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecw_pat_1_s32_tied: ++** sqdecw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_1_s32_tied, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_POW2, 1), ++ z0 = svqdecw_pat (z0, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_1_s32_untied: ++** movprfx z0, z1 ++** sqdecw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_1_s32_untied, svint32_t, ++ z0 = svqdecw_pat_s32 (z1, SV_POW2, 1), ++ z0 = svqdecw_pat (z1, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_2_s32: ++** sqdecw z0\.s, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_2_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_POW2, 2), ++ z0 = svqdecw_pat (z0, SV_POW2, 2)) ++ ++/* ++** qdecw_pat_7_s32: ++** sqdecw z0\.s, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_7_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_POW2, 7), ++ z0 = svqdecw_pat (z0, SV_POW2, 7)) ++ ++/* ++** qdecw_pat_15_s32: ++** sqdecw z0\.s, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_15_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_POW2, 
15), ++ z0 = svqdecw_pat (z0, SV_POW2, 15)) ++ ++/* ++** qdecw_pat_16_s32: ++** sqdecw z0\.s, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_16_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_POW2, 16), ++ z0 = svqdecw_pat (z0, SV_POW2, 16)) ++ ++/* ++** qdecw_pat_vl1_s32: ++** sqdecw z0\.s, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl1_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL1, 16), ++ z0 = svqdecw_pat (z0, SV_VL1, 16)) ++ ++/* ++** qdecw_pat_vl2_s32: ++** sqdecw z0\.s, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl2_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL2, 16), ++ z0 = svqdecw_pat (z0, SV_VL2, 16)) ++ ++/* ++** qdecw_pat_vl3_s32: ++** sqdecw z0\.s, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl3_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL3, 16), ++ z0 = svqdecw_pat (z0, SV_VL3, 16)) ++ ++/* ++** qdecw_pat_vl4_s32: ++** sqdecw z0\.s, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl4_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL4, 16), ++ z0 = svqdecw_pat (z0, SV_VL4, 16)) ++ ++/* ++** qdecw_pat_vl5_s32: ++** sqdecw z0\.s, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl5_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL5, 16), ++ z0 = svqdecw_pat (z0, SV_VL5, 16)) ++ ++/* ++** qdecw_pat_vl6_s32: ++** sqdecw z0\.s, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl6_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL6, 16), ++ z0 = svqdecw_pat (z0, SV_VL6, 16)) ++ ++/* ++** qdecw_pat_vl7_s32: ++** sqdecw z0\.s, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl7_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL7, 16), ++ z0 = svqdecw_pat (z0, SV_VL7, 16)) ++ ++/* ++** qdecw_pat_vl8_s32: ++** sqdecw z0\.s, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl8_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL8, 16), ++ z0 = svqdecw_pat (z0, SV_VL8, 16)) ++ ++/* ++** qdecw_pat_vl16_s32: ++** sqdecw z0\.s, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl16_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL16, 16), ++ z0 = svqdecw_pat (z0, SV_VL16, 16)) ++ ++/* ++** qdecw_pat_vl32_s32: ++** sqdecw z0\.s, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl32_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL32, 16), ++ z0 = svqdecw_pat (z0, SV_VL32, 16)) ++ ++/* ++** qdecw_pat_vl64_s32: ++** sqdecw z0\.s, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl64_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL64, 16), ++ z0 = svqdecw_pat (z0, SV_VL64, 16)) ++ ++/* ++** qdecw_pat_vl128_s32: ++** sqdecw z0\.s, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl128_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL128, 16), ++ z0 = svqdecw_pat (z0, SV_VL128, 16)) ++ ++/* ++** qdecw_pat_vl256_s32: ++** sqdecw z0\.s, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl256_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_VL256, 16), ++ z0 = svqdecw_pat (z0, SV_VL256, 16)) ++ ++/* ++** qdecw_pat_mul4_s32: ++** sqdecw z0\.s, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_mul4_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_MUL4, 16), ++ z0 = svqdecw_pat (z0, SV_MUL4, 16)) ++ ++/* ++** qdecw_pat_mul3_s32: ++** sqdecw z0\.s, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_mul3_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_MUL3, 16), ++ z0 = svqdecw_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qdecw_pat_all_s32: ++** sqdecw z0\.s, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_all_s32, svint32_t, ++ z0 = svqdecw_pat_s32 (z0, SV_ALL, 16), 
++ z0 = svqdecw_pat (z0, SV_ALL, 16)) ++ ++/* ++** qdecw_pat_n_1_s32_tied: ++** sqdecw x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_1_s32_tied, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_POW2, 1), ++ x0 = svqdecw_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_n_1_s32_untied: ++** mov w0, w1 ++** sqdecw x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_1_s32_untied, int32_t, ++ x0 = svqdecw_pat_n_s32 (x1, SV_POW2, 1), ++ x0 = svqdecw_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_n_2_s32: ++** sqdecw x0, w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_2_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_POW2, 2), ++ x0 = svqdecw_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecw_pat_n_7_s32: ++** sqdecw x0, w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_7_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_POW2, 7), ++ x0 = svqdecw_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecw_pat_n_15_s32: ++** sqdecw x0, w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_15_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_POW2, 15), ++ x0 = svqdecw_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecw_pat_n_16_s32: ++** sqdecw x0, w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_16_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_POW2, 16), ++ x0 = svqdecw_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecw_pat_n_vl1_s32: ++** sqdecw x0, w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl1_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL1, 16), ++ x0 = svqdecw_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecw_pat_n_vl2_s32: ++** sqdecw x0, w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl2_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL2, 16), ++ x0 = svqdecw_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecw_pat_n_vl3_s32: ++** sqdecw x0, w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl3_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL3, 16), ++ x0 = svqdecw_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecw_pat_n_vl4_s32: ++** sqdecw x0, w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl4_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL4, 16), ++ x0 = svqdecw_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecw_pat_n_vl5_s32: ++** sqdecw x0, w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl5_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL5, 16), ++ x0 = svqdecw_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecw_pat_n_vl6_s32: ++** sqdecw x0, w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl6_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL6, 16), ++ x0 = svqdecw_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecw_pat_n_vl7_s32: ++** sqdecw x0, w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl7_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL7, 16), ++ x0 = svqdecw_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecw_pat_n_vl8_s32: ++** sqdecw x0, w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl8_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL8, 16), ++ x0 = svqdecw_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecw_pat_n_vl16_s32: ++** sqdecw x0, w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl16_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL16, 16), ++ x0 = svqdecw_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecw_pat_n_vl32_s32: ++** sqdecw x0, w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl32_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL32, 16), ++ x0 = svqdecw_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecw_pat_n_vl64_s32: ++** sqdecw x0, w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S 
(qdecw_pat_n_vl64_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL64, 16), ++ x0 = svqdecw_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecw_pat_n_vl128_s32: ++** sqdecw x0, w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl128_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL128, 16), ++ x0 = svqdecw_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecw_pat_n_vl256_s32: ++** sqdecw x0, w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl256_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_VL256, 16), ++ x0 = svqdecw_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecw_pat_n_mul4_s32: ++** sqdecw x0, w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_mul4_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_MUL4, 16), ++ x0 = svqdecw_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecw_pat_n_mul3_s32: ++** sqdecw x0, w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_mul3_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_MUL3, 16), ++ x0 = svqdecw_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecw_pat_n_all_s32: ++** sqdecw x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_all_s32, int32_t, ++ x0 = svqdecw_pat_n_s32 (x0, SV_ALL, 16), ++ x0 = svqdecw_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s64.c +new file mode 100644 +index 000000000..9c684a7c7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecw_pat_n_1_s64_tied: ++** sqdecw x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_1_s64_tied, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_POW2, 1), ++ x0 = svqdecw_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_n_1_s64_untied: ++** mov x0, x1 ++** sqdecw x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_1_s64_untied, int64_t, ++ x0 = svqdecw_pat_n_s64 (x1, SV_POW2, 1), ++ x0 = svqdecw_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_n_2_s64: ++** sqdecw x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_2_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_POW2, 2), ++ x0 = svqdecw_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecw_pat_n_7_s64: ++** sqdecw x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_7_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_POW2, 7), ++ x0 = svqdecw_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecw_pat_n_15_s64: ++** sqdecw x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_15_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_POW2, 15), ++ x0 = svqdecw_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecw_pat_n_16_s64: ++** sqdecw x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_16_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_POW2, 16), ++ x0 = svqdecw_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecw_pat_n_vl1_s64: ++** sqdecw x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl1_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL1, 16), ++ x0 = svqdecw_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecw_pat_n_vl2_s64: ++** sqdecw x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl2_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL2, 16), ++ x0 = svqdecw_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecw_pat_n_vl3_s64: ++** sqdecw x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl3_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL3, 16), ++ x0 = svqdecw_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecw_pat_n_vl4_s64: ++** sqdecw x0, vl4, mul #16 ++** ret ++*/ 
++TEST_UNIFORM_S (qdecw_pat_n_vl4_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL4, 16), ++ x0 = svqdecw_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecw_pat_n_vl5_s64: ++** sqdecw x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl5_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL5, 16), ++ x0 = svqdecw_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecw_pat_n_vl6_s64: ++** sqdecw x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl6_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL6, 16), ++ x0 = svqdecw_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecw_pat_n_vl7_s64: ++** sqdecw x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl7_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL7, 16), ++ x0 = svqdecw_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecw_pat_n_vl8_s64: ++** sqdecw x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl8_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL8, 16), ++ x0 = svqdecw_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecw_pat_n_vl16_s64: ++** sqdecw x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl16_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL16, 16), ++ x0 = svqdecw_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecw_pat_n_vl32_s64: ++** sqdecw x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl32_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL32, 16), ++ x0 = svqdecw_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecw_pat_n_vl64_s64: ++** sqdecw x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl64_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL64, 16), ++ x0 = svqdecw_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecw_pat_n_vl128_s64: ++** sqdecw x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl128_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL128, 16), ++ x0 = svqdecw_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecw_pat_n_vl256_s64: ++** sqdecw x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl256_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_VL256, 16), ++ x0 = svqdecw_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecw_pat_n_mul4_s64: ++** sqdecw x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_mul4_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_MUL4, 16), ++ x0 = svqdecw_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecw_pat_n_mul3_s64: ++** sqdecw x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_mul3_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_MUL3, 16), ++ x0 = svqdecw_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecw_pat_n_all_s64: ++** sqdecw x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_all_s64, int64_t, ++ x0 = svqdecw_pat_n_s64 (x0, SV_ALL, 16), ++ x0 = svqdecw_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u32.c +new file mode 100644 +index 000000000..8d3fcb473 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u32.c +@@ -0,0 +1,401 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecw_pat_1_u32_tied: ++** uqdecw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_1_u32_tied, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_POW2, 1), ++ z0 = svqdecw_pat (z0, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_1_u32_untied: ++** movprfx z0, z1 ++** uqdecw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_1_u32_untied, svuint32_t, ++ z0 = svqdecw_pat_u32 (z1, SV_POW2, 1), ++ z0 = svqdecw_pat (z1, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_2_u32: ++** uqdecw z0\.s, pow2, mul #2 ++** 
ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_2_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_POW2, 2), ++ z0 = svqdecw_pat (z0, SV_POW2, 2)) ++ ++/* ++** qdecw_pat_7_u32: ++** uqdecw z0\.s, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_7_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_POW2, 7), ++ z0 = svqdecw_pat (z0, SV_POW2, 7)) ++ ++/* ++** qdecw_pat_15_u32: ++** uqdecw z0\.s, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_15_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_POW2, 15), ++ z0 = svqdecw_pat (z0, SV_POW2, 15)) ++ ++/* ++** qdecw_pat_16_u32: ++** uqdecw z0\.s, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_16_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_POW2, 16), ++ z0 = svqdecw_pat (z0, SV_POW2, 16)) ++ ++/* ++** qdecw_pat_vl1_u32: ++** uqdecw z0\.s, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl1_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL1, 16), ++ z0 = svqdecw_pat (z0, SV_VL1, 16)) ++ ++/* ++** qdecw_pat_vl2_u32: ++** uqdecw z0\.s, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl2_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL2, 16), ++ z0 = svqdecw_pat (z0, SV_VL2, 16)) ++ ++/* ++** qdecw_pat_vl3_u32: ++** uqdecw z0\.s, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl3_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL3, 16), ++ z0 = svqdecw_pat (z0, SV_VL3, 16)) ++ ++/* ++** qdecw_pat_vl4_u32: ++** uqdecw z0\.s, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl4_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL4, 16), ++ z0 = svqdecw_pat (z0, SV_VL4, 16)) ++ ++/* ++** qdecw_pat_vl5_u32: ++** uqdecw z0\.s, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl5_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL5, 16), ++ z0 = svqdecw_pat (z0, SV_VL5, 16)) ++ ++/* ++** qdecw_pat_vl6_u32: ++** uqdecw z0\.s, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl6_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL6, 16), ++ z0 = svqdecw_pat (z0, SV_VL6, 16)) ++ ++/* ++** qdecw_pat_vl7_u32: ++** uqdecw z0\.s, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl7_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL7, 16), ++ z0 = svqdecw_pat (z0, SV_VL7, 16)) ++ ++/* ++** qdecw_pat_vl8_u32: ++** uqdecw z0\.s, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl8_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL8, 16), ++ z0 = svqdecw_pat (z0, SV_VL8, 16)) ++ ++/* ++** qdecw_pat_vl16_u32: ++** uqdecw z0\.s, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl16_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL16, 16), ++ z0 = svqdecw_pat (z0, SV_VL16, 16)) ++ ++/* ++** qdecw_pat_vl32_u32: ++** uqdecw z0\.s, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl32_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL32, 16), ++ z0 = svqdecw_pat (z0, SV_VL32, 16)) ++ ++/* ++** qdecw_pat_vl64_u32: ++** uqdecw z0\.s, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl64_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL64, 16), ++ z0 = svqdecw_pat (z0, SV_VL64, 16)) ++ ++/* ++** qdecw_pat_vl128_u32: ++** uqdecw z0\.s, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl128_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL128, 16), ++ z0 = svqdecw_pat (z0, SV_VL128, 16)) ++ ++/* ++** qdecw_pat_vl256_u32: ++** uqdecw z0\.s, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_vl256_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_VL256, 16), ++ z0 = svqdecw_pat (z0, SV_VL256, 16)) ++ ++/* ++** qdecw_pat_mul4_u32: ++** uqdecw z0\.s, mul4, mul #16 ++** 
ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_mul4_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_MUL4, 16), ++ z0 = svqdecw_pat (z0, SV_MUL4, 16)) ++ ++/* ++** qdecw_pat_mul3_u32: ++** uqdecw z0\.s, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_mul3_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_MUL3, 16), ++ z0 = svqdecw_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qdecw_pat_all_u32: ++** uqdecw z0\.s, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_pat_all_u32, svuint32_t, ++ z0 = svqdecw_pat_u32 (z0, SV_ALL, 16), ++ z0 = svqdecw_pat (z0, SV_ALL, 16)) ++ ++/* ++** qdecw_pat_n_1_u32_tied: ++** uqdecw w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_1_u32_tied, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_POW2, 1), ++ x0 = svqdecw_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_n_1_u32_untied: ++** mov w0, w1 ++** uqdecw w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_1_u32_untied, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x1, SV_POW2, 1), ++ x0 = svqdecw_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_n_2_u32: ++** uqdecw w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_2_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_POW2, 2), ++ x0 = svqdecw_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecw_pat_n_7_u32: ++** uqdecw w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_7_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_POW2, 7), ++ x0 = svqdecw_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecw_pat_n_15_u32: ++** uqdecw w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_15_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_POW2, 15), ++ x0 = svqdecw_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecw_pat_n_16_u32: ++** uqdecw w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_16_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_POW2, 16), ++ x0 = svqdecw_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecw_pat_n_vl1_u32: ++** uqdecw w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl1_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL1, 16), ++ x0 = svqdecw_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecw_pat_n_vl2_u32: ++** uqdecw w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl2_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL2, 16), ++ x0 = svqdecw_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecw_pat_n_vl3_u32: ++** uqdecw w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl3_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL3, 16), ++ x0 = svqdecw_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecw_pat_n_vl4_u32: ++** uqdecw w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl4_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL4, 16), ++ x0 = svqdecw_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecw_pat_n_vl5_u32: ++** uqdecw w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl5_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL5, 16), ++ x0 = svqdecw_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecw_pat_n_vl6_u32: ++** uqdecw w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl6_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL6, 16), ++ x0 = svqdecw_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecw_pat_n_vl7_u32: ++** uqdecw w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl7_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL7, 16), ++ x0 = svqdecw_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecw_pat_n_vl8_u32: ++** uqdecw w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl8_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL8, 16), ++ x0 = svqdecw_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecw_pat_n_vl16_u32: ++** uqdecw w0, vl16, mul #16 ++** 
ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl16_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL16, 16), ++ x0 = svqdecw_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecw_pat_n_vl32_u32: ++** uqdecw w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl32_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL32, 16), ++ x0 = svqdecw_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecw_pat_n_vl64_u32: ++** uqdecw w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl64_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL64, 16), ++ x0 = svqdecw_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecw_pat_n_vl128_u32: ++** uqdecw w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl128_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL128, 16), ++ x0 = svqdecw_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecw_pat_n_vl256_u32: ++** uqdecw w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl256_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_VL256, 16), ++ x0 = svqdecw_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecw_pat_n_mul4_u32: ++** uqdecw w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_mul4_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_MUL4, 16), ++ x0 = svqdecw_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecw_pat_n_mul3_u32: ++** uqdecw w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_mul3_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_MUL3, 16), ++ x0 = svqdecw_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecw_pat_n_all_u32: ++** uqdecw w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_all_u32, uint32_t, ++ x0 = svqdecw_pat_n_u32 (x0, SV_ALL, 16), ++ x0 = svqdecw_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u64.c +new file mode 100644 +index 000000000..015775b17 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecw_pat_n_1_u64_tied: ++** uqdecw x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_1_u64_tied, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_POW2, 1), ++ x0 = svqdecw_pat (x0, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_n_1_u64_untied: ++** mov x0, x1 ++** uqdecw x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_1_u64_untied, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x1, SV_POW2, 1), ++ x0 = svqdecw_pat (x1, SV_POW2, 1)) ++ ++/* ++** qdecw_pat_n_2_u64: ++** uqdecw x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_2_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_POW2, 2), ++ x0 = svqdecw_pat (x0, SV_POW2, 2)) ++ ++/* ++** qdecw_pat_n_7_u64: ++** uqdecw x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_7_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_POW2, 7), ++ x0 = svqdecw_pat (x0, SV_POW2, 7)) ++ ++/* ++** qdecw_pat_n_15_u64: ++** uqdecw x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_15_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_POW2, 15), ++ x0 = svqdecw_pat (x0, SV_POW2, 15)) ++ ++/* ++** qdecw_pat_n_16_u64: ++** uqdecw x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_16_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_POW2, 16), ++ x0 = svqdecw_pat (x0, SV_POW2, 16)) ++ ++/* ++** qdecw_pat_n_vl1_u64: ++** uqdecw x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl1_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL1, 16), ++ x0 = svqdecw_pat (x0, SV_VL1, 16)) ++ ++/* ++** qdecw_pat_n_vl2_u64: ++** uqdecw x0, vl2, 
mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl2_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL2, 16), ++ x0 = svqdecw_pat (x0, SV_VL2, 16)) ++ ++/* ++** qdecw_pat_n_vl3_u64: ++** uqdecw x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl3_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL3, 16), ++ x0 = svqdecw_pat (x0, SV_VL3, 16)) ++ ++/* ++** qdecw_pat_n_vl4_u64: ++** uqdecw x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl4_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL4, 16), ++ x0 = svqdecw_pat (x0, SV_VL4, 16)) ++ ++/* ++** qdecw_pat_n_vl5_u64: ++** uqdecw x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl5_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL5, 16), ++ x0 = svqdecw_pat (x0, SV_VL5, 16)) ++ ++/* ++** qdecw_pat_n_vl6_u64: ++** uqdecw x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl6_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL6, 16), ++ x0 = svqdecw_pat (x0, SV_VL6, 16)) ++ ++/* ++** qdecw_pat_n_vl7_u64: ++** uqdecw x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl7_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL7, 16), ++ x0 = svqdecw_pat (x0, SV_VL7, 16)) ++ ++/* ++** qdecw_pat_n_vl8_u64: ++** uqdecw x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl8_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL8, 16), ++ x0 = svqdecw_pat (x0, SV_VL8, 16)) ++ ++/* ++** qdecw_pat_n_vl16_u64: ++** uqdecw x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl16_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL16, 16), ++ x0 = svqdecw_pat (x0, SV_VL16, 16)) ++ ++/* ++** qdecw_pat_n_vl32_u64: ++** uqdecw x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl32_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL32, 16), ++ x0 = svqdecw_pat (x0, SV_VL32, 16)) ++ ++/* ++** qdecw_pat_n_vl64_u64: ++** uqdecw x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl64_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL64, 16), ++ x0 = svqdecw_pat (x0, SV_VL64, 16)) ++ ++/* ++** qdecw_pat_n_vl128_u64: ++** uqdecw x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl128_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL128, 16), ++ x0 = svqdecw_pat (x0, SV_VL128, 16)) ++ ++/* ++** qdecw_pat_n_vl256_u64: ++** uqdecw x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_vl256_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_VL256, 16), ++ x0 = svqdecw_pat (x0, SV_VL256, 16)) ++ ++/* ++** qdecw_pat_n_mul4_u64: ++** uqdecw x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_mul4_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_MUL4, 16), ++ x0 = svqdecw_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qdecw_pat_n_mul3_u64: ++** uqdecw x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_mul3_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_MUL3, 16), ++ x0 = svqdecw_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qdecw_pat_n_all_u64: ++** uqdecw x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_pat_n_all_u64, uint64_t, ++ x0 = svqdecw_pat_n_u64 (x0, SV_ALL, 16), ++ x0 = svqdecw_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s32.c +new file mode 100644 +index 000000000..8dfe8a177 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s32.c +@@ -0,0 +1,113 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecw_1_s32_tied: ++** sqdecw z0\.s ++** ret ++*/ 
++TEST_UNIFORM_Z (qdecw_1_s32_tied, svint32_t, ++ z0 = svqdecw_s32 (z0, 1), ++ z0 = svqdecw (z0, 1)) ++ ++/* ++** qdecw_1_s32_untied: ++** movprfx z0, z1 ++** sqdecw z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_1_s32_untied, svint32_t, ++ z0 = svqdecw_s32 (z1, 1), ++ z0 = svqdecw (z1, 1)) ++ ++/* ++** qdecw_2_s32: ++** sqdecw z0\.s, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_2_s32, svint32_t, ++ z0 = svqdecw_s32 (z0, 2), ++ z0 = svqdecw (z0, 2)) ++ ++/* ++** qdecw_7_s32: ++** sqdecw z0\.s, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_7_s32, svint32_t, ++ z0 = svqdecw_s32 (z0, 7), ++ z0 = svqdecw (z0, 7)) ++ ++/* ++** qdecw_15_s32: ++** sqdecw z0\.s, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_15_s32, svint32_t, ++ z0 = svqdecw_s32 (z0, 15), ++ z0 = svqdecw (z0, 15)) ++ ++/* ++** qdecw_16_s32: ++** sqdecw z0\.s, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_16_s32, svint32_t, ++ z0 = svqdecw_s32 (z0, 16), ++ z0 = svqdecw (z0, 16)) ++ ++/* ++** qdecw_n_1_s32_tied: ++** sqdecw x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_1_s32_tied, int32_t, ++ x0 = svqdecw_n_s32 (x0, 1), ++ x0 = svqdecw (x0, 1)) ++ ++/* ++** qdecw_n_1_s32_untied: ++** mov w0, w1 ++** sqdecw x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_1_s32_untied, int32_t, ++ x0 = svqdecw_n_s32 (x1, 1), ++ x0 = svqdecw (x1, 1)) ++ ++/* ++** qdecw_n_2_s32: ++** sqdecw x0, w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_2_s32, int32_t, ++ x0 = svqdecw_n_s32 (x0, 2), ++ x0 = svqdecw (x0, 2)) ++ ++/* ++** qdecw_n_7_s32: ++** sqdecw x0, w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_7_s32, int32_t, ++ x0 = svqdecw_n_s32 (x0, 7), ++ x0 = svqdecw (x0, 7)) ++ ++/* ++** qdecw_n_15_s32: ++** sqdecw x0, w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_15_s32, int32_t, ++ x0 = svqdecw_n_s32 (x0, 15), ++ x0 = svqdecw (x0, 15)) ++ ++/* ++** qdecw_n_16_s32: ++** sqdecw x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_16_s32, int32_t, ++ x0 = svqdecw_n_s32 (x0, 16), ++ x0 = svqdecw (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s64.c +new file mode 100644 +index 000000000..b0841a8b2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecw_n_1_s64_tied: ++** sqdecw x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_1_s64_tied, int64_t, ++ x0 = svqdecw_n_s64 (x0, 1), ++ x0 = svqdecw (x0, 1)) ++ ++/* ++** qdecw_n_1_s64_untied: ++** mov x0, x1 ++** sqdecw x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_1_s64_untied, int64_t, ++ x0 = svqdecw_n_s64 (x1, 1), ++ x0 = svqdecw (x1, 1)) ++ ++/* ++** qdecw_n_2_s64: ++** sqdecw x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_2_s64, int64_t, ++ x0 = svqdecw_n_s64 (x0, 2), ++ x0 = svqdecw (x0, 2)) ++ ++/* ++** qdecw_n_7_s64: ++** sqdecw x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_7_s64, int64_t, ++ x0 = svqdecw_n_s64 (x0, 7), ++ x0 = svqdecw (x0, 7)) ++ ++/* ++** qdecw_n_15_s64: ++** sqdecw x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_15_s64, int64_t, ++ x0 = svqdecw_n_s64 (x0, 15), ++ x0 = svqdecw (x0, 15)) ++ ++/* ++** qdecw_n_16_s64: ++** sqdecw x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_16_s64, int64_t, ++ x0 = svqdecw_n_s64 (x0, 16), ++ x0 = svqdecw (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u32.c +new file mode 100644 +index 000000000..22e8a8d69 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u32.c +@@ -0,0 +1,113 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecw_1_u32_tied: ++** uqdecw z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_1_u32_tied, svuint32_t, ++ z0 = svqdecw_u32 (z0, 1), ++ z0 = svqdecw (z0, 1)) ++ ++/* ++** qdecw_1_u32_untied: ++** movprfx z0, z1 ++** uqdecw z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_1_u32_untied, svuint32_t, ++ z0 = svqdecw_u32 (z1, 1), ++ z0 = svqdecw (z1, 1)) ++ ++/* ++** qdecw_2_u32: ++** uqdecw z0\.s, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_2_u32, svuint32_t, ++ z0 = svqdecw_u32 (z0, 2), ++ z0 = svqdecw (z0, 2)) ++ ++/* ++** qdecw_7_u32: ++** uqdecw z0\.s, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_7_u32, svuint32_t, ++ z0 = svqdecw_u32 (z0, 7), ++ z0 = svqdecw (z0, 7)) ++ ++/* ++** qdecw_15_u32: ++** uqdecw z0\.s, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_15_u32, svuint32_t, ++ z0 = svqdecw_u32 (z0, 15), ++ z0 = svqdecw (z0, 15)) ++ ++/* ++** qdecw_16_u32: ++** uqdecw z0\.s, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qdecw_16_u32, svuint32_t, ++ z0 = svqdecw_u32 (z0, 16), ++ z0 = svqdecw (z0, 16)) ++ ++/* ++** qdecw_n_1_u32_tied: ++** uqdecw w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_1_u32_tied, uint32_t, ++ x0 = svqdecw_n_u32 (x0, 1), ++ x0 = svqdecw (x0, 1)) ++ ++/* ++** qdecw_n_1_u32_untied: ++** mov w0, w1 ++** uqdecw w0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_1_u32_untied, uint32_t, ++ x0 = svqdecw_n_u32 (x1, 1), ++ x0 = svqdecw (x1, 1)) ++ ++/* ++** qdecw_n_2_u32: ++** uqdecw w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_2_u32, uint32_t, ++ x0 = svqdecw_n_u32 (x0, 2), ++ x0 = svqdecw (x0, 2)) ++ ++/* ++** qdecw_n_7_u32: ++** uqdecw w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_7_u32, uint32_t, ++ x0 = svqdecw_n_u32 (x0, 7), ++ x0 = svqdecw (x0, 7)) ++ ++/* ++** qdecw_n_15_u32: ++** uqdecw w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_15_u32, uint32_t, ++ x0 = svqdecw_n_u32 (x0, 15), ++ x0 = svqdecw (x0, 15)) ++ ++/* ++** qdecw_n_16_u32: ++** uqdecw w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_16_u32, uint32_t, ++ x0 = svqdecw_n_u32 (x0, 16), ++ x0 = svqdecw (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u64.c +new file mode 100644 +index 000000000..88c484e8b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qdecw_n_1_u64_tied: ++** uqdecw x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_1_u64_tied, uint64_t, ++ x0 = svqdecw_n_u64 (x0, 1), ++ x0 = svqdecw (x0, 1)) ++ ++/* ++** qdecw_n_1_u64_untied: ++** mov x0, x1 ++** uqdecw x0 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_1_u64_untied, uint64_t, ++ x0 = svqdecw_n_u64 (x1, 1), ++ x0 = svqdecw (x1, 1)) ++ ++/* ++** qdecw_n_2_u64: ++** uqdecw x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_2_u64, uint64_t, ++ x0 = svqdecw_n_u64 (x0, 2), ++ x0 = svqdecw (x0, 2)) ++ ++/* ++** qdecw_n_7_u64: ++** uqdecw x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_7_u64, uint64_t, ++ x0 = svqdecw_n_u64 (x0, 7), ++ x0 = svqdecw (x0, 7)) ++ ++/* ++** qdecw_n_15_u64: ++** uqdecw x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S 
(qdecw_n_15_u64, uint64_t, ++ x0 = svqdecw_n_u64 (x0, 15), ++ x0 = svqdecw (x0, 15)) ++ ++/* ++** qdecw_n_16_u64: ++** uqdecw x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qdecw_n_16_u64, uint64_t, ++ x0 = svqdecw_n_u64 (x0, 16), ++ x0 = svqdecw (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s32.c +new file mode 100644 +index 000000000..16a8d8e9a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincb_pat_n_1_s32_tied: ++** sqincb x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_1_s32_tied, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_POW2, 1), ++ x0 = svqincb_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincb_pat_n_1_s32_untied: ++** mov w0, w1 ++** sqincb x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_1_s32_untied, int32_t, ++ x0 = svqincb_pat_n_s32 (x1, SV_POW2, 1), ++ x0 = svqincb_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincb_pat_n_2_s32: ++** sqincb x0, w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_2_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_POW2, 2), ++ x0 = svqincb_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincb_pat_n_7_s32: ++** sqincb x0, w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_7_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_POW2, 7), ++ x0 = svqincb_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincb_pat_n_15_s32: ++** sqincb x0, w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_15_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_POW2, 15), ++ x0 = svqincb_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincb_pat_n_16_s32: ++** sqincb x0, w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_16_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_POW2, 16), ++ x0 = svqincb_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincb_pat_n_vl1_s32: ++** sqincb x0, w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl1_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL1, 16), ++ x0 = svqincb_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincb_pat_n_vl2_s32: ++** sqincb x0, w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl2_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL2, 16), ++ x0 = svqincb_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincb_pat_n_vl3_s32: ++** sqincb x0, w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl3_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL3, 16), ++ x0 = svqincb_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincb_pat_n_vl4_s32: ++** sqincb x0, w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl4_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL4, 16), ++ x0 = svqincb_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincb_pat_n_vl5_s32: ++** sqincb x0, w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl5_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL5, 16), ++ x0 = svqincb_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincb_pat_n_vl6_s32: ++** sqincb x0, w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl6_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL6, 16), ++ x0 = svqincb_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincb_pat_n_vl7_s32: ++** sqincb x0, w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl7_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL7, 16), ++ x0 = svqincb_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincb_pat_n_vl8_s32: ++** sqincb x0, w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl8_s32, int32_t, ++ x0 = 
svqincb_pat_n_s32 (x0, SV_VL8, 16), ++ x0 = svqincb_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincb_pat_n_vl16_s32: ++** sqincb x0, w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl16_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL16, 16), ++ x0 = svqincb_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincb_pat_n_vl32_s32: ++** sqincb x0, w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl32_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL32, 16), ++ x0 = svqincb_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincb_pat_n_vl64_s32: ++** sqincb x0, w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl64_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL64, 16), ++ x0 = svqincb_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincb_pat_n_vl128_s32: ++** sqincb x0, w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl128_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL128, 16), ++ x0 = svqincb_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincb_pat_n_vl256_s32: ++** sqincb x0, w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl256_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_VL256, 16), ++ x0 = svqincb_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincb_pat_n_mul4_s32: ++** sqincb x0, w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_mul4_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_MUL4, 16), ++ x0 = svqincb_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincb_pat_n_mul3_s32: ++** sqincb x0, w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_mul3_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_MUL3, 16), ++ x0 = svqincb_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincb_pat_n_all_s32: ++** sqincb x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_all_s32, int32_t, ++ x0 = svqincb_pat_n_s32 (x0, SV_ALL, 16), ++ x0 = svqincb_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s64.c +new file mode 100644 +index 000000000..79ed73ba7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincb_pat_n_1_s64_tied: ++** sqincb x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_1_s64_tied, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_POW2, 1), ++ x0 = svqincb_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincb_pat_n_1_s64_untied: ++** mov x0, x1 ++** sqincb x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_1_s64_untied, int64_t, ++ x0 = svqincb_pat_n_s64 (x1, SV_POW2, 1), ++ x0 = svqincb_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincb_pat_n_2_s64: ++** sqincb x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_2_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_POW2, 2), ++ x0 = svqincb_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincb_pat_n_7_s64: ++** sqincb x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_7_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_POW2, 7), ++ x0 = svqincb_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincb_pat_n_15_s64: ++** sqincb x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_15_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_POW2, 15), ++ x0 = svqincb_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincb_pat_n_16_s64: ++** sqincb x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_16_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_POW2, 16), ++ x0 = svqincb_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincb_pat_n_vl1_s64: ++** sqincb x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S 
(qincb_pat_n_vl1_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL1, 16), ++ x0 = svqincb_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincb_pat_n_vl2_s64: ++** sqincb x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl2_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL2, 16), ++ x0 = svqincb_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincb_pat_n_vl3_s64: ++** sqincb x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl3_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL3, 16), ++ x0 = svqincb_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincb_pat_n_vl4_s64: ++** sqincb x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl4_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL4, 16), ++ x0 = svqincb_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincb_pat_n_vl5_s64: ++** sqincb x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl5_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL5, 16), ++ x0 = svqincb_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincb_pat_n_vl6_s64: ++** sqincb x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl6_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL6, 16), ++ x0 = svqincb_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincb_pat_n_vl7_s64: ++** sqincb x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl7_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL7, 16), ++ x0 = svqincb_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincb_pat_n_vl8_s64: ++** sqincb x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl8_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL8, 16), ++ x0 = svqincb_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincb_pat_n_vl16_s64: ++** sqincb x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl16_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL16, 16), ++ x0 = svqincb_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincb_pat_n_vl32_s64: ++** sqincb x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl32_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL32, 16), ++ x0 = svqincb_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincb_pat_n_vl64_s64: ++** sqincb x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl64_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL64, 16), ++ x0 = svqincb_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincb_pat_n_vl128_s64: ++** sqincb x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl128_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL128, 16), ++ x0 = svqincb_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincb_pat_n_vl256_s64: ++** sqincb x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl256_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_VL256, 16), ++ x0 = svqincb_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincb_pat_n_mul4_s64: ++** sqincb x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_mul4_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_MUL4, 16), ++ x0 = svqincb_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincb_pat_n_mul3_s64: ++** sqincb x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_mul3_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_MUL3, 16), ++ x0 = svqincb_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincb_pat_n_all_s64: ++** sqincb x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_all_s64, int64_t, ++ x0 = svqincb_pat_n_s64 (x0, SV_ALL, 16), ++ x0 = svqincb_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u32.c +new file mode 100644 +index 000000000..30e5f28ee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u32.c +@@ -0,0 +1,202 
@@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincb_pat_n_1_u32_tied: ++** uqincb w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_1_u32_tied, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_POW2, 1), ++ x0 = svqincb_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincb_pat_n_1_u32_untied: ++** mov w0, w1 ++** uqincb w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_1_u32_untied, uint32_t, ++ x0 = svqincb_pat_n_u32 (x1, SV_POW2, 1), ++ x0 = svqincb_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincb_pat_n_2_u32: ++** uqincb w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_2_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_POW2, 2), ++ x0 = svqincb_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincb_pat_n_7_u32: ++** uqincb w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_7_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_POW2, 7), ++ x0 = svqincb_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincb_pat_n_15_u32: ++** uqincb w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_15_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_POW2, 15), ++ x0 = svqincb_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincb_pat_n_16_u32: ++** uqincb w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_16_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_POW2, 16), ++ x0 = svqincb_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincb_pat_n_vl1_u32: ++** uqincb w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl1_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL1, 16), ++ x0 = svqincb_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincb_pat_n_vl2_u32: ++** uqincb w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl2_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL2, 16), ++ x0 = svqincb_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincb_pat_n_vl3_u32: ++** uqincb w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl3_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL3, 16), ++ x0 = svqincb_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincb_pat_n_vl4_u32: ++** uqincb w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl4_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL4, 16), ++ x0 = svqincb_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincb_pat_n_vl5_u32: ++** uqincb w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl5_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL5, 16), ++ x0 = svqincb_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincb_pat_n_vl6_u32: ++** uqincb w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl6_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL6, 16), ++ x0 = svqincb_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincb_pat_n_vl7_u32: ++** uqincb w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl7_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL7, 16), ++ x0 = svqincb_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincb_pat_n_vl8_u32: ++** uqincb w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl8_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL8, 16), ++ x0 = svqincb_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincb_pat_n_vl16_u32: ++** uqincb w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl16_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL16, 16), ++ x0 = svqincb_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincb_pat_n_vl32_u32: ++** uqincb w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl32_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL32, 16), ++ x0 = svqincb_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincb_pat_n_vl64_u32: ++** uqincb w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S 
(qincb_pat_n_vl64_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL64, 16), ++ x0 = svqincb_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincb_pat_n_vl128_u32: ++** uqincb w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl128_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL128, 16), ++ x0 = svqincb_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincb_pat_n_vl256_u32: ++** uqincb w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl256_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_VL256, 16), ++ x0 = svqincb_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincb_pat_n_mul4_u32: ++** uqincb w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_mul4_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_MUL4, 16), ++ x0 = svqincb_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincb_pat_n_mul3_u32: ++** uqincb w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_mul3_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_MUL3, 16), ++ x0 = svqincb_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincb_pat_n_all_u32: ++** uqincb w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_all_u32, uint32_t, ++ x0 = svqincb_pat_n_u32 (x0, SV_ALL, 16), ++ x0 = svqincb_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u64.c +new file mode 100644 +index 000000000..038b1edb6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincb_pat_n_1_u64_tied: ++** uqincb x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_1_u64_tied, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_POW2, 1), ++ x0 = svqincb_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincb_pat_n_1_u64_untied: ++** mov x0, x1 ++** uqincb x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_1_u64_untied, uint64_t, ++ x0 = svqincb_pat_n_u64 (x1, SV_POW2, 1), ++ x0 = svqincb_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincb_pat_n_2_u64: ++** uqincb x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_2_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_POW2, 2), ++ x0 = svqincb_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincb_pat_n_7_u64: ++** uqincb x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_7_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_POW2, 7), ++ x0 = svqincb_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincb_pat_n_15_u64: ++** uqincb x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_15_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_POW2, 15), ++ x0 = svqincb_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincb_pat_n_16_u64: ++** uqincb x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_16_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_POW2, 16), ++ x0 = svqincb_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincb_pat_n_vl1_u64: ++** uqincb x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl1_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL1, 16), ++ x0 = svqincb_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincb_pat_n_vl2_u64: ++** uqincb x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl2_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL2, 16), ++ x0 = svqincb_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincb_pat_n_vl3_u64: ++** uqincb x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl3_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL3, 16), ++ x0 = svqincb_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincb_pat_n_vl4_u64: ++** uqincb x0, vl4, mul #16 ++** ret ++*/ 
++TEST_UNIFORM_S (qincb_pat_n_vl4_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL4, 16), ++ x0 = svqincb_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincb_pat_n_vl5_u64: ++** uqincb x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl5_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL5, 16), ++ x0 = svqincb_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincb_pat_n_vl6_u64: ++** uqincb x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl6_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL6, 16), ++ x0 = svqincb_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincb_pat_n_vl7_u64: ++** uqincb x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl7_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL7, 16), ++ x0 = svqincb_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincb_pat_n_vl8_u64: ++** uqincb x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl8_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL8, 16), ++ x0 = svqincb_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincb_pat_n_vl16_u64: ++** uqincb x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl16_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL16, 16), ++ x0 = svqincb_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincb_pat_n_vl32_u64: ++** uqincb x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl32_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL32, 16), ++ x0 = svqincb_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincb_pat_n_vl64_u64: ++** uqincb x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl64_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL64, 16), ++ x0 = svqincb_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincb_pat_n_vl128_u64: ++** uqincb x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl128_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL128, 16), ++ x0 = svqincb_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincb_pat_n_vl256_u64: ++** uqincb x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_vl256_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_VL256, 16), ++ x0 = svqincb_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincb_pat_n_mul4_u64: ++** uqincb x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_mul4_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_MUL4, 16), ++ x0 = svqincb_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincb_pat_n_mul3_u64: ++** uqincb x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_mul3_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_MUL3, 16), ++ x0 = svqincb_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincb_pat_n_all_u64: ++** uqincb x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_pat_n_all_u64, uint64_t, ++ x0 = svqincb_pat_n_u64 (x0, SV_ALL, 16), ++ x0 = svqincb_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s32.c +new file mode 100644 +index 000000000..8e74073de +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincb_n_1_s32_tied: ++** sqincb x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_1_s32_tied, int32_t, ++ x0 = svqincb_n_s32 (x0, 1), ++ x0 = svqincb (x0, 1)) ++ ++/* ++** qincb_n_1_s32_untied: ++** mov w0, w1 ++** sqincb x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_1_s32_untied, int32_t, ++ x0 = svqincb_n_s32 (x1, 1), ++ x0 = svqincb (x1, 1)) ++ ++/* ++** qincb_n_2_s32: ++** sqincb x0, w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_2_s32, int32_t, ++ x0 = svqincb_n_s32 (x0, 
2), ++ x0 = svqincb (x0, 2)) ++ ++/* ++** qincb_n_7_s32: ++** sqincb x0, w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_7_s32, int32_t, ++ x0 = svqincb_n_s32 (x0, 7), ++ x0 = svqincb (x0, 7)) ++ ++/* ++** qincb_n_15_s32: ++** sqincb x0, w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_15_s32, int32_t, ++ x0 = svqincb_n_s32 (x0, 15), ++ x0 = svqincb (x0, 15)) ++ ++/* ++** qincb_n_16_s32: ++** sqincb x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_16_s32, int32_t, ++ x0 = svqincb_n_s32 (x0, 16), ++ x0 = svqincb (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s64.c +new file mode 100644 +index 000000000..b064c1264 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincb_n_1_s64_tied: ++** sqincb x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_1_s64_tied, int64_t, ++ x0 = svqincb_n_s64 (x0, 1), ++ x0 = svqincb (x0, 1)) ++ ++/* ++** qincb_n_1_s64_untied: ++** mov x0, x1 ++** sqincb x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_1_s64_untied, int64_t, ++ x0 = svqincb_n_s64 (x1, 1), ++ x0 = svqincb (x1, 1)) ++ ++/* ++** qincb_n_2_s64: ++** sqincb x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_2_s64, int64_t, ++ x0 = svqincb_n_s64 (x0, 2), ++ x0 = svqincb (x0, 2)) ++ ++/* ++** qincb_n_7_s64: ++** sqincb x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_7_s64, int64_t, ++ x0 = svqincb_n_s64 (x0, 7), ++ x0 = svqincb (x0, 7)) ++ ++/* ++** qincb_n_15_s64: ++** sqincb x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_15_s64, int64_t, ++ x0 = svqincb_n_s64 (x0, 15), ++ x0 = svqincb (x0, 15)) ++ ++/* ++** qincb_n_16_s64: ++** sqincb x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_16_s64, int64_t, ++ x0 = svqincb_n_s64 (x0, 16), ++ x0 = svqincb (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u32.c +new file mode 100644 +index 000000000..df3add73e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincb_n_1_u32_tied: ++** uqincb w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_1_u32_tied, uint32_t, ++ x0 = svqincb_n_u32 (x0, 1), ++ x0 = svqincb (x0, 1)) ++ ++/* ++** qincb_n_1_u32_untied: ++** mov w0, w1 ++** uqincb w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_1_u32_untied, uint32_t, ++ x0 = svqincb_n_u32 (x1, 1), ++ x0 = svqincb (x1, 1)) ++ ++/* ++** qincb_n_2_u32: ++** uqincb w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_2_u32, uint32_t, ++ x0 = svqincb_n_u32 (x0, 2), ++ x0 = svqincb (x0, 2)) ++ ++/* ++** qincb_n_7_u32: ++** uqincb w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_7_u32, uint32_t, ++ x0 = svqincb_n_u32 (x0, 7), ++ x0 = svqincb (x0, 7)) ++ ++/* ++** qincb_n_15_u32: ++** uqincb w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_15_u32, uint32_t, ++ x0 = svqincb_n_u32 (x0, 15), ++ x0 = svqincb (x0, 15)) ++ ++/* ++** qincb_n_16_u32: ++** uqincb w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_16_u32, uint32_t, ++ x0 = svqincb_n_u32 (x0, 16), ++ x0 = svqincb (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u64.c +new file mode 100644 +index 
000000000..d9a08c865 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincb_n_1_u64_tied: ++** uqincb x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_1_u64_tied, uint64_t, ++ x0 = svqincb_n_u64 (x0, 1), ++ x0 = svqincb (x0, 1)) ++ ++/* ++** qincb_n_1_u64_untied: ++** mov x0, x1 ++** uqincb x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_1_u64_untied, uint64_t, ++ x0 = svqincb_n_u64 (x1, 1), ++ x0 = svqincb (x1, 1)) ++ ++/* ++** qincb_n_2_u64: ++** uqincb x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_2_u64, uint64_t, ++ x0 = svqincb_n_u64 (x0, 2), ++ x0 = svqincb (x0, 2)) ++ ++/* ++** qincb_n_7_u64: ++** uqincb x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_7_u64, uint64_t, ++ x0 = svqincb_n_u64 (x0, 7), ++ x0 = svqincb (x0, 7)) ++ ++/* ++** qincb_n_15_u64: ++** uqincb x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_15_u64, uint64_t, ++ x0 = svqincb_n_u64 (x0, 15), ++ x0 = svqincb (x0, 15)) ++ ++/* ++** qincb_n_16_u64: ++** uqincb x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincb_n_16_u64, uint64_t, ++ x0 = svqincb_n_u64 (x0, 16), ++ x0 = svqincb (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s32.c +new file mode 100644 +index 000000000..061f88314 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincd_pat_n_1_s32_tied: ++** sqincd x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_1_s32_tied, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_POW2, 1), ++ x0 = svqincd_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincd_pat_n_1_s32_untied: ++** mov w0, w1 ++** sqincd x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_1_s32_untied, int32_t, ++ x0 = svqincd_pat_n_s32 (x1, SV_POW2, 1), ++ x0 = svqincd_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincd_pat_n_2_s32: ++** sqincd x0, w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_2_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_POW2, 2), ++ x0 = svqincd_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincd_pat_n_7_s32: ++** sqincd x0, w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_7_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_POW2, 7), ++ x0 = svqincd_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincd_pat_n_15_s32: ++** sqincd x0, w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_15_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_POW2, 15), ++ x0 = svqincd_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincd_pat_n_16_s32: ++** sqincd x0, w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_16_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_POW2, 16), ++ x0 = svqincd_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincd_pat_n_vl1_s32: ++** sqincd x0, w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl1_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL1, 16), ++ x0 = svqincd_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincd_pat_n_vl2_s32: ++** sqincd x0, w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl2_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL2, 16), ++ x0 = svqincd_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincd_pat_n_vl3_s32: ++** sqincd x0, w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl3_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL3, 16), ++ x0 = svqincd_pat (x0, 
SV_VL3, 16)) ++ ++/* ++** qincd_pat_n_vl4_s32: ++** sqincd x0, w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl4_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL4, 16), ++ x0 = svqincd_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincd_pat_n_vl5_s32: ++** sqincd x0, w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl5_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL5, 16), ++ x0 = svqincd_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincd_pat_n_vl6_s32: ++** sqincd x0, w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl6_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL6, 16), ++ x0 = svqincd_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincd_pat_n_vl7_s32: ++** sqincd x0, w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl7_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL7, 16), ++ x0 = svqincd_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincd_pat_n_vl8_s32: ++** sqincd x0, w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl8_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL8, 16), ++ x0 = svqincd_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincd_pat_n_vl16_s32: ++** sqincd x0, w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl16_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL16, 16), ++ x0 = svqincd_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincd_pat_n_vl32_s32: ++** sqincd x0, w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl32_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL32, 16), ++ x0 = svqincd_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincd_pat_n_vl64_s32: ++** sqincd x0, w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl64_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL64, 16), ++ x0 = svqincd_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincd_pat_n_vl128_s32: ++** sqincd x0, w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl128_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL128, 16), ++ x0 = svqincd_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincd_pat_n_vl256_s32: ++** sqincd x0, w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl256_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_VL256, 16), ++ x0 = svqincd_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincd_pat_n_mul4_s32: ++** sqincd x0, w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_mul4_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_MUL4, 16), ++ x0 = svqincd_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincd_pat_n_mul3_s32: ++** sqincd x0, w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_mul3_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_MUL3, 16), ++ x0 = svqincd_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincd_pat_n_all_s32: ++** sqincd x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_all_s32, int32_t, ++ x0 = svqincd_pat_n_s32 (x0, SV_ALL, 16), ++ x0 = svqincd_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s64.c +new file mode 100644 +index 000000000..02b53e1bc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s64.c +@@ -0,0 +1,401 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincd_pat_1_s64_tied: ++** sqincd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_1_s64_tied, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_POW2, 1), ++ z0 = svqincd_pat (z0, SV_POW2, 1)) ++ ++/* ++** qincd_pat_1_s64_untied: ++** movprfx z0, z1 ++** sqincd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_1_s64_untied, svint64_t, ++ z0 = 
svqincd_pat_s64 (z1, SV_POW2, 1), ++ z0 = svqincd_pat (z1, SV_POW2, 1)) ++ ++/* ++** qincd_pat_2_s64: ++** sqincd z0\.d, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_2_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_POW2, 2), ++ z0 = svqincd_pat (z0, SV_POW2, 2)) ++ ++/* ++** qincd_pat_7_s64: ++** sqincd z0\.d, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_7_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_POW2, 7), ++ z0 = svqincd_pat (z0, SV_POW2, 7)) ++ ++/* ++** qincd_pat_15_s64: ++** sqincd z0\.d, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_15_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_POW2, 15), ++ z0 = svqincd_pat (z0, SV_POW2, 15)) ++ ++/* ++** qincd_pat_16_s64: ++** sqincd z0\.d, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_16_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_POW2, 16), ++ z0 = svqincd_pat (z0, SV_POW2, 16)) ++ ++/* ++** qincd_pat_vl1_s64: ++** sqincd z0\.d, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl1_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL1, 16), ++ z0 = svqincd_pat (z0, SV_VL1, 16)) ++ ++/* ++** qincd_pat_vl2_s64: ++** sqincd z0\.d, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl2_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL2, 16), ++ z0 = svqincd_pat (z0, SV_VL2, 16)) ++ ++/* ++** qincd_pat_vl3_s64: ++** sqincd z0\.d, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl3_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL3, 16), ++ z0 = svqincd_pat (z0, SV_VL3, 16)) ++ ++/* ++** qincd_pat_vl4_s64: ++** sqincd z0\.d, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl4_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL4, 16), ++ z0 = svqincd_pat (z0, SV_VL4, 16)) ++ ++/* ++** qincd_pat_vl5_s64: ++** sqincd z0\.d, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl5_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL5, 16), ++ z0 = svqincd_pat (z0, SV_VL5, 16)) ++ ++/* ++** qincd_pat_vl6_s64: ++** sqincd z0\.d, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl6_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL6, 16), ++ z0 = svqincd_pat (z0, SV_VL6, 16)) ++ ++/* ++** qincd_pat_vl7_s64: ++** sqincd z0\.d, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl7_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL7, 16), ++ z0 = svqincd_pat (z0, SV_VL7, 16)) ++ ++/* ++** qincd_pat_vl8_s64: ++** sqincd z0\.d, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl8_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL8, 16), ++ z0 = svqincd_pat (z0, SV_VL8, 16)) ++ ++/* ++** qincd_pat_vl16_s64: ++** sqincd z0\.d, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl16_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL16, 16), ++ z0 = svqincd_pat (z0, SV_VL16, 16)) ++ ++/* ++** qincd_pat_vl32_s64: ++** sqincd z0\.d, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl32_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL32, 16), ++ z0 = svqincd_pat (z0, SV_VL32, 16)) ++ ++/* ++** qincd_pat_vl64_s64: ++** sqincd z0\.d, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl64_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL64, 16), ++ z0 = svqincd_pat (z0, SV_VL64, 16)) ++ ++/* ++** qincd_pat_vl128_s64: ++** sqincd z0\.d, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl128_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_VL128, 16), ++ z0 = svqincd_pat (z0, SV_VL128, 16)) ++ ++/* ++** qincd_pat_vl256_s64: ++** sqincd z0\.d, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl256_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, 
SV_VL256, 16), ++ z0 = svqincd_pat (z0, SV_VL256, 16)) ++ ++/* ++** qincd_pat_mul4_s64: ++** sqincd z0\.d, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_mul4_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_MUL4, 16), ++ z0 = svqincd_pat (z0, SV_MUL4, 16)) ++ ++/* ++** qincd_pat_mul3_s64: ++** sqincd z0\.d, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_mul3_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_MUL3, 16), ++ z0 = svqincd_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qincd_pat_all_s64: ++** sqincd z0\.d, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_all_s64, svint64_t, ++ z0 = svqincd_pat_s64 (z0, SV_ALL, 16), ++ z0 = svqincd_pat (z0, SV_ALL, 16)) ++ ++/* ++** qincd_pat_n_1_s64_tied: ++** sqincd x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_1_s64_tied, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_POW2, 1), ++ x0 = svqincd_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincd_pat_n_1_s64_untied: ++** mov x0, x1 ++** sqincd x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_1_s64_untied, int64_t, ++ x0 = svqincd_pat_n_s64 (x1, SV_POW2, 1), ++ x0 = svqincd_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincd_pat_n_2_s64: ++** sqincd x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_2_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_POW2, 2), ++ x0 = svqincd_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincd_pat_n_7_s64: ++** sqincd x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_7_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_POW2, 7), ++ x0 = svqincd_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincd_pat_n_15_s64: ++** sqincd x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_15_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_POW2, 15), ++ x0 = svqincd_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincd_pat_n_16_s64: ++** sqincd x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_16_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_POW2, 16), ++ x0 = svqincd_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincd_pat_n_vl1_s64: ++** sqincd x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl1_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL1, 16), ++ x0 = svqincd_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincd_pat_n_vl2_s64: ++** sqincd x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl2_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL2, 16), ++ x0 = svqincd_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincd_pat_n_vl3_s64: ++** sqincd x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl3_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL3, 16), ++ x0 = svqincd_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincd_pat_n_vl4_s64: ++** sqincd x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl4_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL4, 16), ++ x0 = svqincd_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincd_pat_n_vl5_s64: ++** sqincd x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl5_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL5, 16), ++ x0 = svqincd_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincd_pat_n_vl6_s64: ++** sqincd x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl6_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL6, 16), ++ x0 = svqincd_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincd_pat_n_vl7_s64: ++** sqincd x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl7_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL7, 16), ++ x0 = svqincd_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincd_pat_n_vl8_s64: ++** sqincd x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl8_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL8, 16), 
++ x0 = svqincd_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincd_pat_n_vl16_s64: ++** sqincd x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl16_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL16, 16), ++ x0 = svqincd_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincd_pat_n_vl32_s64: ++** sqincd x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl32_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL32, 16), ++ x0 = svqincd_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincd_pat_n_vl64_s64: ++** sqincd x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl64_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL64, 16), ++ x0 = svqincd_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincd_pat_n_vl128_s64: ++** sqincd x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl128_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL128, 16), ++ x0 = svqincd_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincd_pat_n_vl256_s64: ++** sqincd x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl256_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_VL256, 16), ++ x0 = svqincd_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincd_pat_n_mul4_s64: ++** sqincd x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_mul4_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_MUL4, 16), ++ x0 = svqincd_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincd_pat_n_mul3_s64: ++** sqincd x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_mul3_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_MUL3, 16), ++ x0 = svqincd_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincd_pat_n_all_s64: ++** sqincd x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_all_s64, int64_t, ++ x0 = svqincd_pat_n_s64 (x0, SV_ALL, 16), ++ x0 = svqincd_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u32.c +new file mode 100644 +index 000000000..0e3cbdb54 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincd_pat_n_1_u32_tied: ++** uqincd w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_1_u32_tied, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_POW2, 1), ++ x0 = svqincd_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincd_pat_n_1_u32_untied: ++** mov w0, w1 ++** uqincd w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_1_u32_untied, uint32_t, ++ x0 = svqincd_pat_n_u32 (x1, SV_POW2, 1), ++ x0 = svqincd_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincd_pat_n_2_u32: ++** uqincd w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_2_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_POW2, 2), ++ x0 = svqincd_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincd_pat_n_7_u32: ++** uqincd w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_7_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_POW2, 7), ++ x0 = svqincd_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincd_pat_n_15_u32: ++** uqincd w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_15_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_POW2, 15), ++ x0 = svqincd_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincd_pat_n_16_u32: ++** uqincd w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_16_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_POW2, 16), ++ x0 = svqincd_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincd_pat_n_vl1_u32: ++** uqincd w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl1_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, 
SV_VL1, 16), ++ x0 = svqincd_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincd_pat_n_vl2_u32: ++** uqincd w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl2_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL2, 16), ++ x0 = svqincd_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincd_pat_n_vl3_u32: ++** uqincd w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl3_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL3, 16), ++ x0 = svqincd_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincd_pat_n_vl4_u32: ++** uqincd w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl4_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL4, 16), ++ x0 = svqincd_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincd_pat_n_vl5_u32: ++** uqincd w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl5_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL5, 16), ++ x0 = svqincd_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincd_pat_n_vl6_u32: ++** uqincd w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl6_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL6, 16), ++ x0 = svqincd_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincd_pat_n_vl7_u32: ++** uqincd w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl7_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL7, 16), ++ x0 = svqincd_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincd_pat_n_vl8_u32: ++** uqincd w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl8_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL8, 16), ++ x0 = svqincd_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincd_pat_n_vl16_u32: ++** uqincd w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl16_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL16, 16), ++ x0 = svqincd_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincd_pat_n_vl32_u32: ++** uqincd w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl32_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL32, 16), ++ x0 = svqincd_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincd_pat_n_vl64_u32: ++** uqincd w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl64_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL64, 16), ++ x0 = svqincd_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincd_pat_n_vl128_u32: ++** uqincd w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl128_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL128, 16), ++ x0 = svqincd_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincd_pat_n_vl256_u32: ++** uqincd w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl256_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_VL256, 16), ++ x0 = svqincd_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincd_pat_n_mul4_u32: ++** uqincd w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_mul4_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_MUL4, 16), ++ x0 = svqincd_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincd_pat_n_mul3_u32: ++** uqincd w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_mul3_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_MUL3, 16), ++ x0 = svqincd_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincd_pat_n_all_u32: ++** uqincd w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_all_u32, uint32_t, ++ x0 = svqincd_pat_n_u32 (x0, SV_ALL, 16), ++ x0 = svqincd_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u64.c +new file mode 100644 +index 000000000..49dc350df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u64.c +@@ -0,0 +1,401 @@ ++/* { dg-final { check-function-bodies "**" 
"" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincd_pat_1_u64_tied: ++** uqincd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_1_u64_tied, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_POW2, 1), ++ z0 = svqincd_pat (z0, SV_POW2, 1)) ++ ++/* ++** qincd_pat_1_u64_untied: ++** movprfx z0, z1 ++** uqincd z0\.d, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_1_u64_untied, svuint64_t, ++ z0 = svqincd_pat_u64 (z1, SV_POW2, 1), ++ z0 = svqincd_pat (z1, SV_POW2, 1)) ++ ++/* ++** qincd_pat_2_u64: ++** uqincd z0\.d, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_2_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_POW2, 2), ++ z0 = svqincd_pat (z0, SV_POW2, 2)) ++ ++/* ++** qincd_pat_7_u64: ++** uqincd z0\.d, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_7_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_POW2, 7), ++ z0 = svqincd_pat (z0, SV_POW2, 7)) ++ ++/* ++** qincd_pat_15_u64: ++** uqincd z0\.d, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_15_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_POW2, 15), ++ z0 = svqincd_pat (z0, SV_POW2, 15)) ++ ++/* ++** qincd_pat_16_u64: ++** uqincd z0\.d, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_16_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_POW2, 16), ++ z0 = svqincd_pat (z0, SV_POW2, 16)) ++ ++/* ++** qincd_pat_vl1_u64: ++** uqincd z0\.d, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl1_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL1, 16), ++ z0 = svqincd_pat (z0, SV_VL1, 16)) ++ ++/* ++** qincd_pat_vl2_u64: ++** uqincd z0\.d, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl2_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL2, 16), ++ z0 = svqincd_pat (z0, SV_VL2, 16)) ++ ++/* ++** qincd_pat_vl3_u64: ++** uqincd z0\.d, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl3_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL3, 16), ++ z0 = svqincd_pat (z0, SV_VL3, 16)) ++ ++/* ++** qincd_pat_vl4_u64: ++** uqincd z0\.d, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl4_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL4, 16), ++ z0 = svqincd_pat (z0, SV_VL4, 16)) ++ ++/* ++** qincd_pat_vl5_u64: ++** uqincd z0\.d, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl5_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL5, 16), ++ z0 = svqincd_pat (z0, SV_VL5, 16)) ++ ++/* ++** qincd_pat_vl6_u64: ++** uqincd z0\.d, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl6_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL6, 16), ++ z0 = svqincd_pat (z0, SV_VL6, 16)) ++ ++/* ++** qincd_pat_vl7_u64: ++** uqincd z0\.d, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl7_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL7, 16), ++ z0 = svqincd_pat (z0, SV_VL7, 16)) ++ ++/* ++** qincd_pat_vl8_u64: ++** uqincd z0\.d, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl8_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL8, 16), ++ z0 = svqincd_pat (z0, SV_VL8, 16)) ++ ++/* ++** qincd_pat_vl16_u64: ++** uqincd z0\.d, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl16_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL16, 16), ++ z0 = svqincd_pat (z0, SV_VL16, 16)) ++ ++/* ++** qincd_pat_vl32_u64: ++** uqincd z0\.d, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl32_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL32, 16), ++ z0 = svqincd_pat (z0, SV_VL32, 16)) ++ ++/* ++** qincd_pat_vl64_u64: ++** uqincd z0\.d, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl64_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, 
SV_VL64, 16), ++ z0 = svqincd_pat (z0, SV_VL64, 16)) ++ ++/* ++** qincd_pat_vl128_u64: ++** uqincd z0\.d, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl128_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL128, 16), ++ z0 = svqincd_pat (z0, SV_VL128, 16)) ++ ++/* ++** qincd_pat_vl256_u64: ++** uqincd z0\.d, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_vl256_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_VL256, 16), ++ z0 = svqincd_pat (z0, SV_VL256, 16)) ++ ++/* ++** qincd_pat_mul4_u64: ++** uqincd z0\.d, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_mul4_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_MUL4, 16), ++ z0 = svqincd_pat (z0, SV_MUL4, 16)) ++ ++/* ++** qincd_pat_mul3_u64: ++** uqincd z0\.d, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_mul3_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_MUL3, 16), ++ z0 = svqincd_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qincd_pat_all_u64: ++** uqincd z0\.d, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_pat_all_u64, svuint64_t, ++ z0 = svqincd_pat_u64 (z0, SV_ALL, 16), ++ z0 = svqincd_pat (z0, SV_ALL, 16)) ++ ++/* ++** qincd_pat_n_1_u64_tied: ++** uqincd x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_1_u64_tied, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_POW2, 1), ++ x0 = svqincd_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincd_pat_n_1_u64_untied: ++** mov x0, x1 ++** uqincd x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_1_u64_untied, uint64_t, ++ x0 = svqincd_pat_n_u64 (x1, SV_POW2, 1), ++ x0 = svqincd_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincd_pat_n_2_u64: ++** uqincd x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_2_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_POW2, 2), ++ x0 = svqincd_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincd_pat_n_7_u64: ++** uqincd x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_7_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_POW2, 7), ++ x0 = svqincd_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincd_pat_n_15_u64: ++** uqincd x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_15_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_POW2, 15), ++ x0 = svqincd_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincd_pat_n_16_u64: ++** uqincd x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_16_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_POW2, 16), ++ x0 = svqincd_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincd_pat_n_vl1_u64: ++** uqincd x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl1_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL1, 16), ++ x0 = svqincd_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincd_pat_n_vl2_u64: ++** uqincd x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl2_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL2, 16), ++ x0 = svqincd_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincd_pat_n_vl3_u64: ++** uqincd x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl3_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL3, 16), ++ x0 = svqincd_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincd_pat_n_vl4_u64: ++** uqincd x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl4_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL4, 16), ++ x0 = svqincd_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincd_pat_n_vl5_u64: ++** uqincd x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl5_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL5, 16), ++ x0 = svqincd_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincd_pat_n_vl6_u64: ++** uqincd x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl6_u64, uint64_t, ++ x0 = 
svqincd_pat_n_u64 (x0, SV_VL6, 16), ++ x0 = svqincd_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincd_pat_n_vl7_u64: ++** uqincd x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl7_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL7, 16), ++ x0 = svqincd_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincd_pat_n_vl8_u64: ++** uqincd x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl8_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL8, 16), ++ x0 = svqincd_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincd_pat_n_vl16_u64: ++** uqincd x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl16_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL16, 16), ++ x0 = svqincd_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincd_pat_n_vl32_u64: ++** uqincd x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl32_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL32, 16), ++ x0 = svqincd_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincd_pat_n_vl64_u64: ++** uqincd x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl64_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL64, 16), ++ x0 = svqincd_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincd_pat_n_vl128_u64: ++** uqincd x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl128_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL128, 16), ++ x0 = svqincd_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincd_pat_n_vl256_u64: ++** uqincd x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_vl256_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_VL256, 16), ++ x0 = svqincd_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincd_pat_n_mul4_u64: ++** uqincd x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_mul4_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_MUL4, 16), ++ x0 = svqincd_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincd_pat_n_mul3_u64: ++** uqincd x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_mul3_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_MUL3, 16), ++ x0 = svqincd_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincd_pat_n_all_u64: ++** uqincd x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_pat_n_all_u64, uint64_t, ++ x0 = svqincd_pat_n_u64 (x0, SV_ALL, 16), ++ x0 = svqincd_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s32.c +new file mode 100644 +index 000000000..2fa0438a3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincd_n_1_s32_tied: ++** sqincd x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_1_s32_tied, int32_t, ++ x0 = svqincd_n_s32 (x0, 1), ++ x0 = svqincd (x0, 1)) ++ ++/* ++** qincd_n_1_s32_untied: ++** mov w0, w1 ++** sqincd x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_1_s32_untied, int32_t, ++ x0 = svqincd_n_s32 (x1, 1), ++ x0 = svqincd (x1, 1)) ++ ++/* ++** qincd_n_2_s32: ++** sqincd x0, w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_2_s32, int32_t, ++ x0 = svqincd_n_s32 (x0, 2), ++ x0 = svqincd (x0, 2)) ++ ++/* ++** qincd_n_7_s32: ++** sqincd x0, w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_7_s32, int32_t, ++ x0 = svqincd_n_s32 (x0, 7), ++ x0 = svqincd (x0, 7)) ++ ++/* ++** qincd_n_15_s32: ++** sqincd x0, w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_15_s32, int32_t, ++ x0 = svqincd_n_s32 (x0, 15), ++ x0 = svqincd (x0, 15)) ++ ++/* ++** qincd_n_16_s32: ++** sqincd x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S 
(qincd_n_16_s32, int32_t, ++ x0 = svqincd_n_s32 (x0, 16), ++ x0 = svqincd (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s64.c +new file mode 100644 +index 000000000..0920ac2ec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s64.c +@@ -0,0 +1,113 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincd_1_s64_tied: ++** sqincd z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_1_s64_tied, svint64_t, ++ z0 = svqincd_s64 (z0, 1), ++ z0 = svqincd (z0, 1)) ++ ++/* ++** qincd_1_s64_untied: ++** movprfx z0, z1 ++** sqincd z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_1_s64_untied, svint64_t, ++ z0 = svqincd_s64 (z1, 1), ++ z0 = svqincd (z1, 1)) ++ ++/* ++** qincd_2_s64: ++** sqincd z0\.d, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_2_s64, svint64_t, ++ z0 = svqincd_s64 (z0, 2), ++ z0 = svqincd (z0, 2)) ++ ++/* ++** qincd_7_s64: ++** sqincd z0\.d, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_7_s64, svint64_t, ++ z0 = svqincd_s64 (z0, 7), ++ z0 = svqincd (z0, 7)) ++ ++/* ++** qincd_15_s64: ++** sqincd z0\.d, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_15_s64, svint64_t, ++ z0 = svqincd_s64 (z0, 15), ++ z0 = svqincd (z0, 15)) ++ ++/* ++** qincd_16_s64: ++** sqincd z0\.d, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_16_s64, svint64_t, ++ z0 = svqincd_s64 (z0, 16), ++ z0 = svqincd (z0, 16)) ++ ++/* ++** qincd_n_1_s64_tied: ++** sqincd x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_1_s64_tied, int64_t, ++ x0 = svqincd_n_s64 (x0, 1), ++ x0 = svqincd (x0, 1)) ++ ++/* ++** qincd_n_1_s64_untied: ++** mov x0, x1 ++** sqincd x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_1_s64_untied, int64_t, ++ x0 = svqincd_n_s64 (x1, 1), ++ x0 = svqincd (x1, 1)) ++ ++/* ++** qincd_n_2_s64: ++** sqincd x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_2_s64, int64_t, ++ x0 = svqincd_n_s64 (x0, 2), ++ x0 = svqincd (x0, 2)) ++ ++/* ++** qincd_n_7_s64: ++** sqincd x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_7_s64, int64_t, ++ x0 = svqincd_n_s64 (x0, 7), ++ x0 = svqincd (x0, 7)) ++ ++/* ++** qincd_n_15_s64: ++** sqincd x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_15_s64, int64_t, ++ x0 = svqincd_n_s64 (x0, 15), ++ x0 = svqincd (x0, 15)) ++ ++/* ++** qincd_n_16_s64: ++** sqincd x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_16_s64, int64_t, ++ x0 = svqincd_n_s64 (x0, 16), ++ x0 = svqincd (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u32.c +new file mode 100644 +index 000000000..33dc12cb1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincd_n_1_u32_tied: ++** uqincd w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_1_u32_tied, uint32_t, ++ x0 = svqincd_n_u32 (x0, 1), ++ x0 = svqincd (x0, 1)) ++ ++/* ++** qincd_n_1_u32_untied: ++** mov w0, w1 ++** uqincd w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_1_u32_untied, uint32_t, ++ x0 = svqincd_n_u32 (x1, 1), ++ x0 = svqincd (x1, 1)) ++ ++/* ++** qincd_n_2_u32: ++** uqincd w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_2_u32, uint32_t, ++ x0 = svqincd_n_u32 (x0, 2), ++ x0 = svqincd (x0, 2)) ++ ++/* ++** qincd_n_7_u32: ++** uqincd w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_7_u32, uint32_t, ++ x0 = 
svqincd_n_u32 (x0, 7), ++ x0 = svqincd (x0, 7)) ++ ++/* ++** qincd_n_15_u32: ++** uqincd w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_15_u32, uint32_t, ++ x0 = svqincd_n_u32 (x0, 15), ++ x0 = svqincd (x0, 15)) ++ ++/* ++** qincd_n_16_u32: ++** uqincd w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_16_u32, uint32_t, ++ x0 = svqincd_n_u32 (x0, 16), ++ x0 = svqincd (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u64.c +new file mode 100644 +index 000000000..28c611a8f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u64.c +@@ -0,0 +1,113 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincd_1_u64_tied: ++** uqincd z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_1_u64_tied, svuint64_t, ++ z0 = svqincd_u64 (z0, 1), ++ z0 = svqincd (z0, 1)) ++ ++/* ++** qincd_1_u64_untied: ++** movprfx z0, z1 ++** uqincd z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_1_u64_untied, svuint64_t, ++ z0 = svqincd_u64 (z1, 1), ++ z0 = svqincd (z1, 1)) ++ ++/* ++** qincd_2_u64: ++** uqincd z0\.d, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_2_u64, svuint64_t, ++ z0 = svqincd_u64 (z0, 2), ++ z0 = svqincd (z0, 2)) ++ ++/* ++** qincd_7_u64: ++** uqincd z0\.d, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_7_u64, svuint64_t, ++ z0 = svqincd_u64 (z0, 7), ++ z0 = svqincd (z0, 7)) ++ ++/* ++** qincd_15_u64: ++** uqincd z0\.d, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_15_u64, svuint64_t, ++ z0 = svqincd_u64 (z0, 15), ++ z0 = svqincd (z0, 15)) ++ ++/* ++** qincd_16_u64: ++** uqincd z0\.d, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincd_16_u64, svuint64_t, ++ z0 = svqincd_u64 (z0, 16), ++ z0 = svqincd (z0, 16)) ++ ++/* ++** qincd_n_1_u64_tied: ++** uqincd x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_1_u64_tied, uint64_t, ++ x0 = svqincd_n_u64 (x0, 1), ++ x0 = svqincd (x0, 1)) ++ ++/* ++** qincd_n_1_u64_untied: ++** mov x0, x1 ++** uqincd x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_1_u64_untied, uint64_t, ++ x0 = svqincd_n_u64 (x1, 1), ++ x0 = svqincd (x1, 1)) ++ ++/* ++** qincd_n_2_u64: ++** uqincd x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_2_u64, uint64_t, ++ x0 = svqincd_n_u64 (x0, 2), ++ x0 = svqincd (x0, 2)) ++ ++/* ++** qincd_n_7_u64: ++** uqincd x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_7_u64, uint64_t, ++ x0 = svqincd_n_u64 (x0, 7), ++ x0 = svqincd (x0, 7)) ++ ++/* ++** qincd_n_15_u64: ++** uqincd x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_15_u64, uint64_t, ++ x0 = svqincd_n_u64 (x0, 15), ++ x0 = svqincd (x0, 15)) ++ ++/* ++** qincd_n_16_u64: ++** uqincd x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincd_n_16_u64, uint64_t, ++ x0 = svqincd_n_u64 (x0, 16), ++ x0 = svqincd (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s16.c +new file mode 100644 +index 000000000..708d635c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s16.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_pat_1_s16_tied: ++** sqinch z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_1_s16_tied, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_POW2, 1), ++ z0 = svqinch_pat (z0, SV_POW2, 1)) ++ ++/* ++** qinch_pat_1_s16_untied: ++** movprfx z0, z1 ++** sqinch z0\.h, pow2 ++** ret ++*/ 
++TEST_UNIFORM_Z (qinch_pat_1_s16_untied, svint16_t, ++ z0 = svqinch_pat_s16 (z1, SV_POW2, 1), ++ z0 = svqinch_pat (z1, SV_POW2, 1)) ++ ++/* ++** qinch_pat_2_s16: ++** sqinch z0\.h, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_2_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_POW2, 2), ++ z0 = svqinch_pat (z0, SV_POW2, 2)) ++ ++/* ++** qinch_pat_7_s16: ++** sqinch z0\.h, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_7_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_POW2, 7), ++ z0 = svqinch_pat (z0, SV_POW2, 7)) ++ ++/* ++** qinch_pat_15_s16: ++** sqinch z0\.h, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_15_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_POW2, 15), ++ z0 = svqinch_pat (z0, SV_POW2, 15)) ++ ++/* ++** qinch_pat_16_s16: ++** sqinch z0\.h, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_16_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_POW2, 16), ++ z0 = svqinch_pat (z0, SV_POW2, 16)) ++ ++/* ++** qinch_pat_vl1_s16: ++** sqinch z0\.h, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl1_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL1, 16), ++ z0 = svqinch_pat (z0, SV_VL1, 16)) ++ ++/* ++** qinch_pat_vl2_s16: ++** sqinch z0\.h, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl2_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL2, 16), ++ z0 = svqinch_pat (z0, SV_VL2, 16)) ++ ++/* ++** qinch_pat_vl3_s16: ++** sqinch z0\.h, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl3_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL3, 16), ++ z0 = svqinch_pat (z0, SV_VL3, 16)) ++ ++/* ++** qinch_pat_vl4_s16: ++** sqinch z0\.h, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl4_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL4, 16), ++ z0 = svqinch_pat (z0, SV_VL4, 16)) ++ ++/* ++** qinch_pat_vl5_s16: ++** sqinch z0\.h, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl5_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL5, 16), ++ z0 = svqinch_pat (z0, SV_VL5, 16)) ++ ++/* ++** qinch_pat_vl6_s16: ++** sqinch z0\.h, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl6_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL6, 16), ++ z0 = svqinch_pat (z0, SV_VL6, 16)) ++ ++/* ++** qinch_pat_vl7_s16: ++** sqinch z0\.h, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl7_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL7, 16), ++ z0 = svqinch_pat (z0, SV_VL7, 16)) ++ ++/* ++** qinch_pat_vl8_s16: ++** sqinch z0\.h, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl8_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL8, 16), ++ z0 = svqinch_pat (z0, SV_VL8, 16)) ++ ++/* ++** qinch_pat_vl16_s16: ++** sqinch z0\.h, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl16_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL16, 16), ++ z0 = svqinch_pat (z0, SV_VL16, 16)) ++ ++/* ++** qinch_pat_vl32_s16: ++** sqinch z0\.h, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl32_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL32, 16), ++ z0 = svqinch_pat (z0, SV_VL32, 16)) ++ ++/* ++** qinch_pat_vl64_s16: ++** sqinch z0\.h, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl64_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL64, 16), ++ z0 = svqinch_pat (z0, SV_VL64, 16)) ++ ++/* ++** qinch_pat_vl128_s16: ++** sqinch z0\.h, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl128_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL128, 16), ++ z0 = svqinch_pat (z0, SV_VL128, 16)) ++ ++/* ++** qinch_pat_vl256_s16: ++** sqinch z0\.h, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z 
(qinch_pat_vl256_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_VL256, 16), ++ z0 = svqinch_pat (z0, SV_VL256, 16)) ++ ++/* ++** qinch_pat_mul4_s16: ++** sqinch z0\.h, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_mul4_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_MUL4, 16), ++ z0 = svqinch_pat (z0, SV_MUL4, 16)) ++ ++/* ++** qinch_pat_mul3_s16: ++** sqinch z0\.h, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_mul3_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_MUL3, 16), ++ z0 = svqinch_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qinch_pat_all_s16: ++** sqinch z0\.h, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_all_s16, svint16_t, ++ z0 = svqinch_pat_s16 (z0, SV_ALL, 16), ++ z0 = svqinch_pat (z0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s32.c +new file mode 100644 +index 000000000..7c91c6202 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_pat_n_1_s32_tied: ++** sqinch x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_1_s32_tied, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_POW2, 1), ++ x0 = svqinch_pat (x0, SV_POW2, 1)) ++ ++/* ++** qinch_pat_n_1_s32_untied: ++** mov w0, w1 ++** sqinch x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_1_s32_untied, int32_t, ++ x0 = svqinch_pat_n_s32 (x1, SV_POW2, 1), ++ x0 = svqinch_pat (x1, SV_POW2, 1)) ++ ++/* ++** qinch_pat_n_2_s32: ++** sqinch x0, w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_2_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_POW2, 2), ++ x0 = svqinch_pat (x0, SV_POW2, 2)) ++ ++/* ++** qinch_pat_n_7_s32: ++** sqinch x0, w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_7_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_POW2, 7), ++ x0 = svqinch_pat (x0, SV_POW2, 7)) ++ ++/* ++** qinch_pat_n_15_s32: ++** sqinch x0, w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_15_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_POW2, 15), ++ x0 = svqinch_pat (x0, SV_POW2, 15)) ++ ++/* ++** qinch_pat_n_16_s32: ++** sqinch x0, w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_16_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_POW2, 16), ++ x0 = svqinch_pat (x0, SV_POW2, 16)) ++ ++/* ++** qinch_pat_n_vl1_s32: ++** sqinch x0, w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl1_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL1, 16), ++ x0 = svqinch_pat (x0, SV_VL1, 16)) ++ ++/* ++** qinch_pat_n_vl2_s32: ++** sqinch x0, w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl2_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL2, 16), ++ x0 = svqinch_pat (x0, SV_VL2, 16)) ++ ++/* ++** qinch_pat_n_vl3_s32: ++** sqinch x0, w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl3_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL3, 16), ++ x0 = svqinch_pat (x0, SV_VL3, 16)) ++ ++/* ++** qinch_pat_n_vl4_s32: ++** sqinch x0, w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl4_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL4, 16), ++ x0 = svqinch_pat (x0, SV_VL4, 16)) ++ ++/* ++** qinch_pat_n_vl5_s32: ++** sqinch x0, w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl5_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL5, 16), ++ x0 = svqinch_pat (x0, SV_VL5, 16)) ++ ++/* ++** qinch_pat_n_vl6_s32: ++** sqinch x0, w0, vl6, mul #16 ++** ret ++*/ 
++TEST_UNIFORM_S (qinch_pat_n_vl6_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL6, 16), ++ x0 = svqinch_pat (x0, SV_VL6, 16)) ++ ++/* ++** qinch_pat_n_vl7_s32: ++** sqinch x0, w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl7_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL7, 16), ++ x0 = svqinch_pat (x0, SV_VL7, 16)) ++ ++/* ++** qinch_pat_n_vl8_s32: ++** sqinch x0, w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl8_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL8, 16), ++ x0 = svqinch_pat (x0, SV_VL8, 16)) ++ ++/* ++** qinch_pat_n_vl16_s32: ++** sqinch x0, w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl16_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL16, 16), ++ x0 = svqinch_pat (x0, SV_VL16, 16)) ++ ++/* ++** qinch_pat_n_vl32_s32: ++** sqinch x0, w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl32_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL32, 16), ++ x0 = svqinch_pat (x0, SV_VL32, 16)) ++ ++/* ++** qinch_pat_n_vl64_s32: ++** sqinch x0, w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl64_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL64, 16), ++ x0 = svqinch_pat (x0, SV_VL64, 16)) ++ ++/* ++** qinch_pat_n_vl128_s32: ++** sqinch x0, w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl128_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL128, 16), ++ x0 = svqinch_pat (x0, SV_VL128, 16)) ++ ++/* ++** qinch_pat_n_vl256_s32: ++** sqinch x0, w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl256_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_VL256, 16), ++ x0 = svqinch_pat (x0, SV_VL256, 16)) ++ ++/* ++** qinch_pat_n_mul4_s32: ++** sqinch x0, w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_mul4_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_MUL4, 16), ++ x0 = svqinch_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qinch_pat_n_mul3_s32: ++** sqinch x0, w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_mul3_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_MUL3, 16), ++ x0 = svqinch_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qinch_pat_n_all_s32: ++** sqinch x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_all_s32, int32_t, ++ x0 = svqinch_pat_n_s32 (x0, SV_ALL, 16), ++ x0 = svqinch_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s64.c +new file mode 100644 +index 000000000..2cde6482f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_pat_n_1_s64_tied: ++** sqinch x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_1_s64_tied, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_POW2, 1), ++ x0 = svqinch_pat (x0, SV_POW2, 1)) ++ ++/* ++** qinch_pat_n_1_s64_untied: ++** mov x0, x1 ++** sqinch x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_1_s64_untied, int64_t, ++ x0 = svqinch_pat_n_s64 (x1, SV_POW2, 1), ++ x0 = svqinch_pat (x1, SV_POW2, 1)) ++ ++/* ++** qinch_pat_n_2_s64: ++** sqinch x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_2_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_POW2, 2), ++ x0 = svqinch_pat (x0, SV_POW2, 2)) ++ ++/* ++** qinch_pat_n_7_s64: ++** sqinch x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_7_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_POW2, 7), ++ x0 = svqinch_pat (x0, SV_POW2, 7)) ++ ++/* ++** qinch_pat_n_15_s64: ++** 
sqinch x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_15_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_POW2, 15), ++ x0 = svqinch_pat (x0, SV_POW2, 15)) ++ ++/* ++** qinch_pat_n_16_s64: ++** sqinch x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_16_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_POW2, 16), ++ x0 = svqinch_pat (x0, SV_POW2, 16)) ++ ++/* ++** qinch_pat_n_vl1_s64: ++** sqinch x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl1_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL1, 16), ++ x0 = svqinch_pat (x0, SV_VL1, 16)) ++ ++/* ++** qinch_pat_n_vl2_s64: ++** sqinch x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl2_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL2, 16), ++ x0 = svqinch_pat (x0, SV_VL2, 16)) ++ ++/* ++** qinch_pat_n_vl3_s64: ++** sqinch x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl3_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL3, 16), ++ x0 = svqinch_pat (x0, SV_VL3, 16)) ++ ++/* ++** qinch_pat_n_vl4_s64: ++** sqinch x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl4_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL4, 16), ++ x0 = svqinch_pat (x0, SV_VL4, 16)) ++ ++/* ++** qinch_pat_n_vl5_s64: ++** sqinch x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl5_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL5, 16), ++ x0 = svqinch_pat (x0, SV_VL5, 16)) ++ ++/* ++** qinch_pat_n_vl6_s64: ++** sqinch x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl6_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL6, 16), ++ x0 = svqinch_pat (x0, SV_VL6, 16)) ++ ++/* ++** qinch_pat_n_vl7_s64: ++** sqinch x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl7_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL7, 16), ++ x0 = svqinch_pat (x0, SV_VL7, 16)) ++ ++/* ++** qinch_pat_n_vl8_s64: ++** sqinch x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl8_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL8, 16), ++ x0 = svqinch_pat (x0, SV_VL8, 16)) ++ ++/* ++** qinch_pat_n_vl16_s64: ++** sqinch x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl16_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL16, 16), ++ x0 = svqinch_pat (x0, SV_VL16, 16)) ++ ++/* ++** qinch_pat_n_vl32_s64: ++** sqinch x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl32_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL32, 16), ++ x0 = svqinch_pat (x0, SV_VL32, 16)) ++ ++/* ++** qinch_pat_n_vl64_s64: ++** sqinch x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl64_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL64, 16), ++ x0 = svqinch_pat (x0, SV_VL64, 16)) ++ ++/* ++** qinch_pat_n_vl128_s64: ++** sqinch x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl128_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL128, 16), ++ x0 = svqinch_pat (x0, SV_VL128, 16)) ++ ++/* ++** qinch_pat_n_vl256_s64: ++** sqinch x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl256_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_VL256, 16), ++ x0 = svqinch_pat (x0, SV_VL256, 16)) ++ ++/* ++** qinch_pat_n_mul4_s64: ++** sqinch x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_mul4_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_MUL4, 16), ++ x0 = svqinch_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qinch_pat_n_mul3_s64: ++** sqinch x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_mul3_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_MUL3, 16), ++ x0 = svqinch_pat (x0, SV_MUL3, 16)) ++ ++/* ++** 
qinch_pat_n_all_s64: ++** sqinch x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_all_s64, int64_t, ++ x0 = svqinch_pat_n_s64 (x0, SV_ALL, 16), ++ x0 = svqinch_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u16.c +new file mode 100644 +index 000000000..5a1a846a0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u16.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_pat_1_u16_tied: ++** uqinch z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_1_u16_tied, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_POW2, 1), ++ z0 = svqinch_pat (z0, SV_POW2, 1)) ++ ++/* ++** qinch_pat_1_u16_untied: ++** movprfx z0, z1 ++** uqinch z0\.h, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_1_u16_untied, svuint16_t, ++ z0 = svqinch_pat_u16 (z1, SV_POW2, 1), ++ z0 = svqinch_pat (z1, SV_POW2, 1)) ++ ++/* ++** qinch_pat_2_u16: ++** uqinch z0\.h, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_2_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_POW2, 2), ++ z0 = svqinch_pat (z0, SV_POW2, 2)) ++ ++/* ++** qinch_pat_7_u16: ++** uqinch z0\.h, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_7_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_POW2, 7), ++ z0 = svqinch_pat (z0, SV_POW2, 7)) ++ ++/* ++** qinch_pat_15_u16: ++** uqinch z0\.h, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_15_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_POW2, 15), ++ z0 = svqinch_pat (z0, SV_POW2, 15)) ++ ++/* ++** qinch_pat_16_u16: ++** uqinch z0\.h, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_16_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_POW2, 16), ++ z0 = svqinch_pat (z0, SV_POW2, 16)) ++ ++/* ++** qinch_pat_vl1_u16: ++** uqinch z0\.h, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl1_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL1, 16), ++ z0 = svqinch_pat (z0, SV_VL1, 16)) ++ ++/* ++** qinch_pat_vl2_u16: ++** uqinch z0\.h, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl2_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL2, 16), ++ z0 = svqinch_pat (z0, SV_VL2, 16)) ++ ++/* ++** qinch_pat_vl3_u16: ++** uqinch z0\.h, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl3_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL3, 16), ++ z0 = svqinch_pat (z0, SV_VL3, 16)) ++ ++/* ++** qinch_pat_vl4_u16: ++** uqinch z0\.h, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl4_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL4, 16), ++ z0 = svqinch_pat (z0, SV_VL4, 16)) ++ ++/* ++** qinch_pat_vl5_u16: ++** uqinch z0\.h, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl5_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL5, 16), ++ z0 = svqinch_pat (z0, SV_VL5, 16)) ++ ++/* ++** qinch_pat_vl6_u16: ++** uqinch z0\.h, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl6_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL6, 16), ++ z0 = svqinch_pat (z0, SV_VL6, 16)) ++ ++/* ++** qinch_pat_vl7_u16: ++** uqinch z0\.h, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl7_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL7, 16), ++ z0 = svqinch_pat (z0, SV_VL7, 16)) ++ ++/* ++** qinch_pat_vl8_u16: ++** uqinch z0\.h, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl8_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL8, 16), ++ z0 = svqinch_pat (z0, SV_VL8, 16)) ++ ++/* ++** qinch_pat_vl16_u16: ++** uqinch z0\.h, 
vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl16_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL16, 16), ++ z0 = svqinch_pat (z0, SV_VL16, 16)) ++ ++/* ++** qinch_pat_vl32_u16: ++** uqinch z0\.h, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl32_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL32, 16), ++ z0 = svqinch_pat (z0, SV_VL32, 16)) ++ ++/* ++** qinch_pat_vl64_u16: ++** uqinch z0\.h, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl64_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL64, 16), ++ z0 = svqinch_pat (z0, SV_VL64, 16)) ++ ++/* ++** qinch_pat_vl128_u16: ++** uqinch z0\.h, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl128_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL128, 16), ++ z0 = svqinch_pat (z0, SV_VL128, 16)) ++ ++/* ++** qinch_pat_vl256_u16: ++** uqinch z0\.h, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_vl256_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_VL256, 16), ++ z0 = svqinch_pat (z0, SV_VL256, 16)) ++ ++/* ++** qinch_pat_mul4_u16: ++** uqinch z0\.h, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_mul4_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_MUL4, 16), ++ z0 = svqinch_pat (z0, SV_MUL4, 16)) ++ ++/* ++** qinch_pat_mul3_u16: ++** uqinch z0\.h, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_mul3_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_MUL3, 16), ++ z0 = svqinch_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qinch_pat_all_u16: ++** uqinch z0\.h, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_pat_all_u16, svuint16_t, ++ z0 = svqinch_pat_u16 (z0, SV_ALL, 16), ++ z0 = svqinch_pat (z0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u32.c +new file mode 100644 +index 000000000..8398c5689 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u32.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_pat_n_1_u32_tied: ++** uqinch w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_1_u32_tied, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_POW2, 1), ++ x0 = svqinch_pat (x0, SV_POW2, 1)) ++ ++/* ++** qinch_pat_n_1_u32_untied: ++** mov w0, w1 ++** uqinch w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_1_u32_untied, uint32_t, ++ x0 = svqinch_pat_n_u32 (x1, SV_POW2, 1), ++ x0 = svqinch_pat (x1, SV_POW2, 1)) ++ ++/* ++** qinch_pat_n_2_u32: ++** uqinch w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_2_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_POW2, 2), ++ x0 = svqinch_pat (x0, SV_POW2, 2)) ++ ++/* ++** qinch_pat_n_7_u32: ++** uqinch w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_7_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_POW2, 7), ++ x0 = svqinch_pat (x0, SV_POW2, 7)) ++ ++/* ++** qinch_pat_n_15_u32: ++** uqinch w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_15_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_POW2, 15), ++ x0 = svqinch_pat (x0, SV_POW2, 15)) ++ ++/* ++** qinch_pat_n_16_u32: ++** uqinch w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_16_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_POW2, 16), ++ x0 = svqinch_pat (x0, SV_POW2, 16)) ++ ++/* ++** qinch_pat_n_vl1_u32: ++** uqinch w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl1_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL1, 16), ++ x0 = svqinch_pat (x0, SV_VL1, 16)) ++ ++/* ++** qinch_pat_n_vl2_u32: ++** uqinch 
w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl2_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL2, 16), ++ x0 = svqinch_pat (x0, SV_VL2, 16)) ++ ++/* ++** qinch_pat_n_vl3_u32: ++** uqinch w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl3_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL3, 16), ++ x0 = svqinch_pat (x0, SV_VL3, 16)) ++ ++/* ++** qinch_pat_n_vl4_u32: ++** uqinch w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl4_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL4, 16), ++ x0 = svqinch_pat (x0, SV_VL4, 16)) ++ ++/* ++** qinch_pat_n_vl5_u32: ++** uqinch w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl5_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL5, 16), ++ x0 = svqinch_pat (x0, SV_VL5, 16)) ++ ++/* ++** qinch_pat_n_vl6_u32: ++** uqinch w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl6_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL6, 16), ++ x0 = svqinch_pat (x0, SV_VL6, 16)) ++ ++/* ++** qinch_pat_n_vl7_u32: ++** uqinch w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl7_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL7, 16), ++ x0 = svqinch_pat (x0, SV_VL7, 16)) ++ ++/* ++** qinch_pat_n_vl8_u32: ++** uqinch w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl8_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL8, 16), ++ x0 = svqinch_pat (x0, SV_VL8, 16)) ++ ++/* ++** qinch_pat_n_vl16_u32: ++** uqinch w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl16_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL16, 16), ++ x0 = svqinch_pat (x0, SV_VL16, 16)) ++ ++/* ++** qinch_pat_n_vl32_u32: ++** uqinch w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl32_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL32, 16), ++ x0 = svqinch_pat (x0, SV_VL32, 16)) ++ ++/* ++** qinch_pat_n_vl64_u32: ++** uqinch w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl64_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL64, 16), ++ x0 = svqinch_pat (x0, SV_VL64, 16)) ++ ++/* ++** qinch_pat_n_vl128_u32: ++** uqinch w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl128_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL128, 16), ++ x0 = svqinch_pat (x0, SV_VL128, 16)) ++ ++/* ++** qinch_pat_n_vl256_u32: ++** uqinch w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl256_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_VL256, 16), ++ x0 = svqinch_pat (x0, SV_VL256, 16)) ++ ++/* ++** qinch_pat_n_mul4_u32: ++** uqinch w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_mul4_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_MUL4, 16), ++ x0 = svqinch_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qinch_pat_n_mul3_u32: ++** uqinch w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_mul3_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_MUL3, 16), ++ x0 = svqinch_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qinch_pat_n_all_u32: ++** uqinch w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_all_u32, uint32_t, ++ x0 = svqinch_pat_n_u32 (x0, SV_ALL, 16), ++ x0 = svqinch_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u64.c +new file mode 100644 +index 000000000..51722646d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_pat_n_1_u64_tied: ++** 
uqinch x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_1_u64_tied, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_POW2, 1), ++ x0 = svqinch_pat (x0, SV_POW2, 1)) ++ ++/* ++** qinch_pat_n_1_u64_untied: ++** mov x0, x1 ++** uqinch x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_1_u64_untied, uint64_t, ++ x0 = svqinch_pat_n_u64 (x1, SV_POW2, 1), ++ x0 = svqinch_pat (x1, SV_POW2, 1)) ++ ++/* ++** qinch_pat_n_2_u64: ++** uqinch x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_2_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_POW2, 2), ++ x0 = svqinch_pat (x0, SV_POW2, 2)) ++ ++/* ++** qinch_pat_n_7_u64: ++** uqinch x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_7_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_POW2, 7), ++ x0 = svqinch_pat (x0, SV_POW2, 7)) ++ ++/* ++** qinch_pat_n_15_u64: ++** uqinch x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_15_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_POW2, 15), ++ x0 = svqinch_pat (x0, SV_POW2, 15)) ++ ++/* ++** qinch_pat_n_16_u64: ++** uqinch x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_16_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_POW2, 16), ++ x0 = svqinch_pat (x0, SV_POW2, 16)) ++ ++/* ++** qinch_pat_n_vl1_u64: ++** uqinch x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl1_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL1, 16), ++ x0 = svqinch_pat (x0, SV_VL1, 16)) ++ ++/* ++** qinch_pat_n_vl2_u64: ++** uqinch x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl2_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL2, 16), ++ x0 = svqinch_pat (x0, SV_VL2, 16)) ++ ++/* ++** qinch_pat_n_vl3_u64: ++** uqinch x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl3_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL3, 16), ++ x0 = svqinch_pat (x0, SV_VL3, 16)) ++ ++/* ++** qinch_pat_n_vl4_u64: ++** uqinch x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl4_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL4, 16), ++ x0 = svqinch_pat (x0, SV_VL4, 16)) ++ ++/* ++** qinch_pat_n_vl5_u64: ++** uqinch x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl5_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL5, 16), ++ x0 = svqinch_pat (x0, SV_VL5, 16)) ++ ++/* ++** qinch_pat_n_vl6_u64: ++** uqinch x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl6_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL6, 16), ++ x0 = svqinch_pat (x0, SV_VL6, 16)) ++ ++/* ++** qinch_pat_n_vl7_u64: ++** uqinch x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl7_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL7, 16), ++ x0 = svqinch_pat (x0, SV_VL7, 16)) ++ ++/* ++** qinch_pat_n_vl8_u64: ++** uqinch x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl8_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL8, 16), ++ x0 = svqinch_pat (x0, SV_VL8, 16)) ++ ++/* ++** qinch_pat_n_vl16_u64: ++** uqinch x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl16_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL16, 16), ++ x0 = svqinch_pat (x0, SV_VL16, 16)) ++ ++/* ++** qinch_pat_n_vl32_u64: ++** uqinch x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl32_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL32, 16), ++ x0 = svqinch_pat (x0, SV_VL32, 16)) ++ ++/* ++** qinch_pat_n_vl64_u64: ++** uqinch x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl64_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL64, 16), ++ x0 = svqinch_pat (x0, SV_VL64, 16)) ++ ++/* ++** qinch_pat_n_vl128_u64: 
++** uqinch x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl128_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL128, 16), ++ x0 = svqinch_pat (x0, SV_VL128, 16)) ++ ++/* ++** qinch_pat_n_vl256_u64: ++** uqinch x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_vl256_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_VL256, 16), ++ x0 = svqinch_pat (x0, SV_VL256, 16)) ++ ++/* ++** qinch_pat_n_mul4_u64: ++** uqinch x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_mul4_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_MUL4, 16), ++ x0 = svqinch_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qinch_pat_n_mul3_u64: ++** uqinch x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_mul3_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_MUL3, 16), ++ x0 = svqinch_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qinch_pat_n_all_u64: ++** uqinch x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_pat_n_all_u64, uint64_t, ++ x0 = svqinch_pat_n_u64 (x0, SV_ALL, 16), ++ x0 = svqinch_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s16.c +new file mode 100644 +index 000000000..1f460db8e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s16.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_1_s16_tied: ++** sqinch z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_1_s16_tied, svint16_t, ++ z0 = svqinch_s16 (z0, 1), ++ z0 = svqinch (z0, 1)) ++ ++/* ++** qinch_1_s16_untied: ++** movprfx z0, z1 ++** sqinch z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_1_s16_untied, svint16_t, ++ z0 = svqinch_s16 (z1, 1), ++ z0 = svqinch (z1, 1)) ++ ++/* ++** qinch_2_s16: ++** sqinch z0\.h, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_2_s16, svint16_t, ++ z0 = svqinch_s16 (z0, 2), ++ z0 = svqinch (z0, 2)) ++ ++/* ++** qinch_7_s16: ++** sqinch z0\.h, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_7_s16, svint16_t, ++ z0 = svqinch_s16 (z0, 7), ++ z0 = svqinch (z0, 7)) ++ ++/* ++** qinch_15_s16: ++** sqinch z0\.h, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_15_s16, svint16_t, ++ z0 = svqinch_s16 (z0, 15), ++ z0 = svqinch (z0, 15)) ++ ++/* ++** qinch_16_s16: ++** sqinch z0\.h, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_16_s16, svint16_t, ++ z0 = svqinch_s16 (z0, 16), ++ z0 = svqinch (z0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s32.c +new file mode 100644 +index 000000000..a7b1aac80 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_n_1_s32_tied: ++** sqinch x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_1_s32_tied, int32_t, ++ x0 = svqinch_n_s32 (x0, 1), ++ x0 = svqinch (x0, 1)) ++ ++/* ++** qinch_n_1_s32_untied: ++** mov w0, w1 ++** sqinch x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_1_s32_untied, int32_t, ++ x0 = svqinch_n_s32 (x1, 1), ++ x0 = svqinch (x1, 1)) ++ ++/* ++** qinch_n_2_s32: ++** sqinch x0, w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_2_s32, int32_t, ++ x0 = svqinch_n_s32 (x0, 2), ++ x0 = svqinch (x0, 2)) ++ ++/* ++** qinch_n_7_s32: ++** sqinch x0, w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_7_s32, int32_t, ++ x0 = svqinch_n_s32 (x0, 7), ++ x0 = svqinch (x0, 7)) ++ ++/* ++** 
qinch_n_15_s32: ++** sqinch x0, w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_15_s32, int32_t, ++ x0 = svqinch_n_s32 (x0, 15), ++ x0 = svqinch (x0, 15)) ++ ++/* ++** qinch_n_16_s32: ++** sqinch x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_16_s32, int32_t, ++ x0 = svqinch_n_s32 (x0, 16), ++ x0 = svqinch (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s64.c +new file mode 100644 +index 000000000..74ac6a3df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_n_1_s64_tied: ++** sqinch x0 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_1_s64_tied, int64_t, ++ x0 = svqinch_n_s64 (x0, 1), ++ x0 = svqinch (x0, 1)) ++ ++/* ++** qinch_n_1_s64_untied: ++** mov x0, x1 ++** sqinch x0 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_1_s64_untied, int64_t, ++ x0 = svqinch_n_s64 (x1, 1), ++ x0 = svqinch (x1, 1)) ++ ++/* ++** qinch_n_2_s64: ++** sqinch x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_2_s64, int64_t, ++ x0 = svqinch_n_s64 (x0, 2), ++ x0 = svqinch (x0, 2)) ++ ++/* ++** qinch_n_7_s64: ++** sqinch x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_7_s64, int64_t, ++ x0 = svqinch_n_s64 (x0, 7), ++ x0 = svqinch (x0, 7)) ++ ++/* ++** qinch_n_15_s64: ++** sqinch x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_15_s64, int64_t, ++ x0 = svqinch_n_s64 (x0, 15), ++ x0 = svqinch (x0, 15)) ++ ++/* ++** qinch_n_16_s64: ++** sqinch x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_16_s64, int64_t, ++ x0 = svqinch_n_s64 (x0, 16), ++ x0 = svqinch (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u16.c +new file mode 100644 +index 000000000..aa9905897 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u16.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_1_u16_tied: ++** uqinch z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_1_u16_tied, svuint16_t, ++ z0 = svqinch_u16 (z0, 1), ++ z0 = svqinch (z0, 1)) ++ ++/* ++** qinch_1_u16_untied: ++** movprfx z0, z1 ++** uqinch z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_1_u16_untied, svuint16_t, ++ z0 = svqinch_u16 (z1, 1), ++ z0 = svqinch (z1, 1)) ++ ++/* ++** qinch_2_u16: ++** uqinch z0\.h, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_2_u16, svuint16_t, ++ z0 = svqinch_u16 (z0, 2), ++ z0 = svqinch (z0, 2)) ++ ++/* ++** qinch_7_u16: ++** uqinch z0\.h, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_7_u16, svuint16_t, ++ z0 = svqinch_u16 (z0, 7), ++ z0 = svqinch (z0, 7)) ++ ++/* ++** qinch_15_u16: ++** uqinch z0\.h, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_15_u16, svuint16_t, ++ z0 = svqinch_u16 (z0, 15), ++ z0 = svqinch (z0, 15)) ++ ++/* ++** qinch_16_u16: ++** uqinch z0\.h, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qinch_16_u16, svuint16_t, ++ z0 = svqinch_u16 (z0, 16), ++ z0 = svqinch (z0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u32.c +new file mode 100644 +index 000000000..396f95b2a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u32.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ 
++/* ++** qinch_n_1_u32_tied: ++** uqinch w0 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_1_u32_tied, uint32_t, ++ x0 = svqinch_n_u32 (x0, 1), ++ x0 = svqinch (x0, 1)) ++ ++/* ++** qinch_n_1_u32_untied: ++** mov w0, w1 ++** uqinch w0 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_1_u32_untied, uint32_t, ++ x0 = svqinch_n_u32 (x1, 1), ++ x0 = svqinch (x1, 1)) ++ ++/* ++** qinch_n_2_u32: ++** uqinch w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_2_u32, uint32_t, ++ x0 = svqinch_n_u32 (x0, 2), ++ x0 = svqinch (x0, 2)) ++ ++/* ++** qinch_n_7_u32: ++** uqinch w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_7_u32, uint32_t, ++ x0 = svqinch_n_u32 (x0, 7), ++ x0 = svqinch (x0, 7)) ++ ++/* ++** qinch_n_15_u32: ++** uqinch w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_15_u32, uint32_t, ++ x0 = svqinch_n_u32 (x0, 15), ++ x0 = svqinch (x0, 15)) ++ ++/* ++** qinch_n_16_u32: ++** uqinch w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_16_u32, uint32_t, ++ x0 = svqinch_n_u32 (x0, 16), ++ x0 = svqinch (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u64.c +new file mode 100644 +index 000000000..5a9231722 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qinch_n_1_u64_tied: ++** uqinch x0 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_1_u64_tied, uint64_t, ++ x0 = svqinch_n_u64 (x0, 1), ++ x0 = svqinch (x0, 1)) ++ ++/* ++** qinch_n_1_u64_untied: ++** mov x0, x1 ++** uqinch x0 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_1_u64_untied, uint64_t, ++ x0 = svqinch_n_u64 (x1, 1), ++ x0 = svqinch (x1, 1)) ++ ++/* ++** qinch_n_2_u64: ++** uqinch x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_2_u64, uint64_t, ++ x0 = svqinch_n_u64 (x0, 2), ++ x0 = svqinch (x0, 2)) ++ ++/* ++** qinch_n_7_u64: ++** uqinch x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_7_u64, uint64_t, ++ x0 = svqinch_n_u64 (x0, 7), ++ x0 = svqinch (x0, 7)) ++ ++/* ++** qinch_n_15_u64: ++** uqinch x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_15_u64, uint64_t, ++ x0 = svqinch_n_u64 (x0, 15), ++ x0 = svqinch (x0, 15)) ++ ++/* ++** qinch_n_16_u64: ++** uqinch x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qinch_n_16_u64, uint64_t, ++ x0 = svqinch_n_u64 (x0, 16), ++ x0 = svqinch (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s16.c +new file mode 100644 +index 000000000..979b57476 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s16.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincp_s16_tied: ++** sqincp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_s16_tied, svint16_t, ++ z0 = svqincp_s16 (z0, p0), ++ z0 = svqincp (z0, p0)) ++ ++/* ++** qincp_s16_untied: ++** movprfx z0, z1 ++** sqincp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_s16_untied, svint16_t, ++ z0 = svqincp_s16 (z1, p0), ++ z0 = svqincp (z1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s32.c +new file mode 100644 +index 000000000..46ad51b01 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s32.c +@@ -0,0 +1,98 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include 
"test_sve_acle.h" ++ ++/* ++** qincp_s32_tied: ++** sqincp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_s32_tied, svint32_t, ++ z0 = svqincp_s32 (z0, p0), ++ z0 = svqincp (z0, p0)) ++ ++/* ++** qincp_s32_untied: ++** movprfx z0, z1 ++** sqincp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_s32_untied, svint32_t, ++ z0 = svqincp_s32 (z1, p0), ++ z0 = svqincp (z1, p0)) ++ ++/* ++** qincp_n_s32_b8_tied: ++** sqincp x0, p0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s32_b8_tied, int32_t, ++ x0 = svqincp_n_s32_b8 (x0, p0), ++ x0 = svqincp_b8 (x0, p0)) ++ ++/* ++** qincp_n_s32_b8_untied: ++** mov w0, w1 ++** sqincp x0, p0\.b, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s32_b8_untied, int32_t, ++ x0 = svqincp_n_s32_b8 (x1, p0), ++ x0 = svqincp_b8 (x1, p0)) ++ ++/* ++** qincp_n_s32_b16_tied: ++** sqincp x0, p0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s32_b16_tied, int32_t, ++ x0 = svqincp_n_s32_b16 (x0, p0), ++ x0 = svqincp_b16 (x0, p0)) ++ ++/* ++** qincp_n_s32_b16_untied: ++** mov w0, w1 ++** sqincp x0, p0\.h, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s32_b16_untied, int32_t, ++ x0 = svqincp_n_s32_b16 (x1, p0), ++ x0 = svqincp_b16 (x1, p0)) ++ ++/* ++** qincp_n_s32_b32_tied: ++** sqincp x0, p0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s32_b32_tied, int32_t, ++ x0 = svqincp_n_s32_b32 (x0, p0), ++ x0 = svqincp_b32 (x0, p0)) ++ ++/* ++** qincp_n_s32_b32_untied: ++** mov w0, w1 ++** sqincp x0, p0\.s, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s32_b32_untied, int32_t, ++ x0 = svqincp_n_s32_b32 (x1, p0), ++ x0 = svqincp_b32 (x1, p0)) ++ ++/* ++** qincp_n_s32_b64_tied: ++** sqincp x0, p0\.d, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s32_b64_tied, int32_t, ++ x0 = svqincp_n_s32_b64 (x0, p0), ++ x0 = svqincp_b64 (x0, p0)) ++ ++/* ++** qincp_n_s32_b64_untied: ++** mov w0, w1 ++** sqincp x0, p0\.d, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s32_b64_untied, int32_t, ++ x0 = svqincp_n_s32_b64 (x1, p0), ++ x0 = svqincp_b64 (x1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s64.c +new file mode 100644 +index 000000000..226502328 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s64.c +@@ -0,0 +1,98 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincp_s64_tied: ++** sqincp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_s64_tied, svint64_t, ++ z0 = svqincp_s64 (z0, p0), ++ z0 = svqincp (z0, p0)) ++ ++/* ++** qincp_s64_untied: ++** movprfx z0, z1 ++** sqincp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_s64_untied, svint64_t, ++ z0 = svqincp_s64 (z1, p0), ++ z0 = svqincp (z1, p0)) ++ ++/* ++** qincp_n_s64_b8_tied: ++** sqincp x0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s64_b8_tied, int64_t, ++ x0 = svqincp_n_s64_b8 (x0, p0), ++ x0 = svqincp_b8 (x0, p0)) ++ ++/* ++** qincp_n_s64_b8_untied: ++** mov x0, x1 ++** sqincp x0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s64_b8_untied, int64_t, ++ x0 = svqincp_n_s64_b8 (x1, p0), ++ x0 = svqincp_b8 (x1, p0)) ++ ++/* ++** qincp_n_s64_b16_tied: ++** sqincp x0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s64_b16_tied, int64_t, ++ x0 = svqincp_n_s64_b16 (x0, p0), ++ x0 = svqincp_b16 (x0, p0)) ++ ++/* ++** qincp_n_s64_b16_untied: ++** mov x0, x1 ++** sqincp x0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s64_b16_untied, int64_t, ++ x0 = svqincp_n_s64_b16 (x1, p0), ++ x0 = svqincp_b16 (x1, p0)) ++ ++/* ++** qincp_n_s64_b32_tied: ++** sqincp x0, p0\.s ++** ret ++*/ 
++TEST_UNIFORM_S (qincp_n_s64_b32_tied, int64_t, ++ x0 = svqincp_n_s64_b32 (x0, p0), ++ x0 = svqincp_b32 (x0, p0)) ++ ++/* ++** qincp_n_s64_b32_untied: ++** mov x0, x1 ++** sqincp x0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s64_b32_untied, int64_t, ++ x0 = svqincp_n_s64_b32 (x1, p0), ++ x0 = svqincp_b32 (x1, p0)) ++ ++/* ++** qincp_n_s64_b64_tied: ++** sqincp x0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s64_b64_tied, int64_t, ++ x0 = svqincp_n_s64_b64 (x0, p0), ++ x0 = svqincp_b64 (x0, p0)) ++ ++/* ++** qincp_n_s64_b64_untied: ++** mov x0, x1 ++** sqincp x0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_s64_b64_untied, int64_t, ++ x0 = svqincp_n_s64_b64 (x1, p0), ++ x0 = svqincp_b64 (x1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u16.c +new file mode 100644 +index 000000000..ecd84470c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u16.c +@@ -0,0 +1,22 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincp_u16_tied: ++** uqincp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_u16_tied, svuint16_t, ++ z0 = svqincp_u16 (z0, p0), ++ z0 = svqincp (z0, p0)) ++ ++/* ++** qincp_u16_untied: ++** movprfx z0, z1 ++** uqincp z0\.h, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_u16_untied, svuint16_t, ++ z0 = svqincp_u16 (z1, p0), ++ z0 = svqincp (z1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u32.c +new file mode 100644 +index 000000000..011a26253 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u32.c +@@ -0,0 +1,98 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincp_u32_tied: ++** uqincp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_u32_tied, svuint32_t, ++ z0 = svqincp_u32 (z0, p0), ++ z0 = svqincp (z0, p0)) ++ ++/* ++** qincp_u32_untied: ++** movprfx z0, z1 ++** uqincp z0\.s, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_u32_untied, svuint32_t, ++ z0 = svqincp_u32 (z1, p0), ++ z0 = svqincp (z1, p0)) ++ ++/* ++** qincp_n_u32_b8_tied: ++** uqincp w0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u32_b8_tied, uint32_t, ++ x0 = svqincp_n_u32_b8 (x0, p0), ++ x0 = svqincp_b8 (x0, p0)) ++ ++/* ++** qincp_n_u32_b8_untied: ++** mov w0, w1 ++** uqincp w0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u32_b8_untied, uint32_t, ++ x0 = svqincp_n_u32_b8 (x1, p0), ++ x0 = svqincp_b8 (x1, p0)) ++ ++/* ++** qincp_n_u32_b16_tied: ++** uqincp w0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u32_b16_tied, uint32_t, ++ x0 = svqincp_n_u32_b16 (x0, p0), ++ x0 = svqincp_b16 (x0, p0)) ++ ++/* ++** qincp_n_u32_b16_untied: ++** mov w0, w1 ++** uqincp w0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u32_b16_untied, uint32_t, ++ x0 = svqincp_n_u32_b16 (x1, p0), ++ x0 = svqincp_b16 (x1, p0)) ++ ++/* ++** qincp_n_u32_b32_tied: ++** uqincp w0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u32_b32_tied, uint32_t, ++ x0 = svqincp_n_u32_b32 (x0, p0), ++ x0 = svqincp_b32 (x0, p0)) ++ ++/* ++** qincp_n_u32_b32_untied: ++** mov w0, w1 ++** uqincp w0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u32_b32_untied, uint32_t, ++ x0 = svqincp_n_u32_b32 (x1, p0), ++ x0 = svqincp_b32 (x1, p0)) ++ ++/* ++** qincp_n_u32_b64_tied: ++** uqincp w0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u32_b64_tied, uint32_t, ++ x0 = svqincp_n_u32_b64 (x0, p0), ++ x0 = svqincp_b64 (x0, p0)) ++ 
++/* ++** qincp_n_u32_b64_untied: ++** mov w0, w1 ++** uqincp w0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u32_b64_untied, uint32_t, ++ x0 = svqincp_n_u32_b64 (x1, p0), ++ x0 = svqincp_b64 (x1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u64.c +new file mode 100644 +index 000000000..761ac553a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u64.c +@@ -0,0 +1,98 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincp_u64_tied: ++** uqincp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_u64_tied, svuint64_t, ++ z0 = svqincp_u64 (z0, p0), ++ z0 = svqincp (z0, p0)) ++ ++/* ++** qincp_u64_untied: ++** movprfx z0, z1 ++** uqincp z0\.d, p0 ++** ret ++*/ ++TEST_UNIFORM_Z (qincp_u64_untied, svuint64_t, ++ z0 = svqincp_u64 (z1, p0), ++ z0 = svqincp (z1, p0)) ++ ++/* ++** qincp_n_u64_b8_tied: ++** uqincp x0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u64_b8_tied, uint64_t, ++ x0 = svqincp_n_u64_b8 (x0, p0), ++ x0 = svqincp_b8 (x0, p0)) ++ ++/* ++** qincp_n_u64_b8_untied: ++** mov x0, x1 ++** uqincp x0, p0\.b ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u64_b8_untied, uint64_t, ++ x0 = svqincp_n_u64_b8 (x1, p0), ++ x0 = svqincp_b8 (x1, p0)) ++ ++/* ++** qincp_n_u64_b16_tied: ++** uqincp x0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u64_b16_tied, uint64_t, ++ x0 = svqincp_n_u64_b16 (x0, p0), ++ x0 = svqincp_b16 (x0, p0)) ++ ++/* ++** qincp_n_u64_b16_untied: ++** mov x0, x1 ++** uqincp x0, p0\.h ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u64_b16_untied, uint64_t, ++ x0 = svqincp_n_u64_b16 (x1, p0), ++ x0 = svqincp_b16 (x1, p0)) ++ ++/* ++** qincp_n_u64_b32_tied: ++** uqincp x0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u64_b32_tied, uint64_t, ++ x0 = svqincp_n_u64_b32 (x0, p0), ++ x0 = svqincp_b32 (x0, p0)) ++ ++/* ++** qincp_n_u64_b32_untied: ++** mov x0, x1 ++** uqincp x0, p0\.s ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u64_b32_untied, uint64_t, ++ x0 = svqincp_n_u64_b32 (x1, p0), ++ x0 = svqincp_b32 (x1, p0)) ++ ++/* ++** qincp_n_u64_b64_tied: ++** uqincp x0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u64_b64_tied, uint64_t, ++ x0 = svqincp_n_u64_b64 (x0, p0), ++ x0 = svqincp_b64 (x0, p0)) ++ ++/* ++** qincp_n_u64_b64_untied: ++** mov x0, x1 ++** uqincp x0, p0\.d ++** ret ++*/ ++TEST_UNIFORM_S (qincp_n_u64_b64_untied, uint64_t, ++ x0 = svqincp_n_u64_b64 (x1, p0), ++ x0 = svqincp_b64 (x1, p0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s32.c +new file mode 100644 +index 000000000..6ceb003ab +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s32.c +@@ -0,0 +1,401 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincw_pat_1_s32_tied: ++** sqincw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_1_s32_tied, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_POW2, 1), ++ z0 = svqincw_pat (z0, SV_POW2, 1)) ++ ++/* ++** qincw_pat_1_s32_untied: ++** movprfx z0, z1 ++** sqincw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_1_s32_untied, svint32_t, ++ z0 = svqincw_pat_s32 (z1, SV_POW2, 1), ++ z0 = svqincw_pat (z1, SV_POW2, 1)) ++ ++/* ++** qincw_pat_2_s32: ++** sqincw z0\.s, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_2_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_POW2, 2), ++ z0 = svqincw_pat (z0, SV_POW2, 2)) ++ ++/* ++** 
qincw_pat_7_s32: ++** sqincw z0\.s, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_7_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_POW2, 7), ++ z0 = svqincw_pat (z0, SV_POW2, 7)) ++ ++/* ++** qincw_pat_15_s32: ++** sqincw z0\.s, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_15_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_POW2, 15), ++ z0 = svqincw_pat (z0, SV_POW2, 15)) ++ ++/* ++** qincw_pat_16_s32: ++** sqincw z0\.s, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_16_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_POW2, 16), ++ z0 = svqincw_pat (z0, SV_POW2, 16)) ++ ++/* ++** qincw_pat_vl1_s32: ++** sqincw z0\.s, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl1_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL1, 16), ++ z0 = svqincw_pat (z0, SV_VL1, 16)) ++ ++/* ++** qincw_pat_vl2_s32: ++** sqincw z0\.s, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl2_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL2, 16), ++ z0 = svqincw_pat (z0, SV_VL2, 16)) ++ ++/* ++** qincw_pat_vl3_s32: ++** sqincw z0\.s, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl3_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL3, 16), ++ z0 = svqincw_pat (z0, SV_VL3, 16)) ++ ++/* ++** qincw_pat_vl4_s32: ++** sqincw z0\.s, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl4_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL4, 16), ++ z0 = svqincw_pat (z0, SV_VL4, 16)) ++ ++/* ++** qincw_pat_vl5_s32: ++** sqincw z0\.s, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl5_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL5, 16), ++ z0 = svqincw_pat (z0, SV_VL5, 16)) ++ ++/* ++** qincw_pat_vl6_s32: ++** sqincw z0\.s, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl6_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL6, 16), ++ z0 = svqincw_pat (z0, SV_VL6, 16)) ++ ++/* ++** qincw_pat_vl7_s32: ++** sqincw z0\.s, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl7_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL7, 16), ++ z0 = svqincw_pat (z0, SV_VL7, 16)) ++ ++/* ++** qincw_pat_vl8_s32: ++** sqincw z0\.s, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl8_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL8, 16), ++ z0 = svqincw_pat (z0, SV_VL8, 16)) ++ ++/* ++** qincw_pat_vl16_s32: ++** sqincw z0\.s, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl16_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL16, 16), ++ z0 = svqincw_pat (z0, SV_VL16, 16)) ++ ++/* ++** qincw_pat_vl32_s32: ++** sqincw z0\.s, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl32_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL32, 16), ++ z0 = svqincw_pat (z0, SV_VL32, 16)) ++ ++/* ++** qincw_pat_vl64_s32: ++** sqincw z0\.s, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl64_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL64, 16), ++ z0 = svqincw_pat (z0, SV_VL64, 16)) ++ ++/* ++** qincw_pat_vl128_s32: ++** sqincw z0\.s, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl128_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL128, 16), ++ z0 = svqincw_pat (z0, SV_VL128, 16)) ++ ++/* ++** qincw_pat_vl256_s32: ++** sqincw z0\.s, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl256_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_VL256, 16), ++ z0 = svqincw_pat (z0, SV_VL256, 16)) ++ ++/* ++** qincw_pat_mul4_s32: ++** sqincw z0\.s, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_mul4_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_MUL4, 16), ++ z0 = svqincw_pat (z0, SV_MUL4, 16)) ++ ++/* ++** 
qincw_pat_mul3_s32: ++** sqincw z0\.s, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_mul3_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_MUL3, 16), ++ z0 = svqincw_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qincw_pat_all_s32: ++** sqincw z0\.s, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_all_s32, svint32_t, ++ z0 = svqincw_pat_s32 (z0, SV_ALL, 16), ++ z0 = svqincw_pat (z0, SV_ALL, 16)) ++ ++/* ++** qincw_pat_n_1_s32_tied: ++** sqincw x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_1_s32_tied, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_POW2, 1), ++ x0 = svqincw_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincw_pat_n_1_s32_untied: ++** mov w0, w1 ++** sqincw x0, w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_1_s32_untied, int32_t, ++ x0 = svqincw_pat_n_s32 (x1, SV_POW2, 1), ++ x0 = svqincw_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincw_pat_n_2_s32: ++** sqincw x0, w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_2_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_POW2, 2), ++ x0 = svqincw_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincw_pat_n_7_s32: ++** sqincw x0, w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_7_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_POW2, 7), ++ x0 = svqincw_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincw_pat_n_15_s32: ++** sqincw x0, w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_15_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_POW2, 15), ++ x0 = svqincw_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincw_pat_n_16_s32: ++** sqincw x0, w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_16_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_POW2, 16), ++ x0 = svqincw_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincw_pat_n_vl1_s32: ++** sqincw x0, w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl1_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL1, 16), ++ x0 = svqincw_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincw_pat_n_vl2_s32: ++** sqincw x0, w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl2_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL2, 16), ++ x0 = svqincw_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincw_pat_n_vl3_s32: ++** sqincw x0, w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl3_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL3, 16), ++ x0 = svqincw_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincw_pat_n_vl4_s32: ++** sqincw x0, w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl4_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL4, 16), ++ x0 = svqincw_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincw_pat_n_vl5_s32: ++** sqincw x0, w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl5_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL5, 16), ++ x0 = svqincw_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincw_pat_n_vl6_s32: ++** sqincw x0, w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl6_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL6, 16), ++ x0 = svqincw_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincw_pat_n_vl7_s32: ++** sqincw x0, w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl7_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL7, 16), ++ x0 = svqincw_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincw_pat_n_vl8_s32: ++** sqincw x0, w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl8_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL8, 16), ++ x0 = svqincw_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincw_pat_n_vl16_s32: ++** sqincw x0, w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl16_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL16, 16), ++ x0 = 
svqincw_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincw_pat_n_vl32_s32: ++** sqincw x0, w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl32_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL32, 16), ++ x0 = svqincw_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincw_pat_n_vl64_s32: ++** sqincw x0, w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl64_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL64, 16), ++ x0 = svqincw_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincw_pat_n_vl128_s32: ++** sqincw x0, w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl128_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL128, 16), ++ x0 = svqincw_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincw_pat_n_vl256_s32: ++** sqincw x0, w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl256_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_VL256, 16), ++ x0 = svqincw_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincw_pat_n_mul4_s32: ++** sqincw x0, w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_mul4_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_MUL4, 16), ++ x0 = svqincw_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincw_pat_n_mul3_s32: ++** sqincw x0, w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_mul3_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_MUL3, 16), ++ x0 = svqincw_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincw_pat_n_all_s32: ++** sqincw x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_all_s32, int32_t, ++ x0 = svqincw_pat_n_s32 (x0, SV_ALL, 16), ++ x0 = svqincw_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s64.c +new file mode 100644 +index 000000000..feebc25cc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincw_pat_n_1_s64_tied: ++** sqincw x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_1_s64_tied, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_POW2, 1), ++ x0 = svqincw_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincw_pat_n_1_s64_untied: ++** mov x0, x1 ++** sqincw x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_1_s64_untied, int64_t, ++ x0 = svqincw_pat_n_s64 (x1, SV_POW2, 1), ++ x0 = svqincw_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincw_pat_n_2_s64: ++** sqincw x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_2_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_POW2, 2), ++ x0 = svqincw_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincw_pat_n_7_s64: ++** sqincw x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_7_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_POW2, 7), ++ x0 = svqincw_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincw_pat_n_15_s64: ++** sqincw x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_15_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_POW2, 15), ++ x0 = svqincw_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincw_pat_n_16_s64: ++** sqincw x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_16_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_POW2, 16), ++ x0 = svqincw_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincw_pat_n_vl1_s64: ++** sqincw x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl1_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL1, 16), ++ x0 = svqincw_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincw_pat_n_vl2_s64: ++** sqincw x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl2_s64, int64_t, ++ x0 = svqincw_pat_n_s64 
(x0, SV_VL2, 16), ++ x0 = svqincw_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincw_pat_n_vl3_s64: ++** sqincw x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl3_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL3, 16), ++ x0 = svqincw_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincw_pat_n_vl4_s64: ++** sqincw x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl4_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL4, 16), ++ x0 = svqincw_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincw_pat_n_vl5_s64: ++** sqincw x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl5_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL5, 16), ++ x0 = svqincw_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincw_pat_n_vl6_s64: ++** sqincw x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl6_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL6, 16), ++ x0 = svqincw_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincw_pat_n_vl7_s64: ++** sqincw x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl7_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL7, 16), ++ x0 = svqincw_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincw_pat_n_vl8_s64: ++** sqincw x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl8_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL8, 16), ++ x0 = svqincw_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincw_pat_n_vl16_s64: ++** sqincw x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl16_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL16, 16), ++ x0 = svqincw_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincw_pat_n_vl32_s64: ++** sqincw x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl32_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL32, 16), ++ x0 = svqincw_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincw_pat_n_vl64_s64: ++** sqincw x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl64_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL64, 16), ++ x0 = svqincw_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincw_pat_n_vl128_s64: ++** sqincw x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl128_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL128, 16), ++ x0 = svqincw_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincw_pat_n_vl256_s64: ++** sqincw x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl256_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_VL256, 16), ++ x0 = svqincw_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincw_pat_n_mul4_s64: ++** sqincw x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_mul4_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_MUL4, 16), ++ x0 = svqincw_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincw_pat_n_mul3_s64: ++** sqincw x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_mul3_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_MUL3, 16), ++ x0 = svqincw_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincw_pat_n_all_s64: ++** sqincw x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_all_s64, int64_t, ++ x0 = svqincw_pat_n_s64 (x0, SV_ALL, 16), ++ x0 = svqincw_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u32.c +new file mode 100644 +index 000000000..e08e91d09 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u32.c +@@ -0,0 +1,401 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincw_pat_1_u32_tied: ++** uqincw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_1_u32_tied, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, 
SV_POW2, 1), ++ z0 = svqincw_pat (z0, SV_POW2, 1)) ++ ++/* ++** qincw_pat_1_u32_untied: ++** movprfx z0, z1 ++** uqincw z0\.s, pow2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_1_u32_untied, svuint32_t, ++ z0 = svqincw_pat_u32 (z1, SV_POW2, 1), ++ z0 = svqincw_pat (z1, SV_POW2, 1)) ++ ++/* ++** qincw_pat_2_u32: ++** uqincw z0\.s, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_2_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_POW2, 2), ++ z0 = svqincw_pat (z0, SV_POW2, 2)) ++ ++/* ++** qincw_pat_7_u32: ++** uqincw z0\.s, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_7_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_POW2, 7), ++ z0 = svqincw_pat (z0, SV_POW2, 7)) ++ ++/* ++** qincw_pat_15_u32: ++** uqincw z0\.s, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_15_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_POW2, 15), ++ z0 = svqincw_pat (z0, SV_POW2, 15)) ++ ++/* ++** qincw_pat_16_u32: ++** uqincw z0\.s, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_16_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_POW2, 16), ++ z0 = svqincw_pat (z0, SV_POW2, 16)) ++ ++/* ++** qincw_pat_vl1_u32: ++** uqincw z0\.s, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl1_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL1, 16), ++ z0 = svqincw_pat (z0, SV_VL1, 16)) ++ ++/* ++** qincw_pat_vl2_u32: ++** uqincw z0\.s, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl2_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL2, 16), ++ z0 = svqincw_pat (z0, SV_VL2, 16)) ++ ++/* ++** qincw_pat_vl3_u32: ++** uqincw z0\.s, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl3_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL3, 16), ++ z0 = svqincw_pat (z0, SV_VL3, 16)) ++ ++/* ++** qincw_pat_vl4_u32: ++** uqincw z0\.s, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl4_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL4, 16), ++ z0 = svqincw_pat (z0, SV_VL4, 16)) ++ ++/* ++** qincw_pat_vl5_u32: ++** uqincw z0\.s, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl5_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL5, 16), ++ z0 = svqincw_pat (z0, SV_VL5, 16)) ++ ++/* ++** qincw_pat_vl6_u32: ++** uqincw z0\.s, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl6_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL6, 16), ++ z0 = svqincw_pat (z0, SV_VL6, 16)) ++ ++/* ++** qincw_pat_vl7_u32: ++** uqincw z0\.s, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl7_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL7, 16), ++ z0 = svqincw_pat (z0, SV_VL7, 16)) ++ ++/* ++** qincw_pat_vl8_u32: ++** uqincw z0\.s, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl8_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL8, 16), ++ z0 = svqincw_pat (z0, SV_VL8, 16)) ++ ++/* ++** qincw_pat_vl16_u32: ++** uqincw z0\.s, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl16_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL16, 16), ++ z0 = svqincw_pat (z0, SV_VL16, 16)) ++ ++/* ++** qincw_pat_vl32_u32: ++** uqincw z0\.s, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl32_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL32, 16), ++ z0 = svqincw_pat (z0, SV_VL32, 16)) ++ ++/* ++** qincw_pat_vl64_u32: ++** uqincw z0\.s, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl64_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL64, 16), ++ z0 = svqincw_pat (z0, SV_VL64, 16)) ++ ++/* ++** qincw_pat_vl128_u32: ++** uqincw z0\.s, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl128_u32, svuint32_t, ++ z0 = svqincw_pat_u32 
(z0, SV_VL128, 16), ++ z0 = svqincw_pat (z0, SV_VL128, 16)) ++ ++/* ++** qincw_pat_vl256_u32: ++** uqincw z0\.s, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_vl256_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_VL256, 16), ++ z0 = svqincw_pat (z0, SV_VL256, 16)) ++ ++/* ++** qincw_pat_mul4_u32: ++** uqincw z0\.s, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_mul4_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_MUL4, 16), ++ z0 = svqincw_pat (z0, SV_MUL4, 16)) ++ ++/* ++** qincw_pat_mul3_u32: ++** uqincw z0\.s, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_mul3_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_MUL3, 16), ++ z0 = svqincw_pat (z0, SV_MUL3, 16)) ++ ++/* ++** qincw_pat_all_u32: ++** uqincw z0\.s, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_pat_all_u32, svuint32_t, ++ z0 = svqincw_pat_u32 (z0, SV_ALL, 16), ++ z0 = svqincw_pat (z0, SV_ALL, 16)) ++ ++/* ++** qincw_pat_n_1_u32_tied: ++** uqincw w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_1_u32_tied, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_POW2, 1), ++ x0 = svqincw_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincw_pat_n_1_u32_untied: ++** mov w0, w1 ++** uqincw w0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_1_u32_untied, uint32_t, ++ x0 = svqincw_pat_n_u32 (x1, SV_POW2, 1), ++ x0 = svqincw_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincw_pat_n_2_u32: ++** uqincw w0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_2_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_POW2, 2), ++ x0 = svqincw_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincw_pat_n_7_u32: ++** uqincw w0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_7_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_POW2, 7), ++ x0 = svqincw_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincw_pat_n_15_u32: ++** uqincw w0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_15_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_POW2, 15), ++ x0 = svqincw_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincw_pat_n_16_u32: ++** uqincw w0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_16_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_POW2, 16), ++ x0 = svqincw_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincw_pat_n_vl1_u32: ++** uqincw w0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl1_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL1, 16), ++ x0 = svqincw_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincw_pat_n_vl2_u32: ++** uqincw w0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl2_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL2, 16), ++ x0 = svqincw_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincw_pat_n_vl3_u32: ++** uqincw w0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl3_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL3, 16), ++ x0 = svqincw_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincw_pat_n_vl4_u32: ++** uqincw w0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl4_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL4, 16), ++ x0 = svqincw_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincw_pat_n_vl5_u32: ++** uqincw w0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl5_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL5, 16), ++ x0 = svqincw_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincw_pat_n_vl6_u32: ++** uqincw w0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl6_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL6, 16), ++ x0 = svqincw_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincw_pat_n_vl7_u32: ++** uqincw w0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl7_u32, uint32_t, ++ x0 = 
svqincw_pat_n_u32 (x0, SV_VL7, 16), ++ x0 = svqincw_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincw_pat_n_vl8_u32: ++** uqincw w0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl8_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL8, 16), ++ x0 = svqincw_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincw_pat_n_vl16_u32: ++** uqincw w0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl16_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL16, 16), ++ x0 = svqincw_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincw_pat_n_vl32_u32: ++** uqincw w0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl32_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL32, 16), ++ x0 = svqincw_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincw_pat_n_vl64_u32: ++** uqincw w0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl64_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL64, 16), ++ x0 = svqincw_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincw_pat_n_vl128_u32: ++** uqincw w0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl128_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL128, 16), ++ x0 = svqincw_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincw_pat_n_vl256_u32: ++** uqincw w0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl256_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_VL256, 16), ++ x0 = svqincw_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincw_pat_n_mul4_u32: ++** uqincw w0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_mul4_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_MUL4, 16), ++ x0 = svqincw_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincw_pat_n_mul3_u32: ++** uqincw w0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_mul3_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_MUL3, 16), ++ x0 = svqincw_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincw_pat_n_all_u32: ++** uqincw w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_all_u32, uint32_t, ++ x0 = svqincw_pat_n_u32 (x0, SV_ALL, 16), ++ x0 = svqincw_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u64.c +new file mode 100644 +index 000000000..a2ac9ee72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u64.c +@@ -0,0 +1,202 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincw_pat_n_1_u64_tied: ++** uqincw x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_1_u64_tied, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_POW2, 1), ++ x0 = svqincw_pat (x0, SV_POW2, 1)) ++ ++/* ++** qincw_pat_n_1_u64_untied: ++** mov x0, x1 ++** uqincw x0, pow2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_1_u64_untied, uint64_t, ++ x0 = svqincw_pat_n_u64 (x1, SV_POW2, 1), ++ x0 = svqincw_pat (x1, SV_POW2, 1)) ++ ++/* ++** qincw_pat_n_2_u64: ++** uqincw x0, pow2, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_2_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_POW2, 2), ++ x0 = svqincw_pat (x0, SV_POW2, 2)) ++ ++/* ++** qincw_pat_n_7_u64: ++** uqincw x0, pow2, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_7_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_POW2, 7), ++ x0 = svqincw_pat (x0, SV_POW2, 7)) ++ ++/* ++** qincw_pat_n_15_u64: ++** uqincw x0, pow2, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_15_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_POW2, 15), ++ x0 = svqincw_pat (x0, SV_POW2, 15)) ++ ++/* ++** qincw_pat_n_16_u64: ++** uqincw x0, pow2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_16_u64, 
uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_POW2, 16), ++ x0 = svqincw_pat (x0, SV_POW2, 16)) ++ ++/* ++** qincw_pat_n_vl1_u64: ++** uqincw x0, vl1, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl1_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL1, 16), ++ x0 = svqincw_pat (x0, SV_VL1, 16)) ++ ++/* ++** qincw_pat_n_vl2_u64: ++** uqincw x0, vl2, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl2_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL2, 16), ++ x0 = svqincw_pat (x0, SV_VL2, 16)) ++ ++/* ++** qincw_pat_n_vl3_u64: ++** uqincw x0, vl3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl3_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL3, 16), ++ x0 = svqincw_pat (x0, SV_VL3, 16)) ++ ++/* ++** qincw_pat_n_vl4_u64: ++** uqincw x0, vl4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl4_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL4, 16), ++ x0 = svqincw_pat (x0, SV_VL4, 16)) ++ ++/* ++** qincw_pat_n_vl5_u64: ++** uqincw x0, vl5, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl5_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL5, 16), ++ x0 = svqincw_pat (x0, SV_VL5, 16)) ++ ++/* ++** qincw_pat_n_vl6_u64: ++** uqincw x0, vl6, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl6_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL6, 16), ++ x0 = svqincw_pat (x0, SV_VL6, 16)) ++ ++/* ++** qincw_pat_n_vl7_u64: ++** uqincw x0, vl7, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl7_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL7, 16), ++ x0 = svqincw_pat (x0, SV_VL7, 16)) ++ ++/* ++** qincw_pat_n_vl8_u64: ++** uqincw x0, vl8, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl8_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL8, 16), ++ x0 = svqincw_pat (x0, SV_VL8, 16)) ++ ++/* ++** qincw_pat_n_vl16_u64: ++** uqincw x0, vl16, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl16_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL16, 16), ++ x0 = svqincw_pat (x0, SV_VL16, 16)) ++ ++/* ++** qincw_pat_n_vl32_u64: ++** uqincw x0, vl32, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl32_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL32, 16), ++ x0 = svqincw_pat (x0, SV_VL32, 16)) ++ ++/* ++** qincw_pat_n_vl64_u64: ++** uqincw x0, vl64, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl64_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL64, 16), ++ x0 = svqincw_pat (x0, SV_VL64, 16)) ++ ++/* ++** qincw_pat_n_vl128_u64: ++** uqincw x0, vl128, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl128_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL128, 16), ++ x0 = svqincw_pat (x0, SV_VL128, 16)) ++ ++/* ++** qincw_pat_n_vl256_u64: ++** uqincw x0, vl256, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_vl256_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_VL256, 16), ++ x0 = svqincw_pat (x0, SV_VL256, 16)) ++ ++/* ++** qincw_pat_n_mul4_u64: ++** uqincw x0, mul4, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_mul4_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_MUL4, 16), ++ x0 = svqincw_pat (x0, SV_MUL4, 16)) ++ ++/* ++** qincw_pat_n_mul3_u64: ++** uqincw x0, mul3, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_mul3_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_MUL3, 16), ++ x0 = svqincw_pat (x0, SV_MUL3, 16)) ++ ++/* ++** qincw_pat_n_all_u64: ++** uqincw x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_pat_n_all_u64, uint64_t, ++ x0 = svqincw_pat_n_u64 (x0, SV_ALL, 16), ++ x0 = svqincw_pat (x0, SV_ALL, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s32.c +new file mode 100644 +index 000000000..031824acf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s32.c +@@ -0,0 +1,113 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincw_1_s32_tied: ++** sqincw z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_1_s32_tied, svint32_t, ++ z0 = svqincw_s32 (z0, 1), ++ z0 = svqincw (z0, 1)) ++ ++/* ++** qincw_1_s32_untied: ++** movprfx z0, z1 ++** sqincw z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_1_s32_untied, svint32_t, ++ z0 = svqincw_s32 (z1, 1), ++ z0 = svqincw (z1, 1)) ++ ++/* ++** qincw_2_s32: ++** sqincw z0\.s, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_2_s32, svint32_t, ++ z0 = svqincw_s32 (z0, 2), ++ z0 = svqincw (z0, 2)) ++ ++/* ++** qincw_7_s32: ++** sqincw z0\.s, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_7_s32, svint32_t, ++ z0 = svqincw_s32 (z0, 7), ++ z0 = svqincw (z0, 7)) ++ ++/* ++** qincw_15_s32: ++** sqincw z0\.s, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_15_s32, svint32_t, ++ z0 = svqincw_s32 (z0, 15), ++ z0 = svqincw (z0, 15)) ++ ++/* ++** qincw_16_s32: ++** sqincw z0\.s, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_16_s32, svint32_t, ++ z0 = svqincw_s32 (z0, 16), ++ z0 = svqincw (z0, 16)) ++ ++/* ++** qincw_n_1_s32_tied: ++** sqincw x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_1_s32_tied, int32_t, ++ x0 = svqincw_n_s32 (x0, 1), ++ x0 = svqincw (x0, 1)) ++ ++/* ++** qincw_n_1_s32_untied: ++** mov w0, w1 ++** sqincw x0, w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_1_s32_untied, int32_t, ++ x0 = svqincw_n_s32 (x1, 1), ++ x0 = svqincw (x1, 1)) ++ ++/* ++** qincw_n_2_s32: ++** sqincw x0, w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_2_s32, int32_t, ++ x0 = svqincw_n_s32 (x0, 2), ++ x0 = svqincw (x0, 2)) ++ ++/* ++** qincw_n_7_s32: ++** sqincw x0, w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_7_s32, int32_t, ++ x0 = svqincw_n_s32 (x0, 7), ++ x0 = svqincw (x0, 7)) ++ ++/* ++** qincw_n_15_s32: ++** sqincw x0, w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_15_s32, int32_t, ++ x0 = svqincw_n_s32 (x0, 15), ++ x0 = svqincw (x0, 15)) ++ ++/* ++** qincw_n_16_s32: ++** sqincw x0, w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_16_s32, int32_t, ++ x0 = svqincw_n_s32 (x0, 16), ++ x0 = svqincw (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s64.c +new file mode 100644 +index 000000000..df61f909f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincw_n_1_s64_tied: ++** sqincw x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_1_s64_tied, int64_t, ++ x0 = svqincw_n_s64 (x0, 1), ++ x0 = svqincw (x0, 1)) ++ ++/* ++** qincw_n_1_s64_untied: ++** mov x0, x1 ++** sqincw x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_1_s64_untied, int64_t, ++ x0 = svqincw_n_s64 (x1, 1), ++ x0 = svqincw (x1, 1)) ++ ++/* ++** qincw_n_2_s64: ++** sqincw x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_2_s64, int64_t, ++ x0 = svqincw_n_s64 (x0, 2), ++ x0 = svqincw (x0, 2)) ++ ++/* ++** qincw_n_7_s64: ++** sqincw x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_7_s64, int64_t, ++ x0 = svqincw_n_s64 (x0, 7), ++ x0 = svqincw (x0, 7)) ++ ++/* ++** qincw_n_15_s64: ++** sqincw x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S 
(qincw_n_15_s64, int64_t, ++ x0 = svqincw_n_s64 (x0, 15), ++ x0 = svqincw (x0, 15)) ++ ++/* ++** qincw_n_16_s64: ++** sqincw x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_16_s64, int64_t, ++ x0 = svqincw_n_s64 (x0, 16), ++ x0 = svqincw (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u32.c +new file mode 100644 +index 000000000..65a446ab6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u32.c +@@ -0,0 +1,113 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincw_1_u32_tied: ++** uqincw z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_1_u32_tied, svuint32_t, ++ z0 = svqincw_u32 (z0, 1), ++ z0 = svqincw (z0, 1)) ++ ++/* ++** qincw_1_u32_untied: ++** movprfx z0, z1 ++** uqincw z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_1_u32_untied, svuint32_t, ++ z0 = svqincw_u32 (z1, 1), ++ z0 = svqincw (z1, 1)) ++ ++/* ++** qincw_2_u32: ++** uqincw z0\.s, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_2_u32, svuint32_t, ++ z0 = svqincw_u32 (z0, 2), ++ z0 = svqincw (z0, 2)) ++ ++/* ++** qincw_7_u32: ++** uqincw z0\.s, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_7_u32, svuint32_t, ++ z0 = svqincw_u32 (z0, 7), ++ z0 = svqincw (z0, 7)) ++ ++/* ++** qincw_15_u32: ++** uqincw z0\.s, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_15_u32, svuint32_t, ++ z0 = svqincw_u32 (z0, 15), ++ z0 = svqincw (z0, 15)) ++ ++/* ++** qincw_16_u32: ++** uqincw z0\.s, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_Z (qincw_16_u32, svuint32_t, ++ z0 = svqincw_u32 (z0, 16), ++ z0 = svqincw (z0, 16)) ++ ++/* ++** qincw_n_1_u32_tied: ++** uqincw w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_1_u32_tied, uint32_t, ++ x0 = svqincw_n_u32 (x0, 1), ++ x0 = svqincw (x0, 1)) ++ ++/* ++** qincw_n_1_u32_untied: ++** mov w0, w1 ++** uqincw w0 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_1_u32_untied, uint32_t, ++ x0 = svqincw_n_u32 (x1, 1), ++ x0 = svqincw (x1, 1)) ++ ++/* ++** qincw_n_2_u32: ++** uqincw w0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_2_u32, uint32_t, ++ x0 = svqincw_n_u32 (x0, 2), ++ x0 = svqincw (x0, 2)) ++ ++/* ++** qincw_n_7_u32: ++** uqincw w0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_7_u32, uint32_t, ++ x0 = svqincw_n_u32 (x0, 7), ++ x0 = svqincw (x0, 7)) ++ ++/* ++** qincw_n_15_u32: ++** uqincw w0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_15_u32, uint32_t, ++ x0 = svqincw_n_u32 (x0, 15), ++ x0 = svqincw (x0, 15)) ++ ++/* ++** qincw_n_16_u32: ++** uqincw w0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_16_u32, uint32_t, ++ x0 = svqincw_n_u32 (x0, 16), ++ x0 = svqincw (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u64.c +new file mode 100644 +index 000000000..806a79945 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u64.c +@@ -0,0 +1,58 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qincw_n_1_u64_tied: ++** uqincw x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_1_u64_tied, uint64_t, ++ x0 = svqincw_n_u64 (x0, 1), ++ x0 = svqincw (x0, 1)) ++ ++/* ++** qincw_n_1_u64_untied: ++** mov x0, x1 ++** uqincw x0 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_1_u64_untied, uint64_t, ++ x0 = svqincw_n_u64 (x1, 1), ++ x0 = svqincw (x1, 1)) ++ ++/* ++** qincw_n_2_u64: ++** uqincw x0, all, mul #2 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_2_u64, 
uint64_t, ++ x0 = svqincw_n_u64 (x0, 2), ++ x0 = svqincw (x0, 2)) ++ ++/* ++** qincw_n_7_u64: ++** uqincw x0, all, mul #7 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_7_u64, uint64_t, ++ x0 = svqincw_n_u64 (x0, 7), ++ x0 = svqincw (x0, 7)) ++ ++/* ++** qincw_n_15_u64: ++** uqincw x0, all, mul #15 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_15_u64, uint64_t, ++ x0 = svqincw_n_u64 (x0, 15), ++ x0 = svqincw (x0, 15)) ++ ++/* ++** qincw_n_16_u64: ++** uqincw x0, all, mul #16 ++** ret ++*/ ++TEST_UNIFORM_S (qincw_n_16_u64, uint64_t, ++ x0 = svqincw_n_u64 (x0, 16), ++ x0 = svqincw (x0, 16)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s16.c +new file mode 100644 +index 000000000..8dd8381dc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s16.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qsub_s16_tied1: ++** sqsub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s16_tied1, svint16_t, ++ z0 = svqsub_s16 (z0, z1), ++ z0 = svqsub (z0, z1)) ++ ++/* ++** qsub_s16_tied2: ++** sqsub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s16_tied2, svint16_t, ++ z0 = svqsub_s16 (z1, z0), ++ z0 = svqsub (z1, z0)) ++ ++/* ++** qsub_s16_untied: ++** sqsub z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s16_untied, svint16_t, ++ z0 = svqsub_s16 (z1, z2), ++ z0 = svqsub (z1, z2)) ++ ++/* ++** qsub_w0_s16_tied1: ++** mov (z[0-9]+\.h), w0 ++** sqsub z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_s16_tied1, svint16_t, int16_t, ++ z0 = svqsub_n_s16 (z0, x0), ++ z0 = svqsub (z0, x0)) ++ ++/* ++** qsub_w0_s16_untied: ++** mov (z[0-9]+\.h), w0 ++** sqsub z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_s16_untied, svint16_t, int16_t, ++ z0 = svqsub_n_s16 (z1, x0), ++ z0 = svqsub (z1, x0)) ++ ++/* ++** qsub_1_s16_tied1: ++** sqsub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_s16_tied1, svint16_t, ++ z0 = svqsub_n_s16 (z0, 1), ++ z0 = svqsub (z0, 1)) ++ ++/* ++** qsub_1_s16_untied: ++** movprfx z0, z1 ++** sqsub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_s16_untied, svint16_t, ++ z0 = svqsub_n_s16 (z1, 1), ++ z0 = svqsub (z1, 1)) ++ ++/* ++** qsub_127_s16: ++** sqsub z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_127_s16, svint16_t, ++ z0 = svqsub_n_s16 (z0, 127), ++ z0 = svqsub (z0, 127)) ++ ++/* ++** qsub_128_s16: ++** sqsub z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_128_s16, svint16_t, ++ z0 = svqsub_n_s16 (z0, 128), ++ z0 = svqsub (z0, 128)) ++ ++/* ++** qsub_255_s16: ++** sqsub z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_255_s16, svint16_t, ++ z0 = svqsub_n_s16 (z0, 255), ++ z0 = svqsub (z0, 255)) ++ ++/* ++** qsub_m1_s16: ++** sqadd z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m1_s16, svint16_t, ++ z0 = svqsub_n_s16 (z0, -1), ++ z0 = svqsub (z0, -1)) ++ ++/* ++** qsub_m127_s16: ++** sqadd z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m127_s16, svint16_t, ++ z0 = svqsub_n_s16 (z0, -127), ++ z0 = svqsub (z0, -127)) ++ ++/* ++** qsub_m128_s16: ++** sqadd z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m128_s16, svint16_t, ++ z0 = svqsub_n_s16 (z0, -128), ++ z0 = svqsub (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s32.c +new file mode 100644 +index 000000000..920736aec +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s32.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qsub_s32_tied1: ++** sqsub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s32_tied1, svint32_t, ++ z0 = svqsub_s32 (z0, z1), ++ z0 = svqsub (z0, z1)) ++ ++/* ++** qsub_s32_tied2: ++** sqsub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s32_tied2, svint32_t, ++ z0 = svqsub_s32 (z1, z0), ++ z0 = svqsub (z1, z0)) ++ ++/* ++** qsub_s32_untied: ++** sqsub z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s32_untied, svint32_t, ++ z0 = svqsub_s32 (z1, z2), ++ z0 = svqsub (z1, z2)) ++ ++/* ++** qsub_w0_s32_tied1: ++** mov (z[0-9]+\.s), w0 ++** sqsub z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_s32_tied1, svint32_t, int32_t, ++ z0 = svqsub_n_s32 (z0, x0), ++ z0 = svqsub (z0, x0)) ++ ++/* ++** qsub_w0_s32_untied: ++** mov (z[0-9]+\.s), w0 ++** sqsub z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_s32_untied, svint32_t, int32_t, ++ z0 = svqsub_n_s32 (z1, x0), ++ z0 = svqsub (z1, x0)) ++ ++/* ++** qsub_1_s32_tied1: ++** sqsub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_s32_tied1, svint32_t, ++ z0 = svqsub_n_s32 (z0, 1), ++ z0 = svqsub (z0, 1)) ++ ++/* ++** qsub_1_s32_untied: ++** movprfx z0, z1 ++** sqsub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_s32_untied, svint32_t, ++ z0 = svqsub_n_s32 (z1, 1), ++ z0 = svqsub (z1, 1)) ++ ++/* ++** qsub_127_s32: ++** sqsub z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_127_s32, svint32_t, ++ z0 = svqsub_n_s32 (z0, 127), ++ z0 = svqsub (z0, 127)) ++ ++/* ++** qsub_128_s32: ++** sqsub z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_128_s32, svint32_t, ++ z0 = svqsub_n_s32 (z0, 128), ++ z0 = svqsub (z0, 128)) ++ ++/* ++** qsub_255_s32: ++** sqsub z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_255_s32, svint32_t, ++ z0 = svqsub_n_s32 (z0, 255), ++ z0 = svqsub (z0, 255)) ++ ++/* ++** qsub_m1_s32: ++** sqadd z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m1_s32, svint32_t, ++ z0 = svqsub_n_s32 (z0, -1), ++ z0 = svqsub (z0, -1)) ++ ++/* ++** qsub_m127_s32: ++** sqadd z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m127_s32, svint32_t, ++ z0 = svqsub_n_s32 (z0, -127), ++ z0 = svqsub (z0, -127)) ++ ++/* ++** qsub_m128_s32: ++** sqadd z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m128_s32, svint32_t, ++ z0 = svqsub_n_s32 (z0, -128), ++ z0 = svqsub (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s64.c +new file mode 100644 +index 000000000..3d0fc2bcc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s64.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qsub_s64_tied1: ++** sqsub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s64_tied1, svint64_t, ++ z0 = svqsub_s64 (z0, z1), ++ z0 = svqsub (z0, z1)) ++ ++/* ++** qsub_s64_tied2: ++** sqsub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s64_tied2, svint64_t, ++ z0 = svqsub_s64 (z1, z0), ++ z0 = svqsub (z1, z0)) ++ ++/* ++** qsub_s64_untied: ++** sqsub z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s64_untied, svint64_t, ++ z0 = svqsub_s64 (z1, z2), ++ z0 = svqsub (z1, z2)) ++ ++/* ++** qsub_x0_s64_tied1: ++** mov (z[0-9]+\.d), x0 ++** sqsub z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX 
(qsub_x0_s64_tied1, svint64_t, int64_t, ++ z0 = svqsub_n_s64 (z0, x0), ++ z0 = svqsub (z0, x0)) ++ ++/* ++** qsub_x0_s64_untied: ++** mov (z[0-9]+\.d), x0 ++** sqsub z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_x0_s64_untied, svint64_t, int64_t, ++ z0 = svqsub_n_s64 (z1, x0), ++ z0 = svqsub (z1, x0)) ++ ++/* ++** qsub_1_s64_tied1: ++** sqsub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_s64_tied1, svint64_t, ++ z0 = svqsub_n_s64 (z0, 1), ++ z0 = svqsub (z0, 1)) ++ ++/* ++** qsub_1_s64_untied: ++** movprfx z0, z1 ++** sqsub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_s64_untied, svint64_t, ++ z0 = svqsub_n_s64 (z1, 1), ++ z0 = svqsub (z1, 1)) ++ ++/* ++** qsub_127_s64: ++** sqsub z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_127_s64, svint64_t, ++ z0 = svqsub_n_s64 (z0, 127), ++ z0 = svqsub (z0, 127)) ++ ++/* ++** qsub_128_s64: ++** sqsub z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_128_s64, svint64_t, ++ z0 = svqsub_n_s64 (z0, 128), ++ z0 = svqsub (z0, 128)) ++ ++/* ++** qsub_255_s64: ++** sqsub z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_255_s64, svint64_t, ++ z0 = svqsub_n_s64 (z0, 255), ++ z0 = svqsub (z0, 255)) ++ ++/* ++** qsub_m1_s64: ++** sqadd z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m1_s64, svint64_t, ++ z0 = svqsub_n_s64 (z0, -1), ++ z0 = svqsub (z0, -1)) ++ ++/* ++** qsub_m127_s64: ++** sqadd z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m127_s64, svint64_t, ++ z0 = svqsub_n_s64 (z0, -127), ++ z0 = svqsub (z0, -127)) ++ ++/* ++** qsub_m128_s64: ++** sqadd z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m128_s64, svint64_t, ++ z0 = svqsub_n_s64 (z0, -128), ++ z0 = svqsub (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s8.c +new file mode 100644 +index 000000000..3e7e84c77 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s8.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qsub_s8_tied1: ++** sqsub z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s8_tied1, svint8_t, ++ z0 = svqsub_s8 (z0, z1), ++ z0 = svqsub (z0, z1)) ++ ++/* ++** qsub_s8_tied2: ++** sqsub z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s8_tied2, svint8_t, ++ z0 = svqsub_s8 (z1, z0), ++ z0 = svqsub (z1, z0)) ++ ++/* ++** qsub_s8_untied: ++** sqsub z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_s8_untied, svint8_t, ++ z0 = svqsub_s8 (z1, z2), ++ z0 = svqsub (z1, z2)) ++ ++/* ++** qsub_w0_s8_tied1: ++** mov (z[0-9]+\.b), w0 ++** sqsub z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_s8_tied1, svint8_t, int8_t, ++ z0 = svqsub_n_s8 (z0, x0), ++ z0 = svqsub (z0, x0)) ++ ++/* ++** qsub_w0_s8_untied: ++** mov (z[0-9]+\.b), w0 ++** sqsub z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_s8_untied, svint8_t, int8_t, ++ z0 = svqsub_n_s8 (z1, x0), ++ z0 = svqsub (z1, x0)) ++ ++/* ++** qsub_1_s8_tied1: ++** sqsub z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_s8_tied1, svint8_t, ++ z0 = svqsub_n_s8 (z0, 1), ++ z0 = svqsub (z0, 1)) ++ ++/* ++** qsub_1_s8_untied: ++** movprfx z0, z1 ++** sqsub z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_s8_untied, svint8_t, ++ z0 = svqsub_n_s8 (z1, 1), ++ z0 = svqsub (z1, 1)) ++ ++/* ++** qsub_127_s8: ++** sqsub z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_127_s8, svint8_t, ++ z0 = svqsub_n_s8 (z0, 127), ++ z0 = svqsub (z0, 127)) ++ 
++/* ++** qsub_128_s8: ++** sqadd z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_128_s8, svint8_t, ++ z0 = svqsub_n_s8 (z0, 128), ++ z0 = svqsub (z0, 128)) ++ ++/* ++** qsub_255_s8: ++** sqadd z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_255_s8, svint8_t, ++ z0 = svqsub_n_s8 (z0, 255), ++ z0 = svqsub (z0, 255)) ++ ++/* ++** qsub_m1_s8: ++** sqadd z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m1_s8, svint8_t, ++ z0 = svqsub_n_s8 (z0, -1), ++ z0 = svqsub (z0, -1)) ++ ++/* ++** qsub_m127_s8: ++** sqadd z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m127_s8, svint8_t, ++ z0 = svqsub_n_s8 (z0, -127), ++ z0 = svqsub (z0, -127)) ++ ++/* ++** qsub_m128_s8: ++** sqadd z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m128_s8, svint8_t, ++ z0 = svqsub_n_s8 (z0, -128), ++ z0 = svqsub (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u16.c +new file mode 100644 +index 000000000..6d4d68e20 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u16.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qsub_u16_tied1: ++** uqsub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u16_tied1, svuint16_t, ++ z0 = svqsub_u16 (z0, z1), ++ z0 = svqsub (z0, z1)) ++ ++/* ++** qsub_u16_tied2: ++** uqsub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u16_tied2, svuint16_t, ++ z0 = svqsub_u16 (z1, z0), ++ z0 = svqsub (z1, z0)) ++ ++/* ++** qsub_u16_untied: ++** uqsub z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u16_untied, svuint16_t, ++ z0 = svqsub_u16 (z1, z2), ++ z0 = svqsub (z1, z2)) ++ ++/* ++** qsub_w0_u16_tied1: ++** mov (z[0-9]+\.h), w0 ++** uqsub z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_u16_tied1, svuint16_t, uint16_t, ++ z0 = svqsub_n_u16 (z0, x0), ++ z0 = svqsub (z0, x0)) ++ ++/* ++** qsub_w0_u16_untied: ++** mov (z[0-9]+\.h), w0 ++** uqsub z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_u16_untied, svuint16_t, uint16_t, ++ z0 = svqsub_n_u16 (z1, x0), ++ z0 = svqsub (z1, x0)) ++ ++/* ++** qsub_1_u16_tied1: ++** uqsub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_u16_tied1, svuint16_t, ++ z0 = svqsub_n_u16 (z0, 1), ++ z0 = svqsub (z0, 1)) ++ ++/* ++** qsub_1_u16_untied: ++** movprfx z0, z1 ++** uqsub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_u16_untied, svuint16_t, ++ z0 = svqsub_n_u16 (z1, 1), ++ z0 = svqsub (z1, 1)) ++ ++/* ++** qsub_127_u16: ++** uqsub z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_127_u16, svuint16_t, ++ z0 = svqsub_n_u16 (z0, 127), ++ z0 = svqsub (z0, 127)) ++ ++/* ++** qsub_128_u16: ++** uqsub z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_128_u16, svuint16_t, ++ z0 = svqsub_n_u16 (z0, 128), ++ z0 = svqsub (z0, 128)) ++ ++/* ++** qsub_255_u16: ++** uqsub z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_255_u16, svuint16_t, ++ z0 = svqsub_n_u16 (z0, 255), ++ z0 = svqsub (z0, 255)) ++ ++/* ++** qsub_m1_u16: ++** mov (z[0-9]+)\.b, #-1 ++** uqsub z0\.h, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m1_u16, svuint16_t, ++ z0 = svqsub_n_u16 (z0, -1), ++ z0 = svqsub (z0, -1)) ++ ++/* ++** qsub_m127_u16: ++** mov (z[0-9]+\.h), #-127 ++** uqsub z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m127_u16, svuint16_t, ++ z0 = svqsub_n_u16 (z0, -127), ++ z0 = svqsub (z0, -127)) ++ ++/* ++** qsub_m128_u16: ++** mov (z[0-9]+\.h), #-128 ++** uqsub z0\.h, z0\.h, \1 
++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m128_u16, svuint16_t, ++ z0 = svqsub_n_u16 (z0, -128), ++ z0 = svqsub (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u32.c +new file mode 100644 +index 000000000..9c93cfc45 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u32.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qsub_u32_tied1: ++** uqsub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u32_tied1, svuint32_t, ++ z0 = svqsub_u32 (z0, z1), ++ z0 = svqsub (z0, z1)) ++ ++/* ++** qsub_u32_tied2: ++** uqsub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u32_tied2, svuint32_t, ++ z0 = svqsub_u32 (z1, z0), ++ z0 = svqsub (z1, z0)) ++ ++/* ++** qsub_u32_untied: ++** uqsub z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u32_untied, svuint32_t, ++ z0 = svqsub_u32 (z1, z2), ++ z0 = svqsub (z1, z2)) ++ ++/* ++** qsub_w0_u32_tied1: ++** mov (z[0-9]+\.s), w0 ++** uqsub z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_u32_tied1, svuint32_t, uint32_t, ++ z0 = svqsub_n_u32 (z0, x0), ++ z0 = svqsub (z0, x0)) ++ ++/* ++** qsub_w0_u32_untied: ++** mov (z[0-9]+\.s), w0 ++** uqsub z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_u32_untied, svuint32_t, uint32_t, ++ z0 = svqsub_n_u32 (z1, x0), ++ z0 = svqsub (z1, x0)) ++ ++/* ++** qsub_1_u32_tied1: ++** uqsub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_u32_tied1, svuint32_t, ++ z0 = svqsub_n_u32 (z0, 1), ++ z0 = svqsub (z0, 1)) ++ ++/* ++** qsub_1_u32_untied: ++** movprfx z0, z1 ++** uqsub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_u32_untied, svuint32_t, ++ z0 = svqsub_n_u32 (z1, 1), ++ z0 = svqsub (z1, 1)) ++ ++/* ++** qsub_127_u32: ++** uqsub z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_127_u32, svuint32_t, ++ z0 = svqsub_n_u32 (z0, 127), ++ z0 = svqsub (z0, 127)) ++ ++/* ++** qsub_128_u32: ++** uqsub z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_128_u32, svuint32_t, ++ z0 = svqsub_n_u32 (z0, 128), ++ z0 = svqsub (z0, 128)) ++ ++/* ++** qsub_255_u32: ++** uqsub z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_255_u32, svuint32_t, ++ z0 = svqsub_n_u32 (z0, 255), ++ z0 = svqsub (z0, 255)) ++ ++/* ++** qsub_m1_u32: ++** mov (z[0-9]+)\.b, #-1 ++** uqsub z0\.s, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m1_u32, svuint32_t, ++ z0 = svqsub_n_u32 (z0, -1), ++ z0 = svqsub (z0, -1)) ++ ++/* ++** qsub_m127_u32: ++** mov (z[0-9]+\.s), #-127 ++** uqsub z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m127_u32, svuint32_t, ++ z0 = svqsub_n_u32 (z0, -127), ++ z0 = svqsub (z0, -127)) ++ ++/* ++** qsub_m128_u32: ++** mov (z[0-9]+\.s), #-128 ++** uqsub z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m128_u32, svuint32_t, ++ z0 = svqsub_n_u32 (z0, -128), ++ z0 = svqsub (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u64.c +new file mode 100644 +index 000000000..6109b5f29 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u64.c +@@ -0,0 +1,126 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qsub_u64_tied1: ++** uqsub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u64_tied1, svuint64_t, ++ z0 = svqsub_u64 (z0, z1), ++ z0 = svqsub (z0, z1)) ++ ++/* ++** qsub_u64_tied2: ++** uqsub z0\.d, 
z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u64_tied2, svuint64_t, ++ z0 = svqsub_u64 (z1, z0), ++ z0 = svqsub (z1, z0)) ++ ++/* ++** qsub_u64_untied: ++** uqsub z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u64_untied, svuint64_t, ++ z0 = svqsub_u64 (z1, z2), ++ z0 = svqsub (z1, z2)) ++ ++/* ++** qsub_x0_u64_tied1: ++** mov (z[0-9]+\.d), x0 ++** uqsub z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_x0_u64_tied1, svuint64_t, uint64_t, ++ z0 = svqsub_n_u64 (z0, x0), ++ z0 = svqsub (z0, x0)) ++ ++/* ++** qsub_x0_u64_untied: ++** mov (z[0-9]+\.d), x0 ++** uqsub z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_x0_u64_untied, svuint64_t, uint64_t, ++ z0 = svqsub_n_u64 (z1, x0), ++ z0 = svqsub (z1, x0)) ++ ++/* ++** qsub_1_u64_tied1: ++** uqsub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_u64_tied1, svuint64_t, ++ z0 = svqsub_n_u64 (z0, 1), ++ z0 = svqsub (z0, 1)) ++ ++/* ++** qsub_1_u64_untied: ++** movprfx z0, z1 ++** uqsub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_u64_untied, svuint64_t, ++ z0 = svqsub_n_u64 (z1, 1), ++ z0 = svqsub (z1, 1)) ++ ++/* ++** qsub_127_u64: ++** uqsub z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_127_u64, svuint64_t, ++ z0 = svqsub_n_u64 (z0, 127), ++ z0 = svqsub (z0, 127)) ++ ++/* ++** qsub_128_u64: ++** uqsub z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_128_u64, svuint64_t, ++ z0 = svqsub_n_u64 (z0, 128), ++ z0 = svqsub (z0, 128)) ++ ++/* ++** qsub_255_u64: ++** uqsub z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_255_u64, svuint64_t, ++ z0 = svqsub_n_u64 (z0, 255), ++ z0 = svqsub (z0, 255)) ++ ++/* ++** qsub_m1_u64: ++** mov (z[0-9]+)\.b, #-1 ++** uqsub z0\.d, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m1_u64, svuint64_t, ++ z0 = svqsub_n_u64 (z0, -1), ++ z0 = svqsub (z0, -1)) ++ ++/* ++** qsub_m127_u64: ++** mov (z[0-9]+\.d), #-127 ++** uqsub z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m127_u64, svuint64_t, ++ z0 = svqsub_n_u64 (z0, -127), ++ z0 = svqsub (z0, -127)) ++ ++/* ++** qsub_m128_u64: ++** mov (z[0-9]+\.d), #-128 ++** uqsub z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m128_u64, svuint64_t, ++ z0 = svqsub_n_u64 (z0, -128), ++ z0 = svqsub (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u8.c +new file mode 100644 +index 000000000..40aa74e8d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u8.c +@@ -0,0 +1,123 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** qsub_u8_tied1: ++** uqsub z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u8_tied1, svuint8_t, ++ z0 = svqsub_u8 (z0, z1), ++ z0 = svqsub (z0, z1)) ++ ++/* ++** qsub_u8_tied2: ++** uqsub z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u8_tied2, svuint8_t, ++ z0 = svqsub_u8 (z1, z0), ++ z0 = svqsub (z1, z0)) ++ ++/* ++** qsub_u8_untied: ++** uqsub z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_u8_untied, svuint8_t, ++ z0 = svqsub_u8 (z1, z2), ++ z0 = svqsub (z1, z2)) ++ ++/* ++** qsub_w0_u8_tied1: ++** mov (z[0-9]+\.b), w0 ++** uqsub z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_u8_tied1, svuint8_t, uint8_t, ++ z0 = svqsub_n_u8 (z0, x0), ++ z0 = svqsub (z0, x0)) ++ ++/* ++** qsub_w0_u8_untied: ++** mov (z[0-9]+\.b), w0 ++** uqsub z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (qsub_w0_u8_untied, svuint8_t, uint8_t, ++ z0 = svqsub_n_u8 (z1, x0), ++ z0 = svqsub (z1, x0)) ++ ++/* 
++** qsub_1_u8_tied1: ++** uqsub z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_u8_tied1, svuint8_t, ++ z0 = svqsub_n_u8 (z0, 1), ++ z0 = svqsub (z0, 1)) ++ ++/* ++** qsub_1_u8_untied: ++** movprfx z0, z1 ++** uqsub z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_1_u8_untied, svuint8_t, ++ z0 = svqsub_n_u8 (z1, 1), ++ z0 = svqsub (z1, 1)) ++ ++/* ++** qsub_127_u8: ++** uqsub z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_127_u8, svuint8_t, ++ z0 = svqsub_n_u8 (z0, 127), ++ z0 = svqsub (z0, 127)) ++ ++/* ++** qsub_128_u8: ++** uqsub z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_128_u8, svuint8_t, ++ z0 = svqsub_n_u8 (z0, 128), ++ z0 = svqsub (z0, 128)) ++ ++/* ++** qsub_255_u8: ++** uqsub z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_255_u8, svuint8_t, ++ z0 = svqsub_n_u8 (z0, 255), ++ z0 = svqsub (z0, 255)) ++ ++/* ++** qsub_m1_u8: ++** uqsub z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m1_u8, svuint8_t, ++ z0 = svqsub_n_u8 (z0, -1), ++ z0 = svqsub (z0, -1)) ++ ++/* ++** qsub_m127_u8: ++** uqsub z0\.b, z0\.b, #129 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m127_u8, svuint8_t, ++ z0 = svqsub_n_u8 (z0, -127), ++ z0 = svqsub (z0, -127)) ++ ++/* ++** qsub_m128_u8: ++** uqsub z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (qsub_m128_u8, svuint8_t, ++ z0 = svqsub_n_u8 (z0, -128), ++ z0 = svqsub (z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s16.c +new file mode 100644 +index 000000000..4f794f600 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rbit_s16_m_tied12: ++** rbit z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s16_m_tied12, svint16_t, ++ z0 = svrbit_s16_m (z0, p0, z0), ++ z0 = svrbit_m (z0, p0, z0)) ++ ++/* ++** rbit_s16_m_tied1: ++** rbit z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s16_m_tied1, svint16_t, ++ z0 = svrbit_s16_m (z0, p0, z1), ++ z0 = svrbit_m (z0, p0, z1)) ++ ++/* ++** rbit_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** rbit z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s16_m_tied2, svint16_t, ++ z0 = svrbit_s16_m (z1, p0, z0), ++ z0 = svrbit_m (z1, p0, z0)) ++ ++/* ++** rbit_s16_m_untied: ++** movprfx z0, z2 ++** rbit z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s16_m_untied, svint16_t, ++ z0 = svrbit_s16_m (z2, p0, z1), ++ z0 = svrbit_m (z2, p0, z1)) ++ ++/* ++** rbit_s16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** rbit z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s16_z_tied1, svint16_t, ++ z0 = svrbit_s16_z (p0, z0), ++ z0 = svrbit_z (p0, z0)) ++ ++/* ++** rbit_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** rbit z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s16_z_untied, svint16_t, ++ z0 = svrbit_s16_z (p0, z1), ++ z0 = svrbit_z (p0, z1)) ++ ++/* ++** rbit_s16_x_tied1: ++** rbit z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s16_x_tied1, svint16_t, ++ z0 = svrbit_s16_x (p0, z0), ++ z0 = svrbit_x (p0, z0)) ++ ++/* ++** rbit_s16_x_untied: ++** rbit z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s16_x_untied, svint16_t, ++ z0 = svrbit_s16_x (p0, z1), ++ z0 = svrbit_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s32.c +new file mode 100644 +index 
000000000..8b5e1a463 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rbit_s32_m_tied12: ++** rbit z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s32_m_tied12, svint32_t, ++ z0 = svrbit_s32_m (z0, p0, z0), ++ z0 = svrbit_m (z0, p0, z0)) ++ ++/* ++** rbit_s32_m_tied1: ++** rbit z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s32_m_tied1, svint32_t, ++ z0 = svrbit_s32_m (z0, p0, z1), ++ z0 = svrbit_m (z0, p0, z1)) ++ ++/* ++** rbit_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** rbit z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s32_m_tied2, svint32_t, ++ z0 = svrbit_s32_m (z1, p0, z0), ++ z0 = svrbit_m (z1, p0, z0)) ++ ++/* ++** rbit_s32_m_untied: ++** movprfx z0, z2 ++** rbit z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s32_m_untied, svint32_t, ++ z0 = svrbit_s32_m (z2, p0, z1), ++ z0 = svrbit_m (z2, p0, z1)) ++ ++/* ++** rbit_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** rbit z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s32_z_tied1, svint32_t, ++ z0 = svrbit_s32_z (p0, z0), ++ z0 = svrbit_z (p0, z0)) ++ ++/* ++** rbit_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** rbit z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s32_z_untied, svint32_t, ++ z0 = svrbit_s32_z (p0, z1), ++ z0 = svrbit_z (p0, z1)) ++ ++/* ++** rbit_s32_x_tied1: ++** rbit z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s32_x_tied1, svint32_t, ++ z0 = svrbit_s32_x (p0, z0), ++ z0 = svrbit_x (p0, z0)) ++ ++/* ++** rbit_s32_x_untied: ++** rbit z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s32_x_untied, svint32_t, ++ z0 = svrbit_s32_x (p0, z1), ++ z0 = svrbit_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s64.c +new file mode 100644 +index 000000000..cec27a421 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rbit_s64_m_tied12: ++** rbit z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s64_m_tied12, svint64_t, ++ z0 = svrbit_s64_m (z0, p0, z0), ++ z0 = svrbit_m (z0, p0, z0)) ++ ++/* ++** rbit_s64_m_tied1: ++** rbit z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s64_m_tied1, svint64_t, ++ z0 = svrbit_s64_m (z0, p0, z1), ++ z0 = svrbit_m (z0, p0, z1)) ++ ++/* ++** rbit_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** rbit z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s64_m_tied2, svint64_t, ++ z0 = svrbit_s64_m (z1, p0, z0), ++ z0 = svrbit_m (z1, p0, z0)) ++ ++/* ++** rbit_s64_m_untied: ++** movprfx z0, z2 ++** rbit z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s64_m_untied, svint64_t, ++ z0 = svrbit_s64_m (z2, p0, z1), ++ z0 = svrbit_m (z2, p0, z1)) ++ ++/* ++** rbit_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** rbit z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s64_z_tied1, svint64_t, ++ z0 = svrbit_s64_z (p0, z0), ++ z0 = svrbit_z (p0, z0)) ++ ++/* ++** rbit_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** rbit z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s64_z_untied, svint64_t, ++ z0 = svrbit_s64_z (p0, z1), ++ z0 = svrbit_z (p0, z1)) ++ ++/* ++** rbit_s64_x_tied1: ++** rbit z0\.d, p0/m, 
z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s64_x_tied1, svint64_t, ++ z0 = svrbit_s64_x (p0, z0), ++ z0 = svrbit_x (p0, z0)) ++ ++/* ++** rbit_s64_x_untied: ++** rbit z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s64_x_untied, svint64_t, ++ z0 = svrbit_s64_x (p0, z1), ++ z0 = svrbit_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s8.c +new file mode 100644 +index 000000000..9c152116a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s8.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rbit_s8_m_tied12: ++** rbit z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s8_m_tied12, svint8_t, ++ z0 = svrbit_s8_m (z0, p0, z0), ++ z0 = svrbit_m (z0, p0, z0)) ++ ++/* ++** rbit_s8_m_tied1: ++** rbit z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s8_m_tied1, svint8_t, ++ z0 = svrbit_s8_m (z0, p0, z1), ++ z0 = svrbit_m (z0, p0, z1)) ++ ++/* ++** rbit_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** rbit z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s8_m_tied2, svint8_t, ++ z0 = svrbit_s8_m (z1, p0, z0), ++ z0 = svrbit_m (z1, p0, z0)) ++ ++/* ++** rbit_s8_m_untied: ++** movprfx z0, z2 ++** rbit z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s8_m_untied, svint8_t, ++ z0 = svrbit_s8_m (z2, p0, z1), ++ z0 = svrbit_m (z2, p0, z1)) ++ ++/* ++** rbit_s8_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, \1\.b ++** rbit z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s8_z_tied1, svint8_t, ++ z0 = svrbit_s8_z (p0, z0), ++ z0 = svrbit_z (p0, z0)) ++ ++/* ++** rbit_s8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** rbit z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s8_z_untied, svint8_t, ++ z0 = svrbit_s8_z (p0, z1), ++ z0 = svrbit_z (p0, z1)) ++ ++/* ++** rbit_s8_x_tied1: ++** rbit z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s8_x_tied1, svint8_t, ++ z0 = svrbit_s8_x (p0, z0), ++ z0 = svrbit_x (p0, z0)) ++ ++/* ++** rbit_s8_x_untied: ++** rbit z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_s8_x_untied, svint8_t, ++ z0 = svrbit_s8_x (p0, z1), ++ z0 = svrbit_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u16.c +new file mode 100644 +index 000000000..001ef2bf0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rbit_u16_m_tied12: ++** rbit z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u16_m_tied12, svuint16_t, ++ z0 = svrbit_u16_m (z0, p0, z0), ++ z0 = svrbit_m (z0, p0, z0)) ++ ++/* ++** rbit_u16_m_tied1: ++** rbit z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u16_m_tied1, svuint16_t, ++ z0 = svrbit_u16_m (z0, p0, z1), ++ z0 = svrbit_m (z0, p0, z1)) ++ ++/* ++** rbit_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** rbit z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u16_m_tied2, svuint16_t, ++ z0 = svrbit_u16_m (z1, p0, z0), ++ z0 = svrbit_m (z1, p0, z0)) ++ ++/* ++** rbit_u16_m_untied: ++** movprfx z0, z2 ++** rbit z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u16_m_untied, svuint16_t, ++ z0 = svrbit_u16_m (z2, p0, z1), ++ z0 = svrbit_m (z2, p0, z1)) ++ ++/* ++** rbit_u16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** 
movprfx z0\.h, p0/z, \1\.h ++** rbit z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u16_z_tied1, svuint16_t, ++ z0 = svrbit_u16_z (p0, z0), ++ z0 = svrbit_z (p0, z0)) ++ ++/* ++** rbit_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** rbit z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u16_z_untied, svuint16_t, ++ z0 = svrbit_u16_z (p0, z1), ++ z0 = svrbit_z (p0, z1)) ++ ++/* ++** rbit_u16_x_tied1: ++** rbit z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u16_x_tied1, svuint16_t, ++ z0 = svrbit_u16_x (p0, z0), ++ z0 = svrbit_x (p0, z0)) ++ ++/* ++** rbit_u16_x_untied: ++** rbit z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u16_x_untied, svuint16_t, ++ z0 = svrbit_u16_x (p0, z1), ++ z0 = svrbit_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u32.c +new file mode 100644 +index 000000000..4d91e954d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rbit_u32_m_tied12: ++** rbit z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u32_m_tied12, svuint32_t, ++ z0 = svrbit_u32_m (z0, p0, z0), ++ z0 = svrbit_m (z0, p0, z0)) ++ ++/* ++** rbit_u32_m_tied1: ++** rbit z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u32_m_tied1, svuint32_t, ++ z0 = svrbit_u32_m (z0, p0, z1), ++ z0 = svrbit_m (z0, p0, z1)) ++ ++/* ++** rbit_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** rbit z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u32_m_tied2, svuint32_t, ++ z0 = svrbit_u32_m (z1, p0, z0), ++ z0 = svrbit_m (z1, p0, z0)) ++ ++/* ++** rbit_u32_m_untied: ++** movprfx z0, z2 ++** rbit z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u32_m_untied, svuint32_t, ++ z0 = svrbit_u32_m (z2, p0, z1), ++ z0 = svrbit_m (z2, p0, z1)) ++ ++/* ++** rbit_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** rbit z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u32_z_tied1, svuint32_t, ++ z0 = svrbit_u32_z (p0, z0), ++ z0 = svrbit_z (p0, z0)) ++ ++/* ++** rbit_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** rbit z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u32_z_untied, svuint32_t, ++ z0 = svrbit_u32_z (p0, z1), ++ z0 = svrbit_z (p0, z1)) ++ ++/* ++** rbit_u32_x_tied1: ++** rbit z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u32_x_tied1, svuint32_t, ++ z0 = svrbit_u32_x (p0, z0), ++ z0 = svrbit_x (p0, z0)) ++ ++/* ++** rbit_u32_x_untied: ++** rbit z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u32_x_untied, svuint32_t, ++ z0 = svrbit_u32_x (p0, z1), ++ z0 = svrbit_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u64.c +new file mode 100644 +index 000000000..77f88d116 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rbit_u64_m_tied12: ++** rbit z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u64_m_tied12, svuint64_t, ++ z0 = svrbit_u64_m (z0, p0, z0), ++ z0 = svrbit_m (z0, p0, z0)) ++ ++/* ++** rbit_u64_m_tied1: ++** rbit z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u64_m_tied1, svuint64_t, ++ z0 = svrbit_u64_m (z0, p0, z1), ++ z0 = svrbit_m (z0, p0, z1)) ++ ++/* ++** 
rbit_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** rbit z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u64_m_tied2, svuint64_t, ++ z0 = svrbit_u64_m (z1, p0, z0), ++ z0 = svrbit_m (z1, p0, z0)) ++ ++/* ++** rbit_u64_m_untied: ++** movprfx z0, z2 ++** rbit z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u64_m_untied, svuint64_t, ++ z0 = svrbit_u64_m (z2, p0, z1), ++ z0 = svrbit_m (z2, p0, z1)) ++ ++/* ++** rbit_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** rbit z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u64_z_tied1, svuint64_t, ++ z0 = svrbit_u64_z (p0, z0), ++ z0 = svrbit_z (p0, z0)) ++ ++/* ++** rbit_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** rbit z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u64_z_untied, svuint64_t, ++ z0 = svrbit_u64_z (p0, z1), ++ z0 = svrbit_z (p0, z1)) ++ ++/* ++** rbit_u64_x_tied1: ++** rbit z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u64_x_tied1, svuint64_t, ++ z0 = svrbit_u64_x (p0, z0), ++ z0 = svrbit_x (p0, z0)) ++ ++/* ++** rbit_u64_x_untied: ++** rbit z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u64_x_untied, svuint64_t, ++ z0 = svrbit_u64_x (p0, z1), ++ z0 = svrbit_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u8.c +new file mode 100644 +index 000000000..fa347e4c7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u8.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rbit_u8_m_tied12: ++** rbit z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u8_m_tied12, svuint8_t, ++ z0 = svrbit_u8_m (z0, p0, z0), ++ z0 = svrbit_m (z0, p0, z0)) ++ ++/* ++** rbit_u8_m_tied1: ++** rbit z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u8_m_tied1, svuint8_t, ++ z0 = svrbit_u8_m (z0, p0, z1), ++ z0 = svrbit_m (z0, p0, z1)) ++ ++/* ++** rbit_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** rbit z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u8_m_tied2, svuint8_t, ++ z0 = svrbit_u8_m (z1, p0, z0), ++ z0 = svrbit_m (z1, p0, z0)) ++ ++/* ++** rbit_u8_m_untied: ++** movprfx z0, z2 ++** rbit z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u8_m_untied, svuint8_t, ++ z0 = svrbit_u8_m (z2, p0, z1), ++ z0 = svrbit_m (z2, p0, z1)) ++ ++/* ++** rbit_u8_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.b, p0/z, \1\.b ++** rbit z0\.b, p0/m, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u8_z_tied1, svuint8_t, ++ z0 = svrbit_u8_z (p0, z0), ++ z0 = svrbit_z (p0, z0)) ++ ++/* ++** rbit_u8_z_untied: ++** movprfx z0\.b, p0/z, z1\.b ++** rbit z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u8_z_untied, svuint8_t, ++ z0 = svrbit_u8_z (p0, z1), ++ z0 = svrbit_z (p0, z1)) ++ ++/* ++** rbit_u8_x_tied1: ++** rbit z0\.b, p0/m, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u8_x_tied1, svuint8_t, ++ z0 = svrbit_u8_x (p0, z0), ++ z0 = svrbit_x (p0, z0)) ++ ++/* ++** rbit_u8_x_untied: ++** rbit z0\.b, p0/m, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rbit_u8_x_untied, svuint8_t, ++ z0 = svrbit_u8_x (p0, z1), ++ z0 = svrbit_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c +new file mode 100644 +index 000000000..5564e967f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c +@@ -0,0 +1,59 @@ ++/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** setffr_rdffr_1: ++** ptrue p0\.b, all ++** ret ++*/ ++TEST_UNIFORM_P_SINGLE (setffr_rdffr_1, ++ svsetffr (); ++ p0 = svrdffr ()); ++ ++/* ++** setffr_rdffr_2: ++** ret ++*/ ++TEST_UNIFORM_P_SINGLE (setffr_rdffr_2, ++ svsetffr (); ++ svrdffr ()); ++ ++/* ++** setffr_rdffr_3: ++** ptrue p0\.b, all ++** ret ++*/ ++TEST_UNIFORM_P_SINGLE (setffr_rdffr_3, ++ svsetffr (); ++ svsetffr (); ++ svrdffr (); ++ p0 = svrdffr ()); ++ ++/* ++** wrffr_rdffr_1: ++** mov p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P_SINGLE (wrffr_rdffr_1, ++ svwrffr (p1); ++ p0 = svrdffr ()); ++ ++/* ++** wrffr_rdffr_2: ++** ret ++*/ ++TEST_UNIFORM_P_SINGLE (wrffr_rdffr_2, ++ svwrffr (p1); ++ svrdffr ()); ++ ++/* ++** wrffr_rdffr_3: ++** mov p0\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P_SINGLE (wrffr_rdffr_3, ++ svwrffr (p1); ++ svwrffr (p2); ++ svrdffr (); ++ p0 = svrdffr ()); +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f16.c +new file mode 100644 +index 000000000..d0cd8281a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** recpe_f16_tied1: ++** frecpe z0\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recpe_f16_tied1, svfloat16_t, ++ z0 = svrecpe_f16 (z0), ++ z0 = svrecpe (z0)) ++ ++/* ++** recpe_f16_untied: ++** frecpe z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recpe_f16_untied, svfloat16_t, ++ z0 = svrecpe_f16 (z1), ++ z0 = svrecpe (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f32.c +new file mode 100644 +index 000000000..013ed8c43 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** recpe_f32_tied1: ++** frecpe z0\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recpe_f32_tied1, svfloat32_t, ++ z0 = svrecpe_f32 (z0), ++ z0 = svrecpe (z0)) ++ ++/* ++** recpe_f32_untied: ++** frecpe z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recpe_f32_untied, svfloat32_t, ++ z0 = svrecpe_f32 (z1), ++ z0 = svrecpe (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f64.c +new file mode 100644 +index 000000000..40b3df292 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** recpe_f64_tied1: ++** frecpe z0\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recpe_f64_tied1, svfloat64_t, ++ z0 = svrecpe_f64 (z0), ++ z0 = svrecpe (z0)) ++ ++/* ++** recpe_f64_untied: ++** frecpe z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recpe_f64_untied, svfloat64_t, ++ z0 = svrecpe_f64 (z1), ++ z0 = svrecpe (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f16.c +new file mode 100644 +index 000000000..e35c5c545 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** recps_f16_tied1: ++** frecps z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recps_f16_tied1, 
svfloat16_t, ++ z0 = svrecps_f16 (z0, z1), ++ z0 = svrecps (z0, z1)) ++ ++/* ++** recps_f16_tied2: ++** frecps z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recps_f16_tied2, svfloat16_t, ++ z0 = svrecps_f16 (z1, z0), ++ z0 = svrecps (z1, z0)) ++ ++/* ++** recps_f16_untied: ++** frecps z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recps_f16_untied, svfloat16_t, ++ z0 = svrecps_f16 (z1, z2), ++ z0 = svrecps (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f32.c +new file mode 100644 +index 000000000..3f3aa203e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** recps_f32_tied1: ++** frecps z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recps_f32_tied1, svfloat32_t, ++ z0 = svrecps_f32 (z0, z1), ++ z0 = svrecps (z0, z1)) ++ ++/* ++** recps_f32_tied2: ++** frecps z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recps_f32_tied2, svfloat32_t, ++ z0 = svrecps_f32 (z1, z0), ++ z0 = svrecps (z1, z0)) ++ ++/* ++** recps_f32_untied: ++** frecps z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recps_f32_untied, svfloat32_t, ++ z0 = svrecps_f32 (z1, z2), ++ z0 = svrecps (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f64.c +new file mode 100644 +index 000000000..eca421d5e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** recps_f64_tied1: ++** frecps z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recps_f64_tied1, svfloat64_t, ++ z0 = svrecps_f64 (z0, z1), ++ z0 = svrecps (z0, z1)) ++ ++/* ++** recps_f64_tied2: ++** frecps z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recps_f64_tied2, svfloat64_t, ++ z0 = svrecps_f64 (z1, z0), ++ z0 = svrecps (z1, z0)) ++ ++/* ++** recps_f64_untied: ++** frecps z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recps_f64_untied, svfloat64_t, ++ z0 = svrecps_f64 (z1, z2), ++ z0 = svrecps (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f16.c +new file mode 100644 +index 000000000..2dd7ada2c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** recpx_f16_m_tied12: ++** frecpx z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f16_m_tied12, svfloat16_t, ++ z0 = svrecpx_f16_m (z0, p0, z0), ++ z0 = svrecpx_m (z0, p0, z0)) ++ ++/* ++** recpx_f16_m_tied1: ++** frecpx z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f16_m_tied1, svfloat16_t, ++ z0 = svrecpx_f16_m (z0, p0, z1), ++ z0 = svrecpx_m (z0, p0, z1)) ++ ++/* ++** recpx_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frecpx z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f16_m_tied2, svfloat16_t, ++ z0 = svrecpx_f16_m (z1, p0, z0), ++ z0 = svrecpx_m (z1, p0, z0)) ++ ++/* ++** recpx_f16_m_untied: ++** movprfx z0, z2 ++** frecpx z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f16_m_untied, svfloat16_t, ++ z0 = svrecpx_f16_m (z2, p0, z1), ++ z0 = svrecpx_m (z2, p0, z1)) ++ ++/* ++** recpx_f16_z_tied1: ++** mov 
(z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** frecpx z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f16_z_tied1, svfloat16_t, ++ z0 = svrecpx_f16_z (p0, z0), ++ z0 = svrecpx_z (p0, z0)) ++ ++/* ++** recpx_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** frecpx z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f16_z_untied, svfloat16_t, ++ z0 = svrecpx_f16_z (p0, z1), ++ z0 = svrecpx_z (p0, z1)) ++ ++/* ++** recpx_f16_x_tied1: ++** frecpx z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f16_x_tied1, svfloat16_t, ++ z0 = svrecpx_f16_x (p0, z0), ++ z0 = svrecpx_x (p0, z0)) ++ ++/* ++** recpx_f16_x_untied: ++** frecpx z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f16_x_untied, svfloat16_t, ++ z0 = svrecpx_f16_x (p0, z1), ++ z0 = svrecpx_x (p0, z1)) ++ ++/* ++** ptrue_recpx_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_recpx_f16_x_tied1, svfloat16_t, ++ z0 = svrecpx_f16_x (svptrue_b16 (), z0), ++ z0 = svrecpx_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_recpx_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_recpx_f16_x_untied, svfloat16_t, ++ z0 = svrecpx_f16_x (svptrue_b16 (), z1), ++ z0 = svrecpx_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f32.c +new file mode 100644 +index 000000000..6364fb83b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** recpx_f32_m_tied12: ++** frecpx z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f32_m_tied12, svfloat32_t, ++ z0 = svrecpx_f32_m (z0, p0, z0), ++ z0 = svrecpx_m (z0, p0, z0)) ++ ++/* ++** recpx_f32_m_tied1: ++** frecpx z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f32_m_tied1, svfloat32_t, ++ z0 = svrecpx_f32_m (z0, p0, z1), ++ z0 = svrecpx_m (z0, p0, z1)) ++ ++/* ++** recpx_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frecpx z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f32_m_tied2, svfloat32_t, ++ z0 = svrecpx_f32_m (z1, p0, z0), ++ z0 = svrecpx_m (z1, p0, z0)) ++ ++/* ++** recpx_f32_m_untied: ++** movprfx z0, z2 ++** frecpx z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f32_m_untied, svfloat32_t, ++ z0 = svrecpx_f32_m (z2, p0, z1), ++ z0 = svrecpx_m (z2, p0, z1)) ++ ++/* ++** recpx_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** frecpx z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f32_z_tied1, svfloat32_t, ++ z0 = svrecpx_f32_z (p0, z0), ++ z0 = svrecpx_z (p0, z0)) ++ ++/* ++** recpx_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** frecpx z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f32_z_untied, svfloat32_t, ++ z0 = svrecpx_f32_z (p0, z1), ++ z0 = svrecpx_z (p0, z1)) ++ ++/* ++** recpx_f32_x_tied1: ++** frecpx z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f32_x_tied1, svfloat32_t, ++ z0 = svrecpx_f32_x (p0, z0), ++ z0 = svrecpx_x (p0, z0)) ++ ++/* ++** recpx_f32_x_untied: ++** frecpx z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f32_x_untied, svfloat32_t, ++ z0 = svrecpx_f32_x (p0, z1), ++ z0 = svrecpx_x (p0, z1)) ++ ++/* ++** ptrue_recpx_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_recpx_f32_x_tied1, svfloat32_t, ++ z0 = svrecpx_f32_x (svptrue_b32 (), z0), ++ z0 = svrecpx_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_recpx_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_recpx_f32_x_untied, svfloat32_t, ++ z0 = svrecpx_f32_x (svptrue_b32 (), z1), ++ z0 = svrecpx_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f64.c +new file mode 100644 +index 000000000..ca5232331 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** recpx_f64_m_tied12: ++** frecpx z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f64_m_tied12, svfloat64_t, ++ z0 = svrecpx_f64_m (z0, p0, z0), ++ z0 = svrecpx_m (z0, p0, z0)) ++ ++/* ++** recpx_f64_m_tied1: ++** frecpx z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f64_m_tied1, svfloat64_t, ++ z0 = svrecpx_f64_m (z0, p0, z1), ++ z0 = svrecpx_m (z0, p0, z1)) ++ ++/* ++** recpx_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** frecpx z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f64_m_tied2, svfloat64_t, ++ z0 = svrecpx_f64_m (z1, p0, z0), ++ z0 = svrecpx_m (z1, p0, z0)) ++ ++/* ++** recpx_f64_m_untied: ++** movprfx z0, z2 ++** frecpx z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f64_m_untied, svfloat64_t, ++ z0 = svrecpx_f64_m (z2, p0, z1), ++ z0 = svrecpx_m (z2, p0, z1)) ++ ++/* ++** recpx_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** frecpx z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f64_z_tied1, svfloat64_t, ++ z0 = svrecpx_f64_z (p0, z0), ++ z0 = svrecpx_z (p0, z0)) ++ ++/* ++** recpx_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** frecpx z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f64_z_untied, svfloat64_t, ++ z0 = svrecpx_f64_z (p0, z1), ++ z0 = svrecpx_z (p0, z1)) ++ ++/* ++** recpx_f64_x_tied1: ++** frecpx z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f64_x_tied1, svfloat64_t, ++ z0 = svrecpx_f64_x (p0, z0), ++ z0 = svrecpx_x (p0, z0)) ++ ++/* ++** recpx_f64_x_untied: ++** frecpx z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (recpx_f64_x_untied, svfloat64_t, ++ z0 = svrecpx_f64_x (p0, z1), ++ z0 = svrecpx_x (p0, z1)) ++ ++/* ++** ptrue_recpx_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_recpx_f64_x_tied1, svfloat64_t, ++ z0 = svrecpx_f64_x (svptrue_b64 (), z0), ++ z0 = svrecpx_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_recpx_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_recpx_f64_x_untied, svfloat64_t, ++ z0 = svrecpx_f64_x (svptrue_b64 (), z1), ++ z0 = svrecpx_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c +new file mode 100644 +index 000000000..2d2c2a714 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_bf16_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_bf16_tied1, svbfloat16_t, svbfloat16_t, ++ z0_res = svreinterpret_bf16_bf16 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_bf16_untied, svbfloat16_t, svbfloat16_t, ++ z0 = svreinterpret_bf16_bf16 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_f16_tied1, svbfloat16_t, svfloat16_t, ++ z0_res = svreinterpret_bf16_f16 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_f16_untied, svbfloat16_t, svfloat16_t, ++ z0 = svreinterpret_bf16_f16 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_f32_tied1, svbfloat16_t, svfloat32_t, ++ z0_res = svreinterpret_bf16_f32 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_f32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_f32_untied, svbfloat16_t, svfloat32_t, ++ z0 = svreinterpret_bf16_f32 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_f64_tied1, svbfloat16_t, svfloat64_t, ++ z0_res = svreinterpret_bf16_f64 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_f64_untied, svbfloat16_t, svfloat64_t, ++ z0 = svreinterpret_bf16_f64 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_s8_tied1, svbfloat16_t, svint8_t, ++ z0_res = svreinterpret_bf16_s8 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_s8_untied, svbfloat16_t, svint8_t, ++ z0 = svreinterpret_bf16_s8 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_s16_tied1, svbfloat16_t, svint16_t, ++ z0_res = svreinterpret_bf16_s16 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_s16_untied, svbfloat16_t, svint16_t, ++ z0 = svreinterpret_bf16_s16 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_s32_tied1, svbfloat16_t, svint32_t, ++ z0_res = svreinterpret_bf16_s32 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_s32_untied, svbfloat16_t, svint32_t, ++ z0 = svreinterpret_bf16_s32 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** 
reinterpret_bf16_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_s64_tied1, svbfloat16_t, svint64_t, ++ z0_res = svreinterpret_bf16_s64 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_s64_untied, svbfloat16_t, svint64_t, ++ z0 = svreinterpret_bf16_s64 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_u8_tied1, svbfloat16_t, svuint8_t, ++ z0_res = svreinterpret_bf16_u8 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_u8_untied, svbfloat16_t, svuint8_t, ++ z0 = svreinterpret_bf16_u8 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_u16_tied1, svbfloat16_t, svuint16_t, ++ z0_res = svreinterpret_bf16_u16 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_u16_untied, svbfloat16_t, svuint16_t, ++ z0 = svreinterpret_bf16_u16 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_u32_tied1, svbfloat16_t, svuint32_t, ++ z0_res = svreinterpret_bf16_u32 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_u32_untied, svbfloat16_t, svuint32_t, ++ z0 = svreinterpret_bf16_u32 (z4), ++ z0 = svreinterpret_bf16 (z4)) ++ ++/* ++** reinterpret_bf16_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_bf16_u64_tied1, svbfloat16_t, svuint64_t, ++ z0_res = svreinterpret_bf16_u64 (z0), ++ z0_res = svreinterpret_bf16 (z0)) ++ ++/* ++** reinterpret_bf16_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_bf16_u64_untied, svbfloat16_t, svuint64_t, ++ z0 = svreinterpret_bf16_u64 (z4), ++ z0 = svreinterpret_bf16 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c +new file mode 100644 +index 000000000..60705e628 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_f16_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_bf16_tied1, svfloat16_t, svbfloat16_t, ++ z0_res = svreinterpret_f16_bf16 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_bf16_untied, svfloat16_t, svbfloat16_t, ++ z0 = svreinterpret_f16_bf16 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_f16_tied1, svfloat16_t, svfloat16_t, ++ z0_res = svreinterpret_f16_f16 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_f16_untied, svfloat16_t, svfloat16_t, ++ z0 = svreinterpret_f16_f16 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_f32_tied1, svfloat16_t, svfloat32_t, ++ z0_res = svreinterpret_f16_f32 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** 
reinterpret_f16_f32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_f32_untied, svfloat16_t, svfloat32_t, ++ z0 = svreinterpret_f16_f32 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_f64_tied1, svfloat16_t, svfloat64_t, ++ z0_res = svreinterpret_f16_f64 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_f64_untied, svfloat16_t, svfloat64_t, ++ z0 = svreinterpret_f16_f64 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_s8_tied1, svfloat16_t, svint8_t, ++ z0_res = svreinterpret_f16_s8 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_s8_untied, svfloat16_t, svint8_t, ++ z0 = svreinterpret_f16_s8 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_s16_tied1, svfloat16_t, svint16_t, ++ z0_res = svreinterpret_f16_s16 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_s16_untied, svfloat16_t, svint16_t, ++ z0 = svreinterpret_f16_s16 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_s32_tied1, svfloat16_t, svint32_t, ++ z0_res = svreinterpret_f16_s32 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_s32_untied, svfloat16_t, svint32_t, ++ z0 = svreinterpret_f16_s32 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_s64_tied1, svfloat16_t, svint64_t, ++ z0_res = svreinterpret_f16_s64 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_s64_untied, svfloat16_t, svint64_t, ++ z0 = svreinterpret_f16_s64 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_u8_tied1, svfloat16_t, svuint8_t, ++ z0_res = svreinterpret_f16_u8 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_u8_untied, svfloat16_t, svuint8_t, ++ z0 = svreinterpret_f16_u8 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_u16_tied1, svfloat16_t, svuint16_t, ++ z0_res = svreinterpret_f16_u16 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_u16_untied, svfloat16_t, svuint16_t, ++ z0 = svreinterpret_f16_u16 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f16_u32_tied1, svfloat16_t, svuint32_t, ++ z0_res = svreinterpret_f16_u32 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_u32_untied, svfloat16_t, svuint32_t, ++ z0 = svreinterpret_f16_u32 (z4), ++ z0 = svreinterpret_f16 (z4)) ++ ++/* ++** reinterpret_f16_u64_tied1: ++** ret ++*/ 
++TEST_DUAL_Z_REV (reinterpret_f16_u64_tied1, svfloat16_t, svuint64_t, ++ z0_res = svreinterpret_f16_u64 (z0), ++ z0_res = svreinterpret_f16 (z0)) ++ ++/* ++** reinterpret_f16_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f16_u64_untied, svfloat16_t, svuint64_t, ++ z0 = svreinterpret_f16_u64 (z4), ++ z0 = svreinterpret_f16 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c +new file mode 100644 +index 000000000..06fc46f25 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_f32_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_bf16_tied1, svfloat32_t, svbfloat16_t, ++ z0_res = svreinterpret_f32_bf16 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_bf16_untied, svfloat32_t, svbfloat16_t, ++ z0 = svreinterpret_f32_bf16 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_f16_tied1, svfloat32_t, svfloat16_t, ++ z0_res = svreinterpret_f32_f16 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_f16_untied, svfloat32_t, svfloat16_t, ++ z0 = svreinterpret_f32_f16 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_f32_tied1, svfloat32_t, svfloat32_t, ++ z0_res = svreinterpret_f32_f32 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_f32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_f32_untied, svfloat32_t, svfloat32_t, ++ z0 = svreinterpret_f32_f32 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_f64_tied1, svfloat32_t, svfloat64_t, ++ z0_res = svreinterpret_f32_f64 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_f64_untied, svfloat32_t, svfloat64_t, ++ z0 = svreinterpret_f32_f64 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_s8_tied1, svfloat32_t, svint8_t, ++ z0_res = svreinterpret_f32_s8 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_s8_untied, svfloat32_t, svint8_t, ++ z0 = svreinterpret_f32_s8 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_s16_tied1, svfloat32_t, svint16_t, ++ z0_res = svreinterpret_f32_s16 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_s16_untied, svfloat32_t, svint16_t, ++ z0 = svreinterpret_f32_s16 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_s32_tied1, svfloat32_t, svint32_t, ++ z0_res = svreinterpret_f32_s32 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z 
(reinterpret_f32_s32_untied, svfloat32_t, svint32_t, ++ z0 = svreinterpret_f32_s32 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_s64_tied1, svfloat32_t, svint64_t, ++ z0_res = svreinterpret_f32_s64 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_s64_untied, svfloat32_t, svint64_t, ++ z0 = svreinterpret_f32_s64 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_u8_tied1, svfloat32_t, svuint8_t, ++ z0_res = svreinterpret_f32_u8 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_u8_untied, svfloat32_t, svuint8_t, ++ z0 = svreinterpret_f32_u8 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_u16_tied1, svfloat32_t, svuint16_t, ++ z0_res = svreinterpret_f32_u16 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_u16_untied, svfloat32_t, svuint16_t, ++ z0 = svreinterpret_f32_u16 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_u32_tied1, svfloat32_t, svuint32_t, ++ z0_res = svreinterpret_f32_u32 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_u32_untied, svfloat32_t, svuint32_t, ++ z0 = svreinterpret_f32_u32 (z4), ++ z0 = svreinterpret_f32 (z4)) ++ ++/* ++** reinterpret_f32_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f32_u64_tied1, svfloat32_t, svuint64_t, ++ z0_res = svreinterpret_f32_u64 (z0), ++ z0_res = svreinterpret_f32 (z0)) ++ ++/* ++** reinterpret_f32_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f32_u64_untied, svfloat32_t, svuint64_t, ++ z0 = svreinterpret_f32_u64 (z4), ++ z0 = svreinterpret_f32 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c +new file mode 100644 +index 000000000..003ee3fe2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_f64_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_bf16_tied1, svfloat64_t, svbfloat16_t, ++ z0_res = svreinterpret_f64_bf16 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_bf16_untied, svfloat64_t, svbfloat16_t, ++ z0 = svreinterpret_f64_bf16 (z4), ++ z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_f16_tied1, svfloat64_t, svfloat16_t, ++ z0_res = svreinterpret_f64_f16 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_f16_untied, svfloat64_t, svfloat16_t, ++ z0 = svreinterpret_f64_f16 (z4), ++ z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_f32_tied1, svfloat64_t, svfloat32_t, ++ 
z0_res = svreinterpret_f64_f32 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_f32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_f32_untied, svfloat64_t, svfloat32_t, ++ z0 = svreinterpret_f64_f32 (z4), ++ z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_f64_tied1, svfloat64_t, svfloat64_t, ++ z0_res = svreinterpret_f64_f64 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_f64_untied, svfloat64_t, svfloat64_t, ++ z0 = svreinterpret_f64_f64 (z4), ++ z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_s8_tied1, svfloat64_t, svint8_t, ++ z0_res = svreinterpret_f64_s8 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_s8_untied, svfloat64_t, svint8_t, ++ z0 = svreinterpret_f64_s8 (z4), ++ z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_s16_tied1, svfloat64_t, svint16_t, ++ z0_res = svreinterpret_f64_s16 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_s16_untied, svfloat64_t, svint16_t, ++ z0 = svreinterpret_f64_s16 (z4), ++ z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_s32_tied1, svfloat64_t, svint32_t, ++ z0_res = svreinterpret_f64_s32 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_s32_untied, svfloat64_t, svint32_t, ++ z0 = svreinterpret_f64_s32 (z4), ++ z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_s64_tied1, svfloat64_t, svint64_t, ++ z0_res = svreinterpret_f64_s64 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_s64_untied, svfloat64_t, svint64_t, ++ z0 = svreinterpret_f64_s64 (z4), ++ z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_u8_tied1, svfloat64_t, svuint8_t, ++ z0_res = svreinterpret_f64_u8 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_u8_untied, svfloat64_t, svuint8_t, ++ z0 = svreinterpret_f64_u8 (z4), ++ z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_u16_tied1, svfloat64_t, svuint16_t, ++ z0_res = svreinterpret_f64_u16 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_u16_untied, svfloat64_t, svuint16_t, ++ z0 = svreinterpret_f64_u16 (z4), ++ z0 = svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_u32_tied1, svfloat64_t, svuint32_t, ++ z0_res = svreinterpret_f64_u32 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_u32_untied, svfloat64_t, svuint32_t, ++ z0 = svreinterpret_f64_u32 (z4), ++ z0 = 
svreinterpret_f64 (z4)) ++ ++/* ++** reinterpret_f64_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_f64_u64_tied1, svfloat64_t, svuint64_t, ++ z0_res = svreinterpret_f64_u64 (z0), ++ z0_res = svreinterpret_f64 (z0)) ++ ++/* ++** reinterpret_f64_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_f64_u64_untied, svfloat64_t, svuint64_t, ++ z0 = svreinterpret_f64_u64 (z4), ++ z0 = svreinterpret_f64 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c +new file mode 100644 +index 000000000..d62817c2c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_s16_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_bf16_tied1, svint16_t, svbfloat16_t, ++ z0_res = svreinterpret_s16_bf16 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_bf16_untied, svint16_t, svbfloat16_t, ++ z0 = svreinterpret_s16_bf16 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_f16_tied1, svint16_t, svfloat16_t, ++ z0_res = svreinterpret_s16_f16 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_f16_untied, svint16_t, svfloat16_t, ++ z0 = svreinterpret_s16_f16 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_f32_tied1, svint16_t, svfloat32_t, ++ z0_res = svreinterpret_s16_f32 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_f32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_f32_untied, svint16_t, svfloat32_t, ++ z0 = svreinterpret_s16_f32 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_f64_tied1, svint16_t, svfloat64_t, ++ z0_res = svreinterpret_s16_f64 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_f64_untied, svint16_t, svfloat64_t, ++ z0 = svreinterpret_s16_f64 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_s8_tied1, svint16_t, svint8_t, ++ z0_res = svreinterpret_s16_s8 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_s8_untied, svint16_t, svint8_t, ++ z0 = svreinterpret_s16_s8 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_s16_tied1, svint16_t, svint16_t, ++ z0_res = svreinterpret_s16_s16 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_s16_untied, svint16_t, svint16_t, ++ z0 = svreinterpret_s16_s16 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_s32_tied1, svint16_t, svint32_t, ++ z0_res = svreinterpret_s16_s32 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_s32_untied: ++** mov 
z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_s32_untied, svint16_t, svint32_t, ++ z0 = svreinterpret_s16_s32 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_s64_tied1, svint16_t, svint64_t, ++ z0_res = svreinterpret_s16_s64 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_s64_untied, svint16_t, svint64_t, ++ z0 = svreinterpret_s16_s64 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_u8_tied1, svint16_t, svuint8_t, ++ z0_res = svreinterpret_s16_u8 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_u8_untied, svint16_t, svuint8_t, ++ z0 = svreinterpret_s16_u8 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_u16_tied1, svint16_t, svuint16_t, ++ z0_res = svreinterpret_s16_u16 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_u16_untied, svint16_t, svuint16_t, ++ z0 = svreinterpret_s16_u16 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_u32_tied1, svint16_t, svuint32_t, ++ z0_res = svreinterpret_s16_u32 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_u32_untied, svint16_t, svuint32_t, ++ z0 = svreinterpret_s16_u32 (z4), ++ z0 = svreinterpret_s16 (z4)) ++ ++/* ++** reinterpret_s16_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s16_u64_tied1, svint16_t, svuint64_t, ++ z0_res = svreinterpret_s16_u64 (z0), ++ z0_res = svreinterpret_s16 (z0)) ++ ++/* ++** reinterpret_s16_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s16_u64_untied, svint16_t, svuint64_t, ++ z0 = svreinterpret_s16_u64 (z4), ++ z0 = svreinterpret_s16 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c +new file mode 100644 +index 000000000..e1068f244 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_s32_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_bf16_tied1, svint32_t, svbfloat16_t, ++ z0_res = svreinterpret_s32_bf16 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_bf16_untied, svint32_t, svbfloat16_t, ++ z0 = svreinterpret_s32_bf16 (z4), ++ z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_f16_tied1, svint32_t, svfloat16_t, ++ z0_res = svreinterpret_s32_f16 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_f16_untied, svint32_t, svfloat16_t, ++ z0 = svreinterpret_s32_f16 (z4), ++ z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_f32_tied1, svint32_t, 
svfloat32_t, ++ z0_res = svreinterpret_s32_f32 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_f32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_f32_untied, svint32_t, svfloat32_t, ++ z0 = svreinterpret_s32_f32 (z4), ++ z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_f64_tied1, svint32_t, svfloat64_t, ++ z0_res = svreinterpret_s32_f64 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_f64_untied, svint32_t, svfloat64_t, ++ z0 = svreinterpret_s32_f64 (z4), ++ z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_s8_tied1, svint32_t, svint8_t, ++ z0_res = svreinterpret_s32_s8 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_s8_untied, svint32_t, svint8_t, ++ z0 = svreinterpret_s32_s8 (z4), ++ z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_s16_tied1, svint32_t, svint16_t, ++ z0_res = svreinterpret_s32_s16 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_s16_untied, svint32_t, svint16_t, ++ z0 = svreinterpret_s32_s16 (z4), ++ z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_s32_tied1, svint32_t, svint32_t, ++ z0_res = svreinterpret_s32_s32 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_s32_untied, svint32_t, svint32_t, ++ z0 = svreinterpret_s32_s32 (z4), ++ z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_s64_tied1, svint32_t, svint64_t, ++ z0_res = svreinterpret_s32_s64 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_s64_untied, svint32_t, svint64_t, ++ z0 = svreinterpret_s32_s64 (z4), ++ z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_u8_tied1, svint32_t, svuint8_t, ++ z0_res = svreinterpret_s32_u8 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_u8_untied, svint32_t, svuint8_t, ++ z0 = svreinterpret_s32_u8 (z4), ++ z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_u16_tied1, svint32_t, svuint16_t, ++ z0_res = svreinterpret_s32_u16 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_u16_untied, svint32_t, svuint16_t, ++ z0 = svreinterpret_s32_u16 (z4), ++ z0 = svreinterpret_s32 (z4)) ++ ++/* ++** reinterpret_s32_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_u32_tied1, svint32_t, svuint32_t, ++ z0_res = svreinterpret_s32_u32 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_u32_untied, svint32_t, svuint32_t, ++ z0 = svreinterpret_s32_u32 (z4), ++ z0 = svreinterpret_s32 
(z4)) ++ ++/* ++** reinterpret_s32_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s32_u64_tied1, svint32_t, svuint64_t, ++ z0_res = svreinterpret_s32_u64 (z0), ++ z0_res = svreinterpret_s32 (z0)) ++ ++/* ++** reinterpret_s32_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s32_u64_untied, svint32_t, svuint64_t, ++ z0 = svreinterpret_s32_u64 (z4), ++ z0 = svreinterpret_s32 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c +new file mode 100644 +index 000000000..cada7533c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_s64_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_bf16_tied1, svint64_t, svbfloat16_t, ++ z0_res = svreinterpret_s64_bf16 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_bf16_untied, svint64_t, svbfloat16_t, ++ z0 = svreinterpret_s64_bf16 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_f16_tied1, svint64_t, svfloat16_t, ++ z0_res = svreinterpret_s64_f16 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_f16_untied, svint64_t, svfloat16_t, ++ z0 = svreinterpret_s64_f16 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_f32_tied1, svint64_t, svfloat32_t, ++ z0_res = svreinterpret_s64_f32 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_f32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_f32_untied, svint64_t, svfloat32_t, ++ z0 = svreinterpret_s64_f32 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_f64_tied1, svint64_t, svfloat64_t, ++ z0_res = svreinterpret_s64_f64 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_f64_untied, svint64_t, svfloat64_t, ++ z0 = svreinterpret_s64_f64 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_s8_tied1, svint64_t, svint8_t, ++ z0_res = svreinterpret_s64_s8 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_s8_untied, svint64_t, svint8_t, ++ z0 = svreinterpret_s64_s8 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_s16_tied1, svint64_t, svint16_t, ++ z0_res = svreinterpret_s64_s16 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_s16_untied, svint64_t, svint16_t, ++ z0 = svreinterpret_s64_s16 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_s32_tied1, svint64_t, svint32_t, ++ z0_res = svreinterpret_s64_s32 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ 
++TEST_DUAL_Z (reinterpret_s64_s32_untied, svint64_t, svint32_t, ++ z0 = svreinterpret_s64_s32 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_s64_tied1, svint64_t, svint64_t, ++ z0_res = svreinterpret_s64_s64 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_s64_untied, svint64_t, svint64_t, ++ z0 = svreinterpret_s64_s64 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_u8_tied1, svint64_t, svuint8_t, ++ z0_res = svreinterpret_s64_u8 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_u8_untied, svint64_t, svuint8_t, ++ z0 = svreinterpret_s64_u8 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_u16_tied1, svint64_t, svuint16_t, ++ z0_res = svreinterpret_s64_u16 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_u16_untied, svint64_t, svuint16_t, ++ z0 = svreinterpret_s64_u16 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_u32_tied1, svint64_t, svuint32_t, ++ z0_res = svreinterpret_s64_u32 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_u32_untied, svint64_t, svuint32_t, ++ z0 = svreinterpret_s64_u32 (z4), ++ z0 = svreinterpret_s64 (z4)) ++ ++/* ++** reinterpret_s64_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s64_u64_tied1, svint64_t, svuint64_t, ++ z0_res = svreinterpret_s64_u64 (z0), ++ z0_res = svreinterpret_s64 (z0)) ++ ++/* ++** reinterpret_s64_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s64_u64_untied, svint64_t, svuint64_t, ++ z0 = svreinterpret_s64_u64 (z4), ++ z0 = svreinterpret_s64 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c +new file mode 100644 +index 000000000..23a40d0ba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_s8_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_bf16_tied1, svint8_t, svbfloat16_t, ++ z0_res = svreinterpret_s8_bf16 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_bf16_untied, svint8_t, svbfloat16_t, ++ z0 = svreinterpret_s8_bf16 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_f16_tied1, svint8_t, svfloat16_t, ++ z0_res = svreinterpret_s8_f16 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_f16_untied, svint8_t, svfloat16_t, ++ z0 = svreinterpret_s8_f16 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_f32_tied1, svint8_t, svfloat32_t, ++ z0_res = svreinterpret_s8_f32 (z0), ++ z0_res = 
svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_f32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_f32_untied, svint8_t, svfloat32_t, ++ z0 = svreinterpret_s8_f32 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_f64_tied1, svint8_t, svfloat64_t, ++ z0_res = svreinterpret_s8_f64 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_f64_untied, svint8_t, svfloat64_t, ++ z0 = svreinterpret_s8_f64 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_s8_tied1, svint8_t, svint8_t, ++ z0_res = svreinterpret_s8_s8 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_s8_untied, svint8_t, svint8_t, ++ z0 = svreinterpret_s8_s8 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_s16_tied1, svint8_t, svint16_t, ++ z0_res = svreinterpret_s8_s16 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_s16_untied, svint8_t, svint16_t, ++ z0 = svreinterpret_s8_s16 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_s32_tied1, svint8_t, svint32_t, ++ z0_res = svreinterpret_s8_s32 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_s32_untied, svint8_t, svint32_t, ++ z0 = svreinterpret_s8_s32 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_s64_tied1, svint8_t, svint64_t, ++ z0_res = svreinterpret_s8_s64 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_s64_untied, svint8_t, svint64_t, ++ z0 = svreinterpret_s8_s64 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_u8_tied1, svint8_t, svuint8_t, ++ z0_res = svreinterpret_s8_u8 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_u8_untied, svint8_t, svuint8_t, ++ z0 = svreinterpret_s8_u8 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_u16_tied1, svint8_t, svuint16_t, ++ z0_res = svreinterpret_s8_u16 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_u16_untied, svint8_t, svuint16_t, ++ z0 = svreinterpret_s8_u16 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_u32_tied1, svint8_t, svuint32_t, ++ z0_res = svreinterpret_s8_u32 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_u32_untied, svint8_t, svuint32_t, ++ z0 = svreinterpret_s8_u32 (z4), ++ z0 = svreinterpret_s8 (z4)) ++ ++/* ++** reinterpret_s8_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_s8_u64_tied1, svint8_t, svuint64_t, ++ z0_res = 
svreinterpret_s8_u64 (z0), ++ z0_res = svreinterpret_s8 (z0)) ++ ++/* ++** reinterpret_s8_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_s8_u64_untied, svint8_t, svuint64_t, ++ z0 = svreinterpret_s8_u64 (z4), ++ z0 = svreinterpret_s8 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c +new file mode 100644 +index 000000000..48e8ecaff +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_u16_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_bf16_tied1, svuint16_t, svbfloat16_t, ++ z0_res = svreinterpret_u16_bf16 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_bf16_untied, svuint16_t, svbfloat16_t, ++ z0 = svreinterpret_u16_bf16 (z4), ++ z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_f16_tied1, svuint16_t, svfloat16_t, ++ z0_res = svreinterpret_u16_f16 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_f16_untied, svuint16_t, svfloat16_t, ++ z0 = svreinterpret_u16_f16 (z4), ++ z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_f32_tied1, svuint16_t, svfloat32_t, ++ z0_res = svreinterpret_u16_f32 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_f32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_f32_untied, svuint16_t, svfloat32_t, ++ z0 = svreinterpret_u16_f32 (z4), ++ z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_f64_tied1, svuint16_t, svfloat64_t, ++ z0_res = svreinterpret_u16_f64 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_f64_untied, svuint16_t, svfloat64_t, ++ z0 = svreinterpret_u16_f64 (z4), ++ z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_s8_tied1, svuint16_t, svint8_t, ++ z0_res = svreinterpret_u16_s8 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_s8_untied, svuint16_t, svint8_t, ++ z0 = svreinterpret_u16_s8 (z4), ++ z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_s16_tied1, svuint16_t, svint16_t, ++ z0_res = svreinterpret_u16_s16 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_s16_untied, svuint16_t, svint16_t, ++ z0 = svreinterpret_u16_s16 (z4), ++ z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_s32_tied1, svuint16_t, svint32_t, ++ z0_res = svreinterpret_u16_s32 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_s32_untied, svuint16_t, svint32_t, ++ z0 = svreinterpret_u16_s32 (z4), ++ z0 = svreinterpret_u16 (z4)) 
++ ++/* ++** reinterpret_u16_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_s64_tied1, svuint16_t, svint64_t, ++ z0_res = svreinterpret_u16_s64 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_s64_untied, svuint16_t, svint64_t, ++ z0 = svreinterpret_u16_s64 (z4), ++ z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_u8_tied1, svuint16_t, svuint8_t, ++ z0_res = svreinterpret_u16_u8 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_u8_untied, svuint16_t, svuint8_t, ++ z0 = svreinterpret_u16_u8 (z4), ++ z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_u16_tied1, svuint16_t, svuint16_t, ++ z0_res = svreinterpret_u16_u16 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_u16_untied, svuint16_t, svuint16_t, ++ z0 = svreinterpret_u16_u16 (z4), ++ z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_u32_tied1, svuint16_t, svuint32_t, ++ z0_res = svreinterpret_u16_u32 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_u32_untied, svuint16_t, svuint32_t, ++ z0 = svreinterpret_u16_u32 (z4), ++ z0 = svreinterpret_u16 (z4)) ++ ++/* ++** reinterpret_u16_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u16_u64_tied1, svuint16_t, svuint64_t, ++ z0_res = svreinterpret_u16_u64 (z0), ++ z0_res = svreinterpret_u16 (z0)) ++ ++/* ++** reinterpret_u16_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u16_u64_untied, svuint16_t, svuint64_t, ++ z0 = svreinterpret_u16_u64 (z4), ++ z0 = svreinterpret_u16 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c +new file mode 100644 +index 000000000..1d4e85712 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_u32_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_bf16_tied1, svuint32_t, svbfloat16_t, ++ z0_res = svreinterpret_u32_bf16 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_bf16_untied, svuint32_t, svbfloat16_t, ++ z0 = svreinterpret_u32_bf16 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_f16_tied1, svuint32_t, svfloat16_t, ++ z0_res = svreinterpret_u32_f16 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_f16_untied, svuint32_t, svfloat16_t, ++ z0 = svreinterpret_u32_f16 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_f32_tied1, svuint32_t, svfloat32_t, ++ z0_res = svreinterpret_u32_f32 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_f32_untied: ++** mov z0\.d, z4\.d ++** 
ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_f32_untied, svuint32_t, svfloat32_t, ++ z0 = svreinterpret_u32_f32 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_f64_tied1, svuint32_t, svfloat64_t, ++ z0_res = svreinterpret_u32_f64 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_f64_untied, svuint32_t, svfloat64_t, ++ z0 = svreinterpret_u32_f64 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_s8_tied1, svuint32_t, svint8_t, ++ z0_res = svreinterpret_u32_s8 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_s8_untied, svuint32_t, svint8_t, ++ z0 = svreinterpret_u32_s8 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_s16_tied1, svuint32_t, svint16_t, ++ z0_res = svreinterpret_u32_s16 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_s16_untied, svuint32_t, svint16_t, ++ z0 = svreinterpret_u32_s16 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_s32_tied1, svuint32_t, svint32_t, ++ z0_res = svreinterpret_u32_s32 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_s32_untied, svuint32_t, svint32_t, ++ z0 = svreinterpret_u32_s32 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_s64_tied1, svuint32_t, svint64_t, ++ z0_res = svreinterpret_u32_s64 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_s64_untied, svuint32_t, svint64_t, ++ z0 = svreinterpret_u32_s64 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_u8_tied1, svuint32_t, svuint8_t, ++ z0_res = svreinterpret_u32_u8 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_u8_untied, svuint32_t, svuint8_t, ++ z0 = svreinterpret_u32_u8 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_u16_tied1, svuint32_t, svuint16_t, ++ z0_res = svreinterpret_u32_u16 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_u16_untied, svuint32_t, svuint16_t, ++ z0 = svreinterpret_u32_u16 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_u32_tied1, svuint32_t, svuint32_t, ++ z0_res = svreinterpret_u32_u32 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_u32_untied, svuint32_t, svuint32_t, ++ z0 = svreinterpret_u32_u32 (z4), ++ z0 = svreinterpret_u32 (z4)) ++ ++/* ++** reinterpret_u32_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u32_u64_tied1, svuint32_t, svuint64_t, ++ z0_res = 
svreinterpret_u32_u64 (z0), ++ z0_res = svreinterpret_u32 (z0)) ++ ++/* ++** reinterpret_u32_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u32_u64_untied, svuint32_t, svuint64_t, ++ z0 = svreinterpret_u32_u64 (z4), ++ z0 = svreinterpret_u32 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c +new file mode 100644 +index 000000000..07af69dce +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_u64_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_bf16_tied1, svuint64_t, svbfloat16_t, ++ z0_res = svreinterpret_u64_bf16 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_bf16_untied, svuint64_t, svbfloat16_t, ++ z0 = svreinterpret_u64_bf16 (z4), ++ z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_f16_tied1, svuint64_t, svfloat16_t, ++ z0_res = svreinterpret_u64_f16 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_f16_untied, svuint64_t, svfloat16_t, ++ z0 = svreinterpret_u64_f16 (z4), ++ z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_f32_tied1, svuint64_t, svfloat32_t, ++ z0_res = svreinterpret_u64_f32 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_f32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_f32_untied, svuint64_t, svfloat32_t, ++ z0 = svreinterpret_u64_f32 (z4), ++ z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_f64_tied1, svuint64_t, svfloat64_t, ++ z0_res = svreinterpret_u64_f64 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_f64_untied, svuint64_t, svfloat64_t, ++ z0 = svreinterpret_u64_f64 (z4), ++ z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_s8_tied1, svuint64_t, svint8_t, ++ z0_res = svreinterpret_u64_s8 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_s8_untied, svuint64_t, svint8_t, ++ z0 = svreinterpret_u64_s8 (z4), ++ z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_s16_tied1, svuint64_t, svint16_t, ++ z0_res = svreinterpret_u64_s16 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_s16_untied, svuint64_t, svint16_t, ++ z0 = svreinterpret_u64_s16 (z4), ++ z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_s32_tied1, svuint64_t, svint32_t, ++ z0_res = svreinterpret_u64_s32 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_s32_untied, svuint64_t, svint32_t, ++ z0 = svreinterpret_u64_s32 (z4), ++ z0 = 
svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_s64_tied1, svuint64_t, svint64_t, ++ z0_res = svreinterpret_u64_s64 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_s64_untied, svuint64_t, svint64_t, ++ z0 = svreinterpret_u64_s64 (z4), ++ z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_u8_tied1, svuint64_t, svuint8_t, ++ z0_res = svreinterpret_u64_u8 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_u8_untied, svuint64_t, svuint8_t, ++ z0 = svreinterpret_u64_u8 (z4), ++ z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_u16_tied1, svuint64_t, svuint16_t, ++ z0_res = svreinterpret_u64_u16 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_u16_untied, svuint64_t, svuint16_t, ++ z0 = svreinterpret_u64_u16 (z4), ++ z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_u32_tied1, svuint64_t, svuint32_t, ++ z0_res = svreinterpret_u64_u32 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_u32_untied, svuint64_t, svuint32_t, ++ z0 = svreinterpret_u64_u32 (z4), ++ z0 = svreinterpret_u64 (z4)) ++ ++/* ++** reinterpret_u64_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u64_u64_tied1, svuint64_t, svuint64_t, ++ z0_res = svreinterpret_u64_u64 (z0), ++ z0_res = svreinterpret_u64 (z0)) ++ ++/* ++** reinterpret_u64_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u64_u64_untied, svuint64_t, svuint64_t, ++ z0 = svreinterpret_u64_u64 (z4), ++ z0 = svreinterpret_u64 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c +new file mode 100644 +index 000000000..a4c7f4c8d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** reinterpret_u8_bf16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_bf16_tied1, svuint8_t, svbfloat16_t, ++ z0_res = svreinterpret_u8_bf16 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_bf16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_bf16_untied, svuint8_t, svbfloat16_t, ++ z0 = svreinterpret_u8_bf16 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_f16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_f16_tied1, svuint8_t, svfloat16_t, ++ z0_res = svreinterpret_u8_f16 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_f16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_f16_untied, svuint8_t, svfloat16_t, ++ z0 = svreinterpret_u8_f16 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_f32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_f32_tied1, svuint8_t, svfloat32_t, ++ z0_res = svreinterpret_u8_f32 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_f32_untied: ++** mov z0\.d, z4\.d ++** ret 
++*/ ++TEST_DUAL_Z (reinterpret_u8_f32_untied, svuint8_t, svfloat32_t, ++ z0 = svreinterpret_u8_f32 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_f64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_f64_tied1, svuint8_t, svfloat64_t, ++ z0_res = svreinterpret_u8_f64 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_f64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_f64_untied, svuint8_t, svfloat64_t, ++ z0 = svreinterpret_u8_f64 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_s8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_s8_tied1, svuint8_t, svint8_t, ++ z0_res = svreinterpret_u8_s8 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_s8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_s8_untied, svuint8_t, svint8_t, ++ z0 = svreinterpret_u8_s8 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_s16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_s16_tied1, svuint8_t, svint16_t, ++ z0_res = svreinterpret_u8_s16 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_s16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_s16_untied, svuint8_t, svint16_t, ++ z0 = svreinterpret_u8_s16 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_s32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_s32_tied1, svuint8_t, svint32_t, ++ z0_res = svreinterpret_u8_s32 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_s32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_s32_untied, svuint8_t, svint32_t, ++ z0 = svreinterpret_u8_s32 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_s64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_s64_tied1, svuint8_t, svint64_t, ++ z0_res = svreinterpret_u8_s64 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_s64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_s64_untied, svuint8_t, svint64_t, ++ z0 = svreinterpret_u8_s64 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_u8_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_u8_tied1, svuint8_t, svuint8_t, ++ z0_res = svreinterpret_u8_u8 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_u8_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_u8_untied, svuint8_t, svuint8_t, ++ z0 = svreinterpret_u8_u8 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_u16_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_u16_tied1, svuint8_t, svuint16_t, ++ z0_res = svreinterpret_u8_u16 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_u16_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_u16_untied, svuint8_t, svuint16_t, ++ z0 = svreinterpret_u8_u16 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_u32_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_u32_tied1, svuint8_t, svuint32_t, ++ z0_res = svreinterpret_u8_u32 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** reinterpret_u8_u32_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_u32_untied, svuint8_t, svuint32_t, ++ z0 = svreinterpret_u8_u32 (z4), ++ z0 = svreinterpret_u8 (z4)) ++ ++/* ++** reinterpret_u8_u64_tied1: ++** ret ++*/ ++TEST_DUAL_Z_REV (reinterpret_u8_u64_tied1, svuint8_t, svuint64_t, ++ z0_res = svreinterpret_u8_u64 (z0), ++ z0_res = svreinterpret_u8 (z0)) ++ ++/* ++** 
reinterpret_u8_u64_untied: ++** mov z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (reinterpret_u8_u64_untied, svuint8_t, svuint64_t, ++ z0 = svreinterpret_u8_u64 (z4), ++ z0 = svreinterpret_u8 (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b16.c +new file mode 100644 +index 000000000..7d5c67d5c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_b16_tied1: ++** rev p0\.h, p0\.h ++** ret ++*/ ++TEST_UNIFORM_P (rev_b16_tied1, ++ p0 = svrev_b16 (p0), ++ p0 = svrev_b16 (p0)) ++ ++/* ++** rev_b16_untied: ++** rev p0\.h, p1\.h ++** ret ++*/ ++TEST_UNIFORM_P (rev_b16_untied, ++ p0 = svrev_b16 (p1), ++ p0 = svrev_b16 (p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b32.c +new file mode 100644 +index 000000000..3f8c810c0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_b32_tied1: ++** rev p0\.s, p0\.s ++** ret ++*/ ++TEST_UNIFORM_P (rev_b32_tied1, ++ p0 = svrev_b32 (p0), ++ p0 = svrev_b32 (p0)) ++ ++/* ++** rev_b32_untied: ++** rev p0\.s, p1\.s ++** ret ++*/ ++TEST_UNIFORM_P (rev_b32_untied, ++ p0 = svrev_b32 (p1), ++ p0 = svrev_b32 (p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b64.c +new file mode 100644 +index 000000000..fe937ecc6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_b64_tied1: ++** rev p0\.d, p0\.d ++** ret ++*/ ++TEST_UNIFORM_P (rev_b64_tied1, ++ p0 = svrev_b64 (p0), ++ p0 = svrev_b64 (p0)) ++ ++/* ++** rev_b64_untied: ++** rev p0\.d, p1\.d ++** ret ++*/ ++TEST_UNIFORM_P (rev_b64_untied, ++ p0 = svrev_b64 (p1), ++ p0 = svrev_b64 (p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b8.c +new file mode 100644 +index 000000000..d23e50407 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b8.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_b8_tied1: ++** rev p0\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (rev_b8_tied1, ++ p0 = svrev_b8 (p0), ++ p0 = svrev_b8 (p0)) ++ ++/* ++** rev_b8_untied: ++** rev p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (rev_b8_untied, ++ p0 = svrev_b8 (p1), ++ p0 = svrev_b8 (p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_bf16.c +new file mode 100644 +index 000000000..fe587d42c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_bf16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_bf16_tied1: ++** rev z0\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rev_bf16_tied1, svbfloat16_t, ++ z0 = svrev_bf16 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_bf16_untied: ++** rev z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rev_bf16_untied, svbfloat16_t, ++ z0 = svrev_bf16 (z1), ++ z0 = svrev (z1)) +diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f16.c +new file mode 100644 +index 000000000..321e2f900 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_f16_tied1: ++** rev z0\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rev_f16_tied1, svfloat16_t, ++ z0 = svrev_f16 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_f16_untied: ++** rev z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rev_f16_untied, svfloat16_t, ++ z0 = svrev_f16 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f32.c +new file mode 100644 +index 000000000..6f31928b5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_f32_tied1: ++** rev z0\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rev_f32_tied1, svfloat32_t, ++ z0 = svrev_f32 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_f32_untied: ++** rev z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rev_f32_untied, svfloat32_t, ++ z0 = svrev_f32 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f64.c +new file mode 100644 +index 000000000..6f14078a7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_f64_tied1: ++** rev z0\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rev_f64_tied1, svfloat64_t, ++ z0 = svrev_f64 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_f64_untied: ++** rev z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rev_f64_untied, svfloat64_t, ++ z0 = svrev_f64 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s16.c +new file mode 100644 +index 000000000..63f6ea73c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_s16_tied1: ++** rev z0\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rev_s16_tied1, svint16_t, ++ z0 = svrev_s16 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_s16_untied: ++** rev z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rev_s16_untied, svint16_t, ++ z0 = svrev_s16 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s32.c +new file mode 100644 +index 000000000..38240b7ec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_s32_tied1: ++** rev z0\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rev_s32_tied1, svint32_t, ++ z0 = svrev_s32 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_s32_untied: ++** rev z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rev_s32_untied, svint32_t, ++ z0 = svrev_s32 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s64.c +new file mode 
100644 +index 000000000..0004e4586 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_s64_tied1: ++** rev z0\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rev_s64_tied1, svint64_t, ++ z0 = svrev_s64 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_s64_untied: ++** rev z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rev_s64_untied, svint64_t, ++ z0 = svrev_s64 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s8.c +new file mode 100644 +index 000000000..44b874c92 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s8.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_s8_tied1: ++** rev z0\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rev_s8_tied1, svint8_t, ++ z0 = svrev_s8 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_s8_untied: ++** rev z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rev_s8_untied, svint8_t, ++ z0 = svrev_s8 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u16.c +new file mode 100644 +index 000000000..2b4c88854 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_u16_tied1: ++** rev z0\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rev_u16_tied1, svuint16_t, ++ z0 = svrev_u16 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_u16_untied: ++** rev z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rev_u16_untied, svuint16_t, ++ z0 = svrev_u16 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u32.c +new file mode 100644 +index 000000000..e14351f30 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_u32_tied1: ++** rev z0\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rev_u32_tied1, svuint32_t, ++ z0 = svrev_u32 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_u32_untied: ++** rev z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rev_u32_untied, svuint32_t, ++ z0 = svrev_u32 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u64.c +new file mode 100644 +index 000000000..5fc987475 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_u64_tied1: ++** rev z0\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rev_u64_tied1, svuint64_t, ++ z0 = svrev_u64 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_u64_untied: ++** rev z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rev_u64_untied, svuint64_t, ++ z0 = svrev_u64 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u8.c +new file mode 100644 +index 000000000..9dd4f440b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u8.c +@@ -0,0 +1,21 @@ ++/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rev_u8_tied1: ++** rev z0\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rev_u8_tied1, svuint8_t, ++ z0 = svrev_u8 (z0), ++ z0 = svrev (z0)) ++ ++/* ++** rev_u8_untied: ++** rev z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (rev_u8_untied, svuint8_t, ++ z0 = svrev_u8 (z1), ++ z0 = svrev (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s16.c +new file mode 100644 +index 000000000..ecfabe668 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revb_s16_m_tied12: ++** revb z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s16_m_tied12, svint16_t, ++ z0 = svrevb_s16_m (z0, p0, z0), ++ z0 = svrevb_m (z0, p0, z0)) ++ ++/* ++** revb_s16_m_tied1: ++** revb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s16_m_tied1, svint16_t, ++ z0 = svrevb_s16_m (z0, p0, z1), ++ z0 = svrevb_m (z0, p0, z1)) ++ ++/* ++** revb_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** revb z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s16_m_tied2, svint16_t, ++ z0 = svrevb_s16_m (z1, p0, z0), ++ z0 = svrevb_m (z1, p0, z0)) ++ ++/* ++** revb_s16_m_untied: ++** movprfx z0, z2 ++** revb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s16_m_untied, svint16_t, ++ z0 = svrevb_s16_m (z2, p0, z1), ++ z0 = svrevb_m (z2, p0, z1)) ++ ++/* ++** revb_s16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** revb z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s16_z_tied1, svint16_t, ++ z0 = svrevb_s16_z (p0, z0), ++ z0 = svrevb_z (p0, z0)) ++ ++/* ++** revb_s16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** revb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s16_z_untied, svint16_t, ++ z0 = svrevb_s16_z (p0, z1), ++ z0 = svrevb_z (p0, z1)) ++ ++/* ++** revb_s16_x_tied1: ++** revb z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s16_x_tied1, svint16_t, ++ z0 = svrevb_s16_x (p0, z0), ++ z0 = svrevb_x (p0, z0)) ++ ++/* ++** revb_s16_x_untied: ++** revb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s16_x_untied, svint16_t, ++ z0 = svrevb_s16_x (p0, z1), ++ z0 = svrevb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s32.c +new file mode 100644 +index 000000000..a46a81973 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revb_s32_m_tied12: ++** revb z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s32_m_tied12, svint32_t, ++ z0 = svrevb_s32_m (z0, p0, z0), ++ z0 = svrevb_m (z0, p0, z0)) ++ ++/* ++** revb_s32_m_tied1: ++** revb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s32_m_tied1, svint32_t, ++ z0 = svrevb_s32_m (z0, p0, z1), ++ z0 = svrevb_m (z0, p0, z1)) ++ ++/* ++** revb_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** revb z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s32_m_tied2, svint32_t, ++ z0 = svrevb_s32_m (z1, p0, z0), ++ z0 = svrevb_m (z1, p0, z0)) ++ ++/* ++** revb_s32_m_untied: ++** movprfx z0, z2 ++** revb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s32_m_untied, svint32_t, ++ z0 = svrevb_s32_m (z2, 
p0, z1), ++ z0 = svrevb_m (z2, p0, z1)) ++ ++/* ++** revb_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** revb z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s32_z_tied1, svint32_t, ++ z0 = svrevb_s32_z (p0, z0), ++ z0 = svrevb_z (p0, z0)) ++ ++/* ++** revb_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** revb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s32_z_untied, svint32_t, ++ z0 = svrevb_s32_z (p0, z1), ++ z0 = svrevb_z (p0, z1)) ++ ++/* ++** revb_s32_x_tied1: ++** revb z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s32_x_tied1, svint32_t, ++ z0 = svrevb_s32_x (p0, z0), ++ z0 = svrevb_x (p0, z0)) ++ ++/* ++** revb_s32_x_untied: ++** revb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s32_x_untied, svint32_t, ++ z0 = svrevb_s32_x (p0, z1), ++ z0 = svrevb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s64.c +new file mode 100644 +index 000000000..21547238c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revb_s64_m_tied12: ++** revb z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s64_m_tied12, svint64_t, ++ z0 = svrevb_s64_m (z0, p0, z0), ++ z0 = svrevb_m (z0, p0, z0)) ++ ++/* ++** revb_s64_m_tied1: ++** revb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s64_m_tied1, svint64_t, ++ z0 = svrevb_s64_m (z0, p0, z1), ++ z0 = svrevb_m (z0, p0, z1)) ++ ++/* ++** revb_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** revb z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s64_m_tied2, svint64_t, ++ z0 = svrevb_s64_m (z1, p0, z0), ++ z0 = svrevb_m (z1, p0, z0)) ++ ++/* ++** revb_s64_m_untied: ++** movprfx z0, z2 ++** revb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s64_m_untied, svint64_t, ++ z0 = svrevb_s64_m (z2, p0, z1), ++ z0 = svrevb_m (z2, p0, z1)) ++ ++/* ++** revb_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** revb z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s64_z_tied1, svint64_t, ++ z0 = svrevb_s64_z (p0, z0), ++ z0 = svrevb_z (p0, z0)) ++ ++/* ++** revb_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** revb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s64_z_untied, svint64_t, ++ z0 = svrevb_s64_z (p0, z1), ++ z0 = svrevb_z (p0, z1)) ++ ++/* ++** revb_s64_x_tied1: ++** revb z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s64_x_tied1, svint64_t, ++ z0 = svrevb_s64_x (p0, z0), ++ z0 = svrevb_x (p0, z0)) ++ ++/* ++** revb_s64_x_untied: ++** revb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_s64_x_untied, svint64_t, ++ z0 = svrevb_s64_x (p0, z1), ++ z0 = svrevb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u16.c +new file mode 100644 +index 000000000..d58bd3d74 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u16.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revb_u16_m_tied12: ++** revb z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u16_m_tied12, svuint16_t, ++ z0 = svrevb_u16_m (z0, p0, z0), ++ z0 = svrevb_m (z0, p0, z0)) ++ ++/* ++** revb_u16_m_tied1: ++** revb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u16_m_tied1, svuint16_t, ++ z0 = 
svrevb_u16_m (z0, p0, z1), ++ z0 = svrevb_m (z0, p0, z1)) ++ ++/* ++** revb_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** revb z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u16_m_tied2, svuint16_t, ++ z0 = svrevb_u16_m (z1, p0, z0), ++ z0 = svrevb_m (z1, p0, z0)) ++ ++/* ++** revb_u16_m_untied: ++** movprfx z0, z2 ++** revb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u16_m_untied, svuint16_t, ++ z0 = svrevb_u16_m (z2, p0, z1), ++ z0 = svrevb_m (z2, p0, z1)) ++ ++/* ++** revb_u16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** revb z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u16_z_tied1, svuint16_t, ++ z0 = svrevb_u16_z (p0, z0), ++ z0 = svrevb_z (p0, z0)) ++ ++/* ++** revb_u16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** revb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u16_z_untied, svuint16_t, ++ z0 = svrevb_u16_z (p0, z1), ++ z0 = svrevb_z (p0, z1)) ++ ++/* ++** revb_u16_x_tied1: ++** revb z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u16_x_tied1, svuint16_t, ++ z0 = svrevb_u16_x (p0, z0), ++ z0 = svrevb_x (p0, z0)) ++ ++/* ++** revb_u16_x_untied: ++** revb z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u16_x_untied, svuint16_t, ++ z0 = svrevb_u16_x (p0, z1), ++ z0 = svrevb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u32.c +new file mode 100644 +index 000000000..33df990d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revb_u32_m_tied12: ++** revb z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u32_m_tied12, svuint32_t, ++ z0 = svrevb_u32_m (z0, p0, z0), ++ z0 = svrevb_m (z0, p0, z0)) ++ ++/* ++** revb_u32_m_tied1: ++** revb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u32_m_tied1, svuint32_t, ++ z0 = svrevb_u32_m (z0, p0, z1), ++ z0 = svrevb_m (z0, p0, z1)) ++ ++/* ++** revb_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** revb z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u32_m_tied2, svuint32_t, ++ z0 = svrevb_u32_m (z1, p0, z0), ++ z0 = svrevb_m (z1, p0, z0)) ++ ++/* ++** revb_u32_m_untied: ++** movprfx z0, z2 ++** revb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u32_m_untied, svuint32_t, ++ z0 = svrevb_u32_m (z2, p0, z1), ++ z0 = svrevb_m (z2, p0, z1)) ++ ++/* ++** revb_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** revb z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u32_z_tied1, svuint32_t, ++ z0 = svrevb_u32_z (p0, z0), ++ z0 = svrevb_z (p0, z0)) ++ ++/* ++** revb_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** revb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u32_z_untied, svuint32_t, ++ z0 = svrevb_u32_z (p0, z1), ++ z0 = svrevb_z (p0, z1)) ++ ++/* ++** revb_u32_x_tied1: ++** revb z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u32_x_tied1, svuint32_t, ++ z0 = svrevb_u32_x (p0, z0), ++ z0 = svrevb_x (p0, z0)) ++ ++/* ++** revb_u32_x_untied: ++** revb z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u32_x_untied, svuint32_t, ++ z0 = svrevb_u32_x (p0, z1), ++ z0 = svrevb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u64.c +new file mode 100644 +index 000000000..50ad618cc +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revb_u64_m_tied12: ++** revb z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u64_m_tied12, svuint64_t, ++ z0 = svrevb_u64_m (z0, p0, z0), ++ z0 = svrevb_m (z0, p0, z0)) ++ ++/* ++** revb_u64_m_tied1: ++** revb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u64_m_tied1, svuint64_t, ++ z0 = svrevb_u64_m (z0, p0, z1), ++ z0 = svrevb_m (z0, p0, z1)) ++ ++/* ++** revb_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** revb z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u64_m_tied2, svuint64_t, ++ z0 = svrevb_u64_m (z1, p0, z0), ++ z0 = svrevb_m (z1, p0, z0)) ++ ++/* ++** revb_u64_m_untied: ++** movprfx z0, z2 ++** revb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u64_m_untied, svuint64_t, ++ z0 = svrevb_u64_m (z2, p0, z1), ++ z0 = svrevb_m (z2, p0, z1)) ++ ++/* ++** revb_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** revb z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u64_z_tied1, svuint64_t, ++ z0 = svrevb_u64_z (p0, z0), ++ z0 = svrevb_z (p0, z0)) ++ ++/* ++** revb_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** revb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u64_z_untied, svuint64_t, ++ z0 = svrevb_u64_z (p0, z1), ++ z0 = svrevb_z (p0, z1)) ++ ++/* ++** revb_u64_x_tied1: ++** revb z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u64_x_tied1, svuint64_t, ++ z0 = svrevb_u64_x (p0, z0), ++ z0 = svrevb_x (p0, z0)) ++ ++/* ++** revb_u64_x_untied: ++** revb z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revb_u64_x_untied, svuint64_t, ++ z0 = svrevb_u64_x (p0, z1), ++ z0 = svrevb_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s32.c +new file mode 100644 +index 000000000..07d512ddb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revh_s32_m_tied12: ++** revh z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s32_m_tied12, svint32_t, ++ z0 = svrevh_s32_m (z0, p0, z0), ++ z0 = svrevh_m (z0, p0, z0)) ++ ++/* ++** revh_s32_m_tied1: ++** revh z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s32_m_tied1, svint32_t, ++ z0 = svrevh_s32_m (z0, p0, z1), ++ z0 = svrevh_m (z0, p0, z1)) ++ ++/* ++** revh_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** revh z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s32_m_tied2, svint32_t, ++ z0 = svrevh_s32_m (z1, p0, z0), ++ z0 = svrevh_m (z1, p0, z0)) ++ ++/* ++** revh_s32_m_untied: ++** movprfx z0, z2 ++** revh z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s32_m_untied, svint32_t, ++ z0 = svrevh_s32_m (z2, p0, z1), ++ z0 = svrevh_m (z2, p0, z1)) ++ ++/* ++** revh_s32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** revh z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s32_z_tied1, svint32_t, ++ z0 = svrevh_s32_z (p0, z0), ++ z0 = svrevh_z (p0, z0)) ++ ++/* ++** revh_s32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** revh z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s32_z_untied, svint32_t, ++ z0 = svrevh_s32_z (p0, z1), ++ z0 = svrevh_z (p0, z1)) ++ ++/* ++** revh_s32_x_tied1: ++** revh z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z 
(revh_s32_x_tied1, svint32_t, ++ z0 = svrevh_s32_x (p0, z0), ++ z0 = svrevh_x (p0, z0)) ++ ++/* ++** revh_s32_x_untied: ++** revh z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s32_x_untied, svint32_t, ++ z0 = svrevh_s32_x (p0, z1), ++ z0 = svrevh_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s64.c +new file mode 100644 +index 000000000..b1446347c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revh_s64_m_tied12: ++** revh z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s64_m_tied12, svint64_t, ++ z0 = svrevh_s64_m (z0, p0, z0), ++ z0 = svrevh_m (z0, p0, z0)) ++ ++/* ++** revh_s64_m_tied1: ++** revh z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s64_m_tied1, svint64_t, ++ z0 = svrevh_s64_m (z0, p0, z1), ++ z0 = svrevh_m (z0, p0, z1)) ++ ++/* ++** revh_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** revh z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s64_m_tied2, svint64_t, ++ z0 = svrevh_s64_m (z1, p0, z0), ++ z0 = svrevh_m (z1, p0, z0)) ++ ++/* ++** revh_s64_m_untied: ++** movprfx z0, z2 ++** revh z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s64_m_untied, svint64_t, ++ z0 = svrevh_s64_m (z2, p0, z1), ++ z0 = svrevh_m (z2, p0, z1)) ++ ++/* ++** revh_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** revh z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s64_z_tied1, svint64_t, ++ z0 = svrevh_s64_z (p0, z0), ++ z0 = svrevh_z (p0, z0)) ++ ++/* ++** revh_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** revh z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s64_z_untied, svint64_t, ++ z0 = svrevh_s64_z (p0, z1), ++ z0 = svrevh_z (p0, z1)) ++ ++/* ++** revh_s64_x_tied1: ++** revh z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s64_x_tied1, svint64_t, ++ z0 = svrevh_s64_x (p0, z0), ++ z0 = svrevh_x (p0, z0)) ++ ++/* ++** revh_s64_x_untied: ++** revh z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_s64_x_untied, svint64_t, ++ z0 = svrevh_s64_x (p0, z1), ++ z0 = svrevh_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u32.c +new file mode 100644 +index 000000000..9ea51884d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u32.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revh_u32_m_tied12: ++** revh z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u32_m_tied12, svuint32_t, ++ z0 = svrevh_u32_m (z0, p0, z0), ++ z0 = svrevh_m (z0, p0, z0)) ++ ++/* ++** revh_u32_m_tied1: ++** revh z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u32_m_tied1, svuint32_t, ++ z0 = svrevh_u32_m (z0, p0, z1), ++ z0 = svrevh_m (z0, p0, z1)) ++ ++/* ++** revh_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** revh z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u32_m_tied2, svuint32_t, ++ z0 = svrevh_u32_m (z1, p0, z0), ++ z0 = svrevh_m (z1, p0, z0)) ++ ++/* ++** revh_u32_m_untied: ++** movprfx z0, z2 ++** revh z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u32_m_untied, svuint32_t, ++ z0 = svrevh_u32_m (z2, p0, z1), ++ z0 = svrevh_m (z2, p0, z1)) ++ ++/* ++** revh_u32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, 
p0/z, \1\.s ++** revh z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u32_z_tied1, svuint32_t, ++ z0 = svrevh_u32_z (p0, z0), ++ z0 = svrevh_z (p0, z0)) ++ ++/* ++** revh_u32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** revh z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u32_z_untied, svuint32_t, ++ z0 = svrevh_u32_z (p0, z1), ++ z0 = svrevh_z (p0, z1)) ++ ++/* ++** revh_u32_x_tied1: ++** revh z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u32_x_tied1, svuint32_t, ++ z0 = svrevh_u32_x (p0, z0), ++ z0 = svrevh_x (p0, z0)) ++ ++/* ++** revh_u32_x_untied: ++** revh z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u32_x_untied, svuint32_t, ++ z0 = svrevh_u32_x (p0, z1), ++ z0 = svrevh_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u64.c +new file mode 100644 +index 000000000..7b2da2701 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revh_u64_m_tied12: ++** revh z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u64_m_tied12, svuint64_t, ++ z0 = svrevh_u64_m (z0, p0, z0), ++ z0 = svrevh_m (z0, p0, z0)) ++ ++/* ++** revh_u64_m_tied1: ++** revh z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u64_m_tied1, svuint64_t, ++ z0 = svrevh_u64_m (z0, p0, z1), ++ z0 = svrevh_m (z0, p0, z1)) ++ ++/* ++** revh_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** revh z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u64_m_tied2, svuint64_t, ++ z0 = svrevh_u64_m (z1, p0, z0), ++ z0 = svrevh_m (z1, p0, z0)) ++ ++/* ++** revh_u64_m_untied: ++** movprfx z0, z2 ++** revh z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u64_m_untied, svuint64_t, ++ z0 = svrevh_u64_m (z2, p0, z1), ++ z0 = svrevh_m (z2, p0, z1)) ++ ++/* ++** revh_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** revh z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u64_z_tied1, svuint64_t, ++ z0 = svrevh_u64_z (p0, z0), ++ z0 = svrevh_z (p0, z0)) ++ ++/* ++** revh_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** revh z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u64_z_untied, svuint64_t, ++ z0 = svrevh_u64_z (p0, z1), ++ z0 = svrevh_z (p0, z1)) ++ ++/* ++** revh_u64_x_tied1: ++** revh z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u64_x_tied1, svuint64_t, ++ z0 = svrevh_u64_x (p0, z0), ++ z0 = svrevh_x (p0, z0)) ++ ++/* ++** revh_u64_x_untied: ++** revh z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revh_u64_x_untied, svuint64_t, ++ z0 = svrevh_u64_x (p0, z1), ++ z0 = svrevh_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_s64.c +new file mode 100644 +index 000000000..26ca0f0bd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_s64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revw_s64_m_tied12: ++** revw z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_s64_m_tied12, svint64_t, ++ z0 = svrevw_s64_m (z0, p0, z0), ++ z0 = svrevw_m (z0, p0, z0)) ++ ++/* ++** revw_s64_m_tied1: ++** revw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_s64_m_tied1, svint64_t, ++ z0 = svrevw_s64_m (z0, p0, z1), ++ z0 = svrevw_m (z0, p0, z1)) ++ ++/* ++** revw_s64_m_tied2: ++** mov 
(z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** revw z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revw_s64_m_tied2, svint64_t, ++ z0 = svrevw_s64_m (z1, p0, z0), ++ z0 = svrevw_m (z1, p0, z0)) ++ ++/* ++** revw_s64_m_untied: ++** movprfx z0, z2 ++** revw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_s64_m_untied, svint64_t, ++ z0 = svrevw_s64_m (z2, p0, z1), ++ z0 = svrevw_m (z2, p0, z1)) ++ ++/* ++** revw_s64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** revw z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revw_s64_z_tied1, svint64_t, ++ z0 = svrevw_s64_z (p0, z0), ++ z0 = svrevw_z (p0, z0)) ++ ++/* ++** revw_s64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** revw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_s64_z_untied, svint64_t, ++ z0 = svrevw_s64_z (p0, z1), ++ z0 = svrevw_z (p0, z1)) ++ ++/* ++** revw_s64_x_tied1: ++** revw z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_s64_x_tied1, svint64_t, ++ z0 = svrevw_s64_x (p0, z0), ++ z0 = svrevw_x (p0, z0)) ++ ++/* ++** revw_s64_x_untied: ++** revw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_s64_x_untied, svint64_t, ++ z0 = svrevw_s64_x (p0, z1), ++ z0 = svrevw_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_u64.c +new file mode 100644 +index 000000000..c70cdb428 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_u64.c +@@ -0,0 +1,81 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** revw_u64_m_tied12: ++** revw z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_u64_m_tied12, svuint64_t, ++ z0 = svrevw_u64_m (z0, p0, z0), ++ z0 = svrevw_m (z0, p0, z0)) ++ ++/* ++** revw_u64_m_tied1: ++** revw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_u64_m_tied1, svuint64_t, ++ z0 = svrevw_u64_m (z0, p0, z1), ++ z0 = svrevw_m (z0, p0, z1)) ++ ++/* ++** revw_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** revw z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revw_u64_m_tied2, svuint64_t, ++ z0 = svrevw_u64_m (z1, p0, z0), ++ z0 = svrevw_m (z1, p0, z0)) ++ ++/* ++** revw_u64_m_untied: ++** movprfx z0, z2 ++** revw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_u64_m_untied, svuint64_t, ++ z0 = svrevw_u64_m (z2, p0, z1), ++ z0 = svrevw_m (z2, p0, z1)) ++ ++/* ++** revw_u64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** revw z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (revw_u64_z_tied1, svuint64_t, ++ z0 = svrevw_u64_z (p0, z0), ++ z0 = svrevw_z (p0, z0)) ++ ++/* ++** revw_u64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** revw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_u64_z_untied, svuint64_t, ++ z0 = svrevw_u64_z (p0, z1), ++ z0 = svrevw_z (p0, z1)) ++ ++/* ++** revw_u64_x_tied1: ++** revw z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_u64_x_tied1, svuint64_t, ++ z0 = svrevw_u64_x (p0, z0), ++ z0 = svrevw_x (p0, z0)) ++ ++/* ++** revw_u64_x_untied: ++** revw z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (revw_u64_x_untied, svuint64_t, ++ z0 = svrevw_u64_x (p0, z1), ++ z0 = svrevw_x (p0, z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f16.c +new file mode 100644 +index 000000000..99a604209 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rinta_f16_m_tied12: ++** frinta z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f16_m_tied12, svfloat16_t, ++ z0 = svrinta_f16_m (z0, p0, z0), ++ z0 = svrinta_m (z0, p0, z0)) ++ ++/* ++** rinta_f16_m_tied1: ++** frinta z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f16_m_tied1, svfloat16_t, ++ z0 = svrinta_f16_m (z0, p0, z1), ++ z0 = svrinta_m (z0, p0, z1)) ++ ++/* ++** rinta_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frinta z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f16_m_tied2, svfloat16_t, ++ z0 = svrinta_f16_m (z1, p0, z0), ++ z0 = svrinta_m (z1, p0, z0)) ++ ++/* ++** rinta_f16_m_untied: ++** movprfx z0, z2 ++** frinta z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f16_m_untied, svfloat16_t, ++ z0 = svrinta_f16_m (z2, p0, z1), ++ z0 = svrinta_m (z2, p0, z1)) ++ ++/* ++** rinta_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** frinta z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f16_z_tied1, svfloat16_t, ++ z0 = svrinta_f16_z (p0, z0), ++ z0 = svrinta_z (p0, z0)) ++ ++/* ++** rinta_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** frinta z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f16_z_untied, svfloat16_t, ++ z0 = svrinta_f16_z (p0, z1), ++ z0 = svrinta_z (p0, z1)) ++ ++/* ++** rinta_f16_x_tied1: ++** frinta z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f16_x_tied1, svfloat16_t, ++ z0 = svrinta_f16_x (p0, z0), ++ z0 = svrinta_x (p0, z0)) ++ ++/* ++** rinta_f16_x_untied: ++** frinta z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f16_x_untied, svfloat16_t, ++ z0 = svrinta_f16_x (p0, z1), ++ z0 = svrinta_x (p0, z1)) ++ ++/* ++** ptrue_rinta_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinta_f16_x_tied1, svfloat16_t, ++ z0 = svrinta_f16_x (svptrue_b16 (), z0), ++ z0 = svrinta_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_rinta_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinta_f16_x_untied, svfloat16_t, ++ z0 = svrinta_f16_x (svptrue_b16 (), z1), ++ z0 = svrinta_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f32.c +new file mode 100644 +index 000000000..b4e3714bc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rinta_f32_m_tied12: ++** frinta z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f32_m_tied12, svfloat32_t, ++ z0 = svrinta_f32_m (z0, p0, z0), ++ z0 = svrinta_m (z0, p0, z0)) ++ ++/* ++** rinta_f32_m_tied1: ++** frinta z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f32_m_tied1, svfloat32_t, ++ z0 = svrinta_f32_m (z0, p0, z1), ++ z0 = svrinta_m (z0, p0, z1)) ++ ++/* ++** rinta_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frinta z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f32_m_tied2, svfloat32_t, ++ z0 = svrinta_f32_m (z1, p0, z0), ++ z0 = svrinta_m (z1, p0, z0)) ++ ++/* ++** rinta_f32_m_untied: ++** movprfx z0, z2 ++** frinta z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f32_m_untied, svfloat32_t, ++ z0 = svrinta_f32_m (z2, p0, z1), ++ z0 = svrinta_m (z2, p0, z1)) ++ ++/* ++** rinta_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** frinta z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f32_z_tied1, svfloat32_t, ++ z0 = svrinta_f32_z (p0, z0), ++ z0 = svrinta_z (p0, z0)) ++ ++/* ++** rinta_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** frinta z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f32_z_untied, svfloat32_t, ++ z0 = svrinta_f32_z (p0, z1), ++ z0 = svrinta_z (p0, z1)) ++ ++/* ++** rinta_f32_x_tied1: ++** frinta z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f32_x_tied1, svfloat32_t, ++ z0 = svrinta_f32_x (p0, z0), ++ z0 = svrinta_x (p0, z0)) ++ ++/* ++** rinta_f32_x_untied: ++** frinta z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f32_x_untied, svfloat32_t, ++ z0 = svrinta_f32_x (p0, z1), ++ z0 = svrinta_x (p0, z1)) ++ ++/* ++** ptrue_rinta_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinta_f32_x_tied1, svfloat32_t, ++ z0 = svrinta_f32_x (svptrue_b32 (), z0), ++ z0 = svrinta_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_rinta_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinta_f32_x_untied, svfloat32_t, ++ z0 = svrinta_f32_x (svptrue_b32 (), z1), ++ z0 = svrinta_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f64.c +new file mode 100644 +index 000000000..24d6b7dc8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rinta_f64_m_tied12: ++** frinta z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f64_m_tied12, svfloat64_t, ++ z0 = svrinta_f64_m (z0, p0, z0), ++ z0 = svrinta_m (z0, p0, z0)) ++ ++/* ++** rinta_f64_m_tied1: ++** frinta z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f64_m_tied1, svfloat64_t, ++ z0 = svrinta_f64_m (z0, p0, z1), ++ z0 = svrinta_m (z0, p0, z1)) ++ ++/* ++** rinta_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** frinta z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f64_m_tied2, svfloat64_t, ++ z0 = svrinta_f64_m (z1, p0, z0), ++ z0 = svrinta_m (z1, p0, z0)) ++ ++/* ++** rinta_f64_m_untied: ++** movprfx z0, z2 ++** frinta z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f64_m_untied, svfloat64_t, ++ z0 = svrinta_f64_m (z2, p0, z1), ++ z0 = svrinta_m (z2, p0, z1)) ++ ++/* ++** rinta_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** frinta z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f64_z_tied1, svfloat64_t, ++ z0 = svrinta_f64_z (p0, z0), ++ z0 = svrinta_z (p0, z0)) ++ ++/* ++** rinta_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** frinta z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f64_z_untied, svfloat64_t, ++ z0 = svrinta_f64_z (p0, z1), ++ z0 = svrinta_z (p0, z1)) ++ ++/* ++** rinta_f64_x_tied1: ++** frinta z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f64_x_tied1, svfloat64_t, ++ z0 = svrinta_f64_x (p0, z0), ++ z0 = svrinta_x (p0, z0)) ++ ++/* ++** rinta_f64_x_untied: ++** frinta z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinta_f64_x_untied, svfloat64_t, ++ z0 = svrinta_f64_x (p0, z1), ++ z0 = svrinta_x (p0, z1)) ++ ++/* ++** ptrue_rinta_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinta_f64_x_tied1, svfloat64_t, ++ z0 = svrinta_f64_x (svptrue_b64 (), z0), ++ z0 = svrinta_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_rinta_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinta_f64_x_untied, svfloat64_t, ++ z0 = svrinta_f64_x (svptrue_b64 (), z1), ++ z0 = svrinta_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f16.c +new file mode 100644 +index 000000000..1f0ac85e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rinti_f16_m_tied12: ++** frinti z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f16_m_tied12, svfloat16_t, ++ z0 = svrinti_f16_m (z0, p0, z0), ++ z0 = svrinti_m (z0, p0, z0)) ++ ++/* ++** rinti_f16_m_tied1: ++** frinti z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f16_m_tied1, svfloat16_t, ++ z0 = svrinti_f16_m (z0, p0, z1), ++ z0 = svrinti_m (z0, p0, z1)) ++ ++/* ++** rinti_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frinti z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f16_m_tied2, svfloat16_t, ++ z0 = svrinti_f16_m (z1, p0, z0), ++ z0 = svrinti_m (z1, p0, z0)) ++ ++/* ++** rinti_f16_m_untied: ++** movprfx z0, z2 ++** frinti z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f16_m_untied, svfloat16_t, ++ z0 = svrinti_f16_m (z2, p0, z1), ++ z0 = svrinti_m (z2, p0, z1)) ++ ++/* ++** rinti_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** frinti z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f16_z_tied1, svfloat16_t, ++ z0 = svrinti_f16_z (p0, z0), ++ z0 = svrinti_z (p0, z0)) ++ ++/* ++** rinti_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** frinti z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f16_z_untied, svfloat16_t, ++ z0 = svrinti_f16_z (p0, z1), ++ z0 = svrinti_z (p0, z1)) ++ ++/* ++** rinti_f16_x_tied1: ++** frinti z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f16_x_tied1, svfloat16_t, ++ z0 = svrinti_f16_x (p0, z0), ++ z0 = svrinti_x (p0, z0)) ++ ++/* ++** rinti_f16_x_untied: ++** frinti z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f16_x_untied, svfloat16_t, ++ z0 = svrinti_f16_x (p0, z1), ++ z0 = svrinti_x (p0, z1)) ++ ++/* ++** ptrue_rinti_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinti_f16_x_tied1, svfloat16_t, ++ z0 = svrinti_f16_x (svptrue_b16 (), z0), ++ z0 = svrinti_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_rinti_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinti_f16_x_untied, svfloat16_t, ++ z0 = svrinti_f16_x (svptrue_b16 (), z1), ++ z0 = svrinti_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f32.c +new file mode 100644 +index 000000000..cf54fde5c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rinti_f32_m_tied12: ++** frinti z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f32_m_tied12, svfloat32_t, ++ z0 = svrinti_f32_m (z0, p0, z0), ++ z0 = svrinti_m (z0, p0, z0)) ++ ++/* ++** rinti_f32_m_tied1: ++** frinti z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f32_m_tied1, svfloat32_t, ++ z0 = svrinti_f32_m (z0, p0, z1), ++ z0 = svrinti_m (z0, p0, z1)) ++ ++/* ++** rinti_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frinti z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f32_m_tied2, svfloat32_t, ++ z0 = svrinti_f32_m (z1, p0, z0), ++ z0 = svrinti_m (z1, p0, z0)) ++ ++/* ++** rinti_f32_m_untied: ++** movprfx z0, z2 ++** frinti z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f32_m_untied, svfloat32_t, ++ z0 = svrinti_f32_m (z2, p0, z1), ++ z0 = svrinti_m (z2, p0, z1)) ++ ++/* ++** rinti_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** frinti z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f32_z_tied1, svfloat32_t, ++ z0 = svrinti_f32_z (p0, z0), ++ z0 = svrinti_z (p0, z0)) ++ ++/* ++** rinti_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** frinti z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f32_z_untied, svfloat32_t, ++ z0 = svrinti_f32_z (p0, z1), ++ z0 = svrinti_z (p0, z1)) ++ ++/* ++** rinti_f32_x_tied1: ++** frinti z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f32_x_tied1, svfloat32_t, ++ z0 = svrinti_f32_x (p0, z0), ++ z0 = svrinti_x (p0, z0)) ++ ++/* ++** rinti_f32_x_untied: ++** frinti z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f32_x_untied, svfloat32_t, ++ z0 = svrinti_f32_x (p0, z1), ++ z0 = svrinti_x (p0, z1)) ++ ++/* ++** ptrue_rinti_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinti_f32_x_tied1, svfloat32_t, ++ z0 = svrinti_f32_x (svptrue_b32 (), z0), ++ z0 = svrinti_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_rinti_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinti_f32_x_untied, svfloat32_t, ++ z0 = svrinti_f32_x (svptrue_b32 (), z1), ++ z0 = svrinti_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f64.c +new file mode 100644 +index 000000000..08b861caa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rinti_f64_m_tied12: ++** frinti z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f64_m_tied12, svfloat64_t, ++ z0 = svrinti_f64_m (z0, p0, z0), ++ z0 = svrinti_m (z0, p0, z0)) ++ ++/* ++** rinti_f64_m_tied1: ++** frinti z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f64_m_tied1, svfloat64_t, ++ z0 = svrinti_f64_m (z0, p0, z1), ++ z0 = svrinti_m (z0, p0, z1)) ++ ++/* ++** rinti_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** frinti z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f64_m_tied2, svfloat64_t, ++ z0 = svrinti_f64_m (z1, p0, z0), ++ z0 = svrinti_m (z1, p0, z0)) ++ ++/* ++** rinti_f64_m_untied: ++** movprfx z0, z2 ++** frinti z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f64_m_untied, svfloat64_t, ++ z0 = svrinti_f64_m (z2, p0, z1), ++ z0 = svrinti_m (z2, p0, z1)) ++ ++/* ++** rinti_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** frinti z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f64_z_tied1, svfloat64_t, ++ z0 = svrinti_f64_z (p0, z0), ++ z0 = svrinti_z (p0, z0)) ++ ++/* ++** rinti_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** frinti z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f64_z_untied, svfloat64_t, ++ z0 = svrinti_f64_z (p0, z1), ++ z0 = svrinti_z (p0, z1)) ++ ++/* ++** rinti_f64_x_tied1: ++** frinti z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f64_x_tied1, svfloat64_t, ++ z0 = svrinti_f64_x (p0, z0), ++ z0 = svrinti_x (p0, z0)) ++ ++/* ++** rinti_f64_x_untied: ++** frinti z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rinti_f64_x_untied, svfloat64_t, ++ z0 = svrinti_f64_x (p0, z1), ++ z0 = svrinti_x (p0, z1)) ++ ++/* ++** ptrue_rinti_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinti_f64_x_tied1, svfloat64_t, ++ z0 = svrinti_f64_x (svptrue_b64 (), z0), ++ z0 = svrinti_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_rinti_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rinti_f64_x_untied, svfloat64_t, ++ z0 = svrinti_f64_x (svptrue_b64 (), z1), ++ z0 = svrinti_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f16.c +new file mode 100644 +index 000000000..194d01cbd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintm_f16_m_tied12: ++** frintm z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f16_m_tied12, svfloat16_t, ++ z0 = svrintm_f16_m (z0, p0, z0), ++ z0 = svrintm_m (z0, p0, z0)) ++ ++/* ++** rintm_f16_m_tied1: ++** frintm z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f16_m_tied1, svfloat16_t, ++ z0 = svrintm_f16_m (z0, p0, z1), ++ z0 = svrintm_m (z0, p0, z1)) ++ ++/* ++** rintm_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frintm z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f16_m_tied2, svfloat16_t, ++ z0 = svrintm_f16_m (z1, p0, z0), ++ z0 = svrintm_m (z1, p0, z0)) ++ ++/* ++** rintm_f16_m_untied: ++** movprfx z0, z2 ++** frintm z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f16_m_untied, svfloat16_t, ++ z0 = svrintm_f16_m (z2, p0, z1), ++ z0 = svrintm_m (z2, p0, z1)) ++ ++/* ++** rintm_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** frintm z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f16_z_tied1, svfloat16_t, ++ z0 = svrintm_f16_z (p0, z0), ++ z0 = svrintm_z (p0, z0)) ++ ++/* ++** rintm_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** frintm z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f16_z_untied, svfloat16_t, ++ z0 = svrintm_f16_z (p0, z1), ++ z0 = svrintm_z (p0, z1)) ++ ++/* ++** rintm_f16_x_tied1: ++** frintm z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f16_x_tied1, svfloat16_t, ++ z0 = svrintm_f16_x (p0, z0), ++ z0 = svrintm_x (p0, z0)) ++ ++/* ++** rintm_f16_x_untied: ++** frintm z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f16_x_untied, svfloat16_t, ++ z0 = svrintm_f16_x (p0, z1), ++ z0 = svrintm_x (p0, z1)) ++ ++/* ++** ptrue_rintm_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintm_f16_x_tied1, svfloat16_t, ++ z0 = svrintm_f16_x (svptrue_b16 (), z0), ++ z0 = svrintm_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_rintm_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintm_f16_x_untied, svfloat16_t, ++ z0 = svrintm_f16_x (svptrue_b16 (), z1), ++ z0 = svrintm_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f32.c +new file mode 100644 +index 000000000..6c3297aa1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintm_f32_m_tied12: ++** frintm z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f32_m_tied12, svfloat32_t, ++ z0 = svrintm_f32_m (z0, p0, z0), ++ z0 = svrintm_m (z0, p0, z0)) ++ ++/* ++** rintm_f32_m_tied1: ++** frintm z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f32_m_tied1, svfloat32_t, ++ z0 = svrintm_f32_m (z0, p0, z1), ++ z0 = svrintm_m (z0, p0, z1)) ++ ++/* ++** rintm_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frintm z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f32_m_tied2, svfloat32_t, ++ z0 = svrintm_f32_m (z1, p0, z0), ++ z0 = svrintm_m (z1, p0, z0)) ++ ++/* ++** rintm_f32_m_untied: ++** movprfx z0, z2 ++** frintm z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f32_m_untied, svfloat32_t, ++ z0 = svrintm_f32_m (z2, p0, z1), ++ z0 = svrintm_m (z2, p0, z1)) ++ ++/* ++** rintm_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** frintm z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f32_z_tied1, svfloat32_t, ++ z0 = svrintm_f32_z (p0, z0), ++ z0 = svrintm_z (p0, z0)) ++ ++/* ++** rintm_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** frintm z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f32_z_untied, svfloat32_t, ++ z0 = svrintm_f32_z (p0, z1), ++ z0 = svrintm_z (p0, z1)) ++ ++/* ++** rintm_f32_x_tied1: ++** frintm z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f32_x_tied1, svfloat32_t, ++ z0 = svrintm_f32_x (p0, z0), ++ z0 = svrintm_x (p0, z0)) ++ ++/* ++** rintm_f32_x_untied: ++** frintm z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f32_x_untied, svfloat32_t, ++ z0 = svrintm_f32_x (p0, z1), ++ z0 = svrintm_x (p0, z1)) ++ ++/* ++** ptrue_rintm_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintm_f32_x_tied1, svfloat32_t, ++ z0 = svrintm_f32_x (svptrue_b32 (), z0), ++ z0 = svrintm_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_rintm_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintm_f32_x_untied, svfloat32_t, ++ z0 = svrintm_f32_x (svptrue_b32 (), z1), ++ z0 = svrintm_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f64.c +new file mode 100644 +index 000000000..ecbb24447 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintm_f64_m_tied12: ++** frintm z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f64_m_tied12, svfloat64_t, ++ z0 = svrintm_f64_m (z0, p0, z0), ++ z0 = svrintm_m (z0, p0, z0)) ++ ++/* ++** rintm_f64_m_tied1: ++** frintm z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f64_m_tied1, svfloat64_t, ++ z0 = svrintm_f64_m (z0, p0, z1), ++ z0 = svrintm_m (z0, p0, z1)) ++ ++/* ++** rintm_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** frintm z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f64_m_tied2, svfloat64_t, ++ z0 = svrintm_f64_m (z1, p0, z0), ++ z0 = svrintm_m (z1, p0, z0)) ++ ++/* ++** rintm_f64_m_untied: ++** movprfx z0, z2 ++** frintm z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f64_m_untied, svfloat64_t, ++ z0 = svrintm_f64_m (z2, p0, z1), ++ z0 = svrintm_m (z2, p0, z1)) ++ ++/* ++** rintm_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** frintm z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f64_z_tied1, svfloat64_t, ++ z0 = svrintm_f64_z (p0, z0), ++ z0 = svrintm_z (p0, z0)) ++ ++/* ++** rintm_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** frintm z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f64_z_untied, svfloat64_t, ++ z0 = svrintm_f64_z (p0, z1), ++ z0 = svrintm_z (p0, z1)) ++ ++/* ++** rintm_f64_x_tied1: ++** frintm z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f64_x_tied1, svfloat64_t, ++ z0 = svrintm_f64_x (p0, z0), ++ z0 = svrintm_x (p0, z0)) ++ ++/* ++** rintm_f64_x_untied: ++** frintm z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintm_f64_x_untied, svfloat64_t, ++ z0 = svrintm_f64_x (p0, z1), ++ z0 = svrintm_x (p0, z1)) ++ ++/* ++** ptrue_rintm_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintm_f64_x_tied1, svfloat64_t, ++ z0 = svrintm_f64_x (svptrue_b64 (), z0), ++ z0 = svrintm_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_rintm_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintm_f64_x_untied, svfloat64_t, ++ z0 = svrintm_f64_x (svptrue_b64 (), z1), ++ z0 = svrintm_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f16.c +new file mode 100644 +index 000000000..273307ef1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintn_f16_m_tied12: ++** frintn z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f16_m_tied12, svfloat16_t, ++ z0 = svrintn_f16_m (z0, p0, z0), ++ z0 = svrintn_m (z0, p0, z0)) ++ ++/* ++** rintn_f16_m_tied1: ++** frintn z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f16_m_tied1, svfloat16_t, ++ z0 = svrintn_f16_m (z0, p0, z1), ++ z0 = svrintn_m (z0, p0, z1)) ++ ++/* ++** rintn_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frintn z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f16_m_tied2, svfloat16_t, ++ z0 = svrintn_f16_m (z1, p0, z0), ++ z0 = svrintn_m (z1, p0, z0)) ++ ++/* ++** rintn_f16_m_untied: ++** movprfx z0, z2 ++** frintn z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f16_m_untied, svfloat16_t, ++ z0 = svrintn_f16_m (z2, p0, z1), ++ z0 = svrintn_m (z2, p0, z1)) ++ ++/* ++** rintn_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** frintn z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f16_z_tied1, svfloat16_t, ++ z0 = svrintn_f16_z (p0, z0), ++ z0 = svrintn_z (p0, z0)) ++ ++/* ++** rintn_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** frintn z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f16_z_untied, svfloat16_t, ++ z0 = svrintn_f16_z (p0, z1), ++ z0 = svrintn_z (p0, z1)) ++ ++/* ++** rintn_f16_x_tied1: ++** frintn z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f16_x_tied1, svfloat16_t, ++ z0 = svrintn_f16_x (p0, z0), ++ z0 = svrintn_x (p0, z0)) ++ ++/* ++** rintn_f16_x_untied: ++** frintn z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f16_x_untied, svfloat16_t, ++ z0 = svrintn_f16_x (p0, z1), ++ z0 = svrintn_x (p0, z1)) ++ ++/* ++** ptrue_rintn_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintn_f16_x_tied1, svfloat16_t, ++ z0 = svrintn_f16_x (svptrue_b16 (), z0), ++ z0 = svrintn_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_rintn_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintn_f16_x_untied, svfloat16_t, ++ z0 = svrintn_f16_x (svptrue_b16 (), z1), ++ z0 = svrintn_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f32.c +new file mode 100644 +index 000000000..bafd43106 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintn_f32_m_tied12: ++** frintn z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f32_m_tied12, svfloat32_t, ++ z0 = svrintn_f32_m (z0, p0, z0), ++ z0 = svrintn_m (z0, p0, z0)) ++ ++/* ++** rintn_f32_m_tied1: ++** frintn z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f32_m_tied1, svfloat32_t, ++ z0 = svrintn_f32_m (z0, p0, z1), ++ z0 = svrintn_m (z0, p0, z1)) ++ ++/* ++** rintn_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frintn z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f32_m_tied2, svfloat32_t, ++ z0 = svrintn_f32_m (z1, p0, z0), ++ z0 = svrintn_m (z1, p0, z0)) ++ ++/* ++** rintn_f32_m_untied: ++** movprfx z0, z2 ++** frintn z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f32_m_untied, svfloat32_t, ++ z0 = svrintn_f32_m (z2, p0, z1), ++ z0 = svrintn_m (z2, p0, z1)) ++ ++/* ++** rintn_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** frintn z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f32_z_tied1, svfloat32_t, ++ z0 = svrintn_f32_z (p0, z0), ++ z0 = svrintn_z (p0, z0)) ++ ++/* ++** rintn_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** frintn z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f32_z_untied, svfloat32_t, ++ z0 = svrintn_f32_z (p0, z1), ++ z0 = svrintn_z (p0, z1)) ++ ++/* ++** rintn_f32_x_tied1: ++** frintn z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f32_x_tied1, svfloat32_t, ++ z0 = svrintn_f32_x (p0, z0), ++ z0 = svrintn_x (p0, z0)) ++ ++/* ++** rintn_f32_x_untied: ++** frintn z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f32_x_untied, svfloat32_t, ++ z0 = svrintn_f32_x (p0, z1), ++ z0 = svrintn_x (p0, z1)) ++ ++/* ++** ptrue_rintn_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintn_f32_x_tied1, svfloat32_t, ++ z0 = svrintn_f32_x (svptrue_b32 (), z0), ++ z0 = svrintn_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_rintn_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintn_f32_x_untied, svfloat32_t, ++ z0 = svrintn_f32_x (svptrue_b32 (), z1), ++ z0 = svrintn_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f64.c +new file mode 100644 +index 000000000..0142315e6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintn_f64_m_tied12: ++** frintn z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f64_m_tied12, svfloat64_t, ++ z0 = svrintn_f64_m (z0, p0, z0), ++ z0 = svrintn_m (z0, p0, z0)) ++ ++/* ++** rintn_f64_m_tied1: ++** frintn z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f64_m_tied1, svfloat64_t, ++ z0 = svrintn_f64_m (z0, p0, z1), ++ z0 = svrintn_m (z0, p0, z1)) ++ ++/* ++** rintn_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** frintn z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f64_m_tied2, svfloat64_t, ++ z0 = svrintn_f64_m (z1, p0, z0), ++ z0 = svrintn_m (z1, p0, z0)) ++ ++/* ++** rintn_f64_m_untied: ++** movprfx z0, z2 ++** frintn z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f64_m_untied, svfloat64_t, ++ z0 = svrintn_f64_m (z2, p0, z1), ++ z0 = svrintn_m (z2, p0, z1)) ++ ++/* ++** rintn_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** frintn z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f64_z_tied1, svfloat64_t, ++ z0 = svrintn_f64_z (p0, z0), ++ z0 = svrintn_z (p0, z0)) ++ ++/* ++** rintn_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** frintn z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f64_z_untied, svfloat64_t, ++ z0 = svrintn_f64_z (p0, z1), ++ z0 = svrintn_z (p0, z1)) ++ ++/* ++** rintn_f64_x_tied1: ++** frintn z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f64_x_tied1, svfloat64_t, ++ z0 = svrintn_f64_x (p0, z0), ++ z0 = svrintn_x (p0, z0)) ++ ++/* ++** rintn_f64_x_untied: ++** frintn z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintn_f64_x_untied, svfloat64_t, ++ z0 = svrintn_f64_x (p0, z1), ++ z0 = svrintn_x (p0, z1)) ++ ++/* ++** ptrue_rintn_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintn_f64_x_tied1, svfloat64_t, ++ z0 = svrintn_f64_x (svptrue_b64 (), z0), ++ z0 = svrintn_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_rintn_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintn_f64_x_untied, svfloat64_t, ++ z0 = svrintn_f64_x (svptrue_b64 (), z1), ++ z0 = svrintn_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f16.c +new file mode 100644 +index 000000000..0e85c3448 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintp_f16_m_tied12: ++** frintp z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f16_m_tied12, svfloat16_t, ++ z0 = svrintp_f16_m (z0, p0, z0), ++ z0 = svrintp_m (z0, p0, z0)) ++ ++/* ++** rintp_f16_m_tied1: ++** frintp z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f16_m_tied1, svfloat16_t, ++ z0 = svrintp_f16_m (z0, p0, z1), ++ z0 = svrintp_m (z0, p0, z1)) ++ ++/* ++** rintp_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frintp z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f16_m_tied2, svfloat16_t, ++ z0 = svrintp_f16_m (z1, p0, z0), ++ z0 = svrintp_m (z1, p0, z0)) ++ ++/* ++** rintp_f16_m_untied: ++** movprfx z0, z2 ++** frintp z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f16_m_untied, svfloat16_t, ++ z0 = svrintp_f16_m (z2, p0, z1), ++ z0 = svrintp_m (z2, p0, z1)) ++ ++/* ++** rintp_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** frintp z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f16_z_tied1, svfloat16_t, ++ z0 = svrintp_f16_z (p0, z0), ++ z0 = svrintp_z (p0, z0)) ++ ++/* ++** rintp_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** frintp z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f16_z_untied, svfloat16_t, ++ z0 = svrintp_f16_z (p0, z1), ++ z0 = svrintp_z (p0, z1)) ++ ++/* ++** rintp_f16_x_tied1: ++** frintp z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f16_x_tied1, svfloat16_t, ++ z0 = svrintp_f16_x (p0, z0), ++ z0 = svrintp_x (p0, z0)) ++ ++/* ++** rintp_f16_x_untied: ++** frintp z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f16_x_untied, svfloat16_t, ++ z0 = svrintp_f16_x (p0, z1), ++ z0 = svrintp_x (p0, z1)) ++ ++/* ++** ptrue_rintp_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintp_f16_x_tied1, svfloat16_t, ++ z0 = svrintp_f16_x (svptrue_b16 (), z0), ++ z0 = svrintp_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_rintp_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintp_f16_x_untied, svfloat16_t, ++ z0 = svrintp_f16_x (svptrue_b16 (), z1), ++ z0 = svrintp_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f32.c +new file mode 100644 +index 000000000..cec360d7c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintp_f32_m_tied12: ++** frintp z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f32_m_tied12, svfloat32_t, ++ z0 = svrintp_f32_m (z0, p0, z0), ++ z0 = svrintp_m (z0, p0, z0)) ++ ++/* ++** rintp_f32_m_tied1: ++** frintp z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f32_m_tied1, svfloat32_t, ++ z0 = svrintp_f32_m (z0, p0, z1), ++ z0 = svrintp_m (z0, p0, z1)) ++ ++/* ++** rintp_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frintp z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f32_m_tied2, svfloat32_t, ++ z0 = svrintp_f32_m (z1, p0, z0), ++ z0 = svrintp_m (z1, p0, z0)) ++ ++/* ++** rintp_f32_m_untied: ++** movprfx z0, z2 ++** frintp z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f32_m_untied, svfloat32_t, ++ z0 = svrintp_f32_m (z2, p0, z1), ++ z0 = svrintp_m (z2, p0, z1)) ++ ++/* ++** rintp_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** frintp z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f32_z_tied1, svfloat32_t, ++ z0 = svrintp_f32_z (p0, z0), ++ z0 = svrintp_z (p0, z0)) ++ ++/* ++** rintp_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** frintp z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f32_z_untied, svfloat32_t, ++ z0 = svrintp_f32_z (p0, z1), ++ z0 = svrintp_z (p0, z1)) ++ ++/* ++** rintp_f32_x_tied1: ++** frintp z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f32_x_tied1, svfloat32_t, ++ z0 = svrintp_f32_x (p0, z0), ++ z0 = svrintp_x (p0, z0)) ++ ++/* ++** rintp_f32_x_untied: ++** frintp z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f32_x_untied, svfloat32_t, ++ z0 = svrintp_f32_x (p0, z1), ++ z0 = svrintp_x (p0, z1)) ++ ++/* ++** ptrue_rintp_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintp_f32_x_tied1, svfloat32_t, ++ z0 = svrintp_f32_x (svptrue_b32 (), z0), ++ z0 = svrintp_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_rintp_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintp_f32_x_untied, svfloat32_t, ++ z0 = svrintp_f32_x (svptrue_b32 (), z1), ++ z0 = svrintp_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f64.c +new file mode 100644 +index 000000000..1305fb682 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintp_f64_m_tied12: ++** frintp z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f64_m_tied12, svfloat64_t, ++ z0 = svrintp_f64_m (z0, p0, z0), ++ z0 = svrintp_m (z0, p0, z0)) ++ ++/* ++** rintp_f64_m_tied1: ++** frintp z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f64_m_tied1, svfloat64_t, ++ z0 = svrintp_f64_m (z0, p0, z1), ++ z0 = svrintp_m (z0, p0, z1)) ++ ++/* ++** rintp_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** frintp z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f64_m_tied2, svfloat64_t, ++ z0 = svrintp_f64_m (z1, p0, z0), ++ z0 = svrintp_m (z1, p0, z0)) ++ ++/* ++** rintp_f64_m_untied: ++** movprfx z0, z2 ++** frintp z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f64_m_untied, svfloat64_t, ++ z0 = svrintp_f64_m (z2, p0, z1), ++ z0 = svrintp_m (z2, p0, z1)) ++ ++/* ++** rintp_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** frintp z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f64_z_tied1, svfloat64_t, ++ z0 = svrintp_f64_z (p0, z0), ++ z0 = svrintp_z (p0, z0)) ++ ++/* ++** rintp_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** frintp z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f64_z_untied, svfloat64_t, ++ z0 = svrintp_f64_z (p0, z1), ++ z0 = svrintp_z (p0, z1)) ++ ++/* ++** rintp_f64_x_tied1: ++** frintp z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f64_x_tied1, svfloat64_t, ++ z0 = svrintp_f64_x (p0, z0), ++ z0 = svrintp_x (p0, z0)) ++ ++/* ++** rintp_f64_x_untied: ++** frintp z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintp_f64_x_untied, svfloat64_t, ++ z0 = svrintp_f64_x (p0, z1), ++ z0 = svrintp_x (p0, z1)) ++ ++/* ++** ptrue_rintp_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintp_f64_x_tied1, svfloat64_t, ++ z0 = svrintp_f64_x (svptrue_b64 (), z0), ++ z0 = svrintp_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_rintp_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintp_f64_x_untied, svfloat64_t, ++ z0 = svrintp_f64_x (svptrue_b64 (), z1), ++ z0 = svrintp_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f16.c +new file mode 100644 +index 000000000..96f7f2c72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintx_f16_m_tied12: ++** frintx z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f16_m_tied12, svfloat16_t, ++ z0 = svrintx_f16_m (z0, p0, z0), ++ z0 = svrintx_m (z0, p0, z0)) ++ ++/* ++** rintx_f16_m_tied1: ++** frintx z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f16_m_tied1, svfloat16_t, ++ z0 = svrintx_f16_m (z0, p0, z1), ++ z0 = svrintx_m (z0, p0, z1)) ++ ++/* ++** rintx_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frintx z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f16_m_tied2, svfloat16_t, ++ z0 = svrintx_f16_m (z1, p0, z0), ++ z0 = svrintx_m (z1, p0, z0)) ++ ++/* ++** rintx_f16_m_untied: ++** movprfx z0, z2 ++** frintx z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f16_m_untied, svfloat16_t, ++ z0 = svrintx_f16_m (z2, p0, z1), ++ z0 = svrintx_m (z2, p0, z1)) ++ ++/* ++** rintx_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** frintx z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f16_z_tied1, svfloat16_t, ++ z0 = svrintx_f16_z (p0, z0), ++ z0 = svrintx_z (p0, z0)) ++ ++/* ++** rintx_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** frintx z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f16_z_untied, svfloat16_t, ++ z0 = svrintx_f16_z (p0, z1), ++ z0 = svrintx_z (p0, z1)) ++ ++/* ++** rintx_f16_x_tied1: ++** frintx z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f16_x_tied1, svfloat16_t, ++ z0 = svrintx_f16_x (p0, z0), ++ z0 = svrintx_x (p0, z0)) ++ ++/* ++** rintx_f16_x_untied: ++** frintx z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f16_x_untied, svfloat16_t, ++ z0 = svrintx_f16_x (p0, z1), ++ z0 = svrintx_x (p0, z1)) ++ ++/* ++** ptrue_rintx_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintx_f16_x_tied1, svfloat16_t, ++ z0 = svrintx_f16_x (svptrue_b16 (), z0), ++ z0 = svrintx_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_rintx_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintx_f16_x_untied, svfloat16_t, ++ z0 = svrintx_f16_x (svptrue_b16 (), z1), ++ z0 = svrintx_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f32.c +new file mode 100644 +index 000000000..1c42d2a94 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintx_f32_m_tied12: ++** frintx z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f32_m_tied12, svfloat32_t, ++ z0 = svrintx_f32_m (z0, p0, z0), ++ z0 = svrintx_m (z0, p0, z0)) ++ ++/* ++** rintx_f32_m_tied1: ++** frintx z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f32_m_tied1, svfloat32_t, ++ z0 = svrintx_f32_m (z0, p0, z1), ++ z0 = svrintx_m (z0, p0, z1)) ++ ++/* ++** rintx_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frintx z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f32_m_tied2, svfloat32_t, ++ z0 = svrintx_f32_m (z1, p0, z0), ++ z0 = svrintx_m (z1, p0, z0)) ++ ++/* ++** rintx_f32_m_untied: ++** movprfx z0, z2 ++** frintx z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f32_m_untied, svfloat32_t, ++ z0 = svrintx_f32_m (z2, p0, z1), ++ z0 = svrintx_m (z2, p0, z1)) ++ ++/* ++** rintx_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** frintx z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f32_z_tied1, svfloat32_t, ++ z0 = svrintx_f32_z (p0, z0), ++ z0 = svrintx_z (p0, z0)) ++ ++/* ++** rintx_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** frintx z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f32_z_untied, svfloat32_t, ++ z0 = svrintx_f32_z (p0, z1), ++ z0 = svrintx_z (p0, z1)) ++ ++/* ++** rintx_f32_x_tied1: ++** frintx z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f32_x_tied1, svfloat32_t, ++ z0 = svrintx_f32_x (p0, z0), ++ z0 = svrintx_x (p0, z0)) ++ ++/* ++** rintx_f32_x_untied: ++** frintx z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f32_x_untied, svfloat32_t, ++ z0 = svrintx_f32_x (p0, z1), ++ z0 = svrintx_x (p0, z1)) ++ ++/* ++** ptrue_rintx_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintx_f32_x_tied1, svfloat32_t, ++ z0 = svrintx_f32_x (svptrue_b32 (), z0), ++ z0 = svrintx_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_rintx_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintx_f32_x_untied, svfloat32_t, ++ z0 = svrintx_f32_x (svptrue_b32 (), z1), ++ z0 = svrintx_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f64.c +new file mode 100644 +index 000000000..bee806b3b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintx_f64_m_tied12: ++** frintx z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f64_m_tied12, svfloat64_t, ++ z0 = svrintx_f64_m (z0, p0, z0), ++ z0 = svrintx_m (z0, p0, z0)) ++ ++/* ++** rintx_f64_m_tied1: ++** frintx z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f64_m_tied1, svfloat64_t, ++ z0 = svrintx_f64_m (z0, p0, z1), ++ z0 = svrintx_m (z0, p0, z1)) ++ ++/* ++** rintx_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** frintx z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f64_m_tied2, svfloat64_t, ++ z0 = svrintx_f64_m (z1, p0, z0), ++ z0 = svrintx_m (z1, p0, z0)) ++ ++/* ++** rintx_f64_m_untied: ++** movprfx z0, z2 ++** frintx z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f64_m_untied, svfloat64_t, ++ z0 = svrintx_f64_m (z2, p0, z1), ++ z0 = svrintx_m (z2, p0, z1)) ++ ++/* ++** rintx_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** frintx z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f64_z_tied1, svfloat64_t, ++ z0 = svrintx_f64_z (p0, z0), ++ z0 = svrintx_z (p0, z0)) ++ ++/* ++** rintx_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** frintx z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f64_z_untied, svfloat64_t, ++ z0 = svrintx_f64_z (p0, z1), ++ z0 = svrintx_z (p0, z1)) ++ ++/* ++** rintx_f64_x_tied1: ++** frintx z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f64_x_tied1, svfloat64_t, ++ z0 = svrintx_f64_x (p0, z0), ++ z0 = svrintx_x (p0, z0)) ++ ++/* ++** rintx_f64_x_untied: ++** frintx z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintx_f64_x_untied, svfloat64_t, ++ z0 = svrintx_f64_x (p0, z1), ++ z0 = svrintx_x (p0, z1)) ++ ++/* ++** ptrue_rintx_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintx_f64_x_tied1, svfloat64_t, ++ z0 = svrintx_f64_x (svptrue_b64 (), z0), ++ z0 = svrintx_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_rintx_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintx_f64_x_untied, svfloat64_t, ++ z0 = svrintx_f64_x (svptrue_b64 (), z1), ++ z0 = svrintx_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f16.c +new file mode 100644 +index 000000000..be13d82b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintz_f16_m_tied12: ++** frintz z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f16_m_tied12, svfloat16_t, ++ z0 = svrintz_f16_m (z0, p0, z0), ++ z0 = svrintz_m (z0, p0, z0)) ++ ++/* ++** rintz_f16_m_tied1: ++** frintz z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f16_m_tied1, svfloat16_t, ++ z0 = svrintz_f16_m (z0, p0, z1), ++ z0 = svrintz_m (z0, p0, z1)) ++ ++/* ++** rintz_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frintz z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f16_m_tied2, svfloat16_t, ++ z0 = svrintz_f16_m (z1, p0, z0), ++ z0 = svrintz_m (z1, p0, z0)) ++ ++/* ++** rintz_f16_m_untied: ++** movprfx z0, z2 ++** frintz z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f16_m_untied, svfloat16_t, ++ z0 = svrintz_f16_m (z2, p0, z1), ++ z0 = svrintz_m (z2, p0, z1)) ++ ++/* ++** rintz_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** frintz z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f16_z_tied1, svfloat16_t, ++ z0 = svrintz_f16_z (p0, z0), ++ z0 = svrintz_z (p0, z0)) ++ ++/* ++** rintz_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** frintz z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f16_z_untied, svfloat16_t, ++ z0 = svrintz_f16_z (p0, z1), ++ z0 = svrintz_z (p0, z1)) ++ ++/* ++** rintz_f16_x_tied1: ++** frintz z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f16_x_tied1, svfloat16_t, ++ z0 = svrintz_f16_x (p0, z0), ++ z0 = svrintz_x (p0, z0)) ++ ++/* ++** rintz_f16_x_untied: ++** frintz z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f16_x_untied, svfloat16_t, ++ z0 = svrintz_f16_x (p0, z1), ++ z0 = svrintz_x (p0, z1)) ++ ++/* ++** ptrue_rintz_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintz_f16_x_tied1, svfloat16_t, ++ z0 = svrintz_f16_x (svptrue_b16 (), z0), ++ z0 = svrintz_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_rintz_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintz_f16_x_untied, svfloat16_t, ++ z0 = svrintz_f16_x (svptrue_b16 (), z1), ++ z0 = svrintz_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f32.c +new file mode 100644 +index 000000000..873c0d468 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintz_f32_m_tied12: ++** frintz z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f32_m_tied12, svfloat32_t, ++ z0 = svrintz_f32_m (z0, p0, z0), ++ z0 = svrintz_m (z0, p0, z0)) ++ ++/* ++** rintz_f32_m_tied1: ++** frintz z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f32_m_tied1, svfloat32_t, ++ z0 = svrintz_f32_m (z0, p0, z1), ++ z0 = svrintz_m (z0, p0, z1)) ++ ++/* ++** rintz_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** frintz z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f32_m_tied2, svfloat32_t, ++ z0 = svrintz_f32_m (z1, p0, z0), ++ z0 = svrintz_m (z1, p0, z0)) ++ ++/* ++** rintz_f32_m_untied: ++** movprfx z0, z2 ++** frintz z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f32_m_untied, svfloat32_t, ++ z0 = svrintz_f32_m (z2, p0, z1), ++ z0 = svrintz_m (z2, p0, z1)) ++ ++/* ++** rintz_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** frintz z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f32_z_tied1, svfloat32_t, ++ z0 = svrintz_f32_z (p0, z0), ++ z0 = svrintz_z (p0, z0)) ++ ++/* ++** rintz_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** frintz z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f32_z_untied, svfloat32_t, ++ z0 = svrintz_f32_z (p0, z1), ++ z0 = svrintz_z (p0, z1)) ++ ++/* ++** rintz_f32_x_tied1: ++** frintz z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f32_x_tied1, svfloat32_t, ++ z0 = svrintz_f32_x (p0, z0), ++ z0 = svrintz_x (p0, z0)) ++ ++/* ++** rintz_f32_x_untied: ++** frintz z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f32_x_untied, svfloat32_t, ++ z0 = svrintz_f32_x (p0, z1), ++ z0 = svrintz_x (p0, z1)) ++ ++/* ++** ptrue_rintz_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintz_f32_x_tied1, svfloat32_t, ++ z0 = svrintz_f32_x (svptrue_b32 (), z0), ++ z0 = svrintz_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_rintz_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintz_f32_x_untied, svfloat32_t, ++ z0 = svrintz_f32_x (svptrue_b32 (), z1), ++ z0 = svrintz_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f64.c +new file mode 100644 +index 000000000..e6c9d1fc8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rintz_f64_m_tied12: ++** frintz z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f64_m_tied12, svfloat64_t, ++ z0 = svrintz_f64_m (z0, p0, z0), ++ z0 = svrintz_m (z0, p0, z0)) ++ ++/* ++** rintz_f64_m_tied1: ++** frintz z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f64_m_tied1, svfloat64_t, ++ z0 = svrintz_f64_m (z0, p0, z1), ++ z0 = svrintz_m (z0, p0, z1)) ++ ++/* ++** rintz_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** frintz z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f64_m_tied2, svfloat64_t, ++ z0 = svrintz_f64_m (z1, p0, z0), ++ z0 = svrintz_m (z1, p0, z0)) ++ ++/* ++** rintz_f64_m_untied: ++** movprfx z0, z2 ++** frintz z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f64_m_untied, svfloat64_t, ++ z0 = svrintz_f64_m (z2, p0, z1), ++ z0 = svrintz_m (z2, p0, z1)) ++ ++/* ++** rintz_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** frintz z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f64_z_tied1, svfloat64_t, ++ z0 = svrintz_f64_z (p0, z0), ++ z0 = svrintz_z (p0, z0)) ++ ++/* ++** rintz_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** frintz z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f64_z_untied, svfloat64_t, ++ z0 = svrintz_f64_z (p0, z1), ++ z0 = svrintz_z (p0, z1)) ++ ++/* ++** rintz_f64_x_tied1: ++** frintz z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f64_x_tied1, svfloat64_t, ++ z0 = svrintz_f64_x (p0, z0), ++ z0 = svrintz_x (p0, z0)) ++ ++/* ++** rintz_f64_x_untied: ++** frintz z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rintz_f64_x_untied, svfloat64_t, ++ z0 = svrintz_f64_x (p0, z1), ++ z0 = svrintz_x (p0, z1)) ++ ++/* ++** ptrue_rintz_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintz_f64_x_tied1, svfloat64_t, ++ z0 = svrintz_f64_x (svptrue_b64 (), z0), ++ z0 = svrintz_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_rintz_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_rintz_f64_x_untied, svfloat64_t, ++ z0 = svrintz_f64_x (svptrue_b64 (), z1), ++ z0 = svrintz_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f16.c +new file mode 100644 +index 000000000..adfdc2b9c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rsqrte_f16_tied1: ++** frsqrte z0\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrte_f16_tied1, svfloat16_t, ++ z0 = svrsqrte_f16 (z0), ++ z0 = svrsqrte (z0)) ++ ++/* ++** rsqrte_f16_untied: ++** frsqrte z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrte_f16_untied, svfloat16_t, ++ z0 = svrsqrte_f16 (z1), ++ z0 = svrsqrte (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f32.c +new file mode 100644 +index 000000000..fd938ebdf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rsqrte_f32_tied1: ++** frsqrte z0\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrte_f32_tied1, svfloat32_t, ++ z0 = svrsqrte_f32 (z0), ++ z0 = svrsqrte (z0)) ++ ++/* ++** rsqrte_f32_untied: ++** frsqrte z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrte_f32_untied, svfloat32_t, ++ z0 = svrsqrte_f32 (z1), ++ z0 = svrsqrte (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f64.c +new file mode 100644 +index 000000000..3ac0f4053 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rsqrte_f64_tied1: ++** frsqrte z0\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrte_f64_tied1, svfloat64_t, ++ z0 = svrsqrte_f64 (z0), ++ z0 = svrsqrte (z0)) ++ ++/* ++** rsqrte_f64_untied: ++** frsqrte z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrte_f64_untied, svfloat64_t, ++ z0 = svrsqrte_f64 (z1), ++ z0 = svrsqrte (z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f16.c +new file mode 100644 +index 000000000..2d88be3d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rsqrts_f16_tied1: ++** frsqrts z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrts_f16_tied1, svfloat16_t, ++ z0 = svrsqrts_f16 (z0, z1), ++ z0 = svrsqrts (z0, z1)) ++ ++/* ++** rsqrts_f16_tied2: ++** frsqrts z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrts_f16_tied2, svfloat16_t, ++ z0 = svrsqrts_f16 (z1, z0), ++ z0 = svrsqrts (z1, z0)) ++ ++/* ++** rsqrts_f16_untied: ++** frsqrts z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrts_f16_untied, svfloat16_t, ++ z0 = svrsqrts_f16 (z1, z2), ++ z0 = svrsqrts (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f32.c +new file mode 100644 +index 000000000..cd76aef4d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f32.c +@@ -0,0 +1,30 
@@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rsqrts_f32_tied1: ++** frsqrts z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrts_f32_tied1, svfloat32_t, ++ z0 = svrsqrts_f32 (z0, z1), ++ z0 = svrsqrts (z0, z1)) ++ ++/* ++** rsqrts_f32_tied2: ++** frsqrts z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrts_f32_tied2, svfloat32_t, ++ z0 = svrsqrts_f32 (z1, z0), ++ z0 = svrsqrts (z1, z0)) ++ ++/* ++** rsqrts_f32_untied: ++** frsqrts z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrts_f32_untied, svfloat32_t, ++ z0 = svrsqrts_f32 (z1, z2), ++ z0 = svrsqrts (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f64.c +new file mode 100644 +index 000000000..e72a82fcb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** rsqrts_f64_tied1: ++** frsqrts z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrts_f64_tied1, svfloat64_t, ++ z0 = svrsqrts_f64 (z0, z1), ++ z0 = svrsqrts (z0, z1)) ++ ++/* ++** rsqrts_f64_tied2: ++** frsqrts z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrts_f64_tied2, svfloat64_t, ++ z0 = svrsqrts_f64 (z1, z0), ++ z0 = svrsqrts (z1, z0)) ++ ++/* ++** rsqrts_f64_untied: ++** frsqrts z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (rsqrts_f64_untied, svfloat64_t, ++ z0 = svrsqrts_f64 (z1, z2), ++ z0 = svrsqrts (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f16.c +new file mode 100644 +index 000000000..9c554255b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f16.c +@@ -0,0 +1,330 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** scale_f16_m_tied1: ++** fscale z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (scale_f16_m_tied1, svfloat16_t, svint16_t, ++ z0 = svscale_f16_m (p0, z0, z4), ++ z0 = svscale_m (p0, z0, z4)) ++ ++/* ++** scale_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fscale z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (scale_f16_m_tied2, svfloat16_t, svint16_t, ++ z0_res = svscale_f16_m (p0, z4, z0), ++ z0_res = svscale_m (p0, z4, z0)) ++ ++/* ++** scale_f16_m_untied: ++** movprfx z0, z1 ++** fscale z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (scale_f16_m_untied, svfloat16_t, svint16_t, ++ z0 = svscale_f16_m (p0, z1, z4), ++ z0 = svscale_m (p0, z1, z4)) ++ ++/* ++** scale_w0_f16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f16_m_tied1, svfloat16_t, int16_t, ++ z0 = svscale_n_f16_m (p0, z0, x0), ++ z0 = svscale_m (p0, z0, x0)) ++ ++/* ++** scale_w0_f16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f16_m_untied, svfloat16_t, int16_t, ++ z0 = svscale_n_f16_m (p0, z1, x0), ++ z0 = svscale_m (p0, z1, x0)) ++ ++/* ++** scale_3_f16_m_tied1: ++** mov (z[0-9]+\.h), #3 ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f16_m_tied1, svfloat16_t, ++ z0 = svscale_n_f16_m (p0, z0, 3), ++ z0 = svscale_m (p0, z0, 3)) ++ ++/* ++** scale_3_f16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #3 ++** movprfx z0, z1 ++** 
fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f16_m_untied, svfloat16_t, ++ z0 = svscale_n_f16_m (p0, z1, 3), ++ z0 = svscale_m (p0, z1, 3)) ++ ++/* ++** scale_m3_f16_m: ++** mov (z[0-9]+\.h), #-3 ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_m3_f16_m, svfloat16_t, ++ z0 = svscale_n_f16_m (p0, z0, -3), ++ z0 = svscale_m (p0, z0, -3)) ++ ++/* ++** scale_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fscale z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (scale_f16_z_tied1, svfloat16_t, svint16_t, ++ z0 = svscale_f16_z (p0, z0, z4), ++ z0 = svscale_z (p0, z0, z4)) ++ ++/* ++** scale_f16_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, z4\.h ++** fscale z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (scale_f16_z_tied2, svfloat16_t, svint16_t, ++ z0_res = svscale_f16_z (p0, z4, z0), ++ z0_res = svscale_z (p0, z4, z0)) ++ ++/* ++** scale_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fscale z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (scale_f16_z_untied, svfloat16_t, svint16_t, ++ z0 = svscale_f16_z (p0, z1, z4), ++ z0 = svscale_z (p0, z1, z4)) ++ ++/* ++** scale_w0_f16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f16_z_tied1, svfloat16_t, int16_t, ++ z0 = svscale_n_f16_z (p0, z0, x0), ++ z0 = svscale_z (p0, z0, x0)) ++ ++/* ++** scale_w0_f16_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z1\.h ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f16_z_untied, svfloat16_t, int16_t, ++ z0 = svscale_n_f16_z (p0, z1, x0), ++ z0 = svscale_z (p0, z1, x0)) ++ ++/* ++** scale_3_f16_z_tied1: ++** mov (z[0-9]+\.h), #3 ++** movprfx z0\.h, p0/z, z0\.h ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f16_z_tied1, svfloat16_t, ++ z0 = svscale_n_f16_z (p0, z0, 3), ++ z0 = svscale_z (p0, z0, 3)) ++ ++/* ++** scale_3_f16_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #3 ++** movprfx z0\.h, p0/z, z1\.h ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f16_z_untied, svfloat16_t, ++ z0 = svscale_n_f16_z (p0, z1, 3), ++ z0 = svscale_z (p0, z1, 3)) ++ ++/* ++** scale_m3_f16_z: ++** mov (z[0-9]+\.h), #-3 ++** movprfx z0\.h, p0/z, z0\.h ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_m3_f16_z, svfloat16_t, ++ z0 = svscale_n_f16_z (p0, z0, -3), ++ z0 = svscale_z (p0, z0, -3)) ++ ++/* ++** scale_f16_x_tied1: ++** fscale z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (scale_f16_x_tied1, svfloat16_t, svint16_t, ++ z0 = svscale_f16_x (p0, z0, z4), ++ z0 = svscale_x (p0, z0, z4)) ++ ++/* ++** scale_f16_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fscale z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (scale_f16_x_tied2, svfloat16_t, svint16_t, ++ z0_res = svscale_f16_x (p0, z4, z0), ++ z0_res = svscale_x (p0, z4, z0)) ++ ++/* ++** scale_f16_x_untied: ++** movprfx z0, z1 ++** fscale z0\.h, p0/m, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (scale_f16_x_untied, svfloat16_t, svint16_t, ++ z0 = svscale_f16_x (p0, z1, z4), ++ z0 = svscale_x (p0, z1, z4)) ++ ++/* ++** scale_w0_f16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f16_x_tied1, svfloat16_t, int16_t, ++ z0 = svscale_n_f16_x (p0, z0, x0), ++ z0 = svscale_x (p0, z0, x0)) ++ ++/* ++** scale_w0_f16_x_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 
++** movprfx z0, z1 ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f16_x_untied, svfloat16_t, int16_t, ++ z0 = svscale_n_f16_x (p0, z1, x0), ++ z0 = svscale_x (p0, z1, x0)) ++ ++/* ++** scale_3_f16_x_tied1: ++** mov (z[0-9]+\.h), #3 ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f16_x_tied1, svfloat16_t, ++ z0 = svscale_n_f16_x (p0, z0, 3), ++ z0 = svscale_x (p0, z0, 3)) ++ ++/* ++** scale_3_f16_x_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #3 ++** movprfx z0, z1 ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f16_x_untied, svfloat16_t, ++ z0 = svscale_n_f16_x (p0, z1, 3), ++ z0 = svscale_x (p0, z1, 3)) ++ ++/* ++** scale_m3_f16_x: ++** mov (z[0-9]+\.h), #-3 ++** fscale z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_m3_f16_x, svfloat16_t, ++ z0 = svscale_n_f16_x (p0, z0, -3), ++ z0 = svscale_x (p0, z0, -3)) ++ ++/* ++** ptrue_scale_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_scale_f16_x_tied1, svfloat16_t, svint16_t, ++ z0 = svscale_f16_x (svptrue_b16 (), z0, z4), ++ z0 = svscale_x (svptrue_b16 (), z0, z4)) ++ ++/* ++** ptrue_scale_f16_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_scale_f16_x_tied2, svfloat16_t, svint16_t, ++ z0_res = svscale_f16_x (svptrue_b16 (), z4, z0), ++ z0_res = svscale_x (svptrue_b16 (), z4, z0)) ++ ++/* ++** ptrue_scale_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_scale_f16_x_untied, svfloat16_t, svint16_t, ++ z0 = svscale_f16_x (svptrue_b16 (), z1, z4), ++ z0 = svscale_x (svptrue_b16 (), z1, z4)) ++ ++/* ++** ptrue_scale_3_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_3_f16_x_tied1, svfloat16_t, ++ z0 = svscale_n_f16_x (svptrue_b16 (), z0, 3), ++ z0 = svscale_x (svptrue_b16 (), z0, 3)) ++ ++/* ++** ptrue_scale_3_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_3_f16_x_untied, svfloat16_t, ++ z0 = svscale_n_f16_x (svptrue_b16 (), z1, 3), ++ z0 = svscale_x (svptrue_b16 (), z1, 3)) ++ ++/* ++** ptrue_scale_m3_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_m3_f16_x_tied1, svfloat16_t, ++ z0 = svscale_n_f16_x (svptrue_b16 (), z0, -3), ++ z0 = svscale_x (svptrue_b16 (), z0, -3)) ++ ++/* ++** ptrue_scale_m3_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_m3_f16_x_untied, svfloat16_t, ++ z0 = svscale_n_f16_x (svptrue_b16 (), z1, -3), ++ z0 = svscale_x (svptrue_b16 (), z1, -3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f32.c +new file mode 100644 +index 000000000..747f8a639 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f32.c +@@ -0,0 +1,330 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** scale_f32_m_tied1: ++** fscale z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (scale_f32_m_tied1, svfloat32_t, svint32_t, ++ z0 = svscale_f32_m (p0, z0, z4), ++ z0 = svscale_m (p0, z0, z4)) ++ ++/* ++** scale_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fscale z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (scale_f32_m_tied2, svfloat32_t, svint32_t, ++ z0_res = svscale_f32_m (p0, z4, z0), ++ z0_res = svscale_m (p0, z4, z0)) ++ ++/* ++** scale_f32_m_untied: ++** movprfx z0, z1 ++** fscale z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (scale_f32_m_untied, svfloat32_t, svint32_t, ++ z0 = svscale_f32_m (p0, z1, z4), ++ z0 = svscale_m (p0, z1, z4)) ++ ++/* ++** scale_w0_f32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f32_m_tied1, svfloat32_t, int32_t, ++ z0 = svscale_n_f32_m (p0, z0, x0), ++ z0 = svscale_m (p0, z0, x0)) ++ ++/* ++** scale_w0_f32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f32_m_untied, svfloat32_t, int32_t, ++ z0 = svscale_n_f32_m (p0, z1, x0), ++ z0 = svscale_m (p0, z1, x0)) ++ ++/* ++** scale_3_f32_m_tied1: ++** mov (z[0-9]+\.s), #3 ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f32_m_tied1, svfloat32_t, ++ z0 = svscale_n_f32_m (p0, z0, 3), ++ z0 = svscale_m (p0, z0, 3)) ++ ++/* ++** scale_3_f32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #3 ++** movprfx z0, z1 ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f32_m_untied, svfloat32_t, ++ z0 = svscale_n_f32_m (p0, z1, 3), ++ z0 = svscale_m (p0, z1, 3)) ++ ++/* ++** scale_m3_f32_m: ++** mov (z[0-9]+\.s), #-3 ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_m3_f32_m, svfloat32_t, ++ z0 = svscale_n_f32_m (p0, z0, -3), ++ z0 = svscale_m (p0, z0, -3)) ++ ++/* ++** scale_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fscale z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (scale_f32_z_tied1, svfloat32_t, svint32_t, ++ z0 = svscale_f32_z (p0, z0, z4), ++ z0 = svscale_z (p0, z0, z4)) ++ ++/* ++** scale_f32_z_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, z4\.s ++** fscale z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (scale_f32_z_tied2, svfloat32_t, svint32_t, ++ z0_res = svscale_f32_z (p0, z4, z0), ++ z0_res = svscale_z (p0, z4, z0)) ++ ++/* ++** scale_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fscale z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (scale_f32_z_untied, svfloat32_t, svint32_t, ++ z0 = svscale_f32_z (p0, z1, z4), ++ z0 = svscale_z (p0, z1, z4)) ++ ++/* ++** scale_w0_f32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f32_z_tied1, svfloat32_t, int32_t, ++ z0 = svscale_n_f32_z (p0, z0, x0), ++ z0 = svscale_z (p0, z0, x0)) ++ ++/* ++** 
scale_w0_f32_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z1\.s ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f32_z_untied, svfloat32_t, int32_t, ++ z0 = svscale_n_f32_z (p0, z1, x0), ++ z0 = svscale_z (p0, z1, x0)) ++ ++/* ++** scale_3_f32_z_tied1: ++** mov (z[0-9]+\.s), #3 ++** movprfx z0\.s, p0/z, z0\.s ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f32_z_tied1, svfloat32_t, ++ z0 = svscale_n_f32_z (p0, z0, 3), ++ z0 = svscale_z (p0, z0, 3)) ++ ++/* ++** scale_3_f32_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #3 ++** movprfx z0\.s, p0/z, z1\.s ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f32_z_untied, svfloat32_t, ++ z0 = svscale_n_f32_z (p0, z1, 3), ++ z0 = svscale_z (p0, z1, 3)) ++ ++/* ++** scale_m3_f32_z: ++** mov (z[0-9]+\.s), #-3 ++** movprfx z0\.s, p0/z, z0\.s ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_m3_f32_z, svfloat32_t, ++ z0 = svscale_n_f32_z (p0, z0, -3), ++ z0 = svscale_z (p0, z0, -3)) ++ ++/* ++** scale_f32_x_tied1: ++** fscale z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (scale_f32_x_tied1, svfloat32_t, svint32_t, ++ z0 = svscale_f32_x (p0, z0, z4), ++ z0 = svscale_x (p0, z0, z4)) ++ ++/* ++** scale_f32_x_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** fscale z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (scale_f32_x_tied2, svfloat32_t, svint32_t, ++ z0_res = svscale_f32_x (p0, z4, z0), ++ z0_res = svscale_x (p0, z4, z0)) ++ ++/* ++** scale_f32_x_untied: ++** movprfx z0, z1 ++** fscale z0\.s, p0/m, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (scale_f32_x_untied, svfloat32_t, svint32_t, ++ z0 = svscale_f32_x (p0, z1, z4), ++ z0 = svscale_x (p0, z1, z4)) ++ ++/* ++** scale_w0_f32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f32_x_tied1, svfloat32_t, int32_t, ++ z0 = svscale_n_f32_x (p0, z0, x0), ++ z0 = svscale_x (p0, z0, x0)) ++ ++/* ++** scale_w0_f32_x_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_w0_f32_x_untied, svfloat32_t, int32_t, ++ z0 = svscale_n_f32_x (p0, z1, x0), ++ z0 = svscale_x (p0, z1, x0)) ++ ++/* ++** scale_3_f32_x_tied1: ++** mov (z[0-9]+\.s), #3 ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f32_x_tied1, svfloat32_t, ++ z0 = svscale_n_f32_x (p0, z0, 3), ++ z0 = svscale_x (p0, z0, 3)) ++ ++/* ++** scale_3_f32_x_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #3 ++** movprfx z0, z1 ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f32_x_untied, svfloat32_t, ++ z0 = svscale_n_f32_x (p0, z1, 3), ++ z0 = svscale_x (p0, z1, 3)) ++ ++/* ++** scale_m3_f32_x: ++** mov (z[0-9]+\.s), #-3 ++** fscale z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_m3_f32_x, svfloat32_t, ++ z0 = svscale_n_f32_x (p0, z0, -3), ++ z0 = svscale_x (p0, z0, -3)) ++ ++/* ++** ptrue_scale_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_scale_f32_x_tied1, svfloat32_t, svint32_t, ++ z0 = svscale_f32_x (svptrue_b32 (), z0, z4), ++ z0 = svscale_x (svptrue_b32 (), z0, z4)) ++ ++/* ++** ptrue_scale_f32_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_scale_f32_x_tied2, svfloat32_t, svint32_t, ++ z0_res = svscale_f32_x (svptrue_b32 (), z4, z0), ++ z0_res = svscale_x (svptrue_b32 (), z4, z0)) ++ ++/* ++** ptrue_scale_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_scale_f32_x_untied, svfloat32_t, svint32_t, ++ z0 = svscale_f32_x (svptrue_b32 (), z1, z4), ++ z0 = svscale_x (svptrue_b32 (), z1, z4)) ++ ++/* ++** ptrue_scale_3_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_3_f32_x_tied1, svfloat32_t, ++ z0 = svscale_n_f32_x (svptrue_b32 (), z0, 3), ++ z0 = svscale_x (svptrue_b32 (), z0, 3)) ++ ++/* ++** ptrue_scale_3_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_3_f32_x_untied, svfloat32_t, ++ z0 = svscale_n_f32_x (svptrue_b32 (), z1, 3), ++ z0 = svscale_x (svptrue_b32 (), z1, 3)) ++ ++/* ++** ptrue_scale_m3_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_m3_f32_x_tied1, svfloat32_t, ++ z0 = svscale_n_f32_x (svptrue_b32 (), z0, -3), ++ z0 = svscale_x (svptrue_b32 (), z0, -3)) ++ ++/* ++** ptrue_scale_m3_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_m3_f32_x_untied, svfloat32_t, ++ z0 = svscale_n_f32_x (svptrue_b32 (), z1, -3), ++ z0 = svscale_x (svptrue_b32 (), z1, -3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f64.c +new file mode 100644 +index 000000000..004cbfa3e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f64.c +@@ -0,0 +1,330 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** scale_f64_m_tied1: ++** fscale z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (scale_f64_m_tied1, svfloat64_t, svint64_t, ++ z0 = svscale_f64_m (p0, z0, z4), ++ z0 = svscale_m (p0, z0, z4)) ++ ++/* ++** scale_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (scale_f64_m_tied2, svfloat64_t, svint64_t, ++ z0_res = svscale_f64_m (p0, z4, z0), ++ z0_res = svscale_m (p0, z4, z0)) ++ ++/* ++** scale_f64_m_untied: ++** movprfx z0, z1 ++** fscale z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (scale_f64_m_untied, svfloat64_t, svint64_t, ++ z0 = svscale_f64_m (p0, z1, z4), ++ z0 = svscale_m (p0, z1, z4)) ++ ++/* ++** scale_x0_f64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_x0_f64_m_tied1, svfloat64_t, int64_t, ++ z0 = svscale_n_f64_m (p0, z0, x0), ++ z0 = svscale_m (p0, z0, x0)) ++ ++/* ++** scale_x0_f64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_x0_f64_m_untied, svfloat64_t, int64_t, ++ z0 = svscale_n_f64_m (p0, z1, x0), ++ z0 = svscale_m (p0, z1, x0)) ++ ++/* ++** scale_3_f64_m_tied1: ++** mov (z[0-9]+\.d), #3 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f64_m_tied1, svfloat64_t, ++ z0 = svscale_n_f64_m (p0, z0, 3), ++ z0 = svscale_m (p0, z0, 3)) ++ ++/* ++** scale_3_f64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #3 ++** movprfx z0, z1 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f64_m_untied, svfloat64_t, ++ z0 = svscale_n_f64_m (p0, z1, 3), ++ z0 = svscale_m (p0, z1, 3)) ++ ++/* 
++** scale_m3_f64_m: ++** mov (z[0-9]+\.d), #-3 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_m3_f64_m, svfloat64_t, ++ z0 = svscale_n_f64_m (p0, z0, -3), ++ z0 = svscale_m (p0, z0, -3)) ++ ++/* ++** scale_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fscale z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (scale_f64_z_tied1, svfloat64_t, svint64_t, ++ z0 = svscale_f64_z (p0, z0, z4), ++ z0 = svscale_z (p0, z0, z4)) ++ ++/* ++** scale_f64_z_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, z4\.d ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (scale_f64_z_tied2, svfloat64_t, svint64_t, ++ z0_res = svscale_f64_z (p0, z4, z0), ++ z0_res = svscale_z (p0, z4, z0)) ++ ++/* ++** scale_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fscale z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (scale_f64_z_untied, svfloat64_t, svint64_t, ++ z0 = svscale_f64_z (p0, z1, z4), ++ z0 = svscale_z (p0, z1, z4)) ++ ++/* ++** scale_x0_f64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_x0_f64_z_tied1, svfloat64_t, int64_t, ++ z0 = svscale_n_f64_z (p0, z0, x0), ++ z0 = svscale_z (p0, z0, x0)) ++ ++/* ++** scale_x0_f64_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z1\.d ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_x0_f64_z_untied, svfloat64_t, int64_t, ++ z0 = svscale_n_f64_z (p0, z1, x0), ++ z0 = svscale_z (p0, z1, x0)) ++ ++/* ++** scale_3_f64_z_tied1: ++** mov (z[0-9]+\.d), #3 ++** movprfx z0\.d, p0/z, z0\.d ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f64_z_tied1, svfloat64_t, ++ z0 = svscale_n_f64_z (p0, z0, 3), ++ z0 = svscale_z (p0, z0, 3)) ++ ++/* ++** scale_3_f64_z_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #3 ++** movprfx z0\.d, p0/z, z1\.d ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f64_z_untied, svfloat64_t, ++ z0 = svscale_n_f64_z (p0, z1, 3), ++ z0 = svscale_z (p0, z1, 3)) ++ ++/* ++** scale_m3_f64_z: ++** mov (z[0-9]+\.d), #-3 ++** movprfx z0\.d, p0/z, z0\.d ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_m3_f64_z, svfloat64_t, ++ z0 = svscale_n_f64_z (p0, z0, -3), ++ z0 = svscale_z (p0, z0, -3)) ++ ++/* ++** scale_f64_x_tied1: ++** fscale z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (scale_f64_x_tied1, svfloat64_t, svint64_t, ++ z0 = svscale_f64_x (p0, z0, z4), ++ z0 = svscale_x (p0, z0, z4)) ++ ++/* ++** scale_f64_x_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z4 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_DUAL_Z_REV (scale_f64_x_tied2, svfloat64_t, svint64_t, ++ z0_res = svscale_f64_x (p0, z4, z0), ++ z0_res = svscale_x (p0, z4, z0)) ++ ++/* ++** scale_f64_x_untied: ++** movprfx z0, z1 ++** fscale z0\.d, p0/m, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (scale_f64_x_untied, svfloat64_t, svint64_t, ++ z0 = svscale_f64_x (p0, z1, z4), ++ z0 = svscale_x (p0, z1, z4)) ++ ++/* ++** scale_x0_f64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_x0_f64_x_tied1, svfloat64_t, int64_t, ++ z0 = svscale_n_f64_x (p0, z0, x0), ++ z0 = svscale_x (p0, z0, x0)) ++ ++/* ++** scale_x0_f64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (scale_x0_f64_x_untied, svfloat64_t, int64_t, ++ z0 = svscale_n_f64_x (p0, z1, x0), ++ z0 = svscale_x (p0, z1, 
x0)) ++ ++/* ++** scale_3_f64_x_tied1: ++** mov (z[0-9]+\.d), #3 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f64_x_tied1, svfloat64_t, ++ z0 = svscale_n_f64_x (p0, z0, 3), ++ z0 = svscale_x (p0, z0, 3)) ++ ++/* ++** scale_3_f64_x_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #3 ++** movprfx z0, z1 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_3_f64_x_untied, svfloat64_t, ++ z0 = svscale_n_f64_x (p0, z1, 3), ++ z0 = svscale_x (p0, z1, 3)) ++ ++/* ++** scale_m3_f64_x: ++** mov (z[0-9]+\.d), #-3 ++** fscale z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (scale_m3_f64_x, svfloat64_t, ++ z0 = svscale_n_f64_x (p0, z0, -3), ++ z0 = svscale_x (p0, z0, -3)) ++ ++/* ++** ptrue_scale_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_scale_f64_x_tied1, svfloat64_t, svint64_t, ++ z0 = svscale_f64_x (svptrue_b64 (), z0, z4), ++ z0 = svscale_x (svptrue_b64 (), z0, z4)) ++ ++/* ++** ptrue_scale_f64_x_tied2: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z_REV (ptrue_scale_f64_x_tied2, svfloat64_t, svint64_t, ++ z0_res = svscale_f64_x (svptrue_b64 (), z4, z0), ++ z0_res = svscale_x (svptrue_b64 (), z4, z0)) ++ ++/* ++** ptrue_scale_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_DUAL_Z (ptrue_scale_f64_x_untied, svfloat64_t, svint64_t, ++ z0 = svscale_f64_x (svptrue_b64 (), z1, z4), ++ z0 = svscale_x (svptrue_b64 (), z1, z4)) ++ ++/* ++** ptrue_scale_3_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_3_f64_x_tied1, svfloat64_t, ++ z0 = svscale_n_f64_x (svptrue_b64 (), z0, 3), ++ z0 = svscale_x (svptrue_b64 (), z0, 3)) ++ ++/* ++** ptrue_scale_3_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_3_f64_x_untied, svfloat64_t, ++ z0 = svscale_n_f64_x (svptrue_b64 (), z1, 3), ++ z0 = svscale_x (svptrue_b64 (), z1, 3)) ++ ++/* ++** ptrue_scale_m3_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_m3_f64_x_tied1, svfloat64_t, ++ z0 = svscale_n_f64_x (svptrue_b64 (), z0, -3), ++ z0 = svscale_x (svptrue_b64 (), z0, -3)) ++ ++/* ++** ptrue_scale_m3_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_scale_m3_f64_x_untied, svfloat64_t, ++ z0 = svscale_n_f64_x (svptrue_b64 (), z1, -3), ++ z0 = svscale_x (svptrue_b64 (), z1, -3)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_b.c +new file mode 100644 +index 000000000..a135e9c99 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_b.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_b_tied1: ++** sel p0\.b, p3, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (sel_b_tied1, ++ p0 = svsel_b (p3, p0, p1), ++ p0 = svsel (p3, p0, p1)) ++ ++/* ++** sel_b_tied2: ++** sel p0\.b, p3, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (sel_b_tied2, ++ p0 = svsel_b (p3, p1, p0), ++ p0 = svsel (p3, p1, p0)) ++ ++/* ++** sel_b_untied: ++** sel p0\.b, p3, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (sel_b_untied, ++ p0 = svsel_b (p3, p1, p2), ++ p0 = svsel (p3, p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_bf16.c +new file mode 100644 +index 000000000..44636d8f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_bf16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_bf16_tied1: ++** sel z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_bf16_tied1, svbfloat16_t, ++ z0 = svsel_bf16 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_bf16_tied2: ++** sel z0\.h, p0, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_bf16_tied2, svbfloat16_t, ++ z0 = svsel_bf16 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_bf16_untied: ++** sel z0\.h, p0, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_bf16_untied, svbfloat16_t, ++ z0 = svsel_bf16 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f16.c +new file mode 100644 +index 000000000..35750ea81 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_f16_tied1: ++** sel z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_f16_tied1, svfloat16_t, ++ z0 = svsel_f16 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_f16_tied2: ++** sel z0\.h, p0, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_f16_tied2, svfloat16_t, ++ z0 = svsel_f16 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_f16_untied: ++** sel z0\.h, p0, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_f16_untied, svfloat16_t, ++ z0 = svsel_f16 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f32.c +new file mode 100644 +index 000000000..639a84724 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_f32_tied1: ++** sel z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sel_f32_tied1, svfloat32_t, ++ z0 = svsel_f32 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_f32_tied2: ++** sel z0\.s, p0, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sel_f32_tied2, 
svfloat32_t, ++ z0 = svsel_f32 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_f32_untied: ++** sel z0\.s, p0, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sel_f32_untied, svfloat32_t, ++ z0 = svsel_f32 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f64.c +new file mode 100644 +index 000000000..048d6e52a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_f64_tied1: ++** sel z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sel_f64_tied1, svfloat64_t, ++ z0 = svsel_f64 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_f64_tied2: ++** sel z0\.d, p0, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sel_f64_tied2, svfloat64_t, ++ z0 = svsel_f64 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_f64_untied: ++** sel z0\.d, p0, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sel_f64_untied, svfloat64_t, ++ z0 = svsel_f64 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s16.c +new file mode 100644 +index 000000000..e162da499 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_s16_tied1: ++** sel z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s16_tied1, svint16_t, ++ z0 = svsel_s16 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_s16_tied2: ++** sel z0\.h, p0, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s16_tied2, svint16_t, ++ z0 = svsel_s16 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_s16_untied: ++** sel z0\.h, p0, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s16_untied, svint16_t, ++ z0 = svsel_s16 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s32.c +new file mode 100644 +index 000000000..80839d803 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_s32_tied1: ++** sel z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s32_tied1, svint32_t, ++ z0 = svsel_s32 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_s32_tied2: ++** sel z0\.s, p0, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s32_tied2, svint32_t, ++ z0 = svsel_s32 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_s32_untied: ++** sel z0\.s, p0, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s32_untied, svint32_t, ++ z0 = svsel_s32 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s64.c +new file mode 100644 +index 000000000..85a77eafb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_s64_tied1: ++** sel z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s64_tied1, svint64_t, ++ z0 = svsel_s64 (p0, z0, z1), ++ z0 = svsel (p0, z0, 
z1)) ++ ++/* ++** sel_s64_tied2: ++** sel z0\.d, p0, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s64_tied2, svint64_t, ++ z0 = svsel_s64 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_s64_untied: ++** sel z0\.d, p0, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s64_untied, svint64_t, ++ z0 = svsel_s64 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s8.c +new file mode 100644 +index 000000000..28c43f627 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_s8_tied1: ++** sel z0\.b, p0, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s8_tied1, svint8_t, ++ z0 = svsel_s8 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_s8_tied2: ++** sel z0\.b, p0, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s8_tied2, svint8_t, ++ z0 = svsel_s8 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_s8_untied: ++** sel z0\.b, p0, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sel_s8_untied, svint8_t, ++ z0 = svsel_s8 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u16.c +new file mode 100644 +index 000000000..b85ede803 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_u16_tied1: ++** sel z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u16_tied1, svuint16_t, ++ z0 = svsel_u16 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_u16_tied2: ++** sel z0\.h, p0, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u16_tied2, svuint16_t, ++ z0 = svsel_u16 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_u16_untied: ++** sel z0\.h, p0, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u16_untied, svuint16_t, ++ z0 = svsel_u16 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u32.c +new file mode 100644 +index 000000000..636cf8790 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_u32_tied1: ++** sel z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u32_tied1, svuint32_t, ++ z0 = svsel_u32 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_u32_tied2: ++** sel z0\.s, p0, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u32_tied2, svuint32_t, ++ z0 = svsel_u32 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_u32_untied: ++** sel z0\.s, p0, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u32_untied, svuint32_t, ++ z0 = svsel_u32 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u64.c +new file mode 100644 +index 000000000..6325ca56f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_u64_tied1: ++** sel z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ 
++TEST_UNIFORM_Z (sel_u64_tied1, svuint64_t, ++ z0 = svsel_u64 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_u64_tied2: ++** sel z0\.d, p0, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u64_tied2, svuint64_t, ++ z0 = svsel_u64 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_u64_untied: ++** sel z0\.d, p0, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u64_untied, svuint64_t, ++ z0 = svsel_u64 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u8.c +new file mode 100644 +index 000000000..5af53dccd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sel_u8_tied1: ++** sel z0\.b, p0, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u8_tied1, svuint8_t, ++ z0 = svsel_u8 (p0, z0, z1), ++ z0 = svsel (p0, z0, z1)) ++ ++/* ++** sel_u8_tied2: ++** sel z0\.b, p0, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u8_tied2, svuint8_t, ++ z0 = svsel_u8 (p0, z1, z0), ++ z0 = svsel (p0, z1, z0)) ++ ++/* ++** sel_u8_untied: ++** sel z0\.b, p0, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sel_u8_untied, svuint8_t, ++ z0 = svsel_u8 (p0, z1, z2), ++ z0 = svsel (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_bf16.c +new file mode 100644 +index 000000000..b160a2517 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_bf16.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_bf16_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_bf16_z24_0, svbfloat16x2_t, svbfloat16_t, ++ z24 = svset2_bf16 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_bf16_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_bf16_z24_1, svbfloat16x2_t, svbfloat16_t, ++ z24 = svset2_bf16 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_bf16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_bf16_z4_0, svbfloat16x2_t, svbfloat16_t, ++ z4 = svset2_bf16 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_bf16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_bf16_z4_1, svbfloat16x2_t, svbfloat16_t, ++ z4 = svset2_bf16 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f16.c +new file mode 100644 +index 000000000..859600698 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f16.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_f16_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f16_z24_0, svfloat16x2_t, svfloat16_t, ++ z24 = svset2_f16 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_f16_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f16_z24_1, svfloat16x2_t, svfloat16_t, ++ z24 = svset2_f16 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_f16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f16_z4_0, svfloat16x2_t, svfloat16_t, ++ z4 = svset2_f16 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_f16_z4_1: ++** mov 
z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f16_z4_1, svfloat16x2_t, svfloat16_t, ++ z4 = svset2_f16 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f32.c +new file mode 100644 +index 000000000..a95ff2fc5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f32.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_f32_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f32_z24_0, svfloat32x2_t, svfloat32_t, ++ z24 = svset2_f32 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_f32_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f32_z24_1, svfloat32x2_t, svfloat32_t, ++ z24 = svset2_f32 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_f32_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f32_z4_0, svfloat32x2_t, svfloat32_t, ++ z4 = svset2_f32 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_f32_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f32_z4_1, svfloat32x2_t, svfloat32_t, ++ z4 = svset2_f32 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f64.c +new file mode 100644 +index 000000000..77837b7d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f64.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_f64_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f64_z24_0, svfloat64x2_t, svfloat64_t, ++ z24 = svset2_f64 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_f64_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f64_z24_1, svfloat64x2_t, svfloat64_t, ++ z24 = svset2_f64 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_f64_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f64_z4_0, svfloat64x2_t, svfloat64_t, ++ z4 = svset2_f64 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_f64_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_f64_z4_1, svfloat64x2_t, svfloat64_t, ++ z4 = svset2_f64 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s16.c +new file mode 100644 +index 000000000..aa2e70fd1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s16.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_s16_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s16_z24_0, svint16x2_t, svint16_t, ++ z24 = svset2_s16 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_s16_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s16_z24_1, svint16x2_t, svint16_t, ++ z24 = svset2_s16 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_s16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s16_z4_0, svint16x2_t, svint16_t, ++ z4 = svset2_s16 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_s16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s16_z4_1, svint16x2_t, svint16_t, ++ z4 = svset2_s16 (z4, 1, z0), ++ z4 
= svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s32.c +new file mode 100644 +index 000000000..3a7c289aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s32.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_s32_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s32_z24_0, svint32x2_t, svint32_t, ++ z24 = svset2_s32 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_s32_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s32_z24_1, svint32x2_t, svint32_t, ++ z24 = svset2_s32 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_s32_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s32_z4_0, svint32x2_t, svint32_t, ++ z4 = svset2_s32 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_s32_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s32_z4_1, svint32x2_t, svint32_t, ++ z4 = svset2_s32 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s64.c +new file mode 100644 +index 000000000..ca6df54d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s64.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_s64_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s64_z24_0, svint64x2_t, svint64_t, ++ z24 = svset2_s64 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_s64_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s64_z24_1, svint64x2_t, svint64_t, ++ z24 = svset2_s64 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_s64_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s64_z4_0, svint64x2_t, svint64_t, ++ z4 = svset2_s64 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_s64_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s64_z4_1, svint64x2_t, svint64_t, ++ z4 = svset2_s64 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s8.c +new file mode 100644 +index 000000000..e143128a4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s8.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_s8_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s8_z24_0, svint8x2_t, svint8_t, ++ z24 = svset2_s8 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_s8_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s8_z24_1, svint8x2_t, svint8_t, ++ z24 = svset2_s8 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_s8_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s8_z4_0, svint8x2_t, svint8_t, ++ z4 = svset2_s8 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_s8_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_s8_z4_1, svint8x2_t, svint8_t, ++ z4 = svset2_s8 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u16.c +new file mode 100644 
+index 000000000..53da08398 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u16.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_u16_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u16_z24_0, svuint16x2_t, svuint16_t, ++ z24 = svset2_u16 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_u16_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u16_z24_1, svuint16x2_t, svuint16_t, ++ z24 = svset2_u16 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_u16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u16_z4_0, svuint16x2_t, svuint16_t, ++ z4 = svset2_u16 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_u16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u16_z4_1, svuint16x2_t, svuint16_t, ++ z4 = svset2_u16 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u32.c +new file mode 100644 +index 000000000..5266a62d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u32.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_u32_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u32_z24_0, svuint32x2_t, svuint32_t, ++ z24 = svset2_u32 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_u32_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u32_z24_1, svuint32x2_t, svuint32_t, ++ z24 = svset2_u32 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_u32_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u32_z4_0, svuint32x2_t, svuint32_t, ++ z4 = svset2_u32 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_u32_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u32_z4_1, svuint32x2_t, svuint32_t, ++ z4 = svset2_u32 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u64.c +new file mode 100644 +index 000000000..f7d2a1807 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u64.c +@@ -0,0 +1,41 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_u64_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u64_z24_0, svuint64x2_t, svuint64_t, ++ z24 = svset2_u64 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_u64_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u64_z24_1, svuint64x2_t, svuint64_t, ++ z24 = svset2_u64 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_u64_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u64_z4_0, svuint64x2_t, svuint64_t, ++ z4 = svset2_u64 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_u64_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u64_z4_1, svuint64x2_t, svuint64_t, ++ z4 = svset2_u64 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u8.c +new file mode 100644 +index 000000000..9494a0e54 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u8.c +@@ -0,0 +1,41 @@ ++/* { 
dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set2_u8_z24_0: ++** mov z25\.d, z5\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u8_z24_0, svuint8x2_t, svuint8_t, ++ z24 = svset2_u8 (z4, 0, z0), ++ z24 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_u8_z24_1: ++** mov z24\.d, z4\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u8_z24_1, svuint8x2_t, svuint8_t, ++ z24 = svset2_u8 (z4, 1, z0), ++ z24 = svset2 (z4, 1, z0)) ++ ++/* ++** set2_u8_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u8_z4_0, svuint8x2_t, svuint8_t, ++ z4 = svset2_u8 (z4, 0, z0), ++ z4 = svset2 (z4, 0, z0)) ++ ++/* ++** set2_u8_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set2_u8_z4_1, svuint8x2_t, svuint8_t, ++ z4 = svset2_u8 (z4, 1, z0), ++ z4 = svset2 (z4, 1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_bf16.c +new file mode 100644 +index 000000000..4e0707d09 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_bf16.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_bf16_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_bf16_z24_0, svbfloat16x3_t, svbfloat16_t, ++ z24 = svset3_bf16 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_bf16_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_bf16_z24_1, svbfloat16x3_t, svbfloat16_t, ++ z24 = svset3_bf16 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_bf16_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_bf16_z24_2, svbfloat16x3_t, svbfloat16_t, ++ z24 = svset3_bf16 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_bf16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_bf16_z4_0, svbfloat16x3_t, svbfloat16_t, ++ z4 = svset3_bf16 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_bf16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_bf16_z4_1, svbfloat16x3_t, svbfloat16_t, ++ z4 = svset3_bf16 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_bf16_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_bf16_z4_2, svbfloat16x3_t, svbfloat16_t, ++ z4 = svset3_bf16 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f16.c +new file mode 100644 +index 000000000..b6bb3a2bf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f16.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_f16_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f16_z24_0, svfloat16x3_t, svfloat16_t, ++ z24 = svset3_f16 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_f16_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f16_z24_1, svfloat16x3_t, svfloat16_t, ++ z24 = svset3_f16 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_f16_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f16_z24_2, svfloat16x3_t, svfloat16_t, ++ z24 = svset3_f16 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* 
++** set3_f16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f16_z4_0, svfloat16x3_t, svfloat16_t, ++ z4 = svset3_f16 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_f16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f16_z4_1, svfloat16x3_t, svfloat16_t, ++ z4 = svset3_f16 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_f16_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f16_z4_2, svfloat16x3_t, svfloat16_t, ++ z4 = svset3_f16 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f32.c +new file mode 100644 +index 000000000..659bc713f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f32.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_f32_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f32_z24_0, svfloat32x3_t, svfloat32_t, ++ z24 = svset3_f32 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_f32_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f32_z24_1, svfloat32x3_t, svfloat32_t, ++ z24 = svset3_f32 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_f32_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f32_z24_2, svfloat32x3_t, svfloat32_t, ++ z24 = svset3_f32 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_f32_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f32_z4_0, svfloat32x3_t, svfloat32_t, ++ z4 = svset3_f32 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_f32_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f32_z4_1, svfloat32x3_t, svfloat32_t, ++ z4 = svset3_f32 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_f32_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f32_z4_2, svfloat32x3_t, svfloat32_t, ++ z4 = svset3_f32 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f64.c +new file mode 100644 +index 000000000..2cf3b6015 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f64.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_f64_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f64_z24_0, svfloat64x3_t, svfloat64_t, ++ z24 = svset3_f64 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_f64_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f64_z24_1, svfloat64x3_t, svfloat64_t, ++ z24 = svset3_f64 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_f64_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f64_z24_2, svfloat64x3_t, svfloat64_t, ++ z24 = svset3_f64 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_f64_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f64_z4_0, svfloat64x3_t, svfloat64_t, ++ z4 = svset3_f64 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_f64_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f64_z4_1, svfloat64x3_t, svfloat64_t, ++ z4 = svset3_f64 (z4, 1, z0), ++ z4 = svset3 (z4, 1, 
z0)) ++ ++/* ++** set3_f64_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_f64_z4_2, svfloat64x3_t, svfloat64_t, ++ z4 = svset3_f64 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s16.c +new file mode 100644 +index 000000000..907ae9894 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s16.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_s16_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s16_z24_0, svint16x3_t, svint16_t, ++ z24 = svset3_s16 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_s16_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s16_z24_1, svint16x3_t, svint16_t, ++ z24 = svset3_s16 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_s16_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s16_z24_2, svint16x3_t, svint16_t, ++ z24 = svset3_s16 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_s16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s16_z4_0, svint16x3_t, svint16_t, ++ z4 = svset3_s16 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_s16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s16_z4_1, svint16x3_t, svint16_t, ++ z4 = svset3_s16 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_s16_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s16_z4_2, svint16x3_t, svint16_t, ++ z4 = svset3_s16 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s32.c +new file mode 100644 +index 000000000..0baa33c3a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s32.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_s32_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s32_z24_0, svint32x3_t, svint32_t, ++ z24 = svset3_s32 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_s32_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s32_z24_1, svint32x3_t, svint32_t, ++ z24 = svset3_s32 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_s32_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s32_z24_2, svint32x3_t, svint32_t, ++ z24 = svset3_s32 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_s32_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s32_z4_0, svint32x3_t, svint32_t, ++ z4 = svset3_s32 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_s32_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s32_z4_1, svint32x3_t, svint32_t, ++ z4 = svset3_s32 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_s32_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s32_z4_2, svint32x3_t, svint32_t, ++ z4 = svset3_s32 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s64.c +new file mode 100644 +index 000000000..d1d142c71 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s64.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_s64_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s64_z24_0, svint64x3_t, svint64_t, ++ z24 = svset3_s64 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_s64_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s64_z24_1, svint64x3_t, svint64_t, ++ z24 = svset3_s64 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_s64_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s64_z24_2, svint64x3_t, svint64_t, ++ z24 = svset3_s64 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_s64_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s64_z4_0, svint64x3_t, svint64_t, ++ z4 = svset3_s64 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_s64_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s64_z4_1, svint64x3_t, svint64_t, ++ z4 = svset3_s64 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_s64_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s64_z4_2, svint64x3_t, svint64_t, ++ z4 = svset3_s64 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s8.c +new file mode 100644 +index 000000000..8badf4b1d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s8.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_s8_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s8_z24_0, svint8x3_t, svint8_t, ++ z24 = svset3_s8 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_s8_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s8_z24_1, svint8x3_t, svint8_t, ++ z24 = svset3_s8 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_s8_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s8_z24_2, svint8x3_t, svint8_t, ++ z24 = svset3_s8 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_s8_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s8_z4_0, svint8x3_t, svint8_t, ++ z4 = svset3_s8 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_s8_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s8_z4_1, svint8x3_t, svint8_t, ++ z4 = svset3_s8 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_s8_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_s8_z4_2, svint8x3_t, svint8_t, ++ z4 = svset3_s8 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u16.c +new file mode 100644 +index 000000000..df7ce88d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u16.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_u16_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u16_z24_0, svuint16x3_t, svuint16_t, ++ z24 = svset3_u16 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** 
set3_u16_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u16_z24_1, svuint16x3_t, svuint16_t, ++ z24 = svset3_u16 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_u16_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u16_z24_2, svuint16x3_t, svuint16_t, ++ z24 = svset3_u16 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_u16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u16_z4_0, svuint16x3_t, svuint16_t, ++ z4 = svset3_u16 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_u16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u16_z4_1, svuint16x3_t, svuint16_t, ++ z4 = svset3_u16 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_u16_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u16_z4_2, svuint16x3_t, svuint16_t, ++ z4 = svset3_u16 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u32.c +new file mode 100644 +index 000000000..703a68f5c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u32.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_u32_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u32_z24_0, svuint32x3_t, svuint32_t, ++ z24 = svset3_u32 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_u32_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u32_z24_1, svuint32x3_t, svuint32_t, ++ z24 = svset3_u32 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_u32_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u32_z24_2, svuint32x3_t, svuint32_t, ++ z24 = svset3_u32 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_u32_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u32_z4_0, svuint32x3_t, svuint32_t, ++ z4 = svset3_u32 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_u32_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u32_z4_1, svuint32x3_t, svuint32_t, ++ z4 = svset3_u32 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_u32_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u32_z4_2, svuint32x3_t, svuint32_t, ++ z4 = svset3_u32 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u64.c +new file mode 100644 +index 000000000..bff5b3539 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u64.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_u64_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u64_z24_0, svuint64x3_t, svuint64_t, ++ z24 = svset3_u64 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_u64_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u64_z24_1, svuint64x3_t, svuint64_t, ++ z24 = svset3_u64 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_u64_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u64_z24_2, svuint64x3_t, 
svuint64_t, ++ z24 = svset3_u64 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_u64_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u64_z4_0, svuint64x3_t, svuint64_t, ++ z4 = svset3_u64 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_u64_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u64_z4_1, svuint64x3_t, svuint64_t, ++ z4 = svset3_u64 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_u64_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u64_z4_2, svuint64x3_t, svuint64_t, ++ z4 = svset3_u64 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u8.c +new file mode 100644 +index 000000000..9f40001c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u8.c +@@ -0,0 +1,63 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set3_u8_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u8_z24_0, svuint8x3_t, svuint8_t, ++ z24 = svset3_u8 (z4, 0, z0), ++ z24 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_u8_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u8_z24_1, svuint8x3_t, svuint8_t, ++ z24 = svset3_u8 (z4, 1, z0), ++ z24 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_u8_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u8_z24_2, svuint8x3_t, svuint8_t, ++ z24 = svset3_u8 (z4, 2, z0), ++ z24 = svset3 (z4, 2, z0)) ++ ++/* ++** set3_u8_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u8_z4_0, svuint8x3_t, svuint8_t, ++ z4 = svset3_u8 (z4, 0, z0), ++ z4 = svset3 (z4, 0, z0)) ++ ++/* ++** set3_u8_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u8_z4_1, svuint8x3_t, svuint8_t, ++ z4 = svset3_u8 (z4, 1, z0), ++ z4 = svset3 (z4, 1, z0)) ++ ++/* ++** set3_u8_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set3_u8_z4_2, svuint8x3_t, svuint8_t, ++ z4 = svset3_u8 (z4, 2, z0), ++ z4 = svset3 (z4, 2, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_bf16.c +new file mode 100644 +index 000000000..4e26c1117 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_bf16.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_bf16_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_bf16_z24_0, svbfloat16x4_t, svbfloat16_t, ++ z24 = svset4_bf16 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_bf16_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_bf16_z24_1, svbfloat16x4_t, svbfloat16_t, ++ z24 = svset4_bf16 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_bf16_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_bf16_z24_2, svbfloat16x4_t, svbfloat16_t, ++ z24 = svset4_bf16 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_bf16_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_bf16_z24_3, svbfloat16x4_t, svbfloat16_t, ++ z24 = svset4_bf16 (z4, 3, z0), ++ 
z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_bf16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_bf16_z4_0, svbfloat16x4_t, svbfloat16_t, ++ z4 = svset4_bf16 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_bf16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_bf16_z4_1, svbfloat16x4_t, svbfloat16_t, ++ z4 = svset4_bf16 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_bf16_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_bf16_z4_2, svbfloat16x4_t, svbfloat16_t, ++ z4 = svset4_bf16 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_bf16_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_bf16_z4_3, svbfloat16x4_t, svbfloat16_t, ++ z4 = svset4_bf16 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f16.c +new file mode 100644 +index 000000000..a28ff9ca6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f16.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_f16_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f16_z24_0, svfloat16x4_t, svfloat16_t, ++ z24 = svset4_f16 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_f16_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f16_z24_1, svfloat16x4_t, svfloat16_t, ++ z24 = svset4_f16 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_f16_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f16_z24_2, svfloat16x4_t, svfloat16_t, ++ z24 = svset4_f16 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_f16_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f16_z24_3, svfloat16x4_t, svfloat16_t, ++ z24 = svset4_f16 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_f16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f16_z4_0, svfloat16x4_t, svfloat16_t, ++ z4 = svset4_f16 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_f16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f16_z4_1, svfloat16x4_t, svfloat16_t, ++ z4 = svset4_f16 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_f16_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f16_z4_2, svfloat16x4_t, svfloat16_t, ++ z4 = svset4_f16 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_f16_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f16_z4_3, svfloat16x4_t, svfloat16_t, ++ z4 = svset4_f16 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f32.c +new file mode 100644 +index 000000000..e6e3f5ebd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f32.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_f32_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f32_z24_0, svfloat32x4_t, svfloat32_t, ++ z24 = svset4_f32 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_f32_z24_1: ++** mov z24\.d, z4\.d 
++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f32_z24_1, svfloat32x4_t, svfloat32_t, ++ z24 = svset4_f32 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_f32_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f32_z24_2, svfloat32x4_t, svfloat32_t, ++ z24 = svset4_f32 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_f32_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f32_z24_3, svfloat32x4_t, svfloat32_t, ++ z24 = svset4_f32 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_f32_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f32_z4_0, svfloat32x4_t, svfloat32_t, ++ z4 = svset4_f32 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_f32_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f32_z4_1, svfloat32x4_t, svfloat32_t, ++ z4 = svset4_f32 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_f32_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f32_z4_2, svfloat32x4_t, svfloat32_t, ++ z4 = svset4_f32 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_f32_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f32_z4_3, svfloat32x4_t, svfloat32_t, ++ z4 = svset4_f32 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f64.c +new file mode 100644 +index 000000000..3ceaa459a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f64.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_f64_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f64_z24_0, svfloat64x4_t, svfloat64_t, ++ z24 = svset4_f64 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_f64_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f64_z24_1, svfloat64x4_t, svfloat64_t, ++ z24 = svset4_f64 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_f64_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f64_z24_2, svfloat64x4_t, svfloat64_t, ++ z24 = svset4_f64 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_f64_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f64_z24_3, svfloat64x4_t, svfloat64_t, ++ z24 = svset4_f64 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_f64_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f64_z4_0, svfloat64x4_t, svfloat64_t, ++ z4 = svset4_f64 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_f64_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f64_z4_1, svfloat64x4_t, svfloat64_t, ++ z4 = svset4_f64 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_f64_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f64_z4_2, svfloat64x4_t, svfloat64_t, ++ z4 = svset4_f64 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_f64_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_f64_z4_3, svfloat64x4_t, svfloat64_t, ++ z4 = svset4_f64 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s16.c +new file mode 100644 +index 000000000..3cef6ebe8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s16.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_s16_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s16_z24_0, svint16x4_t, svint16_t, ++ z24 = svset4_s16 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_s16_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s16_z24_1, svint16x4_t, svint16_t, ++ z24 = svset4_s16 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_s16_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s16_z24_2, svint16x4_t, svint16_t, ++ z24 = svset4_s16 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_s16_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s16_z24_3, svint16x4_t, svint16_t, ++ z24 = svset4_s16 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_s16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s16_z4_0, svint16x4_t, svint16_t, ++ z4 = svset4_s16 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_s16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s16_z4_1, svint16x4_t, svint16_t, ++ z4 = svset4_s16 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_s16_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s16_z4_2, svint16x4_t, svint16_t, ++ z4 = svset4_s16 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_s16_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s16_z4_3, svint16x4_t, svint16_t, ++ z4 = svset4_s16 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s32.c +new file mode 100644 +index 000000000..49f646e8d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s32.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_s32_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s32_z24_0, svint32x4_t, svint32_t, ++ z24 = svset4_s32 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_s32_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s32_z24_1, svint32x4_t, svint32_t, ++ z24 = svset4_s32 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_s32_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s32_z24_2, svint32x4_t, svint32_t, ++ z24 = svset4_s32 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_s32_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s32_z24_3, svint32x4_t, svint32_t, ++ z24 = svset4_s32 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_s32_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s32_z4_0, svint32x4_t, svint32_t, ++ 
z4 = svset4_s32 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_s32_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s32_z4_1, svint32x4_t, svint32_t, ++ z4 = svset4_s32 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_s32_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s32_z4_2, svint32x4_t, svint32_t, ++ z4 = svset4_s32 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_s32_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s32_z4_3, svint32x4_t, svint32_t, ++ z4 = svset4_s32 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s64.c +new file mode 100644 +index 000000000..7544e25a2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s64.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_s64_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s64_z24_0, svint64x4_t, svint64_t, ++ z24 = svset4_s64 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_s64_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s64_z24_1, svint64x4_t, svint64_t, ++ z24 = svset4_s64 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_s64_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s64_z24_2, svint64x4_t, svint64_t, ++ z24 = svset4_s64 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_s64_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s64_z24_3, svint64x4_t, svint64_t, ++ z24 = svset4_s64 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_s64_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s64_z4_0, svint64x4_t, svint64_t, ++ z4 = svset4_s64 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_s64_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s64_z4_1, svint64x4_t, svint64_t, ++ z4 = svset4_s64 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_s64_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s64_z4_2, svint64x4_t, svint64_t, ++ z4 = svset4_s64 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_s64_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s64_z4_3, svint64x4_t, svint64_t, ++ z4 = svset4_s64 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s8.c +new file mode 100644 +index 000000000..2ec9ff059 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s8.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_s8_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s8_z24_0, svint8x4_t, svint8_t, ++ z24 = svset4_s8 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_s8_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s8_z24_1, svint8x4_t, svint8_t, ++ z24 = svset4_s8 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_s8_z24_2: ++** 
mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s8_z24_2, svint8x4_t, svint8_t, ++ z24 = svset4_s8 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_s8_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s8_z24_3, svint8x4_t, svint8_t, ++ z24 = svset4_s8 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_s8_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s8_z4_0, svint8x4_t, svint8_t, ++ z4 = svset4_s8 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_s8_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s8_z4_1, svint8x4_t, svint8_t, ++ z4 = svset4_s8 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_s8_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s8_z4_2, svint8x4_t, svint8_t, ++ z4 = svset4_s8 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_s8_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_s8_z4_3, svint8x4_t, svint8_t, ++ z4 = svset4_s8 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u16.c +new file mode 100644 +index 000000000..c9499b044 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u16.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_u16_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u16_z24_0, svuint16x4_t, svuint16_t, ++ z24 = svset4_u16 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_u16_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u16_z24_1, svuint16x4_t, svuint16_t, ++ z24 = svset4_u16 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_u16_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u16_z24_2, svuint16x4_t, svuint16_t, ++ z24 = svset4_u16 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_u16_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u16_z24_3, svuint16x4_t, svuint16_t, ++ z24 = svset4_u16 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_u16_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u16_z4_0, svuint16x4_t, svuint16_t, ++ z4 = svset4_u16 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_u16_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u16_z4_1, svuint16x4_t, svuint16_t, ++ z4 = svset4_u16 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_u16_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u16_z4_2, svuint16x4_t, svuint16_t, ++ z4 = svset4_u16 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_u16_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u16_z4_3, svuint16x4_t, svuint16_t, ++ z4 = svset4_u16 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u32.c +new file mode 100644 +index 000000000..00b3dc513 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u32.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_u32_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u32_z24_0, svuint32x4_t, svuint32_t, ++ z24 = svset4_u32 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_u32_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u32_z24_1, svuint32x4_t, svuint32_t, ++ z24 = svset4_u32 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_u32_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u32_z24_2, svuint32x4_t, svuint32_t, ++ z24 = svset4_u32 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_u32_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u32_z24_3, svuint32x4_t, svuint32_t, ++ z24 = svset4_u32 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_u32_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u32_z4_0, svuint32x4_t, svuint32_t, ++ z4 = svset4_u32 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_u32_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u32_z4_1, svuint32x4_t, svuint32_t, ++ z4 = svset4_u32 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_u32_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u32_z4_2, svuint32x4_t, svuint32_t, ++ z4 = svset4_u32 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_u32_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u32_z4_3, svuint32x4_t, svuint32_t, ++ z4 = svset4_u32 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u64.c +new file mode 100644 +index 000000000..d2f048b82 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u64.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_u64_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u64_z24_0, svuint64x4_t, svuint64_t, ++ z24 = svset4_u64 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_u64_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u64_z24_1, svuint64x4_t, svuint64_t, ++ z24 = svset4_u64 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_u64_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u64_z24_2, svuint64x4_t, svuint64_t, ++ z24 = svset4_u64 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_u64_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u64_z24_3, svuint64x4_t, svuint64_t, ++ z24 = svset4_u64 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_u64_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u64_z4_0, svuint64x4_t, svuint64_t, ++ z4 = svset4_u64 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_u64_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u64_z4_1, svuint64x4_t, svuint64_t, ++ z4 = svset4_u64 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_u64_z4_2: ++** mov z6\.d, z0\.d 
++** ret ++*/ ++TEST_SET (set4_u64_z4_2, svuint64x4_t, svuint64_t, ++ z4 = svset4_u64 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_u64_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u64_z4_3, svuint64x4_t, svuint64_t, ++ z4 = svset4_u64 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u8.c +new file mode 100644 +index 000000000..b4f27c6f1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u8.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** set4_u8_z24_0: ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z24\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u8_z24_0, svuint8x4_t, svuint8_t, ++ z24 = svset4_u8 (z4, 0, z0), ++ z24 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_u8_z24_1: ++** mov z24\.d, z4\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z7\.d ++** mov z25\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u8_z24_1, svuint8x4_t, svuint8_t, ++ z24 = svset4_u8 (z4, 1, z0), ++ z24 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_u8_z24_2: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z27\.d, z7\.d ++** mov z26\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u8_z24_2, svuint8x4_t, svuint8_t, ++ z24 = svset4_u8 (z4, 2, z0), ++ z24 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_u8_z24_3: ++** mov z24\.d, z4\.d ++** mov z25\.d, z5\.d ++** mov z26\.d, z6\.d ++** mov z27\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u8_z24_3, svuint8x4_t, svuint8_t, ++ z24 = svset4_u8 (z4, 3, z0), ++ z24 = svset4 (z4, 3, z0)) ++ ++/* ++** set4_u8_z4_0: ++** mov z4\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u8_z4_0, svuint8x4_t, svuint8_t, ++ z4 = svset4_u8 (z4, 0, z0), ++ z4 = svset4 (z4, 0, z0)) ++ ++/* ++** set4_u8_z4_1: ++** mov z5\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u8_z4_1, svuint8x4_t, svuint8_t, ++ z4 = svset4_u8 (z4, 1, z0), ++ z4 = svset4 (z4, 1, z0)) ++ ++/* ++** set4_u8_z4_2: ++** mov z6\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u8_z4_2, svuint8x4_t, svuint8_t, ++ z4 = svset4_u8 (z4, 2, z0), ++ z4 = svset4 (z4, 2, z0)) ++ ++/* ++** set4_u8_z4_3: ++** mov z7\.d, z0\.d ++** ret ++*/ ++TEST_SET (set4_u8_z4_3, svuint8x4_t, svuint8_t, ++ z4 = svset4_u8 (z4, 3, z0), ++ z4 = svset4 (z4, 3, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_bf16.c +new file mode 100644 +index 000000000..3d2dbf20d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_bf16.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_bf16_tied1: ++** splice z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_bf16_tied1, svbfloat16_t, ++ z0 = svsplice_bf16 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_bf16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** splice z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_bf16_tied2, svbfloat16_t, ++ z0 = svsplice_bf16 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_bf16_untied: ++** movprfx z0, z1 ++** splice z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_bf16_untied, svbfloat16_t, ++ z0 = svsplice_bf16 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f16.c +new file 
mode 100644 +index 000000000..b796eaf3d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f16.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_f16_tied1: ++** splice z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_f16_tied1, svfloat16_t, ++ z0 = svsplice_f16 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** splice z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_f16_tied2, svfloat16_t, ++ z0 = svsplice_f16 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_f16_untied: ++** movprfx z0, z1 ++** splice z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_f16_untied, svfloat16_t, ++ z0 = svsplice_f16 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f32.c +new file mode 100644 +index 000000000..1fc552bc3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f32.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_f32_tied1: ++** splice z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (splice_f32_tied1, svfloat32_t, ++ z0 = svsplice_f32 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** splice z0\.s, p0, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (splice_f32_tied2, svfloat32_t, ++ z0 = svsplice_f32 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_f32_untied: ++** movprfx z0, z1 ++** splice z0\.s, p0, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (splice_f32_untied, svfloat32_t, ++ z0 = svsplice_f32 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f64.c +new file mode 100644 +index 000000000..26b523520 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f64.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_f64_tied1: ++** splice z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (splice_f64_tied1, svfloat64_t, ++ z0 = svsplice_f64 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_f64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** splice z0\.d, p0, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (splice_f64_tied2, svfloat64_t, ++ z0 = svsplice_f64 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_f64_untied: ++** movprfx z0, z1 ++** splice z0\.d, p0, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (splice_f64_untied, svfloat64_t, ++ z0 = svsplice_f64 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s16.c +new file mode 100644 +index 000000000..8796c6ecd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s16.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_s16_tied1: ++** splice z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s16_tied1, svint16_t, ++ z0 = svsplice_s16 (p0, z0, z1), ++ z0 = svsplice (p0, z0, 
z1)) ++ ++/* ++** splice_s16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** splice z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s16_tied2, svint16_t, ++ z0 = svsplice_s16 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_s16_untied: ++** movprfx z0, z1 ++** splice z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s16_untied, svint16_t, ++ z0 = svsplice_s16 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s32.c +new file mode 100644 +index 000000000..5f2798e06 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s32.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_s32_tied1: ++** splice z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s32_tied1, svint32_t, ++ z0 = svsplice_s32 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** splice z0\.s, p0, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s32_tied2, svint32_t, ++ z0 = svsplice_s32 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_s32_untied: ++** movprfx z0, z1 ++** splice z0\.s, p0, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s32_untied, svint32_t, ++ z0 = svsplice_s32 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s64.c +new file mode 100644 +index 000000000..024bfa479 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s64.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_s64_tied1: ++** splice z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s64_tied1, svint64_t, ++ z0 = svsplice_s64 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_s64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** splice z0\.d, p0, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s64_tied2, svint64_t, ++ z0 = svsplice_s64 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_s64_untied: ++** movprfx z0, z1 ++** splice z0\.d, p0, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s64_untied, svint64_t, ++ z0 = svsplice_s64 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s8.c +new file mode 100644 +index 000000000..cd91ee245 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s8.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_s8_tied1: ++** splice z0\.b, p0, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s8_tied1, svint8_t, ++ z0 = svsplice_s8 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_s8_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** splice z0\.b, p0, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s8_tied2, svint8_t, ++ z0 = svsplice_s8 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_s8_untied: ++** movprfx z0, z1 ++** splice z0\.b, p0, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (splice_s8_untied, svint8_t, ++ z0 = svsplice_s8 (p0, z1, z2), ++ z0 = svsplice (p0, z1, 
z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u16.c +new file mode 100644 +index 000000000..821ebaee6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u16.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_u16_tied1: ++** splice z0\.h, p0, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u16_tied1, svuint16_t, ++ z0 = svsplice_u16 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_u16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** splice z0\.h, p0, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u16_tied2, svuint16_t, ++ z0 = svsplice_u16 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_u16_untied: ++** movprfx z0, z1 ++** splice z0\.h, p0, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u16_untied, svuint16_t, ++ z0 = svsplice_u16 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u32.c +new file mode 100644 +index 000000000..200364f20 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u32.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_u32_tied1: ++** splice z0\.s, p0, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u32_tied1, svuint32_t, ++ z0 = svsplice_u32 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_u32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** splice z0\.s, p0, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u32_tied2, svuint32_t, ++ z0 = svsplice_u32 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_u32_untied: ++** movprfx z0, z1 ++** splice z0\.s, p0, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u32_untied, svuint32_t, ++ z0 = svsplice_u32 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u64.c +new file mode 100644 +index 000000000..352bcdeed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u64.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_u64_tied1: ++** splice z0\.d, p0, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u64_tied1, svuint64_t, ++ z0 = svsplice_u64 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_u64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** splice z0\.d, p0, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u64_tied2, svuint64_t, ++ z0 = svsplice_u64 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_u64_untied: ++** movprfx z0, z1 ++** splice z0\.d, p0, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u64_untied, svuint64_t, ++ z0 = svsplice_u64 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u8.c +new file mode 100644 +index 000000000..6c24fe64d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u8.c +@@ -0,0 +1,33 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** splice_u8_tied1: ++** splice z0\.b, 
p0, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u8_tied1, svuint8_t, ++ z0 = svsplice_u8 (p0, z0, z1), ++ z0 = svsplice (p0, z0, z1)) ++ ++/* ++** splice_u8_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** splice z0\.b, p0, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u8_tied2, svuint8_t, ++ z0 = svsplice_u8 (p0, z1, z0), ++ z0 = svsplice (p0, z1, z0)) ++ ++/* ++** splice_u8_untied: ++** movprfx z0, z1 ++** splice z0\.b, p0, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (splice_u8_untied, svuint8_t, ++ z0 = svsplice_u8 (p0, z1, z2), ++ z0 = svsplice (p0, z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f16.c +new file mode 100644 +index 000000000..6dc5940fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f16.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sqrt_f16_m_tied12: ++** fsqrt z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f16_m_tied12, svfloat16_t, ++ z0 = svsqrt_f16_m (z0, p0, z0), ++ z0 = svsqrt_m (z0, p0, z0)) ++ ++/* ++** sqrt_f16_m_tied1: ++** fsqrt z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f16_m_tied1, svfloat16_t, ++ z0 = svsqrt_f16_m (z0, p0, z1), ++ z0 = svsqrt_m (z0, p0, z1)) ++ ++/* ++** sqrt_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fsqrt z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f16_m_tied2, svfloat16_t, ++ z0 = svsqrt_f16_m (z1, p0, z0), ++ z0 = svsqrt_m (z1, p0, z0)) ++ ++/* ++** sqrt_f16_m_untied: ++** movprfx z0, z2 ++** fsqrt z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f16_m_untied, svfloat16_t, ++ z0 = svsqrt_f16_m (z2, p0, z1), ++ z0 = svsqrt_m (z2, p0, z1)) ++ ++/* ++** sqrt_f16_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.h, p0/z, \1\.h ++** fsqrt z0\.h, p0/m, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f16_z_tied1, svfloat16_t, ++ z0 = svsqrt_f16_z (p0, z0), ++ z0 = svsqrt_z (p0, z0)) ++ ++/* ++** sqrt_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsqrt z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f16_z_untied, svfloat16_t, ++ z0 = svsqrt_f16_z (p0, z1), ++ z0 = svsqrt_z (p0, z1)) ++ ++/* ++** sqrt_f16_x_tied1: ++** fsqrt z0\.h, p0/m, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f16_x_tied1, svfloat16_t, ++ z0 = svsqrt_f16_x (p0, z0), ++ z0 = svsqrt_x (p0, z0)) ++ ++/* ++** sqrt_f16_x_untied: ++** fsqrt z0\.h, p0/m, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f16_x_untied, svfloat16_t, ++ z0 = svsqrt_f16_x (p0, z1), ++ z0 = svsqrt_x (p0, z1)) ++ ++/* ++** ptrue_sqrt_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sqrt_f16_x_tied1, svfloat16_t, ++ z0 = svsqrt_f16_x (svptrue_b16 (), z0), ++ z0 = svsqrt_x (svptrue_b16 (), z0)) ++ ++/* ++** ptrue_sqrt_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sqrt_f16_x_untied, svfloat16_t, ++ z0 = svsqrt_f16_x (svptrue_b16 (), z1), ++ z0 = svsqrt_x (svptrue_b16 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f32.c +new file mode 100644 +index 000000000..71d1f8f74 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f32.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sqrt_f32_m_tied12: ++** fsqrt z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f32_m_tied12, svfloat32_t, ++ z0 = svsqrt_f32_m (z0, p0, z0), ++ z0 = svsqrt_m (z0, p0, z0)) ++ ++/* ++** sqrt_f32_m_tied1: ++** fsqrt z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f32_m_tied1, svfloat32_t, ++ z0 = svsqrt_f32_m (z0, p0, z1), ++ z0 = svsqrt_m (z0, p0, z1)) ++ ++/* ++** sqrt_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fsqrt z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f32_m_tied2, svfloat32_t, ++ z0 = svsqrt_f32_m (z1, p0, z0), ++ z0 = svsqrt_m (z1, p0, z0)) ++ ++/* ++** sqrt_f32_m_untied: ++** movprfx z0, z2 ++** fsqrt z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f32_m_untied, svfloat32_t, ++ z0 = svsqrt_f32_m (z2, p0, z1), ++ z0 = svsqrt_m (z2, p0, z1)) ++ ++/* ++** sqrt_f32_z_tied1: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0\.s, p0/z, \1\.s ++** fsqrt z0\.s, p0/m, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f32_z_tied1, svfloat32_t, ++ z0 = svsqrt_f32_z (p0, z0), ++ z0 = svsqrt_z (p0, z0)) ++ ++/* ++** sqrt_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsqrt z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f32_z_untied, svfloat32_t, ++ z0 = svsqrt_f32_z (p0, z1), ++ z0 = svsqrt_z (p0, z1)) ++ ++/* ++** sqrt_f32_x_tied1: ++** fsqrt z0\.s, p0/m, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f32_x_tied1, svfloat32_t, ++ z0 = svsqrt_f32_x (p0, z0), ++ z0 = svsqrt_x (p0, z0)) ++ ++/* ++** sqrt_f32_x_untied: ++** fsqrt z0\.s, p0/m, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f32_x_untied, svfloat32_t, ++ z0 = svsqrt_f32_x (p0, z1), ++ z0 = svsqrt_x (p0, z1)) ++ ++/* ++** ptrue_sqrt_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sqrt_f32_x_tied1, svfloat32_t, ++ z0 = svsqrt_f32_x (svptrue_b32 (), z0), ++ z0 = svsqrt_x (svptrue_b32 (), z0)) ++ ++/* ++** ptrue_sqrt_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sqrt_f32_x_untied, svfloat32_t, ++ z0 = svsqrt_f32_x (svptrue_b32 (), z1), ++ z0 = svsqrt_x (svptrue_b32 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f64.c +new file mode 100644 +index 000000000..7771df545 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f64.c +@@ -0,0 +1,103 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sqrt_f64_m_tied12: ++** fsqrt z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f64_m_tied12, svfloat64_t, ++ z0 = svsqrt_f64_m (z0, p0, z0), ++ z0 = svsqrt_m (z0, p0, z0)) ++ ++/* ++** sqrt_f64_m_tied1: ++** fsqrt z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f64_m_tied1, svfloat64_t, ++ z0 = svsqrt_f64_m (z0, p0, z1), ++ z0 = svsqrt_m (z0, p0, z1)) ++ ++/* ++** sqrt_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fsqrt z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f64_m_tied2, svfloat64_t, ++ z0 = svsqrt_f64_m (z1, p0, z0), ++ z0 = svsqrt_m (z1, p0, z0)) ++ ++/* ++** sqrt_f64_m_untied: ++** movprfx z0, z2 ++** fsqrt z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f64_m_untied, svfloat64_t, ++ z0 = svsqrt_f64_m (z2, p0, z1), ++ z0 = svsqrt_m (z2, p0, z1)) ++ ++/* ++** sqrt_f64_z_tied1: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0\.d, p0/z, \1 ++** fsqrt z0\.d, p0/m, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f64_z_tied1, svfloat64_t, ++ z0 = svsqrt_f64_z (p0, z0), ++ z0 = svsqrt_z (p0, z0)) ++ ++/* ++** sqrt_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsqrt z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f64_z_untied, svfloat64_t, ++ z0 = svsqrt_f64_z (p0, z1), ++ z0 = svsqrt_z (p0, z1)) ++ ++/* ++** sqrt_f64_x_tied1: ++** fsqrt z0\.d, p0/m, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f64_x_tied1, svfloat64_t, ++ z0 = svsqrt_f64_x (p0, z0), ++ z0 = svsqrt_x (p0, z0)) ++ ++/* ++** sqrt_f64_x_untied: ++** fsqrt z0\.d, p0/m, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sqrt_f64_x_untied, svfloat64_t, ++ z0 = svsqrt_f64_x (p0, z1), ++ z0 = svsqrt_x (p0, z1)) ++ ++/* ++** ptrue_sqrt_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sqrt_f64_x_tied1, svfloat64_t, ++ z0 = svsqrt_f64_x (svptrue_b64 (), z0), ++ z0 = svsqrt_x (svptrue_b64 (), z0)) ++ ++/* ++** ptrue_sqrt_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sqrt_f64_x_untied, svfloat64_t, ++ z0 = svsqrt_f64_x (svptrue_b64 (), z1), ++ z0 = svsqrt_x (svptrue_b64 (), z1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_bf16.c +new file mode 100644 +index 000000000..ec3dbe318 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_bf16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_bf16_base: ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_bf16_base, svbfloat16_t, bfloat16_t, ++ svst1_bf16 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_bf16_index: ++** st1h z0\.h, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st1_bf16_index, svbfloat16_t, bfloat16_t, ++ svst1_bf16 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_bf16_1: ++** st1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_bf16_1, svbfloat16_t, bfloat16_t, ++ svst1_bf16 (p0, x0 + svcnth (), z0), ++ svst1 (p0, x0 + svcnth (), z0)) ++ ++/* ++** st1_bf16_7: ++** st1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_bf16_7, svbfloat16_t, bfloat16_t, ++ svst1_bf16 (p0, x0 + svcnth () * 7, z0), ++ svst1 (p0, x0 + svcnth () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_bf16_8: ++** incb x0, all, mul #8 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_bf16_8, svbfloat16_t, bfloat16_t, ++ svst1_bf16 (p0, x0 + svcnth () * 8, z0), ++ svst1 (p0, x0 + svcnth () * 8, z0)) ++ ++/* ++** st1_bf16_m1: ++** st1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_bf16_m1, svbfloat16_t, bfloat16_t, ++ svst1_bf16 (p0, x0 - svcnth (), z0), ++ svst1 (p0, x0 - svcnth (), z0)) ++ ++/* ++** st1_bf16_m8: ++** st1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_bf16_m8, svbfloat16_t, bfloat16_t, ++ svst1_bf16 (p0, x0 - svcnth () * 8, z0), ++ svst1 (p0, x0 - svcnth () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_bf16_m9: ++** decb x0, all, mul #9 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_bf16_m9, svbfloat16_t, bfloat16_t, ++ svst1_bf16 (p0, x0 - svcnth () * 9, z0), ++ svst1 (p0, x0 - svcnth () * 9, z0)) ++ ++/* ++** st1_vnum_bf16_0: ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_bf16_0, svbfloat16_t, bfloat16_t, ++ svst1_vnum_bf16 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_bf16_1: ++** st1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_bf16_1, svbfloat16_t, bfloat16_t, ++ svst1_vnum_bf16 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_bf16_7: ++** st1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_bf16_7, svbfloat16_t, bfloat16_t, ++ svst1_vnum_bf16 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_bf16_8: ++** incb x0, all, mul #8 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_bf16_8, svbfloat16_t, bfloat16_t, ++ svst1_vnum_bf16 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_bf16_m1: ++** st1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, ++ svst1_vnum_bf16 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_bf16_m8: ++** st1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_bf16_m8, svbfloat16_t, bfloat16_t, ++ svst1_vnum_bf16 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_bf16_m9: ++** decb x0, all, mul #9 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_bf16_m9, svbfloat16_t, bfloat16_t, ++ svst1_vnum_bf16 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** st1_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1h z0\.h, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, ++ svst1_vnum_bf16 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f16.c +new file mode 100644 +index 000000000..2406cfd97 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_f16_base: ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_f16_base, svfloat16_t, float16_t, ++ svst1_f16 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_f16_index: ++** st1h z0\.h, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st1_f16_index, svfloat16_t, float16_t, ++ svst1_f16 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_f16_1: ++** st1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f16_1, svfloat16_t, float16_t, ++ svst1_f16 (p0, x0 + svcnth (), z0), ++ svst1 (p0, x0 + svcnth (), z0)) ++ ++/* ++** st1_f16_7: ++** st1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f16_7, svfloat16_t, float16_t, ++ svst1_f16 (p0, x0 + svcnth () * 7, z0), ++ svst1 (p0, x0 + svcnth () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_f16_8: ++** incb x0, all, mul #8 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_f16_8, svfloat16_t, float16_t, ++ svst1_f16 (p0, x0 + svcnth () * 8, z0), ++ svst1 (p0, x0 + svcnth () * 8, z0)) ++ ++/* ++** st1_f16_m1: ++** st1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f16_m1, svfloat16_t, float16_t, ++ svst1_f16 (p0, x0 - svcnth (), z0), ++ svst1 (p0, x0 - svcnth (), z0)) ++ ++/* ++** st1_f16_m8: ++** st1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f16_m8, svfloat16_t, float16_t, ++ svst1_f16 (p0, x0 - svcnth () * 8, z0), ++ svst1 (p0, x0 - svcnth () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_f16_m9: ++** decb x0, all, mul #9 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_f16_m9, svfloat16_t, float16_t, ++ svst1_f16 (p0, x0 - svcnth () * 9, z0), ++ svst1 (p0, x0 - svcnth () * 9, z0)) ++ ++/* ++** st1_vnum_f16_0: ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f16_0, svfloat16_t, float16_t, ++ svst1_vnum_f16 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_f16_1: ++** st1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f16_1, svfloat16_t, float16_t, ++ svst1_vnum_f16 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_f16_7: ++** st1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f16_7, svfloat16_t, float16_t, ++ svst1_vnum_f16 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1_vnum_f16_8: ++** incb x0, all, mul #8 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f16_8, svfloat16_t, float16_t, ++ svst1_vnum_f16 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_f16_m1: ++** st1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f16_m1, svfloat16_t, float16_t, ++ svst1_vnum_f16 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_f16_m8: ++** st1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f16_m8, svfloat16_t, float16_t, ++ svst1_vnum_f16 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_f16_m9: ++** decb x0, all, mul #9 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f16_m9, svfloat16_t, float16_t, ++ svst1_vnum_f16 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st1_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1h z0\.h, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f16_x1, svfloat16_t, float16_t, ++ svst1_vnum_f16 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f32.c +new file mode 100644 +index 000000000..5fad7f06f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_f32_base: ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_f32_base, svfloat32_t, float32_t, ++ svst1_f32 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_f32_index: ++** st1w z0\.s, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st1_f32_index, svfloat32_t, float32_t, ++ svst1_f32 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_f32_1: ++** st1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f32_1, svfloat32_t, float32_t, ++ svst1_f32 (p0, x0 + svcntw (), z0), ++ svst1 (p0, x0 + svcntw (), z0)) ++ ++/* ++** st1_f32_7: ++** st1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f32_7, svfloat32_t, float32_t, ++ svst1_f32 (p0, x0 + svcntw () * 7, z0), ++ svst1 (p0, x0 + svcntw () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_f32_8: ++** incb x0, all, mul #8 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_f32_8, svfloat32_t, float32_t, ++ svst1_f32 (p0, x0 + svcntw () * 8, z0), ++ svst1 (p0, x0 + svcntw () * 8, z0)) ++ ++/* ++** st1_f32_m1: ++** st1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f32_m1, svfloat32_t, float32_t, ++ svst1_f32 (p0, x0 - svcntw (), z0), ++ svst1 (p0, x0 - svcntw (), z0)) ++ ++/* ++** st1_f32_m8: ++** st1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f32_m8, svfloat32_t, float32_t, ++ svst1_f32 (p0, x0 - svcntw () * 8, z0), ++ svst1 (p0, x0 - svcntw () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1_f32_m9: ++** decb x0, all, mul #9 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_f32_m9, svfloat32_t, float32_t, ++ svst1_f32 (p0, x0 - svcntw () * 9, z0), ++ svst1 (p0, x0 - svcntw () * 9, z0)) ++ ++/* ++** st1_vnum_f32_0: ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f32_0, svfloat32_t, float32_t, ++ svst1_vnum_f32 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_f32_1: ++** st1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f32_1, svfloat32_t, float32_t, ++ svst1_vnum_f32 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_f32_7: ++** st1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f32_7, svfloat32_t, float32_t, ++ svst1_vnum_f32 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_f32_8: ++** incb x0, all, mul #8 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f32_8, svfloat32_t, float32_t, ++ svst1_vnum_f32 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_f32_m1: ++** st1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f32_m1, svfloat32_t, float32_t, ++ svst1_vnum_f32 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_f32_m8: ++** st1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f32_m8, svfloat32_t, float32_t, ++ svst1_vnum_f32 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_f32_m9: ++** decb x0, all, mul #9 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f32_m9, svfloat32_t, float32_t, ++ svst1_vnum_f32 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st1_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1w z0\.s, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f32_x1, svfloat32_t, float32_t, ++ svst1_vnum_f32 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f64.c +new file mode 100644 +index 000000000..486f92beb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_f64_base: ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_f64_base, svfloat64_t, float64_t, ++ svst1_f64 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_f64_index: ++** st1d z0\.d, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st1_f64_index, svfloat64_t, float64_t, ++ svst1_f64 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_f64_1: ++** st1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f64_1, svfloat64_t, float64_t, ++ svst1_f64 (p0, x0 + svcntd (), z0), ++ svst1 (p0, x0 + svcntd (), z0)) ++ ++/* ++** st1_f64_7: ++** st1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f64_7, svfloat64_t, float64_t, ++ svst1_f64 (p0, x0 + svcntd () * 7, z0), ++ svst1 (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1_f64_8: ++** incb x0, all, mul #8 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_f64_8, svfloat64_t, float64_t, ++ svst1_f64 (p0, x0 + svcntd () * 8, z0), ++ svst1 (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** st1_f64_m1: ++** st1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f64_m1, svfloat64_t, float64_t, ++ svst1_f64 (p0, x0 - svcntd (), z0), ++ svst1 (p0, x0 - svcntd (), z0)) ++ ++/* ++** st1_f64_m8: ++** st1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_f64_m8, svfloat64_t, float64_t, ++ svst1_f64 (p0, x0 - svcntd () * 8, z0), ++ svst1 (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_f64_m9: ++** decb x0, all, mul #9 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_f64_m9, svfloat64_t, float64_t, ++ svst1_f64 (p0, x0 - svcntd () * 9, z0), ++ svst1 (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** st1_vnum_f64_0: ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f64_0, svfloat64_t, float64_t, ++ svst1_vnum_f64 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_f64_1: ++** st1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f64_1, svfloat64_t, float64_t, ++ svst1_vnum_f64 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_f64_7: ++** st1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f64_7, svfloat64_t, float64_t, ++ svst1_vnum_f64 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_f64_8: ++** incb x0, all, mul #8 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f64_8, svfloat64_t, float64_t, ++ svst1_vnum_f64 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_f64_m1: ++** st1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f64_m1, svfloat64_t, float64_t, ++ svst1_vnum_f64 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_f64_m8: ++** st1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f64_m8, svfloat64_t, float64_t, ++ svst1_vnum_f64 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_f64_m9: ++** decb x0, all, mul #9 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f64_m9, svfloat64_t, float64_t, ++ svst1_vnum_f64 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st1_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1d z0\.d, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1_vnum_f64_x1, svfloat64_t, float64_t, ++ svst1_vnum_f64 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s16.c +new file mode 100644 +index 000000000..7d4ac25d2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_s16_base: ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s16_base, svint16_t, int16_t, ++ svst1_s16 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_s16_index: ++** st1h z0\.h, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st1_s16_index, svint16_t, int16_t, ++ svst1_s16 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_s16_1: ++** st1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s16_1, svint16_t, int16_t, ++ svst1_s16 (p0, x0 + svcnth (), z0), ++ svst1 (p0, x0 + svcnth (), z0)) ++ ++/* ++** st1_s16_7: ++** st1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s16_7, svint16_t, int16_t, ++ svst1_s16 (p0, x0 + svcnth () * 7, z0), ++ svst1 (p0, x0 + svcnth () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_s16_8: ++** incb x0, all, mul #8 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s16_8, svint16_t, int16_t, ++ svst1_s16 (p0, x0 + svcnth () * 8, z0), ++ svst1 (p0, x0 + svcnth () * 8, z0)) ++ ++/* ++** st1_s16_m1: ++** st1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s16_m1, svint16_t, int16_t, ++ svst1_s16 (p0, x0 - svcnth (), z0), ++ svst1 (p0, x0 - svcnth (), z0)) ++ ++/* ++** st1_s16_m8: ++** st1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s16_m8, svint16_t, int16_t, ++ svst1_s16 (p0, x0 - svcnth () * 8, z0), ++ svst1 (p0, x0 - svcnth () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_s16_m9: ++** decb x0, all, mul #9 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s16_m9, svint16_t, int16_t, ++ svst1_s16 (p0, x0 - svcnth () * 9, z0), ++ svst1 (p0, x0 - svcnth () * 9, z0)) ++ ++/* ++** st1_vnum_s16_0: ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s16_0, svint16_t, int16_t, ++ svst1_vnum_s16 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_s16_1: ++** st1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s16_1, svint16_t, int16_t, ++ svst1_vnum_s16 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_s16_7: ++** st1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s16_7, svint16_t, int16_t, ++ svst1_vnum_s16 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_s16_8: ++** incb x0, all, mul #8 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s16_8, svint16_t, int16_t, ++ svst1_vnum_s16 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_s16_m1: ++** st1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s16_m1, svint16_t, int16_t, ++ svst1_vnum_s16 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_s16_m8: ++** st1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s16_m8, svint16_t, int16_t, ++ svst1_vnum_s16 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_s16_m9: ++** decb x0, all, mul #9 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s16_m9, svint16_t, int16_t, ++ svst1_vnum_s16 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** st1_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1h z0\.h, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s16_x1, svint16_t, int16_t, ++ svst1_vnum_s16 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s32.c +new file mode 100644 +index 000000000..e2bcc3403 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_s32_base: ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s32_base, svint32_t, int32_t, ++ svst1_s32 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_s32_index: ++** st1w z0\.s, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st1_s32_index, svint32_t, int32_t, ++ svst1_s32 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_s32_1: ++** st1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s32_1, svint32_t, int32_t, ++ svst1_s32 (p0, x0 + svcntw (), z0), ++ svst1 (p0, x0 + svcntw (), z0)) ++ ++/* ++** st1_s32_7: ++** st1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s32_7, svint32_t, int32_t, ++ svst1_s32 (p0, x0 + svcntw () * 7, z0), ++ svst1 (p0, x0 + svcntw () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_s32_8: ++** incb x0, all, mul #8 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s32_8, svint32_t, int32_t, ++ svst1_s32 (p0, x0 + svcntw () * 8, z0), ++ svst1 (p0, x0 + svcntw () * 8, z0)) ++ ++/* ++** st1_s32_m1: ++** st1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s32_m1, svint32_t, int32_t, ++ svst1_s32 (p0, x0 - svcntw (), z0), ++ svst1 (p0, x0 - svcntw (), z0)) ++ ++/* ++** st1_s32_m8: ++** st1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s32_m8, svint32_t, int32_t, ++ svst1_s32 (p0, x0 - svcntw () * 8, z0), ++ svst1 (p0, x0 - svcntw () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_s32_m9: ++** decb x0, all, mul #9 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s32_m9, svint32_t, int32_t, ++ svst1_s32 (p0, x0 - svcntw () * 9, z0), ++ svst1 (p0, x0 - svcntw () * 9, z0)) ++ ++/* ++** st1_vnum_s32_0: ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s32_0, svint32_t, int32_t, ++ svst1_vnum_s32 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_s32_1: ++** st1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s32_1, svint32_t, int32_t, ++ svst1_vnum_s32 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_s32_7: ++** st1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s32_7, svint32_t, int32_t, ++ svst1_vnum_s32 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1_vnum_s32_8: ++** incb x0, all, mul #8 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s32_8, svint32_t, int32_t, ++ svst1_vnum_s32 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_s32_m1: ++** st1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s32_m1, svint32_t, int32_t, ++ svst1_vnum_s32 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_s32_m8: ++** st1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s32_m8, svint32_t, int32_t, ++ svst1_vnum_s32 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_s32_m9: ++** decb x0, all, mul #9 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s32_m9, svint32_t, int32_t, ++ svst1_vnum_s32 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st1_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1w z0\.s, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s32_x1, svint32_t, int32_t, ++ svst1_vnum_s32 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s64.c +new file mode 100644 +index 000000000..8e0b69f73 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_s64_base: ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s64_base, svint64_t, int64_t, ++ svst1_s64 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_s64_index: ++** st1d z0\.d, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st1_s64_index, svint64_t, int64_t, ++ svst1_s64 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_s64_1: ++** st1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s64_1, svint64_t, int64_t, ++ svst1_s64 (p0, x0 + svcntd (), z0), ++ svst1 (p0, x0 + svcntd (), z0)) ++ ++/* ++** st1_s64_7: ++** st1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s64_7, svint64_t, int64_t, ++ svst1_s64 (p0, x0 + svcntd () * 7, z0), ++ svst1 (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_s64_8: ++** incb x0, all, mul #8 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s64_8, svint64_t, int64_t, ++ svst1_s64 (p0, x0 + svcntd () * 8, z0), ++ svst1 (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** st1_s64_m1: ++** st1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s64_m1, svint64_t, int64_t, ++ svst1_s64 (p0, x0 - svcntd (), z0), ++ svst1 (p0, x0 - svcntd (), z0)) ++ ++/* ++** st1_s64_m8: ++** st1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s64_m8, svint64_t, int64_t, ++ svst1_s64 (p0, x0 - svcntd () * 8, z0), ++ svst1 (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1_s64_m9: ++** decb x0, all, mul #9 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s64_m9, svint64_t, int64_t, ++ svst1_s64 (p0, x0 - svcntd () * 9, z0), ++ svst1 (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** st1_vnum_s64_0: ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s64_0, svint64_t, int64_t, ++ svst1_vnum_s64 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_s64_1: ++** st1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s64_1, svint64_t, int64_t, ++ svst1_vnum_s64 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_s64_7: ++** st1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s64_7, svint64_t, int64_t, ++ svst1_vnum_s64 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_s64_8: ++** incb x0, all, mul #8 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s64_8, svint64_t, int64_t, ++ svst1_vnum_s64 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_s64_m1: ++** st1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s64_m1, svint64_t, int64_t, ++ svst1_vnum_s64 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_s64_m8: ++** st1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s64_m8, svint64_t, int64_t, ++ svst1_vnum_s64 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_s64_m9: ++** decb x0, all, mul #9 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s64_m9, svint64_t, int64_t, ++ svst1_vnum_s64 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st1_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1d z0\.d, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s64_x1, svint64_t, int64_t, ++ svst1_vnum_s64 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s8.c +new file mode 100644 +index 000000000..4155683ab +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s8.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_s8_base: ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s8_base, svint8_t, int8_t, ++ svst1_s8 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_s8_index: ++** st1b z0\.b, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st1_s8_index, svint8_t, int8_t, ++ svst1_s8 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_s8_1: ++** st1b z0\.b, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s8_1, svint8_t, int8_t, ++ svst1_s8 (p0, x0 + svcntb (), z0), ++ svst1 (p0, x0 + svcntb (), z0)) ++ ++/* ++** st1_s8_7: ++** st1b z0\.b, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s8_7, svint8_t, int8_t, ++ svst1_s8 (p0, x0 + svcntb () * 7, z0), ++ svst1 (p0, x0 + svcntb () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1_s8_8: ++** incb x0, all, mul #8 ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s8_8, svint8_t, int8_t, ++ svst1_s8 (p0, x0 + svcntb () * 8, z0), ++ svst1 (p0, x0 + svcntb () * 8, z0)) ++ ++/* ++** st1_s8_m1: ++** st1b z0\.b, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s8_m1, svint8_t, int8_t, ++ svst1_s8 (p0, x0 - svcntb (), z0), ++ svst1 (p0, x0 - svcntb (), z0)) ++ ++/* ++** st1_s8_m8: ++** st1b z0\.b, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_s8_m8, svint8_t, int8_t, ++ svst1_s8 (p0, x0 - svcntb () * 8, z0), ++ svst1 (p0, x0 - svcntb () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_s8_m9: ++** decb x0, all, mul #9 ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_s8_m9, svint8_t, int8_t, ++ svst1_s8 (p0, x0 - svcntb () * 9, z0), ++ svst1 (p0, x0 - svcntb () * 9, z0)) ++ ++/* ++** st1_vnum_s8_0: ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s8_0, svint8_t, int8_t, ++ svst1_vnum_s8 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_s8_1: ++** st1b z0\.b, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s8_1, svint8_t, int8_t, ++ svst1_vnum_s8 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_s8_7: ++** st1b z0\.b, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s8_7, svint8_t, int8_t, ++ svst1_vnum_s8 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_s8_8: ++** incb x0, all, mul #8 ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s8_8, svint8_t, int8_t, ++ svst1_vnum_s8 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_s8_m1: ++** st1b z0\.b, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s8_m1, svint8_t, int8_t, ++ svst1_vnum_s8 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_s8_m8: ++** st1b z0\.b, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s8_m8, svint8_t, int8_t, ++ svst1_vnum_s8 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_s8_m9: ++** decb x0, all, mul #9 ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_s8_m9, svint8_t, int8_t, ++ svst1_vnum_s8 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* ++** st1_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st1b z0\.b, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st1b z0\.b, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st1_vnum_s8_x1, svint8_t, int8_t, ++ svst1_vnum_s8 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c +new file mode 100644 +index 000000000..cb6774ad0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c +@@ -0,0 +1,227 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_scatter_f32: ++** st1w z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_f32, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_f32 (p0, z1, z0), ++ svst1_scatter (p0, z1, z0)) ++ ++/* ++** st1_scatter_x0_f32_offset: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_f32_offset, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_offset_f32 (p0, z1, x0, z0), ++ svst1_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m4_f32_offset: ++** mov (x[0-9]+), #?-4 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m4_f32_offset, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_offset_f32 (p0, z1, -4, z0), ++ svst1_scatter_offset (p0, z1, -4, z0)) ++ ++/* ++** st1_scatter_0_f32_offset: ++** st1w z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_f32_offset, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_offset_f32 (p0, z1, 0, z0), ++ svst1_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_5_f32_offset: ++** mov (x[0-9]+), #?5 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_5_f32_offset, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_offset_f32 (p0, z1, 5, z0), ++ svst1_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1_scatter_6_f32_offset: ++** mov (x[0-9]+), #?6 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_6_f32_offset, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_offset_f32 (p0, z1, 6, z0), ++ svst1_scatter_offset (p0, z1, 6, z0)) ++ ++/* ++** st1_scatter_7_f32_offset: ++** mov (x[0-9]+), #?7 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_7_f32_offset, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_offset_f32 (p0, z1, 7, z0), ++ svst1_scatter_offset (p0, z1, 7, z0)) ++ ++/* ++** st1_scatter_8_f32_offset: ++** st1w z0\.s, p0, \[z1\.s, #8\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_8_f32_offset, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_offset_f32 (p0, z1, 8, z0), ++ svst1_scatter_offset (p0, z1, 8, z0)) ++ ++/* ++** st1_scatter_124_f32_offset: ++** st1w z0\.s, p0, \[z1\.s, #124\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_124_f32_offset, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_offset_f32 (p0, z1, 124, z0), ++ svst1_scatter_offset (p0, z1, 124, z0)) ++ ++/* ++** st1_scatter_128_f32_offset: ++** mov (x[0-9]+), #?128 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_128_f32_offset, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_offset_f32 (p0, z1, 128, z0), ++ svst1_scatter_offset (p0, z1, 128, z0)) ++ ++/* ++** st1_scatter_x0_f32_index: ++** lsl (x[0-9]+), x0, #?2 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_f32_index, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_index_f32 (p0, z1, x0, z0), ++ svst1_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m1_f32_index: ++** mov (x[0-9]+), #?-4 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m1_f32_index, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_index_f32 (p0, z1, -1, z0), ++ svst1_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1_scatter_0_f32_index: ++** st1w z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_f32_index, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_index_f32 (p0, z1, 0, 
z0), ++ svst1_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_5_f32_index: ++** st1w z0\.s, p0, \[z1\.s, #20\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_5_f32_index, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_index_f32 (p0, z1, 5, z0), ++ svst1_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1_scatter_31_f32_index: ++** st1w z0\.s, p0, \[z1\.s, #124\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_31_f32_index, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_index_f32 (p0, z1, 31, z0), ++ svst1_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1_scatter_32_f32_index: ++** mov (x[0-9]+), #?128 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_32_f32_index, svfloat32_t, svuint32_t, ++ svst1_scatter_u32base_index_f32 (p0, z1, 32, z0), ++ svst1_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1_scatter_x0_f32_s32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_f32_s32offset, svfloat32_t, float32_t, svint32_t, ++ svst1_scatter_s32offset_f32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_f32_s32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_f32_s32offset, svfloat32_t, float32_t, svint32_t, ++ svst1_scatter_s32offset_f32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_x0_f32_u32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_f32_u32offset, svfloat32_t, float32_t, svuint32_t, ++ svst1_scatter_u32offset_f32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_f32_u32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_f32_u32offset, svfloat32_t, float32_t, svuint32_t, ++ svst1_scatter_u32offset_f32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_x0_f32_s32index: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_f32_s32index, svfloat32_t, float32_t, svint32_t, ++ svst1_scatter_s32index_f32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_f32_s32index: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_f32_s32index, svfloat32_t, float32_t, svint32_t, ++ svst1_scatter_s32index_f32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_x0_f32_u32index: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_f32_u32index, svfloat32_t, float32_t, svuint32_t, ++ svst1_scatter_u32index_f32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_f32_u32index: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_f32_u32index, svfloat32_t, float32_t, svuint32_t, ++ svst1_scatter_u32index_f32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c +new file mode 100644 +index 000000000..fe978bbe5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c +@@ -0,0 +1,303 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_scatter_f64: ++** st1d z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_f64, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_f64 (p0, z1, z0), ++ svst1_scatter (p0, z1, z0)) ++ ++/* ++** st1_scatter_x0_f64_offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, x0, z0), ++ svst1_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m8_f64_offset: ++** mov (x[0-9]+), #?-8 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m8_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, -8, z0), ++ svst1_scatter_offset (p0, z1, -8, z0)) ++ ++/* ++** st1_scatter_0_f64_offset: ++** st1d z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 0, z0), ++ svst1_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_9_f64_offset: ++** mov (x[0-9]+), #?9 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_9_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 9, z0), ++ svst1_scatter_offset (p0, z1, 9, z0)) ++ ++/* ++** st1_scatter_10_f64_offset: ++** mov (x[0-9]+), #?10 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_10_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 10, z0), ++ svst1_scatter_offset (p0, z1, 10, z0)) ++ ++/* ++** st1_scatter_11_f64_offset: ++** mov (x[0-9]+), #?11 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_11_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 11, z0), ++ svst1_scatter_offset (p0, z1, 11, z0)) ++ ++/* ++** st1_scatter_12_f64_offset: ++** mov (x[0-9]+), #?12 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_12_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 12, z0), ++ svst1_scatter_offset (p0, z1, 12, z0)) ++ ++/* ++** st1_scatter_13_f64_offset: ++** mov (x[0-9]+), #?13 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_13_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 13, z0), ++ svst1_scatter_offset (p0, z1, 13, z0)) ++ ++/* ++** st1_scatter_14_f64_offset: ++** mov (x[0-9]+), #?14 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_14_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 14, z0), ++ svst1_scatter_offset (p0, z1, 14, z0)) ++ ++/* ++** st1_scatter_15_f64_offset: ++** mov (x[0-9]+), #?15 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_15_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 15, z0), ++ svst1_scatter_offset (p0, z1, 15, z0)) ++ ++/* ++** st1_scatter_16_f64_offset: ++** st1d z0\.d, p0, \[z1\.d, #16\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_16_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 16, z0), ++ svst1_scatter_offset (p0, z1, 16, z0)) ++ ++/* ++** st1_scatter_248_f64_offset: ++** st1d z0\.d, p0, \[z1\.d, #248\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_248_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 248, 
z0), ++ svst1_scatter_offset (p0, z1, 248, z0)) ++ ++/* ++** st1_scatter_256_f64_offset: ++** mov (x[0-9]+), #?256 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_256_f64_offset, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_offset_f64 (p0, z1, 256, z0), ++ svst1_scatter_offset (p0, z1, 256, z0)) ++ ++/* ++** st1_scatter_x0_f64_index: ++** lsl (x[0-9]+), x0, #?3 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_f64_index, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_index_f64 (p0, z1, x0, z0), ++ svst1_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m1_f64_index: ++** mov (x[0-9]+), #?-8 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m1_f64_index, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_index_f64 (p0, z1, -1, z0), ++ svst1_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1_scatter_0_f64_index: ++** st1d z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_f64_index, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_index_f64 (p0, z1, 0, z0), ++ svst1_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_5_f64_index: ++** st1d z0\.d, p0, \[z1\.d, #40\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_5_f64_index, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_index_f64 (p0, z1, 5, z0), ++ svst1_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1_scatter_31_f64_index: ++** st1d z0\.d, p0, \[z1\.d, #248\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_31_f64_index, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_index_f64 (p0, z1, 31, z0), ++ svst1_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1_scatter_32_f64_index: ++** mov (x[0-9]+), #?256 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_32_f64_index, svfloat64_t, svuint64_t, ++ svst1_scatter_u64base_index_f64 (p0, z1, 32, z0), ++ svst1_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1_scatter_x0_f64_s64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ svst1_scatter_s64offset_f64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_f64_s64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ svst1_scatter_s64offset_f64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_f64_s64offset: ++** st1d z0\.d, p0, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_f64_s64offset, svfloat64_t, float64_t, svint64_t, ++ svst1_scatter_s64offset_f64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1_scatter_x0_f64_u64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ svst1_scatter_u64offset_f64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_f64_u64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ svst1_scatter_u64offset_f64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_f64_u64offset: ++** st1d z0\.d, p0, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_f64_u64offset, svfloat64_t, float64_t, svuint64_t, ++ 
svst1_scatter_u64offset_f64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1_scatter_x0_f64_s64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ svst1_scatter_s64index_f64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_f64_s64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ svst1_scatter_s64index_f64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_f64_s64index: ++** st1d z0\.d, p0, \[x0, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_f64_s64index, svfloat64_t, float64_t, svint64_t, ++ svst1_scatter_s64index_f64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1_scatter_x0_f64_u64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ svst1_scatter_u64index_f64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_f64_u64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ svst1_scatter_u64index_f64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_f64_u64index: ++** st1d z0\.d, p0, \[x0, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_f64_u64index, svfloat64_t, float64_t, svuint64_t, ++ svst1_scatter_u64index_f64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c +new file mode 100644 +index 000000000..d244e701a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c +@@ -0,0 +1,227 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_scatter_s32: ++** st1w z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_s32, svint32_t, svuint32_t, ++ svst1_scatter_u32base_s32 (p0, z1, z0), ++ svst1_scatter (p0, z1, z0)) ++ ++/* ++** st1_scatter_x0_s32_offset: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_s32_offset, svint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_s32 (p0, z1, x0, z0), ++ svst1_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m4_s32_offset: ++** mov (x[0-9]+), #?-4 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m4_s32_offset, svint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_s32 (p0, z1, -4, z0), ++ svst1_scatter_offset (p0, z1, -4, z0)) ++ ++/* ++** st1_scatter_0_s32_offset: ++** st1w z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_s32_offset, svint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_s32 (p0, z1, 0, z0), ++ svst1_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_5_s32_offset: ++** mov (x[0-9]+), #?5 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_5_s32_offset, svint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_s32 (p0, z1, 5, z0), ++ svst1_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1_scatter_6_s32_offset: ++** mov (x[0-9]+), #?6 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_6_s32_offset, svint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_s32 (p0, z1, 6, z0), ++ svst1_scatter_offset (p0, z1, 6, z0)) ++ ++/* ++** st1_scatter_7_s32_offset: ++** mov (x[0-9]+), #?7 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_7_s32_offset, svint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_s32 (p0, z1, 7, z0), ++ svst1_scatter_offset (p0, z1, 7, z0)) ++ ++/* ++** st1_scatter_8_s32_offset: ++** st1w z0\.s, p0, \[z1\.s, #8\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_8_s32_offset, svint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_s32 (p0, z1, 8, z0), ++ svst1_scatter_offset (p0, z1, 8, z0)) ++ ++/* ++** st1_scatter_124_s32_offset: ++** st1w z0\.s, p0, \[z1\.s, #124\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_124_s32_offset, svint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_s32 (p0, z1, 124, z0), ++ svst1_scatter_offset (p0, z1, 124, z0)) ++ ++/* ++** st1_scatter_128_s32_offset: ++** mov (x[0-9]+), #?128 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_128_s32_offset, svint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_s32 (p0, z1, 128, z0), ++ svst1_scatter_offset (p0, z1, 128, z0)) ++ ++/* ++** st1_scatter_x0_s32_index: ++** lsl (x[0-9]+), x0, #?2 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_s32_index, svint32_t, svuint32_t, ++ svst1_scatter_u32base_index_s32 (p0, z1, x0, z0), ++ svst1_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m1_s32_index: ++** mov (x[0-9]+), #?-4 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m1_s32_index, svint32_t, svuint32_t, ++ svst1_scatter_u32base_index_s32 (p0, z1, -1, z0), ++ svst1_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1_scatter_0_s32_index: ++** st1w z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_s32_index, svint32_t, svuint32_t, ++ svst1_scatter_u32base_index_s32 (p0, z1, 0, z0), ++ 
svst1_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_5_s32_index: ++** st1w z0\.s, p0, \[z1\.s, #20\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_5_s32_index, svint32_t, svuint32_t, ++ svst1_scatter_u32base_index_s32 (p0, z1, 5, z0), ++ svst1_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1_scatter_31_s32_index: ++** st1w z0\.s, p0, \[z1\.s, #124\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_31_s32_index, svint32_t, svuint32_t, ++ svst1_scatter_u32base_index_s32 (p0, z1, 31, z0), ++ svst1_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1_scatter_32_s32_index: ++** mov (x[0-9]+), #?128 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_32_s32_index, svint32_t, svuint32_t, ++ svst1_scatter_u32base_index_s32 (p0, z1, 32, z0), ++ svst1_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1_scatter_x0_s32_s32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_s32_s32offset, svint32_t, int32_t, svint32_t, ++ svst1_scatter_s32offset_s32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_s32_s32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_s32_s32offset, svint32_t, int32_t, svint32_t, ++ svst1_scatter_s32offset_s32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_x0_s32_u32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_s32_u32offset, svint32_t, int32_t, svuint32_t, ++ svst1_scatter_u32offset_s32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_s32_u32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_s32_u32offset, svint32_t, int32_t, svuint32_t, ++ svst1_scatter_u32offset_s32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_x0_s32_s32index: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_s32_s32index, svint32_t, int32_t, svint32_t, ++ svst1_scatter_s32index_s32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_s32_s32index: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_s32_s32index, svint32_t, int32_t, svint32_t, ++ svst1_scatter_s32index_s32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_x0_s32_u32index: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_s32_u32index, svint32_t, int32_t, svuint32_t, ++ svst1_scatter_u32index_s32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_s32_u32index: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_s32_u32index, svint32_t, int32_t, svuint32_t, ++ svst1_scatter_u32index_s32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c +new file mode 100644 +index 000000000..5c4ebf440 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c +@@ -0,0 +1,303 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_scatter_s64: ++** st1d z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_s64, svint64_t, svuint64_t, ++ svst1_scatter_u64base_s64 (p0, z1, z0), ++ svst1_scatter (p0, z1, z0)) ++ ++/* ++** st1_scatter_x0_s64_offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, x0, z0), ++ svst1_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m8_s64_offset: ++** mov (x[0-9]+), #?-8 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m8_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, -8, z0), ++ svst1_scatter_offset (p0, z1, -8, z0)) ++ ++/* ++** st1_scatter_0_s64_offset: ++** st1d z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 0, z0), ++ svst1_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_9_s64_offset: ++** mov (x[0-9]+), #?9 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_9_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 9, z0), ++ svst1_scatter_offset (p0, z1, 9, z0)) ++ ++/* ++** st1_scatter_10_s64_offset: ++** mov (x[0-9]+), #?10 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_10_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 10, z0), ++ svst1_scatter_offset (p0, z1, 10, z0)) ++ ++/* ++** st1_scatter_11_s64_offset: ++** mov (x[0-9]+), #?11 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_11_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 11, z0), ++ svst1_scatter_offset (p0, z1, 11, z0)) ++ ++/* ++** st1_scatter_12_s64_offset: ++** mov (x[0-9]+), #?12 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_12_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 12, z0), ++ svst1_scatter_offset (p0, z1, 12, z0)) ++ ++/* ++** st1_scatter_13_s64_offset: ++** mov (x[0-9]+), #?13 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_13_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 13, z0), ++ svst1_scatter_offset (p0, z1, 13, z0)) ++ ++/* ++** st1_scatter_14_s64_offset: ++** mov (x[0-9]+), #?14 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_14_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 14, z0), ++ svst1_scatter_offset (p0, z1, 14, z0)) ++ ++/* ++** st1_scatter_15_s64_offset: ++** mov (x[0-9]+), #?15 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_15_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 15, z0), ++ svst1_scatter_offset (p0, z1, 15, z0)) ++ ++/* ++** st1_scatter_16_s64_offset: ++** st1d z0\.d, p0, \[z1\.d, #16\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_16_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 16, z0), ++ svst1_scatter_offset (p0, z1, 16, z0)) ++ ++/* ++** st1_scatter_248_s64_offset: ++** st1d z0\.d, p0, \[z1\.d, #248\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_248_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 248, z0), ++ 
svst1_scatter_offset (p0, z1, 248, z0)) ++ ++/* ++** st1_scatter_256_s64_offset: ++** mov (x[0-9]+), #?256 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_256_s64_offset, svint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_s64 (p0, z1, 256, z0), ++ svst1_scatter_offset (p0, z1, 256, z0)) ++ ++/* ++** st1_scatter_x0_s64_index: ++** lsl (x[0-9]+), x0, #?3 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_s64_index, svint64_t, svuint64_t, ++ svst1_scatter_u64base_index_s64 (p0, z1, x0, z0), ++ svst1_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m1_s64_index: ++** mov (x[0-9]+), #?-8 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m1_s64_index, svint64_t, svuint64_t, ++ svst1_scatter_u64base_index_s64 (p0, z1, -1, z0), ++ svst1_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1_scatter_0_s64_index: ++** st1d z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_s64_index, svint64_t, svuint64_t, ++ svst1_scatter_u64base_index_s64 (p0, z1, 0, z0), ++ svst1_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_5_s64_index: ++** st1d z0\.d, p0, \[z1\.d, #40\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_5_s64_index, svint64_t, svuint64_t, ++ svst1_scatter_u64base_index_s64 (p0, z1, 5, z0), ++ svst1_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1_scatter_31_s64_index: ++** st1d z0\.d, p0, \[z1\.d, #248\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_31_s64_index, svint64_t, svuint64_t, ++ svst1_scatter_u64base_index_s64 (p0, z1, 31, z0), ++ svst1_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1_scatter_32_s64_index: ++** mov (x[0-9]+), #?256 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_32_s64_index, svint64_t, svuint64_t, ++ svst1_scatter_u64base_index_s64 (p0, z1, 32, z0), ++ svst1_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1_scatter_x0_s64_s64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_s64_s64offset, svint64_t, int64_t, svint64_t, ++ svst1_scatter_s64offset_s64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_s64_s64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_s64_s64offset, svint64_t, int64_t, svint64_t, ++ svst1_scatter_s64offset_s64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_s64_s64offset: ++** st1d z0\.d, p0, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_s64_s64offset, svint64_t, int64_t, svint64_t, ++ svst1_scatter_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1_scatter_x0_s64_u64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ svst1_scatter_u64offset_s64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_s64_u64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ svst1_scatter_u64offset_s64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_s64_u64offset: ++** st1d z0\.d, p0, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_s64_u64offset, svint64_t, int64_t, svuint64_t, ++ svst1_scatter_u64offset_s64 (p0, x0, svextw_u64_x 
(p0, z1), z0), ++ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1_scatter_x0_s64_s64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_s64_s64index, svint64_t, int64_t, svint64_t, ++ svst1_scatter_s64index_s64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_s64_s64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_s64_s64index, svint64_t, int64_t, svint64_t, ++ svst1_scatter_s64index_s64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_s64_s64index: ++** st1d z0\.d, p0, \[x0, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_s64_s64index, svint64_t, int64_t, svint64_t, ++ svst1_scatter_s64index_s64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1_scatter_x0_s64_u64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_s64_u64index, svint64_t, int64_t, svuint64_t, ++ svst1_scatter_u64index_s64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_s64_u64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_s64_u64index, svint64_t, int64_t, svuint64_t, ++ svst1_scatter_u64index_s64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_s64_u64index: ++** st1d z0\.d, p0, \[x0, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_s64_u64index, svint64_t, int64_t, svuint64_t, ++ svst1_scatter_u64index_s64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c +new file mode 100644 +index 000000000..fe3f7259f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c +@@ -0,0 +1,227 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_scatter_u32: ++** st1w z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_u32, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_u32 (p0, z1, z0), ++ svst1_scatter (p0, z1, z0)) ++ ++/* ++** st1_scatter_x0_u32_offset: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_u32_offset, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_u32 (p0, z1, x0, z0), ++ svst1_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m4_u32_offset: ++** mov (x[0-9]+), #?-4 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m4_u32_offset, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_u32 (p0, z1, -4, z0), ++ svst1_scatter_offset (p0, z1, -4, z0)) ++ ++/* ++** st1_scatter_0_u32_offset: ++** st1w z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_u32_offset, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_u32 (p0, z1, 0, z0), ++ svst1_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_5_u32_offset: ++** mov (x[0-9]+), #?5 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_5_u32_offset, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_u32 (p0, z1, 5, z0), ++ svst1_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1_scatter_6_u32_offset: ++** mov (x[0-9]+), #?6 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_6_u32_offset, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_u32 (p0, z1, 6, z0), ++ svst1_scatter_offset (p0, z1, 6, z0)) ++ ++/* ++** st1_scatter_7_u32_offset: ++** mov (x[0-9]+), #?7 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_7_u32_offset, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_u32 (p0, z1, 7, z0), ++ svst1_scatter_offset (p0, z1, 7, z0)) ++ ++/* ++** st1_scatter_8_u32_offset: ++** st1w z0\.s, p0, \[z1\.s, #8\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_8_u32_offset, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_u32 (p0, z1, 8, z0), ++ svst1_scatter_offset (p0, z1, 8, z0)) ++ ++/* ++** st1_scatter_124_u32_offset: ++** st1w z0\.s, p0, \[z1\.s, #124\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_124_u32_offset, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_u32 (p0, z1, 124, z0), ++ svst1_scatter_offset (p0, z1, 124, z0)) ++ ++/* ++** st1_scatter_128_u32_offset: ++** mov (x[0-9]+), #?128 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_128_u32_offset, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_offset_u32 (p0, z1, 128, z0), ++ svst1_scatter_offset (p0, z1, 128, z0)) ++ ++/* ++** st1_scatter_x0_u32_index: ++** lsl (x[0-9]+), x0, #?2 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_u32_index, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_index_u32 (p0, z1, x0, z0), ++ svst1_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m1_u32_index: ++** mov (x[0-9]+), #?-4 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m1_u32_index, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_index_u32 (p0, z1, -1, z0), ++ svst1_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1_scatter_0_u32_index: ++** st1w z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_u32_index, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_index_u32 (p0, z1, 0, z0), ++ 
svst1_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_5_u32_index: ++** st1w z0\.s, p0, \[z1\.s, #20\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_5_u32_index, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_index_u32 (p0, z1, 5, z0), ++ svst1_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1_scatter_31_u32_index: ++** st1w z0\.s, p0, \[z1\.s, #124\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_31_u32_index, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_index_u32 (p0, z1, 31, z0), ++ svst1_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1_scatter_32_u32_index: ++** mov (x[0-9]+), #?128 ++** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_32_u32_index, svuint32_t, svuint32_t, ++ svst1_scatter_u32base_index_u32 (p0, z1, 32, z0), ++ svst1_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1_scatter_x0_u32_s32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_u32_s32offset, svuint32_t, uint32_t, svint32_t, ++ svst1_scatter_s32offset_u32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_u32_s32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_u32_s32offset, svuint32_t, uint32_t, svint32_t, ++ svst1_scatter_s32offset_u32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_x0_u32_u32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_u32_u32offset, svuint32_t, uint32_t, svuint32_t, ++ svst1_scatter_u32offset_u32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_u32_u32offset: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_u32_u32offset, svuint32_t, uint32_t, svuint32_t, ++ svst1_scatter_u32offset_u32 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_x0_u32_s32index: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_u32_s32index, svuint32_t, uint32_t, svint32_t, ++ svst1_scatter_s32index_u32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_u32_s32index: ++** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_u32_s32index, svuint32_t, uint32_t, svint32_t, ++ svst1_scatter_s32index_u32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_x0_u32_u32index: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_u32_u32index, svuint32_t, uint32_t, svuint32_t, ++ svst1_scatter_u32index_u32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_u32_u32index: ++** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_u32_u32index, svuint32_t, uint32_t, svuint32_t, ++ svst1_scatter_u32index_u32 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c +new file mode 100644 +index 000000000..232123566 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c +@@ -0,0 +1,303 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_scatter_u64: ++** st1d z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_u64, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_u64 (p0, z1, z0), ++ svst1_scatter (p0, z1, z0)) ++ ++/* ++** st1_scatter_x0_u64_offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, x0, z0), ++ svst1_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m8_u64_offset: ++** mov (x[0-9]+), #?-8 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m8_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, -8, z0), ++ svst1_scatter_offset (p0, z1, -8, z0)) ++ ++/* ++** st1_scatter_0_u64_offset: ++** st1d z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 0, z0), ++ svst1_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_9_u64_offset: ++** mov (x[0-9]+), #?9 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_9_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 9, z0), ++ svst1_scatter_offset (p0, z1, 9, z0)) ++ ++/* ++** st1_scatter_10_u64_offset: ++** mov (x[0-9]+), #?10 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_10_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 10, z0), ++ svst1_scatter_offset (p0, z1, 10, z0)) ++ ++/* ++** st1_scatter_11_u64_offset: ++** mov (x[0-9]+), #?11 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_11_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 11, z0), ++ svst1_scatter_offset (p0, z1, 11, z0)) ++ ++/* ++** st1_scatter_12_u64_offset: ++** mov (x[0-9]+), #?12 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_12_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 12, z0), ++ svst1_scatter_offset (p0, z1, 12, z0)) ++ ++/* ++** st1_scatter_13_u64_offset: ++** mov (x[0-9]+), #?13 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_13_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 13, z0), ++ svst1_scatter_offset (p0, z1, 13, z0)) ++ ++/* ++** st1_scatter_14_u64_offset: ++** mov (x[0-9]+), #?14 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_14_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 14, z0), ++ svst1_scatter_offset (p0, z1, 14, z0)) ++ ++/* ++** st1_scatter_15_u64_offset: ++** mov (x[0-9]+), #?15 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_15_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 15, z0), ++ svst1_scatter_offset (p0, z1, 15, z0)) ++ ++/* ++** st1_scatter_16_u64_offset: ++** st1d z0\.d, p0, \[z1\.d, #16\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_16_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 16, z0), ++ svst1_scatter_offset (p0, z1, 16, z0)) ++ ++/* ++** st1_scatter_248_u64_offset: ++** st1d z0\.d, p0, \[z1\.d, #248\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_248_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 248, z0), ++ 
svst1_scatter_offset (p0, z1, 248, z0)) ++ ++/* ++** st1_scatter_256_u64_offset: ++** mov (x[0-9]+), #?256 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_256_u64_offset, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_offset_u64 (p0, z1, 256, z0), ++ svst1_scatter_offset (p0, z1, 256, z0)) ++ ++/* ++** st1_scatter_x0_u64_index: ++** lsl (x[0-9]+), x0, #?3 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_x0_u64_index, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_index_u64 (p0, z1, x0, z0), ++ svst1_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1_scatter_m1_u64_index: ++** mov (x[0-9]+), #?-8 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_m1_u64_index, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_index_u64 (p0, z1, -1, z0), ++ svst1_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1_scatter_0_u64_index: ++** st1d z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_0_u64_index, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_index_u64 (p0, z1, 0, z0), ++ svst1_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1_scatter_5_u64_index: ++** st1d z0\.d, p0, \[z1\.d, #40\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_5_u64_index, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_index_u64 (p0, z1, 5, z0), ++ svst1_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1_scatter_31_u64_index: ++** st1d z0\.d, p0, \[z1\.d, #248\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_31_u64_index, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_index_u64 (p0, z1, 31, z0), ++ svst1_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1_scatter_32_u64_index: ++** mov (x[0-9]+), #?256 ++** st1d z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1_scatter_32_u64_index, svuint64_t, svuint64_t, ++ svst1_scatter_u64base_index_u64 (p0, z1, 32, z0), ++ svst1_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1_scatter_x0_u64_s64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ svst1_scatter_s64offset_u64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_u64_s64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ svst1_scatter_s64offset_u64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_u64_s64offset: ++** st1d z0\.d, p0, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_u64_s64offset, svuint64_t, uint64_t, svint64_t, ++ svst1_scatter_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1_scatter_x0_u64_u64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ svst1_scatter_u64offset_u64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_u64_u64offset: ++** st1d z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ svst1_scatter_u64offset_u64 (p0, x0, z1, z0), ++ svst1_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_u64_u64offset: ++** st1d z0\.d, p0, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_u64_u64offset, svuint64_t, uint64_t, svuint64_t, ++ svst1_scatter_u64offset_u64 (p0, 
x0, svextw_u64_x (p0, z1), z0), ++ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1_scatter_x0_u64_s64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ svst1_scatter_s64index_u64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_u64_s64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ svst1_scatter_s64index_u64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_u64_s64index: ++** st1d z0\.d, p0, \[x0, z1\.d, sxtw 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_u64_s64index, svuint64_t, uint64_t, svint64_t, ++ svst1_scatter_s64index_u64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1_scatter_x0_u64_u64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_x0_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ svst1_scatter_u64index_u64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_u64_u64index: ++** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ svst1_scatter_u64index_u64 (p0, x0, z1, z0), ++ svst1_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1_scatter_ext_u64_u64index: ++** st1d z0\.d, p0, \[x0, z1\.d, uxtw 3\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1_scatter_ext_u64_u64index, svuint64_t, uint64_t, svuint64_t, ++ svst1_scatter_u64index_u64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u16.c +new file mode 100644 +index 000000000..e9dc05219 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_u16_base: ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u16_base, svuint16_t, uint16_t, ++ svst1_u16 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_u16_index: ++** st1h z0\.h, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st1_u16_index, svuint16_t, uint16_t, ++ svst1_u16 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_u16_1: ++** st1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u16_1, svuint16_t, uint16_t, ++ svst1_u16 (p0, x0 + svcnth (), z0), ++ svst1 (p0, x0 + svcnth (), z0)) ++ ++/* ++** st1_u16_7: ++** st1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u16_7, svuint16_t, uint16_t, ++ svst1_u16 (p0, x0 + svcnth () * 7, z0), ++ svst1 (p0, x0 + svcnth () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1_u16_8: ++** incb x0, all, mul #8 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u16_8, svuint16_t, uint16_t, ++ svst1_u16 (p0, x0 + svcnth () * 8, z0), ++ svst1 (p0, x0 + svcnth () * 8, z0)) ++ ++/* ++** st1_u16_m1: ++** st1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u16_m1, svuint16_t, uint16_t, ++ svst1_u16 (p0, x0 - svcnth (), z0), ++ svst1 (p0, x0 - svcnth (), z0)) ++ ++/* ++** st1_u16_m8: ++** st1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u16_m8, svuint16_t, uint16_t, ++ svst1_u16 (p0, x0 - svcnth () * 8, z0), ++ svst1 (p0, x0 - svcnth () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_u16_m9: ++** decb x0, all, mul #9 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u16_m9, svuint16_t, uint16_t, ++ svst1_u16 (p0, x0 - svcnth () * 9, z0), ++ svst1 (p0, x0 - svcnth () * 9, z0)) ++ ++/* ++** st1_vnum_u16_0: ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u16_0, svuint16_t, uint16_t, ++ svst1_vnum_u16 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_u16_1: ++** st1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u16_1, svuint16_t, uint16_t, ++ svst1_vnum_u16 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_u16_7: ++** st1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u16_7, svuint16_t, uint16_t, ++ svst1_vnum_u16 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_u16_8: ++** incb x0, all, mul #8 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u16_8, svuint16_t, uint16_t, ++ svst1_vnum_u16 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_u16_m1: ++** st1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u16_m1, svuint16_t, uint16_t, ++ svst1_vnum_u16 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_u16_m8: ++** st1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u16_m8, svuint16_t, uint16_t, ++ svst1_vnum_u16 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_u16_m9: ++** decb x0, all, mul #9 ++** st1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u16_m9, svuint16_t, uint16_t, ++ svst1_vnum_u16 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st1_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1h z0\.h, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u16_x1, svuint16_t, uint16_t, ++ svst1_vnum_u16 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u32.c +new file mode 100644 +index 000000000..8610ae4c8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_u32_base: ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u32_base, svuint32_t, uint32_t, ++ svst1_u32 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_u32_index: ++** st1w z0\.s, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st1_u32_index, svuint32_t, uint32_t, ++ svst1_u32 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_u32_1: ++** st1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u32_1, svuint32_t, uint32_t, ++ svst1_u32 (p0, x0 + svcntw (), z0), ++ svst1 (p0, x0 + svcntw (), z0)) ++ ++/* ++** st1_u32_7: ++** st1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u32_7, svuint32_t, uint32_t, ++ svst1_u32 (p0, x0 + svcntw () * 7, z0), ++ svst1 (p0, x0 + svcntw () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_u32_8: ++** incb x0, all, mul #8 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u32_8, svuint32_t, uint32_t, ++ svst1_u32 (p0, x0 + svcntw () * 8, z0), ++ svst1 (p0, x0 + svcntw () * 8, z0)) ++ ++/* ++** st1_u32_m1: ++** st1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u32_m1, svuint32_t, uint32_t, ++ svst1_u32 (p0, x0 - svcntw (), z0), ++ svst1 (p0, x0 - svcntw (), z0)) ++ ++/* ++** st1_u32_m8: ++** st1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u32_m8, svuint32_t, uint32_t, ++ svst1_u32 (p0, x0 - svcntw () * 8, z0), ++ svst1 (p0, x0 - svcntw () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_u32_m9: ++** decb x0, all, mul #9 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u32_m9, svuint32_t, uint32_t, ++ svst1_u32 (p0, x0 - svcntw () * 9, z0), ++ svst1 (p0, x0 - svcntw () * 9, z0)) ++ ++/* ++** st1_vnum_u32_0: ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u32_0, svuint32_t, uint32_t, ++ svst1_vnum_u32 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_u32_1: ++** st1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u32_1, svuint32_t, uint32_t, ++ svst1_vnum_u32 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_u32_7: ++** st1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u32_7, svuint32_t, uint32_t, ++ svst1_vnum_u32 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_u32_8: ++** incb x0, all, mul #8 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u32_8, svuint32_t, uint32_t, ++ svst1_vnum_u32 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_u32_m1: ++** st1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u32_m1, svuint32_t, uint32_t, ++ svst1_vnum_u32 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_u32_m8: ++** st1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u32_m8, svuint32_t, uint32_t, ++ svst1_vnum_u32 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_u32_m9: ++** decb x0, all, mul #9 ++** st1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u32_m9, svuint32_t, uint32_t, ++ svst1_vnum_u32 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** st1_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1w z0\.s, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u32_x1, svuint32_t, uint32_t, ++ svst1_vnum_u32 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u64.c +new file mode 100644 +index 000000000..5d4fae932 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_u64_base: ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u64_base, svuint64_t, uint64_t, ++ svst1_u64 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_u64_index: ++** st1d z0\.d, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st1_u64_index, svuint64_t, uint64_t, ++ svst1_u64 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_u64_1: ++** st1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u64_1, svuint64_t, uint64_t, ++ svst1_u64 (p0, x0 + svcntd (), z0), ++ svst1 (p0, x0 + svcntd (), z0)) ++ ++/* ++** st1_u64_7: ++** st1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u64_7, svuint64_t, uint64_t, ++ svst1_u64 (p0, x0 + svcntd () * 7, z0), ++ svst1 (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_u64_8: ++** incb x0, all, mul #8 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u64_8, svuint64_t, uint64_t, ++ svst1_u64 (p0, x0 + svcntd () * 8, z0), ++ svst1 (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** st1_u64_m1: ++** st1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u64_m1, svuint64_t, uint64_t, ++ svst1_u64 (p0, x0 - svcntd (), z0), ++ svst1 (p0, x0 - svcntd (), z0)) ++ ++/* ++** st1_u64_m8: ++** st1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u64_m8, svuint64_t, uint64_t, ++ svst1_u64 (p0, x0 - svcntd () * 8, z0), ++ svst1 (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_u64_m9: ++** decb x0, all, mul #9 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u64_m9, svuint64_t, uint64_t, ++ svst1_u64 (p0, x0 - svcntd () * 9, z0), ++ svst1 (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** st1_vnum_u64_0: ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u64_0, svuint64_t, uint64_t, ++ svst1_vnum_u64 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_u64_1: ++** st1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u64_1, svuint64_t, uint64_t, ++ svst1_vnum_u64 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_u64_7: ++** st1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u64_7, svuint64_t, uint64_t, ++ svst1_vnum_u64 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1_vnum_u64_8: ++** incb x0, all, mul #8 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u64_8, svuint64_t, uint64_t, ++ svst1_vnum_u64 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_u64_m1: ++** st1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u64_m1, svuint64_t, uint64_t, ++ svst1_vnum_u64 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_u64_m8: ++** st1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u64_m8, svuint64_t, uint64_t, ++ svst1_vnum_u64 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_u64_m9: ++** decb x0, all, mul #9 ++** st1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u64_m9, svuint64_t, uint64_t, ++ svst1_vnum_u64 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st1_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1d z0\.d, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u64_x1, svuint64_t, uint64_t, ++ svst1_vnum_u64 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u8.c +new file mode 100644 +index 000000000..52c79d0e0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u8.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1_u8_base: ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u8_base, svuint8_t, uint8_t, ++ svst1_u8 (p0, x0, z0), ++ svst1 (p0, x0, z0)) ++ ++/* ++** st1_u8_index: ++** st1b z0\.b, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st1_u8_index, svuint8_t, uint8_t, ++ svst1_u8 (p0, x0 + x1, z0), ++ svst1 (p0, x0 + x1, z0)) ++ ++/* ++** st1_u8_1: ++** st1b z0\.b, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u8_1, svuint8_t, uint8_t, ++ svst1_u8 (p0, x0 + svcntb (), z0), ++ svst1 (p0, x0 + svcntb (), z0)) ++ ++/* ++** st1_u8_7: ++** st1b z0\.b, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u8_7, svuint8_t, uint8_t, ++ svst1_u8 (p0, x0 + svcntb () * 7, z0), ++ svst1 (p0, x0 + svcntb () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_u8_8: ++** incb x0, all, mul #8 ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u8_8, svuint8_t, uint8_t, ++ svst1_u8 (p0, x0 + svcntb () * 8, z0), ++ svst1 (p0, x0 + svcntb () * 8, z0)) ++ ++/* ++** st1_u8_m1: ++** st1b z0\.b, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u8_m1, svuint8_t, uint8_t, ++ svst1_u8 (p0, x0 - svcntb (), z0), ++ svst1 (p0, x0 - svcntb (), z0)) ++ ++/* ++** st1_u8_m8: ++** st1b z0\.b, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_u8_m8, svuint8_t, uint8_t, ++ svst1_u8 (p0, x0 - svcntb () * 8, z0), ++ svst1 (p0, x0 - svcntb () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1_u8_m9: ++** decb x0, all, mul #9 ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_u8_m9, svuint8_t, uint8_t, ++ svst1_u8 (p0, x0 - svcntb () * 9, z0), ++ svst1 (p0, x0 - svcntb () * 9, z0)) ++ ++/* ++** st1_vnum_u8_0: ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u8_0, svuint8_t, uint8_t, ++ svst1_vnum_u8 (p0, x0, 0, z0), ++ svst1_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1_vnum_u8_1: ++** st1b z0\.b, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u8_1, svuint8_t, uint8_t, ++ svst1_vnum_u8 (p0, x0, 1, z0), ++ svst1_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1_vnum_u8_7: ++** st1b z0\.b, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u8_7, svuint8_t, uint8_t, ++ svst1_vnum_u8 (p0, x0, 7, z0), ++ svst1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_u8_8: ++** incb x0, all, mul #8 ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u8_8, svuint8_t, uint8_t, ++ svst1_vnum_u8 (p0, x0, 8, z0), ++ svst1_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1_vnum_u8_m1: ++** st1b z0\.b, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u8_m1, svuint8_t, uint8_t, ++ svst1_vnum_u8 (p0, x0, -1, z0), ++ svst1_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1_vnum_u8_m8: ++** st1b z0\.b, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u8_m8, svuint8_t, uint8_t, ++ svst1_vnum_u8 (p0, x0, -8, z0), ++ svst1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1_vnum_u8_m9: ++** decb x0, all, mul #9 ++** st1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1_vnum_u8_m9, svuint8_t, uint8_t, ++ svst1_vnum_u8 (p0, x0, -9, z0), ++ svst1_vnum (p0, x0, -9, z0)) ++ ++/* ++** st1_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st1b z0\.b, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st1b z0\.b, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st1_vnum_u8_x1, svuint8_t, uint8_t, ++ svst1_vnum_u8 (p0, x0, x1, z0), ++ svst1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s16.c +new file mode 100644 +index 000000000..770fb61e6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s16.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1b_s16_base: ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_s16_base, svint16_t, int8_t, ++ svst1b_s16 (p0, x0, z0), ++ svst1b (p0, x0, z0)) ++ ++/* ++** st1b_s16_index: ++** st1b z0\.h, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st1b_s16_index, svint16_t, int8_t, ++ svst1b_s16 (p0, x0 + x1, z0), ++ svst1b (p0, x0 + x1, z0)) ++ ++/* ++** st1b_s16_1: ++** st1b z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s16_1, svint16_t, int8_t, ++ svst1b_s16 (p0, x0 + svcnth (), z0), ++ svst1b (p0, x0 + svcnth (), z0)) ++ ++/* ++** st1b_s16_7: ++** st1b z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s16_7, svint16_t, int8_t, ++ svst1b_s16 (p0, x0 + svcnth () * 7, z0), ++ svst1b (p0, x0 + svcnth () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1b_s16_8: ++** incb x0, all, mul #4 ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_s16_8, svint16_t, int8_t, ++ svst1b_s16 (p0, x0 + svcnth () * 8, z0), ++ svst1b (p0, x0 + svcnth () * 8, z0)) ++ ++/* ++** st1b_s16_m1: ++** st1b z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s16_m1, svint16_t, int8_t, ++ svst1b_s16 (p0, x0 - svcnth (), z0), ++ svst1b (p0, x0 - svcnth (), z0)) ++ ++/* ++** st1b_s16_m8: ++** st1b z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s16_m8, svint16_t, int8_t, ++ svst1b_s16 (p0, x0 - svcnth () * 8, z0), ++ svst1b (p0, x0 - svcnth () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_s16_m9: ++** dech x0, all, mul #9 ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_s16_m9, svint16_t, int8_t, ++ svst1b_s16 (p0, x0 - svcnth () * 9, z0), ++ svst1b (p0, x0 - svcnth () * 9, z0)) ++ ++/* ++** st1b_vnum_s16_0: ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s16_0, svint16_t, int8_t, ++ svst1b_vnum_s16 (p0, x0, 0, z0), ++ svst1b_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1b_vnum_s16_1: ++** st1b z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s16_1, svint16_t, int8_t, ++ svst1b_vnum_s16 (p0, x0, 1, z0), ++ svst1b_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1b_vnum_s16_7: ++** st1b z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s16_7, svint16_t, int8_t, ++ svst1b_vnum_s16 (p0, x0, 7, z0), ++ svst1b_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_vnum_s16_8: ++** incb x0, all, mul #4 ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s16_8, svint16_t, int8_t, ++ svst1b_vnum_s16 (p0, x0, 8, z0), ++ svst1b_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1b_vnum_s16_m1: ++** st1b z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s16_m1, svint16_t, int8_t, ++ svst1b_vnum_s16 (p0, x0, -1, z0), ++ svst1b_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1b_vnum_s16_m8: ++** st1b z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s16_m8, svint16_t, int8_t, ++ svst1b_vnum_s16 (p0, x0, -8, z0), ++ svst1b_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_vnum_s16_m9: ++** dech x0, all, mul #9 ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s16_m9, svint16_t, int8_t, ++ svst1b_vnum_s16 (p0, x0, -9, z0), ++ svst1b_vnum (p0, x0, -9, z0)) ++ ++/* ++** st1b_vnum_s16_x1: ++** cnth (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st1b z0\.h, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st1b z0\.h, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st1b_vnum_s16_x1, svint16_t, int8_t, ++ svst1b_vnum_s16 (p0, x0, x1, z0), ++ svst1b_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s32.c +new file mode 100644 +index 000000000..85333aea9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s32.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1b_s32_base: ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_s32_base, svint32_t, int8_t, ++ svst1b_s32 (p0, x0, z0), ++ svst1b (p0, x0, z0)) ++ ++/* ++** st1b_s32_index: ++** st1b z0\.s, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st1b_s32_index, svint32_t, int8_t, ++ svst1b_s32 (p0, x0 + x1, z0), ++ svst1b (p0, x0 + x1, z0)) ++ ++/* ++** st1b_s32_1: ++** st1b z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s32_1, svint32_t, int8_t, ++ svst1b_s32 (p0, x0 + svcntw (), z0), ++ svst1b (p0, x0 + svcntw (), z0)) ++ ++/* ++** st1b_s32_7: ++** st1b z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s32_7, svint32_t, int8_t, ++ svst1b_s32 (p0, x0 + svcntw () * 7, z0), ++ svst1b (p0, x0 + svcntw () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_s32_8: ++** incb x0, all, mul #2 ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_s32_8, svint32_t, int8_t, ++ svst1b_s32 (p0, x0 + svcntw () * 8, z0), ++ svst1b (p0, x0 + svcntw () * 8, z0)) ++ ++/* ++** st1b_s32_m1: ++** st1b z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s32_m1, svint32_t, int8_t, ++ svst1b_s32 (p0, x0 - svcntw (), z0), ++ svst1b (p0, x0 - svcntw (), z0)) ++ ++/* ++** st1b_s32_m8: ++** st1b z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s32_m8, svint32_t, int8_t, ++ svst1b_s32 (p0, x0 - svcntw () * 8, z0), ++ svst1b (p0, x0 - svcntw () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_s32_m9: ++** decw x0, all, mul #9 ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_s32_m9, svint32_t, int8_t, ++ svst1b_s32 (p0, x0 - svcntw () * 9, z0), ++ svst1b (p0, x0 - svcntw () * 9, z0)) ++ ++/* ++** st1b_vnum_s32_0: ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s32_0, svint32_t, int8_t, ++ svst1b_vnum_s32 (p0, x0, 0, z0), ++ svst1b_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1b_vnum_s32_1: ++** st1b z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s32_1, svint32_t, int8_t, ++ svst1b_vnum_s32 (p0, x0, 1, z0), ++ svst1b_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1b_vnum_s32_7: ++** st1b z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s32_7, svint32_t, int8_t, ++ svst1b_vnum_s32 (p0, x0, 7, z0), ++ svst1b_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_vnum_s32_8: ++** incb x0, all, mul #2 ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s32_8, svint32_t, int8_t, ++ svst1b_vnum_s32 (p0, x0, 8, z0), ++ svst1b_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1b_vnum_s32_m1: ++** st1b z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s32_m1, svint32_t, int8_t, ++ svst1b_vnum_s32 (p0, x0, -1, z0), ++ svst1b_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1b_vnum_s32_m8: ++** st1b z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s32_m8, svint32_t, int8_t, ++ svst1b_vnum_s32 (p0, x0, -8, z0), ++ svst1b_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1b_vnum_s32_m9: ++** decw x0, all, mul #9 ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s32_m9, svint32_t, int8_t, ++ svst1b_vnum_s32 (p0, x0, -9, z0), ++ svst1b_vnum (p0, x0, -9, z0)) ++ ++/* ++** st1b_vnum_s32_x1: ++** cntw (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st1b z0\.s, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st1b z0\.s, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st1b_vnum_s32_x1, svint32_t, int8_t, ++ svst1b_vnum_s32 (p0, x0, x1, z0), ++ svst1b_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s64.c +new file mode 100644 +index 000000000..321f168d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s64.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1b_s64_base: ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_s64_base, svint64_t, int8_t, ++ svst1b_s64 (p0, x0, z0), ++ svst1b (p0, x0, z0)) ++ ++/* ++** st1b_s64_index: ++** st1b z0\.d, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st1b_s64_index, svint64_t, int8_t, ++ svst1b_s64 (p0, x0 + x1, z0), ++ svst1b (p0, x0 + x1, z0)) ++ ++/* ++** st1b_s64_1: ++** st1b z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s64_1, svint64_t, int8_t, ++ svst1b_s64 (p0, x0 + svcntd (), z0), ++ svst1b (p0, x0 + svcntd (), z0)) ++ ++/* ++** st1b_s64_7: ++** st1b z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s64_7, svint64_t, int8_t, ++ svst1b_s64 (p0, x0 + svcntd () * 7, z0), ++ svst1b (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_s64_8: ++** incb x0 ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_s64_8, svint64_t, int8_t, ++ svst1b_s64 (p0, x0 + svcntd () * 8, z0), ++ svst1b (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** st1b_s64_m1: ++** st1b z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s64_m1, svint64_t, int8_t, ++ svst1b_s64 (p0, x0 - svcntd (), z0), ++ svst1b (p0, x0 - svcntd (), z0)) ++ ++/* ++** st1b_s64_m8: ++** st1b z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_s64_m8, svint64_t, int8_t, ++ svst1b_s64 (p0, x0 - svcntd () * 8, z0), ++ svst1b (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_s64_m9: ++** decd x0, all, mul #9 ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_s64_m9, svint64_t, int8_t, ++ svst1b_s64 (p0, x0 - svcntd () * 9, z0), ++ svst1b (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** st1b_vnum_s64_0: ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s64_0, svint64_t, int8_t, ++ svst1b_vnum_s64 (p0, x0, 0, z0), ++ svst1b_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1b_vnum_s64_1: ++** st1b z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s64_1, svint64_t, int8_t, ++ svst1b_vnum_s64 (p0, x0, 1, z0), ++ svst1b_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1b_vnum_s64_7: ++** st1b z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s64_7, svint64_t, int8_t, ++ svst1b_vnum_s64 (p0, x0, 7, z0), ++ svst1b_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1b_vnum_s64_8: ++** incb x0 ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s64_8, svint64_t, int8_t, ++ svst1b_vnum_s64 (p0, x0, 8, z0), ++ svst1b_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1b_vnum_s64_m1: ++** st1b z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s64_m1, svint64_t, int8_t, ++ svst1b_vnum_s64 (p0, x0, -1, z0), ++ svst1b_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1b_vnum_s64_m8: ++** st1b z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s64_m8, svint64_t, int8_t, ++ svst1b_vnum_s64 (p0, x0, -8, z0), ++ svst1b_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_vnum_s64_m9: ++** decd x0, all, mul #9 ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_s64_m9, svint64_t, int8_t, ++ svst1b_vnum_s64 (p0, x0, -9, z0), ++ svst1b_vnum (p0, x0, -9, z0)) ++ ++/* ++** st1b_vnum_s64_x1: ++** cntd (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st1b z0\.d, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st1b z0\.d, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st1b_vnum_s64_x1, svint64_t, int8_t, ++ svst1b_vnum_s64 (p0, x0, x1, z0), ++ svst1b_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c +new file mode 100644 +index 000000000..d59033356 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c +@@ -0,0 +1,104 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1b_scatter_s32: ++** st1b z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_s32, svint32_t, svuint32_t, ++ svst1b_scatter_u32base_s32 (p0, z1, z0), ++ svst1b_scatter (p0, z1, z0)) ++ ++/* ++** st1b_scatter_x0_s32_offset: ++** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_x0_s32_offset, svint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_s32 (p0, z1, x0, z0), ++ svst1b_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1b_scatter_m1_s32_offset: ++** mov (x[0-9]+), #?-1 ++** st1b z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_m1_s32_offset, svint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_s32 (p0, z1, -1, z0), ++ svst1b_scatter_offset (p0, z1, -1, z0)) ++ ++/* ++** st1b_scatter_0_s32_offset: ++** st1b z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_0_s32_offset, svint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_s32 (p0, z1, 0, z0), ++ svst1b_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1b_scatter_5_s32_offset: ++** st1b z0\.s, p0, \[z1\.s, #5\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_5_s32_offset, svint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_s32 (p0, z1, 5, z0), ++ svst1b_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1b_scatter_31_s32_offset: ++** st1b z0\.s, p0, \[z1\.s, #31\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_31_s32_offset, svint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_s32 (p0, z1, 31, z0), ++ svst1b_scatter_offset (p0, z1, 31, z0)) ++ ++/* ++** st1b_scatter_32_s32_offset: ++** mov (x[0-9]+), #?32 ++** st1b z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_32_s32_offset, svint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_s32 (p0, z1, 32, z0), ++ svst1b_scatter_offset (p0, z1, 32, z0)) ++ ++/* ++** 
st1b_scatter_x0_s32_s32offset: ++** st1b z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_x0_s32_s32offset, svint32_t, int8_t, svint32_t, ++ svst1b_scatter_s32offset_s32 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_s32_s32offset: ++** st1b z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_s32_s32offset, svint32_t, int8_t, svint32_t, ++ svst1b_scatter_s32offset_s32 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_x0_s32_u32offset: ++** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_x0_s32_u32offset, svint32_t, int8_t, svuint32_t, ++ svst1b_scatter_u32offset_s32 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_s32_u32offset: ++** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_s32_u32offset, svint32_t, int8_t, svuint32_t, ++ svst1b_scatter_u32offset_s32 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c +new file mode 100644 +index 000000000..c7a35f1b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c +@@ -0,0 +1,122 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1b_scatter_s64: ++** st1b z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_s64, svint64_t, svuint64_t, ++ svst1b_scatter_u64base_s64 (p0, z1, z0), ++ svst1b_scatter (p0, z1, z0)) ++ ++/* ++** st1b_scatter_x0_s64_offset: ++** st1b z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_x0_s64_offset, svint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_s64 (p0, z1, x0, z0), ++ svst1b_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1b_scatter_m1_s64_offset: ++** mov (x[0-9]+), #?-1 ++** st1b z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_m1_s64_offset, svint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_s64 (p0, z1, -1, z0), ++ svst1b_scatter_offset (p0, z1, -1, z0)) ++ ++/* ++** st1b_scatter_0_s64_offset: ++** st1b z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_0_s64_offset, svint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_s64 (p0, z1, 0, z0), ++ svst1b_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1b_scatter_5_s64_offset: ++** st1b z0\.d, p0, \[z1\.d, #5\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_5_s64_offset, svint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_s64 (p0, z1, 5, z0), ++ svst1b_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1b_scatter_31_s64_offset: ++** st1b z0\.d, p0, \[z1\.d, #31\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_31_s64_offset, svint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_s64 (p0, z1, 31, z0), ++ svst1b_scatter_offset (p0, z1, 31, z0)) ++ ++/* ++** st1b_scatter_32_s64_offset: ++** mov (x[0-9]+), #?32 ++** st1b z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_32_s64_offset, svint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_s64 (p0, z1, 32, z0), ++ svst1b_scatter_offset (p0, z1, 32, z0)) ++ ++/* ++** st1b_scatter_x0_s64_s64offset: ++** st1b z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_x0_s64_s64offset, svint64_t, int8_t, svint64_t, ++ 
svst1b_scatter_s64offset_s64 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_s64_s64offset: ++** st1b z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_s64_s64offset, svint64_t, int8_t, svint64_t, ++ svst1b_scatter_s64offset_s64 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_ext_s64_s64offset: ++** st1b z0\.d, p0, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_ext_s64_s64offset, svint64_t, int8_t, svint64_t, ++ svst1b_scatter_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1b_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1b_scatter_x0_s64_u64offset: ++** st1b z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_x0_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ svst1b_scatter_u64offset_s64 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_s64_u64offset: ++** st1b z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ svst1b_scatter_u64offset_s64 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_ext_s64_u64offset: ++** st1b z0\.d, p0, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_ext_s64_u64offset, svint64_t, int8_t, svuint64_t, ++ svst1b_scatter_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1b_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c +new file mode 100644 +index 000000000..e098cb9b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c +@@ -0,0 +1,104 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1b_scatter_u32: ++** st1b z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_u32, svuint32_t, svuint32_t, ++ svst1b_scatter_u32base_u32 (p0, z1, z0), ++ svst1b_scatter (p0, z1, z0)) ++ ++/* ++** st1b_scatter_x0_u32_offset: ++** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_x0_u32_offset, svuint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_u32 (p0, z1, x0, z0), ++ svst1b_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1b_scatter_m1_u32_offset: ++** mov (x[0-9]+), #?-1 ++** st1b z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_m1_u32_offset, svuint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_u32 (p0, z1, -1, z0), ++ svst1b_scatter_offset (p0, z1, -1, z0)) ++ ++/* ++** st1b_scatter_0_u32_offset: ++** st1b z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_0_u32_offset, svuint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_u32 (p0, z1, 0, z0), ++ svst1b_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1b_scatter_5_u32_offset: ++** st1b z0\.s, p0, \[z1\.s, #5\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_5_u32_offset, svuint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_u32 (p0, z1, 5, z0), ++ svst1b_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1b_scatter_31_u32_offset: ++** st1b z0\.s, p0, \[z1\.s, #31\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_31_u32_offset, svuint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_u32 (p0, z1, 31, z0), ++ svst1b_scatter_offset (p0, z1, 31, z0)) ++ ++/* ++** st1b_scatter_32_u32_offset: ++** mov (x[0-9]+), #?32 ++** st1b z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_32_u32_offset, svuint32_t, svuint32_t, ++ svst1b_scatter_u32base_offset_u32 (p0, z1, 32, z0), ++ svst1b_scatter_offset (p0, z1, 32, z0)) ++ ++/* ++** st1b_scatter_x0_u32_s32offset: ++** st1b z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_x0_u32_s32offset, svuint32_t, uint8_t, svint32_t, ++ svst1b_scatter_s32offset_u32 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_u32_s32offset: ++** st1b z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_u32_s32offset, svuint32_t, uint8_t, svint32_t, ++ svst1b_scatter_s32offset_u32 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_x0_u32_u32offset: ++** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_x0_u32_u32offset, svuint32_t, uint8_t, svuint32_t, ++ svst1b_scatter_u32offset_u32 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_u32_u32offset: ++** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_u32_u32offset, svuint32_t, uint8_t, svuint32_t, ++ svst1b_scatter_u32offset_u32 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c +new file mode 100644 +index 000000000..058d1313f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c +@@ -0,0 +1,122 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1b_scatter_u64: ++** st1b z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_u64, svuint64_t, svuint64_t, ++ svst1b_scatter_u64base_u64 (p0, z1, z0), ++ svst1b_scatter (p0, z1, z0)) ++ ++/* ++** st1b_scatter_x0_u64_offset: ++** st1b z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_x0_u64_offset, svuint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_u64 (p0, z1, x0, z0), ++ svst1b_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1b_scatter_m1_u64_offset: ++** mov (x[0-9]+), #?-1 ++** st1b z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_m1_u64_offset, svuint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_u64 (p0, z1, -1, z0), ++ svst1b_scatter_offset (p0, z1, -1, z0)) ++ ++/* ++** st1b_scatter_0_u64_offset: ++** st1b z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_0_u64_offset, svuint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_u64 (p0, z1, 0, z0), ++ svst1b_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1b_scatter_5_u64_offset: ++** st1b z0\.d, p0, \[z1\.d, #5\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_5_u64_offset, svuint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_u64 (p0, z1, 5, z0), ++ svst1b_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1b_scatter_31_u64_offset: ++** st1b z0\.d, p0, \[z1\.d, #31\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_31_u64_offset, svuint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_u64 (p0, z1, 31, z0), ++ svst1b_scatter_offset (p0, z1, 31, z0)) ++ ++/* ++** st1b_scatter_32_u64_offset: ++** mov (x[0-9]+), #?32 ++** st1b z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1b_scatter_32_u64_offset, svuint64_t, svuint64_t, ++ svst1b_scatter_u64base_offset_u64 (p0, z1, 32, z0), ++ svst1b_scatter_offset (p0, z1, 32, z0)) ++ ++/* ++** st1b_scatter_x0_u64_s64offset: ++** st1b z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_x0_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ svst1b_scatter_s64offset_u64 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_u64_s64offset: ++** st1b z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ svst1b_scatter_s64offset_u64 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_ext_u64_s64offset: ++** st1b z0\.d, p0, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_ext_u64_s64offset, svuint64_t, uint8_t, svint64_t, ++ svst1b_scatter_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1b_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1b_scatter_x0_u64_u64offset: ++** st1b z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_x0_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ svst1b_scatter_u64offset_u64 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_u64_u64offset: ++** st1b z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ svst1b_scatter_u64offset_u64 (p0, x0, z1, z0), ++ svst1b_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1b_scatter_ext_u64_u64offset: ++** st1b z0\.d, p0, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1b_scatter_ext_u64_u64offset, svuint64_t, uint8_t, svuint64_t, ++ svst1b_scatter_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1), 
z0), ++ svst1b_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u16.c +new file mode 100644 +index 000000000..025a2212a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u16.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1b_u16_base: ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_u16_base, svuint16_t, uint8_t, ++ svst1b_u16 (p0, x0, z0), ++ svst1b (p0, x0, z0)) ++ ++/* ++** st1b_u16_index: ++** st1b z0\.h, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st1b_u16_index, svuint16_t, uint8_t, ++ svst1b_u16 (p0, x0 + x1, z0), ++ svst1b (p0, x0 + x1, z0)) ++ ++/* ++** st1b_u16_1: ++** st1b z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u16_1, svuint16_t, uint8_t, ++ svst1b_u16 (p0, x0 + svcnth (), z0), ++ svst1b (p0, x0 + svcnth (), z0)) ++ ++/* ++** st1b_u16_7: ++** st1b z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u16_7, svuint16_t, uint8_t, ++ svst1b_u16 (p0, x0 + svcnth () * 7, z0), ++ svst1b (p0, x0 + svcnth () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_u16_8: ++** incb x0, all, mul #4 ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_u16_8, svuint16_t, uint8_t, ++ svst1b_u16 (p0, x0 + svcnth () * 8, z0), ++ svst1b (p0, x0 + svcnth () * 8, z0)) ++ ++/* ++** st1b_u16_m1: ++** st1b z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u16_m1, svuint16_t, uint8_t, ++ svst1b_u16 (p0, x0 - svcnth (), z0), ++ svst1b (p0, x0 - svcnth (), z0)) ++ ++/* ++** st1b_u16_m8: ++** st1b z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u16_m8, svuint16_t, uint8_t, ++ svst1b_u16 (p0, x0 - svcnth () * 8, z0), ++ svst1b (p0, x0 - svcnth () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_u16_m9: ++** dech x0, all, mul #9 ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_u16_m9, svuint16_t, uint8_t, ++ svst1b_u16 (p0, x0 - svcnth () * 9, z0), ++ svst1b (p0, x0 - svcnth () * 9, z0)) ++ ++/* ++** st1b_vnum_u16_0: ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u16_0, svuint16_t, uint8_t, ++ svst1b_vnum_u16 (p0, x0, 0, z0), ++ svst1b_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1b_vnum_u16_1: ++** st1b z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u16_1, svuint16_t, uint8_t, ++ svst1b_vnum_u16 (p0, x0, 1, z0), ++ svst1b_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1b_vnum_u16_7: ++** st1b z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u16_7, svuint16_t, uint8_t, ++ svst1b_vnum_u16 (p0, x0, 7, z0), ++ svst1b_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1b_vnum_u16_8: ++** incb x0, all, mul #4 ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u16_8, svuint16_t, uint8_t, ++ svst1b_vnum_u16 (p0, x0, 8, z0), ++ svst1b_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1b_vnum_u16_m1: ++** st1b z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u16_m1, svuint16_t, uint8_t, ++ svst1b_vnum_u16 (p0, x0, -1, z0), ++ svst1b_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1b_vnum_u16_m8: ++** st1b z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u16_m8, svuint16_t, uint8_t, ++ svst1b_vnum_u16 (p0, x0, -8, z0), ++ svst1b_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_vnum_u16_m9: ++** dech x0, all, mul #9 ++** st1b z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u16_m9, svuint16_t, uint8_t, ++ svst1b_vnum_u16 (p0, x0, -9, z0), ++ svst1b_vnum (p0, x0, -9, z0)) ++ ++/* ++** st1b_vnum_u16_x1: ++** cnth (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st1b z0\.h, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st1b z0\.h, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st1b_vnum_u16_x1, svuint16_t, uint8_t, ++ svst1b_vnum_u16 (p0, x0, x1, z0), ++ svst1b_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u32.c +new file mode 100644 +index 000000000..5833cb44b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u32.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1b_u32_base: ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_u32_base, svuint32_t, uint8_t, ++ svst1b_u32 (p0, x0, z0), ++ svst1b (p0, x0, z0)) ++ ++/* ++** st1b_u32_index: ++** st1b z0\.s, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st1b_u32_index, svuint32_t, uint8_t, ++ svst1b_u32 (p0, x0 + x1, z0), ++ svst1b (p0, x0 + x1, z0)) ++ ++/* ++** st1b_u32_1: ++** st1b z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u32_1, svuint32_t, uint8_t, ++ svst1b_u32 (p0, x0 + svcntw (), z0), ++ svst1b (p0, x0 + svcntw (), z0)) ++ ++/* ++** st1b_u32_7: ++** st1b z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u32_7, svuint32_t, uint8_t, ++ svst1b_u32 (p0, x0 + svcntw () * 7, z0), ++ svst1b (p0, x0 + svcntw () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_u32_8: ++** incb x0, all, mul #2 ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_u32_8, svuint32_t, uint8_t, ++ svst1b_u32 (p0, x0 + svcntw () * 8, z0), ++ svst1b (p0, x0 + svcntw () * 8, z0)) ++ ++/* ++** st1b_u32_m1: ++** st1b z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u32_m1, svuint32_t, uint8_t, ++ svst1b_u32 (p0, x0 - svcntw (), z0), ++ svst1b (p0, x0 - svcntw (), z0)) ++ ++/* ++** st1b_u32_m8: ++** st1b z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u32_m8, svuint32_t, uint8_t, ++ svst1b_u32 (p0, x0 - svcntw () * 8, z0), ++ svst1b (p0, x0 - svcntw () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1b_u32_m9: ++** decw x0, all, mul #9 ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_u32_m9, svuint32_t, uint8_t, ++ svst1b_u32 (p0, x0 - svcntw () * 9, z0), ++ svst1b (p0, x0 - svcntw () * 9, z0)) ++ ++/* ++** st1b_vnum_u32_0: ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u32_0, svuint32_t, uint8_t, ++ svst1b_vnum_u32 (p0, x0, 0, z0), ++ svst1b_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1b_vnum_u32_1: ++** st1b z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u32_1, svuint32_t, uint8_t, ++ svst1b_vnum_u32 (p0, x0, 1, z0), ++ svst1b_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1b_vnum_u32_7: ++** st1b z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u32_7, svuint32_t, uint8_t, ++ svst1b_vnum_u32 (p0, x0, 7, z0), ++ svst1b_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_vnum_u32_8: ++** incb x0, all, mul #2 ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u32_8, svuint32_t, uint8_t, ++ svst1b_vnum_u32 (p0, x0, 8, z0), ++ svst1b_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1b_vnum_u32_m1: ++** st1b z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u32_m1, svuint32_t, uint8_t, ++ svst1b_vnum_u32 (p0, x0, -1, z0), ++ svst1b_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1b_vnum_u32_m8: ++** st1b z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u32_m8, svuint32_t, uint8_t, ++ svst1b_vnum_u32 (p0, x0, -8, z0), ++ svst1b_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_vnum_u32_m9: ++** decw x0, all, mul #9 ++** st1b z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u32_m9, svuint32_t, uint8_t, ++ svst1b_vnum_u32 (p0, x0, -9, z0), ++ svst1b_vnum (p0, x0, -9, z0)) ++ ++/* ++** st1b_vnum_u32_x1: ++** cntw (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st1b z0\.s, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st1b z0\.s, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st1b_vnum_u32_x1, svuint32_t, uint8_t, ++ svst1b_vnum_u32 (p0, x0, x1, z0), ++ svst1b_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u64.c +new file mode 100644 +index 000000000..e96f4c486 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u64.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1b_u64_base: ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_u64_base, svuint64_t, uint8_t, ++ svst1b_u64 (p0, x0, z0), ++ svst1b (p0, x0, z0)) ++ ++/* ++** st1b_u64_index: ++** st1b z0\.d, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st1b_u64_index, svuint64_t, uint8_t, ++ svst1b_u64 (p0, x0 + x1, z0), ++ svst1b (p0, x0 + x1, z0)) ++ ++/* ++** st1b_u64_1: ++** st1b z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u64_1, svuint64_t, uint8_t, ++ svst1b_u64 (p0, x0 + svcntd (), z0), ++ svst1b (p0, x0 + svcntd (), z0)) ++ ++/* ++** st1b_u64_7: ++** st1b z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u64_7, svuint64_t, uint8_t, ++ svst1b_u64 (p0, x0 + svcntd () * 7, z0), ++ svst1b (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1b_u64_8: ++** incb x0 ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_u64_8, svuint64_t, uint8_t, ++ svst1b_u64 (p0, x0 + svcntd () * 8, z0), ++ svst1b (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** st1b_u64_m1: ++** st1b z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u64_m1, svuint64_t, uint8_t, ++ svst1b_u64 (p0, x0 - svcntd (), z0), ++ svst1b (p0, x0 - svcntd (), z0)) ++ ++/* ++** st1b_u64_m8: ++** st1b z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_u64_m8, svuint64_t, uint8_t, ++ svst1b_u64 (p0, x0 - svcntd () * 8, z0), ++ svst1b (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_u64_m9: ++** decd x0, all, mul #9 ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_u64_m9, svuint64_t, uint8_t, ++ svst1b_u64 (p0, x0 - svcntd () * 9, z0), ++ svst1b (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** st1b_vnum_u64_0: ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u64_0, svuint64_t, uint8_t, ++ svst1b_vnum_u64 (p0, x0, 0, z0), ++ svst1b_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1b_vnum_u64_1: ++** st1b z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u64_1, svuint64_t, uint8_t, ++ svst1b_vnum_u64 (p0, x0, 1, z0), ++ svst1b_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1b_vnum_u64_7: ++** st1b z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u64_7, svuint64_t, uint8_t, ++ svst1b_vnum_u64 (p0, x0, 7, z0), ++ svst1b_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_vnum_u64_8: ++** incb x0 ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u64_8, svuint64_t, uint8_t, ++ svst1b_vnum_u64 (p0, x0, 8, z0), ++ svst1b_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1b_vnum_u64_m1: ++** st1b z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u64_m1, svuint64_t, uint8_t, ++ svst1b_vnum_u64 (p0, x0, -1, z0), ++ svst1b_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1b_vnum_u64_m8: ++** st1b z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u64_m8, svuint64_t, uint8_t, ++ svst1b_vnum_u64 (p0, x0, -8, z0), ++ svst1b_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1b_vnum_u64_m9: ++** decd x0, all, mul #9 ++** st1b z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1b_vnum_u64_m9, svuint64_t, uint8_t, ++ svst1b_vnum_u64 (p0, x0, -9, z0), ++ svst1b_vnum (p0, x0, -9, z0)) ++ ++/* ++** st1b_vnum_u64_x1: ++** cntd (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st1b z0\.d, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st1b z0\.d, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st1b_vnum_u64_x1, svuint64_t, uint8_t, ++ svst1b_vnum_u64 (p0, x0, x1, z0), ++ svst1b_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s32.c +new file mode 100644 +index 000000000..3466e3293 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1h_s32_base: ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_s32_base, svint32_t, int16_t, ++ svst1h_s32 (p0, x0, z0), ++ svst1h (p0, x0, z0)) ++ ++/* ++** st1h_s32_index: ++** st1h z0\.s, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st1h_s32_index, svint32_t, int16_t, ++ svst1h_s32 (p0, x0 + x1, z0), ++ svst1h (p0, x0 + x1, z0)) ++ ++/* ++** st1h_s32_1: ++** st1h z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_s32_1, svint32_t, int16_t, ++ svst1h_s32 (p0, x0 + svcntw (), z0), ++ svst1h (p0, x0 + svcntw (), z0)) ++ ++/* ++** st1h_s32_7: ++** st1h z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_s32_7, svint32_t, int16_t, ++ svst1h_s32 (p0, x0 + svcntw () * 7, z0), ++ svst1h (p0, x0 + svcntw () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_s32_8: ++** incb x0, all, mul #4 ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_s32_8, svint32_t, int16_t, ++ svst1h_s32 (p0, x0 + svcntw () * 8, z0), ++ svst1h (p0, x0 + svcntw () * 8, z0)) ++ ++/* ++** st1h_s32_m1: ++** st1h z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_s32_m1, svint32_t, int16_t, ++ svst1h_s32 (p0, x0 - svcntw (), z0), ++ svst1h (p0, x0 - svcntw (), z0)) ++ ++/* ++** st1h_s32_m8: ++** st1h z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_s32_m8, svint32_t, int16_t, ++ svst1h_s32 (p0, x0 - svcntw () * 8, z0), ++ svst1h (p0, x0 - svcntw () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_s32_m9: ++** dech x0, all, mul #9 ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_s32_m9, svint32_t, int16_t, ++ svst1h_s32 (p0, x0 - svcntw () * 9, z0), ++ svst1h (p0, x0 - svcntw () * 9, z0)) ++ ++/* ++** st1h_vnum_s32_0: ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s32_0, svint32_t, int16_t, ++ svst1h_vnum_s32 (p0, x0, 0, z0), ++ svst1h_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1h_vnum_s32_1: ++** st1h z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s32_1, svint32_t, int16_t, ++ svst1h_vnum_s32 (p0, x0, 1, z0), ++ svst1h_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1h_vnum_s32_7: ++** st1h z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s32_7, svint32_t, int16_t, ++ svst1h_vnum_s32 (p0, x0, 7, z0), ++ svst1h_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_vnum_s32_8: ++** incb x0, all, mul #4 ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s32_8, svint32_t, int16_t, ++ svst1h_vnum_s32 (p0, x0, 8, z0), ++ svst1h_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1h_vnum_s32_m1: ++** st1h z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s32_m1, svint32_t, int16_t, ++ svst1h_vnum_s32 (p0, x0, -1, z0), ++ svst1h_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1h_vnum_s32_m8: ++** st1h z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s32_m8, svint32_t, int16_t, ++ svst1h_vnum_s32 (p0, x0, -8, z0), ++ svst1h_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_vnum_s32_m9: ++** dech x0, all, mul #9 ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s32_m9, svint32_t, int16_t, ++ svst1h_vnum_s32 (p0, x0, -9, z0), ++ svst1h_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** st1h_vnum_s32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1h z0\.s, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s32_x1, svint32_t, int16_t, ++ svst1h_vnum_s32 (p0, x0, x1, z0), ++ svst1h_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s64.c +new file mode 100644 +index 000000000..c5df3b0c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1h_s64_base: ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_s64_base, svint64_t, int16_t, ++ svst1h_s64 (p0, x0, z0), ++ svst1h (p0, x0, z0)) ++ ++/* ++** st1h_s64_index: ++** st1h z0\.d, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st1h_s64_index, svint64_t, int16_t, ++ svst1h_s64 (p0, x0 + x1, z0), ++ svst1h (p0, x0 + x1, z0)) ++ ++/* ++** st1h_s64_1: ++** st1h z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_s64_1, svint64_t, int16_t, ++ svst1h_s64 (p0, x0 + svcntd (), z0), ++ svst1h (p0, x0 + svcntd (), z0)) ++ ++/* ++** st1h_s64_7: ++** st1h z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_s64_7, svint64_t, int16_t, ++ svst1h_s64 (p0, x0 + svcntd () * 7, z0), ++ svst1h (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_s64_8: ++** incb x0, all, mul #2 ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_s64_8, svint64_t, int16_t, ++ svst1h_s64 (p0, x0 + svcntd () * 8, z0), ++ svst1h (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** st1h_s64_m1: ++** st1h z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_s64_m1, svint64_t, int16_t, ++ svst1h_s64 (p0, x0 - svcntd (), z0), ++ svst1h (p0, x0 - svcntd (), z0)) ++ ++/* ++** st1h_s64_m8: ++** st1h z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_s64_m8, svint64_t, int16_t, ++ svst1h_s64 (p0, x0 - svcntd () * 8, z0), ++ svst1h (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_s64_m9: ++** decw x0, all, mul #9 ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_s64_m9, svint64_t, int16_t, ++ svst1h_s64 (p0, x0 - svcntd () * 9, z0), ++ svst1h (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** st1h_vnum_s64_0: ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s64_0, svint64_t, int16_t, ++ svst1h_vnum_s64 (p0, x0, 0, z0), ++ svst1h_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1h_vnum_s64_1: ++** st1h z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s64_1, svint64_t, int16_t, ++ svst1h_vnum_s64 (p0, x0, 1, z0), ++ svst1h_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1h_vnum_s64_7: ++** st1h z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s64_7, svint64_t, int16_t, ++ svst1h_vnum_s64 (p0, x0, 7, z0), ++ svst1h_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1h_vnum_s64_8: ++** incb x0, all, mul #2 ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s64_8, svint64_t, int16_t, ++ svst1h_vnum_s64 (p0, x0, 8, z0), ++ svst1h_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1h_vnum_s64_m1: ++** st1h z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s64_m1, svint64_t, int16_t, ++ svst1h_vnum_s64 (p0, x0, -1, z0), ++ svst1h_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1h_vnum_s64_m8: ++** st1h z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s64_m8, svint64_t, int16_t, ++ svst1h_vnum_s64 (p0, x0, -8, z0), ++ svst1h_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_vnum_s64_m9: ++** decw x0, all, mul #9 ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s64_m9, svint64_t, int16_t, ++ svst1h_vnum_s64 (p0, x0, -9, z0), ++ svst1h_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st1h_vnum_s64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1h z0\.d, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_s64_x1, svint64_t, int16_t, ++ svst1h_vnum_s64 (p0, x0, x1, z0), ++ svst1h_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c +new file mode 100644 +index 000000000..2a23d41f3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1h_scatter_s32: ++** st1h z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_s32, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_s32 (p0, z1, z0), ++ svst1h_scatter (p0, z1, z0)) ++ ++/* ++** st1h_scatter_x0_s32_offset: ++** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_x0_s32_offset, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_s32 (p0, z1, x0, z0), ++ svst1h_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1h_scatter_m2_s32_offset: ++** mov (x[0-9]+), #?-2 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_m2_s32_offset, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_s32 (p0, z1, -2, z0), ++ svst1h_scatter_offset (p0, z1, -2, z0)) ++ ++/* ++** st1h_scatter_0_s32_offset: ++** st1h z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_0_s32_offset, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_s32 (p0, z1, 0, z0), ++ svst1h_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1h_scatter_5_s32_offset: ++** mov (x[0-9]+), #?5 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_5_s32_offset, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_s32 (p0, z1, 5, z0), ++ svst1h_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1h_scatter_6_s32_offset: ++** st1h z0\.s, p0, \[z1\.s, #6\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_6_s32_offset, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_s32 (p0, z1, 6, z0), ++ svst1h_scatter_offset (p0, z1, 6, z0)) ++ ++/* ++** st1h_scatter_62_s32_offset: ++** st1h z0\.s, p0, \[z1\.s, #62\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_62_s32_offset, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_s32 (p0, z1, 62, z0), ++ svst1h_scatter_offset (p0, z1, 62, z0)) ++ ++/* ++** st1h_scatter_64_s32_offset: 
++** mov (x[0-9]+), #?64 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_64_s32_offset, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_s32 (p0, z1, 64, z0), ++ svst1h_scatter_offset (p0, z1, 64, z0)) ++ ++/* ++** st1h_scatter_x0_s32_index: ++** lsl (x[0-9]+), x0, #?1 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_x0_s32_index, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_s32 (p0, z1, x0, z0), ++ svst1h_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1h_scatter_m1_s32_index: ++** mov (x[0-9]+), #?-2 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_m1_s32_index, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_s32 (p0, z1, -1, z0), ++ svst1h_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1h_scatter_0_s32_index: ++** st1h z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_0_s32_index, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_s32 (p0, z1, 0, z0), ++ svst1h_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1h_scatter_5_s32_index: ++** st1h z0\.s, p0, \[z1\.s, #10\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_5_s32_index, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_s32 (p0, z1, 5, z0), ++ svst1h_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1h_scatter_31_s32_index: ++** st1h z0\.s, p0, \[z1\.s, #62\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_31_s32_index, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_s32 (p0, z1, 31, z0), ++ svst1h_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1h_scatter_32_s32_index: ++** mov (x[0-9]+), #?64 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_32_s32_index, svint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_s32 (p0, z1, 32, z0), ++ svst1h_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1h_scatter_x0_s32_s32offset: ++** st1h z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s32_s32offset, svint32_t, int16_t, svint32_t, ++ svst1h_scatter_s32offset_s32 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_s32_s32offset: ++** st1h z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_s32_s32offset, svint32_t, int16_t, svint32_t, ++ svst1h_scatter_s32offset_s32 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_x0_s32_u32offset: ++** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s32_u32offset, svint32_t, int16_t, svuint32_t, ++ svst1h_scatter_u32offset_s32 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_s32_u32offset: ++** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_s32_u32offset, svint32_t, int16_t, svuint32_t, ++ svst1h_scatter_u32offset_s32 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_x0_s32_s32index: ++** st1h z0\.s, p0, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s32_s32index, svint32_t, int16_t, svint32_t, ++ svst1h_scatter_s32index_s32 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_s32_s32index: ++** st1h z0\.s, p0, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_s32_s32index, svint32_t, int16_t, svint32_t, ++ svst1h_scatter_s32index_s32 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) 
++ ++/* ++** st1h_scatter_x0_s32_u32index: ++** st1h z0\.s, p0, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s32_u32index, svint32_t, int16_t, svuint32_t, ++ svst1h_scatter_u32index_s32 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_s32_u32index: ++** st1h z0\.s, p0, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_s32_u32index, svint32_t, int16_t, svuint32_t, ++ svst1h_scatter_u32index_s32 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c +new file mode 100644 +index 000000000..6a1adb056 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c +@@ -0,0 +1,243 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1h_scatter_s64: ++** st1h z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_s64, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_s64 (p0, z1, z0), ++ svst1h_scatter (p0, z1, z0)) ++ ++/* ++** st1h_scatter_x0_s64_offset: ++** st1h z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_x0_s64_offset, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_s64 (p0, z1, x0, z0), ++ svst1h_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1h_scatter_m2_s64_offset: ++** mov (x[0-9]+), #?-2 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_m2_s64_offset, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_s64 (p0, z1, -2, z0), ++ svst1h_scatter_offset (p0, z1, -2, z0)) ++ ++/* ++** st1h_scatter_0_s64_offset: ++** st1h z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_0_s64_offset, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_s64 (p0, z1, 0, z0), ++ svst1h_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1h_scatter_5_s64_offset: ++** mov (x[0-9]+), #?5 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_5_s64_offset, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_s64 (p0, z1, 5, z0), ++ svst1h_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1h_scatter_6_s64_offset: ++** st1h z0\.d, p0, \[z1\.d, #6\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_6_s64_offset, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_s64 (p0, z1, 6, z0), ++ svst1h_scatter_offset (p0, z1, 6, z0)) ++ ++/* ++** st1h_scatter_62_s64_offset: ++** st1h z0\.d, p0, \[z1\.d, #62\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_62_s64_offset, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_s64 (p0, z1, 62, z0), ++ svst1h_scatter_offset (p0, z1, 62, z0)) ++ ++/* ++** st1h_scatter_64_s64_offset: ++** mov (x[0-9]+), #?64 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_64_s64_offset, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_s64 (p0, z1, 64, z0), ++ svst1h_scatter_offset (p0, z1, 64, z0)) ++ ++/* ++** st1h_scatter_x0_s64_index: ++** lsl (x[0-9]+), x0, #?1 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_x0_s64_index, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_s64 (p0, z1, x0, z0), ++ svst1h_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1h_scatter_m1_s64_index: ++** mov (x[0-9]+), #?-2 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_m1_s64_index, 
svint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_s64 (p0, z1, -1, z0), ++ svst1h_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1h_scatter_0_s64_index: ++** st1h z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_0_s64_index, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_s64 (p0, z1, 0, z0), ++ svst1h_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1h_scatter_5_s64_index: ++** st1h z0\.d, p0, \[z1\.d, #10\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_5_s64_index, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_s64 (p0, z1, 5, z0), ++ svst1h_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1h_scatter_31_s64_index: ++** st1h z0\.d, p0, \[z1\.d, #62\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_31_s64_index, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_s64 (p0, z1, 31, z0), ++ svst1h_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1h_scatter_32_s64_index: ++** mov (x[0-9]+), #?64 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_32_s64_index, svint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_s64 (p0, z1, 32, z0), ++ svst1h_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1h_scatter_x0_s64_s64offset: ++** st1h z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s64_s64offset, svint64_t, int16_t, svint64_t, ++ svst1h_scatter_s64offset_s64 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_s64_s64offset: ++** st1h z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_s64_s64offset, svint64_t, int16_t, svint64_t, ++ svst1h_scatter_s64offset_s64 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_ext_s64_s64offset: ++** st1h z0\.d, p0, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_ext_s64_s64offset, svint64_t, int16_t, svint64_t, ++ svst1h_scatter_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1h_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1h_scatter_x0_s64_u64offset: ++** st1h z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ svst1h_scatter_u64offset_s64 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_s64_u64offset: ++** st1h z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ svst1h_scatter_u64offset_s64 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_ext_s64_u64offset: ++** st1h z0\.d, p0, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_ext_s64_u64offset, svint64_t, int16_t, svuint64_t, ++ svst1h_scatter_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1h_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1h_scatter_x0_s64_s64index: ++** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s64_s64index, svint64_t, int16_t, svint64_t, ++ svst1h_scatter_s64index_s64 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_s64_s64index: ++** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_s64_s64index, svint64_t, int16_t, svint64_t, ++ svst1h_scatter_s64index_s64 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_ext_s64_s64index: ++** st1h z0\.d, p0, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ 
++TEST_STORE_SCATTER_SZ (st1h_scatter_ext_s64_s64index, svint64_t, int16_t, svint64_t, ++ svst1h_scatter_s64index_s64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1h_scatter_index (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1h_scatter_x0_s64_u64index: ++** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s64_u64index, svint64_t, int16_t, svuint64_t, ++ svst1h_scatter_u64index_s64 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_s64_u64index: ++** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_s64_u64index, svint64_t, int16_t, svuint64_t, ++ svst1h_scatter_u64index_s64 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_ext_s64_u64index: ++** st1h z0\.d, p0, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_ext_s64_u64index, svint64_t, int16_t, svuint64_t, ++ svst1h_scatter_u64index_s64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1h_scatter_index (p0, x0, svextw_x (p0, z1), z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c +new file mode 100644 +index 000000000..12197315d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c +@@ -0,0 +1,207 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1h_scatter_u32: ++** st1h z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_u32, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_u32 (p0, z1, z0), ++ svst1h_scatter (p0, z1, z0)) ++ ++/* ++** st1h_scatter_x0_u32_offset: ++** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_x0_u32_offset, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_u32 (p0, z1, x0, z0), ++ svst1h_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1h_scatter_m2_u32_offset: ++** mov (x[0-9]+), #?-2 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_m2_u32_offset, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_u32 (p0, z1, -2, z0), ++ svst1h_scatter_offset (p0, z1, -2, z0)) ++ ++/* ++** st1h_scatter_0_u32_offset: ++** st1h z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_0_u32_offset, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_u32 (p0, z1, 0, z0), ++ svst1h_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1h_scatter_5_u32_offset: ++** mov (x[0-9]+), #?5 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_5_u32_offset, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_u32 (p0, z1, 5, z0), ++ svst1h_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1h_scatter_6_u32_offset: ++** st1h z0\.s, p0, \[z1\.s, #6\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_6_u32_offset, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_u32 (p0, z1, 6, z0), ++ svst1h_scatter_offset (p0, z1, 6, z0)) ++ ++/* ++** st1h_scatter_62_u32_offset: ++** st1h z0\.s, p0, \[z1\.s, #62\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_62_u32_offset, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_offset_u32 (p0, z1, 62, z0), ++ svst1h_scatter_offset (p0, z1, 62, z0)) ++ ++/* ++** st1h_scatter_64_u32_offset: ++** mov (x[0-9]+), #?64 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_64_u32_offset, svuint32_t, 
svuint32_t, ++ svst1h_scatter_u32base_offset_u32 (p0, z1, 64, z0), ++ svst1h_scatter_offset (p0, z1, 64, z0)) ++ ++/* ++** st1h_scatter_x0_u32_index: ++** lsl (x[0-9]+), x0, #?1 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_x0_u32_index, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_u32 (p0, z1, x0, z0), ++ svst1h_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1h_scatter_m1_u32_index: ++** mov (x[0-9]+), #?-2 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_m1_u32_index, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_u32 (p0, z1, -1, z0), ++ svst1h_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1h_scatter_0_u32_index: ++** st1h z0\.s, p0, \[z1\.s\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_0_u32_index, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_u32 (p0, z1, 0, z0), ++ svst1h_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1h_scatter_5_u32_index: ++** st1h z0\.s, p0, \[z1\.s, #10\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_5_u32_index, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_u32 (p0, z1, 5, z0), ++ svst1h_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1h_scatter_31_u32_index: ++** st1h z0\.s, p0, \[z1\.s, #62\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_31_u32_index, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_u32 (p0, z1, 31, z0), ++ svst1h_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1h_scatter_32_u32_index: ++** mov (x[0-9]+), #?64 ++** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_32_u32_index, svuint32_t, svuint32_t, ++ svst1h_scatter_u32base_index_u32 (p0, z1, 32, z0), ++ svst1h_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1h_scatter_x0_u32_s32offset: ++** st1h z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u32_s32offset, svuint32_t, uint16_t, svint32_t, ++ svst1h_scatter_s32offset_u32 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_u32_s32offset: ++** st1h z0\.s, p0, \[x0, z1\.s, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_u32_s32offset, svuint32_t, uint16_t, svint32_t, ++ svst1h_scatter_s32offset_u32 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_x0_u32_u32offset: ++** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u32_u32offset, svuint32_t, uint16_t, svuint32_t, ++ svst1h_scatter_u32offset_u32 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_u32_u32offset: ++** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_u32_u32offset, svuint32_t, uint16_t, svuint32_t, ++ svst1h_scatter_u32offset_u32 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_x0_u32_s32index: ++** st1h z0\.s, p0, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u32_s32index, svuint32_t, uint16_t, svint32_t, ++ svst1h_scatter_s32index_u32 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_u32_s32index: ++** st1h z0\.s, p0, \[x0, z1\.s, sxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_u32_s32index, svuint32_t, uint16_t, svint32_t, ++ svst1h_scatter_s32index_u32 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_x0_u32_u32index: ++** st1h z0\.s, p0, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ 
(st1h_scatter_x0_u32_u32index, svuint32_t, uint16_t, svuint32_t, ++ svst1h_scatter_u32index_u32 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_u32_u32index: ++** st1h z0\.s, p0, \[x0, z1\.s, uxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_u32_u32index, svuint32_t, uint16_t, svuint32_t, ++ svst1h_scatter_u32index_u32 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c +new file mode 100644 +index 000000000..7021ea68f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c +@@ -0,0 +1,243 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1h_scatter_u64: ++** st1h z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_u64, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_u64 (p0, z1, z0), ++ svst1h_scatter (p0, z1, z0)) ++ ++/* ++** st1h_scatter_x0_u64_offset: ++** st1h z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_x0_u64_offset, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_u64 (p0, z1, x0, z0), ++ svst1h_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1h_scatter_m2_u64_offset: ++** mov (x[0-9]+), #?-2 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_m2_u64_offset, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_u64 (p0, z1, -2, z0), ++ svst1h_scatter_offset (p0, z1, -2, z0)) ++ ++/* ++** st1h_scatter_0_u64_offset: ++** st1h z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_0_u64_offset, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_u64 (p0, z1, 0, z0), ++ svst1h_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1h_scatter_5_u64_offset: ++** mov (x[0-9]+), #?5 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_5_u64_offset, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_u64 (p0, z1, 5, z0), ++ svst1h_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1h_scatter_6_u64_offset: ++** st1h z0\.d, p0, \[z1\.d, #6\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_6_u64_offset, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_u64 (p0, z1, 6, z0), ++ svst1h_scatter_offset (p0, z1, 6, z0)) ++ ++/* ++** st1h_scatter_62_u64_offset: ++** st1h z0\.d, p0, \[z1\.d, #62\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_62_u64_offset, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_u64 (p0, z1, 62, z0), ++ svst1h_scatter_offset (p0, z1, 62, z0)) ++ ++/* ++** st1h_scatter_64_u64_offset: ++** mov (x[0-9]+), #?64 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_64_u64_offset, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_offset_u64 (p0, z1, 64, z0), ++ svst1h_scatter_offset (p0, z1, 64, z0)) ++ ++/* ++** st1h_scatter_x0_u64_index: ++** lsl (x[0-9]+), x0, #?1 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_x0_u64_index, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_u64 (p0, z1, x0, z0), ++ svst1h_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1h_scatter_m1_u64_index: ++** mov (x[0-9]+), #?-2 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_m1_u64_index, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_u64 (p0, z1, -1, z0), ++ svst1h_scatter_index (p0, z1, 
-1, z0)) ++ ++/* ++** st1h_scatter_0_u64_index: ++** st1h z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_0_u64_index, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_u64 (p0, z1, 0, z0), ++ svst1h_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1h_scatter_5_u64_index: ++** st1h z0\.d, p0, \[z1\.d, #10\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_5_u64_index, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_u64 (p0, z1, 5, z0), ++ svst1h_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1h_scatter_31_u64_index: ++** st1h z0\.d, p0, \[z1\.d, #62\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_31_u64_index, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_u64 (p0, z1, 31, z0), ++ svst1h_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1h_scatter_32_u64_index: ++** mov (x[0-9]+), #?64 ++** st1h z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1h_scatter_32_u64_index, svuint64_t, svuint64_t, ++ svst1h_scatter_u64base_index_u64 (p0, z1, 32, z0), ++ svst1h_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1h_scatter_x0_u64_s64offset: ++** st1h z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ svst1h_scatter_s64offset_u64 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_u64_s64offset: ++** st1h z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ svst1h_scatter_s64offset_u64 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_ext_u64_s64offset: ++** st1h z0\.d, p0, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_ext_u64_s64offset, svuint64_t, uint16_t, svint64_t, ++ svst1h_scatter_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1h_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1h_scatter_x0_u64_u64offset: ++** st1h z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ svst1h_scatter_u64offset_u64 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_u64_u64offset: ++** st1h z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ svst1h_scatter_u64offset_u64 (p0, x0, z1, z0), ++ svst1h_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_ext_u64_u64offset: ++** st1h z0\.d, p0, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_ext_u64_u64offset, svuint64_t, uint16_t, svuint64_t, ++ svst1h_scatter_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1h_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1h_scatter_x0_u64_s64index: ++** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u64_s64index, svuint64_t, uint16_t, svint64_t, ++ svst1h_scatter_s64index_u64 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_u64_s64index: ++** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_u64_s64index, svuint64_t, uint16_t, svint64_t, ++ svst1h_scatter_s64index_u64 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_ext_u64_s64index: ++** st1h z0\.d, p0, \[x0, z1\.d, sxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_ext_u64_s64index, svuint64_t, uint16_t, svint64_t, ++ 
svst1h_scatter_s64index_u64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1h_scatter_index (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1h_scatter_x0_u64_u64index: ++** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ svst1h_scatter_u64index_u64 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_u64_u64index: ++** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ svst1h_scatter_u64index_u64 (p0, x0, z1, z0), ++ svst1h_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1h_scatter_ext_u64_u64index: ++** st1h z0\.d, p0, \[x0, z1\.d, uxtw 1\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1h_scatter_ext_u64_u64index, svuint64_t, uint16_t, svuint64_t, ++ svst1h_scatter_u64index_u64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1h_scatter_index (p0, x0, svextw_x (p0, z1), z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u32.c +new file mode 100644 +index 000000000..49111043b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1h_u32_base: ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_u32_base, svuint32_t, uint16_t, ++ svst1h_u32 (p0, x0, z0), ++ svst1h (p0, x0, z0)) ++ ++/* ++** st1h_u32_index: ++** st1h z0\.s, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st1h_u32_index, svuint32_t, uint16_t, ++ svst1h_u32 (p0, x0 + x1, z0), ++ svst1h (p0, x0 + x1, z0)) ++ ++/* ++** st1h_u32_1: ++** st1h z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_u32_1, svuint32_t, uint16_t, ++ svst1h_u32 (p0, x0 + svcntw (), z0), ++ svst1h (p0, x0 + svcntw (), z0)) ++ ++/* ++** st1h_u32_7: ++** st1h z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_u32_7, svuint32_t, uint16_t, ++ svst1h_u32 (p0, x0 + svcntw () * 7, z0), ++ svst1h (p0, x0 + svcntw () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_u32_8: ++** incb x0, all, mul #4 ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_u32_8, svuint32_t, uint16_t, ++ svst1h_u32 (p0, x0 + svcntw () * 8, z0), ++ svst1h (p0, x0 + svcntw () * 8, z0)) ++ ++/* ++** st1h_u32_m1: ++** st1h z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_u32_m1, svuint32_t, uint16_t, ++ svst1h_u32 (p0, x0 - svcntw (), z0), ++ svst1h (p0, x0 - svcntw (), z0)) ++ ++/* ++** st1h_u32_m8: ++** st1h z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_u32_m8, svuint32_t, uint16_t, ++ svst1h_u32 (p0, x0 - svcntw () * 8, z0), ++ svst1h (p0, x0 - svcntw () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1h_u32_m9: ++** dech x0, all, mul #9 ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_u32_m9, svuint32_t, uint16_t, ++ svst1h_u32 (p0, x0 - svcntw () * 9, z0), ++ svst1h (p0, x0 - svcntw () * 9, z0)) ++ ++/* ++** st1h_vnum_u32_0: ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u32_0, svuint32_t, uint16_t, ++ svst1h_vnum_u32 (p0, x0, 0, z0), ++ svst1h_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1h_vnum_u32_1: ++** st1h z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u32_1, svuint32_t, uint16_t, ++ svst1h_vnum_u32 (p0, x0, 1, z0), ++ svst1h_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1h_vnum_u32_7: ++** st1h z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u32_7, svuint32_t, uint16_t, ++ svst1h_vnum_u32 (p0, x0, 7, z0), ++ svst1h_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_vnum_u32_8: ++** incb x0, all, mul #4 ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u32_8, svuint32_t, uint16_t, ++ svst1h_vnum_u32 (p0, x0, 8, z0), ++ svst1h_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1h_vnum_u32_m1: ++** st1h z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u32_m1, svuint32_t, uint16_t, ++ svst1h_vnum_u32 (p0, x0, -1, z0), ++ svst1h_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1h_vnum_u32_m8: ++** st1h z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u32_m8, svuint32_t, uint16_t, ++ svst1h_vnum_u32 (p0, x0, -8, z0), ++ svst1h_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_vnum_u32_m9: ++** dech x0, all, mul #9 ++** st1h z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u32_m9, svuint32_t, uint16_t, ++ svst1h_vnum_u32 (p0, x0, -9, z0), ++ svst1h_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st1h_vnum_u32_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1h z0\.s, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u32_x1, svuint32_t, uint16_t, ++ svst1h_vnum_u32 (p0, x0, x1, z0), ++ svst1h_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u64.c +new file mode 100644 +index 000000000..448cadb49 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1h_u64_base: ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_u64_base, svuint64_t, uint16_t, ++ svst1h_u64 (p0, x0, z0), ++ svst1h (p0, x0, z0)) ++ ++/* ++** st1h_u64_index: ++** st1h z0\.d, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st1h_u64_index, svuint64_t, uint16_t, ++ svst1h_u64 (p0, x0 + x1, z0), ++ svst1h (p0, x0 + x1, z0)) ++ ++/* ++** st1h_u64_1: ++** st1h z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_u64_1, svuint64_t, uint16_t, ++ svst1h_u64 (p0, x0 + svcntd (), z0), ++ svst1h (p0, x0 + svcntd (), z0)) ++ ++/* ++** st1h_u64_7: ++** st1h z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_u64_7, svuint64_t, uint16_t, ++ svst1h_u64 (p0, x0 + svcntd () * 7, z0), ++ svst1h (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st1h_u64_8: ++** incb x0, all, mul #2 ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_u64_8, svuint64_t, uint16_t, ++ svst1h_u64 (p0, x0 + svcntd () * 8, z0), ++ svst1h (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** st1h_u64_m1: ++** st1h z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_u64_m1, svuint64_t, uint16_t, ++ svst1h_u64 (p0, x0 - svcntd (), z0), ++ svst1h (p0, x0 - svcntd (), z0)) ++ ++/* ++** st1h_u64_m8: ++** st1h z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_u64_m8, svuint64_t, uint16_t, ++ svst1h_u64 (p0, x0 - svcntd () * 8, z0), ++ svst1h (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_u64_m9: ++** decw x0, all, mul #9 ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_u64_m9, svuint64_t, uint16_t, ++ svst1h_u64 (p0, x0 - svcntd () * 9, z0), ++ svst1h (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** st1h_vnum_u64_0: ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u64_0, svuint64_t, uint16_t, ++ svst1h_vnum_u64 (p0, x0, 0, z0), ++ svst1h_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1h_vnum_u64_1: ++** st1h z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u64_1, svuint64_t, uint16_t, ++ svst1h_vnum_u64 (p0, x0, 1, z0), ++ svst1h_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1h_vnum_u64_7: ++** st1h z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u64_7, svuint64_t, uint16_t, ++ svst1h_vnum_u64 (p0, x0, 7, z0), ++ svst1h_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_vnum_u64_8: ++** incb x0, all, mul #2 ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u64_8, svuint64_t, uint16_t, ++ svst1h_vnum_u64 (p0, x0, 8, z0), ++ svst1h_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1h_vnum_u64_m1: ++** st1h z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u64_m1, svuint64_t, uint16_t, ++ svst1h_vnum_u64 (p0, x0, -1, z0), ++ svst1h_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1h_vnum_u64_m8: ++** st1h z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u64_m8, svuint64_t, uint16_t, ++ svst1h_vnum_u64 (p0, x0, -8, z0), ++ svst1h_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1h_vnum_u64_m9: ++** decw x0, all, mul #9 ++** st1h z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u64_m9, svuint64_t, uint16_t, ++ svst1h_vnum_u64 (p0, x0, -9, z0), ++ svst1h_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st1h_vnum_u64_x1: ++** cntw (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1h z0\.d, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1h_vnum_u64_x1, svuint64_t, uint16_t, ++ svst1h_vnum_u64 (p0, x0, x1, z0), ++ svst1h_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_s64.c +new file mode 100644 +index 000000000..0893ce926 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_s64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1w_s64_base: ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_s64_base, svint64_t, int32_t, ++ svst1w_s64 (p0, x0, z0), ++ svst1w (p0, x0, z0)) ++ ++/* ++** st1w_s64_index: ++** st1w z0\.d, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st1w_s64_index, svint64_t, int32_t, ++ svst1w_s64 (p0, x0 + x1, z0), ++ svst1w (p0, x0 + x1, z0)) ++ ++/* ++** st1w_s64_1: ++** st1w z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_s64_1, svint64_t, int32_t, ++ svst1w_s64 (p0, x0 + svcntd (), z0), ++ svst1w (p0, x0 + svcntd (), z0)) ++ ++/* ++** st1w_s64_7: ++** st1w z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_s64_7, svint64_t, int32_t, ++ svst1w_s64 (p0, x0 + svcntd () * 7, z0), ++ svst1w (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1w_s64_8: ++** incb x0, all, mul #4 ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_s64_8, svint64_t, int32_t, ++ svst1w_s64 (p0, x0 + svcntd () * 8, z0), ++ svst1w (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** st1w_s64_m1: ++** st1w z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_s64_m1, svint64_t, int32_t, ++ svst1w_s64 (p0, x0 - svcntd (), z0), ++ svst1w (p0, x0 - svcntd (), z0)) ++ ++/* ++** st1w_s64_m8: ++** st1w z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_s64_m8, svint64_t, int32_t, ++ svst1w_s64 (p0, x0 - svcntd () * 8, z0), ++ svst1w (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1w_s64_m9: ++** dech x0, all, mul #9 ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_s64_m9, svint64_t, int32_t, ++ svst1w_s64 (p0, x0 - svcntd () * 9, z0), ++ svst1w (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** st1w_vnum_s64_0: ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_s64_0, svint64_t, int32_t, ++ svst1w_vnum_s64 (p0, x0, 0, z0), ++ svst1w_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1w_vnum_s64_1: ++** st1w z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_s64_1, svint64_t, int32_t, ++ svst1w_vnum_s64 (p0, x0, 1, z0), ++ svst1w_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1w_vnum_s64_7: ++** st1w z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_s64_7, svint64_t, int32_t, ++ svst1w_vnum_s64 (p0, x0, 7, z0), ++ svst1w_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1w_vnum_s64_8: ++** incb x0, all, mul #4 ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_s64_8, svint64_t, int32_t, ++ svst1w_vnum_s64 (p0, x0, 8, z0), ++ svst1w_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1w_vnum_s64_m1: ++** st1w z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_s64_m1, svint64_t, int32_t, ++ svst1w_vnum_s64 (p0, x0, -1, z0), ++ svst1w_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1w_vnum_s64_m8: ++** st1w z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_s64_m8, svint64_t, int32_t, ++ svst1w_vnum_s64 (p0, x0, -8, z0), ++ svst1w_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1w_vnum_s64_m9: ++** dech x0, all, mul #9 ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_s64_m9, svint64_t, int32_t, ++ svst1w_vnum_s64 (p0, x0, -9, z0), ++ svst1w_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** st1w_vnum_s64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1w z0\.d, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_s64_x1, svint64_t, int32_t, ++ svst1w_vnum_s64 (p0, x0, x1, z0), ++ svst1w_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c +new file mode 100644 +index 000000000..2363f592b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c +@@ -0,0 +1,263 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1w_scatter_s64: ++** st1w z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_s64, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_s64 (p0, z1, z0), ++ svst1w_scatter (p0, z1, z0)) ++ ++/* ++** st1w_scatter_x0_s64_offset: ++** st1w z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_x0_s64_offset, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_s64 (p0, z1, x0, z0), ++ svst1w_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1w_scatter_m4_s64_offset: ++** mov (x[0-9]+), #?-4 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_m4_s64_offset, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_s64 (p0, z1, -4, z0), ++ svst1w_scatter_offset (p0, z1, -4, z0)) ++ ++/* ++** st1w_scatter_0_s64_offset: ++** st1w z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_0_s64_offset, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_s64 (p0, z1, 0, z0), ++ svst1w_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1w_scatter_5_s64_offset: ++** mov (x[0-9]+), #?5 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_5_s64_offset, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_s64 (p0, z1, 5, z0), ++ svst1w_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1w_scatter_6_s64_offset: ++** mov (x[0-9]+), #?6 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_6_s64_offset, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_s64 (p0, z1, 6, z0), ++ svst1w_scatter_offset (p0, z1, 6, z0)) ++ ++/* ++** st1w_scatter_7_s64_offset: ++** mov (x[0-9]+), #?7 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_7_s64_offset, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_s64 (p0, z1, 7, z0), ++ svst1w_scatter_offset (p0, z1, 7, z0)) ++ ++/* ++** st1w_scatter_8_s64_offset: ++** st1w z0\.d, p0, \[z1\.d, #8\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_8_s64_offset, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_s64 (p0, z1, 8, z0), ++ svst1w_scatter_offset (p0, z1, 8, z0)) ++ ++/* ++** st1w_scatter_124_s64_offset: ++** st1w z0\.d, p0, \[z1\.d, #124\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_124_s64_offset, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_s64 (p0, z1, 124, z0), ++ svst1w_scatter_offset (p0, z1, 124, z0)) ++ ++/* ++** st1w_scatter_128_s64_offset: ++** mov (x[0-9]+), #?128 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_128_s64_offset, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_s64 (p0, z1, 128, z0), ++ svst1w_scatter_offset (p0, z1, 128, z0)) ++ ++/* ++** st1w_scatter_x0_s64_index: ++** lsl (x[0-9]+), x0, #?2 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_x0_s64_index, 
svint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_s64 (p0, z1, x0, z0), ++ svst1w_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1w_scatter_m1_s64_index: ++** mov (x[0-9]+), #?-4 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_m1_s64_index, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_s64 (p0, z1, -1, z0), ++ svst1w_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1w_scatter_0_s64_index: ++** st1w z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_0_s64_index, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_s64 (p0, z1, 0, z0), ++ svst1w_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1w_scatter_5_s64_index: ++** st1w z0\.d, p0, \[z1\.d, #20\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_5_s64_index, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_s64 (p0, z1, 5, z0), ++ svst1w_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1w_scatter_31_s64_index: ++** st1w z0\.d, p0, \[z1\.d, #124\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_31_s64_index, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_s64 (p0, z1, 31, z0), ++ svst1w_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1w_scatter_32_s64_index: ++** mov (x[0-9]+), #?128 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_32_s64_index, svint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_s64 (p0, z1, 32, z0), ++ svst1w_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1w_scatter_x0_s64_s64offset: ++** st1w z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_x0_s64_s64offset, svint64_t, int32_t, svint64_t, ++ svst1w_scatter_s64offset_s64 (p0, x0, z1, z0), ++ svst1w_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_s64_s64offset: ++** st1w z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_s64_s64offset, svint64_t, int32_t, svint64_t, ++ svst1w_scatter_s64offset_s64 (p0, x0, z1, z0), ++ svst1w_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_ext_s64_s64offset: ++** st1w z0\.d, p0, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_ext_s64_s64offset, svint64_t, int32_t, svint64_t, ++ svst1w_scatter_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1w_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1w_scatter_x0_s64_u64offset: ++** st1w z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_x0_s64_u64offset, svint64_t, int32_t, svuint64_t, ++ svst1w_scatter_u64offset_s64 (p0, x0, z1, z0), ++ svst1w_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_s64_u64offset: ++** st1w z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_s64_u64offset, svint64_t, int32_t, svuint64_t, ++ svst1w_scatter_u64offset_s64 (p0, x0, z1, z0), ++ svst1w_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_ext_s64_u64offset: ++** st1w z0\.d, p0, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_ext_s64_u64offset, svint64_t, int32_t, svuint64_t, ++ svst1w_scatter_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1w_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1w_scatter_x0_s64_s64index: ++** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_x0_s64_s64index, svint64_t, int32_t, svint64_t, ++ svst1w_scatter_s64index_s64 (p0, x0, z1, z0), ++ svst1w_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_s64_s64index: ++** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] ++** ret ++*/ 
++TEST_STORE_SCATTER_SZ (st1w_scatter_s64_s64index, svint64_t, int32_t, svint64_t, ++ svst1w_scatter_s64index_s64 (p0, x0, z1, z0), ++ svst1w_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_ext_s64_s64index: ++** st1w z0\.d, p0, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_ext_s64_s64index, svint64_t, int32_t, svint64_t, ++ svst1w_scatter_s64index_s64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1w_scatter_index (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1w_scatter_x0_s64_u64index: ++** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_x0_s64_u64index, svint64_t, int32_t, svuint64_t, ++ svst1w_scatter_u64index_s64 (p0, x0, z1, z0), ++ svst1w_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_s64_u64index: ++** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_s64_u64index, svint64_t, int32_t, svuint64_t, ++ svst1w_scatter_u64index_s64 (p0, x0, z1, z0), ++ svst1w_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_ext_s64_u64index: ++** st1w z0\.d, p0, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_ext_s64_u64index, svint64_t, int32_t, svuint64_t, ++ svst1w_scatter_u64index_s64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1w_scatter_index (p0, x0, svextw_x (p0, z1), z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c +new file mode 100644 +index 000000000..767c009b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c +@@ -0,0 +1,263 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1w_scatter_u64: ++** st1w z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_u64, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_u64 (p0, z1, z0), ++ svst1w_scatter (p0, z1, z0)) ++ ++/* ++** st1w_scatter_x0_u64_offset: ++** st1w z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_x0_u64_offset, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_u64 (p0, z1, x0, z0), ++ svst1w_scatter_offset (p0, z1, x0, z0)) ++ ++/* ++** st1w_scatter_m4_u64_offset: ++** mov (x[0-9]+), #?-4 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_m4_u64_offset, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_u64 (p0, z1, -4, z0), ++ svst1w_scatter_offset (p0, z1, -4, z0)) ++ ++/* ++** st1w_scatter_0_u64_offset: ++** st1w z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_0_u64_offset, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_u64 (p0, z1, 0, z0), ++ svst1w_scatter_offset (p0, z1, 0, z0)) ++ ++/* ++** st1w_scatter_5_u64_offset: ++** mov (x[0-9]+), #?5 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_5_u64_offset, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_u64 (p0, z1, 5, z0), ++ svst1w_scatter_offset (p0, z1, 5, z0)) ++ ++/* ++** st1w_scatter_6_u64_offset: ++** mov (x[0-9]+), #?6 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_6_u64_offset, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_u64 (p0, z1, 6, z0), ++ svst1w_scatter_offset (p0, z1, 6, z0)) ++ ++/* ++** st1w_scatter_7_u64_offset: ++** mov (x[0-9]+), #?7 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_7_u64_offset, svuint64_t, 
svuint64_t, ++ svst1w_scatter_u64base_offset_u64 (p0, z1, 7, z0), ++ svst1w_scatter_offset (p0, z1, 7, z0)) ++ ++/* ++** st1w_scatter_8_u64_offset: ++** st1w z0\.d, p0, \[z1\.d, #8\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_8_u64_offset, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_u64 (p0, z1, 8, z0), ++ svst1w_scatter_offset (p0, z1, 8, z0)) ++ ++/* ++** st1w_scatter_124_u64_offset: ++** st1w z0\.d, p0, \[z1\.d, #124\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_124_u64_offset, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_u64 (p0, z1, 124, z0), ++ svst1w_scatter_offset (p0, z1, 124, z0)) ++ ++/* ++** st1w_scatter_128_u64_offset: ++** mov (x[0-9]+), #?128 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_128_u64_offset, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_offset_u64 (p0, z1, 128, z0), ++ svst1w_scatter_offset (p0, z1, 128, z0)) ++ ++/* ++** st1w_scatter_x0_u64_index: ++** lsl (x[0-9]+), x0, #?2 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_x0_u64_index, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_u64 (p0, z1, x0, z0), ++ svst1w_scatter_index (p0, z1, x0, z0)) ++ ++/* ++** st1w_scatter_m1_u64_index: ++** mov (x[0-9]+), #?-4 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_m1_u64_index, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_u64 (p0, z1, -1, z0), ++ svst1w_scatter_index (p0, z1, -1, z0)) ++ ++/* ++** st1w_scatter_0_u64_index: ++** st1w z0\.d, p0, \[z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_0_u64_index, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_u64 (p0, z1, 0, z0), ++ svst1w_scatter_index (p0, z1, 0, z0)) ++ ++/* ++** st1w_scatter_5_u64_index: ++** st1w z0\.d, p0, \[z1\.d, #20\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_5_u64_index, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_u64 (p0, z1, 5, z0), ++ svst1w_scatter_index (p0, z1, 5, z0)) ++ ++/* ++** st1w_scatter_31_u64_index: ++** st1w z0\.d, p0, \[z1\.d, #124\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_31_u64_index, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_u64 (p0, z1, 31, z0), ++ svst1w_scatter_index (p0, z1, 31, z0)) ++ ++/* ++** st1w_scatter_32_u64_index: ++** mov (x[0-9]+), #?128 ++** st1w z0\.d, p0, \[\1, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_ZS (st1w_scatter_32_u64_index, svuint64_t, svuint64_t, ++ svst1w_scatter_u64base_index_u64 (p0, z1, 32, z0), ++ svst1w_scatter_index (p0, z1, 32, z0)) ++ ++/* ++** st1w_scatter_x0_u64_s64offset: ++** st1w z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_x0_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ svst1w_scatter_s64offset_u64 (p0, x0, z1, z0), ++ svst1w_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_u64_s64offset: ++** st1w z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ svst1w_scatter_s64offset_u64 (p0, x0, z1, z0), ++ svst1w_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_ext_u64_s64offset: ++** st1w z0\.d, p0, \[x0, z1\.d, sxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_ext_u64_s64offset, svuint64_t, uint32_t, svint64_t, ++ svst1w_scatter_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1w_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1w_scatter_x0_u64_u64offset: ++** st1w z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ 
(st1w_scatter_x0_u64_u64offset, svuint64_t, uint32_t, svuint64_t, ++ svst1w_scatter_u64offset_u64 (p0, x0, z1, z0), ++ svst1w_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_u64_u64offset: ++** st1w z0\.d, p0, \[x0, z1\.d\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_u64_u64offset, svuint64_t, uint32_t, svuint64_t, ++ svst1w_scatter_u64offset_u64 (p0, x0, z1, z0), ++ svst1w_scatter_offset (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_ext_u64_u64offset: ++** st1w z0\.d, p0, \[x0, z1\.d, uxtw\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_ext_u64_u64offset, svuint64_t, uint32_t, svuint64_t, ++ svst1w_scatter_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1w_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1w_scatter_x0_u64_s64index: ++** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_x0_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ svst1w_scatter_s64index_u64 (p0, x0, z1, z0), ++ svst1w_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_u64_s64index: ++** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ svst1w_scatter_s64index_u64 (p0, x0, z1, z0), ++ svst1w_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_ext_u64_s64index: ++** st1w z0\.d, p0, \[x0, z1\.d, sxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_ext_u64_s64index, svuint64_t, uint32_t, svint64_t, ++ svst1w_scatter_s64index_u64 (p0, x0, svextw_s64_x (p0, z1), z0), ++ svst1w_scatter_index (p0, x0, svextw_x (p0, z1), z0)) ++ ++/* ++** st1w_scatter_x0_u64_u64index: ++** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_x0_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ svst1w_scatter_u64index_u64 (p0, x0, z1, z0), ++ svst1w_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_u64_u64index: ++** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ svst1w_scatter_u64index_u64 (p0, x0, z1, z0), ++ svst1w_scatter_index (p0, x0, z1, z0)) ++ ++/* ++** st1w_scatter_ext_u64_u64index: ++** st1w z0\.d, p0, \[x0, z1\.d, uxtw 2\] ++** ret ++*/ ++TEST_STORE_SCATTER_SZ (st1w_scatter_ext_u64_u64index, svuint64_t, uint32_t, svuint64_t, ++ svst1w_scatter_u64index_u64 (p0, x0, svextw_u64_x (p0, z1), z0), ++ svst1w_scatter_index (p0, x0, svextw_x (p0, z1), z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_u64.c +new file mode 100644 +index 000000000..882abebbb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_u64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st1w_u64_base: ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_u64_base, svuint64_t, uint32_t, ++ svst1w_u64 (p0, x0, z0), ++ svst1w (p0, x0, z0)) ++ ++/* ++** st1w_u64_index: ++** st1w z0\.d, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st1w_u64_index, svuint64_t, uint32_t, ++ svst1w_u64 (p0, x0 + x1, z0), ++ svst1w (p0, x0 + x1, z0)) ++ ++/* ++** st1w_u64_1: ++** st1w z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_u64_1, svuint64_t, uint32_t, ++ svst1w_u64 (p0, x0 + svcntd (), z0), ++ svst1w (p0, x0 + svcntd (), z0)) ++ ++/* ++** st1w_u64_7: ++** st1w z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_u64_7, svuint64_t, uint32_t, ++ svst1w_u64 (p0, x0 + svcntd () * 7, z0), ++ svst1w (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1w_u64_8: ++** incb x0, all, mul #4 ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_u64_8, svuint64_t, uint32_t, ++ svst1w_u64 (p0, x0 + svcntd () * 8, z0), ++ svst1w (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** st1w_u64_m1: ++** st1w z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_u64_m1, svuint64_t, uint32_t, ++ svst1w_u64 (p0, x0 - svcntd (), z0), ++ svst1w (p0, x0 - svcntd (), z0)) ++ ++/* ++** st1w_u64_m8: ++** st1w z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_u64_m8, svuint64_t, uint32_t, ++ svst1w_u64 (p0, x0 - svcntd () * 8, z0), ++ svst1w (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1w_u64_m9: ++** dech x0, all, mul #9 ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_u64_m9, svuint64_t, uint32_t, ++ svst1w_u64 (p0, x0 - svcntd () * 9, z0), ++ svst1w (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** st1w_vnum_u64_0: ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_u64_0, svuint64_t, uint32_t, ++ svst1w_vnum_u64 (p0, x0, 0, z0), ++ svst1w_vnum (p0, x0, 0, z0)) ++ ++/* ++** st1w_vnum_u64_1: ++** st1w z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_u64_1, svuint64_t, uint32_t, ++ svst1w_vnum_u64 (p0, x0, 1, z0), ++ svst1w_vnum (p0, x0, 1, z0)) ++ ++/* ++** st1w_vnum_u64_7: ++** st1w z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_u64_7, svuint64_t, uint32_t, ++ svst1w_vnum_u64 (p0, x0, 7, z0), ++ svst1w_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1w_vnum_u64_8: ++** incb x0, all, mul #4 ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_u64_8, svuint64_t, uint32_t, ++ svst1w_vnum_u64 (p0, x0, 8, z0), ++ svst1w_vnum (p0, x0, 8, z0)) ++ ++/* ++** st1w_vnum_u64_m1: ++** st1w z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_u64_m1, svuint64_t, uint32_t, ++ svst1w_vnum_u64 (p0, x0, -1, z0), ++ svst1w_vnum (p0, x0, -1, z0)) ++ ++/* ++** st1w_vnum_u64_m8: ++** st1w z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_u64_m8, svuint64_t, uint32_t, ++ svst1w_vnum_u64 (p0, x0, -8, z0), ++ svst1w_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st1w_vnum_u64_m9: ++** dech x0, all, mul #9 ++** st1w z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_u64_m9, svuint64_t, uint32_t, ++ svst1w_vnum_u64 (p0, x0, -9, z0), ++ svst1w_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** st1w_vnum_u64_x1: ++** cnth (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st1w z0\.d, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st1w_vnum_u64_x1, svuint64_t, uint32_t, ++ svst1w_vnum_u64 (p0, x0, x1, z0), ++ svst1w_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_bf16.c +new file mode 100644 +index 000000000..a4a57af08 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_bf16.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_bf16_base: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_bf16_base, svbfloat16x2_t, bfloat16_t, ++ svst2_bf16 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_bf16_index: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st2_bf16_index, svbfloat16x2_t, bfloat16_t, ++ svst2_bf16 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_bf16_1: ++** incb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_bf16_1, svbfloat16x2_t, bfloat16_t, ++ svst2_bf16 (p0, x0 + svcnth (), z0), ++ svst2 (p0, x0 + svcnth (), z0)) ++ ++/* ++** st2_bf16_2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_bf16_2, svbfloat16x2_t, bfloat16_t, ++ svst2_bf16 (p0, x0 + svcnth () * 2, z0), ++ svst2 (p0, x0 + svcnth () * 2, z0)) ++ ++/* ++** st2_bf16_14: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_bf16_14, svbfloat16x2_t, bfloat16_t, ++ svst2_bf16 (p0, x0 + svcnth () * 14, z0), ++ svst2 (p0, x0 + svcnth () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_bf16_16: ++** incb x0, all, mul #16 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_bf16_16, svbfloat16x2_t, bfloat16_t, ++ svst2_bf16 (p0, x0 + svcnth () * 16, z0), ++ svst2 (p0, x0 + svcnth () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_bf16_m1: ++** decb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_bf16_m1, svbfloat16x2_t, bfloat16_t, ++ svst2_bf16 (p0, x0 - svcnth (), z0), ++ svst2 (p0, x0 - svcnth (), z0)) ++ ++/* ++** st2_bf16_m2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_bf16_m2, svbfloat16x2_t, bfloat16_t, ++ svst2_bf16 (p0, x0 - svcnth () * 2, z0), ++ svst2 (p0, x0 - svcnth () * 2, z0)) ++ ++/* ++** st2_bf16_m16: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_bf16_m16, svbfloat16x2_t, bfloat16_t, ++ svst2_bf16 (p0, x0 - svcnth () * 16, z0), ++ svst2 (p0, x0 - svcnth () * 16, z0)) ++ ++/* ++** st2_bf16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_bf16_m18, svbfloat16x2_t, bfloat16_t, ++ svst2_bf16 (p0, x0 - svcnth () * 18, z0), ++ svst2 (p0, x0 - svcnth () * 18, z0)) ++ ++/* ++** st2_vnum_bf16_0: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_bf16_0, svbfloat16x2_t, bfloat16_t, ++ svst2_vnum_bf16 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_vnum_bf16_1: ++** incb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_bf16_1, svbfloat16x2_t, bfloat16_t, ++ svst2_vnum_bf16 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_bf16_2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_bf16_2, svbfloat16x2_t, bfloat16_t, ++ svst2_vnum_bf16 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_bf16_14: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_bf16_14, svbfloat16x2_t, bfloat16_t, ++ svst2_vnum_bf16 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_bf16_16: ++** incb x0, all, mul #16 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_bf16_16, svbfloat16x2_t, bfloat16_t, ++ svst2_vnum_bf16 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_bf16_m1: ++** decb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t, ++ svst2_vnum_bf16 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_bf16_m2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t, ++ svst2_vnum_bf16 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_bf16_m16: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t, ++ svst2_vnum_bf16 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_bf16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t, ++ svst2_vnum_bf16 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st2_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st2_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t, ++ svst2_vnum_bf16 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f16.c +new file mode 100644 +index 000000000..014203be6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f16.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_f16_base: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f16_base, svfloat16x2_t, float16_t, ++ svst2_f16 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_f16_index: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st2_f16_index, svfloat16x2_t, float16_t, ++ svst2_f16 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_f16_1: ++** incb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f16_1, svfloat16x2_t, float16_t, ++ svst2_f16 (p0, x0 + svcnth (), z0), ++ svst2 (p0, x0 + svcnth (), z0)) ++ ++/* ++** st2_f16_2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f16_2, svfloat16x2_t, float16_t, ++ svst2_f16 (p0, x0 + svcnth () * 2, z0), ++ svst2 (p0, x0 + svcnth () * 2, z0)) ++ ++/* ++** st2_f16_14: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f16_14, svfloat16x2_t, float16_t, ++ svst2_f16 (p0, x0 + svcnth () * 14, z0), ++ svst2 (p0, x0 + svcnth () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_f16_16: ++** incb x0, all, mul #16 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f16_16, svfloat16x2_t, float16_t, ++ svst2_f16 (p0, x0 + svcnth () * 16, z0), ++ svst2 (p0, x0 + svcnth () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_f16_m1: ++** decb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f16_m1, svfloat16x2_t, float16_t, ++ svst2_f16 (p0, x0 - svcnth (), z0), ++ svst2 (p0, x0 - svcnth (), z0)) ++ ++/* ++** st2_f16_m2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f16_m2, svfloat16x2_t, float16_t, ++ svst2_f16 (p0, x0 - svcnth () * 2, z0), ++ svst2 (p0, x0 - svcnth () * 2, z0)) ++ ++/* ++** st2_f16_m16: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f16_m16, svfloat16x2_t, float16_t, ++ svst2_f16 (p0, x0 - svcnth () * 16, z0), ++ svst2 (p0, x0 - svcnth () * 16, z0)) ++ ++/* ++** st2_f16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_f16_m18, svfloat16x2_t, float16_t, ++ svst2_f16 (p0, x0 - svcnth () * 18, z0), ++ svst2 (p0, x0 - svcnth () * 18, z0)) ++ ++/* ++** st2_vnum_f16_0: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f16_0, svfloat16x2_t, float16_t, ++ svst2_vnum_f16 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_f16_1: ++** incb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f16_1, svfloat16x2_t, float16_t, ++ svst2_vnum_f16 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_f16_2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f16_2, svfloat16x2_t, float16_t, ++ svst2_vnum_f16 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_f16_14: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f16_14, svfloat16x2_t, float16_t, ++ svst2_vnum_f16 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_f16_16: ++** incb x0, all, mul #16 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f16_16, svfloat16x2_t, float16_t, ++ svst2_vnum_f16 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_vnum_f16_m1: ++** decb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f16_m1, svfloat16x2_t, float16_t, ++ svst2_vnum_f16 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_f16_m2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f16_m2, svfloat16x2_t, float16_t, ++ svst2_vnum_f16 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_f16_m16: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f16_m16, svfloat16x2_t, float16_t, ++ svst2_vnum_f16 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_f16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f16_m18, svfloat16x2_t, float16_t, ++ svst2_vnum_f16 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st2_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f16_x1, svfloat16x2_t, float16_t, ++ svst2_vnum_f16 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f32.c +new file mode 100644 +index 000000000..ba271882e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f32.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_f32_base: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f32_base, svfloat32x2_t, float32_t, ++ svst2_f32 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_f32_index: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st2_f32_index, svfloat32x2_t, float32_t, ++ svst2_f32 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_f32_1: ++** incb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f32_1, svfloat32x2_t, float32_t, ++ svst2_f32 (p0, x0 + svcntw (), z0), ++ svst2 (p0, x0 + svcntw (), z0)) ++ ++/* ++** st2_f32_2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f32_2, svfloat32x2_t, float32_t, ++ svst2_f32 (p0, x0 + svcntw () * 2, z0), ++ svst2 (p0, x0 + svcntw () * 2, z0)) ++ ++/* ++** st2_f32_14: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f32_14, svfloat32x2_t, float32_t, ++ svst2_f32 (p0, x0 + svcntw () * 14, z0), ++ svst2 (p0, x0 + svcntw () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_f32_16: ++** incb x0, all, mul #16 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f32_16, svfloat32x2_t, float32_t, ++ svst2_f32 (p0, x0 + svcntw () * 16, z0), ++ svst2 (p0, x0 + svcntw () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_f32_m1: ++** decb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f32_m1, svfloat32x2_t, float32_t, ++ svst2_f32 (p0, x0 - svcntw (), z0), ++ svst2 (p0, x0 - svcntw (), z0)) ++ ++/* ++** st2_f32_m2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f32_m2, svfloat32x2_t, float32_t, ++ svst2_f32 (p0, x0 - svcntw () * 2, z0), ++ svst2 (p0, x0 - svcntw () * 2, z0)) ++ ++/* ++** st2_f32_m16: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f32_m16, svfloat32x2_t, float32_t, ++ svst2_f32 (p0, x0 - svcntw () * 16, z0), ++ svst2 (p0, x0 - svcntw () * 16, z0)) ++ ++/* ++** st2_f32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_f32_m18, svfloat32x2_t, float32_t, ++ svst2_f32 (p0, x0 - svcntw () * 18, z0), ++ svst2 (p0, x0 - svcntw () * 18, z0)) ++ ++/* ++** st2_vnum_f32_0: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f32_0, svfloat32x2_t, float32_t, ++ svst2_vnum_f32 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_f32_1: ++** incb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f32_1, svfloat32x2_t, float32_t, ++ svst2_vnum_f32 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_f32_2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f32_2, svfloat32x2_t, float32_t, ++ svst2_vnum_f32 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_f32_14: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f32_14, svfloat32x2_t, float32_t, ++ svst2_vnum_f32 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_f32_16: ++** incb x0, all, mul #16 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f32_16, svfloat32x2_t, float32_t, ++ svst2_vnum_f32 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_f32_m1: ++** decb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f32_m1, svfloat32x2_t, float32_t, ++ svst2_vnum_f32 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_f32_m2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f32_m2, svfloat32x2_t, float32_t, ++ svst2_vnum_f32 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_f32_m16: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f32_m16, svfloat32x2_t, float32_t, ++ svst2_vnum_f32 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_f32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f32_m18, svfloat32x2_t, float32_t, ++ svst2_vnum_f32 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** st2_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f32_x1, svfloat32x2_t, float32_t, ++ svst2_vnum_f32 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f64.c +new file mode 100644 +index 000000000..c499ba0fe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f64.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_f64_base: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f64_base, svfloat64x2_t, float64_t, ++ svst2_f64 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_f64_index: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st2_f64_index, svfloat64x2_t, float64_t, ++ svst2_f64 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_f64_1: ++** incb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f64_1, svfloat64x2_t, float64_t, ++ svst2_f64 (p0, x0 + svcntd (), z0), ++ svst2 (p0, x0 + svcntd (), z0)) ++ ++/* ++** st2_f64_2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f64_2, svfloat64x2_t, float64_t, ++ svst2_f64 (p0, x0 + svcntd () * 2, z0), ++ svst2 (p0, x0 + svcntd () * 2, z0)) ++ ++/* ++** st2_f64_14: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f64_14, svfloat64x2_t, float64_t, ++ svst2_f64 (p0, x0 + svcntd () * 14, z0), ++ svst2 (p0, x0 + svcntd () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_f64_16: ++** incb x0, all, mul #16 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f64_16, svfloat64x2_t, float64_t, ++ svst2_f64 (p0, x0 + svcntd () * 16, z0), ++ svst2 (p0, x0 + svcntd () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_f64_m1: ++** decb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_f64_m1, svfloat64x2_t, float64_t, ++ svst2_f64 (p0, x0 - svcntd (), z0), ++ svst2 (p0, x0 - svcntd (), z0)) ++ ++/* ++** st2_f64_m2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f64_m2, svfloat64x2_t, float64_t, ++ svst2_f64 (p0, x0 - svcntd () * 2, z0), ++ svst2 (p0, x0 - svcntd () * 2, z0)) ++ ++/* ++** st2_f64_m16: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_f64_m16, svfloat64x2_t, float64_t, ++ svst2_f64 (p0, x0 - svcntd () * 16, z0), ++ svst2 (p0, x0 - svcntd () * 16, z0)) ++ ++/* ++** st2_f64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_f64_m18, svfloat64x2_t, float64_t, ++ svst2_f64 (p0, x0 - svcntd () * 18, z0), ++ svst2 (p0, x0 - svcntd () * 18, z0)) ++ ++/* ++** st2_vnum_f64_0: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f64_0, svfloat64x2_t, float64_t, ++ svst2_vnum_f64 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_vnum_f64_1: ++** incb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f64_1, svfloat64x2_t, float64_t, ++ svst2_vnum_f64 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_f64_2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f64_2, svfloat64x2_t, float64_t, ++ svst2_vnum_f64 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_f64_14: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f64_14, svfloat64x2_t, float64_t, ++ svst2_vnum_f64 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_f64_16: ++** incb x0, all, mul #16 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f64_16, svfloat64x2_t, float64_t, ++ svst2_vnum_f64 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_f64_m1: ++** decb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f64_m1, svfloat64x2_t, float64_t, ++ svst2_vnum_f64 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_f64_m2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f64_m2, svfloat64x2_t, float64_t, ++ svst2_vnum_f64 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_f64_m16: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f64_m16, svfloat64x2_t, float64_t, ++ svst2_vnum_f64 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_f64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f64_m18, svfloat64x2_t, float64_t, ++ svst2_vnum_f64 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st2_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st2_vnum_f64_x1, svfloat64x2_t, float64_t, ++ svst2_vnum_f64 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s16.c +new file mode 100644 +index 000000000..860b45eac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s16.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_s16_base: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s16_base, svint16x2_t, int16_t, ++ svst2_s16 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_s16_index: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st2_s16_index, svint16x2_t, int16_t, ++ svst2_s16 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_s16_1: ++** incb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s16_1, svint16x2_t, int16_t, ++ svst2_s16 (p0, x0 + svcnth (), z0), ++ svst2 (p0, x0 + svcnth (), z0)) ++ ++/* ++** st2_s16_2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s16_2, svint16x2_t, int16_t, ++ svst2_s16 (p0, x0 + svcnth () * 2, z0), ++ svst2 (p0, x0 + svcnth () * 2, z0)) ++ ++/* ++** st2_s16_14: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s16_14, svint16x2_t, int16_t, ++ svst2_s16 (p0, x0 + svcnth () * 14, z0), ++ svst2 (p0, x0 + svcnth () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_s16_16: ++** incb x0, all, mul #16 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s16_16, svint16x2_t, int16_t, ++ svst2_s16 (p0, x0 + svcnth () * 16, z0), ++ svst2 (p0, x0 + svcnth () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_s16_m1: ++** decb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s16_m1, svint16x2_t, int16_t, ++ svst2_s16 (p0, x0 - svcnth (), z0), ++ svst2 (p0, x0 - svcnth (), z0)) ++ ++/* ++** st2_s16_m2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s16_m2, svint16x2_t, int16_t, ++ svst2_s16 (p0, x0 - svcnth () * 2, z0), ++ svst2 (p0, x0 - svcnth () * 2, z0)) ++ ++/* ++** st2_s16_m16: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s16_m16, svint16x2_t, int16_t, ++ svst2_s16 (p0, x0 - svcnth () * 16, z0), ++ svst2 (p0, x0 - svcnth () * 16, z0)) ++ ++/* ++** st2_s16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_s16_m18, svint16x2_t, int16_t, ++ svst2_s16 (p0, x0 - svcnth () * 18, z0), ++ svst2 (p0, x0 - svcnth () * 18, z0)) ++ ++/* ++** st2_vnum_s16_0: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s16_0, svint16x2_t, int16_t, ++ svst2_vnum_s16 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_s16_1: ++** incb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s16_1, svint16x2_t, int16_t, ++ svst2_vnum_s16 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_s16_2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s16_2, svint16x2_t, int16_t, ++ svst2_vnum_s16 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_s16_14: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s16_14, svint16x2_t, int16_t, ++ svst2_vnum_s16 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_s16_16: ++** incb x0, all, mul #16 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s16_16, svint16x2_t, int16_t, ++ svst2_vnum_s16 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_vnum_s16_m1: ++** decb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s16_m1, svint16x2_t, int16_t, ++ svst2_vnum_s16 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_s16_m2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s16_m2, svint16x2_t, int16_t, ++ svst2_vnum_s16 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_s16_m16: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s16_m16, svint16x2_t, int16_t, ++ svst2_vnum_s16 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_s16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s16_m18, svint16x2_t, int16_t, ++ svst2_vnum_s16 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st2_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s16_x1, svint16x2_t, int16_t, ++ svst2_vnum_s16 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s32.c +new file mode 100644 +index 000000000..16b674992 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s32.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_s32_base: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s32_base, svint32x2_t, int32_t, ++ svst2_s32 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_s32_index: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st2_s32_index, svint32x2_t, int32_t, ++ svst2_s32 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_s32_1: ++** incb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s32_1, svint32x2_t, int32_t, ++ svst2_s32 (p0, x0 + svcntw (), z0), ++ svst2 (p0, x0 + svcntw (), z0)) ++ ++/* ++** st2_s32_2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s32_2, svint32x2_t, int32_t, ++ svst2_s32 (p0, x0 + svcntw () * 2, z0), ++ svst2 (p0, x0 + svcntw () * 2, z0)) ++ ++/* ++** st2_s32_14: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s32_14, svint32x2_t, int32_t, ++ svst2_s32 (p0, x0 + svcntw () * 14, z0), ++ svst2 (p0, x0 + svcntw () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_s32_16: ++** incb x0, all, mul #16 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s32_16, svint32x2_t, int32_t, ++ svst2_s32 (p0, x0 + svcntw () * 16, z0), ++ svst2 (p0, x0 + svcntw () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_s32_m1: ++** decb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s32_m1, svint32x2_t, int32_t, ++ svst2_s32 (p0, x0 - svcntw (), z0), ++ svst2 (p0, x0 - svcntw (), z0)) ++ ++/* ++** st2_s32_m2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s32_m2, svint32x2_t, int32_t, ++ svst2_s32 (p0, x0 - svcntw () * 2, z0), ++ svst2 (p0, x0 - svcntw () * 2, z0)) ++ ++/* ++** st2_s32_m16: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s32_m16, svint32x2_t, int32_t, ++ svst2_s32 (p0, x0 - svcntw () * 16, z0), ++ svst2 (p0, x0 - svcntw () * 16, z0)) ++ ++/* ++** st2_s32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_s32_m18, svint32x2_t, int32_t, ++ svst2_s32 (p0, x0 - svcntw () * 18, z0), ++ svst2 (p0, x0 - svcntw () * 18, z0)) ++ ++/* ++** st2_vnum_s32_0: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s32_0, svint32x2_t, int32_t, ++ svst2_vnum_s32 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_s32_1: ++** incb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s32_1, svint32x2_t, int32_t, ++ svst2_vnum_s32 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_s32_2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s32_2, svint32x2_t, int32_t, ++ svst2_vnum_s32 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_s32_14: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s32_14, svint32x2_t, int32_t, ++ svst2_vnum_s32 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_s32_16: ++** incb x0, all, mul #16 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s32_16, svint32x2_t, int32_t, ++ svst2_vnum_s32 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_s32_m1: ++** decb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s32_m1, svint32x2_t, int32_t, ++ svst2_vnum_s32 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_s32_m2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s32_m2, svint32x2_t, int32_t, ++ svst2_vnum_s32 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_s32_m16: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s32_m16, svint32x2_t, int32_t, ++ svst2_vnum_s32 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_s32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s32_m18, svint32x2_t, int32_t, ++ svst2_vnum_s32 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** st2_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s32_x1, svint32x2_t, int32_t, ++ svst2_vnum_s32 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s64.c +new file mode 100644 +index 000000000..1421333cb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s64.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_s64_base: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s64_base, svint64x2_t, int64_t, ++ svst2_s64 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_s64_index: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st2_s64_index, svint64x2_t, int64_t, ++ svst2_s64 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_s64_1: ++** incb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s64_1, svint64x2_t, int64_t, ++ svst2_s64 (p0, x0 + svcntd (), z0), ++ svst2 (p0, x0 + svcntd (), z0)) ++ ++/* ++** st2_s64_2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s64_2, svint64x2_t, int64_t, ++ svst2_s64 (p0, x0 + svcntd () * 2, z0), ++ svst2 (p0, x0 + svcntd () * 2, z0)) ++ ++/* ++** st2_s64_14: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s64_14, svint64x2_t, int64_t, ++ svst2_s64 (p0, x0 + svcntd () * 14, z0), ++ svst2 (p0, x0 + svcntd () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_s64_16: ++** incb x0, all, mul #16 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s64_16, svint64x2_t, int64_t, ++ svst2_s64 (p0, x0 + svcntd () * 16, z0), ++ svst2 (p0, x0 + svcntd () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_s64_m1: ++** decb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s64_m1, svint64x2_t, int64_t, ++ svst2_s64 (p0, x0 - svcntd (), z0), ++ svst2 (p0, x0 - svcntd (), z0)) ++ ++/* ++** st2_s64_m2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s64_m2, svint64x2_t, int64_t, ++ svst2_s64 (p0, x0 - svcntd () * 2, z0), ++ svst2 (p0, x0 - svcntd () * 2, z0)) ++ ++/* ++** st2_s64_m16: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s64_m16, svint64x2_t, int64_t, ++ svst2_s64 (p0, x0 - svcntd () * 16, z0), ++ svst2 (p0, x0 - svcntd () * 16, z0)) ++ ++/* ++** st2_s64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_s64_m18, svint64x2_t, int64_t, ++ svst2_s64 (p0, x0 - svcntd () * 18, z0), ++ svst2 (p0, x0 - svcntd () * 18, z0)) ++ ++/* ++** st2_vnum_s64_0: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s64_0, svint64x2_t, int64_t, ++ svst2_vnum_s64 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_vnum_s64_1: ++** incb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s64_1, svint64x2_t, int64_t, ++ svst2_vnum_s64 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_s64_2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s64_2, svint64x2_t, int64_t, ++ svst2_vnum_s64 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_s64_14: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s64_14, svint64x2_t, int64_t, ++ svst2_vnum_s64 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_s64_16: ++** incb x0, all, mul #16 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s64_16, svint64x2_t, int64_t, ++ svst2_vnum_s64 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_s64_m1: ++** decb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s64_m1, svint64x2_t, int64_t, ++ svst2_vnum_s64 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_s64_m2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s64_m2, svint64x2_t, int64_t, ++ svst2_vnum_s64 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_s64_m16: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s64_m16, svint64x2_t, int64_t, ++ svst2_vnum_s64 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_s64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s64_m18, svint64x2_t, int64_t, ++ svst2_vnum_s64 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st2_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s64_x1, svint64x2_t, int64_t, ++ svst2_vnum_s64 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s8.c +new file mode 100644 +index 000000000..f0b7df3c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s8.c +@@ -0,0 +1,204 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_s8_base: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s8_base, svint8x2_t, int8_t, ++ svst2_s8 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_s8_index: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st2_s8_index, svint8x2_t, int8_t, ++ svst2_s8 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_s8_1: ++** incb x0 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s8_1, svint8x2_t, int8_t, ++ svst2_s8 (p0, x0 + svcntb (), z0), ++ svst2 (p0, x0 + svcntb (), z0)) ++ ++/* ++** st2_s8_2: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s8_2, svint8x2_t, int8_t, ++ svst2_s8 (p0, x0 + svcntb () * 2, z0), ++ svst2 (p0, x0 + svcntb () * 2, z0)) ++ ++/* ++** st2_s8_14: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s8_14, svint8x2_t, int8_t, ++ svst2_s8 (p0, x0 + svcntb () * 14, z0), ++ svst2 (p0, x0 + svcntb () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_s8_16: ++** incb x0, all, mul #16 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s8_16, svint8x2_t, int8_t, ++ svst2_s8 (p0, x0 + svcntb () * 16, z0), ++ svst2 (p0, x0 + svcntb () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_s8_m1: ++** decb x0 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_s8_m1, svint8x2_t, int8_t, ++ svst2_s8 (p0, x0 - svcntb (), z0), ++ svst2 (p0, x0 - svcntb (), z0)) ++ ++/* ++** st2_s8_m2: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s8_m2, svint8x2_t, int8_t, ++ svst2_s8 (p0, x0 - svcntb () * 2, z0), ++ svst2 (p0, x0 - svcntb () * 2, z0)) ++ ++/* ++** st2_s8_m16: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_s8_m16, svint8x2_t, int8_t, ++ svst2_s8 (p0, x0 - svcntb () * 16, z0), ++ svst2 (p0, x0 - svcntb () * 16, z0)) ++ ++/* ++** st2_s8_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_s8_m18, svint8x2_t, int8_t, ++ svst2_s8 (p0, x0 - svcntb () * 18, z0), ++ svst2 (p0, x0 - svcntb () * 18, z0)) ++ ++/* ++** st2_vnum_s8_0: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s8_0, svint8x2_t, int8_t, ++ svst2_vnum_s8 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_s8_1: ++** incb x0 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s8_1, svint8x2_t, int8_t, ++ svst2_vnum_s8 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_s8_2: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s8_2, svint8x2_t, int8_t, ++ svst2_vnum_s8 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_s8_14: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s8_14, svint8x2_t, int8_t, ++ svst2_vnum_s8 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_s8_16: ++** incb x0, all, mul #16 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s8_16, svint8x2_t, int8_t, ++ svst2_vnum_s8 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_vnum_s8_m1: ++** decb x0 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s8_m1, svint8x2_t, int8_t, ++ svst2_vnum_s8 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_s8_m2: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s8_m2, svint8x2_t, int8_t, ++ svst2_vnum_s8 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_s8_m16: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s8_m16, svint8x2_t, int8_t, ++ svst2_vnum_s8 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_s8_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_s8_m18, svint8x2_t, int8_t, ++ svst2_vnum_s8 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* ++** st2_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st2_vnum_s8_x1, svint8x2_t, int8_t, ++ svst2_vnum_s8 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u16.c +new file mode 100644 +index 000000000..edd32d81e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u16.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_u16_base: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u16_base, svuint16x2_t, uint16_t, ++ svst2_u16 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_u16_index: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st2_u16_index, svuint16x2_t, uint16_t, ++ svst2_u16 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_u16_1: ++** incb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u16_1, svuint16x2_t, uint16_t, ++ svst2_u16 (p0, x0 + svcnth (), z0), ++ svst2 (p0, x0 + svcnth (), z0)) ++ ++/* ++** st2_u16_2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u16_2, svuint16x2_t, uint16_t, ++ svst2_u16 (p0, x0 + svcnth () * 2, z0), ++ svst2 (p0, x0 + svcnth () * 2, z0)) ++ ++/* ++** st2_u16_14: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u16_14, svuint16x2_t, uint16_t, ++ svst2_u16 (p0, x0 + svcnth () * 14, z0), ++ svst2 (p0, x0 + svcnth () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_u16_16: ++** incb x0, all, mul #16 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u16_16, svuint16x2_t, uint16_t, ++ svst2_u16 (p0, x0 + svcnth () * 16, z0), ++ svst2 (p0, x0 + svcnth () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_u16_m1: ++** decb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u16_m1, svuint16x2_t, uint16_t, ++ svst2_u16 (p0, x0 - svcnth (), z0), ++ svst2 (p0, x0 - svcnth (), z0)) ++ ++/* ++** st2_u16_m2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u16_m2, svuint16x2_t, uint16_t, ++ svst2_u16 (p0, x0 - svcnth () * 2, z0), ++ svst2 (p0, x0 - svcnth () * 2, z0)) ++ ++/* ++** st2_u16_m16: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u16_m16, svuint16x2_t, uint16_t, ++ svst2_u16 (p0, x0 - svcnth () * 16, z0), ++ svst2 (p0, x0 - svcnth () * 16, z0)) ++ ++/* ++** st2_u16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_u16_m18, svuint16x2_t, uint16_t, ++ svst2_u16 (p0, x0 - svcnth () * 18, z0), ++ svst2 (p0, x0 - svcnth () * 18, z0)) ++ ++/* ++** st2_vnum_u16_0: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u16_0, svuint16x2_t, uint16_t, ++ svst2_vnum_u16 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_u16_1: ++** incb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u16_1, svuint16x2_t, uint16_t, ++ svst2_vnum_u16 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_u16_2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u16_2, svuint16x2_t, uint16_t, ++ svst2_vnum_u16 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_u16_14: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u16_14, svuint16x2_t, uint16_t, ++ svst2_vnum_u16 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_u16_16: ++** incb x0, all, mul #16 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u16_16, svuint16x2_t, uint16_t, ++ svst2_vnum_u16 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_u16_m1: ++** decb x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u16_m1, svuint16x2_t, uint16_t, ++ svst2_vnum_u16 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_u16_m2: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u16_m2, svuint16x2_t, uint16_t, ++ svst2_vnum_u16 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_u16_m16: ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u16_m16, svuint16x2_t, uint16_t, ++ svst2_vnum_u16 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_u16_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u16_m18, svuint16x2_t, uint16_t, ++ svst2_vnum_u16 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** st2_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u16_x1, svuint16x2_t, uint16_t, ++ svst2_vnum_u16 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u32.c +new file mode 100644 +index 000000000..46f1b5ca7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u32.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_u32_base: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u32_base, svuint32x2_t, uint32_t, ++ svst2_u32 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_u32_index: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st2_u32_index, svuint32x2_t, uint32_t, ++ svst2_u32 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_u32_1: ++** incb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u32_1, svuint32x2_t, uint32_t, ++ svst2_u32 (p0, x0 + svcntw (), z0), ++ svst2 (p0, x0 + svcntw (), z0)) ++ ++/* ++** st2_u32_2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u32_2, svuint32x2_t, uint32_t, ++ svst2_u32 (p0, x0 + svcntw () * 2, z0), ++ svst2 (p0, x0 + svcntw () * 2, z0)) ++ ++/* ++** st2_u32_14: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u32_14, svuint32x2_t, uint32_t, ++ svst2_u32 (p0, x0 + svcntw () * 14, z0), ++ svst2 (p0, x0 + svcntw () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_u32_16: ++** incb x0, all, mul #16 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u32_16, svuint32x2_t, uint32_t, ++ svst2_u32 (p0, x0 + svcntw () * 16, z0), ++ svst2 (p0, x0 + svcntw () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_u32_m1: ++** decb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u32_m1, svuint32x2_t, uint32_t, ++ svst2_u32 (p0, x0 - svcntw (), z0), ++ svst2 (p0, x0 - svcntw (), z0)) ++ ++/* ++** st2_u32_m2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u32_m2, svuint32x2_t, uint32_t, ++ svst2_u32 (p0, x0 - svcntw () * 2, z0), ++ svst2 (p0, x0 - svcntw () * 2, z0)) ++ ++/* ++** st2_u32_m16: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u32_m16, svuint32x2_t, uint32_t, ++ svst2_u32 (p0, x0 - svcntw () * 16, z0), ++ svst2 (p0, x0 - svcntw () * 16, z0)) ++ ++/* ++** st2_u32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_u32_m18, svuint32x2_t, uint32_t, ++ svst2_u32 (p0, x0 - svcntw () * 18, z0), ++ svst2 (p0, x0 - svcntw () * 18, z0)) ++ ++/* ++** st2_vnum_u32_0: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u32_0, svuint32x2_t, uint32_t, ++ svst2_vnum_u32 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_vnum_u32_1: ++** incb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u32_1, svuint32x2_t, uint32_t, ++ svst2_vnum_u32 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_u32_2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u32_2, svuint32x2_t, uint32_t, ++ svst2_vnum_u32 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_u32_14: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u32_14, svuint32x2_t, uint32_t, ++ svst2_vnum_u32 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_u32_16: ++** incb x0, all, mul #16 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u32_16, svuint32x2_t, uint32_t, ++ svst2_vnum_u32 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_u32_m1: ++** decb x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u32_m1, svuint32x2_t, uint32_t, ++ svst2_vnum_u32 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_u32_m2: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u32_m2, svuint32x2_t, uint32_t, ++ svst2_vnum_u32 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_u32_m16: ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u32_m16, svuint32x2_t, uint32_t, ++ svst2_vnum_u32 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_u32_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u32_m18, svuint32x2_t, uint32_t, ++ svst2_vnum_u32 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st2_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u32_x1, svuint32x2_t, uint32_t, ++ svst2_vnum_u32 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u64.c +new file mode 100644 +index 000000000..0d9202b72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u64.c +@@ -0,0 +1,200 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_u64_base: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u64_base, svuint64x2_t, uint64_t, ++ svst2_u64 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_u64_index: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st2_u64_index, svuint64x2_t, uint64_t, ++ svst2_u64 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_u64_1: ++** incb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u64_1, svuint64x2_t, uint64_t, ++ svst2_u64 (p0, x0 + svcntd (), z0), ++ svst2 (p0, x0 + svcntd (), z0)) ++ ++/* ++** st2_u64_2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u64_2, svuint64x2_t, uint64_t, ++ svst2_u64 (p0, x0 + svcntd () * 2, z0), ++ svst2 (p0, x0 + svcntd () * 2, z0)) ++ ++/* ++** st2_u64_14: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u64_14, svuint64x2_t, uint64_t, ++ svst2_u64 (p0, x0 + svcntd () * 14, z0), ++ svst2 (p0, x0 + svcntd () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_u64_16: ++** incb x0, all, mul #16 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u64_16, svuint64x2_t, uint64_t, ++ svst2_u64 (p0, x0 + svcntd () * 16, z0), ++ svst2 (p0, x0 + svcntd () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_u64_m1: ++** decb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u64_m1, svuint64x2_t, uint64_t, ++ svst2_u64 (p0, x0 - svcntd (), z0), ++ svst2 (p0, x0 - svcntd (), z0)) ++ ++/* ++** st2_u64_m2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u64_m2, svuint64x2_t, uint64_t, ++ svst2_u64 (p0, x0 - svcntd () * 2, z0), ++ svst2 (p0, x0 - svcntd () * 2, z0)) ++ ++/* ++** st2_u64_m16: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u64_m16, svuint64x2_t, uint64_t, ++ svst2_u64 (p0, x0 - svcntd () * 16, z0), ++ svst2 (p0, x0 - svcntd () * 16, z0)) ++ ++/* ++** st2_u64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_u64_m18, svuint64x2_t, uint64_t, ++ svst2_u64 (p0, x0 - svcntd () * 18, z0), ++ svst2 (p0, x0 - svcntd () * 18, z0)) ++ ++/* ++** st2_vnum_u64_0: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u64_0, svuint64x2_t, uint64_t, ++ svst2_vnum_u64 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_u64_1: ++** incb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u64_1, svuint64x2_t, uint64_t, ++ svst2_vnum_u64 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_u64_2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u64_2, svuint64x2_t, uint64_t, ++ svst2_vnum_u64 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_u64_14: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u64_14, svuint64x2_t, uint64_t, ++ svst2_vnum_u64 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_u64_16: ++** incb x0, all, mul #16 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u64_16, svuint64x2_t, uint64_t, ++ svst2_vnum_u64 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_vnum_u64_m1: ++** decb x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u64_m1, svuint64x2_t, uint64_t, ++ svst2_vnum_u64 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_u64_m2: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u64_m2, svuint64x2_t, uint64_t, ++ svst2_vnum_u64 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_u64_m16: ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u64_m16, svuint64x2_t, uint64_t, ++ svst2_vnum_u64 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_u64_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u64_m18, svuint64x2_t, uint64_t, ++ svst2_vnum_u64 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st2_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u64_x1, svuint64x2_t, uint64_t, ++ svst2_vnum_u64 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u8.c +new file mode 100644 +index 000000000..e7ea977a2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u8.c +@@ -0,0 +1,204 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st2_u8_base: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u8_base, svuint8x2_t, uint8_t, ++ svst2_u8 (p0, x0, z0), ++ svst2 (p0, x0, z0)) ++ ++/* ++** st2_u8_index: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st2_u8_index, svuint8x2_t, uint8_t, ++ svst2_u8 (p0, x0 + x1, z0), ++ svst2 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_u8_1: ++** incb x0 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u8_1, svuint8x2_t, uint8_t, ++ svst2_u8 (p0, x0 + svcntb (), z0), ++ svst2 (p0, x0 + svcntb (), z0)) ++ ++/* ++** st2_u8_2: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u8_2, svuint8x2_t, uint8_t, ++ svst2_u8 (p0, x0 + svcntb () * 2, z0), ++ svst2 (p0, x0 + svcntb () * 2, z0)) ++ ++/* ++** st2_u8_14: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u8_14, svuint8x2_t, uint8_t, ++ svst2_u8 (p0, x0 + svcntb () * 14, z0), ++ svst2 (p0, x0 + svcntb () * 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_u8_16: ++** incb x0, all, mul #16 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u8_16, svuint8x2_t, uint8_t, ++ svst2_u8 (p0, x0 + svcntb () * 16, z0), ++ svst2 (p0, x0 + svcntb () * 16, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_u8_m1: ++** decb x0 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_u8_m1, svuint8x2_t, uint8_t, ++ svst2_u8 (p0, x0 - svcntb (), z0), ++ svst2 (p0, x0 - svcntb (), z0)) ++ ++/* ++** st2_u8_m2: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u8_m2, svuint8x2_t, uint8_t, ++ svst2_u8 (p0, x0 - svcntb () * 2, z0), ++ svst2 (p0, x0 - svcntb () * 2, z0)) ++ ++/* ++** st2_u8_m16: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_u8_m16, svuint8x2_t, uint8_t, ++ svst2_u8 (p0, x0 - svcntb () * 16, z0), ++ svst2 (p0, x0 - svcntb () * 16, z0)) ++ ++/* ++** st2_u8_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_u8_m18, svuint8x2_t, uint8_t, ++ svst2_u8 (p0, x0 - svcntb () * 18, z0), ++ svst2 (p0, x0 - svcntb () * 18, z0)) ++ ++/* ++** st2_vnum_u8_0: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u8_0, svuint8x2_t, uint8_t, ++ svst2_vnum_u8 (p0, x0, 0, z0), ++ svst2_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_u8_1: ++** incb x0 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u8_1, svuint8x2_t, uint8_t, ++ svst2_vnum_u8 (p0, x0, 1, z0), ++ svst2_vnum (p0, x0, 1, z0)) ++ ++/* ++** st2_vnum_u8_2: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u8_2, svuint8x2_t, uint8_t, ++ svst2_vnum_u8 (p0, x0, 2, z0), ++ svst2_vnum (p0, x0, 2, z0)) ++ ++/* ++** st2_vnum_u8_14: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #14, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u8_14, svuint8x2_t, uint8_t, ++ svst2_vnum_u8 (p0, x0, 14, z0), ++ svst2_vnum (p0, x0, 14, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st2_vnum_u8_16: ++** incb x0, all, mul #16 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u8_16, svuint8x2_t, uint8_t, ++ svst2_vnum_u8 (p0, x0, 16, z0), ++ svst2_vnum (p0, x0, 16, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st2_vnum_u8_m1: ++** decb x0 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u8_m1, svuint8x2_t, uint8_t, ++ svst2_vnum_u8 (p0, x0, -1, z0), ++ svst2_vnum (p0, x0, -1, z0)) ++ ++/* ++** st2_vnum_u8_m2: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-2, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u8_m2, svuint8x2_t, uint8_t, ++ svst2_vnum_u8 (p0, x0, -2, z0), ++ svst2_vnum (p0, x0, -2, z0)) ++ ++/* ++** st2_vnum_u8_m16: ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-16, mul vl\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u8_m16, svuint8x2_t, uint8_t, ++ svst2_vnum_u8 (p0, x0, -16, z0), ++ svst2_vnum (p0, x0, -16, z0)) ++ ++/* ++** st2_vnum_u8_m18: ++** addvl (x[0-9]+), x0, #-18 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st2_vnum_u8_m18, svuint8x2_t, uint8_t, ++ svst2_vnum_u8 (p0, x0, -18, z0), ++ svst2_vnum (p0, x0, -18, z0)) ++ ++/* ++** st2_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st2_vnum_u8_x1, svuint8x2_t, uint8_t, ++ svst2_vnum_u8 (p0, x0, x1, z0), ++ svst2_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_bf16.c +new file mode 100644 +index 000000000..2f921687c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_bf16.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_bf16_base: ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_bf16_base, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_bf16_index: ++** st3h {z0\.h - z2\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st3_bf16_index, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_bf16_1: ++** incb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_bf16_1, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 + svcnth (), z0), ++ svst3 (p0, x0 + svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_bf16_2: ++** incb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_bf16_2, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 + svcnth () * 2, z0), ++ svst3 (p0, x0 + svcnth () * 2, z0)) ++ ++/* ++** st3_bf16_3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_bf16_3, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 + svcnth () * 3, z0), ++ svst3 (p0, x0 + svcnth () * 3, z0)) ++ ++/* ++** st3_bf16_21: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_bf16_21, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 + svcnth () * 21, z0), ++ svst3 (p0, x0 + svcnth () * 21, z0)) ++ ++/* ++** st3_bf16_24: ++** addvl (x[0-9]+), x0, #24 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_bf16_24, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 + svcnth () * 24, z0), ++ svst3 (p0, x0 + svcnth () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_bf16_m1: ++** decb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_bf16_m1, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 - svcnth (), z0), ++ svst3 (p0, x0 - svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_bf16_m2: ++** decb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_bf16_m2, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 - svcnth () * 2, z0), ++ svst3 (p0, x0 - svcnth () * 2, z0)) ++ ++/* ++** st3_bf16_m3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_bf16_m3, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 - svcnth () * 3, z0), ++ svst3 (p0, x0 - svcnth () * 3, z0)) ++ ++/* ++** st3_bf16_m24: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_bf16_m24, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 - svcnth () * 24, z0), ++ svst3 (p0, x0 - svcnth () * 24, z0)) ++ ++/* ++** st3_bf16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_bf16_m27, svbfloat16x3_t, bfloat16_t, ++ svst3_bf16 (p0, x0 - svcnth () * 27, z0), ++ svst3 (p0, x0 - svcnth () * 27, z0)) ++ ++/* ++** st3_vnum_bf16_0: ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_0, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_bf16_1: ++** incb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_1, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_bf16_2: ++** incb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_2, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_bf16_3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_3, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_bf16_21: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_21, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_bf16_24: ++** addvl (x[0-9]+), x0, #24 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_24, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_bf16_m1: ++** decb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_m1, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_bf16_m2: ++** decb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_m2, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_bf16_m3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_m3, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_bf16_m24: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_m24, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_bf16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_m27, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st3_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st3h {z0\.h - z2\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st3_vnum_bf16_x1, svbfloat16x3_t, bfloat16_t, ++ svst3_vnum_bf16 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f16.c +new file mode 100644 +index 000000000..388eb3708 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f16.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_f16_base: ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f16_base, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_f16_index: ++** st3h {z0\.h - z2\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st3_f16_index, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_f16_1: ++** incb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f16_1, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 + svcnth (), z0), ++ svst3 (p0, x0 + svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_f16_2: ++** incb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f16_2, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 + svcnth () * 2, z0), ++ svst3 (p0, x0 + svcnth () * 2, z0)) ++ ++/* ++** st3_f16_3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f16_3, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 + svcnth () * 3, z0), ++ svst3 (p0, x0 + svcnth () * 3, z0)) ++ ++/* ++** st3_f16_21: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f16_21, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 + svcnth () * 21, z0), ++ svst3 (p0, x0 + svcnth () * 21, z0)) ++ ++/* ++** st3_f16_24: ++** addvl (x[0-9]+), x0, #24 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_f16_24, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 + svcnth () * 24, z0), ++ svst3 (p0, x0 + svcnth () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_f16_m1: ++** decb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f16_m1, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 - svcnth (), z0), ++ svst3 (p0, x0 - svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_f16_m2: ++** decb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f16_m2, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 - svcnth () * 2, z0), ++ svst3 (p0, x0 - svcnth () * 2, z0)) ++ ++/* ++** st3_f16_m3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f16_m3, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 - svcnth () * 3, z0), ++ svst3 (p0, x0 - svcnth () * 3, z0)) ++ ++/* ++** st3_f16_m24: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f16_m24, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 - svcnth () * 24, z0), ++ svst3 (p0, x0 - svcnth () * 24, z0)) ++ ++/* ++** st3_f16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_f16_m27, svfloat16x3_t, float16_t, ++ svst3_f16 (p0, x0 - svcnth () * 27, z0), ++ svst3 (p0, x0 - svcnth () * 27, z0)) ++ ++/* ++** st3_vnum_f16_0: ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_0, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_f16_1: ++** incb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_1, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_f16_2: ++** incb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_2, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_f16_3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_3, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_f16_21: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_21, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_f16_24: ++** addvl (x[0-9]+), x0, #24 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_24, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_f16_m1: ++** decb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_m1, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_f16_m2: ++** decb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_m2, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_f16_m3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_m3, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_f16_m24: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_m24, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_f16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_m27, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st3_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st3h {z0\.h - z2\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f16_x1, svfloat16x3_t, float16_t, ++ svst3_vnum_f16 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f32.c +new file mode 100644 +index 000000000..a5e3bdb45 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f32.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_f32_base: ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f32_base, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_f32_index: ++** st3w {z0\.s - z2\.s}, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st3_f32_index, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_f32_1: ++** incb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f32_1, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 + svcntw (), z0), ++ svst3 (p0, x0 + svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_f32_2: ++** incb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f32_2, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 + svcntw () * 2, z0), ++ svst3 (p0, x0 + svcntw () * 2, z0)) ++ ++/* ++** st3_f32_3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f32_3, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 + svcntw () * 3, z0), ++ svst3 (p0, x0 + svcntw () * 3, z0)) ++ ++/* ++** st3_f32_21: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f32_21, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 + svcntw () * 21, z0), ++ svst3 (p0, x0 + svcntw () * 21, z0)) ++ ++/* ++** st3_f32_24: ++** addvl (x[0-9]+), x0, #24 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_f32_24, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 + svcntw () * 24, z0), ++ svst3 (p0, x0 + svcntw () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_f32_m1: ++** decb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f32_m1, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 - svcntw (), z0), ++ svst3 (p0, x0 - svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_f32_m2: ++** decb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f32_m2, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 - svcntw () * 2, z0), ++ svst3 (p0, x0 - svcntw () * 2, z0)) ++ ++/* ++** st3_f32_m3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f32_m3, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 - svcntw () * 3, z0), ++ svst3 (p0, x0 - svcntw () * 3, z0)) ++ ++/* ++** st3_f32_m24: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f32_m24, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 - svcntw () * 24, z0), ++ svst3 (p0, x0 - svcntw () * 24, z0)) ++ ++/* ++** st3_f32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_f32_m27, svfloat32x3_t, float32_t, ++ svst3_f32 (p0, x0 - svcntw () * 27, z0), ++ svst3 (p0, x0 - svcntw () * 27, z0)) ++ ++/* ++** st3_vnum_f32_0: ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_0, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_f32_1: ++** incb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_1, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_f32_2: ++** incb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_2, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_f32_3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_3, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_f32_21: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_21, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_f32_24: ++** addvl (x[0-9]+), x0, #24 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_24, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_f32_m1: ++** decb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_m1, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_f32_m2: ++** decb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_m2, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_f32_m3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_m3, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_f32_m24: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_m24, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_f32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_m27, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st3_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st3w {z0\.s - z2\.s}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f32_x1, svfloat32x3_t, float32_t, ++ svst3_vnum_f32 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f64.c +new file mode 100644 +index 000000000..30407da8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f64.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_f64_base: ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f64_base, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_f64_index: ++** st3d {z0\.d - z2\.d}, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st3_f64_index, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_f64_1: ++** incb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f64_1, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 + svcntd (), z0), ++ svst3 (p0, x0 + svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_f64_2: ++** incb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f64_2, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 + svcntd () * 2, z0), ++ svst3 (p0, x0 + svcntd () * 2, z0)) ++ ++/* ++** st3_f64_3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f64_3, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 + svcntd () * 3, z0), ++ svst3 (p0, x0 + svcntd () * 3, z0)) ++ ++/* ++** st3_f64_21: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f64_21, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 + svcntd () * 21, z0), ++ svst3 (p0, x0 + svcntd () * 21, z0)) ++ ++/* ++** st3_f64_24: ++** addvl (x[0-9]+), x0, #24 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_f64_24, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 + svcntd () * 24, z0), ++ svst3 (p0, x0 + svcntd () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_f64_m1: ++** decb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f64_m1, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 - svcntd (), z0), ++ svst3 (p0, x0 - svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_f64_m2: ++** decb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_f64_m2, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 - svcntd () * 2, z0), ++ svst3 (p0, x0 - svcntd () * 2, z0)) ++ ++/* ++** st3_f64_m3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f64_m3, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 - svcntd () * 3, z0), ++ svst3 (p0, x0 - svcntd () * 3, z0)) ++ ++/* ++** st3_f64_m24: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_f64_m24, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 - svcntd () * 24, z0), ++ svst3 (p0, x0 - svcntd () * 24, z0)) ++ ++/* ++** st3_f64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_f64_m27, svfloat64x3_t, float64_t, ++ svst3_f64 (p0, x0 - svcntd () * 27, z0), ++ svst3 (p0, x0 - svcntd () * 27, z0)) ++ ++/* ++** st3_vnum_f64_0: ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_0, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_f64_1: ++** incb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_1, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_f64_2: ++** incb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_2, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_f64_3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_3, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_f64_21: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_21, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_f64_24: ++** addvl (x[0-9]+), x0, #24 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_24, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_f64_m1: ++** decb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_m1, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_f64_m2: ++** decb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_m2, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_f64_m3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_m3, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_f64_m24: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_m24, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_f64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_m27, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st3_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st3d {z0\.d - z2\.d}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st3_vnum_f64_x1, svfloat64x3_t, float64_t, ++ svst3_vnum_f64 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s16.c +new file mode 100644 +index 000000000..a4a1109c5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s16.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_s16_base: ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s16_base, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_s16_index: ++** st3h {z0\.h - z2\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st3_s16_index, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s16_1: ++** incb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s16_1, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 + svcnth (), z0), ++ svst3 (p0, x0 + svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s16_2: ++** incb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s16_2, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 + svcnth () * 2, z0), ++ svst3 (p0, x0 + svcnth () * 2, z0)) ++ ++/* ++** st3_s16_3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s16_3, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 + svcnth () * 3, z0), ++ svst3 (p0, x0 + svcnth () * 3, z0)) ++ ++/* ++** st3_s16_21: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s16_21, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 + svcnth () * 21, z0), ++ svst3 (p0, x0 + svcnth () * 21, z0)) ++ ++/* ++** st3_s16_24: ++** addvl (x[0-9]+), x0, #24 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_s16_24, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 + svcnth () * 24, z0), ++ svst3 (p0, x0 + svcnth () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_s16_m1: ++** decb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s16_m1, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 - svcnth (), z0), ++ svst3 (p0, x0 - svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s16_m2: ++** decb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s16_m2, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 - svcnth () * 2, z0), ++ svst3 (p0, x0 - svcnth () * 2, z0)) ++ ++/* ++** st3_s16_m3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s16_m3, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 - svcnth () * 3, z0), ++ svst3 (p0, x0 - svcnth () * 3, z0)) ++ ++/* ++** st3_s16_m24: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s16_m24, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 - svcnth () * 24, z0), ++ svst3 (p0, x0 - svcnth () * 24, z0)) ++ ++/* ++** st3_s16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_s16_m27, svint16x3_t, int16_t, ++ svst3_s16 (p0, x0 - svcnth () * 27, z0), ++ svst3 (p0, x0 - svcnth () * 27, z0)) ++ ++/* ++** st3_vnum_s16_0: ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_0, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s16_1: ++** incb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_1, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s16_2: ++** incb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_2, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_s16_3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_3, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_s16_21: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_21, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_s16_24: ++** addvl (x[0-9]+), x0, #24 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_24, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s16_m1: ++** decb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_m1, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_s16_m2: ++** decb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_m2, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_s16_m3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_m3, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_s16_m24: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_m24, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_s16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_m27, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st3_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st3h {z0\.h - z2\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s16_x1, svint16x3_t, int16_t, ++ svst3_vnum_s16 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s32.c +new file mode 100644 +index 000000000..2442d9b28 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s32.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_s32_base: ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s32_base, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_s32_index: ++** st3w {z0\.s - z2\.s}, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st3_s32_index, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s32_1: ++** incb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s32_1, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 + svcntw (), z0), ++ svst3 (p0, x0 + svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s32_2: ++** incb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s32_2, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 + svcntw () * 2, z0), ++ svst3 (p0, x0 + svcntw () * 2, z0)) ++ ++/* ++** st3_s32_3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s32_3, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 + svcntw () * 3, z0), ++ svst3 (p0, x0 + svcntw () * 3, z0)) ++ ++/* ++** st3_s32_21: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s32_21, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 + svcntw () * 21, z0), ++ svst3 (p0, x0 + svcntw () * 21, z0)) ++ ++/* ++** st3_s32_24: ++** addvl (x[0-9]+), x0, #24 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_s32_24, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 + svcntw () * 24, z0), ++ svst3 (p0, x0 + svcntw () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_s32_m1: ++** decb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s32_m1, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 - svcntw (), z0), ++ svst3 (p0, x0 - svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s32_m2: ++** decb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s32_m2, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 - svcntw () * 2, z0), ++ svst3 (p0, x0 - svcntw () * 2, z0)) ++ ++/* ++** st3_s32_m3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s32_m3, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 - svcntw () * 3, z0), ++ svst3 (p0, x0 - svcntw () * 3, z0)) ++ ++/* ++** st3_s32_m24: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s32_m24, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 - svcntw () * 24, z0), ++ svst3 (p0, x0 - svcntw () * 24, z0)) ++ ++/* ++** st3_s32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_s32_m27, svint32x3_t, int32_t, ++ svst3_s32 (p0, x0 - svcntw () * 27, z0), ++ svst3 (p0, x0 - svcntw () * 27, z0)) ++ ++/* ++** st3_vnum_s32_0: ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_0, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s32_1: ++** incb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_1, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s32_2: ++** incb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_2, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_s32_3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_3, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_s32_21: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_21, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_s32_24: ++** addvl (x[0-9]+), x0, #24 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_24, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s32_m1: ++** decb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_m1, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_s32_m2: ++** decb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_m2, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_s32_m3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_m3, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_s32_m24: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_m24, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_s32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_m27, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st3_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st3w {z0\.s - z2\.s}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s32_x1, svint32x3_t, int32_t, ++ svst3_vnum_s32 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s64.c +new file mode 100644 +index 000000000..eca6a7cea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s64.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_s64_base: ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s64_base, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_s64_index: ++** st3d {z0\.d - z2\.d}, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st3_s64_index, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s64_1: ++** incb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s64_1, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 + svcntd (), z0), ++ svst3 (p0, x0 + svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s64_2: ++** incb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s64_2, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 + svcntd () * 2, z0), ++ svst3 (p0, x0 + svcntd () * 2, z0)) ++ ++/* ++** st3_s64_3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s64_3, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 + svcntd () * 3, z0), ++ svst3 (p0, x0 + svcntd () * 3, z0)) ++ ++/* ++** st3_s64_21: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s64_21, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 + svcntd () * 21, z0), ++ svst3 (p0, x0 + svcntd () * 21, z0)) ++ ++/* ++** st3_s64_24: ++** addvl (x[0-9]+), x0, #24 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_s64_24, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 + svcntd () * 24, z0), ++ svst3 (p0, x0 + svcntd () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_s64_m1: ++** decb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s64_m1, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 - svcntd (), z0), ++ svst3 (p0, x0 - svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s64_m2: ++** decb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s64_m2, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 - svcntd () * 2, z0), ++ svst3 (p0, x0 - svcntd () * 2, z0)) ++ ++/* ++** st3_s64_m3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s64_m3, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 - svcntd () * 3, z0), ++ svst3 (p0, x0 - svcntd () * 3, z0)) ++ ++/* ++** st3_s64_m24: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s64_m24, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 - svcntd () * 24, z0), ++ svst3 (p0, x0 - svcntd () * 24, z0)) ++ ++/* ++** st3_s64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_s64_m27, svint64x3_t, int64_t, ++ svst3_s64 (p0, x0 - svcntd () * 27, z0), ++ svst3 (p0, x0 - svcntd () * 27, z0)) ++ ++/* ++** st3_vnum_s64_0: ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_0, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s64_1: ++** incb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_1, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s64_2: ++** incb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_2, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_s64_3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_3, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_s64_21: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_21, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_s64_24: ++** addvl (x[0-9]+), x0, #24 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_24, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s64_m1: ++** decb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_m1, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_s64_m2: ++** decb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_m2, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_s64_m3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_m3, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_s64_m24: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_m24, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_s64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_m27, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st3_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st3d {z0\.d - z2\.d}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s64_x1, svint64x3_t, int64_t, ++ svst3_vnum_s64 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s8.c +new file mode 100644 +index 000000000..a54ff4b74 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s8.c +@@ -0,0 +1,246 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_s8_base: ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s8_base, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_s8_index: ++** st3b {z0\.b - z2\.b}, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st3_s8_index, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s8_1: ++** incb x0 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s8_1, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 + svcntb (), z0), ++ svst3 (p0, x0 + svcntb (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s8_2: ++** incb x0, all, mul #2 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s8_2, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 + svcntb () * 2, z0), ++ svst3 (p0, x0 + svcntb () * 2, z0)) ++ ++/* ++** st3_s8_3: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s8_3, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 + svcntb () * 3, z0), ++ svst3 (p0, x0 + svcntb () * 3, z0)) ++ ++/* ++** st3_s8_21: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s8_21, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 + svcntb () * 21, z0), ++ svst3 (p0, x0 + svcntb () * 21, z0)) ++ ++/* ++** st3_s8_24: ++** addvl (x[0-9]+), x0, #24 ++** st3b {z0\.b - z2\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_s8_24, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 + svcntb () * 24, z0), ++ svst3 (p0, x0 + svcntb () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_s8_m1: ++** decb x0 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s8_m1, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 - svcntb (), z0), ++ svst3 (p0, x0 - svcntb (), z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_s8_m2: ++** decb x0, all, mul #2 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_s8_m2, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 - svcntb () * 2, z0), ++ svst3 (p0, x0 - svcntb () * 2, z0)) ++ ++/* ++** st3_s8_m3: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s8_m3, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 - svcntb () * 3, z0), ++ svst3 (p0, x0 - svcntb () * 3, z0)) ++ ++/* ++** st3_s8_m24: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_s8_m24, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 - svcntb () * 24, z0), ++ svst3 (p0, x0 - svcntb () * 24, z0)) ++ ++/* ++** st3_s8_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3b {z0\.b - z2\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_s8_m27, svint8x3_t, int8_t, ++ svst3_s8 (p0, x0 - svcntb () * 27, z0), ++ svst3 (p0, x0 - svcntb () * 27, z0)) ++ ++/* ++** st3_vnum_s8_0: ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_0, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s8_1: ++** incb x0 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_1, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s8_2: ++** incb x0, all, mul #2 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_2, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_s8_3: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_3, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_s8_21: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_21, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_s8_24: ++** addvl (x[0-9]+), x0, #24 ++** st3b {z0\.b - z2\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_24, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_s8_m1: ++** decb x0 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_m1, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_s8_m2: ++** decb x0, all, mul #2 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_m2, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_s8_m3: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_m3, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_s8_m24: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_m24, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_s8_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3b {z0\.b - z2\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_m27, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* ++** st3_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st3b {z0\.b - z2\.b}, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st3b {z0\.b - z2\.b}, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st3_vnum_s8_x1, svint8x3_t, int8_t, ++ svst3_vnum_s8 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u16.c +new file mode 100644 +index 000000000..d4e8efca3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u16.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_u16_base: ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u16_base, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_u16_index: ++** st3h {z0\.h - z2\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st3_u16_index, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u16_1: ++** incb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u16_1, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 + svcnth (), z0), ++ svst3 (p0, x0 + svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u16_2: ++** incb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u16_2, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 + svcnth () * 2, z0), ++ svst3 (p0, x0 + svcnth () * 2, z0)) ++ ++/* ++** st3_u16_3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u16_3, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 + svcnth () * 3, z0), ++ svst3 (p0, x0 + svcnth () * 3, z0)) ++ ++/* ++** st3_u16_21: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u16_21, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 + svcnth () * 21, z0), ++ svst3 (p0, x0 + svcnth () * 21, z0)) ++ ++/* ++** st3_u16_24: ++** addvl (x[0-9]+), x0, #24 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_u16_24, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 + svcnth () * 24, z0), ++ svst3 (p0, x0 + svcnth () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_u16_m1: ++** decb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u16_m1, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 - svcnth (), z0), ++ svst3 (p0, x0 - svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u16_m2: ++** decb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u16_m2, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 - svcnth () * 2, z0), ++ svst3 (p0, x0 - svcnth () * 2, z0)) ++ ++/* ++** st3_u16_m3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u16_m3, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 - svcnth () * 3, z0), ++ svst3 (p0, x0 - svcnth () * 3, z0)) ++ ++/* ++** st3_u16_m24: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u16_m24, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 - svcnth () * 24, z0), ++ svst3 (p0, x0 - svcnth () * 24, z0)) ++ ++/* ++** st3_u16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_u16_m27, svuint16x3_t, uint16_t, ++ svst3_u16 (p0, x0 - svcnth () * 27, z0), ++ svst3 (p0, x0 - svcnth () * 27, z0)) ++ ++/* ++** st3_vnum_u16_0: ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_0, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u16_1: ++** incb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_1, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u16_2: ++** incb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_2, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_u16_3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_3, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_u16_21: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_21, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_u16_24: ++** addvl (x[0-9]+), x0, #24 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_24, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u16_m1: ++** decb x0 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_m1, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_u16_m2: ++** decb x0, all, mul #2 ++** st3h {z0\.h - z2\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_m2, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_u16_m3: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_m3, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_u16_m24: ++** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_m24, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_u16_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3h {z0\.h - z2\.h}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_m27, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st3_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st3h {z0\.h - z2\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u16_x1, svuint16x3_t, uint16_t, ++ svst3_vnum_u16 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u32.c +new file mode 100644 +index 000000000..8be3aa957 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u32.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_u32_base: ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u32_base, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_u32_index: ++** st3w {z0\.s - z2\.s}, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st3_u32_index, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u32_1: ++** incb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u32_1, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 + svcntw (), z0), ++ svst3 (p0, x0 + svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u32_2: ++** incb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u32_2, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 + svcntw () * 2, z0), ++ svst3 (p0, x0 + svcntw () * 2, z0)) ++ ++/* ++** st3_u32_3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u32_3, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 + svcntw () * 3, z0), ++ svst3 (p0, x0 + svcntw () * 3, z0)) ++ ++/* ++** st3_u32_21: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u32_21, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 + svcntw () * 21, z0), ++ svst3 (p0, x0 + svcntw () * 21, z0)) ++ ++/* ++** st3_u32_24: ++** addvl (x[0-9]+), x0, #24 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_u32_24, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 + svcntw () * 24, z0), ++ svst3 (p0, x0 + svcntw () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_u32_m1: ++** decb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u32_m1, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 - svcntw (), z0), ++ svst3 (p0, x0 - svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u32_m2: ++** decb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u32_m2, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 - svcntw () * 2, z0), ++ svst3 (p0, x0 - svcntw () * 2, z0)) ++ ++/* ++** st3_u32_m3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u32_m3, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 - svcntw () * 3, z0), ++ svst3 (p0, x0 - svcntw () * 3, z0)) ++ ++/* ++** st3_u32_m24: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u32_m24, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 - svcntw () * 24, z0), ++ svst3 (p0, x0 - svcntw () * 24, z0)) ++ ++/* ++** st3_u32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_u32_m27, svuint32x3_t, uint32_t, ++ svst3_u32 (p0, x0 - svcntw () * 27, z0), ++ svst3 (p0, x0 - svcntw () * 27, z0)) ++ ++/* ++** st3_vnum_u32_0: ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_0, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u32_1: ++** incb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_1, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u32_2: ++** incb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_2, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_u32_3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_3, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_u32_21: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_21, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_u32_24: ++** addvl (x[0-9]+), x0, #24 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_24, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u32_m1: ++** decb x0 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_m1, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_u32_m2: ++** decb x0, all, mul #2 ++** st3w {z0\.s - z2\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_m2, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_u32_m3: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_m3, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_u32_m24: ++** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_m24, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_u32_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3w {z0\.s - z2\.s}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_m27, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st3_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st3w {z0\.s - z2\.s}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u32_x1, svuint32x3_t, uint32_t, ++ svst3_vnum_u32 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u64.c +new file mode 100644 +index 000000000..31cb304ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u64.c +@@ -0,0 +1,242 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_u64_base: ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u64_base, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_u64_index: ++** st3d {z0\.d - z2\.d}, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st3_u64_index, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u64_1: ++** incb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u64_1, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 + svcntd (), z0), ++ svst3 (p0, x0 + svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u64_2: ++** incb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u64_2, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 + svcntd () * 2, z0), ++ svst3 (p0, x0 + svcntd () * 2, z0)) ++ ++/* ++** st3_u64_3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u64_3, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 + svcntd () * 3, z0), ++ svst3 (p0, x0 + svcntd () * 3, z0)) ++ ++/* ++** st3_u64_21: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u64_21, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 + svcntd () * 21, z0), ++ svst3 (p0, x0 + svcntd () * 21, z0)) ++ ++/* ++** st3_u64_24: ++** addvl (x[0-9]+), x0, #24 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_u64_24, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 + svcntd () * 24, z0), ++ svst3 (p0, x0 + svcntd () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_u64_m1: ++** decb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u64_m1, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 - svcntd (), z0), ++ svst3 (p0, x0 - svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u64_m2: ++** decb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u64_m2, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 - svcntd () * 2, z0), ++ svst3 (p0, x0 - svcntd () * 2, z0)) ++ ++/* ++** st3_u64_m3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u64_m3, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 - svcntd () * 3, z0), ++ svst3 (p0, x0 - svcntd () * 3, z0)) ++ ++/* ++** st3_u64_m24: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u64_m24, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 - svcntd () * 24, z0), ++ svst3 (p0, x0 - svcntd () * 24, z0)) ++ ++/* ++** st3_u64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_u64_m27, svuint64x3_t, uint64_t, ++ svst3_u64 (p0, x0 - svcntd () * 27, z0), ++ svst3 (p0, x0 - svcntd () * 27, z0)) ++ ++/* ++** st3_vnum_u64_0: ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_0, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u64_1: ++** incb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_1, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u64_2: ++** incb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_2, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_u64_3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_3, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_u64_21: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_21, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_u64_24: ++** addvl (x[0-9]+), x0, #24 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_24, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u64_m1: ++** decb x0 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_m1, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_u64_m2: ++** decb x0, all, mul #2 ++** st3d {z0\.d - z2\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_m2, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_u64_m3: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_m3, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_u64_m24: ++** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_m24, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_u64_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3d {z0\.d - z2\.d}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_m27, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st3_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st3d {z0\.d - z2\.d}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u64_x1, svuint64x3_t, uint64_t, ++ svst3_vnum_u64 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u8.c +new file mode 100644 +index 000000000..e2d5a19ad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u8.c +@@ -0,0 +1,246 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st3_u8_base: ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u8_base, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0, z0), ++ svst3 (p0, x0, z0)) ++ ++/* ++** st3_u8_index: ++** st3b {z0\.b - z2\.b}, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st3_u8_index, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 + x1, z0), ++ svst3 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u8_1: ++** incb x0 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u8_1, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 + svcntb (), z0), ++ svst3 (p0, x0 + svcntb (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u8_2: ++** incb x0, all, mul #2 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u8_2, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 + svcntb () * 2, z0), ++ svst3 (p0, x0 + svcntb () * 2, z0)) ++ ++/* ++** st3_u8_3: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u8_3, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 + svcntb () * 3, z0), ++ svst3 (p0, x0 + svcntb () * 3, z0)) ++ ++/* ++** st3_u8_21: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u8_21, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 + svcntb () * 21, z0), ++ svst3 (p0, x0 + svcntb () * 21, z0)) ++ ++/* ++** st3_u8_24: ++** addvl (x[0-9]+), x0, #24 ++** st3b {z0\.b - z2\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_u8_24, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 + svcntb () * 24, z0), ++ svst3 (p0, x0 + svcntb () * 24, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_u8_m1: ++** decb x0 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u8_m1, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 - svcntb (), z0), ++ svst3 (p0, x0 - svcntb (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_u8_m2: ++** decb x0, all, mul #2 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_u8_m2, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 - svcntb () * 2, z0), ++ svst3 (p0, x0 - svcntb () * 2, z0)) ++ ++/* ++** st3_u8_m3: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u8_m3, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 - svcntb () * 3, z0), ++ svst3 (p0, x0 - svcntb () * 3, z0)) ++ ++/* ++** st3_u8_m24: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_u8_m24, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 - svcntb () * 24, z0), ++ svst3 (p0, x0 - svcntb () * 24, z0)) ++ ++/* ++** st3_u8_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3b {z0\.b - z2\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_u8_m27, svuint8x3_t, uint8_t, ++ svst3_u8 (p0, x0 - svcntb () * 27, z0), ++ svst3 (p0, x0 - svcntb () * 27, z0)) ++ ++/* ++** st3_vnum_u8_0: ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_0, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, 0, z0), ++ svst3_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u8_1: ++** incb x0 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_1, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, 1, z0), ++ svst3_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u8_2: ++** incb x0, all, mul #2 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_2, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, 2, z0), ++ svst3_vnum (p0, x0, 2, z0)) ++ ++/* ++** st3_vnum_u8_3: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_3, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, 3, z0), ++ svst3_vnum (p0, x0, 3, z0)) ++ ++/* ++** st3_vnum_u8_21: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #21, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_21, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, 21, z0), ++ svst3_vnum (p0, x0, 21, z0)) ++ ++/* ++** st3_vnum_u8_24: ++** addvl (x[0-9]+), x0, #24 ++** st3b {z0\.b - z2\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_24, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, 24, z0), ++ svst3_vnum (p0, x0, 24, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st3_vnum_u8_m1: ++** decb x0 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_m1, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, -1, z0), ++ svst3_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st3_vnum_u8_m2: ++** decb x0, all, mul #2 ++** st3b {z0\.b - z2\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_m2, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, -2, z0), ++ svst3_vnum (p0, x0, -2, z0)) ++ ++/* ++** st3_vnum_u8_m3: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #-3, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_m3, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, -3, z0), ++ svst3_vnum (p0, x0, -3, z0)) ++ ++/* ++** st3_vnum_u8_m24: ++** st3b {z0\.b - z2\.b}, p0, \[x0, #-24, mul vl\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_m24, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, -24, z0), ++ svst3_vnum (p0, x0, -24, z0)) ++ ++/* ++** st3_vnum_u8_m27: ++** addvl (x[0-9]+), x0, #-27 ++** st3b {z0\.b - z2\.b}, p0, \[\1\] ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_m27, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, -27, z0), ++ svst3_vnum (p0, x0, -27, z0)) ++ ++/* ++** st3_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st3b {z0\.b - z2\.b}, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st3b {z0\.b - z2\.b}, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st3_vnum_u8_x1, svuint8x3_t, uint8_t, ++ svst3_vnum_u8 (p0, x0, x1, z0), ++ svst3_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_bf16.c +new file mode 100644 +index 000000000..b8d9f4afa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_bf16.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_bf16_base: ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_bf16_base, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_bf16_index: ++** st4h {z0\.h - z3\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st4_bf16_index, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_bf16_1: ++** incb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_bf16_1, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 + svcnth (), z0), ++ svst4 (p0, x0 + svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_bf16_2: ++** incb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_bf16_2, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 + svcnth () * 2, z0), ++ svst4 (p0, x0 + svcnth () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_bf16_3: ++** incb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_bf16_3, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 + svcnth () * 3, z0), ++ svst4 (p0, x0 + svcnth () * 3, z0)) ++ ++/* ++** st4_bf16_4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_bf16_4, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 + svcnth () * 4, z0), ++ svst4 (p0, x0 + svcnth () * 4, z0)) ++ ++/* ++** st4_bf16_28: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_bf16_28, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 + svcnth () * 28, z0), ++ svst4 (p0, x0 + svcnth () * 28, z0)) ++ ++/* ++** st4_bf16_32: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_bf16_32, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 + svcnth () * 32, z0), ++ svst4 (p0, x0 + svcnth () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_bf16_m1: ++** decb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_bf16_m1, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 - svcnth (), z0), ++ svst4 (p0, x0 - svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_bf16_m2: ++** decb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_bf16_m2, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 - svcnth () * 2, z0), ++ svst4 (p0, x0 - svcnth () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_bf16_m3: ++** decb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_bf16_m3, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 - svcnth () * 3, z0), ++ svst4 (p0, x0 - svcnth () * 3, z0)) ++ ++/* ++** st4_bf16_m4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_bf16_m4, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 - svcnth () * 4, z0), ++ svst4 (p0, x0 - svcnth () * 4, z0)) ++ ++/* ++** st4_bf16_m32: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_bf16_m32, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 - svcnth () * 32, z0), ++ svst4 (p0, x0 - svcnth () * 32, z0)) ++ ++/* ++** st4_bf16_m36: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_bf16_m36, svbfloat16x4_t, bfloat16_t, ++ svst4_bf16 (p0, x0 - svcnth () * 36, z0), ++ svst4 (p0, x0 - svcnth () * 36, z0)) ++ ++/* ++** st4_vnum_bf16_0: ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_0, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_bf16_1: ++** incb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_1, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_bf16_2: ++** incb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_2, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_bf16_3: ++** incb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_3, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_bf16_4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_4, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_bf16_28: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_28, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_bf16_32: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_32, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_bf16_m1: ++** decb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_bf16_m2: ++** decb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_bf16_m3: ++** decb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_bf16_m4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_bf16_m32: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_bf16_m36: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st4_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st4h {z0\.h - z3\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st4_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t, ++ svst4_vnum_bf16 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f16.c +new file mode 100644 +index 000000000..296bdb4a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f16.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_f16_base: ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f16_base, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_f16_index: ++** st4h {z0\.h - z3\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st4_f16_index, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f16_1: ++** incb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f16_1, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 + svcnth (), z0), ++ svst4 (p0, x0 + svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f16_2: ++** incb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f16_2, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 + svcnth () * 2, z0), ++ svst4 (p0, x0 + svcnth () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f16_3: ++** incb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f16_3, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 + svcnth () * 3, z0), ++ svst4 (p0, x0 + svcnth () * 3, z0)) ++ ++/* ++** st4_f16_4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f16_4, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 + svcnth () * 4, z0), ++ svst4 (p0, x0 + svcnth () * 4, z0)) ++ ++/* ++** st4_f16_28: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f16_28, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 + svcnth () * 28, z0), ++ svst4 (p0, x0 + svcnth () * 28, z0)) ++ ++/* ++** st4_f16_32: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_f16_32, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 + svcnth () * 32, z0), ++ svst4 (p0, x0 + svcnth () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f16_m1: ++** decb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f16_m1, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 - svcnth (), z0), ++ svst4 (p0, x0 - svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f16_m2: ++** decb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f16_m2, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 - svcnth () * 2, z0), ++ svst4 (p0, x0 - svcnth () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_f16_m3: ++** decb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f16_m3, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 - svcnth () * 3, z0), ++ svst4 (p0, x0 - svcnth () * 3, z0)) ++ ++/* ++** st4_f16_m4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f16_m4, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 - svcnth () * 4, z0), ++ svst4 (p0, x0 - svcnth () * 4, z0)) ++ ++/* ++** st4_f16_m32: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f16_m32, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 - svcnth () * 32, z0), ++ svst4 (p0, x0 - svcnth () * 32, z0)) ++ ++/* ++** st4_f16_m36: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_f16_m36, svfloat16x4_t, float16_t, ++ svst4_f16 (p0, x0 - svcnth () * 36, z0), ++ svst4 (p0, x0 - svcnth () * 36, z0)) ++ ++/* ++** st4_vnum_f16_0: ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_0, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f16_1: ++** incb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_1, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f16_2: ++** incb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_2, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f16_3: ++** incb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_3, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_f16_4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_4, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_f16_28: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_28, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_f16_32: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_32, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f16_m1: ++** decb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_m1, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f16_m2: ++** decb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_m2, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_f16_m3: ++** decb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_m3, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_f16_m4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_m4, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_f16_m32: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_m32, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_f16_m36: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_m36, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st4_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st4h {z0\.h - z3\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f16_x1, svfloat16x4_t, float16_t, ++ svst4_vnum_f16 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f32.c +new file mode 100644 +index 000000000..313ed7bc0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f32.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_f32_base: ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f32_base, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_f32_index: ++** st4w {z0\.s - z3\.s}, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st4_f32_index, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f32_1: ++** incb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f32_1, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 + svcntw (), z0), ++ svst4 (p0, x0 + svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f32_2: ++** incb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f32_2, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 + svcntw () * 2, z0), ++ svst4 (p0, x0 + svcntw () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_f32_3: ++** incb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f32_3, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 + svcntw () * 3, z0), ++ svst4 (p0, x0 + svcntw () * 3, z0)) ++ ++/* ++** st4_f32_4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f32_4, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 + svcntw () * 4, z0), ++ svst4 (p0, x0 + svcntw () * 4, z0)) ++ ++/* ++** st4_f32_28: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f32_28, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 + svcntw () * 28, z0), ++ svst4 (p0, x0 + svcntw () * 28, z0)) ++ ++/* ++** st4_f32_32: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_f32_32, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 + svcntw () * 32, z0), ++ svst4 (p0, x0 + svcntw () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f32_m1: ++** decb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f32_m1, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 - svcntw (), z0), ++ svst4 (p0, x0 - svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f32_m2: ++** decb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f32_m2, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 - svcntw () * 2, z0), ++ svst4 (p0, x0 - svcntw () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f32_m3: ++** decb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f32_m3, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 - svcntw () * 3, z0), ++ svst4 (p0, x0 - svcntw () * 3, z0)) ++ ++/* ++** st4_f32_m4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f32_m4, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 - svcntw () * 4, z0), ++ svst4 (p0, x0 - svcntw () * 4, z0)) ++ ++/* ++** st4_f32_m32: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f32_m32, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 - svcntw () * 32, z0), ++ svst4 (p0, x0 - svcntw () * 32, z0)) ++ ++/* ++** st4_f32_m36: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_f32_m36, svfloat32x4_t, float32_t, ++ svst4_f32 (p0, x0 - svcntw () * 36, z0), ++ svst4 (p0, x0 - svcntw () * 36, z0)) ++ ++/* ++** st4_vnum_f32_0: ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_0, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f32_1: ++** incb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_1, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f32_2: ++** incb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_2, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_f32_3: ++** incb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_3, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_f32_4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_4, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_f32_28: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_28, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_f32_32: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_32, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f32_m1: ++** decb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_m1, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f32_m2: ++** decb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_m2, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f32_m3: ++** decb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_m3, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_f32_m4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_m4, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_f32_m32: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_m32, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_f32_m36: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_m36, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st4_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st4w {z0\.s - z3\.s}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f32_x1, svfloat32x4_t, float32_t, ++ svst4_vnum_f32 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f64.c +new file mode 100644 +index 000000000..6c65ef016 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f64.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_f64_base: ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f64_base, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_f64_index: ++** st4d {z0\.d - z3\.d}, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st4_f64_index, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f64_1: ++** incb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f64_1, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 + svcntd (), z0), ++ svst4 (p0, x0 + svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f64_2: ++** incb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f64_2, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 + svcntd () * 2, z0), ++ svst4 (p0, x0 + svcntd () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f64_3: ++** incb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f64_3, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 + svcntd () * 3, z0), ++ svst4 (p0, x0 + svcntd () * 3, z0)) ++ ++/* ++** st4_f64_4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f64_4, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 + svcntd () * 4, z0), ++ svst4 (p0, x0 + svcntd () * 4, z0)) ++ ++/* ++** st4_f64_28: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f64_28, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 + svcntd () * 28, z0), ++ svst4 (p0, x0 + svcntd () * 28, z0)) ++ ++/* ++** st4_f64_32: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_f64_32, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 + svcntd () * 32, z0), ++ svst4 (p0, x0 + svcntd () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f64_m1: ++** decb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f64_m1, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 - svcntd (), z0), ++ svst4 (p0, x0 - svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_f64_m2: ++** decb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f64_m2, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 - svcntd () * 2, z0), ++ svst4 (p0, x0 - svcntd () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_f64_m3: ++** decb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_f64_m3, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 - svcntd () * 3, z0), ++ svst4 (p0, x0 - svcntd () * 3, z0)) ++ ++/* ++** st4_f64_m4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f64_m4, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 - svcntd () * 4, z0), ++ svst4 (p0, x0 - svcntd () * 4, z0)) ++ ++/* ++** st4_f64_m32: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_f64_m32, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 - svcntd () * 32, z0), ++ svst4 (p0, x0 - svcntd () * 32, z0)) ++ ++/* ++** st4_f64_m36: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_f64_m36, svfloat64x4_t, float64_t, ++ svst4_f64 (p0, x0 - svcntd () * 36, z0), ++ svst4 (p0, x0 - svcntd () * 36, z0)) ++ ++/* ++** st4_vnum_f64_0: ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_0, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f64_1: ++** incb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_1, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f64_2: ++** incb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_2, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f64_3: ++** incb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_3, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_f64_4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_4, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_f64_28: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_28, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_f64_32: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_32, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f64_m1: ++** decb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_m1, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_f64_m2: ++** decb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_m2, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_f64_m3: ++** decb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_m3, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_f64_m4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_m4, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_f64_m32: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_m32, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_f64_m36: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_m36, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st4_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st4d {z0\.d - z3\.d}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st4_vnum_f64_x1, svfloat64x4_t, float64_t, ++ svst4_vnum_f64 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s16.c +new file mode 100644 +index 000000000..35ac5f803 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s16.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_s16_base: ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s16_base, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_s16_index: ++** st4h {z0\.h - z3\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st4_s16_index, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s16_1: ++** incb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s16_1, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 + svcnth (), z0), ++ svst4 (p0, x0 + svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s16_2: ++** incb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s16_2, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 + svcnth () * 2, z0), ++ svst4 (p0, x0 + svcnth () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_s16_3: ++** incb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s16_3, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 + svcnth () * 3, z0), ++ svst4 (p0, x0 + svcnth () * 3, z0)) ++ ++/* ++** st4_s16_4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s16_4, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 + svcnth () * 4, z0), ++ svst4 (p0, x0 + svcnth () * 4, z0)) ++ ++/* ++** st4_s16_28: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s16_28, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 + svcnth () * 28, z0), ++ svst4 (p0, x0 + svcnth () * 28, z0)) ++ ++/* ++** st4_s16_32: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_s16_32, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 + svcnth () * 32, z0), ++ svst4 (p0, x0 + svcnth () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s16_m1: ++** decb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s16_m1, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 - svcnth (), z0), ++ svst4 (p0, x0 - svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s16_m2: ++** decb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s16_m2, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 - svcnth () * 2, z0), ++ svst4 (p0, x0 - svcnth () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s16_m3: ++** decb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s16_m3, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 - svcnth () * 3, z0), ++ svst4 (p0, x0 - svcnth () * 3, z0)) ++ ++/* ++** st4_s16_m4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s16_m4, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 - svcnth () * 4, z0), ++ svst4 (p0, x0 - svcnth () * 4, z0)) ++ ++/* ++** st4_s16_m32: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s16_m32, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 - svcnth () * 32, z0), ++ svst4 (p0, x0 - svcnth () * 32, z0)) ++ ++/* ++** st4_s16_m36: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_s16_m36, svint16x4_t, int16_t, ++ svst4_s16 (p0, x0 - svcnth () * 36, z0), ++ svst4 (p0, x0 - svcnth () * 36, z0)) ++ ++/* ++** st4_vnum_s16_0: ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_0, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s16_1: ++** incb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_1, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s16_2: ++** incb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_2, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_s16_3: ++** incb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_3, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_s16_4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_4, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_s16_28: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_28, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_s16_32: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_32, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s16_m1: ++** decb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_m1, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s16_m2: ++** decb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_m2, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s16_m3: ++** decb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_m3, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_s16_m4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_m4, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_s16_m32: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_m32, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_s16_m36: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_m36, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st4_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st4h {z0\.h - z3\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s16_x1, svint16x4_t, int16_t, ++ svst4_vnum_s16 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s32.c +new file mode 100644 +index 000000000..b8302f10d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s32.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_s32_base: ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s32_base, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_s32_index: ++** st4w {z0\.s - z3\.s}, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st4_s32_index, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_s32_1: ++** incb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s32_1, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 + svcntw (), z0), ++ svst4 (p0, x0 + svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s32_2: ++** incb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s32_2, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 + svcntw () * 2, z0), ++ svst4 (p0, x0 + svcntw () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s32_3: ++** incb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s32_3, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 + svcntw () * 3, z0), ++ svst4 (p0, x0 + svcntw () * 3, z0)) ++ ++/* ++** st4_s32_4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s32_4, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 + svcntw () * 4, z0), ++ svst4 (p0, x0 + svcntw () * 4, z0)) ++ ++/* ++** st4_s32_28: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s32_28, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 + svcntw () * 28, z0), ++ svst4 (p0, x0 + svcntw () * 28, z0)) ++ ++/* ++** st4_s32_32: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_s32_32, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 + svcntw () * 32, z0), ++ svst4 (p0, x0 + svcntw () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s32_m1: ++** decb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s32_m1, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 - svcntw (), z0), ++ svst4 (p0, x0 - svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s32_m2: ++** decb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s32_m2, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 - svcntw () * 2, z0), ++ svst4 (p0, x0 - svcntw () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s32_m3: ++** decb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s32_m3, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 - svcntw () * 3, z0), ++ svst4 (p0, x0 - svcntw () * 3, z0)) ++ ++/* ++** st4_s32_m4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s32_m4, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 - svcntw () * 4, z0), ++ svst4 (p0, x0 - svcntw () * 4, z0)) ++ ++/* ++** st4_s32_m32: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s32_m32, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 - svcntw () * 32, z0), ++ svst4 (p0, x0 - svcntw () * 32, z0)) ++ ++/* ++** st4_s32_m36: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_s32_m36, svint32x4_t, int32_t, ++ svst4_s32 (p0, x0 - svcntw () * 36, z0), ++ svst4 (p0, x0 - svcntw () * 36, z0)) ++ ++/* ++** st4_vnum_s32_0: ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_0, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s32_1: ++** incb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_1, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_s32_2: ++** incb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_2, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s32_3: ++** incb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_3, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_s32_4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_4, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_s32_28: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_28, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_s32_32: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_32, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s32_m1: ++** decb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_m1, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s32_m2: ++** decb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_m2, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s32_m3: ++** decb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_m3, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_s32_m4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_m4, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_s32_m32: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_m32, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_s32_m36: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_m36, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st4_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st4w {z0\.s - z3\.s}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s32_x1, svint32x4_t, int32_t, ++ svst4_vnum_s32 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s64.c +new file mode 100644 +index 000000000..bf9cdf5e0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s64.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_s64_base: ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s64_base, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_s64_index: ++** st4d {z0\.d - z3\.d}, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st4_s64_index, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s64_1: ++** incb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s64_1, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 + svcntd (), z0), ++ svst4 (p0, x0 + svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s64_2: ++** incb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s64_2, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 + svcntd () * 2, z0), ++ svst4 (p0, x0 + svcntd () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s64_3: ++** incb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s64_3, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 + svcntd () * 3, z0), ++ svst4 (p0, x0 + svcntd () * 3, z0)) ++ ++/* ++** st4_s64_4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s64_4, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 + svcntd () * 4, z0), ++ svst4 (p0, x0 + svcntd () * 4, z0)) ++ ++/* ++** st4_s64_28: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s64_28, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 + svcntd () * 28, z0), ++ svst4 (p0, x0 + svcntd () * 28, z0)) ++ ++/* ++** st4_s64_32: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_s64_32, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 + svcntd () * 32, z0), ++ svst4 (p0, x0 + svcntd () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s64_m1: ++** decb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s64_m1, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 - svcntd (), z0), ++ svst4 (p0, x0 - svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s64_m2: ++** decb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s64_m2, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 - svcntd () * 2, z0), ++ svst4 (p0, x0 - svcntd () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_s64_m3: ++** decb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s64_m3, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 - svcntd () * 3, z0), ++ svst4 (p0, x0 - svcntd () * 3, z0)) ++ ++/* ++** st4_s64_m4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s64_m4, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 - svcntd () * 4, z0), ++ svst4 (p0, x0 - svcntd () * 4, z0)) ++ ++/* ++** st4_s64_m32: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s64_m32, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 - svcntd () * 32, z0), ++ svst4 (p0, x0 - svcntd () * 32, z0)) ++ ++/* ++** st4_s64_m36: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_s64_m36, svint64x4_t, int64_t, ++ svst4_s64 (p0, x0 - svcntd () * 36, z0), ++ svst4 (p0, x0 - svcntd () * 36, z0)) ++ ++/* ++** st4_vnum_s64_0: ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_0, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s64_1: ++** incb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_1, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s64_2: ++** incb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_2, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s64_3: ++** incb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_3, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_s64_4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_4, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_s64_28: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_28, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_s64_32: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_32, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s64_m1: ++** decb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_m1, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s64_m2: ++** decb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_m2, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_s64_m3: ++** decb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_m3, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_s64_m4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_m4, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_s64_m32: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_m32, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_s64_m36: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_m36, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st4_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st4d {z0\.d - z3\.d}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s64_x1, svint64x4_t, int64_t, ++ svst4_vnum_s64 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c +new file mode 100644 +index 000000000..1eb0bf131 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c +@@ -0,0 +1,290 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_s8_base: ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s8_base, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_s8_index: ++** st4b {z0\.b - z3\.b}, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st4_s8_index, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s8_1: ++** incb x0 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s8_1, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 + svcntb (), z0), ++ svst4 (p0, x0 + svcntb (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s8_2: ++** incb x0, all, mul #2 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s8_2, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 + svcntb () * 2, z0), ++ svst4 (p0, x0 + svcntb () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s8_3: ++** incb x0, all, mul #3 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s8_3, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 + svcntb () * 3, z0), ++ svst4 (p0, x0 + svcntb () * 3, z0)) ++ ++/* ++** st4_s8_4: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s8_4, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 + svcntb () * 4, z0), ++ svst4 (p0, x0 + svcntb () * 4, z0)) ++ ++/* ++** st4_s8_28: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s8_28, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 + svcntb () * 28, z0), ++ svst4 (p0, x0 + svcntb () * 28, z0)) ++ ++/* ++** st4_s8_32: ++** [^{]* ++** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_s8_32, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 + svcntb () * 32, z0), ++ svst4 (p0, x0 + svcntb () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_s8_m1: ++** decb x0 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s8_m1, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 - svcntb (), z0), ++ svst4 (p0, x0 - svcntb (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s8_m2: ++** decb x0, all, mul #2 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s8_m2, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 - svcntb () * 2, z0), ++ svst4 (p0, x0 - svcntb () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_s8_m3: ++** decb x0, all, mul #3 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_s8_m3, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 - svcntb () * 3, z0), ++ svst4 (p0, x0 - svcntb () * 3, z0)) ++ ++/* ++** st4_s8_m4: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s8_m4, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 - svcntb () * 4, z0), ++ svst4 (p0, x0 - svcntb () * 4, z0)) ++ ++/* ++** st4_s8_m32: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_s8_m32, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 - svcntb () * 32, z0), ++ svst4 (p0, x0 - svcntb () * 32, z0)) ++ ++/* ++** st4_s8_m36: ++** [^{]* ++** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_s8_m36, svint8x4_t, int8_t, ++ svst4_s8 (p0, x0 - svcntb () * 36, z0), ++ svst4 (p0, x0 - svcntb () * 36, z0)) ++ ++/* ++** st4_vnum_s8_0: ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_0, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s8_1: ++** incb x0 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_1, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s8_2: ++** incb x0, all, mul #2 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_2, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s8_3: ++** incb x0, all, mul #3 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_3, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_s8_4: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_4, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_s8_28: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_28, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_s8_32: ++** [^{]* ++** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_32, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s8_m1: ++** decb x0 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_m1, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_s8_m2: ++** decb x0, all, mul #2 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_m2, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_s8_m3: ++** decb x0, all, mul #3 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_m3, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_s8_m4: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_m4, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_s8_m32: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_m32, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_s8_m36: ++** [^{]* ++** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_m36, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* ++** st4_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st4b {z0\.b - z3\.b}, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st4b {z0\.b - z3\.b}, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st4_vnum_s8_x1, svint8x4_t, int8_t, ++ svst4_vnum_s8 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u16.c +new file mode 100644 +index 000000000..5272c7f61 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u16.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_u16_base: ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u16_base, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_u16_index: ++** st4h {z0\.h - z3\.h}, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (st4_u16_index, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u16_1: ++** incb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u16_1, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 + svcnth (), z0), ++ svst4 (p0, x0 + svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u16_2: ++** incb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u16_2, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 + svcnth () * 2, z0), ++ svst4 (p0, x0 + svcnth () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_u16_3: ++** incb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u16_3, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 + svcnth () * 3, z0), ++ svst4 (p0, x0 + svcnth () * 3, z0)) ++ ++/* ++** st4_u16_4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u16_4, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 + svcnth () * 4, z0), ++ svst4 (p0, x0 + svcnth () * 4, z0)) ++ ++/* ++** st4_u16_28: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u16_28, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 + svcnth () * 28, z0), ++ svst4 (p0, x0 + svcnth () * 28, z0)) ++ ++/* ++** st4_u16_32: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_u16_32, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 + svcnth () * 32, z0), ++ svst4 (p0, x0 + svcnth () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u16_m1: ++** decb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u16_m1, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 - svcnth (), z0), ++ svst4 (p0, x0 - svcnth (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u16_m2: ++** decb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u16_m2, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 - svcnth () * 2, z0), ++ svst4 (p0, x0 - svcnth () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u16_m3: ++** decb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u16_m3, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 - svcnth () * 3, z0), ++ svst4 (p0, x0 - svcnth () * 3, z0)) ++ ++/* ++** st4_u16_m4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u16_m4, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 - svcnth () * 4, z0), ++ svst4 (p0, x0 - svcnth () * 4, z0)) ++ ++/* ++** st4_u16_m32: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u16_m32, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 - svcnth () * 32, z0), ++ svst4 (p0, x0 - svcnth () * 32, z0)) ++ ++/* ++** st4_u16_m36: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_u16_m36, svuint16x4_t, uint16_t, ++ svst4_u16 (p0, x0 - svcnth () * 36, z0), ++ svst4 (p0, x0 - svcnth () * 36, z0)) ++ ++/* ++** st4_vnum_u16_0: ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_0, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u16_1: ++** incb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_1, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u16_2: ++** incb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_2, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_u16_3: ++** incb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_3, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_u16_4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_4, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_u16_28: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_28, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_u16_32: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_32, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u16_m1: ++** decb x0 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_m1, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u16_m2: ++** decb x0, all, mul #2 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_m2, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u16_m3: ++** decb x0, all, mul #3 ++** st4h {z0\.h - z3\.h}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_m3, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_u16_m4: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_m4, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_u16_m32: ++** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_m32, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_u16_m36: ++** [^{]* ++** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_m36, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st4_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st4h {z0\.h - z3\.h}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u16_x1, svuint16x4_t, uint16_t, ++ svst4_vnum_u16 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u32.c +new file mode 100644 +index 000000000..8b9b322e5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u32.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_u32_base: ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u32_base, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_u32_index: ++** st4w {z0\.s - z3\.s}, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (st4_u32_index, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u32_1: ++** incb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u32_1, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 + svcntw (), z0), ++ svst4 (p0, x0 + svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u32_2: ++** incb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u32_2, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 + svcntw () * 2, z0), ++ svst4 (p0, x0 + svcntw () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u32_3: ++** incb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u32_3, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 + svcntw () * 3, z0), ++ svst4 (p0, x0 + svcntw () * 3, z0)) ++ ++/* ++** st4_u32_4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u32_4, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 + svcntw () * 4, z0), ++ svst4 (p0, x0 + svcntw () * 4, z0)) ++ ++/* ++** st4_u32_28: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u32_28, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 + svcntw () * 28, z0), ++ svst4 (p0, x0 + svcntw () * 28, z0)) ++ ++/* ++** st4_u32_32: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_u32_32, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 + svcntw () * 32, z0), ++ svst4 (p0, x0 + svcntw () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u32_m1: ++** decb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u32_m1, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 - svcntw (), z0), ++ svst4 (p0, x0 - svcntw (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u32_m2: ++** decb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u32_m2, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 - svcntw () * 2, z0), ++ svst4 (p0, x0 - svcntw () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_u32_m3: ++** decb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u32_m3, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 - svcntw () * 3, z0), ++ svst4 (p0, x0 - svcntw () * 3, z0)) ++ ++/* ++** st4_u32_m4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u32_m4, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 - svcntw () * 4, z0), ++ svst4 (p0, x0 - svcntw () * 4, z0)) ++ ++/* ++** st4_u32_m32: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u32_m32, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 - svcntw () * 32, z0), ++ svst4 (p0, x0 - svcntw () * 32, z0)) ++ ++/* ++** st4_u32_m36: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_u32_m36, svuint32x4_t, uint32_t, ++ svst4_u32 (p0, x0 - svcntw () * 36, z0), ++ svst4 (p0, x0 - svcntw () * 36, z0)) ++ ++/* ++** st4_vnum_u32_0: ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_0, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u32_1: ++** incb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_1, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u32_2: ++** incb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_2, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u32_3: ++** incb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_3, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_u32_4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_4, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_u32_28: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_28, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_u32_32: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_32, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u32_m1: ++** decb x0 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_m1, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u32_m2: ++** decb x0, all, mul #2 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_m2, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_u32_m3: ++** decb x0, all, mul #3 ++** st4w {z0\.s - z3\.s}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_m3, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_u32_m4: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_m4, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_u32_m32: ++** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_m32, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_u32_m36: ++** [^{]* ++** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_m36, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st4_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st4w {z0\.s - z3\.s}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u32_x1, svuint32x4_t, uint32_t, ++ svst4_vnum_u32 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u64.c +new file mode 100644 +index 000000000..53b78f5ba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u64.c +@@ -0,0 +1,286 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_u64_base: ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u64_base, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_u64_index: ++** st4d {z0\.d - z3\.d}, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (st4_u64_index, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u64_1: ++** incb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u64_1, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 + svcntd (), z0), ++ svst4 (p0, x0 + svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u64_2: ++** incb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u64_2, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 + svcntd () * 2, z0), ++ svst4 (p0, x0 + svcntd () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_u64_3: ++** incb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u64_3, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 + svcntd () * 3, z0), ++ svst4 (p0, x0 + svcntd () * 3, z0)) ++ ++/* ++** st4_u64_4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u64_4, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 + svcntd () * 4, z0), ++ svst4 (p0, x0 + svcntd () * 4, z0)) ++ ++/* ++** st4_u64_28: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u64_28, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 + svcntd () * 28, z0), ++ svst4 (p0, x0 + svcntd () * 28, z0)) ++ ++/* ++** st4_u64_32: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_u64_32, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 + svcntd () * 32, z0), ++ svst4 (p0, x0 + svcntd () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u64_m1: ++** decb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u64_m1, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 - svcntd (), z0), ++ svst4 (p0, x0 - svcntd (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u64_m2: ++** decb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u64_m2, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 - svcntd () * 2, z0), ++ svst4 (p0, x0 - svcntd () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u64_m3: ++** decb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u64_m3, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 - svcntd () * 3, z0), ++ svst4 (p0, x0 - svcntd () * 3, z0)) ++ ++/* ++** st4_u64_m4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u64_m4, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 - svcntd () * 4, z0), ++ svst4 (p0, x0 - svcntd () * 4, z0)) ++ ++/* ++** st4_u64_m32: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u64_m32, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 - svcntd () * 32, z0), ++ svst4 (p0, x0 - svcntd () * 32, z0)) ++ ++/* ++** st4_u64_m36: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_u64_m36, svuint64x4_t, uint64_t, ++ svst4_u64 (p0, x0 - svcntd () * 36, z0), ++ svst4 (p0, x0 - svcntd () * 36, z0)) ++ ++/* ++** st4_vnum_u64_0: ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_0, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u64_1: ++** incb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_1, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u64_2: ++** incb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_2, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_u64_3: ++** incb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_3, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_u64_4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_4, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_u64_28: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_28, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_u64_32: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_32, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u64_m1: ++** decb x0 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_m1, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u64_m2: ++** decb x0, all, mul #2 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_m2, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u64_m3: ++** decb x0, all, mul #3 ++** st4d {z0\.d - z3\.d}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_m3, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_u64_m4: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_m4, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_u64_m32: ++** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_m32, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_u64_m36: ++** [^{]* ++** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_m36, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** st4_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** st4d {z0\.d - z3\.d}, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u64_x1, svuint64x4_t, uint64_t, ++ svst4_vnum_u64 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c +new file mode 100644 +index 000000000..e7c2e7d76 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c +@@ -0,0 +1,290 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** st4_u8_base: ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u8_base, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0, z0), ++ svst4 (p0, x0, z0)) ++ ++/* ++** st4_u8_index: ++** st4b {z0\.b - z3\.b}, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (st4_u8_index, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 + x1, z0), ++ svst4 (p0, x0 + x1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u8_1: ++** incb x0 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u8_1, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 + svcntb (), z0), ++ svst4 (p0, x0 + svcntb (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u8_2: ++** incb x0, all, mul #2 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u8_2, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 + svcntb () * 2, z0), ++ svst4 (p0, x0 + svcntb () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u8_3: ++** incb x0, all, mul #3 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u8_3, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 + svcntb () * 3, z0), ++ svst4 (p0, x0 + svcntb () * 3, z0)) ++ ++/* ++** st4_u8_4: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u8_4, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 + svcntb () * 4, z0), ++ svst4 (p0, x0 + svcntb () * 4, z0)) ++ ++/* ++** st4_u8_28: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u8_28, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 + svcntb () * 28, z0), ++ svst4 (p0, x0 + svcntb () * 28, z0)) ++ ++/* ++** st4_u8_32: ++** [^{]* ++** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_u8_32, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 + svcntb () * 32, z0), ++ svst4 (p0, x0 + svcntb () * 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u8_m1: ++** decb x0 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u8_m1, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 - svcntb (), z0), ++ svst4 (p0, x0 - svcntb (), z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_u8_m2: ++** decb x0, all, mul #2 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u8_m2, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 - svcntb () * 2, z0), ++ svst4 (p0, x0 - svcntb () * 2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_u8_m3: ++** decb x0, all, mul #3 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_u8_m3, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 - svcntb () * 3, z0), ++ svst4 (p0, x0 - svcntb () * 3, z0)) ++ ++/* ++** st4_u8_m4: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u8_m4, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 - svcntb () * 4, z0), ++ svst4 (p0, x0 - svcntb () * 4, z0)) ++ ++/* ++** st4_u8_m32: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_u8_m32, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 - svcntb () * 32, z0), ++ svst4 (p0, x0 - svcntb () * 32, z0)) ++ ++/* ++** st4_u8_m36: ++** [^{]* ++** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_u8_m36, svuint8x4_t, uint8_t, ++ svst4_u8 (p0, x0 - svcntb () * 36, z0), ++ svst4 (p0, x0 - svcntb () * 36, z0)) ++ ++/* ++** st4_vnum_u8_0: ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_0, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, 0, z0), ++ svst4_vnum (p0, x0, 0, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u8_1: ++** incb x0 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_1, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, 1, z0), ++ svst4_vnum (p0, x0, 1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u8_2: ++** incb x0, all, mul #2 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_2, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, 2, z0), ++ svst4_vnum (p0, x0, 2, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u8_3: ++** incb x0, all, mul #3 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_3, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, 3, z0), ++ svst4_vnum (p0, x0, 3, z0)) ++ ++/* ++** st4_vnum_u8_4: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_4, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, 4, z0), ++ svst4_vnum (p0, x0, 4, z0)) ++ ++/* ++** st4_vnum_u8_28: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #28, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_28, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, 28, z0), ++ svst4_vnum (p0, x0, 28, z0)) ++ ++/* ++** st4_vnum_u8_32: ++** [^{]* ++** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_32, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, 32, z0), ++ svst4_vnum (p0, x0, 32, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u8_m1: ++** decb x0 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_m1, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, -1, z0), ++ svst4_vnum (p0, x0, -1, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** st4_vnum_u8_m2: ++** decb x0, all, mul #2 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_m2, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, -2, z0), ++ svst4_vnum (p0, x0, -2, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** st4_vnum_u8_m3: ++** decb x0, all, mul #3 ++** st4b {z0\.b - z3\.b}, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_m3, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, -3, z0), ++ svst4_vnum (p0, x0, -3, z0)) ++ ++/* ++** st4_vnum_u8_m4: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #-4, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_m4, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, -4, z0), ++ svst4_vnum (p0, x0, -4, z0)) ++ ++/* ++** st4_vnum_u8_m32: ++** st4b {z0\.b - z3\.b}, p0, \[x0, #-32, mul vl\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_m32, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, -32, z0), ++ svst4_vnum (p0, x0, -32, z0)) ++ ++/* ++** st4_vnum_u8_m36: ++** [^{]* ++** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_m36, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, -36, z0), ++ svst4_vnum (p0, x0, -36, z0)) ++ ++/* ++** st4_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** st4b {z0\.b - z3\.b}, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** st4b {z0\.b - z3\.b}, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (st4_vnum_u8_x1, svuint8x4_t, uint8_t, ++ svst4_vnum_u8 (p0, x0, x1, z0), ++ svst4_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_bf16.c +new file mode 100644 +index 000000000..3c4d21f27 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_bf16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_bf16_base: ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_bf16_base, svbfloat16_t, bfloat16_t, ++ svstnt1_bf16 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_bf16_index: ++** stnt1h z0\.h, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (stnt1_bf16_index, svbfloat16_t, bfloat16_t, ++ svstnt1_bf16 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_bf16_1: ++** stnt1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_bf16_1, svbfloat16_t, bfloat16_t, ++ svstnt1_bf16 (p0, x0 + svcnth (), z0), ++ svstnt1 (p0, x0 + svcnth (), z0)) ++ ++/* ++** stnt1_bf16_7: ++** stnt1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_bf16_7, svbfloat16_t, bfloat16_t, ++ svstnt1_bf16 (p0, x0 + svcnth () * 7, z0), ++ svstnt1 (p0, x0 + svcnth () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_bf16_8: ++** incb x0, all, mul #8 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_bf16_8, svbfloat16_t, bfloat16_t, ++ svstnt1_bf16 (p0, x0 + svcnth () * 8, z0), ++ svstnt1 (p0, x0 + svcnth () * 8, z0)) ++ ++/* ++** stnt1_bf16_m1: ++** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_bf16_m1, svbfloat16_t, bfloat16_t, ++ svstnt1_bf16 (p0, x0 - svcnth (), z0), ++ svstnt1 (p0, x0 - svcnth (), z0)) ++ ++/* ++** stnt1_bf16_m8: ++** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_bf16_m8, svbfloat16_t, bfloat16_t, ++ svstnt1_bf16 (p0, x0 - svcnth () * 8, z0), ++ svstnt1 (p0, x0 - svcnth () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_bf16_m9: ++** decb x0, all, mul #9 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_bf16_m9, svbfloat16_t, bfloat16_t, ++ svstnt1_bf16 (p0, x0 - svcnth () * 9, z0), ++ svstnt1 (p0, x0 - svcnth () * 9, z0)) ++ ++/* ++** stnt1_vnum_bf16_0: ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_bf16_0, svbfloat16_t, bfloat16_t, ++ svstnt1_vnum_bf16 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_bf16_1: ++** stnt1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_bf16_1, svbfloat16_t, bfloat16_t, ++ svstnt1_vnum_bf16 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_bf16_7: ++** stnt1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_bf16_7, svbfloat16_t, bfloat16_t, ++ svstnt1_vnum_bf16 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_bf16_8: ++** incb x0, all, mul #8 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_bf16_8, svbfloat16_t, bfloat16_t, ++ svstnt1_vnum_bf16 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_bf16_m1: ++** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, ++ svstnt1_vnum_bf16 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_bf16_m8: ++** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_bf16_m8, svbfloat16_t, bfloat16_t, ++ svstnt1_vnum_bf16 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_bf16_m9: ++** decb x0, all, mul #9 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_bf16_m9, svbfloat16_t, bfloat16_t, ++ svstnt1_vnum_bf16 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** stnt1_vnum_bf16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** stnt1h z0\.h, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, ++ svstnt1_vnum_bf16 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f16.c +new file mode 100644 +index 000000000..a3d89caf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_f16_base: ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_f16_base, svfloat16_t, float16_t, ++ svstnt1_f16 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_f16_index: ++** stnt1h z0\.h, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (stnt1_f16_index, svfloat16_t, float16_t, ++ svstnt1_f16 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_f16_1: ++** stnt1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f16_1, svfloat16_t, float16_t, ++ svstnt1_f16 (p0, x0 + svcnth (), z0), ++ svstnt1 (p0, x0 + svcnth (), z0)) ++ ++/* ++** stnt1_f16_7: ++** stnt1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f16_7, svfloat16_t, float16_t, ++ svstnt1_f16 (p0, x0 + svcnth () * 7, z0), ++ svstnt1 (p0, x0 + svcnth () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_f16_8: ++** incb x0, all, mul #8 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_f16_8, svfloat16_t, float16_t, ++ svstnt1_f16 (p0, x0 + svcnth () * 8, z0), ++ svstnt1 (p0, x0 + svcnth () * 8, z0)) ++ ++/* ++** stnt1_f16_m1: ++** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f16_m1, svfloat16_t, float16_t, ++ svstnt1_f16 (p0, x0 - svcnth (), z0), ++ svstnt1 (p0, x0 - svcnth (), z0)) ++ ++/* ++** stnt1_f16_m8: ++** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f16_m8, svfloat16_t, float16_t, ++ svstnt1_f16 (p0, x0 - svcnth () * 8, z0), ++ svstnt1 (p0, x0 - svcnth () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_f16_m9: ++** decb x0, all, mul #9 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_f16_m9, svfloat16_t, float16_t, ++ svstnt1_f16 (p0, x0 - svcnth () * 9, z0), ++ svstnt1 (p0, x0 - svcnth () * 9, z0)) ++ ++/* ++** stnt1_vnum_f16_0: ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f16_0, svfloat16_t, float16_t, ++ svstnt1_vnum_f16 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_f16_1: ++** stnt1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f16_1, svfloat16_t, float16_t, ++ svstnt1_vnum_f16 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_f16_7: ++** stnt1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f16_7, svfloat16_t, float16_t, ++ svstnt1_vnum_f16 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_f16_8: ++** incb x0, all, mul #8 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f16_8, svfloat16_t, float16_t, ++ svstnt1_vnum_f16 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_f16_m1: ++** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f16_m1, svfloat16_t, float16_t, ++ svstnt1_vnum_f16 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_f16_m8: ++** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f16_m8, svfloat16_t, float16_t, ++ svstnt1_vnum_f16 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_f16_m9: ++** decb x0, all, mul #9 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f16_m9, svfloat16_t, float16_t, ++ svstnt1_vnum_f16 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** stnt1_vnum_f16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** stnt1h z0\.h, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f16_x1, svfloat16_t, float16_t, ++ svstnt1_vnum_f16 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f32.c +new file mode 100644 +index 000000000..24e890512 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_f32_base: ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_f32_base, svfloat32_t, float32_t, ++ svstnt1_f32 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_f32_index: ++** stnt1w z0\.s, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (stnt1_f32_index, svfloat32_t, float32_t, ++ svstnt1_f32 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_f32_1: ++** stnt1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f32_1, svfloat32_t, float32_t, ++ svstnt1_f32 (p0, x0 + svcntw (), z0), ++ svstnt1 (p0, x0 + svcntw (), z0)) ++ ++/* ++** stnt1_f32_7: ++** stnt1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f32_7, svfloat32_t, float32_t, ++ svstnt1_f32 (p0, x0 + svcntw () * 7, z0), ++ svstnt1 (p0, x0 + svcntw () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_f32_8: ++** incb x0, all, mul #8 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_f32_8, svfloat32_t, float32_t, ++ svstnt1_f32 (p0, x0 + svcntw () * 8, z0), ++ svstnt1 (p0, x0 + svcntw () * 8, z0)) ++ ++/* ++** stnt1_f32_m1: ++** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f32_m1, svfloat32_t, float32_t, ++ svstnt1_f32 (p0, x0 - svcntw (), z0), ++ svstnt1 (p0, x0 - svcntw (), z0)) ++ ++/* ++** stnt1_f32_m8: ++** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f32_m8, svfloat32_t, float32_t, ++ svstnt1_f32 (p0, x0 - svcntw () * 8, z0), ++ svstnt1 (p0, x0 - svcntw () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_f32_m9: ++** decb x0, all, mul #9 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_f32_m9, svfloat32_t, float32_t, ++ svstnt1_f32 (p0, x0 - svcntw () * 9, z0), ++ svstnt1 (p0, x0 - svcntw () * 9, z0)) ++ ++/* ++** stnt1_vnum_f32_0: ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f32_0, svfloat32_t, float32_t, ++ svstnt1_vnum_f32 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_f32_1: ++** stnt1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f32_1, svfloat32_t, float32_t, ++ svstnt1_vnum_f32 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_f32_7: ++** stnt1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f32_7, svfloat32_t, float32_t, ++ svstnt1_vnum_f32 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_f32_8: ++** incb x0, all, mul #8 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f32_8, svfloat32_t, float32_t, ++ svstnt1_vnum_f32 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_f32_m1: ++** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f32_m1, svfloat32_t, float32_t, ++ svstnt1_vnum_f32 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_f32_m8: ++** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f32_m8, svfloat32_t, float32_t, ++ svstnt1_vnum_f32 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_vnum_f32_m9: ++** decb x0, all, mul #9 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f32_m9, svfloat32_t, float32_t, ++ svstnt1_vnum_f32 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** stnt1_vnum_f32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** stnt1w z0\.s, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f32_x1, svfloat32_t, float32_t, ++ svstnt1_vnum_f32 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f64.c +new file mode 100644 +index 000000000..9555a1faf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_f64_base: ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_f64_base, svfloat64_t, float64_t, ++ svstnt1_f64 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_f64_index: ++** stnt1d z0\.d, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (stnt1_f64_index, svfloat64_t, float64_t, ++ svstnt1_f64 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_f64_1: ++** stnt1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f64_1, svfloat64_t, float64_t, ++ svstnt1_f64 (p0, x0 + svcntd (), z0), ++ svstnt1 (p0, x0 + svcntd (), z0)) ++ ++/* ++** stnt1_f64_7: ++** stnt1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f64_7, svfloat64_t, float64_t, ++ svstnt1_f64 (p0, x0 + svcntd () * 7, z0), ++ svstnt1 (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_f64_8: ++** incb x0, all, mul #8 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_f64_8, svfloat64_t, float64_t, ++ svstnt1_f64 (p0, x0 + svcntd () * 8, z0), ++ svstnt1 (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** stnt1_f64_m1: ++** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f64_m1, svfloat64_t, float64_t, ++ svstnt1_f64 (p0, x0 - svcntd (), z0), ++ svstnt1 (p0, x0 - svcntd (), z0)) ++ ++/* ++** stnt1_f64_m8: ++** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_f64_m8, svfloat64_t, float64_t, ++ svstnt1_f64 (p0, x0 - svcntd () * 8, z0), ++ svstnt1 (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_f64_m9: ++** decb x0, all, mul #9 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_f64_m9, svfloat64_t, float64_t, ++ svstnt1_f64 (p0, x0 - svcntd () * 9, z0), ++ svstnt1 (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** stnt1_vnum_f64_0: ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f64_0, svfloat64_t, float64_t, ++ svstnt1_vnum_f64 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_f64_1: ++** stnt1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f64_1, svfloat64_t, float64_t, ++ svstnt1_vnum_f64 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_f64_7: ++** stnt1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f64_7, svfloat64_t, float64_t, ++ svstnt1_vnum_f64 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_vnum_f64_8: ++** incb x0, all, mul #8 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f64_8, svfloat64_t, float64_t, ++ svstnt1_vnum_f64 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_f64_m1: ++** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f64_m1, svfloat64_t, float64_t, ++ svstnt1_vnum_f64 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_f64_m8: ++** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f64_m8, svfloat64_t, float64_t, ++ svstnt1_vnum_f64 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_f64_m9: ++** decb x0, all, mul #9 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f64_m9, svfloat64_t, float64_t, ++ svstnt1_vnum_f64 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** stnt1_vnum_f64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** stnt1d z0\.d, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_f64_x1, svfloat64_t, float64_t, ++ svstnt1_vnum_f64 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s16.c +new file mode 100644 +index 000000000..62e31450d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_s16_base: ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s16_base, svint16_t, int16_t, ++ svstnt1_s16 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_s16_index: ++** stnt1h z0\.h, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (stnt1_s16_index, svint16_t, int16_t, ++ svstnt1_s16 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_s16_1: ++** stnt1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s16_1, svint16_t, int16_t, ++ svstnt1_s16 (p0, x0 + svcnth (), z0), ++ svstnt1 (p0, x0 + svcnth (), z0)) ++ ++/* ++** stnt1_s16_7: ++** stnt1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s16_7, svint16_t, int16_t, ++ svstnt1_s16 (p0, x0 + svcnth () * 7, z0), ++ svstnt1 (p0, x0 + svcnth () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_s16_8: ++** incb x0, all, mul #8 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s16_8, svint16_t, int16_t, ++ svstnt1_s16 (p0, x0 + svcnth () * 8, z0), ++ svstnt1 (p0, x0 + svcnth () * 8, z0)) ++ ++/* ++** stnt1_s16_m1: ++** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s16_m1, svint16_t, int16_t, ++ svstnt1_s16 (p0, x0 - svcnth (), z0), ++ svstnt1 (p0, x0 - svcnth (), z0)) ++ ++/* ++** stnt1_s16_m8: ++** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s16_m8, svint16_t, int16_t, ++ svstnt1_s16 (p0, x0 - svcnth () * 8, z0), ++ svstnt1 (p0, x0 - svcnth () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_s16_m9: ++** decb x0, all, mul #9 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s16_m9, svint16_t, int16_t, ++ svstnt1_s16 (p0, x0 - svcnth () * 9, z0), ++ svstnt1 (p0, x0 - svcnth () * 9, z0)) ++ ++/* ++** stnt1_vnum_s16_0: ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s16_0, svint16_t, int16_t, ++ svstnt1_vnum_s16 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_s16_1: ++** stnt1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s16_1, svint16_t, int16_t, ++ svstnt1_vnum_s16 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_s16_7: ++** stnt1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s16_7, svint16_t, int16_t, ++ svstnt1_vnum_s16 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_s16_8: ++** incb x0, all, mul #8 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s16_8, svint16_t, int16_t, ++ svstnt1_vnum_s16 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_s16_m1: ++** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s16_m1, svint16_t, int16_t, ++ svstnt1_vnum_s16 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_s16_m8: ++** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s16_m8, svint16_t, int16_t, ++ svstnt1_vnum_s16 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_s16_m9: ++** decb x0, all, mul #9 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s16_m9, svint16_t, int16_t, ++ svstnt1_vnum_s16 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** stnt1_vnum_s16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** stnt1h z0\.h, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s16_x1, svint16_t, int16_t, ++ svstnt1_vnum_s16 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s32.c +new file mode 100644 +index 000000000..ff1f27c05 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_s32_base: ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s32_base, svint32_t, int32_t, ++ svstnt1_s32 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_s32_index: ++** stnt1w z0\.s, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (stnt1_s32_index, svint32_t, int32_t, ++ svstnt1_s32 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_s32_1: ++** stnt1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s32_1, svint32_t, int32_t, ++ svstnt1_s32 (p0, x0 + svcntw (), z0), ++ svstnt1 (p0, x0 + svcntw (), z0)) ++ ++/* ++** stnt1_s32_7: ++** stnt1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s32_7, svint32_t, int32_t, ++ svstnt1_s32 (p0, x0 + svcntw () * 7, z0), ++ svstnt1 (p0, x0 + svcntw () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_s32_8: ++** incb x0, all, mul #8 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s32_8, svint32_t, int32_t, ++ svstnt1_s32 (p0, x0 + svcntw () * 8, z0), ++ svstnt1 (p0, x0 + svcntw () * 8, z0)) ++ ++/* ++** stnt1_s32_m1: ++** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s32_m1, svint32_t, int32_t, ++ svstnt1_s32 (p0, x0 - svcntw (), z0), ++ svstnt1 (p0, x0 - svcntw (), z0)) ++ ++/* ++** stnt1_s32_m8: ++** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s32_m8, svint32_t, int32_t, ++ svstnt1_s32 (p0, x0 - svcntw () * 8, z0), ++ svstnt1 (p0, x0 - svcntw () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_s32_m9: ++** decb x0, all, mul #9 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s32_m9, svint32_t, int32_t, ++ svstnt1_s32 (p0, x0 - svcntw () * 9, z0), ++ svstnt1 (p0, x0 - svcntw () * 9, z0)) ++ ++/* ++** stnt1_vnum_s32_0: ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s32_0, svint32_t, int32_t, ++ svstnt1_vnum_s32 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_s32_1: ++** stnt1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s32_1, svint32_t, int32_t, ++ svstnt1_vnum_s32 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_s32_7: ++** stnt1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s32_7, svint32_t, int32_t, ++ svstnt1_vnum_s32 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_s32_8: ++** incb x0, all, mul #8 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s32_8, svint32_t, int32_t, ++ svstnt1_vnum_s32 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_s32_m1: ++** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s32_m1, svint32_t, int32_t, ++ svstnt1_vnum_s32 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_s32_m8: ++** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s32_m8, svint32_t, int32_t, ++ svstnt1_vnum_s32 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_s32_m9: ++** decb x0, all, mul #9 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s32_m9, svint32_t, int32_t, ++ svstnt1_vnum_s32 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** stnt1_vnum_s32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** stnt1w z0\.s, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s32_x1, svint32_t, int32_t, ++ svstnt1_vnum_s32 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s64.c +new file mode 100644 +index 000000000..7d548f8f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_s64_base: ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s64_base, svint64_t, int64_t, ++ svstnt1_s64 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_s64_index: ++** stnt1d z0\.d, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (stnt1_s64_index, svint64_t, int64_t, ++ svstnt1_s64 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_s64_1: ++** stnt1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s64_1, svint64_t, int64_t, ++ svstnt1_s64 (p0, x0 + svcntd (), z0), ++ svstnt1 (p0, x0 + svcntd (), z0)) ++ ++/* ++** stnt1_s64_7: ++** stnt1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s64_7, svint64_t, int64_t, ++ svstnt1_s64 (p0, x0 + svcntd () * 7, z0), ++ svstnt1 (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_s64_8: ++** incb x0, all, mul #8 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s64_8, svint64_t, int64_t, ++ svstnt1_s64 (p0, x0 + svcntd () * 8, z0), ++ svstnt1 (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** stnt1_s64_m1: ++** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s64_m1, svint64_t, int64_t, ++ svstnt1_s64 (p0, x0 - svcntd (), z0), ++ svstnt1 (p0, x0 - svcntd (), z0)) ++ ++/* ++** stnt1_s64_m8: ++** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s64_m8, svint64_t, int64_t, ++ svstnt1_s64 (p0, x0 - svcntd () * 8, z0), ++ svstnt1 (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_s64_m9: ++** decb x0, all, mul #9 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s64_m9, svint64_t, int64_t, ++ svstnt1_s64 (p0, x0 - svcntd () * 9, z0), ++ svstnt1 (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** stnt1_vnum_s64_0: ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s64_0, svint64_t, int64_t, ++ svstnt1_vnum_s64 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_s64_1: ++** stnt1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s64_1, svint64_t, int64_t, ++ svstnt1_vnum_s64 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_s64_7: ++** stnt1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s64_7, svint64_t, int64_t, ++ svstnt1_vnum_s64 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_s64_8: ++** incb x0, all, mul #8 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s64_8, svint64_t, int64_t, ++ svstnt1_vnum_s64 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_s64_m1: ++** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s64_m1, svint64_t, int64_t, ++ svstnt1_vnum_s64 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_s64_m8: ++** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s64_m8, svint64_t, int64_t, ++ svstnt1_vnum_s64 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_s64_m9: ++** decb x0, all, mul #9 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s64_m9, svint64_t, int64_t, ++ svstnt1_vnum_s64 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. 
*/ ++/* ++** stnt1_vnum_s64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** stnt1d z0\.d, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s64_x1, svint64_t, int64_t, ++ svstnt1_vnum_s64 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s8.c +new file mode 100644 +index 000000000..87c88035d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s8.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_s8_base: ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s8_base, svint8_t, int8_t, ++ svstnt1_s8 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_s8_index: ++** stnt1b z0\.b, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (stnt1_s8_index, svint8_t, int8_t, ++ svstnt1_s8 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_s8_1: ++** stnt1b z0\.b, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s8_1, svint8_t, int8_t, ++ svstnt1_s8 (p0, x0 + svcntb (), z0), ++ svstnt1 (p0, x0 + svcntb (), z0)) ++ ++/* ++** stnt1_s8_7: ++** stnt1b z0\.b, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s8_7, svint8_t, int8_t, ++ svstnt1_s8 (p0, x0 + svcntb () * 7, z0), ++ svstnt1 (p0, x0 + svcntb () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_s8_8: ++** incb x0, all, mul #8 ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s8_8, svint8_t, int8_t, ++ svstnt1_s8 (p0, x0 + svcntb () * 8, z0), ++ svstnt1 (p0, x0 + svcntb () * 8, z0)) ++ ++/* ++** stnt1_s8_m1: ++** stnt1b z0\.b, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s8_m1, svint8_t, int8_t, ++ svstnt1_s8 (p0, x0 - svcntb (), z0), ++ svstnt1 (p0, x0 - svcntb (), z0)) ++ ++/* ++** stnt1_s8_m8: ++** stnt1b z0\.b, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_s8_m8, svint8_t, int8_t, ++ svstnt1_s8 (p0, x0 - svcntb () * 8, z0), ++ svstnt1 (p0, x0 - svcntb () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_s8_m9: ++** decb x0, all, mul #9 ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_s8_m9, svint8_t, int8_t, ++ svstnt1_s8 (p0, x0 - svcntb () * 9, z0), ++ svstnt1 (p0, x0 - svcntb () * 9, z0)) ++ ++/* ++** stnt1_vnum_s8_0: ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s8_0, svint8_t, int8_t, ++ svstnt1_vnum_s8 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_s8_1: ++** stnt1b z0\.b, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s8_1, svint8_t, int8_t, ++ svstnt1_vnum_s8 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_s8_7: ++** stnt1b z0\.b, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s8_7, svint8_t, int8_t, ++ svstnt1_vnum_s8 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_vnum_s8_8: ++** incb x0, all, mul #8 ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s8_8, svint8_t, int8_t, ++ svstnt1_vnum_s8 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_s8_m1: ++** stnt1b z0\.b, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s8_m1, svint8_t, int8_t, ++ svstnt1_vnum_s8 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_s8_m8: ++** stnt1b z0\.b, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s8_m8, svint8_t, int8_t, ++ svstnt1_vnum_s8 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_s8_m9: ++** decb x0, all, mul #9 ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s8_m9, svint8_t, int8_t, ++ svstnt1_vnum_s8 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* ++** stnt1_vnum_s8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** stnt1b z0\.b, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** stnt1b z0\.b, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (stnt1_vnum_s8_x1, svint8_t, int8_t, ++ svstnt1_vnum_s8 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u16.c +new file mode 100644 +index 000000000..7d32df362 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u16.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_u16_base: ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u16_base, svuint16_t, uint16_t, ++ svstnt1_u16 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_u16_index: ++** stnt1h z0\.h, p0, \[x0, x1, lsl 1\] ++** ret ++*/ ++TEST_STORE (stnt1_u16_index, svuint16_t, uint16_t, ++ svstnt1_u16 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_u16_1: ++** stnt1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u16_1, svuint16_t, uint16_t, ++ svstnt1_u16 (p0, x0 + svcnth (), z0), ++ svstnt1 (p0, x0 + svcnth (), z0)) ++ ++/* ++** stnt1_u16_7: ++** stnt1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u16_7, svuint16_t, uint16_t, ++ svstnt1_u16 (p0, x0 + svcnth () * 7, z0), ++ svstnt1 (p0, x0 + svcnth () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_u16_8: ++** incb x0, all, mul #8 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u16_8, svuint16_t, uint16_t, ++ svstnt1_u16 (p0, x0 + svcnth () * 8, z0), ++ svstnt1 (p0, x0 + svcnth () * 8, z0)) ++ ++/* ++** stnt1_u16_m1: ++** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u16_m1, svuint16_t, uint16_t, ++ svstnt1_u16 (p0, x0 - svcnth (), z0), ++ svstnt1 (p0, x0 - svcnth (), z0)) ++ ++/* ++** stnt1_u16_m8: ++** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u16_m8, svuint16_t, uint16_t, ++ svstnt1_u16 (p0, x0 - svcnth () * 8, z0), ++ svstnt1 (p0, x0 - svcnth () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_u16_m9: ++** decb x0, all, mul #9 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u16_m9, svuint16_t, uint16_t, ++ svstnt1_u16 (p0, x0 - svcnth () * 9, z0), ++ svstnt1 (p0, x0 - svcnth () * 9, z0)) ++ ++/* ++** stnt1_vnum_u16_0: ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u16_0, svuint16_t, uint16_t, ++ svstnt1_vnum_u16 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_u16_1: ++** stnt1h z0\.h, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u16_1, svuint16_t, uint16_t, ++ svstnt1_vnum_u16 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_u16_7: ++** stnt1h z0\.h, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u16_7, svuint16_t, uint16_t, ++ svstnt1_vnum_u16 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_u16_8: ++** incb x0, all, mul #8 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u16_8, svuint16_t, uint16_t, ++ svstnt1_vnum_u16 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_u16_m1: ++** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u16_m1, svuint16_t, uint16_t, ++ svstnt1_vnum_u16 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_u16_m8: ++** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u16_m8, svuint16_t, uint16_t, ++ svstnt1_vnum_u16 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_u16_m9: ++** decb x0, all, mul #9 ++** stnt1h z0\.h, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u16_m9, svuint16_t, uint16_t, ++ svstnt1_vnum_u16 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** stnt1_vnum_u16_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** stnt1h z0\.h, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u16_x1, svuint16_t, uint16_t, ++ svstnt1_vnum_u16 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u32.c +new file mode 100644 +index 000000000..cd4ccaba9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u32.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_u32_base: ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u32_base, svuint32_t, uint32_t, ++ svstnt1_u32 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_u32_index: ++** stnt1w z0\.s, p0, \[x0, x1, lsl 2\] ++** ret ++*/ ++TEST_STORE (stnt1_u32_index, svuint32_t, uint32_t, ++ svstnt1_u32 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_u32_1: ++** stnt1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u32_1, svuint32_t, uint32_t, ++ svstnt1_u32 (p0, x0 + svcntw (), z0), ++ svstnt1 (p0, x0 + svcntw (), z0)) ++ ++/* ++** stnt1_u32_7: ++** stnt1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u32_7, svuint32_t, uint32_t, ++ svstnt1_u32 (p0, x0 + svcntw () * 7, z0), ++ svstnt1 (p0, x0 + svcntw () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_u32_8: ++** incb x0, all, mul #8 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u32_8, svuint32_t, uint32_t, ++ svstnt1_u32 (p0, x0 + svcntw () * 8, z0), ++ svstnt1 (p0, x0 + svcntw () * 8, z0)) ++ ++/* ++** stnt1_u32_m1: ++** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u32_m1, svuint32_t, uint32_t, ++ svstnt1_u32 (p0, x0 - svcntw (), z0), ++ svstnt1 (p0, x0 - svcntw (), z0)) ++ ++/* ++** stnt1_u32_m8: ++** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u32_m8, svuint32_t, uint32_t, ++ svstnt1_u32 (p0, x0 - svcntw () * 8, z0), ++ svstnt1 (p0, x0 - svcntw () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_u32_m9: ++** decb x0, all, mul #9 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u32_m9, svuint32_t, uint32_t, ++ svstnt1_u32 (p0, x0 - svcntw () * 9, z0), ++ svstnt1 (p0, x0 - svcntw () * 9, z0)) ++ ++/* ++** stnt1_vnum_u32_0: ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u32_0, svuint32_t, uint32_t, ++ svstnt1_vnum_u32 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_u32_1: ++** stnt1w z0\.s, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u32_1, svuint32_t, uint32_t, ++ svstnt1_vnum_u32 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_u32_7: ++** stnt1w z0\.s, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u32_7, svuint32_t, uint32_t, ++ svstnt1_vnum_u32 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_u32_8: ++** incb x0, all, mul #8 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u32_8, svuint32_t, uint32_t, ++ svstnt1_vnum_u32 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_u32_m1: ++** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u32_m1, svuint32_t, uint32_t, ++ svstnt1_vnum_u32 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_u32_m8: ++** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u32_m8, svuint32_t, uint32_t, ++ svstnt1_vnum_u32 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_u32_m9: ++** decb x0, all, mul #9 ++** stnt1w z0\.s, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u32_m9, svuint32_t, uint32_t, ++ svstnt1_vnum_u32 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** stnt1_vnum_u32_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** stnt1w z0\.s, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u32_x1, svuint32_t, uint32_t, ++ svstnt1_vnum_u32 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u64.c +new file mode 100644 +index 000000000..c8145f65c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u64.c +@@ -0,0 +1,158 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_u64_base: ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u64_base, svuint64_t, uint64_t, ++ svstnt1_u64 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_u64_index: ++** stnt1d z0\.d, p0, \[x0, x1, lsl 3\] ++** ret ++*/ ++TEST_STORE (stnt1_u64_index, svuint64_t, uint64_t, ++ svstnt1_u64 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_u64_1: ++** stnt1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u64_1, svuint64_t, uint64_t, ++ svstnt1_u64 (p0, x0 + svcntd (), z0), ++ svstnt1 (p0, x0 + svcntd (), z0)) ++ ++/* ++** stnt1_u64_7: ++** stnt1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u64_7, svuint64_t, uint64_t, ++ svstnt1_u64 (p0, x0 + svcntd () * 7, z0), ++ svstnt1 (p0, x0 + svcntd () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_u64_8: ++** incb x0, all, mul #8 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u64_8, svuint64_t, uint64_t, ++ svstnt1_u64 (p0, x0 + svcntd () * 8, z0), ++ svstnt1 (p0, x0 + svcntd () * 8, z0)) ++ ++/* ++** stnt1_u64_m1: ++** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u64_m1, svuint64_t, uint64_t, ++ svstnt1_u64 (p0, x0 - svcntd (), z0), ++ svstnt1 (p0, x0 - svcntd (), z0)) ++ ++/* ++** stnt1_u64_m8: ++** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u64_m8, svuint64_t, uint64_t, ++ svstnt1_u64 (p0, x0 - svcntd () * 8, z0), ++ svstnt1 (p0, x0 - svcntd () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_u64_m9: ++** decb x0, all, mul #9 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u64_m9, svuint64_t, uint64_t, ++ svstnt1_u64 (p0, x0 - svcntd () * 9, z0), ++ svstnt1 (p0, x0 - svcntd () * 9, z0)) ++ ++/* ++** stnt1_vnum_u64_0: ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u64_0, svuint64_t, uint64_t, ++ svstnt1_vnum_u64 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_u64_1: ++** stnt1d z0\.d, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u64_1, svuint64_t, uint64_t, ++ svstnt1_vnum_u64 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_u64_7: ++** stnt1d z0\.d, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u64_7, svuint64_t, uint64_t, ++ svstnt1_vnum_u64 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_u64_8: ++** incb x0, all, mul #8 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u64_8, svuint64_t, uint64_t, ++ svstnt1_vnum_u64 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_u64_m1: ++** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u64_m1, svuint64_t, uint64_t, ++ svstnt1_vnum_u64 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_u64_m8: ++** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u64_m8, svuint64_t, uint64_t, ++ svstnt1_vnum_u64 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_vnum_u64_m9: ++** decb x0, all, mul #9 ++** stnt1d z0\.d, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u64_m9, svuint64_t, uint64_t, ++ svstnt1_vnum_u64 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* Using MUL to calculate an index would also be OK. */ ++/* ++** stnt1_vnum_u64_x1: ++** cntb (x[0-9]+) ++** madd (x[0-9]+), (x1, \1|\1, x1), x0 ++** stnt1d z0\.d, p0, \[\2\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u64_x1, svuint64_t, uint64_t, ++ svstnt1_vnum_u64 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u8.c +new file mode 100644 +index 000000000..11c68f555 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u8.c +@@ -0,0 +1,162 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** stnt1_u8_base: ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u8_base, svuint8_t, uint8_t, ++ svstnt1_u8 (p0, x0, z0), ++ svstnt1 (p0, x0, z0)) ++ ++/* ++** stnt1_u8_index: ++** stnt1b z0\.b, p0, \[x0, x1\] ++** ret ++*/ ++TEST_STORE (stnt1_u8_index, svuint8_t, uint8_t, ++ svstnt1_u8 (p0, x0 + x1, z0), ++ svstnt1 (p0, x0 + x1, z0)) ++ ++/* ++** stnt1_u8_1: ++** stnt1b z0\.b, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u8_1, svuint8_t, uint8_t, ++ svstnt1_u8 (p0, x0 + svcntb (), z0), ++ svstnt1 (p0, x0 + svcntb (), z0)) ++ ++/* ++** stnt1_u8_7: ++** stnt1b z0\.b, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u8_7, svuint8_t, uint8_t, ++ svstnt1_u8 (p0, x0 + svcntb () * 7, z0), ++ svstnt1 (p0, x0 + svcntb () * 7, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_u8_8: ++** incb x0, all, mul #8 ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u8_8, svuint8_t, uint8_t, ++ svstnt1_u8 (p0, x0 + svcntb () * 8, z0), ++ svstnt1 (p0, x0 + svcntb () * 8, z0)) ++ ++/* ++** stnt1_u8_m1: ++** stnt1b z0\.b, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u8_m1, svuint8_t, uint8_t, ++ svstnt1_u8 (p0, x0 - svcntb (), z0), ++ svstnt1 (p0, x0 - svcntb (), z0)) ++ ++/* ++** stnt1_u8_m8: ++** stnt1b z0\.b, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_u8_m8, svuint8_t, uint8_t, ++ svstnt1_u8 (p0, x0 - svcntb () * 8, z0), ++ svstnt1 (p0, x0 - svcntb () * 8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_u8_m9: ++** decb x0, all, mul #9 ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_u8_m9, svuint8_t, uint8_t, ++ svstnt1_u8 (p0, x0 - svcntb () * 9, z0), ++ svstnt1 (p0, x0 - svcntb () * 9, z0)) ++ ++/* ++** stnt1_vnum_u8_0: ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u8_0, svuint8_t, uint8_t, ++ svstnt1_vnum_u8 (p0, x0, 0, z0), ++ svstnt1_vnum (p0, x0, 0, z0)) ++ ++/* ++** stnt1_vnum_u8_1: ++** stnt1b z0\.b, p0, \[x0, #1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u8_1, svuint8_t, uint8_t, ++ svstnt1_vnum_u8 (p0, x0, 1, z0), ++ svstnt1_vnum (p0, x0, 1, z0)) ++ ++/* ++** stnt1_vnum_u8_7: ++** stnt1b z0\.b, p0, \[x0, #7, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u8_7, svuint8_t, uint8_t, ++ svstnt1_vnum_u8 (p0, x0, 7, z0), ++ svstnt1_vnum (p0, x0, 7, z0)) ++ ++/* Moving the constant into a register would also be OK. 
*/ ++/* ++** stnt1_vnum_u8_8: ++** incb x0, all, mul #8 ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u8_8, svuint8_t, uint8_t, ++ svstnt1_vnum_u8 (p0, x0, 8, z0), ++ svstnt1_vnum (p0, x0, 8, z0)) ++ ++/* ++** stnt1_vnum_u8_m1: ++** stnt1b z0\.b, p0, \[x0, #-1, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u8_m1, svuint8_t, uint8_t, ++ svstnt1_vnum_u8 (p0, x0, -1, z0), ++ svstnt1_vnum (p0, x0, -1, z0)) ++ ++/* ++** stnt1_vnum_u8_m8: ++** stnt1b z0\.b, p0, \[x0, #-8, mul vl\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u8_m8, svuint8_t, uint8_t, ++ svstnt1_vnum_u8 (p0, x0, -8, z0), ++ svstnt1_vnum (p0, x0, -8, z0)) ++ ++/* Moving the constant into a register would also be OK. */ ++/* ++** stnt1_vnum_u8_m9: ++** decb x0, all, mul #9 ++** stnt1b z0\.b, p0, \[x0\] ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u8_m9, svuint8_t, uint8_t, ++ svstnt1_vnum_u8 (p0, x0, -9, z0), ++ svstnt1_vnum (p0, x0, -9, z0)) ++ ++/* ++** stnt1_vnum_u8_x1: ++** cntb (x[0-9]+) ++** ( ++** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 ++** stnt1b z0\.b, p0, \[\2\] ++** | ++** mul (x[0-9]+), (?:x1, \1|\1, x1) ++** stnt1b z0\.b, p0, \[x0, \3\] ++** ) ++** ret ++*/ ++TEST_STORE (stnt1_vnum_u8_x1, svuint8_t, uint8_t, ++ svstnt1_vnum_u8 (p0, x0, x1, z0), ++ svstnt1_vnum (p0, x0, x1, z0)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16.c +new file mode 100644 +index 000000000..bf4a0ab1e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16.c +@@ -0,0 +1,577 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_f16_m_tied1: ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_m_tied1, svfloat16_t, ++ z0 = svsub_f16_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_m_tied2, svfloat16_t, ++ z0 = svsub_f16_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_f16_m_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_m_untied, svfloat16_t, ++ z0 = svsub_f16_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fsub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_m (p0, z0, d4), ++ z0 = svsub_m (p0, z0, d4)) ++ ++/* ++** sub_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_m (p0, z1, d4), ++ z0 = svsub_m (p0, z1, d4)) ++ ++/* ++** sub_1_f16_m_tied1: ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_m_tied1, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_f16_m_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_m_untied, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_0p5_f16_m_tied1: ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_m_tied1, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z0, 0.5), ++ z0 = svsub_m (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f16_m_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_m_untied, 
svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z1, 0.5), ++ z0 = svsub_m (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f16_m_tied1: ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_m_tied1, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z0, -1), ++ z0 = svsub_m (p0, z0, -1)) ++ ++/* ++** sub_m1_f16_m_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_m_untied, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z1, -1), ++ z0 = svsub_m (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f16_m_tied1: ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_m_tied1, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z0, -0.5), ++ z0 = svsub_m (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f16_m_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_m_untied, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z1, -0.5), ++ z0 = svsub_m (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f16_m: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f16_m, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_z_tied1, svfloat16_t, ++ z0 = svsub_f16_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_z_tied2, svfloat16_t, ++ z0 = svsub_f16_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_z_untied, svfloat16_t, ++ z0 = svsub_f16_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_z (p0, z0, d4), ++ z0 = svsub_z (p0, z0, d4)) ++ ++/* ++** sub_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_z (p0, z1, d4), ++ z0 = svsub_z (p0, z1, d4)) ++ ++/* ++** sub_1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_z_tied1, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_z_untied, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z0, 0.5), ++ z0 = svsub_z (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_z_untied, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z1, 0.5), ++ z0 = svsub_z (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd 
z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_z_tied1, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z0, -1), ++ z0 = svsub_z (p0, z0, -1)) ++ ++/* ++** sub_m1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_z_untied, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z1, -1), ++ z0 = svsub_z (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_z_tied1, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z0, -0.5), ++ z0 = svsub_z (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_z_untied, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z1, -0.5), ++ z0 = svsub_z (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f16_z: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f16_z, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z0, -2), ++ z0 = svsub_z (p0, z0, -2)) ++ ++/* ++** sub_f16_x_tied1: ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_x_tied1, svfloat16_t, ++ z0 = svsub_f16_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_f16_x_tied2: ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_x_tied2, svfloat16_t, ++ z0 = svsub_f16_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_x_untied, svfloat16_t, ++ z0 = svsub_f16_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fsub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_x (p0, z0, d4), ++ z0 = svsub_x (p0, z0, d4)) ++ ++/* ++** sub_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_x (p0, z1, d4), ++ z0 = svsub_x (p0, z1, d4)) ++ ++/* ++** sub_1_f16_x_tied1: ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_f16_x_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_0p5_f16_x_tied1: ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z0, 0.5), ++ z0 = svsub_x (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f16_x_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z1, 0.5), ++ z0 = svsub_x (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f16_x_tied1: ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m1_f16_x_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z1, -1), ++ z0 = svsub_x (p0, z1, -1)) ++ 
++/* ++** sub_m0p5_f16_x_tied1: ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z0, -0.5), ++ z0 = svsub_x (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f16_x_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z1, -0.5), ++ z0 = svsub_x (p0, z1, -0.5)) ++ ++/* ++** sub_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z0, 2), ++ z0 = svsub_x (p0, z0, 2)) ++ ++/* ++** sub_2_f16_x_untied: ++** fmov z0\.h, #-2\.0(?:e\+0)? ++** fadd z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z1, 2), ++ z0 = svsub_x (p0, z1, 2)) ++ ++/* ++** ptrue_sub_f16_x_tied1: ++** fsub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f16_x_tied1, svfloat16_t, ++ z0 = svsub_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svsub_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_sub_f16_x_tied2: ++** fsub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f16_x_tied2, svfloat16_t, ++ z0 = svsub_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svsub_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_sub_f16_x_untied: ++** fsub z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f16_x_untied, svfloat16_t, ++ z0 = svsub_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svsub_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_sub_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svsub_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_sub_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svsub_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_sub_0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 0.5), ++ z0 = svsub_x (svptrue_b16 (), z0, 0.5)) ++ ++/* ++** ptrue_sub_0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 0.5), ++ z0 = svsub_x (svptrue_b16 (), z1, 0.5)) ++ ++/* ++** ptrue_sub_m1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z0, -1), ++ z0 = svsub_x (svptrue_b16 (), z0, -1)) ++ ++/* ++** ptrue_sub_m1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z1, -1), ++ z0 = svsub_x (svptrue_b16 (), z1, -1)) ++ ++/* ++** ptrue_sub_m0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z0, -0.5), ++ z0 = svsub_x (svptrue_b16 (), z0, -0.5)) ++ ++/* ++** ptrue_sub_m0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... 
++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z1, -0.5), ++ z0 = svsub_x (svptrue_b16 (), z1, -0.5)) ++ ++/* ++** ptrue_sub_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? ++** fadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svsub_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_sub_2_f16_x_untied: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? ++** fadd z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svsub_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16_notrap.c +new file mode 100644 +index 000000000..e45098944 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16_notrap.c +@@ -0,0 +1,572 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_f16_m_tied1: ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_m_tied1, svfloat16_t, ++ z0 = svsub_f16_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_m_tied2, svfloat16_t, ++ z0 = svsub_f16_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_f16_m_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_m_untied, svfloat16_t, ++ z0 = svsub_f16_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fsub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_m (p0, z0, d4), ++ z0 = svsub_m (p0, z0, d4)) ++ ++/* ++** sub_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_m (p0, z1, d4), ++ z0 = svsub_m (p0, z1, d4)) ++ ++/* ++** sub_1_f16_m_tied1: ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_m_tied1, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_f16_m_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_m_untied, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_0p5_f16_m_tied1: ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_m_tied1, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z0, 0.5), ++ z0 = svsub_m (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f16_m_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_m_untied, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z1, 0.5), ++ z0 = svsub_m (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f16_m_tied1: ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_m_tied1, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z0, -1), ++ z0 = svsub_m (p0, z0, -1)) ++ ++/* ++** sub_m1_f16_m_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_m_untied, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z1, -1), ++ z0 = 
svsub_m (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f16_m_tied1: ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_m_tied1, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z0, -0.5), ++ z0 = svsub_m (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f16_m_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_m_untied, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z1, -0.5), ++ z0 = svsub_m (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f16_m: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f16_m, svfloat16_t, ++ z0 = svsub_n_f16_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_z_tied1, svfloat16_t, ++ z0 = svsub_f16_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_z_tied2, svfloat16_t, ++ z0 = svsub_f16_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_z_untied, svfloat16_t, ++ z0 = svsub_f16_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_z (p0, z0, d4), ++ z0 = svsub_z (p0, z0, d4)) ++ ++/* ++** sub_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_z (p0, z1, d4), ++ z0 = svsub_z (p0, z1, d4)) ++ ++/* ++** sub_1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_z_tied1, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_z_untied, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z0, 0.5), ++ z0 = svsub_z (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_z_untied, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z1, 0.5), ++ z0 = svsub_z (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_z_tied1, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z0, -1), ++ z0 = svsub_z (p0, z0, -1)) ++ ++/* ++** sub_m1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_z_untied, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z1, -1), ++ z0 = svsub_z (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, 
z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_z_tied1, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z0, -0.5), ++ z0 = svsub_z (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_z_untied, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z1, -0.5), ++ z0 = svsub_z (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f16_z: ++** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fadd z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f16_z, svfloat16_t, ++ z0 = svsub_n_f16_z (p0, z0, -2), ++ z0 = svsub_z (p0, z0, -2)) ++ ++/* ++** sub_f16_x_tied1: ++** fsub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_x_tied1, svfloat16_t, ++ z0 = svsub_f16_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_f16_x_tied2: ++** fsub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_x_tied2, svfloat16_t, ++ z0 = svsub_f16_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_f16_x_untied: ++** fsub z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f16_x_untied, svfloat16_t, ++ z0 = svsub_f16_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fsub z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_x (p0, z0, d4), ++ z0 = svsub_x (p0, z0, d4)) ++ ++/* ++** sub_h4_f16_x_untied: ++** mov (z[0-9]+\.h), h4 ++** fsub z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svsub_n_f16_x (p0, z1, d4), ++ z0 = svsub_x (p0, z1, d4)) ++ ++/* ++** sub_1_f16_x_tied1: ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_f16_x_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_0p5_f16_x_tied1: ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z0, 0.5), ++ z0 = svsub_x (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f16_x_untied: ++** movprfx z0, z1 ++** fsub z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z1, 0.5), ++ z0 = svsub_x (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f16_x_tied1: ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m1_f16_x_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z1, -1), ++ z0 = svsub_x (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f16_x_tied1: ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z0, -0.5), ++ z0 = svsub_x (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f16_x_untied: ++** movprfx z0, z1 ++** fadd z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z1, -0.5), ++ z0 = svsub_x (p0, z1, -0.5)) ++ ++/* ++** sub_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? 
++** fadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z0, 2), ++ z0 = svsub_x (p0, z0, 2)) ++ ++/* ++** sub_2_f16_x_untied: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? ++** fadd z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (p0, z1, 2), ++ z0 = svsub_x (p0, z1, 2)) ++ ++/* ++** ptrue_sub_f16_x_tied1: ++** fsub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f16_x_tied1, svfloat16_t, ++ z0 = svsub_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svsub_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_sub_f16_x_tied2: ++** fsub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f16_x_tied2, svfloat16_t, ++ z0 = svsub_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svsub_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_sub_f16_x_untied: ++** fsub z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f16_x_untied, svfloat16_t, ++ z0 = svsub_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svsub_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_sub_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svsub_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_sub_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svsub_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_sub_0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 0.5), ++ z0 = svsub_x (svptrue_b16 (), z0, 0.5)) ++ ++/* ++** ptrue_sub_0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 0.5), ++ z0 = svsub_x (svptrue_b16 (), z1, 0.5)) ++ ++/* ++** ptrue_sub_m1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z0, -1), ++ z0 = svsub_x (svptrue_b16 (), z0, -1)) ++ ++/* ++** ptrue_sub_m1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z1, -1), ++ z0 = svsub_x (svptrue_b16 (), z1, -1)) ++ ++/* ++** ptrue_sub_m0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z0, -0.5), ++ z0 = svsub_x (svptrue_b16 (), z0, -0.5)) ++ ++/* ++** ptrue_sub_m0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z1, -0.5), ++ z0 = svsub_x (svptrue_b16 (), z1, -0.5)) ++ ++/* ++** ptrue_sub_2_f16_x_tied1: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? ++** fadd z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f16_x_tied1, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 2), ++ z0 = svsub_x (svptrue_b16 (), z0, 2)) ++ ++/* ++** ptrue_sub_2_f16_x_untied: ++** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? 
++** fadd z0\.h, (z1\.h, \1|\1, z1\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f16_x_untied, svfloat16_t, ++ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 2), ++ z0 = svsub_x (svptrue_b16 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32.c +new file mode 100644 +index 000000000..05be52bad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32.c +@@ -0,0 +1,577 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_f32_m_tied1: ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_m_tied1, svfloat32_t, ++ z0 = svsub_f32_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_m_tied2, svfloat32_t, ++ z0 = svsub_f32_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_f32_m_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_m_untied, svfloat32_t, ++ z0 = svsub_f32_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fsub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svsub_n_f32_m (p0, z0, d4), ++ z0 = svsub_m (p0, z0, d4)) ++ ++/* ++** sub_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svsub_n_f32_m (p0, z1, d4), ++ z0 = svsub_m (p0, z1, d4)) ++ ++/* ++** sub_1_f32_m_tied1: ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_m_tied1, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_f32_m_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_m_untied, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_0p5_f32_m_tied1: ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_m_tied1, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z0, 0.5), ++ z0 = svsub_m (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f32_m_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_m_untied, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z1, 0.5), ++ z0 = svsub_m (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f32_m_tied1: ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_m_tied1, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z0, -1), ++ z0 = svsub_m (p0, z0, -1)) ++ ++/* ++** sub_m1_f32_m_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_m_untied, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z1, -1), ++ z0 = svsub_m (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f32_m_tied1: ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_m_tied1, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z0, -0.5), ++ z0 = svsub_m (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f32_m_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_m_untied, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z1, -0.5), ++ z0 = svsub_m (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f32_m: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f32_m, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_z_tied1, svfloat32_t, ++ z0 = svsub_f32_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_z_tied2, svfloat32_t, ++ z0 = svsub_f32_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_z_untied, svfloat32_t, ++ z0 = svsub_f32_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svsub_n_f32_z (p0, z0, d4), ++ z0 = svsub_z (p0, z0, d4)) ++ ++/* ++** sub_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svsub_n_f32_z (p0, z1, d4), ++ z0 = svsub_z (p0, z1, d4)) ++ ++/* ++** sub_1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_z_tied1, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_z_untied, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z0, 0.5), ++ z0 = svsub_z (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_z_untied, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z1, 0.5), ++ z0 = svsub_z (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_z_tied1, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z0, -1), ++ z0 = svsub_z (p0, z0, -1)) ++ ++/* ++** sub_m1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_z_untied, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z1, -1), ++ z0 = svsub_z (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_z_tied1, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z0, -0.5), ++ z0 = svsub_z (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_z_untied, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z1, -0.5), ++ z0 = svsub_z (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f32_z: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f32_z, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z0, -2), ++ z0 = svsub_z (p0, z0, -2)) ++ ++/* ++** sub_f32_x_tied1: ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_x_tied1, svfloat32_t, ++ z0 = svsub_f32_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_f32_x_tied2: ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_x_tied2, svfloat32_t, ++ z0 = svsub_f32_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_x_untied, svfloat32_t, ++ z0 = svsub_f32_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fsub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svsub_n_f32_x (p0, z0, d4), ++ z0 = svsub_x (p0, z0, d4)) ++ ++/* ++** sub_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svsub_n_f32_x (p0, z1, d4), ++ z0 = svsub_x (p0, z1, d4)) ++ ++/* ++** sub_1_f32_x_tied1: ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_f32_x_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_0p5_f32_x_tied1: ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z0, 0.5), ++ z0 = svsub_x (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f32_x_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z1, 0.5), ++ z0 = svsub_x (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f32_x_tied1: ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m1_f32_x_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z1, -1), ++ z0 = svsub_x (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f32_x_tied1: ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z0, -0.5), ++ z0 = svsub_x (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f32_x_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z1, -0.5), ++ z0 = svsub_x (p0, z1, -0.5)) ++ ++/* ++** sub_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z0, 2), ++ z0 = svsub_x (p0, z0, 2)) ++ ++/* ++** sub_2_f32_x_untied: ++** fmov z0\.s, #-2\.0(?:e\+0)? 
++** fadd z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z1, 2), ++ z0 = svsub_x (p0, z1, 2)) ++ ++/* ++** ptrue_sub_f32_x_tied1: ++** fsub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f32_x_tied1, svfloat32_t, ++ z0 = svsub_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svsub_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_sub_f32_x_tied2: ++** fsub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f32_x_tied2, svfloat32_t, ++ z0 = svsub_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svsub_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_sub_f32_x_untied: ++** fsub z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f32_x_untied, svfloat32_t, ++ z0 = svsub_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svsub_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_sub_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svsub_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_sub_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svsub_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_sub_0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 0.5), ++ z0 = svsub_x (svptrue_b32 (), z0, 0.5)) ++ ++/* ++** ptrue_sub_0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 0.5), ++ z0 = svsub_x (svptrue_b32 (), z1, 0.5)) ++ ++/* ++** ptrue_sub_m1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z0, -1), ++ z0 = svsub_x (svptrue_b32 (), z0, -1)) ++ ++/* ++** ptrue_sub_m1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z1, -1), ++ z0 = svsub_x (svptrue_b32 (), z1, -1)) ++ ++/* ++** ptrue_sub_m0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z0, -0.5), ++ z0 = svsub_x (svptrue_b32 (), z0, -0.5)) ++ ++/* ++** ptrue_sub_m0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z1, -0.5), ++ z0 = svsub_x (svptrue_b32 (), z1, -0.5)) ++ ++/* ++** ptrue_sub_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? ++** fadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svsub_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_sub_2_f32_x_untied: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? 
++** fadd z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svsub_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32_notrap.c +new file mode 100644 +index 000000000..eb79a253a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32_notrap.c +@@ -0,0 +1,572 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_f32_m_tied1: ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_m_tied1, svfloat32_t, ++ z0 = svsub_f32_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_m_tied2, svfloat32_t, ++ z0 = svsub_f32_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_f32_m_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_m_untied, svfloat32_t, ++ z0 = svsub_f32_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fsub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svsub_n_f32_m (p0, z0, d4), ++ z0 = svsub_m (p0, z0, d4)) ++ ++/* ++** sub_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svsub_n_f32_m (p0, z1, d4), ++ z0 = svsub_m (p0, z1, d4)) ++ ++/* ++** sub_1_f32_m_tied1: ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_m_tied1, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_f32_m_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_m_untied, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_0p5_f32_m_tied1: ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_m_tied1, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z0, 0.5), ++ z0 = svsub_m (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f32_m_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_m_untied, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z1, 0.5), ++ z0 = svsub_m (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f32_m_tied1: ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_m_tied1, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z0, -1), ++ z0 = svsub_m (p0, z0, -1)) ++ ++/* ++** sub_m1_f32_m_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_m_untied, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z1, -1), ++ z0 = svsub_m (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f32_m_tied1: ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_m_tied1, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z0, -0.5), ++ z0 = svsub_m (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f32_m_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_m_untied, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z1, -0.5), ++ z0 = svsub_m (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f32_m: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f32_m, svfloat32_t, ++ z0 = svsub_n_f32_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_z_tied1, svfloat32_t, ++ z0 = svsub_f32_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_z_tied2, svfloat32_t, ++ z0 = svsub_f32_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_z_untied, svfloat32_t, ++ z0 = svsub_f32_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svsub_n_f32_z (p0, z0, d4), ++ z0 = svsub_z (p0, z0, d4)) ++ ++/* ++** sub_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svsub_n_f32_z (p0, z1, d4), ++ z0 = svsub_z (p0, z1, d4)) ++ ++/* ++** sub_1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_z_tied1, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_z_untied, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z0, 0.5), ++ z0 = svsub_z (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_z_untied, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z1, 0.5), ++ z0 = svsub_z (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_z_tied1, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z0, -1), ++ z0 = svsub_z (p0, z0, -1)) ++ ++/* ++** sub_m1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_z_untied, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z1, -1), ++ z0 = svsub_z (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_z_tied1, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z0, -0.5), ++ z0 = svsub_z (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_z_untied, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z1, -0.5), ++ z0 = svsub_z (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f32_z: ++** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
++** movprfx z0\.s, p0/z, z0\.s ++** fadd z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f32_z, svfloat32_t, ++ z0 = svsub_n_f32_z (p0, z0, -2), ++ z0 = svsub_z (p0, z0, -2)) ++ ++/* ++** sub_f32_x_tied1: ++** fsub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_x_tied1, svfloat32_t, ++ z0 = svsub_f32_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_f32_x_tied2: ++** fsub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_x_tied2, svfloat32_t, ++ z0 = svsub_f32_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_f32_x_untied: ++** fsub z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f32_x_untied, svfloat32_t, ++ z0 = svsub_f32_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fsub z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svsub_n_f32_x (p0, z0, d4), ++ z0 = svsub_x (p0, z0, d4)) ++ ++/* ++** sub_s4_f32_x_untied: ++** mov (z[0-9]+\.s), s4 ++** fsub z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svsub_n_f32_x (p0, z1, d4), ++ z0 = svsub_x (p0, z1, d4)) ++ ++/* ++** sub_1_f32_x_tied1: ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_f32_x_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_0p5_f32_x_tied1: ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z0, 0.5), ++ z0 = svsub_x (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f32_x_untied: ++** movprfx z0, z1 ++** fsub z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z1, 0.5), ++ z0 = svsub_x (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f32_x_tied1: ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m1_f32_x_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z1, -1), ++ z0 = svsub_x (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f32_x_tied1: ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z0, -0.5), ++ z0 = svsub_x (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f32_x_untied: ++** movprfx z0, z1 ++** fadd z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z1, -0.5), ++ z0 = svsub_x (p0, z1, -0.5)) ++ ++/* ++** sub_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? ++** fadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z0, 2), ++ z0 = svsub_x (p0, z0, 2)) ++ ++/* ++** sub_2_f32_x_untied: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? 
++** fadd z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (p0, z1, 2), ++ z0 = svsub_x (p0, z1, 2)) ++ ++/* ++** ptrue_sub_f32_x_tied1: ++** fsub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f32_x_tied1, svfloat32_t, ++ z0 = svsub_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svsub_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_sub_f32_x_tied2: ++** fsub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f32_x_tied2, svfloat32_t, ++ z0 = svsub_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svsub_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_sub_f32_x_untied: ++** fsub z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f32_x_untied, svfloat32_t, ++ z0 = svsub_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svsub_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_sub_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svsub_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_sub_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svsub_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_sub_0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 0.5), ++ z0 = svsub_x (svptrue_b32 (), z0, 0.5)) ++ ++/* ++** ptrue_sub_0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 0.5), ++ z0 = svsub_x (svptrue_b32 (), z1, 0.5)) ++ ++/* ++** ptrue_sub_m1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z0, -1), ++ z0 = svsub_x (svptrue_b32 (), z0, -1)) ++ ++/* ++** ptrue_sub_m1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z1, -1), ++ z0 = svsub_x (svptrue_b32 (), z1, -1)) ++ ++/* ++** ptrue_sub_m0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z0, -0.5), ++ z0 = svsub_x (svptrue_b32 (), z0, -0.5)) ++ ++/* ++** ptrue_sub_m0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z1, -0.5), ++ z0 = svsub_x (svptrue_b32 (), z1, -0.5)) ++ ++/* ++** ptrue_sub_2_f32_x_tied1: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? ++** fadd z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f32_x_tied1, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 2), ++ z0 = svsub_x (svptrue_b32 (), z0, 2)) ++ ++/* ++** ptrue_sub_2_f32_x_untied: ++** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? 
++** fadd z0\.s, (z1\.s, \1|\1, z1\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f32_x_untied, svfloat32_t, ++ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 2), ++ z0 = svsub_x (svptrue_b32 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64.c +new file mode 100644 +index 000000000..2179382c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64.c +@@ -0,0 +1,577 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_f64_m_tied1: ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_m_tied1, svfloat64_t, ++ z0 = svsub_f64_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_m_tied2, svfloat64_t, ++ z0 = svsub_f64_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_f64_m_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_m_untied, svfloat64_t, ++ z0 = svsub_f64_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fsub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svsub_n_f64_m (p0, z0, d4), ++ z0 = svsub_m (p0, z0, d4)) ++ ++/* ++** sub_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svsub_n_f64_m (p0, z1, d4), ++ z0 = svsub_m (p0, z1, d4)) ++ ++/* ++** sub_1_f64_m_tied1: ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_m_tied1, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_f64_m_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_m_untied, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_0p5_f64_m_tied1: ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_m_tied1, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z0, 0.5), ++ z0 = svsub_m (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f64_m_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_m_untied, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z1, 0.5), ++ z0 = svsub_m (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f64_m_tied1: ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_m_tied1, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z0, -1), ++ z0 = svsub_m (p0, z0, -1)) ++ ++/* ++** sub_m1_f64_m_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_m_untied, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z1, -1), ++ z0 = svsub_m (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f64_m_tied1: ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_m_tied1, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z0, -0.5), ++ z0 = svsub_m (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f64_m_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_m_untied, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z1, -0.5), ++ z0 = svsub_m (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f64_m: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f64_m, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_z_tied1, svfloat64_t, ++ z0 = svsub_f64_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_z_tied2, svfloat64_t, ++ z0 = svsub_f64_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_z_untied, svfloat64_t, ++ z0 = svsub_f64_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svsub_n_f64_z (p0, z0, d4), ++ z0 = svsub_z (p0, z0, d4)) ++ ++/* ++** sub_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svsub_n_f64_z (p0, z1, d4), ++ z0 = svsub_z (p0, z1, d4)) ++ ++/* ++** sub_1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_z_tied1, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_z_untied, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z0, 0.5), ++ z0 = svsub_z (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_z_untied, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z1, 0.5), ++ z0 = svsub_z (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_z_tied1, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z0, -1), ++ z0 = svsub_z (p0, z0, -1)) ++ ++/* ++** sub_m1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_z_untied, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z1, -1), ++ z0 = svsub_z (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_z_tied1, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z0, -0.5), ++ z0 = svsub_z (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_z_untied, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z1, -0.5), ++ z0 = svsub_z (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f64_z: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f64_z, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z0, -2), ++ z0 = svsub_z (p0, z0, -2)) ++ ++/* ++** sub_f64_x_tied1: ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_x_tied1, svfloat64_t, ++ z0 = svsub_f64_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_f64_x_tied2: ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_x_tied2, svfloat64_t, ++ z0 = svsub_f64_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_x_untied, svfloat64_t, ++ z0 = svsub_f64_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fsub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svsub_n_f64_x (p0, z0, d4), ++ z0 = svsub_x (p0, z0, d4)) ++ ++/* ++** sub_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svsub_n_f64_x (p0, z1, d4), ++ z0 = svsub_x (p0, z1, d4)) ++ ++/* ++** sub_1_f64_x_tied1: ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_f64_x_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_0p5_f64_x_tied1: ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z0, 0.5), ++ z0 = svsub_x (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f64_x_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z1, 0.5), ++ z0 = svsub_x (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f64_x_tied1: ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m1_f64_x_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z1, -1), ++ z0 = svsub_x (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f64_x_tied1: ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z0, -0.5), ++ z0 = svsub_x (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f64_x_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z1, -0.5), ++ z0 = svsub_x (p0, z1, -0.5)) ++ ++/* ++** sub_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z0, 2), ++ z0 = svsub_x (p0, z0, 2)) ++ ++/* ++** sub_2_f64_x_untied: ++** fmov z0\.d, #-2\.0(?:e\+0)? 
++** fadd z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z1, 2), ++ z0 = svsub_x (p0, z1, 2)) ++ ++/* ++** ptrue_sub_f64_x_tied1: ++** fsub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f64_x_tied1, svfloat64_t, ++ z0 = svsub_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svsub_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_sub_f64_x_tied2: ++** fsub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f64_x_tied2, svfloat64_t, ++ z0 = svsub_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svsub_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_sub_f64_x_untied: ++** fsub z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f64_x_untied, svfloat64_t, ++ z0 = svsub_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svsub_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_sub_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svsub_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_sub_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svsub_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_sub_0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 0.5), ++ z0 = svsub_x (svptrue_b64 (), z0, 0.5)) ++ ++/* ++** ptrue_sub_0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 0.5), ++ z0 = svsub_x (svptrue_b64 (), z1, 0.5)) ++ ++/* ++** ptrue_sub_m1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z0, -1), ++ z0 = svsub_x (svptrue_b64 (), z0, -1)) ++ ++/* ++** ptrue_sub_m1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z1, -1), ++ z0 = svsub_x (svptrue_b64 (), z1, -1)) ++ ++/* ++** ptrue_sub_m0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z0, -0.5), ++ z0 = svsub_x (svptrue_b64 (), z0, -0.5)) ++ ++/* ++** ptrue_sub_m0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z1, -0.5), ++ z0 = svsub_x (svptrue_b64 (), z1, -0.5)) ++ ++/* ++** ptrue_sub_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? ++** fadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svsub_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_sub_2_f64_x_untied: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
++** fadd z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svsub_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64_notrap.c +new file mode 100644 +index 000000000..bd89f44b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64_notrap.c +@@ -0,0 +1,572 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_f64_m_tied1: ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_m_tied1, svfloat64_t, ++ z0 = svsub_f64_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_m_tied2, svfloat64_t, ++ z0 = svsub_f64_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_f64_m_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_m_untied, svfloat64_t, ++ z0 = svsub_f64_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fsub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svsub_n_f64_m (p0, z0, d4), ++ z0 = svsub_m (p0, z0, d4)) ++ ++/* ++** sub_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svsub_n_f64_m (p0, z1, d4), ++ z0 = svsub_m (p0, z1, d4)) ++ ++/* ++** sub_1_f64_m_tied1: ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_m_tied1, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_f64_m_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_m_untied, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_0p5_f64_m_tied1: ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_m_tied1, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z0, 0.5), ++ z0 = svsub_m (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f64_m_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_m_untied, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z1, 0.5), ++ z0 = svsub_m (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f64_m_tied1: ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_m_tied1, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z0, -1), ++ z0 = svsub_m (p0, z0, -1)) ++ ++/* ++** sub_m1_f64_m_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_m_untied, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z1, -1), ++ z0 = svsub_m (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f64_m_tied1: ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_m_tied1, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z0, -0.5), ++ z0 = svsub_m (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f64_m_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_m_untied, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z1, -0.5), ++ z0 = svsub_m (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f64_m: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f64_m, svfloat64_t, ++ z0 = svsub_n_f64_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_z_tied1, svfloat64_t, ++ z0 = svsub_f64_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_z_tied2, svfloat64_t, ++ z0 = svsub_f64_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_z_untied, svfloat64_t, ++ z0 = svsub_f64_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svsub_n_f64_z (p0, z0, d4), ++ z0 = svsub_z (p0, z0, d4)) ++ ++/* ++** sub_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svsub_n_f64_z (p0, z1, d4), ++ z0 = svsub_z (p0, z1, d4)) ++ ++/* ++** sub_1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_z_tied1, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_z_untied, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z0, 0.5), ++ z0 = svsub_z (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_z_untied, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z1, 0.5), ++ z0 = svsub_z (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_z_tied1, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z0, -1), ++ z0 = svsub_z (p0, z0, -1)) ++ ++/* ++** sub_m1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_z_untied, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z1, -1), ++ z0 = svsub_z (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_z_tied1, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z0, -0.5), ++ z0 = svsub_z (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_z_untied, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z1, -0.5), ++ z0 = svsub_z (p0, z1, -0.5)) ++ ++/* ++** sub_m2_f64_z: ++** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
++** movprfx z0\.d, p0/z, z0\.d ++** fadd z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_f64_z, svfloat64_t, ++ z0 = svsub_n_f64_z (p0, z0, -2), ++ z0 = svsub_z (p0, z0, -2)) ++ ++/* ++** sub_f64_x_tied1: ++** fsub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_x_tied1, svfloat64_t, ++ z0 = svsub_f64_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_f64_x_tied2: ++** fsub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_x_tied2, svfloat64_t, ++ z0 = svsub_f64_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_f64_x_untied: ++** fsub z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_f64_x_untied, svfloat64_t, ++ z0 = svsub_f64_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fsub z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svsub_n_f64_x (p0, z0, d4), ++ z0 = svsub_x (p0, z0, d4)) ++ ++/* ++** sub_d4_f64_x_untied: ++** mov (z[0-9]+\.d), d4 ++** fsub z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (sub_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svsub_n_f64_x (p0, z1, d4), ++ z0 = svsub_x (p0, z1, d4)) ++ ++/* ++** sub_1_f64_x_tied1: ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_f64_x_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_0p5_f64_x_tied1: ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z0, 0.5), ++ z0 = svsub_x (p0, z0, 0.5)) ++ ++/* ++** sub_0p5_f64_x_untied: ++** movprfx z0, z1 ++** fsub z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_0p5_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z1, 0.5), ++ z0 = svsub_x (p0, z1, 0.5)) ++ ++/* ++** sub_m1_f64_x_tied1: ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m1_f64_x_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z1, -1), ++ z0 = svsub_x (p0, z1, -1)) ++ ++/* ++** sub_m0p5_f64_x_tied1: ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z0, -0.5), ++ z0 = svsub_x (p0, z0, -0.5)) ++ ++/* ++** sub_m0p5_f64_x_untied: ++** movprfx z0, z1 ++** fadd z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m0p5_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z1, -0.5), ++ z0 = svsub_x (p0, z1, -0.5)) ++ ++/* ++** sub_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? ++** fadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z0, 2), ++ z0 = svsub_x (p0, z0, 2)) ++ ++/* ++** sub_2_f64_x_untied: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
++** fadd z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_2_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (p0, z1, 2), ++ z0 = svsub_x (p0, z1, 2)) ++ ++/* ++** ptrue_sub_f64_x_tied1: ++** fsub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f64_x_tied1, svfloat64_t, ++ z0 = svsub_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svsub_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_sub_f64_x_tied2: ++** fsub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f64_x_tied2, svfloat64_t, ++ z0 = svsub_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svsub_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_sub_f64_x_untied: ++** fsub z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_f64_x_untied, svfloat64_t, ++ z0 = svsub_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svsub_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_sub_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svsub_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_sub_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_1_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svsub_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_sub_0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 0.5), ++ z0 = svsub_x (svptrue_b64 (), z0, 0.5)) ++ ++/* ++** ptrue_sub_0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_0p5_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 0.5), ++ z0 = svsub_x (svptrue_b64 (), z1, 0.5)) ++ ++/* ++** ptrue_sub_m1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z0, -1), ++ z0 = svsub_x (svptrue_b64 (), z0, -1)) ++ ++/* ++** ptrue_sub_m1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m1_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z1, -1), ++ z0 = svsub_x (svptrue_b64 (), z1, -1)) ++ ++/* ++** ptrue_sub_m0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z0, -0.5), ++ z0 = svsub_x (svptrue_b64 (), z0, -0.5)) ++ ++/* ++** ptrue_sub_m0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_m0p5_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z1, -0.5), ++ z0 = svsub_x (svptrue_b64 (), z1, -0.5)) ++ ++/* ++** ptrue_sub_2_f64_x_tied1: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? ++** fadd z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f64_x_tied1, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 2), ++ z0 = svsub_x (svptrue_b64 (), z0, 2)) ++ ++/* ++** ptrue_sub_2_f64_x_untied: ++** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
++** fadd z0\.d, (z1\.d, \1|\1, z1\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_sub_2_f64_x_untied, svfloat64_t, ++ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 2), ++ z0 = svsub_x (svptrue_b64 (), z1, 2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s16.c +new file mode 100644 +index 000000000..aea8ea2b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s16.c +@@ -0,0 +1,377 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_s16_m_tied1: ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s16_m_tied1, svint16_t, ++ z0 = svsub_s16_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sub z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s16_m_tied2, svint16_t, ++ z0 = svsub_s16_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_s16_m_untied: ++** movprfx z0, z1 ++** sub z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s16_m_untied, svint16_t, ++ z0 = svsub_s16_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** sub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svsub_n_s16_m (p0, z0, x0), ++ z0 = svsub_m (p0, z0, x0)) ++ ++/* ++** sub_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** sub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svsub_n_s16_m (p0, z1, x0), ++ z0 = svsub_m (p0, z1, x0)) ++ ++/* ++** sub_1_s16_m_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** add z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s16_m_tied1, svint16_t, ++ z0 = svsub_n_s16_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0, z1 ++** add z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s16_m_untied, svint16_t, ++ z0 = svsub_n_s16_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_m2_s16_m: ++** mov (z[0-9]+\.h), #2 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_s16_m, svint16_t, ++ z0 = svsub_n_s16_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s16_z_tied1, svint16_t, ++ z0 = svsub_s16_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** subr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s16_z_tied2, svint16_t, ++ z0 = svsub_s16_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** sub z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** subr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s16_z_untied, svint16_t, ++ z0 = svsub_s16_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** sub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svsub_n_s16_z (p0, z0, x0), ++ z0 = svsub_z (p0, z0, x0)) ++ ++/* ++** sub_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** sub z0\.h, p0/m, z0\.h, \1 ++** | 
++** movprfx z0\.h, p0/z, \1 ++** subr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svsub_n_s16_z (p0, z1, x0), ++ z0 = svsub_z (p0, z1, x0)) ++ ++/* ++** sub_1_s16_z_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0\.h, p0/z, z0\.h ++** add z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s16_z_tied1, svint16_t, ++ z0 = svsub_n_s16_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_s16_z_untied: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** add z0\.h, p0/m, z0\.h, \1\.h ++** | ++** movprfx z0\.h, p0/z, \1\.h ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s16_z_untied, svint16_t, ++ z0 = svsub_n_s16_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_s16_x_tied1: ++** sub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s16_x_tied1, svint16_t, ++ z0 = svsub_s16_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_s16_x_tied2: ++** sub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s16_x_tied2, svint16_t, ++ z0 = svsub_s16_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_s16_x_untied: ++** sub z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s16_x_untied, svint16_t, ++ z0 = svsub_s16_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** sub z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svsub_n_s16_x (p0, z0, x0), ++ z0 = svsub_x (p0, z0, x0)) ++ ++/* ++** sub_w0_s16_x_untied: ++** mov (z[0-9]+\.h), w0 ++** sub z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svsub_n_s16_x (p0, z1, x0), ++ z0 = svsub_x (p0, z1, x0)) ++ ++/* ++** sub_1_s16_x_tied1: ++** sub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s16_x_tied1, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_s16_x_untied: ++** movprfx z0, z1 ++** sub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s16_x_untied, svint16_t, ++ z0 = svsub_n_s16_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_127_s16_x: ++** sub z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_127_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, 127), ++ z0 = svsub_x (p0, z0, 127)) ++ ++/* ++** sub_128_s16_x: ++** sub z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_128_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, 128), ++ z0 = svsub_x (p0, z0, 128)) ++ ++/* ++** sub_255_s16_x: ++** sub z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_255_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, 255), ++ z0 = svsub_x (p0, z0, 255)) ++ ++/* ++** sub_256_s16_x: ++** add z0\.h, z0\.h, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_256_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, 256), ++ z0 = svsub_x (p0, z0, 256)) ++ ++/* ++** sub_257_s16_x: ++** mov (z[0-9]+\.h), #-257 ++** add z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_257_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, 257), ++ z0 = svsub_x (p0, z0, 257)) ++ ++/* ++** sub_512_s16_x: ++** add z0\.h, z0\.h, #65024 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_512_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, 512), ++ z0 = svsub_x (p0, z0, 512)) ++ ++/* ++** sub_65280_s16_x: ++** add z0\.h, z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65280_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, 0xff00), ++ z0 = svsub_x (p0, z0, 0xff00)) ++ ++/* ++** sub_m1_s16_x: 
++** add z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m127_s16_x: ++** add z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m127_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, -127), ++ z0 = svsub_x (p0, z0, -127)) ++ ++/* ++** sub_m128_s16_x: ++** add z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m128_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, -128), ++ z0 = svsub_x (p0, z0, -128)) ++ ++/* ++** sub_m255_s16_x: ++** add z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m255_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, -255), ++ z0 = svsub_x (p0, z0, -255)) ++ ++/* ++** sub_m256_s16_x: ++** add z0\.h, z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m256_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, -256), ++ z0 = svsub_x (p0, z0, -256)) ++ ++/* ++** sub_m257_s16_x: ++** mov (z[0-9]+)\.b, #1 ++** add z0\.h, (z0\.h, \1\.h|\1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m257_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, -257), ++ z0 = svsub_x (p0, z0, -257)) ++ ++/* ++** sub_m512_s16_x: ++** add z0\.h, z0\.h, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m512_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, -512), ++ z0 = svsub_x (p0, z0, -512)) ++ ++/* ++** sub_m32768_s16_x: ++** add z0\.h, z0\.h, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m32768_s16_x, svint16_t, ++ z0 = svsub_n_s16_x (p0, z0, -0x8000), ++ z0 = svsub_x (p0, z0, -0x8000)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s32.c +new file mode 100644 +index 000000000..db6f3df90 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s32.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_s32_m_tied1: ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s32_m_tied1, svint32_t, ++ z0 = svsub_s32_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sub z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s32_m_tied2, svint32_t, ++ z0 = svsub_s32_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_s32_m_untied: ++** movprfx z0, z1 ++** sub z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s32_m_untied, svint32_t, ++ z0 = svsub_s32_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** sub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svsub_n_s32_m (p0, z0, x0), ++ z0 = svsub_m (p0, z0, x0)) ++ ++/* ++** sub_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** sub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svsub_n_s32_m (p0, z1, x0), ++ z0 = svsub_m (p0, z1, x0)) ++ ++/* ++** sub_1_s32_m_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** add z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s32_m_tied1, svint32_t, ++ z0 = svsub_n_s32_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0, z1 ++** add z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s32_m_untied, svint32_t, ++ z0 = svsub_n_s32_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_m2_s32_m: ++** mov (z[0-9]+\.s), #2 ++** add z0\.s, p0/m, 
z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_s32_m, svint32_t, ++ z0 = svsub_n_s32_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s32_z_tied1, svint32_t, ++ z0 = svsub_s32_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** subr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s32_z_tied2, svint32_t, ++ z0 = svsub_s32_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sub z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** subr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s32_z_untied, svint32_t, ++ z0 = svsub_s32_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_w0_s32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** sub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svsub_n_s32_z (p0, z0, x0), ++ z0 = svsub_z (p0, z0, x0)) ++ ++/* ++** sub_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sub z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** subr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svsub_n_s32_z (p0, z1, x0), ++ z0 = svsub_z (p0, z1, x0)) ++ ++/* ++** sub_1_s32_z_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0\.s, p0/z, z0\.s ++** add z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s32_z_tied1, svint32_t, ++ z0 = svsub_n_s32_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_s32_z_untied: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** add z0\.s, p0/m, z0\.s, \1\.s ++** | ++** movprfx z0\.s, p0/z, \1\.s ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s32_z_untied, svint32_t, ++ z0 = svsub_n_s32_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_s32_x_tied1: ++** sub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s32_x_tied1, svint32_t, ++ z0 = svsub_s32_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_s32_x_tied2: ++** sub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s32_x_tied2, svint32_t, ++ z0 = svsub_s32_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_s32_x_untied: ++** sub z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s32_x_untied, svint32_t, ++ z0 = svsub_s32_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** sub z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svsub_n_s32_x (p0, z0, x0), ++ z0 = svsub_x (p0, z0, x0)) ++ ++/* ++** sub_w0_s32_x_untied: ++** mov (z[0-9]+\.s), w0 ++** sub z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svsub_n_s32_x (p0, z1, x0), ++ z0 = svsub_x (p0, z1, x0)) ++ ++/* ++** sub_1_s32_x_tied1: ++** sub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s32_x_tied1, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_s32_x_untied: ++** movprfx z0, z1 ++** sub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s32_x_untied, svint32_t, ++ z0 = svsub_n_s32_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_127_s32_x: ++** sub z0\.s, z0\.s, #127 ++** ret ++*/ 
++TEST_UNIFORM_Z (sub_127_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, 127), ++ z0 = svsub_x (p0, z0, 127)) ++ ++/* ++** sub_128_s32_x: ++** sub z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_128_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, 128), ++ z0 = svsub_x (p0, z0, 128)) ++ ++/* ++** sub_255_s32_x: ++** sub z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_255_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, 255), ++ z0 = svsub_x (p0, z0, 255)) ++ ++/* ++** sub_256_s32_x: ++** sub z0\.s, z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_256_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, 256), ++ z0 = svsub_x (p0, z0, 256)) ++ ++/* ++** sub_511_s32_x: ++** mov (z[0-9]+\.s), #-511 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_511_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, 511), ++ z0 = svsub_x (p0, z0, 511)) ++ ++/* ++** sub_512_s32_x: ++** sub z0\.s, z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_512_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, 512), ++ z0 = svsub_x (p0, z0, 512)) ++ ++/* ++** sub_65280_s32_x: ++** sub z0\.s, z0\.s, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65280_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, 0xff00), ++ z0 = svsub_x (p0, z0, 0xff00)) ++ ++/* ++** sub_65535_s32_x: ++** mov (z[0-9]+\.s), #-65535 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65535_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, 65535), ++ z0 = svsub_x (p0, z0, 65535)) ++ ++/* ++** sub_65536_s32_x: ++** mov (z[0-9]+\.s), #-65536 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65536_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, 65536), ++ z0 = svsub_x (p0, z0, 65536)) ++ ++/* ++** sub_m1_s32_x: ++** add z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m127_s32_x: ++** add z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m127_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -127), ++ z0 = svsub_x (p0, z0, -127)) ++ ++/* ++** sub_m128_s32_x: ++** add z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m128_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -128), ++ z0 = svsub_x (p0, z0, -128)) ++ ++/* ++** sub_m255_s32_x: ++** add z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m255_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -255), ++ z0 = svsub_x (p0, z0, -255)) ++ ++/* ++** sub_m256_s32_x: ++** add z0\.s, z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m256_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -256), ++ z0 = svsub_x (p0, z0, -256)) ++ ++/* ++** sub_m511_s32_x: ++** mov (z[0-9]+\.s), #511 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m511_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -511), ++ z0 = svsub_x (p0, z0, -511)) ++ ++/* ++** sub_m512_s32_x: ++** add z0\.s, z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m512_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -512), ++ z0 = svsub_x (p0, z0, -512)) ++ ++/* ++** sub_m32768_s32_x: ++** add z0\.s, z0\.s, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m32768_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -0x8000), ++ z0 = svsub_x (p0, z0, -0x8000)) ++ ++/* ++** sub_m65280_s32_x: ++** add z0\.s, z0\.s, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65280_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -0xff00), ++ z0 = svsub_x (p0, z0, -0xff00)) ++ ++/* ++** sub_m65535_s32_x: ++** mov (z[0-9]+\.s), #65535 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret 
++*/ ++TEST_UNIFORM_Z (sub_m65535_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -65535), ++ z0 = svsub_x (p0, z0, -65535)) ++ ++/* ++** sub_m65536_s32_x: ++** mov (z[0-9]+\.s), #65536 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65536_s32_x, svint32_t, ++ z0 = svsub_n_s32_x (p0, z0, -65536), ++ z0 = svsub_x (p0, z0, -65536)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s64.c +new file mode 100644 +index 000000000..b9184c3a8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s64.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_s64_m_tied1: ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s64_m_tied1, svint64_t, ++ z0 = svsub_s64_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** sub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s64_m_tied2, svint64_t, ++ z0 = svsub_s64_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_s64_m_untied: ++** movprfx z0, z1 ++** sub z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s64_m_untied, svint64_t, ++ z0 = svsub_s64_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** sub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svsub_n_s64_m (p0, z0, x0), ++ z0 = svsub_m (p0, z0, x0)) ++ ++/* ++** sub_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** sub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svsub_n_s64_m (p0, z1, x0), ++ z0 = svsub_m (p0, z1, x0)) ++ ++/* ++** sub_1_s64_m_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** add z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s64_m_tied1, svint64_t, ++ z0 = svsub_n_s64_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0, z1 ++** add z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s64_m_untied, svint64_t, ++ z0 = svsub_n_s64_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_m2_s64_m: ++** mov (z[0-9]+\.d), #2 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_s64_m, svint64_t, ++ z0 = svsub_n_s64_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s64_z_tied1, svint64_t, ++ z0 = svsub_s64_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** subr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s64_z_tied2, svint64_t, ++ z0 = svsub_s64_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sub z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** subr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s64_z_untied, svint64_t, ++ z0 = svsub_s64_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** sub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svsub_n_s64_z (p0, z0, x0), ++ z0 = svsub_z (p0, z0, 
x0)) ++ ++/* ++** sub_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sub z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** subr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svsub_n_s64_z (p0, z1, x0), ++ z0 = svsub_z (p0, z1, x0)) ++ ++/* ++** sub_1_s64_z_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0\.d, p0/z, z0\.d ++** add z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s64_z_tied1, svint64_t, ++ z0 = svsub_n_s64_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_s64_z_untied: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** add z0\.d, p0/m, z0\.d, \1\.d ++** | ++** movprfx z0\.d, p0/z, \1\.d ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s64_z_untied, svint64_t, ++ z0 = svsub_n_s64_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_s64_x_tied1: ++** sub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s64_x_tied1, svint64_t, ++ z0 = svsub_s64_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_s64_x_tied2: ++** sub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s64_x_tied2, svint64_t, ++ z0 = svsub_s64_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_s64_x_untied: ++** sub z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s64_x_untied, svint64_t, ++ z0 = svsub_s64_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** sub z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svsub_n_s64_x (p0, z0, x0), ++ z0 = svsub_x (p0, z0, x0)) ++ ++/* ++** sub_x0_s64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** sub z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svsub_n_s64_x (p0, z1, x0), ++ z0 = svsub_x (p0, z1, x0)) ++ ++/* ++** sub_1_s64_x_tied1: ++** sub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s64_x_tied1, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_s64_x_untied: ++** movprfx z0, z1 ++** sub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s64_x_untied, svint64_t, ++ z0 = svsub_n_s64_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_127_s64_x: ++** sub z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_127_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, 127), ++ z0 = svsub_x (p0, z0, 127)) ++ ++/* ++** sub_128_s64_x: ++** sub z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_128_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, 128), ++ z0 = svsub_x (p0, z0, 128)) ++ ++/* ++** sub_255_s64_x: ++** sub z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_255_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, 255), ++ z0 = svsub_x (p0, z0, 255)) ++ ++/* ++** sub_256_s64_x: ++** sub z0\.d, z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_256_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, 256), ++ z0 = svsub_x (p0, z0, 256)) ++ ++/* ++** sub_511_s64_x: ++** mov (z[0-9]+\.d), #-511 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_511_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, 511), ++ z0 = svsub_x (p0, z0, 511)) ++ ++/* ++** sub_512_s64_x: ++** sub z0\.d, z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_512_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, 512), ++ z0 = svsub_x (p0, z0, 512)) ++ ++/* ++** sub_65280_s64_x: ++** sub z0\.d, z0\.d, #65280 ++** ret ++*/ 
++TEST_UNIFORM_Z (sub_65280_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, 0xff00), ++ z0 = svsub_x (p0, z0, 0xff00)) ++ ++/* ++** sub_65535_s64_x: ++** mov (z[0-9]+\.d), #-65535 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65535_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, 65535), ++ z0 = svsub_x (p0, z0, 65535)) ++ ++/* ++** sub_65536_s64_x: ++** mov (z[0-9]+\.d), #-65536 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65536_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, 65536), ++ z0 = svsub_x (p0, z0, 65536)) ++ ++/* ++** sub_m1_s64_x: ++** add z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m127_s64_x: ++** add z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m127_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -127), ++ z0 = svsub_x (p0, z0, -127)) ++ ++/* ++** sub_m128_s64_x: ++** add z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m128_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -128), ++ z0 = svsub_x (p0, z0, -128)) ++ ++/* ++** sub_m255_s64_x: ++** add z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m255_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -255), ++ z0 = svsub_x (p0, z0, -255)) ++ ++/* ++** sub_m256_s64_x: ++** add z0\.d, z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m256_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -256), ++ z0 = svsub_x (p0, z0, -256)) ++ ++/* ++** sub_m511_s64_x: ++** mov (z[0-9]+\.d), #511 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m511_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -511), ++ z0 = svsub_x (p0, z0, -511)) ++ ++/* ++** sub_m512_s64_x: ++** add z0\.d, z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m512_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -512), ++ z0 = svsub_x (p0, z0, -512)) ++ ++/* ++** sub_m32768_s64_x: ++** add z0\.d, z0\.d, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m32768_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -0x8000), ++ z0 = svsub_x (p0, z0, -0x8000)) ++ ++/* ++** sub_m65280_s64_x: ++** add z0\.d, z0\.d, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65280_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -0xff00), ++ z0 = svsub_x (p0, z0, -0xff00)) ++ ++/* ++** sub_m65535_s64_x: ++** mov (z[0-9]+\.d), #65535 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65535_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -65535), ++ z0 = svsub_x (p0, z0, -65535)) ++ ++/* ++** sub_m65536_s64_x: ++** mov (z[0-9]+\.d), #65536 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65536_s64_x, svint64_t, ++ z0 = svsub_n_s64_x (p0, z0, -65536), ++ z0 = svsub_x (p0, z0, -65536)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s8.c +new file mode 100644 +index 000000000..0d7ba99aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s8.c +@@ -0,0 +1,294 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_s8_m_tied1: ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s8_m_tied1, svint8_t, ++ z0 = svsub_s8_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sub z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s8_m_tied2, svint8_t, ++ z0 = svsub_s8_m (p0, z1, z0), ++ z0 = svsub_m 
(p0, z1, z0)) ++ ++/* ++** sub_s8_m_untied: ++** movprfx z0, z1 ++** sub z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s8_m_untied, svint8_t, ++ z0 = svsub_s8_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** sub z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svsub_n_s8_m (p0, z0, x0), ++ z0 = svsub_m (p0, z0, x0)) ++ ++/* ++** sub_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** sub z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svsub_n_s8_m (p0, z1, x0), ++ z0 = svsub_m (p0, z1, x0)) ++ ++/* ++** sub_1_s8_m_tied1: ++** mov (z[0-9]+\.b), #-1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s8_m_tied1, svint8_t, ++ z0 = svsub_n_s8_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #-1 ++** movprfx z0, z1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s8_m_untied, svint8_t, ++ z0 = svsub_n_s8_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_m1_s8_m: ++** mov (z[0-9]+\.b), #1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_s8_m, svint8_t, ++ z0 = svsub_n_s8_m (p0, z0, -1), ++ z0 = svsub_m (p0, z0, -1)) ++ ++/* ++** sub_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s8_z_tied1, svint8_t, ++ z0 = svsub_s8_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** subr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s8_z_tied2, svint8_t, ++ z0 = svsub_s8_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** sub z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** subr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s8_z_untied, svint8_t, ++ z0 = svsub_s8_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** sub z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svsub_n_s8_z (p0, z0, x0), ++ z0 = svsub_z (p0, z0, x0)) ++ ++/* ++** sub_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** sub z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** subr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svsub_n_s8_z (p0, z1, x0), ++ z0 = svsub_z (p0, z1, x0)) ++ ++/* ++** sub_1_s8_z_tied1: ++** mov (z[0-9]+\.b), #-1 ++** movprfx z0\.b, p0/z, z0\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s8_z_tied1, svint8_t, ++ z0 = svsub_n_s8_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_s8_z_untied: ++** mov (z[0-9]+\.b), #-1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s8_z_untied, svint8_t, ++ z0 = svsub_n_s8_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_s8_x_tied1: ++** sub z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s8_x_tied1, svint8_t, ++ z0 = svsub_s8_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_s8_x_tied2: ++** sub z0\.b, z1\.b, z0\.b ++** ret ++*/ 
++TEST_UNIFORM_Z (sub_s8_x_tied2, svint8_t, ++ z0 = svsub_s8_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_s8_x_untied: ++** sub z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_s8_x_untied, svint8_t, ++ z0 = svsub_s8_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** sub z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svsub_n_s8_x (p0, z0, x0), ++ z0 = svsub_x (p0, z0, x0)) ++ ++/* ++** sub_w0_s8_x_untied: ++** mov (z[0-9]+\.b), w0 ++** sub z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_s8_x_untied, svint8_t, int8_t, ++ z0 = svsub_n_s8_x (p0, z1, x0), ++ z0 = svsub_x (p0, z1, x0)) ++ ++/* ++** sub_1_s8_x_tied1: ++** add z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s8_x_tied1, svint8_t, ++ z0 = svsub_n_s8_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_s8_x_untied: ++** movprfx z0, z1 ++** add z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_s8_x_untied, svint8_t, ++ z0 = svsub_n_s8_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_127_s8_x: ++** add z0\.b, z0\.b, #129 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_127_s8_x, svint8_t, ++ z0 = svsub_n_s8_x (p0, z0, 127), ++ z0 = svsub_x (p0, z0, 127)) ++ ++/* ++** sub_128_s8_x: ++** add z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_128_s8_x, svint8_t, ++ z0 = svsub_n_s8_x (p0, z0, 128), ++ z0 = svsub_x (p0, z0, 128)) ++ ++/* ++** sub_255_s8_x: ++** add z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_255_s8_x, svint8_t, ++ z0 = svsub_n_s8_x (p0, z0, 255), ++ z0 = svsub_x (p0, z0, 255)) ++ ++/* ++** sub_m1_s8_x: ++** add z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_s8_x, svint8_t, ++ z0 = svsub_n_s8_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m127_s8_x: ++** add z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m127_s8_x, svint8_t, ++ z0 = svsub_n_s8_x (p0, z0, -127), ++ z0 = svsub_x (p0, z0, -127)) ++ ++/* ++** sub_m128_s8_x: ++** add z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m128_s8_x, svint8_t, ++ z0 = svsub_n_s8_x (p0, z0, -128), ++ z0 = svsub_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u16.c +new file mode 100644 +index 000000000..89620e159 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u16.c +@@ -0,0 +1,377 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_u16_m_tied1: ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u16_m_tied1, svuint16_t, ++ z0 = svsub_u16_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sub z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u16_m_tied2, svuint16_t, ++ z0 = svsub_u16_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_u16_m_untied: ++** movprfx z0, z1 ++** sub z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u16_m_untied, svuint16_t, ++ z0 = svsub_u16_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** sub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svsub_n_u16_m (p0, z0, x0), ++ z0 = svsub_m (p0, z0, x0)) ++ ++/* ++** sub_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** sub z0\.h, p0/m, 
z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svsub_n_u16_m (p0, z1, x0), ++ z0 = svsub_m (p0, z1, x0)) ++ ++/* ++** sub_1_u16_m_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** add z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u16_m_tied1, svuint16_t, ++ z0 = svsub_n_u16_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0, z1 ++** add z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u16_m_untied, svuint16_t, ++ z0 = svsub_n_u16_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_m2_u16_m: ++** mov (z[0-9]+\.h), #2 ++** add z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_u16_m, svuint16_t, ++ z0 = svsub_n_u16_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u16_z_tied1, svuint16_t, ++ z0 = svsub_u16_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** subr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u16_z_tied2, svuint16_t, ++ z0 = svsub_u16_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** sub z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** subr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u16_z_untied, svuint16_t, ++ z0 = svsub_u16_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** sub z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svsub_n_u16_z (p0, z0, x0), ++ z0 = svsub_z (p0, z0, x0)) ++ ++/* ++** sub_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** sub z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** subr z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svsub_n_u16_z (p0, z1, x0), ++ z0 = svsub_z (p0, z1, x0)) ++ ++/* ++** sub_1_u16_z_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0\.h, p0/z, z0\.h ++** add z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u16_z_tied1, svuint16_t, ++ z0 = svsub_n_u16_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_u16_z_untied: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** add z0\.h, p0/m, z0\.h, \1\.h ++** | ++** movprfx z0\.h, p0/z, \1\.h ++** add z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u16_z_untied, svuint16_t, ++ z0 = svsub_n_u16_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_u16_x_tied1: ++** sub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u16_x_tied1, svuint16_t, ++ z0 = svsub_u16_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_u16_x_tied2: ++** sub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u16_x_tied2, svuint16_t, ++ z0 = svsub_u16_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_u16_x_untied: ++** sub z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u16_x_untied, svuint16_t, ++ z0 = svsub_u16_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** sub z0\.h, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svsub_n_u16_x (p0, 
z0, x0), ++ z0 = svsub_x (p0, z0, x0)) ++ ++/* ++** sub_w0_u16_x_untied: ++** mov (z[0-9]+\.h), w0 ++** sub z0\.h, z1\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svsub_n_u16_x (p0, z1, x0), ++ z0 = svsub_x (p0, z1, x0)) ++ ++/* ++** sub_1_u16_x_tied1: ++** sub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u16_x_tied1, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_u16_x_untied: ++** movprfx z0, z1 ++** sub z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u16_x_untied, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_127_u16_x: ++** sub z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_127_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, 127), ++ z0 = svsub_x (p0, z0, 127)) ++ ++/* ++** sub_128_u16_x: ++** sub z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_128_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, 128), ++ z0 = svsub_x (p0, z0, 128)) ++ ++/* ++** sub_255_u16_x: ++** sub z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_255_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, 255), ++ z0 = svsub_x (p0, z0, 255)) ++ ++/* ++** sub_256_u16_x: ++** add z0\.h, z0\.h, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_256_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, 256), ++ z0 = svsub_x (p0, z0, 256)) ++ ++/* ++** sub_257_u16_x: ++** mov (z[0-9]+\.h), #-257 ++** add z0\.h, (z0\.h, \1|\1, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_257_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, 257), ++ z0 = svsub_x (p0, z0, 257)) ++ ++/* ++** sub_512_u16_x: ++** add z0\.h, z0\.h, #65024 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_512_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, 512), ++ z0 = svsub_x (p0, z0, 512)) ++ ++/* ++** sub_65280_u16_x: ++** add z0\.h, z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65280_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, 0xff00), ++ z0 = svsub_x (p0, z0, 0xff00)) ++ ++/* ++** sub_m1_u16_x: ++** add z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m127_u16_x: ++** add z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m127_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, -127), ++ z0 = svsub_x (p0, z0, -127)) ++ ++/* ++** sub_m128_u16_x: ++** add z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m128_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, -128), ++ z0 = svsub_x (p0, z0, -128)) ++ ++/* ++** sub_m255_u16_x: ++** add z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m255_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, -255), ++ z0 = svsub_x (p0, z0, -255)) ++ ++/* ++** sub_m256_u16_x: ++** add z0\.h, z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m256_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, -256), ++ z0 = svsub_x (p0, z0, -256)) ++ ++/* ++** sub_m257_u16_x: ++** mov (z[0-9]+)\.b, #1 ++** add z0\.h, (z0\.h, \1\.h|\1\.h, z0\.h) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m257_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, -257), ++ z0 = svsub_x (p0, z0, -257)) ++ ++/* ++** sub_m512_u16_x: ++** add z0\.h, z0\.h, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m512_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, -512), ++ z0 = svsub_x (p0, z0, -512)) ++ ++/* ++** sub_m32768_u16_x: ++** add z0\.h, z0\.h, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m32768_u16_x, svuint16_t, ++ z0 = svsub_n_u16_x (p0, z0, -0x8000), ++ z0 = svsub_x (p0, z0, -0x8000)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u32.c +new file mode 100644 +index 000000000..c4b405d4d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u32.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_u32_m_tied1: ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u32_m_tied1, svuint32_t, ++ z0 = svsub_u32_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sub z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u32_m_tied2, svuint32_t, ++ z0 = svsub_u32_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_u32_m_untied: ++** movprfx z0, z1 ++** sub z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u32_m_untied, svuint32_t, ++ z0 = svsub_u32_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** sub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svsub_n_u32_m (p0, z0, x0), ++ z0 = svsub_m (p0, z0, x0)) ++ ++/* ++** sub_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** sub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svsub_n_u32_m (p0, z1, x0), ++ z0 = svsub_m (p0, z1, x0)) ++ ++/* ++** sub_1_u32_m_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** add z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u32_m_tied1, svuint32_t, ++ z0 = svsub_n_u32_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0, z1 ++** add z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u32_m_untied, svuint32_t, ++ z0 = svsub_n_u32_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_m2_u32_m: ++** mov (z[0-9]+\.s), #2 ++** add z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_u32_m, svuint32_t, ++ z0 = svsub_n_u32_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u32_z_tied1, svuint32_t, ++ z0 = svsub_u32_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** subr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u32_z_tied2, svuint32_t, ++ z0 = svsub_u32_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sub z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** subr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u32_z_untied, svuint32_t, ++ z0 = svsub_u32_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** sub z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svsub_n_u32_z (p0, z0, x0), ++ z0 = svsub_z (p0, z0, x0)) ++ ++/* ++** sub_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** sub z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** subr z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svsub_n_u32_z (p0, z1, x0), ++ z0 = svsub_z (p0, z1, x0)) 
++ ++/* ++** sub_1_u32_z_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0\.s, p0/z, z0\.s ++** add z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u32_z_tied1, svuint32_t, ++ z0 = svsub_n_u32_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_u32_z_untied: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** add z0\.s, p0/m, z0\.s, \1\.s ++** | ++** movprfx z0\.s, p0/z, \1\.s ++** add z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u32_z_untied, svuint32_t, ++ z0 = svsub_n_u32_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_u32_x_tied1: ++** sub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u32_x_tied1, svuint32_t, ++ z0 = svsub_u32_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_u32_x_tied2: ++** sub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u32_x_tied2, svuint32_t, ++ z0 = svsub_u32_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_u32_x_untied: ++** sub z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u32_x_untied, svuint32_t, ++ z0 = svsub_u32_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** sub z0\.s, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svsub_n_u32_x (p0, z0, x0), ++ z0 = svsub_x (p0, z0, x0)) ++ ++/* ++** sub_w0_u32_x_untied: ++** mov (z[0-9]+\.s), w0 ++** sub z0\.s, z1\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = svsub_n_u32_x (p0, z1, x0), ++ z0 = svsub_x (p0, z1, x0)) ++ ++/* ++** sub_1_u32_x_tied1: ++** sub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u32_x_tied1, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_u32_x_untied: ++** movprfx z0, z1 ++** sub z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u32_x_untied, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_127_u32_x: ++** sub z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_127_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, 127), ++ z0 = svsub_x (p0, z0, 127)) ++ ++/* ++** sub_128_u32_x: ++** sub z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_128_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, 128), ++ z0 = svsub_x (p0, z0, 128)) ++ ++/* ++** sub_255_u32_x: ++** sub z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_255_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, 255), ++ z0 = svsub_x (p0, z0, 255)) ++ ++/* ++** sub_256_u32_x: ++** sub z0\.s, z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_256_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, 256), ++ z0 = svsub_x (p0, z0, 256)) ++ ++/* ++** sub_511_u32_x: ++** mov (z[0-9]+\.s), #-511 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_511_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, 511), ++ z0 = svsub_x (p0, z0, 511)) ++ ++/* ++** sub_512_u32_x: ++** sub z0\.s, z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_512_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, 512), ++ z0 = svsub_x (p0, z0, 512)) ++ ++/* ++** sub_65280_u32_x: ++** sub z0\.s, z0\.s, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65280_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, 0xff00), ++ z0 = svsub_x (p0, z0, 0xff00)) ++ ++/* ++** sub_65535_u32_x: ++** mov (z[0-9]+\.s), #-65535 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65535_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, 65535), ++ z0 = svsub_x (p0, z0, 
65535)) ++ ++/* ++** sub_65536_u32_x: ++** mov (z[0-9]+\.s), #-65536 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65536_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, 65536), ++ z0 = svsub_x (p0, z0, 65536)) ++ ++/* ++** sub_m1_u32_x: ++** add z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m127_u32_x: ++** add z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m127_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -127), ++ z0 = svsub_x (p0, z0, -127)) ++ ++/* ++** sub_m128_u32_x: ++** add z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m128_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -128), ++ z0 = svsub_x (p0, z0, -128)) ++ ++/* ++** sub_m255_u32_x: ++** add z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m255_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -255), ++ z0 = svsub_x (p0, z0, -255)) ++ ++/* ++** sub_m256_u32_x: ++** add z0\.s, z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m256_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -256), ++ z0 = svsub_x (p0, z0, -256)) ++ ++/* ++** sub_m511_u32_x: ++** mov (z[0-9]+\.s), #511 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m511_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -511), ++ z0 = svsub_x (p0, z0, -511)) ++ ++/* ++** sub_m512_u32_x: ++** add z0\.s, z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m512_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -512), ++ z0 = svsub_x (p0, z0, -512)) ++ ++/* ++** sub_m32768_u32_x: ++** add z0\.s, z0\.s, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m32768_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -0x8000), ++ z0 = svsub_x (p0, z0, -0x8000)) ++ ++/* ++** sub_m65280_u32_x: ++** add z0\.s, z0\.s, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65280_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -0xff00), ++ z0 = svsub_x (p0, z0, -0xff00)) ++ ++/* ++** sub_m65535_u32_x: ++** mov (z[0-9]+\.s), #65535 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65535_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -65535), ++ z0 = svsub_x (p0, z0, -65535)) ++ ++/* ++** sub_m65536_u32_x: ++** mov (z[0-9]+\.s), #65536 ++** add z0\.s, (z0\.s, \1|\1, z0\.s) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65536_u32_x, svuint32_t, ++ z0 = svsub_n_u32_x (p0, z0, -65536), ++ z0 = svsub_x (p0, z0, -65536)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u64.c +new file mode 100644 +index 000000000..fb7f7173a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u64.c +@@ -0,0 +1,426 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_u64_m_tied1: ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u64_m_tied1, svuint64_t, ++ z0 = svsub_u64_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** sub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u64_m_tied2, svuint64_t, ++ z0 = svsub_u64_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_u64_m_untied: ++** movprfx z0, z1 ++** sub z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u64_m_untied, svuint64_t, ++ z0 = svsub_u64_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** sub z0\.d, p0/m, z0\.d, \1 ++** 
ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svsub_n_u64_m (p0, z0, x0), ++ z0 = svsub_m (p0, z0, x0)) ++ ++/* ++** sub_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** sub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svsub_n_u64_m (p0, z1, x0), ++ z0 = svsub_m (p0, z1, x0)) ++ ++/* ++** sub_1_u64_m_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** add z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u64_m_tied1, svuint64_t, ++ z0 = svsub_n_u64_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0, z1 ++** add z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u64_m_untied, svuint64_t, ++ z0 = svsub_n_u64_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_m2_u64_m: ++** mov (z[0-9]+\.d), #2 ++** add z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m2_u64_m, svuint64_t, ++ z0 = svsub_n_u64_m (p0, z0, -2), ++ z0 = svsub_m (p0, z0, -2)) ++ ++/* ++** sub_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u64_z_tied1, svuint64_t, ++ z0 = svsub_u64_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** subr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u64_z_tied2, svuint64_t, ++ z0 = svsub_u64_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sub z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** subr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u64_z_untied, svuint64_t, ++ z0 = svsub_u64_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** sub z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svsub_n_u64_z (p0, z0, x0), ++ z0 = svsub_z (p0, z0, x0)) ++ ++/* ++** sub_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** sub z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** subr z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svsub_n_u64_z (p0, z1, x0), ++ z0 = svsub_z (p0, z1, x0)) ++ ++/* ++** sub_1_u64_z_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** movprfx z0\.d, p0/z, z0\.d ++** add z0\.d, p0/m, z0\.d, \1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u64_z_tied1, svuint64_t, ++ z0 = svsub_n_u64_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_u64_z_untied: ++** mov (z[0-9]+)\.b, #-1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** add z0\.d, p0/m, z0\.d, \1\.d ++** | ++** movprfx z0\.d, p0/z, \1\.d ++** add z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u64_z_untied, svuint64_t, ++ z0 = svsub_n_u64_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_u64_x_tied1: ++** sub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u64_x_tied1, svuint64_t, ++ z0 = svsub_u64_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_u64_x_tied2: ++** sub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u64_x_tied2, svuint64_t, ++ z0 = svsub_u64_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_u64_x_untied: ++** sub z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u64_x_untied, svuint64_t, ++ z0 = 
svsub_u64_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** sub z0\.d, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svsub_n_u64_x (p0, z0, x0), ++ z0 = svsub_x (p0, z0, x0)) ++ ++/* ++** sub_x0_u64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** sub z0\.d, z1\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svsub_n_u64_x (p0, z1, x0), ++ z0 = svsub_x (p0, z1, x0)) ++ ++/* ++** sub_1_u64_x_tied1: ++** sub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u64_x_tied1, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_u64_x_untied: ++** movprfx z0, z1 ++** sub z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u64_x_untied, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_127_u64_x: ++** sub z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_127_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, 127), ++ z0 = svsub_x (p0, z0, 127)) ++ ++/* ++** sub_128_u64_x: ++** sub z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_128_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, 128), ++ z0 = svsub_x (p0, z0, 128)) ++ ++/* ++** sub_255_u64_x: ++** sub z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_255_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, 255), ++ z0 = svsub_x (p0, z0, 255)) ++ ++/* ++** sub_256_u64_x: ++** sub z0\.d, z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_256_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, 256), ++ z0 = svsub_x (p0, z0, 256)) ++ ++/* ++** sub_511_u64_x: ++** mov (z[0-9]+\.d), #-511 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_511_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, 511), ++ z0 = svsub_x (p0, z0, 511)) ++ ++/* ++** sub_512_u64_x: ++** sub z0\.d, z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_512_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, 512), ++ z0 = svsub_x (p0, z0, 512)) ++ ++/* ++** sub_65280_u64_x: ++** sub z0\.d, z0\.d, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65280_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, 0xff00), ++ z0 = svsub_x (p0, z0, 0xff00)) ++ ++/* ++** sub_65535_u64_x: ++** mov (z[0-9]+\.d), #-65535 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65535_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, 65535), ++ z0 = svsub_x (p0, z0, 65535)) ++ ++/* ++** sub_65536_u64_x: ++** mov (z[0-9]+\.d), #-65536 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_65536_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, 65536), ++ z0 = svsub_x (p0, z0, 65536)) ++ ++/* ++** sub_m1_u64_x: ++** add z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m127_u64_x: ++** add z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m127_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, -127), ++ z0 = svsub_x (p0, z0, -127)) ++ ++/* ++** sub_m128_u64_x: ++** add z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m128_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, -128), ++ z0 = svsub_x (p0, z0, -128)) ++ ++/* ++** sub_m255_u64_x: ++** add z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m255_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, -255), ++ z0 = svsub_x (p0, z0, -255)) ++ ++/* ++** sub_m256_u64_x: ++** add z0\.d, z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m256_u64_x, svuint64_t, 
++ z0 = svsub_n_u64_x (p0, z0, -256), ++ z0 = svsub_x (p0, z0, -256)) ++ ++/* ++** sub_m511_u64_x: ++** mov (z[0-9]+\.d), #511 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m511_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, -511), ++ z0 = svsub_x (p0, z0, -511)) ++ ++/* ++** sub_m512_u64_x: ++** add z0\.d, z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m512_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, -512), ++ z0 = svsub_x (p0, z0, -512)) ++ ++/* ++** sub_m32768_u64_x: ++** add z0\.d, z0\.d, #32768 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m32768_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, -0x8000), ++ z0 = svsub_x (p0, z0, -0x8000)) ++ ++/* ++** sub_m65280_u64_x: ++** add z0\.d, z0\.d, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65280_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, -0xff00), ++ z0 = svsub_x (p0, z0, -0xff00)) ++ ++/* ++** sub_m65535_u64_x: ++** mov (z[0-9]+\.d), #65535 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65535_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, -65535), ++ z0 = svsub_x (p0, z0, -65535)) ++ ++/* ++** sub_m65536_u64_x: ++** mov (z[0-9]+\.d), #65536 ++** add z0\.d, (z0\.d, \1|\1, z0\.d) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m65536_u64_x, svuint64_t, ++ z0 = svsub_n_u64_x (p0, z0, -65536), ++ z0 = svsub_x (p0, z0, -65536)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u8.c +new file mode 100644 +index 000000000..455204191 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u8.c +@@ -0,0 +1,294 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sub_u8_m_tied1: ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u8_m_tied1, svuint8_t, ++ z0 = svsub_u8_m (p0, z0, z1), ++ z0 = svsub_m (p0, z0, z1)) ++ ++/* ++** sub_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** sub z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u8_m_tied2, svuint8_t, ++ z0 = svsub_u8_m (p0, z1, z0), ++ z0 = svsub_m (p0, z1, z0)) ++ ++/* ++** sub_u8_m_untied: ++** movprfx z0, z1 ++** sub z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u8_m_untied, svuint8_t, ++ z0 = svsub_u8_m (p0, z1, z2), ++ z0 = svsub_m (p0, z1, z2)) ++ ++/* ++** sub_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** sub z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svsub_n_u8_m (p0, z0, x0), ++ z0 = svsub_m (p0, z0, x0)) ++ ++/* ++** sub_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** sub z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svsub_n_u8_m (p0, z1, x0), ++ z0 = svsub_m (p0, z1, x0)) ++ ++/* ++** sub_1_u8_m_tied1: ++** mov (z[0-9]+\.b), #-1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u8_m_tied1, svuint8_t, ++ z0 = svsub_n_u8_m (p0, z0, 1), ++ z0 = svsub_m (p0, z0, 1)) ++ ++/* ++** sub_1_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #-1 ++** movprfx z0, z1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u8_m_untied, svuint8_t, ++ z0 = svsub_n_u8_m (p0, z1, 1), ++ z0 = svsub_m (p0, z1, 1)) ++ ++/* ++** sub_m1_u8_m: ++** mov (z[0-9]+\.b), #1 ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_u8_m, svuint8_t, ++ z0 = svsub_n_u8_m (p0, z0, -1), ++ z0 = svsub_m (p0, z0, -1)) ++ ++/* ++** sub_u8_z_tied1: ++** 
movprfx z0\.b, p0/z, z0\.b ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u8_z_tied1, svuint8_t, ++ z0 = svsub_u8_z (p0, z0, z1), ++ z0 = svsub_z (p0, z0, z1)) ++ ++/* ++** sub_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** subr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u8_z_tied2, svuint8_t, ++ z0 = svsub_u8_z (p0, z1, z0), ++ z0 = svsub_z (p0, z1, z0)) ++ ++/* ++** sub_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** sub z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** subr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u8_z_untied, svuint8_t, ++ z0 = svsub_u8_z (p0, z1, z2), ++ z0 = svsub_z (p0, z1, z2)) ++ ++/* ++** sub_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** sub z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svsub_n_u8_z (p0, z0, x0), ++ z0 = svsub_z (p0, z0, x0)) ++ ++/* ++** sub_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** sub z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** subr z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svsub_n_u8_z (p0, z1, x0), ++ z0 = svsub_z (p0, z1, x0)) ++ ++/* ++** sub_1_u8_z_tied1: ++** mov (z[0-9]+\.b), #-1 ++** movprfx z0\.b, p0/z, z0\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u8_z_tied1, svuint8_t, ++ z0 = svsub_n_u8_z (p0, z0, 1), ++ z0 = svsub_z (p0, z0, 1)) ++ ++/* ++** sub_1_u8_z_untied: ++** mov (z[0-9]+\.b), #-1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** add z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** add z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u8_z_untied, svuint8_t, ++ z0 = svsub_n_u8_z (p0, z1, 1), ++ z0 = svsub_z (p0, z1, 1)) ++ ++/* ++** sub_u8_x_tied1: ++** sub z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u8_x_tied1, svuint8_t, ++ z0 = svsub_u8_x (p0, z0, z1), ++ z0 = svsub_x (p0, z0, z1)) ++ ++/* ++** sub_u8_x_tied2: ++** sub z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u8_x_tied2, svuint8_t, ++ z0 = svsub_u8_x (p0, z1, z0), ++ z0 = svsub_x (p0, z1, z0)) ++ ++/* ++** sub_u8_x_untied: ++** sub z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (sub_u8_x_untied, svuint8_t, ++ z0 = svsub_u8_x (p0, z1, z2), ++ z0 = svsub_x (p0, z1, z2)) ++ ++/* ++** sub_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** sub z0\.b, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svsub_n_u8_x (p0, z0, x0), ++ z0 = svsub_x (p0, z0, x0)) ++ ++/* ++** sub_w0_u8_x_untied: ++** mov (z[0-9]+\.b), w0 ++** sub z0\.b, z1\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (sub_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svsub_n_u8_x (p0, z1, x0), ++ z0 = svsub_x (p0, z1, x0)) ++ ++/* ++** sub_1_u8_x_tied1: ++** add z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u8_x_tied1, svuint8_t, ++ z0 = svsub_n_u8_x (p0, z0, 1), ++ z0 = svsub_x (p0, z0, 1)) ++ ++/* ++** sub_1_u8_x_untied: ++** movprfx z0, z1 ++** add z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_1_u8_x_untied, svuint8_t, ++ z0 = svsub_n_u8_x (p0, z1, 1), ++ z0 = svsub_x (p0, z1, 1)) ++ ++/* ++** sub_127_u8_x: ++** add z0\.b, z0\.b, #129 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_127_u8_x, svuint8_t, ++ z0 = svsub_n_u8_x (p0, z0, 127), ++ z0 = svsub_x (p0, z0, 127)) ++ ++/* ++** sub_128_u8_x: ++** add z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_128_u8_x, 
svuint8_t, ++ z0 = svsub_n_u8_x (p0, z0, 128), ++ z0 = svsub_x (p0, z0, 128)) ++ ++/* ++** sub_255_u8_x: ++** add z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_255_u8_x, svuint8_t, ++ z0 = svsub_n_u8_x (p0, z0, 255), ++ z0 = svsub_x (p0, z0, 255)) ++ ++/* ++** sub_m1_u8_x: ++** add z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m1_u8_x, svuint8_t, ++ z0 = svsub_n_u8_x (p0, z0, -1), ++ z0 = svsub_x (p0, z0, -1)) ++ ++/* ++** sub_m127_u8_x: ++** add z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m127_u8_x, svuint8_t, ++ z0 = svsub_n_u8_x (p0, z0, -127), ++ z0 = svsub_x (p0, z0, -127)) ++ ++/* ++** sub_m128_u8_x: ++** add z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (sub_m128_u8_x, svuint8_t, ++ z0 = svsub_n_u8_x (p0, z0, -128), ++ z0 = svsub_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16.c +new file mode 100644 +index 000000000..e14357db2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16.c +@@ -0,0 +1,444 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_f16_m_tied1: ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_m_tied1, svfloat16_t, ++ z0 = svsubr_f16_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_m_tied2, svfloat16_t, ++ z0 = svsubr_f16_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_f16_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_m_untied, svfloat16_t, ++ z0 = svsubr_f16_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_m (p0, z0, d4), ++ z0 = svsubr_m (p0, z0, d4)) ++ ++/* ++** subr_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_m (p0, z1, d4), ++ z0 = svsubr_m (p0, z1, d4)) ++ ++/* ++** subr_1_f16_m_tied1: ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_m_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_f16_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_m_untied, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_0p5_f16_m_tied1: ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_m_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z0, 0.5), ++ z0 = svsubr_m (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f16_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_m_untied, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z1, 0.5), ++ z0 = svsubr_m (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f16_m_tied1: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_m_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z0, -1), ++ z0 = svsubr_m (p0, z0, -1)) ++ ++/* ++** subr_m1_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_m_untied, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z1, -1), ++ z0 = svsubr_m (p0, z1, -1)) ++ ++/* ++** subr_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_z_tied1, svfloat16_t, ++ z0 = svsubr_f16_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_z_tied2, svfloat16_t, ++ z0 = svsubr_f16_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fsubr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_z_untied, svfloat16_t, ++ z0 = svsubr_f16_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_z (p0, z0, d4), ++ z0 = svsubr_z (p0, z0, d4)) ++ ++/* ++** subr_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_z (p0, z1, d4), ++ z0 = svsubr_z (p0, z1, d4)) ++ ++/* ++** subr_1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_z_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_z_untied, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z0, 0.5), ++ z0 = svsubr_z (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_z_untied, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z1, 0.5), ++ z0 = svsubr_z (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f16_z_tied1: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_z_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z0, -1), ++ z0 = svsubr_z (p0, z0, -1)) ++ ++/* ++** subr_m1_f16_z_untied: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_z_untied, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z1, -1), ++ z0 = svsubr_z (p0, z1, -1)) ++ ++/* ++** subr_f16_x_tied1: ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_f16_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_f16_x_tied2: ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_x_tied2, svfloat16_t, ++ z0 = svsubr_f16_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_f16_x_untied: ++** ( ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0, z2 ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_x_untied, svfloat16_t, ++ z0 = svsubr_f16_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_x (p0, z0, d4), ++ z0 = svsubr_x (p0, z0, d4)) ++ ++/* ++** subr_h4_f16_x_untied: { xfail *-*-* } ++** mov z0\.h, h4 ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_x (p0, z1, d4), ++ z0 = svsubr_x (p0, z1, d4)) ++ ++/* ++** subr_1_f16_x_tied1: ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_f16_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_0p5_f16_x_tied1: ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z0, 0.5), ++ z0 = svsubr_x (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f16_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z1, 0.5), ++ z0 = svsubr_x (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_f16_x_untied: ++** fmov z0\.h, #-1\.0(?:e\+0)? ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) ++ ++/* ++** ptrue_subr_f16_x_tied1: ++** fsub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svsubr_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_subr_f16_x_tied2: ++** fsub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f16_x_tied2, svfloat16_t, ++ z0 = svsubr_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svsubr_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_subr_f16_x_untied: ++** fsub z0\.h, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f16_x_untied, svfloat16_t, ++ z0 = svsubr_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svsubr_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_subr_1_f16_x_tied1: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svsubr_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_subr_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svsubr_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_subr_0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, 0.5), ++ z0 = svsubr_x (svptrue_b16 (), z0, 0.5)) ++ ++/* ++** ptrue_subr_0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, 0.5), ++ z0 = svsubr_x (svptrue_b16 (), z1, 0.5)) ++ ++/* ++** ptrue_subr_m1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** fsub z0\.h, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, -1), ++ z0 = svsubr_x (svptrue_b16 (), z0, -1)) ++ ++/* ++** ptrue_subr_m1_f16_x_untied: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** fsub z0\.h, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, -1), ++ z0 = svsubr_x (svptrue_b16 (), z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16_notrap.c +new file mode 100644 +index 000000000..a31ebd2ef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16_notrap.c +@@ -0,0 +1,439 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_f16_m_tied1: ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_m_tied1, svfloat16_t, ++ z0 = svsubr_f16_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_f16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_m_tied2, svfloat16_t, ++ z0 = svsubr_f16_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_f16_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_m_untied, svfloat16_t, ++ z0 = svsubr_f16_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_h4_f16_m_tied1: ++** mov (z[0-9]+\.h), h4 ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_m_tied1, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_m (p0, z0, d4), ++ z0 = svsubr_m (p0, z0, d4)) ++ ++/* ++** subr_h4_f16_m_untied: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_m_untied, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_m (p0, z1, d4), ++ z0 = svsubr_m (p0, z1, d4)) ++ ++/* ++** subr_1_f16_m_tied1: ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_m_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_f16_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_m_untied, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** 
subr_0p5_f16_m_tied1: ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_m_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z0, 0.5), ++ z0 = svsubr_m (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f16_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_m_untied, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z1, 0.5), ++ z0 = svsubr_m (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f16_m_tied1: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_m_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z0, -1), ++ z0 = svsubr_m (p0, z0, -1)) ++ ++/* ++** subr_m1_f16_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_m_untied, svfloat16_t, ++ z0 = svsubr_n_f16_m (p0, z1, -1), ++ z0 = svsubr_m (p0, z1, -1)) ++ ++/* ++** subr_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_z_tied1, svfloat16_t, ++ z0 = svsubr_f16_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_f16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_z_tied2, svfloat16_t, ++ z0 = svsubr_f16_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_f16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fsubr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_z_untied, svfloat16_t, ++ z0 = svsubr_f16_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_h4_f16_z_tied1: ++** mov (z[0-9]+\.h), h4 ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_z_tied1, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_z (p0, z0, d4), ++ z0 = svsubr_z (p0, z0, d4)) ++ ++/* ++** subr_h4_f16_z_untied: ++** mov (z[0-9]+\.h), h4 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_z_untied, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_z (p0, z1, d4), ++ z0 = svsubr_z (p0, z1, d4)) ++ ++/* ++** subr_1_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_z_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_z_untied, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_0p5_f16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_z_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z0, 0.5), ++ z0 = svsubr_z (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f16_z_untied: ++** movprfx z0\.h, p0/z, z1\.h ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_z_untied, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z1, 0.5), ++ z0 = svsubr_z (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f16_z_tied1: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? 
++** movprfx z0\.h, p0/z, z0\.h ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_z_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z0, -1), ++ z0 = svsubr_z (p0, z0, -1)) ++ ++/* ++** subr_m1_f16_z_untied: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** fsubr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** fsub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_z_untied, svfloat16_t, ++ z0 = svsubr_n_f16_z (p0, z1, -1), ++ z0 = svsubr_z (p0, z1, -1)) ++ ++/* ++** subr_f16_x_tied1: ++** fsub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_f16_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_f16_x_tied2: ++** fsub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_x_tied2, svfloat16_t, ++ z0 = svsubr_f16_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_f16_x_untied: ++** fsub z0\.h, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f16_x_untied, svfloat16_t, ++ z0 = svsubr_f16_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_h4_f16_x_tied1: ++** mov (z[0-9]+\.h), h4 ++** fsub z0\.h, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_x_tied1, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_x (p0, z0, d4), ++ z0 = svsubr_x (p0, z0, d4)) ++ ++/* ++** subr_h4_f16_x_untied: ++** mov (z[0-9]+\.h), h4 ++** fsub z0\.h, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_h4_f16_x_untied, svfloat16_t, __fp16, ++ z0 = svsubr_n_f16_x (p0, z1, d4), ++ z0 = svsubr_x (p0, z1, d4)) ++ ++/* ++** subr_1_f16_x_tied1: ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_f16_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_0p5_f16_x_tied1: ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z0, 0.5), ++ z0 = svsubr_x (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f16_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.h, p0/m, z0\.h, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z1, 0.5), ++ z0 = svsubr_x (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** fsub z0\.h, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_f16_x_untied: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? 
++** fsub z0\.h, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) ++ ++/* ++** ptrue_subr_f16_x_tied1: ++** fsub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_f16_x (svptrue_b16 (), z0, z1), ++ z0 = svsubr_x (svptrue_b16 (), z0, z1)) ++ ++/* ++** ptrue_subr_f16_x_tied2: ++** fsub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f16_x_tied2, svfloat16_t, ++ z0 = svsubr_f16_x (svptrue_b16 (), z1, z0), ++ z0 = svsubr_x (svptrue_b16 (), z1, z0)) ++ ++/* ++** ptrue_subr_f16_x_untied: ++** fsub z0\.h, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f16_x_untied, svfloat16_t, ++ z0 = svsubr_f16_x (svptrue_b16 (), z1, z2), ++ z0 = svsubr_x (svptrue_b16 (), z1, z2)) ++ ++/* ++** ptrue_subr_1_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, 1), ++ z0 = svsubr_x (svptrue_b16 (), z0, 1)) ++ ++/* ++** ptrue_subr_1_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, 1), ++ z0 = svsubr_x (svptrue_b16 (), z1, 1)) ++ ++/* ++** ptrue_subr_0p5_f16_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, 0.5), ++ z0 = svsubr_x (svptrue_b16 (), z0, 0.5)) ++ ++/* ++** ptrue_subr_0p5_f16_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, 0.5), ++ z0 = svsubr_x (svptrue_b16 (), z1, 0.5)) ++ ++/* ++** ptrue_subr_m1_f16_x_tied1: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? ++** fsub z0\.h, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f16_x_tied1, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, -1), ++ z0 = svsubr_x (svptrue_b16 (), z0, -1)) ++ ++/* ++** ptrue_subr_m1_f16_x_untied: ++** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? 
++** fsub z0\.h, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f16_x_untied, svfloat16_t, ++ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, -1), ++ z0 = svsubr_x (svptrue_b16 (), z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32.c +new file mode 100644 +index 000000000..98dc7ad2b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32.c +@@ -0,0 +1,444 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_f32_m_tied1: ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_m_tied1, svfloat32_t, ++ z0 = svsubr_f32_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_m_tied2, svfloat32_t, ++ z0 = svsubr_f32_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_f32_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_m_untied, svfloat32_t, ++ z0 = svsubr_f32_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svsubr_n_f32_m (p0, z0, d4), ++ z0 = svsubr_m (p0, z0, d4)) ++ ++/* ++** subr_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svsubr_n_f32_m (p0, z1, d4), ++ z0 = svsubr_m (p0, z1, d4)) ++ ++/* ++** subr_1_f32_m_tied1: ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_m_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_f32_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_m_untied, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_0p5_f32_m_tied1: ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_m_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z0, 0.5), ++ z0 = svsubr_m (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f32_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_m_untied, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z1, 0.5), ++ z0 = svsubr_m (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f32_m_tied1: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_m_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z0, -1), ++ z0 = svsubr_m (p0, z0, -1)) ++ ++/* ++** subr_m1_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_m_untied, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z1, -1), ++ z0 = svsubr_m (p0, z1, -1)) ++ ++/* ++** subr_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_z_tied1, svfloat32_t, ++ z0 = svsubr_f32_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_z_tied2, svfloat32_t, ++ z0 = svsubr_f32_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fsubr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_z_untied, svfloat32_t, ++ z0 = svsubr_f32_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svsubr_n_f32_z (p0, z0, d4), ++ z0 = svsubr_z (p0, z0, d4)) ++ ++/* ++** subr_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svsubr_n_f32_z (p0, z1, d4), ++ z0 = svsubr_z (p0, z1, d4)) ++ ++/* ++** subr_1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_z_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_z_untied, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z0, 0.5), ++ z0 = svsubr_z (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_z_untied, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z1, 0.5), ++ z0 = svsubr_z (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f32_z_tied1: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_z_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z0, -1), ++ z0 = svsubr_z (p0, z0, -1)) ++ ++/* ++** subr_m1_f32_z_untied: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_z_untied, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z1, -1), ++ z0 = svsubr_z (p0, z1, -1)) ++ ++/* ++** subr_f32_x_tied1: ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_f32_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_f32_x_tied2: ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_x_tied2, svfloat32_t, ++ z0 = svsubr_f32_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_f32_x_untied: ++** ( ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0, z2 ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_x_untied, svfloat32_t, ++ z0 = svsubr_f32_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svsubr_n_f32_x (p0, z0, d4), ++ z0 = svsubr_x (p0, z0, d4)) ++ ++/* ++** subr_s4_f32_x_untied: { xfail *-*-* } ++** mov z0\.s, s4 ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svsubr_n_f32_x (p0, z1, d4), ++ z0 = svsubr_x (p0, z1, d4)) ++ ++/* ++** subr_1_f32_x_tied1: ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_f32_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_0p5_f32_x_tied1: ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z0, 0.5), ++ z0 = svsubr_x (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f32_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z1, 0.5), ++ z0 = svsubr_x (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_f32_x_untied: ++** fmov z0\.s, #-1\.0(?:e\+0)? ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) ++ ++/* ++** ptrue_subr_f32_x_tied1: ++** fsub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svsubr_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_subr_f32_x_tied2: ++** fsub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f32_x_tied2, svfloat32_t, ++ z0 = svsubr_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svsubr_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_subr_f32_x_untied: ++** fsub z0\.s, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f32_x_untied, svfloat32_t, ++ z0 = svsubr_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svsubr_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_subr_1_f32_x_tied1: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svsubr_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_subr_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svsubr_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_subr_0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, 0.5), ++ z0 = svsubr_x (svptrue_b32 (), z0, 0.5)) ++ ++/* ++** ptrue_subr_0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, 0.5), ++ z0 = svsubr_x (svptrue_b32 (), z1, 0.5)) ++ ++/* ++** ptrue_subr_m1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** fsub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, -1), ++ z0 = svsubr_x (svptrue_b32 (), z0, -1)) ++ ++/* ++** ptrue_subr_m1_f32_x_untied: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** fsub z0\.s, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, -1), ++ z0 = svsubr_x (svptrue_b32 (), z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32_notrap.c +new file mode 100644 +index 000000000..75ae0dc61 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32_notrap.c +@@ -0,0 +1,439 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_f32_m_tied1: ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_m_tied1, svfloat32_t, ++ z0 = svsubr_f32_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_f32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_m_tied2, svfloat32_t, ++ z0 = svsubr_f32_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_f32_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_m_untied, svfloat32_t, ++ z0 = svsubr_f32_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_s4_f32_m_tied1: ++** mov (z[0-9]+\.s), s4 ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_m_tied1, svfloat32_t, float, ++ z0 = svsubr_n_f32_m (p0, z0, d4), ++ z0 = svsubr_m (p0, z0, d4)) ++ ++/* ++** subr_s4_f32_m_untied: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_m_untied, svfloat32_t, float, ++ z0 = svsubr_n_f32_m (p0, z1, d4), ++ z0 = svsubr_m (p0, z1, d4)) ++ ++/* ++** subr_1_f32_m_tied1: ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_m_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_f32_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_m_untied, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** 
subr_0p5_f32_m_tied1: ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_m_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z0, 0.5), ++ z0 = svsubr_m (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f32_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_m_untied, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z1, 0.5), ++ z0 = svsubr_m (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f32_m_tied1: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_m_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z0, -1), ++ z0 = svsubr_m (p0, z0, -1)) ++ ++/* ++** subr_m1_f32_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_m_untied, svfloat32_t, ++ z0 = svsubr_n_f32_m (p0, z1, -1), ++ z0 = svsubr_m (p0, z1, -1)) ++ ++/* ++** subr_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_z_tied1, svfloat32_t, ++ z0 = svsubr_f32_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_f32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_z_tied2, svfloat32_t, ++ z0 = svsubr_f32_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_f32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fsubr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_z_untied, svfloat32_t, ++ z0 = svsubr_f32_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_s4_f32_z_tied1: ++** mov (z[0-9]+\.s), s4 ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_z_tied1, svfloat32_t, float, ++ z0 = svsubr_n_f32_z (p0, z0, d4), ++ z0 = svsubr_z (p0, z0, d4)) ++ ++/* ++** subr_s4_f32_z_untied: ++** mov (z[0-9]+\.s), s4 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_z_untied, svfloat32_t, float, ++ z0 = svsubr_n_f32_z (p0, z1, d4), ++ z0 = svsubr_z (p0, z1, d4)) ++ ++/* ++** subr_1_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_z_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_z_untied, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_0p5_f32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_z_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z0, 0.5), ++ z0 = svsubr_z (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f32_z_untied: ++** movprfx z0\.s, p0/z, z1\.s ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_z_untied, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z1, 0.5), ++ z0 = svsubr_z (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f32_z_tied1: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
++** movprfx z0\.s, p0/z, z0\.s ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_z_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z0, -1), ++ z0 = svsubr_z (p0, z0, -1)) ++ ++/* ++** subr_m1_f32_z_untied: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** fsubr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** fsub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_z_untied, svfloat32_t, ++ z0 = svsubr_n_f32_z (p0, z1, -1), ++ z0 = svsubr_z (p0, z1, -1)) ++ ++/* ++** subr_f32_x_tied1: ++** fsub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_f32_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_f32_x_tied2: ++** fsub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_x_tied2, svfloat32_t, ++ z0 = svsubr_f32_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_f32_x_untied: ++** fsub z0\.s, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f32_x_untied, svfloat32_t, ++ z0 = svsubr_f32_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_s4_f32_x_tied1: ++** mov (z[0-9]+\.s), s4 ++** fsub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_x_tied1, svfloat32_t, float, ++ z0 = svsubr_n_f32_x (p0, z0, d4), ++ z0 = svsubr_x (p0, z0, d4)) ++ ++/* ++** subr_s4_f32_x_untied: ++** mov (z[0-9]+\.s), s4 ++** fsub z0\.s, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_s4_f32_x_untied, svfloat32_t, float, ++ z0 = svsubr_n_f32_x (p0, z1, d4), ++ z0 = svsubr_x (p0, z1, d4)) ++ ++/* ++** subr_1_f32_x_tied1: ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_f32_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_0p5_f32_x_tied1: ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z0, 0.5), ++ z0 = svsubr_x (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f32_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.s, p0/m, z0\.s, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z1, 0.5), ++ z0 = svsubr_x (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** fsub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_f32_x_untied: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
++** fsub z0\.s, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) ++ ++/* ++** ptrue_subr_f32_x_tied1: ++** fsub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_f32_x (svptrue_b32 (), z0, z1), ++ z0 = svsubr_x (svptrue_b32 (), z0, z1)) ++ ++/* ++** ptrue_subr_f32_x_tied2: ++** fsub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f32_x_tied2, svfloat32_t, ++ z0 = svsubr_f32_x (svptrue_b32 (), z1, z0), ++ z0 = svsubr_x (svptrue_b32 (), z1, z0)) ++ ++/* ++** ptrue_subr_f32_x_untied: ++** fsub z0\.s, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f32_x_untied, svfloat32_t, ++ z0 = svsubr_f32_x (svptrue_b32 (), z1, z2), ++ z0 = svsubr_x (svptrue_b32 (), z1, z2)) ++ ++/* ++** ptrue_subr_1_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, 1), ++ z0 = svsubr_x (svptrue_b32 (), z0, 1)) ++ ++/* ++** ptrue_subr_1_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, 1), ++ z0 = svsubr_x (svptrue_b32 (), z1, 1)) ++ ++/* ++** ptrue_subr_0p5_f32_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, 0.5), ++ z0 = svsubr_x (svptrue_b32 (), z0, 0.5)) ++ ++/* ++** ptrue_subr_0p5_f32_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, 0.5), ++ z0 = svsubr_x (svptrue_b32 (), z1, 0.5)) ++ ++/* ++** ptrue_subr_m1_f32_x_tied1: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? ++** fsub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f32_x_tied1, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, -1), ++ z0 = svsubr_x (svptrue_b32 (), z0, -1)) ++ ++/* ++** ptrue_subr_m1_f32_x_untied: ++** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
++** fsub z0\.s, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f32_x_untied, svfloat32_t, ++ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, -1), ++ z0 = svsubr_x (svptrue_b32 (), z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64.c +new file mode 100644 +index 000000000..81f1112d7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64.c +@@ -0,0 +1,444 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_f64_m_tied1: ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_m_tied1, svfloat64_t, ++ z0 = svsubr_f64_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_m_tied2, svfloat64_t, ++ z0 = svsubr_f64_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_f64_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_m_untied, svfloat64_t, ++ z0 = svsubr_f64_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svsubr_n_f64_m (p0, z0, d4), ++ z0 = svsubr_m (p0, z0, d4)) ++ ++/* ++** subr_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svsubr_n_f64_m (p0, z1, d4), ++ z0 = svsubr_m (p0, z1, d4)) ++ ++/* ++** subr_1_f64_m_tied1: ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_m_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_f64_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_m_untied, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_0p5_f64_m_tied1: ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_m_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z0, 0.5), ++ z0 = svsubr_m (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f64_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_m_untied, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z1, 0.5), ++ z0 = svsubr_m (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f64_m_tied1: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_m_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z0, -1), ++ z0 = svsubr_m (p0, z0, -1)) ++ ++/* ++** subr_m1_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_m_untied, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z1, -1), ++ z0 = svsubr_m (p0, z1, -1)) ++ ++/* ++** subr_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_z_tied1, svfloat64_t, ++ z0 = svsubr_f64_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_z_tied2, svfloat64_t, ++ z0 = svsubr_f64_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fsubr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_z_untied, svfloat64_t, ++ z0 = svsubr_f64_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svsubr_n_f64_z (p0, z0, d4), ++ z0 = svsubr_z (p0, z0, d4)) ++ ++/* ++** subr_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svsubr_n_f64_z (p0, z1, d4), ++ z0 = svsubr_z (p0, z1, d4)) ++ ++/* ++** subr_1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_z_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_z_untied, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z0, 0.5), ++ z0 = svsubr_z (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_z_untied, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z1, 0.5), ++ z0 = svsubr_z (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f64_z_tied1: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_z_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z0, -1), ++ z0 = svsubr_z (p0, z0, -1)) ++ ++/* ++** subr_m1_f64_z_untied: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_z_untied, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z1, -1), ++ z0 = svsubr_z (p0, z1, -1)) ++ ++/* ++** subr_f64_x_tied1: ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_f64_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_f64_x_tied2: ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_x_tied2, svfloat64_t, ++ z0 = svsubr_f64_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_f64_x_untied: ++** ( ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0, z2 ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_x_untied, svfloat64_t, ++ z0 = svsubr_f64_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svsubr_n_f64_x (p0, z0, d4), ++ z0 = svsubr_x (p0, z0, d4)) ++ ++/* ++** subr_d4_f64_x_untied: { xfail *-*-* } ++** mov z0\.d, d4 ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svsubr_n_f64_x (p0, z1, d4), ++ z0 = svsubr_x (p0, z1, d4)) ++ ++/* ++** subr_1_f64_x_tied1: ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_f64_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_0p5_f64_x_tied1: ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z0, 0.5), ++ z0 = svsubr_x (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f64_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z1, 0.5), ++ z0 = svsubr_x (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_f64_x_untied: ++** fmov z0\.d, #-1\.0(?:e\+0)? ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) ++ ++/* ++** ptrue_subr_f64_x_tied1: ++** fsub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svsubr_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_subr_f64_x_tied2: ++** fsub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f64_x_tied2, svfloat64_t, ++ z0 = svsubr_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svsubr_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_subr_f64_x_untied: ++** fsub z0\.d, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f64_x_untied, svfloat64_t, ++ z0 = svsubr_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svsubr_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_subr_1_f64_x_tied1: ++** ... 
++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svsubr_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_subr_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svsubr_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_subr_0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, 0.5), ++ z0 = svsubr_x (svptrue_b64 (), z0, 0.5)) ++ ++/* ++** ptrue_subr_0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, 0.5), ++ z0 = svsubr_x (svptrue_b64 (), z1, 0.5)) ++ ++/* ++** ptrue_subr_m1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** fsub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, -1), ++ z0 = svsubr_x (svptrue_b64 (), z0, -1)) ++ ++/* ++** ptrue_subr_m1_f64_x_untied: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** fsub z0\.d, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, -1), ++ z0 = svsubr_x (svptrue_b64 (), z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64_notrap.c +new file mode 100644 +index 000000000..98598dd77 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64_notrap.c +@@ -0,0 +1,439 @@ ++/* { dg-additional-options "-fno-trapping-math" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_f64_m_tied1: ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_m_tied1, svfloat64_t, ++ z0 = svsubr_f64_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_f64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_m_tied2, svfloat64_t, ++ z0 = svsubr_f64_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_f64_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_m_untied, svfloat64_t, ++ z0 = svsubr_f64_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_d4_f64_m_tied1: ++** mov (z[0-9]+\.d), d4 ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_m_tied1, svfloat64_t, double, ++ z0 = svsubr_n_f64_m (p0, z0, d4), ++ z0 = svsubr_m (p0, z0, d4)) ++ ++/* ++** subr_d4_f64_m_untied: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_m_untied, svfloat64_t, double, ++ z0 = svsubr_n_f64_m (p0, z1, d4), ++ z0 = svsubr_m (p0, z1, d4)) ++ ++/* ++** subr_1_f64_m_tied1: ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_m_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_f64_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_m_untied, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** 
subr_0p5_f64_m_tied1: ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_m_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z0, 0.5), ++ z0 = svsubr_m (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f64_m_untied: ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_m_untied, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z1, 0.5), ++ z0 = svsubr_m (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f64_m_tied1: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_m_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z0, -1), ++ z0 = svsubr_m (p0, z0, -1)) ++ ++/* ++** subr_m1_f64_m_untied: { xfail *-*-* } ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_m_untied, svfloat64_t, ++ z0 = svsubr_n_f64_m (p0, z1, -1), ++ z0 = svsubr_m (p0, z1, -1)) ++ ++/* ++** subr_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_z_tied1, svfloat64_t, ++ z0 = svsubr_f64_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_f64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_z_tied2, svfloat64_t, ++ z0 = svsubr_f64_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_f64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fsubr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_z_untied, svfloat64_t, ++ z0 = svsubr_f64_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_d4_f64_z_tied1: ++** mov (z[0-9]+\.d), d4 ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_z_tied1, svfloat64_t, double, ++ z0 = svsubr_n_f64_z (p0, z0, d4), ++ z0 = svsubr_z (p0, z0, d4)) ++ ++/* ++** subr_d4_f64_z_untied: ++** mov (z[0-9]+\.d), d4 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_z_untied, svfloat64_t, double, ++ z0 = svsubr_n_f64_z (p0, z1, d4), ++ z0 = svsubr_z (p0, z1, d4)) ++ ++/* ++** subr_1_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_z_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_z_untied, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_0p5_f64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_z_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z0, 0.5), ++ z0 = svsubr_z (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f64_z_untied: ++** movprfx z0\.d, p0/z, z1\.d ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_z_untied, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z1, 0.5), ++ z0 = svsubr_z (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f64_z_tied1: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
++** movprfx z0\.d, p0/z, z0\.d ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_z_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z0, -1), ++ z0 = svsubr_z (p0, z0, -1)) ++ ++/* ++** subr_m1_f64_z_untied: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** fsubr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** fsub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_z_untied, svfloat64_t, ++ z0 = svsubr_n_f64_z (p0, z1, -1), ++ z0 = svsubr_z (p0, z1, -1)) ++ ++/* ++** subr_f64_x_tied1: ++** fsub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_f64_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_f64_x_tied2: ++** fsub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_x_tied2, svfloat64_t, ++ z0 = svsubr_f64_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_f64_x_untied: ++** fsub z0\.d, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_f64_x_untied, svfloat64_t, ++ z0 = svsubr_f64_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_d4_f64_x_tied1: ++** mov (z[0-9]+\.d), d4 ++** fsub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_x_tied1, svfloat64_t, double, ++ z0 = svsubr_n_f64_x (p0, z0, d4), ++ z0 = svsubr_x (p0, z0, d4)) ++ ++/* ++** subr_d4_f64_x_untied: ++** mov (z[0-9]+\.d), d4 ++** fsub z0\.d, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZD (subr_d4_f64_x_untied, svfloat64_t, double, ++ z0 = svsubr_n_f64_x (p0, z1, d4), ++ z0 = svsubr_x (p0, z1, d4)) ++ ++/* ++** subr_1_f64_x_tied1: ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_f64_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, #1\.0 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_0p5_f64_x_tied1: ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z0, 0.5), ++ z0 = svsubr_x (p0, z0, 0.5)) ++ ++/* ++** subr_0p5_f64_x_untied: ++** movprfx z0, z1 ++** fsubr z0\.d, p0/m, z0\.d, #0\.5 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_0p5_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z1, 0.5), ++ z0 = svsubr_x (p0, z1, 0.5)) ++ ++/* ++** subr_m1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** fsub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_f64_x_untied: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
++** fsub z0\.d, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) ++ ++/* ++** ptrue_subr_f64_x_tied1: ++** fsub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_f64_x (svptrue_b64 (), z0, z1), ++ z0 = svsubr_x (svptrue_b64 (), z0, z1)) ++ ++/* ++** ptrue_subr_f64_x_tied2: ++** fsub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f64_x_tied2, svfloat64_t, ++ z0 = svsubr_f64_x (svptrue_b64 (), z1, z0), ++ z0 = svsubr_x (svptrue_b64 (), z1, z0)) ++ ++/* ++** ptrue_subr_f64_x_untied: ++** fsub z0\.d, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_f64_x_untied, svfloat64_t, ++ z0 = svsubr_f64_x (svptrue_b64 (), z1, z2), ++ z0 = svsubr_x (svptrue_b64 (), z1, z2)) ++ ++/* ++** ptrue_subr_1_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, 1), ++ z0 = svsubr_x (svptrue_b64 (), z0, 1)) ++ ++/* ++** ptrue_subr_1_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_1_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, 1), ++ z0 = svsubr_x (svptrue_b64 (), z1, 1)) ++ ++/* ++** ptrue_subr_0p5_f64_x_tied1: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, 0.5), ++ z0 = svsubr_x (svptrue_b64 (), z0, 0.5)) ++ ++/* ++** ptrue_subr_0p5_f64_x_untied: ++** ... ++** ptrue p[0-9]+\.b[^\n]* ++** ... ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_0p5_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, 0.5), ++ z0 = svsubr_x (svptrue_b64 (), z1, 0.5)) ++ ++/* ++** ptrue_subr_m1_f64_x_tied1: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? ++** fsub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f64_x_tied1, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, -1), ++ z0 = svsubr_x (svptrue_b64 (), z0, -1)) ++ ++/* ++** ptrue_subr_m1_f64_x_untied: ++** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
++** fsub z0\.d, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (ptrue_subr_m1_f64_x_untied, svfloat64_t, ++ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, -1), ++ z0 = svsubr_x (svptrue_b64 (), z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s16.c +new file mode 100644 +index 000000000..d3dad62da +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s16.c +@@ -0,0 +1,324 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_s16_m_tied1: ++** subr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s16_m_tied1, svint16_t, ++ z0 = svsubr_s16_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_s16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** subr z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s16_m_tied2, svint16_t, ++ z0 = svsubr_s16_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_s16_m_untied: ++** movprfx z0, z1 ++** subr z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s16_m_untied, svint16_t, ++ z0 = svsubr_s16_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_w0_s16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s16_m_tied1, svint16_t, int16_t, ++ z0 = svsubr_n_s16_m (p0, z0, x0), ++ z0 = svsubr_m (p0, z0, x0)) ++ ++/* ++** subr_w0_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s16_m_untied, svint16_t, int16_t, ++ z0 = svsubr_n_s16_m (p0, z1, x0), ++ z0 = svsubr_m (p0, z1, x0)) ++ ++/* ++** subr_1_s16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s16_m_tied1, svint16_t, ++ z0 = svsubr_n_s16_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_s16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s16_m_untied, svint16_t, ++ z0 = svsubr_n_s16_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_m2_s16_m: ++** mov (z[0-9]+\.h), #-2 ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m2_s16_m, svint16_t, ++ z0 = svsubr_n_s16_m (p0, z0, -2), ++ z0 = svsubr_m (p0, z0, -2)) ++ ++/* ++** subr_s16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** subr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s16_z_tied1, svint16_t, ++ z0 = svsubr_s16_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_s16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s16_z_tied2, svint16_t, ++ z0 = svsubr_s16_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_s16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** subr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s16_z_untied, svint16_t, ++ z0 = svsubr_s16_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_w0_s16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s16_z_tied1, svint16_t, int16_t, ++ z0 = svsubr_n_s16_z (p0, z0, x0), ++ z0 = svsubr_z (p0, z0, x0)) ++ ++/* ++** subr_w0_s16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, 
z1\.h ++** subr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s16_z_untied, svint16_t, int16_t, ++ z0 = svsubr_n_s16_z (p0, z1, x0), ++ z0 = svsubr_z (p0, z1, x0)) ++ ++/* ++** subr_1_s16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s16_z_tied1, svint16_t, ++ z0 = svsubr_n_s16_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_s16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** subr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s16_z_untied, svint16_t, ++ z0 = svsubr_n_s16_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_s16_x_tied1: ++** sub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s16_x_tied1, svint16_t, ++ z0 = svsubr_s16_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_s16_x_tied2: ++** sub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s16_x_tied2, svint16_t, ++ z0 = svsubr_s16_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_s16_x_untied: ++** sub z0\.h, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s16_x_untied, svint16_t, ++ z0 = svsubr_s16_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_w0_s16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** sub z0\.h, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s16_x_tied1, svint16_t, int16_t, ++ z0 = svsubr_n_s16_x (p0, z0, x0), ++ z0 = svsubr_x (p0, z0, x0)) ++ ++/* ++** subr_w0_s16_x_untied: ++** mov (z[0-9]+\.h), w0 ++** sub z0\.h, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s16_x_untied, svint16_t, int16_t, ++ z0 = svsubr_n_s16_x (p0, z1, x0), ++ z0 = svsubr_x (p0, z1, x0)) ++ ++/* ++** subr_1_s16_x_tied1: ++** subr z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s16_x_tied1, svint16_t, ++ z0 = svsubr_n_s16_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_s16_x_untied: ++** movprfx z0, z1 ++** subr z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s16_x_untied, svint16_t, ++ z0 = svsubr_n_s16_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_127_s16_x: ++** subr z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_127_s16_x, svint16_t, ++ z0 = svsubr_n_s16_x (p0, z0, 127), ++ z0 = svsubr_x (p0, z0, 127)) ++ ++/* ++** subr_128_s16_x: ++** subr z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_128_s16_x, svint16_t, ++ z0 = svsubr_n_s16_x (p0, z0, 128), ++ z0 = svsubr_x (p0, z0, 128)) ++ ++/* ++** subr_255_s16_x: ++** subr z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_255_s16_x, svint16_t, ++ z0 = svsubr_n_s16_x (p0, z0, 255), ++ z0 = svsubr_x (p0, z0, 255)) ++ ++/* ++** subr_256_s16_x: ++** subr z0\.h, z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_256_s16_x, svint16_t, ++ z0 = svsubr_n_s16_x (p0, z0, 256), ++ z0 = svsubr_x (p0, z0, 256)) ++ ++/* ++** subr_257_s16_x: ++** mov (z[0-9]+)\.b, #1 ++** sub z0\.h, \1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_257_s16_x, svint16_t, ++ z0 = svsubr_n_s16_x (p0, z0, 257), ++ z0 = svsubr_x (p0, z0, 257)) ++ ++/* ++** subr_512_s16_x: ++** subr z0\.h, z0\.h, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_512_s16_x, svint16_t, ++ z0 = svsubr_n_s16_x (p0, z0, 512), ++ z0 = svsubr_x (p0, z0, 512)) ++ ++/* ++** subr_65280_s16_x: ++** subr z0\.h, z0\.h, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65280_s16_x, svint16_t, ++ z0 = 
svsubr_n_s16_x (p0, z0, 0xff00), ++ z0 = svsubr_x (p0, z0, 0xff00)) ++ ++/* ++** subr_m1_s16_x_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.h, \1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_s16_x_tied1, svint16_t, ++ z0 = svsubr_n_s16_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_s16_x_untied: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.h, \1\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_s16_x_untied, svint16_t, ++ z0 = svsubr_n_s16_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s32.c +new file mode 100644 +index 000000000..ce62e2f21 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s32.c +@@ -0,0 +1,344 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_s32_m_tied1: ++** subr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s32_m_tied1, svint32_t, ++ z0 = svsubr_s32_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_s32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** subr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s32_m_tied2, svint32_t, ++ z0 = svsubr_s32_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_s32_m_untied: ++** movprfx z0, z1 ++** subr z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s32_m_untied, svint32_t, ++ z0 = svsubr_s32_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_w0_s32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s32_m_tied1, svint32_t, int32_t, ++ z0 = svsubr_n_s32_m (p0, z0, x0), ++ z0 = svsubr_m (p0, z0, x0)) ++ ++/* ++** subr_w0_s32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s32_m_untied, svint32_t, int32_t, ++ z0 = svsubr_n_s32_m (p0, z1, x0), ++ z0 = svsubr_m (p0, z1, x0)) ++ ++/* ++** subr_1_s32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s32_m_tied1, svint32_t, ++ z0 = svsubr_n_s32_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_s32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s32_m_untied, svint32_t, ++ z0 = svsubr_n_s32_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_m2_s32_m: ++** mov (z[0-9]+\.s), #-2 ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m2_s32_m, svint32_t, ++ z0 = svsubr_n_s32_m (p0, z0, -2), ++ z0 = svsubr_m (p0, z0, -2)) ++ ++/* ++** subr_s32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** subr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s32_z_tied1, svint32_t, ++ z0 = svsubr_s32_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_s32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s32_z_tied2, svint32_t, ++ z0 = svsubr_s32_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_s32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** subr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s32_z_untied, svint32_t, ++ z0 = svsubr_s32_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_w0_s32_z_tied1: ++** mov 
(z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s32_z_tied1, svint32_t, int32_t, ++ z0 = svsubr_n_s32_z (p0, z0, x0), ++ z0 = svsubr_z (p0, z0, x0)) ++ ++/* ++** subr_w0_s32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** subr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s32_z_untied, svint32_t, int32_t, ++ z0 = svsubr_n_s32_z (p0, z1, x0), ++ z0 = svsubr_z (p0, z1, x0)) ++ ++/* ++** subr_1_s32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s32_z_tied1, svint32_t, ++ z0 = svsubr_n_s32_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_s32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** subr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s32_z_untied, svint32_t, ++ z0 = svsubr_n_s32_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_s32_x_tied1: ++** sub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s32_x_tied1, svint32_t, ++ z0 = svsubr_s32_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_s32_x_tied2: ++** sub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s32_x_tied2, svint32_t, ++ z0 = svsubr_s32_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_s32_x_untied: ++** sub z0\.s, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s32_x_untied, svint32_t, ++ z0 = svsubr_s32_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_w0_s32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** sub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s32_x_tied1, svint32_t, int32_t, ++ z0 = svsubr_n_s32_x (p0, z0, x0), ++ z0 = svsubr_x (p0, z0, x0)) ++ ++/* ++** subr_w0_s32_x_untied: ++** mov (z[0-9]+\.s), w0 ++** sub z0\.s, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s32_x_untied, svint32_t, int32_t, ++ z0 = svsubr_n_s32_x (p0, z1, x0), ++ z0 = svsubr_x (p0, z1, x0)) ++ ++/* ++** subr_1_s32_x_tied1: ++** subr z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s32_x_tied1, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_s32_x_untied: ++** movprfx z0, z1 ++** subr z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s32_x_untied, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_127_s32_x: ++** subr z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_127_s32_x, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, 127), ++ z0 = svsubr_x (p0, z0, 127)) ++ ++/* ++** subr_128_s32_x: ++** subr z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_128_s32_x, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, 128), ++ z0 = svsubr_x (p0, z0, 128)) ++ ++/* ++** subr_255_s32_x: ++** subr z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_255_s32_x, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, 255), ++ z0 = svsubr_x (p0, z0, 255)) ++ ++/* ++** subr_256_s32_x: ++** subr z0\.s, z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_256_s32_x, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, 256), ++ z0 = svsubr_x (p0, z0, 256)) ++ ++/* ++** subr_511_s32_x: ++** mov (z[0-9]+\.s), #511 ++** sub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_511_s32_x, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, 511), ++ z0 = svsubr_x (p0, z0, 511)) ++ 
++/* ++** subr_512_s32_x: ++** subr z0\.s, z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_512_s32_x, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, 512), ++ z0 = svsubr_x (p0, z0, 512)) ++ ++/* ++** subr_65280_s32_x: ++** subr z0\.s, z0\.s, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65280_s32_x, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, 0xff00), ++ z0 = svsubr_x (p0, z0, 0xff00)) ++ ++/* ++** subr_65535_s32_x: ++** mov (z[0-9]+\.s), #65535 ++** sub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65535_s32_x, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, 65535), ++ z0 = svsubr_x (p0, z0, 65535)) ++ ++/* ++** subr_65536_s32_x: ++** mov (z[0-9]+\.s), #65536 ++** sub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65536_s32_x, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, 65536), ++ z0 = svsubr_x (p0, z0, 65536)) ++ ++/* ++** subr_m1_s32_x_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.s, \1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_s32_x_tied1, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_s32_x_untied: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.s, \1\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_s32_x_untied, svint32_t, ++ z0 = svsubr_n_s32_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s64.c +new file mode 100644 +index 000000000..ada9e977c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s64.c +@@ -0,0 +1,344 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_s64_m_tied1: ++** subr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s64_m_tied1, svint64_t, ++ z0 = svsubr_s64_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_s64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s64_m_tied2, svint64_t, ++ z0 = svsubr_s64_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_s64_m_untied: ++** movprfx z0, z1 ++** subr z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s64_m_untied, svint64_t, ++ z0 = svsubr_s64_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_x0_s64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_s64_m_tied1, svint64_t, int64_t, ++ z0 = svsubr_n_s64_m (p0, z0, x0), ++ z0 = svsubr_m (p0, z0, x0)) ++ ++/* ++** subr_x0_s64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_s64_m_untied, svint64_t, int64_t, ++ z0 = svsubr_n_s64_m (p0, z1, x0), ++ z0 = svsubr_m (p0, z1, x0)) ++ ++/* ++** subr_1_s64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s64_m_tied1, svint64_t, ++ z0 = svsubr_n_s64_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_s64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s64_m_untied, svint64_t, ++ z0 = svsubr_n_s64_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_m2_s64_m: ++** mov (z[0-9]+\.d), #-2 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m2_s64_m, svint64_t, ++ z0 = svsubr_n_s64_m (p0, z0, -2), ++ z0 = svsubr_m (p0, z0, -2)) ++ ++/* ++** subr_s64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d 
++** subr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s64_z_tied1, svint64_t, ++ z0 = svsubr_s64_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_s64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s64_z_tied2, svint64_t, ++ z0 = svsubr_s64_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_s64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** subr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s64_z_untied, svint64_t, ++ z0 = svsubr_s64_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_x0_s64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_s64_z_tied1, svint64_t, int64_t, ++ z0 = svsubr_n_s64_z (p0, z0, x0), ++ z0 = svsubr_z (p0, z0, x0)) ++ ++/* ++** subr_x0_s64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** subr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_s64_z_untied, svint64_t, int64_t, ++ z0 = svsubr_n_s64_z (p0, z1, x0), ++ z0 = svsubr_z (p0, z1, x0)) ++ ++/* ++** subr_1_s64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s64_z_tied1, svint64_t, ++ z0 = svsubr_n_s64_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_s64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** subr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s64_z_untied, svint64_t, ++ z0 = svsubr_n_s64_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_s64_x_tied1: ++** sub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s64_x_tied1, svint64_t, ++ z0 = svsubr_s64_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_s64_x_tied2: ++** sub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s64_x_tied2, svint64_t, ++ z0 = svsubr_s64_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_s64_x_untied: ++** sub z0\.d, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s64_x_untied, svint64_t, ++ z0 = svsubr_s64_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_x0_s64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** sub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_s64_x_tied1, svint64_t, int64_t, ++ z0 = svsubr_n_s64_x (p0, z0, x0), ++ z0 = svsubr_x (p0, z0, x0)) ++ ++/* ++** subr_x0_s64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** sub z0\.d, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_s64_x_untied, svint64_t, int64_t, ++ z0 = svsubr_n_s64_x (p0, z1, x0), ++ z0 = svsubr_x (p0, z1, x0)) ++ ++/* ++** subr_1_s64_x_tied1: ++** subr z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s64_x_tied1, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_s64_x_untied: ++** movprfx z0, z1 ++** subr z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s64_x_untied, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_127_s64_x: ++** subr z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_127_s64_x, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, 127), ++ z0 = svsubr_x (p0, z0, 127)) ++ ++/* ++** subr_128_s64_x: 
++** subr z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_128_s64_x, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, 128), ++ z0 = svsubr_x (p0, z0, 128)) ++ ++/* ++** subr_255_s64_x: ++** subr z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_255_s64_x, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, 255), ++ z0 = svsubr_x (p0, z0, 255)) ++ ++/* ++** subr_256_s64_x: ++** subr z0\.d, z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_256_s64_x, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, 256), ++ z0 = svsubr_x (p0, z0, 256)) ++ ++/* ++** subr_511_s64_x: ++** mov (z[0-9]+\.d), #511 ++** sub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_511_s64_x, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, 511), ++ z0 = svsubr_x (p0, z0, 511)) ++ ++/* ++** subr_512_s64_x: ++** subr z0\.d, z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_512_s64_x, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, 512), ++ z0 = svsubr_x (p0, z0, 512)) ++ ++/* ++** subr_65280_s64_x: ++** subr z0\.d, z0\.d, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65280_s64_x, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, 0xff00), ++ z0 = svsubr_x (p0, z0, 0xff00)) ++ ++/* ++** subr_65535_s64_x: ++** mov (z[0-9]+\.d), #65535 ++** sub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65535_s64_x, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, 65535), ++ z0 = svsubr_x (p0, z0, 65535)) ++ ++/* ++** subr_65536_s64_x: ++** mov (z[0-9]+\.d), #65536 ++** sub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65536_s64_x, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, 65536), ++ z0 = svsubr_x (p0, z0, 65536)) ++ ++/* ++** subr_m1_s64_x_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.d, \1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_s64_x_tied1, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_s64_x_untied: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.d, \1\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_s64_x_untied, svint64_t, ++ z0 = svsubr_n_s64_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s8.c +new file mode 100644 +index 000000000..90d2a6de9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s8.c +@@ -0,0 +1,294 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_s8_m_tied1: ++** subr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s8_m_tied1, svint8_t, ++ z0 = svsubr_s8_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_s8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** subr z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s8_m_tied2, svint8_t, ++ z0 = svsubr_s8_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_s8_m_untied: ++** movprfx z0, z1 ++** subr z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s8_m_untied, svint8_t, ++ z0 = svsubr_s8_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_w0_s8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s8_m_tied1, svint8_t, int8_t, ++ z0 = svsubr_n_s8_m (p0, z0, x0), ++ z0 = svsubr_m (p0, z0, x0)) ++ ++/* ++** subr_w0_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s8_m_untied, svint8_t, int8_t, ++ z0 = svsubr_n_s8_m (p0, z1, x0), ++ z0 = svsubr_m (p0, z1, x0)) ++ ++/* ++** 
subr_1_s8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s8_m_tied1, svint8_t, ++ z0 = svsubr_n_s8_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_s8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s8_m_untied, svint8_t, ++ z0 = svsubr_n_s8_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_m1_s8_m: ++** mov (z[0-9]+\.b), #-1 ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_s8_m, svint8_t, ++ z0 = svsubr_n_s8_m (p0, z0, -1), ++ z0 = svsubr_m (p0, z0, -1)) ++ ++/* ++** subr_s8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** subr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s8_z_tied1, svint8_t, ++ z0 = svsubr_s8_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_s8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s8_z_tied2, svint8_t, ++ z0 = svsubr_s8_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_s8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** subr z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s8_z_untied, svint8_t, ++ z0 = svsubr_s8_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_w0_s8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s8_z_tied1, svint8_t, int8_t, ++ z0 = svsubr_n_s8_z (p0, z0, x0), ++ z0 = svsubr_z (p0, z0, x0)) ++ ++/* ++** subr_w0_s8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** subr z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s8_z_untied, svint8_t, int8_t, ++ z0 = svsubr_n_s8_z (p0, z1, x0), ++ z0 = svsubr_z (p0, z1, x0)) ++ ++/* ++** subr_1_s8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s8_z_tied1, svint8_t, ++ z0 = svsubr_n_s8_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_s8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** subr z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s8_z_untied, svint8_t, ++ z0 = svsubr_n_s8_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_s8_x_tied1: ++** sub z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s8_x_tied1, svint8_t, ++ z0 = svsubr_s8_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_s8_x_tied2: ++** sub z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s8_x_tied2, svint8_t, ++ z0 = svsubr_s8_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_s8_x_untied: ++** sub z0\.b, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_s8_x_untied, svint8_t, ++ z0 = svsubr_s8_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_w0_s8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** sub z0\.b, \1, z0\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s8_x_tied1, svint8_t, int8_t, ++ z0 = svsubr_n_s8_x (p0, z0, x0), ++ z0 = svsubr_x (p0, z0, x0)) ++ ++/* ++** subr_w0_s8_x_untied: ++** mov (z[0-9]+\.b), w0 ++** sub z0\.b, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_s8_x_untied, svint8_t, int8_t, ++ 
z0 = svsubr_n_s8_x (p0, z1, x0), ++ z0 = svsubr_x (p0, z1, x0)) ++ ++/* ++** subr_1_s8_x_tied1: ++** subr z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s8_x_tied1, svint8_t, ++ z0 = svsubr_n_s8_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_s8_x_untied: ++** movprfx z0, z1 ++** subr z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_s8_x_untied, svint8_t, ++ z0 = svsubr_n_s8_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_127_s8_x: ++** subr z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_127_s8_x, svint8_t, ++ z0 = svsubr_n_s8_x (p0, z0, 127), ++ z0 = svsubr_x (p0, z0, 127)) ++ ++/* ++** subr_128_s8_x: ++** subr z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_128_s8_x, svint8_t, ++ z0 = svsubr_n_s8_x (p0, z0, 128), ++ z0 = svsubr_x (p0, z0, 128)) ++ ++/* ++** subr_255_s8_x: ++** subr z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_255_s8_x, svint8_t, ++ z0 = svsubr_n_s8_x (p0, z0, 255), ++ z0 = svsubr_x (p0, z0, 255)) ++ ++/* ++** subr_m1_s8_x: ++** subr z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_s8_x, svint8_t, ++ z0 = svsubr_n_s8_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m127_s8_x: ++** subr z0\.b, z0\.b, #129 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m127_s8_x, svint8_t, ++ z0 = svsubr_n_s8_x (p0, z0, -127), ++ z0 = svsubr_x (p0, z0, -127)) ++ ++/* ++** subr_m128_s8_x: ++** subr z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m128_s8_x, svint8_t, ++ z0 = svsubr_n_s8_x (p0, z0, -128), ++ z0 = svsubr_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u16.c +new file mode 100644 +index 000000000..379a80fb1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u16.c +@@ -0,0 +1,324 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_u16_m_tied1: ++** subr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u16_m_tied1, svuint16_t, ++ z0 = svsubr_u16_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_u16_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** subr z0\.h, p0/m, z0\.h, \1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u16_m_tied2, svuint16_t, ++ z0 = svsubr_u16_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_u16_m_untied: ++** movprfx z0, z1 ++** subr z0\.h, p0/m, z0\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u16_m_untied, svuint16_t, ++ z0 = svsubr_u16_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_w0_u16_m_tied1: ++** mov (z[0-9]+\.h), w0 ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u16_m_tied1, svuint16_t, uint16_t, ++ z0 = svsubr_n_u16_m (p0, z0, x0), ++ z0 = svsubr_m (p0, z0, x0)) ++ ++/* ++** subr_w0_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), w0 ++** movprfx z0, z1 ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u16_m_untied, svuint16_t, uint16_t, ++ z0 = svsubr_n_u16_m (p0, z1, x0), ++ z0 = svsubr_m (p0, z1, x0)) ++ ++/* ++** subr_1_u16_m_tied1: ++** mov (z[0-9]+\.h), #1 ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u16_m_tied1, svuint16_t, ++ z0 = svsubr_n_u16_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_u16_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.h), #1 ++** movprfx z0, z1 ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u16_m_untied, svuint16_t, ++ z0 = svsubr_n_u16_m (p0, z1, 
1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_m2_u16_m: ++** mov (z[0-9]+\.h), #-2 ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m2_u16_m, svuint16_t, ++ z0 = svsubr_n_u16_m (p0, z0, -2), ++ z0 = svsubr_m (p0, z0, -2)) ++ ++/* ++** subr_u16_z_tied1: ++** movprfx z0\.h, p0/z, z0\.h ++** subr z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u16_z_tied1, svuint16_t, ++ z0 = svsubr_u16_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_u16_z_tied2: ++** movprfx z0\.h, p0/z, z0\.h ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u16_z_tied2, svuint16_t, ++ z0 = svsubr_u16_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_u16_z_untied: ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** subr z0\.h, p0/m, z0\.h, z2\.h ++** | ++** movprfx z0\.h, p0/z, z2\.h ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u16_z_untied, svuint16_t, ++ z0 = svsubr_u16_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_w0_u16_z_tied1: ++** mov (z[0-9]+\.h), w0 ++** movprfx z0\.h, p0/z, z0\.h ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u16_z_tied1, svuint16_t, uint16_t, ++ z0 = svsubr_n_u16_z (p0, z0, x0), ++ z0 = svsubr_z (p0, z0, x0)) ++ ++/* ++** subr_w0_u16_z_untied: ++** mov (z[0-9]+\.h), w0 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** subr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u16_z_untied, svuint16_t, uint16_t, ++ z0 = svsubr_n_u16_z (p0, z1, x0), ++ z0 = svsubr_z (p0, z1, x0)) ++ ++/* ++** subr_1_u16_z_tied1: ++** mov (z[0-9]+\.h), #1 ++** movprfx z0\.h, p0/z, z0\.h ++** subr z0\.h, p0/m, z0\.h, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u16_z_tied1, svuint16_t, ++ z0 = svsubr_n_u16_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_u16_z_untied: ++** mov (z[0-9]+\.h), #1 ++** ( ++** movprfx z0\.h, p0/z, z1\.h ++** subr z0\.h, p0/m, z0\.h, \1 ++** | ++** movprfx z0\.h, p0/z, \1 ++** sub z0\.h, p0/m, z0\.h, z1\.h ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u16_z_untied, svuint16_t, ++ z0 = svsubr_n_u16_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_u16_x_tied1: ++** sub z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u16_x_tied1, svuint16_t, ++ z0 = svsubr_u16_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_u16_x_tied2: ++** sub z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u16_x_tied2, svuint16_t, ++ z0 = svsubr_u16_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_u16_x_untied: ++** sub z0\.h, z2\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u16_x_untied, svuint16_t, ++ z0 = svsubr_u16_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_w0_u16_x_tied1: ++** mov (z[0-9]+\.h), w0 ++** sub z0\.h, \1, z0\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u16_x_tied1, svuint16_t, uint16_t, ++ z0 = svsubr_n_u16_x (p0, z0, x0), ++ z0 = svsubr_x (p0, z0, x0)) ++ ++/* ++** subr_w0_u16_x_untied: ++** mov (z[0-9]+\.h), w0 ++** sub z0\.h, \1, z1\.h ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u16_x_untied, svuint16_t, uint16_t, ++ z0 = svsubr_n_u16_x (p0, z1, x0), ++ z0 = svsubr_x (p0, z1, x0)) ++ ++/* ++** subr_1_u16_x_tied1: ++** subr z0\.h, z0\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u16_x_tied1, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_u16_x_untied: ++** movprfx z0, z1 ++** subr z0\.h, z0\.h, #1 ++** ret ++*/ 
++TEST_UNIFORM_Z (subr_1_u16_x_untied, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_127_u16_x: ++** subr z0\.h, z0\.h, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_127_u16_x, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z0, 127), ++ z0 = svsubr_x (p0, z0, 127)) ++ ++/* ++** subr_128_u16_x: ++** subr z0\.h, z0\.h, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_128_u16_x, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z0, 128), ++ z0 = svsubr_x (p0, z0, 128)) ++ ++/* ++** subr_255_u16_x: ++** subr z0\.h, z0\.h, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_255_u16_x, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z0, 255), ++ z0 = svsubr_x (p0, z0, 255)) ++ ++/* ++** subr_256_u16_x: ++** subr z0\.h, z0\.h, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_256_u16_x, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z0, 256), ++ z0 = svsubr_x (p0, z0, 256)) ++ ++/* ++** subr_257_u16_x: ++** mov (z[0-9]+)\.b, #1 ++** sub z0\.h, \1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_257_u16_x, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z0, 257), ++ z0 = svsubr_x (p0, z0, 257)) ++ ++/* ++** subr_512_u16_x: ++** subr z0\.h, z0\.h, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_512_u16_x, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z0, 512), ++ z0 = svsubr_x (p0, z0, 512)) ++ ++/* ++** subr_65280_u16_x: ++** subr z0\.h, z0\.h, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65280_u16_x, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z0, 0xff00), ++ z0 = svsubr_x (p0, z0, 0xff00)) ++ ++/* ++** subr_m1_u16_x_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.h, \1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_u16_x_tied1, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_u16_x_untied: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.h, \1\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_u16_x_untied, svuint16_t, ++ z0 = svsubr_n_u16_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u32.c +new file mode 100644 +index 000000000..215f8b449 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u32.c +@@ -0,0 +1,344 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_u32_m_tied1: ++** subr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u32_m_tied1, svuint32_t, ++ z0 = svsubr_u32_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_u32_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** subr z0\.s, p0/m, z0\.s, \1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u32_m_tied2, svuint32_t, ++ z0 = svsubr_u32_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_u32_m_untied: ++** movprfx z0, z1 ++** subr z0\.s, p0/m, z0\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u32_m_untied, svuint32_t, ++ z0 = svsubr_u32_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_w0_u32_m_tied1: ++** mov (z[0-9]+\.s), w0 ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u32_m_tied1, svuint32_t, uint32_t, ++ z0 = svsubr_n_u32_m (p0, z0, x0), ++ z0 = svsubr_m (p0, z0, x0)) ++ ++/* ++** subr_w0_u32_m_untied: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0, z1 ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u32_m_untied, svuint32_t, uint32_t, ++ z0 = svsubr_n_u32_m (p0, z1, x0), ++ z0 = svsubr_m (p0, z1, x0)) ++ ++/* ++** subr_1_u32_m_tied1: ++** mov (z[0-9]+\.s), #1 ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret 
++*/ ++TEST_UNIFORM_Z (subr_1_u32_m_tied1, svuint32_t, ++ z0 = svsubr_n_u32_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_u32_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.s), #1 ++** movprfx z0, z1 ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u32_m_untied, svuint32_t, ++ z0 = svsubr_n_u32_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_m2_u32_m: ++** mov (z[0-9]+\.s), #-2 ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m2_u32_m, svuint32_t, ++ z0 = svsubr_n_u32_m (p0, z0, -2), ++ z0 = svsubr_m (p0, z0, -2)) ++ ++/* ++** subr_u32_z_tied1: ++** movprfx z0\.s, p0/z, z0\.s ++** subr z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u32_z_tied1, svuint32_t, ++ z0 = svsubr_u32_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_u32_z_tied2: ++** movprfx z0\.s, p0/z, z0\.s ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u32_z_tied2, svuint32_t, ++ z0 = svsubr_u32_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_u32_z_untied: ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** subr z0\.s, p0/m, z0\.s, z2\.s ++** | ++** movprfx z0\.s, p0/z, z2\.s ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u32_z_untied, svuint32_t, ++ z0 = svsubr_u32_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_w0_u32_z_tied1: ++** mov (z[0-9]+\.s), w0 ++** movprfx z0\.s, p0/z, z0\.s ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u32_z_tied1, svuint32_t, uint32_t, ++ z0 = svsubr_n_u32_z (p0, z0, x0), ++ z0 = svsubr_z (p0, z0, x0)) ++ ++/* ++** subr_w0_u32_z_untied: ++** mov (z[0-9]+\.s), w0 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** subr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u32_z_untied, svuint32_t, uint32_t, ++ z0 = svsubr_n_u32_z (p0, z1, x0), ++ z0 = svsubr_z (p0, z1, x0)) ++ ++/* ++** subr_1_u32_z_tied1: ++** mov (z[0-9]+\.s), #1 ++** movprfx z0\.s, p0/z, z0\.s ++** subr z0\.s, p0/m, z0\.s, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u32_z_tied1, svuint32_t, ++ z0 = svsubr_n_u32_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_u32_z_untied: ++** mov (z[0-9]+\.s), #1 ++** ( ++** movprfx z0\.s, p0/z, z1\.s ++** subr z0\.s, p0/m, z0\.s, \1 ++** | ++** movprfx z0\.s, p0/z, \1 ++** sub z0\.s, p0/m, z0\.s, z1\.s ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u32_z_untied, svuint32_t, ++ z0 = svsubr_n_u32_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_u32_x_tied1: ++** sub z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u32_x_tied1, svuint32_t, ++ z0 = svsubr_u32_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_u32_x_tied2: ++** sub z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u32_x_tied2, svuint32_t, ++ z0 = svsubr_u32_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_u32_x_untied: ++** sub z0\.s, z2\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u32_x_untied, svuint32_t, ++ z0 = svsubr_u32_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_w0_u32_x_tied1: ++** mov (z[0-9]+\.s), w0 ++** sub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u32_x_tied1, svuint32_t, uint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, x0), ++ z0 = svsubr_x (p0, z0, x0)) ++ ++/* ++** subr_w0_u32_x_untied: ++** mov (z[0-9]+\.s), w0 ++** sub z0\.s, \1, z1\.s ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u32_x_untied, svuint32_t, uint32_t, ++ z0 = 
svsubr_n_u32_x (p0, z1, x0), ++ z0 = svsubr_x (p0, z1, x0)) ++ ++/* ++** subr_1_u32_x_tied1: ++** subr z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u32_x_tied1, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_u32_x_untied: ++** movprfx z0, z1 ++** subr z0\.s, z0\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u32_x_untied, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_127_u32_x: ++** subr z0\.s, z0\.s, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_127_u32_x, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, 127), ++ z0 = svsubr_x (p0, z0, 127)) ++ ++/* ++** subr_128_u32_x: ++** subr z0\.s, z0\.s, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_128_u32_x, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, 128), ++ z0 = svsubr_x (p0, z0, 128)) ++ ++/* ++** subr_255_u32_x: ++** subr z0\.s, z0\.s, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_255_u32_x, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, 255), ++ z0 = svsubr_x (p0, z0, 255)) ++ ++/* ++** subr_256_u32_x: ++** subr z0\.s, z0\.s, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_256_u32_x, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, 256), ++ z0 = svsubr_x (p0, z0, 256)) ++ ++/* ++** subr_511_u32_x: ++** mov (z[0-9]+\.s), #511 ++** sub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_511_u32_x, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, 511), ++ z0 = svsubr_x (p0, z0, 511)) ++ ++/* ++** subr_512_u32_x: ++** subr z0\.s, z0\.s, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_512_u32_x, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, 512), ++ z0 = svsubr_x (p0, z0, 512)) ++ ++/* ++** subr_65280_u32_x: ++** subr z0\.s, z0\.s, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65280_u32_x, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, 0xff00), ++ z0 = svsubr_x (p0, z0, 0xff00)) ++ ++/* ++** subr_65535_u32_x: ++** mov (z[0-9]+\.s), #65535 ++** sub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65535_u32_x, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, 65535), ++ z0 = svsubr_x (p0, z0, 65535)) ++ ++/* ++** subr_65536_u32_x: ++** mov (z[0-9]+\.s), #65536 ++** sub z0\.s, \1, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65536_u32_x, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, 65536), ++ z0 = svsubr_x (p0, z0, 65536)) ++ ++/* ++** subr_m1_u32_x_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.s, \1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_u32_x_tied1, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_u32_x_untied: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.s, \1\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_u32_x_untied, svuint32_t, ++ z0 = svsubr_n_u32_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u64.c +new file mode 100644 +index 000000000..78d94515b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u64.c +@@ -0,0 +1,344 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_u64_m_tied1: ++** subr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u64_m_tied1, svuint64_t, ++ z0 = svsubr_u64_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_u64_m_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u64_m_tied2, svuint64_t, ++ z0 = svsubr_u64_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** 
subr_u64_m_untied: ++** movprfx z0, z1 ++** subr z0\.d, p0/m, z0\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u64_m_untied, svuint64_t, ++ z0 = svsubr_u64_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_x0_u64_m_tied1: ++** mov (z[0-9]+\.d), x0 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_u64_m_tied1, svuint64_t, uint64_t, ++ z0 = svsubr_n_u64_m (p0, z0, x0), ++ z0 = svsubr_m (p0, z0, x0)) ++ ++/* ++** subr_x0_u64_m_untied: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0, z1 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_u64_m_untied, svuint64_t, uint64_t, ++ z0 = svsubr_n_u64_m (p0, z1, x0), ++ z0 = svsubr_m (p0, z1, x0)) ++ ++/* ++** subr_1_u64_m_tied1: ++** mov (z[0-9]+\.d), #1 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u64_m_tied1, svuint64_t, ++ z0 = svsubr_n_u64_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_u64_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.d), #1 ++** movprfx z0, z1 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u64_m_untied, svuint64_t, ++ z0 = svsubr_n_u64_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_m2_u64_m: ++** mov (z[0-9]+\.d), #-2 ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m2_u64_m, svuint64_t, ++ z0 = svsubr_n_u64_m (p0, z0, -2), ++ z0 = svsubr_m (p0, z0, -2)) ++ ++/* ++** subr_u64_z_tied1: ++** movprfx z0\.d, p0/z, z0\.d ++** subr z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u64_z_tied1, svuint64_t, ++ z0 = svsubr_u64_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_u64_z_tied2: ++** movprfx z0\.d, p0/z, z0\.d ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u64_z_tied2, svuint64_t, ++ z0 = svsubr_u64_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_u64_z_untied: ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** subr z0\.d, p0/m, z0\.d, z2\.d ++** | ++** movprfx z0\.d, p0/z, z2\.d ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u64_z_untied, svuint64_t, ++ z0 = svsubr_u64_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_x0_u64_z_tied1: ++** mov (z[0-9]+\.d), x0 ++** movprfx z0\.d, p0/z, z0\.d ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_u64_z_tied1, svuint64_t, uint64_t, ++ z0 = svsubr_n_u64_z (p0, z0, x0), ++ z0 = svsubr_z (p0, z0, x0)) ++ ++/* ++** subr_x0_u64_z_untied: ++** mov (z[0-9]+\.d), x0 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** subr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_u64_z_untied, svuint64_t, uint64_t, ++ z0 = svsubr_n_u64_z (p0, z1, x0), ++ z0 = svsubr_z (p0, z1, x0)) ++ ++/* ++** subr_1_u64_z_tied1: ++** mov (z[0-9]+\.d), #1 ++** movprfx z0\.d, p0/z, z0\.d ++** subr z0\.d, p0/m, z0\.d, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u64_z_tied1, svuint64_t, ++ z0 = svsubr_n_u64_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_u64_z_untied: ++** mov (z[0-9]+\.d), #1 ++** ( ++** movprfx z0\.d, p0/z, z1\.d ++** subr z0\.d, p0/m, z0\.d, \1 ++** | ++** movprfx z0\.d, p0/z, \1 ++** sub z0\.d, p0/m, z0\.d, z1\.d ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u64_z_untied, svuint64_t, ++ z0 = svsubr_n_u64_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_u64_x_tied1: ++** sub z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u64_x_tied1, svuint64_t, ++ z0 = svsubr_u64_x (p0, z0, z1), ++ z0 = 
svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_u64_x_tied2: ++** sub z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u64_x_tied2, svuint64_t, ++ z0 = svsubr_u64_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_u64_x_untied: ++** sub z0\.d, z2\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u64_x_untied, svuint64_t, ++ z0 = svsubr_u64_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_x0_u64_x_tied1: ++** mov (z[0-9]+\.d), x0 ++** sub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_u64_x_tied1, svuint64_t, uint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, x0), ++ z0 = svsubr_x (p0, z0, x0)) ++ ++/* ++** subr_x0_u64_x_untied: ++** mov (z[0-9]+\.d), x0 ++** sub z0\.d, \1, z1\.d ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_x0_u64_x_untied, svuint64_t, uint64_t, ++ z0 = svsubr_n_u64_x (p0, z1, x0), ++ z0 = svsubr_x (p0, z1, x0)) ++ ++/* ++** subr_1_u64_x_tied1: ++** subr z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u64_x_tied1, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_u64_x_untied: ++** movprfx z0, z1 ++** subr z0\.d, z0\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u64_x_untied, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_127_u64_x: ++** subr z0\.d, z0\.d, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_127_u64_x, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, 127), ++ z0 = svsubr_x (p0, z0, 127)) ++ ++/* ++** subr_128_u64_x: ++** subr z0\.d, z0\.d, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_128_u64_x, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, 128), ++ z0 = svsubr_x (p0, z0, 128)) ++ ++/* ++** subr_255_u64_x: ++** subr z0\.d, z0\.d, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_255_u64_x, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, 255), ++ z0 = svsubr_x (p0, z0, 255)) ++ ++/* ++** subr_256_u64_x: ++** subr z0\.d, z0\.d, #256 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_256_u64_x, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, 256), ++ z0 = svsubr_x (p0, z0, 256)) ++ ++/* ++** subr_511_u64_x: ++** mov (z[0-9]+\.d), #511 ++** sub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_511_u64_x, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, 511), ++ z0 = svsubr_x (p0, z0, 511)) ++ ++/* ++** subr_512_u64_x: ++** subr z0\.d, z0\.d, #512 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_512_u64_x, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, 512), ++ z0 = svsubr_x (p0, z0, 512)) ++ ++/* ++** subr_65280_u64_x: ++** subr z0\.d, z0\.d, #65280 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65280_u64_x, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, 0xff00), ++ z0 = svsubr_x (p0, z0, 0xff00)) ++ ++/* ++** subr_65535_u64_x: ++** mov (z[0-9]+\.d), #65535 ++** sub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65535_u64_x, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, 65535), ++ z0 = svsubr_x (p0, z0, 65535)) ++ ++/* ++** subr_65536_u64_x: ++** mov (z[0-9]+\.d), #65536 ++** sub z0\.d, \1, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_65536_u64_x, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, 65536), ++ z0 = svsubr_x (p0, z0, 65536)) ++ ++/* ++** subr_m1_u64_x_tied1: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.d, \1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_u64_x_tied1, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m1_u64_x_untied: ++** mov (z[0-9]+)\.b, #-1 ++** sub z0\.d, \1\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_u64_x_untied, svuint64_t, ++ z0 = svsubr_n_u64_x (p0, z1, -1), ++ z0 = svsubr_x (p0, z1, -1)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u8.c +new file mode 100644 +index 000000000..fe5f96da8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u8.c +@@ -0,0 +1,294 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** subr_u8_m_tied1: ++** subr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u8_m_tied1, svuint8_t, ++ z0 = svsubr_u8_m (p0, z0, z1), ++ z0 = svsubr_m (p0, z0, z1)) ++ ++/* ++** subr_u8_m_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** subr z0\.b, p0/m, z0\.b, \1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u8_m_tied2, svuint8_t, ++ z0 = svsubr_u8_m (p0, z1, z0), ++ z0 = svsubr_m (p0, z1, z0)) ++ ++/* ++** subr_u8_m_untied: ++** movprfx z0, z1 ++** subr z0\.b, p0/m, z0\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u8_m_untied, svuint8_t, ++ z0 = svsubr_u8_m (p0, z1, z2), ++ z0 = svsubr_m (p0, z1, z2)) ++ ++/* ++** subr_w0_u8_m_tied1: ++** mov (z[0-9]+\.b), w0 ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u8_m_tied1, svuint8_t, uint8_t, ++ z0 = svsubr_n_u8_m (p0, z0, x0), ++ z0 = svsubr_m (p0, z0, x0)) ++ ++/* ++** subr_w0_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), w0 ++** movprfx z0, z1 ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u8_m_untied, svuint8_t, uint8_t, ++ z0 = svsubr_n_u8_m (p0, z1, x0), ++ z0 = svsubr_m (p0, z1, x0)) ++ ++/* ++** subr_1_u8_m_tied1: ++** mov (z[0-9]+\.b), #1 ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u8_m_tied1, svuint8_t, ++ z0 = svsubr_n_u8_m (p0, z0, 1), ++ z0 = svsubr_m (p0, z0, 1)) ++ ++/* ++** subr_1_u8_m_untied: { xfail *-*-* } ++** mov (z[0-9]+\.b), #1 ++** movprfx z0, z1 ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u8_m_untied, svuint8_t, ++ z0 = svsubr_n_u8_m (p0, z1, 1), ++ z0 = svsubr_m (p0, z1, 1)) ++ ++/* ++** subr_m1_u8_m: ++** mov (z[0-9]+\.b), #-1 ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_u8_m, svuint8_t, ++ z0 = svsubr_n_u8_m (p0, z0, -1), ++ z0 = svsubr_m (p0, z0, -1)) ++ ++/* ++** subr_u8_z_tied1: ++** movprfx z0\.b, p0/z, z0\.b ++** subr z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u8_z_tied1, svuint8_t, ++ z0 = svsubr_u8_z (p0, z0, z1), ++ z0 = svsubr_z (p0, z0, z1)) ++ ++/* ++** subr_u8_z_tied2: ++** movprfx z0\.b, p0/z, z0\.b ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u8_z_tied2, svuint8_t, ++ z0 = svsubr_u8_z (p0, z1, z0), ++ z0 = svsubr_z (p0, z1, z0)) ++ ++/* ++** subr_u8_z_untied: ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** subr z0\.b, p0/m, z0\.b, z2\.b ++** | ++** movprfx z0\.b, p0/z, z2\.b ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u8_z_untied, svuint8_t, ++ z0 = svsubr_u8_z (p0, z1, z2), ++ z0 = svsubr_z (p0, z1, z2)) ++ ++/* ++** subr_w0_u8_z_tied1: ++** mov (z[0-9]+\.b), w0 ++** movprfx z0\.b, p0/z, z0\.b ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u8_z_tied1, svuint8_t, uint8_t, ++ z0 = svsubr_n_u8_z (p0, z0, x0), ++ z0 = svsubr_z (p0, z0, x0)) ++ ++/* ++** subr_w0_u8_z_untied: ++** mov (z[0-9]+\.b), w0 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** subr z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u8_z_untied, svuint8_t, uint8_t, ++ z0 = svsubr_n_u8_z (p0, z1, x0), ++ z0 = svsubr_z 
(p0, z1, x0)) ++ ++/* ++** subr_1_u8_z_tied1: ++** mov (z[0-9]+\.b), #1 ++** movprfx z0\.b, p0/z, z0\.b ++** subr z0\.b, p0/m, z0\.b, \1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u8_z_tied1, svuint8_t, ++ z0 = svsubr_n_u8_z (p0, z0, 1), ++ z0 = svsubr_z (p0, z0, 1)) ++ ++/* ++** subr_1_u8_z_untied: ++** mov (z[0-9]+\.b), #1 ++** ( ++** movprfx z0\.b, p0/z, z1\.b ++** subr z0\.b, p0/m, z0\.b, \1 ++** | ++** movprfx z0\.b, p0/z, \1 ++** sub z0\.b, p0/m, z0\.b, z1\.b ++** ) ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u8_z_untied, svuint8_t, ++ z0 = svsubr_n_u8_z (p0, z1, 1), ++ z0 = svsubr_z (p0, z1, 1)) ++ ++/* ++** subr_u8_x_tied1: ++** sub z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u8_x_tied1, svuint8_t, ++ z0 = svsubr_u8_x (p0, z0, z1), ++ z0 = svsubr_x (p0, z0, z1)) ++ ++/* ++** subr_u8_x_tied2: ++** sub z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u8_x_tied2, svuint8_t, ++ z0 = svsubr_u8_x (p0, z1, z0), ++ z0 = svsubr_x (p0, z1, z0)) ++ ++/* ++** subr_u8_x_untied: ++** sub z0\.b, z2\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (subr_u8_x_untied, svuint8_t, ++ z0 = svsubr_u8_x (p0, z1, z2), ++ z0 = svsubr_x (p0, z1, z2)) ++ ++/* ++** subr_w0_u8_x_tied1: ++** mov (z[0-9]+\.b), w0 ++** sub z0\.b, \1, z0\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u8_x_tied1, svuint8_t, uint8_t, ++ z0 = svsubr_n_u8_x (p0, z0, x0), ++ z0 = svsubr_x (p0, z0, x0)) ++ ++/* ++** subr_w0_u8_x_untied: ++** mov (z[0-9]+\.b), w0 ++** sub z0\.b, \1, z1\.b ++** ret ++*/ ++TEST_UNIFORM_ZX (subr_w0_u8_x_untied, svuint8_t, uint8_t, ++ z0 = svsubr_n_u8_x (p0, z1, x0), ++ z0 = svsubr_x (p0, z1, x0)) ++ ++/* ++** subr_1_u8_x_tied1: ++** subr z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u8_x_tied1, svuint8_t, ++ z0 = svsubr_n_u8_x (p0, z0, 1), ++ z0 = svsubr_x (p0, z0, 1)) ++ ++/* ++** subr_1_u8_x_untied: ++** movprfx z0, z1 ++** subr z0\.b, z0\.b, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_1_u8_x_untied, svuint8_t, ++ z0 = svsubr_n_u8_x (p0, z1, 1), ++ z0 = svsubr_x (p0, z1, 1)) ++ ++/* ++** subr_127_u8_x: ++** subr z0\.b, z0\.b, #127 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_127_u8_x, svuint8_t, ++ z0 = svsubr_n_u8_x (p0, z0, 127), ++ z0 = svsubr_x (p0, z0, 127)) ++ ++/* ++** subr_128_u8_x: ++** subr z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_128_u8_x, svuint8_t, ++ z0 = svsubr_n_u8_x (p0, z0, 128), ++ z0 = svsubr_x (p0, z0, 128)) ++ ++/* ++** subr_255_u8_x: ++** subr z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_255_u8_x, svuint8_t, ++ z0 = svsubr_n_u8_x (p0, z0, 255), ++ z0 = svsubr_x (p0, z0, 255)) ++ ++/* ++** subr_m1_u8_x: ++** subr z0\.b, z0\.b, #255 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m1_u8_x, svuint8_t, ++ z0 = svsubr_n_u8_x (p0, z0, -1), ++ z0 = svsubr_x (p0, z0, -1)) ++ ++/* ++** subr_m127_u8_x: ++** subr z0\.b, z0\.b, #129 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m127_u8_x, svuint8_t, ++ z0 = svsubr_n_u8_x (p0, z0, -127), ++ z0 = svsubr_x (p0, z0, -127)) ++ ++/* ++** subr_m128_u8_x: ++** subr z0\.b, z0\.b, #128 ++** ret ++*/ ++TEST_UNIFORM_Z (subr_m128_u8_x, svuint8_t, ++ z0 = svsubr_n_u8_x (p0, z0, -128), ++ z0 = svsubr_x (p0, z0, -128)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_lane_s32.c +new file mode 100644 +index 000000000..c6d74a4af +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_lane_s32.c +@@ -0,0 +1,97 @@ ++/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ ++/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sudot_lane_0_s32_tied1: ++** sudot z0\.s, z2\.b, z4\.b\[0\] ++** ret ++*/ ++TEST_TRIPLE_Z (sudot_lane_0_s32_tied1, svint32_t, svint8_t, svuint8_t, ++ z0 = svsudot_lane_s32 (z0, z2, z4, 0), ++ z0 = svsudot_lane (z0, z2, z4, 0)) ++ ++/* ++** sudot_lane_0_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z2 ++** sudot z0\.s, \1\.b, z4\.b\[0\] ++** ret ++*/ ++TEST_TRIPLE_Z_REV2 (sudot_lane_0_s32_tied2, svint32_t, svint8_t, svuint8_t, ++ z0_res = svsudot_lane_s32 (z2, z0, z4, 0), ++ z0_res = svsudot_lane (z2, z0, z4, 0)) ++ ++/* ++** sudot_lane_0_s32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** sudot z0\.s, z2\.b, \1\.b\[0\] ++** ret ++*/ ++TEST_TRIPLE_Z_REV (sudot_lane_0_s32_tied3, svint32_t, svint8_t, svuint8_t, ++ z0_res = svsudot_lane_s32 (z4, z2, z0, 0), ++ z0_res = svsudot_lane (z4, z2, z0, 0)) ++ ++/* ++** sudot_lane_0_s32_untied: ++** movprfx z0, z1 ++** sudot z0\.s, z2\.b, z4\.b\[0\] ++** ret ++*/ ++TEST_TRIPLE_Z (sudot_lane_0_s32_untied, svint32_t, svint8_t, svuint8_t, ++ z0 = svsudot_lane_s32 (z1, z2, z4, 0), ++ z0 = svsudot_lane (z1, z2, z4, 0)) ++ ++/* ++** sudot_lane_1_s32: ++** sudot z0\.s, z2\.b, z5\.b\[1\] ++** ret ++*/ ++TEST_TRIPLE_Z (sudot_lane_1_s32, svint32_t, svint8_t, svuint8_t, ++ z0 = svsudot_lane_s32 (z0, z2, z5, 1), ++ z0 = svsudot_lane (z0, z2, z5, 1)) ++ ++/* ++** sudot_lane_2_s32: ++** sudot z0\.s, z2\.b, z5\.b\[2\] ++** ret ++*/ ++TEST_TRIPLE_Z (sudot_lane_2_s32, svint32_t, svint8_t, svuint8_t, ++ z0 = svsudot_lane_s32 (z0, z2, z5, 2), ++ z0 = svsudot_lane (z0, z2, z5, 2)) ++ ++/* ++** sudot_lane_3_s32: ++** sudot z0\.s, z2\.b, z5\.b\[3\] ++** ret ++*/ ++TEST_TRIPLE_Z (sudot_lane_3_s32, svint32_t, svint8_t, svuint8_t, ++ z0 = svsudot_lane_s32 (z0, z2, z5, 3), ++ z0 = svsudot_lane (z0, z2, z5, 3)) ++ ++/* ++** sudot_lane_z8_s32: ++** str d8, \[sp, -16\]! 
++** mov (z[0-7])\.d, z8\.d ++** sudot z0\.s, z1\.b, \1\.b\[1\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_TRIPLE_LANE_REG (sudot_lane_z8_s32, svint32_t, svint8_t, svuint8_t, ++ z8, ++ z0 = svsudot_lane_s32 (z0, z1, z8, 1), ++ z0 = svsudot_lane (z0, z1, z8, 1)) ++ ++/* ++** sudot_lane_z16_s32: ++** mov (z[0-7])\.d, z16\.d ++** sudot z0\.s, z1\.b, \1\.b\[1\] ++** ret ++*/ ++TEST_TRIPLE_LANE_REG (sudot_lane_z16_s32, svint32_t, svint8_t, svuint8_t, ++ z16, ++ z0 = svsudot_lane_s32 (z0, z1, z16, 1), ++ z0 = svsudot_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c +new file mode 100644 +index 000000000..4b452619e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c +@@ -0,0 +1,45 @@ ++/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** sudot_s32_tied1: ++** usdot z0\.s, z2\.b, z4\.b ++** ret ++*/ ++TEST_TRIPLE_Z (sudot_s32_tied1, svint32_t, svint8_t, svuint8_t, ++ z0 = svsudot_s32 (z0, z2, z4), ++ z0 = svsudot (z0, z2, z4)) ++ ++/* ++** sudot_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** usdot z0\.s, z2\.b, \1\.b ++** ret ++*/ ++TEST_TRIPLE_Z_REV (sudot_s32_tied2, svint32_t, svint8_t, svuint8_t, ++ z0_res = svsudot_s32 (z4, z2, z0), ++ z0_res = svsudot (z4, z2, z0)) ++ ++/* ++** sudot_w0_s32_tied: ++** mov (z[0-9]+\.b), w0 ++** usdot z0\.s, z2\.b, \1 ++** ret ++*/ ++TEST_TRIPLE_ZX (sudot_w0_s32_tied, svint32_t, svint8_t, uint8_t, ++ z0 = svsudot_n_s32 (z0, z2, x0), ++ z0 = svsudot (z0, z2, x0)) ++ ++/* ++** sudot_9_s32_tied: ++** mov (z[0-9]+\.b), #9 ++** usdot z0\.s, z2\.b, \1 ++** ret ++*/ ++TEST_TRIPLE_Z (sudot_9_s32_tied, svint32_t, svint8_t, uint8_t, ++ z0 = svsudot_n_s32 (z0, z2, 9), ++ z0 = svsudot (z0, z2, 9)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_bf16.c +new file mode 100644 +index 000000000..8c077d118 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_bf16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_bf16_tied1: ++** tbl z0\.h, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tbl_bf16_tied1, svbfloat16_t, svuint16_t, ++ z0 = svtbl_bf16 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_bf16_tied2: ++** tbl z0\.h, z4\.h, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_bf16_tied2, svbfloat16_t, svuint16_t, ++ z0_res = svtbl_bf16 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_bf16_untied: ++** tbl z0\.h, z1\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tbl_bf16_untied, svbfloat16_t, svuint16_t, ++ z0 = svtbl_bf16 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f16.c +new file mode 100644 +index 000000000..94b610412 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_f16_tied1: ++** tbl z0\.h, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tbl_f16_tied1, svfloat16_t, svuint16_t, ++ z0 = svtbl_f16 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_f16_tied2: ++** tbl z0\.h, z4\.h, z0\.h ++** ret ++*/ 
++TEST_DUAL_Z_REV (tbl_f16_tied2, svfloat16_t, svuint16_t, ++ z0_res = svtbl_f16 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_f16_untied: ++** tbl z0\.h, z1\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tbl_f16_untied, svfloat16_t, svuint16_t, ++ z0 = svtbl_f16 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f32.c +new file mode 100644 +index 000000000..741d3bdcf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_f32_tied1: ++** tbl z0\.s, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (tbl_f32_tied1, svfloat32_t, svuint32_t, ++ z0 = svtbl_f32 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_f32_tied2: ++** tbl z0\.s, z4\.s, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_f32_tied2, svfloat32_t, svuint32_t, ++ z0_res = svtbl_f32 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_f32_untied: ++** tbl z0\.s, z1\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (tbl_f32_untied, svfloat32_t, svuint32_t, ++ z0 = svtbl_f32 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f64.c +new file mode 100644 +index 000000000..3c24e9a59 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_f64_tied1: ++** tbl z0\.d, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (tbl_f64_tied1, svfloat64_t, svuint64_t, ++ z0 = svtbl_f64 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_f64_tied2: ++** tbl z0\.d, z4\.d, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_f64_tied2, svfloat64_t, svuint64_t, ++ z0_res = svtbl_f64 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_f64_untied: ++** tbl z0\.d, z1\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (tbl_f64_untied, svfloat64_t, svuint64_t, ++ z0 = svtbl_f64 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s16.c +new file mode 100644 +index 000000000..2ec9c389a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_s16_tied1: ++** tbl z0\.h, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tbl_s16_tied1, svint16_t, svuint16_t, ++ z0 = svtbl_s16 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_s16_tied2: ++** tbl z0\.h, z4\.h, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_s16_tied2, svint16_t, svuint16_t, ++ z0_res = svtbl_s16 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_s16_untied: ++** tbl z0\.h, z1\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tbl_s16_untied, svint16_t, svuint16_t, ++ z0 = svtbl_s16 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s32.c +new file mode 100644 +index 000000000..98b2d8d8b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_s32_tied1: ++** tbl z0\.s, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (tbl_s32_tied1, svint32_t, 
svuint32_t, ++ z0 = svtbl_s32 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_s32_tied2: ++** tbl z0\.s, z4\.s, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_s32_tied2, svint32_t, svuint32_t, ++ z0_res = svtbl_s32 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_s32_untied: ++** tbl z0\.s, z1\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (tbl_s32_untied, svint32_t, svuint32_t, ++ z0 = svtbl_s32 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s64.c +new file mode 100644 +index 000000000..0138a80d2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_s64_tied1: ++** tbl z0\.d, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (tbl_s64_tied1, svint64_t, svuint64_t, ++ z0 = svtbl_s64 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_s64_tied2: ++** tbl z0\.d, z4\.d, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_s64_tied2, svint64_t, svuint64_t, ++ z0_res = svtbl_s64 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_s64_untied: ++** tbl z0\.d, z1\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (tbl_s64_untied, svint64_t, svuint64_t, ++ z0 = svtbl_s64 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s8.c +new file mode 100644 +index 000000000..7818d1b6d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_s8_tied1: ++** tbl z0\.b, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (tbl_s8_tied1, svint8_t, svuint8_t, ++ z0 = svtbl_s8 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_s8_tied2: ++** tbl z0\.b, z4\.b, z0\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_s8_tied2, svint8_t, svuint8_t, ++ z0_res = svtbl_s8 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_s8_untied: ++** tbl z0\.b, z1\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (tbl_s8_untied, svint8_t, svuint8_t, ++ z0 = svtbl_s8 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u16.c +new file mode 100644 +index 000000000..f15da9211 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_u16_tied1: ++** tbl z0\.h, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tbl_u16_tied1, svuint16_t, svuint16_t, ++ z0 = svtbl_u16 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_u16_tied2: ++** tbl z0\.h, z4\.h, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_u16_tied2, svuint16_t, svuint16_t, ++ z0_res = svtbl_u16 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_u16_untied: ++** tbl z0\.h, z1\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tbl_u16_untied, svuint16_t, svuint16_t, ++ z0 = svtbl_u16 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u32.c +new file mode 100644 +index 000000000..494300436 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** 
tbl_u32_tied1: ++** tbl z0\.s, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (tbl_u32_tied1, svuint32_t, svuint32_t, ++ z0 = svtbl_u32 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_u32_tied2: ++** tbl z0\.s, z4\.s, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_u32_tied2, svuint32_t, svuint32_t, ++ z0_res = svtbl_u32 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_u32_untied: ++** tbl z0\.s, z1\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (tbl_u32_untied, svuint32_t, svuint32_t, ++ z0 = svtbl_u32 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u64.c +new file mode 100644 +index 000000000..158990e12 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_u64_tied1: ++** tbl z0\.d, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (tbl_u64_tied1, svuint64_t, svuint64_t, ++ z0 = svtbl_u64 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_u64_tied2: ++** tbl z0\.d, z4\.d, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_u64_tied2, svuint64_t, svuint64_t, ++ z0_res = svtbl_u64 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_u64_untied: ++** tbl z0\.d, z1\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (tbl_u64_untied, svuint64_t, svuint64_t, ++ z0 = svtbl_u64 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u8.c +new file mode 100644 +index 000000000..a46309a95 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tbl_u8_tied1: ++** tbl z0\.b, z0\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (tbl_u8_tied1, svuint8_t, svuint8_t, ++ z0 = svtbl_u8 (z0, z4), ++ z0 = svtbl (z0, z4)) ++ ++/* ++** tbl_u8_tied2: ++** tbl z0\.b, z4\.b, z0\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (tbl_u8_tied2, svuint8_t, svuint8_t, ++ z0_res = svtbl_u8 (z4, z0), ++ z0_res = svtbl (z4, z0)) ++ ++/* ++** tbl_u8_untied: ++** tbl z0\.b, z1\.b, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (tbl_u8_untied, svuint8_t, svuint8_t, ++ z0 = svtbl_u8 (z1, z4), ++ z0 = svtbl (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +new file mode 100644 +index 000000000..d1f8fdb13 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h +@@ -0,0 +1,424 @@ ++#ifndef TEST_SVE_ACLE_H ++#define TEST_SVE_ACLE_H 1 ++ ++#include <arm_sve.h> ++ ++#if defined (TEST_OVERLOADS) ++#define INVOKE(CODE1, CODE2) CODE2 ++#elif defined (TEST_FULL) ++#define INVOKE(CODE1, CODE2) CODE1 ++#else ++#error "Please define -DTEST_OVERLOADS or -DTEST_FULL" ++#endif ++ ++#ifdef __cplusplus ++#define PROTO(NAME, RET, ARGS) extern "C" RET NAME ARGS; RET NAME ARGS ++#else ++#define PROTO(NAME, RET, ARGS) RET NAME ARGS ++#endif ++ ++#define TEST_UNIFORM_Z(NAME, TYPE, CODE1, CODE2) \ ++ PROTO (NAME, TYPE, (TYPE z0, TYPE z1, TYPE z2, TYPE z3, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_UNIFORM_P(NAME, CODE1, CODE2) \ ++ PROTO (NAME, svbool_t, (svbool_t p0, svbool_t p1, \ ++ svbool_t p2, svbool_t p3)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return p0; \ ++ } ++ ++#define TEST_UNIFORM_P_SINGLE(NAME, CODE) \ ++ PROTO (NAME, 
svbool_t, (svbool_t p0, svbool_t p1, \ ++ svbool_t p2, svbool_t p3)) \ ++ { \ ++ CODE; \ ++ return p0; \ ++ } ++ ++#define TEST_UNIFORM_S(NAME, TYPE, CODE1, CODE2) \ ++ PROTO (NAME, TYPE, (TYPE x0, TYPE x1, TYPE x2, TYPE x3, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return x0; \ ++ } ++ ++#define TEST_DUAL_Z(NAME, TYPE1, TYPE2, CODE1, CODE2) \ ++ PROTO (NAME, TYPE1, (TYPE1 z0, TYPE1 z1, TYPE1 z2, TYPE1 z3, \ ++ TYPE2 z4, TYPE2 z5, TYPE2 z6, TYPE2 z7, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_DUAL_Z_REV(NAME, TYPE1, TYPE2, CODE1, CODE2) \ ++ PROTO (NAME, TYPE1, (TYPE2 z0, TYPE2 z1, TYPE2 z2, TYPE2 z3, \ ++ TYPE1 z4, TYPE1 z5, TYPE1 z6, TYPE1 z7, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ TYPE1 z0_res; \ ++ INVOKE (CODE1, CODE2); \ ++ return z0_res; \ ++ } ++ ++#define TEST_TRIPLE_Z(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2) \ ++ PROTO (NAME, TYPE1, (TYPE1 z0, TYPE1 z1, TYPE2 z2, TYPE2 z3, \ ++ TYPE3 z4, TYPE3 z5, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_TRIPLE_Z_REV2(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2)\ ++ PROTO (NAME, TYPE1, (TYPE2 z0, TYPE2 z1, TYPE1 z2, TYPE1 z3, \ ++ TYPE3 z4, TYPE3 z5, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ TYPE1 z0_res; \ ++ INVOKE (CODE1, CODE2); \ ++ return z0_res; \ ++ } ++ ++#define TEST_TRIPLE_Z_REV(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2)\ ++ PROTO (NAME, TYPE1, (TYPE3 z0, TYPE3 z1, TYPE2 z2, TYPE2 z3, \ ++ TYPE1 z4, TYPE1 z5, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ TYPE1 z0_res; \ ++ INVOKE (CODE1, CODE2); \ ++ return z0_res; \ ++ } ++ ++#define TEST_DUAL_LANE_REG(NAME, ZTYPE1, ZTYPE2, REG, CODE1, CODE2) \ ++ PROTO (NAME, void, (void)) \ ++ { \ ++ register ZTYPE1 z0 __asm ("z0"); \ ++ register ZTYPE2 z1 __asm ("z1"); \ ++ register ZTYPE2 REG __asm (#REG); \ ++ __asm volatile ("" : "=w" (z0), "=w" (z1), "=w" (REG)); \ ++ INVOKE (CODE1, CODE2); \ ++ __asm volatile ("" :: "w" (z0)); \ ++ } ++ ++#define TEST_TYPE_CHANGE_Z(NAME, TYPE1, TYPE2, CODE1, CODE2) \ ++ PROTO (NAME, TYPE1, (TYPE2 z0, TYPE2 z1, TYPE2 z2, TYPE2 z3, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ TYPE1 z0_res; \ ++ INVOKE (CODE1, CODE2); \ ++ return z0_res; \ ++ } ++ ++#define TEST_TRIPLE_LANE_REG(NAME, ZTYPE1, ZTYPE2, ZTYPE3, REG, CODE1, CODE2) \ ++ PROTO (NAME, void, (void)) \ ++ { \ ++ register ZTYPE1 z0 __asm ("z0"); \ ++ register ZTYPE2 z1 __asm ("z1"); \ ++ register ZTYPE3 REG __asm (#REG); \ ++ __asm volatile ("" : "=w" (z0), "=w" (z1), "=w" (REG)); \ ++ INVOKE (CODE1, CODE2); \ ++ __asm volatile ("" :: "w" (z0)); \ ++ } ++ ++#define TEST_TRIPLE_ZX(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2) \ ++ PROTO (NAME, TYPE1, (TYPE1 z0, TYPE1 z1, TYPE2 z2, TYPE2 z3, \ ++ TYPE3 x0, TYPE3 x1, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_UNIFORM_ZX(NAME, ZTYPE, STYPE, CODE1, CODE2) \ ++ PROTO (NAME, ZTYPE, (ZTYPE z0, ZTYPE z1, ZTYPE z2, ZTYPE z3, \ ++ svbool_t p0, STYPE x0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_UNIFORM_ZD(NAME, ZTYPE, STYPE, CODE1, CODE2) \ ++ PROTO (NAME, ZTYPE, (ZTYPE z0, ZTYPE z1, ZTYPE z2, ZTYPE z3, \ ++ svbool_t p0, STYPE d4)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_UNIFORM_PS(NAME, CODE1, CODE2) \ ++ PROTO (NAME, svbool_t, (svbool_t p0, svbool_t p1, \ ++ svbool_t p2, svbool_t p3, bool x0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return p0; \ ++ } ++ ++#define 
TEST_DUAL_ZD(NAME, ZTYPE1, ZTYPE2, STYPE, CODE1, CODE2) \ ++ PROTO (NAME, ZTYPE1, (ZTYPE1 z0, ZTYPE1 z1, ZTYPE1 z2, \ ++ ZTYPE1 z3, ZTYPE2 z4, ZTYPE2 z5, \ ++ ZTYPE2 z6, STYPE d7, svbool_t p0, \ ++ svbool_t p1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_DUAL_ZX(NAME, ZTYPE1, ZTYPE2, STYPE, CODE1, CODE2) \ ++ PROTO (NAME, ZTYPE1, (ZTYPE1 z0, ZTYPE1 z1, ZTYPE1 z2, \ ++ ZTYPE1 z3, ZTYPE2 z4, ZTYPE2 z5, \ ++ ZTYPE2 z6, ZTYPE2 z7, svbool_t p0, \ ++ svbool_t p1, STYPE x0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_TYPE_CHANGE_ZX(NAME, ZTYPE1, ZTYPE2, STYPE, CODE1, CODE2) \ ++ PROTO (NAME, ZTYPE1, (ZTYPE2 z0, ZTYPE2 z1, ZTYPE2 z2, \ ++ ZTYPE2 z3, svbool_t p0, svbool_t p1, \ ++ STYPE x0)) \ ++ { \ ++ ZTYPE1 z0_res; \ ++ INVOKE (CODE1, CODE2); \ ++ return z0_res; \ ++ } ++ ++#define TEST_LOAD(NAME, ZTYPE, STYPE, CODE1, CODE2) \ ++ PROTO (NAME, ZTYPE, (svbool_t p0, const STYPE *x0, \ ++ intptr_t x1)) \ ++ { \ ++ ZTYPE z0; \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_LOAD_GATHER_SZ(NAME, RES_TYPE, STYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, RES_TYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ ++ const STYPE *x0)) \ ++ { \ ++ RES_TYPE z0_res; \ ++ INVOKE (CODE1, CODE2); \ ++ return z0_res; \ ++ } ++ ++#define TEST_LOAD_GATHER_ZS(NAME, RES_TYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, RES_TYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ ++ int64_t x0)) \ ++ { \ ++ RES_TYPE z0_res; \ ++ INVOKE (CODE1, CODE2); \ ++ return z0_res; \ ++ } ++ ++#define TEST_PREFETCH(NAME, STYPE, CODE1, CODE2) \ ++ PROTO (NAME, void, (svbool_t p0, const STYPE *x0, \ ++ intptr_t x1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#define TEST_PREFETCH_GATHER_SZ(NAME, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, void, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ ++ const void *x0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#define TEST_PREFETCH_GATHER_ZS(NAME, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, void, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ ++ int64_t x0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#define TEST_STORE(NAME, ZTYPE, STYPE, CODE1, CODE2) \ ++ PROTO (NAME, void, (ZTYPE z0, svbool_t p0, STYPE *x0, \ ++ intptr_t x1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#define TEST_STORE_SCATTER_SZ(NAME, DATA_TYPE, STYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, void, (DATA_TYPE z0, ZTYPE z1, svbool_t p0, \ ++ STYPE *x0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#define TEST_STORE_SCATTER_ZS(NAME, DATA_TYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, void, (DATA_TYPE z0, ZTYPE z1, svbool_t p0, \ ++ int64_t x0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ } ++ ++#define TEST_P(NAME, CODE1, CODE2) \ ++ PROTO (NAME, svbool_t, (void)) \ ++ { \ ++ svbool_t p0; \ ++ INVOKE (CODE1, CODE2); \ ++ return p0; \ ++ } ++ ++#define TEST_PTEST(NAME, TYPE, CODE) \ ++ PROTO (NAME, TYPE, (svbool_t p0, svbool_t p1, svbool_t p2, \ ++ svbool_t p3, TYPE x0, TYPE x1)) \ ++ { \ ++ INVOKE (CODE, CODE); \ ++ return x0; \ ++ } ++ ++#define TEST_COMPARE_S(NAME, TYPE, CODE1, CODE2) \ ++ PROTO (NAME, svbool_t, (TYPE x0, TYPE x1)) \ ++ { \ ++ svbool_t p0; \ ++ INVOKE (CODE1, CODE2); \ ++ return p0; \ ++ } ++ ++#define TEST_COMPARE_Z(NAME, TYPE, CODE1, CODE2) \ ++ PROTO (NAME, svbool_t, (TYPE z0, TYPE z1, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return p0; \ ++ } ++ ++#define TEST_COMPARE_ZX(NAME, ZTYPE, STYPE, CODE1, CODE2) \ ++ PROTO (NAME, svbool_t, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ ++ svbool_t p1, STYPE x0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ 
return p0; \ ++ } ++ ++#define TEST_COMPARE_ZD(NAME, ZTYPE, STYPE, CODE1, CODE2) \ ++ PROTO (NAME, svbool_t, (ZTYPE z0, ZTYPE z1, ZTYPE z2, \ ++ ZTYPE z3, svbool_t p0, svbool_t p1, \ ++ STYPE d4)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return p0; \ ++ } ++ ++#define TEST_COMPARE_DUAL_Z(NAME, TYPE1, TYPE2, CODE1, CODE2) \ ++ PROTO (NAME, svbool_t, (TYPE1 z0, TYPE2 z1, \ ++ svbool_t p0, svbool_t p1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return p0; \ ++ } ++ ++#define TEST_REDUCTION_X(NAME, STYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, STYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0)) \ ++ { \ ++ STYPE x0; \ ++ INVOKE (CODE1, CODE2); \ ++ return x0; \ ++ } ++ ++#define TEST_REDUCTION_D(NAME, STYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, STYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0)) \ ++ { \ ++ STYPE d0; \ ++ INVOKE (CODE1, CODE2); \ ++ return d0; \ ++ } ++ ++#define TEST_FOLD_LEFT_D(NAME, STYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, STYPE, (STYPE d0, STYPE d1, ZTYPE z2, \ ++ svbool_t p0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return d0; \ ++ } ++ ++#define TEST_FOLD_LEFT_X(NAME, STYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, STYPE, (STYPE x0, STYPE x1, ZTYPE z0, \ ++ svbool_t p0)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return x0; \ ++ } ++ ++#define TEST_S(NAME, ZTYPE, STYPE, CODE) \ ++ PROTO (NAME, ZTYPE, (STYPE x0, STYPE x1)) \ ++ { \ ++ ZTYPE z0; \ ++ CODE; \ ++ return z0; \ ++ } ++ ++#define TEST_ADR(NAME, TYPE1, TYPE2, CODE1, CODE2) \ ++ PROTO (NAME, TYPE1, (TYPE1 z0, TYPE2 z1)) \ ++ { \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_UNDEF(NAME, TYPE, CODE) \ ++ PROTO (NAME, TYPE, (void)) \ ++ { \ ++ TYPE z0; \ ++ CODE; \ ++ return z0; \ ++ } ++ ++#define TEST_CREATE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, TTYPE, (ZTYPE unused0, ZTYPE unused1, \ ++ ZTYPE unused2, ZTYPE unused3, \ ++ ZTYPE z4, ZTYPE z5, ZTYPE z6, ZTYPE z7)) \ ++ { \ ++ TTYPE z0; \ ++ INVOKE (CODE1, CODE2); \ ++ return z0; \ ++ } ++ ++#define TEST_GET(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, void, (ZTYPE unused0, ZTYPE unused1, \ ++ ZTYPE unused2, ZTYPE unused3, TTYPE z4)) \ ++ { \ ++ register ZTYPE z0 __asm ("z0"); \ ++ register ZTYPE z4_res __asm ("z4"); \ ++ register ZTYPE z5_res __asm ("z5"); \ ++ register ZTYPE z6_res __asm ("z6"); \ ++ register ZTYPE z7_res __asm ("z7"); \ ++ INVOKE (CODE1, CODE2); \ ++ __asm volatile ("" :: "w" (z0), "w" (z4_res), "w" (z5_res), \ ++ "w" (z6_res), "w" (z7_res)); \ ++ } ++ ++#define TEST_SET(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ ++ PROTO (NAME, void, (ZTYPE z0, ZTYPE z1, ZTYPE z2, ZTYPE z3, \ ++ TTYPE z4)) \ ++ { \ ++ register TTYPE z24 __asm ("z24"); \ ++ INVOKE (CODE1, CODE2); \ ++ __asm volatile ("" :: "w" (z4), "w" (z24)); \ ++ } ++ ++#define TEST_TBL2(NAME, TTYPE, ZTYPE, UTYPE, CODE1, CODE2) \ ++ PROTO (NAME, ZTYPE, (TTYPE z0, TTYPE z2, UTYPE z4)) \ ++ { \ ++ register ZTYPE z0_res __asm ("z0"); \ ++ INVOKE (CODE1, CODE2); \ ++ return z0_res; \ ++ } ++ ++#define TEST_TBL2_REV(NAME, TTYPE, ZTYPE, UTYPE, CODE1, CODE2) \ ++ PROTO (NAME, ZTYPE, (UTYPE z0, TTYPE z1, TTYPE z3)) \ ++ { \ ++ register ZTYPE z0_res __asm ("z0"); \ ++ INVOKE (CODE1, CODE2); \ ++ return z0_res; \ ++ } ++ ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c +new file mode 100644 +index 000000000..3a00716e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include 
"test_sve_acle.h" ++ ++/* ++** tmad_0_f16_tied1: ++** ftmad z0\.h, z0\.h, z1\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_0_f16_tied1, svfloat16_t, ++ z0 = svtmad_f16 (z0, z1, 0), ++ z0 = svtmad (z0, z1, 0)) ++ ++/* ++** tmad_0_f16_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ftmad z0\.h, z0\.h, \1\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_0_f16_tied2, svfloat16_t, ++ z0 = svtmad_f16 (z1, z0, 0), ++ z0 = svtmad (z1, z0, 0)) ++ ++/* ++** tmad_0_f16_untied: ++** movprfx z0, z1 ++** ftmad z0\.h, z0\.h, z2\.h, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_0_f16_untied, svfloat16_t, ++ z0 = svtmad_f16 (z1, z2, 0), ++ z0 = svtmad (z1, z2, 0)) ++ ++/* ++** tmad_1_f16: ++** ftmad z0\.h, z0\.h, z1\.h, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_1_f16, svfloat16_t, ++ z0 = svtmad_f16 (z0, z1, 1), ++ z0 = svtmad (z0, z1, 1)) ++ ++/* ++** tmad_2_f16: ++** ftmad z0\.h, z0\.h, z1\.h, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_2_f16, svfloat16_t, ++ z0 = svtmad_f16 (z0, z1, 2), ++ z0 = svtmad (z0, z1, 2)) ++ ++/* ++** tmad_3_f16: ++** ftmad z0\.h, z0\.h, z1\.h, #3 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_3_f16, svfloat16_t, ++ z0 = svtmad_f16 (z0, z1, 3), ++ z0 = svtmad (z0, z1, 3)) ++ ++/* ++** tmad_4_f16: ++** ftmad z0\.h, z0\.h, z1\.h, #4 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_4_f16, svfloat16_t, ++ z0 = svtmad_f16 (z0, z1, 4), ++ z0 = svtmad (z0, z1, 4)) ++ ++/* ++** tmad_5_f16: ++** ftmad z0\.h, z0\.h, z1\.h, #5 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_5_f16, svfloat16_t, ++ z0 = svtmad_f16 (z0, z1, 5), ++ z0 = svtmad (z0, z1, 5)) ++ ++/* ++** tmad_6_f16: ++** ftmad z0\.h, z0\.h, z1\.h, #6 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_6_f16, svfloat16_t, ++ z0 = svtmad_f16 (z0, z1, 6), ++ z0 = svtmad (z0, z1, 6)) ++ ++/* ++** tmad_7_f16: ++** ftmad z0\.h, z0\.h, z1\.h, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_7_f16, svfloat16_t, ++ z0 = svtmad_f16 (z0, z1, 7), ++ z0 = svtmad (z0, z1, 7)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c +new file mode 100644 +index 000000000..b73d420fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tmad_0_f32_tied1: ++** ftmad z0\.s, z0\.s, z1\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_0_f32_tied1, svfloat32_t, ++ z0 = svtmad_f32 (z0, z1, 0), ++ z0 = svtmad (z0, z1, 0)) ++ ++/* ++** tmad_0_f32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z1 ++** ftmad z0\.s, z0\.s, \1\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_0_f32_tied2, svfloat32_t, ++ z0 = svtmad_f32 (z1, z0, 0), ++ z0 = svtmad (z1, z0, 0)) ++ ++/* ++** tmad_0_f32_untied: ++** movprfx z0, z1 ++** ftmad z0\.s, z0\.s, z2\.s, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_0_f32_untied, svfloat32_t, ++ z0 = svtmad_f32 (z1, z2, 0), ++ z0 = svtmad (z1, z2, 0)) ++ ++/* ++** tmad_1_f32: ++** ftmad z0\.s, z0\.s, z1\.s, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_1_f32, svfloat32_t, ++ z0 = svtmad_f32 (z0, z1, 1), ++ z0 = svtmad (z0, z1, 1)) ++ ++/* ++** tmad_2_f32: ++** ftmad z0\.s, z0\.s, z1\.s, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_2_f32, svfloat32_t, ++ z0 = svtmad_f32 (z0, z1, 2), ++ z0 = svtmad (z0, z1, 2)) ++ ++/* ++** tmad_3_f32: ++** ftmad z0\.s, z0\.s, z1\.s, #3 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_3_f32, svfloat32_t, ++ z0 = svtmad_f32 (z0, z1, 3), ++ z0 = svtmad (z0, z1, 3)) ++ ++/* ++** tmad_4_f32: ++** ftmad z0\.s, z0\.s, z1\.s, #4 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_4_f32, 
svfloat32_t, ++ z0 = svtmad_f32 (z0, z1, 4), ++ z0 = svtmad (z0, z1, 4)) ++ ++/* ++** tmad_5_f32: ++** ftmad z0\.s, z0\.s, z1\.s, #5 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_5_f32, svfloat32_t, ++ z0 = svtmad_f32 (z0, z1, 5), ++ z0 = svtmad (z0, z1, 5)) ++ ++/* ++** tmad_6_f32: ++** ftmad z0\.s, z0\.s, z1\.s, #6 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_6_f32, svfloat32_t, ++ z0 = svtmad_f32 (z0, z1, 6), ++ z0 = svtmad (z0, z1, 6)) ++ ++/* ++** tmad_7_f32: ++** ftmad z0\.s, z0\.s, z1\.s, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_7_f32, svfloat32_t, ++ z0 = svtmad_f32 (z0, z1, 7), ++ z0 = svtmad (z0, z1, 7)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c +new file mode 100644 +index 000000000..fc31928a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c +@@ -0,0 +1,96 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tmad_0_f64_tied1: ++** ftmad z0\.d, z0\.d, z1\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_0_f64_tied1, svfloat64_t, ++ z0 = svtmad_f64 (z0, z1, 0), ++ z0 = svtmad (z0, z1, 0)) ++ ++/* ++** tmad_0_f64_tied2: ++** mov (z[0-9]+\.d), z0\.d ++** movprfx z0, z1 ++** ftmad z0\.d, z0\.d, \1, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_0_f64_tied2, svfloat64_t, ++ z0 = svtmad_f64 (z1, z0, 0), ++ z0 = svtmad (z1, z0, 0)) ++ ++/* ++** tmad_0_f64_untied: ++** movprfx z0, z1 ++** ftmad z0\.d, z0\.d, z2\.d, #0 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_0_f64_untied, svfloat64_t, ++ z0 = svtmad_f64 (z1, z2, 0), ++ z0 = svtmad (z1, z2, 0)) ++ ++/* ++** tmad_1_f64: ++** ftmad z0\.d, z0\.d, z1\.d, #1 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_1_f64, svfloat64_t, ++ z0 = svtmad_f64 (z0, z1, 1), ++ z0 = svtmad (z0, z1, 1)) ++ ++/* ++** tmad_2_f64: ++** ftmad z0\.d, z0\.d, z1\.d, #2 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_2_f64, svfloat64_t, ++ z0 = svtmad_f64 (z0, z1, 2), ++ z0 = svtmad (z0, z1, 2)) ++ ++/* ++** tmad_3_f64: ++** ftmad z0\.d, z0\.d, z1\.d, #3 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_3_f64, svfloat64_t, ++ z0 = svtmad_f64 (z0, z1, 3), ++ z0 = svtmad (z0, z1, 3)) ++ ++/* ++** tmad_4_f64: ++** ftmad z0\.d, z0\.d, z1\.d, #4 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_4_f64, svfloat64_t, ++ z0 = svtmad_f64 (z0, z1, 4), ++ z0 = svtmad (z0, z1, 4)) ++ ++/* ++** tmad_5_f64: ++** ftmad z0\.d, z0\.d, z1\.d, #5 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_5_f64, svfloat64_t, ++ z0 = svtmad_f64 (z0, z1, 5), ++ z0 = svtmad (z0, z1, 5)) ++ ++/* ++** tmad_6_f64: ++** ftmad z0\.d, z0\.d, z1\.d, #6 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_6_f64, svfloat64_t, ++ z0 = svtmad_f64 (z0, z1, 6), ++ z0 = svtmad (z0, z1, 6)) ++ ++/* ++** tmad_7_f64: ++** ftmad z0\.d, z0\.d, z1\.d, #7 ++** ret ++*/ ++TEST_UNIFORM_Z (tmad_7_f64, svfloat64_t, ++ z0 = svtmad_f64 (z0, z1, 7), ++ z0 = svtmad (z0, z1, 7)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b16.c +new file mode 100644 +index 000000000..902f8c397 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_b16_tied1: ++** trn1 p0\.h, p0\.h, p1\.h ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b16_tied1, ++ p0 = svtrn1_b16 (p0, p1), ++ p0 = svtrn1_b16 (p0, p1)) ++ ++/* ++** trn1_b16_tied2: ++** trn1 p0\.h, p1\.h, p0\.h ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b16_tied2, ++ p0 = svtrn1_b16 (p1, p0), ++ p0 = svtrn1_b16 
(p1, p0)) ++ ++/* ++** trn1_b16_untied: ++** trn1 p0\.h, p1\.h, p2\.h ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b16_untied, ++ p0 = svtrn1_b16 (p1, p2), ++ p0 = svtrn1_b16 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b32.c +new file mode 100644 +index 000000000..8c9ed5152 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_b32_tied1: ++** trn1 p0\.s, p0\.s, p1\.s ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b32_tied1, ++ p0 = svtrn1_b32 (p0, p1), ++ p0 = svtrn1_b32 (p0, p1)) ++ ++/* ++** trn1_b32_tied2: ++** trn1 p0\.s, p1\.s, p0\.s ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b32_tied2, ++ p0 = svtrn1_b32 (p1, p0), ++ p0 = svtrn1_b32 (p1, p0)) ++ ++/* ++** trn1_b32_untied: ++** trn1 p0\.s, p1\.s, p2\.s ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b32_untied, ++ p0 = svtrn1_b32 (p1, p2), ++ p0 = svtrn1_b32 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b64.c +new file mode 100644 +index 000000000..55b00571d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_b64_tied1: ++** trn1 p0\.d, p0\.d, p1\.d ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b64_tied1, ++ p0 = svtrn1_b64 (p0, p1), ++ p0 = svtrn1_b64 (p0, p1)) ++ ++/* ++** trn1_b64_tied2: ++** trn1 p0\.d, p1\.d, p0\.d ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b64_tied2, ++ p0 = svtrn1_b64 (p1, p0), ++ p0 = svtrn1_b64 (p1, p0)) ++ ++/* ++** trn1_b64_untied: ++** trn1 p0\.d, p1\.d, p2\.d ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b64_untied, ++ p0 = svtrn1_b64 (p1, p2), ++ p0 = svtrn1_b64 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b8.c +new file mode 100644 +index 000000000..4b5e80fbe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_b8_tied1: ++** trn1 p0\.b, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b8_tied1, ++ p0 = svtrn1_b8 (p0, p1), ++ p0 = svtrn1_b8 (p0, p1)) ++ ++/* ++** trn1_b8_tied2: ++** trn1 p0\.b, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b8_tied2, ++ p0 = svtrn1_b8 (p1, p0), ++ p0 = svtrn1_b8 (p1, p0)) ++ ++/* ++** trn1_b8_untied: ++** trn1 p0\.b, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (trn1_b8_untied, ++ p0 = svtrn1_b8 (p1, p2), ++ p0 = svtrn1_b8 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_bf16.c +new file mode 100644 +index 000000000..b04c7da4f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_bf16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_bf16_tied1: ++** trn1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_bf16_tied1, svbfloat16_t, ++ z0 = svtrn1_bf16 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_bf16_tied2: ++** trn1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_bf16_tied2, svbfloat16_t, ++ z0 = svtrn1_bf16 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_bf16_untied: 
++** trn1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_bf16_untied, svbfloat16_t, ++ z0 = svtrn1_bf16 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f16.c +new file mode 100644 +index 000000000..373eb9dd9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_f16_tied1: ++** trn1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_f16_tied1, svfloat16_t, ++ z0 = svtrn1_f16 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_f16_tied2: ++** trn1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_f16_tied2, svfloat16_t, ++ z0 = svtrn1_f16 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_f16_untied: ++** trn1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_f16_untied, svfloat16_t, ++ z0 = svtrn1_f16 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f32.c +new file mode 100644 +index 000000000..ccd84d94e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_f32_tied1: ++** trn1 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_f32_tied1, svfloat32_t, ++ z0 = svtrn1_f32 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_f32_tied2: ++** trn1 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_f32_tied2, svfloat32_t, ++ z0 = svtrn1_f32 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_f32_untied: ++** trn1 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_f32_untied, svfloat32_t, ++ z0 = svtrn1_f32 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f64.c +new file mode 100644 +index 000000000..d3cc51948 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_f64_tied1: ++** trn1 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_f64_tied1, svfloat64_t, ++ z0 = svtrn1_f64 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_f64_tied2: ++** trn1 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_f64_tied2, svfloat64_t, ++ z0 = svtrn1_f64 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_f64_untied: ++** trn1 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_f64_untied, svfloat64_t, ++ z0 = svtrn1_f64 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s16.c +new file mode 100644 +index 000000000..466bb8c02 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_s16_tied1: ++** trn1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s16_tied1, svint16_t, ++ z0 = svtrn1_s16 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_s16_tied2: ++** trn1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s16_tied2, svint16_t, ++ z0 = svtrn1_s16 (z1, z0), ++ 
z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_s16_untied: ++** trn1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s16_untied, svint16_t, ++ z0 = svtrn1_s16 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s32.c +new file mode 100644 +index 000000000..24655e622 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_s32_tied1: ++** trn1 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s32_tied1, svint32_t, ++ z0 = svtrn1_s32 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_s32_tied2: ++** trn1 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s32_tied2, svint32_t, ++ z0 = svtrn1_s32 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_s32_untied: ++** trn1 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s32_untied, svint32_t, ++ z0 = svtrn1_s32 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s64.c +new file mode 100644 +index 000000000..553fb610b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_s64_tied1: ++** trn1 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s64_tied1, svint64_t, ++ z0 = svtrn1_s64 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_s64_tied2: ++** trn1 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s64_tied2, svint64_t, ++ z0 = svtrn1_s64 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_s64_untied: ++** trn1 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s64_untied, svint64_t, ++ z0 = svtrn1_s64 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s8.c +new file mode 100644 +index 000000000..1fa150792 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_s8_tied1: ++** trn1 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s8_tied1, svint8_t, ++ z0 = svtrn1_s8 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_s8_tied2: ++** trn1 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s8_tied2, svint8_t, ++ z0 = svtrn1_s8 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_s8_untied: ++** trn1 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_s8_untied, svint8_t, ++ z0 = svtrn1_s8 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u16.c +new file mode 100644 +index 000000000..a3ce936f3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_u16_tied1: ++** trn1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u16_tied1, svuint16_t, ++ z0 = svtrn1_u16 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_u16_tied2: ++** trn1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u16_tied2, svuint16_t, ++ z0 = 
svtrn1_u16 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_u16_untied: ++** trn1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u16_untied, svuint16_t, ++ z0 = svtrn1_u16 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u32.c +new file mode 100644 +index 000000000..b14d7a67a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_u32_tied1: ++** trn1 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u32_tied1, svuint32_t, ++ z0 = svtrn1_u32 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_u32_tied2: ++** trn1 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u32_tied2, svuint32_t, ++ z0 = svtrn1_u32 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_u32_untied: ++** trn1 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u32_untied, svuint32_t, ++ z0 = svtrn1_u32 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u64.c +new file mode 100644 +index 000000000..2ccda1d72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_u64_tied1: ++** trn1 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u64_tied1, svuint64_t, ++ z0 = svtrn1_u64 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_u64_tied2: ++** trn1 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u64_tied2, svuint64_t, ++ z0 = svtrn1_u64 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_u64_untied: ++** trn1 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u64_untied, svuint64_t, ++ z0 = svtrn1_u64 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u8.c +new file mode 100644 +index 000000000..84f8d31e8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1_u8_tied1: ++** trn1 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u8_tied1, svuint8_t, ++ z0 = svtrn1_u8 (z0, z1), ++ z0 = svtrn1 (z0, z1)) ++ ++/* ++** trn1_u8_tied2: ++** trn1 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u8_tied2, svuint8_t, ++ z0 = svtrn1_u8 (z1, z0), ++ z0 = svtrn1 (z1, z0)) ++ ++/* ++** trn1_u8_untied: ++** trn1 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn1_u8_untied, svuint8_t, ++ z0 = svtrn1_u8 (z1, z2), ++ z0 = svtrn1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_bf16.c +new file mode 100644 +index 000000000..f1810da9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_bf16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_bf16_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_bf16_tied1, svbfloat16_t, ++ z0 = svtrn1q_bf16 
(z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_bf16_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_bf16_tied2, svbfloat16_t, ++ z0 = svtrn1q_bf16 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_bf16_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_bf16_untied, svbfloat16_t, ++ z0 = svtrn1q_bf16 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f16.c +new file mode 100644 +index 000000000..6420d0f0a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_f16_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_f16_tied1, svfloat16_t, ++ z0 = svtrn1q_f16 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_f16_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_f16_tied2, svfloat16_t, ++ z0 = svtrn1q_f16 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_f16_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_f16_untied, svfloat16_t, ++ z0 = svtrn1q_f16 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f32.c +new file mode 100644 +index 000000000..6fb2eecf5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_f32_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_f32_tied1, svfloat32_t, ++ z0 = svtrn1q_f32 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_f32_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_f32_tied2, svfloat32_t, ++ z0 = svtrn1q_f32 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_f32_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_f32_untied, svfloat32_t, ++ z0 = svtrn1q_f32 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f64.c +new file mode 100644 +index 000000000..e786a8d04 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_f64_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_f64_tied1, svfloat64_t, ++ z0 = svtrn1q_f64 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_f64_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_f64_tied2, svfloat64_t, ++ z0 = svtrn1q_f64 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_f64_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_f64_untied, svfloat64_t, ++ z0 = svtrn1q_f64 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s16.c +new file mode 100644 +index 000000000..548360719 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_s16_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s16_tied1, svint16_t, ++ z0 = svtrn1q_s16 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_s16_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s16_tied2, svint16_t, ++ z0 = svtrn1q_s16 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_s16_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s16_untied, svint16_t, ++ z0 = svtrn1q_s16 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s32.c +new file mode 100644 +index 000000000..ccb8319f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_s32_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s32_tied1, svint32_t, ++ z0 = svtrn1q_s32 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_s32_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s32_tied2, svint32_t, ++ z0 = svtrn1q_s32 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_s32_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s32_untied, svint32_t, ++ z0 = svtrn1q_s32 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s64.c +new file mode 100644 +index 000000000..fe8125a8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_s64_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s64_tied1, svint64_t, ++ z0 = svtrn1q_s64 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_s64_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s64_tied2, svint64_t, ++ z0 = svtrn1q_s64 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_s64_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s64_untied, svint64_t, ++ z0 = svtrn1q_s64 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s8.c +new file mode 100644 +index 000000000..48040c1ad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ 
++#include "test_sve_acle.h" ++ ++/* ++** trn1q_s8_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s8_tied1, svint8_t, ++ z0 = svtrn1q_s8 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_s8_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s8_tied2, svint8_t, ++ z0 = svtrn1q_s8 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_s8_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_s8_untied, svint8_t, ++ z0 = svtrn1q_s8 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u16.c +new file mode 100644 +index 000000000..3657f919e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_u16_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u16_tied1, svuint16_t, ++ z0 = svtrn1q_u16 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_u16_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u16_tied2, svuint16_t, ++ z0 = svtrn1q_u16 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_u16_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u16_untied, svuint16_t, ++ z0 = svtrn1q_u16 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u32.c +new file mode 100644 +index 000000000..cc5ea2878 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_u32_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u32_tied1, svuint32_t, ++ z0 = svtrn1q_u32 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_u32_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u32_tied2, svuint32_t, ++ z0 = svtrn1q_u32 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_u32_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u32_untied, svuint32_t, ++ z0 = svtrn1q_u32 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u64.c +new file mode 100644 +index 000000000..4435b53d0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_u64_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u64_tied1, svuint64_t, ++ z0 = svtrn1q_u64 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_u64_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u64_tied2, svuint64_t, ++ z0 = svtrn1q_u64 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_u64_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ 
++TEST_UNIFORM_Z (trn1q_u64_untied, svuint64_t, ++ z0 = svtrn1q_u64 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u8.c +new file mode 100644 +index 000000000..4ebfedbea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn1q_u8_tied1: ++** trn1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u8_tied1, svuint8_t, ++ z0 = svtrn1q_u8 (z0, z1), ++ z0 = svtrn1q (z0, z1)) ++ ++/* ++** trn1q_u8_tied2: ++** trn1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u8_tied2, svuint8_t, ++ z0 = svtrn1q_u8 (z1, z0), ++ z0 = svtrn1q (z1, z0)) ++ ++/* ++** trn1q_u8_untied: ++** trn1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn1q_u8_untied, svuint8_t, ++ z0 = svtrn1q_u8 (z1, z2), ++ z0 = svtrn1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b16.c +new file mode 100644 +index 000000000..54b593afe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_b16_tied1: ++** trn2 p0\.h, p0\.h, p1\.h ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b16_tied1, ++ p0 = svtrn2_b16 (p0, p1), ++ p0 = svtrn2_b16 (p0, p1)) ++ ++/* ++** trn2_b16_tied2: ++** trn2 p0\.h, p1\.h, p0\.h ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b16_tied2, ++ p0 = svtrn2_b16 (p1, p0), ++ p0 = svtrn2_b16 (p1, p0)) ++ ++/* ++** trn2_b16_untied: ++** trn2 p0\.h, p1\.h, p2\.h ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b16_untied, ++ p0 = svtrn2_b16 (p1, p2), ++ p0 = svtrn2_b16 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b32.c +new file mode 100644 +index 000000000..ead3d85cf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_b32_tied1: ++** trn2 p0\.s, p0\.s, p1\.s ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b32_tied1, ++ p0 = svtrn2_b32 (p0, p1), ++ p0 = svtrn2_b32 (p0, p1)) ++ ++/* ++** trn2_b32_tied2: ++** trn2 p0\.s, p1\.s, p0\.s ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b32_tied2, ++ p0 = svtrn2_b32 (p1, p0), ++ p0 = svtrn2_b32 (p1, p0)) ++ ++/* ++** trn2_b32_untied: ++** trn2 p0\.s, p1\.s, p2\.s ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b32_untied, ++ p0 = svtrn2_b32 (p1, p2), ++ p0 = svtrn2_b32 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b64.c +new file mode 100644 +index 000000000..ccca03557 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_b64_tied1: ++** trn2 p0\.d, p0\.d, p1\.d ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b64_tied1, ++ p0 = svtrn2_b64 (p0, p1), ++ p0 = svtrn2_b64 (p0, p1)) ++ ++/* ++** trn2_b64_tied2: ++** trn2 p0\.d, p1\.d, p0\.d ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b64_tied2, ++ p0 = svtrn2_b64 (p1, p0), ++ 
p0 = svtrn2_b64 (p1, p0)) ++ ++/* ++** trn2_b64_untied: ++** trn2 p0\.d, p1\.d, p2\.d ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b64_untied, ++ p0 = svtrn2_b64 (p1, p2), ++ p0 = svtrn2_b64 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b8.c +new file mode 100644 +index 000000000..7b0803e79 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_b8_tied1: ++** trn2 p0\.b, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b8_tied1, ++ p0 = svtrn2_b8 (p0, p1), ++ p0 = svtrn2_b8 (p0, p1)) ++ ++/* ++** trn2_b8_tied2: ++** trn2 p0\.b, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b8_tied2, ++ p0 = svtrn2_b8 (p1, p0), ++ p0 = svtrn2_b8 (p1, p0)) ++ ++/* ++** trn2_b8_untied: ++** trn2 p0\.b, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (trn2_b8_untied, ++ p0 = svtrn2_b8 (p1, p2), ++ p0 = svtrn2_b8 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_bf16.c +new file mode 100644 +index 000000000..12028b0f6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_bf16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_bf16_tied1: ++** trn2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_bf16_tied1, svbfloat16_t, ++ z0 = svtrn2_bf16 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_bf16_tied2: ++** trn2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_bf16_tied2, svbfloat16_t, ++ z0 = svtrn2_bf16 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_bf16_untied: ++** trn2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_bf16_untied, svbfloat16_t, ++ z0 = svtrn2_bf16 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f16.c +new file mode 100644 +index 000000000..112567725 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_f16_tied1: ++** trn2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_f16_tied1, svfloat16_t, ++ z0 = svtrn2_f16 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_f16_tied2: ++** trn2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_f16_tied2, svfloat16_t, ++ z0 = svtrn2_f16 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_f16_untied: ++** trn2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_f16_untied, svfloat16_t, ++ z0 = svtrn2_f16 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f32.c +new file mode 100644 +index 000000000..daee566cc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_f32_tied1: ++** trn2 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_f32_tied1, svfloat32_t, ++ z0 = svtrn2_f32 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_f32_tied2: ++** trn2 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_f32_tied2, svfloat32_t, ++ z0 = 
svtrn2_f32 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_f32_untied: ++** trn2 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_f32_untied, svfloat32_t, ++ z0 = svtrn2_f32 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f64.c +new file mode 100644 +index 000000000..338fee49f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_f64_tied1: ++** trn2 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_f64_tied1, svfloat64_t, ++ z0 = svtrn2_f64 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_f64_tied2: ++** trn2 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_f64_tied2, svfloat64_t, ++ z0 = svtrn2_f64 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_f64_untied: ++** trn2 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_f64_untied, svfloat64_t, ++ z0 = svtrn2_f64 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s16.c +new file mode 100644 +index 000000000..93f63de5e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_s16_tied1: ++** trn2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s16_tied1, svint16_t, ++ z0 = svtrn2_s16 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_s16_tied2: ++** trn2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s16_tied2, svint16_t, ++ z0 = svtrn2_s16 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_s16_untied: ++** trn2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s16_untied, svint16_t, ++ z0 = svtrn2_s16 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s32.c +new file mode 100644 +index 000000000..82edd72f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_s32_tied1: ++** trn2 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s32_tied1, svint32_t, ++ z0 = svtrn2_s32 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_s32_tied2: ++** trn2 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s32_tied2, svint32_t, ++ z0 = svtrn2_s32 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_s32_untied: ++** trn2 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s32_untied, svint32_t, ++ z0 = svtrn2_s32 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s64.c +new file mode 100644 +index 000000000..5f43441d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_s64_tied1: ++** trn2 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s64_tied1, svint64_t, ++ z0 = svtrn2_s64 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_s64_tied2: ++** trn2 z0\.d, z1\.d, z0\.d ++** ret ++*/ 
++TEST_UNIFORM_Z (trn2_s64_tied2, svint64_t, ++ z0 = svtrn2_s64 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_s64_untied: ++** trn2 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s64_untied, svint64_t, ++ z0 = svtrn2_s64 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s8.c +new file mode 100644 +index 000000000..716538119 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_s8_tied1: ++** trn2 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s8_tied1, svint8_t, ++ z0 = svtrn2_s8 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_s8_tied2: ++** trn2 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s8_tied2, svint8_t, ++ z0 = svtrn2_s8 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_s8_untied: ++** trn2 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_s8_untied, svint8_t, ++ z0 = svtrn2_s8 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u16.c +new file mode 100644 +index 000000000..e68d233b8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_u16_tied1: ++** trn2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u16_tied1, svuint16_t, ++ z0 = svtrn2_u16 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_u16_tied2: ++** trn2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u16_tied2, svuint16_t, ++ z0 = svtrn2_u16 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_u16_untied: ++** trn2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u16_untied, svuint16_t, ++ z0 = svtrn2_u16 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u32.c +new file mode 100644 +index 000000000..e48aad179 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_u32_tied1: ++** trn2 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u32_tied1, svuint32_t, ++ z0 = svtrn2_u32 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_u32_tied2: ++** trn2 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u32_tied2, svuint32_t, ++ z0 = svtrn2_u32 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_u32_untied: ++** trn2 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u32_untied, svuint32_t, ++ z0 = svtrn2_u32 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u64.c +new file mode 100644 +index 000000000..aa452275b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_u64_tied1: ++** trn2 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u64_tied1, svuint64_t, ++ z0 = svtrn2_u64 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_u64_tied2: ++** trn2 z0\.d, 
z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u64_tied2, svuint64_t, ++ z0 = svtrn2_u64 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_u64_untied: ++** trn2 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u64_untied, svuint64_t, ++ z0 = svtrn2_u64 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u8.c +new file mode 100644 +index 000000000..cb26b2338 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2_u8_tied1: ++** trn2 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u8_tied1, svuint8_t, ++ z0 = svtrn2_u8 (z0, z1), ++ z0 = svtrn2 (z0, z1)) ++ ++/* ++** trn2_u8_tied2: ++** trn2 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u8_tied2, svuint8_t, ++ z0 = svtrn2_u8 (z1, z0), ++ z0 = svtrn2 (z1, z0)) ++ ++/* ++** trn2_u8_untied: ++** trn2 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (trn2_u8_untied, svuint8_t, ++ z0 = svtrn2_u8 (z1, z2), ++ z0 = svtrn2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_bf16.c +new file mode 100644 +index 000000000..5623b54f0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_bf16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_bf16_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_bf16_tied1, svbfloat16_t, ++ z0 = svtrn2q_bf16 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_bf16_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_bf16_tied2, svbfloat16_t, ++ z0 = svtrn2q_bf16 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_bf16_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_bf16_untied, svbfloat16_t, ++ z0 = svtrn2q_bf16 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f16.c +new file mode 100644 +index 000000000..db2190929 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_f16_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_f16_tied1, svfloat16_t, ++ z0 = svtrn2q_f16 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_f16_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_f16_tied2, svfloat16_t, ++ z0 = svtrn2q_f16 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_f16_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_f16_untied, svfloat16_t, ++ z0 = svtrn2q_f16 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f32.c +new file mode 100644 +index 000000000..1367a1e06 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f32.c +@@ -0,0 +1,32 @@ 
++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_f32_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_f32_tied1, svfloat32_t, ++ z0 = svtrn2q_f32 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_f32_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_f32_tied2, svfloat32_t, ++ z0 = svtrn2q_f32 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_f32_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_f32_untied, svfloat32_t, ++ z0 = svtrn2q_f32 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f64.c +new file mode 100644 +index 000000000..54325e705 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_f64_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_f64_tied1, svfloat64_t, ++ z0 = svtrn2q_f64 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_f64_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_f64_tied2, svfloat64_t, ++ z0 = svtrn2q_f64 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_f64_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_f64_untied, svfloat64_t, ++ z0 = svtrn2q_f64 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s16.c +new file mode 100644 +index 000000000..a0b641278 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_s16_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s16_tied1, svint16_t, ++ z0 = svtrn2q_s16 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_s16_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s16_tied2, svint16_t, ++ z0 = svtrn2q_s16 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_s16_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s16_untied, svint16_t, ++ z0 = svtrn2q_s16 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s32.c +new file mode 100644 +index 000000000..7c128c6ef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_s32_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s32_tied1, svint32_t, ++ z0 = svtrn2q_s32 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_s32_tied2: ++** trn2 z0\.q, 
z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s32_tied2, svint32_t, ++ z0 = svtrn2q_s32 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_s32_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s32_untied, svint32_t, ++ z0 = svtrn2q_s32 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s64.c +new file mode 100644 +index 000000000..f22222525 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_s64_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s64_tied1, svint64_t, ++ z0 = svtrn2q_s64 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_s64_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s64_tied2, svint64_t, ++ z0 = svtrn2q_s64 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_s64_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s64_untied, svint64_t, ++ z0 = svtrn2q_s64 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s8.c +new file mode 100644 +index 000000000..bd5243f35 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_s8_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s8_tied1, svint8_t, ++ z0 = svtrn2q_s8 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_s8_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s8_tied2, svint8_t, ++ z0 = svtrn2q_s8 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_s8_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_s8_untied, svint8_t, ++ z0 = svtrn2q_s8 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u16.c +new file mode 100644 +index 000000000..8da8563b2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_u16_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u16_tied1, svuint16_t, ++ z0 = svtrn2q_u16 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_u16_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u16_tied2, svuint16_t, ++ z0 = svtrn2q_u16 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_u16_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u16_untied, svuint16_t, ++ z0 = svtrn2q_u16 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u32.c +new file mode 100644 +index 
000000000..6c0af02da +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_u32_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u32_tied1, svuint32_t, ++ z0 = svtrn2q_u32 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_u32_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u32_tied2, svuint32_t, ++ z0 = svtrn2q_u32 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_u32_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u32_untied, svuint32_t, ++ z0 = svtrn2q_u32 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u64.c +new file mode 100644 +index 000000000..857595cbb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_u64_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u64_tied1, svuint64_t, ++ z0 = svtrn2q_u64 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_u64_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u64_tied2, svuint64_t, ++ z0 = svtrn2q_u64 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_u64_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u64_untied, svuint64_t, ++ z0 = svtrn2q_u64 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u8.c +new file mode 100644 +index 000000000..1fb85b249 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** trn2q_u8_tied1: ++** trn2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u8_tied1, svuint8_t, ++ z0 = svtrn2q_u8 (z0, z1), ++ z0 = svtrn2q (z0, z1)) ++ ++/* ++** trn2q_u8_tied2: ++** trn2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u8_tied2, svuint8_t, ++ z0 = svtrn2q_u8 (z1, z0), ++ z0 = svtrn2q (z1, z0)) ++ ++/* ++** trn2q_u8_untied: ++** trn2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (trn2q_u8_untied, svuint8_t, ++ z0 = svtrn2q_u8 (z1, z2), ++ z0 = svtrn2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c +new file mode 100644 +index 000000000..94bc696eb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tsmul_f16_tied1: ++** ftsmul z0\.h, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tsmul_f16_tied1, svfloat16_t, svuint16_t, ++ z0 = svtsmul_f16 (z0, z4), ++ z0 = svtsmul (z0, z4)) ++ ++/* ++** tsmul_f16_tied2: ++** ftsmul z0\.h, z4\.h, 
z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (tsmul_f16_tied2, svfloat16_t, svuint16_t, ++ z0_res = svtsmul_f16 (z4, z0), ++ z0_res = svtsmul (z4, z0)) ++ ++/* ++** tsmul_f16_untied: ++** ftsmul z0\.h, z1\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tsmul_f16_untied, svfloat16_t, svuint16_t, ++ z0 = svtsmul_f16 (z1, z4), ++ z0 = svtsmul (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c +new file mode 100644 +index 000000000..d0ec91882 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tsmul_f32_tied1: ++** ftsmul z0\.s, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (tsmul_f32_tied1, svfloat32_t, svuint32_t, ++ z0 = svtsmul_f32 (z0, z4), ++ z0 = svtsmul (z0, z4)) ++ ++/* ++** tsmul_f32_tied2: ++** ftsmul z0\.s, z4\.s, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (tsmul_f32_tied2, svfloat32_t, svuint32_t, ++ z0_res = svtsmul_f32 (z4, z0), ++ z0_res = svtsmul (z4, z0)) ++ ++/* ++** tsmul_f32_untied: ++** ftsmul z0\.s, z1\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (tsmul_f32_untied, svfloat32_t, svuint32_t, ++ z0 = svtsmul_f32 (z1, z4), ++ z0 = svtsmul (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c +new file mode 100644 +index 000000000..23e0da3f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tsmul_f64_tied1: ++** ftsmul z0\.d, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (tsmul_f64_tied1, svfloat64_t, svuint64_t, ++ z0 = svtsmul_f64 (z0, z4), ++ z0 = svtsmul (z0, z4)) ++ ++/* ++** tsmul_f64_tied2: ++** ftsmul z0\.d, z4\.d, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (tsmul_f64_tied2, svfloat64_t, svuint64_t, ++ z0_res = svtsmul_f64 (z4, z0), ++ z0_res = svtsmul (z4, z0)) ++ ++/* ++** tsmul_f64_untied: ++** ftsmul z0\.d, z1\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (tsmul_f64_untied, svfloat64_t, svuint64_t, ++ z0 = svtsmul_f64 (z1, z4), ++ z0 = svtsmul (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c +new file mode 100644 +index 000000000..e7c3ea03b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tssel_f16_tied1: ++** ftssel z0\.h, z0\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tssel_f16_tied1, svfloat16_t, svuint16_t, ++ z0 = svtssel_f16 (z0, z4), ++ z0 = svtssel (z0, z4)) ++ ++/* ++** tssel_f16_tied2: ++** ftssel z0\.h, z4\.h, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (tssel_f16_tied2, svfloat16_t, svuint16_t, ++ z0_res = svtssel_f16 (z4, z0), ++ z0_res = svtssel (z4, z0)) ++ ++/* ++** tssel_f16_untied: ++** ftssel z0\.h, z1\.h, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (tssel_f16_untied, svfloat16_t, svuint16_t, ++ z0 = svtssel_f16 (z1, z4), ++ z0 = svtssel (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c +new file mode 100644 +index 000000000..022573a19 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tssel_f32_tied1: ++** ftssel z0\.s, z0\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (tssel_f32_tied1, svfloat32_t, svuint32_t, ++ z0 = svtssel_f32 (z0, z4), ++ z0 = svtssel (z0, z4)) ++ ++/* ++** tssel_f32_tied2: ++** ftssel z0\.s, z4\.s, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (tssel_f32_tied2, svfloat32_t, svuint32_t, ++ z0_res = svtssel_f32 (z4, z0), ++ z0_res = svtssel (z4, z0)) ++ ++/* ++** tssel_f32_untied: ++** ftssel z0\.s, z1\.s, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (tssel_f32_untied, svfloat32_t, svuint32_t, ++ z0 = svtssel_f32 (z1, z4), ++ z0 = svtssel (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c +new file mode 100644 +index 000000000..ffcdf4224 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** tssel_f64_tied1: ++** ftssel z0\.d, z0\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (tssel_f64_tied1, svfloat64_t, svuint64_t, ++ z0 = svtssel_f64 (z0, z4), ++ z0 = svtssel (z0, z4)) ++ ++/* ++** tssel_f64_tied2: ++** ftssel z0\.d, z4\.d, z0\.d ++** ret ++*/ ++TEST_DUAL_Z_REV (tssel_f64_tied2, svfloat64_t, svuint64_t, ++ z0_res = svtssel_f64 (z4, z0), ++ z0_res = svtssel (z4, z0)) ++ ++/* ++** tssel_f64_untied: ++** ftssel z0\.d, z1\.d, z4\.d ++** ret ++*/ ++TEST_DUAL_Z (tssel_f64_untied, svfloat64_t, svuint64_t, ++ z0 = svtssel_f64 (z1, z4), ++ z0 = svtssel (z1, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef2_1.c +new file mode 100644 +index 000000000..fe6c4c7c7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef2_1.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** int8: ++** ret ++*/ ++TEST_UNDEF (int8, svint8x2_t, ++ z0 = svundef2_s8 ()) ++ ++/* ++** uint8: ++** ret ++*/ ++TEST_UNDEF (uint8, svuint8x2_t, ++ z0 = svundef2_u8 ()) ++ ++/* ++** int16: ++** ret ++*/ ++TEST_UNDEF (int16, svint16x2_t, ++ z0 = svundef2_s16 ()) ++ ++/* ++** uint16: ++** ret ++*/ ++TEST_UNDEF (uint16, svuint16x2_t, ++ z0 = svundef2_u16 ()) ++ ++/* ++** float16: ++** ret ++*/ ++TEST_UNDEF (float16, svfloat16x2_t, ++ z0 = svundef2_f16 ()) ++ ++/* ++** bfloat16: ++** ret ++*/ ++TEST_UNDEF (bfloat16, svbfloat16x2_t, ++ z0 = svundef2_bf16 ()) ++ ++/* ++** int32: ++** ret ++*/ ++TEST_UNDEF (int32, svint32x2_t, ++ z0 = svundef2_s32 ()) ++ ++/* ++** uint32: ++** ret ++*/ ++TEST_UNDEF (uint32, svuint32x2_t, ++ z0 = svundef2_u32 ()) ++ ++/* ++** float32: ++** ret ++*/ ++TEST_UNDEF (float32, svfloat32x2_t, ++ z0 = svundef2_f32 ()) ++ ++/* ++** int64: ++** ret ++*/ ++TEST_UNDEF (int64, svint64x2_t, ++ z0 = svundef2_s64 ()) ++ ++/* ++** uint64: ++** ret ++*/ ++TEST_UNDEF (uint64, svuint64x2_t, ++ z0 = svundef2_u64 ()) ++ ++/* ++** float64: ++** ret ++*/ ++TEST_UNDEF (float64, svfloat64x2_t, ++ z0 = svundef2_f64 ()) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef3_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef3_1.c +new file mode 100644 +index 000000000..5c18c6317 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef3_1.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** int8: ++** ret ++*/ ++TEST_UNDEF (int8, 
svint8x3_t, ++ z0 = svundef3_s8 ()) ++ ++/* ++** uint8: ++** ret ++*/ ++TEST_UNDEF (uint8, svuint8x3_t, ++ z0 = svundef3_u8 ()) ++ ++/* ++** int16: ++** ret ++*/ ++TEST_UNDEF (int16, svint16x3_t, ++ z0 = svundef3_s16 ()) ++ ++/* ++** uint16: ++** ret ++*/ ++TEST_UNDEF (uint16, svuint16x3_t, ++ z0 = svundef3_u16 ()) ++ ++/* ++** float16: ++** ret ++*/ ++TEST_UNDEF (float16, svfloat16x3_t, ++ z0 = svundef3_f16 ()) ++ ++/* ++** bfloat16: ++** ret ++*/ ++TEST_UNDEF (bfloat16, svbfloat16x3_t, ++ z0 = svundef3_bf16 ()) ++ ++/* ++** int32: ++** ret ++*/ ++TEST_UNDEF (int32, svint32x3_t, ++ z0 = svundef3_s32 ()) ++ ++/* ++** uint32: ++** ret ++*/ ++TEST_UNDEF (uint32, svuint32x3_t, ++ z0 = svundef3_u32 ()) ++ ++/* ++** float32: ++** ret ++*/ ++TEST_UNDEF (float32, svfloat32x3_t, ++ z0 = svundef3_f32 ()) ++ ++/* ++** int64: ++** ret ++*/ ++TEST_UNDEF (int64, svint64x3_t, ++ z0 = svundef3_s64 ()) ++ ++/* ++** uint64: ++** ret ++*/ ++TEST_UNDEF (uint64, svuint64x3_t, ++ z0 = svundef3_u64 ()) ++ ++/* ++** float64: ++** ret ++*/ ++TEST_UNDEF (float64, svfloat64x3_t, ++ z0 = svundef3_f64 ()) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef4_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef4_1.c +new file mode 100644 +index 000000000..4d6b86b04 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef4_1.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** int8: ++** ret ++*/ ++TEST_UNDEF (int8, svint8x4_t, ++ z0 = svundef4_s8 ()) ++ ++/* ++** uint8: ++** ret ++*/ ++TEST_UNDEF (uint8, svuint8x4_t, ++ z0 = svundef4_u8 ()) ++ ++/* ++** int16: ++** ret ++*/ ++TEST_UNDEF (int16, svint16x4_t, ++ z0 = svundef4_s16 ()) ++ ++/* ++** uint16: ++** ret ++*/ ++TEST_UNDEF (uint16, svuint16x4_t, ++ z0 = svundef4_u16 ()) ++ ++/* ++** float16: ++** ret ++*/ ++TEST_UNDEF (float16, svfloat16x4_t, ++ z0 = svundef4_f16 ()) ++ ++/* ++** bfloat16: ++** ret ++*/ ++TEST_UNDEF (bfloat16, svbfloat16x4_t, ++ z0 = svundef4_bf16 ()) ++ ++/* ++** int32: ++** ret ++*/ ++TEST_UNDEF (int32, svint32x4_t, ++ z0 = svundef4_s32 ()) ++ ++/* ++** uint32: ++** ret ++*/ ++TEST_UNDEF (uint32, svuint32x4_t, ++ z0 = svundef4_u32 ()) ++ ++/* ++** float32: ++** ret ++*/ ++TEST_UNDEF (float32, svfloat32x4_t, ++ z0 = svundef4_f32 ()) ++ ++/* ++** int64: ++** ret ++*/ ++TEST_UNDEF (int64, svint64x4_t, ++ z0 = svundef4_s64 ()) ++ ++/* ++** uint64: ++** ret ++*/ ++TEST_UNDEF (uint64, svuint64x4_t, ++ z0 = svundef4_u64 ()) ++ ++/* ++** float64: ++** ret ++*/ ++TEST_UNDEF (float64, svfloat64x4_t, ++ z0 = svundef4_f64 ()) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef_1.c +new file mode 100644 +index 000000000..62873b6e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef_1.c +@@ -0,0 +1,87 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** int8: ++** ret ++*/ ++TEST_UNDEF (int8, svint8_t, ++ z0 = svundef_s8 ()) ++ ++/* ++** uint8: ++** ret ++*/ ++TEST_UNDEF (uint8, svuint8_t, ++ z0 = svundef_u8 ()) ++ ++/* ++** int16: ++** ret ++*/ ++TEST_UNDEF (int16, svint16_t, ++ z0 = svundef_s16 ()) ++ ++/* ++** uint16: ++** ret ++*/ ++TEST_UNDEF (uint16, svuint16_t, ++ z0 = svundef_u16 ()) ++ ++/* ++** float16: ++** ret ++*/ ++TEST_UNDEF (float16, svfloat16_t, ++ z0 = svundef_f16 ()) ++ ++/* ++** bfloat16: ++** ret ++*/ ++TEST_UNDEF (bfloat16, svbfloat16_t, ++ z0 = svundef_bf16 ()) ++ 
++/* ++** int32: ++** ret ++*/ ++TEST_UNDEF (int32, svint32_t, ++ z0 = svundef_s32 ()) ++ ++/* ++** uint32: ++** ret ++*/ ++TEST_UNDEF (uint32, svuint32_t, ++ z0 = svundef_u32 ()) ++ ++/* ++** float32: ++** ret ++*/ ++TEST_UNDEF (float32, svfloat32_t, ++ z0 = svundef_f32 ()) ++ ++/* ++** int64: ++** ret ++*/ ++TEST_UNDEF (int64, svint64_t, ++ z0 = svundef_s64 ()) ++ ++/* ++** uint64: ++** ret ++*/ ++TEST_UNDEF (uint64, svuint64_t, ++ z0 = svundef_u64 ()) ++ ++/* ++** float64: ++** ret ++*/ ++TEST_UNDEF (float64, svfloat64_t, ++ z0 = svundef_f64 ()) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_b.c +new file mode 100644 +index 000000000..ff1a84aac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_b.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpkhi_b_tied1: ++** punpkhi p0\.h, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (unpkhi_b_tied1, ++ p0 = svunpkhi_b (p0), ++ p0 = svunpkhi (p0)) ++ ++/* ++** unpkhi_b_untied: ++** punpkhi p0\.h, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (unpkhi_b_untied, ++ p0 = svunpkhi_b (p1), ++ p0 = svunpkhi (p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s16.c +new file mode 100644 +index 000000000..3f79ac65f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpkhi_s16_tied1: ++** sunpkhi z0\.h, z0\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (unpkhi_s16_tied1, svint16_t, svint8_t, ++ z0_res = svunpkhi_s16 (z0), ++ z0_res = svunpkhi (z0)) ++ ++/* ++** unpkhi_s16_untied: ++** sunpkhi z0\.h, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (unpkhi_s16_untied, svint16_t, svint8_t, ++ z0 = svunpkhi_s16 (z4), ++ z0 = svunpkhi (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s32.c +new file mode 100644 +index 000000000..619fb0882 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpkhi_s32_tied1: ++** sunpkhi z0\.s, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (unpkhi_s32_tied1, svint32_t, svint16_t, ++ z0_res = svunpkhi_s32 (z0), ++ z0_res = svunpkhi (z0)) ++ ++/* ++** unpkhi_s32_untied: ++** sunpkhi z0\.s, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (unpkhi_s32_untied, svint32_t, svint16_t, ++ z0 = svunpkhi_s32 (z4), ++ z0 = svunpkhi (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s64.c +new file mode 100644 +index 000000000..5d6da1768 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpkhi_s64_tied1: ++** sunpkhi z0\.d, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (unpkhi_s64_tied1, svint64_t, svint32_t, ++ z0_res = svunpkhi_s64 (z0), ++ z0_res = svunpkhi (z0)) ++ ++/* ++** unpkhi_s64_untied: ++** sunpkhi z0\.d, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (unpkhi_s64_untied, svint64_t, svint32_t, ++ z0 = svunpkhi_s64 (z4), ++ z0 = svunpkhi (z4)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u16.c +new file mode 100644 +index 000000000..68f47a282 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpkhi_u16_tied1: ++** uunpkhi z0\.h, z0\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (unpkhi_u16_tied1, svuint16_t, svuint8_t, ++ z0_res = svunpkhi_u16 (z0), ++ z0_res = svunpkhi (z0)) ++ ++/* ++** unpkhi_u16_untied: ++** uunpkhi z0\.h, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (unpkhi_u16_untied, svuint16_t, svuint8_t, ++ z0 = svunpkhi_u16 (z4), ++ z0 = svunpkhi (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u32.c +new file mode 100644 +index 000000000..3c4b161e4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpkhi_u32_tied1: ++** uunpkhi z0\.s, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (unpkhi_u32_tied1, svuint32_t, svuint16_t, ++ z0_res = svunpkhi_u32 (z0), ++ z0_res = svunpkhi (z0)) ++ ++/* ++** unpkhi_u32_untied: ++** uunpkhi z0\.s, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (unpkhi_u32_untied, svuint32_t, svuint16_t, ++ z0 = svunpkhi_u32 (z4), ++ z0 = svunpkhi (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u64.c +new file mode 100644 +index 000000000..94cfbd493 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpkhi_u64_tied1: ++** uunpkhi z0\.d, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (unpkhi_u64_tied1, svuint64_t, svuint32_t, ++ z0_res = svunpkhi_u64 (z0), ++ z0_res = svunpkhi (z0)) ++ ++/* ++** unpkhi_u64_untied: ++** uunpkhi z0\.d, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (unpkhi_u64_untied, svuint64_t, svuint32_t, ++ z0 = svunpkhi_u64 (z4), ++ z0 = svunpkhi (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_b.c +new file mode 100644 +index 000000000..476ec8bc3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_b.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpklo_b_tied1: ++** punpklo p0\.h, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (unpklo_b_tied1, ++ p0 = svunpklo_b (p0), ++ p0 = svunpklo (p0)) ++ ++/* ++** unpklo_b_untied: ++** punpklo p0\.h, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (unpklo_b_untied, ++ p0 = svunpklo_b (p1), ++ p0 = svunpklo (p1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s16.c +new file mode 100644 +index 000000000..a0e83ff1b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpklo_s16_tied1: ++** sunpklo z0\.h, z0\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (unpklo_s16_tied1, svint16_t, svint8_t, ++ z0_res = svunpklo_s16 (z0), ++ z0_res = svunpklo (z0)) ++ ++/* ++** unpklo_s16_untied: ++** sunpklo z0\.h, 
z4\.b ++** ret ++*/ ++TEST_DUAL_Z (unpklo_s16_untied, svint16_t, svint8_t, ++ z0 = svunpklo_s16 (z4), ++ z0 = svunpklo (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s32.c +new file mode 100644 +index 000000000..49a14fb7b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpklo_s32_tied1: ++** sunpklo z0\.s, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (unpklo_s32_tied1, svint32_t, svint16_t, ++ z0_res = svunpklo_s32 (z0), ++ z0_res = svunpklo (z0)) ++ ++/* ++** unpklo_s32_untied: ++** sunpklo z0\.s, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (unpklo_s32_untied, svint32_t, svint16_t, ++ z0 = svunpklo_s32 (z4), ++ z0 = svunpklo (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s64.c +new file mode 100644 +index 000000000..c430047e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpklo_s64_tied1: ++** sunpklo z0\.d, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (unpklo_s64_tied1, svint64_t, svint32_t, ++ z0_res = svunpklo_s64 (z0), ++ z0_res = svunpklo (z0)) ++ ++/* ++** unpklo_s64_untied: ++** sunpklo z0\.d, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (unpklo_s64_untied, svint64_t, svint32_t, ++ z0 = svunpklo_s64 (z4), ++ z0 = svunpklo (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u16.c +new file mode 100644 +index 000000000..6feee4427 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u16.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpklo_u16_tied1: ++** uunpklo z0\.h, z0\.b ++** ret ++*/ ++TEST_DUAL_Z_REV (unpklo_u16_tied1, svuint16_t, svuint8_t, ++ z0_res = svunpklo_u16 (z0), ++ z0_res = svunpklo (z0)) ++ ++/* ++** unpklo_u16_untied: ++** uunpklo z0\.h, z4\.b ++** ret ++*/ ++TEST_DUAL_Z (unpklo_u16_untied, svuint16_t, svuint8_t, ++ z0 = svunpklo_u16 (z4), ++ z0 = svunpklo (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u32.c +new file mode 100644 +index 000000000..c4d4efc86 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u32.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpklo_u32_tied1: ++** uunpklo z0\.s, z0\.h ++** ret ++*/ ++TEST_DUAL_Z_REV (unpklo_u32_tied1, svuint32_t, svuint16_t, ++ z0_res = svunpklo_u32 (z0), ++ z0_res = svunpklo (z0)) ++ ++/* ++** unpklo_u32_untied: ++** uunpklo z0\.s, z4\.h ++** ret ++*/ ++TEST_DUAL_Z (unpklo_u32_untied, svuint32_t, svuint16_t, ++ z0 = svunpklo_u32 (z4), ++ z0 = svunpklo (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u64.c +new file mode 100644 +index 000000000..2845e37a5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u64.c +@@ -0,0 +1,21 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** unpklo_u64_tied1: ++** 
uunpklo z0\.d, z0\.s ++** ret ++*/ ++TEST_DUAL_Z_REV (unpklo_u64_tied1, svuint64_t, svuint32_t, ++ z0_res = svunpklo_u64 (z0), ++ z0_res = svunpklo (z0)) ++ ++/* ++** unpklo_u64_untied: ++** uunpklo z0\.d, z4\.s ++** ret ++*/ ++TEST_DUAL_Z (unpklo_u64_untied, svuint64_t, svuint32_t, ++ z0 = svunpklo_u64 (z4), ++ z0 = svunpklo (z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_lane_s32.c +new file mode 100644 +index 000000000..8fd255687 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_lane_s32.c +@@ -0,0 +1,97 @@ ++/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** usdot_lane_0_s32_tied1: ++** usdot z0\.s, z2\.b, z4\.b\[0\] ++** ret ++*/ ++TEST_TRIPLE_Z (usdot_lane_0_s32_tied1, svint32_t, svuint8_t, svint8_t, ++ z0 = svusdot_lane_s32 (z0, z2, z4, 0), ++ z0 = svusdot_lane (z0, z2, z4, 0)) ++ ++/* ++** usdot_lane_0_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z2 ++** usdot z0\.s, \1\.b, z4\.b\[0\] ++** ret ++*/ ++TEST_TRIPLE_Z_REV2 (usdot_lane_0_s32_tied2, svint32_t, svuint8_t, svint8_t, ++ z0_res = svusdot_lane_s32 (z2, z0, z4, 0), ++ z0_res = svusdot_lane (z2, z0, z4, 0)) ++ ++/* ++** usdot_lane_0_s32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** usdot z0\.s, z2\.b, \1\.b\[0\] ++** ret ++*/ ++TEST_TRIPLE_Z_REV (usdot_lane_0_s32_tied3, svint32_t, svuint8_t, svint8_t, ++ z0_res = svusdot_lane_s32 (z4, z2, z0, 0), ++ z0_res = svusdot_lane (z4, z2, z0, 0)) ++ ++/* ++** usdot_lane_0_s32_untied: ++** movprfx z0, z1 ++** usdot z0\.s, z2\.b, z4\.b\[0\] ++** ret ++*/ ++TEST_TRIPLE_Z (usdot_lane_0_s32_untied, svint32_t, svuint8_t, svint8_t, ++ z0 = svusdot_lane_s32 (z1, z2, z4, 0), ++ z0 = svusdot_lane (z1, z2, z4, 0)) ++ ++/* ++** usdot_lane_1_s32: ++** usdot z0\.s, z2\.b, z5\.b\[1\] ++** ret ++*/ ++TEST_TRIPLE_Z (usdot_lane_1_s32, svint32_t, svuint8_t, svint8_t, ++ z0 = svusdot_lane_s32 (z0, z2, z5, 1), ++ z0 = svusdot_lane (z0, z2, z5, 1)) ++ ++/* ++** usdot_lane_2_s32: ++** usdot z0\.s, z2\.b, z5\.b\[2\] ++** ret ++*/ ++TEST_TRIPLE_Z (usdot_lane_2_s32, svint32_t, svuint8_t, svint8_t, ++ z0 = svusdot_lane_s32 (z0, z2, z5, 2), ++ z0 = svusdot_lane (z0, z2, z5, 2)) ++ ++/* ++** usdot_lane_3_s32: ++** usdot z0\.s, z2\.b, z5\.b\[3\] ++** ret ++*/ ++TEST_TRIPLE_Z (usdot_lane_3_s32, svint32_t, svuint8_t, svint8_t, ++ z0 = svusdot_lane_s32 (z0, z2, z5, 3), ++ z0 = svusdot_lane (z0, z2, z5, 3)) ++ ++/* ++** usdot_lane_z8_s32: ++** str d8, \[sp, -16\]! 
++** mov (z[0-7])\.d, z8\.d ++** usdot z0\.s, z1\.b, \1\.b\[1\] ++** ldr d8, \[sp\], 16 ++** ret ++*/ ++TEST_TRIPLE_LANE_REG (usdot_lane_z8_s32, svint32_t, svuint8_t, svint8_t, ++ z8, ++ z0 = svusdot_lane_s32 (z0, z1, z8, 1), ++ z0 = svusdot_lane (z0, z1, z8, 1)) ++ ++/* ++** usdot_lane_z16_s32: ++** mov (z[0-7])\.d, z16\.d ++** usdot z0\.s, z1\.b, \1\.b\[1\] ++** ret ++*/ ++TEST_TRIPLE_LANE_REG (usdot_lane_z16_s32, svint32_t, svuint8_t, svint8_t, ++ z16, ++ z0 = svusdot_lane_s32 (z0, z1, z16, 1), ++ z0 = svusdot_lane (z0, z1, z16, 1)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_s32.c +new file mode 100644 +index 000000000..ccac5cae5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_s32.c +@@ -0,0 +1,46 @@ ++/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** usdot_s32_tied1: ++** usdot z0\.s, z2\.b, z4\.b ++** ret ++*/ ++TEST_TRIPLE_Z (usdot_s32_tied1, svint32_t, svuint8_t, svint8_t, ++ z0 = svusdot_s32 (z0, z2, z4), ++ z0 = svusdot (z0, z2, z4)) ++ ++/* ++** usdot_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** usdot z0\.s, z2\.b, \1\.b ++** ret ++*/ ++TEST_TRIPLE_Z_REV (usdot_s32_tied2, svint32_t, svuint8_t, svint8_t, ++ z0_res = svusdot_s32 (z4, z2, z0), ++ z0_res = svusdot (z4, z2, z0)) ++ ++/* ++** usdot_w0_s32_tied: ++** mov (z[0-9]+\.b), w0 ++** usdot z0\.s, z2\.b, \1 ++** ret ++*/ ++TEST_TRIPLE_ZX (usdot_w0_s32_tied, svint32_t, svuint8_t, int8_t, ++ z0 = svusdot_n_s32 (z0, z2, x0), ++ z0 = svusdot (z0, z2, x0)) ++ ++/* ++** usdot_9_s32_tied: ++** mov (z[0-9]+\.b), #9 ++** usdot z0\.s, z2\.b, \1 ++** ret ++*/ ++TEST_TRIPLE_Z (usdot_9_s32_tied, svint32_t, svuint8_t, int8_t, ++ z0 = svusdot_n_s32 (z0, z2, 9), ++ z0 = svusdot (z0, z2, 9)) ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c +new file mode 100644 +index 000000000..9440f3fd9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c +@@ -0,0 +1,46 @@ ++/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** usmmla_s32_tied1: ++** usmmla z0\.s, z2\.b, z4\.b ++** ret ++*/ ++TEST_TRIPLE_Z (usmmla_s32_tied1, svint32_t, svuint8_t, svint8_t, ++ z0 = svusmmla_s32 (z0, z2, z4), ++ z0 = svusmmla (z0, z2, z4)) ++ ++/* ++** usmmla_s32_tied2: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z2 ++** usmmla z0\.s, \1\.b, z4\.b ++** ret ++*/ ++TEST_TRIPLE_Z_REV2 (usmmla_s32_tied2, svint32_t, svuint8_t, svint8_t, ++ z0_res = svusmmla_s32 (z2, z0, z4), ++ z0_res = svusmmla (z2, z0, z4)) ++ ++/* ++** usmmla_s32_tied3: ++** mov (z[0-9]+)\.d, z0\.d ++** movprfx z0, z4 ++** usmmla z0\.s, z2\.b, \1\.b ++** ret ++*/ ++TEST_TRIPLE_Z_REV (usmmla_s32_tied3, svint32_t, svuint8_t, svint8_t, ++ z0_res = svusmmla_s32 (z4, z2, z0), ++ z0_res = svusmmla (z4, z2, z0)) ++ ++/* ++** usmmla_s32_untied: ++** movprfx z0, z1 ++** usmmla z0\.s, z2\.b, z4\.b ++** ret ++*/ ++TEST_TRIPLE_Z (usmmla_s32_untied, svint32_t, svuint8_t, svint8_t, ++ z0 = svusmmla_s32 (z1, z2, z4), ++ z0 = svusmmla (z1, z2, z4)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b16.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b16.c +new file mode 100644 +index 000000000..245e401aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_b16_tied1: ++** uzp1 p0\.h, p0\.h, p1\.h ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b16_tied1, ++ p0 = svuzp1_b16 (p0, p1), ++ p0 = svuzp1_b16 (p0, p1)) ++ ++/* ++** uzp1_b16_tied2: ++** uzp1 p0\.h, p1\.h, p0\.h ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b16_tied2, ++ p0 = svuzp1_b16 (p1, p0), ++ p0 = svuzp1_b16 (p1, p0)) ++ ++/* ++** uzp1_b16_untied: ++** uzp1 p0\.h, p1\.h, p2\.h ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b16_untied, ++ p0 = svuzp1_b16 (p1, p2), ++ p0 = svuzp1_b16 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b32.c +new file mode 100644 +index 000000000..c88034492 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_b32_tied1: ++** uzp1 p0\.s, p0\.s, p1\.s ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b32_tied1, ++ p0 = svuzp1_b32 (p0, p1), ++ p0 = svuzp1_b32 (p0, p1)) ++ ++/* ++** uzp1_b32_tied2: ++** uzp1 p0\.s, p1\.s, p0\.s ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b32_tied2, ++ p0 = svuzp1_b32 (p1, p0), ++ p0 = svuzp1_b32 (p1, p0)) ++ ++/* ++** uzp1_b32_untied: ++** uzp1 p0\.s, p1\.s, p2\.s ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b32_untied, ++ p0 = svuzp1_b32 (p1, p2), ++ p0 = svuzp1_b32 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b64.c +new file mode 100644 +index 000000000..71ac5c150 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_b64_tied1: ++** uzp1 p0\.d, p0\.d, p1\.d ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b64_tied1, ++ p0 = svuzp1_b64 (p0, p1), ++ p0 = svuzp1_b64 (p0, p1)) ++ ++/* ++** uzp1_b64_tied2: ++** uzp1 p0\.d, p1\.d, p0\.d ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b64_tied2, ++ p0 = svuzp1_b64 (p1, p0), ++ p0 = svuzp1_b64 (p1, p0)) ++ ++/* ++** uzp1_b64_untied: ++** uzp1 p0\.d, p1\.d, p2\.d ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b64_untied, ++ p0 = svuzp1_b64 (p1, p2), ++ p0 = svuzp1_b64 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b8.c +new file mode 100644 +index 000000000..250054bb6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_b8_tied1: ++** uzp1 p0\.b, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b8_tied1, ++ p0 = svuzp1_b8 (p0, p1), ++ p0 = svuzp1_b8 (p0, p1)) ++ ++/* ++** uzp1_b8_tied2: ++** uzp1 p0\.b, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b8_tied2, ++ p0 = svuzp1_b8 (p1, p0), ++ p0 = svuzp1_b8 (p1, p0)) ++ ++/* ++** uzp1_b8_untied: ++** uzp1 p0\.b, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (uzp1_b8_untied, ++ p0 = svuzp1_b8 (p1, p2), ++ p0 = svuzp1_b8 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_bf16.c +new 
file mode 100644 +index 000000000..19d43ed11 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_bf16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_bf16_tied1: ++** uzp1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_bf16_tied1, svbfloat16_t, ++ z0 = svuzp1_bf16 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_bf16_tied2: ++** uzp1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_bf16_tied2, svbfloat16_t, ++ z0 = svuzp1_bf16 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_bf16_untied: ++** uzp1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_bf16_untied, svbfloat16_t, ++ z0 = svuzp1_bf16 (z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f16.c +new file mode 100644 +index 000000000..313673e9d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_f16_tied1: ++** uzp1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_f16_tied1, svfloat16_t, ++ z0 = svuzp1_f16 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_f16_tied2: ++** uzp1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_f16_tied2, svfloat16_t, ++ z0 = svuzp1_f16 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_f16_untied: ++** uzp1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_f16_untied, svfloat16_t, ++ z0 = svuzp1_f16 (z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f32.c +new file mode 100644 +index 000000000..5bbac2c60 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_f32_tied1: ++** uzp1 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_f32_tied1, svfloat32_t, ++ z0 = svuzp1_f32 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_f32_tied2: ++** uzp1 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_f32_tied2, svfloat32_t, ++ z0 = svuzp1_f32 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_f32_untied: ++** uzp1 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_f32_untied, svfloat32_t, ++ z0 = svuzp1_f32 (z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f64.c +new file mode 100644 +index 000000000..ef97b1765 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_f64_tied1: ++** uzp1 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_f64_tied1, svfloat64_t, ++ z0 = svuzp1_f64 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_f64_tied2: ++** uzp1 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_f64_tied2, svfloat64_t, ++ z0 = svuzp1_f64 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_f64_untied: ++** uzp1 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_f64_untied, svfloat64_t, ++ z0 = svuzp1_f64 (z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s16.c +new file mode 100644 +index 000000000..b77832b07 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_s16_tied1: ++** uzp1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s16_tied1, svint16_t, ++ z0 = svuzp1_s16 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_s16_tied2: ++** uzp1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s16_tied2, svint16_t, ++ z0 = svuzp1_s16 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_s16_untied: ++** uzp1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s16_untied, svint16_t, ++ z0 = svuzp1_s16 (z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s32.c +new file mode 100644 +index 000000000..64291afbe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_s32_tied1: ++** uzp1 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s32_tied1, svint32_t, ++ z0 = svuzp1_s32 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_s32_tied2: ++** uzp1 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s32_tied2, svint32_t, ++ z0 = svuzp1_s32 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_s32_untied: ++** uzp1 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s32_untied, svint32_t, ++ z0 = svuzp1_s32 (z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s64.c +new file mode 100644 +index 000000000..e8f7799f6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_s64_tied1: ++** uzp1 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s64_tied1, svint64_t, ++ z0 = svuzp1_s64 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_s64_tied2: ++** uzp1 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s64_tied2, svint64_t, ++ z0 = svuzp1_s64 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_s64_untied: ++** uzp1 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s64_untied, svint64_t, ++ z0 = svuzp1_s64 (z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s8.c +new file mode 100644 +index 000000000..98464b790 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_s8_tied1: ++** uzp1 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s8_tied1, svint8_t, ++ z0 = svuzp1_s8 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_s8_tied2: ++** uzp1 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s8_tied2, svint8_t, ++ z0 = svuzp1_s8 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_s8_untied: ++** uzp1 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_s8_untied, svint8_t, ++ z0 = svuzp1_s8 (z1, z2), ++ z0 = svuzp1 (z1, 
z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u16.c +new file mode 100644 +index 000000000..da95171fe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_u16_tied1: ++** uzp1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u16_tied1, svuint16_t, ++ z0 = svuzp1_u16 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_u16_tied2: ++** uzp1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u16_tied2, svuint16_t, ++ z0 = svuzp1_u16 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_u16_untied: ++** uzp1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u16_untied, svuint16_t, ++ z0 = svuzp1_u16 (z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u32.c +new file mode 100644 +index 000000000..a57cdcc06 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_u32_tied1: ++** uzp1 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u32_tied1, svuint32_t, ++ z0 = svuzp1_u32 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_u32_tied2: ++** uzp1 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u32_tied2, svuint32_t, ++ z0 = svuzp1_u32 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_u32_untied: ++** uzp1 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u32_untied, svuint32_t, ++ z0 = svuzp1_u32 (z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u64.c +new file mode 100644 +index 000000000..24d820359 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_u64_tied1: ++** uzp1 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u64_tied1, svuint64_t, ++ z0 = svuzp1_u64 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_u64_tied2: ++** uzp1 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u64_tied2, svuint64_t, ++ z0 = svuzp1_u64 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_u64_untied: ++** uzp1 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u64_untied, svuint64_t, ++ z0 = svuzp1_u64 (z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u8.c +new file mode 100644 +index 000000000..359d4c5f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1_u8_tied1: ++** uzp1 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u8_tied1, svuint8_t, ++ z0 = svuzp1_u8 (z0, z1), ++ z0 = svuzp1 (z0, z1)) ++ ++/* ++** uzp1_u8_tied2: ++** uzp1 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u8_tied2, svuint8_t, ++ z0 = svuzp1_u8 (z1, z0), ++ z0 = svuzp1 (z1, z0)) ++ ++/* ++** uzp1_u8_untied: ++** uzp1 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1_u8_untied, svuint8_t, ++ z0 = svuzp1_u8 
(z1, z2), ++ z0 = svuzp1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_bf16.c +new file mode 100644 +index 000000000..30a199241 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_bf16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_bf16_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_bf16_tied1, svbfloat16_t, ++ z0 = svuzp1q_bf16 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_bf16_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_bf16_tied2, svbfloat16_t, ++ z0 = svuzp1q_bf16 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_bf16_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_bf16_untied, svbfloat16_t, ++ z0 = svuzp1q_bf16 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f16.c +new file mode 100644 +index 000000000..c11e5bdc4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_f16_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_f16_tied1, svfloat16_t, ++ z0 = svuzp1q_f16 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_f16_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_f16_tied2, svfloat16_t, ++ z0 = svuzp1q_f16 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_f16_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_f16_untied, svfloat16_t, ++ z0 = svuzp1q_f16 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f32.c +new file mode 100644 +index 000000000..d0ac94543 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_f32_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_f32_tied1, svfloat32_t, ++ z0 = svuzp1q_f32 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_f32_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_f32_tied2, svfloat32_t, ++ z0 = svuzp1q_f32 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_f32_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_f32_untied, svfloat32_t, ++ z0 = svuzp1q_f32 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f64.c +new file mode 100644 +index 000000000..ac2e5c5cf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options 
"-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_f64_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_f64_tied1, svfloat64_t, ++ z0 = svuzp1q_f64 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_f64_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_f64_tied2, svfloat64_t, ++ z0 = svuzp1q_f64 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_f64_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_f64_untied, svfloat64_t, ++ z0 = svuzp1q_f64 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s16.c +new file mode 100644 +index 000000000..aa200b24e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_s16_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s16_tied1, svint16_t, ++ z0 = svuzp1q_s16 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_s16_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s16_tied2, svint16_t, ++ z0 = svuzp1q_s16 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_s16_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s16_untied, svint16_t, ++ z0 = svuzp1q_s16 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s32.c +new file mode 100644 +index 000000000..eb849df74 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_s32_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s32_tied1, svint32_t, ++ z0 = svuzp1q_s32 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_s32_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s32_tied2, svint32_t, ++ z0 = svuzp1q_s32 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_s32_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s32_untied, svint32_t, ++ z0 = svuzp1q_s32 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s64.c +new file mode 100644 +index 000000000..e1049761c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_s64_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s64_tied1, svint64_t, ++ z0 = svuzp1q_s64 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_s64_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s64_tied2, svint64_t, ++ z0 = svuzp1q_s64 (z1, 
z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_s64_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s64_untied, svint64_t, ++ z0 = svuzp1q_s64 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s8.c +new file mode 100644 +index 000000000..8aa592199 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_s8_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s8_tied1, svint8_t, ++ z0 = svuzp1q_s8 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_s8_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s8_tied2, svint8_t, ++ z0 = svuzp1q_s8 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_s8_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_s8_untied, svint8_t, ++ z0 = svuzp1q_s8 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u16.c +new file mode 100644 +index 000000000..00ffaab06 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_u16_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u16_tied1, svuint16_t, ++ z0 = svuzp1q_u16 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_u16_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u16_tied2, svuint16_t, ++ z0 = svuzp1q_u16 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_u16_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u16_untied, svuint16_t, ++ z0 = svuzp1q_u16 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u32.c +new file mode 100644 +index 000000000..cd2e4db26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_u32_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u32_tied1, svuint32_t, ++ z0 = svuzp1q_u32 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_u32_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u32_tied2, svuint32_t, ++ z0 = svuzp1q_u32 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_u32_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u32_untied, svuint32_t, ++ z0 = svuzp1q_u32 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u64.c +new file mode 100644 +index 000000000..7d8823329 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u64.c 
+@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_u64_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u64_tied1, svuint64_t, ++ z0 = svuzp1q_u64 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_u64_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u64_tied2, svuint64_t, ++ z0 = svuzp1q_u64 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_u64_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u64_untied, svuint64_t, ++ z0 = svuzp1q_u64 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u8.c +new file mode 100644 +index 000000000..701a1d575 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp1q_u8_tied1: ++** uzp1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u8_tied1, svuint8_t, ++ z0 = svuzp1q_u8 (z0, z1), ++ z0 = svuzp1q (z0, z1)) ++ ++/* ++** uzp1q_u8_tied2: ++** uzp1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u8_tied2, svuint8_t, ++ z0 = svuzp1q_u8 (z1, z0), ++ z0 = svuzp1q (z1, z0)) ++ ++/* ++** uzp1q_u8_untied: ++** uzp1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp1q_u8_untied, svuint8_t, ++ z0 = svuzp1q_u8 (z1, z2), ++ z0 = svuzp1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b16.c +new file mode 100644 +index 000000000..c3a91e7fc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_b16_tied1: ++** uzp2 p0\.h, p0\.h, p1\.h ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b16_tied1, ++ p0 = svuzp2_b16 (p0, p1), ++ p0 = svuzp2_b16 (p0, p1)) ++ ++/* ++** uzp2_b16_tied2: ++** uzp2 p0\.h, p1\.h, p0\.h ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b16_tied2, ++ p0 = svuzp2_b16 (p1, p0), ++ p0 = svuzp2_b16 (p1, p0)) ++ ++/* ++** uzp2_b16_untied: ++** uzp2 p0\.h, p1\.h, p2\.h ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b16_untied, ++ p0 = svuzp2_b16 (p1, p2), ++ p0 = svuzp2_b16 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b32.c +new file mode 100644 +index 000000000..e3294a6f3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_b32_tied1: ++** uzp2 p0\.s, p0\.s, p1\.s ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b32_tied1, ++ p0 = svuzp2_b32 (p0, p1), ++ p0 = svuzp2_b32 (p0, p1)) ++ ++/* ++** uzp2_b32_tied2: ++** uzp2 p0\.s, p1\.s, p0\.s ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b32_tied2, ++ p0 = svuzp2_b32 (p1, p0), ++ p0 = svuzp2_b32 (p1, p0)) ++ ++/* ++** uzp2_b32_untied: ++** uzp2 p0\.s, p1\.s, p2\.s ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b32_untied, ++ p0 = svuzp2_b32 (p1, p2), ++ p0 = svuzp2_b32 (p1, p2)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b64.c +new file mode 100644 +index 000000000..3ae72e10c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_b64_tied1: ++** uzp2 p0\.d, p0\.d, p1\.d ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b64_tied1, ++ p0 = svuzp2_b64 (p0, p1), ++ p0 = svuzp2_b64 (p0, p1)) ++ ++/* ++** uzp2_b64_tied2: ++** uzp2 p0\.d, p1\.d, p0\.d ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b64_tied2, ++ p0 = svuzp2_b64 (p1, p0), ++ p0 = svuzp2_b64 (p1, p0)) ++ ++/* ++** uzp2_b64_untied: ++** uzp2 p0\.d, p1\.d, p2\.d ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b64_untied, ++ p0 = svuzp2_b64 (p1, p2), ++ p0 = svuzp2_b64 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b8.c +new file mode 100644 +index 000000000..726a9a079 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_b8_tied1: ++** uzp2 p0\.b, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b8_tied1, ++ p0 = svuzp2_b8 (p0, p1), ++ p0 = svuzp2_b8 (p0, p1)) ++ ++/* ++** uzp2_b8_tied2: ++** uzp2 p0\.b, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b8_tied2, ++ p0 = svuzp2_b8 (p1, p0), ++ p0 = svuzp2_b8 (p1, p0)) ++ ++/* ++** uzp2_b8_untied: ++** uzp2 p0\.b, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (uzp2_b8_untied, ++ p0 = svuzp2_b8 (p1, p2), ++ p0 = svuzp2_b8 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_bf16.c +new file mode 100644 +index 000000000..b5566bfdf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_bf16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_bf16_tied1: ++** uzp2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_bf16_tied1, svbfloat16_t, ++ z0 = svuzp2_bf16 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_bf16_tied2: ++** uzp2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_bf16_tied2, svbfloat16_t, ++ z0 = svuzp2_bf16 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_bf16_untied: ++** uzp2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_bf16_untied, svbfloat16_t, ++ z0 = svuzp2_bf16 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f16.c +new file mode 100644 +index 000000000..d4847ef37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_f16_tied1: ++** uzp2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_f16_tied1, svfloat16_t, ++ z0 = svuzp2_f16 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_f16_tied2: ++** uzp2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_f16_tied2, svfloat16_t, ++ z0 = svuzp2_f16 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_f16_untied: ++** uzp2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_f16_untied, svfloat16_t, ++ z0 = svuzp2_f16 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f32.c +new file mode 100644 +index 000000000..c1699fc9c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_f32_tied1: ++** uzp2 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_f32_tied1, svfloat32_t, ++ z0 = svuzp2_f32 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_f32_tied2: ++** uzp2 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_f32_tied2, svfloat32_t, ++ z0 = svuzp2_f32 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_f32_untied: ++** uzp2 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_f32_untied, svfloat32_t, ++ z0 = svuzp2_f32 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f64.c +new file mode 100644 +index 000000000..afbf5c11a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_f64_tied1: ++** uzp2 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_f64_tied1, svfloat64_t, ++ z0 = svuzp2_f64 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_f64_tied2: ++** uzp2 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_f64_tied2, svfloat64_t, ++ z0 = svuzp2_f64 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_f64_untied: ++** uzp2 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_f64_untied, svfloat64_t, ++ z0 = svuzp2_f64 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s16.c +new file mode 100644 +index 000000000..e88df8734 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_s16_tied1: ++** uzp2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s16_tied1, svint16_t, ++ z0 = svuzp2_s16 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_s16_tied2: ++** uzp2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s16_tied2, svint16_t, ++ z0 = svuzp2_s16 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_s16_untied: ++** uzp2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s16_untied, svint16_t, ++ z0 = svuzp2_s16 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s32.c +new file mode 100644 +index 000000000..2e9a73d1f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_s32_tied1: ++** uzp2 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s32_tied1, svint32_t, ++ z0 = svuzp2_s32 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_s32_tied2: ++** uzp2 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s32_tied2, svint32_t, ++ z0 = svuzp2_s32 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_s32_untied: ++** uzp2 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s32_untied, svint32_t, ++ z0 = svuzp2_s32 (z1, 
z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s64.c +new file mode 100644 +index 000000000..ffec78ccc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_s64_tied1: ++** uzp2 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s64_tied1, svint64_t, ++ z0 = svuzp2_s64 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_s64_tied2: ++** uzp2 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s64_tied2, svint64_t, ++ z0 = svuzp2_s64 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_s64_untied: ++** uzp2 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s64_untied, svint64_t, ++ z0 = svuzp2_s64 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s8.c +new file mode 100644 +index 000000000..72037a088 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_s8_tied1: ++** uzp2 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s8_tied1, svint8_t, ++ z0 = svuzp2_s8 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_s8_tied2: ++** uzp2 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s8_tied2, svint8_t, ++ z0 = svuzp2_s8 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_s8_untied: ++** uzp2 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_s8_untied, svint8_t, ++ z0 = svuzp2_s8 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u16.c +new file mode 100644 +index 000000000..d84f8c9ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_u16_tied1: ++** uzp2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u16_tied1, svuint16_t, ++ z0 = svuzp2_u16 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_u16_tied2: ++** uzp2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u16_tied2, svuint16_t, ++ z0 = svuzp2_u16 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_u16_untied: ++** uzp2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u16_untied, svuint16_t, ++ z0 = svuzp2_u16 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u32.c +new file mode 100644 +index 000000000..0285ff91f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_u32_tied1: ++** uzp2 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u32_tied1, svuint32_t, ++ z0 = svuzp2_u32 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_u32_tied2: ++** uzp2 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u32_tied2, svuint32_t, ++ z0 = svuzp2_u32 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_u32_untied: ++** uzp2 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u32_untied, svuint32_t, ++ 
z0 = svuzp2_u32 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u64.c +new file mode 100644 +index 000000000..1b51baf90 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_u64_tied1: ++** uzp2 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u64_tied1, svuint64_t, ++ z0 = svuzp2_u64 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_u64_tied2: ++** uzp2 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u64_tied2, svuint64_t, ++ z0 = svuzp2_u64 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_u64_untied: ++** uzp2 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u64_untied, svuint64_t, ++ z0 = svuzp2_u64 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u8.c +new file mode 100644 +index 000000000..662e0b818 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2_u8_tied1: ++** uzp2 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u8_tied1, svuint8_t, ++ z0 = svuzp2_u8 (z0, z1), ++ z0 = svuzp2 (z0, z1)) ++ ++/* ++** uzp2_u8_tied2: ++** uzp2 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u8_tied2, svuint8_t, ++ z0 = svuzp2_u8 (z1, z0), ++ z0 = svuzp2 (z1, z0)) ++ ++/* ++** uzp2_u8_untied: ++** uzp2 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2_u8_untied, svuint8_t, ++ z0 = svuzp2_u8 (z1, z2), ++ z0 = svuzp2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_bf16.c +new file mode 100644 +index 000000000..bbac53a7a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_bf16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_bf16_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_bf16_tied1, svbfloat16_t, ++ z0 = svuzp2q_bf16 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_bf16_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_bf16_tied2, svbfloat16_t, ++ z0 = svuzp2q_bf16 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_bf16_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_bf16_untied, svbfloat16_t, ++ z0 = svuzp2q_bf16 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f16.c +new file mode 100644 +index 000000000..e19d118fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_f16_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_f16_tied1, svfloat16_t, ++ z0 = svuzp2q_f16 (z0, z1), ++ z0 = svuzp2q 
(z0, z1)) ++ ++/* ++** uzp2q_f16_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_f16_tied2, svfloat16_t, ++ z0 = svuzp2q_f16 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_f16_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_f16_untied, svfloat16_t, ++ z0 = svuzp2q_f16 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f32.c +new file mode 100644 +index 000000000..af7112b15 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_f32_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_f32_tied1, svfloat32_t, ++ z0 = svuzp2q_f32 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_f32_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_f32_tied2, svfloat32_t, ++ z0 = svuzp2q_f32 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_f32_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_f32_untied, svfloat32_t, ++ z0 = svuzp2q_f32 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f64.c +new file mode 100644 +index 000000000..4109b843c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_f64_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_f64_tied1, svfloat64_t, ++ z0 = svuzp2q_f64 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_f64_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_f64_tied2, svfloat64_t, ++ z0 = svuzp2q_f64 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_f64_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_f64_untied, svfloat64_t, ++ z0 = svuzp2q_f64 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s16.c +new file mode 100644 +index 000000000..0c6ab25cf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_s16_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s16_tied1, svint16_t, ++ z0 = svuzp2q_s16 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_s16_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s16_tied2, svint16_t, ++ z0 = svuzp2q_s16 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_s16_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s16_untied, svint16_t, ++ z0 = svuzp2q_s16 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s32.c +new file mode 100644 +index 000000000..9b914e704 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_s32_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s32_tied1, svint32_t, ++ z0 = svuzp2q_s32 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_s32_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s32_tied2, svint32_t, ++ z0 = svuzp2q_s32 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_s32_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s32_untied, svint32_t, ++ z0 = svuzp2q_s32 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s64.c +new file mode 100644 +index 000000000..697e37d78 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_s64_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s64_tied1, svint64_t, ++ z0 = svuzp2q_s64 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_s64_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s64_tied2, svint64_t, ++ z0 = svuzp2q_s64 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_s64_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s64_untied, svint64_t, ++ z0 = svuzp2q_s64 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s8.c +new file mode 100644 +index 000000000..576262c5d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_s8_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s8_tied1, svint8_t, ++ z0 = svuzp2q_s8 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_s8_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s8_tied2, svint8_t, ++ z0 = svuzp2q_s8 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_s8_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_s8_untied, svint8_t, ++ z0 = svuzp2q_s8 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u16.c +new file mode 100644 +index 000000000..f2debc28f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_u16_tied1: ++** uzp2 z0\.q, 
z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u16_tied1, svuint16_t, ++ z0 = svuzp2q_u16 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_u16_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u16_tied2, svuint16_t, ++ z0 = svuzp2q_u16 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_u16_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u16_untied, svuint16_t, ++ z0 = svuzp2q_u16 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u32.c +new file mode 100644 +index 000000000..ad6a4bcc0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_u32_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u32_tied1, svuint32_t, ++ z0 = svuzp2q_u32 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_u32_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u32_tied2, svuint32_t, ++ z0 = svuzp2q_u32 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_u32_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u32_untied, svuint32_t, ++ z0 = svuzp2q_u32 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u64.c +new file mode 100644 +index 000000000..a846aa295 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_u64_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u64_tied1, svuint64_t, ++ z0 = svuzp2q_u64 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_u64_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u64_tied2, svuint64_t, ++ z0 = svuzp2q_u64 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_u64_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u64_untied, svuint64_t, ++ z0 = svuzp2q_u64 (z1, z2), ++ z0 = svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u8.c +new file mode 100644 +index 000000000..163c22659 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** uzp2q_u8_tied1: ++** uzp2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u8_tied1, svuint8_t, ++ z0 = svuzp2q_u8 (z0, z1), ++ z0 = svuzp2q (z0, z1)) ++ ++/* ++** uzp2q_u8_tied2: ++** uzp2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u8_tied2, svuint8_t, ++ z0 = svuzp2q_u8 (z1, z0), ++ z0 = svuzp2q (z1, z0)) ++ ++/* ++** uzp2q_u8_untied: ++** uzp2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (uzp2q_u8_untied, svuint8_t, ++ z0 = svuzp2q_u8 (z1, z2), ++ z0 = 
svuzp2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b16.c +new file mode 100644 +index 000000000..c285a7a73 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b16.c +@@ -0,0 +1,173 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** whilele_rr_b16_s32: ++** whilele p0\.h, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b16_s32, int32_t, ++ p0 = svwhilele_b16_s32 (x0, x1), ++ p0 = svwhilele_b16 (x0, x1)) ++ ++/* ++** whilele_0r_b16_s32: ++** whilele p0\.h, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b16_s32, int32_t, ++ p0 = svwhilele_b16_s32 (0, x1), ++ p0 = svwhilele_b16 (0, x1)) ++ ++/* ++** whilele_5r_b16_s32: ++** mov (w[0-9]+), #?5 ++** whilele p0\.h, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b16_s32, int32_t, ++ p0 = svwhilele_b16_s32 (5, x1), ++ p0 = svwhilele_b16 (5, x1)) ++ ++/* ++** whilele_r0_b16_s32: ++** whilele p0\.h, w0, wzr ++** ret ++*/ ++TEST_COMPARE_S (whilele_r0_b16_s32, int32_t, ++ p0 = svwhilele_b16_s32 (x0, 0), ++ p0 = svwhilele_b16 (x0, 0)) ++ ++/* ++** whilele_r5_b16_s32: ++** mov (w[0-9]+), #?5 ++** whilele p0\.h, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b16_s32, int32_t, ++ p0 = svwhilele_b16_s32 (x0, 5), ++ p0 = svwhilele_b16 (x0, 5)) ++ ++/* ++** whilele_rr_b16_s64: ++** whilele p0\.h, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b16_s64, int64_t, ++ p0 = svwhilele_b16_s64 (x0, x1), ++ p0 = svwhilele_b16 (x0, x1)) ++ ++/* ++** whilele_0r_b16_s64: ++** whilele p0\.h, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b16_s64, int64_t, ++ p0 = svwhilele_b16_s64 (0, x1), ++ p0 = svwhilele_b16 ((int64_t) 0, x1)) ++ ++/* ++** whilele_5r_b16_s64: ++** mov (x[0-9]+), #?5 ++** whilele p0\.h, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b16_s64, int64_t, ++ p0 = svwhilele_b16_s64 (5, x1), ++ p0 = svwhilele_b16 ((int64_t) 5, x1)) ++ ++/* ++** whilele_r0_b16_s64: ++** whilele p0\.h, x0, xzr ++** ret ++*/ ++TEST_COMPARE_S (whilele_r0_b16_s64, int64_t, ++ p0 = svwhilele_b16_s64 (x0, 0), ++ p0 = svwhilele_b16 (x0, (int64_t) 0)) ++ ++/* ++** whilele_r5_b16_s64: ++** mov (x[0-9]+), #?5 ++** whilele p0\.h, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b16_s64, int64_t, ++ p0 = svwhilele_b16_s64 (x0, 5), ++ p0 = svwhilele_b16 (x0, (int64_t) 5)) ++ ++/* ++** whilele_rr_b16_u32: ++** whilels p0\.h, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b16_u32, uint32_t, ++ p0 = svwhilele_b16_u32 (x0, x1), ++ p0 = svwhilele_b16 (x0, x1)) ++ ++/* ++** whilele_0r_b16_u32: ++** whilels p0\.h, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b16_u32, uint32_t, ++ p0 = svwhilele_b16_u32 (0, x1), ++ p0 = svwhilele_b16 ((uint32_t) 0, x1)) ++ ++/* ++** whilele_5r_b16_u32: ++** mov (w[0-9]+), #?5 ++** whilels p0\.h, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b16_u32, uint32_t, ++ p0 = svwhilele_b16_u32 (5, x1), ++ p0 = svwhilele_b16 ((uint32_t) 5, x1)) ++ ++/* ++** whilele_r5_b16_u32: ++** mov (w[0-9]+), #?5 ++** whilels p0\.h, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b16_u32, uint32_t, ++ p0 = svwhilele_b16_u32 (x0, 5), ++ p0 = svwhilele_b16 (x0, (uint32_t) 5)) ++ ++/* ++** whilele_rr_b16_u64: ++** whilels p0\.h, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b16_u64, uint64_t, ++ p0 = svwhilele_b16_u64 (x0, x1), ++ p0 = svwhilele_b16 (x0, x1)) ++ ++/* ++** whilele_0r_b16_u64: ++** whilels p0\.h, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S 
(whilele_0r_b16_u64, uint64_t, ++ p0 = svwhilele_b16_u64 (0, x1), ++ p0 = svwhilele_b16 ((uint64_t) 0, x1)) ++ ++/* ++** whilele_5r_b16_u64: ++** mov (x[0-9]+), #?5 ++** whilels p0\.h, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b16_u64, uint64_t, ++ p0 = svwhilele_b16_u64 (5, x1), ++ p0 = svwhilele_b16 ((uint64_t) 5, x1)) ++ ++/* ++** whilele_r5_b16_u64: ++** mov (x[0-9]+), #?5 ++** whilels p0\.h, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b16_u64, uint64_t, ++ p0 = svwhilele_b16_u64 (x0, 5), ++ p0 = svwhilele_b16 (x0, (uint64_t) 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b32.c +new file mode 100644 +index 000000000..d369ccfa3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b32.c +@@ -0,0 +1,173 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** whilele_rr_b32_s32: ++** whilele p0\.s, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b32_s32, int32_t, ++ p0 = svwhilele_b32_s32 (x0, x1), ++ p0 = svwhilele_b32 (x0, x1)) ++ ++/* ++** whilele_0r_b32_s32: ++** whilele p0\.s, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b32_s32, int32_t, ++ p0 = svwhilele_b32_s32 (0, x1), ++ p0 = svwhilele_b32 (0, x1)) ++ ++/* ++** whilele_5r_b32_s32: ++** mov (w[0-9]+), #?5 ++** whilele p0\.s, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b32_s32, int32_t, ++ p0 = svwhilele_b32_s32 (5, x1), ++ p0 = svwhilele_b32 (5, x1)) ++ ++/* ++** whilele_r0_b32_s32: ++** whilele p0\.s, w0, wzr ++** ret ++*/ ++TEST_COMPARE_S (whilele_r0_b32_s32, int32_t, ++ p0 = svwhilele_b32_s32 (x0, 0), ++ p0 = svwhilele_b32 (x0, 0)) ++ ++/* ++** whilele_r5_b32_s32: ++** mov (w[0-9]+), #?5 ++** whilele p0\.s, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b32_s32, int32_t, ++ p0 = svwhilele_b32_s32 (x0, 5), ++ p0 = svwhilele_b32 (x0, 5)) ++ ++/* ++** whilele_rr_b32_s64: ++** whilele p0\.s, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b32_s64, int64_t, ++ p0 = svwhilele_b32_s64 (x0, x1), ++ p0 = svwhilele_b32 (x0, x1)) ++ ++/* ++** whilele_0r_b32_s64: ++** whilele p0\.s, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b32_s64, int64_t, ++ p0 = svwhilele_b32_s64 (0, x1), ++ p0 = svwhilele_b32 ((int64_t) 0, x1)) ++ ++/* ++** whilele_5r_b32_s64: ++** mov (x[0-9]+), #?5 ++** whilele p0\.s, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b32_s64, int64_t, ++ p0 = svwhilele_b32_s64 (5, x1), ++ p0 = svwhilele_b32 ((int64_t) 5, x1)) ++ ++/* ++** whilele_r0_b32_s64: ++** whilele p0\.s, x0, xzr ++** ret ++*/ ++TEST_COMPARE_S (whilele_r0_b32_s64, int64_t, ++ p0 = svwhilele_b32_s64 (x0, 0), ++ p0 = svwhilele_b32 (x0, (int64_t) 0)) ++ ++/* ++** whilele_r5_b32_s64: ++** mov (x[0-9]+), #?5 ++** whilele p0\.s, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b32_s64, int64_t, ++ p0 = svwhilele_b32_s64 (x0, 5), ++ p0 = svwhilele_b32 (x0, (int64_t) 5)) ++ ++/* ++** whilele_rr_b32_u32: ++** whilels p0\.s, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b32_u32, uint32_t, ++ p0 = svwhilele_b32_u32 (x0, x1), ++ p0 = svwhilele_b32 (x0, x1)) ++ ++/* ++** whilele_0r_b32_u32: ++** whilels p0\.s, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b32_u32, uint32_t, ++ p0 = svwhilele_b32_u32 (0, x1), ++ p0 = svwhilele_b32 ((uint32_t) 0, x1)) ++ ++/* ++** whilele_5r_b32_u32: ++** mov (w[0-9]+), #?5 ++** whilels p0\.s, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b32_u32, uint32_t, ++ p0 = svwhilele_b32_u32 (5, x1), ++ p0 = svwhilele_b32 
((uint32_t) 5, x1)) ++ ++/* ++** whilele_r5_b32_u32: ++** mov (w[0-9]+), #?5 ++** whilels p0\.s, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b32_u32, uint32_t, ++ p0 = svwhilele_b32_u32 (x0, 5), ++ p0 = svwhilele_b32 (x0, (uint32_t) 5)) ++ ++/* ++** whilele_rr_b32_u64: ++** whilels p0\.s, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b32_u64, uint64_t, ++ p0 = svwhilele_b32_u64 (x0, x1), ++ p0 = svwhilele_b32 (x0, x1)) ++ ++/* ++** whilele_0r_b32_u64: ++** whilels p0\.s, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b32_u64, uint64_t, ++ p0 = svwhilele_b32_u64 (0, x1), ++ p0 = svwhilele_b32 ((uint64_t) 0, x1)) ++ ++/* ++** whilele_5r_b32_u64: ++** mov (x[0-9]+), #?5 ++** whilels p0\.s, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b32_u64, uint64_t, ++ p0 = svwhilele_b32_u64 (5, x1), ++ p0 = svwhilele_b32 ((uint64_t) 5, x1)) ++ ++/* ++** whilele_r5_b32_u64: ++** mov (x[0-9]+), #?5 ++** whilels p0\.s, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b32_u64, uint64_t, ++ p0 = svwhilele_b32_u64 (x0, 5), ++ p0 = svwhilele_b32 (x0, (uint64_t) 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b64.c +new file mode 100644 +index 000000000..394f51f44 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b64.c +@@ -0,0 +1,173 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** whilele_rr_b64_s32: ++** whilele p0\.d, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b64_s32, int32_t, ++ p0 = svwhilele_b64_s32 (x0, x1), ++ p0 = svwhilele_b64 (x0, x1)) ++ ++/* ++** whilele_0r_b64_s32: ++** whilele p0\.d, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b64_s32, int32_t, ++ p0 = svwhilele_b64_s32 (0, x1), ++ p0 = svwhilele_b64 (0, x1)) ++ ++/* ++** whilele_5r_b64_s32: ++** mov (w[0-9]+), #?5 ++** whilele p0\.d, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b64_s32, int32_t, ++ p0 = svwhilele_b64_s32 (5, x1), ++ p0 = svwhilele_b64 (5, x1)) ++ ++/* ++** whilele_r0_b64_s32: ++** whilele p0\.d, w0, wzr ++** ret ++*/ ++TEST_COMPARE_S (whilele_r0_b64_s32, int32_t, ++ p0 = svwhilele_b64_s32 (x0, 0), ++ p0 = svwhilele_b64 (x0, 0)) ++ ++/* ++** whilele_r5_b64_s32: ++** mov (w[0-9]+), #?5 ++** whilele p0\.d, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b64_s32, int32_t, ++ p0 = svwhilele_b64_s32 (x0, 5), ++ p0 = svwhilele_b64 (x0, 5)) ++ ++/* ++** whilele_rr_b64_s64: ++** whilele p0\.d, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b64_s64, int64_t, ++ p0 = svwhilele_b64_s64 (x0, x1), ++ p0 = svwhilele_b64 (x0, x1)) ++ ++/* ++** whilele_0r_b64_s64: ++** whilele p0\.d, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b64_s64, int64_t, ++ p0 = svwhilele_b64_s64 (0, x1), ++ p0 = svwhilele_b64 ((int64_t) 0, x1)) ++ ++/* ++** whilele_5r_b64_s64: ++** mov (x[0-9]+), #?5 ++** whilele p0\.d, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b64_s64, int64_t, ++ p0 = svwhilele_b64_s64 (5, x1), ++ p0 = svwhilele_b64 ((int64_t) 5, x1)) ++ ++/* ++** whilele_r0_b64_s64: ++** whilele p0\.d, x0, xzr ++** ret ++*/ ++TEST_COMPARE_S (whilele_r0_b64_s64, int64_t, ++ p0 = svwhilele_b64_s64 (x0, 0), ++ p0 = svwhilele_b64 (x0, (int64_t) 0)) ++ ++/* ++** whilele_r5_b64_s64: ++** mov (x[0-9]+), #?5 ++** whilele p0\.d, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b64_s64, int64_t, ++ p0 = svwhilele_b64_s64 (x0, 5), ++ p0 = svwhilele_b64 (x0, (int64_t) 5)) ++ ++/* ++** whilele_rr_b64_u32: ++** whilels p0\.d, w0, w1 ++** ret ++*/ 
++TEST_COMPARE_S (whilele_rr_b64_u32, uint32_t, ++ p0 = svwhilele_b64_u32 (x0, x1), ++ p0 = svwhilele_b64 (x0, x1)) ++ ++/* ++** whilele_0r_b64_u32: ++** whilels p0\.d, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b64_u32, uint32_t, ++ p0 = svwhilele_b64_u32 (0, x1), ++ p0 = svwhilele_b64 ((uint32_t) 0, x1)) ++ ++/* ++** whilele_5r_b64_u32: ++** mov (w[0-9]+), #?5 ++** whilels p0\.d, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b64_u32, uint32_t, ++ p0 = svwhilele_b64_u32 (5, x1), ++ p0 = svwhilele_b64 ((uint32_t) 5, x1)) ++ ++/* ++** whilele_r5_b64_u32: ++** mov (w[0-9]+), #?5 ++** whilels p0\.d, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b64_u32, uint32_t, ++ p0 = svwhilele_b64_u32 (x0, 5), ++ p0 = svwhilele_b64 (x0, (uint32_t) 5)) ++ ++/* ++** whilele_rr_b64_u64: ++** whilels p0\.d, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b64_u64, uint64_t, ++ p0 = svwhilele_b64_u64 (x0, x1), ++ p0 = svwhilele_b64 (x0, x1)) ++ ++/* ++** whilele_0r_b64_u64: ++** whilels p0\.d, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b64_u64, uint64_t, ++ p0 = svwhilele_b64_u64 (0, x1), ++ p0 = svwhilele_b64 ((uint64_t) 0, x1)) ++ ++/* ++** whilele_5r_b64_u64: ++** mov (x[0-9]+), #?5 ++** whilels p0\.d, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b64_u64, uint64_t, ++ p0 = svwhilele_b64_u64 (5, x1), ++ p0 = svwhilele_b64 ((uint64_t) 5, x1)) ++ ++/* ++** whilele_r5_b64_u64: ++** mov (x[0-9]+), #?5 ++** whilels p0\.d, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b64_u64, uint64_t, ++ p0 = svwhilele_b64_u64 (x0, 5), ++ p0 = svwhilele_b64 (x0, (uint64_t) 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b8.c +new file mode 100644 +index 000000000..2ec101473 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b8.c +@@ -0,0 +1,173 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** whilele_rr_b8_s32: ++** whilele p0\.b, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b8_s32, int32_t, ++ p0 = svwhilele_b8_s32 (x0, x1), ++ p0 = svwhilele_b8 (x0, x1)) ++ ++/* ++** whilele_0r_b8_s32: ++** whilele p0\.b, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b8_s32, int32_t, ++ p0 = svwhilele_b8_s32 (0, x1), ++ p0 = svwhilele_b8 (0, x1)) ++ ++/* ++** whilele_5r_b8_s32: ++** mov (w[0-9]+), #?5 ++** whilele p0\.b, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b8_s32, int32_t, ++ p0 = svwhilele_b8_s32 (5, x1), ++ p0 = svwhilele_b8 (5, x1)) ++ ++/* ++** whilele_r0_b8_s32: ++** whilele p0\.b, w0, wzr ++** ret ++*/ ++TEST_COMPARE_S (whilele_r0_b8_s32, int32_t, ++ p0 = svwhilele_b8_s32 (x0, 0), ++ p0 = svwhilele_b8 (x0, 0)) ++ ++/* ++** whilele_r5_b8_s32: ++** mov (w[0-9]+), #?5 ++** whilele p0\.b, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b8_s32, int32_t, ++ p0 = svwhilele_b8_s32 (x0, 5), ++ p0 = svwhilele_b8 (x0, 5)) ++ ++/* ++** whilele_rr_b8_s64: ++** whilele p0\.b, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b8_s64, int64_t, ++ p0 = svwhilele_b8_s64 (x0, x1), ++ p0 = svwhilele_b8 (x0, x1)) ++ ++/* ++** whilele_0r_b8_s64: ++** whilele p0\.b, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b8_s64, int64_t, ++ p0 = svwhilele_b8_s64 (0, x1), ++ p0 = svwhilele_b8 ((int64_t) 0, x1)) ++ ++/* ++** whilele_5r_b8_s64: ++** mov (x[0-9]+), #?5 ++** whilele p0\.b, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b8_s64, int64_t, ++ p0 = svwhilele_b8_s64 (5, x1), ++ p0 = svwhilele_b8 ((int64_t) 5, x1)) ++ ++/* 
++** whilele_r0_b8_s64: ++** whilele p0\.b, x0, xzr ++** ret ++*/ ++TEST_COMPARE_S (whilele_r0_b8_s64, int64_t, ++ p0 = svwhilele_b8_s64 (x0, 0), ++ p0 = svwhilele_b8 (x0, (int64_t) 0)) ++ ++/* ++** whilele_r5_b8_s64: ++** mov (x[0-9]+), #?5 ++** whilele p0\.b, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b8_s64, int64_t, ++ p0 = svwhilele_b8_s64 (x0, 5), ++ p0 = svwhilele_b8 (x0, (int64_t) 5)) ++ ++/* ++** whilele_rr_b8_u32: ++** whilels p0\.b, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b8_u32, uint32_t, ++ p0 = svwhilele_b8_u32 (x0, x1), ++ p0 = svwhilele_b8 (x0, x1)) ++ ++/* ++** whilele_0r_b8_u32: ++** whilels p0\.b, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b8_u32, uint32_t, ++ p0 = svwhilele_b8_u32 (0, x1), ++ p0 = svwhilele_b8 ((uint32_t) 0, x1)) ++ ++/* ++** whilele_5r_b8_u32: ++** mov (w[0-9]+), #?5 ++** whilels p0\.b, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b8_u32, uint32_t, ++ p0 = svwhilele_b8_u32 (5, x1), ++ p0 = svwhilele_b8 ((uint32_t) 5, x1)) ++ ++/* ++** whilele_r5_b8_u32: ++** mov (w[0-9]+), #?5 ++** whilels p0\.b, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b8_u32, uint32_t, ++ p0 = svwhilele_b8_u32 (x0, 5), ++ p0 = svwhilele_b8 (x0, (uint32_t) 5)) ++ ++/* ++** whilele_rr_b8_u64: ++** whilels p0\.b, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_rr_b8_u64, uint64_t, ++ p0 = svwhilele_b8_u64 (x0, x1), ++ p0 = svwhilele_b8 (x0, x1)) ++ ++/* ++** whilele_0r_b8_u64: ++** whilels p0\.b, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_0r_b8_u64, uint64_t, ++ p0 = svwhilele_b8_u64 (0, x1), ++ p0 = svwhilele_b8 ((uint64_t) 0, x1)) ++ ++/* ++** whilele_5r_b8_u64: ++** mov (x[0-9]+), #?5 ++** whilels p0\.b, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_5r_b8_u64, uint64_t, ++ p0 = svwhilele_b8_u64 (5, x1), ++ p0 = svwhilele_b8 ((uint64_t) 5, x1)) ++ ++/* ++** whilele_r5_b8_u64: ++** mov (x[0-9]+), #?5 ++** whilels p0\.b, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilele_r5_b8_u64, uint64_t, ++ p0 = svwhilele_b8_u64 (x0, 5), ++ p0 = svwhilele_b8 (x0, (uint64_t) 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b16.c +new file mode 100644 +index 000000000..14a60432b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b16.c +@@ -0,0 +1,173 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** whilelt_rr_b16_s32: ++** whilelt p0\.h, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b16_s32, int32_t, ++ p0 = svwhilelt_b16_s32 (x0, x1), ++ p0 = svwhilelt_b16 (x0, x1)) ++ ++/* ++** whilelt_0r_b16_s32: ++** whilelt p0\.h, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b16_s32, int32_t, ++ p0 = svwhilelt_b16_s32 (0, x1), ++ p0 = svwhilelt_b16 (0, x1)) ++ ++/* ++** whilelt_5r_b16_s32: ++** mov (w[0-9]+), #?5 ++** whilelt p0\.h, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b16_s32, int32_t, ++ p0 = svwhilelt_b16_s32 (5, x1), ++ p0 = svwhilelt_b16 (5, x1)) ++ ++/* ++** whilelt_r0_b16_s32: ++** whilelt p0\.h, w0, wzr ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r0_b16_s32, int32_t, ++ p0 = svwhilelt_b16_s32 (x0, 0), ++ p0 = svwhilelt_b16 (x0, 0)) ++ ++/* ++** whilelt_r5_b16_s32: ++** mov (w[0-9]+), #?5 ++** whilelt p0\.h, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b16_s32, int32_t, ++ p0 = svwhilelt_b16_s32 (x0, 5), ++ p0 = svwhilelt_b16 (x0, 5)) ++ ++/* ++** whilelt_rr_b16_s64: ++** whilelt p0\.h, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b16_s64, int64_t, ++ p0 = 
svwhilelt_b16_s64 (x0, x1), ++ p0 = svwhilelt_b16 (x0, x1)) ++ ++/* ++** whilelt_0r_b16_s64: ++** whilelt p0\.h, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b16_s64, int64_t, ++ p0 = svwhilelt_b16_s64 (0, x1), ++ p0 = svwhilelt_b16 ((int64_t) 0, x1)) ++ ++/* ++** whilelt_5r_b16_s64: ++** mov (x[0-9]+), #?5 ++** whilelt p0\.h, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b16_s64, int64_t, ++ p0 = svwhilelt_b16_s64 (5, x1), ++ p0 = svwhilelt_b16 ((int64_t) 5, x1)) ++ ++/* ++** whilelt_r0_b16_s64: ++** whilelt p0\.h, x0, xzr ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r0_b16_s64, int64_t, ++ p0 = svwhilelt_b16_s64 (x0, 0), ++ p0 = svwhilelt_b16 (x0, (int64_t) 0)) ++ ++/* ++** whilelt_r5_b16_s64: ++** mov (x[0-9]+), #?5 ++** whilelt p0\.h, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b16_s64, int64_t, ++ p0 = svwhilelt_b16_s64 (x0, 5), ++ p0 = svwhilelt_b16 (x0, (int64_t) 5)) ++ ++/* ++** whilelt_rr_b16_u32: ++** whilelo p0\.h, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b16_u32, uint32_t, ++ p0 = svwhilelt_b16_u32 (x0, x1), ++ p0 = svwhilelt_b16 (x0, x1)) ++ ++/* ++** whilelt_0r_b16_u32: ++** whilelo p0\.h, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b16_u32, uint32_t, ++ p0 = svwhilelt_b16_u32 (0, x1), ++ p0 = svwhilelt_b16 ((uint32_t) 0, x1)) ++ ++/* ++** whilelt_5r_b16_u32: ++** mov (w[0-9]+), #?5 ++** whilelo p0\.h, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b16_u32, uint32_t, ++ p0 = svwhilelt_b16_u32 (5, x1), ++ p0 = svwhilelt_b16 ((uint32_t) 5, x1)) ++ ++/* ++** whilelt_r5_b16_u32: ++** mov (w[0-9]+), #?5 ++** whilelo p0\.h, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b16_u32, uint32_t, ++ p0 = svwhilelt_b16_u32 (x0, 5), ++ p0 = svwhilelt_b16 (x0, (uint32_t) 5)) ++ ++/* ++** whilelt_rr_b16_u64: ++** whilelo p0\.h, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b16_u64, uint64_t, ++ p0 = svwhilelt_b16_u64 (x0, x1), ++ p0 = svwhilelt_b16 (x0, x1)) ++ ++/* ++** whilelt_0r_b16_u64: ++** whilelo p0\.h, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b16_u64, uint64_t, ++ p0 = svwhilelt_b16_u64 (0, x1), ++ p0 = svwhilelt_b16 ((uint64_t) 0, x1)) ++ ++/* ++** whilelt_5r_b16_u64: ++** mov (x[0-9]+), #?5 ++** whilelo p0\.h, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b16_u64, uint64_t, ++ p0 = svwhilelt_b16_u64 (5, x1), ++ p0 = svwhilelt_b16 ((uint64_t) 5, x1)) ++ ++/* ++** whilelt_r5_b16_u64: ++** mov (x[0-9]+), #?5 ++** whilelo p0\.h, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b16_u64, uint64_t, ++ p0 = svwhilelt_b16_u64 (x0, 5), ++ p0 = svwhilelt_b16 (x0, (uint64_t) 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b32.c +new file mode 100644 +index 000000000..0e50bb07a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b32.c +@@ -0,0 +1,173 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** whilelt_rr_b32_s32: ++** whilelt p0\.s, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b32_s32, int32_t, ++ p0 = svwhilelt_b32_s32 (x0, x1), ++ p0 = svwhilelt_b32 (x0, x1)) ++ ++/* ++** whilelt_0r_b32_s32: ++** whilelt p0\.s, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b32_s32, int32_t, ++ p0 = svwhilelt_b32_s32 (0, x1), ++ p0 = svwhilelt_b32 (0, x1)) ++ ++/* ++** whilelt_5r_b32_s32: ++** mov (w[0-9]+), #?5 ++** whilelt p0\.s, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b32_s32, int32_t, ++ p0 = svwhilelt_b32_s32 (5, x1), ++ p0 = svwhilelt_b32 (5, x1)) ++ ++/* 
++** whilelt_r0_b32_s32: ++** whilelt p0\.s, w0, wzr ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r0_b32_s32, int32_t, ++ p0 = svwhilelt_b32_s32 (x0, 0), ++ p0 = svwhilelt_b32 (x0, 0)) ++ ++/* ++** whilelt_r5_b32_s32: ++** mov (w[0-9]+), #?5 ++** whilelt p0\.s, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b32_s32, int32_t, ++ p0 = svwhilelt_b32_s32 (x0, 5), ++ p0 = svwhilelt_b32 (x0, 5)) ++ ++/* ++** whilelt_rr_b32_s64: ++** whilelt p0\.s, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b32_s64, int64_t, ++ p0 = svwhilelt_b32_s64 (x0, x1), ++ p0 = svwhilelt_b32 (x0, x1)) ++ ++/* ++** whilelt_0r_b32_s64: ++** whilelt p0\.s, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b32_s64, int64_t, ++ p0 = svwhilelt_b32_s64 (0, x1), ++ p0 = svwhilelt_b32 ((int64_t) 0, x1)) ++ ++/* ++** whilelt_5r_b32_s64: ++** mov (x[0-9]+), #?5 ++** whilelt p0\.s, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b32_s64, int64_t, ++ p0 = svwhilelt_b32_s64 (5, x1), ++ p0 = svwhilelt_b32 ((int64_t) 5, x1)) ++ ++/* ++** whilelt_r0_b32_s64: ++** whilelt p0\.s, x0, xzr ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r0_b32_s64, int64_t, ++ p0 = svwhilelt_b32_s64 (x0, 0), ++ p0 = svwhilelt_b32 (x0, (int64_t) 0)) ++ ++/* ++** whilelt_r5_b32_s64: ++** mov (x[0-9]+), #?5 ++** whilelt p0\.s, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b32_s64, int64_t, ++ p0 = svwhilelt_b32_s64 (x0, 5), ++ p0 = svwhilelt_b32 (x0, (int64_t) 5)) ++ ++/* ++** whilelt_rr_b32_u32: ++** whilelo p0\.s, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b32_u32, uint32_t, ++ p0 = svwhilelt_b32_u32 (x0, x1), ++ p0 = svwhilelt_b32 (x0, x1)) ++ ++/* ++** whilelt_0r_b32_u32: ++** whilelo p0\.s, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b32_u32, uint32_t, ++ p0 = svwhilelt_b32_u32 (0, x1), ++ p0 = svwhilelt_b32 ((uint32_t) 0, x1)) ++ ++/* ++** whilelt_5r_b32_u32: ++** mov (w[0-9]+), #?5 ++** whilelo p0\.s, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b32_u32, uint32_t, ++ p0 = svwhilelt_b32_u32 (5, x1), ++ p0 = svwhilelt_b32 ((uint32_t) 5, x1)) ++ ++/* ++** whilelt_r5_b32_u32: ++** mov (w[0-9]+), #?5 ++** whilelo p0\.s, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b32_u32, uint32_t, ++ p0 = svwhilelt_b32_u32 (x0, 5), ++ p0 = svwhilelt_b32 (x0, (uint32_t) 5)) ++ ++/* ++** whilelt_rr_b32_u64: ++** whilelo p0\.s, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b32_u64, uint64_t, ++ p0 = svwhilelt_b32_u64 (x0, x1), ++ p0 = svwhilelt_b32 (x0, x1)) ++ ++/* ++** whilelt_0r_b32_u64: ++** whilelo p0\.s, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b32_u64, uint64_t, ++ p0 = svwhilelt_b32_u64 (0, x1), ++ p0 = svwhilelt_b32 ((uint64_t) 0, x1)) ++ ++/* ++** whilelt_5r_b32_u64: ++** mov (x[0-9]+), #?5 ++** whilelo p0\.s, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b32_u64, uint64_t, ++ p0 = svwhilelt_b32_u64 (5, x1), ++ p0 = svwhilelt_b32 ((uint64_t) 5, x1)) ++ ++/* ++** whilelt_r5_b32_u64: ++** mov (x[0-9]+), #?5 ++** whilelo p0\.s, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b32_u64, uint64_t, ++ p0 = svwhilelt_b32_u64 (x0, 5), ++ p0 = svwhilelt_b32 (x0, (uint64_t) 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b64.c +new file mode 100644 +index 000000000..539c93347 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b64.c +@@ -0,0 +1,173 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** whilelt_rr_b64_s32: ++** whilelt p0\.d, w0, w1 ++** ret ++*/ 
++TEST_COMPARE_S (whilelt_rr_b64_s32, int32_t, ++ p0 = svwhilelt_b64_s32 (x0, x1), ++ p0 = svwhilelt_b64 (x0, x1)) ++ ++/* ++** whilelt_0r_b64_s32: ++** whilelt p0\.d, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b64_s32, int32_t, ++ p0 = svwhilelt_b64_s32 (0, x1), ++ p0 = svwhilelt_b64 (0, x1)) ++ ++/* ++** whilelt_5r_b64_s32: ++** mov (w[0-9]+), #?5 ++** whilelt p0\.d, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b64_s32, int32_t, ++ p0 = svwhilelt_b64_s32 (5, x1), ++ p0 = svwhilelt_b64 (5, x1)) ++ ++/* ++** whilelt_r0_b64_s32: ++** whilelt p0\.d, w0, wzr ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r0_b64_s32, int32_t, ++ p0 = svwhilelt_b64_s32 (x0, 0), ++ p0 = svwhilelt_b64 (x0, 0)) ++ ++/* ++** whilelt_r5_b64_s32: ++** mov (w[0-9]+), #?5 ++** whilelt p0\.d, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b64_s32, int32_t, ++ p0 = svwhilelt_b64_s32 (x0, 5), ++ p0 = svwhilelt_b64 (x0, 5)) ++ ++/* ++** whilelt_rr_b64_s64: ++** whilelt p0\.d, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b64_s64, int64_t, ++ p0 = svwhilelt_b64_s64 (x0, x1), ++ p0 = svwhilelt_b64 (x0, x1)) ++ ++/* ++** whilelt_0r_b64_s64: ++** whilelt p0\.d, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b64_s64, int64_t, ++ p0 = svwhilelt_b64_s64 (0, x1), ++ p0 = svwhilelt_b64 ((int64_t) 0, x1)) ++ ++/* ++** whilelt_5r_b64_s64: ++** mov (x[0-9]+), #?5 ++** whilelt p0\.d, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b64_s64, int64_t, ++ p0 = svwhilelt_b64_s64 (5, x1), ++ p0 = svwhilelt_b64 ((int64_t) 5, x1)) ++ ++/* ++** whilelt_r0_b64_s64: ++** whilelt p0\.d, x0, xzr ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r0_b64_s64, int64_t, ++ p0 = svwhilelt_b64_s64 (x0, 0), ++ p0 = svwhilelt_b64 (x0, (int64_t) 0)) ++ ++/* ++** whilelt_r5_b64_s64: ++** mov (x[0-9]+), #?5 ++** whilelt p0\.d, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b64_s64, int64_t, ++ p0 = svwhilelt_b64_s64 (x0, 5), ++ p0 = svwhilelt_b64 (x0, (int64_t) 5)) ++ ++/* ++** whilelt_rr_b64_u32: ++** whilelo p0\.d, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b64_u32, uint32_t, ++ p0 = svwhilelt_b64_u32 (x0, x1), ++ p0 = svwhilelt_b64 (x0, x1)) ++ ++/* ++** whilelt_0r_b64_u32: ++** whilelo p0\.d, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b64_u32, uint32_t, ++ p0 = svwhilelt_b64_u32 (0, x1), ++ p0 = svwhilelt_b64 ((uint32_t) 0, x1)) ++ ++/* ++** whilelt_5r_b64_u32: ++** mov (w[0-9]+), #?5 ++** whilelo p0\.d, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b64_u32, uint32_t, ++ p0 = svwhilelt_b64_u32 (5, x1), ++ p0 = svwhilelt_b64 ((uint32_t) 5, x1)) ++ ++/* ++** whilelt_r5_b64_u32: ++** mov (w[0-9]+), #?5 ++** whilelo p0\.d, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b64_u32, uint32_t, ++ p0 = svwhilelt_b64_u32 (x0, 5), ++ p0 = svwhilelt_b64 (x0, (uint32_t) 5)) ++ ++/* ++** whilelt_rr_b64_u64: ++** whilelo p0\.d, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b64_u64, uint64_t, ++ p0 = svwhilelt_b64_u64 (x0, x1), ++ p0 = svwhilelt_b64 (x0, x1)) ++ ++/* ++** whilelt_0r_b64_u64: ++** whilelo p0\.d, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b64_u64, uint64_t, ++ p0 = svwhilelt_b64_u64 (0, x1), ++ p0 = svwhilelt_b64 ((uint64_t) 0, x1)) ++ ++/* ++** whilelt_5r_b64_u64: ++** mov (x[0-9]+), #?5 ++** whilelo p0\.d, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b64_u64, uint64_t, ++ p0 = svwhilelt_b64_u64 (5, x1), ++ p0 = svwhilelt_b64 ((uint64_t) 5, x1)) ++ ++/* ++** whilelt_r5_b64_u64: ++** mov (x[0-9]+), #?5 ++** whilelo p0\.d, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b64_u64, uint64_t, ++ p0 = 
svwhilelt_b64_u64 (x0, 5), ++ p0 = svwhilelt_b64 (x0, (uint64_t) 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b8.c +new file mode 100644 +index 000000000..5b6a5c44d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b8.c +@@ -0,0 +1,173 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** whilelt_rr_b8_s32: ++** whilelt p0\.b, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b8_s32, int32_t, ++ p0 = svwhilelt_b8_s32 (x0, x1), ++ p0 = svwhilelt_b8 (x0, x1)) ++ ++/* ++** whilelt_0r_b8_s32: ++** whilelt p0\.b, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b8_s32, int32_t, ++ p0 = svwhilelt_b8_s32 (0, x1), ++ p0 = svwhilelt_b8 (0, x1)) ++ ++/* ++** whilelt_5r_b8_s32: ++** mov (w[0-9]+), #?5 ++** whilelt p0\.b, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b8_s32, int32_t, ++ p0 = svwhilelt_b8_s32 (5, x1), ++ p0 = svwhilelt_b8 (5, x1)) ++ ++/* ++** whilelt_r0_b8_s32: ++** whilelt p0\.b, w0, wzr ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r0_b8_s32, int32_t, ++ p0 = svwhilelt_b8_s32 (x0, 0), ++ p0 = svwhilelt_b8 (x0, 0)) ++ ++/* ++** whilelt_r5_b8_s32: ++** mov (w[0-9]+), #?5 ++** whilelt p0\.b, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b8_s32, int32_t, ++ p0 = svwhilelt_b8_s32 (x0, 5), ++ p0 = svwhilelt_b8 (x0, 5)) ++ ++/* ++** whilelt_rr_b8_s64: ++** whilelt p0\.b, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b8_s64, int64_t, ++ p0 = svwhilelt_b8_s64 (x0, x1), ++ p0 = svwhilelt_b8 (x0, x1)) ++ ++/* ++** whilelt_0r_b8_s64: ++** whilelt p0\.b, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b8_s64, int64_t, ++ p0 = svwhilelt_b8_s64 (0, x1), ++ p0 = svwhilelt_b8 ((int64_t) 0, x1)) ++ ++/* ++** whilelt_5r_b8_s64: ++** mov (x[0-9]+), #?5 ++** whilelt p0\.b, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b8_s64, int64_t, ++ p0 = svwhilelt_b8_s64 (5, x1), ++ p0 = svwhilelt_b8 ((int64_t) 5, x1)) ++ ++/* ++** whilelt_r0_b8_s64: ++** whilelt p0\.b, x0, xzr ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r0_b8_s64, int64_t, ++ p0 = svwhilelt_b8_s64 (x0, 0), ++ p0 = svwhilelt_b8 (x0, (int64_t) 0)) ++ ++/* ++** whilelt_r5_b8_s64: ++** mov (x[0-9]+), #?5 ++** whilelt p0\.b, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b8_s64, int64_t, ++ p0 = svwhilelt_b8_s64 (x0, 5), ++ p0 = svwhilelt_b8 (x0, (int64_t) 5)) ++ ++/* ++** whilelt_rr_b8_u32: ++** whilelo p0\.b, w0, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b8_u32, uint32_t, ++ p0 = svwhilelt_b8_u32 (x0, x1), ++ p0 = svwhilelt_b8 (x0, x1)) ++ ++/* ++** whilelt_0r_b8_u32: ++** whilelo p0\.b, wzr, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b8_u32, uint32_t, ++ p0 = svwhilelt_b8_u32 (0, x1), ++ p0 = svwhilelt_b8 ((uint32_t) 0, x1)) ++ ++/* ++** whilelt_5r_b8_u32: ++** mov (w[0-9]+), #?5 ++** whilelo p0\.b, \1, w1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b8_u32, uint32_t, ++ p0 = svwhilelt_b8_u32 (5, x1), ++ p0 = svwhilelt_b8 ((uint32_t) 5, x1)) ++ ++/* ++** whilelt_r5_b8_u32: ++** mov (w[0-9]+), #?5 ++** whilelo p0\.b, w0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b8_u32, uint32_t, ++ p0 = svwhilelt_b8_u32 (x0, 5), ++ p0 = svwhilelt_b8 (x0, (uint32_t) 5)) ++ ++/* ++** whilelt_rr_b8_u64: ++** whilelo p0\.b, x0, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_rr_b8_u64, uint64_t, ++ p0 = svwhilelt_b8_u64 (x0, x1), ++ p0 = svwhilelt_b8 (x0, x1)) ++ ++/* ++** whilelt_0r_b8_u64: ++** whilelo p0\.b, xzr, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_0r_b8_u64, uint64_t, 
++ p0 = svwhilelt_b8_u64 (0, x1), ++ p0 = svwhilelt_b8 ((uint64_t) 0, x1)) ++ ++/* ++** whilelt_5r_b8_u64: ++** mov (x[0-9]+), #?5 ++** whilelo p0\.b, \1, x1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_5r_b8_u64, uint64_t, ++ p0 = svwhilelt_b8_u64 (5, x1), ++ p0 = svwhilelt_b8 ((uint64_t) 5, x1)) ++ ++/* ++** whilelt_r5_b8_u64: ++** mov (x[0-9]+), #?5 ++** whilelo p0\.b, x0, \1 ++** ret ++*/ ++TEST_COMPARE_S (whilelt_r5_b8_u64, uint64_t, ++ p0 = svwhilelt_b8_u64 (x0, 5), ++ p0 = svwhilelt_b8 (x0, (uint64_t) 5)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b16.c +new file mode 100644 +index 000000000..269260eb4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_b16_tied1: ++** zip1 p0\.h, p0\.h, p1\.h ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b16_tied1, ++ p0 = svzip1_b16 (p0, p1), ++ p0 = svzip1_b16 (p0, p1)) ++ ++/* ++** zip1_b16_tied2: ++** zip1 p0\.h, p1\.h, p0\.h ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b16_tied2, ++ p0 = svzip1_b16 (p1, p0), ++ p0 = svzip1_b16 (p1, p0)) ++ ++/* ++** zip1_b16_untied: ++** zip1 p0\.h, p1\.h, p2\.h ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b16_untied, ++ p0 = svzip1_b16 (p1, p2), ++ p0 = svzip1_b16 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b32.c +new file mode 100644 +index 000000000..027609a7d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_b32_tied1: ++** zip1 p0\.s, p0\.s, p1\.s ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b32_tied1, ++ p0 = svzip1_b32 (p0, p1), ++ p0 = svzip1_b32 (p0, p1)) ++ ++/* ++** zip1_b32_tied2: ++** zip1 p0\.s, p1\.s, p0\.s ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b32_tied2, ++ p0 = svzip1_b32 (p1, p0), ++ p0 = svzip1_b32 (p1, p0)) ++ ++/* ++** zip1_b32_untied: ++** zip1 p0\.s, p1\.s, p2\.s ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b32_untied, ++ p0 = svzip1_b32 (p1, p2), ++ p0 = svzip1_b32 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b64.c +new file mode 100644 +index 000000000..8add16d8e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_b64_tied1: ++** zip1 p0\.d, p0\.d, p1\.d ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b64_tied1, ++ p0 = svzip1_b64 (p0, p1), ++ p0 = svzip1_b64 (p0, p1)) ++ ++/* ++** zip1_b64_tied2: ++** zip1 p0\.d, p1\.d, p0\.d ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b64_tied2, ++ p0 = svzip1_b64 (p1, p0), ++ p0 = svzip1_b64 (p1, p0)) ++ ++/* ++** zip1_b64_untied: ++** zip1 p0\.d, p1\.d, p2\.d ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b64_untied, ++ p0 = svzip1_b64 (p1, p2), ++ p0 = svzip1_b64 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b8.c +new file mode 100644 +index 000000000..8648298ac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_b8_tied1: ++** zip1 
p0\.b, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b8_tied1, ++ p0 = svzip1_b8 (p0, p1), ++ p0 = svzip1_b8 (p0, p1)) ++ ++/* ++** zip1_b8_tied2: ++** zip1 p0\.b, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b8_tied2, ++ p0 = svzip1_b8 (p1, p0), ++ p0 = svzip1_b8 (p1, p0)) ++ ++/* ++** zip1_b8_untied: ++** zip1 p0\.b, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (zip1_b8_untied, ++ p0 = svzip1_b8 (p1, p2), ++ p0 = svzip1_b8 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_bf16.c +new file mode 100644 +index 000000000..6017cde41 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_bf16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_bf16_tied1: ++** zip1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_bf16_tied1, svbfloat16_t, ++ z0 = svzip1_bf16 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_bf16_tied2: ++** zip1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_bf16_tied2, svbfloat16_t, ++ z0 = svzip1_bf16 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_bf16_untied: ++** zip1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_bf16_untied, svbfloat16_t, ++ z0 = svzip1_bf16 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f16.c +new file mode 100644 +index 000000000..1c6ce4e7d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_f16_tied1: ++** zip1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_f16_tied1, svfloat16_t, ++ z0 = svzip1_f16 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_f16_tied2: ++** zip1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_f16_tied2, svfloat16_t, ++ z0 = svzip1_f16 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_f16_untied: ++** zip1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_f16_untied, svfloat16_t, ++ z0 = svzip1_f16 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f32.c +new file mode 100644 +index 000000000..288ceff3f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_f32_tied1: ++** zip1 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_f32_tied1, svfloat32_t, ++ z0 = svzip1_f32 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_f32_tied2: ++** zip1 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_f32_tied2, svfloat32_t, ++ z0 = svzip1_f32 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_f32_untied: ++** zip1 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_f32_untied, svfloat32_t, ++ z0 = svzip1_f32 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f64.c +new file mode 100644 +index 000000000..5abbea1cd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* 
++** zip1_f64_tied1: ++** zip1 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_f64_tied1, svfloat64_t, ++ z0 = svzip1_f64 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_f64_tied2: ++** zip1 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_f64_tied2, svfloat64_t, ++ z0 = svzip1_f64 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_f64_untied: ++** zip1 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_f64_untied, svfloat64_t, ++ z0 = svzip1_f64 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s16.c +new file mode 100644 +index 000000000..8ecd20142 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_s16_tied1: ++** zip1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s16_tied1, svint16_t, ++ z0 = svzip1_s16 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_s16_tied2: ++** zip1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s16_tied2, svint16_t, ++ z0 = svzip1_s16 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_s16_untied: ++** zip1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s16_untied, svint16_t, ++ z0 = svzip1_s16 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s32.c +new file mode 100644 +index 000000000..c523885ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_s32_tied1: ++** zip1 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s32_tied1, svint32_t, ++ z0 = svzip1_s32 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_s32_tied2: ++** zip1 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s32_tied2, svint32_t, ++ z0 = svzip1_s32 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_s32_untied: ++** zip1 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s32_untied, svint32_t, ++ z0 = svzip1_s32 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s64.c +new file mode 100644 +index 000000000..d1dca7ee9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_s64_tied1: ++** zip1 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s64_tied1, svint64_t, ++ z0 = svzip1_s64 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_s64_tied2: ++** zip1 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s64_tied2, svint64_t, ++ z0 = svzip1_s64 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_s64_untied: ++** zip1 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s64_untied, svint64_t, ++ z0 = svzip1_s64 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s8.c +new file mode 100644 +index 000000000..1600ab586 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ 
++#include "test_sve_acle.h" ++ ++/* ++** zip1_s8_tied1: ++** zip1 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s8_tied1, svint8_t, ++ z0 = svzip1_s8 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_s8_tied2: ++** zip1 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s8_tied2, svint8_t, ++ z0 = svzip1_s8 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_s8_untied: ++** zip1 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_s8_untied, svint8_t, ++ z0 = svzip1_s8 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u16.c +new file mode 100644 +index 000000000..3773ed22f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_u16_tied1: ++** zip1 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u16_tied1, svuint16_t, ++ z0 = svzip1_u16 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_u16_tied2: ++** zip1 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u16_tied2, svuint16_t, ++ z0 = svzip1_u16 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_u16_untied: ++** zip1 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u16_untied, svuint16_t, ++ z0 = svzip1_u16 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u32.c +new file mode 100644 +index 000000000..e67c121e5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_u32_tied1: ++** zip1 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u32_tied1, svuint32_t, ++ z0 = svzip1_u32 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_u32_tied2: ++** zip1 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u32_tied2, svuint32_t, ++ z0 = svzip1_u32 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_u32_untied: ++** zip1 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u32_untied, svuint32_t, ++ z0 = svzip1_u32 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u64.c +new file mode 100644 +index 000000000..bb6380a6a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_u64_tied1: ++** zip1 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u64_tied1, svuint64_t, ++ z0 = svzip1_u64 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_u64_tied2: ++** zip1 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u64_tied2, svuint64_t, ++ z0 = svzip1_u64 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_u64_untied: ++** zip1 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u64_untied, svuint64_t, ++ z0 = svzip1_u64 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u8.c +new file mode 100644 +index 000000000..01d89d4fe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1_u8_tied1: ++** zip1 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u8_tied1, svuint8_t, ++ z0 = svzip1_u8 (z0, z1), ++ z0 = svzip1 (z0, z1)) ++ ++/* ++** zip1_u8_tied2: ++** zip1 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u8_tied2, svuint8_t, ++ z0 = svzip1_u8 (z1, z0), ++ z0 = svzip1 (z1, z0)) ++ ++/* ++** zip1_u8_untied: ++** zip1 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip1_u8_untied, svuint8_t, ++ z0 = svzip1_u8 (z1, z2), ++ z0 = svzip1 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_bf16.c +new file mode 100644 +index 000000000..aabf7c0e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_bf16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_bf16_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_bf16_tied1, svbfloat16_t, ++ z0 = svzip1q_bf16 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_bf16_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_bf16_tied2, svbfloat16_t, ++ z0 = svzip1q_bf16 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_bf16_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_bf16_untied, svbfloat16_t, ++ z0 = svzip1q_bf16 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f16.c +new file mode 100644 +index 000000000..1170cc5e7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_f16_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_f16_tied1, svfloat16_t, ++ z0 = svzip1q_f16 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_f16_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_f16_tied2, svfloat16_t, ++ z0 = svzip1q_f16 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_f16_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_f16_untied, svfloat16_t, ++ z0 = svzip1q_f16 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f32.c +new file mode 100644 +index 000000000..09666da1b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_f32_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_f32_tied1, svfloat32_t, ++ z0 = svzip1q_f32 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_f32_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_f32_tied2, svfloat32_t, ++ z0 = svzip1q_f32 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_f32_untied: ++** zip1 
z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_f32_untied, svfloat32_t, ++ z0 = svzip1q_f32 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f64.c +new file mode 100644 +index 000000000..d77fb1c90 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_f64_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_f64_tied1, svfloat64_t, ++ z0 = svzip1q_f64 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_f64_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_f64_tied2, svfloat64_t, ++ z0 = svzip1q_f64 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_f64_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_f64_untied, svfloat64_t, ++ z0 = svzip1q_f64 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s16.c +new file mode 100644 +index 000000000..92a6b5514 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_s16_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s16_tied1, svint16_t, ++ z0 = svzip1q_s16 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_s16_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s16_tied2, svint16_t, ++ z0 = svzip1q_s16 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_s16_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s16_untied, svint16_t, ++ z0 = svzip1q_s16 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s32.c +new file mode 100644 +index 000000000..a918d2d4c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_s32_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s32_tied1, svint32_t, ++ z0 = svzip1q_s32 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_s32_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s32_tied2, svint32_t, ++ z0 = svzip1q_s32 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_s32_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s32_untied, svint32_t, ++ z0 = svzip1q_s32 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s64.c +new file mode 100644 +index 000000000..be3524fd5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target 
aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_s64_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s64_tied1, svint64_t, ++ z0 = svzip1q_s64 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_s64_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s64_tied2, svint64_t, ++ z0 = svzip1q_s64 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_s64_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s64_untied, svint64_t, ++ z0 = svzip1q_s64 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s8.c +new file mode 100644 +index 000000000..24ea2399c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_s8_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s8_tied1, svint8_t, ++ z0 = svzip1q_s8 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_s8_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s8_tied2, svint8_t, ++ z0 = svzip1q_s8 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_s8_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_s8_untied, svint8_t, ++ z0 = svzip1q_s8 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u16.c +new file mode 100644 +index 000000000..65caf9706 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_u16_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u16_tied1, svuint16_t, ++ z0 = svzip1q_u16 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_u16_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u16_tied2, svuint16_t, ++ z0 = svzip1q_u16 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_u16_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u16_untied, svuint16_t, ++ z0 = svzip1q_u16 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u32.c +new file mode 100644 +index 000000000..abd76b74f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_u32_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u32_tied1, svuint32_t, ++ z0 = svzip1q_u32 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_u32_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u32_tied2, 
svuint32_t, ++ z0 = svzip1q_u32 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_u32_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u32_untied, svuint32_t, ++ z0 = svzip1q_u32 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u64.c +new file mode 100644 +index 000000000..0e91929b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_u64_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u64_tied1, svuint64_t, ++ z0 = svzip1q_u64 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_u64_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u64_tied2, svuint64_t, ++ z0 = svzip1q_u64 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_u64_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u64_untied, svuint64_t, ++ z0 = svzip1q_u64 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u8.c +new file mode 100644 +index 000000000..07d484b0b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip1q_u8_tied1: ++** zip1 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u8_tied1, svuint8_t, ++ z0 = svzip1q_u8 (z0, z1), ++ z0 = svzip1q (z0, z1)) ++ ++/* ++** zip1q_u8_tied2: ++** zip1 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u8_tied2, svuint8_t, ++ z0 = svzip1q_u8 (z1, z0), ++ z0 = svzip1q (z1, z0)) ++ ++/* ++** zip1q_u8_untied: ++** zip1 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip1q_u8_untied, svuint8_t, ++ z0 = svzip1q_u8 (z1, z2), ++ z0 = svzip1q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b16.c +new file mode 100644 +index 000000000..5624c9815 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_b16_tied1: ++** zip2 p0\.h, p0\.h, p1\.h ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b16_tied1, ++ p0 = svzip2_b16 (p0, p1), ++ p0 = svzip2_b16 (p0, p1)) ++ ++/* ++** zip2_b16_tied2: ++** zip2 p0\.h, p1\.h, p0\.h ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b16_tied2, ++ p0 = svzip2_b16 (p1, p0), ++ p0 = svzip2_b16 (p1, p0)) ++ ++/* ++** zip2_b16_untied: ++** zip2 p0\.h, p1\.h, p2\.h ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b16_untied, ++ p0 = svzip2_b16 (p1, p2), ++ p0 = svzip2_b16 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b32.c +new file mode 100644 +index 000000000..b73d5b490 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" 
++ ++/* ++** zip2_b32_tied1: ++** zip2 p0\.s, p0\.s, p1\.s ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b32_tied1, ++ p0 = svzip2_b32 (p0, p1), ++ p0 = svzip2_b32 (p0, p1)) ++ ++/* ++** zip2_b32_tied2: ++** zip2 p0\.s, p1\.s, p0\.s ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b32_tied2, ++ p0 = svzip2_b32 (p1, p0), ++ p0 = svzip2_b32 (p1, p0)) ++ ++/* ++** zip2_b32_untied: ++** zip2 p0\.s, p1\.s, p2\.s ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b32_untied, ++ p0 = svzip2_b32 (p1, p2), ++ p0 = svzip2_b32 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b64.c +new file mode 100644 +index 000000000..9ebf050b8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_b64_tied1: ++** zip2 p0\.d, p0\.d, p1\.d ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b64_tied1, ++ p0 = svzip2_b64 (p0, p1), ++ p0 = svzip2_b64 (p0, p1)) ++ ++/* ++** zip2_b64_tied2: ++** zip2 p0\.d, p1\.d, p0\.d ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b64_tied2, ++ p0 = svzip2_b64 (p1, p0), ++ p0 = svzip2_b64 (p1, p0)) ++ ++/* ++** zip2_b64_untied: ++** zip2 p0\.d, p1\.d, p2\.d ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b64_untied, ++ p0 = svzip2_b64 (p1, p2), ++ p0 = svzip2_b64 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b8.c +new file mode 100644 +index 000000000..223a22f99 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_b8_tied1: ++** zip2 p0\.b, p0\.b, p1\.b ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b8_tied1, ++ p0 = svzip2_b8 (p0, p1), ++ p0 = svzip2_b8 (p0, p1)) ++ ++/* ++** zip2_b8_tied2: ++** zip2 p0\.b, p1\.b, p0\.b ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b8_tied2, ++ p0 = svzip2_b8 (p1, p0), ++ p0 = svzip2_b8 (p1, p0)) ++ ++/* ++** zip2_b8_untied: ++** zip2 p0\.b, p1\.b, p2\.b ++** ret ++*/ ++TEST_UNIFORM_P (zip2_b8_untied, ++ p0 = svzip2_b8 (p1, p2), ++ p0 = svzip2_b8 (p1, p2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_bf16.c +new file mode 100644 +index 000000000..a9e0cfc93 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_bf16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_bf16_tied1: ++** zip2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_bf16_tied1, svbfloat16_t, ++ z0 = svzip2_bf16 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_bf16_tied2: ++** zip2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_bf16_tied2, svbfloat16_t, ++ z0 = svzip2_bf16 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_bf16_untied: ++** zip2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_bf16_untied, svbfloat16_t, ++ z0 = svzip2_bf16 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f16.c +new file mode 100644 +index 000000000..73d4272bc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** 
zip2_f16_tied1: ++** zip2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_f16_tied1, svfloat16_t, ++ z0 = svzip2_f16 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_f16_tied2: ++** zip2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_f16_tied2, svfloat16_t, ++ z0 = svzip2_f16 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_f16_untied: ++** zip2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_f16_untied, svfloat16_t, ++ z0 = svzip2_f16 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f32.c +new file mode 100644 +index 000000000..2ad8ff81d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_f32_tied1: ++** zip2 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_f32_tied1, svfloat32_t, ++ z0 = svzip2_f32 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_f32_tied2: ++** zip2 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_f32_tied2, svfloat32_t, ++ z0 = svzip2_f32 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_f32_untied: ++** zip2 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_f32_untied, svfloat32_t, ++ z0 = svzip2_f32 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f64.c +new file mode 100644 +index 000000000..de5c2646f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_f64_tied1: ++** zip2 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_f64_tied1, svfloat64_t, ++ z0 = svzip2_f64 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_f64_tied2: ++** zip2 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_f64_tied2, svfloat64_t, ++ z0 = svzip2_f64 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_f64_untied: ++** zip2 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_f64_untied, svfloat64_t, ++ z0 = svzip2_f64 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s16.c +new file mode 100644 +index 000000000..fc366c991 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_s16_tied1: ++** zip2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s16_tied1, svint16_t, ++ z0 = svzip2_s16 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_s16_tied2: ++** zip2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s16_tied2, svint16_t, ++ z0 = svzip2_s16 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_s16_untied: ++** zip2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s16_untied, svint16_t, ++ z0 = svzip2_s16 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s32.c +new file mode 100644 +index 000000000..e56934d26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } 
*/ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_s32_tied1: ++** zip2 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s32_tied1, svint32_t, ++ z0 = svzip2_s32 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_s32_tied2: ++** zip2 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s32_tied2, svint32_t, ++ z0 = svzip2_s32 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_s32_untied: ++** zip2 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s32_untied, svint32_t, ++ z0 = svzip2_s32 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s64.c +new file mode 100644 +index 000000000..cefc73b72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_s64_tied1: ++** zip2 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s64_tied1, svint64_t, ++ z0 = svzip2_s64 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_s64_tied2: ++** zip2 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s64_tied2, svint64_t, ++ z0 = svzip2_s64 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_s64_untied: ++** zip2 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s64_untied, svint64_t, ++ z0 = svzip2_s64 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s8.c +new file mode 100644 +index 000000000..452bbce26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_s8_tied1: ++** zip2 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s8_tied1, svint8_t, ++ z0 = svzip2_s8 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_s8_tied2: ++** zip2 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s8_tied2, svint8_t, ++ z0 = svzip2_s8 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_s8_untied: ++** zip2 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_s8_untied, svint8_t, ++ z0 = svzip2_s8 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u16.c +new file mode 100644 +index 000000000..9a20b4ed1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u16.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_u16_tied1: ++** zip2 z0\.h, z0\.h, z1\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u16_tied1, svuint16_t, ++ z0 = svzip2_u16 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_u16_tied2: ++** zip2 z0\.h, z1\.h, z0\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u16_tied2, svuint16_t, ++ z0 = svzip2_u16 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_u16_untied: ++** zip2 z0\.h, z1\.h, z2\.h ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u16_untied, svuint16_t, ++ z0 = svzip2_u16 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u32.c +new file mode 100644 +index 000000000..70626c66e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u32.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_u32_tied1: ++** zip2 z0\.s, z0\.s, z1\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u32_tied1, svuint32_t, ++ z0 = svzip2_u32 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_u32_tied2: ++** zip2 z0\.s, z1\.s, z0\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u32_tied2, svuint32_t, ++ z0 = svzip2_u32 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_u32_untied: ++** zip2 z0\.s, z1\.s, z2\.s ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u32_untied, svuint32_t, ++ z0 = svzip2_u32 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u64.c +new file mode 100644 +index 000000000..43a43ff7c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u64.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_u64_tied1: ++** zip2 z0\.d, z0\.d, z1\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u64_tied1, svuint64_t, ++ z0 = svzip2_u64 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_u64_tied2: ++** zip2 z0\.d, z1\.d, z0\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u64_tied2, svuint64_t, ++ z0 = svzip2_u64 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_u64_untied: ++** zip2 z0\.d, z1\.d, z2\.d ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u64_untied, svuint64_t, ++ z0 = svzip2_u64 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u8.c +new file mode 100644 +index 000000000..015f1844b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u8.c +@@ -0,0 +1,30 @@ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2_u8_tied1: ++** zip2 z0\.b, z0\.b, z1\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u8_tied1, svuint8_t, ++ z0 = svzip2_u8 (z0, z1), ++ z0 = svzip2 (z0, z1)) ++ ++/* ++** zip2_u8_tied2: ++** zip2 z0\.b, z1\.b, z0\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u8_tied2, svuint8_t, ++ z0 = svzip2_u8 (z1, z0), ++ z0 = svzip2 (z1, z0)) ++ ++/* ++** zip2_u8_untied: ++** zip2 z0\.b, z1\.b, z2\.b ++** ret ++*/ ++TEST_UNIFORM_Z (zip2_u8_untied, svuint8_t, ++ z0 = svzip2_u8 (z1, z2), ++ z0 = svzip2 (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_bf16.c +new file mode 100644 +index 000000000..6d79136cf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_bf16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_bf16_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_bf16_tied1, svbfloat16_t, ++ z0 = svzip2q_bf16 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_bf16_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_bf16_tied2, svbfloat16_t, ++ z0 = svzip2q_bf16 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_bf16_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_bf16_untied, svbfloat16_t, ++ z0 = svzip2q_bf16 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f16.c +new file 
mode 100644 +index 000000000..984240e19 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_f16_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_f16_tied1, svfloat16_t, ++ z0 = svzip2q_f16 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_f16_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_f16_tied2, svfloat16_t, ++ z0 = svzip2q_f16 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_f16_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_f16_untied, svfloat16_t, ++ z0 = svzip2q_f16 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f32.c +new file mode 100644 +index 000000000..0f8ccd804 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_f32_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_f32_tied1, svfloat32_t, ++ z0 = svzip2q_f32 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_f32_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_f32_tied2, svfloat32_t, ++ z0 = svzip2q_f32 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_f32_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_f32_untied, svfloat32_t, ++ z0 = svzip2q_f32 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f64.c +new file mode 100644 +index 000000000..b5411cff7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_f64_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_f64_tied1, svfloat64_t, ++ z0 = svzip2q_f64 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_f64_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_f64_tied2, svfloat64_t, ++ z0 = svzip2q_f64 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_f64_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_f64_untied, svfloat64_t, ++ z0 = svzip2q_f64 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s16.c +new file mode 100644 +index 000000000..66751fc7f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_s16_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ 
++TEST_UNIFORM_Z (zip2q_s16_tied1, svint16_t, ++ z0 = svzip2q_s16 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_s16_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s16_tied2, svint16_t, ++ z0 = svzip2q_s16 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_s16_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s16_untied, svint16_t, ++ z0 = svzip2q_s16 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s32.c +new file mode 100644 +index 000000000..830de3311 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_s32_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s32_tied1, svint32_t, ++ z0 = svzip2q_s32 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_s32_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s32_tied2, svint32_t, ++ z0 = svzip2q_s32 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_s32_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s32_untied, svint32_t, ++ z0 = svzip2q_s32 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s64.c +new file mode 100644 +index 000000000..917be4f40 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_s64_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s64_tied1, svint64_t, ++ z0 = svzip2q_s64 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_s64_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s64_tied2, svint64_t, ++ z0 = svzip2q_s64 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_s64_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s64_untied, svint64_t, ++ z0 = svzip2q_s64 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s8.c +new file mode 100644 +index 000000000..dff6e2d7b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_s8_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s8_tied1, svint8_t, ++ z0 = svzip2q_s8 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_s8_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s8_tied2, svint8_t, ++ z0 = svzip2q_s8 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_s8_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_s8_untied, svint8_t, ++ z0 = svzip2q_s8 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u16.c +new file mode 100644 +index 000000000..9e194425c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u16.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_u16_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u16_tied1, svuint16_t, ++ z0 = svzip2q_u16 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_u16_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u16_tied2, svuint16_t, ++ z0 = svzip2q_u16 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_u16_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u16_untied, svuint16_t, ++ z0 = svzip2q_u16 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u32.c +new file mode 100644 +index 000000000..89de27f6b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u32.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_u32_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u32_tied1, svuint32_t, ++ z0 = svzip2q_u32 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_u32_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u32_tied2, svuint32_t, ++ z0 = svzip2q_u32 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_u32_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u32_untied, svuint32_t, ++ z0 = svzip2q_u32 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u64.c +new file mode 100644 +index 000000000..f2c9852ac +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u64.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_u64_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u64_tied1, svuint64_t, ++ z0 = svzip2q_u64 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_u64_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u64_tied2, svuint64_t, ++ z0 = svzip2q_u64 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_u64_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u64_untied, svuint64_t, ++ z0 = svzip2q_u64 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u8.c +new file mode 100644 +index 000000000..a12905586 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u8.c +@@ -0,0 +1,32 @@ ++/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ ++/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ ++/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } 
*/ ++ ++#include "test_sve_acle.h" ++ ++/* ++** zip2q_u8_tied1: ++** zip2 z0\.q, z0\.q, z1\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u8_tied1, svuint8_t, ++ z0 = svzip2q_u8 (z0, z1), ++ z0 = svzip2q (z0, z1)) ++ ++/* ++** zip2q_u8_tied2: ++** zip2 z0\.q, z1\.q, z0\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u8_tied2, svuint8_t, ++ z0 = svzip2q_u8 (z1, z0), ++ z0 = svzip2q (z1, z0)) ++ ++/* ++** zip2q_u8_untied: ++** zip2 z0\.q, z1\.q, z2\.q ++** ret ++*/ ++TEST_UNIFORM_Z (zip2q_u8_untied, svuint8_t, ++ z0 = svzip2q_u8 (z1, z2), ++ z0 = svzip2q (z1, z2)) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c +new file mode 100644 +index 000000000..714265ed1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, uint32_t *u32_ptr, svuint8_t u8, svuint16_t u16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64) ++{ ++ svadrh_index (u32); /* { dg-error {too few arguments to function 'svadrh_index'} } */ ++ svadrh_index (u32, u32, u32); /* { dg-error {too many arguments to function 'svadrh_index'} } */ ++ svadrh_index (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrh_index', which expects an SVE vector type} } */ ++ svadrh_index (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrh_index', which expects an SVE vector type} } */ ++ svadrh_index (u16, u16); /* { dg-error {passing 'svuint16_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svadrh_index (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svadrh_index (f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svadrh_index (pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ ++ svadrh_index (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrh_index', which expects an SVE vector type} } */ ++ svadrh_index (u32, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svadrh_index', which expects a vector of 32-bit or 64-bit integers} } */ ++ svadrh_index (u32, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadrh_index', which expects a vector of 32-bit or 64-bit integers} } */ ++ svadrh_index (u32, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ ++ ++ svadrh_index (u32, s32); ++ svadrh_index (u32, u32); ++ svadrh_index (u32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ ++ svadrh_index (u32, s64); /* { dg-error {cannot combine a base of type 'svuint32_t' with an index of type 'svint64_t'} } */ ++ svadrh_index (u32, u64); /* { dg-error {cannot combine a base of type 'svuint32_t' with an index of type 'svuint64_t'} } */ ++ svadrh_index (u32, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ ++ ++ svadrh_index (u64, s32); /* { dg-error {cannot combine a base of type 'svuint64_t' with an index of type 'svint32_t'} } */ ++ svadrh_index (u64, u32); /* { dg-error {cannot combine a base of type 'svuint64_t' with an index of 
type 'svuint32_t'} } */ ++ svadrh_index (u64, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ ++ svadrh_index (u64, s64); ++ svadrh_index (u64, u64); ++ svadrh_index (u64, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c +new file mode 100644 +index 000000000..528d7ac51 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, uint32_t *u32_ptr, svuint8_t u8, svuint16_t u16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64) ++{ ++ svadrb_offset (u32); /* { dg-error {too few arguments to function 'svadrb_offset'} } */ ++ svadrb_offset (u32, u32, u32); /* { dg-error {too many arguments to function 'svadrb_offset'} } */ ++ svadrb_offset (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrb_offset', which expects an SVE vector type} } */ ++ svadrb_offset (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrb_offset', which expects an SVE vector type} } */ ++ svadrb_offset (u16, u16); /* { dg-error {passing 'svuint16_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svadrb_offset (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svadrb_offset (f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svadrb_offset (pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ ++ svadrb_offset (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrb_offset', which expects an SVE vector type} } */ ++ svadrb_offset (u32, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svadrb_offset', which expects a vector of 32-bit or 64-bit integers} } */ ++ svadrb_offset (u32, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadrb_offset', which expects a vector of 32-bit or 64-bit integers} } */ ++ svadrb_offset (u32, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadrb_offset', which expects a vector of integers} } */ ++ ++ svadrb_offset (u32, s32); ++ svadrb_offset (u32, u32); ++ svadrb_offset (u32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadrb_offset', which expects a vector of integers} } */ ++ svadrb_offset (u32, s64); /* { dg-error {cannot combine a base of type 'svuint32_t' with an offset of type 'svint64_t'} } */ ++ svadrb_offset (u32, u64); /* { dg-error {cannot combine a base of type 'svuint32_t' with an offset of type 'svuint64_t'} } */ ++ svadrb_offset (u32, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svadrb_offset', which expects a vector of integers} } */ ++ ++ svadrb_offset (u64, s32); /* { dg-error {cannot combine a base of type 'svuint64_t' with an offset of type 'svint32_t'} } */ ++ svadrb_offset (u64, u32); /* { dg-error {cannot combine a base of type 'svuint64_t' with an offset of type 'svuint32_t'} } */ ++ svadrb_offset (u64, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadrb_offset', which expects a vector of 
integers} } */ ++ svadrb_offset (u64, s64); ++ svadrb_offset (u64, u64); ++ svadrb_offset (u64, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svadrb_offset', which expects a vector of integers} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c +new file mode 100644 +index 000000000..8ce89fa10 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++svuint8_t ++f1 (svbool_t pg, svuint8_t u8, svint16_t s16) ++{ ++ svzip1 (pg); /* { dg-error {too few arguments to function 'svzip1'} } */ ++ svzip1 (pg, u8, u8); /* { dg-error {too many arguments to function 'svzip1'} } */ ++ svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but previous arguments had type 'svbool_t'} } */ ++ svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ ++ svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ ++ svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE vector type} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c +new file mode 100644 +index 000000000..965e9a13c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16, ++ svfloat32_t f32, svint32_t s32, svuint32_t u32) ++{ ++ svscale_x (pg, f16); /* { dg-error {too few arguments to function 'svscale_x'} } */ ++ svscale_x (pg, f16, s16, s16); /* { dg-error {too many arguments to function 'svscale_x'} } */ ++ svscale_x (s32, f16, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svscale_x', which expects 'svbool_t'} } */ ++ svscale_x (1, f16, s32); /* { dg-error {passing 'int' to argument 1 of 'svscale_x', which expects 'svbool_t'} } */ ++ svscale_x (pg, pg, s16); /* { dg-error {'svscale_x' has no form that takes 'svbool_t' arguments} } */ ++ svscale_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svscale_x', which expects an SVE vector type} } */ ++ svscale_x (pg, f16, s16); ++ svscale_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ ++ svscale_x (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ ++ svscale_x (pg, f16, s32); /* { dg-error {arguments 2 and 3 of 'svscale_x' must have the same element size, but the values passed here have type 'svfloat16_t' and 'svint32_t' respectively} } */ ++ svscale_x (pg, f16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ ++ svscale_x (pg, f16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ ++ svscale_x (pg, f16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ ++ svscale_x (pg, f16, 0); ++ svscale_x (pg, s16, s16); /* { dg-error {'svscale_x' has no form that takes 'svint16_t' arguments} } */ ++ svscale_x (pg, s16, u16); /* { 
dg-error {passing 'svuint16_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ ++ svscale_x (pg, s16, s32); /* { dg-error {'svscale_x' has no form that takes 'svint16_t' arguments} } */ ++ svscale_x (pg, s16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ ++ svscale_x (pg, u16, s16); /* { dg-error {'svscale_x' has no form that takes 'svuint16_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c +new file mode 100644 +index 000000000..f1879ca6e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, ++ svint32_t s32, int i) ++{ ++ svmul_lane (f32, f32); /* { dg-error {too few arguments to function 'svmul_lane'} } */ ++ svmul_lane (f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svmul_lane'} } */ ++ svmul_lane (pg, pg, 0); /* { dg-error {'svmul_lane' has no form that takes 'svbool_t' arguments} } */ ++ svmul_lane (s32, s32, 0); /* { dg-error {'svmul_lane' has no form that takes 'svint32_t' arguments} } */ ++ svmul_lane (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmul_lane', which expects an SVE vector type} } */ ++ svmul_lane (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmul_lane', which expects an SVE vector type} } */ ++ svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but previous arguments had type 'svfloat32_t'} } */ ++ svmul_lane (f32, f32, s32); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ ++ svmul_lane (f32, f32, i); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ ++ ++ svmul_lane (f16, f16, 0); ++ svmul_lane (f16, f16, 7); ++ svmul_lane (f16, f16, 8); /* { dg-error {passing 8 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 7\]} } */ ++ svmul_lane (f16, f16, -1); /* { dg-error {passing -1 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 7\]} } */ ++ ++ svmul_lane (f32, f32, 0); ++ svmul_lane (f32, f32, 3); ++ svmul_lane (f32, f32, 4); /* { dg-error {passing 4 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 3\]} } */ ++ svmul_lane (f32, f32, -1); /* { dg-error {passing -1 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 3\]} } */ ++ ++ svmul_lane (f64, f64, 0); ++ svmul_lane (f64, f64, 1); ++ svmul_lane (f64, f64, 2); /* { dg-error {passing 2 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 1\]} } */ ++ svmul_lane (f64, f64, -1); /* { dg-error {passing -1 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 1\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c +new file mode 100644 +index 000000000..0c69e66a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svfloat16_t f16, int i, float f) ++{ ++ svinsr (u8); /* { dg-error {too few arguments to function 'svinsr'} } */ ++ svinsr (u8, 0, 0); /* { dg-error {too many arguments to function 'svinsr'} } */ ++ 
svinsr (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svinsr', which expects an SVE vector type} } */ ++ svinsr (u8, 0); ++ svinsr (u8, -1); ++ svinsr (u8, i); ++ svinsr (u8, f); ++ svinsr (u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svinsr', which expects a scalar element} } */ ++ svinsr (pg, 0); /* { dg-error {'svinsr' has no form that takes 'svbool_t' arguments} } */ ++ svinsr (f16, f); ++ svinsr (f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svinsr', which expects a scalar element} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_1.c +new file mode 100644 +index 000000000..29615e5be +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_1.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sve.h> ++ ++svuint8_t ++f1 (svbool_t pg, svuint8_t u8, svint8_t s8) ++{ ++ svadd_u8_x (pg, u8, s8); /* { dg-error {incompatible type for argument 3 of 'svadd_u8_x'} } */ ++ svadd_u8_x (pg, u8); /* { dg-error {too few arguments to function 'svadd_u8_x'} } */ ++ svadd_u8_x (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svadd_u8_x'} } */ ++ return svadd_s8_x (pg, s8, s8); /* { dg-error {incompatible types when returning type 'svint8_t' but 'svuint8_t' was expected} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c +new file mode 100644 +index 000000000..9fa83ca99 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sve.h> ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16) ++{ ++ svadd_x (pg, u8); /* { dg-error {too few arguments to function 'svadd_x'} } */ ++ svadd_x (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svadd_x'} } */ ++ svadd_x (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svadd_x', which expects 'svbool_t'} } */ ++ svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */ ++ svadd_x (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svadd_x', which expects an SVE vector type} } */ ++ svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ ++ svadd_x (pg, u8, u8); ++ svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ ++ svadd_x (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ ++ svadd_x (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ ++ svadd_x (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ ++ svadd_x (pg, u8, 0); ++ ++ svadd_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */ ++ svadd_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */ ++ svadd_x (pg, f16, f16); ++ svadd_x (pg, f16, 1); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c +new file mode 100644 +index 000000000..4d0b253e3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16) ++{ ++ svand_z (pg, u8); /* { dg-error {too few arguments to function 'svand_z'} } */ ++ svand_z (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svand_z'} } */ ++ svand_z (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svand_z', which expects 'svbool_t'} } */ ++ svand_z (pg, pg, pg); ++ svand_z (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svand_z', which expects an SVE vector type} } */ ++ svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ ++ svand_z (pg, u8, u8); ++ svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ ++ svand_z (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ ++ svand_z (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ ++ svand_z (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ ++ svand_z (pg, u8, 0); ++ ++ svand_z (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svbool_t'} } */ ++ svand_z (pg, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svand_z', but its 'svbool_t' form does not accept scalars} } */ ++ ++ svand_z (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */ ++ svand_z (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */ ++ svand_z (pg, f16, f16); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */ ++ svand_z (pg, f16, 1); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c +new file mode 100644 +index 000000000..8ffe91bce +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) ++{ ++ svcadd_x (pg, f32, f32); /* { dg-error {too few arguments to function 'svcadd_x'} } */ ++ svcadd_x (pg, f32, f32, 90, 90); /* { dg-error {too many arguments to function 'svcadd_x'} } */ ++ svcadd_x (f32, f32, f32, 90); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svcadd_x', which expects 'svbool_t'} } */ ++ svcadd_x (pg, pg, pg, 90); /* { dg-error {'svcadd_x' has no form that takes 'svbool_t' arguments} } */ ++ svcadd_x (pg, s32, s32, 90); /* { dg-error {'svcadd_x' has no form that takes 'svint32_t' arguments} } */ ++ svcadd_x (pg, 1, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcadd_x', which expects an SVE vector type} } */ ++ svcadd_x (pg, f32, 1, 90); /* { dg-error {passing 'int' to argument 3 of 'svcadd_x', which expects an SVE 
vector type} } */ ++ svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but previous arguments had type 'svfloat32_t'} } */ ++ svcadd_x (pg, f32, f32, s32); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ ++ svcadd_x (pg, f32, f32, i); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ ++ svcadd_x (pg, f32, f32, -90); /* { dg-error {passing -90 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */ ++ svcadd_x (pg, f32, f32, 0); /* { dg-error {passing 0 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */ ++ svcadd_x (pg, f32, f32, 1); /* { dg-error {passing 1 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */ ++ svcadd_x (pg, f32, f32, 90); ++ svcadd_x (pg, f32, f32, 180); /* { dg-error {passing 180 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */ ++ svcadd_x (pg, f32, f32, 270); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c +new file mode 100644 +index 000000000..c8ca5f746 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svuint8_t u8, int i, float f) ++{ ++ svdupq_lane (u8); /* { dg-error {too few arguments to function 'svdupq_lane'} } */ ++ svdupq_lane (u8, 0, 0); /* { dg-error {too many arguments to function 'svdupq_lane'} } */ ++ svdupq_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdupq_lane', which expects an SVE vector type} } */ ++ svdupq_lane (u8, 0); ++ svdupq_lane (u8, -1); ++ svdupq_lane (u8, i); ++ svdupq_lane (u8, f); ++ svdupq_lane (u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svdupq_lane', which expects 'uint64_t'} } */ ++ svdupq_lane (pg, 0); /* { dg-error {'svdupq_lane' has no form that takes 'svbool_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_1.c +new file mode 100644 +index 000000000..27726a80f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_1.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++svuint8_t ++f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint64_t u64) ++{ ++ svlsl_wide_u8_x (pg, u8, u8); /* { dg-error {incompatible type for argument 3 of 'svlsl_wide_u8_x'} } */ ++ svlsl_wide_u8_x (pg, u8); /* { dg-error {too few arguments to function 'svlsl_wide_u8_x'} } */ ++ svlsl_wide_u8_x (pg, u8, u64, u8); /* { dg-error {too many arguments to function 'svlsl_wide_u8_x'} } */ ++ return svlsl_wide_s8_x (pg, s8, u64); /* { dg-error {incompatible types when returning type 'svint8_t' but 'svuint8_t' was expected} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c +new file mode 100644 +index 000000000..be217394f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svuint64_t u64) ++{ ++ svlsl_wide_x (pg, u8); /* { dg-error {too few arguments to function 'svlsl_wide_x'} } */ ++ svlsl_wide_x (pg, u8, u8, u8); /* { dg-error {too many arguments to function 
'svlsl_wide_x'} } */ ++ svlsl_wide_x (u8, u8, u64); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svlsl_wide_x', which expects 'svbool_t'} } */ ++ svlsl_wide_x (pg, 1, u64); /* { dg-error {passing 'int' to argument 2 of 'svlsl_wide_x', which expects an SVE vector type} } */ ++ svlsl_wide_x (pg, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svlsl_wide_x', which expects 'svuint64_t'} } */ ++ svlsl_wide_x (pg, u64, u64); /* { dg-error {'svlsl_wide_x' has no form that takes 'svuint64_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c +new file mode 100644 +index 000000000..8f86c50b6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16, ++ svfloat16_t f16) ++{ ++ svtbl (u8); /* { dg-error {too few arguments to function 'svtbl'} } */ ++ svtbl (u8, u8, u8); /* { dg-error {too many arguments to function 'svtbl'} } */ ++ svtbl (pg, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ svtbl (pg, u8); /* { dg-error {'svtbl' has no form that takes 'svbool_t' arguments} } */ ++ ++ svtbl (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svtbl', which expects an SVE vector type} } */ ++ svtbl (u8, u8); ++ svtbl (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ svtbl (u8, u16); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svuint8_t' and 'svuint16_t' respectively} } */ ++ svtbl (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ svtbl (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ ++ svtbl (s8, u8); ++ svtbl (s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ svtbl (s8, u16); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */ ++ svtbl (s8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ svtbl (s8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ ++ svtbl (u16, u8); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svuint16_t' and 'svuint8_t' respectively} } */ ++ svtbl (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ svtbl (u16, u16); ++ svtbl (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ svtbl (u16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ ++ svtbl (s16, u8); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svint16_t' and 'svuint8_t' respectively} } */ ++ svtbl (s16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a 
vector of unsigned integers} } */ ++ svtbl (s16, u16); ++ svtbl (s16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ svtbl (s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ ++ svtbl (f16, u8); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svfloat16_t' and 'svuint8_t' respectively} } */ ++ svtbl (f16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ svtbl (f16, u16); ++ svtbl (f16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++ svtbl (f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c +new file mode 100644 +index 000000000..36a902e69 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svuint8_t u8, int i, float f) ++{ ++ svdup_lane (u8); /* { dg-error {too few arguments to function 'svdup_lane'} } */ ++ svdup_lane (u8, 0, 0); /* { dg-error {too many arguments to function 'svdup_lane'} } */ ++ svdup_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdup_lane', which expects an SVE vector type} } */ ++ svdup_lane (u8, 0); ++ svdup_lane (u8, -1); ++ svdup_lane (u8, i); ++ svdup_lane (u8, f); ++ svdup_lane (u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svdup_lane', which expects a scalar integer} } */ ++ svdup_lane (pg, 0); /* { dg-error {'svdup_lane' has no form that takes 'svbool_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c +new file mode 100644 +index 000000000..b162ab405 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16, ++ svfloat32_t f32, svint32_t s32, svuint32_t u32) ++{ ++ svlsl_x (pg, s16); /* { dg-error {too few arguments to function 'svlsl_x'} } */ ++ svlsl_x (pg, s16, u16, u16); /* { dg-error {too many arguments to function 'svlsl_x'} } */ ++ svlsl_x (s32, s32, u32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svlsl_x', which expects 'svbool_t'} } */ ++ svlsl_x (1, s32, u32); /* { dg-error {passing 'int' to argument 1 of 'svlsl_x', which expects 'svbool_t'} } */ ++ svlsl_x (pg, pg, u16); /* { dg-error {'svlsl_x' has no form that takes 'svbool_t' arguments} } */ ++ svlsl_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svlsl_x', which expects an SVE vector type} } */ ++ svlsl_x (pg, s16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ ++ svlsl_x (pg, s16, u16); ++ svlsl_x (pg, s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ ++ svlsl_x (pg, s16, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ ++ 
svlsl_x (pg, s16, u32); /* { dg-error {arguments 2 and 3 of 'svlsl_x' must have the same element size, but the values passed here have type 'svint16_t' and 'svuint32_t' respectively} } */ ++ svlsl_x (pg, s16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ ++ svlsl_x (pg, s16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ ++ svlsl_x (pg, s16, 0); ++ svlsl_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ ++ svlsl_x (pg, f16, u16); /* { dg-error {'svlsl_x' has no form that takes 'svfloat16_t' arguments} } */ ++ svlsl_x (pg, f16, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ ++ svlsl_x (pg, f16, u32); /* { dg-error {'svlsl_x' has no form that takes 'svfloat16_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c +new file mode 100644 +index 000000000..cb9ac946c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c +@@ -0,0 +1,15 @@ ++#include <arm_sve.h> ++ ++void ++test (svbool_t pg, svint32_t s32, svint64_t s64, int i) ++{ ++ svclasta (pg, 1); /* { dg-error {too few arguments to function 'svclasta'} } */ ++ svclasta (pg, 1, s32, 1); /* { dg-error {too many arguments to function 'svclasta'} } */ ++ svclasta (1, 1, s32); /* { dg-error {passing 'int' to argument 1 of 'svclasta', which expects 'svbool_t'} } */ ++ svclasta (pg, 1, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE vector type} } */ ++ svclasta (pg, 1, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ ++ svclasta (pg, i, s32); ++ svclasta (pg, s32, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE vector type} } */ ++ svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but previous arguments had type 'svint32_t'} } */ ++ svclasta (pg, pg, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c +new file mode 100644 +index 000000000..71c8e86d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sve.h> ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16) ++{ ++ svcmpeq (pg, u8); /* { dg-error {too few arguments to function 'svcmpeq'} } */ ++ svcmpeq (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svcmpeq'} } */ ++ svcmpeq (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcmpeq', which expects 'svbool_t'} } */ ++ svcmpeq (pg, pg, pg); /* { dg-error {'svcmpeq' has no form that takes 'svbool_t' arguments} } */ ++ svcmpeq (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq', which expects an SVE vector type} } */ ++ svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ ++ svcmpeq (pg, u8, u8); ++ svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ ++ 
svcmpeq (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ ++ svcmpeq (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ ++ svcmpeq (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ ++ svcmpeq (pg, u8, 0); ++ ++ svcmpeq (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */ ++ svcmpeq (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */ ++ svcmpeq (pg, f16, f16); ++ svcmpeq (pg, f16, 1); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_1.c +new file mode 100644 +index 000000000..d5a60f841 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_1.c +@@ -0,0 +1,85 @@ ++/* { dg-do compile } */ ++ ++#include <arm_sve.h> ++#include <stdbool.h> ++ ++enum signed_enum { SA = -1, SB }; ++enum unsigned_enum { UA, UB }; ++ ++void ++test (int8_t s8, int16_t s16, int32_t s32, int64_t s64, ++ uint8_t u8, uint16_t u16, uint32_t u32, uint64_t u64, ++ bool b, enum signed_enum se, enum unsigned_enum ue, ++ int *ptr, float f32, svbool_t pg, svint32_t vec) ++{ ++ svwhilele_b8 (s32); /* { dg-error {too few arguments to function 'svwhilele_b8'} } */ ++ svwhilele_b8 (s32, s32, s32); /* { dg-error {too many arguments to function 'svwhilele_b8'} } */ ++ ++ svwhilele_b8 (b, b); ++ svwhilele_b8 (se, se); ++ svwhilele_b8 (ue, ue); ++ svwhilele_b8 (s8, s8); ++ svwhilele_b8 (u8, u8); ++ svwhilele_b8 (s16, s16); ++ svwhilele_b8 (u16, u16); ++ svwhilele_b8 (ptr, ptr); /* { dg-error {passing 'int \*' to argument 1 of 'svwhilele_b8', which expects a 32-bit or 64-bit integer type} } */ ++ svwhilele_b8 (f32, f32); /* { dg-error {passing 'float' to argument 1 of 'svwhilele_b8', which expects a 32-bit or 64-bit integer type} } */ ++ svwhilele_b8 (pg, pg); /* { dg-error {passing 'svbool_t' to argument 1 of 'svwhilele_b8', which expects a 32-bit or 64-bit integer type} } */ ++ svwhilele_b8 (vec, vec); /* { dg-error {passing 'svint32_t' to argument 1 of 'svwhilele_b8', which expects a 32-bit or 64-bit integer type} } */ ++ ++ svwhilele_b8 (s32, b); ++ svwhilele_b8 (s32, se); ++ svwhilele_b8 (s32, ue); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint32_t'} } */ ++ svwhilele_b8 (s32, s8); ++ svwhilele_b8 (s32, u8); ++ svwhilele_b8 (s32, s16); ++ svwhilele_b8 (s32, u16); ++ ++ svwhilele_b8 (u32, b); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (u32, se); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (u32, ue); ++ svwhilele_b8 (u32, s8); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (u32, u8); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (u32, s16); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (u32, u16); /* { 
dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ ++ ++ svwhilele_b8 (s32, s32); ++ svwhilele_b8 (s32, u32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint32_t'} } */ ++ svwhilele_b8 (s32, s64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'int64_t'} } */ ++ svwhilele_b8 (s32, u64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint64_t'} } */ ++ ++ svwhilele_b8 (u32, s32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (u32, u32); ++ svwhilele_b8 (u32, s64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int64_t'} } */ ++ svwhilele_b8 (u32, u64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'uint64_t'} } */ ++ ++ svwhilele_b8 (s64, s32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int64_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (s64, u32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int64_t' but argument 2 has type 'uint32_t'} } */ ++ svwhilele_b8 (s64, s64); ++ svwhilele_b8 (s64, u64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int64_t' but argument 2 has type 'uint64_t'} } */ ++ ++ svwhilele_b8 (u64, s32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint64_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (u64, u32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint64_t' but argument 2 has type 'uint32_t'} } */ ++ svwhilele_b8 (u64, s64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint64_t' but argument 2 has type 'int64_t'} } */ ++ svwhilele_b8 (u64, u64); ++ ++ svwhilele_b8 (0, s32); ++ svwhilele_b8 (0, u32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint32_t'} } */ ++ svwhilele_b8 (0, s64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'int64_t'} } */ ++ svwhilele_b8 (0, u64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint64_t'} } */ ++ ++ svwhilele_b8 (s32, 0); ++ svwhilele_b8 (u32, 0); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (s64, 0); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int64_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (u64, 0); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint64_t' but argument 2 has type 'int32_t'} } */ ++ ++ svwhilele_b8 (0U, s32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ ++ svwhilele_b8 (0U, u32); ++ svwhilele_b8 (0U, s64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int64_t'} } */ ++ svwhilele_b8 (0U, u64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'uint64_t'} } */ ++ ++ svwhilele_b8 (s32, 0U); /* { dg-error {call to 'svwhilele_b8' 
is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint32_t'} } */ ++ svwhilele_b8 (u32, 0U); ++ svwhilele_b8 (s64, 0U); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int64_t' but argument 2 has type 'uint32_t'} } */ ++ svwhilele_b8 (u64, 0U); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint64_t' but argument 2 has type 'uint32_t'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c +new file mode 100644 +index 000000000..fc5e45663 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++svuint8_t ++f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint64_t s64, svuint64_t u64, ++ svfloat32_t f32, svfloat64_t f64, unsigned int x) ++{ ++ svcmpeq_wide (pg, s8); /* { dg-error {too few arguments to function 'svcmpeq_wide'} } */ ++ svcmpeq_wide (pg, s8, s64, s8); /* { dg-error {too many arguments to function 'svcmpeq_wide'} } */ ++ svcmpeq_wide (s8, s8, s64); /* { dg-error {passing 'svint8_t' to argument 1 of 'svcmpeq_wide', which expects 'svbool_t'} } */ ++ svcmpeq_wide (pg, 0, s64); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq_wide', which expects an SVE vector type} } */ ++ svcmpeq_wide (pg, s8, 0); ++ svcmpeq_wide (pg, s8, x); ++ svcmpeq_wide (pg, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ ++ svcmpeq_wide (pg, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ ++ svcmpeq_wide (pg, s8, s64); ++ svcmpeq_wide (pg, s8, u64); /* { dg-error {arguments 2 and 3 of 'svcmpeq_wide' must have the same signedness, but the values passed here have type 'svint8_t' and 'svuint64_t' respectively} } */ ++ svcmpeq_wide (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ ++ svcmpeq_wide (pg, u8, u64); /* { dg-error {'svcmpeq_wide' has no form that takes 'svuint8_t' arguments} } */ ++ svcmpeq_wide (pg, s64, s64); /* { dg-error {'svcmpeq_wide' has no form that takes 'svint64_t' arguments} } */ ++ svcmpeq_wide (pg, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ ++ svcmpeq_wide (pg, f32, f64); /* { dg-error {'svcmpeq_wide' has no form that takes 'svfloat32_t' arguments} } */ ++ svcmpeq_wide (pg, f64, f64); /* { dg-error {'svcmpeq_wide' has no form that takes 'svfloat64_t' arguments} } */ ++ svcmpeq_wide (pg, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_pat_1.c +new file mode 100644 +index 000000000..8dd76a553 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_pat_1.c +@@ -0,0 +1,42 @@ ++#include ++ ++void ++test (enum svpattern pat, int i) ++{ ++ svcntb_pat (pat); /* { dg-error {argument 1 of 'svcntb_pat' must be an integer constant expression} } */ ++ svcntb_pat (i); /* { dg-error {argument 1 of 'svcntb_pat' must be an integer constant expression} } */ ++ svcntb_pat ((enum svpattern) -1); /* { dg-error {passing 4294967295 to argument 1 of 'svcntb_pat', which 
expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 0); ++ svcntb_pat ((enum svpattern) 1); ++ svcntb_pat ((enum svpattern) 2); ++ svcntb_pat ((enum svpattern) 3); ++ svcntb_pat ((enum svpattern) 4); ++ svcntb_pat ((enum svpattern) 5); ++ svcntb_pat ((enum svpattern) 6); ++ svcntb_pat ((enum svpattern) 7); ++ svcntb_pat ((enum svpattern) 8); ++ svcntb_pat ((enum svpattern) 9); ++ svcntb_pat ((enum svpattern) 10); ++ svcntb_pat ((enum svpattern) 11); ++ svcntb_pat ((enum svpattern) 12); ++ svcntb_pat ((enum svpattern) 13); ++ svcntb_pat ((enum svpattern) 14); /* { dg-error {passing 14 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 15); /* { dg-error {passing 15 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 16); /* { dg-error {passing 16 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 17); /* { dg-error {passing 17 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 18); /* { dg-error {passing 18 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 19); /* { dg-error {passing 19 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 20); /* { dg-error {passing 20 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 21); /* { dg-error {passing 21 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 22); /* { dg-error {passing 22 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 23); /* { dg-error {passing 23 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 24); /* { dg-error {passing 24 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 25); /* { dg-error {passing 25 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 26); /* { dg-error {passing 26 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 27); /* { dg-error {passing 27 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 28); /* { dg-error {passing 28 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++ svcntb_pat ((enum svpattern) 29); ++ svcntb_pat ((enum svpattern) 30); ++ svcntb_pat ((enum svpattern) 31); ++ svcntb_pat ((enum svpattern) 32); /* { dg-error {passing 32 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c +new file mode 100644 +index 000000000..daf9e0d5b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svuint32_t u32, svuint32x2_t u32x2) ++{ ++ svlen (); /* { dg-error {too few arguments to function 'svlen'} } */ ++ svlen (u32, u32); /* { dg-error {too many arguments to function 
'svlen'} } */ ++ svlen (0); /* { dg-error {passing 'int' to argument 1 of 'svlen', which expects an SVE vector type} } */ ++ svlen (pg); /* { dg-error {'svlen' has no form that takes 'svbool_t' arguments} } */ ++ svlen (u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 1 of 'svlen', which expects a single SVE vector rather than a tuple} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c +new file mode 100644 +index 000000000..31321a046 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, ++ svuint8x2_t u8x2, int x) ++{ ++ *ptr = svcreate2 (u8); /* { dg-error {too few arguments to function 'svcreate2'} } */ ++ *ptr = svcreate2 (u8, u8, u8); /* { dg-error {too many arguments to function 'svcreate2'} } */ ++ *ptr = svcreate2 (u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svcreate2', which expects a single SVE vector rather than a tuple} } */ ++ *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ ++ *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ ++ *ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE vector type} } */ ++ *ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE vector type} } */ ++ *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but previous arguments had type 'svbool_t'} } */ ++ *ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */ ++ *ptr = svcreate2 (u8, u8); ++ *ptr = svcreate2 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_2.c +new file mode 100644 +index 000000000..28ad16c2d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_2.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, ++ svuint8x2_t u8x2, int x) ++{ ++ *ptr = svcreate2_u8 (u8); /* { dg-error {too few arguments to function 'svcreate2_u8'} } */ ++ *ptr = svcreate2_u8 (u8, u8, u8); /* { dg-error {too many arguments to function 'svcreate2_u8'} } */ ++ *ptr = svcreate2_u8 (u8x2, u8x2); /* { dg-error {incompatible type for argument 1 of 'svcreate2_u8'} } */ ++ /* { dg-error {incompatible type for argument 2 of 'svcreate2_u8'} "" { target *-*-* } .-1 } */ ++ *ptr = svcreate2_u8 (u8, f64); /* { dg-error {incompatible type for argument 2 of 'svcreate2_u8'} } */ ++ *ptr = svcreate2_u8 (u8, pg); /* { dg-error {incompatible type for argument 2 of 'svcreate2_u8'} } */ ++ *ptr = svcreate2_u8 (u8, x); /* { dg-error {incompatible type for argument 2 of 'svcreate2_u8'} } */ ++ *ptr = svcreate2_u8 (x, u8); /* { dg-error {incompatible type for argument 1 of 'svcreate2_u8'} } */ ++ *ptr = svcreate2_u8 (pg, u8); /* { dg-error 
{incompatible type for argument 1 of 'svcreate2_u8'} } */ ++ *ptr = svcreate2_u8 (pg, pg); /* { dg-error {incompatible type for argument 1 of 'svcreate2_u8'} } */ ++ /* { dg-error {incompatible type for argument 2 of 'svcreate2_u8'} "" { target *-*-* } .-1 } */ ++ *ptr = svcreate2_u8 (u8, u8); ++ *ptr = svcreate2_f64 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c +new file mode 100644 +index 000000000..a88e56b31 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, ++ svfloat16x3_t f16x3, int x) ++{ ++ *ptr = svcreate3 (f16); /* { dg-error {too few arguments to function 'svcreate3'} } */ ++ *ptr = svcreate3 (f16, f16); /* { dg-error {too few arguments to function 'svcreate3'} } */ ++ *ptr = svcreate3 (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svcreate3'} } */ ++ *ptr = svcreate3 (f16x3, f16x3, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 1 of 'svcreate3', which expects a single SVE vector rather than a tuple} } */ ++ *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ ++ *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ ++ *ptr = svcreate3 (f16, x, f16); /* { dg-error {passing 'int' to argument 2 of 'svcreate3', which expects an SVE vector type} } */ ++ *ptr = svcreate3 (x, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svcreate3', which expects an SVE vector type} } */ ++ *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but previous arguments had type 'svbool_t'} } */ ++ *ptr = svcreate3 (pg, pg, pg); /* { dg-error {'svcreate3' has no form that takes 'svbool_t' arguments} } */ ++ *ptr = svcreate3 (f16, f16, f16); ++ *ptr = svcreate3 (f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svfloat16x3_t' from type 'svfloat64x3_t'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_4.c +new file mode 100644 +index 000000000..c111e9f29 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_4.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, ++ svfloat16x3_t f16x3, int x) ++{ ++ *ptr = svcreate3_f16 (f16); /* { dg-error {too few arguments to function 'svcreate3_f16'} } */ ++ *ptr = svcreate3_f16 (f16, f16); /* { dg-error {too few arguments to function 'svcreate3_f16'} } */ ++ *ptr = svcreate3_f16 (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svcreate3_f16'} } */ ++ *ptr = svcreate3_f16 (f16x3, f16x3, f16x3); /* { dg-error {incompatible type for argument 1 of 'svcreate3_f16'} } */ ++ /* { dg-error {incompatible type for argument 2 of 'svcreate3_f16'} "" { target *-*-* } .-1 } */ ++ /* { dg-error {incompatible type for argument 3 of 'svcreate3_f16'} "" 
++ *ptr = svcreate3_f16 (f16, f16, f64); /* { dg-error {incompatible type for argument 3 of 'svcreate3_f16'} } */
++ *ptr = svcreate3_f16 (f16, pg, f16); /* { dg-error {incompatible type for argument 2 of 'svcreate3_f16'} } */
++ *ptr = svcreate3_f16 (f16, x, f16); /* { dg-error {incompatible type for argument 2 of 'svcreate3_f16'} } */
++ *ptr = svcreate3_f16 (x, f16, f16); /* { dg-error {incompatible type for argument 1 of 'svcreate3_f16'} } */
++ *ptr = svcreate3_f16 (pg, f16, f16); /* { dg-error {incompatible type for argument 1 of 'svcreate3_f16'} } */
++ *ptr = svcreate3_f16 (pg, pg, pg); /* { dg-error {incompatible type for argument 1 of 'svcreate3_f16'} } */
++ /* { dg-error {incompatible type for argument 2 of 'svcreate3_f16'} "" { target *-*-* } .-1 } */
++ /* { dg-error {incompatible type for argument 3 of 'svcreate3_f16'} "" { target *-*-* } .-2 } */
++ *ptr = svcreate3_f16 (f16, f16, f16);
++ *ptr = svcreate3_f64 (f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svfloat16x3_t' from type 'svfloat64x3_t'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
+new file mode 100644
+index 000000000..fed124506
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
+@@ -0,0 +1,23 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */
++
++#include <arm_sve.h>
++
++void
++f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64,
++ svint32x4_t s32x4, int x)
++{
++ *ptr = svcreate4 (s32); /* { dg-error {too few arguments to function 'svcreate4'} } */
++ *ptr = svcreate4 (s32, s32); /* { dg-error {too few arguments to function 'svcreate4'} } */
++ *ptr = svcreate4 (s32, s32, s32); /* { dg-error {too few arguments to function 'svcreate4'} } */
++ *ptr = svcreate4 (s32, s32, s32, s32, s32); /* { dg-error {too many arguments to function 'svcreate4'} } */
++ *ptr = svcreate4 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svcreate4', which expects a single SVE vector rather than a tuple} } */
++ *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but previous arguments had type 'svint32_t'} } */
++ *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but previous arguments had type 'svint32_t'} } */
++ *ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE vector type} } */
++ *ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE vector type} } */
++ *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but previous arguments had type 'svbool_t'} } */
++ *ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {'svcreate4' has no form that takes 'svbool_t' arguments} } */
++ *ptr = svcreate4 (s32, s32, s32, s32);
++ *ptr = svcreate4 (f64, f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svfloat64x4_t'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_6.c
+new file mode 100644
+index 000000000..b9e298acf
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_6.c
+@@ -0,0 +1,29 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */
++
++#include <arm_sve.h>
++
++void
++f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64,
++ svint32x4_t s32x4, int x)
++{
++ *ptr = svcreate4_s32 (s32); /* { dg-error {too few arguments to function 'svcreate4_s32'} } */
++ *ptr = svcreate4_s32 (s32, s32); /* { dg-error {too few arguments to function 'svcreate4_s32'} } */
++ *ptr = svcreate4_s32 (s32, s32, s32); /* { dg-error {too few arguments to function 'svcreate4_s32'} } */
++ *ptr = svcreate4_s32 (s32, s32, s32, s32, s32); /* { dg-error {too many arguments to function 'svcreate4_s32'} } */
++ *ptr = svcreate4_s32 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {incompatible type for argument 1 of 'svcreate4_s32'} } */
++ /* { dg-error {incompatible type for argument 2 of 'svcreate4_s32'} "" { target *-*-* } .-1 } */
++ /* { dg-error {incompatible type for argument 3 of 'svcreate4_s32'} "" { target *-*-* } .-2 } */
++ /* { dg-error {incompatible type for argument 4 of 'svcreate4_s32'} "" { target *-*-* } .-3 } */
++ *ptr = svcreate4_s32 (s32, s32, s32, f64); /* { dg-error {incompatible type for argument 4 of 'svcreate4_s32'} } */
++ *ptr = svcreate4_s32 (s32, s32, pg, s32); /* { dg-error {incompatible type for argument 3 of 'svcreate4_s32'} } */
++ *ptr = svcreate4_s32 (s32, x, s32, s32); /* { dg-error {incompatible type for argument 2 of 'svcreate4_s32'} } */
++ *ptr = svcreate4_s32 (x, s32, s32, s32); /* { dg-error {incompatible type for argument 1 of 'svcreate4_s32'} } */
++ *ptr = svcreate4_s32 (pg, s32, s32, s32); /* { dg-error {incompatible type for argument 1 of 'svcreate4_s32'} } */
++ *ptr = svcreate4_s32 (pg, pg, pg, pg); /* { dg-error {incompatible type for argument 1 of 'svcreate4_s32'} } */
++ /* { dg-error {incompatible type for argument 2 of 'svcreate4_s32'} "" { target *-*-* } .-1 } */
++ /* { dg-error {incompatible type for argument 3 of 'svcreate4_s32'} "" { target *-*-* } .-2 } */
++ /* { dg-error {incompatible type for argument 4 of 'svcreate4_s32'} "" { target *-*-* } .-3 } */
++ *ptr = svcreate4_s32 (s32, s32, s32, s32);
++ *ptr = svcreate4_f64 (f64, f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svfloat64x4_t'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ext_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ext_1.c
+new file mode 100644
+index 000000000..bdce3926d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ext_1.c
+@@ -0,0 +1,67 @@
++/* { dg-do compile } */
++
++#include <arm_sve.h>
++
++void
++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
++ svfloat16_t f16, svint32_t s32, svuint32_t u32, svfloat32_t f32,
++ svint64_t s64, svuint64_t u64, svfloat64_t f64, int i)
++{
++ svext (pg, pg, 0); /* { dg-error {'svext' has no form that takes 'svbool_t' arguments} } */
++ svext (s8, s8, i); /* { dg-error {argument 3 of 'svext' must be an integer constant expression} } */
++
++ svext (s8, s8, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 255\]} } */
++ svext (s8, s8, 0);
++ svext (s8, s8, 255);
++ svext (s8, s8, 256); /* { dg-error {passing 256 to argument 3 of 'svext', which expects a value in the range \[0, 255\]} } */
++
++ svext (u8, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 255\]} } */
++ svext (u8, u8, 0);
++ svext (u8, u8, 255);
++ svext (u8, u8, 256); /* { dg-error {passing 256 to argument 3 of 'svext', which expects a value in the range \[0, 255\]} } */
++
++ svext (s16, s16, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */
++ svext (s16, s16, 0);
++ svext (s16, s16, 127);
++ svext (s16, s16, 128); /* { dg-error {passing 128 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */
++
++ svext (u16, u16, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */
++ svext (u16, u16, 0);
++ svext (u16, u16, 127);
++ svext (u16, u16, 128); /* { dg-error {passing 128 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */
++
++ svext (f16, f16, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */
++ svext (f16, f16, 0);
++ svext (f16, f16, 127);
++ svext (f16, f16, 128); /* { dg-error {passing 128 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */
++
++ svext (s32, s32, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */
++ svext (s32, s32, 0);
++ svext (s32, s32, 63);
++ svext (s32, s32, 64); /* { dg-error {passing 64 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */
++
++ svext (u32, u32, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */
++ svext (u32, u32, 0);
++ svext (u32, u32, 63);
++ svext (u32, u32, 64); /* { dg-error {passing 64 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */
++
++ svext (f32, f32, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */
++ svext (f32, f32, 0);
++ svext (f32, f32, 63);
++ svext (f32, f32, 64); /* { dg-error {passing 64 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */
++
++ svext (s64, s64, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */
++ svext (s64, s64, 0);
++ svext (s64, s64, 31);
++ svext (s64, s64, 32); /* { dg-error {passing 32 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */
++
++ svext (u64, u64, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */
++ svext (u64, u64, 0);
++ svext (u64, u64, 31);
++ svext (u64, u64, 32); /* { dg-error {passing 32 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */
++
++ svext (f64, f64, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */
++ svext (f64, f64, 0);
++ svext (f64, f64, 31);
++ svext (f64, f64, 32); /* { dg-error {passing 32 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c
+new file mode 100644
+index 000000000..1d292786d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++
++#include <arm_sve.h>
++
++svuint8_t
++f1 (svbool_t pg, int i, float f, double d, void *ptr, svfloat32_t f32,
++ svint32_t i32)
++{
++ svadda (pg, f); /* { dg-error {too few arguments to function 'svadda'} } */
++ svadda (pg, f, f32, f32); /* { dg-error {too many arguments to function 'svadda'} } */
++ svadda (f32, f, f32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svadda', which expects 'svbool_t'} } */
++ svadda (pg, i, f32);
++ svadda (pg, f, f32);
++ svadda (pg, d, f32);
++ svadda (pg, ptr, f32); /* { dg-error {incompatible type for argument 2 of 'svadda_f32'} } */
++ svadda (pg, pg, f32); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadda', which expects a scalar element} } */
++ svadda (pg, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadda', which expects a scalar element} } */
++ svadda (pg, f, f); /* { dg-error {passing 'float' to argument 3 of 'svadda', which expects an SVE vector type} } */
++ svadda (pg, i, i32); /* { dg-error {'svadda' has no form that takes 'svint32_t' arguments} } */
++ svadda (pg, i, i); /* { dg-error {passing 'int' to argument 3 of 'svadda', which expects an SVE vector type} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_1.c
+new file mode 100644
+index 000000000..e1b99fa36
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_1.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++
++int svadd_n_u8_x; /* { dg-message "note: previous declaration of 'svadd_n_u8_x' was here" } */
++
++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svadd_n_u8_x' redeclared} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_2.c
+new file mode 100644
+index 000000000..7f653f117
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_2.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++
++int svadd_n_u8_x = 1; /* { dg-message "note: previous definition of 'svadd_n_u8_x' was here" } */
++
++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svadd_n_u8_x' redeclared} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_3.c
+new file mode 100644
+index 000000000..d9ff15a6c
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_3.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++
++extern __SVInt8_t svadd_u8_x (__SVBool_t, __SVInt8_t, __SVInt8_t); /* { dg-message "note: previous declaration of 'svadd_u8_x' was here" } */
++
++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting types for 'svadd_u8_x'} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c
+new file mode 100644
+index 000000000..9591e3d01
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c
+@@ -0,0 +1,9 @@
++/* { dg-do compile } */
++
++/* Although somewhat suspect, this isn't actively wrong, and doesn't need
++ to be diagnosed. Any attempt to call the function before including
++ arm_sve.h will lead to a link failure. (Same for taking its address,
++ etc.) */
++extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t);
++
++#pragma GCC aarch64 "arm_sve.h"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c
+new file mode 100644
+index 000000000..85923611d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c
+@@ -0,0 +1,21 @@
++/* { dg-do compile } */
++
++/* There's no requirement to diagnose this. In particular, arm_sve.h
++ is allowed to use macros to implement the functions, and defining
++ a macro that matches an existing symbol would not be diagnosed.
++
++ At the moment this works like other built-ins in the sense that the
++ explicit definition "wins". This isn't supported behavior though. */
++__SVUint8_t
++svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y)
++{
++ return x;
++}
++
++#pragma GCC aarch64 "arm_sve.h"
++
++svuint8_t
++f (svbool_t pg, svuint8_t x, svuint8_t y)
++{
++ return svadd_u8_x (pg, x, y);
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_6.c
+new file mode 100644
+index 000000000..1f04e4644
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_6.c
+@@ -0,0 +1,5 @@
++/* { dg-do compile } */
++
++typedef int svadd_u8_x; /* { dg-message "note: previous declaration of 'svadd_u8_x' was here" } */
++
++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svadd_u8_x' redeclared} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_1.c
+new file mode 100644
+index 000000000..a3ac08fa8
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_1.c
+@@ -0,0 +1,31 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */
++
++#include <arm_sve.h>
++
++svfloat64_t
++f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x)
++{
++ const int one = 1;
++ svfloat64_t f64;
++
++ u8 = svget2 (u8x2); /* { dg-error {too few arguments to function 'svget2'} } */
++ u8 = svget2 (u8x2, 1, 2); /* { dg-error {too many arguments to function 'svget2'} } */
++ u8 = svget2 (u8, 0); /* { dg-error {passing single vector 'svuint8_t' to argument 1 of 'svget2', which expects a tuple of 2 vectors} } */
++ u8 = svget2 (u8x3, 0); /* { dg-error {passing 'svuint8x3_t' to argument 1 of 'svget2', which expects a tuple of 2 vectors} } */
++ u8 = svget2 (pg, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svget2', which expects a tuple of 2 vectors} } */
++ u8 = svget2 (u8x2, x); /* { dg-error {argument 2 of 'svget2' must be an integer constant expression} } */
++ u8 = svget2 (u8x2, 0);
++ f64 = svget2 (u8x2, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8_t'} } */
++ u8 = svget2 (u8x2, 1);
++ u8 = svget2 (u8x2, 2); /* { dg-error {passing 2 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */
++ u8 = svget2 (u8x2, 3); /* { dg-error {passing 3 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */
++ u8 = svget2 (u8x2, 4); /* { dg-error {passing 4 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */
++ u8 = svget2 (u8x2, 5); /* { dg-error {passing 5 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */
++ u8 = svget2 (u8x2, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */
++ u8 = svget2 (u8x2, one); /* { dg-error {argument 2 of 'svget2' must be an integer constant expression} } */
++ u8 = svget2 (u8x2, 3 - 2);
++ u8 = svget2 (u8x2, 1.0);
++
++ return f64;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_2.c
+new file mode 100644
+index 000000000..4eee2439e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_2.c
+@@ -0,0 +1,33 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */
++
++#include <arm_sve.h>
++
++svfloat64_t
++f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svint8x2_t s8x2,
++ svuint8x3_t u8x3, int x)
++{
++ const int one = 1;
++ svfloat64_t f64;
++
++ u8 = svget2_u8 (u8x2); /* { dg-error {too few arguments to function 'svget2_u8'} } */
++ u8 = svget2_u8 (u8x2, 1, 2); /* { dg-error {too many arguments to function 'svget2_u8'} } */
++ u8 = svget2_u8 (u8, 0); /* { dg-error {incompatible type for argument 1 of 'svget2_u8'} } */
++ u8 = svget2_u8 (s8x2, 0); /* { dg-error {incompatible type for argument 1 of 'svget2_u8'} } */
++ u8 = svget2_u8 (u8x3, 0); /* { dg-error {incompatible type for argument 1 of 'svget2_u8'} } */
++ u8 = svget2_u8 (pg, 0); /* { dg-error {incompatible type for argument 1 of 'svget2_u8'} } */
++ u8 = svget2_u8 (u8x2, x); /* { dg-error {argument 2 of 'svget2_u8' must be an integer constant expression} } */
++ u8 = svget2_u8 (u8x2, 0);
++ f64 = svget2_u8 (u8x2, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8_t'} } */
++ u8 = svget2_u8 (u8x2, 1);
++ u8 = svget2_u8 (u8x2, 2); /* { dg-error {passing 2 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */
++ u8 = svget2_u8 (u8x2, 3); /* { dg-error {passing 3 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */
++ u8 = svget2_u8 (u8x2, 4); /* { dg-error {passing 4 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */
++ u8 = svget2_u8 (u8x2, 5); /* { dg-error {passing 5 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */
++ u8 = svget2_u8 (u8x2, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */
++ u8 = svget2_u8 (u8x2, one); /* { dg-error {argument 2 of 'svget2_u8' must be an integer constant expression} } */
++ u8 = svget2_u8 (u8x2, 3 - 2);
++ u8 = svget2_u8 (u8x2, 1.0);
++
++ return f64;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_3.c
+new file mode 100644
+index 000000000..0e7b2e227
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_3.c
+@@ -0,0 +1,32 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */
++
++#include <arm_sve.h>
++
++svfloat64_t
++f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4,
++ int x)
++{
++ const int one = 1;
++ svfloat64_t f64;
++
++ f16 = svget3 (f16x3); /* { dg-error {too few arguments to function 'svget3'} } */
++ f16 = svget3 (f16x3, 1, 2); /* { dg-error {too many arguments to function 'svget3'} } */
++ f16 = svget3 (f16, 0); /* { dg-error {passing single vector 'svfloat16_t' to argument 1 of 'svget3', which expects a tuple of 3 vectors} } */
++ f16 = svget3 (f16x4, 0); /* { dg-error {passing 'svfloat16x4_t' to argument 1 of 'svget3', which expects a tuple of 3 vectors} } */
++ f16 = svget3 (pg, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svget3', which expects a tuple of 3 vectors} } */
++ f16 = svget3 (f16x3, x); /* { dg-error {argument 2 of 'svget3' must be an integer constant expression} } */
++ f16 = svget3 (f16x3, 0);
++ f64 = svget3 (f16x3, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16_t'} } */
++ f16 = svget3 (f16x3, 1);
++ f16 = svget3 (f16x3, 2);
++ f16 = svget3 (f16x3, 3); /* { dg-error {passing 3 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */
++ f16 = svget3 (f16x3, 4); /* { dg-error {passing 4 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */
++ f16 = svget3 (f16x3, 5); /* { dg-error {passing 5 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */
++ f16 = svget3 (f16x3, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */
++ f16 = svget3 (f16x3, one); /* { dg-error {argument 2 of 'svget3' must be an integer constant expression} } */
++ f16 = svget3 (f16x3, 3 - 2);
++ f16 = svget3 (f16x3, 1.0);
++
++ return f64;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_4.c
+new file mode 100644
+index 000000000..72b4f82a6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_4.c
+@@ -0,0 +1,33 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */
++
++#include <arm_sve.h>
++
++svfloat64_t
++f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat32x3_t f32x3,
++ svfloat16x4_t f16x4, int x)
++{
++ const int one = 1;
++ svfloat64_t f64;
++
++ f16 = svget3_f16 (f16x3); /* { dg-error {too few arguments to function 'svget3_f16'} } */
++ f16 = svget3_f16 (f16x3, 1, 2); /* { dg-error {too many arguments to function 'svget3_f16'} } */
++ f16 = svget3_f16 (f16, 0); /* { dg-error {incompatible type for argument 1 of 'svget3_f16'} } */
++ f16 = svget3_f16 (f32x3, 0); /* { dg-error {incompatible type for argument 1 of 'svget3_f16'} } */
++ f16 = svget3_f16 (f16x4, 0); /* { dg-error {incompatible type for argument 1 of 'svget3_f16'} } */
++ f16 = svget3_f16 (pg, 0); /* { dg-error {incompatible type for argument 1 of 'svget3_f16'} } */
++ f16 = svget3_f16 (f16x3, x); /* { dg-error {argument 2 of 'svget3_f16' must be an integer constant expression} } */
++ f16 = svget3_f16 (f16x3, 0);
++ f64 = svget3_f16 (f16x3, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16_t'} } */
++ f16 = svget3_f16 (f16x3, 1);
++ f16 = svget3_f16 (f16x3, 2);
++ f16 = svget3_f16 (f16x3, 3); /* { dg-error {passing 3 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */
++ f16 = svget3_f16 (f16x3, 4); /* { dg-error {passing 4 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */
++ f16 = svget3_f16 (f16x3, 5); /* { dg-error {passing 5 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */
++ f16 = svget3_f16 (f16x3, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */
++ f16 = svget3_f16 (f16x3, one); /* { dg-error {argument 2 of 'svget3_f16' must be an integer constant expression} } */
++ f16 = svget3_f16 (f16x3, 3 - 2);
++ f16 = svget3_f16 (f16x3, 1.0);
++
++ return f64;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_5.c
+new file mode 100644
+index 000000000..b0b69b95e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_5.c
+@@ -0,0 +1,31 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */
++
++#include <arm_sve.h>
++
++svfloat64_t
++f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x)
++{
++ const int one = 1;
++ svfloat64_t f64;
++
++ s32 = svget4 (s32x4); /* { dg-error {too few arguments to function 'svget4'} } */
++ s32 = svget4 (s32x4, 1, 2); /* { dg-error {too many arguments to function 'svget4'} } */
++ s32 = svget4 (s32, 0); /* { dg-error {passing single vector 'svint32_t' to argument 1 of 'svget4', which expects a tuple of 4 vectors} } */
++ s32 = svget4 (s32x2, 0); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svget4', which expects a tuple of 4 vectors} } */
++ s32 = svget4 (pg, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svget4', which expects a tuple of 4 vectors} } */
++ s32 = svget4 (s32x4, x); /* { dg-error {argument 2 of 'svget4' must be an integer constant expression} } */
++ s32 = svget4 (s32x4, 0);
++ f64 = svget4 (s32x4, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32_t'} } */
++ s32 = svget4 (s32x4, 1);
++ s32 = svget4 (s32x4, 2);
++ s32 = svget4 (s32x4, 3);
++ s32 = svget4 (s32x4, 4); /* { dg-error {passing 4 to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */
++ s32 = svget4 (s32x4, 5); /* { dg-error {passing 5 to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */
++ s32 = svget4 (s32x4, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */
++ s32 = svget4 (s32x4, one); /* { dg-error {argument 2 of 'svget4' must be an integer constant expression} } */
++ s32 = svget4 (s32x4, 3 - 2);
++ s32 = svget4 (s32x4, 1.0);
++
++ return f64;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_6.c
+new file mode 100644
+index 000000000..3801c0c4e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_6.c
+@@ -0,0 +1,33 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */
++
++#include <arm_sve.h>
++
++svfloat64_t
++f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svfloat32x4_t f32x4,
++ svint32x2_t s32x2, int x)
++{
++ const int one = 1;
++ svfloat64_t f64;
++
++ s32 = svget4_s32 (s32x4); /* { dg-error {too few arguments to function 'svget4_s32'} } */
++ s32 = svget4_s32 (s32x4, 1, 2); /* { dg-error {too many arguments to function 'svget4_s32'} } */
++ s32 = svget4_s32 (s32, 0); /* { dg-error {incompatible type for argument 1 of 'svget4_s32'} } */
++ s32 = svget4_s32 (f32x4, 0); /* { dg-error {incompatible type for argument 1 of 'svget4_s32'} } */
++ s32 = svget4_s32 (s32x2, 0); /* { dg-error {incompatible type for argument 1 of 'svget4_s32'} } */
++ s32 = svget4_s32 (pg, 0); /* { dg-error {incompatible type for argument 1 of 'svget4_s32'} } */
++ s32 = svget4_s32 (s32x4, x); /* { dg-error {argument 2 of 'svget4_s32' must be an integer constant expression} } */
++ s32 = svget4_s32 (s32x4, 0);
++ f64 = svget4_s32 (s32x4, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32_t'} } */
++ s32 = svget4_s32 (s32x4, 1);
++ s32 = svget4_s32 (s32x4, 2);
++ s32 = svget4_s32 (s32x4, 3);
++ s32 = svget4_s32 (s32x4, 4); /* { dg-error {passing 4 to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */
++ s32 = svget4_s32 (s32x4, 5); /* { dg-error {passing 5 to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */
++ s32 = svget4_s32 (s32x4, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */
++ s32 = svget4_s32 (s32x4, one); /* { dg-error {argument 2 of 'svget4_s32' must be an integer constant expression} } */
++ s32 = svget4_s32 (s32x4, 3 - 2);
++ s32 = svget4_s32 (s32x4, 1.0);
++
++ return f64;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_1.c
+new file mode 100644
+index 000000000..dcd291da6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_1.c
+@@ -0,0 +1,37 @@
++#include <arm_sve.h>
++
++void
++test (svbool_t pg, svint8_t s8, svuint8_t u8,
++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
++ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh,
++ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud,
++ float f, int i)
++{
++ svqincb (sw); /* { dg-error {too few arguments to function 'svqincb'} } */
++ svqincb (sw, 1, 1); /* { dg-error {too many arguments to function 'svqincb'} } */
++
++ svqincb (pg, 1); /* { dg-error {'svqincb' has no form that takes 'svbool_t' arguments} } */
++ svqincb (s8, 1); /* { dg-error {'svqincb' has no form that takes 'svint8_t' arguments} } */
++ svqincb (u8, 1); /* { dg-error {'svqincb' has no form that takes 'svuint8_t' arguments} } */
++ svqincb (s16, 1); /* { dg-error {'svqincb' has no form that takes 'svint16_t' arguments} } */
++ svqincb (u16, 1); /* { dg-error {'svqincb' has no form that takes 'svuint16_t' arguments} } */
++ svqincb (s32, 1); /* { dg-error {'svqincb' has no form that takes 'svint32_t' arguments} } */
++ svqincb (u32, 1); /* { dg-error {'svqincb' has no form that takes 'svuint32_t' arguments} } */
++ svqincb (s64, 1); /* { dg-error {'svqincb' has no form that takes 'svint64_t' arguments} } */
++ svqincb (u64, 1); /* { dg-error {'svqincb' has no form that takes 'svuint64_t' arguments} } */
++ svqincb (sh, 1);
++ svqincb (sw, 1);
++ svqincb (sd, 1);
++ svqincb (uh, 1);
++ svqincb (uw, 1);
++ svqincb (ud, 1);
++ svqincb (f, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincb', which expects a 32-bit or 64-bit integer type} } */
++ svqincb (ud, i); /* { dg-error {argument 2 of 'svqincb' must be an integer constant expression} } */
++
++ svqincb (sw, -1); /* { dg-error {passing -1 to argument 2 of 'svqincb', which expects a value in the range \[1, 16\]} } */
++ svqincb (sw, 0); /* { dg-error {passing 0 to argument 2 of 'svqincb', which expects a value in the range \[1, 16\]} } */
++ svqincb (sw, 1);
++ svqincb (sw, 2);
++ svqincb (sw, 16);
++ svqincb (sw, 17); /* { dg-error {passing 17 to argument 2 of 'svqincb', which expects a value in the range \[1, 16\]} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_2.c
+new file mode 100644
+index 000000000..e5acad187
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_2.c
+@@ -0,0 +1,13 @@
++#include <arm_sve.h>
++
++void
++test (int32_t sw, int i)
++{
++ svqincb_n_s32 (sw, -1); /* { dg-error {passing -1 to argument 2 of 'svqincb_n_s32', which expects a value in the range \[1, 16\]} } */
++ svqincb_n_s32 (sw, 0); /* { dg-error {passing 0 to argument 2 of 'svqincb_n_s32', which expects a value in the range \[1, 16\]} } */
++ svqincb_n_s32 (sw, 1);
++ svqincb_n_s32 (sw, 2);
++ svqincb_n_s32 (sw, 16);
++ svqincb_n_s32 (sw, 17); /* { dg-error {passing 17 to argument 2 of 'svqincb_n_s32', which expects a value in the range \[1, 16\]} } */
++ svqincb_n_s32 (sw, i); /* { dg-error {argument 2 of 'svqincb_n_s32' must be an integer constant expression} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_3.c
+new file mode 100644
+index 000000000..351e7757f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_3.c
+@@ -0,0 +1,26 @@
++#include <arm_sve.h>
++
++void
++test (svbool_t pg, svint8_t s8, svuint8_t u8,
++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
++ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh,
++ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud,
++ float f)
++{
++ svqinch (pg, 1); /* { dg-error {'svqinch' has no form that takes 'svbool_t' arguments} } */
++ svqinch (s8, 1); /* { dg-error {'svqinch' has no form that takes 'svint8_t' arguments} } */
++ svqinch (u8, 1); /* { dg-error {'svqinch' has no form that takes 'svuint8_t' arguments} } */
++ svqinch (s16, 1);
++ svqinch (u16, 1);
++ svqinch (s32, 1); /* { dg-error {'svqinch' has no form that takes 'svint32_t' arguments} } */
++ svqinch (u32, 1); /* { dg-error {'svqinch' has no form that takes 'svuint32_t' arguments} } */
++ svqinch (s64, 1); /* { dg-error {'svqinch' has no form that takes 'svint64_t' arguments} } */
++ svqinch (u64, 1); /* { dg-error {'svqinch' has no form that takes 'svuint64_t' arguments} } */
++ svqinch (sh, 1);
++ svqinch (sw, 1);
++ svqinch (sd, 1);
++ svqinch (uh, 1);
++ svqinch (uw, 1);
++ svqinch (ud, 1);
++ svqinch (f, 1); /* { dg-error {passing 'float' to argument 1 of 'svqinch', which expects a 32-bit or 64-bit integer type} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_4.c
+new file mode 100644
+index 000000000..e071c0229
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_4.c
+@@ -0,0 +1,26 @@
++#include <arm_sve.h>
++
++void
++test (svbool_t pg, svint8_t s8, svuint8_t u8,
++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
++ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh,
++ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud,
++ float f)
++{
++ svqincw (pg, 1); /* { dg-error {'svqincw' has no form that takes 'svbool_t' arguments} } */
++ svqincw (s8, 1); /* { dg-error {'svqincw' has no form that takes 'svint8_t' arguments} } */
++ svqincw (u8, 1); /* { dg-error {'svqincw' has no form that takes 'svuint8_t' arguments} } */
++ svqincw (s16, 1); /* { dg-error {'svqincw' has no form that takes 'svint16_t' arguments} } */
++ svqincw (u16, 1); /* { dg-error {'svqincw' has no form that takes 'svuint16_t' arguments} } */
++ svqincw (s32, 1);
++ svqincw (u32, 1);
++ svqincw (s64, 1); /* { dg-error {'svqincw' has no form that takes 'svint64_t' arguments} } */
++ svqincw (u64, 1); /* { dg-error {'svqincw' has no form that takes 'svuint64_t' arguments} } */
++ svqincw (sh, 1);
++ svqincw (sw, 1);
++ svqincw (sd, 1);
++ svqincw (uh, 1);
++ svqincw (uw, 1);
++ svqincw (ud, 1);
++ svqincw (f, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincw', which expects a 32-bit or 64-bit integer type} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_5.c
+new file mode 100644
+index 000000000..be9c76928
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_5.c
+@@ -0,0 +1,26 @@
++#include <arm_sve.h>
++
++void
++test (svbool_t pg, svint8_t s8, svuint8_t u8,
++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
++ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh,
++ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud,
++ float f)
++{
++ svqincd (pg, 1); /* { dg-error {'svqincd' has no form that takes 'svbool_t' arguments} } */
++ svqincd (s8, 1); /* { dg-error {'svqincd' has no form that takes 'svint8_t' arguments} } */
++ svqincd (u8, 1); /* { dg-error {'svqincd' has no form that takes 'svuint8_t' arguments} } */
++ svqincd (s16, 1); /* { dg-error {'svqincd' has no form that takes 'svint16_t' arguments} } */
++ svqincd (u16, 1); /* { dg-error {'svqincd' has no form that takes 'svuint16_t' arguments} } */
++ svqincd (s32, 1); /* { dg-error {'svqincd' has no form that takes 'svint32_t' arguments} } */
++ svqincd (u32, 1); /* { dg-error {'svqincd' has no form that takes 'svuint32_t' arguments} } */
++ svqincd (s64, 1);
++ svqincd (u64, 1);
++ svqincd (sh, 1);
++ svqincd (sw, 1);
++ svqincd (sd, 1);
++ svqincd (uh, 1);
++ svqincd (uw, 1);
++ svqincd (ud, 1);
++ svqincd (f, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincd', which expects a 32-bit or 64-bit integer type} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_1.c
+new file mode 100644
+index 000000000..f2e5841d4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_1.c
+@@ -0,0 +1,47 @@
++#include <arm_sve.h>
++
++void
++test (enum svpattern pat, svbool_t pg, svint8_t s8, svuint8_t u8,
++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
++ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh,
++ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud,
++ float f, int i)
++{
++ svqincb_pat (sw, pat); /* { dg-error {too few arguments to function 'svqincb_pat'} } */
++ svqincb_pat (sw, pat, 1, 1); /* { dg-error {too many arguments to function 'svqincb_pat'} } */
++
++ svqincb_pat (pg, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svbool_t' arguments} } */
++ svqincb_pat (s8, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svint8_t' arguments} } */
++ svqincb_pat (u8, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svuint8_t' arguments} } */
++ svqincb_pat (s16, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svint16_t' arguments} } */
++ svqincb_pat (u16, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svuint16_t' arguments} } */
++ svqincb_pat (s32, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svint32_t' arguments} } */
++ svqincb_pat (u32, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svuint32_t' arguments} } */
++ svqincb_pat (s64, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svint64_t' arguments} } */
++ svqincb_pat (u64, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svuint64_t' arguments} } */
++ svqincb_pat (sh, SV_ALL, 1);
++ svqincb_pat (sw, SV_ALL, 1);
++ svqincb_pat (sd, SV_ALL, 1);
++ svqincb_pat (uh, SV_ALL, 1);
++ svqincb_pat (uw, SV_ALL, 1);
++ svqincb_pat (ud, SV_ALL, 1);
++ svqincb_pat (f, SV_ALL, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincb_pat', which expects a 32-bit or 64-bit integer type} } */
++
++ svqincb_pat (sw, pat, 1); /* { dg-error {argument 2 of 'svqincb_pat' must be an integer constant expression} } */
++ svqincb_pat (sw, i, 1); /* { dg-error {argument 2 of 'svqincb_pat' must be an integer constant expression} } */
++ svqincb_pat (sw, (enum svpattern) -1, 1); /* { dg-error {passing 4294967295 to argument 2 of 'svqincb_pat', which expects a valid 'enum svpattern' value} } */
++ svqincb_pat (sw, (enum svpattern) 0, 1);
++ svqincb_pat (sw, (enum svpattern) 13, 1);
++ svqincb_pat (sw, (enum svpattern) 14, 1); /* { dg-error {passing 14 to argument 2 of 'svqincb_pat', which expects a valid 'enum svpattern' value} } */
++ svqincb_pat (sw, (enum svpattern) 28, 1); /* { dg-error {passing 28 to argument 2 of 'svqincb_pat', which expects a valid 'enum svpattern' value} } */
++ svqincb_pat (sw, (enum svpattern) 29, 1);
++ svqincb_pat (sw, (enum svpattern) 31, 1);
++ svqincb_pat (sw, (enum svpattern) 32, 1); /* { dg-error {passing 32 to argument 2 of 'svqincb_pat', which expects a valid 'enum svpattern' value} } */
++
++ svqincb_pat (sw, SV_POW2, -1); /* { dg-error {passing -1 to argument 3 of 'svqincb_pat', which expects a value in the range \[1, 16\]} } */
++ svqincb_pat (sw, SV_POW2, 0); /* { dg-error {passing 0 to argument 3 of 'svqincb_pat', which expects a value in the range \[1, 16\]} } */
++ svqincb_pat (sw, SV_POW2, 1);
++ svqincb_pat (sw, SV_POW2, 2);
++ svqincb_pat (sw, SV_POW2, 16);
++ svqincb_pat (sw, SV_POW2, 17); /* { dg-error {passing 17 to argument 3 of 'svqincb_pat', which expects a value in the range \[1, 16\]} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_2.c
+new file mode 100644
+index 000000000..c1c1ab9d9
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_2.c
+@@ -0,0 +1,23 @@
++#include <arm_sve.h>
++
++void
++test (int32_t sw, enum svpattern pat, int i)
++{
++ svqincb_pat_n_s32 (sw, pat, 1); /* { dg-error {argument 2 of 'svqincb_pat_n_s32' must be an integer constant expression} } */
++ svqincb_pat_n_s32 (sw, i, 1); /* { dg-error {argument 2 of 'svqincb_pat_n_s32' must be an integer constant expression} } */
++ svqincb_pat_n_s32 (sw, (enum svpattern) -1, 1); /* { dg-error {passing 4294967295 to argument 2 of 'svqincb_pat_n_s32', which expects a valid 'enum svpattern' value} } */
++ svqincb_pat_n_s32 (sw, (enum svpattern) 0, 1);
++ svqincb_pat_n_s32 (sw, (enum svpattern) 13, 1);
++ svqincb_pat_n_s32 (sw, (enum svpattern) 14, 1); /* { dg-error {passing 14 to argument 2 of 'svqincb_pat_n_s32', which expects a valid 'enum svpattern' value} } */
++ svqincb_pat_n_s32 (sw, (enum svpattern) 28, 1); /* { dg-error {passing 28 to argument 2 of 'svqincb_pat_n_s32', which expects a valid 'enum svpattern' value} } */
++ svqincb_pat_n_s32 (sw, (enum svpattern) 29, 1);
++ svqincb_pat_n_s32 (sw, (enum svpattern) 31, 1);
++ svqincb_pat_n_s32 (sw, (enum svpattern) 32, 1); /* { dg-error {passing 32 to argument 2 of 'svqincb_pat_n_s32', which expects a valid 'enum svpattern' value} } */
++
++ svqincb_pat_n_s32 (sw, SV_POW2, -1); /* { dg-error {passing -1 to argument 3 of 'svqincb_pat_n_s32', which expects a value in the range \[1, 16\]} } */
++ svqincb_pat_n_s32 (sw, SV_POW2, 0); /* { dg-error {passing 0 to argument 3 of 'svqincb_pat_n_s32', which expects a value in the range \[1, 16\]} } */
++ svqincb_pat_n_s32 (sw, SV_POW2, 1);
++ svqincb_pat_n_s32 (sw, SV_POW2, 2);
++ svqincb_pat_n_s32 (sw, SV_POW2, 16);
++ svqincb_pat_n_s32 (sw, SV_POW2, 17); /* { dg-error {passing 17 to argument 3 of 'svqincb_pat_n_s32', which expects a value in the range \[1, 16\]} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_3.c
+new file mode 100644
+index 000000000..4126b2461
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_3.c
+@@ -0,0 +1,26 @@
++#include <arm_sve.h>
++
++void
++test (svbool_t pg, svint8_t s8, svuint8_t u8,
++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
++ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh,
++ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud,
++ float f)
++{
++ svqinch_pat (pg, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svbool_t' arguments} } */
++ svqinch_pat (s8, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svint8_t' arguments} } */
++ svqinch_pat (u8, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svuint8_t' arguments} } */
++ svqinch_pat (s16, SV_ALL, 1);
++ svqinch_pat (u16, SV_ALL, 1);
++ svqinch_pat (s32, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svint32_t' arguments} } */
++ svqinch_pat (u32, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svuint32_t' arguments} } */
++ svqinch_pat (s64, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svint64_t' arguments} } */
++ svqinch_pat (u64, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svuint64_t' arguments} } */
++ svqinch_pat (sh, SV_ALL, 1);
++ svqinch_pat (sw, SV_ALL, 1);
++ svqinch_pat (sd, SV_ALL, 1);
++ svqinch_pat (uh, SV_ALL, 1);
++ svqinch_pat (uw, SV_ALL, 1);
++ svqinch_pat (ud, SV_ALL, 1);
++ svqinch_pat (f, SV_ALL, 1); /* { dg-error {passing 'float' to argument 1 of 'svqinch_pat', which expects a 32-bit or 64-bit integer type} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_4.c
+new file mode 100644
+index 000000000..9aabbd714
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_4.c
+@@ -0,0 +1,26 @@
++#include <arm_sve.h>
++
++void
++test (svbool_t pg, svint8_t s8, svuint8_t u8,
++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
++ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh,
++ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud,
++ float f)
++{
++ svqincw_pat (pg, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svbool_t' arguments} } */
++ svqincw_pat (s8, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svint8_t' arguments} } */
++ svqincw_pat (u8, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svuint8_t' arguments} } */
++ svqincw_pat (s16, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svint16_t' arguments} } */
++ svqincw_pat (u16, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svuint16_t' arguments} } */
++ svqincw_pat (s32, SV_ALL, 1);
++ svqincw_pat (u32, SV_ALL, 1);
++ svqincw_pat (s64, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svint64_t' arguments} } */
++ svqincw_pat (u64, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svuint64_t' arguments} } */
++ svqincw_pat (sh, SV_ALL, 1);
++ svqincw_pat (sw, SV_ALL, 1);
++ svqincw_pat (sd, SV_ALL, 1);
++ svqincw_pat (uh, SV_ALL, 1);
++ svqincw_pat (uw, SV_ALL, 1);
++ svqincw_pat (ud, SV_ALL, 1);
++ svqincw_pat (f, SV_ALL, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincw_pat', which expects a 32-bit or 64-bit integer type} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_5.c
+new file mode 100644
+index 000000000..5df88c649
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_5.c
+@@ -0,0 +1,26 @@
++#include <arm_sve.h>
++
++void
++test (svbool_t pg, svint8_t s8, svuint8_t u8,
++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
++ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh,
++ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud,
++ float f)
++{
++ svqincd_pat (pg, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svbool_t' arguments} } */
++ svqincd_pat (s8, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svint8_t' arguments} } */
++ svqincd_pat (u8, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svuint8_t' arguments} } */
++ svqincd_pat (s16, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svint16_t' arguments} } */
++ svqincd_pat (u16, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svuint16_t' arguments} } */
++ svqincd_pat (s32, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svint32_t' arguments} } */
++ svqincd_pat (u32, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svuint32_t' arguments} } */
++ svqincd_pat (s64, SV_ALL, 1);
++ svqincd_pat (u64, SV_ALL, 1);
++ svqincd_pat (sh, SV_ALL, 1);
++ svqincd_pat (sw, SV_ALL, 1);
++ svqincd_pat (sd, SV_ALL, 1);
++ svqincd_pat (uh, SV_ALL, 1);
++ svqincd_pat (uw, SV_ALL, 1);
++ svqincd_pat (ud, SV_ALL, 1);
++ svqincd_pat (f, SV_ALL, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincd_pat', which expects a 32-bit or 64-bit integer type} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c
+new file mode 100644
+index 000000000..a61afcd2d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c
+@@ -0,0 +1,22 @@
++#include <arm_sve.h>
++
++void
++test (svbool_t pg, svint8_t s8, svuint8_t u8,
++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
++ svint64_t s64, svuint64_t u64, int i)
++{
++ svqincp (s32); /* { dg-error {too few arguments to function 'svqincp'} } */
++ svqincp (s32, pg, pg); /* { dg-error {too many arguments to function 'svqincp'} } */
++ svqincp (i, pg); /* { dg-error {passing 'int' to argument 1 of 'svqincp', which expects an SVE vector type} } */
++ svqincp (pg, pg); /* { dg-error {'svqincp' has no form that takes 'svbool_t' arguments} } */
++ svqincp (s8, pg); /* { dg-error {'svqincp' has no form that takes 'svint8_t' arguments} } */
++ svqincp (u8, pg); /* { dg-error {'svqincp' has no form that takes 'svuint8_t' arguments} } */
++ svqincp (s16, pg);
++ svqincp (u16, pg);
++ svqincp (s32, pg);
++ svqincp (u32, pg);
++ svqincp (s64, pg);
++ svqincp (u64, pg);
++ svqincp (u64, 0); /* { dg-error {passing 'int' to argument 2 of 'svqincp', which expects 'svbool_t'} } */
++ svqincp (u64, u64); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svqincp', which expects 'svbool_t'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_scalar_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_scalar_1.c
+new file mode 100644
+index 000000000..94ebe7e7a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_scalar_1.c
+@@ -0,0 +1,19 @@
++#include <arm_sve.h>
++
++void
++test (svbool_t pg, svint32_t s32, svuint64_t u64, int16_t sh, uint16_t uh,
++ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud)
++{
++ svqincp_b8 (s32); /* { dg-error {too few arguments to function 'svqincp_b8'} } */
++ svqincp_b8 (s32, pg, pg); /* { dg-error {too many arguments to function 'svqincp_b8'} } */
++ svqincp_b8 (pg, pg); /* { dg-error {passing 'svbool_t' to argument 1 of 'svqincp_b8', which expects a 32-bit or 64-bit integer type} } */
++ svqincp_b8 (s32, pg); /* { dg-error {passing 'svint32_t' to argument 1 of 'svqincp_b8', which expects a 32-bit or 64-bit integer type} } */
++ svqincp_b8 (sh, pg);
++ svqincp_b8 (uh, pg);
++ svqincp_b8 (sw, pg);
++ svqincp_b8 (uw, pg);
++ svqincp_b8 (sd, pg);
++ svqincp_b8 (ud, pg);
++ svqincp_b8 (ud, 0); /* { dg-error {passing 'int' to argument 2 of 'svqincp_b8', which expects 'svbool_t'} } */
++ svqincp_b8 (ud, u64); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svqincp_b8', which expects 'svbool_t'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ld1sh_gather_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ld1sh_gather_1.c
+new file mode 100644
+index 000000000..91f37f6a5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ld1sh_gather_1.c
+@@ -0,0 +1,35 @@
++/* { dg-do compile } */
++/* { dg-options "-std=c99 -Wpointer-sign" } */
++
++#include <arm_sve.h>
++
++struct s { int i; };
++
++void
++f1 (svbool_t pg, short *s16_ptr, unsigned short *u16_ptr,
++ svint8_t s8, svint16_t s16,
++ svint32_t s32, svuint32_t u32, svfloat32_t f32,
++ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s)
++{
++ svld1sh_gather_index (pg, s16_ptr, s32); /* { dg-warning {implicit declaration of function 'svld1sh_gather_index'; did you mean 'svld1_gather_index'} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr); /* { dg-error {too few arguments to function 'svld1sh_gather_index_u32'} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1sh_gather_index_u32'} } */
++ svld1sh_gather_index_u32 (pg, u16_ptr, s32); /* { dg-warning {pointer targets in passing argument 2 of 'svld1sh_gather_s32index_u32' differ in signedness} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, s32);
++ svld1sh_gather_index_u32 (pg, s16_ptr, u32);
++ svld1sh_gather_index_u32 (pg, s16_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++
++ svld1sh_gather_index_u32 (pg, 0, s32);
++ svld1sh_gather_index_u32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1sh_gather_index_u32', which expects a vector or pointer base address} } */
++
++ svld1sh_gather_index_u32 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */
++ svld1sh_gather_index_u32 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */
++ svld1sh_gather_index_u32 (pg, u32, 0);
++ svld1sh_gather_index_u32 (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_1.c
+new file mode 100644
+index 000000000..34f989bf8
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_1.c
+@@ -0,0 +1,23 @@
++/* { dg-do compile } */
++/* { dg-options "-std=c99" } */
++
++#include <arm_sve.h>
++
++struct s { signed char x; };
++
++svuint8_t
++f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr,
++ float *f32_ptr, _Complex float *cf32_ptr, int **ptr_ptr)
++{
++ svld1 (pg); /* { dg-error {too few arguments to function 'svld1'} } */
++ svld1 (pg, s8_ptr, 0); /* { dg-error {too many arguments to function 'svld1'} } */
++ svld1 (0, s8_ptr); /* { dg-error {passing 'int' to argument 1 of 'svld1', which expects 'svbool_t'} } */
++ svld1 (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svld1', which expects a pointer type} } */
++ svld1 (pg, (int *) 0);
++ svld1 (pg, void_ptr); /* { dg-error {passing 'void \*' to argument 2 of 'svld1', but 'void' is not a valid SVE element type} } */
++ svld1 (pg, s_ptr); /* { dg-error {passing 'struct s \*' to argument 2 of 'svld1', but 'struct s' is not a valid SVE element type} } */
++ svld1 (pg, f32_ptr);
++ svld1 (pg, cf32_ptr); /* { dg-error {passing '_Complex float \*' to argument 2 of 'svld1', but 'complex float' is not a valid SVE element type} } */
++ svld1 (pg, ptr_ptr); /* { dg-error {passing 'int \*\*' to argument 2 of 'svld1', but 'int \*' is not a valid SVE element type} } */
++ return svld1 (pg, s8_ptr); /* { dg-error {incompatible types when returning type 'svint8_t' but 'svuint8_t' was expected} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_2.c
+new file mode 100644
+index 000000000..beb07f138
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_2.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-std=c99" } */
++
++#include <arm_sve.h>
++
++struct s { signed char x; };
++
++svuint8_t
++f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr,
++ float *f32_ptr, _Complex float *cf32_ptr)
++{
++ svld1_s8 (pg); /* { dg-error {too few arguments to function 'svld1_s8'} } */
++ svld1_s8 (pg, s8_ptr, 0); /* { dg-error {too many arguments to function 'svld1_s8'} } */
++ svld1_s8 (0, 0); /* { dg-error {incompatible type for argument 1 of 'svld1_s8'} } */
++ svld1_s8 (pg, 0);
++ svld1_s32 (pg, (int *) 0);
++ svld1_s8 (pg, void_ptr);
++ svld1_s8 (pg, s_ptr); /* { dg-warning {passing argument 2 of 'svld1_s8' from incompatible pointer type} } */
++ svld1_f32 (pg, f32_ptr);
++ svld1_f32 (pg, cf32_ptr); /* { dg-warning {passing argument 2 of 'svld1_f32' from incompatible pointer type} } */
++ return svld1_s8 (pg, s8_ptr); /* { dg-error {incompatible types when returning type 'svint8_t' but 'svuint8_t' was expected} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_3.c
+new file mode 100644
+index 000000000..770203f64
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_3.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-std=c99" } */
++
++#include <arm_sve.h>
++
++struct s { signed char x; };
++
++svuint8_t
++f1 (svbool_t pg, signed char *s8_ptr, svint8_t s8)
++{
++ svld1_vnum (pg); /* { dg-error {too few arguments to function 'svld1_vnum'} } */
++ svld1_vnum (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1_vnum'} } */
++ svld1_vnum (pg, s8_ptr, 0, 0); /* { dg-error {too many arguments to function 'svld1_vnum'} } */
++ svld1_vnum (0, s8_ptr, 0); /* { dg-error {passing 'int' to argument 1 of 'svld1_vnum', which expects 'svbool_t'} } */
++ svld1_vnum (pg, 0, 0); /* { dg-error {passing 'int' to argument 2 of 'svld1_vnum', which expects a pointer type} } */
++ svld1_vnum (pg, s8_ptr, s8_ptr); /* { dg-warning "passing argument 3 of 'svld1_vnum_s8' makes integer from pointer without a cast" } */
++ svld1_vnum (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1_vnum', which expects 'int64_t'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_index_1.c
+new file mode 100644
+index 000000000..91f37f6a5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_index_1.c
+@@ -0,0 +1,35 @@
++/* { dg-do compile } */
++/* { dg-options "-std=c99 -Wpointer-sign" } */
++
++#include <arm_sve.h>
++
++struct s { int i; };
++
++void
++f1 (svbool_t pg, short *s16_ptr, unsigned short *u16_ptr,
++ svint8_t s8, svint16_t s16,
++ svint32_t s32, svuint32_t u32, svfloat32_t f32,
++ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s)
++{
++ svld1sh_gather_index (pg, s16_ptr, s32); /* { dg-warning {implicit declaration of function 'svld1sh_gather_index'; did you mean 'svld1_gather_index'} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr); /* { dg-error {too few arguments to function 'svld1sh_gather_index_u32'} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1sh_gather_index_u32'} } */
++ svld1sh_gather_index_u32 (pg, u16_ptr, s32); /* { dg-warning {pointer targets in passing argument 2 of 'svld1sh_gather_s32index_u32' differ in signedness} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, s32);
++ svld1sh_gather_index_u32 (pg, s16_ptr, u32);
++ svld1sh_gather_index_u32 (pg, s16_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++ svld1sh_gather_index_u32 (pg, s16_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */
++
++ svld1sh_gather_index_u32 (pg, 0, s32);
++ svld1sh_gather_index_u32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1sh_gather_index_u32', which expects a vector or pointer base address} } */
svld1sh_gather_index_u32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1sh_gather_index_u32', which expects a vector or pointer base address} } */ ++ ++ svld1sh_gather_index_u32 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */ ++ svld1sh_gather_index_u32 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */ ++ svld1sh_gather_index_u32 (pg, u32, 0); ++ svld1sh_gather_index_u32 (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_1.c +new file mode 100644 +index 000000000..dae4d0ce1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_1.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { int i; }; ++ ++void ++f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, ++ svint8_t s8, svint16_t s16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) ++{ ++ svld1sb_gather_offset (pg, s8_ptr, s32); /* { dg-warning {implicit declaration of function 'svld1sb_gather_offset'; did you mean 'svld1_gather_offset'} } */ ++ svld1sb_gather_offset_s32 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1sb_gather_offset_s32'} } */ ++ svld1sb_gather_offset_s32 (pg, s8_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1sb_gather_offset_s32'} } */ ++ svld1sb_gather_offset_s32 (pg, s16_ptr, s32); /* { dg-warning {passing argument 2 of 'svld1sb_gather_s32offset_s32' from incompatible pointer type} } */ ++ svld1sb_gather_offset_s32 (pg, s8_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_s32 (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_s32 (pg, s8_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_s32 (pg, s8_ptr, s32); ++ svld1sb_gather_offset_s32 (pg, s8_ptr, u32); ++ svld1sb_gather_offset_s32 (pg, s8_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_s32 (pg, s8_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_s32 (pg, s8_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_s32 (pg, s8_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ ++ svld1sb_gather_offset_s32 (pg, 0, s32); ++ svld1sb_gather_offset_s32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1sb_gather_offset_s32', which expects a vector or pointer base address} } */ ++ ++ svld1sb_gather_offset_s32 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 
'svld1sb_gather_offset_s32', which expects 'svuint32_t'} } */ ++ svld1sb_gather_offset_s32 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sb_gather_offset_s32', which expects 'svuint32_t'} } */ ++ svld1sb_gather_offset_s32 (pg, u32, 0); ++ svld1sb_gather_offset_s32 (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1sb_gather_offset_s32', which expects 'svuint32_t'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_2.c +new file mode 100644 +index 000000000..1bc66977c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_2.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { int i; }; ++ ++void ++f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, ++ svint8_t s8, svint16_t s16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) ++{ ++ svld1sb_gather_offset (pg, s8_ptr, s32); /* { dg-warning {implicit declaration of function 'svld1sb_gather_offset'; did you mean 'svld1_gather_offset'} } */ ++ svld1sb_gather_offset_u32 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1sb_gather_offset_u32'} } */ ++ svld1sb_gather_offset_u32 (pg, s8_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1sb_gather_offset_u32'} } */ ++ svld1sb_gather_offset_u32 (pg, s16_ptr, s32); /* { dg-warning {passing argument 2 of 'svld1sb_gather_s32offset_u32' from incompatible pointer type} } */ ++ svld1sb_gather_offset_u32 (pg, s8_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_u32 (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_u32 (pg, s8_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_u32 (pg, s8_ptr, s32); ++ svld1sb_gather_offset_u32 (pg, s8_ptr, u32); ++ svld1sb_gather_offset_u32 (pg, s8_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_u32 (pg, s8_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_u32 (pg, s8_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ ++ svld1sb_gather_offset_u32 (pg, s8_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ ++ ++ svld1sb_gather_offset_u32 (pg, 0, s32); ++ svld1sb_gather_offset_u32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1sb_gather_offset_u32', which expects a vector or pointer base address} } */ ++ ++ svld1sb_gather_offset_u32 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1sb_gather_offset_u32', which expects 'svuint32_t'} } */ ++ svld1sb_gather_offset_u32 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sb_gather_offset_u32', which expects 'svuint32_t'} } */ ++ svld1sb_gather_offset_u32 (pg, u32, 
0); ++ svld1sb_gather_offset_u32 (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1sb_gather_offset_u32', which expects 'svuint32_t'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_3.c +new file mode 100644 +index 000000000..6522889db +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_3.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { int i; }; ++ ++void ++f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, ++ svint8_t s8, svint16_t s16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) ++{ ++ svld1sb_gather_offset (pg, s8_ptr, s64); /* { dg-warning {implicit declaration of function 'svld1sb_gather_offset'; did you mean 'svld1_gather_offset'} } */ ++ svld1sb_gather_offset_s64 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1sb_gather_offset_s64'} } */ ++ svld1sb_gather_offset_s64 (pg, s8_ptr, s64, 0); /* { dg-error {too many arguments to function 'svld1sb_gather_offset_s64'} } */ ++ svld1sb_gather_offset_s64 (pg, s16_ptr, s64); /* { dg-warning {passing argument 2 of 'svld1sb_gather_s64offset_s64' from incompatible pointer type} } */ ++ svld1sb_gather_offset_s64 (pg, s8_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_s64 (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_s64 (pg, s8_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_s64 (pg, s8_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_s64 (pg, s8_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_s64 (pg, s8_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_s64 (pg, s8_ptr, s64); ++ svld1sb_gather_offset_s64 (pg, s8_ptr, u64); ++ svld1sb_gather_offset_s64 (pg, s8_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ ++ ++ svld1sb_gather_offset_s64 (pg, 0, s64); ++ svld1sb_gather_offset_s64 (pg, s, s64); /* { dg-error {'struct s' to argument 2 of 'svld1sb_gather_offset_s64', which expects a vector or pointer base address} } */ ++ ++ svld1sb_gather_offset_s64 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1sb_gather_offset_s64', which expects 'svuint64_t'} } */ ++ svld1sb_gather_offset_s64 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sb_gather_offset_s64', which expects 'svuint64_t'} } */ ++ svld1sb_gather_offset_s64 (pg, u32, 0); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svld1sb_gather_offset_s64', which expects 'svuint64_t'} } */ ++ svld1sb_gather_offset_s64 (pg, u64, 0); ++} +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_4.c +new file mode 100644 +index 000000000..025621989 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_4.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { int i; }; ++ ++void ++f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, ++ svint8_t s8, svint16_t s16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) ++{ ++ svld1sb_gather_offset (pg, s8_ptr, s64); /* { dg-warning {implicit declaration of function 'svld1sb_gather_offset'; did you mean 'svld1_gather_offset'} } */ ++ svld1sb_gather_offset_u64 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1sb_gather_offset_u64'} } */ ++ svld1sb_gather_offset_u64 (pg, s8_ptr, s64, 0); /* { dg-error {too many arguments to function 'svld1sb_gather_offset_u64'} } */ ++ svld1sb_gather_offset_u64 (pg, s16_ptr, s64); /* { dg-warning {passing argument 2 of 'svld1sb_gather_s64offset_u64' from incompatible pointer type} } */ ++ svld1sb_gather_offset_u64 (pg, s8_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_u64 (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_u64 (pg, s8_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_u64 (pg, s8_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_u64 (pg, s8_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_u64 (pg, s8_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ ++ svld1sb_gather_offset_u64 (pg, s8_ptr, s64); ++ svld1sb_gather_offset_u64 (pg, s8_ptr, u64); ++ svld1sb_gather_offset_u64 (pg, s8_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ ++ ++ svld1sb_gather_offset_u64 (pg, 0, s64); ++ svld1sb_gather_offset_u64 (pg, s, s64); /* { dg-error {'struct s' to argument 2 of 'svld1sb_gather_offset_u64', which expects a vector or pointer base address} } */ ++ ++ svld1sb_gather_offset_u64 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1sb_gather_offset_u64', which expects 'svuint64_t'} } */ ++ svld1sb_gather_offset_u64 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sb_gather_offset_u64', which expects 'svuint64_t'} } */ ++ svld1sb_gather_offset_u64 (pg, u32, 0); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svld1sb_gather_offset_u64', which expects 'svuint64_t'} } */ ++ svld1sb_gather_offset_u64 (pg, u64, 0); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_5.c +new file mode 100644 +index 000000000..8d57aa020 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_5.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { int i; }; ++ ++void ++f1 (svbool_t pg, unsigned char *s8_ptr, unsigned short *s16_ptr, ++ svint8_t s8, svint16_t s16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) ++{ ++ svld1ub_gather_offset (pg, s8_ptr, s32); /* { dg-warning {implicit declaration of function 'svld1ub_gather_offset'; did you mean 'svld1_gather_offset'} } */ ++ svld1ub_gather_offset_s32 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1ub_gather_offset_s32'} } */ ++ svld1ub_gather_offset_s32 (pg, s8_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1ub_gather_offset_s32'} } */ ++ svld1ub_gather_offset_s32 (pg, s16_ptr, s32); /* { dg-warning {passing argument 2 of 'svld1ub_gather_s32offset_s32' from incompatible pointer type} } */ ++ svld1ub_gather_offset_s32 (pg, s8_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1ub_gather_offset_s32 (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1ub_gather_offset_s32 (pg, s8_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1ub_gather_offset_s32 (pg, s8_ptr, s32); ++ svld1ub_gather_offset_s32 (pg, s8_ptr, u32); ++ svld1ub_gather_offset_s32 (pg, s8_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1ub_gather_offset_s32 (pg, s8_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1ub_gather_offset_s32 (pg, s8_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ svld1ub_gather_offset_s32 (pg, s8_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ ++ ++ svld1ub_gather_offset_s32 (pg, 0, s32); ++ svld1ub_gather_offset_s32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1ub_gather_offset_s32', which expects a vector or pointer base address} } */ ++ ++ svld1ub_gather_offset_s32 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1ub_gather_offset_s32', which expects 'svuint32_t'} } */ ++ svld1ub_gather_offset_s32 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1ub_gather_offset_s32', which expects 'svuint32_t'} } */ ++ svld1ub_gather_offset_s32 (pg, u32, 0); ++ svld1ub_gather_offset_s32 (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1ub_gather_offset_s32', which expects 'svuint32_t'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_1.c +new file mode 100644 +index 000000000..21566a9d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_1.c +@@ -0,0 +1,80 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { signed char x; }; ++ ++svuint32_t ++f1 (svbool_t pg, signed char *s8_ptr, short 
*s16_ptr, ++ int32_t *s32_ptr, uint32_t *u32_ptr, float *f32_ptr, ++ int64_t *s64_ptr, uint64_t *u64_ptr, double *f64_ptr, ++ void *void_ptr, struct s *s_ptr, _Complex float *cf32_ptr, int **ptr_ptr, ++ svint8_t s8, svint16_t s16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64) ++{ ++ svld1_gather_offset (pg, s32_ptr); /* { dg-error {too few arguments to function 'svld1_gather_offset'} } */ ++ svld1_gather_offset (pg, s32_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1_gather_offset'} } */ ++ svld1_gather_offset (0, s32_ptr, s32); /* { dg-error {passing 'int' to argument 1 of 'svld1_gather_offset', which expects 'svbool_t'} } */ ++ svld1_gather_offset (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svld1_gather_offset', which expects a pointer type} } */ ++ svld1_gather_offset (pg, (int *) 0, s32); ++ svld1_gather_offset (pg, void_ptr, s32); /* { dg-error {passing 'void \*' to argument 2 of 'svld1_gather_offset', but 'void' is not a valid SVE element type} } */ ++ svld1_gather_offset (pg, s_ptr, s32); /* { dg-error {passing 'struct s \*' to argument 2 of 'svld1_gather_offset', but 'struct s' is not a valid SVE element type} } */ ++ svld1_gather_offset (pg, f32_ptr, s32); ++ svld1_gather_offset (pg, cf32_ptr, s32); /* { dg-error {passing '_Complex float \*' to argument 2 of 'svld1_gather_offset', but 'complex float' is not a valid SVE element type} } */ ++ svld1_gather_offset (pg, ptr_ptr, u64); /* { dg-error {passing 'int \*\*' to argument 2 of 'svld1_gather_offset', but 'int \*' is not a valid SVE element type} } */ ++ svld1_gather_offset (pg, u32, 0); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svld1_gather_offset', which expects a pointer type} } */ ++ /* { dg-message {an explicit type suffix is needed when using a vector of base addresses} "" { target *-*-* } .-1 } */ ++ svld1_gather_offset (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1_gather_offset', which expects a pointer type} } */ ++ /* { dg-message {an explicit type suffix is needed when using a vector of base addresses} "" { target *-*-* } .-1 } */ ++ ++ svld1_gather_offset (pg, s8_ptr, s8); /* { dg-error {passing 'signed char \*' to argument 2 of 'svld1_gather_offset', which expects a pointer to 32-bit or 64-bit elements} } */ ++ svld1_gather_offset (pg, s8_ptr, s32); /* { dg-error {passing 'signed char \*' to argument 2 of 'svld1_gather_offset', which expects a pointer to 32-bit or 64-bit elements} } */ ++ svld1_gather_offset (pg, s16_ptr, s16); /* { dg-error {passing 'short( int)? \*' to argument 2 of 'svld1_gather_offset', which expects a pointer to 32-bit or 64-bit elements} } */ ++ svld1_gather_offset (pg, s16_ptr, s32); /* { dg-error {passing 'short( int)? 
\*' to argument 2 of 'svld1_gather_offset', which expects a pointer to 32-bit or 64-bit elements} } */ ++ ++ svld1_gather_offset (pg, s32_ptr, s32); ++ svld1_gather_offset (pg, s32_ptr, u32); ++ svld1_gather_offset (pg, s32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_offset (pg, s32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_offset (pg, s32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_offset (pg, s32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ ++ ++ svld1_gather_offset (pg, u32_ptr, s32); ++ svld1_gather_offset (pg, u32_ptr, u32); ++ svld1_gather_offset (pg, u32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_offset (pg, u32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_offset (pg, u32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_offset (pg, u32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ ++ ++ svld1_gather_offset (pg, f32_ptr, s32); ++ svld1_gather_offset (pg, f32_ptr, u32); ++ svld1_gather_offset (pg, f32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_offset (pg, f32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_offset (pg, f32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_offset (pg, f32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ ++ svld1_gather_offset (pg, s64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_offset (pg, s64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_offset (pg, s64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_offset (pg, s64_ptr, s64); ++ svld1_gather_offset (pg, s64_ptr, u64); ++ svld1_gather_offset (pg, s64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ ++ ++ 
svld1_gather_offset (pg, u64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_offset (pg, u64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_offset (pg, u64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_offset (pg, u64_ptr, s64); ++ svld1_gather_offset (pg, u64_ptr, u64); ++ svld1_gather_offset (pg, u64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ ++ ++ svld1_gather_offset (pg, f64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_offset (pg, f64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_offset (pg, f64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_offset (pg, f64_ptr, s64); ++ svld1_gather_offset (pg, f64_ptr, u64); ++ svld1_gather_offset (pg, f64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ ++ return svld1_gather_offset (pg, s32_ptr, s32); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_2.c +new file mode 100644 +index 000000000..4c15fc40c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_2.c +@@ -0,0 +1,80 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { signed char x; }; ++ ++svuint32_t ++f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, ++ int32_t *s32_ptr, uint32_t *u32_ptr, float *f32_ptr, ++ int64_t *s64_ptr, uint64_t *u64_ptr, double *f64_ptr, ++ void *void_ptr, struct s *s_ptr, _Complex float *cf32_ptr, int **ptr_ptr, ++ svint8_t s8, svint16_t s16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64) ++{ ++ svld1_gather_index (pg, s32_ptr); /* { dg-error {too few arguments to function 'svld1_gather_index'} } */ ++ svld1_gather_index (pg, s32_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1_gather_index'} } */ ++ svld1_gather_index (0, s32_ptr, s32); /* { dg-error {passing 'int' to argument 1 of 'svld1_gather_index', which expects 'svbool_t'} } */ ++ svld1_gather_index (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svld1_gather_index', which expects a pointer type} } */ ++ svld1_gather_index (pg, (int *) 0, s32); ++ svld1_gather_index (pg, void_ptr, s32); /* { dg-error {passing 'void \*' to argument 2 of 'svld1_gather_index', but 'void' is not a valid SVE element type} } */ ++ svld1_gather_index (pg, s_ptr, s32); /* { dg-error {passing 'struct s \*' to argument 2 of 'svld1_gather_index', but 'struct s' is 
not a valid SVE element type} } */ ++ svld1_gather_index (pg, f32_ptr, s32); ++ svld1_gather_index (pg, cf32_ptr, s32); /* { dg-error {passing '_Complex float \*' to argument 2 of 'svld1_gather_index', but 'complex float' is not a valid SVE element type} } */ ++ svld1_gather_index (pg, ptr_ptr, u64); /* { dg-error {passing 'int \*\*' to argument 2 of 'svld1_gather_index', but 'int \*' is not a valid SVE element type} } */ ++ svld1_gather_index (pg, u32, 0); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svld1_gather_index', which expects a pointer type} } */ ++ /* { dg-message {an explicit type suffix is needed when using a vector of base addresses} "" { target *-*-* } .-1 } */ ++ svld1_gather_index (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1_gather_index', which expects a pointer type} } */ ++ /* { dg-message {an explicit type suffix is needed when using a vector of base addresses} "" { target *-*-* } .-1 } */ ++ ++ svld1_gather_index (pg, s8_ptr, s8); /* { dg-error {passing 'signed char \*' to argument 2 of 'svld1_gather_index', which expects a pointer to 32-bit or 64-bit elements} } */ ++ svld1_gather_index (pg, s8_ptr, s32); /* { dg-error {passing 'signed char \*' to argument 2 of 'svld1_gather_index', which expects a pointer to 32-bit or 64-bit elements} } */ ++ svld1_gather_index (pg, s16_ptr, s16); /* { dg-error {passing 'short( int)? \*' to argument 2 of 'svld1_gather_index', which expects a pointer to 32-bit or 64-bit elements} } */ ++ svld1_gather_index (pg, s16_ptr, s32); /* { dg-error {passing 'short( int)? \*' to argument 2 of 'svld1_gather_index', which expects a pointer to 32-bit or 64-bit elements} } */ ++ ++ svld1_gather_index (pg, s32_ptr, s32); ++ svld1_gather_index (pg, s32_ptr, u32); ++ svld1_gather_index (pg, s32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_index', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_index (pg, s32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_index (pg, s32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_index (pg, s32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ ++ ++ svld1_gather_index (pg, u32_ptr, s32); ++ svld1_gather_index (pg, u32_ptr, u32); ++ svld1_gather_index (pg, u32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_index (pg, u32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_index (pg, u32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_index (pg, u32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ ++ ++ svld1_gather_index (pg, f32_ptr, s32); ++ svld1_gather_index (pg, f32_ptr, u32); ++ svld1_gather_index (pg, f32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to 
argument 3 of 'svld1_gather_index', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_index (pg, f32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_index (pg, f32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svld1_gather_index (pg, f32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ ++ svld1_gather_index (pg, s64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_index (pg, s64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_index (pg, s64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_index', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_index (pg, s64_ptr, s64); ++ svld1_gather_index (pg, s64_ptr, u64); ++ svld1_gather_index (pg, s64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ ++ ++ svld1_gather_index (pg, u64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_index (pg, u64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_index (pg, u64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_index (pg, u64_ptr, s64); ++ svld1_gather_index (pg, u64_ptr, u64); ++ svld1_gather_index (pg, u64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ ++ ++ svld1_gather_index (pg, f64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_index (pg, f64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_index (pg, f64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svld1_gather_index (pg, f64_ptr, s64); ++ svld1_gather_index (pg, f64_ptr, u64); ++ svld1_gather_index (pg, f64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ ++ return svld1_gather_index (pg, s32_ptr, s32); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_replicate_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_replicate_1.c +new file mode 100644 +index 000000000..d4ff76ea8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_replicate_1.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { signed char x; }; ++ ++svuint8_t ++f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, ++ float *f32_ptr, _Complex float *cf32_ptr, int **ptr_ptr) ++{ ++ svld1rq (pg); /* { dg-error {too few arguments to function 'svld1rq'} } */ ++ svld1rq (pg, s8_ptr, 0); /* { dg-error {too many arguments to function 'svld1rq'} } */ ++ svld1rq (0, s8_ptr); /* { dg-error {passing 'int' to argument 1 of 'svld1rq', which expects 'svbool_t'} } */ ++ svld1rq (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svld1rq', which expects a pointer type} } */ ++ svld1rq (pg, (int *) 0); ++ svld1rq (pg, void_ptr); /* { dg-error {passing 'void \*' to argument 2 of 'svld1rq', but 'void' is not a valid SVE element type} } */ ++ svld1rq (pg, s_ptr); /* { dg-error {passing 'struct s \*' to argument 2 of 'svld1rq', but 'struct s' is not a valid SVE element type} } */ ++ svld1rq (pg, f32_ptr); ++ svld1rq (pg, cf32_ptr); /* { dg-error {passing '_Complex float \*' to argument 2 of 'svld1rq', but 'complex float' is not a valid SVE element type} } */ ++ svld1rq (pg, ptr_ptr); /* { dg-error {passing 'int \*\*' to argument 2 of 'svld1rq', but 'int \*' is not a valid SVE element type} } */ ++ return svld1rq (pg, s8_ptr); /* { dg-error {incompatible types when returning type 'svint8_t' but 'svuint8_t' was expected} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c +new file mode 100644 +index 000000000..5b0b00e96 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-march=armv8.2-a+sve+i8mm+f32mm+f64mm" } */ ++ ++#include ++ ++svuint32_t ++f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32) ++{ ++ svmmla_s32 (s32); /* { dg-error {too few arguments to function 'svmmla_s32'} } */ ++ svmmla_s32 (s32, s8, s8, u32); /* { dg-error {too many arguments to function 'svmmla_s32'} } */ ++ svmmla_s32 (s32, u32, s8); /* { dg-error {incompatible type for argument 2 of 'svmmla_s32'} } */ ++ svmmla_s32 (s32, u8, s8); /* { dg-error {incompatible type for argument 2 of 'svmmla_s32'} } */ ++ svmmla_s32 (s32, s8, u8); /* { dg-error {incompatible type for argument 3 of 'svmmla_s32'} } */ ++ svmmla_s32 (s32, s8, s32); /* { dg-error {incompatible type for argument 3 of 'svmmla_s32'} } */ ++ svmmla_s32 (s32, s8, 0); /* { dg-error {incompatible type for argument 3 of 'svmmla_s32'} } */ ++ svmmla_s32 (s32, s8, s8); ++ return svmmla_s32 (s32, s8, s8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svint32_t s32, ++ svfloat16_t f16, svfloat32_t f32, svfloat64_t f64) ++{ ++ svmmla (s32, s8); /* { dg-error {too few arguments to function 'svmmla'} } */ ++ svmmla (s32, s8, s8, s8); /* { dg-error {too many arguments to function 'svmmla'} } */ ++ svmmla (0, s8, s8); /* { dg-error {passing 'int' to argument 1 of 'svmmla', which expects an SVE vector type} } */ ++ svmmla (pg, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svbool_t' arguments} } */ ++ svmmla (u8, s8, s8); /* { 
dg-error {'svmmla' has no form that takes 'svuint8_t' arguments} } */ ++ ++ svmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE vector type} } */ ++ svmmla (s32, u8, s8); /* { dg-error {arguments 1 and 2 of 'svmmla' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ ++ svmmla (s32, s8, u8); /* { dg-error {arguments 1 and 3 of 'svmmla' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ ++ svmmla (s32, s8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE vector type} } */ ++ svmmla (s32, s8, s8); ++ svmmla (s32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svmmla', after passing 'svint32_t' to argument 1} } */ ++ svmmla (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svint8_t' to argument 2 of 'svmmla', after passing 'svint32_t' to argument 1} } */ ++ ++ svmmla (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE vector type} } */ ++ svmmla (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svmmla' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ ++ svmmla (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svmmla' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ ++ svmmla (u32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE vector type} } */ ++ svmmla (u32, u8, u8); ++ svmmla (u32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */ ++ svmmla (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */ ++ ++ svmmla (f16, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */ ++ svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ ++ svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ ++ svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ ++ svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ ++ svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ ++ svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ ++ svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ ++ ++ svmmla (f16, f16, f16); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */ ++ svmmla (f32, f32, f32); ++ svmmla (f64, f64, f64); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_2.c +new file mode 100644 +index 000000000..b54725736 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_2.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-march=armv8.2-a+sve" } */
++
++#include <arm_sve.h>
++
++void
++f1 (svint32_t s32, svint8_t s8)
++{
++ svmmla_s32 (s32, s8, s8); /* { dg-error {ACLE function 'svmmla_s32' requires ISA extension 'i8mm'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_3.c
+new file mode 100644
+index 000000000..d1c8297cc
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_3.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-march=armv8.2-a+sve" } */
++
++#include <arm_sve.h>
++
++void
++f1 (svint32_t s32, svint8_t s8)
++{
++ svmmla (s32, s8, s8); /* { dg-error {ACLE function 'svmmla_s32' requires ISA extension 'i8mm'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_4.c
+new file mode 100644
+index 000000000..e6c3f5f94
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_4.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-march=armv8.2-a+sve" } */
++
++#include <arm_sve.h>
++
++void
++f1 (svfloat32_t f32)
++{
++ svmmla_f32 (f32, f32, f32); /* { dg-error {ACLE function 'svmmla_f32' requires ISA extension 'f32mm'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_5.c
+new file mode 100644
+index 000000000..8f6f42366
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_5.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-march=armv8.2-a+sve" } */
++
++#include <arm_sve.h>
++
++void
++f1 (svfloat32_t f32)
++{
++ svmmla (f32, f32, f32); /* { dg-error {ACLE function 'svmmla_f32' requires ISA extension 'f32mm'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_6.c
+new file mode 100644
+index 000000000..7ebeb4981
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_6.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-march=armv8.2-a+sve" } */
++
++#include <arm_sve.h>
++
++void
++f1 (svfloat64_t f64)
++{
++ svmmla_f64 (f64, f64, f64); /* { dg-error {ACLE function 'svmmla_f64' requires ISA extension 'f64mm'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_7.c
+new file mode 100644
+index 000000000..e64ec1ea6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_7.c
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-march=armv8.2-a+sve" } */
++
++#include <arm_sve.h>
++
++void
++f1 (svfloat64_t f64)
++{
++ svmmla (f64, f64, f64); /* { dg-error {ACLE function 'svmmla_f64' requires ISA extension 'f64mm'} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/pattern_pred_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/pattern_pred_1.c
+new file mode 100644
+index 000000000..99b61bdf1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/pattern_pred_1.c
+@@ -0,0 +1,14 @@
++#include <arm_sve.h>
++
++void
++test ()
++{
++ svptrue_pat_b16 ((enum svpattern) -1); /* { dg-error {passing 4294967295 to argument 1 of 'svptrue_pat_b16', which expects a valid 'enum svpattern' value} } */
++ svptrue_pat_b16 ((enum
svpattern) 0); ++ svptrue_pat_b16 ((enum svpattern) 13); ++ svptrue_pat_b16 ((enum svpattern) 14); /* { dg-error {passing 14 to argument 1 of 'svptrue_pat_b16', which expects a valid 'enum svpattern' value} } */ ++ svptrue_pat_b16 ((enum svpattern) 28); /* { dg-error {passing 28 to argument 1 of 'svptrue_pat_b16', which expects a valid 'enum svpattern' value} } */ ++ svptrue_pat_b16 ((enum svpattern) 29); ++ svptrue_pat_b16 ((enum svpattern) 31); ++ svptrue_pat_b16 ((enum svpattern) 32); /* { dg-error {passing 32 to argument 1 of 'svptrue_pat_b16', which expects a valid 'enum svpattern' value} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c +new file mode 100644 +index 000000000..316f77fc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, int32_t *s32_ptr, enum svprfop op) ++{ ++ svprfb (pg, s32_ptr, op); /* { dg-error {argument 3 of 'svprfb' must be an integer constant expression} } */ ++ svprfb (pg, s32_ptr, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ ++ svprfb (pg, s32_ptr, (enum svprfop) 0); ++ svprfb (pg, s32_ptr, (enum svprfop) 5); ++ svprfb (pg, s32_ptr, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ ++ svprfb (pg, s32_ptr, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ ++ svprfb (pg, s32_ptr, (enum svprfop) 8); ++ svprfb (pg, s32_ptr, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c +new file mode 100644 +index 000000000..c33c95440 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c +@@ -0,0 +1,53 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { int i; }; ++ ++void ++f1 (svbool_t pg, int32_t *s32_ptr, void *void_ptr, void **ptr_ptr, ++ svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, svfloat16_t f16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64, enum svprfop op, ++ struct s s) ++{ ++ svprfh_gather_index (pg, s32_ptr, s32); /* { dg-error {too few arguments to function 'svprfh_gather_index'} } */ ++ svprfh_gather_index (pg, s32_ptr, s32, SV_PLDL1KEEP, 0); /* { dg-error {too many arguments to function 'svprfh_gather_index'} } */ ++ svprfh_gather_index (0, s32_ptr, s32, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 1 of 'svprfh_gather_index', which expects 'svbool_t'} } */ ++ svprfh_gather_index (pg, 0, s32, SV_PLDL1KEEP); ++ svprfh_gather_index (pg, (int *) 0, s32, SV_PLDL1KEEP); ++ svprfh_gather_index (pg, void_ptr, s32, SV_PLDL1KEEP); ++ svprfh_gather_index (pg, ptr_ptr, s32, SV_PLDL1KEEP); ++ svprfh_gather_index (pg, s, s32, SV_PLDL1KEEP); /* { dg-error {passing 'struct s' to argument 2 of 'svprfh_gather_index', which expects a vector or pointer base address} } */ ++ ++ svprfh_gather_index (pg, s32_ptr, s8, SV_PLDL1KEEP); /* { dg-error {passing 'svint8_t' to argument 3 of 
'svprfh_gather_index', which expects a vector of 32-bit or 64-bit integers} } */ ++ svprfh_gather_index (pg, s32_ptr, u8, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svprfh_gather_index', which expects a vector of 32-bit or 64-bit integers} } */ ++ svprfh_gather_index (pg, s32_ptr, s16, SV_PLDL1KEEP); /* { dg-error {passing 'svint16_t' to argument 3 of 'svprfh_gather_index', which expects a vector of 32-bit or 64-bit integers} } */ ++ svprfh_gather_index (pg, s32_ptr, u16, SV_PLDL1KEEP); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svprfh_gather_index', which expects a vector of 32-bit or 64-bit integers} } */ ++ svprfh_gather_index (pg, s32_ptr, f16, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svprfh_gather_index', which expects a vector of integers} } */ ++ svprfh_gather_index (pg, s32_ptr, s32, SV_PLDL1KEEP); ++ svprfh_gather_index (pg, s32_ptr, u32, SV_PLDL1KEEP); ++ svprfh_gather_index (pg, s32_ptr, f32, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svprfh_gather_index', which expects a vector of integers} } */ ++ svprfh_gather_index (pg, s32_ptr, s64, SV_PLDL1KEEP); ++ svprfh_gather_index (pg, s32_ptr, u64, SV_PLDL1KEEP); ++ svprfh_gather_index (pg, s32_ptr, f64, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svprfh_gather_index', which expects a vector of integers} } */ ++ ++ svprfh_gather_index (pg, u8, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfh_gather_index (pg, u16, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfh_gather_index (pg, s32, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svint32_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfh_gather_index (pg, u32, 0, SV_PLDL1KEEP); ++ svprfh_gather_index (pg, f32, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfh_gather_index (pg, s64, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svint64_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfh_gather_index (pg, u64, 0, SV_PLDL1KEEP); ++ svprfh_gather_index (pg, f64, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ ++ svprfh_gather_index (pg, s32_ptr, s32, op); /* { dg-error {argument 4 of 'svprfh_gather_index' must be an integer constant expression} } */ ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 0); ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 5); ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 8); ++ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfh_gather_index', 
which expects a valid 'enum svprfop' value} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c +new file mode 100644 +index 000000000..3d7797305 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, int32_t *s32_ptr, svint32_t s32, enum svprfop op) ++{ ++ svprfh_gather_s32index (pg, s32_ptr, s32, op); /* { dg-error {argument 4 of 'svprfh_gather_s32index' must be an integer constant expression} } */ ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 0); ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 5); ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 8); ++ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c +new file mode 100644 +index 000000000..cc61901cb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c +@@ -0,0 +1,53 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { int i; }; ++ ++void ++f1 (svbool_t pg, int32_t *s32_ptr, void *void_ptr, void **ptr_ptr, ++ svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, svfloat16_t f16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64, enum svprfop op, ++ struct s s) ++{ ++ svprfb_gather_offset (pg, s32_ptr, s32); /* { dg-error {too few arguments to function 'svprfb_gather_offset'} } */ ++ svprfb_gather_offset (pg, s32_ptr, s32, SV_PLDL1KEEP, 0); /* { dg-error {too many arguments to function 'svprfb_gather_offset'} } */ ++ svprfb_gather_offset (0, s32_ptr, s32, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 1 of 'svprfb_gather_offset', which expects 'svbool_t'} } */ ++ svprfb_gather_offset (pg, 0, s32, SV_PLDL1KEEP); ++ svprfb_gather_offset (pg, (int *) 0, s32, SV_PLDL1KEEP); ++ svprfb_gather_offset (pg, void_ptr, s32, SV_PLDL1KEEP); ++ svprfb_gather_offset (pg, ptr_ptr, s32, SV_PLDL1KEEP); ++ svprfb_gather_offset (pg, s, s32, SV_PLDL1KEEP); /* { dg-error {passing 'struct s' to argument 2 of 'svprfb_gather_offset', which expects a vector or pointer base address} } */ ++ ++ svprfb_gather_offset (pg, s32_ptr, s8, SV_PLDL1KEEP); /* { dg-error {passing 'svint8_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of 32-bit or 64-bit integers} } */ ++ svprfb_gather_offset (pg, s32_ptr, u8, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of 32-bit or 
64-bit integers} } */ ++ svprfb_gather_offset (pg, s32_ptr, s16, SV_PLDL1KEEP); /* { dg-error {passing 'svint16_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of 32-bit or 64-bit integers} } */ ++ svprfb_gather_offset (pg, s32_ptr, u16, SV_PLDL1KEEP); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of 32-bit or 64-bit integers} } */ ++ svprfb_gather_offset (pg, s32_ptr, f16, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of integers} } */ ++ svprfb_gather_offset (pg, s32_ptr, s32, SV_PLDL1KEEP); ++ svprfb_gather_offset (pg, s32_ptr, u32, SV_PLDL1KEEP); ++ svprfb_gather_offset (pg, s32_ptr, f32, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of integers} } */ ++ svprfb_gather_offset (pg, s32_ptr, s64, SV_PLDL1KEEP); ++ svprfb_gather_offset (pg, s32_ptr, u64, SV_PLDL1KEEP); ++ svprfb_gather_offset (pg, s32_ptr, f64, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of integers} } */ ++ ++ svprfb_gather_offset (pg, u8, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather_offset (pg, u16, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather_offset (pg, s32, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svint32_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather_offset (pg, u32, 0, SV_PLDL1KEEP); ++ svprfb_gather_offset (pg, f32, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather_offset (pg, s64, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svint64_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather_offset (pg, u64, 0, SV_PLDL1KEEP); ++ svprfb_gather_offset (pg, f64, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ ++ svprfb_gather_offset (pg, s32_ptr, s32, op); /* { dg-error {argument 4 of 'svprfb_gather_offset' must be an integer constant expression} } */ ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 0); ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 5); ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 8); ++ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c +new file mode 100644 +index 000000000..b74721fad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64, enum svprfop op) ++{ ++ svprfb_gather (pg, u32); /* { dg-error {too few arguments to function 'svprfb_gather'} } */ ++ svprfb_gather (pg, u32, SV_PLDL1KEEP, 0); /* { dg-error {too many arguments to function 'svprfb_gather'} } */ ++ svprfb_gather (0, u32, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 1 of 'svprfb_gather', which expects 'svbool_t'} } */ ++ svprfb_gather (pg, 0, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 2 of 'svprfb_gather', which expects an SVE vector type} } */ ++ ++ svprfb_gather (pg, s8, SV_PLDL1KEEP); /* { dg-error {passing 'svint8_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather (pg, u8, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather (pg, s16, SV_PLDL1KEEP); /* { dg-error {passing 'svint16_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather (pg, u16, SV_PLDL1KEEP); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather (pg, f16, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather (pg, s32, SV_PLDL1KEEP); /* { dg-error {passing 'svint32_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather (pg, u32, SV_PLDL1KEEP); ++ svprfb_gather (pg, f32, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather (pg, s64, SV_PLDL1KEEP); /* { dg-error {passing 'svint64_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ svprfb_gather (pg, u64, SV_PLDL1KEEP); ++ svprfb_gather (pg, f64, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ ++ ++ svprfb_gather (pg, u32, op); /* { dg-error {argument 3 of 'svprfb_gather' must be an integer constant expression} } */ ++ svprfb_gather (pg, u32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather (pg, u32, (enum svprfop) 0); ++ svprfb_gather (pg, u32, (enum svprfop) 5); ++ svprfb_gather (pg, u32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather (pg, u32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ ++ svprfb_gather (pg, u32, (enum svprfop) 8); ++ svprfb_gather (pg, u32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ ++} +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c
+new file mode 100644
+index 000000000..24b4aa190
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++/* { dg-options "-std=c99" } */
++
++#include <arm_sve.h>
++
++void
++f1 (svbool_t pg, int32_t *s32_ptr, svint32_t s32, enum svprfop op)
++{
++  svprfb_gather_s32offset (pg, s32_ptr, s32, op); /* { dg-error {argument 4 of 'svprfb_gather_s32offset' must be an integer constant expression} } */
++  svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
++  svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 0);
++  svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 5);
++  svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
++  svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
++  svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 8);
++  svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c
+new file mode 100644
+index 000000000..63ccdc5a4
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++/* { dg-options "-std=c99" } */
++
++#include <arm_sve.h>
++
++void
++f1 (svbool_t pg, svuint32_t u32, enum svprfop op)
++{
++  svprfb_gather_u32base (pg, u32, op); /* { dg-error {argument 3 of 'svprfb_gather_u32base' must be an integer constant expression} } */
++  svprfb_gather_u32base (pg, u32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
++  svprfb_gather_u32base (pg, u32, (enum svprfop) 0);
++  svprfb_gather_u32base (pg, u32, (enum svprfop) 5);
++  svprfb_gather_u32base (pg, u32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
++  svprfb_gather_u32base (pg, u32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
++  svprfb_gather_u32base (pg, u32, (enum svprfop) 8);
++  svprfb_gather_u32base (pg, u32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c
+new file mode 100644
+index 000000000..ab0ef304a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++
++#include <arm_sve.h>
++
++void
++f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32,
++    svuint32x2_t u32x2)
++{
++  svorv (pg); /* { dg-error {too few 
arguments to function 'svorv'} } */ ++ svorv (pg, u32, u32); /* { dg-error {too many arguments to function 'svorv'} } */ ++ svorv (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svorv', which expects 'svbool_t'} } */ ++ svorv (u32, u32); /* { dg-error {passing 'svuint32_t' to argument 1 of 'svorv', which expects 'svbool_t'} } */ ++ svorv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svorv', which expects an SVE vector type} } */ ++ svorv (pg, pg); /* { dg-error {'svorv' has no form that takes 'svbool_t' arguments} } */ ++ svorv (pg, s32); ++ svorv (pg, u32); ++ svorv (pg, f32); /* { dg-error {'svorv' has no form that takes 'svfloat32_t' arguments} } */ ++ svorv (pg, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 2 of 'svorv', which expects a single SVE vector rather than a tuple} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c +new file mode 100644 +index 000000000..f99a2887b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svuint32x2_t u32x2) ++{ ++ svaddv (pg); /* { dg-error {too few arguments to function 'svaddv'} } */ ++ svaddv (pg, u32, u32); /* { dg-error {too many arguments to function 'svaddv'} } */ ++ svaddv (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svaddv', which expects 'svbool_t'} } */ ++ svaddv (u32, u32); /* { dg-error {passing 'svuint32_t' to argument 1 of 'svaddv', which expects 'svbool_t'} } */ ++ svaddv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svaddv', which expects an SVE vector type} } */ ++ svaddv (pg, pg); /* { dg-error {'svaddv' has no form that takes 'svbool_t' arguments} } */ ++ svaddv (pg, s32); ++ svaddv (pg, u32); ++ svaddv (pg, f32); ++ svaddv (pg, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 2 of 'svaddv', which expects a single SVE vector rather than a tuple} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c +new file mode 100644 +index 000000000..f07c76102 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++svfloat64_t ++f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ u8x2 = svset2 (u8x2); /* { dg-error {too few arguments to function 'svset2'} } */ ++ u8x2 = svset2 (u8x2, 1); /* { dg-error {too few arguments to function 'svset2'} } */ ++ u8x2 = svset2 (u8x2, 1, u8, 3); /* { dg-error {too many arguments to function 'svset2'} } */ ++ u8x2 = svset2 (u8, 0, u8); /* { dg-error {passing single vector 'svuint8_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ ++ u8x2 = svset2 (u8x3, 0, u8); /* { dg-error {passing 'svuint8x3_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ ++ u8x2 = svset2 (pg, 0, u8); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ ++ u8x2 = svset2 (u8x2, 0, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svset2', which expects a single SVE vector rather than a tuple} } */ ++ u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of 
the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */ ++ u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */ ++ u8x2 = svset2 (u8x2, x, u8); /* { dg-error {argument 2 of 'svset2' must be an integer constant expression} } */ ++ u8x2 = svset2 (u8x2, 0, u8); ++ f64 = svset2 (u8x2, 0, u8); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8x2_t'} } */ ++ u8x2 = svset2 (u8x2, 1, u8); ++ u8x2 = svset2 (u8x2, 2, u8); /* { dg-error {passing 2 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, 3, u8); /* { dg-error {passing 3 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, 4, u8); /* { dg-error {passing 4 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, 5, u8); /* { dg-error {passing 5 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, ~0U, u8); /* { dg-error {passing [^ ]* to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2 (u8x2, one, u8); /* { dg-error {argument 2 of 'svset2' must be an integer constant expression} } */ ++ u8x2 = svset2 (u8x2, 3 - 2, u8); ++ u8x2 = svset2 (u8x2, 1.0, u8); ++ ++ return f64; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_2.c +new file mode 100644 +index 000000000..ae277eafd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_2.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++svfloat64_t ++f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svint8x2_t s8x2, ++ svuint8x3_t u8x3, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ u8x2 = svset2_u8 (u8x2); /* { dg-error {too few arguments to function 'svset2_u8'} } */ ++ u8x2 = svset2_u8 (u8x2, 1); /* { dg-error {too few arguments to function 'svset2_u8'} } */ ++ u8x2 = svset2_u8 (u8x2, 1, u8, 3); /* { dg-error {too many arguments to function 'svset2_u8'} } */ ++ u8x2 = svset2_u8 (u8, 0, u8); /* { dg-error {incompatible type for argument 1 of 'svset2_u8'} } */ ++ u8x2 = svset2_u8 (s8x2, 0, u8); /* { dg-error {incompatible type for argument 1 of 'svset2_u8'} } */ ++ u8x2 = svset2_u8 (u8x3, 0, u8); /* { dg-error {incompatible type for argument 1 of 'svset2_u8'} } */ ++ u8x2 = svset2_u8 (pg, 0, u8); /* { dg-error {incompatible type for argument 1 of 'svset2_u8'} } */ ++ u8x2 = svset2_u8 (u8x2, 0, u8x2); /* { dg-error {incompatible type for argument 3 of 'svset2_u8'} } */ ++ u8x2 = svset2_u8 (u8x2, 0, f64); /* { dg-error {incompatible type for argument 3 of 'svset2_u8'} } */ ++ u8x2 = svset2_u8 (u8x2, 0, pg); /* { dg-error {incompatible type for argument 3 of 'svset2_u8'} } */ ++ u8x2 = svset2_u8 (u8x2, x, u8); /* { dg-error {argument 2 of 'svset2_u8' must be an integer constant expression} } */ ++ u8x2 = svset2_u8 (u8x2, 0, u8); ++ f64 = svset2_u8 (u8x2, 0, u8); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8x2_t'} } */ ++ u8x2 = svset2_u8 (u8x2, 1, u8); ++ u8x2 = svset2_u8 (u8x2, 2, u8); /* { dg-error {passing 2 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, 3, u8); /* { dg-error 
{passing 3 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, 4, u8); /* { dg-error {passing 4 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, 5, u8); /* { dg-error {passing 5 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, ~0U, u8); /* { dg-error {passing [^ ]* to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ ++ u8x2 = svset2_u8 (u8x2, one, u8); /* { dg-error {argument 2 of 'svset2_u8' must be an integer constant expression} } */ ++ u8x2 = svset2_u8 (u8x2, 3 - 2, u8); ++ u8x2 = svset2_u8 (u8x2, 1.0, u8); ++ ++ return f64; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c +new file mode 100644 +index 000000000..543a1bea8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c +@@ -0,0 +1,36 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++svfloat64_t ++f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4, ++ int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ f16x3 = svset3 (f16x3); /* { dg-error {too few arguments to function 'svset3'} } */ ++ f16x3 = svset3 (f16x3, 1); /* { dg-error {too few arguments to function 'svset3'} } */ ++ f16x3 = svset3 (f16x3, 1, f16, 3); /* { dg-error {too many arguments to function 'svset3'} } */ ++ f16x3 = svset3 (f16, 0, f16); /* { dg-error {passing single vector 'svfloat16_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ ++ f16x3 = svset3 (f16x4, 0, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ ++ f16x3 = svset3 (pg, 0, f16); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ ++ f16x3 = svset3 (f16x3, 0, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svset3', which expects a single SVE vector rather than a tuple} } */ ++ f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */ ++ f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */ ++ f16x3 = svset3 (f16x3, x, f16); /* { dg-error {argument 2 of 'svset3' must be an integer constant expression} } */ ++ f16x3 = svset3 (f16x3, 0, f16); ++ f64 = svset3 (f16x3, 0, f16); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16x3_t'} } */ ++ f16x3 = svset3 (f16x3, 1, f16); ++ f16x3 = svset3 (f16x3, 2, f16); ++ f16x3 = svset3 (f16x3, 3, f16); /* { dg-error {passing 3 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3 (f16x3, 4, f16); /* { dg-error {passing 4 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3 (f16x3, 5, f16); /* { dg-error {passing 5 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3 (f16x3, ~0U, f16); /* { dg-error {passing [^ ]* to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3 (f16x3, one, f16); /* { dg-error {argument 2 of 'svset3' must be an integer constant expression} } */ ++ 
f16x3 = svset3 (f16x3, 3 - 2, f16); ++ f16x3 = svset3 (f16x3, 1.0, f16); ++ ++ return f64; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_4.c +new file mode 100644 +index 000000000..198b03407 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_4.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++svfloat64_t ++f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svuint16x3_t u16x3, ++ svfloat16x4_t f16x4, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ f16x3 = svset3_f16 (f16x3); /* { dg-error {too few arguments to function 'svset3_f16'} } */ ++ f16x3 = svset3_f16 (f16x3, 1); /* { dg-error {too few arguments to function 'svset3_f16'} } */ ++ f16x3 = svset3_f16 (f16x3, 1, f16, 3); /* { dg-error {too many arguments to function 'svset3_f16'} } */ ++ f16x3 = svset3_f16 (f16, 0, f16); /* { dg-error {incompatible type for argument 1 of 'svset3_f16'} } */ ++ f16x3 = svset3_f16 (u16x3, 0, f16); /* { dg-error {incompatible type for argument 1 of 'svset3_f16'} } */ ++ f16x3 = svset3_f16 (f16x4, 0, f16); /* { dg-error {incompatible type for argument 1 of 'svset3_f16'} } */ ++ f16x3 = svset3_f16 (pg, 0, f16); /* { dg-error {incompatible type for argument 1 of 'svset3_f16'} } */ ++ f16x3 = svset3_f16 (f16x3, 0, f16x3); /* { dg-error {incompatible type for argument 3 of 'svset3_f16'} } */ ++ f16x3 = svset3_f16 (f16x3, 0, f64); /* { dg-error {incompatible type for argument 3 of 'svset3_f16'} } */ ++ f16x3 = svset3_f16 (f16x3, 0, pg); /* { dg-error {incompatible type for argument 3 of 'svset3_f16'} } */ ++ f16x3 = svset3_f16 (f16x3, x, f16); /* { dg-error {argument 2 of 'svset3_f16' must be an integer constant expression} } */ ++ f16x3 = svset3_f16 (f16x3, 0, f16); ++ f64 = svset3_f16 (f16x3, 0, f16); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16x3_t'} } */ ++ f16x3 = svset3_f16 (f16x3, 1, f16); ++ f16x3 = svset3_f16 (f16x3, 2, f16); ++ f16x3 = svset3_f16 (f16x3, 3, f16); /* { dg-error {passing 3 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3_f16 (f16x3, 4, f16); /* { dg-error {passing 4 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3_f16 (f16x3, 5, f16); /* { dg-error {passing 5 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3_f16 (f16x3, ~0U, f16); /* { dg-error {passing [^ ]* to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ ++ f16x3 = svset3_f16 (f16x3, one, f16); /* { dg-error {argument 2 of 'svset3_f16' must be an integer constant expression} } */ ++ f16x3 = svset3_f16 (f16x3, 3 - 2, f16); ++ f16x3 = svset3_f16 (f16x3, 1.0, f16); ++ ++ return f64; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c +new file mode 100644 +index 000000000..be911a731 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++svfloat64_t ++f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ s32x4 = svset4 (s32x4); /* { dg-error {too few arguments to function 'svset4'} } */ ++ s32x4 = svset4 
(s32x4, 1); /* { dg-error {too few arguments to function 'svset4'} } */ ++ s32x4 = svset4 (s32x4, 1, s32, 3); /* { dg-error {too many arguments to function 'svset4'} } */ ++ s32x4 = svset4 (s32, 0, s32); /* { dg-error {passing single vector 'svint32_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ ++ s32x4 = svset4 (s32x2, 0, s32); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ ++ s32x4 = svset4 (pg, 0, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ ++ s32x4 = svset4 (s32x4, 0, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svset4', which expects a single SVE vector rather than a tuple} } */ ++ s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ ++ s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ ++ s32x4 = svset4 (s32x4, x, s32); /* { dg-error {argument 2 of 'svset4' must be an integer constant expression} } */ ++ s32x4 = svset4 (s32x4, 0, s32); ++ f64 = svset4 (s32x4, 0, s32); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32x4_t'} } */ ++ s32x4 = svset4 (s32x4, 1, s32); ++ s32x4 = svset4 (s32x4, 2, s32); ++ s32x4 = svset4 (s32x4, 3, s32); ++ s32x4 = svset4 (s32x4, 4, s32); /* { dg-error {passing 4 to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4 (s32x4, 5, s32); /* { dg-error {passing 5 to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4 (s32x4, ~0U, s32); /* { dg-error {passing [^ ]* to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4 (s32x4, one, s32); /* { dg-error {argument 2 of 'svset4' must be an integer constant expression} } */ ++ s32x4 = svset4 (s32x4, 3 - 2, s32); ++ s32x4 = svset4 (s32x4, 1.0, s32); ++ ++ return f64; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_6.c +new file mode 100644 +index 000000000..cec435413 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_6.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++svfloat64_t ++f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svfloat32x4_t f32x4, ++ svint32x2_t s32x2, int x) ++{ ++ const int one = 1; ++ svfloat64_t f64; ++ ++ s32x4 = svset4_s32 (s32x4); /* { dg-error {too few arguments to function 'svset4_s32'} } */ ++ s32x4 = svset4_s32 (s32x4, 1); /* { dg-error {too few arguments to function 'svset4_s32'} } */ ++ s32x4 = svset4_s32 (s32x4, 1, s32, 3); /* { dg-error {too many arguments to function 'svset4_s32'} } */ ++ s32x4 = svset4_s32 (s32, 0, s32); /* { dg-error {incompatible type for argument 1 of 'svset4_s32'} } */ ++ s32x4 = svset4_s32 (f32x4, 0, s32); /* { dg-error {incompatible type for argument 1 of 'svset4_s32'} } */ ++ s32x4 = svset4_s32 (s32x2, 0, s32); /* { dg-error {incompatible type for argument 1 of 'svset4_s32'} } */ ++ s32x4 = svset4_s32 (pg, 0, s32); /* { dg-error {incompatible type for argument 1 of 'svset4_s32'} } */ ++ s32x4 = svset4_s32 (s32x4, 0, s32x4); /* { dg-error {incompatible type for argument 3 of 'svset4_s32'} } */ ++ s32x4 = 
svset4_s32 (s32x4, 0, f64); /* { dg-error {incompatible type for argument 3 of 'svset4_s32'} } */ ++ s32x4 = svset4_s32 (s32x4, 0, pg); /* { dg-error {incompatible type for argument 3 of 'svset4_s32'} } */ ++ s32x4 = svset4_s32 (s32x4, x, s32); /* { dg-error {argument 2 of 'svset4_s32' must be an integer constant expression} } */ ++ s32x4 = svset4_s32 (s32x4, 0, s32); ++ f64 = svset4_s32 (s32x4, 0, s32); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32x4_t'} } */ ++ s32x4 = svset4_s32 (s32x4, 1, s32); ++ s32x4 = svset4_s32 (s32x4, 2, s32); ++ s32x4 = svset4_s32 (s32x4, 3, s32); ++ s32x4 = svset4_s32 (s32x4, 4, s32); /* { dg-error {passing 4 to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4_s32 (s32x4, 5, s32); /* { dg-error {passing 5 to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4_s32 (s32x4, ~0U, s32); /* { dg-error {passing [^ ]* to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ ++ s32x4 = svset4_s32 (s32x4, one, s32); /* { dg-error {argument 2 of 'svset4_s32' must be an integer constant expression} } */ ++ s32x4 = svset4_s32 (s32x4, 3 - 2, s32); ++ s32x4 = svset4_s32 (s32x4, 1.0, s32); ++ ++ return f64; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_1.c +new file mode 100644 +index 000000000..4dd9a9c76 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_1.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16, ++ svint32_t s32, svint64_t s64, int x) ++{ ++ const int one = 1; ++ u8 = svasrd_x (pg, u8, 1); /* { dg-error {'svasrd_x' has no form that takes 'svuint8_t' arguments} } */ ++ s8 = svasrd_x (pg, s8, x); /* { dg-error {argument 3 of 'svasrd_x' must be an integer constant expression} } */ ++ s8 = svasrd_x (pg, s8, one); /* { dg-error {argument 3 of 'svasrd_x' must be an integer constant expression} } */ ++ s8 = svasrd_x (pg, s8, 0.4); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_x (pg, s8, 1.0); ++ s8 = svasrd_x (pg, s8, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_x (pg, s8, 1); ++ s8 = svasrd_x (pg, s8, 1 + 1); ++ s8 = svasrd_x (pg, s8, 8); ++ s8 = svasrd_x (pg, s8, 9); /* { dg-error {passing 9 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_x (pg, s8, (1ULL << 62) + 1); /* { dg-error {passing [^ ]* to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ ++ s16 = svasrd_x (pg, s16, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} } */ ++ s16 = svasrd_x (pg, s16, 1); ++ s16 = svasrd_x (pg, s16, 16); ++ s16 = svasrd_x (pg, s16, 17); /* { dg-error {passing 17 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} } */ ++ s32 = svasrd_x (pg, s32, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 32\]} } */ ++ s32 = svasrd_x (pg, s32, 1); ++ s32 = svasrd_x (pg, s32, 32); ++ s32 = svasrd_x (pg, s32, 33); /* { dg-error {passing 33 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 
32\]} } */ ++ s64 = svasrd_x (pg, s64, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} } */ ++ s64 = svasrd_x (pg, s64, 1); ++ s64 = svasrd_x (pg, s64, 64); ++ s64 = svasrd_x (pg, s64, 65); /* { dg-error {passing 65 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_2.c +new file mode 100644 +index 000000000..4970689e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_2.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint8_t s8, svint16_t s16, svint32_t s32, svint64_t s64, ++ int x) ++{ ++ const int one = 1; ++ s8 = svasrd_n_s8_x (pg, s8, x); /* { dg-error {argument 3 of 'svasrd_n_s8_x' must be an integer constant expression} } */ ++ s8 = svasrd_n_s8_x (pg, s8, one); /* { dg-error {argument 3 of 'svasrd_n_s8_x' must be an integer constant expression} } */ ++ s8 = svasrd_n_s8_x (pg, s8, 0.4); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_n_s8_x (pg, s8, 1.0); ++ s8 = svasrd_n_s8_x (pg, s8, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_n_s8_x (pg, s8, 1); ++ s8 = svasrd_n_s8_x (pg, s8, 1 + 1); ++ s8 = svasrd_n_s8_x (pg, s8, 8); ++ s8 = svasrd_n_s8_x (pg, s8, 9); /* { dg-error {passing 9 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ ++ s8 = svasrd_n_s8_x (pg, s8, (1ULL << 62) + 1); /* { dg-error {passing [^ ]* to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ ++ s16 = svasrd_n_s16_x (pg, s16, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s16_x', which expects a value in the range \[1, 16\]} } */ ++ s16 = svasrd_n_s16_x (pg, s16, 1); ++ s16 = svasrd_n_s16_x (pg, s16, 16); ++ s16 = svasrd_n_s16_x (pg, s16, 17); /* { dg-error {passing 17 to argument 3 of 'svasrd_n_s16_x', which expects a value in the range \[1, 16\]} } */ ++ s32 = svasrd_n_s32_x (pg, s32, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s32_x', which expects a value in the range \[1, 32\]} } */ ++ s32 = svasrd_n_s32_x (pg, s32, 1); ++ s32 = svasrd_n_s32_x (pg, s32, 32); ++ s32 = svasrd_n_s32_x (pg, s32, 33); /* { dg-error {passing 33 to argument 3 of 'svasrd_n_s32_x', which expects a value in the range \[1, 32\]} } */ ++ s64 = svasrd_n_s64_x (pg, s64, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s64_x', which expects a value in the range \[1, 64\]} } */ ++ s64 = svasrd_n_s64_x (pg, s64, 1); ++ s64 = svasrd_n_s64_x (pg, s64, 64); ++ s64 = svasrd_n_s64_x (pg, s64, 65); /* { dg-error {passing 65 to argument 3 of 'svasrd_n_s64_x', which expects a value in the range \[1, 64\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c +new file mode 100644 +index 000000000..267db83f7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { signed char x; }; ++ ++svuint8_t ++f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, ++ float *f32_ptr, _Complex float 
*cf32_ptr, svint8_t s8, svfloat32_t f32, ++ struct s s) ++{ ++ svst1 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svst1'} } */ ++ svst1 (pg, s8_ptr, s8, 0); /* { dg-error {too many arguments to function 'svst1'} } */ ++ svst1 (0, s8_ptr, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects 'svbool_t'} } */ ++ svst1 (pg, void_ptr, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE vector type} } */ ++ svst1 (pg, void_ptr, pg); /* { dg-error {'svst1' has no form that takes 'svbool_t' arguments} } */ ++ svst1 (pg, 0, s8); ++ svst1 (pg, (int *) 0, s8); /* { dg-warning "passing argument 2 of 'svst1_s8' from incompatible pointer type" } */ ++ svst1 (pg, void_ptr, s8); ++ svst1 (pg, s_ptr, s8); /* { dg-warning "passing argument 2 of 'svst1_s8' from incompatible pointer type" } */ ++ svst1 (pg, f32_ptr, s8); /* { dg-warning "passing argument 2 of 'svst1_s8' from incompatible pointer type" } */ ++ svst1 (pg, f32_ptr, f32); ++ svst1 (pg, cf32_ptr, f32); /* { dg-warning "passing argument 2 of 'svst1_f32' from incompatible pointer type" } */ ++ svst1 (pg, s, s8); /* { dg-error {passing 'struct s' to argument 2 of 'svst1', which expects a scalar pointer} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c +new file mode 100644 +index 000000000..4e4fb3c6d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { signed char x; }; ++ ++svuint8_t ++f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, ++ float *f32_ptr, _Complex float *cf32_ptr, svint8_t s8, svfloat32_t f32) ++{ ++ svst1_vnum (pg, s8_ptr, 0); /* { dg-error {too few arguments to function 'svst1_vnum'} } */ ++ svst1_vnum (pg, s8_ptr, 0, s8, 0); /* { dg-error {too many arguments to function 'svst1_vnum'} } */ ++ svst1_vnum (0, s8_ptr, 0, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1_vnum', which expects 'svbool_t'} } */ ++ svst1_vnum (pg, s8_ptr, pg, s8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */ ++ svst1_vnum (pg, s8_ptr, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */ ++ svst1_vnum (pg, s8_ptr, void_ptr, s8); /* { dg-warning "passing argument 3 of 'svst1_vnum_s8' makes integer from pointer without a cast" } */ ++ svst1_vnum (pg, void_ptr, 0, 0); /* { dg-error {passing 'int' to argument 4 of 'svst1_vnum', which expects an SVE vector type} } */ ++ svst1_vnum (pg, void_ptr, 0, pg); /* { dg-error {'svst1_vnum' has no form that takes 'svbool_t' arguments} } */ ++ svst1_vnum (pg, 0, 0, s8); ++ svst1_vnum (pg, (int *) 0, 0, s8); /* { dg-warning "passing argument 2 of 'svst1_vnum_s8' from incompatible pointer type" } */ ++ svst1_vnum (pg, void_ptr, 0, s8); ++ svst1_vnum (pg, s_ptr, 0, s8); /* { dg-warning "passing argument 2 of 'svst1_vnum_s8' from incompatible pointer type" } */ ++ svst1_vnum (pg, f32_ptr, 0, s8); /* { dg-warning "passing argument 2 of 'svst1_vnum_s8' from incompatible pointer type" } */ ++ svst1_vnum (pg, f32_ptr, 0, f32); ++ svst1_vnum (pg, cf32_ptr, 0, f32); /* { dg-warning "passing argument 2 of 'svst1_vnum_f32' from incompatible pointer type" } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_index_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_index_1.c +new file mode 100644 +index 000000000..3209149b6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_index_1.c +@@ -0,0 +1,101 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { signed char x; }; ++ ++svuint32_t ++f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, ++ int32_t *s32_ptr, uint32_t *u32_ptr, float *f32_ptr, ++ int64_t *s64_ptr, uint64_t *u64_ptr, double *f64_ptr, ++ void *void_ptr, struct s *s_ptr, _Complex float *cf32_ptr, ++ svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, svfloat16_t f16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) ++{ ++ svst1_scatter_index (pg, s32_ptr, s32); /* { dg-error {too few arguments to function 'svst1_scatter_index'} } */ ++ svst1_scatter_index (pg, s32_ptr, s32, s32, 0); /* { dg-error {too many arguments to function 'svst1_scatter_index'} } */ ++ svst1_scatter_index (0, s32_ptr, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svst1_scatter_index', which expects 'svbool_t'} } */ ++ svst1_scatter_index (pg, 0, s32, s32); ++ svst1_scatter_index (pg, (int *) 0, s32, s32); ++ svst1_scatter_index (pg, void_ptr, s32, s32); ++ svst1_scatter_index (pg, s_ptr, s32, s32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32index_s32' from incompatible pointer type" } */ ++ svst1_scatter_index (pg, f32_ptr, s32, s32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32index_s32' from incompatible pointer type" } */ ++ svst1_scatter_index (pg, f32_ptr, s32, f32); ++ svst1_scatter_index (pg, cf32_ptr, s32, f32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32index_f32' from incompatible pointer type" } */ ++ svst1_scatter_index (pg, s, s32, s32); /* { dg-error {passing 'struct s' to argument 2 of 'svst1_scatter_index', which expects a vector or pointer base address} } */ ++ ++ svst1_scatter_index (pg, u32, void_ptr, s32); /* { dg-warning "passing argument 3 of 'svst1_scatter_u32base_index_s32' makes integer from pointer without a cast" } */ ++ svst1_scatter_index (pg, u32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_scatter_index', which expects 'int64_t'} } */ ++ svst1_scatter_index (pg, u32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_index', which expects 'int64_t'} } */ ++ ++ svst1_scatter_index (pg, void_ptr, u32, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit elements} } */ ++ ++ svst1_scatter_index (pg, s8_ptr, u32, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit elements} } */ ++ svst1_scatter_index (pg, s8_ptr, u32, u8); /* { dg-error {passing 'svuint8_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit elements} } */ ++ ++ svst1_scatter_index (pg, s16_ptr, u32, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit elements} } */ ++ svst1_scatter_index (pg, s16_ptr, u32, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit elements} } */ ++ svst1_scatter_index (pg, s16_ptr, u32, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit 
elements} } */ ++ ++ svst1_scatter_index (pg, u32, 0, s32); ++ svst1_scatter_index (pg, s32, 0, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint32_t'} } */ ++ ++ svst1_scatter_index (pg, u32, 0, u32); ++ svst1_scatter_index (pg, s32, 0, u32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint32_t'} } */ ++ ++ svst1_scatter_index (pg, u32, 0, f32); ++ svst1_scatter_index (pg, s32, 0, f32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint32_t'} } */ ++ ++ svst1_scatter_index (pg, u64, 0, s64); ++ svst1_scatter_index (pg, s64, 0, s64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint64_t'} } */ ++ ++ svst1_scatter_index (pg, u64, 0, u64); ++ svst1_scatter_index (pg, s64, 0, u64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint64_t'} } */ ++ ++ svst1_scatter_index (pg, u64, 0, f64); ++ svst1_scatter_index (pg, s64, 0, f64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint64_t'} } */ ++ ++ svst1_scatter_index (pg, s32_ptr, s32, s32); ++ svst1_scatter_index (pg, s32_ptr, u32, s32); ++ svst1_scatter_index (pg, s32_ptr, f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_index (pg, s32_ptr, s64, s32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_index (pg, s32_ptr, u64, s32); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_index (pg, s32_ptr, f64, s32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ ++ ++ svst1_scatter_index (pg, u32_ptr, s32, u32); ++ svst1_scatter_index (pg, u32_ptr, u32, u32); ++ svst1_scatter_index (pg, u32_ptr, f32, u32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_index (pg, u32_ptr, s64, u32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_index (pg, u32_ptr, u64, u32); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_index (pg, u32_ptr, f64, u32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ ++ ++ svst1_scatter_index (pg, f32_ptr, s32, f32); ++ svst1_scatter_index (pg, f32_ptr, u32, f32); ++ svst1_scatter_index (pg, f32_ptr, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_index (pg, f32_ptr, s64, f32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_index (pg, f32_ptr, u64, f32); /* { dg-error {passing 'svuint64_t' to 
argument 3 of 'svst1_scatter_index', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_index (pg, f32_ptr, f64, f32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ ++ svst1_scatter_index (pg, s64_ptr, s32, s64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_index (pg, s64_ptr, u32, s64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_index (pg, s64_ptr, f32, s64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_index (pg, s64_ptr, s64, s64); ++ svst1_scatter_index (pg, s64_ptr, u64, s64); ++ svst1_scatter_index (pg, s64_ptr, f64, s64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ ++ ++ svst1_scatter_index (pg, u64_ptr, s32, u64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_index (pg, u64_ptr, u32, u64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_index (pg, u64_ptr, f32, u64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_index (pg, u64_ptr, s64, u64); ++ svst1_scatter_index (pg, u64_ptr, u64, u64); ++ svst1_scatter_index (pg, u64_ptr, f64, u64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ ++ ++ svst1_scatter_index (pg, f64_ptr, s32, f64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_index (pg, f64_ptr, u32, f64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_index (pg, f64_ptr, f32, f64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_index (pg, f64_ptr, s64, f64); ++ svst1_scatter_index (pg, f64_ptr, u64, f64); ++ svst1_scatter_index (pg, f64_ptr, f64, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c +new file mode 100644 +index 000000000..10abf758c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { signed char x; }; ++ ++svuint32_t ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, ++ svfloat16_t f16, 
svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64) ++{ ++ svst1_scatter (pg, u32); /* { dg-error {too few arguments to function 'svst1_scatter'} } */ ++ svst1_scatter (pg, u32, u32, 0); /* { dg-error {too many arguments to function 'svst1_scatter'} } */ ++ svst1_scatter (0, u32, u32); /* { dg-error {passing 'int' to argument 1 of 'svst1_scatter', which expects 'svbool_t'} } */ ++ svst1_scatter (pg, 0, u32); /* { dg-error {passing 'int' to argument 2 of 'svst1_scatter', which expects an SVE vector type} } */ ++ svst1_scatter (pg, u32, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1_scatter', which expects an SVE vector type} } */ ++ ++ svst1_scatter (pg, u32, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ ++ ++ svst1_scatter (pg, u32, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ ++ svst1_scatter (pg, u32, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ ++ ++ svst1_scatter (pg, u32, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ ++ svst1_scatter (pg, u32, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ ++ svst1_scatter (pg, u32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ ++ ++ svst1_scatter (pg, u32, s32); ++ svst1_scatter (pg, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter', which expects 'svuint32_t'} } */ ++ ++ svst1_scatter (pg, u32, u32); ++ svst1_scatter (pg, s32, u32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter', which expects 'svuint32_t'} } */ ++ ++ svst1_scatter (pg, u32, f32); ++ svst1_scatter (pg, s32, f32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter', which expects 'svuint32_t'} } */ ++ ++ svst1_scatter (pg, u64, s64); ++ svst1_scatter (pg, s64, s64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter', which expects 'svuint64_t'} } */ ++ ++ svst1_scatter (pg, u64, u64); ++ svst1_scatter (pg, s64, u64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter', which expects 'svuint64_t'} } */ ++ ++ svst1_scatter (pg, u64, f64); ++ svst1_scatter (pg, s64, f64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter', which expects 'svuint64_t'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_2.c +new file mode 100644 +index 000000000..8ee8129fa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_2.c +@@ -0,0 +1,101 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=c99" } */ ++ ++#include ++ ++struct s { signed char x; }; ++ ++svuint32_t ++f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, ++ int32_t *s32_ptr, uint32_t *u32_ptr, float *f32_ptr, ++ int64_t *s64_ptr, uint64_t *u64_ptr, double *f64_ptr, ++ void *void_ptr, struct s *s_ptr, _Complex float *cf32_ptr, ++ svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, svfloat16_t f16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, 
svfloat64_t f64, struct s s) ++{ ++ svst1_scatter_offset (pg, s32_ptr, s32); /* { dg-error {too few arguments to function 'svst1_scatter_offset'} } */ ++ svst1_scatter_offset (pg, s32_ptr, s32, s32, 0); /* { dg-error {too many arguments to function 'svst1_scatter_offset'} } */ ++ svst1_scatter_offset (0, s32_ptr, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svst1_scatter_offset', which expects 'svbool_t'} } */ ++ svst1_scatter_offset (pg, 0, s32, s32); ++ svst1_scatter_offset (pg, (int *) 0, s32, s32); ++ svst1_scatter_offset (pg, void_ptr, s32, s32); ++ svst1_scatter_offset (pg, s_ptr, s32, s32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32offset_s32' from incompatible pointer type" } */ ++ svst1_scatter_offset (pg, f32_ptr, s32, s32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32offset_s32' from incompatible pointer type" } */ ++ svst1_scatter_offset (pg, f32_ptr, s32, f32); ++ svst1_scatter_offset (pg, cf32_ptr, s32, f32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32offset_f32' from incompatible pointer type" } */ ++ svst1_scatter_offset (pg, s, s32, s32); /* { dg-error {passing 'struct s' to argument 2 of 'svst1_scatter_offset', which expects a vector or pointer base address} } */ ++ ++ svst1_scatter_offset (pg, u32, void_ptr, s32); /* { dg-warning "passing argument 3 of 'svst1_scatter_u32base_offset_s32' makes integer from pointer without a cast" } */ ++ svst1_scatter_offset (pg, u32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_scatter_offset', which expects 'int64_t'} } */ ++ svst1_scatter_offset (pg, u32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_offset', which expects 'int64_t'} } */ ++ ++ svst1_scatter_offset (pg, void_ptr, u32, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ ++ ++ svst1_scatter_offset (pg, s8_ptr, u32, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ ++ svst1_scatter_offset (pg, s8_ptr, u32, u8); /* { dg-error {passing 'svuint8_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ ++ ++ svst1_scatter_offset (pg, s16_ptr, u32, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ ++ svst1_scatter_offset (pg, s16_ptr, u32, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ ++ svst1_scatter_offset (pg, s16_ptr, u32, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ ++ ++ svst1_scatter_offset (pg, u32, 0, s32); ++ svst1_scatter_offset (pg, s32, 0, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_offset', which expects 'svuint32_t'} } */ ++ ++ svst1_scatter_offset (pg, u32, 0, u32); ++ svst1_scatter_offset (pg, s32, 0, u32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_offset', which expects 'svuint32_t'} } */ ++ ++ svst1_scatter_offset (pg, u32, 0, f32); ++ svst1_scatter_offset (pg, s32, 0, f32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_offset', which expects 'svuint32_t'} } */ ++ ++ svst1_scatter_offset (pg, u64, 0, s64); ++ svst1_scatter_offset (pg, s64, 0, s64); /* { dg-error {passing 'svint64_t' to 
argument 2 of 'svst1_scatter_offset', which expects 'svuint64_t'} } */ ++ ++ svst1_scatter_offset (pg, u64, 0, u64); ++ svst1_scatter_offset (pg, s64, 0, u64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter_offset', which expects 'svuint64_t'} } */ ++ ++ svst1_scatter_offset (pg, u64, 0, f64); ++ svst1_scatter_offset (pg, s64, 0, f64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter_offset', which expects 'svuint64_t'} } */ ++ ++ svst1_scatter_offset (pg, s32_ptr, s32, s32); ++ svst1_scatter_offset (pg, s32_ptr, u32, s32); ++ svst1_scatter_offset (pg, s32_ptr, f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_offset (pg, s32_ptr, s64, s32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_offset (pg, s32_ptr, u64, s32); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_offset (pg, s32_ptr, f64, s32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ ++ ++ svst1_scatter_offset (pg, u32_ptr, s32, u32); ++ svst1_scatter_offset (pg, u32_ptr, u32, u32); ++ svst1_scatter_offset (pg, u32_ptr, f32, u32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_offset (pg, u32_ptr, s64, u32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_offset (pg, u32_ptr, u64, u32); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_offset (pg, u32_ptr, f64, u32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ ++ ++ svst1_scatter_offset (pg, f32_ptr, s32, f32); ++ svst1_scatter_offset (pg, f32_ptr, u32, f32); ++ svst1_scatter_offset (pg, f32_ptr, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_offset (pg, f32_ptr, s64, f32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_offset (pg, f32_ptr, u64, f32); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ svst1_scatter_offset (pg, f32_ptr, f64, f32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ ++ ++ svst1_scatter_offset (pg, s64_ptr, s32, s64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_offset (pg, s64_ptr, u32, s64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint64_t' expects a vector of 
64-bit integers} } */ ++ svst1_scatter_offset (pg, s64_ptr, f32, s64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_offset (pg, s64_ptr, s64, s64); ++ svst1_scatter_offset (pg, s64_ptr, u64, s64); ++ svst1_scatter_offset (pg, s64_ptr, f64, s64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ ++ ++ svst1_scatter_offset (pg, u64_ptr, s32, u64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_offset (pg, u64_ptr, u32, u64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_offset (pg, u64_ptr, f32, u64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_offset (pg, u64_ptr, s64, u64); ++ svst1_scatter_offset (pg, u64_ptr, u64, u64); ++ svst1_scatter_offset (pg, u64_ptr, f64, u64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ ++ ++ svst1_scatter_offset (pg, f64_ptr, s32, f64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_offset (pg, f64_ptr, u32, f64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_offset (pg, f64_ptr, f32, f64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ ++ svst1_scatter_offset (pg, f64_ptr, s64, f64); ++ svst1_scatter_offset (pg, f64_ptr, u64, f64); ++ svst1_scatter_offset (pg, f64_ptr, f64, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c +new file mode 100644 +index 000000000..a9233324c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++#pragma GCC target ("arch=armv8.2-a+sve+bf16") ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, ++ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf) ++{ ++ svbfmmla (f32, bf16); /* { dg-error {too few arguments to function 'svbfmmla'} } */ ++ svbfmmla (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfmmla'} } */ ++ svbfmmla (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfmmla', which expects an SVE vector type} } */ ++ svbfmmla (pg, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svbool_t' arguments} } */ ++ svbfmmla (u8, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint8_t' arguments} } */ ++ svbfmmla (u16, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint16_t' arguments} } */ ++ svbfmmla (f64, bf16, 
bf16); /* { dg-error {'svbfmmla' has no form that takes 'svfloat64_t' arguments} } */ ++ svbfmmla (f32, bf16, bf16); ++ svbfmmla (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */ ++ svbfmmla (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */ ++ svbfmmla (f32, bf16, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */ ++ svbfmmla (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */ ++ svbfmmla (f32, bf16, bf); /* { dg-error {passing 'bfloat16_t'[^\n]* to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c +new file mode 100644 +index 000000000..23f027f2d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++#pragma GCC target ("arch=armv8.2-a+sve+bf16") ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, ++ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i) ++{ ++ svbfmlalb_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfmlalb_lane'} } */ ++ svbfmlalb_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfmlalb_lane'} } */ ++ svbfmlalb_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfmlalb_lane', which expects an SVE vector type} } */ ++ svbfmlalb_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svbool_t' arguments} } */ ++ svbfmlalb_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint8_t' arguments} } */ ++ svbfmlalb_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint16_t' arguments} } */ ++ svbfmlalb_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svfloat64_t' arguments} } */ ++ svbfmlalb_lane (f32, bf16, bf16, 0); ++ svbfmlalb_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ ++ svbfmlalb_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ ++ svbfmlalb_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ ++ svbfmlalb_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ ++ svbfmlalb_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */ ++ svbfmlalb_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */ ++ ++ svbfmlalb_lane (f32, bf16, bf16, 0); ++ svbfmlalb_lane (f32, bf16, bf16, 7); ++ svbfmlalb_lane (f32, bf16, bf16, 8); /* { dg-error {passing 8 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */ ++ svbfmlalb_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c +new file mode 100644 +index 000000000..4755ca79a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++#pragma GCC target ("arch=armv8.2-a+sve+bf16") ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, ++ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i) ++{ ++ svbfdot_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfdot_lane'} } */ ++ svbfdot_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfdot_lane'} } */ ++ svbfdot_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfdot_lane', which expects an SVE vector type} } */ ++ svbfdot_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svbool_t' arguments} } */ ++ svbfdot_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint8_t' arguments} } */ ++ svbfdot_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint16_t' arguments} } */ ++ svbfdot_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svfloat64_t' arguments} } */ ++ svbfdot_lane (f32, bf16, bf16, 0); ++ svbfdot_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ ++ svbfdot_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ ++ svbfdot_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ ++ svbfdot_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ ++ svbfdot_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */ ++ svbfdot_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */ ++ ++ svbfdot_lane (f32, bf16, bf16, 0); ++ svbfdot_lane (f32, bf16, bf16, 3); ++ svbfdot_lane (f32, bf16, bf16, 4); /* { dg-error {passing 4 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */ ++ svbfdot_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c +new file mode 100644 +index 000000000..2d09a8eeb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++#pragma GCC target ("arch=armv8.2-a+sve+bf16") ++ ++void ++f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, ++ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf) ++{ ++ svbfdot (f32, bf16); /* { dg-error {too few arguments to function 'svbfdot'} } */ ++ svbfdot (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfdot'} } */ ++ svbfdot (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfdot', which expects an SVE vector type} } */ ++ svbfdot (pg, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svbool_t' arguments} } */ ++ svbfdot (u8, bf16, bf16); /* { 
dg-error {'svbfdot' has no form that takes 'svuint8_t' arguments} } */ ++ svbfdot (u16, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint16_t' arguments} } */ ++ svbfdot (f64, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svfloat64_t' arguments} } */ ++ svbfdot (f32, bf16, bf16); ++ svbfdot (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */ ++ svbfdot (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */ ++ svbfdot (f32, bf16, 0); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ ++ svbfdot (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot', which expects 'svbfloat16_t'} } */ ++ svbfdot (f32, bf16, bf); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c +new file mode 100644 +index 000000000..600be05a8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, ++ svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64, ++ svfloat32_t f32, int i) ++{ ++ svsudot_lane (s32, s8, u8); /* { dg-error {too few arguments to function 'svsudot_lane'} } */ ++ svsudot_lane (s32, s8, u8, 0, 0); /* { dg-error {too many arguments to function 'svsudot_lane'} } */ ++ svsudot_lane (0, s8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svsudot_lane', which expects an SVE vector type} } */ ++ svsudot_lane (pg, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svbool_t' arguments} } */ ++ svsudot_lane (u8, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svuint8_t' arguments} } */ ++ svsudot_lane (f32, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svfloat32_t' arguments} } */ ++ svsudot_lane (u32, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svuint32_t' arguments} } */ ++ svsudot_lane (s32, s8, u8, 0); ++ svsudot_lane (s32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svsudot_lane', which expects an SVE vector type} } */ ++ svsudot_lane (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svsudot_lane', which expects an SVE vector type} } */ ++ ++ svsudot_lane (s32, s8, u8, 0); ++ svsudot_lane (s32, u8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsudot_lane', which expects a vector of signed integers} } */ ++ svsudot_lane (s32, s8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot_lane', which expects a vector of unsigned integers} } */ ++ svsudot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svsudot_lane', after passing 'svint32_t' to argument 1} } */ ++ ++ svsudot_lane (s32, s8, u8, i); /* { dg-error {argument 4 of 'svsudot_lane' must be an integer constant expression} } */ ++ svsudot_lane (s32, s8, u8, 0); ++ svsudot_lane (s32, s8, u8, 3); ++ svsudot_lane (s32, s8, u8, 4); /* { dg-error {passing 4 to argument 4 of 'svsudot_lane', which expects a value in the range \[0, 3\]} } */ ++ svsudot_lane (s32, s8, u8, -1); /* { dg-error {passing -1 to argument 4 of 'svsudot_lane', which expects a value in the range \[0, 3\]} } */ ++} +diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c +new file mode 100644 +index 000000000..f95ac582f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */ ++ ++#include ++ ++svuint32_t ++f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32) ++{ ++ svsudot_s32 (s32); /* { dg-error {too few arguments to function 'svsudot_s32'} } */ ++ svsudot_s32 (s32, s8, u8, u32); /* { dg-error {too many arguments to function 'svsudot_s32'} } */ ++ svsudot_s32 (s32, s32, u8); /* { dg-error {incompatible type for argument 2 of 'svsudot_s32'} } */ ++ svsudot_s32 (s32, u8, u8); /* { dg-error {incompatible type for argument 2 of 'svsudot_s32'} } */ ++ svsudot_s32 (s32, s8, u32); /* { dg-error {incompatible type for argument 3 of 'svsudot_s32'} } */ ++ svsudot_s32 (s32, s8, s8); /* { dg-error {incompatible type for argument 3 of 'svsudot_s32'} } */ ++ svsudot_s32 (s32, s8, 0); /* { dg-error {incompatible type for argument 3 of 'svsudot_s32'} } */ ++ svsudot_s32 (s32, s8, u8); ++ return svsudot_s32 (s32, s8, u8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, ++ svint32_t s32, svfloat32_t f32) ++{ ++ svsudot (s32, s8); /* { dg-error {too few arguments to function 'svsudot'} } */ ++ svsudot (s32, s8, u8, u8); /* { dg-error {too many arguments to function 'svsudot'} } */ ++ svsudot (0, s8, u8); /* { dg-error {passing 'int' to argument 1 of 'svsudot', which expects an SVE vector type} } */ ++ svsudot (pg, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svbool_t' arguments} } */ ++ svsudot (u8, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svuint8_t' arguments} } */ ++ svsudot (f32, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svfloat32_t' arguments} } */ ++ svsudot (s32, s8, u8); ++ svsudot (s32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svsudot', which expects an SVE vector type} } */ ++ svsudot (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsudot', which expects a vector of signed integers} } */ ++ svsudot (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot', which expects a vector of unsigned integers} } */ ++ svsudot (s32, s8, 0); ++ svsudot (s32, s8, u8); ++ svsudot (s32, u32, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svsudot', which expects a vector of signed integers} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c +new file mode 100644 +index 000000000..bbd1f91be +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, ++ svint32_t s32, int i) ++{ ++ svmla_lane (f32, f32, f32); /* { dg-error {too few arguments to function 'svmla_lane'} } */ ++ svmla_lane (f32, f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane'} } */ ++ svmla_lane (pg, pg, pg, 0); /* { dg-error {'svmla_lane' has no form that takes 'svbool_t' arguments} } */ ++ svmla_lane (s32, s32, s32, 0); /* { dg-error {'svmla_lane' 
has no form that takes 'svint32_t' arguments} } */ ++ svmla_lane (1, f32, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmla_lane', which expects an SVE vector type} } */ ++ svmla_lane (f32, 1, f32, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane', which expects an SVE vector type} } */ ++ svmla_lane (f32, f32, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane', which expects an SVE vector type} } */ ++ svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */ ++ svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */ ++ svmla_lane (f32, f32, f32, s32); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */ ++ svmla_lane (f32, f32, f32, i); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */ ++ ++ svmla_lane (f16, f16, f16, 0); ++ svmla_lane (f16, f16, f16, 7); ++ svmla_lane (f16, f16, f16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 7\]} } */ ++ svmla_lane (f16, f16, f16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 7\]} } */ ++ ++ svmla_lane (f32, f32, f32, 0); ++ svmla_lane (f32, f32, f32, 3); ++ svmla_lane (f32, f32, f32, 4); /* { dg-error {passing 4 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 3\]} } */ ++ svmla_lane (f32, f32, f32, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 3\]} } */ ++ ++ svmla_lane (f64, f64, f64, 0); ++ svmla_lane (f64, f64, f64, 1); ++ svmla_lane (f64, f64, f64, 2); /* { dg-error {passing 2 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 1\]} } */ ++ svmla_lane (f64, f64, f64, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 1\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c +new file mode 100644 +index 000000000..bccc6c7e2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, ++ svint32_t s32, int i) ++{ ++ svcmla_lane (f32, f32, f32, 0); /* { dg-error {too few arguments to function 'svcmla_lane'} } */ ++ svcmla_lane (f32, f32, f32, 0, 90, 90); /* { dg-error {too many arguments to function 'svcmla_lane'} } */ ++ svcmla_lane (pg, pg, pg, 0, 90); /* { dg-error {'svcmla_lane' has no form that takes 'svbool_t' arguments} } */ ++ svcmla_lane (s32, s32, s32, 0, 90); /* { dg-error {'svcmla_lane' has no form that takes 'svint32_t' arguments} } */ ++ svcmla_lane (f64, f64, f64, 0, 90); /* { dg-error {'svcmla_lane' has no form that takes 'svfloat64_t' arguments} } */ ++ svcmla_lane (1, f32, f32, 0, 90); /* { dg-error {passing 'int' to argument 1 of 'svcmla_lane', which expects an SVE vector type} } */ ++ svcmla_lane (f32, 1, f32, 0, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_lane', which expects an SVE vector type} } */ ++ svcmla_lane (f32, f32, 1, 0, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_lane', which expects an SVE vector type} } */ ++ svcmla_lane (f32, f64, f32, 0, 
90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */ ++ svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */ ++ svcmla_lane (f32, f32, f32, s32, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */ ++ svcmla_lane (f32, f32, f32, i, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */ ++ ++ svcmla_lane (f16, f16, f16, 0, 0); ++ svcmla_lane (f16, f16, f16, 3, 0); ++ svcmla_lane (f16, f16, f16, 4, 0); /* { dg-error {passing 4 to argument 4 of 'svcmla_lane', which expects a value in the range \[0, 3\]} } */ ++ svcmla_lane (f16, f16, f16, -1, 0); /* { dg-error {passing -1 to argument 4 of 'svcmla_lane', which expects a value in the range \[0, 3\]} } */ ++ ++ svcmla_lane (f32, f32, f32, 0, 0); ++ svcmla_lane (f32, f32, f32, 1, 0); ++ svcmla_lane (f32, f32, f32, 2, 0); /* { dg-error {passing 2 to argument 4 of 'svcmla_lane', which expects a value in the range \[0, 1\]} } */ ++ svcmla_lane (f32, f32, f32, -1, 0); /* { dg-error {passing -1 to argument 4 of 'svcmla_lane', which expects a value in the range \[0, 1\]} } */ ++ ++ svcmla_lane (f32, f32, f32, 0, -90); /* { dg-error {passing -90 to argument 5 of 'svcmla_lane', which expects 0, 90, 180 or 270} } */ ++ svcmla_lane (f32, f32, f32, 0, 0); ++ svcmla_lane (f32, f32, f32, 0, 1); /* { dg-error {passing 1 to argument 5 of 'svcmla_lane', which expects 0, 90, 180 or 270} } */ ++ svcmla_lane (f32, f32, f32, 0, 90); ++ svcmla_lane (f32, f32, f32, 0, 180); ++ svcmla_lane (f32, f32, f32, 0, 270); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c +new file mode 100644 +index 000000000..c4a80e9da +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16) ++{ ++ svmla_x (pg, u8, u8); /* { dg-error {too few arguments to function 'svmla_x'} } */ ++ svmla_x (pg, u8, u8, u8, u8); /* { dg-error {too many arguments to function 'svmla_x'} } */ ++ svmla_x (u8, u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmla_x', which expects 'svbool_t'} } */ ++ svmla_x (pg, pg, pg, pg); /* { dg-error {'svmla_x' has no form that takes 'svbool_t' arguments} } */ ++ svmla_x (pg, 1, u8, u8); /* { dg-error {passing 'int' to argument 2 of 'svmla_x', which expects an SVE vector type} } */ ++ svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, u8); ++ svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, 0, u8); /* { dg-error {passing 'int' to argument 3 of 'svmla_x', which 
expects an SVE vector type} } */ ++ svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ ++ svmla_x (pg, u8, u8, 0); ++ ++ svmla_x (pg, f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ ++ svmla_x (pg, f16, u16, f16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ ++ svmla_x (pg, f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ ++ svmla_x (pg, f16, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ ++ svmla_x (pg, f16, f16, f16); ++ svmla_x (pg, f16, f16, 1); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c +new file mode 100644 +index 000000000..e81552b64 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, ++ svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64, ++ svfloat32_t f32, int i) ++{ ++ svdot_lane (u32, u8, u8); /* { dg-error {too few arguments to function 'svdot_lane'} } */ ++ svdot_lane (u32, u8, u8, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane'} } */ ++ svdot_lane (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane', which expects an SVE vector type} } */ ++ svdot_lane (pg, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svbool_t' arguments} } */ ++ svdot_lane (u8, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint8_t' arguments} } */ ++ svdot_lane (f32, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svfloat32_t' arguments} } */ ++ svdot_lane (u32, u8, u8, 0); ++ svdot_lane (u32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane', which expects an SVE vector type} } */ ++ svdot_lane (u32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane', which expects an SVE vector type} } */ ++ ++ svdot_lane (s32, s8, s8, 0); ++ svdot_lane (s32, u8, s8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ ++ svdot_lane (s32, s8, u8, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ ++ svdot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svdot_lane', after passing 'svint32_t' to argument 1} } */ ++ ++ svdot_lane 
(u32, u8, u8, 0); ++ svdot_lane (u32, s8, u8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ ++ svdot_lane (u32, u8, s8, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ ++ svdot_lane (u32, u32, u32, 0); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svdot_lane', after passing 'svuint32_t' to argument 1} } */ ++ ++ svdot_lane (s64, s16, s16, 0); ++ svdot_lane (s64, u16, s16, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint64_t' and 'svuint16_t' respectively} } */ ++ svdot_lane (s64, s16, u16, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint64_t' and 'svuint16_t' respectively} } */ ++ svdot_lane (s64, s64, s64, 0); /* { dg-error {passing 'svint64_t' instead of the expected 'svint16_t' to argument 2 of 'svdot_lane', after passing 'svint64_t' to argument 1} } */ ++ ++ svdot_lane (u64, u16, u16, 0); ++ svdot_lane (u64, s16, u16, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint64_t' and 'svint16_t' respectively} } */ ++ svdot_lane (u64, u16, s16, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint64_t' and 'svint16_t' respectively} } */ ++ svdot_lane (u64, u64, u64, 0); /* { dg-error {passing 'svuint64_t' instead of the expected 'svuint16_t' to argument 2 of 'svdot_lane', after passing 'svuint64_t' to argument 1} } */ ++ ++ svdot_lane (s32, s8, s8, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ ++ svdot_lane (s32, s8, s8, 0); ++ svdot_lane (s32, s8, s8, 3); ++ svdot_lane (s32, s8, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ ++ svdot_lane (s32, s8, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ ++ ++ svdot_lane (u32, u8, u8, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ ++ svdot_lane (u32, u8, u8, 0); ++ svdot_lane (u32, u8, u8, 3); ++ svdot_lane (u32, u8, u8, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ ++ svdot_lane (u32, u8, u8, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ ++ ++ svdot_lane (s64, s16, s16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ ++ svdot_lane (s64, s16, s16, 0); ++ svdot_lane (s64, s16, s16, 1); ++ svdot_lane (s64, s16, s16, 2); /* { dg-error {passing 2 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 1\]} } */ ++ svdot_lane (s64, s16, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 1\]} } */ ++ ++ svdot_lane (u64, u16, u16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ ++ svdot_lane (u64, u16, u16, 0); ++ svdot_lane (u64, u16, u16, 1); ++ svdot_lane (u64, u16, u16, 2); /* { dg-error {passing 2 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 
1\]} } */ ++ svdot_lane (u64, u16, u16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 1\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_1.c +new file mode 100644 +index 000000000..b41e6fcce +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_1.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++svint32_t ++f1 (svuint32_t u32, svuint8_t u8, svint8_t s8) ++{ ++ svdot_u32 (u32); /* { dg-error {too few arguments to function 'svdot_u32'} } */ ++ svdot_u32 (u32, u8, u8, u32); /* { dg-error {too many arguments to function 'svdot_u32'} } */ ++ svdot_u32 (u32, u32, u8); /* { dg-error {incompatible type for argument 2 of 'svdot_u32'} } */ ++ svdot_u32 (u32, s8, u8); /* { dg-error {incompatible type for argument 2 of 'svdot_u32'} } */ ++ svdot_u32 (u32, u8, u32); /* { dg-error {incompatible type for argument 3 of 'svdot_u32'} } */ ++ svdot_u32 (u32, u8, s8); /* { dg-error {incompatible type for argument 3 of 'svdot_u32'} } */ ++ return svdot_u32 (u32, u8, u8); /* { dg-error {incompatible types when returning type 'svuint32_t' but 'svint32_t' was expected} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c +new file mode 100644 +index 000000000..fee4096fe +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, ++ svfloat32_t f32) ++{ ++ svdot (u32, u8); /* { dg-error {too few arguments to function 'svdot'} } */ ++ svdot (u32, u8, u8, u8); /* { dg-error {too many arguments to function 'svdot'} } */ ++ svdot (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svdot', which expects an SVE vector type} } */ ++ svdot (pg, u8, u8); /* { dg-error {'svdot' has no form that takes 'svbool_t' arguments} } */ ++ svdot (u8, u8, u8); /* { dg-error {'svdot' has no form that takes 'svuint8_t' arguments} } */ ++ svdot (f32, u8, u8); /* { dg-error {'svdot' has no form that takes 'svfloat32_t' arguments} } */ ++ svdot (u32, u8, u8); ++ svdot (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svdot', which expects an SVE vector type} } */ ++ svdot (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ ++ svdot (u32, u8, 0); ++ svdot (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ ++ svdot (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svdot', after passing 'svuint32_t' to argument 1} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c +new file mode 100644 +index 000000000..f340e3d1e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) ++{ ++ svcmla_x (pg, f32, f32, f32); /* { dg-error {too few 
arguments to function 'svcmla_x'} } */ ++ svcmla_x (pg, f32, f32, f32, 90, 90); /* { dg-error {too many arguments to function 'svcmla_x'} } */ ++ svcmla_x (f32, f32, f32, f32, 90); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svcmla_x', which expects 'svbool_t'} } */ ++ svcmla_x (pg, pg, pg, pg, 90); /* { dg-error {'svcmla_x' has no form that takes 'svbool_t' arguments} } */ ++ svcmla_x (pg, s32, s32, s32, 90); /* { dg-error {'svcmla_x' has no form that takes 'svint32_t' arguments} } */ ++ svcmla_x (pg, 1, f32, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_x', which expects an SVE vector type} } */ ++ svcmla_x (pg, f32, 1, f32, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_x', which expects an SVE vector type} } */ ++ svcmla_x (pg, f32, f32, 1, 90); /* { dg-error {passing 'int' to argument 4 of 'svcmla_x', which expects an SVE vector type} } */ ++ svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */ ++ svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */ ++ svcmla_x (pg, f32, f32, f32, s32); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */ ++ svcmla_x (pg, f32, f32, f32, i); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */ ++ svcmla_x (pg, f32, f32, f32, -90); /* { dg-error {passing -90 to argument 5 of 'svcmla_x', which expects 0, 90, 180 or 270} } */ ++ svcmla_x (pg, f32, f32, f32, 0); ++ svcmla_x (pg, f32, f32, f32, 1); /* { dg-error {passing 1 to argument 5 of 'svcmla_x', which expects 0, 90, 180 or 270} } */ ++ svcmla_x (pg, f32, f32, f32, 90); ++ svcmla_x (pg, f32, f32, f32, 180); ++ svcmla_x (pg, f32, f32, f32, 270); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c +new file mode 100644 +index 000000000..f52fb39bf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */ ++ ++#include ++ ++svuint32_t ++f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32) ++{ ++ svusmmla_s32 (s32); /* { dg-error {too few arguments to function 'svusmmla_s32'} } */ ++ svusmmla_s32 (s32, u8, s8, u32); /* { dg-error {too many arguments to function 'svusmmla_s32'} } */ ++ svusmmla_s32 (s32, u32, s8); /* { dg-error {incompatible type for argument 2 of 'svusmmla_s32'} } */ ++ svusmmla_s32 (s32, s8, s8); /* { dg-error {incompatible type for argument 2 of 'svusmmla_s32'} } */ ++ svusmmla_s32 (s32, u8, u8); /* { dg-error {incompatible type for argument 3 of 'svusmmla_s32'} } */ ++ svusmmla_s32 (s32, u8, s32); /* { dg-error {incompatible type for argument 3 of 'svusmmla_s32'} } */ ++ svusmmla_s32 (s32, u8, 0); /* { dg-error {incompatible type for argument 3 of 'svusmmla_s32'} } */ ++ svusmmla_s32 (s32, u8, s8); ++ return svusmmla_s32 (s32, u8, s8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, ++ svint32_t s32, svfloat32_t f32) ++{ ++ svusmmla (s32, u8); /* { dg-error {too few arguments to function 'svusmmla'} } */ ++ svusmmla (s32, u8, s8, u8); /* { dg-error {too many arguments to function 'svusmmla'} } */ 
++ svusmmla (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusmmla', which expects an SVE vector type} } */ ++ svusmmla (pg, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svbool_t' arguments} } */ ++ svusmmla (u8, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svuint8_t' arguments} } */ ++ svusmmla (f32, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svfloat32_t' arguments} } */ ++ svusmmla (s32, u8, s8); ++ svusmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusmmla', which expects an SVE vector type} } */ ++ svusmmla (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusmmla', which expects a vector of signed integers} } */ ++ svusmmla (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusmmla', which expects a vector of unsigned integers} } */ ++ svusmmla (s32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svusmmla', which expects an SVE vector type} } */ ++ svusmmla (s32, u8, s8); ++ svusmmla (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svusmmla', after passing 'svint32_t' to argument 1} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c +new file mode 100644 +index 000000000..b40cfe9e8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, ++ svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64, ++ svfloat32_t f32, int i) ++{ ++ svusdot_lane (s32, u8, s8); /* { dg-error {too few arguments to function 'svusdot_lane'} } */ ++ svusdot_lane (s32, u8, s8, 0, 0); /* { dg-error {too many arguments to function 'svusdot_lane'} } */ ++ svusdot_lane (0, u8, s8, 0); /* { dg-error {passing 'int' to argument 1 of 'svusdot_lane', which expects an SVE vector type} } */ ++ svusdot_lane (pg, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svbool_t' arguments} } */ ++ svusdot_lane (u8, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svuint8_t' arguments} } */ ++ svusdot_lane (f32, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svfloat32_t' arguments} } */ ++ svusdot_lane (u32, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svuint32_t' arguments} } */ ++ svusdot_lane (s32, u8, s8, 0); ++ svusdot_lane (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svusdot_lane', which expects an SVE vector type} } */ ++ svusdot_lane (s32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svusdot_lane', which expects an SVE vector type} } */ ++ ++ svusdot_lane (s32, u8, s8, 0); ++ svusdot_lane (s32, s8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusdot_lane', which expects a vector of unsigned integers} } */ ++ svusdot_lane (s32, u8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot_lane', which expects a vector of signed integers} } */ ++ svusdot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svusdot_lane', which expects a vector of unsigned integers} } */ ++ ++ svusdot_lane (s32, u8, s8, i); /* { dg-error {argument 4 of 'svusdot_lane' must be an integer constant expression} } */ ++ svusdot_lane 
(s32, u8, s8, 0); ++ svusdot_lane (s32, u8, s8, 3); ++ svusdot_lane (s32, u8, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svusdot_lane', which expects a value in the range \[0, 3\]} } */ ++ svusdot_lane (s32, u8, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svusdot_lane', which expects a value in the range \[0, 3\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c +new file mode 100644 +index 000000000..896b80390 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */ ++ ++#include ++ ++svuint32_t ++f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32) ++{ ++ svusdot_s32 (s32); /* { dg-error {too few arguments to function 'svusdot_s32'} } */ ++ svusdot_s32 (s32, u8, s8, u32); /* { dg-error {too many arguments to function 'svusdot_s32'} } */ ++ svusdot_s32 (s32, u32, s8); /* { dg-error {incompatible type for argument 2 of 'svusdot_s32'} } */ ++ svusdot_s32 (s32, s8, s8); /* { dg-error {incompatible type for argument 2 of 'svusdot_s32'} } */ ++ svusdot_s32 (s32, u8, u8); /* { dg-error {incompatible type for argument 3 of 'svusdot_s32'} } */ ++ svusdot_s32 (s32, u8, s32); /* { dg-error {incompatible type for argument 3 of 'svusdot_s32'} } */ ++ svusdot_s32 (s32, u8, 0); /* { dg-error {incompatible type for argument 3 of 'svusdot_s32'} } */ ++ svusdot_s32 (s32, u8, s8); ++ return svusdot_s32 (s32, u8, s8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ ++} ++ ++void ++f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, ++ svint32_t s32, svfloat32_t f32) ++{ ++ svusdot (s32, u8); /* { dg-error {too few arguments to function 'svusdot'} } */ ++ svusdot (s32, u8, s8, u8); /* { dg-error {too many arguments to function 'svusdot'} } */ ++ svusdot (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusdot', which expects an SVE vector type} } */ ++ svusdot (pg, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svbool_t' arguments} } */ ++ svusdot (u8, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svuint8_t' arguments} } */ ++ svusdot (f32, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svfloat32_t' arguments} } */ ++ svusdot (s32, u8, s8); ++ svusdot (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusdot', which expects an SVE vector type} } */ ++ svusdot (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot', which expects a vector of signed integers} } */ ++ svusdot (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusdot', which expects a vector of unsigned integers} } */ ++ svusdot (s32, u8, 0); ++ svusdot (s32, u8, s8); ++ svusdot (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svusdot', after passing 'svint32_t' to argument 1} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c +new file mode 100644 +index 000000000..8b98fc24d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) ++{ ++ svtmad (f32, f32); /* { 
dg-error {too few arguments to function 'svtmad'} } */ ++ svtmad (f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svtmad'} } */ ++ svtmad (pg, pg, 0); /* { dg-error {'svtmad' has no form that takes 'svbool_t' arguments} } */ ++ svtmad (s32, s32, 0); /* { dg-error {'svtmad' has no form that takes 'svint32_t' arguments} } */ ++ svtmad (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svtmad', which expects an SVE vector type} } */ ++ svtmad (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svtmad', which expects an SVE vector type} } */ ++ svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but previous arguments had type 'svfloat32_t'} } */ ++ svtmad (f32, f32, s32); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */ ++ svtmad (f32, f32, i); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */ ++ svtmad (f32, f32, -1); /* { dg-error {passing -1 to argument 3 of 'svtmad', which expects a value in the range \[0, 7\]} } */ ++ svtmad (f32, f32, 0); ++ svtmad (f32, f32, 1); ++ svtmad (f32, f32, 7); ++ svtmad (f32, f32, 8); /* { dg-error {passing 8 to argument 3 of 'svtmad', which expects a value in the range \[0, 7\]} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_1.c +new file mode 100644 +index 000000000..70b2d9dd1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_1.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svbool_t; /* { dg-message "note: previous declaration of 'svbool_t' was here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svbool_t' redeclared} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_10.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_10.c +new file mode 100644 +index 000000000..8278c1cad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_10.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++typedef struct svint8x2_t svint8x2_t; /* { dg-message "note: previous declaration of 'svint8x2_t' was here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting types for 'svint8x2_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_11.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_11.c +new file mode 100644 +index 000000000..2147df72c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_11.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++ ++/* This isn't explicitly allowed or disallowed, but mustn't ICE. */ ++struct svint8x2_t; ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++void ++f (svint8x2_t *a, struct svint8x2_t *b) ++{ ++ *a = *b; /* { dg-error {dereferencing pointer to incomplete type} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_12.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_12.c +new file mode 100644 +index 000000000..1a6ccbd05 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_12.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++ ++/* This isn't explicitly allowed or disallowed, but mustn't ICE. 
*/ ++struct svint8x2_t { int x; }; ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++void ++f (svint8x2_t *a, struct svint8x2_t *b) ++{ ++ *a = *b; /* { dg-error {incompatible types} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_13.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_13.c +new file mode 100644 +index 000000000..62bab1f84 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_13.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-message "note: previous declaration of 'svint8x2_t' was here" } */ ++ ++int svint8x2_t; /* { dg-error {'svint8x2_t' redeclared} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_14.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_14.c +new file mode 100644 +index 000000000..0f00db1fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_14.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++enum svpattern { FOO }; /* { dg-message "note: originally defined here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {redeclaration of 'enum svpattern'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_15.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_15.c +new file mode 100644 +index 000000000..ea9721749 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_15.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-message "note: originally defined here" } */ ++ ++enum svpattern { FOO }; /* { dg-error {redeclaration of 'enum svpattern'} } */ ++enum foo { SV_ALL }; /* { dg-error {redeclaration of enumerator 'SV_ALL'} } */ ++typedef int SV_POW2; /* { dg-error {'SV_POW2' redeclared as different kind of symbol} } */ ++int SV_VL3; /* { dg-error {'SV_VL3' redeclared as different kind of symbol} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_16.c +new file mode 100644 +index 000000000..a59dabc6c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_16.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++struct svpattern { int x; }; ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svpattern' defined as wrong kind of tag} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_17.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_17.c +new file mode 100644 +index 000000000..027fdb2b9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_17.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++struct svpattern { int x; }; /* { dg-error {'svpattern' defined as wrong kind of tag} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_18.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_18.c +new file mode 100644 +index 000000000..b6706150b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_18.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svpattern; /* OK in C. 
*/ ++ ++#pragma GCC aarch64 "arm_sve.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_19.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_19.c +new file mode 100644 +index 000000000..c6379f762 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_19.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++int svpattern; /* OK in C. */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_2.c +new file mode 100644 +index 000000000..ffd86ae7b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_2.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svint8_t; /* { dg-message "note: previous declaration of 'svint8_t' was here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svint8_t' redeclared} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_20.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_20.c +new file mode 100644 +index 000000000..3d770a956 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_20.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++ ++enum foo { SV_VL4 }; ++typedef int SV_POW2; ++int SV_ALL; ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {redeclaration of enumerator 'SV_VL4'} } */ ++/* { dg-error {'SV_POW2' redeclared as different kind of symbol} "" { target *-*-* } .-1 } */ ++/* { dg-error {'SV_ALL' redeclared as different kind of symbol} "" { target *-*-* } .-2 } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_3.c +new file mode 100644 +index 000000000..f42dd9680 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_3.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svuint16_t; /* { dg-message "note: previous declaration of 'svuint16_t' was here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svuint16_t' redeclared} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_4.c +new file mode 100644 +index 000000000..91c95a1f5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_4.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svfloat32_t; /* { dg-message "note: previous declaration of 'svfloat32_t' was here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svfloat32_t' redeclared} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_5.c +new file mode 100644 +index 000000000..3cb6b8a1c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_5.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++typedef int svbool_t; /* { dg-message "note: previous declaration of 'svbool_t' was here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting types for 'svbool_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_6.c +new file mode 100644 +index 000000000..c051897b6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_6.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++ ++typedef __SVBool_t svbool_t; /* { 
dg-message "note: previous declaration of 'svbool_t' was here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {redefinition of typedef 'svbool_t'} } */ ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_7.c +new file mode 100644 +index 000000000..fd4063154 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_7.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=gnu90" } */ ++ ++typedef __SVBool_t svbool_t; ++ ++/* Without -pedantic-errors this should compile. */ ++#pragma GCC aarch64 "arm_sve.h" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_8.c +new file mode 100644 +index 000000000..41614a304 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_8.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++int svint8x2_t; /* { dg-message "note: previous declaration of 'svint8x2_t' was here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svint8x2_t' redeclared} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_9.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_9.c +new file mode 100644 +index 000000000..83b6855df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_9.c +@@ -0,0 +1,5 @@ ++/* { dg-do compile } */ ++ ++typedef int svint8x2_t; /* { dg-message "note: previous declaration of 'svint8x2_t' was here" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting types for 'svint8x2_t'} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c +new file mode 100644 +index 000000000..eef85a01d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32) ++{ ++ svabs_m (s32, pg); /* { dg-error {too few arguments to function 'svabs_m'} } */ ++ svabs_m (s32, pg, s32, s32); /* { dg-error {too many arguments to function 'svabs_m'} } */ ++ svabs_m (0, pg, s32); /* { dg-error {passing 'int' to argument 1 of 'svabs_m', which expects an SVE vector type} } */ ++ svabs_m (s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svabs_m', which expects 'svbool_t'} } */ ++ svabs_m (s32, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svabs_m', which expects 'svbool_t'} } */ ++ svabs_m (s32, pg, s32); ++ svabs_m (u32, pg, u32); /* { dg-error {'svabs_m' has no form that takes 'svuint32_t' arguments} } */ ++ svabs_m (f32, pg, f32); ++ svabs_m (s32, pg, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */ ++ svabs_m (s32, pg, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */ ++ svabs_m (s32, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */ ++ svabs_m (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svbool_t'} } */ ++ svabs_m (pg, pg, pg); /* { dg-error {'svabs_m' has no form that takes 'svbool_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c +new file mode 100644 +index 000000000..e94673a66 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8) ++{ ++ svabs_x (pg); /* { dg-error {too few arguments to function 'svabs_x'} } */ ++ svabs_x (pg, s8, s8); /* { dg-error {too many arguments to function 'svabs_x'} } */ ++ svabs_x (s8, s8); /* { dg-error {passing 'svint8_t' to argument 1 of 'svabs_x', which expects 'svbool_t'} } */ ++ svabs_x (pg, pg); /* { dg-error {'svabs_x' has no form that takes 'svbool_t' arguments} } */ ++ svabs_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svabs_x', which expects an SVE vector type} } */ ++ svabs_x (pg, s8); ++ svabs_x (pg, u8); /* { dg-error {'svabs_x' has no form that takes 'svuint8_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c +new file mode 100644 +index 000000000..caa4e623d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c +@@ -0,0 +1,73 @@ ++#include ++ ++void ++test (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, ++ svint64_t s64, svuint64_t u64, svfloat16_t f16, svfloat32_t f32, ++ svfloat64_t f64) ++{ ++ svcvt_f64_x (pg); /* { dg-error {too few arguments to function 'svcvt_f64_x'} } */ ++ svcvt_f64_x (pg, s32, 0); /* { dg-error {too many arguments to function 'svcvt_f64_x'} } */ ++ svcvt_f64_x (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svcvt_f64_x', which expects 'svbool_t'} } */ ++ svcvt_f64_x (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svcvt_f64_x', which expects an SVE vector type} } */ ++ ++ svcvt_f64_x (pg, s8); /* { dg-error {'svcvt_f64_x' has no form that takes 'svint8_t' arguments} } */ ++ svcvt_f64_x (pg, s16); /* { dg-error {'svcvt_f64_x' has no form that takes 'svint16_t' arguments} } */ ++ svcvt_f64_x (pg, s32); ++ svcvt_f64_x (pg, s64); ++ svcvt_f64_x (pg, u8); /* { dg-error {'svcvt_f64_x' has no form that takes 'svuint8_t' arguments} } */ ++ svcvt_f64_x (pg, u16); /* { dg-error {'svcvt_f64_x' has no form that takes 'svuint16_t' arguments} } */ ++ svcvt_f64_x (pg, u32); ++ svcvt_f64_x (pg, u64); ++ svcvt_f64_x (pg, f16); ++ svcvt_f64_x (pg, f32); ++ svcvt_f64_x (pg, f64); /* { dg-error {'svcvt_f64_x' has no form that takes 'svfloat64_t' arguments} } */ ++ ++ svcvt_f32_x (pg, s8); /* { dg-error {'svcvt_f32_x' has no form that takes 'svint8_t' arguments} } */ ++ svcvt_f32_x (pg, s16); /* { dg-error {'svcvt_f32_x' has no form that takes 'svint16_t' arguments} } */ ++ svcvt_f32_x (pg, s32); ++ svcvt_f32_x (pg, s64); ++ svcvt_f32_x (pg, u8); /* { dg-error {'svcvt_f32_x' has no form that takes 'svuint8_t' arguments} } */ ++ svcvt_f32_x (pg, u16); /* { dg-error {'svcvt_f32_x' has no form that takes 'svuint16_t' arguments} } */ ++ svcvt_f32_x (pg, u32); ++ svcvt_f32_x (pg, u64); ++ svcvt_f32_x (pg, f16); ++ svcvt_f32_x (pg, f32); /* { dg-error {'svcvt_f32_x' has no form that takes 'svfloat32_t' arguments} } */ ++ svcvt_f32_x (pg, f64); ++ ++ svcvt_f16_x (pg, s8); /* { dg-error {'svcvt_f16_x' has no form that takes 'svint8_t' arguments} } */ ++ svcvt_f16_x (pg, s16); ++ svcvt_f16_x (pg, s32); ++ svcvt_f16_x (pg, s64); ++ svcvt_f16_x (pg, u8); /* { dg-error {'svcvt_f16_x' has no form that takes 'svuint8_t' 
arguments} } */ ++ svcvt_f16_x (pg, u16); ++ svcvt_f16_x (pg, u32); ++ svcvt_f16_x (pg, u64); ++ svcvt_f16_x (pg, f16); /* { dg-error {'svcvt_f16_x' has no form that takes 'svfloat16_t' arguments} } */ ++ svcvt_f16_x (pg, f32); ++ svcvt_f16_x (pg, f64); ++ ++ svcvt_s64_x (pg, f16); ++ svcvt_s64_x (pg, f32); ++ svcvt_s64_x (pg, f64); ++ ++ svcvt_s32_x (pg, f16); ++ svcvt_s32_x (pg, f32); ++ svcvt_s32_x (pg, f64); ++ ++ svcvt_s16_x (pg, f16); ++ svcvt_s16_x (pg, f32); /* { dg-error {'svcvt_s16_x' has no form that takes 'svfloat32_t' arguments} } */ ++ svcvt_s16_x (pg, f64); /* { dg-error {'svcvt_s16_x' has no form that takes 'svfloat64_t' arguments} } */ ++ ++ svcvt_u64_x (pg, f16); ++ svcvt_u64_x (pg, f32); ++ svcvt_u64_x (pg, f64); ++ ++ svcvt_u32_x (pg, f16); ++ svcvt_u32_x (pg, f32); ++ svcvt_u32_x (pg, f64); ++ ++ svcvt_u16_x (pg, f16); ++ svcvt_u16_x (pg, f32); /* { dg-error {'svcvt_u16_x' has no form that takes 'svfloat32_t' arguments} } */ ++ svcvt_u16_x (pg, f64); /* { dg-error {'svcvt_u16_x' has no form that takes 'svfloat64_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c +new file mode 100644 +index 000000000..ddbd93b69 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c +@@ -0,0 +1,76 @@ ++#include ++ ++void ++test (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, ++ svint64_t s64, svuint64_t u64, svfloat16_t f16, svfloat32_t f32, ++ svfloat64_t f64) ++{ ++ svcvt_f64_m (f64, pg); /* { dg-error {too few arguments to function 'svcvt_f64_m'} } */ ++ svcvt_f64_m (f64, pg, s32, 0); /* { dg-error {too many arguments to function 'svcvt_f64_m'} } */ ++ svcvt_f64_m (f32, pg, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svcvt_f64_m', which expects 'svfloat64_t'} } */ ++ svcvt_f64_m (0, pg, s32); /* { dg-error {passing 'int' to argument 1 of 'svcvt_f64_m', which expects 'svfloat64_t'} } */ ++ svcvt_f64_m (pg, pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svcvt_f64_m', which expects 'svfloat64_t'} } */ ++ svcvt_f64_m (f64, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcvt_f64_m', which expects 'svbool_t'} } */ ++ svcvt_f64_m (f64, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svcvt_f64_m', which expects an SVE vector type} } */ ++ ++ svcvt_f64_m (f64, pg, s8); /* { dg-error {'svcvt_f64_m' has no form that takes 'svint8_t' arguments} } */ ++ svcvt_f64_m (f64, pg, s16); /* { dg-error {'svcvt_f64_m' has no form that takes 'svint16_t' arguments} } */ ++ svcvt_f64_m (f64, pg, s32); ++ svcvt_f64_m (f64, pg, s64); ++ svcvt_f64_m (f64, pg, u8); /* { dg-error {'svcvt_f64_m' has no form that takes 'svuint8_t' arguments} } */ ++ svcvt_f64_m (f64, pg, u16); /* { dg-error {'svcvt_f64_m' has no form that takes 'svuint16_t' arguments} } */ ++ svcvt_f64_m (f64, pg, u32); ++ svcvt_f64_m (f64, pg, u64); ++ svcvt_f64_m (f64, pg, f16); ++ svcvt_f64_m (f64, pg, f32); ++ svcvt_f64_m (f64, pg, f64); /* { dg-error {'svcvt_f64_m' has no form that takes 'svfloat64_t' arguments} } */ ++ ++ svcvt_f32_m (f32, pg, s8); /* { dg-error {'svcvt_f32_m' has no form that takes 'svint8_t' arguments} } */ ++ svcvt_f32_m (f32, pg, s16); /* { dg-error {'svcvt_f32_m' has no form that takes 'svint16_t' arguments} } */ ++ svcvt_f32_m (f32, pg, s32); ++ svcvt_f32_m (f32, pg, s64); ++ svcvt_f32_m (f32, pg, u8); /* { dg-error {'svcvt_f32_m' has no form 
that takes 'svuint8_t' arguments} } */ ++ svcvt_f32_m (f32, pg, u16); /* { dg-error {'svcvt_f32_m' has no form that takes 'svuint16_t' arguments} } */ ++ svcvt_f32_m (f32, pg, u32); ++ svcvt_f32_m (f32, pg, u64); ++ svcvt_f32_m (f32, pg, f16); ++ svcvt_f32_m (f32, pg, f32); /* { dg-error {'svcvt_f32_m' has no form that takes 'svfloat32_t' arguments} } */ ++ svcvt_f32_m (f32, pg, f64); ++ ++ svcvt_f16_m (f16, pg, s8); /* { dg-error {'svcvt_f16_m' has no form that takes 'svint8_t' arguments} } */ ++ svcvt_f16_m (f16, pg, s16); ++ svcvt_f16_m (f16, pg, s32); ++ svcvt_f16_m (f16, pg, s64); ++ svcvt_f16_m (f16, pg, u8); /* { dg-error {'svcvt_f16_m' has no form that takes 'svuint8_t' arguments} } */ ++ svcvt_f16_m (f16, pg, u16); ++ svcvt_f16_m (f16, pg, u32); ++ svcvt_f16_m (f16, pg, u64); ++ svcvt_f16_m (f16, pg, f16); /* { dg-error {'svcvt_f16_m' has no form that takes 'svfloat16_t' arguments} } */ ++ svcvt_f16_m (f16, pg, f32); ++ svcvt_f16_m (f16, pg, f64); ++ ++ svcvt_s64_m (s64, pg, f16); ++ svcvt_s64_m (s64, pg, f32); ++ svcvt_s64_m (s64, pg, f64); ++ ++ svcvt_s32_m (s32, pg, f16); ++ svcvt_s32_m (s32, pg, f32); ++ svcvt_s32_m (s32, pg, f64); ++ ++ svcvt_s16_m (s16, pg, f16); ++ svcvt_s16_m (s16, pg, f32); /* { dg-error {'svcvt_s16_m' has no form that takes 'svfloat32_t' arguments} } */ ++ svcvt_s16_m (s16, pg, f64); /* { dg-error {'svcvt_s16_m' has no form that takes 'svfloat64_t' arguments} } */ ++ ++ svcvt_u64_m (u64, pg, f16); ++ svcvt_u64_m (u64, pg, f32); ++ svcvt_u64_m (u64, pg, f64); ++ ++ svcvt_u32_m (u32, pg, f16); ++ svcvt_u32_m (u32, pg, f32); ++ svcvt_u32_m (u32, pg, f64); ++ ++ svcvt_u16_m (u16, pg, f16); ++ svcvt_u16_m (u16, pg, f32); /* { dg-error {'svcvt_u16_m' has no form that takes 'svfloat32_t' arguments} } */ ++ svcvt_u16_m (u16, pg, f64); /* { dg-error {'svcvt_u16_m' has no form that takes 'svfloat64_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c +new file mode 100644 +index 000000000..888b52513 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64) ++{ ++ svclz_m (u32, pg); /* { dg-error {too few arguments to function 'svclz_m'} } */ ++ svclz_m (u32, pg, s32, s32); /* { dg-error {too many arguments to function 'svclz_m'} } */ ++ svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE vector type} } */ ++ svclz_m (u32, u32, f32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ ++ svclz_m (u32, 0, f32); /* { dg-error {passing 'int' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ ++ svclz_m (u32, pg, s32); ++ svclz_m (u32, pg, u32); ++ svclz_m (u32, pg, f32); /* { dg-error {'svclz_m' has no form that takes 'svfloat32_t' arguments} } */ ++ svclz_m (u32, pg, pg); /* { dg-error {'svclz_m' has no form that takes 'svbool_t' arguments} } */ ++ ++ svclz_m (pg, pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ ++ svclz_m (s32, pg, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ ++ svclz_m (f32, pg, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svclz_m', which expects a vector of unsigned 
integers} } */ ++ svclz_m (s64, pg, s32); /* { dg-error {passing 'svint64_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ ++ svclz_m (u64, pg, s32); /* { dg-error {arguments 1 and 3 of 'svclz_m' must have the same element size, but the values passed here have type 'svuint64_t' and 'svint32_t' respectively} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c +new file mode 100644 +index 000000000..233e847e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-options "-flax-vector-conversions" } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64) ++{ ++ svclz_m (u32, pg); /* { dg-error {too few arguments to function 'svclz_m'} } */ ++ svclz_m (u32, pg, s32, s32); /* { dg-error {too many arguments to function 'svclz_m'} } */ ++ svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE vector type} } */ ++ svclz_m (u32, u32, f32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ ++ svclz_m (u32, 0, f32); /* { dg-error {passing 'int' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ ++ svclz_m (u32, pg, s32); ++ svclz_m (u32, pg, u32); ++ svclz_m (u32, pg, f32); /* { dg-error {'svclz_m' has no form that takes 'svfloat32_t' arguments} } */ ++ svclz_m (u32, pg, pg); /* { dg-error {'svclz_m' has no form that takes 'svbool_t' arguments} } */ ++ ++ svclz_m (pg, pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ ++ svclz_m (s32, pg, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ ++ svclz_m (f32, pg, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ ++ svclz_m (s64, pg, s32); /* { dg-error {passing 'svint64_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ ++ svclz_m (u64, pg, s32); /* { dg-error {arguments 1 and 3 of 'svclz_m' must have the same element size, but the values passed here have type 'svuint64_t' and 'svint32_t' respectively} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c +new file mode 100644 +index 000000000..da57b07ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svuint8_t u8) ++{ ++ svcnt_x (pg); /* { dg-error {too few arguments to function 'svcnt_x'} } */ ++ svcnt_x (pg, u8, u8); /* { dg-error {too many arguments to function 'svcnt_x'} } */ ++ svcnt_x (u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcnt_x', which expects 'svbool_t'} } */ ++ svcnt_x (pg, pg); /* { dg-error {'svcnt_x' has no form that takes 'svbool_t' arguments} } */ ++ svcnt_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svcnt_x', which expects an SVE vector type} } */ ++ svcnt_x (pg, u8); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c +new file mode 100644 +index 000000000..9c8acdf2d +--- 
/dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void ++f1 (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16) ++{ ++ svexpa (); /* { dg-error {too few arguments to function 'svexpa'} } */ ++ svexpa (u16, u16); /* { dg-error {too many arguments to function 'svexpa'} } */ ++ svexpa (1); /* { dg-error {passing 'int' to argument 1 of 'svexpa', which expects an SVE vector type} } */ ++ svexpa (pg); /* { dg-error {passing 'svbool_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */ ++ svexpa (s8); /* { dg-error {passing 'svint8_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */ ++ svexpa (s16); /* { dg-error {passing 'svint16_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */ ++ svexpa (f16); /* { dg-error {passing 'svfloat16_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */ ++ ++ svexpa (u8); /* { dg-error {'svexpa' has no form that takes 'svuint8_t' arguments} } */ ++ svexpa (u16); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c +new file mode 100644 +index 000000000..95a97a72e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c +@@ -0,0 +1,25 @@ ++#include ++ ++void ++test (svbool_t pg, svint8_t s8, svuint8_t u8, ++ svint16_t s16, svuint16_t u16, svfloat16_t f16, ++ svint32_t s32, svuint32_t u32, svfloat32_t f32, ++ svint64_t s64, svuint64_t u64, svfloat64_t f64, float f, int i) ++{ ++ svunpklo (); /* { dg-error {too few arguments to function 'svunpklo'} } */ ++ svunpklo (pg, s8); /* { dg-error {too many arguments to function 'svunpklo'} } */ ++ svunpklo (i); /* { dg-error {passing 'int' to argument 1 of 'svunpklo', which expects an SVE vector type} } */ ++ svunpklo (f); /* { dg-error {passing 'float' to argument 1 of 'svunpklo', which expects an SVE vector type} } */ ++ svunpklo (pg); ++ svunpklo (s8); ++ svunpklo (s16); ++ svunpklo (s32); ++ svunpklo (s64); /* { dg-error {'svunpklo' has no form that takes 'svint64_t' arguments} } */ ++ svunpklo (u8); ++ svunpklo (u16); ++ svunpklo (u32); ++ svunpklo (u64); /* { dg-error {'svunpklo' has no form that takes 'svuint64_t' arguments} } */ ++ svunpklo (f16); /* { dg-error {'svunpklo' has no form that takes 'svfloat16_t' arguments} } */ ++ svunpklo (f32); /* { dg-error {'svunpklo' has no form that takes 'svfloat32_t' arguments} } */ ++ svunpklo (f64); /* { dg-error {'svunpklo' has no form that takes 'svfloat64_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_1.c +new file mode 100644 +index 000000000..37524c2ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_1.c +@@ -0,0 +1,17 @@ ++#include ++ ++void ++f (svint8_t s8, svuint16_t u16, svfloat32_t f32, ++ svint16x2_t s16x2, svuint32x3_t u32x3, svfloat64x4_t f64x4, ++ svbool_t pg) ++{ ++ s8 = no_ret_s8 (); /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'int'} } */ ++ u16 = no_ret_u16 (); /* { dg-error {incompatible types when assigning to type 'svuint16_t' from type 'int'} } */ ++ f32 = no_ret_f32 (); /* { dg-error {incompatible types when assigning to type 'svfloat32_t' from type 'int'} } */ ++ s16x2 = no_ret_s16x2 (); /* { 
dg-error {incompatible types when assigning to type 'svint16x2_t' from type 'int'} } */ ++ u32x3 = no_ret_u32x3 (); /* { dg-error {incompatible types when assigning to type 'svuint32x3_t' from type 'int'} } */ ++ f64x4 = no_ret_f64x4 (); /* { dg-error {incompatible types when assigning to type 'svfloat64x4_t' from type 'int'} } */ ++ pg = no_ret_pg (); /* { dg-error {incompatible types when assigning to type 'svbool_t' from type 'int'} } */ ++ ++ no_pass_args (pg, u16, f32, s16x2, u32x3, f64x4, pg); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c +new file mode 100644 +index 000000000..7e869bda8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c +@@ -0,0 +1,15 @@ ++#include ++ ++void ++f (svint8_t s8, svuint16_t u16, svfloat32_t f32, ++ svint16x2_t s16x2, svuint32x3_t u32x3, svfloat64x4_t f64x4, ++ svbool_t pg) ++{ ++ s8 = svlsr_x (pg, s8, 1); /* { dg-error {'svlsr_x' has no form that takes 'svint8_t' arguments} } */ ++ u16 = svneg_x (pg, u16); /* { dg-error {'svneg_x' has no form that takes 'svuint16_t' arguments} } */ ++ f32 = svclz_x (pg, f32); /* { dg-error {'svclz_x' has no form that takes 'svfloat32_t' arguments} } */ ++ s16x2 = svcreate2 (s8); /* { dg-error {too few arguments to function 'svcreate2'} } */ ++ u32x3 = svcreate3 (u16, u16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcreate3', but previous arguments had type 'svuint16_t'} } */ ++ f64x4 = svcreate4 (f32, f32, f32, f32, f32); /* { dg-error {too many arguments to function 'svcreate4'} } */ ++ pg = svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/add_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/add_1.c +new file mode 100644 +index 000000000..f5c6285f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/add_1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++#include ++ ++void ++foo (svint8_t *res1, svint8_t *res2, svbool_t pg, svint8_t a, svint8_t b) ++{ ++ *res1 = svadd_m (pg, a, b); ++ *res2 = svadd_m (pg, a, b); ++} ++ ++/* { dg-final { scan-tree-dump-times {svadd_s8_m|svadd_m} 1 "optimized" } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/and_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/and_1.c +new file mode 100644 +index 000000000..59348cece +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/and_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svand_z (pg, x, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) ++{ ++ svbool_t res = svand_z (pg, x, y); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tands\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tand\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/bic_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/bic_1.c +new file mode 100644 +index 000000000..e1c484995 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/bic_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, svbool_t y, 
int *any, svbool_t *ptr) ++{ ++ svbool_t res = svbic_z (pg, x, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) ++{ ++ svbool_t res = svbic_z (pg, x, y); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tbics\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tbic\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_1.c +new file mode 100644 +index 000000000..24aa8f317 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svbrka_m (x, pg, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) ++{ ++ svbool_t res = svbrka_m (x, pg, y); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tbrkas\tp[0-9]+\.b, p[0-9]+/m,} 2 } } */ ++/* { dg-final { scan-assembler-not {\tbrka\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_2.c +new file mode 100644 +index 000000000..8aa338867 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_2.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svbrka_z (pg, x); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, int *any) ++{ ++ svbool_t res = svbrka_z (pg, x); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tbrkas\tp[0-9]+\.b, p[0-9]+/z,} 2 } } */ ++/* { dg-final { scan-assembler-not {\tbrka\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_1.c +new file mode 100644 +index 000000000..07e3622ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svbrkb_m (x, pg, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) ++{ ++ svbool_t res = svbrkb_m (x, pg, y); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tbrkbs\tp[0-9]+\.b, p[0-9]+/m,} 2 } } */ ++/* { dg-final { scan-assembler-not {\tbrkb\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_2.c +new file mode 100644 +index 000000000..ee677cedd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_2.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svbrkb_z (pg, x); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, int *any) ++{ ++ svbool_t res = svbrkb_z (pg, x); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tbrkbs\tp[0-9]+\.b, p[0-9]+/z,} 2 } } */ ++/* { dg-final { scan-assembler-not 
{\tbrkb\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkn_1.c +new file mode 100644 +index 000000000..7fd9318c1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkn_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svbrkn_z (pg, x, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) ++{ ++ svbool_t res = svbrkn_z (pg, x, y); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tbrkns\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tbrkn\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpa_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpa_1.c +new file mode 100644 +index 000000000..18cca370c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpa_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svbrkpa_z (pg, x, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) ++{ ++ svbool_t res = svbrkpa_z (pg, x, y); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tbrkpas\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tbrkpa\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpb_1.c +new file mode 100644 +index 000000000..73eb7094d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpb_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svbrkpb_z (pg, x, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) ++{ ++ svbool_t res = svbrkpb_z (pg, x, y); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tbrkpbs\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tbrkpb\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_1.c +new file mode 100644 +index 000000000..dd8f6c494 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svint8_t x, svint64_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svcmpeq_wide (pg, x, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svint8_t x, svint64_t y, int *any) ++{ ++ svbool_t res = svcmpeq_wide (pg, x, y); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tcmpeq\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_2.c +new file mode 100644 +index 000000000..028d37516 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_2.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ 
++#include ++ ++void ++test1 (svbool_t pg, svint8_t x, svint8_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svcmpeq (pg, x, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svint8_t x, svint8_t y, int *any) ++{ ++ svbool_t res = svcmpeq (pg, x, y); ++ return svptest_any (pg, res); ++} ++ ++void ++test3 (svbool_t pg, svint8_t x, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svcmpeq (pg, x, 10); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test4 (svbool_t pg, svint8_t x, int *any) ++{ ++ svbool_t res = svcmpeq (pg, x, 10); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tcmpeq\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\tcmpeq\t[^\n]*, #10} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_3.c +new file mode 100644 +index 000000000..115b26c8e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_3.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svfloat32_t x, svfloat32_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svcmpeq (pg, x, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svfloat32_t x, svfloat32_t y, int *any) ++{ ++ svbool_t res = svcmpeq (pg, x, y); ++ return svptest_any (pg, res); ++} ++ ++void ++test3 (svbool_t pg, svfloat32_t x, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svcmpeq (pg, x, 0.0); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test4 (svbool_t pg, svfloat32_t x, int *any) ++{ ++ svbool_t res = svcmpeq (pg, x, 0.0); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tfcmeq\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\tfcmeq\t[^\n]*, #0\.0} 2 } } */ ++/* { dg-final { scan-assembler-times {\tptest\t} 4 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntb_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntb_pat_1.c +new file mode 100644 +index 000000000..d57a75c20 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntb_pat_1.c +@@ -0,0 +1,132 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-O -msve-vector-bits=256" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++** cntb_pow2: ++** mov x0, #?32 ++** ret ++*/ ++uint64_t cntb_pow2 () { return svcntb_pat (SV_POW2); } ++ ++/* ++** cntb_vl1: ++** mov x0, #?1 ++** ret ++*/ ++uint64_t cntb_vl1 () { return svcntb_pat (SV_VL1); } ++ ++/* ++** cntb_vl2: ++** mov x0, #?2 ++** ret ++*/ ++uint64_t cntb_vl2 () { return svcntb_pat (SV_VL2); } ++ ++/* ++** cntb_vl3: ++** mov x0, #?3 ++** ret ++*/ ++uint64_t cntb_vl3 () { return svcntb_pat (SV_VL3); } ++ ++/* ++** cntb_vl4: ++** mov x0, #?4 ++** ret ++*/ ++uint64_t cntb_vl4 () { return svcntb_pat (SV_VL4); } ++ ++/* ++** cntb_vl5: ++** mov x0, #?5 ++** ret ++*/ ++uint64_t cntb_vl5 () { return svcntb_pat (SV_VL5); } ++ ++/* ++** cntb_vl6: ++** mov x0, #?6 ++** ret ++*/ ++uint64_t cntb_vl6 () { return svcntb_pat (SV_VL6); } ++ ++/* ++** cntb_vl7: ++** mov x0, #?7 ++** ret ++*/ ++uint64_t cntb_vl7 () { return svcntb_pat (SV_VL7); } ++ ++/* ++** cntb_vl8: ++** mov x0, #?8 ++** ret ++*/ ++uint64_t cntb_vl8 () { return svcntb_pat (SV_VL8); } ++ ++/* ++** cntb_vl16: ++** mov x0, #?16 ++** ret ++*/ 
++uint64_t cntb_vl16 () { return svcntb_pat (SV_VL16); } ++ ++/* ++** cntb_vl32: ++** mov x0, #?32 ++** ret ++*/ ++uint64_t cntb_vl32 () { return svcntb_pat (SV_VL32); } ++ ++/* ++** cntb_vl64: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntb_vl64 () { return svcntb_pat (SV_VL64); } ++ ++/* ++** cntb_vl128: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntb_vl128 () { return svcntb_pat (SV_VL128); } ++ ++/* ++** cntb_vl256: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntb_vl256 () { return svcntb_pat (SV_VL256); } ++ ++/* ++** cntb_mul3: ++** mov x0, #?30 ++** ret ++*/ ++uint64_t cntb_mul3 () { return svcntb_pat (SV_MUL3); } ++ ++/* ++** cntb_mul4: ++** mov x0, #?32 ++** ret ++*/ ++uint64_t cntb_mul4 () { return svcntb_pat (SV_MUL4); } ++ ++/* ++** cntb_all: ++** mov x0, #?32 ++** ret ++*/ ++uint64_t cntb_all () { return svcntb_pat (SV_ALL); } ++ ++#ifdef __cplusplus ++} ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntd_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntd_pat_1.c +new file mode 100644 +index 000000000..d93a32054 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntd_pat_1.c +@@ -0,0 +1,132 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-O -msve-vector-bits=256" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++** cntd_pow2: ++** mov x0, #?4 ++** ret ++*/ ++uint64_t cntd_pow2 () { return svcntd_pat (SV_POW2); } ++ ++/* ++** cntd_vl1: ++** mov x0, #?1 ++** ret ++*/ ++uint64_t cntd_vl1 () { return svcntd_pat (SV_VL1); } ++ ++/* ++** cntd_vl2: ++** mov x0, #?2 ++** ret ++*/ ++uint64_t cntd_vl2 () { return svcntd_pat (SV_VL2); } ++ ++/* ++** cntd_vl3: ++** mov x0, #?3 ++** ret ++*/ ++uint64_t cntd_vl3 () { return svcntd_pat (SV_VL3); } ++ ++/* ++** cntd_vl4: ++** mov x0, #?4 ++** ret ++*/ ++uint64_t cntd_vl4 () { return svcntd_pat (SV_VL4); } ++ ++/* ++** cntd_vl5: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntd_vl5 () { return svcntd_pat (SV_VL5); } ++ ++/* ++** cntd_vl6: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntd_vl6 () { return svcntd_pat (SV_VL6); } ++ ++/* ++** cntd_vl7: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntd_vl7 () { return svcntd_pat (SV_VL7); } ++ ++/* ++** cntd_vl8: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntd_vl8 () { return svcntd_pat (SV_VL8); } ++ ++/* ++** cntd_vl16: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntd_vl16 () { return svcntd_pat (SV_VL16); } ++ ++/* ++** cntd_vl32: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntd_vl32 () { return svcntd_pat (SV_VL32); } ++ ++/* ++** cntd_vl64: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntd_vl64 () { return svcntd_pat (SV_VL64); } ++ ++/* ++** cntd_vl128: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntd_vl128 () { return svcntd_pat (SV_VL128); } ++ ++/* ++** cntd_vl256: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntd_vl256 () { return svcntd_pat (SV_VL256); } ++ ++/* ++** cntd_mul3: ++** mov x0, #?3 ++** ret ++*/ ++uint64_t cntd_mul3 () { return svcntd_pat (SV_MUL3); } ++ ++/* ++** cntd_mul4: ++** mov x0, #?4 ++** ret ++*/ ++uint64_t cntd_mul4 () { return svcntd_pat (SV_MUL4); } ++ ++/* ++** cntd_all: ++** mov x0, #?4 ++** ret ++*/ ++uint64_t cntd_all () { return svcntd_pat (SV_ALL); } ++ ++#ifdef __cplusplus ++} ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnth_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnth_pat_1.c +new file mode 100644 +index 000000000..bd988f53d +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnth_pat_1.c +@@ -0,0 +1,132 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-O -msve-vector-bits=256" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++** cnth_pow2: ++** mov x0, #?16 ++** ret ++*/ ++uint64_t cnth_pow2 () { return svcnth_pat (SV_POW2); } ++ ++/* ++** cnth_vl1: ++** mov x0, #?1 ++** ret ++*/ ++uint64_t cnth_vl1 () { return svcnth_pat (SV_VL1); } ++ ++/* ++** cnth_vl2: ++** mov x0, #?2 ++** ret ++*/ ++uint64_t cnth_vl2 () { return svcnth_pat (SV_VL2); } ++ ++/* ++** cnth_vl3: ++** mov x0, #?3 ++** ret ++*/ ++uint64_t cnth_vl3 () { return svcnth_pat (SV_VL3); } ++ ++/* ++** cnth_vl4: ++** mov x0, #?4 ++** ret ++*/ ++uint64_t cnth_vl4 () { return svcnth_pat (SV_VL4); } ++ ++/* ++** cnth_vl5: ++** mov x0, #?5 ++** ret ++*/ ++uint64_t cnth_vl5 () { return svcnth_pat (SV_VL5); } ++ ++/* ++** cnth_vl6: ++** mov x0, #?6 ++** ret ++*/ ++uint64_t cnth_vl6 () { return svcnth_pat (SV_VL6); } ++ ++/* ++** cnth_vl7: ++** mov x0, #?7 ++** ret ++*/ ++uint64_t cnth_vl7 () { return svcnth_pat (SV_VL7); } ++ ++/* ++** cnth_vl8: ++** mov x0, #?8 ++** ret ++*/ ++uint64_t cnth_vl8 () { return svcnth_pat (SV_VL8); } ++ ++/* ++** cnth_vl16: ++** mov x0, #?16 ++** ret ++*/ ++uint64_t cnth_vl16 () { return svcnth_pat (SV_VL16); } ++ ++/* ++** cnth_vl32: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cnth_vl32 () { return svcnth_pat (SV_VL32); } ++ ++/* ++** cnth_vl64: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cnth_vl64 () { return svcnth_pat (SV_VL64); } ++ ++/* ++** cnth_vl128: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cnth_vl128 () { return svcnth_pat (SV_VL128); } ++ ++/* ++** cnth_vl256: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cnth_vl256 () { return svcnth_pat (SV_VL256); } ++ ++/* ++** cnth_mul3: ++** mov x0, #?15 ++** ret ++*/ ++uint64_t cnth_mul3 () { return svcnth_pat (SV_MUL3); } ++ ++/* ++** cnth_mul4: ++** mov x0, #?16 ++** ret ++*/ ++uint64_t cnth_mul4 () { return svcnth_pat (SV_MUL4); } ++ ++/* ++** cnth_all: ++** mov x0, #?16 ++** ret ++*/ ++uint64_t cnth_all () { return svcnth_pat (SV_ALL); } ++ ++#ifdef __cplusplus ++} ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntw_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntw_pat_1.c +new file mode 100644 +index 000000000..53c8435b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntw_pat_1.c +@@ -0,0 +1,132 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-O -msve-vector-bits=256" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++** cntw_pow2: ++** mov x0, #?8 ++** ret ++*/ ++uint64_t cntw_pow2 () { return svcntw_pat (SV_POW2); } ++ ++/* ++** cntw_vl1: ++** mov x0, #?1 ++** ret ++*/ ++uint64_t cntw_vl1 () { return svcntw_pat (SV_VL1); } ++ ++/* ++** cntw_vl2: ++** mov x0, #?2 ++** ret ++*/ ++uint64_t cntw_vl2 () { return svcntw_pat (SV_VL2); } ++ ++/* ++** cntw_vl3: ++** mov x0, #?3 ++** ret ++*/ ++uint64_t cntw_vl3 () { return svcntw_pat (SV_VL3); } ++ ++/* ++** cntw_vl4: ++** mov x0, #?4 ++** ret ++*/ ++uint64_t cntw_vl4 () { return svcntw_pat (SV_VL4); } ++ ++/* ++** cntw_vl5: ++** mov x0, #?5 ++** ret ++*/ ++uint64_t cntw_vl5 () { return svcntw_pat (SV_VL5); } ++ ++/* ++** cntw_vl6: ++** mov x0, #?6 ++** ret ++*/ ++uint64_t cntw_vl6 () { return svcntw_pat (SV_VL6); } ++ ++/* ++** cntw_vl7: ++** mov x0, #?7 ++** ret ++*/ ++uint64_t cntw_vl7 
() { return svcntw_pat (SV_VL7); } ++ ++/* ++** cntw_vl8: ++** mov x0, #?8 ++** ret ++*/ ++uint64_t cntw_vl8 () { return svcntw_pat (SV_VL8); } ++ ++/* ++** cntw_vl16: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntw_vl16 () { return svcntw_pat (SV_VL16); } ++ ++/* ++** cntw_vl32: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntw_vl32 () { return svcntw_pat (SV_VL32); } ++ ++/* ++** cntw_vl64: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntw_vl64 () { return svcntw_pat (SV_VL64); } ++ ++/* ++** cntw_vl128: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntw_vl128 () { return svcntw_pat (SV_VL128); } ++ ++/* ++** cntw_vl256: ++** mov x0, #?0 ++** ret ++*/ ++uint64_t cntw_vl256 () { return svcntw_pat (SV_VL256); } ++ ++/* ++** cntw_mul3: ++** mov x0, #?6 ++** ret ++*/ ++uint64_t cntw_mul3 () { return svcntw_pat (SV_MUL3); } ++ ++/* ++** cntw_mul4: ++** mov x0, #?8 ++** ret ++*/ ++uint64_t cntw_mul4 () { return svcntw_pat (SV_MUL4); } ++ ++/* ++** cntw_all: ++** mov x0, #?8 ++** ret ++*/ ++uint64_t cntw_all () { return svcntw_pat (SV_ALL); } ++ ++#ifdef __cplusplus ++} ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_1.c +new file mode 100644 +index 000000000..0442efef3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_1.c +@@ -0,0 +1,16 @@ ++/* { dg-options "-g" } */ ++ ++#include ++ ++svbool_t f_b (svbool_t x) { return x; } ++svint8_t f_s8 (svint8_t x) { return x; } ++svuint8_t f_u8 (svuint8_t x) { return x; } ++svint16_t f_s16 (svint16_t x) { return x; } ++svuint16_t f_u16 (svuint16_t x) { return x; } ++svfloat16_t f_f16 (svfloat16_t x) { return x; } ++svint32_t f_s32 (svint32_t x) { return x; } ++svuint32_t f_u32 (svuint32_t x) { return x; } ++svfloat32_t f_f32 (svfloat32_t x) { return x; } ++svint64_t f_s64 (svint64_t x) { return x; } ++svuint64_t f_u64 (svuint64_t x) { return x; } ++svfloat64_t f_f64 (svfloat64_t x) { return x; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_2.c +new file mode 100644 +index 000000000..63a26d2e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_2.c +@@ -0,0 +1,16 @@ ++/* { dg-options "-g" } */ ++ ++#include ++ ++svbool_t f_b (svbool_t x) { return svptrue_b32 (); } ++svint8_t f_s8 (svint8_t x) { return svdup_s8 (0); } ++svuint8_t f_u8 (svuint8_t x) { return svdup_u8 (1); } ++svint16_t f_s16 (svint16_t x) { return svdup_s16 (2); } ++svuint16_t f_u16 (svuint16_t x) { return svdup_u16 (3); } ++svfloat16_t f_f16 (svfloat16_t x) { return svdup_f16 (4); } ++svint32_t f_s32 (svint32_t x) { return svdup_s32 (5); } ++svuint32_t f_u32 (svuint32_t x) { return svdup_u32 (6); } ++svfloat32_t f_f32 (svfloat32_t x) { return svdup_f32 (7); } ++svint64_t f_s64 (svint64_t x) { return svdup_s64 (8); } ++svuint64_t f_u64 (svuint64_t x) { return svdup_u64 (9); } ++svfloat64_t f_f64 (svfloat64_t x) { return svdup_f64 (10); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_3.c +new file mode 100644 +index 000000000..ac151e465 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_3.c +@@ -0,0 +1,39 @@ ++/* { dg-options "-g" } */ ++ ++#include ++ ++svint8x2_t f2_s8 (svint8x2_t x) { return x; } ++svuint8x2_t f2_u8 (svuint8x2_t x) { return x; } ++svint16x2_t f2_s16 (svint16x2_t x) { return x; } ++svuint16x2_t f2_u16 (svuint16x2_t x) { return x; } 
++svfloat16x2_t f2_f16 (svfloat16x2_t x) { return x; } ++svint32x2_t f2_s32 (svint32x2_t x) { return x; } ++svuint32x2_t f2_u32 (svuint32x2_t x) { return x; } ++svfloat32x2_t f2_f32 (svfloat32x2_t x) { return x; } ++svint64x2_t f2_s64 (svint64x2_t x) { return x; } ++svuint64x2_t f2_u64 (svuint64x2_t x) { return x; } ++svfloat64x2_t f2_f64 (svfloat64x2_t x) { return x; } ++ ++svint8x3_t f3_s8 (svint8x3_t x) { return x; } ++svuint8x3_t f3_u8 (svuint8x3_t x) { return x; } ++svint16x3_t f3_s16 (svint16x3_t x) { return x; } ++svuint16x3_t f3_u16 (svuint16x3_t x) { return x; } ++svfloat16x3_t f3_f16 (svfloat16x3_t x) { return x; } ++svint32x3_t f3_s32 (svint32x3_t x) { return x; } ++svuint32x3_t f3_u32 (svuint32x3_t x) { return x; } ++svfloat32x3_t f3_f32 (svfloat32x3_t x) { return x; } ++svint64x3_t f3_s64 (svint64x3_t x) { return x; } ++svuint64x3_t f3_u64 (svuint64x3_t x) { return x; } ++svfloat64x3_t f3_f64 (svfloat64x3_t x) { return x; } ++ ++svint8x4_t f4_s8 (svint8x4_t x) { return x; } ++svuint8x4_t f4_u8 (svuint8x4_t x) { return x; } ++svint16x4_t f4_s16 (svint16x4_t x) { return x; } ++svuint16x4_t f4_u16 (svuint16x4_t x) { return x; } ++svfloat16x4_t f4_f16 (svfloat16x4_t x) { return x; } ++svint32x4_t f4_s32 (svint32x4_t x) { return x; } ++svuint32x4_t f4_u32 (svuint32x4_t x) { return x; } ++svfloat32x4_t f4_f42 (svfloat32x4_t x) { return x; } ++svint64x4_t f4_s64 (svint64x4_t x) { return x; } ++svuint64x4_t f4_u64 (svuint64x4_t x) { return x; } ++svfloat64x4_t f4_f64 (svfloat64x4_t x) { return x; } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/double_pragma_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/double_pragma_1.c +new file mode 100644 +index 000000000..9b3c3697c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/double_pragma_1.c +@@ -0,0 +1,7 @@ ++/* { dg-do compile } */ ++/* { dg-options "" } */ ++ ++/* It doesn't really matter if this produces errors about redefinitions, ++ but it mustn't trigger an ICE. 
*/ ++#pragma GCC aarch64 "arm_sve.h" ++#pragma GCC aarch64 "arm_sve.h" /* { dg-error "duplicate definition of 'arm_sve.h'" } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c +new file mode 100644 +index 000000000..d71507baa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlittle-endian" } */ ++ ++#include ++ ++svint32_t ++dupq (int x) ++{ ++ return svdupq_s32 (x, 1, 2, 3); ++} ++ ++/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ ++/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */ ++/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ ++/* { dg-final { scan-assembler {\t\.word\t1\n\t\.word\t2\n\t\.word\t3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_10.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_10.c +new file mode 100644 +index 000000000..f8f797c97 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_10.c +@@ -0,0 +1,66 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++svbool_t __attribute__ ((noipa)) ++make_b8 (int8_t x0, int8_t x1, int8_t x2, int8_t x3, ++ int8_t x4, int8_t x5, int8_t x6, int8_t x7, ++ int8_t x8, int8_t x9, int8_t xa, int8_t xb, ++ int8_t xc, int8_t xd, int8_t xe, int8_t xf) ++{ ++ return svdupq_b8 (x0, x1, x2, x3, x4, x5, x6, x7, ++ x8, x9, xa, xb, xc, xd, xe, xf); ++} ++ ++svbool_t __attribute__ ((noipa)) ++make_b16 (int16_t x0, int16_t x1, int16_t x2, int16_t x3, ++ int16_t x4, int16_t x5, int16_t x6, int16_t x7) ++{ ++ return svdupq_b16 (x0, x1, x2, x3, x4, x5, x6, x7); ++} ++ ++svbool_t __attribute__ ((noipa)) ++make_b32 (int32_t x0, int32_t x1, int32_t x2, int32_t x3) ++{ ++ return svdupq_b32 (x0, x1, x2, x3); ++} ++ ++svbool_t __attribute__ ((noipa)) ++make_b64 (int64_t x0, int64_t x1) ++{ ++ return svdupq_b64 (x0, x1); ++} ++ ++int8_t a[16] = { 1, 0, 0, -3, 0, 9, 11, 0, 0, 1, 0, -4, 9, 9, 0, 0 }; ++ ++int ++main () ++{ ++ svbool_t pg = svptrue_pat_b8 (SV_VL16); ++ svbool_t b8 = make_b8 (a[0], a[1], a[2], a[3], ++ a[4], a[5], a[6], a[7], ++ a[8], a[9], a[10], a[11], ++ a[12], a[13], a[14], a[15]); ++ if (svptest_any (svptrue_b8 (), ++ sveor_z (pg, b8, svcmpne (pg, svld1 (pg, a), 0)))) ++ __builtin_abort (); ++ ++ svbool_t b16 = make_b16 (a[0], a[1], a[2], a[3], ++ a[4], a[5], a[6], a[7]); ++ if (svptest_any (svptrue_b16 (), ++ sveor_z (pg, b16, svcmpne (pg, svld1sb_u16 (pg, a), 0)))) ++ __builtin_abort (); ++ ++ svbool_t b32 = make_b32 (a[0], a[1], a[2], a[3]); ++ if (svptest_any (svptrue_b32 (), ++ sveor_z (pg, b32, svcmpne (pg, svld1sb_u32 (pg, a), 0)))) ++ __builtin_abort (); ++ ++ svbool_t b64 = make_b64 (a[0], a[1]); ++ if (svptest_any (svptrue_b64 (), ++ sveor_z (pg, b64, svcmpne (pg, svld1sb_u64 (pg, a), 0)))) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c +new file mode 100644 +index 000000000..d494943a2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mbig-endian" } */ ++ ++/* To avoid needing big-endian header files. 
*/ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svint32_t ++dupq (int x) ++{ ++ return svdupq_s32 (x, 1, 2, 3); ++} ++ ++/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ ++/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */ ++/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ ++/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t2\n\t\.word\t1\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c +new file mode 100644 +index 000000000..4bc8259df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlittle-endian" } */ ++ ++/* To avoid needing big-endian header files. */ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svint32_t ++dupq (int x) ++{ ++ return svdupq_s32 (0, 1, x, 3); ++} ++ ++/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ ++/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */ ++/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ ++/* { dg-final { scan-assembler {\t\.word\t0\n\t\.word\t1\n\t\.word\t[^\n]*\n\t\.word\t3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c +new file mode 100644 +index 000000000..6f9f9f2f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mbig-endian" } */ ++ ++/* To avoid needing big-endian header files. */ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svint32_t ++dupq (int x) ++{ ++ return svdupq_s32 (0, 1, x, 3); ++} ++ ++/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ ++/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */ ++/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ ++/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t[^\n]*\n\t\.word\t1\n\t\.word\t0\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_5.c +new file mode 100644 +index 000000000..53426c9af +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_5.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mlittle-endian" } */ ++ ++#include ++ ++svint32_t ++dupq (int x1, int x2, int x3, int x4) ++{ ++ return svdupq_s32 (x1, x2, x3, x4); ++} ++ ++/* { dg-final { scan-assembler-not {\tldr\t} } } */ ++/* { dg-final { scan-assembler {, [wx]0\n} } } */ ++/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[1\], w1\n} } } */ ++/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w2\n} } } */ ++/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[3\], w3\n} } } */ ++/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_6.c +new file mode 100644 +index 000000000..dfce5e7a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_6.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mbig-endian" } */ ++ ++/* To avoid needing big-endian header files. 
*/ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svint32_t ++dupq (int x1, int x2, int x3, int x4) ++{ ++ return svdupq_s32 (x1, x2, x3, x4); ++} ++ ++/* { dg-final { scan-assembler-not {\tldr\t} } } */ ++/* { dg-final { scan-assembler {, [wx]0\n} } } */ ++/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[1\], w1\n} } } */ ++/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w2\n} } } */ ++/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[3\], w3\n} } } */ ++/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_7.c +new file mode 100644 +index 000000000..08decb5f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_7.c +@@ -0,0 +1,66 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++svint8_t __attribute__ ((noipa)) ++make_s8 (int8_t x0, int8_t x1, int8_t x2, int8_t x3, ++ int8_t x4, int8_t x5, int8_t x6, int8_t x7, ++ int8_t x8, int8_t x9, int8_t xa, int8_t xb, ++ int8_t xc, int8_t xd, int8_t xe, int8_t xf) ++{ ++ return svdupq_s8 (x0, x1, x2, x3, x4, x5, x6, x7, ++ x8, x9, xa, xb, xc, xd, xe, xf); ++} ++ ++svint16_t __attribute__ ((noipa)) ++make_s16 (int16_t x0, int16_t x1, int16_t x2, int16_t x3, ++ int16_t x4, int16_t x5, int16_t x6, int16_t x7) ++{ ++ return svdupq_s16 (x0, x1, x2, x3, x4, x5, x6, x7); ++} ++ ++svint32_t __attribute__ ((noipa)) ++make_s32 (int32_t x0, int32_t x1, int32_t x2, int32_t x3) ++{ ++ return svdupq_s32 (x0, x1, x2, x3); ++} ++ ++svint64_t __attribute__ ((noipa)) ++make_s64 (int64_t x0, int64_t x1) ++{ ++ return svdupq_s64 (x0, x1); ++} ++ ++int8_t a[16] = { 1, -44, 91, -24, 101, -55, 77, 83, ++ -30, 69, 121, -128, -1, 13, 127, 26 }; ++int16_t b[8] = { -716, -10288, 30604, -19258, -9418, -10435, -16001, 7300 }; ++int32_t c[4] = { 1268374995, -1023602831, -891830021, -1793452959 }; ++int64_t d[2] = { 0x123456789abcdefLL, -0x123456789abcdefLL }; ++ ++int ++main () ++{ ++ svbool_t pg = svptrue_pat_b8 (SV_VL16); ++ svint8_t s8 = make_s8 (a[0], a[1], a[2], a[3], ++ a[4], a[5], a[6], a[7], ++ a[8], a[9], a[10], a[11], ++ a[12], a[13], a[14], a[15]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, s8, svld1 (pg, a)))) ++ __builtin_abort (); ++ ++ svint16_t s16 = make_s16 (b[0], b[1], b[2], b[3], ++ b[4], b[5], b[6], b[7]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, s16, svld1 (pg, b)))) ++ __builtin_abort (); ++ ++ svint32_t s32 = make_s32 (c[0], c[1], c[2], c[3]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, s32, svld1 (pg, c)))) ++ __builtin_abort (); ++ ++ svint64_t s64 = make_s64 (d[0], d[1]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, s64, svld1 (pg, d)))) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_8.c +new file mode 100644 +index 000000000..c20fb7324 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_8.c +@@ -0,0 +1,66 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++svuint8_t __attribute__ ((noipa)) ++make_u8 (uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, ++ uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7, ++ uint8_t x8, uint8_t x9, uint8_t xa, uint8_t xb, ++ uint8_t xc, uint8_t xd, uint8_t xe, uint8_t xf) ++{ ++ return svdupq_u8 (x0, x1, x2, x3, x4, x5, x6, x7, ++ x8, x9, xa, xb, xc, xd, xe, xf); ++} ++ ++svuint16_t 
__attribute__ ((noipa)) ++make_u16 (uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, ++ uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) ++{ ++ return svdupq_u16 (x0, x1, x2, x3, x4, x5, x6, x7); ++} ++ ++svuint32_t __attribute__ ((noipa)) ++make_u32 (uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) ++{ ++ return svdupq_u32 (x0, x1, x2, x3); ++} ++ ++svuint64_t __attribute__ ((noipa)) ++make_u64 (uint64_t x0, uint64_t x1) ++{ ++ return svdupq_u64 (x0, x1); ++} ++ ++uint8_t a[16] = { 1, 212, 91, 232, 101, 201, 77, 83, ++ 226, 69, 121, 128, 255, 13, 127, 26 }; ++uint16_t b[8] = { 64820, 55248, 30604, 46278, 56118, 55101, 49535, 7300 }; ++uint32_t c[4] = { 1268374995, 3271364465, 3403137275, 2501514337 }; ++uint64_t d[2] = { 0x123456789abcdefULL, 0xfedcba9876543210ULL }; ++ ++int ++main () ++{ ++ svbool_t pg = svptrue_pat_b8 (SV_VL16); ++ svuint8_t u8 = make_u8 (a[0], a[1], a[2], a[3], ++ a[4], a[5], a[6], a[7], ++ a[8], a[9], a[10], a[11], ++ a[12], a[13], a[14], a[15]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, u8, svld1 (pg, a)))) ++ __builtin_abort (); ++ ++ svuint16_t u16 = make_u16 (b[0], b[1], b[2], b[3], ++ b[4], b[5], b[6], b[7]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, u16, svld1 (pg, b)))) ++ __builtin_abort (); ++ ++ svuint32_t u32 = make_u32 (c[0], c[1], c[2], c[3]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, u32, svld1 (pg, c)))) ++ __builtin_abort (); ++ ++ svuint64_t u64 = make_u64 (d[0], d[1]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, u64, svld1 (pg, d)))) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_9.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_9.c +new file mode 100644 +index 000000000..b29aa9474 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_9.c +@@ -0,0 +1,47 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++svfloat16_t __attribute__ ((noipa)) ++make_f16 (float16_t x0, float16_t x1, float16_t x2, float16_t x3, ++ float16_t x4, float16_t x5, float16_t x6, float16_t x7) ++{ ++ return svdupq_f16 (x0, x1, x2, x3, x4, x5, x6, x7); ++} ++ ++svfloat32_t __attribute__ ((noipa)) ++make_f32 (float32_t x0, float32_t x1, float32_t x2, float32_t x3) ++{ ++ return svdupq_f32 (x0, x1, x2, x3); ++} ++ ++svfloat64_t __attribute__ ((noipa)) ++make_f64 (float64_t x0, float64_t x1) ++{ ++ return svdupq_f64 (x0, x1); ++} ++ ++float16_t a[8] = { 1.0, -4.25, 9.75, 6.5, -2.125, 5.5, -3.75, 7.625 }; ++float32_t b[4] = { 1.0, -90.25, -11.75, 141.5 }; ++float64_t c[2] = { 9221.5, -4491.25 }; ++ ++int ++main () ++{ ++ svbool_t pg = svptrue_pat_b8 (SV_VL16); ++ svfloat16_t f16 = make_f16 (a[0], a[1], a[2], a[3], ++ a[4], a[5], a[6], a[7]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, f16, svld1 (pg, a)))) ++ __builtin_abort (); ++ ++ svfloat32_t f32 = make_f32 (b[0], b[1], b[2], b[3]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, f32, svld1 (pg, b)))) ++ __builtin_abort (); ++ ++ svfloat64_t f64 = make_f64 (c[0], c[1]); ++ if (svptest_any (svptrue_b8 (), svcmpne (pg, f64, svld1 (pg, c)))) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_1.c +new file mode 100644 +index 000000000..32ccb08d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_1.c +@@ -0,0 +1,87 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2" } */ ++ 
++#include ++ ++#ifndef TYPE ++#define TYPE svint8_t ++#define DUPQ svdupq_lane_s8 ++#define INDEX svindex_s8 ++#define COUNT 16 ++#endif ++ ++#define BASE 42 ++ ++TYPE __attribute__ ((noipa)) ++dupq_var (TYPE x, uint64_t y) ++{ ++ return DUPQ (x, y); ++} ++ ++TYPE __attribute__ ((noipa)) ++dupq_0 (TYPE x) ++{ ++ return DUPQ (x, 0); ++} ++ ++TYPE __attribute__ ((noipa)) ++dupq_1 (TYPE x) ++{ ++ return DUPQ (x, 1); ++} ++ ++TYPE __attribute__ ((noipa)) ++dupq_2 (TYPE x) ++{ ++ return DUPQ (x, 2); ++} ++ ++TYPE __attribute__ ((noipa)) ++dupq_3 (TYPE x) ++{ ++ return DUPQ (x, 3); ++} ++ ++TYPE __attribute__ ((noipa)) ++dupq_4 (TYPE x) ++{ ++ return DUPQ (x, 4); ++} ++ ++void __attribute__ ((noipa)) ++check (TYPE x, uint64_t y) ++{ ++ svbool_t pg = svptrue_b8 (); ++ if (y * 2 >= svcntd ()) ++ { ++ if (svptest_any (pg, svcmpne (pg, x, 0))) ++ __builtin_abort (); ++ } ++ else ++ { ++ TYPE repeat = svand_x (pg, INDEX (0, 1), COUNT - 1); ++ TYPE expected = svadd_x (pg, repeat, BASE + y * COUNT); ++ if (svptest_any (pg, svcmpne (pg, x, expected))) ++ __builtin_abort (); ++ } ++} ++ ++int ++main () ++{ ++ TYPE x = INDEX (BASE, 1); ++ ++ check (dupq_0 (x), 0); ++ check (dupq_1 (x), 1); ++ check (dupq_2 (x), 2); ++ check (dupq_3 (x), 3); ++ check (dupq_4 (x), 4); ++ ++ for (int i = 0; i < 63; ++i) ++ { ++ check (dupq_var (x, i), i); ++ check (dupq_var (x, (uint64_t) 1 << i), (uint64_t) 1 << i); ++ } ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_2.c +new file mode 100644 +index 000000000..40de1c7dc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_2.c +@@ -0,0 +1,9 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2" } */ ++ ++#define TYPE svuint8_t ++#define DUPQ svdupq_lane_u8 ++#define INDEX svindex_u8 ++#define COUNT 16 ++ ++#include "dupq_lane_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_3.c +new file mode 100644 +index 000000000..4ebe89545 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_3.c +@@ -0,0 +1,9 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2" } */ ++ ++#define TYPE svint16_t ++#define DUPQ svdupq_lane_s16 ++#define INDEX svindex_s16 ++#define COUNT 8 ++ ++#include "dupq_lane_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_4.c +new file mode 100644 +index 000000000..1be20c8e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_4.c +@@ -0,0 +1,9 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2" } */ ++ ++#define TYPE svuint16_t ++#define DUPQ svdupq_lane_u16 ++#define INDEX svindex_u16 ++#define COUNT 8 ++ ++#include "dupq_lane_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_5.c +new file mode 100644 +index 000000000..67554d06a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_5.c +@@ -0,0 +1,9 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2" } */ ++ ++#define TYPE svint32_t ++#define DUPQ svdupq_lane_s32 ++#define INDEX svindex_s32 ++#define COUNT 4 ++ ++#include "dupq_lane_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_6.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_6.c
+new file mode 100644
+index 000000000..1914d2368
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_6.c
+@@ -0,0 +1,9 @@
++/* { dg-do run { target aarch64_sve_hw } } */
++/* { dg-options "-O2" } */
++
++#define TYPE svuint32_t
++#define DUPQ svdupq_lane_u32
++#define INDEX svindex_u32
++#define COUNT 4
++
++#include "dupq_lane_1.c"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_7.c
+new file mode 100644
+index 000000000..d7a8e52f8
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_7.c
+@@ -0,0 +1,9 @@
++/* { dg-do run { target aarch64_sve_hw } } */
++/* { dg-options "-O2" } */
++
++#define TYPE svint64_t
++#define DUPQ svdupq_lane_s64
++#define INDEX svindex_s64
++#define COUNT 2
++
++#include "dupq_lane_1.c"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_8.c
+new file mode 100644
+index 000000000..68655fefa
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_8.c
+@@ -0,0 +1,9 @@
++/* { dg-do run { target aarch64_sve_hw } } */
++/* { dg-options "-O2" } */
++
++#define TYPE svuint64_t
++#define DUPQ svdupq_lane_u64
++#define INDEX svindex_u64
++#define COUNT 2
++
++#include "dupq_lane_1.c"
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eor_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eor_1.c
+new file mode 100644
+index 000000000..357b0bfb8
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eor_1.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#include <arm_sve.h>
++
++void
++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr)
++{
++  svbool_t res = sveor_z (pg, x, y);
++  *any = svptest_any (pg, res);
++  *ptr = res;
++}
++
++int
++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any)
++{
++  svbool_t res = sveor_z (pg, x, y);
++  return svptest_any (pg, res);
++}
++
++/* { dg-final { scan-assembler-times {\teors\t} 2 } } */
++/* { dg-final { scan-assembler-not {\teor\t} } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1_1.c
+new file mode 100644
+index 000000000..c68a9ed99
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1_1.c
+@@ -0,0 +1,25 @@
++/* { dg-do compile } */
++/* { dg-options "-O" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++#include <arm_sve.h>
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++/*
++** nop1:
++**	ret
++*/
++void nop1 (int8_t *s) { svld1 (svptrue_b8 (), s); }
++
++/*
++** nop2:
++**	ret
++*/
++void nop2 (svbool_t pg, int16_t *s) { svld1 (pg, s); }
++
++#ifdef __cplusplus
++}
++#endif
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_1.c
+new file mode 100644
+index 000000000..79f8bee1f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#include <arm_sve.h>
++
++/* Make sure that SETFFR comes first, however high the priority of the
++   LDFF1 is.
*/ ++svint8_t ++foo (svbool_t pg, int8_t *ptr) ++{ ++ svsetffr (); ++ svint8_t x = svldff1 (pg, ptr); ++ x = svadd_x (pg, x, x); ++ x = svmul_x (pg, x, x); ++ return x; ++} ++ ++/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_2.c +new file mode 100644 +index 000000000..7c3c8d8b5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_2.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* Make sure that RDFFR comes after the LDFF1 and that the RDFFRs can ++ be CSEd. */ ++svint8_t ++foo (svbool_t pg, int8_t *__restrict ptr, ++ svbool_t *__restrict *__restrict preds) ++{ ++ svsetffr (); ++ svint8_t x = svldff1 (pg, ptr); ++ *preds[0] = svrdffr (); ++ *preds[1] = svrdffr (); ++ return x; ++} ++ ++/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t.*\trdffr\t} } } */ ++/* { dg-final { scan-assembler-times {\trdffr\t} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_3.c +new file mode 100644 +index 000000000..41ad0bcea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_3.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* Make sure that LDFF1s can be reordered. The load of x should come due ++ to its longer dependence chain. */ ++svint8_t ++foo (int8_t *ptr1, int8_t *ptr2) ++{ ++ svsetffr (); ++ svbool_t pg = svptrue_b8 (); ++ svint8_t y = svldff1 (pg, ptr2); ++ svint8_t x = svldff1 (pg, ptr1); ++ x = svadd_x (pg, x, x); ++ x = svmul_x (pg, x, x); ++ x = svadd_x (pg, x, y); ++ return x; ++} ++ ++/* { dg-final { scan-assembler {\tldff1b\tz[0-9]+\.b, p[0-7]/z, \[x0\]\n.*\tldff1b\tz[0-9]+\.b, p[0-7]/z, \[x1\]\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_4.c +new file mode 100644 +index 000000000..c27302139 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_4.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* Make sure that we can use RDFFRS to test for a fault. */ ++svint8_t ++foo (svbool_t pg, int8_t *ptr, int *fault) ++{ ++ svsetffr (); ++ svint8_t x = svldff1 (pg, ptr); ++ *fault = svptest_any (pg, svrdffr_z (pg)); ++ return x; ++} ++ ++/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t.*\trdffrs\t} } } */ ++/* { dg-final { scan-assembler-not {\trdffr\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_5.c +new file mode 100644 +index 000000000..76e7ab8ba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_5.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* Make sure that we can use RDFFRS to read the FFR while testing for a ++ fault. 
*/
++svint8_t
++foo (svbool_t pg, int8_t *ptr, svbool_t *pred, int *fault)
++{
++  svsetffr ();
++  svint8_t x = svldff1 (pg, ptr);
++  svbool_t ffr = svrdffr_z (pg);
++  *fault = svptest_any (pg, ffr);
++  *pred = ffr;
++  return x;
++}
++
++/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t.*\trdffrs\t} } } */
++/* { dg-final { scan-assembler-not {\trdffr\t} } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_6.c
+new file mode 100644
+index 000000000..7110e5f1a
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_6.c
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#include <arm_sve.h>
++
++/* Make sure that we can use RDFFRS to test for a fault.  */
++svint8_t
++foo (svbool_t pg, int8_t *ptr, int *fault)
++{
++  svsetffr ();
++  svint8_t x = svldff1 (pg, ptr);
++  *fault = svptest_any (svptrue_b8 (), svrdffr ());
++  return x;
++}
++
++/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t.*\trdffrs\t} } } */
++/* { dg-final { scan-assembler-not {\trdffr\t} } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_7.c
+new file mode 100644
+index 000000000..355fe91f1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_7.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#include <arm_sve.h>
++
++/* Make sure that we can use RDFFRS to read the FFR while testing for a
++   fault.  */
++svint8_t
++foo (svbool_t pg, int8_t *ptr, svbool_t *pred, int *fault)
++{
++  svsetffr ();
++  svint8_t x = svldff1 (pg, ptr);
++  svbool_t ffr = svrdffr ();
++  *fault = svptest_any (svptrue_b8 (), ffr);
++  *pred = ffr;
++  return x;
++}
++
++/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t.*\trdffrs\t} } } */
++/* { dg-final { scan-assembler-not {\trdffr\t} } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nand_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nand_1.c
+new file mode 100644
+index 000000000..0bc54c049
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nand_1.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#include <arm_sve.h>
++
++void
++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr)
++{
++  svbool_t res = svnand_z (pg, x, y);
++  *any = svptest_any (pg, res);
++  *ptr = res;
++}
++
++int
++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any)
++{
++  svbool_t res = svnand_z (pg, x, y);
++  return svptest_any (pg, res);
++}
++
++/* { dg-final { scan-assembler-times {\tnands\t} 2 } } */
++/* { dg-final { scan-assembler-not {\tnand\t} } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nor_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nor_1.c
+new file mode 100644
+index 000000000..7973294d1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nor_1.c
+@@ -0,0 +1,22 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++#include <arm_sve.h>
++
++void
++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr)
++{
++  svbool_t res = svnor_z (pg, x, y);
++  *any = svptest_any (pg, res);
++  *ptr = res;
++}
++
++int
++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any)
++{
++  svbool_t res = svnor_z (pg, x, y);
++  return svptest_any (pg, res);
++}
++
++/* { dg-final { scan-assembler-times {\tnors\t} 2 } } */
++/* { dg-final { scan-assembler-not {\tnor\t} } } */
+diff --git
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_1.c +new file mode 100644 +index 000000000..09dfacd22 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_1.c +@@ -0,0 +1,17 @@ ++/* { dg-options "-march=armv8-a" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++void ++f (svbool_t *x, svint8_t *y) ++{ ++ *x = svptrue_b8 (); /* { dg-error {ACLE function '(svbool_t svptrue_b8\(\)|svptrue_b8)' requires ISA extension 'sve'} } */ ++ /* { dg-message {note: you can enable 'sve' using the command-line option '-march', or by using the 'target' attribute or pragma} "" { target *-*-* } .-1 } */ ++ *x = svptrue_b8 (); ++ *x = svptrue_b8 (); ++ *x = svptrue_b8 (); ++ *x = svptrue_b8 (); ++ *x = svptrue_b8 (); ++ *x = svptrue_b8 (); ++ *y = svadd_m (*x, *y, 1); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_2.c +new file mode 100644 +index 000000000..594be1cf4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_2.c +@@ -0,0 +1,14 @@ ++/* { dg-options "-march=armv8-a" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++#pragma GCC target "+sve" ++ ++void ++f (svbool_t *x, svint8_t *y) ++{ ++ *x = svptrue_b8 (); ++ *y = svadd_m (*x, *y, 1); ++} ++ ++/* { dg-final { scan-assembler {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_3.c +new file mode 100644 +index 000000000..85f4eb3c0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_3.c +@@ -0,0 +1,12 @@ ++/* { dg-options "-march=armv8-a" } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++void __attribute__ ((target("+sve"))) ++f (svbool_t *x, svint8_t *y) ++{ ++ *x = svptrue_b8 (); ++ *y = svadd_m (*x, *y, 1); ++} ++ ++/* { dg-final { scan-assembler {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orn_1.c +new file mode 100644 +index 000000000..c3ed1eb61 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orn_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svorn_z (pg, x, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) ++{ ++ svbool_t res = svorn_z (pg, x, y); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\torns\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\torn\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orr_1.c +new file mode 100644 +index 000000000..4456fa630 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orr_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svorr_z (pg, x, y); ++ *any = svptest_any (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) ++{ ++ svbool_t res = svorr_z (pg, x, y); ++ return svptest_any (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\torrs\t} 2 } 
} */ ++/* { dg-final { scan-assembler-not {\torr\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pfirst_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pfirst_1.c +new file mode 100644 +index 000000000..de1ff691a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pfirst_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, int *last, svbool_t *ptr) ++{ ++ svbool_t res = svpfirst (pg, svpfalse ()); ++ *last = svptest_last (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg) ++{ ++ svbool_t res = svpfirst (pg, svpfalse ()); ++ return svptest_last (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tpfirst\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_1.c +new file mode 100644 +index 000000000..bf59cb963 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t prev, int *last, svbool_t *ptr) ++{ ++ svbool_t res = svpnext_b8 (pg, prev); ++ *last = svptest_last (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t prev) ++{ ++ svbool_t res = svpnext_b8 (pg, prev); ++ return svptest_last (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tpnext\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_2.c +new file mode 100644 +index 000000000..9926a2bee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_2.c +@@ -0,0 +1,52 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (svbool_t pg, svbool_t prev, int *last, svbool_t *ptr) ++{ ++ svbool_t res = svpnext_b16 (pg, prev); ++ *last = svptest_last (pg, res); ++ *ptr = res; ++} ++ ++int ++test2 (svbool_t pg, svbool_t prev) ++{ ++ svbool_t res = svpnext_b16 (pg, prev); ++ return svptest_last (pg, res); ++} ++ ++void ++test3 (svbool_t pg, svbool_t prev, int *last, svbool_t *ptr) ++{ ++ svbool_t res = svpnext_b32 (pg, prev); ++ *last = svptest_last (pg, res); ++ *ptr = res; ++} ++ ++int ++test4 (svbool_t pg, svbool_t prev) ++{ ++ svbool_t res = svpnext_b32 (pg, prev); ++ return svptest_last (pg, res); ++} ++ ++void ++test5 (svbool_t pg, svbool_t prev, int *last, svbool_t *ptr) ++{ ++ svbool_t res = svpnext_b64 (pg, prev); ++ *last = svptest_last (pg, res); ++ *ptr = res; ++} ++ ++int ++test6 (svbool_t pg, svbool_t prev) ++{ ++ svbool_t res = svpnext_b64 (pg, prev); ++ return svptest_last (pg, res); ++} ++ ++/* { dg-final { scan-assembler-times {\tpnext\t} 6 } } */ ++/* { dg-final { scan-assembler-times {\tptest\t} 6 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_1.c +new file mode 100644 +index 000000000..69bbb1ed0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_1.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (int *last, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b8 (SV_VL32); ++ *last = svptest_last (svptrue_b8 (), res); ++ *ptr = res; ++} ++ ++int ++test2 () ++{ ++ svbool_t res = 
svptrue_pat_b8 (SV_VL32); ++ return svptest_last (svptrue_b8 (), res); ++} ++ ++/* { dg-final { scan-assembler-times {\tptrues\tp[0-9]+\.b, vl32\n} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_2.c +new file mode 100644 +index 000000000..ede83405e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_2.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (int *last, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b16 (SV_VL16); ++ *last = svptest_last (svptrue_b16 (), res); ++ *ptr = res; ++} ++ ++int ++test2 () ++{ ++ svbool_t res = svptrue_pat_b16 (SV_VL16); ++ return svptest_last (svptrue_b16 (), res); ++} ++ ++/* { dg-final { scan-assembler-times {\tptrues\tp[0-9]+\.h, vl16\n} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_3.c +new file mode 100644 +index 000000000..d2eb3fc30 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_3.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (int *last, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b32 (SV_VL16); ++ *last = svptest_last (svptrue_b32 (), res); ++ *ptr = res; ++} ++ ++int ++test2 () ++{ ++ svbool_t res = svptrue_pat_b32 (SV_VL16); ++ return svptest_last (svptrue_b32 (), res); ++} ++ ++/* { dg-final { scan-assembler-times {\tptrues\tp[0-9]+\.s, vl16\n} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_4.c +new file mode 100644 +index 000000000..59a21da9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_4.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b64 (SV_VL7); ++ *any = svptest_any (svptrue_b64 (), res); ++ *ptr = res; ++} ++ ++int ++test2 () ++{ ++ svbool_t res = svptrue_pat_b64 (SV_VL7); ++ return svptest_any (svptrue_b64 (), res); ++} ++ ++/* { dg-final { scan-assembler-times {\tptrues\tp[0-9]+\.d, vl7\n} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_5.c +new file mode 100644 +index 000000000..c8f6d8aca +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_5.c +@@ -0,0 +1,188 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++b8_b16_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b8 (SV_VL64); ++ *any = svptest_any (svptrue_b16 (), res); ++ *ptr = res; ++} ++ ++int ++b8_b16_2 () ++{ ++ svbool_t res = svptrue_pat_b8 (SV_VL64); ++ return svptest_any (svptrue_b16 (), res); ++} ++ ++void ++b8_b32_1 (int *any, svbool_t *ptr) ++{ ++ 
svbool_t res = svptrue_pat_b8 (SV_VL32); ++ *any = svptest_any (svptrue_b32 (), res); ++ *ptr = res; ++} ++ ++int ++b8_b32_2 () ++{ ++ svbool_t res = svptrue_pat_b8 (SV_VL32); ++ return svptest_any (svptrue_b32 (), res); ++} ++ ++void ++b8_b64_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b8 (SV_VL128); ++ *any = svptest_any (svptrue_b64 (), res); ++ *ptr = res; ++} ++ ++int ++b8_b64_2 () ++{ ++ svbool_t res = svptrue_pat_b8 (SV_VL128); ++ return svptest_any (svptrue_b64 (), res); ++} ++ ++void ++b16_b8_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b16 (SV_VL32); ++ *any = svptest_any (svptrue_b8 (), res); ++ *ptr = res; ++} ++ ++int ++b16_b8_2 () ++{ ++ svbool_t res = svptrue_pat_b16 (SV_VL32); ++ return svptest_any (svptrue_b8 (), res); ++} ++ ++void ++b16_b32_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b16 (SV_VL16); ++ *any = svptest_any (svptrue_b32 (), res); ++ *ptr = res; ++} ++ ++int ++b16_b32_2 () ++{ ++ svbool_t res = svptrue_pat_b16 (SV_VL16); ++ return svptest_any (svptrue_b32 (), res); ++} ++ ++void ++b16_b64_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b16 (SV_VL64); ++ *any = svptest_any (svptrue_b64 (), res); ++ *ptr = res; ++} ++ ++int ++b16_b64_2 () ++{ ++ svbool_t res = svptrue_pat_b16 (SV_VL64); ++ return svptest_any (svptrue_b64 (), res); ++} ++ ++void ++b32_b8_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b32 (SV_VL16); ++ *any = svptest_any (svptrue_b8 (), res); ++ *ptr = res; ++} ++ ++int ++b32_b8_2 () ++{ ++ svbool_t res = svptrue_pat_b32 (SV_VL16); ++ return svptest_any (svptrue_b8 (), res); ++} ++ ++void ++b32_b16_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b32 (SV_VL6); ++ *any = svptest_any (svptrue_b16 (), res); ++ *ptr = res; ++} ++ ++int ++b32_b16_2 () ++{ ++ svbool_t res = svptrue_pat_b32 (SV_VL6); ++ return svptest_any (svptrue_b16 (), res); ++} ++ ++void ++b32_b64_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b32 (SV_VL32); ++ *any = svptest_any (svptrue_b64 (), res); ++ *ptr = res; ++} ++ ++int ++b32_b64_2 () ++{ ++ svbool_t res = svptrue_pat_b32 (SV_VL32); ++ return svptest_any (svptrue_b64 (), res); ++} ++ ++void ++b64_b8_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b64 (SV_VL7); ++ *any = svptest_any (svptrue_b8 (), res); ++ *ptr = res; ++} ++ ++int ++b64_b8_2 () ++{ ++ svbool_t res = svptrue_pat_b64 (SV_VL7); ++ return svptest_any (svptrue_b8 (), res); ++} ++ ++void ++b64_b16_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b64 (SV_VL16); ++ *any = svptest_any (svptrue_b16 (), res); ++ *ptr = res; ++} ++ ++int ++b64_b16_2 () ++{ ++ svbool_t res = svptrue_pat_b64 (SV_VL16); ++ return svptest_any (svptrue_b16 (), res); ++} ++ ++void ++b64_b32_1 (int *any, svbool_t *ptr) ++{ ++ svbool_t res = svptrue_pat_b64 (SV_VL32); ++ *any = svptest_any (svptrue_b32 (), res); ++ *ptr = res; ++} ++ ++int ++b64_b32_2 () ++{ ++ svbool_t res = svptrue_pat_b64 (SV_VL32); ++ return svptest_any (svptrue_b32 (), res); ++} ++ ++/* { dg-final { scan-assembler-not {\tptrues\n} } } */ ++/* { dg-final { scan-assembler-times {\tptrue\t} 48 } } */ ++/* { dg-final { scan-assembler-times {\tptest\t} 24 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/qincb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/qincb_1.c +new file mode 100644 +index 000000000..ba512f406 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/qincb_1.c +@@ -0,0 +1,43 @@ ++/* { dg-do compile } */ ++/* { 
dg-additional-options "-O" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++** qincb_s32_s: ++** sqincb x0, w0, all, mul #15 ++** ret ++*/ ++uint64_t qincb_s32_s (int32_t x) { return svqincb (x, 15); } ++ ++/* ++** qincb_s32_z: ++** sqincb x([0-9]+), w0, all, mul #15 ++** uxtw x0, w\1 ++** ret ++*/ ++uint64_t qincb_s32_z (int32_t x) { return (uint32_t) svqincb (x, 15); } ++ ++/* ++** qincb_u32_s: ++** uqincb (w[0-9]+), all, mul #15 ++** sxtw x0, \1 ++** ret ++*/ ++uint64_t qincb_u32_s (uint32_t x) { return (int32_t) svqincb (x, 15); } ++ ++/* ++** qincb_u32_z: ++** uqincb w0, all, mul #15 ++** ret ++*/ ++uint64_t qincb_u32_z (uint32_t x) { return svqincb (x, 15); } ++ ++#ifdef __cplusplus ++} ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/struct_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/struct_1.c +new file mode 100644 +index 000000000..50892c85a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/struct_1.c +@@ -0,0 +1,16 @@ ++#include ++ ++void ++f (svint8x2_t *a, svint8x2_t *b) ++{ ++ svint8_t *ptr; ++ svint8x2_t x = *a; ++ *a = *b; ++ a = &x; ++ (void) (a == b); ++ (void) (a != b); ++ (void) (a < b); ++ (void) (a > b); ++ (void) (a <= b); ++ (void) (a >= b); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/temporaries_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/temporaries_1.c +new file mode 100644 +index 000000000..2543e1e62 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/temporaries_1.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O" } */ ++/* { dg-final { check-function-bodies "**" "" "" { target { ! ilp32 } } } } */ ++ ++#include ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++** test_s8: ++** ptrue (p[0-7])\.b, all ++** ld1b (z[0-9]+\.b), \1/z, \[x0\] ++** add \2, \2, #1 ++** st1b \2, \1, \[x1\] ++** ret ++*/ ++void ++test_s8 (int8_t *x, int8_t *y) ++{ ++ int8_t tmp1[32], tmp2[32]; ++ ++ svbool_t pg = svptrue_b8 (); ++ svst1 (pg, tmp1, svld1 (pg, x)); ++ svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1)); ++ svst1 (pg, y, svld1 (pg, tmp2)); ++} ++ ++/* ++** test_s32_b8: ++** ptrue (p[0-7])\.b, all ++** ld1w (z[0-9]+\.s), \1/z, \[x0\] ++** add \2, \2, #1 ++** st1w \2, \1, \[x1\] ++** ret ++*/ ++void ++test_s32_b8 (int32_t *x, int32_t *y) ++{ ++ int32_t tmp1[8], tmp2[8]; ++ ++ svbool_t pg = svptrue_b8 (); ++ svst1 (pg, tmp1, svld1 (pg, x)); ++ svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1)); ++ svst1 (pg, y, svld1 (pg, tmp2)); ++} ++ ++/* ++** test_s32_b32: ++** ptrue (p[0-7])\.b, all ++** ld1w (z[0-9]+\.s), \1/z, \[x0\] ++** add \2, \2, #1 ++** st1w \2, \1, \[x1\] ++** ret ++*/ ++void ++test_s32_b32 (int32_t *x, int32_t *y) ++{ ++ int32_t tmp1[8], tmp2[8]; ++ ++ svbool_t pg = svptrue_b32 (); ++ svst1 (pg, tmp1, svld1 (pg, x)); ++ svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1)); ++ svst1 (pg, y, svld1 (pg, tmp2)); ++} ++ ++#ifdef __cplusplus ++} ++#endif +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_1.c +new file mode 100644 +index 000000000..1d5523e31 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_1.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (int32_t x, int32_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svwhilele_b8 (x, y); ++ *any = svptest_last (svptrue_b8 
(), res); ++ *ptr = res; ++} ++ ++int ++test2 (int32_t x, int32_t y) ++{ ++ svbool_t res = svwhilele_b8 (x, y); ++ return svptest_last (svptrue_b8 (), res); ++} ++ ++/* { dg-final { scan-assembler-times {\twhilele\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_10.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_10.c +new file mode 100644 +index 000000000..ca339c41c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_10.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* { dg-final { scan-assembler-not {\twhilele\t} } } */ ++/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} } } */ ++ ++void ++test1 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b32_u32 (-1, 0); ++} ++ ++void ++test2 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b16_u64 (0x80000000, 0); ++} ++ ++void ++test3 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b8_u64 (0x8000000000000001ULL, 0x7ffffffffffffffeULL); ++} ++ ++/* { dg-final { scan-assembler-times {\tpfalse\tp[0-7]\.b\n} 3 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_2.c +new file mode 100644 +index 000000000..020846007 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_2.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (int32_t x, int32_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svwhilele_b16 (x, y); ++ *any = svptest_last (svptrue_b16 (), res); ++ *ptr = res; ++} ++ ++int ++test2 (int32_t x, int32_t y) ++{ ++ svbool_t res = svwhilele_b16 (x, y); ++ return svptest_last (svptrue_b16 (), res); ++} ++ ++/* { dg-final { scan-assembler-times {\twhilele\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_3.c +new file mode 100644 +index 000000000..4a1045cf6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_3.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (int32_t x, int32_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svwhilele_b32 (x, y); ++ *any = svptest_last (svptrue_b32 (), res); ++ *ptr = res; ++} ++ ++int ++test2 (int32_t x, int32_t y) ++{ ++ svbool_t res = svwhilele_b32 (x, y); ++ return svptest_last (svptrue_b32 (), res); ++} ++ ++/* { dg-final { scan-assembler-times {\twhilele\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_4.c +new file mode 100644 +index 000000000..f6fb0d099 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_4.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++void ++test1 (int32_t x, int32_t y, int *any, svbool_t *ptr) ++{ ++ svbool_t res = svwhilele_b64 (x, y); ++ *any = svptest_last (svptrue_b64 (), res); ++ *ptr = res; ++} ++ ++int ++test2 (int32_t x, int32_t y) ++{ ++ svbool_t res = svwhilele_b64 (x, y); ++ 
return svptest_last (svptrue_b64 (), res); ++} ++ ++/* { dg-final { scan-assembler-times {\twhilele\t} 2 } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} } } */ ++/* { dg-final { scan-assembler-not {\tptest\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_5.c +new file mode 100644 +index 000000000..ada958b29 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_5.c +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* { dg-final { scan-assembler-not {\twhilele\t} } } */ ++/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ ++ ++void ++test1 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b32_s32 (-8, -8); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.[bhsd], vl1\n} } } */ ++ ++void ++test2 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b16_s64 (-1, 1); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, vl3\n} } } */ ++ ++void ++test3 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b16_s32 (0x7ffffffb, 0x7fffffff); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, vl5\n} } } */ ++ ++void ++test4 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b8_s64 (svcntb (), svcntb () + 6); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.b, vl7\n} } } */ ++ ++void ++test5 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b64_s64 (0, 1); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.d, vl2\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_6.c +new file mode 100644 +index 000000000..00d92ba8a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_6.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* { dg-final { scan-assembler-not {\twhilele\t} } } */ ++/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} } } */ ++ ++void ++test1 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b32_s32 (-8, -9); ++} ++ ++void ++test2 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b16_s64 (50, -1); ++} ++ ++void ++test3 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b16_s32 (0x7ffffffb, 0x80000000); ++} ++ ++void ++test4 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b8_s64 (svcntb (), 15); ++} ++ ++void ++test5 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b8_s64 (svcntb (), svcntw ()); ++} ++ ++/* { dg-final { scan-assembler-times {\tpfalse\tp[0-7]\.b\n} 5 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_7.c +new file mode 100644 +index 000000000..92488f597 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_7.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* { dg-final { scan-assembler-not {\twhilel[et]\t} } } */ ++/* { dg-final { scan-assembler-not {\tpfalse\t} } } */ ++ ++void ++test1 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b8_s32 (-svcnth (), svcnth () - 1); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.b, all\n} } } */ ++ ++void ++test2 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b16_s64 (1, svcntw () * 2); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, all\n} } } */ ++ ++void ++test3 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b32_s32 (svcntd (), svcntw () + svcntd () - 1); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.s, all\n} } } */ +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_9.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_9.c +new file mode 100644 +index 000000000..e7f81a86f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_9.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* { dg-final { scan-assembler-not {\twhilele\t} } } */ ++/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ ++ ++void ++test1 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b32_u32 (1, 3); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.s, vl3\n} } } */ ++ ++void ++test2 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b16_u64 (svcntd (), svcntd () + 5); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, vl6\n} } } */ ++ ++void ++test3 (svbool_t *ptr) ++{ ++ *ptr = svwhilele_b8_u32 (0x7ffffffb, 0x80000002); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.b, vl8\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_1.c +new file mode 100644 +index 000000000..5c8f97e2f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_1.c +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* { dg-final { scan-assembler-not {\twhilele\t} } } */ ++/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ ++ ++void ++test1 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b32_s32 (-8, -7); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.[bhsd], vl1\n} } } */ ++ ++void ++test2 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b16_s64 (-1, 2); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, vl3\n} } } */ ++ ++void ++test3 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b16_s32 (0x7ffffffa, 0x7fffffff); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, vl5\n} } } */ ++ ++void ++test4 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b8_s64 (svcntb (), svcntb () + 7); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.b, vl7\n} } } */ ++ ++void ++test5 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b64_s64 (0, 2); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.d, vl2\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_2.c +new file mode 100644 +index 000000000..2be3a5b0c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_2.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* { dg-final { scan-assembler-not {\twhilele\t} } } */ ++/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ ++/* { dg-final { scan-assembler-not {\tptrue\t} } } */ ++ ++void ++test1 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b32_s32 (0, 0); ++} ++ ++void ++test2 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b16_s64 (50, -1); ++} ++ ++void ++test3 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b16_s32 (0x7ffffffb, 0x80000000); ++} ++ ++void ++test4 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b8_s64 (svcntb (), svcntb ()); ++} ++ ++void ++test5 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b8_s64 (svcntb (), svcntw ()); ++} ++ ++/* { dg-final { scan-assembler-times {\tpfalse\tp[0-7]\.b\n} 5 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_3.c +new file mode 100644 +index 000000000..650b2652f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_3.c +@@ 
-0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++#include ++ ++/* { dg-final { scan-assembler-not {\twhilel[et]\t} } } */ ++/* { dg-final { scan-assembler-not {\tpfalse\t} } } */ ++ ++void ++test1 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b8_s32 (-svcnth (), svcnth ()); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.b, all\n} } } */ ++ ++void ++test2 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b16_s64 (0, svcntw () * 2); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, all\n} } } */ ++ ++void ++test3 (svbool_t *ptr) ++{ ++ *ptr = svwhilelt_b32_s32 (svcntd (), svcntw () + svcntd ()); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.s, all\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_1.c +new file mode 100644 +index 000000000..223351c2f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_1.c +@@ -0,0 +1,46 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#ifndef FACTOR ++#define FACTOR 2 ++#endif ++ ++#define LOOP(TYPE) \ ++ __attribute__ ((noipa)) \ ++ void \ ++ test_##TYPE (TYPE *restrict dst, TYPE *restrict src, \ ++ int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] += src[i] * FACTOR; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (int16_t) \ ++ T (int32_t) \ ++ T (int64_t) \ ++ T (uint8_t) \ ++ T (uint16_t) \ ++ T (uint32_t) \ ++ T (uint64_t) ++ ++TEST_ALL (LOOP) ++ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.b,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.b,} 2 } } */ ++/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.b,} } } */ ++ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.h,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.h,} 2 } } */ ++/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.h,} } } */ ++ ++/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.s,} } } */ ++/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.s,} } } */ ++/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.s, \[z[0-9]\.s, z[0-9]\.s, lsl 1\]} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.d,} } } */ ++/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.d,} } } */ ++/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, lsl 1\]} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_1_run.c +new file mode 100644 +index 000000000..383a90c24 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_1_run.c +@@ -0,0 +1,31 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "adr_1.c" ++ ++#define N 131 ++ ++#define TEST_LOOP(TYPE) \ ++ { \ ++ TYPE a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (TYPE) i * i + i % 5; \ ++ b[i] = (TYPE) i * 3 + i % 7; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (a, b, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = ((TYPE) (i * i + i % 5) \ ++ + ((TYPE) i * 3 + i % 7) * FACTOR); \ ++ if (a[i] != expected) \ ++ __builtin_abort (); \ ++ } \ ++ } ++ ++int __attribute__ ((optimize (1))) ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_2.c +new file mode 100644 +index 000000000..dc20ddbad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_2.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 
-ftree-vectorize" } */ ++ ++#define FACTOR 4 ++#include "adr_1.c" ++ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.b,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.b,} 2 } } */ ++/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.b,} } } */ ++ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.h,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.h,} 2 } } */ ++/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.h,} } } */ ++ ++/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.s,} } } */ ++/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.s,} } } */ ++/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.s, \[z[0-9]\.s, z[0-9]\.s, lsl 2\]} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.d,} } } */ ++/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.d,} } } */ ++/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, lsl 2\]} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_2_run.c +new file mode 100644 +index 000000000..e823d3d0a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_2_run.c +@@ -0,0 +1,5 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#define FACTOR 4 ++#include "adr_1_run.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_3.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_3.c +new file mode 100644 +index 000000000..b0cb180dd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_3.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#define FACTOR 8 ++#include "adr_1.c" ++ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.b,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.b,} 2 } } */ ++/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.b,} } } */ ++ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.h,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.h,} 2 } } */ ++/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.h,} } } */ ++ ++/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.s,} } } */ ++/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.s,} } } */ ++/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.s, \[z[0-9]\.s, z[0-9]\.s, lsl 3\]} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.d,} } } */ ++/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.d,} } } */ ++/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, lsl 3\]} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_3_run.c +new file mode 100644 +index 000000000..721dd68ef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_3_run.c +@@ -0,0 +1,5 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#define FACTOR 8 ++#include "adr_1_run.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_4.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_4.c +new file mode 100644 +index 000000000..7c039ba13 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_4.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#define FACTOR 16 ++#include "adr_1.c" ++ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.[bhsd],} 8 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.[bhsd],} 8 } } */ ++/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.[bhsd],} } } */ +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/adr_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_4_run.c +new file mode 100644 +index 000000000..3fb9099e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_4_run.c +@@ -0,0 +1,5 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#define FACTOR 16 ++#include "adr_1_run.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_5.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_5.c +new file mode 100644 +index 000000000..ce3991cb2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_5.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define LOOP(FACTOR) \ ++ __attribute__ ((noipa)) \ ++ void \ ++ test_##FACTOR (uint64_t *restrict dst, \ ++ uint64_t *restrict src, int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] += (src[i] & 0xffffffff) * FACTOR; \ ++ } ++ ++#define TEST_ALL(T) T (1) T (2) T (4) T (8) ++ ++TEST_ALL (LOOP) ++ ++/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.d,} } } */ ++/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.d,} } } */ ++/* { dg-final { scan-assembler-not {\tand\tz[0-9]\.d,} } } */ ++/* { dg-final { scan-assembler-not {\tuxtw\tz[0-9]\.d,} } } */ ++/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, uxtw\]} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, uxtw 1\]} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, uxtw 2\]} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, uxtw 3\]} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_5_run.c +new file mode 100644 +index 000000000..025c38d23 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_5_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "adr_5.c" ++ ++#define N 131 ++ ++#define TEST_LOOP(FACTOR) \ ++ { \ ++ uint64_t a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (uint64_t) i * i + i % 5; \ ++ b[i] = (uint64_t) (i * 3) << ((i & 7) * 8); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##FACTOR (a, b, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ uint64_t expected = ((uint64_t) (i * i + i % 5) \ ++ + (((uint64_t) (i * 3) << ((i & 7) * 8)) \ ++ & 0xffffffff) * FACTOR); \ ++ if (a[i] != expected) \ ++ __builtin_abort (); \ ++ } \ ++ } ++ ++int __attribute__ ((optimize (1))) ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/asrdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve/asrdiv_1.c +new file mode 100644 +index 000000000..615d8b885 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/asrdiv_1.c +@@ -0,0 +1,51 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ ++ ++#include ++ ++#define SIGNED(S) int##S##_t ++ ++#define DIV(x,y) ((x)/(y)) ++#define MOD(x,y) ((x)%(y)) ++ ++#define TEMPLATE(OP,SIZE) \ ++void __attribute__ ((noinline, noclone)) \ ++f_##OP##_##SIZE (SIGNED(SIZE) *restrict a, SIGNED(SIZE) *restrict b, \ ++ __INTPTR_TYPE__ n) \ ++{ \ ++ for (__INTPTR_TYPE__ i = 0; i < n; ++i) \ ++ a[i] = OP (b[i], ((SIGNED(SIZE))1 << ((SIZE)/2+1))); \ ++} ++#define DIVMOD(SIZE) \ ++TEMPLATE (DIV,SIZE); \ ++TEMPLATE (MOD,SIZE); ++ ++DIVMOD (8); ++DIVMOD (16); ++DIVMOD (32); ++DIVMOD 
(64); ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 8 "vect" } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 4 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.b, p[0-9]+/m, z[0-9]+\.b, #5\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.h, p[0-9]+/m, z[0-9]+\.h, #9\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #9\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.s, p[0-9]+/m, z[0-9]+\.s, #17\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #17\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.d, p[0-9]+/m, z[0-9]+\.d, #33\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #33\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tasr\t%} } } */ ++/* { dg-final { scan-assembler-not {\tlsr\t%} } } */ ++/* { dg-final { scan-assembler-not {\tcmplt\t%} } } */ ++/* { dg-final { scan-assembler-not {\tand\t%} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c +new file mode 100644 +index 000000000..d86a428a7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c +@@ -0,0 +1,25 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 --save-temps" } */ ++ ++#include ++ ++#define TEST_TYPE(TYPE) \ ++ void \ ++ test_##TYPE (TYPE *ptr, TYPE *a, TYPE *b, TYPE min_v) \ ++ { \ ++ TYPE last = *ptr; \ ++ for (int i = 0; i < 1024; i++) \ ++ if (a[i] < min_v) \ ++ last = b[i]; \ ++ *ptr = last; \ ++ } ++ ++TEST_TYPE (uint8_t); ++TEST_TYPE (uint16_t); ++TEST_TYPE (uint32_t); ++TEST_TYPE (uint64_t); ++ ++/* { dg-final { scan-assembler {\tclastb\t(b[0-9]+), p[0-7], \1, z[0-9]+\.b\n} } } */ ++/* { dg-final { scan-assembler {\tclastb\t(h[0-9]+), p[0-7], \1, z[0-9]+\.h\n} } } */ ++/* { dg-final { scan-assembler {\tclastb\t(s[0-9]+), p[0-7], \1, z[0-9]+\.s\n} } } */ ++/* { dg-final { scan-assembler {\tclastb\t(d[0-9]+), p[0-7], \1, z[0-9]+\.d\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c +new file mode 100644 +index 000000000..bdc9856fa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O2 -ftree-vectorize --save-temps" } */ ++ ++#include ++ ++void __attribute__ ((noinline, noclone)) ++clrsb_32 (unsigned int *restrict dst, uint32_t *restrict src, int size) ++{ ++ for (int i = 0; i < size; ++i) ++ dst[i] = __builtin_clrsb (src[i]); ++} ++ ++void __attribute__ ((noinline, noclone)) ++clrsb_64 (unsigned int *restrict dst, uint64_t *restrict src, int size) ++{ ++ for (int i = 0; i < size; ++i) ++ dst[i] = __builtin_clrsbll (src[i]); ++} ++ ++/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, 
p[0-7]/m, z[0-9]+\.d\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c +new file mode 100644 +index 000000000..287630d7f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c +@@ -0,0 +1,50 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "clrsb_1.c" ++ ++extern void abort (void) __attribute__ ((noreturn)); ++ ++unsigned int data[] = { ++ 0xffffff80, 24, ++ 0xffffffff, 31, ++ 0x00000000, 31, ++ 0x80000000, 0, ++ 0x7fffffff, 0, ++ 0x000003ff, 21, ++ 0x1fffffff, 2, ++ 0x0000ffff, 15, ++ 0xffff0000, 15 ++}; ++ ++int __attribute__ ((optimize (1))) ++main (void) ++{ ++ unsigned int count = sizeof (data) / sizeof (data[0]) / 2; ++ ++ uint32_t in32[count]; ++ unsigned int out32[count]; ++ for (unsigned int i = 0; i < count; ++i) ++ { ++ in32[i] = data[i * 2]; ++ asm volatile ("" ::: "memory"); ++ } ++ clrsb_32 (out32, in32, count); ++ for (unsigned int i = 0; i < count; ++i) ++ if (out32[i] != data[i * 2 + 1]) ++ abort (); ++ ++ uint64_t in64[count]; ++ unsigned int out64[count]; ++ for (unsigned int i = 0; i < count; ++i) ++ { ++ in64[i] = (uint64_t) data[i * 2] << 32; ++ asm volatile ("" ::: "memory"); ++ } ++ clrsb_64 (out64, in64, count); ++ for (unsigned int i = 0; i < count; ++i) ++ if (out64[i] != (data[i * 2] ? data[i * 2 + 1] : 63)) ++ abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c +new file mode 100644 +index 000000000..0c7a4e6d7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O2 -ftree-vectorize --save-temps" } */ ++ ++#include ++ ++void __attribute__ ((noinline, noclone)) ++clz_32 (unsigned int *restrict dst, uint32_t *restrict src, int size) ++{ ++ for (int i = 0; i < size; ++i) ++ dst[i] = __builtin_clz (src[i]); ++} ++ ++void __attribute__ ((noinline, noclone)) ++clz_64 (unsigned int *restrict dst, uint64_t *restrict src, int size) ++{ ++ for (int i = 0; i < size; ++i) ++ dst[i] = __builtin_clzll (src[i]); ++} ++ ++/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c +new file mode 100644 +index 000000000..12d9cf276 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c +@@ -0,0 +1,50 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "clz_1.c" ++ ++extern void abort (void) __attribute__ ((noreturn)); ++ ++unsigned int data[] = { ++ 0xffffff80, 0, ++ 0xffffffff, 0, ++ 0x00000000, 32, ++ 0x80000000, 0, ++ 0x7fffffff, 1, ++ 0x000003ff, 22, ++ 0x1fffffff, 3, ++ 0x0000ffff, 16, ++ 0xffff0000, 0 ++}; ++ ++int __attribute__ ((optimize (1))) ++main (void) ++{ ++ unsigned int count = sizeof (data) / sizeof (data[0]) / 2; ++ ++ uint32_t in32[count]; ++ unsigned int out32[count]; ++ for (unsigned int i = 0; i < count; ++i) ++ { ++ in32[i] = data[i * 2]; ++ asm volatile ("" ::: "memory"); ++ } ++ clz_32 (out32, in32, count); ++ for 
(unsigned int i = 0; i < count; ++i) ++ if (out32[i] != data[i * 2 + 1]) ++ abort (); ++ ++ uint64_t in64[count]; ++ unsigned int out64[count]; ++ for (unsigned int i = 0; i < count; ++i) ++ { ++ in64[i] = (uint64_t) data[i * 2] << 10; ++ asm volatile ("" ::: "memory"); ++ } ++ clz_64 (out64, in64, count); ++ for (unsigned int i = 0; i < count; ++i) ++ if (out64[i] != (data[i * 2] ? data[i * 2 + 1] + 22 : 64)) ++ abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cnot_1.c +new file mode 100644 +index 000000000..5fa33461c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cnot_1.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE (TYPE *restrict r, TYPE *restrict a, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = !a[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (int16_t) \ ++ T (int32_t) \ ++ T (int64_t) \ ++ T (uint8_t) \ ++ T (uint16_t) \ ++ T (uint32_t) \ ++ T (uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1.c +new file mode 100644 +index 000000000..c02e8ae8e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1.c +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define abd(A, B) (((A) < (B) ? (B) : (A)) - ((A) < (B) ? (A) : (B))) ++ ++#define DEF_LOOP(TYPE) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? 
abd (b[i], c[i]) : b[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (uint8_t) \ ++ T (int16_t) \ ++ T (uint16_t) \ ++ T (int32_t) \ ++ T (uint32_t) \ ++ T (int64_t) \ ++ T (uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1_run.c +new file mode 100644 +index 000000000..a45beefc2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_abd_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? abd (b[i], c[i]) : b[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2.c +new file mode 100644 +index 000000000..97901b6f8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2.c +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define abd(A, B) (((A) < (B) ? (B) : (A)) - ((A) < (B) ? (A) : (B))) ++ ++#define DEF_LOOP(TYPE) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? 
abd (b[i], c[i]) : c[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (uint8_t) \ ++ T (int16_t) \ ++ T (uint16_t) \ ++ T (int32_t) \ ++ T (uint32_t) \ ++ T (int64_t) \ ++ T (uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2_run.c +new file mode 100644 +index 000000000..474bc0f9a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_abd_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? abd (b[i], c[i]) : c[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3.c +new file mode 100644 +index 000000000..dc8bc3cee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3.c +@@ -0,0 +1,46 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define abd(A, B) (((A) < (B) ? (B) : (A)) - ((A) < (B) ? (A) : (B))) ++ ++#define DEF_LOOP(TYPE) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? 
abd (b[i], c[i]) : a[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (uint8_t) \ ++ T (int16_t) \ ++ T (uint16_t) \ ++ T (int32_t) \ ++ T (uint32_t) \ ++ T (int64_t) \ ++ T (uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3_run.c +new file mode 100644 +index 000000000..9f1ac2df8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_abd_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? abd (b[i], c[i]) : a[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4.c +new file mode 100644 +index 000000000..5c65e59ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4.c +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define abd(A, B) (((A) < (B) ? (B) : (A)) - ((A) < (B) ? (A) : (B))) ++ ++#define DEF_LOOP(TYPE) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? 
abd (b[i], c[i]) : 79; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (uint8_t) \ ++ T (int16_t) \ ++ T (uint16_t) \ ++ T (int32_t) \ ++ T (uint32_t) \ ++ T (int64_t) \ ++ T (uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-times {\tsel\t} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4_run.c +new file mode 100644 +index 000000000..47fd9e09f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_abd_4.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? abd (b[i], c[i]) : 79; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5.c +new file mode 100644 +index 000000000..f2c013158 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5.c +@@ -0,0 +1,46 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define abd(A, B) (((A) < (B) ? (B) : (A)) - ((A) < (B) ? (A) : (B))) ++ ++#define DEF_LOOP(TYPE) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? 
abd (b[i], c[i]) : 0; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (uint8_t) \ ++ T (int16_t) \ ++ T (uint16_t) \ ++ T (int32_t) \ ++ T (uint32_t) \ ++ T (int64_t) \ ++ T (uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5_run.c +new file mode 100644 +index 000000000..7cd44be38 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_abd_5.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? abd (b[i], c[i]) : 0; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1.c +new file mode 100644 +index 000000000..bd8776637 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] == 0 ? 
!b[i] : b[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (uint8_t) \ ++ T (int16_t) \ ++ T (uint16_t) \ ++ T (int32_t) \ ++ T (uint32_t) \ ++ T (int64_t) \ ++ T (uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* Currently we canonicalize the ?: so that !b[i] is the "false" value. */ ++/* { dg-final { scan-assembler-not {\tsel\t} { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1_run.c +new file mode 100644 +index 000000000..802bcbb2e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_cnot_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE) \ ++ { \ ++ TYPE r[N], a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i % 3) < (i % 5); \ ++ b[i] = i % 7 < 3; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] == 0 ? !b[i] : b[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2.c +new file mode 100644 +index 000000000..3df2431be +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] == 0 ? !b[i] : a[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (uint8_t) \ ++ T (int16_t) \ ++ T (uint16_t) \ ++ T (int32_t) \ ++ T (uint32_t) \ ++ T (int64_t) \ ++ T (uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* Currently we canonicalize the ?: so that !b[i] is the "false" value. 
*/ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2_run.c +new file mode 100644 +index 000000000..6db8bf14e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_cnot_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE) \ ++ { \ ++ TYPE r[N], a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i % 3) < (i % 5); \ ++ b[i] = i % 7 < 3; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] == 0 ? !b[i] : a[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3.c +new file mode 100644 +index 000000000..806e51788 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] == 0 ? !b[i] : 127; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (uint8_t) \ ++ T (int16_t) \ ++ T (uint16_t) \ ++ T (int32_t) \ ++ T (uint32_t) \ ++ T (int64_t) \ ++ T (uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 8 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3_run.c +new file mode 100644 +index 000000000..6e025e489 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_cnot_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE) \ ++ { \ ++ TYPE r[N], a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i % 3) < (i % 5); \ ++ b[i] = i % 7 < 3; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] == 0 ? 
!b[i] : 127; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1.c +new file mode 100644 +index 000000000..86064ebfc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1.c +@@ -0,0 +1,39 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ void __attribute__ ((noipa)) \ ++ test_##INT_TYPE (FLOAT_TYPE *__restrict r, \ ++ INT_TYPE *__restrict a, \ ++ FLOAT_TYPE *__restrict b, \ ++ INT_TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ { \ ++ FLOAT_TYPE bi = b[i]; \ ++ r[i] = pred[i] ? (FLOAT_TYPE) a[i] : bi; \ ++ } \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, int16_t) \ ++ T (_Float16, uint16_t) \ ++ T (float, int32_t) \ ++ T (float, uint32_t) \ ++ T (double, int64_t) \ ++ T (double, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1_run.c +new file mode 100644 +index 000000000..1f712b485 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1_run.c +@@ -0,0 +1,29 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ ++ ++#include "cond_convert_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ { \ ++ FLOAT_TYPE r[N], b[N]; \ ++ INT_TYPE a[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ ++ b[i] = (i % 9) * (i % 7 + 1); \ ++ pred[i] = (i % 7 < 4); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##INT_TYPE (r, a, b, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (pred[i] ? (FLOAT_TYPE) a[i] : b[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2.c +new file mode 100644 +index 000000000..0e60b4381 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2.c +@@ -0,0 +1,36 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ void __attribute__ ((noipa)) \ ++ test_##INT_TYPE (FLOAT_TYPE *__restrict r, \ ++ INT_TYPE *__restrict a, \ ++ INT_TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] ? 
(FLOAT_TYPE) a[i] : 1.0; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, int16_t) \ ++ T (_Float16, uint16_t) \ ++ T (float, int32_t) \ ++ T (float, uint32_t) \ ++ T (double, int64_t) \ ++ T (double, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 6 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2_run.c +new file mode 100644 +index 000000000..9a4834921 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2_run.c +@@ -0,0 +1,28 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ ++ ++#include "cond_convert_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ { \ ++ FLOAT_TYPE r[N]; \ ++ INT_TYPE a[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ ++ pred[i] = (i % 7 < 4); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##INT_TYPE (r, a, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (pred[i] ? (FLOAT_TYPE) a[i] : 1.0)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3.c +new file mode 100644 +index 000000000..a294effd4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ void __attribute__ ((noipa)) \ ++ test_##INT_TYPE (FLOAT_TYPE *__restrict r, \ ++ INT_TYPE *__restrict a, \ ++ INT_TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] ? (FLOAT_TYPE) a[i] : 0.0; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, int16_t) \ ++ T (_Float16, uint16_t) \ ++ T (float, int32_t) \ ++ T (float, uint32_t) \ ++ T (double, int64_t) \ ++ T (double, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* Really we should be able to use MOVPRFX /z here, but at the moment ++ we're relying on combine to merge a SEL and an arithmetic operation, ++ and the SEL doesn't allow the "false" value to be zero when the "true" ++ value is a register. 
*/ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 6 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3_run.c +new file mode 100644 +index 000000000..90021097c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3_run.c +@@ -0,0 +1,28 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ ++ ++#include "cond_convert_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ { \ ++ FLOAT_TYPE r[N]; \ ++ INT_TYPE a[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ ++ pred[i] = (i % 7 < 4); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##INT_TYPE (r, a, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (pred[i] ? (FLOAT_TYPE) a[i] : 0.0)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4.c +new file mode 100644 +index 000000000..e3a947b26 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4.c +@@ -0,0 +1,39 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ void __attribute__ ((noipa)) \ ++ test_##INT_TYPE (INT_TYPE *__restrict r, \ ++ FLOAT_TYPE *__restrict a, \ ++ INT_TYPE *__restrict b, \ ++ INT_TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ { \ ++ INT_TYPE bi = b[i]; \ ++ r[i] = pred[i] ? (INT_TYPE) a[i] : bi; \ ++ } \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, int16_t) \ ++ T (_Float16, uint16_t) \ ++ T (float, int32_t) \ ++ T (float, uint32_t) \ ++ T (double, int64_t) \ ++ T (double, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4_run.c +new file mode 100644 +index 000000000..eaadcb7d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4_run.c +@@ -0,0 +1,29 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ ++ ++#include "cond_convert_4.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ { \ ++ INT_TYPE r[N], b[N], pred[N]; \ ++ FLOAT_TYPE a[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 
1 : -1); \ ++ b[i] = (i % 9) * (i % 7 + 1); \ ++ pred[i] = (i % 7 < 4); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##INT_TYPE (r, a, b, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (pred[i] ? (INT_TYPE) a[i] : b[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5.c +new file mode 100644 +index 000000000..5f3da83e6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5.c +@@ -0,0 +1,36 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ void __attribute__ ((noipa)) \ ++ test_##INT_TYPE (INT_TYPE *__restrict r, \ ++ FLOAT_TYPE *__restrict a, \ ++ INT_TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] ? (INT_TYPE) a[i] : 72; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, int16_t) \ ++ T (_Float16, uint16_t) \ ++ T (float, int32_t) \ ++ T (float, uint32_t) \ ++ T (double, int64_t) \ ++ T (double, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 6 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5_run.c +new file mode 100644 +index 000000000..a1f2d4977 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5_run.c +@@ -0,0 +1,28 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ ++ ++#include "cond_convert_5.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ { \ ++ INT_TYPE r[N], pred[N]; \ ++ FLOAT_TYPE a[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ ++ pred[i] = (i % 7 < 4); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##INT_TYPE (r, a, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (pred[i] ? (INT_TYPE) a[i] : 72)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6.c +new file mode 100644 +index 000000000..6541a2ea4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ void __attribute__ ((noipa)) \ ++ test_##INT_TYPE (INT_TYPE *__restrict r, \ ++ FLOAT_TYPE *__restrict a, \ ++ INT_TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] ? 
(INT_TYPE) a[i] : 0; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, int16_t) \ ++ T (_Float16, uint16_t) \ ++ T (float, int32_t) \ ++ T (float, uint32_t) \ ++ T (double, int64_t) \ ++ T (double, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* Really we should be able to use MOVPRFX /z here, but at the moment ++ we're relying on combine to merge a SEL and an arithmetic operation, ++ and the SEL doesn't allow the "false" value to be zero when the "true" ++ value is a register. */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 6 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6_run.c +new file mode 100644 +index 000000000..49a64b4fc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6_run.c +@@ -0,0 +1,28 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ ++ ++#include "cond_convert_6.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ ++ { \ ++ INT_TYPE r[N], pred[N]; \ ++ FLOAT_TYPE a[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ ++ pred[i] = (i % 7 < 4); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##INT_TYPE (r, a, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (pred[i] ? (INT_TYPE) a[i] : 0)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1.c +new file mode 100644 +index 000000000..c1f54e391 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, ABS) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? 
ABS (b[i] - c[i]) : b[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, __builtin_fabsf16) \ ++ T (float, __builtin_fabsf) \ ++ T (double, __builtin_fabs) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1_run.c +new file mode 100644 +index 000000000..a4d6972b9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include "cond_fabd_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, ABS) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? ABS (b[i] - c[i]) : b[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2.c +new file mode 100644 +index 000000000..dd6eecc17 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, ABS) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? ABS (b[i] - c[i]) : c[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, __builtin_fabsf16) \ ++ T (float, __builtin_fabsf) \ ++ T (double, __builtin_fabs) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2_run.c +new file mode 100644 +index 000000000..28dc7d011 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include "cond_fabd_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, ABS) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? 
i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? ABS (b[i] - c[i]) : c[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3.c +new file mode 100644 +index 000000000..26fd7b265 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, ABS) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? ABS (b[i] - c[i]) : a[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, __builtin_fabsf16) \ ++ T (float, __builtin_fabsf) \ ++ T (double, __builtin_fabs) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3_run.c +new file mode 100644 +index 000000000..be21b7f99 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include "cond_fabd_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, ABS) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? ABS (b[i] - c[i]) : a[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4.c +new file mode 100644 +index 000000000..78f1fd914 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, ABS) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? 
ABS (b[i] - c[i]) : 8.0; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, __builtin_fabsf16) \ ++ T (float, __builtin_fabsf) \ ++ T (double, __builtin_fabs) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-times {\tsel\t} 3 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4_run.c +new file mode 100644 +index 000000000..86bdab415 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include "cond_fabd_4.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, ABS) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? ABS (b[i] - c[i]) : 8; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5.c +new file mode 100644 +index 000000000..e66477b3b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, ABS) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? ABS (b[i] - c[i]) : 0.0; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (_Float16, __builtin_fabsf16) \ ++ T (float, __builtin_fabsf) \ ++ T (double, __builtin_fabs) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* Really we should be able to use MOVPRFX /Z here, but at the moment ++ we're relying on combine to merge a SEL and an arithmetic operation, ++ and the SEL doesn't allow zero operands. 
*/ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 1 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 1 { xfail *-*-* } } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5_run.c +new file mode 100644 +index 000000000..9fb5fbb81 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ ++ ++#include "cond_fabd_5.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, ABS) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? ABS (b[i] - c[i]) : 0; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1.c +new file mode 100644 +index 000000000..d103e1f38 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1.c +@@ -0,0 +1,62 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? 
y[i] + (TYPE) CONST : y[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE, PRED_TYPE) \ ++ T (TYPE, PRED_TYPE, half, 0.5) \ ++ T (TYPE, PRED_TYPE, one, 1.0) \ ++ T (TYPE, PRED_TYPE, two, 2.0) \ ++ T (TYPE, PRED_TYPE, minus_half, -0.5) \ ++ T (TYPE, PRED_TYPE, minus_one, -1.0) \ ++ T (TYPE, PRED_TYPE, minus_two, -2.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16, int16_t) \ ++ TEST_TYPE (T, float, int32_t) \ ++ TEST_TYPE (T, double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #-2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #-2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #-2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1_run.c +new file mode 100644 +index 000000000..956ae1435 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fadd_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? 
y[i] + (TYPE) CONST : y[i]; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2.c +new file mode 100644 +index 000000000..b7d02f4ad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2.c +@@ -0,0 +1,56 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ TYPE *__restrict z, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = y[i] < 8 ? z[i] + (TYPE) CONST : y[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, half, 0.5) \ ++ T (TYPE, one, 1.0) \ ++ T (TYPE, two, 2.0) \ ++ T (TYPE, minus_half, -0.5) \ ++ T (TYPE, minus_one, -1.0) \ ++ T (TYPE, minus_two, -2.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #-2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #-2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 6 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 6 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2_run.c +new file mode 100644 +index 000000000..debf395cc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2_run.c +@@ -0,0 +1,31 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fadd_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N], z[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i % 13; \ ++ z[i] = i * i; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, z, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = y[i] < 8 ? 
z[i] + (TYPE) CONST : y[i]; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3.c +new file mode 100644 +index 000000000..aec0e5aca +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3.c +@@ -0,0 +1,65 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? y[i] + (TYPE) CONST : 4; \ ++ } ++ ++#define TEST_TYPE(T, TYPE, PRED_TYPE) \ ++ T (TYPE, PRED_TYPE, half, 0.5) \ ++ T (TYPE, PRED_TYPE, one, 1.0) \ ++ T (TYPE, PRED_TYPE, two, 2.0) \ ++ T (TYPE, PRED_TYPE, minus_half, -0.5) \ ++ T (TYPE, PRED_TYPE, minus_one, -1.0) \ ++ T (TYPE, PRED_TYPE, minus_two, -2.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16, int16_t) \ ++ TEST_TYPE (T, float, int32_t) \ ++ TEST_TYPE (T, double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #-2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #-2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #-2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3_run.c +new file mode 100644 +index 000000000..d5268c5ca +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fadd_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? y[i] + (TYPE) CONST : 4; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4.c +new file mode 100644 +index 000000000..bb276c140 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4.c +@@ -0,0 +1,64 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? y[i] + (TYPE) CONST : 0; \ ++ } ++ ++#define TEST_TYPE(T, TYPE, PRED_TYPE) \ ++ T (TYPE, PRED_TYPE, half, 0.5) \ ++ T (TYPE, PRED_TYPE, one, 1.0) \ ++ T (TYPE, PRED_TYPE, two, 2.0) \ ++ T (TYPE, PRED_TYPE, minus_half, -0.5) \ ++ T (TYPE, PRED_TYPE, minus_one, -1.0) \ ++ T (TYPE, PRED_TYPE, minus_two, -2.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16, int16_t) \ ++ TEST_TYPE (T, float, int32_t) \ ++ TEST_TYPE (T, double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { 
scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #-2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #-2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #-2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 6 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 6 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4_run.c +new file mode 100644 +index 000000000..4ea8be661 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fadd_4.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? y[i] + (TYPE) CONST : 0; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1.c +new file mode 100644 +index 000000000..d0db0900e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1.c +@@ -0,0 +1,55 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#include ++ ++#ifndef FN ++#define FN(X) __builtin_fmax##X ++#endif ++ ++#define DEF_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? 
FN (y[i], CONST) : y[i]; \ ++ } ++ ++#define TEST_TYPE(T, FN, TYPE, PRED_TYPE) \ ++ T (FN, TYPE, PRED_TYPE, zero, 0) \ ++ T (FN, TYPE, PRED_TYPE, one, 1) \ ++ T (FN, TYPE, PRED_TYPE, two, 2) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, FN (f16), _Float16, int16_t) \ ++ TEST_TYPE (T, FN (f32), float, int32_t) \ ++ TEST_TYPE (T, FN (f64), double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1_run.c +new file mode 100644 +index 000000000..00a3c41f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#include "cond_fmaxnm_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? 
FN (y[i], CONST) : y[i]; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2.c +new file mode 100644 +index 000000000..0b535d15f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#include ++ ++#ifndef FN ++#define FN(X) __builtin_fmax##X ++#endif ++ ++#define DEF_LOOP(FN, TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ TYPE *__restrict z, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = y[i] < 8 ? FN (z[i], CONST) : y[i]; \ ++ } ++ ++#define TEST_TYPE(T, FN, TYPE) \ ++ T (FN, TYPE, zero, 0) \ ++ T (FN, TYPE, one, 1) \ ++ T (FN, TYPE, two, 2) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, FN (f32), float) \ ++ TEST_TYPE (T, FN (f64), double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2_run.c +new file mode 100644 +index 000000000..9eb4d80fc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2_run.c +@@ -0,0 +1,31 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#include "cond_fmaxnm_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(FN, TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N], z[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i % 13; \ ++ z[i] = i * i; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, z, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = y[i] < 8 ? 
FN (z[i], CONST) : y[i]; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3.c +new file mode 100644 +index 000000000..741f8f6d0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3.c +@@ -0,0 +1,54 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#include ++ ++#ifndef FN ++#define FN(X) __builtin_fmax##X ++#endif ++ ++#define DEF_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? FN (y[i], CONST) : 4; \ ++ } ++ ++#define TEST_TYPE(T, FN, TYPE, PRED_TYPE) \ ++ T (FN, TYPE, PRED_TYPE, zero, 0) \ ++ T (FN, TYPE, PRED_TYPE, one, 1) \ ++ T (FN, TYPE, PRED_TYPE, two, 2) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, FN (f16), _Float16, int16_t) \ ++ TEST_TYPE (T, FN (f32), float, int32_t) \ ++ TEST_TYPE (T, FN (f64), double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3_run.c +new file mode 100644 +index 000000000..4aac75f0e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#include "cond_fmaxnm_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int 
i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? FN (y[i], CONST) : 4; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4.c +new file mode 100644 +index 000000000..83a53c7d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4.c +@@ -0,0 +1,53 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#include ++ ++#ifndef FN ++#define FN(X) __builtin_fmax##X ++#endif ++ ++#define DEF_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? FN (y[i], CONST) : 0; \ ++ } ++ ++#define TEST_TYPE(T, FN, TYPE, PRED_TYPE) \ ++ T (FN, TYPE, PRED_TYPE, zero, 0) \ ++ T (FN, TYPE, PRED_TYPE, one, 1) \ ++ T (FN, TYPE, PRED_TYPE, two, 2) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, FN (f16), _Float16, int16_t) \ ++ TEST_TYPE (T, FN (f32), float, int32_t) \ ++ TEST_TYPE (T, FN (f64), double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4_run.c +new file mode 100644 +index 000000000..e1d904338 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#include "cond_fmaxnm_4.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? 
FN (y[i], CONST) : 0; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1.c +new file mode 100644 +index 000000000..d667b2088 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#define FN(X) __builtin_fmin##X ++#include "cond_fmaxnm_1.c" ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1_run.c +new file mode 100644 +index 000000000..5df2ff84b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1_run.c +@@ -0,0 +1,5 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#define FN(X) __builtin_fmin##X ++#include "cond_fmaxnm_1_run.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2.c +new file mode 100644 +index 000000000..d66a84b01 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#define FN(X) __builtin_fmin##X ++#include "cond_fmaxnm_2.c" ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { 
dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2_run.c +new file mode 100644 +index 000000000..79a98bb77 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2_run.c +@@ -0,0 +1,5 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#define FN(X) __builtin_fmin##X ++#include "cond_fmaxnm_2_run.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3.c +new file mode 100644 +index 000000000..d39dd1825 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#define FN(X) __builtin_fmin##X ++#include "cond_fmaxnm_3.c" ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3_run.c +new file mode 100644 +index 000000000..ca1a047da +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3_run.c +@@ -0,0 +1,5 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#define FN(X) __builtin_fmin##X 
++#include "cond_fmaxnm_3_run.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4.c +new file mode 100644 +index 000000000..fff6fdd37 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#define FN(X) __builtin_fmin##X ++#include "cond_fmaxnm_4.c" ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4_run.c +new file mode 100644 +index 000000000..b945d0470 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4_run.c +@@ -0,0 +1,5 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ ++ ++#define FN(X) __builtin_fmin##X ++#include "cond_fmaxnm_4_run.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1.c +new file mode 100644 +index 000000000..ce417ed85 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1.c +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? 
y[i] * (TYPE) CONST : y[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE, PRED_TYPE) \ ++ T (TYPE, PRED_TYPE, half, 0.5) \ ++ T (TYPE, PRED_TYPE, two, 2.0) \ ++ T (TYPE, PRED_TYPE, four, 4.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16, int16_t) \ ++ TEST_TYPE (T, float, int32_t) \ ++ TEST_TYPE (T, double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #4\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #4\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #4\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1_run.c +new file mode 100644 +index 000000000..9ca5b5080 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fmul_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? y[i] * (TYPE) CONST : y[i]; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2.c +new file mode 100644 +index 000000000..cbf9d13a5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ TYPE *__restrict z, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = y[i] < 8 ? 
z[i] * (TYPE) CONST : y[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, half, 0.5) \ ++ T (TYPE, two, 2.0) \ ++ T (TYPE, four, 4.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #4\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #4\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2_run.c +new file mode 100644 +index 000000000..44b283ba3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2_run.c +@@ -0,0 +1,31 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fmul_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N], z[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i % 13; \ ++ z[i] = i * i; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, z, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = y[i] < 8 ? z[i] * (TYPE) CONST : y[i]; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3.c +new file mode 100644 +index 000000000..4da147e15 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3.c +@@ -0,0 +1,50 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? 
y[i] * (TYPE) CONST : 8; \ ++ } ++ ++#define TEST_TYPE(T, TYPE, PRED_TYPE) \ ++ T (TYPE, PRED_TYPE, half, 0.5) \ ++ T (TYPE, PRED_TYPE, two, 2.0) \ ++ T (TYPE, PRED_TYPE, four, 4.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16, int16_t) \ ++ TEST_TYPE (T, float, int32_t) \ ++ TEST_TYPE (T, double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #4\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #4\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #4\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3_run.c +new file mode 100644 +index 000000000..9b81d43c9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fmul_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? y[i] * (TYPE) CONST : 8; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4.c +new file mode 100644 +index 000000000..c4fdb2b2b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? 
y[i] * (TYPE) CONST : 0; \ ++ } ++ ++#define TEST_TYPE(T, TYPE, PRED_TYPE) \ ++ T (TYPE, PRED_TYPE, half, 0.5) \ ++ T (TYPE, PRED_TYPE, two, 2.0) \ ++ T (TYPE, PRED_TYPE, four, 4.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16, int16_t) \ ++ TEST_TYPE (T, float, int32_t) \ ++ TEST_TYPE (T, double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #4\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #4\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #4\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4_run.c +new file mode 100644 +index 000000000..b93e031e5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fmul_4.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? y[i] * (TYPE) CONST : 0; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1.c +new file mode 100644 +index 000000000..8e7172af4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1.c +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? 
(TYPE) CONST - y[i] : y[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE, PRED_TYPE) \ ++ T (TYPE, PRED_TYPE, half, 0.5) \ ++ T (TYPE, PRED_TYPE, one, 1.0) \ ++ T (TYPE, PRED_TYPE, two, 2.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16, int16_t) \ ++ TEST_TYPE (T, float, int32_t) \ ++ TEST_TYPE (T, double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1_run.c +new file mode 100644 +index 000000000..61ffac429 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fsubr_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? (TYPE) CONST - y[i] : y[i]; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2.c +new file mode 100644 +index 000000000..6d2efde94 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ TYPE *__restrict z, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = y[i] < 8 ? 
(TYPE) CONST - z[i] : y[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, half, 0.5) \ ++ T (TYPE, one, 1.0) \ ++ T (TYPE, two, 2.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2_run.c +new file mode 100644 +index 000000000..1b25392b0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2_run.c +@@ -0,0 +1,31 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fsubr_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N], z[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i % 13; \ ++ z[i] = i * i; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, z, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = y[i] < 8 ? (TYPE) CONST - z[i] : y[i]; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3.c +new file mode 100644 +index 000000000..328af5741 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3.c +@@ -0,0 +1,50 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? 
(TYPE) CONST - y[i] : 4; \ ++ } ++ ++#define TEST_TYPE(T, TYPE, PRED_TYPE) \ ++ T (TYPE, PRED_TYPE, half, 0.5) \ ++ T (TYPE, PRED_TYPE, one, 1.0) \ ++ T (TYPE, PRED_TYPE, two, 2.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16, int16_t) \ ++ TEST_TYPE (T, float, int32_t) \ ++ TEST_TYPE (T, double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3_run.c +new file mode 100644 +index 000000000..8978287df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fsubr_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? (TYPE) CONST - y[i] : 4; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4.c +new file mode 100644 +index 000000000..1d420b104 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, \ ++ PRED_TYPE *__restrict pred, \ ++ int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] != 1 ? 
(TYPE) CONST - y[i] : 0; \ ++ } ++ ++#define TEST_TYPE(T, TYPE, PRED_TYPE) \ ++ T (TYPE, PRED_TYPE, half, 0.5) \ ++ T (TYPE, PRED_TYPE, one, 1.0) \ ++ T (TYPE, PRED_TYPE, two, 2.0) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16, int16_t) \ ++ TEST_TYPE (T, float, int32_t) \ ++ TEST_TYPE (T, double, int64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 3 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4_run.c +new file mode 100644 +index 000000000..2cb3409af +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4_run.c +@@ -0,0 +1,32 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_fsubr_4.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], y[N]; \ ++ PRED_TYPE pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ y[i] = i * i; \ ++ pred[i] = i % 3; \ ++ } \ ++ test_##TYPE##_##NAME (x, y, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = i % 3 != 1 ? (TYPE) CONST - y[i] : 0; \ ++ if (x[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1.c +new file mode 100644 +index 000000000..a1e80b8a9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1.c +@@ -0,0 +1,62 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define bit_and(A, B) ((A) & (B)) ++#define bit_or(A, B) ((A) | (B)) ++#define bit_xor(A, B) ((A) ^ (B)) ++#define bit_bic(A, B) ((A) & ~(B)) ++ ++#define DEF_LOOP(TYPE, OP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE##_##OP (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, \ ++ TYPE *__restrict c, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? 
OP (b[i], c[i]) : b[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, bit_and) \ ++ T (TYPE, bit_or) \ ++ T (TYPE, bit_xor) \ ++ T (TYPE, bit_bic) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int8_t) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, int16_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1_run.c +new file mode 100644 +index 000000000..cb12e5609 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_logical_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##OP (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? 
OP (b[i], c[i]) : b[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2.c +new file mode 100644 +index 000000000..c476fe2ff +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define bit_and(A, B) ((A) & (B)) ++#define bit_or(A, B) ((A) | (B)) ++#define bit_xor(A, B) ((A) ^ (B)) ++#define bit_bic(A, B) ((A) & ~(B)) ++ ++#define DEF_LOOP(TYPE, OP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE##_##OP (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, \ ++ TYPE *__restrict c, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? OP (b[i], c[i]) : c[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, bit_and) \ ++ T (TYPE, bit_or) \ ++ T (TYPE, bit_xor) \ ++ T (TYPE, bit_bic) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int8_t) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, int16_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* There's no BICR or equivalent, so the BIC functions need a select. */ ++/* { dg-final { scan-assembler-times {\tsel\t} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2_run.c +new file mode 100644 +index 000000000..9b9918cc8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_logical_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? 
i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##OP (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? OP (b[i], c[i]) : c[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3.c +new file mode 100644 +index 000000000..7ad2c4ea3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3.c +@@ -0,0 +1,66 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define bit_and(A, B) ((A) & (B)) ++#define bit_or(A, B) ((A) | (B)) ++#define bit_xor(A, B) ((A) ^ (B)) ++#define bit_bic(A, B) ((A) & ~(B)) ++ ++#define DEF_LOOP(TYPE, OP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE##_##OP (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, \ ++ TYPE *__restrict c, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? OP (b[i], c[i]) : a[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, bit_and) \ ++ T (TYPE, bit_or) \ ++ T (TYPE, bit_xor) \ ++ T (TYPE, bit_bic) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int8_t) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, int16_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 8 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 8 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 8 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 8 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3_run.c +new 
file mode 100644 +index 000000000..05dc78ab3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_logical_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##OP (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? OP (b[i], c[i]) : a[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4.c +new file mode 100644 +index 000000000..00217bffa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4.c +@@ -0,0 +1,62 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define bit_and(A, B) ((A) & (B)) ++#define bit_or(A, B) ((A) | (B)) ++#define bit_xor(A, B) ((A) ^ (B)) ++#define bit_bic(A, B) ((A) & ~(B)) ++ ++#define DEF_LOOP(TYPE, OP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE##_##OP (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, \ ++ TYPE *__restrict c, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? OP (b[i], c[i]) : 42; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, bit_and) \ ++ T (TYPE, bit_or) \ ++ T (TYPE, bit_xor) \ ++ T (TYPE, bit_bic) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int8_t) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, int16_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-times {\tsel\t} 32 } } */ +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4_run.c +new file mode 100644 +index 000000000..46fb11594 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_logical_4.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##OP (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? OP (b[i], c[i]) : 42; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5.c +new file mode 100644 +index 000000000..36b541f21 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5.c +@@ -0,0 +1,66 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define bit_and(A, B) ((A) & (B)) ++#define bit_or(A, B) ((A) | (B)) ++#define bit_xor(A, B) ((A) ^ (B)) ++#define bit_bic(A, B) ((A) & ~(B)) ++ ++#define DEF_LOOP(TYPE, OP) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##TYPE##_##OP (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, \ ++ TYPE *__restrict c, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] < 20 ? OP (b[i], c[i]) : 0; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, bit_and) \ ++ T (TYPE, bit_or) \ ++ T (TYPE, bit_xor) \ ++ T (TYPE, bit_bic) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int8_t) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, int16_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 8 } } */ 
++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 8 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 8 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 8 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5_run.c +new file mode 100644 +index 000000000..e0da5fe58 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5_run.c +@@ -0,0 +1,33 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_logical_5.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ((i + 2) % 3) * (i + 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##OP (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = a[i] < 20 ? OP (b[i], c[i]) : 0; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1.c +new file mode 100644 +index 000000000..cb01d50f3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1.c +@@ -0,0 +1,52 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE c, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] != 1 ? 
a[i] OP b[i] * c : b[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, add, +) \ ++ T (TYPE, sub, -) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, uint64_t) \ ++ TEST_TYPE (T, _Float16) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1_run.c +new file mode 100644 +index 000000000..bcfc62280 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1_run.c +@@ -0,0 +1,35 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_mla_1.c" ++ ++#define FACTOR 17 ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ pred[i] = i % 3 < i % 5; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected \ ++ = pred[i] != 1 ? a[i] OP b[i] * (TYPE) FACTOR : b[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2.c +new file mode 100644 +index 000000000..b6ea1a3e2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2.c +@@ -0,0 +1,53 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE c, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] != 1 ? 
a[i] OP b[i] * c : c; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, add, +) \ ++ T (TYPE, sub, -) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, uint64_t) \ ++ TEST_TYPE (T, _Float16) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2_run.c +new file mode 100644 +index 000000000..79998b84e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2_run.c +@@ -0,0 +1,36 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_mla_2.c" ++ ++#define FACTOR 17 ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ pred[i] = i % 3 < i % 5; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = (pred[i] != 1 \ ++ ? a[i] OP b[i] * (TYPE) FACTOR \ ++ : (TYPE) FACTOR); \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3.c +new file mode 100644 +index 000000000..085fccf53 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3.c +@@ -0,0 +1,52 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE c, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] != 1 ? 
a[i] OP b[i] * c : a[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, add, +) \ ++ T (TYPE, sub, -) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, uint64_t) \ ++ TEST_TYPE (T, _Float16) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3_run.c +new file mode 100644 +index 000000000..cbd1185b2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3_run.c +@@ -0,0 +1,35 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_mla_3.c" ++ ++#define FACTOR 17 ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ pred[i] = i % 3 < i % 5; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected \ ++ = pred[i] != 1 ? a[i] OP b[i] * (TYPE) FACTOR : a[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4.c +new file mode 100644 +index 000000000..ed9f73e9c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4.c +@@ -0,0 +1,56 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE c, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] == 1 ? 
a[i] OP b[i] * c : pred[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, add, +) \ ++ T (TYPE, sub, -) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, uint64_t) \ ++ TEST_TYPE (T, _Float16) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m,} 4 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4_run.c +new file mode 100644 +index 000000000..5e078594a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4_run.c +@@ -0,0 +1,36 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_mla_4.c" ++ ++#define FACTOR 17 ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ pred[i] = i % 3; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected = (pred[i] == 1 \ ++ ? 
a[i] OP b[i] * (TYPE) FACTOR \ ++ : pred[i]); \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5.c +new file mode 100644 +index 000000000..97e233579 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5.c +@@ -0,0 +1,56 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE c, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] ? a[i] OP b[i] * c : 0; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, add, +) \ ++ T (TYPE, sub, -) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, uint64_t) \ ++ TEST_TYPE (T, _Float16) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\t(?:fmla|fmad)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\t(?:fmla|fmad)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\t(?:fmla|fmad)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\t(?:fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\t(?:fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\t(?:fmls|fmsb)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z,} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z,} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z,} 4 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5_run.c +new file mode 100644 +index 000000000..9de46e30f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5_run.c +@@ -0,0 +1,35 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_mla_5.c" ++ ++#define FACTOR 17 ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? 
i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ pred[i] = i % 3 < i % 5; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected \ ++ = pred[i] ? a[i] OP b[i] * (TYPE) FACTOR : 0; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6.c +new file mode 100644 +index 000000000..832bdb3d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6.c +@@ -0,0 +1,53 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE c, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] ? a[i] OP b[i] * c : 5; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, add, +) \ ++ T (TYPE, sub, -) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, uint64_t) \ ++ TEST_TYPE (T, _Float16) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\t} 14 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6_run.c +new file mode 100644 +index 000000000..59f57a2db +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6_run.c +@@ -0,0 +1,35 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_mla_6.c" ++ ++#define FACTOR 17 ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ pred[i] = i % 3 < i % 5; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected \ ++ = pred[i] ? 
a[i] OP b[i] * (TYPE) FACTOR : 5; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7.c +new file mode 100644 +index 000000000..5561f4219 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7.c +@@ -0,0 +1,62 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME##_##CONST (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] != 1 ? a[i] OP b[i] * CONST : a[i]; \ ++ } ++ ++#define TEST_COUNT(T, TYPE, CONST) \ ++ T (TYPE, add, +, CONST) \ ++ T (TYPE, sub, -, CONST) ++ ++#define TEST_TYPE(T, TYPE, CONST) \ ++ TEST_COUNT (T, TYPE, 2) \ ++ TEST_COUNT (T, TYPE, 4) \ ++ TEST_COUNT (T, TYPE, CONST) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, uint8_t, 0x80) \ ++ TEST_TYPE (T, uint16_t, 0x8000) \ ++ TEST_TYPE (T, uint32_t, 0x80000000) \ ++ TEST_TYPE (T, uint64_t, 0x8000000000000000ULL) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #2\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #2\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #2\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #2\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ ++ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7_run.c +new file mode 100644 +index 000000000..b094f40a2 +--- 
/dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7_run.c +@@ -0,0 +1,34 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_mla_7.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP, CONST) \ ++ { \ ++ TYPE r[N], a[N], b[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ pred[i] = i % 3 < i % 5; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME##_##CONST (r, a, b, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected \ ++ = pred[i] != 1 ? a[i] OP b[i] * CONST : a[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8.c +new file mode 100644 +index 000000000..d5549272e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8.c +@@ -0,0 +1,62 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME##_##CONST (TYPE *__restrict r, \ ++ TYPE *__restrict a, \ ++ TYPE *__restrict b, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] != 1 ? a[i] OP b[i] * -CONST : a[i]; \ ++ } ++ ++#define TEST_COUNT(T, TYPE, CONST) \ ++ T (TYPE, add, +, CONST) \ ++ T (TYPE, sub, -, CONST) ++ ++#define TEST_TYPE(T, TYPE, CONST) \ ++ TEST_COUNT (T, TYPE, 2) \ ++ TEST_COUNT (T, TYPE, 4) \ ++ TEST_COUNT (T, TYPE, CONST) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, uint8_t, 0x80) \ ++ TEST_TYPE (T, uint16_t, 0x8000) \ ++ TEST_TYPE (T, uint32_t, 0x80000000) \ ++ TEST_TYPE (T, uint64_t, 0x8000000000000000ULL) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #2\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #2\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #2\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #2\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ ++ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ ++/* { dg-final { 
scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8_run.c +new file mode 100644 +index 000000000..7fb58aa70 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8_run.c +@@ -0,0 +1,34 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_mla_8.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP, CONST) \ ++ { \ ++ TYPE r[N], a[N], b[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ pred[i] = i % 3 < i % 5; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME##_##CONST (r, a, b, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ TYPE expected \ ++ = pred[i] != 1 ? a[i] OP b[i] * -CONST : a[i]; \ ++ if (r[i] != expected) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c +new file mode 100644 +index 000000000..f2c51b291 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] OP 3 : b[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, shl, <<) \ ++ T (TYPE, shr, >>) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int8_t) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, int16_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1_run.c +new file mode 100644 +index 000000000..acc403ec8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_shift_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : b[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2.c +new file mode 100644 +index 000000000..c9082c9c8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2.c +@@ -0,0 +1,52 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] OP 3 : a[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, shl, <<) \ ++ T (TYPE, shr, >>) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int8_t) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, int16_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 4 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2_run.c +new file mode 100644 +index 000000000..4917d3af6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_shift_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : a[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3.c +new file mode 100644 +index 000000000..55e0de8aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] OP 3 : 72; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, shl, <<) \ ++ T (TYPE, shr, >>) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int8_t) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, int16_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-times {\tsel\t} 16 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3_run.c +new file mode 100644 +index 000000000..194c75b8d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_shift_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 72)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4.c +new file mode 100644 +index 000000000..32dd68199 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4.c +@@ -0,0 +1,52 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] OP 3 : 0; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, shl, <<) \ ++ T (TYPE, shr, >>) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int8_t) \ ++ TEST_TYPE (T, uint8_t) \ ++ TEST_TYPE (T, int16_t) \ ++ TEST_TYPE (T, uint16_t) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 4 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4_run.c +new file mode 100644 +index 000000000..ee263000d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_shift_4.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 0)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5.c +new file mode 100644 +index 000000000..1d4491531 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] OP c[i] : b[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, shl, <<) \ ++ T (TYPE, shr, >>) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5_run.c +new file mode 100644 +index 000000000..35bf1b871 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5_run.c +@@ -0,0 +1,28 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_shift_5.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ~i & 7; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : b[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6.c +new file mode 100644 +index 000000000..35cb67677 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] OP c[i] : c[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, shl, <<) \ ++ T (TYPE, shr, >>) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlslr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasrr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsrr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6_run.c +new file mode 100644 +index 000000000..e601c6156 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6_run.c +@@ -0,0 +1,28 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_shift_6.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ~i & 7; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : c[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7.c +new file mode 100644 +index 000000000..80154b25e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] OP c[i] : a[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, shl, <<) \ ++ T (TYPE, shr, >>) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 4 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7_run.c +new file mode 100644 +index 000000000..d23b0093d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7_run.c +@@ -0,0 +1,28 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_shift_7.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ~i & 7; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : a[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8.c +new file mode 100644 +index 000000000..b478c0c4f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] OP c[i] : 91; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, shl, <<) \ ++ T (TYPE, shr, >>) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-times {\tsel\t} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8_run.c +new file mode 100644 +index 000000000..72e5a7b59 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8_run.c +@@ -0,0 +1,28 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_shift_8.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ~i & 7; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 91)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9.c +new file mode 100644 +index 000000000..184e93ab8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_LOOP(TYPE, NAME, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, TYPE *__restrict c, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] OP c[i] : 0; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, shl, <<) \ ++ T (TYPE, shr, >>) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, int32_t) \ ++ TEST_TYPE (T, uint32_t) \ ++ TEST_TYPE (T, int64_t) \ ++ TEST_TYPE (T, uint64_t) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 4 } } */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 4 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9_run.c +new file mode 100644 +index 000000000..6e41ac4da +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9_run.c +@@ -0,0 +1,28 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_shift_9.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], c[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ c[i] = ~i & 7; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, c, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 0)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1.c +new file mode 100644 +index 000000000..2b5f9c345 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1.c +@@ -0,0 +1,59 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define abs(A) ((A) < 0 ? -(A) : (A)) ++#define neg(A) (-(A)) ++ ++#define DEF_LOOP(TYPE, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##OP (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] ? 
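The cond_shift_* tests above are generated entirely by the DEF_LOOP/TEST_TYPE/TEST_ALL macros, so no loop body ever appears in expanded form. As a minimal sketch, one instantiation from cond_shift_9.c, DEF_LOOP applied to int32_t with the shl/<< pair, hand-expands to roughly the following (the test_<TYPE>_<NAME> name comes from the macro; the expansion is shown for illustration only):

    #include <stdint.h>

    void __attribute__ ((noipa))
    test_int32_t_shl (int32_t *__restrict r, int32_t *__restrict a,
                      int32_t *__restrict b, int32_t *__restrict c, int n)
    {
      /* Conditional shift with a zero "else" value.  */
      for (int i = 0; i < n; ++i)
        r[i] = a[i] > 20 ? b[i] << c[i] : 0;
    }

Per the scan-assembler directives in that file, the expectation is a predicated LSL (or its reversed LSLR form) together with a zeroing MOVPRFX, with no separate SEL and no vector-to-vector MOV.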
OP (a[i]) : a[i]; \ ++ } ++ ++#define TEST_INT_TYPE(T, TYPE) \ ++ T (TYPE, abs) \ ++ T (TYPE, neg) ++ ++#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \ ++ T (TYPE, __builtin_fabs##SUFFIX) \ ++ T (TYPE, neg) ++ ++#define TEST_ALL(T) \ ++ TEST_INT_TYPE (T, int8_t) \ ++ TEST_INT_TYPE (T, int16_t) \ ++ TEST_INT_TYPE (T, int32_t) \ ++ TEST_INT_TYPE (T, int64_t) \ ++ TEST_FLOAT_TYPE (T, _Float16, f16) \ ++ TEST_FLOAT_TYPE (T, float, f) \ ++ TEST_FLOAT_TYPE (T, double, ) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* XFAILed because the ?: gets canonicalized so that the operation is in ++ the false arm. */ ++/* { dg-final { scan-assembler-not {\tsel\t} { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1_run.c +new file mode 100644 +index 000000000..a6c1a49dd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_unary_1.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, OP) \ ++ { \ ++ TYPE r[N], a[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ ++ pred[i] = (i % 7 < 4); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##OP (r, a, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (pred[i] ? OP (a[i]) : a[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2.c +new file mode 100644 +index 000000000..97d1b8f5d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define abs(A) ((A) < 0 ? -(A) : (A)) ++#define neg(A) (-(A)) ++ ++#define DEF_LOOP(TYPE, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##OP (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict b, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ { \ ++ TYPE bi = b[i]; \ ++ r[i] = pred[i] ? 
OP (a[i]) : bi; \ ++ } \ ++ } ++ ++#define TEST_INT_TYPE(T, TYPE) \ ++ T (TYPE, abs) \ ++ T (TYPE, neg) ++ ++#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \ ++ T (TYPE, __builtin_fabs##SUFFIX) \ ++ T (TYPE, neg) ++ ++#define TEST_ALL(T) \ ++ TEST_INT_TYPE (T, int8_t) \ ++ TEST_INT_TYPE (T, int16_t) \ ++ TEST_INT_TYPE (T, int32_t) \ ++ TEST_INT_TYPE (T, int64_t) \ ++ TEST_FLOAT_TYPE (T, _Float16, f16) \ ++ TEST_FLOAT_TYPE (T, float, f) \ ++ TEST_FLOAT_TYPE (T, double, ) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2_run.c +new file mode 100644 +index 000000000..1a385c323 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2_run.c +@@ -0,0 +1,28 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_unary_2.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ ++ b[i] = (i % 9) * (i % 7 + 1); \ ++ pred[i] = (i % 7 < 4); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##OP (r, a, b, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (pred[i] ? OP (a[i]) : b[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3.c +new file mode 100644 +index 000000000..dde0fdd92 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define abs(A) ((A) < 0 ? -(A) : (A)) ++#define neg(A) (-(A)) ++ ++#define DEF_LOOP(TYPE, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##OP (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] ? 
OP (a[i]) : 5; \ ++ } ++ ++#define TEST_INT_TYPE(T, TYPE) \ ++ T (TYPE, abs) \ ++ T (TYPE, neg) ++ ++#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \ ++ T (TYPE, __builtin_fabs##SUFFIX) \ ++ T (TYPE, neg) ++ ++#define TEST_ALL(T) \ ++ TEST_INT_TYPE (T, int8_t) \ ++ TEST_INT_TYPE (T, int16_t) \ ++ TEST_INT_TYPE (T, int32_t) \ ++ TEST_INT_TYPE (T, int64_t) \ ++ TEST_FLOAT_TYPE (T, _Float16, f16) \ ++ TEST_FLOAT_TYPE (T, float, f) \ ++ TEST_FLOAT_TYPE (T, double, ) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3_run.c +new file mode 100644 +index 000000000..3c72b239a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_unary_3.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, OP) \ ++ { \ ++ TYPE r[N], a[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ ++ pred[i] = (i % 7 < 4); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##OP (r, a, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (pred[i] ? OP (a[i]) : 5)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c +new file mode 100644 +index 000000000..4604365fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c +@@ -0,0 +1,62 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define abs(A) ((A) < 0 ? -(A) : (A)) ++#define neg(A) (-(A)) ++ ++#define DEF_LOOP(TYPE, OP) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##OP (TYPE *__restrict r, TYPE *__restrict a, \ ++ TYPE *__restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = pred[i] ? 
OP (a[i]) : 0; \ ++ } ++ ++#define TEST_INT_TYPE(T, TYPE) \ ++ T (TYPE, abs) \ ++ T (TYPE, neg) ++ ++#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \ ++ T (TYPE, __builtin_fabs##SUFFIX) \ ++ T (TYPE, neg) ++ ++#define TEST_ALL(T) \ ++ TEST_INT_TYPE (T, int8_t) \ ++ TEST_INT_TYPE (T, int16_t) \ ++ TEST_INT_TYPE (T, int32_t) \ ++ TEST_INT_TYPE (T, int64_t) \ ++ TEST_FLOAT_TYPE (T, _Float16, f16) \ ++ TEST_FLOAT_TYPE (T, float, f) \ ++ TEST_FLOAT_TYPE (T, double, ) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ ++ ++/* Really we should be able to use MOVPRFX /z here, but at the moment ++ we're relying on combine to merge a SEL and an arithmetic operation, ++ and the SEL doesn't allow the "false" value to be zero when the "true" ++ value is a register. */ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4_run.c +new file mode 100644 +index 000000000..48d254150 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_unary_4.c" ++ ++#define N 99 ++ ++#define TEST_LOOP(TYPE, OP) \ ++ { \ ++ TYPE r[N], a[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ ++ pred[i] = (i % 7 < 4); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##OP (r, a, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ if (r[i] != (pred[i] ? 
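The cond_unary_* tests follow the same macro scheme with unary abs/neg (and fabs/fneg for the floating-point types). A minimal hand-expansion of one case from cond_unary_4.c, DEF_LOOP applied to int32_t with neg, where neg(A) is defined as (-(A)), is roughly:

    #include <stdint.h>

    void __attribute__ ((noipa))
    test_int32_t_neg (int32_t *__restrict r, int32_t *__restrict a,
                      int32_t *__restrict pred, int n)
    {
      /* Conditional negation with a zero "else" value.  */
      for (int i = 0; i < n; ++i)
        r[i] = pred[i] ? -(a[i]) : 0;
    }

As the comment in cond_unary_4.c notes, this "false value is zero" case currently goes through an unpredicated MOVPRFX rather than MOVPRFX /z, because combine is relied on to merge a SEL with the arithmetic operation. The expected MOVPRFX count of 14 matches the fourteen DEF_LOOP instantiations (four integer types and three floating-point types, each with two operations).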
OP (a[i]) : 0)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1.c +new file mode 100644 +index 000000000..05641199e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) ++ ++#define DEF_LOOP(TYPE, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##CONST##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ ++ TYPE *restrict b) \ ++ { \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ r[i] = a[i] > 20 ? b[i] & CONST : b[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (uint16_t, 0xff) \ ++ \ ++ T (uint32_t, 0xff) \ ++ T (uint32_t, 0xffff) \ ++ \ ++ T (uint64_t, 0xff) \ ++ T (uint64_t, 0xffff) \ ++ T (uint64_t, 0xffffffff) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \1\n} } } */ ++ ++/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \1\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x2,[^L]*\tuxth\t\1, p[0-7]/m, \1\n} } } */ ++ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \1\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxth\t\1, p[0-7]/m, \1\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxtw\t\1, p[0-7]/m, \1\n} } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1_run.c +new file mode 100644 +index 000000000..685f39478 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_uxt_1.c" ++ ++#define TEST_LOOP(TYPE, CONST) \ ++ { \ ++ TYPE r[NUM_ELEMS (TYPE)]; \ ++ TYPE a[NUM_ELEMS (TYPE)]; \ ++ TYPE b[NUM_ELEMS (TYPE)]; \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##CONST##_##TYPE (r, a, b); \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ if (r[i] != (a[i] > 20 ? b[i] & CONST : b[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2.c +new file mode 100644 +index 000000000..c900498a0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) ++ ++#define DEF_LOOP(TYPE, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##CONST##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ ++ TYPE *restrict b) \ ++ { \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] & CONST : a[i]; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (uint16_t, 0xff) \ ++ \ ++ T (uint32_t, 0xff) \ ++ T (uint32_t, 0xffff) \ ++ \ ++ T (uint64_t, 0xff) \ ++ T (uint64_t, 0xffff) \ ++ T (uint64_t, 0xffffffff) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x1,[^L]*\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \2\n} } } */ ++ ++/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x1,[^L]*\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \2\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x1,[^L]*\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x2,[^L]*\tuxth\t\1, p[0-7]/m, \2\n} } } */ ++ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x1,[^L]*\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \2\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x1,[^L]*\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxth\t\1, p[0-7]/m, \2\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x1,[^L]*\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxtw\t\1, p[0-7]/m, \2\n} } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz} } } */ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2_run.c +new file mode 100644 +index 000000000..75679cdf9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_uxt_2.c" ++ ++#define TEST_LOOP(TYPE, CONST) \ ++ { \ ++ TYPE r[NUM_ELEMS (TYPE)]; \ ++ TYPE a[NUM_ELEMS (TYPE)]; \ ++ TYPE b[NUM_ELEMS (TYPE)]; \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##CONST##_##TYPE (r, a, b); \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ if (r[i] != (a[i] > 20 ? b[i] & CONST : a[i])) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3.c +new file mode 100644 +index 000000000..cf1fd0029 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3.c +@@ -0,0 +1,39 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) ++ ++#define DEF_LOOP(TYPE, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##CONST##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ ++ TYPE *restrict b) \ ++ { \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] & CONST : 127; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (uint16_t, 0xff) \ ++ \ ++ T (uint32_t, 0xff) \ ++ T (uint32_t, 0xffff) \ ++ \ ++ T (uint64_t, 0xff) \ ++ T (uint64_t, 0xffff) \ ++ T (uint64_t, 0xffffffff) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxtb\t\1\.h, p[0-7]/m, z[0-9]+\.h\n} } } */ ++ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxtb\t\1\.s, p[0-7]/m, z[0-9]+\.s\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxth\t\1\.s, p[0-7]/m, z[0-9]+\.s\n} } } */ ++ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxtb\t\1\.d, p[0-7]/m, z[0-9]+\.d\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxth\t\1\.d, p[0-7]/m, z[0-9]+\.d\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxtw\t\1\.d, p[0-7]/m, z[0-9]+\.d\n} } } */ ++ ++/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ ++/* { dg-final { scan-assembler-not {\tsel\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3_run.c +new file mode 100644 +index 000000000..3d33d3a39 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_uxt_3.c" ++ ++#define TEST_LOOP(TYPE, CONST) \ ++ { \ ++ TYPE r[NUM_ELEMS (TYPE)]; \ ++ TYPE a[NUM_ELEMS (TYPE)]; \ ++ TYPE b[NUM_ELEMS (TYPE)]; \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##CONST##_##TYPE (r, a, b); \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ if (r[i] != (a[i] > 20 ? b[i] & CONST : 127)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4.c +new file mode 100644 +index 000000000..25c664780 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4.c +@@ -0,0 +1,36 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) ++ ++#define DEF_LOOP(TYPE, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##CONST##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ ++ TYPE *restrict b) \ ++ { \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ r[i] = a[i] > 20 ? 
b[i] & CONST : 0; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (uint16_t, 0xff) \ ++ \ ++ T (uint32_t, 0xff) \ ++ T (uint32_t, 0xffff) \ ++ \ ++ T (uint64_t, 0xff) \ ++ T (uint64_t, 0xffff) \ ++ T (uint64_t, 0xffffffff) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.h), (p[0-7])/z, z[0-9]+\.h\n\tuxtb\t\1, \2/m, z[0-9]+\.h\n} } } */ ++ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, z[0-9]+\.s\n\tuxtb\t\1, \2/m, z[0-9]+\.s\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, z[0-9]+\.s\n\tuxth\t\1, \2/m, z[0-9]+\.s\n} } } */ ++ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, z[0-9]+\.d\n\tuxtb\t\1, \2/m, z[0-9]+\.d\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, z[0-9]+\.d\n\tuxth\t\1, \2/m, z[0-9]+\.d\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, z[0-9]+\.d\n\tuxtw\t\1, \2/m, z[0-9]+\.d\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4_run.c +new file mode 100644 +index 000000000..f3c4374ba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4_run.c +@@ -0,0 +1,27 @@ ++/* { dg-do run { target { aarch64_sve_hw } } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "cond_uxt_4.c" ++ ++#define TEST_LOOP(TYPE, CONST) \ ++ { \ ++ TYPE r[NUM_ELEMS (TYPE)]; \ ++ TYPE a[NUM_ELEMS (TYPE)]; \ ++ TYPE b[NUM_ELEMS (TYPE)]; \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ { \ ++ a[i] = (i & 1 ? i : 3 * i); \ ++ b[i] = (i >> 4) << (i & 15); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##CONST##_##TYPE (r, a, b); \ ++ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ ++ if (r[i] != (a[i] > 20 ? 
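The cond_uxt_* tests check that masking with an all-ones low-part constant (0xff, 0xffff, 0xffffffff) on a wider unsigned element is recognised as a predicated zero-extension (UXTB/UXTH/UXTW). A minimal hand-expansion of one case from cond_uxt_4.c, DEF_LOOP applied to uint64_t with CONST 0xff, is roughly (names follow the test_<CONST>_<TYPE> pattern; shown for illustration only):

    #include <stdint.h>

    #define NUM_ELEMS(TYPE) (320 / sizeof (TYPE))

    void __attribute__ ((noipa))
    test_0xff_uint64_t (uint64_t *restrict r, uint64_t *restrict a,
                        uint64_t *restrict b)
    {
      /* Masking with 0xff is equivalent to zero-extending from byte.  */
      for (int i = 0; i < NUM_ELEMS (uint64_t); ++i)
        r[i] = a[i] > 20 ? b[i] & 0xff : 0;
    }

For this "else 0" variant the directives expect the zero-extension to be paired with a MOVPRFX /z on the same governing predicate, rather than a SEL.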
b[i] & CONST : 0)) \ ++ __builtin_abort (); \ ++ } ++ ++int main () ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_1.c +new file mode 100644 +index 000000000..ae25dcb73 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/const_1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3" } */ ++ ++#include ++ ++void ++set (uint64_t *dst, int count) ++{ ++ for (int i = 0; i < count; ++i) ++ dst[i] = 0xffff00ff00ffff00ULL; ++} ++ ++/* { dg-final { scan-assembler {\tmovi\tv([0-9]+)\.2d, 0xffff00ff00ffff00\n.*\tdup\tz[0-9]+\.q, z\1\.q\[0\]\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_2.c b/gcc/testsuite/gcc.target/aarch64/sve/const_2.c +new file mode 100644 +index 000000000..7b2b5c2a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/const_2.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3" } */ ++ ++#include ++ ++#define TEST(TYPE, CONST) \ ++ void \ ++ set_##TYPE (TYPE *dst, int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] = CONST; \ ++ } ++ ++TEST (uint16_t, 129) ++TEST (uint32_t, 129) ++TEST (uint64_t, 129) ++ ++/* { dg-final { scan-assembler {\tmovi\tv([0-9]+)\.8h, 0x81\n[^:]*\tdup\tz[0-9]+\.q, z\1\.q\[0\]\n} } } */ ++/* { dg-final { scan-assembler {\tmovi\tv([0-9]+)\.4s, 0x81\n[^:]*\tdup\tz[0-9]+\.q, z\1\.q\[0\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\t(x[0-9]+), 129\n[^:]*\tmov\tz[0-9]+\.d, \1\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_3.c b/gcc/testsuite/gcc.target/aarch64/sve/const_3.c +new file mode 100644 +index 000000000..c18ceaedc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/const_3.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3" } */ ++ ++#include ++ ++#define TEST(TYPE, CONST) \ ++ void \ ++ set_##TYPE (TYPE *dst, int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] = CONST; \ ++ } ++ ++TEST (uint16_t, 0x1234) ++TEST (uint32_t, 0x1234) ++TEST (uint64_t, 0x1234) ++ ++/* { dg-final { scan-assembler {\tmov\t(w[0-9]+), 4660\n[^:]*\tmov\tz[0-9]+\.h, \1\n} } } */ ++/* { dg-final { scan-assembler {\tmov\t(w[0-9]+), 4660\n[^:]*\tmov\tz[0-9]+\.s, \1\n} } } */ ++/* { dg-final { scan-assembler {\tmov\t(x[0-9]+), 4660\n[^:]*\tmov\tz[0-9]+\.d, \1\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ext_2.c b/gcc/testsuite/gcc.target/aarch64/sve/ext_2.c +index 0fe7e4c28..5593b070c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/ext_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/ext_2.c +@@ -14,5 +14,4 @@ foo (void) + asm volatile ("" :: "w" (x)); + } + +-/* { dg-final { scan-assembler {\tmov\tz0\.d, z1\.d\n} } } */ +-/* { dg-final { scan-assembler {\text\tz0\.b, z0\.b, z[01]\.b, #4\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\text\tz0\.b, z0\.b, z1\.b, #4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ext_3.c b/gcc/testsuite/gcc.target/aarch64/sve/ext_3.c +new file mode 100644 +index 000000000..83c04c856 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/ext_3.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=1024" } */ ++ ++typedef int vnx4si __attribute__((vector_size (128))); ++ ++void ++foo (void) ++{ ++ register int x asm ("z0"); ++ register vnx4si y asm ("z1"); ++ ++ asm volatile ("" : "=w" (y)); ++ x = y[21]; ++ asm volatile ("" :: "w" (x)); ++} ++ ++/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\text\tz0\.b, z0\.b, z1\.b, 
#84\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c +new file mode 100644 +index 000000000..13ad83be2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c +@@ -0,0 +1,35 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O3 --save-temps" } */ ++ ++#define N 16 ++ ++typedef float *__restrict__ vnx4sf; ++typedef double *__restrict__ vnx2df; ++typedef _Float16 *__restrict__ vnx8hf_a; ++typedef __fp16 *__restrict__ vnx8hf_b; ++ ++extern float fabsf (float); ++extern double fabs (double); ++ ++#define FABD(type, abs, n) \ ++ void fabd_##type (type res, type a, type b) \ ++ { \ ++ int i; \ ++ for (i = 0; i < n; i++) \ ++ res[i] = abs (a[i] - b[i]); \ ++ } ++ ++#define TEST_SVE_F_MODES(FUNC) \ ++ FUNC (vnx2df, fabs, N) \ ++ FUNC (vnx4sf, fabsf, N) \ ++ FUNC (vnx8hf_a, fabsf, N) \ ++ FUNC (vnx8hf_b, fabsf, N) \ ++ ++TEST_SVE_F_MODES (FABD) ++ ++/* { dg-final { scan-assembler "fabd" } } */ ++/* { dg-final { scan-assembler-not "fsub" } } */ ++/* { dg-final { scan-assembler-not "fabs" } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fadda_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fadda_1.c +new file mode 100644 +index 000000000..158cd6c84 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/fadda_1.c +@@ -0,0 +1,20 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O2 -ftree-vectorize --save-temps" } */ ++ ++#define DO_OPS(TYPE) \ ++TYPE fold_##TYPE (TYPE *src, int count) \ ++{ \ ++ TYPE res = 0; \ ++ for (int i = 0; i < count; ++i) \ ++ res += src[i]; \ ++ return res; \ ++} ++ ++DO_OPS (_Float16) ++DO_OPS (float) ++DO_OPS (double) ++ ++/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */ ++/* { dg-final { scan-assembler-not "sel" } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_1.c +new file mode 100644 +index 000000000..2f0d64bd4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_1.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#ifndef FN ++#define FN(X) __builtin_fmax##X ++#endif ++ ++#define DEF_LOOP(FN, TYPE, NAME, CONST) \ ++ void __attribute__ ((noipa)) \ ++ test_##TYPE##_##NAME (TYPE *__restrict x, \ ++ TYPE *__restrict y, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = FN (y[i], CONST); \ ++ } ++ ++#define TEST_TYPE(T, FN, TYPE) \ ++ T (FN, TYPE, zero, 0) \ ++ T (FN, TYPE, one, 1) \ ++ T (FN, TYPE, two, 2) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, FN (f16), _Float16) \ ++ TEST_TYPE (T, FN (f32), float) \ ++ TEST_TYPE (T, FN (f64), double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, 
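fadda_1.c above vectorizes a plain floating-point accumulation; DO_OPS(float) expands to roughly the following (fold_float is the name produced by the fold_<TYPE> pattern; shown for illustration only):

    float fold_float (float *src, int count)
    {
      /* A straightforward in-order sum; no -ffast-math is in effect.  */
      float res = 0;
      for (int i = 0; i < count; ++i)
        res += src[i];
      return res;
    }

Because the options do not include -ffast-math, the additions must be performed in source order, so the directives look for FADDA, SVE's strictly-ordered floating-point add reduction, once per element size rather than for an unordered tree reduction.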
z[0-9]+\.d, #0\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fminnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_1.c +new file mode 100644 +index 000000000..547772e29 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_1.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#define FN(X) __builtin_fmin##X ++#include "fmaxnm_1.c" ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_1.c b/gcc/testsuite/gcc.target/aarch64/sve/init_1.c +new file mode 100644 +index 000000000..8e6004337 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* Case 1.1: Trailing constants with stepped sequence. */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** index (z[0-9]+\.s), #1, #1 ++** insr \1, w1 ++** insr \1, w0 ++** ... 
++*/ ++__attribute__((noipa)) ++vnx4si foo(int a, int b) ++{ ++ return (vnx4si) { a, b, 1, 2, 3, 4, 5, 6 }; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_10.c b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c +new file mode 100644 +index 000000000..bee039415 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c +@@ -0,0 +1,24 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* Case 5.4: Interleaved repeating elements and non-repeating elements. */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** mov (z[0-9]+\.s), w3 ++** mov (z[0-9]+\.s), w2 ++** insr \2, w1 ++** insr \2, w0 ++** zip1 \2, \2, \1 ++** ... ++*/ ++__attribute__((noipa)) ++vnx4si foo(int a, int b, int c, int f) ++{ ++ return (vnx4si) { a, f, b, f, c, f, c, f }; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_10_run.c +new file mode 100644 +index 000000000..9a6d8650e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_10_run.c +@@ -0,0 +1,21 @@ ++/* { dg-do run { target aarch64_sve256_hw } } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include "init_10.c" ++ ++int main() ++{ ++ int a = 10; ++ int b = 11; ++ int c = 12; ++ int f = 13; ++ ++ vnx4si v = foo (a, b, c, f); ++ int expected[] = { a, f, b, f, c, f, c, f }; ++ ++ for (int i = 0; i < 8; i++) ++ if (v[i] != expected[i]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_11.c b/gcc/testsuite/gcc.target/aarch64/sve/init_11.c +new file mode 100644 +index 000000000..8a9496f34 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_11.c +@@ -0,0 +1,23 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* Case 5.5: Interleaved repeating elements and trailing same elements. */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** mov (z[0-9]+\.s), w1 ++** insr \1, w0 ++** mov (z[0-9]+\.s), w2 ++** zip1 \1, \1, \2 ++** ... 
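In the expected function bodies of the init_* tests, vector elements are written with lane 0 first. For init_1.c above, the INDEX/INSR sequence builds the initializer back to front: INSR shifts the existing lanes up by one and writes the scalar into lane 0. Assuming the register allocator picks z0 (the tests only capture the register as \1), the sequence is roughly:

    index  z0.s, #1, #1    ->  z0 = { 1, 2, 3, 4, 5, 6, 7, 8 }
    insr   z0.s, w1        ->  z0 = { b, 1, 2, 3, 4, 5, 6, 7 }
    insr   z0.s, w0        ->  z0 = { a, b, 1, 2, 3, 4, 5, 6 }

which is exactly the { a, b, 1, 2, 3, 4, 5, 6 } constructor returned by foo.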
++*/ ++__attribute__((noipa)) ++vnx4si foo(int a, int b, int f) ++{ ++ return (vnx4si) { a, f, b, f, b, f, b, f }; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_11_run.c +new file mode 100644 +index 000000000..437155581 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_11_run.c +@@ -0,0 +1,20 @@ ++/* { dg-do run { target aarch64_sve256_hw } } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include "init_11.c" ++ ++int main() ++{ ++ int a = 10; ++ int b = 11; ++ int f = 12; ++ ++ vnx4si v = foo (a, b, f); ++ int expected[] = { a, f, b, f, b, f, b, f }; ++ ++ for (int i = 0; i < 8; i++) ++ if (v[i] != expected[i]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_12.c b/gcc/testsuite/gcc.target/aarch64/sve/init_12.c +new file mode 100644 +index 000000000..bc698ddd3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_12.c +@@ -0,0 +1,26 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* Case 5.5: Interleaved repeating elements and trailing same elements. */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** fmov (s[0-9]+), w1 ++** mov (z[0-9]+\.s), w2 ++** mov (z[0-9]+\.s), w0 ++** insr \3, \1 ++** insr \3, \1 ++** insr \3, \1 ++** zip1 \3, \3, \2 ++** ... ++*/ ++__attribute__((noipa)) ++vnx4si foo(int a, int b, int f) ++{ ++ return (vnx4si) { b, f, b, f, b, f, a, f }; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_12_run.c +new file mode 100644 +index 000000000..5ce7edb1e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_12_run.c +@@ -0,0 +1,20 @@ ++/* { dg-do run { target aarch64_sve256_hw } } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include "init_12.c" ++ ++int main() ++{ ++ int a = 10; ++ int b = 11; ++ int f = 12; ++ ++ vnx4si v = foo (a, b, f); ++ int expected[] = { b, f, b, f, b, f, a, f }; ++ ++ for (int i = 0; i < 8; i++) ++ if (v[i] != expected[i]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_13.c b/gcc/testsuite/gcc.target/aarch64/sve/init_13.c +new file mode 100644 +index 000000000..eea417063 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_13.c +@@ -0,0 +1,17 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++typedef float vnx4sf __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** mov (z[0-9]+\.s), s0 ++** insr \1, wzr ++** ... 
++*/ ++vnx4sf ++foo (float a) ++{ ++ return (vnx4sf) { 0.0f, a, a, a, a, a, a, a }; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_1_run.c +new file mode 100644 +index 000000000..824a5cbea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_1_run.c +@@ -0,0 +1,19 @@ ++/* { dg-do run { target aarch64_sve256_hw } } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include "init_1.c" ++ ++int main() ++{ ++ int a = 10; ++ int b = 11; ++ ++ vnx4si v = foo (a, b); ++ int expected[] = { a, b, 1, 2, 3, 4, 5, 6 }; ++ ++ for (int i = 0; i < 8; i++) ++ if (v[i] != expected[i]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_2.c b/gcc/testsuite/gcc.target/aarch64/sve/init_2.c +new file mode 100644 +index 000000000..0a8aa8dec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_2.c +@@ -0,0 +1,23 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* Case 1.2: Trailing constants with repeating sequence. */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** ... ++** ld1rd (z[0-9]+)\.d, p[0-9]+/z, \[x[0-9]+\] ++** insr \1\.s, w1 ++** insr \1\.s, w0 ++** ... ++*/ ++__attribute__((noipa)) ++vnx4si foo(int a, int b) ++{ ++ return (vnx4si) { a, b, 2, 3, 2, 3, 2, 3 }; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_2_run.c +new file mode 100644 +index 000000000..86c191c77 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_2_run.c +@@ -0,0 +1,19 @@ ++/* { dg-do run { target aarch64_sve256_hw } } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include "init_2.c" ++ ++int main() ++{ ++ int a = 10; ++ int b = 11; ++ ++ vnx4si v = foo (a, b); ++ int expected[] = { a, b, 2, 3, 2, 3, 2, 3 }; ++ ++ for (int i = 0; i < 8; i++) ++ if (v[i] != expected[i]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_3.c b/gcc/testsuite/gcc.target/aarch64/sve/init_3.c +new file mode 100644 +index 000000000..4a418b633 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_3.c +@@ -0,0 +1,24 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* Case 2.1: Leading constants with stepped sequence. */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** index (z[0-9]+\.s), #6, #-1 ++** insr \1, w0 ++** insr \1, w1 ++** rev \1, \1 ++** ... 
++*/ ++__attribute__((noipa)) ++vnx4si foo(int a, int b) ++{ ++ return (vnx4si) { 1, 2, 3, 4, 5, 6, a, b }; ++} ++ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_3_run.c +new file mode 100644 +index 000000000..ce4de6950 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_3_run.c +@@ -0,0 +1,19 @@ ++/* { dg-do run { target aarch64_sve256_hw } } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include "init_3.c" ++ ++int main() ++{ ++ int a = 10; ++ int b = 11; ++ ++ vnx4si v = foo (a, b); ++ int expected[] = { 1, 2, 3, 4, 5, 6, a, b }; ++ ++ for (int i = 0; i < 8; i++) ++ if (v[i] != expected[i]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_4.c b/gcc/testsuite/gcc.target/aarch64/sve/init_4.c +new file mode 100644 +index 000000000..0fa99c151 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_4.c +@@ -0,0 +1,24 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* Case 2.2: Leading constants with stepped sequence. */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** ... ++** ld1rd (z[0-9]+)\.d, p[0-9]+/z, \[x[0-9]+\] ++** insr \1\.s, w1 ++** insr \1\.s, w0 ++** rev \1\.s, \1\.s ++** ... ++*/ ++__attribute__((noipa)) ++vnx4si foo(int a, int b) ++{ ++ return (vnx4si) { 3, 2, 3, 2, 3, 2, b, a }; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_4_run.c +new file mode 100644 +index 000000000..defee421f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_4_run.c +@@ -0,0 +1,19 @@ ++/* { dg-do run { target aarch64_sve256_hw } } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include "init_4.c" ++ ++int main() ++{ ++ int a = 10; ++ int b = 11; ++ ++ vnx4si v = foo (a, b); ++ int expected[] = { 3, 2, 3, 2, 3, 2, b, a }; ++ ++ for (int i = 0; i < 8; i++) ++ if (v[i] != expected[i]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_5.c b/gcc/testsuite/gcc.target/aarch64/sve/init_5.c +new file mode 100644 +index 000000000..794e265c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_5.c +@@ -0,0 +1,22 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* Case 3: Trailing same element. */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** mov (z[0-9]+\.s), w2 ++** insr \1, w1 ++** insr \1, w0 ++** ... 
++*/ ++__attribute__((noipa)) ++vnx4si foo(int a, int b, int c) ++{ ++ return (vnx4si) { a, b, c, c, c, c, c, c }; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_5_run.c +new file mode 100644 +index 000000000..ba91d6fec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_5_run.c +@@ -0,0 +1,20 @@ ++/* { dg-do run { target aarch64_sve256_hw } } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include "init_5.c" ++ ++int main() ++{ ++ int a = 10; ++ int b = 11; ++ int c = 12; ++ ++ vnx4si v = foo (a, b, c); ++ int expected[] = { a, b, c, c, c, c, c, c }; ++ ++ for (int i = 0; i < 8; i++) ++ if (v[i] != expected[i]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_6.c b/gcc/testsuite/gcc.target/aarch64/sve/init_6.c +new file mode 100644 +index 000000000..8443fc000 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_6.c +@@ -0,0 +1,23 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* Case 3: Trailing same element. */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** mov (z[0-9]+\.s), w2 ++** insr \1, w1 ++** insr \1, w0 ++** rev \1, \1 ++** ... ++*/ ++__attribute__((noipa)) ++vnx4si foo(int a, int b, int c) ++{ ++ return (vnx4si) { c, c, c, c, c, c, b, a }; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_6_run.c +new file mode 100644 +index 000000000..802b28f98 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_6_run.c +@@ -0,0 +1,20 @@ ++/* { dg-do run { target aarch64_sve256_hw } } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include "init_6.c" ++ ++int main() ++{ ++ int a = 10; ++ int b = 11; ++ int c = 12; ++ ++ vnx4si v = foo (a, b, c); ++ int expected[] = { c, c, c, c, c, c, b, a }; ++ ++ for (int i = 0; i < 8; i++) ++ if (v[i] != expected[i]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_7.c b/gcc/testsuite/gcc.target/aarch64/sve/init_7.c +new file mode 100644 +index 000000000..63dbbbe61 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_7.c +@@ -0,0 +1,27 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++/* Case 5.1: All elements. */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++/* ++** foo: ++** mov (z[0-9]+\.s), w7 ++** insr \1, w6 ++** insr \1, w5 ++** insr \1, w4 ++** insr \1, w3 ++** insr \1, w2 ++** insr \1, w1 ++** insr \1, w0 ++** ... 
++*/
++__attribute__((noipa))
++vnx4si foo(int a, int b, int c, int d, int e, int f, int g, int h)
++{
++  return (vnx4si) { a, b, c, d, e, f, g, h };
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_7_run.c
+new file mode 100644
+index 000000000..61fe28508
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_7_run.c
+@@ -0,0 +1,25 @@
++/* { dg-do run { target aarch64_sve256_hw } } */
++/* { dg-options "-O2 -msve-vector-bits=256" } */
++
++#include "init_7.c"
++
++int main()
++{
++  int a = 10;
++  int b = 11;
++  int c = 12;
++  int d = 13;
++  int e = 14;
++  int f = 15;
++  int g = 16;
++  int h = 17;
++
++  vnx4si v = foo (a, b, c, d, e, f, g, h);
++  int expected[] = { a, b, c, d, e, f, g, h };
++
++  for (int i = 0; i < 8; i++)
++    if (v[i] != expected[i])
++      __builtin_abort ();
++
++  return 0;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_8.c b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c
+new file mode 100644
+index 000000000..9c2456785
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c
+@@ -0,0 +1,26 @@
++/* { dg-do assemble { target aarch64_asm_sve_ok } } */
++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++/* Case 5.2: Interleaved elements and constants.  */
++
++#include <stdint.h>
++
++typedef int32_t vnx4si __attribute__((vector_size (32)));
++
++/*
++** foo:
++** ...
++** ld1w (z[0-9]+\.s), p[0-9]+/z, \[x[0-9]+\]
++** mov (z[0-9]+\.s), w3
++** insr \2, w2
++** insr \2, w1
++** insr \2, w0
++** zip1 \2, \2, \1
++** ...
++*/
++__attribute__((noipa))
++vnx4si foo(int a, int b, int c, int d)
++{
++  return (vnx4si) { a, 1, b, 2, c, 3, d, 4 };
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_8_run.c
+new file mode 100644
+index 000000000..24a0a6e06
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_8_run.c
+@@ -0,0 +1,21 @@
++/* { dg-do run { target aarch64_sve256_hw } } */
++/* { dg-options "-O2 -msve-vector-bits=256" } */
++
++#include "init_8.c"
++
++int main()
++{
++  int a = 10;
++  int b = 11;
++  int c = 12;
++  int d = 13;
++
++  vnx4si v = foo (a, b, c, d);
++  int expected[] = { a, 1, b, 2, c, 3, d, 4 };
++
++  for (int i = 0; i < 8; i++)
++    if (v[i] != expected[i])
++      __builtin_abort ();
++
++  return 0;
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_9.c b/gcc/testsuite/gcc.target/aarch64/sve/init_9.c
+new file mode 100644
+index 000000000..d22ab71e6
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_9.c
+@@ -0,0 +1,22 @@
++/* { dg-do assemble { target aarch64_asm_sve_ok } } */
++/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++/* Case 5.3: Repeated elements.  */
++
++#include <stdint.h>
++
++typedef int32_t vnx4si __attribute__((vector_size (32)));
++
++/*
++** foo:
++** mov (z[0-9]+\.s), w0
++** mov (z[0-9]+\.s), w1
++** zip1 \1, \1, \2
++** ...
++*/ ++__attribute__((noipa)) ++vnx4si foo(int a, int b) ++{ ++ return (vnx4si) { a, b, a, b, a, b, a, b }; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_9_run.c +new file mode 100644 +index 000000000..636ae3b8b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/init_9_run.c +@@ -0,0 +1,19 @@ ++/* { dg-do run { target aarch64_sve256_hw } } */ ++/* { dg-options "-O2 -msve-vector-bits=256" } */ ++ ++#include "init_9.c" ++ ++int main() ++{ ++ int a = 10; ++ int b = 11; ++ ++ vnx4si v = foo (a, b); ++ int expected[] = { a, b, a, b, a, b, a, b }; ++ ++ for (int i = 0; i < 8; i++) ++ if (v[i] != expected[i]) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ld1r_2.c b/gcc/testsuite/gcc.target/aarch64/sve/ld1r_2.c +index 2e6b59ab4..e0e0f4ee6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/ld1r_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/ld1r_2.c +@@ -28,22 +28,6 @@ + T (int64_t) + + #define FOR_EACH_LOAD_BROADCAST_IMM(T) \ +- T (int16_t, 129, imm_129) \ +- T (int32_t, 129, imm_129) \ +- T (int64_t, 129, imm_129) \ +- \ +- T (int16_t, -130, imm_m130) \ +- T (int32_t, -130, imm_m130) \ +- T (int64_t, -130, imm_m130) \ +- \ +- T (int16_t, 0x1234, imm_0x1234) \ +- T (int32_t, 0x1234, imm_0x1234) \ +- T (int64_t, 0x1234, imm_0x1234) \ +- \ +- T (int16_t, 0xFEDC, imm_0xFEDC) \ +- T (int32_t, 0xFEDC, imm_0xFEDC) \ +- T (int64_t, 0xFEDC, imm_0xFEDC) \ +- \ + T (int32_t, 0x12345678, imm_0x12345678) \ + T (int64_t, 0x12345678, imm_0x12345678) \ + \ +@@ -56,6 +40,6 @@ FOR_EACH_LOAD_BROADCAST (DEF_LOAD_BROADCAST) + FOR_EACH_LOAD_BROADCAST_IMM (DEF_LOAD_BROADCAST_IMM) + + /* { dg-final { scan-assembler-times {\tld1rb\tz[0-9]+\.b, p[0-7]/z, } 1 } } */ +-/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 5 } } */ +-/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 7 } } */ +-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 8 } } */ ++/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 1 } } */ ++/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 3 } } */ ++/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 4 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c +index 7f02497e8..9ead9c21b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c +@@ -68,7 +68,8 @@ TEST_ALL (LOOP) + /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */ + /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ + /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ +-/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */ ++/* 2 for the calculations of -17 and 17. 
*/ ++/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 10 } } */ + + /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */ +@@ -85,7 +86,8 @@ TEST_ALL (LOOP) + /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */ + /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ + /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ +-/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */ ++/* 2 for the calculations of -17 and 17. */ ++/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 10 } } */ + + /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/aarch64-sve-pcs.exp b/gcc/testsuite/gcc.target/aarch64/sve/pcs/aarch64-sve-pcs.exp +new file mode 100644 +index 000000000..745887593 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/aarch64-sve-pcs.exp +@@ -0,0 +1,52 @@ ++# Specific regression driver for AArch64 SVE. ++# Copyright (C) 2009-2019 Free Software Foundation, Inc. ++# Contributed by ARM Ltd. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, but ++# WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . */ ++ ++# GCC testsuite that uses the `dg.exp' driver. ++ ++# Exit immediately if this isn't an AArch64 target. ++if {![istarget aarch64*-*-*] } then { ++ return ++} ++ ++# Load support procs. ++load_lib gcc-dg.exp ++ ++# If a testcase doesn't have special options, use these. ++global DEFAULT_CFLAGS ++if ![info exists DEFAULT_CFLAGS] then { ++ set DEFAULT_CFLAGS " -ansi -pedantic-errors" ++} ++ ++# Initialize `dg'. ++dg-init ++ ++# Force SVE if we're not testing it already. ++if { [check_effective_target_aarch64_sve] } { ++ set sve_flags "" ++} else { ++ set sve_flags "-march=armv8.2-a+sve" ++} ++ ++# Main loop. ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ ++ $sve_flags $DEFAULT_CFLAGS ++ ++# All done. 
++dg-finish +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_1.c +new file mode 100644 +index 000000000..12ae76789 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_1.c +@@ -0,0 +1,112 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++svbool_t ret_b (void) { return svptrue_b8 (); } ++ ++svint8_t ret_s8 (void) { return svdup_s8 (0); } ++svint16_t ret_s16 (void) { return svdup_s16 (0); } ++svint32_t ret_s32 (void) { return svdup_s32 (0); } ++svint64_t ret_s64 (void) { return svdup_s64 (0); } ++svuint8_t ret_u8 (void) { return svdup_u8 (0); } ++svuint16_t ret_u16 (void) { return svdup_u16 (0); } ++svuint32_t ret_u32 (void) { return svdup_u32 (0); } ++svuint64_t ret_u64 (void) { return svdup_u64 (0); } ++svbfloat16_t ret_bf16 (void) { return svundef_bf16 (); } ++svfloat16_t ret_f16 (void) { return svdup_f16 (0); } ++svfloat32_t ret_f32 (void) { return svdup_f32 (0); } ++svfloat64_t ret_f64 (void) { return svdup_f64 (0); } ++ ++svint8x2_t ret_s8x2 (void) { return svundef2_s8 (); } ++svint16x2_t ret_s16x2 (void) { return svundef2_s16 (); } ++svint32x2_t ret_s32x2 (void) { return svundef2_s32 (); } ++svint64x2_t ret_s64x2 (void) { return svundef2_s64 (); } ++svuint8x2_t ret_u8x2 (void) { return svundef2_u8 (); } ++svuint16x2_t ret_u16x2 (void) { return svundef2_u16 (); } ++svuint32x2_t ret_u32x2 (void) { return svundef2_u32 (); } ++svuint64x2_t ret_u64x2 (void) { return svundef2_u64 (); } ++svbfloat16x2_t ret_bf16x2 (void) { return svundef2_bf16 (); } ++svfloat16x2_t ret_f16x2 (void) { return svundef2_f16 (); } ++svfloat32x2_t ret_f32x2 (void) { return svundef2_f32 (); } ++svfloat64x2_t ret_f64x2 (void) { return svundef2_f64 (); } ++ ++svint8x3_t ret_s8x3 (void) { return svundef3_s8 (); } ++svint16x3_t ret_s16x3 (void) { return svundef3_s16 (); } ++svint32x3_t ret_s32x3 (void) { return svundef3_s32 (); } ++svint64x3_t ret_s64x3 (void) { return svundef3_s64 (); } ++svuint8x3_t ret_u8x3 (void) { return svundef3_u8 (); } ++svuint16x3_t ret_u16x3 (void) { return svundef3_u16 (); } ++svuint32x3_t ret_u32x3 (void) { return svundef3_u32 (); } ++svuint64x3_t ret_u64x3 (void) { return svundef3_u64 (); } ++svbfloat16x3_t ret_bf16x3 (void) { return svundef3_bf16 (); } ++svfloat16x3_t ret_f16x3 (void) { return svundef3_f16 (); } ++svfloat32x3_t ret_f32x3 (void) { return svundef3_f32 (); } ++svfloat64x3_t ret_f64x3 (void) { return svundef3_f64 (); } ++ ++svint8x4_t ret_s8x4 (void) { return svundef4_s8 (); } ++svint16x4_t ret_s16x4 (void) { return svundef4_s16 (); } ++svint32x4_t ret_s32x4 (void) { return svundef4_s32 (); } ++svint64x4_t ret_s64x4 (void) { return svundef4_s64 (); } ++svuint8x4_t ret_u8x4 (void) { return svundef4_u8 (); } ++svuint16x4_t ret_u16x4 (void) { return svundef4_u16 (); } ++svuint32x4_t ret_u32x4 (void) { return svundef4_u32 (); } ++svuint64x4_t ret_u64x4 (void) { return svundef4_u64 (); } ++svbfloat16x4_t ret_bf16x4 (void) { return svundef4_bf16 (); } ++svfloat16x4_t ret_f16x4 (void) { return svundef4_f16 (); } ++svfloat32x4_t ret_f32x4 (void) { return svundef4_f32 (); } ++svfloat64x4_t ret_f64x4 (void) { return svundef4_f64 (); } ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_b\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s64\n} } } */ ++/* { dg-final { 
scan-assembler {\t\.variant_pcs\tret_u8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f64\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s8x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s64x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u8x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f64x2\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s8x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s64x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u8x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f64x3\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s8x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s32x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s64x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u8x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f64x4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_2.c +new file mode 100644 +index 000000000..9f0741e3c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_2.c +@@ -0,0 +1,111 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void fn_b (svbool_t x) {} ++ ++void fn_s8 (svint8_t x) {} ++void fn_s16 
(svint16_t x) {} ++void fn_s32 (svint32_t x) {} ++void fn_s64 (svint64_t x) {} ++void fn_u8 (svuint8_t x) {} ++void fn_u16 (svuint16_t x) {} ++void fn_u32 (svuint32_t x) {} ++void fn_u64 (svuint64_t x) {} ++void fn_bf16 (svbfloat16_t x) {} ++void fn_f16 (svfloat16_t x) {} ++void fn_f32 (svfloat32_t x) {} ++void fn_f64 (svfloat64_t x) {} ++ ++void fn_s8x2 (svint8x2_t x) {} ++void fn_s16x2 (svint16x2_t x) {} ++void fn_s32x2 (svint32x2_t x) {} ++void fn_s64x2 (svint64x2_t x) {} ++void fn_u8x2 (svuint8x2_t x) {} ++void fn_u16x2 (svuint16x2_t x) {} ++void fn_u32x2 (svuint32x2_t x) {} ++void fn_u64x2 (svuint64x2_t x) {} ++void fn_bf16x2 (svbfloat16x2_t x) {} ++void fn_f16x2 (svfloat16x2_t x) {} ++void fn_f32x2 (svfloat32x2_t x) {} ++void fn_f64x2 (svfloat64x2_t x) {} ++ ++void fn_s8x3 (svint8x3_t x) {} ++void fn_s16x3 (svint16x3_t x) {} ++void fn_s32x3 (svint32x3_t x) {} ++void fn_s64x3 (svint64x3_t x) {} ++void fn_u8x3 (svuint8x3_t x) {} ++void fn_u16x3 (svuint16x3_t x) {} ++void fn_u32x3 (svuint32x3_t x) {} ++void fn_u64x3 (svuint64x3_t x) {} ++void fn_bf16x3 (svbfloat16x3_t x) {} ++void fn_f16x3 (svfloat16x3_t x) {} ++void fn_f32x3 (svfloat32x3_t x) {} ++void fn_f64x3 (svfloat64x3_t x) {} ++ ++void fn_s8x4 (svint8x4_t x) {} ++void fn_s16x4 (svint16x4_t x) {} ++void fn_s32x4 (svint32x4_t x) {} ++void fn_s64x4 (svint64x4_t x) {} ++void fn_u8x4 (svuint8x4_t x) {} ++void fn_u16x4 (svuint16x4_t x) {} ++void fn_u32x4 (svuint32x4_t x) {} ++void fn_u64x4 (svuint64x4_t x) {} ++void fn_bf16x4 (svbfloat16x4_t x) {} ++void fn_f16x4 (svfloat16x4_t x) {} ++void fn_f32x4 (svfloat32x4_t x) {} ++void fn_f64x4 (svfloat64x4_t x) {} ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_b\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x2\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x3\n} } } */ ++/* { dg-final { scan-assembler 
{\t\.variant_pcs\tfn_s32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x3\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_3.c +new file mode 100644 +index 000000000..42e7860ff +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_3.c +@@ -0,0 +1,107 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void fn_s8 (float d0, float d1, float d2, float d3, svint8_t x) {} ++void fn_s16 (float d0, float d1, float d2, float d3, svint16_t x) {} ++void fn_s32 (float d0, float d1, float d2, float d3, svint32_t x) {} ++void fn_s64 (float d0, float d1, float d2, float d3, svint64_t x) {} ++void fn_u8 (float d0, float d1, float d2, float d3, svuint8_t x) {} ++void fn_u16 (float d0, float d1, float d2, float d3, svuint16_t x) {} ++void fn_u32 (float d0, float d1, float d2, float d3, svuint32_t x) {} ++void fn_u64 (float d0, float d1, float d2, float d3, svuint64_t x) {} ++void fn_bf16 (float d0, float d1, float d2, float d3, svbfloat16_t x) {} ++void fn_f16 (float d0, float d1, float d2, float d3, svfloat16_t x) {} ++void fn_f32 (float d0, float d1, float d2, float d3, svfloat32_t x) {} ++void fn_f64 (float d0, float d1, float d2, float d3, svfloat64_t x) {} ++ ++void fn_s8x2 (float d0, float d1, float d2, float d3, svint8x2_t x) {} ++void fn_s16x2 (float d0, float d1, float d2, float d3, svint16x2_t x) {} ++void fn_s32x2 (float d0, float d1, float d2, float d3, svint32x2_t x) {} ++void fn_s64x2 (float d0, float d1, float d2, float d3, svint64x2_t x) {} ++void fn_u8x2 (float d0, float d1, float d2, float d3, svuint8x2_t x) {} ++void fn_u16x2 (float d0, float d1, float d2, float d3, svuint16x2_t x) {} ++void fn_u32x2 (float d0, float d1, float d2, float d3, svuint32x2_t x) {} ++void fn_u64x2 (float d0, float d1, float d2, float d3, svuint64x2_t x) {} ++void fn_bf16x2 (float d0, float d1, float d2, float d3, svbfloat16x2_t x) {} ++void fn_f16x2 (float d0, float d1, float d2, float d3, svfloat16x2_t x) {} ++void fn_f32x2 (float d0, float d1, float d2, float d3, svfloat32x2_t x) {} ++void fn_f64x2 (float d0, float 
d1, float d2, float d3, svfloat64x2_t x) {} ++ ++void fn_s8x3 (float d0, float d1, float d2, float d3, svint8x3_t x) {} ++void fn_s16x3 (float d0, float d1, float d2, float d3, svint16x3_t x) {} ++void fn_s32x3 (float d0, float d1, float d2, float d3, svint32x3_t x) {} ++void fn_s64x3 (float d0, float d1, float d2, float d3, svint64x3_t x) {} ++void fn_u8x3 (float d0, float d1, float d2, float d3, svuint8x3_t x) {} ++void fn_u16x3 (float d0, float d1, float d2, float d3, svuint16x3_t x) {} ++void fn_u32x3 (float d0, float d1, float d2, float d3, svuint32x3_t x) {} ++void fn_u64x3 (float d0, float d1, float d2, float d3, svuint64x3_t x) {} ++void fn_bf16x3 (float d0, float d1, float d2, float d3, svbfloat16x3_t x) {} ++void fn_f16x3 (float d0, float d1, float d2, float d3, svfloat16x3_t x) {} ++void fn_f32x3 (float d0, float d1, float d2, float d3, svfloat32x3_t x) {} ++void fn_f64x3 (float d0, float d1, float d2, float d3, svfloat64x3_t x) {} ++ ++void fn_s8x4 (float d0, float d1, float d2, float d3, svint8x4_t x) {} ++void fn_s16x4 (float d0, float d1, float d2, float d3, svint16x4_t x) {} ++void fn_s32x4 (float d0, float d1, float d2, float d3, svint32x4_t x) {} ++void fn_s64x4 (float d0, float d1, float d2, float d3, svint64x4_t x) {} ++void fn_u8x4 (float d0, float d1, float d2, float d3, svuint8x4_t x) {} ++void fn_u16x4 (float d0, float d1, float d2, float d3, svuint16x4_t x) {} ++void fn_u32x4 (float d0, float d1, float d2, float d3, svuint32x4_t x) {} ++void fn_u64x4 (float d0, float d1, float d2, float d3, svuint64x4_t x) {} ++void fn_bf16x4 (float d0, float d1, float d2, float d3, svbfloat16x4_t x) {} ++void fn_f16x4 (float d0, float d1, float d2, float d3, svfloat16x4_t x) {} ++void fn_f32x4 (float d0, float d1, float d2, float d3, svfloat32x4_t x) {} ++void fn_f64x4 (float d0, float d1, float d2, float d3, svfloat64x4_t x) {} ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x2\n} } } */ ++ ++/* { dg-final { 
scan-assembler {\t\.variant_pcs\tfn_s8x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x3\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x4\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_4.c +new file mode 100644 +index 000000000..7e4438ed4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_4.c +@@ -0,0 +1,155 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void fn_s8 (float d0, float d1, float d2, float d3, ++ float d4, svint8_t x) {} ++void fn_s16 (float d0, float d1, float d2, float d3, ++ float d4, svint16_t x) {} ++void fn_s32 (float d0, float d1, float d2, float d3, ++ float d4, svint32_t x) {} ++void fn_s64 (float d0, float d1, float d2, float d3, ++ float d4, svint64_t x) {} ++void fn_u8 (float d0, float d1, float d2, float d3, ++ float d4, svuint8_t x) {} ++void fn_u16 (float d0, float d1, float d2, float d3, ++ float d4, svuint16_t x) {} ++void fn_u32 (float d0, float d1, float d2, float d3, ++ float d4, svuint32_t x) {} ++void fn_u64 (float d0, float d1, float d2, float d3, ++ float d4, svuint64_t x) {} ++void fn_bf16 (float d0, float d1, float d2, float d3, ++ float d4, svbfloat16_t x) {} ++void fn_f16 (float d0, float d1, float d2, float d3, ++ float d4, svfloat16_t x) {} ++void fn_f32 (float d0, float d1, float d2, float d3, ++ float d4, svfloat32_t x) {} ++void fn_f64 (float d0, float d1, float d2, float d3, ++ float d4, svfloat64_t x) {} ++ ++void fn_s8x2 (float d0, float d1, float d2, float d3, ++ float d4, svint8x2_t x) {} ++void fn_s16x2 (float d0, float d1, float d2, float d3, ++ float d4, svint16x2_t x) {} ++void fn_s32x2 (float d0, float d1, float d2, float d3, ++ float d4, svint32x2_t x) {} ++void fn_s64x2 (float d0, float d1, float d2, float d3, ++ float d4, svint64x2_t x) {} ++void fn_u8x2 (float d0, float d1, float d2, float d3, ++ float d4, svuint8x2_t x) {} ++void fn_u16x2 (float d0, float d1, float d2, float d3, ++ float d4, svuint16x2_t x) {} ++void fn_u32x2 (float d0, float 
d1, float d2, float d3, ++ float d4, svuint32x2_t x) {} ++void fn_u64x2 (float d0, float d1, float d2, float d3, ++ float d4, svuint64x2_t x) {} ++void fn_bf16x2 (float d0, float d1, float d2, float d3, ++ float d4, svbfloat16x2_t x) {} ++void fn_f16x2 (float d0, float d1, float d2, float d3, ++ float d4, svfloat16x2_t x) {} ++void fn_f32x2 (float d0, float d1, float d2, float d3, ++ float d4, svfloat32x2_t x) {} ++void fn_f64x2 (float d0, float d1, float d2, float d3, ++ float d4, svfloat64x2_t x) {} ++ ++void fn_s8x3 (float d0, float d1, float d2, float d3, ++ float d4, svint8x3_t x) {} ++void fn_s16x3 (float d0, float d1, float d2, float d3, ++ float d4, svint16x3_t x) {} ++void fn_s32x3 (float d0, float d1, float d2, float d3, ++ float d4, svint32x3_t x) {} ++void fn_s64x3 (float d0, float d1, float d2, float d3, ++ float d4, svint64x3_t x) {} ++void fn_u8x3 (float d0, float d1, float d2, float d3, ++ float d4, svuint8x3_t x) {} ++void fn_u16x3 (float d0, float d1, float d2, float d3, ++ float d4, svuint16x3_t x) {} ++void fn_u32x3 (float d0, float d1, float d2, float d3, ++ float d4, svuint32x3_t x) {} ++void fn_u64x3 (float d0, float d1, float d2, float d3, ++ float d4, svuint64x3_t x) {} ++void fn_bf16x3 (float d0, float d1, float d2, float d3, ++ float d4, svbfloat16x3_t x) {} ++void fn_f16x3 (float d0, float d1, float d2, float d3, ++ float d4, svfloat16x3_t x) {} ++void fn_f32x3 (float d0, float d1, float d2, float d3, ++ float d4, svfloat32x3_t x) {} ++void fn_f64x3 (float d0, float d1, float d2, float d3, ++ float d4, svfloat64x3_t x) {} ++ ++void fn_s8x4 (float d0, float d1, float d2, float d3, ++ float d4, svint8x4_t x) {} ++void fn_s16x4 (float d0, float d1, float d2, float d3, ++ float d4, svint16x4_t x) {} ++void fn_s32x4 (float d0, float d1, float d2, float d3, ++ float d4, svint32x4_t x) {} ++void fn_s64x4 (float d0, float d1, float d2, float d3, ++ float d4, svint64x4_t x) {} ++void fn_u8x4 (float d0, float d1, float d2, float d3, ++ float d4, svuint8x4_t x) {} ++void fn_u16x4 (float d0, float d1, float d2, float d3, ++ float d4, svuint16x4_t x) {} ++void fn_u32x4 (float d0, float d1, float d2, float d3, ++ float d4, svuint32x4_t x) {} ++void fn_u64x4 (float d0, float d1, float d2, float d3, ++ float d4, svuint64x4_t x) {} ++void fn_bf16x4 (float d0, float d1, float d2, float d3, ++ float d4, svbfloat16x4_t x) {} ++void fn_f16x4 (float d0, float d1, float d2, float d3, ++ float d4, svfloat16x4_t x) {} ++void fn_f32x4 (float d0, float d1, float d2, float d3, ++ float d4, svfloat32x4_t x) {} ++void fn_f64x4 (float d0, float d1, float d2, float d3, ++ float d4, svfloat64x4_t x) {} ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x2\n} } } */ ++/* { dg-final { 
scan-assembler {\t\.variant_pcs\tfn_s16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x2\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x3\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x3\n} } } */ ++ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_5.c +new file mode 100644 +index 000000000..6dadc0492 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_5.c +@@ -0,0 +1,155 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void fn_s8 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint8_t x) {} ++void fn_s16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint16_t x) {} ++void fn_s32 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint32_t x) {} ++void fn_s64 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint64_t x) {} ++void fn_u8 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint8_t x) {} ++void fn_u16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint16_t x) {} ++void fn_u32 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint32_t x) {} ++void fn_u64 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint64_t x) {} ++void 
fn_bf16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svbfloat16_t x) {} ++void fn_f16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat16_t x) {} ++void fn_f32 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat32_t x) {} ++void fn_f64 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat64_t x) {} ++ ++void fn_s8x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint8x2_t x) {} ++void fn_s16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint16x2_t x) {} ++void fn_s32x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint32x2_t x) {} ++void fn_s64x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint64x2_t x) {} ++void fn_u8x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint8x2_t x) {} ++void fn_u16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint16x2_t x) {} ++void fn_u32x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint32x2_t x) {} ++void fn_u64x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint64x2_t x) {} ++void fn_bf16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svbfloat16x2_t x) {} ++void fn_f16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat16x2_t x) {} ++void fn_f32x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat32x2_t x) {} ++void fn_f64x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat64x2_t x) {} ++ ++void fn_s8x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint8x3_t x) {} ++void fn_s16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint16x3_t x) {} ++void fn_s32x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint32x3_t x) {} ++void fn_s64x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint64x3_t x) {} ++void fn_u8x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint8x3_t x) {} ++void fn_u16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint16x3_t x) {} ++void fn_u32x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint32x3_t x) {} ++void fn_u64x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint64x3_t x) {} ++void fn_bf16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svbfloat16x3_t x) {} ++void fn_f16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat16x3_t x) {} ++void fn_f32x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat32x3_t x) {} ++void fn_f64x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat64x3_t x) {} ++ ++void fn_s8x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint8x4_t x) {} ++void fn_s16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint16x4_t x) {} ++void fn_s32x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint32x4_t x) {} ++void fn_s64x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svint64x4_t x) {} ++void fn_u8x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint8x4_t x) {} ++void fn_u16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint16x4_t x) {} ++void fn_u32x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint32x4_t x) {} ++void fn_u64x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svuint64x4_t x) {} ++void 
fn_bf16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svbfloat16x4_t x) {} ++void fn_f16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat16x4_t x) {} ++void fn_f32x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat32x4_t x) {} ++void fn_f64x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, svfloat64x4_t x) {} ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64\n} } } */ ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x2\n} } } */ ++ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x3\n} } } */ ++ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x4\n} } } */ ++/* { dg-final 
{ scan-assembler-not {\t\.variant_pcs\tfn_bf16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_6.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_6.c +new file mode 100644 +index 000000000..0ff73e259 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_6.c +@@ -0,0 +1,155 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void fn_s8 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint8_t x) {} ++void fn_s16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint16_t x) {} ++void fn_s32 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint32_t x) {} ++void fn_s64 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint64_t x) {} ++void fn_u8 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint8_t x) {} ++void fn_u16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint16_t x) {} ++void fn_u32 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint32_t x) {} ++void fn_u64 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint64_t x) {} ++void fn_bf16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svbfloat16_t x) {} ++void fn_f16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat16_t x) {} ++void fn_f32 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat32_t x) {} ++void fn_f64 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat64_t x) {} ++ ++void fn_s8x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint8x2_t x) {} ++void fn_s16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint16x2_t x) {} ++void fn_s32x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint32x2_t x) {} ++void fn_s64x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint64x2_t x) {} ++void fn_u8x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint8x2_t x) {} ++void fn_u16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint16x2_t x) {} ++void fn_u32x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint32x2_t x) {} ++void fn_u64x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint64x2_t x) {} ++void fn_bf16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svbfloat16x2_t x) {} ++void fn_f16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat16x2_t x) {} ++void fn_f32x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat32x2_t x) {} ++void fn_f64x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat64x2_t x) {} ++ ++void fn_s8x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint8x3_t x) {} ++void fn_s16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint16x3_t x) {} ++void fn_s32x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint32x3_t x) {} ++void fn_s64x3 (float d0, float d1, float d2, float d3, ++ float d4, 
float d5, float d6, svint64x3_t x) {} ++void fn_u8x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint8x3_t x) {} ++void fn_u16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint16x3_t x) {} ++void fn_u32x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint32x3_t x) {} ++void fn_u64x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint64x3_t x) {} ++void fn_bf16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svbfloat16x3_t x) {} ++void fn_f16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat16x3_t x) {} ++void fn_f32x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat32x3_t x) {} ++void fn_f64x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat64x3_t x) {} ++ ++void fn_s8x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint8x4_t x) {} ++void fn_s16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint16x4_t x) {} ++void fn_s32x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint32x4_t x) {} ++void fn_s64x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svint64x4_t x) {} ++void fn_u8x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint8x4_t x) {} ++void fn_u16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint16x4_t x) {} ++void fn_u32x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint32x4_t x) {} ++void fn_u64x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svuint64x4_t x) {} ++void fn_bf16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svbfloat16x4_t x) {} ++void fn_f16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat16x4_t x) {} ++void fn_f32x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat32x4_t x) {} ++void fn_f64x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, svfloat64x4_t x) {} ++ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */ ++/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64\n} } } */ ++ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x2\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x2\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x2\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x2\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x2\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x2\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x2\n} } 
} */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x2\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x2\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x2\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x2\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x2\n} } } */ ++ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x3\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x3\n} } } */ ++ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x4\n} } } */ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_7.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_7.c +new file mode 100644 +index 000000000..4f3ff8107 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_7.c +@@ -0,0 +1,105 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void fn_s8 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint8_t x) {} ++void fn_s16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint16_t x) {} ++void fn_s32 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint32_t x) {} ++void fn_s64 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint64_t x) {} ++void fn_u8 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint8_t x) {} ++void fn_u16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint16_t x) {} ++void fn_u32 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint32_t x) {} ++void fn_u64 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint64_t x) {} ++void fn_bf16 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svbfloat16_t x) {} ++void fn_f16 (float d0, float d1, float d2, float d3, 
++ float d4, float d5, float d6, float d7, svfloat16_t x) {} ++void fn_f32 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat32_t x) {} ++void fn_f64 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat64_t x) {} ++ ++void fn_s8x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint8x2_t x) {} ++void fn_s16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint16x2_t x) {} ++void fn_s32x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint32x2_t x) {} ++void fn_s64x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint64x2_t x) {} ++void fn_u8x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint8x2_t x) {} ++void fn_u16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint16x2_t x) {} ++void fn_u32x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint32x2_t x) {} ++void fn_u64x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint64x2_t x) {} ++void fn_bf16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svbfloat16x2_t x) {} ++void fn_f16x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat16x2_t x) {} ++void fn_f32x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat32x2_t x) {} ++void fn_f64x2 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat64x2_t x) {} ++ ++void fn_s8x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint8x3_t x) {} ++void fn_s16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint16x3_t x) {} ++void fn_s32x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint32x3_t x) {} ++void fn_s64x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint64x3_t x) {} ++void fn_u8x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint8x3_t x) {} ++void fn_u16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint16x3_t x) {} ++void fn_u32x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint32x3_t x) {} ++void fn_u64x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint64x3_t x) {} ++void fn_bf16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svbfloat16x3_t x) {} ++void fn_f16x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat16x3_t x) {} ++void fn_f32x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat32x3_t x) {} ++void fn_f64x3 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat64x3_t x) {} ++ ++void fn_s8x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint8x4_t x) {} ++void fn_s16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint16x4_t x) {} ++void fn_s32x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svint32x4_t x) {} ++void fn_s64x4 (float d0, float d1, float d2, 
float d3, ++ float d4, float d5, float d6, float d7, svint64x4_t x) {} ++void fn_u8x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint8x4_t x) {} ++void fn_u16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint16x4_t x) {} ++void fn_u32x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint32x4_t x) {} ++void fn_u64x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svuint64x4_t x) {} ++void fn_bf16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svbfloat16x4_t x) {} ++void fn_f16x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat16x4_t x) {} ++void fn_f32x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat32x4_t x) {} ++void fn_f64x4 (float d0, float d1, float d2, float d3, ++ float d4, float d5, float d6, float d7, svfloat64x4_t x) {} ++ ++/* { dg-final { scan-assembler-not {\t\.variant_pcs\t\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_1.c +new file mode 100644 +index 000000000..fd9932e2e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_1.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++/* ++** callee_pred: ++** ldr (p[0-9]+), \[x0\] ++** ldr (p[0-9]+), \[x1\] ++** brkpa (p[0-7])\.b, p0/z, p1\.b, p2\.b ++** brkpb (p[0-7])\.b, \3/z, p3\.b, \1\.b ++** brka p0\.b, \4/z, \2\.b ++** ret ++*/ ++__SVBool_t __attribute__((noipa)) ++callee_pred (__SVBool_t p0, __SVBool_t p1, __SVBool_t p2, __SVBool_t p3, ++ __SVBool_t mem0, __SVBool_t mem1) ++{ ++ p0 = svbrkpa_z (p0, p1, p2); ++ p0 = svbrkpb_z (p0, p3, mem0); ++ return svbrka_z (p0, mem1); ++} ++ ++/* ++** caller_pred: ++** ... ++** ptrue (p[0-9]+)\.b, vl5 ++** str \1, \[x0\] ++** ... ++** ptrue (p[0-9]+)\.h, vl6 ++** str \2, \[x1\] ++** ptrue p3\.d, vl4 ++** ptrue p2\.s, vl3 ++** ptrue p1\.h, vl2 ++** ptrue p0\.b, vl1 ++** bl callee_pred ++** ... 
++*/ ++__SVBool_t __attribute__((noipa)) ++caller_pred (void) ++{ ++ return callee_pred (svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4), ++ svptrue_pat_b8 (SV_VL5), ++ svptrue_pat_b16 (SV_VL6)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_10.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_10.c +new file mode 100644 +index 000000000..1bbcb770d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_10.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++/* ++** callee: ++** fadd s0, (s0, s6|s6, s0) ++** ret ++*/ ++float __attribute__((noipa)) ++callee (float s0, double d1, svfloat32x4_t z2, svfloat64x4_t stack1, ++ float s6, double d7) ++{ ++ return s0 + s6; ++} ++ ++float __attribute__((noipa)) ++caller (float32_t *x0, float64_t *x1) ++{ ++ return callee (0.0f, 1.0, ++ svld4 (svptrue_b8 (), x0), ++ svld4 (svptrue_b8 (), x1), ++ 6.0f, 7.0); ++} ++ ++/* { dg-final { scan-assembler {\tld4w\t{z2\.s - z5\.s}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - z[0-9]+\.d}, p[0-7]/z, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tmovi\tv0\.[24]s, #0\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\td1, #?1\.0} } } */ ++/* { dg-final { scan-assembler {\tfmov\ts6, #?6\.0} } } */ ++/* { dg-final { scan-assembler {\tfmov\td7, #?7\.0} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_nosc.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_nosc.c +new file mode 100644 +index 000000000..0f62e0b08 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_nosc.c +@@ -0,0 +1,61 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O0 -g" } */ ++ ++#include ++ ++void __attribute__((noipa)) ++callee (svbool_t p, svint8_t s8, svuint16x4_t u16, svfloat32x3_t f32, ++ svint64x2_t s64) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ ++ if (svptest_any (pg, sveor_z (pg, p, svptrue_pat_b8 (SV_VL7)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, s8, svindex_s8 (1, 2)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 0), svindex_u16 (2, 3)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 1), svindex_u16 (3, 4)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 2), svindex_u16 (4, 5)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 3), svindex_u16 (5, 6)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 0), svdup_f32 (1.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 1), svdup_f32 (2.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 2), svdup_f32 (3.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 0), svindex_s64 (6, 7)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 1), svindex_s64 (7, 8)))) ++ __builtin_abort (); ++} ++ ++int __attribute__((noipa)) ++main (void) ++{ ++ callee (svptrue_pat_b8 (SV_VL7), ++ svindex_s8 (1, 2), ++ svcreate4 (svindex_u16 (2, 3), ++ svindex_u16 (3, 4), ++ svindex_u16 (4, 5), ++ svindex_u16 (5, 6)), ++ svcreate3 (svdup_f32 (1.0), ++ svdup_f32 (2.0), ++ svdup_f32 (3.0)), ++ svcreate2 (svindex_s64 (6, 7), ++ svindex_s64 (7, 8))); ++} +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_sc.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_sc.c +new file mode 100644 +index 000000000..8a98d58ce +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_sc.c +@@ -0,0 +1,61 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O0 -fstack-clash-protection -g" } */ ++ ++#include ++ ++void __attribute__((noipa)) ++callee (svbool_t p, svint8_t s8, svuint16x4_t u16, svfloat32x3_t f32, ++ svint64x2_t s64) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ ++ if (svptest_any (pg, sveor_z (pg, p, svptrue_pat_b8 (SV_VL7)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, s8, svindex_s8 (1, 2)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 0), svindex_u16 (2, 3)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 1), svindex_u16 (3, 4)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 2), svindex_u16 (4, 5)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 3), svindex_u16 (5, 6)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 0), svdup_f32 (1.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 1), svdup_f32 (2.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 2), svdup_f32 (3.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 0), svindex_s64 (6, 7)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 1), svindex_s64 (7, 8)))) ++ __builtin_abort (); ++} ++ ++int __attribute__((noipa)) ++main (void) ++{ ++ callee (svptrue_pat_b8 (SV_VL7), ++ svindex_s8 (1, 2), ++ svcreate4 (svindex_u16 (2, 3), ++ svindex_u16 (3, 4), ++ svindex_u16 (4, 5), ++ svindex_u16 (5, 6)), ++ svcreate3 (svdup_f32 (1.0), ++ svdup_f32 (2.0), ++ svdup_f32 (3.0)), ++ svcreate2 (svindex_s64 (6, 7), ++ svindex_s64 (7, 8))); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_2.c +new file mode 100644 +index 000000000..43a50887d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_2.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++/* ++** callee_int: ++** ptrue p3\.b, all ++** ld1b (z(?:2[4-9]|3[0-1]).b), p3/z, \[x4\] ++** st1b \1, p2, \[x0\] ++** st1b z4\.b, p1, \[x0\] ++** st1h z5\.h, p1, \[x1\] ++** st1w z6\.s, p1, \[x2\] ++** st1d z7\.d, p1, \[x3\] ++** st1b z0\.b, p0, \[x0\] ++** st1h z1\.h, p0, \[x1\] ++** st1w z2\.s, p0, \[x2\] ++** st1d z3\.d, p0, \[x3\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee_int (int8_t *x0, int16_t *x1, int32_t *x2, int64_t *x3, ++ svint8_t z0, svint16_t z1, svint32_t z2, svint64_t z3, ++ svint8_t z4, svint16_t z5, svint32_t z6, svint64_t z7, ++ svint8_t z8, ++ svbool_t p0, svbool_t p1, svbool_t p2) ++{ ++ svst1 (p2, x0, z8); ++ svst1 (p1, x0, z4); ++ svst1 (p1, x1, z5); ++ svst1 (p1, x2, z6); ++ svst1 (p1, x3, z7); ++ svst1 (p0, x0, z0); ++ svst1 (p0, x1, z1); ++ svst1 (p0, x2, z2); ++ svst1 (p0, x3, z3); ++} ++ ++void __attribute__((noipa)) ++caller_int (int8_t *x0, int16_t *x1, int32_t *x2, int64_t *x3) ++{ ++ callee_int (x0, x1, x2, x3, ++ svdup_s8 (0), ++ svdup_s16 (1), ++ svdup_s32 (2), ++ svdup_s64 (3), ++ svdup_s8 (4), ++ svdup_s16 (5), ++ svdup_s32 (6), ++ svdup_s64 (7), ++ svdup_s8 (8), ++ 
svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tmov\tz0\.b, #0\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz1\.h, #1\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz2\.s, #2\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz3\.d, #3\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz4\.b, #4\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz5\.h, #5\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz6\.s, #6\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz7\.d, #7\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx4, sp\n} } } */ ++/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.b), #8\n.*\tst1b\t\1, p[0-7], \[x4\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_3.c +new file mode 100644 +index 000000000..49fdfc984 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_3.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++/* ++** callee_uint: ++** ptrue p3\.b, all ++** ld1b (z(?:2[4-9]|3[0-1]).b), p3/z, \[x4\] ++** st1b \1, p2, \[x0\] ++** st1b z4\.b, p1, \[x0\] ++** st1h z5\.h, p1, \[x1\] ++** st1w z6\.s, p1, \[x2\] ++** st1d z7\.d, p1, \[x3\] ++** st1b z0\.b, p0, \[x0\] ++** st1h z1\.h, p0, \[x1\] ++** st1w z2\.s, p0, \[x2\] ++** st1d z3\.d, p0, \[x3\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee_uint (uint8_t *x0, uint16_t *x1, uint32_t *x2, uint64_t *x3, ++ svuint8_t z0, svuint16_t z1, svuint32_t z2, svuint64_t z3, ++ svuint8_t z4, svuint16_t z5, svuint32_t z6, svuint64_t z7, ++ svuint8_t z8, ++ svbool_t p0, svbool_t p1, svbool_t p2) ++{ ++ svst1 (p2, x0, z8); ++ svst1 (p1, x0, z4); ++ svst1 (p1, x1, z5); ++ svst1 (p1, x2, z6); ++ svst1 (p1, x3, z7); ++ svst1 (p0, x0, z0); ++ svst1 (p0, x1, z1); ++ svst1 (p0, x2, z2); ++ svst1 (p0, x3, z3); ++} ++ ++void __attribute__((noipa)) ++caller_uint (uint8_t *x0, uint16_t *x1, uint32_t *x2, uint64_t *x3) ++{ ++ callee_uint (x0, x1, x2, x3, ++ svdup_u8 (0), ++ svdup_u16 (1), ++ svdup_u32 (2), ++ svdup_u64 (3), ++ svdup_u8 (4), ++ svdup_u16 (5), ++ svdup_u32 (6), ++ svdup_u64 (7), ++ svdup_u8 (8), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tmov\tz0\.b, #0\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz1\.h, #1\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz2\.s, #2\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz3\.d, #3\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz4\.b, #4\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz5\.h, #5\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz6\.s, #6\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tz7\.d, #7\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx4, sp\n} } } */ ++/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.b), #8\n.*\tst1b\t\1, p[0-7], \[x4\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_4.c 
+new file mode 100644 +index 000000000..4f15fdd50 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_4.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++/* ++** callee_float: ++** ptrue p3\.b, all ++** ld1h (z(?:2[4-9]|3[0-1]).h), p3/z, \[x4\] ++** st1h \1, p2, \[x0\] ++** st1h z4\.h, p1, \[x0\] ++** st1h z5\.h, p1, \[x1\] ++** st1w z6\.s, p1, \[x2\] ++** st1d z7\.d, p1, \[x3\] ++** st1h z0\.h, p0, \[x0\] ++** st1h z1\.h, p0, \[x1\] ++** st1w z2\.s, p0, \[x2\] ++** st1d z3\.d, p0, \[x3\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee_float (float16_t *x0, float16_t *x1, float32_t *x2, float64_t *x3, ++ svfloat16_t z0, svfloat16_t z1, svfloat32_t z2, svfloat64_t z3, ++ svfloat16_t z4, svfloat16_t z5, svfloat32_t z6, svfloat64_t z7, ++ svfloat16_t z8, ++ svbool_t p0, svbool_t p1, svbool_t p2) ++{ ++ svst1 (p2, x0, z8); ++ svst1 (p1, x0, z4); ++ svst1 (p1, x1, z5); ++ svst1 (p1, x2, z6); ++ svst1 (p1, x3, z7); ++ svst1 (p0, x0, z0); ++ svst1 (p0, x1, z1); ++ svst1 (p0, x2, z2); ++ svst1 (p0, x3, z3); ++} ++ ++void __attribute__((noipa)) ++caller_float (float16_t *x0, float16_t *x1, float32_t *x2, float64_t *x3) ++{ ++ callee_float (x0, x1, x2, x3, ++ svdup_f16 (0), ++ svdup_f16 (1), ++ svdup_f32 (2), ++ svdup_f64 (3), ++ svdup_f16 (4), ++ svdup_f16 (5), ++ svdup_f32 (6), ++ svdup_f64 (7), ++ svdup_f16 (8), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tmov\tz0\.[bhsd], #0\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\tz1\.h, #1\.0} } } */ ++/* { dg-final { scan-assembler {\tfmov\tz2\.s, #2\.0} } } */ ++/* { dg-final { scan-assembler {\tfmov\tz3\.d, #3\.0} } } */ ++/* { dg-final { scan-assembler {\tfmov\tz4\.h, #4\.0} } } */ ++/* { dg-final { scan-assembler {\tfmov\tz5\.h, #5\.0} } } */ ++/* { dg-final { scan-assembler {\tfmov\tz6\.s, #6\.0} } } */ ++/* { dg-final { scan-assembler {\tfmov\tz7\.d, #7\.0} } } */ ++/* { dg-final { scan-assembler {\tmov\tx4, sp\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.h), #8\.0.*\tst1h\t\1, p[0-7], \[x4\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_bf16.c +new file mode 100644 +index 000000000..e9b63a45d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_bf16.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] ++** ld1h (z[0-9]+\.h), p4/z, \[x1\] ++** st2h {\2 - \1}, p0, \[x0\] ++** | ++** ld1h (z[0-9]+\.h), p4/z, \[x1\] ++** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] ++** st2h {\3 - \4}, p0, \[x0\] ++** ) ++** st4h {z0\.h - z3\.h}, p1, \[x0\] ++** st3h {z4\.h - z6\.h}, p2, \[x0\] ++** st1h z7\.h, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svbfloat16x4_t z0, svbfloat16x3_t z4, svbfloat16x2_t stack, ++ 
svbfloat16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_bf16 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_bf16 (pg, x0, -8), ++ svld3_vnum_bf16 (pg, x0, -3), ++ svld2_vnum_bf16 (pg, x0, 0), ++ svld1_vnum_bf16 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f16.c +new file mode 100644 +index 000000000..4152f9125 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f16.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] ++** ld1h (z[0-9]+\.h), p4/z, \[x1\] ++** st2h {\2 - \1}, p0, \[x0\] ++** | ++** ld1h (z[0-9]+\.h), p4/z, \[x1\] ++** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] ++** st2h {\3 - \4}, p0, \[x0\] ++** ) ++** st4h {z0\.h - z3\.h}, p1, \[x0\] ++** st3h {z4\.h - z6\.h}, p2, \[x0\] ++** st1h z7\.h, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svfloat16x4_t z0, svfloat16x3_t z4, svfloat16x2_t stack, ++ svfloat16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_f16 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_f16 (pg, x0, -8), ++ svld3_vnum_f16 (pg, x0, -3), ++ svld2_vnum_f16 (pg, x0, 0), ++ svld1_vnum_f16 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #1, 
mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f32.c +new file mode 100644 +index 000000000..0f78fac79 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f32.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] ++** ld1w (z[0-9]+\.s), p4/z, \[x1\] ++** st2w {\2 - \1}, p0, \[x0\] ++** | ++** ld1w (z[0-9]+\.s), p4/z, \[x1\] ++** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] ++** st2w {\3 - \4}, p0, \[x0\] ++** ) ++** st4w {z0\.s - z3\.s}, p1, \[x0\] ++** st3w {z4\.s - z6\.s}, p2, \[x0\] ++** st1w z7\.s, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svfloat32x4_t z0, svfloat32x3_t z4, svfloat32x2_t stack, ++ svfloat32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_f32 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_f32 (pg, x0, -8), ++ svld3_vnum_f32 (pg, x0, -3), ++ svld2_vnum_f32 (pg, x0, 0), ++ svld1_vnum_f32 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f64.c +new file mode 100644 +index 000000000..fe832d0d0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f64.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] ++** ld1d (z[0-9]+\.d), p4/z, \[x1\] ++** st2d {\2 - \1}, p0, \[x0\] ++** | ++** ld1d (z[0-9]+\.d), p4/z, \[x1\] ++** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] ++** st2d {\3 - \4}, p0, \[x0\] 
++** ) ++** st4d {z0\.d - z3\.d}, p1, \[x0\] ++** st3d {z4\.d - z6\.d}, p2, \[x0\] ++** st1d z7\.d, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svfloat64x4_t z0, svfloat64x3_t z4, svfloat64x2_t stack, ++ svfloat64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_f64 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_f64 (pg, x0, -8), ++ svld3_vnum_f64 (pg, x0, -3), ++ svld2_vnum_f64 (pg, x0, 0), ++ svld1_vnum_f64 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s16.c +new file mode 100644 +index 000000000..3f708e0f0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s16.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] ++** ld1h (z[0-9]+\.h), p4/z, \[x1\] ++** st2h {\2 - \1}, p0, \[x0\] ++** | ++** ld1h (z[0-9]+\.h), p4/z, \[x1\] ++** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] ++** st2h {\3 - \4}, p0, \[x0\] ++** ) ++** st4h {z0\.h - z3\.h}, p1, \[x0\] ++** st3h {z4\.h - z6\.h}, p2, \[x0\] ++** st1h z7\.h, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svint16x4_t z0, svint16x3_t z4, svint16x2_t stack, ++ svint16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_s16 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_s16 (pg, x0, -8), ++ svld3_vnum_s16 (pg, x0, -3), ++ svld2_vnum_s16 (pg, x0, 0), ++ svld1_vnum_s16 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* 
{ dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s32.c +new file mode 100644 +index 000000000..8c57190ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s32.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] ++** ld1w (z[0-9]+\.s), p4/z, \[x1\] ++** st2w {\2 - \1}, p0, \[x0\] ++** | ++** ld1w (z[0-9]+\.s), p4/z, \[x1\] ++** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] ++** st2w {\3 - \4}, p0, \[x0\] ++** ) ++** st4w {z0\.s - z3\.s}, p1, \[x0\] ++** st3w {z4\.s - z6\.s}, p2, \[x0\] ++** st1w z7\.s, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svint32x4_t z0, svint32x3_t z4, svint32x2_t stack, ++ svint32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_s32 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_s32 (pg, x0, -8), ++ svld3_vnum_s32 (pg, x0, -3), ++ svld2_vnum_s32 (pg, x0, 0), ++ svld1_vnum_s32 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s64.c +new file mode 100644 +index 000000000..e60d049fb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s64.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, 
all ++** ( ++** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] ++** ld1d (z[0-9]+\.d), p4/z, \[x1\] ++** st2d {\2 - \1}, p0, \[x0\] ++** | ++** ld1d (z[0-9]+\.d), p4/z, \[x1\] ++** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] ++** st2d {\3 - \4}, p0, \[x0\] ++** ) ++** st4d {z0\.d - z3\.d}, p1, \[x0\] ++** st3d {z4\.d - z6\.d}, p2, \[x0\] ++** st1d z7\.d, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svint64x4_t z0, svint64x3_t z4, svint64x2_t stack, ++ svint64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_s64 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_s64 (pg, x0, -8), ++ svld3_vnum_s64 (pg, x0, -3), ++ svld2_vnum_s64 (pg, x0, 0), ++ svld1_vnum_s64 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s8.c +new file mode 100644 +index 000000000..bc0058372 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s8.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1b (z[0-9]+\.b), p4/z, \[x1, #1, mul vl\] ++** ld1b (z[0-9]+\.b), p4/z, \[x1\] ++** st2b {\2 - \1}, p0, \[x0\] ++** | ++** ld1b (z[0-9]+\.b), p4/z, \[x1\] ++** ld1b (z[0-9]+\.b), p4/z, \[x1, #1, mul vl\] ++** st2b {\3 - \4}, p0, \[x0\] ++** ) ++** st4b {z0\.b - z3\.b}, p1, \[x0\] ++** st3b {z4\.b - z6\.b}, p2, \[x0\] ++** st1b z7\.b, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svint8x4_t z0, svint8x3_t z4, svint8x2_t stack, ++ svint8_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_s8 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_s8 (pg, x0, -8), ++ svld3_vnum_s8 (pg, x0, -3), ++ svld2_vnum_s8 (pg, x0, 0), ++ svld1_vnum_s8 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4b\t{z0\.b - 
z3\.b}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3b\t{z4\.b - z6\.b}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1b\tz7\.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{(z[0-9]+\.b) - z[0-9]+\.b}.*\tst1b\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+\.b - (z[0-9]+\.b)}.*\tst1b\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u16.c +new file mode 100644 +index 000000000..8aa651a41 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u16.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] ++** ld1h (z[0-9]+\.h), p4/z, \[x1\] ++** st2h {\2 - \1}, p0, \[x0\] ++** | ++** ld1h (z[0-9]+\.h), p4/z, \[x1\] ++** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] ++** st2h {\3 - \4}, p0, \[x0\] ++** ) ++** st4h {z0\.h - z3\.h}, p1, \[x0\] ++** st3h {z4\.h - z6\.h}, p2, \[x0\] ++** st1h z7\.h, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svuint16x4_t z0, svuint16x3_t z4, svuint16x2_t stack, ++ svuint16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_u16 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_u16 (pg, x0, -8), ++ svld3_vnum_u16 (pg, x0, -3), ++ svld2_vnum_u16 (pg, x0, 0), ++ svld1_vnum_u16 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u32.c +new file mode 100644 +index 000000000..9ea3066ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u32.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ 
++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] ++** ld1w (z[0-9]+\.s), p4/z, \[x1\] ++** st2w {\2 - \1}, p0, \[x0\] ++** | ++** ld1w (z[0-9]+\.s), p4/z, \[x1\] ++** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] ++** st2w {\3 - \4}, p0, \[x0\] ++** ) ++** st4w {z0\.s - z3\.s}, p1, \[x0\] ++** st3w {z4\.s - z6\.s}, p2, \[x0\] ++** st1w z7\.s, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svuint32x4_t z0, svuint32x3_t z4, svuint32x2_t stack, ++ svuint32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_u32 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_u32 (pg, x0, -8), ++ svld3_vnum_u32 (pg, x0, -3), ++ svld2_vnum_u32 (pg, x0, 0), ++ svld1_vnum_u32 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u64.c +new file mode 100644 +index 000000000..b64f3b6d5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u64.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] ++** ld1d (z[0-9]+\.d), p4/z, \[x1\] ++** st2d {\2 - \1}, p0, \[x0\] ++** | ++** ld1d (z[0-9]+\.d), p4/z, \[x1\] ++** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] ++** st2d {\3 - \4}, p0, \[x0\] ++** ) ++** st4d {z0\.d - z3\.d}, p1, \[x0\] ++** st3d {z4\.d - z6\.d}, p2, \[x0\] ++** st1d z7\.d, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svuint64x4_t z0, svuint64x3_t z4, svuint64x2_t stack, ++ svuint64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_u64 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_u64 (pg, x0, -8), ++ 
svld3_vnum_u64 (pg, x0, -3), ++ svld2_vnum_u64 (pg, x0, 0), ++ svld1_vnum_u64 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u8.c +new file mode 100644 +index 000000000..5575673ae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u8.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** ( ++** ld1b (z[0-9]+\.b), p4/z, \[x1, #1, mul vl\] ++** ld1b (z[0-9]+\.b), p4/z, \[x1\] ++** st2b {\2 - \1}, p0, \[x0\] ++** | ++** ld1b (z[0-9]+\.b), p4/z, \[x1\] ++** ld1b (z[0-9]+\.b), p4/z, \[x1, #1, mul vl\] ++** st2b {\3 - \4}, p0, \[x0\] ++** ) ++** st4b {z0\.b - z3\.b}, p1, \[x0\] ++** st3b {z4\.b - z6\.b}, p2, \[x0\] ++** st1b z7\.b, p3, \[x0\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svuint8x4_t z0, svuint8x3_t z4, svuint8x2_t stack, ++ svuint8_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_u8 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_u8 (pg, x0, -8), ++ svld3_vnum_u8 (pg, x0, -3), ++ svld2_vnum_u8 (pg, x0, 0), ++ svld1_vnum_u8 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4b\t{z0\.b - z3\.b}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3b\t{z4\.b - z6\.b}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1b\tz7\.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{(z[0-9]+\.b) - z[0-9]+\.b}.*\tst1b\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+\.b - (z[0-9]+\.b)}.*\tst1b\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_bf16.c +new file mode 100644 +index 000000000..94d84df4a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_bf16.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2h {\2\.h - \1\.h}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2h {\3\.h - \4\.h}, p0, \[x0\] ++** ) ++** st4h {z0\.h - z3\.h}, p1, \[x0\] ++** st3h {z4\.h - z6\.h}, p2, \[x0\] ++** st1h z7\.h, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svbfloat16x4_t z0, svbfloat16x3_t z4, svbfloat16x2_t stack, ++ svbfloat16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_bf16 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_bf16 (pg, x0, -8), ++ svld3_vnum_bf16 (pg, x0, -3), ++ svld2_vnum_bf16 (pg, x0, 0), ++ svld1_vnum_bf16 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f16.c +new file mode 100644 +index 000000000..6271365c7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f16.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2h {\2\.h - \1\.h}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2h {\3\.h - \4\.h}, p0, \[x0\] ++** ) ++** st4h {z0\.h - z3\.h}, p1, \[x0\] ++** st3h {z4\.h - z6\.h}, p2, \[x0\] ++** st1h z7\.h, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svfloat16x4_t z0, svfloat16x3_t z4, svfloat16x2_t stack, ++ svfloat16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_f16 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee 
(x0, ++ svld4_vnum_f16 (pg, x0, -8), ++ svld3_vnum_f16 (pg, x0, -3), ++ svld2_vnum_f16 (pg, x0, 0), ++ svld1_vnum_f16 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f32.c +new file mode 100644 +index 000000000..ef89de216 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f32.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2w {\2\.s - \1\.s}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2w {\3\.s - \4\.s}, p0, \[x0\] ++** ) ++** st4w {z0\.s - z3\.s}, p1, \[x0\] ++** st3w {z4\.s - z6\.s}, p2, \[x0\] ++** st1w z7\.s, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svfloat32x4_t z0, svfloat32x3_t z4, svfloat32x2_t stack, ++ svfloat32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_f32 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_f32 (pg, x0, -8), ++ svld3_vnum_f32 (pg, x0, -3), ++ svld2_vnum_f32 (pg, x0, 0), ++ svld1_vnum_f32 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f64.c +new file mode 100644 +index 000000000..4eddf2d1f 
+--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f64.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2d {\2\.d - \1\.d}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2d {\3\.d - \4\.d}, p0, \[x0\] ++** ) ++** st4d {z0\.d - z3\.d}, p1, \[x0\] ++** st3d {z4\.d - z6\.d}, p2, \[x0\] ++** st1d z7\.d, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svfloat64x4_t z0, svfloat64x3_t z4, svfloat64x2_t stack, ++ svfloat64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_f64 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_f64 (pg, x0, -8), ++ svld3_vnum_f64 (pg, x0, -3), ++ svld2_vnum_f64 (pg, x0, 0), ++ svld1_vnum_f64 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s16.c +new file mode 100644 +index 000000000..a4b6af071 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s16.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2h {\2\.h - \1\.h}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2h {\3\.h - \4\.h}, p0, \[x0\] ++** ) ++** st4h {z0\.h - z3\.h}, p1, \[x0\] ++** st3h {z4\.h - z6\.h}, p2, \[x0\] ++** st1h z7\.h, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svint16x4_t z0, svint16x3_t z4, svint16x2_t stack, ++ svint16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_s16 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_s16 (pg, x0, -8), ++ svld3_vnum_s16 (pg, x0, -3), ++ svld2_vnum_s16 (pg, x0, 0), ++ svld1_vnum_s16 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), 
++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s32.c +new file mode 100644 +index 000000000..60b58d6fc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s32.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2w {\2\.s - \1\.s}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2w {\3\.s - \4\.s}, p0, \[x0\] ++** ) ++** st4w {z0\.s - z3\.s}, p1, \[x0\] ++** st3w {z4\.s - z6\.s}, p2, \[x0\] ++** st1w z7\.s, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svint32x4_t z0, svint32x3_t z4, svint32x2_t stack, ++ svint32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_s32 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_s32 (pg, x0, -8), ++ svld3_vnum_s32 (pg, x0, -3), ++ svld2_vnum_s32 (pg, x0, 0), ++ svld1_vnum_s32 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s64.c +new file mode 100644 +index 000000000..b6126aa4c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s64.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian 
-fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2d {\2\.d - \1\.d}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2d {\3\.d - \4\.d}, p0, \[x0\] ++** ) ++** st4d {z0\.d - z3\.d}, p1, \[x0\] ++** st3d {z4\.d - z6\.d}, p2, \[x0\] ++** st1d z7\.d, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svint64x4_t z0, svint64x3_t z4, svint64x2_t stack, ++ svint64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_s64 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_s64 (pg, x0, -8), ++ svld3_vnum_s64 (pg, x0, -3), ++ svld2_vnum_s64 (pg, x0, 0), ++ svld1_vnum_s64 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s8.c +new file mode 100644 +index 000000000..5c16c3c8f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s8.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2b {\2\.b - \1\.b}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2b {\3\.b - \4\.b}, p0, \[x0\] ++** ) ++** st4b {z0\.b - z3\.b}, p1, \[x0\] ++** st3b {z4\.b - z6\.b}, p2, \[x0\] ++** st1b z7\.b, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svint8x4_t z0, svint8x3_t z4, svint8x2_t stack, ++ svint8_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_s8 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_s8 (pg, x0, -8), ++ svld3_vnum_s8 (pg, x0, -3), ++ svld2_vnum_s8 (pg, x0, 0), ++ svld1_vnum_s8 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4b\t{z0\.b - z3\.b}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler 
{\tld3b\t{z4\.b - z6\.b}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1b\tz7\.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{(z[0-9]+)\.b - z[0-9]+\.b}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+\.b - (z[0-9]+)\.b}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u16.c +new file mode 100644 +index 000000000..2b9a90025 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u16.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2h {\2\.h - \1\.h}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2h {\3\.h - \4\.h}, p0, \[x0\] ++** ) ++** st4h {z0\.h - z3\.h}, p1, \[x0\] ++** st3h {z4\.h - z6\.h}, p2, \[x0\] ++** st1h z7\.h, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svuint16x4_t z0, svuint16x3_t z4, svuint16x2_t stack, ++ svuint16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_u16 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_u16 (pg, x0, -8), ++ svld3_vnum_u16 (pg, x0, -3), ++ svld2_vnum_u16 (pg, x0, 0), ++ svld1_vnum_u16 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u32.c +new file mode 100644 +index 000000000..2902f59b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u32.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr 
(z[0-9]+), \[x1\] ++** st2w {\2\.s - \1\.s}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2w {\3\.s - \4\.s}, p0, \[x0\] ++** ) ++** st4w {z0\.s - z3\.s}, p1, \[x0\] ++** st3w {z4\.s - z6\.s}, p2, \[x0\] ++** st1w z7\.s, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svuint32x4_t z0, svuint32x3_t z4, svuint32x2_t stack, ++ svuint32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_u32 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_u32 (pg, x0, -8), ++ svld3_vnum_u32 (pg, x0, -3), ++ svld2_vnum_u32 (pg, x0, 0), ++ svld1_vnum_u32 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u64.c +new file mode 100644 +index 000000000..85b3cfdad +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u64.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2d {\2\.d - \1\.d}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2d {\3\.d - \4\.d}, p0, \[x0\] ++** ) ++** st4d {z0\.d - z3\.d}, p1, \[x0\] ++** st3d {z4\.d - z6\.d}, p2, \[x0\] ++** st1d z7\.d, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svuint64x4_t z0, svuint64x3_t z4, svuint64x2_t stack, ++ svuint64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_u64 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_u64 (pg, x0, -8), ++ svld3_vnum_u64 (pg, x0, -3), ++ svld2_vnum_u64 (pg, x0, 0), ++ svld1_vnum_u64 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { 
scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u8.c +new file mode 100644 +index 000000000..f56acb693 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u8.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee: ++** ( ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** ldr (z[0-9]+), \[x1\] ++** st2b {\2\.b - \1\.b}, p0, \[x0\] ++** | ++** ldr (z[0-9]+), \[x1\] ++** ldr (z[0-9]+), \[x1, #1, mul vl\] ++** st2b {\3\.b - \4\.b}, p0, \[x0\] ++** ) ++** st4b {z0\.b - z3\.b}, p1, \[x0\] ++** st3b {z4\.b - z6\.b}, p2, \[x0\] ++** st1b z7\.b, p3, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (void *x0, svuint8x4_t z0, svuint8x3_t z4, svuint8x2_t stack, ++ svuint8_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ svst2 (p0, x0, stack); ++ svst4 (p1, x0, z0); ++ svst3 (p2, x0, z4); ++ svst1_u8 (p3, x0, z7); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee (x0, ++ svld4_vnum_u8 (pg, x0, -8), ++ svld3_vnum_u8 (pg, x0, -3), ++ svld2_vnum_u8 (pg, x0, 0), ++ svld1_vnum_u8 (pg, x0, 2), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3), ++ svptrue_pat_b64 (SV_VL4)); ++} ++ ++/* { dg-final { scan-assembler {\tld4b\t{z0\.b - z3\.b}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3b\t{z4\.b - z6\.b}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1b\tz7\.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{(z[0-9]+)\.b - z[0-9]+\.b}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+\.b - (z[0-9]+)\.b}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_bf16.c +new file mode 100644 +index 000000000..84d2c406c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_bf16.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1h (z[0-9]+\.h), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4h {z[0-9]+\.h - \1}, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z5\.h - z7\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svbfloat16x3_t z0, svbfloat16x2_t z3, svbfloat16x3_t z5, ++ svbfloat16x4_t stack1, svbfloat16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_bf16 (p0, x0, stack1); ++ svst2_bf16 (p1, x0, z3); ++ svst3_bf16 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1h (z[0-9]+\.h), p3/z, \[x2\] ++** st1h \1, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z0\.h - z2\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svbfloat16x3_t z0, svbfloat16x2_t z3, svbfloat16x3_t z5, ++ svbfloat16x4_t stack1, svbfloat16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_bf16 (p0, x0, stack2); ++ svst2_bf16 (p1, x0, z3); ++ svst3_bf16 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_bf16 (pg, x0, -9), ++ svld2_vnum_bf16 (pg, x0, -2), ++ svld3_vnum_bf16 (pg, x0, 0), ++ svld4_vnum_bf16 (pg, x0, 8), ++ svld1_vnum_bf16 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f16.c +new file mode 100644 +index 000000000..dd4ccc3b2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f16.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1h (z[0-9]+\.h), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4h {z[0-9]+\.h - \1}, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z5\.h - z7\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svfloat16x3_t z0, svfloat16x2_t z3, svfloat16x3_t z5, ++ svfloat16x4_t stack1, svfloat16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_f16 (p0, x0, stack1); ++ svst2_f16 (p1, x0, z3); ++ svst3_f16 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1h (z[0-9]+\.h), p3/z, \[x2\] ++** st1h \1, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z0\.h - z2\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svfloat16x3_t z0, svfloat16x2_t z3, svfloat16x3_t z5, ++ svfloat16x4_t stack1, svfloat16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_f16 (p0, x0, stack2); ++ svst2_f16 (p1, x0, z3); ++ svst3_f16 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_f16 (pg, x0, -9), ++ svld2_vnum_f16 (pg, x0, -2), ++ svld3_vnum_f16 (pg, x0, 0), ++ svld4_vnum_f16 (pg, x0, 8), ++ svld1_vnum_f16 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f32.c +new file mode 100644 +index 000000000..26ea2a308 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f32.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1w (z[0-9]+\.s), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4w {z[0-9]+\.s - \1}, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z5\.s - z7\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svfloat32x3_t z0, svfloat32x2_t z3, svfloat32x3_t z5, ++ svfloat32x4_t stack1, svfloat32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_f32 (p0, x0, stack1); ++ svst2_f32 (p1, x0, z3); ++ svst3_f32 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1w (z[0-9]+\.s), p3/z, \[x2\] ++** st1w \1, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z0\.s - z2\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svfloat32x3_t z0, svfloat32x2_t z3, svfloat32x3_t z5, ++ svfloat32x4_t stack1, svfloat32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_f32 (p0, x0, stack2); ++ svst2_f32 (p1, x0, z3); ++ svst3_f32 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_f32 (pg, x0, -9), ++ svld2_vnum_f32 (pg, x0, -2), ++ svld3_vnum_f32 (pg, x0, 0), ++ svld4_vnum_f32 (pg, x0, 8), ++ svld1_vnum_f32 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f64.c +new file mode 100644 +index 000000000..62aded51c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f64.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1d (z[0-9]+\.d), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4d {z[0-9]+\.d - \1}, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z5\.d - z7\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svfloat64x3_t z0, svfloat64x2_t z3, svfloat64x3_t z5, ++ svfloat64x4_t stack1, svfloat64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_f64 (p0, x0, stack1); ++ svst2_f64 (p1, x0, z3); ++ svst3_f64 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1d (z[0-9]+\.d), p3/z, \[x2\] ++** st1d \1, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z0\.d - z2\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svfloat64x3_t z0, svfloat64x2_t z3, svfloat64x3_t z5, ++ svfloat64x4_t stack1, svfloat64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_f64 (p0, x0, stack2); ++ svst2_f64 (p1, x0, z3); ++ svst3_f64 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_f64 (pg, x0, -9), ++ svld2_vnum_f64 (pg, x0, -2), ++ svld3_vnum_f64 (pg, x0, 0), ++ svld4_vnum_f64 (pg, x0, 8), ++ svld1_vnum_f64 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s16.c +new file mode 100644 +index 000000000..204ef9a92 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s16.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1h (z[0-9]+\.h), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4h {z[0-9]+\.h - \1}, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z5\.h - z7\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svint16x3_t z0, svint16x2_t z3, svint16x3_t z5, ++ svint16x4_t stack1, svint16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_s16 (p0, x0, stack1); ++ svst2_s16 (p1, x0, z3); ++ svst3_s16 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1h (z[0-9]+\.h), p3/z, \[x2\] ++** st1h \1, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z0\.h - z2\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svint16x3_t z0, svint16x2_t z3, svint16x3_t z5, ++ svint16x4_t stack1, svint16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_s16 (p0, x0, stack2); ++ svst2_s16 (p1, x0, z3); ++ svst3_s16 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_s16 (pg, x0, -9), ++ svld2_vnum_s16 (pg, x0, -2), ++ svld3_vnum_s16 (pg, x0, 0), ++ svld4_vnum_s16 (pg, x0, 8), ++ svld1_vnum_s16 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s32.c +new file mode 100644 +index 000000000..9ae4567a4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s32.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1w (z[0-9]+\.s), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4w {z[0-9]+\.s - \1\}, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z5\.s - z7\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svint32x3_t z0, svint32x2_t z3, svint32x3_t z5, ++ svint32x4_t stack1, svint32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_s32 (p0, x0, stack1); ++ svst2_s32 (p1, x0, z3); ++ svst3_s32 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1w (z[0-9]+\.s), p3/z, \[x2\] ++** st1w \1, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z0\.s - z2\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svint32x3_t z0, svint32x2_t z3, svint32x3_t z5, ++ svint32x4_t stack1, svint32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_s32 (p0, x0, stack2); ++ svst2_s32 (p1, x0, z3); ++ svst3_s32 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_s32 (pg, x0, -9), ++ svld2_vnum_s32 (pg, x0, -2), ++ svld3_vnum_s32 (pg, x0, 0), ++ svld4_vnum_s32 (pg, x0, 8), ++ svld1_vnum_s32 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s64.c +new file mode 100644 +index 000000000..0b8a2e213 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s64.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1d (z[0-9]+\.d), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4d {z[0-9]+\.d - \1}, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z5\.d - z7\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svint64x3_t z0, svint64x2_t z3, svint64x3_t z5, ++ svint64x4_t stack1, svint64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_s64 (p0, x0, stack1); ++ svst2_s64 (p1, x0, z3); ++ svst3_s64 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1d (z[0-9]+\.d), p3/z, \[x2\] ++** st1d \1, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z0\.d - z2\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svint64x3_t z0, svint64x2_t z3, svint64x3_t z5, ++ svint64x4_t stack1, svint64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_s64 (p0, x0, stack2); ++ svst2_s64 (p1, x0, z3); ++ svst3_s64 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_s64 (pg, x0, -9), ++ svld2_vnum_s64 (pg, x0, -2), ++ svld3_vnum_s64 (pg, x0, 0), ++ svld4_vnum_s64 (pg, x0, 8), ++ svld1_vnum_s64 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s8.c +new file mode 100644 +index 000000000..0afbe71aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s8.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1b (z[0-9]+\.b), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4b {z[0-9]+\.b - \1}, p0, \[x0\] ++** st2b {z3\.b - z4\.b}, p1, \[x0\] ++** st3b {z5\.b - z7\.b}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svint8x3_t z0, svint8x2_t z3, svint8x3_t z5, ++ svint8x4_t stack1, svint8_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_s8 (p0, x0, stack1); ++ svst2_s8 (p1, x0, z3); ++ svst3_s8 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1b (z[0-9]+\.b), p3/z, \[x2\] ++** st1b \1, p0, \[x0\] ++** st2b {z3\.b - z4\.b}, p1, \[x0\] ++** st3b {z0\.b - z2\.b}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svint8x3_t z0, svint8x2_t z3, svint8x3_t z5, ++ svint8x4_t stack1, svint8_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_s8 (p0, x0, stack2); ++ svst2_s8 (p1, x0, z3); ++ svst3_s8 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_s8 (pg, x0, -9), ++ svld2_vnum_s8 (pg, x0, -2), ++ svld3_vnum_s8 (pg, x0, 0), ++ svld4_vnum_s8 (pg, x0, 8), ++ svld1_vnum_s8 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3b\t{z0\.b - z2\.b}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{z3\.b - z4\.b}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3b\t{z5\.b - z7\.b}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4b\t{(z[0-9]+\.b) - z[0-9]+\.b}.*\tst1b\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+\.b - (z[0-9]+\.b)}.*\tst1b\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+\.b), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1b\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u16.c +new file mode 100644 +index 000000000..f010f5ebb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u16.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1h (z[0-9]+\.h), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4h {z[0-9]+\.h - \1}, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z5\.h - z7\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svuint16x3_t z0, svuint16x2_t z3, svuint16x3_t z5, ++ svuint16x4_t stack1, svuint16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_u16 (p0, x0, stack1); ++ svst2_u16 (p1, x0, z3); ++ svst3_u16 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1h (z[0-9]+\.h), p3/z, \[x2\] ++** st1h \1, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z0\.h - z2\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svuint16x3_t z0, svuint16x2_t z3, svuint16x3_t z5, ++ svuint16x4_t stack1, svuint16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_u16 (p0, x0, stack2); ++ svst2_u16 (p1, x0, z3); ++ svst3_u16 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_u16 (pg, x0, -9), ++ svld2_vnum_u16 (pg, x0, -2), ++ svld3_vnum_u16 (pg, x0, 0), ++ svld4_vnum_u16 (pg, x0, 8), ++ svld1_vnum_u16 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u32.c +new file mode 100644 +index 000000000..60d903a31 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u32.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1w (z[0-9]+\.s), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4w {z[0-9]+\.s - \1}, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z5\.s - z7\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svuint32x3_t z0, svuint32x2_t z3, svuint32x3_t z5, ++ svuint32x4_t stack1, svuint32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_u32 (p0, x0, stack1); ++ svst2_u32 (p1, x0, z3); ++ svst3_u32 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1w (z[0-9]+\.s), p3/z, \[x2\] ++** st1w \1, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z0\.s - z2\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svuint32x3_t z0, svuint32x2_t z3, svuint32x3_t z5, ++ svuint32x4_t stack1, svuint32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_u32 (p0, x0, stack2); ++ svst2_u32 (p1, x0, z3); ++ svst3_u32 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_u32 (pg, x0, -9), ++ svld2_vnum_u32 (pg, x0, -2), ++ svld3_vnum_u32 (pg, x0, 0), ++ svld4_vnum_u32 (pg, x0, 8), ++ svld1_vnum_u32 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u64.c +new file mode 100644 +index 000000000..948f426f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u64.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1d (z[0-9]+\.d), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4d {z[0-9]+\.d - \1}, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z5\.d - z7\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svuint64x3_t z0, svuint64x2_t z3, svuint64x3_t z5, ++ svuint64x4_t stack1, svuint64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_u64 (p0, x0, stack1); ++ svst2_u64 (p1, x0, z3); ++ svst3_u64 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1d (z[0-9]+\.d), p3/z, \[x2\] ++** st1d \1, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z0\.d - z2\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svuint64x3_t z0, svuint64x2_t z3, svuint64x3_t z5, ++ svuint64x4_t stack1, svuint64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_u64 (p0, x0, stack2); ++ svst2_u64 (p1, x0, z3); ++ svst3_u64 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_u64 (pg, x0, -9), ++ svld2_vnum_u64 (pg, x0, -2), ++ svld3_vnum_u64 (pg, x0, 0), ++ svld4_vnum_u64 (pg, x0, 8), ++ svld1_vnum_u64 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u8.c +new file mode 100644 +index 000000000..8049ec078 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u8.c +@@ -0,0 +1,71 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ptrue p3\.b, all ++** ... ++** ld1b (z[0-9]+\.b), p3/z, \[x1, #3, mul vl\] ++** ... 
++** st4b {z[0-9]+\.b - \1}, p0, \[x0\] ++** st2b {z3\.b - z4\.b}, p1, \[x0\] ++** st3b {z5\.b - z7\.b}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svuint8x3_t z0, svuint8x2_t z3, svuint8x3_t z5, ++ svuint8x4_t stack1, svuint8_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_u8 (p0, x0, stack1); ++ svst2_u8 (p1, x0, z3); ++ svst3_u8 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1b (z[0-9]+\.b), p3/z, \[x2\] ++** st1b \1, p0, \[x0\] ++** st2b {z3\.b - z4\.b}, p1, \[x0\] ++** st3b {z0\.b - z2\.b}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svuint8x3_t z0, svuint8x2_t z3, svuint8x3_t z5, ++ svuint8x4_t stack1, svuint8_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_u8 (p0, x0, stack2); ++ svst2_u8 (p1, x0, z3); ++ svst3_u8 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_u8 (pg, x0, -9), ++ svld2_vnum_u8 (pg, x0, -2), ++ svld3_vnum_u8 (pg, x0, 0), ++ svld4_vnum_u8 (pg, x0, 8), ++ svld1_vnum_u8 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3b\t{z0\.b - z2\.b}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{z3\.b - z4\.b}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3b\t{z5\.b - z7\.b}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4b\t{(z[0-9]+\.b) - z[0-9]+\.b}.*\tst1b\t\1, p[0-7], \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+\.b - (z[0-9]+\.b)}.*\tst1b\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+\.b), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1b\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_bf16.c +new file mode 100644 +index 000000000..3dc9e42ed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_bf16.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4h {z[0-9]+\.h - \1\.h}, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z5\.h - z7\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svbfloat16x3_t z0, svbfloat16x2_t z3, svbfloat16x3_t z5, ++ svbfloat16x4_t stack1, svbfloat16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_bf16 (p0, x0, stack1); ++ svst2_bf16 (p1, x0, z3); ++ svst3_bf16 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1h (z[0-9]+\.h), p3/z, \[x2\] ++** st1h \1, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z0\.h - z2\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svbfloat16x3_t z0, svbfloat16x2_t z3, svbfloat16x3_t z5, ++ svbfloat16x4_t stack1, svbfloat16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_bf16 (p0, x0, stack2); ++ svst2_bf16 (p1, x0, z3); ++ svst3_bf16 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_bf16 (pg, x0, -9), ++ svld2_vnum_bf16 (pg, x0, -2), ++ svld3_vnum_bf16 (pg, x0, 0), ++ svld4_vnum_bf16 (pg, x0, 8), ++ svld1_vnum_bf16 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f16.c +new file mode 100644 +index 000000000..80a2e3aae +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f16.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4h {z[0-9]+\.h - \1\.h}, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z5\.h - z7\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svfloat16x3_t z0, svfloat16x2_t z3, svfloat16x3_t z5, ++ svfloat16x4_t stack1, svfloat16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_f16 (p0, x0, stack1); ++ svst2_f16 (p1, x0, z3); ++ svst3_f16 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1h (z[0-9]+\.h), p3/z, \[x2\] ++** st1h \1, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z0\.h - z2\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svfloat16x3_t z0, svfloat16x2_t z3, svfloat16x3_t z5, ++ svfloat16x4_t stack1, svfloat16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_f16 (p0, x0, stack2); ++ svst2_f16 (p1, x0, z3); ++ svst3_f16 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_f16 (pg, x0, -9), ++ svld2_vnum_f16 (pg, x0, -2), ++ svld3_vnum_f16 (pg, x0, 0), ++ svld4_vnum_f16 (pg, x0, 8), ++ svld1_vnum_f16 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f32.c +new file mode 100644 +index 000000000..40ff42128 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f32.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4w {z[0-9]+\.s - \1\.s}, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z5\.s - z7\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svfloat32x3_t z0, svfloat32x2_t z3, svfloat32x3_t z5, ++ svfloat32x4_t stack1, svfloat32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_f32 (p0, x0, stack1); ++ svst2_f32 (p1, x0, z3); ++ svst3_f32 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1w (z[0-9]+\.s), p3/z, \[x2\] ++** st1w \1, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z0\.s - z2\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svfloat32x3_t z0, svfloat32x2_t z3, svfloat32x3_t z5, ++ svfloat32x4_t stack1, svfloat32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_f32 (p0, x0, stack2); ++ svst2_f32 (p1, x0, z3); ++ svst3_f32 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_f32 (pg, x0, -9), ++ svld2_vnum_f32 (pg, x0, -2), ++ svld3_vnum_f32 (pg, x0, 0), ++ svld4_vnum_f32 (pg, x0, 8), ++ svld1_vnum_f32 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f64.c +new file mode 100644 +index 000000000..ee219ccdc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f64.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4d {z[0-9]+\.d - \1\.d}, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z5\.d - z7\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svfloat64x3_t z0, svfloat64x2_t z3, svfloat64x3_t z5, ++ svfloat64x4_t stack1, svfloat64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_f64 (p0, x0, stack1); ++ svst2_f64 (p1, x0, z3); ++ svst3_f64 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1d (z[0-9]+\.d), p3/z, \[x2\] ++** st1d \1, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z0\.d - z2\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svfloat64x3_t z0, svfloat64x2_t z3, svfloat64x3_t z5, ++ svfloat64x4_t stack1, svfloat64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_f64 (p0, x0, stack2); ++ svst2_f64 (p1, x0, z3); ++ svst3_f64 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_f64 (pg, x0, -9), ++ svld2_vnum_f64 (pg, x0, -2), ++ svld3_vnum_f64 (pg, x0, 0), ++ svld4_vnum_f64 (pg, x0, 8), ++ svld1_vnum_f64 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s16.c +new file mode 100644 +index 000000000..ade75cb34 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s16.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4h {z[0-9]+\.h - \1\.h}, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z5\.h - z7\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svint16x3_t z0, svint16x2_t z3, svint16x3_t z5, ++ svint16x4_t stack1, svint16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_s16 (p0, x0, stack1); ++ svst2_s16 (p1, x0, z3); ++ svst3_s16 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1h (z[0-9]+\.h), p3/z, \[x2\] ++** st1h \1, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z0\.h - z2\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svint16x3_t z0, svint16x2_t z3, svint16x3_t z5, ++ svint16x4_t stack1, svint16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_s16 (p0, x0, stack2); ++ svst2_s16 (p1, x0, z3); ++ svst3_s16 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_s16 (pg, x0, -9), ++ svld2_vnum_s16 (pg, x0, -2), ++ svld3_vnum_s16 (pg, x0, 0), ++ svld4_vnum_s16 (pg, x0, 8), ++ svld1_vnum_s16 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s32.c +new file mode 100644 +index 000000000..a6c06e235 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s32.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4w {z[0-9]+\.s - \1\.s}, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z5\.s - z7\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svint32x3_t z0, svint32x2_t z3, svint32x3_t z5, ++ svint32x4_t stack1, svint32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_s32 (p0, x0, stack1); ++ svst2_s32 (p1, x0, z3); ++ svst3_s32 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1w (z[0-9]+\.s), p3/z, \[x2\] ++** st1w \1, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z0\.s - z2\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svint32x3_t z0, svint32x2_t z3, svint32x3_t z5, ++ svint32x4_t stack1, svint32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_s32 (p0, x0, stack2); ++ svst2_s32 (p1, x0, z3); ++ svst3_s32 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_s32 (pg, x0, -9), ++ svld2_vnum_s32 (pg, x0, -2), ++ svld3_vnum_s32 (pg, x0, 0), ++ svld4_vnum_s32 (pg, x0, 8), ++ svld1_vnum_s32 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s64.c +new file mode 100644 +index 000000000..219c71d82 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s64.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4d {z[0-9]+\.d - \1\.d}, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z5\.d - z7\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svint64x3_t z0, svint64x2_t z3, svint64x3_t z5, ++ svint64x4_t stack1, svint64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_s64 (p0, x0, stack1); ++ svst2_s64 (p1, x0, z3); ++ svst3_s64 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1d (z[0-9]+\.d), p3/z, \[x2\] ++** st1d \1, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z0\.d - z2\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svint64x3_t z0, svint64x2_t z3, svint64x3_t z5, ++ svint64x4_t stack1, svint64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_s64 (p0, x0, stack2); ++ svst2_s64 (p1, x0, z3); ++ svst3_s64 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_s64 (pg, x0, -9), ++ svld2_vnum_s64 (pg, x0, -2), ++ svld3_vnum_s64 (pg, x0, 0), ++ svld4_vnum_s64 (pg, x0, 8), ++ svld1_vnum_s64 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s8.c +new file mode 100644 +index 000000000..c48d391ca +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s8.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4b {z[0-9]+\.b - \1\.b}, p0, \[x0\] ++** st2b {z3\.b - z4\.b}, p1, \[x0\] ++** st3b {z5\.b - z7\.b}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svint8x3_t z0, svint8x2_t z3, svint8x3_t z5, ++ svint8x4_t stack1, svint8_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_s8 (p0, x0, stack1); ++ svst2_s8 (p1, x0, z3); ++ svst3_s8 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1b (z[0-9]+\.b), p3/z, \[x2\] ++** st1b \1, p0, \[x0\] ++** st2b {z3\.b - z4\.b}, p1, \[x0\] ++** st3b {z0\.b - z2\.b}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svint8x3_t z0, svint8x2_t z3, svint8x3_t z5, ++ svint8x4_t stack1, svint8_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_s8 (p0, x0, stack2); ++ svst2_s8 (p1, x0, z3); ++ svst3_s8 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_s8 (pg, x0, -9), ++ svld2_vnum_s8 (pg, x0, -2), ++ svld3_vnum_s8 (pg, x0, 0), ++ svld4_vnum_s8 (pg, x0, 8), ++ svld1_vnum_s8 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3b\t{z0\.b - z2\.b}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{z3\.b - z4\.b}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3b\t{z5\.b - z7\.b}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4b\t{(z[0-9]+)\.b - z[0-9]+\.b}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+\.b - (z[0-9]+)\.b}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+\.b), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1b\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u16.c +new file mode 100644 +index 000000000..6c635fd94 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u16.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4h {z[0-9]+\.h - \1\.h}, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z5\.h - z7\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svuint16x3_t z0, svuint16x2_t z3, svuint16x3_t z5, ++ svuint16x4_t stack1, svuint16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_u16 (p0, x0, stack1); ++ svst2_u16 (p1, x0, z3); ++ svst3_u16 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1h (z[0-9]+\.h), p3/z, \[x2\] ++** st1h \1, p0, \[x0\] ++** st2h {z3\.h - z4\.h}, p1, \[x0\] ++** st3h {z0\.h - z2\.h}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svuint16x3_t z0, svuint16x2_t z3, svuint16x3_t z5, ++ svuint16x4_t stack1, svuint16_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_u16 (p0, x0, stack2); ++ svst2_u16 (p1, x0, z3); ++ svst3_u16 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_u16 (pg, x0, -9), ++ svld2_vnum_u16 (pg, x0, -2), ++ svld3_vnum_u16 (pg, x0, 0), ++ svld4_vnum_u16 (pg, x0, 8), ++ svld1_vnum_u16 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u32.c +new file mode 100644 +index 000000000..c31d45426 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u32.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4w {z[0-9]+\.s - \1\.s}, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z5\.s - z7\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svuint32x3_t z0, svuint32x2_t z3, svuint32x3_t z5, ++ svuint32x4_t stack1, svuint32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_u32 (p0, x0, stack1); ++ svst2_u32 (p1, x0, z3); ++ svst3_u32 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1w (z[0-9]+\.s), p3/z, \[x2\] ++** st1w \1, p0, \[x0\] ++** st2w {z3\.s - z4\.s}, p1, \[x0\] ++** st3w {z0\.s - z2\.s}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svuint32x3_t z0, svuint32x2_t z3, svuint32x3_t z5, ++ svuint32x4_t stack1, svuint32_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_u32 (p0, x0, stack2); ++ svst2_u32 (p1, x0, z3); ++ svst3_u32 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_u32 (pg, x0, -9), ++ svld2_vnum_u32 (pg, x0, -2), ++ svld3_vnum_u32 (pg, x0, 0), ++ svld4_vnum_u32 (pg, x0, 8), ++ svld1_vnum_u32 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u64.c +new file mode 100644 +index 000000000..969b258b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u64.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4d {z[0-9]+\.d - \1\.d}, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z5\.d - z7\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svuint64x3_t z0, svuint64x2_t z3, svuint64x3_t z5, ++ svuint64x4_t stack1, svuint64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_u64 (p0, x0, stack1); ++ svst2_u64 (p1, x0, z3); ++ svst3_u64 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1d (z[0-9]+\.d), p3/z, \[x2\] ++** st1d \1, p0, \[x0\] ++** st2d {z3\.d - z4\.d}, p1, \[x0\] ++** st3d {z0\.d - z2\.d}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svuint64x3_t z0, svuint64x2_t z3, svuint64x3_t z5, ++ svuint64x4_t stack1, svuint64_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_u64 (p0, x0, stack2); ++ svst2_u64 (p1, x0, z3); ++ svst3_u64 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_u64 (pg, x0, -9), ++ svld2_vnum_u64 (pg, x0, -2), ++ svld3_vnum_u64 (pg, x0, 0), ++ svld4_vnum_u64 (pg, x0, 8), ++ svld1_vnum_u64 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u8.c +new file mode 100644 +index 000000000..d18604784 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u8.c +@@ -0,0 +1,70 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** callee1: ++** ... ++** ldr (z[0-9]+), \[x1, #3, mul vl\] ++** ... 
++** st4b {z[0-9]+\.b - \1\.b}, p0, \[x0\] ++** st2b {z3\.b - z4\.b}, p1, \[x0\] ++** st3b {z5\.b - z7\.b}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee1 (void *x0, svuint8x3_t z0, svuint8x2_t z3, svuint8x3_t z5, ++ svuint8x4_t stack1, svuint8_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst4_u8 (p0, x0, stack1); ++ svst2_u8 (p1, x0, z3); ++ svst3_u8 (p2, x0, z5); ++} ++ ++/* ++** callee2: ++** ptrue p3\.b, all ++** ld1b (z[0-9]+\.b), p3/z, \[x2\] ++** st1b \1, p0, \[x0\] ++** st2b {z3\.b - z4\.b}, p1, \[x0\] ++** st3b {z0\.b - z2\.b}, p2, \[x0\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee2 (void *x0, svuint8x3_t z0, svuint8x2_t z3, svuint8x3_t z5, ++ svuint8x4_t stack1, svuint8_t stack2, svbool_t p0, ++ svbool_t p1, svbool_t p2) ++{ ++ svst1_u8 (p0, x0, stack2); ++ svst2_u8 (p1, x0, z3); ++ svst3_u8 (p2, x0, z0); ++} ++ ++void __attribute__((noipa)) ++caller (void *x0) ++{ ++ svbool_t pg; ++ pg = svptrue_b8 (); ++ callee1 (x0, ++ svld3_vnum_u8 (pg, x0, -9), ++ svld2_vnum_u8 (pg, x0, -2), ++ svld3_vnum_u8 (pg, x0, 0), ++ svld4_vnum_u8 (pg, x0, 8), ++ svld1_vnum_u8 (pg, x0, 5), ++ svptrue_pat_b8 (SV_VL1), ++ svptrue_pat_b16 (SV_VL2), ++ svptrue_pat_b32 (SV_VL3)); ++} ++ ++/* { dg-final { scan-assembler {\tld3b\t{z0\.b - z2\.b}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{z3\.b - z4\.b}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld3b\t{z5\.b - z7\.b}, p[0-7]/z, \[x0\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4b\t{(z[0-9]+)\.b - z[0-9]+\.b}.*\tstr\t\1, \[x1\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+\.b - (z[0-9]+)\.b}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ ++/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+\.b), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1b\t\1, p[0-7], \[x2\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_7.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_7.c +new file mode 100644 +index 000000000..15c022486 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_7.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++/* ++** callee: ++** ... ++** ldr (x[0-9]+), \[sp\] ++** ... 
++** ld1b (z[0-9]+\.b), p[1-3]/z, \[\1\] ++** st1b \2, p0, \[x0, x7\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (int8_t *x0, int x1, int x2, int x3, ++ int x4, int x5, svbool_t p0, int x6, int64_t x7, ++ svint32x4_t z0, svint32x4_t z4, svint8_t stack) ++{ ++ svst1 (p0, x0 + x7, stack); ++} ++ ++void __attribute__((noipa)) ++caller (int8_t *x0, svbool_t p0, svint32x4_t z0, svint32x4_t z4) ++{ ++ callee (x0, 1, 2, 3, 4, 5, p0, 6, 7, z0, z4, svdup_s8 (42)); ++} ++ ++/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.b), #42\n.*\tst1b\t\1, p[0-7], \[(x[0-9]+)\]\n.*\tstr\t\2, \[sp\]\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_8.c +new file mode 100644 +index 000000000..93ace26f5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_8.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++/* ++** callee: ++** ptrue (p[1-3])\.b, all ++** ld1b (z[0-9]+\.b), \1/z, \[x4\] ++** st1b \2, p0, \[x0, x7\] ++** ret ++*/ ++void __attribute__((noipa)) ++callee (int8_t *x0, int x1, int x2, int x3, ++ svint32x4_t z0, svint32x4_t z4, svint8_t stack, ++ int x5, svbool_t p0, int x6, int64_t x7) ++{ ++ svst1 (p0, x0 + x7, stack); ++} ++ ++void __attribute__((noipa)) ++caller (int8_t *x0, svbool_t p0, svint32x4_t z0, svint32x4_t z4) ++{ ++ callee (x0, 1, 2, 3, z0, z4, svdup_s8 (42), 5, p0, 6, 7); ++} ++ ++/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.b), #42\n.*\tst1b\t\1, p[0-7], \[x4\]\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_9.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_9.c +new file mode 100644 +index 000000000..ad9affadf +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_9.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile { target lp64 } } */ ++/* { dg-options "-O -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++/* ++** callee: ++** ldr (x[0-9]+), \[sp, 8\] ++** ldr p0, \[\1\] ++** ret ++*/ ++svbool_t __attribute__((noipa)) ++callee (svint64x4_t z0, svint16x4_t z4, ++ svint64_t stack1, svint32_t stack2, ++ svint16_t stack3, svint8_t stack4, ++ svuint64_t stack5, svuint32_t stack6, ++ svuint16_t stack7, svuint8_t stack8, ++ svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3, ++ svbool_t stack9, svbool_t stack10) ++{ ++ return stack10; ++} ++ ++uint64_t __attribute__((noipa)) ++caller (int64_t *x0, int16_t *x1, svbool_t p0) ++{ ++ svbool_t res; ++ res = callee (svld4 (p0, x0), ++ svld4 (p0, x1), ++ svdup_s64 (1), ++ svdup_s32 (2), ++ svdup_s16 (3), ++ svdup_s8 (4), ++ svdup_u64 (5), ++ svdup_u32 (6), ++ svdup_u16 (7), ++ svdup_u8 (8), ++ svptrue_pat_b8 (SV_VL5), ++ svptrue_pat_b16 (SV_VL6), ++ svptrue_pat_b32 (SV_VL7), ++ svptrue_pat_b64 (SV_VL8), ++ svptrue_pat_b8 (SV_MUL3), ++ svptrue_pat_b16 (SV_MUL3)); ++ return svcntp_b8 (res, res); ++} ++ ++/* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.b, mul3\n\tstr\t\1, \[(x[0-9]+)\]\n.*\tstr\t\2, \[sp\]\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.h, mul3\n\tstr\t\1, \[(x[0-9]+)\]\n.*\tstr\t\2, \[sp, 8\]\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_1.c +new file mode 100644 +index 000000000..e5fceb14b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_1.c +@@ -0,0 +1,107 @@ ++/* { dg-options "-O -msve-vector-bits=256 -fomit-frame-pointer" } */ 
++ ++#include ++ ++typedef bfloat16_t bfloat16x16_t __attribute__((vector_size (32))); ++typedef float16_t float16x16_t __attribute__((vector_size (32))); ++typedef float32_t float32x8_t __attribute__((vector_size (32))); ++typedef float64_t float64x4_t __attribute__((vector_size (32))); ++typedef int8_t int8x32_t __attribute__((vector_size (32))); ++typedef int16_t int16x16_t __attribute__((vector_size (32))); ++typedef int32_t int32x8_t __attribute__((vector_size (32))); ++typedef int64_t int64x4_t __attribute__((vector_size (32))); ++typedef uint8_t uint8x32_t __attribute__((vector_size (32))); ++typedef uint16_t uint16x16_t __attribute__((vector_size (32))); ++typedef uint32_t uint32x8_t __attribute__((vector_size (32))); ++typedef uint64_t uint64x4_t __attribute__((vector_size (32))); ++ ++void bfloat16_callee (bfloat16x16_t); ++void float16_callee (float16x16_t); ++void float32_callee (float32x8_t); ++void float64_callee (float64x4_t); ++void int8_callee (int8x32_t); ++void int16_callee (int16x16_t); ++void int32_callee (int32x8_t); ++void int64_callee (int64x4_t); ++void uint8_callee (uint8x32_t); ++void uint16_callee (uint16x16_t); ++void uint32_callee (uint32x8_t); ++void uint64_callee (uint64x4_t); ++ ++void ++bfloat16_caller (bfloat16_t val) ++{ ++ bfloat16_callee (svdup_bf16 (val)); ++} ++ ++void ++float16_caller (void) ++{ ++ float16_callee (svdup_f16 (1.0)); ++} ++ ++void ++float32_caller (void) ++{ ++ float32_callee (svdup_f32 (2.0)); ++} ++ ++void ++float64_caller (void) ++{ ++ float64_callee (svdup_f64 (3.0)); ++} ++ ++void ++int8_caller (void) ++{ ++ int8_callee (svindex_s8 (0, 1)); ++} ++ ++void ++int16_caller (void) ++{ ++ int16_callee (svindex_s16 (0, 2)); ++} ++ ++void ++int32_caller (void) ++{ ++ int32_callee (svindex_s32 (0, 3)); ++} ++ ++void ++int64_caller (void) ++{ ++ int64_callee (svindex_s64 (0, 4)); ++} ++ ++void ++uint8_caller (void) ++{ ++ uint8_callee (svindex_u8 (1, 1)); ++} ++ ++void ++uint16_caller (void) ++{ ++ uint16_callee (svindex_u16 (1, 2)); ++} ++ ++void ++uint32_caller (void) ++{ ++ uint32_callee (svindex_u32 (1, 3)); ++} ++ ++void ++uint64_caller (void) ++{ ++ uint64_callee (svindex_u64 (1, 4)); ++} ++ ++/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0\]} 2 } } */ ++/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7], \[x0\]} 4 } } */ ++/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x0\]} 3 } } */ ++/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x0\]} 3 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tx0, sp, #?16\n} 12 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_2.c +new file mode 100644 +index 000000000..875567f01 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_2.c +@@ -0,0 +1,107 @@ ++/* { dg-options "-O -msve-vector-bits=256 -fomit-frame-pointer" } */ ++ ++#include ++ ++typedef bfloat16_t bfloat16x16_t __attribute__((vector_size (32))); ++typedef float16_t float16x16_t __attribute__((vector_size (32))); ++typedef float32_t float32x8_t __attribute__((vector_size (32))); ++typedef float64_t float64x4_t __attribute__((vector_size (32))); ++typedef int8_t int8x32_t __attribute__((vector_size (32))); ++typedef int16_t int16x16_t __attribute__((vector_size (32))); ++typedef int32_t int32x8_t __attribute__((vector_size (32))); ++typedef int64_t int64x4_t __attribute__((vector_size (32))); ++typedef uint8_t uint8x32_t __attribute__((vector_size (32))); 
++typedef uint16_t uint16x16_t __attribute__((vector_size (32))); ++typedef uint32_t uint32x8_t __attribute__((vector_size (32))); ++typedef uint64_t uint64x4_t __attribute__((vector_size (32))); ++ ++void bfloat16_callee (svbfloat16_t); ++void float16_callee (svfloat16_t); ++void float32_callee (svfloat32_t); ++void float64_callee (svfloat64_t); ++void int8_callee (svint8_t); ++void int16_callee (svint16_t); ++void int32_callee (svint32_t); ++void int64_callee (svint64_t); ++void uint8_callee (svuint8_t); ++void uint16_callee (svuint16_t); ++void uint32_callee (svuint32_t); ++void uint64_callee (svuint64_t); ++ ++void ++bfloat16_caller (bfloat16x16_t arg) ++{ ++ bfloat16_callee (arg); ++} ++ ++void ++float16_caller (float16x16_t arg) ++{ ++ float16_callee (arg); ++} ++ ++void ++float32_caller (float32x8_t arg) ++{ ++ float32_callee (arg); ++} ++ ++void ++float64_caller (float64x4_t arg) ++{ ++ float64_callee (arg); ++} ++ ++void ++int8_caller (int8x32_t arg) ++{ ++ int8_callee (arg); ++} ++ ++void ++int16_caller (int16x16_t arg) ++{ ++ int16_callee (arg); ++} ++ ++void ++int32_caller (int32x8_t arg) ++{ ++ int32_callee (arg); ++} ++ ++void ++int64_caller (int64x4_t arg) ++{ ++ int64_callee (arg); ++} ++ ++void ++uint8_caller (uint8x32_t arg) ++{ ++ uint8_callee (arg); ++} ++ ++void ++uint16_caller (uint16x16_t arg) ++{ ++ uint16_callee (arg); ++} ++ ++void ++uint32_caller (uint32x8_t arg) ++{ ++ uint32_callee (arg); ++} ++ ++void ++uint64_caller (uint64x4_t arg) ++{ ++ uint64_callee (arg); ++} ++ ++/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0\]} 2 } } */ ++/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0\]} 4 } } */ ++/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0\]} 3 } } */ ++/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0\]} 3 } } */ ++/* { dg-final { scan-assembler-not {\tst1[bhwd]\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_1.c +new file mode 100644 +index 000000000..26802c87f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_1.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-prune-output "compilation terminated" } */ ++ ++#include ++ ++#pragma GCC target "+nosve" ++ ++svbool_t return_bool (); ++ ++void ++f (void) ++{ ++ return_bool (); /* { dg-error {'return_bool' requires the SVE ISA extension} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_2.c +new file mode 100644 +index 000000000..663165f89 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_2.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-prune-output "compilation terminated" } */ ++ ++#include ++ ++#pragma GCC target "+nosve" ++ ++svbool_t return_bool (); ++ ++void ++f (svbool_t *ptr) ++{ ++ *ptr = return_bool (); /* { dg-error {'return_bool' requires the SVE ISA extension} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_3.c +new file mode 100644 +index 000000000..6d5823cfd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_3.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-prune-output "compilation terminated" } */ ++ ++#include ++ ++#pragma GCC target "+nosve" ++ ++svbool_t (*return_bool) (); ++ ++void ++f (svbool_t *ptr) ++{ ++ *ptr = return_bool (); /* { dg-error {calls to functions of type 'svbool_t\(\)' require the SVE ISA extension} } 
*/
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_4.c
+new file mode 100644
+index 000000000..81e31cf4f
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_4.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-prune-output "compilation terminated" } */
++
++#include <arm_sve.h>
++
++#pragma GCC target "+nosve"
++
++void take_svuint8 (svuint8_t);
++
++void
++f (svuint8_t *ptr)
++{
++ take_svuint8 (*ptr); /* { dg-error {'take_svuint8' requires the SVE ISA extension} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_5.c
+new file mode 100644
+index 000000000..300ed00a0
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_5.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile } */
++/* { dg-prune-output "compilation terminated" } */
++
++#include <arm_sve.h>
++
++#pragma GCC target "+nosve"
++
++void take_svuint8_eventually (float, float, float, float,
++ float, float, float, float, svuint8_t);
++
++void
++f (svuint8_t *ptr)
++{
++ take_svuint8_eventually (0, 0, 0, 0, 0, 0, 0, 0, *ptr); /* { dg-error {arguments of type '(svuint8_t|__SVUint8_t)' require the SVE ISA extension} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_6.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_6.c
+new file mode 100644
+index 000000000..4bddf76f8
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_6.c
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-prune-output "compilation terminated" } */
++
++#include <arm_sve.h>
++
++#pragma GCC target "+nosve"
++
++void unprototyped ();
++
++void
++f (svuint8_t *ptr)
++{
++ unprototyped (*ptr); /* { dg-error {arguments of type '(svuint8_t|__SVUint8_t)' require the SVE ISA extension} } */
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_7.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_7.c
+new file mode 100644
+index 000000000..ef742711d
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_7.c
+@@ -0,0 +1,8 @@
++/* { dg-do compile } */
++/* { dg-prune-output "compilation terminated" } */
++
++#include <arm_sve.h>
++
++#pragma GCC target "+nosve"
++
++void f (svuint8_t x) {} /* { dg-error {'f' requires the SVE ISA extension} } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_8.c
+new file mode 100644
+index 000000000..45b549f12
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_8.c
+@@ -0,0 +1,11 @@
++/* { dg-do compile } */
++/* { dg-prune-output "compilation terminated" } */
++
++#include <arm_sve.h>
++
++#pragma GCC target "+nosve"
++
++void
++f (float a, float b, float c, float d, float e, float f, float g, float h, svuint8_t x) /* { dg-error {arguments of type '(svuint8_t|__SVUint8_t)' require the SVE ISA extension} } */
++{
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1.c
+new file mode 100644
+index 000000000..f6328c901
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1.c
+@@ -0,0 +1,32 @@
++/* { dg-do compile } */
++/* { dg-options "-O -g" } */
++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
++
++/*
++** callee_pred:
++** ldr p0, \[x0\]
++** ret
++*/
++__SVBool_t __attribute__((noipa))
++callee_pred (__SVBool_t *ptr)
++{
++ return *ptr;
++}
++
++#include <arm_sve.h>
++
++/*
++** caller_pred:
++** ...
++** bl callee_pred
++** cntp x0, p0, p0.b
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++uint64_t __attribute__((noipa))
++caller_pred (__SVBool_t *ptr1)
++{
++ __SVBool_t p;
++ p = callee_pred (ptr1);
++ return svcntp_b8 (p, p);
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_1024.c
+new file mode 100644
+index 000000000..450a3f029
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_1024.c
+@@ -0,0 +1,31 @@
++/* { dg-do compile } */
++/* { dg-options "-O -msve-vector-bits=1024 -g" } */
++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
++
++/*
++** callee_pred:
++** ldr p0, \[x0\]
++** ret
++*/
++__SVBool_t __attribute__((noipa))
++callee_pred (__SVBool_t *ptr)
++{
++ return *ptr;
++}
++
++#include <arm_sve.h>
++
++/*
++** caller_pred:
++** ...
++** bl callee_pred
++** cntp x0, p0, p0.b
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++uint64_t __attribute__((noipa))
++caller_pred (__SVBool_t *ptr1)
++{
++ __SVBool_t p = callee_pred (ptr1);
++ return svcntp_b8 (p, p);
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_2048.c
+new file mode 100644
+index 000000000..c9ea26899
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_2048.c
+@@ -0,0 +1,31 @@
++/* { dg-do compile } */
++/* { dg-options "-O -msve-vector-bits=2048 -g" } */
++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
++
++/*
++** callee_pred:
++** ldr p0, \[x0\]
++** ret
++*/
++__SVBool_t __attribute__((noipa))
++callee_pred (__SVBool_t *ptr)
++{
++ return *ptr;
++}
++
++#include <arm_sve.h>
++
++/*
++** caller_pred:
++** ...
++** bl callee_pred
++** cntp x0, p0, p0.b
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++uint64_t __attribute__((noipa))
++caller_pred (__SVBool_t *ptr1)
++{
++ __SVBool_t p = callee_pred (ptr1);
++ return svcntp_b8 (p, p);
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_256.c
+new file mode 100644
+index 000000000..62bc695d1
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_256.c
+@@ -0,0 +1,31 @@
++/* { dg-do compile } */
++/* { dg-options "-O -msve-vector-bits=256 -g" } */
++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
++
++/*
++** callee_pred:
++** ldr p0, \[x0\]
++** ret
++*/
++__SVBool_t __attribute__((noipa))
++callee_pred (__SVBool_t *ptr)
++{
++ return *ptr;
++}
++
++#include <arm_sve.h>
++
++/*
++** caller_pred:
++** ...
++** bl callee_pred
++** cntp x0, p0, p0.b
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++uint64_t __attribute__((noipa))
++caller_pred (__SVBool_t *ptr1)
++{
++ __SVBool_t p = callee_pred (ptr1);
++ return svcntp_b8 (p, p);
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_512.c
+new file mode 100644
+index 000000000..f687689ce
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_512.c
+@@ -0,0 +1,31 @@
++/* { dg-do compile } */
++/* { dg-options "-O -msve-vector-bits=512 -g" } */
++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
++
++/*
++** callee_pred:
++** ldr p0, \[x0\]
++** ret
++*/
++__SVBool_t __attribute__((noipa))
++callee_pred (__SVBool_t *ptr)
++{
++ return *ptr;
++}
++
++#include <arm_sve.h>
++
++/*
++** caller_pred:
++** ...
++** bl callee_pred ++** cntp x0, p0, p0.b ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++caller_pred (__SVBool_t *ptr1) ++{ ++ __SVBool_t p = callee_pred (ptr1); ++ return svcntp_b8 (p, p); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_2.c +new file mode 100644 +index 000000000..efaa81394 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_2.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++/* ++** callee_pred: ++** ldr p0, \[x0\] ++** ret ++*/ ++svbool_t __attribute__((noipa)) ++callee_pred (svbool_t *ptr) ++{ ++ return *ptr; ++} ++ ++/* ++** caller_pred: ++** ... ++** bl callee_pred ++** cntp x0, p0, p0.b ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++caller_pred (svbool_t *ptr1) ++{ ++ svbool_t p; ++ p = callee_pred (ptr1); ++ return svcntp_b8 (p, p); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_3.c +new file mode 100644 +index 000000000..71046447d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_3.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++typedef svbool_t my_pred; ++ ++/* ++** callee_pred: ++** ldr p0, \[x0\] ++** ret ++*/ ++my_pred __attribute__((noipa)) ++callee_pred (my_pred *ptr) ++{ ++ return *ptr; ++} ++ ++/* ++** caller_pred: ++** ... ++** bl callee_pred ++** cntp x0, p0, p0.b ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++caller_pred (my_pred *ptr1) ++{ ++ my_pred p; ++ p = callee_pred (ptr1); ++ return svcntp_b8 (p, p); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4.c +new file mode 100644 +index 000000000..00eb2cbda +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4.c +@@ -0,0 +1,264 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, all ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s8, __SVInt8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, all ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u8, __SVUint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, all ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s16, __SVInt16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, all ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u16, __SVUint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, all ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f16, __SVFloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, all ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (bf16, __SVBfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, all ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s32, __SVInt32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, all ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u32, __SVUint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, all ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f32, __SVFloat32_t) ++ ++/* 
++** callee_s64: ++** ptrue (p[0-7])\.b, all ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s64, __SVInt64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, all ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u64, __SVUint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, all ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f64, __SVFloat64_t) ++ ++#include ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++#define CALLER_BF16(SUFFIX, TYPE) \ ++ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ptrue (p[0-7])\.b, all ++** saddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s8, __SVInt8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ptrue (p[0-7])\.b, all ++** uaddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u8, __SVUint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, all ++** saddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s16, __SVInt16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, all ++** uaddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u16, __SVUint16_t) ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ptrue (p[0-7])\.b, all ++** faddv h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f16, __SVFloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ptrue (p[0-7])\.b, all ++** lasta h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER_BF16 (bf16, __SVBfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ptrue (p[0-7])\.b, all ++** saddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s32, __SVInt32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, all ++** uaddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u32, __SVUint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, all ++** faddv s0, \1, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f32, __SVFloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, all ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s64, __SVInt64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, all ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u64, __SVUint64_t) ++ ++/* ++** caller_f64: ++** ... 
++** bl callee_f64 ++** ptrue (p[0-7])\.b, all ++** faddv d0, \1, z0\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f64, __SVFloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_1024.c +new file mode 100644 +index 000000000..43519634c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_1024.c +@@ -0,0 +1,264 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=1024 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl128 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s8, __SVInt8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl128 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u8, __SVUint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s16, __SVInt16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u16, __SVUint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f16, __SVFloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (bf16, __SVBfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl128 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s32, __SVInt32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl128 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u32, __SVUint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl128 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f32, __SVFloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl128 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s64, __SVInt64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl128 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u64, __SVUint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl128 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f64, __SVFloat64_t) ++ ++#include ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++#define CALLER_BF16(SUFFIX, TYPE) \ ++ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ptrue (p[0-7])\.b, vl128 ++** saddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s8, __SVInt8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ptrue (p[0-7])\.b, vl128 ++** uaddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u8, __SVUint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, vl128 ++** saddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s16, __SVInt16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, vl128 ++** uaddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u16, __SVUint16_t) ++ ++/* ++** caller_f16: ++** ... 
++** bl callee_f16 ++** ptrue (p[0-7])\.b, vl128 ++** faddv h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f16, __SVFloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ptrue (p[0-7])\.b, vl128 ++** lasta h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER_BF16 (bf16, __SVBfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ptrue (p[0-7])\.b, vl128 ++** saddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s32, __SVInt32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, vl128 ++** uaddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u32, __SVUint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, vl128 ++** faddv s0, \1, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f32, __SVFloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, vl128 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s64, __SVInt64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, vl128 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u64, __SVUint64_t) ++ ++/* ++** caller_f64: ++** ... ++** bl callee_f64 ++** ptrue (p[0-7])\.b, vl128 ++** faddv d0, \1, z0\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f64, __SVFloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_2048.c +new file mode 100644 +index 000000000..8256645f5 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_2048.c +@@ -0,0 +1,264 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=2048 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl256 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s8, __SVInt8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl256 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u8, __SVUint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s16, __SVInt16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u16, __SVUint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f16, __SVFloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (bf16, __SVBfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl256 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s32, __SVInt32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl256 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u32, __SVUint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl256 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f32, __SVFloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl256 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s64, __SVInt64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl256 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u64, __SVUint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl256 ++** ld1d 
z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f64, __SVFloat64_t) ++ ++#include ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++#define CALLER_BF16(SUFFIX, TYPE) \ ++ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ptrue (p[0-7])\.b, vl256 ++** saddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s8, __SVInt8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ptrue (p[0-7])\.b, vl256 ++** uaddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u8, __SVUint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, vl256 ++** saddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s16, __SVInt16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, vl256 ++** uaddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u16, __SVUint16_t) ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ptrue (p[0-7])\.b, vl256 ++** faddv h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f16, __SVFloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ptrue (p[0-7])\.b, vl256 ++** lasta h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER_BF16 (bf16, __SVBfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ptrue (p[0-7])\.b, vl256 ++** saddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s32, __SVInt32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, vl256 ++** uaddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u32, __SVUint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, vl256 ++** faddv s0, \1, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f32, __SVFloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, vl256 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s64, __SVInt64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, vl256 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u64, __SVUint64_t) ++ ++/* ++** caller_f64: ++** ... 
++** bl callee_f64 ++** ptrue (p[0-7])\.b, vl256 ++** faddv d0, \1, z0\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f64, __SVFloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_256.c +new file mode 100644 +index 000000000..1e0f6bb96 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_256.c +@@ -0,0 +1,264 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=256 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl32 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s8, __SVInt8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl32 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u8, __SVUint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s16, __SVInt16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u16, __SVUint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f16, __SVFloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (bf16, __SVBfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl32 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s32, __SVInt32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl32 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u32, __SVUint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl32 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f32, __SVFloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl32 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s64, __SVInt64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl32 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u64, __SVUint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl32 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f64, __SVFloat64_t) ++ ++#include ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++#define CALLER_BF16(SUFFIX, TYPE) \ ++ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ptrue (p[0-7])\.b, vl32 ++** saddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s8, __SVInt8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ptrue (p[0-7])\.b, vl32 ++** uaddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u8, __SVUint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, vl32 ++** saddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s16, __SVInt16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, vl32 ++** uaddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u16, __SVUint16_t) ++ ++/* ++** caller_f16: ++** ... 
++** bl callee_f16 ++** ptrue (p[0-7])\.b, vl32 ++** faddv h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f16, __SVFloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ptrue (p[0-7])\.b, vl32 ++** lasta h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER_BF16 (bf16, __SVBfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ptrue (p[0-7])\.b, vl32 ++** saddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s32, __SVInt32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, vl32 ++** uaddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u32, __SVUint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, vl32 ++** faddv s0, \1, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f32, __SVFloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, vl32 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s64, __SVInt64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, vl32 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u64, __SVUint64_t) ++ ++/* ++** caller_f64: ++** ... ++** bl callee_f64 ++** ptrue (p[0-7])\.b, vl32 ++** faddv d0, \1, z0\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f64, __SVFloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_512.c +new file mode 100644 +index 000000000..5b58ed734 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_512.c +@@ -0,0 +1,264 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=512 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl64 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s8, __SVInt8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl64 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u8, __SVUint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s16, __SVInt16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u16, __SVUint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f16, __SVFloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (bf16, __SVBfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl64 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s32, __SVInt32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl64 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u32, __SVUint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl64 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f32, __SVFloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl64 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s64, __SVInt64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl64 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u64, __SVUint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl64 ++** ld1d z0\.d, \1/z, \[x0\] ++** 
ret ++*/ ++CALLEE (f64, __SVFloat64_t) ++ ++#include ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++#define CALLER_BF16(SUFFIX, TYPE) \ ++ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ptrue (p[0-7])\.b, vl64 ++** saddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s8, __SVInt8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ptrue (p[0-7])\.b, vl64 ++** uaddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u8, __SVUint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, vl64 ++** saddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s16, __SVInt16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, vl64 ++** uaddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u16, __SVUint16_t) ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ptrue (p[0-7])\.b, vl64 ++** faddv h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f16, __SVFloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ptrue (p[0-7])\.b, vl64 ++** lasta h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER_BF16 (bf16, __SVBfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ptrue (p[0-7])\.b, vl64 ++** saddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s32, __SVInt32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, vl64 ++** uaddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u32, __SVUint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, vl64 ++** faddv s0, \1, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f32, __SVFloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, vl64 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s64, __SVInt64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, vl64 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u64, __SVUint64_t) ++ ++/* ++** caller_f64: ++** ... 
++** bl callee_f64 ++** ptrue (p[0-7])\.b, vl64 ++** faddv d0, \1, z0\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f64, __SVFloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5.c +new file mode 100644 +index 000000000..55c78e16f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5.c +@@ -0,0 +1,264 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, all ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s8, svint8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, all ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u8, svuint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, all ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s16, svint16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, all ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u16, svuint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, all ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f16, svfloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, all ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (bf16, svbfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, all ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s32, svint32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, all ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u32, svuint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, all ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f32, svfloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, all ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s64, svint64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, all ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u64, svuint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, all ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f64, svfloat64_t) ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++#define CALLER_BF16(SUFFIX, TYPE) \ ++ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ptrue (p[0-7])\.b, all ++** saddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s8, svint8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ptrue (p[0-7])\.b, all ++** uaddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u8, svuint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, all ++** saddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s16, svint16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, all ++** uaddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u16, svuint16_t) ++ ++/* ++** caller_f16: ++** ... 
++** bl callee_f16 ++** ptrue (p[0-7])\.b, all ++** faddv h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f16, svfloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ptrue (p[0-7])\.b, all ++** lasta h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER_BF16 (bf16, svbfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ptrue (p[0-7])\.b, all ++** saddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s32, svint32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, all ++** uaddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u32, svuint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, all ++** faddv s0, \1, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f32, svfloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, all ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s64, svint64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, all ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u64, svuint64_t) ++ ++/* ++** caller_f64: ++** ... ++** bl callee_f64 ++** ptrue (p[0-7])\.b, all ++** faddv d0, \1, z0\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f64, svfloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_1024.c +new file mode 100644 +index 000000000..52e9916d8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_1024.c +@@ -0,0 +1,264 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=1024 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl128 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s8, svint8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl128 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u8, svuint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s16, svint16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u16, svuint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f16, svfloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (bf16, svbfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl128 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s32, svint32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl128 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u32, svuint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl128 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f32, svfloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl128 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s64, svint64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl128 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u64, svuint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl128 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE 
(f64, svfloat64_t) ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++#define CALLER_BF16(SUFFIX, TYPE) \ ++ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ptrue (p[0-7])\.b, vl128 ++** saddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s8, svint8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ptrue (p[0-7])\.b, vl128 ++** uaddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u8, svuint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, vl128 ++** saddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s16, svint16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, vl128 ++** uaddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u16, svuint16_t) ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ptrue (p[0-7])\.b, vl128 ++** faddv h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f16, svfloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ptrue (p[0-7])\.b, vl128 ++** lasta h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER_BF16 (bf16, svbfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ptrue (p[0-7])\.b, vl128 ++** saddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s32, svint32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, vl128 ++** uaddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u32, svuint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, vl128 ++** faddv s0, \1, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f32, svfloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, vl128 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s64, svint64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, vl128 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u64, svuint64_t) ++ ++/* ++** caller_f64: ++** ... 
++** bl callee_f64 ++** ptrue (p[0-7])\.b, vl128 ++** faddv d0, \1, z0\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f64, svfloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_2048.c +new file mode 100644 +index 000000000..6f37d9d6c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_2048.c +@@ -0,0 +1,264 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=2048 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl256 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s8, svint8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl256 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u8, svuint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s16, svint16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u16, svuint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f16, svfloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (bf16, svbfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl256 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s32, svint32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl256 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u32, svuint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl256 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f32, svfloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl256 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s64, svint64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl256 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u64, svuint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl256 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f64, svfloat64_t) ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++#define CALLER_BF16(SUFFIX, TYPE) \ ++ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ptrue (p[0-7])\.b, vl256 ++** saddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s8, svint8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ptrue (p[0-7])\.b, vl256 ++** uaddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u8, svuint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, vl256 ++** saddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s16, svint16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, vl256 ++** uaddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u16, svuint16_t) ++ ++/* ++** caller_f16: ++** ... 
++** bl callee_f16 ++** ptrue (p[0-7])\.b, vl256 ++** faddv h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f16, svfloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ptrue (p[0-7])\.b, vl256 ++** lasta h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER_BF16 (bf16, svbfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ptrue (p[0-7])\.b, vl256 ++** saddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s32, svint32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, vl256 ++** uaddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u32, svuint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, vl256 ++** faddv s0, \1, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f32, svfloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, vl256 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s64, svint64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, vl256 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u64, svuint64_t) ++ ++/* ++** caller_f64: ++** ... ++** bl callee_f64 ++** ptrue (p[0-7])\.b, vl256 ++** faddv d0, \1, z0\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f64, svfloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_256.c +new file mode 100644 +index 000000000..7ba094e16 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_256.c +@@ -0,0 +1,264 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=256 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl32 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s8, svint8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl32 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u8, svuint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s16, svint16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u16, svuint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f16, svfloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (bf16, svbfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl32 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s32, svint32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl32 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u32, svuint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl32 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f32, svfloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl32 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s64, svint64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl32 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u64, svuint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl32 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE 
(f64, svfloat64_t) ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++#define CALLER_BF16(SUFFIX, TYPE) \ ++ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ptrue (p[0-7])\.b, vl32 ++** saddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s8, svint8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ptrue (p[0-7])\.b, vl32 ++** uaddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u8, svuint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, vl32 ++** saddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s16, svint16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, vl32 ++** uaddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u16, svuint16_t) ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ptrue (p[0-7])\.b, vl32 ++** faddv h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f16, svfloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ptrue (p[0-7])\.b, vl32 ++** lasta h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER_BF16 (bf16, svbfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ptrue (p[0-7])\.b, vl32 ++** saddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s32, svint32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, vl32 ++** uaddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u32, svuint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, vl32 ++** faddv s0, \1, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f32, svfloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, vl32 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s64, svint64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, vl32 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u64, svuint64_t) ++ ++/* ++** caller_f64: ++** ... 
++** bl callee_f64 ++** ptrue (p[0-7])\.b, vl32 ++** faddv d0, \1, z0\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f64, svfloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_512.c +new file mode 100644 +index 000000000..36b14d420 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_512.c +@@ -0,0 +1,264 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=512 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl64 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s8, svint8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl64 ++** ld1b z0\.b, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u8, svuint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s16, svint16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u16, svuint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f16, svfloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (bf16, svbfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl64 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s32, svint32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl64 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u32, svuint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl64 ++** ld1w z0\.s, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f32, svfloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl64 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (s64, svint64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl64 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (u64, svuint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl64 ++** ld1d z0\.d, \1/z, \[x0\] ++** ret ++*/ ++CALLEE (f64, svfloat64_t) ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++#define CALLER_BF16(SUFFIX, TYPE) \ ++ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ptrue (p[0-7])\.b, vl64 ++** saddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s8, svint8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ptrue (p[0-7])\.b, vl64 ++** uaddv (d[0-9]+), \1, z0\.b ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u8, svuint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, vl64 ++** saddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s16, svint16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, vl64 ++** uaddv (d[0-9]+), \1, z0\.h ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u16, svuint16_t) ++ ++/* ++** caller_f16: ++** ... 
++** bl callee_f16 ++** ptrue (p[0-7])\.b, vl64 ++** faddv h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f16, svfloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ptrue (p[0-7])\.b, vl64 ++** lasta h0, \1, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER_BF16 (bf16, svbfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ptrue (p[0-7])\.b, vl64 ++** saddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s32, svint32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, vl64 ++** uaddv (d[0-9]+), \1, z0\.s ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u32, svuint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, vl64 ++** faddv s0, \1, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f32, svfloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, vl64 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (s64, svint64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, vl64 ++** uaddv (d[0-9]+), \1, z0\.d ++** fmov x0, \2 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (u64, svuint64_t) ++ ++/* ++** caller_f64: ++** ... ++** bl callee_f64 ++** ptrue (p[0-7])\.b, vl64 ++** faddv d0, \1, z0\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++CALLER (f64, svfloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6.c +new file mode 100644 +index 000000000..72468eab1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6.c +@@ -0,0 +1,272 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++typedef int8_t svint8_t __attribute__ ((vector_size (32))); ++typedef uint8_t svuint8_t __attribute__ ((vector_size (32))); ++ ++typedef int16_t svint16_t __attribute__ ((vector_size (32))); ++typedef uint16_t svuint16_t __attribute__ ((vector_size (32))); ++typedef __fp16 svfloat16_t __attribute__ ((vector_size (32))); ++typedef __bf16 svbfloat16_t __attribute__ ((vector_size (32))); ++ ++typedef int32_t svint32_t __attribute__ ((vector_size (32))); ++typedef uint32_t svuint32_t __attribute__ ((vector_size (32))); ++typedef float svfloat32_t __attribute__ ((vector_size (32))); ++ ++typedef int64_t svint64_t __attribute__ ((vector_size (32))); ++typedef uint64_t svuint64_t __attribute__ ((vector_size (32))); ++typedef double svfloat64_t __attribute__ ((vector_size (32))); ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ( ++** ld1 ({v.*}), \[x0\] ++** st1 \1, \[x8\] ++** | ++** ldp (q[0-9]+, q[0-9]+), \[x0\] ++** stp \2, \[x8\] ++** ) ++** ret ++*/ ++CALLEE (s8, svint8_t) ++ ++/* ++** callee_u8: ++** ( ++** ld1 ({v.*}), \[x0\] ++** st1 \1, \[x8\] ++** | ++** ldp (q[0-9]+, q[0-9]+), \[x0\] ++** stp \2, \[x8\] ++** ) ++** ret ++*/ ++CALLEE (u8, svuint8_t) ++ ++/* ++** callee_s16: ++** ( ++** ld1 ({v.*}), \[x0\] ++** st1 \1, \[x8\] ++** | ++** ldp (q[0-9]+, q[0-9]+), \[x0\] ++** stp \2, \[x8\] ++** ) ++** ret ++*/ ++CALLEE (s16, svint16_t) ++ ++/* ++** callee_u16: ++** ( ++** ld1 ({v.*}), \[x0\] ++** st1 \1, \[x8\] ++** | ++** ldp (q[0-9]+, q[0-9]+), \[x0\] ++** stp \2, \[x8\] ++** ) ++** ret ++*/ ++CALLEE (u16, 
svuint16_t) ++ ++/* Currently we scalarize this. */ ++CALLEE (f16, svfloat16_t) ++ ++/* Currently we scalarize this. */ ++CALLEE (bf16, svbfloat16_t) ++ ++/* ++** callee_s32: ++** ( ++** ld1 ({v.*}), \[x0\] ++** st1 \1, \[x8\] ++** | ++** ldp (q[0-9]+, q[0-9]+), \[x0\] ++** stp \2, \[x8\] ++** ) ++** ret ++*/ ++CALLEE (s32, svint32_t) ++ ++/* ++** callee_u32: ++** ( ++** ld1 ({v.*}), \[x0\] ++** st1 \1, \[x8\] ++** | ++** ldp (q[0-9]+, q[0-9]+), \[x0\] ++** stp \2, \[x8\] ++** ) ++** ret ++*/ ++CALLEE (u32, svuint32_t) ++ ++/* Currently we scalarize this. */ ++CALLEE (f32, svfloat32_t) ++ ++/* ++** callee_s64: ++** ( ++** ld1 ({v.*}), \[x0\] ++** st1 \1, \[x8\] ++** | ++** ldp (q[0-9]+, q[0-9]+), \[x0\] ++** stp \2, \[x8\] ++** ) ++** ret ++*/ ++CALLEE (s64, svint64_t) ++ ++/* ++** callee_u64: ++** ( ++** ld1 ({v.*}), \[x0\] ++** st1 \1, \[x8\] ++** | ++** ldp (q[0-9]+, q[0-9]+), \[x0\] ++** stp \2, \[x8\] ++** ) ++** ret ++*/ ++CALLEE (u64, svuint64_t) ++ ++/* Currently we scalarize this. */ ++CALLEE (f64, svfloat64_t) ++ ++#define CALLER(SUFFIX, TYPE) \ ++ typeof ((*(TYPE *) 0)[0]) \ ++ __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1) \ ++ { \ ++ return callee_##SUFFIX (ptr1)[0]; \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ldrb w0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (s8, svint8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ldrb w0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (u8, svuint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ldrh w0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (s16, svint16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ldrh w0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (u16, svuint16_t) ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ldr h0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (f16, svfloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ldr h0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (bf16, svbfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ldr w0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (s32, svint32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ldr w0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (u32, svuint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ldr s0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (f32, svfloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ldr x0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (s64, svint64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ldr x0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (u64, svuint64_t) ++ ++/* ++** caller_f64: ++** ... 
++** bl callee_f64 ++** ldr d0, \[sp, 16\] ++** ldp x29, x30, \[sp\], 48 ++** ret ++*/ ++CALLER (f64, svfloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_1024.c +new file mode 100644 +index 000000000..b6f267e76 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_1024.c +@@ -0,0 +1,287 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=1024 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++typedef int8_t svint8_t __attribute__ ((vector_size (128))); ++typedef uint8_t svuint8_t __attribute__ ((vector_size (128))); ++ ++typedef int16_t svint16_t __attribute__ ((vector_size (128))); ++typedef uint16_t svuint16_t __attribute__ ((vector_size (128))); ++typedef __fp16 svfloat16_t __attribute__ ((vector_size (128))); ++typedef __bf16 svbfloat16_t __attribute__ ((vector_size (128))); ++ ++typedef int32_t svint32_t __attribute__ ((vector_size (128))); ++typedef uint32_t svuint32_t __attribute__ ((vector_size (128))); ++typedef float svfloat32_t __attribute__ ((vector_size (128))); ++ ++typedef int64_t svint64_t __attribute__ ((vector_size (128))); ++typedef uint64_t svuint64_t __attribute__ ((vector_size (128))); ++typedef double svfloat64_t __attribute__ ((vector_size (128))); ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl128 ++** ld1b z0\.b, \1/z, \[x0\] ++** st1b z0\.b, \1, \[x8\] ++** ret ++*/ ++CALLEE (s8, svint8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl128 ++** ld1b z0\.b, \1/z, \[x0\] ++** st1b z0\.b, \1, \[x8\] ++** ret ++*/ ++CALLEE (u8, svuint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (s16, svint16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (u16, svuint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (f16, svfloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl128 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (bf16, svbfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl128 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (s32, svint32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl128 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (u32, svuint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl128 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (f32, svfloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl128 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (s64, svint64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl128 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (u64, svuint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl128 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (f64, svfloat64_t) ++ ++#define CALLER(SUFFIX, TYPE) \ ++ void __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1, TYPE *ptr2) \ ++ { \ ++ *ptr2 = callee_##SUFFIX (ptr1); \ ++ } ++ ++/* ++** caller_s8: ++** ... 
++** bl callee_s8 ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] ++** st1b \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s8, svint8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] ++** st1b \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u8, svuint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s16, svint16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u16, svuint16_t) ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (f16, svfloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (bf16, svbfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s32, svint32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u32, svuint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (f32, svfloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s64, svint64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u64, svuint64_t) ++ ++/* ++** caller_f64: ++** ... ++** bl callee_f64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... 
++** ret ++*/ ++CALLER (f64, svfloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_2048.c +new file mode 100644 +index 000000000..46b7d683e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_2048.c +@@ -0,0 +1,287 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=2048 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++typedef int8_t svint8_t __attribute__ ((vector_size (256))); ++typedef uint8_t svuint8_t __attribute__ ((vector_size (256))); ++ ++typedef int16_t svint16_t __attribute__ ((vector_size (256))); ++typedef uint16_t svuint16_t __attribute__ ((vector_size (256))); ++typedef __fp16 svfloat16_t __attribute__ ((vector_size (256))); ++typedef __bf16 svbfloat16_t __attribute__ ((vector_size (256))); ++ ++typedef int32_t svint32_t __attribute__ ((vector_size (256))); ++typedef uint32_t svuint32_t __attribute__ ((vector_size (256))); ++typedef float svfloat32_t __attribute__ ((vector_size (256))); ++ ++typedef int64_t svint64_t __attribute__ ((vector_size (256))); ++typedef uint64_t svuint64_t __attribute__ ((vector_size (256))); ++typedef double svfloat64_t __attribute__ ((vector_size (256))); ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl256 ++** ld1b z0\.b, \1/z, \[x0\] ++** st1b z0\.b, \1, \[x8\] ++** ret ++*/ ++CALLEE (s8, svint8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl256 ++** ld1b z0\.b, \1/z, \[x0\] ++** st1b z0\.b, \1, \[x8\] ++** ret ++*/ ++CALLEE (u8, svuint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (s16, svint16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (u16, svuint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (f16, svfloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl256 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (bf16, svbfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl256 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (s32, svint32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl256 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (u32, svuint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl256 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (f32, svfloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl256 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (s64, svint64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl256 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (u64, svuint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl256 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (f64, svfloat64_t) ++ ++#define CALLER(SUFFIX, TYPE) \ ++ void __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1, TYPE *ptr2) \ ++ { \ ++ *ptr2 = callee_##SUFFIX (ptr1); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ... 
++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] ++** st1b \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s8, svint8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] ++** st1b \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u8, svuint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s16, svint16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u16, svuint16_t) ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (f16, svfloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (bf16, svbfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s32, svint32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u32, svuint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (f32, svfloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s64, svint64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u64, svuint64_t) ++ ++/* ++** caller_f64: ++** ... ++** bl callee_f64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... 
++** ret ++*/ ++CALLER (f64, svfloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_256.c +new file mode 100644 +index 000000000..04872493c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_256.c +@@ -0,0 +1,287 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=256 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++typedef int8_t svint8_t __attribute__ ((vector_size (32))); ++typedef uint8_t svuint8_t __attribute__ ((vector_size (32))); ++ ++typedef int16_t svint16_t __attribute__ ((vector_size (32))); ++typedef uint16_t svuint16_t __attribute__ ((vector_size (32))); ++typedef __fp16 svfloat16_t __attribute__ ((vector_size (32))); ++typedef __bf16 svbfloat16_t __attribute__ ((vector_size (32))); ++ ++typedef int32_t svint32_t __attribute__ ((vector_size (32))); ++typedef uint32_t svuint32_t __attribute__ ((vector_size (32))); ++typedef float svfloat32_t __attribute__ ((vector_size (32))); ++ ++typedef int64_t svint64_t __attribute__ ((vector_size (32))); ++typedef uint64_t svuint64_t __attribute__ ((vector_size (32))); ++typedef double svfloat64_t __attribute__ ((vector_size (32))); ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl32 ++** ld1b z0\.b, \1/z, \[x0\] ++** st1b z0\.b, \1, \[x8\] ++** ret ++*/ ++CALLEE (s8, svint8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl32 ++** ld1b z0\.b, \1/z, \[x0\] ++** st1b z0\.b, \1, \[x8\] ++** ret ++*/ ++CALLEE (u8, svuint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (s16, svint16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (u16, svuint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (f16, svfloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl32 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (bf16, svbfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl32 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (s32, svint32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl32 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (u32, svuint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl32 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (f32, svfloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl32 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (s64, svint64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl32 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (u64, svuint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl32 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (f64, svfloat64_t) ++ ++#define CALLER(SUFFIX, TYPE) \ ++ void __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1, TYPE *ptr2) \ ++ { \ ++ *ptr2 = callee_##SUFFIX (ptr1); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] ++** st1b \1, \2, \[[^]]*\] ++** ... 
++** ret ++*/ ++CALLER (s8, svint8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] ++** st1b \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u8, svuint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s16, svint16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u16, svuint16_t) ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (f16, svfloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (bf16, svbfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s32, svint32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u32, svuint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (f32, svfloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s64, svint64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u64, svuint64_t) ++ ++/* ++** caller_f64: ++** ... ++** bl callee_f64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... 
++** ret ++*/ ++CALLER (f64, svfloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_512.c +new file mode 100644 +index 000000000..9817d856a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_512.c +@@ -0,0 +1,287 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -msve-vector-bits=512 -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++ ++typedef int8_t svint8_t __attribute__ ((vector_size (64))); ++typedef uint8_t svuint8_t __attribute__ ((vector_size (64))); ++ ++typedef int16_t svint16_t __attribute__ ((vector_size (64))); ++typedef uint16_t svuint16_t __attribute__ ((vector_size (64))); ++typedef __fp16 svfloat16_t __attribute__ ((vector_size (64))); ++typedef __bf16 svbfloat16_t __attribute__ ((vector_size (64))); ++ ++typedef int32_t svint32_t __attribute__ ((vector_size (64))); ++typedef uint32_t svuint32_t __attribute__ ((vector_size (64))); ++typedef float svfloat32_t __attribute__ ((vector_size (64))); ++ ++typedef int64_t svint64_t __attribute__ ((vector_size (64))); ++typedef uint64_t svuint64_t __attribute__ ((vector_size (64))); ++typedef double svfloat64_t __attribute__ ((vector_size (64))); ++ ++#define CALLEE(SUFFIX, TYPE) \ ++ TYPE __attribute__((noipa)) \ ++ callee_##SUFFIX (TYPE *ptr) \ ++ { \ ++ return *ptr; \ ++ } ++ ++/* ++** callee_s8: ++** ptrue (p[0-7])\.b, vl64 ++** ld1b z0\.b, \1/z, \[x0\] ++** st1b z0\.b, \1, \[x8\] ++** ret ++*/ ++CALLEE (s8, svint8_t) ++ ++/* ++** callee_u8: ++** ptrue (p[0-7])\.b, vl64 ++** ld1b z0\.b, \1/z, \[x0\] ++** st1b z0\.b, \1, \[x8\] ++** ret ++*/ ++CALLEE (u8, svuint8_t) ++ ++/* ++** callee_s16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (s16, svint16_t) ++ ++/* ++** callee_u16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (u16, svuint16_t) ++ ++/* ++** callee_f16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (f16, svfloat16_t) ++ ++/* ++** callee_bf16: ++** ptrue (p[0-7])\.b, vl64 ++** ld1h z0\.h, \1/z, \[x0\] ++** st1h z0\.h, \1, \[x8\] ++** ret ++*/ ++CALLEE (bf16, svbfloat16_t) ++ ++/* ++** callee_s32: ++** ptrue (p[0-7])\.b, vl64 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (s32, svint32_t) ++ ++/* ++** callee_u32: ++** ptrue (p[0-7])\.b, vl64 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (u32, svuint32_t) ++ ++/* ++** callee_f32: ++** ptrue (p[0-7])\.b, vl64 ++** ld1w z0\.s, \1/z, \[x0\] ++** st1w z0\.s, \1, \[x8\] ++** ret ++*/ ++CALLEE (f32, svfloat32_t) ++ ++/* ++** callee_s64: ++** ptrue (p[0-7])\.b, vl64 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (s64, svint64_t) ++ ++/* ++** callee_u64: ++** ptrue (p[0-7])\.b, vl64 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (u64, svuint64_t) ++ ++/* ++** callee_f64: ++** ptrue (p[0-7])\.b, vl64 ++** ld1d z0\.d, \1/z, \[x0\] ++** st1d z0\.d, \1, \[x8\] ++** ret ++*/ ++CALLEE (f64, svfloat64_t) ++ ++#define CALLER(SUFFIX, TYPE) \ ++ void __attribute__((noipa)) \ ++ caller_##SUFFIX (TYPE *ptr1, TYPE *ptr2) \ ++ { \ ++ *ptr2 = callee_##SUFFIX (ptr1); \ ++ } ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] ++** st1b \1, \2, \[[^]]*\] ++** ... 
++** ret ++*/ ++CALLER (s8, svint8_t) ++ ++/* ++** caller_u8: ++** ... ++** bl callee_u8 ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] ++** st1b \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u8, svuint8_t) ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s16, svint16_t) ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u16, svuint16_t) ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (f16, svfloat16_t) ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] ++** st1h \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (bf16, svbfloat16_t) ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s32, svint32_t) ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u32, svuint32_t) ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] ++** st1w \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (f32, svfloat32_t) ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (s64, svint64_t) ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (u64, svuint64_t) ++ ++/* ++** caller_f64: ++** ... ++** bl callee_f64 ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] ++** st1d \1, \2, \[[^]]*\] ++** ... ++** ret ++*/ ++CALLER (f64, svfloat64_t) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_7.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_7.c +new file mode 100644 +index 000000000..55456a3b4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_7.c +@@ -0,0 +1,341 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#include ++ ++/* ++** callee_s8: ++** mov z0\.b, #1 ++** mov z1\.b, #2 ++** ret ++*/ ++svint8x2_t __attribute__((noipa)) ++callee_s8 (void) ++{ ++ return svcreate2 (svdup_s8 (1), svdup_s8 (2)); ++} ++ ++/* ++** caller_s8: ++** ... ++** bl callee_s8 ++** trn1 z0\.b, z0\.b, z1\.b ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svint8_t __attribute__((noipa)) ++caller_s8 (void) ++{ ++ svint8x2_t res; ++ res = callee_s8 (); ++ return svtrn1 (svget2 (res, 0), svget2 (res, 1)); ++} ++ ++/* ++** callee_u8: ++** mov z0\.b, #3 ++** mov z1\.b, #4 ++** ret ++*/ ++svuint8x2_t __attribute__((noipa)) ++callee_u8 (void) ++{ ++ return svcreate2 (svdup_u8 (3), svdup_u8 (4)); ++} ++ ++/* ++** caller_u8: ++** ... 
++** bl callee_u8 ++** trn2 z0\.b, z1\.b, z0\.b ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svuint8_t __attribute__((noipa)) ++caller_u8 (void) ++{ ++ svuint8x2_t res; ++ res = callee_u8 (); ++ return svtrn2 (svget2 (res, 1), svget2 (res, 0)); ++} ++ ++/* ++** callee_s16: ++** mov z0\.h, #1 ++** mov z1\.h, #2 ++** ret ++*/ ++svint16x2_t __attribute__((noipa)) ++callee_s16 (void) ++{ ++ return svcreate2 (svdup_s16 (1), svdup_s16 (2)); ++} ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** trn1 z0\.h, z0\.h, z1\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svint16_t __attribute__((noipa)) ++caller_s16 (void) ++{ ++ svint16x2_t res; ++ res = callee_s16 (); ++ return svtrn1 (svget2 (res, 0), svget2 (res, 1)); ++} ++ ++/* ++** callee_u16: ++** mov z0\.h, #3 ++** mov z1\.h, #4 ++** ret ++*/ ++svuint16x2_t __attribute__((noipa)) ++callee_u16 (void) ++{ ++ return svcreate2 (svdup_u16 (3), svdup_u16 (4)); ++} ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** trn2 z0\.h, z1\.h, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svuint16_t __attribute__((noipa)) ++caller_u16 (void) ++{ ++ svuint16x2_t res; ++ res = callee_u16 (); ++ return svtrn2 (svget2 (res, 1), svget2 (res, 0)); ++} ++ ++/* ++** callee_f16: ++** fmov z0\.h, #5\.0(?:e\+0)? ++** fmov z1\.h, #6\.0(?:e\+0)? ++** ret ++*/ ++svfloat16x2_t __attribute__((noipa)) ++callee_f16 (void) ++{ ++ return svcreate2 (svdup_f16 (5), svdup_f16 (6)); ++} ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** zip1 z0\.h, z1\.h, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svfloat16_t __attribute__((noipa)) ++caller_f16 (void) ++{ ++ svfloat16x2_t res; ++ res = callee_f16 (); ++ return svzip1 (svget2 (res, 1), svget2 (res, 0)); ++} ++ ++/* ++** callee_bf16: ++** mov z0\.h, h2 ++** mov z1\.h, h3 ++** ret ++*/ ++svbfloat16x2_t __attribute__((noipa)) ++callee_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2, bfloat16_t h3) ++{ ++ return svcreate2 (svdup_bf16 (h2), svdup_bf16 (h3)); ++} ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** zip2 z0\.h, z1\.h, z0\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svbfloat16_t __attribute__((noipa)) ++caller_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2, bfloat16_t h3) ++{ ++ svbfloat16x2_t res; ++ res = callee_bf16 (h0, h1, h2, h3); ++ return svzip2 (svget2 (res, 1), svget2 (res, 0)); ++} ++ ++/* ++** callee_s32: ++** mov z0\.s, #1 ++** mov z1\.s, #2 ++** ret ++*/ ++svint32x2_t __attribute__((noipa)) ++callee_s32 (void) ++{ ++ return svcreate2 (svdup_s32 (1), svdup_s32 (2)); ++} ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** trn1 z0\.s, z0\.s, z1\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svint32_t __attribute__((noipa)) ++caller_s32 (void) ++{ ++ svint32x2_t res; ++ res = callee_s32 (); ++ return svtrn1 (svget2 (res, 0), svget2 (res, 1)); ++} ++ ++/* ++** callee_u32: ++** mov z0\.s, #3 ++** mov z1\.s, #4 ++** ret ++*/ ++svuint32x2_t __attribute__((noipa)) ++callee_u32 (void) ++{ ++ return svcreate2 (svdup_u32 (3), svdup_u32 (4)); ++} ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** trn2 z0\.s, z1\.s, z0\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svuint32_t __attribute__((noipa)) ++caller_u32 (void) ++{ ++ svuint32x2_t res; ++ res = callee_u32 (); ++ return svtrn2 (svget2 (res, 1), svget2 (res, 0)); ++} ++ ++/* ++** callee_f32: ++** fmov z0\.s, #5\.0(?:e\+0)? ++** fmov z1\.s, #6\.0(?:e\+0)? 
++** ret
++*/
++svfloat32x2_t __attribute__((noipa))
++callee_f32 (void)
++{
++ return svcreate2 (svdup_f32 (5), svdup_f32 (6));
++}
++
++/*
++** caller_f32:
++** ...
++** bl callee_f32
++** zip1 z0\.s, z1\.s, z0\.s
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++svfloat32_t __attribute__((noipa))
++caller_f32 (void)
++{
++ svfloat32x2_t res;
++ res = callee_f32 ();
++ return svzip1 (svget2 (res, 1), svget2 (res, 0));
++}
++
++/*
++** callee_s64:
++** mov z0\.d, #1
++** mov z1\.d, #2
++** ret
++*/
++svint64x2_t __attribute__((noipa))
++callee_s64 (void)
++{
++ return svcreate2 (svdup_s64 (1), svdup_s64 (2));
++}
++
++/*
++** caller_s64:
++** ...
++** bl callee_s64
++** trn1 z0\.d, z0\.d, z1\.d
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++svint64_t __attribute__((noipa))
++caller_s64 (void)
++{
++ svint64x2_t res;
++ res = callee_s64 ();
++ return svtrn1 (svget2 (res, 0), svget2 (res, 1));
++}
++
++/*
++** callee_u64:
++** mov z0\.d, #3
++** mov z1\.d, #4
++** ret
++*/
++svuint64x2_t __attribute__((noipa))
++callee_u64 (void)
++{
++ return svcreate2 (svdup_u64 (3), svdup_u64 (4));
++}
++
++/*
++** caller_u64:
++** ...
++** bl callee_u64
++** trn2 z0\.d, z1\.d, z0\.d
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++svuint64_t __attribute__((noipa))
++caller_u64 (void)
++{
++ svuint64x2_t res;
++ res = callee_u64 ();
++ return svtrn2 (svget2 (res, 1), svget2 (res, 0));
++}
++
++/*
++** callee_f64:
++** fmov z0\.d, #5\.0(?:e\+0)?
++** fmov z1\.d, #6\.0(?:e\+0)?
++** ret
++*/
++svfloat64x2_t __attribute__((noipa))
++callee_f64 (void)
++{
++ return svcreate2 (svdup_f64 (5), svdup_f64 (6));
++}
++
++/*
++** caller_f64:
++** ...
++** bl callee_f64
++** zip1 z0\.d, z1\.d, z0\.d
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++svfloat64_t __attribute__((noipa))
++caller_f64 (void)
++{
++ svfloat64x2_t res;
++ res = callee_f64 ();
++ return svzip1 (svget2 (res, 1), svget2 (res, 0));
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_8.c
+new file mode 100644
+index 000000000..9581811e7
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_8.c
+@@ -0,0 +1,375 @@
++/* { dg-do compile } */
++/* { dg-options "-O -frename-registers -g" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++#include <arm_sve.h>
++
++/*
++** callee_s8:
++** mov z0\.b, #1
++** mov z1\.b, #2
++** mov z2\.b, #3
++** ret
++*/
++svint8x3_t __attribute__((noipa))
++callee_s8 (void)
++{
++ return svcreate3 (svdup_s8 (1), svdup_s8 (2), svdup_s8 (3));
++}
++
++/*
++** caller_s8:
++** ...
++** bl callee_s8
++** ptrue (p[0-7])\.b, all
++** mad z0\.b, \1/m, z1\.b, z2\.b
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++svint8_t __attribute__((noipa))
++caller_s8 (void)
++{
++ svint8x3_t res;
++ res = callee_s8 ();
++ return svmad_x (svptrue_b8 (),
++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2));
++}
++
++/*
++** callee_u8:
++** mov z0\.b, #4
++** mov z1\.b, #5
++** mov z2\.b, #6
++** ret
++*/
++svuint8x3_t __attribute__((noipa))
++callee_u8 (void)
++{
++ return svcreate3 (svdup_u8 (4), svdup_u8 (5), svdup_u8 (6));
++}
++
++/*
++** caller_u8:
++** ...
++** bl callee_u8 ++** ptrue (p[0-7])\.b, all ++** msb z0\.b, \1/m, z1\.b, z2\.b ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svuint8_t __attribute__((noipa)) ++caller_u8 (void) ++{ ++ svuint8x3_t res; ++ res = callee_u8 (); ++ return svmsb_x (svptrue_b8 (), ++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); ++} ++ ++/* ++** callee_s16: ++** mov z0\.h, #1 ++** mov z1\.h, #2 ++** mov z2\.h, #3 ++** ret ++*/ ++svint16x3_t __attribute__((noipa)) ++callee_s16 (void) ++{ ++ return svcreate3 (svdup_s16 (1), svdup_s16 (2), svdup_s16 (3)); ++} ++ ++/* ++** caller_s16: ++** ... ++** bl callee_s16 ++** ptrue (p[0-7])\.b, all ++** mls z0\.h, \1/m, z1\.h, z2\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svint16_t __attribute__((noipa)) ++caller_s16 (void) ++{ ++ svint16x3_t res; ++ res = callee_s16 (); ++ return svmls_x (svptrue_b16 (), ++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); ++} ++ ++/* ++** callee_u16: ++** mov z0\.h, #4 ++** mov z1\.h, #5 ++** mov z2\.h, #6 ++** ret ++*/ ++svuint16x3_t __attribute__((noipa)) ++callee_u16 (void) ++{ ++ return svcreate3 (svdup_u16 (4), svdup_u16 (5), svdup_u16 (6)); ++} ++ ++/* ++** caller_u16: ++** ... ++** bl callee_u16 ++** ptrue (p[0-7])\.b, all ++** mla z0\.h, \1/m, z1\.h, z2\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svuint16_t __attribute__((noipa)) ++caller_u16 (void) ++{ ++ svuint16x3_t res; ++ res = callee_u16 (); ++ return svmla_x (svptrue_b16 (), ++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); ++} ++ ++/* ++** callee_f16: ++** fmov z0\.h, #1\.0(?:e\+0)? ++** fmov z1\.h, #2\.0(?:e\+0)? ++** fmov z2\.h, #3\.0(?:e\+0)? ++** ret ++*/ ++svfloat16x3_t __attribute__((noipa)) ++callee_f16 (void) ++{ ++ return svcreate3 (svdup_f16 (1), svdup_f16 (2), svdup_f16 (3)); ++} ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** ptrue (p[0-7])\.b, all ++** fmla z0\.h, \1/m, z1\.h, z2\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svfloat16_t __attribute__((noipa)) ++caller_f16 (void) ++{ ++ svfloat16x3_t res; ++ res = callee_f16 (); ++ return svmla_x (svptrue_b16 (), ++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); ++} ++ ++/* ++** callee_bf16: ++** mov z0\.h, h0 ++** mov z1\.h, h1 ++** mov z2\.h, h2 ++** ret ++*/ ++svbfloat16x3_t __attribute__((noipa)) ++callee_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2) ++{ ++ return svcreate3 (svdup_bf16 (h0), svdup_bf16 (h1), svdup_bf16 (h2)); ++} ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** trn2 z0\.h, z0\.h, z2\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svbfloat16_t __attribute__((noipa)) ++caller_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2) ++{ ++ svbfloat16x3_t res; ++ res = callee_bf16 (h0, h1, h2); ++ return svtrn2 (svget3 (res, 0), svget3 (res, 2)); ++} ++ ++/* ++** callee_s32: ++** mov z0\.s, #1 ++** mov z1\.s, #2 ++** mov z2\.s, #3 ++** ret ++*/ ++svint32x3_t __attribute__((noipa)) ++callee_s32 (void) ++{ ++ return svcreate3 (svdup_s32 (1), svdup_s32 (2), svdup_s32 (3)); ++} ++ ++/* ++** caller_s32: ++** ... 
++** bl callee_s32 ++** ptrue (p[0-7])\.b, all ++** mad z0\.s, \1/m, z1\.s, z2\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svint32_t __attribute__((noipa)) ++caller_s32 (void) ++{ ++ svint32x3_t res; ++ res = callee_s32 (); ++ return svmad_x (svptrue_b32 (), ++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); ++} ++ ++/* ++** callee_u32: ++** mov z0\.s, #4 ++** mov z1\.s, #5 ++** mov z2\.s, #6 ++** ret ++*/ ++svuint32x3_t __attribute__((noipa)) ++callee_u32 (void) ++{ ++ return svcreate3 (svdup_u32 (4), svdup_u32 (5), svdup_u32 (6)); ++} ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** ptrue (p[0-7])\.b, all ++** msb z0\.s, \1/m, z1\.s, z2\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svuint32_t __attribute__((noipa)) ++caller_u32 (void) ++{ ++ svuint32x3_t res; ++ res = callee_u32 (); ++ return svmsb_x (svptrue_b32 (), ++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); ++} ++ ++/* ++** callee_f32: ++** fmov z0\.s, #1\.0(?:e\+0)? ++** fmov z1\.s, #2\.0(?:e\+0)? ++** fmov z2\.s, #3\.0(?:e\+0)? ++** ret ++*/ ++svfloat32x3_t __attribute__((noipa)) ++callee_f32 (void) ++{ ++ return svcreate3 (svdup_f32 (1), svdup_f32 (2), svdup_f32 (3)); ++} ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** ptrue (p[0-7])\.b, all ++** fmla z0\.s, \1/m, z1\.s, z2\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svfloat32_t __attribute__((noipa)) ++caller_f32 (void) ++{ ++ svfloat32x3_t res; ++ res = callee_f32 (); ++ return svmla_x (svptrue_b32 (), ++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); ++} ++ ++/* ++** callee_s64: ++** mov z0\.d, #1 ++** mov z1\.d, #2 ++** mov z2\.d, #3 ++** ret ++*/ ++svint64x3_t __attribute__((noipa)) ++callee_s64 (void) ++{ ++ return svcreate3 (svdup_s64 (1), svdup_s64 (2), svdup_s64 (3)); ++} ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** ptrue (p[0-7])\.b, all ++** mls z0\.d, \1/m, z1\.d, z2\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svint64_t __attribute__((noipa)) ++caller_s64 (void) ++{ ++ svint64x3_t res; ++ res = callee_s64 (); ++ return svmls_x (svptrue_b64 (), ++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); ++} ++ ++/* ++** callee_u64: ++** mov z0\.d, #4 ++** mov z1\.d, #5 ++** mov z2\.d, #6 ++** ret ++*/ ++svuint64x3_t __attribute__((noipa)) ++callee_u64 (void) ++{ ++ return svcreate3 (svdup_u64 (4), svdup_u64 (5), svdup_u64 (6)); ++} ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** ptrue (p[0-7])\.b, all ++** mla z0\.d, \1/m, z1\.d, z2\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svuint64_t __attribute__((noipa)) ++caller_u64 (void) ++{ ++ svuint64x3_t res; ++ res = callee_u64 (); ++ return svmla_x (svptrue_b64 (), ++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); ++} ++ ++/* ++** callee_f64: ++** fmov z0\.d, #1\.0(?:e\+0)? ++** fmov z1\.d, #2\.0(?:e\+0)? ++** fmov z2\.d, #3\.0(?:e\+0)? ++** ret ++*/ ++svfloat64x3_t __attribute__((noipa)) ++callee_f64 (void) ++{ ++ return svcreate3 (svdup_f64 (1), svdup_f64 (2), svdup_f64 (3)); ++} ++ ++/* ++** caller_f64: ++** ... 
++** bl callee_f64
++** ptrue (p[0-7])\.b, all
++** fmla z0\.d, \1/m, z1\.d, z2\.d
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++svfloat64_t __attribute__((noipa))
++caller_f64 (void)
++{
++ svfloat64x3_t res;
++ res = callee_f64 ();
++ return svmla_x (svptrue_b64 (),
++ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2));
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_9.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_9.c
+new file mode 100644
+index 000000000..ad32e1fe5
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_9.c
+@@ -0,0 +1,438 @@
++/* { dg-do compile } */
++/* { dg-options "-O -frename-registers -g" } */
++/* { dg-final { check-function-bodies "**" "" } } */
++
++#include <arm_sve.h>
++
++/*
++** callee_s8:
++** mov z0\.b, #1
++** mov z1\.b, #2
++** mov z2\.b, #3
++** mov z3\.b, #4
++** ret
++*/
++svint8x4_t __attribute__((noipa))
++callee_s8 (void)
++{
++ return svcreate4 (svdup_s8 (1), svdup_s8 (2), svdup_s8 (3), svdup_s8 (4));
++}
++
++/*
++** caller_s8:
++** ...
++** bl callee_s8
++** add (z[2-7]\.b), z2\.b, z3\.b
++** ptrue (p[0-7])\.b, all
++** mla z0\.b, \2/m, (z1\.b, \1|\1, z1\.b)
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++svint8_t __attribute__((noipa))
++caller_s8 (void)
++{
++ svint8x4_t res;
++ res = callee_s8 ();
++ return svmla_x (svptrue_b8 (), svget4 (res, 0), svget4 (res, 1),
++ svadd_x (svptrue_b8 (),
++ svget4 (res, 2),
++ svget4 (res, 3)));
++}
++
++/*
++** callee_u8:
++** mov z0\.b, #4
++** mov z1\.b, #5
++** mov z2\.b, #6
++** mov z3\.b, #7
++** ret
++*/
++svuint8x4_t __attribute__((noipa))
++callee_u8 (void)
++{
++ return svcreate4 (svdup_u8 (4), svdup_u8 (5), svdup_u8 (6), svdup_u8 (7));
++}
++
++/*
++** caller_u8:
++** ...
++** bl callee_u8
++** sub (z[2-7]\.b), z2\.b, z3\.b
++** ptrue (p[0-7])\.b, all
++** mla z0\.b, \2/m, (z1\.b, \1|\1, z1\.b)
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++svuint8_t __attribute__((noipa))
++caller_u8 (void)
++{
++ svuint8x4_t res;
++ res = callee_u8 ();
++ return svmla_x (svptrue_b8 (), svget4 (res, 0), svget4 (res, 1),
++ svsub_x (svptrue_b8 (),
++ svget4 (res, 2),
++ svget4 (res, 3)));
++}
++
++/*
++** callee_s16:
++** mov z0\.h, #1
++** mov z1\.h, #2
++** mov z2\.h, #3
++** mov z3\.h, #4
++** ret
++*/
++svint16x4_t __attribute__((noipa))
++callee_s16 (void)
++{
++ return svcreate4 (svdup_s16 (1), svdup_s16 (2),
++ svdup_s16 (3), svdup_s16 (4));
++}
++
++/*
++** caller_s16:
++** ...
++** bl callee_s16
++** add (z[2-7]\.h), z2\.h, z3\.h
++** ptrue (p[0-7])\.b, all
++** mad z0\.h, \2/m, (z1\.h, \1|\1, z1\.h)
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++svint16_t __attribute__((noipa))
++caller_s16 (void)
++{
++ svint16x4_t res;
++ res = callee_s16 ();
++ return svmad_x (svptrue_b16 (), svget4 (res, 0), svget4 (res, 1),
++ svadd_x (svptrue_b16 (),
++ svget4 (res, 2),
++ svget4 (res, 3)));
++}
++
++/*
++** callee_u16:
++** mov z0\.h, #4
++** mov z1\.h, #5
++** mov z2\.h, #6
++** mov z3\.h, #7
++** ret
++*/
++svuint16x4_t __attribute__((noipa))
++callee_u16 (void)
++{
++ return svcreate4 (svdup_u16 (4), svdup_u16 (5),
++ svdup_u16 (6), svdup_u16 (7));
++}
++
++/*
++** caller_u16:
++** ...
++** bl callee_u16 ++** sub (z[2-7]\.h), z2\.h, z3\.h ++** ptrue (p[0-7])\.b, all ++** mad z0\.h, \2/m, (z1\.h, \1|\1, z1\.h) ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svuint16_t __attribute__((noipa)) ++caller_u16 (void) ++{ ++ svuint16x4_t res; ++ res = callee_u16 (); ++ return svmad_x (svptrue_b16 (), svget4 (res, 0), svget4 (res, 1), ++ svsub_x (svptrue_b16 (), ++ svget4 (res, 2), ++ svget4 (res, 3))); ++} ++ ++/* ++** callee_f16: ++** fmov z0\.h, #1\.0(?:e\+0)? ++** fmov z1\.h, #2\.0(?:e\+0)? ++** fmov z2\.h, #3\.0(?:e\+0)? ++** fmov z3\.h, #4\.0(?:e\+0)? ++** ret ++*/ ++svfloat16x4_t __attribute__((noipa)) ++callee_f16 (void) ++{ ++ return svcreate4 (svdup_f16 (1), svdup_f16 (2), ++ svdup_f16 (3), svdup_f16 (4)); ++} ++ ++/* ++** caller_f16: ++** ... ++** bl callee_f16 ++** fadd (z[0-9]+\.h), z0\.h, z1\.h ++** fmul (z[0-9]+\.h), \1, z2\.h ++** fadd z0\.h, \2, z3\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svfloat16_t __attribute__((noipa)) ++caller_f16 (void) ++{ ++ svfloat16x4_t res; ++ res = callee_f16 (); ++ return svadd_x (svptrue_b16 (), ++ svmul_x (svptrue_b16 (), ++ svadd_x (svptrue_b16 (), svget4 (res, 0), ++ svget4 (res, 1)), ++ svget4 (res, 2)), ++ svget4 (res, 3)); ++} ++ ++/* ++** callee_bf16: ++** mov z0\.h, h4 ++** mov z1\.h, h5 ++** mov z2\.h, h6 ++** mov z3\.h, h7 ++** ret ++*/ ++svbfloat16x4_t __attribute__((noipa)) ++callee_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2, bfloat16_t h3, ++ bfloat16_t h4, bfloat16_t h5, bfloat16_t h6, bfloat16_t h7) ++{ ++ return svcreate4 (svdup_bf16 (h4), svdup_bf16 (h5), ++ svdup_bf16 (h6), svdup_bf16 (h7)); ++} ++ ++/* ++** caller_bf16: ++** ... ++** bl callee_bf16 ++** trn2 z0\.h, z0\.h, z3\.h ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svbfloat16_t __attribute__((noipa)) ++caller_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2, bfloat16_t h3, ++ bfloat16_t h4, bfloat16_t h5, bfloat16_t h6, bfloat16_t h7) ++{ ++ svbfloat16x4_t res; ++ res = callee_bf16 (h0, h1, h2, h3, h4, h5, h6, h7); ++ return svtrn2 (svget4 (res, 0), svget4 (res, 3)); ++} ++ ++/* ++** callee_s32: ++** mov z0\.s, #1 ++** mov z1\.s, #2 ++** mov z2\.s, #3 ++** mov z3\.s, #4 ++** ret ++*/ ++svint32x4_t __attribute__((noipa)) ++callee_s32 (void) ++{ ++ return svcreate4 (svdup_s32 (1), svdup_s32 (2), ++ svdup_s32 (3), svdup_s32 (4)); ++} ++ ++/* ++** caller_s32: ++** ... ++** bl callee_s32 ++** add (z[2-7]\.s), z2\.s, z3\.s ++** ptrue (p[0-7])\.b, all ++** msb z0\.s, \2/m, (z1\.s, \1|\1, z1\.s) ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svint32_t __attribute__((noipa)) ++caller_s32 (void) ++{ ++ svint32x4_t res; ++ res = callee_s32 (); ++ return svmsb_x (svptrue_b32 (), svget4 (res, 0), svget4 (res, 1), ++ svadd_x (svptrue_b32 (), ++ svget4 (res, 2), ++ svget4 (res, 3))); ++} ++ ++/* ++** callee_u32: ++** mov z0\.s, #4 ++** mov z1\.s, #5 ++** mov z2\.s, #6 ++** mov z3\.s, #7 ++** ret ++*/ ++svuint32x4_t __attribute__((noipa)) ++callee_u32 (void) ++{ ++ return svcreate4 (svdup_u32 (4), svdup_u32 (5), ++ svdup_u32 (6), svdup_u32 (7)); ++} ++ ++/* ++** caller_u32: ++** ... ++** bl callee_u32 ++** sub (z[2-7]\.s), z2\.s, z3\.s ++** ptrue (p[0-7])\.b, all ++** msb z0\.s, \2/m, (z1\.s, \1|\1, z1\.s) ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svuint32_t __attribute__((noipa)) ++caller_u32 (void) ++{ ++ svuint32x4_t res; ++ res = callee_u32 (); ++ return svmsb_x (svptrue_b32 (), svget4 (res, 0), svget4 (res, 1), ++ svsub_x (svptrue_b32 (), ++ svget4 (res, 2), ++ svget4 (res, 3))); ++} ++ ++/* ++** callee_f32: ++** fmov z0\.s, #1\.0(?:e\+0)? 
++** fmov z1\.s, #2\.0(?:e\+0)? ++** fmov z2\.s, #3\.0(?:e\+0)? ++** fmov z3\.s, #4\.0(?:e\+0)? ++** ret ++*/ ++svfloat32x4_t __attribute__((noipa)) ++callee_f32 (void) ++{ ++ return svcreate4 (svdup_f32 (1), svdup_f32 (2), ++ svdup_f32 (3), svdup_f32 (4)); ++} ++ ++/* ++** caller_f32: ++** ... ++** bl callee_f32 ++** fadd (z[0-9]+\.s), z0\.s, z1\.s ++** fmul (z[0-9]+\.s), \1, z2\.s ++** fadd z0\.s, \2, z3\.s ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svfloat32_t __attribute__((noipa)) ++caller_f32 (void) ++{ ++ svfloat32x4_t res; ++ res = callee_f32 (); ++ return svadd_x (svptrue_b32 (), ++ svmul_x (svptrue_b32 (), ++ svadd_x (svptrue_b32 (), svget4 (res, 0), ++ svget4 (res, 1)), ++ svget4 (res, 2)), ++ svget4 (res, 3)); ++} ++ ++/* ++** callee_s64: ++** mov z0\.d, #1 ++** mov z1\.d, #2 ++** mov z2\.d, #3 ++** mov z3\.d, #4 ++** ret ++*/ ++svint64x4_t __attribute__((noipa)) ++callee_s64 (void) ++{ ++ return svcreate4 (svdup_s64 (1), svdup_s64 (2), ++ svdup_s64 (3), svdup_s64 (4)); ++} ++ ++/* ++** caller_s64: ++** ... ++** bl callee_s64 ++** add (z[2-7]\.d), z2\.d, z3\.d ++** ptrue (p[0-7])\.b, all ++** mls z0\.d, \2/m, (z1\.d, \1|\1, z1\.d) ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svint64_t __attribute__((noipa)) ++caller_s64 (void) ++{ ++ svint64x4_t res; ++ res = callee_s64 (); ++ return svmls_x (svptrue_b64 (), svget4 (res, 0), svget4 (res, 1), ++ svadd_x (svptrue_b64 (), ++ svget4 (res, 2), ++ svget4 (res, 3))); ++} ++ ++/* ++** callee_u64: ++** mov z0\.d, #4 ++** mov z1\.d, #5 ++** mov z2\.d, #6 ++** mov z3\.d, #7 ++** ret ++*/ ++svuint64x4_t __attribute__((noipa)) ++callee_u64 (void) ++{ ++ return svcreate4 (svdup_u64 (4), svdup_u64 (5), ++ svdup_u64 (6), svdup_u64 (7)); ++} ++ ++/* ++** caller_u64: ++** ... ++** bl callee_u64 ++** sub (z[2-7]\.d), z2\.d, z3\.d ++** ptrue (p[0-7])\.b, all ++** mls z0\.d, \2/m, (z1\.d, \1|\1, z1\.d) ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svuint64_t __attribute__((noipa)) ++caller_u64 (void) ++{ ++ svuint64x4_t res; ++ res = callee_u64 (); ++ return svmls_x (svptrue_b64 (), svget4 (res, 0), svget4 (res, 1), ++ svsub_x (svptrue_b64 (), ++ svget4 (res, 2), ++ svget4 (res, 3))); ++} ++ ++/* ++** callee_f64: ++** fmov z0\.d, #1\.0(?:e\+0)? ++** fmov z1\.d, #2\.0(?:e\+0)? ++** fmov z2\.d, #3\.0(?:e\+0)? ++** fmov z3\.d, #4\.0(?:e\+0)? ++** ret ++*/ ++svfloat64x4_t __attribute__((noipa)) ++callee_f64 (void) ++{ ++ return svcreate4 (svdup_f64 (1), svdup_f64 (2), ++ svdup_f64 (3), svdup_f64 (4)); ++} ++ ++/* ++** caller_f64: ++** ... 
++** bl callee_f64 ++** fadd (z[0-9]+\.d), z0\.d, z1\.d ++** fmul (z[0-9]+\.d), \1, z2\.d ++** fadd z0\.d, \2, z3\.d ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svfloat64_t __attribute__((noipa)) ++caller_f64 (void) ++{ ++ svfloat64x4_t res; ++ res = callee_f64 (); ++ return svadd_x (svptrue_b64 (), ++ svmul_x (svptrue_b64 (), ++ svadd_x (svptrue_b64 (), svget4 (res, 0), ++ svget4 (res, 1)), ++ svget4 (res, 2)), ++ svget4 (res, 3)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_nowrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_nowrap.c +new file mode 100644 +index 000000000..4eee04226 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_nowrap.c +@@ -0,0 +1,196 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mbig-endian -fno-shrink-wrap -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** test_1: ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p1\.b, all ++** st1d z8\.d, p1, \[sp, #1, mul vl\] ++** st1d z9\.d, p1, \[sp, #2, mul vl\] ++** st1d z10\.d, p1, \[sp, #3, mul vl\] ++** st1d z11\.d, p1, \[sp, #4, mul vl\] ++** st1d z12\.d, p1, \[sp, #5, mul vl\] ++** st1d z13\.d, p1, \[sp, #6, mul vl\] ++** st1d z14\.d, p1, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p1, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** ptrue p0\.b, all ++** ptrue p1\.b, all ++** ld1d z8\.d, p1/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p1/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p1/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p1/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p1/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p1/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p1/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p1/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", ++ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", ++ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** ptrue p0\.b, all ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); ++ return 
svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** addvl sp, sp, #-6 ++** str p5, \[sp\] ++** str p6, \[sp, #1, mul vl\] ++** str p11, \[sp, #2, mul vl\] ++** ptrue p1\.b, all ++** st1d z8\.d, p1, \[sp, #1, mul vl\] ++** st1d z13\.d, p1, \[sp, #2, mul vl\] ++** str z19, \[sp, #3, mul vl\] ++** str z20, \[sp, #4, mul vl\] ++** str z22, \[sp, #5, mul vl\] ++** ptrue p0\.b, all ++** ptrue p1\.b, all ++** ld1d z8\.d, p1/z, \[sp, #1, mul vl\] ++** ld1d z13\.d, p1/z, \[sp, #2, mul vl\] ++** ldr z19, \[sp, #3, mul vl\] ++** ldr z20, \[sp, #4, mul vl\] ++** ldr z22, \[sp, #5, mul vl\] ++** ldr p5, \[sp\] ++** ldr p6, \[sp, #1, mul vl\] ++** ldr p11, \[sp, #2, mul vl\] ++** addvl sp, sp, #6 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ asm volatile ("" ::: ++ "z8", "z13", "z19", "z20", "z22", ++ "p5", "p6", "p11"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p0\.b, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_5: ++** addvl sp, sp, #-1 ++** ptrue p1\.b, all ++** st1d z15\.d, p1, \[sp\] ++** ptrue p0\.b, all ++** ptrue p1\.b, all ++** ld1d z15\.d, p1/z, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ asm volatile ("" ::: "z15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_6: ++** addvl sp, sp, #-2 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** st1d z15\.d, p4, \[sp, #1, mul vl\] ++** mov z0\.b, #1 ++** ptrue p4\.b, all ++** ld1d z15\.d, p4/z, \[sp, #1, mul vl\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #2 ++** ret ++*/ ++svint8_t ++test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); ++ return svdup_s8 (1); ++} ++ ++/* ++** test_7: ++** addvl sp, sp, #-1 ++** str z16, \[sp\] ++** ptrue p0\.b, all ++** ldr z16, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ asm volatile ("" ::: "z16"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_wrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_wrap.c +new file mode 100644 +index 000000000..e88a3dd1d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_wrap.c +@@ -0,0 +1,196 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mbig-endian -fshrink-wrap -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** test_1: ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p1\.b, all ++** st1d z8\.d, p1, \[sp, #1, mul vl\] ++** st1d z9\.d, p1, \[sp, #2, mul vl\] ++** st1d z10\.d, p1, \[sp, #3, mul vl\] ++** st1d z11\.d, p1, \[sp, #4, mul vl\] ++** st1d z12\.d, p1, \[sp, #5, mul vl\] ++** st1d z13\.d, p1, \[sp, #6, mul vl\] ++** st1d z14\.d, p1, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p1, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** ptrue p0\.b, all ++** ptrue p1\.b, all ++** ld1d z8\.d, p1/z, 
\[sp, #1, mul vl\] ++** ld1d z9\.d, p1/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p1/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p1/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p1/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p1/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p1/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p1/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", ++ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", ++ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** ptrue p0\.b, all ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** addvl sp, sp, #-6 ++** str p5, \[sp\] ++** str p6, \[sp, #1, mul vl\] ++** str p11, \[sp, #2, mul vl\] ++** ptrue p1\.b, all ++** st1d z8\.d, p1, \[sp, #1, mul vl\] ++** st1d z13\.d, p1, \[sp, #2, mul vl\] ++** str z19, \[sp, #3, mul vl\] ++** str z20, \[sp, #4, mul vl\] ++** str z22, \[sp, #5, mul vl\] ++** ptrue p0\.b, all ++** ptrue p1\.b, all ++** ld1d z8\.d, p1/z, \[sp, #1, mul vl\] ++** ld1d z13\.d, p1/z, \[sp, #2, mul vl\] ++** ldr z19, \[sp, #3, mul vl\] ++** ldr z20, \[sp, #4, mul vl\] ++** ldr z22, \[sp, #5, mul vl\] ++** ldr p5, \[sp\] ++** ldr p6, \[sp, #1, mul vl\] ++** ldr p11, \[sp, #2, mul vl\] ++** addvl sp, sp, #6 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ asm volatile ("" ::: ++ "z8", "z13", "z19", "z20", "z22", ++ "p5", "p6", "p11"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p0\.b, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_5: ++** addvl sp, sp, #-1 ++** ptrue p1\.b, all ++** st1d z15\.d, p1, \[sp\] ++** ptrue p0\.b, all ++** ptrue p1\.b, all ++** ld1d z15\.d, p1/z, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ asm volatile ("" ::: "z15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_6: ++** addvl sp, sp, #-2 ++** str p4, \[sp\] ++** ptrue p4\.b, all ++** st1d z15\.d, p4, \[sp, #1, mul vl\] ++** mov z0\.b, #1 ++** ptrue p4\.b, all ++** ld1d z15\.d, p4/z, \[sp, #1, mul vl\] ++** ldr p4, \[sp\] ++** addvl sp, sp, #2 ++** ret ++*/ ++svint8_t ++test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); ++ return svdup_s8 (1); ++} ++ ++/* ++** test_7: ++** addvl sp, sp, #-1 ++** str z16, \[sp\] ++** ptrue p0\.b, all ++** ldr z16, \[sp\] ++** addvl sp, 
sp, #1 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ asm volatile ("" ::: "z16"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_nowrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_nowrap.c +new file mode 100644 +index 000000000..d14cd79b1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_nowrap.c +@@ -0,0 +1,184 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fno-shrink-wrap -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** test_1: ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** ptrue p0\.b, all ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", ++ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", ++ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** ptrue p0\.b, all ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** addvl sp, sp, #-6 ++** str p5, \[sp\] ++** str p6, \[sp, #1, mul vl\] ++** str p11, \[sp, #2, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z13, \[sp, #2, mul vl\] ++** str z19, \[sp, #3, mul vl\] ++** str z20, \[sp, #4, mul vl\] ++** str z22, \[sp, #5, mul vl\] ++** ptrue p0\.b, all ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z13, \[sp, #2, mul vl\] ++** ldr z19, \[sp, #3, mul vl\] ++** ldr z20, \[sp, #4, mul vl\] ++** ldr z22, \[sp, #5, mul vl\] ++** ldr p5, \[sp\] ++** ldr p6, \[sp, #1, mul vl\] ++** ldr p11, \[sp, 
#2, mul vl\] ++** addvl sp, sp, #6 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ asm volatile ("" ::: ++ "z8", "z13", "z19", "z20", "z22", ++ "p5", "p6", "p11"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p0\.b, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_5: ++** addvl sp, sp, #-1 ++** str z15, \[sp\] ++** ptrue p0\.b, all ++** ldr z15, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ asm volatile ("" ::: "z15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_6: ++** addvl sp, sp, #-1 ++** str z15, \[sp\] ++** mov z0\.b, #1 ++** ldr z15, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svint8_t ++test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); ++ return svdup_s8 (1); ++} ++ ++/* ++** test_7: ++** addvl sp, sp, #-1 ++** str z16, \[sp\] ++** ptrue p0\.b, all ++** ldr z16, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ asm volatile ("" ::: "z16"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_wrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_wrap.c +new file mode 100644 +index 000000000..d81dd8e6b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_wrap.c +@@ -0,0 +1,184 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fshrink-wrap -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** test_1: ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** ptrue p0\.b, all ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", ++ "z16", 
"z17", "z18", "z19", "z20", "z21", "z22", "z23", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", ++ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** ptrue p0\.b, all ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** addvl sp, sp, #-6 ++** str p5, \[sp\] ++** str p6, \[sp, #1, mul vl\] ++** str p11, \[sp, #2, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z13, \[sp, #2, mul vl\] ++** str z19, \[sp, #3, mul vl\] ++** str z20, \[sp, #4, mul vl\] ++** str z22, \[sp, #5, mul vl\] ++** ptrue p0\.b, all ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z13, \[sp, #2, mul vl\] ++** ldr z19, \[sp, #3, mul vl\] ++** ldr z20, \[sp, #4, mul vl\] ++** ldr z22, \[sp, #5, mul vl\] ++** ldr p5, \[sp\] ++** ldr p6, \[sp, #1, mul vl\] ++** ldr p11, \[sp, #2, mul vl\] ++** addvl sp, sp, #6 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ asm volatile ("" ::: ++ "z8", "z13", "z19", "z20", "z22", ++ "p5", "p6", "p11"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** addvl sp, sp, #-1 ++** str p4, \[sp\] ++** ptrue p0\.b, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_5: ++** addvl sp, sp, #-1 ++** str z15, \[sp\] ++** ptrue p0\.b, all ++** ldr z15, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ asm volatile ("" ::: "z15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_6: ++** addvl sp, sp, #-1 ++** str z15, \[sp\] ++** mov z0\.b, #1 ++** ldr z15, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svint8_t ++test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); ++ return svdup_s8 (1); ++} ++ ++/* ++** test_7: ++** addvl sp, sp, #-1 ++** str z16, \[sp\] ++** ptrue p0\.b, all ++** ldr z16, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ asm volatile ("" ::: "z16"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_nowrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_nowrap.c +new file mode 100644 +index 000000000..05aa18b3c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_nowrap.c +@@ -0,0 +1,271 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mbig-endian -fno-shrink-wrap -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++void standard_callee (void); ++__attribute__((aarch64_vector_pcs)) void vpcs_callee (void); ++ ++/* ++** calls_standard: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p0\.b, all ++** st1d z8\.d, p0, \[sp, #1, mul vl\] ++** st1d z9\.d, p0, \[sp, #2, mul vl\] ++** st1d z10\.d, p0, \[sp, #3, mul vl\] ++** st1d z11\.d, p0, \[sp, #4, mul vl\] ++** st1d z12\.d, p0, \[sp, #5, mul vl\] ++** st1d z13\.d, p0, \[sp, #6, mul vl\] ++** st1d z14\.d, p0, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p0, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** bl standard_callee ++** ptrue p0\.b, all ++** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void calls_standard (__SVInt8_t x) { standard_callee (); } ++ ++/* ++** calls_vpcs: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p0\.b, all ++** st1d z8\.d, p0, \[sp, #1, mul vl\] ++** st1d z9\.d, p0, \[sp, #2, mul vl\] ++** st1d z10\.d, p0, \[sp, #3, mul vl\] ++** st1d z11\.d, p0, \[sp, #4, mul vl\] ++** st1d z12\.d, p0, \[sp, #5, mul vl\] ++** st1d z13\.d, p0, \[sp, #6, mul vl\] ++** st1d z14\.d, p0, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p0, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** bl vpcs_callee ++** ptrue p0\.b, all ++** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void calls_vpcs (__SVInt8_t x) { vpcs_callee (); } ++ ++/* ++** calls_standard_ptr: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p0\.b, all ++** st1d z8\.d, p0, \[sp, #1, mul vl\] ++** st1d z9\.d, p0, \[sp, #2, mul vl\] ++** st1d z10\.d, p0, \[sp, #3, mul vl\] ++** st1d z11\.d, p0, \[sp, #4, mul vl\] ++** st1d z12\.d, p0, \[sp, #5, mul vl\] ++** st1d z13\.d, p0, \[sp, #6, mul vl\] ++** st1d z14\.d, p0, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p0, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** blr x0 ++** ptrue p0\.b, all ++** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void ++calls_standard_ptr (__SVInt8_t x, void (*fn) (void)) ++{ ++ fn (); ++} ++ ++/* ++** calls_vpcs_ptr: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p0\.b, all ++** st1d z8\.d, p0, \[sp, #1, mul vl\] ++** st1d z9\.d, p0, \[sp, #2, mul vl\] ++** st1d z10\.d, p0, \[sp, #3, mul vl\] ++** st1d z11\.d, p0, \[sp, #4, mul vl\] ++** st1d z12\.d, p0, \[sp, #5, mul vl\] ++** st1d z13\.d, p0, \[sp, #6, mul vl\] ++** st1d z14\.d, p0, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p0, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** blr x0 ++** ptrue p0\.b, all ++** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void ++calls_vpcs_ptr (__SVInt8_t x, ++ void (*__attribute__((aarch64_vector_pcs)) fn) (void)) ++{ ++ fn (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_wrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_wrap.c +new file mode 100644 +index 000000000..85b7794d7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_wrap.c +@@ -0,0 +1,271 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mbig-endian -fshrink-wrap -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++void standard_callee (void); ++__attribute__((aarch64_vector_pcs)) void vpcs_callee (void); ++ ++/* ++** calls_standard: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p0\.b, all ++** st1d z8\.d, p0, \[sp, #1, mul vl\] ++** st1d z9\.d, p0, \[sp, #2, mul vl\] ++** st1d z10\.d, p0, \[sp, #3, mul vl\] ++** st1d z11\.d, p0, \[sp, #4, mul vl\] ++** st1d z12\.d, p0, \[sp, #5, mul vl\] ++** st1d z13\.d, p0, \[sp, #6, mul vl\] ++** st1d z14\.d, p0, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p0, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** bl standard_callee ++** ptrue p0\.b, all ++** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void calls_standard (__SVInt8_t x) { standard_callee (); } ++ ++/* ++** calls_vpcs: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p0\.b, all ++** st1d z8\.d, p0, \[sp, #1, mul vl\] ++** st1d z9\.d, p0, \[sp, #2, mul vl\] ++** st1d z10\.d, p0, \[sp, #3, mul vl\] ++** st1d z11\.d, p0, \[sp, #4, mul vl\] ++** st1d z12\.d, p0, \[sp, #5, mul vl\] ++** st1d z13\.d, p0, \[sp, #6, mul vl\] ++** st1d z14\.d, p0, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p0, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** bl vpcs_callee ++** ptrue p0\.b, all ++** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void calls_vpcs (__SVInt8_t x) { vpcs_callee (); } ++ ++/* ++** calls_standard_ptr: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p0\.b, all ++** st1d z8\.d, p0, \[sp, #1, mul vl\] ++** st1d z9\.d, p0, \[sp, #2, mul vl\] ++** st1d z10\.d, p0, \[sp, #3, mul vl\] ++** st1d z11\.d, p0, \[sp, #4, mul vl\] ++** st1d z12\.d, p0, \[sp, #5, mul vl\] ++** st1d z13\.d, p0, \[sp, #6, mul vl\] ++** st1d z14\.d, p0, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p0, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** blr x0 ++** ptrue p0\.b, all ++** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void ++calls_standard_ptr (__SVInt8_t x, void (*fn) (void)) ++{ ++ fn (); ++} ++ ++/* ++** calls_vpcs_ptr: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p0\.b, all ++** st1d z8\.d, p0, \[sp, #1, mul vl\] ++** st1d z9\.d, p0, \[sp, #2, mul vl\] ++** st1d z10\.d, p0, \[sp, #3, mul vl\] ++** st1d z11\.d, p0, \[sp, #4, mul vl\] ++** st1d z12\.d, p0, \[sp, #5, mul vl\] ++** st1d z13\.d, p0, \[sp, #6, mul vl\] ++** st1d z14\.d, p0, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p0, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** blr x0 ++** ptrue p0\.b, all ++** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void ++calls_vpcs_ptr (__SVInt8_t x, ++ void (*__attribute__((aarch64_vector_pcs)) fn) (void)) ++{ ++ fn (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_nowrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_nowrap.c +new file mode 100644 +index 000000000..0fcd357a0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_nowrap.c +@@ -0,0 +1,255 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fno-shrink-wrap -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++void standard_callee (void); ++__attribute__((aarch64_vector_pcs)) void vpcs_callee (void); ++ ++/* ++** calls_standard: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** bl standard_callee ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void calls_standard (__SVInt8_t x) { standard_callee (); } ++ ++/* ++** calls_vpcs: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** bl vpcs_callee ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void calls_vpcs (__SVInt8_t x) { vpcs_callee (); } ++ ++/* ++** calls_standard_ptr: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** blr x0 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void ++calls_standard_ptr (__SVInt8_t x, void (*fn) (void)) ++{ ++ fn (); ++} ++ ++/* ++** calls_vpcs_ptr: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** blr x0 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void ++calls_vpcs_ptr (__SVInt8_t x, ++ void (*__attribute__((aarch64_vector_pcs)) fn) (void)) ++{ ++ fn (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_wrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_wrap.c +new file mode 100644 +index 000000000..e81194c74 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_wrap.c +@@ -0,0 +1,255 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fshrink-wrap -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++void standard_callee (void); ++__attribute__((aarch64_vector_pcs)) void vpcs_callee (void); ++ ++/* ++** calls_standard: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** bl standard_callee ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void calls_standard (__SVInt8_t x) { standard_callee (); } ++ ++/* ++** calls_vpcs: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** bl vpcs_callee ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void calls_vpcs (__SVInt8_t x) { vpcs_callee (); } ++ ++/* ++** calls_standard_ptr: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** blr x0 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++void ++calls_standard_ptr (__SVInt8_t x, void (*fn) (void)) ++{ ++ fn (); ++} ++ ++/* ++** calls_vpcs_ptr: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp
++** addvl sp, sp, #-17
++** str p4, \[sp\]
++** str p5, \[sp, #1, mul vl\]
++** str p6, \[sp, #2, mul vl\]
++** str p7, \[sp, #3, mul vl\]
++** str p8, \[sp, #4, mul vl\]
++** str p9, \[sp, #5, mul vl\]
++** str p10, \[sp, #6, mul vl\]
++** str p11, \[sp, #7, mul vl\]
++** str z8, \[sp, #1, mul vl\]
++** str z9, \[sp, #2, mul vl\]
++** str z10, \[sp, #3, mul vl\]
++** str z11, \[sp, #4, mul vl\]
++** str z12, \[sp, #5, mul vl\]
++** str z13, \[sp, #6, mul vl\]
++** str z14, \[sp, #7, mul vl\]
++** str z15, \[sp, #8, mul vl\]
++** str z16, \[sp, #9, mul vl\]
++** str z17, \[sp, #10, mul vl\]
++** str z18, \[sp, #11, mul vl\]
++** str z19, \[sp, #12, mul vl\]
++** str z20, \[sp, #13, mul vl\]
++** str z21, \[sp, #14, mul vl\]
++** str z22, \[sp, #15, mul vl\]
++** str z23, \[sp, #16, mul vl\]
++** blr x0
++** ldr z8, \[sp, #1, mul vl\]
++** ldr z9, \[sp, #2, mul vl\]
++** ldr z10, \[sp, #3, mul vl\]
++** ldr z11, \[sp, #4, mul vl\]
++** ldr z12, \[sp, #5, mul vl\]
++** ldr z13, \[sp, #6, mul vl\]
++** ldr z14, \[sp, #7, mul vl\]
++** ldr z15, \[sp, #8, mul vl\]
++** ldr z16, \[sp, #9, mul vl\]
++** ldr z17, \[sp, #10, mul vl\]
++** ldr z18, \[sp, #11, mul vl\]
++** ldr z19, \[sp, #12, mul vl\]
++** ldr z20, \[sp, #13, mul vl\]
++** ldr z21, \[sp, #14, mul vl\]
++** ldr z22, \[sp, #15, mul vl\]
++** ldr z23, \[sp, #16, mul vl\]
++** ldr p4, \[sp\]
++** ldr p5, \[sp, #1, mul vl\]
++** ldr p6, \[sp, #2, mul vl\]
++** ldr p7, \[sp, #3, mul vl\]
++** ldr p8, \[sp, #4, mul vl\]
++** ldr p9, \[sp, #5, mul vl\]
++** ldr p10, \[sp, #6, mul vl\]
++** ldr p11, \[sp, #7, mul vl\]
++** addvl sp, sp, #17
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++void
++calls_vpcs_ptr (__SVInt8_t x,
++ void (*__attribute__((aarch64_vector_pcs)) fn) (void))
++{
++ fn ();
++}
+diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_3.c
+new file mode 100644
+index 000000000..1fe86b0ea
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_3.c
+@@ -0,0 +1,92 @@
++/* { dg-do compile } */
++/* { dg-options "-O -g" } */
++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
++
++#include <arm_sve.h>
++
++int sve_callee (svint8_t);
++
++/*
++** standard_caller:
++** stp x29, x30, \[sp, -16\]!
++** mov x29, sp
++** mov z0\.b, #1
++** bl sve_callee
++** add w0, w0, #?1
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++int standard_caller (void) { return sve_callee (svdup_s8 (1)) + 1; }
++
++/*
++** vpcs_caller:
++** stp x29, x30, \[sp, -16\]!
++** mov x29, sp
++** mov z0\.b, #1
++** bl sve_callee
++** add w0, w0, #?1
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++__attribute__((aarch64_vector_pcs))
++int vpcs_caller (void) { return sve_callee (svdup_s8 (1)) + 1; }
++
++/*
++** sve_caller:
++** stp x29, x30, \[sp, -16\]!
++** mov x29, sp
++** mov z0\.b, #1
++** bl sve_callee
++** add w0, w0, #?1
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++int sve_caller (svbool_t p0) { return sve_callee (svdup_s8 (1)) + 1; }
++
++/*
++** standard_caller_ptr:
++** stp x29, x30, \[sp, -16\]!
++** mov x29, sp
++** mov z0\.h, #1
++** blr x0
++** add w0, w0, #?1
++** ldp x29, x30, \[sp\], 16
++** ret
++*/
++int
++standard_caller_ptr (int (*fn) (__SVInt16_t))
++{
++ return fn (svdup_s16 (1)) + 1;
++}
++
++/*
++** vpcs_caller_ptr:
++** stp x29, x30, \[sp, -16\]!
++** mov x29, sp ++** mov z0\.h, #1 ++** blr x0 ++** add w0, w0, #?1 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++int __attribute__((aarch64_vector_pcs)) ++vpcs_caller_ptr (int (*fn) (__SVInt16_t)) ++{ ++ return fn (svdup_s16 (1)) + 1; ++} ++ ++/* ++** sve_caller_ptr: ++** stp x29, x30, \[sp, -16\]! ++** mov x29, sp ++** mov z0\.h, #1 ++** blr x0 ++** add w0, w0, #?1 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++int ++sve_caller_ptr (svbool_t pg, int (*fn) (svint16_t)) ++{ ++ return fn (svdup_s16 (1)) + 1; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_be.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_be.c +new file mode 100644 +index 000000000..c42699dc7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_be.c +@@ -0,0 +1,84 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++void standard_callee (__SVInt8_t *); ++ ++/* ++** calls_standard: ++** addvl sp, sp, #-1 ++** ( ++** stp x29, x30, \[sp, -16\]! ++** | ++** sub sp, sp, #?16 ++** stp x29, x30, \[sp\] ++** ) ++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** ptrue p0\.b, all ++** st1d z8\.d, p0, \[sp, #1, mul vl\] ++** st1d z9\.d, p0, \[sp, #2, mul vl\] ++** st1d z10\.d, p0, \[sp, #3, mul vl\] ++** st1d z11\.d, p0, \[sp, #4, mul vl\] ++** st1d z12\.d, p0, \[sp, #5, mul vl\] ++** st1d z13\.d, p0, \[sp, #6, mul vl\] ++** st1d z14\.d, p0, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p0, \[x11, #-8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** addvl x0, sp, #17 ++** add x0, x0, #?16 ++** bl standard_callee ++** ptrue p0\.b, all ++** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ( ++** ldp x29, x30, \[sp\], 16 ++** addvl sp, sp, #1 ++** | ++** ldp x29, x30, \[sp\] ++** addvl sp, sp, #1 ++** add sp, sp, #?16 ++** ) ++** ret ++*/ ++void calls_standard (__SVInt8_t x) { __SVInt8_t tmp; standard_callee (&tmp); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_le.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_le.c +new file mode 100644 +index 000000000..49fe96800 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_le.c +@@ -0,0 +1,80 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++void standard_callee (__SVInt8_t *); ++ ++/* ++** calls_standard: ++** addvl sp, sp, #-1 ++** ( ++** stp x29, x30, \[sp, -16\]! ++** | ++** sub sp, sp, #?16 ++** stp x29, x30, \[sp\] ++** ) ++** mov x29, sp ++** addvl sp, sp, #-17 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** addvl x0, sp, #17 ++** add x0, x0, #?16 ++** bl standard_callee ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ( ++** ldp x29, x30, \[sp\], 16 ++** addvl sp, sp, #1 ++** | ++** ldp x29, x30, \[sp\] ++** addvl sp, sp, #1 ++** add sp, sp, #?16 ++** ) ++** ret ++*/ ++void calls_standard (__SVInt8_t x) { __SVInt8_t tmp; standard_callee (&tmp); } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_be.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_be.c +new file mode 100644 +index 000000000..dc3282eee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_be.c +@@ -0,0 +1,78 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void standard_callee (void); ++ ++/* ++** calls_standard: ++** stp x29, x30, \[sp, -16\]! 
++** mov x29, sp ++** addvl sp, sp, #-17 ++** ptrue p0\.b, all ++** st1d z8\.d, p0, \[sp, #1, mul vl\] ++** st1d z9\.d, p0, \[sp, #2, mul vl\] ++** st1d z10\.d, p0, \[sp, #3, mul vl\] ++** st1d z11\.d, p0, \[sp, #4, mul vl\] ++** st1d z12\.d, p0, \[sp, #5, mul vl\] ++** st1d z13\.d, p0, \[sp, #6, mul vl\] ++** st1d z14\.d, p0, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** st1d z15\.d, p0, \[x11, #-8, mul vl\] ++** cbnz w0, \.L[0-9]+ ++** ptrue p0\.b, all ++** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] ++** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] ++** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] ++** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] ++** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] ++** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] ++** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] ++** addvl x11, sp, #16 ++** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++** ... ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** bl standard_callee ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** b \.L[0-9]+ ++*/ ++void ++calls_standard (__SVInt8_t x, int y) ++{ ++ asm volatile ("" ::: "z8"); ++ if (__builtin_expect (y, 0)) ++ standard_callee (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_le.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_le.c +new file mode 100644 +index 000000000..0d29ff2fd +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_le.c +@@ -0,0 +1,74 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++void standard_callee (void); ++ ++/* ++** calls_standard: ++** stp x29, x30, \[sp, -16\]! ++** mov x29, sp ++** addvl sp, sp, #-17 ++** str z8, \[sp, #1, mul vl\] ++** cbnz w0, \.L[0-9]+ ++** ldr z8, \[sp, #1, mul vl\] ++** addvl sp, sp, #17 ++** ldp x29, x30, \[sp\], 16 ++** ret ++** ... 
++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** bl standard_callee ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** b \.L[0-9]+ ++*/ ++void ++calls_standard (__SVInt8_t x, int y) ++{ ++ asm volatile ("" ::: "z8"); ++ if (__builtin_expect (y, 0)) ++ standard_callee (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c +new file mode 100644 +index 000000000..485d01875 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c +@@ -0,0 +1,204 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** test_1: ++** cntb x12 ++** mov x13, #?17 ++** mul x12, x12, x13 ++** mov x11, sp ++** ... 
++** sub sp, sp, x12 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** ptrue p0\.b, all ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** addvl sp, sp, #17 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", ++ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", ++ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** ptrue p0\.b, all ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** cntb x12, all, mul #6 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** str p5, \[sp\] ++** str p6, \[sp, #1, mul vl\] ++** str p11, \[sp, #2, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z13, \[sp, #2, mul vl\] ++** str z19, \[sp, #3, mul vl\] ++** str z20, \[sp, #4, mul vl\] ++** str z22, \[sp, #5, mul vl\] ++** ptrue p0\.b, all ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z13, \[sp, #2, mul vl\] ++** ldr z19, \[sp, #3, mul vl\] ++** ldr z20, \[sp, #4, mul vl\] ++** ldr z22, \[sp, #5, mul vl\] ++** ldr p5, \[sp\] ++** ldr p6, \[sp, #1, mul vl\] ++** ldr p11, \[sp, #2, mul vl\] ++** addvl sp, sp, #6 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ asm volatile ("" ::: ++ "z8", "z13", "z19", "z20", "z22", ++ "p5", "p6", "p11"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** cntb x12 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** str p4, \[sp\] ++** ptrue p0\.b, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_5: ++** cntb x12 ++** mov x11, sp ++** ... 
++** sub sp, sp, x12 ++** str z15, \[sp\] ++** ptrue p0\.b, all ++** ldr z15, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ asm volatile ("" ::: "z15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_6: ++** cntb x12 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** str z15, \[sp\] ++** mov z0\.b, #1 ++** ldr z15, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svint8_t ++test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); ++ return svdup_s8 (1); ++} ++ ++/* ++** test_7: ++** cntb x12 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** str z16, \[sp\] ++** ptrue p0\.b, all ++** ldr z16, \[sp\] ++** addvl sp, sp, #1 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ asm volatile ("" ::: "z16"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_1024.c +new file mode 100644 +index 000000000..087e8db9e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_1024.c +@@ -0,0 +1,184 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection -msve-vector-bits=1024 -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** test_1: ++** sub sp, sp, #2176 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** ptrue p0\.b, vl128 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** add sp, sp, #?2176 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", ++ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", ++ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** ptrue p0\.b, vl128 ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ asm 
volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** sub sp, sp, #768 ++** str p5, \[sp\] ++** str p6, \[sp, #1, mul vl\] ++** str p11, \[sp, #2, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z13, \[sp, #2, mul vl\] ++** str z19, \[sp, #3, mul vl\] ++** str z20, \[sp, #4, mul vl\] ++** str z22, \[sp, #5, mul vl\] ++** ptrue p0\.b, vl128 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z13, \[sp, #2, mul vl\] ++** ldr z19, \[sp, #3, mul vl\] ++** ldr z20, \[sp, #4, mul vl\] ++** ldr z22, \[sp, #5, mul vl\] ++** ldr p5, \[sp\] ++** ldr p6, \[sp, #1, mul vl\] ++** ldr p11, \[sp, #2, mul vl\] ++** add sp, sp, #?768 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ asm volatile ("" ::: ++ "z8", "z13", "z19", "z20", "z22", ++ "p5", "p6", "p11"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** sub sp, sp, #128 ++** str p4, \[sp\] ++** ptrue p0\.b, vl128 ++** ldr p4, \[sp\] ++** add sp, sp, #?128 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_5: ++** sub sp, sp, #128 ++** str z15, \[sp\] ++** ptrue p0\.b, vl128 ++** ldr z15, \[sp\] ++** add sp, sp, #?128 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ asm volatile ("" ::: "z15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_6: ++** sub sp, sp, #128 ++** str z15, \[sp\] ++** mov z0\.b, #1 ++** ldr z15, \[sp\] ++** add sp, sp, #?128 ++** ret ++*/ ++svint8_t ++test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); ++ return svdup_s8 (1); ++} ++ ++/* ++** test_7: ++** sub sp, sp, #128 ++** str z16, \[sp\] ++** ptrue p0\.b, vl128 ++** ldr z16, \[sp\] ++** add sp, sp, #?128 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ asm volatile ("" ::: "z16"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_2048.c +new file mode 100644 +index 000000000..e8dc5d5e4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_2048.c +@@ -0,0 +1,185 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection -msve-vector-bits=2048 -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** test_1: ++** mov x12, #?4352 ++** sub sp, sp, x12 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** ptrue p0\.b, vl256 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr 
z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", ++ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", ++ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** ptrue p0\.b, vl256 ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** sub sp, sp, #1536 ++** str p5, \[sp\] ++** str p6, \[sp, #1, mul vl\] ++** str p11, \[sp, #2, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z13, \[sp, #2, mul vl\] ++** str z19, \[sp, #3, mul vl\] ++** str z20, \[sp, #4, mul vl\] ++** str z22, \[sp, #5, mul vl\] ++** ptrue p0\.b, vl256 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z13, \[sp, #2, mul vl\] ++** ldr z19, \[sp, #3, mul vl\] ++** ldr z20, \[sp, #4, mul vl\] ++** ldr z22, \[sp, #5, mul vl\] ++** ldr p5, \[sp\] ++** ldr p6, \[sp, #1, mul vl\] ++** ldr p11, \[sp, #2, mul vl\] ++** add sp, sp, #?1536 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ asm volatile ("" ::: ++ "z8", "z13", "z19", "z20", "z22", ++ "p5", "p6", "p11"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** sub sp, sp, #256 ++** str p4, \[sp\] ++** ptrue p0\.b, vl256 ++** ldr p4, \[sp\] ++** add sp, sp, #?256 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_5: ++** sub sp, sp, #256 ++** str z15, \[sp\] ++** ptrue p0\.b, vl256 ++** ldr z15, \[sp\] ++** add sp, sp, #?256 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ asm volatile ("" ::: "z15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_6: ++** sub sp, sp, #256 ++** str z15, \[sp\] ++** mov z0\.b, #1 ++** ldr z15, \[sp\] ++** add sp, sp, #?256 ++** ret ++*/ ++svint8_t ++test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); ++ return svdup_s8 (1); ++} ++ ++/* ++** test_7: ++** sub sp, sp, #256 ++** str z16, \[sp\] ++** ptrue p0\.b, vl256 ++** ldr z16, \[sp\] ++** add sp, sp, #?256 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ asm volatile ("" ::: "z16"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_256.c +new file mode 100644 +index 000000000..73c49e4d4 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_256.c +@@ -0,0 +1,184 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection 
-msve-vector-bits=256 -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** test_1: ++** sub sp, sp, #544 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** ptrue p0\.b, vl32 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** add sp, sp, #?544 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", ++ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", ++ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** ptrue p0\.b, vl32 ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** sub sp, sp, #192 ++** str p5, \[sp\] ++** str p6, \[sp, #1, mul vl\] ++** str p11, \[sp, #2, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z13, \[sp, #2, mul vl\] ++** str z19, \[sp, #3, mul vl\] ++** str z20, \[sp, #4, mul vl\] ++** str z22, \[sp, #5, mul vl\] ++** ptrue p0\.b, vl32 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z13, \[sp, #2, mul vl\] ++** ldr z19, \[sp, #3, mul vl\] ++** ldr z20, \[sp, #4, mul vl\] ++** ldr z22, \[sp, #5, mul vl\] ++** ldr p5, \[sp\] ++** ldr p6, \[sp, #1, mul vl\] ++** ldr p11, \[sp, #2, mul vl\] ++** add sp, sp, #?192 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ asm volatile ("" ::: ++ "z8", "z13", "z19", "z20", "z22", ++ "p5", "p6", "p11"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** sub sp, sp, #32 ++** str p4, \[sp\] ++** ptrue p0\.b, vl32 ++** ldr p4, \[sp\] ++** add sp, sp, #?32 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_5: ++** sub sp, sp, #32 ++** str z15, \[sp\] 
++** ptrue p0\.b, vl32 ++** ldr z15, \[sp\] ++** add sp, sp, #?32 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ asm volatile ("" ::: "z15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_6: ++** sub sp, sp, #32 ++** str z15, \[sp\] ++** mov z0\.b, #1 ++** ldr z15, \[sp\] ++** add sp, sp, #?32 ++** ret ++*/ ++svint8_t ++test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); ++ return svdup_s8 (1); ++} ++ ++/* ++** test_7: ++** sub sp, sp, #32 ++** str z16, \[sp\] ++** ptrue p0\.b, vl32 ++** ldr z16, \[sp\] ++** add sp, sp, #?32 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ asm volatile ("" ::: "z16"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_512.c +new file mode 100644 +index 000000000..d4b524147 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_512.c +@@ -0,0 +1,184 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection -msve-vector-bits=512 -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** test_1: ++** sub sp, sp, #1088 ++** str p4, \[sp\] ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** str p7, \[sp, #3, mul vl\] ++** str p8, \[sp, #4, mul vl\] ++** str p9, \[sp, #5, mul vl\] ++** str p10, \[sp, #6, mul vl\] ++** str p11, \[sp, #7, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z9, \[sp, #2, mul vl\] ++** str z10, \[sp, #3, mul vl\] ++** str z11, \[sp, #4, mul vl\] ++** str z12, \[sp, #5, mul vl\] ++** str z13, \[sp, #6, mul vl\] ++** str z14, \[sp, #7, mul vl\] ++** str z15, \[sp, #8, mul vl\] ++** str z16, \[sp, #9, mul vl\] ++** str z17, \[sp, #10, mul vl\] ++** str z18, \[sp, #11, mul vl\] ++** str z19, \[sp, #12, mul vl\] ++** str z20, \[sp, #13, mul vl\] ++** str z21, \[sp, #14, mul vl\] ++** str z22, \[sp, #15, mul vl\] ++** str z23, \[sp, #16, mul vl\] ++** ptrue p0\.b, vl64 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z9, \[sp, #2, mul vl\] ++** ldr z10, \[sp, #3, mul vl\] ++** ldr z11, \[sp, #4, mul vl\] ++** ldr z12, \[sp, #5, mul vl\] ++** ldr z13, \[sp, #6, mul vl\] ++** ldr z14, \[sp, #7, mul vl\] ++** ldr z15, \[sp, #8, mul vl\] ++** ldr z16, \[sp, #9, mul vl\] ++** ldr z17, \[sp, #10, mul vl\] ++** ldr z18, \[sp, #11, mul vl\] ++** ldr z19, \[sp, #12, mul vl\] ++** ldr z20, \[sp, #13, mul vl\] ++** ldr z21, \[sp, #14, mul vl\] ++** ldr z22, \[sp, #15, mul vl\] ++** ldr z23, \[sp, #16, mul vl\] ++** ldr p4, \[sp\] ++** ldr p5, \[sp, #1, mul vl\] ++** ldr p6, \[sp, #2, mul vl\] ++** ldr p7, \[sp, #3, mul vl\] ++** ldr p8, \[sp, #4, mul vl\] ++** ldr p9, \[sp, #5, mul vl\] ++** ldr p10, \[sp, #6, mul vl\] ++** ldr p11, \[sp, #7, mul vl\] ++** add sp, sp, #?1088 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", ++ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", ++ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** ptrue p0\.b, vl64 ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ asm volatile ("" ::: ++ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", ++ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", ++ 
"p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** sub sp, sp, #384 ++** str p5, \[sp\] ++** str p6, \[sp, #1, mul vl\] ++** str p11, \[sp, #2, mul vl\] ++** str z8, \[sp, #1, mul vl\] ++** str z13, \[sp, #2, mul vl\] ++** str z19, \[sp, #3, mul vl\] ++** str z20, \[sp, #4, mul vl\] ++** str z22, \[sp, #5, mul vl\] ++** ptrue p0\.b, vl64 ++** ldr z8, \[sp, #1, mul vl\] ++** ldr z13, \[sp, #2, mul vl\] ++** ldr z19, \[sp, #3, mul vl\] ++** ldr z20, \[sp, #4, mul vl\] ++** ldr z22, \[sp, #5, mul vl\] ++** ldr p5, \[sp\] ++** ldr p6, \[sp, #1, mul vl\] ++** ldr p11, \[sp, #2, mul vl\] ++** add sp, sp, #?384 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ asm volatile ("" ::: ++ "z8", "z13", "z19", "z20", "z22", ++ "p5", "p6", "p11"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** sub sp, sp, #64 ++** str p4, \[sp\] ++** ptrue p0\.b, vl64 ++** ldr p4, \[sp\] ++** add sp, sp, #?64 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_5: ++** sub sp, sp, #64 ++** str z15, \[sp\] ++** ptrue p0\.b, vl64 ++** ldr z15, \[sp\] ++** add sp, sp, #?64 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ asm volatile ("" ::: "z15"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_6: ++** sub sp, sp, #64 ++** str z15, \[sp\] ++** mov z0\.b, #1 ++** ldr z15, \[sp\] ++** add sp, sp, #?64 ++** ret ++*/ ++svint8_t ++test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) ++{ ++ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); ++ return svdup_s8 (1); ++} ++ ++/* ++** test_7: ++** sub sp, sp, #64 ++** str z16, \[sp\] ++** ptrue p0\.b, vl64 ++** ldr z16, \[sp\] ++** add sp, sp, #?64 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ asm volatile ("" ::: "z16"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c +new file mode 100644 +index 000000000..4622a1eed +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c +@@ -0,0 +1,336 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svbool_t take_stack_args (volatile void *, void *, int, int, int, ++ int, int, int, int); ++ ++/* ++** test_1: ++** cntb x12 ++** add x12, x12, #?16 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** add sp, sp, #?16 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ volatile int x = 1; ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** stp x24, x25, \[sp, -48\]! ++** str x26, \[sp, 16\] ++** cntb x13 ++** mov x11, sp ++** ... ++** sub sp, sp, x13 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldr x26, \[sp, 16\] ++** ldp x24, x25, \[sp\], 48 ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ volatile int x = 1; ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** cntb x12 ++** mov x13, #?4128 ++** add x12, x12, x13 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** addvl x11, sp, #1 ++** stp x24, x25, \[x11\] ++** str x26, \[x11, 16\] ++** str p4, \[sp\] ++** ... 
++** ptrue p0\.b, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldp x24, x25, \[sp\] ++** ldr x26, \[sp, 16\] ++** mov x12, #?4128 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ volatile int x[1024]; ++ asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** cntb x12, all, mul #2 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.h, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #2 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ volatile svint32_t b; ++ b = svdup_s32 (1); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b16 (); ++} ++ ++/* ++** test_5: ++** cntb x12, all, mul #2 ++** add x12, x12, #?32 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** addvl x11, sp, #1 ++** stp x24, x25, \[x11\] ++** str x26, \[x11, 16\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.h, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldp x24, x25, \[sp\] ++** ldr x26, \[sp, 16\] ++** addvl sp, sp, #1 ++** add sp, sp, #?32 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ volatile svint32_t b; ++ b = svdup_s32 (1); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b16 (); ++} ++ ++/* ++** test_6: ++** stp x29, x30, \[sp, -16\]! ++** mov x29, sp ++** cntb x13 ++** mov x11, sp ++** ... ++** sub sp, sp, x13 ++** str p4, \[sp\] ++** sub sp, sp, #?16 ++** ... ++** ptrue p0\.b, all ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svbool_t ++test_6 (void) ++{ ++ take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_7: ++** cntb x12 ++** mov x13, #?4112 ++** add x12, x12, x13 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** addvl x11, sp, #1 ++** stp x29, x30, \[x11\] ++** addvl x29, sp, #1 ++** str p4, \[sp\] ++** sub sp, sp, #?16 ++** ... ++** ptrue p0\.b, all ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldp x29, x30, \[sp\] ++** mov x12, #?4112 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_8: ++** cntb x12 ++** mov x13, #?4144 ++** add x12, x12, x13 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** addvl x11, sp, #1 ++** stp x29, x30, \[x11\] ++** addvl x29, sp, #1 ++** stp x24, x25, \[x29, 16\] ++** str x26, \[x29, 32\] ++** str p4, \[sp\] ++** sub sp, sp, #?16 ++** ... ++** ptrue p0\.b, all ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** mov x12, #?4144 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_8 (void) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_9: ++** cntb x12 ++** mov x13, #?4112 ++** add x12, x12, x13 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** addvl x11, sp, #1 ++** stp x29, x30, \[x11\] ++** addvl x29, sp, #1 ++** str p4, \[sp\] ++** sub sp, sp, #?16 ++** ... 
++** ptrue p0\.b, all ++** addvl sp, x29, #-1 ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldp x29, x30, \[sp\] ++** mov x12, #?4112 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_9 (int n) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_10: ++** cntb x12 ++** mov x13, #?4144 ++** add x12, x12, x13 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** addvl x11, sp, #1 ++** stp x29, x30, \[x11\] ++** addvl x29, sp, #1 ++** stp x24, x25, \[x29, 16\] ++** str x26, \[x29, 32\] ++** str p4, \[sp\] ++** sub sp, sp, #?16 ++** ... ++** ptrue p0\.b, all ++** addvl sp, x29, #-1 ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** mov x12, #?4144 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_10 (int n) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_11: ++** cntb x12 ++** add x12, x12, #?3008 ++** add x12, x12, #?126976 ++** mov x11, sp ++** ... ++** sub sp, sp, x12 ++** addvl x11, sp, #1 ++** stp x29, x30, \[x11\] ++** addvl x29, sp, #1 ++** stp x24, x25, \[x29, 16\] ++** str x26, \[x29, 32\] ++** str p4, \[sp\] ++** sub sp, sp, #?16 ++** ... ++** ptrue p0\.b, all ++** addvl sp, x29, #-1 ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** add sp, sp, #?3008 ++** add sp, sp, #?126976 ++** ret ++*/ ++svbool_t ++test_11 (int n) ++{ ++ volatile int x[0x7ee4]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c +new file mode 100644 +index 000000000..d5a9d4444 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c +@@ -0,0 +1,285 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -msve-vector-bits=1024 -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svbool_t take_stack_args (volatile void *, void *, int, int, int, ++ int, int, int, int); ++ ++/* ++** test_1: ++** sub sp, sp, #144 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl128 ++** ldr p4, \[sp\] ++** add sp, sp, #?144 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ volatile int x = 1; ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** sub sp, sp, #176 ++** stp x24, x25, \[sp, 128\] ++** str x26, \[sp, 144\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl128 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 128\] ++** ldr x26, \[sp, 144\] ++** add sp, sp, #?176 ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ volatile int x = 1; ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** mov x12, #?4256 ++** sub sp, sp, x12 ++** stp x24, x25, \[sp, 128\] ++** str x26, \[sp, 144\] ++** str p4, \[sp\] ++** ... 
++** ptrue p0\.b, vl128 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 128\] ++** ldr x26, \[sp, 144\] ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ volatile int x[1024]; ++ asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** sub sp, sp, #256 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.h, vl64 ++** ldr p4, \[sp\] ++** add sp, sp, #?256 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ volatile svint32_t b; ++ b = svdup_s32 (1); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b16 (); ++} ++ ++/* ++** test_5: ++** sub sp, sp, #288 ++** stp x24, x25, \[sp, 128\] ++** str x26, \[sp, 144\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.h, vl64 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 128\] ++** ldr x26, \[sp, 144\] ++** add sp, sp, #?288 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ volatile svint32_t b; ++ b = svdup_s32 (1); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b16 (); ++} ++ ++/* ++** test_6: ++** stp x29, x30, \[sp, -16\]! ++** mov x29, sp ++** sub sp, sp, #128 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl128 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?128 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svbool_t ++test_6 (void) ++{ ++ take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_7: ++** mov x12, #?4240 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 128\] ++** add x29, sp, #?128 ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl128 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?128 ++** ldp x29, x30, \[sp\] ++** mov x12, #?4112 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_8: ++** mov x12, #?4272 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 128\] ++** add x29, sp, #?128 ++** stp x24, x25, \[sp, 144\] ++** str x26, \[sp, 160\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl128 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?128 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** mov x12, #?4144 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_8 (void) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_9: ++** mov x12, #?4240 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 128\] ++** add x29, sp, #?128 ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl128 ++** sub sp, x29, #128 ++** ldr p4, \[sp\] ++** add sp, sp, #?128 ++** ldp x29, x30, \[sp\] ++** mov x12, #?4112 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_9 (int n) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_10: ++** mov x12, #?4272 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 128\] ++** add x29, sp, #?128 ++** stp x24, x25, \[sp, 144\] ++** str x26, \[sp, 160\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... 
++** ptrue p0\.b, vl128 ++** sub sp, x29, #128 ++** ldr p4, \[sp\] ++** add sp, sp, #?128 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** mov x12, #?4144 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_10 (int n) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_11: ++** sub sp, sp, #65536 ++** str xzr, \[sp, 1024\] ++** mov x12, #?64576 ++** sub sp, sp, x12 ++** str xzr, \[sp, 1024\] ++** stp x29, x30, \[sp, 128\] ++** add x29, sp, #?128 ++** stp x24, x25, \[sp, 144\] ++** str x26, \[sp, 160\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl128 ++** sub sp, x29, #128 ++** ldr p4, \[sp\] ++** add sp, sp, #?128 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** add sp, sp, #?3008 ++** add sp, sp, #?126976 ++** ret ++*/ ++svbool_t ++test_11 (int n) ++{ ++ volatile int x[0x7ee4]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c +new file mode 100644 +index 000000000..c185e2e36 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c +@@ -0,0 +1,285 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -msve-vector-bits=2048 -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svbool_t take_stack_args (volatile void *, void *, int, int, int, ++ int, int, int, int); ++ ++/* ++** test_1: ++** sub sp, sp, #272 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl256 ++** ldr p4, \[sp\] ++** add sp, sp, #?272 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ volatile int x = 1; ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** sub sp, sp, #304 ++** stp x24, x25, \[sp, 256\] ++** str x26, \[sp, 272\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl256 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 256\] ++** ldr x26, \[sp, 272\] ++** add sp, sp, #?304 ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ volatile int x = 1; ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** mov x12, #?4384 ++** sub sp, sp, x12 ++** stp x24, x25, \[sp, 256\] ++** str x26, \[sp, 272\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl256 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 256\] ++** ldr x26, \[sp, 272\] ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ volatile int x[1024]; ++ asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** sub sp, sp, #512 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.h, vl128 ++** ldr p4, \[sp\] ++** add sp, sp, #?512 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ volatile svint32_t b; ++ b = svdup_s32 (1); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b16 (); ++} ++ ++/* ++** test_5: ++** sub sp, sp, #544 ++** stp x24, x25, \[sp, 256\] ++** str x26, \[sp, 272\] ++** str p4, \[sp\] ++** ... 
++** ptrue p0\.h, vl128 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 256\] ++** ldr x26, \[sp, 272\] ++** add sp, sp, #?544 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ volatile svint32_t b; ++ b = svdup_s32 (1); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b16 (); ++} ++ ++/* ++** test_6: ++** stp x29, x30, \[sp, -16\]! ++** mov x29, sp ++** sub sp, sp, #256 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl256 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?256 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svbool_t ++test_6 (void) ++{ ++ take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_7: ++** mov x12, #?4368 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 256\] ++** add x29, sp, #?256 ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl256 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?256 ++** ldp x29, x30, \[sp\] ++** mov x12, #?4112 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_8: ++** mov x12, #?4400 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 256\] ++** add x29, sp, #?256 ++** stp x24, x25, \[sp, 272\] ++** str x26, \[sp, 288\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl256 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?256 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** mov x12, #?4144 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_8 (void) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_9: ++** mov x12, #?4368 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 256\] ++** add x29, sp, #?256 ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl256 ++** sub sp, x29, #256 ++** ldr p4, \[sp\] ++** add sp, sp, #?256 ++** ldp x29, x30, \[sp\] ++** mov x12, #?4112 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_9 (int n) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_10: ++** mov x12, #?4400 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 256\] ++** add x29, sp, #?256 ++** stp x24, x25, \[sp, 272\] ++** str x26, \[sp, 288\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl256 ++** sub sp, x29, #256 ++** ldr p4, \[sp\] ++** add sp, sp, #?256 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** mov x12, #?4144 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_10 (int n) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_11: ++** sub sp, sp, #65536 ++** str xzr, \[sp, 1024\] ++** mov x12, #?64704 ++** sub sp, sp, x12 ++** str xzr, \[sp, 1024\] ++** stp x29, x30, \[sp, 256\] ++** add x29, sp, #?256 ++** stp x24, x25, \[sp, 272\] ++** str x26, \[sp, 288\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... 
++** ptrue p0\.b, vl256 ++** sub sp, x29, #256 ++** ldr p4, \[sp\] ++** add sp, sp, #?256 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** add sp, sp, #?3008 ++** add sp, sp, #?126976 ++** ret ++*/ ++svbool_t ++test_11 (int n) ++{ ++ volatile int x[0x7ee4]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c +new file mode 100644 +index 000000000..f8318b354 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c +@@ -0,0 +1,284 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -msve-vector-bits=256 -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svbool_t take_stack_args (volatile void *, void *, int, int, int, ++ int, int, int, int); ++ ++/* ++** test_1: ++** sub sp, sp, #48 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl32 ++** ldr p4, \[sp\] ++** add sp, sp, #?48 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ volatile int x = 1; ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** sub sp, sp, #80 ++** stp x24, x25, \[sp, 32\] ++** str x26, \[sp, 48\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl32 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 32\] ++** ldr x26, \[sp, 48\] ++** add sp, sp, #?80 ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ volatile int x = 1; ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** mov x12, #?4160 ++** sub sp, sp, x12 ++** stp x24, x25, \[sp, 32\] ++** str x26, \[sp, 48\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl32 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 32\] ++** ldr x26, \[sp, 48\] ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ volatile int x[1024]; ++ asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** sub sp, sp, #64 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.h, vl16 ++** ldr p4, \[sp\] ++** add sp, sp, #?64 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ volatile svint32_t b; ++ b = svdup_s32 (1); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b16 (); ++} ++ ++/* ++** test_5: ++** sub sp, sp, #96 ++** stp x24, x25, \[sp, 32\] ++** str x26, \[sp, 48\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.h, vl16 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 32\] ++** ldr x26, \[sp, 48\] ++** add sp, sp, #?96 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ volatile svint32_t b; ++ b = svdup_s32 (1); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b16 (); ++} ++ ++/* ++** test_6: ++** stp x29, x30, \[sp, -16\]! ++** mov x29, sp ++** sub sp, sp, #32 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl32 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?32 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svbool_t ++test_6 (void) ++{ ++ take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_7: ++** mov x12, #?4144 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 32\] ++** add x29, sp, #?32 ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... 
++** ptrue p0\.b, vl32 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?32 ++** ldp x29, x30, \[sp\] ++** mov x12, #?4112 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_8: ++** mov x12, #?4176 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 32\] ++** add x29, sp, #?32 ++** stp x24, x25, \[sp, 48\] ++** str x26, \[sp, 64\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl32 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?32 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** mov x12, #?4144 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_8 (void) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_9: ++** mov x12, #?4144 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 32\] ++** add x29, sp, #?32 ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl32 ++** sub sp, x29, #32 ++** ldr p4, \[sp\] ++** add sp, sp, #?32 ++** ldp x29, x30, \[sp\] ++** mov x12, #?4112 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_9 (int n) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_10: ++** mov x12, #?4176 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 32\] ++** add x29, sp, #?32 ++** stp x24, x25, \[sp, 48\] ++** str x26, \[sp, 64\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl32 ++** sub sp, x29, #32 ++** ldr p4, \[sp\] ++** add sp, sp, #?32 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** mov x12, #?4144 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_10 (int n) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_11: ++** sub sp, sp, #65536 ++** str xzr, \[sp, 1024\] ++** mov x12, #?64480 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 32\] ++** add x29, sp, #?32 ++** stp x24, x25, \[sp, 48\] ++** str x26, \[sp, 64\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl32 ++** sub sp, x29, #32 ++** ldr p4, \[sp\] ++** add sp, sp, #?32 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** add sp, sp, #?3008 ++** add sp, sp, #?126976 ++** ret ++*/ ++svbool_t ++test_11 (int n) ++{ ++ volatile int x[0x7ee4]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c +new file mode 100644 +index 000000000..45a23ad49 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c +@@ -0,0 +1,285 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -msve-vector-bits=512 -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++svbool_t take_stack_args (volatile void *, void *, int, int, int, ++ int, int, int, int); ++ ++/* ++** test_1: ++** sub sp, sp, #80 ++** str p4, \[sp\] ++** ... 
++** ptrue p0\.b, vl64 ++** ldr p4, \[sp\] ++** add sp, sp, #?80 ++** ret ++*/ ++svbool_t ++test_1 (void) ++{ ++ volatile int x = 1; ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** sub sp, sp, #112 ++** stp x24, x25, \[sp, 64\] ++** str x26, \[sp, 80\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl64 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 64\] ++** ldr x26, \[sp, 80\] ++** add sp, sp, #?112 ++** ret ++*/ ++svbool_t ++test_2 (void) ++{ ++ volatile int x = 1; ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_3: ++** mov x12, #?4192 ++** sub sp, sp, x12 ++** stp x24, x25, \[sp, 64\] ++** str x26, \[sp, 80\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl64 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 64\] ++** ldr x26, \[sp, 80\] ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_3 (void) ++{ ++ volatile int x[1024]; ++ asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_4: ++** sub sp, sp, #128 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.h, vl32 ++** ldr p4, \[sp\] ++** add sp, sp, #?128 ++** ret ++*/ ++svbool_t ++test_4 (void) ++{ ++ volatile svint32_t b; ++ b = svdup_s32 (1); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b16 (); ++} ++ ++/* ++** test_5: ++** sub sp, sp, #160 ++** stp x24, x25, \[sp, 64\] ++** str x26, \[sp, 80\] ++** str p4, \[sp\] ++** ... ++** ptrue p0\.h, vl32 ++** ldr p4, \[sp\] ++** ldp x24, x25, \[sp, 64\] ++** ldr x26, \[sp, 80\] ++** add sp, sp, #?160 ++** ret ++*/ ++svbool_t ++test_5 (void) ++{ ++ volatile svint32_t b; ++ b = svdup_s32 (1); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b16 (); ++} ++ ++/* ++** test_6: ++** stp x29, x30, \[sp, -16\]! ++** mov x29, sp ++** sub sp, sp, #64 ++** str p4, \[sp\] ++** ... ++** ptrue p0\.b, vl64 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?64 ++** ldp x29, x30, \[sp\], 16 ++** ret ++*/ ++svbool_t ++test_6 (void) ++{ ++ take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_7: ++** mov x12, #?4176 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 64\] ++** add x29, sp, #?64 ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl64 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?64 ++** ldp x29, x30, \[sp\] ++** mov x12, #?4112 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_7 (void) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_8: ++** mov x12, #?4208 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 64\] ++** add x29, sp, #?64 ++** stp x24, x25, \[sp, 80\] ++** str x26, \[sp, 96\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl64 ++** add sp, sp, #?16 ++** ldr p4, \[sp\] ++** add sp, sp, #?64 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** mov x12, #?4144 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_8 (void) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_9: ++** mov x12, #?4176 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 64\] ++** add x29, sp, #?64 ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... 
++** ptrue p0\.b, vl64 ++** sub sp, x29, #64 ++** ldr p4, \[sp\] ++** add sp, sp, #?64 ++** ldp x29, x30, \[sp\] ++** mov x12, #?4112 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_9 (int n) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_10: ++** mov x12, #?4208 ++** sub sp, sp, x12 ++** stp x29, x30, \[sp, 64\] ++** add x29, sp, #?64 ++** stp x24, x25, \[sp, 80\] ++** str x26, \[sp, 96\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl64 ++** sub sp, x29, #64 ++** ldr p4, \[sp\] ++** add sp, sp, #?64 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** mov x12, #?4144 ++** add sp, sp, x12 ++** ret ++*/ ++svbool_t ++test_10 (int n) ++{ ++ volatile int x[1024]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} ++ ++/* ++** test_11: ++** sub sp, sp, #65536 ++** str xzr, \[sp, 1024\] ++** mov x12, #?64512 ++** sub sp, sp, x12 ++** str xzr, \[sp, 1024\] ++** stp x29, x30, \[sp, 64\] ++** add x29, sp, #?64 ++** stp x24, x25, \[sp, 80\] ++** str x26, \[sp, 96\] ++** str p4, \[sp\] ++** sub sp, sp, #16 ++** ... ++** ptrue p0\.b, vl64 ++** sub sp, x29, #64 ++** ldr p4, \[sp\] ++** add sp, sp, #?64 ++** ldp x24, x25, \[sp, 16\] ++** ldr x26, \[sp, 32\] ++** ldp x29, x30, \[sp\] ++** add sp, sp, #?3008 ++** add sp, sp, #?126976 ++** ret ++*/ ++svbool_t ++test_11 (int n) ++{ ++ volatile int x[0x7ee4]; ++ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); ++ asm volatile ("" ::: "p4", "x24", "x25", "x26"); ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c +new file mode 100644 +index 000000000..3e01ec36c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c +@@ -0,0 +1,63 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++#pragma GCC aarch64 "arm_sve.h" ++ ++/* ++** test_1: ++** str x24, \[sp, -32\]! ++** cntb x13 ++** mov x11, sp ++** ... ++** sub sp, sp, x13 ++** str p4, \[sp\] ++** cbz w0, [^\n]* ++** ... ++** ptrue p0\.b, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldr x24, \[sp\], 32 ++** ret ++*/ ++svbool_t ++test_1 (int n) ++{ ++ asm volatile ("" ::: "x24"); ++ if (n) ++ { ++ volatile int x = 1; ++ asm volatile ("" ::: "p4"); ++ } ++ return svptrue_b8 (); ++} ++ ++/* ++** test_2: ++** str x24, \[sp, -32\]! ++** cntb x13 ++** mov x11, sp ++** ... ++** sub sp, sp, x13 ++** str p4, \[sp\] ++** cbz w0, [^\n]* ++** str p5, \[sp, #1, mul vl\] ++** str p6, \[sp, #2, mul vl\] ++** ... 
++** ptrue p0\.b, all ++** ldr p4, \[sp\] ++** addvl sp, sp, #1 ++** ldr x24, \[sp\], 32 ++** ret ++*/ ++svbool_t ++test_2 (int n) ++{ ++ asm volatile ("" ::: "x24"); ++ if (n) ++ { ++ volatile int x = 1; ++ asm volatile ("" ::: "p4", "p5", "p6"); ++ } ++ return svptrue_b8 (); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/unprototyped_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/unprototyped_1.c +new file mode 100644 +index 000000000..5c7ed5167 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/unprototyped_1.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++void unprototyped (); ++ ++void ++f (svuint8_t *ptr) ++{ ++ unprototyped (*ptr); /* { dg-error {SVE type '(svuint8_t|__SVUint8_t)' cannot be passed to an unprototyped function} } */ ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_1.c +new file mode 100644 +index 000000000..6987245a6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_1.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ldr (p[0-7]), \[x1\] ++** ... ++** cntp x0, \1, \1\.b ++** ... ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++callee_0 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svbool_t pg; ++ ++ va_start (va, ptr); ++ pg = va_arg (va, svbool_t); ++ va_end (va); ++ return svcntp_b8 (pg, pg); ++} ++ ++/* ++** caller_0: ++** ... ++** ptrue (p[0-7])\.d, vl7 ++** ... ++** str \1, \[x1\] ++** ... ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++caller_0 (int64_t *ptr) ++{ ++ return callee_0 (ptr, svptrue_pat_b64 (SV_VL7)); ++} ++ ++/* ++** callee_1: ++** ... ++** ldr (p[0-7]), \[x2\] ++** ... ++** cntp x0, \1, \1\.b ++** ... ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++callee_1 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svbool_t pg; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ pg = va_arg (va, svbool_t); ++ va_end (va); ++ return svcntp_b8 (pg, pg); ++} ++ ++/* ++** caller_1: ++** ... ++** ptrue (p[0-7])\.d, vl7 ++** ... ++** str \1, \[x2\] ++** ... ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++caller_1 (int64_t *ptr) ++{ ++ return callee_1 (ptr, 1, svptrue_pat_b64 (SV_VL7)); ++} ++ ++/* ++** callee_7: ++** ... ++** ldr (p[0-7]), \[x7\] ++** ... ++** cntp x0, \1, \1\.b ++** ... ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++callee_7 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svbool_t pg; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ pg = va_arg (va, svbool_t); ++ va_end (va); ++ return svcntp_b8 (pg, pg); ++} ++ ++/* ++** caller_7: ++** ... ++** ptrue (p[0-7])\.d, vl7 ++** ... ++** str \1, \[x7\] ++** ... ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++caller_7 (int64_t *ptr) ++{ ++ return callee_7 (ptr, 1, 2, 3, 4, 5, 6, svptrue_pat_b64 (SV_VL7)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ldr (p[0-7]), \[\2\] ++** ... ++** cntp x0, \3, \3\.b ++** ... ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++callee_8 (int64_t *ptr, ...) 
++{ ++ va_list va; ++ svbool_t pg; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ pg = va_arg (va, svbool_t); ++ va_end (va); ++ return svcntp_b8 (pg, pg); ++} ++ ++/* ++** caller_8: ++** ... ++** ptrue (p[0-7])\.d, vl7 ++** ... ++** str \1, \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... ++** ret ++*/ ++uint64_t __attribute__((noipa)) ++caller_8 (int64_t *ptr) ++{ ++ return callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svptrue_pat_b64 (SV_VL7)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c +new file mode 100644 +index 000000000..79098851c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] ++** ... ++** st1h \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int16_t *ptr, ...) ++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** fmov (z[0-9]+\.h), #9\.0[^\n]* ++** ... ++** st1h \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int16_t *ptr) ++{ ++ callee_0 (ptr, svdup_f16 (9)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] ++** ... ++** st1h \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int16_t *ptr, ...) ++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** fmov (z[0-9]+\.h), #9\.0[^\n]* ++** ... ++** st1h \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int16_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_f16 (9)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] ++** ... ++** st1h \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int16_t *ptr, ...) ++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** fmov (z[0-9]+\.h), #9\.0[^\n]* ++** ... ++** st1h \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int16_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_f16 (9)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[\2\] ++** ... ++** st1h \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int16_t *ptr, ...) 
++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... ++** fmov (z[0-9]+\.h), #9\.0[^\n]* ++** ... ++** st1h \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int16_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_f16 (9)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c +new file mode 100644 +index 000000000..325b0b2aa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] ++** ... ++** st1w \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int32_t *ptr, ...) ++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** fmov (z[0-9]+\.s), #9\.0[^\n]* ++** ... ++** st1w \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int32_t *ptr) ++{ ++ callee_0 (ptr, svdup_f32 (9)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] ++** ... ++** st1w \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int32_t *ptr, ...) ++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** fmov (z[0-9]+\.s), #9\.0[^\n]* ++** ... ++** st1w \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int32_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_f32 (9)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] ++** ... ++** st1w \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int32_t *ptr, ...) ++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** fmov (z[0-9]+\.s), #9\.0[^\n]* ++** ... ++** st1w \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int32_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_f32 (9)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[\2\] ++** ... ++** st1w \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int32_t *ptr, ...) 
++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... ++** fmov (z[0-9]+\.s), #9\.0[^\n]* ++** ... ++** st1w \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int32_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_f32 (9)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c +new file mode 100644 +index 000000000..07a6c707e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] ++** ... ++** st1d \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** fmov (z[0-9]+\.d), #9\.0[^\n]* ++** ... ++** st1d \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int64_t *ptr) ++{ ++ callee_0 (ptr, svdup_f64 (9)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] ++** ... ++** st1d \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** fmov (z[0-9]+\.d), #9\.0[^\n]* ++** ... ++** st1d \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int64_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_f64 (9)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] ++** ... ++** st1d \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** fmov (z[0-9]+\.d), #9\.0[^\n]* ++** ... ++** st1d \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int64_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_f64 (9)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[\2\] ++** ... ++** st1d \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int64_t *ptr, ...) 
++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... ++** fmov (z[0-9]+\.d), #9\.0[^\n]* ++** ... ++** st1d \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int64_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_f64 (9)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c +new file mode 100644 +index 000000000..173063833 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] ++** ... ++** st1h \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int16_t *ptr, ...) ++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** mov (z[0-9]+\.h), #42 ++** ... ++** st1h \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int16_t *ptr) ++{ ++ callee_0 (ptr, svdup_s16 (42)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] ++** ... ++** st1h \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int16_t *ptr, ...) ++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** mov (z[0-9]+\.h), #42 ++** ... ++** st1h \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int16_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_s16 (42)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] ++** ... ++** st1h \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int16_t *ptr, ...) ++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** mov (z[0-9]+\.h), #42 ++** ... ++** st1h \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int16_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_s16 (42)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[\2\] ++** ... ++** st1h \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int16_t *ptr, ...) 
++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... ++** mov (z[0-9]+\.h), #42 ++** ... ++** st1h \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int16_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_s16 (42)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c +new file mode 100644 +index 000000000..d93db8fc8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] ++** ... ++** st1w \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int32_t *ptr, ...) ++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** mov (z[0-9]+\.s), #42 ++** ... ++** st1w \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int32_t *ptr) ++{ ++ callee_0 (ptr, svdup_s32 (42)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] ++** ... ++** st1w \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int32_t *ptr, ...) ++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** mov (z[0-9]+\.s), #42 ++** ... ++** st1w \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int32_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_s32 (42)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] ++** ... ++** st1w \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int32_t *ptr, ...) ++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** mov (z[0-9]+\.s), #42 ++** ... ++** st1w \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int32_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_s32 (42)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[\2\] ++** ... ++** st1w \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int32_t *ptr, ...) 
++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... ++** mov (z[0-9]+\.s), #42 ++** ... ++** st1w \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int32_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_s32 (42)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c +new file mode 100644 +index 000000000..b8c77455d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] ++** ... ++** st1d \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** mov (z[0-9]+\.d), #42 ++** ... ++** st1d \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int64_t *ptr) ++{ ++ callee_0 (ptr, svdup_s64 (42)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] ++** ... ++** st1d \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** mov (z[0-9]+\.d), #42 ++** ... ++** st1d \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int64_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_s64 (42)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] ++** ... ++** st1d \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** mov (z[0-9]+\.d), #42 ++** ... ++** st1d \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int64_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_s64 (42)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[\2\] ++** ... ++** st1d \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int64_t *ptr, ...) 
++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... ++** mov (z[0-9]+\.d), #42 ++** ... ++** st1d \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int64_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_s64 (42)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c +new file mode 100644 +index 000000000..de7cbe37d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] ++** ... ++** st1b \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int8_t *ptr, ...) ++{ ++ va_list va; ++ svint8_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint8_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** mov (z[0-9]+\.b), #42 ++** ... ++** st1b \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int8_t *ptr) ++{ ++ callee_0 (ptr, svdup_s8 (42)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] ++** ... ++** st1b \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int8_t *ptr, ...) ++{ ++ va_list va; ++ svint8_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint8_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** mov (z[0-9]+\.b), #42 ++** ... ++** st1b \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int8_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_s8 (42)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] ++** ... ++** st1b \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int8_t *ptr, ...) ++{ ++ va_list va; ++ svint8_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint8_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** mov (z[0-9]+\.b), #42 ++** ... ++** st1b \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int8_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_s8 (42)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] ++** ... ++** st1b \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int8_t *ptr, ...) ++{ ++ va_list va; ++ svint8_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint8_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... 
++** mov (z[0-9]+\.b), #42 ++** ... ++** st1b \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int8_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_s8 (42)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c +new file mode 100644 +index 000000000..59c9ca7db +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] ++** ... ++** st1h \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int16_t *ptr, ...) ++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** mov (z[0-9]+\.h), #42 ++** ... ++** st1h \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int16_t *ptr) ++{ ++ callee_0 (ptr, svdup_u16 (42)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] ++** ... ++** st1h \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int16_t *ptr, ...) ++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** mov (z[0-9]+\.h), #42 ++** ... ++** st1h \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int16_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_u16 (42)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] ++** ... ++** st1h \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int16_t *ptr, ...) ++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** mov (z[0-9]+\.h), #42 ++** ... ++** st1h \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int16_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_u16 (42)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1h (z[0-9]+\.h), (p[0-7])/z, \[\2\] ++** ... ++** st1h \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int16_t *ptr, ...) ++{ ++ va_list va; ++ svint16_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint16_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... ++** mov (z[0-9]+\.h), #42 ++** ... ++** st1h \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... 
++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int16_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_u16 (42)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c +new file mode 100644 +index 000000000..3050ad5f6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] ++** ... ++** st1w \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int32_t *ptr, ...) ++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** mov (z[0-9]+\.s), #42 ++** ... ++** st1w \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int32_t *ptr) ++{ ++ callee_0 (ptr, svdup_u32 (42)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] ++** ... ++** st1w \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int32_t *ptr, ...) ++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** mov (z[0-9]+\.s), #42 ++** ... ++** st1w \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int32_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_u32 (42)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] ++** ... ++** st1w \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int32_t *ptr, ...) ++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** mov (z[0-9]+\.s), #42 ++** ... ++** st1w \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int32_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_u32 (42)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1w (z[0-9]+\.s), (p[0-7])/z, \[\2\] ++** ... ++** st1w \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int32_t *ptr, ...) ++{ ++ va_list va; ++ svint32_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint32_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... ++** mov (z[0-9]+\.s), #42 ++** ... ++** st1w \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... 
++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int32_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_u32 (42)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c +new file mode 100644 +index 000000000..94322a34c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] ++** ... ++** st1d \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** mov (z[0-9]+\.d), #42 ++** ... ++** st1d \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int64_t *ptr) ++{ ++ callee_0 (ptr, svdup_u64 (42)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] ++** ... ++** st1d \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** mov (z[0-9]+\.d), #42 ++** ... ++** st1d \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int64_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_u64 (42)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] ++** ... ++** st1d \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** mov (z[0-9]+\.d), #42 ++** ... ++** st1d \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int64_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_u64 (42)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1d (z[0-9]+\.d), (p[0-7])/z, \[\2\] ++** ... ++** st1d \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int64_t *ptr, ...) ++{ ++ va_list va; ++ svint64_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint64_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... ++** mov (z[0-9]+\.d), #42 ++** ... ++** st1d \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... 
++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int64_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_u64 (42)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c +new file mode 100644 +index 000000000..cf8ac2171 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c +@@ -0,0 +1,170 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ ++/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ ++ ++#include ++#include ++ ++/* ++** callee_0: ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] ++** ... ++** st1b \1, \2, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_0 (int8_t *ptr, ...) ++{ ++ va_list va; ++ svint8_t vec; ++ ++ va_start (va, ptr); ++ vec = va_arg (va, svint8_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_0: ++** ... ++** mov (z[0-9]+\.b), #42 ++** ... ++** st1b \1, p[0-7], \[x1\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_0 (int8_t *ptr) ++{ ++ callee_0 (ptr, svdup_u8 (42)); ++} ++ ++/* ++** callee_1: ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] ++** ... ++** st1b \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_1 (int8_t *ptr, ...) ++{ ++ va_list va; ++ svint8_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ vec = va_arg (va, svint8_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_1: ++** ... ++** mov (z[0-9]+\.b), #42 ++** ... ++** st1b \1, p[0-7], \[x2\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_1 (int8_t *ptr) ++{ ++ callee_1 (ptr, 1, svdup_u8 (42)); ++} ++ ++/* ++** callee_7: ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] ++** ... ++** st1b \1, p[0-7], \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_7 (int8_t *ptr, ...) ++{ ++ va_list va; ++ svint8_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint8_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_7: ++** ... ++** mov (z[0-9]+\.b), #42 ++** ... ++** st1b \1, p[0-7], \[x7\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++caller_7 (int8_t *ptr) ++{ ++ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_u8 (42)); ++} ++ ++/* FIXME: We should be able to get rid of the va_list object. */ ++/* ++** callee_8: ++** sub sp, sp, #([0-9]+) ++** ... ++** ldr (x[0-9]+), \[sp, \1\] ++** ... ++** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] ++** ... ++** st1b \3, \4, \[x0\] ++** ... ++** ret ++*/ ++void __attribute__((noipa)) ++callee_8 (int8_t *ptr, ...) ++{ ++ va_list va; ++ svint8_t vec; ++ ++ va_start (va, ptr); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ va_arg (va, int); ++ vec = va_arg (va, svint8_t); ++ va_end (va); ++ svst1 (svptrue_b8 (), ptr, vec); ++} ++ ++/* ++** caller_8: ++** ... ++** mov (z[0-9]+\.b), #42 ++** ... ++** st1b \1, p[0-7], \[(x[0-9]+)\] ++** ... ++** str \2, \[sp\] ++** ... 
++** ret ++*/ ++void __attribute__((noipa)) ++caller_8 (int8_t *ptr) ++{ ++ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_u8 (42)); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_nosc.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_nosc.c +new file mode 100644 +index 000000000..cea69cc88 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_nosc.c +@@ -0,0 +1,75 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O0 -g" } */ ++ ++#include ++#include ++ ++void __attribute__((noipa)) ++callee (int foo, ...) ++{ ++ va_list va; ++ svbool_t pg, p; ++ svint8_t s8; ++ svuint16x4_t u16; ++ svfloat32x3_t f32; ++ svint64x2_t s64; ++ ++ va_start (va, foo); ++ p = va_arg (va, svbool_t); ++ s8 = va_arg (va, svint8_t); ++ u16 = va_arg (va, svuint16x4_t); ++ f32 = va_arg (va, svfloat32x3_t); ++ s64 = va_arg (va, svint64x2_t); ++ ++ pg = svptrue_b8 (); ++ ++ if (svptest_any (pg, sveor_z (pg, p, svptrue_pat_b8 (SV_VL7)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, s8, svindex_s8 (1, 2)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 0), svindex_u16 (2, 3)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 1), svindex_u16 (3, 4)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 2), svindex_u16 (4, 5)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 3), svindex_u16 (5, 6)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 0), svdup_f32 (1.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 1), svdup_f32 (2.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 2), svdup_f32 (3.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 0), svindex_s64 (6, 7)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 1), svindex_s64 (7, 8)))) ++ __builtin_abort (); ++} ++ ++int __attribute__((noipa)) ++main (void) ++{ ++ callee (100, ++ svptrue_pat_b8 (SV_VL7), ++ svindex_s8 (1, 2), ++ svcreate4 (svindex_u16 (2, 3), ++ svindex_u16 (3, 4), ++ svindex_u16 (4, 5), ++ svindex_u16 (5, 6)), ++ svcreate3 (svdup_f32 (1.0), ++ svdup_f32 (2.0), ++ svdup_f32 (3.0)), ++ svcreate2 (svindex_s64 (6, 7), ++ svindex_s64 (7, 8))); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_sc.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_sc.c +new file mode 100644 +index 000000000..b939aa5ea +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_sc.c +@@ -0,0 +1,75 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O0 -fstack-clash-protection -g" } */ ++ ++#include ++#include ++ ++void __attribute__((noipa)) ++callee (int foo, ...) 
++{ ++ va_list va; ++ svbool_t pg, p; ++ svint8_t s8; ++ svuint16x4_t u16; ++ svfloat32x3_t f32; ++ svint64x2_t s64; ++ ++ va_start (va, foo); ++ p = va_arg (va, svbool_t); ++ s8 = va_arg (va, svint8_t); ++ u16 = va_arg (va, svuint16x4_t); ++ f32 = va_arg (va, svfloat32x3_t); ++ s64 = va_arg (va, svint64x2_t); ++ ++ pg = svptrue_b8 (); ++ ++ if (svptest_any (pg, sveor_z (pg, p, svptrue_pat_b8 (SV_VL7)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, s8, svindex_s8 (1, 2)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 0), svindex_u16 (2, 3)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 1), svindex_u16 (3, 4)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 2), svindex_u16 (4, 5)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 3), svindex_u16 (5, 6)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 0), svdup_f32 (1.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 1), svdup_f32 (2.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 2), svdup_f32 (3.0)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 0), svindex_s64 (6, 7)))) ++ __builtin_abort (); ++ ++ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 1), svindex_s64 (7, 8)))) ++ __builtin_abort (); ++} ++ ++int __attribute__((noipa)) ++main (void) ++{ ++ callee (100, ++ svptrue_pat_b8 (SV_VL7), ++ svindex_s8 (1, 2), ++ svcreate4 (svindex_u16 (2, 3), ++ svindex_u16 (3, 4), ++ svindex_u16 (4, 5), ++ svindex_u16 (5, 6)), ++ svcreate3 (svdup_f32 (1.0), ++ svdup_f32 (2.0), ++ svdup_f32 (3.0)), ++ svcreate2 (svindex_s64 (6, 7), ++ svindex_s64 (7, 8))); ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/vpcs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/vpcs_1.c +new file mode 100644 +index 000000000..d9f4e6c41 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/vpcs_1.c +@@ -0,0 +1,6 @@ ++/* { dg-do compile } */ ++ ++__attribute__ ((aarch64_vector_pcs)) void f1 (__SVBool_t); /* { dg-error {the 'aarch64_vector_pcs' attribute cannot be applied to an SVE function type} } */ ++__attribute__ ((aarch64_vector_pcs)) void f2 (__SVInt8_t s8) {} /* { dg-error {the 'aarch64_vector_pcs' attribute cannot be applied to an SVE function type} } */ ++__attribute__ ((aarch64_vector_pcs)) void (*f3) (__SVInt16_t); /* { dg-error {the 'aarch64_vector_pcs' attribute cannot be applied to an SVE function type} } */ ++typedef __attribute__ ((aarch64_vector_pcs)) void (*f4) (__SVInt32_t); /* { dg-error {the 'aarch64_vector_pcs' attribute cannot be applied to an SVE function type} } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_1.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_1.c +index a064c337b..156d04ae5 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_1.c +@@ -25,3 +25,4 @@ foo (void) + /* We should use an induction that starts at -5, with only the last + 7 elements of the first iteration being active. 
*/ + /* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #-5, #5\n} } } */ ++/* { dg-final { scan-assembler {\tptrue\t(p[0-9]+\.b), vl1\n.*\tnot\tp[0-7]\.b, p[0-7]/z, \1\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c +index f2113be90..e792cdf2c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c +@@ -20,3 +20,4 @@ foo (void) + /* { dg-final { scan-assembler {\t(adrp|adr)\tx[0-9]+, x\n} } } */ + /* We should unroll the loop three times. */ + /* { dg-final { scan-assembler-times "\tst1w\t" 3 } } */ ++/* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.s, vl7\n.*\teor\tp[0-7]\.b, (p[0-7])/z, (\1\.b, \2\.b|\2\.b, \1\.b)\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/revb_1.c +index 1a3d9b4ea..9cf2f27c8 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/revb_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/revb_1.c +@@ -1,9 +1,7 @@ + /* { dg-do assemble { target aarch64_asm_sve_ok } } */ +-/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */ + +-#include +- +-typedef int8_t vnx16qi __attribute__((vector_size (32))); ++typedef __INT8_TYPE__ vnx16qi __attribute__((vector_size (32))); + + #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) + #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revb_2.c b/gcc/testsuite/gcc.target/aarch64/sve/revb_2.c +new file mode 100644 +index 000000000..389739cc8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/revb_2.c +@@ -0,0 +1,10 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */ ++ ++#include "revb_1.c" ++ ++/* { dg-final { scan-assembler-not {\ttbl\t} } } */ ++ ++/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 1 } } */ ++/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 1 } } */ ++/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revh_1.c b/gcc/testsuite/gcc.target/aarch64/sve/revh_1.c +index 76145812b..28a0399b9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/revh_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/revh_1.c +@@ -1,9 +1,7 @@ + /* { dg-do assemble { target aarch64_asm_sve_ok } } */ +-/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */ + +-#include +- +-typedef uint16_t vnx8hi __attribute__((vector_size (32))); ++typedef __UINT16_TYPE__ vnx8hi __attribute__((vector_size (32))); + typedef _Float16 vnx8hf __attribute__((vector_size (32))); + + #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revh_2.c b/gcc/testsuite/gcc.target/aarch64/sve/revh_2.c +new file mode 100644 +index 000000000..e821b6402 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/revh_2.c +@@ -0,0 +1,9 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */ ++ ++#include "revh_1.c" ++ ++/* { dg-final { scan-assembler-not {\ttbl\t} } } */ ++ ++/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */ ++/* { dg-final { scan-assembler-times 
{\trevh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revw_1.c b/gcc/testsuite/gcc.target/aarch64/sve/revw_1.c +index 8ac68b782..de926753c 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/revw_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/revw_1.c +@@ -1,9 +1,7 @@ + /* { dg-do assemble { target aarch64_asm_sve_ok } } */ +-/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */ + +-#include +- +-typedef uint32_t vnx4si __attribute__((vector_size (32))); ++typedef __UINT32_TYPE__ vnx4si __attribute__((vector_size (32))); + typedef float vnx4sf __attribute__((vector_size (32))); + + #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revw_2.c b/gcc/testsuite/gcc.target/aarch64/sve/revw_2.c +new file mode 100644 +index 000000000..17243c05c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/revw_2.c +@@ -0,0 +1,8 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */ ++ ++#include "revw_1.c" ++ ++/* { dg-final { scan-assembler-not {\ttbl\t} } } */ ++ ++/* { dg-final { scan-assembler-times {\trevw\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/sad_1.c +new file mode 100644 +index 000000000..e7bf64a57 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/sad_1.c +@@ -0,0 +1,28 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_SAD(TYPE1, TYPE2) \ ++TYPE1 __attribute__ ((noinline, noclone)) \ ++sum_abs_##TYPE1##_##TYPE2 (TYPE2 *restrict x, TYPE2 *restrict y, int n) \ ++{ \ ++ TYPE1 sum = 0; \ ++ for (int i = 0; i < n; i++) \ ++ { \ ++ sum += __builtin_abs (x[i] - y[i]); \ ++ } \ ++ return sum; \ ++} ++ ++DEF_SAD(int32_t, uint8_t) ++DEF_SAD(int32_t, int8_t) ++DEF_SAD(int64_t, uint16_t) ++DEF_SAD(int64_t, int16_t) ++ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.s, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.d, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_1.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_1.c +new file mode 100644 +index 000000000..e651e5b93 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_1.c +@@ -0,0 +1,27 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ ++ ++#include ++ ++typedef int8_t vnx16qi __attribute__((vector_size (32))); ++ ++/* Predicate vector: 1 0 1 0 ... 
*/ ++ ++#define MASK_32 { 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, \ ++ 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, \ ++ 20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63 } ++ ++#define INDEX_32 vnx16qi ++ ++#define PERMUTE(type, nunits) \ ++type permute_##type (type x, type y) \ ++{ \ ++ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ ++} ++ ++PERMUTE(vnx16qi, 32) ++ ++/* { dg-final { scan-assembler-not {\ttbl\t} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.h, vl16\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_2.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_2.c +new file mode 100644 +index 000000000..05391474a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_2.c +@@ -0,0 +1,41 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ ++ ++#include ++ ++typedef int8_t vnx16qi __attribute__((vector_size (32))); ++typedef int16_t vnx8hi __attribute__((vector_size (32))); ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++ ++typedef _Float16 vnx8hf __attribute__((vector_size (32))); ++typedef float vnx4sf __attribute__((vector_size (32))); ++ ++/* Predicate vector: 1 0 0 0 ... */ ++ ++#define MASK_32 { 0, 33, 34, 35, 4, 37, 38, 39, 8, 41, 42, 43, 12, \ ++ 45, 46, 47, 16, 49, 50, 51, 20, 53, 54, 55, 24, \ ++ 57, 58, 59, 28, 61, 62, 63 } ++ ++/* Predicate vector: 1 0 1 0 ... */ ++ ++#define MASK_16 {0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31} ++ ++#define INDEX_32 vnx16qi ++#define INDEX_16 vnx8hi ++ ++#define PERMUTE(type, nunits) \ ++type permute_##type (type x, type y) \ ++{ \ ++ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ ++} ++ ++PERMUTE(vnx16qi, 32) ++PERMUTE(vnx8hi, 16) ++PERMUTE(vnx8hf, 16) ++ ++/* { dg-final { scan-assembler-not {\ttbl\t} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s, vl8\n} 3 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_3.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_3.c +new file mode 100644 +index 000000000..a87492d9d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_3.c +@@ -0,0 +1,50 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ ++ ++#include ++ ++typedef int8_t vnx16qi __attribute__((vector_size (32))); ++typedef int16_t vnx8hi __attribute__((vector_size (32))); ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++typedef _Float16 vnx8hf __attribute__((vector_size (32))); ++typedef float vnx4sf __attribute__((vector_size (32))); ++ ++/* Predicate vector: 1 0 0 0 0 0 0 0 ... */ ++ ++#define MASK_32 { 0, 33, 34, 35, 36, 37, 38, 39, \ ++ 8, 41, 42, 43, 44, 45, 46, 47, \ ++ 16, 49, 50, 51, 52, 53, 54, 55, \ ++ 24, 57, 58, 59, 60, 61, 62, 63 } ++ ++/* Predicate vector: 1 0 0 0 ... */ ++ ++#define MASK_16 { 0, 17, 18, 19, 4, 21, 22, 23, \ ++ 8, 25, 26, 27, 12, 29, 30, 31 } ++ ++/* Predicate vector: 1 0 ... 
*/ ++ ++#define MASK_8 { 0, 9, 2, 11, 4, 13, 6, 15 } ++ ++#define INDEX_32 vnx16qi ++#define INDEX_16 vnx8hi ++#define INDEX_8 vnx4si ++ ++#define PERMUTE(type, nunits) \ ++type permute_##type (type x, type y) \ ++{ \ ++ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ ++} ++ ++PERMUTE(vnx16qi, 32) ++PERMUTE(vnx8hi, 16) ++PERMUTE(vnx4si, 8) ++PERMUTE(vnx8hf, 16) ++PERMUTE(vnx4sf, 8) ++ ++/* { dg-final { scan-assembler-not {\ttbl\t} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d, vl4\n} 5 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_4.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_4.c +new file mode 100644 +index 000000000..e9bbc5527 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_4.c +@@ -0,0 +1,50 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ ++ ++#include ++ ++typedef int8_t vnx16qi __attribute__((vector_size (32))); ++typedef int16_t vnx8hi __attribute__((vector_size (32))); ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++typedef int64_t vnx2di __attribute__((vector_size (32))); ++ ++typedef _Float16 vnx8hf __attribute__((vector_size (32))); ++typedef float vnx4sf __attribute__((vector_size (32))); ++typedef double vnx2df __attribute__((vector_size (32))); ++ ++/* Predicate vector: 1 1 0 0 ... */ ++ ++#define MASK_32 { 0, 1, 34, 35, 4, 5, 38, 39, 8, 9, 42, 43, 12, 13, \ ++ 46, 47, 16, 17, 50, 51, 20, 21, 54, 55, 24, 25, \ ++ 58, 59, 28, 29, 62, 63 } ++ ++#define MASK_16 {0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31} ++#define MASK_8 {0, 1, 10, 11, 4, 5, 14, 15} ++#define MASK_4 {0, 1, 6, 7} ++ ++#define INDEX_32 vnx16qi ++#define INDEX_16 vnx8hi ++#define INDEX_8 vnx4si ++#define INDEX_4 vnx2di ++ ++#define PERMUTE(type, nunits) \ ++type permute_##type (type x, type y) \ ++{ \ ++ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ ++} ++ ++PERMUTE(vnx16qi, 32) ++PERMUTE(vnx8hi, 16) ++PERMUTE(vnx4si, 8) ++PERMUTE(vnx2di, 4) ++ ++PERMUTE(vnx8hf, 16) ++PERMUTE(vnx4sf, 8) ++PERMUTE(vnx2df, 4) ++ ++/* { dg-final { scan-assembler-not {\ttbl\t} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_5.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_5.c +new file mode 100644 +index 000000000..935abb54d +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_5.c +@@ -0,0 +1,50 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ ++ ++#include ++ ++typedef int8_t vnx16qi __attribute__((vector_size (32))); ++typedef int16_t vnx8hi __attribute__((vector_size (32))); ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++typedef int64_t vnx2di __attribute__((vector_size (32))); ++ ++typedef _Float16 vnx8hf 
__attribute__((vector_size (32))); ++typedef float vnx4sf __attribute__((vector_size (32))); ++typedef double vnx2df __attribute__((vector_size (32))); ++ ++/* Predicate vector: 1 0 0 1 ... */ ++ ++#define MASK_32 { 0, 33, 34, 3, 4, 37, 38, 7, 8, 41, 42, 11, 12, 45, 46, \ ++ 15, 16, 49, 50, 19, 20, 53, 54, 23, 24, 57, 58, 27, 28, \ ++ 61, 62, 31 } ++ ++#define MASK_16 {0, 17, 18, 3, 4, 21, 22, 7, 8, 25, 26, 11, 12, 29, 30, 15} ++#define MASK_8 {0, 9, 10, 3, 4, 13, 14, 7} ++#define MASK_4 {0, 5, 6, 3} ++ ++#define INDEX_32 vnx16qi ++#define INDEX_16 vnx8hi ++#define INDEX_8 vnx4si ++#define INDEX_4 vnx2di ++ ++#define PERMUTE(type, nunits) \ ++type permute_##type (type x, type y) \ ++{ \ ++ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ ++} ++ ++PERMUTE(vnx16qi, 32) ++PERMUTE(vnx8hi, 16) ++PERMUTE(vnx4si, 8) ++PERMUTE(vnx2di, 4) ++ ++PERMUTE(vnx8hf, 16) ++PERMUTE(vnx4sf, 8) ++PERMUTE(vnx2df, 4) ++ ++/* { dg-final { scan-assembler-not {\ttbl\t} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_6.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_6.c +new file mode 100644 +index 000000000..772938f68 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_6.c +@@ -0,0 +1,42 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ ++ ++#include ++ ++typedef int32_t vnx4si __attribute__((vector_size (32))); ++typedef int64_t vnx2di __attribute__((vector_size (32))); ++ ++typedef float vnx4sf __attribute__((vector_size (32))); ++typedef double vnx2df __attribute__((vector_size (32))); ++ ++/* Predicate vector: 1 0 0 0 ... 
*/ ++ ++#define MASK_32 { 0, 33, 34, 35, 4, 37, 38, 39, 8, 41, 42, 43, 12, \ ++ 45, 46, 47, 16, 49, 50, 51, 20, 53, 54, 55, 24, \ ++ 57, 58, 59, 28, 61, 62, 63 } ++ ++#define MASK_16 {0, 17, 18, 19, 4, 21, 22, 23, 8, 25, 26, 27, 12, 29, 30, 31} ++#define MASK_8 {0, 9, 10, 11, 4, 13, 14, 15} ++#define MASK_4 {0, 5, 6, 7} ++ ++#define INDEX_8 vnx4si ++#define INDEX_4 vnx2di ++ ++#define PERMUTE(type, nunits) \ ++type permute_##type (type x, type y) \ ++{ \ ++ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ ++} ++ ++PERMUTE(vnx4si, 8) ++PERMUTE(vnx2di, 4) ++ ++PERMUTE(vnx4sf, 8) ++PERMUTE(vnx2df, 4) ++ ++/* { dg-final { scan-assembler-not {\ttbl\t} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d, vl4\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c b/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c +index f4c5ebd46..5ee66da15 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c +@@ -75,9 +75,9 @@ DO_IMMEDIATE_OPS (63, int64_t, 63); + /* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +-/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +-/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +-/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + + /* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c +index 11b88aef7..7764a1b0f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c +@@ -40,10 +40,7 @@ TEST_LOOP (double, 3.0) + /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ + +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 2 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl16\n} 3 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl8\n} 3 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl4\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 } } */ + + /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ + /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c +index 1fbf4892c..42fc17b73 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c +@@ -16,10 
+16,7 @@ + /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ + +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 2 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl32\n} 3 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl16\n} 3 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl8\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 } } */ + + /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ + /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c +index a3688b692..338ca1e3d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c +@@ -16,10 +16,7 @@ + /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ + +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 2 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl64\n} 3 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl32\n} 3 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl16\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 } } */ + + /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ + /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c +index 08965d39f..37c78a659 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c +@@ -16,10 +16,7 @@ + /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ + +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 2 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl128\n} 3 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl64\n} 3 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl32\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 } } */ + + /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ + /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c +index 413532c07..d4b9776fe 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c +@@ -29,12 +29,9 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ + + TEST_ALL (VEC_PERM) + +-/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 { target aarch64_little_endian } } } */ +-/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 2 { target aarch64_big_endian } } } */ +-/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 { target aarch64_little_endian } } } */ +-/* { dg-final { scan-assembler-times {\tld1rqh\tz[0-9]+\.h, } 3 { target aarch64_big_endian } } } */ +-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 3 { target aarch64_little_endian } } } */ +-/* { dg-final { scan-assembler-times 
{\tld1rqw\tz[0-9]+\.s, } 3 { target aarch64_big_endian } } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, w[0-9]+\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */ ++/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 3 } } */ + /* { dg-final { scan-assembler-times {\tld1rqd\tz[0-9]+\.d, } 3 } } */ + /* { dg-final { scan-assembler-not {\tzip1\t} } } */ + /* { dg-final { scan-assembler-not {\tzip2\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c +index 0f9f01a00..82dd43a4d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c +@@ -32,18 +32,17 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ + TEST_ALL (VEC_PERM) + + /* 1 for each 8-bit type. */ +-/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 { target aarch64_little_endian } } } */ +-/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 2 { target aarch64_big_endian } } } */ +-/* 1 for each 16-bit type and 4 for double. */ +-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 7 { target aarch64_little_endian } } } */ +-/* { dg-final { scan-assembler-times {\tld1rqh\tz[0-9]+\.h, } 3 { target aarch64_big_endian } } } */ +-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 4 { target aarch64_big_endian } } } */ ++/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */ ++/* 1 for each 16-bit type plus 1 for double. */ ++/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 4 } } */ + /* 1 for each 32-bit type. */ + /* { dg-final { scan-assembler-times {\tld1rqw\tz[0-9]+\.s, } 3 } } */ + /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #62\n} 2 } } */ ++/* 3 for double. */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, x[0-9]+\n} 3 } } */ + /* The 64-bit types need: + + ZIP1 ZIP1 (2 ZIP2s optimized away) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c +index 8d9d5ab58..49fb828e8 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c +@@ -35,10 +35,8 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ + + TEST_ALL (VEC_PERM) + +-/* 1 for each 8-bit type, 4 for each 32-bit type and 8 for double. */ +-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 22 { target aarch64_little_endian } } } */ +-/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 2 { target aarch64_big_endian } } } */ +-/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 20 { target aarch64_big_endian } } } */ ++/* 1 for each 8-bit type, 4 for each 32-bit type and 4 for double. */ ++/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 18 } } */ + /* 1 for each 16-bit type. */ + /* { dg-final { scan-assembler-times {\tld1rqh\tz[0-9]\.h, } 3 } } */ + /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */ +@@ -49,6 +47,8 @@ TEST_ALL (VEC_PERM) + /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #37\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #24\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #81\n} 2 } } */ ++/* 4 for double. 
*/ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, x[0-9]+\n} 4 } } */ + /* The 32-bit types need: + + ZIP1 ZIP1 (2 ZIP2s optimized away) +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/smax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/smax_1.c +new file mode 100644 +index 000000000..050248c81 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/smax_1.c +@@ -0,0 +1,71 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O3 --save-temps" } */ ++ ++#include ++ ++#define DO_REGREG_OPS(TYPE) \ ++void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \ ++{ \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] = dst[i] > src[i] ? dst[i] : src[i]; \ ++} ++ ++#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ ++void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ ++{ \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] = dst[i] > (TYPE) VALUE ? dst[i] : (TYPE) VALUE; \ ++} ++ ++#define DO_ARITH_OPS(TYPE) \ ++ DO_REGREG_OPS (TYPE); \ ++ DO_IMMEDIATE_OPS (0, TYPE, 0); \ ++ DO_IMMEDIATE_OPS (86, TYPE, 86); \ ++ DO_IMMEDIATE_OPS (109, TYPE, 109); \ ++ DO_IMMEDIATE_OPS (141, TYPE, 141); \ ++ DO_IMMEDIATE_OPS (-1, TYPE, minus1); \ ++ DO_IMMEDIATE_OPS (-110, TYPE, minus110); \ ++ DO_IMMEDIATE_OPS (-141, TYPE, minus141); ++ ++DO_ARITH_OPS (int8_t) ++DO_ARITH_OPS (int16_t) ++DO_ARITH_OPS (int32_t) ++DO_ARITH_OPS (int64_t) ++ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #115\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #141\n} } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-115\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-141\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #141\n} } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #-110\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #-141\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #141\n} } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} 1 } } */ ++/* { dg-final { 
scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #-110\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #-141\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #141\n} } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #-110\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #-141\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/smin_1.c b/gcc/testsuite/gcc.target/aarch64/sve/smin_1.c +new file mode 100644 +index 000000000..d6a9e9467 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/smin_1.c +@@ -0,0 +1,71 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O3 --save-temps" } */ ++ ++#include ++ ++#define DO_REGREG_OPS(TYPE) \ ++void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \ ++{ \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] = dst[i] < src[i] ? dst[i] : src[i]; \ ++} ++ ++#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ ++void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ ++{ \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] = dst[i] < (TYPE) VALUE ? dst[i] : (TYPE) VALUE; \ ++} ++ ++#define DO_ARITH_OPS(TYPE) \ ++ DO_REGREG_OPS (TYPE); \ ++ DO_IMMEDIATE_OPS (0, TYPE, 0); \ ++ DO_IMMEDIATE_OPS (86, TYPE, 86); \ ++ DO_IMMEDIATE_OPS (109, TYPE, 109); \ ++ DO_IMMEDIATE_OPS (141, TYPE, 141); \ ++ DO_IMMEDIATE_OPS (-1, TYPE, minus1); \ ++ DO_IMMEDIATE_OPS (-110, TYPE, minus110); \ ++ DO_IMMEDIATE_OPS (-141, TYPE, minus141); ++ ++DO_ARITH_OPS (int8_t) ++DO_ARITH_OPS (int16_t) ++DO_ARITH_OPS (int32_t) ++DO_ARITH_OPS (int64_t) ++ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #115\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #141\n} } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-115\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-141\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #141\n} } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} 1 } } */ ++/* { 
dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #-110\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #-141\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #141\n} } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #-110\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #-141\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #0\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #141\n} } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #-110\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #-141\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c b/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c +index 28fcc4429..fcd481611 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c +@@ -9,29 +9,30 @@ void consumer (void *); + void \ + multi_loop_##TYPE (TYPE *x, TYPE val) \ + { \ +- for (int i = 0; i < 7; ++i) \ ++ for (int i = 0; i < 9; ++i) \ + x[i] += val; \ + consumer (x); \ +- for (int i = 0; i < 7; ++i) \ ++ for (int i = 0; i < 9; ++i) \ + x[i] += val; \ + consumer (x); \ +- for (int i = 0; i < 7; ++i) \ ++ for (int i = 0; i < 9; ++i) \ + x[i] += val; \ + consumer (x); \ + } + + /* One iteration is enough. */ + TEST_LOOP (uint8_t); ++/* Two iterations are enough. We specialize the second two loops based ++ on whether the first executes once or twice. */ + TEST_LOOP (uint16_t); +-/* Two iterations are enough. Complete unrolling makes sense +- even at -O2. */ ++/* Three iterations are needed; ought to stay a loop. */ + TEST_LOOP (uint32_t); +-/* Four iterations are needed; ought to stay a loop. */ ++/* Five iterations are needed; ought to stay a loop. 
*/ + TEST_LOOP (uint64_t); + + /* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.b} 3 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.h} 3 } } */ +-/* { dg-final { scan-assembler {\twhilelo\tp[0-9]\.s} } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.h} 8 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.s} 6 } } */ + /* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.d} 6 } } */ + /* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ + /* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c b/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c +index 29e1a49dc..81b3f6452 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c +@@ -24,10 +24,11 @@ TEST_LOOP (uint16_t, 0x1234); + TEST_LOOP (uint32_t, 0x12345); + TEST_LOOP (uint64_t, 0x123456); + +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.h,} 3 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s,} 3 } } */ +-/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d,} 3 } } */ +-/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h,} 3 } } */ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.b,} 6 } } */ ++/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.h,} } } */ ++/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.s,} } } */ ++/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.d,} } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, w[0-9]+\n} 3 } } */ + /* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s,} 3 } } */ + /* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d,} 3 } } */ + /* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c +index 6e3c8898a..918a58138 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c +@@ -83,9 +83,9 @@ NAME(g4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, + } + } + +-/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */ + /* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +-/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +-/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */ ++/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */ + /* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ +-/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ ++/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c +index 45644b67b..a16a79e51 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c +@@ -43,12 +43,12 @@ + #undef NAME + #undef TYPE + +-/* { dg-final { scan-assembler-times 
{\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + + /* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c +index 814dbb3ae..bc00267c8 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c +@@ -3,12 +3,12 @@ + + #include "struct_vect_14.c" + +-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + + /* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c +index 6ecf89b54..9e2a549f5 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c +@@ -3,12 +3,12 @@ + + #include "struct_vect_14.c" + +-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, 
p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + + /* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c +index 571c6d0d3..e791e2e12 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c +@@ -3,12 +3,12 @@ + + #include "struct_vect_14.c" + +-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ + + /* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c +index dc912e63c..3bc53b69d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c +@@ -46,4 +46,4 @@ TEST (test) + /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ + + /* The only branches should be in the vectorized loop. 
*/ +-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 4 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c +index 6568dc71c..833bf0669 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c +@@ -46,4 +46,4 @@ TEST (test) + /* Each function should have three branches: one directly to the exit + (n <= 0), one to the single scalar epilogue iteration (n == 1), + and one branch-back for the vectorized loop. */ +-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ ++/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 12 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c +index 6c3520c2f..858ca74f8 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c +@@ -46,4 +46,4 @@ TEST (test) + /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ + + /* The only branches should be in the vectorized loop. */ +-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 4 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c +index 4b2a5e463..95691fe9e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c +@@ -46,4 +46,4 @@ TEST (test) + /* Each function should have three branches: one directly to the exit + (n <= 0), one to the single scalar epilogue iteration (n == 1), + and one branch-back for the vectorized loop. */ +-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ ++/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 12 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c +index b61536053..8eb072505 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c +@@ -46,4 +46,4 @@ TEST (test) + /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ + + /* The only branches should be in the vectorized loop. */ +-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ ++/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 4 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c +index b529e0386..705b2350a 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c +@@ -46,4 +46,4 @@ TEST (test) + /* Each function should have three branches: one directly to the exit + (n <= 0), one to the single scalar epilogue iteration (n == 1), + and one branch-back for the vectorized loop. 
*/ +-/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ ++/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 12 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c +index b74190149..3d3070e77 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c +@@ -78,9 +78,9 @@ g4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, + } + } + +-/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ ++/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */ + /* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +-/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ +-/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ ++/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */ ++/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */ + /* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ +-/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ ++/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/umax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/umax_1.c +new file mode 100644 +index 000000000..fffedb9c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/umax_1.c +@@ -0,0 +1,65 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O3 --save-temps" } */ ++ ++#include <stdint.h> ++ ++#define DO_REGREG_OPS(TYPE) \ ++void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \ ++{ \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] = dst[i] > src[i] ? dst[i] : src[i]; \ ++} ++ ++#define DO_IMMEDIATE_OPS(VALUE, TYPE) \ ++void varithimm_##VALUE##_##TYPE (TYPE *dst, int count) \ ++{ \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] = dst[i] > (TYPE) VALUE ?
dst[i] : (TYPE) VALUE; \ ++} ++ ++#define DO_ARITH_OPS(TYPE) \ ++ DO_REGREG_OPS (TYPE); \ ++ DO_IMMEDIATE_OPS (2, TYPE); \ ++ DO_IMMEDIATE_OPS (86, TYPE); \ ++ DO_IMMEDIATE_OPS (109, TYPE); \ ++ DO_IMMEDIATE_OPS (141, TYPE); \ ++ DO_IMMEDIATE_OPS (229, TYPE); \ ++ DO_IMMEDIATE_OPS (255, TYPE); \ ++ DO_IMMEDIATE_OPS (256, TYPE); ++ ++DO_ARITH_OPS (uint8_t) ++DO_ARITH_OPS (uint16_t) ++DO_ARITH_OPS (uint32_t) ++DO_ARITH_OPS (uint64_t) ++ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #141\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #229\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */ ++/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #141\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #229\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #256\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #141\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #229\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #256\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #141\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #229\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #256\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/umin_1.c b/gcc/testsuite/gcc.target/aarch64/sve/umin_1.c +new file mode 100644 +index 000000000..f7cdba3b7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/umin_1.c +@@ -0,0 +1,65 @@ ++/* { dg-do assemble { target aarch64_asm_sve_ok } } */ ++/* { dg-options "-O3 --save-temps" } */ ++ ++#include <stdint.h> ++ ++#define DO_REGREG_OPS(TYPE) \ ++void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \ ++{ \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] = dst[i] < src[i] ?
dst[i] : src[i]; \ ++} ++ ++#define DO_IMMEDIATE_OPS(VALUE, TYPE) \ ++void varithimm_##VALUE##_##TYPE (TYPE *dst, int count) \ ++{ \ ++ for (int i = 0; i < count; ++i) \ ++ dst[i] = dst[i] < (TYPE) VALUE ? dst[i] : (TYPE) VALUE; \ ++} ++ ++#define DO_ARITH_OPS(TYPE) \ ++ DO_REGREG_OPS (TYPE); \ ++ DO_IMMEDIATE_OPS (2, TYPE); \ ++ DO_IMMEDIATE_OPS (86, TYPE); \ ++ DO_IMMEDIATE_OPS (109, TYPE); \ ++ DO_IMMEDIATE_OPS (141, TYPE); \ ++ DO_IMMEDIATE_OPS (229, TYPE); \ ++ DO_IMMEDIATE_OPS (255, TYPE); \ ++ DO_IMMEDIATE_OPS (256, TYPE); ++ ++DO_ARITH_OPS (uint8_t) ++DO_ARITH_OPS (uint16_t) ++DO_ARITH_OPS (uint32_t) ++DO_ARITH_OPS (uint64_t) ++ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #141\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #229\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */ ++/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #141\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #229\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #256\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #141\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #229\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #256\n} } } */ ++ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #141\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #229\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */ ++/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #256\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c b/gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c +index d4353009e..e33777fc3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c +@@ -10,4 +10,4 @@ fully_peel_me (double *x) + x[i] = x[i] * 2; + } + +-/* { dg-final { scan-assembler-times 
{b..\t\.L.\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c +new file mode 100644 +index 000000000..cabcfa73e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c +@@ -0,0 +1,94 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define eq(A, B) ((A) == (B)) ++#define ne(A, B) ((A) != (B)) ++#define olt(A, B) ((A) < (B)) ++#define ole(A, B) ((A) <= (B)) ++#define oge(A, B) ((A) >= (B)) ++#define ogt(A, B) ((A) > (B)) ++#define ordered(A, B) (!__builtin_isunordered (A, B)) ++#define unordered(A, B) (__builtin_isunordered (A, B)) ++#define ueq(A, B) (!__builtin_islessgreater (A, B)) ++#define ult(A, B) (__builtin_isless (A, B)) ++#define ule(A, B) (__builtin_islessequal (A, B)) ++#define uge(A, B) (__builtin_isgreaterequal (A, B)) ++#define ugt(A, B) (__builtin_isgreater (A, B)) ++#define nueq(A, B) (__builtin_islessgreater (A, B)) ++#define nult(A, B) (!__builtin_isless (A, B)) ++#define nule(A, B) (!__builtin_islessequal (A, B)) ++#define nuge(A, B) (!__builtin_isgreaterequal (A, B)) ++#define nugt(A, B) (!__builtin_isgreater (A, B)) ++ ++#define DEF_LOOP(CMP, EXPECT_INVALID) \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##CMP##_var (__fp16 *restrict dest, __fp16 *restrict src, \ ++ __fp16 fallback, __fp16 *restrict a, \ ++ __fp16 *restrict b, int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ dest[i] = CMP (a[i], b[i]) ? src[i] : fallback; \ ++ } \ ++ \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##CMP##_zero (__fp16 *restrict dest, __fp16 *restrict src, \ ++ __fp16 fallback, __fp16 *restrict a, \ ++ int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ dest[i] = CMP (a[i], (__fp16) 0) ? src[i] : fallback; \ ++ } \ ++ \ ++ void __attribute__ ((noinline, noclone)) \ ++ test_##CMP##_sel (__fp16 *restrict dest, __fp16 if_true, \ ++ __fp16 if_false, __fp16 *restrict a, \ ++ __fp16 b, int count) \ ++ { \ ++ for (int i = 0; i < count; ++i) \ ++ dest[i] = CMP (a[i], b) ? 
if_true : if_false; \ ++} ++ ++#define TEST_ALL(T) \ ++ T (eq, 0) \ ++ T (ne, 0) \ ++ T (olt, 1) \ ++ T (ole, 1) \ ++ T (oge, 1) \ ++ T (ogt, 1) \ ++ T (ordered, 0) \ ++ T (unordered, 0) \ ++ T (ueq, 0) \ ++ T (ult, 0) \ ++ T (ule, 0) \ ++ T (uge, 0) \ ++ T (ugt, 0) \ ++ T (nueq, 0) \ ++ T (nult, 0) \ ++ T (nule, 0) \ ++ T (nuge, 0) \ ++ T (nugt, 0) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler {\tfcmeq\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler {\tfcmeq\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ ++ ++/* { dg-final { scan-assembler {\tfcmne\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ ++/* { dg-final { scan-assembler {\tfcmne\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ ++ ++/* { dg-final { scan-assembler {\tfcmlt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ ++/* { dg-final { scan-assembler {\tfcmlt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ ++ ++/* { dg-final { scan-assembler {\tfcmle\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ ++/* { dg-final { scan-assembler {\tfcmle\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ ++ ++/* { dg-final { scan-assembler {\tfcmgt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ ++/* { dg-final { scan-assembler {\tfcmgt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ ++ ++/* { dg-final { scan-assembler {\tfcmge\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ ++/* { dg-final { scan-assembler {\tfcmge\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ ++ ++/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ ++/* { dg-final { scan-assembler {\tfcmuo\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c +new file mode 100644 +index 000000000..4a228c8c2 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c +@@ -0,0 +1,54 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++/* { dg-require-effective-target fenv_exceptions } */ ++ ++#include <fenv.h> ++ ++#include "vcond_17.c" ++ ++#define N 401 ++ ++#define TEST_LOOP(CMP, EXPECT_INVALID) \ ++ { \ ++ __fp16 dest1[N], dest2[N], dest3[N], src[N]; \ ++ __fp16 a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ src[i] = i * i; \ ++ if (i % 5 == 0) \ ++ a[i] = 0; \ ++ else if (i % 3) \ ++ a[i] = i * 0.1; \ ++ else \ ++ a[i] = i; \ ++ if (i % 7 == 0) \ ++ b[i] = __builtin_nan (""); \ ++ else if (i % 6) \ ++ b[i] = i * 0.1; \ ++ else \ ++ b[i] = i; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ feclearexcept (FE_ALL_EXCEPT); \ ++ test_##CMP##_var (dest1, src, 11, a, b, N); \ ++ test_##CMP##_zero (dest2, src, 22, a, N); \ ++ test_##CMP##_sel (dest3, 33, 44, a, 9, N); \ ++ if (!fetestexcept (FE_INVALID) != !(EXPECT_INVALID)) \ ++ __builtin_abort (); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ if (dest1[i] != (CMP (a[i], b[i]) ? src[i] : 11)) \ ++ __builtin_abort (); \ ++ if (dest2[i] != (CMP (a[i], 0) ? src[i] : 22)) \ ++ __builtin_abort (); \ ++ if (dest3[i] != (CMP (a[i], 9) ?
33 : 44)) \ ++ __builtin_abort (); \ ++ } \ ++ } ++ ++int __attribute__ ((optimize (1))) ++main (void) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_18.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_18.c +new file mode 100644 +index 000000000..a2590b9ee +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_18.c +@@ -0,0 +1,44 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#define DEF_LOOP(TYPE, NAME, CONST) \ ++ void \ ++ test_##TYPE##_##NAME (TYPE *restrict x, \ ++ TYPE *restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] > 0 ? CONST : 0; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, 2, 2.0) \ ++ T (TYPE, 1p25, 1.25) \ ++ T (TYPE, 32p25, 32.25) \ ++ T (TYPE, m4, -4.0) \ ++ T (TYPE, m2p5, -2.5) \ ++ T (TYPE, m64p5, -64.5) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #16384\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #15616\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #-15360\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #-16128\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #2\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #1\.25(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-4\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-2\.5(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #2\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #1\.25(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-4\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-2\.5(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_18_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_18_run.c +new file mode 100644 +index 000000000..279b0a3ba +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_18_run.c +@@ -0,0 +1,30 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "vcond_18.c" ++ ++#define N 97 ++ ++#define TEST_LOOP(TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ pred[i] = i % 5 <= i % 6; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (x, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ if (x[i] != (TYPE) (pred[i] > 0 ? 
CONST : 0)) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int __attribute__ ((optimize (1))) ++main (int argc, char **argv) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_19.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_19.c +new file mode 100644 +index 000000000..2347b7f28 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_19.c +@@ -0,0 +1,46 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#define DEF_LOOP(TYPE, NAME, CONST) \ ++ void \ ++ test_##TYPE##_##NAME (TYPE *restrict x, \ ++ TYPE *restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] > 0 ? CONST : pred[i]; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, 2, 2.0) \ ++ T (TYPE, 1p25, 1.25) \ ++ T (TYPE, 32p25, 32.25) \ ++ T (TYPE, m4, -4.0) \ ++ T (TYPE, m2p5, -2.5) \ ++ T (TYPE, m64p5, -64.5) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #16384\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #15616\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-15360\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-16128\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_19_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_19_run.c +new file mode 100644 +index 000000000..d93d8aa45 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_19_run.c +@@ -0,0 +1,30 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "vcond_19.c" ++ ++#define N 97 ++ ++#define TEST_LOOP(TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ pred[i] = i % 5 <= i % 6 ? i : 0; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (x, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ if (x[i] != (TYPE) (pred[i] > 0 ? 
CONST : pred[i])) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int __attribute__ ((optimize (1))) ++main (int argc, char **argv) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_20.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_20.c +new file mode 100644 +index 000000000..bf2af1c62 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_20.c +@@ -0,0 +1,46 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#define DEF_LOOP(TYPE, NAME, CONST) \ ++ void \ ++ test_##TYPE##_##NAME (TYPE *restrict x, \ ++ TYPE *restrict pred, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ x[i] = pred[i] > 0 ? CONST : 12.0; \ ++ } ++ ++#define TEST_TYPE(T, TYPE) \ ++ T (TYPE, 2, 2.0) \ ++ T (TYPE, 1p25, 1.25) \ ++ T (TYPE, 32p25, 32.25) \ ++ T (TYPE, m4, -4.0) \ ++ T (TYPE, m2p5, -2.5) \ ++ T (TYPE, m64p5, -64.5) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16) \ ++ TEST_TYPE (T, float) \ ++ TEST_TYPE (T, double) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #16384\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #15616\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-15360\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-16128\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */ ++/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 12 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_20_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_20_run.c +new file mode 100644 +index 000000000..33c81deaa +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_20_run.c +@@ -0,0 +1,30 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "vcond_20.c" ++ ++#define N 97 ++ ++#define TEST_LOOP(TYPE, NAME, CONST) \ ++ { \ ++ TYPE x[N], pred[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ pred[i] = i % 5 <= i % 6; \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (x, pred, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ if (x[i] != (TYPE) (pred[i] > 0 ? 
CONST : 12.0)) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int __attribute__ ((optimize (1))) ++main (int argc, char **argv) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_21.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_21.c +new file mode 100644 +index 000000000..d5df2e199 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_21.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#define DEF_LOOP(TYPE, ABS, NAME, OP) \ ++ void \ ++ test_##TYPE##_##NAME (TYPE *restrict r, \ ++ TYPE *restrict a, \ ++ TYPE *restrict b, int n) \ ++ { \ ++ for (int i = 0; i < n; ++i) \ ++ r[i] = ABS (a[i]) OP ABS (b[i]) ? 1.0 : 0.0; \ ++ } ++ ++#define TEST_TYPE(T, TYPE, ABS) \ ++ T (TYPE, ABS, lt, <) \ ++ T (TYPE, ABS, le, <=) \ ++ T (TYPE, ABS, ge, >=) \ ++ T (TYPE, ABS, gt, >) ++ ++#define TEST_ALL(T) \ ++ TEST_TYPE (T, _Float16, __builtin_fabsf16) \ ++ TEST_TYPE (T, float, __builtin_fabsf) \ ++ TEST_TYPE (T, double, __builtin_fabs) ++ ++TEST_ALL (DEF_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tfac[lg]t\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tfac[lg]e\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfac[lg]t\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tfac[lg]e\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ ++ ++/* { dg-final { scan-assembler-times {\tfac[lg]t\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tfac[lg]e\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_21_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_21_run.c +new file mode 100644 +index 000000000..15c551324 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_21_run.c +@@ -0,0 +1,31 @@ ++/* { dg-do run { target aarch64_sve_hw } } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include "vcond_21.c" ++ ++#define N 97 ++ ++#define TEST_LOOP(TYPE, ABS, NAME, OP) \ ++ { \ ++ TYPE r[N], a[N], b[N]; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a[i] = i % 5 * (i & 1 ? -1 : 1); \ ++ b[i] = i % 9 * (i & 2 ? -1 : 1); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ test_##TYPE##_##NAME (r, a, b, N); \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ if (r[i] != (ABS (a[i]) OP ABS (b[i]) ? 
1.0 : 0.0)) \ ++ __builtin_abort (); \ ++ asm volatile ("" ::: "memory"); \ ++ } \ ++ } ++ ++int __attribute__ ((optimize (1))) ++main (int argc, char **argv) ++{ ++ TEST_ALL (TEST_LOOP) ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_1.c b/gcc/testsuite/gcc.target/aarch64/sve/while_1.c +index a93a04baa..2655c4242 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/while_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/while_1.c +@@ -42,3 +42,4 @@ TEST_ALL (ADD_LOOP) + /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ + /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ + /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ ++/* { dg-final { scan-assembler-times {\tb\.any\t} 10 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_10.c b/gcc/testsuite/gcc.target/aarch64/sve/while_10.c +new file mode 100644 +index 000000000..eaed326f9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/while_10.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */ ++ ++#include <stdint.h> ++ ++#define ADD_LOOP(TYPE, COUNT) \ ++ TYPE __attribute__ ((noinline, noclone)) \ ++ vec_while_##TYPE (TYPE *restrict a) \ ++ { \ ++ for (int i = 0; i < COUNT; ++i) \ ++ a[i] += 1; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t, 63) \ ++ T (int16_t, 30) \ ++ T (int32_t, 15) \ ++ T (int64_t, 6) ++ ++TEST_ALL (ADD_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, mul3\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, mul3\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, mul3\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl6\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_6.c b/gcc/testsuite/gcc.target/aarch64/sve/while_6.c +new file mode 100644 +index 000000000..b4cc596ef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/while_6.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */ ++ ++#include <stdint.h> ++ ++#define ADD_LOOP(TYPE) \ ++ TYPE __attribute__ ((noinline, noclone)) \ ++ vec_while_##TYPE (TYPE *restrict a) \ ++ { \ ++ for (int i = 0; i < 7; ++i) \ ++ a[i] += 1; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (int16_t) \ ++ T (int32_t) \ ++ T (int64_t) ++ ++TEST_ALL (ADD_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl7\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl7\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c +new file mode 100644 +index 000000000..d5ffb66a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */ ++ ++#include <stdint.h> ++ ++#define ADD_LOOP(TYPE) \ ++ TYPE __attribute__ ((noinline, noclone)) \ ++ vec_while_##TYPE (TYPE *restrict a) \ ++ { \ ++ for (int i = 0; i < 8; ++i) \ ++ a[i] += 1; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (int16_t) \ ++ T (int32_t) \ ++ T (int64_t) ++ ++TEST_ALL (ADD_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl8\n} 1 } } */ ++/* {
dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl8\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_8.c b/gcc/testsuite/gcc.target/aarch64/sve/while_8.c +new file mode 100644 +index 000000000..1c11aa849 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/while_8.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */ ++ ++#include <stdint.h> ++ ++#define ADD_LOOP(TYPE) \ ++ TYPE __attribute__ ((noinline, noclone)) \ ++ vec_while_##TYPE (TYPE *restrict a) \ ++ { \ ++ for (int i = 0; i < 9; ++i) \ ++ a[i] += 1; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (int16_t) \ ++ T (int32_t) \ ++ T (int64_t) ++ ++TEST_ALL (ADD_LOOP) ++ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b,} 1 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 2 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c +new file mode 100644 +index 000000000..9a8e5fe12 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */ ++ ++#include <stdint.h> ++ ++#define ADD_LOOP(TYPE) \ ++ TYPE __attribute__ ((noinline, noclone)) \ ++ vec_while_##TYPE (TYPE *restrict a) \ ++ { \ ++ for (int i = 0; i < 16; ++i) \ ++ a[i] += 1; \ ++ } ++ ++#define TEST_ALL(T) \ ++ T (int8_t) \ ++ T (int16_t) \ ++ T (int32_t) \ ++ T (int64_t) ++ ++TEST_ALL (ADD_LOOP) ++ ++/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl16\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 2 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c b/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c +index d7e46b059..fc6a4f3ec 100644 +--- a/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c ++++ b/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c +@@ -1,12 +1,12 @@ +-/* { dg-do compile } */ ++/* { dg-do link } */ + /* { dg-options "-O3 -save-temps -mcmodel=tiny" } */ + +-int fixed_regs[0x00200000]; ++char fixed_regs[0x00080000]; + + int +-foo() ++main () + { +- return fixed_regs[0x00080000]; ++ return fixed_regs[0x000ff000]; + } + + /* { dg-final { scan-assembler-not "adr\tx\[0-9\]+, fixed_regs\\\+" } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/symbol-range.c b/gcc/testsuite/gcc.target/aarch64/symbol-range.c +index 6574cf431..d8e82fa1b 100644 +--- a/gcc/testsuite/gcc.target/aarch64/symbol-range.c ++++ b/gcc/testsuite/gcc.target/aarch64/symbol-range.c +@@ -1,12 +1,12 @@ +-/* { dg-do compile } */ ++/* { dg-do link } */ + /* { dg-options "-O3 -save-temps -mcmodel=small" } */ + +-int fixed_regs[0x200000000ULL]; ++char fixed_regs[0x80000000]; + + int +-foo() ++main () + { +- return fixed_regs[0x100000000ULL]; ++ return fixed_regs[0xfffff000]; + } + + /* { dg-final { scan-assembler-not "adrp\tx\[0-9\]+, fixed_regs\\\+" } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c +index e571b2f13..f56415f33 100644 +---
a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c ++++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */ + + #include "sync-comp-swap.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c +index 357bf1be3..39b3144aa 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c ++++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "sync-op-acquire.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c +index c6ba16299..6b8b2043f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c ++++ b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-march=armv8-a+nolse -O2" } */ ++/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ + + #include "sync-op-full.x" + +diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-10.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-10.c +new file mode 100644 +index 000000000..3d6893ee0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-10.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++ ++int __attribute__((aarch64_vector_pcs)) (*callee) (void); ++ ++int __attribute__ ((aarch64_vector_pcs)) ++caller (int *x) ++{ ++ return callee () + 1; ++} ++ ++/* { dg-final { scan-assembler-not {\tstp\tq} } } */ ++/* { dg-final { scan-assembler-not {\tldp\tq} } } */ ++/* { dg-final { scan-assembler-not {\tstr\tq} } } */ ++/* { dg-final { scan-assembler-not {\tldr\tq} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-11.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-11.c +new file mode 100644 +index 000000000..de99bd701 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-11.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++ ++int (*callee) (void); ++ ++int __attribute__ ((aarch64_vector_pcs)) ++caller (int *x) ++{ ++ return callee () + 1; ++} ++ ++/* { dg-final { scan-assembler {\sstp\tq8, q9} } } */ ++/* { dg-final { scan-assembler {\sstp\tq10, q11} } } */ ++/* { dg-final { scan-assembler {\sstp\tq12, q13} } } */ ++/* { dg-final { scan-assembler {\sstp\tq14, q15} } } */ ++/* { dg-final { scan-assembler {\sstp\tq16, q17} } } */ ++/* { dg-final { scan-assembler {\sstp\tq18, q19} } } */ ++/* { dg-final { scan-assembler {\sstp\tq20, q21} } } */ ++/* { dg-final { scan-assembler {\sstp\tq22, q23} } } */ ++/* { dg-final { scan-assembler {\sldp\tq8, q9} } } */ ++/* { dg-final { scan-assembler {\sldp\tq10, q11} } } */ ++/* { dg-final { scan-assembler {\sldp\tq12, q13} } } */ ++/* { dg-final { scan-assembler {\sldp\tq14, q15} } } */ ++/* { dg-final { scan-assembler {\sldp\tq16, q17} } } */ ++/* { dg-final { scan-assembler {\sldp\tq18, q19} } } */ ++/* { dg-final { scan-assembler {\sldp\tq20, q21} } } */ ++/* { dg-final { scan-assembler {\sldp\tq22, q23} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c +new file mode 100644 +index 000000000..6463f6c50 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c +@@ -0,0 +1,20 @@ ++/* 
{ dg-do compile } */ ++/* { dg-options "-std=gnu99" } */ ++/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ ++ ++#include <arm_neon.h> ++ ++void __attribute__ ((aarch64_vector_pcs)) f (void); ++ ++void ++g (int64x2x4_t *ptr) ++{ ++ register int64x2x4_t copy asm ("v8") = *ptr; ++ int64x2x4_t save; ++ asm volatile ("" : "=w" (save) : "0" (copy)); ++ f (); ++ *ptr = save; ++} ++ ++/* { dg-final { scan-assembler-times {\tld1\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\tst1\t} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c +new file mode 100644 +index 000000000..aaa0316d1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++/* { dg-options "-fshrink-wrap -ffat-lto-objects" } */ ++/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ ++/* { dg-final { check-function-bodies "**" "" } } */ ++ ++int callee (void); ++ ++/* ++** caller: ++** ldr (w[0-9]+), \[x0\] ++** cbn?z \1, [^\n]* ++** ... ++** ret ++*/ ++int __attribute__ ((aarch64_vector_pcs)) ++caller (int *x) ++{ ++ if (*x) ++ return callee () + 1; ++ else ++ return 0; ++} ++ ++/* { dg-final { scan-assembler {\sstp\tq8, q9} } } */ ++/* { dg-final { scan-assembler {\sstp\tq10, q11} } } */ ++/* { dg-final { scan-assembler {\sstp\tq12, q13} } } */ ++/* { dg-final { scan-assembler {\sstp\tq14, q15} } } */ ++/* { dg-final { scan-assembler {\sstp\tq16, q17} } } */ ++/* { dg-final { scan-assembler {\sstp\tq18, q19} } } */ ++/* { dg-final { scan-assembler {\sstp\tq20, q21} } } */ ++/* { dg-final { scan-assembler {\sstp\tq22, q23} } } */ ++/* { dg-final { scan-assembler {\sldp\tq8, q9} } } */ ++/* { dg-final { scan-assembler {\sldp\tq10, q11} } } */ ++/* { dg-final { scan-assembler {\sldp\tq12, q13} } } */ ++/* { dg-final { scan-assembler {\sldp\tq14, q15} } } */ ++/* { dg-final { scan-assembler {\sldp\tq16, q17} } } */ ++/* { dg-final { scan-assembler {\sldp\tq18, q19} } } */ ++/* { dg-final { scan-assembler {\sldp\tq20, q21} } } */ ++/* { dg-final { scan-assembler {\sldp\tq22, q23} } } */ ++ ++/* { dg-final { scan-assembler-not {\tstp\tq[0-7],} } } */ ++/* { dg-final { scan-assembler-not {\tldp\tq[0-7],} } } */ ++/* { dg-final { scan-assembler-not {\tstp\tq2[4-9],} } } */ ++/* { dg-final { scan-assembler-not {\tldp\tq2[4-9],} } } */ ++/* { dg-final { scan-assembler-not {\tstp\td} } } */ ++/* { dg-final { scan-assembler-not {\tldp\td} } } */ ++/* { dg-final { scan-assembler-not {\tstr\tq} } } */ ++/* { dg-final { scan-assembler-not {\tldr\tq} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/usadv16qi-dotprod.c b/gcc/testsuite/gcc.target/aarch64/usadv16qi-dotprod.c +new file mode 100644 +index 000000000..ea8de4d69 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/usadv16qi-dotprod.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target arm_v8_2a_dotprod_neon_ok } */ ++/* { dg-add-options arm_v8_2a_dotprod_neon } */ ++/* { dg-additional-options "-O3" } */ ++ ++#pragma GCC target "+nosve" ++ ++#define N 1024 ++ ++unsigned char pix1[N], pix2[N]; ++ ++int foo (void) ++{ ++ int i_sum = 0; ++ int i; ++ ++ for (i = 0; i < N; i++) ++ i_sum += __builtin_abs (pix1[i] - pix2[i]); ++ ++ return i_sum; ++} ++ ++/* { dg-final { scan-assembler-not {\tushll\t} } } */ ++/* { dg-final { scan-assembler-not {\tushll2\t} } } */ ++/* { dg-final { scan-assembler-not {\tusubl\t} } } */ ++/* { dg-final { scan-assembler-not {\tusubl2\t} } } */ ++/* { dg-final { scan-assembler-not {\tabs\t} } } */
++ ++/* { dg-final { scan-assembler {\tuabd\t} } } */ ++/* { dg-final { scan-assembler {\tudot\t} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/usadv16qi.c b/gcc/testsuite/gcc.target/aarch64/usadv16qi.c +index 69ceaf425..a66e12096 100644 +--- a/gcc/testsuite/gcc.target/aarch64/usadv16qi.c ++++ b/gcc/testsuite/gcc.target/aarch64/usadv16qi.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O3" } */ + +-#pragma GCC target "+nosve" ++#pragma GCC target "+nosve+nodotprod" + + #define N 1024 + +diff --git a/gcc/testsuite/gcc.target/aarch64/vect-clz.c b/gcc/testsuite/gcc.target/aarch64/vect-clz.c +index 044fa9e99..cd181c346 100644 +--- a/gcc/testsuite/gcc.target/aarch64/vect-clz.c ++++ b/gcc/testsuite/gcc.target/aarch64/vect-clz.c +@@ -1,6 +1,8 @@ + /* { dg-do run } */ + /* { dg-options "-O3 -save-temps -fno-inline -fno-vect-cost-model" } */ + ++#pragma GCC target "+nosve" ++ + extern void abort (); + + void +diff --git a/gcc/testsuite/gcc.target/i386/asm-1.c b/gcc/testsuite/gcc.target/i386/asm-1.c +index cd60a09bd..5e516d882 100644 +--- a/gcc/testsuite/gcc.target/i386/asm-1.c ++++ b/gcc/testsuite/gcc.target/i386/asm-1.c +@@ -2,7 +2,7 @@ + /* { dg-require-effective-target ia32 } */ + /* { dg-options "" } */ + +-register unsigned int EAX asm ("r14"); /* { dg-error "register name" } */ ++register unsigned int EAX asm ("r14"); /* { dg-error "cannot be accessed" } */ + + void foo () + { +diff --git a/gcc/testsuite/gcc.target/i386/asm-7.c b/gcc/testsuite/gcc.target/i386/asm-7.c +new file mode 100644 +index 000000000..d2d113626 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/asm-7.c +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target ia32 } */ ++/* { dg-options "" } */ ++ ++void foo (void) ++{ ++ asm volatile ("" : : : "%r12"); /* { dg-error "cannot be clobbered" } */ ++} +diff --git a/gcc/testsuite/gcc.target/i386/asm-flag-0.c b/gcc/testsuite/gcc.target/i386/asm-flag-0.c +index b0c05239b..e7bd1a585 100644 +--- a/gcc/testsuite/gcc.target/i386/asm-flag-0.c ++++ b/gcc/testsuite/gcc.target/i386/asm-flag-0.c +@@ -11,5 +11,5 @@ void a(void) + void b(void) + { + char x; +- asm("" : "=@ccbad"(x)); /* { dg-error "unknown asm flag output" } */ ++ asm("" : "=@ccbad"(x)); /* { dg-error "unknown 'asm' flag output" } */ + } +diff --git a/gcc/testsuite/gcc.target/i386/funcspec-4.c b/gcc/testsuite/gcc.target/i386/funcspec-4.c +index 025b97dff..e345acdef 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-4.c ++++ b/gcc/testsuite/gcc.target/i386/funcspec-4.c +@@ -5,7 +5,7 @@ + extern void error1 (void) __attribute__((__target__("fma400"))); /* { dg-error "unknown" } */ + + /* Multiple arch switches */ +-extern void error2 (void) __attribute__((__target__("arch=core2,arch=k8"))); /* { dg-error "already specified" } */ ++extern void error2 (void) __attribute__((__target__("arch=core2,arch=k8"))); /* { dg-error "attribute value 'arch=k8' was already specified in 'target' attribute" } */ + + /* Unknown tune target */ + extern void error3 (void) __attribute__((__target__("tune=foobar"))); /* { dg-error "bad value" } */ +diff --git a/gcc/testsuite/gcc.target/i386/inline_error.c b/gcc/testsuite/gcc.target/i386/inline_error.c +index 18e506631..57e60fbad 100644 +--- a/gcc/testsuite/gcc.target/i386/inline_error.c ++++ b/gcc/testsuite/gcc.target/i386/inline_error.c +@@ -2,7 +2,7 @@ + /* { dg-options "-O0 -mno-popcnt" } */ + + inline int __attribute__ ((__gnu_inline__, __always_inline__, target("popcnt"))) +-foo () /* { dg-error "inlining failed in call to always_inline .* 
target specific option mismatch" } */ ++foo () /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */ + { + return 0; + } +diff --git a/gcc/testsuite/gcc.target/i386/interrupt-6.c b/gcc/testsuite/gcc.target/i386/interrupt-6.c +index bcbcc97c6..138b98fe1 100644 +--- a/gcc/testsuite/gcc.target/i386/interrupt-6.c ++++ b/gcc/testsuite/gcc.target/i386/interrupt-6.c +@@ -31,7 +31,7 @@ fn4 (uword_t error_code, void *frame) + error = error_code; + } + +-extern int fn5 (void *) __attribute__ ((interrupt)); /* { dg-error "interrupt service routine can't have non-void return value" } */ ++extern int fn5 (void *) __attribute__ ((interrupt)); /* { dg-error "interrupt service routine must return 'void'" } */ + + int + fn5 (void *frame) +diff --git a/gcc/testsuite/gcc.target/i386/interrupt-7.c b/gcc/testsuite/gcc.target/i386/interrupt-7.c +index 506f61afa..3e2f6a0eb 100644 +--- a/gcc/testsuite/gcc.target/i386/interrupt-7.c ++++ b/gcc/testsuite/gcc.target/i386/interrupt-7.c +@@ -8,5 +8,5 @@ extern void fn (void *) __attribute__((interrupt)); + void + foo (void) + { +- fn (&error); /* { dg-error "interrupt service routine can't be called directly" } */ ++ fn (&error); /* { dg-error "interrupt service routine cannot be called directly" } */ + } +diff --git a/gcc/testsuite/gcc.target/i386/pr30848.c b/gcc/testsuite/gcc.target/i386/pr30848.c +index 2a9285151..9c4e22ac7 100644 +--- a/gcc/testsuite/gcc.target/i386/pr30848.c ++++ b/gcc/testsuite/gcc.target/i386/pr30848.c +@@ -2,5 +2,5 @@ + + void foo(double d) + { +- __asm__ ("" : "=u" (d)); /* { dg-error "output regs" } */ ++ __asm__ ("" : "=u" (d)); /* { dg-error "output registers" } */ + } +diff --git a/gcc/testsuite/gcc.target/i386/pr39082-1.c b/gcc/testsuite/gcc.target/i386/pr39082-1.c +index 2af2264c3..85b5671e9 100644 +--- a/gcc/testsuite/gcc.target/i386/pr39082-1.c ++++ b/gcc/testsuite/gcc.target/i386/pr39082-1.c +@@ -13,7 +13,7 @@ extern int bar1 (union un); + extern union un bar2 (int); + + int +-foo1 (union un u) /* { dg-message "note: the ABI of passing union with long double has changed in GCC 4.4" } */ ++foo1 (union un u) /* { dg-message "note: the ABI of passing union with 'long double' has changed in GCC 4.4" } */ + { + bar1 (u); + return u.i; +diff --git a/gcc/testsuite/gcc.target/i386/pr39678.c b/gcc/testsuite/gcc.target/i386/pr39678.c +index 0548466d6..c94c002f1 100644 +--- a/gcc/testsuite/gcc.target/i386/pr39678.c ++++ b/gcc/testsuite/gcc.target/i386/pr39678.c +@@ -10,7 +10,7 @@ struct X { + + struct X + foo (float *p) +-{ /* { dg-message "note: the ABI of passing structure with complex float member has changed in GCC 4.4" } */ ++{ /* { dg-message "note: the ABI of passing structure with 'complex float' member has changed in GCC 4.4" } */ + struct X x; + x.c = -3; + __real x.val = p[0]; +diff --git a/gcc/testsuite/gcc.target/i386/pr57756.c b/gcc/testsuite/gcc.target/i386/pr57756.c +index 25c565c87..9a78f62c9 100644 +--- a/gcc/testsuite/gcc.target/i386/pr57756.c ++++ b/gcc/testsuite/gcc.target/i386/pr57756.c +@@ -3,7 +3,7 @@ + + /* callee cannot be inlined into caller because it has a higher target ISA. 
*/ + __attribute__((always_inline,target("sse4.2"))) +-__inline int callee () /* { dg-error "inlining failed in call to always_inline" } */ ++__inline int callee () /* { dg-error "inlining failed in call to 'always_inline'" } */ + { + return 0; + } +diff --git a/gcc/testsuite/gcc.target/i386/pr62120.c b/gcc/testsuite/gcc.target/i386/pr62120.c +index bfb8c4703..28d85d377 100644 +--- a/gcc/testsuite/gcc.target/i386/pr62120.c ++++ b/gcc/testsuite/gcc.target/i386/pr62120.c +@@ -3,6 +3,6 @@ + + void foo () + { +- register int zmm_var asm ("ymm9");/* { dg-error "invalid register name" } */ +- register int zmm_var2 asm ("23");/* { dg-error "invalid register name" } */ ++ register int zmm_var asm ("ymm9");/* { dg-error "cannot be accessed" } */ ++ register int zmm_var2 asm ("23");/* { dg-error "cannot be accessed" } */ + } +diff --git a/gcc/testsuite/gcc.target/i386/pr68843-1.c b/gcc/testsuite/gcc.target/i386/pr68843-1.c +index da0676aa6..6198ea9af 100644 +--- a/gcc/testsuite/gcc.target/i386/pr68843-1.c ++++ b/gcc/testsuite/gcc.target/i386/pr68843-1.c +@@ -5,7 +5,7 @@ double + test () + { + double x = 1.0; +- asm ("fld %1" /* { dg-error "explicitly used regs must be grouped at top of stack" } */ ++ asm ("fld %1" /* { dg-error "explicitly used registers must be grouped at top of stack" } */ + : "=&t" (x) + : "u" (x)); + return x; +diff --git a/gcc/testsuite/gcc.target/i386/pr79804.c b/gcc/testsuite/gcc.target/i386/pr79804.c +index 10adb4466..08d1a3ea1 100644 +--- a/gcc/testsuite/gcc.target/i386/pr79804.c ++++ b/gcc/testsuite/gcc.target/i386/pr79804.c +@@ -7,4 +7,4 @@ void foo (void) + register int r19 asm ("19"); + + asm volatile ("# %0" : "=r"(r19)); /* { dg-error "invalid use of register" } */ +-} /* { dg-error "cannot be used in asm here" } */ ++} /* { dg-error "cannot be used in 'asm' here" } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr82673.c b/gcc/testsuite/gcc.target/i386/pr82673.c +index 50eb5a3bc..161ec88e3 100644 +--- a/gcc/testsuite/gcc.target/i386/pr82673.c ++++ b/gcc/testsuite/gcc.target/i386/pr82673.c +@@ -9,4 +9,4 @@ void + bar (void) /* { dg-error "frame pointer required, but reserved" } */ + { + B = &y; +-} /* { dg-error "bp cannot be used in asm here" } */ ++} /* { dg-error "bp cannot be used in 'asm' here" } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr88809-2.c b/gcc/testsuite/gcc.target/i386/pr88809-2.c +new file mode 100644 +index 000000000..b8ef51dab +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88809-2.c +@@ -0,0 +1,9 @@ ++/* PR target/88809 */ ++/* { dg-options "-Os" } */ ++ ++unsigned int foo (const char *ptr) ++{ ++ return __builtin_strlen (ptr); ++} ++ ++/* { dg-final { scan-assembler "call\[ \t\]strlen" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr88809.c b/gcc/testsuite/gcc.target/i386/pr88809.c +new file mode 100644 +index 000000000..20844ddb9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88809.c +@@ -0,0 +1,9 @@ ++/* PR target/88809 */ ++/* { dg-options "-O" } */ ++ ++unsigned int foo (const char *ptr) ++{ ++ return __builtin_strlen (ptr); ++} ++ ++/* { dg-final { scan-assembler "call\[ \t\]strlen" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1.c b/gcc/testsuite/gcc.target/i386/pr88828-1.c +new file mode 100644 +index 000000000..a15d1fea3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-1.c +@@ -0,0 +1,49 @@ ++/* { dg-do run { target sse2_runtime } } */ ++/* { dg-options "-O2 -msse2" } */ ++ ++#include "pr88828-1a.c" ++#include "pr88828-1b.c" ++#include "pr88828-1c.c" ++ ++extern void abort (); ++ ++void 
++do_check (__v4sf y, float f[4], float z) ++{ ++ int i; ++ ++ for (i = 0; i < 4; i++) ++ if (i == 0) ++ { ++ if (y[i] != z) ++ abort (); ++ } ++ else ++ { ++ if (y[i] != f[i]) ++ abort (); ++ } ++} ++ ++int ++main (void) ++{ ++ float f[4] = { -11, 2, 55553, -4 }; ++ float z = 134567; ++ __v4sf x = { f[0], f[1], f[2], f[3] }; ++ __v4sf y; ++ int i; ++ ++ for (i = 0; i < 4; i++) ++ if (x[i] != f[i]) ++ abort (); ++ ++ y = foo1 (x, z); ++ do_check (y, f, z); ++ y = foo2 (x, z); ++ do_check (y, f, z); ++ y = foo3 (x, z); ++ do_check (y, f, z); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1a.c b/gcc/testsuite/gcc.target/i386/pr88828-1a.c +new file mode 100644 +index 000000000..d37b24c66 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-1a.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler "movss" } } */ ++/* { dg-final { scan-assembler-not "movaps" } } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not "unpcklps" } } */ ++/* { dg-final { scan-assembler-not "shufps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++ ++__attribute__((noinline, noclone)) ++__v4sf ++foo1 (__v4sf x, float f) ++{ ++ __v4sf y = { f, x[1], x[2], x[3] }; ++ return y; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1b.c b/gcc/testsuite/gcc.target/i386/pr88828-1b.c +new file mode 100644 +index 000000000..af4aced65 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-1b.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler "movss" } } */ ++/* { dg-final { scan-assembler-not "movaps" } } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not "unpcklps" } } */ ++/* { dg-final { scan-assembler-not "shufps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++ ++static __v4sf ++vector_init (float f0,float f1, float f2,float f3) ++{ ++ __v4sf y = { f0, f1, f2, f3 }; ++ return y; ++} ++ ++__attribute__((noinline, noclone)) ++__v4sf ++foo2 (__v4sf x, float f) ++{ ++ return vector_init (f, x[1], x[2], x[3]) ; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1c.c b/gcc/testsuite/gcc.target/i386/pr88828-1c.c +new file mode 100644 +index 000000000..a117f3ec7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-1c.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler "movss" } } */ ++/* { dg-final { scan-assembler-not "movaps" } } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not "unpcklps" } } */ ++/* { dg-final { scan-assembler-not "shufps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++ ++__attribute__((noinline, noclone)) ++__v4sf ++foo3 (__v4sf x, float f) ++{ ++ __v4sf y = x; ++ y[0] = f; ++ return y; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4a.c b/gcc/testsuite/gcc.target/i386/pr88828-4a.c +new file mode 100644 +index 000000000..64043b985 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-4a.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler "movss" } } */ ++/* { dg-final { scan-assembler-times "shufps" 1 } } */ ++/* { dg-final { scan-assembler-not "movaps" } } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not 
"unpcklps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++ ++__attribute__((noinline, noclone)) ++__v4sf ++foo (__v4sf x, float f) ++{ ++ __v4sf y = { x[0], x[2], x[3], x[1] }; ++ y[0] = f; ++ return y; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4b.c b/gcc/testsuite/gcc.target/i386/pr88828-4b.c +new file mode 100644 +index 000000000..ad8d2b985 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-4b.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mavx" } */ ++/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ ++/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */ ++/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */ ++/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */ ++/* { dg-final { scan-assembler-not "vshufps" } } */ ++/* { dg-final { scan-assembler-not "vmovaps" } } */ ++/* { dg-final { scan-assembler-not "vmovlhps" } } */ ++/* { dg-final { scan-assembler-not "vunpcklps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++ ++__attribute__((noinline, noclone)) ++__v4sf ++foo (__v4sf x, float f) ++{ ++ __v4sf y = { x[0], x[2], x[3], x[1] }; ++ y[0] = f; ++ return y; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5a.c b/gcc/testsuite/gcc.target/i386/pr88828-5a.c +new file mode 100644 +index 000000000..5e908faef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-5a.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler "movss" } } */ ++/* { dg-final { scan-assembler-times "shufps" 2 } } */ ++/* { dg-final { scan-assembler-times "movaps" 1 } } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not "unpcklps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++ ++__attribute__((noinline, noclone)) ++__v4sf ++foo (__v4sf x, float f) ++{ ++ __v4sf y = { x[0], x[2], x[3], x[0] }; ++ y[3] = f; ++ return y; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5b.c b/gcc/testsuite/gcc.target/i386/pr88828-5b.c +new file mode 100644 +index 000000000..988a48823 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-5b.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mavx" } */ ++/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ ++/* { dg-final { scan-assembler-times "vinsertps" 1 } } */ ++/* { dg-final { scan-assembler-not "vshufps" } } */ ++/* { dg-final { scan-assembler-not "vmovss" } } */ ++/* { dg-final { scan-assembler-not "vmovaps" } } */ ++/* { dg-final { scan-assembler-not "vmovlhps" } } */ ++/* { dg-final { scan-assembler-not "vunpcklps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++ ++__attribute__((noinline, noclone)) ++__v4sf ++foo (__v4sf x, float f) ++{ ++ __v4sf y = { x[0], x[2], x[3], x[0] }; ++ y[3] = f; ++ return y; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7.c b/gcc/testsuite/gcc.target/i386/pr88828-7.c +new file mode 100644 +index 000000000..4302c2664 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-7.c +@@ -0,0 +1,53 @@ ++/* { dg-do run { target sse2_runtime } } */ ++/* { dg-options "-O2 -msse2 -fexcess-precision=standard" } */ ++ ++#include "pr88828-7a.c" ++#include "pr88828-7b.c" ++ ++extern void abort (); ++ ++float ++bar (float x, float y) ++{ ++ return x / y - y * x; ++} ++ ++void ++do_check (__v4sf x, float f1[4], float f2[4]) ++{ ++ int i; ++ ++ for (i = 0; i < 4; i++) ++ if (i == 
0) ++ { ++ if (x[i] != bar (f1[i], f2[i])) ++ abort (); ++ } ++ else ++ { ++ if (x[i] != f1[i]) ++ abort (); ++ } ++} ++ ++int ++main (void) ++{ ++ float f1[4] = { -11, 2, 55553, -4 }; ++ float f2[4] = { 111, 3.3, -55.553, 4.8 }; ++ __v4sf x = { f1[0], f1[1], f1[2], f1[3] }; ++ __v4sf y = { f2[0], f2[1], f2[2], f2[3] }; ++ __v4sf z; ++ int i; ++ ++ for (i = 0; i < 4; i++) ++ if (x[i] != f1[i] || y[i] != f2[i] ) ++ abort (); ++ ++ z = foo1 (x, y); ++ do_check (z, f1, f2); ++ x = foo2 (x, y); ++ do_check (z, f1, f2); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7a.c b/gcc/testsuite/gcc.target/i386/pr88828-7a.c +new file mode 100644 +index 000000000..f1ae57422 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-7a.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not "unpckhps" } } */ ++/* { dg-final { scan-assembler-not "unpcklps" } } */ ++/* { dg-final { scan-assembler-not "shufps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++extern float bar (float, float); ++ ++__v4sf ++foo1 (__v4sf x, __v4sf y) ++{ ++ __v4sf z = { bar (x[0], y[0]), x[1], x[2], x[3] }; ++ return z; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7b.c b/gcc/testsuite/gcc.target/i386/pr88828-7b.c +new file mode 100644 +index 000000000..c027c5694 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-7b.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not "unpckhps" } } */ ++/* { dg-final { scan-assembler-not "unpcklps" } } */ ++/* { dg-final { scan-assembler-not "shufps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++extern float bar (float, float); ++ ++static __v4sf ++vector_init (float f0,float f1, float f2,float f3) ++{ ++ __v4sf y = { f0, f1, f2, f3 }; ++ return y; ++} ++ ++__v4sf ++foo2 (__v4sf x, __v4sf y) ++{ ++ return vector_init (bar (x[0], y[0]), x[1], x[2], x[3]) ; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8.c b/gcc/testsuite/gcc.target/i386/pr88828-8.c +new file mode 100644 +index 000000000..3b8eabd22 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-8.c +@@ -0,0 +1,46 @@ ++/* { dg-do run { target sse2_runtime } } */ ++/* { dg-options "-O2 -msse2" } */ ++ ++#include "pr88828-8a.c" ++#include "pr88828-8b.c" ++ ++extern void abort (); ++ ++void ++do_check (__v4sf y, float f[4], float z) ++{ ++ int i; ++ ++ for (i = 0; i < 4; i++) ++ if (i == 0) ++ { ++ if (y[i] != z) ++ abort (); ++ } ++ else ++ { ++ if (y[i] != f[i]) ++ abort (); ++ } ++} ++ ++int ++main (void) ++{ ++ float f[4] = { -11, 2, 55553, -4 }; ++ float z = 11.4; ++ __v4sf x = { f[0], f[1], f[2], f[3] }; ++ __v4sf y; ++ int i; ++ ++ for (i = 0; i < 4; i++) ++ if (x[i] != f[i]) ++ abort (); ++ ++ y = foo1 (x); ++ do_check (y, f, z); ++ y = foo2 (x); ++ do_check (y, f, z); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8a.c b/gcc/testsuite/gcc.target/i386/pr88828-8a.c +new file mode 100644 +index 000000000..5d383dfd0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-8a.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not "unpckhps" } } */ ++/* { dg-final { scan-assembler-not "unpcklps" } } */ ++/* { dg-final { 
scan-assembler-not "shufps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++ ++__v4sf ++foo1 (__v4sf x) ++{ ++ __v4sf z = { 11.4, x[1], x[2], x[3] }; ++ return z; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8b.c b/gcc/testsuite/gcc.target/i386/pr88828-8b.c +new file mode 100644 +index 000000000..5ffbc9c31 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-8b.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not "unpckhps" } } */ ++/* { dg-final { scan-assembler-not "unpcklps" } } */ ++/* { dg-final { scan-assembler-not "shufps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++ ++static __v4sf ++vector_init (float f0,float f1, float f2,float f3) ++{ ++ __v4sf y = { f0, f1, f2, f3 }; ++ return y; ++} ++ ++__v4sf ++foo2 (__v4sf x) ++{ ++ return vector_init (11.4, x[1], x[2], x[3]) ; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9.c b/gcc/testsuite/gcc.target/i386/pr88828-9.c +new file mode 100644 +index 000000000..c33907b4a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-9.c +@@ -0,0 +1,46 @@ ++/* { dg-do run { target sse2_runtime } } */ ++/* { dg-options "-O2 -msse2" } */ ++ ++#include "pr88828-9a.c" ++#include "pr88828-9b.c" ++ ++extern void abort (); ++ ++void ++do_check (__v4sf y, float f[4], float z) ++{ ++ int i; ++ ++ for (i = 0; i < 4; i++) ++ if (i == 0) ++ { ++ if (y[i] != z) ++ abort (); ++ } ++ else ++ { ++ if (y[i] != f[i]) ++ abort (); ++ } ++} ++ ++int ++main (void) ++{ ++ float f[4] = { -11, 2, 55553, -4 }; ++ float z = 11.4; ++ __m128 x = (__m128) (__v4sf) { f[0], f[1], f[2], f[3] }; ++ __m128 y; ++ int i; ++ ++ for (i = 0; i < 4; i++) ++ if (x[i] != f[i]) ++ abort (); ++ ++ y = foo1 (x); ++ do_check (y, f, z); ++ y = foo2 (x); ++ do_check (y, f, z); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9a.c b/gcc/testsuite/gcc.target/i386/pr88828-9a.c +new file mode 100644 +index 000000000..7f8306577 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-9a.c +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not "unpckhps" } } */ ++/* { dg-final { scan-assembler-not "unpcklps" } } */ ++/* { dg-final { scan-assembler-not "shufps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); ++ ++__m128 ++foo1 (__m128 x) ++{ ++ __v4sf z = { 11.4, ((__v4sf) x)[1], ((__v4sf) x)[2], ((__v4sf) x) [3] }; ++ return (__m128) z; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9b.c b/gcc/testsuite/gcc.target/i386/pr88828-9b.c +new file mode 100644 +index 000000000..6588ad15a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88828-9b.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse -mno-sse4" } */ ++/* { dg-final { scan-assembler-not "movlhps" } } */ ++/* { dg-final { scan-assembler-not "unpckhps" } } */ ++/* { dg-final { scan-assembler-not "unpcklps" } } */ ++/* { dg-final { scan-assembler-not "shufps" } } */ ++ ++typedef float __v4sf __attribute__ ((__vector_size__ (16))); ++typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); ++ ++static __m128 ++vector_init (float f0,float f1, float f2,float f3) ++{ ++ __v4sf y = { f0, f1, f2, f3 }; ++ return (__m128) y; ++} ++ ++__m128 
++foo2 (__m128 x) ++{ ++ return vector_init (11.4, ((__v4sf) x)[1], ((__v4sf) x)[2], ++ ((__v4sf) x) [3]); ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr88963-1.c b/gcc/testsuite/gcc.target/i386/pr88963-1.c +new file mode 100644 +index 000000000..e6f15259e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88963-1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=x86-64 -mavx2 -fdump-tree-optimized" } */ ++ ++typedef int VInt __attribute__((vector_size(64))); ++ ++void test(VInt*__restrict a, VInt*__restrict b, ++ VInt*__restrict c) ++{ ++ *a = *b + *c; ++} ++ ++/* Vector loads and stores should be split. */ ++/* { dg-final { scan-tree-dump-not "vector\\(16\\)" "optimized" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr88963-2.c b/gcc/testsuite/gcc.target/i386/pr88963-2.c +new file mode 100644 +index 000000000..114f1f5c3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr88963-2.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -march=x86-64 -msse2 -fdump-tree-optimized" } */ ++ ++typedef int VInt __attribute__((vector_size(64))); ++ ++void test(VInt*__restrict a, VInt*__restrict b, ++ VInt*__restrict c) ++{ ++ *a = *b + *c; ++} ++ ++/* Vector loads and stores should be split. */ ++/* { dg-final { scan-tree-dump-not "vector\\(16\\)" "optimized" } } */ ++/* { dg-final { scan-tree-dump-not "vector\\(8\\)" "optimized" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr89261.c b/gcc/testsuite/gcc.target/i386/pr89261.c +new file mode 100644 +index 000000000..63882c099 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr89261.c +@@ -0,0 +1,9 @@ ++/* PR target/89261 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2" } */ ++ ++typedef double __v2df __attribute__ ((vector_size (16), aligned (1 << 28))); ++ ++__v2df foo = { 1.0, 2.0 }; ++ ++/* { dg-final { scan-assembler "\.align\[ \t]+268435456" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr92645-2.c b/gcc/testsuite/gcc.target/i386/pr92645-2.c +new file mode 100644 +index 000000000..d34ed3aa8 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr92645-2.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse2 -fdump-tree-cddce1" } */ ++ ++typedef int v4si __attribute__((vector_size(16))); ++typedef int v2si __attribute__((vector_size(8))); ++ ++void low (v2si *dst, v4si *srcp) ++{ ++ v4si src = *srcp; ++ *dst = (v2si) { src[0], src[1] }; ++} ++ ++void high (v2si *dst, v4si *srcp) ++{ ++ v4si src = *srcp; ++ *dst = (v2si) { src[2], src[3] }; ++} ++ ++void even (v2si *dst, v4si *srcp) ++{ ++ v4si src = *srcp; ++ *dst = (v2si) { src[0], src[2] }; ++} ++ ++void odd (v2si *dst, v4si *srcp) ++{ ++ v4si src = *srcp; ++ *dst = (v2si) { src[1], src[3] }; ++} ++ ++/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "cddce1" } } */ ++/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "cddce1" } } */ ++/* Ideally highpart extraction would elide the permutation as well. 
*/ ++/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "cddce1" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr92645-3.c b/gcc/testsuite/gcc.target/i386/pr92645-3.c +new file mode 100644 +index 000000000..9c08c9fb6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr92645-3.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mavx2 -fdump-tree-cddce1" } */ ++ ++typedef int v8si __attribute__((vector_size(32))); ++typedef float v4sf __attribute__((vector_size(16))); ++ ++void low (v4sf *dst, v8si *srcp) ++{ ++ v8si src = *srcp; ++ *dst = (v4sf) { src[0], src[1], src[2], src[3] }; ++} ++ ++void high (v4sf *dst, v8si *srcp) ++{ ++ v8si src = *srcp; ++ *dst = (v4sf) { src[4], src[5], src[6], src[7] }; ++} ++ ++void even (v4sf *dst, v8si *srcp) ++{ ++ v8si src = *srcp; ++ *dst = (v4sf) { src[0], src[2], src[4], src[6] }; ++} ++ ++void odd (v4sf *dst, v8si *srcp) ++{ ++ v8si src = *srcp; ++ *dst = (v4sf) { src[1], src[3], src[5], src[7] }; ++} ++ ++/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "cddce1" } } */ ++/* Four conversions, on the smaller vector type, to not convert excess ++ elements. */ ++/* { dg-final { scan-tree-dump-times " = \\\(vector\\\(4\\\) float\\\)" 4 "cddce1" } } */ ++/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "cddce1" } } */ ++/* Ideally highpart extraction would elide the VEC_PERM_EXPR as well. */ ++/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "cddce1" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr92645-4.c b/gcc/testsuite/gcc.target/i386/pr92645-4.c +new file mode 100644 +index 000000000..788a97ed1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr92645-4.c +@@ -0,0 +1,56 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -mavx2 -fdump-tree-optimized -Wno-psabi" } */ ++ ++typedef unsigned int u32v4 __attribute__((vector_size(16))); ++typedef unsigned short u16v16 __attribute__((vector_size(32))); ++typedef unsigned char u8v16 __attribute__((vector_size(16))); ++ ++union vec128 { ++ u8v16 u8; ++ u32v4 u32; ++}; ++ ++#define memcpy __builtin_memcpy ++ ++static u16v16 zxt(u8v16 x) ++{ ++ return (u16v16) { ++ x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], ++ x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15] ++ }; ++} ++ ++static u8v16 narrow(u16v16 x) ++{ ++ return (u8v16) { ++ x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], ++ x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15] ++ }; ++} ++ ++void f(char *dst, char *src, unsigned long n, unsigned c) ++{ ++ unsigned ia = 255 - (c >> 24); ++ ia += ia >> 7; ++ ++ union vec128 c4 = {0}, ia16 = {0}; ++ c4.u32 += c; ++ ia16.u8 += (unsigned char)ia; ++ ++ u16v16 c16 = (zxt(c4.u8) << 8) + 128; ++ ++ for (; n; src += 16, dst += 16, n -= 4) { ++ union vec128 s; ++ memcpy(&s, src, sizeof s); ++ s.u8 = narrow((zxt(s.u8)*zxt(ia16.u8) + c16) >> 8); ++ memcpy(dst, &s, sizeof s); ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "vec_unpack_lo" 3 "optimized" } } */ ++/* We're missing an opportunity to, after later optimizations, combine ++ a uniform CTOR with a vec_unpack_lo_expr to a CTOR on a converted ++ element. 
*/ ++/* { dg-final { scan-tree-dump-times "vec_unpack_lo" 2 "optimized" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "VEC_PACK_TRUNC" 1 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 2 "optimized" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr92803.c b/gcc/testsuite/gcc.target/i386/pr92803.c +new file mode 100644 +index 000000000..fc8d64efb +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr92803.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -Wno-psabi -mavx2 -fdump-tree-forwprop1" } */ ++ ++typedef double v4df __attribute__((vector_size (32))); ++typedef float v8sf __attribute__((vector_size (32))); ++typedef float v4sf __attribute__((vector_size (16))); ++typedef int v4si __attribute__((vector_size (16))); ++typedef double v2df __attribute__((vector_size (16))); ++ ++v2df ++foo (v4df x, double *p, v2df y) ++{ ++ return (v2df) { x[3], *p }; ++} ++ ++v4sf ++bar (v4si x, float *p) ++{ ++ return (v4sf) { x[0], x[1], x[2], *p }; ++} ++ ++v4sf ++baz (v4si x) ++{ ++ return (v4sf) { x[0], x[1], 3.0f, 1.0f }; ++} ++ ++v4sf ++barf (v8sf x) ++{ ++ return (v4sf) { x[4], x[5], 1.0f, 2.0f }; ++} ++ ++/* We expect all CTORs to turn into permutes, the FP converting ones ++ to two each with the one with constants possibly elided in the future ++ by converting 3.0f and 1.0f "back" to integers. */ ++/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 6 "forwprop1" } } */ ++/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 5 "forwprop1" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90 b/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90 +index 8070bbb4a..d827323ac 100644 +--- a/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90 ++++ b/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90 +@@ -23,5 +23,3 @@ Program FOO + 366 format(/, ' PC = ',E12.4,/,' UC = ',E12.4,/,' VC = ',E12.4,/) + + end Program FOO +- +-! { dg-final { scan-tree-dump "tiled" "graphite" } } +diff --git a/gcc/testsuite/gfortran.dg/pr88833.f90 b/gcc/testsuite/gfortran.dg/pr88833.f90 +new file mode 100644 +index 000000000..224e6ce5f +--- /dev/null ++++ b/gcc/testsuite/gfortran.dg/pr88833.f90 +@@ -0,0 +1,9 @@ ++! { dg-do assemble { target aarch64_asm_sve_ok } } ++! { dg-options "-O3 -march=armv8.2-a+sve --save-temps" } ++ ++subroutine foo(x) ++ real :: x(100) ++ x = x + 10 ++end subroutine foo ++ ++! 
{ dg-final { scan-assembler {\twhilelo\tp[0-9]+\.s, wzr, (w[0-9]+).*\twhilelo\tp[0-9]+\.s, w[0-9]+, \1} } } +diff --git a/gcc/testsuite/gnat.dg/opt39.adb b/gcc/testsuite/gnat.dg/opt39.adb +index 3b12cf201..0a5ef67a2 100644 +--- a/gcc/testsuite/gnat.dg/opt39.adb ++++ b/gcc/testsuite/gnat.dg/opt39.adb +@@ -27,4 +27,5 @@ begin + end if; + end; + +--- { dg-final { scan-tree-dump-times "MEM" 1 "optimized" } } ++-- { dg-final { scan-tree-dump-not "MEM" "optimized" } } ++-- { dg-final { scan-tree-dump-not "tmp" "optimized" } } +diff --git a/gcc/testsuite/lib/prune.exp b/gcc/testsuite/lib/prune.exp +index 812c59e6f..a9beef48e 100644 +--- a/gcc/testsuite/lib/prune.exp ++++ b/gcc/testsuite/lib/prune.exp +@@ -21,7 +21,7 @@ load_lib multiline.exp + if ![info exists TEST_ALWAYS_FLAGS] { + set TEST_ALWAYS_FLAGS "" + } +-set TEST_ALWAYS_FLAGS "-fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never $TEST_ALWAYS_FLAGS" ++set TEST_ALWAYS_FLAGS "-fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never -fdiagnostics-urls=never $TEST_ALWAYS_FLAGS" + + proc prune_gcc_output { text } { + global srcdir +diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp +index 35ccbc86f..4ff39dab3 100644 +--- a/gcc/testsuite/lib/scanasm.exp ++++ b/gcc/testsuite/lib/scanasm.exp +@@ -546,3 +546,179 @@ proc scan-lto-assembler { args } { + verbose "output_file: $output_file" + dg-scan "scan-lto-assembler" 1 $testcase $output_file $args + } ++ ++# Read assembly file FILENAME and store a mapping from function names ++# to function bodies in array RESULT. FILENAME has already been uploaded ++# locally where necessary and is known to exist. ++ ++proc parse_function_bodies { filename result } { ++ upvar $result up_result ++ ++ # Regexp for the start of a function definition (name in \1). ++ set label {^([a-zA-Z_]\S+):$} ++ ++ # Regexp for the end of a function definition. ++ set terminator {^\s*\.size} ++ ++ # Regexp for lines that aren't interesting. ++ set fluff {^\s*(?:\.|//)} ++ ++ set fd [open $filename r] ++ set in_function 0 ++ while { [gets $fd line] >= 0 } { ++ if { [regexp $label $line dummy function_name] } { ++ set in_function 1 ++ set function_body "" ++ } elseif { $in_function } { ++ if { [regexp $terminator $line] } { ++ set up_result($function_name) $function_body ++ set in_function 0 ++ } elseif { ![regexp $fluff $line] } { ++ append function_body $line "\n" ++ } ++ } ++ } ++ close $fd ++} ++ ++# FUNCTIONS is an array that maps function names to function bodies. ++# Return true if it contains a definition of function NAME and if ++# that definition matches BODY_REGEXP. ++ ++proc check_function_body { functions name body_regexp } { ++ upvar $functions up_functions ++ ++ if { ![info exists up_functions($name)] } { ++ return 0 ++ } ++ return [regexp "^$body_regexp\$" $up_functions($name)] ++} ++ ++# Check the implementations of functions against expected output. Used as: ++# ++# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR]] } } ++# ++# See sourcebuild.texi for details. 
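++#
++# Illustration only (a hypothetical test, not taken from sourcebuild.texi):
++# a C test typically enables the check with a dg-final directive and then
++# writes the expected body of each function in a comment, e.g.
++#
++#   /* { dg-final { check-function-bodies "**" "" "" } } */
++#
++#   /*
++#   ** foo:
++#   **	...
++#   **	ret
++#   */
++#
++# Here "**" is PREFIX, the empty string selects the default "*/" TERMINATOR,
++# "..." stands for any sequence of instructions, and the lines in between
++# become the regexp that the assembly emitted for foo must match.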
++ ++proc check-function-bodies { args } { ++ if { [llength $args] < 2 } { ++ error "too few arguments to check-function-bodies" ++ } ++ if { [llength $args] > 4 } { ++ error "too many arguments to check-function-bodies" ++ } ++ ++ if { [llength $args] >= 3 } { ++ set required_flag [lindex $args 2] ++ ++ upvar 2 dg-extra-tool-flags extra_tool_flags ++ set flags $extra_tool_flags ++ ++ global torture_current_flags ++ if { [info exists torture_current_flags] } { ++ append flags " " $torture_current_flags ++ } ++ if { ![regexp " $required_flag " $flags] } { ++ return ++ } ++ } ++ ++ set xfail_all 0 ++ if { [llength $args] >= 4 } { ++ switch [dg-process-target [lindex $args 3]] { ++ "S" { } ++ "N" { return } ++ "F" { set xfail_all 1 } ++ "P" { } ++ } ++ } ++ ++ set testcase [testname-for-summary] ++ # The name might include a list of options; extract the file name. ++ set filename [lindex $testcase 0] ++ ++ global srcdir ++ set input_filename "$srcdir/$filename" ++ set output_filename "[file rootname [file tail $filename]].s" ++ ++ set prefix [lindex $args 0] ++ set prefix_len [string length $prefix] ++ set terminator [lindex $args 1] ++ if { [string equal $terminator ""] } { ++ set terminator "*/" ++ } ++ set terminator_len [string length $terminator] ++ ++ set have_bodies 0 ++ if { [is_remote host] } { ++ remote_upload host "$filename" ++ } ++ if { [file exists $output_filename] } { ++ parse_function_bodies $output_filename functions ++ set have_bodies 1 ++ } else { ++ verbose -log "$testcase: output file does not exist" ++ } ++ ++ set count 0 ++ set function_regexp "" ++ set label {^(\S+):$} ++ ++ set lineno 1 ++ set fd [open $input_filename r] ++ set in_function 0 ++ while { [gets $fd line] >= 0 } { ++ if { [string equal -length $prefix_len $line $prefix] } { ++ set line [string trim [string range $line $prefix_len end]] ++ if { !$in_function } { ++ if { [regexp "^(.*\\S)\\s+{(.*)}\$" $line dummy \ ++ line selector] } { ++ set selector [dg-process-target $selector] ++ } else { ++ set selector "P" ++ } ++ if { ![regexp $label $line dummy function_name] } { ++ close $fd ++ error "check-function-bodies: line $lineno does not have a function label" ++ } ++ set in_function 1 ++ set function_regexp "" ++ } elseif { [string equal $line "("] } { ++ append function_regexp "(?:" ++ } elseif { [string equal $line "|"] } { ++ append function_regexp "|" ++ } elseif { [string equal $line ")"] } { ++ append function_regexp ")" ++ } elseif { [string equal $line "..."] } { ++ append function_regexp ".*" ++ } else { ++ append function_regexp "\t" $line "\n" ++ } ++ } elseif { [string equal -length $terminator_len $line $terminator] } { ++ if { ![string equal $selector "N"] } { ++ if { $xfail_all || [string equal $selector "F"] } { ++ setup_xfail "*-*-*" ++ } ++ set testname "$testcase check-function-bodies $function_name" ++ if { !$have_bodies } { ++ unresolved $testname ++ } elseif { [check_function_body functions $function_name \ ++ $function_regexp] } { ++ pass $testname ++ } else { ++ fail $testname ++ } ++ } ++ set in_function 0 ++ incr count ++ } ++ incr lineno ++ } ++ close $fd ++ if { $in_function } { ++ error "check-function-bodies: missing \"$terminator\"" ++ } ++ if { $count == 0 } { ++ error "check-function-bodies: no matches found" ++ } ++} +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index ea9a50ccb..2eeb6883a 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -3336,6 +3336,24 @@ proc 
check_effective_target_aarch64_sve { } { + }] + } + ++# Return 1 if this is an AArch64 target supporting SVE2. ++proc check_effective_target_aarch64_sve2 { } { ++ if { ![istarget aarch64*-*-*] } { ++ return 0 ++ } ++ return [check_no_compiler_messages aarch64_sve2 assembly { ++ #if !defined (__ARM_FEATURE_SVE2) ++ #error FOO ++ #endif ++ }] ++} ++ ++# Return 1 if this is an AArch64 target only supporting SVE (not SVE2). ++proc check_effective_target_aarch64_sve1_only { } { ++ return [expr { [check_effective_target_aarch64_sve] ++ && ![check_effective_target_aarch64_sve2] }] ++} ++ + # Return the size in bits of an SVE vector, or 0 if the size is variable. + proc aarch64_sve_bits { } { + return [check_cached_effective_target aarch64_sve_bits { +@@ -4356,6 +4374,22 @@ proc check_effective_target_aarch64_sve_hw { } { + }] + } + ++# Return true if this is an AArch64 target that can run SVE2 code. ++ ++proc check_effective_target_aarch64_sve2_hw { } { ++ if { ![istarget aarch64*-*-*] } { ++ return 0 ++ } ++ return [check_runtime aarch64_sve2_hw_available { ++ int ++ main (void) ++ { ++ asm volatile ("addp z0.b, p0/m, z0.b, z1.b"); ++ return 0; ++ } ++ }] ++} ++ + # Return true if this is an AArch64 target that can run SVE code and + # if its SVE vectors have exactly BITS bits. + +@@ -4569,6 +4603,49 @@ proc add_options_for_arm_v8_2a_dotprod_neon { flags } { + return "$flags $et_arm_v8_2a_dotprod_neon_flags" + } + ++# Return 1 if the target supports ARMv8.2+i8mm Adv.SIMD Dot Product ++# instructions, 0 otherwise. The test is valid for ARM and for AArch64. ++# Record the command line options needed. ++ ++proc check_effective_target_arm_v8_2a_i8mm_ok_nocache { } { ++ global et_arm_v8_2a_i8mm_flags ++ set et_arm_v8_2a_i8mm_flags "" ++ ++ if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { ++ return 0; ++ } ++ ++ # Iterate through sets of options to find the compiler flags that ++ # need to be added to the -march option. ++ foreach flags {"" "-mfloat-abi=hard -mfpu=neon-fp-armv8" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" } { ++ if { [check_no_compiler_messages_nocache \ ++ arm_v8_2a_i8mm_ok object { ++ #include ++ #if !defined (__ARM_FEATURE_MATMUL_INT8) ++ #error "__ARM_FEATURE_MATMUL_INT8 not defined" ++ #endif ++ } "$flags -march=armv8.2-a+i8mm"] } { ++ set et_arm_v8_2a_i8mm_flags "$flags -march=armv8.2-a+i8mm" ++ return 1 ++ } ++ } ++ ++ return 0; ++} ++ ++proc check_effective_target_arm_v8_2a_i8mm_ok { } { ++ return [check_cached_effective_target arm_v8_2a_i8mm_ok \ ++ check_effective_target_arm_v8_2a_i8mm_ok_nocache] ++} ++ ++proc add_options_for_arm_v8_2a_i8mm { flags } { ++ if { ! [check_effective_target_arm_v8_2a_i8mm_ok] } { ++ return "$flags" ++ } ++ global et_arm_v8_2a_i8mm_flags ++ return "$flags $et_arm_v8_2a_i8mm_flags" ++} ++ + # Return 1 if the target supports FP16 VFMAL and VFMSL + # instructions, 0 otherwise. + # Record the command line options needed. +@@ -4614,6 +4691,45 @@ proc add_options_for_arm_fp16fml_neon { flags } { + return "$flags $et_arm_fp16fml_neon_flags" + } + ++# Return 1 if the target supports BFloat16 SIMD instructions, 0 otherwise. ++# The test is valid for ARM and for AArch64. 
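++#
++# Usage sketch (hypothetical test, for illustration only): a test relying
++# on these helpers would normally be gated and given the detected flags via
++#
++#   /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
++#   /* { dg-add-options arm_v8_2a_bf16_neon } */
++#
++# so that add_options_for_arm_v8_2a_bf16_neon (defined below) supplies the
++# "-march=armv8.2-a+bf16" option, plus any float-abi/fpu flags, that the
++# check below detected.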
++ ++proc check_effective_target_arm_v8_2a_bf16_neon_ok_nocache { } { ++ global et_arm_v8_2a_bf16_neon_flags ++ set et_arm_v8_2a_bf16_neon_flags "" ++ ++ if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { ++ return 0; ++ } ++ ++ foreach flags {"" "-mfloat-abi=hard -mfpu=neon-fp-armv8" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" } { ++ if { [check_no_compiler_messages_nocache arm_v8_2a_bf16_neon_ok object { ++ #include ++ #if !defined (__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) ++ #error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC not defined" ++ #endif ++ } "$flags -march=armv8.2-a+bf16"] } { ++ set et_arm_v8_2a_bf16_neon_flags "$flags -march=armv8.2-a+bf16" ++ return 1 ++ } ++ } ++ ++ return 0; ++} ++ ++proc check_effective_target_arm_v8_2a_bf16_neon_ok { } { ++ return [check_cached_effective_target arm_v8_2a_bf16_neon_ok \ ++ check_effective_target_arm_v8_2a_bf16_neon_ok_nocache] ++} ++ ++proc add_options_for_arm_v8_2a_bf16_neon { flags } { ++ if { ! [check_effective_target_arm_v8_2a_bf16_neon_ok] } { ++ return "$flags" ++ } ++ global et_arm_v8_2a_bf16_neon_flags ++ return "$flags $et_arm_v8_2a_bf16_neon_flags" ++} ++ + # Return 1 if the target supports executing ARMv8 NEON instructions, 0 + # otherwise. + +@@ -6093,7 +6209,24 @@ proc check_effective_target_vect_usad_char { } { + + proc check_effective_target_vect_avg_qi {} { + return [expr { [istarget aarch64*-*-*] +- && ![check_effective_target_aarch64_sve] }] ++ && ![check_effective_target_aarch64_sve1_only] }] ++} ++ ++# Return 1 if the target plus current options supports both signed ++# and unsigned multiply-high-with-round-and-scale operations ++# on vectors of half-words. ++ ++proc check_effective_target_vect_mulhrs_hi {} { ++ return [expr { [istarget aarch64*-*-*] ++ && [check_effective_target_aarch64_sve2] }] ++} ++ ++# Return 1 if the target plus current options supports signed division ++# by power-of-2 operations on vectors of 4-byte integers. ++ ++proc check_effective_target_vect_sdiv_pow2_si {} { ++ return [expr { [istarget aarch64*-*-*] ++ && [check_effective_target_aarch64_sve] }] + } + + # Return 1 if the target plus current options supports a vector +@@ -8579,7 +8712,8 @@ proc check_effective_target_aarch64_tiny { } { + # Create functions to check that the AArch64 assembler supports the + # various architecture extensions via the .arch_extension pseudo-op. 
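++#
++# For each extension FOO in the list below this generates an effective
++# target named aarch64_asm_FOO_ok, so the new entries give tests targets
++# such as aarch64_asm_i8mm_ok and aarch64_asm_bf16_ok.  A sketch of the
++# intended use (hypothetical test), mirroring the aarch64_asm_sve_ok use
++# in gfortran.dg/pr88833.f90 above:
++#
++#   /* { dg-do assemble { target aarch64_asm_i8mm_ok } } */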
+ +-foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"} { ++foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" ++ "i8mm" "f32mm" "f64mm" "bf16" } { + eval [string map [list FUNC $aarch64_ext] { + proc check_effective_target_aarch64_asm_FUNC_ok { } { + if { [istarget aarch64*-*-*] } { +diff --git a/gcc/testsuite/obj-c++.dg/stubify-1.mm b/gcc/testsuite/obj-c++.dg/stubify-1.mm +index e8f21882d..a32e28251 100644 +--- a/gcc/testsuite/obj-c++.dg/stubify-1.mm ++++ b/gcc/testsuite/obj-c++.dg/stubify-1.mm +@@ -4,7 +4,7 @@ + /* { dg-do compile { target *-*-darwin* } } */ + /* { dg-skip-if "" { *-*-* } { "-fgnu-runtime" } { "" } } */ + /* { dg-require-effective-target ilp32 } */ +-/* { dg-options "-mdynamic-no-pic -fno-exceptions -mmacosx-version-min=10.4 -msymbol-stubs" } */ ++/* { dg-options "-Os -mdynamic-no-pic -fno-exceptions -mmacosx-version-min=10.4 -msymbol-stubs" } */ + + typedef struct objc_object { } *id ; + int x = 41 ; +diff --git a/gcc/testsuite/obj-c++.dg/stubify-2.mm b/gcc/testsuite/obj-c++.dg/stubify-2.mm +index 1863f986c..69fea8def 100644 +--- a/gcc/testsuite/obj-c++.dg/stubify-2.mm ++++ b/gcc/testsuite/obj-c++.dg/stubify-2.mm +@@ -4,7 +4,7 @@ + /* { dg-do compile { target *-*-darwin* } } */ + /* { dg-skip-if "" { *-*-* } { "-fgnu-runtime" } { "" } } */ + /* { dg-require-effective-target ilp32 } */ +-/* { dg-options "-mdynamic-no-pic -mmacosx-version-min=10.4 -msymbol-stubs" } */ ++/* { dg-options "-mdynamic-no-pic -fdump-rtl-jump -mmacosx-version-min=10.4 -msymbol-stubs" } */ + + typedef struct objc_object { } *id ; + int x = 41 ; +@@ -30,6 +30,7 @@ extern int bogonic (int, int, int) ; + + /* Any symbol_ref of an un-stubified objc_msgSend is an error; look + for "objc_msgSend" in quotes, without the $stub suffix. */ ++/* { dg-final { scan-rtl-dump-not {symbol_ref.*"objc_msgSend"} "jump" { target powerpc*-*-darwin* } } } */ + + /* { dg-final { scan-assembler-not {(bl|call)[ \t]+_objc_msgSend\n} } } */ + /* { dg-final { scan-assembler {(bl|call)[ \t]+L_objc_msgSend\$stub\n} } } */ +diff --git a/gcc/testsuite/objc.dg/stubify-2.m b/gcc/testsuite/objc.dg/stubify-2.m +index 2930e46fc..904ac44b2 100644 +--- a/gcc/testsuite/objc.dg/stubify-2.m ++++ b/gcc/testsuite/objc.dg/stubify-2.m +@@ -4,7 +4,7 @@ + /* { dg-do compile { target *-*-darwin* } } */ + /* { dg-skip-if "" { *-*-* } { "-fgnu-runtime" } { "" } } */ + /* { dg-require-effective-target ilp32 } */ +-/* { dg-options "-mdynamic-no-pic -mmacosx-version-min=10.4 -msymbol-stubs" } */ ++/* { dg-options "-mdynamic-no-pic -fdump-rtl-jump -mmacosx-version-min=10.4 -msymbol-stubs" } */ + + typedef struct objc_object { } *id ; + int x = 41 ; +@@ -30,6 +30,7 @@ extern int bogonic (int, int, int) ; + + /* Any symbol_ref of an un-stubified objc_msgSend is an error; look + for "objc_msgSend" in quotes, without the $stub suffix. 
*/ ++/* { dg-final { scan-rtl-dump-not {symbol_ref.*"objc_msgSend"} "jump" { target powerpc*-*-darwin* } } } */ + + /* { dg-final { scan-assembler-not {(bl|call)[ \t]+_objc_msgSend\n} } } */ + /* { dg-final { scan-assembler {(bl|call)[ \t]+L_objc_msgSend\$stub\n} } } */ +diff --git a/gcc/trans-mem.c b/gcc/trans-mem.c +index 0581aae2d..8fc9f44d8 100644 +--- a/gcc/trans-mem.c ++++ b/gcc/trans-mem.c +@@ -3237,8 +3237,7 @@ expand_block_edges (struct tm_region *const region, basic_block bb) + || (gimple_call_flags (call_stmt) & ECF_TM_BUILTIN) == 0) + continue; + +- if (DECL_FUNCTION_CODE (gimple_call_fndecl (call_stmt)) +- == BUILT_IN_TM_ABORT) ++ if (gimple_call_builtin_p (call_stmt, BUILT_IN_TM_ABORT)) + { + // If we have a ``_transaction_cancel [[outer]]'', there is only + // one abnormal edge: to the transaction marked OUTER. +diff --git a/gcc/tree-call-cdce.c b/gcc/tree-call-cdce.c +index 2e482b37e..43f1ec6ee 100644 +--- a/gcc/tree-call-cdce.c ++++ b/gcc/tree-call-cdce.c +@@ -1074,9 +1074,7 @@ use_internal_fn (gcall *call) + { + gimple_stmt_iterator gsi = gsi_for_stmt (call); + gcall *new_call = gimple_build_call_internal (IFN_SET_EDOM, 0); +- gimple_set_vuse (new_call, gimple_vuse (call)); +- gimple_set_vdef (new_call, gimple_vdef (call)); +- SSA_NAME_DEF_STMT (gimple_vdef (new_call)) = new_call; ++ gimple_move_vops (new_call, call); + gimple_set_location (new_call, gimple_location (call)); + gsi_replace (&gsi, new_call, false); + call = new_call; +diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c +index 621c8ea3d..527deffe4 100644 +--- a/gcc/tree-cfg.c ++++ b/gcc/tree-cfg.c +@@ -9547,7 +9547,8 @@ execute_fixup_cfg (void) + Keep access when store has side effect, i.e. in case when source + is volatile. */ + if (gimple_store_p (stmt) +- && !gimple_has_side_effects (stmt)) ++ && !gimple_has_side_effects (stmt) ++ && !optimize_debug) + { + tree lhs = get_base_address (gimple_get_lhs (stmt)); + +diff --git a/gcc/tree-core.h b/gcc/tree-core.h +index 41d052949..26b6f46ad 100644 +--- a/gcc/tree-core.h ++++ b/gcc/tree-core.h +@@ -1791,6 +1791,17 @@ struct GTY(()) tree_decl_non_common { + tree result; + }; + ++/* Classify a special function declaration type. */ ++ ++enum function_decl_type ++{ ++ NONE, ++ OPERATOR_NEW, ++ LAMBDA_FUNCTION ++ ++ /* 0 values left */ ++}; ++ + /* FUNCTION_DECL inherits from DECL_NON_COMMON because of the use of the + arguments/result/saved_tree fields by front ends. It was either inherit + FUNCTION_DECL from non_common, or inherit non_common from FUNCTION_DECL, +@@ -1815,34 +1826,32 @@ struct GTY(()) tree_function_decl { + /* Index within a virtual table. */ + tree vindex; + +- /* In a FUNCTION_DECL for which DECL_BUILT_IN holds, this is +- DECL_FUNCTION_CODE. Otherwise unused. +- ??? The bitfield needs to be able to hold all target function +- codes as well. */ +- ENUM_BITFIELD(built_in_function) function_code : 12; +- ENUM_BITFIELD(built_in_class) built_in_class : 2; ++ /* In a FUNCTION_DECL this is DECL_UNCHECKED_FUNCTION_CODE. 
*/ ++ unsigned int function_code; + ++ ENUM_BITFIELD(built_in_class) built_in_class : 2; + unsigned static_ctor_flag : 1; + unsigned static_dtor_flag : 1; +- + unsigned uninlinable : 1; + unsigned possibly_inlined : 1; + unsigned novops_flag : 1; + unsigned returns_twice_flag : 1; ++ + unsigned malloc_flag : 1; +- unsigned operator_new_flag : 1; + unsigned declared_inline_flag : 1; + unsigned no_inline_warning_flag : 1; +- + unsigned no_instrument_function_entry_exit : 1; + unsigned no_limit_stack : 1; + unsigned disregard_inline_limits : 1; + unsigned pure_flag : 1; + unsigned looping_const_or_pure_flag : 1; ++ ++ /* Align the bitfield to boundary of a byte. */ ++ ENUM_BITFIELD(function_decl_type) decl_type: 2; + unsigned has_debug_args_flag : 1; + unsigned versioned_function : 1; +- unsigned lambda_function: 1; +- /* No bits left. */ ++ ++ /* 12 bits left for future expansion. */ + }; + + struct GTY(()) tree_translation_unit_decl { +diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c +index ac81e10a3..38ebe4092 100644 +--- a/gcc/tree-if-conv.c ++++ b/gcc/tree-if-conv.c +@@ -2142,9 +2142,7 @@ predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask) + new_stmt + = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr, + mask, rhs); +- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); +- SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; ++ gimple_move_vops (new_stmt, stmt); + } + gimple_call_set_nothrow (new_stmt, true); + return new_stmt; +diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c +index 1110089fa..784ab48c1 100644 +--- a/gcc/tree-inline.c ++++ b/gcc/tree-inline.c +@@ -4585,7 +4585,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, + /* PR 20090218-1_0.c. Body can be provided by another module. */ + && (reason != CIF_BODY_NOT_AVAILABLE || !flag_generate_lto)) + { +- error ("inlining failed in call to always_inline %q+F: %s", fn, ++ error ("inlining failed in call to % %q+F: %s", fn, + cgraph_inline_failed_string (reason)); + if (gimple_location (stmt) != UNKNOWN_LOCATION) + inform (gimple_location (stmt), "called from here"); +@@ -4834,7 +4834,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, + we may get confused if the compiler sees that the inlined new + function returns a pointer which was just deleted. See bug + 33407. 
*/ +- if (DECL_IS_OPERATOR_NEW (fn)) ++ if (DECL_IS_OPERATOR_NEW_P (fn)) + { + return_slot = NULL; + modify_dest = NULL; +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 8741a9a49..1321a92c4 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -584,8 +584,6 @@ extern rtl_opt_pass *make_pass_value_profile_transformations (gcc::context + extern rtl_opt_pass *make_pass_postreload_cse (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_gcse2 (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt); +-extern rtl_opt_pass *make_pass_branch_target_load_optimize1 (gcc::context +- *ctxt); + extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context + *ctxt); + extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt); +@@ -595,8 +593,6 @@ extern rtl_opt_pass *make_pass_if_after_reload (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_regrename (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_cprop_hardreg (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_reorder_blocks (gcc::context *ctxt); +-extern rtl_opt_pass *make_pass_branch_target_load_optimize2 (gcc::context +- *ctxt); + extern rtl_opt_pass *make_pass_leaf_regs (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_split_before_sched2 (gcc::context *ctxt); + extern rtl_opt_pass *make_pass_compare_elim_after_reload (gcc::context *ctxt); +diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c +index 8e4baf013..c36bf96ef 100644 +--- a/gcc/tree-sra.c ++++ b/gcc/tree-sra.c +@@ -106,6 +106,7 @@ along with GCC; see the file COPYING3. If not see + #include "ipa-utils.h" + #include "builtins.h" + ++ + /* Enumeration of all aggregate reductions we can do. */ + enum sra_mode { SRA_MODE_EARLY_IPA, /* early call regularization */ + SRA_MODE_EARLY_INTRA, /* early intraprocedural SRA */ +@@ -220,8 +221,11 @@ struct access + is not propagated in the access tree in any direction. */ + unsigned grp_scalar_write : 1; + +- /* Is this access an artificial one created to scalarize some record +- entirely? */ ++ /* In a root of an access tree, true means that the entire tree should be ++ totally scalarized - that all scalar leafs should be scalarized and ++ non-root grp_total_scalarization accesses should be honored. Otherwise, ++ non-root accesses with grp_total_scalarization should never get scalar ++ replacements. */ + unsigned grp_total_scalarization : 1; + + /* Other passes of the analysis use this bit to make function +@@ -242,6 +246,10 @@ struct access + access tree. */ + unsigned grp_unscalarized_data : 1; + ++ /* Set if all accesses in the group consist of the same chain of ++ COMPONENT_REFs and ARRAY_REFs. */ ++ unsigned grp_same_access_path : 1; ++ + /* Does this access and/or group contain a write access through a + BIT_FIELD_REF? 
*/ + unsigned grp_partial_lhs : 1; +@@ -443,16 +451,18 @@ dump_access (FILE *f, struct access *access, bool grp) + "grp_scalar_write = %d, grp_total_scalarization = %d, " + "grp_hint = %d, grp_covered = %d, " + "grp_unscalarizable_region = %d, grp_unscalarized_data = %d, " +- "grp_partial_lhs = %d, grp_to_be_replaced = %d, " +- "grp_to_be_debug_replaced = %d, grp_maybe_modified = %d, " ++ "grp_same_access_path = %d, grp_partial_lhs = %d, " ++ "grp_to_be_replaced = %d, grp_to_be_debug_replaced = %d, " ++ "grp_maybe_modified = %d, " + "grp_not_necessarilly_dereferenced = %d\n", + access->grp_read, access->grp_write, access->grp_assignment_read, + access->grp_assignment_write, access->grp_scalar_read, + access->grp_scalar_write, access->grp_total_scalarization, + access->grp_hint, access->grp_covered, + access->grp_unscalarizable_region, access->grp_unscalarized_data, +- access->grp_partial_lhs, access->grp_to_be_replaced, +- access->grp_to_be_debug_replaced, access->grp_maybe_modified, ++ access->grp_same_access_path, access->grp_partial_lhs, ++ access->grp_to_be_replaced, access->grp_to_be_debug_replaced, ++ access->grp_maybe_modified, + access->grp_not_necessarilly_dereferenced); + else + fprintf (f, ", write = %d, grp_total_scalarization = %d, " +@@ -540,6 +550,15 @@ find_access_in_subtree (struct access *access, HOST_WIDE_INT offset, + access = child; + } + ++ /* Total scalarization does not replace single field structures with their ++ single field but rather creates an access for them underneath. Look for ++ it. */ ++ if (access) ++ while (access->first_child ++ && access->first_child->offset == offset ++ && access->first_child->size == size) ++ access = access->first_child; ++ + return access; + } + +@@ -971,7 +990,8 @@ create_access (tree expr, gimple *stmt, bool write) + static bool + scalarizable_type_p (tree type, bool const_decl) + { +- gcc_assert (!is_gimple_reg_type (type)); ++ if (is_gimple_reg_type (type)) ++ return true; + if (type_contains_placeholder_p (type)) + return false; + +@@ -986,8 +1006,7 @@ scalarizable_type_p (tree type, bool const_decl) + if (DECL_BIT_FIELD (fld)) + return false; + +- if (!is_gimple_reg_type (ft) +- && !scalarizable_type_p (ft, const_decl)) ++ if (!scalarizable_type_p (ft, const_decl)) + return false; + } + +@@ -1017,8 +1036,7 @@ scalarizable_type_p (tree type, bool const_decl) + return false; + + tree elem = TREE_TYPE (type); +- if (!is_gimple_reg_type (elem) +- && !scalarizable_type_p (elem, const_decl)) ++ if (!scalarizable_type_p (elem, const_decl)) + return false; + return true; + } +@@ -1027,114 +1045,6 @@ scalarizable_type_p (tree type, bool const_decl) + } + } + +-static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, bool, tree, tree); +- +-/* Create total_scalarization accesses for all scalar fields of a member +- of type DECL_TYPE conforming to scalarizable_type_p. BASE +- must be the top-most VAR_DECL representing the variable; within that, +- OFFSET locates the member and REF must be the memory reference expression for +- the member. 
*/ +- +-static void +-completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref) +-{ +- switch (TREE_CODE (decl_type)) +- { +- case RECORD_TYPE: +- for (tree fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld)) +- if (TREE_CODE (fld) == FIELD_DECL) +- { +- HOST_WIDE_INT pos = offset + int_bit_position (fld); +- tree ft = TREE_TYPE (fld); +- tree nref = build3 (COMPONENT_REF, ft, ref, fld, NULL_TREE); +- +- scalarize_elem (base, pos, tree_to_uhwi (DECL_SIZE (fld)), +- TYPE_REVERSE_STORAGE_ORDER (decl_type), +- nref, ft); +- } +- break; +- case ARRAY_TYPE: +- { +- tree elemtype = TREE_TYPE (decl_type); +- tree elem_size = TYPE_SIZE (elemtype); +- gcc_assert (elem_size && tree_fits_shwi_p (elem_size)); +- HOST_WIDE_INT el_size = tree_to_shwi (elem_size); +- gcc_assert (el_size > 0); +- +- tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type)); +- gcc_assert (TREE_CODE (minidx) == INTEGER_CST); +- tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type)); +- /* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1. */ +- if (maxidx) +- { +- gcc_assert (TREE_CODE (maxidx) == INTEGER_CST); +- tree domain = TYPE_DOMAIN (decl_type); +- /* MINIDX and MAXIDX are inclusive, and must be interpreted in +- DOMAIN (e.g. signed int, whereas min/max may be size_int). */ +- offset_int idx = wi::to_offset (minidx); +- offset_int max = wi::to_offset (maxidx); +- if (!TYPE_UNSIGNED (domain)) +- { +- idx = wi::sext (idx, TYPE_PRECISION (domain)); +- max = wi::sext (max, TYPE_PRECISION (domain)); +- } +- for (int el_off = offset; idx <= max; ++idx) +- { +- tree nref = build4 (ARRAY_REF, elemtype, +- ref, +- wide_int_to_tree (domain, idx), +- NULL_TREE, NULL_TREE); +- scalarize_elem (base, el_off, el_size, +- TYPE_REVERSE_STORAGE_ORDER (decl_type), +- nref, elemtype); +- el_off += el_size; +- } +- } +- } +- break; +- default: +- gcc_unreachable (); +- } +-} +- +-/* Create total_scalarization accesses for a member of type TYPE, which must +- satisfy either is_gimple_reg_type or scalarizable_type_p. BASE must be the +- top-most VAR_DECL representing the variable; within that, POS and SIZE locate +- the member, REVERSE gives its torage order. and REF must be the reference +- expression for it. */ +- +-static void +-scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size, bool reverse, +- tree ref, tree type) +-{ +- if (is_gimple_reg_type (type)) +- { +- struct access *access = create_access_1 (base, pos, size); +- access->expr = ref; +- access->type = type; +- access->grp_total_scalarization = 1; +- access->reverse = reverse; +- /* Accesses for intraprocedural SRA can have their stmt NULL. */ +- } +- else +- completely_scalarize (base, type, pos, ref); +-} +- +-/* Create a total_scalarization access for VAR as a whole. VAR must be of a +- RECORD_TYPE or ARRAY_TYPE conforming to scalarizable_type_p. */ +- +-static void +-create_total_scalarization_access (tree var) +-{ +- HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE (var)); +- struct access *access; +- +- access = create_access_1 (var, 0, size); +- access->expr = var; +- access->type = TREE_TYPE (var); +- access->grp_total_scalarization = 1; +-} +- + /* Return true if REF has an VIEW_CONVERT_EXPR somewhere in it. */ + + static inline bool +@@ -1795,6 +1705,30 @@ build_ref_for_offset (location_t loc, tree base, poly_int64 offset, + return mem_ref; + } + ++/* Construct and return a memory reference that is equal to a portion of ++ MODEL->expr but is based on BASE. If this cannot be done, return NULL. 
*/ ++ ++static tree ++build_reconstructed_reference (location_t, tree base, struct access *model) ++{ ++ tree expr = model->expr, prev_expr = NULL; ++ while (!types_compatible_p (TREE_TYPE (expr), TREE_TYPE (base))) ++ { ++ if (!handled_component_p (expr)) ++ return NULL; ++ prev_expr = expr; ++ expr = TREE_OPERAND (expr, 0); ++ } ++ ++ if (get_object_alignment (base) < get_object_alignment (expr)) ++ return NULL; ++ ++ TREE_OPERAND (prev_expr, 0) = base; ++ tree ref = unshare_expr (model->expr); ++ TREE_OPERAND (prev_expr, 0) = expr; ++ return ref; ++} ++ + /* Construct a memory reference to a part of an aggregate BASE at the given + OFFSET and of the same type as MODEL. In case this is a reference to a + bit-field, the function will replicate the last component_ref of model's +@@ -1822,9 +1756,19 @@ build_ref_for_model (location_t loc, tree base, HOST_WIDE_INT offset, + NULL_TREE); + } + else +- return +- build_ref_for_offset (loc, base, offset, model->reverse, model->type, +- gsi, insert_after); ++ { ++ tree res; ++ if (model->grp_same_access_path ++ && !TREE_THIS_VOLATILE (base) ++ && offset <= model->offset ++ /* build_reconstructed_reference can still fail if we have already ++ massaged BASE because of another type incompatibility. */ ++ && (res = build_reconstructed_reference (loc, base, model))) ++ return res; ++ else ++ return build_ref_for_offset (loc, base, offset, model->reverse, ++ model->type, gsi, insert_after); ++ } + } + + /* Attempt to build a memory reference that we could but into a gimple +@@ -2076,6 +2020,69 @@ find_var_candidates (void) + return ret; + } + ++/* Return true if EXP is a reference chain of COMPONENT_REFs and AREAY_REFs ++ ending either with a DECL or a MEM_REF with zero offset. */ ++ ++static bool ++path_comparable_for_same_access (tree expr) ++{ ++ while (handled_component_p (expr)) ++ { ++ if (TREE_CODE (expr) == ARRAY_REF) ++ { ++ /* SSA name indices can occur here too when the array is of sie one. ++ But we cannot just re-use array_refs with SSA names elsewhere in ++ the function, so disallow non-constant indices. TODO: Remove this ++ limitation after teaching build_reconstructed_reference to replace ++ the index with the index type lower bound. */ ++ if (TREE_CODE (TREE_OPERAND (expr, 1)) != INTEGER_CST) ++ return false; ++ } ++ expr = TREE_OPERAND (expr, 0); ++ } ++ ++ if (TREE_CODE (expr) == MEM_REF) ++ { ++ if (!zerop (TREE_OPERAND (expr, 1))) ++ return false; ++ } ++ else ++ gcc_assert (DECL_P (expr)); ++ ++ return true; ++} ++ ++/* Assuming that EXP1 consists of only COMPONENT_REFs and ARRAY_REFs, return ++ true if the chain of these handled components are exactly the same as EXP2 ++ and the expression under them is the same DECL or an equivalent MEM_REF. ++ The reference picked by compare_access_positions must go to EXP1. */ ++ ++static bool ++same_access_path_p (tree exp1, tree exp2) ++{ ++ if (TREE_CODE (exp1) != TREE_CODE (exp2)) ++ { ++ /* Special case single-field structures loaded sometimes as the field ++ and sometimes as the structure. If the field is of a scalar type, ++ compare_access_positions will put it into exp1. ++ ++ TODO: The gimple register type condition can be removed if teach ++ compare_access_positions to put inner types first. 
*/ ++ if (is_gimple_reg_type (TREE_TYPE (exp1)) ++ && TREE_CODE (exp1) == COMPONENT_REF ++ && (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_OPERAND (exp1, 0))) ++ == TYPE_MAIN_VARIANT (TREE_TYPE (exp2)))) ++ exp1 = TREE_OPERAND (exp1, 0); ++ else ++ return false; ++ } ++ ++ if (!operand_equal_p (exp1, exp2, OEP_ADDRESS_OF)) ++ return false; ++ ++ return true; ++} ++ + /* Sort all accesses for the given variable, check for partial overlaps and + return NULL if there are any. If there are none, pick a representative for + each combination of offset and size and create a linked list out of them. +@@ -2112,10 +2119,10 @@ sort_and_splice_var_accesses (tree var) + bool grp_assignment_read = access->grp_assignment_read; + bool grp_assignment_write = access->grp_assignment_write; + bool multiple_scalar_reads = false; +- bool total_scalarization = access->grp_total_scalarization; + bool grp_partial_lhs = access->grp_partial_lhs; + bool first_scalar = is_gimple_reg_type (access->type); + bool unscalarizable_region = access->grp_unscalarizable_region; ++ bool grp_same_access_path = true; + bool bf_non_full_precision + = (INTEGRAL_TYPE_P (access->type) + && TYPE_PRECISION (access->type) != access->size +@@ -2134,6 +2141,8 @@ sort_and_splice_var_accesses (tree var) + gcc_assert (access->offset >= low + && access->offset + access->size <= high); + ++ grp_same_access_path = path_comparable_for_same_access (access->expr); ++ + j = i + 1; + while (j < access_count) + { +@@ -2161,7 +2170,6 @@ sort_and_splice_var_accesses (tree var) + grp_assignment_write |= ac2->grp_assignment_write; + grp_partial_lhs |= ac2->grp_partial_lhs; + unscalarizable_region |= ac2->grp_unscalarizable_region; +- total_scalarization |= ac2->grp_total_scalarization; + relink_to_new_repr (access, ac2); + + /* If there are both aggregate-type and scalar-type accesses with +@@ -2184,6 +2192,11 @@ sort_and_splice_var_accesses (tree var) + } + unscalarizable_region = true; + } ++ ++ if (grp_same_access_path ++ && !same_access_path_p (access->expr, ac2->expr)) ++ grp_same_access_path = false; ++ + ac2->group_representative = access; + j++; + } +@@ -2197,11 +2210,10 @@ sort_and_splice_var_accesses (tree var) + access->grp_scalar_write = grp_scalar_write; + access->grp_assignment_read = grp_assignment_read; + access->grp_assignment_write = grp_assignment_write; +- access->grp_hint = total_scalarization +- || (multiple_scalar_reads && !constant_decl_p (var)); +- access->grp_total_scalarization = total_scalarization; ++ access->grp_hint = multiple_scalar_reads && !constant_decl_p (var); + access->grp_partial_lhs = grp_partial_lhs; + access->grp_unscalarizable_region = unscalarizable_region; ++ access->grp_same_access_path = grp_same_access_path; + + *prev_acc_ptr = access; + prev_acc_ptr = &access->next_grp; +@@ -2395,6 +2407,88 @@ build_access_trees (struct access *access) + return true; + } + ++/* Traverse the access forest where ROOT is the first root and verify that ++ various important invariants hold true. 
*/ ++ ++DEBUG_FUNCTION void ++verify_sra_access_forest (struct access *root) ++{ ++ struct access *access = root; ++ tree first_base = root->base; ++ gcc_assert (DECL_P (first_base)); ++ do ++ { ++ gcc_assert (access->base == first_base); ++ if (access->parent) ++ gcc_assert (access->offset >= access->parent->offset ++ && access->size <= access->parent->size); ++ if (access->next_sibling) ++ gcc_assert (access->next_sibling->offset ++ >= access->offset + access->size); ++ ++ poly_int64 poffset, psize, pmax_size; ++ bool reverse; ++ tree base = get_ref_base_and_extent (access->expr, &poffset, &psize, ++ &pmax_size, &reverse); ++ HOST_WIDE_INT offset, size, max_size; ++ if (!poffset.is_constant (&offset) ++ || !psize.is_constant (&size) ++ || !pmax_size.is_constant (&max_size)) ++ gcc_unreachable (); ++ gcc_assert (base == first_base); ++ gcc_assert (offset == access->offset); ++ gcc_assert (access->grp_unscalarizable_region ++ || size == max_size); ++ gcc_assert (max_size == access->size); ++ gcc_assert (reverse == access->reverse); ++ ++ if (access->first_child) ++ { ++ gcc_assert (access->first_child->parent == access); ++ access = access->first_child; ++ } ++ else if (access->next_sibling) ++ { ++ gcc_assert (access->next_sibling->parent == access->parent); ++ access = access->next_sibling; ++ } ++ else ++ { ++ while (access->parent && !access->next_sibling) ++ access = access->parent; ++ if (access->next_sibling) ++ access = access->next_sibling; ++ else ++ { ++ gcc_assert (access == root); ++ root = root->next_grp; ++ access = root; ++ } ++ } ++ } ++ while (access); ++} ++ ++/* Verify access forests of all candidates with accesses by calling ++ verify_access_forest on each on them. */ ++ ++DEBUG_FUNCTION void ++verify_all_sra_access_forests (void) ++{ ++ bitmap_iterator bi; ++ unsigned i; ++ EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi) ++ { ++ tree var = candidate (i); ++ struct access *access = get_first_repr_for_decl (var); ++ if (access) ++ { ++ gcc_assert (access->base == var); ++ verify_sra_access_forest (access); ++ } ++ } ++} ++ + /* Return true if expr contains some ARRAY_REFs into a variable bounded + array. */ + +@@ -2412,15 +2506,16 @@ expr_with_var_bounded_array_refs_p (tree expr) + } + + /* Analyze the subtree of accesses rooted in ROOT, scheduling replacements when +- both seeming beneficial and when ALLOW_REPLACEMENTS allows it. Also set all +- sorts of access flags appropriately along the way, notably always set +- grp_read and grp_assign_read according to MARK_READ and grp_write when +- MARK_WRITE is true. ++ both seeming beneficial and when ALLOW_REPLACEMENTS allows it. If TOTALLY ++ is set, we are totally scalarizing the aggregate. Also set all sorts of ++ access flags appropriately along the way, notably always set grp_read and ++ grp_assign_read according to MARK_READ and grp_write when MARK_WRITE is ++ true. 
+ + Creating a replacement for a scalar access is considered beneficial if its +- grp_hint is set (this means we are either attempting total scalarization or +- there is more than one direct read access) or according to the following +- table: ++ grp_hint ot TOTALLY is set (this means either that there is more than one ++ direct read access or that we are attempting total scalarization) or ++ according to the following table: + + Access written to through a scalar type (once or more times) + | +@@ -2451,7 +2546,7 @@ expr_with_var_bounded_array_refs_p (tree expr) + + static bool + analyze_access_subtree (struct access *root, struct access *parent, +- bool allow_replacements) ++ bool allow_replacements, bool totally) + { + struct access *child; + HOST_WIDE_INT limit = root->offset + root->size; +@@ -2469,8 +2564,8 @@ analyze_access_subtree (struct access *root, struct access *parent, + root->grp_write = 1; + if (parent->grp_assignment_write) + root->grp_assignment_write = 1; +- if (parent->grp_total_scalarization) +- root->grp_total_scalarization = 1; ++ if (!parent->grp_same_access_path) ++ root->grp_same_access_path = 0; + } + + if (root->grp_unscalarizable_region) +@@ -2483,10 +2578,10 @@ analyze_access_subtree (struct access *root, struct access *parent, + { + hole |= covered_to < child->offset; + sth_created |= analyze_access_subtree (child, root, +- allow_replacements && !scalar); ++ allow_replacements && !scalar, ++ totally); + + root->grp_unscalarized_data |= child->grp_unscalarized_data; +- root->grp_total_scalarization &= child->grp_total_scalarization; + if (child->grp_covered) + covered_to += child->size; + else +@@ -2494,7 +2589,9 @@ analyze_access_subtree (struct access *root, struct access *parent, + } + + if (allow_replacements && scalar && !root->first_child +- && (root->grp_hint ++ && (totally || !root->grp_total_scalarization) ++ && (totally ++ || root->grp_hint + || ((root->grp_scalar_read || root->grp_assignment_read) + && (root->grp_scalar_write || root->grp_assignment_write)))) + { +@@ -2536,6 +2633,7 @@ analyze_access_subtree (struct access *root, struct access *parent, + { + if (allow_replacements + && scalar && !root->first_child ++ && !root->grp_total_scalarization + && (root->grp_scalar_write || root->grp_assignment_write) + && !bitmap_bit_p (cannot_scalarize_away_bitmap, + DECL_UID (root->base))) +@@ -2556,7 +2654,7 @@ analyze_access_subtree (struct access *root, struct access *parent, + root->grp_total_scalarization = 0; + } + +- if (!hole || root->grp_total_scalarization) ++ if (!hole || totally) + root->grp_covered = 1; + else if (root->grp_write || comes_initialized_p (root->base)) + root->grp_unscalarized_data = 1; /* not covered and written to */ +@@ -2572,7 +2670,8 @@ analyze_access_trees (struct access *access) + + while (access) + { +- if (analyze_access_subtree (access, NULL, true)) ++ if (analyze_access_subtree (access, NULL, true, ++ access->grp_total_scalarization)) + ret = true; + access = access->next_grp; + } +@@ -2638,6 +2737,7 @@ create_artificial_child_access (struct access *parent, struct access *model, + access->offset = new_offset; + access->size = model->size; + access->type = model->type; ++ access->parent = parent; + access->grp_write = set_grp_write; + access->grp_read = false; + access->reverse = model->reverse; +@@ -2721,13 +2821,17 @@ propagate_subaccesses_across_link (struct access *lacc, struct access *racc) + lacc->type = racc->type; + if (build_user_friendly_ref_for_offset (&t, TREE_TYPE (t), + lacc->offset, racc->type)) +- 
lacc->expr = t; ++ { ++ lacc->expr = t; ++ lacc->grp_same_access_path = true; ++ } + else + { + lacc->expr = build_ref_for_model (EXPR_LOCATION (lacc->base), + lacc->base, lacc->offset, + racc, NULL, false); + lacc->grp_no_warning = true; ++ lacc->grp_same_access_path = false; + } + } + return ret; +@@ -2840,6 +2944,369 @@ propagate_all_subaccesses (void) + } + } + ++/* Return true if the forest beginning with ROOT does not contain ++ unscalarizable regions or non-byte aligned accesses. */ ++ ++static bool ++can_totally_scalarize_forest_p (struct access *root) ++{ ++ struct access *access = root; ++ do ++ { ++ if (access->grp_unscalarizable_region ++ || (access->offset % BITS_PER_UNIT) != 0 ++ || (access->size % BITS_PER_UNIT) != 0 ++ || (is_gimple_reg_type (access->type) ++ && access->first_child)) ++ return false; ++ ++ if (access->first_child) ++ access = access->first_child; ++ else if (access->next_sibling) ++ access = access->next_sibling; ++ else ++ { ++ while (access->parent && !access->next_sibling) ++ access = access->parent; ++ if (access->next_sibling) ++ access = access->next_sibling; ++ else ++ { ++ gcc_assert (access == root); ++ root = root->next_grp; ++ access = root; ++ } ++ } ++ } ++ while (access); ++ return true; ++} ++ ++/* Create and return an ACCESS in PARENT spanning from POS with SIZE, TYPE and ++ reference EXPR for total scalarization purposes and mark it as such. Within ++ the children of PARENT, link it in between PTR and NEXT_SIBLING. */ ++ ++static struct access * ++create_total_scalarization_access (struct access *parent, HOST_WIDE_INT pos, ++ HOST_WIDE_INT size, tree type, tree expr, ++ struct access **ptr, ++ struct access *next_sibling) ++{ ++ struct access *access = access_pool.allocate (); ++ memset (access, 0, sizeof (struct access)); ++ access->base = parent->base; ++ access->offset = pos; ++ access->size = size; ++ access->expr = expr; ++ access->type = type; ++ access->parent = parent; ++ access->grp_write = parent->grp_write; ++ access->grp_total_scalarization = 1; ++ access->grp_hint = 1; ++ access->grp_same_access_path = path_comparable_for_same_access (expr); ++ access->reverse = reverse_storage_order_for_component_p (expr); ++ ++ access->next_sibling = next_sibling; ++ *ptr = access; ++ return access; ++} ++ ++/* Create and return an ACCESS in PARENT spanning from POS with SIZE, TYPE and ++ reference EXPR for total scalarization purposes and mark it as such, link it ++ at *PTR and reshape the tree so that those elements at *PTR and their ++ siblings which fall within the part described by POS and SIZE are moved to ++ be children of the new access. If a partial overlap is detected, return ++ NULL. 
*/ ++ ++static struct access * ++create_total_access_and_reshape (struct access *parent, HOST_WIDE_INT pos, ++ HOST_WIDE_INT size, tree type, tree expr, ++ struct access **ptr) ++{ ++ struct access **p = ptr; ++ ++ while (*p && (*p)->offset < pos + size) ++ { ++ if ((*p)->offset + (*p)->size > pos + size) ++ return NULL; ++ p = &(*p)->next_sibling; ++ } ++ ++ struct access *next_child = *ptr; ++ struct access *new_acc ++ = create_total_scalarization_access (parent, pos, size, type, expr, ++ ptr, *p); ++ if (p != ptr) ++ { ++ new_acc->first_child = next_child; ++ *p = NULL; ++ for (struct access *a = next_child; a; a = a->next_sibling) ++ a->parent = new_acc; ++ } ++ return new_acc; ++} ++ ++static bool totally_scalarize_subtree (struct access *root); ++ ++/* Return true if INNER is either the same type as OUTER or if it is the type ++ of a record field in OUTER at offset zero, possibly in nested ++ sub-records. */ ++ ++static bool ++access_and_field_type_match_p (tree outer, tree inner) ++{ ++ if (TYPE_MAIN_VARIANT (outer) == TYPE_MAIN_VARIANT (inner)) ++ return true; ++ if (TREE_CODE (outer) != RECORD_TYPE) ++ return false; ++ tree fld = TYPE_FIELDS (outer); ++ while (fld) ++ { ++ if (TREE_CODE (fld) == FIELD_DECL) ++ { ++ if (!zerop (DECL_FIELD_OFFSET (fld))) ++ return false; ++ if (TYPE_MAIN_VARIANT (TREE_TYPE (fld)) == inner) ++ return true; ++ if (TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE) ++ fld = TYPE_FIELDS (TREE_TYPE (fld)); ++ else ++ return false; ++ } ++ else ++ fld = DECL_CHAIN (fld); ++ } ++ return false; ++} ++ ++/* Return type of total_should_skip_creating_access indicating whether a total ++ scalarization access for a field/element should be created, whether it ++ already exists or whether the entire total scalarization has to fail. */ ++ ++enum total_sra_field_state {TOTAL_FLD_CREATE, TOTAL_FLD_DONE, TOTAL_FLD_FAILED}; ++ ++/* Do all the necessary steps in total scalarization when the given aggregate ++ type has a TYPE at POS with the given SIZE should be put into PARENT and ++ when we have processed all its siblings with smaller offsets up until and ++ including LAST_SEEN_SIBLING (which can be NULL). ++ ++ If some further siblings are to be skipped, set *LAST_SEEN_SIBLING as ++ appropriate. Return TOTAL_FLD_CREATE id the caller should carry on with ++ creating a new access, TOTAL_FLD_DONE if access or accesses capable of ++ representing the described part of the aggregate for the purposes of total ++ scalarization already exist or TOTAL_FLD_FAILED if there is a problem which ++ prevents total scalarization from happening at all. */ ++ ++static enum total_sra_field_state ++total_should_skip_creating_access (struct access *parent, ++ struct access **last_seen_sibling, ++ tree type, HOST_WIDE_INT pos, ++ HOST_WIDE_INT size) ++{ ++ struct access *next_child; ++ if (!*last_seen_sibling) ++ next_child = parent->first_child; ++ else ++ next_child = (*last_seen_sibling)->next_sibling; ++ ++ /* First, traverse the chain of siblings until it points to an access with ++ offset at least equal to POS. Check all skipped accesses whether they ++ span the POS boundary and if so, return with a failure. */ ++ while (next_child && next_child->offset < pos) ++ { ++ if (next_child->offset + next_child->size > pos) ++ return TOTAL_FLD_FAILED; ++ *last_seen_sibling = next_child; ++ next_child = next_child->next_sibling; ++ } ++ ++ /* Now check whether next_child has exactly the right POS and SIZE and if so, ++ whether it can represent what we need and can be totally scalarized ++ itself. 
*/ ++ if (next_child && next_child->offset == pos ++ && next_child->size == size) ++ { ++ if (!is_gimple_reg_type (next_child->type) ++ && (!access_and_field_type_match_p (type, next_child->type) ++ || !totally_scalarize_subtree (next_child))) ++ return TOTAL_FLD_FAILED; ++ ++ *last_seen_sibling = next_child; ++ return TOTAL_FLD_DONE; ++ } ++ ++ /* If the child we're looking at would partially overlap, we just cannot ++ totally scalarize. */ ++ if (next_child ++ && next_child->offset < pos + size ++ && next_child->offset + next_child->size > pos + size) ++ return TOTAL_FLD_FAILED; ++ ++ if (is_gimple_reg_type (type)) ++ { ++ /* We don't scalarize accesses that are children of other scalar type ++ accesses, so if we go on and create an access for a register type, ++ there should not be any pre-existing children. There are rare cases ++ where the requested type is a vector but we already have register ++ accesses for all its elements which is equally good. Detect that ++ situation or whether we need to bail out. */ ++ ++ HOST_WIDE_INT covered = pos; ++ bool skipping = false; ++ while (next_child ++ && next_child->offset + next_child->size <= pos + size) ++ { ++ if (next_child->offset != covered ++ || !is_gimple_reg_type (next_child->type)) ++ return TOTAL_FLD_FAILED; ++ ++ covered += next_child->size; ++ *last_seen_sibling = next_child; ++ next_child = next_child->next_sibling; ++ skipping = true; ++ } ++ ++ if (skipping) ++ { ++ if (covered != pos + size) ++ return TOTAL_FLD_FAILED; ++ else ++ return TOTAL_FLD_DONE; ++ } ++ } ++ ++ return TOTAL_FLD_CREATE; ++} ++ ++/* Go over sub-tree rooted in ROOT and attempt to create scalar accesses ++ spanning all uncovered areas covered by ROOT, return false if the attempt ++ failed. All created accesses will have grp_unscalarizable_region set (and ++ should be ignored if the function returns false). */ ++ ++static bool ++totally_scalarize_subtree (struct access *root) ++{ ++ gcc_checking_assert (!root->grp_unscalarizable_region); ++ gcc_checking_assert (!is_gimple_reg_type (root->type)); ++ ++ struct access *last_seen_sibling = NULL; ++ ++ switch (TREE_CODE (root->type)) ++ { ++ case RECORD_TYPE: ++ for (tree fld = TYPE_FIELDS (root->type); fld; fld = DECL_CHAIN (fld)) ++ if (TREE_CODE (fld) == FIELD_DECL) ++ { ++ tree ft = TREE_TYPE (fld); ++ HOST_WIDE_INT fsize = tree_to_uhwi (DECL_SIZE (fld)); ++ if (!fsize) ++ continue; ++ ++ HOST_WIDE_INT pos = root->offset + int_bit_position (fld); ++ enum total_sra_field_state ++ state = total_should_skip_creating_access (root, ++ &last_seen_sibling, ++ ft, pos, fsize); ++ switch (state) ++ { ++ case TOTAL_FLD_FAILED: ++ return false; ++ case TOTAL_FLD_DONE: ++ continue; ++ case TOTAL_FLD_CREATE: ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ struct access **p = (last_seen_sibling ++ ? 
&last_seen_sibling->next_sibling ++ : &root->first_child); ++ tree nref = build3 (COMPONENT_REF, ft, root->expr, fld, NULL_TREE); ++ struct access *new_child ++ = create_total_access_and_reshape (root, pos, fsize, ft, nref, p); ++ if (!new_child) ++ return false; ++ ++ if (!is_gimple_reg_type (ft) ++ && !totally_scalarize_subtree (new_child)) ++ return false; ++ last_seen_sibling = new_child; ++ } ++ break; ++ case ARRAY_TYPE: ++ { ++ tree elemtype = TREE_TYPE (root->type); ++ tree elem_size = TYPE_SIZE (elemtype); ++ gcc_assert (elem_size && tree_fits_shwi_p (elem_size)); ++ HOST_WIDE_INT el_size = tree_to_shwi (elem_size); ++ gcc_assert (el_size > 0); ++ ++ tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (root->type)); ++ gcc_assert (TREE_CODE (minidx) == INTEGER_CST); ++ tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (root->type)); ++ /* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1. */ ++ if (!maxidx) ++ goto out; ++ gcc_assert (TREE_CODE (maxidx) == INTEGER_CST); ++ tree domain = TYPE_DOMAIN (root->type); ++ /* MINIDX and MAXIDX are inclusive, and must be interpreted in ++ DOMAIN (e.g. signed int, whereas min/max may be size_int). */ ++ offset_int idx = wi::to_offset (minidx); ++ offset_int max = wi::to_offset (maxidx); ++ if (!TYPE_UNSIGNED (domain)) ++ { ++ idx = wi::sext (idx, TYPE_PRECISION (domain)); ++ max = wi::sext (max, TYPE_PRECISION (domain)); ++ } ++ for (HOST_WIDE_INT pos = root->offset; ++ idx <= max; ++ pos += el_size, ++idx) ++ { ++ enum total_sra_field_state ++ state = total_should_skip_creating_access (root, ++ &last_seen_sibling, ++ elemtype, pos, ++ el_size); ++ switch (state) ++ { ++ case TOTAL_FLD_FAILED: ++ return false; ++ case TOTAL_FLD_DONE: ++ continue; ++ case TOTAL_FLD_CREATE: ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ struct access **p = (last_seen_sibling ++ ? &last_seen_sibling->next_sibling ++ : &root->first_child); ++ tree nref = build4 (ARRAY_REF, elemtype, root->expr, ++ wide_int_to_tree (domain, idx), ++ NULL_TREE, NULL_TREE); ++ struct access *new_child ++ = create_total_access_and_reshape (root, pos, el_size, elemtype, ++ nref, p); ++ if (!new_child) ++ return false; ++ ++ if (!is_gimple_reg_type (elemtype) ++ && !totally_scalarize_subtree (new_child)) ++ return false; ++ last_seen_sibling = new_child; ++ } ++ } ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ out: ++ return true; ++} ++ + /* Go through all accesses collected throughout the (intraprocedural) analysis + stage, exclude overlapping ones, identify representatives and build trees + out of them, making decisions about scalarization on the way. Return true +@@ -2852,8 +3319,22 @@ analyze_all_variable_accesses (void) + bitmap tmp = BITMAP_ALLOC (NULL); + bitmap_iterator bi; + unsigned i; +- bool optimize_speed_p = !optimize_function_for_size_p (cfun); + ++ bitmap_copy (tmp, candidate_bitmap); ++ EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) ++ { ++ tree var = candidate (i); ++ struct access *access; ++ ++ access = sort_and_splice_var_accesses (var); ++ if (!access || !build_access_trees (access)) ++ disqualify_candidate (var, ++ "No or inhibitingly overlapping accesses."); ++ } ++ ++ propagate_all_subaccesses (); ++ ++ bool optimize_speed_p = !optimize_function_for_size_p (cfun); + enum compiler_param param = optimize_speed_p + ? 
PARAM_SRA_MAX_SCALARIZATION_SIZE_SPEED + : PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE; +@@ -2872,46 +3353,59 @@ analyze_all_variable_accesses (void) + && !bitmap_bit_p (cannot_scalarize_away_bitmap, i)) + { + tree var = candidate (i); ++ if (!VAR_P (var)) ++ continue; + +- if (VAR_P (var) && scalarizable_type_p (TREE_TYPE (var), +- constant_decl_p (var))) ++ if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var))) > max_scalarization_size) + { +- if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var))) +- <= max_scalarization_size) +- { +- create_total_scalarization_access (var); +- completely_scalarize (var, TREE_TYPE (var), 0, var); +- statistics_counter_event (cfun, +- "Totally-scalarized aggregates", 1); +- if (dump_file && (dump_flags & TDF_DETAILS)) +- { +- fprintf (dump_file, "Will attempt to totally scalarize "); +- print_generic_expr (dump_file, var); +- fprintf (dump_file, " (UID: %u): \n", DECL_UID (var)); +- } +- } +- else if (dump_file && (dump_flags & TDF_DETAILS)) ++ if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Too big to totally scalarize: "); + print_generic_expr (dump_file, var); + fprintf (dump_file, " (UID: %u)\n", DECL_UID (var)); + } ++ continue; + } +- } + +- bitmap_copy (tmp, candidate_bitmap); +- EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) +- { +- tree var = candidate (i); +- struct access *access; ++ bool all_types_ok = true; ++ for (struct access *access = get_first_repr_for_decl (var); ++ access; ++ access = access->next_grp) ++ if (!can_totally_scalarize_forest_p (access) ++ || !scalarizable_type_p (access->type, constant_decl_p (var))) ++ { ++ all_types_ok = false; ++ break; ++ } ++ if (!all_types_ok) ++ continue; + +- access = sort_and_splice_var_accesses (var); +- if (!access || !build_access_trees (access)) +- disqualify_candidate (var, +- "No or inhibitingly overlapping accesses."); +- } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Will attempt to totally scalarize "); ++ print_generic_expr (dump_file, var); ++ fprintf (dump_file, " (UID: %u): \n", DECL_UID (var)); ++ } ++ bool scalarized = true; ++ for (struct access *access = get_first_repr_for_decl (var); ++ access; ++ access = access->next_grp) ++ if (!is_gimple_reg_type (access->type) ++ && !totally_scalarize_subtree (access)) ++ { ++ scalarized = false; ++ break; ++ } + +- propagate_all_subaccesses (); ++ if (scalarized) ++ for (struct access *access = get_first_repr_for_decl (var); ++ access; ++ access = access->next_grp) ++ access->grp_total_scalarization = true; ++ } ++ ++ if (flag_checking) ++ verify_all_sra_access_forests (); + + bitmap_copy (tmp, candidate_bitmap); + EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) +@@ -3775,25 +4269,39 @@ initialize_constant_pool_replacements (void) + tree var = candidate (i); + if (!constant_decl_p (var)) + continue; +- vec *access_vec = get_base_access_vector (var); +- if (!access_vec) +- continue; +- for (unsigned i = 0; i < access_vec->length (); i++) ++ ++ struct access *access = get_first_repr_for_decl (var); ++ ++ while (access) + { +- struct access *access = (*access_vec)[i]; +- if (!access->replacement_decl) +- continue; +- gassign *stmt +- = gimple_build_assign (get_access_replacement (access), +- unshare_expr (access->expr)); +- if (dump_file && (dump_flags & TDF_DETAILS)) ++ if (access->replacement_decl) + { +- fprintf (dump_file, "Generating constant initializer: "); +- print_gimple_stmt (dump_file, stmt, 0); +- fprintf (dump_file, "\n"); ++ gassign *stmt ++ = gimple_build_assign (get_access_replacement (access), ++ 
unshare_expr (access->expr)); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Generating constant initializer: "); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); ++ update_stmt (stmt); ++ } ++ ++ if (access->first_child) ++ access = access->first_child; ++ else if (access->next_sibling) ++ access = access->next_sibling; ++ else ++ { ++ while (access->parent && !access->next_sibling) ++ access = access->parent; ++ if (access->next_sibling) ++ access = access->next_sibling; ++ else ++ access = access->next_grp; + } +- gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); +- update_stmt (stmt); + } + } + +diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c +index 2e5d87734..3195a21c7 100644 +--- a/gcc/tree-ssa-address.c ++++ b/gcc/tree-ssa-address.c +@@ -1141,6 +1141,35 @@ maybe_fold_tmr (tree ref) + return new_ref; + } + ++/* Return the preferred index scale factor for accessing memory of mode ++ MEM_MODE in the address space of pointer BASE. Assume that we're ++ optimizing for speed if SPEED is true and for size otherwise. */ ++unsigned int ++preferred_mem_scale_factor (tree base, machine_mode mem_mode, ++ bool speed) ++{ ++ struct mem_address parts = {}; ++ addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base)); ++ unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode); ++ ++ /* Addressing mode "base + index". */ ++ parts.index = integer_one_node; ++ parts.base = integer_one_node; ++ rtx addr = addr_for_mem_ref (&parts, as, false); ++ unsigned cost = address_cost (addr, mem_mode, as, speed); ++ ++ /* Addressing mode "base + index << scale". */ ++ parts.step = wide_int_to_tree (sizetype, fact); ++ addr = addr_for_mem_ref (&parts, as, false); ++ unsigned new_cost = address_cost (addr, mem_mode, as, speed); ++ ++ /* Compare the cost of an address with an unscaled index with ++ a scaled index and return factor if useful. */ ++ if (new_cost < cost) ++ return GET_MODE_UNIT_SIZE (mem_mode); ++ return 1; ++} ++ + /* Dump PARTS to FILE. */ + + extern void dump_mem_address (FILE *, struct mem_address *); +diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h +index 6fa4eae89..9812f36fb 100644 +--- a/gcc/tree-ssa-address.h ++++ b/gcc/tree-ssa-address.h +@@ -39,4 +39,7 @@ tree create_mem_ref (gimple_stmt_iterator *, tree, + extern void copy_ref_info (tree, tree); + tree maybe_fold_tmr (tree); + ++extern unsigned int preferred_mem_scale_factor (tree base, ++ machine_mode mem_mode, ++ bool speed); + #endif /* GCC_TREE_SSA_ADDRESS_H */ +diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c +index 8db6a34e0..dbe2fda96 100644 +--- a/gcc/tree-ssa-ccp.c ++++ b/gcc/tree-ssa-ccp.c +@@ -614,9 +614,17 @@ get_value_for_expr (tree expr, bool for_bits_p) + val.mask = -1; + } + if (for_bits_p +- && val.lattice_val == CONSTANT +- && TREE_CODE (val.value) == ADDR_EXPR) +- val = get_value_from_alignment (val.value); ++ && val.lattice_val == CONSTANT) ++ { ++ if (TREE_CODE (val.value) == ADDR_EXPR) ++ val = get_value_from_alignment (val.value); ++ else if (TREE_CODE (val.value) != INTEGER_CST) ++ { ++ val.lattice_val = VARYING; ++ val.value = NULL_TREE; ++ val.mask = -1; ++ } ++ } + /* Fall back to a copy value. 
*/ + if (!for_bits_p + && val.lattice_val == VARYING +@@ -2566,7 +2574,7 @@ optimize_stack_restore (gimple_stmt_iterator i) + || ALLOCA_FUNCTION_CODE_P (DECL_FUNCTION_CODE (callee))) + return NULL_TREE; + +- if (DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE) ++ if (fndecl_built_in_p (callee, BUILT_IN_STACK_RESTORE)) + goto second_stack_restore; + } + +@@ -2625,9 +2633,6 @@ optimize_stdarg_builtin (gimple *call) + bool va_list_simple_ptr; + location_t loc = gimple_location (call); + +- if (gimple_code (call) != GIMPLE_CALL) +- return NULL_TREE; +- + callee = gimple_call_fndecl (call); + + cfun_va_list = targetm.fn_abi_va_list (callee); +@@ -2930,12 +2935,10 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, + bit, flag); + gimple_call_set_lhs (g, new_lhs); + gimple_set_location (g, gimple_location (call)); +- gimple_set_vuse (g, gimple_vuse (call)); +- gimple_set_vdef (g, gimple_vdef (call)); ++ gimple_move_vops (g, call); + bool throws = stmt_can_throw_internal (cfun, call); + gimple_call_set_nothrow (as_a (g), + gimple_call_nothrow_p (as_a (call))); +- SSA_NAME_DEF_STMT (gimple_vdef (call)) = g; + gimple_stmt_iterator gsi = *gsip; + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + edge e = NULL; +diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c +index a38899edd..be9f501c9 100644 +--- a/gcc/tree-ssa-dce.c ++++ b/gcc/tree-ssa-dce.c +@@ -115,6 +115,14 @@ static bool cfg_altered; + static int *bb_postorder; + + ++/* True if we should treat any stmt with a vdef as necessary. */ ++ ++static inline bool ++keep_all_vdefs_p () ++{ ++ return optimize_debug; ++} ++ + /* If STMT is not already marked necessary, mark it, and add it to the + worklist if ADD_TO_WORKLIST is true. */ + +@@ -311,6 +319,12 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive) + return; + } + ++ if (gimple_vdef (stmt) && keep_all_vdefs_p ()) ++ { ++ mark_stmt_necessary (stmt, true); ++ return; ++ } ++ + return; + } + +@@ -526,6 +540,9 @@ mark_aliased_reaching_defs_necessary_1 (ao_ref *ref, tree vdef, void *data) + static void + mark_aliased_reaching_defs_necessary (gimple *stmt, tree ref) + { ++ /* Should have been caught before calling this function. */ ++ gcc_checking_assert (!keep_all_vdefs_p ()); ++ + unsigned int chain; + ao_ref refd; + gcc_assert (!chain_ovfl); +@@ -599,6 +616,8 @@ mark_all_reaching_defs_necessary_1 (ao_ref *ref ATTRIBUTE_UNUSED, + static void + mark_all_reaching_defs_necessary (gimple *stmt) + { ++ /* Should have been caught before calling this function. */ ++ gcc_checking_assert (!keep_all_vdefs_p ()); + walk_aliased_vdefs (NULL, gimple_vuse (stmt), + mark_all_reaching_defs_necessary_1, NULL, &visited); + } +@@ -798,6 +817,10 @@ propagate_necessity (bool aggressive) + if (!use) + continue; + ++ /* No need to search for vdefs if we intrinsicly keep them all. */ ++ if (keep_all_vdefs_p ()) ++ continue; ++ + /* If we dropped to simple mode make all immediately + reachable definitions necessary. 
*/ + if (chain_ovfl) +diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c +index e753689a7..0d716062b 100644 +--- a/gcc/tree-ssa-forwprop.c ++++ b/gcc/tree-ssa-forwprop.c +@@ -2011,16 +2011,12 @@ get_bit_field_ref_def (tree val, enum tree_code &conv_code) + return NULL_TREE; + enum tree_code code = gimple_assign_rhs_code (def_stmt); + if (code == FLOAT_EXPR +- || code == FIX_TRUNC_EXPR) ++ || code == FIX_TRUNC_EXPR ++ || CONVERT_EXPR_CODE_P (code)) + { + tree op1 = gimple_assign_rhs1 (def_stmt); + if (conv_code == ERROR_MARK) +- { +- if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (val))), +- GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1))))) +- return NULL_TREE; +- conv_code = code; +- } ++ conv_code = code; + else if (conv_code != code) + return NULL_TREE; + if (TREE_CODE (op1) != SSA_NAME) +@@ -2041,109 +2037,213 @@ static bool + simplify_vector_constructor (gimple_stmt_iterator *gsi) + { + gimple *stmt = gsi_stmt (*gsi); +- tree op, op2, orig[2], type, elem_type; ++ tree op, orig[2], type, elem_type; + unsigned elem_size, i; + unsigned HOST_WIDE_INT nelts; ++ unsigned HOST_WIDE_INT refnelts; + enum tree_code conv_code; + constructor_elt *elt; + bool maybe_ident; + +- gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); +- + op = gimple_assign_rhs1 (stmt); + type = TREE_TYPE (op); +- gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE); ++ gcc_checking_assert (TREE_CODE (op) == CONSTRUCTOR ++ && TREE_CODE (type) == VECTOR_TYPE); + + if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)) + return false; + elem_type = TREE_TYPE (type); + elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); + +- vec_perm_builder sel (nelts, nelts, 1); + orig[0] = NULL; + orig[1] = NULL; + conv_code = ERROR_MARK; + maybe_ident = true; + tree one_constant = NULL_TREE; ++ tree one_nonconstant = NULL_TREE; + auto_vec constants; + constants.safe_grow_cleared (nelts); ++ auto_vec, 64> elts; + FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) + { + tree ref, op1; ++ unsigned int elem; + + if (i >= nelts) + return false; + ++ /* Look for elements extracted and possibly converted from ++ another vector. */ + op1 = get_bit_field_ref_def (elt->value, conv_code); +- if (op1) ++ if (op1 ++ && TREE_CODE ((ref = TREE_OPERAND (op1, 0))) == SSA_NAME ++ && VECTOR_TYPE_P (TREE_TYPE (ref)) ++ && useless_type_conversion_p (TREE_TYPE (op1), ++ TREE_TYPE (TREE_TYPE (ref))) ++ && constant_multiple_p (bit_field_offset (op1), ++ bit_field_size (op1), &elem) ++ && TYPE_VECTOR_SUBPARTS (TREE_TYPE (ref)).is_constant (&refnelts)) + { +- ref = TREE_OPERAND (op1, 0); + unsigned int j; + for (j = 0; j < 2; ++j) + { + if (!orig[j]) + { +- if (TREE_CODE (ref) != SSA_NAME) +- return false; +- if (! VECTOR_TYPE_P (TREE_TYPE (ref)) +- || ! useless_type_conversion_p (TREE_TYPE (op1), +- TREE_TYPE (TREE_TYPE (ref)))) +- return false; +- if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]), +- TREE_TYPE (ref))) +- return false; +- orig[j] = ref; +- break; ++ if (j == 0 ++ || useless_type_conversion_p (TREE_TYPE (orig[0]), ++ TREE_TYPE (ref))) ++ break; + } + else if (ref == orig[j]) + break; + } +- if (j == 2) +- return false; +- +- unsigned int elt; +- if (maybe_ne (bit_field_size (op1), elem_size) +- || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) +- return false; +- if (j) +- elt += nelts; +- if (elt != i) +- maybe_ident = false; +- sel.quick_push (elt); ++ /* Found a suitable vector element. 
*/ ++ if (j < 2) ++ { ++ orig[j] = ref; ++ if (elem != i || j != 0) ++ maybe_ident = false; ++ elts.safe_push (std::make_pair (j, elem)); ++ continue; ++ } ++ /* Else fallthru. */ + } +- else if (CONSTANT_CLASS_P (elt->value)) ++ /* Handle elements not extracted from a vector. ++ 1. constants by permuting with constant vector ++ 2. a unique non-constant element by permuting with a splat vector */ ++ if (orig[1] ++ && orig[1] != error_mark_node) ++ return false; ++ orig[1] = error_mark_node; ++ if (CONSTANT_CLASS_P (elt->value)) + { +- if (orig[1] +- && orig[1] != error_mark_node) ++ if (one_nonconstant) + return false; +- orig[1] = error_mark_node; + if (!one_constant) + one_constant = elt->value; + constants[i] = elt->value; +- sel.quick_push (i + nelts); +- maybe_ident = false; + } + else +- return false; ++ { ++ if (one_constant) ++ return false; ++ if (!one_nonconstant) ++ one_nonconstant = elt->value; ++ else if (!operand_equal_p (one_nonconstant, elt->value, 0)) ++ return false; ++ } ++ elts.safe_push (std::make_pair (1, i)); ++ maybe_ident = false; + } + if (i < nelts) + return false; + +- if (! VECTOR_TYPE_P (TREE_TYPE (orig[0])) +- || maybe_ne (TYPE_VECTOR_SUBPARTS (type), +- TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0])))) ++ if (! orig[0] ++ || ! VECTOR_TYPE_P (TREE_TYPE (orig[0]))) + return false; +- +- tree tem; +- if (conv_code != ERROR_MARK +- && (! supportable_convert_operation (conv_code, type, +- TREE_TYPE (orig[0]), +- &tem, &conv_code) +- || conv_code == CALL_EXPR)) ++ refnelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0])).to_constant (); ++ /* We currently do not handle larger destination vectors. */ ++ if (refnelts < nelts) + return false; + + if (maybe_ident) + { ++ tree conv_src_type ++ = (nelts != refnelts ++ ? (conv_code != ERROR_MARK ++ ? build_vector_type (TREE_TYPE (TREE_TYPE (orig[0])), nelts) ++ : type) ++ : TREE_TYPE (orig[0])); ++ if (conv_code != ERROR_MARK ++ && !supportable_convert_operation (conv_code, type, conv_src_type, ++ &conv_code)) ++ { ++ /* Only few targets implement direct conversion patterns so try ++ some simple special cases via VEC_[UN]PACK[_FLOAT]_LO_EXPR. */ ++ optab optab; ++ tree halfvectype, dblvectype; ++ if (CONVERT_EXPR_CODE_P (conv_code) ++ && (2 * TYPE_PRECISION (TREE_TYPE (TREE_TYPE (orig[0]))) ++ == TYPE_PRECISION (TREE_TYPE (type))) ++ && mode_for_vector (as_a ++ (TYPE_MODE (TREE_TYPE (TREE_TYPE (orig[0])))), ++ nelts * 2).exists () ++ && (dblvectype ++ = build_vector_type (TREE_TYPE (TREE_TYPE (orig[0])), ++ nelts * 2)) ++ && (optab = optab_for_tree_code (FLOAT_TYPE_P (TREE_TYPE (type)) ++ ? VEC_UNPACK_FLOAT_LO_EXPR ++ : VEC_UNPACK_LO_EXPR, ++ dblvectype, ++ optab_default)) ++ && (optab_handler (optab, TYPE_MODE (dblvectype)) ++ != CODE_FOR_nothing)) ++ { ++ gimple_seq stmts = NULL; ++ tree dbl; ++ if (refnelts == nelts) ++ { ++ /* ??? Paradoxical subregs don't exist, so insert into ++ the lower half of a wider zero vector. */ ++ dbl = gimple_build (&stmts, BIT_INSERT_EXPR, dblvectype, ++ build_zero_cst (dblvectype), orig[0], ++ bitsize_zero_node); ++ } ++ else if (refnelts == 2 * nelts) ++ dbl = orig[0]; ++ else ++ dbl = gimple_build (&stmts, BIT_FIELD_REF, dblvectype, ++ orig[0], TYPE_SIZE (dblvectype), ++ bitsize_zero_node); ++ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); ++ gimple_assign_set_rhs_with_ops (gsi, ++ FLOAT_TYPE_P (TREE_TYPE (type)) ++ ? 
VEC_UNPACK_FLOAT_LO_EXPR ++ : VEC_UNPACK_LO_EXPR, ++ dbl); ++ } ++ else if (CONVERT_EXPR_CODE_P (conv_code) ++ && (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (orig[0]))) ++ == 2 * TYPE_PRECISION (TREE_TYPE (type))) ++ && mode_for_vector (as_a ++ (TYPE_MODE ++ (TREE_TYPE (TREE_TYPE (orig[0])))), ++ nelts / 2).exists () ++ && (halfvectype ++ = build_vector_type (TREE_TYPE (TREE_TYPE (orig[0])), ++ nelts / 2)) ++ && (optab = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, ++ halfvectype, ++ optab_default)) ++ && (optab_handler (optab, TYPE_MODE (halfvectype)) ++ != CODE_FOR_nothing)) ++ { ++ gimple_seq stmts = NULL; ++ tree low = gimple_build (&stmts, BIT_FIELD_REF, halfvectype, ++ orig[0], TYPE_SIZE (halfvectype), ++ bitsize_zero_node); ++ tree hig = gimple_build (&stmts, BIT_FIELD_REF, halfvectype, ++ orig[0], TYPE_SIZE (halfvectype), ++ TYPE_SIZE (halfvectype)); ++ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); ++ gimple_assign_set_rhs_with_ops (gsi, VEC_PACK_TRUNC_EXPR, ++ low, hig); ++ } ++ else ++ return false; ++ update_stmt (gsi_stmt (*gsi)); ++ return true; ++ } ++ if (nelts != refnelts) ++ { ++ gassign *lowpart ++ = gimple_build_assign (make_ssa_name (conv_src_type), ++ build3 (BIT_FIELD_REF, conv_src_type, ++ orig[0], TYPE_SIZE (conv_src_type), ++ bitsize_zero_node)); ++ gsi_insert_before (gsi, lowpart, GSI_SAME_STMT); ++ orig[0] = gimple_assign_lhs (lowpart); ++ } + if (conv_code == ERROR_MARK) + gimple_assign_set_rhs_from_tree (gsi, orig[0]); + else +@@ -2152,54 +2252,119 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) + } + else + { +- tree mask_type; ++ tree mask_type, perm_type, conv_src_type; ++ perm_type = TREE_TYPE (orig[0]); ++ conv_src_type = (nelts == refnelts ++ ? perm_type ++ : build_vector_type (TREE_TYPE (perm_type), nelts)); ++ if (conv_code != ERROR_MARK ++ && !supportable_convert_operation (conv_code, type, conv_src_type, ++ &conv_code)) ++ return false; + +- vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts); +- if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) ++ /* Now that we know the number of elements of the source build the ++ permute vector. ++ ??? When the second vector has constant values we can shuffle ++ it and its source indexes to make the permutation supported. ++ For now it mimics a blend. */ ++ vec_perm_builder sel (refnelts, refnelts, 1); ++ bool all_same_p = true; ++ for (i = 0; i < elts.length (); ++i) ++ { ++ sel.quick_push (elts[i].second + elts[i].first * refnelts); ++ all_same_p &= known_eq (sel[i], sel[0]); ++ } ++ /* And fill the tail with "something". It's really don't care, ++ and ideally we'd allow VEC_PERM to have a smaller destination ++ vector. As a heuristic: ++ ++ (a) if what we have so far duplicates a single element, make the ++ tail do the same ++ ++ (b) otherwise preserve a uniform orig[0]. This facilitates ++ later pattern-matching of VEC_PERM_EXPR to a BIT_INSERT_EXPR. */ ++ for (; i < refnelts; ++i) ++ sel.quick_push (all_same_p ++ ? sel[0] ++ : (elts[0].second == 0 && elts[0].first == 0 ++ ? 0 : refnelts) + i); ++ vec_perm_indices indices (sel, orig[1] ? 
2 : 1, refnelts); ++ if (!can_vec_perm_const_p (TYPE_MODE (perm_type), indices)) + return false; + mask_type + = build_vector_type (build_nonstandard_integer_type (elem_size, 1), +- nelts); ++ refnelts); + if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT + || maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)), +- GET_MODE_SIZE (TYPE_MODE (type)))) ++ GET_MODE_SIZE (TYPE_MODE (perm_type)))) + return false; +- op2 = vec_perm_indices_to_tree (mask_type, indices); ++ tree op2 = vec_perm_indices_to_tree (mask_type, indices); ++ bool converted_orig1 = false; ++ gimple_seq stmts = NULL; + if (!orig[1]) + orig[1] = orig[0]; +- if (orig[1] == error_mark_node) ++ else if (orig[1] == error_mark_node ++ && one_nonconstant) + { +- tree_vector_builder vec (type, nelts, 1); +- for (unsigned i = 0; i < nelts; ++i) +- if (constants[i]) ++ /* ??? We can see if we can safely convert to the original ++ element type. */ ++ converted_orig1 = conv_code != ERROR_MARK; ++ orig[1] = gimple_build_vector_from_val (&stmts, UNKNOWN_LOCATION, ++ converted_orig1 ++ ? type : perm_type, ++ one_nonconstant); ++ } ++ else if (orig[1] == error_mark_node) ++ { ++ /* ??? See if we can convert the vector to the original type. */ ++ converted_orig1 = conv_code != ERROR_MARK; ++ unsigned n = converted_orig1 ? nelts : refnelts; ++ tree_vector_builder vec (converted_orig1 ++ ? type : perm_type, n, 1); ++ for (unsigned i = 0; i < n; ++i) ++ if (i < nelts && constants[i]) + vec.quick_push (constants[i]); + else + /* ??? Push a don't-care value. */ + vec.quick_push (one_constant); + orig[1] = vec.build (); + } +- if (conv_code == ERROR_MARK) +- gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0], +- orig[1], op2); +- else if (TREE_CODE (orig[1]) == VECTOR_CST) ++ tree blend_op2 = NULL_TREE; ++ if (converted_orig1) + { +- gimple *conv +- = gimple_build_assign (make_ssa_name (type), conv_code, orig[0]); +- orig[0] = gimple_assign_lhs (conv); +- gsi_insert_before (gsi, conv, GSI_SAME_STMT); +- gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, +- orig[0], orig[1], op2); +- } +- else +- { +- gimple *perm +- = gimple_build_assign (make_ssa_name (TREE_TYPE (orig[0])), +- VEC_PERM_EXPR, orig[0], orig[1], op2); +- orig[0] = gimple_assign_lhs (perm); +- gsi_insert_before (gsi, perm, GSI_SAME_STMT); +- gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0], +- NULL_TREE, NULL_TREE); ++ /* Make sure we can do a blend in the target type. */ ++ vec_perm_builder sel (nelts, nelts, 1); ++ for (i = 0; i < elts.length (); ++i) ++ sel.quick_push (elts[i].first ++ ? elts[i].second + nelts : i); ++ vec_perm_indices indices (sel, 2, nelts); ++ if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) ++ return false; ++ mask_type ++ = build_vector_type (build_nonstandard_integer_type (elem_size, 1), ++ nelts); ++ if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT ++ || maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)), ++ GET_MODE_SIZE (TYPE_MODE (type)))) ++ return false; ++ blend_op2 = vec_perm_indices_to_tree (mask_type, indices); + } ++ tree orig1_for_perm ++ = converted_orig1 ? build_zero_cst (perm_type) : orig[1]; ++ tree res = gimple_build (&stmts, VEC_PERM_EXPR, perm_type, ++ orig[0], orig1_for_perm, op2); ++ if (nelts != refnelts) ++ res = gimple_build (&stmts, BIT_FIELD_REF, ++ conv_code != ERROR_MARK ? conv_src_type : type, ++ res, TYPE_SIZE (type), bitsize_zero_node); ++ if (conv_code != ERROR_MARK) ++ res = gimple_build (&stmts, conv_code, type, res); ++ /* Blend in the actual constant. 
*/ ++ if (converted_orig1) ++ res = gimple_build (&stmts, VEC_PERM_EXPR, type, ++ res, orig[1], blend_op2); ++ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); ++ gimple_assign_set_rhs_with_ops (gsi, SSA_NAME, res); + } + update_stmt (gsi_stmt (*gsi)); + return true; +@@ -2449,6 +2614,72 @@ pass_forwprop::execute (function *fun) + else + gsi_next (&gsi); + } ++ else if (TREE_CODE (TREE_TYPE (lhs)) == VECTOR_TYPE ++ && TYPE_MODE (TREE_TYPE (lhs)) == BLKmode ++ && gimple_assign_load_p (stmt) ++ && !gimple_has_volatile_ops (stmt) ++ && (TREE_CODE (gimple_assign_rhs1 (stmt)) ++ != TARGET_MEM_REF) ++ && !stmt_can_throw_internal (cfun, stmt)) ++ { ++ /* Rewrite loads used only in BIT_FIELD_REF extractions to ++ component-wise loads. */ ++ use_operand_p use_p; ++ imm_use_iterator iter; ++ bool rewrite = true; ++ FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ if (is_gimple_debug (use_stmt)) ++ continue; ++ if (!is_gimple_assign (use_stmt) ++ || gimple_assign_rhs_code (use_stmt) != BIT_FIELD_REF) ++ { ++ rewrite = false; ++ break; ++ } ++ } ++ if (rewrite) ++ { ++ gimple *use_stmt; ++ FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) ++ { ++ if (is_gimple_debug (use_stmt)) ++ { ++ if (gimple_debug_bind_p (use_stmt)) ++ { ++ gimple_debug_bind_reset_value (use_stmt); ++ update_stmt (use_stmt); ++ } ++ continue; ++ } ++ ++ tree bfr = gimple_assign_rhs1 (use_stmt); ++ tree new_rhs = fold_build3 (BIT_FIELD_REF, ++ TREE_TYPE (bfr), ++ unshare_expr (rhs), ++ TREE_OPERAND (bfr, 1), ++ TREE_OPERAND (bfr, 2)); ++ gimple *new_stmt ++ = gimple_build_assign (gimple_assign_lhs (use_stmt), ++ new_rhs); ++ ++ location_t loc = gimple_location (use_stmt); ++ gimple_set_location (new_stmt, loc); ++ gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt); ++ unlink_stmt_vdef (use_stmt); ++ gsi_remove (&gsi2, true); ++ ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ } ++ ++ release_defs (stmt); ++ gsi_remove (&gsi, true); ++ } ++ else ++ gsi_next (&gsi); ++ } ++ + else if (code == COMPLEX_EXPR) + { + /* Rewrite stores of a single-use complex build expression +@@ -2489,6 +2720,66 @@ pass_forwprop::execute (function *fun) + else + gsi_next (&gsi); + } ++ else if (code == CONSTRUCTOR ++ && VECTOR_TYPE_P (TREE_TYPE (rhs)) ++ && TYPE_MODE (TREE_TYPE (rhs)) == BLKmode ++ && CONSTRUCTOR_NELTS (rhs) > 0 ++ && (!VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value)) ++ || (TYPE_MODE (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value)) ++ != BLKmode))) ++ { ++ /* Rewrite stores of a single-use vector constructors ++ to component-wise stores if the mode isn't supported. 
*/ ++ use_operand_p use_p; ++ gimple *use_stmt; ++ if (single_imm_use (lhs, &use_p, &use_stmt) ++ && gimple_store_p (use_stmt) ++ && !gimple_has_volatile_ops (use_stmt) ++ && !stmt_can_throw_internal (cfun, use_stmt) ++ && is_gimple_assign (use_stmt) ++ && (TREE_CODE (gimple_assign_lhs (use_stmt)) ++ != TARGET_MEM_REF)) ++ { ++ tree elt_t = TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value); ++ unsigned HOST_WIDE_INT elt_w ++ = tree_to_uhwi (TYPE_SIZE (elt_t)); ++ unsigned HOST_WIDE_INT n ++ = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs))); ++ for (unsigned HOST_WIDE_INT bi = 0; bi < n; bi += elt_w) ++ { ++ unsigned HOST_WIDE_INT ci = bi / elt_w; ++ tree new_rhs; ++ if (ci < CONSTRUCTOR_NELTS (rhs)) ++ new_rhs = CONSTRUCTOR_ELT (rhs, ci)->value; ++ else ++ new_rhs = build_zero_cst (elt_t); ++ tree use_lhs = gimple_assign_lhs (use_stmt); ++ tree new_lhs = build3 (BIT_FIELD_REF, ++ elt_t, ++ unshare_expr (use_lhs), ++ bitsize_int (elt_w), ++ bitsize_int (bi)); ++ gimple *new_stmt = gimple_build_assign (new_lhs, new_rhs); ++ location_t loc = gimple_location (use_stmt); ++ gimple_set_location (new_stmt, loc); ++ gimple_set_vuse (new_stmt, gimple_vuse (use_stmt)); ++ gimple_set_vdef (new_stmt, ++ make_ssa_name (gimple_vop (cfun))); ++ SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; ++ gimple_set_vuse (use_stmt, gimple_vdef (new_stmt)); ++ gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt); ++ gsi_insert_before (&gsi2, new_stmt, GSI_SAME_STMT); ++ } ++ gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt); ++ unlink_stmt_vdef (use_stmt); ++ release_defs (use_stmt); ++ gsi_remove (&gsi2, true); ++ release_defs (stmt); ++ gsi_remove (&gsi, true); ++ } ++ else ++ gsi_next (&gsi); ++ } + else + gsi_next (&gsi); + } +diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c +index fec378490..695646764 100644 +--- a/gcc/tree-ssa-loop-ivopts.c ++++ b/gcc/tree-ssa-loop-ivopts.c +@@ -2461,11 +2461,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p) + switch (gimple_call_internal_fn (call)) + { + case IFN_MASK_LOAD: ++ case IFN_MASK_LOAD_LANES: + if (op_p == gimple_call_arg_ptr (call, 0)) + return TREE_TYPE (gimple_call_lhs (call)); + return NULL_TREE; + + case IFN_MASK_STORE: ++ case IFN_MASK_STORE_LANES: + if (op_p == gimple_call_arg_ptr (call, 0)) + return TREE_TYPE (gimple_call_arg (call, 3)); + return NULL_TREE; +@@ -3510,6 +3512,26 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use) + basetype = sizetype; + record_common_cand (data, build_int_cst (basetype, 0), iv->step, use); + ++ /* Compare the cost of an address with an unscaled index with the cost of ++ an address with a scaled index and add candidate if useful. */ ++ poly_int64 step; ++ if (use != NULL ++ && poly_int_tree_p (iv->step, &step) ++ && address_p (use->type)) ++ { ++ poly_int64 new_step; ++ unsigned int fact = preferred_mem_scale_factor ++ (use->iv->base, ++ TYPE_MODE (use->mem_type), ++ optimize_loop_for_speed_p (data->current_loop)); ++ ++ if (fact != 1 ++ && multiple_p (step, fact, &new_step)) ++ add_candidate (data, size_int (0), ++ wide_int_to_tree (sizetype, new_step), ++ true, NULL); ++ } ++ + /* Record common candidate with constant offset stripped in base. + Like the use itself, we also add candidate directly for it. 
*/ + base = strip_offset (iv->base, &offset); +@@ -4036,6 +4058,94 @@ get_computation_at (struct loop *loop, gimple *at, + return fold_convert (type, aff_combination_to_tree (&aff)); + } + ++/* Like get_computation_at, but try harder, even if the computation ++ is more expensive. Intended for debug stmts. */ ++ ++static tree ++get_debug_computation_at (class loop *loop, gimple *at, ++ struct iv_use *use, struct iv_cand *cand) ++{ ++ if (tree ret = get_computation_at (loop, at, use, cand)) ++ return ret; ++ ++ tree ubase = use->iv->base, ustep = use->iv->step; ++ tree cbase = cand->iv->base, cstep = cand->iv->step; ++ tree var; ++ tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); ++ widest_int rat; ++ ++ /* We must have a precision to express the values of use. */ ++ if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype)) ++ return NULL_TREE; ++ ++ /* Try to handle the case that get_computation_at doesn't, ++ try to express ++ use = ubase + (var - cbase) / ratio. */ ++ if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep), ++ &rat)) ++ return NULL_TREE; ++ ++ bool neg_p = false; ++ if (wi::neg_p (rat)) ++ { ++ if (TYPE_UNSIGNED (ctype)) ++ return NULL_TREE; ++ neg_p = true; ++ rat = wi::neg (rat); ++ } ++ ++ /* If both IVs can wrap around and CAND doesn't have a power of two step, ++ it is unsafe. Consider uint16_t CAND with step 9, when wrapping around, ++ the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say ++ uint8_t with step 3, those values divided by 3 cast to uint8_t will be ++ ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */ ++ if (!use->iv->no_overflow ++ && !cand->iv->no_overflow ++ && !integer_pow2p (cstep)) ++ return NULL_TREE; ++ ++ int bits = wi::exact_log2 (rat); ++ if (bits == -1) ++ bits = wi::floor_log2 (rat) + 1; ++ if (!cand->iv->no_overflow ++ && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype)) ++ return NULL_TREE; ++ ++ var = var_at_stmt (loop, cand, at); ++ ++ if (POINTER_TYPE_P (ctype)) ++ { ++ ctype = unsigned_type_for (ctype); ++ cbase = fold_convert (ctype, cbase); ++ cstep = fold_convert (ctype, cstep); ++ var = fold_convert (ctype, var); ++ } ++ ++ ubase = unshare_expr (ubase); ++ cbase = unshare_expr (cbase); ++ if (stmt_after_increment (loop, cand, at)) ++ var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, ++ unshare_expr (cstep)); ++ ++ var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase); ++ var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var, ++ wide_int_to_tree (TREE_TYPE (var), rat)); ++ if (POINTER_TYPE_P (utype)) ++ { ++ var = fold_convert (sizetype, var); ++ if (neg_p) ++ var = fold_build1 (NEGATE_EXPR, sizetype, var); ++ var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var); ++ } ++ else ++ { ++ var = fold_convert (utype, var); ++ var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype, ++ ubase, var); ++ } ++ return var; ++} ++ + /* Adjust the cost COST for being in loop setup rather than loop body. + If we're optimizing for space, the loop setup overhead is constant; + if we're optimizing for speed, amortize it over the per-iteration cost. +@@ -7122,6 +7232,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use) + { + case IFN_MASK_LOAD: + case IFN_MASK_STORE: ++ case IFN_MASK_LOAD_LANES: ++ case IFN_MASK_STORE_LANES: + /* The second argument contains the correct alias type. 
*/ + gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0)); + return TREE_TYPE (gimple_call_arg (call, 1)); +@@ -7339,6 +7451,7 @@ remove_unused_ivs (struct ivopts_data *data, bitmap toremove) + struct iv_use dummy_use; + struct iv_cand *best_cand = NULL, *cand; + unsigned i, best_pref = 0, cand_pref; ++ tree comp = NULL_TREE; + + memset (&dummy_use, 0, sizeof (dummy_use)); + dummy_use.iv = info->iv; +@@ -7359,20 +7472,22 @@ remove_unused_ivs (struct ivopts_data *data, bitmap toremove) + ? 1 : 0; + if (best_cand == NULL || best_pref < cand_pref) + { +- best_cand = cand; +- best_pref = cand_pref; ++ tree this_comp ++ = get_debug_computation_at (data->current_loop, ++ SSA_NAME_DEF_STMT (def), ++ &dummy_use, cand); ++ if (this_comp) ++ { ++ best_cand = cand; ++ best_pref = cand_pref; ++ comp = this_comp; ++ } + } + } + + if (!best_cand) + continue; + +- tree comp = get_computation_at (data->current_loop, +- SSA_NAME_DEF_STMT (def), +- &dummy_use, best_cand); +- if (!comp) +- continue; +- + if (count > 1) + { + tree vexpr = make_node (DEBUG_EXPR_DECL); +diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c +index 3dfda7a4f..8607a59d4 100644 +--- a/gcc/tree-ssa-math-opts.c ++++ b/gcc/tree-ssa-math-opts.c +@@ -1040,14 +1040,9 @@ pass_cse_reciprocals::execute (function *fun) + else + stmt2 = gimple_build_call_internal_vec (ifn, args); + gimple_call_set_lhs (stmt2, arg1); +- if (gimple_vdef (call)) +- { +- gimple_set_vdef (stmt2, gimple_vdef (call)); +- SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2; +- } ++ gimple_move_vops (stmt2, call); + gimple_call_set_nothrow (stmt2, + gimple_call_nothrow_p (call)); +- gimple_set_vuse (stmt2, gimple_vuse (call)); + gimple_stmt_iterator gsi2 = gsi_for_stmt (call); + gsi_replace (&gsi2, stmt2, true); + } +@@ -3048,6 +3043,8 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, + /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 + with uses in additions and subtractions to form fused multiply-add + operations. Returns true if successful and MUL_STMT should be removed. ++ If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional ++ on MUL_COND, otherwise it is unconditional. 
+ + If STATE indicates that we are deferring FMA transformation, that means + that we do not produce FMAs for basic blocks which look like: +@@ -3064,7 +3061,7 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, + + static bool + convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, +- fma_deferring_state *state) ++ fma_deferring_state *state, tree mul_cond = NULL_TREE) + { + tree mul_result = gimple_get_lhs (mul_stmt); + tree type = TREE_TYPE (mul_result); +@@ -3178,6 +3175,9 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, + return false; + } + ++ if (mul_cond && cond != mul_cond) ++ return false; ++ + if (cond) + { + if (cond == result || else_value == result) +@@ -3789,38 +3789,48 @@ math_opts_dom_walker::after_dom_children (basic_block bb) + } + else if (is_gimple_call (stmt)) + { +- tree fndecl = gimple_call_fndecl (stmt); +- if (fndecl && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) ++ switch (gimple_call_combined_fn (stmt)) + { +- switch (DECL_FUNCTION_CODE (fndecl)) ++ CASE_CFN_POW: ++ if (gimple_call_lhs (stmt) ++ && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST ++ && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), ++ &dconst2) ++ && convert_mult_to_fma (stmt, ++ gimple_call_arg (stmt, 0), ++ gimple_call_arg (stmt, 0), ++ &fma_state)) + { +- case BUILT_IN_POWF: +- case BUILT_IN_POW: +- case BUILT_IN_POWL: +- if (gimple_call_lhs (stmt) +- && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST +- && real_equal +- (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), +- &dconst2) +- && convert_mult_to_fma (stmt, +- gimple_call_arg (stmt, 0), +- gimple_call_arg (stmt, 0), +- &fma_state)) +- { +- unlink_stmt_vdef (stmt); +- if (gsi_remove (&gsi, true) +- && gimple_purge_dead_eh_edges (bb)) +- *m_cfg_changed_p = true; +- release_defs (stmt); +- continue; +- } +- break; ++ unlink_stmt_vdef (stmt); ++ if (gsi_remove (&gsi, true) ++ && gimple_purge_dead_eh_edges (bb)) ++ *m_cfg_changed_p = true; ++ release_defs (stmt); ++ continue; ++ } ++ break; + +- default:; ++ case CFN_COND_MUL: ++ if (convert_mult_to_fma (stmt, ++ gimple_call_arg (stmt, 1), ++ gimple_call_arg (stmt, 2), ++ &fma_state, ++ gimple_call_arg (stmt, 0))) ++ ++ { ++ gsi_remove (&gsi, true); ++ release_defs (stmt); ++ continue; + } ++ break; ++ ++ case CFN_LAST: ++ cancel_fma_deferring (&fma_state); ++ break; ++ ++ default: ++ break; + } +- else +- cancel_fma_deferring (&fma_state); + } + gsi_next (&gsi); + } +diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c +index 6b78dc1c0..0862f83e9 100644 +--- a/gcc/tree-ssa-propagate.c ++++ b/gcc/tree-ssa-propagate.c +@@ -625,8 +625,7 @@ finish_update_gimple_call (gimple_stmt_iterator *si_p, gimple *new_stmt, + { + gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); + move_ssa_defining_stmt_for_defs (new_stmt, stmt); +- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); ++ gimple_move_vops (new_stmt, stmt); + gimple_set_location (new_stmt, gimple_location (stmt)); + if (gimple_block (new_stmt) == NULL_TREE) + gimple_set_block (new_stmt, gimple_block (stmt)); +@@ -706,8 +705,7 @@ update_call_from_tree (gimple_stmt_iterator *si_p, tree expr) + STRIP_USELESS_TYPE_CONVERSION (expr); + new_stmt = gimple_build_assign (lhs, expr); + move_ssa_defining_stmt_for_defs (new_stmt, stmt); +- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); ++ gimple_move_vops (new_stmt, stmt); + } + else if (!TREE_SIDE_EFFECTS (expr)) + { +@@ -732,8 +730,7 @@ 
update_call_from_tree (gimple_stmt_iterator *si_p, tree expr) + else + lhs = create_tmp_var (TREE_TYPE (expr)); + new_stmt = gimple_build_assign (lhs, expr); +- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); ++ gimple_move_vops (new_stmt, stmt); + move_ssa_defining_stmt_for_defs (new_stmt, stmt); + } + gimple_set_location (new_stmt, gimple_location (stmt)); +diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c +index 91494d761..096584062 100644 +--- a/gcc/tree-ssa-threadedge.c ++++ b/gcc/tree-ssa-threadedge.c +@@ -331,6 +331,7 @@ record_temporary_equivalences_from_stmts_at_dest (edge e, + { + tree fndecl = gimple_call_fndecl (stmt); + if (fndecl ++ && fndecl_built_in_p (fndecl, BUILT_IN_NORMAL) + && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_OBJECT_SIZE + || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)) + continue; +diff --git a/gcc/tree-streamer-in.c b/gcc/tree-streamer-in.c +index f6d137316..eb3e174fc 100644 +--- a/gcc/tree-streamer-in.c ++++ b/gcc/tree-streamer-in.c +@@ -324,8 +324,7 @@ unpack_ts_decl_with_vis_value_fields (struct bitpack_d *bp, tree expr) + static void + unpack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr) + { +- DECL_BUILT_IN_CLASS (expr) = bp_unpack_enum (bp, built_in_class, +- BUILT_IN_LAST); ++ built_in_class cl = bp_unpack_enum (bp, built_in_class, BUILT_IN_LAST); + DECL_STATIC_CONSTRUCTOR (expr) = (unsigned) bp_unpack_value (bp, 1); + DECL_STATIC_DESTRUCTOR (expr) = (unsigned) bp_unpack_value (bp, 1); + DECL_UNINLINABLE (expr) = (unsigned) bp_unpack_value (bp, 1); +@@ -333,7 +332,7 @@ unpack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr) + DECL_IS_NOVOPS (expr) = (unsigned) bp_unpack_value (bp, 1); + DECL_IS_RETURNS_TWICE (expr) = (unsigned) bp_unpack_value (bp, 1); + DECL_IS_MALLOC (expr) = (unsigned) bp_unpack_value (bp, 1); +- DECL_IS_OPERATOR_NEW (expr) = (unsigned) bp_unpack_value (bp, 1); ++ DECL_SET_IS_OPERATOR_NEW (expr, (unsigned) bp_unpack_value (bp, 1)); + DECL_DECLARED_INLINE_P (expr) = (unsigned) bp_unpack_value (bp, 1); + DECL_STATIC_CHAIN (expr) = (unsigned) bp_unpack_value (bp, 1); + DECL_NO_INLINE_WARNING_P (expr) = (unsigned) bp_unpack_value (bp, 1); +@@ -343,22 +342,22 @@ unpack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr) + DECL_DISREGARD_INLINE_LIMITS (expr) = (unsigned) bp_unpack_value (bp, 1); + DECL_PURE_P (expr) = (unsigned) bp_unpack_value (bp, 1); + DECL_LOOPING_CONST_OR_PURE_P (expr) = (unsigned) bp_unpack_value (bp, 1); +- if (DECL_BUILT_IN_CLASS (expr) != NOT_BUILT_IN) ++ unsigned int fcode = 0; ++ if (cl != NOT_BUILT_IN) + { +- DECL_FUNCTION_CODE (expr) = (enum built_in_function) bp_unpack_value (bp, +- 12); +- if (DECL_BUILT_IN_CLASS (expr) == BUILT_IN_NORMAL +- && DECL_FUNCTION_CODE (expr) >= END_BUILTINS) ++ fcode = bp_unpack_value (bp, 32); ++ if (cl == BUILT_IN_NORMAL && fcode >= END_BUILTINS) + fatal_error (input_location, + "machine independent builtin code out of range"); +- else if (DECL_BUILT_IN_CLASS (expr) == BUILT_IN_MD) ++ else if (cl == BUILT_IN_MD) + { +- tree result = targetm.builtin_decl (DECL_FUNCTION_CODE (expr), true); ++ tree result = targetm.builtin_decl (fcode, true); + if (!result || result == error_mark_node) + fatal_error (input_location, + "target specific builtin not available"); + } + } ++ set_decl_built_in_function (expr, cl, fcode); + } + + +diff --git a/gcc/tree-streamer-out.c b/gcc/tree-streamer-out.c +index 3f619e830..12693f6f4 100644 +--- a/gcc/tree-streamer-out.c ++++ 
b/gcc/tree-streamer-out.c +@@ -295,7 +295,7 @@ pack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr) + bp_pack_value (bp, DECL_IS_NOVOPS (expr), 1); + bp_pack_value (bp, DECL_IS_RETURNS_TWICE (expr), 1); + bp_pack_value (bp, DECL_IS_MALLOC (expr), 1); +- bp_pack_value (bp, DECL_IS_OPERATOR_NEW (expr), 1); ++ bp_pack_value (bp, DECL_IS_OPERATOR_NEW_P (expr), 1); + bp_pack_value (bp, DECL_DECLARED_INLINE_P (expr), 1); + bp_pack_value (bp, DECL_STATIC_CHAIN (expr), 1); + bp_pack_value (bp, DECL_NO_INLINE_WARNING_P (expr), 1); +@@ -305,7 +305,7 @@ pack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr) + bp_pack_value (bp, DECL_PURE_P (expr), 1); + bp_pack_value (bp, DECL_LOOPING_CONST_OR_PURE_P (expr), 1); + if (DECL_BUILT_IN_CLASS (expr) != NOT_BUILT_IN) +- bp_pack_value (bp, DECL_FUNCTION_CODE (expr), 12); ++ bp_pack_value (bp, DECL_UNCHECKED_FUNCTION_CODE (expr), 32); + } + + +diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c +index 39bc2a82b..8d97deaf2 100644 +--- a/gcc/tree-vect-generic.c ++++ b/gcc/tree-vect-generic.c +@@ -1671,7 +1671,6 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) + gimple *g; + tree lhs = gimple_call_lhs (stmt); + tree arg = gimple_call_arg (stmt, 0); +- tree decl = NULL_TREE; + tree ret_type = TREE_TYPE (lhs); + tree arg_type = TREE_TYPE (arg); + tree new_rhs, compute_type = TREE_TYPE (arg_type); +@@ -1698,16 +1697,9 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) + + if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)) + { +- if (supportable_convert_operation (code, ret_type, arg_type, &decl, +- &code1)) ++ if (supportable_convert_operation (code, ret_type, arg_type, &code1)) + { +- if (code1 == CALL_EXPR) +- { +- g = gimple_build_call (decl, 1, arg); +- gimple_call_set_lhs (g, lhs); +- } +- else +- g = gimple_build_assign (lhs, code1, arg); ++ g = gimple_build_assign (lhs, code1, arg); + gsi_replace (gsi, g, false); + return; + } +@@ -1726,11 +1718,11 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) + tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts); + tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts); + if (supportable_convert_operation (code, ret1_type, arg1_type, +- &decl, &code1)) ++ &code1)) + { + new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, + ret_type, arg1_type, arg, +- decl, code1); ++ NULL_TREE, code1); + g = gimple_build_assign (lhs, new_rhs); + gsi_replace (gsi, g, false); + return; +diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +index 85be01748..b76728452 100644 +--- a/gcc/tree-vect-loop.c ++++ b/gcc/tree-vect-loop.c +@@ -5581,6 +5581,30 @@ vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest, + return lhs; + } + ++/* Get a masked internal function equivalent to REDUC_FN. VECTYPE_IN is the ++ type of the vector input. */ ++ ++static internal_fn ++get_masked_reduction_fn (internal_fn reduc_fn, tree vectype_in) ++{ ++ internal_fn mask_reduc_fn; ++ ++ switch (reduc_fn) ++ { ++ case IFN_FOLD_LEFT_PLUS: ++ mask_reduc_fn = IFN_MASK_FOLD_LEFT_PLUS; ++ break; ++ ++ default: ++ return IFN_LAST; ++ } ++ ++ if (direct_internal_fn_supported_p (mask_reduc_fn, vectype_in, ++ OPTIMIZE_FOR_SPEED)) ++ return mask_reduc_fn; ++ return IFN_LAST; ++} ++ + /* Perform an in-order reduction (FOLD_LEFT_REDUCTION). STMT_INFO is the + statement that sets the live-out value. REDUC_DEF_STMT is the phi + statement. 
CODE is the operation performed by STMT_INFO and OPS are +@@ -5603,6 +5627,7 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info, + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); + stmt_vec_info new_stmt_info = NULL; ++ internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in); + + int ncopies; + if (slp_node) +@@ -5673,16 +5698,21 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info, + def0 = negated; + } + +- if (mask) ++ if (mask && mask_reduc_fn == IFN_LAST) + def0 = merge_with_identity (gsi, mask, vectype_out, def0, + vector_identity); + + /* On the first iteration the input is simply the scalar phi + result, and for subsequent iterations it is the output of + the preceding operation. */ +- if (reduc_fn != IFN_LAST) ++ if (reduc_fn != IFN_LAST || (mask && mask_reduc_fn != IFN_LAST)) + { +- new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, def0); ++ if (mask && mask_reduc_fn != IFN_LAST) ++ new_stmt = gimple_build_call_internal (mask_reduc_fn, 3, reduc_var, ++ def0, mask); ++ else ++ new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, ++ def0); + /* For chained SLP reductions the output of the previous reduction + operation serves as the input of the next. For the final statement + the output cannot be a temporary - we reuse the original +@@ -5782,6 +5812,7 @@ use_mask_by_cond_expr_p (enum tree_code code, internal_fn cond_fn, + switch (code) + { + case DOT_PROD_EXPR: ++ case SAD_EXPR: + return true; + + default: +@@ -5811,6 +5842,17 @@ build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask, + break; + } + ++ case SAD_EXPR: ++ { ++ tree vectype = TREE_TYPE (vop[1]); ++ tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1"); ++ gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR, ++ mask, vop[1], vop[0]); ++ gsi_insert_before (gsi, select, GSI_SAME_STMT); ++ vop[1] = masked_op1; ++ break; ++ } ++ + default: + gcc_unreachable (); + } +diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c +index 026148cc4..99df38711 100644 +--- a/gcc/tree-vect-patterns.c ++++ b/gcc/tree-vect-patterns.c +@@ -1302,7 +1302,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + { + if (flag_unsafe_math_optimizations + && TREE_CODE (base) == REAL_CST +- && !gimple_call_internal_p (last_stmt)) ++ && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL)) + { + combined_fn log_cfn; + built_in_function exp_bfn; +@@ -1728,6 +1728,175 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out) + return pattern_stmt; + } + ++/* Recognize the following patterns: ++ ++ ATYPE a; // narrower than TYPE ++ BTYPE b; // narrower than TYPE ++ ++ 1) Multiply high with scaling ++ TYPE res = ((TYPE) a * (TYPE) b) >> c; ++ 2) ... or also with rounding ++ TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1; ++ ++ where only the bottom half of res is used. */ ++ ++static gimple * ++vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out) ++{ ++ /* Check for a right shift. */ ++ gassign *last_stmt = dyn_cast (last_stmt_info->stmt); ++ if (!last_stmt ++ || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR) ++ return NULL; ++ vec_info *vinfo = last_stmt_info->vinfo; ++ ++ /* Check that the shift result is wider than the users of the ++ result need (i.e. that narrowing would be a natural choice). 
*/ ++ tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt)); ++ unsigned int target_precision ++ = vect_element_precision (last_stmt_info->min_output_precision); ++ if (!INTEGRAL_TYPE_P (lhs_type) ++ || target_precision >= TYPE_PRECISION (lhs_type)) ++ return NULL; ++ ++ /* Look through any change in sign on the outer shift input. */ ++ vect_unpromoted_value unprom_rshift_input; ++ tree rshift_input = vect_look_through_possible_promotion ++ (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input); ++ if (!rshift_input ++ || TYPE_PRECISION (TREE_TYPE (rshift_input)) ++ != TYPE_PRECISION (lhs_type)) ++ return NULL; ++ ++ /* Get the definition of the shift input. */ ++ stmt_vec_info rshift_input_stmt_info ++ = vect_get_internal_def (vinfo, rshift_input); ++ if (!rshift_input_stmt_info) ++ return NULL; ++ gassign *rshift_input_stmt ++ = dyn_cast (rshift_input_stmt_info->stmt); ++ if (!rshift_input_stmt) ++ return NULL; ++ ++ stmt_vec_info mulh_stmt_info; ++ tree scale_term; ++ internal_fn ifn; ++ unsigned int expect_offset; ++ ++ /* Check for the presence of the rounding term. */ ++ if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR) ++ { ++ /* Check that the outer shift was by 1. */ ++ if (!integer_onep (gimple_assign_rhs2 (last_stmt))) ++ return NULL; ++ ++ /* Check that the second operand of the PLUS_EXPR is 1. */ ++ if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt))) ++ return NULL; ++ ++ /* Look through any change in sign on the addition input. */ ++ vect_unpromoted_value unprom_plus_input; ++ tree plus_input = vect_look_through_possible_promotion ++ (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input); ++ if (!plus_input ++ || TYPE_PRECISION (TREE_TYPE (plus_input)) ++ != TYPE_PRECISION (TREE_TYPE (rshift_input))) ++ return NULL; ++ ++ /* Get the definition of the multiply-high-scale part. */ ++ stmt_vec_info plus_input_stmt_info ++ = vect_get_internal_def (vinfo, plus_input); ++ if (!plus_input_stmt_info) ++ return NULL; ++ gassign *plus_input_stmt ++ = dyn_cast (plus_input_stmt_info->stmt); ++ if (!plus_input_stmt ++ || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR) ++ return NULL; ++ ++ /* Look through any change in sign on the scaling input. */ ++ vect_unpromoted_value unprom_scale_input; ++ tree scale_input = vect_look_through_possible_promotion ++ (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input); ++ if (!scale_input ++ || TYPE_PRECISION (TREE_TYPE (scale_input)) ++ != TYPE_PRECISION (TREE_TYPE (plus_input))) ++ return NULL; ++ ++ /* Get the definition of the multiply-high part. */ ++ mulh_stmt_info = vect_get_internal_def (vinfo, scale_input); ++ if (!mulh_stmt_info) ++ return NULL; ++ ++ /* Get the scaling term. */ ++ scale_term = gimple_assign_rhs2 (plus_input_stmt); ++ ++ expect_offset = target_precision + 2; ++ ifn = IFN_MULHRS; ++ } ++ else ++ { ++ mulh_stmt_info = rshift_input_stmt_info; ++ scale_term = gimple_assign_rhs2 (last_stmt); ++ ++ expect_offset = target_precision + 1; ++ ifn = IFN_MULHS; ++ } ++ ++ /* Check that the scaling factor is correct. */ ++ if (TREE_CODE (scale_term) != INTEGER_CST ++ || wi::to_widest (scale_term) + expect_offset ++ != TYPE_PRECISION (lhs_type)) ++ return NULL; ++ ++ /* Check whether the scaling input term can be seen as two widened ++ inputs multiplied together. 
*/ ++ vect_unpromoted_value unprom_mult[2]; ++ tree new_type; ++ unsigned int nops ++ = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR, ++ false, 2, unprom_mult, &new_type); ++ if (nops != 2) ++ return NULL; ++ ++ vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt); ++ ++ /* Adjust output precision. */ ++ if (TYPE_PRECISION (new_type) < target_precision) ++ new_type = build_nonstandard_integer_type ++ (target_precision, TYPE_UNSIGNED (new_type)); ++ ++ /* Check for target support. */ ++ tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); ++ if (!new_vectype ++ || !direct_internal_fn_supported_p ++ (ifn, new_vectype, OPTIMIZE_FOR_SPEED)) ++ return NULL; ++ ++ /* The IR requires a valid vector type for the cast result, even though ++ it's likely to be discarded. */ ++ *type_out = get_vectype_for_scalar_type (vinfo, lhs_type); ++ if (!*type_out) ++ return NULL; ++ ++ /* Generate the IFN_MULHRS call. */ ++ tree new_var = vect_recog_temp_ssa_var (new_type, NULL); ++ tree new_ops[2]; ++ vect_convert_inputs (last_stmt_info, 2, new_ops, new_type, ++ unprom_mult, new_vectype); ++ gcall *mulhrs_stmt ++ = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]); ++ gimple_call_set_lhs (mulhrs_stmt, new_var); ++ gimple_set_location (mulhrs_stmt, gimple_location (last_stmt)); ++ ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "created pattern stmt: %G", mulhrs_stmt); ++ ++ return vect_convert_output (last_stmt_info, lhs_type, ++ mulhrs_stmt, new_vectype); ++} ++ + /* Recognize the patterns: + + ATYPE a; // narrower than TYPE +@@ -2872,6 +3041,37 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + /* Pattern detected. */ + vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt); + ++ *type_out = vectype; ++ ++ /* Check if the target supports this internal function. */ ++ internal_fn ifn = IFN_DIV_POW2; ++ if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED)) ++ { ++ tree shift = build_int_cst (itype, tree_log2 (oprnd1)); ++ ++ tree var_div = vect_recog_temp_ssa_var (itype, NULL); ++ gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift); ++ gimple_call_set_lhs (div_stmt, var_div); ++ ++ if (rhs_code == TRUNC_MOD_EXPR) ++ { ++ append_pattern_def_seq (stmt_vinfo, div_stmt); ++ def_stmt ++ = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), ++ LSHIFT_EXPR, var_div, shift); ++ append_pattern_def_seq (stmt_vinfo, def_stmt); ++ pattern_stmt ++ = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), ++ MINUS_EXPR, oprnd0, ++ gimple_assign_lhs (def_stmt)); ++ } ++ else ++ pattern_stmt = div_stmt; ++ gimple_set_location (pattern_stmt, gimple_location (last_stmt)); ++ ++ return pattern_stmt; ++ } ++ + cond = build2 (LT_EXPR, boolean_type_node, oprnd0, + build_int_cst (itype, 0)); + if (rhs_code == TRUNC_DIV_EXPR +@@ -2948,7 +3148,6 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + signmask); + } + +- *type_out = vectype; + return pattern_stmt; + } + +@@ -4875,6 +5074,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { + /* Must come after over_widening, which narrows the shift as much as + possible beforehand. 
*/ + { vect_recog_average_pattern, "average" }, ++ { vect_recog_mulhs_pattern, "mult_high" }, + { vect_recog_cast_forwprop_pattern, "cast_forwprop" }, + { vect_recog_widen_mult_pattern, "widen_mult" }, + { vect_recog_dot_prod_pattern, "dot_prod" }, +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index 82b868926..68a9f7574 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -4497,7 +4497,6 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info, + + static gimple * + vect_gen_widened_results_half (enum tree_code code, +- tree decl, + tree vec_oprnd0, tree vec_oprnd1, int op_type, + tree vec_dest, gimple_stmt_iterator *gsi, + stmt_vec_info stmt_info) +@@ -4506,26 +4505,12 @@ vect_gen_widened_results_half (enum tree_code code, + tree new_temp; + + /* Generate half of the widened result: */ +- if (code == CALL_EXPR) +- { +- /* Target specific support */ +- if (op_type == binary_op) +- new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1); +- else +- new_stmt = gimple_build_call (decl, 1, vec_oprnd0); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_call_set_lhs (new_stmt, new_temp); +- } +- else +- { +- /* Generic support */ +- gcc_assert (op_type == TREE_CODE_LENGTH (code)); +- if (op_type != binary_op) +- vec_oprnd1 = NULL; +- new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- } ++ gcc_assert (op_type == TREE_CODE_LENGTH (code)); ++ if (op_type != binary_op) ++ vec_oprnd1 = NULL; ++ new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + + return new_stmt; +@@ -4651,8 +4636,7 @@ vect_create_vectorized_promotion_stmts (vec *vec_oprnds0, + stmt_vec_info stmt_info, tree vec_dest, + gimple_stmt_iterator *gsi, + enum tree_code code1, +- enum tree_code code2, tree decl1, +- tree decl2, int op_type) ++ enum tree_code code2, int op_type) + { + int i; + tree vop0, vop1, new_tmp1, new_tmp2; +@@ -4668,10 +4652,10 @@ vect_create_vectorized_promotion_stmts (vec *vec_oprnds0, + vop1 = NULL_TREE; + + /* Generate the two halves of promotion operation. 
*/ +- new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1, ++ new_stmt1 = vect_gen_widened_results_half (code1, vop0, vop1, + op_type, vec_dest, gsi, + stmt_info); +- new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1, ++ new_stmt2 = vect_gen_widened_results_half (code2, vop0, vop1, + op_type, vec_dest, gsi, + stmt_info); + if (is_gimple_call (new_stmt1)) +@@ -4712,7 +4696,6 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; + enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; +- tree decl1 = NULL_TREE, decl2 = NULL_TREE; + tree new_temp; + enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; + int ndts = 2; +@@ -4883,8 +4866,7 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + && code != FLOAT_EXPR + && !CONVERT_EXPR_CODE_P (code)) + return false; +- if (supportable_convert_operation (code, vectype_out, vectype_in, +- &decl1, &code1)) ++ if (supportable_convert_operation (code, vectype_out, vectype_in, &code1)) + break; + /* FALLTHRU */ + unsupported: +@@ -4924,7 +4906,7 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + if (GET_MODE_SIZE (rhs_mode) == fltsz) + { + if (!supportable_convert_operation (code, vectype_out, +- cvt_type, &decl1, &codecvt1)) ++ cvt_type, &codecvt1)) + goto unsupported; + } + else if (!supportable_widening_operation (code, stmt_info, +@@ -4975,7 +4957,7 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + if (cvt_type == NULL_TREE) + goto unsupported; + if (!supportable_convert_operation (code, cvt_type, vectype_in, +- &decl1, &codecvt1)) ++ &codecvt1)) + goto unsupported; + if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, + &code1, &multi_step_cvt, +@@ -5084,24 +5066,12 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + { + stmt_vec_info new_stmt_info; + /* Arguments are ready, create the new vector stmt. 
*/ +- if (code1 == CALL_EXPR) +- { +- gcall *new_stmt = gimple_build_call (decl1, 1, vop0); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_call_set_lhs (new_stmt, new_temp); +- new_stmt_info +- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); +- } +- else +- { +- gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); +- gassign *new_stmt +- = gimple_build_assign (vec_dest, code1, vop0); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- new_stmt_info +- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); +- } ++ gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); ++ gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0); ++ new_temp = make_ssa_name (vec_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ new_stmt_info ++ = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); +@@ -5193,8 +5163,7 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + vect_create_vectorized_promotion_stmts (&vec_oprnds0, + &vec_oprnds1, stmt_info, + this_dest, gsi, +- c1, c2, decl1, decl2, +- op_type); ++ c1, c2, op_type); + } + + FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) +@@ -5202,25 +5171,12 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + stmt_vec_info new_stmt_info; + if (cvt_type) + { +- if (codecvt1 == CALL_EXPR) +- { +- gcall *new_stmt = gimple_build_call (decl1, 1, vop0); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_call_set_lhs (new_stmt, new_temp); +- new_stmt_info +- = vect_finish_stmt_generation (stmt_info, new_stmt, +- gsi); +- } +- else +- { +- gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); +- new_temp = make_ssa_name (vec_dest); +- gassign *new_stmt +- = gimple_build_assign (new_temp, codecvt1, vop0); +- new_stmt_info +- = vect_finish_stmt_generation (stmt_info, new_stmt, +- gsi); +- } ++ gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); ++ new_temp = make_ssa_name (vec_dest); ++ gassign *new_stmt ++ = gimple_build_assign (new_temp, codecvt1, vop0); ++ new_stmt_info ++ = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + } + else + new_stmt_info = vinfo->lookup_def (vop0); +@@ -5263,22 +5219,11 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + if (cvt_type) + FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) + { +- if (codecvt1 == CALL_EXPR) +- { +- gcall *new_stmt = gimple_build_call (decl1, 1, vop0); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_call_set_lhs (new_stmt, new_temp); +- vect_finish_stmt_generation (stmt_info, new_stmt, gsi); +- } +- else +- { +- gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); +- new_temp = make_ssa_name (vec_dest); +- gassign *new_stmt +- = gimple_build_assign (new_temp, codecvt1, vop0); +- vect_finish_stmt_generation (stmt_info, new_stmt, gsi); +- } +- ++ gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); ++ new_temp = make_ssa_name (vec_dest); ++ gassign *new_stmt ++ = gimple_build_assign (new_temp, codecvt1, vop0); ++ vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + vec_oprnds0[i] = new_temp; + } + +@@ -8774,8 +8719,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + new_stmt = gimple_build_assign (vec_dest, data_ref); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); +- gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt)); +- gimple_set_vuse (new_stmt, gimple_vuse 
(stmt_info->stmt)); ++ gimple_move_vops (new_stmt, stmt_info->stmt); + vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + msq = new_temp; + +diff --git a/gcc/tree-vector-builder.c b/gcc/tree-vector-builder.c +index f31dc13b4..d02fb950c 100644 +--- a/gcc/tree-vector-builder.c ++++ b/gcc/tree-vector-builder.c +@@ -24,103 +24,6 @@ along with GCC; see the file COPYING3. If not see + #include "fold-const.h" + #include "tree-vector-builder.h" + +-/* Try to start building a new vector of type TYPE that holds the result of +- a unary operation on VECTOR_CST T. ALLOW_STEPPED_P is true if the +- operation can handle stepped encodings directly, without having to +- expand the full sequence. +- +- Return true if the operation is possible, which it always is when +- ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */ +- +-bool +-tree_vector_builder::new_unary_operation (tree type, tree t, +- bool allow_stepped_p) +-{ +- poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type); +- gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t)))); +- unsigned int npatterns = VECTOR_CST_NPATTERNS (t); +- unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (t); +- if (!allow_stepped_p && nelts_per_pattern > 2) +- { +- if (!full_nelts.is_constant ()) +- return false; +- npatterns = full_nelts.to_constant (); +- nelts_per_pattern = 1; +- } +- new_vector (type, npatterns, nelts_per_pattern); +- return true; +-} +- +-/* Try to start building a new vector of type TYPE that holds the result of +- a binary operation on VECTOR_CSTs T1 and T2. ALLOW_STEPPED_P is true if +- the operation can handle stepped encodings directly, without having to +- expand the full sequence. +- +- Return true if the operation is possible. Leave the builder unchanged +- otherwise. */ +- +-bool +-tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2, +- bool allow_stepped_p) +-{ +- poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type); +- gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1))) +- && known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)))); +- /* Conceptually we split the patterns in T1 and T2 until we have +- an equal number for both. Each split pattern requires the same +- number of elements per pattern as the original. E.g. splitting: +- +- { 1, 2, 3, ... } +- +- into two gives: +- +- { 1, 3, 5, ... } +- { 2, 4, 6, ... } +- +- while splitting: +- +- { 1, 0, ... } +- +- into two gives: +- +- { 1, 0, ... } +- { 0, 0, ... }. */ +- unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1), +- VECTOR_CST_NPATTERNS (t2)); +- unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1), +- VECTOR_CST_NELTS_PER_PATTERN (t2)); +- if (!allow_stepped_p && nelts_per_pattern > 2) +- { +- if (!full_nelts.is_constant ()) +- return false; +- npatterns = full_nelts.to_constant (); +- nelts_per_pattern = 1; +- } +- new_vector (type, npatterns, nelts_per_pattern); +- return true; +-} +- +-/* Return the number of elements that the caller needs to operate on in +- order to handle a binary operation on VECTOR_CSTs T1 and T2. This static +- function is used instead of new_binary_operation if the result of the +- operation is not a VECTOR_CST. */ +- +-unsigned int +-tree_vector_builder::binary_encoded_nelts (tree t1, tree t2) +-{ +- poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1)); +- gcc_assert (known_eq (nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)))); +- /* See new_binary_operation for details. 
*/ +- unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1), +- VECTOR_CST_NPATTERNS (t2)); +- unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1), +- VECTOR_CST_NELTS_PER_PATTERN (t2)); +- unsigned HOST_WIDE_INT const_nelts; +- if (nelts.is_constant (&const_nelts)) +- return MIN (npatterns * nelts_per_pattern, const_nelts); +- return npatterns * nelts_per_pattern; +-} +- + /* Return a vector element with the value BASE + FACTOR * STEP. */ + + tree +diff --git a/gcc/tree-vector-builder.h b/gcc/tree-vector-builder.h +index 13af74ad8..add79e476 100644 +--- a/gcc/tree-vector-builder.h ++++ b/gcc/tree-vector-builder.h +@@ -24,10 +24,11 @@ along with GCC; see the file COPYING3. If not see + + /* This class is used to build VECTOR_CSTs from a sequence of elements. + See vector_builder for more details. */ +-class tree_vector_builder : public vector_builder ++class tree_vector_builder : public vector_builder + { +- typedef vector_builder parent; +- friend class vector_builder; ++ typedef vector_builder parent; ++ friend class vector_builder; + + public: + tree_vector_builder () : m_type (0) {} +@@ -37,10 +38,6 @@ public: + tree type () const { return m_type; } + + void new_vector (tree, unsigned int, unsigned int); +- bool new_unary_operation (tree, tree, bool); +- bool new_binary_operation (tree, tree, tree, bool); +- +- static unsigned int binary_encoded_nelts (tree, tree); + + private: + bool equal_p (const_tree, const_tree) const; +@@ -51,6 +48,15 @@ private: + bool can_elide_p (const_tree) const; + void note_representative (tree *, tree); + ++ static poly_uint64 shape_nelts (const_tree t) ++ { return TYPE_VECTOR_SUBPARTS (t); } ++ static poly_uint64 nelts_of (const_tree t) ++ { return VECTOR_CST_NELTS (t); } ++ static unsigned int npatterns_of (const_tree t) ++ { return VECTOR_CST_NPATTERNS (t); } ++ static unsigned int nelts_per_pattern_of (const_tree t) ++ { return VECTOR_CST_NELTS_PER_PATTERN (t); } ++ + tree m_type; + }; + +diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c +index c2c6377d3..71ca80937 100644 +--- a/gcc/tree-vectorizer.c ++++ b/gcc/tree-vectorizer.c +@@ -288,10 +288,7 @@ adjust_simduid_builtins (hash_table *htab) + : BUILT_IN_GOMP_ORDERED_END); + gimple *g + = gimple_build_call (builtin_decl_explicit (bcode), 0); +- tree vdef = gimple_vdef (stmt); +- gimple_set_vdef (g, vdef); +- SSA_NAME_DEF_STMT (vdef) = g; +- gimple_set_vuse (g, gimple_vuse (stmt)); ++ gimple_move_vops (g, stmt); + gsi_replace (&i, g, true); + continue; + } +diff --git a/gcc/tree.c b/gcc/tree.c +index c4b8eea67..62607c63a 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -1965,6 +1965,23 @@ build_index_vector (tree vec_type, poly_uint64 base, poly_uint64 step) + return v.build (); + } + ++/* Return a VECTOR_CST of type VEC_TYPE in which the first NUM_A ++ elements are A and the rest are B. */ ++ ++tree ++build_vector_a_then_b (tree vec_type, unsigned int num_a, tree a, tree b) ++{ ++ gcc_assert (known_le (num_a, TYPE_VECTOR_SUBPARTS (vec_type))); ++ unsigned int count = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_type)); ++ /* Optimize the constant case. */ ++ if ((count & 1) == 0 && TYPE_VECTOR_SUBPARTS (vec_type).is_constant ()) ++ count /= 2; ++ tree_vector_builder builder (vec_type, count, 2); ++ for (unsigned int i = 0; i < count * 2; ++i) ++ builder.quick_push (i < num_a ? a : b); ++ return builder.build (); ++} ++ + /* Something has messed with the elements of CONSTRUCTOR C after it was built; + calculate TREE_CONSTANT and TREE_SIDE_EFFECTS. 
*/ + +diff --git a/gcc/tree.h b/gcc/tree.h +index 6f73593fa..356a9f544 100644 +--- a/gcc/tree.h ++++ b/gcc/tree.h +@@ -2475,10 +2475,10 @@ extern machine_mode vector_type_mode (const_tree); + (DECL_COMMON_CHECK (NODE)->decl_common.mode = (MODE)) + + /* For FUNCTION_DECL, if it is built-in, this identifies which built-in +- operation it is. Note, however, that this field is overloaded, with +- DECL_BUILT_IN_CLASS as the discriminant, so the latter must always be +- checked before any access to the former. */ +-#define DECL_FUNCTION_CODE(NODE) \ ++ operation it is. This is only intended for low-level accesses; ++ normally DECL_FUNCTION_CODE, DECL_FE_FUNCTION_CODE or DECL_MD_FUNCTION ++ should be used instead. */ ++#define DECL_UNCHECKED_FUNCTION_CODE(NODE) \ + (FUNCTION_DECL_CHECK (NODE)->function_decl.function_code) + + /* Test if FCODE is a function code for an alloca operation. */ +@@ -2955,11 +2955,34 @@ extern void decl_fini_priority_insert (tree, priority_type); + #define DECL_IS_MALLOC(NODE) \ + (FUNCTION_DECL_CHECK (NODE)->function_decl.malloc_flag) + ++/* Macro for direct set and get of function_decl.decl_type. */ ++#define FUNCTION_DECL_DECL_TYPE(NODE) \ ++ (NODE->function_decl.decl_type) ++ ++/* Set decl_type of a DECL. Set it to T when SET is true, or reset ++ it to NONE. */ ++ ++static inline void ++set_function_decl_type (tree decl, function_decl_type t, bool set) ++{ ++ if (set) ++ { ++ gcc_assert (FUNCTION_DECL_DECL_TYPE (decl) == NONE ++ || FUNCTION_DECL_DECL_TYPE (decl) == t); ++ decl->function_decl.decl_type = t; ++ } ++ else if (FUNCTION_DECL_DECL_TYPE (decl) == t) ++ FUNCTION_DECL_DECL_TYPE (decl) = NONE; ++} ++ + /* Nonzero in a FUNCTION_DECL means this function should be treated as + C++ operator new, meaning that it returns a pointer for which we + should not use type based aliasing. */ +-#define DECL_IS_OPERATOR_NEW(NODE) \ +- (FUNCTION_DECL_CHECK (NODE)->function_decl.operator_new_flag) ++#define DECL_IS_OPERATOR_NEW_P(NODE) \ ++ (FUNCTION_DECL_CHECK (NODE)->function_decl.decl_type == OPERATOR_NEW) ++ ++#define DECL_SET_IS_OPERATOR_NEW(NODE, VAL) \ ++ set_function_decl_type (FUNCTION_DECL_CHECK (NODE), OPERATOR_NEW, VAL) + + /* Nonzero in a FUNCTION_DECL means this function may return more + than once. */ +@@ -3066,10 +3089,9 @@ extern vec **decl_debug_args_insert (tree); + #define DECL_STRUCT_FUNCTION(NODE) \ + (FUNCTION_DECL_CHECK (NODE)->function_decl.f) + +- + /* For a builtin function, identify which part of the compiler defined it. */ + #define DECL_BUILT_IN_CLASS(NODE) \ +- (FUNCTION_DECL_CHECK (NODE)->function_decl.built_in_class) ++ ((built_in_class) FUNCTION_DECL_CHECK (NODE)->function_decl.built_in_class) + + /* In FUNCTION_DECL, a chain of ..._DECL nodes. */ + #define DECL_ARGUMENTS(NODE) \ +@@ -3104,8 +3126,11 @@ extern vec **decl_debug_args_insert (tree); + (FUNCTION_DECL_CHECK (NODE)->decl_with_vis.cxx_destructor) + + /* In FUNCTION_DECL, this is set if this function is a lambda function. */ +-#define DECL_LAMBDA_FUNCTION(NODE) \ +- (FUNCTION_DECL_CHECK (NODE)->function_decl.lambda_function) ++#define DECL_LAMBDA_FUNCTION_P(NODE) \ ++ (FUNCTION_DECL_CHECK (NODE)->function_decl.decl_type == LAMBDA_FUNCTION) ++ ++#define DECL_SET_LAMBDA_FUNCTION(NODE, VAL) \ ++ set_function_decl_type (FUNCTION_DECL_CHECK (NODE), LAMBDA_FUNCTION, VAL) + + /* In FUNCTION_DECL that represent an virtual method this is set when + the method is final. 
*/ +@@ -3788,6 +3813,61 @@ valid_vector_subparts_p (poly_uint64 subparts) + return true; + } + ++/* Return the built-in function that DECL represents, given that it is known ++ to be a FUNCTION_DECL with built-in class BUILT_IN_NORMAL. */ ++inline built_in_function ++DECL_FUNCTION_CODE (const_tree decl) ++{ ++ const tree_function_decl &fndecl = FUNCTION_DECL_CHECK (decl)->function_decl; ++ gcc_checking_assert (fndecl.built_in_class == BUILT_IN_NORMAL); ++ return (built_in_function) fndecl.function_code; ++} ++ ++/* Return the target-specific built-in function that DECL represents, ++ given that it is known to be a FUNCTION_DECL with built-in class ++ BUILT_IN_MD. */ ++inline int ++DECL_MD_FUNCTION_CODE (const_tree decl) ++{ ++ const tree_function_decl &fndecl = FUNCTION_DECL_CHECK (decl)->function_decl; ++ gcc_checking_assert (fndecl.built_in_class == BUILT_IN_MD); ++ return fndecl.function_code; ++} ++ ++/* Return the frontend-specific built-in function that DECL represents, ++ given that it is known to be a FUNCTION_DECL with built-in class ++ BUILT_IN_FRONTEND. */ ++inline int ++DECL_FE_FUNCTION_CODE (const_tree decl) ++{ ++ const tree_function_decl &fndecl = FUNCTION_DECL_CHECK (decl)->function_decl; ++ gcc_checking_assert (fndecl.built_in_class == BUILT_IN_FRONTEND); ++ return fndecl.function_code; ++} ++ ++/* Record that FUNCTION_DECL DECL represents built-in function FCODE of ++ class FCLASS. */ ++inline void ++set_decl_built_in_function (tree decl, built_in_class fclass, ++ unsigned int fcode) ++{ ++ tree_function_decl &fndecl = FUNCTION_DECL_CHECK (decl)->function_decl; ++ fndecl.built_in_class = fclass; ++ fndecl.function_code = fcode; ++} ++ ++/* Record that FUNCTION_DECL NEWDECL represents the same built-in function ++ as OLDDECL (or none, if OLDDECL doesn't represent a built-in function). */ ++inline void ++copy_decl_built_in_function (tree newdecl, const_tree olddecl) ++{ ++ tree_function_decl &newfndecl = FUNCTION_DECL_CHECK (newdecl)->function_decl; ++ const tree_function_decl &oldfndecl ++ = FUNCTION_DECL_CHECK (olddecl)->function_decl; ++ newfndecl.built_in_class = oldfndecl.built_in_class; ++ newfndecl.function_code = oldfndecl.function_code; ++} ++ + /* In NON_LVALUE_EXPR and VIEW_CONVERT_EXPR, set when this node is merely a + wrapper added to express a location_t on behalf of the node's child + (e.g. by maybe_wrap_with_location). */ +@@ -4212,6 +4292,7 @@ extern tree build_vector_from_val (tree, tree); + extern tree build_uniform_cst (tree, tree); + extern tree build_vec_series (tree, tree, tree); + extern tree build_index_vector (tree, poly_uint64, poly_uint64); ++extern tree build_vector_a_then_b (tree, unsigned int, tree, tree); + extern void recompute_constructor_flags (tree); + extern void verify_constructor_flags (tree); + extern tree build_constructor (tree, vec *); +@@ -5967,9 +6048,10 @@ fndecl_built_in_p (const_tree node, built_in_class klass) + of class KLASS with name equal to NAME. 
*/ + + inline bool +-fndecl_built_in_p (const_tree node, int name, built_in_class klass) ++fndecl_built_in_p (const_tree node, unsigned int name, built_in_class klass) + { +- return (fndecl_built_in_p (node, klass) && DECL_FUNCTION_CODE (node) == name); ++ return (fndecl_built_in_p (node, klass) ++ && DECL_UNCHECKED_FUNCTION_CODE (node) == name); + } + + /* Return true if a FUNCTION_DECL NODE is a GCC built-in function +diff --git a/gcc/var-tracking.c b/gcc/var-tracking.c +index 96e0c93a6..982ef13d1 100644 +--- a/gcc/var-tracking.c ++++ b/gcc/var-tracking.c +@@ -116,6 +116,7 @@ + #include "rtl-iter.h" + #include "fibonacci_heap.h" + #include "print-rtl.h" ++#include "function-abi.h" + + typedef fibonacci_heap bb_heap_t; + typedef fibonacci_node bb_heap_node_t; +@@ -1238,7 +1239,7 @@ adjust_insn (basic_block bb, rtx_insn *insn) + amd.stack_adjust = -VTI (bb)->out.stack_adjust; + + amd.store = true; +- note_stores (PATTERN (insn), adjust_mem_stores, &amd); ++ note_stores (insn, adjust_mem_stores, &amd); + + amd.store = false; + if (GET_CODE (PATTERN (insn)) == PARALLEL +@@ -4899,12 +4900,11 @@ dataflow_set_clear_at_call (dataflow_set *set, rtx_insn *call_insn) + { + unsigned int r; + hard_reg_set_iterator hrsi; +- HARD_REG_SET invalidated_regs; + +- get_call_reg_set_usage (call_insn, &invalidated_regs, +- regs_invalidated_by_call); ++ HARD_REG_SET callee_clobbers ++ = insn_callee_abi (call_insn).full_reg_clobbers (); + +- EXECUTE_IF_SET_IN_HARD_REG_SET (invalidated_regs, 0, r, hrsi) ++ EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, r, hrsi) + var_regno_delete (set, r); + + if (MAY_HAVE_DEBUG_BIND_INSNS) +@@ -6292,14 +6292,12 @@ prepare_call_arguments (basic_block bb, rtx_insn *insn) + && targetm.calls.struct_value_rtx (type, 0) == 0) + { + tree struct_addr = build_pointer_type (TREE_TYPE (type)); +- machine_mode mode = TYPE_MODE (struct_addr); ++ function_arg_info arg (struct_addr, /*named=*/true); + rtx reg; + INIT_CUMULATIVE_ARGS (args_so_far_v, type, NULL_RTX, fndecl, + nargs + 1); +- reg = targetm.calls.function_arg (args_so_far, mode, +- struct_addr, true); +- targetm.calls.function_arg_advance (args_so_far, mode, +- struct_addr, true); ++ reg = targetm.calls.function_arg (args_so_far, arg); ++ targetm.calls.function_arg_advance (args_so_far, arg); + if (reg == NULL_RTX) + { + for (; link; link = XEXP (link, 1)) +@@ -6317,11 +6315,9 @@ prepare_call_arguments (basic_block bb, rtx_insn *insn) + nargs); + if (obj_type_ref && TYPE_ARG_TYPES (type) != void_list_node) + { +- machine_mode mode; + t = TYPE_ARG_TYPES (type); +- mode = TYPE_MODE (TREE_VALUE (t)); +- this_arg = targetm.calls.function_arg (args_so_far, mode, +- TREE_VALUE (t), true); ++ function_arg_info arg (TREE_VALUE (t), /*named=*/true); ++ this_arg = targetm.calls.function_arg (args_so_far, arg); + if (this_arg && !REG_P (this_arg)) + this_arg = NULL_RTX; + else if (this_arg == NULL_RTX) +@@ -6429,30 +6425,24 @@ prepare_call_arguments (basic_block bb, rtx_insn *insn) + } + if (t && t != void_list_node) + { +- tree argtype = TREE_VALUE (t); +- machine_mode mode = TYPE_MODE (argtype); + rtx reg; +- if (pass_by_reference (&args_so_far_v, mode, argtype, true)) +- { +- argtype = build_pointer_type (argtype); +- mode = TYPE_MODE (argtype); +- } +- reg = targetm.calls.function_arg (args_so_far, mode, +- argtype, true); +- if (TREE_CODE (argtype) == REFERENCE_TYPE +- && INTEGRAL_TYPE_P (TREE_TYPE (argtype)) ++ function_arg_info arg (TREE_VALUE (t), /*named=*/true); ++ apply_pass_by_reference_rules (&args_so_far_v, arg); ++ reg = 
targetm.calls.function_arg (args_so_far, arg); ++ if (TREE_CODE (arg.type) == REFERENCE_TYPE ++ && INTEGRAL_TYPE_P (TREE_TYPE (arg.type)) + && reg + && REG_P (reg) +- && GET_MODE (reg) == mode +- && (GET_MODE_CLASS (mode) == MODE_INT +- || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) ++ && GET_MODE (reg) == arg.mode ++ && (GET_MODE_CLASS (arg.mode) == MODE_INT ++ || GET_MODE_CLASS (arg.mode) == MODE_PARTIAL_INT) + && REG_P (x) + && REGNO (x) == REGNO (reg) +- && GET_MODE (x) == mode ++ && GET_MODE (x) == arg.mode + && item) + { + machine_mode indmode +- = TYPE_MODE (TREE_TYPE (argtype)); ++ = TYPE_MODE (TREE_TYPE (arg.type)); + rtx mem = gen_rtx_MEM (indmode, x); + cselib_val *val = cselib_lookup (mem, indmode, 0, VOIDmode); + if (val && cselib_preserved_value_p (val)) +@@ -6492,8 +6482,7 @@ prepare_call_arguments (basic_block bb, rtx_insn *insn) + } + } + } +- targetm.calls.function_arg_advance (args_so_far, mode, +- argtype, true); ++ targetm.calls.function_arg_advance (args_so_far, arg); + t = TREE_CHAIN (t); + } + } +@@ -6642,7 +6631,7 @@ add_with_sets (rtx_insn *insn, struct cselib_set *sets, int n_sets) + insert notes before it without worrying about any + notes that MO_USEs might emit after the insn. */ + cui.store_p = true; +- note_stores (PATTERN (insn), add_stores, &cui); ++ note_stores (insn, add_stores, &cui); + n2 = VTI (bb)->mos.length () - 1; + mos = VTI (bb)->mos.address (); + +diff --git a/gcc/vector-builder.h b/gcc/vector-builder.h +index 9967daa6e..37911ac69 100644 +--- a/gcc/vector-builder.h ++++ b/gcc/vector-builder.h +@@ -45,8 +45,11 @@ along with GCC; see the file COPYING3. If not see + variable-length vectors. finalize () then canonicalizes the encoding + to a simpler form if possible. + +- The derived class Derived provides this functionality for specific Ts. +- Derived needs to provide the following interface: ++ Shape is the type that specifies the number of elements in the vector ++ and (where relevant) the type of each element. ++ ++ The derived class Derived provides the functionality of this class ++ for specific Ts. Derived needs to provide the following interface: + + bool equal_p (T elt1, T elt2) const; + +@@ -82,9 +85,30 @@ along with GCC; see the file COPYING3. If not see + + Record that ELT2 is being elided, given that ELT1_PTR points to + the last encoded element for the containing pattern. This is +- again provided for TREE_OVERFLOW handling. */ ++ again provided for TREE_OVERFLOW handling. ++ ++ static poly_uint64 shape_nelts (Shape shape); ++ ++ Return the number of elements in SHAPE. ++ ++ The class provides additional functionality for the case in which ++ T can describe a vector constant as well as an individual element. ++ This functionality requires: ++ ++ static poly_uint64 nelts_of (T x); ++ ++ Return the number of elements in vector constant X. ++ ++ static unsigned int npatterns_of (T x); + +-template ++ Return the number of patterns used to encode vector constant X. ++ ++ static unsigned int nelts_per_pattern_of (T x); ++ ++ Return the number of elements used to encode each pattern ++ in vector constant X. 
*/ ++ ++template + class vector_builder : public auto_vec + { + public: +@@ -96,12 +120,18 @@ public: + unsigned int encoded_nelts () const; + bool encoded_full_vector_p () const; + T elt (unsigned int) const; ++ unsigned int count_dups (int, int, int) const; + + bool operator == (const Derived &) const; + bool operator != (const Derived &x) const { return !operator == (x); } + ++ bool new_unary_operation (Shape, T, bool); ++ bool new_binary_operation (Shape, T, T, bool); ++ + void finalize (); + ++ static unsigned int binary_encoded_nelts (T, T); ++ + protected: + void new_vector (poly_uint64, unsigned int, unsigned int); + void reshape (unsigned int, unsigned int); +@@ -120,16 +150,16 @@ private: + unsigned int m_nelts_per_pattern; + }; + +-template ++template + inline const Derived * +-vector_builder::derived () const ++vector_builder::derived () const + { + return static_cast (this); + } + +-template ++template + inline +-vector_builder::vector_builder () ++vector_builder::vector_builder () + : m_full_nelts (0), + m_npatterns (0), + m_nelts_per_pattern (0) +@@ -139,18 +169,18 @@ vector_builder::vector_builder () + starts with these explicitly-encoded elements and may contain additional + elided elements. */ + +-template ++template + inline unsigned int +-vector_builder::encoded_nelts () const ++vector_builder::encoded_nelts () const + { + return m_npatterns * m_nelts_per_pattern; + } + + /* Return true if every element of the vector is explicitly encoded. */ + +-template ++template + inline bool +-vector_builder::encoded_full_vector_p () const ++vector_builder::encoded_full_vector_p () const + { + return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts); + } +@@ -158,11 +188,11 @@ vector_builder::encoded_full_vector_p () const + /* Start building a vector that has FULL_NELTS elements. Initially + encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */ + +-template ++template + void +-vector_builder::new_vector (poly_uint64 full_nelts, +- unsigned int npatterns, +- unsigned int nelts_per_pattern) ++vector_builder::new_vector (poly_uint64 full_nelts, ++ unsigned int npatterns, ++ unsigned int nelts_per_pattern) + { + m_full_nelts = full_nelts; + m_npatterns = npatterns; +@@ -174,9 +204,9 @@ vector_builder::new_vector (poly_uint64 full_nelts, + /* Return true if this vector and OTHER have the same elements and + are encoded in the same way. */ + +-template ++template + bool +-vector_builder::operator == (const Derived &other) const ++vector_builder::operator == (const Derived &other) const + { + if (maybe_ne (m_full_nelts, other.m_full_nelts) + || m_npatterns != other.m_npatterns +@@ -194,18 +224,19 @@ vector_builder::operator == (const Derived &other) const + /* Return the value of vector element I, which might or might not be + encoded explicitly. */ + +-template ++template + T +-vector_builder::elt (unsigned int i) const ++vector_builder::elt (unsigned int i) const + { +- /* This only makes sense if the encoding has been fully populated. */ +- gcc_checking_assert (encoded_nelts () <= this->length ()); +- + /* First handle elements that are already present in the underlying + vector, regardless of whether they're part of the encoding or not. */ + if (i < this->length ()) + return (*this)[i]; + ++ /* Extrapolation is only possible if the encoding has been fully ++ populated. */ ++ gcc_checking_assert (encoded_nelts () <= this->length ()); ++ + /* Identify the pattern that contains element I and work out the index of + the last encoded element for that pattern. 
*/ + unsigned int pattern = i % m_npatterns; +@@ -223,13 +254,136 @@ vector_builder::elt (unsigned int i) const + derived ()->step (prev, final)); + } + ++/* Try to start building a new vector of shape SHAPE that holds the result of ++ a unary operation on vector constant VEC. ALLOW_STEPPED_P is true if the ++ operation can handle stepped encodings directly, without having to expand ++ the full sequence. ++ ++ Return true if the operation is possible, which it always is when ++ ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */ ++ ++template ++bool ++vector_builder::new_unary_operation (Shape shape, T vec, ++ bool allow_stepped_p) ++{ ++ poly_uint64 full_nelts = Derived::shape_nelts (shape); ++ gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec))); ++ unsigned int npatterns = Derived::npatterns_of (vec); ++ unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec); ++ if (!allow_stepped_p && nelts_per_pattern > 2) ++ { ++ if (!full_nelts.is_constant ()) ++ return false; ++ npatterns = full_nelts.to_constant (); ++ nelts_per_pattern = 1; ++ } ++ derived ()->new_vector (shape, npatterns, nelts_per_pattern); ++ return true; ++} ++ ++/* Try to start building a new vector of shape SHAPE that holds the result of ++ a binary operation on vector constants VEC1 and VEC2. ALLOW_STEPPED_P is ++ true if the operation can handle stepped encodings directly, without ++ having to expand the full sequence. ++ ++ Return true if the operation is possible. Leave the builder unchanged ++ otherwise. */ ++ ++template ++bool ++vector_builder::new_binary_operation (Shape shape, ++ T vec1, T vec2, ++ bool allow_stepped_p) ++{ ++ poly_uint64 full_nelts = Derived::shape_nelts (shape); ++ gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1)) ++ && known_eq (full_nelts, Derived::nelts_of (vec2))); ++ /* Conceptually we split the patterns in VEC1 and VEC2 until we have ++ an equal number for both. Each split pattern requires the same ++ number of elements per pattern as the original. E.g. splitting: ++ ++ { 1, 2, 3, ... } ++ ++ into two gives: ++ ++ { 1, 3, 5, ... } ++ { 2, 4, 6, ... } ++ ++ while splitting: ++ ++ { 1, 0, ... } ++ ++ into two gives: ++ ++ { 1, 0, ... } ++ { 0, 0, ... }. */ ++ unsigned int npatterns ++ = least_common_multiple (Derived::npatterns_of (vec1), ++ Derived::npatterns_of (vec2)); ++ unsigned int nelts_per_pattern ++ = MAX (Derived::nelts_per_pattern_of (vec1), ++ Derived::nelts_per_pattern_of (vec2)); ++ if (!allow_stepped_p && nelts_per_pattern > 2) ++ { ++ if (!full_nelts.is_constant ()) ++ return false; ++ npatterns = full_nelts.to_constant (); ++ nelts_per_pattern = 1; ++ } ++ derived ()->new_vector (shape, npatterns, nelts_per_pattern); ++ return true; ++} ++ ++/* Return the number of elements that the caller needs to operate on in ++ order to handle a binary operation on vector constants VEC1 and VEC2. ++ This static function is used instead of new_binary_operation if the ++ result of the operation is not a constant vector. */ ++ ++template ++unsigned int ++vector_builder::binary_encoded_nelts (T vec1, T vec2) ++{ ++ poly_uint64 nelts = Derived::nelts_of (vec1); ++ gcc_assert (known_eq (nelts, Derived::nelts_of (vec2))); ++ /* See new_binary_operation for details. 
*/ ++ unsigned int npatterns ++ = least_common_multiple (Derived::npatterns_of (vec1), ++ Derived::npatterns_of (vec2)); ++ unsigned int nelts_per_pattern ++ = MAX (Derived::nelts_per_pattern_of (vec1), ++ Derived::nelts_per_pattern_of (vec2)); ++ unsigned HOST_WIDE_INT const_nelts; ++ if (nelts.is_constant (&const_nelts)) ++ return MIN (npatterns * nelts_per_pattern, const_nelts); ++ return npatterns * nelts_per_pattern; ++} ++ ++/* Return the number of leading duplicate elements in the range ++ [START:END:STEP]. The value is always at least 1. */ ++ ++template ++unsigned int ++vector_builder::count_dups (int start, int end, ++ int step) const ++{ ++ gcc_assert ((end - start) % step == 0); ++ ++ unsigned int ndups = 1; ++ for (int i = start + step; ++ i != end && derived ()->equal_p (elt (i), elt (start)); ++ i += step) ++ ndups++; ++ return ndups; ++} ++ + /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each, + but without changing the underlying vector. */ + +-template ++template + void +-vector_builder::reshape (unsigned int npatterns, +- unsigned int nelts_per_pattern) ++vector_builder::reshape (unsigned int npatterns, ++ unsigned int nelts_per_pattern) + { + unsigned int old_encoded_nelts = encoded_nelts (); + unsigned int new_encoded_nelts = npatterns * nelts_per_pattern; +@@ -249,11 +403,11 @@ vector_builder::reshape (unsigned int npatterns, + /* Return true if elements [START, END) contain a repeating sequence of + STEP elements. */ + +-template ++template + bool +-vector_builder::repeating_sequence_p (unsigned int start, +- unsigned int end, +- unsigned int step) ++vector_builder::repeating_sequence_p (unsigned int start, ++ unsigned int end, ++ unsigned int step) + { + for (unsigned int i = start; i < end - step; ++i) + if (!derived ()->equal_p ((*this)[i], (*this)[i + step])) +@@ -264,11 +418,11 @@ vector_builder::repeating_sequence_p (unsigned int start, + /* Return true if elements [START, END) contain STEP interleaved linear + series. */ + +-template ++template + bool +-vector_builder::stepped_sequence_p (unsigned int start, +- unsigned int end, +- unsigned int step) ++vector_builder::stepped_sequence_p (unsigned int start, ++ unsigned int end, ++ unsigned int step) + { + if (!derived ()->allow_steps_p ()) + return false; +@@ -297,9 +451,9 @@ vector_builder::stepped_sequence_p (unsigned int start, + /* Try to change the number of encoded patterns to NPATTERNS, returning + true on success. */ + +-template ++template + bool +-vector_builder::try_npatterns (unsigned int npatterns) ++vector_builder::try_npatterns (unsigned int npatterns) + { + if (m_nelts_per_pattern == 1) + { +@@ -350,9 +504,9 @@ vector_builder::try_npatterns (unsigned int npatterns) + + /* Replace the current encoding with the canonical form. */ + +-template ++template + void +-vector_builder::finalize () ++vector_builder::finalize () + { + /* The encoding requires the same number of elements to come from each + pattern. 
*/ +diff --git a/libgcc/config.host b/libgcc/config.host +index 0f15fda36..9500ec2ee 100644 +--- a/libgcc/config.host ++++ b/libgcc/config.host +@@ -356,6 +356,12 @@ aarch64*-*-freebsd*) + tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" + md_unwind_header=aarch64/freebsd-unwind.h + ;; ++aarch64*-*-netbsd*) ++ extra_parts="$extra_parts crtfastmath.o" ++ tmake_file="${tmake_file} ${cpu_type}/t-aarch64" ++ tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" ++ md_unwind_header=aarch64/aarch64-unwind.h ++ ;; + aarch64*-*-fuchsia*) + tmake_file="${tmake_file} ${cpu_type}/t-aarch64" + tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp" +diff --git a/libgcc/config/aarch64/aarch64-unwind.h b/libgcc/config/aarch64/aarch64-unwind.h +index 223ac9157..13e6e4a6a 100644 +--- a/libgcc/config/aarch64/aarch64-unwind.h ++++ b/libgcc/config/aarch64/aarch64-unwind.h +@@ -35,6 +35,23 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + #define MD_FROB_UPDATE_CONTEXT(context, fs) \ + aarch64_frob_update_context (context, fs) + ++static inline int ++aarch64_cie_signed_with_b_key (struct _Unwind_Context *context) ++{ ++ const struct dwarf_fde *fde = _Unwind_Find_FDE (context->bases.func, ++ &context->bases); ++ if (fde != NULL) ++ { ++ const struct dwarf_cie *cie = get_cie (fde); ++ if (cie != NULL) ++ { ++ char *aug_str = cie->augmentation; ++ return strchr (aug_str, 'B') == NULL ? 0 : 1; ++ } ++ } ++ return 0; ++} ++ + /* Do AArch64 private extraction on ADDR based on context info CONTEXT and + unwind frame info FS. If ADDR is signed, we do address authentication on it + using CFA of current frame. */ +@@ -43,9 +60,11 @@ static inline void * + aarch64_post_extract_frame_addr (struct _Unwind_Context *context, + _Unwind_FrameState *fs, void *addr) + { +- if (fs->regs.reg[DWARF_REGNUM_AARCH64_RA_STATE].loc.offset & 0x1) ++ if (context->flags & RA_SIGNED_BIT) + { + _Unwind_Word salt = (_Unwind_Word) context->cfa; ++ if (aarch64_cie_signed_with_b_key (context) != 0) ++ return __builtin_aarch64_autib1716 (addr, salt); + return __builtin_aarch64_autia1716 (addr, salt); + } + else +@@ -62,9 +81,14 @@ aarch64_post_frob_eh_handler_addr (struct _Unwind_Context *current, + ATTRIBUTE_UNUSED, + void *handler_addr) + { +- if (current->flags & RA_A_SIGNED_BIT) +- return __builtin_aarch64_pacia1716 (handler_addr, ++ if (current->flags & RA_SIGNED_BIT) ++ { ++ if (aarch64_cie_signed_with_b_key (current)) ++ return __builtin_aarch64_pacib1716 (handler_addr, ++ (_Unwind_Word) current->cfa); ++ return __builtin_aarch64_pacia1716 (handler_addr, + (_Unwind_Word) current->cfa); ++ } + else + return handler_addr; + } +@@ -79,7 +103,7 @@ aarch64_frob_update_context (struct _Unwind_Context *context, + { + if (fs->regs.reg[DWARF_REGNUM_AARCH64_RA_STATE].loc.offset & 0x1) + /* The flag is used for re-authenticating EH handler's address. */ +- context->flags |= RA_A_SIGNED_BIT; ++ context->flags |= RA_SIGNED_BIT; + + return; + } +diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c +index 24b4ecee6..40ebf85a9 100644 +--- a/libgcc/unwind-dw2-fde.c ++++ b/libgcc/unwind-dw2-fde.c +@@ -334,6 +334,9 @@ get_cie_encoding (const struct dwarf_cie *cie) + /* LSDA encoding. */ + else if (*aug == 'L') + p++; ++ /* aarch64 b-key pointer authentication. */ ++ else if (*aug == 'B') ++ p++; + /* Otherwise end of string, or unknown augmentation. 
*/ + else + return DW_EH_PE_absptr; +diff --git a/libgcc/unwind-dw2.c b/libgcc/unwind-dw2.c +index e6130af2f..e76a1cbc4 100644 +--- a/libgcc/unwind-dw2.c ++++ b/libgcc/unwind-dw2.c +@@ -136,8 +136,9 @@ struct _Unwind_Context + #define SIGNAL_FRAME_BIT ((~(_Unwind_Word) 0 >> 1) + 1) + /* Context which has version/args_size/by_value fields. */ + #define EXTENDED_CONTEXT_BIT ((~(_Unwind_Word) 0 >> 2) + 1) +- /* Bit reserved on AArch64, return address has been signed with A key. */ +-#define RA_A_SIGNED_BIT ((~(_Unwind_Word) 0 >> 3) + 1) ++ /* Bit reserved on AArch64, return address has been signed with A or B ++ key. */ ++#define RA_SIGNED_BIT ((~(_Unwind_Word) 0 >> 3) + 1) + _Unwind_Word flags; + /* 0 for now, can be increased when further fields are added to + struct _Unwind_Context. */ +@@ -502,6 +503,11 @@ extract_cie_info (const struct dwarf_cie *cie, struct _Unwind_Context *context, + fs->signal_frame = 1; + aug += 1; + } ++ /* aarch64 B-key pointer authentication. */ ++ else if (aug[0] == 'B') ++ { ++ aug += 1; ++ } + + /* Otherwise we have an unknown augmentation string. + Bail unless we saw a 'z' prefix. */ diff --git a/change-gcc-BASE-VER.patch b/change-gcc-BASE-VER.patch index 79dd167..95e8324 100644 --- a/change-gcc-BASE-VER.patch +++ b/change-gcc-BASE-VER.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-Bump-BASE-VER-to-9.3.1.patch +9f26e5863a75744bbee1479792ecae084a3ceb20 + diff -Nurp a/gcc/BASE-VER b/gcc/BASE-VER --- a/gcc/BASE-VER 2020-08-19 10:47:14.100000000 +0800 +++ b/gcc/BASE-VER 2020-08-19 10:32:30.380000000 +0800 diff --git a/dont-generate-IF_THEN_ELSE.patch b/dont-generate-IF_THEN_ELSE.patch index 791b57b..16f28a4 100644 --- a/dont-generate-IF_THEN_ELSE.patch +++ b/dont-generate-IF_THEN_ELSE.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-combine-Don-t-generate-IF_THEN_ELSE.patch +ddbb5da5199fb421dc398911c37fa7f896efc13f + diff --git a/gcc/combine.c b/gcc/combine.c index 4de759a8e6b..ce7aeecb5c2 100644 --- a/gcc/combine.c diff --git a/fix-ICE-IPA-compare-VRP-types.patch b/fix-ICE-IPA-compare-VRP-types.patch new file mode 100644 index 0000000..3f1b316 --- /dev/null +++ b/fix-ICE-IPA-compare-VRP-types.patch @@ -0,0 +1,51 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-IPA-compare-VRP-types.patch +a86623902767122c71c7229150a8b8a79cbb3673 + +diff -Nurp a/gcc/ipa-prop.c b/gcc/ipa-prop.c +--- a/gcc/ipa-prop.c 2020-11-28 00:19:34.340000000 +0800 ++++ b/gcc/ipa-prop.c 2020-11-28 00:21:24.680000000 +0800 +@@ -122,7 +122,8 @@ struct ipa_vr_ggc_hash_traits : public g + static bool + equal (const value_range_base *a, const value_range_base *b) + { +- return a->equal_p (*b); ++ return (a->equal_p (*b) ++ && types_compatible_p (a->type (), b->type ())); + } + static void + mark_empty (value_range_base *&p) +diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr97404.c b/gcc/testsuite/gcc.c-torture/execute/pr97404.c +--- a/gcc/testsuite/gcc.c-torture/execute/pr97404.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.c-torture/execute/pr97404.c 2020-11-28 00:21:24.680000000 +0800 +@@ -0,0 +1,28 @@ ++/* PR ipa/97404 */ ++/* { dg-additional-options "-fno-inline" } */ ++ ++char a, b; ++long c; ++short d, e; ++long *f = &c; ++int g; ++char h(signed char i) { return 0; } ++static short j(short i, int k) { return i < 0 ? 0 : i >> k; } ++void l(void); ++void m(void) ++{ ++ e = j(d | 9766, 11); ++ *f = e; ++} ++void l(void) ++{ ++ a = 5 | g; ++ b = h(a); ++} ++int main() ++{ ++ m(); ++ if (c != 4) ++ __builtin_abort(); ++ return 0; ++} diff --git a/fix-ICE-in-affine-combination.patch b/fix-ICE-in-affine-combination.patch new file mode 100644 index 0000000..e582681 --- /dev/null +++ b/fix-ICE-in-affine-combination.patch @@ -0,0 +1,396 @@ +This backport contains 2 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-tree-affine.c-expr_to_aff_combination-New-function-s.patch +5120e0d8d48f4590a275e60565de6c5a4e772fc1 + +0001-PR-tree-optimization-94574-aarch64-ICE-during-GIMPLE.patch +0447929f11e6a3e1b076841712b90a8b6bc7d33a + +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 2020-12-08 14:54:11.467633230 +0800 +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ ++ ++#define TYPE unsigned int ++ ++#include "pr83403.h" ++ ++/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 2020-12-08 14:54:11.467633230 +0800 +@@ -0,0 +1,8 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ ++ ++#define TYPE int ++ ++#include "pr83403.h" ++ ++/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 2020-12-08 14:54:11.467633230 +0800 +@@ -0,0 +1,30 @@ ++__attribute__ ((noinline)) void ++calculate (const double *__restrict__ A, const double *__restrict__ B, ++ double *__restrict__ C) ++{ ++ TYPE m = 0; ++ TYPE n = 0; ++ TYPE k = 0; ++ ++ A = (const double *) __builtin_assume_aligned (A, 16); ++ B = (const double *) __builtin_assume_aligned (B, 16); ++ C = (double *) __builtin_assume_aligned (C, 16); ++ ++ for (n = 0; n < 9; n++) ++ 
{ ++ for (m = 0; m < 10; m++) ++ { ++ C[(n * 10) + m] = 0.0; ++ } ++ ++ for (k = 0; k < 17; k++) ++ { ++#pragma simd ++ for (m = 0; m < 10; m++) ++ { ++ C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k]; ++ } ++ } ++ } ++} ++ +diff -Nurp a/gcc/tree-affine.c b/gcc/tree-affine.c +--- a/gcc/tree-affine.c 2020-12-09 09:01:13.179633230 +0800 ++++ b/gcc/tree-affine.c 2020-12-08 14:54:11.467633230 +0800 +@@ -259,104 +259,66 @@ aff_combination_convert (aff_tree *comb, + } + } + +-/* Splits EXPR into an affine combination of parts. */ ++/* Tries to handle OP0 CODE OP1 as affine combination of parts. Returns ++ true when that was successful and returns the combination in COMB. */ + +-void +-tree_to_aff_combination (tree expr, tree type, aff_tree *comb) ++static bool ++expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, ++ tree op0, tree op1 = NULL_TREE) + { + aff_tree tmp; +- enum tree_code code; +- tree cst, core, toffset; + poly_int64 bitpos, bitsize, bytepos; +- machine_mode mode; +- int unsignedp, reversep, volatilep; +- +- STRIP_NOPS (expr); + +- code = TREE_CODE (expr); + switch (code) + { + case POINTER_PLUS_EXPR: +- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); +- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp); ++ tree_to_aff_combination (op0, type, comb); ++ tree_to_aff_combination (op1, sizetype, &tmp); + aff_combination_add (comb, &tmp); +- return; ++ return true; + + case PLUS_EXPR: + case MINUS_EXPR: +- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); +- tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp); ++ tree_to_aff_combination (op0, type, comb); ++ tree_to_aff_combination (op1, type, &tmp); + if (code == MINUS_EXPR) + aff_combination_scale (&tmp, -1); + aff_combination_add (comb, &tmp); +- return; ++ return true; + + case MULT_EXPR: +- cst = TREE_OPERAND (expr, 1); +- if (TREE_CODE (cst) != INTEGER_CST) ++ if (TREE_CODE (op1) != INTEGER_CST) + break; +- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); +- aff_combination_scale (comb, wi::to_widest (cst)); +- return; ++ tree_to_aff_combination (op0, type, comb); ++ aff_combination_scale (comb, wi::to_widest (op1)); ++ return true; + + case NEGATE_EXPR: +- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); ++ tree_to_aff_combination (op0, type, comb); + aff_combination_scale (comb, -1); +- return; ++ return true; + + case BIT_NOT_EXPR: + /* ~x = -x - 1 */ +- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); ++ tree_to_aff_combination (op0, type, comb); + aff_combination_scale (comb, -1); + aff_combination_add_cst (comb, -1); +- return; +- +- case ADDR_EXPR: +- /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. 
*/ +- if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF) +- { +- expr = TREE_OPERAND (expr, 0); +- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); +- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp); +- aff_combination_add (comb, &tmp); +- return; +- } +- core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos, +- &toffset, &mode, &unsignedp, &reversep, +- &volatilep); +- if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos)) +- break; +- aff_combination_const (comb, type, bytepos); +- if (TREE_CODE (core) == MEM_REF) +- { +- tree mem_offset = TREE_OPERAND (core, 1); +- aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset)); +- core = TREE_OPERAND (core, 0); +- } +- else +- core = build_fold_addr_expr (core); +- +- if (TREE_CODE (core) == ADDR_EXPR) +- aff_combination_add_elt (comb, core, 1); +- else +- { +- tree_to_aff_combination (core, type, &tmp); +- aff_combination_add (comb, &tmp); +- } +- if (toffset) +- { +- tree_to_aff_combination (toffset, type, &tmp); +- aff_combination_add (comb, &tmp); +- } +- return; ++ return true; + + CASE_CONVERT: + { +- tree otype = TREE_TYPE (expr); +- tree inner = TREE_OPERAND (expr, 0); ++ tree otype = type; ++ tree inner = op0; + tree itype = TREE_TYPE (inner); + enum tree_code icode = TREE_CODE (inner); + ++ /* STRIP_NOPS */ ++ if (tree_nop_conversion_p (otype, itype)) ++ { ++ tree_to_aff_combination (op0, type, comb); ++ return true; ++ } ++ + /* In principle this is a valid folding, but it isn't necessarily + an optimization, so do it here and not in fold_unary. */ + if ((icode == PLUS_EXPR || icode == MINUS_EXPR || icode == MULT_EXPR) +@@ -376,38 +338,127 @@ tree_to_aff_combination (tree expr, tree + { + op0 = fold_convert (otype, op0); + op1 = fold_convert (otype, op1); +- expr = fold_build2 (icode, otype, op0, op1); +- tree_to_aff_combination (expr, type, comb); +- return; ++ return expr_to_aff_combination (comb, icode, otype, op0, op1); + } + wide_int minv, maxv; + /* If inner type has wrapping overflow behavior, fold conversion + for below case: +- (T1)(X - CST) -> (T1)X - (T1)CST +- if X - CST doesn't overflow by range information. Also handle +- (T1)(X + CST) as (T1)(X - (-CST)). */ ++ (T1)(X *+- CST) -> (T1)X *+- (T1)CST ++ if X *+- CST doesn't overflow by range information. */ + if (TYPE_UNSIGNED (itype) + && TYPE_OVERFLOW_WRAPS (itype) +- && TREE_CODE (op0) == SSA_NAME + && TREE_CODE (op1) == INTEGER_CST +- && icode != MULT_EXPR +- && get_range_info (op0, &minv, &maxv) == VR_RANGE) ++ && determine_value_range (op0, &minv, &maxv) == VR_RANGE) + { ++ wi::overflow_type overflow = wi::OVF_NONE; ++ signop sign = UNSIGNED; + if (icode == PLUS_EXPR) +- op1 = wide_int_to_tree (itype, -wi::to_wide (op1)); +- if (wi::geu_p (minv, wi::to_wide (op1))) ++ wi::add (maxv, wi::to_wide (op1), sign, &overflow); ++ else if (icode == MULT_EXPR) ++ wi::mul (maxv, wi::to_wide (op1), sign, &overflow); ++ else ++ wi::sub (minv, wi::to_wide (op1), sign, &overflow); ++ ++ if (overflow == wi::OVF_NONE) + { + op0 = fold_convert (otype, op0); + op1 = fold_convert (otype, op1); +- expr = fold_build2 (MINUS_EXPR, otype, op0, op1); +- tree_to_aff_combination (expr, type, comb); +- return; ++ return expr_to_aff_combination (comb, icode, otype, op0, ++ op1); + } + } + } + } + break; + ++ default:; ++ } ++ ++ return false; ++} ++ ++/* Splits EXPR into an affine combination of parts. 
*/ ++ ++void ++tree_to_aff_combination (tree expr, tree type, aff_tree *comb) ++{ ++ aff_tree tmp; ++ enum tree_code code; ++ tree core, toffset; ++ poly_int64 bitpos, bitsize, bytepos; ++ machine_mode mode; ++ int unsignedp, reversep, volatilep; ++ ++ STRIP_NOPS (expr); ++ ++ code = TREE_CODE (expr); ++ switch (code) ++ { ++ case POINTER_PLUS_EXPR: ++ case PLUS_EXPR: ++ case MINUS_EXPR: ++ case MULT_EXPR: ++ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0), ++ TREE_OPERAND (expr, 1))) ++ return; ++ break; ++ ++ case NEGATE_EXPR: ++ case BIT_NOT_EXPR: ++ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0))) ++ return; ++ break; ++ ++ CASE_CONVERT: ++ /* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS ++ calls this with not showing an outer widening cast. */ ++ if (expr_to_aff_combination (comb, code, ++ TREE_TYPE (expr), TREE_OPERAND (expr, 0))) ++ { ++ aff_combination_convert (comb, type); ++ return; ++ } ++ break; ++ ++ case ADDR_EXPR: ++ /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */ ++ if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF) ++ { ++ expr = TREE_OPERAND (expr, 0); ++ tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); ++ tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp); ++ aff_combination_add (comb, &tmp); ++ return; ++ } ++ core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos, ++ &toffset, &mode, &unsignedp, &reversep, ++ &volatilep); ++ if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos)) ++ break; ++ aff_combination_const (comb, type, bytepos); ++ if (TREE_CODE (core) == MEM_REF) ++ { ++ tree mem_offset = TREE_OPERAND (core, 1); ++ aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset)); ++ core = TREE_OPERAND (core, 0); ++ } ++ else ++ core = build_fold_addr_expr (core); ++ ++ if (TREE_CODE (core) == ADDR_EXPR) ++ aff_combination_add_elt (comb, core, 1); ++ else ++ { ++ tree_to_aff_combination (core, type, &tmp); ++ aff_combination_add (comb, &tmp); ++ } ++ if (toffset) ++ { ++ tree_to_aff_combination (toffset, type, &tmp); ++ aff_combination_add (comb, &tmp); ++ } ++ return; ++ + default: + { + if (poly_int_tree_p (expr)) +@@ -665,7 +716,7 @@ aff_combination_expand (aff_tree *comb A + { + unsigned i; + aff_tree to_add, current, curre; +- tree e, rhs; ++ tree e; + gimple *def; + widest_int scale; + struct name_expansion *exp; +@@ -715,20 +766,38 @@ aff_combination_expand (aff_tree *comb A + case PLUS_EXPR: + case MINUS_EXPR: + case MULT_EXPR: ++ if (!expr_to_aff_combination (¤t, code, TREE_TYPE (name), ++ gimple_assign_rhs1 (def), ++ gimple_assign_rhs2 (def))) ++ continue; ++ break; + case NEGATE_EXPR: + case BIT_NOT_EXPR: ++ if (!expr_to_aff_combination (¤t, code, TREE_TYPE (name), ++ gimple_assign_rhs1 (def))) ++ continue; ++ break; + CASE_CONVERT: +- rhs = gimple_assign_rhs_to_tree (def); ++ if (!expr_to_aff_combination (¤t, code, TREE_TYPE (name), ++ gimple_assign_rhs1 (def))) ++ /* This makes us always expand conversions which we did ++ in the past and makes gcc.dg/tree-ssa/ivopts-lt-2.c ++ PASS, eliminating one induction variable in IVOPTs. ++ ??? But it is really excessive and we should try ++ harder to do without it. 
*/ ++ aff_combination_elt (¤t, TREE_TYPE (name), ++ fold_convert (TREE_TYPE (name), ++ gimple_assign_rhs1 (def))); + break; + case ADDR_EXPR: + case INTEGER_CST: + case POLY_INT_CST: +- rhs = gimple_assign_rhs1 (def); ++ tree_to_aff_combination (gimple_assign_rhs1 (def), ++ TREE_TYPE (name), ¤t); + break; + default: + continue; + } +- tree_to_aff_combination (rhs, TREE_TYPE (name), ¤t); + exp = XNEW (struct name_expansion); + exp->in_progress = 1; + if (!*cache) diff --git a/fix-ICE-in-compute_live_loop_exits.patch b/fix-ICE-in-compute_live_loop_exits.patch index 013ec83..5479487 100644 --- a/fix-ICE-in-compute_live_loop_exits.patch +++ b/fix-ICE-in-compute_live_loop_exits.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-tree-optimization-92085-ICE-tree-check-expecte.patch +3c8e341b996546607fa1f39a0fd9a9d7c2c38214 + diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 2020-07-09 11:05:23.136000000 +0800 diff --git a/fix-ICE-in-eliminate_stmt.patch b/fix-ICE-in-eliminate_stmt.patch index 7cc61c5..983e193 100644 --- a/fix-ICE-in-eliminate_stmt.patch +++ b/fix-ICE-in-eliminate_stmt.patch @@ -1,12 +1,12 @@ -This backport contains 2 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Tweak-gcc.dg-vect-bb-slp-4-01-.c-PR92366.patch -3771033244b3ee1b53a8a00d734580b16384fdd3 - -0001-tree-vect-slp.c-vect_analyze_slp_instance-Dump-const.patch -140ee00a961fda084c1b4b3f0e7e489a917858f7 - +This backport contains 2 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-Tweak-gcc.dg-vect-bb-slp-4-01-.c-PR92366.patch +3771033244b3ee1b53a8a00d734580b16384fdd3 + +0001-tree-vect-slp.c-vect_analyze_slp_instance-Dump-const.patch +140ee00a961fda084c1b4b3f0e7e489a917858f7 + diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c --- a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2020-09-14 21:24:20.899694710 +0800 +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2020-09-15 20:54:05.456027442 +0800 diff --git a/fix-ICE-in-pass-vect.patch b/fix-ICE-in-pass-vect.patch new file mode 100644 index 0000000..38effd1 --- /dev/null +++ b/fix-ICE-in-pass-vect.patch @@ -0,0 +1,37 @@ +diff -uprN a/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c +--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c 2020-11-17 02:38:45.284000000 +0800 +@@ -0,0 +1,19 @@ ++/* { dg-do compiler} */ ++/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 -funsafe-math-optimizations" } */ ++ ++long a, b; ++float c, e; ++float *d; ++void f() { ++ float g, h, i, j; ++ b = 0; ++ for (; b < a; b++) { ++ i = d[0]; ++ g = g + i * e; ++ j = d[1]; ++ h = h - j * e; ++ d = d + 2; ++ } ++ c = g; ++ e = h; ++} +diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2020-11-16 10:59:36.000000000 +0800 ++++ b/gcc/tree-vect-slp.c 2020-11-16 23:30:19.560000000 +0800 +@@ -4140,8 +4140,8 @@ vect_schedule_slp_instance (slp_tree nod + gimple *vstmt; + vstmt = gimple_build_assign (make_ssa_name (vectype), + VEC_PERM_EXPR, +- gimple_assign_lhs (v0[j]->stmt), +- gimple_assign_lhs (v1[j]->stmt), ++ gimple_get_lhs (v0[j]->stmt), ++ gimple_get_lhs (v1[j]->stmt), + tmask); + SLP_TREE_VEC_STMTS (node).quick_push + (vect_finish_stmt_generation (stmt_info, vstmt, &si)); diff --git a/fix-ICE-in-vect_create_epilog_for_reduction.patch b/fix-ICE-in-vect_create_epilog_for_reduction.patch index fef451b..7abf6aa 100644 --- a/fix-ICE-in-vect_create_epilog_for_reduction.patch +++ b/fix-ICE-in-vect_create_epilog_for_reduction.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-Don-t-assign-a-cost-to-vectorizable_assignment.patch +e4020b28d02a00d478a3a769855ae6a8d9cc6b26 + diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c --- a/gcc/tree-vect-loop.c 2020-07-09 10:42:35.824000000 +0800 +++ b/gcc/tree-vect-loop.c 2020-07-09 10:43:23.920000000 +0800 diff --git a/fix-ICE-in-vect_stmt_to_vectorize.patch b/fix-ICE-in-vect_stmt_to_vectorize.patch index 67c9818..80229b5 100644 --- a/fix-ICE-in-vect_stmt_to_vectorize.patch +++ b/fix-ICE-in-vect_stmt_to_vectorize.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-re-PR-tree-optimization-92252-ICE-Segmentation-fault.patch +97c6bea819ec0a773041308e62a7c05c33f093b0 + diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92252.c b/gcc/testsuite/gcc.dg/torture/pr92252.c --- a/gcc/testsuite/gcc.dg/torture/pr92252.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/torture/pr92252.c 2020-07-03 10:39:44.808000000 +0800 diff --git a/fix-ICE-in-vect_update_misalignment_for_peel.patch b/fix-ICE-in-vect_update_misalignment_for_peel.patch new file mode 100644 index 0000000..30a9548 --- /dev/null +++ b/fix-ICE-in-vect_update_misalignment_for_peel.patch @@ -0,0 +1,784 @@ +This backport contains 5 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +8801ca5c28c3a9e9f36fa39a6a4455b48c8221fa +9ac1403ca2c65ba4f28cf051b5326617fa9298d1 +7e99af4816cfad578094fcf08e2377f3ed76e201 +ef8777c14ce8694f53eab7a88d24513cbf541ba4 +dccbf1e2a6e544f71b4a5795f0c79015db019fc3 + + +diff -Nurp a/gcc/testsuite/gcc.dg/vect/pr92677.c b/gcc/testsuite/gcc.dg/vect/pr92677.c +--- a/gcc/testsuite/gcc.dg/vect/pr92677.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/pr92677.c 2020-10-26 18:31:50.980000000 +0800 +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-O3" } */ ++ ++int a, c; ++int *b; ++long d; ++double *e; ++ ++void fn1() { ++ long f; ++ double g, h; ++ while (c) { ++ if (d) { ++ g = *e; ++ *(b + 4) = g; ++ } ++ if (f) { ++ h = *(e + 2); ++ *(b + 6) = h; ++ } ++ e += a; ++ b += 8; ++ c--; ++ d += 2; ++ } ++} +diff -Nurp a/gcc/testsuite/gcc.dg/vect/slp-46.c b/gcc/testsuite/gcc.dg/vect/slp-46.c +--- a/gcc/testsuite/gcc.dg/vect/slp-46.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/slp-46.c 2020-10-26 18:31:56.512000000 +0800 +@@ -0,0 +1,96 @@ ++/* { dg-require-effective-target vect_double } */ ++ ++#include "tree-vect.h" ++ ++double x[1024], y[1024]; ++ ++void __attribute__((noipa)) foo() ++{ ++ for (int i = 0; i < 512; ++i) ++ { ++ x[2*i] = y[i]; ++ x[2*i+1] = y[i]; ++ } ++} ++ ++void __attribute__((noipa)) bar() ++{ ++ for (int i = 0; i < 512; ++i) ++ { ++ x[2*i] = y[2*i]; ++ x[2*i+1] = y[2*i]; ++ } ++} ++ ++void __attribute__((noipa)) baz() ++{ ++ for (int i = 0; i < 512; ++i) ++ { ++ x[2*i] = y[511-i]; ++ x[2*i+1] = y[511-i]; ++ } ++} ++ ++void __attribute__((noipa)) boo() ++{ ++ for (int i = 0; i < 512; ++i) ++ { ++ x[2*i] = y[2*(511-i)]; ++ x[2*i+1] = y[2*(511-i)]; ++ } ++} ++ ++int ++main () ++{ ++ check_vect (); ++ ++ for (int i = 0; i < 1024; ++i) ++ { ++ x[i] = 0; ++ y[i] = i; ++ __asm__ volatile (""); ++ } ++ ++ foo (); ++ for (int i = 0; i < 1024; ++i) ++ if (x[i] != y[i/2]) ++ abort (); ++ ++ for (int i = 0; i < 1024; ++i) ++ { ++ x[i] = 0; ++ __asm__ volatile (""); ++ } ++ ++ bar (); ++ for (int i = 0; i < 1024; ++i) ++ if (x[i] != y[2*(i/2)]) ++ abort (); ++ ++ for (int i = 0; i < 1024; ++i) ++ { ++ x[i] = 0; ++ __asm__ volatile (""); ++ } ++ ++ baz (); ++ for (int i = 0; i < 1024; ++i) ++ if (x[i] != y[511 - i/2]) ++ abort (); ++ ++ for (int i = 0; i < 1024; ++i) ++ { ++ x[i] = 0; ++ __asm__ volatile (""); ++ } ++ ++ boo (); ++ for (int i = 0; i < 1024; ++i) ++ if (x[i] != y[2*(511 - i/2)]) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c +--- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c 1970-01-01 08:00:00.000000000 +0800 ++++ 
b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c 2020-10-26 18:31:53.584000000 +0800 +@@ -0,0 +1,36 @@ ++#include "tree-vect.h" ++ ++#define N 512 ++ ++int a[N], b[N]; ++ ++int __attribute__((noipa)) ++foo (int aval, int bval) ++{ ++ int i, res = 0; ++ for (i=0; inum, + chrec_convert (type, evol, at_stmt), + code, rhs1, at_stmt); +- res = follow_ssa_edge +- (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi, &evol, limit); ++ res = follow_ssa_edge_expr ++ (loop, at_stmt, rhs0, halting_phi, &evol, limit); + if (res == t_true) + *evolution_of_loop = evol; + else if (res == t_false) +@@ -979,8 +979,8 @@ follow_ssa_edge_binary (struct loop *loo + (loop->num, + chrec_convert (type, *evolution_of_loop, at_stmt), + code, rhs0, at_stmt); +- res = follow_ssa_edge +- (loop, SSA_NAME_DEF_STMT (rhs1), halting_phi, ++ res = follow_ssa_edge_expr ++ (loop, at_stmt, rhs1, halting_phi, + evolution_of_loop, limit); + if (res == t_true) + ; +@@ -1000,8 +1000,8 @@ follow_ssa_edge_binary (struct loop *loo + (loop->num, chrec_convert (type, *evolution_of_loop, + at_stmt), + code, rhs1, at_stmt); +- res = follow_ssa_edge +- (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi, ++ res = follow_ssa_edge_expr ++ (loop, at_stmt, rhs0, halting_phi, + evolution_of_loop, limit); + if (res == t_true) + ; +@@ -1018,8 +1018,8 @@ follow_ssa_edge_binary (struct loop *loo + (loop->num, chrec_convert (type, *evolution_of_loop, + at_stmt), + code, rhs0, at_stmt); +- res = follow_ssa_edge +- (loop, SSA_NAME_DEF_STMT (rhs1), halting_phi, ++ res = follow_ssa_edge_expr ++ (loop, at_stmt, rhs1, halting_phi, + evolution_of_loop, limit); + if (res == t_true) + ; +@@ -1050,8 +1050,8 @@ follow_ssa_edge_binary (struct loop *loo + *evolution_of_loop = add_to_evolution + (loop->num, chrec_convert (type, *evolution_of_loop, at_stmt), + MINUS_EXPR, rhs1, at_stmt); +- res = follow_ssa_edge (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi, +- evolution_of_loop, limit); ++ res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi, ++ evolution_of_loop, limit); + if (res == t_true) + ; + else if (res == t_dont_know) +@@ -1071,140 +1071,6 @@ follow_ssa_edge_binary (struct loop *loo + return res; + } + +-/* Follow the ssa edge into the expression EXPR. +- Return true if the strongly connected component has been found. */ +- +-static t_bool +-follow_ssa_edge_expr (struct loop *loop, gimple *at_stmt, tree expr, +- gphi *halting_phi, tree *evolution_of_loop, +- int limit) +-{ +- enum tree_code code = TREE_CODE (expr); +- tree type = TREE_TYPE (expr), rhs0, rhs1; +- t_bool res; +- +- /* The EXPR is one of the following cases: +- - an SSA_NAME, +- - an INTEGER_CST, +- - a PLUS_EXPR, +- - a POINTER_PLUS_EXPR, +- - a MINUS_EXPR, +- - an ASSERT_EXPR, +- - other cases are not yet handled. */ +- +- switch (code) +- { +- CASE_CONVERT: +- /* This assignment is under the form "a_1 = (cast) rhs. */ +- res = follow_ssa_edge_expr (loop, at_stmt, TREE_OPERAND (expr, 0), +- halting_phi, evolution_of_loop, limit); +- *evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt); +- break; +- +- case INTEGER_CST: +- /* This assignment is under the form "a_1 = 7". */ +- res = t_false; +- break; +- +- case SSA_NAME: +- /* This assignment is under the form: "a_1 = b_2". */ +- res = follow_ssa_edge +- (loop, SSA_NAME_DEF_STMT (expr), halting_phi, evolution_of_loop, limit); +- break; +- +- case POINTER_PLUS_EXPR: +- case PLUS_EXPR: +- case MINUS_EXPR: +- /* This case is under the form "rhs0 +- rhs1". 
*/ +- rhs0 = TREE_OPERAND (expr, 0); +- rhs1 = TREE_OPERAND (expr, 1); +- type = TREE_TYPE (rhs0); +- STRIP_USELESS_TYPE_CONVERSION (rhs0); +- STRIP_USELESS_TYPE_CONVERSION (rhs1); +- res = follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1, +- halting_phi, evolution_of_loop, limit); +- break; +- +- case ADDR_EXPR: +- /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */ +- if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF) +- { +- expr = TREE_OPERAND (expr, 0); +- rhs0 = TREE_OPERAND (expr, 0); +- rhs1 = TREE_OPERAND (expr, 1); +- type = TREE_TYPE (rhs0); +- STRIP_USELESS_TYPE_CONVERSION (rhs0); +- STRIP_USELESS_TYPE_CONVERSION (rhs1); +- res = follow_ssa_edge_binary (loop, at_stmt, type, +- rhs0, POINTER_PLUS_EXPR, rhs1, +- halting_phi, evolution_of_loop, limit); +- } +- else +- res = t_false; +- break; +- +- case ASSERT_EXPR: +- /* This assignment is of the form: "a_1 = ASSERT_EXPR " +- It must be handled as a copy assignment of the form a_1 = a_2. */ +- rhs0 = ASSERT_EXPR_VAR (expr); +- if (TREE_CODE (rhs0) == SSA_NAME) +- res = follow_ssa_edge (loop, SSA_NAME_DEF_STMT (rhs0), +- halting_phi, evolution_of_loop, limit); +- else +- res = t_false; +- break; +- +- default: +- res = t_false; +- break; +- } +- +- return res; +-} +- +-/* Follow the ssa edge into the right hand side of an assignment STMT. +- Return true if the strongly connected component has been found. */ +- +-static t_bool +-follow_ssa_edge_in_rhs (struct loop *loop, gimple *stmt, +- gphi *halting_phi, tree *evolution_of_loop, +- int limit) +-{ +- enum tree_code code = gimple_assign_rhs_code (stmt); +- tree type = gimple_expr_type (stmt), rhs1, rhs2; +- t_bool res; +- +- switch (code) +- { +- CASE_CONVERT: +- /* This assignment is under the form "a_1 = (cast) rhs. */ +- res = follow_ssa_edge_expr (loop, stmt, gimple_assign_rhs1 (stmt), +- halting_phi, evolution_of_loop, limit); +- *evolution_of_loop = chrec_convert (type, *evolution_of_loop, stmt); +- break; +- +- case POINTER_PLUS_EXPR: +- case PLUS_EXPR: +- case MINUS_EXPR: +- rhs1 = gimple_assign_rhs1 (stmt); +- rhs2 = gimple_assign_rhs2 (stmt); +- type = TREE_TYPE (rhs1); +- res = follow_ssa_edge_binary (loop, stmt, type, rhs1, code, rhs2, +- halting_phi, evolution_of_loop, limit); +- break; +- +- default: +- if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) +- res = follow_ssa_edge_expr (loop, stmt, gimple_assign_rhs1 (stmt), +- halting_phi, evolution_of_loop, limit); +- else +- res = t_false; +- break; +- } +- +- return res; +-} +- + /* Checks whether the I-th argument of a PHI comes from a backedge. */ + + static bool +@@ -1244,8 +1110,8 @@ follow_ssa_edge_in_condition_phi_branch + if (TREE_CODE (branch) == SSA_NAME) + { + *evolution_of_branch = init_cond; +- return follow_ssa_edge (loop, SSA_NAME_DEF_STMT (branch), halting_phi, +- evolution_of_branch, limit); ++ return follow_ssa_edge_expr (loop, condition_phi, branch, halting_phi, ++ evolution_of_branch, limit); + } + + /* This case occurs when one of the condition branches sets +@@ -1352,65 +1218,158 @@ follow_ssa_edge_inner_loop_phi (struct l + evolution_of_loop, limit); + } + +-/* Follow an SSA edge from a loop-phi-node to itself, constructing a +- path that is analyzed on the return walk. */ ++/* Follow the ssa edge into the expression EXPR. ++ Return true if the strongly connected component has been found. 
*/ + + static t_bool +-follow_ssa_edge (struct loop *loop, gimple *def, gphi *halting_phi, +- tree *evolution_of_loop, int limit) ++follow_ssa_edge_expr (struct loop *loop, gimple *at_stmt, tree expr, ++ gphi *halting_phi, tree *evolution_of_loop, ++ int limit) + { +- struct loop *def_loop; ++ enum tree_code code; ++ tree type, rhs0, rhs1 = NULL_TREE; + +- if (gimple_nop_p (def)) +- return t_false; ++ /* The EXPR is one of the following cases: ++ - an SSA_NAME, ++ - an INTEGER_CST, ++ - a PLUS_EXPR, ++ - a POINTER_PLUS_EXPR, ++ - a MINUS_EXPR, ++ - an ASSERT_EXPR, ++ - other cases are not yet handled. */ + +- /* Give up if the path is longer than the MAX that we allow. */ +- if (limit > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_COMPLEXITY)) +- return t_dont_know; +- +- def_loop = loop_containing_stmt (def); +- +- switch (gimple_code (def)) +- { +- case GIMPLE_PHI: +- if (!loop_phi_node_p (def)) +- /* DEF is a condition-phi-node. Follow the branches, and +- record their evolutions. Finally, merge the collected +- information and set the approximation to the main +- variable. */ +- return follow_ssa_edge_in_condition_phi +- (loop, as_a (def), halting_phi, evolution_of_loop, +- limit); +- +- /* When the analyzed phi is the halting_phi, the +- depth-first search is over: we have found a path from +- the halting_phi to itself in the loop. */ +- if (def == halting_phi) +- return t_true; ++ /* For SSA_NAME look at the definition statement, handling ++ PHI nodes and otherwise expand appropriately for the expression ++ handling below. */ ++ if (TREE_CODE (expr) == SSA_NAME) ++ { ++ gimple *def = SSA_NAME_DEF_STMT (expr); + +- /* Otherwise, the evolution of the HALTING_PHI depends +- on the evolution of another loop-phi-node, i.e. the +- evolution function is a higher degree polynomial. */ +- if (def_loop == loop) ++ if (gimple_nop_p (def)) + return t_false; + +- /* Inner loop. */ +- if (flow_loop_nested_p (loop, def_loop)) +- return follow_ssa_edge_inner_loop_phi +- (loop, as_a (def), halting_phi, evolution_of_loop, +- limit + 1); ++ /* Give up if the path is longer than the MAX that we allow. */ ++ if (limit > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_COMPLEXITY)) ++ return t_dont_know; + +- /* Outer loop. */ +- return t_false; ++ if (gphi *phi = dyn_cast (def)) ++ { ++ if (!loop_phi_node_p (phi)) ++ /* DEF is a condition-phi-node. Follow the branches, and ++ record their evolutions. Finally, merge the collected ++ information and set the approximation to the main ++ variable. */ ++ return follow_ssa_edge_in_condition_phi ++ (loop, phi, halting_phi, evolution_of_loop, limit); ++ ++ /* When the analyzed phi is the halting_phi, the ++ depth-first search is over: we have found a path from ++ the halting_phi to itself in the loop. */ ++ if (phi == halting_phi) ++ return t_true; ++ ++ /* Otherwise, the evolution of the HALTING_PHI depends ++ on the evolution of another loop-phi-node, i.e. the ++ evolution function is a higher degree polynomial. */ ++ class loop *def_loop = loop_containing_stmt (def); ++ if (def_loop == loop) ++ return t_false; ++ ++ /* Inner loop. */ ++ if (flow_loop_nested_p (loop, def_loop)) ++ return follow_ssa_edge_inner_loop_phi ++ (loop, phi, halting_phi, evolution_of_loop, ++ limit + 1); + +- case GIMPLE_ASSIGN: +- return follow_ssa_edge_in_rhs (loop, def, halting_phi, +- evolution_of_loop, limit); ++ /* Outer loop. */ ++ return t_false; ++ } + +- default: + /* At this level of abstraction, the program is just a set + of GIMPLE_ASSIGNs and PHI_NODEs. 
In principle there is no +- other node to be handled. */ ++ other def to be handled. */ ++ if (!is_gimple_assign (def)) ++ return t_false; ++ ++ code = gimple_assign_rhs_code (def); ++ switch (get_gimple_rhs_class (code)) ++ { ++ case GIMPLE_BINARY_RHS: ++ rhs0 = gimple_assign_rhs1 (def); ++ rhs1 = gimple_assign_rhs2 (def); ++ break; ++ case GIMPLE_UNARY_RHS: ++ case GIMPLE_SINGLE_RHS: ++ rhs0 = gimple_assign_rhs1 (def); ++ break; ++ default: ++ return t_false; ++ } ++ type = TREE_TYPE (gimple_assign_lhs (def)); ++ at_stmt = def; ++ } ++ else ++ { ++ code = TREE_CODE (expr); ++ type = TREE_TYPE (expr); ++ switch (code) ++ { ++ CASE_CONVERT: ++ rhs0 = TREE_OPERAND (expr, 0); ++ break; ++ case POINTER_PLUS_EXPR: ++ case PLUS_EXPR: ++ case MINUS_EXPR: ++ rhs0 = TREE_OPERAND (expr, 0); ++ rhs1 = TREE_OPERAND (expr, 1); ++ break; ++ default: ++ rhs0 = expr; ++ } ++ } ++ ++ switch (code) ++ { ++ CASE_CONVERT: ++ { ++ /* This assignment is under the form "a_1 = (cast) rhs. */ ++ t_bool res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi, ++ evolution_of_loop, limit); ++ *evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt); ++ return res; ++ } ++ ++ case INTEGER_CST: ++ /* This assignment is under the form "a_1 = 7". */ ++ return t_false; ++ ++ case ADDR_EXPR: ++ { ++ /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */ ++ if (TREE_CODE (TREE_OPERAND (rhs0, 0)) != MEM_REF) ++ return t_false; ++ tree mem = TREE_OPERAND (rhs0, 0); ++ rhs0 = TREE_OPERAND (mem, 0); ++ rhs1 = TREE_OPERAND (mem, 1); ++ code = POINTER_PLUS_EXPR; ++ } ++ /* Fallthru. */ ++ case POINTER_PLUS_EXPR: ++ case PLUS_EXPR: ++ case MINUS_EXPR: ++ /* This case is under the form "rhs0 +- rhs1". */ ++ STRIP_USELESS_TYPE_CONVERSION (rhs0); ++ STRIP_USELESS_TYPE_CONVERSION (rhs1); ++ return follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1, ++ halting_phi, evolution_of_loop, limit); ++ ++ case ASSERT_EXPR: ++ /* This assignment is of the form: "a_1 = ASSERT_EXPR " ++ It must be handled as a copy assignment of the form a_1 = a_2. */ ++ return follow_ssa_edge_expr (loop, at_stmt, ASSERT_EXPR_VAR (rhs0), ++ halting_phi, evolution_of_loop, limit); ++ ++ default: + return t_false; + } + } +@@ -1504,7 +1463,6 @@ analyze_evolution_in_loop (gphi *loop_ph + for (i = 0; i < n; i++) + { + tree arg = PHI_ARG_DEF (loop_phi_node, i); +- gimple *ssa_chain; + tree ev_fn; + t_bool res; + +@@ -1517,11 +1475,10 @@ analyze_evolution_in_loop (gphi *loop_ph + { + bool val = false; + +- ssa_chain = SSA_NAME_DEF_STMT (arg); +- + /* Pass in the initial condition to the follow edge function. */ + ev_fn = init_cond; +- res = follow_ssa_edge (loop, ssa_chain, loop_phi_node, &ev_fn, 0); ++ res = follow_ssa_edge_expr (loop, loop_phi_node, arg, ++ loop_phi_node, &ev_fn, 0); + + /* If ev_fn has no evolution in the inner loop, and the + init_cond is not equal to ev_fn, then we have an +diff -Nurp a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c +--- a/gcc/tree-ssa-sccvn.c 2020-10-26 18:28:58.736000000 +0800 ++++ b/gcc/tree-ssa-sccvn.c 2020-10-26 18:31:45.768000000 +0800 +@@ -2456,7 +2456,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree + (vuse, vr->set, vr->type, vr->operands, val); + } + /* For now handle clearing memory with partial defs. 
*/ +- else if (integer_zerop (gimple_call_arg (def_stmt, 1)) ++ else if (known_eq (ref->size, maxsize) ++ && integer_zerop (gimple_call_arg (def_stmt, 1)) + && tree_to_poly_int64 (len).is_constant (&leni) + && offset.is_constant (&offseti) + && offset2.is_constant (&offset2i) +@@ -2494,7 +2495,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree + return vn_reference_lookup_or_insert_for_pieces + (vuse, vr->set, vr->type, vr->operands, val); + } +- else if (maxsize.is_constant (&maxsizei) ++ else if (known_eq (ref->size, maxsize) ++ && maxsize.is_constant (&maxsizei) + && maxsizei % BITS_PER_UNIT == 0 + && offset.is_constant (&offseti) + && offseti % BITS_PER_UNIT == 0 +diff -Nurp a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c +--- a/gcc/tree-vect-data-refs.c 2020-10-26 18:28:58.792000000 +0800 ++++ b/gcc/tree-vect-data-refs.c 2020-10-26 18:31:56.512000000 +0800 +@@ -1045,7 +1045,7 @@ vect_compute_data_ref_alignment (dr_vec_ + if (tree_int_cst_sgn (drb->step) < 0) + /* PLUS because STEP is negative. */ + misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1) +- * TREE_INT_CST_LOW (drb->step)); ++ * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); + + unsigned int const_misalignment; + if (!known_misalignment (misalignment, vect_align_c, &const_misalignment)) +diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-10-26 18:28:58.728000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-10-26 18:31:53.584000000 +0800 +@@ -1850,7 +1850,10 @@ vect_dissolve_slp_only_groups (loop_vec_ + DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo; + DR_GROUP_NEXT_ELEMENT (vinfo) = NULL; + DR_GROUP_SIZE (vinfo) = 1; +- DR_GROUP_GAP (vinfo) = group_size - 1; ++ if (STMT_VINFO_STRIDED_P (first_element)) ++ DR_GROUP_GAP (vinfo) = 0; ++ else ++ DR_GROUP_GAP (vinfo) = group_size - 1; + vinfo = next; + } + } +@@ -4516,18 +4519,26 @@ vect_create_epilog_for_reduction (stmt_v + zeroes. */ + if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) + { ++ auto_vec, 2> ccompares; + stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info); + cond_info = vect_stmt_to_vectorize (cond_info); +- while (gimple_assign_rhs_code (cond_info->stmt) != COND_EXPR) ++ while (cond_info != reduc_info) + { ++ if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR) ++ { ++ gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt; ++ gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); ++ ccompares.safe_push ++ (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)), ++ STMT_VINFO_REDUC_IDX (cond_info) == 2)); ++ } + cond_info + = loop_vinfo->lookup_def (gimple_op (cond_info->stmt, + 1 + STMT_VINFO_REDUC_IDX + (cond_info))); + cond_info = vect_stmt_to_vectorize (cond_info); + } +- gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt; +- gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); ++ gcc_assert (ccompares.length () != 0); + + tree indx_before_incr, indx_after_incr; + poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); +@@ -4569,37 +4580,35 @@ vect_create_epilog_for_reduction (stmt_v + add_phi_arg (as_a (new_phi), vec_zero, + loop_preheader_edge (loop), UNKNOWN_LOCATION); + +- /* Now take the condition from the loops original cond_expr +- (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for ++ /* Now take the condition from the loops original cond_exprs ++ and produce a new cond_exprs (INDEX_COND_EXPR) which for + every match uses values from the induction variable + (INDEX_BEFORE_INCR) otherwise uses values from the phi node + (NEW_PHI_TREE). 
+ Finally, we update the phi (NEW_PHI_TREE) to take the value of + the new cond_expr (INDEX_COND_EXPR). */ +- +- /* Duplicate the condition from vec_stmt. */ +- tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt)); +- +- /* Create a conditional, where the condition is taken from vec_stmt +- (CCOMPARE). The then and else values mirror the main VEC_COND_EXPR: +- the reduction phi corresponds to NEW_PHI_TREE and the new values +- correspond to INDEX_BEFORE_INCR. */ +- gcc_assert (STMT_VINFO_REDUC_IDX (cond_info) >= 1); +- tree index_cond_expr; +- if (STMT_VINFO_REDUC_IDX (cond_info) == 2) +- index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, +- ccompare, indx_before_incr, new_phi_tree); +- else +- index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, +- ccompare, new_phi_tree, indx_before_incr); +- induction_index = make_ssa_name (cr_index_vector_type); +- gimple *index_condition = gimple_build_assign (induction_index, +- index_cond_expr); +- gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT); +- stmt_vec_info index_vec_info = loop_vinfo->add_stmt (index_condition); ++ gimple_seq stmts = NULL; ++ for (int i = ccompares.length () - 1; i != -1; --i) ++ { ++ tree ccompare = ccompares[i].first; ++ if (ccompares[i].second) ++ new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, ++ cr_index_vector_type, ++ ccompare, ++ indx_before_incr, new_phi_tree); ++ else ++ new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, ++ cr_index_vector_type, ++ ccompare, ++ new_phi_tree, indx_before_incr); ++ } ++ gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT); ++ stmt_vec_info index_vec_info ++ = loop_vinfo->add_stmt (SSA_NAME_DEF_STMT (new_phi_tree)); + STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type; + + /* Update the phi with the vec cond. */ ++ induction_index = new_phi_tree; + add_phi_arg (as_a (new_phi), induction_index, + loop_latch_edge (loop), UNKNOWN_LOCATION); + } diff --git a/fix-ICE-in-verify_ssa.patch b/fix-ICE-in-verify_ssa.patch index 056c276..fa48fcb 100644 --- a/fix-ICE-in-verify_ssa.patch +++ b/fix-ICE-in-verify_ssa.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-tree-optimization-92461-ICE-verify_ssa-failed-.patch +830d1b18526dd1f085e8a2e1467a6dde18fc6434 + diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92461.c b/gcc/testsuite/gcc.dg/torture/pr92461.c --- a/gcc/testsuite/gcc.dg/torture/pr92461.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/torture/pr92461.c 2020-07-28 19:48:09.324000000 +0800 diff --git a/fix-ICE-when-vectorizing-nested-cycles.patch b/fix-ICE-when-vectorizing-nested-cycles.patch index d8a5b69..2aa3f46 100644 --- a/fix-ICE-when-vectorizing-nested-cycles.patch +++ b/fix-ICE-when-vectorizing-nested-cycles.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-tree-optimization-96698-fix-ICE-when-vectorizing-nes.patch +2130efe6ac7beba72d289e3dd145daa10aeaed54 + diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96698.c b/gcc/testsuite/gcc.dg/vect/pr96698.c --- a/gcc/testsuite/gcc.dg/vect/pr96698.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/vect/pr96698.c 2020-08-27 17:53:24.396000000 +0800 diff --git a/fix-PR-92351-When-peeling-for-alignment.patch b/fix-PR-92351-When-peeling-for-alignment.patch new file mode 100644 index 0000000..88866e6 --- /dev/null +++ b/fix-PR-92351-When-peeling-for-alignment.patch @@ -0,0 +1,152 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-vect-PR-92351-When-peeling-for-alignment-make-alignm.patch +4e9d58d16767b1bc686f0c4b3bd2da25dc71e8f3 + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c +new file mode 100644 +index 00000000000..c06fa442faf +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c +@@ -0,0 +1,3 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "vect-peel-2-src.c" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c +new file mode 100644 +index 00000000000..f6fc134c870 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c +@@ -0,0 +1,48 @@ ++#include ++#include "tree-vect.h" ++ ++#define N 128 ++ ++/* unaligned store. */ ++ ++int ib[N+7]; ++ ++__attribute__ ((noinline)) ++int main1 () ++{ ++ int i; ++ int ia[N+1]; ++ ++ /* The store is aligned and the loads are misaligned with the same ++ misalignment. Cost model is disabled. If misaligned stores are supported, ++ we peel according to the loads to align them. */ ++ for (i = 0; i <= N; i++) ++ { ++ ia[i] = ib[i+2] + ib[i+6]; ++ } ++ ++ /* check results: */ ++ for (i = 1; i <= N; i++) ++ { ++ if (ia[i] != ib[i+2] + ib[i+6]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i <= N+6; i++) ++ { ++ asm volatile ("" : "+r" (i)); ++ ib[i] = i; ++ } ++ ++ return main1 (); ++} ++ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2.c +index b6061c3b855..65e70bd4417 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-peel-2.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2.c +@@ -1,52 +1,8 @@ + /* { dg-require-effective-target vect_int } */ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + +-#include +-#include "tree-vect.h" +- +-#define N 128 +- +-/* unaligned store. */ +- +-int ib[N+7]; +- +-__attribute__ ((noinline)) +-int main1 () +-{ +- int i; +- int ia[N+1]; +- +- /* The store is aligned and the loads are misaligned with the same +- misalignment. Cost model is disabled. If misaligned stores are supported, +- we peel according to the loads to align them. 
*/ +- for (i = 0; i <= N; i++) +- { +- ia[i] = ib[i+2] + ib[i+6]; +- } +- +- /* check results: */ +- for (i = 1; i <= N; i++) +- { +- if (ia[i] != ib[i+2] + ib[i+6]) +- abort (); +- } +- +- return 0; +-} +- +-int main (void) +-{ +- int i; +- +- check_vect (); +- +- for (i = 0; i <= N+6; i++) +- { +- asm volatile ("" : "+r" (i)); +- ib[i] = i; +- } +- +- return main1 (); +-} ++#include "vect-peel-2-src.c" + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */ +diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c +index 36639b697f1..88f14e73d65 100644 +--- a/gcc/tree-vect-data-refs.c ++++ b/gcc/tree-vect-data-refs.c +@@ -938,6 +938,18 @@ vect_compute_data_ref_alignment (dr_vec_info *dr_info) + = exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT); + DR_TARGET_ALIGNMENT (dr_info) = vector_alignment; + ++ /* If the main loop has peeled for alignment we have no way of knowing ++ whether the data accesses in the epilogues are aligned. We can't at ++ compile time answer the question whether we have entered the main loop or ++ not. Fixes PR 92351. */ ++ if (loop_vinfo) ++ { ++ loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); ++ if (orig_loop_vinfo ++ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0) ++ return; ++ } ++ + unsigned HOST_WIDE_INT vect_align_c; + if (!vector_alignment.is_constant (&vect_align_c)) + return; diff --git a/fix-addlosymdi-ICE-in-pass-reload.patch b/fix-addlosymdi-ICE-in-pass-reload.patch new file mode 100644 index 0000000..409a3ea --- /dev/null +++ b/fix-addlosymdi-ICE-in-pass-reload.patch @@ -0,0 +1,30 @@ +diff -uprN a/gcc/lra.c b/gcc/lra.c +--- a/gcc/lra.c 2020-12-14 15:26:36.331633230 +0800 ++++ b/gcc/lra.c 2020-12-15 18:56:33.699633230 +0800 +@@ -507,6 +507,26 @@ lra_emit_move (rtx x, rtx y) + data. */ + if (old != max_reg_num ()) + expand_reg_data (old); ++ while (insn != NULL) ++ { ++ if (GET_CODE (PATTERN (insn)) == SET ++ && GET_CODE (SET_SRC (PATTERN (insn))) == LO_SUM ++ && GET_CODE (SET_DEST (PATTERN (insn))) == REG ++ && strcmp (insn_data[recog_memoized (insn)].name, ++ "add_losym_di") == 0) ++ { ++ rtx add_losym_dest = SET_DEST (PATTERN (insn)); ++ for (int i = (int) max_reg_num () - 1; i >= old; i--) ++ { ++ if (regno_reg_rtx[i] == add_losym_dest) ++ { ++ setup_reg_classes (i, GENERAL_REGS, ++ NO_REGS, GENERAL_REGS); ++ } ++ } ++ } ++ insn = PREV_INSN (insn); ++ } + return; + } + lra_emit_add (x, XEXP (y, 0), XEXP (y, 1)); diff --git a/fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch b/fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch new file mode 100644 index 0000000..fc236e9 --- /dev/null +++ b/fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch @@ -0,0 +1,115 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-aarch64-Fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: +91d80cf4bd2827dd9c40fe6a7c719c909d79083d + +diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr96757.c b/gcc/testsuite/gcc.target/aarch64/pr96757.c +--- a/gcc/testsuite/gcc.target/aarch64/pr96757.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.target/aarch64/pr96757.c 2020-10-12 08:32:12.192000000 -0400 +@@ -0,0 +1,23 @@ ++/* PR target/96757 */ ++/* { dg-do compile } */ ++/* { dg-options "-O3" } */ ++ ++short ++fun1(short i, short j) ++{ ++ return i * j; ++} ++ ++int ++fun(int a, int b, int c) ++{ ++ int *v, z, k, m; ++ short f, d; ++ for (int i=0; i m; ++ z = f > k; ++ *v += fun1(z,b); ++ } ++} +diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c +--- a/gcc/tree-vect-patterns.c 2020-10-12 08:05:18.924000000 -0400 ++++ b/gcc/tree-vect-patterns.c 2020-10-12 08:50:56.996000000 -0400 +@@ -3917,6 +3917,8 @@ vect_recog_mask_conversion_pattern (stmt + tree vectype1, vectype2; + stmt_vec_info pattern_stmt_info; + vec_info *vinfo = stmt_vinfo->vinfo; ++ tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE; ++ tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE; + + /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */ + if (is_gimple_call (last_stmt) +@@ -4016,9 +4018,37 @@ vect_recog_mask_conversion_pattern (stmt + + it is better for b1 and b2 to use the mask type associated + with int elements rather bool (byte) elements. */ +- rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo); +- if (!rhs1_type) +- rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0)); ++ rhs1_op0 = TREE_OPERAND (rhs1, 0); ++ rhs1_op1 = TREE_OPERAND (rhs1, 1); ++ if (!rhs1_op0 || !rhs1_op1) ++ return NULL; ++ rhs1_op0_type = search_type_for_mask (rhs1_op0, vinfo); ++ rhs1_op1_type = search_type_for_mask (rhs1_op1, vinfo); ++ ++ if (!rhs1_op0_type) ++ rhs1_type = TREE_TYPE (rhs1_op0); ++ else if (!rhs1_op1_type) ++ rhs1_type = TREE_TYPE (rhs1_op1); ++ else if (TYPE_PRECISION (rhs1_op0_type) ++ != TYPE_PRECISION (rhs1_op1_type)) ++ { ++ int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type) ++ - (int) TYPE_PRECISION (TREE_TYPE (lhs)); ++ int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type) ++ - (int) TYPE_PRECISION (TREE_TYPE (lhs)); ++ if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0)) ++ { ++ if (abs (tmp0) > abs (tmp1)) ++ rhs1_type = rhs1_op1_type; ++ else ++ rhs1_type = rhs1_op0_type; ++ } ++ else ++ rhs1_type = build_nonstandard_integer_type ++ (TYPE_PRECISION (TREE_TYPE (lhs)), 1); ++ } ++ else ++ rhs1_type = rhs1_op0_type; + } + else + return NULL; +@@ -4036,8 +4066,8 @@ vect_recog_mask_conversion_pattern (stmt + name from the outset. 
*/ + if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1), + TYPE_VECTOR_SUBPARTS (vectype2)) +- && (TREE_CODE (rhs1) == SSA_NAME +- || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0)))) ++ && !rhs1_op0_type ++ && !rhs1_op1_type) + return NULL; + + /* If rhs1 is invariant and we can promote it leave the COND_EXPR +@@ -4069,7 +4099,16 @@ vect_recog_mask_conversion_pattern (stmt + if (TREE_CODE (rhs1) != SSA_NAME) + { + tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); +- pattern_stmt = gimple_build_assign (tmp, rhs1); ++ if (rhs1_op0_type ++ && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type)) ++ rhs1_op0 = build_mask_conversion (rhs1_op0, ++ vectype2, stmt_vinfo); ++ if (rhs1_op1_type ++ && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type)) ++ rhs1_op1 = build_mask_conversion (rhs1_op1, ++ vectype2, stmt_vinfo); ++ pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1), ++ rhs1_op0, rhs1_op1); + rhs1 = tmp; + append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2); + } diff --git a/fix-avx512vl-vcvttpd2dq-2-fail.patch b/fix-avx512vl-vcvttpd2dq-2-fail.patch new file mode 100644 index 0000000..60afadd --- /dev/null +++ b/fix-avx512vl-vcvttpd2dq-2-fail.patch @@ -0,0 +1,301 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch +946732df902dbb23dd44abe97fea41e154e6e5f9 + +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 3ce22395c65..12d6dc0cb7e 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -5927,16 +5927,16 @@ + (set_attr "btver2_decode" "vector") + (set_attr "mode" "OI")]) + +-(define_insn "sse2_cvtpd2dq" ++(define_insn "sse2_cvtpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")] + UNSPEC_FIX_NOTRUNC) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] +- "TARGET_SSE2 && " ++ "TARGET_SSE2" + { + if (TARGET_AVX) +- return "vcvtpd2dq{x}\t{%1, %0|%0, %1}"; ++ return "vcvtpd2dq{x}\t{%1, %0|%0, %1}"; + else + return "cvtpd2dq\t{%1, %0|%0, %1}"; + } +@@ -5949,6 +5949,38 @@ + (set_attr "athlon_decode" "vector") + (set_attr "bdver1_decode" "double")]) + ++(define_insn "sse2_cvtpd2dq_mask" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (vec_concat:V4SI ++ (vec_merge:V2SI ++ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] ++ UNSPEC_FIX_NOTRUNC) ++ (vec_select:V2SI ++ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") ++ (parallel [(const_int 0) (const_int 1)])) ++ (match_operand:QI 3 "register_operand" "Yk")) ++ (const_vector:V2SI [(const_int 0) (const_int 0)])))] ++ "TARGET_AVX512VL" ++ "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "*sse2_cvtpd2dq_mask_1" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (vec_concat:V4SI ++ (vec_merge:V2SI ++ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] ++ UNSPEC_FIX_NOTRUNC) ++ (const_vector:V2SI [(const_int 0) (const_int 0)]) ++ (match_operand:QI 2 "register_operand" "Yk")) ++ (const_vector:V2SI [(const_int 0) (const_int 0)])))] ++ "TARGET_AVX512VL" ++ "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "TI")]) ++ + ;; For ufix_notrunc* insn patterns + (define_mode_attr pd2udqsuff + [(V8DF "") (V4DF "{y}")]) +@@ 
-5964,15 +5996,49 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +-(define_insn "ufix_notruncv2dfv2si2" ++(define_insn "ufix_notruncv2dfv2si2" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (unspec:V2SI + [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] +- UNSPEC_UNSIGNED_FIX_NOTRUNC) ++ UNSPEC_UNSIGNED_FIX_NOTRUNC) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_AVX512VL" +- "vcvtpd2udq{x}\t{%1, %0|%0, %1}" ++ "vcvtpd2udq{x}\t{%1, %0|%0, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "ufix_notruncv2dfv2si2_mask" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (vec_concat:V4SI ++ (vec_merge:V2SI ++ (unspec:V2SI ++ [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] ++ UNSPEC_UNSIGNED_FIX_NOTRUNC) ++ (vec_select:V2SI ++ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") ++ (parallel [(const_int 0) (const_int 1)])) ++ (match_operand:QI 3 "register_operand" "Yk")) ++ (const_vector:V2SI [(const_int 0) (const_int 0)])))] ++ "TARGET_AVX512VL" ++ "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "*ufix_notruncv2dfv2si2_mask_1" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (vec_concat:V4SI ++ (vec_merge:V2SI ++ (unspec:V2SI ++ [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] ++ UNSPEC_UNSIGNED_FIX_NOTRUNC) ++ (const_vector:V2SI [(const_int 0) (const_int 0)]) ++ (match_operand:QI 2 "register_operand" "Yk")) ++ (const_vector:V2SI [(const_int 0) (const_int 0)])))] ++ "TARGET_AVX512VL" ++ "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) +@@ -5987,13 +6053,43 @@ + (set_attr "prefix" "evex") + (set_attr "mode" "OI")]) + +-(define_insn "ufix_truncv2dfv2si2" ++(define_insn "ufix_truncv2dfv2si2" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] + "TARGET_AVX512VL" +- "vcvttpd2udq{x}\t{%1, %0|%0, %1}" ++ "vcvttpd2udq{x}\t{%1, %0|%0, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "ufix_truncv2dfv2si2_mask" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (vec_concat:V4SI ++ (vec_merge:V2SI ++ (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) ++ (vec_select:V2SI ++ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") ++ (parallel [(const_int 0) (const_int 1)])) ++ (match_operand:QI 3 "register_operand" "Yk")) ++ (const_vector:V2SI [(const_int 0) (const_int 0)])))] ++ "TARGET_AVX512VL" ++ "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "*ufix_truncv2dfv2si2_mask_1" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (vec_concat:V4SI ++ (vec_merge:V2SI ++ (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) ++ (const_vector:V2SI [(const_int 0) (const_int 0)]) ++ (match_operand:QI 2 "register_operand" "Yk")) ++ (const_vector:V2SI [(const_int 0) (const_int 0)])))] ++ "TARGET_AVX512VL" ++ "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" + [(set_attr "type" "ssecvt") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) +@@ -6138,15 +6234,15 @@ + "TARGET_AVX" + "operands[2] = 
CONST0_RTX (V4SImode);") + +-(define_insn "sse2_cvttpd2dq" ++(define_insn "sse2_cvttpd2dq" + [(set (match_operand:V4SI 0 "register_operand" "=v") + (vec_concat:V4SI + (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm")) + (const_vector:V2SI [(const_int 0) (const_int 0)])))] +- "TARGET_SSE2 && " ++ "TARGET_SSE2" + { + if (TARGET_AVX) +- return "vcvttpd2dq{x}\t{%1, %0|%0, %1}"; ++ return "vcvttpd2dq{x}\t{%1, %0|%0, %1}"; + else + return "cvttpd2dq\t{%1, %0|%0, %1}"; + } +@@ -6157,6 +6253,36 @@ + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "TI")]) + ++(define_insn "sse2_cvttpd2dq_mask" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (vec_concat:V4SI ++ (vec_merge:V2SI ++ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) ++ (vec_select:V2SI ++ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") ++ (parallel [(const_int 0) (const_int 1)])) ++ (match_operand:QI 3 "register_operand" "Yk")) ++ (const_vector:V2SI [(const_int 0) (const_int 0)])))] ++ "TARGET_AVX512VL" ++ "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "TI")]) ++ ++(define_insn "*sse2_cvttpd2dq_mask_1" ++ [(set (match_operand:V4SI 0 "register_operand" "=v") ++ (vec_concat:V4SI ++ (vec_merge:V2SI ++ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) ++ (const_vector:V2SI [(const_int 0) (const_int 0)]) ++ (match_operand:QI 2 "register_operand" "Yk")) ++ (const_vector:V2SI [(const_int 0) (const_int 0)])))] ++ "TARGET_AVX512VL" ++ "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "TI")]) ++ + (define_insn "sse2_cvtsd2ss" + [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") + (vec_merge:V4SF +@@ -6276,26 +6402,28 @@ + + (define_expand "sse2_cvtpd2ps_mask" + [(set (match_operand:V4SF 0 "register_operand") +- (vec_merge:V4SF +- (vec_concat:V4SF ++ (vec_concat:V4SF ++ (vec_merge:V2SF + (float_truncate:V2SF + (match_operand:V2DF 1 "vector_operand")) +- (match_dup 4)) +- (match_operand:V4SF 2 "register_operand") +- (match_operand:QI 3 "register_operand")))] ++ (vec_select:V2SF ++ (match_operand:V4SF 2 "nonimm_or_0_operand") ++ (parallel [(const_int 0) (const_int 1)])) ++ (match_operand:QI 3 "register_operand")) ++ (match_dup 4)))] + "TARGET_SSE2" + "operands[4] = CONST0_RTX (V2SFmode);") + +-(define_insn "*sse2_cvtpd2ps" ++(define_insn "*sse2_cvtpd2ps" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (vec_concat:V4SF + (float_truncate:V2SF + (match_operand:V2DF 1 "vector_operand" "vBm")) +- (match_operand:V2SF 2 "const0_operand")))] +- "TARGET_SSE2 && " ++ (match_operand:V2SF 2 "const0_operand" "C")))] ++ "TARGET_SSE2" + { + if (TARGET_AVX) +- return "vcvtpd2ps{x}\t{%1, %0|%0, %1}"; ++ return "vcvtpd2ps{x}\t{%1, %0|%0, %1}"; + else + return "cvtpd2ps\t{%1, %0|%0, %1}"; + } +@@ -6307,6 +6435,38 @@ + (set_attr "prefix" "maybe_vex") + (set_attr "mode" "V4SF")]) + ++(define_insn "*sse2_cvtpd2ps_mask" ++ [(set (match_operand:V4SF 0 "register_operand" "=v") ++ (vec_concat:V4SF ++ (vec_merge:V2SF ++ (float_truncate:V2SF ++ (match_operand:V2DF 1 "nonimmediate_operand" "vm")) ++ (vec_select:V2SF ++ (match_operand:V4SF 2 "nonimm_or_0_operand" "0C") ++ (parallel [(const_int 0) (const_int 1)])) ++ (match_operand:QI 3 "register_operand" "Yk")) ++ (match_operand:V2SF 4 "const0_operand" "C")))] ++ "TARGET_AVX512VL" ++ "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ 
(set_attr "mode" "V4SF")]) ++ ++(define_insn "*sse2_cvtpd2ps_mask_1" ++ [(set (match_operand:V4SF 0 "register_operand" "=v") ++ (vec_concat:V4SF ++ (vec_merge:V2SF ++ (float_truncate:V2SF ++ (match_operand:V2DF 1 "nonimmediate_operand" "vm")) ++ (match_operand:V2SF 3 "const0_operand" "C") ++ (match_operand:QI 2 "register_operand" "Yk")) ++ (match_operand:V2SF 4 "const0_operand" "C")))] ++ "TARGET_AVX512VL" ++ "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" ++ [(set_attr "type" "ssecvt") ++ (set_attr "prefix" "evex") ++ (set_attr "mode" "V4SF")]) ++ + ;; For _cvtps2pd insn pattern + (define_mode_attr sf2dfmode + [(V8DF "V8SF") (V4DF "V4SF")]) diff --git a/fix-cost-of-plus.patch b/fix-cost-of-plus.patch index 7edb1b1..5a0e2f0 100644 --- a/fix-cost-of-plus.patch +++ b/fix-cost-of-plus.patch @@ -1,3 +1,6 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + AArch64-Fix-cost-of-plus-.-const_int-C.patch: commit 835d50c66aa5bde2f354a6e63a2afa7d2f76a05a diff --git a/fix-issue499-add-nop-convert.patch b/fix-issue499-add-nop-convert.patch new file mode 100644 index 0000000..fad9584 --- /dev/null +++ b/fix-issue499-add-nop-convert.patch @@ -0,0 +1,928 @@ +This patch is a combine of following 8 commits + +commit e944354ec05891474b0d204c6c239c04ee7b527b +Author: Robin Dapp +Date: Mon Aug 26 10:18:24 2019 +0000 + + [PATCH 1/2] Allow folding all statements. + +commit df7d46d925c7baca7bf9961aee900876d8aef225 +Author: Robin Dapp +Date: Mon Aug 26 10:24:44 2019 +0000 + + [PATCH 2/2] Add simplify rule for wrapped addition. + +commit 6c14d008122fcee4157be79a60f8d6685869ad19 +Author: Robin Dapp +Date: Tue Aug 27 12:08:58 2019 +0000 + + re PR testsuite/91549 (gcc.dg/wrapped-binop-simplify.c fails starting with r274925) + +commit 129bd066049f065e522990e63bb10ff92b3c018d +Author: Jakub Jelinek +Date: Tue Dec 3 10:20:43 2019 +0100 + + re PR tree-optimization/92734 (Missing match.pd simplification done by fold_binary_loc on generic) + +commit 526b4c716a340ee9464965e63eee2b9954fe21f1 +Author: Jakub Jelinek +Date: Wed Dec 4 10:38:48 2019 +0100 + + re PR tree-optimization/92734 (Missing match.pd simplification done by fold_binary_loc on generic) + +commit 28fabd43d9d249134244eb9d7815917c7ae44b64 +Author: Richard Biener +Date: Fri Dec 6 10:25:08 2019 +0000 + + genmatch.c (enum tree_code): Remove CONVERT{0,1,2} and VIEW_CONVERT{0,1,2}. + +commit e150da383346adc762bc904342f9877f2f071265 +Author: Richard Biener +Date: Fri Dec 6 11:44:27 2019 +0000 + + match.pd (nop_convert): Remove empty match. 
+ +commit 496f4f884716ae061f771a62e44868a32dbd502f +Author: Jakub Jelinek +Date: Mon May 4 11:01:08 2020 +0200 + + match.pd: Decrease number of nop conversions around bitwise ops [PR94718] + +diff -Nurp a/gcc/genmatch.c b/gcc/genmatch.c +--- a/gcc/genmatch.c 2020-03-12 19:07:21.000000000 +0800 ++++ b/gcc/genmatch.c 2020-11-24 14:49:12.792000000 +0800 +@@ -224,12 +224,6 @@ output_line_directive (FILE *f, location + #define DEFTREECODE(SYM, STRING, TYPE, NARGS) SYM, + enum tree_code { + #include "tree.def" +-CONVERT0, +-CONVERT1, +-CONVERT2, +-VIEW_CONVERT0, +-VIEW_CONVERT1, +-VIEW_CONVERT2, + MAX_TREE_CODES + }; + #undef DEFTREECODE +@@ -695,11 +689,12 @@ struct expr : public operand + expr (id_base *operation_, location_t loc, bool is_commutative_ = false) + : operand (OP_EXPR, loc), operation (operation_), + ops (vNULL), expr_type (NULL), is_commutative (is_commutative_), +- is_generic (false), force_single_use (false) {} ++ is_generic (false), force_single_use (false), opt_grp (0) {} + expr (expr *e) + : operand (OP_EXPR, e->location), operation (e->operation), + ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative), +- is_generic (e->is_generic), force_single_use (e->force_single_use) {} ++ is_generic (e->is_generic), force_single_use (e->force_single_use), ++ opt_grp (e->opt_grp) {} + void append_op (operand *op) { ops.safe_push (op); } + /* The operator and its operands. */ + id_base *operation; +@@ -714,6 +709,8 @@ struct expr : public operand + /* Whether pushing any stmt to the sequence should be conditional + on this expression having a single-use. */ + bool force_single_use; ++ /* If non-zero, the group for optional handling. */ ++ unsigned char opt_grp; + virtual void gen_transform (FILE *f, int, const char *, bool, int, + const char *, capture_info *, + dt_operand ** = 0, int = 0); +@@ -1079,18 +1076,17 @@ lower_commutative (simplify *s, vec (o)) + { + if (c->what) + return new capture (c->location, c->where, +- lower_opt_convert (c->what, oper, to_oper, strip), ++ lower_opt (c->what, grp, strip), + c->value_match); + else + return c; +@@ -1100,36 +1096,34 @@ lower_opt_convert (operand *o, enum tree + if (!e) + return o; + +- if (*e->operation == oper) ++ if (e->opt_grp == grp) + { + if (strip) +- return lower_opt_convert (e->ops[0], oper, to_oper, strip); ++ return lower_opt (e->ops[0], grp, strip); + + expr *ne = new expr (e); +- ne->operation = (to_oper == CONVERT_EXPR +- ? get_operator ("CONVERT_EXPR") +- : get_operator ("VIEW_CONVERT_EXPR")); +- ne->append_op (lower_opt_convert (e->ops[0], oper, to_oper, strip)); ++ ne->opt_grp = 0; ++ ne->append_op (lower_opt (e->ops[0], grp, strip)); + return ne; + } + + expr *ne = new expr (e); + for (unsigned i = 0; i < e->ops.length (); ++i) +- ne->append_op (lower_opt_convert (e->ops[i], oper, to_oper, strip)); ++ ne->append_op (lower_opt (e->ops[i], grp, strip)); + + return ne; + } + +-/* Determine whether O or its children uses the conditional conversion +- operator OPER. */ ++/* Determine whether O or its children uses the conditional operation ++ group GRP. 
*/ + + static bool +-has_opt_convert (operand *o, enum tree_code oper) ++has_opt (operand *o, unsigned char grp) + { + if (capture *c = dyn_cast (o)) + { + if (c->what) +- return has_opt_convert (c->what, oper); ++ return has_opt (c->what, grp); + else + return false; + } +@@ -1138,11 +1132,11 @@ has_opt_convert (operand *o, enum tree_c + if (!e) + return false; + +- if (*e->operation == oper) ++ if (e->opt_grp == grp) + return true; + + for (unsigned i = 0; i < e->ops.length (); ++i) +- if (has_opt_convert (e->ops[i], oper)) ++ if (has_opt (e->ops[i], grp)) + return true; + + return false; +@@ -1152,34 +1146,24 @@ has_opt_convert (operand *o, enum tree_c + if required. */ + + static vec +-lower_opt_convert (operand *o) ++lower_opt (operand *o) + { + vec v1 = vNULL, v2; + + v1.safe_push (o); + +- enum tree_code opers[] +- = { CONVERT0, CONVERT_EXPR, +- CONVERT1, CONVERT_EXPR, +- CONVERT2, CONVERT_EXPR, +- VIEW_CONVERT0, VIEW_CONVERT_EXPR, +- VIEW_CONVERT1, VIEW_CONVERT_EXPR, +- VIEW_CONVERT2, VIEW_CONVERT_EXPR }; +- +- /* Conditional converts are lowered to a pattern with the +- conversion and one without. The three different conditional +- convert codes are lowered separately. */ ++ /* Conditional operations are lowered to a pattern with the ++ operation and one without. All different conditional operation ++ groups are lowered separately. */ + +- for (unsigned i = 0; i < sizeof (opers) / sizeof (enum tree_code); i += 2) ++ for (unsigned i = 1; i <= 10; ++i) + { + v2 = vNULL; + for (unsigned j = 0; j < v1.length (); ++j) +- if (has_opt_convert (v1[j], opers[i])) ++ if (has_opt (v1[j], i)) + { +- v2.safe_push (lower_opt_convert (v1[j], +- opers[i], opers[i+1], false)); +- v2.safe_push (lower_opt_convert (v1[j], +- opers[i], opers[i+1], true)); ++ v2.safe_push (lower_opt (v1[j], i, false)); ++ v2.safe_push (lower_opt (v1[j], i, true)); + } + + if (v2 != vNULL) +@@ -1197,9 +1181,9 @@ lower_opt_convert (operand *o) + the resulting multiple patterns to SIMPLIFIERS. */ + + static void +-lower_opt_convert (simplify *s, vec& simplifiers) ++lower_opt (simplify *s, vec& simplifiers) + { +- vec matchers = lower_opt_convert (s->match); ++ vec matchers = lower_opt (s->match); + for (unsigned i = 0; i < matchers.length (); ++i) + { + simplify *ns = new simplify (s->kind, s->id, matchers[i], s->result, +@@ -1543,7 +1527,7 @@ lower (vec& simplifiers, boo + { + auto_vec out_simplifiers; + for (unsigned i = 0; i < simplifiers.length (); ++i) +- lower_opt_convert (simplifiers[i], out_simplifiers); ++ lower_opt (simplifiers[i], out_simplifiers); + + simplifiers.truncate (0); + for (unsigned i = 0; i < out_simplifiers.length (); ++i) +@@ -3927,7 +3911,7 @@ private: + + unsigned get_internal_capture_id (); + +- id_base *parse_operation (); ++ id_base *parse_operation (unsigned char &); + operand *parse_capture (operand *, bool); + operand *parse_expr (); + c_expr *parse_c_expr (cpp_ttype); +@@ -4118,47 +4102,36 @@ parser::record_operlist (location_t loc, + convert2? */ + + id_base * +-parser::parse_operation () ++parser::parse_operation (unsigned char &opt_grp) + { + const cpp_token *id_tok = peek (); ++ char *alt_id = NULL; + const char *id = get_ident (); + const cpp_token *token = peek (); +- if (strcmp (id, "convert0") == 0) +- fatal_at (id_tok, "use 'convert?' here"); +- else if (strcmp (id, "view_convert0") == 0) +- fatal_at (id_tok, "use 'view_convert?' 
here"); ++ opt_grp = 0; + if (token->type == CPP_QUERY + && !(token->flags & PREV_WHITE)) + { +- if (strcmp (id, "convert") == 0) +- id = "convert0"; +- else if (strcmp (id, "convert1") == 0) +- ; +- else if (strcmp (id, "convert2") == 0) +- ; +- else if (strcmp (id, "view_convert") == 0) +- id = "view_convert0"; +- else if (strcmp (id, "view_convert1") == 0) +- ; +- else if (strcmp (id, "view_convert2") == 0) +- ; +- else +- fatal_at (id_tok, "non-convert operator conditionalized"); +- + if (!parsing_match_operand) + fatal_at (id_tok, "conditional convert can only be used in " + "match expression"); ++ if (ISDIGIT (id[strlen (id) - 1])) ++ { ++ opt_grp = id[strlen (id) - 1] - '0' + 1; ++ alt_id = xstrdup (id); ++ alt_id[strlen (id) - 1] = '\0'; ++ if (opt_grp == 1) ++ fatal_at (id_tok, "use '%s?' here", alt_id); ++ } ++ else ++ opt_grp = 1; + eat_token (CPP_QUERY); + } +- else if (strcmp (id, "convert1") == 0 +- || strcmp (id, "convert2") == 0 +- || strcmp (id, "view_convert1") == 0 +- || strcmp (id, "view_convert2") == 0) +- fatal_at (id_tok, "expected '?' after conditional operator"); +- id_base *op = get_operator (id); ++ id_base *op = get_operator (alt_id ? alt_id : id); + if (!op) +- fatal_at (id_tok, "unknown operator %s", id); +- ++ fatal_at (id_tok, "unknown operator %s", alt_id ? alt_id : id); ++ if (alt_id) ++ free (alt_id); + user_id *p = dyn_cast (op); + if (p && p->is_oper_list) + { +@@ -4214,7 +4187,8 @@ struct operand * + parser::parse_expr () + { + const cpp_token *token = peek (); +- expr *e = new expr (parse_operation (), token->src_loc); ++ unsigned char opt_grp; ++ expr *e = new expr (parse_operation (opt_grp), token->src_loc); + token = peek (); + operand *op; + bool is_commutative = false; +@@ -4310,6 +4284,12 @@ parser::parse_expr () + "commutative"); + } + e->expr_type = expr_type; ++ if (opt_grp != 0) ++ { ++ if (e->ops.length () != 1) ++ fatal_at (token, "only unary operations can be conditional"); ++ e->opt_grp = opt_grp; ++ } + return op; + } + else if (!(token->flags & PREV_WHITE)) +@@ -4692,10 +4672,6 @@ parser::parse_for (location_t) + id_base *idb = get_operator (oper, true); + if (idb == NULL) + fatal_at (token, "no such operator '%s'", oper); +- if (*idb == CONVERT0 || *idb == CONVERT1 || *idb == CONVERT2 +- || *idb == VIEW_CONVERT0 || *idb == VIEW_CONVERT1 +- || *idb == VIEW_CONVERT2) +- fatal_at (token, "conditional operators cannot be used inside for"); + + if (arity == -1) + arity = idb->nargs; +@@ -5102,12 +5078,6 @@ main (int argc, char **argv) + add_operator (SYM, # SYM, # TYPE, NARGS); + #define END_OF_BASE_TREE_CODES + #include "tree.def" +-add_operator (CONVERT0, "convert0", "tcc_unary", 1); +-add_operator (CONVERT1, "convert1", "tcc_unary", 1); +-add_operator (CONVERT2, "convert2", "tcc_unary", 1); +-add_operator (VIEW_CONVERT0, "view_convert0", "tcc_unary", 1); +-add_operator (VIEW_CONVERT1, "view_convert1", "tcc_unary", 1); +-add_operator (VIEW_CONVERT2, "view_convert2", "tcc_unary", 1); + #undef END_OF_BASE_TREE_CODES + #undef DEFTREECODE + +diff -Nurp a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc +--- a/gcc/gimple-loop-versioning.cc 2020-03-12 19:07:21.000000000 +0800 ++++ b/gcc/gimple-loop-versioning.cc 2020-11-24 14:49:12.792000000 +0800 +@@ -1264,6 +1264,12 @@ loop_versioning::record_address_fragment + continue; + } + } ++ if (CONVERT_EXPR_CODE_P (code)) ++ { ++ tree op1 = gimple_assign_rhs1 (assign); ++ address->terms[i].expr = strip_casts (op1); ++ continue; ++ } + } + i += 1; + } +diff -Nurp a/gcc/match.pd 
b/gcc/match.pd +--- a/gcc/match.pd 2020-11-24 14:54:43.576000000 +0800 ++++ b/gcc/match.pd 2020-11-24 14:49:12.792000000 +0800 +@@ -97,8 +97,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (define_operator_list COND_TERNARY + IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS) + +-/* As opposed to convert?, this still creates a single pattern, so +- it is not a suitable replacement for convert? in all cases. */ ++/* With nop_convert? combine convert? and view_convert? in one pattern ++ plus conditionalize on tree_nop_conversion_p conversions. */ + (match (nop_convert @0) + (convert @0) + (if (tree_nop_conversion_p (type, TREE_TYPE (@0))))) +@@ -108,9 +108,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + && known_eq (TYPE_VECTOR_SUBPARTS (type), + TYPE_VECTOR_SUBPARTS (TREE_TYPE (@0))) + && tree_nop_conversion_p (TREE_TYPE (type), TREE_TYPE (TREE_TYPE (@0)))))) +-/* This one has to be last, or it shadows the others. */ +-(match (nop_convert @0) +- @0) + + /* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR + ABSU_EXPR returns unsigned absolute value of the operand and the operand +@@ -1260,7 +1257,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + We combine the above two cases by using a conditional convert. */ + (for bitop (bit_and bit_ior bit_xor) + (simplify +- (bitop (convert @0) (convert? @1)) ++ (bitop (convert@2 @0) (convert?@3 @1)) + (if (((TREE_CODE (@1) == INTEGER_CST + && INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && int_fits_type_p (@1, TREE_TYPE (@0))) +@@ -1279,8 +1276,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + || GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT + /* Or if the precision of TO is not the same as the precision + of its mode. */ +- || !type_has_mode_precision_p (type))) +- (convert (bitop @0 (convert @1)))))) ++ || !type_has_mode_precision_p (type) ++ /* In GIMPLE, getting rid of 2 conversions for one new results ++ in smaller IL. */ ++ || (GIMPLE ++ && TREE_CODE (@1) != INTEGER_CST ++ && tree_nop_conversion_p (type, TREE_TYPE (@0)) ++ && single_use (@2) ++ && single_use (@3)))) ++ (convert (bitop @0 (convert @1))))) ++ /* In GIMPLE, getting rid of 2 conversions for one new results ++ in smaller IL. */ ++ (simplify ++ (convert (bitop:cs@2 (nop_convert:s @0) @1)) ++ (if (GIMPLE ++ && TREE_CODE (@1) != INTEGER_CST ++ && tree_nop_conversion_p (type, TREE_TYPE (@2)) ++ && types_match (type, @0)) ++ (bitop @0 (convert @1))))) + + (for bitop (bit_and bit_ior) + rbitop (bit_ior bit_and) +@@ -1374,7 +1387,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + + /* Convert - (~A) to A + 1. */ + (simplify +- (negate (nop_convert (bit_not @0))) ++ (negate (nop_convert? (bit_not @0))) + (plus (view_convert @0) { build_each_one_cst (type); })) + + /* Convert ~ (A - 1) or ~ (A + -1) to -A. */ +@@ -1401,7 +1414,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + + /* Otherwise prefer ~(X ^ Y) to ~X ^ Y as more canonical. */ + (simplify +- (bit_xor:c (nop_convert:s (bit_not:s @0)) @1) ++ (bit_xor:c (nop_convert?:s (bit_not:s @0)) @1) + (if (tree_nop_conversion_p (type, TREE_TYPE (@0))) + (bit_not (bit_xor (view_convert @0) @1)))) + +@@ -1614,7 +1627,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + /* For equality, this is also true with wrapping overflow. */ + (for op (eq ne) + (simplify +- (op:c (nop_convert@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1)) ++ (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? 
@1)) + (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)) + || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) +@@ -1623,7 +1636,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1))) + (op @0 { build_zero_cst (TREE_TYPE (@0)); }))) + (simplify +- (op:c (nop_convert@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0)) ++ (op:c (nop_convert?@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0)) + (if (tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@0)) + && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@0)) + && (CONSTANT_CLASS_P (@1) || (single_use (@2) && single_use (@3)))) +@@ -1866,7 +1879,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + || !HONOR_SIGN_DEPENDENT_ROUNDING (type))) + (convert (negate @1)))) + (simplify +- (negate (nop_convert (negate @1))) ++ (negate (nop_convert? (negate @1))) + (if (!TYPE_OVERFLOW_SANITIZED (type) + && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1))) + (view_convert @1))) +@@ -1883,20 +1896,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + /* A - (A +- B) -> -+ B */ + /* A +- (B -+ A) -> +- B */ + (simplify +- (minus (plus:c @0 @1) @0) +- @1) +- (simplify +- (minus (minus @0 @1) @0) +- (negate @1)) ++ (minus (nop_convert1? (plus:c (nop_convert2? @0) @1)) @0) ++ (view_convert @1)) + (simplify +- (plus:c (minus @0 @1) @1) +- @0) ++ (minus (nop_convert1? (minus (nop_convert2? @0) @1)) @0) ++ (if (!ANY_INTEGRAL_TYPE_P (type) ++ || TYPE_OVERFLOW_WRAPS (type)) ++ (negate (view_convert @1)) ++ (view_convert (negate @1)))) ++ (simplify ++ (plus:c (nop_convert1? (minus @0 (nop_convert2? @1))) @1) ++ (view_convert @0)) ++ (simplify ++ (minus @0 (nop_convert1? (plus:c (nop_convert2? @0) @1))) ++ (if (!ANY_INTEGRAL_TYPE_P (type) ++ || TYPE_OVERFLOW_WRAPS (type)) ++ (negate (view_convert @1)) ++ (view_convert (negate @1)))) + (simplify +- (minus @0 (plus:c @0 @1)) +- (negate @1)) +- (simplify +- (minus @0 (minus @0 @1)) +- @1) ++ (minus @0 (nop_convert1? (minus (nop_convert2? @0) @1))) ++ (view_convert @1)) + /* (A +- B) + (C - A) -> C +- B */ + /* (A + B) - (A - C) -> B + C */ + /* More cases are handled with comparisons. */ +@@ -1922,7 +1941,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (for inner_op (plus minus) + neg_inner_op (minus plus) + (simplify +- (outer_op (nop_convert (inner_op @0 CONSTANT_CLASS_P@1)) ++ (outer_op (nop_convert? (inner_op @0 CONSTANT_CLASS_P@1)) + CONSTANT_CLASS_P@2) + /* If one of the types wraps, use that one. */ + (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type)) +@@ -1961,17 +1980,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + /* (CST1 - A) +- CST2 -> CST3 - A */ + (for outer_op (plus minus) + (simplify +- (outer_op (minus CONSTANT_CLASS_P@1 @0) CONSTANT_CLASS_P@2) +- (with { tree cst = const_binop (outer_op, type, @1, @2); } +- (if (cst && !TREE_OVERFLOW (cst)) +- (minus { cst; } @0))))) +- +- /* CST1 - (CST2 - A) -> CST3 + A */ +- (simplify +- (minus CONSTANT_CLASS_P@1 (minus CONSTANT_CLASS_P@2 @0)) +- (with { tree cst = const_binop (MINUS_EXPR, type, @1, @2); } +- (if (cst && !TREE_OVERFLOW (cst)) +- (plus { cst; } @0)))) ++ (outer_op (nop_convert? (minus CONSTANT_CLASS_P@1 @0)) CONSTANT_CLASS_P@2) ++ /* If one of the types wraps, use that one. */ ++ (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type)) ++ /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse ++ forever if something doesn't simplify into a constant. 
*/ ++ (if (!CONSTANT_CLASS_P (@0)) ++ (minus (outer_op (view_convert @1) @2) (view_convert @0))) ++ (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)) ++ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) ++ (view_convert (minus (outer_op @1 (view_convert @2)) @0)) ++ (if (types_match (type, @0)) ++ (with { tree cst = const_binop (outer_op, type, @1, @2); } ++ (if (cst && !TREE_OVERFLOW (cst)) ++ (minus { cst; } @0)))))))) ++ ++ /* CST1 - (CST2 - A) -> CST3 + A ++ Use view_convert because it is safe for vectors and equivalent for ++ scalars. */ ++ (simplify ++ (minus CONSTANT_CLASS_P@1 (nop_convert? (minus CONSTANT_CLASS_P@2 @0))) ++ /* If one of the types wraps, use that one. */ ++ (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type)) ++ /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse ++ forever if something doesn't simplify into a constant. */ ++ (if (!CONSTANT_CLASS_P (@0)) ++ (plus (view_convert @0) (minus @1 (view_convert @2)))) ++ (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)) ++ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) ++ (view_convert (plus @0 (minus (view_convert @1) @2))) ++ (if (types_match (type, @0)) ++ (with { tree cst = const_binop (MINUS_EXPR, type, @1, @2); } ++ (if (cst && !TREE_OVERFLOW (cst)) ++ (plus { cst; } @0))))))) ++ ++/* ((T)(A)) + CST -> (T)(A + CST) */ ++#if GIMPLE ++ (simplify ++ (plus (convert SSA_NAME@0) INTEGER_CST@1) ++ (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE ++ && TREE_CODE (type) == INTEGER_TYPE ++ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0)) ++ && int_fits_type_p (@1, TREE_TYPE (@0))) ++ /* Perform binary operation inside the cast if the constant fits ++ and (A + CST)'s range does not overflow. */ ++ (with ++ { ++ wi::overflow_type min_ovf = wi::OVF_OVERFLOW, ++ max_ovf = wi::OVF_OVERFLOW; ++ tree inner_type = TREE_TYPE (@0); ++ ++ wide_int w1 = wide_int::from (wi::to_wide (@1), TYPE_PRECISION (inner_type), ++ TYPE_SIGN (inner_type)); ++ ++ wide_int wmin0, wmax0; ++ if (get_range_info (@0, &wmin0, &wmax0) == VR_RANGE) ++ { ++ wi::add (wmin0, w1, TYPE_SIGN (inner_type), &min_ovf); ++ wi::add (wmax0, w1, TYPE_SIGN (inner_type), &max_ovf); ++ } ++ } ++ (if (min_ovf == wi::OVF_NONE && max_ovf == wi::OVF_NONE) ++ (convert (plus @0 { wide_int_to_tree (TREE_TYPE (@0), w1); } ))) ++ ))) ++#endif + + /* ~A + A -> -1 */ + (simplify +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c 2020-03-12 19:07:22.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c 2020-11-24 14:49:14.568000000 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-ch2-details" } */ ++/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-ch2-details" } */ + + int is_sorted(int *a, int n) + { +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c 2020-03-12 19:07:22.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c 2020-11-24 14:49:14.568000000 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-ch2-details --param logical-op-non-short-circuit=0" } */ ++/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-ch2-details --param logical-op-non-short-circuit=0" } */ + + int is_sorted(int *a, int n, int m, int k) + { +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c 
2020-03-12 19:07:22.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c 2020-11-24 14:49:14.568000000 +0800 +@@ -19,7 +19,7 @@ int bla(void) + } + + /* Since the loop is removed, there should be no addition. */ +-/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */ + /* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */ + + /* The if from the loop header copying remains in the code. */ +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c 2020-03-12 19:07:22.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c 2020-11-24 14:49:14.568000000 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1" } */ ++/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1-details" } */ + + void h (void); + +@@ -17,4 +17,4 @@ int g (int i, int j) + return 1; + } + +-/* { dg-final { scan-tree-dump-times "Folding predicate.*to 1" 1 "vrp1" } } */ ++/* { dg-final { scan-tree-dump-times "gimple_simplified" 1 "vrp1" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c 2020-11-24 14:49:14.568000000 +0800 +@@ -0,0 +1,76 @@ ++/* PR tree-optimization/92734 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++/* Verify there are no binary additions or subtractions left. There can ++ be just casts and negations. */ ++/* { dg-final { scan-tree-dump-not " \[+-] " "optimized" } } */ ++ ++int ++f1 (int x, unsigned y) ++{ ++ int a = x + y; ++ return a - x; ++} ++ ++unsigned ++f2 (unsigned x, int y) ++{ ++ unsigned a = (int) x + y; ++ return a - x; ++} ++ ++int ++f3 (int x, unsigned y) ++{ ++ int a = x - y; ++ return a - x; ++} ++ ++unsigned ++f4 (unsigned x, int y) ++{ ++ unsigned a = (int) x - y; ++ return a - x; ++} ++ ++int ++f5 (unsigned x, int y) ++{ ++ int a = x - y; ++ return a + y; ++} ++ ++unsigned ++f6 (int x, unsigned y) ++{ ++ unsigned a = x - (int) y; ++ return a + y; ++} ++ ++int ++f7 (int x, unsigned y) ++{ ++ int a = x + y; ++ return x - a; ++} ++ ++unsigned ++f8 (unsigned x, int y) ++{ ++ unsigned a = (int) x + y; ++ return x - a; ++} ++ ++int ++f9 (int x, unsigned y) ++{ ++ int a = x - y; ++ return x - a; ++} ++ ++unsigned ++f10 (unsigned x, int y) ++{ ++ unsigned a = (int) x - y; ++ return x - a; ++} +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c 2020-11-24 14:49:14.568000000 +0800 +@@ -0,0 +1,31 @@ ++/* PR tree-optimization/92734 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-forwprop1" } */ ++/* { dg-final { scan-tree-dump-times "return t_\[0-9]*\\\(D\\\);" 4 "forwprop1" } } */ ++ ++int ++f1 (int t) ++{ ++ return 1 - (int) (1U - t); ++} ++ ++int ++f2 (int t) ++{ ++ int a = 7U - t; ++ return 7 - a; ++} ++ ++int ++f3 (int t) ++{ ++ int a = 32U - t; ++ return 32 - a; ++} ++ ++int ++f4 (int t) ++{ ++ int a = 32 - t; ++ return (int) (32 - (unsigned) a); ++} +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c 
1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c 2020-11-24 14:49:14.568000000 +0800 +@@ -0,0 +1,45 @@ ++/* PR tree-optimization/94718 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump-times " \\\(int\\\) " 2 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times " \\\(unsigned int\\\) " 2 "optimized" } } */ ++ ++int ++f1 (int x, int y) ++{ ++ return (int) ((unsigned) x | (unsigned) y); ++} ++ ++int ++f2 (int x, int y) ++{ ++ unsigned a = x; ++ unsigned b = y; ++ return a | b; ++} ++ ++int ++f3 (int x, unsigned y) ++{ ++ return (int) ((unsigned) x | y); ++} ++ ++int ++f4 (int x, unsigned y) ++{ ++ unsigned a = x; ++ return a | y; ++} ++ ++unsigned ++f5 (int x, unsigned y) ++{ ++ return (unsigned) (x | (int) y); ++} ++ ++unsigned ++f6 (int x, unsigned y) ++{ ++ int a = y; ++ return x | a; ++} +diff -Nurp a/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c b/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c +--- a/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c 2020-11-24 14:49:14.484000000 +0800 +@@ -0,0 +1,43 @@ ++/* { dg-do compile { target { { i?86-*-* x86_64-*-* s390*-*-* } && lp64 } } } */ ++/* { dg-options "-O2 -fdump-tree-vrp2-details" } */ ++/* { dg-final { scan-tree-dump-times "gimple_simplified to" 4 "vrp2" } } */ ++ ++void v1 (unsigned long *in, unsigned long *out, unsigned int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ { ++ out[i] = in[i]; ++ } ++} ++ ++void v2 (unsigned long *in, unsigned long *out, int n) ++{ ++ int i; ++ ++ for (i = 0; i < n; i++) ++ { ++ out[i] = in[i]; ++ } ++} ++ ++void v3 (unsigned long *in, unsigned long *out, unsigned int n) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < n; i++) ++ { ++ out[i] = in[i]; ++ } ++} ++ ++void v4 (unsigned long *in, unsigned long *out, int n) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < n; i++) ++ { ++ out[i] = in[i]; ++ } ++} +diff -Nurp a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c +--- a/gcc/tree-ssa-propagate.c 2020-11-24 14:54:42.556000000 +0800 ++++ b/gcc/tree-ssa-propagate.c 2020-11-24 14:49:12.792000000 +0800 +@@ -814,7 +814,6 @@ ssa_propagation_engine::ssa_propagate (v + ssa_prop_fini (); + } + +- + /* Return true if STMT is of the form 'mem_ref = RHS', where 'mem_ref' + is a non-volatile pointer dereference, a structure reference or a + reference to a single _DECL. Ignore volatile memory references +@@ -1071,6 +1070,14 @@ substitute_and_fold_dom_walker::before_d + stmt = gsi_stmt (i); + gimple_set_modified (stmt, true); + } ++ /* Also fold if we want to fold all statements. */ ++ else if (substitute_and_fold_engine->fold_all_stmts ++ && fold_stmt (&i, follow_single_use_edges)) ++ { ++ did_replace = true; ++ stmt = gsi_stmt (i); ++ gimple_set_modified (stmt, true); ++ } + + /* Some statements may be simplified using propagator + specific information. 
Do this before propagating +diff -Nurp a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h +--- a/gcc/tree-ssa-propagate.h 2020-03-12 19:07:23.000000000 +0800 ++++ b/gcc/tree-ssa-propagate.h 2020-11-24 14:49:12.792000000 +0800 +@@ -100,6 +100,8 @@ class ssa_propagation_engine + class substitute_and_fold_engine + { + public: ++ substitute_and_fold_engine (bool fold_all_stmts = false) ++ : fold_all_stmts (fold_all_stmts) { } + virtual ~substitute_and_fold_engine (void) { } + virtual bool fold_stmt (gimple_stmt_iterator *) { return false; } + virtual tree get_value (tree) { return NULL_TREE; } +@@ -107,6 +109,10 @@ class substitute_and_fold_engine + bool substitute_and_fold (basic_block = NULL); + bool replace_uses_in (gimple *); + bool replace_phi_args_in (gphi *); ++ ++ /* Users like VRP can set this when they want to perform ++ folding for every propagation. */ ++ bool fold_all_stmts; + }; + + #endif /* _TREE_SSA_PROPAGATE_H */ +diff -Nurp a/gcc/tree-vrp.c b/gcc/tree-vrp.c +--- a/gcc/tree-vrp.c 2020-11-24 14:54:43.564000000 +0800 ++++ b/gcc/tree-vrp.c 2020-11-24 14:49:12.792000000 +0800 +@@ -6384,6 +6384,7 @@ vrp_prop::visit_phi (gphi *phi) + class vrp_folder : public substitute_and_fold_engine + { + public: ++ vrp_folder () : substitute_and_fold_engine (/* Fold all stmts. */ true) { } + tree get_value (tree) FINAL OVERRIDE; + bool fold_stmt (gimple_stmt_iterator *) FINAL OVERRIDE; + bool fold_predicate_in (gimple_stmt_iterator *); diff --git a/fix-issue604-ldist-dependency-fixup.patch b/fix-issue604-ldist-dependency-fixup.patch new file mode 100644 index 0000000..5aaf858 --- /dev/null +++ b/fix-issue604-ldist-dependency-fixup.patch @@ -0,0 +1,108 @@ +commit f6e1a4cd83190746b6544917f7526fa480ca5f18 +Author: Bin Cheng +Date: Wed May 13 11:37:47 2020 +0800 + + Add missing unit dependence vector in data dependence analysis + + Current data dependence analysis misses unit distant vector if DRs in + DDR have the same invariant access functions. This adds the vector as + the constant access function case. + + 2020-05-13 Bin Cheng + PR tree-optimization/94969 + + gcc/ + * tree-data-dependence.c (constant_access_functions): Rename to... + (invariant_access_functions): ...this. Add parameter. Check for + invariant access function, rather than constant. + (build_classic_dist_vector): Call above function. + * tree-loop-distribution.c (pg_add_dependence_edges): Add comment. + + gcc/testsuite/ + * gcc.dg/tree-ssa/pr94969.c: New test. 
+ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c +new file mode 100644 +index 00000000000..056b015f97c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c +@@ -0,0 +1,28 @@ ++/* PR tree-optimization/52267 */ ++/* { dg-do run } */ ++/* { dg-options "-O3 -fdump-tree-ldist-details" } */ ++ ++int a = 0, b = 0, c = 0; ++struct S { ++ signed m : 7; ++ signed e : 2; ++}; ++struct S f[2] = {{0, 0}, {0, 0}}; ++struct S g = {0, 0}; ++ ++void __attribute__((noinline)) ++k() ++{ ++ for (; c <= 1; c++) { ++ f[b] = g; ++ f[b].e ^= 1; ++ } ++} ++int main() ++{ ++ k(); ++ if (f[b].e != 1) ++ __builtin_abort (); ++} ++ ++/* { dg-final { scan-tree-dump-not "ldist" "Loop 1 distributed: split to 3 loops"} } */ +diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c +index 851225e1171..5505ba46778 100644 +--- a/gcc/tree-data-ref.c ++++ b/gcc/tree-data-ref.c +@@ -4821,17 +4821,19 @@ build_classic_dist_vector_1 (struct data_dependence_relation *ddr, + return true; + } + +-/* Return true when the DDR contains only constant access functions. */ ++/* Return true when the DDR contains only invariant access functions wrto. loop ++ number LNUM. */ + + static bool +-constant_access_functions (const struct data_dependence_relation *ddr) ++invariant_access_functions (const struct data_dependence_relation *ddr, ++ int lnum) + { + unsigned i; + subscript *sub; + + FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) +- if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0)) +- || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1))) ++ if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum) ++ || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum)) + return false; + + return true; +@@ -5030,7 +5032,7 @@ build_classic_dist_vector (struct data_dependence_relation *ddr, + dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); + save_dist_v (ddr, dist_v); + +- if (constant_access_functions (ddr)) ++ if (invariant_access_functions (ddr, loop_nest->num)) + add_distance_for_zero_overlaps (ddr); + + if (DDR_NB_LOOPS (ddr) > 1) +diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c +index 44423215332..b122c3964a0 100644 +--- a/gcc/tree-loop-distribution.c ++++ b/gcc/tree-loop-distribution.c +@@ -2080,7 +2080,8 @@ loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir, + this_dir = -this_dir; + + /* Known dependences can still be unordered througout the +- iteration space, see gcc.dg/tree-ssa/ldist-16.c. */ ++ iteration space, see gcc.dg/tree-ssa/ldist-16.c and ++ gcc.dg/tree-ssa/pr94969.c. */ + if (DDR_NUM_DIST_VECTS (ddr) != 1) + this_dir = 2; + /* If the overlap is exact preserve stmt order. */ diff --git a/fix-when-peeling-for-alignment.patch b/fix-when-peeling-for-alignment.patch deleted file mode 100644 index 1d86732..0000000 --- a/fix-when-peeling-for-alignment.patch +++ /dev/null @@ -1,23 +0,0 @@ -diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c -index 36639b697f1..88f14e73d65 100644 ---- a/gcc/tree-vect-data-refs.c -+++ b/gcc/tree-vect-data-refs.c -@@ -938,6 +938,18 @@ vect_compute_data_ref_alignment (dr_vec_info *dr_info) - = exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT); - DR_TARGET_ALIGNMENT (dr_info) = vector_alignment; - -+ /* If the main loop has peeled for alignment we have no way of knowing -+ whether the data accesses in the epilogues are aligned. We can't at -+ compile time answer the question whether we have entered the main loop or -+ not. 
Fixes PR 92351. */ -+ if (loop_vinfo) -+ { -+ loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); -+ if (orig_loop_vinfo -+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0) -+ return; -+ } -+ - unsigned HOST_WIDE_INT vect_align_c; - if (!vector_alignment.is_constant (&vect_align_c)) - return; diff --git a/gcc.spec b/gcc.spec index 5e4eb3b..07ae2a2 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,4 +1,4 @@ -%global DATE 20200922 +%global DATE 20201229 %global gcc_version 9.3.1 %global gcc_major 9.3.1 @@ -59,7 +59,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: gcc Version: %{gcc_version} -Release: %{DATE}.12 +Release: %{DATE}.13 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org @@ -114,26 +114,26 @@ Provides: bundled(libiberty) Provides: gcc(major) = %{gcc_major} Patch0: enable-aarch64-libquadmath.patch -Patch1: medium-code-mode.patch -Patch2: generate-csel.patch -Patch3: delete-incorrect-smw.patch -Patch4: remove-array-index-inliner-hint.patch -Patch5: ivopts-1.patch -Patch6: ivopts-2.patch -Patch7: dont-generate-IF_THEN_ELSE.patch -Patch8: fix-cost-of-plus.patch -Patch9: div-opti.patch -Patch10: fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch -Patch11: fix-ICE-during-pass-ccp.patch -Patch12: loop-split.patch -Patch13: loop-finite.patch -Patch14: loop-finite-bugfix.patch -Patch15: fix-regno-out-of-range.patch -Patch16: fix-ICE-in-vectorizable-load.patch -Patch17: address-calculation-optimization-within-loop.patch -Patch18: skip-debug-insns-when-computing-inline-costs.patch -Patch19: ipa-const-prop.patch -Patch20: ipa-const-prop-self-recursion-bugfix.patch +Patch1: generate-csel.patch +Patch2: delete-incorrect-smw.patch +Patch3: remove-array-index-inliner-hint.patch +Patch4: ivopts-1.patch +Patch5: ivopts-2.patch +Patch6: dont-generate-IF_THEN_ELSE.patch +Patch7: fix-cost-of-plus.patch +Patch8: div-opti.patch +Patch9: fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch +Patch10: fix-ICE-during-pass-ccp.patch +Patch11: loop-split.patch +Patch12: loop-finite.patch +Patch13: loop-finite-bugfix.patch +Patch14: fix-regno-out-of-range.patch +Patch15: fix-ICE-in-vectorizable-load.patch +Patch16: address-calculation-optimization-within-loop.patch +Patch17: skip-debug-insns-when-computing-inline-costs.patch +Patch18: ipa-const-prop.patch +Patch19: ipa-const-prop-self-recursion-bugfix.patch +Patch20: ipa-const-prop-null-point-check-bugfix.patch Patch21: change-gcc-BASE-VER.patch Patch22: add-option-fallow-store-data-races.patch Patch23: tighten-range-for-generating-csel.patch @@ -177,16 +177,49 @@ Patch60: fix-load-eliding-in-SM.patch Patch61: fix-SSA-update-for-vectorizer-epilogue.patch Patch62: fix-ICE-when-vectorizing-nested-cycles.patch Patch63: fix-avoid-bogus-uninit-warning-with-store-motion.patch -Patch64: ipa-const-prop-null-point-check-bugfix.patch -Patch65: avoid-cycling-on-vertain-subreg-reloads.patch -Patch66: fix-ICE-in-verify_target_availability.patch -Patch67: fix-ICE-vect_slp_analyze_node_operations.patch -Patch68: fix-ICE-in-extract_constrain_insn.patch -Patch69: fix-ICE-during-GIMPLE-pass-dse.patch -Patch70: ipa-const-prop-buffer-overflow-bugfix.patch -Patch71: fix-ICE-in-eliminate_stmt.patch -Patch72: fix-make-ifcvt-clean-up-dead-comparisons.patch -Patch73: fix-when-peeling-for-alignment.patch +Patch64: avoid-cycling-on-vertain-subreg-reloads.patch +Patch65: fix-ICE-in-verify_target_availability.patch +Patch66: fix-ICE-vect_slp_analyze_node_operations.patch +Patch67: 
fix-ICE-in-extract_constrain_insn.patch +Patch68: fix-ICE-during-GIMPLE-pass-dse.patch +Patch69: ipa-const-prop-buffer-overflow-bugfix.patch +Patch70: fix-ICE-in-eliminate_stmt.patch +Patch71: fix-make-ifcvt-clean-up-dead-comparisons.patch +Patch72: fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch +Patch73: fix-ICE-in-vect_update_misalignment_for_peel.patch +Patch74: redundant-loop-elimination.patch +Patch75: bf16-and-matrix-characteristic.patch +Patch76: medium-code-mode.patch +Patch77: tree-optimization-96920-another-ICE-when-vectorizing.patch +Patch78: reduction-paths-with-unhandled-live-stmt.patch +Patch79: aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch +Patch80: tree-optimization-97812-fix-range-query-in-VRP-asser.patch +Patch81: aarch64-Fix-bf16-and-matrix-g++-gfortran.patch +Patch82: IRA-Handle-fully-tied-destinations.patch +Patch83: fix-ICE-in-pass-vect.patch +Patch84: SLP-VECT-Add-check-to-fix-96837.patch +Patch85: adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch +Patch86: fix-issue499-add-nop-convert.patch +Patch87: aarch64-fix-sve-acle-error.patch +Patch88: fix-ICE-IPA-compare-VRP-types.patch +Patch89: vectorizable-comparison-Swap-operands-only-once.patch +Patch90: sccvn-Improve-handling-of-load-masked-with-integer.patch +Patch91: speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch +Patch92: x86-Fix-bf16-and-matrix.patch +Patch93: Fix-up-push_partial_def-little-endian-bitfield.patch +Patch94: modulo-sched-Carefully-process-loop-counter-initiali.patch +Patch95: fix-ICE-in-affine-combination.patch +Patch96: aarch64-Fix-mismatched-SVE-predicate-modes.patch +Patch97: Fix-EXTRACT_LAST_REDUCTION-segfault.patch +Patch98: fix-PR-92351-When-peeling-for-alignment.patch +Patch99: fix-addlosymdi-ICE-in-pass-reload.patch +Patch100: store-merging-Consider-also-overlapping-stores-earlier.patch +Patch101: AArch64-Fix-constraints-for-CPY-M.patch +Patch102: Fix-zero-masking-for-vcvtps2ph.patch +Patch103: re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch +Patch104: fix-avx512vl-vcvttpd2dq-2-fail.patch +Patch105: fix-issue604-ldist-dependency-fixup.patch +Patch106: Apply-maximum-nunits-for-BB-SLP.patch %global gcc_target_platform %{_arch}-linux-gnu @@ -703,6 +736,39 @@ not stable, so plugins must be rebuilt any time GCC is updated. 
%patch71 -p1 %patch72 -p1 %patch73 -p1 +%patch74 -p1 +%patch75 -p1 +%patch76 -p1 +%patch77 -p1 +%patch78 -p1 +%patch79 -p1 +%patch80 -p1 +%patch81 -p1 +%patch82 -p1 +%patch83 -p1 +%patch84 -p1 +%patch85 -p1 +%patch86 -p1 +%patch87 -p1 +%patch88 -p1 +%patch89 -p1 +%patch90 -p1 +%patch91 -p1 +%patch92 -p1 +%patch93 -p1 +%patch94 -p1 +%patch95 -p1 +%patch96 -p1 +%patch97 -p1 +%patch98 -p1 +%patch99 -p1 +%patch100 -p1 +%patch101 -p1 +%patch102 -p1 +%patch103 -p1 +%patch104 -p1 +%patch105 -p1 +%patch106 -p1 %build @@ -2631,6 +2697,57 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Dec 29 2020 eastb233 - 9.3.1-20201229.13 +- avoid-cycling-on-vertain-subreg-reloads.patch: Add patch source comment +- change-gcc-BASE-VER.patch: Likewise +- dont-generate-IF_THEN_ELSE.patch: Likewise +- fix-ICE-in-compute_live_loop_exits.patch: Likewise +- fix-ICE-in-eliminate_stmt.patch: Likewise +- fix-ICE-in-vect_create_epilog_for_reduction.patch: Likewise +- fix-ICE-in-vect_stmt_to_vectorize.patch: Likewise +- fix-ICE-in-verify_ssa.patch: Likewise +- fix-ICE-when-vectorizing-nested-cycles.patch: Likewise +- fix-cost-of-plus.patch: Likewise +- ipa-const-prop-self-recursion-bugfix.patch: Likewise +- simplify-removing-subregs.patch: Likewise +- medium-code-mode.patch: Bugfix +- fix-when-peeling-for-alignment.patch: Move to ... +- fix-PR-92351-When-peeling-for-alignment.patch: ... this +- AArch64-Fix-constraints-for-CPY-M.patch: New file +- Apply-maximum-nunits-for-BB-SLP.patch: New file +- Fix-EXTRACT_LAST_REDUCTION-segfault.patch: New file +- Fix-up-push_partial_def-little-endian-bitfield.patch: New file +- Fix-zero-masking-for-vcvtps2ph.patch: New file +- IRA-Handle-fully-tied-destinations.patch: New file +- SLP-VECT-Add-check-to-fix-96837.patch: New file +- aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch: New file +- aarch64-Fix-bf16-and-matrix-g++-gfortran.patch: New file +- aarch64-Fix-mismatched-SVE-predicate-modes.patch: New file +- aarch64-fix-sve-acle-error.patch: New file +- adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch: New file +- bf16-and-matrix-characteristic.patch: New file +- fix-ICE-IPA-compare-VRP-types.patch: New file +- fix-ICE-in-affine-combination.patch: New file +- fix-ICE-in-pass-vect.patch: New file +- fix-ICE-in-vect_update_misalignment_for_peel.patch: New file +- fix-addlosymdi-ICE-in-pass-reload.patch: New file +- fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: New file +- fix-avx512vl-vcvttpd2dq-2-fail.patch: New file +- fix-issue499-add-nop-convert.patch: New file +- fix-issue604-ldist-dependency-fixup.patch: New file +- modulo-sched-Carefully-process-loop-counter-initiali.patch: New file +- re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch: New file +- reduction-paths-with-unhandled-live-stmt.patch: New file +- redundant-loop-elimination.patch: New file +- sccvn-Improve-handling-of-load-masked-with-integer.patch: New file +- speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch: New file +- store-merging-Consider-also-overlapping-stores-earlier.patch: New file +- tree-optimization-96920-another-ICE-when-vectorizing.patch: New file +- tree-optimization-97812-fix-range-query-in-VRP-asser.patch: New file +- vectorizable-comparison-Swap-operands-only-once.patch: New file +- x86-Fix-bf16-and-matrix.patch: New file +- gcc.spec: Add uploaded patch + * Tue Sep 22 2020 eastb233 - 9.3.1-20200922.12 - fix-when-peeling-for-alignment.patch: New file diff --git a/ipa-const-prop-self-recursion-bugfix.patch 
b/ipa-const-prop-self-recursion-bugfix.patch index 9e878a3..e407ff9 100644 --- a/ipa-const-prop-self-recursion-bugfix.patch +++ b/ipa-const-prop-self-recursion-bugfix.patch @@ -1,14 +1,11 @@ -This patch is backport from gcc-trunk. It is a combined patch from +This backport contains 2 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. -Find matched aggregate lattice for self-recursive CP (PR ipa/93084) -https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=42d73fa9d575e3c8c21e88bd7f65922e17b052f1 +0001-Find-matched-aggregate-lattice-for-self-recursive-CP.patch +709d7838e753bbb6f16e2ed88a118ed81c367040 -and - -Do not propagate self-dependent value (PR ipa/93763) -https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=47772af10c00f7e1e95cd52557fc893dc602a420 - -adapted the using of parameter to gcc9 style. +0002-Do-not-propagate-self-dependent-value-PR-ipa-93763.patch +47772af10c00f7e1e95cd52557fc893dc602a420 diff -Nurp a/gcc/ipa-cp.c b/gcc/ipa-cp.c --- a/gcc/ipa-cp.c 2020-05-23 16:16:58.032000000 +0800 diff --git a/medium-code-mode.patch b/medium-code-mode.patch index 9133683..cf629d2 100644 --- a/medium-code-mode.patch +++ b/medium-code-mode.patch @@ -194,8 +194,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c case AARCH64_CMODEL_SMALL: + AARCH64_SMALL_ROUTINE: /* Same reasoning as the tiny code model, but the offset cap here is - 4G. */ - if ((SYMBOL_REF_WEAK (x) + 1MB, allowing +/-3.9GB for the offset to the symbol. */ + @@ -13121,7 +13225,48 @@ aarch64_classify_symbol (rtx x, HOST_WID ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G); return SYMBOL_SMALL_ABSOLUTE; @@ -300,7 +300,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md + UNSPEC_LOAD_SYMBOL_MEDIUM UNSPEC_LD1_SVE UNSPEC_ST1_SVE - UNSPEC_LD1RQ + UNSPEC_LDNT1_SVE @@ -6548,6 +6553,39 @@ [(set_attr "type" "load_4")] ) diff --git a/modulo-sched-Carefully-process-loop-counter-initiali.patch b/modulo-sched-Carefully-process-loop-counter-initiali.patch new file mode 100644 index 0000000..536d149 --- /dev/null +++ b/modulo-sched-Carefully-process-loop-counter-initiali.patch @@ -0,0 +1,251 @@ +This backport contains 1 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-modulo-sched-Carefully-process-loop-counter-initiali.patch +4eb8f93d026eaa1de9b4820337069f3ce3465cd0 + +diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c +index 6f699a874e3..4568674aa6c 100644 +--- a/gcc/modulo-sched.c ++++ b/gcc/modulo-sched.c +@@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *); + static void set_node_sched_params (ddg_ptr); + static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *); + static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *); +-static void generate_prolog_epilog (partial_schedule_ptr, struct loop *, +- rtx, rtx); + static int calculate_stage_count (partial_schedule_ptr, int); + static void calculate_must_precede_follow (ddg_node_ptr, int, int, + int, int, sbitmap, sbitmap, sbitmap); +@@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail) + this constant. Otherwise return 0. */ + static rtx_insn * + const_iteration_count (rtx count_reg, basic_block pre_header, +- int64_t * count) ++ int64_t *count, bool* adjust_inplace) + { + rtx_insn *insn; + rtx_insn *head, *tail; + ++ *adjust_inplace = false; ++ bool read_after = false; ++ + if (! 
pre_header) + return NULL; + + get_ebb_head_tail (pre_header, pre_header, &head, &tail); + + for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn)) +- if (NONDEBUG_INSN_P (insn) && single_set (insn) && +- rtx_equal_p (count_reg, SET_DEST (single_set (insn)))) ++ if (single_set (insn) && rtx_equal_p (count_reg, ++ SET_DEST (single_set (insn)))) + { + rtx pat = single_set (insn); + + if (CONST_INT_P (SET_SRC (pat))) + { + *count = INTVAL (SET_SRC (pat)); ++ *adjust_inplace = !read_after; + return insn; + } + + return NULL; + } ++ else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn)) ++ { ++ read_after = true; ++ if (reg_set_p (count_reg, insn)) ++ break; ++ } + + return NULL; + } +@@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, + /* Generate the instructions (including reg_moves) for prolog & epilog. */ + static void + generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop, +- rtx count_reg, rtx count_init) ++ rtx count_reg, bool adjust_init) + { + int i; + int last_stage = PS_STAGE_COUNT (ps) - 1; +@@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop, + /* Generate the prolog, inserting its insns on the loop-entry edge. */ + start_sequence (); + +- if (!count_init) ++ if (adjust_init) + { + /* Generate instructions at the beginning of the prolog to +- adjust the loop count by STAGE_COUNT. If loop count is constant +- (count_init), this constant is adjusted by STAGE_COUNT in +- generate_prolog_epilog function. */ ++ adjust the loop count by STAGE_COUNT. If loop count is constant ++ and it not used anywhere in prologue, this constant is adjusted by ++ STAGE_COUNT outside of generate_prolog_epilog function. */ + rtx sub_reg = NULL_RTX; + + sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg, +@@ -1528,7 +1536,8 @@ sms_schedule (void) + rtx_insn *count_init; + int mii, rec_mii, stage_count, min_cycle; + int64_t loop_count = 0; +- bool opt_sc_p; ++ bool opt_sc_p, adjust_inplace = false; ++ basic_block pre_header; + + if (! (g = g_arr[loop->num])) + continue; +@@ -1569,19 +1578,13 @@ sms_schedule (void) + } + + +- /* In case of th loop have doloop register it gets special +- handling. */ +- count_init = NULL; +- if ((count_reg = doloop_register_get (head, tail))) +- { +- basic_block pre_header; +- +- pre_header = loop_preheader_edge (loop)->src; +- count_init = const_iteration_count (count_reg, pre_header, +- &loop_count); +- } ++ count_reg = doloop_register_get (head, tail); + gcc_assert (count_reg); + ++ pre_header = loop_preheader_edge (loop)->src; ++ count_init = const_iteration_count (count_reg, pre_header, &loop_count, ++ &adjust_inplace); ++ + if (dump_file && count_init) + { + fprintf (dump_file, "SMS const-doloop "); +@@ -1701,9 +1704,20 @@ sms_schedule (void) + print_partial_schedule (ps, dump_file); + } + +- /* case the BCT count is not known , Do loop-versioning */ +- if (count_reg && ! count_init) ++ if (count_init) ++ { ++ if (adjust_inplace) ++ { ++ /* When possible, set new iteration count of loop kernel in ++ place. Otherwise, generate_prolog_epilog creates an insn ++ to adjust. 
*/ ++ SET_SRC (single_set (count_init)) = GEN_INT (loop_count ++ - stage_count + 1); ++ } ++ } ++ else + { ++ /* case the BCT count is not known , Do loop-versioning */ + rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg, + gen_int_mode (stage_count, + GET_MODE (count_reg))); +@@ -1713,12 +1727,7 @@ sms_schedule (void) + loop_version (loop, comp_rtx, &condition_bb, + prob, prob.invert (), + prob, prob.invert (), true); +- } +- +- /* Set new iteration count of loop kernel. */ +- if (count_reg && count_init) +- SET_SRC (single_set (count_init)) = GEN_INT (loop_count +- - stage_count + 1); ++ } + + /* Now apply the scheduled kernel to the RTL of the loop. */ + permute_partial_schedule (ps, g->closing_branch->first_note); +@@ -1735,7 +1744,7 @@ sms_schedule (void) + if (dump_file) + print_node_sched_params (dump_file, g->num_nodes, ps); + /* Generate prolog and epilog. */ +- generate_prolog_epilog (ps, loop, count_reg, count_init); ++ generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace); + break; + } + +diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c +new file mode 100644 +index 00000000000..e32fb129f18 +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c +@@ -0,0 +1,23 @@ ++/* PR rtl-optimization/97421 */ ++/* { dg-additional-options "-fmodulo-sched" } */ ++ ++int a, b, d, e; ++int *volatile c = &a; ++ ++__attribute__((noinline)) ++void f(void) ++{ ++ for (int g = 2; g >= 0; g--) { ++ d = 0; ++ for (b = 0; b <= 2; b++) ++ ; ++ e = *c; ++ } ++} ++ ++int main(void) ++{ ++ f(); ++ if (b != 3) ++ __builtin_abort(); ++} +diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c +new file mode 100644 +index 00000000000..142bcbcee91 +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c +@@ -0,0 +1,18 @@ ++/* PR rtl-optimization/97421 */ ++/* { dg-additional-options "-fmodulo-sched -fno-dce -fno-strict-aliasing" } */ ++ ++static int a, b, c; ++int *d = &c; ++int **e = &d; ++int ***f = &e; ++int main() ++{ ++ int h; ++ for (a = 2; a; a--) ++ for (h = 0; h <= 2; h++) ++ for (b = 0; b <= 2; b++) ++ ***f = 6; ++ ++ if (b != 3) ++ __builtin_abort(); ++} +diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c +new file mode 100644 +index 00000000000..3f1485a4a3d +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c +@@ -0,0 +1,22 @@ ++/* PR rtl-optimization/97421 */ ++/* { dg-additional-options "-fmodulo-sched" } */ ++ ++int a, b, c; ++short d; ++void e(void) { ++ unsigned f = 0; ++ for (; f <= 2; f++) { ++ int g[1]; ++ int h = (long)g; ++ c = 0; ++ for (; c < 10; c++) ++ g[0] = a = 0; ++ for (; a <= 2; a++) ++ b = d; ++ } ++} ++int main(void) { ++ e(); ++ if (a != 3) ++ __builtin_abort(); ++} diff --git a/re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch b/re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch new file mode 100644 index 0000000..d95d3b2 --- /dev/null +++ b/re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch @@ -0,0 +1,215 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +491b0b4015a70071a05e0faa5c2082c43a51a0d3 +0001-re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch + +diff -urpN a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def +--- a/gcc/config/i386/i386-builtin.def 2020-03-12 07:07:21.000000000 -0400 ++++ b/gcc/config/i386/i386-builtin.def 2020-12-17 20:46:53.868000000 -0500 +@@ -2516,60 +2516,60 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPT + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di_mask, "__builtin_ia32_vpshld_v2di_mask", IX86_BUILTIN_VPSHLDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT) + + BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi, "__builtin_ia32_vpshrdv_v16hi", IX86_BUILTIN_VPSHRDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi, "__builtin_ia32_vpshrdv_v8hi", IX86_BUILTIN_VPSHRDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) 
++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si, "__builtin_ia32_vpshrdv_v8si", IX86_BUILTIN_VPSHRDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si, "__builtin_ia32_vpshrdv_v4si", IX86_BUILTIN_VPSHRDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT) 
+-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di, "__builtin_ia32_vpshrdv_v4di", IX86_BUILTIN_VPSHRDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di, "__builtin_ia32_vpshrdv_v2di", IX86_BUILTIN_VPSHRDVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) + + BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_maskz, 
"__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi, "__builtin_ia32_vpshldv_v16hi", IX86_BUILTIN_VPSHLDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi, "__builtin_ia32_vpshldv_v8hi", IX86_BUILTIN_VPSHLDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si, "__builtin_ia32_vpshldv_v8si", IX86_BUILTIN_VPSHLDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) 
+-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si, "__builtin_ia32_vpshldv_v4si", IX86_BUILTIN_VPSHLDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di, "__builtin_ia32_vpshldv_v4di", IX86_BUILTIN_VPSHLDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, 
UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di, "__builtin_ia32_vpshldv_v2di", IX86_BUILTIN_VPSHLDVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_mask, "__builtin_ia32_vpshldv_v2di_mask", IX86_BUILTIN_VPSHLDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_mask, "__builtin_ia32_vpshldv_v2di_mask", IX86_BUILTIN_VPSHLDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) + + /* GFNI */ + BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT) +@@ -2594,44 +2594,44 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MAS + /* VNNI */ + + BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) + BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si, "__builtin_ia32_vpdpbusd_v4si", IX86_BUILTIN_VPDPBUSDV4SI, UNKNOWN, (int) 
V4SI_FTYPE_V4SI_V4SI_V4SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) + + BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) + BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si, "__builtin_ia32_vpdpbusds_v4si", IX86_BUILTIN_VPDPBUSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI 
| OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) + + BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) + BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si, "__builtin_ia32_vpdpwssd_v4si", IX86_BUILTIN_VPDPWSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) + + BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, 
UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) + BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) + BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si, "__builtin_ia32_vpdpwssds_v4si", IX86_BUILTIN_VPDPWSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) +-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) ++BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) + + /* VPCLMULQDQ */ + BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) +diff -urpN a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def +--- a/gcc/config/i386/i386-builtin-types.def 2020-03-12 07:07:21.000000000 -0400 ++++ b/gcc/config/i386/i386-builtin-types.def 2020-12-17 20:46:53.868000000 -0500 +@@ -1246,17 +1246,8 @@ DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, INT + DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT, V4SI, INT) + DEF_FUNCTION_TYPE 
(V2DI, V2DI, V2DI, INT, V2DI, INT) + DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI) +-DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI, INT) +-DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, V16HI, INT) +-DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI, INT) +-DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI, INT) +-DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI, INT) + DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI) +-DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT) +-DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, INT) + DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI) +-DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT) +-DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT) + + # BITALG builtins + DEF_FUNCTION_TYPE (V4DI, V4DI) +diff -urpN a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c +--- a/gcc/config/i386/i386-expand.c 2020-12-17 20:44:55.508000000 -0500 ++++ b/gcc/config/i386/i386-expand.c 2020-12-17 20:46:53.872000000 -0500 +@@ -9437,15 +9437,6 @@ ix86_expand_args_builtin (const struct b + case USI_FTYPE_V32HI_V32HI_INT_USI: + case UHI_FTYPE_V16HI_V16HI_INT_UHI: + case UQI_FTYPE_V8HI_V8HI_INT_UQI: +- case V32HI_FTYPE_V32HI_V32HI_V32HI_INT: +- case V16HI_FTYPE_V16HI_V16HI_V16HI_INT: +- case V8HI_FTYPE_V8HI_V8HI_V8HI_INT: +- case V8SI_FTYPE_V8SI_V8SI_V8SI_INT: +- case V4DI_FTYPE_V4DI_V4DI_V4DI_INT: +- case V8DI_FTYPE_V8DI_V8DI_V8DI_INT: +- case V16SI_FTYPE_V16SI_V16SI_V16SI_INT: +- case V2DI_FTYPE_V2DI_V2DI_V2DI_INT: +- case V4SI_FTYPE_V4SI_V4SI_V4SI_INT: + nargs = 4; + mask_pos = 1; + nargs_constant = 1; diff --git a/reduction-paths-with-unhandled-live-stmt.patch b/reduction-paths-with-unhandled-live-stmt.patch new file mode 100644 index 0000000..22dc08d --- /dev/null +++ b/reduction-paths-with-unhandled-live-stmt.patch @@ -0,0 +1,64 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +2686de5617bfb572343933be2883e8274c9735b5 +0001-tree-optimization-97760-reduction-paths-with-unhandl.patch + +diff --git a/gcc/testsuite/gcc.dg/vect/pr97760.c b/gcc/testsuite/gcc.dg/vect/pr97760.c +new file mode 100644 +index 00000000000..da5ac937a43 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/pr97760.c +@@ -0,0 +1,26 @@ ++#include "tree-vect.h" ++ ++int b=1; ++static int *g = &b; ++ ++void __attribute__((noipa)) ++h (unsigned int n) ++{ ++ int i = 3; ++ int f = 3; ++ for (; f <= 50; f += 4) { ++ i += 4; ++ *g = i; ++ i += n; ++ } ++} ++ ++int main () ++{ ++ check_vect (); ++ ++ h (9); ++ if (*g != 150 || b != 150) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +index 977633a3ce3..39b7319e825 100644 +--- a/gcc/tree-vect-loop.c ++++ b/gcc/tree-vect-loop.c +@@ -3326,14 +3326,17 @@ pop: + fail = true; + break; + } +- /* Check there's only a single stmt the op is used on inside +- of the loop. */ ++ /* Check there's only a single stmt the op is used on. For the ++ not value-changing tail and the last stmt allow out-of-loop uses. ++ ??? We could relax this and handle arbitrary live stmts by ++ forcing a scalar epilogue for example. */ + imm_use_iterator imm_iter; + gimple *op_use_stmt; + unsigned cnt = 0; + FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op) + if (!is_gimple_debug (op_use_stmt) +- && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))) ++ && (*code != ERROR_MARK ++ || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))) + { + /* We want to allow x + x but not x < 1 ? x : 2. 
*/ + if (is_gimple_assign (op_use_stmt) + diff --git a/redundant-loop-elimination.patch b/redundant-loop-elimination.patch new file mode 100644 index 0000000..fb33bec --- /dev/null +++ b/redundant-loop-elimination.patch @@ -0,0 +1,486 @@ +diff -Nurp a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2020-11-23 03:24:54.760000000 -0500 ++++ b/gcc/common.opt 2020-11-23 03:23:59.716000000 -0500 +@@ -1150,6 +1150,10 @@ fcompare-elim + Common Report Var(flag_compare_elim_after_reload) Optimization + Perform comparison elimination after register allocation has finished. + ++floop-elim ++Common Report Var(flag_loop_elim) Init(0) Optimization ++Perform redundant loop elimination. ++ + fconserve-stack + Common Var(flag_conserve_stack) Optimization + Do not perform optimizations increasing noticeably stack usage. +diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +--- a/gcc/tree-ssa-phiopt.c 2020-11-23 03:24:54.760000000 -0500 ++++ b/gcc/tree-ssa-phiopt.c 2020-11-23 03:27:42.824000000 -0500 +@@ -71,6 +71,7 @@ static hash_set * get_non_trapping + static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree); + static void hoist_adjacent_loads (basic_block, basic_block, + basic_block, basic_block); ++static bool do_phiopt_pattern (basic_block, basic_block, basic_block); + static bool gate_hoist_loads (void); + + /* This pass tries to transform conditional stores into unconditional +@@ -259,6 +260,10 @@ tree_ssa_phiopt_worker (bool do_store_el + hoist_adjacent_loads (bb, bb1, bb2, bb3); + continue; + } ++ else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2)) ++ { ++ continue; ++ } + else + continue; + +@@ -2899,6 +2904,449 @@ hoist_adjacent_loads (basic_block bb0, b + } + } + ++static bool check_uses (tree, hash_set *); ++ ++/* Check SSA_NAME is used in ++ if (SSA_NAME == 0) ++ ... ++ or ++ if (SSA_NAME != 0) ++ ... ++*/ ++static bool ++check_uses_cond (tree ssa_name, gimple *stmt, ++ hash_set *hset ATTRIBUTE_UNUSED) ++{ ++ tree_code code = gimple_cond_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR) ++ { ++ return false; ++ } ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ if ((lhs == ssa_name && integer_zerop (rhs)) ++ || (rhs == ssa_name && integer_zerop (lhs))) ++ { ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Check SSA_NAME is used in ++ _tmp = SSA_NAME == 0; ++ or ++ _tmp = SSA_NAME != 0; ++ or ++ _tmp = SSA_NAME | _tmp2; ++*/ ++static bool ++check_uses_assign (tree ssa_name, gimple *stmt, hash_set *hset) ++{ ++ tree_code code = gimple_assign_rhs_code (stmt); ++ tree lhs, rhs1, rhs2; ++ ++ switch (code) ++ { ++ case EQ_EXPR: ++ case NE_EXPR: ++ rhs1 = gimple_assign_rhs1 (stmt); ++ rhs2 = gimple_assign_rhs2 (stmt); ++ if ((rhs1 == ssa_name && integer_zerop (rhs2)) ++ || (rhs2 == ssa_name && integer_zerop (rhs1))) ++ { ++ return true; ++ } ++ break; ++ ++ case BIT_IOR_EXPR: ++ lhs = gimple_assign_lhs (stmt); ++ if (hset->contains (lhs)) ++ { ++ return false; ++ } ++ /* We should check the use of _tmp further. 
*/ ++ return check_uses (lhs, hset); ++ ++ default: ++ break; ++ } ++ return false; ++} ++ ++/* Check SSA_NAME is used in ++ # result = PHI ++*/ ++static bool ++check_uses_phi (tree ssa_name, gimple *stmt, hash_set *hset) ++{ ++ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) ++ { ++ tree arg = gimple_phi_arg_def (stmt, i); ++ if (!integer_zerop (arg) && arg != ssa_name) ++ { ++ return false; ++ } ++ } ++ ++ tree result = gimple_phi_result (stmt); ++ ++ /* It is used to avoid infinite recursion, ++ ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)> ++ {BODY} ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)> ++ {BODY} ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ ... ++ */ ++ if (hset->contains (result)) ++ { ++ return false; ++ } ++ ++ return check_uses (result, hset); ++} ++ ++/* Check the use of SSA_NAME, it should only be used in comparison ++ operation and PHI node. HSET is used to record the ssa_names ++ that have been already checked. */ ++static bool ++check_uses (tree ssa_name, hash_set *hset) ++{ ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ ++ if (TREE_CODE (ssa_name) != SSA_NAME) ++ { ++ return false; ++ } ++ ++ if (SSA_NAME_VAR (ssa_name) ++ && is_global_var (SSA_NAME_VAR (ssa_name))) ++ { ++ return false; ++ } ++ ++ hset->add (ssa_name); ++ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name) ++ { ++ gimple *stmt = USE_STMT (use_p); ++ ++ /* Ignore debug gimple statements. */ ++ if (is_gimple_debug (stmt)) ++ { ++ continue; ++ } ++ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_COND: ++ if (!check_uses_cond (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ case GIMPLE_ASSIGN: ++ if (!check_uses_assign (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ case GIMPLE_PHI: ++ if (!check_uses_phi (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ default: ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool ++check_def_gimple (gimple *def1, gimple *def2, tree result) ++{ ++ /* def1 and def2 should be POINTER_PLUS_EXPR. */ ++ if (!is_gimple_assign (def1) || !is_gimple_assign (def2) ++ || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR ++ || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR) ++ { ++ return false; ++ } ++ ++ tree rhs12 = gimple_assign_rhs2 (def1); ++ ++ tree rhs21 = gimple_assign_rhs1 (def2); ++ tree rhs22 = gimple_assign_rhs2 (def2); ++ ++ if (rhs21 != result) ++ { ++ return false; ++ } ++ ++ /* We should have a positive pointer-plus constant to ensure ++ that the pointer value is continuously increasing. */ ++ if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST ++ || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0) ++ { ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool ++check_loop_body (basic_block bb0, basic_block bb2, tree result) ++{ ++ gimple *g01 = first_stmt (bb0); ++ if (!g01 || !is_gimple_assign (g01) ++ || gimple_assign_rhs_code (g01) != MEM_REF ++ || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result) ++ { ++ return false; ++ } ++ ++ gimple *g02 = g01->next; ++ /* GIMPLE_COND would be the last gimple in a basic block, ++ and have no other side effects on RESULT. */ ++ if (!g02 || gimple_code (g02) != GIMPLE_COND) ++ { ++ return false; ++ } ++ ++ if (first_stmt (bb2) != last_stmt (bb2)) ++ { ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Pattern is like ++
++   arg1 = base (rhs11) + cst (rhs12); [def1]
++   goto <bb0>
++
++   <bb2>
++   arg2 = result (rhs21) + cst (rhs22); [def2]
++
++   <bb0>
++   # result = PHI <arg1 (prebb), arg2 (bb2)>
++   _v = *result;  [g01]
++   if (_v == 0)   [g02]
++     goto <bb1>
++   else
++     goto <bb2>
++
++   <bb1>
++   _1 = result - base;     [g1]
++   _2 = _1 /[ex] cst;      [g2]
++   _3 = (unsigned int) _2; [g3]
++   if (_3 == 0)
++   ...
++*/
++static bool
++check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
++		gphi *phi_stmt, gimple *&output)
++{
++  /* Start check from PHI node in BB0.  */
++  if (gimple_phi_num_args (phi_stmt) != 2
++      || virtual_operand_p (gimple_phi_result (phi_stmt)))
++    {
++      return false;
++    }
++
++  tree result = gimple_phi_result (phi_stmt);
++  tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
++  tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
++
++  if (TREE_CODE (arg1) != SSA_NAME
++      || TREE_CODE (arg2) != SSA_NAME
++      || SSA_NAME_IS_DEFAULT_DEF (arg1)
++      || SSA_NAME_IS_DEFAULT_DEF (arg2))
++    {
++      return false;
++    }
++
++  gimple *def1 = SSA_NAME_DEF_STMT (arg1);
++  gimple *def2 = SSA_NAME_DEF_STMT (arg2);
++
++  /* Swap bb1 and bb2 if pattern is like
++     if (_v != 0)
++       goto <bb2>
++     else
++       goto <bb1>
++  */
++  if (gimple_bb (def2) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
++    {
++      std::swap (bb1, bb2);
++    }
++
++  /* prebb[def1] --> bb0 <-- bb2[def2] */
++  if (!gimple_bb (def1)
++      || EDGE_SUCC (gimple_bb (def1), 0)->dest != bb0
++      || gimple_bb (def2) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
++    {
++      return false;
++    }
++
++  /* Check whether define gimple meets the pattern requirements.  */
++  if (!check_def_gimple (def1, def2, result))
++    {
++      return false;
++    }
++
++  if (!check_loop_body (bb0, bb2, result))
++    {
++      return false;
++    }
++
++  output = def1;
++  return true;
++}
++
++/* Check pattern
++   <bb1>
++   _1 = result - base;     [g1]
++   _2 = _1 /[ex] cst;      [g2]
++   _3 = (unsigned int) _2; [g3]
++   if (_3 == 0)
++   ...
++*/
++static bool
++check_gimple_order (basic_block bb1, tree base, tree cst, tree result,
++		    gimple *&output)
++{
++  gimple *g1 = first_stmt (bb1);
++  if (!g1 || !is_gimple_assign (g1)
++      || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
++      || gimple_assign_rhs1 (g1) != result
++      || gimple_assign_rhs2 (g1) != base)
++    {
++      return false;
++    }
++
++  gimple *g2 = g1->next;
++  if (!g2 || !is_gimple_assign (g2)
++      || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
++      || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
++      || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
++    {
++      return false;
++    }
++
++  /* INTEGER_CST cst in gimple def1.  */
++  HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
++  /* INTEGER_CST cst in gimple g2.  */
++  HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
++  /* _2 must be at least a positive number.  */
++  if (num2 == 0 || num1 / num2 <= 0)
++    {
++      return false;
++    }
++
++  gimple *g3 = g2->next;
++  if (!g3 || !is_gimple_assign (g3)
++      || gimple_assign_rhs_code (g3) != NOP_EXPR
++      || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
++      || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
++    {
++      return false;
++    }
++
++  /* _3 should only be used in comparison operation or PHI node.  */
++  hash_set<tree> *hset = new hash_set<tree>;
++  if (!check_uses (gimple_assign_lhs (g3), hset))
++    {
++      delete hset;
++      return false;
++    }
++  delete hset;
++
++  output = g3;
++  return true;
++}
++
++static bool
++do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
++{
++  gphi_iterator gsi;
++
++  for (gsi = gsi_start_phis (bb0); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      gphi *phi_stmt = gsi.phi ();
++      gimple *def1 = NULL;
++      tree base, cst, result;
++
++      if (!check_bb_order (bb0, bb1, bb2, phi_stmt, def1))
++	{
++	  continue;
++	}
++
++      base = gimple_assign_rhs1 (def1);
++      cst = gimple_assign_rhs2 (def1);
++      result = gimple_phi_result (phi_stmt);
++
++      gimple *stmt = NULL;
++      if (!check_gimple_order (bb1, base, cst, result, stmt))
++	{
++	  continue;
++	}
++
++      gcc_assert (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "to\n");
++	}
++
++      /* Rewrite statement
++	   _3 = (unsigned int) _2;
++	 to
++	   _3 = (unsigned int) 1;
++      */
++      tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
++      gimple_assign_set_rhs1 (stmt, build_int_cst (type, 1));
++      update_stmt (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "\n");
++	}
++
++      return true;
++    }
++  return false;
++}
++
+ /* Determine whether we should attempt to hoist adjacent loads out of
+    diamond patterns in pass_phiopt.  Always hoist loads if
+    -fhoist-adjacent-loads is specified and the target machine has
diff --git a/sccvn-Improve-handling-of-load-masked-with-integer.patch b/sccvn-Improve-handling-of-load-masked-with-integer.patch
new file mode 100644
index 0000000..f24d0c3
--- /dev/null
+++ b/sccvn-Improve-handling-of-load-masked-with-integer.patch
@@ -0,0 +1,2397 @@
+This backport contains 14 patches from the GCC mainline tree.
+The commit IDs of these patches are listed below in chronological order.
+
+c2851dc2896bfc0d27b32c90cafc873f67cd6727
+0001-tree-ssa-sccvn.c-struct-vn_walk_cb_data-Add-orig_ref.patch
+
+69b5279e977593d656906288316ee03a8bf79c6a
+0001-gimple-parser.c-c_parser_gimple_postfix_expression-H.patch
+
+8389386c6d55d57afc3ae01f71546ac4468f7926
+0001-gimple-parser.c-c_parser_gimple_postfix_expression-S.patch
+
+d1f2e4c1027b826cf3ba353e86c37589f63f8efe
+0001-tree-ssa-sccvn.c-vn_walk_cb_data-push_partial_def-Re.patch
+
+62e3e66f130fc280eac0bbb6b69e9adca328c03b
+0001-re-PR-tree-optimization-83518-Missing-optimization-u.patch
+
+10f30ac9cda947d117e50f0cbd4cf94ee70a944f
+0001-re-PR-tree-optimization-91756-g-.dg-lto-alias-3-FAIL.patch
+
+1284e2b104a81ad93daab5110cd844981e501086
+0001-re-PR-tree-optimization-90883-Generated-code-is-wors.patch
+
+fb08a53b2eb01cc06d66f479c865aca55c91fd26
+0001-tree-ssa-sccvn.c-vn_walk_cb_data-push_partial_def-Ba.patch
+
+0849cdae714ddf056a4944f31eef53a465f1bcd0
+0001-tree-ssa-sccvn.c-vn_walk_cb_data-push_partial_def-Ha.patch
+
+5f0653a8b75a5ad5a5405a27dd92d3a5759eed4c
+0001-tree-optimization-91123-restore-redundant-store-remo.patch
+
+8aba425f4ebc5e2c054776d3cdddf13f7c1918f8
+0001-sccvn-Handle-bitfields-in-vn_reference_lookup_3-PR93.patch
+
+7f5617b00445dcc861a498a4cecc8aaa59e05b8c
+0001-sccvn-Handle-bitfields-in-push_partial_def-PR93582.patch
+
+5f9cd512c4278621435cce486dd00248ea2e821c
+0001-sccvn-Handle-non-byte-aligned-offset-or-size-for-mem.patch
+
+b07e4e7c7520ca3e798f514dec0711eea2c027be
+0001-sccvn-Improve-handling-of-load-masked-with-integer-c.patch
+
+diff -urpN a/gcc/c/gimple-parser.c b/gcc/c/gimple-parser.c
+--- a/gcc/c/gimple-parser.c	2020-11-26 22:26:34.848000000 -0500
++++ b/gcc/c/gimple-parser.c	2020-11-26 22:06:08.032000000 -0500
+@@ -1320,17 +1320,24 @@ c_parser_gimple_postfix_expression (gimp
+ 		}
+ 	      else
+ 		{
+-		  bool neg_p;
++		  bool neg_p, addr_p;
+ 		  if ((neg_p = c_parser_next_token_is (parser, CPP_MINUS)))
+ 		    c_parser_consume_token (parser);
++		  if ((addr_p = c_parser_next_token_is (parser, CPP_AND)))
++		    c_parser_consume_token (parser);
+ 		  tree val = c_parser_gimple_postfix_expression (parser).value;
+ 		  if (! val
+ 		      || val == error_mark_node
+-		      || ! CONSTANT_CLASS_P (val))
++		      || (!CONSTANT_CLASS_P (val)
++			  && !(addr_p
++			       && (TREE_CODE (val) == STRING_CST
++				   || DECL_P (val)))))
+ 		    {
+ 		      c_parser_error (parser, "invalid _Literal");
+ 		      return expr;
+ 		    }
++		  if (addr_p)
++		    val = build1 (ADDR_EXPR, type, val);
+ 		  if (neg_p)
+ 		    {
+ 		      val = const_unop (NEGATE_EXPR, TREE_TYPE (val), val);
+diff -urpN a/gcc/fold-const.c b/gcc/fold-const.c
+--- a/gcc/fold-const.c	2020-11-26 22:26:32.816000000 -0500
++++ b/gcc/fold-const.c	2020-11-26 22:06:08.036000000 -0500
+@@ -7773,6 +7773,70 @@ native_decode_vector_tree (tree type, ve
+   return builder.build ();
+ }
+ 
++/* Routines for manipulation of native_encode_expr encoded data if the encoded
++   or extracted constant positions and/or sizes aren't byte aligned.  */
++
++/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
++   bits between adjacent elements.  AMNT should be within
++   [0, BITS_PER_UNIT).
++   Example, AMNT = 2:
++   00011111|11100000 << 2 = 01111111|10000000
++   PTR[1]  | PTR[0]	     PTR[1]  | PTR[0].  */
++
++void
++shift_bytes_in_array_left (unsigned char *ptr, unsigned int sz,
++			   unsigned int amnt)
++{
++  if (amnt == 0)
++    return;
++
++  unsigned char carry_over = 0U;
++  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
++  unsigned char clear_mask = (~0U) << amnt;
++
++  for (unsigned int i = 0; i < sz; i++)
++    {
++      unsigned prev_carry_over = carry_over;
++      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
++
++      ptr[i] <<= amnt;
++      if (i != 0)
++	{
++	  ptr[i] &= clear_mask;
++	  ptr[i] |= prev_carry_over;
++	}
++    }
++}
++
++/* Like shift_bytes_in_array_left but for big-endian.
++   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
++   bits between adjacent elements.  AMNT should be within
++   [0, BITS_PER_UNIT).
++   Example, AMNT = 2:
++   00011111|11100000 >> 2 = 00000111|11111000
++   PTR[0]  | PTR[1]	     PTR[0]  | PTR[1].  */
++
++void
++shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
++			    unsigned int amnt)
++{
++  if (amnt == 0)
++    return;
++
++  unsigned char carry_over = 0U;
++  unsigned char carry_mask = ~(~0U << amnt);
++
++  for (unsigned int i = 0; i < sz; i++)
++    {
++      unsigned prev_carry_over = carry_over;
++      carry_over = ptr[i] & carry_mask;
++
++      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
++      ptr[i] >>= amnt;
++      ptr[i] |= prev_carry_over;
++    }
++}
++
+ /* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating
+    directly on the VECTOR_CST encoding, in a way that works for variable-
+    length vectors.  Return the resulting VECTOR_CST on success or null
+diff -urpN a/gcc/fold-const.h b/gcc/fold-const.h
+--- a/gcc/fold-const.h	2020-11-26 22:26:32.816000000 -0500
++++ b/gcc/fold-const.h	2020-11-26 22:06:08.036000000 -0500
+@@ -27,6 +27,10 @@ extern int folding_initializer;
+ /* Convert between trees and native memory representation.  */
+ extern int native_encode_expr (const_tree, unsigned char *, int, int off = -1);
+ extern tree native_interpret_expr (tree, const unsigned char *, int);
++extern void shift_bytes_in_array_left (unsigned char *, unsigned int,
++				       unsigned int);
++extern void shift_bytes_in_array_right (unsigned char *, unsigned int,
++					unsigned int);
+ 
+ /* Fold constants as much as possible in an expression.
+    Returns the simplified expression.
+diff -urpN a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
+--- a/gcc/gimple-ssa-store-merging.c	2020-11-26 22:26:32.860000000 -0500
++++ b/gcc/gimple-ssa-store-merging.c	2020-11-26 22:06:08.036000000 -0500
+@@ -1464,66 +1464,6 @@ dump_char_array (FILE *fd, unsigned char
+   fprintf (fd, "\n");
+ }
+ 
+-/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
+-   bits between adjacent elements.  AMNT should be within
+-   [0, BITS_PER_UNIT).
+-   Example, AMNT = 2:
+-   00011111|11100000 << 2 = 01111111|10000000
+-   PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */
+-
+-static void
+-shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
+-{
+-  if (amnt == 0)
+-    return;
+-
+-  unsigned char carry_over = 0U;
+-  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
+-  unsigned char clear_mask = (~0U) << amnt;
+-
+-  for (unsigned int i = 0; i < sz; i++)
+-    {
+-      unsigned prev_carry_over = carry_over;
+-      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
+-
+-      ptr[i] <<= amnt;
+-      if (i != 0)
+-	{
+-	  ptr[i] &= clear_mask;
+-	  ptr[i] |= prev_carry_over;
+-	}
+-    }
+-}
+-
+-/* Like shift_bytes_in_array but for big-endian.
+-   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
+-   bits between adjacent elements.  AMNT should be within
+-   [0, BITS_PER_UNIT).
+-   Example, AMNT = 2:
+-   00011111|11100000 >> 2 = 00000111|11111000
+-   PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */
+-
+-static void
+-shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
+-			    unsigned int amnt)
+-{
+-  if (amnt == 0)
+-    return;
+-
+-  unsigned char carry_over = 0U;
+-  unsigned char carry_mask = ~(~0U << amnt);
+-
+-  for (unsigned int i = 0; i < sz; i++)
+-    {
+-      unsigned prev_carry_over = carry_over;
+-      carry_over = ptr[i] & carry_mask;
+-
+-      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
+-      ptr[i] >>= amnt;
+-      ptr[i] |= prev_carry_over;
+-    }
+-}
+-
+ /* Clear out LEN bits starting from bit START in the byte array
+    PTR.  This clears the bits to the *right* from START.
+    START must be within [0, BITS_PER_UNIT) and counts starting from
+@@ -1749,7 +1689,7 @@ encode_tree_to_bitpos (tree expr, unsign
+   /* Create the shifted version of EXPR.  */
+   if (!BYTES_BIG_ENDIAN)
+     {
+-      shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
++      shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
+       if (shift_amnt == 0)
+ 	byte_size--;
+     }
+@@ -4667,11 +4607,11 @@ verify_array_eq (unsigned char *x, unsig
+     }
+ }
+ 
+-/* Test shift_bytes_in_array and that it carries bits across between
++/* Test shift_bytes_in_array_left and that it carries bits across between
+    bytes correctly.  */
+ 
+ static void
+-verify_shift_bytes_in_array (void)
++verify_shift_bytes_in_array_left (void)
+ {
+    /* byte 1   | byte 0
+       00011111 | 11100000.  */
+@@ -4680,13 +4620,13 @@ verify_shift_bytes_in_array (void)
+   memcpy (in, orig, sizeof orig);
+ 
+   unsigned char expected[2] = { 0x80, 0x7f };
+-  shift_bytes_in_array (in, sizeof (in), 2);
++  shift_bytes_in_array_left (in, sizeof (in), 2);
+   verify_array_eq (in, expected, sizeof (in));
+ 
+   memcpy (in, orig, sizeof orig);
+   memcpy (expected, orig, sizeof orig);
+   /* Check that shifting by zero doesn't change anything.  */
+-  shift_bytes_in_array (in, sizeof (in), 0);
++  shift_bytes_in_array_left (in, sizeof (in), 0);
+   verify_array_eq (in, expected, sizeof (in));
+ 
+ }
+@@ -4771,7 +4711,7 @@ verify_clear_bit_region_be (void)
+ void
+ store_merging_c_tests (void)
+ {
+-  verify_shift_bytes_in_array ();
++  verify_shift_bytes_in_array_left ();
+   verify_shift_bytes_in_array_right ();
+   verify_clear_bit_region ();
+   verify_clear_bit_region_be ();
+diff -urpN a/gcc/testsuite/gcc.c-torture/execute/pr93582.c b/gcc/testsuite/gcc.c-torture/execute/pr93582.c
+--- a/gcc/testsuite/gcc.c-torture/execute/pr93582.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.c-torture/execute/pr93582.c	2020-11-26 22:25:43.532000000 -0500
+@@ -0,0 +1,22 @@
++/* PR tree-optimization/93582 */
++
++short a;
++int b, c;
++
++__attribute__((noipa)) void
++foo (void)
++{
++  b = c;
++  a &= 7;
++}
++
++int
++main ()
++{
++  c = 27;
++  a = 14;
++  foo ();
++  if (b != 27 || a != 6)
++    __builtin_abort ();
++  return 0;
++}
+diff -urpN a/gcc/testsuite/gcc.dg/gimplefe-42.c b/gcc/testsuite/gcc.dg/gimplefe-42.c
+--- a/gcc/testsuite/gcc.dg/gimplefe-42.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/gimplefe-42.c	2020-11-26 22:06:08.036000000 -0500
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-fgimple" } */
++
++typedef char ref_all_char __attribute__((may_alias));
++char a[7];
++__GIMPLE void f()
++{
++  int _1;
++  /* string literals inside __MEM need their address taken.  */
++  __MEM <char[7]> ((ref_all_char *)&a)
++    = __MEM <char[7]> (_Literal (char *) &"654321");
++  /* but plain assignment also works.  */
++  __MEM <char[7]> ((ref_all_char *)&a) = "654321";
++  /* also punning with int.  */
++  _1 = __MEM <int> (_Literal (char *) &"654321");
++  __MEM <int> ((ref_all_char *)&a) = _1;
++  return;
++}
+diff -urpN a/gcc/testsuite/gcc.dg/pr93582.c b/gcc/testsuite/gcc.dg/pr93582.c
+--- a/gcc/testsuite/gcc.dg/pr93582.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/pr93582.c	2020-11-26 22:26:15.784000000 -0500
+@@ -0,0 +1,57 @@
++/* PR tree-optimization/93582 */
++/* { dg-do compile } */
++/* { dg-options "-O2 -Warray-bounds" } */
++
++struct S {
++  unsigned int s1:1;
++  unsigned int s2:1;
++  unsigned int s3:1;
++  unsigned int s4:1;
++  unsigned int s5:4;
++  unsigned char s6;
++  unsigned short s7;
++  unsigned short s8;
++};
++struct T {
++  int t1;
++  int t2;
++};
++
++static inline int
++bar (struct S *x)
++{
++  if (x->s4)
++    return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2;	/* { dg-bogus "array subscript 1 is outside array bounds of" } */
++  else
++    return 0;
++}
++
++int
++foo (int x, int y)
++{
++  struct S s;								/* { dg-bogus "while referencing" } */
++  s.s6 = x;
++  s.s7 = y & 0x1FFF;
++  s.s4 = 0;
++  return bar (&s);
++}
++
++static inline int
++qux (struct S *x)
++{
++  int s4 = x->s4;
++  if (s4)
++    return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2;
++  else
++    return 0;
++}
++
++int
++baz (int x, int y)
++{
++  struct S s;
++  s.s6 = x;
++  s.s7 = y & 0x1FFF;
++  s.s4 = 0;
++  return qux (&s);
++}
+diff -urpN a/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c
+--- a/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c	2020-11-26 22:06:08.036000000 -0500
+@@ -0,0 +1,27 @@
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
++/* { dg-additional-options "-fgimple -fdump-tree-fre1" } */
++
++typedef int v4si __attribute__((vector_size(16)));
++
++int __GIMPLE (ssa,startwith("fre"))
++foo ()
++{
++  int * p;
++  int i;
++  int x[4];
++  long unsigned int _1;
++  long unsigned int _2;
++  int _7;
++
++  __BB(2):
++  i_3 = 0;
++  _1 = (long unsigned int) i_3;
++  _2 = _1 * 4ul;
++  p_4 = _Literal (int *) &x + _2;
++  __MEM <v4si> ((v4si *)p_4) = _Literal (v4si) { 1, 2, 3, 4 };
++  _7 = x[0];
++  return _7;
++}
++
++/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */
+diff -urpN a/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c
+--- a/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c	2020-11-26 22:06:08.036000000 -0500
+@@ -0,0 +1,27 @@
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
++/* { dg-additional-options "-fgimple -fdump-tree-fre1" } */
++
++typedef int v4si __attribute__((vector_size(16)));
++
++int __GIMPLE (ssa,startwith("fre"))
++foo ()
++{
++  int * p;
++  int i;
++  int x[4];
++  long unsigned int _1;
++  long unsigned int _2;
++  int _7;
++
++  __BB(2):
++  i_3 = 0;
++  _1 = (long unsigned int) i_3;
++  _2 = _1 * 4ul;
++  p_4 = _Literal (int *) &x + _2;
++  __MEM <v4si> ((v4si *)p_4) = _Literal (v4si) {};
++  _7 = x[0];
++  return _7;
++}
++
++/* { dg-final { scan-tree-dump "return 0;" "fre1" } } */
+diff -urpN a/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c
+--- a/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c	2020-11-26 22:06:08.036000000 -0500
+@@ -0,0 +1,29 @@
++/* { dg-do compile } */
++/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
++/* { dg-additional-options "-fgimple -fdump-tree-fre1" } */
++
++typedef int v4si __attribute__((vector_size(16)));
++
++int __GIMPLE (ssa,startwith("fre"))
++foo (int c)
++{
++  int * p;
++  int i;
++  int x[4];
++  long unsigned int _1;
++  long unsigned int _2;
++  int _7;
++  v4si _6;
++
++  __BB(2):
++  i_3 = 0;
++  _1 = (long unsigned int) i_3;
++  _2 = _1 * 4ul;
++  p_4 = _Literal (int *) &x + _2;
++  _6 = _Literal (v4si) { c_5(D), c_5(D), c_5(D), c_5(D) };
++  __MEM <v4si> ((v4si *)p_4) = _6;
++  _7 = x[0];
++  return _7;
++}
++
++/* { dg-final { scan-tree-dump "return c_5\\(D\\);" "fre1" } } */
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c b/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c	2020-11-26 22:26:34.324000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c	2020-11-26 22:06:08.036000000 -0500
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -fdump-tree-fre3" } */
++/* { dg-options "-O2 -fdump-tree-fre1" } */
+ struct foo
+ {
+   int val;
+@@ -18,4 +18,4 @@ test ()
+   return barptr->val2;
+ }
+ 
+-/* { dg-final { scan-tree-dump-times "return 123" 1 "fre3"} } */
++/* { dg-final { scan-tree-dump-times "return 123" 1 "fre1"} } */
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c	2020-11-26 22:24:45.812000000 -0500
+@@ -0,0 +1,29 @@
++/* PR tree-optimization/93582 */
++/* { dg-do compile { target int32 } } */
++/* { dg-options "-O2 -fdump-tree-fre1" } */
++/* { dg-final { scan-tree-dump "return 72876566;" "fre1" { target le } } } */
++/* { dg-final { scan-tree-dump "return 559957376;" "fre1" { target be } } } */
++
++union U {
++  struct S { int a : 12, b : 5, c : 10, d : 5; } s;
++  unsigned int i;
++};
++struct A { char a[12]; union U u; };
++void bar (struct A *);
++
++unsigned
++foo (void)
++{
++  struct A a;
++  bar (&a);
++  a.u.s.a = 1590;
++  a.u.s.c = -404;
++#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
++#define M 0x67e0a5f
++#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
++#define M 0xa5f067e0
++#else
++#define M 0
++#endif
++  return a.u.i & M;
++}
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c	2020-11-26 22:18:39.368000000 -0500
+@@ -0,0 +1,18 @@
++/* PR tree-optimization/93582 */
++/* { dg-do compile { target int32 } } */
++/* { dg-options "-O2 -fdump-tree-fre1" } */
++/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */
++
++union U {
++  struct S { int a : 1, b : 4, c : 27; } s;
++  struct T { int d : 2; int e : 2; int f : 28; } t;
++};
++
++int
++foo (void)
++{
++  union U u;
++  u.s.b = 10;
++  return u.t.e;
++}
++
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c	2020-11-26 22:18:44.832000000 -0500
+@@ -0,0 +1,17 @@
++/* PR tree-optimization/93582 */
++/* { dg-do compile { target int32 } } */
++/* { dg-options "-O2 -fdump-tree-fre1" } */
++/* { dg-final { scan-tree-dump "return 593;" "fre1" } } */
++
++union U {
++  struct S { int a : 1, b : 14, c : 17; } s;
++  struct T { int d : 2; int e : 12; int f : 18; } t;
++};
++
++int
++foo (void)
++{
++  union U u;
++  u.s.b = -7005;
++  return u.t.e;
++}
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c	2020-11-26 22:21:44.936000000 -0500
+@@ -0,0 +1,19 @@
++/* PR tree-optimization/93582 */
++/* { dg-do compile { target int32 } } */
++/* { dg-options "-O2 -fdump-tree-fre1" } */
++/* { dg-final { scan-tree-dump "return 1;" "fre1" { target be } } } */
++/* { dg-final { scan-tree-dump "return 2;" "fre1" { target le } } } */
++
++union U {
++  struct S { int a : 1, b : 14, c : 17; } s;
++  struct T { int d : 10; int e : 4; int f : 18; } t;
++};
++
++int
++foo (void)
++{
++  union U u;
++  u.s.b = -7005;
++  return u.t.e;
++}
++
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c	2020-11-26 22:23:33.236000000 -0500
+@@ -0,0 +1,24 @@
++/* PR tree-optimization/93582 */
++/* { dg-do compile { target int32 } } */
++/* { dg-options "-O2 -fdump-tree-fre1" } */
++/* { dg-final { scan-tree-dump "return -1991560811;" "fre1" { target le } } } */
++/* { dg-final { scan-tree-dump "return -733324916;" "fre1" { target be } } } */
++
++union U {
++  struct S { int a : 1, b : 4, c : 27; } s;
++  unsigned int i;
++};
++struct A { char a[24]; union U u; };
++void bar (struct A *);
++
++int
++foo (void)
++{
++  struct A a;
++  bar (&a);
++  a.u.s.a = -1;
++  a.u.s.b = -6;
++  a.u.s.c = -62236276;
++  return a.u.i;
++}
++
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c	2020-11-26 22:23:38.324000000 -0500
+@@ -0,0 +1,26 @@
++/* PR tree-optimization/93582 */
++/* { dg-do compile { target int32 } } */
++/* { dg-options "-O2 -fdump-tree-fre1" } */
++/* { dg-final { scan-tree-dump "return -1462729318;" "fre1" { target le } } } */
++/* { dg-final { scan-tree-dump "return 1300568597;" "fre1" { target be } } } */
++
++union U {
++  struct S { int a : 1, b : 7, c : 8, d : 11, e : 5; } s;
++  unsigned int i;
++};
++struct A { char a[8]; union U u; };
++void bar (struct A *);
++
++int
++foo (void)
++{
++  struct A a;
++  bar (&a);
++  a.u.s.a = 0;
++  a.u.s.b = -51;
++  a.u.s.c = -123;
++  a.u.s.d = 208;
++  a.u.s.e = -11;
++  return a.u.i;
++}
++
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c	2020-11-26 22:23:42.348000000 -0500
+@@ -0,0 +1,25 @@
++/* PR tree-optimization/93582 */
++/* { dg-do compile { target int32 } } */
++/* { dg-options "-O2 -fdump-tree-fre1" } */
++/* { dg-final { scan-tree-dump "return 890118;" "fre1" { target le } } } */
++/* { dg-final { scan-tree-dump "return 447899;" "fre1" { target be } } } */
++
++union U {
++  struct S { int a : 16, b : 5, c : 10, d : 1; } s;
++  struct T { int a : 8, b : 21, c : 3; } t;
++};
++struct A { char a[4]; union U u; };
++void bar (struct A *);
++
++int
++foo (void)
++{
++  struct A a;
++  bar (&a);
++  a.u.s.a = 1590;
++  a.u.s.b = -11;
++  a.u.s.c = 620;
++  a.u.s.d = -1;
++  return a.u.t.b;
++}
++
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c	2020-11-26 22:23:45.756000000 -0500
+@@ -0,0 +1,25 @@
++/* PR tree-optimization/93582 */
++/* { dg-do compile { target int32 } } */
++/* { dg-options "-O2 -fdump-tree-fre1" } */
++/* { dg-final { scan-tree-dump "return -413012;" "fre1" { target le } } } */
++/* { dg-final { scan-tree-dump "return -611112;" "fre1" { target be } } } */
++
++union U {
++  struct S { int a : 12, b : 5, c : 10, d : 5; } s;
++  struct T { int a : 7, b : 21, c : 4; } t;
++};
++struct A { char a[48]; union U u; };
++void bar (struct A *);
++
++int
++foo (void)
++{
++  struct A a;
++  bar (&a);
++  a.u.s.a = 1590;
++  a.u.s.b = -11;
++  a.u.s.c = -404;
++  a.u.s.d = 7;
++  return a.u.t.b;
++}
++
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c	2020-11-26 22:23:53.088000000 -0500
+@@ -0,0 +1,15 @@
++/* PR tree-optimization/93582 */
++/* { dg-do compile { target int32 } } */
++/* { dg-options "-O2 -fdump-tree-fre1" } */
++/* { dg-final { scan-tree-dump "return 0;" "fre1" { target le } } } */
++/* { dg-final { scan-tree-dump "return -8531;" "fre1" { target be } } } */
++
++short
++foo (void)
++{
++  union U { char c[32]; short s[16]; int i[8]; } u;
++  __builtin_memset (u.c + 1, '\0', 5);
++  u.s[3] = 0xdead;
++  return u.i[1];
++}
++
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c	2020-11-26 22:06:08.036000000 -0500
+@@ -0,0 +1,25 @@
++/* { dg-do run } */
++/* { dg-options "-O -fdump-tree-fre1-details" } */
++
++struct S { _Bool x; };
++
++void
++foo (struct S *s)
++{
++  __builtin_memset (s, 1, sizeof (struct S));
++  s->x = 1;
++}
++
++int
++main ()
++{
++  struct S s;
++  foo (&s);
++  char c;
++  __builtin_memcpy (&c, &s.x, 1);
++  if (c != 1)
++    __builtin_abort ();
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "Deleted redundant store" "fre1" } } */
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c	2020-11-26 22:06:08.036000000 -0500
+@@ -0,0 +1,32 @@
++/* { dg-do compile } */
++/* { dg-options "-O -fdump-tree-fre1-details" } */
++
++struct X
++{
++   int a : 1;
++   int b : 1;
++} x;
++
++void foo (int v)
++{
++  x.a = 1;
++  x.b = v;
++  x.a = 1;
++  x.b = v;
++}
++
++struct Y
++{
++   _Bool a;
++   _Bool b;
++} y;
++
++void bar (int v)
++{
++  y.a = 1;
++  y.b = v;
++  y.a = 1;
++  y.b = v;
++}
++
++/* { dg-final { scan-tree-dump-times "Deleted redundant store" 4 "fre1" } } */
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c	2020-11-26 22:06:08.036000000 -0500
+@@ -0,0 +1,19 @@
++/* { dg-do compile } */
++/* { dg-options "-O -fdump-tree-fre1" } */
++
++typedef int v4si __attribute__((vector_size(16)));
++
++void foo (v4si *dst, int x)
++{
++  v4si v[2];
++  v[0][0] = 1;
++  v[0][1] = x;
++  v[0][2] = 2;
++  v[0][3] = 3;
++  v[0][1] = 0;
++  *dst = v[0];
++}
++
++/* The shadowed non-constant assign to v[0][1] shouldn't prevent us from
++   value-numbering the load to a constant.  */
++/* { dg-final { scan-tree-dump "\\*dst_\[0-9\]*\\\(D\\) = { 1, 0, 2, 3 };" "fre1" } } */
+diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c	2020-11-26 22:06:08.036000000 -0500
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-options "-O -fstrict-aliasing -fdump-tree-fre1-details" } */
++
++struct X { int i; int j; };
++
++struct X x, y;
++void foo ()
++{
++  x.i = 1;
++  y = x;
++  y.i = 1; // redundant
++}
++
++/* { dg-final { scan-tree-dump "Deleted redundant store y.i" "fre1" } } */
+diff -urpN a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
+--- a/gcc/tree-ssa-alias.c	2020-11-26 22:26:32.884000000 -0500
++++ b/gcc/tree-ssa-alias.c	2020-11-26 22:06:08.036000000 -0500
+@@ -2628,7 +2628,8 @@ static bool
+ maybe_skip_until (gimple *phi, tree &target, basic_block target_bb,
+ 		  ao_ref *ref, tree vuse, bool tbaa_p, unsigned int &limit,
+ 		  bitmap *visited, bool abort_on_visited,
+-		  void *(*translate)(ao_ref *, tree, void *, bool *),
++		  void *(*translate)(ao_ref *, tree, void *, translate_flags *),
++		  translate_flags disambiguate_only,
+ 		  void *data)
+ {
+   basic_block bb = gimple_bb (phi);
+@@ -2663,7 +2664,7 @@ maybe_skip_until (gimple *phi, tree &tar
+ 	    return !abort_on_visited;
+ 	  vuse = get_continuation_for_phi (def_stmt, ref, tbaa_p, limit,
+ 					   visited, abort_on_visited,
+-					   translate, data);
++					   translate, data, disambiguate_only);
+ 	  if (!vuse)
+ 	    return false;
+ 	  continue;
+@@ -2678,9 +2679,9 @@ maybe_skip_until (gimple *phi, tree &tar
+ 	  --limit;
+ 	  if (stmt_may_clobber_ref_p_1 (def_stmt, ref, tbaa_p))
+ 	    {
+-	      bool disambiguate_only = true;
++	      translate_flags tf = disambiguate_only;
+ 	      if (translate
+-		  && (*translate) (ref, vuse, data, &disambiguate_only) == NULL)
++		  && (*translate) (ref, vuse, data, &tf) == NULL)
+ 		;
+ 	      else
+ 		return false;
+@@ -2711,8 +2712,10 @@ tree
+ get_continuation_for_phi (gimple *phi, ao_ref *ref, bool tbaa_p,
+ 			  unsigned int &limit, bitmap *visited,
+ 			  bool abort_on_visited,
+-			  void *(*translate)(ao_ref *, tree, void *, bool *),
+-			  void *data)
++			  void *(*translate)(ao_ref *, tree, void *,
++					     translate_flags *),
++			  void *data,
++			  translate_flags disambiguate_only)
+ {
+   unsigned nargs = gimple_phi_num_args (phi);
+ 
+@@ -2754,13 +2757,15 @@ get_continuation_for_phi (gimple *phi, a
+       else if (! maybe_skip_until (phi, arg0, dom, ref, arg1, tbaa_p,
+ 				   limit, visited,
+ 				   abort_on_visited,
+-				   /* Do not translate when walking over
++				   translate,
++				   /* Do not valueize when walking over
+ 				      backedges.  */
+ 				   dominated_by_p
+ 				     (CDI_DOMINATORS,
+ 				      gimple_bb (SSA_NAME_DEF_STMT (arg1)),
+ 				      phi_bb)
+-				   ? NULL : translate, data))
++				   ? TR_DISAMBIGUATE
++				   : disambiguate_only, data))
+ 	return NULL_TREE;
+     }
+ 
+@@ -2798,7 +2803,8 @@ get_continuation_for_phi (gimple *phi, a
+ void *
+ walk_non_aliased_vuses (ao_ref *ref, tree vuse, bool tbaa_p,
+ 			void *(*walker)(ao_ref *, tree, void *),
+-			void *(*translate)(ao_ref *, tree, void *, bool *),
++			void *(*translate)(ao_ref *, tree, void *,
++					   translate_flags *),
+ 			tree (*valueize)(tree),
+ 			unsigned &limit, void *data)
+ {
+@@ -2851,7 +2857,7 @@ walk_non_aliased_vuses (ao_ref *ref, tre
+ 	    {
+ 	      if (!translate)
+ 		break;
+-	      bool disambiguate_only = false;
++	      translate_flags disambiguate_only = TR_TRANSLATE;
+ 	      res = (*translate) (ref, vuse, data, &disambiguate_only);
+ 	      /* Failed lookup and translation.  */
+ 	      if (res == (void *)-1)
+@@ -2863,7 +2869,7 @@ walk_non_aliased_vuses (ao_ref *ref, tre
+ 	      else if (res != NULL)
+ 		break;
+ 	      /* Translation succeeded, continue walking.  */
+-	      translated = translated || !disambiguate_only;
++	      translated = translated || disambiguate_only == TR_TRANSLATE;
+ 	    }
+ 	  vuse = gimple_vuse (def_stmt);
+ 	}
+diff -urpN a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
+--- a/gcc/tree-ssa-alias.h	2020-11-26 22:26:32.868000000 -0500
++++ b/gcc/tree-ssa-alias.h	2020-11-26 22:06:08.040000000 -0500
+@@ -131,13 +131,18 @@ extern bool call_may_clobber_ref_p (gcal
+ extern bool call_may_clobber_ref_p_1 (gcall *, ao_ref *);
+ extern bool stmt_kills_ref_p (gimple *, tree);
+ extern bool stmt_kills_ref_p (gimple *, ao_ref *);
++enum translate_flags
++  { TR_TRANSLATE, TR_VALUEIZE_AND_DISAMBIGUATE, TR_DISAMBIGUATE };
+ extern tree get_continuation_for_phi (gimple *, ao_ref *, bool,
+ 				      unsigned int &, bitmap *, bool,
+-				      void *(*)(ao_ref *, tree, void *, bool *),
+-				      void *);
++				      void *(*)(ao_ref *, tree, void *,
++						translate_flags *),
++				      void *, translate_flags
++					= TR_VALUEIZE_AND_DISAMBIGUATE);
+ extern void *walk_non_aliased_vuses (ao_ref *, tree, bool,
+ 				     void *(*)(ao_ref *, tree, void *),
+-				     void *(*)(ao_ref *, tree, void *, bool *),
++				     void *(*)(ao_ref *, tree, void *,
++					       translate_flags *),
+ 				     tree (*)(tree), unsigned &, void *);
+ extern int walk_aliased_vdefs (ao_ref *, tree,
+ 			       bool (*)(ao_ref *, tree, void *),
+diff -urpN a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
+--- a/gcc/tree-ssa-sccvn.c	2020-11-26 22:26:32.836000000 -0500
++++ b/gcc/tree-ssa-sccvn.c	2020-11-27 03:17:41.080000000 -0500
+@@ -1684,24 +1684,75 @@ struct pd_data
+ 
+ struct vn_walk_cb_data
+ {
+-  vn_walk_cb_data (vn_reference_t vr_, tree *last_vuse_ptr_,
+-		   vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
+-    : vr (vr_), last_vuse_ptr (last_vuse_ptr_), vn_walk_kind (vn_walk_kind_),
+-      tbaa_p (tbaa_p_), known_ranges (NULL)
+-   {}
++  vn_walk_cb_data (vn_reference_t vr_, tree orig_ref_, tree *last_vuse_ptr_,
++		   vn_lookup_kind vn_walk_kind_, bool tbaa_p_, tree mask_)
++    : vr (vr_), last_vuse_ptr (last_vuse_ptr_), last_vuse (NULL_TREE),
++      mask (mask_), masked_result (NULL_TREE), vn_walk_kind (vn_walk_kind_),
++      tbaa_p (tbaa_p_), saved_operands (vNULL), first_set (-2),
++      known_ranges (NULL)
++  {
++    if (!last_vuse_ptr)
++      last_vuse_ptr = &last_vuse;
++    ao_ref_init (&orig_ref, orig_ref_);
++    if (mask)
++      {
++	wide_int w = wi::to_wide (mask);
++	unsigned int pos = 0, prec = w.get_precision ();
++	pd_data pd;
++	pd.rhs = build_constructor (NULL_TREE, NULL);
++	/* When bitwise and with a constant is done on a memory load,
++	   we don't really need all the bits to be defined or defined
++	   to constants, we don't really care what is in the position
++	   corresponding to 0 bits in the mask.
++	   So, push the ranges of those 0 bits in the mask as artificial
++	   zero stores and let the partial def handling code do the
++	   rest.  */
++	while (pos < prec)
++	  {
++	    int tz = wi::ctz (w);
++	    if (pos + tz > prec)
++	      tz = prec - pos;
++	    if (tz)
++	      {
++		if (BYTES_BIG_ENDIAN)
++		  pd.offset = prec - pos - tz;
++		else
++		  pd.offset = pos;
++		pd.size = tz;
++		void *r = push_partial_def (pd, 0, prec);
++		gcc_assert (r == NULL_TREE);
++	      }
++	    pos += tz;
++	    if (pos == prec)
++	      break;
++	    w = wi::lrshift (w, tz);
++	    tz = wi::ctz (wi::bit_not (w));
++	    if (pos + tz > prec)
++	      tz = prec - pos;
++	    pos += tz;
++	    w = wi::lrshift (w, tz);
++	  }
++      }
++  }
+   ~vn_walk_cb_data ();
+-  void *push_partial_def (const pd_data& pd, tree, HOST_WIDE_INT);
++  void *finish (alias_set_type, tree);
++  void *push_partial_def (const pd_data& pd, alias_set_type, HOST_WIDE_INT);
+ 
+   vn_reference_t vr;
++  ao_ref orig_ref;
+   tree *last_vuse_ptr;
++  tree last_vuse;
++  tree mask;
++  tree masked_result;
+   vn_lookup_kind vn_walk_kind;
+   bool tbaa_p;
++  vec<vn_reference_op_s> saved_operands;
+ 
+   /* The VDEFs of partial defs we come along.  */
+   auto_vec<pd_data, 2> partial_defs;
+   /* The first defs range to avoid splay tree setup in most cases.  */
+   pd_range first_range;
+-  tree first_vuse;
++  alias_set_type first_set;
+   splay_tree known_ranges;
+   obstack ranges_obstack;
+ };
+@@ -1713,6 +1764,23 @@ vn_walk_cb_data::~vn_walk_cb_data ()
+       splay_tree_delete (known_ranges);
+       obstack_free (&ranges_obstack, NULL);
+     }
++  saved_operands.release ();
++}
++
++void *
++vn_walk_cb_data::finish (alias_set_type set, tree val)
++{
++  if (first_set != -2)
++    set = first_set;
++  if (mask)
++    {
++      masked_result = val;
++      return (void *) -1;
++    }
++  vec<vn_reference_op_s> &operands
++    = saved_operands.exists () ? saved_operands : vr->operands;
++  return vn_reference_lookup_or_insert_for_pieces (last_vuse, set,
++		  vr->type, operands, val);
+ }
+ 
+ /* pd_range splay-tree helpers.  */
+@@ -1742,168 +1810,306 @@ pd_tree_dealloc (void *, void *)
+ }
+ 
+ /* Push PD to the vector of partial definitions returning a
+-   value when we are ready to combine things with VUSE and MAXSIZEI,
++   value when we are ready to combine things with VUSE, SET and MAXSIZEI,
+    NULL when we want to continue looking for partial defs or -1
+    on failure.  */
+ 
+ void *
+-vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse,
+-				   HOST_WIDE_INT maxsizei)
++vn_walk_cb_data::push_partial_def (const pd_data &pd,
++				   alias_set_type set, HOST_WIDE_INT maxsizei)
+ {
++  const HOST_WIDE_INT bufsize = 64;
++  /* We're using a fixed buffer for encoding so fail early if the object
++     we want to interpret is bigger.  */
++  if (maxsizei > bufsize * BITS_PER_UNIT
++      || CHAR_BIT != 8
++      || BITS_PER_UNIT != 8
++      /* Not prepared to handle PDP endian.  */
++      || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
++    return (void *)-1;
++
++  bool pd_constant_p = (TREE_CODE (pd.rhs) == CONSTRUCTOR
++			|| CONSTANT_CLASS_P (pd.rhs));
+   if (partial_defs.is_empty ())
+     {
++      if (!pd_constant_p)
++	return (void *)-1;
+       partial_defs.safe_push (pd);
+       first_range.offset = pd.offset;
+       first_range.size = pd.size;
+-      first_vuse = vuse;
++      first_set = set;
+       last_vuse_ptr = NULL;
++      /* Continue looking for partial defs.  */
++      return NULL;
++    }
++
++  if (!known_ranges)
++    {
++      /* ???  Optimize the case where the 2nd partial def completes things.  */
++      gcc_obstack_init (&ranges_obstack);
++      known_ranges = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
++						    pd_tree_alloc,
++						    pd_tree_dealloc, this);
++      splay_tree_insert (known_ranges,
++			 (splay_tree_key)&first_range.offset,
++			 (splay_tree_value)&first_range);
++    }
++
++  pd_range newr = { pd.offset, pd.size };
++  splay_tree_node n;
++  pd_range *r;
++  /* Lookup the predecessor of offset + 1 and see if we need to merge.  */
++  HOST_WIDE_INT loffset = newr.offset + 1;
++  if ((n = splay_tree_predecessor (known_ranges, (splay_tree_key)&loffset))
++      && ((r = (pd_range *)n->value), true)
++      && ranges_known_overlap_p (r->offset, r->size + 1,
++				 newr.offset, newr.size))
++    {
++      /* Ignore partial defs already covered.  */
++      if (known_subrange_p (newr.offset, newr.size, r->offset, r->size))
++	return NULL;
++      r->size = MAX (r->offset + r->size, newr.offset + newr.size) - r->offset;
+     }
+   else
+     {
+-      if (!known_ranges)
+-	{
+-	  /* ???  Optimize the case where the second partial def
+-	     completes things.  */
+-	  gcc_obstack_init (&ranges_obstack);
+-	  known_ranges
+-	      = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
+-					       pd_tree_alloc,
+-					       pd_tree_dealloc, this);
+-	  splay_tree_insert (known_ranges,
+-			     (splay_tree_key)&first_range.offset,
+-			     (splay_tree_value)&first_range);
+-	}
+-      if (known_ranges)
+-	{
+-	  pd_range newr = { pd.offset, pd.size };
+-	  splay_tree_node n;
+-	  pd_range *r;
+-	  /* Lookup the predecessor of offset + 1 and see if
+-	     we need to merge with it.  */
+-	  HOST_WIDE_INT loffset = newr.offset + 1;
+-	  if ((n = splay_tree_predecessor (known_ranges,
+-					   (splay_tree_key)&loffset))
+-	      && ((r = (pd_range *)n->value), true)
+-	      && ranges_known_overlap_p (r->offset, r->size + 1,
+-					 newr.offset, newr.size))
+-	    {
+-	      /* Ignore partial defs already covered.  */
+-	      if (known_subrange_p (newr.offset, newr.size,
+-				    r->offset, r->size))
+-		return NULL;
+-	      r->size = MAX (r->offset + r->size,
+-			     newr.offset + newr.size) - r->offset;
+-	    }
+-	  else
+-	    {
+-	      /* newr.offset wasn't covered yet, insert the
+-		 range.  */
+-	      r = XOBNEW (&ranges_obstack, pd_range);
+-	      *r = newr;
+-	      splay_tree_insert (known_ranges,
+-				 (splay_tree_key)&r->offset,
+-				 (splay_tree_value)r);
+-	    }
+-	  /* Merge r which now contains newr and is a member
+-	     of the splay tree with adjacent overlapping ranges.  */
+-	  pd_range *rafter;
+-	  while ((n = splay_tree_successor (known_ranges,
+-					    (splay_tree_key)&r->offset))
+-		 && ((rafter = (pd_range *)n->value), true)
+-		 && ranges_known_overlap_p (r->offset, r->size + 1,
+-					    rafter->offset, rafter->size))
+-	    {
+-	      r->size = MAX (r->offset + r->size,
+-			     rafter->offset + rafter->size) - r->offset;
+-	      splay_tree_remove (known_ranges,
+-				 (splay_tree_key)&rafter->offset);
+-	    }
+-	  partial_defs.safe_push (pd);
+-
+-	  /* Now we have merged newr into the range tree.
+-	     When we have covered [offseti, sizei] then the
+-	     tree will contain exactly one node which has
+-	     the desired properties and it will be 'r'.  */
+-	  if (known_subrange_p (0, maxsizei / BITS_PER_UNIT,
+-				r->offset, r->size))
+-	    {
+-	      /* Now simply native encode all partial defs
+-		 in reverse order.  */
+-	      unsigned ndefs = partial_defs.length ();
+-	      /* We support up to 512-bit values (for V8DFmode).  */
+-	      unsigned char buffer[64];
+-	      int len;
++      /* newr.offset wasn't covered yet, insert the range.  */
++      r = XOBNEW (&ranges_obstack, pd_range);
++      *r = newr;
++      splay_tree_insert (known_ranges, (splay_tree_key)&r->offset,
++			 (splay_tree_value)r);
++    }
++  /* Merge r which now contains newr and is a member of the splay tree with
++     adjacent overlapping ranges.  */
++  pd_range *rafter;
++  while ((n = splay_tree_successor (known_ranges, (splay_tree_key)&r->offset))
++	 && ((rafter = (pd_range *)n->value), true)
++	 && ranges_known_overlap_p (r->offset, r->size + 1,
++				    rafter->offset, rafter->size))
++    {
++      r->size = MAX (r->offset + r->size,
++		     rafter->offset + rafter->size) - r->offset;
++      splay_tree_remove (known_ranges, (splay_tree_key)&rafter->offset);
++    }
++  /* Non-constants are OK as long as they are shadowed by a constant.  */
++  if (!pd_constant_p)
++    return (void *)-1;
++  partial_defs.safe_push (pd);
++
++  /* Now we have merged newr into the range tree.  When we have covered
++     [offseti, sizei] then the tree will contain exactly one node which has
++     the desired properties and it will be 'r'.  */
++  if (!known_subrange_p (0, maxsizei, r->offset, r->size))
++    /* Continue looking for partial defs.  */
++    return NULL;
+ 
+-	      while (!partial_defs.is_empty ())
++  /* Now simply native encode all partial defs in reverse order.  */
++  unsigned ndefs = partial_defs.length ();
++  /* We support up to 512-bit values (for V8DFmode).  */
++  unsigned char buffer[bufsize + 1];
++  unsigned char this_buffer[bufsize + 1];
++  int len;
++
++  memset (buffer, 0, bufsize + 1);
++  unsigned needed_len = ROUND_UP (maxsizei, BITS_PER_UNIT) / BITS_PER_UNIT;
++  while (!partial_defs.is_empty ())
++    {
++      pd_data pd = partial_defs.pop ();
++      unsigned int amnt;
++      if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
++	{
++	  /* Empty CONSTRUCTOR.  */
++	  if (pd.size >= needed_len * BITS_PER_UNIT)
++	    len = needed_len;
++	  else
++	    len = ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT;
++	  memset (this_buffer, 0, len);
++	}
++      else
++ 	{
++	  len = native_encode_expr (pd.rhs, this_buffer, bufsize,
++				    MAX (0, -pd.offset) / BITS_PER_UNIT);
++	  if (len <= 0
++	      || len < (ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT
++			- MAX (0, -pd.offset) / BITS_PER_UNIT))
++ 	    {
++	      if (dump_file && (dump_flags & TDF_DETAILS))
++		fprintf (dump_file, "Failed to encode %u "
++			 "partial definitions\n", ndefs);
++	      return (void *)-1;
++ 	    }
++	}
++
++      unsigned char *p = buffer;
++      HOST_WIDE_INT size = pd.size;
++      if (pd.offset < 0)
++	size -= ROUND_DOWN (-pd.offset, BITS_PER_UNIT);
++      this_buffer[len] = 0;
++      if (BYTES_BIG_ENDIAN)
++	{
++	  /* LSB of this_buffer[len - 1] byte should be at
++	     pd.offset + pd.size - 1 bits in buffer.  */
++	  amnt = ((unsigned HOST_WIDE_INT) pd.offset
++		  + pd.size) % BITS_PER_UNIT;
++	  if (amnt)
++	    shift_bytes_in_array_right (this_buffer, len + 1, amnt);
++	  unsigned char *q = this_buffer;
++	  unsigned int off = 0;
++	  if (pd.offset >= 0)
++	    {
++	      unsigned int msk;
++	      off = pd.offset / BITS_PER_UNIT;
++	      gcc_assert (off < needed_len);
++	      p = buffer + off;
++	      if (size <= amnt)
+ 		{
+-		  pd_data pd = partial_defs.pop ();
+-		  if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
+-		    /* Empty CONSTRUCTOR.  */
+-		    memset (buffer + MAX (0, pd.offset),
+-			    0, MIN ((HOST_WIDE_INT)sizeof (buffer)
+-				     - MAX (0, pd.offset),
+-				    pd.size + MIN (0, pd.offset)));
+-		  else
++		  msk = ((1 << size) - 1) << (BITS_PER_UNIT - amnt);
++		  *p = (*p & ~msk) | (this_buffer[len] & msk);
++		  size = 0;
++		}
++	      else
++		{
++		  if (TREE_CODE (pd.rhs) != CONSTRUCTOR)
++		    q = (this_buffer + len
++			 - (ROUND_UP (size - amnt, BITS_PER_UNIT)
++			    / BITS_PER_UNIT));
++		  if (pd.offset % BITS_PER_UNIT)
+ 		    {
+-		      len = native_encode_expr (pd.rhs,
+-						buffer + MAX (0, pd.offset),
+-						sizeof (buffer)
+-						- MAX (0, pd.offset),
+-						MAX (0, -pd.offset));
+-		      if (len <= 0
+-			  || len < (pd.size - MAX (0, -pd.offset)))
+-			{
+-			  if (dump_file && (dump_flags & TDF_DETAILS))
+-			    fprintf (dump_file, "Failed to encode %u "
+-				     "partial definitions\n", ndefs);
+-			  return (void *)-1;
+-			}
++		      msk = -1U << (BITS_PER_UNIT
++				    - (pd.offset % BITS_PER_UNIT));
++		      *p = (*p & msk) | (*q & ~msk);
++		      p++;
++		      q++;
++		      off++;
++		      size -= BITS_PER_UNIT - (pd.offset % BITS_PER_UNIT);
++		      gcc_assert (size >= 0);
+ 		    }
+ 		}
+-
+-	      tree type = vr->type;
+-	      /* Make sure to interpret in a type that has a range
+-		 covering the whole access size.  */
+-	      if (INTEGRAL_TYPE_P (vr->type)
+-		  && maxsizei != TYPE_PRECISION (vr->type))
+-		type = build_nonstandard_integer_type (maxsizei,
+-						       TYPE_UNSIGNED (type));
+-	      tree val = native_interpret_expr (type, buffer,
+-						maxsizei / BITS_PER_UNIT);
+-	      /* If we chop off bits because the types precision doesn't
+-		 match the memory access size this is ok when optimizing
+-		 reads but not when called from the DSE code during
+-		 elimination.  */
+-	      if (val
+-		  && type != vr->type)
++	    }
++	  else if (TREE_CODE (pd.rhs) != CONSTRUCTOR)
++	    {
++	      q = (this_buffer + len
++		   - (ROUND_UP (size - amnt, BITS_PER_UNIT)
++		      / BITS_PER_UNIT));
++	      if (pd.offset % BITS_PER_UNIT)
+ 		{
+-		  if (! int_fits_type_p (val, vr->type))
+-		    val = NULL_TREE;
+-		  else
+-		    val = fold_convert (vr->type, val);
++		  q++;
++		  size -= BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) pd.offset
++					   % BITS_PER_UNIT);
++		  gcc_assert (size >= 0);
+ 		}
+-
+-	      if (val)
++	    }
++	  if ((unsigned HOST_WIDE_INT) size / BITS_PER_UNIT + off
++	      > needed_len)
++	    size = (needed_len - off) * BITS_PER_UNIT;
++	  memcpy (p, q, size / BITS_PER_UNIT);
++	  if (size % BITS_PER_UNIT)
++	    {
++	      unsigned int msk
++		= -1U << (BITS_PER_UNIT - (size % BITS_PER_UNIT));
++	      p += size / BITS_PER_UNIT;
++	      q += size / BITS_PER_UNIT;
++	      *p = (*q & msk) | (*p & ~msk);
++	    }
++	}
++      else
++	{
++	  size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT);
++	  if (pd.offset >= 0)
++	    {
++	      /* LSB of this_buffer[0] byte should be at pd.offset bits
++		 in buffer.  */
++	      unsigned int msk;
++	      amnt = pd.offset % BITS_PER_UNIT;
++	      if (amnt)
++		shift_bytes_in_array_left (this_buffer, len + 1, amnt);
++	      unsigned int off = pd.offset / BITS_PER_UNIT;
++	      gcc_assert (off < needed_len);
++	      p = buffer + off;
++	      if (amnt + size < BITS_PER_UNIT)
+ 		{
+-		  if (dump_file && (dump_flags & TDF_DETAILS))
+-		    fprintf (dump_file, "Successfully combined %u "
+-			     "partial definitions\n", ndefs);
+-		  return vn_reference_lookup_or_insert_for_pieces
+-		      (first_vuse,
+-		       vr->set, vr->type, vr->operands, val);
++		  /* Low amnt bits come from *p, then size bits
++		     from this_buffer[0] and the remaining again from
++		     *p.  */
++		  msk = ((1 << size) - 1) << amnt;
++		  *p = (*p & ~msk) | (this_buffer[0] & msk);
++		  size = 0;
+ 		}
+-	      else
++	      else if (amnt)
+ 		{
+-		  if (dump_file && (dump_flags & TDF_DETAILS))
+-		    fprintf (dump_file, "Failed to interpret %u "
+-			     "encoded partial definitions\n", ndefs);
+-		  return (void *)-1;
++		  msk = -1U << amnt;
++		  *p = (*p & ~msk) | (this_buffer[0] & msk);
++		  p++;
++		  size -= (BITS_PER_UNIT - amnt);
+ 		}
+ 	    }
++	  else
++	    {
++	      amnt = (unsigned HOST_WIDE_INT) pd.offset % BITS_PER_UNIT;
++	      if (amnt)
++		shift_bytes_in_array_left (this_buffer, len + 1, amnt);
++	    }
++	  memcpy (p, this_buffer + (amnt != 0), size / BITS_PER_UNIT);
++	  p += size / BITS_PER_UNIT;
++	  if (size % BITS_PER_UNIT)
++	    {
++	      unsigned int msk = -1U << (size % BITS_PER_UNIT);
++	      *p = (this_buffer[(amnt != 0) + size / BITS_PER_UNIT]
++		    & ~msk) | (*p & msk);
++	    }
+ 	}
+     }
+-  /* Continue looking for partial defs.  */
+-  return NULL;
++
++  tree type = vr->type;
++  /* Make sure to interpret in a type that has a range covering the whole
++     access size.  */
++  if (INTEGRAL_TYPE_P (vr->type) && maxsizei != TYPE_PRECISION (vr->type))
++    type = build_nonstandard_integer_type (maxsizei, TYPE_UNSIGNED (type));
++  tree val;
++  if (BYTES_BIG_ENDIAN)
++    {
++      unsigned sz = needed_len;
++      if (maxsizei % BITS_PER_UNIT)
++	shift_bytes_in_array_right (buffer, needed_len,
++				    BITS_PER_UNIT
++				    - (maxsizei % BITS_PER_UNIT));
++      if (INTEGRAL_TYPE_P (type))
++	sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
++      if (sz > needed_len)
++	{
++	  memcpy (this_buffer + (sz - needed_len), buffer, needed_len);
++	  val = native_interpret_expr (type, this_buffer, sz);
++	}
++      else
++	val = native_interpret_expr (type, buffer, needed_len);
++    }
++  else
++    val = native_interpret_expr (type, buffer, bufsize);
++  /* If we chop off bits because the types precision doesn't match the memory
++     access size this is ok when optimizing reads but not when called from
++     the DSE code during elimination.  */
++  if (val && type != vr->type)
++    {
++      if (! int_fits_type_p (val, vr->type))
++	val = NULL_TREE;
++      else
++	val = fold_convert (vr->type, val);
++    }
++  if (val)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file,
++		 "Successfully combined %u partial definitions\n", ndefs);
++      /* We are using the alias-set of the first store we encounter which
++	 should be appropriate here.  */
++      return finish (first_set, val);
++    }
++  else
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file,
++		 "Failed to interpret %u encoded partial definitions\n", ndefs);
++      return (void *)-1;
++     }
+ }
+ 
+ /* Callback for walk_non_aliased_vuses.  Adjusts the vn_reference_t VR_
+@@ -1923,7 +2129,10 @@ vn_reference_lookup_2 (ao_ref *op ATTRIB
+     return NULL;
+ 
+   if (data->last_vuse_ptr)
+-    *data->last_vuse_ptr = vuse;
++    {
++      *data->last_vuse_ptr = vuse;
++      data->last_vuse = vuse;
++    }
+ 
+   /* Fixup vuse and hash.  */
+   if (vr->vuse)
+@@ -1935,7 +2144,11 @@ vn_reference_lookup_2 (ao_ref *op ATTRIB
+   hash = vr->hashcode;
+   slot = valid_info->references->find_slot_with_hash (vr, hash, NO_INSERT);
+   if (slot)
+-    return *slot;
++    {
++      if ((*slot)->result && data->saved_operands.exists ())
++	return data->finish (vr->set, (*slot)->result);
++      return *slot;
++    }
+ 
+   return NULL;
+ }
+@@ -2221,13 +2434,13 @@ adjust_offsets_for_equal_base_address (t
+ 
+ static void *
+ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
+-		       bool *disambiguate_only)
++		       translate_flags *disambiguate_only)
+ {
+   vn_walk_cb_data *data = (vn_walk_cb_data *)data_;
+   vn_reference_t vr = data->vr;
+   gimple *def_stmt = SSA_NAME_DEF_STMT (vuse);
+   tree base = ao_ref_base (ref);
+-  HOST_WIDE_INT offseti, maxsizei;
++  HOST_WIDE_INT offseti = 0, maxsizei, sizei = 0;
+   static vec<vn_reference_op_s> lhs_ops;
+   ao_ref lhs_ref;
+   bool lhs_ref_ok = false;
+@@ -2242,8 +2455,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+       lhs_ops.truncate (0);
+       basic_block saved_rpo_bb = vn_context_bb;
+       vn_context_bb = gimple_bb (def_stmt);
+-      copy_reference_ops_from_ref (lhs, &lhs_ops);
+-      lhs_ops = valueize_refs_1 (lhs_ops, &valueized_anything, true);
++      if (*disambiguate_only <= TR_VALUEIZE_AND_DISAMBIGUATE)
++	{
++	  copy_reference_ops_from_ref (lhs, &lhs_ops);
++	  lhs_ops = valueize_refs_1 (lhs_ops, &valueized_anything, true);
++	}
+       vn_context_bb = saved_rpo_bb;
+       if (valueized_anything)
+ 	{
+@@ -2253,7 +2469,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	  if (lhs_ref_ok
+ 	      && !refs_may_alias_p_1 (ref, &lhs_ref, data->tbaa_p))
+ 	    {
+-	      *disambiguate_only = true;
++	      *disambiguate_only = TR_VALUEIZE_AND_DISAMBIGUATE;
+ 	      return NULL;
+ 	    }
+ 	}
+@@ -2263,6 +2479,30 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	  lhs_ref_ok = true;
+ 	}
+ 
++      /* Besides valueizing the LHS we can also use access-path based
++	  disambiguation on the original non-valueized ref.  */
++      if (!ref->ref
++	  && lhs_ref_ok
++	  && data->orig_ref.ref)
++	{
++	  /* We want to use the non-valueized LHS for this, but avoid redundant
++	     work.  */
++	  ao_ref *lref = &lhs_ref;
++	  ao_ref lref_alt;
++	  if (valueized_anything)
++	    {
++	      ao_ref_init (&lref_alt, lhs);
++	      lref = &lref_alt;
++	    }
++	  if (!refs_may_alias_p_1 (&data->orig_ref, lref, data->tbaa_p))
++	    {
++	      *disambiguate_only = (valueized_anything
++				    ? TR_VALUEIZE_AND_DISAMBIGUATE
++				    : TR_DISAMBIGUATE);
++	      return NULL;
++	    }
++	}
++
+       /* If we reach a clobbering statement try to skip it and see if
+          we find a VN result with exactly the same value as the
+ 	 possible clobber.  In this case we can ignore the clobber
+@@ -2299,7 +2539,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	    }
+ 	}
+     }
+-  else if (gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL)
++  else if (*disambiguate_only <= TR_VALUEIZE_AND_DISAMBIGUATE
++	   && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL)
+ 	   && gimple_call_num_args (def_stmt) <= 4)
+     {
+       /* For builtin calls valueize its arguments and call the
+@@ -2328,15 +2569,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	    gimple_call_set_arg (def_stmt, i, oldargs[i]);
+ 	  if (!res)
+ 	    {
+-	      *disambiguate_only = true;
++	      *disambiguate_only = TR_VALUEIZE_AND_DISAMBIGUATE;
+ 	      return NULL;
+ 	    }
+ 	}
+     }
+ 
+-  /* If we are looking for redundant stores do not create new hashtable
+-     entries from aliasing defs with made up alias-sets.  */
+-  if (*disambiguate_only || !data->tbaa_p)
++  if (*disambiguate_only > TR_TRANSLATE)
+     return (void *)-1;
+ 
+   /* If we cannot constrain the size of the reference we cannot
+@@ -2359,10 +2598,14 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+       && (integer_zerop (gimple_call_arg (def_stmt, 1))
+ 	  || ((TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST
+ 	       || (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8)))
+-	      && CHAR_BIT == 8 && BITS_PER_UNIT == 8
++	      && CHAR_BIT == 8
++	      && BITS_PER_UNIT == 8
++	      && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
+ 	      && offset.is_constant (&offseti)
+-	      && offseti % BITS_PER_UNIT == 0
+ 	      && multiple_p (ref->size, BITS_PER_UNIT)))
++	      && ref->size.is_constant (&sizei)
++	      && (offseti % BITS_PER_UNIT == 0
++		  || TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST)
+       && poly_int_tree_p (gimple_call_arg (def_stmt, 2))
+       && (TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR
+ 	  || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME))
+@@ -2423,7 +2666,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+       else
+ 	return (void *)-1;
+       tree len = gimple_call_arg (def_stmt, 2);
+-      HOST_WIDE_INT leni, offset2i, offseti;
++      HOST_WIDE_INT leni, offset2i;
++      /* Sometimes the above trickery is smarter than alias analysis.  Take
++	  advantage of that.  */
++      if (!ranges_maybe_overlap_p (offset, maxsize, offset2,
++				   (wi::to_poly_offset (len)
++				    << LOG2_BITS_PER_UNIT)))
++	return NULL;
+       if (data->partial_defs.is_empty ()
+ 	  && known_subrange_p (offset, maxsize, offset2,
+ 			       wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
+@@ -2432,7 +2681,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	  if (integer_zerop (gimple_call_arg (def_stmt, 1)))
+ 	    val = build_zero_cst (vr->type);
+ 	  else if (INTEGRAL_TYPE_P (vr->type)
+-		   && known_eq (ref->size, 8))
++		   && known_eq (ref->size, 8)
++		   && offseti % BITS_PER_UNIT == 0)
+ 	    {
+ 	      gimple_match_op res_op (gimple_match_cond::UNCOND, NOP_EXPR,
+ 				      vr->type, gimple_call_arg (def_stmt, 1));
+@@ -2444,30 +2694,57 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	    }
+ 	  else
+ 	    {
+-	      unsigned len = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type));
+-	      unsigned char *buf = XALLOCAVEC (unsigned char, len);
++	      unsigned buflen = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type))
++						  + 1;
++	      if (INTEGRAL_TYPE_P (vr->type))
++		buflen = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (vr->type)) + 1;
++	      unsigned char *buf = XALLOCAVEC (unsigned char, buflen);
+ 	      memset (buf, TREE_INT_CST_LOW (gimple_call_arg (def_stmt, 1)),
+-		      len);
+-	      val = native_interpret_expr (vr->type, buf, len);
++		      buflen);
++	      if (BYTES_BIG_ENDIAN)
++		{
++		  unsigned int amnt
++		    = (((unsigned HOST_WIDE_INT) offseti + sizei)
++		       % BITS_PER_UNIT);
++		  if (amnt)
++		    {
++		      shift_bytes_in_array_right (buf, buflen,
++						  BITS_PER_UNIT - amnt);
++		      buf++;
++		      buflen--;
++		    }
++		}
++	      else if (offseti % BITS_PER_UNIT != 0)
++		{
++		  unsigned int amnt
++		    = BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) offseti
++				       % BITS_PER_UNIT);
++		  shift_bytes_in_array_left (buf, buflen, amnt);
++		  buf++;
++		  buflen--;
++		}
++	      val = native_interpret_expr (vr->type, buf, buflen);
+ 	      if (!val)
+ 		return (void *)-1;
+ 	    }
+-	  return vn_reference_lookup_or_insert_for_pieces
+-	           (vuse, vr->set, vr->type, vr->operands, val);
++	  return data->finish (0, val);
+ 	}
+       /* For now handle clearing memory with partial defs.  */
+       else if (known_eq (ref->size, maxsize)
+ 	       && integer_zerop (gimple_call_arg (def_stmt, 1))
+ 	       && tree_to_poly_int64 (len).is_constant (&leni)
++	       && leni <= INTTYPE_MAXIMUM (HOST_WIDE_INT) / BITS_PER_UNIT
+ 	       && offset.is_constant (&offseti)
+ 	       && offset2.is_constant (&offset2i)
+-	       && maxsize.is_constant (&maxsizei))
++	       && maxsize.is_constant (&maxsizei)
++	       && ranges_known_overlap_p (offseti, maxsizei, offset2i,
++		       			  leni << LOG2_BITS_PER_UNIT))
+ 	{
+ 	  pd_data pd;
+ 	  pd.rhs = build_constructor (NULL_TREE, NULL);
+-	  pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
+-	  pd.size = leni;
+-	  return data->push_partial_def (pd, vuse, maxsizei);
++	  pd.offset = offset2i - offseti;
++	  pd.size = leni << LOG2_BITS_PER_UNIT;
++	  return data->push_partial_def (pd, 0, maxsizei);
+ 	}
+     }
+ 
+@@ -2477,12 +2754,22 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	   && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR
+ 	   && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (def_stmt)) == 0)
+     {
++      tree lhs = gimple_assign_lhs (def_stmt);
+       tree base2;
+       poly_int64 offset2, size2, maxsize2;
+       HOST_WIDE_INT offset2i, size2i;
+       bool reverse;
+-      base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
+-				       &offset2, &size2, &maxsize2, &reverse);
++      if (lhs_ref_ok)
++	{
++	  base2 = ao_ref_base (&lhs_ref);
++	  offset2 = lhs_ref.offset;
++	  size2 = lhs_ref.size;
++	  maxsize2 = lhs_ref.max_size;
++	  reverse = reverse_storage_order_for_component_p (lhs);
++	}
++      else
++	base2 = get_ref_base_and_extent (lhs,
++					 &offset2, &size2, &maxsize2, &reverse);
+       if (known_size_p (maxsize2)
+ 	  && known_eq (maxsize2, size2)
+ 	  && adjust_offsets_for_equal_base_address (base, &offset,
+@@ -2492,24 +2779,21 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	      && known_subrange_p (offset, maxsize, offset2, size2))
+ 	    {
+ 	      tree val = build_zero_cst (vr->type);
+-	      return vn_reference_lookup_or_insert_for_pieces
+-		  (vuse, vr->set, vr->type, vr->operands, val);
++	      return data->finish (get_alias_set (lhs), val);
+ 	    }
+ 	  else if (known_eq (ref->size, maxsize)
+ 		   && maxsize.is_constant (&maxsizei)
+-		   && maxsizei % BITS_PER_UNIT == 0
+ 		   && offset.is_constant (&offseti)
+-		   && offseti % BITS_PER_UNIT == 0
+ 		   && offset2.is_constant (&offset2i)
+-		   && offset2i % BITS_PER_UNIT == 0
+ 		   && size2.is_constant (&size2i)
+-		   && size2i % BITS_PER_UNIT == 0)
++		   && ranges_known_overlap_p (offseti, maxsizei,
++					      offset2i, size2i))
+ 	    {
+ 	      pd_data pd;
+ 	      pd.rhs = gimple_assign_rhs1 (def_stmt);
+-	      pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
+-	      pd.size = size2i / BITS_PER_UNIT;
+-	      return data->push_partial_def (pd, vuse, maxsizei);
++	      pd.offset = offset2i - offseti;
++	      pd.size = size2i;
++	      return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
+ 	    }
+ 	}
+     }
+@@ -2520,28 +2804,36 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	   && is_gimple_reg_type (vr->type)
+ 	   && !contains_storage_order_barrier_p (vr->operands)
+ 	   && gimple_assign_single_p (def_stmt)
+-	   && CHAR_BIT == 8 && BITS_PER_UNIT == 8
++	   && CHAR_BIT == 8
++	   && BITS_PER_UNIT == 8
++	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
+ 	   /* native_encode and native_decode operate on arrays of bytes
+ 	      and so fundamentally need a compile-time size and offset.  */
+ 	   && maxsize.is_constant (&maxsizei)
+-	   && maxsizei % BITS_PER_UNIT == 0
+ 	   && offset.is_constant (&offseti)
+-	   && offseti % BITS_PER_UNIT == 0
+ 	   && (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))
+ 	       || (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
+ 		   && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt))))))
+     {
++      tree lhs = gimple_assign_lhs (def_stmt);
+       tree base2;
+       poly_int64 offset2, size2, maxsize2;
+       HOST_WIDE_INT offset2i, size2i;
+       bool reverse;
+-      base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
+-				       &offset2, &size2, &maxsize2, &reverse);
++      if (lhs_ref_ok)
++	{
++	  base2 = ao_ref_base (&lhs_ref);
++	  offset2 = lhs_ref.offset;
++	  size2 = lhs_ref.size;
++	  maxsize2 = lhs_ref.max_size;
++	  reverse = reverse_storage_order_for_component_p (lhs);
++	}
++      else
++	base2 = get_ref_base_and_extent (lhs,
++					 &offset2, &size2, &maxsize2, &reverse);
+       if (base2
+ 	  && !reverse
+ 	  && known_eq (maxsize2, size2)
+-	  && multiple_p (size2, BITS_PER_UNIT)
+-	  && multiple_p (offset2, BITS_PER_UNIT)
+ 	  && adjust_offsets_for_equal_base_address (base, &offset,
+ 						    base2, &offset2)
+ 	  && offset.is_constant (&offseti)
+@@ -2552,37 +2844,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	      && known_subrange_p (offseti, maxsizei, offset2, size2))
+ 	    {
+ 	      /* We support up to 512-bit values (for V8DFmode).  */
+-	      unsigned char buffer[64];
++	      unsigned char buffer[65];
+ 	      int len;
+ 
+ 	      tree rhs = gimple_assign_rhs1 (def_stmt);
+ 	      if (TREE_CODE (rhs) == SSA_NAME)
+ 		rhs = SSA_VAL (rhs);
+-	      unsigned pad = 0;
+-	      if (BYTES_BIG_ENDIAN
+-		  && is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
+-		{
+-		  /* On big-endian the padding is at the 'front' so
+-		     just skip the initial bytes.  */
+-		  fixed_size_mode mode
+-		    = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (rhs)));
+-		  pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT;
+-		}
+ 	      len = native_encode_expr (rhs,
+-					buffer, sizeof (buffer),
+-					((offseti - offset2i) / BITS_PER_UNIT
+-					 + pad));
++					buffer, sizeof (buffer) - 1,
++					(offseti - offset2i) / BITS_PER_UNIT);
+ 	      if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
+ 		{
+ 		  tree type = vr->type;
++		  unsigned char *buf = buffer;
++		  unsigned int amnt = 0;
+ 		  /* Make sure to interpret in a type that has a range
+ 		     covering the whole access size.  */
+ 		  if (INTEGRAL_TYPE_P (vr->type)
+ 		      && maxsizei != TYPE_PRECISION (vr->type))
+ 		    type = build_nonstandard_integer_type (maxsizei,
+ 							   TYPE_UNSIGNED (type));
+-		  tree val = native_interpret_expr (type, buffer,
+-						    maxsizei / BITS_PER_UNIT);
++		  if (BYTES_BIG_ENDIAN)
++		    {
++		      /* For big-endian native_encode_expr stored the rhs
++			 such that the LSB of it is the LSB of buffer[len - 1].
++			 That bit is stored into memory at position
++			 offset2 + size2 - 1, i.e. in byte
++			 base + (offset2 + size2 - 1) / BITS_PER_UNIT.
++			 E.g. for offset2 1 and size2 14, rhs -1 and memory
++			 previously cleared that is:
++			 0	  1
++			 01111111|11111110
++			 Now, if we want to extract offset 2 and size 12 from
++			 it using native_interpret_expr (which actually works
++			 for integral bitfield types in terms of byte size of
++			 the mode), the native_encode_expr stored the value
++			 into buffer as
++			 XX111111|11111111
++			 and returned len 2 (the X bits are outside of
++			 precision).
++			 Let sz be maxsize / BITS_PER_UNIT if not extracting
++			 a bitfield, and GET_MODE_SIZE otherwise.
++			 We need to align the LSB of the value we want to
++			 extract as the LSB of buf[sz - 1].
++			 The LSB from memory we need to read is at position
++			 offset + maxsize - 1.  */
++		      HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT;
++		      if (INTEGRAL_TYPE_P (type))
++			sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
++		      amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
++			      - offseti - maxsizei) % BITS_PER_UNIT;
++		      if (amnt)
++			shift_bytes_in_array_right (buffer, len, amnt);
++		      amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
++			      - offseti - maxsizei - amnt) / BITS_PER_UNIT;
++		      if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len)
++			len = 0;
++		      else
++			{
++			  buf = buffer + len - sz - amnt;
++			  len -= (buf - buffer);
++			}
++		    }
++		  else
++		    {
++		      amnt = ((unsigned HOST_WIDE_INT) offset2i
++			      - offseti) % BITS_PER_UNIT;
++		      if (amnt)
++			{
++			  buffer[len] = 0;
++			  shift_bytes_in_array_left (buffer, len + 1, amnt);
++			  buf = buffer + 1;
++			}
++		    }
++		  tree val = native_interpret_expr (type, buf, len);
+ 		  /* If we chop off bits because the types precision doesn't
+ 		     match the memory access size this is ok when optimizing
+ 		     reads but not when called from the DSE code during
+@@ -2597,73 +2932,95 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 		    }
+ 
+ 		  if (val)
+-		    return vn_reference_lookup_or_insert_for_pieces
+-		      (vuse, vr->set, vr->type, vr->operands, val);
++		    return data->finish (get_alias_set (lhs), val);
+ 		}
+ 	    }
+-	  else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i))
++	  else if (ranges_known_overlap_p (offseti, maxsizei, offset2i,
++					   size2i))
+ 	    {
+ 	      pd_data pd;
+ 	      tree rhs = gimple_assign_rhs1 (def_stmt);
+ 	      if (TREE_CODE (rhs) == SSA_NAME)
+ 		rhs = SSA_VAL (rhs);
+ 	      pd.rhs = rhs;
+-	      pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
+-	      pd.size = size2i / BITS_PER_UNIT;
+-	      return data->push_partial_def (pd, vuse, maxsizei);
++	      pd.offset = offset2i - offseti;
++	      pd.size = size2i;
++	      return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
+ 	    }
+ 	}
+     }
+ 
+   /* 4) Assignment from an SSA name which definition we may be able
+-     to access pieces from.  */
++     to access pieces from or we can combine to a larger entity.  */
+   else if (known_eq (ref->size, maxsize)
+ 	   && is_gimple_reg_type (vr->type)
+ 	   && !contains_storage_order_barrier_p (vr->operands)
+ 	   && gimple_assign_single_p (def_stmt)
+-	   && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
+-	   /* A subset of partial defs from non-constants can be handled
+-	      by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or
+-	      even a (series of) BIT_INSERT_EXPR hoping for simplifications
+-	      downstream, not so much for actually doing the insertion.  */
+-	   && data->partial_defs.is_empty ())
++	   && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
+     {
++      tree lhs = gimple_assign_lhs (def_stmt);
+       tree base2;
+       poly_int64 offset2, size2, maxsize2;
++      HOST_WIDE_INT offset2i, size2i, offseti;
+       bool reverse;
+-      base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
+-				       &offset2, &size2, &maxsize2,
+-				       &reverse);
++      if (lhs_ref_ok)
++	{
++	  base2 = ao_ref_base (&lhs_ref);
++	  offset2 = lhs_ref.offset;
++	  size2 = lhs_ref.size;
++	  maxsize2 = lhs_ref.max_size;
++	  reverse = reverse_storage_order_for_component_p (lhs);
++	}
++      else
++	base2 = get_ref_base_and_extent (lhs,
++					 &offset2, &size2, &maxsize2, &reverse);
+       tree def_rhs = gimple_assign_rhs1 (def_stmt);
+       if (!reverse
+ 	  && known_size_p (maxsize2)
+ 	  && known_eq (maxsize2, size2)
+ 	  && adjust_offsets_for_equal_base_address (base, &offset,
+-						    base2, &offset2)
+-	  && known_subrange_p (offset, maxsize, offset2, size2)
+-	  /* ???  We can't handle bitfield precision extracts without
+-	     either using an alternate type for the BIT_FIELD_REF and
+-	     then doing a conversion or possibly adjusting the offset
+-	     according to endianness.  */
+-	  && (! INTEGRAL_TYPE_P (vr->type)
+-	      || known_eq (ref->size, TYPE_PRECISION (vr->type)))
+-	  && multiple_p (ref->size, BITS_PER_UNIT)
+-	  && (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
+-	      || type_has_mode_precision_p (TREE_TYPE (def_rhs))))
+-	{
+-	  gimple_match_op op (gimple_match_cond::UNCOND,
+-			      BIT_FIELD_REF, vr->type,
+-			      vn_valueize (def_rhs),
+-			      bitsize_int (ref->size),
+-			      bitsize_int (offset - offset2));
+-	  tree val = vn_nary_build_or_lookup (&op);
+-	  if (val
+-	      && (TREE_CODE (val) != SSA_NAME
+-		  || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val)))
+-	    {
+-	      vn_reference_t res = vn_reference_lookup_or_insert_for_pieces
+-		  (vuse, vr->set, vr->type, vr->operands, val);
+-	      return res;
++						    base2, &offset2))
++	{
++	  if (data->partial_defs.is_empty ()
++	      && known_subrange_p (offset, maxsize, offset2, size2)
++	      /* ???  We can't handle bitfield precision extracts without
++		 either using an alternate type for the BIT_FIELD_REF and
++		 then doing a conversion or possibly adjusting the offset
++		 according to endianness.  */
++	      && (! INTEGRAL_TYPE_P (vr->type)
++		  || known_eq (ref->size, TYPE_PRECISION (vr->type)))
++	      && multiple_p (ref->size, BITS_PER_UNIT))
++	    {
++	      if (known_eq (ref->size, size2))
++		return vn_reference_lookup_or_insert_for_pieces
++		    (vuse, get_alias_set (lhs), vr->type, vr->operands,
++		     SSA_VAL (def_rhs));
++	      else if (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
++		       || type_has_mode_precision_p (TREE_TYPE (def_rhs)))
++		{
++		  gimple_match_op op (gimple_match_cond::UNCOND,
++				      BIT_FIELD_REF, vr->type,
++				      SSA_VAL (def_rhs),
++				      bitsize_int (ref->size),
++				      bitsize_int (offset - offset2));
++		  tree val = vn_nary_build_or_lookup (&op);
++		  if (val
++		      && (TREE_CODE (val) != SSA_NAME
++			  || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val)))
++		    return data->finish (get_alias_set (lhs), val);
++		}
++	    }
++	  else if (maxsize.is_constant (&maxsizei)
++		   && offset.is_constant (&offseti)
++		   && offset2.is_constant (&offset2i)
++		   && size2.is_constant (&size2i)
++		   && ranges_known_overlap_p (offset, maxsize, offset2, size2))
++	    {
++	      pd_data pd;
++	      pd.rhs = SSA_VAL (def_rhs);
++	      pd.offset = offset2i - offseti;
++	      pd.size = size2i;
++	      return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
+ 	    }
+ 	}
+     }
+@@ -2678,6 +3035,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	   /* Handling this is more complicated, give up for now.  */
+ 	   && data->partial_defs.is_empty ())
+     {
++      tree lhs = gimple_assign_lhs (def_stmt);
+       tree base2;
+       int i, j, k;
+       auto_vec<vn_reference_op_s> rhs;
+@@ -2747,7 +3105,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 	}
+ 
+       /* Now re-write REF to be based on the rhs of the assignment.  */
+-      copy_reference_ops_from_ref (gimple_assign_rhs1 (def_stmt), &rhs);
++      tree rhs1 = gimple_assign_rhs1 (def_stmt);
++      copy_reference_ops_from_ref (rhs1, &rhs);
+ 
+       /* Apply an extra offset to the inner MEM_REF of the RHS.  */
+       if (maybe_ne (extra_off, 0))
+@@ -2764,6 +3123,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 							extra_off));
+ 	}
+ 
++      /* Save the operands since we need to use the original ones for
++	 the hash entry we use.  */
++      if (!data->saved_operands.exists ())
++	data->saved_operands = vr->operands.copy ();
++
+       /* We need to pre-pend vr->operands[0..i] to rhs.  */
+       vec<vn_reference_op_s> old = vr->operands;
+       if (i + 1 + rhs.length () > vr->operands.length ())
+@@ -2780,11 +3144,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+       /* Try folding the new reference to a constant.  */
+       tree val = fully_constant_vn_reference_p (vr);
+       if (val)
+-	return vn_reference_lookup_or_insert_for_pieces
+-		 (vuse, vr->set, vr->type, vr->operands, val);
++	return data->finish (get_alias_set (lhs), val);
+ 
+       /* Adjust *ref from the new operands.  */
+-      if (!ao_ref_init_from_vn_reference (&r, vr->set, vr->type, vr->operands))
++      if (!ao_ref_init_from_vn_reference (&r, get_alias_set (rhs1),
++					  vr->type, vr->operands))
+ 	return (void *)-1;
+       /* This can happen with bitfields.  */
+       if (maybe_ne (ref->size, r.size))
+@@ -2793,6 +3157,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 
+       /* Do not update last seen VUSE after translating.  */
+       data->last_vuse_ptr = NULL;
++      /* Invalidate the original access path since it now contains
++	  the wrong base.  */
++      data->orig_ref.ref = NULL_TREE;
++      /* Use the alias-set of this LHS for recording an eventual result.  */
++      if (data->first_set == -2)
++	data->first_set = get_alias_set (lhs);
+ 
+       /* Keep looking for the adjusted *REF / VR pair.  */
+       return NULL;
+@@ -2912,6 +3282,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+       if (!known_subrange_p (at, byte_maxsize, lhs_offset, copy_size))
+ 	return (void *)-1;
+ 
++      /* Save the operands since we need to use the original ones for
++	 the hash entry we use.  */
++      if (!data->saved_operands.exists ())
++	data->saved_operands = vr->operands.copy ();
++
+       /* Make room for 2 operands in the new reference.  */
+       if (vr->operands.length () < 2)
+ 	{
+@@ -2940,11 +3315,10 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+       /* Try folding the new reference to a constant.  */
+       tree val = fully_constant_vn_reference_p (vr);
+       if (val)
+-	return vn_reference_lookup_or_insert_for_pieces
+-		 (vuse, vr->set, vr->type, vr->operands, val);
++	return data->finish (0, val);
+ 
+       /* Adjust *ref from the new operands.  */
+-      if (!ao_ref_init_from_vn_reference (&r, vr->set, vr->type, vr->operands))
++      if (!ao_ref_init_from_vn_reference (&r, 0, vr->type, vr->operands))
+ 	return (void *)-1;
+       /* This can happen with bitfields.  */
+       if (maybe_ne (ref->size, r.size))
+@@ -2953,6 +3327,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree
+ 
+       /* Do not update last seen VUSE after translating.  */
+       data->last_vuse_ptr = NULL;
++      /* Invalidate the original access path since it now contains
++	  the wrong base.  */
++      data->orig_ref.ref = NULL_TREE;
++      /* Use the alias-set of this stmt for recording an eventual result.  */
++      if (data->first_set == -2)
++	data->first_set = 0;
+ 
+       /* Keep looking for the adjusted *REF / VR pair.  */
+       return NULL;
+@@ -3013,13 +3393,13 @@ vn_reference_lookup_pieces (tree vuse, a
+     {
+       ao_ref r;
+       unsigned limit = PARAM_VALUE (PARAM_SCCVN_MAX_ALIAS_QUERIES_PER_ACCESS);
+-      vn_walk_cb_data data (&vr1, NULL, kind, true);
++      vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true, NULL_TREE);
+       if (ao_ref_init_from_vn_reference (&r, set, type, vr1.operands))
+-	*vnresult =
+-	  (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, true,
+-						  vn_reference_lookup_2,
+-						  vn_reference_lookup_3,
+-						  vuse_valueize, limit, &data);
++	*vnresult
++	  = ((vn_reference_t)
++	     walk_non_aliased_vuses (&r, vr1.vuse, true, vn_reference_lookup_2,
++				     vn_reference_lookup_3, vuse_valueize,
++				     limit, &data));
+       gcc_checking_assert (vr1.operands == shared_lookup_references);
+     }
+ 
+@@ -3035,15 +3415,19 @@ vn_reference_lookup_pieces (tree vuse, a
+    was NULL..  VNRESULT will be filled in with the vn_reference_t
+    stored in the hashtable if one exists.  When TBAA_P is false assume
+    we are looking up a store and treat it as having alias-set zero.
+-   *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.  */
++   *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.
++   MASK is either NULL_TREE, or can be an INTEGER_CST if the result of the
++   load is bitwise anded with MASK and so we are only interested in a subset
++   of the bits and can ignore if the other bits are uninitialized or
++   not initialized with constants.  */
+ 
+ tree
+ vn_reference_lookup (tree op, tree vuse, vn_lookup_kind kind,
+-		     vn_reference_t *vnresult, bool tbaa_p, tree *last_vuse_ptr)
++		     vn_reference_t *vnresult, bool tbaa_p,
++		     tree *last_vuse_ptr, tree mask)
+ {
+   vec<vn_reference_op_s> operands;
+   struct vn_reference_s vr1;
+-  tree cst;
+   bool valuezied_anything;
+ 
+   if (vnresult)
+@@ -3055,11 +3439,11 @@ vn_reference_lookup (tree op, tree vuse,
+   vr1.type = TREE_TYPE (op);
+   vr1.set = get_alias_set (op);
+   vr1.hashcode = vn_reference_compute_hash (&vr1);
+-  if ((cst = fully_constant_vn_reference_p (&vr1)))
+-    return cst;
++  if (mask == NULL_TREE)
++    if (tree cst = fully_constant_vn_reference_p (&vr1))
++      return cst;
+ 
+-  if (kind != VN_NOWALK
+-      && vr1.vuse)
++  if (kind != VN_NOWALK && vr1.vuse)
+     {
+       vn_reference_t wvnresult;
+       ao_ref r;
+@@ -3070,23 +3454,32 @@ vn_reference_lookup (tree op, tree vuse,
+ 	  || !ao_ref_init_from_vn_reference (&r, vr1.set, vr1.type,
+ 					     vr1.operands))
+ 	ao_ref_init (&r, op);
+-      vn_walk_cb_data data (&vr1, last_vuse_ptr, kind, tbaa_p);
+-      wvnresult =
+-	(vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p,
+-						vn_reference_lookup_2,
+-						vn_reference_lookup_3,
+-						vuse_valueize, limit, &data);
++      vn_walk_cb_data data (&vr1, r.ref ? NULL_TREE : op,
++			    last_vuse_ptr, kind, tbaa_p, mask);
++
++      wvnresult
++       = ((vn_reference_t)
++	   walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p, vn_reference_lookup_2,
++				   vn_reference_lookup_3, vuse_valueize, limit,
++				   &data));
+       gcc_checking_assert (vr1.operands == shared_lookup_references);
+       if (wvnresult)
+ 	{
++	  gcc_assert (mask == NULL_TREE);
+ 	  if (vnresult)
+ 	    *vnresult = wvnresult;
+ 	  return wvnresult->result;
+ 	}
++      else if (mask)
++	return data.masked_result;
+ 
+       return NULL_TREE;
+     }
+ 
++  if (last_vuse_ptr)
++    *last_vuse_ptr = vr1.vuse;
++  if (mask)
++    return NULL_TREE;
+   return vn_reference_lookup_1 (&vr1, vnresult);
+ }
+ 
+@@ -4333,7 +4726,39 @@ visit_nary_op (tree lhs, gassign *stmt)
+ 		}
+ 	    }
+ 	}
+-    default:;
++      break;
++    case BIT_AND_EXPR:
++      if (INTEGRAL_TYPE_P (type)
++	  && TREE_CODE (rhs1) == SSA_NAME
++	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
++	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)
++	  && default_vn_walk_kind != VN_NOWALK
++	  && CHAR_BIT == 8
++	  && BITS_PER_UNIT == 8
++	  && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
++	  && !integer_all_onesp (gimple_assign_rhs2 (stmt))
++	  && !integer_zerop (gimple_assign_rhs2 (stmt)))
++	{
++	  gassign *ass = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (rhs1));
++	  if (ass
++	      && !gimple_has_volatile_ops (ass)
++	      && vn_get_stmt_kind (ass) == VN_REFERENCE)
++	    {
++	      tree last_vuse = gimple_vuse (ass);
++	      tree op = gimple_assign_rhs1 (ass);
++	      tree result = vn_reference_lookup (op, gimple_vuse (ass),
++						 default_vn_walk_kind,
++						 NULL, true, &last_vuse,
++						 gimple_assign_rhs2 (stmt));
++	      if (result
++		  && useless_type_conversion_p (TREE_TYPE (result),
++						TREE_TYPE (op)))
++		return set_ssa_val_to (lhs, result);
++	    }
++	}
++      break;
++    default:
++      break;
+     }
+ 
+   bool changed = set_ssa_val_to (lhs, lhs);
+@@ -4844,14 +5269,14 @@ visit_stmt (gimple *stmt, bool backedges
+ 	      switch (vn_get_stmt_kind (ass))
+ 		{
+ 		case VN_NARY:
+-		changed = visit_nary_op (lhs, ass);
+-		break;
++		  changed = visit_nary_op (lhs, ass);
++		  break;
+ 		case VN_REFERENCE:
+-		changed = visit_reference_op_load (lhs, rhs1, ass);
+-		break;
++		  changed = visit_reference_op_load (lhs, rhs1, ass);
++		  break;
+ 		default:
+-		changed = defs_to_varying (ass);
+-		break;
++		  changed = defs_to_varying (ass);
++		  break;
+ 		}
+ 	    }
+ 	}
+@@ -5525,8 +5950,48 @@ eliminate_dom_walker::eliminate_stmt (ba
+       tree val;
+       tree rhs = gimple_assign_rhs1 (stmt);
+       vn_reference_t vnresult;
+-      val = vn_reference_lookup (lhs, gimple_vuse (stmt), VN_WALKREWRITE,
+-				 &vnresult, false);
++      /* ???  gcc.dg/torture/pr91445.c shows that we lookup a boolean
++	 typed load of a byte known to be 0x11 as 1 so a store of
++	 a boolean 1 is detected as redundant.  Because of this we
++	 have to make sure to lookup with a ref where its size
++	 matches the precision.  */
++      tree lookup_lhs = lhs;
++      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
++	  && (TREE_CODE (lhs) != COMPONENT_REF
++	      || !DECL_BIT_FIELD_TYPE (TREE_OPERAND (lhs, 1)))
++	  && !type_has_mode_precision_p (TREE_TYPE (lhs)))
++	{
++	  if (TREE_CODE (lhs) == COMPONENT_REF
++	      || TREE_CODE (lhs) == MEM_REF)
++	    {
++	      tree ltype = build_nonstandard_integer_type
++				(TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (lhs))),
++				 TYPE_UNSIGNED (TREE_TYPE (lhs)));
++	      if (TREE_CODE (lhs) == COMPONENT_REF)
++		{
++		  tree foff = component_ref_field_offset (lhs);
++		  tree f = TREE_OPERAND (lhs, 1);
++		  if (!poly_int_tree_p (foff))
++		    lookup_lhs = NULL_TREE;
++		  else
++		    lookup_lhs = build3 (BIT_FIELD_REF, ltype,
++					 TREE_OPERAND (lhs, 0),
++					 TYPE_SIZE (TREE_TYPE (lhs)),
++					 bit_from_pos
++					   (foff, DECL_FIELD_BIT_OFFSET (f)));
++		}
++	      else
++		lookup_lhs = build2 (MEM_REF, ltype,
++				     TREE_OPERAND (lhs, 0),
++				     TREE_OPERAND (lhs, 1));
++	    }
++	  else
++	    lookup_lhs = NULL_TREE;
++	}
++      val = NULL_TREE;
++      if (lookup_lhs)
++	val = vn_reference_lookup (lookup_lhs, gimple_vuse (stmt),
++				   VN_WALKREWRITE, &vnresult, false);
+       if (TREE_CODE (rhs) == SSA_NAME)
+ 	rhs = VN_INFO (rhs)->valnum;
+       if (val
+diff -urpN a/gcc/tree-ssa-sccvn.h b/gcc/tree-ssa-sccvn.h
+--- a/gcc/tree-ssa-sccvn.h	2020-11-26 22:26:32.856000000 -0500
++++ b/gcc/tree-ssa-sccvn.h	2020-11-26 22:06:08.040000000 -0500
+@@ -235,7 +235,7 @@ tree vn_reference_lookup_pieces (tree, a
+ 				 vec<vn_reference_op_s> ,
+ 				 vn_reference_t *, vn_lookup_kind);
+ tree vn_reference_lookup (tree, tree, vn_lookup_kind, vn_reference_t *, bool,
+-			  tree * = NULL);
++			  tree * = NULL, tree = NULL_TREE);
+ void vn_reference_lookup_call (gcall *, vn_reference_t *, vn_reference_t);
+ vn_reference_t vn_reference_insert_pieces (tree, alias_set_type, tree,
+ 					   vec<vn_reference_op_s> ,
diff --git a/simplify-removing-subregs.patch b/simplify-removing-subregs.patch
index fd6cbc6..cfd5804 100644
--- a/simplify-removing-subregs.patch
+++ b/simplify-removing-subregs.patch
@@ -1,3 +1,9 @@
+This backport contains 1 patch from the GCC mainstream tree.
+The commit ID of this patch is listed below.
+
+0001-expand-Simplify-removing-subregs-when-expanding-a-co.patch
+9a182ef9ee011935d827ab5c6c9a7cd8e22257d8
+
 diff -Nurp a/gcc/expr.c b/gcc/expr.c
 --- a/gcc/expr.c	2020-08-05 20:33:04.068000000 +0800
 +++ b/gcc/expr.c	2020-08-05 20:33:21.420000000 +0800
diff --git a/speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch b/speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch
new file mode 100644
index 0000000..da7b905
--- /dev/null
+++ b/speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch
@@ -0,0 +1,718 @@
+This backport contains 2 patches from the GCC mainstream tree.
+The commit IDs of these patches are listed below in chronological order.
+
+728c2e5eeaa91cf708f2b1b1f996653a7eebae59
+0001-modulo-sched-speed-up-DDG-analysis-PR90001.patch
+
+06d5d63d9944691bb4286e5f6b2422cc97148336
+0001-modulo-sched-fix-bootstrap-compare-debug-issue.patch
+
+diff -Nurp a/gcc/ddg.c b/gcc/ddg.c
+--- a/gcc/ddg.c	2020-11-28 18:40:12.371633230 +0800
++++ b/gcc/ddg.c	2020-11-28 18:38:33.835633230 +0800
+@@ -32,9 +32,6 @@ along with GCC; see the file COPYING3.
+ 
+ #ifdef INSN_SCHEDULING
+ 
+-/* A flag indicating that a ddg edge belongs to an SCC or not.  */
+-enum edge_flag {NOT_IN_SCC = 0, IN_SCC};
+-
+ /* Forward declarations.  */
+ static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
+ static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
+@@ -188,9 +185,6 @@ create_ddg_dep_from_intra_loop_link (ddg
+   else if (DEP_TYPE (link) == REG_DEP_OUTPUT)
+     t = OUTPUT_DEP;
+ 
+-  gcc_assert (!DEBUG_INSN_P (dest_node->insn) || t == ANTI_DEP);
+-  gcc_assert (!DEBUG_INSN_P (src_node->insn) || t == ANTI_DEP);
+-
+   /* We currently choose not to create certain anti-deps edges and
+      compensate for that by generating reg-moves based on the life-range
+      analysis.  The anti-deps that will be deleted are the ones which
+@@ -225,9 +219,9 @@ create_ddg_dep_from_intra_loop_link (ddg
+         }
+     }
+ 
+-   latency = dep_cost (link);
+-   e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
+-   add_edge_to_ddg (g, e);
++  latency = dep_cost (link);
++  e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
++  add_edge_to_ddg (g, e);
+ }
+ 
+ /* The same as the above function, but it doesn't require a link parameter.  */
+@@ -240,9 +234,6 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_n
+   enum reg_note dep_kind;
+   struct _dep _dep, *dep = &_dep;
+ 
+-  gcc_assert (!DEBUG_INSN_P (to->insn) || d_t == ANTI_DEP);
+-  gcc_assert (!DEBUG_INSN_P (from->insn) || d_t == ANTI_DEP);
+-
+   if (d_t == ANTI_DEP)
+     dep_kind = REG_DEP_ANTI;
+   else if (d_t == OUTPUT_DEP)
+@@ -275,16 +266,15 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_n
+ static void
+ add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
+ {
+-  int regno = DF_REF_REGNO (last_def);
+   struct df_link *r_use;
+   int has_use_in_bb_p = false;
+-  rtx_insn *def_insn = DF_REF_INSN (last_def);
+-  ddg_node_ptr last_def_node = get_node_of_insn (g, def_insn);
+-  ddg_node_ptr use_node;
++  int regno = DF_REF_REGNO (last_def);
++  ddg_node_ptr last_def_node = get_node_of_insn (g, DF_REF_INSN (last_def));
+   df_ref first_def = df_bb_regno_first_def_find (g->bb, regno);
++  ddg_node_ptr first_def_node = get_node_of_insn (g, DF_REF_INSN (first_def));
++  ddg_node_ptr use_node;
+ 
+-  gcc_assert (last_def_node);
+-  gcc_assert (first_def);
++  gcc_assert (last_def_node && first_def && first_def_node);
+ 
+   if (flag_checking && DF_REF_ID (last_def) != DF_REF_ID (first_def))
+     {
+@@ -303,6 +293,9 @@ add_cross_iteration_register_deps (ddg_p
+ 
+       rtx_insn *use_insn = DF_REF_INSN (r_use->ref);
+ 
++      if (DEBUG_INSN_P (use_insn))
++	continue;
++
+       /* ??? Do not handle uses with DF_REF_IN_NOTE notes.  */
+       use_node = get_node_of_insn (g, use_insn);
+       gcc_assert (use_node);
+@@ -313,35 +306,28 @@ add_cross_iteration_register_deps (ddg_p
+ 	     iteration.  Any such upwards exposed use appears before
+ 	     the last_def def.  */
+ 	  create_ddg_dep_no_link (g, last_def_node, use_node,
+-				  DEBUG_INSN_P (use_insn) ? ANTI_DEP : TRUE_DEP,
+-				  REG_DEP, 1);
++				  TRUE_DEP, REG_DEP, 1);
+ 	}
+-      else if (!DEBUG_INSN_P (use_insn))
++      else
+ 	{
+ 	  /* Add anti deps from last_def's uses in the current iteration
+ 	     to the first def in the next iteration.  We do not add ANTI
+ 	     dep when there is an intra-loop TRUE dep in the opposite
+ 	     direction, but use regmoves to fix such disregarded ANTI
+ 	     deps when broken.	If the first_def reaches the USE then
+-	     there is such a dep.  */
+-	  ddg_node_ptr first_def_node = get_node_of_insn (g,
+-							  DF_REF_INSN (first_def));
+-
+-	  gcc_assert (first_def_node);
+-
+-         /* Always create the edge if the use node is a branch in
+-            order to prevent the creation of reg-moves.  
+-            If the address that is being auto-inc or auto-dec in LAST_DEF
+-            is used in USE_INSN then do not remove the edge to make sure
+-            reg-moves will not be created for that address.  */
+-          if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
+-              || !flag_modulo_sched_allow_regmoves
++	     there is such a dep.
++	     Always create the edge if the use node is a branch in
++	     order to prevent the creation of reg-moves.
++	     If the address that is being auto-inc or auto-dec in LAST_DEF
++	     is used in USE_INSN then do not remove the edge to make sure
++	     reg-moves will not be created for that address.  */
++	  if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
++	      || !flag_modulo_sched_allow_regmoves
+ 	      || JUMP_P (use_node->insn)
+-              || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
++	      || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
+ 	      || def_has_ccmode_p (DF_REF_INSN (last_def)))
+-            create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
+-                                    REG_DEP, 1);
+-
++	    create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
++				    REG_DEP, 1);
+ 	}
+     }
+   /* Create an inter-loop output dependence between LAST_DEF (which is the
+@@ -351,19 +337,11 @@ add_cross_iteration_register_deps (ddg_p
+      defs starting with a true dependence to a use which can be in the
+      next iteration; followed by an anti dependence of that use to the
+      first def (i.e. if there is a use between the two defs.)  */
+-  if (!has_use_in_bb_p)
+-    {
+-      ddg_node_ptr dest_node;
+-
+-      if (DF_REF_ID (last_def) == DF_REF_ID (first_def))
+-	return;
+-
+-      dest_node = get_node_of_insn (g, DF_REF_INSN (first_def));
+-      gcc_assert (dest_node);
+-      create_ddg_dep_no_link (g, last_def_node, dest_node,
+-			      OUTPUT_DEP, REG_DEP, 1);
+-    }
++  if (!has_use_in_bb_p && DF_REF_ID (last_def) != DF_REF_ID (first_def))
++    create_ddg_dep_no_link (g, last_def_node, first_def_node,
++			    OUTPUT_DEP, REG_DEP, 1);
+ }
++
+ /* Build inter-loop dependencies, by looking at DF analysis backwards.  */
+ static void
+ build_inter_loop_deps (ddg_ptr g)
+@@ -420,13 +398,9 @@ add_intra_loop_mem_dep (ddg_ptr g, ddg_n
+   if (mem_write_insn_p (from->insn))
+     {
+       if (mem_read_insn_p (to->insn))
+-	create_ddg_dep_no_link (g, from, to,
+-				DEBUG_INSN_P (to->insn)
+-				? ANTI_DEP : TRUE_DEP, MEM_DEP, 0);
++	create_ddg_dep_no_link (g, from, to, TRUE_DEP, MEM_DEP, 0);
+       else
+-	create_ddg_dep_no_link (g, from, to,
+-				DEBUG_INSN_P (to->insn)
+-				? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0);
++	create_ddg_dep_no_link (g, from, to, OUTPUT_DEP, MEM_DEP, 0);
+     }
+   else if (!mem_read_insn_p (to->insn))
+     create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0);
+@@ -444,13 +418,9 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_n
+   if (mem_write_insn_p (from->insn))
+     {
+       if (mem_read_insn_p (to->insn))
+-  	create_ddg_dep_no_link (g, from, to,
+-				DEBUG_INSN_P (to->insn)
+-				? ANTI_DEP : TRUE_DEP, MEM_DEP, 1);
++	create_ddg_dep_no_link (g, from, to, TRUE_DEP, MEM_DEP, 1);
+       else if (from->cuid != to->cuid)
+-  	create_ddg_dep_no_link (g, from, to,
+-				DEBUG_INSN_P (to->insn)
+-				? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 1);
++	create_ddg_dep_no_link (g, from, to, OUTPUT_DEP, MEM_DEP, 1);
+     }
+   else
+     {
+@@ -459,13 +429,9 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_n
+       else if (from->cuid != to->cuid)
+ 	{
+ 	  create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 1);
+-	  if (DEBUG_INSN_P (from->insn) || DEBUG_INSN_P (to->insn))
+-	    create_ddg_dep_no_link (g, to, from, ANTI_DEP, MEM_DEP, 1);
+-	  else
+-	    create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
++	  create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
+ 	}
+     }
+-
+ }
+ 
+ /* Perform intra-block Data Dependency analysis and connect the nodes in
+@@ -494,20 +460,10 @@ build_intra_loop_deps (ddg_ptr g)
+       sd_iterator_def sd_it;
+       dep_t dep;
+ 
+-      if (! INSN_P (dest_node->insn))
+-	continue;
+-
+       FOR_EACH_DEP (dest_node->insn, SD_LIST_BACK, sd_it, dep)
+ 	{
+ 	  rtx_insn *src_insn = DEP_PRO (dep);
+-	  ddg_node_ptr src_node;
+-
+-	  /* Don't add dependencies on debug insns to non-debug insns
+-	     to avoid codegen differences between -g and -g0.  */
+-	  if (DEBUG_INSN_P (src_insn) && !DEBUG_INSN_P (dest_node->insn))
+-	    continue;
+-
+-	  src_node = get_node_of_insn (g, src_insn);
++	  ddg_node_ptr src_node = get_node_of_insn (g, src_insn);
+ 
+ 	  if (!src_node)
+ 	    continue;
+@@ -524,8 +480,7 @@ build_intra_loop_deps (ddg_ptr g)
+ 	  for (j = 0; j <= i; j++)
+ 	    {
+ 	      ddg_node_ptr j_node = &g->nodes[j];
+-	      if (DEBUG_INSN_P (j_node->insn))
+-		continue;
++
+ 	      if (mem_access_insn_p (j_node->insn))
+ 		{
+ 		  /* Don't bother calculating inter-loop dep if an intra-loop dep
+@@ -564,7 +519,7 @@ create_ddg (basic_block bb, int closing_
+ {
+   ddg_ptr g;
+   rtx_insn *insn, *first_note;
+-  int i;
++  int i, j;
+   int num_nodes = 0;
+ 
+   g = (ddg_ptr) xcalloc (1, sizeof (struct ddg));
+@@ -576,23 +531,21 @@ create_ddg (basic_block bb, int closing_
+   for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
+        insn = NEXT_INSN (insn))
+     {
+-      if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
++      if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
+ 	continue;
+ 
+-      if (DEBUG_INSN_P (insn))
+-	g->num_debug++;
+-      else
++      if (NONDEBUG_INSN_P (insn))
+ 	{
+ 	  if (mem_read_insn_p (insn))
+ 	    g->num_loads++;
+ 	  if (mem_write_insn_p (insn))
+ 	    g->num_stores++;
++	  num_nodes++;
+ 	}
+-      num_nodes++;
+     }
+ 
+   /* There is nothing to do for this BB.  */
+-  if ((num_nodes - g->num_debug) <= 1)
++  if (num_nodes <= 1)
+     {
+       free (g);
+       return NULL;
+@@ -607,32 +560,39 @@ create_ddg (basic_block bb, int closing_
+   for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
+        insn = NEXT_INSN (insn))
+     {
+-      if (! INSN_P (insn))
+-	{
+-	  if (! first_note && NOTE_P (insn)
+-	      && NOTE_KIND (insn) !=  NOTE_INSN_BASIC_BLOCK)
+-	    first_note = insn;
+-	  continue;
+-	}
++      if (LABEL_P (insn) || NOTE_INSN_BASIC_BLOCK_P (insn))
++	continue;
++
++      if (!first_note && (INSN_P (insn) || NOTE_P (insn)))
++	first_note = insn;
++
++      if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
++	continue;
++
+       if (JUMP_P (insn))
+ 	{
+ 	  gcc_assert (!g->closing_branch);
+ 	  g->closing_branch = &g->nodes[i];
+ 	}
+-      else if (GET_CODE (PATTERN (insn)) == USE)
++
++      if (NONDEBUG_INSN_P (insn))
+ 	{
+-	  if (! first_note)
+-	    first_note = insn;
+-	  continue;
+-	}
++	  g->nodes[i].cuid = i;
++	  g->nodes[i].successors = sbitmap_alloc (num_nodes);
++	  bitmap_clear (g->nodes[i].successors);
++	  g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
++	  bitmap_clear (g->nodes[i].predecessors);
++
++	  gcc_checking_assert (first_note);
++	  g->nodes[i].first_note = first_note;
++
++	  g->nodes[i].aux.count = -1;
++	  g->nodes[i].max_dist = XCNEWVEC (int, num_nodes);
++	  for (j = 0; j < num_nodes; j++)
++	    g->nodes[i].max_dist[j] = -1;
+ 
+-      g->nodes[i].cuid = i;
+-      g->nodes[i].successors = sbitmap_alloc (num_nodes);
+-      bitmap_clear (g->nodes[i].successors);
+-      g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
+-      bitmap_clear (g->nodes[i].predecessors);
+-      g->nodes[i].first_note = (first_note ? first_note : insn);
+-      g->nodes[i++].insn = insn;
++	  g->nodes[i++].insn = insn;
++	}
+       first_note = NULL;
+     }
+ 
+@@ -668,6 +628,7 @@ free_ddg (ddg_ptr g)
+ 	}
+       sbitmap_free (g->nodes[i].successors);
+       sbitmap_free (g->nodes[i].predecessors);
++      free (g->nodes[i].max_dist);
+     }
+   if (g->num_backarcs > 0)
+     free (g->backarcs);
+@@ -792,7 +753,7 @@ create_ddg_edge (ddg_node_ptr src, ddg_n
+   e->latency = l;
+   e->distance = d;
+   e->next_in = e->next_out = NULL;
+-  e->aux.info = 0;
++  e->in_scc = false;
+   return e;
+ }
+ 
+@@ -820,7 +781,7 @@ add_edge_to_ddg (ddg_ptr g ATTRIBUTE_UNU
+    for now that cycles in the data dependence graph contain a single backarc.
+    This simplifies the algorithm, and can be generalized later.  */
+ static void
+-set_recurrence_length (ddg_scc_ptr scc, ddg_ptr g)
++set_recurrence_length (ddg_scc_ptr scc)
+ {
+   int j;
+   int result = -1;
+@@ -828,17 +789,14 @@ set_recurrence_length (ddg_scc_ptr scc,
+   for (j = 0; j < scc->num_backarcs; j++)
+     {
+       ddg_edge_ptr backarc = scc->backarcs[j];
+-      int length;
+       int distance = backarc->distance;
+       ddg_node_ptr src = backarc->dest;
+       ddg_node_ptr dest = backarc->src;
++      int length = src->max_dist[dest->cuid];
++
++      if (length < 0)
++        continue;
+ 
+-      length = longest_simple_path (g, src->cuid, dest->cuid, scc->nodes);
+-      if (length < 0 )
+-	{
+-	  /* fprintf (stderr, "Backarc not on simple cycle in SCC.\n"); */
+-	  continue;
+-	}
+       length += backarc->latency;
+       result = MAX (result, (length / distance));
+     }
+@@ -846,9 +804,9 @@ set_recurrence_length (ddg_scc_ptr scc,
+ }
+ 
+ /* Create a new SCC given the set of its nodes.  Compute its recurrence_length
+-   and mark edges that belong to this scc as IN_SCC.  */
++   and mark edges that belong to this scc.  */
+ static ddg_scc_ptr
+-create_scc (ddg_ptr g, sbitmap nodes)
++create_scc (ddg_ptr g, sbitmap nodes, int id)
+ {
+   ddg_scc_ptr scc;
+   unsigned int u = 0;
+@@ -866,16 +824,18 @@ create_scc (ddg_ptr g, sbitmap nodes)
+       ddg_edge_ptr e;
+       ddg_node_ptr n = &g->nodes[u];
+ 
++      gcc_assert (n->aux.count == -1);
++      n->aux.count = id;
++
+       for (e = n->out; e; e = e->next_out)
+ 	if (bitmap_bit_p (nodes, e->dest->cuid))
+ 	  {
+-	    e->aux.count = IN_SCC;
++	    e->in_scc = true;
+ 	    if (e->distance > 0)
+ 	      add_backarc_to_scc (scc, e);
+ 	  }
+     }
+ 
+-  set_recurrence_length (scc, g);
+   return scc;
+ }
+ 
+@@ -1018,7 +978,7 @@ check_sccs (ddg_all_sccs_ptr sccs, int n
+ ddg_all_sccs_ptr
+ create_ddg_all_sccs (ddg_ptr g)
+ {
+-  int i;
++  int i, j, k, scc, way;
+   int num_nodes = g->num_nodes;
+   auto_sbitmap from (num_nodes);
+   auto_sbitmap to (num_nodes);
+@@ -1038,7 +998,7 @@ create_ddg_all_sccs (ddg_ptr g)
+       ddg_node_ptr dest = backarc->dest;
+ 
+       /* If the backarc already belongs to an SCC, continue.  */
+-      if (backarc->aux.count == IN_SCC)
++      if (backarc->in_scc)
+ 	continue;
+ 
+       bitmap_clear (scc_nodes);
+@@ -1049,10 +1009,52 @@ create_ddg_all_sccs (ddg_ptr g)
+ 
+       if (find_nodes_on_paths (scc_nodes, g, from, to))
+ 	{
+-	  scc = create_scc (g, scc_nodes);
++	  scc = create_scc (g, scc_nodes, sccs->num_sccs);
+ 	  add_scc_to_ddg (sccs, scc);
+ 	}
+     }
++
++  /* Init max_dist arrays for Floyd–Warshall-like
++     longest path calculation algorithm.  */
++  for (k = 0; k < num_nodes; k++)
++    {
++      ddg_edge_ptr e;
++      ddg_node_ptr n = &g->nodes[k];
++
++      if (n->aux.count == -1)
++        continue;
++
++      n->max_dist[k] = 0;
++      for (e = n->out; e; e = e->next_out)
++        if (e->distance == 0 && g->nodes[e->dest->cuid].aux.count == n->aux.count)
++          n->max_dist[e->dest->cuid] = e->latency;
++    }
++
++  /* Run main Floyd-Warshall loop.  We use only non-backarc edges
++     inside each scc.  */
++  for (k = 0; k < num_nodes; k++)
++    {
++      scc = g->nodes[k].aux.count;
++      if (scc != -1)
++        {
++          for (i = 0; i < num_nodes; i++)
++            if (g->nodes[i].aux.count == scc)
++              for (j = 0; j < num_nodes; j++)
++                if (g->nodes[j].aux.count == scc
++                    && g->nodes[i].max_dist[k] >= 0
++                    && g->nodes[k].max_dist[j] >= 0)
++                  {
++                    way = g->nodes[i].max_dist[k] + g->nodes[k].max_dist[j];
++                    if (g->nodes[i].max_dist[j] < way)
++                      g->nodes[i].max_dist[j] = way;
++                  }
++        }
++    }
++
++  /* Calculate recurrence_length using max_dist info.  */
++  for (i = 0; i < sccs->num_sccs; i++)
++    set_recurrence_length (sccs->sccs[i]);
++
+   order_sccs (sccs);
+ 
+   if (flag_checking)
+@@ -1155,72 +1157,4 @@ find_nodes_on_paths (sbitmap result, ddg
+   return bitmap_and (result, reachable_from, reach_to);
+ }
+ 
+-
+-/* Updates the counts of U_NODE's successors (that belong to NODES) to be
+-   at-least as large as the count of U_NODE plus the latency between them.
+-   Sets a bit in TMP for each successor whose count was changed (increased).
+-   Returns nonzero if any count was changed.  */
+-static int
+-update_dist_to_successors (ddg_node_ptr u_node, sbitmap nodes, sbitmap tmp)
+-{
+-  ddg_edge_ptr e;
+-  int result = 0;
+-
+-  for (e = u_node->out; e; e = e->next_out)
+-    {
+-      ddg_node_ptr v_node = e->dest;
+-      int v = v_node->cuid;
+-
+-      if (bitmap_bit_p (nodes, v)
+-	  && (e->distance == 0)
+-	  && (v_node->aux.count < u_node->aux.count + e->latency))
+-	{
+-	  v_node->aux.count = u_node->aux.count + e->latency;
+-	  bitmap_set_bit (tmp, v);
+-	  result = 1;
+-	}
+-    }
+-  return result;
+-}
+-
+-
+-/* Find the length of a longest path from SRC to DEST in G,
+-   going only through NODES, and disregarding backarcs.  */
+-int
+-longest_simple_path (struct ddg * g, int src, int dest, sbitmap nodes)
+-{
+-  int i;
+-  unsigned int u = 0;
+-  int change = 1;
+-  int num_nodes = g->num_nodes;
+-  auto_sbitmap workset (num_nodes);
+-  auto_sbitmap tmp (num_nodes);
+-
+-
+-  /* Data will hold the distance of the longest path found so far from
+-     src to each node.  Initialize to -1 = less than minimum.  */
+-  for (i = 0; i < g->num_nodes; i++)
+-    g->nodes[i].aux.count = -1;
+-  g->nodes[src].aux.count = 0;
+-
+-  bitmap_clear (tmp);
+-  bitmap_set_bit (tmp, src);
+-
+-  while (change)
+-    {
+-      sbitmap_iterator sbi;
+-
+-      change = 0;
+-      bitmap_copy (workset, tmp);
+-      bitmap_clear (tmp);
+-      EXECUTE_IF_SET_IN_BITMAP (workset, 0, u, sbi)
+-	{
+-	  ddg_node_ptr u_node = &g->nodes[u];
+-
+-	  change |= update_dist_to_successors (u_node, nodes, tmp);
+-	}
+-    }
+-  return g->nodes[dest].aux.count;
+-}
+-
+ #endif /* INSN_SCHEDULING */
+diff -Nurp a/gcc/ddg.h b/gcc/ddg.h
+--- a/gcc/ddg.h	2020-03-12 19:07:21.000000000 +0800
++++ b/gcc/ddg.h	2020-11-28 18:38:33.835633230 +0800
+@@ -64,6 +64,10 @@ struct ddg_node
+   sbitmap successors;
+   sbitmap predecessors;
+ 
++  /* Temporary array used for Floyd-Warshall algorithm to find
++     scc recurrence length.  */
++  int *max_dist;
++
+   /* For general use by algorithms manipulating the ddg.  */
+   union {
+     int count;
+@@ -95,11 +99,8 @@ struct ddg_edge
+   ddg_edge_ptr next_in;
+   ddg_edge_ptr next_out;
+ 
+-  /* For general use by algorithms manipulating the ddg.  */
+-  union {
+-    int count;
+-    void *info;
+-  } aux;
++  /* Is true when edge is already in scc.  */
++  bool in_scc;
+ };
+ 
+ /* This structure holds the Data Dependence Graph for a basic block.  */
+@@ -115,9 +116,6 @@ struct ddg
+   int num_loads;
+   int num_stores;
+ 
+-  /* Number of debug instructions in the BB.  */
+-  int num_debug;
+-
+   /* This array holds the nodes in the graph; it is indexed by the node
+      cuid, which follows the order of the instructions in the BB.  */
+   ddg_node_ptr nodes;
+@@ -178,7 +176,6 @@ ddg_all_sccs_ptr create_ddg_all_sccs (dd
+ void free_ddg_all_sccs (ddg_all_sccs_ptr);
+ 
+ int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to);
+-int longest_simple_path (ddg_ptr, int from, int to, sbitmap via);
+ 
+ bool autoinc_var_is_used_p (rtx_insn *, rtx_insn *);
+ 
+diff -Nurp a/gcc/modulo-sched.c b/gcc/modulo-sched.c
+--- a/gcc/modulo-sched.c	2020-03-12 19:07:21.000000000 +0800
++++ b/gcc/modulo-sched.c	2020-11-28 18:38:33.835633230 +0800
+@@ -370,7 +370,7 @@ doloop_register_get (rtx_insn *head, rtx
+                              : prev_nondebug_insn (tail));
+ 
+   for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
+-    if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
++    if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
+       {
+         if (dump_file)
+         {
+@@ -429,7 +429,7 @@ res_MII (ddg_ptr g)
+   if (targetm.sched.sms_res_mii)
+     return targetm.sched.sms_res_mii (g);
+ 
+-  return ((g->num_nodes - g->num_debug) / issue_rate);
++  return g->num_nodes / issue_rate;
+ }
+ 
+ 
+@@ -2156,11 +2156,7 @@ sms_schedule_by_order (ddg_ptr g, int mi
+   	  ddg_node_ptr u_node = &ps->g->nodes[u];
+ 	  rtx_insn *insn = u_node->insn;
+ 
+-	  if (!NONDEBUG_INSN_P (insn))
+-	    {
+-	      bitmap_clear_bit (tobe_scheduled, u);
+-	      continue;
+-	    }
++	  gcc_checking_assert (NONDEBUG_INSN_P (insn));
+ 
+ 	  if (bitmap_bit_p (sched_nodes, u))
+ 	    continue;
+@@ -3162,9 +3158,6 @@ ps_has_conflicts (partial_schedule_ptr p
+ 	{
+ 	  rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
+ 
+-	  if (!NONDEBUG_INSN_P (insn))
+-	    continue;
+-
+ 	  /* Check if there is room for the current insn.  */
+ 	  if (!can_issue_more || state_dead_lock_p (curr_state))
+ 	    return true;
+diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c b/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c
+--- a/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c	2020-11-28 18:38:33.835633230 +0800
+@@ -0,0 +1,23 @@
++/* { dg-additional-options "-fcompare-debug -fmodulo-sched" } */
++
++struct S { int f; signed int g : 2; } a[1], c = {5, 1}, d;
++short b;
++
++__attribute__((noinline, noclone)) void
++foo (int x)
++{
++  if (x != 1)
++    __builtin_abort ();
++}
++
++int
++main ()
++{
++  while (b++ <= 0)
++    {
++      struct S e = {1, 1};
++      d = e = a[0] = c;
++    }
++  foo (a[0].g);
++  return 0;
++}
+diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c b/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c
+--- a/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c	2020-11-28 18:38:33.835633230 +0800
+@@ -0,0 +1,36 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-fcompare-debug -fmodulo-sched --param sms-min-sc=1" } */
++
++int a, c, e, f, g;
++void
++h (int i)
++{
++  a = i;
++}
++void
++j (char *i, long k)
++{
++  while (k--)
++    c = *i++;
++}
++void
++l (unsigned char *i, long k)
++{
++  unsigned char *b = i + k;
++  while (i < b)
++    {
++      h (*i);
++      i++;
++    }
++}
++void
++m ()
++{
++  while (e)
++    {
++      float d = g;
++      l ((char *) &d, sizeof (g));
++      if (f)
++	j ((char *) &d, sizeof (g));
++    }
++}
diff --git a/store-merging-Consider-also-overlapping-stores-earlier.patch b/store-merging-Consider-also-overlapping-stores-earlier.patch
new file mode 100644
index 0000000..15dd0d1
--- /dev/null
+++ b/store-merging-Consider-also-overlapping-stores-earlier.patch
@@ -0,0 +1,359 @@
+This backport contains 1 patch from the GCC mainstream tree.
+The commit ID of this patch is listed below.
+
+0001-store-merging-Consider-also-overlapping-stores-earli.patch
+bd909071ac04e94f4b6f0baab64d0687ec55681d
+
+diff -uprN a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
+--- a/gcc/gimple-ssa-store-merging.c	2020-12-16 17:03:16.155633230 +0800
++++ b/gcc/gimple-ssa-store-merging.c	2020-12-16 11:15:58.575633230 +0800
+@@ -2021,7 +2021,8 @@ struct imm_store_chain_info
+       }
+   }
+   bool terminate_and_process_chain ();
+-  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
++  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
++			   unsigned int);
+   bool coalesce_immediate_stores ();
+   bool output_merged_store (merged_store_group *);
+   bool output_merged_stores ();
+@@ -2342,14 +2343,39 @@ gather_bswap_load_refs (vec<tree> *refs,
+    into the group.  That way it will be its own store group and will
+    not be touched.  If ALL_INTEGER_CST_P and there are overlapping
+    INTEGER_CST stores, those are mergeable using merge_overlapping,
+-   so don't return false for those.  */
++   so don't return false for those.
++
++   Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
++   (exclusive), whether they don't overlap the bitrange START to END
++   and have order in between FIRST_ORDER and LAST_ORDER.  This is to
++   prevent merging in cases like:
++     MEM  [&b + 8B] = {};
++     MEM[(short *) &b] = 5;
++     _5 = *x_4(D);
++     MEM  [&b + 2B] = _5;
++     MEM[(char *)&b + 16B] = 88;
++     MEM[(int *)&b + 20B] = 1;
++   The = {} store comes in sort_by_bitpos before the = 88 store, and can't
++   be merged with it, because the = _5 store overlaps these and is in between
++   them in sort_by_order ordering.  If it was merged, the merged store would
++   go after the = _5 store and thus change behavior.  */
+ 
+ static bool
+ check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
+-		  bool all_integer_cst_p, unsigned int last_order,
+-		  unsigned HOST_WIDE_INT end)
++		  bool all_integer_cst_p, unsigned int first_order,
++		  unsigned int last_order, unsigned HOST_WIDE_INT start,
++		  unsigned HOST_WIDE_INT end, unsigned int first_earlier,
++		  unsigned end_earlier)
+ {
+   unsigned int len = m_store_info.length ();
++  for (unsigned int j = first_earlier; j < end_earlier; j++)
++    {
++      store_immediate_info *info = m_store_info[j];
++      if (info->order > first_order
++	  && info->order < last_order
++	  && info->bitpos + info->bitsize > start)
++	return false;
++    }
+   for (++i; i < len; ++i)
+     {
+       store_immediate_info *info = m_store_info[i];
+@@ -2370,7 +2396,8 @@ check_no_overlap (vecbitsize;
+@@ -2509,7 +2536,8 @@ imm_store_chain_info::try_coalesce_bswap
+   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
+     return false;
+ 
+-  if (!check_no_overlap (m_store_info, last, false, last_order, end))
++  if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
++			 merged_store->start, end, first_earlier, first))
+     return false;
+ 
+   /* Don't handle memory copy this way if normal non-bswap processing
+@@ -2601,6 +2629,8 @@ imm_store_chain_info::coalesce_immediate
+ 
+   store_immediate_info *info;
+   unsigned int i, ignore = 0;
++  unsigned int first_earlier = 0;
++  unsigned int end_earlier = 0;
+ 
+   /* Order the stores by the bitposition they write to.  */
+   m_store_info.qsort (sort_by_bitpos);
+@@ -2615,6 +2645,12 @@ imm_store_chain_info::coalesce_immediate
+       if (i <= ignore)
+ 	goto done;
+ 
++      while (first_earlier < end_earlier
++	     && (m_store_info[first_earlier]->bitpos
++		 + m_store_info[first_earlier]->bitsize
++		 <= merged_store->start))
++	first_earlier++;
++
+       /* First try to handle group of stores like:
+ 	 p[0] = data >> 24;
+ 	 p[1] = data >> 16;
+@@ -2628,7 +2664,8 @@ imm_store_chain_info::coalesce_immediate
+ 	{
+ 	  unsigned int try_size;
+ 	  for (try_size = 64; try_size >= 16; try_size >>= 1)
+-	    if (try_coalesce_bswap (merged_store, i - 1, try_size))
++	    if (try_coalesce_bswap (merged_store, i - 1, try_size,
++				    first_earlier))
+ 	      break;
+ 
+ 	  if (try_size >= 16)
+@@ -2636,7 +2673,10 @@ imm_store_chain_info::coalesce_immediate
+ 	      ignore = i + merged_store->stores.length () - 1;
+ 	      m_merged_store_groups.safe_push (merged_store);
+ 	      if (ignore < m_store_info.length ())
+-		merged_store = new merged_store_group (m_store_info[ignore]);
++		{
++		  merged_store = new merged_store_group (m_store_info[ignore]);
++		  end_earlier = ignore;
++		}
+ 	      else
+ 		merged_store = NULL;
+ 	      goto done;
+@@ -2662,12 +2702,16 @@ imm_store_chain_info::coalesce_immediate
+ 	  /* Only allow overlapping stores of constants.  */
+ 	  if (info->rhs_code == INTEGER_CST && merged_store->only_constants)
+ 	    {
++	      unsigned int first_order
++		= MIN (merged_store->first_order, info->order);
+ 	      unsigned int last_order
+ 		= MAX (merged_store->last_order, info->order);
+ 	      unsigned HOST_WIDE_INT end
+ 		= MAX (merged_store->start + merged_store->width,
+ 		       info->bitpos + info->bitsize);
+-	      if (check_no_overlap (m_store_info, i, true, last_order, end))
++	      if (check_no_overlap (m_store_info, i, true, first_order,
++				    last_order, merged_store->start, end,
++				    first_earlier, end_earlier))
+ 		{
+ 		  /* check_no_overlap call above made sure there are no
+ 		     overlapping stores with non-INTEGER_CST rhs_code
+@@ -2696,6 +2740,7 @@ imm_store_chain_info::coalesce_immediate
+ 		  do
+ 		    {
+ 		      unsigned int max_order = 0;
++		      unsigned int min_order = first_order;
+ 		      unsigned first_nonmergeable_int_order = ~0U;
+ 		      unsigned HOST_WIDE_INT this_end = end;
+ 		      k = i;
+@@ -2721,6 +2766,7 @@ imm_store_chain_info::coalesce_immediate
+ 				  break;
+ 				}
+ 			      k = j;
++			      min_order = MIN (min_order, info2->order);
+ 			      this_end = MAX (this_end,
+ 					      info2->bitpos + info2->bitsize);
+ 			    }
+@@ -2736,6 +2782,12 @@ imm_store_chain_info::coalesce_immediate
+ 			    first_nonmergeable_order
+ 			      = MIN (first_nonmergeable_order, info2->order);
+ 			}
++		      if (k > i
++			  && !check_no_overlap (m_store_info, len - 1, true,
++						min_order, try_order,
++						merged_store->start, this_end,
++						first_earlier, end_earlier))
++			k = 0;
+ 		      if (k == 0)
+ 			{
+ 			  if (last_order == try_order)
+@@ -2821,9 +2873,12 @@ imm_store_chain_info::coalesce_immediate
+ 	      info->ops_swapped_p = true;
+ 	    }
+ 	  if (check_no_overlap (m_store_info, i, false,
++				MIN (merged_store->first_order, info->order),
+ 				MAX (merged_store->last_order, info->order),
++				merged_store->start,
+ 				MAX (merged_store->start + merged_store->width,
+-				     info->bitpos + info->bitsize)))
++				     info->bitpos + info->bitsize),
++				first_earlier, end_earlier))
+ 	    {
+ 	      /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
+ 	      if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
+@@ -2868,6 +2923,7 @@ imm_store_chain_info::coalesce_immediate
+ 	delete merged_store;
+ 
+       merged_store = new merged_store_group (info);
++      end_earlier = i;
+       if (dump_file && (dump_flags & TDF_DETAILS))
+ 	fputs ("New store group\n", dump_file);
+ 
+diff -uprN a/gcc/testsuite/gcc.dg/store_merging_31.c b/gcc/testsuite/gcc.dg/store_merging_31.c
+--- a/gcc/testsuite/gcc.dg/store_merging_31.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/store_merging_31.c	2020-12-16 11:15:58.575633230 +0800
+@@ -0,0 +1,27 @@
++/* PR tree-optimization/97053 */
++/* { dg-do run } */
++/* { dg-options "-O2" } */
++
++struct S { short a; char b[9]; int c; char d; int e; };
++
++__attribute__((noipa)) void
++foo (char *x, char *y)
++{
++  if (__builtin_strcmp (x, "ABCDXXXX") != 0
++      || __builtin_strcmp (y, "ABCDXXXX") != 0)
++    __builtin_abort ();
++}
++
++int
++main ()
++{
++  char a[9] = "XXXXXXXX";
++  struct S b = {};
++  __builtin_memcpy (a, "ABCD", 4);
++  b.a = 5;
++  __builtin_memcpy (b.b, a, 8); 
++  b.d = 'X';
++  b.e = 1;
++  foo (a, b.b);
++  return 0;
++}
+diff -uprN a/gcc/testsuite/gcc.dg/store_merging_32.c b/gcc/testsuite/gcc.dg/store_merging_32.c
+--- a/gcc/testsuite/gcc.dg/store_merging_32.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/store_merging_32.c	2020-12-16 11:15:58.575633230 +0800
+@@ -0,0 +1,129 @@
++/* PR tree-optimization/97053 */
++/* { dg-do run } */
++/* { dg-options "-O2 -fno-tree-dse" } */
++
++struct __attribute__((packed, may_alias)) S { long long s; };
++struct __attribute__((packed, may_alias)) T { short t; };
++
++__attribute__((noipa)) void
++test (char *p, char *q, int s)
++{
++  if ((s & 1) == 0)
++    {
++      if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
++	  != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
++	  || (((struct S __attribute__((may_alias)) *) &p[1])->s
++	      != ((struct S __attribute__((may_alias)) *) &q[1])->s)
++	  || (*(short __attribute__((may_alias)) *) &p[2 * sizeof (short)]
++	      != *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)]))
++	__builtin_abort ();
++    }
++  else
++    {
++      if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
++	  != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
++	  || (((struct S __attribute__((may_alias)) *) &p[1])->s
++	      != ((struct S __attribute__((may_alias)) *) &q[1])->s)
++	  || (((struct T __attribute__((may_alias)) *) &p[2 * sizeof (short) - 1])->t
++	      != ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t)
++	  || p[3 * sizeof (short) - 2] != q[3 * sizeof (short) - 2])
++	__builtin_abort ();
++    }
++}
++
++__attribute__((noipa)) void
++foo (long long *p, char *q, char *r, char *s)
++{
++  char a[64] __attribute__((aligned (__alignof (short))));
++  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
++  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
++  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
++  *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
++  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
++  *(short __attribute__((may_alias)) *) &s[2 * sizeof (short)] = 2;
++  test (a, q, 0);
++}
++
++__attribute__((noipa)) void
++bar (long long *p, char *q, char *r, char *s, char *t)
++{
++  char a[64] __attribute__((aligned (__alignof (short))));
++  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
++  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
++  ((struct T __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1])->t = 2;
++  a[3 * sizeof (short) - 2] = 3;
++  *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
++  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
++  ((struct T __attribute__((may_alias)) *) &s[2 * sizeof (short) - 1])->t = 2;
++  t[3 * sizeof (short) - 2] = 3;
++  test (a, q, 1);
++}
++
++__attribute__((noipa)) void
++baz (long long *p, char *q, char *r, char *s)
++{
++  char a[64] __attribute__((aligned (__alignof (short))));
++  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
++  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
++  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
++  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = 2;
++  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
++  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = 1;
++  test (a, q, 2);
++}
++
++__attribute__((noipa)) void
++qux (long long *p, char *q, char *r, char *s, char *t)
++{
++  char a[64] __attribute__((aligned (__alignof (short))));
++  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1] = 2;
++  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
++  a[3 * sizeof (short) - 2] = 3;
++  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
++  ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t = 2;
++  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
++  s[3 * sizeof (short) - 2] = 3;
++  ((struct T __attribute__((may_alias)) *) &t[sizeof (short)])->t = 1;
++  test (a, q, 3);
++}
++
++__attribute__((noipa)) void
++corge (long long *p, char *q, char *r, char *s, short u[3])
++{
++  char a[64] __attribute__((aligned (__alignof (short))));
++  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
++  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
++  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
++  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
++  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
++  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
++  test (a, q, 4);
++}
++
++__attribute__((noipa)) void
++garply (long long *p, char *q, char *r, char *s, short u[3])
++{
++  char a[64] __attribute__((aligned (__alignof (short))));
++  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
++  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
++  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
++  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
++  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
++  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
++  test (a, q, 6);
++}
++
++int
++main ()
++{
++  char a[64] __attribute__((aligned (__alignof (short))));
++  long long p = -1LL;
++  short u[] = { 1, 2, 3 };
++  foo (&p, &a[0], &a[0], &a[0]);
++  bar (&p, &a[0], &a[0], &a[0], &a[0]);
++  baz (&p, &a[0], &a[0], &a[0]);
++  qux (&p, &a[0], &a[0], &a[0], &a[0]);
++  corge (&p, &a[0], &a[0], &a[0], u);
++  garply (&p, &a[0], &a[0], &a[0], u);
++  return 0;
++}
diff --git a/tree-optimization-96920-another-ICE-when-vectorizing.patch b/tree-optimization-96920-another-ICE-when-vectorizing.patch
new file mode 100644
index 0000000..ae6122c
--- /dev/null
+++ b/tree-optimization-96920-another-ICE-when-vectorizing.patch
@@ -0,0 +1,316 @@
+This backport contains 1 patch from the GCC mainstream tree.
+The commit ID of this patch is listed below.
+
+46a58c779af3055a4b10b285a1f4be28abe4351c
+0001-tree-optimization-96920-another-ICE-when-vectorizing.patch
+
+diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96920.c b/gcc/testsuite/gcc.dg/vect/pr96920.c
+--- a/gcc/testsuite/gcc.dg/vect/pr96920.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/vect/pr96920.c	2020-10-26 21:46:25.316000000 +0800
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++
++int a[1024];
++int b[2048];
++
++void foo (int x, int y)
++{
++  for (int i = 0; i < 1024; ++i)
++    {
++      int tem0 = b[2*i];
++      int tem1 = b[2*i+1];
++      for (int j = 0; j < 32; ++j)
++	{
++	  int tem = tem0;
++	  tem0 = tem1;
++	  tem1 = tem;
++	  a[i] += tem0;
++	}
++    }
++}
+diff -uprN a/gcc/testsuite/gfortran.dg/vect/pr96920.f90 b/gcc/testsuite/gfortran.dg/vect/pr96920.f90
+--- a/gcc/testsuite/gfortran.dg/vect/pr96920.f90	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gfortran.dg/vect/pr96920.f90	2020-10-26 21:46:25.316000000 +0800
+@@ -0,0 +1,37 @@
++! { dg-do compile }
++      subroutine ice(npoint, nterm, x, g)
++      implicit none
++      integer    norder
++      parameter (norder=10)
++      integer j
++      integer k
++      integer ii
++      integer nterm
++      integer npoint
++      real b(norder)
++      real c(norder)
++      real d(norder)
++      real x(npoint)
++      real g(npoint)
++      real gg
++      real prev
++      real prev2
++
++          j = 1
++    100   continue
++          j = j+1
++          if (nterm == j)  then
++             do ii=1,npoint
++                k = nterm
++                gg= d(k)
++                prev= 0.0
++                do k=k-1,1,-1
++                   prev2= prev
++                   prev= gg
++                   gg = d(k)+(x(ii)-b(k))*prev-c(k+1)*prev2
++                enddo
++                g(ii) = gg
++             enddo
++          endif
++          go to 100
++      end
+diff -uprN a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
+--- a/gcc/tree-vect-loop.c	2020-10-26 21:45:23.056000000 +0800
++++ b/gcc/tree-vect-loop.c	2020-10-26 21:49:02.884000000 +0800
+@@ -8166,6 +8166,47 @@ scale_profile_for_vect_loop (struct loop
+     scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob);
+ }
+ 
++/* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI
++   latch edge values originally defined by it.  */
++
++static void
++maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
++				     stmt_vec_info def_stmt_info)
++{
++  tree def = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt);
++  if (!def || TREE_CODE (def) != SSA_NAME)
++    return;
++  stmt_vec_info phi_info;
++  imm_use_iterator iter;
++  use_operand_p use_p;
++  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
++    if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
++      if (gimple_bb (phi)->loop_father->header == gimple_bb (phi)
++	  && (phi_info = loop_vinfo->lookup_stmt (phi))
++	  && VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
++	  && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
++	  && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
++	{
++	  loop_p loop = gimple_bb (phi)->loop_father;
++	  edge e = loop_latch_edge (loop);
++	  if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def)
++	    {
++	      stmt_vec_info phi_defs = STMT_VINFO_VEC_STMT (phi_info);
++	      stmt_vec_info latch_defs = STMT_VINFO_VEC_STMT (def_stmt_info);
++	      while (phi_defs && latch_defs)
++		{
++		  add_phi_arg (as_a <gphi *> (phi_defs->stmt),
++			       gimple_get_lhs (latch_defs->stmt), e,
++			       gimple_phi_arg_location (phi, e->dest_idx));
++		  phi_defs = STMT_VINFO_RELATED_STMT (phi_defs);
++		  latch_defs = STMT_VINFO_RELATED_STMT (latch_defs);
++		}
++	      gcc_assert (!latch_defs);
++	      gcc_assert (!phi_defs);
++	    }
++	}
++}
++
+ /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
+    When vectorizing STMT_INFO as a store, set *SEEN_STORE to its
+    stmt_vec_info.  */
+@@ -8533,7 +8574,7 @@ vect_transform_loop (loop_vec_info loop_
+ 
+       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
+ 	   gsi_next (&si))
+-        {
++	{
+ 	  gphi *phi = si.phi ();
+ 	  if (dump_enabled_p ())
+ 	    dump_printf_loc (MSG_NOTE, vect_location,
+@@ -8568,6 +8609,27 @@ vect_transform_loop (loop_vec_info loop_
+ 	    }
+ 	}
+ 
++      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
++	   gsi_next (&si))
++	{
++	  gphi *phi = si.phi ();
++	  stmt_info = loop_vinfo->lookup_stmt (phi);
++	  if (!stmt_info)
++	    continue;
++
++	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
++	      && !STMT_VINFO_LIVE_P (stmt_info))
++	    continue;
++
++	  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
++	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
++	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
++	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
++	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
++	      && ! PURE_SLP_STMT (stmt_info))
++	    maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info);
++	}
++
+       for (gimple_stmt_iterator si = gsi_start_bb (bb);
+ 	   !gsi_end_p (si);)
+ 	{
+@@ -8604,9 +8666,16 @@ vect_transform_loop (loop_vec_info loop_
+ 			= STMT_VINFO_RELATED_STMT (stmt_info);
+ 		      vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,
+ 						&seen_store);
++		      maybe_set_vectorized_backedge_value (loop_vinfo,
++				      			   pat_stmt_info);
++		    }
++		  else
++		    {
++		      vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
++				      		&seen_store);
++		      maybe_set_vectorized_backedge_value (loop_vinfo,
++				      			   stmt_info);
+ 		    }
+-		  vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
+-					    &seen_store);
+ 		}
+ 	      gsi_next (&si);
+ 	      if (seen_store)
+@@ -8623,43 +8692,6 @@ vect_transform_loop (loop_vec_info loop_
+ 	    }
+ 	}
+ 
+-      /* Fill in backedge defs of reductions.  */
+-      for (unsigned i = 0; i < loop_vinfo->reduc_latch_defs.length (); ++i)
+-	{
+-	  stmt_vec_info stmt_info = loop_vinfo->reduc_latch_defs[i];
+-	  stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
+-	  stmt_vec_info phi_info
+-	    = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
+-	  stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+-	  gphi *phi
+-	    = dyn_cast <gphi *> (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt);
+-	  edge e = loop_latch_edge (gimple_bb (phi_info->stmt)->loop_father);
+-	  do
+-	    {
+-	      add_phi_arg (as_a <gphi *> (phi_info->stmt),
+-			   gimple_get_lhs (vec_stmt->stmt), e,
+-			   gimple_phi_arg_location (phi, e->dest_idx));
+-	      phi_info = STMT_VINFO_RELATED_STMT (phi_info);
+-	      vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
+-	    }
+-	  while (phi_info);
+-	  gcc_assert (!vec_stmt);
+-	}
+-      for (unsigned i = 0; i < loop_vinfo->reduc_latch_slp_defs.length (); ++i)
+-	{
+-	  slp_tree slp_node = loop_vinfo->reduc_latch_slp_defs[i].first;
+-	  slp_tree phi_node = loop_vinfo->reduc_latch_slp_defs[i].second;
+-	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
+-	  e = loop_latch_edge (gimple_bb (phi)->loop_father);
+-	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
+-		      == SLP_TREE_VEC_STMTS (slp_node).length ());
+-	  for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
+-	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
+-			 gimple_get_lhs
+-			     (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
+-			 e, gimple_phi_arg_location (phi, e->dest_idx));
+-	}
+-
+       /* Stub out scalar statements that must not survive vectorization.
+ 	 Doing this here helps with grouped statements, or statements that
+ 	 are involved in patterns.  */
+diff -uprN a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
+--- a/gcc/tree-vectorizer.h	2020-10-26 21:45:23.052000000 +0800
++++ b/gcc/tree-vectorizer.h	2020-10-26 21:46:25.316000000 +0800
+@@ -575,11 +575,6 @@ typedef struct _loop_vec_info : public v
+      stmt in the chain.  */
+   auto_vec<stmt_vec_info> reduction_chains;
+ 
+-  /* The vectorized stmts defining the latch values of the reduction
+-     they are involved with.  */
+-  auto_vec<stmt_vec_info> reduc_latch_defs;
+-  auto_vec<std::pair<slp_tree, slp_tree> > reduc_latch_slp_defs;
+-
+   /* Cost vector for a single scalar iteration.  */
+   auto_vec<stmt_info_for_cost> scalar_cost_vec;
+ 
+diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
+--- a/gcc/tree-vect-slp.c	2020-10-26 21:45:23.052000000 +0800
++++ b/gcc/tree-vect-slp.c	2020-10-26 21:46:25.320000000 +0800
+@@ -2189,6 +2189,7 @@ vect_analyze_slp_instance (vec_info *vin
+ 	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
+ 	  SLP_INSTANCE_LOADS (new_instance) = vNULL;
+ 	  SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
++	  new_instance->reduc_phis = NULL;
+ 
+ 	  vect_gather_slp_loads (new_instance, node);
+ 	  if (dump_enabled_p ())
+@@ -4282,6 +4283,26 @@ vect_schedule_slp (vec_info *vinfo)
+       stmt_vec_info store_info;
+       unsigned int j;
+ 
++      /* For reductions set the latch values of the vectorized PHIs.  */
++      if (instance->reduc_phis
++	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
++		  	(instance->reduc_phis)[0]) != FOLD_LEFT_REDUCTION
++	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
++		  	(instance->reduc_phis)[0]) != EXTRACT_LAST_REDUCTION)
++	{
++	  slp_tree slp_node = root;
++	  slp_tree phi_node = instance->reduc_phis;
++	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
++	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
++	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
++		      == SLP_TREE_VEC_STMTS (slp_node).length ());
++	  for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
++	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
++			 gimple_get_lhs
++			     (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
++			 e, gimple_phi_arg_location (phi, e->dest_idx));
++	}
++
+       /* Remove scalar call stmts.  Do not do this for basic-block
+ 	 vectorization as not all uses may be vectorized.
+ 	 ???  Why should this be necessary?  DCE should be able to
+diff -uprN a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
+--- a/gcc/tree-vect-stmts.c	2020-10-26 21:45:23.012000000 +0800
++++ b/gcc/tree-vect-stmts.c	2020-10-26 21:46:25.320000000 +0800
+@@ -10229,37 +10229,6 @@ vect_transform_stmt (stmt_vec_info stmt_
+   if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
+     return is_store;
+ 
+-  /* If this stmt defines a value used on a backedge, record it so
+-     we can update the vectorized PHIs later.  */
+-  stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
+-  stmt_vec_info reduc_info;
+-  if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
+-      && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
+-      && (reduc_info = info_for_reduction (orig_stmt_info))
+-      && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
+-      && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
+-    {
+-      gphi *phi;
+-      edge e;
+-      if (!slp_node
+-	  && (phi = dyn_cast <gphi *>
+-		      (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
+-	  && dominated_by_p (CDI_DOMINATORS,
+-			     gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
+-	  && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
+-	  && (PHI_ARG_DEF_FROM_EDGE (phi, e)
+-	      == gimple_get_lhs (orig_stmt_info->stmt)))
+-	{
+-	  as_a <loop_vec_info> (vinfo)->reduc_latch_defs.safe_push (stmt_info);
+-	}
+-      else if (slp_node
+-	       && slp_node != slp_node_instance->reduc_phis)
+-	{
+-	  as_a <loop_vec_info> (vinfo)->reduc_latch_slp_defs.safe_push
+-	    (std::make_pair (slp_node, slp_node_instance->reduc_phis));
+-	}
+-    }
+-
+   /* Handle stmts whose DEF is used outside the loop-nest that is
+      being vectorized.  */
+   done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
diff --git a/tree-optimization-97812-fix-range-query-in-VRP-asser.patch b/tree-optimization-97812-fix-range-query-in-VRP-asser.patch
new file mode 100644
index 0000000..09c77ee
--- /dev/null
+++ b/tree-optimization-97812-fix-range-query-in-VRP-asser.patch
@@ -0,0 +1,48 @@
+This backport contains 1 patch from the GCC mainstream tree.
+The commit id of the patch is listed below.
+
+dcfd302a79a5e2ea3bb16fc4fc45a5ee31cc0eab
+0001-tree-optimization-97812-fix-range-query-in-VRP-asser.patch
+
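+As context for the fix below: for a conversion the assert machinery
+checks whether the recorded value range of the conversion source fits
+in the destination type, and before this patch the bounds were tested
+without first being extended according to the sign of the source type.
+A minimal standalone sketch of why that sign matters (illustration
+only, not part of the backport):
+
+#include <stdio.h>
+
+int
+main (void)
+{
+  unsigned short bound = 0x8000;       /* 32768 when read as unsigned */
+  int as_unsigned = (int) bound;       /* value-preserving: 32768 */
+  int as_signed = (int) (short) bound; /* reinterpreted: -32768 with GCC */
+  /* Whether 0x8000 "fits" in a narrower signed type depends on which
+     reading is used, hence the widest_int::from (..., TYPE_SIGN (...))
+     extension in the hunk below.  */
+  printf ("unsigned reading: %d\n", as_unsigned);
+  printf ("signed reading:   %d\n", as_signed);
+  return 0;
+}
+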
+diff --git a/gcc/testsuite/gcc.dg/torture/pr97812.c b/gcc/testsuite/gcc.dg/torture/pr97812.c
+new file mode 100644
+index 00000000000..4d468adf8fa
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/torture/pr97812.c
+@@ -0,0 +1,15 @@
++/* { dg-do run } */
++/* { dg-additional-options "-fdisable-tree-evrp" } */
++
++unsigned char c;
++
++int main() {
++volatile short b = 4066;
++  unsigned short bp = b;
++  unsigned d = bp & 2305;
++  signed char e = d;
++  c = e ? : e;
++  if (!d)
++    __builtin_abort ();
++  return 0;
++}
+diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
+index 54ce017e8b2..d661866630e 100644
+--- a/gcc/tree-vrp.c
++++ b/gcc/tree-vrp.c
+@@ -1740,8 +1740,14 @@ register_edge_assert_for_2 (tree name, edge e,
+ 	      && ((TYPE_PRECISION (TREE_TYPE (name))
+ 		   > TYPE_PRECISION (TREE_TYPE (rhs1)))
+ 		  || (get_range_info (rhs1, &rmin, &rmax) == VR_RANGE
+-		      && wi::fits_to_tree_p (rmin, TREE_TYPE (name))
+-		      && wi::fits_to_tree_p (rmax, TREE_TYPE (name)))))
++		      && wi::fits_to_tree_p
++			   (widest_int::from (rmin,
++					      TYPE_SIGN (TREE_TYPE (rhs1))),
++			    TREE_TYPE (name))
++		      && wi::fits_to_tree_p
++			   (widest_int::from (rmax,
++					      TYPE_SIGN (TREE_TYPE (rhs1))),
++			    TREE_TYPE (name)))))
+ 	    add_assert_info (asserts, rhs1, rhs1,
+ 		 	     comp_code, fold_convert (TREE_TYPE (rhs1), val));
+ 	}
diff --git a/vectorizable-comparison-Swap-operands-only-once.patch b/vectorizable-comparison-Swap-operands-only-once.patch
new file mode 100644
index 0000000..e42ef96
--- /dev/null
+++ b/vectorizable-comparison-Swap-operands-only-once.patch
@@ -0,0 +1,19 @@
+This backport contains 1 patch from the GCC mainstream tree.
+The commit id of the patch is listed below.
+
+a0aeb7fb93da156b64fd08391c79ff35a69af7ba
+0001-tree-vect-stmts.c-vectorizable_comparison-Swap-opera.patch
+
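+The one-line change below moves the operand swap out of the per-copy
+loop: swapping unconditionally on every iteration undoes itself on
+every second vector copy, so only half of the copies see the intended
+operand order.  A standalone sketch of that effect (illustration only,
+not part of the backport):
+
+#include <stdio.h>
+
+int
+main (void)
+{
+  int rhs1 = 1, rhs2 = 2;
+  for (int j = 0; j < 4; j++)
+    {
+      /* Old behaviour: swap on every iteration, so the order toggles.  */
+      int tmp = rhs1;
+      rhs1 = rhs2;
+      rhs2 = tmp;
+      printf ("copy %d: rhs1=%d rhs2=%d\n", j, rhs1, rhs2);
+    }
+  /* Fixed behaviour: swap only when j == 0, so every copy uses the
+     same, already-swapped order.  */
+  return 0;
+}
+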
+diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
+index e921225b5ec..601a6f55fbf 100644
+--- a/gcc/tree-vect-stmts.c
++++ b/gcc/tree-vect-stmts.c
+@@ -10369,7 +10369,7 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ 
+       if (!slp_node)
+ 	{
+-	  if (swap_p)
++	  if (swap_p && j == 0)
+ 	    std::swap (vec_rhs1, vec_rhs2);
+ 	  vec_oprnds0.quick_push (vec_rhs1);
+ 	  vec_oprnds1.quick_push (vec_rhs2);
diff --git a/x86-Fix-bf16-and-matrix.patch b/x86-Fix-bf16-and-matrix.patch
new file mode 100644
index 0000000..8be95f8
--- /dev/null
+++ b/x86-Fix-bf16-and-matrix.patch
@@ -0,0 +1,321 @@
+This backport contains 4 patches from the GCC mainstream tree.
+The commit ids of the patches are listed below in chronological order.
+
+0001-re-PR-target-90424-memcpy-into-vector-builtin-not-op.patch
+1bf2a0b90f2457f6d9301535560eb5e05978261b
+
+0002-testsuite-aarch64-arm-Add-missing-quotes-to-expected.patch
+0ec537f3500924f29505977aa89c2a1d4671c584
+
+0003-x86-Tweak-testcases-for-PR82361.patch
+ad4644f378fe2f731cd987a4aff14b935f530b88
+
+0004-x86-Robustify-vzeroupper-handling-across-calls.patch
+2a2e3a0dfcbe0861915f421d11b828f0c35023f0
+
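+As context for the first backport (PR target/90424): it lets the
+middle end represent a short memcpy into a vector object as an
+element or sub-vector BIT_INSERT_EXPR instead of forcing the vector
+through memory.  A rough illustration of the affected source pattern
+(the real coverage is in the g++.target testcases added below; the
+function name here is made up):
+
+typedef long long v2di __attribute__ ((vector_size (16)));
+
+v2di
+load_low64 (const void *p)
+{
+  v2di r = { 0, 0 };
+  /* Copy only the low 8 bytes; with the backport this can become a
+     single element insert rather than a spill to the stack.  */
+  __builtin_memcpy (&r, p, 8);
+  return r;
+}
+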
+diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
+index 9282a8fb6..ba72da1ec 100644
+--- a/gcc/config/i386/i386.c
++++ b/gcc/config/i386/i386.c
+@@ -95,6 +95,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "i386-builtins.h"
+ #include "i386-expand.h"
+ #include "i386-features.h"
++#include "function-abi.h"
+ 
+ /* This file should be included last.  */
+ #include "target-def.h"
+@@ -13529,6 +13530,15 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
+ 	    }
+ 	}
+ 
++      /* If the function is known to preserve some SSE registers,
++	 RA and previous passes can legitimately rely on that for
++	 modes wider than 256 bits.  It's only safe to issue a
++	 vzeroupper if all SSE registers are clobbered.  */
++      const function_abi &abi = insn_callee_abi (insn);
++      if (!hard_reg_set_subset_p (reg_class_contents[ALL_SSE_REGS],
++				  abi.mode_clobbers (V4DImode)))
++	return AVX_U128_ANY;
++
+       return AVX_U128_CLEAN;
+     }
+ 
+diff --git a/gcc/testsuite/g++.target/i386/pr90424-1.C b/gcc/testsuite/g++.target/i386/pr90424-1.C
+new file mode 100644
+index 000000000..9df8c089b
+--- /dev/null
++++ b/gcc/testsuite/g++.target/i386/pr90424-1.C
+@@ -0,0 +1,32 @@
++/* { dg-do compile { target c++11 } } */
++/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
++
++template 
++using V [[gnu::vector_size(16)]] = T;
++
++template )>
++V load(const void *p) {
++  using W = V;
++  W r;
++  __builtin_memcpy(&r, p, M);
++  return r;
++}
++
++// movq or movsd
++template V load(const void *);     // bad
++template V load(const void *);   // bad
++template V load(const void *);       // bad
++template V load(const void *);     // good
++// the following is disabled because V2SF isn't a supported mode
++// template V load(const void *);   // bad
++template V load(const void *); // good (movsd?)
++
++// movd or movss
++template V load(const void *);   // bad
++template V load(const void *); // bad
++template V load(const void *);     // good
++template V load(const void *); // good
++
++/* We should end up with one load and one insert for each function.  */
++/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
++/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
+diff --git a/gcc/testsuite/g++.target/i386/pr90424-2.C b/gcc/testsuite/g++.target/i386/pr90424-2.C
+new file mode 100644
+index 000000000..3abb65f45
+--- /dev/null
++++ b/gcc/testsuite/g++.target/i386/pr90424-2.C
+@@ -0,0 +1,31 @@
++/* { dg-do compile { target c++11 } } */
++/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
++
++template 
++using V [[gnu::vector_size(16)]] = T;
++
++template )>
++V load(const void *p) {
++  V r = {};
++  __builtin_memcpy(&r, p, M);
++  return r;
++}
++
++// movq or movsd
++template V load(const void *);     // bad
++template V load(const void *);   // bad
++template V load(const void *);       // bad
++template V load(const void *);     // good
++// the following is disabled because V2SF isn't a supported mode
++// template V load(const void *);   // bad
++template V load(const void *); // good (movsd?)
++
++// movd or movss
++template V load(const void *);   // bad
++template V load(const void *); // bad
++template V load(const void *);     // good
++template V load(const void *); // good
++
++/* We should end up with one load and one insert for each function.  */
++/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
++/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
+diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_10.c b/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
+index 184990471..d96a8733a 100644
+--- a/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
++++ b/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
+@@ -13,4 +13,4 @@ foo (uint8x16_t a, uint8x16_t b, uint8x16_t c)
+   return vbslq_u8 (a, b, c); /* { dg-message "called from here" } */
+ }
+ 
+-/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
++/* { dg-error "inlining failed in call to 'always_inline'" "" { target *-*-* } 0 } */
+diff --git a/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c b/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
+index 05dc579f2..fb6e0b9cd 100644
+--- a/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
++++ b/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
+@@ -14,5 +14,5 @@ foo (uint8x16_t *p)
+   *p = vmovq_n_u8 (3); /* { dg-message "called from here" } */
+ }
+ 
+-/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
++/* { dg-error "inlining failed in call to 'always_inline'" "" { target *-*-* } 0 } */
+ 
+diff --git a/gcc/testsuite/gcc.target/i386/pr82361-1.c b/gcc/testsuite/gcc.target/i386/pr82361-1.c
+index e7c356557..dec1792ae 100644
+--- a/gcc/testsuite/gcc.target/i386/pr82361-1.c
++++ b/gcc/testsuite/gcc.target/i386/pr82361-1.c
+@@ -4,50 +4,50 @@
+ /* We should be able to optimize all %eax to %rax zero extensions, because
+    div and idiv instructions with 32-bit operands zero-extend both results.   */
+ /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
+-/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
+-   one.  */
++/* FIXME: The compiler does not merge zero-extension to the modulo part
++   of f1 and f2.  */
+ /* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */
+ 
+ void
+ f1 (unsigned int a, unsigned int b)
+ {
+-  unsigned long long c = a / b;
+-  unsigned long long d = a % b;
++  register unsigned long long c asm ("rax") = a / b;
++  register unsigned long long d asm ("rdx") = a % b;
+   asm volatile ("" : : "r" (c), "r" (d));
+ }
+ 
+ void
+ f2 (int a, int b)
+ {
+-  unsigned long long c = (unsigned int) (a / b);
+-  unsigned long long d = (unsigned int) (a % b);
++  register unsigned long long c asm ("rax") = (unsigned int) (a / b);
++  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
+   asm volatile ("" : : "r" (c), "r" (d));
+ }
+ 
+ void
+ f3 (unsigned int a, unsigned int b)
+ {
+-  unsigned long long c = a / b;
++  register unsigned long long c asm ("rax") = a / b;
+   asm volatile ("" : : "r" (c));
+ }
+ 
+ void
+ f4 (int a, int b)
+ {
+-  unsigned long long c = (unsigned int) (a / b);
++  register unsigned long long c asm ("rax") = (unsigned int) (a / b);
+   asm volatile ("" : : "r" (c));
+ }
+ 
+ void
+ f5 (unsigned int a, unsigned int b)
+ {
+-  unsigned long long d = a % b;
++  register unsigned long long d asm ("rdx") = a % b;
+   asm volatile ("" : : "r" (d));
+ }
+ 
+ void
+ f6 (int a, int b)
+ {
+-  unsigned long long d = (unsigned int) (a % b);
++  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
+   asm volatile ("" : : "r" (d));
+ }
+diff --git a/gcc/testsuite/gcc.target/i386/pr82361-2.c b/gcc/testsuite/gcc.target/i386/pr82361-2.c
+index c1e484d6e..2d87de182 100644
+--- a/gcc/testsuite/gcc.target/i386/pr82361-2.c
++++ b/gcc/testsuite/gcc.target/i386/pr82361-2.c
+@@ -4,7 +4,8 @@
+ /* We should be able to optimize all %eax to %rax zero extensions, because
+    div and idiv instructions with 32-bit operands zero-extend both results.   */
+ /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
+-/* Ditto %edx to %rdx zero extensions.  */
+-/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
++/* FIXME: The compiler does not merge zero-extension to the modulo part
++   of f1 and f2.  */
++/* { dg-final { scan-assembler-times "movl\t%edx" 4 } } */
+ 
+ #include "pr82361-1.c"
+diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
+index 527deffe4..be47519bc 100644
+--- a/gcc/tree-cfg.c
++++ b/gcc/tree-cfg.c
+@@ -4297,8 +4297,17 @@ verify_gimple_assign_ternary (gassign *stmt)
+ 	}
+       if (! ((INTEGRAL_TYPE_P (rhs1_type)
+ 	      && INTEGRAL_TYPE_P (rhs2_type))
++	     /* Vector element insert.  */
+ 	     || (VECTOR_TYPE_P (rhs1_type)
+-		 && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))))
++		 && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))
++	     /* Aligned sub-vector insert.  */
++	     || (VECTOR_TYPE_P (rhs1_type)
++		 && VECTOR_TYPE_P (rhs2_type)
++		 && types_compatible_p (TREE_TYPE (rhs1_type),
++					TREE_TYPE (rhs2_type))
++		 && multiple_p (TYPE_VECTOR_SUBPARTS (rhs1_type),
++				TYPE_VECTOR_SUBPARTS (rhs2_type))
++		 && multiple_of_p (bitsizetype, rhs3, TYPE_SIZE (rhs2_type)))))
+ 	{
+ 	  error ("not allowed type combination in BIT_INSERT_EXPR");
+ 	  debug_generic_expr (rhs1_type);
+diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c
+index 1dc544b6d..a149f5e79 100644
+--- a/gcc/tree-ssa.c
++++ b/gcc/tree-ssa.c
+@@ -1522,8 +1522,6 @@ non_rewritable_lvalue_p (tree lhs)
+       if (DECL_P (decl)
+ 	  && VECTOR_TYPE_P (TREE_TYPE (decl))
+ 	  && TYPE_MODE (TREE_TYPE (decl)) != BLKmode
+-	  && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)),
+-			      TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl))), 0)
+ 	  && known_ge (mem_ref_offset (lhs), 0)
+ 	  && known_gt (wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))),
+ 		       mem_ref_offset (lhs))
+@@ -1531,7 +1529,24 @@ non_rewritable_lvalue_p (tree lhs)
+ 			    TYPE_SIZE_UNIT (TREE_TYPE (lhs)))
+ 	  && known_ge (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (decl))),
+ 		       wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (lhs)))))
+-	return false;
++	{
++	  poly_uint64 lhs_bits, nelts;
++	  if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)), &lhs_bits)
++	      && multiple_p (lhs_bits,
++			     tree_to_uhwi
++			       (TYPE_SIZE (TREE_TYPE (TREE_TYPE (decl)))),
++			     &nelts))
++	    {
++	      if (known_eq (nelts, 1u))
++		return false;
++	      /* For sub-vector inserts the insert vector mode has to be
++		 supported.  */
++	      tree vtype = build_vector_type (TREE_TYPE (TREE_TYPE (decl)),
++					      nelts);
++	      if (TYPE_MODE (vtype) != BLKmode)
++		return false;
++	    }
++	}
+     }
+ 
+   /* A vector-insert using a BIT_FIELD_REF is rewritable using
+@@ -1869,20 +1884,30 @@ execute_update_addresses_taken (void)
+ 		    && bitmap_bit_p (suitable_for_renaming, DECL_UID (sym))
+ 		    && VECTOR_TYPE_P (TREE_TYPE (sym))
+ 		    && TYPE_MODE (TREE_TYPE (sym)) != BLKmode
+-		    && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)),
+-					TYPE_SIZE_UNIT
+-					  (TREE_TYPE (TREE_TYPE (sym))), 0)
+-		    && tree_fits_uhwi_p (TREE_OPERAND (lhs, 1))
+-		    && tree_int_cst_lt (TREE_OPERAND (lhs, 1),
+-					TYPE_SIZE_UNIT (TREE_TYPE (sym)))
+-		    && (tree_to_uhwi (TREE_OPERAND (lhs, 1))
+-			% tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) == 0)
++		    && known_ge (mem_ref_offset (lhs), 0)
++		    && known_gt (wi::to_poly_offset
++				   (TYPE_SIZE_UNIT (TREE_TYPE (sym))),
++				 mem_ref_offset (lhs))
++		    && multiple_of_p (sizetype,
++				      TREE_OPERAND (lhs, 1),
++				      TYPE_SIZE_UNIT (TREE_TYPE (lhs))))
+ 		  {
+ 		    tree val = gimple_assign_rhs1 (stmt);
+ 		    if (! types_compatible_p (TREE_TYPE (val),
+ 					      TREE_TYPE (TREE_TYPE (sym))))
+ 		      {
+-			tree tem = make_ssa_name (TREE_TYPE (TREE_TYPE (sym)));
++			poly_uint64 lhs_bits, nelts;
++			tree temtype = TREE_TYPE (TREE_TYPE (sym));
++			if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)),
++					     &lhs_bits)
++			    && multiple_p (lhs_bits,
++					   tree_to_uhwi
++					     (TYPE_SIZE (TREE_TYPE
++							   (TREE_TYPE (sym)))),
++					   &nelts)
++			    && maybe_ne (nelts, 1u))
++			  temtype = build_vector_type (temtype, nelts);
++			tree tem = make_ssa_name (temtype);
+ 			gimple *pun
+ 			  = gimple_build_assign (tem,
+ 						 build1 (VIEW_CONVERT_EXPR,
-- 
Gitee